SMEInstrFormats.td 195 KB


  1. //=-- SMEInstrFormats.td - AArch64 SME Instruction classes -*- tablegen -*--=//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. //
  9. // AArch64 Scalable Matrix Extension (SME) Instruction Class Definitions.
  10. //
  11. //===----------------------------------------------------------------------===//
  12. def imm_to_tile8 : ComplexPattern<i32, 1, "ImmToTile<AArch64::ZAB0>", []>;
  13. def imm_to_tile16 : ComplexPattern<i32, 1, "ImmToTile<AArch64::ZAH0>", []>;
  14. def imm_to_tile32 : ComplexPattern<i32, 1, "ImmToTile<AArch64::ZAS0>", []>;
  15. def imm_to_tile64 : ComplexPattern<i32, 1, "ImmToTile<AArch64::ZAD0>", []>;
  16. def imm_to_tile128 : ComplexPattern<i32, 1, "ImmToTile<AArch64::ZAQ0>", []>;
  17. def tileslice8 : ComplexPattern<i32 , 2, "SelectSMETileSlice<15, 1>", []>;
  18. def tileslice16 : ComplexPattern<i32 , 2, "SelectSMETileSlice<7, 1>", []>;
  19. def tileslice32 : ComplexPattern<i32 , 2, "SelectSMETileSlice<3, 1>", []>;
  20. def tileslice64 : ComplexPattern<i32 , 2, "SelectSMETileSlice<1, 1>", []>;
  21. def tileslice128 : ComplexPattern<i32 , 2, "SelectSMETileSlice<0, 1>", []>; // nop
  22. def tileslicerange3s2 : ComplexPattern<i32, 2, "SelectSMETileSlice<14, 2>", []>;
  23. def tileslicerange2s2 : ComplexPattern<i32, 2, "SelectSMETileSlice<6, 2>", []>;
  24. def am_sme_indexed_b4 :ComplexPattern<iPTR, 2, "SelectAddrModeIndexedSVE<0,15>", [], [SDNPWantRoot]>;
  25. //===----------------------------------------------------------------------===//
  26. // SME Pseudo Classes
  27. //===----------------------------------------------------------------------===//
  28. def getSMEPseudoMap : InstrMapping {
  29. let FilterClass = "SMEPseudo2Instr";
  30. let RowFields = ["PseudoName"];
  31. let ColFields = ["IsInstr"];
  32. let KeyCol = ["0"];
  33. let ValueCols = [["1"]];
  34. }
  35. class SMEPseudo2Instr<string name, bit instr> {
  36. string PseudoName = name;
  37. bit IsInstr = instr;
  38. }
  39. class sme_outer_product_pseudo<ZPRRegOp zpr_ty, SMEMatrixTypeEnum za_flag>
  40. : Pseudo<(outs), (ins i32imm:$tile, PPR3bAny:$pn, PPR3bAny:$pm,
  41. zpr_ty:$zn, zpr_ty:$zm), []>,
  42. Sched<[]> {
  43. // Translated to the actual instructions in AArch64ISelLowering.cpp
  44. let SMEMatrixType = za_flag;
  45. let usesCustomInserter = 1;
  46. }
  47. class sme2_za_array_2op_multi_single_pseudo<string name, Operand index_ty, RegisterOperand multi_vector_ty,
  48. ZPRRegOp zpr_ty, SMEMatrixTypeEnum za_flag>
  49. : SMEPseudo2Instr<name, 0>,
  50. Pseudo<(outs), (ins MatrixIndexGPR32Op8_11:$Rv, index_ty:$imm3, multi_vector_ty:$Zn, zpr_ty:$Zm), []> {
  51. let SMEMatrixType = za_flag;
  52. let usesCustomInserter = 1;
  53. }
  54. class sme2_za_array_2op_multi_multi_pseudo<string name, Operand index_ty, RegisterOperand multi_vector_ty,
  55. SMEMatrixTypeEnum za_flag>
  56. : SMEPseudo2Instr<name, 0>,
  57. Pseudo<(outs), (ins MatrixIndexGPR32Op8_11:$Rv, index_ty:$imm3, multi_vector_ty:$Zn, multi_vector_ty:$Zm), []> {
  58. let SMEMatrixType = za_flag;
  59. let usesCustomInserter = 1;
  60. }
  61. class sme2_za_array_2op_multi_index_pseudo<string name, Operand index_ty, RegisterOperand multi_vector_ty,
  62. ZPRRegOp zpr_ty, Operand imm_ty, SMEMatrixTypeEnum za_flag>
  63. : SMEPseudo2Instr<name, 0>,
  64. Pseudo<(outs), (ins MatrixIndexGPR32Op8_11:$Rv, index_ty:$imm3, multi_vector_ty:$Zn, zpr_ty:$Zm, imm_ty:$i), []> {
  65. let SMEMatrixType = za_flag;
  66. let usesCustomInserter = 1;
  67. }
  68. //===----------------------------------------------------------------------===//
  69. // SME pattern match helpers.
  70. //===----------------------------------------------------------------------===//
  71. class SME2_ZA_TwoOp_Multi_Single_Pat<string name, SDPatternOperator intrinsic, Operand index_ty, ZPRRegOp zpr_ty,
  72. ValueType vt, ComplexPattern tileslice>
  73. : Pat<(intrinsic (i32 (tileslice MatrixIndexGPR32Op8_11:$base, index_ty:$offset)), vt:$Zn, vt:$Zm),
  74. (!cast<Instruction>(name # _PSEUDO) $base, $offset, vt:$Zn, zpr_ty:$Zm)>;
  75. class SME2_ZA_TwoOp_VG2_Multi_Single_Pat<string name, SDPatternOperator intrinsic, Operand index_ty, ZPRRegOp zpr_ty,
  76. ValueType vt, ComplexPattern tileslice>
  77. : Pat<(intrinsic (i32 (tileslice MatrixIndexGPR32Op8_11:$base, index_ty:$offset)), vt:$Zn1, vt:$Zn2, vt:$Zm),
  78. (!cast<Instruction>(name # _PSEUDO) $base, $offset, (REG_SEQUENCE ZPR2, vt:$Zn1, zsub0, vt:$Zn2, zsub1),
  79. zpr_ty:$Zm)>;
  80. class SME2_ZA_TwoOp_VG4_Multi_Single_Pat<string name, SDPatternOperator intrinsic, Operand index_ty, ZPRRegOp zpr_ty,
  81. ValueType vt, ComplexPattern tileslice>
  82. : Pat<(intrinsic (i32 (tileslice MatrixIndexGPR32Op8_11:$base, index_ty:$offset)),
  83. vt:$Zn1, vt:$Zn2, vt:$Zn3, vt:$Zn4, vt:$Zm),
  84. (!cast<Instruction>(name # _PSEUDO) $base, $offset,
  85. (REG_SEQUENCE ZPR4, vt:$Zn1, zsub0, vt:$Zn2, zsub1, vt:$Zn3, zsub2, vt:$Zn4, zsub3),
  86. zpr_ty:$Zm)>;
  87. class SME2_ZA_TwoOp_VG2_Multi_Multi_Pat<string name, SDPatternOperator intrinsic, Operand index_ty, ValueType vt, ComplexPattern tileslice>
  88. : Pat<(intrinsic (i32 (tileslice MatrixIndexGPR32Op8_11:$base, index_ty:$offset)), vt:$Zn1, vt:$Zn2, vt:$Zm1, vt:$Zm2),
  89. (!cast<Instruction>(name # _PSEUDO) $base, $offset,
  90. (REG_SEQUENCE ZPR2Mul2, vt:$Zn1, zsub0, vt:$Zn2, zsub1),
  91. (REG_SEQUENCE ZPR2Mul2, vt:$Zm1, zsub0, vt:$Zm2, zsub1))>;
  92. class SME2_ZA_TwoOp_VG4_Multi_Multi_Pat<string name, SDPatternOperator intrinsic, Operand index_ty, ValueType vt, ComplexPattern tileslice>
  93. : Pat<(intrinsic (i32 (tileslice MatrixIndexGPR32Op8_11:$base, index_ty:$offset)),
  94. vt:$Zn1, vt:$Zn2, vt:$Zn3, vt:$Zn4, vt:$Zm1, vt:$Zm2, vt:$Zm3, vt:$Zm4),
  95. (!cast<Instruction>(name # _PSEUDO) $base, $offset,
  96. (REG_SEQUENCE ZPR4Mul4, vt:$Zn1, zsub0, vt:$Zn2, zsub1, vt:$Zn3, zsub2, vt:$Zn4, zsub3),
  97. (REG_SEQUENCE ZPR4Mul4, vt:$Zm1, zsub0, vt:$Zm2, zsub1, vt:$Zm3, zsub2, vt:$Zm4, zsub3))>;
  98. class SME2_ZA_TwoOp_Multi_Index_Pat<string name, SDPatternOperator intrinsic, Operand index_ty, ZPRRegOp zpr_ty, ValueType vt,
  99. Operand imm_ty, ComplexPattern tileslice>
  100. : Pat<(intrinsic (i32 (tileslice MatrixIndexGPR32Op8_11:$base, index_ty:$offset)), vt:$Zn, vt:$Zm, (i32 imm_ty:$i)),
  101. (!cast<Instruction>(name # _PSEUDO) $base, $offset, vt:$Zn, zpr_ty:$Zm, (i32 imm_ty:$i))>;
  102. class SME2_ZA_TwoOp_VG2_Multi_Index_Pat<string name, SDPatternOperator intrinsic, Operand index_ty, ZPRRegOp zpr_ty, ValueType vt,
  103. Operand imm_ty, ComplexPattern tileslice>
  104. : Pat<(intrinsic (i32 (tileslice MatrixIndexGPR32Op8_11:$base, index_ty:$offset)), vt:$Zn1, vt:$Zn2, vt:$Zm, (i32 imm_ty:$i)),
  105. (!cast<Instruction>(name # _PSEUDO) $base, $offset,
  106. (REG_SEQUENCE ZPR2Mul2, vt:$Zn1, zsub0, vt:$Zn2, zsub1), zpr_ty:$Zm, imm_ty:$i)>;
  107. class SME2_ZA_TwoOp_VG4_Multi_Index_Pat<string name, SDPatternOperator intrinsic, Operand index_ty, ZPRRegOp zpr_ty, ValueType vt,
  108. Operand imm_ty, ComplexPattern tileslice>
  109. : Pat<(intrinsic (i32 (tileslice MatrixIndexGPR32Op8_11:$base, index_ty:$offset)),
  110. vt:$Zn1, vt:$Zn2, vt:$Zn3, vt:$Zn4, vt:$Zm, (i32 imm_ty:$i)),
  111. (!cast<Instruction>(name # _PSEUDO) $base, $offset,
  112. (REG_SEQUENCE ZPR4Mul4, vt:$Zn1, zsub0, vt:$Zn2, zsub1, vt:$Zn3, zsub2, vt:$Zn4, zsub3),
  113. zpr_ty:$Zm, imm_ty:$i)>;
  114. class SME2_Cvt_VG4_Pat<string name, SDPatternOperator intrinsic, ValueType out_vt, ValueType in_vt>
  115. : Pat<(out_vt (intrinsic in_vt:$Zn1, in_vt:$Zn2, in_vt:$Zn3, in_vt:$Zn4)),
  116. (!cast<Instruction>(name) (REG_SEQUENCE ZPR4Mul4, in_vt:$Zn1, zsub0, in_vt:$Zn2, zsub1, in_vt:$Zn3, zsub2, in_vt:$Zn4, zsub3))>;
  117. //===----------------------------------------------------------------------===//
  118. // SME Outer Products
  119. //===----------------------------------------------------------------------===//
  120. class sme_fp_outer_product_inst<bit S, bits<2> sz, bit op, MatrixTileOperand za_ty,
  121. ZPRRegOp zpr_ty, string mnemonic>
  122. : I<(outs za_ty:$ZAda),
  123. (ins za_ty:$_ZAda, PPR3bAny:$Pn, PPR3bAny:$Pm, zpr_ty:$Zn, zpr_ty:$Zm),
  124. mnemonic, "\t$ZAda, $Pn/m, $Pm/m, $Zn, $Zm",
  125. "", []>,
  126. Sched<[]> {
  127. bits<5> Zm;
  128. bits<3> Pm;
  129. bits<3> Pn;
  130. bits<5> Zn;
  131. let Inst{31-25} = 0b1000000;
  132. let Inst{24} = op;
  133. let Inst{23} = 0b1;
  134. let Inst{22-21} = sz;
  135. let Inst{20-16} = Zm;
  136. let Inst{15-13} = Pm;
  137. let Inst{12-10} = Pn;
  138. let Inst{9-5} = Zn;
  139. let Inst{4} = S;
  140. let Inst{3} = op;
  141. let Constraints = "$ZAda = $_ZAda";
  142. }
  143. multiclass sme_outer_product_fp32<bit S, string mnemonic, SDPatternOperator op> {
  144. def NAME : sme_fp_outer_product_inst<S, 0b00, 0b0, TileOp32, ZPR32, mnemonic>, SMEPseudo2Instr<NAME, 1> {
  145. bits<2> ZAda;
  146. let Inst{1-0} = ZAda;
  147. let Inst{2} = 0b0;
  148. }
  149. def NAME # _PSEUDO : sme_outer_product_pseudo<ZPR32, SMEMatrixTileS>, SMEPseudo2Instr<NAME, 0>;
  150. def : Pat<(op timm32_0_3:$tile, (nxv4i1 PPR3bAny:$pn), (nxv4i1 PPR3bAny:$pm),
  151. (nxv4f32 ZPR32:$zn), (nxv4f32 ZPR32:$zm)),
  152. (!cast<Instruction>(NAME # _PSEUDO) timm32_0_3:$tile, $pn, $pm, $zn, $zm)>;
  153. }
  154. multiclass sme_outer_product_fp64<bit S, string mnemonic, SDPatternOperator op> {
  155. def NAME : sme_fp_outer_product_inst<S, 0b10, 0b0, TileOp64, ZPR64, mnemonic>, SMEPseudo2Instr<NAME, 1> {
  156. bits<3> ZAda;
  157. let Inst{2-0} = ZAda;
  158. }
  159. def NAME # _PSEUDO : sme_outer_product_pseudo<ZPR64, SMEMatrixTileD>, SMEPseudo2Instr<NAME, 0>;
  160. def : Pat<(op timm32_0_7:$tile, (nxv2i1 PPR3bAny:$pn), (nxv2i1 PPR3bAny:$pm),
  161. (nxv2f64 ZPR64:$zn), (nxv2f64 ZPR64:$zm)),
  162. (!cast<Instruction>(NAME # _PSEUDO) timm32_0_7:$tile, $pn, $pm, $zn, $zm)>;
  163. }
  164. multiclass sme2p1_fmop_tile_fp16<string mnemonic, bit bf, bit s>{
  165. def NAME : sme_fp_outer_product_inst<s, {0,bf}, 0b1, TileOp16, ZPR16, mnemonic> {
  166. bits<1> ZAda;
  167. let Inst{2-1} = 0b00;
  168. let Inst{0} = ZAda;
  169. }
  170. }
  171. class sme_int_outer_product_inst<bits<3> opc, bit sz, bit sme2,
  172. MatrixTileOperand za_ty, ZPRRegOp zpr_ty,
  173. string mnemonic>
  174. : I<(outs za_ty:$ZAda),
  175. (ins za_ty:$_ZAda, PPR3bAny:$Pn, PPR3bAny:$Pm, zpr_ty:$Zn, zpr_ty:$Zm),
  176. mnemonic, "\t$ZAda, $Pn/m, $Pm/m, $Zn, $Zm",
  177. "", []>,
  178. Sched<[]> {
  179. bits<5> Zm;
  180. bits<3> Pm;
  181. bits<3> Pn;
  182. bits<5> Zn;
  183. let Inst{31-25} = 0b1010000;
  184. let Inst{24} = opc{2}; // u0
  185. let Inst{23} = 0b1;
  186. let Inst{22} = sz;
  187. let Inst{21} = opc{1}; // u1
  188. let Inst{20-16} = Zm;
  189. let Inst{15-13} = Pm;
  190. let Inst{12-10} = Pn;
  191. let Inst{9-5} = Zn;
  192. let Inst{4} = opc{0}; //S;
  193. let Inst{3} = sme2;
  194. let Constraints = "$ZAda = $_ZAda";
  195. }
  196. multiclass sme_int_outer_product_i32<bits<3> opc, string mnemonic,
  197. SDPatternOperator op> {
  198. def NAME : sme_int_outer_product_inst<opc, 0b0, 0b0, TileOp32,
  199. ZPR8, mnemonic>, SMEPseudo2Instr<NAME, 1> {
  200. bits<2> ZAda;
  201. let Inst{1-0} = ZAda;
  202. let Inst{2} = 0b0;
  203. }
  204. def NAME # _PSEUDO : sme_outer_product_pseudo<ZPR8, SMEMatrixTileS>, SMEPseudo2Instr<NAME, 0>;
  205. def : Pat<(op timm32_0_3:$tile, (nxv16i1 PPR3bAny:$pn), (nxv16i1 PPR3bAny:$pm),
  206. (nxv16i8 ZPR8:$zn), (nxv16i8 ZPR8:$zm)),
  207. (!cast<Instruction>(NAME # _PSEUDO) timm32_0_3:$tile, $pn, $pm, $zn, $zm)>;
  208. }
  209. multiclass sme_int_outer_product_i64<bits<3> opc, string mnemonic,
  210. SDPatternOperator op> {
  211. def NAME : sme_int_outer_product_inst<opc, 0b1, 0b0, TileOp64,
  212. ZPR16, mnemonic>, SMEPseudo2Instr<NAME, 1> {
  213. bits<3> ZAda;
  214. let Inst{2-0} = ZAda;
  215. }
  216. def NAME # _PSEUDO : sme_outer_product_pseudo<ZPR16, SMEMatrixTileD>, SMEPseudo2Instr<NAME, 0>;
  217. def : Pat<(op timm32_0_7:$tile, (nxv8i1 PPR3bAny:$pn), (nxv8i1 PPR3bAny:$pm),
  218. (nxv8i16 ZPR16:$zn), (nxv8i16 ZPR16:$zm)),
  219. (!cast<Instruction>(NAME # _PSEUDO) timm32_0_7:$tile, $pn, $pm, $zn, $zm)>;
  220. }
  221. class sme_outer_product_widening_inst<bits<3> opc, ZPRRegOp zpr_ty, string mnemonic>
  222. : I<(outs TileOp32:$ZAda),
  223. (ins TileOp32:$_ZAda, PPR3bAny:$Pn, PPR3bAny:$Pm, zpr_ty:$Zn, zpr_ty:$Zm),
  224. mnemonic, "\t$ZAda, $Pn/m, $Pm/m, $Zn, $Zm",
  225. "", []>,
  226. Sched<[]> {
  227. bits<5> Zm;
  228. bits<3> Pm;
  229. bits<3> Pn;
  230. bits<5> Zn;
  231. bits<2> ZAda;
  232. let Inst{31-25} = 0b1000000;
  233. let Inst{24} = !if(opc{2}, 0, 1);
  234. let Inst{23-22} = 0b10;
  235. let Inst{21} = opc{1};
  236. let Inst{20-16} = Zm;
  237. let Inst{15-13} = Pm;
  238. let Inst{12-10} = Pn;
  239. let Inst{9-5} = Zn;
  240. let Inst{4} = opc{0};
  241. let Inst{3} = opc{2};
  242. let Inst{2} = 0b0;
  243. let Inst{1-0} = ZAda;
  244. let Constraints = "$ZAda = $_ZAda";
  245. }
  246. multiclass sme_bf16_outer_product<bits<3> opc, string mnemonic, SDPatternOperator op> {
  247. def NAME : sme_outer_product_widening_inst<opc, ZPR16, mnemonic>, SMEPseudo2Instr<NAME, 1>;
  248. def NAME # _PSEUDO : sme_outer_product_pseudo<ZPR16, SMEMatrixTileS>, SMEPseudo2Instr<NAME, 0>;
  249. def : Pat<(op timm32_0_3:$tile, (nxv8i1 PPR3bAny:$pn), (nxv8i1 PPR3bAny:$pm),
  250. (nxv8bf16 ZPR16:$zn), (nxv8bf16 ZPR16:$zm)),
  251. (!cast<Instruction>(NAME # _PSEUDO) timm32_0_3:$tile, $pn, $pm, $zn, $zm)>;
  252. }
  253. multiclass sme_f16_outer_product<bits<3> opc, string mnemonic, SDPatternOperator op> {
  254. def NAME : sme_outer_product_widening_inst<opc, ZPR16, mnemonic>, SMEPseudo2Instr<NAME, 1>;
  255. def NAME # _PSEUDO : sme_outer_product_pseudo<ZPR16, SMEMatrixTileS>, SMEPseudo2Instr<NAME, 0>;
  256. def : Pat<(op timm32_0_3:$tile, (nxv8i1 PPR3bAny:$pn), (nxv8i1 PPR3bAny:$pm),
  257. (nxv8f16 ZPR16:$zn), (nxv8f16 ZPR16:$zm)),
  258. (!cast<Instruction>(NAME # _PSEUDO) timm32_0_3:$tile, $pn, $pm, $zn, $zm)>;
  259. }
  260. //===----------------------------------------------------------------------===//
  261. // SME Add Vector to Tile
  262. //===----------------------------------------------------------------------===//
  263. class sme_add_vector_to_tile_inst<bit op, bit V, MatrixTileOperand tile_ty,
  264. ZPRRegOp zpr_ty, string mnemonic>
  265. : I<(outs tile_ty:$ZAda),
  266. (ins tile_ty:$_ZAda, PPR3bAny:$Pn, PPR3bAny:$Pm, zpr_ty:$Zn),
  267. mnemonic, "\t$ZAda, $Pn/m, $Pm/m, $Zn",
  268. "", []>, Sched<[]> {
  269. bits<3> Pm;
  270. bits<3> Pn;
  271. bits<5> Zn;
  272. let Inst{31-23} = 0b110000001;
  273. let Inst{22} = op;
  274. let Inst{21-17} = 0b01000;
  275. let Inst{16} = V;
  276. let Inst{15-13} = Pm;
  277. let Inst{12-10} = Pn;
  278. let Inst{9-5} = Zn;
  279. let Inst{4-3} = 0b00;
  280. let Constraints = "$ZAda = $_ZAda";
  281. }
  282. class sme_add_vector_to_tile_pseudo<ZPRRegOp zpr_ty, SMEMatrixTypeEnum za_flag>
  283. : Pseudo<(outs),
  284. (ins i32imm:$tile, PPR3bAny:$Pn, PPR3bAny:$Pm, zpr_ty:$Zn), []>,
  285. Sched<[]> {
  286. // Translated to the actual instructions in AArch64ISelLowering.cpp
  287. let SMEMatrixType = za_flag;
  288. let usesCustomInserter = 1;
  289. }
  290. multiclass sme_add_vector_to_tile_u32<bit V, string mnemonic, SDPatternOperator op> {
  291. def NAME : sme_add_vector_to_tile_inst<0b0, V, TileOp32, ZPR32, mnemonic>, SMEPseudo2Instr<NAME, 1> {
  292. bits<2> ZAda;
  293. let Inst{2} = 0b0;
  294. let Inst{1-0} = ZAda;
  295. }
  296. def _PSEUDO_S : sme_add_vector_to_tile_pseudo<ZPR32, SMEMatrixTileS>, SMEPseudo2Instr<NAME, 0>;
  297. def : Pat<(op timm32_0_3:$tile, (nxv4i1 PPR3bAny:$pn), (nxv4i1 PPR3bAny:$pm),
  298. (nxv4i32 ZPR32:$zn)),
  299. (!cast<Instruction>(NAME # _PSEUDO_S) timm32_0_3:$tile, $pn, $pm, $zn)>;
  300. }
  301. multiclass sme_add_vector_to_tile_u64<bit V, string mnemonic, SDPatternOperator op> {
  302. def NAME : sme_add_vector_to_tile_inst<0b1, V, TileOp64, ZPR64, mnemonic>, SMEPseudo2Instr<NAME, 1> {
  303. bits<3> ZAda;
  304. let Inst{2-0} = ZAda;
  305. }
  306. def _PSEUDO_D : sme_add_vector_to_tile_pseudo<ZPR64, SMEMatrixTileD>, SMEPseudo2Instr<NAME, 0>;
  307. let Predicates = [HasSMEI16I64] in {
  308. def : Pat<(op timm32_0_7:$tile, (nxv2i1 PPR3bAny:$pn), (nxv2i1 PPR3bAny:$pm),
  309. (nxv2i64 ZPR64:$zn)),
  310. (!cast<Instruction>(NAME # _PSEUDO_D) timm32_0_7:$tile, $pn, $pm, $zn)>;
  311. }
  312. }
  313. //===----------------------------------------------------------------------===//
  314. // SME Contiguous Loads
  315. //===----------------------------------------------------------------------===//
  316. class sme_mem_ld_ss_base<bit Q, bit V, bits<2> msz, dag outs, dag ins,
  317. string mnemonic, string argstr>
  318. : I<outs, ins, mnemonic, argstr, "", []>, Sched<[]> {
  319. bits<5> Rm;
  320. bits<2> Rv;
  321. bits<3> Pg;
  322. bits<5> Rn;
  323. let Inst{31-25} = 0b1110000;
  324. let Inst{24} = Q;
  325. let Inst{23-22} = msz;
  326. let Inst{21} = 0b0;
  327. let Inst{20-16} = Rm;
  328. let Inst{15} = V;
  329. let Inst{14-13} = Rv;
  330. let Inst{12-10} = Pg;
  331. let Inst{9-5} = Rn;
  332. let Inst{4} = 0b0;
  333. let mayLoad = 1;
  334. }
  335. class sme_mem_ld_ss_inst<bit Q, bits<2> msz, string mnemonic,
  336. MatrixTileVectorOperand tile_ty, bit is_col,
  337. Operand imm_ty, RegisterOperand gpr_ty>
  338. : sme_mem_ld_ss_base<
  339. Q, is_col, msz, (outs tile_ty:$ZAt),
  340. (ins MatrixIndexGPR32Op12_15:$Rv, imm_ty:$imm, PPR3bAny:$Pg, GPR64sp:$Rn,
  341. gpr_ty:$Rm),
  342. mnemonic, "\t\\{$ZAt[$Rv, $imm]\\}, $Pg/z, [$Rn, $Rm]">;
  343. multiclass sme_mem_ss_aliases_base<string mnemonic, Instruction inst,
  344. MatrixTileVectorOperand tile_ty,
  345. Operand imm_ty, RegisterOperand gpr_ty,
  346. string pg_suffix=""> {
  347. def : InstAlias<mnemonic # "\t$ZAt[$Rv, $imm], $Pg" # pg_suffix # ", [$Rn, $Rm]",
  348. (inst tile_ty:$ZAt, MatrixIndexGPR32Op12_15:$Rv, imm_ty:$imm, PPR3bAny:$Pg, GPR64sp:$Rn, gpr_ty:$Rm), 0>;
  349. // Default XZR offset aliases
  350. def : InstAlias<mnemonic # "\t\\{$ZAt[$Rv, $imm]\\}, $Pg" # pg_suffix # ", [$Rn]",
  351. (inst tile_ty:$ZAt, MatrixIndexGPR32Op12_15:$Rv, imm_ty:$imm, PPR3bAny:$Pg, GPR64sp:$Rn, XZR), 1>;
  352. def : InstAlias<mnemonic # "\t$ZAt[$Rv, $imm], $Pg" # pg_suffix # ", [$Rn]",
  353. (inst tile_ty:$ZAt, MatrixIndexGPR32Op12_15:$Rv, imm_ty:$imm, PPR3bAny:$Pg, GPR64sp:$Rn, XZR), 0>;
  354. }
  355. multiclass sme_mem_ss_aliases<string mnemonic, string inst, bit is_col,
  356. string pg_suffix=""> {
  357. defm : sme_mem_ss_aliases_base<mnemonic # "b", !cast<Instruction>(inst # _B),
  358. !if(is_col, TileVectorOpV8, TileVectorOpH8),
  359. sme_elm_idx0_15, GPR64shifted8, pg_suffix>;
  360. defm : sme_mem_ss_aliases_base<mnemonic # "h", !cast<Instruction>(inst # _H),
  361. !if(is_col, TileVectorOpV16, TileVectorOpH16),
  362. sme_elm_idx0_7, GPR64shifted16, pg_suffix>;
  363. defm : sme_mem_ss_aliases_base<mnemonic # "w", !cast<Instruction>(inst # _S),
  364. !if(is_col, TileVectorOpV32, TileVectorOpH32),
  365. sme_elm_idx0_3, GPR64shifted32, pg_suffix>;
  366. defm : sme_mem_ss_aliases_base<mnemonic # "d", !cast<Instruction>(inst # _D),
  367. !if(is_col, TileVectorOpV64, TileVectorOpH64),
  368. sme_elm_idx0_1, GPR64shifted64, pg_suffix>;
  369. defm : sme_mem_ss_aliases_base<mnemonic # "q", !cast<Instruction>(inst # _Q),
  370. !if(is_col, TileVectorOpV128, TileVectorOpH128),
  371. sme_elm_idx0_0, GPR64shifted128, pg_suffix>;
  372. }
  373. multiclass sme_mem_ld_ss_aliases<string inst, bit is_col> {
  374. defm NAME : sme_mem_ss_aliases<"ld1", inst, is_col, "/z">;
  375. }
  376. multiclass sme_mem_ld_ss_patterns<Instruction Inst, SDPatternOperator Load,
  377. Operand tile_ty, Operand offset_ty,
  378. ComplexPattern addr,
  379. ComplexPattern tileslice> {
  380. // base, tileslice
  381. def : Pat<(Load PPR3bAny:$pg, GPR64sp:$base, tile_ty:$tile,
  382. (i32 (tileslice MatrixIndexGPR32Op12_15:$idx, offset_ty:$imm))),
  383. (Inst tile_ty:$tile, $idx, $imm, $pg, $base, XZR)>;
  384. // reg + reg, tileslice
  385. let AddedComplexity = 1 in {
  386. def : Pat<(Load PPR3bAny:$pg, (addr GPR64sp:$base, GPR64:$offset),
  387. tile_ty:$tile, (i32 (tileslice MatrixIndexGPR32Op12_15:$idx,
  388. offset_ty:$imm))),
  389. (Inst tile_ty:$tile, $idx, $imm, $pg, $base, $offset)>;
  390. }
  391. }
  392. class sme_load_pseudo
  393. : Pseudo<(outs), (ins i32imm:$tile, MatrixIndexGPR32Op12_15:$idx,
  394. i32imm:$imm, PPR3bAny:$pg, GPR64sp:$base, GPR64:$offset), []>,
  395. Sched<[]> {
  396. // Translated to the actual instructions in AArch64ISelLowering.cpp
  397. let usesCustomInserter = 1;
  398. let mayLoad = 1;
  399. }
  400. multiclass sme_mem_ld_v_ss<string mnemonic, bit is_col> {
  401. def _B : sme_mem_ld_ss_inst<0b0, 0b00, mnemonic # "b",
  402. !if(is_col, TileVectorOpV8, TileVectorOpH8),
  403. is_col, sme_elm_idx0_15, GPR64shifted8> {
  404. bits<4> imm;
  405. let Inst{3-0} = imm;
  406. }
  407. def _H : sme_mem_ld_ss_inst<0b0, 0b01, mnemonic # "h",
  408. !if(is_col, TileVectorOpV16, TileVectorOpH16),
  409. is_col, sme_elm_idx0_7, GPR64shifted16> {
  410. bits<1> ZAt;
  411. bits<3> imm;
  412. let Inst{3} = ZAt;
  413. let Inst{2-0} = imm;
  414. }
  415. def _S : sme_mem_ld_ss_inst<0b0, 0b10, mnemonic # "w",
  416. !if(is_col, TileVectorOpV32, TileVectorOpH32),
  417. is_col, sme_elm_idx0_3, GPR64shifted32> {
  418. bits<2> ZAt;
  419. bits<2> imm;
  420. let Inst{3-2} = ZAt;
  421. let Inst{1-0} = imm;
  422. }
  423. def _D : sme_mem_ld_ss_inst<0b0, 0b11, mnemonic # "d",
  424. !if(is_col, TileVectorOpV64, TileVectorOpH64),
  425. is_col, sme_elm_idx0_1, GPR64shifted64> {
  426. bits<3> ZAt;
  427. bits<1> imm;
  428. let Inst{3-1} = ZAt;
  429. let Inst{0} = imm;
  430. }
  431. def _Q : sme_mem_ld_ss_inst<0b1, 0b11, mnemonic # "q",
  432. !if(is_col, TileVectorOpV128, TileVectorOpH128),
  433. is_col, sme_elm_idx0_0, GPR64shifted128> {
  434. bits<4> ZAt;
  435. let Inst{3-0} = ZAt;
  436. }
  437. defm : sme_mem_ld_ss_aliases<NAME, is_col>;
  438. // Pseudo instructions for lowering intrinsics, using immediates instead of
  439. // tile registers.
  440. def _PSEUDO_B : sme_load_pseudo;
  441. def _PSEUDO_H : sme_load_pseudo;
  442. def _PSEUDO_S : sme_load_pseudo;
  443. def _PSEUDO_D : sme_load_pseudo;
  444. def _PSEUDO_Q : sme_load_pseudo;
  445. defm : sme_mem_ld_ss_patterns<!cast<Instruction>(NAME # _PSEUDO_B),
  446. !if(is_col, int_aarch64_sme_ld1b_vert,
  447. int_aarch64_sme_ld1b_horiz),
  448. sme_elm_idx0_0, timm32_0_15, am_sve_regreg_lsl0,
  449. tileslice8>;
  450. defm : sme_mem_ld_ss_patterns<!cast<Instruction>(NAME # _PSEUDO_H),
  451. !if(is_col, int_aarch64_sme_ld1h_vert,
  452. int_aarch64_sme_ld1h_horiz),
  453. timm32_0_1, timm32_0_7, am_sve_regreg_lsl1,
  454. tileslice16>;
  455. defm : sme_mem_ld_ss_patterns<!cast<Instruction>(NAME # _PSEUDO_S),
  456. !if(is_col, int_aarch64_sme_ld1w_vert,
  457. int_aarch64_sme_ld1w_horiz),
  458. timm32_0_3, timm32_0_3, am_sve_regreg_lsl2,
  459. tileslice32>;
  460. defm : sme_mem_ld_ss_patterns<!cast<Instruction>(NAME # _PSEUDO_D),
  461. !if(is_col, int_aarch64_sme_ld1d_vert,
  462. int_aarch64_sme_ld1d_horiz),
  463. timm32_0_7, timm32_0_1, am_sve_regreg_lsl3,
  464. tileslice64>;
  465. defm : sme_mem_ld_ss_patterns<!cast<Instruction>(NAME # _PSEUDO_Q),
  466. !if(is_col, int_aarch64_sme_ld1q_vert,
  467. int_aarch64_sme_ld1q_horiz),
  468. timm32_0_15, sme_elm_idx0_0, am_sve_regreg_lsl4,
  469. tileslice128>;
  470. }
  471. multiclass sme_mem_ld_ss<string mnemonic> {
  472. defm _H : sme_mem_ld_v_ss<mnemonic, /*is_col=*/0b0>;
  473. defm _V : sme_mem_ld_v_ss<mnemonic, /*is_col=*/0b1>;
  474. }
  475. //===----------------------------------------------------------------------===//
  476. // SME Contiguous Stores
  477. //===----------------------------------------------------------------------===//
  478. class sme_mem_st_ss_base<bit Q, bit V, bits<2> msz, dag ins,
  479. string mnemonic, string argstr>
  480. : I<(outs), ins, mnemonic, argstr, "", []>, Sched<[]> {
  481. bits<5> Rm;
  482. bits<2> Rv;
  483. bits<3> Pg;
  484. bits<5> Rn;
  485. let Inst{31-25} = 0b1110000;
  486. let Inst{24} = Q;
  487. let Inst{23-22} = msz;
  488. let Inst{21} = 0b1;
  489. let Inst{20-16} = Rm;
  490. let Inst{15} = V;
  491. let Inst{14-13} = Rv;
  492. let Inst{12-10} = Pg;
  493. let Inst{9-5} = Rn;
  494. let Inst{4} = 0b0;
  495. let mayStore = 1;
  496. let hasSideEffects = 1;
  497. }
  498. class sme_mem_st_ss_inst<bit Q, bits<2> msz, string mnemonic,
  499. MatrixTileVectorOperand tile_ty, bit is_col,
  500. Operand imm_ty, RegisterOperand gpr_ty>
  501. : sme_mem_st_ss_base<
  502. Q, is_col, msz,
  503. (ins tile_ty:$ZAt, MatrixIndexGPR32Op12_15:$Rv, imm_ty:$imm, PPR3bAny:$Pg,
  504. GPR64sp:$Rn, gpr_ty:$Rm),
  505. mnemonic, "\t\\{$ZAt[$Rv, $imm]\\}, $Pg, [$Rn, $Rm]">;
  506. multiclass sme_mem_st_ss_aliases<string inst, bit is_col> {
  507. defm NAME : sme_mem_ss_aliases<"st1", inst, is_col>;
  508. }
  509. multiclass sme_mem_st_ss_patterns<Instruction Inst, SDPatternOperator Store,
  510. Operand offset_ty,
  511. ComplexPattern imm2tile,
  512. ComplexPattern addr,
  513. ComplexPattern tileslice> {
  514. // base, tileslice
  515. def : Pat<(Store PPR3bAny:$pg, GPR64sp:$base, (imm2tile untyped:$tile),
  516. (i32 (tileslice MatrixIndexGPR32Op12_15:$idx, offset_ty:$imm))),
  517. (Inst $tile, $idx, $imm, $pg, $base, XZR)>;
  518. // reg + reg, tileslice
  519. let AddedComplexity = 1 in {
  520. def : Pat<(Store PPR3bAny:$pg, (addr GPR64sp:$base, GPR64:$offset),
  521. (imm2tile untyped:$tile),
  522. (i32 (tileslice MatrixIndexGPR32Op12_15:$idx, offset_ty:$imm))),
  523. (Inst $tile, $idx, $imm, $pg, $base, $offset)>;
  524. }
  525. }
  526. multiclass sme_mem_st_v_ss<string mnemonic, bit is_col> {
  527. def _B : sme_mem_st_ss_inst<0b0, 0b00, mnemonic # "b",
  528. !if(is_col, TileVectorOpV8, TileVectorOpH8),
  529. is_col, sme_elm_idx0_15, GPR64shifted8> {
  530. bits<4> imm;
  531. let Inst{3-0} = imm;
  532. }
  533. def _H : sme_mem_st_ss_inst<0b0, 0b01, mnemonic # "h",
  534. !if(is_col, TileVectorOpV16, TileVectorOpH16),
  535. is_col, sme_elm_idx0_7, GPR64shifted16> {
  536. bits<1> ZAt;
  537. bits<3> imm;
  538. let Inst{3} = ZAt;
  539. let Inst{2-0} = imm;
  540. }
  541. def _S : sme_mem_st_ss_inst<0b0, 0b10, mnemonic # "w",
  542. !if(is_col, TileVectorOpV32, TileVectorOpH32),
  543. is_col, sme_elm_idx0_3, GPR64shifted32> {
  544. bits<2> ZAt;
  545. bits<2> imm;
  546. let Inst{3-2} = ZAt;
  547. let Inst{1-0} = imm;
  548. }
  549. def _D : sme_mem_st_ss_inst<0b0, 0b11, mnemonic # "d",
  550. !if(is_col, TileVectorOpV64, TileVectorOpH64),
  551. is_col, sme_elm_idx0_1, GPR64shifted64> {
  552. bits<3> ZAt;
  553. bits<1> imm;
  554. let Inst{3-1} = ZAt;
  555. let Inst{0} = imm;
  556. }
  557. def _Q : sme_mem_st_ss_inst<0b1, 0b11, mnemonic # "q",
  558. !if(is_col, TileVectorOpV128, TileVectorOpH128),
  559. is_col, sme_elm_idx0_0, GPR64shifted128> {
  560. bits<4> ZAt;
  561. let Inst{3-0} = ZAt;
  562. }
  563. defm : sme_mem_st_ss_aliases<NAME, is_col>;
  564. defm : sme_mem_st_ss_patterns<!cast<Instruction>(NAME # _B),
  565. !if(is_col, int_aarch64_sme_st1b_vert,
  566. int_aarch64_sme_st1b_horiz),
  567. timm32_0_15, imm_to_tile8, am_sve_regreg_lsl0,
  568. tileslice8>;
  569. defm : sme_mem_st_ss_patterns<!cast<Instruction>(NAME # _H),
  570. !if(is_col, int_aarch64_sme_st1h_vert,
  571. int_aarch64_sme_st1h_horiz),
  572. timm32_0_7, imm_to_tile16, am_sve_regreg_lsl1,
  573. tileslice16>;
  574. defm : sme_mem_st_ss_patterns<!cast<Instruction>(NAME # _S),
  575. !if(is_col, int_aarch64_sme_st1w_vert,
  576. int_aarch64_sme_st1w_horiz),
  577. timm32_0_3, imm_to_tile32, am_sve_regreg_lsl2,
  578. tileslice32>;
  579. defm : sme_mem_st_ss_patterns<!cast<Instruction>(NAME # _D),
  580. !if(is_col, int_aarch64_sme_st1d_vert,
  581. int_aarch64_sme_st1d_horiz),
  582. timm32_0_1, imm_to_tile64, am_sve_regreg_lsl3,
  583. tileslice64>;
  584. defm : sme_mem_st_ss_patterns<!cast<Instruction>(NAME # _Q),
  585. !if(is_col, int_aarch64_sme_st1q_vert,
  586. int_aarch64_sme_st1q_horiz),
  587. sme_elm_idx0_0, imm_to_tile128,
  588. am_sve_regreg_lsl4, tileslice128>;
  589. }
  590. multiclass sme_mem_st_ss<string mnemonic> {
  591. defm _H : sme_mem_st_v_ss<mnemonic, /*is_col=*/0b0>;
  592. defm _V : sme_mem_st_v_ss<mnemonic, /*is_col=*/0b1>;
  593. }
  594. //===----------------------------------------------------------------------===//
  595. // SME Save and Restore Array
  596. //===----------------------------------------------------------------------===//
  597. class sme_spill_fill_base<bit isStore, dag outs, dag ins, string opcodestr>
  598. : I<outs, ins, opcodestr, "\t$ZAt[$Rv, $imm4], [$Rn, $offset, mul vl]", "",
  599. []>,
  600. Sched<[]> {
  601. bits<2> Rv;
  602. bits<5> Rn;
  603. bits<4> imm4;
  604. let Inst{31-22} = 0b1110000100;
  605. let Inst{21} = isStore;
  606. let Inst{20-15} = 0b000000;
  607. let Inst{14-13} = Rv;
  608. let Inst{12-10} = 0b000;
  609. let Inst{9-5} = Rn;
  610. let Inst{4} = 0b0;
  611. let Inst{3-0} = imm4;
  612. }
  613. let mayStore = 1 in
  614. class sme_spill_inst<string opcodestr>
  615. : sme_spill_fill_base<0b1, (outs),
  616. (ins MatrixOp:$ZAt, MatrixIndexGPR32Op12_15:$Rv,
  617. sme_elm_idx0_15:$imm4, GPR64sp:$Rn,
  618. imm0_15:$offset),
  619. opcodestr>;
  620. let mayLoad = 1 in
  621. class sme_fill_inst<string opcodestr>
  622. : sme_spill_fill_base<0b0, (outs MatrixOp:$ZAt),
  623. (ins MatrixIndexGPR32Op12_15:$Rv,
  624. sme_elm_idx0_15:$imm4, GPR64sp:$Rn,
  625. imm0_15:$offset),
  626. opcodestr>;
  627. multiclass sme_spill<string opcodestr> {
  628. def NAME : sme_spill_inst<opcodestr>;
  629. def : InstAlias<opcodestr # "\t$ZAt[$Rv, $imm4], [$Rn]",
  630. (!cast<Instruction>(NAME) MatrixOp:$ZAt,
  631. MatrixIndexGPR32Op12_15:$Rv, sme_elm_idx0_15:$imm4, GPR64sp:$Rn, 0), 1>;
  632. // base
  633. def : Pat<(int_aarch64_sme_str MatrixIndexGPR32Op12_15:$idx, GPR64sp:$base),
  634. (!cast<Instruction>(NAME) ZA, $idx, 0, $base, 0)>;
  635. // scalar + immediate (mul vl)
  636. let AddedComplexity = 2 in {
  637. def : Pat<(int_aarch64_sme_str MatrixIndexGPR32Op12_15:$idx,
  638. (am_sme_indexed_b4 GPR64sp:$base, imm0_15:$imm4)),
  639. (!cast<Instruction>(NAME) ZA, $idx, 0, $base, $imm4)>;
  640. }
  641. }
  642. multiclass sme_fill<string opcodestr> {
  643. def NAME : sme_fill_inst<opcodestr>;
  644. def : InstAlias<opcodestr # "\t$ZAt[$Rv, $imm4], [$Rn]",
  645. (!cast<Instruction>(NAME) MatrixOp:$ZAt,
  646. MatrixIndexGPR32Op12_15:$Rv, sme_elm_idx0_15:$imm4, GPR64sp:$Rn, 0), 1>;
  647. def NAME # _PSEUDO
  648. : Pseudo<(outs),
  649. (ins MatrixIndexGPR32Op12_15:$idx, imm0_15:$imm4,
  650. GPR64sp:$base), []>,
  651. Sched<[]> {
  652. // Translated to actual instruction in AArch64ISelLowering.cpp
  653. let usesCustomInserter = 1;
  654. let mayLoad = 1;
  655. }
  656. // base
  657. def : Pat<(int_aarch64_sme_ldr MatrixIndexGPR32Op12_15:$idx, GPR64sp:$base),
  658. (!cast<Instruction>(NAME # _PSEUDO) $idx, 0, $base)>;
  659. // scalar + immediate (mul vl)
  660. let AddedComplexity = 2 in {
  661. def : Pat<(int_aarch64_sme_ldr MatrixIndexGPR32Op12_15:$idx,
  662. (am_sme_indexed_b4 GPR64sp:$base, imm0_15:$imm4)),
  663. (!cast<Instruction>(NAME # _PSEUDO) $idx, $imm4, $base)>;
  664. }
  665. }
  666. //===----------------------------------------------------------------------===//
  667. // Move instructions
  668. //===----------------------------------------------------------------------===//
  // Shared encoding for the vector-to-tile instructions. Inst{3-0} is not
  // assigned here; the records deriving from this class fill it in.
  class sme_vector_to_tile_base<bit Q, bit V, bits<2> sz, dag outs, dag ins,
                                string mnemonic, string argstr>
      : I<outs, ins, mnemonic, argstr, "", []>, Sched<[]> {
    bits<2> Rv;
    bits<3> Pg;
    bits<5> Zn;

    // Listed from least- to most-significant bit.
    let Inst{4}     = 0b0;
    let Inst{9-5}   = Zn;
    let Inst{12-10} = Pg;
    let Inst{14-13} = Rv;
    let Inst{15}    = V;
    let Inst{16}    = Q;
    let Inst{21-17} = 0b00000;
    let Inst{23-22} = sz;
    let Inst{31-24} = 0b11000000;
  }
  // Concrete vector-to-tile instruction: supplies the operand dags and the
  // assembly string, and ties the tile output $ZAd to the tile input $_ZAd.
  // is_col is forwarded to the base class as its V bit.
  class sme_vector_to_tile_inst<bit Q, bits<2> sz,
                                MatrixTileVectorOperand tile_ty,
                                bit is_col, Operand imm_ty, ZPRRegOp zpr_ty,
                                string mnemonic>
      : sme_vector_to_tile_base<Q, is_col, sz,
                                (outs tile_ty:$ZAd),
                                (ins tile_ty:$_ZAd,
                                     MatrixIndexGPR32Op12_15:$Rv, imm_ty:$imm,
                                     PPR3bAny:$Pg, zpr_ty:$Zn),
                                mnemonic, "\t$ZAd[$Rv, $imm], $Pg/m, $Zn"> {
    let Constraints = "$ZAd = $_ZAd";
  }
  // Adds a "mov" assembler alias for a vector-to-tile instruction, using the
  // same operand layout as the instruction's own assembly string.
  multiclass sme_vector_to_tile_aliases<Instruction inst,
                                        MatrixTileVectorOperand tile_ty,
                                        ZPRRegOp zpr_ty, Operand imm_ty> {
    def : InstAlias<"mov\t$ZAd[$Rv, $imm], $Pg/m, $Zn",
                    (inst tile_ty:$ZAd, MatrixIndexGPR32Op12_15:$Rv,
                          imm_ty:$imm, PPR3bAny:$Pg, zpr_ty:$Zn),
                    1>;
  }
  // Selection patterns mapping a write-to-tile operator `op` onto `inst`.
  //   imm_ty    - operand class of the tile-number immediate.
  //   offset_ty - operand class of the slice offset produced by `tileslice`.
  multiclass sme_vector_to_tile_patterns<Instruction inst, ValueType zpr_vt,
                                         ValueType ppr_vt, Operand imm_ty,
                                         Operand offset_ty,
                                         SDPatternOperator op,
                                         ComplexPattern tileslice> {
    // Slice index given as a plain register: the offset operand is 0.
    def : Pat<(op imm_ty:$tile, MatrixIndexGPR32Op12_15:$idx,
                  (ppr_vt PPR3bAny:$pg), (zpr_vt ZPRAny:$zn)),
              (inst imm_ty:$tile, $idx, 0, $pg, $zn)>;

    // Slice index split by `tileslice` into register + offset; ranked above
    // the plain-register form through AddedComplexity.
    let AddedComplexity = 1 in
    def : Pat<(op imm_ty:$tile,
                  (i32 (tileslice MatrixIndexGPR32Op12_15:$idx,
                                  offset_ty:$imm)),
                  (ppr_vt PPR3bAny:$pg), (zpr_vt ZPRAny:$zn)),
              (inst imm_ty:$tile, $idx, $imm, $pg, $zn)>;
  }
  // Pseudo for inserting a vector into a tile; the tile is passed as an
  // immediate ($tile) rather than as a tile register.
  class sme_mova_insert_pseudo<SMEMatrixTypeEnum za_flag>
      : Pseudo<(outs),
               (ins i32imm:$tile, MatrixIndexGPR32Op12_15:$idx, i32imm:$imm,
                    PPR3bAny:$pg, ZPRAny:$zn),
               []>,
        Sched<[]> {
    // Translated to the actual instructions in AArch64ISelLowering.cpp
    let usesCustomInserter = 1;
    let SMEMatrixType = za_flag;
  }
  // Vector-to-tile instructions for one direction (is_col selects the
  // TileVectorOpV* operands, otherwise TileVectorOpH*). For each element
  // size (_B/_H/_S/_D/_Q) this defines the real instruction, a matching
  // _PSEUDO_* record, a "mov" alias, and the intrinsic selection patterns.
  multiclass sme_vector_v_to_tile<string mnemonic, bit is_col> {
    // Tile-vector operand per element size for the requested direction.
    defvar TileTyB = !if(is_col, TileVectorOpV8,   TileVectorOpH8);
    defvar TileTyH = !if(is_col, TileVectorOpV16,  TileVectorOpH16);
    defvar TileTyS = !if(is_col, TileVectorOpV32,  TileVectorOpH32);
    defvar TileTyD = !if(is_col, TileVectorOpV64,  TileVectorOpH64);
    defvar TileTyQ = !if(is_col, TileVectorOpV128, TileVectorOpH128);

    // Real instructions. Inst{3-0} is split between the tile number (upper
    // bits) and the slice immediate (lower bits); the split differs per size.
    def _B : sme_vector_to_tile_inst<0b0, 0b00, TileTyB, is_col,
                                     sme_elm_idx0_15, ZPR8, mnemonic>,
             SMEPseudo2Instr<NAME # _B, 1> {
      bits<4> imm;
      let Inst{3-0} = imm;
    }
    def _H : sme_vector_to_tile_inst<0b0, 0b01, TileTyH, is_col,
                                     sme_elm_idx0_7, ZPR16, mnemonic>,
             SMEPseudo2Instr<NAME # _H, 1> {
      bits<1> ZAd;
      bits<3> imm;
      let Inst{3}   = ZAd;
      let Inst{2-0} = imm;
    }
    def _S : sme_vector_to_tile_inst<0b0, 0b10, TileTyS, is_col,
                                     sme_elm_idx0_3, ZPR32, mnemonic>,
             SMEPseudo2Instr<NAME # _S, 1> {
      bits<2> ZAd;
      bits<2> imm;
      let Inst{3-2} = ZAd;
      let Inst{1-0} = imm;
    }
    def _D : sme_vector_to_tile_inst<0b0, 0b11, TileTyD, is_col,
                                     sme_elm_idx0_1, ZPR64, mnemonic>,
             SMEPseudo2Instr<NAME # _D, 1> {
      bits<3> ZAd;
      bits<1> imm;
      let Inst{3-1} = ZAd;
      let Inst{0}   = imm;
    }
    // _Q is the only variant with Q set; all of Inst{3-0} holds the tile and
    // the declared imm field is not placed in the encoding.
    def _Q : sme_vector_to_tile_inst<0b1, 0b11, TileTyQ, is_col,
                                     sme_elm_idx0_0, ZPR128, mnemonic>,
             SMEPseudo2Instr<NAME # _Q, 1> {
      bits<4> ZAd;
      bits<1> imm;
      let Inst{3-0} = ZAd;
    }

    // Pseudo instructions for lowering intrinsics, using immediates instead
    // of tile registers.
    def _PSEUDO_B : sme_mova_insert_pseudo<SMEMatrixTileB>,
                    SMEPseudo2Instr<NAME # _B, 0>;
    def _PSEUDO_H : sme_mova_insert_pseudo<SMEMatrixTileH>,
                    SMEPseudo2Instr<NAME # _H, 0>;
    def _PSEUDO_S : sme_mova_insert_pseudo<SMEMatrixTileS>,
                    SMEPseudo2Instr<NAME # _S, 0>;
    def _PSEUDO_D : sme_mova_insert_pseudo<SMEMatrixTileD>,
                    SMEPseudo2Instr<NAME # _D, 0>;
    def _PSEUDO_Q : sme_mova_insert_pseudo<SMEMatrixTileQ>,
                    SMEPseudo2Instr<NAME # _Q, 0>;

    // "mov" aliases for the real instructions.
    defm : sme_vector_to_tile_aliases<!cast<Instruction>(NAME # _B),
                                      TileTyB, ZPR8, sme_elm_idx0_15>;
    defm : sme_vector_to_tile_aliases<!cast<Instruction>(NAME # _H),
                                      TileTyH, ZPR16, sme_elm_idx0_7>;
    defm : sme_vector_to_tile_aliases<!cast<Instruction>(NAME # _S),
                                      TileTyS, ZPR32, sme_elm_idx0_3>;
    defm : sme_vector_to_tile_aliases<!cast<Instruction>(NAME # _D),
                                      TileTyD, ZPR64, sme_elm_idx0_1>;
    defm : sme_vector_to_tile_aliases<!cast<Instruction>(NAME # _Q),
                                      TileTyQ, ZPR128, sme_elm_idx0_0>;

    // Patterns for the write intrinsics; these select the pseudos. The
    // emission order (B, H x3, S x2, D x2) matches the written-out form.
    defvar op = !if(is_col, int_aarch64_sme_write_vert,
                            int_aarch64_sme_write_horiz);

    defm : sme_vector_to_tile_patterns<!cast<Instruction>(NAME # _PSEUDO_B),
                                       nxv16i8, nxv16i1, sme_elm_idx0_0,
                                       sme_elm_idx0_15, op, tileslice8>;
    foreach vt = [nxv8i16, nxv8f16, nxv8bf16] in
      defm : sme_vector_to_tile_patterns<!cast<Instruction>(NAME # _PSEUDO_H),
                                         vt, nxv8i1, sme_elm_idx0_1,
                                         sme_elm_idx0_7, op, tileslice16>;
    foreach vt = [nxv4i32, nxv4f32] in
      defm : sme_vector_to_tile_patterns<!cast<Instruction>(NAME # _PSEUDO_S),
                                         vt, nxv4i1, sme_elm_idx0_3,
                                         sme_elm_idx0_3, op, tileslice32>;
    foreach vt = [nxv2i64, nxv2f64] in
      defm : sme_vector_to_tile_patterns<!cast<Instruction>(NAME # _PSEUDO_D),
                                         vt, nxv2i1, sme_elm_idx0_7,
                                         sme_elm_idx0_1, op, tileslice64>;

    // Patterns for the writeq intrinsics: every vector type maps to the _Q
    // pseudo, with the predicate type following the element count.
    defvar opq = !if(is_col, int_aarch64_sme_writeq_vert,
                             int_aarch64_sme_writeq_horiz);

    defm : sme_vector_to_tile_patterns<!cast<Instruction>(NAME # _PSEUDO_Q),
                                       nxv16i8, nxv16i1, sme_elm_idx0_15,
                                       sme_elm_idx0_0, opq, tileslice128>;
    foreach vt = [nxv8i16, nxv8f16, nxv8bf16] in
      defm : sme_vector_to_tile_patterns<!cast<Instruction>(NAME # _PSEUDO_Q),
                                         vt, nxv8i1, sme_elm_idx0_15,
                                         sme_elm_idx0_0, opq, tileslice128>;
    foreach vt = [nxv4i32, nxv4f32] in
      defm : sme_vector_to_tile_patterns<!cast<Instruction>(NAME # _PSEUDO_Q),
                                         vt, nxv4i1, sme_elm_idx0_15,
                                         sme_elm_idx0_0, opq, tileslice128>;
    foreach vt = [nxv2i64, nxv2f64] in
      defm : sme_vector_to_tile_patterns<!cast<Instruction>(NAME # _PSEUDO_Q),
                                         vt, nxv2i1, sme_elm_idx0_15,
                                         sme_elm_idx0_0, opq, tileslice128>;
  }
  // Instantiates the vector-to-tile family twice: the _H records with
  // is_col = 0 and the _V records with is_col = 1.
  multiclass sme_vector_to_tile<string mnemonic> {
    defm _H : sme_vector_v_to_tile<mnemonic, /*is_col=*/0b0>;
    defm _V : sme_vector_v_to_tile<mnemonic, /*is_col=*/0b1>;
  }
  // Shared encoding for the tile-to-vector instructions. Inst{8-5} is not
  // assigned here; the records deriving from this class fill it in.
  class sme_tile_to_vector_base<bit Q, bit V, bits<2> sz, dag outs, dag ins,
                                string mnemonic, string argstr>
      : I<outs, ins, mnemonic, argstr, "", []>, Sched<[]> {
    bits<2> Rv;
    bits<3> Pg;
    bits<5> Zd;

    // Listed from least- to most-significant bit.
    let Inst{4-0}   = Zd;
    let Inst{9}     = 0b0;
    let Inst{12-10} = Pg;
    let Inst{14-13} = Rv;
    let Inst{15}    = V;
    let Inst{16}    = Q;
    let Inst{21-17} = 0b00001;
    let Inst{23-22} = sz;
    let Inst{31-24} = 0b11000000;
  }
  // Concrete tile-to-vector instruction: supplies the operand dags and the
  // assembly string, and ties the vector output $Zd to the input $_Zd.
  // is_col is forwarded to the base class as its V bit.
  class sme_tile_to_vector_inst<bit Q, bits<2> sz, ZPRRegOp zpr_ty,
                                MatrixTileVectorOperand tile_ty,
                                bit is_col, Operand imm_ty, string mnemonic>
      : sme_tile_to_vector_base<Q, is_col, sz,
                                (outs zpr_ty:$Zd),
                                (ins zpr_ty:$_Zd, PPR3bAny:$Pg, tile_ty:$ZAn,
                                     MatrixIndexGPR32Op12_15:$Rv, imm_ty:$imm),
                                mnemonic, "\t$Zd, $Pg/m, $ZAn[$Rv, $imm]"> {
    let Constraints = "$Zd = $_Zd";
  }
  // Adds a "mov" assembler alias for a tile-to-vector instruction, using the
  // same operand layout as the instruction's own assembly string.
  multiclass sme_tile_to_vector_aliases<Instruction inst, ZPRRegOp zpr_ty,
                                        MatrixTileVectorOperand tile_ty,
                                        Operand imm_ty> {
    def : InstAlias<"mov\t$Zd, $Pg/m, $ZAn[$Rv, $imm]",
                    (inst zpr_ty:$Zd, PPR3bAny:$Pg, tile_ty:$ZAn,
                          MatrixIndexGPR32Op12_15:$Rv, imm_ty:$imm),
                    1>;
  }
  // Selection patterns mapping a read-from-tile operator `op` onto `inst`.
  //   imm2tile  - ComplexPattern applied to the tile operand.
  //   tileslice - ComplexPattern splitting the slice index into a register
  //               and an offset of class offset_ty.
  multiclass sme_tile_to_vector_patterns<Instruction inst, ValueType zpr_vt,
                                         ValueType ppr_vt, Operand offset_ty,
                                         ComplexPattern imm2tile,
                                         ComplexPattern tileslice,
                                         SDPatternOperator op> {
    // Slice index given as a plain register: the offset operand is 0.
    def : Pat<(zpr_vt (op (zpr_vt ZPRAny:$passthru), (ppr_vt PPR3bAny:$pg),
                          (imm2tile untyped:$tile),
                          MatrixIndexGPR32Op12_15:$idx)),
              (inst $passthru, $pg, $tile, $idx, 0)>;

    // Register + offset form; ranked above the plain-register form through
    // AddedComplexity.
    let AddedComplexity = 1 in
    def : Pat<(zpr_vt (op (zpr_vt ZPRAny:$passthru), (ppr_vt PPR3bAny:$pg),
                          (imm2tile untyped:$tile),
                          (i32 (tileslice MatrixIndexGPR32Op12_15:$idx,
                                          offset_ty:$imm)))),
              (inst $passthru, $pg, $tile, $idx, $imm)>;
  }
  // Tile-to-vector instructions for one direction (is_col selects the
  // TileVectorOpV* operands, otherwise TileVectorOpH*). For each element
  // size (_B/_H/_S/_D/_Q) this defines the instruction, a "mov" alias, and
  // the intrinsic selection patterns, which target the instructions directly.
  multiclass sme_tile_to_vector_v<string mnemonic, bit is_col> {
    // Tile-vector operand per element size for the requested direction.
    defvar TileTyB = !if(is_col, TileVectorOpV8,   TileVectorOpH8);
    defvar TileTyH = !if(is_col, TileVectorOpV16,  TileVectorOpH16);
    defvar TileTyS = !if(is_col, TileVectorOpV32,  TileVectorOpH32);
    defvar TileTyD = !if(is_col, TileVectorOpV64,  TileVectorOpH64);
    defvar TileTyQ = !if(is_col, TileVectorOpV128, TileVectorOpH128);

    // Inst{8-5} is split between the tile number (upper bits) and the slice
    // immediate (lower bits); the split differs per element size.
    def _B : sme_tile_to_vector_inst<0b0, 0b00, ZPR8, TileTyB, is_col,
                                     sme_elm_idx0_15, mnemonic> {
      bits<4> imm;
      let Inst{8-5} = imm;
    }
    def _H : sme_tile_to_vector_inst<0b0, 0b01, ZPR16, TileTyH, is_col,
                                     sme_elm_idx0_7, mnemonic> {
      bits<1> ZAn;
      bits<3> imm;
      let Inst{8}   = ZAn;
      let Inst{7-5} = imm;
    }
    def _S : sme_tile_to_vector_inst<0b0, 0b10, ZPR32, TileTyS, is_col,
                                     sme_elm_idx0_3, mnemonic> {
      bits<2> ZAn;
      bits<2> imm;
      let Inst{8-7} = ZAn;
      let Inst{6-5} = imm;
    }
    def _D : sme_tile_to_vector_inst<0b0, 0b11, ZPR64, TileTyD, is_col,
                                     sme_elm_idx0_1, mnemonic> {
      bits<3> ZAn;
      bits<1> imm;
      let Inst{8-6} = ZAn;
      let Inst{5}   = imm;
    }
    // _Q is the only variant with Q set; all of Inst{8-5} holds the tile.
    def _Q : sme_tile_to_vector_inst<0b1, 0b11, ZPR128, TileTyQ, is_col,
                                     sme_elm_idx0_0, mnemonic> {
      bits<4> ZAn;
      let Inst{8-5} = ZAn;
    }

    // "mov" aliases.
    defm : sme_tile_to_vector_aliases<!cast<Instruction>(NAME # _B), ZPR8,
                                      TileTyB, sme_elm_idx0_15>;
    defm : sme_tile_to_vector_aliases<!cast<Instruction>(NAME # _H), ZPR16,
                                      TileTyH, sme_elm_idx0_7>;
    defm : sme_tile_to_vector_aliases<!cast<Instruction>(NAME # _S), ZPR32,
                                      TileTyS, sme_elm_idx0_3>;
    defm : sme_tile_to_vector_aliases<!cast<Instruction>(NAME # _D), ZPR64,
                                      TileTyD, sme_elm_idx0_1>;
    defm : sme_tile_to_vector_aliases<!cast<Instruction>(NAME # _Q), ZPR128,
                                      TileTyQ, sme_elm_idx0_0>;

    // Patterns for the read intrinsics. The emission order (B, H x3, S x2,
    // D x2) matches the written-out form.
    defvar op = !if(is_col, int_aarch64_sme_read_vert,
                            int_aarch64_sme_read_horiz);

    defm : sme_tile_to_vector_patterns<!cast<Instruction>(NAME # _B),
                                       nxv16i8, nxv16i1, sme_elm_idx0_15,
                                       imm_to_tile8, tileslice8, op>;
    foreach vt = [nxv8i16, nxv8f16, nxv8bf16] in
      defm : sme_tile_to_vector_patterns<!cast<Instruction>(NAME # _H),
                                         vt, nxv8i1, sme_elm_idx0_7,
                                         imm_to_tile16, tileslice16, op>;
    foreach vt = [nxv4i32, nxv4f32] in
      defm : sme_tile_to_vector_patterns<!cast<Instruction>(NAME # _S),
                                         vt, nxv4i1, sme_elm_idx0_3,
                                         imm_to_tile32, tileslice32, op>;
    foreach vt = [nxv2i64, nxv2f64] in
      defm : sme_tile_to_vector_patterns<!cast<Instruction>(NAME # _D),
                                         vt, nxv2i1, sme_elm_idx0_1,
                                         imm_to_tile64, tileslice64, op>;

    // Patterns for the readq intrinsics: every vector type maps to the _Q
    // instruction, with the predicate type following the element count.
    defvar opq = !if(is_col, int_aarch64_sme_readq_vert,
                             int_aarch64_sme_readq_horiz);

    defm : sme_tile_to_vector_patterns<!cast<Instruction>(NAME # _Q),
                                       nxv16i8, nxv16i1, sme_elm_idx0_0,
                                       imm_to_tile128, tileslice128, opq>;
    foreach vt = [nxv8i16, nxv8f16, nxv8bf16] in
      defm : sme_tile_to_vector_patterns<!cast<Instruction>(NAME # _Q),
                                         vt, nxv8i1, sme_elm_idx0_0,
                                         imm_to_tile128, tileslice128, opq>;
    foreach vt = [nxv4i32, nxv4f32] in
      defm : sme_tile_to_vector_patterns<!cast<Instruction>(NAME # _Q),
                                         vt, nxv4i1, sme_elm_idx0_0,
                                         imm_to_tile128, tileslice128, opq>;
    foreach vt = [nxv2i64, nxv2f64] in
      defm : sme_tile_to_vector_patterns<!cast<Instruction>(NAME # _Q),
                                         vt, nxv2i1, sme_elm_idx0_0,
                                         imm_to_tile128, tileslice128, opq>;
  }
  // Instantiates the tile-to-vector family twice: the _H records with
  // is_col = 0 and the _V records with is_col = 1.
  multiclass sme_tile_to_vector<string mnemonic> {
    defm _H : sme_tile_to_vector_v<mnemonic, /*is_col=*/0b0>;
    defm _V : sme_tile_to_vector_v<mnemonic, /*is_col=*/0b1>;
  }
  1004. //===----------------------------------------------------------------------===//
  1005. // SME Zero
  1006. //===----------------------------------------------------------------------===//
  1007. // NOTE: This definition isn't really correct because there are outputs, i.e.
  1008. // the tile registers being zeroed. We fix this up in a custom inserter that
  1009. // marks the appropriate registers as being implicitly defined.
  // Zero instruction: a single MatrixTileList operand whose 8-bit value is
  // placed in Inst{7-0}; the upper 24 bits are fixed. No outputs are declared.
  class sme_zero_inst<string mnemonic>
      : I<(outs), (ins MatrixTileList:$imm),
          mnemonic, "\t$imm", "", []>,
        Sched<[]> {
    bits<8> imm;

    let Inst{7-0}  = imm;
    let Inst{31-8} = 0b110000000000100000000000;
  }
  // Zero multiclass: the instruction, assembler aliases that spell common
  // 8-bit masks as tile lists, a pseudo taking the mask as an i32 immediate,
  // and the pattern selecting that pseudo for int_aarch64_sme_zero.
  multiclass sme_zero<string mnemonic> {
    def NAME : sme_zero_inst<mnemonic>;

    // Whole array.
    def : InstAlias<"zero\t\\{za\\}",
                    (!cast<Instruction>(NAME) 0b11111111), 1>;

    // .h tiles.
    def : InstAlias<"zero\t\\{za0.h\\}",
                    (!cast<Instruction>(NAME) 0b01010101), 1>;
    def : InstAlias<"zero\t\\{za1.h\\}",
                    (!cast<Instruction>(NAME) 0b10101010), 1>;

    // Single .s tiles.
    def : InstAlias<"zero\t\\{za0.s\\}",
                    (!cast<Instruction>(NAME) 0b00010001), 1>;
    def : InstAlias<"zero\t\\{za1.s\\}",
                    (!cast<Instruction>(NAME) 0b00100010), 1>;
    def : InstAlias<"zero\t\\{za2.s\\}",
                    (!cast<Instruction>(NAME) 0b01000100), 1>;
    def : InstAlias<"zero\t\\{za3.s\\}",
                    (!cast<Instruction>(NAME) 0b10001000), 1>;

    // Pairs of .s tiles.
    def : InstAlias<"zero\t\\{za0.s,za1.s\\}",
                    (!cast<Instruction>(NAME) 0b00110011), 1>;
    def : InstAlias<"zero\t\\{za0.s,za3.s\\}",
                    (!cast<Instruction>(NAME) 0b10011001), 1>;
    def : InstAlias<"zero\t\\{za1.s,za2.s\\}",
                    (!cast<Instruction>(NAME) 0b01100110), 1>;
    def : InstAlias<"zero\t\\{za2.s,za3.s\\}",
                    (!cast<Instruction>(NAME) 0b11001100), 1>;

    // Triples of .s tiles.
    def : InstAlias<"zero\t\\{za0.s,za1.s,za2.s\\}",
                    (!cast<Instruction>(NAME) 0b01110111), 1>;
    def : InstAlias<"zero\t\\{za0.s,za1.s,za3.s\\}",
                    (!cast<Instruction>(NAME) 0b10111011), 1>;
    def : InstAlias<"zero\t\\{za0.s,za2.s,za3.s\\}",
                    (!cast<Instruction>(NAME) 0b11011101), 1>;
    def : InstAlias<"zero\t\\{za1.s,za2.s,za3.s\\}",
                    (!cast<Instruction>(NAME) 0b11101110), 1>;

    // Translated to the actual instructions in AArch64ISelLowering.cpp
    let usesCustomInserter = 1 in
    def NAME # _PSEUDO : Pseudo<(outs), (ins i32imm:$tilelist), []>,
                         Sched<[]>;

    def : Pat<(int_aarch64_sme_zero timm32_0_255:$imm),
              (!cast<Instruction>(NAME # _PSEUDO) timm32_0_255:$imm)>;
  }
  1042. //===----------------------------------------------------------------------===//
  1043. // SVE2 Instructions
  1044. //===----------------------------------------------------------------------===//
  // Predicated (merging, "$Pg/m") unary instruction on ZPR128 operands. The
  // destination $Zd is tied to the first input $_Zd.
  class sve2_int_perm_revd<string asm>
      : I<(outs ZPR128:$Zd),
          (ins ZPR128:$_Zd, PPR3bAny:$Pg, ZPR128:$Zn),
          asm, "\t$Zd, $Pg/m, $Zn", "", []>,
        Sched<[]> {
    bits<5> Zd;
    bits<3> Pg;
    bits<5> Zn;

    // Listed from least- to most-significant bit.
    let Inst{4-0}   = Zd;
    let Inst{9-5}   = Zn;
    let Inst{12-10} = Pg;
    let Inst{21-13} = 0b101110100;
    let Inst{23-22} = 0b00; // size
    let Inst{31-24} = 0b00000101;

    let Constraints = "$Zd = $_Zd";
    let DestructiveInstType = DestructiveUnary;
    let ElementSize = ZPR128.ElementSize;
  }
  // Defines the instruction and passthru patterns for `op` on each integer
  // vector type; every pattern selects the same instruction record.
  multiclass sve2_int_perm_revd<string asm, SDPatternOperator op> {
    def NAME : sve2_int_perm_revd<asm>;

    def : SVE_1_Op_Passthru_Pat<nxv16i8, op, nxv16i1, nxv16i8,
                                !cast<Instruction>(NAME)>;
    def : SVE_1_Op_Passthru_Pat<nxv8i16, op, nxv8i1, nxv8i16,
                                !cast<Instruction>(NAME)>;
    def : SVE_1_Op_Passthru_Pat<nxv4i32, op, nxv4i1, nxv4i32,
                                !cast<Instruction>(NAME)>;
    def : SVE_1_Op_Passthru_Pat<nxv2i64, op, nxv2i1, nxv2i64,
                                !cast<Instruction>(NAME)>;
  }
  // Destructive three-register clamp instruction. $Zd is both the result and
  // the first source (tied through $_Zd); $Zn and $Zm are the other sources.
  //   sz - placed in Inst{23-22}.
  //   U  - placed in Inst{10}.
  //
  // The tied input $_Zd is listed FIRST in the ins dag. The sve2_clamp
  // multiclass selects these records with SVE_3_Op_Pat; that helper is
  // defined elsewhere and is assumed to forward the three intrinsic operands
  // positionally -- TODO confirm. With $_Zd last, the intrinsic's first
  // operand would be bound to $Zn rather than to the tied destination. This
  // order also matches the other tied-operand classes in this file.
  class sve2_clamp<string asm, bits<2> sz, bit U, ZPRRegOp zpr_ty>
      : I<(outs zpr_ty:$Zd), (ins zpr_ty:$_Zd, zpr_ty:$Zn, zpr_ty:$Zm),
          asm, "\t$Zd, $Zn, $Zm", "", []>,
        Sched<[]> {
    bits<5> Zm;
    bits<5> Zn;
    bits<5> Zd;

    let Inst{31-24} = 0b01000100;
    let Inst{23-22} = sz;
    let Inst{21}    = 0b0;
    let Inst{20-16} = Zm;
    let Inst{15-11} = 0b11000;
    let Inst{10}    = U;
    let Inst{9-5}   = Zn;
    let Inst{4-0}   = Zd;

    let Constraints = "$Zd = $_Zd";
    let DestructiveInstType = DestructiveOther;
    let ElementSize = zpr_ty.ElementSize;
  }
  // Defines the _B/_H/_S/_D clamp instructions (sz = 0b00..0b11) and one
  // three-operand pattern per element size mapping `op` onto them.
  multiclass sve2_clamp<string asm, bit U, SDPatternOperator op> {
    def _B : sve2_clamp<asm, 0b00, U, ZPR8>;
    def _H : sve2_clamp<asm, 0b01, U, ZPR16>;
    def _S : sve2_clamp<asm, 0b10, U, ZPR32>;
    def _D : sve2_clamp<asm, 0b11, U, ZPR64>;

    def : SVE_3_Op_Pat<nxv16i8, op, nxv16i8, nxv16i8, nxv16i8,
                       !cast<Instruction>(NAME # _B)>;
    def : SVE_3_Op_Pat<nxv8i16, op, nxv8i16, nxv8i16, nxv8i16,
                       !cast<Instruction>(NAME # _H)>;
    def : SVE_3_Op_Pat<nxv4i32, op, nxv4i32, nxv4i32, nxv4i32,
                       !cast<Instruction>(NAME # _S)>;
    def : SVE_3_Op_Pat<nxv2i64, op, nxv2i64, nxv2i64, nxv2i64,
                       !cast<Instruction>(NAME # _D)>;
  }
  // Predicate select indexed by "$Pm[$Rv, $imm]". Inst{23-22} and
  // Inst{20-18} are not assigned here; the derived records place the
  // immediate and the remaining fixed bits there.
  class sve2_int_perm_sel_p<string asm, PPRRegOp ppr_ty, Operand imm_ty>
      : I<(outs PPRAny:$Pd),
          (ins PPRAny:$Pn, ppr_ty:$Pm, MatrixIndexGPR32Op12_15:$Rv,
               imm_ty:$imm),
          asm, "\t$Pd, $Pn, $Pm[$Rv, $imm]", "", []>,
        Sched<[]> {
    bits<2> Rv;
    bits<4> Pn;
    bits<4> Pm;
    bits<4> Pd;

    // Listed from least- to most-significant bit.
    let Inst{3-0}   = Pd;
    let Inst{4}     = 0b0;
    let Inst{8-5}   = Pm;
    let Inst{9}     = 0b0;
    let Inst{13-10} = Pn;
    let Inst{15-14} = 0b01;
    let Inst{17-16} = Rv;
    let Inst{21}    = 0b1;
    let Inst{31-24} = 0b00100101;
  }
  // Defines the _B/_H/_S/_D predicate-select instructions, parse-only
  // aliases that accept PNRAny for $Pd/$Pn, and selection patterns for `op`.
  multiclass sve2_int_perm_sel_p<string asm, SDPatternOperator op> {
    // The immediate narrows by one bit per element size; the bits it gives
    // up in Inst{23-22,20-18} are taken by a fixed pattern.
    def _B : sve2_int_perm_sel_p<asm, PPR8, sme_elm_idx0_15> {
      bits<4> imm;
      let Inst{23-22} = imm{3-2};
      let Inst{20-19} = imm{1-0};
      let Inst{18}    = 0b1;
    }
    def _H : sve2_int_perm_sel_p<asm, PPR16, sme_elm_idx0_7> {
      bits<3> imm;
      let Inst{23-22} = imm{2-1};
      let Inst{20}    = imm{0};
      let Inst{19-18} = 0b10;
    }
    def _S : sve2_int_perm_sel_p<asm, PPR32, sme_elm_idx0_3> {
      bits<2> imm;
      let Inst{23-22} = imm{1-0};
      let Inst{20-18} = 0b100;
    }
    def _D : sve2_int_perm_sel_p<asm, PPR64, sme_elm_idx0_1> {
      bits<1> imm;
      let Inst{23}    = imm;
      let Inst{22}    = 0b1;
      let Inst{20-18} = 0b000;
    }

    // Aliases taking PNRAny for $Pd and $Pn (emit priority 0).
    def : InstAlias<asm # "\t$Pd, $Pn, $Pm[$Rv, $imm]",
                    (!cast<Instruction>(NAME # _B) PNRAny:$Pd, PNRAny:$Pn,
                        PPR8:$Pm, MatrixIndexGPR32Op12_15:$Rv,
                        sme_elm_idx0_15:$imm),
                    0>;
    def : InstAlias<asm # "\t$Pd, $Pn, $Pm[$Rv, $imm]",
                    (!cast<Instruction>(NAME # _H) PNRAny:$Pd, PNRAny:$Pn,
                        PPR16:$Pm, MatrixIndexGPR32Op12_15:$Rv,
                        sme_elm_idx0_7:$imm),
                    0>;
    def : InstAlias<asm # "\t$Pd, $Pn, $Pm[$Rv, $imm]",
                    (!cast<Instruction>(NAME # _S) PNRAny:$Pd, PNRAny:$Pn,
                        PPR32:$Pm, MatrixIndexGPR32Op12_15:$Rv,
                        sme_elm_idx0_3:$imm),
                    0>;
    def : InstAlias<asm # "\t$Pd, $Pn, $Pm[$Rv, $imm]",
                    (!cast<Instruction>(NAME # _D) PNRAny:$Pd, PNRAny:$Pn,
                        PPR64:$Pm, MatrixIndexGPR32Op12_15:$Rv,
                        sme_elm_idx0_1:$imm),
                    0>;

    // Index given as a plain register: the immediate operand is 0.
    def : Pat<(nxv16i1 (op (nxv16i1 PPRAny:$Pn), (nxv16i1 PPRAny:$Pm),
                           MatrixIndexGPR32Op12_15:$idx)),
              (!cast<Instruction>(NAME # _B) $Pn, $Pm, $idx, 0)>;
    def : Pat<(nxv8i1 (op (nxv8i1 PPRAny:$Pn), (nxv8i1 PPRAny:$Pm),
                          MatrixIndexGPR32Op12_15:$idx)),
              (!cast<Instruction>(NAME # _H) $Pn, $Pm, $idx, 0)>;
    def : Pat<(nxv4i1 (op (nxv4i1 PPRAny:$Pn), (nxv4i1 PPRAny:$Pm),
                          MatrixIndexGPR32Op12_15:$idx)),
              (!cast<Instruction>(NAME # _S) $Pn, $Pm, $idx, 0)>;
    def : Pat<(nxv2i1 (op (nxv2i1 PPRAny:$Pn), (nxv2i1 PPRAny:$Pm),
                          MatrixIndexGPR32Op12_15:$idx)),
              (!cast<Instruction>(NAME # _D) $Pn, $Pm, $idx, 0)>;

    // Index split by a tileslice pattern into register + immediate; ranked
    // above the plain-register forms through AddedComplexity.
    let AddedComplexity = 1 in {
      def : Pat<(nxv16i1 (op (nxv16i1 PPRAny:$Pn), (nxv16i1 PPRAny:$Pm),
                    (i32 (tileslice8 MatrixIndexGPR32Op12_15:$idx,
                                     sme_elm_idx0_15:$imm)))),
                (!cast<Instruction>(NAME # _B) $Pn, $Pm, $idx, $imm)>;
      def : Pat<(nxv8i1 (op (nxv8i1 PPRAny:$Pn), (nxv8i1 PPRAny:$Pm),
                    (i32 (tileslice16 MatrixIndexGPR32Op12_15:$idx,
                                      sme_elm_idx0_7:$imm)))),
                (!cast<Instruction>(NAME # _H) $Pn, $Pm, $idx, $imm)>;
      def : Pat<(nxv4i1 (op (nxv4i1 PPRAny:$Pn), (nxv4i1 PPRAny:$Pm),
                    (i32 (tileslice32 MatrixIndexGPR32Op12_15:$idx,
                                      sme_elm_idx0_3:$imm)))),
                (!cast<Instruction>(NAME # _S) $Pn, $Pm, $idx, $imm)>;
      def : Pat<(nxv2i1 (op (nxv2i1 PPRAny:$Pn), (nxv2i1 PPRAny:$Pm),
                    (i32 (tileslice64 MatrixIndexGPR32Op12_15:$idx,
                                      sme_elm_idx0_1:$imm)))),
                (!cast<Instruction>(NAME # _D) $Pn, $Pm, $idx, $imm)>;
    }
  }
  1180. //===----------------------------------------------------------------------===//
  1181. // SME2 Instructions
  1182. //===----------------------------------------------------------------------===//
  1183. //===----------------------------------------------------------------------===//
  1184. // SME2 single-multi ternary int/fp, two/four registers
  // Multi-vector ($Zn) by single-vector ($Zm) operation accumulating into
  // the ZA array; $ZAd is tied to $_ZAd. The 7-bit `op` is scattered over
  // the encoding: op{6} -> Inst{22}, op{5} -> Inst{20} (also selects the
  // "vgx4"/"vgx2" text), op{4-2} -> Inst{12-10}, op{1-0} -> Inst{4-3}.
  class sme2_dot_mla_add_sub_array_vg24_single<bits<7> op,
                                               MatrixOperand matrix_ty,
                                               RegisterOperand multi_vector_ty,
                                               ZPRRegOp zpr_ty,
                                               string mnemonic>
      : I<(outs matrix_ty:$ZAd),
          (ins matrix_ty:$_ZAd, MatrixIndexGPR32Op8_11:$Rv,
               sme_elm_idx0_7:$imm3, multi_vector_ty:$Zn, zpr_ty:$Zm),
          mnemonic,"\t$ZAd[$Rv, $imm3, " # !if(op{5}, "vgx4", "vgx2") # "], $Zn, $Zm",
          "", []>,
        Sched<[]> {
    bits<4> Zm;
    bits<5> Zn;
    bits<2> Rv;
    bits<3> imm3;

    // Listed from least- to most-significant bit.
    let Inst{2-0}   = imm3;
    let Inst{4-3}   = op{1-0};
    let Inst{9-5}   = Zn;
    let Inst{12-10} = op{4-2};
    let Inst{14-13} = Rv;
    let Inst{15}    = 0b0;
    let Inst{19-16} = Zm;
    let Inst{20}    = op{5}; //vgx4
    let Inst{21}    = 0b1;
    let Inst{22}    = op{6}; //sz
    let Inst{31-23} = 0b110000010;

    let Constraints = "$ZAd = $_ZAd";
  }
  // Instruction plus a parse-only alias (emit priority 0) whose assembly
  // string omits the vgx2/vgx4 token. No pseudo or pattern is created.
  multiclass sme2_dot_mla_add_sub_array_vg24_single<string mnemonic,
                                                    bits<7> op,
                                                    MatrixOperand matrix_ty,
                                                    RegisterOperand multi_vector_ty,
                                                    ZPRRegOp zpr_ty> {
    def NAME : sme2_dot_mla_add_sub_array_vg24_single<op, matrix_ty,
                                                      multi_vector_ty, zpr_ty,
                                                      mnemonic>,
               SMEPseudo2Instr<NAME, 1>;

    def : InstAlias<mnemonic # "\t$ZAd[$Rv, $imm3], $Zn, $Zm",
                    (!cast<Instruction>(NAME) matrix_ty:$ZAd,
                        MatrixIndexGPR32Op8_11:$Rv, sme_elm_idx0_7:$imm3,
                        multi_vector_ty:$Zn, zpr_ty:$Zm),
                    0>;
  }
  // VG2 flavour: instruction, parse-only alias without the vgx token, a
  // _PSEUDO record, and the VG2 multi/single pattern binding `intrinsic`.
  multiclass sme2_dot_mla_add_sub_array_vg2_single<string mnemonic,
                                                   bits<7> op,
                                                   MatrixOperand matrix_ty,
                                                   RegisterOperand multi_vector_ty,
                                                   ZPRRegOp zpr_ty,
                                                   ValueType vty,
                                                   SDPatternOperator intrinsic> {
    def NAME : sme2_dot_mla_add_sub_array_vg24_single<op, matrix_ty,
                                                      multi_vector_ty, zpr_ty,
                                                      mnemonic>,
               SMEPseudo2Instr<NAME, 1>;

    def : InstAlias<mnemonic # "\t$ZAd[$Rv, $imm3], $Zn, $Zm",
                    (!cast<Instruction>(NAME) matrix_ty:$ZAd,
                        MatrixIndexGPR32Op8_11:$Rv, sme_elm_idx0_7:$imm3,
                        multi_vector_ty:$Zn, zpr_ty:$Zm),
                    0>;

    def _PSEUDO : sme2_za_array_2op_multi_single_pseudo<NAME, sme_elm_idx0_7,
                                                        multi_vector_ty,
                                                        zpr_ty,
                                                        SMEMatrixArray>;

    def : SME2_ZA_TwoOp_VG2_Multi_Single_Pat<NAME, intrinsic, sme_elm_idx0_7,
                                             zpr_ty, vty, tileslice16>;
  }
  // VG4 flavour: instruction, parse-only alias without the vgx token, a
  // _PSEUDO record, and the VG4 multi/single pattern binding `intrinsic`.
  multiclass sme2_dot_mla_add_sub_array_vg4_single<string mnemonic,
                                                   bits<7> op,
                                                   MatrixOperand matrix_ty,
                                                   RegisterOperand multi_vector_ty,
                                                   ZPRRegOp zpr_ty,
                                                   ValueType vty,
                                                   SDPatternOperator intrinsic> {
    def NAME : sme2_dot_mla_add_sub_array_vg24_single<op, matrix_ty,
                                                      multi_vector_ty, zpr_ty,
                                                      mnemonic>,
               SMEPseudo2Instr<NAME, 1>;

    def : InstAlias<mnemonic # "\t$ZAd[$Rv, $imm3], $Zn, $Zm",
                    (!cast<Instruction>(NAME) matrix_ty:$ZAd,
                        MatrixIndexGPR32Op8_11:$Rv, sme_elm_idx0_7:$imm3,
                        multi_vector_ty:$Zn, zpr_ty:$Zm),
                    0>;

    def _PSEUDO : sme2_za_array_2op_multi_single_pseudo<NAME, sme_elm_idx0_7,
                                                        multi_vector_ty,
                                                        zpr_ty,
                                                        SMEMatrixArray>;

    def : SME2_ZA_TwoOp_VG4_Multi_Single_Pat<NAME, intrinsic, sme_elm_idx0_7,
                                             zpr_ty, vty, tileslice16>;
  }
  1240. //===----------------------------------------------------------------------===//
  1241. // SME2 multiple vectors ternary INT/FP two and four registers
  // Multi-vector by multi-vector operation (vgx2) accumulating into the ZA
  // array; $ZAd is tied to $_ZAd. $Zn and $Zm are 4-bit fields, each
  // followed by a fixed zero bit (Inst{5} and Inst{16}). The 6-bit `op` is
  // split: op{5} -> Inst{22}, op{4-2} -> Inst{12-10}, op{1-0} -> Inst{4-3}.
  class sme2_dot_mla_add_sub_array_vg2_multi<bits<6> op,
                                             MatrixOperand matrix_ty,
                                             RegisterOperand multi_vector_ty,
                                             string mnemonic>
      : I<(outs matrix_ty:$ZAd),
          (ins matrix_ty:$_ZAd, MatrixIndexGPR32Op8_11:$Rv,
               sme_elm_idx0_7:$imm3, multi_vector_ty:$Zn,
               multi_vector_ty:$Zm),
          mnemonic, "\t$ZAd[$Rv, $imm3, vgx2], $Zn, $Zm",
          "", []>,
        Sched<[]> {
    bits<4> Zm;
    bits<4> Zn;
    bits<2> Rv;
    bits<3> imm3;

    // Listed from least- to most-significant bit.
    let Inst{2-0}   = imm3;
    let Inst{4-3}   = op{1-0};
    let Inst{5}     = 0b0;
    let Inst{9-6}   = Zn;
    let Inst{12-10} = op{4-2};
    let Inst{14-13} = Rv;
    let Inst{16-15} = 0b00;
    let Inst{20-17} = Zm;
    let Inst{21}    = 0b1;
    let Inst{22}    = op{5}; //sz
    let Inst{31-23} = 0b110000011;

    let Constraints = "$ZAd = $_ZAd";
  }
  // VG2 multi/multi: instruction, _PSEUDO record, selection pattern for
  // `intrinsic`, and a parse-only alias without the vgx2 token.
  // Note that `zpr_ty` is a ValueType in this multiclass.
  multiclass sme2_dot_mla_add_sub_array_vg2_multi<string mnemonic, bits<6> op,
                                                  MatrixOperand matrix_ty,
                                                  RegisterOperand multi_vector_ty,
                                                  ValueType zpr_ty,
                                                  SDPatternOperator intrinsic> {
    def NAME : sme2_dot_mla_add_sub_array_vg2_multi<op, matrix_ty,
                                                    multi_vector_ty,
                                                    mnemonic>,
               SMEPseudo2Instr<NAME, 1>;

    def _PSEUDO : sme2_za_array_2op_multi_multi_pseudo<NAME, sme_elm_idx0_7,
                                                       multi_vector_ty,
                                                       SMEMatrixArray>;

    def : SME2_ZA_TwoOp_VG2_Multi_Multi_Pat<NAME, intrinsic, sme_elm_idx0_7,
                                            zpr_ty, tileslice16>;

    def : InstAlias<mnemonic # "\t$ZAd[$Rv, $imm3], $Zn, $Zm",
                    (!cast<Instruction>(NAME) matrix_ty:$ZAd,
                        MatrixIndexGPR32Op8_11:$Rv, sme_elm_idx0_7:$imm3,
                        multi_vector_ty:$Zn, multi_vector_ty:$Zm),
                    0>;
  }
  // Multi-vector by multi-vector operation (vgx4) accumulating into the ZA
  // array; $ZAd is tied to $_ZAd. $Zn and $Zm are 3-bit fields here, with
  // Inst{6-5} fixed to 0b00 and Inst{17-15} fixed to 0b010. The 6-bit `op`
  // is split: op{5} -> Inst{22}, op{4-2} -> Inst{12-10}, op{1-0} -> Inst{4-3}.
  class sme2_dot_mla_add_sub_array_vg4_multi<bits<6> op,
                                             MatrixOperand matrix_ty,
                                             RegisterOperand multi_vector_ty,
                                             string mnemonic>
      : I<(outs matrix_ty:$ZAd),
          (ins matrix_ty:$_ZAd, MatrixIndexGPR32Op8_11:$Rv,
               sme_elm_idx0_7:$imm3, multi_vector_ty:$Zn,
               multi_vector_ty:$Zm),
          mnemonic, "\t$ZAd[$Rv, $imm3, vgx4], $Zn, $Zm",
          "", []>,
        Sched<[]> {
    bits<3> Zm;
    bits<3> Zn;
    bits<2> Rv;
    bits<3> imm3;

    // Listed from least- to most-significant bit.
    let Inst{2-0}   = imm3;
    let Inst{4-3}   = op{1-0};
    let Inst{6-5}   = 0b00;
    let Inst{9-7}   = Zn;
    let Inst{12-10} = op{4-2};
    let Inst{14-13} = Rv;
    let Inst{17-15} = 0b010;
    let Inst{20-18} = Zm;
    let Inst{21}    = 0b1;
    let Inst{22}    = op{5}; //sz
    let Inst{31-23} = 0b110000011;

    let Constraints = "$ZAd = $_ZAd";
  }
  // VG4 multi/multi: instruction, _PSEUDO record, selection pattern for
  // `intrinsic`, and a parse-only alias without the vgx4 token.
  // Note that `zpr_ty` is a ValueType in this multiclass.
  multiclass sme2_dot_mla_add_sub_array_vg4_multi<string mnemonic, bits<6> op,
                                                  MatrixOperand matrix_ty,
                                                  RegisterOperand multi_vector_ty,
                                                  ValueType zpr_ty,
                                                  SDPatternOperator intrinsic> {
    def NAME : sme2_dot_mla_add_sub_array_vg4_multi<op, matrix_ty,
                                                    multi_vector_ty,
                                                    mnemonic>,
               SMEPseudo2Instr<NAME, 1>;

    def _PSEUDO : sme2_za_array_2op_multi_multi_pseudo<NAME, sme_elm_idx0_7,
                                                       multi_vector_ty,
                                                       SMEMatrixArray>;

    def : SME2_ZA_TwoOp_VG4_Multi_Multi_Pat<NAME, intrinsic, sme_elm_idx0_7,
                                            zpr_ty, tileslice16>;

    def : InstAlias<mnemonic # "\t$ZAd[$Rv, $imm3], $Zn, $Zm",
                    (!cast<Instruction>(NAME) matrix_ty:$ZAd,
                        MatrixIndexGPR32Op8_11:$Rv, sme_elm_idx0_7:$imm3,
                        multi_vector_ty:$Zn, multi_vector_ty:$Zm),
                    0>;
  }
  1314. //===----------------------------------------------------------------------===//
  1315. // SME2 multiple vectors binary two or four registers
  // Common part of the multi-vector accumulate add/sub encodings; $ZAdn is
  // tied to $_ZAdn. The $Zm field (Inst{9-6}) is left to the _vg2/_vg4
  // subclasses. `vg4` goes to Inst{16} and selects the "vgx4"/"vgx2" text.
  class sme2_multivec_accum_add_sub<string mnemonic, bit sz, bit vg4,
                                    bits<3> op,
                                    MatrixOperand matrix_ty,
                                    RegisterOperand vector_ty>
      : I<(outs matrix_ty:$ZAdn),
          (ins matrix_ty:$_ZAdn, MatrixIndexGPR32Op8_11:$Rv,
               sme_elm_idx0_7:$imm3, vector_ty:$Zm),
          mnemonic, "\t$ZAdn[$Rv, $imm3, " # !if(vg4, "vgx4", "vgx2") # "], $Zm",
          "", []>,
        Sched<[]> {
    bits<2> Rv;
    bits<3> imm3;

    // Listed from least- to most-significant bit.
    let Inst{2-0}   = imm3;
    let Inst{4-3}   = op{1-0};
    let Inst{5}     = 0b0;
    let Inst{12-10} = 0b111;
    let Inst{14-13} = Rv;
    let Inst{15}    = 0b0;
    let Inst{16}    = vg4;
    let Inst{17}    = 0b0;
    let Inst{18}    = op{2};
    let Inst{21-19} = 0b100;
    let Inst{22}    = sz;
    let Inst{31-23} = 0b110000011;

    let Constraints = "$ZAdn = $_ZAdn";
  }
  // Two-register variant: fixes vg4 = 0 in the base class and encodes a
  // 4-bit $Zm into Inst{9-6}.
  class sme2_multivec_accum_add_sub_vg2<string mnemonic, bit sz, bits<3> op,
                                        MatrixOperand matrix_ty,
                                        RegisterOperand vector_ty>
      : sme2_multivec_accum_add_sub<mnemonic, sz, 0b0, op, matrix_ty,
                                    vector_ty> {
    bits<4> Zm;
    let Inst{9-6} = Zm;
  }
  // Instantiates the vg2 instruction and an alias for it.
  // The 4-bit `op` is split: op{3} becomes the class's `sz` bit and op{2-0}
  // its 3-bit `op`. The alias accepts the operand list without the "vgx2"
  // token that the instruction's own asm string carries.
  multiclass sme2_multivec_accum_add_sub_vg2<string mnemonic, bits<4> op,
                                             MatrixOperand matrix_ty,
                                             RegisterOperand vector_ty> {
    def NAME : sme2_multivec_accum_add_sub_vg2<mnemonic, op{3}, op{2-0},
                                               matrix_ty, vector_ty>;

    def : InstAlias<mnemonic # "\t$ZAdn[$Rv, $imm3], $Zm",
                    (!cast<Instruction>(NAME) matrix_ty:$ZAdn,
                        MatrixIndexGPR32Op8_11:$Rv, sme_elm_idx0_7:$imm3,
                        vector_ty:$Zm),
                    0>;
  }
  // Four-register variant: fixes vg4 = 1 in the base class, encodes a 3-bit
  // $Zm into Inst{9-7} and forces Inst{6} to zero.
  class sme2_multivec_accum_add_sub_vg4<string mnemonic, bit sz, bits<3> op,
                                        MatrixOperand matrix_ty,
                                        RegisterOperand vector_ty>
      : sme2_multivec_accum_add_sub<mnemonic, sz, 0b1, op, matrix_ty,
                                    vector_ty> {
    bits<3> Zm;
    let Inst{9-7} = Zm;
    let Inst{6}   = 0b0;
  }
  // Instantiates the vg4 instruction and an alias for it.
  // The 4-bit `op` is split: op{3} becomes the class's `sz` bit and op{2-0}
  // its 3-bit `op`. The alias accepts the operand list without the "vgx4"
  // token that the instruction's own asm string carries.
  multiclass sme2_multivec_accum_add_sub_vg4<string mnemonic, bits<4> op,
                                             MatrixOperand matrix_ty,
                                             RegisterOperand vector_ty> {
    def NAME : sme2_multivec_accum_add_sub_vg4<mnemonic, op{3}, op{2-0},
                                               matrix_ty, vector_ty>;

    def : InstAlias<mnemonic # "\t$ZAdn[$Rv, $imm3], $Zm",
                    (!cast<Instruction>(NAME) matrix_ty:$ZAdn,
                        MatrixIndexGPR32Op8_11:$Rv, sme_elm_idx0_7:$imm3,
                        vector_ty:$Zm),
                    0>;
  }
  1368. //===----------------------------------------------------------------------===//
  1369. // SME2 Multi-vector - Multiple and Single SVE Destructive
  1370. // Two and Four registers
  // Destructive form with a two-register $Zdn (tied to $_Zdn) and a single
  // vector $Zm.
  //   sz -> Inst{23-22}
  //   op -> Inst{10-5} (op{6-1}) and Inst{0} (op{0})
  //   Zm -> Inst{19-16}, Zdn -> Inst{4-1}
  class sme2_sve_destructive_vector_vg2_single<bits<2> sz, bits<7> op,
                                               RegisterOperand vector_ty,
                                               ZPRRegOp zpr_ty,
                                               string mnemonic>
      : I<(outs vector_ty:$Zdn),
          (ins vector_ty:$_Zdn, zpr_ty:$Zm),
          mnemonic, "\t$Zdn, $_Zdn, $Zm",
          "", []>,
        Sched<[]> {
    // Destination doubles as the first source.
    let Constraints = "$Zdn = $_Zdn";

    bits<4> Zm;
    bits<4> Zdn;

    let Inst{31-24} = 0b11000001;
    let Inst{23-22} = sz;
    let Inst{21-20} = 0b10;
    let Inst{19-16} = Zm;
    let Inst{15-11} = 0b10100;
    let Inst{10-5}  = op{6-1};
    let Inst{4-1}   = Zdn;
    let Inst{0}     = op{0};
  }
  // _H/_S/_D instantiations of the vg2 "single" destructive class, with
  // sz = 0b01/0b10/0b11 respectively (there is no _B form here).
  multiclass sme2_fp_sve_destructive_vector_vg2_single<string mnemonic,
                                                       bits<7> op> {
    def _H : sme2_sve_destructive_vector_vg2_single<0b01, op, ZZ_h_mul_r,
                                                    ZPR4b16, mnemonic>;
    def _S : sme2_sve_destructive_vector_vg2_single<0b10, op, ZZ_s_mul_r,
                                                    ZPR4b32, mnemonic>;
    def _D : sme2_sve_destructive_vector_vg2_single<0b11, op, ZZ_d_mul_r,
                                                    ZPR4b64, mnemonic>;
  }
  // _B/_H/_S/_D instantiations of the vg2 "single" destructive class, with
  // sz = 0b00/0b01/0b10/0b11 respectively.
  multiclass sme2_int_sve_destructive_vector_vg2_single<string mnemonic,
                                                        bits<7> op> {
    def _B : sme2_sve_destructive_vector_vg2_single<0b00, op, ZZ_b_mul_r,
                                                    ZPR4b8, mnemonic>;
    def _H : sme2_sve_destructive_vector_vg2_single<0b01, op, ZZ_h_mul_r,
                                                    ZPR4b16, mnemonic>;
    def _S : sme2_sve_destructive_vector_vg2_single<0b10, op, ZZ_s_mul_r,
                                                    ZPR4b32, mnemonic>;
    def _D : sme2_sve_destructive_vector_vg2_single<0b11, op, ZZ_d_mul_r,
                                                    ZPR4b64, mnemonic>;
  }
  // SME2.1 fmax/fmin instructions.
  // Only an _H record is produced; it uses halfword operands with sz = 0b00.
  multiclass sme2p1_bf_max_min_vector_vg2_single<string mnemonic, bits<7> op> {
    def _H : sme2_sve_destructive_vector_vg2_single<0b00, op, ZZ_h_mul_r,
                                                    ZPR4b16, mnemonic>;
  }
  // Destructive form with a four-register $Zdn (tied to $_Zdn) and a single
  // vector $Zm.
  //   sz -> Inst{23-22}
  //   op -> Inst{10-5} (op{6-1}) and Inst{0} (op{0})
  //   Zm -> Inst{19-16}, Zdn -> Inst{4-2}; Inst{1} is fixed to zero
  class sme2_sve_destructive_vector_vg4_single<bits<2> sz, bits<7> op,
                                               RegisterOperand vector_ty,
                                               ZPRRegOp zpr_ty,
                                               string mnemonic>
      : I<(outs vector_ty:$Zdn),
          (ins vector_ty:$_Zdn, zpr_ty:$Zm),
          mnemonic, "\t$Zdn, $_Zdn, $Zm",
          "", []>,
        Sched<[]> {
    // Destination doubles as the first source.
    let Constraints = "$Zdn = $_Zdn";

    bits<4> Zm;
    bits<3> Zdn;

    let Inst{31-24} = 0b11000001;
    let Inst{23-22} = sz;
    let Inst{21-20} = 0b10;
    let Inst{19-16} = Zm;
    let Inst{15-11} = 0b10101;
    let Inst{10-5}  = op{6-1};
    let Inst{4-2}   = Zdn;
    let Inst{1}     = 0b0;
    let Inst{0}     = op{0};
  }
  // _H/_S/_D instantiations of the vg4 "single" destructive class, with
  // sz = 0b01/0b10/0b11 respectively (there is no _B form here).
  multiclass sme2_fp_sve_destructive_vector_vg4_single<string mnemonic,
                                                       bits<7> op> {
    def _H : sme2_sve_destructive_vector_vg4_single<0b01, op, ZZZZ_h_mul_r,
                                                    ZPR4b16, mnemonic>;
    def _S : sme2_sve_destructive_vector_vg4_single<0b10, op, ZZZZ_s_mul_r,
                                                    ZPR4b32, mnemonic>;
    def _D : sme2_sve_destructive_vector_vg4_single<0b11, op, ZZZZ_d_mul_r,
                                                    ZPR4b64, mnemonic>;
  }
  // _B/_H/_S/_D instantiations of the vg4 "single" destructive class, with
  // sz = 0b00/0b01/0b10/0b11 respectively.
  multiclass sme2_int_sve_destructive_vector_vg4_single<string mnemonic,
                                                        bits<7> op> {
    def _B : sme2_sve_destructive_vector_vg4_single<0b00, op, ZZZZ_b_mul_r,
                                                    ZPR4b8, mnemonic>;
    def _H : sme2_sve_destructive_vector_vg4_single<0b01, op, ZZZZ_h_mul_r,
                                                    ZPR4b16, mnemonic>;
    def _S : sme2_sve_destructive_vector_vg4_single<0b10, op, ZZZZ_s_mul_r,
                                                    ZPR4b32, mnemonic>;
    def _D : sme2_sve_destructive_vector_vg4_single<0b11, op, ZZZZ_d_mul_r,
                                                    ZPR4b64, mnemonic>;
  }
  // SME2.1 fmax/fmin instructions.
  // Only an _H record is produced; it uses halfword operands with sz = 0b00.
  multiclass sme2p1_bf_max_min_vector_vg4_single<string mnemonic, bits<7> op> {
    def _H : sme2_sve_destructive_vector_vg4_single<0b00, op, ZZZZ_h_mul_r,
                                                    ZPR4b16, mnemonic>;
  }
  // Destructive form where both $Zdn (tied to $_Zdn) and $Zm use the same
  // two-register operand class.
  //   sz -> Inst{23-22}
  //   op -> Inst{10-5} (op{6-1}) and Inst{0} (op{0})
  //   Zm -> Inst{20-17}, Zdn -> Inst{4-1}
  class sme2_sve_destructive_vector_vg2_multi<bits<2> sz, bits<7> op,
                                              RegisterOperand vector_ty,
                                              string mnemonic>
      : I<(outs vector_ty:$Zdn),
          (ins vector_ty:$_Zdn, vector_ty:$Zm),
          mnemonic, "\t$Zdn, $_Zdn, $Zm",
          "", []>,
        Sched<[]> {
    // Destination doubles as the first source.
    let Constraints = "$Zdn = $_Zdn";

    bits<4> Zm;
    bits<4> Zdn;

    let Inst{31-24} = 0b11000001;
    let Inst{23-22} = sz;
    let Inst{21}    = 0b1;
    let Inst{20-17} = Zm;
    let Inst{16-11} = 0b010110;
    let Inst{10-5}  = op{6-1};
    let Inst{4-1}   = Zdn;
    let Inst{0}     = op{0};
  }
  // _H/_S/_D instantiations of the vg2 "multi" destructive class, with
  // sz = 0b01/0b10/0b11 respectively (there is no _B form here).
  multiclass sme2_fp_sve_destructive_vector_vg2_multi<string mnemonic,
                                                      bits<7> op> {
    def _H : sme2_sve_destructive_vector_vg2_multi<0b01, op, ZZ_h_mul_r,
                                                   mnemonic>;
    def _S : sme2_sve_destructive_vector_vg2_multi<0b10, op, ZZ_s_mul_r,
                                                   mnemonic>;
    def _D : sme2_sve_destructive_vector_vg2_multi<0b11, op, ZZ_d_mul_r,
                                                   mnemonic>;
  }
  // _B/_H/_S/_D instantiations of the vg2 "multi" destructive class, with
  // sz = 0b00/0b01/0b10/0b11 respectively.
  multiclass sme2_int_sve_destructive_vector_vg2_multi<string mnemonic,
                                                       bits<7> op> {
    def _B : sme2_sve_destructive_vector_vg2_multi<0b00, op, ZZ_b_mul_r,
                                                   mnemonic>;
    def _H : sme2_sve_destructive_vector_vg2_multi<0b01, op, ZZ_h_mul_r,
                                                   mnemonic>;
    def _S : sme2_sve_destructive_vector_vg2_multi<0b10, op, ZZ_s_mul_r,
                                                   mnemonic>;
    def _D : sme2_sve_destructive_vector_vg2_multi<0b11, op, ZZ_d_mul_r,
                                                   mnemonic>;
  }
  // SME2.1 fmax/fmin instructions.
  // Only an _H record is produced; it uses halfword operands with sz = 0b00.
  multiclass sme2p1_bf_max_min_vector_vg2_multi<string mnemonic, bits<7> op> {
    def _H : sme2_sve_destructive_vector_vg2_multi<0b00, op, ZZ_h_mul_r,
                                                   mnemonic>;
  }
  // Destructive form where both $Zdn (tied to $_Zdn) and $Zm use the same
  // four-register operand class.
  //   sz -> Inst{23-22}
  //   op -> Inst{10-5} (op{6-1}) and Inst{0} (op{0})
  //   Zm -> Inst{20-18}, Zdn -> Inst{4-2}; Inst{1} is fixed to zero
  class sme2_sve_destructive_vector_vg4_multi<bits<2> sz, bits<7> op,
                                              RegisterOperand vector_ty,
                                              string mnemonic>
      : I<(outs vector_ty:$Zdn),
          (ins vector_ty:$_Zdn, vector_ty:$Zm),
          mnemonic, "\t$Zdn, $_Zdn, $Zm",
          "", []>,
        Sched<[]> {
    // Destination doubles as the first source.
    let Constraints = "$Zdn = $_Zdn";

    bits<3> Zm;
    bits<3> Zdn;

    let Inst{31-24} = 0b11000001;
    let Inst{23-22} = sz;
    let Inst{21}    = 0b1;
    let Inst{20-18} = Zm;
    let Inst{17-11} = 0b0010111;
    let Inst{10-5}  = op{6-1};
    let Inst{4-2}   = Zdn;
    let Inst{1}     = 0b0;
    let Inst{0}     = op{0};
  }
  // _H/_S/_D instantiations of the vg4 "multi" destructive class, with
  // sz = 0b01/0b10/0b11 respectively (there is no _B form here).
  multiclass sme2_fp_sve_destructive_vector_vg4_multi<string mnemonic,
                                                      bits<7> op> {
    def _H : sme2_sve_destructive_vector_vg4_multi<0b01, op, ZZZZ_h_mul_r,
                                                   mnemonic>;
    def _S : sme2_sve_destructive_vector_vg4_multi<0b10, op, ZZZZ_s_mul_r,
                                                   mnemonic>;
    def _D : sme2_sve_destructive_vector_vg4_multi<0b11, op, ZZZZ_d_mul_r,
                                                   mnemonic>;
  }
  // _B/_H/_S/_D instantiations of the vg4 "multi" destructive class, with
  // sz = 0b00/0b01/0b10/0b11 respectively.
  multiclass sme2_int_sve_destructive_vector_vg4_multi<string mnemonic,
                                                       bits<7> op> {
    def _B : sme2_sve_destructive_vector_vg4_multi<0b00, op, ZZZZ_b_mul_r,
                                                   mnemonic>;
    def _H : sme2_sve_destructive_vector_vg4_multi<0b01, op, ZZZZ_h_mul_r,
                                                   mnemonic>;
    def _S : sme2_sve_destructive_vector_vg4_multi<0b10, op, ZZZZ_s_mul_r,
                                                   mnemonic>;
    def _D : sme2_sve_destructive_vector_vg4_multi<0b11, op, ZZZZ_d_mul_r,
                                                   mnemonic>;
  }
  // SME2.1 fmax/fmin instructions.
  // Only an _H record is produced; it uses halfword operands with sz = 0b00.
  multiclass sme2p1_bf_max_min_vector_vg4_multi<string mnemonic, bits<7> op> {
    def _H : sme2_sve_destructive_vector_vg4_multi<0b00, op, ZZZZ_h_mul_r,
                                                   mnemonic>;
  }
  1511. //===----------------------------------------------------------------------===//
  1512. // SME2 Multi-vector - Index/Single/Multi Array Vectors FMA sources
  // Base class for the indexed long-MLA array forms. The accumulator is
  // always MatrixOp32 ($ZAda tied to $_ZAda); the second source is ZPR4b16:$Zm
  // with a VectorIndexH32b_timm lane index $i3.
  //   op0        -> Inst{23-22}
  //   op         -> Inst{4-3}
  //   vg_acronym -> when empty, Inst{20} = 0 and the asm string has no group
  //                 suffix; otherwise Inst{20} = 1 and ", <vg_acronym>" is
  //                 appended inside the brackets.
  // Inst{15}, Inst{11-5} and Inst{2-0} are not assigned here; users of this
  // class fill them with $i3, $Zn and $imm.
  class sme2_mla_long_array_index_base<bits<2> op0, bits<2> op, Operand index_ty,
                                       RegisterOperand multi_vector_ty,
                                       string mnemonic, string vg_acronym="">
      : I<(outs MatrixOp32:$ZAda),
          (ins MatrixOp32:$_ZAda, MatrixIndexGPR32Op8_11:$Rv, index_ty:$imm,
               multi_vector_ty:$Zn, ZPR4b16:$Zm, VectorIndexH32b_timm:$i3),
          mnemonic,
          "\t$ZAda[$Rv, $imm"
            # !if(!eq(vg_acronym, ""), "", ", " # vg_acronym)
            # "], $Zn, $Zm$i3",
          "", []>,
        Sched<[]> {
    // Accumulator is read-modify-write.
    let Constraints = "$ZAda = $_ZAda";

    bits<4> Zm;
    bits<2> Rv;

    let Inst{31-24} = 0b11000001;
    let Inst{23-22} = op0;
    let Inst{21}    = 0b0;
    let Inst{20}    = !if(!eq(vg_acronym, ""), 0, 1);
    let Inst{19-16} = Zm;
    let Inst{14-13} = Rv;
    let Inst{12}    = 0b1;
    let Inst{4-3}   = op;
  }
  // Single-vector indexed form (no vector-group suffix). Emits:
  //   _S        - the instruction; the def body supplies the bits the base
  //               class leaves open: $i3 is split across Inst{15} and
  //               Inst{11-10}, $Zn goes to Inst{9-5}, $imm to Inst{2-0};
  //   _S_PSEUDO - the matching sme2_za_array_2op_multi_index_pseudo;
  //   (anon)    - the selection pattern for `intrinsic`.
  multiclass sme2_mla_long_array_index<string mnemonic, bits<2> op0, bits<2> op,
                                       ValueType zpr_ty,
                                       SDPatternOperator intrinsic> {
    def _S : sme2_mla_long_array_index_base<op0, op, uimm3s2range, ZPR16,
                                            mnemonic>,
             SMEPseudo2Instr<NAME # _S, 1> {
      bits<3> i3;
      bits<5> Zn;
      bits<3> imm;

      let Inst{15}    = i3{2};
      let Inst{11-10} = i3{1-0};
      let Inst{9-5}   = Zn;
      let Inst{2-0}   = imm;
    }

    def _S_PSEUDO : sme2_za_array_2op_multi_index_pseudo<NAME # _S, uimm3s2range,
                                                         ZPR16, ZPR4b16,
                                                         VectorIndexH32b_timm,
                                                         SMEMatrixArray>;

    def : SME2_ZA_TwoOp_Multi_Index_Pat<NAME # _S, intrinsic, uimm3s2range,
                                        ZPR4b16, zpr_ty, VectorIndexH32b_timm,
                                        tileslicerange3s2>;
  }
  // "vgx2" indexed form: $Zn is a ZZ_h_mul_r pair and the offset is
  // uimm2s2range. Completes the base-class encoding:
  //   Inst{15} = 0, Inst{5} = 0
  //   i3  -> Inst{11-10} (i3{2-1}) and Inst{2} (i3{0})
  //   Zn  -> Inst{9-6}
  //   imm -> Inst{1-0}
  class sme2_mla_long_array_vg2_index<string mnemonic, bits<2> op0, bits<2> op>
      : sme2_mla_long_array_index_base<op0, op, uimm2s2range, ZZ_h_mul_r,
                                       mnemonic, "vgx2"> {
    bits<3> i3;
    bits<4> Zn;
    bits<2> imm;

    let Inst{15}    = 0b0;
    let Inst{11-10} = i3{2-1};
    let Inst{9-6}   = Zn;
    let Inst{5}     = 0b0;
    let Inst{2}     = i3{0};
    let Inst{1-0}   = imm;
  }
  // vgx2 indexed instantiation with op0 = 0b10. Emits the _S instruction, its
  // _S_PSEUDO, a selection pattern (element type zpr_ty) and an alias whose
  // asm string omits the "vgx2" token.
  multiclass sme2_fp_mla_long_array_vg2_index<string mnemonic, bits<2> op,
                                              ValueType zpr_ty,
                                              SDPatternOperator intrinsic> {
    def _S : sme2_mla_long_array_vg2_index<mnemonic, 0b10, op>,
             SMEPseudo2Instr<NAME # _S, 1>;

    def _S_PSEUDO : sme2_za_array_2op_multi_index_pseudo<NAME # _S, uimm2s2range,
                                                         ZZ_h_mul_r, ZPR4b16,
                                                         VectorIndexH32b_timm,
                                                         SMEMatrixArray>;

    def : SME2_ZA_TwoOp_VG2_Multi_Index_Pat<NAME # _S, intrinsic, uimm2s2range,
                                            ZPR4b16, zpr_ty,
                                            VectorIndexH32b_timm,
                                            tileslicerange2s2>;

    def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm], $Zn, $Zm$i3",
                    (!cast<Instruction>(NAME #_S) MatrixOp32:$ZAda,
                        MatrixIndexGPR32Op8_11:$Rv, uimm2s2range:$imm,
                        ZZ_h_mul_r:$Zn, ZPR4b16:$Zm, VectorIndexH32b_timm:$i3),
                    0>;
  }
  // vgx2 indexed instantiation with op0 = 0b11. Same record set as the fp
  // flavour, but the pattern's element type is hard-wired to nxv8i16.
  multiclass sme2_int_mla_long_array_vg2_index<string mnemonic, bits<2> op,
                                               SDPatternOperator intrinsic> {
    def _S : sme2_mla_long_array_vg2_index<mnemonic, 0b11, op>,
             SMEPseudo2Instr<NAME # _S, 1>;

    def _S_PSEUDO : sme2_za_array_2op_multi_index_pseudo<NAME # _S, uimm2s2range,
                                                         ZZ_h_mul_r, ZPR4b16,
                                                         VectorIndexH32b_timm,
                                                         SMEMatrixArray>;

    def : SME2_ZA_TwoOp_VG2_Multi_Index_Pat<NAME # _S, intrinsic, uimm2s2range,
                                            ZPR4b16, nxv8i16,
                                            VectorIndexH32b_timm,
                                            tileslicerange2s2>;

    def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm], $Zn, $Zm$i3",
                    (!cast<Instruction>(NAME #_S) MatrixOp32:$ZAda,
                        MatrixIndexGPR32Op8_11:$Rv, uimm2s2range:$imm,
                        ZZ_h_mul_r:$Zn, ZPR4b16:$Zm, VectorIndexH32b_timm:$i3),
                    0>;
  }
  // "vgx4" indexed form: $Zn is a ZZZZ_h_mul_r quad and the offset is
  // uimm2s2range. Completes the base-class encoding:
  //   Inst{15} = 1, Inst{6-5} = 00
  //   i3  -> Inst{11-10} (i3{2-1}) and Inst{2} (i3{0})
  //   Zn  -> Inst{9-7}
  //   imm -> Inst{1-0}
  class sme2_mla_long_array_vg4_index<string mnemonic, bits<2> op0, bits<2> op>
      : sme2_mla_long_array_index_base<op0, op, uimm2s2range, ZZZZ_h_mul_r,
                                       mnemonic, "vgx4"> {
    bits<3> i3;
    bits<3> Zn;
    bits<2> imm;

    let Inst{15}    = 0b1;
    let Inst{11-10} = i3{2-1};
    let Inst{9-7}   = Zn;
    let Inst{6-5}   = 0b00;
    let Inst{2}     = i3{0};
    let Inst{1-0}   = imm;
  }
  // vgx4 indexed instantiation with op0 = 0b10. Emits the _S instruction, its
  // _S_PSEUDO, a selection pattern (element type zpr_ty) and an alias whose
  // asm string omits the "vgx4" token.
  multiclass sme2_fp_mla_long_array_vg4_index<string mnemonic, bits<2> op,
                                              ValueType zpr_ty,
                                              SDPatternOperator intrinsic> {
    def _S : sme2_mla_long_array_vg4_index<mnemonic, 0b10, op>,
             SMEPseudo2Instr<NAME # _S, 1>;

    def _S_PSEUDO : sme2_za_array_2op_multi_index_pseudo<NAME # _S, uimm2s2range,
                                                         ZZZZ_h_mul_r, ZPR4b16,
                                                         VectorIndexH32b_timm,
                                                         SMEMatrixArray>;

    def : SME2_ZA_TwoOp_VG4_Multi_Index_Pat<NAME # _S, intrinsic, uimm2s2range,
                                            ZPR4b16, zpr_ty,
                                            VectorIndexH32b_timm,
                                            tileslicerange2s2>;

    def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm], $Zn, $Zm$i3",
                    (!cast<Instruction>(NAME #_S) MatrixOp32:$ZAda,
                        MatrixIndexGPR32Op8_11:$Rv, uimm2s2range:$imm,
                        ZZZZ_h_mul_r:$Zn, ZPR4b16:$Zm,
                        VectorIndexH32b_timm:$i3),
                    0>;
  }
  // vgx4 indexed instantiation with op0 = 0b11. Same record set as the fp
  // flavour, but the pattern's element type is hard-wired to nxv8i16.
  multiclass sme2_int_mla_long_array_vg4_index<string mnemonic, bits<2> op,
                                               SDPatternOperator intrinsic> {
    def _S : sme2_mla_long_array_vg4_index<mnemonic, 0b11, op>,
             SMEPseudo2Instr<NAME # _S, 1>;

    def _S_PSEUDO : sme2_za_array_2op_multi_index_pseudo<NAME # _S, uimm2s2range,
                                                         ZZZZ_h_mul_r, ZPR4b16,
                                                         VectorIndexH32b_timm,
                                                         SMEMatrixArray>;

    def : SME2_ZA_TwoOp_VG4_Multi_Index_Pat<NAME # _S, intrinsic, uimm2s2range,
                                            ZPR4b16, nxv8i16,
                                            VectorIndexH32b_timm,
                                            tileslicerange2s2>;

    def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm], $Zn, $Zm$i3",
                    (!cast<Instruction>(NAME #_S) MatrixOp32:$ZAda,
                        MatrixIndexGPR32Op8_11:$Rv, uimm2s2range:$imm,
                        ZZZZ_h_mul_r:$Zn, ZPR4b16:$Zm,
                        VectorIndexH32b_timm:$i3),
                    0>;
  }
  // Base class for the non-indexed long-MLA array forms (single and multi
  // second operand). The accumulator is MatrixOp32 ($ZAda tied to $_ZAda).
  //   op0        -> Inst{23-22}
  //   op         -> Inst{4-3}
  //   vg_acronym -> when empty, Inst{10} = 1 and no group suffix is printed;
  //                 otherwise Inst{10} = 0 and ", <vg_acronym>" is appended
  //                 inside the brackets.
  // Inst{20-16}, Inst{9-5} and Inst{2-0} are not assigned here; users of this
  // class fill them with $Zm, $Zn and $imm.
  class sme2_mla_long_array<bits<2>op0, bits<2> op, Operand index_ty,
                            RegisterOperand first_vector_ty,
                            RegisterOperand second_vector_ty,
                            string mnemonic, string vg_acronym="">
      : I<(outs MatrixOp32:$ZAda),
          (ins MatrixOp32:$_ZAda, MatrixIndexGPR32Op8_11:$Rv, index_ty:$imm,
               first_vector_ty:$Zn, second_vector_ty:$Zm),
          mnemonic,
          "\t$ZAda[$Rv, $imm"
            # !if(!eq(vg_acronym, ""), "", ", " # vg_acronym)
            # "], $Zn, $Zm",
          "", []>,
        Sched<[]> {
    // Accumulator is read-modify-write.
    let Constraints = "$ZAda = $_ZAda";

    bits<2> Rv;

    let Inst{31-24} = 0b11000001;
    let Inst{23-22} = op0;
    let Inst{21}    = 0b1;
    let Inst{15}    = 0b0;
    let Inst{14-13} = Rv;
    let Inst{12-11} = 0b01;
    let Inst{10}    = !if(!eq(vg_acronym, ""), 1, 0);
    let Inst{4-3}   = op;
  }
  // Single-vector form (no vector-group suffix). Emits:
  //   _S        - the instruction; the def body supplies the open bits:
  //               Inst{20} = 0, $Zm -> Inst{19-16}, $Zn -> Inst{9-5},
  //               $imm -> Inst{2-0};
  //   _S_PSEUDO - the matching sme2_za_array_2op_multi_single_pseudo;
  //   (anon)    - the selection pattern for `intrinsic`.
  multiclass sme2_mla_long_array_single<string mnemonic, bits<2> op0,
                                        bits<2> op, ValueType zpr_ty,
                                        SDPatternOperator intrinsic> {
    def _S : sme2_mla_long_array<op0, op, uimm3s2range, ZPR16, ZPR4b16,
                                 mnemonic>,
             SMEPseudo2Instr<NAME # _S, 1> {
      bits<4> Zm;
      bits<5> Zn;
      bits<3> imm;

      let Inst{20}    = 0b0;
      let Inst{19-16} = Zm;
      let Inst{9-5}   = Zn;
      let Inst{2-0}   = imm;
    }

    def _S_PSEUDO : sme2_za_array_2op_multi_single_pseudo<NAME # _S,
                                                          uimm3s2range, ZPR16,
                                                          ZPR4b16,
                                                          SMEMatrixArray>;

    def : SME2_ZA_TwoOp_Multi_Single_Pat<NAME # _S, intrinsic, uimm3s2range,
                                         ZPR4b16, zpr_ty, tileslicerange3s2>;
  }
  // Multi-vector $Zn with a single ZPR4b16 $Zm, shared by the vgx2 and vgx4
  // flavours. Completes the sme2_mla_long_array encoding:
  //   vg4 -> Inst{20}
  //   Zm  -> Inst{19-16}
  //   Zn  -> Inst{9-5} (a full 5-bit field)
  //   Inst{2} = 0, imm -> Inst{1-0}
  class sme2_mla_long_array_vg24_single<bits<2> op0, bit vg4, bits<2> op,
                                        RegisterOperand first_vector_ty,
                                        string mnemonic, string vg_acronym>
      : sme2_mla_long_array<op0, op, uimm2s2range, first_vector_ty, ZPR4b16,
                            mnemonic, vg_acronym> {
    bits<4> Zm;
    bits<5> Zn;
    bits<2> imm;

    let Inst{20}    = vg4;
    let Inst{19-16} = Zm;
    let Inst{9-5}   = Zn;
    let Inst{2}     = 0b0;
    let Inst{1-0}   = imm;
  }
  // vgx2 "single" instantiation with op0 = 0b00 and vg4 = 0. Emits the _S
  // instruction, its _S_PSEUDO, a selection pattern (element type zpr_ty) and
  // an alias whose asm string omits the "vgx2" token.
  multiclass sme2_fp_mla_long_array_vg2_single<string mnemonic, bits<2> op,
                                               ValueType zpr_ty,
                                               SDPatternOperator intrinsic> {
    def _S : sme2_mla_long_array_vg24_single<0b00, 0b0, op, ZZ_h, mnemonic,
                                             "vgx2">,
             SMEPseudo2Instr<NAME # _S, 1>;

    def _S_PSEUDO : sme2_za_array_2op_multi_single_pseudo<NAME # _S,
                                                          uimm2s2range, ZZ_h,
                                                          ZPR4b16,
                                                          SMEMatrixArray>;

    def : SME2_ZA_TwoOp_VG2_Multi_Single_Pat<NAME # _S, intrinsic, uimm2s2range,
                                             ZPR4b16, zpr_ty,
                                             tileslicerange2s2>;

    def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm], $Zn, $Zm",
                    (!cast<Instruction>(NAME #_S) MatrixOp32:$ZAda,
                        MatrixIndexGPR32Op8_11:$Rv, uimm2s2range:$imm,
                        ZZ_h:$Zn, ZPR4b16:$Zm),
                    0>;
  }
  // vgx2 "single" instantiation with op0 = 0b01 and vg4 = 0. Same record set
  // as the fp flavour, but the pattern's element type is fixed to nxv8i16.
  multiclass sme2_int_mla_long_array_vg2_single<string mnemonic, bits<2> op,
                                                SDPatternOperator intrinsic> {
    def _S : sme2_mla_long_array_vg24_single<0b01, 0b0, op, ZZ_h, mnemonic,
                                             "vgx2">,
             SMEPseudo2Instr<NAME # _S, 1>;

    def _S_PSEUDO : sme2_za_array_2op_multi_single_pseudo<NAME # _S,
                                                          uimm2s2range, ZZ_h,
                                                          ZPR4b16,
                                                          SMEMatrixArray>;

    def : SME2_ZA_TwoOp_VG2_Multi_Single_Pat<NAME # _S, intrinsic, uimm2s2range,
                                             ZPR4b16, nxv8i16,
                                             tileslicerange2s2>;

    def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm], $Zn, $Zm",
                    (!cast<Instruction>(NAME #_S) MatrixOp32:$ZAda,
                        MatrixIndexGPR32Op8_11:$Rv, uimm2s2range:$imm,
                        ZZ_h:$Zn, ZPR4b16:$Zm),
                    0>;
  }
  // vgx4 "single" instantiation with op0 = 0b00 and vg4 = 1. Emits the _S
  // instruction, its _S_PSEUDO, a selection pattern (element type zpr_ty) and
  // an alias whose asm string omits the "vgx4" token.
  multiclass sme2_fp_mla_long_array_vg4_single<string mnemonic, bits<2> op,
                                               ValueType zpr_ty,
                                               SDPatternOperator intrinsic> {
    def _S : sme2_mla_long_array_vg24_single<0b00, 0b1, op, ZZZZ_h, mnemonic,
                                             "vgx4">,
             SMEPseudo2Instr<NAME # _S, 1>;

    def _S_PSEUDO : sme2_za_array_2op_multi_single_pseudo<NAME # _S,
                                                          uimm2s2range, ZZZZ_h,
                                                          ZPR4b16,
                                                          SMEMatrixArray>;

    def : SME2_ZA_TwoOp_VG4_Multi_Single_Pat<NAME # _S, intrinsic, uimm2s2range,
                                             ZPR4b16, zpr_ty,
                                             tileslicerange2s2>;

    def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm], $Zn, $Zm",
                    (!cast<Instruction>(NAME #_S) MatrixOp32:$ZAda,
                        MatrixIndexGPR32Op8_11:$Rv, uimm2s2range:$imm,
                        ZZZZ_h:$Zn, ZPR4b16:$Zm),
                    0>;
  }
  // vgx4 "single" instantiation with op0 = 0b01 and vg4 = 1. Same record set
  // as the fp flavour, but the pattern's element type is fixed to nxv8i16.
  multiclass sme2_int_mla_long_array_vg4_single<string mnemonic, bits<2> op,
                                                SDPatternOperator intrinsic> {
    def _S : sme2_mla_long_array_vg24_single<0b01, 0b1, op, ZZZZ_h, mnemonic,
                                             "vgx4">,
             SMEPseudo2Instr<NAME # _S, 1>;

    def _S_PSEUDO : sme2_za_array_2op_multi_single_pseudo<NAME # _S,
                                                          uimm2s2range, ZZZZ_h,
                                                          ZPR4b16,
                                                          SMEMatrixArray>;

    def : SME2_ZA_TwoOp_VG4_Multi_Single_Pat<NAME # _S, intrinsic, uimm2s2range,
                                             ZPR4b16, nxv8i16,
                                             tileslicerange2s2>;

    def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm], $Zn, $Zm",
                    (!cast<Instruction>(NAME #_S) MatrixOp32:$ZAda,
                        MatrixIndexGPR32Op8_11:$Rv, uimm2s2range:$imm,
                        ZZZZ_h:$Zn, ZPR4b16:$Zm),
                    0>;
  }
  // "vgx2" multi-by-multi form: both $Zn and $Zm are ZZ_h_mul_r pairs.
  // Completes the sme2_mla_long_array encoding:
  //   Zm  -> Inst{20-17}, Inst{16} = 0
  //   Zn  -> Inst{9-6},   Inst{5}  = 0
  //   Inst{2} = 0, imm -> Inst{1-0}
  class sme2_mla_long_array_vg2_multi<string mnemonic, bits<2> op0, bits<2> op>
      : sme2_mla_long_array<op0, op, uimm2s2range, ZZ_h_mul_r, ZZ_h_mul_r,
                            mnemonic, "vgx2"> {
    bits<4> Zm;
    bits<4> Zn;
    bits<2> imm;

    let Inst{20-17} = Zm;
    let Inst{16}    = 0b0;
    let Inst{9-6}   = Zn;
    let Inst{5}     = 0b0;
    let Inst{2}     = 0b0;
    let Inst{1-0}   = imm;
  }
  // vgx2 multi-by-multi instantiation with op0 = 0b10. Emits the _S
  // instruction, its _S_PSEUDO, a selection pattern (element type zpr_ty) and
  // an alias whose asm string omits the "vgx2" token.
  multiclass sme2_fp_mla_long_array_vg2_multi<string mnemonic, bits<2> op,
                                              ValueType zpr_ty,
                                              SDPatternOperator intrinsic> {
    def _S : sme2_mla_long_array_vg2_multi<mnemonic, 0b10, op>,
             SMEPseudo2Instr<NAME # _S, 1>;

    def _S_PSEUDO : sme2_za_array_2op_multi_multi_pseudo<NAME # _S, uimm2s2range,
                                                         ZZ_h_mul_r,
                                                         SMEMatrixArray>;

    def : SME2_ZA_TwoOp_VG2_Multi_Multi_Pat<NAME # _S, intrinsic, uimm2s2range,
                                            zpr_ty, tileslicerange2s2>;

    def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm], $Zn, $Zm",
                    (!cast<Instruction>(NAME #_S) MatrixOp32:$ZAda,
                        MatrixIndexGPR32Op8_11:$Rv, uimm2s2range:$imm,
                        ZZ_h_mul_r:$Zn, ZZ_h_mul_r:$Zm),
                    0>;
  }
  // vgx2 multi-by-multi instantiation with op0 = 0b11. Emits the _S
  // instruction, its _S_PSEUDO, a selection pattern (element type fixed to
  // nxv8i16) and an alias whose asm string omits the "vgx2" token.
  //
  // The alias previously spelled the slice-offset operand "$imm2", while the
  // instruction class and every sibling alias call it "$imm". The name is
  // local to the alias (it only links the asm string to the result dag), so
  // using "$imm" here is purely a consistency fix.
  multiclass sme2_int_mla_long_array_vg2_multi<string mnemonic, bits<2> op,
                                               SDPatternOperator intrinsic> {
    def _S : sme2_mla_long_array_vg2_multi<mnemonic, 0b11, op>,
             SMEPseudo2Instr<NAME # _S, 1>;

    def _S_PSEUDO : sme2_za_array_2op_multi_multi_pseudo<NAME # _S, uimm2s2range,
                                                         ZZ_h_mul_r,
                                                         SMEMatrixArray>;

    def : SME2_ZA_TwoOp_VG2_Multi_Multi_Pat<NAME # _S, intrinsic, uimm2s2range,
                                            nxv8i16, tileslicerange2s2>;

    def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm], $Zn, $Zm",
                    (!cast<Instruction>(NAME #_S) MatrixOp32:$ZAda,
                        MatrixIndexGPR32Op8_11:$Rv, uimm2s2range:$imm,
                        ZZ_h_mul_r:$Zn, ZZ_h_mul_r:$Zm),
                    0>;
  }
  // "vgx4" multi-by-multi form: both $Zn and $Zm are ZZZZ_h_mul_r quads.
  // Completes the sme2_mla_long_array encoding:
  //   Zm  -> Inst{20-18}, Inst{17} = 0, Inst{16} = 1
  //   Zn  -> Inst{9-7},   Inst{6-5} = 00
  //   Inst{2} = 0, imm -> Inst{1-0}
  class sme2_mla_long_array_vg4_multi<string mnemonic, bits<2> op0, bits<2> op>
      : sme2_mla_long_array<op0, op, uimm2s2range, ZZZZ_h_mul_r, ZZZZ_h_mul_r,
                            mnemonic, "vgx4"> {
    bits<3> Zm;
    bits<3> Zn;
    bits<2> imm;

    let Inst{20-18} = Zm;
    let Inst{17}    = 0b0;
    let Inst{16}    = 0b1;
    let Inst{9-7}   = Zn;
    let Inst{6-5}   = 0b00;
    let Inst{2}     = 0b0;
    let Inst{1-0}   = imm;
  }
  // vgx4 multi-by-multi instantiation with op0 = 0b10. Emits the _S
  // instruction, its _S_PSEUDO, a selection pattern (element type zpr_ty) and
  // an alias whose asm string omits the "vgx4" token.
  multiclass sme2_fp_mla_long_array_vg4_multi<string mnemonic, bits<2> op,
                                              ValueType zpr_ty,
                                              SDPatternOperator intrinsic> {
    def _S : sme2_mla_long_array_vg4_multi<mnemonic, 0b10, op>,
             SMEPseudo2Instr<NAME # _S, 1>;

    def _S_PSEUDO : sme2_za_array_2op_multi_multi_pseudo<NAME # _S, uimm2s2range,
                                                         ZZZZ_h_mul_r,
                                                         SMEMatrixArray>;

    def : SME2_ZA_TwoOp_VG4_Multi_Multi_Pat<NAME # _S, intrinsic, uimm2s2range,
                                            zpr_ty, tileslicerange2s2>;

    def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm], $Zn, $Zm",
                    (!cast<Instruction>(NAME #_S) MatrixOp32:$ZAda,
                        MatrixIndexGPR32Op8_11:$Rv, uimm2s2range:$imm,
                        ZZZZ_h_mul_r:$Zn, ZZZZ_h_mul_r:$Zm),
                    0>;
  }
  // vgx4 multi-by-multi instantiation with op0 = 0b11. Emits the _S
  // instruction, its _S_PSEUDO, a selection pattern (element type fixed to
  // nxv8i16) and an alias whose asm string omits the "vgx4" token.
  //
  // The alias previously spelled the slice-offset operand "$imm2", while the
  // instruction class and every sibling alias call it "$imm". The name is
  // local to the alias (it only links the asm string to the result dag), so
  // using "$imm" here is purely a consistency fix.
  multiclass sme2_int_mla_long_array_vg4_multi<string mnemonic, bits<2> op,
                                               SDPatternOperator intrinsic> {
    def _S : sme2_mla_long_array_vg4_multi<mnemonic, 0b11, op>,
             SMEPseudo2Instr<NAME # _S, 1>;

    def _S_PSEUDO : sme2_za_array_2op_multi_multi_pseudo<NAME # _S, uimm2s2range,
                                                         ZZZZ_h_mul_r,
                                                         SMEMatrixArray>;

    def : SME2_ZA_TwoOp_VG4_Multi_Multi_Pat<NAME # _S, intrinsic, uimm2s2range,
                                            nxv8i16, tileslicerange2s2>;

    def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm], $Zn, $Zm",
                    (!cast<Instruction>(NAME #_S) MatrixOp32:$ZAda,
                        MatrixIndexGPR32Op8_11:$Rv, uimm2s2range:$imm,
                        ZZZZ_h_mul_r:$Zn, ZZZZ_h_mul_r:$Zm),
                    0>;
  }
  1735. //===----------------------------------------------------------------------===//
  // Non-destructive two-operand format "$Zd, $Zn" for two-register groups.
  //   sz -> Inst{23-22}
  //   op -> Inst{19-16} (op{4-1}) and Inst{5} (op{0})
  //   Zn -> Inst{9-6}, Zd -> Inst{4-1}; Inst{0} is fixed to zero
  class sme2_frint_cvt_vg2_multi<bits<2>sz, bits<5>op, RegisterOperand first_ty,
                                 RegisterOperand second_ty, string mnemonic>
      : I<(outs first_ty:$Zd),
          (ins second_ty:$Zn),
          mnemonic, "\t$Zd, $Zn",
          "", []>,
        Sched<[]> {
    bits<4> Zn;
    bits<4> Zd;

    let Inst{31-24} = 0b11000001;
    let Inst{23-22} = sz;
    let Inst{21-20} = 0b10;
    let Inst{19-16} = op{4-1};
    let Inst{15-10} = 0b111000;
    let Inst{9-6}   = Zn;
    let Inst{5}     = op{0};
    let Inst{4-1}   = Zd;
    let Inst{0}     = 0b0;
  }
  // SME2 multi-vec FP to int convert two registers
  // SME2 multi-vec int to FP two registers
  // One unsuffixed record: sz = 0b00, both operands ZZ_s_mul_r.
  multiclass sme2_fp_cvt_vg2_multi<string mnemonic, bits<5> op> {
    def NAME : sme2_frint_cvt_vg2_multi<0b00, op, ZZ_s_mul_r, ZZ_s_mul_r,
                                        mnemonic>;
  }
  // SME2 multi-vec FRINT two registers
  // One _S record: sz = 0b10, both operands ZZ_s_mul_r.
  multiclass sme2_frint_vector_vg2_multi<string mnemonic, bits<5> op> {
    def _S : sme2_frint_cvt_vg2_multi<0b10, op, ZZ_s_mul_r, ZZ_s_mul_r,
                                      mnemonic>;
  }
  // Non-destructive two-operand format "$Zd, $Zn" for four-register groups.
  //   sz -> Inst{23-22}
  //   op -> Inst{19-16} (op{6-3}), Inst{6-5} (op{2-1}) and Inst{1} (op{0})
  //   Zn -> Inst{9-7}, Zd -> Inst{4-2}; Inst{0} is fixed to zero
  class sme2_frint_zip_cvt_vg4_multi<bits<2>sz, bits<7>op,
                                     RegisterOperand first_ty,
                                     RegisterOperand second_ty,
                                     string mnemonic>
      : I<(outs first_ty:$Zd),
          (ins second_ty:$Zn),
          mnemonic, "\t$Zd, $Zn",
          "", []>,
        Sched<[]> {
    bits<3> Zn;
    bits<3> Zd;

    let Inst{31-24} = 0b11000001;
    let Inst{23-22} = sz;
    let Inst{21-20} = 0b11;
    let Inst{19-16} = op{6-3};
    let Inst{15-10} = 0b111000;
    let Inst{9-7}   = Zn;
    let Inst{6-5}   = op{2-1};
    let Inst{4-2}   = Zd;
    let Inst{1}     = op{0};
    let Inst{0}     = 0b0;
  }
  // SME2 multi-vec FP to int convert four registers
  // SME2 multi-vec int to FP four registers
  // One unsuffixed record: sz = 0b00, both operands ZZZZ_s_mul_r.
  multiclass sme2_fp_cvt_vg4_multi<string mnemonic, bits<7> op> {
    def NAME : sme2_frint_zip_cvt_vg4_multi<0b00, op, ZZZZ_s_mul_r,
                                            ZZZZ_s_mul_r, mnemonic>;
  }
  // SME2 multi-vec ZIP four registers
  // _B/_H/_S/_D records with sz = 0b00/0b01/0b10/0b11; source and destination
  // use the same four-register operand class. (The quadword form is not
  // defined here; see sme2_zip_vector_vg4_Q.)
  multiclass sme2_zip_vector_vg4<string mnemonic, bits<7> op> {
    def _B : sme2_frint_zip_cvt_vg4_multi<0b00, op, ZZZZ_b_mul_r, ZZZZ_b_mul_r,
                                          mnemonic>;
    def _H : sme2_frint_zip_cvt_vg4_multi<0b01, op, ZZZZ_h_mul_r, ZZZZ_h_mul_r,
                                          mnemonic>;
    def _S : sme2_frint_zip_cvt_vg4_multi<0b10, op, ZZZZ_s_mul_r, ZZZZ_s_mul_r,
                                          mnemonic>;
    def _D : sme2_frint_zip_cvt_vg4_multi<0b11, op, ZZZZ_d_mul_r, ZZZZ_d_mul_r,
                                          mnemonic>;
  }
  // SME2 multi-vec quadwords ZIP four registers
  // One unsuffixed record: sz = 0b00, both operands ZZZZ_q_mul_r.
  multiclass sme2_zip_vector_vg4_Q<string mnemonic, bits<7> op> {
    def NAME : sme2_frint_zip_cvt_vg4_multi<0b00, op, ZZZZ_q_mul_r,
                                            ZZZZ_q_mul_r, mnemonic>;
  }
  // SME2 multi-vec FRINT four registers
  // One _S record: sz = 0b10, both operands ZZZZ_s_mul_r.
  multiclass sme2_frint_vector_vg4_multi<string mnemonic, bits<7> op> {
    def _S : sme2_frint_zip_cvt_vg4_multi<0b10, op, ZZZZ_s_mul_r, ZZZZ_s_mul_r,
                                          mnemonic>;
  }
  // Narrowing-shape format: a ZZ_s_mul_r pair ($Zn) in, one ZPR16 ($Zd) out.
  //   op -> Inst{22} (op{3}), Inst{17-16} (op{2-1}) and Inst{5} (op{0})
  //   Zn -> Inst{9-6}, Zd -> Inst{4-0}
  class sme2_cvt_vg2_single<string mnemonic, bits<4> op>
      : I<(outs ZPR16:$Zd),
          (ins ZZ_s_mul_r:$Zn),
          mnemonic, "\t$Zd, $Zn",
          "", []>,
        Sched<[]> {
    bits<4> Zn;
    bits<5> Zd;

    let Inst{31-23} = 0b110000010;
    let Inst{22}    = op{3};
    let Inst{21-18} = 0b1000;
    let Inst{17-16} = op{2-1};
    let Inst{15-10} = 0b111000;
    let Inst{9-6}   = Zn;
    let Inst{5}     = op{0};
    let Inst{4-0}   = Zd;
  }
  // SME2 multi-vec FP down convert two registers
  // SME2 multi-vec int down convert two registers
  // Emits the instruction plus an SVE2p1_Cvt_VG2_Pat that maps `intrinsic`
  // (result type out_vt, input type in_vt) onto it.
  multiclass sme2_cvt_vg2_single<string mnemonic, bits<4> op,
                                 ValueType out_vt, ValueType in_vt,
                                 SDPatternOperator intrinsic> {
    def NAME : sme2_cvt_vg2_single<mnemonic, op>;

    def : SVE2p1_Cvt_VG2_Pat<NAME, intrinsic, out_vt, in_vt>;
  }
  // Widening-shape format: one vector ($Zn, 5-bit field) in, a two-register
  // group ($Zd, 4-bit field) out.
  //   sz -> Inst{23-22}
  //   op -> Inst{18-16}
  //   u  -> Inst{0}
  //   Zn -> Inst{9-5}, Zd -> Inst{4-1}
  class sme2_cvt_unpk_vector_vg2<bits<2>sz, bits<3> op, bit u,
                                 RegisterOperand first_ty,
                                 RegisterOperand second_ty, string mnemonic>
      : I<(outs first_ty:$Zd),
          (ins second_ty:$Zn),
          mnemonic, "\t$Zd, $Zn",
          "", []>,
        Sched<[]> {
    bits<5> Zn;
    bits<4> Zd;

    let Inst{31-24} = 0b11000001;
    let Inst{23-22} = sz;
    let Inst{21-19} = 0b100;
    let Inst{18-16} = op;
    let Inst{15-10} = 0b111000;
    let Inst{9-5}   = Zn;
    let Inst{4-1}   = Zd;
    let Inst{0}     = u;
  }
  // SME2 multi-vec unpack two registers
  // _H/_S/_D records (sz = 0b01/0b10/0b11), all with op = 0b101. Each source
  // is the next-narrower single-vector class (ZPR8/ZPR16/ZPR32).
  multiclass sme2_unpk_vector_vg2<string mnemonic, bit u> {
    def _H : sme2_cvt_unpk_vector_vg2<0b01, 0b101, u, ZZ_h_mul_r, ZPR8,
                                      mnemonic>;
    def _S : sme2_cvt_unpk_vector_vg2<0b10, 0b101, u, ZZ_s_mul_r, ZPR16,
                                      mnemonic>;
    def _D : sme2_cvt_unpk_vector_vg2<0b11, 0b101, u, ZZ_d_mul_r, ZPR32,
                                      mnemonic>;
  }
  // SME2.1 multi-vec convert two registers
  // One _S record: sz = 0b10, op = 0b000, ZPR16 source and ZZ_s_mul_r
  // destination; `l` is passed as the class's final bit (Inst{0}).
  multiclass sme2p1_fp_cvt_vector_vg2_single<string mnemonic, bit l> {
    def _S : sme2_cvt_unpk_vector_vg2<0b10, 0b000, l, ZZ_s_mul_r, ZPR16,
                                      mnemonic>;
  }
  // Narrowing-shape format: a four-register group ($Zn, 3-bit field) in, one
  // vector ($Zd, 5-bit field) out.
  //   sz -> Inst{23}
  //   op -> Inst{22} (op{2}) and Inst{6-5} (op{1-0})
  //   Zn -> Inst{9-7}, Zd -> Inst{4-0}
  class sme2_cvt_vg4_single<bit sz, bits<3> op, RegisterOperand first_ty,
                            RegisterOperand second_ty, string mnemonic>
      : I<(outs first_ty:$Zd),
          (ins second_ty:$Zn),
          mnemonic, "\t$Zd, $Zn",
          "", []>,
        Sched<[]> {
    bits<3> Zn;
    bits<5> Zd;

    let Inst{31-24} = 0b11000001;
    let Inst{23}    = sz;
    let Inst{22}    = op{2};
    let Inst{21-10} = 0b110011111000;
    let Inst{9-7}   = Zn;
    let Inst{6-5}   = op{1-0};
    let Inst{4-0}   = Zd;
  }
  // SME2 multi-vec int down convert four registers
  // Two records, each with an SME2_Cvt_VG4_Pat for `intrinsic`:
  //   _StoB (sz = 0): ZZZZ_s_mul_r -> ZPR8,  pattern types nxv16i8 / nxv4i32
  //   _DtoH (sz = 1): ZZZZ_d_mul_r -> ZPR16, pattern types nxv8i16 / nxv2i64
  multiclass sme2_int_cvt_vg4_single<string mnemonic, bits<3> op,
                                     SDPatternOperator intrinsic> {
    def _StoB : sme2_cvt_vg4_single<0, op, ZPR8, ZZZZ_s_mul_r, mnemonic>;
    def _DtoH : sme2_cvt_vg4_single<1, op, ZPR16, ZZZZ_d_mul_r, mnemonic>;

    def : SME2_Cvt_VG4_Pat<NAME # _StoB, intrinsic, nxv16i8, nxv4i32>;
    def : SME2_Cvt_VG4_Pat<NAME # _DtoH, intrinsic, nxv8i16, nxv2i64>;
  }
  // Widening-shape format: a two-register group ($Zn, 4-bit field) in, a
  // four-register group ($Zd, 3-bit field) out.
  //   sz -> Inst{23-22}
  //   u  -> Inst{0}
  //   Zn -> Inst{9-6}, Zd -> Inst{4-2}; Inst{5} and Inst{1} are fixed to zero
  class sme2_unpk_vector_vg4<bits<2>sz, bit u, RegisterOperand first_ty,
                             RegisterOperand second_ty, string mnemonic>
      : I<(outs first_ty:$Zd),
          (ins second_ty:$Zn),
          mnemonic, "\t$Zd, $Zn",
          "", []>,
        Sched<[]> {
    bits<4> Zn;
    bits<3> Zd;

    let Inst{31-24} = 0b11000001;
    let Inst{23-22} = sz;
    let Inst{21-10} = 0b110101111000;
    let Inst{9-6}   = Zn;
    let Inst{5}     = 0b0;
    let Inst{4-2}   = Zd;
    let Inst{1}     = 0b0;
    let Inst{0}     = u;
  }
  // SME2 multi-vec UNPK four registers
  // _H/_S/_D records (sz = 0b01/0b10/0b11). Each source is the next-narrower
  // two-register class (ZZ_b/ZZ_h/ZZ_s _mul_r).
  multiclass sme2_unpk_vector_vg4<string mnemonic, bit u> {
    def _H : sme2_unpk_vector_vg4<0b01, u, ZZZZ_h_mul_r, ZZ_b_mul_r, mnemonic>;
    def _S : sme2_unpk_vector_vg4<0b10, u, ZZZZ_s_mul_r, ZZ_h_mul_r, mnemonic>;
    def _D : sme2_unpk_vector_vg4<0b11, u, ZZZZ_d_mul_r, ZZ_s_mul_r, mnemonic>;
  }
  1892. //===----------------------------------------------------------------------===//
  1893. // SME2 multi-vec CLAMP registers
  // Common encoding for the two- and four-register clamp forms. $Zd is a
  // multi-vector operand tied to $_Zd; $Zn and $Zm are single vectors of
  // vector_ty.
  //   sz  -> Inst{23-22}
  //   op1 -> Inst{12-10}
  //   u   -> Inst{0}
  //   Zm  -> Inst{20-16}, Zn -> Inst{9-5}
  // Inst{4-1} is left for the subclasses, which encode $Zd there.
  class sme2_clamp_vector_vg24_multi<bits<2> sz, bits<3> op1, bit u,
                                     RegisterOperand multi_vector_ty,
                                     ZPRRegOp vector_ty, string mnemonic>
      : I<(outs multi_vector_ty:$Zd),
          (ins multi_vector_ty:$_Zd, vector_ty:$Zn, vector_ty:$Zm),
          mnemonic, "\t$Zd, $Zn, $Zm",
          "", []>,
        Sched<[]> {
    // Destination is also an input.
    let Constraints = "$Zd = $_Zd";

    bits<5> Zm;
    bits<5> Zn;

    let Inst{31-24} = 0b11000001;
    let Inst{23-22} = sz;
    let Inst{21}    = 0b1;
    let Inst{20-16} = Zm;
    let Inst{15-13} = 0b110;
    let Inst{12-10} = op1;
    let Inst{9-5}   = Zn;
    let Inst{0}     = u;
  }
  // Two-register clamp: adds a 4-bit $Zd in Inst{4-1} to the shared encoding.
  class sme2_clamp_vector_vg2_multi<bits<2> sz, bits<3> op1, bit u,
                                    RegisterOperand multi_vector_ty,
                                    ZPRRegOp vector_ty, string mnemonic>
      : sme2_clamp_vector_vg24_multi<sz, op1, u, multi_vector_ty, vector_ty,
                                     mnemonic> {
    bits<4> Zd;
    let Inst{4-1} = Zd;
  }
  // _H/_S/_D two-register clamp records (sz = 0b01/0b10/0b11), all with
  // op1 = 0b000 and u = 0.
  multiclass sme2_fp_clamp_vector_vg2_multi<string mnemonic> {
    def _H : sme2_clamp_vector_vg2_multi<0b01, 0b000, 0b0, ZZ_h_mul_r, ZPR16,
                                         mnemonic>;
    def _S : sme2_clamp_vector_vg2_multi<0b10, 0b000, 0b0, ZZ_s_mul_r, ZPR32,
                                         mnemonic>;
    def _D : sme2_clamp_vector_vg2_multi<0b11, 0b000, 0b0, ZZ_d_mul_r, ZPR64,
                                         mnemonic>;
  }
  // _B/_H/_S/_D two-register clamp records (sz = 0b00..0b11), all with
  // op1 = 0b001; the caller's `u` is forwarded to Inst{0}.
  multiclass sme2_int_clamp_vector_vg2_multi<string mnemonic, bit u> {
    def _B : sme2_clamp_vector_vg2_multi<0b00, 0b001, u, ZZ_b_mul_r, ZPR8,
                                         mnemonic>;
    def _H : sme2_clamp_vector_vg2_multi<0b01, 0b001, u, ZZ_h_mul_r, ZPR16,
                                         mnemonic>;
    def _S : sme2_clamp_vector_vg2_multi<0b10, 0b001, u, ZZ_s_mul_r, ZPR32,
                                         mnemonic>;
    def _D : sme2_clamp_vector_vg2_multi<0b11, 0b001, u, ZZ_d_mul_r, ZPR64,
                                         mnemonic>;
  }
  // SME2.1 multi-vec FCLAMP two registers
  // One _H record on halfword operands with sz = 0b00, op1 = 0b000, u = 0.
  multiclass sme2p1_bfclamp_vector_vg2_multi<string mnemonic> {
    def _H : sme2_clamp_vector_vg2_multi<0b00, 0b000, 0b0, ZZ_h_mul_r, ZPR16,
                                         mnemonic>;
  }
  // Four-register clamp: adds a 3-bit $Zd in Inst{4-2} to the shared encoding
  // and fixes Inst{1} to zero.
  class sme2_clamp_vector_vg4_multi<bits<2> sz, bits<3> op1, bit u,
                                    RegisterOperand multi_vector_ty,
                                    ZPRRegOp vector_ty, string mnemonic>
      : sme2_clamp_vector_vg24_multi<sz, op1, u, multi_vector_ty, vector_ty,
                                     mnemonic> {
    bits<3> Zd;
    let Inst{4-2} = Zd;
    let Inst{1}   = 0b0;
  }
  1946. multiclass sme2_fp_clamp_vector_vg4_multi<string mnemonic>{
  1947. def _H : sme2_clamp_vector_vg4_multi<0b01, 0b010, 0b0, ZZZZ_h_mul_r, ZPR16, mnemonic>;
  1948. def _S : sme2_clamp_vector_vg4_multi<0b10, 0b010, 0b0, ZZZZ_s_mul_r, ZPR32, mnemonic>;
  1949. def _D : sme2_clamp_vector_vg4_multi<0b11, 0b010, 0b0, ZZZZ_d_mul_r, ZPR64, mnemonic>;
  1950. }
  1951. multiclass sme2_int_clamp_vector_vg4_multi<string mnemonic, bit u>{
  1952. def _B : sme2_clamp_vector_vg4_multi<0b00, 0b011, u, ZZZZ_b_mul_r, ZPR8, mnemonic>;
  1953. def _H : sme2_clamp_vector_vg4_multi<0b01, 0b011, u, ZZZZ_h_mul_r, ZPR16, mnemonic>;
  1954. def _S : sme2_clamp_vector_vg4_multi<0b10, 0b011, u, ZZZZ_s_mul_r, ZPR32, mnemonic>;
  1955. def _D : sme2_clamp_vector_vg4_multi<0b11, 0b011, u, ZZZZ_d_mul_r, ZPR64, mnemonic>;
  1956. }
  1957. // SME2.1 multi-vec FCLAMP four registers
  1958. multiclass sme2p1_bfclamp_vector_vg4_multi<string mnemonic> {
  1959. def _H : sme2_clamp_vector_vg4_multi<0b00, 0b010, 0b0, ZZZZ_h_mul_r, ZPR16,
  1960. mnemonic>;
  1961. }
  1962. // SME2 multi-vec ZIP two registers
  1963. class sme2_zip_vector_vg2<bits<2> sz, bit q, bit u,
  1964. RegisterOperand multi_vector_ty,
  1965. ZPRRegOp vector_ty, string mnemonic>
  1966. : I<(outs multi_vector_ty:$Zd), (ins vector_ty:$Zn, vector_ty:$Zm),
  1967. mnemonic, "\t$Zd, $Zn, $Zm",
  1968. "", []>, Sched<[]>{
  1969. bits<4> Zd;
  1970. bits<5> Zm;
  1971. bits<5> Zn;
  1972. let Inst{31-24} = 0b11000001;
  1973. let Inst{23-22} = sz;
  1974. let Inst{21} = 0b1;
  1975. let Inst{20-16} = Zm;
  1976. let Inst{15-11} = 0b11010;
  1977. let Inst{10} = q;
  1978. let Inst{9-5} = Zn;
  1979. let Inst{4-1} = Zd;
  1980. let Inst{0} = u;
  1981. }
  1982. multiclass sme2_zip_vector_vg2<string mnemonic, bit op> {
  1983. def _B : sme2_zip_vector_vg2<0b00, 0b0, op, ZZ_b_mul_r, ZPR8, mnemonic>;
  1984. def _H : sme2_zip_vector_vg2<0b01, 0b0, op, ZZ_h_mul_r, ZPR16, mnemonic>;
  1985. def _S : sme2_zip_vector_vg2<0b10, 0b0, op, ZZ_s_mul_r, ZPR32, mnemonic>;
  1986. def _D : sme2_zip_vector_vg2<0b11, 0b0, op, ZZ_d_mul_r, ZPR64, mnemonic>;
  1987. def _Q : sme2_zip_vector_vg2<0b00, 0b1, op, ZZ_q_mul_r, ZPR128, mnemonic>;
  1988. }
  1989. //===----------------------------------------------------------------------===//
  1990. // SME2 Dot Products and MLA
  1991. class sme2_multi_vec_array_vg2_index<bit sz, bits<6> op, MatrixOperand matrix_ty,
  1992. RegisterOperand multi_vector_ty,
  1993. ZPRRegOp vector_ty, Operand index_ty,
  1994. string mnemonic>
  1995. : I<(outs matrix_ty:$ZAda),
  1996. (ins matrix_ty:$_ZAda, MatrixIndexGPR32Op8_11:$Rv, sme_elm_idx0_7:$imm3,
  1997. multi_vector_ty:$Zn, vector_ty:$Zm, index_ty:$i),
  1998. mnemonic, "\t$ZAda[$Rv, $imm3, vgx2], $Zn, $Zm$i",
  1999. "", []>, Sched<[]> {
  2000. bits<4> Zm;
  2001. bits<2> Rv;
  2002. bits<4> Zn;
  2003. bits<3> imm3;
  2004. let Inst{31-23} = 0b110000010;
  2005. let Inst{22} = sz;
  2006. let Inst{21-20} = 0b01;
  2007. let Inst{19-16} = Zm;
  2008. let Inst{15} = 0b0;
  2009. let Inst{14-13} = Rv;
  2010. let Inst{12-10} = op{5-3};
  2011. let Inst{9-6} = Zn;
  2012. let Inst{5-3} = op{2-0};
  2013. let Inst{2-0} = imm3;
  2014. let Constraints = "$ZAda = $_ZAda";
  2015. }
  2016. // SME2 multi-vec ternary indexed two registers 32-bit
  2017. multiclass sme2_multi_vec_array_vg2_index_32b<string mnemonic, bits<4> op,
  2018. RegisterOperand multi_vector_ty,
  2019. ZPRRegOp vector_ty, ValueType vt,
  2020. SDPatternOperator intrinsic> {
  2021. def NAME : sme2_multi_vec_array_vg2_index<0b1, {op{3},?,?,op{2-0}}, MatrixOp32, multi_vector_ty, vector_ty,
  2022. VectorIndexS32b_timm, mnemonic>, SMEPseudo2Instr<NAME, 1> {
  2023. bits<2> i;
  2024. let Inst{11-10} = i;
  2025. }
  2026. def _PSEUDO : sme2_za_array_2op_multi_index_pseudo<NAME, sme_elm_idx0_7, multi_vector_ty, vector_ty, VectorIndexS32b_timm, SMEMatrixArray>;
  2027. def : SME2_ZA_TwoOp_VG2_Multi_Index_Pat<NAME, intrinsic, sme_elm_idx0_7, vector_ty, vt, VectorIndexS32b_timm, tileslice16>;
  2028. def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm3], $Zn, $Zm$i",
  2029. (!cast<Instruction>(NAME) MatrixOp32:$ZAda, MatrixIndexGPR32Op8_11:$Rv, sme_elm_idx0_7:$imm3,
  2030. multi_vector_ty:$Zn, vector_ty:$Zm, VectorIndexS32b_timm:$i), 0>;
  2031. }
  2032. // SME2.1 multi-vec ternary indexed two registers 16-bit
  2033. multiclass sme2p1_multi_vec_array_vg2_index_16b<string mnemonic, bits<2> op> {
  2034. def NAME : sme2_multi_vec_array_vg2_index<0b0, {0b1,?,?,op,?}, MatrixOp16,
  2035. ZZ_h_mul_r, ZPR4b16,
  2036. VectorIndexH, mnemonic> {
  2037. bits<3> i;
  2038. let Inst{11-10} = i{2-1};
  2039. let Inst{3} = i{0};
  2040. }
  2041. def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm3], $Zn, $Zm$i",
  2042. (!cast<Instruction>(NAME) MatrixOp16:$ZAda, MatrixIndexGPR32Op8_11:$Rv, sme_elm_idx0_7:$imm3,
  2043. ZZ_h_mul_r:$Zn, ZPR4b16:$Zm, VectorIndexH:$i), 0>;
  2044. }
  2045. // SME2 multi-vec ternary indexed two registers 64-bit
  2046. class sme2_multi_vec_array_vg2_index_64b<bits<2> op,
  2047. RegisterOperand multi_vector_ty,
  2048. ZPRRegOp vector_ty,
  2049. string mnemonic>
  2050. : I<(outs MatrixOp64:$ZAda),
  2051. (ins MatrixOp64:$_ZAda, MatrixIndexGPR32Op8_11:$Rv, sme_elm_idx0_7:$imm3,
  2052. multi_vector_ty:$Zn, vector_ty:$Zm, VectorIndexD32b_timm:$i1),
  2053. mnemonic, "\t$ZAda[$Rv, $imm3, vgx2], $Zn, $Zm$i1",
  2054. "", []>, Sched<[]> {
  2055. bits<4> Zm;
  2056. bits<2> Rv;
  2057. bits<1> i1;
  2058. bits<4> Zn;
  2059. bits<3> imm3;
  2060. let Inst{31-20} = 0b110000011101;
  2061. let Inst{19-16} = Zm;
  2062. let Inst{15} = 0b0;
  2063. let Inst{14-13} = Rv;
  2064. let Inst{12-11} = 0b00;
  2065. let Inst{10} = i1;
  2066. let Inst{9-6} = Zn;
  2067. let Inst{5} = 0b0;
  2068. let Inst{4-3} = op;
  2069. let Inst{2-0} = imm3;
  2070. let Constraints = "$ZAda = $_ZAda";
  2071. }
  2072. multiclass sme2_multi_vec_array_vg2_index_64b<string mnemonic, bits<2> op,
  2073. RegisterOperand multi_vector_ty,
  2074. ZPRRegOp vector_ty, ValueType vt,
  2075. SDPatternOperator intrinsic> {
  2076. def NAME : sme2_multi_vec_array_vg2_index_64b<op, multi_vector_ty, vector_ty,
  2077. mnemonic>, SMEPseudo2Instr<NAME, 1>;
  2078. def _PSEUDO : sme2_za_array_2op_multi_index_pseudo<NAME, sme_elm_idx0_7, multi_vector_ty, vector_ty, VectorIndexD32b_timm, SMEMatrixArray>;
  2079. def : SME2_ZA_TwoOp_VG2_Multi_Index_Pat<NAME, intrinsic, sme_elm_idx0_7, vector_ty, vt, VectorIndexD32b_timm, tileslice16>;
  2080. def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm3], $Zn, $Zm$i1",
  2081. (!cast<Instruction>(NAME) MatrixOp64:$ZAda, MatrixIndexGPR32Op8_11:$Rv, sme_elm_idx0_7:$imm3,
  2082. multi_vector_ty:$Zn, vector_ty:$Zm, VectorIndexD32b_timm:$i1), 0>;
  2083. }
  2084. class sme2_multi_vec_array_vg4_index<bit sz, bits<6> op, MatrixOperand matrix_ty,
  2085. RegisterOperand multi_vector_ty,
  2086. ZPRRegOp vector_ty, Operand index_ty,
  2087. string mnemonic>
  2088. : I<(outs matrix_ty:$ZAda),
  2089. (ins matrix_ty:$_ZAda, MatrixIndexGPR32Op8_11:$Rv, sme_elm_idx0_7:$imm3,
  2090. multi_vector_ty:$Zn, vector_ty:$Zm, index_ty:$i),
  2091. mnemonic, "\t$ZAda[$Rv, $imm3, vgx4], $Zn, $Zm$i",
  2092. "", []>, Sched<[]> {
  2093. bits<4> Zm;
  2094. bits<2> Rv;
  2095. bits<3> Zn;
  2096. bits<3> imm3;
  2097. let Inst{31-23} = 0b110000010;
  2098. let Inst{22} = sz;
  2099. let Inst{21-20} = 0b01;
  2100. let Inst{19-16} = Zm;
  2101. let Inst{15} = 0b1;
  2102. let Inst{14-13} = Rv;
  2103. let Inst{12-10} = op{5-3};
  2104. let Inst{9-7} = Zn;
  2105. let Inst{6} = 0b0;
  2106. let Inst{5-3} = op{2-0};
  2107. let Inst{2-0} = imm3;
  2108. let Constraints = "$ZAda = $_ZAda";
  2109. }
  2110. // SME2 multi-vec ternary indexed four registers 32-bit
  2111. multiclass sme2_multi_vec_array_vg4_index_32b<string mnemonic, bits<4> op,
  2112. RegisterOperand multi_vector_ty,
  2113. ZPRRegOp vector_ty, ValueType vt,
  2114. SDPatternOperator intrinsic> {
  2115. def NAME : sme2_multi_vec_array_vg4_index<0b1, {op{3},?,?,op{2-0}}, MatrixOp32, multi_vector_ty,
  2116. vector_ty, VectorIndexS32b_timm, mnemonic>, SMEPseudo2Instr<NAME, 1> {
  2117. bits<2> i;
  2118. let Inst{11-10} = i;
  2119. }
  2120. def _PSEUDO : sme2_za_array_2op_multi_index_pseudo<NAME, sme_elm_idx0_7, multi_vector_ty, vector_ty, VectorIndexS32b_timm, SMEMatrixArray>;
  2121. def : SME2_ZA_TwoOp_VG4_Multi_Index_Pat<NAME, intrinsic, sme_elm_idx0_7, vector_ty, vt, VectorIndexS32b_timm, tileslice16>;
  2122. def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm3], $Zn, $Zm$i",
  2123. (!cast<Instruction>(NAME) MatrixOp32:$ZAda, MatrixIndexGPR32Op8_11:$Rv, sme_elm_idx0_7:$imm3,
  2124. multi_vector_ty:$Zn, vector_ty:$Zm, VectorIndexS32b_timm:$i), 0>;
  2125. }
  2126. // SME2.1 multi-vec ternary indexed four registers 16-bit
  2127. multiclass sme2p1_multi_vec_array_vg4_index_16b<string mnemonic, bits<2> op> {
  2128. def NAME : sme2_multi_vec_array_vg4_index<0b0,{0b1,?,?,op,?}, MatrixOp16,
  2129. ZZZZ_h_mul_r, ZPR4b16,
  2130. VectorIndexH, mnemonic>{
  2131. bits<3> i;
  2132. let Inst{11-10} = i{2-1};
  2133. let Inst{3} = i{0};
  2134. }
  2135. def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm3], $Zn, $Zm$i",
  2136. (!cast<Instruction>(NAME) MatrixOp16:$ZAda, MatrixIndexGPR32Op8_11:$Rv,
  2137. sme_elm_idx0_7:$imm3, ZZZZ_h_mul_r:$Zn, ZPR4b16:$Zm, VectorIndexH:$i), 0>;
  2138. }
  2139. // SME2 multi-vec ternary indexed four registers 64-bit
  2140. class sme2_multi_vec_array_vg4_index_64b<bits<3> op,
  2141. RegisterOperand multi_vector_ty,
  2142. ZPRRegOp vector_ty,
  2143. string mnemonic>
  2144. : I<(outs MatrixOp64:$ZAda),
  2145. (ins MatrixOp64:$_ZAda, MatrixIndexGPR32Op8_11:$Rv, sme_elm_idx0_7:$imm3,
  2146. multi_vector_ty:$Zn, vector_ty:$Zm, VectorIndexD32b_timm:$i1),
  2147. mnemonic, "\t$ZAda[$Rv, $imm3, vgx4], $Zn, $Zm$i1",
  2148. "", []>, Sched<[]> {
  2149. bits<4> Zm;
  2150. bits<2> Rv;
  2151. bits<1> i1;
  2152. bits<3> Zn;
  2153. bits<3> imm3;
  2154. let Inst{31-20} = 0b110000011101;
  2155. let Inst{19-16} = Zm;
  2156. let Inst{15} = 0b1;
  2157. let Inst{14-13} = Rv;
  2158. let Inst{12} = 0b0;
  2159. let Inst{11} = op{2};
  2160. let Inst{10} = i1;
  2161. let Inst{9-7} = Zn;
  2162. let Inst{6-5} = 0b00;
  2163. let Inst{4-3} = op{1-0};
  2164. let Inst{2-0} = imm3;
  2165. let Constraints = "$ZAda = $_ZAda";
  2166. }
  2167. multiclass sme2_multi_vec_array_vg4_index_64b<string mnemonic, bits<3> op,
  2168. RegisterOperand multi_vector_ty,
  2169. ZPRRegOp vector_ty, ValueType vty,
  2170. SDPatternOperator intrinsic> {
  2171. def NAME : sme2_multi_vec_array_vg4_index_64b<op, multi_vector_ty, vector_ty,
  2172. mnemonic>, SMEPseudo2Instr<NAME, 1>;
  2173. def _PSEUDO : sme2_za_array_2op_multi_index_pseudo<NAME, sme_elm_idx0_7, multi_vector_ty, vector_ty, VectorIndexD32b_timm, SMEMatrixArray>;
  2174. def : SME2_ZA_TwoOp_VG4_Multi_Index_Pat<NAME, intrinsic, sme_elm_idx0_7, vector_ty, vty, VectorIndexD32b_timm, tileslice16>;
  2175. def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm3], $Zn, $Zm$i1",
  2176. (!cast<Instruction>(NAME) MatrixOp64:$ZAda, MatrixIndexGPR32Op8_11:$Rv, sme_elm_idx0_7:$imm3,
  2177. multi_vector_ty:$Zn, vector_ty:$Zm, VectorIndexD32b_timm:$i1), 0>;
  2178. }
  2179. //===----------------------------------------------------------------------===//
  2180. // SME2 multi-vec indexed long long MLA one source 32-bit
  2181. class sme2_mla_ll_array_index_32b<string mnemonic, bits<3> op>
  2182. : I<(outs MatrixOp32:$ZAda),
  2183. (ins MatrixOp32:$_ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm2s4range:$imm2, ZPR8:$Zn, ZPR4b8:$Zm, VectorIndexB:$i),
  2184. mnemonic, "\t$ZAda[$Rv, $imm2], $Zn, $Zm$i",
  2185. "", []>, Sched<[]> {
  2186. bits<4> Zm;
  2187. bits<2> Rv;
  2188. bits<4> i;
  2189. bits<5> Zn;
  2190. bits<2> imm2;
  2191. let Inst{31-20} = 0b110000010000;
  2192. let Inst{19-16} = Zm;
  2193. let Inst{15} = i{3};
  2194. let Inst{14-13} = Rv;
  2195. let Inst{12-10} = i{2-0};
  2196. let Inst{9-5} = Zn;
  2197. let Inst{4-2} = op;
  2198. let Inst{1-0} = imm2;
  2199. let Constraints = "$ZAda = $_ZAda";
  2200. }
  2201. // SME2 multi-vec indexed long long MLA one source 64-bit
  2202. class sme2_mla_ll_array_index_64b<string mnemonic, bits<2> op>
  2203. : I<(outs MatrixOp64:$ZAda),
  2204. (ins MatrixOp64:$_ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm2s4range:$imm2, ZPR16:$Zn, ZPR4b16:$Zm, VectorIndexH:$i),
  2205. mnemonic, "\t$ZAda[$Rv, $imm2], $Zn, $Zm$i",
  2206. "", []>, Sched<[]> {
  2207. bits<4> Zm;
  2208. bits<2> Rv;
  2209. bits<3> i;
  2210. bits<5> Zn;
  2211. bits<2> imm2;
  2212. let Inst{31-20} = 0b110000011000;
  2213. let Inst{19-16} = Zm;
  2214. let Inst{15} = i{2};
  2215. let Inst{14-13} = Rv;
  2216. let Inst{12} = 0b0;
  2217. let Inst{11-10} = i{1-0};
  2218. let Inst{9-5} = Zn;
  2219. let Inst{4-3} = op;
  2220. let Inst{2} = 0b0;
  2221. let Inst{1-0} = imm2;
  2222. let Constraints = "$ZAda = $_ZAda";
  2223. }
  2224. class sme2_mla_ll_array_vg24_index_32b<bit vg4, bits<3> op,
  2225. RegisterOperand vector_ty,
  2226. string mnemonic>
  2227. : I<(outs MatrixOp32:$ZAda),
  2228. (ins MatrixOp32:$_ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm1s4range:$imm,
  2229. vector_ty:$Zn, ZPR4b8:$Zm, VectorIndexB:$i),
  2230. mnemonic, "\t$ZAda[$Rv, $imm, " # !if(vg4, "vgx4", "vgx2") # "], $Zn, $Zm$i",
  2231. "", []>, Sched<[]> {
  2232. bits<4> Zm;
  2233. bits<2> Rv;
  2234. bits<4> i;
  2235. bit imm;
  2236. let Inst{31-20} = 0b110000010001;
  2237. let Inst{19-16} = Zm;
  2238. let Inst{15} = vg4;
  2239. let Inst{14-13} = Rv;
  2240. let Inst{12} = 0b0;
  2241. let Inst{11-10} = i{3-2};
  2242. let Inst{5-3} = op;
  2243. let Inst{2-1} = i{1-0};
  2244. let Inst{0} = imm;
  2245. let Constraints = "$ZAda = $_ZAda";
  2246. }
  2247. //SME2 multi-vec indexed long long MLA two sources 32-bit
  2248. multiclass sme2_mla_ll_array_vg2_index_32b<string mnemonic, bits<3> op> {
  2249. def NAME: sme2_mla_ll_array_vg24_index_32b<0b0, op, ZZ_b_mul_r, mnemonic> {
  2250. bits<4> Zn;
  2251. let Inst{9-6} = Zn;
  2252. }
  2253. def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm], $Zn, $Zm$i",
  2254. (!cast<Instruction>(NAME) MatrixOp32:$ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm1s4range:$imm, ZZ_b_mul_r:$Zn, ZPR4b8:$Zm, VectorIndexB:$i), 0>;
  2255. }
  2256. // SME2 multi-vec indexed long long MLA four sources 32-bit
  2257. multiclass sme2_mla_ll_array_vg4_index_32b<string mnemonic, bits<3> op> {
  2258. def NAME: sme2_mla_ll_array_vg24_index_32b<0b1, op, ZZZZ_b_mul_r, mnemonic> {
  2259. bits<3> Zn;
  2260. let Inst{9-7} = Zn;
  2261. let Inst{6} = 0b0;
  2262. }
  2263. def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm], $Zn, $Zm$i",
  2264. (!cast<Instruction>(NAME) MatrixOp32:$ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm1s4range:$imm, ZZZZ_b_mul_r:$Zn, ZPR4b8:$Zm, VectorIndexB:$i), 0>;
  2265. }
  2266. class sme2_mla_ll_array_vg24_index_64b<bit vg4, bits<2> op,
  2267. RegisterOperand vector_ty,
  2268. string mnemonic>
  2269. : I<(outs MatrixOp64:$ZAda),
  2270. (ins MatrixOp64:$_ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm1s4range:$imm,
  2271. vector_ty:$Zn, ZPR4b16:$Zm, VectorIndexH:$i),
  2272. mnemonic, "\t$ZAda[$Rv, $imm, " # !if(vg4, "vgx4", "vgx2") # "], $Zn, $Zm$i",
  2273. "", []>, Sched<[]> {
  2274. bits<4> Zm;
  2275. bits<2> Rv;
  2276. bits<3> i;
  2277. bit imm;
  2278. let Inst{31-20} = 0b110000011001;
  2279. let Inst{19-16} = Zm;
  2280. let Inst{15} = vg4;
  2281. let Inst{14-13} = Rv;
  2282. let Inst{12-11} = 0b00;
  2283. let Inst{10} = i{2};
  2284. let Inst{5} = 0b0;
  2285. let Inst{4-3} = op;
  2286. let Inst{2-1} = i{1-0};
  2287. let Inst{0} = imm;
  2288. let Constraints = "$ZAda = $_ZAda";
  2289. }
  2290. // SME2 multi-vec indexed long long MLA two sources 64-bit
  2291. multiclass sme2_mla_ll_array_vg2_index_64b<string mnemonic, bits<2> op> {
  2292. def NAME: sme2_mla_ll_array_vg24_index_64b<0b0, op, ZZ_h_mul_r, mnemonic>{
  2293. bits<4> Zn;
  2294. let Inst{9-6} = Zn;
  2295. }
  2296. def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm], $Zn, $Zm$i",
  2297. (!cast<Instruction>(NAME) MatrixOp64:$ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm1s4range:$imm, ZZ_h_mul_r:$Zn, ZPR4b16:$Zm, VectorIndexH:$i), 0>;
  2298. }
  2299. // SME2 multi-vec indexed long long MLA four sources 64-bit
  2300. multiclass sme2_mla_ll_array_vg4_index_64b<string mnemonic, bits<2> op> {
  2301. def NAME: sme2_mla_ll_array_vg24_index_64b<0b1, op, ZZZZ_h_mul_r, mnemonic>{
  2302. bits<3> Zn;
  2303. let Inst{9-7} = Zn;
  2304. let Inst{6} = 0b0;
  2305. }
  2306. def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm], $Zn, $Zm$i",
  2307. (!cast<Instruction>(NAME) MatrixOp64:$ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm1s4range:$imm, ZZZZ_h_mul_r:$Zn, ZPR4b16:$Zm, VectorIndexH:$i), 0>;
  2308. }
  2309. //SME2 multiple and single vector long long FMA one source
  2310. class sme2_mla_ll_array_single<string mnemonic, bits<4> op,
  2311. MatrixOperand matrix_ty, ZPRRegOp vector_ty,
  2312. ZPRRegOp zpr_ty>
  2313. : I<(outs matrix_ty:$ZAda),
  2314. (ins matrix_ty:$_ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm2s4range:$imm,
  2315. vector_ty:$Zn, zpr_ty:$Zm),
  2316. mnemonic, "\t$ZAda[$Rv, $imm], $Zn, $Zm",
  2317. "", []>, Sched<[]> {
  2318. bits<4> Zm;
  2319. bits<2> Rv;
  2320. bits<5> Zn;
  2321. bits<2> imm;
  2322. let Inst{31-23} = 0b110000010;
  2323. let Inst{22} = op{3}; //sz
  2324. let Inst{21-20} = 0b10;
  2325. let Inst{19-16} = Zm;
  2326. let Inst{15} = 0b0;
  2327. let Inst{14-13} = Rv;
  2328. let Inst{12-10} = 0b001;
  2329. let Inst{9-5} = Zn;
  2330. let Inst{4-2} = op{2-0};
  2331. let Inst{1-0} = imm;
  2332. let Constraints = "$ZAda = $_ZAda";
  2333. }
  2334. class sme2_mla_ll_array_vg24_single<bits<5> op, MatrixOperand matrix_ty,
  2335. RegisterOperand vector_ty, ZPRRegOp zpr_ty,
  2336. string mnemonic>
  2337. : I<(outs matrix_ty:$ZAda),
  2338. (ins matrix_ty:$_ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm1s4range:$imm,
  2339. vector_ty:$Zn, zpr_ty:$Zm),
  2340. mnemonic, "\t$ZAda[$Rv, $imm, " # !if(op{3}, "vgx4", "vgx2") # "], $Zn, $Zm",
  2341. "", []>, Sched<[]> {
  2342. bits<4> Zm;
  2343. bits<2> Rv;
  2344. bits<5> Zn;
  2345. bit imm;
  2346. let Inst{31-23} = 0b110000010;
  2347. let Inst{22} = op{4}; //sz
  2348. let Inst{21} = 0b1;
  2349. let Inst{20} = op{3}; //vg4
  2350. let Inst{19-16} = Zm;
  2351. let Inst{15} = 0b0;
  2352. let Inst{14-13} = Rv;
  2353. let Inst{12-10} = 0b000;
  2354. let Inst{9-5} = Zn;
  2355. let Inst{4-2} = op{2-0};
  2356. let Inst{1} = 0b0;
  2357. let Inst{0} = imm;
  2358. let Constraints = "$ZAda = $_ZAda";
  2359. }
  2360. //SME2 single-multi long long MLA two and four sources
  2361. multiclass sme2_mla_ll_array_vg24_single<string mnemonic, bits<5> op,
  2362. MatrixOperand matrix_ty,
  2363. RegisterOperand multi_vector_ty,
  2364. ZPRRegOp zpr_ty> {
  2365. def NAME: sme2_mla_ll_array_vg24_single<op, matrix_ty, multi_vector_ty,
  2366. zpr_ty, mnemonic>;
  2367. def : InstAlias<mnemonic # "\t$ZAd[$Rv, $imm], $Zn, $Zm",
  2368. (!cast<Instruction>(NAME) matrix_ty:$ZAd, MatrixIndexGPR32Op8_11:$Rv, uimm1s4range:$imm, multi_vector_ty:$Zn, zpr_ty:$Zm), 0>;
  2369. }
  2370. // SME2 multiple vectors long long MLA two sources
  2371. class sme2_mla_ll_array_vg2_multi<bits<4> op, MatrixOperand matrix_ty,
  2372. RegisterOperand vector_ty,string mnemonic>
  2373. : I<(outs matrix_ty:$ZAda),
  2374. (ins matrix_ty:$_ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm1s4range:$imm,
  2375. vector_ty:$Zn, vector_ty:$Zm),
  2376. mnemonic, "\t$ZAda[$Rv, $imm, vgx2], $Zn, $Zm",
  2377. "", []>, Sched<[]> {
  2378. bits<4> Zm;
  2379. bits<2> Rv;
  2380. bits<4> Zn;
  2381. bit imm;
  2382. let Inst{31-23} = 0b110000011;
  2383. let Inst{22} = op{3}; // sz
  2384. let Inst{21} = 0b1;
  2385. let Inst{20-17} = Zm;
  2386. let Inst{16-15} = 0b00;
  2387. let Inst{14-13} = Rv;
  2388. let Inst{12-10} = 0b000;
  2389. let Inst{9-6} = Zn;
  2390. let Inst{5} = 0b0;
  2391. let Inst{4-2} = op{2-0};
  2392. let Inst{1} = 0b0;
  2393. let Inst{0} = imm;
  2394. let Constraints = "$ZAda = $_ZAda";
  2395. }
  2396. multiclass sme2_mla_ll_array_vg2_multi<string mnemonic, bits<4> op,
  2397. MatrixOperand matrix_ty,
  2398. RegisterOperand vector_ty> {
  2399. def NAME : sme2_mla_ll_array_vg2_multi<op, matrix_ty, vector_ty, mnemonic>;
  2400. def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm], $Zn, $Zm",
  2401. (!cast<Instruction>(NAME) matrix_ty:$ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm1s4range:$imm, vector_ty:$Zn, vector_ty:$Zm), 0>;
  2402. }
  2403. // SME2 multiple vectors long long MLA four sources
  2404. class sme2_mla_ll_array_vg4_multi<bits<4> op,MatrixOperand matrix_ty,
  2405. RegisterOperand vector_ty,
  2406. string mnemonic>
  2407. : I<(outs matrix_ty:$ZAda),
  2408. (ins matrix_ty:$_ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm1s4range:$imm,
  2409. vector_ty:$Zn, vector_ty:$Zm),
  2410. mnemonic, "\t$ZAda[$Rv, $imm, vgx4], $Zn, $Zm",
  2411. "", []>, Sched<[]> {
  2412. bits<3> Zm;
  2413. bits<2> Rv;
  2414. bits<3> Zn;
  2415. bit imm;
  2416. let Inst{31-23} = 0b110000011;
  2417. let Inst{22} = op{3}; // sz
  2418. let Inst{21} = 0b1;
  2419. let Inst{20-18} = Zm;
  2420. let Inst{17-15} = 0b010;
  2421. let Inst{14-13} = Rv;
  2422. let Inst{12-10} = 0b000;
  2423. let Inst{9-7} = Zn;
  2424. let Inst{6-5} = 0b00;
  2425. let Inst{4-2} = op{2-0};
  2426. let Inst{1} = 0b0;
  2427. let Inst{0} = imm;
  2428. let Constraints = "$ZAda = $_ZAda";
  2429. }
  2430. multiclass sme2_mla_ll_array_vg4_multi<string mnemonic, bits<4> op,
  2431. MatrixOperand matrix_ty,
  2432. RegisterOperand vector_ty> {
  2433. def NAME : sme2_mla_ll_array_vg4_multi<op, matrix_ty, vector_ty, mnemonic>;
  2434. def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm], $Zn, $Zm",
  2435. (!cast<Instruction>(NAME) matrix_ty:$ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm1s4range:$imm, vector_ty:$Zn, vector_ty:$Zm), 0>;
  2436. }
  2437. //===----------------------------------------------------------------------===//
  2438. // SME2 Outer Product and Accumulate
  2439. multiclass sme2_int_mopx_tile<string mnemonic, bits<3> op> {
  2440. def NAME : sme_int_outer_product_inst<op, 0b0, 0b1, TileOp32, ZPR16, mnemonic> {
  2441. bits<2> ZAda;
  2442. let Inst{1-0} = ZAda;
  2443. let Inst{2} = 0b0;
  2444. }
  2445. }
  2446. multiclass sme2_bfp_mopx_tile<string mnemonic, bits<3> op> {
  2447. def NAME : sme_outer_product_widening_inst<op, ZPR32, mnemonic>;
  2448. }
  2449. //===----------------------------------------------------------------------===///
  2450. // SME2 Zero Lookup Table.
  2451. class sme2_zero_zt<string mnemonic, bits<4> opc>
  2452. : I<(outs ZTR:$ZT), (ins ),
  2453. mnemonic, "\t\\{ $ZT \\}",
  2454. "", []>, Sched<[]> {
  2455. let Inst{31-4} = 0b1100000001001000000000000000;
  2456. let Inst{3-0} = opc;
  2457. }
  2458. //===----------------------------------------------------------------------===//
  2459. // SME2 lookup table load/store
  2460. class sme2_spill_fill_vector<string mnemonic, bits<8> opc>
  2461. : I<!if(opc{7}, (outs ), (outs ZTR:$ZTt)),
  2462. !if(opc{7}, (ins ZTR:$ZTt, GPR64sp:$Rn), (ins GPR64sp:$Rn)),
  2463. mnemonic, "\t$ZTt, [$Rn]",
  2464. "", []>, Sched<[]> {
  2465. bits<5> Rn;
  2466. let Inst{31-22} = 0b1110000100;
  2467. let Inst{21-16} = opc{7-2};
  2468. let Inst{15-10} = 0b100000;
  2469. let Inst{9-5} = Rn;
  2470. let Inst{4-2} = 0b000;
  2471. let Inst{1-0} = opc{1-0};
  2472. let mayLoad = !not(opc{7});
  2473. let mayStore = opc{7};
  2474. }
  2475. //===----------------------------------------------------------------------===///
  2476. // SME2 move to/from lookup table
  2477. class sme2_movt_zt_to_scalar<string mnemonic, bits<7> opc>
  2478. : I<(outs GPR64:$Rt), (ins ZTR:$ZTt, uimm3s8:$imm3),
  2479. mnemonic, "\t$Rt, $ZTt$imm3",
  2480. "", []>, Sched<[]> {
  2481. bits<3> imm3;
  2482. bits<5> Rt;
  2483. let Inst{31-15} = 0b11000000010011000;
  2484. let Inst{14-12} = imm3;
  2485. let Inst{11-5} = opc;
  2486. let Inst{4-0} = Rt;
  2487. }
  2488. class sme2_movt_scalar_to_zt<string mnemonic, bits<7> opc>
  2489. : I<(outs ZTR:$ZTt), (ins uimm3s8:$imm3, GPR64:$Rt),
  2490. mnemonic, "\t$ZTt$imm3, $Rt",
  2491. "", []>, Sched<[]> {
  2492. bits<3> imm3;
  2493. bits<5> Rt;
  2494. let Inst{31-15} = 0b11000000010011100;
  2495. let Inst{14-12} = imm3;
  2496. let Inst{11-5} = opc;
  2497. let Inst{4-0} = Rt;
  2498. }
  2499. //===----------------------------------------------------------------------===//
  2500. // SME2 lookup table expand one register
  2501. class sme2_luti_vector_index<bits<2> sz, bits<7> opc, RegisterOperand vector_ty,
  2502. AsmVectorIndexOpnd index_ty, string mnemonic>
  2503. : I<(outs vector_ty:$Zd),
  2504. (ins ZTR:$ZTt, ZPRAny:$Zn, index_ty:$i),
  2505. mnemonic, "\t$Zd, $ZTt, $Zn$i",
  2506. "", []>, Sched<[]> {
  2507. bits<5> Zn;
  2508. bits<5> Zd;
  2509. let Inst{31-19} = 0b1100000011001;
  2510. let Inst{18-14} = opc{6-2};
  2511. let Inst{13-12} = sz;
  2512. let Inst{11-10} = opc{1-0};
  2513. let Inst{9-5} = Zn;
  2514. let Inst{4-0} = Zd;
  2515. }
  2516. class sme2_luti2_vector_index<bits<2> sz, RegisterOperand vector_ty,
  2517. string mnemonic>
  2518. : sme2_luti_vector_index<sz, {1,?,?,?,?,0,0}, vector_ty, VectorIndexB, mnemonic> {
  2519. bits<4> i;
  2520. let Inst{17-14} = i;
  2521. }
  2522. multiclass sme2_luti2_vector_index<string mnemonic> {
  2523. def _B : sme2_luti2_vector_index<0b00, ZPR8, mnemonic>;
  2524. def _H : sme2_luti2_vector_index<0b01, ZPR16, mnemonic>;
  2525. def _S : sme2_luti2_vector_index<0b10, ZPR32, mnemonic>;
  2526. }
  2527. class sme2_luti4_vector_index<bits<2> sz, RegisterOperand vector_ty,
  2528. string mnemonic>
  2529. : sme2_luti_vector_index<sz, {0,1,?,?,?,0,0}, vector_ty, VectorIndexH, mnemonic> {
  2530. bits<3> i;
  2531. let Inst{16-14} = i;
  2532. }
  2533. multiclass sme2_luti4_vector_index<string mnemonic> {
  2534. def _B : sme2_luti4_vector_index<0b00, ZPR8, mnemonic>;
  2535. def _H : sme2_luti4_vector_index<0b01, ZPR16, mnemonic>;
  2536. def _S : sme2_luti4_vector_index<0b10, ZPR32, mnemonic>;
  2537. }
  2538. // SME2 lookup table expand two contiguous registers
  2539. class sme2_luti_vector_vg2_index<bits<2> sz, bits<6> opc, RegisterOperand vector_ty,
  2540. AsmVectorIndexOpnd index_ty, string mnemonic>
  2541. : I<(outs vector_ty:$Zd),
  2542. (ins ZTR:$ZTt, ZPRAny:$Zn, index_ty:$i),
  2543. mnemonic, "\t$Zd, $ZTt, $Zn$i",
  2544. "", []>, Sched<[]> {
  2545. bits<5> Zn;
  2546. bits<4> Zd;
  2547. let Inst{31-19} = 0b1100000010001;
  2548. let Inst{18-15} = opc{5-2};
  2549. let Inst{14} = 0b1;
  2550. let Inst{13-12} = sz;
  2551. let Inst{11-10} = opc{1-0};
  2552. let Inst{9-5} = Zn;
  2553. let Inst{4-1} = Zd;
  2554. let Inst{0} = 0b0;
  2555. }
  2556. class sme2_luti2_vector_vg2_index<bits<2> sz, RegisterOperand vector_ty,
  2557. string mnemonic>
  2558. : sme2_luti_vector_vg2_index<sz, {1,?,?,?,0,0}, vector_ty, VectorIndexH, mnemonic> {
  2559. bits<3> i;
  2560. let Inst{17-15} = i;
  2561. }
  2562. multiclass sme2_luti2_vector_vg2_index<string mnemonic> {
  2563. def _B : sme2_luti2_vector_vg2_index<0b00, ZZ_b_mul_r, mnemonic>;
  2564. def _H : sme2_luti2_vector_vg2_index<0b01, ZZ_h_mul_r, mnemonic>;
  2565. def _S : sme2_luti2_vector_vg2_index<0b10, ZZ_s_mul_r, mnemonic>;
  2566. }
  2567. class sme2_luti4_vector_vg2_index<bits<2> sz, RegisterOperand vector_ty,
  2568. string mnemonic>
  2569. : sme2_luti_vector_vg2_index<sz, {0,1,?,?,0,0}, vector_ty, VectorIndexS, mnemonic> {
  2570. bits<2> i;
  2571. let Inst{16-15} = i;
  2572. }
  2573. multiclass sme2_luti4_vector_vg2_index<string mnemonic> {
  2574. def _B : sme2_luti4_vector_vg2_index<0b00, ZZ_b_mul_r, mnemonic>;
  2575. def _H : sme2_luti4_vector_vg2_index<0b01, ZZ_h_mul_r, mnemonic>;
  2576. def _S : sme2_luti4_vector_vg2_index<0b10, ZZ_s_mul_r, mnemonic>;
  2577. }
  2578. // SME2 lookup table expand four contiguous registers
  2579. class sme2_luti_vector_vg4_index<bits<2> sz, bits<5>opc, RegisterOperand vector_ty,
  2580. AsmVectorIndexOpnd index_ty, string mnemonic>
  2581. : I<(outs vector_ty:$Zd),
  2582. (ins ZTR:$ZTt, ZPRAny:$Zn, index_ty:$i),
  2583. mnemonic, "\t$Zd, $ZTt, $Zn$i",
  2584. "", []>, Sched<[]> {
  2585. bits<5> Zn;
  2586. bits<3> Zd;
  2587. let Inst{31-19} = 0b1100000010001;
  2588. let Inst{18-16} = opc{4-2};
  2589. let Inst{15-14} = 0b10;
  2590. let Inst{13-12} = sz;
  2591. let Inst{11-10} = opc{1-0};
  2592. let Inst{9-5} = Zn;
  2593. let Inst{4-2} = Zd;
  2594. let Inst{1-0} = 0b00;
  2595. }
  2596. class sme2_luti2_vector_vg4_index<bits<2> sz, RegisterOperand vector_ty,
  2597. string mnemonic>
  2598. : sme2_luti_vector_vg4_index<sz, {1,?,?,0,0}, vector_ty, VectorIndexS, mnemonic> {
  2599. bits<2> i;
  2600. let Inst{17-16} = i;
  2601. }
  2602. multiclass sme2_luti2_vector_vg4_index<string mnemonic> {
  2603. def _B : sme2_luti2_vector_vg4_index<0b00, ZZZZ_b_mul_r, mnemonic>;
  2604. def _H : sme2_luti2_vector_vg4_index<0b01, ZZZZ_h_mul_r, mnemonic>;
  2605. def _S : sme2_luti2_vector_vg4_index<0b10, ZZZZ_s_mul_r, mnemonic>;
  2606. }
  2607. class sme2_luti4_vector_vg4_index<bits<2> sz, RegisterOperand vector_ty,
  2608. string mnemonic>
  2609. : sme2_luti_vector_vg4_index<sz, {0,1,?,0,0}, vector_ty, VectorIndexD, mnemonic> {
  2610. bits<1> i;
  2611. let Inst{16} = i;
  2612. }
  2613. multiclass sme2_luti4_vector_vg4_index<string mnemonic> {
  2614. def _H : sme2_luti4_vector_vg4_index<0b01, ZZZZ_h_mul_r, mnemonic>;
  2615. def _S : sme2_luti4_vector_vg4_index<0b10, ZZZZ_s_mul_r, mnemonic>;
  2616. }
  2617. //===----------------------------------------------------------------------===//
  2618. // SME2 MOV
  2619. class sme2_mova_vec_to_tile_vg2_multi_base<bits<2> sz, bit v,
  2620. RegisterOperand tile_ty,
  2621. Operand index_ty,
  2622. RegisterOperand vector_ty,
  2623. string mnemonic>
  2624. : I<(outs tile_ty:$ZAd),
  2625. (ins tile_ty:$_ZAd, MatrixIndexGPR32Op12_15:$Rs, index_ty:$imm, vector_ty:$Zn),
  2626. mnemonic, "\t$ZAd[$Rs, $imm], $Zn",
  2627. "", []>, Sched<[]> {
  2628. bits<2> Rs;
  2629. bits<4> Zn;
  2630. let Inst{31-24} = 0b11000000;
  2631. let Inst{23-22} = sz;
  2632. let Inst{21-16} = 0b000100;
  2633. let Inst{15} = v;
  2634. let Inst{14-13} = Rs;
  2635. let Inst{12-10} = 0b000;
  2636. let Inst{9-6} = Zn;
  2637. let Inst{5-3} = 0b000;
  2638. let Constraints = "$ZAd = $_ZAd";
  2639. }
  2640. multiclass sme2_mova_vec_to_tile_or_array_aliases<int prefer, Instruction inst,
  2641. RegisterOperand tile_or_array_ty,
  2642. RegisterOperand rv_ty,
  2643. Operand index_ty,
  2644. RegisterOperand vector_ty,
  2645. string mnemonic,
  2646. string vg_acronym=""> {
  2647. def : InstAlias<mnemonic # "\t$ZAd[$Rs, $imm" # !if(!eq(vg_acronym, ""), "", ", " # vg_acronym) # "], $Zn",
  2648. (inst tile_or_array_ty:$ZAd, rv_ty:$Rs, index_ty:$imm, vector_ty:$Zn), prefer>;
  2649. }
  // SME2 move vector to tile, two registers.
  //
  // Instantiates the _B/_H/_S/_D encodings for one slice direction together
  // with their assembly aliases. `v` selects the TileVectorOpV* operands when
  // set and the TileVectorOpH* operands otherwise. Each element size fills
  // Inst{2-0} with its own split between the ZAd field and the slice offset.
  //
  // Every (mnemonic, element size) alias is defined exactly once: "mov" with
  // priority 1 and "mova" with priority 0.
  multiclass sme2_mova_vec_to_tile_vg2_multi_base<bit v, string mnemonic> {
    // Byte elements: no ZAd field is encoded, Inst{2-0} holds the offset.
    def _B : sme2_mova_vec_to_tile_vg2_multi_base<
                 0b00, v,
                 !if(v, TileVectorOpV8, TileVectorOpH8),
                 uimm3s2range, ZZ_b_mul_r, mnemonic> {
      bits<3> imm;
      let Inst{2-0} = imm;
    }

    // Halfword elements: one ZAd bit, two offset bits.
    def _H : sme2_mova_vec_to_tile_vg2_multi_base<
                 0b01, v,
                 !if(v, TileVectorOpV16, TileVectorOpH16),
                 uimm2s2range, ZZ_h_mul_r, mnemonic> {
      bits<1> ZAd;
      bits<2> imm;
      let Inst{2}   = ZAd;
      let Inst{1-0} = imm;
    }

    // Word elements: two ZAd bits, one offset bit.
    def _S : sme2_mova_vec_to_tile_vg2_multi_base<
                 0b10, v,
                 !if(v, TileVectorOpV32, TileVectorOpH32),
                 uimm1s2range, ZZ_s_mul_r, mnemonic> {
      bits<2> ZAd;
      bits<1> imm;
      let Inst{2-1} = ZAd;
      let Inst{0}   = imm;
    }

    // Doubleword elements: three ZAd bits, no offset bits are encoded.
    def _D : sme2_mova_vec_to_tile_vg2_multi_base<
                 0b11, v,
                 !if(v, TileVectorOpV64, TileVectorOpH64),
                 uimm0s2range, ZZ_d_mul_r, mnemonic> {
      bits<3> ZAd;
      let Inst{2-0} = ZAd;
    }

    // "mov" aliases, priority 1.
    defm : sme2_mova_vec_to_tile_or_array_aliases<
               1, !cast<Instruction>(NAME # _B),
               !if(v, TileVectorOpV8, TileVectorOpH8),
               MatrixIndexGPR32Op12_15,
               uimm3s2range, ZZ_b_mul_r,
               "mov">;
    defm : sme2_mova_vec_to_tile_or_array_aliases<
               1, !cast<Instruction>(NAME # _H),
               !if(v, TileVectorOpV16, TileVectorOpH16),
               MatrixIndexGPR32Op12_15,
               uimm2s2range, ZZ_h_mul_r,
               "mov">;
    defm : sme2_mova_vec_to_tile_or_array_aliases<
               1, !cast<Instruction>(NAME # _S),
               !if(v, TileVectorOpV32, TileVectorOpH32),
               MatrixIndexGPR32Op12_15,
               uimm1s2range, ZZ_s_mul_r,
               "mov">;
    defm : sme2_mova_vec_to_tile_or_array_aliases<
               1, !cast<Instruction>(NAME # _D),
               !if(v, TileVectorOpV64, TileVectorOpH64),
               MatrixIndexGPR32Op12_15,
               uimm0s2range, ZZ_d_mul_r,
               "mov">;

    // "mova" aliases, priority 0.
    defm : sme2_mova_vec_to_tile_or_array_aliases<
               0, !cast<Instruction>(NAME # _B),
               !if(v, TileVectorOpV8, TileVectorOpH8),
               MatrixIndexGPR32Op12_15,
               uimm3s2range, ZZ_b_mul_r,
               "mova">;
    defm : sme2_mova_vec_to_tile_or_array_aliases<
               0, !cast<Instruction>(NAME # _H),
               !if(v, TileVectorOpV16, TileVectorOpH16),
               MatrixIndexGPR32Op12_15,
               uimm2s2range, ZZ_h_mul_r,
               "mova">;
    defm : sme2_mova_vec_to_tile_or_array_aliases<
               0, !cast<Instruction>(NAME # _S),
               !if(v, TileVectorOpV32, TileVectorOpH32),
               MatrixIndexGPR32Op12_15,
               uimm1s2range, ZZ_s_mul_r,
               "mova">;
    defm : sme2_mova_vec_to_tile_or_array_aliases<
               0, !cast<Instruction>(NAME # _D),
               !if(v, TileVectorOpV64, TileVectorOpH64),
               MatrixIndexGPR32Op12_15,
               uimm0s2range, ZZ_d_mul_r,
               "mova">;
  }
  // Instantiates the two-register vector-to-tile move twice: the _H variant
  // passes v = 0 and the _V variant passes v = 1.
  multiclass sme2_mova_vec_to_tile_vg2_multi<string mnemonic> {
    defm _H : sme2_mova_vec_to_tile_vg2_multi_base<0b0, mnemonic>;
    defm _V : sme2_mova_vec_to_tile_vg2_multi_base<0b1, mnemonic>;
  }
  // Encoding class shared by the four-register "vector group to tile slice"
  // moves.
  //   sz        - written to Inst{23-22}.
  //   v         - written to Inst{15}.
  //   op        - written to Inst{2-0}; bits passed as '?' are left for the
  //               deriving definition to set.
  //   tile_ty   - operand type of the destination tile $ZAd, which is tied to
  //               the $_ZAd input through Constraints.
  //   index_ty  - operand type of the slice offset $imm.
  //   vector_ty - operand type of the source vector group $Zn.
  //   mnemonic  - assembly mnemonic.
  class sme2_mova_vec_to_tile_vg4_multi_base<bits<2> sz, bit v, bits<3> op,
                                             RegisterOperand tile_ty,
                                             Operand index_ty,
                                             RegisterOperand vector_ty,
                                             string mnemonic>
      : I<(outs tile_ty:$ZAd),
          (ins tile_ty:$_ZAd, MatrixIndexGPR32Op12_15:$Rs, index_ty:$imm,
               vector_ty:$Zn),
          mnemonic,
          "\t$ZAd[$Rs, $imm], $Zn",
          "", []>,
        Sched<[]> {
    bits<2> Rs;
    bits<3> Zn;

    // The destination tile is also read, so it appears as a tied input.
    let Constraints = "$ZAd = $_ZAd";

    let Inst{31-24} = 0b11000000;
    let Inst{23-22} = sz;
    let Inst{21-16} = 0b000100;
    let Inst{15}    = v;
    let Inst{14-13} = Rs;
    let Inst{12-10} = 0b001;
    let Inst{9-7}   = Zn;
    let Inst{6-3}   = 0b0000;
    let Inst{2-0}   = op;
  }
  // SME2 move vector to tile, four registers.
  //
  // Instantiates the _B/_H/_S/_D encodings for one slice direction together
  // with their "mov" (priority 1) and "mova" (priority 0) aliases. `v`
  // selects the TileVectorOpV* operands when set and the TileVectorOpH*
  // operands otherwise.
  multiclass sme2_mova_vec_to_tile_vg4_multi_base<bit v, string mnemonic> {
    // Byte elements: Inst{2} fixed to 0, Inst{1-0} holds the offset.
    def _B : sme2_mova_vec_to_tile_vg4_multi_base<
                 0b00, v, {0,?,?},
                 !if(v, TileVectorOpV8, TileVectorOpH8),
                 uimm2s4range, ZZZZ_b_mul_r, mnemonic> {
      bits<2> imm;
      let Inst{1-0} = imm;
    }

    // Halfword elements: Inst{2} fixed to 0, one ZAd bit, one offset bit.
    def _H : sme2_mova_vec_to_tile_vg4_multi_base<
                 0b01, v, {0,?,?},
                 !if(v, TileVectorOpV16, TileVectorOpH16),
                 uimm1s4range, ZZZZ_h_mul_r, mnemonic> {
      bits<1> ZAd;
      bits<1> imm;
      let Inst{1} = ZAd;
      let Inst{0} = imm;
    }

    // Word elements: Inst{2} fixed to 0, two ZAd bits, no offset bits.
    def _S : sme2_mova_vec_to_tile_vg4_multi_base<
                 0b10, v, {0,?,?},
                 !if(v, TileVectorOpV32, TileVectorOpH32),
                 uimm0s4range, ZZZZ_s_mul_r, mnemonic> {
      bits<2> ZAd;
      let Inst{1-0} = ZAd;
    }

    // Doubleword elements: all of Inst{2-0} holds ZAd.
    def _D : sme2_mova_vec_to_tile_vg4_multi_base<
                 0b11, v, {?,?,?},
                 !if(v, TileVectorOpV64, TileVectorOpH64),
                 uimm0s4range, ZZZZ_d_mul_r, mnemonic> {
      bits<3> ZAd;
      let Inst{2-0} = ZAd;
    }

    // "mov" aliases, priority 1.
    defm : sme2_mova_vec_to_tile_or_array_aliases<
               1, !cast<Instruction>(NAME # _B),
               !if(v, TileVectorOpV8, TileVectorOpH8),
               MatrixIndexGPR32Op12_15,
               uimm2s4range, ZZZZ_b_mul_r,
               "mov">;
    defm : sme2_mova_vec_to_tile_or_array_aliases<
               1, !cast<Instruction>(NAME # _H),
               !if(v, TileVectorOpV16, TileVectorOpH16),
               MatrixIndexGPR32Op12_15,
               uimm1s4range, ZZZZ_h_mul_r,
               "mov">;
    defm : sme2_mova_vec_to_tile_or_array_aliases<
               1, !cast<Instruction>(NAME # _S),
               !if(v, TileVectorOpV32, TileVectorOpH32),
               MatrixIndexGPR32Op12_15,
               uimm0s4range, ZZZZ_s_mul_r,
               "mov">;
    defm : sme2_mova_vec_to_tile_or_array_aliases<
               1, !cast<Instruction>(NAME # _D),
               !if(v, TileVectorOpV64, TileVectorOpH64),
               MatrixIndexGPR32Op12_15,
               uimm0s4range, ZZZZ_d_mul_r,
               "mov">;

    // "mova" aliases, priority 0.
    defm : sme2_mova_vec_to_tile_or_array_aliases<
               0, !cast<Instruction>(NAME # _B),
               !if(v, TileVectorOpV8, TileVectorOpH8),
               MatrixIndexGPR32Op12_15,
               uimm2s4range, ZZZZ_b_mul_r,
               "mova">;
    defm : sme2_mova_vec_to_tile_or_array_aliases<
               0, !cast<Instruction>(NAME # _H),
               !if(v, TileVectorOpV16, TileVectorOpH16),
               MatrixIndexGPR32Op12_15,
               uimm1s4range, ZZZZ_h_mul_r,
               "mova">;
    defm : sme2_mova_vec_to_tile_or_array_aliases<
               0, !cast<Instruction>(NAME # _S),
               !if(v, TileVectorOpV32, TileVectorOpH32),
               MatrixIndexGPR32Op12_15,
               uimm0s4range, ZZZZ_s_mul_r,
               "mova">;
    defm : sme2_mova_vec_to_tile_or_array_aliases<
               0, !cast<Instruction>(NAME # _D),
               !if(v, TileVectorOpV64, TileVectorOpH64),
               MatrixIndexGPR32Op12_15,
               uimm0s4range, ZZZZ_d_mul_r,
               "mova">;
  }
  // Instantiates the four-register vector-to-tile move twice: the _H variant
  // passes v = 0 and the _V variant passes v = 1.
  multiclass sme2_mova_vec_to_tile_vg4_multi<string mnemonic> {
    defm _H : sme2_mova_vec_to_tile_vg4_multi_base<0b0, mnemonic>;
    defm _V : sme2_mova_vec_to_tile_vg4_multi_base<0b1, mnemonic>;
  }
  // SME Move into Array
  //
  // Encoding class shared by the two- and four-register "vector group to
  // array" moves.
  //   op         - written to Inst{10-6}; bits passed as '?' are left for the
  //                deriving definition (they carry the $Zn encoding).
  //   array_ty   - operand type of the destination array $ZAd, which is tied
  //                to the $_ZAd input through Constraints.
  //   vector_ty  - operand type of the source vector group $Zn.
  //   mnemonic   - assembly mnemonic.
  //   vg_acronym - text printed after the offset inside the brackets.
  class sme2_mova_vec_to_array_vg24_multi<bits<5> op, RegisterOperand array_ty,
                                          RegisterOperand vector_ty,
                                          string mnemonic,
                                          string vg_acronym = "">
      : I<(outs array_ty:$ZAd),
          (ins array_ty:$_ZAd, MatrixIndexGPR32Op8_11:$Rs, sme_elm_idx0_7:$imm,
               vector_ty:$Zn),
          mnemonic, "\t$ZAd[$Rs, $imm, " # vg_acronym # "], $Zn",
          "", []>,
        Sched<[]> {
    bits<2> Rs;
    bits<3> imm;

    // The destination array is also read, so it appears as a tied input.
    let Constraints = "$ZAd = $_ZAd";

    let Inst{31-15} = 0b11000000000001000;
    let Inst{14-13} = Rs;
    let Inst{12-11} = 0b01;
    let Inst{10-6}  = op;
    let Inst{5-3}   = 0b000;
    let Inst{2-0}   = imm;
  }
  // MOVA (vector to array, two registers)
  //
  // Defines the instruction itself on MatrixOp64 / ZZ_d_mul_r with a "vgx2"
  // suffix, then registers aliases for the other element-size spellings,
  // both with and without the "vgx2" suffix, under "mova" and "mov". Only the
  // final alias ("mov" + 64-bit + "vgx2") uses priority 1; all others use 0.
  multiclass sme2_mova_vec_to_array_vg2_multi<string mnemonic> {
    def NAME : sme2_mova_vec_to_array_vg24_multi<{0,?,?,?,?}, MatrixOp64,
                                                 ZZ_d_mul_r, mnemonic, "vgx2"> {
      bits<4> Zn;
      let Inst{9-6} = Zn;
    }

    // "mova" without a vector-group suffix.
    defm : sme2_mova_vec_to_tile_or_array_aliases<
               0, !cast<Instruction>(NAME),
               MatrixOp8,
               MatrixIndexGPR32Op8_11,
               sme_elm_idx0_7, ZZ_b_mul_r,
               "mova">;
    defm : sme2_mova_vec_to_tile_or_array_aliases<
               0, !cast<Instruction>(NAME),
               MatrixOp16,
               MatrixIndexGPR32Op8_11,
               sme_elm_idx0_7, ZZ_h_mul_r,
               "mova">;
    defm : sme2_mova_vec_to_tile_or_array_aliases<
               0, !cast<Instruction>(NAME),
               MatrixOp32,
               MatrixIndexGPR32Op8_11,
               sme_elm_idx0_7, ZZ_s_mul_r,
               "mova">;
    defm : sme2_mova_vec_to_tile_or_array_aliases<
               0, !cast<Instruction>(NAME),
               MatrixOp64,
               MatrixIndexGPR32Op8_11,
               sme_elm_idx0_7, ZZ_d_mul_r,
               "mova">;

    // "mova" with the "vgx2" suffix.
    defm : sme2_mova_vec_to_tile_or_array_aliases<
               0, !cast<Instruction>(NAME),
               MatrixOp8,
               MatrixIndexGPR32Op8_11,
               sme_elm_idx0_7, ZZ_b_mul_r,
               "mova", "vgx2">;
    defm : sme2_mova_vec_to_tile_or_array_aliases<
               0, !cast<Instruction>(NAME),
               MatrixOp16,
               MatrixIndexGPR32Op8_11,
               sme_elm_idx0_7, ZZ_h_mul_r,
               "mova", "vgx2">;
    defm : sme2_mova_vec_to_tile_or_array_aliases<
               0, !cast<Instruction>(NAME),
               MatrixOp32,
               MatrixIndexGPR32Op8_11,
               sme_elm_idx0_7, ZZ_s_mul_r,
               "mova", "vgx2">;

    // "mov" without a vector-group suffix.
    defm : sme2_mova_vec_to_tile_or_array_aliases<
               0, !cast<Instruction>(NAME),
               MatrixOp8,
               MatrixIndexGPR32Op8_11,
               sme_elm_idx0_7, ZZ_b_mul_r,
               "mov">;
    defm : sme2_mova_vec_to_tile_or_array_aliases<
               0, !cast<Instruction>(NAME),
               MatrixOp16,
               MatrixIndexGPR32Op8_11,
               sme_elm_idx0_7, ZZ_h_mul_r,
               "mov">;
    defm : sme2_mova_vec_to_tile_or_array_aliases<
               0, !cast<Instruction>(NAME),
               MatrixOp32,
               MatrixIndexGPR32Op8_11,
               sme_elm_idx0_7, ZZ_s_mul_r,
               "mov">;
    defm : sme2_mova_vec_to_tile_or_array_aliases<
               0, !cast<Instruction>(NAME),
               MatrixOp64,
               MatrixIndexGPR32Op8_11,
               sme_elm_idx0_7, ZZ_d_mul_r,
               "mov">;

    // "mov" with the "vgx2" suffix.
    defm : sme2_mova_vec_to_tile_or_array_aliases<
               0, !cast<Instruction>(NAME),
               MatrixOp8,
               MatrixIndexGPR32Op8_11,
               sme_elm_idx0_7, ZZ_b_mul_r,
               "mov", "vgx2">;
    defm : sme2_mova_vec_to_tile_or_array_aliases<
               0, !cast<Instruction>(NAME),
               MatrixOp16,
               MatrixIndexGPR32Op8_11,
               sme_elm_idx0_7, ZZ_h_mul_r,
               "mov", "vgx2">;
    defm : sme2_mova_vec_to_tile_or_array_aliases<
               0, !cast<Instruction>(NAME),
               MatrixOp32,
               MatrixIndexGPR32Op8_11,
               sme_elm_idx0_7, ZZ_s_mul_r,
               "mov", "vgx2">;
    // The only alias in this multiclass registered with priority 1.
    defm : sme2_mova_vec_to_tile_or_array_aliases<
               1, !cast<Instruction>(NAME),
               MatrixOp64,
               MatrixIndexGPR32Op8_11,
               sme_elm_idx0_7, ZZ_d_mul_r,
               "mov", "vgx2">;
  }
  // MOVA (vector to array, four registers)
  //
  // Defines the instruction itself on MatrixOp64 / ZZZZ_d_mul_r with a "vgx4"
  // suffix, then registers aliases for the other element-size spellings,
  // both with and without the "vgx4" suffix, under "mova" and "mov". Only the
  // final alias ("mov" + 64-bit + "vgx4") uses priority 1; all others use 0.
  multiclass sme2_mova_vec_to_array_vg4_multi<string mnemonic> {
    def NAME : sme2_mova_vec_to_array_vg24_multi<{1,?,?,?,0}, MatrixOp64,
                                                 ZZZZ_d_mul_r, mnemonic, "vgx4"> {
      bits<3> Zn;
      let Inst{9-7} = Zn;
    }

    // "mova" without a vector-group suffix.
    defm : sme2_mova_vec_to_tile_or_array_aliases<
               0, !cast<Instruction>(NAME),
               MatrixOp8,
               MatrixIndexGPR32Op8_11,
               sme_elm_idx0_7, ZZZZ_b_mul_r,
               "mova">;
    defm : sme2_mova_vec_to_tile_or_array_aliases<
               0, !cast<Instruction>(NAME),
               MatrixOp16,
               MatrixIndexGPR32Op8_11,
               sme_elm_idx0_7, ZZZZ_h_mul_r,
               "mova">;
    defm : sme2_mova_vec_to_tile_or_array_aliases<
               0, !cast<Instruction>(NAME),
               MatrixOp32,
               MatrixIndexGPR32Op8_11,
               sme_elm_idx0_7, ZZZZ_s_mul_r,
               "mova">;
    defm : sme2_mova_vec_to_tile_or_array_aliases<
               0, !cast<Instruction>(NAME),
               MatrixOp64,
               MatrixIndexGPR32Op8_11,
               sme_elm_idx0_7, ZZZZ_d_mul_r,
               "mova">;

    // "mova" with the "vgx4" suffix.
    defm : sme2_mova_vec_to_tile_or_array_aliases<
               0, !cast<Instruction>(NAME),
               MatrixOp8,
               MatrixIndexGPR32Op8_11,
               sme_elm_idx0_7, ZZZZ_b_mul_r,
               "mova", "vgx4">;
    defm : sme2_mova_vec_to_tile_or_array_aliases<
               0, !cast<Instruction>(NAME),
               MatrixOp16,
               MatrixIndexGPR32Op8_11,
               sme_elm_idx0_7, ZZZZ_h_mul_r,
               "mova", "vgx4">;
    defm : sme2_mova_vec_to_tile_or_array_aliases<
               0, !cast<Instruction>(NAME),
               MatrixOp32,
               MatrixIndexGPR32Op8_11,
               sme_elm_idx0_7, ZZZZ_s_mul_r,
               "mova", "vgx4">;

    // "mov" without a vector-group suffix.
    defm : sme2_mova_vec_to_tile_or_array_aliases<
               0, !cast<Instruction>(NAME),
               MatrixOp8,
               MatrixIndexGPR32Op8_11,
               sme_elm_idx0_7, ZZZZ_b_mul_r,
               "mov">;
    defm : sme2_mova_vec_to_tile_or_array_aliases<
               0, !cast<Instruction>(NAME),
               MatrixOp16,
               MatrixIndexGPR32Op8_11,
               sme_elm_idx0_7, ZZZZ_h_mul_r,
               "mov">;
    defm : sme2_mova_vec_to_tile_or_array_aliases<
               0, !cast<Instruction>(NAME),
               MatrixOp32,
               MatrixIndexGPR32Op8_11,
               sme_elm_idx0_7, ZZZZ_s_mul_r,
               "mov">;
    defm : sme2_mova_vec_to_tile_or_array_aliases<
               0, !cast<Instruction>(NAME),
               MatrixOp64,
               MatrixIndexGPR32Op8_11,
               sme_elm_idx0_7, ZZZZ_d_mul_r,
               "mov">;

    // "mov" with the "vgx4" suffix.
    defm : sme2_mova_vec_to_tile_or_array_aliases<
               0, !cast<Instruction>(NAME),
               MatrixOp8,
               MatrixIndexGPR32Op8_11,
               sme_elm_idx0_7, ZZZZ_b_mul_r,
               "mov", "vgx4">;
    defm : sme2_mova_vec_to_tile_or_array_aliases<
               0, !cast<Instruction>(NAME),
               MatrixOp16,
               MatrixIndexGPR32Op8_11,
               sme_elm_idx0_7, ZZZZ_h_mul_r,
               "mov", "vgx4">;
    defm : sme2_mova_vec_to_tile_or_array_aliases<
               0, !cast<Instruction>(NAME),
               MatrixOp32,
               MatrixIndexGPR32Op8_11,
               sme_elm_idx0_7, ZZZZ_s_mul_r,
               "mov", "vgx4">;
    // The only alias in this multiclass registered with priority 1.
    defm : sme2_mova_vec_to_tile_or_array_aliases<
               1, !cast<Instruction>(NAME),
               MatrixOp64,
               MatrixIndexGPR32Op8_11,
               sme_elm_idx0_7, ZZZZ_d_mul_r,
               "mov", "vgx4">;
  }
  // Encoding class shared by the two-register "tile slice to vector group"
  // moves. Inst{7-5} is left unset here; the per-element-size definitions
  // that derive from this class fill it in.
  //   sz        - written to Inst{23-22}.
  //   v         - written to Inst{15}.
  //   op        - written to Inst{10-8}. When op{1} is set the source tile is
  //               additionally listed as an output ($_ZAn) tied to $ZAn.
  //   vector_ty - operand type of the destination vector group $Zd.
  //   tile_ty   - operand type of the source tile $ZAn.
  //   index_ty  - operand type of the slice offset $imm.
  //   mnemonic  - assembly mnemonic.
  class sme2_mova_tile_to_vec_vg2_multi_base<bits<2> sz, bit v, bits<3> op,
                                             RegisterOperand vector_ty,
                                             RegisterOperand tile_ty,
                                             Operand index_ty,
                                             string mnemonic>
      : I<!if(op{1}, (outs vector_ty:$Zd, tile_ty:$_ZAn),
                     (outs vector_ty:$Zd)),
          (ins tile_ty:$ZAn, MatrixIndexGPR32Op12_15:$Rs, index_ty:$imm),
          mnemonic,
          "\t$Zd, $ZAn[$Rs, $imm]",
          "", []>,
        Sched<[]> {
    bits<4> Zd;
    bits<2> Rs;

    // Tie the extra tile output to the tile input only when it exists.
    let Constraints = !if(op{1}, "$ZAn = $_ZAn", "");

    let Inst{31-24} = 0b11000000;
    let Inst{23-22} = sz;
    let Inst{21-16} = 0b000110;
    let Inst{15}    = v;
    let Inst{14-13} = Rs;
    let Inst{12-11} = 0b00;
    let Inst{10-8}  = op;
    let Inst{4-1}   = Zd;
    let Inst{0}     = 0b0;
  }
  // Defines one anonymous InstAlias that maps the assembly form
  //   <mnemonic> $Zd, $ZAn[$Rs, $imm{, <vg_acronym>}]
  // onto `inst`. The ", <vg_acronym>" suffix is only appended inside the
  // brackets when vg_acronym is a non-empty string. `op` is forwarded as the
  // third InstAlias argument.
  multiclass sme2_mova_tile_or_array_to_vec_aliases<
      int op, Instruction inst,
      RegisterOperand vector_ty,
      RegisterOperand tile_or_array_ty,
      RegisterOperand rv_ty,
      Operand index_ty,
      string mnemonic,
      string vg_acronym = ""> {
    def : InstAlias<mnemonic # "\t$Zd, $ZAn[$Rs, $imm"
                        # !if(!eq(vg_acronym, ""), "", ", " # vg_acronym)
                        # "]",
                    (inst vector_ty:$Zd, tile_or_array_ty:$ZAn, rv_ty:$Rs,
                          index_ty:$imm),
                    op>;
  }
  // Instantiates the _B/_H/_S/_D two-register tile-to-vector encodings for
  // one slice direction (`v`) and one opcode (`opc`), plus their aliases.
  // Each element size fills Inst{7-5} with its own split between the ZAn
  // field and the slice offset. The priority-1 "mov" aliases are only created
  // when the mnemonic is "mova"; the priority-0 aliases under `mnemonic`
  // itself are always created.
  multiclass sme2_mova_tile_to_vec_vg2_multi_inst<bit v, bits<3> opc,
                                                  string mnemonic> {
    // Byte elements: no ZAn field is encoded, Inst{7-5} holds the offset.
    def _B : sme2_mova_tile_to_vec_vg2_multi_base<
                 0b00, v, opc, ZZ_b_mul_r,
                 !if(v, TileVectorOpV8, TileVectorOpH8),
                 uimm3s2range, mnemonic> {
      bits<3> imm;
      let Inst{7-5} = imm;
    }

    // Halfword elements: one ZAn bit, two offset bits.
    def _H : sme2_mova_tile_to_vec_vg2_multi_base<
                 0b01, v, opc, ZZ_h_mul_r,
                 !if(v, TileVectorOpV16, TileVectorOpH16),
                 uimm2s2range, mnemonic> {
      bits<1> ZAn;
      bits<2> imm;
      let Inst{7}   = ZAn;
      let Inst{6-5} = imm;
    }

    // Word elements: two ZAn bits, one offset bit.
    def _S : sme2_mova_tile_to_vec_vg2_multi_base<
                 0b10, v, opc, ZZ_s_mul_r,
                 !if(v, TileVectorOpV32, TileVectorOpH32),
                 uimm1s2range, mnemonic> {
      bits<2> ZAn;
      bits<1> imm;
      let Inst{7-6} = ZAn;
      let Inst{5}   = imm;
    }

    // Doubleword elements: three ZAn bits, no offset bits are encoded.
    def _D : sme2_mova_tile_to_vec_vg2_multi_base<
                 0b11, v, opc, ZZ_d_mul_r,
                 !if(v, TileVectorOpV64, TileVectorOpH64),
                 uimm0s2range, mnemonic> {
      bits<3> ZAn;
      let Inst{7-5} = ZAn;
    }

    // "mov" aliases (priority 1) exist only for the "mova" mnemonic.
    if !eq(mnemonic, "mova") then {
      defm : sme2_mova_tile_or_array_to_vec_aliases<
                 1, !cast<Instruction>(NAME # _B),
                 ZZ_b_mul_r,
                 !if(v, TileVectorOpV8, TileVectorOpH8),
                 MatrixIndexGPR32Op12_15,
                 uimm3s2range, "mov">;
      defm : sme2_mova_tile_or_array_to_vec_aliases<
                 1, !cast<Instruction>(NAME # _H),
                 ZZ_h_mul_r,
                 !if(v, TileVectorOpV16, TileVectorOpH16),
                 MatrixIndexGPR32Op12_15,
                 uimm2s2range, "mov">;
      defm : sme2_mova_tile_or_array_to_vec_aliases<
                 1, !cast<Instruction>(NAME # _S),
                 ZZ_s_mul_r,
                 !if(v, TileVectorOpV32, TileVectorOpH32),
                 MatrixIndexGPR32Op12_15,
                 uimm1s2range, "mov">;
      defm : sme2_mova_tile_or_array_to_vec_aliases<
                 1, !cast<Instruction>(NAME # _D),
                 ZZ_d_mul_r,
                 !if(v, TileVectorOpV64, TileVectorOpH64),
                 MatrixIndexGPR32Op12_15,
                 uimm0s2range, "mov">;
    }

    // Aliases under the instruction's own mnemonic, priority 0.
    defm : sme2_mova_tile_or_array_to_vec_aliases<
               0, !cast<Instruction>(NAME # _B),
               ZZ_b_mul_r,
               !if(v, TileVectorOpV8, TileVectorOpH8),
               MatrixIndexGPR32Op12_15,
               uimm3s2range, mnemonic>;
    defm : sme2_mova_tile_or_array_to_vec_aliases<
               0, !cast<Instruction>(NAME # _H),
               ZZ_h_mul_r,
               !if(v, TileVectorOpV16, TileVectorOpH16),
               MatrixIndexGPR32Op12_15,
               uimm2s2range, mnemonic>;
    defm : sme2_mova_tile_or_array_to_vec_aliases<
               0, !cast<Instruction>(NAME # _S),
               ZZ_s_mul_r,
               !if(v, TileVectorOpV32, TileVectorOpH32),
               MatrixIndexGPR32Op12_15,
               uimm1s2range, mnemonic>;
    defm : sme2_mova_tile_or_array_to_vec_aliases<
               0, !cast<Instruction>(NAME # _D),
               ZZ_d_mul_r,
               !if(v, TileVectorOpV64, TileVectorOpH64),
               MatrixIndexGPR32Op12_15,
               uimm0s2range, mnemonic>;
  }
  // SME2 move tile to vector, two registers.
  // Uses opcode 0b000; _H passes v = 0 and _V passes v = 1.
  multiclass sme2_mova_tile_to_vec_vg2_multi<string mnemonic> {
    defm _H : sme2_mova_tile_to_vec_vg2_multi_inst<0b0, 0b000, mnemonic>;
    defm _V : sme2_mova_tile_to_vec_vg2_multi_inst<0b1, 0b000, mnemonic>;
  }
  // SME2p1 move tile to vector and zero tile, two registers.
  // Uses opcode 0b010; _H passes v = 0 and _V passes v = 1.
  multiclass sme2p1_movaz_tile_to_vec_vg2<string mnemonic> {
    defm _H : sme2_mova_tile_to_vec_vg2_multi_inst<0b0, 0b010, mnemonic>;
    defm _V : sme2_mova_tile_to_vec_vg2_multi_inst<0b1, 0b010, mnemonic>;
  }
  // Encoding class shared by the four-register "tile slice to vector group"
  // moves.
  //   sz        - written to Inst{23-22}.
  //   v         - written to Inst{15}.
  //   op        - written to Inst{10-5}; bits passed as '?' are left for the
  //               deriving definition to set. When op{4} is set the source
  //               tile is additionally listed as an output ($_ZAn) tied to
  //               $ZAn.
  //   vector_ty - operand type of the destination vector group $Zd.
  //   tile_ty   - operand type of the source tile $ZAn.
  //   index_ty  - operand type of the slice offset $imm.
  //   mnemonic  - assembly mnemonic.
  class sme2_mova_tile_to_vec_vg4_multi_base<bits<2> sz, bit v, bits<6> op,
                                             RegisterOperand vector_ty,
                                             RegisterOperand tile_ty,
                                             Operand index_ty,
                                             string mnemonic>
      : I<!if(op{4}, (outs vector_ty:$Zd, tile_ty:$_ZAn),
                     (outs vector_ty:$Zd)),
          (ins tile_ty:$ZAn, MatrixIndexGPR32Op12_15:$Rs, index_ty:$imm),
          mnemonic,
          "\t$Zd, $ZAn[$Rs, $imm]",
          "", []>,
        Sched<[]> {
    bits<3> Zd;
    bits<2> Rs;

    // Tie the extra tile output to the tile input only when it exists.
    let Constraints = !if(op{4}, "$ZAn = $_ZAn", "");

    let Inst{31-24} = 0b11000000;
    let Inst{23-22} = sz;
    let Inst{21-16} = 0b000110;
    let Inst{15}    = v;
    let Inst{14-13} = Rs;
    let Inst{12-11} = 0b00;
    let Inst{10-5}  = op{5-0};
    let Inst{4-2}   = Zd;
    let Inst{1-0}   = 0b00;
  }
  // Instantiates the _B/_H/_S/_D four-register tile-to-vector encodings for
  // one slice direction (`v`) and one opcode (`opc`), plus their aliases.
  // `opc` forms the top three bits of the 6-bit op value handed to the
  // encoding class; the remaining bits are fixed or filled per element size.
  // The priority-1 "mov" aliases are only created when the mnemonic is
  // "mova"; the priority-0 aliases under `mnemonic` are always created.
  multiclass sme2_mova_tile_to_vec_vg4_multi_base<bit v, bits<3> opc,
                                                  string mnemonic> {
    // Byte elements: Inst{7} fixed to 0, Inst{6-5} holds the offset.
    def _B : sme2_mova_tile_to_vec_vg4_multi_base<
                 0b00, v, {opc,0,?,?},
                 ZZZZ_b_mul_r,
                 !if(v, TileVectorOpV8, TileVectorOpH8),
                 uimm2s4range, mnemonic> {
      bits<2> imm;
      let Inst{6-5} = imm;
    }

    // Halfword elements: Inst{7} fixed to 0, one ZAn bit, one offset bit.
    def _H : sme2_mova_tile_to_vec_vg4_multi_base<
                 0b01, v, {opc,0,?,?},
                 ZZZZ_h_mul_r,
                 !if(v, TileVectorOpV16, TileVectorOpH16),
                 uimm1s4range, mnemonic> {
      bits<1> ZAn;
      bits<1> imm;
      let Inst{6} = ZAn;
      let Inst{5} = imm;
    }

    // Word elements: Inst{7} fixed to 0, two ZAn bits, no offset bits.
    def _S : sme2_mova_tile_to_vec_vg4_multi_base<
                 0b10, v, {opc,0,?,?},
                 ZZZZ_s_mul_r,
                 !if(v, TileVectorOpV32, TileVectorOpH32),
                 uimm0s4range, mnemonic> {
      bits<2> ZAn;
      let Inst{6-5} = ZAn;
    }

    // Doubleword elements: all of Inst{7-5} holds ZAn.
    def _D : sme2_mova_tile_to_vec_vg4_multi_base<
                 0b11, v, {opc,?,?,?},
                 ZZZZ_d_mul_r,
                 !if(v, TileVectorOpV64, TileVectorOpH64),
                 uimm0s4range, mnemonic> {
      bits<3> ZAn;
      let Inst{7-5} = ZAn;
    }

    // "mov" aliases (priority 1) exist only for the "mova" mnemonic.
    if !eq(mnemonic, "mova") then {
      defm : sme2_mova_tile_or_array_to_vec_aliases<
                 1, !cast<Instruction>(NAME # _B),
                 ZZZZ_b_mul_r,
                 !if(v, TileVectorOpV8, TileVectorOpH8),
                 MatrixIndexGPR32Op12_15,
                 uimm2s4range, "mov">;
      defm : sme2_mova_tile_or_array_to_vec_aliases<
                 1, !cast<Instruction>(NAME # _H),
                 ZZZZ_h_mul_r,
                 !if(v, TileVectorOpV16, TileVectorOpH16),
                 MatrixIndexGPR32Op12_15,
                 uimm1s4range, "mov">;
      defm : sme2_mova_tile_or_array_to_vec_aliases<
                 1, !cast<Instruction>(NAME # _S),
                 ZZZZ_s_mul_r,
                 !if(v, TileVectorOpV32, TileVectorOpH32),
                 MatrixIndexGPR32Op12_15,
                 uimm0s4range, "mov">;
      defm : sme2_mova_tile_or_array_to_vec_aliases<
                 1, !cast<Instruction>(NAME # _D),
                 ZZZZ_d_mul_r,
                 !if(v, TileVectorOpV64, TileVectorOpH64),
                 MatrixIndexGPR32Op12_15,
                 uimm0s4range, "mov">;
    }

    // Aliases under the instruction's own mnemonic, priority 0.
    defm : sme2_mova_tile_or_array_to_vec_aliases<
               0, !cast<Instruction>(NAME # _B),
               ZZZZ_b_mul_r,
               !if(v, TileVectorOpV8, TileVectorOpH8),
               MatrixIndexGPR32Op12_15,
               uimm2s4range, mnemonic>;
    defm : sme2_mova_tile_or_array_to_vec_aliases<
               0, !cast<Instruction>(NAME # _H),
               ZZZZ_h_mul_r,
               !if(v, TileVectorOpV16, TileVectorOpH16),
               MatrixIndexGPR32Op12_15,
               uimm1s4range, mnemonic>;
    defm : sme2_mova_tile_or_array_to_vec_aliases<
               0, !cast<Instruction>(NAME # _S),
               ZZZZ_s_mul_r,
               !if(v, TileVectorOpV32, TileVectorOpH32),
               MatrixIndexGPR32Op12_15,
               uimm0s4range, mnemonic>;
    defm : sme2_mova_tile_or_array_to_vec_aliases<
               0, !cast<Instruction>(NAME # _D),
               ZZZZ_d_mul_r,
               !if(v, TileVectorOpV64, TileVectorOpH64),
               MatrixIndexGPR32Op12_15,
               uimm0s4range, mnemonic>;
  }
  // SME2 move tile to vector, four registers.
  // Uses opcode 0b100; _H passes v = 0 and _V passes v = 1.
  multiclass sme2_mova_tile_to_vec_vg4_multi<string mnemonic> {
    defm _H : sme2_mova_tile_to_vec_vg4_multi_base<0b0, 0b100, mnemonic>;
    defm _V : sme2_mova_tile_to_vec_vg4_multi_base<0b1, 0b100, mnemonic>;
  }
  // SME2p1 move tile to vector and zero tile, four registers.
  // Uses opcode 0b110; _H passes v = 0 and _V passes v = 1.
  multiclass sme2p1_movaz_tile_to_vec_vg4<string mnemonic> {
    defm _H : sme2_mova_tile_to_vec_vg4_multi_base<0b0, 0b110, mnemonic>;
    defm _V : sme2_mova_tile_to_vec_vg4_multi_base<0b1, 0b110, mnemonic>;
  }
  // Encoding class shared by the two- and four-register "array to vector
  // group" moves. Inst{4-2} is left unset here; deriving definitions place
  // the $Zd encoding there (and may overwrite Inst{1}).
  //   op         - op{3-1} is written to Inst{10-8} and op{0} to Inst{1}.
  //                When op{2} is set the source array is additionally listed
  //                as an output ($_ZAn) tied to $ZAn.
  //   vector_ty  - operand type of the destination vector group $Zd.
  //   array_ty   - operand type of the source array $ZAn.
  //   mnemonic   - assembly mnemonic.
  //   vg_acronym - text printed after the offset inside the brackets.
  class sme2_mova_array_to_vec_vg24_multi<bits<4> op, RegisterOperand vector_ty,
                                          RegisterOperand array_ty,
                                          string mnemonic, string vg_acronym>
      : I<!if(op{2}, (outs vector_ty:$Zd, array_ty:$_ZAn),
                     (outs vector_ty:$Zd)),
          (ins array_ty:$ZAn, MatrixIndexGPR32Op8_11:$Rs, sme_elm_idx0_7:$imm),
          mnemonic,
          "\t$Zd, $ZAn[$Rs, $imm, " # vg_acronym # "]",
          "", []>,
        Sched<[]> {
    bits<2> Rs;
    bits<3> imm;

    // Tie the extra array output to the array input only when it exists.
    let Constraints = !if(op{2}, "$ZAn = $_ZAn", "");

    let Inst{31-15} = 0b11000000000001100;
    let Inst{14-13} = Rs;
    let Inst{12-11} = 0b01;
    let Inst{10-8}  = op{3-1};
    let Inst{7-5}   = imm;
    let Inst{1}     = op{0};
    let Inst{0}     = 0b0;
  }
  // Move array to vector, two registers.
  //
  // Defines the instruction itself on ZZ_d_mul_r / MatrixOp64 with a "vgx2"
  // suffix, then registers priority-0 aliases under `mnemonic` for the other
  // element-size spellings, with and without "vgx2". When the mnemonic is
  // "mova" the same set is also registered under "mov", where the final
  // alias (64-bit + "vgx2") uses priority 1.
  multiclass sme2_mova_array_to_vec_vg2_multi<bits<3> opc, string mnemonic> {
    // op{0} is left unset; Inst{4-1} is fully taken by Zd.
    def NAME : sme2_mova_array_to_vec_vg24_multi<{opc,?}, ZZ_d_mul_r, MatrixOp64,
                                                 mnemonic, "vgx2"> {
      bits<4> Zd;
      let Inst{4-1} = Zd;
    }

    // `mnemonic` without a vector-group suffix.
    defm : sme2_mova_tile_or_array_to_vec_aliases<
               0, !cast<Instruction>(NAME),
               ZZ_b_mul_r, MatrixOp8,
               MatrixIndexGPR32Op8_11,
               sme_elm_idx0_7, mnemonic>;
    defm : sme2_mova_tile_or_array_to_vec_aliases<
               0, !cast<Instruction>(NAME),
               ZZ_h_mul_r, MatrixOp16,
               MatrixIndexGPR32Op8_11,
               sme_elm_idx0_7, mnemonic>;
    defm : sme2_mova_tile_or_array_to_vec_aliases<
               0, !cast<Instruction>(NAME),
               ZZ_s_mul_r, MatrixOp32,
               MatrixIndexGPR32Op8_11,
               sme_elm_idx0_7, mnemonic>;
    defm : sme2_mova_tile_or_array_to_vec_aliases<
               0, !cast<Instruction>(NAME),
               ZZ_d_mul_r, MatrixOp64,
               MatrixIndexGPR32Op8_11,
               sme_elm_idx0_7, mnemonic>;

    // `mnemonic` with the "vgx2" suffix.
    defm : sme2_mova_tile_or_array_to_vec_aliases<
               0, !cast<Instruction>(NAME),
               ZZ_b_mul_r, MatrixOp8,
               MatrixIndexGPR32Op8_11,
               sme_elm_idx0_7, mnemonic, "vgx2">;
    defm : sme2_mova_tile_or_array_to_vec_aliases<
               0, !cast<Instruction>(NAME),
               ZZ_h_mul_r, MatrixOp16,
               MatrixIndexGPR32Op8_11,
               sme_elm_idx0_7, mnemonic, "vgx2">;
    defm : sme2_mova_tile_or_array_to_vec_aliases<
               0, !cast<Instruction>(NAME),
               ZZ_s_mul_r, MatrixOp32,
               MatrixIndexGPR32Op8_11,
               sme_elm_idx0_7, mnemonic, "vgx2">;

    // The "mov" spellings exist only for the "mova" mnemonic.
    if !eq(mnemonic, "mova") then {
      // "mov" without a vector-group suffix.
      defm : sme2_mova_tile_or_array_to_vec_aliases<
                 0, !cast<Instruction>(NAME),
                 ZZ_b_mul_r, MatrixOp8,
                 MatrixIndexGPR32Op8_11,
                 sme_elm_idx0_7, "mov">;
      defm : sme2_mova_tile_or_array_to_vec_aliases<
                 0, !cast<Instruction>(NAME),
                 ZZ_h_mul_r, MatrixOp16,
                 MatrixIndexGPR32Op8_11,
                 sme_elm_idx0_7, "mov">;
      defm : sme2_mova_tile_or_array_to_vec_aliases<
                 0, !cast<Instruction>(NAME),
                 ZZ_s_mul_r, MatrixOp32,
                 MatrixIndexGPR32Op8_11,
                 sme_elm_idx0_7, "mov">;
      defm : sme2_mova_tile_or_array_to_vec_aliases<
                 0, !cast<Instruction>(NAME),
                 ZZ_d_mul_r, MatrixOp64,
                 MatrixIndexGPR32Op8_11,
                 sme_elm_idx0_7, "mov">;

      // "mov" with the "vgx2" suffix.
      defm : sme2_mova_tile_or_array_to_vec_aliases<
                 0, !cast<Instruction>(NAME),
                 ZZ_b_mul_r, MatrixOp8,
                 MatrixIndexGPR32Op8_11,
                 sme_elm_idx0_7, "mov", "vgx2">;
      defm : sme2_mova_tile_or_array_to_vec_aliases<
                 0, !cast<Instruction>(NAME),
                 ZZ_h_mul_r, MatrixOp16,
                 MatrixIndexGPR32Op8_11,
                 sme_elm_idx0_7, "mov", "vgx2">;
      defm : sme2_mova_tile_or_array_to_vec_aliases<
                 0, !cast<Instruction>(NAME),
                 ZZ_s_mul_r, MatrixOp32,
                 MatrixIndexGPR32Op8_11,
                 sme_elm_idx0_7, "mov", "vgx2">;
      // The only alias in this multiclass registered with priority 1.
      defm : sme2_mova_tile_or_array_to_vec_aliases<
                 1, !cast<Instruction>(NAME),
                 ZZ_d_mul_r, MatrixOp64,
                 MatrixIndexGPR32Op8_11,
                 sme_elm_idx0_7, "mov", "vgx2">;
    }
  }
  // Move array to vector, four registers.
  //
  // Defines the instruction itself on ZZZZ_d_mul_r / MatrixOp64 with a "vgx4"
  // suffix, then registers priority-0 aliases under `mnemonic` for the other
  // element-size spellings, with and without "vgx4". When the mnemonic is
  // "mova" the same set is also registered under "mov", where the final
  // alias (64-bit + "vgx4") uses priority 1.
  multiclass sme2_mova_array_to_vec_vg4_multi<bits<4> opc, string mnemonic> {
    def NAME : sme2_mova_array_to_vec_vg24_multi<opc, ZZZZ_d_mul_r, MatrixOp64,
                                                 mnemonic, "vgx4"> {
      bits<3> Zd;
      let Inst{4-2} = Zd;
    }

    // `mnemonic` without a vector-group suffix.
    defm : sme2_mova_tile_or_array_to_vec_aliases<
               0, !cast<Instruction>(NAME),
               ZZZZ_b_mul_r, MatrixOp8,
               MatrixIndexGPR32Op8_11,
               sme_elm_idx0_7, mnemonic>;
    defm : sme2_mova_tile_or_array_to_vec_aliases<
               0, !cast<Instruction>(NAME),
               ZZZZ_h_mul_r, MatrixOp16,
               MatrixIndexGPR32Op8_11,
               sme_elm_idx0_7, mnemonic>;
    defm : sme2_mova_tile_or_array_to_vec_aliases<
               0, !cast<Instruction>(NAME),
               ZZZZ_s_mul_r, MatrixOp32,
               MatrixIndexGPR32Op8_11,
               sme_elm_idx0_7, mnemonic>;
    defm : sme2_mova_tile_or_array_to_vec_aliases<
               0, !cast<Instruction>(NAME),
               ZZZZ_d_mul_r, MatrixOp64,
               MatrixIndexGPR32Op8_11,
               sme_elm_idx0_7, mnemonic>;

    // `mnemonic` with the "vgx4" suffix.
    defm : sme2_mova_tile_or_array_to_vec_aliases<
               0, !cast<Instruction>(NAME),
               ZZZZ_b_mul_r, MatrixOp8,
               MatrixIndexGPR32Op8_11,
               sme_elm_idx0_7, mnemonic, "vgx4">;
    defm : sme2_mova_tile_or_array_to_vec_aliases<
               0, !cast<Instruction>(NAME),
               ZZZZ_h_mul_r, MatrixOp16,
               MatrixIndexGPR32Op8_11,
               sme_elm_idx0_7, mnemonic, "vgx4">;
    defm : sme2_mova_tile_or_array_to_vec_aliases<
               0, !cast<Instruction>(NAME),
               ZZZZ_s_mul_r, MatrixOp32,
               MatrixIndexGPR32Op8_11,
               sme_elm_idx0_7, mnemonic, "vgx4">;

    // The "mov" spellings exist only for the "mova" mnemonic.
    if !eq(mnemonic, "mova") then {
      // "mov" without a vector-group suffix.
      defm : sme2_mova_tile_or_array_to_vec_aliases<
                 0, !cast<Instruction>(NAME),
                 ZZZZ_b_mul_r, MatrixOp8,
                 MatrixIndexGPR32Op8_11,
                 sme_elm_idx0_7, "mov">;
      defm : sme2_mova_tile_or_array_to_vec_aliases<
                 0, !cast<Instruction>(NAME),
                 ZZZZ_h_mul_r, MatrixOp16,
                 MatrixIndexGPR32Op8_11,
                 sme_elm_idx0_7, "mov">;
      defm : sme2_mova_tile_or_array_to_vec_aliases<
                 0, !cast<Instruction>(NAME),
                 ZZZZ_s_mul_r, MatrixOp32,
                 MatrixIndexGPR32Op8_11,
                 sme_elm_idx0_7, "mov">;
      defm : sme2_mova_tile_or_array_to_vec_aliases<
                 0, !cast<Instruction>(NAME),
                 ZZZZ_d_mul_r, MatrixOp64,
                 MatrixIndexGPR32Op8_11,
                 sme_elm_idx0_7, "mov">;

      // "mov" with the "vgx4" suffix.
      defm : sme2_mova_tile_or_array_to_vec_aliases<
                 0, !cast<Instruction>(NAME),
                 ZZZZ_b_mul_r, MatrixOp8,
                 MatrixIndexGPR32Op8_11,
                 sme_elm_idx0_7, "mov", "vgx4">;
      defm : sme2_mova_tile_or_array_to_vec_aliases<
                 0, !cast<Instruction>(NAME),
                 ZZZZ_h_mul_r, MatrixOp16,
                 MatrixIndexGPR32Op8_11,
                 sme_elm_idx0_7, "mov", "vgx4">;
      defm : sme2_mova_tile_or_array_to_vec_aliases<
                 0, !cast<Instruction>(NAME),
                 ZZZZ_s_mul_r, MatrixOp32,
                 MatrixIndexGPR32Op8_11,
                 sme_elm_idx0_7, "mov", "vgx4">;
      // The only alias in this multiclass registered with priority 1.
      defm : sme2_mova_tile_or_array_to_vec_aliases<
                 1, !cast<Instruction>(NAME),
                 ZZZZ_d_mul_r, MatrixOp64,
                 MatrixIndexGPR32Op8_11,
                 sme_elm_idx0_7, "mov", "vgx4">;
    }
  }
  3469. //===----------------------------------------------------------------------===//
  3470. // SME2 multi-vec saturating shift right narrow
  3471. class sme2_sat_shift_vector_vg2<string mnemonic, bit op, bit u>
  3472. : I<(outs ZPR16:$Zd), (ins ZZ_s_mul_r:$Zn, vecshiftR16:$imm4),
  3473. mnemonic, "\t$Zd, $Zn, $imm4",
  3474. "", []>, Sched<[]> {
  3475. bits<4> imm4;
  3476. bits<4> Zn;
  3477. bits<5> Zd;
  3478. let Inst{31-21} = 0b11000001111;
  3479. let Inst{20} = op;
  3480. let Inst{19-16} = imm4;
  3481. let Inst{15-10} = 0b110101;
  3482. let Inst{9-6} = Zn;
  3483. let Inst{5} = u;
  3484. let Inst{4-0} = Zd;
  3485. }
  // Wraps the two-register saturating shift class so that it is instantiated
  // with an _H suffix; no other variants are defined.
  multiclass sme2_sat_shift_vector_vg2<string mnemonic, bit op, bit u> {
    def _H : sme2_sat_shift_vector_vg2<mnemonic, op, u>;
  }
  // Four-register saturating shift encoding class. The shift immediate bits
  // (Inst{20-16}) are not set here; deriving definitions encode them.
  //   sz        - written to Inst{23-22}; bits passed as '?' are left for the
  //               deriving definition to set.
  //   op        - op{2} is written to Inst{10} and op{1-0} to Inst{6-5}.
  //   zpr_ty    - operand type of the destination register $Zd.
  //   vector_ty - operand type of the source vector group $Zn.
  //   imm_ty    - operand type of the shift immediate $imm.
  //   mnemonic  - assembly mnemonic.
  class sme2_sat_shift_vector_vg4<bits<2> sz, bits<3> op, ZPRRegOp zpr_ty,
                                  RegisterOperand vector_ty, Operand imm_ty,
                                  string mnemonic>
      : I<(outs zpr_ty:$Zd),
          (ins vector_ty:$Zn, imm_ty:$imm),
          mnemonic, "\t$Zd, $Zn, $imm",
          "", []>,
        Sched<[]> {
    bits<3> Zn;
    bits<5> Zd;

    let Inst{31-24} = 0b11000001;
    let Inst{23-22} = sz;
    let Inst{21}    = 0b1;
    //  Inst{20-16} = imm5;
    let Inst{15-11} = 0b11011;
    let Inst{10}    = op{2};
    let Inst{9-7}   = Zn;
    let Inst{6-5}   = op{1-0};
    let Inst{4-0}   = Zd;
  }
  // Instantiates the four-vector shift format for two destination sizes.
  //   _B : ZPR8 destination, ZZZZ_s_mul_r source, 5-bit immediate.
  //   _H : ZPR16 destination, ZZZZ_d_mul_r source, 6-bit immediate whose top
  //        bit occupies Inst{22} (left as '?' in the sz argument).
  multiclass sme2_sat_shift_vector_vg4<string mnemonic, bits<3> op> {
    def _B : sme2_sat_shift_vector_vg4<{0,1}, op, ZPR8, ZZZZ_s_mul_r,
                                       vecshiftR32, mnemonic> {
      bits<5> imm;

      let Inst{20-16} = imm;
    }

    def _H : sme2_sat_shift_vector_vg4<{1,?}, op, ZPR16, ZZZZ_d_mul_r,
                                       vecshiftR64, mnemonic> {
      bits<6> imm;

      // The immediate is split: bit 5 into Inst{22}, bits 4-0 into Inst{20-16}.
      let Inst{22}    = imm{5};
      let Inst{20-16} = imm{4-0};
    }
  }
  3520. //===----------------------------------------------------------------------===//
  3521. // SME2 Multi-vector - SVE Select
  // Common instruction format shared by the two- and four-vector select
  // classes.
  //   sz        - placed in Inst{23-22}.
  //   op        - 4-bit value scattered over Inst{17-16} (op{3-2}),
  //               Inst{6} (op{1}) and Inst{1} (op{0}).
  //   vector_ty - register operand type used for $Zd, $Zn and $Zm.
  //   mnemonic  - assembly mnemonic.
  // Only the predicate-as-counter operand $PNg is encoded here; the vector
  // register fields are not assigned by this class.
  class sme2_sel_vector_vg24<bits<2> sz, bits<4> op, RegisterOperand vector_ty,
                             string mnemonic>
      : I<(outs vector_ty:$Zd),
          (ins PNRAny_p8to15:$PNg, vector_ty:$Zn, vector_ty:$Zm),
          mnemonic,
          "\t$Zd, $PNg, $Zn, $Zm",
          "", []>,
        Sched<[]> {
    bits<3> PNg;

    let Inst{31-24} = 0b11000001;
    let Inst{23-22} = sz;
    let Inst{21}    = 0b1;
    let Inst{17-16} = op{3-2};
    let Inst{15-13} = 0b100;
    let Inst{12-10} = PNg;
    let Inst{6}     = op{1};
    let Inst{5}     = 0b0;
    let Inst{1}     = op{0};
    let Inst{0}     = 0b0;
  }
  // Two-vector select format. Passes op = {?,0,?,?} to the shared base so
  // that Inst{17}, Inst{6} and Inst{1} stay open for the 4-bit register
  // fields assigned below.
  //   sz        - forwarded to the base class.
  //   vector_ty - register operand type for all three vector operands.
  //   mnemonic  - assembly mnemonic.
  class sme2_sel_vector_vg2<bits<2> sz, RegisterOperand vector_ty,
                            string mnemonic>
      : sme2_sel_vector_vg24<sz, {?,0,?,?}, vector_ty, mnemonic> {
    // Operand encoding fields.
    bits<4> Zd;
    bits<4> Zn;
    bits<4> Zm;

    let Inst{20-17} = Zm;
    let Inst{9-6}   = Zn;
    let Inst{4-1}   = Zd;
  }
  // Instantiates the two-vector select format once per element-size suffix
  // (_B, _H, _S, _D), pairing each sz value with its ZZ_*_mul_r operand.
  multiclass sme2_sel_vector_vg2<string mnemonic> {
    def _B : sme2_sel_vector_vg2<0b00, ZZ_b_mul_r, mnemonic>;
    def _H : sme2_sel_vector_vg2<0b01, ZZ_h_mul_r, mnemonic>;
    def _S : sme2_sel_vector_vg2<0b10, ZZ_s_mul_r, mnemonic>;
    def _D : sme2_sel_vector_vg2<0b11, ZZ_d_mul_r, mnemonic>;
  }
  // Four-vector select format. Passes the fully specified op = 0b0100 to the
  // shared base and adds the 3-bit register fields.
  //   sz        - forwarded to the base class.
  //   vector_ty - register operand type for all three vector operands.
  //   mnemonic  - assembly mnemonic.
  class sme2_sel_vector_vg4<bits<2> sz, RegisterOperand vector_ty,
                            string mnemonic>
      : sme2_sel_vector_vg24<sz, 0b0100, vector_ty, mnemonic> {
    // Operand encoding fields.
    bits<3> Zd;
    bits<3> Zn;
    bits<3> Zm;

    let Inst{20-18} = Zm;
    let Inst{9-7}   = Zn;
    let Inst{4-2}   = Zd;
  }
  // Instantiates the four-vector select format once per element-size suffix
  // (_B, _H, _S, _D), pairing each sz value with its ZZZZ_*_mul_r operand.
  multiclass sme2_sel_vector_vg4<string mnemonic> {
    def _B : sme2_sel_vector_vg4<0b00, ZZZZ_b_mul_r, mnemonic>;
    def _H : sme2_sel_vector_vg4<0b01, ZZZZ_h_mul_r, mnemonic>;
    def _S : sme2_sel_vector_vg4<0b10, ZZZZ_s_mul_r, mnemonic>;
    def _D : sme2_sel_vector_vg4<0b11, ZZZZ_d_mul_r, mnemonic>;
  }
  3572. //===----------------------------------------------------------------------===//
  3573. // Non contiguous Load and Store
  // Load format, two-register destination, [base, register offset] address.
  //   msz             - placed in Inst{14-13}.
  //   n               - placed in Inst{3}.
  //   multi_vector_ty - register operand type of the destination $Zt.
  //   gpr_ty          - register operand type of the offset $Rm.
  //   mnemonic        - assembly mnemonic.
  // The 4-bit $Zt field is split around Inst{3}: Zt{3} goes to Inst{4} and
  // Zt{2-0} to Inst{2-0}.
  class sme2_ld_vector_vg2_multi_scalar_scalar<bits<2> msz, bit n,
                                               RegisterOperand multi_vector_ty,
                                               RegisterOperand gpr_ty,
                                               string mnemonic>
      : I<(outs multi_vector_ty:$Zt),
          (ins PNRAny_p8to15:$PNg, GPR64sp:$Rn, gpr_ty:$Rm),
          mnemonic,
          "\t$Zt, $PNg/z, [$Rn, $Rm]",
          "", []>,
        Sched<[]> {
    // Operand encoding fields.
    bits<4> Zt;
    bits<3> PNg;
    bits<5> Rn;
    bits<5> Rm;

    let Inst{31-21} = 0b10100001000;
    let Inst{20-16} = Rm;
    let Inst{15}    = 0b0;
    let Inst{14-13} = msz;
    let Inst{12-10} = PNg;
    let Inst{9-5}   = Rn;
    let Inst{4}     = Zt{3};
    let Inst{3}     = n;
    let Inst{2-0}   = Zt{2-0};

    let mayLoad = 1;
  }
  // Load format, four-register destination, [base, register offset] address.
  //   msz             - placed in Inst{14-13}.
  //   n               - placed in Inst{3}.
  //   multi_vector_ty - register operand type of the destination $Zt.
  //   gpr_ty          - register operand type of the offset $Rm.
  //   mnemonic        - assembly mnemonic.
  // The 3-bit $Zt field is split: Zt{2} goes to Inst{4} and Zt{1-0} to
  // Inst{1-0}; Inst{2} is fixed to zero.
  class sme2_ld_vector_vg4_multi_scalar_scalar<bits<2> msz, bit n,
                                               RegisterOperand multi_vector_ty,
                                               RegisterOperand gpr_ty,
                                               string mnemonic>
      : I<(outs multi_vector_ty:$Zt),
          (ins PNRAny_p8to15:$PNg, GPR64sp:$Rn, gpr_ty:$Rm),
          mnemonic,
          "\t$Zt, $PNg/z, [$Rn, $Rm]",
          "", []>,
        Sched<[]> {
    // Operand encoding fields.
    bits<3> Zt;
    bits<3> PNg;
    bits<5> Rn;
    bits<5> Rm;

    let Inst{31-21} = 0b10100001000;
    let Inst{20-16} = Rm;
    let Inst{15}    = 0b1;
    let Inst{14-13} = msz;
    let Inst{12-10} = PNg;
    let Inst{9-5}   = Rn;
    let Inst{4}     = Zt{2};
    let Inst{3}     = n;
    let Inst{2}     = 0b0;
    let Inst{1-0}   = Zt{1-0};

    let mayLoad = 1;
  }
  // Shared load format for the two- and four-register forms with a
  // [base, immediate, mul vl] address.
  //   msz             - placed in Inst{14-13}.
  //   n               - placed in Inst{3}.
  //   op              - op{1} goes to Inst{15}, op{0} to Inst{2}.
  //   multi_vector_ty - register operand type of the destination $Zt.
  //   index_ty        - operand type of the immediate $imm4.
  //   mnemonic        - assembly mnemonic.
  // The $Zt register bits are not assigned here; the instantiating record
  // provides them.
  class sme2_ld_vector_vg24_multi_scalar_immediate<bits<2> msz, bit n, bits<2> op,
                                                   RegisterOperand multi_vector_ty,
                                                   Operand index_ty,
                                                   string mnemonic>
      : I<(outs multi_vector_ty:$Zt),
          (ins PNRAny_p8to15:$PNg, GPR64sp:$Rn, index_ty:$imm4),
          mnemonic,
          "\t$Zt, $PNg/z, [$Rn, $imm4, mul vl]",
          "", []>,
        Sched<[]> {
    // Operand encoding fields.
    bits<3> PNg;
    bits<5> Rn;
    bits<4> imm4;

    let Inst{31-20} = 0b101000010100;
    let Inst{19-16} = imm4;
    let Inst{15}    = op{1};
    let Inst{14-13} = msz;
    let Inst{12-10} = PNg;
    let Inst{9-5}   = Rn;
    let Inst{3}     = n;
    let Inst{2}     = op{0};

    let mayLoad = 1;
  }
  // Two-register immediate-offset load: defines the instruction record
  // (named NAME) plus an alias that omits the offset and encodes it as 0.
  // op is {0,?}, so Inst{15} is 0 and Inst{2} is left open for Zt{2}.
  multiclass sme2_ld_vector_vg2_multi_scalar_immediate<bits<2> msz, bit n,
                                                       RegisterOperand multi_vector_ty,
                                                       Operand index_ty,
                                                       string mnemonic> {
    def NAME : sme2_ld_vector_vg24_multi_scalar_immediate<msz, n, {0,?},
                                                          multi_vector_ty,
                                                          index_ty, mnemonic> {
      bits<4> Zt;

      let Inst{4}   = Zt{3};
      let Inst{2-0} = Zt{2-0};
    }

    // Short form without an explicit immediate.
    def : InstAlias<mnemonic # "\t$Zt, $PNg/z, [$Rn]",
                    (!cast<Instruction>(NAME) multi_vector_ty:$Zt,
                                              PNRAny_p8to15:$PNg,
                                              GPR64sp:$Rn, 0), 1>;
  }
  // Four-register immediate-offset load: defines the instruction record
  // (named NAME) plus an alias that omits the offset and encodes it as 0.
  // op is 0b10, so Inst{15} is 1 and Inst{2} is 0.
  multiclass sme2_ld_vector_vg4_multi_scalar_immediate<bits<2> msz, bit n,
                                                       RegisterOperand multi_vector_ty,
                                                       Operand index_ty,
                                                       string mnemonic> {
    def NAME : sme2_ld_vector_vg24_multi_scalar_immediate<msz, n, 0b10,
                                                          multi_vector_ty,
                                                          index_ty, mnemonic> {
      bits<3> Zt;

      let Inst{4}   = Zt{2};
      let Inst{1-0} = Zt{1-0};
    }

    // Short form without an explicit immediate.
    def : InstAlias<mnemonic # "\t$Zt, $PNg/z, [$Rn]",
                    (!cast<Instruction>(NAME) multi_vector_ty:$Zt,
                                              PNRAny_p8to15:$PNg,
                                              GPR64sp:$Rn, 0), 1>;
  }
  3670. //===----------------------------------------------------------------------===//
  3671. // SME2 Non-Contiguous Store
  // Store format, two-register source, [base, register offset] address.
  //   msz             - placed in Inst{14-13}.
  //   n               - placed in Inst{3}.
  //   multi_vector_ty - register operand type of the stored $Zt.
  //   gpr_ty          - register operand type of the offset $Rm.
  //   mnemonic        - assembly mnemonic.
  // The 4-bit $Zt field is split around Inst{3}: Zt{3} goes to Inst{4} and
  // Zt{2-0} to Inst{2-0}.
  class sme2_st_vector_vg2_multi_scalar_scalar<bits<2> msz, bit n,
                                               RegisterOperand multi_vector_ty,
                                               RegisterOperand gpr_ty,
                                               string mnemonic>
      : I<(outs),
          (ins multi_vector_ty:$Zt, PNRAny_p8to15:$PNg, GPR64sp:$Rn, gpr_ty:$Rm),
          mnemonic,
          "\t$Zt, $PNg, [$Rn, $Rm]",
          "", []>,
        Sched<[]> {
    // Operand encoding fields.
    bits<4> Zt;
    bits<3> PNg;
    bits<5> Rn;
    bits<5> Rm;

    let Inst{31-21} = 0b10100001001;
    let Inst{20-16} = Rm;
    let Inst{15}    = 0b0;
    let Inst{14-13} = msz;
    let Inst{12-10} = PNg;
    let Inst{9-5}   = Rn;
    let Inst{4}     = Zt{3};
    let Inst{3}     = n;
    let Inst{2-0}   = Zt{2-0};

    let mayStore = 1;
  }
  // Store format, four-register source, [base, register offset] address.
  //   msz             - placed in Inst{14-13}.
  //   n               - placed in Inst{3}.
  //   multi_vector_ty - register operand type of the stored $Zt.
  //   gpr_ty          - register operand type of the offset $Rm.
  //   mnemonic        - assembly mnemonic.
  // The 3-bit $Zt field is split: Zt{2} goes to Inst{4} and Zt{1-0} to
  // Inst{1-0}; Inst{2} is fixed to zero.
  class sme2_st_vector_vg4_multi_scalar_scalar<bits<2> msz, bit n,
                                               RegisterOperand multi_vector_ty,
                                               RegisterOperand gpr_ty,
                                               string mnemonic>
      : I<(outs),
          (ins multi_vector_ty:$Zt, PNRAny_p8to15:$PNg, GPR64sp:$Rn, gpr_ty:$Rm),
          mnemonic,
          "\t$Zt, $PNg, [$Rn, $Rm]",
          "", []>,
        Sched<[]> {
    // Operand encoding fields.
    bits<3> Zt;
    bits<3> PNg;
    bits<5> Rn;
    bits<5> Rm;

    let Inst{31-21} = 0b10100001001;
    let Inst{20-16} = Rm;
    let Inst{15}    = 0b1;
    let Inst{14-13} = msz;
    let Inst{12-10} = PNg;
    let Inst{9-5}   = Rn;
    let Inst{4}     = Zt{2};
    let Inst{3}     = n;
    let Inst{2}     = 0b0;
    let Inst{1-0}   = Zt{1-0};

    let mayStore = 1;
  }
  // Shared store format for the two- and four-register forms with a
  // [base, immediate, mul vl] address.
  //   msz             - placed in Inst{14-13}.
  //   n               - placed in Inst{3}.
  //   op              - op{1} goes to Inst{15}, op{0} to Inst{2}.
  //   multi_vector_ty - register operand type of the stored $Zt.
  //   index_ty        - operand type of the immediate $imm4.
  //   mnemonic        - assembly mnemonic.
  // The $Zt register bits are not assigned here; the instantiating record
  // provides them.
  class sme2_st_vector_vg24_multi_scalar_immediate<bits<2> msz, bit n, bits<2> op,
                                                   RegisterOperand multi_vector_ty,
                                                   Operand index_ty,
                                                   string mnemonic>
      : I<(outs),
          (ins multi_vector_ty:$Zt, PNRAny_p8to15:$PNg, GPR64sp:$Rn,
               index_ty:$imm4),
          mnemonic,
          "\t$Zt, $PNg, [$Rn, $imm4, mul vl]",
          "", []>,
        Sched<[]> {
    // Operand encoding fields.
    bits<3> PNg;
    bits<5> Rn;
    bits<4> imm4;

    let Inst{31-20} = 0b101000010110;
    let Inst{19-16} = imm4;
    let Inst{15}    = op{1};
    let Inst{14-13} = msz;
    let Inst{12-10} = PNg;
    let Inst{9-5}   = Rn;
    let Inst{3}     = n;
    let Inst{2}     = op{0};

    let mayStore = 1;
  }
  // Two-register immediate-offset store: defines the instruction record
  // (named NAME) plus an alias that omits the offset and encodes it as 0.
  // op is {0,?}, so Inst{15} is 0 and Inst{2} is left open for Zt{2}.
  multiclass sme2_st_vector_vg2_multi_scalar_immediate<bits<2> msz, bit n,
                                                       RegisterOperand multi_vector_ty,
                                                       Operand index_ty,
                                                       string mnemonic> {
    def NAME : sme2_st_vector_vg24_multi_scalar_immediate<msz, n, {0,?},
                                                          multi_vector_ty,
                                                          index_ty, mnemonic> {
      bits<4> Zt;

      let Inst{4}   = Zt{3};
      let Inst{2-0} = Zt{2-0};
    }

    // Short form without an explicit immediate.
    def : InstAlias<mnemonic # "\t$Zt, $PNg, [$Rn]",
                    (!cast<Instruction>(NAME) multi_vector_ty:$Zt,
                                              PNRAny_p8to15:$PNg,
                                              GPR64sp:$Rn, 0), 1>;
  }
  // Four-register immediate-offset store: defines the instruction record
  // (named NAME) plus an alias that omits the offset and encodes it as 0.
  // op is 0b10, so Inst{15} is 1 and Inst{2} is 0.
  multiclass sme2_st_vector_vg4_multi_scalar_immediate<bits<2> msz, bit n,
                                                       RegisterOperand multi_vector_ty,
                                                       Operand index_ty,
                                                       string mnemonic> {
    def NAME : sme2_st_vector_vg24_multi_scalar_immediate<msz, n, 0b10,
                                                          multi_vector_ty,
                                                          index_ty, mnemonic> {
      bits<3> Zt;

      let Inst{4}   = Zt{2};
      let Inst{1-0} = Zt{1-0};
    }

    // Short form without an explicit immediate.
    def : InstAlias<mnemonic # "\t$Zt, $PNg, [$Rn]",
                    (!cast<Instruction>(NAME) multi_vector_ty:$Zt,
                                              PNRAny_p8to15:$PNg,
                                              GPR64sp:$Rn, 0), 1>;
  }
  3768. //===----------------------------------------------------------------------===//
  3769. // SME2.1
  3770. //===----------------------------------------------------------------------===//
  3771. // SME zeroing move array to vector
  // Instruction format for the tile-slice to vector move that also lists the
  // tile as an output.
  //   sz        - placed in Inst{23-22}.
  //   q         - placed in Inst{16}.
  //   v         - placed in Inst{15}.
  //   vector_ty - register operand type of the destination $Zd.
  //   tile_ty   - register operand type of the tile ($ZAn / $_ZAn).
  //   index_ty  - operand type of the slice immediate $imm.
  //   mnemonic  - assembly mnemonic.
  // The tile appears as both an output ($ZAn) and an input ($_ZAn); the
  // Constraints string ties the two together. Inst{8-5} (tile number and
  // immediate) is not assigned by this class.
  class sme2p1_movaz_tile_to_vec_base<bits<2> sz, bit q, bit v, ZPRRegOp vector_ty,
                                      RegisterOperand tile_ty, Operand index_ty,
                                      string mnemonic>
      : I<(outs vector_ty:$Zd, tile_ty:$ZAn),
          (ins tile_ty:$_ZAn, MatrixIndexGPR32Op12_15:$Rs, index_ty:$imm),
          mnemonic,
          "\t$Zd, $ZAn[$Rs, $imm]",
          "", []>,
        Sched<[]> {
    // Operand encoding fields.
    bits<5> Zd;
    bits<2> Rs;

    let Inst{31-24} = 0b11000000;
    let Inst{23-22} = sz;
    let Inst{21-17} = 0b00001;
    let Inst{16}    = q;
    let Inst{15}    = v;
    let Inst{14-13} = Rs;
    let Inst{12-9}  = 0b0001;
    let Inst{4-0}   = Zd;

    let Constraints = "$ZAn = $_ZAn";
  }
  // Instantiates the tile-to-vector format for each element size.
  //   v        - selects the TileVectorOpV* operand when set and the
  //              TileVectorOpH* operand otherwise; also forwarded to the class.
  //   mnemonic - assembly mnemonic.
  // Inst{8-5} is shared between the tile number and the slice immediate; the
  // split differs per record:
  //   _B : imm[3:0]
  //   _H : ZAn[0]   : imm[2:0]
  //   _S : ZAn[1:0] : imm[1:0]
  //   _D : ZAn[2:0] : imm[0]
  //   _Q : ZAn[3:0]
  multiclass sme2p1_movaz_tile_to_vec_base<bit v, string mnemonic> {
    def _B : sme2p1_movaz_tile_to_vec_base<0b00, 0b0, v, ZPR8,
                                           !if(v, TileVectorOpV8,
                                                  TileVectorOpH8),
                                           sme_elm_idx0_15, mnemonic> {
      bits<4> imm;

      let Inst{8-5} = imm;
    }

    def _H : sme2p1_movaz_tile_to_vec_base<0b01, 0b0, v, ZPR16,
                                           !if(v, TileVectorOpV16,
                                                  TileVectorOpH16),
                                           sme_elm_idx0_7, mnemonic> {
      bits<1> ZAn;
      bits<3> imm;

      let Inst{8}   = ZAn;
      let Inst{7-5} = imm;
    }

    def _S : sme2p1_movaz_tile_to_vec_base<0b10, 0b0, v, ZPR32,
                                           !if(v, TileVectorOpV32,
                                                  TileVectorOpH32),
                                           sme_elm_idx0_3, mnemonic> {
      bits<2> ZAn;
      bits<2> imm;

      let Inst{8-7} = ZAn;
      let Inst{6-5} = imm;
    }

    def _D : sme2p1_movaz_tile_to_vec_base<0b11, 0b0, v, ZPR64,
                                           !if(v, TileVectorOpV64,
                                                  TileVectorOpH64),
                                           sme_elm_idx0_1, mnemonic> {
      bits<3> ZAn;
      bits<1> imm;

      let Inst{8-6} = ZAn;
      let Inst{5}   = imm;
    }

    // Same sz as _D but with q set; no immediate bits are encoded.
    def _Q : sme2p1_movaz_tile_to_vec_base<0b11, 0b1, v, ZPR128,
                                           !if(v, TileVectorOpV128,
                                                  TileVectorOpH128),
                                           sme_elm_idx0_0, mnemonic> {
      bits<4> ZAn;

      let Inst{8-5} = ZAn;
    }
  }
  // Expands the per-element-size records twice: "_H" with v = 0 and "_V"
  // with v = 1.
  multiclass sme2p1_movaz_tile_to_vec<string mnemonic> {
    defm _H : sme2p1_movaz_tile_to_vec_base<0b0, mnemonic>;
    defm _V : sme2p1_movaz_tile_to_vec_base<0b1, mnemonic>;
  }
  3833. //===----------------------------------------------------------------------===//
  3834. // SME2.1 multiple vectors zero array
  // Instruction format operating on a MatrixOp64 array operand indexed by
  // [$Rv, $imm].
  //   opc        - 6-bit opcode: opc{5-3} goes to Inst{17-15} and opc{2-0}
  //                to Inst{2-0}.
  //   index_ty   - operand type of the immediate $imm.
  //   mnemonic   - assembly mnemonic.
  //   vg_acronym - optional suffix; when non-empty it is appended inside the
  //                brackets of the asm string as ", <vg_acronym>".
  // The array appears as both an output ($ZAd) and an input ($_ZAd), tied via
  // the Constraints string.
  class sme2p1_zero_matrix<bits<6> opc, Operand index_ty, string mnemonic,
                           string vg_acronym="">
      : I<(outs MatrixOp64:$ZAd),
          (ins MatrixOp64:$_ZAd, MatrixIndexGPR32Op8_11:$Rv, index_ty:$imm),
          mnemonic,
          "\t$ZAd[$Rv, $imm" # !if(!eq(vg_acronym, ""), "", ", " # vg_acronym) # "]",
          "", []>,
        Sched<[]> {
    bits<2> Rv;

    let Inst{31-18} = 0b11000000000011;
    let Inst{17-15} = opc{5-3};
    let Inst{14-13} = Rv;
    let Inst{12-3}  = 0b0000000000;
    let Inst{2-0}   = opc{2-0};

    let Constraints = "$ZAd = $_ZAd";
  }
  // Instantiates every variant of the array format above. In each record the
  // low opc bits are left as '?' and then filled with the immediate, so the
  // immediate width shrinks as the fixed opc prefix grows:
  //   3-bit imm : _VG2_Z, _2Z, _VG4_Z
  //   2-bit imm : _VG2_2Z, _VG4_2Z, _4Z
  //   1-bit imm : _VG2_4Z, _VG4_4Z
  multiclass sme2p1_zero_matrix<string mnemonic> {
    def _VG2_Z : sme2p1_zero_matrix<{0b000,?,?,?}, sme_elm_idx0_7,
                                    mnemonic, "vgx2"> {
      bits<3> imm;

      let Inst{2-0} = imm;
    }

    def _2Z : sme2p1_zero_matrix<{0b001,?,?,?}, uimm3s2range, mnemonic> {
      bits<3> imm;

      let Inst{2-0} = imm;
    }

    def _VG2_2Z : sme2p1_zero_matrix<{0b0100,?,?}, uimm2s2range,
                                     mnemonic, "vgx2"> {
      bits<2> imm;

      let Inst{1-0} = imm;
    }

    def _VG4_2Z : sme2p1_zero_matrix<{0b0110,?,?}, uimm2s2range,
                                     mnemonic, "vgx4"> {
      bits<2> imm;

      let Inst{1-0} = imm;
    }

    def _VG4_Z : sme2p1_zero_matrix<{0b100,?,?,?}, sme_elm_idx0_7,
                                    mnemonic, "vgx4"> {
      bits<3> imm;

      let Inst{2-0} = imm;
    }

    def _4Z : sme2p1_zero_matrix<{0b1010,?,?}, uimm2s4range, mnemonic> {
      bits<2> imm;

      let Inst{1-0} = imm;
    }

    def _VG2_4Z : sme2p1_zero_matrix<{0b11000,?}, uimm1s4range,
                                     mnemonic, "vgx2"> {
      bits<1> imm;

      let Inst{0} = imm;
    }

    def _VG4_4Z : sme2p1_zero_matrix<{0b11100,?}, uimm1s4range,
                                     mnemonic, "vgx4"> {
      bits<1> imm;

      let Inst{0} = imm;
    }
  }
  3883. //===----------------------------------------------------------------------===//
  3884. // SME2.1 lookup table expand two non-contiguous registers
  // Base instruction format for the two-register lookup-table expand forms.
  //   op        - placed in Inst{18-15}; derived classes leave some bits as
  //               '?' and overwrite them with the index.
  //   sz        - placed in Inst{13-12}.
  //   vector_ty - register operand type of the destination $Zd.
  //   index_ty  - operand type of the vector index $i.
  //   mnemonic  - assembly mnemonic.
  // The 4-bit $Zd field is split: Zd{3} goes to Inst{4} and Zd{2-0} to
  // Inst{2-0}; Inst{3} is fixed to zero.
  class sme2p1_luti_vector_vg2_index<bits<4> op, bits<2> sz,
                                     RegisterOperand vector_ty,
                                     AsmVectorIndexOpnd index_ty,
                                     string mnemonic>
      : I<(outs vector_ty:$Zd),
          (ins ZTR:$ZTt, ZPRAny:$Zn, index_ty:$i),
          mnemonic,
          "\t$Zd, $ZTt, $Zn$i",
          "", []>,
        Sched<[]> {
    // Operand encoding fields.
    bits<4> Zd;
    bits<5> Zn;

    let Inst{31-19} = 0b1100000010011;
    let Inst{18-15} = op;
    let Inst{14}    = 0b1;
    let Inst{13-12} = sz;
    let Inst{11-10} = 0b00;
    let Inst{9-5}   = Zn;
    let Inst{4}     = Zd{3};
    let Inst{3}     = 0b0;
    let Inst{2-0}   = Zd{2-0};
  }
  // Two-register variant with a 3-bit index: op = {1,?,?,?} fixes Inst{18}
  // to 1 and the index $i fills Inst{17-15}.
  class sme2p1_luti2_vector_vg2_index<bits<2> sz, RegisterOperand vector_ty,
                                      AsmVectorIndexOpnd index_ty,
                                      string mnemonic>
      : sme2p1_luti_vector_vg2_index<{1,?,?,?}, sz, vector_ty, index_ty,
                                     mnemonic> {
    bits<3> i;

    let Inst{17-15} = i;
  }
  // Instantiates the 3-bit-index two-register format for the _B and _H
  // suffixes; both use VectorIndexH as the index operand.
  multiclass sme2p1_luti2_vector_vg2_index<string mnemonic> {
    def _B : sme2p1_luti2_vector_vg2_index<0b00, ZZ_b_strided,
                                           VectorIndexH, mnemonic>;
    def _H : sme2p1_luti2_vector_vg2_index<0b01, ZZ_h_strided,
                                           VectorIndexH, mnemonic>;
  }
  // Two-register variant with a 2-bit index: op = {0b01,?,?} fixes
  // Inst{18-17} to 0b01 and the index $i fills Inst{16-15}.
  class sme2p1_luti4_vector_vg2_index<bits<2> sz, RegisterOperand vector_ty,
                                      AsmVectorIndexOpnd index_ty,
                                      string mnemonic>
      : sme2p1_luti_vector_vg2_index<{0b01,?,?}, sz, vector_ty, index_ty,
                                     mnemonic> {
    bits<2> i;

    let Inst{16-15} = i;
  }
  // Instantiates the 2-bit-index two-register format for the _B and _H
  // suffixes; both use VectorIndexS as the index operand.
  multiclass sme2p1_luti4_vector_vg2_index<string mnemonic> {
    def _B : sme2p1_luti4_vector_vg2_index<0b00, ZZ_b_strided,
                                           VectorIndexS, mnemonic>;
    def _H : sme2p1_luti4_vector_vg2_index<0b01, ZZ_h_strided,
                                           VectorIndexS, mnemonic>;
  }
  3929. // SME2.1 lookup table expand four non-contiguous registers
  // Base instruction format for the four-register lookup-table expand forms.
  //   op        - placed in Inst{18-16}; derived classes leave some bits as
  //               '?' and overwrite them with the index.
  //   sz        - placed in Inst{13-12}.
  //   vector_ty - register operand type of the destination $Zd.
  //   index_ty  - operand type of the vector index $i.
  //   mnemonic  - assembly mnemonic.
  // The 3-bit $Zd field is split: Zd{2} goes to Inst{4} and Zd{1-0} to
  // Inst{1-0}; Inst{3-2} is fixed to zero.
  class sme2p1_luti_vector_vg4_index<bits<3> op, bits<2> sz,
                                     RegisterOperand vector_ty,
                                     AsmVectorIndexOpnd index_ty,
                                     string mnemonic>
      : I<(outs vector_ty:$Zd),
          (ins ZTR:$ZTt, ZPRAny:$Zn, index_ty:$i),
          mnemonic,
          "\t$Zd, $ZTt, $Zn$i",
          "", []>,
        Sched<[]> {
    // Operand encoding fields.
    bits<3> Zd;
    bits<5> Zn;

    let Inst{31-19} = 0b1100000010011;
    let Inst{18-16} = op;
    let Inst{15-14} = 0b10;
    let Inst{13-12} = sz;
    let Inst{11-10} = 0b00;
    let Inst{9-5}   = Zn;
    let Inst{4}     = Zd{2};
    let Inst{3-2}   = 0b00;
    let Inst{1-0}   = Zd{1-0};
  }
  // Four-register variant with a 2-bit index: op = {1,?,?} fixes Inst{18}
  // to 1 and the index $i fills Inst{17-16}.
  class sme2p1_luti2_vector_vg4_index<bits<2> sz, RegisterOperand vector_ty,
                                      AsmVectorIndexOpnd index_ty,
                                      string mnemonic>
      : sme2p1_luti_vector_vg4_index<{1,?,?}, sz, vector_ty, index_ty,
                                     mnemonic> {
    bits<2> i;

    let Inst{17-16} = i;
  }
  // Instantiates the 2-bit-index four-register format for the _B and _H
  // suffixes; both use VectorIndexS as the index operand.
  multiclass sme2p1_luti2_vector_vg4_index<string mnemonic> {
    def _B : sme2p1_luti2_vector_vg4_index<0b00, ZZZZ_b_strided,
                                           VectorIndexS, mnemonic>;
    def _H : sme2p1_luti2_vector_vg4_index<0b01, ZZZZ_h_strided,
                                           VectorIndexS, mnemonic>;
  }
  // Four-register variant with a 1-bit index: op = {0b01,?} fixes
  // Inst{18-17} to 0b01 and the index $i fills Inst{16}.
  class sme2p1_luti4_vector_vg4_index<bits<2> sz, RegisterOperand vector_ty,
                                      AsmVectorIndexOpnd index_ty,
                                      string mnemonic>
      : sme2p1_luti_vector_vg4_index<{0b01,?}, sz, vector_ty, index_ty,
                                     mnemonic> {
    bit i;

    let Inst{16} = i;
  }
  // Instantiates the 1-bit-index four-register format; only the _H record
  // is defined, using VectorIndexD as the index operand.
  multiclass sme2p1_luti4_vector_vg4_index<string mnemonic> {
    def _H : sme2p1_luti4_vector_vg4_index<0b01, ZZZZ_h_strided,
                                           VectorIndexD, mnemonic>;
  }