arm_cde.td 9.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232
  1. //===--- arm_cde.td - ACLE intrinsic functions for CDE --------------------===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. //
  9. // This file defines the set of ACLE-specified source-level intrinsic
  10. // functions wrapping the CDE instructions.
  11. //
  12. //===----------------------------------------------------------------------===//
  13. include "arm_mve_defs.td"
  // arm_mve_defs.td does not provide f64, because MVE instructions only work
  // with f16 and f32, so the 64-bit "f" primitive type is declared here.
  def f64 : PrimitiveType<"f", 64>;
  // Float<t> takes a scalar type t and expands to the floating-point type that
  // has the same width as t.
  class Float<Type t> : ComplexType<(CTO_CopyKind t, f32)>;

  // Float<> applied to Scalar.
  def FScalar : Float<Scalar>;
  // ACLE CDE intrinsic: an Intrinsic whose builtinExtension field is "cde".
  //   ret     - return type of the intrinsic
  //   args    - dag declaring the intrinsic's arguments
  //   codegen - dag describing the code generated for the intrinsic
  class CDEIntrinsic<Type ret, dag args, dag codegen>
      : Intrinsic<ret, args, codegen> {
    let builtinExtension = "cde";
  }
  // Bound for an immediate in the range [0, 2^numBits - 1]; the upper limit is
  // computed as (1 << numBits) + (-1).
  class IB_ConstBits<int numBits>
      : IB_ConstRange<0, !add(!shl(1, numBits), -1)>;

  // Immediate of type u32 that is numBits wide.
  class CDEImmediateBits<int numBits>
      : Immediate<u32, IB_ConstBits<numBits>>;
  // LLVM IR CDE intrinsic. The given name is prefixed with "arm_cde_" and then
  // handed to IRIntBase together with params and appendKind.
  class CDEIRInt<string name, list<Type> params = [], bit appendKind = 0>
      : IRIntBase<"arm_cde_" # name, params, appendKind>;
  // Describes a function-like macro to be generated in arm_cde.h, of the form
  //   #define <name>(<params>) <definition>
  //   params_     - names of the macro parameters
  //   definition_ - replacement text of the macro
  class FunctionMacro<list<string> params_, string definition_> {
    list<string> params = params_;
    string definition = definition_;
  }
  // Coprocessor immediate: a sint constant in the range [0, 7].
  def imm_coproc : Immediate<sint, IB_ConstRange<0, 7>>;

  // Immediate integer parameters, one per bit width used below
  // (imm_<N>b is an N-bit-wide u32 immediate).
  def imm_3b  : CDEImmediateBits<3>;
  def imm_4b  : CDEImmediateBits<4>;
  def imm_6b  : CDEImmediateBits<6>;
  def imm_7b  : CDEImmediateBits<7>;
  def imm_9b  : CDEImmediateBits<9>;
  def imm_11b : CDEImmediateBits<11>;
  def imm_12b : CDEImmediateBits<12>;
  def imm_13b : CDEImmediateBits<13>;
  // CX* instructions operating on GPRs.
  //
  // Every defm of this multiclass produces four intrinsics:
  //   <NAME>   - returns u32
  //   <NAME>a  - returns u32, takes an additional u32 accumulator $acc
  //   <NAME>d  - returns u64
  //   <NAME>da - returns u64, takes an additional u64 accumulator $acc
  //
  // Template arguments:
  //   argsImm - declaration of the trailing immediate argument ($imm)
  //   argsReg - declarations of the register arguments
  //   cgArgs  - the register arguments as passed on to the IR intrinsic
  multiclass CDE_CX_m<dag argsImm, dag argsReg, dag cgArgs> {
    // Coprocessor argument that leads every variant's argument list.
    defvar coprocArg = (args imm_coproc:$cp);

    // Assembles the u64 result of the "d"/"da" variants from $pair, the result
    // of the IR intrinsic: element 1 is shifted left by 32 and OR-ed with
    // element 0.
    defvar joinPair = (or (shl (u64 (xval $pair, 1)), (u64 32)),
                          (u64 (xval $pair, 0)));

    let pnt = PNT_None, params = T.None in {
      def "" : CDEIntrinsic<
          u32,
          !con(coprocArg, argsReg, argsImm),
          !con((CDEIRInt<NAME> $cp), cgArgs, (? $imm))>;

      def a : CDEIntrinsic<
          u32,
          !con(coprocArg, (args u32:$acc), argsReg, argsImm),
          !con((CDEIRInt<NAME # "a"> $cp, $acc), cgArgs, (? $imm))>;

      def d : CDEIntrinsic<
          u64,
          !con(coprocArg, argsReg, argsImm),
          (seq !con((CDEIRInt<NAME # "d"> $cp), cgArgs, (? $imm)):$pair,
               joinPair)>;

      // The u64 accumulator is handed to the IR intrinsic as two u32 values:
      // $acc_hi is $acc shifted right by 32, $acc_lo is $acc converted to u32.
      def da : CDEIntrinsic<
          u64,
          !con(coprocArg, (args u64:$acc), argsReg, argsImm),
          (seq (u32 (lshr $acc, (u64 32))):$acc_hi,
               (u32 $acc):$acc_lo,
               !con((CDEIRInt<NAME # "da"> $cp, $acc_lo, $acc_hi), cgArgs,
                    (? $imm)):$pair,
               joinPair)>;
    }
  }
  // cx1 takes no register operands, cx2 takes one ($n) and cx3 takes two
  // ($n, $m); the immediate is 13, 9 and 6 bits wide respectively.
  defm cx1 : CDE_CX_m<(args imm_13b:$imm),
                      (args),
                      (?)>;
  defm cx2 : CDE_CX_m<(args imm_9b:$imm),
                      (args u32:$n),
                      (? $n)>;
  defm cx3 : CDE_CX_m<(args imm_6b:$imm),
                      (args u32:$n, u32:$m),
                      (? $n, $m)>;
  // VCX* instructions operating on VFP registers.
  //
  // Every defm of this multiclass produces four intrinsics:
  //   <NAME>   - u32 result, IR intrinsic <NAME> instantiated with f32
  //   <NAME>a  - u32 result and u32 accumulator, IR intrinsic <NAME>a with f32
  //   <NAME>d  - u64 result, IR intrinsic <NAME> instantiated with f64
  //   <NAME>da - u64 result and u64 accumulator, IR intrinsic <NAME>a with f64
  // In each case the IR intrinsic's result is bitcast to Scalar, and the
  // accumulator (when present) is bitcast to FScalar before the call.
  //
  // Template arguments:
  //   argsImm   - declaration of the trailing immediate argument ($imm)
  //   argsReg32 - register argument declarations for the u32 variants
  //   argsReg64 - register argument declarations for the u64 variants
  //   cgArgs    - the register arguments as passed on to the IR intrinsic
  multiclass CDE_VCXFP_m<dag argsImm, dag argsReg32, dag argsReg64,
                         dag cgArgs> {
    // Coprocessor argument that leads every variant's argument list.
    defvar coprocArg = (args imm_coproc:$cp);
    // Accumulator as it is passed to the IR intrinsic.
    defvar accFP = (bitcast $acc, FScalar);

    let pnt = PNT_None in {
      let params = [u32] in {
        def "" : CDEIntrinsic<
            u32,
            !con(coprocArg, argsReg32, argsImm),
            (bitcast !con((CDEIRInt<NAME, [f32]> $cp), cgArgs, (? $imm)),
                     Scalar)>;

        def a : CDEIntrinsic<
            u32,
            !con(coprocArg, (args u32:$acc), argsReg32, argsImm),
            (bitcast !con((CDEIRInt<NAME # "a", [f32]> $cp, accFP),
                          cgArgs, (? $imm)),
                     Scalar)>;
      }

      let params = [u64] in {
        def d : CDEIntrinsic<
            u64,
            !con(coprocArg, argsReg64, argsImm),
            (bitcast !con((CDEIRInt<NAME, [f64]> $cp), cgArgs, (? $imm)),
                     Scalar)>;

        def da : CDEIntrinsic<
            u64,
            !con(coprocArg, (args u64:$acc), argsReg64, argsImm),
            (bitcast !con((CDEIRInt<NAME # "a", [f64]> $cp, accFP),
                          cgArgs, (? $imm)),
                     Scalar)>;
      }
    }
  }
  // vcx1 takes no register operands, vcx2 takes one ($n) and vcx3 takes two
  // ($n, $m). Register operands are bitcast to FScalar before being passed to
  // the IR intrinsic.
  defm vcx1 : CDE_VCXFP_m<(args imm_11b:$imm),
                          (args),
                          (args),
                          (?)>;
  defm vcx2 : CDE_VCXFP_m<(args imm_6b:$imm),
                          (args u32:$n),
                          (args u64:$n),
                          (? (bitcast $n, FScalar))>;
  defm vcx3 : CDE_VCXFP_m<(args imm_3b:$imm),
                          (args u32:$n, u32:$m),
                          (args u64:$n, u64:$m),
                          (? (bitcast $n, FScalar), (bitcast $m, FScalar))>;
  // VCX* instructions operating on Q vector registers

  // Vector-of-u8 type; the IR intrinsics below take and return their vector
  // operands in this type.
  def v16u8 : VecOf<u8>;

  // Non-accumulating vcx1q: defined for the u8 parameter only and passes $cp
  // and $imm straight to the IR intrinsic.
  def vcx1q : CDEIntrinsic<Vector, (args imm_coproc:$cp, imm_12b:$imm),
                           (CDEIRInt<"vcx1q"> $cp, $imm)> {
    let pnt = PNT_None;
    let params = [u8];
  }
  // Polymorphic-only intrinsics instantiated for every type in T.All.
  // The first vector operand has the polymorphic Vector type and is bitcast to
  // a u8 vector for the IR intrinsic; results of type Vector are obtained by
  // bitcasting the IR intrinsic's result back. The *_impl variants take their
  // remaining vector operands already as v16u8.
  let pnt = PNT_Type, params = T.All, polymorphicOnly = 1 in {

    def vcx1qa : CDEIntrinsic<
        Vector,
        (args imm_coproc:$cp, Vector:$acc, imm_12b:$imm),
        (bitcast (CDEIRInt<"vcx1qa"> $cp, (bitcast $acc, v16u8), $imm),
                 Vector)>;

    def vcx2q : CDEIntrinsic<
        Vector,
        (args imm_coproc:$cp, Vector:$n, imm_7b:$imm),
        (bitcast (CDEIRInt<"vcx2q"> $cp, (bitcast $n, VecOf<u8>), $imm),
                 Vector)>;

    // Same IR intrinsic as vcx2q, but the result is returned as v16u8 without
    // a bitcast.
    def vcx2q_u8 : CDEIntrinsic<
        v16u8,
        (args imm_coproc:$cp, Vector:$n, imm_7b:$imm),
        (CDEIRInt<"vcx2q"> $cp, (bitcast $n, VecOf<u8>), $imm)>;

    def vcx2qa_impl : CDEIntrinsic<
        Vector,
        (args imm_coproc:$cp, Vector:$acc, v16u8:$n, imm_7b:$imm),
        (bitcast (CDEIRInt<"vcx2qa"> $cp, (bitcast $acc, v16u8), $n, $imm),
                 Vector)>;

    def vcx3q_impl : CDEIntrinsic<
        Vector,
        (args imm_coproc:$cp, Vector:$n, v16u8:$m, imm_4b:$imm),
        (bitcast (CDEIRInt<"vcx3q"> $cp, (bitcast $n, v16u8), $m, $imm),
                 Vector)>;

    // Same IR intrinsic as vcx3q_impl, but the result is returned as v16u8
    // without a bitcast.
    def vcx3q_u8_impl : CDEIntrinsic<
        v16u8,
        (args imm_coproc:$cp, Vector:$n, v16u8:$m, imm_4b:$imm),
        (CDEIRInt<"vcx3q"> $cp, (bitcast $n, v16u8), $m, $imm)>;

    def vcx3qa_impl : CDEIntrinsic<
        Vector,
        (args imm_coproc:$cp, Vector:$acc, v16u8:$n, v16u8:$m, imm_4b:$imm),
        (bitcast (CDEIRInt<"vcx3qa"> $cp, (bitcast $acc, v16u8), $n, $m,
                                     $imm),
                 Vector)>;
  }
  141. // Reinterpret intrinsics required to implement __arm_vcx*q with 2 or 3
  142. // polymorphic parameters.
  // Header-only, polymorphic-only reinterpret of a vector of any listed type
  // to v16u8. u8 is deliberately absent from the parameter list.
  def vreinterpretq_u8
      : Intrinsic<v16u8, (args Vector:$x), (vreinterpret $x, v16u8)> {
    let params = [/* no u8 */ s8, u16, s16, u32, s32, u64, s64, f16, f32];
    let headerOnly = 1;
    let polymorphicOnly = 1;
  }
  // vreinterpretq_u8_u8 is needed as well, so that the macros do not have to
  // do smart tricks for u8 operands. It is the identity on its argument and is
  // given the name "vreinterpretq_u8" through NameOverride.
  def vreinterpretq_u8_cde
      : CDEIntrinsic<v16u8, (args Vector:$x), (id $x)>,
        NameOverride<"vreinterpretq_u8"> {
    let params = [u8];
    let polymorphicOnly = 1;
  }
  // Function macros that forward to the corresponding *_impl intrinsic. The
  // first vector operand is passed through unchanged; every further vector
  // operand is wrapped in __arm_vreinterpretq_u8().
  def vcx2qa : FunctionMacro<
      ["cp", "acc", "n", "imm"],
      "__arm_vcx2qa_impl((cp), (acc), __arm_vreinterpretq_u8(n), (imm))">;

  def vcx3q : FunctionMacro<
      ["cp", "n", "m", "imm"],
      "__arm_vcx3q_impl((cp), (n), __arm_vreinterpretq_u8(m), (imm))">;

  def vcx3q_u8 : FunctionMacro<
      ["cp", "n", "m", "imm"],
      "__arm_vcx3q_u8_impl((cp), (n), __arm_vreinterpretq_u8(m), (imm))">;

  def vcx3qa : FunctionMacro<
      ["cp", "acc", "n", "m", "imm"],
      "__arm_vcx3qa_impl((cp), (acc), __arm_vreinterpretq_u8(n), "
      "__arm_vreinterpretq_u8(m), (imm))">;
  // Predicated vector CDE intrinsic, polymorphic-only and instantiated for
  // every type in T.All.
  //
  // Argument list: $cp, $inactive_or_acc, <argsReg>, <imm>, $pred.
  // Codegen: calls the IR intrinsic "<irname>_predicated", instantiated with
  // [Vector, Predicate], passing $cp, $inactive_or_acc, <cgArgs>, $imm, $pred.
  //
  //   irname  - IR intrinsic name without the "_predicated" suffix
  //   argsReg - declarations of the vector register arguments
  //   imm     - declaration of the immediate argument ($imm)
  //   cgArgs  - the register arguments as passed on to the IR intrinsic
  class CDEIntrinsicMasked<string irname, dag argsReg, dag imm, dag cgArgs>
      : CDEIntrinsic<
            Vector,
            !con((args imm_coproc:$cp, Vector:$inactive_or_acc),
                 argsReg, imm, (args Predicate:$pred)),
            !con((CDEIRInt<irname # "_predicated", [Vector,Predicate]>
                      $cp, $inactive_or_acc),
                 cgArgs, (? $imm, $pred))> {
    let params = T.All;
    let polymorphicOnly = 1;
  }
  // Predicated vcx1q / vcx1qa: no vector register operands besides
  // $inactive_or_acc, and a 12-bit immediate.
  def vcx1q_m  : CDEIntrinsicMasked<"vcx1q",
                                    (args), (args imm_12b:$imm), (?)>;
  def vcx1qa_m : CDEIntrinsicMasked<"vcx1qa",
                                    (args), (args imm_12b:$imm), (?)>;
  // Predicated intrinsics plus the function macros that wrap them.
  //
  // Every defm produces:
  //   <NAME>_m_impl, <NAME>a_m_impl - CDEIntrinsicMasked records for the IR
  //                                   intrinsics <NAME> and <NAME>a
  //   <NAME>_m, <NAME>a_m           - function macros that expand to a call of
  //                                   the matching __arm_*_impl intrinsic
  //
  // Template arguments:
  //   argsReg   - declarations of the vector register arguments
  //   imm       - declaration of the immediate argument ($imm)
  //   cgArgs    - the register arguments as passed on to the IR intrinsic
  //   macroArgs - macro parameter names placed between the first vector
  //               parameter and "imm"
  //   macro     - text inserted into the macro expansion between the first
  //               vector operand and "(imm)"
  multiclass VCXPredicated<dag argsReg, dag imm, dag cgArgs,
                           list<string> macroArgs, string macro> {
    def _m_impl  : CDEIntrinsicMasked<NAME, argsReg, imm, cgArgs>;
    def a_m_impl : CDEIntrinsicMasked<NAME # "a", argsReg, imm, cgArgs>;

    def _m : FunctionMacro<
        !listconcat(["cp", "inactive"], macroArgs, ["imm", "pred"]),
        "__arm_" # NAME # "_m_impl((cp), (inactive), " # macro
            # " (imm), (pred))">;

    def a_m : FunctionMacro<
        !listconcat(["cp", "acc"], macroArgs, ["imm", "pred"]),
        "__arm_" # NAME # "a_m_impl((cp), (acc), " # macro
            # " (imm), (pred))">;
  }
  // Predicated vcx2q[a] and vcx3q[a]. The macro text reinterprets each vector
  // operand (n, and m for vcx3q) to u8 before it reaches the *_impl intrinsic.
  defm vcx2q : VCXPredicated<
      (args v16u8:$n),
      (args imm_7b:$imm),
      (? $n),
      ["n"],
      "__arm_vreinterpretq_u8(n),">;

  defm vcx3q : VCXPredicated<
      (args v16u8:$n, v16u8:$m),
      (args imm_4b:$imm),
      (? $n, $m),
      ["n", "m"],
      "__arm_vreinterpretq_u8(n), "
      "__arm_vreinterpretq_u8(m),">;
  // vreinterpretq intrinsics required by the ACLE CDE specification: one
  // header-only intrinsic per destination type, each taking a u8 vector and
  // reinterpreting it as a vector of that destination type.
  let params = [u8], headerOnly = 1, pnt = PNT_None in {
    foreach desttype = [/* no u8 */ s8, u16, s16, u32, s32, u64, s64,
                        f16, f32] in {
      def "vreinterpretq_" # desttype : Intrinsic<
          VecOf<desttype>,
          (args Vector:$x),
          (vreinterpret $x, VecOf<desttype>)>;
    }
  }