IntrinsicsARM.td 68 KB


  1. //===- IntrinsicsARM.td - Defines ARM intrinsics -----------*- tablegen -*-===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. //
  9. // This file defines all of the ARM-specific intrinsics.
  10. //
  11. //===----------------------------------------------------------------------===//
  12. //===----------------------------------------------------------------------===//
  13. // TLS
  14. let TargetPrefix = "arm" in { // All intrinsics start with "llvm.arm.".
  15. // A space-consuming intrinsic primarily for testing ARMConstantIslands. The
  16. // first argument is the number of bytes this "instruction" takes up, the second
  17. // and return value are essentially chains, used to force ordering during ISel.
  18. def int_arm_space : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [ImmArg<ArgIndex<0>>]>;
  19. // 16-bit multiplications
  20. def int_arm_smulbb : ClangBuiltin<"__builtin_arm_smulbb">,
  21. DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
  22. [IntrNoMem]>;
  23. def int_arm_smulbt : ClangBuiltin<"__builtin_arm_smulbt">,
  24. DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
  25. [IntrNoMem]>;
  26. def int_arm_smultb : ClangBuiltin<"__builtin_arm_smultb">,
  27. DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
  28. [IntrNoMem]>;
  29. def int_arm_smultt : ClangBuiltin<"__builtin_arm_smultt">,
  30. DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
  31. [IntrNoMem]>;
  32. def int_arm_smulwb : ClangBuiltin<"__builtin_arm_smulwb">,
  33. DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
  34. [IntrNoMem]>;
  35. def int_arm_smulwt : ClangBuiltin<"__builtin_arm_smulwt">,
  36. DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
  37. [IntrNoMem]>;
  38. //===----------------------------------------------------------------------===//
  39. // Saturating Arithmetic
  40. def int_arm_qadd : ClangBuiltin<"__builtin_arm_qadd">,
  41. DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
  42. [Commutative, IntrNoMem]>;
  43. def int_arm_qsub : ClangBuiltin<"__builtin_arm_qsub">,
  44. DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
  45. [IntrNoMem]>;
  46. def int_arm_ssat : ClangBuiltin<"__builtin_arm_ssat">,
  47. DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
  48. [IntrNoMem]>;
  49. def int_arm_usat : ClangBuiltin<"__builtin_arm_usat">,
  50. DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
  51. [IntrNoMem]>;
  52. // Accumulating multiplications
  53. def int_arm_smlabb : ClangBuiltin<"__builtin_arm_smlabb">,
  54. DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty,
  55. llvm_i32_ty],
  56. [IntrNoMem]>;
  57. def int_arm_smlabt : ClangBuiltin<"__builtin_arm_smlabt">,
  58. DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty,
  59. llvm_i32_ty],
  60. [IntrNoMem]>;
  61. def int_arm_smlatb : ClangBuiltin<"__builtin_arm_smlatb">,
  62. DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty,
  63. llvm_i32_ty],
  64. [IntrNoMem]>;
  65. def int_arm_smlatt : ClangBuiltin<"__builtin_arm_smlatt">,
  66. DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty,
  67. llvm_i32_ty],
  68. [IntrNoMem]>;
  69. def int_arm_smlawb : ClangBuiltin<"__builtin_arm_smlawb">,
  70. DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty,
  71. llvm_i32_ty],
  72. [IntrNoMem]>;
  73. def int_arm_smlawt : ClangBuiltin<"__builtin_arm_smlawt">,
  74. DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty,
  75. llvm_i32_ty],
  76. [IntrNoMem]>;
  77. // Parallel 16-bit saturation
  78. def int_arm_ssat16 : ClangBuiltin<"__builtin_arm_ssat16">,
  79. DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
  80. [IntrNoMem]>;
  81. def int_arm_usat16 : ClangBuiltin<"__builtin_arm_usat16">,
  82. DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
  83. [IntrNoMem]>;
  84. // Packing and unpacking
  85. def int_arm_sxtab16 : ClangBuiltin<"__builtin_arm_sxtab16">,
  86. DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
  87. [IntrNoMem]>;
  88. def int_arm_sxtb16 : ClangBuiltin<"__builtin_arm_sxtb16">,
  89. DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem]>;
  90. def int_arm_uxtab16 : ClangBuiltin<"__builtin_arm_uxtab16">,
  91. DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
  92. [IntrNoMem]>;
  93. def int_arm_uxtb16 : ClangBuiltin<"__builtin_arm_uxtb16">,
  94. DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem]>;
  95. // Parallel selection, reads the GE flags.
  96. def int_arm_sel : ClangBuiltin<"__builtin_arm_sel">,
  97. DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
  98. [IntrReadMem]>;
  99. // Parallel 8-bit addition and subtraction
  100. def int_arm_qadd8 : ClangBuiltin<"__builtin_arm_qadd8">,
  101. DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
  102. [IntrNoMem]>;
  103. def int_arm_qsub8 : ClangBuiltin<"__builtin_arm_qsub8">,
  104. DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
  105. [IntrNoMem]>;
  106. // Writes to the GE bits.
  107. def int_arm_sadd8 : ClangBuiltin<"__builtin_arm_sadd8">,
  108. DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>;
  109. def int_arm_shadd8 : ClangBuiltin<"__builtin_arm_shadd8">,
  110. DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
  111. [IntrNoMem]>;
  112. def int_arm_shsub8 : ClangBuiltin<"__builtin_arm_shsub8">,
  113. DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
  114. [IntrNoMem]>;
  115. // Writes to the GE bits.
  116. def int_arm_ssub8 : ClangBuiltin<"__builtin_arm_ssub8">,
  117. DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>;
  118. // Writes to the GE bits.
  119. def int_arm_uadd8 : ClangBuiltin<"__builtin_arm_uadd8">,
  120. DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>;
  121. def int_arm_uhadd8 : ClangBuiltin<"__builtin_arm_uhadd8">,
  122. DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
  123. [IntrNoMem]>;
  124. def int_arm_uhsub8 : ClangBuiltin<"__builtin_arm_uhsub8">,
  125. DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
  126. [IntrNoMem]>;
  127. def int_arm_uqadd8 : ClangBuiltin<"__builtin_arm_uqadd8">,
  128. DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
  129. [IntrNoMem]>;
  130. def int_arm_uqsub8 : ClangBuiltin<"__builtin_arm_uqsub8">,
  131. DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
  132. [IntrNoMem]>;
  133. // Writes to the GE bits.
  134. def int_arm_usub8 : ClangBuiltin<"__builtin_arm_usub8">,
  135. DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>;
  136. // Sum of 8-bit absolute differences
  137. def int_arm_usad8 : ClangBuiltin<"__builtin_arm_usad8">,
  138. DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
  139. [IntrNoMem]>;
  140. def int_arm_usada8 : ClangBuiltin<"__builtin_arm_usada8">,
  141. DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty,
  142. llvm_i32_ty],
  143. [IntrNoMem]>;
  144. // Parallel 16-bit addition and subtraction
  145. def int_arm_qadd16 : ClangBuiltin<"__builtin_arm_qadd16">,
  146. DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
  147. [IntrNoMem]>;
  148. def int_arm_qasx : ClangBuiltin<"__builtin_arm_qasx">,
  149. DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
  150. [IntrNoMem]>;
  151. def int_arm_qsax : ClangBuiltin<"__builtin_arm_qsax">,
  152. DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
  153. [IntrNoMem]>;
  154. def int_arm_qsub16 : ClangBuiltin<"__builtin_arm_qsub16">,
  155. DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
  156. [IntrNoMem]>;
  157. // Writes to the GE bits.
  158. def int_arm_sadd16 : ClangBuiltin<"__builtin_arm_sadd16">,
  159. DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>;
  160. // Writes to the GE bits.
  161. def int_arm_sasx : ClangBuiltin<"__builtin_arm_sasx">,
  162. DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>;
  163. def int_arm_shadd16 : ClangBuiltin<"__builtin_arm_shadd16">,
  164. DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
  165. [IntrNoMem]>;
  166. def int_arm_shasx : ClangBuiltin<"__builtin_arm_shasx">,
  167. DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
  168. [IntrNoMem]>;
  169. def int_arm_shsax : ClangBuiltin<"__builtin_arm_shsax">,
  170. DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
  171. [IntrNoMem]>;
  172. def int_arm_shsub16 : ClangBuiltin<"__builtin_arm_shsub16">,
  173. DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
  174. [IntrNoMem]>;
  175. // Writes to the GE bits.
  176. def int_arm_ssax : ClangBuiltin<"__builtin_arm_ssax">,
  177. DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>;
  178. // Writes to the GE bits.
  179. def int_arm_ssub16 : ClangBuiltin<"__builtin_arm_ssub16">,
  180. DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>;
  181. // Writes to the GE bits.
  182. def int_arm_uadd16 : ClangBuiltin<"__builtin_arm_uadd16">,
  183. DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>;
  184. // Writes to the GE bits.
  185. def int_arm_uasx : ClangBuiltin<"__builtin_arm_uasx">,
  186. DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>;
  187. def int_arm_uhadd16 : ClangBuiltin<"__builtin_arm_uhadd16">,
  188. DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
  189. [IntrNoMem]>;
  190. def int_arm_uhasx : ClangBuiltin<"__builtin_arm_uhasx">,
  191. DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
  192. [IntrNoMem]>;
  193. def int_arm_uhsax : ClangBuiltin<"__builtin_arm_uhsax">,
  194. DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
  195. [IntrNoMem]>;
  196. def int_arm_uhsub16 : ClangBuiltin<"__builtin_arm_uhsub16">,
  197. DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
  198. [IntrNoMem]>;
  199. def int_arm_uqadd16 : ClangBuiltin<"__builtin_arm_uqadd16">,
  200. DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
  201. [IntrNoMem]>;
  202. def int_arm_uqasx : ClangBuiltin<"__builtin_arm_uqasx">,
  203. DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
  204. [IntrNoMem]>;
  205. def int_arm_uqsax : ClangBuiltin<"__builtin_arm_uqsax">,
  206. DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
  207. [IntrNoMem]>;
  208. def int_arm_uqsub16 : ClangBuiltin<"__builtin_arm_uqsub16">,
  209. DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
  210. [IntrNoMem]>;
  211. // Writes to the GE bits.
  212. def int_arm_usax : ClangBuiltin<"__builtin_arm_usax">,
  213. DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>;
  214. // Writes to the GE bits.
  215. def int_arm_usub16 : ClangBuiltin<"__builtin_arm_usub16">,
  216. DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>;
  217. // Parallel 16-bit multiplication
  218. def int_arm_smlad : ClangBuiltin<"__builtin_arm_smlad">,
  219. DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty,
  220. llvm_i32_ty],
  221. [IntrNoMem]>;
  222. def int_arm_smladx : ClangBuiltin<"__builtin_arm_smladx">,
  223. DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty,
  224. llvm_i32_ty],
  225. [IntrNoMem]>;
  226. def int_arm_smlald : ClangBuiltin<"__builtin_arm_smlald">,
  227. DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_i32_ty, llvm_i32_ty,
  228. llvm_i64_ty],
  229. [IntrNoMem]>;
  230. def int_arm_smlaldx : ClangBuiltin<"__builtin_arm_smlaldx">,
  231. DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_i32_ty, llvm_i32_ty,
  232. llvm_i64_ty],
  233. [IntrNoMem]>;
  234. def int_arm_smlsd : ClangBuiltin<"__builtin_arm_smlsd">,
  235. DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty,
  236. llvm_i32_ty],
  237. [IntrNoMem]>;
  238. def int_arm_smlsdx : ClangBuiltin<"__builtin_arm_smlsdx">,
  239. DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty,
  240. llvm_i32_ty],
  241. [IntrNoMem]>;
  242. def int_arm_smlsld : ClangBuiltin<"__builtin_arm_smlsld">,
  243. DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_i32_ty, llvm_i32_ty,
  244. llvm_i64_ty],
  245. [IntrNoMem]>;
  246. def int_arm_smlsldx : ClangBuiltin<"__builtin_arm_smlsldx">,
  247. DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_i32_ty, llvm_i32_ty,
  248. llvm_i64_ty],
  249. [IntrNoMem]>;
  250. def int_arm_smuad : ClangBuiltin<"__builtin_arm_smuad">,
  251. DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
  252. [IntrNoMem]>;
  253. def int_arm_smuadx : ClangBuiltin<"__builtin_arm_smuadx">,
  254. DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
  255. [IntrNoMem]>;
  256. def int_arm_smusd : ClangBuiltin<"__builtin_arm_smusd">,
  257. DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
  258. [IntrNoMem]>;
  259. def int_arm_smusdx : ClangBuiltin<"__builtin_arm_smusdx">,
  260. DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
  261. [IntrNoMem]>;
  262. //===----------------------------------------------------------------------===//
  263. // Load, Store and Clear exclusive
  264. // TODO: Add applicable default attributes.
  265. def int_arm_ldrex : Intrinsic<[llvm_i32_ty], [llvm_anyptr_ty]>;
  266. def int_arm_strex : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_anyptr_ty]>;
  267. def int_arm_ldaex : Intrinsic<[llvm_i32_ty], [llvm_anyptr_ty]>;
  268. def int_arm_stlex : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_anyptr_ty]>;
  269. def int_arm_clrex : Intrinsic<[]>;
  270. def int_arm_strexd : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty,
  271. llvm_ptr_ty]>;
  272. def int_arm_ldrexd : Intrinsic<[llvm_i32_ty, llvm_i32_ty], [llvm_ptr_ty]>;
  273. def int_arm_stlexd : Intrinsic<[llvm_i32_ty],
  274. [llvm_i32_ty, llvm_i32_ty, llvm_ptr_ty]>;
  275. def int_arm_ldaexd : Intrinsic<[llvm_i32_ty, llvm_i32_ty], [llvm_ptr_ty]>;
  276. //===----------------------------------------------------------------------===//
  277. // Data barrier instructions
  278. // TODO: Add applicable default attributes.
  279. def int_arm_dmb : ClangBuiltin<"__builtin_arm_dmb">, MSBuiltin<"__dmb">,
  280. Intrinsic<[], [llvm_i32_ty]>;
  281. def int_arm_dsb : ClangBuiltin<"__builtin_arm_dsb">, MSBuiltin<"__dsb">,
  282. Intrinsic<[], [llvm_i32_ty]>;
  283. def int_arm_isb : ClangBuiltin<"__builtin_arm_isb">, MSBuiltin<"__isb">,
  284. Intrinsic<[], [llvm_i32_ty]>;
  285. //===----------------------------------------------------------------------===//
  286. // VFP
  287. def int_arm_get_fpscr : ClangBuiltin<"__builtin_arm_get_fpscr">,
  288. DefaultAttrsIntrinsic<[llvm_i32_ty], [], []>;
  289. def int_arm_set_fpscr : ClangBuiltin<"__builtin_arm_set_fpscr">,
  290. DefaultAttrsIntrinsic<[], [llvm_i32_ty], []>;
  291. def int_arm_vcvtr : DefaultAttrsIntrinsic<[llvm_float_ty],
  292. [llvm_anyfloat_ty], [IntrNoMem]>;
  293. def int_arm_vcvtru : DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_anyfloat_ty],
  294. [IntrNoMem]>;
  295. //===----------------------------------------------------------------------===//
  296. // Coprocessor
  297. // TODO: Add applicable default attributes.
  298. def int_arm_ldc : ClangBuiltin<"__builtin_arm_ldc">,
  299. Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_ptr_ty], [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<1>>]>;
  300. def int_arm_ldcl : ClangBuiltin<"__builtin_arm_ldcl">,
  301. Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_ptr_ty], [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<1>>]>;
  302. def int_arm_ldc2 : ClangBuiltin<"__builtin_arm_ldc2">,
  303. Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_ptr_ty], [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<1>>]>;
  304. def int_arm_ldc2l : ClangBuiltin<"__builtin_arm_ldc2l">,
  305. Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_ptr_ty], [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<1>>]>;
  306. def int_arm_stc : ClangBuiltin<"__builtin_arm_stc">,
  307. Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_ptr_ty], [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<1>>]>;
  308. def int_arm_stcl : ClangBuiltin<"__builtin_arm_stcl">,
  309. Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_ptr_ty], [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<1>>]>;
  310. def int_arm_stc2 : ClangBuiltin<"__builtin_arm_stc2">,
  311. Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_ptr_ty], [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<1>>]>;
  312. def int_arm_stc2l : ClangBuiltin<"__builtin_arm_stc2l">,
  313. Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_ptr_ty], [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<1>>]>;
  314. // Move to coprocessor
  315. def int_arm_mcr : ClangBuiltin<"__builtin_arm_mcr">,
  316. Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
  317. llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<1>>, ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>]>;
  318. def int_arm_mcr2 : ClangBuiltin<"__builtin_arm_mcr2">,
  319. Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
  320. llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<1>>, ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>]>;
  321. // Move from coprocessor
  322. def int_arm_mrc : ClangBuiltin<"__builtin_arm_mrc">,
  323. MSBuiltin<"_MoveFromCoprocessor">,
  324. Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
  325. llvm_i32_ty, llvm_i32_ty], [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<1>>, ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>]>;
  326. def int_arm_mrc2 : ClangBuiltin<"__builtin_arm_mrc2">,
  327. MSBuiltin<"_MoveFromCoprocessor2">,
  328. Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
  329. llvm_i32_ty, llvm_i32_ty], [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<1>>, ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>]>;
  330. // Coprocessor data processing
  331. def int_arm_cdp : ClangBuiltin<"__builtin_arm_cdp">,
  332. Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
  333. llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<1>>, ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>]>;
  334. def int_arm_cdp2 : ClangBuiltin<"__builtin_arm_cdp2">,
  335. Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
  336. llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<1>>, ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>]>;
  337. // Move from two registers to coprocessor
  338. def int_arm_mcrr : Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
  339. llvm_i32_ty, llvm_i32_ty], [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<1>>, ImmArg<ArgIndex<4>>]>;
  340. def int_arm_mcrr2 : Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
  341. llvm_i32_ty, llvm_i32_ty], [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<1>>, ImmArg<ArgIndex<4>>]>;
  342. def int_arm_mrrc : Intrinsic<[llvm_i32_ty, llvm_i32_ty], [llvm_i32_ty,
  343. llvm_i32_ty, llvm_i32_ty], [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<1>>, ImmArg<ArgIndex<2>>]>;
  344. def int_arm_mrrc2 : Intrinsic<[llvm_i32_ty, llvm_i32_ty], [llvm_i32_ty,
  345. llvm_i32_ty, llvm_i32_ty], [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<1>>, ImmArg<ArgIndex<2>>]>;
  346. //===----------------------------------------------------------------------===//
  347. // CRC32
  348. def int_arm_crc32b : DefaultAttrsIntrinsic<
  349. [llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
  350. def int_arm_crc32cb : DefaultAttrsIntrinsic<
  351. [llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
  352. def int_arm_crc32h : DefaultAttrsIntrinsic<
  353. [llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
  354. def int_arm_crc32ch : DefaultAttrsIntrinsic<
  355. [llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
  356. def int_arm_crc32w : DefaultAttrsIntrinsic<
  357. [llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
  358. def int_arm_crc32cw : DefaultAttrsIntrinsic<
  359. [llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
  360. //===----------------------------------------------------------------------===//
  361. // CMSE
  362. // TODO: Add applicable default attributes.
  363. def int_arm_cmse_tt : ClangBuiltin<"__builtin_arm_cmse_TT">,
  364. Intrinsic<[llvm_i32_ty], [llvm_ptr_ty], [IntrNoMem]>;
  365. def int_arm_cmse_ttt : ClangBuiltin<"__builtin_arm_cmse_TTT">,
  366. Intrinsic<[llvm_i32_ty], [llvm_ptr_ty], [IntrNoMem]>;
  367. def int_arm_cmse_tta : ClangBuiltin<"__builtin_arm_cmse_TTA">,
  368. Intrinsic<[llvm_i32_ty], [llvm_ptr_ty], [IntrNoMem]>;
  369. def int_arm_cmse_ttat : ClangBuiltin<"__builtin_arm_cmse_TTAT">,
  370. Intrinsic<[llvm_i32_ty], [llvm_ptr_ty], [IntrNoMem]>;
  371. //===----------------------------------------------------------------------===//
  372. // HINT
  373. // TODO: Add applicable default attributes.
  374. def int_arm_hint : Intrinsic<[], [llvm_i32_ty]>;
  375. def int_arm_dbg : Intrinsic<[], [llvm_i32_ty]>;
  376. //===----------------------------------------------------------------------===//
  377. // UND (reserved undefined sequence)
  378. // TODO: Add applicable default attributes.
  379. def int_arm_undefined : Intrinsic<[], [llvm_i32_ty]>;
  380. //===----------------------------------------------------------------------===//
  381. // Advanced SIMD (NEON)
  382. // The following classes do not correspond directly to GCC builtins.
  383. class Neon_1Arg_Intrinsic
  384. : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>], [IntrNoMem]>;
  385. class Neon_1Arg_Narrow_Intrinsic
  386. : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [LLVMExtendedType<0>],
  387. [IntrNoMem]>;
  388. class Neon_2Arg_Intrinsic
  389. : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
  390. [LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>;
  391. class Neon_2Arg_Narrow_Intrinsic
  392. : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
  393. [LLVMExtendedType<0>, LLVMExtendedType<0>],
  394. [IntrNoMem]>;
  395. class Neon_2Arg_Long_Intrinsic
  396. : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
  397. [LLVMTruncatedType<0>, LLVMTruncatedType<0>],
  398. [IntrNoMem]>;
  399. class Neon_3Arg_Intrinsic
  400. : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
  401. [LLVMMatchType<0>, LLVMMatchType<0>,
  402. LLVMMatchType<0>],
  403. [IntrNoMem]>;
  404. class Neon_3Arg_Long_Intrinsic
  405. : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
  406. [LLVMMatchType<0>, LLVMTruncatedType<0>,
  407. LLVMTruncatedType<0>],
  408. [IntrNoMem]>;
  409. class Neon_1FloatArg_Intrinsic
  410. : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>;
  411. class Neon_CvtFxToFP_Intrinsic
  412. : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [llvm_anyint_ty, llvm_i32_ty],
  413. [IntrNoMem]>;
  414. class Neon_CvtFPToFx_Intrinsic
  415. : DefaultAttrsIntrinsic<[llvm_anyint_ty], [llvm_anyfloat_ty, llvm_i32_ty],
  416. [IntrNoMem]>;
  417. class Neon_CvtFPtoInt_1Arg_Intrinsic
  418. : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty],
  419. [IntrNoMem]>;
  420. class Neon_Compare_Intrinsic
  421. : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
  422. [llvm_anyvector_ty, LLVMMatchType<1>], [IntrNoMem]>;
  423. // The table operands for VTBL and VTBX consist of 1 to 4 v8i8 vectors.
  424. // Besides the table, VTBL has one other v8i8 argument and VTBX has two.
  425. // Overall, the classes range from 2 to 6 v8i8 arguments.
  426. class Neon_Tbl2Arg_Intrinsic
  427. : DefaultAttrsIntrinsic<[llvm_v8i8_ty],
  428. [llvm_v8i8_ty, llvm_v8i8_ty], [IntrNoMem]>;
  429. class Neon_Tbl3Arg_Intrinsic
  430. : DefaultAttrsIntrinsic<[llvm_v8i8_ty],
  431. [llvm_v8i8_ty, llvm_v8i8_ty, llvm_v8i8_ty],
  432. [IntrNoMem]>;
  433. class Neon_Tbl4Arg_Intrinsic
  434. : DefaultAttrsIntrinsic<[llvm_v8i8_ty],
  435. [llvm_v8i8_ty, llvm_v8i8_ty, llvm_v8i8_ty,
  436. llvm_v8i8_ty],
  437. [IntrNoMem]>;
  438. class Neon_Tbl5Arg_Intrinsic
  439. : DefaultAttrsIntrinsic<[llvm_v8i8_ty],
  440. [llvm_v8i8_ty, llvm_v8i8_ty, llvm_v8i8_ty,
  441. llvm_v8i8_ty, llvm_v8i8_ty],
  442. [IntrNoMem]>;
  443. class Neon_Tbl6Arg_Intrinsic
  444. : DefaultAttrsIntrinsic<[llvm_v8i8_ty],
  445. [llvm_v8i8_ty, llvm_v8i8_ty, llvm_v8i8_ty,
  446. llvm_v8i8_ty, llvm_v8i8_ty, llvm_v8i8_ty],
  447. [IntrNoMem]>;
  448. // Arithmetic ops
  449. let IntrProperties = [IntrNoMem, Commutative] in {
  450. // Vector Add.
  451. def int_arm_neon_vhadds : Neon_2Arg_Intrinsic;
  452. def int_arm_neon_vhaddu : Neon_2Arg_Intrinsic;
  453. def int_arm_neon_vrhadds : Neon_2Arg_Intrinsic;
  454. def int_arm_neon_vrhaddu : Neon_2Arg_Intrinsic;
  455. def int_arm_neon_vraddhn : Neon_2Arg_Narrow_Intrinsic;
  456. // Vector Multiply.
  457. def int_arm_neon_vmulp : Neon_2Arg_Intrinsic;
  458. def int_arm_neon_vqdmulh : Neon_2Arg_Intrinsic;
  459. def int_arm_neon_vqrdmulh : Neon_2Arg_Intrinsic;
  460. def int_arm_neon_vmulls : Neon_2Arg_Long_Intrinsic;
  461. def int_arm_neon_vmullu : Neon_2Arg_Long_Intrinsic;
  462. def int_arm_neon_vmullp : Neon_2Arg_Long_Intrinsic;
  463. def int_arm_neon_vqdmull : Neon_2Arg_Long_Intrinsic;
  464. // Vector Maximum.
  465. def int_arm_neon_vmaxs : Neon_2Arg_Intrinsic;
  466. def int_arm_neon_vmaxu : Neon_2Arg_Intrinsic;
  467. def int_arm_neon_vmaxnm : Neon_2Arg_Intrinsic;
  468. // Vector Minimum.
  469. def int_arm_neon_vmins : Neon_2Arg_Intrinsic;
  470. def int_arm_neon_vminu : Neon_2Arg_Intrinsic;
  471. def int_arm_neon_vminnm : Neon_2Arg_Intrinsic;
  472. // Vector Reciprocal Step.
  473. def int_arm_neon_vrecps : Neon_2Arg_Intrinsic;
  474. // Vector Reciprocal Square Root Step.
  475. def int_arm_neon_vrsqrts : Neon_2Arg_Intrinsic;
  476. }
  477. // Vector Subtract.
  478. def int_arm_neon_vhsubs : Neon_2Arg_Intrinsic;
  479. def int_arm_neon_vhsubu : Neon_2Arg_Intrinsic;
  480. def int_arm_neon_vrsubhn : Neon_2Arg_Narrow_Intrinsic;
  481. // Vector Absolute Compare.
  482. def int_arm_neon_vacge : Neon_Compare_Intrinsic;
  483. def int_arm_neon_vacgt : Neon_Compare_Intrinsic;
  484. // Vector Absolute Differences.
  485. def int_arm_neon_vabds : Neon_2Arg_Intrinsic;
  486. def int_arm_neon_vabdu : Neon_2Arg_Intrinsic;
  487. // Vector Pairwise Add.
  488. def int_arm_neon_vpadd : Neon_2Arg_Intrinsic;
  489. // Vector Pairwise Add Long.
  490. // Note: This is different than the other "long" NEON intrinsics because
  491. // the result vector has half as many elements as the source vector.
  492. // The source and destination vector types must be specified separately.
  493. def int_arm_neon_vpaddls : DefaultAttrsIntrinsic<
  494. [llvm_anyvector_ty], [llvm_anyvector_ty], [IntrNoMem]>;
  495. def int_arm_neon_vpaddlu : DefaultAttrsIntrinsic<
  496. [llvm_anyvector_ty], [llvm_anyvector_ty], [IntrNoMem]>;
  497. // Vector Pairwise Add and Accumulate Long.
  498. // Note: This is similar to vpaddl but the destination vector also appears
  499. // as the first argument.
  500. def int_arm_neon_vpadals : DefaultAttrsIntrinsic<
  501. [llvm_anyvector_ty], [LLVMMatchType<0>, llvm_anyvector_ty], [IntrNoMem]>;
  502. def int_arm_neon_vpadalu : DefaultAttrsIntrinsic<
  503. [llvm_anyvector_ty], [LLVMMatchType<0>, llvm_anyvector_ty], [IntrNoMem]>;
  504. // Vector Pairwise Maximum and Minimum.
  505. def int_arm_neon_vpmaxs : Neon_2Arg_Intrinsic;
  506. def int_arm_neon_vpmaxu : Neon_2Arg_Intrinsic;
  507. def int_arm_neon_vpmins : Neon_2Arg_Intrinsic;
  508. def int_arm_neon_vpminu : Neon_2Arg_Intrinsic;
  509. // Vector Shifts:
  510. //
  511. // The various saturating and rounding vector shift operations need to be
  512. // represented by intrinsics in LLVM, and even the basic VSHL variable shift
  513. // operation cannot be safely translated to LLVM's shift operators. VSHL can
  514. // be used for both left and right shifts, or even combinations of the two,
  515. // depending on the signs of the shift amounts. It also has well-defined
  516. // behavior for shift amounts that LLVM leaves undefined. Only basic shifts
  517. // by constants can be represented with LLVM's shift operators.
  518. //
  519. // The shift counts for these intrinsics are always vectors, even for constant
  520. // shifts, where the constant is replicated. For consistency with VSHL (and
  521. // other variable shift instructions), left shifts have positive shift counts
  522. // and right shifts have negative shift counts. This convention is also used
  523. // for constant right shift intrinsics, and to help preserve sanity, the
  524. // intrinsic names use "shift" instead of either "shl" or "shr". Where
  525. // applicable, signed and unsigned versions of the intrinsics are
  526. // distinguished with "s" and "u" suffixes. A few NEON shift instructions,
  527. // such as VQSHLU, take signed operands but produce unsigned results; these
  528. // use a "su" suffix.
  529. // Vector Shift.
  530. def int_arm_neon_vshifts : Neon_2Arg_Intrinsic;
  531. def int_arm_neon_vshiftu : Neon_2Arg_Intrinsic;
  532. // Vector Rounding Shift.
  533. def int_arm_neon_vrshifts : Neon_2Arg_Intrinsic;
  534. def int_arm_neon_vrshiftu : Neon_2Arg_Intrinsic;
  535. def int_arm_neon_vrshiftn : Neon_2Arg_Narrow_Intrinsic;
  536. // Vector Saturating Shift.
  537. def int_arm_neon_vqshifts : Neon_2Arg_Intrinsic;
  538. def int_arm_neon_vqshiftu : Neon_2Arg_Intrinsic;
  539. def int_arm_neon_vqshiftsu : Neon_2Arg_Intrinsic;
  540. def int_arm_neon_vqshiftns : Neon_2Arg_Narrow_Intrinsic;
  541. def int_arm_neon_vqshiftnu : Neon_2Arg_Narrow_Intrinsic;
  542. def int_arm_neon_vqshiftnsu : Neon_2Arg_Narrow_Intrinsic;
  543. // Vector Saturating Rounding Shift.
  544. def int_arm_neon_vqrshifts : Neon_2Arg_Intrinsic;
  545. def int_arm_neon_vqrshiftu : Neon_2Arg_Intrinsic;
  546. def int_arm_neon_vqrshiftns : Neon_2Arg_Narrow_Intrinsic;
  547. def int_arm_neon_vqrshiftnu : Neon_2Arg_Narrow_Intrinsic;
  548. def int_arm_neon_vqrshiftnsu : Neon_2Arg_Narrow_Intrinsic;
  549. // Vector Shift and Insert.
  550. def int_arm_neon_vshiftins : Neon_3Arg_Intrinsic;
  551. // Vector Absolute Value and Saturating Absolute Value.
  552. def int_arm_neon_vabs : Neon_1Arg_Intrinsic;
  553. def int_arm_neon_vqabs : Neon_1Arg_Intrinsic;
  554. // Vector Saturating Negate.
  555. def int_arm_neon_vqneg : Neon_1Arg_Intrinsic;
  556. // Vector Count Leading Sign/Zero Bits.
  557. def int_arm_neon_vcls : Neon_1Arg_Intrinsic;
  558. // Vector Reciprocal Estimate.
  559. def int_arm_neon_vrecpe : Neon_1Arg_Intrinsic;
  560. // Vector Reciprocal Square Root Estimate.
  561. def int_arm_neon_vrsqrte : Neon_1Arg_Intrinsic;
  562. // Vector Conversions Between Floating-point and Integer
  // All eight vcvt{a,n,p,m}{u,s} defs share Neon_CvtFPtoInt_1Arg_Intrinsic
  // (defined outside this section).
  563. def int_arm_neon_vcvtau : Neon_CvtFPtoInt_1Arg_Intrinsic;
  564. def int_arm_neon_vcvtas : Neon_CvtFPtoInt_1Arg_Intrinsic;
  565. def int_arm_neon_vcvtnu : Neon_CvtFPtoInt_1Arg_Intrinsic;
  566. def int_arm_neon_vcvtns : Neon_CvtFPtoInt_1Arg_Intrinsic;
  567. def int_arm_neon_vcvtpu : Neon_CvtFPtoInt_1Arg_Intrinsic;
  568. def int_arm_neon_vcvtps : Neon_CvtFPtoInt_1Arg_Intrinsic;
  569. def int_arm_neon_vcvtmu : Neon_CvtFPtoInt_1Arg_Intrinsic;
  570. def int_arm_neon_vcvtms : Neon_CvtFPtoInt_1Arg_Intrinsic;
  571. // Vector Conversions Between Floating-point and Fixed-point.
  // The fp2fx* defs use the FPToFx helper class; the fx*2fp defs use the
  // FxToFP helper class.
  572. def int_arm_neon_vcvtfp2fxs : Neon_CvtFPToFx_Intrinsic;
  573. def int_arm_neon_vcvtfp2fxu : Neon_CvtFPToFx_Intrinsic;
  574. def int_arm_neon_vcvtfxs2fp : Neon_CvtFxToFP_Intrinsic;
  575. def int_arm_neon_vcvtfxu2fp : Neon_CvtFxToFP_Intrinsic;
  576. // Vector Conversions Between Half-Precision and Single-Precision.
  // Fixed (non-overloaded) types: v4f32 -> v4i16 and v4i16 -> v4f32. The
  // half-precision side is typed as a v4i16 vector. Both are IntrNoMem.
  577. def int_arm_neon_vcvtfp2hf
  578. : DefaultAttrsIntrinsic<[llvm_v4i16_ty], [llvm_v4f32_ty], [IntrNoMem]>;
  579. def int_arm_neon_vcvthf2fp
  580. : DefaultAttrsIntrinsic<[llvm_v4f32_ty], [llvm_v4i16_ty], [IntrNoMem]>;
  581. // Narrowing Saturating Vector Moves.
  // All three use Neon_1Arg_Narrow_Intrinsic (defined outside this section).
  582. def int_arm_neon_vqmovns : Neon_1Arg_Narrow_Intrinsic;
  583. def int_arm_neon_vqmovnu : Neon_1Arg_Narrow_Intrinsic;
  584. def int_arm_neon_vqmovnsu : Neon_1Arg_Narrow_Intrinsic;
  585. // Vector Table Lookup.
  586. // The first 1-4 arguments are the table.
  // vtblN is declared with Neon_Tbl<N+1>Arg_Intrinsic.
  // NOTE(review): the helper names suggest one operand beyond the N table
  // vectors; the helper classes are defined outside this section - confirm.
  587. def int_arm_neon_vtbl1 : Neon_Tbl2Arg_Intrinsic;
  588. def int_arm_neon_vtbl2 : Neon_Tbl3Arg_Intrinsic;
  589. def int_arm_neon_vtbl3 : Neon_Tbl4Arg_Intrinsic;
  590. def int_arm_neon_vtbl4 : Neon_Tbl5Arg_Intrinsic;
  591. // Vector Table Extension.
  592. // Some elements of the destination vector may not be updated, so the original
  593. // value of that vector is passed as the first argument. The next 1-4
  594. // arguments after that are the table.
  // vtbxN is declared with Neon_Tbl<N+2>Arg_Intrinsic, i.e. one helper "size"
  // larger than the matching vtblN, in line with the extra leading operand
  // described above.
  595. def int_arm_neon_vtbx1 : Neon_Tbl3Arg_Intrinsic;
  596. def int_arm_neon_vtbx2 : Neon_Tbl4Arg_Intrinsic;
  597. def int_arm_neon_vtbx3 : Neon_Tbl5Arg_Intrinsic;
  598. def int_arm_neon_vtbx4 : Neon_Tbl6Arg_Intrinsic;
  599. // Vector and Scalar Rounding.
  // vrintn is the only def here using Neon_1FloatArg_Intrinsic; the other five
  // use Neon_1Arg_Intrinsic. Both helper classes are defined outside this
  // section. NOTE(review): presumably the float helper is what allows the
  // scalar form mentioned in the heading - confirm against the class.
  600. def int_arm_neon_vrintn : Neon_1FloatArg_Intrinsic;
  601. def int_arm_neon_vrintx : Neon_1Arg_Intrinsic;
  602. def int_arm_neon_vrinta : Neon_1Arg_Intrinsic;
  603. def int_arm_neon_vrintz : Neon_1Arg_Intrinsic;
  604. def int_arm_neon_vrintm : Neon_1Arg_Intrinsic;
  605. def int_arm_neon_vrintp : Neon_1Arg_Intrinsic;
  606. // De-interleaving vector loads from N-element structures.
  607. // Source operands are the address and alignment.
  // vld1..vld4 return 1..4 vectors; LLVMMatchType<0> ties every extra result
  // to the overloaded type of result 0. Operands are (overloaded pointer, i32).
  // All are declared IntrReadMem and IntrArgMemOnly.
  608. def int_arm_neon_vld1 : DefaultAttrsIntrinsic<
  609. [llvm_anyvector_ty], [llvm_anyptr_ty, llvm_i32_ty],
  610. [IntrReadMem, IntrArgMemOnly]>;
  611. def int_arm_neon_vld2 : DefaultAttrsIntrinsic<
  612. [llvm_anyvector_ty, LLVMMatchType<0>], [llvm_anyptr_ty, llvm_i32_ty],
  613. [IntrReadMem, IntrArgMemOnly]>;
  614. def int_arm_neon_vld3 : DefaultAttrsIntrinsic<
  615. [llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>],
  616. [llvm_anyptr_ty, llvm_i32_ty], [IntrReadMem, IntrArgMemOnly]>;
  617. def int_arm_neon_vld4 : DefaultAttrsIntrinsic<
  618. [llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>],
  619. [llvm_anyptr_ty, llvm_i32_ty], [IntrReadMem, IntrArgMemOnly]>;
  // vld1x2..vld1x4 return 2..4 vectors of one overloaded type. Unlike
  // vld1..vld4 they take a single operand, the pointer (declared as
  // LLVMAnyPointerType<LLVMMatchType<0>>), and no i32 alignment operand.
  620. def int_arm_neon_vld1x2 : DefaultAttrsIntrinsic<
  621. [llvm_anyvector_ty, LLVMMatchType<0>],
  622. [LLVMAnyPointerType<LLVMMatchType<0>>], [IntrReadMem, IntrArgMemOnly]>;
  623. def int_arm_neon_vld1x3 : DefaultAttrsIntrinsic<
  624. [llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>],
  625. [LLVMAnyPointerType<LLVMMatchType<0>>], [IntrReadMem, IntrArgMemOnly]>;
  626. def int_arm_neon_vld1x4 : DefaultAttrsIntrinsic<
  627. [llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>],
  628. [LLVMAnyPointerType<LLVMMatchType<0>>], [IntrReadMem, IntrArgMemOnly]>;
  629. // Vector load N-element structure to one lane.
  630. // Source operands are: the address, the N input vectors (since only one
  631. // lane is assigned), the lane number, and the alignment.
  // Shape for vldNlane: N results of one overloaded vector type; operands are
  // (overloaded pointer, N vectors of that same type, i32, i32). All three
  // are declared IntrReadMem and IntrArgMemOnly.
  632. def int_arm_neon_vld2lane : DefaultAttrsIntrinsic<
  633. [llvm_anyvector_ty, LLVMMatchType<0>],
  634. [llvm_anyptr_ty, LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty,
  635. llvm_i32_ty],
  636. [IntrReadMem, IntrArgMemOnly]>;
  637. def int_arm_neon_vld3lane : DefaultAttrsIntrinsic<
  638. [llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>],
  639. [llvm_anyptr_ty, LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>,
  640. llvm_i32_ty, llvm_i32_ty],
  641. [IntrReadMem, IntrArgMemOnly]>;
  642. def int_arm_neon_vld4lane : DefaultAttrsIntrinsic<
  643. [llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>],
  644. [llvm_anyptr_ty, LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>,
  645. LLVMMatchType<0>, llvm_i32_ty, llvm_i32_ty],
  646. [IntrReadMem, IntrArgMemOnly]>;
  647. // Vector load N-element structure to all lanes.
  648. // Source operands are the address and alignment.
  // vldNdup returns N vectors of one overloaded type and takes
  // (overloaded pointer, i32) - the same type shape as vld2..vld4. All are
  // declared IntrReadMem and IntrArgMemOnly.
  649. def int_arm_neon_vld2dup : DefaultAttrsIntrinsic<
  650. [llvm_anyvector_ty, LLVMMatchType<0>], [llvm_anyptr_ty, llvm_i32_ty],
  651. [IntrReadMem, IntrArgMemOnly]>;
  652. def int_arm_neon_vld3dup : DefaultAttrsIntrinsic<
  653. [llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>],
  654. [llvm_anyptr_ty, llvm_i32_ty], [IntrReadMem, IntrArgMemOnly]>;
  655. def int_arm_neon_vld4dup : DefaultAttrsIntrinsic<
  656. [llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>],
  657. [llvm_anyptr_ty, llvm_i32_ty], [IntrReadMem, IntrArgMemOnly]>;
  658. // Interleaving vector stores from N-element structures.
  659. // Source operands are: the address, the N vectors, and the alignment.
  // vst1..vst4 have no results. Operands are (overloaded pointer, N vectors,
  // i32). The pointer is overload slot 0 and the first vector is slot 1, so
  // the remaining vectors are tied with LLVMMatchType<1>.
  // NOTE(review): these four list only IntrArgMemOnly, while the vst1xN defs
  // further down also list NoCapture<ArgIndex<0>> - confirm the difference is
  // intended.
  660. def int_arm_neon_vst1 : DefaultAttrsIntrinsic<
  661. [], [llvm_anyptr_ty, llvm_anyvector_ty, llvm_i32_ty], [IntrArgMemOnly]>;
  662. def int_arm_neon_vst2 : DefaultAttrsIntrinsic<
  663. [], [llvm_anyptr_ty, llvm_anyvector_ty, LLVMMatchType<1>, llvm_i32_ty],
  664. [IntrArgMemOnly]>;
  665. def int_arm_neon_vst3 : DefaultAttrsIntrinsic<
  666. [],
  667. [llvm_anyptr_ty, llvm_anyvector_ty, LLVMMatchType<1>, LLVMMatchType<1>,
  668. llvm_i32_ty],
  669. [IntrArgMemOnly]>;
  670. def int_arm_neon_vst4 : DefaultAttrsIntrinsic<
  671. [],
  672. [llvm_anyptr_ty, llvm_anyvector_ty, LLVMMatchType<1>, LLVMMatchType<1>,
  673. LLVMMatchType<1>, llvm_i32_ty],
  674. [IntrArgMemOnly]>;
  // vst1x2..vst1x4 take (overloaded pointer, 2..4 vectors) with no trailing
  // i32 alignment operand, and mark the pointer (argument 0) NoCapture.
  675. def int_arm_neon_vst1x2 : DefaultAttrsIntrinsic<
  676. [], [llvm_anyptr_ty, llvm_anyvector_ty, LLVMMatchType<1>],
  677. [IntrArgMemOnly, NoCapture<ArgIndex<0>>]>;
  678. def int_arm_neon_vst1x3 : DefaultAttrsIntrinsic<
  679. [], [llvm_anyptr_ty, llvm_anyvector_ty, LLVMMatchType<1>, LLVMMatchType<1>],
  680. [IntrArgMemOnly, NoCapture<ArgIndex<0>>]>;
  681. def int_arm_neon_vst1x4 : DefaultAttrsIntrinsic<
  682. [],
  683. [llvm_anyptr_ty, llvm_anyvector_ty, LLVMMatchType<1>, LLVMMatchType<1>,
  684. LLVMMatchType<1>],
  685. [IntrArgMemOnly, NoCapture<ArgIndex<0>>]>;
  686. // Vector store N-element structure from one lane.
  687. // Source operands are: the address, the N vectors, the lane number, and
  688. // the alignment.
  // Shape for vstNlane: no results; operands are (overloaded pointer,
  // N vectors of one overloaded type, i32, i32). The vectors after the first
  // use LLVMMatchType<1> because the pointer occupies overload slot 0.
  // All three list only IntrArgMemOnly.
  689. def int_arm_neon_vst2lane : DefaultAttrsIntrinsic<
  690. [],
  691. [llvm_anyptr_ty, llvm_anyvector_ty, LLVMMatchType<1>, llvm_i32_ty,
  692. llvm_i32_ty],
  693. [IntrArgMemOnly]>;
  694. def int_arm_neon_vst3lane : DefaultAttrsIntrinsic<
  695. [],
  696. [llvm_anyptr_ty, llvm_anyvector_ty, LLVMMatchType<1>, LLVMMatchType<1>,
  697. llvm_i32_ty, llvm_i32_ty],
  698. [IntrArgMemOnly]>;
  699. def int_arm_neon_vst4lane : DefaultAttrsIntrinsic<
  700. [],
  701. [llvm_anyptr_ty, llvm_anyvector_ty, LLVMMatchType<1>, LLVMMatchType<1>,
  702. LLVMMatchType<1>, llvm_i32_ty, llvm_i32_ty],
  703. [IntrArgMemOnly]>;
  704. // Vector bitwise select.
  // One overloaded vector type is used throughout: the result and all three
  // operands share it (LLVMMatchType<0>). Declared IntrNoMem.
  705. def int_arm_neon_vbsl : DefaultAttrsIntrinsic<
  706. [llvm_anyvector_ty], [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>],
  707. [IntrNoMem]>;
  708. // Crypto instructions
  // Helper classes with fixed (non-overloaded) types, all IntrNoMem:
  //   AES_1Arg_Intrinsic:       (v16i8)               -> v16i8
  //   AES_2Arg_Intrinsic:       (v16i8, v16i8)        -> v16i8
  //   SHA_1Arg_Intrinsic:       (i32)                 -> i32
  //   SHA_2Arg_Intrinsic:       (v4i32, v4i32)        -> v4i32
  //   SHA_3Arg_i32_Intrinsic:   (v4i32, i32, v4i32)   -> v4i32
  //   SHA_3Arg_v4i32_Intrinsic: (v4i32, v4i32, v4i32) -> v4i32
  709. class AES_1Arg_Intrinsic : DefaultAttrsIntrinsic<
  710. [llvm_v16i8_ty], [llvm_v16i8_ty], [IntrNoMem]>;
  711. class AES_2Arg_Intrinsic : DefaultAttrsIntrinsic<
  712. [llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
  713. class SHA_1Arg_Intrinsic : DefaultAttrsIntrinsic<
  714. [llvm_i32_ty], [llvm_i32_ty], [IntrNoMem]>;
  715. class SHA_2Arg_Intrinsic : DefaultAttrsIntrinsic<
  716. [llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
  717. class SHA_3Arg_i32_Intrinsic : DefaultAttrsIntrinsic<
  718. [llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
  719. class SHA_3Arg_v4i32_Intrinsic : DefaultAttrsIntrinsic<
  720. [llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty,llvm_v4i32_ty], [IntrNoMem]>;
  // AES intrinsics.
  721. def int_arm_neon_aesd : AES_2Arg_Intrinsic;
  722. def int_arm_neon_aese : AES_2Arg_Intrinsic;
  723. def int_arm_neon_aesimc : AES_1Arg_Intrinsic;
  724. def int_arm_neon_aesmc : AES_1Arg_Intrinsic;
  // SHA intrinsics, grouped by the helper class they use.
  725. def int_arm_neon_sha1h : SHA_1Arg_Intrinsic;
  726. def int_arm_neon_sha1su1 : SHA_2Arg_Intrinsic;
  727. def int_arm_neon_sha256su0 : SHA_2Arg_Intrinsic;
  728. def int_arm_neon_sha1c : SHA_3Arg_i32_Intrinsic;
  729. def int_arm_neon_sha1m : SHA_3Arg_i32_Intrinsic;
  730. def int_arm_neon_sha1p : SHA_3Arg_i32_Intrinsic;
  731. def int_arm_neon_sha1su0: SHA_3Arg_v4i32_Intrinsic;
  732. def int_arm_neon_sha256h: SHA_3Arg_v4i32_Intrinsic;
  733. def int_arm_neon_sha256h2: SHA_3Arg_v4i32_Intrinsic;
  734. def int_arm_neon_sha256su1: SHA_3Arg_v4i32_Intrinsic;
  // These two use the generic Neon_3Arg_Intrinsic helper (defined outside this
  // section) rather than the AES_/SHA_ classes above.
  735. def int_arm_neon_vqrdmlah : Neon_3Arg_Intrinsic;
  736. def int_arm_neon_vqrdmlsh : Neon_3Arg_Intrinsic;
  737. // Armv8.2-A dot product instructions
  // Neon_Dot_Intrinsic has two independent overloaded vector types: slot 0 is
  // the result and the first operand; slot 1 is the second operand, and the
  // third operand is tied to it via LLVMMatchType<1>. Declared IntrNoMem.
  738. class Neon_Dot_Intrinsic
  739. : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
  740. [LLVMMatchType<0>, llvm_anyvector_ty,
  741. LLVMMatchType<1>],
  742. [IntrNoMem]>;
  743. def int_arm_neon_udot : Neon_Dot_Intrinsic;
  744. def int_arm_neon_sdot : Neon_Dot_Intrinsic;
  745. // v8.6-A Matrix Multiply Intrinsics
  // Neon_MatMul_Intrinsic is declared with exactly the same result, operand
  // and attribute lists as Neon_Dot_Intrinsic above.
  746. class Neon_MatMul_Intrinsic
  747. : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
  748. [LLVMMatchType<0>, llvm_anyvector_ty,
  749. LLVMMatchType<1>],
  750. [IntrNoMem]>;
  751. def int_arm_neon_ummla : Neon_MatMul_Intrinsic;
  752. def int_arm_neon_smmla : Neon_MatMul_Intrinsic;
  753. def int_arm_neon_usmmla : Neon_MatMul_Intrinsic;
  // usdot sits under the v8.6-A heading but uses the dot-product class.
  754. def int_arm_neon_usdot : Neon_Dot_Intrinsic;
  755. // v8.6-A Bfloat Intrinsics
  // vcvtfp2bf: v4f32 -> overloaded vector result. IntrNoMem.
  756. def int_arm_neon_vcvtfp2bf
  757. : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [llvm_v4f32_ty], [IntrNoMem]>;
  // vcvtbfp2bf: scalar float -> scalar bfloat. IntrNoMem.
  758. def int_arm_neon_vcvtbfp2bf
  759. : DefaultAttrsIntrinsic<[llvm_bfloat_ty], [llvm_float_ty], [IntrNoMem]>;
  // bfdot reuses Neon_Dot_Intrinsic.
  760. def int_arm_neon_bfdot : Neon_Dot_Intrinsic;
  // bfmmla: (v4f32, v8bf16, v8bf16) -> v4f32 with fixed types. IntrNoMem.
  761. def int_arm_neon_bfmmla
  762. : DefaultAttrsIntrinsic<[llvm_v4f32_ty],
  763. [llvm_v4f32_ty, llvm_v8bf16_ty, llvm_v8bf16_ty],
  764. [IntrNoMem]>;
  // Neon_BF16FML_Intrinsic has the same fixed signature as bfmmla above:
  // (v4f32, v8bf16, v8bf16) -> v4f32, IntrNoMem.
  765. class Neon_BF16FML_Intrinsic
  766. : DefaultAttrsIntrinsic<[llvm_v4f32_ty],
  767. [llvm_v4f32_ty, llvm_v8bf16_ty, llvm_v8bf16_ty],
  768. [IntrNoMem]>;
  769. def int_arm_neon_bfmlalb : Neon_BF16FML_Intrinsic;
  770. def int_arm_neon_bfmlalt : Neon_BF16FML_Intrinsic;
  // Scalar cls intrinsics: both return i32; cls takes an i32 operand and
  // cls64 takes an i64 operand. IntrNoMem.
  771. def int_arm_cls: DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty],
  772. [IntrNoMem]>;
  773. def int_arm_cls64: DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i64_ty],
  774. [IntrNoMem]>;
  // MVE vctp intrinsics: each takes one i32 and returns a vector of i1.
  // The result has 16, 8, 4 or 2 lanes for vctp8, vctp16, vctp32 and vctp64
  // respectively. IntrNoMem.
  775. def int_arm_mve_vctp8 : DefaultAttrsIntrinsic<[llvm_v16i1_ty], [llvm_i32_ty],
  776. [IntrNoMem]>;
  777. def int_arm_mve_vctp16 : DefaultAttrsIntrinsic<[llvm_v8i1_ty], [llvm_i32_ty],
  778. [IntrNoMem]>;
  779. def int_arm_mve_vctp32 : DefaultAttrsIntrinsic<[llvm_v4i1_ty], [llvm_i32_ty],
  780. [IntrNoMem]>;
  781. def int_arm_mve_vctp64 : DefaultAttrsIntrinsic<[llvm_v2i1_ty], [llvm_i32_ty],
  782. [IntrNoMem]>;
  783. // v8.3-A Floating-point complex add
  // Both rotations use Neon_2Arg_Intrinsic (defined outside this section).
  784. def int_arm_neon_vcadd_rot90 : Neon_2Arg_Intrinsic;
  785. def int_arm_neon_vcadd_rot270 : Neon_2Arg_Intrinsic;
  786. // GNU eabi mcount
  787. // TODO: Add applicable default attributes.
  // Declared with plain Intrinsic (not DefaultAttrsIntrinsic): no results, no
  // operands and an empty property list.
  788. def int_arm_gnu_eabi_mcount : Intrinsic<[], [], []>;
  // pred_i2v: i32 -> overloaded vector. pred_v2i: overloaded vector -> i32.
  // NOTE(review): presumably these convert between the integer and vector
  // forms of an MVE predicate - confirm against the users of these records.
  789. def int_arm_mve_pred_i2v : DefaultAttrsIntrinsic<
  790. [llvm_anyvector_ty], [llvm_i32_ty], [IntrNoMem]>;
  791. def int_arm_mve_pred_v2i : DefaultAttrsIntrinsic<
  792. [llvm_i32_ty], [llvm_anyvector_ty], [IntrNoMem]>;
  // vreinterpretq: result and operand are two independently overloaded
  // vector types.
  793. def int_arm_mve_vreinterpretq : DefaultAttrsIntrinsic<
  794. [llvm_anyvector_ty], [llvm_anyvector_ty], [IntrNoMem]>;
  // Explicitly written MVE *_predicated intrinsics. All are IntrNoMem.
  // Common shape: two operands of the result type, an optional i32 flag
  // (annotated "unsigned" where present), then a separately overloaded vector
  // followed by one more operand of the result type.
  // NOTE(review): the trailing (overloaded vector, LLVMMatchType<0>) pair has
  // the same shape that int_arm_mve_vcvt_fp_int_predicated annotates as
  // "predicate" and "inactive"; presumably the same roles apply here.
  795. def int_arm_mve_min_predicated: DefaultAttrsIntrinsic<[llvm_anyvector_ty],
  796. [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty /* unsigned */,
  797. llvm_anyvector_ty, LLVMMatchType<0>],
  798. [IntrNoMem]>;
  799. def int_arm_mve_max_predicated: DefaultAttrsIntrinsic<[llvm_anyvector_ty],
  800. [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty /* unsigned */,
  801. llvm_anyvector_ty, LLVMMatchType<0>],
  802. [IntrNoMem]>;
  803. def int_arm_mve_abd_predicated: DefaultAttrsIntrinsic<[llvm_anyvector_ty],
  804. [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty /* unsigned */,
  805. llvm_anyvector_ty, LLVMMatchType<0>], [IntrNoMem]>;
  // The next group (add .. mul) has no i32 flag operand.
  806. def int_arm_mve_add_predicated: DefaultAttrsIntrinsic<[llvm_anyvector_ty],
  807. [LLVMMatchType<0>, LLVMMatchType<0>, llvm_anyvector_ty, LLVMMatchType<0>],
  808. [IntrNoMem]>;
  809. def int_arm_mve_and_predicated: DefaultAttrsIntrinsic<[llvm_anyvector_ty],
  810. [LLVMMatchType<0>, LLVMMatchType<0>, llvm_anyvector_ty, LLVMMatchType<0>],
  811. [IntrNoMem]>;
  812. def int_arm_mve_bic_predicated: DefaultAttrsIntrinsic<[llvm_anyvector_ty],
  813. [LLVMMatchType<0>, LLVMMatchType<0>, llvm_anyvector_ty, LLVMMatchType<0>],
  814. [IntrNoMem]>;
  815. def int_arm_mve_eor_predicated: DefaultAttrsIntrinsic<[llvm_anyvector_ty],
  816. [LLVMMatchType<0>, LLVMMatchType<0>, llvm_anyvector_ty, LLVMMatchType<0>],
  817. [IntrNoMem]>;
  818. def int_arm_mve_orn_predicated: DefaultAttrsIntrinsic<[llvm_anyvector_ty],
  819. [LLVMMatchType<0>, LLVMMatchType<0>, llvm_anyvector_ty, LLVMMatchType<0>],
  820. [IntrNoMem]>;
  821. def int_arm_mve_orr_predicated: DefaultAttrsIntrinsic<[llvm_anyvector_ty],
  822. [LLVMMatchType<0>, LLVMMatchType<0>, llvm_anyvector_ty, LLVMMatchType<0>],
  823. [IntrNoMem]>;
  824. def int_arm_mve_sub_predicated: DefaultAttrsIntrinsic<[llvm_anyvector_ty],
  825. [LLVMMatchType<0>, LLVMMatchType<0>, llvm_anyvector_ty, LLVMMatchType<0>],
  826. [IntrNoMem]>;
  827. def int_arm_mve_mul_predicated: DefaultAttrsIntrinsic<[llvm_anyvector_ty],
  828. [LLVMMatchType<0>, LLVMMatchType<0>, llvm_anyvector_ty, LLVMMatchType<0>],
  829. [IntrNoMem]>;
  // mulh and rmulh carry the i32 "unsigned" flag; qdmulh and qrdmulh do not.
  830. def int_arm_mve_mulh_predicated: DefaultAttrsIntrinsic<[llvm_anyvector_ty],
  831. [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty /* unsigned */,
  832. llvm_anyvector_ty, LLVMMatchType<0>],
  833. [IntrNoMem]>;
  834. def int_arm_mve_qdmulh_predicated: DefaultAttrsIntrinsic<[llvm_anyvector_ty],
  835. [LLVMMatchType<0>, LLVMMatchType<0>, llvm_anyvector_ty, LLVMMatchType<0>],
  836. [IntrNoMem]>;
  837. def int_arm_mve_rmulh_predicated: DefaultAttrsIntrinsic<[llvm_anyvector_ty],
  838. [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty /* unsigned */,
  839. llvm_anyvector_ty, LLVMMatchType<0>],
  840. [IntrNoMem]>;
  841. def int_arm_mve_qrdmulh_predicated: DefaultAttrsIntrinsic<[llvm_anyvector_ty],
  842. [LLVMMatchType<0>, LLVMMatchType<0>, llvm_anyvector_ty, LLVMMatchType<0>],
  843. [IntrNoMem]>;
  // The two mull variants take their vector inputs in a separate overload
  // slot (slot 1) from the result (slot 0). mull_int has two i32 flags
  // ("unsigned", "top"); mull_poly has a single, unannotated i32.
  844. def int_arm_mve_mull_int_predicated: DefaultAttrsIntrinsic<[llvm_anyvector_ty],
  845. [llvm_anyvector_ty, LLVMMatchType<1>, llvm_i32_ty /* unsigned */,
  846. llvm_i32_ty /* top */, llvm_anyvector_ty, LLVMMatchType<0>],
  847. [IntrNoMem]>;
  848. def int_arm_mve_mull_poly_predicated: DefaultAttrsIntrinsic<[llvm_anyvector_ty],
  849. [llvm_anyvector_ty, LLVMMatchType<1>, llvm_i32_ty, llvm_anyvector_ty,
  850. LLVMMatchType<0>],
  851. [IntrNoMem]>;
  852. def int_arm_mve_qadd_predicated: DefaultAttrsIntrinsic<[llvm_anyvector_ty],
  853. [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty /* unsigned */,
  854. llvm_anyvector_ty, LLVMMatchType<0>], [IntrNoMem]>;
  855. def int_arm_mve_hadd_predicated: DefaultAttrsIntrinsic<[llvm_anyvector_ty],
  856. [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty /* unsigned */,
  857. llvm_anyvector_ty, LLVMMatchType<0>], [IntrNoMem]>;
  858. def int_arm_mve_rhadd_predicated: DefaultAttrsIntrinsic<[llvm_anyvector_ty],
  859. [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty /* unsigned */,
  860. llvm_anyvector_ty, LLVMMatchType<0>], [IntrNoMem]>;
  861. def int_arm_mve_qsub_predicated: DefaultAttrsIntrinsic<[llvm_anyvector_ty],
  862. [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty /* unsigned */,
  863. llvm_anyvector_ty, LLVMMatchType<0>], [IntrNoMem]>;
  864. def int_arm_mve_hsub_predicated: DefaultAttrsIntrinsic<[llvm_anyvector_ty],
  865. [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty /* unsigned */,
  866. llvm_anyvector_ty, LLVMMatchType<0>], [IntrNoMem]>;
  // The four defs below end with the separately overloaded vector operand;
  // unlike the defs above there is no trailing LLVMMatchType<0> operand.
  867. def int_arm_mve_vmina_predicated: DefaultAttrsIntrinsic<[llvm_anyvector_ty],
  868. [LLVMMatchType<0>, LLVMMatchType<0>, llvm_anyvector_ty],
  869. [IntrNoMem]>;
  870. def int_arm_mve_vmaxa_predicated: DefaultAttrsIntrinsic<[llvm_anyvector_ty],
  871. [LLVMMatchType<0>, LLVMMatchType<0>, llvm_anyvector_ty],
  872. [IntrNoMem]>;
  873. def int_arm_mve_vminnma_predicated: DefaultAttrsIntrinsic<[llvm_anyvector_ty],
  874. [LLVMMatchType<0>, LLVMMatchType<0>, llvm_anyvector_ty],
  875. [IntrNoMem]>;
  876. def int_arm_mve_vmaxnma_predicated: DefaultAttrsIntrinsic<[llvm_anyvector_ty],
  877. [LLVMMatchType<0>, LLVMMatchType<0>, llvm_anyvector_ty],
  878. [IntrNoMem]>;
  // MVEPredicated: defines a pair of intrinsics from one description.
  //   rets    - result types, shared by both records
  //   params  - operand types of the unsuffixed record
  //   pred    - type of the extra operand appended for the _predicated record
  //             (defaults to an overloaded vector)
  //   props   - intrinsic properties for both records (default [IntrNoMem])
  //   sdprops - SDNode properties for both records (default none)
  // Records produced: <name> with `params`, and <name>_predicated with
  // `params # [pred]`.
  // NOTE(review): the "" argument occupies the DefaultAttrsIntrinsic slot
  // between props and sdprops - presumably the name override; confirm in
  // Intrinsics.td.
  879. multiclass MVEPredicated<list<LLVMType> rets, list<LLVMType> params,
  880. LLVMType pred = llvm_anyvector_ty,
  881. list<IntrinsicProperty> props = [IntrNoMem],
  882. list<SDNodeProperty> sdprops = []> {
  883. def "": DefaultAttrsIntrinsic<rets, params, props, "", sdprops>;
  884. def _predicated: DefaultAttrsIntrinsic<rets, params # [pred], props, "",
  885. sdprops>;
  886. }
  // MVEPredicatedM: like MVEPredicated (without sdprops), but the _predicated
  // record gets two extra trailing operands: `pred`, then one operand of the
  // first result's type. When rets[0] is llvm_anyvector_ty that operand is
  // written as LLVMMatchType<0> so it is tied to the result instead of
  // opening a new overload slot; otherwise rets[0] is used directly.
  887. multiclass MVEPredicatedM<list<LLVMType> rets, list<LLVMType> params,
  888. LLVMType pred = llvm_anyvector_ty,
  889. list<IntrinsicProperty> props = [IntrNoMem]> {
  890. def "": DefaultAttrsIntrinsic<rets, params, props>;
  891. def _predicated: DefaultAttrsIntrinsic<rets, params # [pred,
  892. !if(!eq(rets[0], llvm_anyvector_ty),
  893. LLVMMatchType<0>, rets[0])], props>;
  894. }
  // MVE_minmaxv: four MVEPredicated pairs with suffixes v, av, nmv and nmav.
  //   v    : (i32, overloaded vector, i32 unsigned flag) -> i32
  //   av   : (i32, overloaded vector)                    -> i32
  //   nmv  : (T, overloaded vector) -> T, with T an overloaded float type
  //   nmav : same signature as nmv
  // Each pair also has a _predicated form with the default predicate operand
  // appended.
  895. multiclass MVE_minmaxv {
  896. defm v: MVEPredicated<[llvm_i32_ty],
  897. [llvm_i32_ty, llvm_anyvector_ty, llvm_i32_ty /* unsigned */]>;
  898. defm av: MVEPredicated<[llvm_i32_ty],
  899. [llvm_i32_ty, llvm_anyvector_ty]>;
  900. defm nmv: MVEPredicated<[llvm_anyfloat_ty],
  901. [LLVMMatchType<0>, llvm_anyvector_ty]>;
  902. defm nmav: MVEPredicated<[llvm_anyfloat_ty],
  903. [LLVMMatchType<0>, llvm_anyvector_ty]>;
  904. }
  // Instantiated once with the int_arm_mve_min prefix and once with
  // int_arm_mve_max, so record names are prefix + suffix, e.g.
  // int_arm_mve_minv and int_arm_mve_minv_predicated.
  905. defm int_arm_mve_min: MVE_minmaxv;
  906. defm int_arm_mve_max: MVE_minmaxv;
  // addv returns i32 and addlv returns i64; both take
  // (overloaded vector, i32 unsigned flag).
  907. defm int_arm_mve_addv: MVEPredicated<[llvm_i32_ty],
  908. [llvm_anyvector_ty, llvm_i32_ty /* unsigned */]>;
  909. defm int_arm_mve_addlv: MVEPredicated<[llvm_i64_ty],
  910. [llvm_anyvector_ty, llvm_i32_ty /* unsigned */]>;
  911. // Intrinsic with a predicated and a non-predicated case. The predicated case
  912. // has two additional parameters: inactive (the value for inactive lanes, can
  913. // be undef) and predicate.
  // Operand order produced by the multiclass:
  //   unsuffixed  : flags # params
  //   _predicated : flags # [inactive] # params # [predicate]
  // So `inactive` is inserted between the flags and the normal operands, and
  // `predicate` is appended last. `props` defaults to [IntrNoMem].
  914. multiclass MVEMXPredicated<list<LLVMType> rets, list<LLVMType> flags,
  915. list<LLVMType> params, LLVMType inactive,
  916. LLVMType predicate,
  917. list<IntrinsicProperty> props = [IntrNoMem]> {
  918. def "": DefaultAttrsIntrinsic<rets, flags # params, props>;
  919. def _predicated: DefaultAttrsIntrinsic<
  920. rets, flags # [inactive] # params # [predicate], props>;
  921. }
  // vcvt_narrow: (v8f16, v4f32, i32) -> v8f16; the predicated form appends a
  // v4i1 predicate (fixed type, not overloaded).
  922. defm int_arm_mve_vcvt_narrow: MVEPredicated<[llvm_v8f16_ty],
  923. [llvm_v8f16_ty, llvm_v4f32_ty, llvm_i32_ty], llvm_v4i1_ty>;
  // vcvt_widen: (v8f16, i32) -> v4f32 with no flags; the predicated form is
  // (v4f32 inactive, v8f16, i32, v4i1 predicate).
  924. defm int_arm_mve_vcvt_widen: MVEMXPredicated<[llvm_v4f32_ty], [],
  925. [llvm_v8f16_ty, llvm_i32_ty], llvm_v4f32_ty, llvm_v4i1_ty>;
  // Gather/scatter intrinsics. All are MVEPredicated pairs with an overloaded
  // vector predicate. Loads are declared IntrReadMem, stores IntrWriteMem,
  // and every pair passes [SDNPMemOperand] as its SDNode properties.
  // gather_base: (overloaded vector, i32) -> overloaded vector.
  926. defm int_arm_mve_vldr_gather_base: MVEPredicated<
  927. [llvm_anyvector_ty], [llvm_anyvector_ty, llvm_i32_ty],
  928. llvm_anyvector_ty, [IntrReadMem], [SDNPMemOperand]>;
  // gather_base_wb: two overloaded vector results; the first operand is tied
  // to the type of the second result (LLVMMatchType<1>).
  929. defm int_arm_mve_vldr_gather_base_wb: MVEPredicated<
  930. [llvm_anyvector_ty, llvm_anyvector_ty],
  931. [LLVMMatchType<1>, llvm_i32_ty], llvm_anyvector_ty, [IntrReadMem],
  932. [SDNPMemOperand]>;
  // scatter_base: no result; (overloaded vector, i32, overloaded vector).
  933. defm int_arm_mve_vstr_scatter_base: MVEPredicated<
  934. [], [llvm_anyvector_ty, llvm_i32_ty, llvm_anyvector_ty],
  935. llvm_anyvector_ty, [IntrWriteMem], [SDNPMemOperand]>;
  // scatter_base_wb: one overloaded vector result; the first operand is tied
  // to the result type.
  936. defm int_arm_mve_vstr_scatter_base_wb: MVEPredicated<
  937. [llvm_anyvector_ty], [LLVMMatchType<0>, llvm_i32_ty, llvm_anyvector_ty],
  938. llvm_anyvector_ty, [IntrWriteMem], [SDNPMemOperand]>;
  939. // gather_offset takes three i32 parameters. The first is the size of
  940. // memory element loaded, in bits. The second is a left bit shift to
  941. // apply to each offset in the vector parameter (must be either 0, or
  942. // correspond to the element size of the destination vector type). The
  943. // last is 1 to indicate zero extension (if the load is widening), or
  944. // 0 for sign extension.
  945. //
  946. // scatter_offset has the first two of those parameters, but since it
  947. // narrows rather than widening, it doesn't have the last one.
  // gather_offset operands: (overloaded pointer, overloaded vector, i32, i32,
  // i32), in the order described above.
  948. defm int_arm_mve_vldr_gather_offset: MVEPredicated<
  949. [llvm_anyvector_ty], [llvm_anyptr_ty, llvm_anyvector_ty,
  950. llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], llvm_anyvector_ty, [IntrReadMem],
  951. [SDNPMemOperand]>;
  // scatter_offset operands: (overloaded pointer, two independently
  // overloaded vectors, i32, i32); no result.
  952. defm int_arm_mve_vstr_scatter_offset: MVEPredicated<
  953. [], [llvm_anyptr_ty, llvm_anyvector_ty, llvm_anyvector_ty,
  954. llvm_i32_ty, llvm_i32_ty], llvm_anyvector_ty, [IntrWriteMem],
  955. [SDNPMemOperand]>;
  // MVE vector shifts. All are IntrNoMem.
  // shl_imm_predicated / shr_imm_predicated are written out explicitly:
  // (T, i32, [i32 unsigned flag for shr], overloaded vector, T) -> T.
  956. def int_arm_mve_shl_imm_predicated: DefaultAttrsIntrinsic<[llvm_anyvector_ty],
  957. [LLVMMatchType<0>, llvm_i32_ty, llvm_anyvector_ty, LLVMMatchType<0>],
  958. [IntrNoMem]>;
  959. def int_arm_mve_shr_imm_predicated: DefaultAttrsIntrinsic<[llvm_anyvector_ty],
  960. [LLVMMatchType<0>, llvm_i32_ty, llvm_i32_ty, // extra i32 is unsigned flag
  961. llvm_anyvector_ty, LLVMMatchType<0>],
  962. [IntrNoMem]>;
  // The MVEPredicatedM pairs below get (predicate, operand of the result
  // type) appended in their _predicated form.
  963. defm int_arm_mve_vqshl_imm: MVEPredicatedM<[llvm_anyvector_ty],
  964. [LLVMMatchType<0>, llvm_i32_ty /*shiftcount*/, llvm_i32_ty /*unsigned*/]>;
  965. defm int_arm_mve_vrshr_imm: MVEPredicatedM<[llvm_anyvector_ty],
  966. [LLVMMatchType<0>, llvm_i32_ty /*shiftcount*/, llvm_i32_ty /*unsigned*/]>;
  967. defm int_arm_mve_vqshlu_imm: MVEPredicatedM<[llvm_anyvector_ty],
  968. [LLVMMatchType<0>, llvm_i32_ty /*shiftcount*/]>;
  // vshll_imm: the input vector is a separate overload slot from the result.
  969. defm int_arm_mve_vshll_imm: MVEPredicatedM<[llvm_anyvector_ty],
  970. [llvm_anyvector_ty, llvm_i32_ty /*shiftcount*/, llvm_i32_ty /*unsigned*/,
  971. llvm_i32_ty /*top-half*/]>;
  // The MVEPredicated pairs below get only the predicate appended in their
  // _predicated form. vsli/vsri: (T, T, i32) -> T.
  972. defm int_arm_mve_vsli: MVEPredicated<
  973. [llvm_anyvector_ty], [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty]>;
  974. defm int_arm_mve_vsri: MVEPredicated<
  975. [llvm_anyvector_ty], [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty]>;
  // vshrn: (T, separately overloaded vector, six i32 operands) -> T; the
  // inline annotations name each i32.
  976. defm int_arm_mve_vshrn: MVEPredicated<
  977. [llvm_anyvector_ty], [LLVMMatchType<0>, llvm_anyvector_ty,
  978. llvm_i32_ty /*shiftcount*/, llvm_i32_ty /*saturate*/, llvm_i32_ty /*round*/,
  979. llvm_i32_ty /*unsigned-out*/, llvm_i32_ty /*unsigned-in*/,
  980. llvm_i32_ty /*top-half*/]>;
  // vshl_scalar takes an i32 shift count; vshl_vector takes an overloaded
  // vector of shift counts and uses MVEPredicatedM rather than MVEPredicated.
  981. defm int_arm_mve_vshl_scalar: MVEPredicated<
  982. [llvm_anyvector_ty], [LLVMMatchType<0>, llvm_i32_ty /*shiftcount*/,
  983. llvm_i32_ty /*saturate*/, llvm_i32_ty /*round*/, llvm_i32_ty /*unsigned*/]>;
  984. defm int_arm_mve_vshl_vector: MVEPredicatedM<
  985. [llvm_anyvector_ty], [LLVMMatchType<0>, llvm_anyvector_ty /*shiftcounts*/,
  986. llvm_i32_ty /*saturate*/, llvm_i32_ty /*round*/, llvm_i32_ty /*unsigned*/]>;
  987. // MVE scalar shifts.
  // ARM_MVE_qrshift_single: the `value` type list is used both as the result
  // list and as the leading operands; operands are then followed by one i32
  // and, if given, the `saturate` list. IntrNoMem.
  // NOTE(review): presumably the extra i32 is the shift amount - confirm.
  988. class ARM_MVE_qrshift_single<list<LLVMType> value,
  989. list<LLVMType> saturate = []> :
  990. DefaultAttrsIntrinsic<value, value # [llvm_i32_ty] # saturate, [IntrNoMem]>;
  // ARM_MVE_qrshift: produces <name> (value is one i32, never any saturate
  // operand) and <name>l (value is two i32s, plus `saturate` if supplied).
  991. multiclass ARM_MVE_qrshift<list<LLVMType> saturate = []> {
  992. // Most of these shifts come in 32- and 64-bit versions. But only
  993. // the 64-bit ones have the extra saturation argument (if any).
  994. def "": ARM_MVE_qrshift_single<[llvm_i32_ty]>;
  995. def l: ARM_MVE_qrshift_single<[llvm_i32_ty, llvm_i32_ty], saturate>;
  996. }
  // The first four pairs have no saturate operand; uqrshl and sqrshr add one
  // i32 to their "l" form.
  997. defm int_arm_mve_urshr: ARM_MVE_qrshift;
  998. defm int_arm_mve_uqshl: ARM_MVE_qrshift;
  999. defm int_arm_mve_srshr: ARM_MVE_qrshift;
  1000. defm int_arm_mve_sqshl: ARM_MVE_qrshift;
  1001. defm int_arm_mve_uqrshl: ARM_MVE_qrshift<[llvm_i32_ty]>;
  1002. defm int_arm_mve_sqrshr: ARM_MVE_qrshift<[llvm_i32_ty]>;
  1003. // LSLL and ASRL only have 64-bit versions, not 32.
  // Declared directly from the single class with a two-i32 value:
  // (i32, i32, i32) -> (i32, i32).
  1004. def int_arm_mve_lsll: ARM_MVE_qrshift_single<[llvm_i32_ty, llvm_i32_ty]>;
  1005. def int_arm_mve_asrl: ARM_MVE_qrshift_single<[llvm_i32_ty, llvm_i32_ty]>;
  // Unpredicated MVE arithmetic intrinsics plus the carry/shift-with-carry
  // forms. All are IntrNoMem; T below is the overloaded result vector type.
  // vabd: (T, T, i32 unsigned flag) -> T.
  1006. def int_arm_mve_vabd: DefaultAttrsIntrinsic<
  1007. [llvm_anyvector_ty],
  1008. [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty /* unsigned */],
  1009. [IntrNoMem]>;
  // vadc/vsbc return two values (T, i32) and take (T, T, i32).
  // NOTE(review): presumably the i32 operand/result carries the carry state -
  // confirm against the lowering code.
  1010. def int_arm_mve_vadc: DefaultAttrsIntrinsic<
  1011. [llvm_anyvector_ty, llvm_i32_ty],
  1012. [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty], [IntrNoMem]>;
  1013. def int_arm_mve_vsbc: DefaultAttrsIntrinsic<
  1014. [llvm_anyvector_ty, llvm_i32_ty],
  1015. [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty], [IntrNoMem]>;
  // Predicated forms: (T, T, T, i32, overloaded vector) -> (T, i32), i.e. one
  // more T operand than the unpredicated form plus a trailing overloaded
  // vector.
  1016. def int_arm_mve_vadc_predicated: DefaultAttrsIntrinsic<
  1017. [llvm_anyvector_ty, llvm_i32_ty],
  1018. [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>,
  1019. llvm_i32_ty, llvm_anyvector_ty], [IntrNoMem]>;
  1020. def int_arm_mve_vsbc_predicated: DefaultAttrsIntrinsic<
  1021. [llvm_anyvector_ty, llvm_i32_ty],
  1022. [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>,
  1023. llvm_i32_ty, llvm_anyvector_ty], [IntrNoMem]>;
  // vshlc: results are (i32, vector) in that order - the i32 comes first
  // here, unlike vadc/vsbc. The predicated form appends an overloaded vector.
  1024. def int_arm_mve_vshlc: DefaultAttrsIntrinsic<
  1025. [llvm_i32_ty /* bits shifted out */, llvm_anyvector_ty],
  1026. [LLVMMatchType<0>, llvm_i32_ty /* bits shifted in */,
  1027. llvm_i32_ty /* shift count */], [IntrNoMem]>;
  1028. def int_arm_mve_vshlc_predicated: DefaultAttrsIntrinsic<
  1029. [llvm_i32_ty /* bits shifted out */, llvm_anyvector_ty],
  1030. [LLVMMatchType<0>, llvm_i32_ty /* bits shifted in */,
  1031. llvm_i32_ty /* shift count */, llvm_anyvector_ty], [IntrNoMem]>;
  // Two-operand arithmetic: (T, T[, i32 unsigned flag]) -> T. vqdmulh and
  // vqrdmulh have no flag operand.
  1032. def int_arm_mve_vmulh: DefaultAttrsIntrinsic<
  1033. [llvm_anyvector_ty],
  1034. [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty /* unsigned */],
  1035. [IntrNoMem]>;
  1036. def int_arm_mve_vqdmulh: DefaultAttrsIntrinsic<
  1037. [llvm_anyvector_ty],
  1038. [LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>;
  1039. def int_arm_mve_vhadd: DefaultAttrsIntrinsic<
  1040. [llvm_anyvector_ty],
  1041. [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty /* unsigned */],
  1042. [IntrNoMem]>;
  1043. def int_arm_mve_vrhadd: DefaultAttrsIntrinsic<
  1044. [llvm_anyvector_ty],
  1045. [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty /* unsigned */],
  1046. [IntrNoMem]>;
  1047. def int_arm_mve_vhsub: DefaultAttrsIntrinsic<
  1048. [llvm_anyvector_ty],
  1049. [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty /* unsigned */],
  1050. [IntrNoMem]>;
  1051. def int_arm_mve_vrmulh: DefaultAttrsIntrinsic<
  1052. [llvm_anyvector_ty],
  1053. [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty /* unsigned */],
  1054. [IntrNoMem]>;
  1055. def int_arm_mve_vqrdmulh: DefaultAttrsIntrinsic<
  1056. [llvm_anyvector_ty],
  1057. [LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>;
  // vmull / vmull_poly: the two inputs share overload slot 1, separate from
  // the result (slot 0). vmull has two i32 flags; vmull_poly has a single,
  // unannotated i32.
  1058. def int_arm_mve_vmull: DefaultAttrsIntrinsic<
  1059. [llvm_anyvector_ty],
  1060. [llvm_anyvector_ty, LLVMMatchType<1>, llvm_i32_ty /* unsigned */,
  1061. llvm_i32_ty /* top */], [IntrNoMem]>;
  1062. def int_arm_mve_vmull_poly: DefaultAttrsIntrinsic<
  1063. [llvm_anyvector_ty],
  1064. [llvm_anyvector_ty, LLVMMatchType<1>, llvm_i32_ty], [IntrNoMem]>;
  1065. // The first two parameters are compile-time constants:
  1066. // * Halving: 0 means halving (vhcaddq), 1 means non-halving (vcaddq)
  1067. // instruction. Note: the flag is inverted to match the corresponding
  1068. // bit in the instruction encoding
  1069. // * Rotation angle: 0 means 90 deg, 1 means 270 deg
  // vcaddq: MVEMXPredicated with two i32 flags, then two operands of the
  // result type. The predicated form inserts an inactive operand of the
  // result type after the flags and appends an overloaded vector predicate.
  1070. defm int_arm_mve_vcaddq : MVEMXPredicated<
  1071. [llvm_anyvector_ty],
  1072. [llvm_i32_ty, llvm_i32_ty], [LLVMMatchType<0>, LLVMMatchType<0>],
  1073. LLVMMatchType<0>, llvm_anyvector_ty>;
  1074. // The first operand of the following two intrinsics is the rotation angle
  1075. // (must be a compile-time constant):
  1076. // 0 - 0 deg
  1077. // 1 - 90 deg
  1078. // 2 - 180 deg
  1079. // 3 - 270 deg
  // vcmulq: one i32 flag plus two operands of the result type; built with
  // MVEMXPredicated, so its predicated form has both inactive and predicate.
  1080. defm int_arm_mve_vcmulq : MVEMXPredicated<
  1081. [llvm_anyvector_ty],
  1082. [llvm_i32_ty], [LLVMMatchType<0>, LLVMMatchType<0>],
  1083. LLVMMatchType<0>, llvm_anyvector_ty>;
  // vcmlaq: (i32, T, T, T) -> T; built with MVEPredicated, so its predicated
  // form only appends the predicate (no separate inactive operand).
  1084. defm int_arm_mve_vcmlaq : MVEPredicated<
  1085. [llvm_anyvector_ty],
  1086. [llvm_i32_ty, LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>],
  1087. llvm_anyvector_ty>;
  // MVE structure loads: vld2q/vld4q return 2/4 vectors of one overloaded
  // type and take only an overloaded pointer. Declared IntrReadMem and
  // IntrArgMemOnly.
  1088. def int_arm_mve_vld2q: DefaultAttrsIntrinsic<
  1089. [llvm_anyvector_ty, LLVMMatchType<0>], [llvm_anyptr_ty],
  1090. [IntrReadMem, IntrArgMemOnly]>;
  1091. def int_arm_mve_vld4q: DefaultAttrsIntrinsic<
  1092. [llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>],
  1093. [llvm_anyptr_ty], [IntrReadMem, IntrArgMemOnly]>;
  // MVE structure stores: (overloaded pointer, 2/4 vectors of one overloaded
  // type, i32), no result. Declared IntrWriteMem and IntrArgMemOnly, with
  // [SDNPMemOperand] as SDNode properties.
  // NOTE(review): the meaning of the trailing i32 is not documented here -
  // confirm against the lowering code.
  1094. def int_arm_mve_vst2q: DefaultAttrsIntrinsic<
  1095. [], [llvm_anyptr_ty, llvm_anyvector_ty, LLVMMatchType<1>, llvm_i32_ty],
  1096. [IntrWriteMem, IntrArgMemOnly], "", [SDNPMemOperand]>;
  1097. def int_arm_mve_vst4q: DefaultAttrsIntrinsic<
  1098. [],
  1099. [llvm_anyptr_ty, llvm_anyvector_ty, LLVMMatchType<1>, LLVMMatchType<1>,
  1100. LLVMMatchType<1>, llvm_i32_ty],
  1101. [IntrWriteMem, IntrArgMemOnly], "", [SDNPMemOperand]>;
  1102. // MVE vector absolute difference and accumulate across vector
  1103. // The first operand is an 'unsigned' flag. The remaining operands are:
  1104. // * accumulator
  1105. // * first vector operand
  1106. // * second vector operand
  1107. // * mask (only in predicated versions)
  // Types: (i32, i32, V, V) -> i32, where V is one overloaded vector type;
  // the predicated form appends a separately overloaded vector.
  1108. defm int_arm_mve_vabav: MVEPredicated<
  1109. [llvm_i32_ty],
  1110. [llvm_i32_ty, llvm_i32_ty, llvm_anyvector_ty, LLVMMatchType<0>], llvm_anyvector_ty>;
  1111. // The following 3 intrinsics are MVE vector reductions with two vector
  1112. // operands.
  1113. // The first 3 operands are boolean flags (must be compile-time constants):
  1114. // * unsigned - the instruction operates on vectors of unsigned values and
  1115. // unsigned scalars
  1116. // * subtract - the instruction performs subtraction after multiplication of
  1117. // lane pairs (e.g., vmlsdav vs vmladav)
  1118. // * exchange - the instruction exchanges successive even and odd lanes of
  1119. // the first operands before multiplication of lane pairs
  1120. // (e.g., vmladavx vs vmladav)
  1121. // The remaining operands are:
  1122. // * accumulator
  1123. // * first vector operand
  1124. // * second vector operand
  1125. // * mask (only in predicated versions)
  1126. // Version with 32-bit result, vml{a,s}dav[a][x]
  // Types: (3 x i32 flags, i32, V, V) -> i32.
  1127. defm int_arm_mve_vmldava: MVEPredicated<
  1128. [llvm_i32_ty],
  1129. [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
  1130. llvm_i32_ty, llvm_anyvector_ty, LLVMMatchType<0>],
  1131. llvm_anyvector_ty>;
  1132. // Version with 64-bit result, vml{a,s}ldav[a][x]
  // Types: (3 x i32 flags, i32, i32, V, V) -> (i32, i32).
  // NOTE(review): presumably the accumulator and the result are each split
  // into two i32 halves - confirm which is low/high in the lowering code.
  1133. defm int_arm_mve_vmlldava: MVEPredicated<
  1134. [llvm_i32_ty, llvm_i32_ty],
  1135. [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
  1136. llvm_i32_ty, llvm_i32_ty, llvm_anyvector_ty, LLVMMatchType<0>],
  1137. llvm_anyvector_ty>;
  1138. // Version with 72-bit rounded result, vrml{a,s}ldavh[a][x]
  // Same type signature as int_arm_mve_vmlldava.
  1139. defm int_arm_mve_vrmlldavha: MVEPredicated<
  1140. [llvm_i32_ty, llvm_i32_ty],
  1141. [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
  1142. llvm_i32_ty, llvm_i32_ty, llvm_anyvector_ty, LLVMMatchType<0>],
  1143. llvm_anyvector_ty>;
  // vidup / vddup / viwdup / vdwdup: each returns (overloaded vector, i32)
  // and has no flags. vidup and vddup take (base, step); viwdup and vdwdup
  // take (base, limit, step). In the predicated forms an inactive operand of
  // the output vector type comes first and an overloaded vector predicate
  // comes last.
  1144. defm int_arm_mve_vidup: MVEMXPredicated<
  1145. [llvm_anyvector_ty /* output */, llvm_i32_ty /* written-back base */], [],
  1146. [llvm_i32_ty /* base */, llvm_i32_ty /* step */],
  1147. LLVMMatchType<0>, llvm_anyvector_ty>;
  1148. defm int_arm_mve_vddup: MVEMXPredicated<
  1149. [llvm_anyvector_ty /* output */, llvm_i32_ty /* written-back base */], [],
  1150. [llvm_i32_ty /* base */, llvm_i32_ty /* step */],
  1151. LLVMMatchType<0>, llvm_anyvector_ty>;
  1152. defm int_arm_mve_viwdup: MVEMXPredicated<
  1153. [llvm_anyvector_ty /* output */, llvm_i32_ty /* written-back base */], [],
  1154. [llvm_i32_ty /* base */, llvm_i32_ty /* limit */, llvm_i32_ty /* step */],
  1155. LLVMMatchType<0>, llvm_anyvector_ty>;
  1156. defm int_arm_mve_vdwdup: MVEMXPredicated<
  1157. [llvm_anyvector_ty /* output */, llvm_i32_ty /* written-back base */], [],
  1158. [llvm_i32_ty /* base */, llvm_i32_ty /* limit */, llvm_i32_ty /* step */],
  1159. LLVMMatchType<0>, llvm_anyvector_ty>;
  // Conversion with an explicit scale operand.
  // Flags:
  // * unsigned
  // The output and input vectors are overloaded independently of each other.
  defm int_arm_mve_vcvt_fix : MVEMXPredicated<
      [llvm_anyvector_ty],  // output
      [llvm_i32_ty],
      [llvm_anyvector_ty,   // input vector
       llvm_i32_ty],        // scale
      LLVMMatchType<0>, llvm_anyvector_ty>;
  // Predicated conversion declared directly rather than through a multiclass.
  // The first operand is an independently overloaded vector; the inactive
  // operand has the same type as the result.
  def int_arm_mve_vcvt_fp_int_predicated : DefaultAttrsIntrinsic<
      [llvm_anyvector_ty],
      [llvm_anyvector_ty,
       llvm_i32_ty,        // unsigned
       llvm_anyvector_ty,  // predicate
       LLVMMatchType<0>],  // inactive
      [IntrNoMem]>;
  // One MVEMXPredicated instantiation per suffix: int_arm_mve_vcvta,
  // int_arm_mve_vcvtn, int_arm_mve_vcvtp and int_arm_mve_vcvtm.
  foreach suffix = ["a", "n", "p", "m"] in {
    defm "int_arm_mve_vcvt"#suffix : MVEMXPredicated<
        [llvm_anyvector_ty],  // output
        [llvm_i32_ty],        // unsigned
        [llvm_anyvector_ty],  // input
        LLVMMatchType<0>, llvm_anyvector_ty>;
  }
  // Unpredicated unary operations: the single operand has the same overloaded
  // vector type as the result.
  def int_arm_mve_vrintn : DefaultAttrsIntrinsic<
      [llvm_anyvector_ty],
      [LLVMMatchType<0>],
      [IntrNoMem]>;

  def int_arm_mve_vcls : DefaultAttrsIntrinsic<
      [llvm_anyvector_ty],
      [LLVMMatchType<0>],
      [IntrNoMem]>;
  // vbrsr: takes a vector of the result type and an i32 operand.
  // The second template argument is an empty list.
  defm int_arm_mve_vbrsr : MVEMXPredicated<
      [llvm_anyvector_ty],
      [],
      [LLVMMatchType<0>, llvm_i32_ty],
      LLVMMatchType<0>, llvm_anyvector_ty>;
  // vqdmull: the result vector type (overload 0) is independent of the input
  // vector type (overload 1); both vector inputs share overload 1 and are
  // followed by one i32 operand.
  def int_arm_mve_vqdmull : DefaultAttrsIntrinsic<
      [llvm_anyvector_ty],
      [llvm_anyvector_ty, LLVMMatchType<1>, llvm_i32_ty],
      [IntrNoMem]>;

  // Predicated vqdmull: the same leading operands, then a further overloaded
  // vector and a final operand of the result type.
  // NOTE(review): the last two are presumably the predicate and the inactive
  // value -- confirm against the users of this intrinsic.
  def int_arm_mve_vqdmull_predicated : DefaultAttrsIntrinsic<
      [llvm_anyvector_ty],
      [llvm_anyvector_ty, LLVMMatchType<1>, llvm_i32_ty,
       llvm_anyvector_ty, LLVMMatchType<0>],
      [IntrNoMem]>;
  // Shared signature for the predicated unary intrinsics below: the result is
  // an overloaded vector, and the operands are a vector of the result type, a
  // separately overloaded vector, and another vector of the result type.
  // NOTE(review): presumably (operand, predicate, inactive) in that order --
  // confirm against the instruction selection patterns.
  class MVESimpleUnaryPredicated : DefaultAttrsIntrinsic<
      [llvm_anyvector_ty],
      [LLVMMatchType<0>, llvm_anyvector_ty, LLVMMatchType<0>],
      [IntrNoMem]>;

  def int_arm_mve_mvn_predicated    : MVESimpleUnaryPredicated;
  def int_arm_mve_abs_predicated    : MVESimpleUnaryPredicated;
  def int_arm_mve_neg_predicated    : MVESimpleUnaryPredicated;
  def int_arm_mve_qabs_predicated   : MVESimpleUnaryPredicated;
  def int_arm_mve_qneg_predicated   : MVESimpleUnaryPredicated;
  def int_arm_mve_clz_predicated    : MVESimpleUnaryPredicated;
  def int_arm_mve_cls_predicated    : MVESimpleUnaryPredicated;

  def int_arm_mve_vrintz_predicated : MVESimpleUnaryPredicated;
  def int_arm_mve_vrintm_predicated : MVESimpleUnaryPredicated;
  def int_arm_mve_vrintp_predicated : MVESimpleUnaryPredicated;
  def int_arm_mve_vrinta_predicated : MVESimpleUnaryPredicated;
  def int_arm_mve_vrintx_predicated : MVESimpleUnaryPredicated;
  def int_arm_mve_vrintn_predicated : MVESimpleUnaryPredicated;
  // Predicated vrev: like the simple unary predicated signature, with an
  // extra i32 operand giving the size to reverse.
  def int_arm_mve_vrev_predicated : DefaultAttrsIntrinsic<
      [llvm_anyvector_ty],
      [LLVMMatchType<0>,
       llvm_i32_ty,        // size to reverse
       llvm_anyvector_ty,
       LLVMMatchType<0>],
      [IntrNoMem]>;
  // Predicated vmovl: the input vector is overloaded separately from the
  // result, and the final operand has the result type.
  def int_arm_mve_vmovl_predicated : DefaultAttrsIntrinsic<
      [llvm_anyvector_ty],
      [llvm_anyvector_ty,
       llvm_i32_ty,        // unsigned
       llvm_i32_ty,        // top half
       llvm_anyvector_ty,  // predicate
       LLVMMatchType<0>],
      [IntrNoMem]>;

  // Predicated vmovn: the first operand has the result type, and the second
  // is a separately overloaded vector.
  def int_arm_mve_vmovn_predicated : DefaultAttrsIntrinsic<
      [llvm_anyvector_ty],
      [LLVMMatchType<0>,
       llvm_anyvector_ty,
       llvm_i32_ty,         // top half
       llvm_anyvector_ty],  // predicate
      [IntrNoMem]>;
  // vqmovn: the first operand has the result type, the second is a separately
  // overloaded vector, and three i32 flag operands follow.
  def int_arm_mve_vqmovn : DefaultAttrsIntrinsic<
      [llvm_anyvector_ty],
      [LLVMMatchType<0>,
       llvm_anyvector_ty,
       llvm_i32_ty,   // unsigned output
       llvm_i32_ty,   // unsigned input
       llvm_i32_ty],  // top half
      [IntrNoMem]>;

  // Predicated vqmovn: identical, plus a trailing predicate vector.
  def int_arm_mve_vqmovn_predicated : DefaultAttrsIntrinsic<
      [llvm_anyvector_ty],
      [LLVMMatchType<0>,
       llvm_anyvector_ty,
       llvm_i32_ty,         // unsigned output
       llvm_i32_ty,         // unsigned input
       llvm_i32_ty,         // top half
       llvm_anyvector_ty],  // pred
      [IntrNoMem]>;
  // Predicated multiply-accumulate intrinsics. Every vector operand has the
  // result type; the predicate is a separately overloaded vector.

  // All three data operands are vectors.
  def int_arm_mve_fma_predicated : DefaultAttrsIntrinsic<
      [llvm_anyvector_ty],
      [LLVMMatchType<0>,    // mult op #1
       LLVMMatchType<0>,    // mult op #2
       LLVMMatchType<0>,    // addend
       llvm_anyvector_ty],  // pred
      [IntrNoMem]>;

  // The second multiplicand is a scalar.
  def int_arm_mve_vmla_n_predicated : DefaultAttrsIntrinsic<
      [llvm_anyvector_ty],
      [LLVMMatchType<0>,    // mult op #1
       LLVMMatchType<0>,    // addend
       llvm_i32_ty,         // mult op #2 (scalar)
       llvm_anyvector_ty],  // pred
      [IntrNoMem]>;

  // The addend is a scalar.
  def int_arm_mve_vmlas_n_predicated : DefaultAttrsIntrinsic<
      [llvm_anyvector_ty],
      [LLVMMatchType<0>,    // mult op #1
       LLVMMatchType<0>,    // mult op #2
       llvm_i32_ty,         // addend (scalar)
       llvm_anyvector_ty],  // pred
      [IntrNoMem]>;
  // vqdmlah / vqrdmlah: the second multiplicand is a scalar.
  defm int_arm_mve_vqdmlah : MVEPredicated<
      [llvm_anyvector_ty],
      [LLVMMatchType<0>,  // mult op #1
       LLVMMatchType<0>,  // addend
       llvm_i32_ty]>;     // mult op #2 (scalar)

  defm int_arm_mve_vqrdmlah : MVEPredicated<
      [llvm_anyvector_ty],
      [LLVMMatchType<0>,  // mult op #1
       LLVMMatchType<0>,  // addend
       llvm_i32_ty]>;     // mult op #2 (scalar)

  // vqdmlash / vqrdmlash: the addend is a scalar.
  defm int_arm_mve_vqdmlash : MVEPredicated<
      [llvm_anyvector_ty],
      [LLVMMatchType<0>,  // mult op #1
       LLVMMatchType<0>,  // mult op #2
       llvm_i32_ty]>;     // addend (scalar)

  defm int_arm_mve_vqrdmlash : MVEPredicated<
      [llvm_anyvector_ty],
      [LLVMMatchType<0>,  // mult op #1
       LLVMMatchType<0>,  // mult op #2
       llvm_i32_ty]>;     // addend (scalar)
  // vqdmlad: three vector operands of the result type followed by three i32
  // flag operands.
  defm int_arm_mve_vqdmlad : MVEPredicated<
      [llvm_anyvector_ty],
      [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>,
       llvm_i32_ty,    // exchange
       llvm_i32_ty,    // round
       llvm_i32_ty]>;  // subtract
  // CDE (Custom Datapath Extension)
  //
  // Integer-register CDE intrinsics. Each instantiation <name> produces four
  // IntrNoMem intrinsics, where `args` supplies the operands that sit between
  // the fixed leading operands and the trailing immediate:
  //   <name>    (coproc, args..., imm)                  -> i32
  //   <name>a   (coproc, acc, args..., imm)             -> i32
  //   <name>d   (coproc, args..., imm)                  -> {i32 lo, i32 hi}
  //   <name>da  (coproc, acc_lo, acc_hi, args..., imm)  -> {i32 lo, i32 hi}
  // Operand 0 (coproc) and the trailing imm are both marked ImmArg; the index
  // of the latter is the number of operands that precede it.
  multiclass CDEGPRIntrinsics<list<LLVMType> args> {
    defvar NumArgs = !size(args);

    def "" : DefaultAttrsIntrinsic<
        [llvm_i32_ty],
        !listconcat([llvm_i32_ty],  // coproc
                    args,
                    [llvm_i32_ty]), // imm
        [IntrNoMem, ImmArg<ArgIndex<0>>,
         ImmArg<ArgIndex<!add(NumArgs, 1)>>]>;

    def a : DefaultAttrsIntrinsic<
        [llvm_i32_ty],
        !listconcat([llvm_i32_ty,   // coproc
                     llvm_i32_ty],  // acc
                    args,
                    [llvm_i32_ty]), // imm
        [IntrNoMem, ImmArg<ArgIndex<0>>,
         ImmArg<ArgIndex<!add(NumArgs, 2)>>]>;

    def d : DefaultAttrsIntrinsic<
        [llvm_i32_ty,               // lo
         llvm_i32_ty],              // hi
        !listconcat([llvm_i32_ty],  // coproc
                    args,
                    [llvm_i32_ty]), // imm
        [IntrNoMem, ImmArg<ArgIndex<0>>,
         ImmArg<ArgIndex<!add(NumArgs, 1)>>]>;

    def da : DefaultAttrsIntrinsic<
        [llvm_i32_ty,               // lo
         llvm_i32_ty],              // hi
        !listconcat([llvm_i32_ty,   // coproc
                     llvm_i32_ty,   // acc_lo
                     llvm_i32_ty],  // acc_hi
                    args,
                    [llvm_i32_ty]), // imm
        [IntrNoMem, ImmArg<ArgIndex<0>>,
         ImmArg<ArgIndex<!add(NumArgs, 3)>>]>;
  }

  // cx1 / cx2 / cx3 pass zero, one and two extra i32 operands respectively.
  defm int_arm_cde_cx1 : CDEGPRIntrinsics<[]>;
  defm int_arm_cde_cx2 : CDEGPRIntrinsics<[llvm_i32_ty]>;
  defm int_arm_cde_cx3 : CDEGPRIntrinsics<[llvm_i32_ty, llvm_i32_ty]>;
  // Floating-point CDE intrinsics. The result is an overloaded floating-point
  // type, and each instantiation <name> produces two IntrNoMem intrinsics:
  //   <name>   (coproc, args..., imm)       -> anyfloat
  //   <name>a  (coproc, acc, args..., imm)  -> anyfloat, acc of the same type
  // Operand 0 (coproc) and the trailing imm are marked ImmArg.
  multiclass CDEVCXIntrinsics<list<LLVMType> args> {
    def "" : DefaultAttrsIntrinsic<
        [llvm_anyfloat_ty],
        !listconcat([llvm_i32_ty],  // coproc
                    args,
                    [llvm_i32_ty]), // imm
        [IntrNoMem, ImmArg<ArgIndex<0>>,
         ImmArg<ArgIndex<!add(!size(args), 1)>>]>;

    def a : DefaultAttrsIntrinsic<
        [llvm_anyfloat_ty],
        !listconcat([llvm_i32_ty,        // coproc
                     LLVMMatchType<0>],  // acc
                    args,
                    [llvm_i32_ty]),      // imm
        [IntrNoMem, ImmArg<ArgIndex<0>>,
         ImmArg<ArgIndex<!add(!size(args), 2)>>]>;
  }

  // vcx1 / vcx2 / vcx3 pass zero, one and two extra operands of the result
  // type respectively.
  defm int_arm_cde_vcx1 : CDEVCXIntrinsics<[]>;
  defm int_arm_cde_vcx2 : CDEVCXIntrinsics<[LLVMMatchType<0>]>;
  defm int_arm_cde_vcx3 : CDEVCXIntrinsics<[LLVMMatchType<0>, LLVMMatchType<0>]>;
  // Vector CDE intrinsics. Each instantiation <name> produces four IntrNoMem
  // intrinsics:
  //   <name>              (coproc, args..., imm)                  -> v16i8
  //   <name>a             (coproc, acc, args..., imm)             -> v16i8
  //   <name>_predicated   (coproc, inactive, args..., imm, mask)  -> anyvector
  //   <name>a_predicated  (coproc, acc, args..., imm, mask)       -> anyvector
  // The unpredicated forms are fixed to v16i8. The predicated forms overload
  // the result type (shared by inactive/acc) and, separately, the mask type.
  // Operand 0 (coproc) and imm are marked ImmArg. In the predicated forms imm
  // is followed by the mask, so its index is still 2 + the number of args.
  multiclass CDEVCXVecIntrinsics<list<LLVMType> args> {
    // Index of imm in every form that has one operand between coproc and args.
    defvar ImmAfterOne = !add(!size(args), 2);

    def "" : DefaultAttrsIntrinsic<
        [llvm_v16i8_ty],
        !listconcat([llvm_i32_ty],  // coproc
                    args,
                    [llvm_i32_ty]), // imm
        [IntrNoMem, ImmArg<ArgIndex<0>>,
         ImmArg<ArgIndex<!add(!size(args), 1)>>]>;

    def a : DefaultAttrsIntrinsic<
        [llvm_v16i8_ty],
        !listconcat([llvm_i32_ty,     // coproc
                     llvm_v16i8_ty],  // acc
                    args,
                    [llvm_i32_ty]),   // imm
        [IntrNoMem, ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<ImmAfterOne>>]>;

    def _predicated : DefaultAttrsIntrinsic<
        [llvm_anyvector_ty],
        !listconcat([llvm_i32_ty,          // coproc
                     LLVMMatchType<0>],    // inactive
                    args,
                    [llvm_i32_ty,          // imm
                     llvm_anyvector_ty]),  // mask
        [IntrNoMem, ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<ImmAfterOne>>]>;

    def a_predicated : DefaultAttrsIntrinsic<
        [llvm_anyvector_ty],
        !listconcat([llvm_i32_ty,          // coproc
                     LLVMMatchType<0>],    // acc
                    args,
                    [llvm_i32_ty,          // imm
                     llvm_anyvector_ty]),  // mask
        [IntrNoMem, ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<ImmAfterOne>>]>;
  }

  // vcx1q / vcx2q / vcx3q pass zero, one and two extra v16i8 operands.
  defm int_arm_cde_vcx1q : CDEVCXVecIntrinsics<[]>;
  defm int_arm_cde_vcx2q : CDEVCXVecIntrinsics<[llvm_v16i8_ty]>;
  defm int_arm_cde_vcx3q : CDEVCXVecIntrinsics<[llvm_v16i8_ty, llvm_v16i8_ty]>;
  1312. } // end TargetPrefix