//===- IntrinsicsARM.td - Defines ARM intrinsics -----------*- tablegen -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines all of the ARM-specific intrinsics.
//
//===----------------------------------------------------------------------===//
  //===----------------------------------------------------------------------===//
  // TLS
  // NOTE(review): no TLS intrinsic is defined under this heading in the visible
  // part of the file; the heading may be stale -- confirm.

  // Opens the scope that gives every intrinsic below the "arm" target prefix.
  // The matching closing brace is not part of this section.
  let TargetPrefix = "arm" in {  // All intrinsics start with "llvm.arm.".

  // A space-consuming intrinsic primarily for testing ARMConstantIslands. The
  // first argument is the number of bytes this "instruction" takes up, the second
  // and return value are essentially chains, used to force ordering during ISel.
  // The byte count (argument 0) is marked ImmArg.
  def int_arm_space : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
                                [ImmArg<ArgIndex<0>>]>;
  // 16-bit multiplications
  // Each intrinsic takes two i32 operands, returns an i32, is marked IntrNoMem,
  // and is bound to the GCC builtin of the same name.
  def int_arm_smulbb : GCCBuiltin<"__builtin_arm_smulbb">,
      Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
  def int_arm_smulbt : GCCBuiltin<"__builtin_arm_smulbt">,
      Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
  def int_arm_smultb : GCCBuiltin<"__builtin_arm_smultb">,
      Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
  def int_arm_smultt : GCCBuiltin<"__builtin_arm_smultt">,
      Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
  def int_arm_smulwb : GCCBuiltin<"__builtin_arm_smulwb">,
      Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
  def int_arm_smulwt : GCCBuiltin<"__builtin_arm_smulwt">,
      Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
  //===----------------------------------------------------------------------===//
  // Saturating Arithmetic
  // Each intrinsic takes two i32 operands, returns an i32 and is marked
  // IntrNoMem. Only int_arm_qadd is additionally marked Commutative.

  def int_arm_qadd : GCCBuiltin<"__builtin_arm_qadd">,
      Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
                [Commutative, IntrNoMem]>;
  def int_arm_qsub : GCCBuiltin<"__builtin_arm_qsub">,
      Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
  def int_arm_ssat : GCCBuiltin<"__builtin_arm_ssat">,
      Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
  def int_arm_usat : GCCBuiltin<"__builtin_arm_usat">,
      Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
  // Accumulating multiplications
  // Each intrinsic takes three i32 operands, returns an i32 and is marked
  // IntrNoMem.
  def int_arm_smlabb : GCCBuiltin<"__builtin_arm_smlabb">,
      Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
                [IntrNoMem]>;
  def int_arm_smlabt : GCCBuiltin<"__builtin_arm_smlabt">,
      Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
                [IntrNoMem]>;
  def int_arm_smlatb : GCCBuiltin<"__builtin_arm_smlatb">,
      Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
                [IntrNoMem]>;
  def int_arm_smlatt : GCCBuiltin<"__builtin_arm_smlatt">,
      Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
                [IntrNoMem]>;
  def int_arm_smlawb : GCCBuiltin<"__builtin_arm_smlawb">,
      Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
                [IntrNoMem]>;
  def int_arm_smlawt : GCCBuiltin<"__builtin_arm_smlawt">,
      Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
                [IntrNoMem]>;
  // Parallel 16-bit saturation
  // Both intrinsics take two i32 operands, return an i32 and are marked
  // IntrNoMem.
  def int_arm_ssat16 : GCCBuiltin<"__builtin_arm_ssat16">,
      Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
  def int_arm_usat16 : GCCBuiltin<"__builtin_arm_usat16">,
      Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
  // Packing and unpacking
  // The "ab16" forms take two i32 operands; the "b16" forms take a single i32
  // operand. All return an i32 and are marked IntrNoMem.
  def int_arm_sxtab16 : GCCBuiltin<"__builtin_arm_sxtab16">,
      Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
  def int_arm_sxtb16 : GCCBuiltin<"__builtin_arm_sxtb16">,
      Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem]>;
  def int_arm_uxtab16 : GCCBuiltin<"__builtin_arm_uxtab16">,
      Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
  def int_arm_uxtb16 : GCCBuiltin<"__builtin_arm_uxtb16">,
      Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem]>;
  // Parallel selection, reads the GE flags.
  // Declared IntrReadMem rather than IntrNoMem; presumably this is how the
  // dependency on the GE flags is modelled -- confirm before changing.
  def int_arm_sel : GCCBuiltin<"__builtin_arm_sel">,
      Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrReadMem]>;
  // Parallel 8-bit addition and subtraction
  // Every intrinsic takes two i32 operands and returns an i32. The ones
  // commented "Writes to the GE bits" have an empty property list (no
  // IntrNoMem); all others are marked IntrNoMem.
  def int_arm_qadd8 : GCCBuiltin<"__builtin_arm_qadd8">,
      Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
  def int_arm_qsub8 : GCCBuiltin<"__builtin_arm_qsub8">,
      Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
  // Writes to the GE bits.
  def int_arm_sadd8 : GCCBuiltin<"__builtin_arm_sadd8">,
      Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>;
  def int_arm_shadd8 : GCCBuiltin<"__builtin_arm_shadd8">,
      Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
  def int_arm_shsub8 : GCCBuiltin<"__builtin_arm_shsub8">,
      Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
  // Writes to the GE bits.
  def int_arm_ssub8 : GCCBuiltin<"__builtin_arm_ssub8">,
      Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>;
  // Writes to the GE bits.
  def int_arm_uadd8 : GCCBuiltin<"__builtin_arm_uadd8">,
      Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>;
  def int_arm_uhadd8 : GCCBuiltin<"__builtin_arm_uhadd8">,
      Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
  def int_arm_uhsub8 : GCCBuiltin<"__builtin_arm_uhsub8">,
      Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
  def int_arm_uqadd8 : GCCBuiltin<"__builtin_arm_uqadd8">,
      Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
  def int_arm_uqsub8 : GCCBuiltin<"__builtin_arm_uqsub8">,
      Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
  // Writes to the GE bits.
  def int_arm_usub8 : GCCBuiltin<"__builtin_arm_usub8">,
      Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>;
  // Sum of 8-bit absolute differences
  // int_arm_usad8 takes two i32 operands; int_arm_usada8 takes three. Both
  // return an i32 and are marked IntrNoMem.
  def int_arm_usad8 : GCCBuiltin<"__builtin_arm_usad8">,
      Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
  def int_arm_usada8 : GCCBuiltin<"__builtin_arm_usada8">,
      Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
                [IntrNoMem]>;
  // Parallel 16-bit addition and subtraction
  // Every intrinsic takes two i32 operands and returns an i32. The ones
  // commented "Writes to the GE bits" have an empty property list (no
  // IntrNoMem); all others are marked IntrNoMem.
  def int_arm_qadd16 : GCCBuiltin<"__builtin_arm_qadd16">,
      Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
  def int_arm_qasx : GCCBuiltin<"__builtin_arm_qasx">,
      Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
  def int_arm_qsax : GCCBuiltin<"__builtin_arm_qsax">,
      Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
  def int_arm_qsub16 : GCCBuiltin<"__builtin_arm_qsub16">,
      Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
  // Writes to the GE bits.
  def int_arm_sadd16 : GCCBuiltin<"__builtin_arm_sadd16">,
      Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>;
  // Writes to the GE bits.
  def int_arm_sasx : GCCBuiltin<"__builtin_arm_sasx">,
      Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>;
  def int_arm_shadd16 : GCCBuiltin<"__builtin_arm_shadd16">,
      Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
  def int_arm_shasx : GCCBuiltin<"__builtin_arm_shasx">,
      Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
  def int_arm_shsax : GCCBuiltin<"__builtin_arm_shsax">,
      Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
  def int_arm_shsub16 : GCCBuiltin<"__builtin_arm_shsub16">,
      Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
  // Writes to the GE bits.
  def int_arm_ssax : GCCBuiltin<"__builtin_arm_ssax">,
      Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>;
  // Writes to the GE bits.
  def int_arm_ssub16 : GCCBuiltin<"__builtin_arm_ssub16">,
      Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>;
  // Writes to the GE bits.
  def int_arm_uadd16 : GCCBuiltin<"__builtin_arm_uadd16">,
      Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>;
  // Writes to the GE bits.
  def int_arm_uasx : GCCBuiltin<"__builtin_arm_uasx">,
      Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>;
  def int_arm_uhadd16 : GCCBuiltin<"__builtin_arm_uhadd16">,
      Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
  def int_arm_uhasx : GCCBuiltin<"__builtin_arm_uhasx">,
      Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
  def int_arm_uhsax : GCCBuiltin<"__builtin_arm_uhsax">,
      Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
  def int_arm_uhsub16 : GCCBuiltin<"__builtin_arm_uhsub16">,
      Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
  def int_arm_uqadd16 : GCCBuiltin<"__builtin_arm_uqadd16">,
      Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
  def int_arm_uqasx : GCCBuiltin<"__builtin_arm_uqasx">,
      Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
  def int_arm_uqsax : GCCBuiltin<"__builtin_arm_uqsax">,
      Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
  def int_arm_uqsub16 : GCCBuiltin<"__builtin_arm_uqsub16">,
      Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
  // Writes to the GE bits.
  def int_arm_usax : GCCBuiltin<"__builtin_arm_usax">,
      Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>;
  // Writes to the GE bits.
  def int_arm_usub16 : GCCBuiltin<"__builtin_arm_usub16">,
      Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>;
  // Parallel 16-bit multiplication
  // Three signature shapes appear below, all marked IntrNoMem:
  //   * (i32, i32, i32) -> i32 : smlad, smladx, smlsd, smlsdx
  //   * (i32, i32, i64) -> i64 : smlald, smlaldx, smlsld, smlsldx
  //   * (i32, i32)      -> i32 : smuad, smuadx, smusd, smusdx
  def int_arm_smlad : GCCBuiltin<"__builtin_arm_smlad">,
      Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
                [IntrNoMem]>;
  def int_arm_smladx : GCCBuiltin<"__builtin_arm_smladx">,
      Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
                [IntrNoMem]>;
  def int_arm_smlald : GCCBuiltin<"__builtin_arm_smlald">,
      Intrinsic<[llvm_i64_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i64_ty],
                [IntrNoMem]>;
  def int_arm_smlaldx : GCCBuiltin<"__builtin_arm_smlaldx">,
      Intrinsic<[llvm_i64_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i64_ty],
                [IntrNoMem]>;
  def int_arm_smlsd : GCCBuiltin<"__builtin_arm_smlsd">,
      Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
                [IntrNoMem]>;
  def int_arm_smlsdx : GCCBuiltin<"__builtin_arm_smlsdx">,
      Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
                [IntrNoMem]>;
  def int_arm_smlsld : GCCBuiltin<"__builtin_arm_smlsld">,
      Intrinsic<[llvm_i64_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i64_ty],
                [IntrNoMem]>;
  def int_arm_smlsldx : GCCBuiltin<"__builtin_arm_smlsldx">,
      Intrinsic<[llvm_i64_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i64_ty],
                [IntrNoMem]>;
  def int_arm_smuad : GCCBuiltin<"__builtin_arm_smuad">,
      Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
  def int_arm_smuadx : GCCBuiltin<"__builtin_arm_smuadx">,
      Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
  def int_arm_smusd : GCCBuiltin<"__builtin_arm_smusd">,
      Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
  def int_arm_smusdx : GCCBuiltin<"__builtin_arm_smusdx">,
      Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
  //===----------------------------------------------------------------------===//
  // Load, Store and Clear exclusive
  // None of these intrinsics specify a property list. The single-word forms
  // take an overloaded pointer (llvm_anyptr_ty); the "d" forms take a plain
  // llvm_ptr_ty and carry their data as two i32 values.

  def int_arm_ldrex : Intrinsic<[llvm_i32_ty], [llvm_anyptr_ty]>;
  def int_arm_strex : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_anyptr_ty]>;

  def int_arm_ldaex : Intrinsic<[llvm_i32_ty], [llvm_anyptr_ty]>;
  def int_arm_stlex : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_anyptr_ty]>;

  def int_arm_clrex : Intrinsic<[]>;

  def int_arm_strexd : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty,
      llvm_ptr_ty]>;
  def int_arm_ldrexd : Intrinsic<[llvm_i32_ty, llvm_i32_ty], [llvm_ptr_ty]>;

  def int_arm_stlexd : Intrinsic<[llvm_i32_ty],
                                 [llvm_i32_ty, llvm_i32_ty, llvm_ptr_ty]>;
  def int_arm_ldaexd : Intrinsic<[llvm_i32_ty, llvm_i32_ty], [llvm_ptr_ty]>;
  //===----------------------------------------------------------------------===//
  // Data barrier instructions
  // Each barrier takes a single i32 operand, returns nothing, and is bound to
  // both a GCC builtin and an MS builtin. No property list is given.
  def int_arm_dmb : GCCBuiltin<"__builtin_arm_dmb">, MSBuiltin<"__dmb">,
                    Intrinsic<[], [llvm_i32_ty]>;
  def int_arm_dsb : GCCBuiltin<"__builtin_arm_dsb">, MSBuiltin<"__dsb">,
                    Intrinsic<[], [llvm_i32_ty]>;
  def int_arm_isb : GCCBuiltin<"__builtin_arm_isb">, MSBuiltin<"__isb">,
                    Intrinsic<[], [llvm_i32_ty]>;
  //===----------------------------------------------------------------------===//
  // VFP

  // FPSCR accessors: the getter returns an i32 and takes no operands, the
  // setter takes an i32 and returns nothing. Both have an empty property list.
  def int_arm_get_fpscr : GCCBuiltin<"__builtin_arm_get_fpscr">,
                         Intrinsic<[llvm_i32_ty], [], []>;
  def int_arm_set_fpscr : GCCBuiltin<"__builtin_arm_set_fpscr">,
                         Intrinsic<[], [llvm_i32_ty], []>;
  // Both conversions return llvm_float_ty from an overloaded floating-point
  // operand and are marked IntrNoMem.
  def int_arm_vcvtr : Intrinsic<[llvm_float_ty], [llvm_anyfloat_ty],
                                [IntrNoMem]>;
  def int_arm_vcvtru : Intrinsic<[llvm_float_ty], [llvm_anyfloat_ty],
                                 [IntrNoMem]>;
  //===----------------------------------------------------------------------===//
  // Coprocessor
  // Coprocessor load/store intrinsics: (i32, i32, ptr) -> void, with the two
  // i32 operands (arguments 0 and 1) marked ImmArg.

  def int_arm_ldc : GCCBuiltin<"__builtin_arm_ldc">,
      Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_ptr_ty],
                [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<1>>]>;
  def int_arm_ldcl : GCCBuiltin<"__builtin_arm_ldcl">,
      Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_ptr_ty],
                [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<1>>]>;
  def int_arm_ldc2 : GCCBuiltin<"__builtin_arm_ldc2">,
      Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_ptr_ty],
                [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<1>>]>;
  def int_arm_ldc2l : GCCBuiltin<"__builtin_arm_ldc2l">,
      Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_ptr_ty],
                [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<1>>]>;

  def int_arm_stc : GCCBuiltin<"__builtin_arm_stc">,
      Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_ptr_ty],
                [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<1>>]>;
  def int_arm_stcl : GCCBuiltin<"__builtin_arm_stcl">,
      Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_ptr_ty],
                [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<1>>]>;
  def int_arm_stc2 : GCCBuiltin<"__builtin_arm_stc2">,
      Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_ptr_ty],
                [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<1>>]>;
  def int_arm_stc2l : GCCBuiltin<"__builtin_arm_stc2l">,
      Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_ptr_ty],
                [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<1>>]>;
  // Move to coprocessor
  // Six i32 operands, no result. Arguments 0, 1, 3, 4 and 5 are marked ImmArg;
  // argument 2 is the only one that is not.
  def int_arm_mcr : GCCBuiltin<"__builtin_arm_mcr">,
      Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
                     llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
                [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<1>>, ImmArg<ArgIndex<3>>,
                 ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>]>;
  def int_arm_mcr2 : GCCBuiltin<"__builtin_arm_mcr2">,
      Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
                     llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
                [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<1>>, ImmArg<ArgIndex<3>>,
                 ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>]>;
  // Move from coprocessor
  // Five i32 operands, all marked ImmArg, producing an i32 result. Each
  // intrinsic is bound to both a GCC builtin and an MS builtin.
  def int_arm_mrc : GCCBuiltin<"__builtin_arm_mrc">,
                    MSBuiltin<"_MoveFromCoprocessor">,
      Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
                                llvm_i32_ty, llvm_i32_ty],
                [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<1>>, ImmArg<ArgIndex<2>>,
                 ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>]>;
  def int_arm_mrc2 : GCCBuiltin<"__builtin_arm_mrc2">,
                     MSBuiltin<"_MoveFromCoprocessor2">,
      Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
                                llvm_i32_ty, llvm_i32_ty],
                [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<1>>, ImmArg<ArgIndex<2>>,
                 ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>]>;
  // Coprocessor data processing
  // Six i32 operands, no result; every operand (arguments 0-5) is marked
  // ImmArg.
  def int_arm_cdp : GCCBuiltin<"__builtin_arm_cdp">,
      Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
                     llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
                [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<1>>, ImmArg<ArgIndex<2>>,
                 ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>,
                 ImmArg<ArgIndex<5>>]>;
  def int_arm_cdp2 : GCCBuiltin<"__builtin_arm_cdp2">,
      Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
                     llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
                [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<1>>, ImmArg<ArgIndex<2>>,
                 ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>,
                 ImmArg<ArgIndex<5>>]>;
  // Move from two registers to coprocessor
  // mcrr/mcrr2: five i32 operands, no result; arguments 0, 1 and 4 are marked
  // ImmArg (arguments 2 and 3 are not).
  def int_arm_mcrr : Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
                                    llvm_i32_ty, llvm_i32_ty],
                               [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<1>>,
                                ImmArg<ArgIndex<4>>]>;
  def int_arm_mcrr2 : Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
                                     llvm_i32_ty, llvm_i32_ty],
                                [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<1>>,
                                 ImmArg<ArgIndex<4>>]>;

  // mrrc/mrrc2: three i32 operands, all marked ImmArg, producing two i32
  // results.
  def int_arm_mrrc : Intrinsic<[llvm_i32_ty, llvm_i32_ty], [llvm_i32_ty,
                                llvm_i32_ty, llvm_i32_ty],
                               [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<1>>,
                                ImmArg<ArgIndex<2>>]>;
  def int_arm_mrrc2 : Intrinsic<[llvm_i32_ty, llvm_i32_ty], [llvm_i32_ty,
                                 llvm_i32_ty, llvm_i32_ty],
                                [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<1>>,
                                 ImmArg<ArgIndex<2>>]>;
  //===----------------------------------------------------------------------===//
  // CRC32
  // Each intrinsic takes two i32 operands, returns an i32 and is marked
  // IntrNoMem.

  def int_arm_crc32b  : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
      [IntrNoMem]>;
  def int_arm_crc32cb : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
      [IntrNoMem]>;
  def int_arm_crc32h  : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
      [IntrNoMem]>;
  def int_arm_crc32ch : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
      [IntrNoMem]>;
  def int_arm_crc32w  : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
      [IntrNoMem]>;
  def int_arm_crc32cw : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
      [IntrNoMem]>;
  //===----------------------------------------------------------------------===//
  // CMSE
  // Each intrinsic takes a pointer operand, returns an i32 and is marked
  // IntrNoMem.

  def int_arm_cmse_tt : GCCBuiltin<"__builtin_arm_cmse_TT">,
      Intrinsic<[llvm_i32_ty], [llvm_ptr_ty], [IntrNoMem]>;
  def int_arm_cmse_ttt : GCCBuiltin<"__builtin_arm_cmse_TTT">,
      Intrinsic<[llvm_i32_ty], [llvm_ptr_ty], [IntrNoMem]>;
  def int_arm_cmse_tta : GCCBuiltin<"__builtin_arm_cmse_TTA">,
      Intrinsic<[llvm_i32_ty], [llvm_ptr_ty], [IntrNoMem]>;
  def int_arm_cmse_ttat : GCCBuiltin<"__builtin_arm_cmse_TTAT">,
      Intrinsic<[llvm_i32_ty], [llvm_ptr_ty], [IntrNoMem]>;
  //===----------------------------------------------------------------------===//
  // HINT
  // Both intrinsics take a single i32 operand and return nothing; no property
  // list is given.

  def int_arm_hint : Intrinsic<[], [llvm_i32_ty]>;
  def int_arm_dbg : Intrinsic<[], [llvm_i32_ty]>;

  //===----------------------------------------------------------------------===//
  // UND (reserved undefined sequence)
  // Takes a single i32 operand and returns nothing; no property list is given.

  def int_arm_undefined : Intrinsic<[], [llvm_i32_ty]>;
  //===----------------------------------------------------------------------===//
  // Advanced SIMD (NEON)

  // The following classes do not correspond directly to GCC builtins.
  // All of them return an overloaded vector type (llvm_anyvector_ty, overload
  // index 0) and are marked IntrNoMem; they differ only in how the operand
  // types are derived from that result type.

  // One operand of the same type as the result.
  class Neon_1Arg_Intrinsic
    : Intrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>], [IntrNoMem]>;
  // One operand of the result's extended type.
  class Neon_1Arg_Narrow_Intrinsic
    : Intrinsic<[llvm_anyvector_ty], [LLVMExtendedType<0>], [IntrNoMem]>;
  // Two operands of the same type as the result.
  class Neon_2Arg_Intrinsic
    : Intrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, LLVMMatchType<0>],
                [IntrNoMem]>;
  // Two operands of the result's extended type.
  class Neon_2Arg_Narrow_Intrinsic
    : Intrinsic<[llvm_anyvector_ty], [LLVMExtendedType<0>, LLVMExtendedType<0>],
                [IntrNoMem]>;
  // Two operands of the result's truncated type.
  class Neon_2Arg_Long_Intrinsic
    : Intrinsic<[llvm_anyvector_ty], [LLVMTruncatedType<0>, LLVMTruncatedType<0>],
                [IntrNoMem]>;
  // Three operands of the same type as the result.
  class Neon_3Arg_Intrinsic
    : Intrinsic<[llvm_anyvector_ty],
                [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>],
                [IntrNoMem]>;
  // First operand matches the result; the other two use its truncated type.
  class Neon_3Arg_Long_Intrinsic
    : Intrinsic<[llvm_anyvector_ty],
                [LLVMMatchType<0>, LLVMTruncatedType<0>, LLVMTruncatedType<0>],
                [IntrNoMem]>;
  // Overloaded floating-point result with one operand of the same type.
  class Neon_1FloatArg_Intrinsic
    : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>;
  // Overloaded integer operand plus an i32 operand, producing an overloaded
  // floating-point result.
  class Neon_CvtFxToFP_Intrinsic
    : Intrinsic<[llvm_anyfloat_ty], [llvm_anyint_ty, llvm_i32_ty], [IntrNoMem]>;
  // Overloaded floating-point operand plus an i32 operand, producing an
  // overloaded integer result.
  class Neon_CvtFPToFx_Intrinsic
    : Intrinsic<[llvm_anyint_ty], [llvm_anyfloat_ty, llvm_i32_ty], [IntrNoMem]>;
  // Result and operand are independently overloaded vector types.
  class Neon_CvtFPtoInt_1Arg_Intrinsic
    : Intrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty], [IntrNoMem]>;

  // Result vector type is overloaded independently of the operands; the second
  // operand must match the first operand's type (overload index 1).
  class Neon_Compare_Intrinsic
    : Intrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty, LLVMMatchType<1>],
                [IntrNoMem]>;
  // The table operands for VTBL and VTBX consist of 1 to 4 v8i8 vectors.
  // Besides the table, VTBL has one other v8i8 argument and VTBX has two.
  // Overall, the classes range from 2 to 6 v8i8 arguments.
  // Every class returns a v8i8 and is marked IntrNoMem; the number in the
  // class name is the operand count.
  class Neon_Tbl2Arg_Intrinsic
    : Intrinsic<[llvm_v8i8_ty],
                [llvm_v8i8_ty, llvm_v8i8_ty], [IntrNoMem]>;
  class Neon_Tbl3Arg_Intrinsic
    : Intrinsic<[llvm_v8i8_ty],
                [llvm_v8i8_ty, llvm_v8i8_ty, llvm_v8i8_ty], [IntrNoMem]>;
  class Neon_Tbl4Arg_Intrinsic
    : Intrinsic<[llvm_v8i8_ty],
                [llvm_v8i8_ty, llvm_v8i8_ty, llvm_v8i8_ty, llvm_v8i8_ty],
                [IntrNoMem]>;
  class Neon_Tbl5Arg_Intrinsic
    : Intrinsic<[llvm_v8i8_ty],
                [llvm_v8i8_ty, llvm_v8i8_ty, llvm_v8i8_ty, llvm_v8i8_ty,
                 llvm_v8i8_ty], [IntrNoMem]>;
  class Neon_Tbl6Arg_Intrinsic
    : Intrinsic<[llvm_v8i8_ty],
                [llvm_v8i8_ty, llvm_v8i8_ty, llvm_v8i8_ty, llvm_v8i8_ty,
                 llvm_v8i8_ty, llvm_v8i8_ty], [IntrNoMem]>;
  // Arithmetic ops

  // Every def inside this block has its IntrProperties overridden to
  // [IntrNoMem, Commutative], i.e. Commutative is added on top of the
  // IntrNoMem that the Neon_* classes already specify.
  let IntrProperties = [IntrNoMem, Commutative] in {

    // Vector Add.
    def int_arm_neon_vhadds : Neon_2Arg_Intrinsic;
    def int_arm_neon_vhaddu : Neon_2Arg_Intrinsic;
    def int_arm_neon_vrhadds : Neon_2Arg_Intrinsic;
    def int_arm_neon_vrhaddu : Neon_2Arg_Intrinsic;
    def int_arm_neon_vraddhn : Neon_2Arg_Narrow_Intrinsic;

    // Vector Multiply.
    def int_arm_neon_vmulp : Neon_2Arg_Intrinsic;
    def int_arm_neon_vqdmulh : Neon_2Arg_Intrinsic;
    def int_arm_neon_vqrdmulh : Neon_2Arg_Intrinsic;
    def int_arm_neon_vmulls : Neon_2Arg_Long_Intrinsic;
    def int_arm_neon_vmullu : Neon_2Arg_Long_Intrinsic;
    def int_arm_neon_vmullp : Neon_2Arg_Long_Intrinsic;
    def int_arm_neon_vqdmull : Neon_2Arg_Long_Intrinsic;

    // Vector Maximum.
    def int_arm_neon_vmaxs : Neon_2Arg_Intrinsic;
    def int_arm_neon_vmaxu : Neon_2Arg_Intrinsic;
    def int_arm_neon_vmaxnm : Neon_2Arg_Intrinsic;

    // Vector Minimum.
    def int_arm_neon_vmins : Neon_2Arg_Intrinsic;
    def int_arm_neon_vminu : Neon_2Arg_Intrinsic;
    def int_arm_neon_vminnm : Neon_2Arg_Intrinsic;

    // Vector Reciprocal Step.
    def int_arm_neon_vrecps : Neon_2Arg_Intrinsic;

    // Vector Reciprocal Square Root Step.
    def int_arm_neon_vrsqrts : Neon_2Arg_Intrinsic;
  }
  // These two-operand intrinsics are declared outside the Commutative block,
  // so they keep the plain [IntrNoMem] property list of their classes.

  // Vector Subtract.
  def int_arm_neon_vhsubs : Neon_2Arg_Intrinsic;
  def int_arm_neon_vhsubu : Neon_2Arg_Intrinsic;
  def int_arm_neon_vrsubhn : Neon_2Arg_Narrow_Intrinsic;

  // Vector Absolute Compare.
  def int_arm_neon_vacge : Neon_Compare_Intrinsic;
  def int_arm_neon_vacgt : Neon_Compare_Intrinsic;

  // Vector Absolute Differences.
  def int_arm_neon_vabds : Neon_2Arg_Intrinsic;
  def int_arm_neon_vabdu : Neon_2Arg_Intrinsic;
  // Vector Pairwise Add.
  def int_arm_neon_vpadd : Neon_2Arg_Intrinsic;

  // Vector Pairwise Add Long.
  // Note: This is different from the other "long" NEON intrinsics because
  // the result vector has half as many elements as the source vector.
  // The source and destination vector types must be specified separately,
  // which is why both are independently overloaded (llvm_anyvector_ty).
  def int_arm_neon_vpaddls : Intrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty],
                                       [IntrNoMem]>;
  def int_arm_neon_vpaddlu : Intrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty],
                                       [IntrNoMem]>;

  // Vector Pairwise Add and Accumulate Long.
  // Note: This is similar to vpaddl but the destination vector also appears
  // as the first argument (LLVMMatchType<0>).
  def int_arm_neon_vpadals : Intrinsic<[llvm_anyvector_ty],
                                       [LLVMMatchType<0>, llvm_anyvector_ty],
                                       [IntrNoMem]>;
  def int_arm_neon_vpadalu : Intrinsic<[llvm_anyvector_ty],
                                       [LLVMMatchType<0>, llvm_anyvector_ty],
                                       [IntrNoMem]>;
  // Vector Pairwise Maximum and Minimum.
  // All four use Neon_2Arg_Intrinsic: two operands of the result's type.
  def int_arm_neon_vpmaxs : Neon_2Arg_Intrinsic;
  def int_arm_neon_vpmaxu : Neon_2Arg_Intrinsic;
  def int_arm_neon_vpmins : Neon_2Arg_Intrinsic;
  def int_arm_neon_vpminu : Neon_2Arg_Intrinsic;
  // Vector Shifts:
  //
  // The various saturating and rounding vector shift operations need to be
  // represented by intrinsics in LLVM, and even the basic VSHL variable shift
  // operation cannot be safely translated to LLVM's shift operators.  VSHL can
  // be used for both left and right shifts, or even combinations of the two,
  // depending on the signs of the shift amounts.  It also has well-defined
  // behavior for shift amounts that LLVM leaves undefined.  Only basic shifts
  // by constants can be represented with LLVM's shift operators.
  //
  // The shift counts for these intrinsics are always vectors, even for constant
  // shifts, where the constant is replicated.  For consistency with VSHL (and
  // other variable shift instructions), left shifts have positive shift counts
  // and right shifts have negative shift counts.  This convention is also used
  // for constant right shift intrinsics, and to help preserve sanity, the
  // intrinsic names use "shift" instead of either "shl" or "shr".  Where
  // applicable, signed and unsigned versions of the intrinsics are
  // distinguished with "s" and "u" suffixes.  A few NEON shift instructions,
  // such as VQSHLU, take signed operands but produce unsigned results; these
  // use a "su" suffix.

  // Vector Shift.
  def int_arm_neon_vshifts : Neon_2Arg_Intrinsic;
  def int_arm_neon_vshiftu : Neon_2Arg_Intrinsic;

  // Vector Rounding Shift.
  def int_arm_neon_vrshifts : Neon_2Arg_Intrinsic;
  def int_arm_neon_vrshiftu : Neon_2Arg_Intrinsic;
  def int_arm_neon_vrshiftn : Neon_2Arg_Narrow_Intrinsic;

  // Vector Saturating Shift.
  def int_arm_neon_vqshifts : Neon_2Arg_Intrinsic;
  def int_arm_neon_vqshiftu : Neon_2Arg_Intrinsic;
  def int_arm_neon_vqshiftsu : Neon_2Arg_Intrinsic;
  def int_arm_neon_vqshiftns : Neon_2Arg_Narrow_Intrinsic;
  def int_arm_neon_vqshiftnu : Neon_2Arg_Narrow_Intrinsic;
  def int_arm_neon_vqshiftnsu : Neon_2Arg_Narrow_Intrinsic;

  // Vector Saturating Rounding Shift.
  def int_arm_neon_vqrshifts : Neon_2Arg_Intrinsic;
  def int_arm_neon_vqrshiftu : Neon_2Arg_Intrinsic;
  def int_arm_neon_vqrshiftns : Neon_2Arg_Narrow_Intrinsic;
  def int_arm_neon_vqrshiftnu : Neon_2Arg_Narrow_Intrinsic;
  def int_arm_neon_vqrshiftnsu : Neon_2Arg_Narrow_Intrinsic;

  // Vector Shift and Insert.
  def int_arm_neon_vshiftins : Neon_3Arg_Intrinsic;
  // One-operand NEON operations. All of them share Neon_1Arg_Intrinsic.

  // Absolute value and saturating absolute value.
  def int_arm_neon_vabs    : Neon_1Arg_Intrinsic;
  def int_arm_neon_vqabs   : Neon_1Arg_Intrinsic;

  // Saturating negate.
  def int_arm_neon_vqneg   : Neon_1Arg_Intrinsic;

  // Count leading sign bits.
  def int_arm_neon_vcls    : Neon_1Arg_Intrinsic;

  // Reciprocal estimate.
  def int_arm_neon_vrecpe  : Neon_1Arg_Intrinsic;

  // Reciprocal square root estimate.
  def int_arm_neon_vrsqrte : Neon_1Arg_Intrinsic;
  // Floating-point to integer vector conversions. The letter after "vcvt"
  // (a/n/p/m) selects the variant and the trailing u/s the signedness.
  def int_arm_neon_vcvtau     : Neon_CvtFPtoInt_1Arg_Intrinsic;
  def int_arm_neon_vcvtas     : Neon_CvtFPtoInt_1Arg_Intrinsic;
  def int_arm_neon_vcvtnu     : Neon_CvtFPtoInt_1Arg_Intrinsic;
  def int_arm_neon_vcvtns     : Neon_CvtFPtoInt_1Arg_Intrinsic;
  def int_arm_neon_vcvtpu     : Neon_CvtFPtoInt_1Arg_Intrinsic;
  def int_arm_neon_vcvtps     : Neon_CvtFPtoInt_1Arg_Intrinsic;
  def int_arm_neon_vcvtmu     : Neon_CvtFPtoInt_1Arg_Intrinsic;
  def int_arm_neon_vcvtms     : Neon_CvtFPtoInt_1Arg_Intrinsic;

  // Conversions between floating-point and fixed-point vectors, in both
  // directions.
  def int_arm_neon_vcvtfp2fxs : Neon_CvtFPToFx_Intrinsic;
  def int_arm_neon_vcvtfp2fxu : Neon_CvtFPToFx_Intrinsic;
  def int_arm_neon_vcvtfxs2fp : Neon_CvtFxToFP_Intrinsic;
  def int_arm_neon_vcvtfxu2fp : Neon_CvtFxToFP_Intrinsic;
  // Half-precision <-> single-precision vector conversions. The half-precision
  // side is carried as <4 x i16>, the single-precision side as <4 x float>.
  def int_arm_neon_vcvtfp2hf : Intrinsic<[llvm_v4i16_ty],
                                         [llvm_v4f32_ty],
                                         [IntrNoMem]>;
  def int_arm_neon_vcvthf2fp : Intrinsic<[llvm_v4f32_ty],
                                         [llvm_v4i16_ty],
                                         [IntrNoMem]>;
  // Saturating narrowing vector moves (one-operand narrowing signature).
  def int_arm_neon_vqmovns  : Neon_1Arg_Narrow_Intrinsic;
  def int_arm_neon_vqmovnu  : Neon_1Arg_Narrow_Intrinsic;
  def int_arm_neon_vqmovnsu : Neon_1Arg_Narrow_Intrinsic;
  // Vector table lookup. The leading 1-4 arguments form the table.
  def int_arm_neon_vtbl1 : Neon_Tbl2Arg_Intrinsic;
  def int_arm_neon_vtbl2 : Neon_Tbl3Arg_Intrinsic;
  def int_arm_neon_vtbl3 : Neon_Tbl4Arg_Intrinsic;
  def int_arm_neon_vtbl4 : Neon_Tbl5Arg_Intrinsic;

  // Vector table extension. Because some destination elements may be left
  // untouched, the original destination vector is supplied as the first
  // argument; the 1-4 arguments that follow it are the table.
  def int_arm_neon_vtbx1 : Neon_Tbl3Arg_Intrinsic;
  def int_arm_neon_vtbx2 : Neon_Tbl4Arg_Intrinsic;
  def int_arm_neon_vtbx3 : Neon_Tbl5Arg_Intrinsic;
  def int_arm_neon_vtbx4 : Neon_Tbl6Arg_Intrinsic;
  // Vector and scalar rounding. Only vrintn uses the float-argument signature
  // class; the remaining variants use the generic one-operand class.
  def int_arm_neon_vrintn : Neon_1FloatArg_Intrinsic;
  def int_arm_neon_vrintx : Neon_1Arg_Intrinsic;
  def int_arm_neon_vrinta : Neon_1Arg_Intrinsic;
  def int_arm_neon_vrintz : Neon_1Arg_Intrinsic;
  def int_arm_neon_vrintm : Neon_1Arg_Intrinsic;
  def int_arm_neon_vrintp : Neon_1Arg_Intrinsic;
  // De-interleaving vector loads from N-element structures.
  // Operands: the address, then the alignment (i32). vldN returns N vectors
  // of one common overloaded type.
  def int_arm_neon_vld1
      : Intrinsic<[llvm_anyvector_ty],
                  [llvm_anyptr_ty, llvm_i32_ty],
                  [IntrReadMem, IntrArgMemOnly]>;
  def int_arm_neon_vld2
      : Intrinsic<[llvm_anyvector_ty, LLVMMatchType<0>],
                  [llvm_anyptr_ty, llvm_i32_ty],
                  [IntrReadMem, IntrArgMemOnly]>;
  def int_arm_neon_vld3
      : Intrinsic<[llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>],
                  [llvm_anyptr_ty, llvm_i32_ty],
                  [IntrReadMem, IntrArgMemOnly]>;
  def int_arm_neon_vld4
      : Intrinsic<[llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>,
                   LLVMMatchType<0>],
                  [llvm_anyptr_ty, llvm_i32_ty],
                  [IntrReadMem, IntrArgMemOnly]>;

  // vld1xN: N result vectors loaded through a single pointer operand whose
  // pointee type matches the result vector type. No alignment operand.
  def int_arm_neon_vld1x2
      : Intrinsic<[llvm_anyvector_ty, LLVMMatchType<0>],
                  [LLVMAnyPointerType<LLVMMatchType<0>>],
                  [IntrReadMem, IntrArgMemOnly]>;
  def int_arm_neon_vld1x3
      : Intrinsic<[llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>],
                  [LLVMAnyPointerType<LLVMMatchType<0>>],
                  [IntrReadMem, IntrArgMemOnly]>;
  def int_arm_neon_vld1x4
      : Intrinsic<[llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>,
                   LLVMMatchType<0>],
                  [LLVMAnyPointerType<LLVMMatchType<0>>],
                  [IntrReadMem, IntrArgMemOnly]>;
  // Load an N-element structure into a single lane.
  // Operands, in order: the address, the N input vectors (needed because only
  // one lane of each is overwritten), the lane number, and the alignment.
  def int_arm_neon_vld2lane
      : Intrinsic<[llvm_anyvector_ty, LLVMMatchType<0>],
                  [llvm_anyptr_ty,
                   LLVMMatchType<0>, LLVMMatchType<0>,
                   llvm_i32_ty, llvm_i32_ty],
                  [IntrReadMem, IntrArgMemOnly]>;
  def int_arm_neon_vld3lane
      : Intrinsic<[llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>],
                  [llvm_anyptr_ty,
                   LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>,
                   llvm_i32_ty, llvm_i32_ty],
                  [IntrReadMem, IntrArgMemOnly]>;
  def int_arm_neon_vld4lane
      : Intrinsic<[llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>,
                   LLVMMatchType<0>],
                  [llvm_anyptr_ty,
                   LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>,
                   LLVMMatchType<0>,
                   llvm_i32_ty, llvm_i32_ty],
                  [IntrReadMem, IntrArgMemOnly]>;
  // Load an N-element structure and replicate it to all lanes.
  // Operands: the address, then the alignment.
  def int_arm_neon_vld2dup
      : Intrinsic<[llvm_anyvector_ty, LLVMMatchType<0>],
                  [llvm_anyptr_ty, llvm_i32_ty],
                  [IntrReadMem, IntrArgMemOnly]>;
  def int_arm_neon_vld3dup
      : Intrinsic<[llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>],
                  [llvm_anyptr_ty, llvm_i32_ty],
                  [IntrReadMem, IntrArgMemOnly]>;
  def int_arm_neon_vld4dup
      : Intrinsic<[llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>,
                   LLVMMatchType<0>],
                  [llvm_anyptr_ty, llvm_i32_ty],
                  [IntrReadMem, IntrArgMemOnly]>;
  // Interleaving vector stores from N-element structures.
  // Operands, in order: the address, the N vectors, and the alignment.
  def int_arm_neon_vst1
      : Intrinsic<[],
                  [llvm_anyptr_ty, llvm_anyvector_ty, llvm_i32_ty],
                  [IntrArgMemOnly]>;
  def int_arm_neon_vst2
      : Intrinsic<[],
                  [llvm_anyptr_ty, llvm_anyvector_ty, LLVMMatchType<1>,
                   llvm_i32_ty],
                  [IntrArgMemOnly]>;
  def int_arm_neon_vst3
      : Intrinsic<[],
                  [llvm_anyptr_ty, llvm_anyvector_ty, LLVMMatchType<1>,
                   LLVMMatchType<1>, llvm_i32_ty],
                  [IntrArgMemOnly]>;
  def int_arm_neon_vst4
      : Intrinsic<[],
                  [llvm_anyptr_ty, llvm_anyvector_ty, LLVMMatchType<1>,
                   LLVMMatchType<1>, LLVMMatchType<1>, llvm_i32_ty],
                  [IntrArgMemOnly]>;

  // vst1xN: the address followed by N vectors. These take no alignment
  // operand and mark the pointer argument (index 0) as not captured.
  def int_arm_neon_vst1x2
      : Intrinsic<[],
                  [llvm_anyptr_ty, llvm_anyvector_ty, LLVMMatchType<1>],
                  [IntrArgMemOnly, NoCapture<ArgIndex<0>>]>;
  def int_arm_neon_vst1x3
      : Intrinsic<[],
                  [llvm_anyptr_ty, llvm_anyvector_ty, LLVMMatchType<1>,
                   LLVMMatchType<1>],
                  [IntrArgMemOnly, NoCapture<ArgIndex<0>>]>;
  def int_arm_neon_vst1x4
      : Intrinsic<[],
                  [llvm_anyptr_ty, llvm_anyvector_ty, LLVMMatchType<1>,
                   LLVMMatchType<1>, LLVMMatchType<1>],
                  [IntrArgMemOnly, NoCapture<ArgIndex<0>>]>;
  // Store one lane of an N-element structure.
  // Operands, in order: the address, the N vectors, the lane number, and the
  // alignment.
  def int_arm_neon_vst2lane
      : Intrinsic<[],
                  [llvm_anyptr_ty,
                   llvm_anyvector_ty, LLVMMatchType<1>,
                   llvm_i32_ty, llvm_i32_ty],
                  [IntrArgMemOnly]>;
  def int_arm_neon_vst3lane
      : Intrinsic<[],
                  [llvm_anyptr_ty,
                   llvm_anyvector_ty, LLVMMatchType<1>, LLVMMatchType<1>,
                   llvm_i32_ty, llvm_i32_ty],
                  [IntrArgMemOnly]>;
  def int_arm_neon_vst4lane
      : Intrinsic<[],
                  [llvm_anyptr_ty,
                   llvm_anyvector_ty, LLVMMatchType<1>, LLVMMatchType<1>,
                   LLVMMatchType<1>,
                   llvm_i32_ty, llvm_i32_ty],
                  [IntrArgMemOnly]>;
  // Vector bitwise select: three operands, all of the result's vector type.
  def int_arm_neon_vbsl
      : Intrinsic<[llvm_anyvector_ty],
                  [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>],
                  [IntrNoMem]>;
  // Crypto instructions.
  //
  // Signature classes. The AES forms operate on <16 x i8>; the SHA forms
  // operate on <4 x i32>, except the one-operand form which is i32 -> i32.
  class AES_1Arg_Intrinsic
      : Intrinsic<[llvm_v16i8_ty],
                  [llvm_v16i8_ty],
                  [IntrNoMem]>;
  class AES_2Arg_Intrinsic
      : Intrinsic<[llvm_v16i8_ty],
                  [llvm_v16i8_ty, llvm_v16i8_ty],
                  [IntrNoMem]>;
  class SHA_1Arg_Intrinsic
      : Intrinsic<[llvm_i32_ty],
                  [llvm_i32_ty],
                  [IntrNoMem]>;
  class SHA_2Arg_Intrinsic
      : Intrinsic<[llvm_v4i32_ty],
                  [llvm_v4i32_ty, llvm_v4i32_ty],
                  [IntrNoMem]>;
  // Three operands; the middle one is a scalar i32.
  class SHA_3Arg_i32_Intrinsic
      : Intrinsic<[llvm_v4i32_ty],
                  [llvm_v4i32_ty, llvm_i32_ty, llvm_v4i32_ty],
                  [IntrNoMem]>;
  // Three operands, all <4 x i32>.
  class SHA_3Arg_v4i32_Intrinsic
      : Intrinsic<[llvm_v4i32_ty],
                  [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty],
                  [IntrNoMem]>;

  // AES.
  def int_arm_neon_aesd      : AES_2Arg_Intrinsic;
  def int_arm_neon_aese      : AES_2Arg_Intrinsic;
  def int_arm_neon_aesimc    : AES_1Arg_Intrinsic;
  def int_arm_neon_aesmc     : AES_1Arg_Intrinsic;

  // SHA1 / SHA256, grouped by signature class.
  def int_arm_neon_sha1h     : SHA_1Arg_Intrinsic;
  def int_arm_neon_sha1su1   : SHA_2Arg_Intrinsic;
  def int_arm_neon_sha256su0 : SHA_2Arg_Intrinsic;
  def int_arm_neon_sha1c     : SHA_3Arg_i32_Intrinsic;
  def int_arm_neon_sha1m     : SHA_3Arg_i32_Intrinsic;
  def int_arm_neon_sha1p     : SHA_3Arg_i32_Intrinsic;
  def int_arm_neon_sha1su0   : SHA_3Arg_v4i32_Intrinsic;
  def int_arm_neon_sha256h   : SHA_3Arg_v4i32_Intrinsic;
  def int_arm_neon_sha256h2  : SHA_3Arg_v4i32_Intrinsic;
  def int_arm_neon_sha256su1 : SHA_3Arg_v4i32_Intrinsic;
  // vqrdmlah / vqrdmlsh: both use the shared three-operand NEON signature.
  def int_arm_neon_vqrdmlah    : Neon_3Arg_Intrinsic;
  def int_arm_neon_vqrdmlsh    : Neon_3Arg_Intrinsic;
  // Armv8.2-A dot product instructions.
  // Signature: the first operand has the result's vector type; the other two
  // operands share a second, independently overloaded vector type.
  class Neon_Dot_Intrinsic
      : Intrinsic<[llvm_anyvector_ty],
                  [LLVMMatchType<0>, llvm_anyvector_ty, LLVMMatchType<1>],
                  [IntrNoMem]>;

  def int_arm_neon_udot        : Neon_Dot_Intrinsic;
  def int_arm_neon_sdot        : Neon_Dot_Intrinsic;
  // v8.6-A matrix multiply intrinsics.
  // Signature: the first operand has the result's vector type; the other two
  // operands share a second, independently overloaded vector type.
  class Neon_MatMul_Intrinsic
      : Intrinsic<[llvm_anyvector_ty],
                  [LLVMMatchType<0>, llvm_anyvector_ty, LLVMMatchType<1>],
                  [IntrNoMem]>;

  def int_arm_neon_ummla       : Neon_MatMul_Intrinsic;
  def int_arm_neon_smmla       : Neon_MatMul_Intrinsic;
  def int_arm_neon_usmmla      : Neon_MatMul_Intrinsic;
  // usdot is declared alongside the matrix-multiply defs but uses the
  // dot-product signature class.
  def int_arm_neon_usdot       : Neon_Dot_Intrinsic;
  // v8.6-A bfloat intrinsics.

  // <4 x float> -> overloaded vector result.
  def int_arm_neon_vcvtfp2bf
      : Intrinsic<[llvm_anyvector_ty],
                  [llvm_v4f32_ty],
                  [IntrNoMem]>;
  // Scalar float -> bfloat.
  def int_arm_neon_vcvtbfp2bf
      : Intrinsic<[llvm_bfloat_ty],
                  [llvm_float_ty],
                  [IntrNoMem]>;

  def int_arm_neon_bfdot       : Neon_Dot_Intrinsic;

  // <4 x float> accumulator plus two <8 x bfloat> operands.
  def int_arm_neon_bfmmla
      : Intrinsic<[llvm_v4f32_ty],
                  [llvm_v4f32_ty, llvm_v8bf16_ty, llvm_v8bf16_ty],
                  [IntrNoMem]>;

  // Shared signature for bfmlalb / bfmlalt: <4 x float> result from a
  // <4 x float> operand and two <8 x bfloat> operands.
  class Neon_BF16FML_Intrinsic
      : Intrinsic<[llvm_v4f32_ty],
                  [llvm_v4f32_ty, llvm_v8bf16_ty, llvm_v8bf16_ty],
                  [IntrNoMem]>;

  def int_arm_neon_bfmlalb     : Neon_BF16FML_Intrinsic;
  def int_arm_neon_bfmlalt     : Neon_BF16FML_Intrinsic;
  // Scalar cls: i32 -> i32, and an i64 -> i32 variant.
  def int_arm_cls
      : Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem]>;
  def int_arm_cls64
      : Intrinsic<[llvm_i32_ty], [llvm_i64_ty], [IntrNoMem]>;
  // MVE vctp: each takes one i32 and returns an i1 vector with 16, 8, 4 or 2
  // lanes respectively.
  def int_arm_mve_vctp8
      : Intrinsic<[llvm_v16i1_ty], [llvm_i32_ty], [IntrNoMem]>;
  def int_arm_mve_vctp16
      : Intrinsic<[llvm_v8i1_ty], [llvm_i32_ty], [IntrNoMem]>;
  def int_arm_mve_vctp32
      : Intrinsic<[llvm_v4i1_ty], [llvm_i32_ty], [IntrNoMem]>;
  def int_arm_mve_vctp64
      : Intrinsic<[llvm_v2i1_ty], [llvm_i32_ty], [IntrNoMem]>;
  // v8.3-A floating-point complex add; one intrinsic per rotation (90 / 270).
  def int_arm_neon_vcadd_rot90  : Neon_2Arg_Intrinsic;
  def int_arm_neon_vcadd_rot270 : Neon_2Arg_Intrinsic;
  // GNU eabi mcount: no results, no operands, and no intrinsic properties.
  def int_arm_gnu_eabi_mcount
      : Intrinsic<[], [], []>;
  // MVE predicate conversions: i32 -> overloaded vector, and the reverse.
  def int_arm_mve_pred_i2v
      : Intrinsic<[llvm_anyvector_ty], [llvm_i32_ty], [IntrNoMem]>;
  def int_arm_mve_pred_v2i
      : Intrinsic<[llvm_i32_ty], [llvm_anyvector_ty], [IntrNoMem]>;

  // Source and result vector types are overloaded independently.
  def int_arm_mve_vreinterpretq
      : Intrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty], [IntrNoMem]>;
  // Explicitly declared predicated MVE vector operations.
  //
  // Three operand lists recur below, so each is captured once in a local
  // signature class. The records produced are field-for-field the same as
  // spelling the Intrinsic<...> out on every def.
  //
  // NOTE(review): in each list the independently overloaded vector operand
  // looks like the predicate mask, and a trailing result-typed operand looks
  // like the value for inactive lanes (as in MVEPredicatedM) -- confirm.

  // (a, b, unsigned flag, <any vector>, <result type>)
  class MVE_BinaryUnsignedFlag_Predicated_Intrinsic
      : Intrinsic<[llvm_anyvector_ty],
                  [LLVMMatchType<0>, LLVMMatchType<0>,
                   llvm_i32_ty /* unsigned */,
                   llvm_anyvector_ty, LLVMMatchType<0>],
                  [IntrNoMem]>;

  // (a, b, <any vector>, <result type>)
  class MVE_Binary_Predicated_Intrinsic
      : Intrinsic<[llvm_anyvector_ty],
                  [LLVMMatchType<0>, LLVMMatchType<0>,
                   llvm_anyvector_ty, LLVMMatchType<0>],
                  [IntrNoMem]>;

  // (a, b, <any vector>)
  class MVE_BinaryNoInactive_Predicated_Intrinsic
      : Intrinsic<[llvm_anyvector_ty],
                  [LLVMMatchType<0>, LLVMMatchType<0>, llvm_anyvector_ty],
                  [IntrNoMem]>;

  def int_arm_mve_min_predicated
      : MVE_BinaryUnsignedFlag_Predicated_Intrinsic;
  def int_arm_mve_max_predicated
      : MVE_BinaryUnsignedFlag_Predicated_Intrinsic;
  def int_arm_mve_abd_predicated
      : MVE_BinaryUnsignedFlag_Predicated_Intrinsic;

  def int_arm_mve_add_predicated     : MVE_Binary_Predicated_Intrinsic;
  def int_arm_mve_and_predicated     : MVE_Binary_Predicated_Intrinsic;
  def int_arm_mve_bic_predicated     : MVE_Binary_Predicated_Intrinsic;
  def int_arm_mve_eor_predicated     : MVE_Binary_Predicated_Intrinsic;
  def int_arm_mve_orn_predicated     : MVE_Binary_Predicated_Intrinsic;
  def int_arm_mve_orr_predicated     : MVE_Binary_Predicated_Intrinsic;
  def int_arm_mve_sub_predicated     : MVE_Binary_Predicated_Intrinsic;
  def int_arm_mve_mul_predicated     : MVE_Binary_Predicated_Intrinsic;

  def int_arm_mve_mulh_predicated
      : MVE_BinaryUnsignedFlag_Predicated_Intrinsic;
  def int_arm_mve_qdmulh_predicated  : MVE_Binary_Predicated_Intrinsic;
  def int_arm_mve_rmulh_predicated
      : MVE_BinaryUnsignedFlag_Predicated_Intrinsic;
  def int_arm_mve_qrdmulh_predicated : MVE_Binary_Predicated_Intrinsic;

  // Multiplies whose two inputs use a second overloaded vector type, distinct
  // from the result type. These do not fit the shared classes above.
  def int_arm_mve_mull_int_predicated
      : Intrinsic<[llvm_anyvector_ty],
                  [llvm_anyvector_ty, LLVMMatchType<1>,
                   llvm_i32_ty /* unsigned */, llvm_i32_ty /* top */,
                   llvm_anyvector_ty, LLVMMatchType<0>],
                  [IntrNoMem]>;
  def int_arm_mve_mull_poly_predicated
      : Intrinsic<[llvm_anyvector_ty],
                  [llvm_anyvector_ty, LLVMMatchType<1>,
                   llvm_i32_ty,
                   llvm_anyvector_ty, LLVMMatchType<0>],
                  [IntrNoMem]>;

  def int_arm_mve_qadd_predicated
      : MVE_BinaryUnsignedFlag_Predicated_Intrinsic;
  def int_arm_mve_hadd_predicated
      : MVE_BinaryUnsignedFlag_Predicated_Intrinsic;
  def int_arm_mve_rhadd_predicated
      : MVE_BinaryUnsignedFlag_Predicated_Intrinsic;
  def int_arm_mve_qsub_predicated
      : MVE_BinaryUnsignedFlag_Predicated_Intrinsic;
  def int_arm_mve_hsub_predicated
      : MVE_BinaryUnsignedFlag_Predicated_Intrinsic;

  def int_arm_mve_vmina_predicated
      : MVE_BinaryNoInactive_Predicated_Intrinsic;
  def int_arm_mve_vmaxa_predicated
      : MVE_BinaryNoInactive_Predicated_Intrinsic;
  def int_arm_mve_vminnma_predicated
      : MVE_BinaryNoInactive_Predicated_Intrinsic;
  def int_arm_mve_vmaxnma_predicated
      : MVE_BinaryNoInactive_Predicated_Intrinsic;
  // Emits an intrinsic together with its "_predicated" twin.
  //   rets    - result types, shared by both records
  //   params  - operand types of the unpredicated record
  //   pred    - type of the single extra operand appended to the predicated
  //             record (defaults to an overloaded vector)
  //   props   - intrinsic properties for both records (default: IntrNoMem)
  //   sdprops - SDNode properties for both records (default: none)
  multiclass MVEPredicated<list<LLVMType> rets, list<LLVMType> params,
                           LLVMType pred = llvm_anyvector_ty,
                           list<IntrinsicProperty> props = [IntrNoMem],
                           list<SDNodeProperty> sdprops = []> {
    def "" : Intrinsic<rets, params, props, "", sdprops>;
    def _predicated
        : Intrinsic<rets, !listconcat(params, [pred]), props, "", sdprops>;
  }
  // Like MVEPredicated, but the "_predicated" record receives two trailing
  // operands: the predicate, then one more operand of the result type. When
  // the first result is the overloaded llvm_anyvector_ty, that last operand
  // is written as LLVMMatchType<0> so it is tied to the result instead of
  // introducing a new overload; otherwise the concrete result type is reused.
  multiclass MVEPredicatedM<list<LLVMType> rets, list<LLVMType> params,
                            LLVMType pred = llvm_anyvector_ty,
                            list<IntrinsicProperty> props = [IntrNoMem]> {
    def "" : Intrinsic<rets, params, props>;
    def _predicated
        : Intrinsic<rets,
                    !listconcat(params,
                                [pred,
                                 !if(!eq(rets[0], llvm_anyvector_ty),
                                     LLVMMatchType<0>, rets[0])]),
                    props>;
  }
  // Four suffixed intrinsics (v, av, nmv, nmav), each with a predicated twin.
  // The v/av forms take and return i32; the nmv/nmav forms take and return an
  // overloaded floating-point scalar. Only the "v" form carries an extra i32
  // "unsigned" flag.
  multiclass MVE_minmaxv {
    defm v    : MVEPredicated<[llvm_i32_ty],
                              [llvm_i32_ty, llvm_anyvector_ty,
                               llvm_i32_ty /* unsigned */]>;
    defm av   : MVEPredicated<[llvm_i32_ty],
                              [llvm_i32_ty, llvm_anyvector_ty]>;
    defm nmv  : MVEPredicated<[llvm_anyfloat_ty],
                              [LLVMMatchType<0>, llvm_anyvector_ty]>;
    defm nmav : MVEPredicated<[llvm_anyfloat_ty],
                              [LLVMMatchType<0>, llvm_anyvector_ty]>;
  }

  defm int_arm_mve_min : MVE_minmaxv;
  defm int_arm_mve_max : MVE_minmaxv;
  // addv returns i32 and addlv returns i64. Both take an overloaded vector
  // and an i32 "unsigned" flag.
  defm int_arm_mve_addv
      : MVEPredicated<[llvm_i32_ty],
                      [llvm_anyvector_ty, llvm_i32_ty /* unsigned */]>;
  defm int_arm_mve_addlv
      : MVEPredicated<[llvm_i64_ty],
                      [llvm_anyvector_ty, llvm_i32_ty /* unsigned */]>;
  // Intrinsic that exists in an unpredicated and a predicated form.
  // The unpredicated operand list is flags followed by params. The predicated
  // form has two extra operands: `inactive` (the value for inactive lanes,
  // may be undef), inserted between flags and params, and `predicate`,
  // appended at the end.
  multiclass MVEMXPredicated<list<LLVMType> rets, list<LLVMType> flags,
                             list<LLVMType> params, LLVMType inactive,
                             LLVMType predicate,
                             list<IntrinsicProperty> props = [IntrNoMem]> {
    def "" : Intrinsic<rets, !listconcat(flags, params), props>;
    def _predicated
        : Intrinsic<rets,
                    !listconcat(flags, [inactive], params, [predicate]),
                    props>;
  }
  // Conversions between <8 x half> and <4 x float>. Both use a <4 x i1>
  // predicate. The widening form's inactive-lanes operand is <4 x float>.
  defm int_arm_mve_vcvt_narrow
      : MVEPredicated<[llvm_v8f16_ty],
                      [llvm_v8f16_ty, llvm_v4f32_ty, llvm_i32_ty],
                      llvm_v4i1_ty>;
  defm int_arm_mve_vcvt_widen
      : MVEMXPredicated<[llvm_v4f32_ty],
                        [],
                        [llvm_v8f16_ty, llvm_i32_ty],
                        llvm_v4f32_ty,
                        llvm_v4i1_ty>;
  // Gather loads / scatter stores addressed by a vector operand plus an i32.
  // The "_wb" forms additionally return a vector of the same type as that
  // first vector operand. All carry SDNPMemOperand.
  defm int_arm_mve_vldr_gather_base
      : MVEPredicated<[llvm_anyvector_ty],
                      [llvm_anyvector_ty, llvm_i32_ty],
                      llvm_anyvector_ty,
                      [IntrReadMem],
                      [SDNPMemOperand]>;
  defm int_arm_mve_vldr_gather_base_wb
      : MVEPredicated<[llvm_anyvector_ty, llvm_anyvector_ty],
                      [LLVMMatchType<1>, llvm_i32_ty],
                      llvm_anyvector_ty,
                      [IntrReadMem],
                      [SDNPMemOperand]>;
  defm int_arm_mve_vstr_scatter_base
      : MVEPredicated<[],
                      [llvm_anyvector_ty, llvm_i32_ty, llvm_anyvector_ty],
                      llvm_anyvector_ty,
                      [IntrWriteMem],
                      [SDNPMemOperand]>;
  defm int_arm_mve_vstr_scatter_base_wb
      : MVEPredicated<[llvm_anyvector_ty],
                      [LLVMMatchType<0>, llvm_i32_ty, llvm_anyvector_ty],
                      llvm_anyvector_ty,
                      [IntrWriteMem],
                      [SDNPMemOperand]>;
  // gather_offset has three trailing i32 parameters:
  //   1. the size, in bits, of each memory element loaded;
  //   2. a left shift applied to every offset in the vector parameter (must
  //      be 0 or correspond to the element size of the destination vector
  //      type);
  //   3. 1 for zero extension (when the load is widening), 0 for sign
  //      extension.
  //
  // scatter_offset takes only the first two of these: it narrows rather than
  // widens, so an extension flag would be meaningless.
  defm int_arm_mve_vldr_gather_offset
      : MVEPredicated<[llvm_anyvector_ty],
                      [llvm_anyptr_ty, llvm_anyvector_ty,
                       llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
                      llvm_anyvector_ty,
                      [IntrReadMem],
                      [SDNPMemOperand]>;
  defm int_arm_mve_vstr_scatter_offset
      : MVEPredicated<[],
                      [llvm_anyptr_ty, llvm_anyvector_ty, llvm_anyvector_ty,
                       llvm_i32_ty, llvm_i32_ty],
                      llvm_anyvector_ty,
                      [IntrWriteMem],
                      [SDNPMemOperand]>;
  // Predicated shifts by an immediate. The right-shift form carries one more
  // i32 than the left-shift form: the unsigned flag.
  def int_arm_mve_shl_imm_predicated
      : Intrinsic<[llvm_anyvector_ty],
                  [LLVMMatchType<0>, llvm_i32_ty,
                   llvm_anyvector_ty, LLVMMatchType<0>],
                  [IntrNoMem]>;
  def int_arm_mve_shr_imm_predicated
      : Intrinsic<[llvm_anyvector_ty],
                  [LLVMMatchType<0>, llvm_i32_ty,
                   llvm_i32_ty /* unsigned flag */,
                   llvm_anyvector_ty, LLVMMatchType<0>],
                  [IntrNoMem]>;
  // Immediate shifts declared through MVEPredicatedM, which supplies the
  // predicate and result-typed trailing operands of the predicated form.
  defm int_arm_mve_vqshl_imm
      : MVEPredicatedM<[llvm_anyvector_ty],
                       [LLVMMatchType<0>,
                        llvm_i32_ty /*shiftcount*/,
                        llvm_i32_ty /*unsigned*/]>;
  defm int_arm_mve_vrshr_imm
      : MVEPredicatedM<[llvm_anyvector_ty],
                       [LLVMMatchType<0>,
                        llvm_i32_ty /*shiftcount*/,
                        llvm_i32_ty /*unsigned*/]>;
  defm int_arm_mve_vqshlu_imm
      : MVEPredicatedM<[llvm_anyvector_ty],
                       [LLVMMatchType<0>,
                        llvm_i32_ty /*shiftcount*/]>;
  // Input vector type is overloaded separately from the result type.
  defm int_arm_mve_vshll_imm
      : MVEPredicatedM<[llvm_anyvector_ty],
                       [llvm_anyvector_ty,
                        llvm_i32_ty /*shiftcount*/,
                        llvm_i32_ty /*unsigned*/,
                        llvm_i32_ty /*top-half*/]>;
  // vsli / vsri: two vectors of the result type plus one i32.
  defm int_arm_mve_vsli
      : MVEPredicated<[llvm_anyvector_ty],
                      [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty]>;
  defm int_arm_mve_vsri
      : MVEPredicated<[llvm_anyvector_ty],
                      [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty]>;

  // vshrn: a result-typed vector, a separately overloaded input vector, and
  // six i32 operands labelled below.
  defm int_arm_mve_vshrn
      : MVEPredicated<[llvm_anyvector_ty],
                      [LLVMMatchType<0>, llvm_anyvector_ty,
                       llvm_i32_ty /*shiftcount*/,
                       llvm_i32_ty /*saturate*/,
                       llvm_i32_ty /*round*/,
                       llvm_i32_ty /*unsigned-out*/,
                       llvm_i32_ty /*unsigned-in*/,
                       llvm_i32_ty /*top-half*/]>;

  // vshl with a scalar shift count (MVEPredicated) and with a vector of shift
  // counts (MVEPredicatedM).
  defm int_arm_mve_vshl_scalar
      : MVEPredicated<[llvm_anyvector_ty],
                      [LLVMMatchType<0>,
                       llvm_i32_ty /*shiftcount*/,
                       llvm_i32_ty /*saturate*/,
                       llvm_i32_ty /*round*/,
                       llvm_i32_ty /*unsigned*/]>;
  defm int_arm_mve_vshl_vector
      : MVEPredicatedM<[llvm_anyvector_ty],
                       [LLVMMatchType<0>,
                        llvm_anyvector_ty /*shiftcounts*/,
                        llvm_i32_ty /*saturate*/,
                        llvm_i32_ty /*round*/,
                        llvm_i32_ty /*unsigned*/]>;
  // MVE scalar shifts.
  //
  // One scalar-shift intrinsic: it returns `value`, and its operands are
  // `value`, then an i32, then the optional `saturate` operand(s).
  class ARM_MVE_qrshift_single<list<LLVMType> value,
                               list<LLVMType> saturate = []>
      : Intrinsic<value,
                  !listconcat(value, [llvm_i32_ty], saturate),
                  [IntrNoMem]>;

  multiclass ARM_MVE_qrshift<list<LLVMType> saturate = []> {
    // Most of these shifts exist in a 32-bit and a 64-bit version, and only
    // the 64-bit ("l") version takes the extra saturation argument, if any.
    def ""  : ARM_MVE_qrshift_single<[llvm_i32_ty]>;
    def l   : ARM_MVE_qrshift_single<[llvm_i32_ty, llvm_i32_ty], saturate>;
  }

  defm int_arm_mve_urshr  : ARM_MVE_qrshift;
  defm int_arm_mve_uqshl  : ARM_MVE_qrshift;
  defm int_arm_mve_srshr  : ARM_MVE_qrshift;
  defm int_arm_mve_sqshl  : ARM_MVE_qrshift;
  defm int_arm_mve_uqrshl : ARM_MVE_qrshift<[llvm_i32_ty]>;
  defm int_arm_mve_sqrshr : ARM_MVE_qrshift<[llvm_i32_ty]>;

  // LSLL and ASRL exist only as 64-bit operations; there is no 32-bit form.
  def int_arm_mve_lsll
      : ARM_MVE_qrshift_single<[llvm_i32_ty, llvm_i32_ty]>;
  def int_arm_mve_asrl
      : ARM_MVE_qrshift_single<[llvm_i32_ty, llvm_i32_ty]>;
  // Unpredicated vabd: two vectors of the result type and an "unsigned" flag.
  def int_arm_mve_vabd
      : Intrinsic<[llvm_anyvector_ty],
                  [LLVMMatchType<0>, LLVMMatchType<0>,
                   llvm_i32_ty /* unsigned */],
                  [IntrNoMem]>;
  // vadc / vsbc return a vector together with an i32, and take two vectors
  // plus an i32.
  // NOTE(review): the i32 in/out pair is presumably the carry -- confirm.
  def int_arm_mve_vadc
      : Intrinsic<[llvm_anyvector_ty, llvm_i32_ty],
                  [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty],
                  [IntrNoMem]>;
  def int_arm_mve_vsbc
      : Intrinsic<[llvm_anyvector_ty, llvm_i32_ty],
                  [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty],
                  [IntrNoMem]>;

  // Predicated forms: three vectors of the result type, the i32, and a
  // separately overloaded vector as the last operand.
  def int_arm_mve_vadc_predicated
      : Intrinsic<[llvm_anyvector_ty, llvm_i32_ty],
                  [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>,
                   llvm_i32_ty, llvm_anyvector_ty],
                  [IntrNoMem]>;
  def int_arm_mve_vsbc_predicated
      : Intrinsic<[llvm_anyvector_ty, llvm_i32_ty],
                  [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>,
                   llvm_i32_ty, llvm_anyvector_ty],
                  [IntrNoMem]>;
  // vshlc returns the bits shifted out (i32) and the shifted vector. The
  // predicated form appends one separately overloaded vector operand.
  def int_arm_mve_vshlc
      : Intrinsic<[llvm_i32_ty /* bits shifted out */, llvm_anyvector_ty],
                  [LLVMMatchType<0>,
                   llvm_i32_ty /* bits shifted in */,
                   llvm_i32_ty /* shift count */],
                  [IntrNoMem]>;
  def int_arm_mve_vshlc_predicated
      : Intrinsic<[llvm_i32_ty /* bits shifted out */, llvm_anyvector_ty],
                  [LLVMMatchType<0>,
                   llvm_i32_ty /* bits shifted in */,
                   llvm_i32_ty /* shift count */,
                   llvm_anyvector_ty],
                  [IntrNoMem]>;
  // Unpredicated two-vector MVE operations. Forms with a third i32 operand
  // take an "unsigned" flag; vqdmulh and vqrdmulh take no flag.
  def int_arm_mve_vmulh
      : Intrinsic<[llvm_anyvector_ty],
                  [LLVMMatchType<0>, LLVMMatchType<0>,
                   llvm_i32_ty /* unsigned */],
                  [IntrNoMem]>;
  def int_arm_mve_vqdmulh
      : Intrinsic<[llvm_anyvector_ty],
                  [LLVMMatchType<0>, LLVMMatchType<0>],
                  [IntrNoMem]>;
  def int_arm_mve_vhadd
      : Intrinsic<[llvm_anyvector_ty],
                  [LLVMMatchType<0>, LLVMMatchType<0>,
                   llvm_i32_ty /* unsigned */],
                  [IntrNoMem]>;
  def int_arm_mve_vrhadd
      : Intrinsic<[llvm_anyvector_ty],
                  [LLVMMatchType<0>, LLVMMatchType<0>,
                   llvm_i32_ty /* unsigned */],
                  [IntrNoMem]>;
  def int_arm_mve_vhsub
      : Intrinsic<[llvm_anyvector_ty],
                  [LLVMMatchType<0>, LLVMMatchType<0>,
                   llvm_i32_ty /* unsigned */],
                  [IntrNoMem]>;
  def int_arm_mve_vrmulh
      : Intrinsic<[llvm_anyvector_ty],
                  [LLVMMatchType<0>, LLVMMatchType<0>,
                   llvm_i32_ty /* unsigned */],
                  [IntrNoMem]>;
  def int_arm_mve_vqrdmulh
      : Intrinsic<[llvm_anyvector_ty],
                  [LLVMMatchType<0>, LLVMMatchType<0>],
                  [IntrNoMem]>;

  // vmull / vmull_poly: the two inputs share an overloaded vector type that
  // is distinct from the result type.
  def int_arm_mve_vmull
      : Intrinsic<[llvm_anyvector_ty],
                  [llvm_anyvector_ty, LLVMMatchType<1>,
                   llvm_i32_ty /* unsigned */, llvm_i32_ty /* top */],
                  [IntrNoMem]>;
  def int_arm_mve_vmull_poly
      : Intrinsic<[llvm_anyvector_ty],
                  [llvm_anyvector_ty, LLVMMatchType<1>, llvm_i32_ty],
                  [IntrNoMem]>;
  // Complex add with rotation (vcaddq / vhcaddq).
  // The first two parameters are compile-time constants:
  //  * Halving: 0 means halving (vhcaddq), 1 means non-halving (vcaddq)
  //    instruction. Note: the flag is inverted to match the corresponding
  //    bit in the instruction encoding.
  //  * Rotation angle: 0 means 90 deg, 1 means 270 deg. (Complex add only
  //    supports these two rotations; compare int_arm_neon_vcadd_rot90 and
  //    int_arm_neon_vcadd_rot270.)
  // They are followed by the two vector operands. The predicated form also
  // takes an inactive-lanes value of the result type and a predicate vector.
  defm int_arm_mve_vcaddq
      : MVEMXPredicated<[llvm_anyvector_ty],
                        [llvm_i32_ty, llvm_i32_ty],
                        [LLVMMatchType<0>, LLVMMatchType<0>],
                        LLVMMatchType<0>,
                        llvm_anyvector_ty>;
  // For both intrinsics below, the first operand is the rotation angle and
  // must be a compile-time constant:
  //   0 ->   0 deg
  //   1 ->  90 deg
  //   2 -> 180 deg
  //   3 -> 270 deg
  defm int_arm_mve_vcmulq
      : MVEMXPredicated<[llvm_anyvector_ty],
                        [llvm_i32_ty],
                        [LLVMMatchType<0>, LLVMMatchType<0>],
                        LLVMMatchType<0>,
                        llvm_anyvector_ty>;
  defm int_arm_mve_vcmlaq
      : MVEPredicated<[llvm_anyvector_ty],
                      [llvm_i32_ty,
                       LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>],
                      llvm_anyvector_ty>;
  // MVE structure loads: 2 or 4 result vectors of one overloaded type, read
  // through a single pointer operand.
  def int_arm_mve_vld2q
      : Intrinsic<[llvm_anyvector_ty, LLVMMatchType<0>],
                  [llvm_anyptr_ty],
                  [IntrReadMem, IntrArgMemOnly]>;
  def int_arm_mve_vld4q
      : Intrinsic<[llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>,
                   LLVMMatchType<0>],
                  [llvm_anyptr_ty],
                  [IntrReadMem, IntrArgMemOnly]>;

  // MVE structure stores: pointer, the 2 or 4 vectors, then a trailing i32.
  // NOTE(review): the meaning of the trailing i32 is not stated here --
  // confirm against the lowering code.
  def int_arm_mve_vst2q
      : Intrinsic<[],
                  [llvm_anyptr_ty, llvm_anyvector_ty, LLVMMatchType<1>,
                   llvm_i32_ty],
                  [IntrWriteMem, IntrArgMemOnly], "", [SDNPMemOperand]>;
  def int_arm_mve_vst4q
      : Intrinsic<[],
                  [llvm_anyptr_ty, llvm_anyvector_ty, LLVMMatchType<1>,
                   LLVMMatchType<1>, LLVMMatchType<1>, llvm_i32_ty],
                  [IntrWriteMem, IntrArgMemOnly], "", [SDNPMemOperand]>;
  // MVE vector absolute difference and accumulate across vector.
  // Operands, in order:
  //  * 'unsigned' flag
  //  * accumulator
  //  * first vector operand
  //  * second vector operand
  //  * mask (predicated version only)
  defm int_arm_mve_vabav
      : MVEPredicated<[llvm_i32_ty],
                      [llvm_i32_ty, llvm_i32_ty,
                       llvm_anyvector_ty, LLVMMatchType<0>],
                      llvm_anyvector_ty>;
  // The following 3 intrinsics are MVE vector reductions with two vector
  // operands.
  // The first 3 operands are boolean flags (must be compile-time constants):
  //  * unsigned - the instruction operates on vectors of unsigned values and
  //               unsigned scalars
  //  * subtract - the instruction performs subtraction after multiplication
  //               of lane pairs (e.g., vmlsdav vs vmladav)
  //  * exchange - the instruction exchanges successive even and odd lanes of
  //               the first operand before multiplication of lane pairs
  //               (e.g., vmladavx vs vmladav)
  // The remaining operands are:
  //  * accumulator
  //  * first vector operand
  //  * second vector operand
  //  * mask (only in predicated versions)

  // Version with 32-bit result, vml{a,s}dav[a][x]
  defm int_arm_mve_vmldava
      : MVEPredicated<[llvm_i32_ty],
                      [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
                       llvm_i32_ty,
                       llvm_anyvector_ty, LLVMMatchType<0>],
                      llvm_anyvector_ty>;

  // Version with 64-bit result, vml{a,s}ldav[a][x]. The result and the
  // accumulator are each passed as a pair of i32 values.
  defm int_arm_mve_vmlldava
      : MVEPredicated<[llvm_i32_ty, llvm_i32_ty],
                      [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
                       llvm_i32_ty, llvm_i32_ty,
                       llvm_anyvector_ty, LLVMMatchType<0>],
                      llvm_anyvector_ty>;

  // Version with 72-bit rounded result, vrml{a,s}ldavh[a][x]. Same operand
  // and result layout as the 64-bit version.
  defm int_arm_mve_vrmlldavha
      : MVEPredicated<[llvm_i32_ty, llvm_i32_ty],
                      [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
                       llvm_i32_ty, llvm_i32_ty,
                       llvm_anyvector_ty, LLVMMatchType<0>],
                      llvm_anyvector_ty>;
  // vidup and vddup take exactly the same operands (base, step) and produce
  // the same pair of results, so both are generated from one definition.
  foreach dup = ["vidup", "vddup"] in
    defm "int_arm_mve_"#dup : MVEMXPredicated<
      [llvm_anyvector_ty /* output */, llvm_i32_ty /* written-back base */],
      [],
      [llvm_i32_ty /* base */, llvm_i32_ty /* step */],
      LLVMMatchType<0>, llvm_anyvector_ty>;
  // viwdup and vdwdup take exactly the same operands (base, limit, step) and
  // produce the same pair of results, so both are generated from one
  // definition.
  foreach wdup = ["viwdup", "vdwdup"] in
    defm "int_arm_mve_"#wdup : MVEMXPredicated<
      [llvm_anyvector_ty /* output */, llvm_i32_ty /* written-back base */],
      [],
      [llvm_i32_ty /* base */, llvm_i32_ty /* limit */, llvm_i32_ty /* step */],
      LLVMMatchType<0>, llvm_anyvector_ty>;
  // Flags:
  // * unsigned
  // The output and input vector types are overloaded independently.
  defm int_arm_mve_vcvt_fix : MVEMXPredicated<
    [llvm_anyvector_ty /* output */],
    [llvm_i32_ty /* unsigned */],
    [llvm_anyvector_ty /* input vector */, llvm_i32_ty /* scale */],
    LLVMMatchType<0>, llvm_anyvector_ty>;
  // Predicated-only intrinsic. The result, input and predicate vector types
  // are each overloaded; the inactive operand has the result type.
  def int_arm_mve_vcvt_fp_int_predicated : Intrinsic<
    [llvm_anyvector_ty],
    [llvm_anyvector_ty,
     llvm_i32_ty /* unsigned */,
     llvm_anyvector_ty /* predicate */,
     LLVMMatchType<0> /* inactive */],
    [IntrNoMem]>;
  // Generates int_arm_mve_vcvta, int_arm_mve_vcvtn, int_arm_mve_vcvtp and
  // int_arm_mve_vcvtm, which all share one signature.
  foreach mode = ["a", "n", "p", "m"] in
    defm "int_arm_mve_vcvt"#mode : MVEMXPredicated<
      [llvm_anyvector_ty /* output */],
      [llvm_i32_ty /* unsigned */],
      [llvm_anyvector_ty /* input */],
      LLVMMatchType<0>, llvm_anyvector_ty>;
  // Unpredicated unary intrinsics: one overloaded vector type used for both
  // the operand and the result, and no memory access.
  foreach unary = ["vrintn", "vcls"] in
    def "int_arm_mve_"#unary : Intrinsic<
      [llvm_anyvector_ty], [LLVMMatchType<0>], [IntrNoMem]>;
  // Takes a vector of the result type plus one i32 operand; no flag operands.
  defm int_arm_mve_vbrsr : MVEMXPredicated<
    [llvm_anyvector_ty],
    [],
    [LLVMMatchType<0>, llvm_i32_ty],
    LLVMMatchType<0>, llvm_anyvector_ty>;
  // The result type (overload 0) and the input type (overload 1) are
  // overloaded independently; both vector inputs share overload 1.
  // NOTE(review): the meaning of the trailing i32 operand is not documented
  // in this excerpt - confirm against the users of this intrinsic.
  def int_arm_mve_vqdmull : Intrinsic<
    [llvm_anyvector_ty],
    [llvm_anyvector_ty,
     LLVMMatchType<1>,
     llvm_i32_ty],
    [IntrNoMem]>;
  // Same leading operands as int_arm_mve_vqdmull, followed by another
  // overloaded vector and a vector of the result type.
  // NOTE(review): by analogy with the other *_predicated intrinsics in this
  // file the last two operands are presumably the predicate and the inactive
  // value - confirm.
  def int_arm_mve_vqdmull_predicated : Intrinsic<
    [llvm_anyvector_ty],
    [llvm_anyvector_ty,
     LLVMMatchType<1>,
     llvm_i32_ty,
     llvm_anyvector_ty,
     LLVMMatchType<0>],
    [IntrNoMem]>;
  // Shared signature for predicated unary operations: a vector of the result
  // type, an independently overloaded vector, and a second vector of the
  // result type.
  // NOTE(review): the middle operand is presumably the predicate and the last
  // one the inactive value - confirm against the instruction selection
  // patterns.
  class MVESimpleUnaryPredicated
    : Intrinsic<[llvm_anyvector_ty],
                [LLVMMatchType<0>, llvm_anyvector_ty, LLVMMatchType<0>],
                [IntrNoMem]>;
  // Every intrinsic below is a plain instance of MVESimpleUnaryPredicated; the
  // loop expands to int_arm_mve_<op>_predicated for each listed <op>, in the
  // order given.
  foreach op = ["mvn", "abs", "neg", "qabs", "qneg", "clz", "cls",
                "vrintz", "vrintm", "vrintp", "vrinta", "vrintx", "vrintn"] in
    def "int_arm_mve_"#op#"_predicated" : MVESimpleUnaryPredicated;
  // Takes a vector of the result type, the size to reverse, an independently
  // overloaded vector and a final vector of the result type.
  def int_arm_mve_vrev_predicated : Intrinsic<
    [llvm_anyvector_ty],
    [LLVMMatchType<0>,
     llvm_i32_ty /* size to reverse */,
     llvm_anyvector_ty,
     LLVMMatchType<0>],
    [IntrNoMem]>;
  // The result, input and predicate vector types are overloaded
  // independently; the last operand has the result type.
  def int_arm_mve_vmovl_predicated : Intrinsic<
    [llvm_anyvector_ty],
    [llvm_anyvector_ty,
     llvm_i32_ty /* unsigned */,
     llvm_i32_ty /* top half */,
     llvm_anyvector_ty /* predicate */,
     LLVMMatchType<0>],
    [IntrNoMem]>;
  // The first operand has the result type; the second vector operand and the
  // predicate are independently overloaded.
  def int_arm_mve_vmovn_predicated : Intrinsic<
    [llvm_anyvector_ty],
    [LLVMMatchType<0>,
     llvm_anyvector_ty,
     llvm_i32_ty /* top half */,
     llvm_anyvector_ty /* predicate */],
    [IntrNoMem]>;
  // The first operand has the result type; the second vector operand is
  // independently overloaded. Three i32 flag operands follow.
  def int_arm_mve_vqmovn : Intrinsic<
    [llvm_anyvector_ty],
    [LLVMMatchType<0>,
     llvm_anyvector_ty,
     llvm_i32_ty /* unsigned output */,
     llvm_i32_ty /* unsigned input */,
     llvm_i32_ty /* top half */],
    [IntrNoMem]>;
  // Same operands as int_arm_mve_vqmovn with an overloaded predicate vector
  // appended.
  def int_arm_mve_vqmovn_predicated : Intrinsic<
    [llvm_anyvector_ty],
    [LLVMMatchType<0>,
     llvm_anyvector_ty,
     llvm_i32_ty /* unsigned output */,
     llvm_i32_ty /* unsigned input */,
     llvm_i32_ty /* top half */,
     llvm_anyvector_ty /* pred */],
    [IntrNoMem]>;
  // All three data operands share the result's vector type; only the
  // predicate is overloaded separately.
  def int_arm_mve_fma_predicated : Intrinsic<
    [llvm_anyvector_ty],
    [LLVMMatchType<0> /* mult op #1 */,
     LLVMMatchType<0> /* mult op #2 */,
     LLVMMatchType<0> /* addend */,
     llvm_anyvector_ty /* pred */],
    [IntrNoMem]>;
  // Here the scalar i32 operand is the second multiplicand and the addend is
  // a vector.
  def int_arm_mve_vmla_n_predicated : Intrinsic<
    [llvm_anyvector_ty],
    [LLVMMatchType<0> /* mult op #1 */,
     LLVMMatchType<0> /* addend */,
     llvm_i32_ty /* mult op #2 (scalar) */,
     llvm_anyvector_ty /* pred */],
    [IntrNoMem]>;
  // Here both multiplicands are vectors and the scalar i32 operand is the
  // addend.
  def int_arm_mve_vmlas_n_predicated : Intrinsic<
    [llvm_anyvector_ty],
    [LLVMMatchType<0> /* mult op #1 */,
     LLVMMatchType<0> /* mult op #2 */,
     llvm_i32_ty /* addend (scalar) */,
     llvm_anyvector_ty /* pred */],
    [IntrNoMem]>;
  // vqdmlah and vqrdmlah share one signature: the scalar operand is the second
  // multiplicand and the addend is a vector.
  foreach mlah = ["vqdmlah", "vqrdmlah"] in
    defm "int_arm_mve_"#mlah : MVEPredicated<
      [llvm_anyvector_ty],
      [LLVMMatchType<0> /* mult op #1 */,
       LLVMMatchType<0> /* addend */,
       llvm_i32_ty /* mult op #2 (scalar) */]>;
  // vqdmlash and vqrdmlash share one signature: both multiplicands are vectors
  // and the scalar operand is the addend.
  foreach mlash = ["vqdmlash", "vqrdmlash"] in
    defm "int_arm_mve_"#mlash : MVEPredicated<
      [llvm_anyvector_ty],
      [LLVMMatchType<0> /* mult op #1 */,
       LLVMMatchType<0> /* mult op #2 */,
       llvm_i32_ty /* addend (scalar) */]>;
  // Three vector operands of the result type, followed by three i32 flag
  // operands.
  defm int_arm_mve_vqdmlad : MVEPredicated<
    [llvm_anyvector_ty],
    [LLVMMatchType<0>,
     LLVMMatchType<0>,
     LLVMMatchType<0>,
     llvm_i32_ty /* exchange */,
     llvm_i32_ty /* round */,
     llvm_i32_ty /* subtract */]>;
  // CDE (Custom Datapath Extension)

  // Generates the four general-purpose-register forms of a CDE intrinsic from
  // the list of i32 data operands `args`:
  //   <name>   - one i32 result
  //   <name>a  - one i32 result, with an i32 accumulator operand
  //   <name>d  - two i32 results (lo, hi)
  //   <name>da - two i32 results (lo, hi), with a two-i32 accumulator
  // In every form the coprocessor number is operand 0 and the immediate is the
  // last operand; both carry ImmArg. The ImmArg index of the immediate is
  // therefore !size(args) plus the number of operands placed before `args`.
  multiclass CDEGPRIntrinsics<list<LLVMType> args> {
    def "" : Intrinsic<
      [llvm_i32_ty],
      !listconcat([llvm_i32_ty /* coproc */],
                  args,
                  [llvm_i32_ty /* imm */]),
      [IntrNoMem,
       ImmArg<ArgIndex<0>>,
       ImmArg<ArgIndex<!add(!size(args), 1)>>]>;

    def a : Intrinsic<
      [llvm_i32_ty],
      !listconcat([llvm_i32_ty /* coproc */, llvm_i32_ty /* acc */],
                  args,
                  [llvm_i32_ty /* imm */]),
      [IntrNoMem,
       ImmArg<ArgIndex<0>>,
       ImmArg<ArgIndex<!add(!size(args), 2)>>]>;

    def d : Intrinsic<
      [llvm_i32_ty /* lo */, llvm_i32_ty /* hi */],
      !listconcat([llvm_i32_ty /* coproc */],
                  args,
                  [llvm_i32_ty /* imm */]),
      [IntrNoMem,
       ImmArg<ArgIndex<0>>,
       ImmArg<ArgIndex<!add(!size(args), 1)>>]>;

    def da : Intrinsic<
      [llvm_i32_ty /* lo */, llvm_i32_ty /* hi */],
      !listconcat([llvm_i32_ty /* coproc */,
                   llvm_i32_ty /* acc_lo */,
                   llvm_i32_ty /* acc_hi */],
                  args,
                  [llvm_i32_ty /* imm */]),
      [IntrNoMem,
       ImmArg<ArgIndex<0>>,
       ImmArg<ArgIndex<!add(!size(args), 3)>>]>;
  }
  // cx1, cx2 and cx3 pass zero, one and two i32 data operands respectively.
  defm int_arm_cde_cx1 : CDEGPRIntrinsics<[]>;
  defm int_arm_cde_cx2 : CDEGPRIntrinsics<[llvm_i32_ty]>;
  defm int_arm_cde_cx3 : CDEGPRIntrinsics<[llvm_i32_ty, llvm_i32_ty]>;
  // Generates the plain (<name>) and accumulating (<name>a) forms of a CDE
  // intrinsic whose result is an overloaded floating-point type. The
  // accumulator of the `a` form has the result type. The coprocessor number
  // (operand 0) and the trailing immediate both carry ImmArg.
  multiclass CDEVCXIntrinsics<list<LLVMType> args> {
    def "" : Intrinsic<
      [llvm_anyfloat_ty],
      !listconcat([llvm_i32_ty /* coproc */],
                  args,
                  [llvm_i32_ty /* imm */]),
      [IntrNoMem,
       ImmArg<ArgIndex<0>>,
       ImmArg<ArgIndex<!add(!size(args), 1)>>]>;

    def a : Intrinsic<
      [llvm_anyfloat_ty],
      !listconcat([llvm_i32_ty /* coproc */, LLVMMatchType<0> /* acc */],
                  args,
                  [llvm_i32_ty /* imm */]),
      [IntrNoMem,
       ImmArg<ArgIndex<0>>,
       ImmArg<ArgIndex<!add(!size(args), 2)>>]>;
  }
  // vcx1, vcx2 and vcx3 pass zero, one and two data operands respectively;
  // each data operand has the same type as the result.
  defm int_arm_cde_vcx1 : CDEVCXIntrinsics<[]>;
  defm int_arm_cde_vcx2 : CDEVCXIntrinsics<[LLVMMatchType<0>]>;
  defm int_arm_cde_vcx3 : CDEVCXIntrinsics<[LLVMMatchType<0>,
                                            LLVMMatchType<0>]>;
  // Generates the vector forms of a CDE intrinsic:
  //   <name>             - v16i8 result
  //   <name>a            - v16i8 result, with a v16i8 accumulator operand
  //   <name>_predicated  - overloaded vector result, with an `inactive`
  //                        operand of the result type and a trailing mask
  //   <name>a_predicated - overloaded vector result, with an accumulator of
  //                        the result type and a trailing mask
  // The coprocessor number (operand 0) and the immediate both carry ImmArg.
  // In the predicated forms the immediate is followed by the mask, so its
  // index is still !size(args) + 2.
  multiclass CDEVCXVecIntrinsics<list<LLVMType> args> {
    def "" : Intrinsic<
      [llvm_v16i8_ty],
      !listconcat([llvm_i32_ty /* coproc */],
                  args,
                  [llvm_i32_ty /* imm */]),
      [IntrNoMem,
       ImmArg<ArgIndex<0>>,
       ImmArg<ArgIndex<!add(!size(args), 1)>>]>;

    def a : Intrinsic<
      [llvm_v16i8_ty],
      !listconcat([llvm_i32_ty /* coproc */, llvm_v16i8_ty /* acc */],
                  args,
                  [llvm_i32_ty /* imm */]),
      [IntrNoMem,
       ImmArg<ArgIndex<0>>,
       ImmArg<ArgIndex<!add(!size(args), 2)>>]>;

    def _predicated : Intrinsic<
      [llvm_anyvector_ty],
      !listconcat([llvm_i32_ty /* coproc */, LLVMMatchType<0> /* inactive */],
                  args,
                  [llvm_i32_ty /* imm */, llvm_anyvector_ty /* mask */]),
      [IntrNoMem,
       ImmArg<ArgIndex<0>>,
       ImmArg<ArgIndex<!add(!size(args), 2)>>]>;

    def a_predicated : Intrinsic<
      [llvm_anyvector_ty],
      !listconcat([llvm_i32_ty /* coproc */, LLVMMatchType<0> /* acc */],
                  args,
                  [llvm_i32_ty /* imm */, llvm_anyvector_ty /* mask */]),
      [IntrNoMem,
       ImmArg<ArgIndex<0>>,
       ImmArg<ArgIndex<!add(!size(args), 2)>>]>;
  }
  // vcx1q, vcx2q and vcx3q pass zero, one and two v16i8 data operands
  // respectively.
  defm int_arm_cde_vcx1q : CDEVCXVecIntrinsics<[]>;
  defm int_arm_cde_vcx2q : CDEVCXVecIntrinsics<[llvm_v16i8_ty]>;
  defm int_arm_cde_vcx3q : CDEVCXVecIntrinsics<[llvm_v16i8_ty,
                                                llvm_v16i8_ty]>;
  1237. } // end TargetPrefix