IntrinsicsARM.td 65 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380
  1. //===- IntrinsicsARM.td - Defines ARM intrinsics -----------*- tablegen -*-===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. //
  9. // This file defines all of the ARM-specific intrinsics.
  10. //
  11. //===----------------------------------------------------------------------===//
  12. //===----------------------------------------------------------------------===//
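  13. // TLS (NOTE(review): no TLS intrinsic is defined under this heading in the visible part of the file; the heading may be stale.)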
  14. let TargetPrefix = "arm" in { // All intrinsics start with "llvm.arm.".
  15. // A space-consuming intrinsic primarily for testing ARMConstantIslands. The
  16. // first argument is the number of bytes this "instruction" takes up, the second
  17. // and return value are essentially chains, used to force ordering during ISel.
  // Operand 0 (the byte count) is tagged ImmArg; operand 1 and the result are i32.
  18. def int_arm_space : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [ImmArg<ArgIndex<0>>]>;
  19. // 16-bit multiplications
  // Six records, each bound to the like-named __builtin_arm_* GCC builtin, with
  // signature (i32, i32) -> i32 and the IntrNoMem property.
  20. def int_arm_smulbb : GCCBuiltin<"__builtin_arm_smulbb">,
  21. Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
  22. def int_arm_smulbt : GCCBuiltin<"__builtin_arm_smulbt">,
  23. Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
  24. def int_arm_smultb : GCCBuiltin<"__builtin_arm_smultb">,
  25. Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
  26. def int_arm_smultt : GCCBuiltin<"__builtin_arm_smultt">,
  27. Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
  28. def int_arm_smulwb : GCCBuiltin<"__builtin_arm_smulwb">,
  29. Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
  30. def int_arm_smulwt : GCCBuiltin<"__builtin_arm_smulwt">,
  31. Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
  32. //===----------------------------------------------------------------------===//
  33. // Saturating Arithmetic
  // All four records are (i32, i32) -> i32 and IntrNoMem; only int_arm_qadd is
  // additionally marked Commutative.
  34. def int_arm_qadd : GCCBuiltin<"__builtin_arm_qadd">,
  35. Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
  36. [Commutative, IntrNoMem]>;
  37. def int_arm_qsub : GCCBuiltin<"__builtin_arm_qsub">,
  38. Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
  39. def int_arm_ssat : GCCBuiltin<"__builtin_arm_ssat">,
  40. Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
  41. def int_arm_usat : GCCBuiltin<"__builtin_arm_usat">,
  42. Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
  43. // Accumulating multiplications
  // Each record is (i32, i32, i32) -> i32, IntrNoMem, and bound to the
  // like-named __builtin_arm_* GCC builtin.
  44. def int_arm_smlabb : GCCBuiltin<"__builtin_arm_smlabb">,
  45. Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
  46. [IntrNoMem]>;
  47. def int_arm_smlabt : GCCBuiltin<"__builtin_arm_smlabt">,
  48. Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
  49. [IntrNoMem]>;
  50. def int_arm_smlatb : GCCBuiltin<"__builtin_arm_smlatb">,
  51. Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
  52. [IntrNoMem]>;
  53. def int_arm_smlatt : GCCBuiltin<"__builtin_arm_smlatt">,
  54. Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
  55. [IntrNoMem]>;
  56. def int_arm_smlawb : GCCBuiltin<"__builtin_arm_smlawb">,
  57. Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
  58. [IntrNoMem]>;
  59. def int_arm_smlawt : GCCBuiltin<"__builtin_arm_smlawt">,
  60. Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
  61. [IntrNoMem]>;
  62. // Parallel 16-bit saturation
  // Both records are (i32, i32) -> i32 and IntrNoMem.
  63. def int_arm_ssat16 : GCCBuiltin<"__builtin_arm_ssat16">,
  64. Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
  65. def int_arm_usat16 : GCCBuiltin<"__builtin_arm_usat16">,
  66. Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
  67. // Packing and unpacking
  // The *xtab16 records take two i32 operands, the *xtb16 records take one;
  // all return i32 and are IntrNoMem.
  68. def int_arm_sxtab16 : GCCBuiltin<"__builtin_arm_sxtab16">,
  69. Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
  70. def int_arm_sxtb16 : GCCBuiltin<"__builtin_arm_sxtb16">,
  71. Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem]>;
  72. def int_arm_uxtab16 : GCCBuiltin<"__builtin_arm_uxtab16">,
  73. Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
  74. def int_arm_uxtb16 : GCCBuiltin<"__builtin_arm_uxtb16">,
  75. Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem]>;
  76. // Parallel selection, reads the GE flags.
  // Declared IntrReadMem rather than IntrNoMem, unlike most records in this
  // file (presumably to model the GE-flag read noted above -- TODO confirm).
  77. def int_arm_sel : GCCBuiltin<"__builtin_arm_sel">,
  78. Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrReadMem]>;
  79. // Parallel 8-bit addition and subtraction
  // Every record here is (i32, i32) -> i32. Records introduced by a
  // "Writes to the GE bits." note have an empty property list; the rest are
  // IntrNoMem.
  80. def int_arm_qadd8 : GCCBuiltin<"__builtin_arm_qadd8">,
  81. Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
  82. def int_arm_qsub8 : GCCBuiltin<"__builtin_arm_qsub8">,
  83. Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
  84. // Writes to the GE bits.
  85. def int_arm_sadd8 : GCCBuiltin<"__builtin_arm_sadd8">,
  86. Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>;
  87. def int_arm_shadd8 : GCCBuiltin<"__builtin_arm_shadd8">,
  88. Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
  89. def int_arm_shsub8 : GCCBuiltin<"__builtin_arm_shsub8">,
  90. Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
  91. // Writes to the GE bits.
  92. def int_arm_ssub8 : GCCBuiltin<"__builtin_arm_ssub8">,
  93. Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>;
  94. // Writes to the GE bits.
  95. def int_arm_uadd8 : GCCBuiltin<"__builtin_arm_uadd8">,
  96. Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>;
  97. def int_arm_uhadd8 : GCCBuiltin<"__builtin_arm_uhadd8">,
  98. Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
  99. def int_arm_uhsub8 : GCCBuiltin<"__builtin_arm_uhsub8">,
  100. Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
  101. def int_arm_uqadd8 : GCCBuiltin<"__builtin_arm_uqadd8">,
  102. Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
  103. def int_arm_uqsub8 : GCCBuiltin<"__builtin_arm_uqsub8">,
  104. Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
  105. // Writes to the GE bits.
  106. def int_arm_usub8 : GCCBuiltin<"__builtin_arm_usub8">,
  107. Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>;
  108. // Sum of 8-bit absolute differences
  // usad8 is (i32, i32) -> i32; usada8 takes a third i32 operand. Both are
  // IntrNoMem.
  109. def int_arm_usad8 : GCCBuiltin<"__builtin_arm_usad8">,
  110. Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
  111. def int_arm_usada8 : GCCBuiltin<"__builtin_arm_usada8">,
  112. Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
  113. [IntrNoMem]>;
  114. // Parallel 16-bit addition and subtraction
  // Every record here is (i32, i32) -> i32. Records introduced by a
  // "Writes to the GE bits." note have an empty property list; the rest are
  // IntrNoMem.
  115. def int_arm_qadd16 : GCCBuiltin<"__builtin_arm_qadd16">,
  116. Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
  117. def int_arm_qasx : GCCBuiltin<"__builtin_arm_qasx">,
  118. Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
  119. def int_arm_qsax : GCCBuiltin<"__builtin_arm_qsax">,
  120. Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
  121. def int_arm_qsub16 : GCCBuiltin<"__builtin_arm_qsub16">,
  122. Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
  123. // Writes to the GE bits.
  124. def int_arm_sadd16 : GCCBuiltin<"__builtin_arm_sadd16">,
  125. Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>;
  126. // Writes to the GE bits.
  127. def int_arm_sasx : GCCBuiltin<"__builtin_arm_sasx">,
  128. Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>;
  129. def int_arm_shadd16 : GCCBuiltin<"__builtin_arm_shadd16">,
  130. Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
  131. def int_arm_shasx : GCCBuiltin<"__builtin_arm_shasx">,
  132. Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
  133. def int_arm_shsax : GCCBuiltin<"__builtin_arm_shsax">,
  134. Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
  135. def int_arm_shsub16 : GCCBuiltin<"__builtin_arm_shsub16">,
  136. Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
  137. // Writes to the GE bits.
  138. def int_arm_ssax : GCCBuiltin<"__builtin_arm_ssax">,
  139. Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>;
  140. // Writes to the GE bits.
  141. def int_arm_ssub16 : GCCBuiltin<"__builtin_arm_ssub16">,
  142. Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>;
  143. // Writes to the GE bits.
  144. def int_arm_uadd16 : GCCBuiltin<"__builtin_arm_uadd16">,
  145. Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>;
  146. // Writes to the GE bits.
  147. def int_arm_uasx : GCCBuiltin<"__builtin_arm_uasx">,
  148. Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>;
  149. def int_arm_uhadd16 : GCCBuiltin<"__builtin_arm_uhadd16">,
  150. Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
  151. def int_arm_uhasx : GCCBuiltin<"__builtin_arm_uhasx">,
  152. Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
  153. def int_arm_uhsax : GCCBuiltin<"__builtin_arm_uhsax">,
  154. Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
  155. def int_arm_uhsub16 : GCCBuiltin<"__builtin_arm_uhsub16">,
  156. Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
  157. def int_arm_uqadd16 : GCCBuiltin<"__builtin_arm_uqadd16">,
  158. Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
  159. def int_arm_uqasx : GCCBuiltin<"__builtin_arm_uqasx">,
  160. Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
  161. def int_arm_uqsax : GCCBuiltin<"__builtin_arm_uqsax">,
  162. Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
  163. def int_arm_uqsub16 : GCCBuiltin<"__builtin_arm_uqsub16">,
  164. Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
  165. // Writes to the GE bits.
  166. def int_arm_usax : GCCBuiltin<"__builtin_arm_usax">,
  167. Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>;
  168. // Writes to the GE bits.
  169. def int_arm_usub16 : GCCBuiltin<"__builtin_arm_usub16">,
  170. Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>;
  171. // Parallel 16-bit multiplication
  // All records are IntrNoMem. Three signature shapes appear:
  //   smlad/smladx/smlsd/smlsdx     : (i32, i32, i32) -> i32
  //   smlald/smlaldx/smlsld/smlsldx : (i32, i32, i64) -> i64
  //   smuad/smuadx/smusd/smusdx     : (i32, i32)      -> i32
  172. def int_arm_smlad : GCCBuiltin<"__builtin_arm_smlad">,
  173. Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
  174. [IntrNoMem]>;
  175. def int_arm_smladx : GCCBuiltin<"__builtin_arm_smladx">,
  176. Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
  177. [IntrNoMem]>;
  178. def int_arm_smlald : GCCBuiltin<"__builtin_arm_smlald">,
  179. Intrinsic<[llvm_i64_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i64_ty],
  180. [IntrNoMem]>;
  181. def int_arm_smlaldx : GCCBuiltin<"__builtin_arm_smlaldx">,
  182. Intrinsic<[llvm_i64_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i64_ty],
  183. [IntrNoMem]>;
  184. def int_arm_smlsd : GCCBuiltin<"__builtin_arm_smlsd">,
  185. Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
  186. [IntrNoMem]>;
  187. def int_arm_smlsdx : GCCBuiltin<"__builtin_arm_smlsdx">,
  188. Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
  189. [IntrNoMem]>;
  190. def int_arm_smlsld : GCCBuiltin<"__builtin_arm_smlsld">,
  191. Intrinsic<[llvm_i64_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i64_ty],
  192. [IntrNoMem]>;
  193. def int_arm_smlsldx : GCCBuiltin<"__builtin_arm_smlsldx">,
  194. Intrinsic<[llvm_i64_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i64_ty],
  195. [IntrNoMem]>;
  196. def int_arm_smuad : GCCBuiltin<"__builtin_arm_smuad">,
  197. Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
  198. def int_arm_smuadx : GCCBuiltin<"__builtin_arm_smuadx">,
  199. Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
  200. def int_arm_smusd : GCCBuiltin<"__builtin_arm_smusd">,
  201. Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
  202. def int_arm_smusdx : GCCBuiltin<"__builtin_arm_smusdx">,
  203. Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
  204. //===----------------------------------------------------------------------===//
  205. // Load, Store and Clear exclusive
  // None of these records lists any intrinsic properties.
  // ldrex/ldaex and strex/stlex are overloaded on the pointer operand
  // (llvm_anyptr_ty); the *d forms use plain llvm_ptr_ty and carry a pair of
  // i32 values (as two results for the loads, two leading operands for the
  // stores). clrex has no operands and no result.
  206. def int_arm_ldrex : Intrinsic<[llvm_i32_ty], [llvm_anyptr_ty]>;
  207. def int_arm_strex : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_anyptr_ty]>;
  208. def int_arm_ldaex : Intrinsic<[llvm_i32_ty], [llvm_anyptr_ty]>;
  209. def int_arm_stlex : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_anyptr_ty]>;
  210. def int_arm_clrex : Intrinsic<[]>;
  211. def int_arm_strexd : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty,
  212. llvm_ptr_ty]>;
  213. def int_arm_ldrexd : Intrinsic<[llvm_i32_ty, llvm_i32_ty], [llvm_ptr_ty]>;
  214. def int_arm_stlexd : Intrinsic<[llvm_i32_ty],
  215. [llvm_i32_ty, llvm_i32_ty, llvm_ptr_ty]>;
  216. def int_arm_ldaexd : Intrinsic<[llvm_i32_ty, llvm_i32_ty], [llvm_ptr_ty]>;
  217. //===----------------------------------------------------------------------===//
  218. // Data barrier instructions
  // Each record takes one i32 operand, returns nothing, lists no properties,
  // and is bound to both a GCC builtin and an MS builtin name.
  219. def int_arm_dmb : GCCBuiltin<"__builtin_arm_dmb">, MSBuiltin<"__dmb">,
  220. Intrinsic<[], [llvm_i32_ty]>;
  221. def int_arm_dsb : GCCBuiltin<"__builtin_arm_dsb">, MSBuiltin<"__dsb">,
  222. Intrinsic<[], [llvm_i32_ty]>;
  223. def int_arm_isb : GCCBuiltin<"__builtin_arm_isb">, MSBuiltin<"__isb">,
  224. Intrinsic<[], [llvm_i32_ty]>;
  225. //===----------------------------------------------------------------------===//
  226. // VFP
  // get_fpscr: () -> i32; set_fpscr: (i32) -> (); both have an empty property
  // list. vcvtr/vcvtru return llvm_float_ty from an overloaded floating-point
  // operand and are IntrNoMem.
  227. def int_arm_get_fpscr : GCCBuiltin<"__builtin_arm_get_fpscr">,
  228. Intrinsic<[llvm_i32_ty], [], []>;
  229. def int_arm_set_fpscr : GCCBuiltin<"__builtin_arm_set_fpscr">,
  230. Intrinsic<[], [llvm_i32_ty], []>;
  231. def int_arm_vcvtr : Intrinsic<[llvm_float_ty], [llvm_anyfloat_ty],
  232. [IntrNoMem]>;
  233. def int_arm_vcvtru : Intrinsic<[llvm_float_ty], [llvm_anyfloat_ty],
  234. [IntrNoMem]>;
  235. //===----------------------------------------------------------------------===//
  236. // Coprocessor
  // The ldc*/stc* records share one signature: (i32, i32, ptr) -> (), with
  // operands 0 and 1 tagged ImmArg and the pointer left as a normal operand.
  237. def int_arm_ldc : GCCBuiltin<"__builtin_arm_ldc">,
  238. Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_ptr_ty], [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<1>>]>;
  239. def int_arm_ldcl : GCCBuiltin<"__builtin_arm_ldcl">,
  240. Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_ptr_ty], [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<1>>]>;
  241. def int_arm_ldc2 : GCCBuiltin<"__builtin_arm_ldc2">,
  242. Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_ptr_ty], [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<1>>]>;
  243. def int_arm_ldc2l : GCCBuiltin<"__builtin_arm_ldc2l">,
  244. Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_ptr_ty], [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<1>>]>;
  245. def int_arm_stc : GCCBuiltin<"__builtin_arm_stc">,
  246. Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_ptr_ty], [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<1>>]>;
  247. def int_arm_stcl : GCCBuiltin<"__builtin_arm_stcl">,
  248. Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_ptr_ty], [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<1>>]>;
  249. def int_arm_stc2 : GCCBuiltin<"__builtin_arm_stc2">,
  250. Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_ptr_ty], [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<1>>]>;
  251. def int_arm_stc2l : GCCBuiltin<"__builtin_arm_stc2l">,
  252. Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_ptr_ty], [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<1>>]>;
  253. // Move to coprocessor
  // Six i32 operands, no result. Operands 0, 1, 3, 4 and 5 are tagged ImmArg;
  // operand 2 is the only one without the tag.
  254. def int_arm_mcr : GCCBuiltin<"__builtin_arm_mcr">,
  255. Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
  256. llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<1>>, ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>]>;
  257. def int_arm_mcr2 : GCCBuiltin<"__builtin_arm_mcr2">,
  258. Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
  259. llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<1>>, ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>]>;
  260. // Move from coprocessor
  // Five i32 operands, all tagged ImmArg, producing one i32 result. Each record
  // is bound to both a GCC builtin and an MS builtin name.
  261. def int_arm_mrc : GCCBuiltin<"__builtin_arm_mrc">,
  262. MSBuiltin<"_MoveFromCoprocessor">,
  263. Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
  264. llvm_i32_ty, llvm_i32_ty], [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<1>>, ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>]>;
  265. def int_arm_mrc2 : GCCBuiltin<"__builtin_arm_mrc2">,
  266. MSBuiltin<"_MoveFromCoprocessor2">,
  267. Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
  268. llvm_i32_ty, llvm_i32_ty], [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<1>>, ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>]>;
  269. // Coprocessor data processing
  // Six i32 operands, every one tagged ImmArg; no result.
  270. def int_arm_cdp : GCCBuiltin<"__builtin_arm_cdp">,
  271. Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
  272. llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<1>>, ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>]>;
  273. def int_arm_cdp2 : GCCBuiltin<"__builtin_arm_cdp2">,
  274. Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
  275. llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<1>>, ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>]>;
  276. // Move from two registers to coprocessor
  // mcrr/mcrr2: five i32 operands, no result; operands 0, 1 and 4 are ImmArg,
  // operands 2 and 3 are not.
  // mrrc/mrrc2 have the opposite shape: three ImmArg i32 operands producing two
  // i32 results. None of the four is bound to a GCC builtin here.
  277. def int_arm_mcrr : Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
  278. llvm_i32_ty, llvm_i32_ty], [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<1>>, ImmArg<ArgIndex<4>>]>;
  279. def int_arm_mcrr2 : Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
  280. llvm_i32_ty, llvm_i32_ty], [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<1>>, ImmArg<ArgIndex<4>>]>;
  281. def int_arm_mrrc : Intrinsic<[llvm_i32_ty, llvm_i32_ty], [llvm_i32_ty,
  282. llvm_i32_ty, llvm_i32_ty], [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<1>>, ImmArg<ArgIndex<2>>]>;
  283. def int_arm_mrrc2 : Intrinsic<[llvm_i32_ty, llvm_i32_ty], [llvm_i32_ty,
  284. llvm_i32_ty, llvm_i32_ty], [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<1>>, ImmArg<ArgIndex<2>>]>;
  285. //===----------------------------------------------------------------------===//
  286. // CRC32
  // Six records, all (i32, i32) -> i32 and IntrNoMem; none is bound to a GCC
  // builtin in this file.
  287. def int_arm_crc32b : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
  288. [IntrNoMem]>;
  289. def int_arm_crc32cb : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
  290. [IntrNoMem]>;
  291. def int_arm_crc32h : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
  292. [IntrNoMem]>;
  293. def int_arm_crc32ch : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
  294. [IntrNoMem]>;
  295. def int_arm_crc32w : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
  296. [IntrNoMem]>;
  297. def int_arm_crc32cw : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
  298. [IntrNoMem]>;
  299. //===----------------------------------------------------------------------===//
  300. // CMSE
  // Each record is (ptr) -> i32 and IntrNoMem. Note the case difference: the
  // record names end in lower-case tt/ttt/tta/ttat while the GCC builtin names
  // end in upper-case TT/TTT/TTA/TTAT.
  301. def int_arm_cmse_tt : GCCBuiltin<"__builtin_arm_cmse_TT">,
  302. Intrinsic<[llvm_i32_ty], [llvm_ptr_ty], [IntrNoMem]>;
  303. def int_arm_cmse_ttt : GCCBuiltin<"__builtin_arm_cmse_TTT">,
  304. Intrinsic<[llvm_i32_ty], [llvm_ptr_ty], [IntrNoMem]>;
  305. def int_arm_cmse_tta : GCCBuiltin<"__builtin_arm_cmse_TTA">,
  306. Intrinsic<[llvm_i32_ty], [llvm_ptr_ty], [IntrNoMem]>;
  307. def int_arm_cmse_ttat : GCCBuiltin<"__builtin_arm_cmse_TTAT">,
  308. Intrinsic<[llvm_i32_ty], [llvm_ptr_ty], [IntrNoMem]>;
  309. //===----------------------------------------------------------------------===//
  310. // HINT
  // Both records take one i32 operand, return nothing, and list no properties.
  311. def int_arm_hint : Intrinsic<[], [llvm_i32_ty]>;
  312. def int_arm_dbg : Intrinsic<[], [llvm_i32_ty]>;
  313. //===----------------------------------------------------------------------===//
  314. // UND (reserved undefined sequence)
  // Takes one i32 operand, returns nothing, and lists no properties.
  315. def int_arm_undefined : Intrinsic<[], [llvm_i32_ty]>;
  316. //===----------------------------------------------------------------------===//
  317. // Advanced SIMD (NEON)
  318. // The following classes do not correspond directly to GCC builtins.
  // Every class below defaults to IntrNoMem. In the notes, "overloaded" means
  // the slot uses one of the llvm_any*_ty placeholders, and LLVMMatchType<N>
  // reuses overloaded type number N.
  // One operand with the same overloaded vector type as the result.
  319. class Neon_1Arg_Intrinsic
  320. : Intrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>], [IntrNoMem]>;
  // One operand whose type is LLVMExtendedType<0> of the overloaded result.
  321. class Neon_1Arg_Narrow_Intrinsic
  322. : Intrinsic<[llvm_anyvector_ty], [LLVMExtendedType<0>], [IntrNoMem]>;
  // Two operands, both of the overloaded result type.
  323. class Neon_2Arg_Intrinsic
  324. : Intrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, LLVMMatchType<0>],
  325. [IntrNoMem]>;
  // Two operands, both LLVMExtendedType<0> of the overloaded result.
  326. class Neon_2Arg_Narrow_Intrinsic
  327. : Intrinsic<[llvm_anyvector_ty], [LLVMExtendedType<0>, LLVMExtendedType<0>],
  328. [IntrNoMem]>;
  // Two operands, both LLVMTruncatedType<0> of the overloaded result.
  329. class Neon_2Arg_Long_Intrinsic
  330. : Intrinsic<[llvm_anyvector_ty], [LLVMTruncatedType<0>, LLVMTruncatedType<0>],
  331. [IntrNoMem]>;
  // Three operands, all of the overloaded result type.
  332. class Neon_3Arg_Intrinsic
  333. : Intrinsic<[llvm_anyvector_ty],
  334. [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>],
  335. [IntrNoMem]>;
  // First operand matches the result; the other two are LLVMTruncatedType<0>.
  336. class Neon_3Arg_Long_Intrinsic
  337. : Intrinsic<[llvm_anyvector_ty],
  338. [LLVMMatchType<0>, LLVMTruncatedType<0>, LLVMTruncatedType<0>],
  339. [IntrNoMem]>;
  // One operand of the same overloaded floating-point type as the result.
  340. class Neon_1FloatArg_Intrinsic
  341. : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>;
  // Overloaded FP result from an overloaded integer operand plus an i32 operand.
  342. class Neon_CvtFxToFP_Intrinsic
  343. : Intrinsic<[llvm_anyfloat_ty], [llvm_anyint_ty, llvm_i32_ty], [IntrNoMem]>;
  // Overloaded integer result from an overloaded FP operand plus an i32 operand.
  344. class Neon_CvtFPToFx_Intrinsic
  345. : Intrinsic<[llvm_anyint_ty], [llvm_anyfloat_ty, llvm_i32_ty], [IntrNoMem]>;
  // Result and operand are vectors overloaded independently of each other.
  346. class Neon_CvtFPtoInt_1Arg_Intrinsic
  347. : Intrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty], [IntrNoMem]>;
  // Result and first operand are overloaded independently; the second operand
  // matches the first operand (overloaded type 1), not the result.
  348. class Neon_Compare_Intrinsic
  349. : Intrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty, LLVMMatchType<1>],
  350. [IntrNoMem]>;
  351. // The table operands for VTBL and VTBX consist of 1 to 4 v8i8 vectors.
  352. // Besides the table, VTBL has one other v8i8 argument and VTBX has two.
  353. // Overall, the classes range from 2 to 6 v8i8 arguments.
  // The digit in each class name is its operand count. All five classes return
  // v8i8 and are IntrNoMem; none is overloaded.
  354. class Neon_Tbl2Arg_Intrinsic
  355. : Intrinsic<[llvm_v8i8_ty],
  356. [llvm_v8i8_ty, llvm_v8i8_ty], [IntrNoMem]>;
  357. class Neon_Tbl3Arg_Intrinsic
  358. : Intrinsic<[llvm_v8i8_ty],
  359. [llvm_v8i8_ty, llvm_v8i8_ty, llvm_v8i8_ty], [IntrNoMem]>;
  360. class Neon_Tbl4Arg_Intrinsic
  361. : Intrinsic<[llvm_v8i8_ty],
  362. [llvm_v8i8_ty, llvm_v8i8_ty, llvm_v8i8_ty, llvm_v8i8_ty],
  363. [IntrNoMem]>;
  364. class Neon_Tbl5Arg_Intrinsic
  365. : Intrinsic<[llvm_v8i8_ty],
  366. [llvm_v8i8_ty, llvm_v8i8_ty, llvm_v8i8_ty, llvm_v8i8_ty,
  367. llvm_v8i8_ty], [IntrNoMem]>;
  368. class Neon_Tbl6Arg_Intrinsic
  369. : Intrinsic<[llvm_v8i8_ty],
  370. [llvm_v8i8_ty, llvm_v8i8_ty, llvm_v8i8_ty, llvm_v8i8_ty,
  371. llvm_v8i8_ty, llvm_v8i8_ty], [IntrNoMem]>;
  372. // Arithmetic ops
  // The enclosing `let` replaces IntrProperties with [IntrNoMem, Commutative]
  // for every record up to the closing brace, so all of them are Commutative
  // in addition to the IntrNoMem their classes already specify.
  373. let IntrProperties = [IntrNoMem, Commutative] in {
  374. // Vector Add.
  375. def int_arm_neon_vhadds : Neon_2Arg_Intrinsic;
  376. def int_arm_neon_vhaddu : Neon_2Arg_Intrinsic;
  377. def int_arm_neon_vrhadds : Neon_2Arg_Intrinsic;
  378. def int_arm_neon_vrhaddu : Neon_2Arg_Intrinsic;
  379. def int_arm_neon_vraddhn : Neon_2Arg_Narrow_Intrinsic;
  380. // Vector Multiply.
  381. def int_arm_neon_vmulp : Neon_2Arg_Intrinsic;
  382. def int_arm_neon_vqdmulh : Neon_2Arg_Intrinsic;
  383. def int_arm_neon_vqrdmulh : Neon_2Arg_Intrinsic;
  384. def int_arm_neon_vmulls : Neon_2Arg_Long_Intrinsic;
  385. def int_arm_neon_vmullu : Neon_2Arg_Long_Intrinsic;
  386. def int_arm_neon_vmullp : Neon_2Arg_Long_Intrinsic;
  387. def int_arm_neon_vqdmull : Neon_2Arg_Long_Intrinsic;
  388. // Vector Maximum.
  389. def int_arm_neon_vmaxs : Neon_2Arg_Intrinsic;
  390. def int_arm_neon_vmaxu : Neon_2Arg_Intrinsic;
  391. def int_arm_neon_vmaxnm : Neon_2Arg_Intrinsic;
  392. // Vector Minimum.
  393. def int_arm_neon_vmins : Neon_2Arg_Intrinsic;
  394. def int_arm_neon_vminu : Neon_2Arg_Intrinsic;
  395. def int_arm_neon_vminnm : Neon_2Arg_Intrinsic;
  396. // Vector Reciprocal Step.
  397. def int_arm_neon_vrecps : Neon_2Arg_Intrinsic;
  398. // Vector Reciprocal Square Root Step.
  399. def int_arm_neon_vrsqrts : Neon_2Arg_Intrinsic;
  400. }
  401. // Vector Subtract.
  // These records sit after the closing brace of the Commutative `let` block,
  // so they keep only the properties of their classes.
  402. def int_arm_neon_vhsubs : Neon_2Arg_Intrinsic;
  403. def int_arm_neon_vhsubu : Neon_2Arg_Intrinsic;
  404. def int_arm_neon_vrsubhn : Neon_2Arg_Narrow_Intrinsic;
  405. // Vector Absolute Compare.
  // Uses Neon_Compare_Intrinsic, so the result type is overloaded separately
  // from the (shared) operand type.
  406. def int_arm_neon_vacge : Neon_Compare_Intrinsic;
  407. def int_arm_neon_vacgt : Neon_Compare_Intrinsic;
  408. // Vector Absolute Differences.
  409. def int_arm_neon_vabds : Neon_2Arg_Intrinsic;
  410. def int_arm_neon_vabdu : Neon_2Arg_Intrinsic;
  411. // Vector Pairwise Add.
  412. def int_arm_neon_vpadd : Neon_2Arg_Intrinsic;
  413. // Vector Pairwise Add Long.
  414. // Note: This is different from the other "long" NEON intrinsics because
  415. // the result vector has half as many elements as the source vector.
  416. // The source and destination vector types must be specified separately,
  // which is why both slots are declared llvm_anyvector_ty instead of using a
  // Neon_*_Long_Intrinsic class.
  417. def int_arm_neon_vpaddls : Intrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty],
  418. [IntrNoMem]>;
  419. def int_arm_neon_vpaddlu : Intrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty],
  420. [IntrNoMem]>;
  421. // Vector Pairwise Add and Accumulate Long.
  422. // Note: This is similar to vpaddl but the destination vector also appears
  423. // as the first argument (LLVMMatchType<0>); the second argument is
  // overloaded separately.
  424. def int_arm_neon_vpadals : Intrinsic<[llvm_anyvector_ty],
  425. [LLVMMatchType<0>, llvm_anyvector_ty],
  426. [IntrNoMem]>;
  427. def int_arm_neon_vpadalu : Intrinsic<[llvm_anyvector_ty],
  428. [LLVMMatchType<0>, llvm_anyvector_ty],
  429. [IntrNoMem]>;
  430. // Vector Pairwise Maximum and Minimum.
  // All four use Neon_2Arg_Intrinsic: two operands of the overloaded result type.
  431. def int_arm_neon_vpmaxs : Neon_2Arg_Intrinsic;
  432. def int_arm_neon_vpmaxu : Neon_2Arg_Intrinsic;
  433. def int_arm_neon_vpmins : Neon_2Arg_Intrinsic;
  434. def int_arm_neon_vpminu : Neon_2Arg_Intrinsic;
  435. // Vector Shifts:
  436. //
  437. // The various saturating and rounding vector shift operations need to be
  438. // represented by intrinsics in LLVM, and even the basic VSHL variable shift
  439. // operation cannot be safely translated to LLVM's shift operators. VSHL can
  440. // be used for both left and right shifts, or even combinations of the two,
  441. // depending on the signs of the shift amounts. It also has well-defined
  442. // behavior for shift amounts that LLVM leaves undefined. Only basic shifts
  443. // by constants can be represented with LLVM's shift operators.
  444. //
  445. // The shift counts for these intrinsics are always vectors, even for constant
  446. // shifts, where the constant is replicated. For consistency with VSHL (and
  447. // other variable shift instructions), left shifts have positive shift counts
  448. // and right shifts have negative shift counts. This convention is also used
  449. // for constant right shift intrinsics, and to help preserve sanity, the
  450. // intrinsic names use "shift" instead of either "shl" or "shr". Where
  451. // applicable, signed and unsigned versions of the intrinsics are
  452. // distinguished with "s" and "u" suffixes. A few NEON shift instructions,
  453. // such as VQSHLU, take signed operands but produce unsigned results; these
  454. // use a "su" suffix.
  //
  // In the records below, names containing "shiftn" use
  // Neon_2Arg_Narrow_Intrinsic; the remaining two-operand shifts use
  // Neon_2Arg_Intrinsic.
  455. // Vector Shift.
  456. def int_arm_neon_vshifts : Neon_2Arg_Intrinsic;
  457. def int_arm_neon_vshiftu : Neon_2Arg_Intrinsic;
  458. // Vector Rounding Shift.
  459. def int_arm_neon_vrshifts : Neon_2Arg_Intrinsic;
  460. def int_arm_neon_vrshiftu : Neon_2Arg_Intrinsic;
  461. def int_arm_neon_vrshiftn : Neon_2Arg_Narrow_Intrinsic;
  462. // Vector Saturating Shift.
  463. def int_arm_neon_vqshifts : Neon_2Arg_Intrinsic;
  464. def int_arm_neon_vqshiftu : Neon_2Arg_Intrinsic;
  465. def int_arm_neon_vqshiftsu : Neon_2Arg_Intrinsic;
  466. def int_arm_neon_vqshiftns : Neon_2Arg_Narrow_Intrinsic;
  467. def int_arm_neon_vqshiftnu : Neon_2Arg_Narrow_Intrinsic;
  468. def int_arm_neon_vqshiftnsu : Neon_2Arg_Narrow_Intrinsic;
  469. // Vector Saturating Rounding Shift.
  470. def int_arm_neon_vqrshifts : Neon_2Arg_Intrinsic;
  471. def int_arm_neon_vqrshiftu : Neon_2Arg_Intrinsic;
  472. def int_arm_neon_vqrshiftns : Neon_2Arg_Narrow_Intrinsic;
  473. def int_arm_neon_vqrshiftnu : Neon_2Arg_Narrow_Intrinsic;
  474. def int_arm_neon_vqrshiftnsu : Neon_2Arg_Narrow_Intrinsic;
  475. // Vector Shift and Insert.
  476. def int_arm_neon_vshiftins : Neon_3Arg_Intrinsic;
  477. // Vector Absolute Value and Saturating Absolute Value.
  478. def int_arm_neon_vabs : Neon_1Arg_Intrinsic;
  479. def int_arm_neon_vqabs : Neon_1Arg_Intrinsic;
  480. // Vector Saturating Negate.
  481. def int_arm_neon_vqneg : Neon_1Arg_Intrinsic;
  482. // Vector Count Leading Sign/Zero Bits.
  483. def int_arm_neon_vcls : Neon_1Arg_Intrinsic;
  484. // Vector Reciprocal Estimate.
  485. def int_arm_neon_vrecpe : Neon_1Arg_Intrinsic;
  486. // Vector Reciprocal Square Root Estimate.
  487. def int_arm_neon_vrsqrte : Neon_1Arg_Intrinsic;
  488. // Vector Conversions Between Floating-point and Integer
  489. def int_arm_neon_vcvtau : Neon_CvtFPtoInt_1Arg_Intrinsic;
  490. def int_arm_neon_vcvtas : Neon_CvtFPtoInt_1Arg_Intrinsic;
  491. def int_arm_neon_vcvtnu : Neon_CvtFPtoInt_1Arg_Intrinsic;
  492. def int_arm_neon_vcvtns : Neon_CvtFPtoInt_1Arg_Intrinsic;
  493. def int_arm_neon_vcvtpu : Neon_CvtFPtoInt_1Arg_Intrinsic;
  494. def int_arm_neon_vcvtps : Neon_CvtFPtoInt_1Arg_Intrinsic;
  495. def int_arm_neon_vcvtmu : Neon_CvtFPtoInt_1Arg_Intrinsic;
  496. def int_arm_neon_vcvtms : Neon_CvtFPtoInt_1Arg_Intrinsic;
  497. // Vector Conversions Between Floating-point and Fixed-point.
  498. def int_arm_neon_vcvtfp2fxs : Neon_CvtFPToFx_Intrinsic;
  499. def int_arm_neon_vcvtfp2fxu : Neon_CvtFPToFx_Intrinsic;
  500. def int_arm_neon_vcvtfxs2fp : Neon_CvtFxToFP_Intrinsic;
  501. def int_arm_neon_vcvtfxu2fp : Neon_CvtFxToFP_Intrinsic;
  502. // Vector Conversions Between Half-Precision and Single-Precision.
  503. def int_arm_neon_vcvtfp2hf
  504. : Intrinsic<[llvm_v4i16_ty], [llvm_v4f32_ty], [IntrNoMem]>;
  505. def int_arm_neon_vcvthf2fp
  506. : Intrinsic<[llvm_v4f32_ty], [llvm_v4i16_ty], [IntrNoMem]>;
  507. // Narrowing Saturating Vector Moves.
  508. def int_arm_neon_vqmovns : Neon_1Arg_Narrow_Intrinsic;
  509. def int_arm_neon_vqmovnu : Neon_1Arg_Narrow_Intrinsic;
  510. def int_arm_neon_vqmovnsu : Neon_1Arg_Narrow_Intrinsic;
  511. // Vector Table Lookup.
  512. // The first 1-4 arguments are the table.
  513. def int_arm_neon_vtbl1 : Neon_Tbl2Arg_Intrinsic;
  514. def int_arm_neon_vtbl2 : Neon_Tbl3Arg_Intrinsic;
  515. def int_arm_neon_vtbl3 : Neon_Tbl4Arg_Intrinsic;
  516. def int_arm_neon_vtbl4 : Neon_Tbl5Arg_Intrinsic;
  517. // Vector Table Extension.
  518. // Some elements of the destination vector may not be updated, so the original
  519. // value of that vector is passed as the first argument. The next 1-4
  520. // arguments after that are the table.
  521. def int_arm_neon_vtbx1 : Neon_Tbl3Arg_Intrinsic;
  522. def int_arm_neon_vtbx2 : Neon_Tbl4Arg_Intrinsic;
  523. def int_arm_neon_vtbx3 : Neon_Tbl5Arg_Intrinsic;
  524. def int_arm_neon_vtbx4 : Neon_Tbl6Arg_Intrinsic;
  525. // Vector and Scalar Rounding.
  526. def int_arm_neon_vrintn : Neon_1FloatArg_Intrinsic;
  527. def int_arm_neon_vrintx : Neon_1Arg_Intrinsic;
  528. def int_arm_neon_vrinta : Neon_1Arg_Intrinsic;
  529. def int_arm_neon_vrintz : Neon_1Arg_Intrinsic;
  530. def int_arm_neon_vrintm : Neon_1Arg_Intrinsic;
  531. def int_arm_neon_vrintp : Neon_1Arg_Intrinsic;
  532. // De-interleaving vector loads from N-element structures.
  533. // Source operands are the address and alignment.
  534. def int_arm_neon_vld1 : Intrinsic<[llvm_anyvector_ty],
  535. [llvm_anyptr_ty, llvm_i32_ty],
  536. [IntrReadMem, IntrArgMemOnly]>;
  537. def int_arm_neon_vld2 : Intrinsic<[llvm_anyvector_ty, LLVMMatchType<0>],
  538. [llvm_anyptr_ty, llvm_i32_ty],
  539. [IntrReadMem, IntrArgMemOnly]>;
  540. def int_arm_neon_vld3 : Intrinsic<[llvm_anyvector_ty, LLVMMatchType<0>,
  541. LLVMMatchType<0>],
  542. [llvm_anyptr_ty, llvm_i32_ty],
  543. [IntrReadMem, IntrArgMemOnly]>;
  544. def int_arm_neon_vld4 : Intrinsic<[llvm_anyvector_ty, LLVMMatchType<0>,
  545. LLVMMatchType<0>, LLVMMatchType<0>],
  546. [llvm_anyptr_ty, llvm_i32_ty],
  547. [IntrReadMem, IntrArgMemOnly]>;
  548. def int_arm_neon_vld1x2 : Intrinsic<[llvm_anyvector_ty, LLVMMatchType<0>],
  549. [LLVMAnyPointerType<LLVMMatchType<0>>],
  550. [IntrReadMem, IntrArgMemOnly]>;
  551. def int_arm_neon_vld1x3 : Intrinsic<[llvm_anyvector_ty, LLVMMatchType<0>,
  552. LLVMMatchType<0>],
  553. [LLVMAnyPointerType<LLVMMatchType<0>>],
  554. [IntrReadMem, IntrArgMemOnly]>;
  555. def int_arm_neon_vld1x4 : Intrinsic<[llvm_anyvector_ty, LLVMMatchType<0>,
  556. LLVMMatchType<0>, LLVMMatchType<0>],
  557. [LLVMAnyPointerType<LLVMMatchType<0>>],
  558. [IntrReadMem, IntrArgMemOnly]>;
  559. // Vector load N-element structure to one lane.
  560. // Source operands are: the address, the N input vectors (since only one
  561. // lane is assigned), the lane number, and the alignment.
  562. def int_arm_neon_vld2lane : Intrinsic<[llvm_anyvector_ty, LLVMMatchType<0>],
  563. [llvm_anyptr_ty, LLVMMatchType<0>,
  564. LLVMMatchType<0>, llvm_i32_ty,
  565. llvm_i32_ty], [IntrReadMem, IntrArgMemOnly]>;
  566. def int_arm_neon_vld3lane : Intrinsic<[llvm_anyvector_ty, LLVMMatchType<0>,
  567. LLVMMatchType<0>],
  568. [llvm_anyptr_ty, LLVMMatchType<0>,
  569. LLVMMatchType<0>, LLVMMatchType<0>,
  570. llvm_i32_ty, llvm_i32_ty],
  571. [IntrReadMem, IntrArgMemOnly]>;
  572. def int_arm_neon_vld4lane : Intrinsic<[llvm_anyvector_ty, LLVMMatchType<0>,
  573. LLVMMatchType<0>, LLVMMatchType<0>],
  574. [llvm_anyptr_ty, LLVMMatchType<0>,
  575. LLVMMatchType<0>, LLVMMatchType<0>,
  576. LLVMMatchType<0>, llvm_i32_ty,
  577. llvm_i32_ty], [IntrReadMem, IntrArgMemOnly]>;
  578. // Vector load N-element structure to all lanes.
  579. // Source operands are the address and alignment.
  580. def int_arm_neon_vld2dup : Intrinsic<[llvm_anyvector_ty, LLVMMatchType<0>],
  581. [llvm_anyptr_ty, llvm_i32_ty],
  582. [IntrReadMem, IntrArgMemOnly]>;
  583. def int_arm_neon_vld3dup : Intrinsic<[llvm_anyvector_ty, LLVMMatchType<0>,
  584. LLVMMatchType<0>],
  585. [llvm_anyptr_ty, llvm_i32_ty],
  586. [IntrReadMem, IntrArgMemOnly]>;
  587. def int_arm_neon_vld4dup : Intrinsic<[llvm_anyvector_ty, LLVMMatchType<0>,
  588. LLVMMatchType<0>, LLVMMatchType<0>],
  589. [llvm_anyptr_ty, llvm_i32_ty],
  590. [IntrReadMem, IntrArgMemOnly]>;
  591. // Interleaving vector stores from N-element structures.
  592. // Source operands are: the address, the N vectors, and the alignment.
  593. def int_arm_neon_vst1 : Intrinsic<[],
  594. [llvm_anyptr_ty, llvm_anyvector_ty,
  595. llvm_i32_ty], [IntrArgMemOnly]>;
  596. def int_arm_neon_vst2 : Intrinsic<[],
  597. [llvm_anyptr_ty, llvm_anyvector_ty,
  598. LLVMMatchType<1>, llvm_i32_ty],
  599. [IntrArgMemOnly]>;
  600. def int_arm_neon_vst3 : Intrinsic<[],
  601. [llvm_anyptr_ty, llvm_anyvector_ty,
  602. LLVMMatchType<1>, LLVMMatchType<1>,
  603. llvm_i32_ty], [IntrArgMemOnly]>;
  604. def int_arm_neon_vst4 : Intrinsic<[],
  605. [llvm_anyptr_ty, llvm_anyvector_ty,
  606. LLVMMatchType<1>, LLVMMatchType<1>,
  607. LLVMMatchType<1>, llvm_i32_ty],
  608. [IntrArgMemOnly]>;
  609. def int_arm_neon_vst1x2 : Intrinsic<[],
  610. [llvm_anyptr_ty, llvm_anyvector_ty,
  611. LLVMMatchType<1>],
  612. [IntrArgMemOnly, NoCapture<ArgIndex<0>>]>;
  613. def int_arm_neon_vst1x3 : Intrinsic<[],
  614. [llvm_anyptr_ty, llvm_anyvector_ty,
  615. LLVMMatchType<1>, LLVMMatchType<1>],
  616. [IntrArgMemOnly, NoCapture<ArgIndex<0>>]>;
  617. def int_arm_neon_vst1x4 : Intrinsic<[],
  618. [llvm_anyptr_ty, llvm_anyvector_ty,
  619. LLVMMatchType<1>, LLVMMatchType<1>,
  620. LLVMMatchType<1>],
  621. [IntrArgMemOnly, NoCapture<ArgIndex<0>>]>;
  622. // Vector store N-element structure from one lane.
  623. // Source operands are: the address, the N vectors, the lane number, and
  624. // the alignment.
  625. def int_arm_neon_vst2lane : Intrinsic<[],
  626. [llvm_anyptr_ty, llvm_anyvector_ty,
  627. LLVMMatchType<1>, llvm_i32_ty,
  628. llvm_i32_ty], [IntrArgMemOnly]>;
  629. def int_arm_neon_vst3lane : Intrinsic<[],
  630. [llvm_anyptr_ty, llvm_anyvector_ty,
  631. LLVMMatchType<1>, LLVMMatchType<1>,
  632. llvm_i32_ty, llvm_i32_ty],
  633. [IntrArgMemOnly]>;
  634. def int_arm_neon_vst4lane : Intrinsic<[],
  635. [llvm_anyptr_ty, llvm_anyvector_ty,
  636. LLVMMatchType<1>, LLVMMatchType<1>,
  637. LLVMMatchType<1>, llvm_i32_ty,
  638. llvm_i32_ty], [IntrArgMemOnly]>;
  639. // Vector bitwise select.
  640. def int_arm_neon_vbsl : Intrinsic<[llvm_anyvector_ty],
  641. [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>],
  642. [IntrNoMem]>;
  643. // Crypto instructions
  644. class AES_1Arg_Intrinsic : Intrinsic<[llvm_v16i8_ty],
  645. [llvm_v16i8_ty], [IntrNoMem]>;
  646. class AES_2Arg_Intrinsic : Intrinsic<[llvm_v16i8_ty],
  647. [llvm_v16i8_ty, llvm_v16i8_ty],
  648. [IntrNoMem]>;
  649. class SHA_1Arg_Intrinsic : Intrinsic<[llvm_i32_ty], [llvm_i32_ty],
  650. [IntrNoMem]>;
  651. class SHA_2Arg_Intrinsic : Intrinsic<[llvm_v4i32_ty],
  652. [llvm_v4i32_ty, llvm_v4i32_ty],
  653. [IntrNoMem]>;
  654. class SHA_3Arg_i32_Intrinsic : Intrinsic<[llvm_v4i32_ty],
  655. [llvm_v4i32_ty, llvm_i32_ty, llvm_v4i32_ty],
  656. [IntrNoMem]>;
  657. class SHA_3Arg_v4i32_Intrinsic : Intrinsic<[llvm_v4i32_ty],
  658. [llvm_v4i32_ty, llvm_v4i32_ty,llvm_v4i32_ty],
  659. [IntrNoMem]>;
  660. def int_arm_neon_aesd : AES_2Arg_Intrinsic;
  661. def int_arm_neon_aese : AES_2Arg_Intrinsic;
  662. def int_arm_neon_aesimc : AES_1Arg_Intrinsic;
  663. def int_arm_neon_aesmc : AES_1Arg_Intrinsic;
  664. def int_arm_neon_sha1h : SHA_1Arg_Intrinsic;
  665. def int_arm_neon_sha1su1 : SHA_2Arg_Intrinsic;
  666. def int_arm_neon_sha256su0 : SHA_2Arg_Intrinsic;
  667. def int_arm_neon_sha1c : SHA_3Arg_i32_Intrinsic;
  668. def int_arm_neon_sha1m : SHA_3Arg_i32_Intrinsic;
  669. def int_arm_neon_sha1p : SHA_3Arg_i32_Intrinsic;
  670. def int_arm_neon_sha1su0: SHA_3Arg_v4i32_Intrinsic;
  671. def int_arm_neon_sha256h: SHA_3Arg_v4i32_Intrinsic;
  672. def int_arm_neon_sha256h2: SHA_3Arg_v4i32_Intrinsic;
  673. def int_arm_neon_sha256su1: SHA_3Arg_v4i32_Intrinsic;
  674. // Armv8.2-A dot product instructions
  675. class Neon_Dot_Intrinsic
  676. : Intrinsic<[llvm_anyvector_ty],
  677. [LLVMMatchType<0>, llvm_anyvector_ty,
  678. LLVMMatchType<1>],
  679. [IntrNoMem]>;
  680. def int_arm_neon_udot : Neon_Dot_Intrinsic;
  681. def int_arm_neon_sdot : Neon_Dot_Intrinsic;
  682. // v8.6-A Matrix Multiply Intrinsics
  683. class Neon_MatMul_Intrinsic
  684. : Intrinsic<[llvm_anyvector_ty],
  685. [LLVMMatchType<0>, llvm_anyvector_ty,
  686. LLVMMatchType<1>],
  687. [IntrNoMem]>;
  688. def int_arm_neon_ummla : Neon_MatMul_Intrinsic;
  689. def int_arm_neon_smmla : Neon_MatMul_Intrinsic;
  690. def int_arm_neon_usmmla : Neon_MatMul_Intrinsic;
  691. def int_arm_neon_usdot : Neon_Dot_Intrinsic;
  692. // v8.6-A Bfloat Intrinsics
  693. def int_arm_neon_vcvtfp2bf
  694. : Intrinsic<[llvm_anyvector_ty], [llvm_v4f32_ty], [IntrNoMem]>;
  695. def int_arm_neon_vcvtbfp2bf
  696. : Intrinsic<[llvm_bfloat_ty], [llvm_float_ty], [IntrNoMem]>;
  697. def int_arm_neon_bfdot : Neon_Dot_Intrinsic;
  698. def int_arm_neon_bfmmla
  699. : Intrinsic<[llvm_v4f32_ty],
  700. [llvm_v4f32_ty, llvm_v8bf16_ty, llvm_v8bf16_ty],
  701. [IntrNoMem]>;
  702. class Neon_BF16FML_Intrinsic
  703. : Intrinsic<[llvm_v4f32_ty],
  704. [llvm_v4f32_ty, llvm_v8bf16_ty, llvm_v8bf16_ty],
  705. [IntrNoMem]>;
  706. def int_arm_neon_bfmlalb : Neon_BF16FML_Intrinsic;
  707. def int_arm_neon_bfmlalt : Neon_BF16FML_Intrinsic;
  708. def int_arm_cls: Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem]>;
  709. def int_arm_cls64: Intrinsic<[llvm_i32_ty], [llvm_i64_ty], [IntrNoMem]>;
  710. def int_arm_mve_vctp8 : Intrinsic<[llvm_v16i1_ty], [llvm_i32_ty], [IntrNoMem]>;
  711. def int_arm_mve_vctp16 : Intrinsic<[llvm_v8i1_ty], [llvm_i32_ty], [IntrNoMem]>;
  712. def int_arm_mve_vctp32 : Intrinsic<[llvm_v4i1_ty], [llvm_i32_ty], [IntrNoMem]>;
  713. // vctp64 takes v4i1, to work around v2i1 not being a legal MVE type
  714. def int_arm_mve_vctp64 : Intrinsic<[llvm_v4i1_ty], [llvm_i32_ty], [IntrNoMem]>;
  715. // v8.3-A Floating-point complex add
  716. def int_arm_neon_vcadd_rot90 : Neon_2Arg_Intrinsic;
  717. def int_arm_neon_vcadd_rot270 : Neon_2Arg_Intrinsic;
  718. // GNU eabi mcount
  719. def int_arm_gnu_eabi_mcount : Intrinsic<[], [], []>;
  720. def int_arm_mve_pred_i2v : Intrinsic<
  721. [llvm_anyvector_ty], [llvm_i32_ty], [IntrNoMem]>;
  722. def int_arm_mve_pred_v2i : Intrinsic<
  723. [llvm_i32_ty], [llvm_anyvector_ty], [IntrNoMem]>;
  724. def int_arm_mve_vreinterpretq : Intrinsic<
  725. [llvm_anyvector_ty], [llvm_anyvector_ty], [IntrNoMem]>;
  726. def int_arm_mve_min_predicated: Intrinsic<[llvm_anyvector_ty],
  727. [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty /* unsigned */,
  728. llvm_anyvector_ty, LLVMMatchType<0>],
  729. [IntrNoMem]>;
  730. def int_arm_mve_max_predicated: Intrinsic<[llvm_anyvector_ty],
  731. [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty /* unsigned */,
  732. llvm_anyvector_ty, LLVMMatchType<0>],
  733. [IntrNoMem]>;
  734. def int_arm_mve_abd_predicated: Intrinsic<[llvm_anyvector_ty],
  735. [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty /* unsigned */,
  736. llvm_anyvector_ty, LLVMMatchType<0>], [IntrNoMem]>;
  737. def int_arm_mve_add_predicated: Intrinsic<[llvm_anyvector_ty],
  738. [LLVMMatchType<0>, LLVMMatchType<0>, llvm_anyvector_ty, LLVMMatchType<0>],
  739. [IntrNoMem]>;
  740. def int_arm_mve_and_predicated: Intrinsic<[llvm_anyvector_ty],
  741. [LLVMMatchType<0>, LLVMMatchType<0>, llvm_anyvector_ty, LLVMMatchType<0>],
  742. [IntrNoMem]>;
  743. def int_arm_mve_bic_predicated: Intrinsic<[llvm_anyvector_ty],
  744. [LLVMMatchType<0>, LLVMMatchType<0>, llvm_anyvector_ty, LLVMMatchType<0>],
  745. [IntrNoMem]>;
  746. def int_arm_mve_eor_predicated: Intrinsic<[llvm_anyvector_ty],
  747. [LLVMMatchType<0>, LLVMMatchType<0>, llvm_anyvector_ty, LLVMMatchType<0>],
  748. [IntrNoMem]>;
  749. def int_arm_mve_orn_predicated: Intrinsic<[llvm_anyvector_ty],
  750. [LLVMMatchType<0>, LLVMMatchType<0>, llvm_anyvector_ty, LLVMMatchType<0>],
  751. [IntrNoMem]>;
  752. def int_arm_mve_orr_predicated: Intrinsic<[llvm_anyvector_ty],
  753. [LLVMMatchType<0>, LLVMMatchType<0>, llvm_anyvector_ty, LLVMMatchType<0>],
  754. [IntrNoMem]>;
  755. def int_arm_mve_sub_predicated: Intrinsic<[llvm_anyvector_ty],
  756. [LLVMMatchType<0>, LLVMMatchType<0>, llvm_anyvector_ty, LLVMMatchType<0>],
  757. [IntrNoMem]>;
  758. def int_arm_mve_mul_predicated: Intrinsic<[llvm_anyvector_ty],
  759. [LLVMMatchType<0>, LLVMMatchType<0>, llvm_anyvector_ty, LLVMMatchType<0>],
  760. [IntrNoMem]>;
  761. def int_arm_mve_mulh_predicated: Intrinsic<[llvm_anyvector_ty],
  762. [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty /* unsigned */,
  763. llvm_anyvector_ty, LLVMMatchType<0>],
  764. [IntrNoMem]>;
  765. def int_arm_mve_qdmulh_predicated: Intrinsic<[llvm_anyvector_ty],
  766. [LLVMMatchType<0>, LLVMMatchType<0>, llvm_anyvector_ty, LLVMMatchType<0>],
  767. [IntrNoMem]>;
  768. def int_arm_mve_rmulh_predicated: Intrinsic<[llvm_anyvector_ty],
  769. [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty /* unsigned */,
  770. llvm_anyvector_ty, LLVMMatchType<0>],
  771. [IntrNoMem]>;
  772. def int_arm_mve_qrdmulh_predicated: Intrinsic<[llvm_anyvector_ty],
  773. [LLVMMatchType<0>, LLVMMatchType<0>, llvm_anyvector_ty, LLVMMatchType<0>],
  774. [IntrNoMem]>;
  775. def int_arm_mve_mull_int_predicated: Intrinsic<[llvm_anyvector_ty],
  776. [llvm_anyvector_ty, LLVMMatchType<1>, llvm_i32_ty /* unsigned */,
  777. llvm_i32_ty /* top */, llvm_anyvector_ty, LLVMMatchType<0>],
  778. [IntrNoMem]>;
  779. def int_arm_mve_mull_poly_predicated: Intrinsic<[llvm_anyvector_ty],
  780. [llvm_anyvector_ty, LLVMMatchType<1>, llvm_i32_ty, llvm_anyvector_ty,
  781. LLVMMatchType<0>],
  782. [IntrNoMem]>;
  783. def int_arm_mve_qadd_predicated: Intrinsic<[llvm_anyvector_ty],
  784. [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty /* unsigned */,
  785. llvm_anyvector_ty, LLVMMatchType<0>], [IntrNoMem]>;
  786. def int_arm_mve_hadd_predicated: Intrinsic<[llvm_anyvector_ty],
  787. [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty /* unsigned */,
  788. llvm_anyvector_ty, LLVMMatchType<0>], [IntrNoMem]>;
  789. def int_arm_mve_rhadd_predicated: Intrinsic<[llvm_anyvector_ty],
  790. [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty /* unsigned */,
  791. llvm_anyvector_ty, LLVMMatchType<0>], [IntrNoMem]>;
  792. def int_arm_mve_qsub_predicated: Intrinsic<[llvm_anyvector_ty],
  793. [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty /* unsigned */,
  794. llvm_anyvector_ty, LLVMMatchType<0>], [IntrNoMem]>;
  795. def int_arm_mve_hsub_predicated: Intrinsic<[llvm_anyvector_ty],
  796. [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty /* unsigned */,
  797. llvm_anyvector_ty, LLVMMatchType<0>], [IntrNoMem]>;
  798. def int_arm_mve_vmina_predicated: Intrinsic<[llvm_anyvector_ty],
  799. [LLVMMatchType<0>, LLVMMatchType<0>, llvm_anyvector_ty],
  800. [IntrNoMem]>;
  801. def int_arm_mve_vmaxa_predicated: Intrinsic<[llvm_anyvector_ty],
  802. [LLVMMatchType<0>, LLVMMatchType<0>, llvm_anyvector_ty],
  803. [IntrNoMem]>;
  804. def int_arm_mve_vminnma_predicated: Intrinsic<[llvm_anyvector_ty],
  805. [LLVMMatchType<0>, LLVMMatchType<0>, llvm_anyvector_ty],
  806. [IntrNoMem]>;
  807. def int_arm_mve_vmaxnma_predicated: Intrinsic<[llvm_anyvector_ty],
  808. [LLVMMatchType<0>, LLVMMatchType<0>, llvm_anyvector_ty],
  809. [IntrNoMem]>;
  810. multiclass MVEPredicated<list<LLVMType> rets, list<LLVMType> params,
  811. LLVMType pred = llvm_anyvector_ty,
  812. list<IntrinsicProperty> props = [IntrNoMem]> {
  813. def "": Intrinsic<rets, params, props>;
  814. def _predicated: Intrinsic<rets, params # [pred], props>;
  815. }
  816. multiclass MVEPredicatedM<list<LLVMType> rets, list<LLVMType> params,
  817. LLVMType pred = llvm_anyvector_ty,
  818. list<IntrinsicProperty> props = [IntrNoMem]> {
  819. def "": Intrinsic<rets, params, props>;
  820. def _predicated: Intrinsic<rets, params # [pred,
  821. !if(!eq(rets[0], llvm_anyvector_ty),
  822. LLVMMatchType<0>, rets[0])], props>;
  823. }
  824. multiclass MVE_minmaxv {
  825. defm v: MVEPredicated<[llvm_i32_ty],
  826. [llvm_i32_ty, llvm_anyvector_ty, llvm_i32_ty /* unsigned */]>;
  827. defm av: MVEPredicated<[llvm_i32_ty],
  828. [llvm_i32_ty, llvm_anyvector_ty]>;
  829. defm nmv: MVEPredicated<[llvm_anyfloat_ty],
  830. [LLVMMatchType<0>, llvm_anyvector_ty]>;
  831. defm nmav: MVEPredicated<[llvm_anyfloat_ty],
  832. [LLVMMatchType<0>, llvm_anyvector_ty]>;
  833. }
  834. defm int_arm_mve_min: MVE_minmaxv;
  835. defm int_arm_mve_max: MVE_minmaxv;
  836. defm int_arm_mve_addv: MVEPredicated<[llvm_i32_ty],
  837. [llvm_anyvector_ty, llvm_i32_ty /* unsigned */]>;
  838. defm int_arm_mve_addlv: MVEPredicated<[llvm_i64_ty],
  839. [llvm_anyvector_ty, llvm_i32_ty /* unsigned */]>;
  840. // Intrinsic with a predicated and a non-predicated case. The predicated case
  841. // has two additional parameters: inactive (the value for inactive lanes, can
  842. // be undef) and predicate.
  843. multiclass MVEMXPredicated<list<LLVMType> rets, list<LLVMType> flags,
  844. list<LLVMType> params, LLVMType inactive,
  845. LLVMType predicate,
  846. list<IntrinsicProperty> props = [IntrNoMem]> {
  847. def "": Intrinsic<rets, flags # params, props>;
  848. def _predicated: Intrinsic<rets, flags # [inactive] # params # [predicate],
  849. props>;
  850. }
  851. defm int_arm_mve_vcvt_narrow: MVEPredicated<[llvm_v8f16_ty],
  852. [llvm_v8f16_ty, llvm_v4f32_ty, llvm_i32_ty], llvm_v4i1_ty>;
  853. defm int_arm_mve_vcvt_widen: MVEMXPredicated<[llvm_v4f32_ty], [],
  854. [llvm_v8f16_ty, llvm_i32_ty], llvm_v4f32_ty, llvm_v4i1_ty>;
  855. defm int_arm_mve_vldr_gather_base: MVEPredicated<
  856. [llvm_anyvector_ty], [llvm_anyvector_ty, llvm_i32_ty],
  857. llvm_anyvector_ty, [IntrReadMem]>;
  858. defm int_arm_mve_vldr_gather_base_wb: MVEPredicated<
  859. [llvm_anyvector_ty, llvm_anyvector_ty],
  860. [LLVMMatchType<1>, llvm_i32_ty], llvm_anyvector_ty, [IntrReadMem]>;
  861. defm int_arm_mve_vstr_scatter_base: MVEPredicated<
  862. [], [llvm_anyvector_ty, llvm_i32_ty, llvm_anyvector_ty],
  863. llvm_anyvector_ty, [IntrWriteMem]>;
  864. defm int_arm_mve_vstr_scatter_base_wb: MVEPredicated<
  865. [llvm_anyvector_ty], [LLVMMatchType<0>, llvm_i32_ty, llvm_anyvector_ty],
  866. llvm_anyvector_ty, [IntrWriteMem]>;
  867. // gather_offset takes three i32 parameters. The first is the size of
  868. // memory element loaded, in bits. The second is a left bit shift to
  869. // apply to each offset in the vector parameter (must be either 0, or
  870. // correspond to the element size of the destination vector type). The
  871. // last is 1 to indicate zero extension (if the load is widening), or
  872. // 0 for sign extension.
  873. //
  874. // scatter_offset has the first two of those parameters, but since it
  875. // narrows rather than widening, it doesn't have the last one.
  876. defm int_arm_mve_vldr_gather_offset: MVEPredicated<
  877. [llvm_anyvector_ty], [llvm_anyptr_ty, llvm_anyvector_ty,
  878. llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], llvm_anyvector_ty, [IntrReadMem]>;
  879. defm int_arm_mve_vstr_scatter_offset: MVEPredicated<
  880. [], [llvm_anyptr_ty, llvm_anyvector_ty, llvm_anyvector_ty,
  881. llvm_i32_ty, llvm_i32_ty], llvm_anyvector_ty, [IntrWriteMem]>;
  882. def int_arm_mve_shl_imm_predicated: Intrinsic<[llvm_anyvector_ty],
  883. [LLVMMatchType<0>, llvm_i32_ty, llvm_anyvector_ty, LLVMMatchType<0>],
  884. [IntrNoMem]>;
  885. def int_arm_mve_shr_imm_predicated: Intrinsic<[llvm_anyvector_ty],
  886. [LLVMMatchType<0>, llvm_i32_ty, llvm_i32_ty, // extra i32 is unsigned flag
  887. llvm_anyvector_ty, LLVMMatchType<0>],
  888. [IntrNoMem]>;
  889. defm int_arm_mve_vqshl_imm: MVEPredicatedM<[llvm_anyvector_ty],
  890. [LLVMMatchType<0>, llvm_i32_ty /*shiftcount*/, llvm_i32_ty /*unsigned*/]>;
  891. defm int_arm_mve_vrshr_imm: MVEPredicatedM<[llvm_anyvector_ty],
  892. [LLVMMatchType<0>, llvm_i32_ty /*shiftcount*/, llvm_i32_ty /*unsigned*/]>;
  893. defm int_arm_mve_vqshlu_imm: MVEPredicatedM<[llvm_anyvector_ty],
  894. [LLVMMatchType<0>, llvm_i32_ty /*shiftcount*/]>;
  895. defm int_arm_mve_vshll_imm: MVEPredicatedM<[llvm_anyvector_ty],
  896. [llvm_anyvector_ty, llvm_i32_ty /*shiftcount*/, llvm_i32_ty /*unsigned*/,
  897. llvm_i32_ty /*top-half*/]>;
  898. defm int_arm_mve_vsli: MVEPredicated<
  899. [llvm_anyvector_ty], [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty]>;
  900. defm int_arm_mve_vsri: MVEPredicated<
  901. [llvm_anyvector_ty], [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty]>;
  902. defm int_arm_mve_vshrn: MVEPredicated<
  903. [llvm_anyvector_ty], [LLVMMatchType<0>, llvm_anyvector_ty,
  904. llvm_i32_ty /*shiftcount*/, llvm_i32_ty /*saturate*/, llvm_i32_ty /*round*/,
  905. llvm_i32_ty /*unsigned-out*/, llvm_i32_ty /*unsigned-in*/,
  906. llvm_i32_ty /*top-half*/]>;
  907. defm int_arm_mve_vshl_scalar: MVEPredicated<
  908. [llvm_anyvector_ty], [LLVMMatchType<0>, llvm_i32_ty /*shiftcount*/,
  909. llvm_i32_ty /*saturate*/, llvm_i32_ty /*round*/, llvm_i32_ty /*unsigned*/]>;
  910. defm int_arm_mve_vshl_vector: MVEPredicatedM<
  911. [llvm_anyvector_ty], [LLVMMatchType<0>, llvm_anyvector_ty /*shiftcounts*/,
  912. llvm_i32_ty /*saturate*/, llvm_i32_ty /*round*/, llvm_i32_ty /*unsigned*/]>;
  913. // MVE scalar shifts.
  914. class ARM_MVE_qrshift_single<list<LLVMType> value,
  915. list<LLVMType> saturate = []> :
  916. Intrinsic<value, value # [llvm_i32_ty] # saturate, [IntrNoMem]>;
  917. multiclass ARM_MVE_qrshift<list<LLVMType> saturate = []> {
  918. // Most of these shifts come in 32- and 64-bit versions. But only
  919. // the 64-bit ones have the extra saturation argument (if any).
  920. def "": ARM_MVE_qrshift_single<[llvm_i32_ty]>;
  921. def l: ARM_MVE_qrshift_single<[llvm_i32_ty, llvm_i32_ty], saturate>;
  922. }
  923. defm int_arm_mve_urshr: ARM_MVE_qrshift;
  924. defm int_arm_mve_uqshl: ARM_MVE_qrshift;
  925. defm int_arm_mve_srshr: ARM_MVE_qrshift;
  926. defm int_arm_mve_sqshl: ARM_MVE_qrshift;
  927. defm int_arm_mve_uqrshl: ARM_MVE_qrshift<[llvm_i32_ty]>;
  928. defm int_arm_mve_sqrshr: ARM_MVE_qrshift<[llvm_i32_ty]>;
  929. // LSLL and ASRL only have 64-bit versions, not 32.
  930. def int_arm_mve_lsll: ARM_MVE_qrshift_single<[llvm_i32_ty, llvm_i32_ty]>;
  931. def int_arm_mve_asrl: ARM_MVE_qrshift_single<[llvm_i32_ty, llvm_i32_ty]>;
  932. def int_arm_mve_vabd: Intrinsic<
  933. [llvm_anyvector_ty],
  934. [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty /* unsigned */],
  935. [IntrNoMem]>;
  936. def int_arm_mve_vadc: Intrinsic<
  937. [llvm_anyvector_ty, llvm_i32_ty],
  938. [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty], [IntrNoMem]>;
  939. def int_arm_mve_vsbc: Intrinsic<
  940. [llvm_anyvector_ty, llvm_i32_ty],
  941. [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty], [IntrNoMem]>;
  942. def int_arm_mve_vadc_predicated: Intrinsic<
  943. [llvm_anyvector_ty, llvm_i32_ty],
  944. [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>,
  945. llvm_i32_ty, llvm_anyvector_ty], [IntrNoMem]>;
  946. def int_arm_mve_vsbc_predicated: Intrinsic<
  947. [llvm_anyvector_ty, llvm_i32_ty],
  948. [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>,
  949. llvm_i32_ty, llvm_anyvector_ty], [IntrNoMem]>;
  950. def int_arm_mve_vshlc: Intrinsic<
  951. [llvm_i32_ty /* bits shifted out */, llvm_anyvector_ty],
  952. [LLVMMatchType<0>, llvm_i32_ty /* bits shifted in */,
  953. llvm_i32_ty /* shift count */], [IntrNoMem]>;
  954. def int_arm_mve_vshlc_predicated: Intrinsic<
  955. [llvm_i32_ty /* bits shifted out */, llvm_anyvector_ty],
  956. [LLVMMatchType<0>, llvm_i32_ty /* bits shifted in */,
  957. llvm_i32_ty /* shift count */, llvm_anyvector_ty], [IntrNoMem]>;
  958. def int_arm_mve_vmulh: Intrinsic<
  959. [llvm_anyvector_ty],
  960. [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty /* unsigned */],
  961. [IntrNoMem]>;
  962. def int_arm_mve_vqdmulh: Intrinsic<
  963. [llvm_anyvector_ty],
  964. [LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>;
  965. def int_arm_mve_vhadd: Intrinsic<
  966. [llvm_anyvector_ty],
  967. [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty /* unsigned */],
  968. [IntrNoMem]>;
  969. def int_arm_mve_vrhadd: Intrinsic<
  970. [llvm_anyvector_ty],
  971. [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty /* unsigned */],
  972. [IntrNoMem]>;
  973. def int_arm_mve_vhsub: Intrinsic<
  974. [llvm_anyvector_ty],
  975. [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty /* unsigned */],
  976. [IntrNoMem]>;
  977. def int_arm_mve_vrmulh: Intrinsic<
  978. [llvm_anyvector_ty],
  979. [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty /* unsigned */],
  980. [IntrNoMem]>;
  981. def int_arm_mve_vqrdmulh: Intrinsic<
  982. [llvm_anyvector_ty],
  983. [LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>;
  984. def int_arm_mve_vmull: Intrinsic<
  985. [llvm_anyvector_ty],
  986. [llvm_anyvector_ty, LLVMMatchType<1>, llvm_i32_ty /* unsigned */,
  987. llvm_i32_ty /* top */], [IntrNoMem]>;
  988. def int_arm_mve_vmull_poly: Intrinsic<
  989. [llvm_anyvector_ty],
  990. [llvm_anyvector_ty, LLVMMatchType<1>, llvm_i32_ty], [IntrNoMem]>;
  991. // The first two parameters are compile-time constants:
  992. // * Halving: 0 means halving (vhcaddq), 1 means non-halving (vcaddq)
  993. // instruction. Note: the flag is inverted to match the corresponding
  994. // bit in the instruction encoding
// * Rotation angle: 0 means 90 deg, 1 means 270 deg
  996. defm int_arm_mve_vcaddq : MVEMXPredicated<
  997. [llvm_anyvector_ty],
  998. [llvm_i32_ty, llvm_i32_ty], [LLVMMatchType<0>, LLVMMatchType<0>],
  999. LLVMMatchType<0>, llvm_anyvector_ty>;
  1000. // The first operand of the following two intrinsics is the rotation angle
  1001. // (must be a compile-time constant):
  1002. // 0 - 0 deg
  1003. // 1 - 90 deg
  1004. // 2 - 180 deg
  1005. // 3 - 270 deg
  1006. defm int_arm_mve_vcmulq : MVEMXPredicated<
  1007. [llvm_anyvector_ty],
  1008. [llvm_i32_ty], [LLVMMatchType<0>, LLVMMatchType<0>],
  1009. LLVMMatchType<0>, llvm_anyvector_ty>;
  1010. defm int_arm_mve_vcmlaq : MVEPredicated<
  1011. [llvm_anyvector_ty],
  1012. [llvm_i32_ty, LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>],
  1013. llvm_anyvector_ty>;
  1014. def int_arm_mve_vld2q: Intrinsic<[llvm_anyvector_ty, LLVMMatchType<0>], [llvm_anyptr_ty], [IntrReadMem, IntrArgMemOnly]>;
  1015. def int_arm_mve_vld4q: Intrinsic<[llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], [llvm_anyptr_ty], [IntrReadMem, IntrArgMemOnly]>;
  1016. def int_arm_mve_vst2q: Intrinsic<[], [llvm_anyptr_ty, llvm_anyvector_ty, LLVMMatchType<1>, llvm_i32_ty], [IntrWriteMem, IntrArgMemOnly]>;
  1017. def int_arm_mve_vst4q: Intrinsic<[], [llvm_anyptr_ty, llvm_anyvector_ty, LLVMMatchType<1>, LLVMMatchType<1>, LLVMMatchType<1>, llvm_i32_ty], [IntrWriteMem, IntrArgMemOnly]>;
  1018. // MVE vector absolute difference and accumulate across vector
  1019. // The first operand is an 'unsigned' flag. The remaining operands are:
  1020. // * accumulator
  1021. // * first vector operand
  1022. // * second vector operand
  1023. // * mask (only in predicated versions)
  1024. defm int_arm_mve_vabav: MVEPredicated<
  1025. [llvm_i32_ty],
  1026. [llvm_i32_ty, llvm_i32_ty, llvm_anyvector_ty, LLVMMatchType<0>], llvm_anyvector_ty>;
// The following 3 intrinsics are MVE vector reductions with two vector
  1028. // operands.
  1029. // The first 3 operands are boolean flags (must be compile-time constants):
  1030. // * unsigned - the instruction operates on vectors of unsigned values and
  1031. // unsigned scalars
  1032. // * subtract - the instruction performs subtraction after multiplication of
  1033. // lane pairs (e.g., vmlsdav vs vmladav)
  1034. // * exchange - the instruction exchanges successive even and odd lanes of
  1035. // the first operands before multiplication of lane pairs
  1036. // (e.g., vmladavx vs vmladav)
  1037. // The remaining operands are:
  1038. // * accumulator
  1039. // * first vector operand
  1040. // * second vector operand
  1041. // * mask (only in predicated versions)
  1042. // Version with 32-bit result, vml{a,s}dav[a][x]
  1043. defm int_arm_mve_vmldava: MVEPredicated<
  1044. [llvm_i32_ty],
  1045. [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
  1046. llvm_i32_ty, llvm_anyvector_ty, LLVMMatchType<0>],
  1047. llvm_anyvector_ty>;
  1048. // Version with 64-bit result, vml{a,s}ldav[a][x]
  1049. defm int_arm_mve_vmlldava: MVEPredicated<
  1050. [llvm_i32_ty, llvm_i32_ty],
  1051. [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
  1052. llvm_i32_ty, llvm_i32_ty, llvm_anyvector_ty, LLVMMatchType<0>],
  1053. llvm_anyvector_ty>;
  1054. // Version with 72-bit rounded result, vrml{a,s}ldavh[a][x]
  1055. defm int_arm_mve_vrmlldavha: MVEPredicated<
  1056. [llvm_i32_ty, llvm_i32_ty],
  1057. [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
  1058. llvm_i32_ty, llvm_i32_ty, llvm_anyvector_ty, LLVMMatchType<0>],
  1059. llvm_anyvector_ty>;
  // vidup / vddup: each returns an overloaded vector together with the
  // written-back base, and takes an i32 base and an i32 step. The second
  // template argument (empty here) is where sibling definitions pass their
  // flag operands.
  // NOTE(review): MVEMXPredicated is defined outside this excerpt; its last
  // two template arguments are presumably the inactive-lanes type and the
  // predicate type of the predicated variant - confirm.
  defm int_arm_mve_vidup : MVEMXPredicated<
    [llvm_anyvector_ty /* output */, llvm_i32_ty /* written-back base */],
    [],
    [llvm_i32_ty /* base */, llvm_i32_ty /* step */],
    LLVMMatchType<0>,
    llvm_anyvector_ty>;
  defm int_arm_mve_vddup : MVEMXPredicated<
    [llvm_anyvector_ty /* output */, llvm_i32_ty /* written-back base */],
    [],
    [llvm_i32_ty /* base */, llvm_i32_ty /* step */],
    LLVMMatchType<0>,
    llvm_anyvector_ty>;
  // viwdup / vdwdup: same shape as vidup / vddup, with an extra i32 limit
  // operand between the base and the step.
  defm int_arm_mve_viwdup : MVEMXPredicated<
    [llvm_anyvector_ty /* output */, llvm_i32_ty /* written-back base */],
    [],
    [llvm_i32_ty /* base */,
     llvm_i32_ty /* limit */,
     llvm_i32_ty /* step */],
    LLVMMatchType<0>,
    llvm_anyvector_ty>;
  defm int_arm_mve_vdwdup : MVEMXPredicated<
    [llvm_anyvector_ty /* output */, llvm_i32_ty /* written-back base */],
    [],
    [llvm_i32_ty /* base */,
     llvm_i32_ty /* limit */,
     llvm_i32_ty /* step */],
    LLVMMatchType<0>,
    llvm_anyvector_ty>;
  // vcvt_fix takes one flag operand:
  // * unsigned
  // followed by the input vector and an i32 scale. The output and input
  // vector types are overloaded independently of each other.
  defm int_arm_mve_vcvt_fix : MVEMXPredicated<
    [llvm_anyvector_ty /* output */],
    [llvm_i32_ty /* unsigned */],
    [llvm_anyvector_ty /* input vector */, llvm_i32_ty /* scale */],
    LLVMMatchType<0>,
    llvm_anyvector_ty>;
  // Predicated-only conversion intrinsic, declared directly rather than
  // through one of the predication multiclasses. Result, input and predicate
  // are three independently overloaded vector types; the inactive value has
  // the result type. Marked as not accessing memory.
  def int_arm_mve_vcvt_fp_int_predicated : Intrinsic<
    [llvm_anyvector_ty /* result */],
    [llvm_anyvector_ty /* input */,
     llvm_i32_ty /* unsigned */,
     llvm_anyvector_ty /* predicate */,
     LLVMMatchType<0> /* inactive */],
    [IntrNoMem]>;
  // Stamp out one intrinsic family per suffix: int_arm_mve_vcvta,
  // int_arm_mve_vcvtn, int_arm_mve_vcvtp and int_arm_mve_vcvtm. All four
  // share the same signature: an i32 unsigned flag and an input vector whose
  // type is overloaded independently of the output vector type.
  foreach suffix = ["a", "n", "p", "m"] in {
    defm "int_arm_mve_vcvt" # suffix : MVEMXPredicated<
      [llvm_anyvector_ty /* output */],
      [llvm_i32_ty /* unsigned */],
      [llvm_anyvector_ty /* input */],
      LLVMMatchType<0>,
      llvm_anyvector_ty>;
  }
  // Unpredicated unary intrinsics: one vector operand, result of the same
  // overloaded vector type, no memory access.
  def int_arm_mve_vrintn : Intrinsic<
    [llvm_anyvector_ty],
    [LLVMMatchType<0>],
    [IntrNoMem]>;
  def int_arm_mve_vcls : Intrinsic<
    [llvm_anyvector_ty],
    [LLVMMatchType<0>],
    [IntrNoMem]>;
  // vbrsr: takes a vector of the result type and an i32 operand; no flag
  // operands are declared. The inactive-value argument passed to
  // MVEMXPredicated also has the result type.
  defm int_arm_mve_vbrsr : MVEMXPredicated<
    [llvm_anyvector_ty],
    [],
    [LLVMMatchType<0>, llvm_i32_ty],
    LLVMMatchType<0>,
    llvm_anyvector_ty>;
  // vqdmull: the result vector type (overload 0) is independent of the
  // source vector type (overload 1); both source vectors share overload 1.
  // The trailing i32 operand is not described in this excerpt.
  def int_arm_mve_vqdmull : Intrinsic<
    [llvm_anyvector_ty],
    [llvm_anyvector_ty,
     LLVMMatchType<1>,
     llvm_i32_ty],
    [IntrNoMem]>;

  // Predicated form: appends a further independently overloaded vector and a
  // vector of the result type.
  // NOTE(review): those two are presumably the predicate and the inactive
  // value, in that order - confirm.
  def int_arm_mve_vqdmull_predicated : Intrinsic<
    [llvm_anyvector_ty],
    [llvm_anyvector_ty,
     LLVMMatchType<1>,
     llvm_i32_ty,
     llvm_anyvector_ty,
     LLVMMatchType<0>],
    [IntrNoMem]>;
  // Shared signature for predicated unary intrinsics: the result and the
  // first and third operands have one overloaded vector type, while the
  // second operand is a separately overloaded vector. No memory access.
  // NOTE(review): the operands are presumably input, predicate and inactive
  // value, in that order - confirm.
  class MVESimpleUnaryPredicated : Intrinsic<
    [llvm_anyvector_ty],
    [LLVMMatchType<0>, llvm_anyvector_ty, LLVMMatchType<0>],
    [IntrNoMem]>;
  // Predicated unary intrinsics that all share the MVESimpleUnaryPredicated
  // signature.
  def int_arm_mve_mvn_predicated    : MVESimpleUnaryPredicated;
  def int_arm_mve_abs_predicated    : MVESimpleUnaryPredicated;
  def int_arm_mve_neg_predicated    : MVESimpleUnaryPredicated;
  def int_arm_mve_qabs_predicated   : MVESimpleUnaryPredicated;
  def int_arm_mve_qneg_predicated   : MVESimpleUnaryPredicated;
  def int_arm_mve_clz_predicated    : MVESimpleUnaryPredicated;
  def int_arm_mve_cls_predicated    : MVESimpleUnaryPredicated;
  // vrint* family.
  def int_arm_mve_vrintz_predicated : MVESimpleUnaryPredicated;
  def int_arm_mve_vrintm_predicated : MVESimpleUnaryPredicated;
  def int_arm_mve_vrintp_predicated : MVESimpleUnaryPredicated;
  def int_arm_mve_vrinta_predicated : MVESimpleUnaryPredicated;
  def int_arm_mve_vrintx_predicated : MVESimpleUnaryPredicated;
  def int_arm_mve_vrintn_predicated : MVESimpleUnaryPredicated;
  // Predicated vrev: a vector of the result type, an i32 giving the size to
  // reverse, a separately overloaded vector and a final vector of the result
  // type.
  // NOTE(review): the last two operands are presumably the predicate and
  // the inactive value - confirm.
  def int_arm_mve_vrev_predicated : Intrinsic<
    [llvm_anyvector_ty],
    [LLVMMatchType<0>,
     llvm_i32_ty /* size to reverse */,
     llvm_anyvector_ty,
     LLVMMatchType<0>],
    [IntrNoMem]>;
  // Predicated vmovl: the source vector type is overloaded independently of
  // the result type. Two i32 flags follow the source, then the predicate and
  // a final operand of the result type.
  // NOTE(review): the final operand is presumably the inactive value -
  // confirm.
  def int_arm_mve_vmovl_predicated : Intrinsic<
    [llvm_anyvector_ty],
    [llvm_anyvector_ty,
     llvm_i32_ty /* unsigned */,
     llvm_i32_ty /* top half */,
     llvm_anyvector_ty /* predicate */,
     LLVMMatchType<0>],
    [IntrNoMem]>;
  // Predicated vmovn: the first operand has the result type and the second
  // is a separately overloaded vector; then an i32 top-half flag and the
  // predicate. No separate trailing inactive operand is declared.
  def int_arm_mve_vmovn_predicated : Intrinsic<
    [llvm_anyvector_ty],
    [LLVMMatchType<0>,
     llvm_anyvector_ty,
     llvm_i32_ty /* top half */,
     llvm_anyvector_ty /* predicate */],
    [IntrNoMem]>;
  // vqmovn: the first operand has the result type and the second is a
  // separately overloaded vector, followed by three i32 flags.
  def int_arm_mve_vqmovn : Intrinsic<
    [llvm_anyvector_ty],
    [LLVMMatchType<0>,
     llvm_anyvector_ty,
     llvm_i32_ty /* unsigned output */,
     llvm_i32_ty /* unsigned input */,
     llvm_i32_ty /* top half */],
    [IntrNoMem]>;

  // Predicated vqmovn: identical operands plus a trailing predicate vector.
  def int_arm_mve_vqmovn_predicated : Intrinsic<
    [llvm_anyvector_ty],
    [LLVMMatchType<0>,
     llvm_anyvector_ty,
     llvm_i32_ty /* unsigned output */,
     llvm_i32_ty /* unsigned input */,
     llvm_i32_ty /* top half */,
     llvm_anyvector_ty /* pred */],
    [IntrNoMem]>;
  // Predicated fused multiply-add: all three data operands have the result
  // vector type; the predicate is a separately overloaded vector.
  def int_arm_mve_fma_predicated : Intrinsic<
    [llvm_anyvector_ty],
    [LLVMMatchType<0> /* mult op #1 */,
     LLVMMatchType<0> /* mult op #2 */,
     LLVMMatchType<0> /* addend */,
     llvm_anyvector_ty /* pred */],
    [IntrNoMem]>;
  // Predicated vmla with a scalar multiplicand: the i32 scalar is the second
  // multiplication operand and the addend is a vector.
  def int_arm_mve_vmla_n_predicated : Intrinsic<
    [llvm_anyvector_ty],
    [LLVMMatchType<0> /* mult op #1 */,
     LLVMMatchType<0> /* addend */,
     llvm_i32_ty /* mult op #2 (scalar) */,
     llvm_anyvector_ty /* pred */],
    [IntrNoMem]>;

  // Predicated vmlas with a scalar addend: both multiplication operands are
  // vectors and the i32 scalar is added.
  def int_arm_mve_vmlas_n_predicated : Intrinsic<
    [llvm_anyvector_ty],
    [LLVMMatchType<0> /* mult op #1 */,
     LLVMMatchType<0> /* mult op #2 */,
     llvm_i32_ty /* addend (scalar) */,
     llvm_anyvector_ty /* pred */],
    [IntrNoMem]>;
  // vqdmlah / vqrdmlah: vector multiplicand, vector addend and an i32 scalar
  // as the second multiplicand. Only the result and operand lists are
  // supplied; any further MVEPredicated template arguments keep their
  // defaults.
  defm int_arm_mve_vqdmlah : MVEPredicated<
    [llvm_anyvector_ty],
    [LLVMMatchType<0> /* mult op #1 */,
     LLVMMatchType<0> /* addend */,
     llvm_i32_ty /* mult op #2 (scalar) */]>;
  defm int_arm_mve_vqrdmlah : MVEPredicated<
    [llvm_anyvector_ty],
    [LLVMMatchType<0> /* mult op #1 */,
     LLVMMatchType<0> /* addend */,
     llvm_i32_ty /* mult op #2 (scalar) */]>;
  // vqdmlash / vqrdmlash: two vector multiplicands and an i32 scalar addend.
  // Only the result and operand lists are supplied; any further
  // MVEPredicated template arguments keep their defaults.
  defm int_arm_mve_vqdmlash : MVEPredicated<
    [llvm_anyvector_ty],
    [LLVMMatchType<0> /* mult op #1 */,
     LLVMMatchType<0> /* mult op #2 */,
     llvm_i32_ty /* addend (scalar) */]>;
  defm int_arm_mve_vqrdmlash : MVEPredicated<
    [llvm_anyvector_ty],
    [LLVMMatchType<0> /* mult op #1 */,
     LLVMMatchType<0> /* mult op #2 */,
     llvm_i32_ty /* addend (scalar) */]>;
  // vqdmlad: three vector operands of the result type followed by three i32
  // flags (exchange, round, subtract). The roles of the individual vector
  // operands are not described in this excerpt.
  defm int_arm_mve_vqdmlad : MVEPredicated<
    [llvm_anyvector_ty],
    [LLVMMatchType<0>,
     LLVMMatchType<0>,
     LLVMMatchType<0>,
     llvm_i32_ty /* exchange */,
     llvm_i32_ty /* round */,
     llvm_i32_ty /* subtract */]>;
  // CDE (Custom Datapath Extension)

  // Defines four i32-based intrinsics per instantiation, named by appending
  // nothing, "a", "d" or "da" to the defm name:
  //   ""  - one i32 result, no accumulator
  //   a   - one i32 result, one i32 accumulator operand
  //   d   - two i32 results (lo, hi), no accumulator
  //   da  - two i32 results (lo, hi), two i32 accumulator operands
  // In every variant the operand list is: coproc, any accumulator operands,
  // the caller-supplied `args`, then imm. The coproc operand (index 0) and
  // the trailing imm operand are both marked ImmArg; the imm index is
  // !size(args) plus the number of operands that precede `args`.
  multiclass CDEGPRIntrinsics<list<LLVMType> args> {
    def "" : Intrinsic<
      [llvm_i32_ty],
      !listconcat([llvm_i32_ty /* coproc */],
                  args,
                  [llvm_i32_ty /* imm */]),
      [IntrNoMem,
       ImmArg<ArgIndex<0>>,
       ImmArg<ArgIndex<!add(!size(args), 1)>>]>;

    def a : Intrinsic<
      [llvm_i32_ty],
      !listconcat([llvm_i32_ty /* coproc */, llvm_i32_ty /* acc */],
                  args,
                  [llvm_i32_ty /* imm */]),
      [IntrNoMem,
       ImmArg<ArgIndex<0>>,
       ImmArg<ArgIndex<!add(!size(args), 2)>>]>;

    def d : Intrinsic<
      [llvm_i32_ty /* lo */, llvm_i32_ty /* hi */],
      !listconcat([llvm_i32_ty /* coproc */],
                  args,
                  [llvm_i32_ty /* imm */]),
      [IntrNoMem,
       ImmArg<ArgIndex<0>>,
       ImmArg<ArgIndex<!add(!size(args), 1)>>]>;

    def da : Intrinsic<
      [llvm_i32_ty /* lo */, llvm_i32_ty /* hi */],
      !listconcat([llvm_i32_ty /* coproc */,
                   llvm_i32_ty /* acc_lo */,
                   llvm_i32_ty /* acc_hi */],
                  args,
                  [llvm_i32_ty /* imm */]),
      [IntrNoMem,
       ImmArg<ArgIndex<0>>,
       ImmArg<ArgIndex<!add(!size(args), 3)>>]>;
  }
  // cx1 / cx2 / cx3 pass zero, one and two extra i32 operands respectively
  // between the coproc/accumulator operands and the immediate.
  defm int_arm_cde_cx1 : CDEGPRIntrinsics<[]>;
  defm int_arm_cde_cx2 : CDEGPRIntrinsics<[llvm_i32_ty]>;
  defm int_arm_cde_cx3 : CDEGPRIntrinsics<[llvm_i32_ty, llvm_i32_ty]>;
  // Defines two intrinsics per instantiation whose result is an overloaded
  // floating-point type: the base form and an "a" form that takes an
  // accumulator of the result type right after coproc. As in the GPR
  // multiclass, coproc (index 0) and the trailing imm are marked ImmArg.
  multiclass CDEVCXIntrinsics<list<LLVMType> args> {
    def "" : Intrinsic<
      [llvm_anyfloat_ty],
      !listconcat([llvm_i32_ty /* coproc */],
                  args,
                  [llvm_i32_ty /* imm */]),
      [IntrNoMem,
       ImmArg<ArgIndex<0>>,
       ImmArg<ArgIndex<!add(!size(args), 1)>>]>;

    def a : Intrinsic<
      [llvm_anyfloat_ty],
      !listconcat([llvm_i32_ty /* coproc */, LLVMMatchType<0> /* acc */],
                  args,
                  [llvm_i32_ty /* imm */]),
      [IntrNoMem,
       ImmArg<ArgIndex<0>>,
       ImmArg<ArgIndex<!add(!size(args), 2)>>]>;
  }
  // vcx1 / vcx2 / vcx3 pass zero, one and two extra operands respectively;
  // each extra operand has the same overloaded type as the result.
  defm int_arm_cde_vcx1 : CDEVCXIntrinsics<[]>;
  defm int_arm_cde_vcx2 : CDEVCXIntrinsics<[LLVMMatchType<0>]>;
  defm int_arm_cde_vcx3 : CDEVCXIntrinsics<[LLVMMatchType<0>,
                                            LLVMMatchType<0>]>;
  // Defines four vector intrinsics per instantiation:
  //   ""            - v16i8 result, no accumulator
  //   a             - v16i8 result, v16i8 accumulator after coproc
  //   _predicated   - overloaded vector result, inactive value of the result
  //                   type after coproc, separately overloaded mask last
  //   a_predicated  - overloaded vector result, accumulator of the result
  //                   type after coproc, separately overloaded mask last
  // coproc (index 0) and imm are marked ImmArg in all four. In the
  // predicated forms imm is followed by the mask, so its index is still
  // !size(args) + 2.
  multiclass CDEVCXVecIntrinsics<list<LLVMType> args> {
    def "" : Intrinsic<
      [llvm_v16i8_ty],
      !listconcat([llvm_i32_ty /* coproc */],
                  args,
                  [llvm_i32_ty /* imm */]),
      [IntrNoMem,
       ImmArg<ArgIndex<0>>,
       ImmArg<ArgIndex<!add(!size(args), 1)>>]>;

    def a : Intrinsic<
      [llvm_v16i8_ty],
      !listconcat([llvm_i32_ty /* coproc */, llvm_v16i8_ty /* acc */],
                  args,
                  [llvm_i32_ty /* imm */]),
      [IntrNoMem,
       ImmArg<ArgIndex<0>>,
       ImmArg<ArgIndex<!add(!size(args), 2)>>]>;

    def _predicated : Intrinsic<
      [llvm_anyvector_ty],
      !listconcat([llvm_i32_ty /* coproc */,
                   LLVMMatchType<0> /* inactive */],
                  args,
                  [llvm_i32_ty /* imm */, llvm_anyvector_ty /* mask */]),
      [IntrNoMem,
       ImmArg<ArgIndex<0>>,
       ImmArg<ArgIndex<!add(!size(args), 2)>>]>;

    def a_predicated : Intrinsic<
      [llvm_anyvector_ty],
      !listconcat([llvm_i32_ty /* coproc */, LLVMMatchType<0> /* acc */],
                  args,
                  [llvm_i32_ty /* imm */, llvm_anyvector_ty /* mask */]),
      [IntrNoMem,
       ImmArg<ArgIndex<0>>,
       ImmArg<ArgIndex<!add(!size(args), 2)>>]>;
  }
  // vcx1q / vcx2q / vcx3q pass zero, one and two extra v16i8 operands
  // respectively.
  defm int_arm_cde_vcx1q : CDEVCXVecIntrinsics<[]>;
  defm int_arm_cde_vcx2q : CDEVCXVecIntrinsics<[llvm_v16i8_ty]>;
  defm int_arm_cde_vcx3q : CDEVCXVecIntrinsics<[llvm_v16i8_ty,
                                                llvm_v16i8_ty]>;
  1228. } // end TargetPrefix