//===- IntrinsicsAMDGPU.td - Defines AMDGPU intrinsics -----*- tablegen -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines all of the AMDGPU-specific intrinsics.
//
//===----------------------------------------------------------------------===//

class AMDGPUReadPreloadRegisterIntrinsic
  : Intrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable, IntrWillReturn]>;

class AMDGPUReadPreloadRegisterIntrinsicNamed<string name>
  : Intrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable, IntrWillReturn]>,
    GCCBuiltin<name>;

// Used to tag image and resource intrinsics with information used to generate
// mem operands.
class AMDGPURsrcIntrinsic<int rsrcarg, bit isimage = false> {
  int RsrcArg = rsrcarg;
  bit IsImage = isimage;
}

let TargetPrefix = "r600" in {

multiclass AMDGPUReadPreloadRegisterIntrinsic_xyz {
  def _x : AMDGPUReadPreloadRegisterIntrinsic;
  def _y : AMDGPUReadPreloadRegisterIntrinsic;
  def _z : AMDGPUReadPreloadRegisterIntrinsic;
}

multiclass AMDGPUReadPreloadRegisterIntrinsic_xyz_named<string prefix> {
  def _x : AMDGPUReadPreloadRegisterIntrinsicNamed<!strconcat(prefix, "_x")>;
  def _y : AMDGPUReadPreloadRegisterIntrinsicNamed<!strconcat(prefix, "_y")>;
  def _z : AMDGPUReadPreloadRegisterIntrinsicNamed<!strconcat(prefix, "_z")>;
}

defm int_r600_read_global_size : AMDGPUReadPreloadRegisterIntrinsic_xyz_named
                                 <"__builtin_r600_read_global_size">;
defm int_r600_read_ngroups : AMDGPUReadPreloadRegisterIntrinsic_xyz_named
                             <"__builtin_r600_read_ngroups">;
defm int_r600_read_tgid : AMDGPUReadPreloadRegisterIntrinsic_xyz_named
                          <"__builtin_r600_read_tgid">;

defm int_r600_read_local_size : AMDGPUReadPreloadRegisterIntrinsic_xyz;
defm int_r600_read_tidig : AMDGPUReadPreloadRegisterIntrinsic_xyz;

def int_r600_group_barrier : GCCBuiltin<"__builtin_r600_group_barrier">,
  Intrinsic<[], [], [IntrConvergent, IntrWillReturn]>;

// AS 7 is PARAM_I_ADDRESS, used for kernel arguments
def int_r600_implicitarg_ptr :
  GCCBuiltin<"__builtin_r600_implicitarg_ptr">,
  Intrinsic<[LLVMQualPointerType<llvm_i8_ty, 7>], [],
  [IntrNoMem, IntrSpeculatable, IntrWillReturn]>;

def int_r600_rat_store_typed :
  // 1st parameter: Data
  // 2nd parameter: Index
  // 3rd parameter: Constant RAT ID
  Intrinsic<[], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i32_ty], [IntrWillReturn]>,
  GCCBuiltin<"__builtin_r600_rat_store_typed">;

def int_r600_recipsqrt_ieee : Intrinsic<
  [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable, IntrWillReturn]
>;

def int_r600_recipsqrt_clamped : Intrinsic<
  [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable, IntrWillReturn]
>;

def int_r600_cube : Intrinsic<
  [llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem, IntrSpeculatable, IntrWillReturn]
>;

def int_r600_store_stream_output : Intrinsic<
  [], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrWillReturn]
>;

class TextureIntrinsicFloatInput : Intrinsic<[llvm_v4f32_ty], [
  llvm_v4f32_ty, // Coord
  llvm_i32_ty,   // offset_x
  llvm_i32_ty,   // offset_y
  llvm_i32_ty,   // offset_z
  llvm_i32_ty,   // resource_id
  llvm_i32_ty,   // samplerid
  llvm_i32_ty,   // coord_type_x
  llvm_i32_ty,   // coord_type_y
  llvm_i32_ty,   // coord_type_z
  llvm_i32_ty],  // coord_type_w
  [IntrNoMem, IntrWillReturn]
>;

class TextureIntrinsicInt32Input : Intrinsic<[llvm_v4i32_ty], [
  llvm_v4i32_ty, // Coord
  llvm_i32_ty,   // offset_x
  llvm_i32_ty,   // offset_y
  llvm_i32_ty,   // offset_z
  llvm_i32_ty,   // resource_id
  llvm_i32_ty,   // samplerid
  llvm_i32_ty,   // coord_type_x
  llvm_i32_ty,   // coord_type_y
  llvm_i32_ty,   // coord_type_z
  llvm_i32_ty],  // coord_type_w
  [IntrNoMem, IntrWillReturn]
>;

def int_r600_store_swizzle :
  Intrinsic<[], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], [IntrWillReturn]
>;

def int_r600_tex : TextureIntrinsicFloatInput;
def int_r600_texc : TextureIntrinsicFloatInput;
def int_r600_txl : TextureIntrinsicFloatInput;
def int_r600_txlc : TextureIntrinsicFloatInput;
def int_r600_txb : TextureIntrinsicFloatInput;
def int_r600_txbc : TextureIntrinsicFloatInput;
def int_r600_txf : TextureIntrinsicInt32Input;
def int_r600_txq : TextureIntrinsicInt32Input;
def int_r600_ddx : TextureIntrinsicFloatInput;
def int_r600_ddy : TextureIntrinsicFloatInput;

def int_r600_dot4 : Intrinsic<[llvm_float_ty],
  [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem, IntrSpeculatable, IntrWillReturn]
>;

def int_r600_kill : Intrinsic<[], [llvm_float_ty], [IntrWillReturn]>;

} // End TargetPrefix = "r600"

let TargetPrefix = "amdgcn" in {

//===----------------------------------------------------------------------===//
// ABI Special Intrinsics
//===----------------------------------------------------------------------===//

defm int_amdgcn_workitem_id : AMDGPUReadPreloadRegisterIntrinsic_xyz;
defm int_amdgcn_workgroup_id : AMDGPUReadPreloadRegisterIntrinsic_xyz_named
                               <"__builtin_amdgcn_workgroup_id">;

def int_amdgcn_dispatch_ptr :
  Intrinsic<[LLVMQualPointerType<llvm_i8_ty, 4>], [],
  [Align<RetIndex, 4>, IntrNoMem, IntrSpeculatable, IntrWillReturn]>;

def int_amdgcn_queue_ptr :
  GCCBuiltin<"__builtin_amdgcn_queue_ptr">,
  Intrinsic<[LLVMQualPointerType<llvm_i8_ty, 4>], [],
  [Align<RetIndex, 4>, IntrNoMem, IntrSpeculatable, IntrWillReturn]>;

def int_amdgcn_kernarg_segment_ptr :
  GCCBuiltin<"__builtin_amdgcn_kernarg_segment_ptr">,
  Intrinsic<[LLVMQualPointerType<llvm_i8_ty, 4>], [],
  [Align<RetIndex, 4>, IntrNoMem, IntrSpeculatable, IntrWillReturn]>;

def int_amdgcn_implicitarg_ptr :
  GCCBuiltin<"__builtin_amdgcn_implicitarg_ptr">,
  Intrinsic<[LLVMQualPointerType<llvm_i8_ty, 4>], [],
  [Align<RetIndex, 4>, IntrNoMem, IntrSpeculatable, IntrWillReturn]>;

def int_amdgcn_groupstaticsize :
  GCCBuiltin<"__builtin_amdgcn_groupstaticsize">,
  Intrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable, IntrWillReturn]>;

def int_amdgcn_dispatch_id :
  GCCBuiltin<"__builtin_amdgcn_dispatch_id">,
  Intrinsic<[llvm_i64_ty], [], [IntrNoMem, IntrSpeculatable, IntrWillReturn]>;

def int_amdgcn_implicit_buffer_ptr :
  GCCBuiltin<"__builtin_amdgcn_implicit_buffer_ptr">,
  Intrinsic<[LLVMQualPointerType<llvm_i8_ty, 4>], [],
  [Align<RetIndex, 4>, IntrNoMem, IntrSpeculatable, IntrWillReturn]>;

// Set EXEC to the 64-bit value given.
// This is always moved to the beginning of the basic block.
// FIXME: Should be mangled for wave size.
def int_amdgcn_init_exec : Intrinsic<[],
  [llvm_i64_ty],      // 64-bit literal constant
  [IntrConvergent, ImmArg<ArgIndex<0>>]>;

// Set EXEC according to a thread count packed in an SGPR input:
//    thread_count = (input >> bitoffset) & 0x7f;
// This is always moved to the beginning of the basic block.
// Note: only inreg arguments to the parent function are valid as
// inputs to this intrinsic, computed values cannot be used.
def int_amdgcn_init_exec_from_input : Intrinsic<[],
  [llvm_i32_ty,       // 32-bit SGPR input
   llvm_i32_ty],      // bit offset of the thread count
  [IntrConvergent, ImmArg<ArgIndex<1>>]>;

def int_amdgcn_wavefrontsize :
  GCCBuiltin<"__builtin_amdgcn_wavefrontsize">,
  Intrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable, IntrWillReturn]>;

//===----------------------------------------------------------------------===//
// Instruction Intrinsics
//===----------------------------------------------------------------------===//

// The first parameter is s_sendmsg immediate (i16),
// the second one is copied to m0
def int_amdgcn_s_sendmsg : GCCBuiltin<"__builtin_amdgcn_s_sendmsg">,
  Intrinsic <[], [llvm_i32_ty, llvm_i32_ty],
  [ImmArg<ArgIndex<0>>, IntrNoMem, IntrHasSideEffects]>;
def int_amdgcn_s_sendmsghalt : GCCBuiltin<"__builtin_amdgcn_s_sendmsghalt">,
  Intrinsic <[], [llvm_i32_ty, llvm_i32_ty],
  [ImmArg<ArgIndex<0>>, IntrNoMem, IntrHasSideEffects]>;

def int_amdgcn_s_barrier : GCCBuiltin<"__builtin_amdgcn_s_barrier">,
  Intrinsic<[], [], [IntrNoMem, IntrHasSideEffects, IntrConvergent, IntrWillReturn]>;

def int_amdgcn_wave_barrier : GCCBuiltin<"__builtin_amdgcn_wave_barrier">,
  Intrinsic<[], [], [IntrNoMem, IntrHasSideEffects, IntrConvergent, IntrWillReturn]>;

def int_amdgcn_s_waitcnt : GCCBuiltin<"__builtin_amdgcn_s_waitcnt">,
  Intrinsic<[], [llvm_i32_ty],
  [ImmArg<ArgIndex<0>>, IntrNoMem, IntrHasSideEffects, IntrWillReturn]>;

def int_amdgcn_div_scale : Intrinsic<
  // 1st parameter: Numerator
  // 2nd parameter: Denominator
  // 3rd parameter: Select quotient. Must equal Numerator or Denominator.
  //                (0 = Denominator, 1 = Numerator).
  [llvm_anyfloat_ty, llvm_i1_ty],
  [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i1_ty],
  [IntrNoMem, IntrSpeculatable, ImmArg<ArgIndex<2>>, IntrWillReturn]
>;

def int_amdgcn_div_fmas : Intrinsic<[llvm_anyfloat_ty],
  [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>, llvm_i1_ty],
  [IntrNoMem, IntrSpeculatable, IntrWillReturn]
>;

def int_amdgcn_div_fixup : Intrinsic<[llvm_anyfloat_ty],
  [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>],
  [IntrNoMem, IntrSpeculatable, IntrWillReturn]
>;

// Look Up 2.0 / pi src0 with segment select src1[4:0]
def int_amdgcn_trig_preop : Intrinsic<
  [llvm_anyfloat_ty], [LLVMMatchType<0>, llvm_i32_ty],
  [IntrNoMem, IntrSpeculatable, IntrWillReturn]
>;

def int_amdgcn_sin : Intrinsic<
  [llvm_anyfloat_ty], [LLVMMatchType<0>],
  [IntrNoMem, IntrSpeculatable, IntrWillReturn]
>;

def int_amdgcn_cos : Intrinsic<
  [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable, IntrWillReturn]
>;

def int_amdgcn_log_clamp : Intrinsic<
  [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable, IntrWillReturn]
>;

def int_amdgcn_fmul_legacy : GCCBuiltin<"__builtin_amdgcn_fmul_legacy">,
  Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
  [IntrNoMem, IntrSpeculatable, IntrWillReturn, Commutative]
>;

// Fused single-precision multiply-add with legacy behaviour for the multiply,
// which is that +/- 0.0 * anything (even NaN or infinity) is +0.0. This is
// intended for use on subtargets that have the v_fma_legacy_f32 and/or
// v_fmac_legacy_f32 instructions. (Note that v_fma_legacy_f16 is unrelated and
// has a completely different kind of legacy behaviour.)
def int_amdgcn_fma_legacy :
  Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty],
  [IntrNoMem, IntrSpeculatable, IntrWillReturn, Commutative]
>;

def int_amdgcn_rcp : Intrinsic<
  [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable, IntrWillReturn]
>;

def int_amdgcn_rcp_legacy : GCCBuiltin<"__builtin_amdgcn_rcp_legacy">,
  Intrinsic<[llvm_float_ty], [llvm_float_ty],
  [IntrNoMem, IntrSpeculatable, IntrWillReturn]
>;

def int_amdgcn_sqrt : Intrinsic<
  [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable, IntrWillReturn]
>;

def int_amdgcn_rsq : Intrinsic<
  [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable, IntrWillReturn]
>;

def int_amdgcn_rsq_legacy : GCCBuiltin<"__builtin_amdgcn_rsq_legacy">,
  Intrinsic<
  [llvm_float_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable, IntrWillReturn]
>;

// out = 1.0 / sqrt(a) result clamped to +/- max_float.
def int_amdgcn_rsq_clamp : Intrinsic<
  [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable, IntrWillReturn]>;

def int_amdgcn_ldexp : Intrinsic<
  [llvm_anyfloat_ty], [LLVMMatchType<0>, llvm_i32_ty],
  [IntrNoMem, IntrSpeculatable, IntrWillReturn]
>;

def int_amdgcn_frexp_mant : Intrinsic<
  [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable, IntrWillReturn]
>;

def int_amdgcn_frexp_exp : Intrinsic<
  [llvm_anyint_ty], [llvm_anyfloat_ty], [IntrNoMem, IntrSpeculatable, IntrWillReturn]
>;

// v_fract is buggy on SI/CI. It mishandles infinities, may return 1.0
// and always uses rtz, so is not suitable for implementing the OpenCL
// fract function. It should be ok on VI.
def int_amdgcn_fract : Intrinsic<
  [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable, IntrWillReturn]
>;

def int_amdgcn_cvt_pkrtz : GCCBuiltin<"__builtin_amdgcn_cvt_pkrtz">,
  Intrinsic<[llvm_v2f16_ty], [llvm_float_ty, llvm_float_ty],
  [IntrNoMem, IntrSpeculatable, IntrWillReturn]
>;

def int_amdgcn_cvt_pknorm_i16 :
  GCCBuiltin<"__builtin_amdgcn_cvt_pknorm_i16">,
  Intrinsic<[llvm_v2i16_ty], [llvm_float_ty, llvm_float_ty],
  [IntrNoMem, IntrSpeculatable, IntrWillReturn]
>;

def int_amdgcn_cvt_pknorm_u16 :
  GCCBuiltin<"__builtin_amdgcn_cvt_pknorm_u16">,
  Intrinsic<[llvm_v2i16_ty], [llvm_float_ty, llvm_float_ty],
  [IntrNoMem, IntrSpeculatable, IntrWillReturn]
>;

def int_amdgcn_cvt_pk_i16 :
  GCCBuiltin<"__builtin_amdgcn_cvt_pk_i16">,
  Intrinsic<
  [llvm_v2i16_ty], [llvm_i32_ty, llvm_i32_ty],
  [IntrNoMem, IntrSpeculatable, IntrWillReturn]
>;

def int_amdgcn_cvt_pk_u16 : GCCBuiltin<"__builtin_amdgcn_cvt_pk_u16">,
  Intrinsic<[llvm_v2i16_ty], [llvm_i32_ty, llvm_i32_ty],
  [IntrNoMem, IntrSpeculatable, IntrWillReturn]
>;

def int_amdgcn_class : Intrinsic<
  [llvm_i1_ty], [llvm_anyfloat_ty, llvm_i32_ty],
  [IntrNoMem, IntrSpeculatable, IntrWillReturn]
>;

def int_amdgcn_fmed3 : GCCBuiltin<"__builtin_amdgcn_fmed3">,
  Intrinsic<[llvm_anyfloat_ty],
    [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>],
    [IntrNoMem, IntrSpeculatable, IntrWillReturn]
>;

def int_amdgcn_cubeid : GCCBuiltin<"__builtin_amdgcn_cubeid">,
  Intrinsic<[llvm_float_ty],
    [llvm_float_ty, llvm_float_ty, llvm_float_ty],
    [IntrNoMem, IntrSpeculatable, IntrWillReturn]
>;

def int_amdgcn_cubema : GCCBuiltin<"__builtin_amdgcn_cubema">,
  Intrinsic<[llvm_float_ty],
    [llvm_float_ty, llvm_float_ty, llvm_float_ty],
    [IntrNoMem, IntrSpeculatable, IntrWillReturn]
>;

def int_amdgcn_cubesc : GCCBuiltin<"__builtin_amdgcn_cubesc">,
  Intrinsic<[llvm_float_ty],
    [llvm_float_ty, llvm_float_ty, llvm_float_ty],
    [IntrNoMem, IntrSpeculatable, IntrWillReturn]
>;

def int_amdgcn_cubetc : GCCBuiltin<"__builtin_amdgcn_cubetc">,
  Intrinsic<[llvm_float_ty],
    [llvm_float_ty, llvm_float_ty, llvm_float_ty],
    [IntrNoMem, IntrSpeculatable, IntrWillReturn]
>;

// v_ffbh_i32, as opposed to v_ffbh_u32. For v_ffbh_u32, llvm.ctlz
// should be used.
def int_amdgcn_sffbh :
  Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>],
  [IntrNoMem, IntrSpeculatable, IntrWillReturn]
>;

// v_mad_f32|f16/v_mac_f32|f16, selected regardless of denorm support.
def int_amdgcn_fmad_ftz :
  Intrinsic<[llvm_anyfloat_ty],
    [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>],
    [IntrNoMem, IntrSpeculatable, IntrWillReturn]
>;

// Fields should mirror atomicrmw
class AMDGPUAtomicIncIntrin : Intrinsic<[llvm_anyint_ty],
  [llvm_anyptr_ty,
   LLVMMatchType<0>,
   llvm_i32_ty, // ordering
   llvm_i32_ty, // scope
   llvm_i1_ty], // isVolatile
  [IntrArgMemOnly, IntrWillReturn, NoCapture<ArgIndex<0>>,
   ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>], "",
  [SDNPMemOperand]
>;

def int_amdgcn_atomic_inc : AMDGPUAtomicIncIntrin;
def int_amdgcn_atomic_dec : AMDGPUAtomicIncIntrin;
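
// For illustration only: with the overloaded return/pointer types resolved to
// i32 in LDS, these mangle to a declaration along the lines of (a sketch, not
// something emitted by this file):
//   declare i32 @llvm.amdgcn.atomic.inc.i32.p3i32(
//       i32 addrspace(3)* nocapture, i32 %val,
//       i32 immarg %ordering, i32 immarg %scope, i1 immarg %isVolatile)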

class AMDGPULDSIntrin :
  Intrinsic<[llvm_any_ty],
    [LLVMQualPointerType<LLVMMatchType<0>, 3>,
     LLVMMatchType<0>,
     llvm_i32_ty, // ordering
     llvm_i32_ty, // scope
     llvm_i1_ty], // isVolatile
    [IntrArgMemOnly, IntrWillReturn, NoCapture<ArgIndex<0>>,
     ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>]
>;

// FIXME: The m0 argument should be moved after the normal arguments
class AMDGPUDSOrderedIntrinsic : Intrinsic<
  [llvm_i32_ty],
  // M0 = {hi16:address, lo16:waveID}. Allow passing M0 as a pointer, so that
  // the bit packing can be optimized at the IR level.
  [LLVMQualPointerType<llvm_i32_ty, 2>, // IntToPtr(M0)
   llvm_i32_ty, // value to add or swap
   llvm_i32_ty, // ordering
   llvm_i32_ty, // scope
   llvm_i1_ty,  // isVolatile
   llvm_i32_ty, // ordered count index (OA index), also added to the address
                // gfx10: bits 24-27 indicate the number of active threads/dwords
   llvm_i1_ty,  // wave release, usually set to 1
   llvm_i1_ty], // wave done, set to 1 for the last ordered instruction
  [IntrWillReturn, NoCapture<ArgIndex<0>>,
   ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>,
   ImmArg<ArgIndex<5>>, ImmArg<ArgIndex<6>>, ImmArg<ArgIndex<7>>
  ]
>;

class AMDGPUDSAppendConsumedIntrinsic : Intrinsic<
  [llvm_i32_ty],
  [llvm_anyptr_ty, // LDS or GDS ptr
   llvm_i1_ty],    // isVolatile
  [IntrConvergent, IntrWillReturn, IntrArgMemOnly,
   NoCapture<ArgIndex<0>>, ImmArg<ArgIndex<1>>],
  "",
  [SDNPMemOperand]
>;

def int_amdgcn_ds_ordered_add : AMDGPUDSOrderedIntrinsic;
def int_amdgcn_ds_ordered_swap : AMDGPUDSOrderedIntrinsic;

// The pointer argument is assumed to be dynamically uniform if a VGPR.
def int_amdgcn_ds_append : AMDGPUDSAppendConsumedIntrinsic;
def int_amdgcn_ds_consume : AMDGPUDSAppendConsumedIntrinsic;

def int_amdgcn_ds_fadd : AMDGPULDSIntrin;
def int_amdgcn_ds_fmin : AMDGPULDSIntrin;
def int_amdgcn_ds_fmax : AMDGPULDSIntrin;

} // TargetPrefix = "amdgcn"

// New-style image intrinsics

//////////////////////////////////////////////////////////////////////////
// Dimension-aware image intrinsics framework
//////////////////////////////////////////////////////////////////////////

// Helper class to represent (type, name) combinations of arguments. The
// argument names are explanatory and used as DAG operand names for codegen
// pattern matching.
class AMDGPUArg<LLVMType ty, string name> {
  LLVMType Type = ty;
  string Name = name;
}

// Return [AMDGPUArg<basety, names[0]>, AMDGPUArg<LLVMMatchType<0>, names[1]>, ...]
class makeArgList<list<string> names, LLVMType basety> {
  list<AMDGPUArg> ret =
    !listconcat([AMDGPUArg<basety, names[0]>],
                !foreach(name, !tail(names), AMDGPUArg<LLVMMatchType<0>, name>));
}
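
// Worked illustration (not part of the generated output):
//   makeArgList<["s", "t"], llvm_anyfloat_ty>.ret
// evaluates to
//   [AMDGPUArg<llvm_anyfloat_ty, "s">, AMDGPUArg<LLVMMatchType<0>, "t">]
// so only the first name introduces a fresh overloaded type; the remaining
// arguments are constrained to match it.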

// Return arglist, with LLVMMatchType's references shifted by 'shift'.
class arglistmatchshift<list<AMDGPUArg> arglist, int shift> {
  list<AMDGPUArg> ret =
    !foreach(arg, arglist,
             !if(!isa<LLVMMatchType>(arg.Type),
                 AMDGPUArg<LLVMMatchType<!add(!cast<LLVMMatchType>(arg.Type).Number, shift)>,
                           arg.Name>,
                 arg));
}

// Return the concatenation of the given arglists. LLVMMatchType's are adjusted
// accordingly, and shifted by an additional 'shift'.
class arglistconcat<list<list<AMDGPUArg>> arglists, int shift = 0> {
  list<AMDGPUArg> ret =
    !foldl([]<AMDGPUArg>, arglists, lhs, rhs,
           !listconcat(
             lhs,
             arglistmatchshift<rhs,
               !add(shift, !foldl(0, lhs, a, b,
                                  !add(a, b.Type.isAny)))>.ret));
}

// Represent texture/image types / dimensionality.
class AMDGPUDimProps<bits<3> enc, string name, string asmsuffix,
                     list<string> coord_names, list<string> slice_names,
                     bit msaa = 0> {
  AMDGPUDimProps Dim = !cast<AMDGPUDimProps>(NAME);
  string Name = name;           // e.g. "2darraymsaa"
  string AsmSuffix = asmsuffix; // e.g. 2D_MSAA_ARRAY (used in assembly strings)
  bits<3> Encoding = enc;
  bit DA = 0;                   // DA bit in MIMG encoding
  bit MSAA = msaa;

  list<AMDGPUArg> CoordSliceArgs =
    makeArgList<!listconcat(coord_names, slice_names), llvm_anyfloat_ty>.ret;
  list<AMDGPUArg> CoordSliceIntArgs =
    makeArgList<!listconcat(coord_names, slice_names), llvm_anyint_ty>.ret;
  list<AMDGPUArg> GradientArgs =
    makeArgList<!listconcat(!foreach(name, coord_names, "d" # name # "dh"),
                            !foreach(name, coord_names, "d" # name # "dv")),
                llvm_anyfloat_ty>.ret;

  bits<8> NumCoords = !size(CoordSliceArgs);
  bits<8> NumGradients = !size(GradientArgs);
}

def AMDGPUDim1D : AMDGPUDimProps<0x0, "1d", "1D", ["s"], []>;
def AMDGPUDim2D : AMDGPUDimProps<0x1, "2d", "2D", ["s", "t"], []>;
def AMDGPUDim3D : AMDGPUDimProps<0x2, "3d", "3D", ["s", "t", "r"], []>;
let DA = 1 in {
  def AMDGPUDimCube : AMDGPUDimProps<0x3, "cube", "CUBE", ["s", "t"], ["face"]>;
  def AMDGPUDim1DArray : AMDGPUDimProps<0x4, "1darray", "1D_ARRAY", ["s"], ["slice"]>;
  def AMDGPUDim2DArray : AMDGPUDimProps<0x5, "2darray", "2D_ARRAY", ["s", "t"], ["slice"]>;
}
def AMDGPUDim2DMsaa : AMDGPUDimProps<0x6, "2dmsaa", "2D_MSAA", ["s", "t"], ["fragid"], 1>;
let DA = 1 in {
  def AMDGPUDim2DArrayMsaa : AMDGPUDimProps<0x7, "2darraymsaa", "2D_MSAA_ARRAY", ["s", "t"], ["slice", "fragid"], 1>;
}

def AMDGPUDims {
  list<AMDGPUDimProps> NoMsaa = [AMDGPUDim1D, AMDGPUDim2D, AMDGPUDim3D,
                                 AMDGPUDimCube, AMDGPUDim1DArray,
                                 AMDGPUDim2DArray];
  list<AMDGPUDimProps> Msaa = [AMDGPUDim2DMsaa, AMDGPUDim2DArrayMsaa];
  list<AMDGPUDimProps> All = !listconcat(NoMsaa, Msaa);
}

// Represent sample variants, i.e. _C, _O, _B, ... and combinations thereof.
class AMDGPUSampleVariant<string ucmod, string lcmod, list<AMDGPUArg> extra_addr> {
  string UpperCaseMod = ucmod;
  string LowerCaseMod = lcmod;

  // {offset} {bias} {z-compare}
  list<AMDGPUArg> ExtraAddrArgs = extra_addr;
  bit Offset = false;
  bit Bias = false;
  bit ZCompare = false;
  bit Gradients = false;

  // Name of the {lod} or {clamp} argument that is appended to the coordinates,
  // if any.
  string LodOrClamp = "";
}

// AMDGPUSampleVariants: all variants supported by IMAGE_SAMPLE
// AMDGPUSampleVariantsNoGradients: variants supported by IMAGE_GATHER4
defset list<AMDGPUSampleVariant> AMDGPUSampleVariants = {
  multiclass AMDGPUSampleHelper_Offset<string ucmod, string lcmod,
                                       list<AMDGPUArg> extra_addr> {
    def NAME#lcmod : AMDGPUSampleVariant<ucmod, lcmod, extra_addr>;
    let Offset = true in
    def NAME#lcmod#_o : AMDGPUSampleVariant<
        ucmod#"_O", lcmod#"_o", !listconcat([AMDGPUArg<llvm_i32_ty, "offset">], extra_addr)>;
  }

  multiclass AMDGPUSampleHelper_Compare<string ucmod, string lcmod,
                                        list<AMDGPUArg> extra_addr> {
    defm NAME : AMDGPUSampleHelper_Offset<ucmod, lcmod, extra_addr>;
    let ZCompare = true in
    defm NAME : AMDGPUSampleHelper_Offset<
        "_C"#ucmod, "_c"#lcmod, !listconcat(extra_addr, [AMDGPUArg<llvm_float_ty, "zcompare">])>;
  }

  multiclass AMDGPUSampleHelper_Clamp<string ucmod, string lcmod,
                                      list<AMDGPUArg> extra_addr> {
    defm NAME : AMDGPUSampleHelper_Compare<ucmod, lcmod, extra_addr>;
    let LodOrClamp = "clamp" in
    defm NAME : AMDGPUSampleHelper_Compare<ucmod#"_CL", lcmod#"_cl", extra_addr>;
  }

  defset list<AMDGPUSampleVariant> AMDGPUSampleVariantsNoGradients = {
    defm AMDGPUSample : AMDGPUSampleHelper_Clamp<"", "", []>;
    let Bias = true in
    defm AMDGPUSample : AMDGPUSampleHelper_Clamp<
        "_B", "_b", [AMDGPUArg<llvm_anyfloat_ty, "bias">]>;
    let LodOrClamp = "lod" in
    defm AMDGPUSample : AMDGPUSampleHelper_Compare<"_L", "_l", []>;
    defm AMDGPUSample : AMDGPUSampleHelper_Compare<"_LZ", "_lz", []>;
  }

  let Gradients = true in {
    defm AMDGPUSample : AMDGPUSampleHelper_Clamp<"_D", "_d", []>;
    defm AMDGPUSample : AMDGPUSampleHelper_Clamp<"_CD", "_cd", []>;
  }
}
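
// As a worked illustration of the nesting above: the plain
// AMDGPUSampleHelper_Clamp<"", "", []> instantiation alone expands to the
// eight variants "", "_o", "_c", "_c_o", "_cl", "_cl_o", "_c_cl" and
// "_c_cl_o", which later become intrinsic suffixes such as
// int_amdgcn_image_sample_c_cl_o.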

// Helper class to capture the profile of a dimension-aware image intrinsic.
// This information is used to generate the intrinsic's type and to inform
// codegen pattern matching.
class AMDGPUDimProfile<string opmod,
                       AMDGPUDimProps dim> {
  AMDGPUDimProps Dim = dim;
  string OpMod = opmod; // the corresponding instruction is named IMAGE_OpMod

  // These are intended to be overwritten by subclasses
  bit IsSample = false;
  bit IsAtomic = false;
  list<LLVMType> RetTypes = [];
  list<AMDGPUArg> DataArgs = [];
  list<AMDGPUArg> ExtraAddrArgs = [];
  bit Offset = false;
  bit Bias = false;
  bit ZCompare = false;
  bit Gradients = false;
  string LodClampMip = "";

  int NumRetAndDataAnyTypes =
    !foldl(0, !listconcat(RetTypes, !foreach(arg, DataArgs, arg.Type)), a, b,
           !add(a, b.isAny));

  list<AMDGPUArg> AddrArgs =
    arglistconcat<[ExtraAddrArgs,
                   !if(Gradients, dim.GradientArgs, []),
                   !listconcat(!if(IsSample, dim.CoordSliceArgs, dim.CoordSliceIntArgs),
                               !if(!empty(LodClampMip),
                                   []<AMDGPUArg>,
                                   [AMDGPUArg<LLVMMatchType<0>, LodClampMip>]))],
                  NumRetAndDataAnyTypes>.ret;
  list<LLVMType> AddrTypes = !foreach(arg, AddrArgs, arg.Type);
  list<AMDGPUArg> AddrDefaultArgs =
    !foreach(arg, AddrArgs,
             AMDGPUArg<!if(!or(arg.Type.isAny, !isa<LLVMMatchType>(arg.Type)),
                           !if(IsSample, llvm_float_ty, llvm_i32_ty), arg.Type),
                       arg.Name>);
  list<AMDGPUArg> AddrA16Args =
    !foreach(arg, AddrArgs,
             AMDGPUArg<!if(!or(arg.Type.isAny, !isa<LLVMMatchType>(arg.Type)),
                           !if(IsSample, llvm_half_ty, llvm_i16_ty), arg.Type),
                       arg.Name>);
}

class AMDGPUDimProfileCopy<AMDGPUDimProfile base> : AMDGPUDimProfile<base.OpMod, base.Dim> {
  let IsSample = base.IsSample;
  let IsAtomic = base.IsAtomic;
  let RetTypes = base.RetTypes;
  let DataArgs = base.DataArgs;
  let ExtraAddrArgs = base.ExtraAddrArgs;
  let Offset = base.Offset;
  let Bias = base.Bias;
  let ZCompare = base.ZCompare;
  let Gradients = base.Gradients;
  let LodClampMip = base.LodClampMip;
}

class AMDGPUDimSampleProfile<string opmod,
                             AMDGPUDimProps dim,
                             AMDGPUSampleVariant sample> : AMDGPUDimProfile<opmod, dim> {
  let IsSample = true;
  let RetTypes = [llvm_any_ty];
  let ExtraAddrArgs = sample.ExtraAddrArgs;
  let Offset = sample.Offset;
  let Bias = sample.Bias;
  let ZCompare = sample.ZCompare;
  let Gradients = sample.Gradients;
  let LodClampMip = sample.LodOrClamp;
}

class AMDGPUDimNoSampleProfile<string opmod,
                               AMDGPUDimProps dim,
                               list<LLVMType> retty,
                               list<AMDGPUArg> dataargs,
                               bit Mip = false> : AMDGPUDimProfile<opmod, dim> {
  let RetTypes = retty;
  let DataArgs = dataargs;
  let LodClampMip = !if(Mip, "mip", "");
}

class AMDGPUDimAtomicProfile<string opmod,
                             AMDGPUDimProps dim,
                             list<AMDGPUArg> dataargs> : AMDGPUDimProfile<opmod, dim> {
  let RetTypes = [llvm_anyint_ty];
  let DataArgs = dataargs;
  let IsAtomic = true;
}

class AMDGPUDimAtomicFloatProfile<string opmod, AMDGPUDimProps dim,
                                  list<AMDGPUArg> dataargs>
    : AMDGPUDimAtomicProfile<opmod, dim, dataargs> {
  let RetTypes = [llvm_anyfloat_ty];
}

class AMDGPUDimGetResInfoProfile<AMDGPUDimProps dim>
    : AMDGPUDimProfile<"GET_RESINFO", dim> {
  let RetTypes = [llvm_anyfloat_ty];
  let DataArgs = [];
  let AddrArgs = [AMDGPUArg<llvm_anyint_ty, "mip">];
  let LodClampMip = "mip";
}

// Helper class for figuring out image intrinsic argument indexes.
class AMDGPUImageDimIntrinsicEval<AMDGPUDimProfile P_> {
  int NumDataArgs = !size(P_.DataArgs);
  int NumDmaskArgs = !not(P_.IsAtomic);
  int NumOffsetArgs = !if(P_.Offset, 1, 0);
  int NumBiasArgs = !if(P_.Bias, 1, 0);
  int NumZCompareArgs = !if(P_.ZCompare, 1, 0);
  int NumExtraAddrArgs = !add(NumOffsetArgs, NumBiasArgs, NumZCompareArgs);
  int NumVAddrArgs = !size(P_.AddrArgs);
  int NumGradientArgs = !if(P_.Gradients, !size(P_.Dim.GradientArgs), 0);
  int NumCoordArgs = !if(P_.IsSample, !size(P_.Dim.CoordSliceArgs), !size(P_.Dim.CoordSliceIntArgs));
  int NumRSrcArgs = 1;
  int NumSampArgs = !if(P_.IsSample, 2, 0);
  int DmaskArgIndex = NumDataArgs;
  int VAddrArgIndex = !add(DmaskArgIndex, NumDmaskArgs);
  int OffsetArgIndex = VAddrArgIndex;
  int BiasArgIndex = !add(VAddrArgIndex, NumOffsetArgs);
  int ZCompareArgIndex = !add(BiasArgIndex, NumBiasArgs);
  int GradientArgIndex = !add(VAddrArgIndex, NumExtraAddrArgs);
  int CoordArgIndex = !add(GradientArgIndex, NumGradientArgs);
  int LodArgIndex = !add(VAddrArgIndex, NumVAddrArgs, -1);
  int MipArgIndex = LodArgIndex;
  int RsrcArgIndex = !add(VAddrArgIndex, NumVAddrArgs);
  int SampArgIndex = !add(RsrcArgIndex, NumRSrcArgs);
  int UnormArgIndex = !add(SampArgIndex, 1);
  int TexFailCtrlArgIndex = !add(SampArgIndex, NumSampArgs);
  int CachePolicyArgIndex = !add(TexFailCtrlArgIndex, 1);
}
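
// Worked example (illustrative only): a plain 2D load profile has no data
// args, so DmaskArgIndex = 0 and VAddrArgIndex = 1, with the coords s, t at
// indices 1-2 and RsrcArgIndex = 3. With NumSampArgs = 0 for non-sample
// intrinsics, TexFailCtrlArgIndex = 4 and CachePolicyArgIndex = 5 -- matching
// the llvm.amdgcn.image.load.2d argument order assembled below.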

// All dimension-aware intrinsics are derived from this class.
class AMDGPUImageDimIntrinsic<AMDGPUDimProfile P_,
                              list<IntrinsicProperty> props,
                              list<SDNodeProperty> sdnodeprops> : Intrinsic<
    P_.RetTypes,                               // vdata(VGPR) -- for load/atomic-with-return
    !listconcat(
      !foreach(arg, P_.DataArgs, arg.Type),    // vdata(VGPR) -- for store/atomic
      !if(P_.IsAtomic, [], [llvm_i32_ty]),     // dmask(imm)
      P_.AddrTypes,                            // vaddr(VGPR)
      [llvm_v8i32_ty],                         // rsrc(SGPR)
      !if(P_.IsSample, [llvm_v4i32_ty,         // samp(SGPR)
                        llvm_i1_ty], []),      // unorm(imm)
      [llvm_i32_ty,                            // texfailctrl(imm; bit 0 = tfe, bit 1 = lwe)
       llvm_i32_ty]),                          // cachepolicy(imm; bit 0 = glc, bit 1 = slc, bit 2 = dlc)
    !listconcat(props,
      !if(P_.IsAtomic, [], [ImmArg<ArgIndex<AMDGPUImageDimIntrinsicEval<P_>.DmaskArgIndex>>]),
      !if(P_.IsSample, [ImmArg<ArgIndex<AMDGPUImageDimIntrinsicEval<P_>.UnormArgIndex>>], []),
      [IntrWillReturn],
      [ImmArg<ArgIndex<AMDGPUImageDimIntrinsicEval<P_>.TexFailCtrlArgIndex>>,
       ImmArg<ArgIndex<AMDGPUImageDimIntrinsicEval<P_>.CachePolicyArgIndex>>]),
    "", sdnodeprops>,
  AMDGPURsrcIntrinsic<!add(!size(P_.DataArgs), !size(P_.AddrTypes),
                           !if(P_.IsAtomic, 0, 1)), 1> {
  AMDGPUDimProfile P = P_;

  AMDGPUImageDimIntrinsic Intr = !cast<AMDGPUImageDimIntrinsic>(NAME);

  let TargetPrefix = "amdgcn";
}
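
// For reference, a basic sample intrinsic built from this class mangles to a
// declaration like the following (a sketch of the common v4f32/f32 case):
//   declare <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(
//       i32 immarg %dmask, float %s, float %t, <8 x i32> %rsrc,
//       <4 x i32> %samp, i1 immarg %unorm,
//       i32 immarg %texfailctrl, i32 immarg %cachepolicy)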

// Marker class for intrinsics with a DMask that determines the returned
// channels.
class AMDGPUImageDMaskIntrinsic;

defset list<AMDGPUImageDimIntrinsic> AMDGPUImageDimIntrinsics = {

  //////////////////////////////////////////////////////////////////////////
  // Load and store intrinsics
  //////////////////////////////////////////////////////////////////////////
  multiclass AMDGPUImageDimIntrinsicsNoMsaa<string opmod,
                                            list<LLVMType> retty,
                                            list<AMDGPUArg> dataargs,
                                            list<IntrinsicProperty> props,
                                            list<SDNodeProperty> sdnodeprops,
                                            bit Mip = false> {
    foreach dim = AMDGPUDims.NoMsaa in {
      def !strconcat(NAME, "_", dim.Name)
        : AMDGPUImageDimIntrinsic<
            AMDGPUDimNoSampleProfile<opmod, dim, retty, dataargs, Mip>,
            props, sdnodeprops>;
    }
  }

  multiclass AMDGPUImageDimIntrinsicsAll<string opmod,
                                         list<LLVMType> retty,
                                         list<AMDGPUArg> dataargs,
                                         list<IntrinsicProperty> props,
                                         list<SDNodeProperty> sdnodeprops,
                                         bit Mip = false> {
    foreach dim = AMDGPUDims.All in {
      def !strconcat(NAME, "_", dim.Name)
        : AMDGPUImageDimIntrinsic<
            AMDGPUDimNoSampleProfile<opmod, dim, retty, dataargs, Mip>,
            props, sdnodeprops>;
    }
  }

  defm int_amdgcn_image_load
    : AMDGPUImageDimIntrinsicsAll<"LOAD", [llvm_any_ty], [], [IntrReadMem],
                                  [SDNPMemOperand]>,
      AMDGPUImageDMaskIntrinsic;
  defm int_amdgcn_image_load_mip
    : AMDGPUImageDimIntrinsicsNoMsaa<"LOAD_MIP", [llvm_any_ty], [],
                                     [IntrReadMem, IntrWillReturn], [SDNPMemOperand], 1>,
      AMDGPUImageDMaskIntrinsic;

  defm int_amdgcn_image_store : AMDGPUImageDimIntrinsicsAll<
              "STORE", [], [AMDGPUArg<llvm_anyfloat_ty, "vdata">],
              [IntrWriteMem, IntrWillReturn], [SDNPMemOperand]>;
  defm int_amdgcn_image_store_mip : AMDGPUImageDimIntrinsicsNoMsaa<
              "STORE_MIP", [], [AMDGPUArg<llvm_anyfloat_ty, "vdata">],
              [IntrWriteMem, IntrWillReturn], [SDNPMemOperand], 1>;

  //////////////////////////////////////////////////////////////////////////
  // MSAA intrinsics
  //////////////////////////////////////////////////////////////////////////
  foreach dim = AMDGPUDims.Msaa in {
    def int_amdgcn_image_msaa_load_x # _ # dim.Name:
        AMDGPUImageDimIntrinsic<
            AMDGPUDimNoSampleProfile<"MSAA_LOAD_X", dim, [llvm_any_ty], []>,
            [IntrReadMem], [SDNPMemOperand]>;
  }

  //////////////////////////////////////////////////////////////////////////
  // sample and getlod intrinsics
  //////////////////////////////////////////////////////////////////////////
  multiclass AMDGPUImageDimSampleDims<string opmod,
                                      AMDGPUSampleVariant sample,
                                      bit NoMem = false> {
    foreach dim = AMDGPUDims.NoMsaa in {
      def !strconcat(NAME, "_", dim.Name) : AMDGPUImageDimIntrinsic<
          AMDGPUDimSampleProfile<opmod, dim, sample>,
          !if(NoMem, [IntrNoMem], [IntrReadMem]),
          !if(NoMem, [], [SDNPMemOperand])>;
    }
  }

  foreach sample = AMDGPUSampleVariants in {
    defm int_amdgcn_image_sample # sample.LowerCaseMod
      : AMDGPUImageDimSampleDims<"SAMPLE" # sample.UpperCaseMod, sample>,
        AMDGPUImageDMaskIntrinsic;
  }

  defm int_amdgcn_image_getlod
    : AMDGPUImageDimSampleDims<"GET_LOD", AMDGPUSample, 1>,
      AMDGPUImageDMaskIntrinsic;

  //////////////////////////////////////////////////////////////////////////
  // getresinfo intrinsics
  //////////////////////////////////////////////////////////////////////////
  foreach dim = AMDGPUDims.All in {
    def !strconcat("int_amdgcn_image_getresinfo_", dim.Name)
      : AMDGPUImageDimIntrinsic<AMDGPUDimGetResInfoProfile<dim>, [IntrNoMem], []>,
        AMDGPUImageDMaskIntrinsic;
  }

  //////////////////////////////////////////////////////////////////////////
  // gather4 intrinsics
  //////////////////////////////////////////////////////////////////////////
  foreach sample = AMDGPUSampleVariantsNoGradients in {
    foreach dim = [AMDGPUDim2D, AMDGPUDimCube, AMDGPUDim2DArray] in {
      def int_amdgcn_image_gather4 # sample.LowerCaseMod # _ # dim.Name:
          AMDGPUImageDimIntrinsic<
              AMDGPUDimSampleProfile<"GATHER4" # sample.UpperCaseMod, dim, sample>,
              [IntrReadMem], [SDNPMemOperand]>;
    }
  }
}

//////////////////////////////////////////////////////////////////////////
// atomic intrinsics
//////////////////////////////////////////////////////////////////////////
defset list<AMDGPUImageDimIntrinsic> AMDGPUImageDimAtomicIntrinsics = {
  multiclass AMDGPUImageDimAtomicX<string opmod, list<AMDGPUArg> dataargs,
                                   int isFloat = 0> {
    foreach dim = AMDGPUDims.All in {
      def !strconcat(NAME, "_", dim.Name): AMDGPUImageDimIntrinsic<
          !if (isFloat, AMDGPUDimAtomicFloatProfile<opmod, dim, dataargs>,
               AMDGPUDimAtomicProfile<opmod, dim, dataargs>),
          [], [SDNPMemOperand]>;
    }
  }

  multiclass AMDGPUImageDimAtomic<string opmod, int isFloat = 0> {
    defm ""
        : AMDGPUImageDimAtomicX<opmod, [AMDGPUArg<LLVMMatchType<0>, "vdata">],
                                isFloat>;
  }

  multiclass AMDGPUImageDimFloatAtomic<string opmod> {
    defm "" : AMDGPUImageDimAtomic<opmod, 1 /*isFloat*/>;
  }

  defm int_amdgcn_image_atomic_swap : AMDGPUImageDimAtomic<"ATOMIC_SWAP">;
  defm int_amdgcn_image_atomic_add : AMDGPUImageDimAtomic<"ATOMIC_ADD">;
  defm int_amdgcn_image_atomic_sub : AMDGPUImageDimAtomic<"ATOMIC_SUB">;
  defm int_amdgcn_image_atomic_smin : AMDGPUImageDimAtomic<"ATOMIC_SMIN">;
  defm int_amdgcn_image_atomic_umin : AMDGPUImageDimAtomic<"ATOMIC_UMIN">;
  defm int_amdgcn_image_atomic_fmin : AMDGPUImageDimFloatAtomic<"ATOMIC_FMIN">;
  defm int_amdgcn_image_atomic_smax : AMDGPUImageDimAtomic<"ATOMIC_SMAX">;
  defm int_amdgcn_image_atomic_umax : AMDGPUImageDimAtomic<"ATOMIC_UMAX">;
  defm int_amdgcn_image_atomic_fmax : AMDGPUImageDimFloatAtomic<"ATOMIC_FMAX">;
  defm int_amdgcn_image_atomic_and : AMDGPUImageDimAtomic<"ATOMIC_AND">;
  defm int_amdgcn_image_atomic_or : AMDGPUImageDimAtomic<"ATOMIC_OR">;
  defm int_amdgcn_image_atomic_xor : AMDGPUImageDimAtomic<"ATOMIC_XOR">;
  defm int_amdgcn_image_atomic_inc : AMDGPUImageDimAtomic<"ATOMIC_INC">;
  defm int_amdgcn_image_atomic_dec : AMDGPUImageDimAtomic<"ATOMIC_DEC">;

  defm int_amdgcn_image_atomic_cmpswap :
      AMDGPUImageDimAtomicX<"ATOMIC_CMPSWAP", [AMDGPUArg<LLVMMatchType<0>, "src">,
                                               AMDGPUArg<LLVMMatchType<0>, "cmp">]>;
}

//////////////////////////////////////////////////////////////////////////
// Buffer intrinsics
//////////////////////////////////////////////////////////////////////////

let TargetPrefix = "amdgcn" in {

defset list<AMDGPURsrcIntrinsic> AMDGPUBufferIntrinsics = {

class AMDGPUBufferLoad<LLVMType data_ty = llvm_any_ty> : Intrinsic <
  [data_ty],
  [llvm_v4i32_ty,     // rsrc(SGPR)
   llvm_i32_ty,       // vindex(VGPR)
   llvm_i32_ty,       // offset(SGPR/VGPR/imm)
   llvm_i1_ty,        // glc(imm)
   llvm_i1_ty],       // slc(imm)
  [IntrReadMem, IntrWillReturn,
   ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>], "", [SDNPMemOperand]>,
  AMDGPURsrcIntrinsic<0>;
def int_amdgcn_buffer_load_format : AMDGPUBufferLoad<llvm_anyfloat_ty>;
def int_amdgcn_buffer_load : AMDGPUBufferLoad;

def int_amdgcn_s_buffer_load : Intrinsic <
  [llvm_any_ty],
  [llvm_v4i32_ty,     // rsrc(SGPR)
   llvm_i32_ty,       // byte offset(SGPR/imm)
   llvm_i32_ty],      // cachepolicy(imm; bit 0 = glc, bit 2 = dlc)
  [IntrNoMem, IntrWillReturn, ImmArg<ArgIndex<2>>]>,
  AMDGPURsrcIntrinsic<0>;

class AMDGPUBufferStore<LLVMType data_ty = llvm_any_ty> : Intrinsic <
  [],
  [data_ty,           // vdata(VGPR)
   llvm_v4i32_ty,     // rsrc(SGPR)
   llvm_i32_ty,       // vindex(VGPR)
   llvm_i32_ty,       // offset(SGPR/VGPR/imm)
   llvm_i1_ty,        // glc(imm)
   llvm_i1_ty],       // slc(imm)
  [IntrWriteMem, IntrWillReturn,
   ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>], "", [SDNPMemOperand]>,
  AMDGPURsrcIntrinsic<1>;
def int_amdgcn_buffer_store_format : AMDGPUBufferStore<llvm_anyfloat_ty>;
def int_amdgcn_buffer_store : AMDGPUBufferStore;

// New buffer intrinsics with separate raw and struct variants. The raw
// variant never has an index. The struct variant always has an index, even if
// it is const 0. A struct intrinsic with constant 0 index is different from
// the corresponding raw intrinsic on gfx9+ because the behavior of bounds
// checking and swizzling changes depending on whether idxen is set in the
// instruction. These new intrinsics also keep the offset and soffset arguments
// separate as they behave differently in bounds checking and swizzling.
class AMDGPURawBufferLoad<LLVMType data_ty = llvm_any_ty> : Intrinsic <
  [data_ty],
  [llvm_v4i32_ty,     // rsrc(SGPR)
   llvm_i32_ty,       // offset(VGPR/imm, included in bounds checking and swizzling)
   llvm_i32_ty,       // soffset(SGPR/imm, excluded from bounds checking and swizzling)
   llvm_i32_ty],      // auxiliary data (imm, cachepolicy (bit 0 = glc,
                      //                                   bit 1 = slc,
                      //                                   bit 2 = dlc on gfx10+),
                      //                 swizzled buffer (bit 3 = swz))
  [IntrReadMem, IntrWillReturn, ImmArg<ArgIndex<3>>], "", [SDNPMemOperand]>,
  AMDGPURsrcIntrinsic<0>;
def int_amdgcn_raw_buffer_load_format : AMDGPURawBufferLoad<llvm_anyfloat_ty>;
def int_amdgcn_raw_buffer_load : AMDGPURawBufferLoad;

class AMDGPUStructBufferLoad<LLVMType data_ty = llvm_any_ty> : Intrinsic <
  [data_ty],
  [llvm_v4i32_ty,     // rsrc(SGPR)
   llvm_i32_ty,       // vindex(VGPR)
   llvm_i32_ty,       // offset(VGPR/imm, included in bounds checking and swizzling)
   llvm_i32_ty,       // soffset(SGPR/imm, excluded from bounds checking and swizzling)
   llvm_i32_ty],      // auxiliary data (imm, cachepolicy (bit 0 = glc,
                      //                                   bit 1 = slc,
                      //                                   bit 2 = dlc on gfx10+),
                      //                 swizzled buffer (bit 3 = swz))
  [IntrReadMem, IntrWillReturn, ImmArg<ArgIndex<4>>], "", [SDNPMemOperand]>,
  AMDGPURsrcIntrinsic<0>;
def int_amdgcn_struct_buffer_load_format : AMDGPUStructBufferLoad;
def int_amdgcn_struct_buffer_load : AMDGPUStructBufferLoad;
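
// Illustrative IR for the raw/struct distinction described above (a sketch;
// the trailing i32 carries the cachepolicy/swz bits and is 0 here):
//   %v = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(
//            <4 x i32> %rsrc, i32 %offset, i32 %soffset, i32 0)
//   %w = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(
//            <4 x i32> %rsrc, i32 %vindex, i32 %offset, i32 %soffset, i32 0)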
  866. class AMDGPURawBufferStore<LLVMType data_ty = llvm_any_ty> : Intrinsic <
  867. [],
  868. [data_ty, // vdata(VGPR)
  869. llvm_v4i32_ty, // rsrc(SGPR)
  870. llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling)
  871. llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling)
  872. llvm_i32_ty], // auxiliary data (imm, cachepolicy (bit 0 = glc,
  873. // bit 1 = slc,
  874. // bit 2 = dlc on gfx10+),
  875. // swizzled buffer (bit 3 = swz))
  876. [IntrWriteMem, IntrWillReturn, ImmArg<ArgIndex<4>>], "", [SDNPMemOperand]>,
  877. AMDGPURsrcIntrinsic<1>;
  878. def int_amdgcn_raw_buffer_store_format : AMDGPURawBufferStore<llvm_anyfloat_ty>;
  879. def int_amdgcn_raw_buffer_store : AMDGPURawBufferStore;
  880. class AMDGPUStructBufferStore<LLVMType data_ty = llvm_any_ty> : Intrinsic <
  881. [],
  882. [data_ty, // vdata(VGPR)
  883. llvm_v4i32_ty, // rsrc(SGPR)
  884. llvm_i32_ty, // vindex(VGPR)
  885. llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling)
  886. llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling)
  887. llvm_i32_ty], // auxiliary data (imm, cachepolicy (bit 0 = glc,
  888. // bit 1 = slc,
  889. // bit 2 = dlc on gfx10+),
  890. // swizzled buffer (bit 3 = swz))
  891. [IntrWriteMem, IntrWillReturn, ImmArg<ArgIndex<5>>], "", [SDNPMemOperand]>,
  892. AMDGPURsrcIntrinsic<1>;
  893. def int_amdgcn_struct_buffer_store_format : AMDGPUStructBufferStore;
  894. def int_amdgcn_struct_buffer_store : AMDGPUStructBufferStore;
  895. class AMDGPURawBufferAtomic<LLVMType data_ty = llvm_any_ty, bit NoRtn = false> : Intrinsic <
  896. !if(NoRtn, [], [data_ty]),
  897. [!if(NoRtn, data_ty, LLVMMatchType<0>), // vdata(VGPR)
  898. llvm_v4i32_ty, // rsrc(SGPR)
  899. llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling)
  900. llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling)
  901. llvm_i32_ty], // cachepolicy(imm; bit 1 = slc)
  902. [ImmArg<ArgIndex<4>>, IntrWillReturn], "", [SDNPMemOperand]>,
  903. AMDGPURsrcIntrinsic<1, 0>;
  904. def int_amdgcn_raw_buffer_atomic_swap : AMDGPURawBufferAtomic;
  905. def int_amdgcn_raw_buffer_atomic_add : AMDGPURawBufferAtomic;
  906. def int_amdgcn_raw_buffer_atomic_sub : AMDGPURawBufferAtomic;
  907. def int_amdgcn_raw_buffer_atomic_smin : AMDGPURawBufferAtomic;
  908. def int_amdgcn_raw_buffer_atomic_umin : AMDGPURawBufferAtomic;
  909. def int_amdgcn_raw_buffer_atomic_fmin : AMDGPURawBufferAtomic<llvm_anyfloat_ty>;
  910. def int_amdgcn_raw_buffer_atomic_smax : AMDGPURawBufferAtomic;
  911. def int_amdgcn_raw_buffer_atomic_umax : AMDGPURawBufferAtomic;
  912. def int_amdgcn_raw_buffer_atomic_fmax : AMDGPURawBufferAtomic<llvm_anyfloat_ty>;
  913. def int_amdgcn_raw_buffer_atomic_and : AMDGPURawBufferAtomic;
  914. def int_amdgcn_raw_buffer_atomic_or : AMDGPURawBufferAtomic;
  915. def int_amdgcn_raw_buffer_atomic_xor : AMDGPURawBufferAtomic;
  916. def int_amdgcn_raw_buffer_atomic_inc : AMDGPURawBufferAtomic;
  917. def int_amdgcn_raw_buffer_atomic_dec : AMDGPURawBufferAtomic;
  918. def int_amdgcn_raw_buffer_atomic_cmpswap : Intrinsic<
  919. [llvm_anyint_ty],
  920. [LLVMMatchType<0>, // src(VGPR)
  921. LLVMMatchType<0>, // cmp(VGPR)
  922. llvm_v4i32_ty, // rsrc(SGPR)
  923. llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling)
  924. llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling)
  925. llvm_i32_ty], // cachepolicy(imm; bit 1 = slc)
  926. [ImmArg<ArgIndex<5>>, IntrWillReturn], "", [SDNPMemOperand]>,
  927. AMDGPURsrcIntrinsic<2, 0>;
  928. // gfx908 intrinsic
  929. def int_amdgcn_raw_buffer_atomic_fadd : AMDGPURawBufferAtomic<llvm_anyfloat_ty>;
  930. class AMDGPUStructBufferAtomic<LLVMType data_ty = llvm_any_ty, bit NoRtn = false> : Intrinsic <
  931. !if(NoRtn, [], [data_ty]),
  932. [!if(NoRtn, data_ty, LLVMMatchType<0>), // vdata(VGPR)
  933. llvm_v4i32_ty, // rsrc(SGPR)
  934. llvm_i32_ty, // vindex(VGPR)
  935. llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling)
  936. llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling)
  937. llvm_i32_ty], // cachepolicy(imm; bit 1 = slc)
  938. [ImmArg<ArgIndex<5>>, IntrWillReturn], "", [SDNPMemOperand]>,
  939. AMDGPURsrcIntrinsic<1, 0>;
  940. def int_amdgcn_struct_buffer_atomic_swap : AMDGPUStructBufferAtomic;
  941. def int_amdgcn_struct_buffer_atomic_add : AMDGPUStructBufferAtomic;
  942. def int_amdgcn_struct_buffer_atomic_sub : AMDGPUStructBufferAtomic;
  943. def int_amdgcn_struct_buffer_atomic_smin : AMDGPUStructBufferAtomic;
  944. def int_amdgcn_struct_buffer_atomic_umin : AMDGPUStructBufferAtomic;
  945. def int_amdgcn_struct_buffer_atomic_smax : AMDGPUStructBufferAtomic;
  946. def int_amdgcn_struct_buffer_atomic_umax : AMDGPUStructBufferAtomic;
  947. def int_amdgcn_struct_buffer_atomic_and : AMDGPUStructBufferAtomic;
  948. def int_amdgcn_struct_buffer_atomic_or : AMDGPUStructBufferAtomic;
  949. def int_amdgcn_struct_buffer_atomic_xor : AMDGPUStructBufferAtomic;
  950. def int_amdgcn_struct_buffer_atomic_inc : AMDGPUStructBufferAtomic;
  951. def int_amdgcn_struct_buffer_atomic_dec : AMDGPUStructBufferAtomic;
  952. def int_amdgcn_struct_buffer_atomic_cmpswap : Intrinsic<
  953. [llvm_anyint_ty],
  954. [LLVMMatchType<0>, // src(VGPR)
  955. LLVMMatchType<0>, // cmp(VGPR)
  956. llvm_v4i32_ty, // rsrc(SGPR)
  957. llvm_i32_ty, // vindex(VGPR)
  958. llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling)
  959. llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling)
  960. llvm_i32_ty], // cachepolicy(imm; bit 1 = slc)
  961. [ImmArg<ArgIndex<6>>, IntrWillReturn], "", [SDNPMemOperand]>,
  962. AMDGPURsrcIntrinsic<2, 0>;
  963. // gfx908 intrinsic
  964. def int_amdgcn_struct_buffer_atomic_fadd : AMDGPUStructBufferAtomic<llvm_anyfloat_ty>;
  965. // gfx90a intrinsics
  966. def int_amdgcn_struct_buffer_atomic_fmin : AMDGPUStructBufferAtomic<llvm_anyfloat_ty>;
  967. def int_amdgcn_struct_buffer_atomic_fmax : AMDGPUStructBufferAtomic<llvm_anyfloat_ty>;

// Obsolescent tbuffer intrinsics.
def int_amdgcn_tbuffer_load : Intrinsic <
    [llvm_any_ty],    // overloaded for types f32/i32, v2f32/v2i32, v4f32/v4i32
    [llvm_v4i32_ty,   // rsrc(SGPR)
     llvm_i32_ty,     // vindex(VGPR)
     llvm_i32_ty,     // voffset(VGPR)
     llvm_i32_ty,     // soffset(SGPR)
     llvm_i32_ty,     // offset(imm)
     llvm_i32_ty,     // dfmt(imm)
     llvm_i32_ty,     // nfmt(imm)
     llvm_i1_ty,      // glc(imm)
     llvm_i1_ty],     // slc(imm)
    [IntrReadMem, IntrWillReturn,
     ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>, ImmArg<ArgIndex<6>>,
     ImmArg<ArgIndex<7>>, ImmArg<ArgIndex<8>>], "", [SDNPMemOperand]>,
  AMDGPURsrcIntrinsic<0>;

def int_amdgcn_tbuffer_store : Intrinsic <
    [],
    [llvm_any_ty,    // vdata(VGPR), overloaded for types f32/i32, v2f32/v2i32, v4f32/v4i32
     llvm_v4i32_ty,  // rsrc(SGPR)
     llvm_i32_ty,    // vindex(VGPR)
     llvm_i32_ty,    // voffset(VGPR)
     llvm_i32_ty,    // soffset(SGPR)
     llvm_i32_ty,    // offset(imm)
     llvm_i32_ty,    // dfmt(imm)
     llvm_i32_ty,    // nfmt(imm)
     llvm_i1_ty,     // glc(imm)
     llvm_i1_ty],    // slc(imm)
    [IntrWriteMem, IntrWillReturn, ImmArg<ArgIndex<5>>,
     ImmArg<ArgIndex<6>>, ImmArg<ArgIndex<7>>,
     ImmArg<ArgIndex<8>>, ImmArg<ArgIndex<9>>], "", [SDNPMemOperand]>,
  AMDGPURsrcIntrinsic<1>;

// New tbuffer intrinsics, with:
// - raw and struct variants
// - joint format field
// - joint cachepolicy field
def int_amdgcn_raw_tbuffer_load : Intrinsic <
    [llvm_any_ty],    // overloaded for types f32/i32, v2f32/v2i32, v4f32/v4i32
    [llvm_v4i32_ty,   // rsrc(SGPR)
     llvm_i32_ty,     // offset(VGPR/imm, included in bounds checking and swizzling)
     llvm_i32_ty,     // soffset(SGPR/imm, excluded from bounds checking and swizzling)
     llvm_i32_ty,     // format(imm; bits 3..0 = dfmt, bits 6..4 = nfmt)
     llvm_i32_ty],    // auxiliary data (imm, cachepolicy (bit 0 = glc,
                      //                                   bit 1 = slc,
                      //                                   bit 2 = dlc on gfx10+),
                      //                 swizzled buffer (bit 3 = swz))
    [IntrReadMem, IntrWillReturn,
     ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>], "", [SDNPMemOperand]>,
  AMDGPURsrcIntrinsic<0>;
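
// Illustrative IR use of the raw tbuffer load above (a sketch; the f32
// overload and the dfmt/nfmt choice are assumptions). The format immediate
// packs as (nfmt << 4) | dfmt, so dfmt=4 with nfmt=4 encodes as 0x44 = 68:
//   %v = call float @llvm.amdgcn.raw.tbuffer.load.f32(
//            <4 x i32> %rsrc, i32 %offset, i32 0, i32 68, i32 0)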

def int_amdgcn_raw_tbuffer_store : Intrinsic <
    [],
    [llvm_any_ty,    // vdata(VGPR), overloaded for types f32/i32, v2f32/v2i32, v4f32/v4i32
     llvm_v4i32_ty,  // rsrc(SGPR)
     llvm_i32_ty,    // offset(VGPR/imm, included in bounds checking and swizzling)
     llvm_i32_ty,    // soffset(SGPR/imm, excluded from bounds checking and swizzling)
     llvm_i32_ty,    // format(imm; bits 3..0 = dfmt, bits 6..4 = nfmt)
     llvm_i32_ty],   // auxiliary data (imm, cachepolicy (bit 0 = glc,
                     //                                   bit 1 = slc,
                     //                                   bit 2 = dlc on gfx10+),
                     //                 swizzled buffer (bit 3 = swz))
    [IntrWriteMem, IntrWillReturn,
     ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>], "", [SDNPMemOperand]>,
  AMDGPURsrcIntrinsic<1>;

def int_amdgcn_struct_tbuffer_load : Intrinsic <
    [llvm_any_ty],    // overloaded for types f32/i32, v2f32/v2i32, v4f32/v4i32
    [llvm_v4i32_ty,   // rsrc(SGPR)
     llvm_i32_ty,     // vindex(VGPR)
     llvm_i32_ty,     // offset(VGPR/imm, included in bounds checking and swizzling)
     llvm_i32_ty,     // soffset(SGPR/imm, excluded from bounds checking and swizzling)
     llvm_i32_ty,     // format(imm; bits 3..0 = dfmt, bits 6..4 = nfmt)
     llvm_i32_ty],    // auxiliary data (imm, cachepolicy (bit 0 = glc,
                      //                                   bit 1 = slc,
                      //                                   bit 2 = dlc on gfx10+),
                      //                 swizzled buffer (bit 3 = swz))
    [IntrReadMem, IntrWillReturn,
     ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>], "", [SDNPMemOperand]>,
  AMDGPURsrcIntrinsic<0>;

def int_amdgcn_struct_tbuffer_store : Intrinsic <
    [],
    [llvm_any_ty,    // vdata(VGPR), overloaded for types f32/i32, v2f32/v2i32, v4f32/v4i32
     llvm_v4i32_ty,  // rsrc(SGPR)
     llvm_i32_ty,    // vindex(VGPR)
     llvm_i32_ty,    // offset(VGPR/imm, included in bounds checking and swizzling)
     llvm_i32_ty,    // soffset(SGPR/imm, excluded from bounds checking and swizzling)
     llvm_i32_ty,    // format(imm; bits 3..0 = dfmt, bits 6..4 = nfmt)
     llvm_i32_ty],   // auxiliary data (imm, cachepolicy (bit 0 = glc,
                     //                                   bit 1 = slc,
                     //                                   bit 2 = dlc on gfx10+),
                     //                 swizzled buffer (bit 3 = swz))
    [IntrWriteMem, IntrWillReturn,
     ImmArg<ArgIndex<5>>, ImmArg<ArgIndex<6>>], "", [SDNPMemOperand]>,
  AMDGPURsrcIntrinsic<1>;

class AMDGPUBufferAtomic : Intrinsic <
  [llvm_anyint_ty],
  [LLVMMatchType<0>,       // vdata(VGPR)
   llvm_v4i32_ty,          // rsrc(SGPR)
   llvm_i32_ty,            // vindex(VGPR)
   llvm_i32_ty,            // offset(SGPR/VGPR/imm)
   llvm_i1_ty],            // slc(imm)
  [ImmArg<ArgIndex<4>>, IntrWillReturn], "", [SDNPMemOperand]>,
  AMDGPURsrcIntrinsic<1, 0>;
def int_amdgcn_buffer_atomic_swap : AMDGPUBufferAtomic;
def int_amdgcn_buffer_atomic_add : AMDGPUBufferAtomic;
def int_amdgcn_buffer_atomic_sub : AMDGPUBufferAtomic;
def int_amdgcn_buffer_atomic_smin : AMDGPUBufferAtomic;
def int_amdgcn_buffer_atomic_umin : AMDGPUBufferAtomic;
def int_amdgcn_buffer_atomic_smax : AMDGPUBufferAtomic;
def int_amdgcn_buffer_atomic_umax : AMDGPUBufferAtomic;
def int_amdgcn_buffer_atomic_and : AMDGPUBufferAtomic;
def int_amdgcn_buffer_atomic_or : AMDGPUBufferAtomic;
def int_amdgcn_buffer_atomic_xor : AMDGPUBufferAtomic;
def int_amdgcn_buffer_atomic_cmpswap : Intrinsic<
  [llvm_i32_ty],
  [llvm_i32_ty,            // src(VGPR)
   llvm_i32_ty,            // cmp(VGPR)
   llvm_v4i32_ty,          // rsrc(SGPR)
   llvm_i32_ty,            // vindex(VGPR)
   llvm_i32_ty,            // offset(SGPR/VGPR/imm)
   llvm_i1_ty],            // slc(imm)
  [ImmArg<ArgIndex<5>>, IntrWillReturn], "", [SDNPMemOperand]>,
  AMDGPURsrcIntrinsic<2, 0>;

def int_amdgcn_buffer_atomic_csub : AMDGPUBufferAtomic;

class AMDGPUBufferAtomicFP : Intrinsic <
  [llvm_anyfloat_ty],
  [LLVMMatchType<0>,       // vdata(VGPR)
   llvm_v4i32_ty,          // rsrc(SGPR)
   llvm_i32_ty,            // vindex(VGPR)
   llvm_i32_ty,            // offset(SGPR/VGPR/imm)
   llvm_i1_ty],            // slc(imm)
  [ImmArg<ArgIndex<4>>, IntrWillReturn], "", [SDNPMemOperand]>,
  AMDGPURsrcIntrinsic<1, 0>;

// Legacy form of the intrinsic. raw and struct forms should be preferred.
def int_amdgcn_buffer_atomic_fadd : AMDGPUBufferAtomicFP;
} // defset AMDGPUBufferIntrinsics

// Uses that do not set the done bit should set IntrWriteMem on the
// call site.
def int_amdgcn_exp : Intrinsic <[], [
  llvm_i32_ty,       // tgt,
  llvm_i32_ty,       // en
  llvm_any_ty,       // src0 (f32 or i32)
  LLVMMatchType<0>,  // src1
  LLVMMatchType<0>,  // src2
  LLVMMatchType<0>,  // src3
  llvm_i1_ty,        // done
  llvm_i1_ty         // vm
  ],
  [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<1>>, ImmArg<ArgIndex<6>>,
   ImmArg<ArgIndex<7>>, IntrWriteMem, IntrInaccessibleMemOnly,
   IntrWillReturn]
>;

// exp with compr bit set.
def int_amdgcn_exp_compr : Intrinsic <[], [
  llvm_i32_ty,       // tgt,
  llvm_i32_ty,       // en
  llvm_anyvector_ty, // src0 (v2f16 or v2i16)
  LLVMMatchType<0>,  // src1
  llvm_i1_ty,        // done
  llvm_i1_ty],       // vm
  [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<1>>, ImmArg<ArgIndex<4>>,
   ImmArg<ArgIndex<5>>, IntrWriteMem, IntrInaccessibleMemOnly,
   IntrWillReturn]
>;
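
// Illustrative final export of four f32 components (a sketch; target 0,
// the all-channels en=15 mask, and setting done/vm are assumptions for a
// typical last pixel-shader export):
//   call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %r, float %g,
//                                  float %b, float %a, i1 true, i1 true)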

def int_amdgcn_buffer_wbinvl1_sc :
  GCCBuiltin<"__builtin_amdgcn_buffer_wbinvl1_sc">,
  Intrinsic<[], [], [IntrNoMem, IntrHasSideEffects, IntrWillReturn]>;

def int_amdgcn_buffer_wbinvl1 :
  GCCBuiltin<"__builtin_amdgcn_buffer_wbinvl1">,
  Intrinsic<[], [], [IntrNoMem, IntrHasSideEffects, IntrWillReturn]>;

def int_amdgcn_s_dcache_inv :
  GCCBuiltin<"__builtin_amdgcn_s_dcache_inv">,
  Intrinsic<[], [], [IntrNoMem, IntrHasSideEffects, IntrWillReturn]>;

def int_amdgcn_s_memtime :
  GCCBuiltin<"__builtin_amdgcn_s_memtime">,
  Intrinsic<[llvm_i64_ty], [], [IntrNoMem, IntrHasSideEffects, IntrWillReturn]>;

def int_amdgcn_s_sleep :
  GCCBuiltin<"__builtin_amdgcn_s_sleep">,
  Intrinsic<[], [llvm_i32_ty], [ImmArg<ArgIndex<0>>, IntrNoMem,
                                IntrHasSideEffects, IntrWillReturn]>;

def int_amdgcn_s_incperflevel :
  GCCBuiltin<"__builtin_amdgcn_s_incperflevel">,
  Intrinsic<[], [llvm_i32_ty], [ImmArg<ArgIndex<0>>, IntrNoMem,
                                IntrHasSideEffects, IntrWillReturn]>;

def int_amdgcn_s_decperflevel :
  GCCBuiltin<"__builtin_amdgcn_s_decperflevel">,
  Intrinsic<[], [llvm_i32_ty], [ImmArg<ArgIndex<0>>, IntrNoMem,
                                IntrHasSideEffects, IntrWillReturn]>;

def int_amdgcn_s_sethalt :
  Intrinsic<[], [llvm_i32_ty], [ImmArg<ArgIndex<0>>, IntrNoMem,
                                IntrHasSideEffects, IntrWillReturn]>;

def int_amdgcn_s_getreg :
  GCCBuiltin<"__builtin_amdgcn_s_getreg">,
  Intrinsic<[llvm_i32_ty], [llvm_i32_ty],
  [IntrInaccessibleMemOnly, IntrReadMem, IntrSpeculatable,
   IntrWillReturn, ImmArg<ArgIndex<0>>]
>;

// Note this can be used to set FP environment properties that are
// unsafe to change in non-strictfp functions. The register properties
// available (and value required to access them) may differ per
// subtarget. llvm.amdgcn.s.setreg(hwmode, value)
def int_amdgcn_s_setreg :
  GCCBuiltin<"__builtin_amdgcn_s_setreg">,
  Intrinsic<[], [llvm_i32_ty, llvm_i32_ty],
  [IntrNoMem, IntrHasSideEffects, IntrWillReturn, ImmArg<ArgIndex<0>>]
>;
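
// Illustrative read-modify-write of a hardware register (a sketch; the
// hwreg immediate encoding and the concrete field are subtarget-specific,
// so treat <hwreg_imm> below as a placeholder for the constant that ImmArg
// requires):
//   %old = call i32 @llvm.amdgcn.s.getreg(i32 <hwreg_imm>)
//   call void @llvm.amdgcn.s.setreg(i32 <hwreg_imm>, i32 %newval)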

// int_amdgcn_s_getpc is provided to allow a specific style of position
// independent code to determine the high part of its address when it is
// known (through convention) that the code and any data of interest do
// not cross a 4Gb address boundary. Use for any other purpose may not
// produce the desired results as optimizations may cause code movement,
// especially as we explicitly use IntrNoMem to allow optimizations.
def int_amdgcn_s_getpc :
  GCCBuiltin<"__builtin_amdgcn_s_getpc">,
  Intrinsic<[llvm_i64_ty], [], [IntrNoMem, IntrSpeculatable,
                                IntrWillReturn]>;

// __builtin_amdgcn_interp_mov <param>, <attr_chan>, <attr>, <m0>
// param values: 0 = P10, 1 = P20, 2 = P0
def int_amdgcn_interp_mov :
  GCCBuiltin<"__builtin_amdgcn_interp_mov">,
  Intrinsic<[llvm_float_ty],
            [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
            [IntrNoMem, IntrSpeculatable, IntrWillReturn,
             ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<1>>, ImmArg<ArgIndex<2>>]>;

// __builtin_amdgcn_interp_p1 <i>, <attr_chan>, <attr>, <m0>
// This intrinsic reads from lds, but the memory values are constant,
// so it behaves like IntrNoMem.
def int_amdgcn_interp_p1 :
  GCCBuiltin<"__builtin_amdgcn_interp_p1">,
  Intrinsic<[llvm_float_ty],
            [llvm_float_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
            [IntrNoMem, IntrSpeculatable, IntrWillReturn,
             ImmArg<ArgIndex<1>>, ImmArg<ArgIndex<2>>]>;

// __builtin_amdgcn_interp_p2 <p1>, <j>, <attr_chan>, <attr>, <m0>
def int_amdgcn_interp_p2 :
  GCCBuiltin<"__builtin_amdgcn_interp_p2">,
  Intrinsic<[llvm_float_ty],
            [llvm_float_ty, llvm_float_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
            [IntrNoMem, IntrSpeculatable, IntrWillReturn,
             ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<3>>]>;

// See int_amdgcn_interp_p1 for why this is IntrNoMem.
// __builtin_amdgcn_interp_p1_f16 <i>, <attr_chan>, <attr>, <high>, <m0>
// high selects whether high or low 16-bits are loaded from LDS
def int_amdgcn_interp_p1_f16 :
  GCCBuiltin<"__builtin_amdgcn_interp_p1_f16">,
  Intrinsic<[llvm_float_ty],
            [llvm_float_ty, llvm_i32_ty, llvm_i32_ty, llvm_i1_ty, llvm_i32_ty],
            [IntrNoMem, IntrSpeculatable, IntrWillReturn,
             ImmArg<ArgIndex<1>>, ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<3>>]>;

// __builtin_amdgcn_interp_p2_f16 <p1>, <j>, <attr_chan>, <attr>, <high>, <m0>
// high selects whether high or low 16-bits are loaded from LDS
def int_amdgcn_interp_p2_f16 :
  GCCBuiltin<"__builtin_amdgcn_interp_p2_f16">,
  Intrinsic<[llvm_half_ty],
            [llvm_float_ty, llvm_float_ty, llvm_i32_ty, llvm_i32_ty, llvm_i1_ty, llvm_i32_ty],
            [IntrNoMem, IntrSpeculatable, IntrWillReturn,
             ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>]>;
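
// Illustrative two-stage interpolation with the p1/p2 pair above (a sketch;
// attr_chan 0 and attr 0 are assumed, %i/%j are the barycentric coordinates,
// and %m0 is the descriptor word the hardware expects in m0):
//   %p1 = call float @llvm.amdgcn.interp.p1(float %i, i32 0, i32 0, i32 %m0)
//   %v  = call float @llvm.amdgcn.interp.p2(float %p1, float %j, i32 0, i32 0, i32 %m0)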

// Deprecated: use llvm.amdgcn.live.mask instead.
def int_amdgcn_ps_live : Intrinsic <
  [llvm_i1_ty],
  [],
  [IntrNoMem, IntrWillReturn]>;

// Query currently live lanes.
// Returns true if lane is live (and not a helper lane).
def int_amdgcn_live_mask : Intrinsic <[llvm_i1_ty],
  [], [IntrReadMem, IntrInaccessibleMemOnly, IntrWillReturn]
>;

def int_amdgcn_mbcnt_lo :
  GCCBuiltin<"__builtin_amdgcn_mbcnt_lo">,
  Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
            [IntrNoMem, IntrWillReturn]>;

def int_amdgcn_mbcnt_hi :
  GCCBuiltin<"__builtin_amdgcn_mbcnt_hi">,
  Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
            [IntrNoMem, IntrWillReturn]>;
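
// A common idiom built from the two defs above (a sketch, assuming wave64):
// count the set bits of a full mask below the current lane to get the lane id.
//   %lo   = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
//   %lane = call i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 %lo)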

// llvm.amdgcn.ds.swizzle src offset
def int_amdgcn_ds_swizzle :
  GCCBuiltin<"__builtin_amdgcn_ds_swizzle">,
  Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
            [IntrNoMem, IntrConvergent, IntrWillReturn,
             ImmArg<ArgIndex<1>>]>;

def int_amdgcn_ubfe : Intrinsic<[llvm_anyint_ty],
    [LLVMMatchType<0>, llvm_i32_ty, llvm_i32_ty],
    [IntrNoMem, IntrSpeculatable, IntrWillReturn]
>;

def int_amdgcn_sbfe : Intrinsic<[llvm_anyint_ty],
    [LLVMMatchType<0>, llvm_i32_ty, llvm_i32_ty],
    [IntrNoMem, IntrSpeculatable, IntrWillReturn]
>;

def int_amdgcn_lerp :
  GCCBuiltin<"__builtin_amdgcn_lerp">,
  Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
  [IntrNoMem, IntrSpeculatable, IntrWillReturn]
>;

def int_amdgcn_sad_u8 :
  GCCBuiltin<"__builtin_amdgcn_sad_u8">,
  Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
  [IntrNoMem, IntrSpeculatable, IntrWillReturn]
>;

def int_amdgcn_msad_u8 :
  GCCBuiltin<"__builtin_amdgcn_msad_u8">,
  Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
  [IntrNoMem, IntrSpeculatable, IntrWillReturn]
>;

def int_amdgcn_sad_hi_u8 :
  GCCBuiltin<"__builtin_amdgcn_sad_hi_u8">,
  Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
  [IntrNoMem, IntrSpeculatable, IntrWillReturn]
>;

def int_amdgcn_sad_u16 :
  GCCBuiltin<"__builtin_amdgcn_sad_u16">,
  Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
  [IntrNoMem, IntrSpeculatable, IntrWillReturn]
>;

def int_amdgcn_qsad_pk_u16_u8 :
  GCCBuiltin<"__builtin_amdgcn_qsad_pk_u16_u8">,
  Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i32_ty, llvm_i64_ty],
  [IntrNoMem, IntrSpeculatable, IntrWillReturn]
>;

def int_amdgcn_mqsad_pk_u16_u8 :
  GCCBuiltin<"__builtin_amdgcn_mqsad_pk_u16_u8">,
  Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i32_ty, llvm_i64_ty],
  [IntrNoMem, IntrSpeculatable, IntrWillReturn]
>;

def int_amdgcn_mqsad_u32_u8 :
  GCCBuiltin<"__builtin_amdgcn_mqsad_u32_u8">,
  Intrinsic<[llvm_v4i32_ty], [llvm_i64_ty, llvm_i32_ty, llvm_v4i32_ty],
  [IntrNoMem, IntrSpeculatable, IntrWillReturn]
>;

def int_amdgcn_cvt_pk_u8_f32 :
  GCCBuiltin<"__builtin_amdgcn_cvt_pk_u8_f32">,
  Intrinsic<[llvm_i32_ty], [llvm_float_ty, llvm_i32_ty, llvm_i32_ty],
  [IntrNoMem, IntrSpeculatable, IntrWillReturn]
>;

def int_amdgcn_icmp :
  Intrinsic<[llvm_anyint_ty], [llvm_anyint_ty, LLVMMatchType<1>, llvm_i32_ty],
            [IntrNoMem, IntrConvergent, IntrWillReturn,
             ImmArg<ArgIndex<2>>]>;

def int_amdgcn_fcmp :
  Intrinsic<[llvm_anyint_ty], [llvm_anyfloat_ty, LLVMMatchType<1>, llvm_i32_ty],
            [IntrNoMem, IntrConvergent, IntrWillReturn,
             ImmArg<ArgIndex<2>>]>;

def int_amdgcn_ballot :
  Intrinsic<[llvm_anyint_ty], [llvm_i1_ty],
            [IntrNoMem, IntrConvergent, IntrWillReturn]>;

def int_amdgcn_readfirstlane :
  GCCBuiltin<"__builtin_amdgcn_readfirstlane">,
  Intrinsic<[llvm_i32_ty], [llvm_i32_ty],
            [IntrNoMem, IntrConvergent, IntrWillReturn]>;

// The lane argument must be uniform across the currently active threads of the
// current wave. Otherwise, the result is undefined.
def int_amdgcn_readlane :
  GCCBuiltin<"__builtin_amdgcn_readlane">,
  Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
            [IntrNoMem, IntrConvergent, IntrWillReturn]>;

// The value to write and lane select arguments must be uniform across the
// currently active threads of the current wave. Otherwise, the result is
// undefined.
def int_amdgcn_writelane :
  GCCBuiltin<"__builtin_amdgcn_writelane">,
  Intrinsic<[llvm_i32_ty], [
    llvm_i32_ty,    // uniform value to write: returned by the selected lane
    llvm_i32_ty,    // uniform lane select
    llvm_i32_ty     // returned by all lanes other than the selected one
  ],
  [IntrNoMem, IntrConvergent, IntrWillReturn]
>;
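
// Illustrative lane accesses (a sketch; lane 0 is chosen arbitrarily and is
// assumed uniform, as the comments above require):
//   %first = call i32 @llvm.amdgcn.readfirstlane(i32 %v)
//   %l0    = call i32 @llvm.amdgcn.readlane(i32 %v, i32 0)
//   %mix   = call i32 @llvm.amdgcn.writelane(i32 %uni, i32 0, i32 %v)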

def int_amdgcn_alignbyte : GCCBuiltin<"__builtin_amdgcn_alignbyte">,
  Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
  [IntrNoMem, IntrSpeculatable, IntrWillReturn]
>;

def int_amdgcn_mul_i24 : Intrinsic<[llvm_i32_ty],
  [llvm_i32_ty, llvm_i32_ty],
  [IntrNoMem, IntrSpeculatable, IntrWillReturn]
>;

def int_amdgcn_mul_u24 : Intrinsic<[llvm_i32_ty],
  [llvm_i32_ty, llvm_i32_ty],
  [IntrNoMem, IntrSpeculatable, IntrWillReturn]
>;

def int_amdgcn_mulhi_i24 : Intrinsic<[llvm_i32_ty],
  [llvm_i32_ty, llvm_i32_ty],
  [IntrNoMem, IntrSpeculatable, IntrWillReturn]
>;

def int_amdgcn_mulhi_u24 : Intrinsic<[llvm_i32_ty],
  [llvm_i32_ty, llvm_i32_ty],
  [IntrNoMem, IntrSpeculatable, IntrWillReturn]
>;

// llvm.amdgcn.ds.gws.init(i32 bar_val, i32 resource_id)
//
// bar_val is the total number of waves that will wait on this
// barrier, minus 1.
def int_amdgcn_ds_gws_init :
  GCCBuiltin<"__builtin_amdgcn_ds_gws_init">,
  Intrinsic<[],
  [llvm_i32_ty, llvm_i32_ty],
  [IntrConvergent, IntrWriteMem,
   IntrInaccessibleMemOnly, IntrWillReturn], "",
  [SDNPMemOperand]
>;

// llvm.amdgcn.ds.gws.barrier(i32 vsrc0, i32 resource_id)
//
// vsrc0 is the total number of waves that will wait on this
// barrier, minus 1.
def int_amdgcn_ds_gws_barrier :
  GCCBuiltin<"__builtin_amdgcn_ds_gws_barrier">,
  Intrinsic<[],
  [llvm_i32_ty, llvm_i32_ty],
  [IntrConvergent, IntrInaccessibleMemOnly, IntrWillReturn], "",
  [SDNPMemOperand]
>;

// llvm.amdgcn.ds.gws.sema.v(i32 resource_id)
def int_amdgcn_ds_gws_sema_v :
  GCCBuiltin<"__builtin_amdgcn_ds_gws_sema_v">,
  Intrinsic<[],
  [llvm_i32_ty],
  [IntrConvergent, IntrInaccessibleMemOnly, IntrWillReturn], "",
  [SDNPMemOperand]
>;

// llvm.amdgcn.ds.gws.sema.br(i32 vsrc, i32 resource_id)
def int_amdgcn_ds_gws_sema_br :
  GCCBuiltin<"__builtin_amdgcn_ds_gws_sema_br">,
  Intrinsic<[],
  [llvm_i32_ty, llvm_i32_ty],
  [IntrConvergent, IntrInaccessibleMemOnly, IntrWillReturn], "",
  [SDNPMemOperand]
>;

// llvm.amdgcn.ds.gws.sema.p(i32 resource_id)
def int_amdgcn_ds_gws_sema_p :
  GCCBuiltin<"__builtin_amdgcn_ds_gws_sema_p">,
  Intrinsic<[],
  [llvm_i32_ty],
  [IntrConvergent, IntrInaccessibleMemOnly, IntrWillReturn], "",
  [SDNPMemOperand]
>;

// llvm.amdgcn.ds.gws.sema.release.all(i32 resource_id)
def int_amdgcn_ds_gws_sema_release_all :
  GCCBuiltin<"__builtin_amdgcn_ds_gws_sema_release_all">,
  Intrinsic<[],
  [llvm_i32_ty],
  [IntrConvergent, IntrInaccessibleMemOnly, IntrWillReturn], "",
  [SDNPMemOperand]
>;

// Copies the source value to the destination value, with the guarantee that
// the source value is computed as if the entire program were executed in WQM.
def int_amdgcn_wqm : Intrinsic<[llvm_any_ty],
  [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable, IntrWillReturn]
>;

// Copies the source value to the destination value, such that the source
// is computed as if the entire program were executed in WQM if any other
// program code executes in WQM.
def int_amdgcn_softwqm : Intrinsic<[llvm_any_ty],
  [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable, IntrWillReturn]
>;

// Return true if at least one thread within the pixel quad passes true into
// the function.
def int_amdgcn_wqm_vote : Intrinsic<[llvm_i1_ty],
  [llvm_i1_ty], [IntrNoMem, IntrConvergent, IntrWillReturn]
>;

// If false, set EXEC=0 for the current thread until the end of program.
// FIXME: Should this be IntrNoMem, IntrHasSideEffects, or IntrWillReturn?
def int_amdgcn_kill : Intrinsic<[], [llvm_i1_ty], []>;

def int_amdgcn_endpgm : GCCBuiltin<"__builtin_amdgcn_endpgm">,
  Intrinsic<[], [], [IntrNoReturn, IntrCold, IntrNoMem, IntrHasSideEffects]
>;

// If false, mark all active lanes as helper lanes until the end of program.
def int_amdgcn_wqm_demote : Intrinsic<[],
  [llvm_i1_ty], [IntrWriteMem, IntrInaccessibleMemOnly]
>;

// Copies the active channels of the source value to the destination value,
// with the guarantee that the source value is computed as if the entire
// program were executed in Whole Wavefront Mode, i.e. with all channels
// enabled, with a few exceptions:
// - Phi nodes which require WWM return an undefined value.
def int_amdgcn_strict_wwm : Intrinsic<[llvm_any_ty],
  [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable,
                       IntrConvergent, IntrWillReturn]
>;

// Deprecated. Use int_amdgcn_strict_wwm instead.
def int_amdgcn_wwm : Intrinsic<[llvm_any_ty],
  [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable,
                       IntrConvergent, IntrWillReturn]
>;

def int_amdgcn_strict_wqm : Intrinsic<[llvm_any_ty],
  [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable,
                       IntrConvergent, IntrWillReturn]
>;

// Given a value, copies it while setting all the inactive lanes to a given
// value. Note that OpenGL helper lanes are considered active, so if the
// program ever uses WQM, then the instruction and the first source will be
// computed in WQM.
def int_amdgcn_set_inactive :
  Intrinsic<[llvm_anyint_ty],
            [LLVMMatchType<0>,   // value to be copied
             LLVMMatchType<0>],  // value for the inactive lanes to take
            [IntrNoMem, IntrConvergent, IntrWillReturn]>;
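
// Illustrative use (a sketch): seed inactive lanes with the identity of a
// planned wave reduction (0 for an integer add) before entering strict WWM.
//   %seeded = call i32 @llvm.amdgcn.set.inactive.i32(i32 %v, i32 0)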

// Return if the given flat pointer points to a local memory address.
def int_amdgcn_is_shared : GCCBuiltin<"__builtin_amdgcn_is_shared">,
  Intrinsic<[llvm_i1_ty], [llvm_ptr_ty],
  [IntrNoMem, IntrSpeculatable, NoCapture<ArgIndex<0>>, IntrWillReturn]
>;

// Return if the given flat pointer points to a private memory address.
def int_amdgcn_is_private : GCCBuiltin<"__builtin_amdgcn_is_private">,
  Intrinsic<[llvm_i1_ty], [llvm_ptr_ty],
  [IntrNoMem, IntrSpeculatable, NoCapture<ArgIndex<0>>, IntrWillReturn]
>;

//===----------------------------------------------------------------------===//
// CI+ Intrinsics
//===----------------------------------------------------------------------===//

def int_amdgcn_s_dcache_inv_vol :
  GCCBuiltin<"__builtin_amdgcn_s_dcache_inv_vol">,
  Intrinsic<[], [], [IntrNoMem, IntrHasSideEffects, IntrWillReturn]>;

def int_amdgcn_buffer_wbinvl1_vol :
  GCCBuiltin<"__builtin_amdgcn_buffer_wbinvl1_vol">,
  Intrinsic<[], [], [IntrNoMem, IntrHasSideEffects, IntrWillReturn]>;

//===----------------------------------------------------------------------===//
// VI Intrinsics
//===----------------------------------------------------------------------===//

// llvm.amdgcn.mov.dpp.i32 <src> <dpp_ctrl> <row_mask> <bank_mask> <bound_ctrl>
def int_amdgcn_mov_dpp :
  Intrinsic<[llvm_anyint_ty],
            [LLVMMatchType<0>, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
             llvm_i1_ty],
            [IntrNoMem, IntrConvergent, IntrWillReturn,
             ImmArg<ArgIndex<1>>, ImmArg<ArgIndex<2>>,
             ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>]>;

// llvm.amdgcn.update.dpp.i32 <old> <src> <dpp_ctrl> <row_mask> <bank_mask> <bound_ctrl>
// Should be equivalent to:
// v_mov_b32 <dest> <old>
// v_mov_b32 <dest> <src> <dpp_ctrl> <row_mask> <bank_mask> <bound_ctrl>
def int_amdgcn_update_dpp :
  Intrinsic<[llvm_anyint_ty],
            [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty,
             llvm_i32_ty, llvm_i32_ty, llvm_i1_ty],
            [IntrNoMem, IntrConvergent, IntrWillReturn,
             ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<3>>,
             ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>]>;
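
// Illustrative inclusive-scan step via update.dpp (a sketch; the dpp_ctrl
// value 273 = 0x111 = row_shr:1 and the all-ones row/bank masks are
// assumptions made for the example):
//   %shr = call i32 @llvm.amdgcn.update.dpp.i32(
//              i32 %identity, i32 %v, i32 273, i32 15, i32 15, i1 false)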

def int_amdgcn_s_dcache_wb :
  GCCBuiltin<"__builtin_amdgcn_s_dcache_wb">,
  Intrinsic<[], [], [IntrNoMem, IntrHasSideEffects, IntrWillReturn]>;

def int_amdgcn_s_dcache_wb_vol :
  GCCBuiltin<"__builtin_amdgcn_s_dcache_wb_vol">,
  Intrinsic<[], [], [IntrNoMem, IntrHasSideEffects, IntrWillReturn]>;

def int_amdgcn_s_memrealtime :
  GCCBuiltin<"__builtin_amdgcn_s_memrealtime">,
  Intrinsic<[llvm_i64_ty], [], [IntrNoMem, IntrHasSideEffects, IntrWillReturn]>;

// llvm.amdgcn.ds.permute <index> <src>
def int_amdgcn_ds_permute :
  GCCBuiltin<"__builtin_amdgcn_ds_permute">,
  Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
            [IntrNoMem, IntrConvergent, IntrWillReturn]>;

// llvm.amdgcn.ds.bpermute <index> <src>
def int_amdgcn_ds_bpermute :
  GCCBuiltin<"__builtin_amdgcn_ds_bpermute">,
  Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
            [IntrNoMem, IntrConvergent, IntrWillReturn]>;

// llvm.amdgcn.perm <src0> <src1> <selector>
def int_amdgcn_perm :
  GCCBuiltin<"__builtin_amdgcn_perm">,
  Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
            [IntrNoMem, IntrSpeculatable, IntrWillReturn]>;

//===----------------------------------------------------------------------===//
// GFX10 Intrinsics
//===----------------------------------------------------------------------===//

// llvm.amdgcn.permlane16 <old> <src0> <src1> <src2> <fi> <bound_control>
def int_amdgcn_permlane16 : GCCBuiltin<"__builtin_amdgcn_permlane16">,
  Intrinsic<[llvm_i32_ty],
            [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i1_ty, llvm_i1_ty],
            [IntrNoMem, IntrConvergent, IntrWillReturn,
             ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>]>;

// llvm.amdgcn.permlanex16 <old> <src0> <src1> <src2> <fi> <bound_control>
def int_amdgcn_permlanex16 : GCCBuiltin<"__builtin_amdgcn_permlanex16">,
  Intrinsic<[llvm_i32_ty],
            [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i1_ty, llvm_i1_ty],
            [IntrNoMem, IntrConvergent, IntrWillReturn,
             ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>]>;

// llvm.amdgcn.mov.dpp8.i32 <src> <sel>
// <sel> is a 32-bit constant whose high 8 bits must be zero which selects
// the lanes to read from.
def int_amdgcn_mov_dpp8 :
  Intrinsic<[llvm_anyint_ty],
            [LLVMMatchType<0>, llvm_i32_ty],
            [IntrNoMem, IntrConvergent, IntrWillReturn,
             ImmArg<ArgIndex<1>>]>;
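
// Illustrative dpp8 select (a sketch): lane i of each 8-lane group reads the
// lane named by bits [3*i+2 : 3*i] of <sel>, so the identity pattern
// [0,1,2,3,4,5,6,7] encodes as 0xFAC688 (given here only as an example):
//   %same = call i32 @llvm.amdgcn.mov.dpp8.i32(i32 %v, i32 16434824)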

def int_amdgcn_s_get_waveid_in_workgroup :
  GCCBuiltin<"__builtin_amdgcn_s_get_waveid_in_workgroup">,
  Intrinsic<[llvm_i32_ty], [],
    [IntrReadMem, IntrInaccessibleMemOnly, IntrWillReturn]>;

class AMDGPUGlobalAtomicRtn<LLVMType vt> : Intrinsic <
  [vt],
  [llvm_anyptr_ty,    // vaddr
   vt],               // vdata(VGPR)
  [IntrArgMemOnly, IntrWillReturn, NoCapture<ArgIndex<0>>], "",
  [SDNPMemOperand]>;

def int_amdgcn_global_atomic_csub : AMDGPUGlobalAtomicRtn<llvm_i32_ty>;

// uint4 llvm.amdgcn.image.bvh.intersect.ray <node_ptr>, <ray_extent>, <ray_origin>,
//                                           <ray_dir>, <ray_inv_dir>, <texture_descr>
// <node_ptr> is i32 or i64.
// <ray_dir> and <ray_inv_dir> are both v3f16 or both v3f32.
def int_amdgcn_image_bvh_intersect_ray :
  Intrinsic<[llvm_v4i32_ty],
            [llvm_anyint_ty, llvm_float_ty, llvm_v3f32_ty, llvm_anyvector_ty,
             LLVMMatchType<1>, llvm_v4i32_ty],
            [IntrReadMem, IntrWillReturn]>;

//===----------------------------------------------------------------------===//
// Deep learning intrinsics.
//===----------------------------------------------------------------------===//

// f32 %r = llvm.amdgcn.fdot2(v2f16 %a, v2f16 %b, f32 %c, i1 %clamp)
//   %r = %a[0] * %b[0] + %a[1] * %b[1] + %c
def int_amdgcn_fdot2 :
  GCCBuiltin<"__builtin_amdgcn_fdot2">,
  Intrinsic<
    [llvm_float_ty], // %r
    [
      llvm_v2f16_ty, // %a
      llvm_v2f16_ty, // %b
      llvm_float_ty, // %c
      llvm_i1_ty     // %clamp
    ],
    [IntrNoMem, IntrSpeculatable, IntrWillReturn, ImmArg<ArgIndex<3>>]
  >;
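
// Illustrative call matching the formula above (a sketch with hypothetical
// operands; clamp disabled):
//   %r = call float @llvm.amdgcn.fdot2(<2 x half> %a, <2 x half> %b,
//                                      float %c, i1 false)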

// i32 %r = llvm.amdgcn.sdot2(v2i16 %a, v2i16 %b, i32 %c, i1 %clamp)
//   %r = %a[0] * %b[0] + %a[1] * %b[1] + %c
def int_amdgcn_sdot2 :
  GCCBuiltin<"__builtin_amdgcn_sdot2">,
  Intrinsic<
    [llvm_i32_ty],   // %r
    [
      llvm_v2i16_ty, // %a
      llvm_v2i16_ty, // %b
      llvm_i32_ty,   // %c
      llvm_i1_ty     // %clamp
    ],
    [IntrNoMem, IntrSpeculatable, IntrWillReturn, ImmArg<ArgIndex<3>>]
  >;

// u32 %r = llvm.amdgcn.udot2(v2u16 %a, v2u16 %b, u32 %c, i1 %clamp)
//   %r = %a[0] * %b[0] + %a[1] * %b[1] + %c
def int_amdgcn_udot2 :
  GCCBuiltin<"__builtin_amdgcn_udot2">,
  Intrinsic<
    [llvm_i32_ty],   // %r
    [
      llvm_v2i16_ty, // %a
      llvm_v2i16_ty, // %b
      llvm_i32_ty,   // %c
      llvm_i1_ty     // %clamp
    ],
    [IntrNoMem, IntrSpeculatable, IntrWillReturn, ImmArg<ArgIndex<3>>]
  >;

// i32 %r = llvm.amdgcn.sdot4(v4i8 (as i32) %a, v4i8 (as i32) %b, i32 %c, i1 %clamp)
//   %r = %a[0] * %b[0] + %a[1] * %b[1] + %a[2] * %b[2] + %a[3] * %b[3] + %c
def int_amdgcn_sdot4 :
  GCCBuiltin<"__builtin_amdgcn_sdot4">,
  Intrinsic<
    [llvm_i32_ty], // %r
    [
      llvm_i32_ty, // %a
      llvm_i32_ty, // %b
      llvm_i32_ty, // %c
      llvm_i1_ty   // %clamp
    ],
    [IntrNoMem, IntrSpeculatable, IntrWillReturn, ImmArg<ArgIndex<3>>]
  >;

// u32 %r = llvm.amdgcn.udot4(v4u8 (as u32) %a, v4u8 (as u32) %b, u32 %c, i1 %clamp)
//   %r = %a[0] * %b[0] + %a[1] * %b[1] + %a[2] * %b[2] + %a[3] * %b[3] + %c
def int_amdgcn_udot4 :
  GCCBuiltin<"__builtin_amdgcn_udot4">,
  Intrinsic<
    [llvm_i32_ty], // %r
    [
      llvm_i32_ty, // %a
      llvm_i32_ty, // %b
      llvm_i32_ty, // %c
      llvm_i1_ty   // %clamp
    ],
    [IntrNoMem, IntrSpeculatable, IntrWillReturn, ImmArg<ArgIndex<3>>]
  >;

// i32 %r = llvm.amdgcn.sdot8(v8i4 (as i32) %a, v8i4 (as i32) %b, i32 %c, i1 %clamp)
//   %r = %a[0] * %b[0] + %a[1] * %b[1] + %a[2] * %b[2] + %a[3] * %b[3] +
//        %a[4] * %b[4] + %a[5] * %b[5] + %a[6] * %b[6] + %a[7] * %b[7] + %c
def int_amdgcn_sdot8 :
  GCCBuiltin<"__builtin_amdgcn_sdot8">,
  Intrinsic<
    [llvm_i32_ty], // %r
    [
      llvm_i32_ty, // %a
      llvm_i32_ty, // %b
      llvm_i32_ty, // %c
      llvm_i1_ty   // %clamp
    ],
    [IntrNoMem, IntrSpeculatable, IntrWillReturn, ImmArg<ArgIndex<3>>]
  >;

// u32 %r = llvm.amdgcn.udot8(v8u4 (as u32) %a, v8u4 (as u32) %b, u32 %c, i1 %clamp)
//   %r = %a[0] * %b[0] + %a[1] * %b[1] + %a[2] * %b[2] + %a[3] * %b[3] +
//        %a[4] * %b[4] + %a[5] * %b[5] + %a[6] * %b[6] + %a[7] * %b[7] + %c
def int_amdgcn_udot8 :
  GCCBuiltin<"__builtin_amdgcn_udot8">,
  Intrinsic<
    [llvm_i32_ty], // %r
    [
      llvm_i32_ty, // %a
      llvm_i32_ty, // %b
      llvm_i32_ty, // %c
      llvm_i1_ty   // %clamp
    ],
    [IntrNoMem, IntrSpeculatable, IntrWillReturn, ImmArg<ArgIndex<3>>]
  >;

//===----------------------------------------------------------------------===//
// gfx908 intrinsics
//===----------------------------------------------------------------------===//

def int_amdgcn_global_atomic_fadd : AMDGPUGlobalAtomicRtn<llvm_anyfloat_ty>;

// llvm.amdgcn.mfma.*.* vdst, srcA, srcB, srcC, cbsz, abid, blgp
class AMDGPUMfmaIntrinsic<LLVMType DestTy, LLVMType SrcABTy> :
  GCCBuiltin<!subst("int", "__builtin", NAME)>,
  Intrinsic<[DestTy],
            [SrcABTy, SrcABTy, DestTy,
             llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
            [IntrConvergent, IntrNoMem, IntrWillReturn,
             ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>]>;

def int_amdgcn_mfma_f32_32x32x1f32  : AMDGPUMfmaIntrinsic<llvm_v32f32_ty, llvm_float_ty>;
def int_amdgcn_mfma_f32_16x16x1f32  : AMDGPUMfmaIntrinsic<llvm_v16f32_ty, llvm_float_ty>;
def int_amdgcn_mfma_f32_4x4x1f32    : AMDGPUMfmaIntrinsic<llvm_v4f32_ty,  llvm_float_ty>;
def int_amdgcn_mfma_f32_32x32x2f32  : AMDGPUMfmaIntrinsic<llvm_v16f32_ty, llvm_float_ty>;
def int_amdgcn_mfma_f32_16x16x4f32  : AMDGPUMfmaIntrinsic<llvm_v4f32_ty,  llvm_float_ty>;
def int_amdgcn_mfma_f32_32x32x4f16  : AMDGPUMfmaIntrinsic<llvm_v32f32_ty, llvm_v4f16_ty>;
def int_amdgcn_mfma_f32_16x16x4f16  : AMDGPUMfmaIntrinsic<llvm_v16f32_ty, llvm_v4f16_ty>;
def int_amdgcn_mfma_f32_4x4x4f16    : AMDGPUMfmaIntrinsic<llvm_v4f32_ty,  llvm_v4f16_ty>;
def int_amdgcn_mfma_f32_32x32x8f16  : AMDGPUMfmaIntrinsic<llvm_v16f32_ty, llvm_v4f16_ty>;
def int_amdgcn_mfma_f32_16x16x16f16 : AMDGPUMfmaIntrinsic<llvm_v4f32_ty,  llvm_v4f16_ty>;
def int_amdgcn_mfma_i32_32x32x4i8   : AMDGPUMfmaIntrinsic<llvm_v32i32_ty, llvm_i32_ty>;
def int_amdgcn_mfma_i32_16x16x4i8   : AMDGPUMfmaIntrinsic<llvm_v16i32_ty, llvm_i32_ty>;
def int_amdgcn_mfma_i32_4x4x4i8     : AMDGPUMfmaIntrinsic<llvm_v4i32_ty,  llvm_i32_ty>;
def int_amdgcn_mfma_i32_32x32x8i8   : AMDGPUMfmaIntrinsic<llvm_v16i32_ty, llvm_i32_ty>;
def int_amdgcn_mfma_i32_16x16x16i8  : AMDGPUMfmaIntrinsic<llvm_v4i32_ty,  llvm_i32_ty>;
def int_amdgcn_mfma_f32_32x32x2bf16 : AMDGPUMfmaIntrinsic<llvm_v32f32_ty, llvm_v2i16_ty>;
def int_amdgcn_mfma_f32_16x16x2bf16 : AMDGPUMfmaIntrinsic<llvm_v16f32_ty, llvm_v2i16_ty>;
def int_amdgcn_mfma_f32_4x4x2bf16   : AMDGPUMfmaIntrinsic<llvm_v4f32_ty,  llvm_v2i16_ty>;
def int_amdgcn_mfma_f32_32x32x4bf16 : AMDGPUMfmaIntrinsic<llvm_v16f32_ty, llvm_v2i16_ty>;
def int_amdgcn_mfma_f32_16x16x8bf16 : AMDGPUMfmaIntrinsic<llvm_v4f32_ty,  llvm_v2i16_ty>;
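
// Illustrative MFMA accumulation (a sketch; cbsz/abid/blgp of 0 are assumed
// for the plain, unbroadcast case):
//   %d = call <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(
//            float %a, float %b, <32 x float> %c, i32 0, i32 0, i32 0)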

//===----------------------------------------------------------------------===//
// gfx90a intrinsics
//===----------------------------------------------------------------------===//

def int_amdgcn_global_atomic_fmin : AMDGPUGlobalAtomicRtn<llvm_anyfloat_ty>;
def int_amdgcn_global_atomic_fmax : AMDGPUGlobalAtomicRtn<llvm_anyfloat_ty>;
def int_amdgcn_flat_atomic_fadd   : AMDGPUGlobalAtomicRtn<llvm_anyfloat_ty>;
def int_amdgcn_flat_atomic_fmin   : AMDGPUGlobalAtomicRtn<llvm_anyfloat_ty>;
def int_amdgcn_flat_atomic_fmax   : AMDGPUGlobalAtomicRtn<llvm_anyfloat_ty>;

def int_amdgcn_mfma_f32_32x32x4bf16_1k  : AMDGPUMfmaIntrinsic<llvm_v32f32_ty, llvm_v4i16_ty>;
def int_amdgcn_mfma_f32_16x16x4bf16_1k  : AMDGPUMfmaIntrinsic<llvm_v16f32_ty, llvm_v4i16_ty>;
def int_amdgcn_mfma_f32_4x4x4bf16_1k    : AMDGPUMfmaIntrinsic<llvm_v4f32_ty,  llvm_v4i16_ty>;
def int_amdgcn_mfma_f32_32x32x8bf16_1k  : AMDGPUMfmaIntrinsic<llvm_v16f32_ty, llvm_v4i16_ty>;
def int_amdgcn_mfma_f32_16x16x16bf16_1k : AMDGPUMfmaIntrinsic<llvm_v4f32_ty,  llvm_v4i16_ty>;

def int_amdgcn_mfma_f64_16x16x4f64 : AMDGPUMfmaIntrinsic<llvm_v4f64_ty,  llvm_double_ty>;
def int_amdgcn_mfma_f64_4x4x4f64   : AMDGPUMfmaIntrinsic<llvm_double_ty, llvm_double_ty>;

//===----------------------------------------------------------------------===//
// Special Intrinsics for backend internal use only. No frontend
// should emit calls to these.
//===----------------------------------------------------------------------===//

def int_amdgcn_if : Intrinsic<[llvm_i1_ty, llvm_anyint_ty],
  [llvm_i1_ty], [IntrConvergent, IntrWillReturn]
>;

def int_amdgcn_else : Intrinsic<[llvm_i1_ty, llvm_anyint_ty],
  [llvm_anyint_ty], [IntrConvergent, IntrWillReturn]
>;

def int_amdgcn_if_break : Intrinsic<[llvm_anyint_ty],
  [llvm_i1_ty, LLVMMatchType<0>],
  [IntrNoMem, IntrConvergent, IntrWillReturn]
>;

def int_amdgcn_loop : Intrinsic<[llvm_i1_ty],
  [llvm_anyint_ty], [IntrConvergent, IntrWillReturn]
>;

def int_amdgcn_end_cf : Intrinsic<[], [llvm_anyint_ty],
  [IntrConvergent, IntrWillReturn]>;

// Represent unreachable in a divergent region.
def int_amdgcn_unreachable : Intrinsic<[], [], [IntrConvergent]>;

// Emit a 2.5 ulp, no-denormal division. Should only be inserted by
// a pass based on !fpmath metadata.
def int_amdgcn_fdiv_fast : Intrinsic<
  [llvm_float_ty], [llvm_float_ty, llvm_float_ty],
  [IntrNoMem, IntrSpeculatable, IntrWillReturn]
>;

// Represent a relocation constant.
def int_amdgcn_reloc_constant : Intrinsic<
  [llvm_i32_ty], [llvm_metadata_ty],
  [IntrNoMem, IntrSpeculatable, IntrWillReturn]
>;
} // TargetPrefix = "amdgcn"