//===- IntrinsicsAMDGPU.td - Defines AMDGPU intrinsics -----*- tablegen -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines all of the AMDGPU-specific intrinsics.
//
//===----------------------------------------------------------------------===//

class AMDGPUReadPreloadRegisterIntrinsic
  : Intrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable, IntrWillReturn]>;

class AMDGPUReadPreloadRegisterIntrinsicNamed<string name>
  : Intrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable, IntrWillReturn]>,
    GCCBuiltin<name>;

// Used to tag image and resource intrinsics with information used to generate
// mem operands.
class AMDGPURsrcIntrinsic<int rsrcarg, bit isimage = false> {
  int RsrcArg = rsrcarg;
  bit IsImage = isimage;
}

let TargetPrefix = "r600" in {

multiclass AMDGPUReadPreloadRegisterIntrinsic_xyz {
  def _x : AMDGPUReadPreloadRegisterIntrinsic;
  def _y : AMDGPUReadPreloadRegisterIntrinsic;
  def _z : AMDGPUReadPreloadRegisterIntrinsic;
}

multiclass AMDGPUReadPreloadRegisterIntrinsic_xyz_named<string prefix> {
  def _x : AMDGPUReadPreloadRegisterIntrinsicNamed<!strconcat(prefix, "_x")>;
  def _y : AMDGPUReadPreloadRegisterIntrinsicNamed<!strconcat(prefix, "_y")>;
  def _z : AMDGPUReadPreloadRegisterIntrinsicNamed<!strconcat(prefix, "_z")>;
}
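
// For example, the int_r600_read_tgid defm below expands through this
// multiclass to int_r600_read_tgid_x/_y/_z, i.e. the intrinsics
// llvm.r600.read.tgid.x/.y/.z, each bound to the corresponding
// __builtin_r600_read_tgid_{x,y,z} builtin.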
defm int_r600_read_global_size : AMDGPUReadPreloadRegisterIntrinsic_xyz_named
                                 <"__builtin_r600_read_global_size">;
defm int_r600_read_ngroups : AMDGPUReadPreloadRegisterIntrinsic_xyz_named
                             <"__builtin_r600_read_ngroups">;
defm int_r600_read_tgid : AMDGPUReadPreloadRegisterIntrinsic_xyz_named
                          <"__builtin_r600_read_tgid">;

defm int_r600_read_local_size : AMDGPUReadPreloadRegisterIntrinsic_xyz;
defm int_r600_read_tidig : AMDGPUReadPreloadRegisterIntrinsic_xyz;

def int_r600_group_barrier : GCCBuiltin<"__builtin_r600_group_barrier">,
  Intrinsic<[], [], [IntrConvergent, IntrWillReturn]>;

// AS 7 is PARAM_I_ADDRESS, used for kernel arguments
def int_r600_implicitarg_ptr :
  GCCBuiltin<"__builtin_r600_implicitarg_ptr">,
  Intrinsic<[LLVMQualPointerType<llvm_i8_ty, 7>], [],
  [IntrNoMem, IntrSpeculatable, IntrWillReturn]>;

def int_r600_rat_store_typed :
  // 1st parameter: Data
  // 2nd parameter: Index
  // 3rd parameter: Constant RAT ID
  Intrinsic<[], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i32_ty], [IntrWillReturn]>,
  GCCBuiltin<"__builtin_r600_rat_store_typed">;

def int_r600_recipsqrt_ieee : Intrinsic<
  [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable, IntrWillReturn]
>;

def int_r600_recipsqrt_clamped : Intrinsic<
  [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable, IntrWillReturn]
>;

def int_r600_cube : Intrinsic<
  [llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem, IntrSpeculatable, IntrWillReturn]
>;

def int_r600_store_stream_output : Intrinsic<
  [], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrWillReturn]
>;

class TextureIntrinsicFloatInput : Intrinsic<[llvm_v4f32_ty], [
  llvm_v4f32_ty, // Coord
  llvm_i32_ty,   // offset_x
  llvm_i32_ty,   // offset_y,
  llvm_i32_ty,   // offset_z,
  llvm_i32_ty,   // resource_id
  llvm_i32_ty,   // samplerid
  llvm_i32_ty,   // coord_type_x
  llvm_i32_ty,   // coord_type_y
  llvm_i32_ty,   // coord_type_z
  llvm_i32_ty],  // coord_type_w
  [IntrNoMem, IntrWillReturn]
>;

class TextureIntrinsicInt32Input : Intrinsic<[llvm_v4i32_ty], [
  llvm_v4i32_ty, // Coord
  llvm_i32_ty,   // offset_x
  llvm_i32_ty,   // offset_y,
  llvm_i32_ty,   // offset_z,
  llvm_i32_ty,   // resource_id
  llvm_i32_ty,   // samplerid
  llvm_i32_ty,   // coord_type_x
  llvm_i32_ty,   // coord_type_y
  llvm_i32_ty,   // coord_type_z
  llvm_i32_ty],  // coord_type_w
  [IntrNoMem, IntrWillReturn]
>;

def int_r600_store_swizzle :
  Intrinsic<[], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], [IntrWillReturn]
>;

def int_r600_tex : TextureIntrinsicFloatInput;
def int_r600_texc : TextureIntrinsicFloatInput;
def int_r600_txl : TextureIntrinsicFloatInput;
def int_r600_txlc : TextureIntrinsicFloatInput;
def int_r600_txb : TextureIntrinsicFloatInput;
def int_r600_txbc : TextureIntrinsicFloatInput;
def int_r600_txf : TextureIntrinsicInt32Input;
def int_r600_txq : TextureIntrinsicInt32Input;
def int_r600_ddx : TextureIntrinsicFloatInput;
def int_r600_ddy : TextureIntrinsicFloatInput;

def int_r600_dot4 : Intrinsic<[llvm_float_ty],
  [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem, IntrSpeculatable, IntrWillReturn]
>;

def int_r600_kill : Intrinsic<[], [llvm_float_ty], [IntrWillReturn]>;

} // End TargetPrefix = "r600"
let TargetPrefix = "amdgcn" in {

//===----------------------------------------------------------------------===//
// ABI Special Intrinsics
//===----------------------------------------------------------------------===//

defm int_amdgcn_workitem_id : AMDGPUReadPreloadRegisterIntrinsic_xyz;
defm int_amdgcn_workgroup_id : AMDGPUReadPreloadRegisterIntrinsic_xyz_named
                               <"__builtin_amdgcn_workgroup_id">;

def int_amdgcn_dispatch_ptr :
  Intrinsic<[LLVMQualPointerType<llvm_i8_ty, 4>], [],
  [Align<RetIndex, 4>, IntrNoMem, IntrSpeculatable, IntrWillReturn]>;

def int_amdgcn_queue_ptr :
  GCCBuiltin<"__builtin_amdgcn_queue_ptr">,
  Intrinsic<[LLVMQualPointerType<llvm_i8_ty, 4>], [],
  [Align<RetIndex, 4>, IntrNoMem, IntrSpeculatable, IntrWillReturn]>;

def int_amdgcn_kernarg_segment_ptr :
  GCCBuiltin<"__builtin_amdgcn_kernarg_segment_ptr">,
  Intrinsic<[LLVMQualPointerType<llvm_i8_ty, 4>], [],
  [Align<RetIndex, 4>, IntrNoMem, IntrSpeculatable, IntrWillReturn]>;

def int_amdgcn_implicitarg_ptr :
  GCCBuiltin<"__builtin_amdgcn_implicitarg_ptr">,
  Intrinsic<[LLVMQualPointerType<llvm_i8_ty, 4>], [],
  [Align<RetIndex, 4>, IntrNoMem, IntrSpeculatable, IntrWillReturn]>;

def int_amdgcn_groupstaticsize :
  GCCBuiltin<"__builtin_amdgcn_groupstaticsize">,
  Intrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable, IntrWillReturn]>;

def int_amdgcn_dispatch_id :
  GCCBuiltin<"__builtin_amdgcn_dispatch_id">,
  Intrinsic<[llvm_i64_ty], [], [IntrNoMem, IntrSpeculatable, IntrWillReturn]>;

def int_amdgcn_implicit_buffer_ptr :
  GCCBuiltin<"__builtin_amdgcn_implicit_buffer_ptr">,
  Intrinsic<[LLVMQualPointerType<llvm_i8_ty, 4>], [],
  [Align<RetIndex, 4>, IntrNoMem, IntrSpeculatable, IntrWillReturn]>;

// Set EXEC to the 64-bit value given.
// This is always moved to the beginning of the basic block.
// FIXME: Should be mangled for wave size.
def int_amdgcn_init_exec : Intrinsic<[],
  [llvm_i64_ty],      // 64-bit literal constant
  [IntrConvergent, ImmArg<ArgIndex<0>>]>;

// Set EXEC according to a thread count packed in an SGPR input:
//    thread_count = (input >> bitoffset) & 0x7f;
// This is always moved to the beginning of the basic block.
// Note: only inreg arguments to the parent function are valid as
// inputs to this intrinsic, computed values cannot be used.
def int_amdgcn_init_exec_from_input : Intrinsic<[],
  [llvm_i32_ty,       // 32-bit SGPR input
   llvm_i32_ty],      // bit offset of the thread count
  [IntrConvergent, ImmArg<ArgIndex<1>>]>;
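
// Illustrative IR use of the intrinsic above (the %input value and the bit
// offset of 8 are hypothetical, not taken from this file):
//   call void @llvm.amdgcn.init.exec.from.input(i32 %input, i32 8)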
def int_amdgcn_wavefrontsize :
  GCCBuiltin<"__builtin_amdgcn_wavefrontsize">,
  Intrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable, IntrWillReturn]>;

//===----------------------------------------------------------------------===//
// Instruction Intrinsics
//===----------------------------------------------------------------------===//

// The first parameter is s_sendmsg immediate (i16),
// the second one is copied to m0
def int_amdgcn_s_sendmsg : GCCBuiltin<"__builtin_amdgcn_s_sendmsg">,
  Intrinsic <[], [llvm_i32_ty, llvm_i32_ty],
  [ImmArg<ArgIndex<0>>, IntrNoMem, IntrHasSideEffects]>;
def int_amdgcn_s_sendmsghalt : GCCBuiltin<"__builtin_amdgcn_s_sendmsghalt">,
  Intrinsic <[], [llvm_i32_ty, llvm_i32_ty],
  [ImmArg<ArgIndex<0>>, IntrNoMem, IntrHasSideEffects]>;

def int_amdgcn_s_barrier : GCCBuiltin<"__builtin_amdgcn_s_barrier">,
  Intrinsic<[], [], [IntrNoMem, IntrHasSideEffects, IntrConvergent, IntrWillReturn]>;

def int_amdgcn_wave_barrier : GCCBuiltin<"__builtin_amdgcn_wave_barrier">,
  Intrinsic<[], [], [IntrNoMem, IntrHasSideEffects, IntrConvergent, IntrWillReturn]>;

def int_amdgcn_s_waitcnt : GCCBuiltin<"__builtin_amdgcn_s_waitcnt">,
  Intrinsic<[], [llvm_i32_ty], [ImmArg<ArgIndex<0>>, IntrNoMem, IntrHasSideEffects, IntrWillReturn]>;

def int_amdgcn_div_scale : Intrinsic<
  // 1st parameter: Numerator
  // 2nd parameter: Denominator
  // 3rd parameter: Select quotient. Must equal Numerator or Denominator.
  //                (0 = Denominator, 1 = Numerator).
  [llvm_anyfloat_ty, llvm_i1_ty],
  [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i1_ty],
  [IntrNoMem, IntrSpeculatable, ImmArg<ArgIndex<2>>, IntrWillReturn]
>;
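
// Sketch of a call to the f32 overload (operand names are hypothetical); the
// intrinsic returns the scaled value and the flag as a two-element struct:
//   %r = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %num, float %den, i1 true)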
def int_amdgcn_div_fmas : Intrinsic<[llvm_anyfloat_ty],
  [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>, llvm_i1_ty],
  [IntrNoMem, IntrSpeculatable, IntrWillReturn]
>;

def int_amdgcn_div_fixup : Intrinsic<[llvm_anyfloat_ty],
  [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>],
  [IntrNoMem, IntrSpeculatable, IntrWillReturn]
>;

// Look Up 2.0 / pi src0 with segment select src1[4:0]
def int_amdgcn_trig_preop : Intrinsic<
  [llvm_anyfloat_ty], [LLVMMatchType<0>, llvm_i32_ty],
  [IntrNoMem, IntrSpeculatable, IntrWillReturn]
>;

def int_amdgcn_sin : Intrinsic<
  [llvm_anyfloat_ty], [LLVMMatchType<0>],
  [IntrNoMem, IntrSpeculatable, IntrWillReturn]
>;

def int_amdgcn_cos : Intrinsic<
  [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable, IntrWillReturn]
>;

def int_amdgcn_log_clamp : Intrinsic<
  [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable, IntrWillReturn]
>;

def int_amdgcn_fmul_legacy : GCCBuiltin<"__builtin_amdgcn_fmul_legacy">,
  Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
  [IntrNoMem, IntrSpeculatable, IntrWillReturn, Commutative]
>;

// Fused single-precision multiply-add with legacy behaviour for the multiply,
// which is that +/- 0.0 * anything (even NaN or infinity) is +0.0. This is
// intended for use on subtargets that have the v_fma_legacy_f32 and/or
// v_fmac_legacy_f32 instructions. (Note that v_fma_legacy_f16 is unrelated and
// has a completely different kind of legacy behaviour.)
def int_amdgcn_fma_legacy :
  Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty],
  [IntrNoMem, IntrSpeculatable, IntrWillReturn, Commutative]
>;

def int_amdgcn_rcp : Intrinsic<
  [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable, IntrWillReturn]
>;

def int_amdgcn_rcp_legacy : GCCBuiltin<"__builtin_amdgcn_rcp_legacy">,
  Intrinsic<[llvm_float_ty], [llvm_float_ty],
  [IntrNoMem, IntrSpeculatable, IntrWillReturn]
>;

def int_amdgcn_sqrt : Intrinsic<
  [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable, IntrWillReturn]
>;

def int_amdgcn_rsq : Intrinsic<
  [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable, IntrWillReturn]
>;

def int_amdgcn_rsq_legacy : GCCBuiltin<"__builtin_amdgcn_rsq_legacy">,
  Intrinsic<
  [llvm_float_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable, IntrWillReturn]
>;

// out = 1.0 / sqrt(a) result clamped to +/- max_float.
def int_amdgcn_rsq_clamp : Intrinsic<
  [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable, IntrWillReturn]>;

def int_amdgcn_ldexp : Intrinsic<
  [llvm_anyfloat_ty], [LLVMMatchType<0>, llvm_i32_ty],
  [IntrNoMem, IntrSpeculatable, IntrWillReturn]
>;

def int_amdgcn_frexp_mant : Intrinsic<
  [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable, IntrWillReturn]
>;

def int_amdgcn_frexp_exp : Intrinsic<
  [llvm_anyint_ty], [llvm_anyfloat_ty], [IntrNoMem, IntrSpeculatable, IntrWillReturn]
>;

// v_fract is buggy on SI/CI. It mishandles infinities, may return 1.0
// and always uses rtz, so is not suitable for implementing the OpenCL
// fract function. It should be ok on VI.
def int_amdgcn_fract : Intrinsic<
  [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable, IntrWillReturn]
>;

def int_amdgcn_cvt_pkrtz : GCCBuiltin<"__builtin_amdgcn_cvt_pkrtz">,
  Intrinsic<[llvm_v2f16_ty], [llvm_float_ty, llvm_float_ty],
            [IntrNoMem, IntrSpeculatable, IntrWillReturn]
>;

def int_amdgcn_cvt_pknorm_i16 :
  GCCBuiltin<"__builtin_amdgcn_cvt_pknorm_i16">,
  Intrinsic<[llvm_v2i16_ty], [llvm_float_ty, llvm_float_ty],
            [IntrNoMem, IntrSpeculatable, IntrWillReturn]
>;

def int_amdgcn_cvt_pknorm_u16 :
  GCCBuiltin<"__builtin_amdgcn_cvt_pknorm_u16">,
  Intrinsic<[llvm_v2i16_ty], [llvm_float_ty, llvm_float_ty],
            [IntrNoMem, IntrSpeculatable, IntrWillReturn]
>;

def int_amdgcn_cvt_pk_i16 :
  GCCBuiltin<"__builtin_amdgcn_cvt_pk_i16">,
  Intrinsic<
    [llvm_v2i16_ty], [llvm_i32_ty, llvm_i32_ty],
    [IntrNoMem, IntrSpeculatable, IntrWillReturn]
>;

def int_amdgcn_cvt_pk_u16 : GCCBuiltin<"__builtin_amdgcn_cvt_pk_u16">,
  Intrinsic<[llvm_v2i16_ty], [llvm_i32_ty, llvm_i32_ty],
    [IntrNoMem, IntrSpeculatable, IntrWillReturn]
>;

def int_amdgcn_class : Intrinsic<
  [llvm_i1_ty], [llvm_anyfloat_ty, llvm_i32_ty],
  [IntrNoMem, IntrSpeculatable, IntrWillReturn]
>;

def int_amdgcn_fmed3 : GCCBuiltin<"__builtin_amdgcn_fmed3">,
  Intrinsic<[llvm_anyfloat_ty],
    [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>],
    [IntrNoMem, IntrSpeculatable, IntrWillReturn]
>;

def int_amdgcn_cubeid : GCCBuiltin<"__builtin_amdgcn_cubeid">,
  Intrinsic<[llvm_float_ty],
    [llvm_float_ty, llvm_float_ty, llvm_float_ty],
    [IntrNoMem, IntrSpeculatable, IntrWillReturn]
>;

def int_amdgcn_cubema : GCCBuiltin<"__builtin_amdgcn_cubema">,
  Intrinsic<[llvm_float_ty],
    [llvm_float_ty, llvm_float_ty, llvm_float_ty],
    [IntrNoMem, IntrSpeculatable, IntrWillReturn]
>;

def int_amdgcn_cubesc : GCCBuiltin<"__builtin_amdgcn_cubesc">,
  Intrinsic<[llvm_float_ty],
    [llvm_float_ty, llvm_float_ty, llvm_float_ty],
    [IntrNoMem, IntrSpeculatable, IntrWillReturn]
>;

def int_amdgcn_cubetc : GCCBuiltin<"__builtin_amdgcn_cubetc">,
  Intrinsic<[llvm_float_ty],
    [llvm_float_ty, llvm_float_ty, llvm_float_ty],
    [IntrNoMem, IntrSpeculatable, IntrWillReturn]
>;

// v_ffbh_i32, as opposed to v_ffbh_u32. For v_ffbh_u32, llvm.ctlz
// should be used.
def int_amdgcn_sffbh :
  Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>],
  [IntrNoMem, IntrSpeculatable, IntrWillReturn]
>;

// v_mad_f32|f16/v_mac_f32|f16, selected regardless of denorm support.
def int_amdgcn_fmad_ftz :
  Intrinsic<[llvm_anyfloat_ty],
            [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>],
            [IntrNoMem, IntrSpeculatable, IntrWillReturn]
>;

// Fields should mirror atomicrmw
class AMDGPUAtomicIncIntrin : Intrinsic<[llvm_anyint_ty],
  [llvm_anyptr_ty,
   LLVMMatchType<0>,
   llvm_i32_ty, // ordering
   llvm_i32_ty, // scope
   llvm_i1_ty], // isVolatile
  [IntrArgMemOnly, IntrWillReturn, NoCapture<ArgIndex<0>>,
   ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>], "",
  [SDNPMemOperand]
>;

def int_amdgcn_atomic_inc : AMDGPUAtomicIncIntrin;
def int_amdgcn_atomic_dec : AMDGPUAtomicIncIntrin;
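
// Illustrative IR for an LDS (addrspace(3)) overload of the inc intrinsic,
// assuming i32 data and this era's type mangling (values are hypothetical;
// the trailing immediates are the ordering/scope/isVolatile fields above):
//   %old = call i32 @llvm.amdgcn.atomic.inc.i32.p3i32(
//              i32 addrspace(3)* %ptr, i32 %val, i32 0, i32 0, i1 false)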
class AMDGPULDSIntrin :
  Intrinsic<[llvm_any_ty],
    [LLVMQualPointerType<LLVMMatchType<0>, 3>,
     LLVMMatchType<0>,
     llvm_i32_ty, // ordering
     llvm_i32_ty, // scope
     llvm_i1_ty], // isVolatile
    [IntrArgMemOnly, IntrWillReturn, NoCapture<ArgIndex<0>>,
     ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>]
>;

// FIXME: The m0 argument should be moved after the normal arguments
class AMDGPUDSOrderedIntrinsic : Intrinsic<
  [llvm_i32_ty],
  // M0 = {hi16:address, lo16:waveID}. Allow passing M0 as a pointer, so that
  // the bit packing can be optimized at the IR level.
  [LLVMQualPointerType<llvm_i32_ty, 2>, // IntToPtr(M0)
   llvm_i32_ty, // value to add or swap
   llvm_i32_ty, // ordering
   llvm_i32_ty, // scope
   llvm_i1_ty,  // isVolatile
   llvm_i32_ty, // ordered count index (OA index), also added to the address
                // gfx10: bits 24-27 indicate the number of active threads/dwords
   llvm_i1_ty,  // wave release, usually set to 1
   llvm_i1_ty], // wave done, set to 1 for the last ordered instruction
  [IntrWillReturn, NoCapture<ArgIndex<0>>,
   ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>,
   ImmArg<ArgIndex<5>>, ImmArg<ArgIndex<6>>, ImmArg<ArgIndex<7>>
  ]
>;

class AMDGPUDSAppendConsumedIntrinsic : Intrinsic<
  [llvm_i32_ty],
  [llvm_anyptr_ty, // LDS or GDS ptr
   llvm_i1_ty],    // isVolatile
  [IntrConvergent, IntrWillReturn, IntrArgMemOnly,
   NoCapture<ArgIndex<0>>, ImmArg<ArgIndex<1>>],
  "",
  [SDNPMemOperand]
>;

def int_amdgcn_ds_ordered_add : AMDGPUDSOrderedIntrinsic;
def int_amdgcn_ds_ordered_swap : AMDGPUDSOrderedIntrinsic;

// The pointer argument is assumed to be dynamically uniform if a VGPR.
def int_amdgcn_ds_append : AMDGPUDSAppendConsumedIntrinsic;
def int_amdgcn_ds_consume : AMDGPUDSAppendConsumedIntrinsic;

def int_amdgcn_ds_fadd : AMDGPULDSIntrin;
def int_amdgcn_ds_fmin : AMDGPULDSIntrin;
def int_amdgcn_ds_fmax : AMDGPULDSIntrin;

} // TargetPrefix = "amdgcn"

// New-style image intrinsics

//////////////////////////////////////////////////////////////////////////
// Dimension-aware image intrinsics framework
//////////////////////////////////////////////////////////////////////////

// Helper class to represent (type, name) combinations of arguments. The
// argument names are explanatory and used as DAG operand names for codegen
// pattern matching.
class AMDGPUArg<LLVMType ty, string name> {
  LLVMType Type = ty;
  string Name = name;
}

// Return [AMDGPUArg<basety, names[0]>, AMDGPUArg<LLVMMatchType<0>, names[1]>, ...]
class makeArgList<list<string> names, LLVMType basety> {
  list<AMDGPUArg> ret =
    !listconcat([AMDGPUArg<basety, names[0]>],
                !foreach(name, !tail(names), AMDGPUArg<LLVMMatchType<0>, name>));
}

// Return arglist, with LLVMMatchType's references shifted by 'shift'.
class arglistmatchshift<list<AMDGPUArg> arglist, int shift> {
  list<AMDGPUArg> ret =
    !foreach(arg, arglist,
             !if(!isa<LLVMMatchType>(arg.Type),
                 AMDGPUArg<LLVMMatchType<!add(!cast<LLVMMatchType>(arg.Type).Number, shift)>,
                           arg.Name>,
                 arg));
}
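
// For example, shifting [AMDGPUArg<LLVMMatchType<0>, "a">] by 2 yields
// [AMDGPUArg<LLVMMatchType<2>, "a">]; arguments whose type is not an
// LLVMMatchType pass through unchanged.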
// Return the concatenation of the given arglists. LLVMMatchType's are adjusted
// accordingly, and shifted by an additional 'shift'.
class arglistconcat<list<list<AMDGPUArg>> arglists, int shift = 0> {
  list<AMDGPUArg> ret =
    !foldl([]<AMDGPUArg>, arglists, lhs, rhs,
           !listconcat(
             lhs,
             arglistmatchshift<rhs,
               !add(shift, !foldl(0, lhs, a, b,
                                  !add(a, b.Type.isAny)))>.ret));
}

// Represent texture/image types / dimensionality.
class AMDGPUDimProps<bits<3> enc, string name, string asmsuffix,
                     list<string> coord_names, list<string> slice_names> {
  AMDGPUDimProps Dim = !cast<AMDGPUDimProps>(NAME);
  string Name = name; // e.g. "2darraymsaa"
  string AsmSuffix = asmsuffix; // e.g. 2D_MSAA_ARRAY (used in assembly strings)
  bits<3> Encoding = enc;
  bit DA = 0; // DA bit in MIMG encoding

  list<AMDGPUArg> CoordSliceArgs =
    makeArgList<!listconcat(coord_names, slice_names), llvm_anyfloat_ty>.ret;
  list<AMDGPUArg> CoordSliceIntArgs =
    makeArgList<!listconcat(coord_names, slice_names), llvm_anyint_ty>.ret;
  list<AMDGPUArg> GradientArgs =
    makeArgList<!listconcat(!foreach(name, coord_names, "d" # name # "dh"),
                            !foreach(name, coord_names, "d" # name # "dv")),
                llvm_anyfloat_ty>.ret;

  bits<8> NumCoords = !size(CoordSliceArgs);
  bits<8> NumGradients = !size(GradientArgs);
}

def AMDGPUDim1D : AMDGPUDimProps<0x0, "1d", "1D", ["s"], []>;
def AMDGPUDim2D : AMDGPUDimProps<0x1, "2d", "2D", ["s", "t"], []>;
def AMDGPUDim3D : AMDGPUDimProps<0x2, "3d", "3D", ["s", "t", "r"], []>;
let DA = 1 in {
  def AMDGPUDimCube : AMDGPUDimProps<0x3, "cube", "CUBE", ["s", "t"], ["face"]>;
  def AMDGPUDim1DArray : AMDGPUDimProps<0x4, "1darray", "1D_ARRAY", ["s"], ["slice"]>;
  def AMDGPUDim2DArray : AMDGPUDimProps<0x5, "2darray", "2D_ARRAY", ["s", "t"], ["slice"]>;
}
def AMDGPUDim2DMsaa : AMDGPUDimProps<0x6, "2dmsaa", "2D_MSAA", ["s", "t"], ["fragid"]>;
let DA = 1 in {
  def AMDGPUDim2DArrayMsaa : AMDGPUDimProps<0x7, "2darraymsaa", "2D_MSAA_ARRAY", ["s", "t"], ["slice", "fragid"]>;
}

def AMDGPUDims {
  list<AMDGPUDimProps> NoMsaa = [AMDGPUDim1D, AMDGPUDim2D, AMDGPUDim3D,
                                 AMDGPUDimCube, AMDGPUDim1DArray,
                                 AMDGPUDim2DArray];
  list<AMDGPUDimProps> Msaa = [AMDGPUDim2DMsaa, AMDGPUDim2DArrayMsaa];
  list<AMDGPUDimProps> All = !listconcat(NoMsaa, Msaa);
}

// Represent sample variants, i.e. _C, _O, _B, ... and combinations thereof.
class AMDGPUSampleVariant<string ucmod, string lcmod, list<AMDGPUArg> extra_addr> {
  string UpperCaseMod = ucmod;
  string LowerCaseMod = lcmod;

  // {offset} {bias} {z-compare}
  list<AMDGPUArg> ExtraAddrArgs = extra_addr;
  bit Gradients = false;

  // Name of the {lod} or {clamp} argument that is appended to the coordinates,
  // if any.
  string LodOrClamp = "";
}

// AMDGPUSampleVariants: all variants supported by IMAGE_SAMPLE
// AMDGPUSampleVariantsNoGradients: variants supported by IMAGE_GATHER4
defset list<AMDGPUSampleVariant> AMDGPUSampleVariants = {
  multiclass AMDGPUSampleHelper_Offset<string ucmod, string lcmod,
                                       list<AMDGPUArg> extra_addr> {
    def NAME#lcmod : AMDGPUSampleVariant<ucmod, lcmod, extra_addr>;
    def NAME#lcmod#_o : AMDGPUSampleVariant<
        ucmod#"_O", lcmod#"_o", !listconcat([AMDGPUArg<llvm_i32_ty, "offset">], extra_addr)>;
  }

  multiclass AMDGPUSampleHelper_Compare<string ucmod, string lcmod,
                                        list<AMDGPUArg> extra_addr> {
    defm NAME : AMDGPUSampleHelper_Offset<ucmod, lcmod, extra_addr>;
    defm NAME : AMDGPUSampleHelper_Offset<
        "_C"#ucmod, "_c"#lcmod, !listconcat(extra_addr, [AMDGPUArg<llvm_float_ty, "zcompare">])>;
  }

  multiclass AMDGPUSampleHelper_Clamp<string ucmod, string lcmod,
                                      list<AMDGPUArg> extra_addr> {
    defm NAME : AMDGPUSampleHelper_Compare<ucmod, lcmod, extra_addr>;
    let LodOrClamp = "clamp" in
    defm NAME : AMDGPUSampleHelper_Compare<ucmod#"_CL", lcmod#"_cl", extra_addr>;
  }

  defset list<AMDGPUSampleVariant> AMDGPUSampleVariantsNoGradients = {
    defm AMDGPUSample : AMDGPUSampleHelper_Clamp<"", "", []>;
    defm AMDGPUSample : AMDGPUSampleHelper_Clamp<
        "_B", "_b", [AMDGPUArg<llvm_anyfloat_ty, "bias">]>;
    let LodOrClamp = "lod" in
    defm AMDGPUSample : AMDGPUSampleHelper_Compare<"_L", "_l", []>;
    defm AMDGPUSample : AMDGPUSampleHelper_Compare<"_LZ", "_lz", []>;
  }

  let Gradients = true in {
    defm AMDGPUSample : AMDGPUSampleHelper_Clamp<"_D", "_d", []>;
    defm AMDGPUSample : AMDGPUSampleHelper_Clamp<"_CD", "_cd", []>;
  }
}
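
// As a concrete reading of the defsets above: the plain IMAGE_SAMPLE family
// alone produces the variants "", _cl, _c, _c_cl and their _o forms; the
// NoGradients subset adds _b, _l and _lz (again combined with _c/_o), and
// _d/_cd additionally carry gradient arguments.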
// Helper class to capture the profile of a dimension-aware image intrinsic.
// This information is used to generate the intrinsic's type and to inform
// codegen pattern matching.
class AMDGPUDimProfile<string opmod,
                       AMDGPUDimProps dim> {
  AMDGPUDimProps Dim = dim;
  string OpMod = opmod; // the corresponding instruction is named IMAGE_OpMod

  // These are intended to be overwritten by subclasses
  bit IsSample = false;
  bit IsAtomic = false;
  list<LLVMType> RetTypes = [];
  list<AMDGPUArg> DataArgs = [];
  list<AMDGPUArg> ExtraAddrArgs = [];
  bit Gradients = false;
  string LodClampMip = "";

  int NumRetAndDataAnyTypes =
    !foldl(0, !listconcat(RetTypes, !foreach(arg, DataArgs, arg.Type)), a, b,
           !add(a, b.isAny));

  list<AMDGPUArg> AddrArgs =
    arglistconcat<[ExtraAddrArgs,
                   !if(Gradients, dim.GradientArgs, []),
                   !listconcat(!if(IsSample, dim.CoordSliceArgs, dim.CoordSliceIntArgs),
                               !if(!empty(LodClampMip),
                                   []<AMDGPUArg>,
                                   [AMDGPUArg<LLVMMatchType<0>, LodClampMip>]))],
                  NumRetAndDataAnyTypes>.ret;
  list<LLVMType> AddrTypes = !foreach(arg, AddrArgs, arg.Type);
  list<AMDGPUArg> AddrDefaultArgs =
    !foreach(arg, AddrArgs,
             AMDGPUArg<!if(!or(arg.Type.isAny, !isa<LLVMMatchType>(arg.Type)),
                           !if(IsSample, llvm_float_ty, llvm_i32_ty), arg.Type),
                       arg.Name>);
  list<AMDGPUArg> AddrA16Args =
    !foreach(arg, AddrArgs,
             AMDGPUArg<!if(!or(arg.Type.isAny, !isa<LLVMMatchType>(arg.Type)),
                           !if(IsSample, llvm_half_ty, llvm_i16_ty), arg.Type),
                       arg.Name>);
}

class AMDGPUDimProfileCopy<AMDGPUDimProfile base> : AMDGPUDimProfile<base.OpMod, base.Dim> {
  let IsSample = base.IsSample;
  let IsAtomic = base.IsAtomic;
  let RetTypes = base.RetTypes;
  let DataArgs = base.DataArgs;
  let ExtraAddrArgs = base.ExtraAddrArgs;
  let Gradients = base.Gradients;
  let LodClampMip = base.LodClampMip;
}

class AMDGPUDimSampleProfile<string opmod,
                             AMDGPUDimProps dim,
                             AMDGPUSampleVariant sample> : AMDGPUDimProfile<opmod, dim> {
  let IsSample = true;
  let RetTypes = [llvm_any_ty];
  let ExtraAddrArgs = sample.ExtraAddrArgs;
  let Gradients = sample.Gradients;
  let LodClampMip = sample.LodOrClamp;
}

class AMDGPUDimNoSampleProfile<string opmod,
                               AMDGPUDimProps dim,
                               list<LLVMType> retty,
                               list<AMDGPUArg> dataargs,
                               bit Mip = false> : AMDGPUDimProfile<opmod, dim> {
  let RetTypes = retty;
  let DataArgs = dataargs;
  let LodClampMip = !if(Mip, "mip", "");
}

class AMDGPUDimAtomicProfile<string opmod,
                             AMDGPUDimProps dim,
                             list<AMDGPUArg> dataargs> : AMDGPUDimProfile<opmod, dim> {
  let RetTypes = [llvm_anyint_ty];
  let DataArgs = dataargs;
  let IsAtomic = true;
}

class AMDGPUDimGetResInfoProfile<AMDGPUDimProps dim> : AMDGPUDimProfile<"GET_RESINFO", dim> {
  let RetTypes = [llvm_anyfloat_ty];
  let DataArgs = [];
  let AddrArgs = [AMDGPUArg<llvm_anyint_ty, "mip">];
  let LodClampMip = "mip";
}

// Helper class for figuring out image intrinsic argument indexes.
class AMDGPUImageDimIntrinsicEval<AMDGPUDimProfile P_> {
  int NumDataArgs = !size(P_.DataArgs);
  int NumDmaskArgs = !not(P_.IsAtomic);
  int NumExtraAddrArgs = !size(P_.ExtraAddrArgs);
  int NumVAddrArgs = !size(P_.AddrArgs);
  int NumGradientArgs = !if(P_.Gradients, !size(P_.Dim.GradientArgs), 0);
  int NumCoordArgs = !if(P_.IsSample, !size(P_.Dim.CoordSliceArgs), !size(P_.Dim.CoordSliceIntArgs));
  int NumRSrcArgs = 1;
  int NumSampArgs = !if(P_.IsSample, 2, 0);
  int DmaskArgIndex = NumDataArgs;
  int VAddrArgIndex = !add(DmaskArgIndex, NumDmaskArgs);
  int GradientArgIndex = !add(VAddrArgIndex, NumExtraAddrArgs);
  int CoordArgIndex = !add(GradientArgIndex, NumGradientArgs);
  int LodArgIndex = !add(VAddrArgIndex, NumVAddrArgs, -1);
  int MipArgIndex = LodArgIndex;
  int RsrcArgIndex = !add(VAddrArgIndex, NumVAddrArgs);
  int SampArgIndex = !add(RsrcArgIndex, NumRSrcArgs);
  int UnormArgIndex = !add(SampArgIndex, 1);
  int TexFailCtrlArgIndex = !add(SampArgIndex, NumSampArgs);
  int CachePolicyArgIndex = !add(TexFailCtrlArgIndex, 1);
}
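
// Worked example, using the plain 2D load defined further below
// (llvm.amdgcn.image.load.2d): NumDataArgs = 0 and NumDmaskArgs = 1, so
// DmaskArgIndex = 0; the two integer coordinates occupy indexes 1..2
// (VAddrArgIndex = 1, NumVAddrArgs = 2), giving RsrcArgIndex = 3, and with
// no sampler args TexFailCtrlArgIndex = 4 and CachePolicyArgIndex = 5.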
// All dimension-aware intrinsics are derived from this class.
class AMDGPUImageDimIntrinsic<AMDGPUDimProfile P_,
                              list<IntrinsicProperty> props,
                              list<SDNodeProperty> sdnodeprops> : Intrinsic<
    P_.RetTypes,        // vdata(VGPR) -- for load/atomic-with-return
    !listconcat(
      !foreach(arg, P_.DataArgs, arg.Type),  // vdata(VGPR) -- for store/atomic
      !if(P_.IsAtomic, [], [llvm_i32_ty]),   // dmask(imm)
      P_.AddrTypes,                          // vaddr(VGPR)
      [llvm_v8i32_ty],                       // rsrc(SGPR)
      !if(P_.IsSample, [llvm_v4i32_ty,       // samp(SGPR)
                        llvm_i1_ty], []),    // unorm(imm)
      [llvm_i32_ty,                          // texfailctrl(imm; bit 0 = tfe, bit 1 = lwe)
       llvm_i32_ty]),                        // cachepolicy(imm; bit 0 = glc, bit 1 = slc, bit 2 = dlc)
    !listconcat(props,
      !if(P_.IsAtomic, [], [ImmArg<ArgIndex<AMDGPUImageDimIntrinsicEval<P_>.DmaskArgIndex>>]),
      !if(P_.IsSample, [ImmArg<ArgIndex<AMDGPUImageDimIntrinsicEval<P_>.UnormArgIndex>>], []),
      [IntrWillReturn],
      [ImmArg<ArgIndex<AMDGPUImageDimIntrinsicEval<P_>.TexFailCtrlArgIndex>>,
       ImmArg<ArgIndex<AMDGPUImageDimIntrinsicEval<P_>.CachePolicyArgIndex>>]),
    "", sdnodeprops>,
  AMDGPURsrcIntrinsic<!add(!size(P_.DataArgs), !size(P_.AddrTypes),
                           !if(P_.IsAtomic, 0, 1)), 1> {
  AMDGPUDimProfile P = P_;

  AMDGPUImageDimIntrinsic Intr = !cast<AMDGPUImageDimIntrinsic>(NAME);

  let TargetPrefix = "amdgcn";
}

// Marker class for intrinsics with a DMask that determines the returned
// channels.
class AMDGPUImageDMaskIntrinsic;

defset list<AMDGPUImageDimIntrinsic> AMDGPUImageDimIntrinsics = {

  //////////////////////////////////////////////////////////////////////////
  // Load and store intrinsics
  //////////////////////////////////////////////////////////////////////////
  multiclass AMDGPUImageDimIntrinsicsNoMsaa<string opmod,
                                            list<LLVMType> retty,
                                            list<AMDGPUArg> dataargs,
                                            list<IntrinsicProperty> props,
                                            list<SDNodeProperty> sdnodeprops,
                                            bit Mip = false> {
    foreach dim = AMDGPUDims.NoMsaa in {
      def !strconcat(NAME, "_", dim.Name)
        : AMDGPUImageDimIntrinsic<
            AMDGPUDimNoSampleProfile<opmod, dim, retty, dataargs, Mip>,
            props, sdnodeprops>;
    }
  }

  multiclass AMDGPUImageDimIntrinsicsAll<string opmod,
                                         list<LLVMType> retty,
                                         list<AMDGPUArg> dataargs,
                                         list<IntrinsicProperty> props,
                                         list<SDNodeProperty> sdnodeprops,
                                         bit Mip = false> {
    foreach dim = AMDGPUDims.All in {
      def !strconcat(NAME, "_", dim.Name)
        : AMDGPUImageDimIntrinsic<
            AMDGPUDimNoSampleProfile<opmod, dim, retty, dataargs, Mip>,
            props, sdnodeprops>;
    }
  }

  defm int_amdgcn_image_load
    : AMDGPUImageDimIntrinsicsAll<"LOAD", [llvm_any_ty], [], [IntrReadMem],
                                  [SDNPMemOperand]>,
      AMDGPUImageDMaskIntrinsic;
  defm int_amdgcn_image_load_mip
    : AMDGPUImageDimIntrinsicsNoMsaa<"LOAD_MIP", [llvm_any_ty], [],
                                     [IntrReadMem, IntrWillReturn], [SDNPMemOperand], 1>,
      AMDGPUImageDMaskIntrinsic;

  defm int_amdgcn_image_store : AMDGPUImageDimIntrinsicsAll<
              "STORE", [], [AMDGPUArg<llvm_anyfloat_ty, "vdata">],
              [IntrWriteMem, IntrWillReturn], [SDNPMemOperand]>;
  defm int_amdgcn_image_store_mip : AMDGPUImageDimIntrinsicsNoMsaa<
              "STORE_MIP", [], [AMDGPUArg<llvm_anyfloat_ty, "vdata">],
              [IntrWriteMem, IntrWillReturn], [SDNPMemOperand], 1>;

  defm int_amdgcn_image_msaa_load
    : AMDGPUImageDimIntrinsicsAll<"MSAA_LOAD", [llvm_any_ty], [], [IntrReadMem],
                                  [SDNPMemOperand]>,
      AMDGPUImageDMaskIntrinsic;

  //////////////////////////////////////////////////////////////////////////
  // sample and getlod intrinsics
  //////////////////////////////////////////////////////////////////////////
  multiclass AMDGPUImageDimSampleDims<string opmod,
                                      AMDGPUSampleVariant sample,
                                      bit NoMem = false> {
    foreach dim = AMDGPUDims.NoMsaa in {
      def !strconcat(NAME, "_", dim.Name) : AMDGPUImageDimIntrinsic<
          AMDGPUDimSampleProfile<opmod, dim, sample>,
          !if(NoMem, [IntrNoMem], [IntrReadMem]),
          !if(NoMem, [], [SDNPMemOperand])>;
    }
  }

  foreach sample = AMDGPUSampleVariants in {
    defm int_amdgcn_image_sample # sample.LowerCaseMod
      : AMDGPUImageDimSampleDims<"SAMPLE" # sample.UpperCaseMod, sample>,
        AMDGPUImageDMaskIntrinsic;
  }
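
  // For instance, the _C_L variant instantiated over the 2d dimension yields
  // int_amdgcn_image_sample_c_l_2d, which selects IMAGE_SAMPLE_C_L.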
  defm int_amdgcn_image_getlod
    : AMDGPUImageDimSampleDims<"GET_LOD", AMDGPUSample, 1>,
      AMDGPUImageDMaskIntrinsic;

  //////////////////////////////////////////////////////////////////////////
  // getresinfo intrinsics
  //////////////////////////////////////////////////////////////////////////
  foreach dim = AMDGPUDims.All in {
    def !strconcat("int_amdgcn_image_getresinfo_", dim.Name)
      : AMDGPUImageDimIntrinsic<AMDGPUDimGetResInfoProfile<dim>, [IntrNoMem], []>,
        AMDGPUImageDMaskIntrinsic;
  }

  //////////////////////////////////////////////////////////////////////////
  // gather4 intrinsics
  //////////////////////////////////////////////////////////////////////////
  foreach sample = AMDGPUSampleVariantsNoGradients in {
    foreach dim = [AMDGPUDim2D, AMDGPUDimCube, AMDGPUDim2DArray] in {
      def int_amdgcn_image_gather4 # sample.LowerCaseMod # _ # dim.Name:
          AMDGPUImageDimIntrinsic<
              AMDGPUDimSampleProfile<"GATHER4" # sample.UpperCaseMod, dim, sample>,
              [IntrReadMem], [SDNPMemOperand]>;
    }
  }
}

//////////////////////////////////////////////////////////////////////////
// atomic intrinsics
//////////////////////////////////////////////////////////////////////////
defset list<AMDGPUImageDimIntrinsic> AMDGPUImageDimAtomicIntrinsics = {
  multiclass AMDGPUImageDimAtomicX<string opmod, list<AMDGPUArg> dataargs> {
    foreach dim = AMDGPUDims.All in {
      def !strconcat(NAME, "_", dim.Name)
        : AMDGPUImageDimIntrinsic<
            AMDGPUDimAtomicProfile<opmod, dim, dataargs>,
            [], [SDNPMemOperand]>;
    }
  }

  multiclass AMDGPUImageDimAtomic<string opmod> {
    defm "" : AMDGPUImageDimAtomicX<opmod, [AMDGPUArg<LLVMMatchType<0>, "vdata">]>;
  }

  defm int_amdgcn_image_atomic_swap : AMDGPUImageDimAtomic<"ATOMIC_SWAP">;
  defm int_amdgcn_image_atomic_add : AMDGPUImageDimAtomic<"ATOMIC_ADD">;
  defm int_amdgcn_image_atomic_sub : AMDGPUImageDimAtomic<"ATOMIC_SUB">;
  defm int_amdgcn_image_atomic_smin : AMDGPUImageDimAtomic<"ATOMIC_SMIN">;
  defm int_amdgcn_image_atomic_umin : AMDGPUImageDimAtomic<"ATOMIC_UMIN">;
  defm int_amdgcn_image_atomic_smax : AMDGPUImageDimAtomic<"ATOMIC_SMAX">;
  defm int_amdgcn_image_atomic_umax : AMDGPUImageDimAtomic<"ATOMIC_UMAX">;
  defm int_amdgcn_image_atomic_and : AMDGPUImageDimAtomic<"ATOMIC_AND">;
  defm int_amdgcn_image_atomic_or : AMDGPUImageDimAtomic<"ATOMIC_OR">;
  defm int_amdgcn_image_atomic_xor : AMDGPUImageDimAtomic<"ATOMIC_XOR">;
  defm int_amdgcn_image_atomic_inc : AMDGPUImageDimAtomic<"ATOMIC_INC">;
  defm int_amdgcn_image_atomic_dec : AMDGPUImageDimAtomic<"ATOMIC_DEC">;

  defm int_amdgcn_image_atomic_cmpswap :
      AMDGPUImageDimAtomicX<"ATOMIC_CMPSWAP", [AMDGPUArg<LLVMMatchType<0>, "src">,
                                               AMDGPUArg<LLVMMatchType<0>, "cmp">]>;
}

//////////////////////////////////////////////////////////////////////////
// Buffer intrinsics
//////////////////////////////////////////////////////////////////////////

let TargetPrefix = "amdgcn" in {

defset list<AMDGPURsrcIntrinsic> AMDGPUBufferIntrinsics = {

class AMDGPUBufferLoad<LLVMType data_ty = llvm_any_ty> : Intrinsic <
  [data_ty],
  [llvm_v4i32_ty,     // rsrc(SGPR)
   llvm_i32_ty,       // vindex(VGPR)
   llvm_i32_ty,       // offset(SGPR/VGPR/imm)
   llvm_i1_ty,        // glc(imm)
   llvm_i1_ty],       // slc(imm)
  [IntrReadMem, IntrWillReturn,
   ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>], "", [SDNPMemOperand]>,
  AMDGPURsrcIntrinsic<0>;
def int_amdgcn_buffer_load_format : AMDGPUBufferLoad<llvm_anyfloat_ty>;
def int_amdgcn_buffer_load : AMDGPUBufferLoad;

def int_amdgcn_s_buffer_load : Intrinsic <
  [llvm_any_ty],
  [llvm_v4i32_ty,     // rsrc(SGPR)
   llvm_i32_ty,       // byte offset(SGPR/imm)
   llvm_i32_ty],      // cachepolicy(imm; bit 0 = glc, bit 2 = dlc)
  [IntrNoMem, IntrWillReturn, ImmArg<ArgIndex<2>>]>,
  AMDGPURsrcIntrinsic<0>;

class AMDGPUBufferStore<LLVMType data_ty = llvm_any_ty> : Intrinsic <
  [],
  [data_ty,           // vdata(VGPR)
   llvm_v4i32_ty,     // rsrc(SGPR)
   llvm_i32_ty,       // vindex(VGPR)
   llvm_i32_ty,       // offset(SGPR/VGPR/imm)
   llvm_i1_ty,        // glc(imm)
   llvm_i1_ty],       // slc(imm)
  [IntrWriteMem, IntrWillReturn,
   ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>], "", [SDNPMemOperand]>,
  AMDGPURsrcIntrinsic<1>;
def int_amdgcn_buffer_store_format : AMDGPUBufferStore<llvm_anyfloat_ty>;
def int_amdgcn_buffer_store : AMDGPUBufferStore;
// New buffer intrinsics with separate raw and struct variants. The raw
// variant never has an index. The struct variant always has an index, even if
// it is const 0. A struct intrinsic with constant 0 index is different to the
// corresponding raw intrinsic on gfx9+ because the behavior of bounds checking
// and swizzling changes depending on whether idxen is set in the instruction.
// These new intrinsics also keep the offset and soffset arguments separate as
// they behave differently in bounds checking and swizzling.
class AMDGPURawBufferLoad<LLVMType data_ty = llvm_any_ty> : Intrinsic <
  [data_ty],
  [llvm_v4i32_ty,     // rsrc(SGPR)
   llvm_i32_ty,       // offset(VGPR/imm, included in bounds checking and swizzling)
   llvm_i32_ty,       // soffset(SGPR/imm, excluded from bounds checking and swizzling)
   llvm_i32_ty],      // auxiliary data (imm, cachepolicy (bit 0 = glc,
                      //                      bit 1 = slc,
                      //                      bit 2 = dlc on gfx10+),
                      //                 swizzled buffer (bit 3 = swz))
  [IntrReadMem, IntrWillReturn, ImmArg<ArgIndex<3>>], "", [SDNPMemOperand]>,
  AMDGPURsrcIntrinsic<0>;
def int_amdgcn_raw_buffer_load_format : AMDGPURawBufferLoad<llvm_anyfloat_ty>;
def int_amdgcn_raw_buffer_load : AMDGPURawBufferLoad;
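
// Sketch of a raw-form load, assuming the f32 overload (operand values are
// hypothetical; the final i32 0 is the auxiliary/cachepolicy immediate):
//   %v = call float @llvm.amdgcn.raw.buffer.load.f32(
//            <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)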
class AMDGPUStructBufferLoad<LLVMType data_ty = llvm_any_ty> : Intrinsic <
  [data_ty],
  [llvm_v4i32_ty,     // rsrc(SGPR)
   llvm_i32_ty,       // vindex(VGPR)
   llvm_i32_ty,       // offset(VGPR/imm, included in bounds checking and swizzling)
   llvm_i32_ty,       // soffset(SGPR/imm, excluded from bounds checking and swizzling)
   llvm_i32_ty],      // auxiliary data (imm, cachepolicy (bit 0 = glc,
                      //                      bit 1 = slc,
                      //                      bit 2 = dlc on gfx10+),
                      //                 swizzled buffer (bit 3 = swz))
  [IntrReadMem, IntrWillReturn, ImmArg<ArgIndex<4>>], "", [SDNPMemOperand]>,
  AMDGPURsrcIntrinsic<0>;
def int_amdgcn_struct_buffer_load_format : AMDGPUStructBufferLoad;
def int_amdgcn_struct_buffer_load : AMDGPUStructBufferLoad;

class AMDGPURawBufferStore<LLVMType data_ty = llvm_any_ty> : Intrinsic <
  [],
  [data_ty,           // vdata(VGPR)
   llvm_v4i32_ty,     // rsrc(SGPR)
   llvm_i32_ty,       // offset(VGPR/imm, included in bounds checking and swizzling)
   llvm_i32_ty,       // soffset(SGPR/imm, excluded from bounds checking and swizzling)
   llvm_i32_ty],      // auxiliary data (imm, cachepolicy (bit 0 = glc,
                      //                      bit 1 = slc,
                      //                      bit 2 = dlc on gfx10+),
                      //                 swizzled buffer (bit 3 = swz))
  [IntrWriteMem, IntrWillReturn, ImmArg<ArgIndex<4>>], "", [SDNPMemOperand]>,
  AMDGPURsrcIntrinsic<1>;
def int_amdgcn_raw_buffer_store_format : AMDGPURawBufferStore<llvm_anyfloat_ty>;
def int_amdgcn_raw_buffer_store : AMDGPURawBufferStore;

class AMDGPUStructBufferStore<LLVMType data_ty = llvm_any_ty> : Intrinsic <
  [],
  [data_ty,           // vdata(VGPR)
   llvm_v4i32_ty,     // rsrc(SGPR)
   llvm_i32_ty,       // vindex(VGPR)
   llvm_i32_ty,       // offset(VGPR/imm, included in bounds checking and swizzling)
   llvm_i32_ty,       // soffset(SGPR/imm, excluded from bounds checking and swizzling)
   llvm_i32_ty],      // auxiliary data (imm, cachepolicy (bit 0 = glc,
                      //                      bit 1 = slc,
                      //                      bit 2 = dlc on gfx10+),
                      //                 swizzled buffer (bit 3 = swz))
  [IntrWriteMem, IntrWillReturn, ImmArg<ArgIndex<5>>], "", [SDNPMemOperand]>,
  AMDGPURsrcIntrinsic<1>;
def int_amdgcn_struct_buffer_store_format : AMDGPUStructBufferStore;
def int_amdgcn_struct_buffer_store : AMDGPUStructBufferStore;
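
// Illustrative struct-form store, assuming the f32 overload (values are
// hypothetical); note the extra vindex operand relative to the raw form:
//   call void @llvm.amdgcn.struct.buffer.store.f32(
//            float %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset,
//            i32 %soffset, i32 0)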
class AMDGPURawBufferAtomic<LLVMType data_ty = llvm_any_ty, bit NoRtn = false> : Intrinsic <
  !if(NoRtn, [], [data_ty]),
  [!if(NoRtn, data_ty, LLVMMatchType<0>),  // vdata(VGPR)
   llvm_v4i32_ty,     // rsrc(SGPR)
   llvm_i32_ty,       // offset(VGPR/imm, included in bounds checking and swizzling)
   llvm_i32_ty,       // soffset(SGPR/imm, excluded from bounds checking and swizzling)
   llvm_i32_ty],      // cachepolicy(imm; bit 1 = slc)
  [ImmArg<ArgIndex<4>>, IntrWillReturn], "", [SDNPMemOperand]>,
  AMDGPURsrcIntrinsic<1, 0>;
def int_amdgcn_raw_buffer_atomic_swap : AMDGPURawBufferAtomic;
def int_amdgcn_raw_buffer_atomic_add : AMDGPURawBufferAtomic;
def int_amdgcn_raw_buffer_atomic_sub : AMDGPURawBufferAtomic;
def int_amdgcn_raw_buffer_atomic_smin : AMDGPURawBufferAtomic;
def int_amdgcn_raw_buffer_atomic_umin : AMDGPURawBufferAtomic;
def int_amdgcn_raw_buffer_atomic_smax : AMDGPURawBufferAtomic;
def int_amdgcn_raw_buffer_atomic_umax : AMDGPURawBufferAtomic;
def int_amdgcn_raw_buffer_atomic_and : AMDGPURawBufferAtomic;
def int_amdgcn_raw_buffer_atomic_or : AMDGPURawBufferAtomic;
def int_amdgcn_raw_buffer_atomic_xor : AMDGPURawBufferAtomic;
def int_amdgcn_raw_buffer_atomic_inc : AMDGPURawBufferAtomic;
def int_amdgcn_raw_buffer_atomic_dec : AMDGPURawBufferAtomic;
def int_amdgcn_raw_buffer_atomic_cmpswap : Intrinsic<
  [llvm_anyint_ty],
  [LLVMMatchType<0>,  // src(VGPR)
   LLVMMatchType<0>,  // cmp(VGPR)
   llvm_v4i32_ty,     // rsrc(SGPR)
   llvm_i32_ty,       // offset(VGPR/imm, included in bounds checking and swizzling)
   llvm_i32_ty,       // soffset(SGPR/imm, excluded from bounds checking and swizzling)
   llvm_i32_ty],      // cachepolicy(imm; bit 1 = slc)
  [ImmArg<ArgIndex<5>>, IntrWillReturn], "", [SDNPMemOperand]>,
  AMDGPURsrcIntrinsic<2, 0>;
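
// Illustrative cmpswap call, assuming the i32 overload (values are
// hypothetical); the result is the memory contents observed before the swap:
//   %old = call i32 @llvm.amdgcn.raw.buffer.atomic.cmpswap.i32(
//              i32 %src, i32 %cmp, <4 x i32> %rsrc, i32 %voffset,
//              i32 %soffset, i32 0)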
// gfx908 intrinsic
def int_amdgcn_raw_buffer_atomic_fadd : AMDGPURawBufferAtomic<llvm_anyfloat_ty>;

class AMDGPUStructBufferAtomic<LLVMType data_ty = llvm_any_ty, bit NoRtn = false> : Intrinsic <
  !if(NoRtn, [], [data_ty]),
  [!if(NoRtn, data_ty, LLVMMatchType<0>),  // vdata(VGPR)
   llvm_v4i32_ty,     // rsrc(SGPR)
   llvm_i32_ty,       // vindex(VGPR)
   llvm_i32_ty,       // offset(VGPR/imm, included in bounds checking and swizzling)
   llvm_i32_ty,       // soffset(SGPR/imm, excluded from bounds checking and swizzling)
   llvm_i32_ty],      // cachepolicy(imm; bit 1 = slc)
  [ImmArg<ArgIndex<5>>, IntrWillReturn], "", [SDNPMemOperand]>,
  AMDGPURsrcIntrinsic<1, 0>;
def int_amdgcn_struct_buffer_atomic_swap : AMDGPUStructBufferAtomic;
def int_amdgcn_struct_buffer_atomic_add : AMDGPUStructBufferAtomic;
def int_amdgcn_struct_buffer_atomic_sub : AMDGPUStructBufferAtomic;
def int_amdgcn_struct_buffer_atomic_smin : AMDGPUStructBufferAtomic;
def int_amdgcn_struct_buffer_atomic_umin : AMDGPUStructBufferAtomic;
def int_amdgcn_struct_buffer_atomic_smax : AMDGPUStructBufferAtomic;
def int_amdgcn_struct_buffer_atomic_umax : AMDGPUStructBufferAtomic;
def int_amdgcn_struct_buffer_atomic_and : AMDGPUStructBufferAtomic;
def int_amdgcn_struct_buffer_atomic_or : AMDGPUStructBufferAtomic;
def int_amdgcn_struct_buffer_atomic_xor : AMDGPUStructBufferAtomic;
def int_amdgcn_struct_buffer_atomic_inc : AMDGPUStructBufferAtomic;
def int_amdgcn_struct_buffer_atomic_dec : AMDGPUStructBufferAtomic;
def int_amdgcn_struct_buffer_atomic_cmpswap : Intrinsic<
  [llvm_anyint_ty],
  [LLVMMatchType<0>,  // src(VGPR)
   LLVMMatchType<0>,  // cmp(VGPR)
   llvm_v4i32_ty,     // rsrc(SGPR)
   llvm_i32_ty,       // vindex(VGPR)
   llvm_i32_ty,       // offset(VGPR/imm, included in bounds checking and swizzling)
   llvm_i32_ty,       // soffset(SGPR/imm, excluded from bounds checking and swizzling)
   llvm_i32_ty],      // cachepolicy(imm; bit 1 = slc)
  [ImmArg<ArgIndex<6>>, IntrWillReturn], "", [SDNPMemOperand]>,
  AMDGPURsrcIntrinsic<2, 0>;

// gfx908 intrinsic
def int_amdgcn_struct_buffer_atomic_fadd : AMDGPUStructBufferAtomic<llvm_anyfloat_ty>;

// Obsolescent tbuffer intrinsics.
def int_amdgcn_tbuffer_load : Intrinsic <
  [llvm_any_ty],      // overloaded for types f32/i32, v2f32/v2i32, v4f32/v4i32
  [llvm_v4i32_ty,     // rsrc(SGPR)
   llvm_i32_ty,       // vindex(VGPR)
   llvm_i32_ty,       // voffset(VGPR)
   llvm_i32_ty,       // soffset(SGPR)
   llvm_i32_ty,       // offset(imm)
   llvm_i32_ty,       // dfmt(imm)
   llvm_i32_ty,       // nfmt(imm)
   llvm_i1_ty,        // glc(imm)
   llvm_i1_ty],       // slc(imm)
  [IntrReadMem, IntrWillReturn,
   ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>, ImmArg<ArgIndex<6>>,
   ImmArg<ArgIndex<7>>, ImmArg<ArgIndex<8>>], "", [SDNPMemOperand]>,
  AMDGPURsrcIntrinsic<0>;

def int_amdgcn_tbuffer_store : Intrinsic <
  [],
  [llvm_any_ty,       // vdata(VGPR), overloaded for types f32/i32, v2f32/v2i32, v4f32/v4i32
   llvm_v4i32_ty,     // rsrc(SGPR)
   llvm_i32_ty,       // vindex(VGPR)
   llvm_i32_ty,       // voffset(VGPR)
   llvm_i32_ty,       // soffset(SGPR)
   llvm_i32_ty,       // offset(imm)
   llvm_i32_ty,       // dfmt(imm)
   llvm_i32_ty,       // nfmt(imm)
   llvm_i1_ty,        // glc(imm)
   llvm_i1_ty],       // slc(imm)
  [IntrWriteMem, IntrWillReturn, ImmArg<ArgIndex<5>>,
   ImmArg<ArgIndex<6>>, ImmArg<ArgIndex<7>>,
   ImmArg<ArgIndex<8>>, ImmArg<ArgIndex<9>>], "", [SDNPMemOperand]>,
  AMDGPURsrcIntrinsic<1>;

// New tbuffer intrinsics, with:
// - raw and struct variants
// - joint format field
// - joint cachepolicy field
def int_amdgcn_raw_tbuffer_load : Intrinsic <
  [llvm_any_ty],      // overloaded for types f32/i32, v2f32/v2i32, v4f32/v4i32
  [llvm_v4i32_ty,     // rsrc(SGPR)
   llvm_i32_ty,       // offset(VGPR/imm, included in bounds checking and swizzling)
   llvm_i32_ty,       // soffset(SGPR/imm, excluded from bounds checking and swizzling)
   llvm_i32_ty,       // format(imm; bits 3..0 = dfmt, bits 6..4 = nfmt)
   llvm_i32_ty],      // auxiliary data (imm, cachepolicy (bit 0 = glc,
                      //                      bit 1 = slc,
                      //                      bit 2 = dlc on gfx10+),
                      //                 swizzled buffer (bit 3 = swz))
  [IntrReadMem, IntrWillReturn,
   ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>], "", [SDNPMemOperand]>,
  AMDGPURsrcIntrinsic<0>;
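
// Sketch of a raw tbuffer load, assuming a v4f32 overload with hypothetical
// operand values. Per the comment above, the format immediate packs dfmt in
// bits 3..0 and nfmt in bits 6..4, so dfmt=4 with nfmt=7 encodes as
// (7 << 4) | 4 = 116:
//   %v = call <4 x float> @llvm.amdgcn.raw.tbuffer.load.v4f32(
//            <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 116, i32 0)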

def int_amdgcn_raw_tbuffer_store : Intrinsic <
    [],
    [llvm_any_ty,     // vdata(VGPR), overloaded for types f32/i32, v2f32/v2i32, v4f32/v4i32
     llvm_v4i32_ty,   // rsrc(SGPR)
     llvm_i32_ty,     // offset(VGPR/imm, included in bounds checking and swizzling)
     llvm_i32_ty,     // soffset(SGPR/imm, excluded from bounds checking and swizzling)
     llvm_i32_ty,     // format(imm; bits 3..0 = dfmt, bits 6..4 = nfmt)
     llvm_i32_ty],    // auxiliary data (imm, cachepolicy (bit 0 = glc,
                      //                                   bit 1 = slc,
                      //                                   bit 2 = dlc on gfx10+),
                      //                 swizzled buffer (bit 3 = swz))
    [IntrWriteMem, IntrWillReturn,
     ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>], "", [SDNPMemOperand]>,
  AMDGPURsrcIntrinsic<1>;

def int_amdgcn_struct_tbuffer_load : Intrinsic <
    [llvm_any_ty],    // overloaded for types f32/i32, v2f32/v2i32, v4f32/v4i32
    [llvm_v4i32_ty,   // rsrc(SGPR)
     llvm_i32_ty,     // vindex(VGPR)
     llvm_i32_ty,     // offset(VGPR/imm, included in bounds checking and swizzling)
     llvm_i32_ty,     // soffset(SGPR/imm, excluded from bounds checking and swizzling)
     llvm_i32_ty,     // format(imm; bits 3..0 = dfmt, bits 6..4 = nfmt)
     llvm_i32_ty],    // auxiliary data (imm, cachepolicy (bit 0 = glc,
                      //                                   bit 1 = slc,
                      //                                   bit 2 = dlc on gfx10+),
                      //                 swizzled buffer (bit 3 = swz))
    [IntrReadMem, IntrWillReturn,
     ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>], "", [SDNPMemOperand]>,
  AMDGPURsrcIntrinsic<0>;

def int_amdgcn_struct_tbuffer_store : Intrinsic <
    [],
    [llvm_any_ty,     // vdata(VGPR), overloaded for types f32/i32, v2f32/v2i32, v4f32/v4i32
     llvm_v4i32_ty,   // rsrc(SGPR)
     llvm_i32_ty,     // vindex(VGPR)
     llvm_i32_ty,     // offset(VGPR/imm, included in bounds checking and swizzling)
     llvm_i32_ty,     // soffset(SGPR/imm, excluded from bounds checking and swizzling)
     llvm_i32_ty,     // format(imm; bits 3..0 = dfmt, bits 6..4 = nfmt)
     llvm_i32_ty],    // auxiliary data (imm, cachepolicy (bit 0 = glc,
                      //                                   bit 1 = slc,
                      //                                   bit 2 = dlc on gfx10+),
                      //                 swizzled buffer (bit 3 = swz))
    [IntrWriteMem, IntrWillReturn,
     ImmArg<ArgIndex<5>>, ImmArg<ArgIndex<6>>], "", [SDNPMemOperand]>,
  AMDGPURsrcIntrinsic<1>;

class AMDGPUBufferAtomic : Intrinsic <
  [llvm_anyint_ty],
  [LLVMMatchType<0>,  // vdata(VGPR)
   llvm_v4i32_ty,     // rsrc(SGPR)
   llvm_i32_ty,       // vindex(VGPR)
   llvm_i32_ty,       // offset(SGPR/VGPR/imm)
   llvm_i1_ty],       // slc(imm)
  [ImmArg<ArgIndex<4>>, IntrWillReturn], "", [SDNPMemOperand]>,
  AMDGPURsrcIntrinsic<1, 0>;

def int_amdgcn_buffer_atomic_swap : AMDGPUBufferAtomic;
def int_amdgcn_buffer_atomic_add : AMDGPUBufferAtomic;
def int_amdgcn_buffer_atomic_sub : AMDGPUBufferAtomic;
def int_amdgcn_buffer_atomic_smin : AMDGPUBufferAtomic;
def int_amdgcn_buffer_atomic_umin : AMDGPUBufferAtomic;
def int_amdgcn_buffer_atomic_smax : AMDGPUBufferAtomic;
def int_amdgcn_buffer_atomic_umax : AMDGPUBufferAtomic;
def int_amdgcn_buffer_atomic_and : AMDGPUBufferAtomic;
def int_amdgcn_buffer_atomic_or : AMDGPUBufferAtomic;
def int_amdgcn_buffer_atomic_xor : AMDGPUBufferAtomic;

def int_amdgcn_buffer_atomic_cmpswap : Intrinsic<
  [llvm_i32_ty],
  [llvm_i32_ty,     // src(VGPR)
   llvm_i32_ty,     // cmp(VGPR)
   llvm_v4i32_ty,   // rsrc(SGPR)
   llvm_i32_ty,     // vindex(VGPR)
   llvm_i32_ty,     // offset(SGPR/VGPR/imm)
   llvm_i1_ty],     // slc(imm)
  [ImmArg<ArgIndex<5>>, IntrWillReturn], "", [SDNPMemOperand]>,
  AMDGPURsrcIntrinsic<2, 0>;

def int_amdgcn_buffer_atomic_csub : AMDGPUBufferAtomic;
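
// Illustrative IR use (a sketch; operand values are assumptions, and the
// .i32 suffix follows the usual overloaded-intrinsic mangling): an integer
// add and a compare-swap against a buffer resource, each returning the
// pre-op value.
//   %old = call i32 @llvm.amdgcn.buffer.atomic.add.i32(
//              i32 %v, <4 x i32> %rsrc, i32 %idx, i32 0, i1 false)
//   %was = call i32 @llvm.amdgcn.buffer.atomic.cmpswap(
//              i32 %new, i32 %cmp, <4 x i32> %rsrc, i32 %idx, i32 0, i1 false)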

class AMDGPUBufferAtomicFP : Intrinsic <
  [llvm_anyfloat_ty],
  [LLVMMatchType<0>,  // vdata(VGPR)
   llvm_v4i32_ty,     // rsrc(SGPR)
   llvm_i32_ty,       // vindex(VGPR)
   llvm_i32_ty,       // offset(SGPR/VGPR/imm)
   llvm_i1_ty],       // slc(imm)
  [ImmArg<ArgIndex<4>>, IntrWillReturn], "", [SDNPMemOperand]>,
  AMDGPURsrcIntrinsic<1, 0>;

// Legacy form of the intrinsic. The raw and struct forms should be preferred.
def int_amdgcn_buffer_atomic_fadd : AMDGPUBufferAtomicFP;

} // defset AMDGPUBufferIntrinsics

// Uses that do not set the done bit should set IntrWriteMem on the
// call site.
def int_amdgcn_exp : Intrinsic <[], [
  llvm_i32_ty,       // tgt
  llvm_i32_ty,       // en
  llvm_any_ty,       // src0 (f32 or i32)
  LLVMMatchType<0>,  // src1
  LLVMMatchType<0>,  // src2
  LLVMMatchType<0>,  // src3
  llvm_i1_ty,        // done
  llvm_i1_ty         // vm
  ],
  [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<1>>, ImmArg<ArgIndex<6>>,
   ImmArg<ArgIndex<7>>, IntrWriteMem, IntrInaccessibleMemOnly,
   IntrWillReturn]
>;
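
// Illustrative IR use (a sketch; operand values are assumptions): export the
// final color to MRT0 (tgt = 0) with all four components enabled (en = 15)
// and the done bit set.
//   call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %r, float %g,
//                                  float %b, float %a, i1 true, i1 false)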

// exp with compr bit set.
def int_amdgcn_exp_compr : Intrinsic <[], [
  llvm_i32_ty,        // tgt
  llvm_i32_ty,        // en
  llvm_anyvector_ty,  // src0 (v2f16 or v2i16)
  LLVMMatchType<0>,   // src1
  llvm_i1_ty,         // done
  llvm_i1_ty],        // vm
  [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<1>>, ImmArg<ArgIndex<4>>,
   ImmArg<ArgIndex<5>>, IntrWriteMem, IntrInaccessibleMemOnly,
   IntrWillReturn]
>;

def int_amdgcn_buffer_wbinvl1_sc :
  GCCBuiltin<"__builtin_amdgcn_buffer_wbinvl1_sc">,
  Intrinsic<[], [], [IntrNoMem, IntrHasSideEffects, IntrWillReturn]>;

def int_amdgcn_buffer_wbinvl1 :
  GCCBuiltin<"__builtin_amdgcn_buffer_wbinvl1">,
  Intrinsic<[], [], [IntrNoMem, IntrHasSideEffects, IntrWillReturn]>;

def int_amdgcn_s_dcache_inv :
  GCCBuiltin<"__builtin_amdgcn_s_dcache_inv">,
  Intrinsic<[], [], [IntrNoMem, IntrHasSideEffects, IntrWillReturn]>;

def int_amdgcn_s_memtime :
  GCCBuiltin<"__builtin_amdgcn_s_memtime">,
  Intrinsic<[llvm_i64_ty], [], [IntrWillReturn]>;

def int_amdgcn_s_sleep :
  GCCBuiltin<"__builtin_amdgcn_s_sleep">,
  Intrinsic<[], [llvm_i32_ty], [ImmArg<ArgIndex<0>>, IntrNoMem,
                                IntrHasSideEffects, IntrWillReturn]>;

def int_amdgcn_s_incperflevel :
  GCCBuiltin<"__builtin_amdgcn_s_incperflevel">,
  Intrinsic<[], [llvm_i32_ty], [ImmArg<ArgIndex<0>>, IntrNoMem,
                                IntrHasSideEffects, IntrWillReturn]>;

def int_amdgcn_s_decperflevel :
  GCCBuiltin<"__builtin_amdgcn_s_decperflevel">,
  Intrinsic<[], [llvm_i32_ty], [ImmArg<ArgIndex<0>>, IntrNoMem,
                                IntrHasSideEffects, IntrWillReturn]>;

def int_amdgcn_s_getreg :
  GCCBuiltin<"__builtin_amdgcn_s_getreg">,
  Intrinsic<[llvm_i32_ty], [llvm_i32_ty],
  [IntrInaccessibleMemOnly, IntrReadMem, IntrSpeculatable,
   IntrWillReturn, ImmArg<ArgIndex<0>>]
>;

// Note this can be used to set FP environment properties that are
// unsafe to change in non-strictfp functions. The register properties
// available (and value required to access them) may differ per
// subtarget. llvm.amdgcn.s.setreg(hwmode, value)
def int_amdgcn_s_setreg :
  GCCBuiltin<"__builtin_amdgcn_s_setreg">,
  Intrinsic<[], [llvm_i32_ty, llvm_i32_ty],
  [IntrNoMem, IntrHasSideEffects, IntrWillReturn, ImmArg<ArgIndex<0>>]
>;
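
// Illustrative IR use (a sketch; the packed hwreg constant below is a
// placeholder assumption, since the encoding of register id, field offset,
// and field width is subtarget-defined): read a hardware register field,
// then write the same field back. The first operand must be an immediate.
//   %v = call i32 @llvm.amdgcn.s.getreg(i32 6)
//   call void @llvm.amdgcn.s.setreg(i32 6, i32 %v)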

// int_amdgcn_s_getpc is provided to allow a specific style of position
// independent code to determine the high part of its address when it is
// known (through convention) that the code and any data of interest do
// not cross a 4Gb address boundary. Use for any other purpose may not
// produce the desired results as optimizations may cause code movement,
// especially as we explicitly use IntrNoMem to allow optimizations.
def int_amdgcn_s_getpc :
  GCCBuiltin<"__builtin_amdgcn_s_getpc">,
  Intrinsic<[llvm_i64_ty], [], [IntrNoMem, IntrSpeculatable,
                                IntrWillReturn]>;

// __builtin_amdgcn_interp_mov <param>, <attr_chan>, <attr>, <m0>
// param values: 0 = P10, 1 = P20, 2 = P0
def int_amdgcn_interp_mov :
  GCCBuiltin<"__builtin_amdgcn_interp_mov">,
  Intrinsic<[llvm_float_ty],
            [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
            [IntrNoMem, IntrSpeculatable, IntrWillReturn,
             ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<1>>, ImmArg<ArgIndex<2>>]>;

// __builtin_amdgcn_interp_p1 <i>, <attr_chan>, <attr>, <m0>
// This intrinsic reads from LDS, but the memory values are constant,
// so it behaves like IntrNoMem.
def int_amdgcn_interp_p1 :
  GCCBuiltin<"__builtin_amdgcn_interp_p1">,
  Intrinsic<[llvm_float_ty],
            [llvm_float_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
            [IntrNoMem, IntrSpeculatable, IntrWillReturn,
             ImmArg<ArgIndex<1>>, ImmArg<ArgIndex<2>>]>;

// __builtin_amdgcn_interp_p2 <p1>, <j>, <attr_chan>, <attr>, <m0>
def int_amdgcn_interp_p2 :
  GCCBuiltin<"__builtin_amdgcn_interp_p2">,
  Intrinsic<[llvm_float_ty],
            [llvm_float_ty, llvm_float_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
            [IntrNoMem, IntrSpeculatable, IntrWillReturn,
             ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<3>>]>;
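
// Illustrative IR sequence (a sketch; the attribute/channel indices are
// assumptions): two-stage interpolation of channel 0 of attribute 0 at the
// pixel's i/j barycentrics.
//   %p1 = call float @llvm.amdgcn.interp.p1(float %i, i32 0, i32 0, i32 %m0)
//   %a0 = call float @llvm.amdgcn.interp.p2(float %p1, float %j, i32 0,
//                                           i32 0, i32 %m0)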

// See int_amdgcn_interp_p1 for why this is IntrNoMem.
// __builtin_amdgcn_interp_p1_f16 <i>, <attr_chan>, <attr>, <high>, <m0>
// high selects whether the high or low 16 bits are loaded from LDS
def int_amdgcn_interp_p1_f16 :
  GCCBuiltin<"__builtin_amdgcn_interp_p1_f16">,
  Intrinsic<[llvm_float_ty],
            [llvm_float_ty, llvm_i32_ty, llvm_i32_ty, llvm_i1_ty, llvm_i32_ty],
            [IntrNoMem, IntrSpeculatable, IntrWillReturn,
             ImmArg<ArgIndex<1>>, ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<3>>]>;

// __builtin_amdgcn_interp_p2_f16 <p1>, <j>, <attr_chan>, <attr>, <high>, <m0>
// high selects whether the high or low 16 bits are loaded from LDS
def int_amdgcn_interp_p2_f16 :
  GCCBuiltin<"__builtin_amdgcn_interp_p2_f16">,
  Intrinsic<[llvm_half_ty],
            [llvm_float_ty, llvm_float_ty, llvm_i32_ty, llvm_i32_ty, llvm_i1_ty, llvm_i32_ty],
            [IntrNoMem, IntrSpeculatable, IntrWillReturn,
             ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>]>;

// Pixel shaders only: whether the current pixel is live (i.e. not a helper
// invocation for derivative computation).
def int_amdgcn_ps_live : Intrinsic <
  [llvm_i1_ty],
  [],
  [IntrNoMem, IntrWillReturn]>;

def int_amdgcn_mbcnt_lo :
  GCCBuiltin<"__builtin_amdgcn_mbcnt_lo">,
  Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
            [IntrNoMem, IntrWillReturn]>;

def int_amdgcn_mbcnt_hi :
  GCCBuiltin<"__builtin_amdgcn_mbcnt_hi">,
  Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
            [IntrNoMem, IntrWillReturn]>;
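
// Illustrative IR idiom (a sketch): compute the current lane's index within
// the wave by counting the set mask bits below the lane, first over the low
// 32 lanes and then over the high 32.
//   %lo   = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
//   %lane = call i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 %lo)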

// llvm.amdgcn.ds.swizzle src offset
def int_amdgcn_ds_swizzle :
  GCCBuiltin<"__builtin_amdgcn_ds_swizzle">,
  Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
            [IntrNoMem, IntrConvergent, IntrWillReturn,
             ImmArg<ArgIndex<1>>]>;

def int_amdgcn_ubfe : Intrinsic<[llvm_anyint_ty],
  [LLVMMatchType<0>, llvm_i32_ty, llvm_i32_ty],
  [IntrNoMem, IntrSpeculatable, IntrWillReturn]
>;

def int_amdgcn_sbfe : Intrinsic<[llvm_anyint_ty],
  [LLVMMatchType<0>, llvm_i32_ty, llvm_i32_ty],
  [IntrNoMem, IntrSpeculatable, IntrWillReturn]
>;

def int_amdgcn_lerp :
  GCCBuiltin<"__builtin_amdgcn_lerp">,
  Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
            [IntrNoMem, IntrSpeculatable, IntrWillReturn]
>;

def int_amdgcn_sad_u8 :
  GCCBuiltin<"__builtin_amdgcn_sad_u8">,
  Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
            [IntrNoMem, IntrSpeculatable, IntrWillReturn]
>;

def int_amdgcn_msad_u8 :
  GCCBuiltin<"__builtin_amdgcn_msad_u8">,
  Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
            [IntrNoMem, IntrSpeculatable, IntrWillReturn]
>;

def int_amdgcn_sad_hi_u8 :
  GCCBuiltin<"__builtin_amdgcn_sad_hi_u8">,
  Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
            [IntrNoMem, IntrSpeculatable, IntrWillReturn]
>;

def int_amdgcn_sad_u16 :
  GCCBuiltin<"__builtin_amdgcn_sad_u16">,
  Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
            [IntrNoMem, IntrSpeculatable, IntrWillReturn]
>;

def int_amdgcn_qsad_pk_u16_u8 :
  GCCBuiltin<"__builtin_amdgcn_qsad_pk_u16_u8">,
  Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i32_ty, llvm_i64_ty],
            [IntrNoMem, IntrSpeculatable, IntrWillReturn]
>;

def int_amdgcn_mqsad_pk_u16_u8 :
  GCCBuiltin<"__builtin_amdgcn_mqsad_pk_u16_u8">,
  Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i32_ty, llvm_i64_ty],
            [IntrNoMem, IntrSpeculatable, IntrWillReturn]
>;

def int_amdgcn_mqsad_u32_u8 :
  GCCBuiltin<"__builtin_amdgcn_mqsad_u32_u8">,
  Intrinsic<[llvm_v4i32_ty], [llvm_i64_ty, llvm_i32_ty, llvm_v4i32_ty],
            [IntrNoMem, IntrSpeculatable, IntrWillReturn]
>;

def int_amdgcn_cvt_pk_u8_f32 :
  GCCBuiltin<"__builtin_amdgcn_cvt_pk_u8_f32">,
  Intrinsic<[llvm_i32_ty], [llvm_float_ty, llvm_i32_ty, llvm_i32_ty],
            [IntrNoMem, IntrSpeculatable, IntrWillReturn]
>;

def int_amdgcn_icmp :
  Intrinsic<[llvm_anyint_ty], [llvm_anyint_ty, LLVMMatchType<1>, llvm_i32_ty],
            [IntrNoMem, IntrConvergent, IntrWillReturn,
             ImmArg<ArgIndex<2>>]>;

def int_amdgcn_fcmp :
  Intrinsic<[llvm_anyint_ty], [llvm_anyfloat_ty, LLVMMatchType<1>, llvm_i32_ty],
            [IntrNoMem, IntrConvergent, IntrWillReturn,
             ImmArg<ArgIndex<2>>]>;

def int_amdgcn_ballot :
  Intrinsic<[llvm_anyint_ty], [llvm_i1_ty],
            [IntrNoMem, IntrConvergent, IntrWillReturn]>;
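
// Illustrative IR use (a sketch): gather a wave-wide mask of which lanes
// hold a true value; the overloaded result type selects wave32 (i32) or
// wave64 (i64).
//   %mask = call i64 @llvm.amdgcn.ballot.i64(i1 %cond)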

def int_amdgcn_readfirstlane :
  GCCBuiltin<"__builtin_amdgcn_readfirstlane">,
  Intrinsic<[llvm_i32_ty], [llvm_i32_ty],
            [IntrNoMem, IntrConvergent, IntrWillReturn]>;

// The lane argument must be uniform across the currently active threads of
// the current wave. Otherwise, the result is undefined.
def int_amdgcn_readlane :
  GCCBuiltin<"__builtin_amdgcn_readlane">,
  Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
            [IntrNoMem, IntrConvergent, IntrWillReturn]>;

// The value to write and lane select arguments must be uniform across the
// currently active threads of the current wave. Otherwise, the result is
// undefined.
def int_amdgcn_writelane :
  GCCBuiltin<"__builtin_amdgcn_writelane">,
  Intrinsic<[llvm_i32_ty], [
    llvm_i32_ty,  // uniform value to write: returned by the selected lane
    llvm_i32_ty,  // uniform lane select
    llvm_i32_ty   // returned by all lanes other than the selected one
  ],
  [IntrNoMem, IntrConvergent, IntrWillReturn]
>;
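
// Illustrative IR use (a sketch): read lane 0's value, then place it into
// lane 5 while every other lane keeps %passthru. Both the lane selects and
// the written value are uniform, as required above.
//   %v0 = call i32 @llvm.amdgcn.readlane(i32 %val, i32 0)
//   %w  = call i32 @llvm.amdgcn.writelane(i32 %v0, i32 5, i32 %passthru)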

// FIXME: Deprecated. This is equivalent to llvm.fshr.
def int_amdgcn_alignbit : Intrinsic<[llvm_i32_ty],
  [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
  [IntrNoMem, IntrSpeculatable, IntrWillReturn]
>;

def int_amdgcn_alignbyte : GCCBuiltin<"__builtin_amdgcn_alignbyte">,
  Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
            [IntrNoMem, IntrSpeculatable, IntrWillReturn]
>;

def int_amdgcn_mul_i24 : Intrinsic<[llvm_i32_ty],
  [llvm_i32_ty, llvm_i32_ty],
  [IntrNoMem, IntrSpeculatable, IntrWillReturn]
>;

def int_amdgcn_mul_u24 : Intrinsic<[llvm_i32_ty],
  [llvm_i32_ty, llvm_i32_ty],
  [IntrNoMem, IntrSpeculatable, IntrWillReturn]
>;

// llvm.amdgcn.ds.gws.init(i32 bar_val, i32 resource_id)
//
// bar_val is the total number of waves that will wait on this
// barrier, minus 1.
def int_amdgcn_ds_gws_init :
  GCCBuiltin<"__builtin_amdgcn_ds_gws_init">,
  Intrinsic<[],
            [llvm_i32_ty, llvm_i32_ty],
            [IntrConvergent, IntrWriteMem,
             IntrInaccessibleMemOnly, IntrWillReturn], "",
            [SDNPMemOperand]
>;
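
// Illustrative IR use (a sketch; the counts are assumptions): set up GWS
// barrier resource 0 for a group of eight waves (bar_val = wave count - 1).
//   call void @llvm.amdgcn.ds.gws.init(i32 7, i32 0)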

// llvm.amdgcn.ds.gws.barrier(i32 vsrc0, i32 resource_id)
// vsrc0 is the total number of waves that will wait on this
// barrier, minus 1.
def int_amdgcn_ds_gws_barrier :
  GCCBuiltin<"__builtin_amdgcn_ds_gws_barrier">,
  Intrinsic<[],
            [llvm_i32_ty, llvm_i32_ty],
            [IntrConvergent, IntrInaccessibleMemOnly, IntrWillReturn], "",
            [SDNPMemOperand]
>;

// llvm.amdgcn.ds.gws.sema.v(i32 resource_id)
def int_amdgcn_ds_gws_sema_v :
  GCCBuiltin<"__builtin_amdgcn_ds_gws_sema_v">,
  Intrinsic<[],
            [llvm_i32_ty],
            [IntrConvergent, IntrInaccessibleMemOnly, IntrWillReturn], "",
            [SDNPMemOperand]
>;

// llvm.amdgcn.ds.gws.sema.br(i32 vsrc, i32 resource_id)
def int_amdgcn_ds_gws_sema_br :
  GCCBuiltin<"__builtin_amdgcn_ds_gws_sema_br">,
  Intrinsic<[],
            [llvm_i32_ty, llvm_i32_ty],
            [IntrConvergent, IntrInaccessibleMemOnly, IntrWillReturn], "",
            [SDNPMemOperand]
>;

// llvm.amdgcn.ds.gws.sema.p(i32 resource_id)
def int_amdgcn_ds_gws_sema_p :
  GCCBuiltin<"__builtin_amdgcn_ds_gws_sema_p">,
  Intrinsic<[],
            [llvm_i32_ty],
            [IntrConvergent, IntrInaccessibleMemOnly, IntrWillReturn], "",
            [SDNPMemOperand]
>;

// llvm.amdgcn.ds.gws.sema.release.all(i32 resource_id)
def int_amdgcn_ds_gws_sema_release_all :
  GCCBuiltin<"__builtin_amdgcn_ds_gws_sema_release_all">,
  Intrinsic<[],
            [llvm_i32_ty],
            [IntrConvergent, IntrInaccessibleMemOnly, IntrWillReturn], "",
            [SDNPMemOperand]
>;

// Copies the source value to the destination value, with the guarantee that
// the source value is computed as if the entire program were executed in WQM.
def int_amdgcn_wqm : Intrinsic<[llvm_any_ty],
  [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable, IntrWillReturn]
>;

// Copies the source value to the destination value, such that the source
// is computed as if the entire program were executed in WQM if any other
// program code executes in WQM.
def int_amdgcn_softwqm : Intrinsic<[llvm_any_ty],
  [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable, IntrWillReturn]
>;

// Return true if at least one thread within the pixel quad passes true into
// the function.
def int_amdgcn_wqm_vote : Intrinsic<[llvm_i1_ty],
  [llvm_i1_ty], [IntrNoMem, IntrConvergent, IntrWillReturn]
>;

// If false, set EXEC=0 for the current thread until the end of program.
// FIXME: Should this be IntrNoMem, IntrHasSideEffects, or IntrWillReturn?
def int_amdgcn_kill : Intrinsic<[], [llvm_i1_ty], []>;

def int_amdgcn_endpgm : GCCBuiltin<"__builtin_amdgcn_endpgm">,
  Intrinsic<[], [], [IntrNoReturn, IntrCold, IntrNoMem, IntrHasSideEffects]
>;

// Copies the active channels of the source value to the destination value,
// with the guarantee that the source value is computed as if the entire
// program were executed in Whole Wavefront Mode, i.e. with all channels
// enabled, with a few exceptions:
//  - Phi nodes which require WWM return an undefined value.
def int_amdgcn_wwm : Intrinsic<[llvm_any_ty],
  [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable,
                       IntrConvergent, IntrWillReturn]
>;

// Given a value, copies it while setting all the inactive lanes to a given
// value. Note that OpenGL helper lanes are considered active, so if the
// program ever uses WQM, then the instruction and the first source will be
// computed in WQM.
def int_amdgcn_set_inactive :
  Intrinsic<[llvm_anyint_ty],
            [LLVMMatchType<0>,   // value to be copied
             LLVMMatchType<0>],  // value for the inactive lanes to take
            [IntrNoMem, IntrConvergent, IntrWillReturn]>;
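
// Illustrative IR use (a sketch): make %live read as 0 in lanes that are
// inactive at this point, e.g. to feed a neutral element into a WWM
// reduction.
//   %x = call i32 @llvm.amdgcn.set.inactive.i32(i32 %live, i32 0)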

// Return true if the given flat pointer points to a local memory address.
def int_amdgcn_is_shared : GCCBuiltin<"__builtin_amdgcn_is_shared">,
  Intrinsic<[llvm_i1_ty], [llvm_ptr_ty],
  [IntrNoMem, IntrSpeculatable, NoCapture<ArgIndex<0>>, IntrWillReturn]
>;

// Return true if the given flat pointer points to a private memory address.
def int_amdgcn_is_private : GCCBuiltin<"__builtin_amdgcn_is_private">,
  Intrinsic<[llvm_i1_ty], [llvm_ptr_ty],
  [IntrNoMem, IntrSpeculatable, NoCapture<ArgIndex<0>>, IntrWillReturn]
>;
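
// Illustrative IR use (a sketch, assuming typed-pointer IR where llvm_ptr_ty
// is i8*): test whether a flat pointer actually addresses LDS before
// choosing a faster addressing path.
//   %is.lds = call i1 @llvm.amdgcn.is.shared(i8* %flat.ptr)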

//===----------------------------------------------------------------------===//
// CI+ Intrinsics
//===----------------------------------------------------------------------===//

def int_amdgcn_s_dcache_inv_vol :
  GCCBuiltin<"__builtin_amdgcn_s_dcache_inv_vol">,
  Intrinsic<[], [], [IntrNoMem, IntrHasSideEffects, IntrWillReturn]>;

def int_amdgcn_buffer_wbinvl1_vol :
  GCCBuiltin<"__builtin_amdgcn_buffer_wbinvl1_vol">,
  Intrinsic<[], [], [IntrNoMem, IntrHasSideEffects, IntrWillReturn]>;

//===----------------------------------------------------------------------===//
// VI Intrinsics
//===----------------------------------------------------------------------===//

// llvm.amdgcn.mov.dpp.i32 <src> <dpp_ctrl> <row_mask> <bank_mask> <bound_ctrl>
def int_amdgcn_mov_dpp :
  Intrinsic<[llvm_anyint_ty],
            [LLVMMatchType<0>, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
             llvm_i1_ty],
            [IntrNoMem, IntrConvergent, IntrWillReturn,
             ImmArg<ArgIndex<1>>, ImmArg<ArgIndex<2>>,
             ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>]>;

// llvm.amdgcn.update.dpp.i32 <old> <src> <dpp_ctrl> <row_mask> <bank_mask> <bound_ctrl>
// Should be equivalent to:
// v_mov_b32 <dest> <old>
// v_mov_b32 <dest> <src> <dpp_ctrl> <row_mask> <bank_mask> <bound_ctrl>
def int_amdgcn_update_dpp :
  Intrinsic<[llvm_anyint_ty],
            [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty,
             llvm_i32_ty, llvm_i32_ty, llvm_i1_ty],
            [IntrNoMem, IntrConvergent, IntrWillReturn,
             ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<3>>,
             ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>]>;
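
// Illustrative IR use (a sketch; treating dpp_ctrl 0x111 as row_shr:1 is an
// assumption about the hardware DPP encoding): shift values down one lane
// within each row, full row/bank masks, bound_ctrl off.
//   %v = call i32 @llvm.amdgcn.update.dpp.i32(i32 %old, i32 %src,
//                                             i32 273, i32 15, i32 15,
//                                             i1 false)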

def int_amdgcn_s_dcache_wb :
  GCCBuiltin<"__builtin_amdgcn_s_dcache_wb">,
  Intrinsic<[], [], [IntrNoMem, IntrHasSideEffects, IntrWillReturn]>;

def int_amdgcn_s_dcache_wb_vol :
  GCCBuiltin<"__builtin_amdgcn_s_dcache_wb_vol">,
  Intrinsic<[], [], [IntrNoMem, IntrHasSideEffects, IntrWillReturn]>;

def int_amdgcn_s_memrealtime :
  GCCBuiltin<"__builtin_amdgcn_s_memrealtime">,
  Intrinsic<[llvm_i64_ty], [], [IntrWillReturn]>;

// llvm.amdgcn.ds.permute <index> <src>
def int_amdgcn_ds_permute :
  GCCBuiltin<"__builtin_amdgcn_ds_permute">,
  Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
            [IntrNoMem, IntrConvergent, IntrWillReturn]>;

// llvm.amdgcn.ds.bpermute <index> <src>
def int_amdgcn_ds_bpermute :
  GCCBuiltin<"__builtin_amdgcn_ds_bpermute">,
  Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
            [IntrNoMem, IntrConvergent, IntrWillReturn]>;
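
// Illustrative IR use (a sketch): broadcast lane 0's value across the wave;
// note that the hardware treats the index as a byte address, i.e. lane * 4.
//   %b = call i32 @llvm.amdgcn.ds.bpermute(i32 0, i32 %src)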

//===----------------------------------------------------------------------===//
// GFX10 Intrinsics
//===----------------------------------------------------------------------===//

// llvm.amdgcn.permlane16 <old> <src0> <src1> <src2> <fi> <bound_control>
def int_amdgcn_permlane16 : GCCBuiltin<"__builtin_amdgcn_permlane16">,
  Intrinsic<[llvm_i32_ty],
            [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i1_ty, llvm_i1_ty],
            [IntrNoMem, IntrConvergent, IntrWillReturn,
             ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>]>;

// llvm.amdgcn.permlanex16 <old> <src0> <src1> <src2> <fi> <bound_control>
def int_amdgcn_permlanex16 : GCCBuiltin<"__builtin_amdgcn_permlanex16">,
  Intrinsic<[llvm_i32_ty],
            [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i1_ty, llvm_i1_ty],
            [IntrNoMem, IntrConvergent, IntrWillReturn,
             ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>]>;

// llvm.amdgcn.mov.dpp8.i32 <src> <sel>
// <sel> is a 32-bit constant whose high 8 bits must be zero; it selects
// the lanes to read from.
def int_amdgcn_mov_dpp8 :
  Intrinsic<[llvm_anyint_ty],
            [LLVMMatchType<0>, llvm_i32_ty],
            [IntrNoMem, IntrConvergent, IntrWillReturn,
             ImmArg<ArgIndex<1>>]>;
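
// Illustrative IR use (a sketch; reading <sel> as eight 3-bit source-lane
// fields is an assumption about the DPP8 encoding): sel = 0 reads lane 0
// within each group of eight lanes, i.e. a per-group broadcast.
//   %v = call i32 @llvm.amdgcn.mov.dpp8.i32(i32 %src, i32 0)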
  1476. def int_amdgcn_s_get_waveid_in_workgroup :
  1477. GCCBuiltin<"__builtin_amdgcn_s_get_waveid_in_workgroup">,
  1478. Intrinsic<[llvm_i32_ty], [],
  1479. [IntrReadMem, IntrInaccessibleMemOnly, IntrWillReturn]>;
  1480. class AMDGPUGlobalAtomicRtn<LLVMType vt> : Intrinsic <
  1481. [vt],
  1482. [llvm_anyptr_ty, // vaddr
  1483. vt], // vdata(VGPR)
  1484. [IntrArgMemOnly, IntrWillReturn, NoCapture<ArgIndex<0>>], "",
  1485. [SDNPMemOperand]>;
  1486. def int_amdgcn_global_atomic_csub : AMDGPUGlobalAtomicRtn<llvm_i32_ty>;
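
// Illustrative IR use (a sketch; the .p1i32 pointer-type suffix in the
// mangled name is an assumption from the usual overloaded-intrinsic
// mangling): atomic sub-with-clamp on a global pointer, returning the
// previous value.
//   %old = call i32 @llvm.amdgcn.global.atomic.csub.p1i32(
//              i32 addrspace(1)* %ptr, i32 %val)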

// uint4 llvm.amdgcn.image.bvh.intersect.ray <node_ptr>, <ray_extent>, <ray_origin>,
//                                           <ray_dir>, <ray_inv_dir>, <texture_descr>
def int_amdgcn_image_bvh_intersect_ray :
  Intrinsic<[llvm_v4i32_ty],
            [llvm_anyint_ty, llvm_float_ty, llvm_v4f32_ty, llvm_anyvector_ty,
             LLVMMatchType<1>, llvm_v4i32_ty],
            [IntrReadMem, IntrWillReturn]>;

//===----------------------------------------------------------------------===//
// Deep learning intrinsics.
//===----------------------------------------------------------------------===//

// f32 %r = llvm.amdgcn.fdot2(v2f16 %a, v2f16 %b, f32 %c, i1 %clamp)
//   %r = %a[0] * %b[0] + %a[1] * %b[1] + %c
def int_amdgcn_fdot2 :
  GCCBuiltin<"__builtin_amdgcn_fdot2">,
  Intrinsic<
    [llvm_float_ty],  // %r
    [
      llvm_v2f16_ty,  // %a
      llvm_v2f16_ty,  // %b
      llvm_float_ty,  // %c
      llvm_i1_ty      // %clamp
    ],
    [IntrNoMem, IntrSpeculatable, IntrWillReturn, ImmArg<ArgIndex<3>>]
  >;
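
// Illustrative IR use (a sketch): one fused two-element half-precision dot
// product accumulated into %acc, with clamping disabled.
//   %r = call float @llvm.amdgcn.fdot2(<2 x half> %a, <2 x half> %b,
//                                      float %acc, i1 false)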

// i32 %r = llvm.amdgcn.sdot2(v2i16 %a, v2i16 %b, i32 %c, i1 %clamp)
//   %r = %a[0] * %b[0] + %a[1] * %b[1] + %c
def int_amdgcn_sdot2 :
  GCCBuiltin<"__builtin_amdgcn_sdot2">,
  Intrinsic<
    [llvm_i32_ty],    // %r
    [
      llvm_v2i16_ty,  // %a
      llvm_v2i16_ty,  // %b
      llvm_i32_ty,    // %c
      llvm_i1_ty      // %clamp
    ],
    [IntrNoMem, IntrSpeculatable, IntrWillReturn, ImmArg<ArgIndex<3>>]
  >;

// u32 %r = llvm.amdgcn.udot2(v2u16 %a, v2u16 %b, u32 %c, i1 %clamp)
//   %r = %a[0] * %b[0] + %a[1] * %b[1] + %c
def int_amdgcn_udot2 :
  GCCBuiltin<"__builtin_amdgcn_udot2">,
  Intrinsic<
    [llvm_i32_ty],    // %r
    [
      llvm_v2i16_ty,  // %a
      llvm_v2i16_ty,  // %b
      llvm_i32_ty,    // %c
      llvm_i1_ty      // %clamp
    ],
    [IntrNoMem, IntrSpeculatable, IntrWillReturn, ImmArg<ArgIndex<3>>]
  >;

// i32 %r = llvm.amdgcn.sdot4(v4i8 (as i32) %a, v4i8 (as i32) %b, i32 %c, i1 %clamp)
//   %r = %a[0] * %b[0] + %a[1] * %b[1] + %a[2] * %b[2] + %a[3] * %b[3] + %c
def int_amdgcn_sdot4 :
  GCCBuiltin<"__builtin_amdgcn_sdot4">,
  Intrinsic<
    [llvm_i32_ty],  // %r
    [
      llvm_i32_ty,  // %a
      llvm_i32_ty,  // %b
      llvm_i32_ty,  // %c
      llvm_i1_ty    // %clamp
    ],
    [IntrNoMem, IntrSpeculatable, IntrWillReturn, ImmArg<ArgIndex<3>>]
  >;
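
// Illustrative IR use (a sketch): the four signed bytes of each operand are
// packed into an i32, so a bitcast from <4 x i8> data is typical before the
// call.
//   %a.packed = bitcast <4 x i8> %a to i32
//   %b.packed = bitcast <4 x i8> %b to i32
//   %r = call i32 @llvm.amdgcn.sdot4(i32 %a.packed, i32 %b.packed,
//                                    i32 %acc, i1 false)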

// u32 %r = llvm.amdgcn.udot4(v4u8 (as u32) %a, v4u8 (as u32) %b, u32 %c, i1 %clamp)
//   %r = %a[0] * %b[0] + %a[1] * %b[1] + %a[2] * %b[2] + %a[3] * %b[3] + %c
def int_amdgcn_udot4 :
  GCCBuiltin<"__builtin_amdgcn_udot4">,
  Intrinsic<
    [llvm_i32_ty],  // %r
    [
      llvm_i32_ty,  // %a
      llvm_i32_ty,  // %b
      llvm_i32_ty,  // %c
      llvm_i1_ty    // %clamp
    ],
    [IntrNoMem, IntrSpeculatable, IntrWillReturn, ImmArg<ArgIndex<3>>]
  >;

// i32 %r = llvm.amdgcn.sdot8(v8i4 (as i32) %a, v8i4 (as i32) %b, i32 %c, i1 %clamp)
//   %r = %a[0] * %b[0] + %a[1] * %b[1] + %a[2] * %b[2] + %a[3] * %b[3] +
//        %a[4] * %b[4] + %a[5] * %b[5] + %a[6] * %b[6] + %a[7] * %b[7] + %c
def int_amdgcn_sdot8 :
  GCCBuiltin<"__builtin_amdgcn_sdot8">,
  Intrinsic<
    [llvm_i32_ty],  // %r
    [
      llvm_i32_ty,  // %a
      llvm_i32_ty,  // %b
      llvm_i32_ty,  // %c
      llvm_i1_ty    // %clamp
    ],
    [IntrNoMem, IntrSpeculatable, IntrWillReturn, ImmArg<ArgIndex<3>>]
  >;

// u32 %r = llvm.amdgcn.udot8(v8u4 (as u32) %a, v8u4 (as u32) %b, u32 %c, i1 %clamp)
//   %r = %a[0] * %b[0] + %a[1] * %b[1] + %a[2] * %b[2] + %a[3] * %b[3] +
//        %a[4] * %b[4] + %a[5] * %b[5] + %a[6] * %b[6] + %a[7] * %b[7] + %c
def int_amdgcn_udot8 :
  GCCBuiltin<"__builtin_amdgcn_udot8">,
  Intrinsic<
    [llvm_i32_ty],  // %r
    [
      llvm_i32_ty,  // %a
      llvm_i32_ty,  // %b
      llvm_i32_ty,  // %c
      llvm_i1_ty    // %clamp
    ],
    [IntrNoMem, IntrSpeculatable, IntrWillReturn, ImmArg<ArgIndex<3>>]
  >;

//===----------------------------------------------------------------------===//
// gfx908 intrinsics
//===----------------------------------------------------------------------===//

def int_amdgcn_global_atomic_fadd : AMDGPUGlobalAtomicRtn<llvm_anyfloat_ty>;

// llvm.amdgcn.mfma.f32.* vdst, srcA, srcB, srcC, cbsz, abid, blgp
def int_amdgcn_mfma_f32_32x32x1f32 : GCCBuiltin<"__builtin_amdgcn_mfma_f32_32x32x1f32">,
  Intrinsic<[llvm_v32f32_ty],
            [llvm_float_ty, llvm_float_ty, llvm_v32f32_ty,
             llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
            [IntrConvergent, IntrNoMem, IntrWillReturn,
             ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>]>;
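
// Illustrative IR use (a sketch; cbsz/abid/blgp of zero are assumptions):
// one 32x32x1 f32 MFMA step accumulating into a 32-element vector.
//   %acc = call <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(
//              float %a, float %b, <32 x float> %acc.in,
//              i32 0, i32 0, i32 0)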

def int_amdgcn_mfma_f32_16x16x1f32 : GCCBuiltin<"__builtin_amdgcn_mfma_f32_16x16x1f32">,
  Intrinsic<[llvm_v16f32_ty],
            [llvm_float_ty, llvm_float_ty, llvm_v16f32_ty,
             llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
            [IntrConvergent, IntrNoMem, IntrWillReturn,
             ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>]>;

def int_amdgcn_mfma_f32_4x4x1f32 : GCCBuiltin<"__builtin_amdgcn_mfma_f32_4x4x1f32">,
  Intrinsic<[llvm_v4f32_ty],
            [llvm_float_ty, llvm_float_ty, llvm_v4f32_ty,
             llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
            [IntrConvergent, IntrNoMem, IntrWillReturn,
             ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>]>;

def int_amdgcn_mfma_f32_32x32x2f32 : GCCBuiltin<"__builtin_amdgcn_mfma_f32_32x32x2f32">,
  Intrinsic<[llvm_v16f32_ty],
            [llvm_float_ty, llvm_float_ty, llvm_v16f32_ty,
             llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
            [IntrConvergent, IntrNoMem, IntrWillReturn,
             ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>]>;

def int_amdgcn_mfma_f32_16x16x4f32 : GCCBuiltin<"__builtin_amdgcn_mfma_f32_16x16x4f32">,
  Intrinsic<[llvm_v4f32_ty],
            [llvm_float_ty, llvm_float_ty, llvm_v4f32_ty,
             llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
            [IntrConvergent, IntrNoMem, IntrWillReturn,
             ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>]>;

def int_amdgcn_mfma_f32_32x32x4f16 : GCCBuiltin<"__builtin_amdgcn_mfma_f32_32x32x4f16">,
  Intrinsic<[llvm_v32f32_ty],
            [llvm_v4f16_ty, llvm_v4f16_ty, llvm_v32f32_ty,
             llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
            [IntrConvergent, IntrNoMem, IntrWillReturn,
             ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>]>;

def int_amdgcn_mfma_f32_16x16x4f16 : GCCBuiltin<"__builtin_amdgcn_mfma_f32_16x16x4f16">,
  Intrinsic<[llvm_v16f32_ty],
            [llvm_v4f16_ty, llvm_v4f16_ty, llvm_v16f32_ty,
             llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
            [IntrConvergent, IntrNoMem, IntrWillReturn,
             ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>]>;

def int_amdgcn_mfma_f32_4x4x4f16 : GCCBuiltin<"__builtin_amdgcn_mfma_f32_4x4x4f16">,
  Intrinsic<[llvm_v4f32_ty],
            [llvm_v4f16_ty, llvm_v4f16_ty, llvm_v4f32_ty,
             llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
            [IntrConvergent, IntrNoMem, IntrWillReturn,
             ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>]>;

def int_amdgcn_mfma_f32_32x32x8f16 : GCCBuiltin<"__builtin_amdgcn_mfma_f32_32x32x8f16">,
  Intrinsic<[llvm_v16f32_ty],
            [llvm_v4f16_ty, llvm_v4f16_ty, llvm_v16f32_ty,
             llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
            [IntrConvergent, IntrNoMem, IntrWillReturn,
             ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>]>;

def int_amdgcn_mfma_f32_16x16x16f16 : GCCBuiltin<"__builtin_amdgcn_mfma_f32_16x16x16f16">,
  Intrinsic<[llvm_v4f32_ty],
            [llvm_v4f16_ty, llvm_v4f16_ty, llvm_v4f32_ty,
             llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
            [IntrConvergent, IntrNoMem, IntrWillReturn,
             ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>]>;

def int_amdgcn_mfma_i32_32x32x4i8 : GCCBuiltin<"__builtin_amdgcn_mfma_i32_32x32x4i8">,
  Intrinsic<[llvm_v32i32_ty],
            [llvm_i32_ty, llvm_i32_ty, llvm_v32i32_ty,
             llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
            [IntrConvergent, IntrNoMem, IntrWillReturn,
             ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>]>;

def int_amdgcn_mfma_i32_16x16x4i8 : GCCBuiltin<"__builtin_amdgcn_mfma_i32_16x16x4i8">,
  Intrinsic<[llvm_v16i32_ty],
            [llvm_i32_ty, llvm_i32_ty, llvm_v16i32_ty,
             llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
            [IntrConvergent, IntrNoMem, IntrWillReturn,
             ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>]>;

def int_amdgcn_mfma_i32_4x4x4i8 : GCCBuiltin<"__builtin_amdgcn_mfma_i32_4x4x4i8">,
  Intrinsic<[llvm_v4i32_ty],
            [llvm_i32_ty, llvm_i32_ty, llvm_v4i32_ty,
             llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
            [IntrConvergent, IntrNoMem, IntrWillReturn,
             ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>]>;

def int_amdgcn_mfma_i32_32x32x8i8 : GCCBuiltin<"__builtin_amdgcn_mfma_i32_32x32x8i8">,
  Intrinsic<[llvm_v16i32_ty],
            [llvm_i32_ty, llvm_i32_ty, llvm_v16i32_ty,
             llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
            [IntrConvergent, IntrNoMem, IntrWillReturn,
             ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>]>;

def int_amdgcn_mfma_i32_16x16x16i8 : GCCBuiltin<"__builtin_amdgcn_mfma_i32_16x16x16i8">,
  Intrinsic<[llvm_v4i32_ty],
            [llvm_i32_ty, llvm_i32_ty, llvm_v4i32_ty,
             llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
            [IntrConvergent, IntrNoMem, IntrWillReturn,
             ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>]>;

def int_amdgcn_mfma_f32_32x32x2bf16 : GCCBuiltin<"__builtin_amdgcn_mfma_f32_32x32x2bf16">,
  Intrinsic<[llvm_v32f32_ty],
            [llvm_v2i16_ty, llvm_v2i16_ty, llvm_v32f32_ty,
             llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
            [IntrConvergent, IntrNoMem, IntrWillReturn,
             ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>]>;

def int_amdgcn_mfma_f32_16x16x2bf16 : GCCBuiltin<"__builtin_amdgcn_mfma_f32_16x16x2bf16">,
  Intrinsic<[llvm_v16f32_ty],
            [llvm_v2i16_ty, llvm_v2i16_ty, llvm_v16f32_ty,
             llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
            [IntrConvergent, IntrNoMem, IntrWillReturn,
             ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>]>;

def int_amdgcn_mfma_f32_4x4x2bf16 : GCCBuiltin<"__builtin_amdgcn_mfma_f32_4x4x2bf16">,
  Intrinsic<[llvm_v4f32_ty],
            [llvm_v2i16_ty, llvm_v2i16_ty, llvm_v4f32_ty,
             llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
            [IntrConvergent, IntrNoMem, IntrWillReturn,
             ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>]>;

def int_amdgcn_mfma_f32_32x32x4bf16 : GCCBuiltin<"__builtin_amdgcn_mfma_f32_32x32x4bf16">,
  Intrinsic<[llvm_v16f32_ty],
            [llvm_v2i16_ty, llvm_v2i16_ty, llvm_v16f32_ty,
             llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
            [IntrConvergent, IntrNoMem, IntrWillReturn,
             ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>]>;

def int_amdgcn_mfma_f32_16x16x8bf16 : GCCBuiltin<"__builtin_amdgcn_mfma_f32_16x16x8bf16">,
  Intrinsic<[llvm_v4f32_ty],
            [llvm_v2i16_ty, llvm_v2i16_ty, llvm_v4f32_ty,
             llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
            [IntrConvergent, IntrNoMem, IntrWillReturn,
             ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>]>;

//===----------------------------------------------------------------------===//
// Special Intrinsics for backend internal use only. No frontend
// should emit calls to these.
//===----------------------------------------------------------------------===//

def int_amdgcn_if : Intrinsic<[llvm_i1_ty, llvm_anyint_ty],
  [llvm_i1_ty], [IntrConvergent, IntrWillReturn]
>;

def int_amdgcn_else : Intrinsic<[llvm_i1_ty, llvm_anyint_ty],
  [llvm_anyint_ty], [IntrConvergent, IntrWillReturn]
>;

def int_amdgcn_if_break : Intrinsic<[llvm_anyint_ty],
  [llvm_i1_ty, LLVMMatchType<0>],
  [IntrNoMem, IntrConvergent, IntrWillReturn]
>;

def int_amdgcn_loop : Intrinsic<[llvm_i1_ty],
  [llvm_anyint_ty], [IntrConvergent, IntrWillReturn]
>;

def int_amdgcn_end_cf : Intrinsic<[], [llvm_anyint_ty],
  [IntrConvergent, IntrWillReturn]>;

// Represent unreachable in a divergent region.
def int_amdgcn_unreachable : Intrinsic<[], [], [IntrConvergent]>;

// Emit 2.5 ulp, no-denormal division. Should only be inserted by a pass
// based on !fpmath metadata.
def int_amdgcn_fdiv_fast : Intrinsic<
  [llvm_float_ty], [llvm_float_ty, llvm_float_ty],
  [IntrNoMem, IntrSpeculatable, IntrWillReturn]
>;

// Represent a relocation constant.
def int_amdgcn_reloc_constant : Intrinsic<
  [llvm_i32_ty], [llvm_metadata_ty],
  [IntrNoMem, IntrSpeculatable, IntrWillReturn]
>;

}