AMDHSAKernelDescriptor.h 9.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246
  1. #pragma once
  2. #ifdef __GNUC__
  3. #pragma GCC diagnostic push
  4. #pragma GCC diagnostic ignored "-Wunused-parameter"
  5. #endif
  6. //===--- AMDHSAKernelDescriptor.h -----------------------------*- C++ -*---===//
  7. //
  8. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  9. // See https://llvm.org/LICENSE.txt for license information.
  10. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  11. //
  12. //===----------------------------------------------------------------------===//
  13. //
  14. /// \file
  15. /// AMDHSA kernel descriptor definitions. For more information, visit
  16. /// https://llvm.org/docs/AMDGPUUsage.html#kernel-descriptor
  17. //
  18. //===----------------------------------------------------------------------===//
  19. #ifndef LLVM_SUPPORT_AMDHSAKERNELDESCRIPTOR_H
  20. #define LLVM_SUPPORT_AMDHSAKERNELDESCRIPTOR_H
  21. #include <cstddef>
  22. #include <cstdint>
  23. // Gets offset of specified member in specified type.
  24. #ifndef offsetof
  25. #define offsetof(TYPE, MEMBER) ((size_t)&((TYPE*)0)->MEMBER)
  26. #endif // offsetof
  27. // Creates enumeration entries used for packing bits into integers. Enumeration
  28. // entries include bit shift amount, bit width, and bit mask.
  29. #ifndef AMDHSA_BITS_ENUM_ENTRY
  30. #define AMDHSA_BITS_ENUM_ENTRY(NAME, SHIFT, WIDTH) \
  31. NAME ## _SHIFT = (SHIFT), \
  32. NAME ## _WIDTH = (WIDTH), \
  33. NAME = (((1 << (WIDTH)) - 1) << (SHIFT))
  34. #endif // AMDHSA_BITS_ENUM_ENTRY
  35. // Gets bits for specified bit mask from specified source.
  36. #ifndef AMDHSA_BITS_GET
  37. #define AMDHSA_BITS_GET(SRC, MSK) ((SRC & MSK) >> MSK ## _SHIFT)
  38. #endif // AMDHSA_BITS_GET
  39. // Sets bits for specified bit mask in specified destination.
  40. #ifndef AMDHSA_BITS_SET
  41. #define AMDHSA_BITS_SET(DST, MSK, VAL) \
  42. DST &= ~MSK; \
  43. DST |= ((VAL << MSK ## _SHIFT) & MSK)
  44. #endif // AMDHSA_BITS_SET
  45. namespace llvm {
  46. namespace amdhsa {
  47. // Floating point rounding modes. Must match hardware definition.
  48. enum : uint8_t {
  49. FLOAT_ROUND_MODE_NEAR_EVEN = 0,
  50. FLOAT_ROUND_MODE_PLUS_INFINITY = 1,
  51. FLOAT_ROUND_MODE_MINUS_INFINITY = 2,
  52. FLOAT_ROUND_MODE_ZERO = 3,
  53. };
  54. // Floating point denorm modes. Must match hardware definition.
  55. enum : uint8_t {
  56. FLOAT_DENORM_MODE_FLUSH_SRC_DST = 0,
  57. FLOAT_DENORM_MODE_FLUSH_DST = 1,
  58. FLOAT_DENORM_MODE_FLUSH_SRC = 2,
  59. FLOAT_DENORM_MODE_FLUSH_NONE = 3,
  60. };
  61. // System VGPR workitem IDs. Must match hardware definition.
  62. enum : uint8_t {
  63. SYSTEM_VGPR_WORKITEM_ID_X = 0,
  64. SYSTEM_VGPR_WORKITEM_ID_X_Y = 1,
  65. SYSTEM_VGPR_WORKITEM_ID_X_Y_Z = 2,
  66. SYSTEM_VGPR_WORKITEM_ID_UNDEFINED = 3,
  67. };
  68. // Compute program resource register 1. Must match hardware definition.
  69. #define COMPUTE_PGM_RSRC1(NAME, SHIFT, WIDTH) \
  70. AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC1_ ## NAME, SHIFT, WIDTH)
  71. enum : int32_t {
  72. COMPUTE_PGM_RSRC1(GRANULATED_WORKITEM_VGPR_COUNT, 0, 6),
  73. COMPUTE_PGM_RSRC1(GRANULATED_WAVEFRONT_SGPR_COUNT, 6, 4),
  74. COMPUTE_PGM_RSRC1(PRIORITY, 10, 2),
  75. COMPUTE_PGM_RSRC1(FLOAT_ROUND_MODE_32, 12, 2),
  76. COMPUTE_PGM_RSRC1(FLOAT_ROUND_MODE_16_64, 14, 2),
  77. COMPUTE_PGM_RSRC1(FLOAT_DENORM_MODE_32, 16, 2),
  78. COMPUTE_PGM_RSRC1(FLOAT_DENORM_MODE_16_64, 18, 2),
  79. COMPUTE_PGM_RSRC1(PRIV, 20, 1),
  80. COMPUTE_PGM_RSRC1(ENABLE_DX10_CLAMP, 21, 1),
  81. COMPUTE_PGM_RSRC1(DEBUG_MODE, 22, 1),
  82. COMPUTE_PGM_RSRC1(ENABLE_IEEE_MODE, 23, 1),
  83. COMPUTE_PGM_RSRC1(BULKY, 24, 1),
  84. COMPUTE_PGM_RSRC1(CDBG_USER, 25, 1),
  85. COMPUTE_PGM_RSRC1(FP16_OVFL, 26, 1), // GFX9+
  86. COMPUTE_PGM_RSRC1(RESERVED0, 27, 2),
  87. COMPUTE_PGM_RSRC1(WGP_MODE, 29, 1), // GFX10+
  88. COMPUTE_PGM_RSRC1(MEM_ORDERED, 30, 1), // GFX10+
  89. COMPUTE_PGM_RSRC1(FWD_PROGRESS, 31, 1), // GFX10+
  90. };
  91. #undef COMPUTE_PGM_RSRC1
  92. // Compute program resource register 2. Must match hardware definition.
  93. #define COMPUTE_PGM_RSRC2(NAME, SHIFT, WIDTH) \
  94. AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC2_ ## NAME, SHIFT, WIDTH)
  95. enum : int32_t {
  96. COMPUTE_PGM_RSRC2(ENABLE_PRIVATE_SEGMENT, 0, 1),
  97. COMPUTE_PGM_RSRC2(USER_SGPR_COUNT, 1, 5),
  98. COMPUTE_PGM_RSRC2(ENABLE_TRAP_HANDLER, 6, 1),
  99. COMPUTE_PGM_RSRC2(ENABLE_SGPR_WORKGROUP_ID_X, 7, 1),
  100. COMPUTE_PGM_RSRC2(ENABLE_SGPR_WORKGROUP_ID_Y, 8, 1),
  101. COMPUTE_PGM_RSRC2(ENABLE_SGPR_WORKGROUP_ID_Z, 9, 1),
  102. COMPUTE_PGM_RSRC2(ENABLE_SGPR_WORKGROUP_INFO, 10, 1),
  103. COMPUTE_PGM_RSRC2(ENABLE_VGPR_WORKITEM_ID, 11, 2),
  104. COMPUTE_PGM_RSRC2(ENABLE_EXCEPTION_ADDRESS_WATCH, 13, 1),
  105. COMPUTE_PGM_RSRC2(ENABLE_EXCEPTION_MEMORY, 14, 1),
  106. COMPUTE_PGM_RSRC2(GRANULATED_LDS_SIZE, 15, 9),
  107. COMPUTE_PGM_RSRC2(ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, 24, 1),
  108. COMPUTE_PGM_RSRC2(ENABLE_EXCEPTION_FP_DENORMAL_SOURCE, 25, 1),
  109. COMPUTE_PGM_RSRC2(ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, 26, 1),
  110. COMPUTE_PGM_RSRC2(ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW, 27, 1),
  111. COMPUTE_PGM_RSRC2(ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW, 28, 1),
  112. COMPUTE_PGM_RSRC2(ENABLE_EXCEPTION_IEEE_754_FP_INEXACT, 29, 1),
  113. COMPUTE_PGM_RSRC2(ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO, 30, 1),
  114. COMPUTE_PGM_RSRC2(RESERVED0, 31, 1),
  115. };
  116. #undef COMPUTE_PGM_RSRC2
  117. // Compute program resource register 3 for GFX90A+. Must match hardware
  118. // definition.
  119. #define COMPUTE_PGM_RSRC3_GFX90A(NAME, SHIFT, WIDTH) \
  120. AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC3_GFX90A_ ## NAME, SHIFT, WIDTH)
  121. enum : int32_t {
  122. COMPUTE_PGM_RSRC3_GFX90A(ACCUM_OFFSET, 0, 6),
  123. COMPUTE_PGM_RSRC3_GFX90A(RESERVED0, 6, 10),
  124. COMPUTE_PGM_RSRC3_GFX90A(TG_SPLIT, 16, 1),
  125. COMPUTE_PGM_RSRC3_GFX90A(RESERVED1, 17, 15),
  126. };
  127. #undef COMPUTE_PGM_RSRC3_GFX90A
  128. // Compute program resource register 3 for GFX10+. Must match hardware
  129. // definition.
  130. #define COMPUTE_PGM_RSRC3_GFX10_PLUS(NAME, SHIFT, WIDTH) \
  131. AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC3_GFX10_PLUS_ ## NAME, SHIFT, WIDTH)
  132. enum : int32_t {
  133. COMPUTE_PGM_RSRC3_GFX10_PLUS(SHARED_VGPR_COUNT, 0, 4), // GFX10+
  134. COMPUTE_PGM_RSRC3_GFX10_PLUS(INST_PREF_SIZE, 4, 6), // GFX11+
  135. COMPUTE_PGM_RSRC3_GFX10_PLUS(TRAP_ON_START, 10, 1), // GFX11+
  136. COMPUTE_PGM_RSRC3_GFX10_PLUS(TRAP_ON_END, 11, 1), // GFX11+
  137. COMPUTE_PGM_RSRC3_GFX10_PLUS(RESERVED0, 12, 19),
  138. COMPUTE_PGM_RSRC3_GFX10_PLUS(IMAGE_OP, 31, 1), // GFX11+
  139. };
  140. #undef COMPUTE_PGM_RSRC3_GFX10_PLUS
  141. // Kernel code properties. Must be kept backwards compatible.
  142. #define KERNEL_CODE_PROPERTY(NAME, SHIFT, WIDTH) \
  143. AMDHSA_BITS_ENUM_ENTRY(KERNEL_CODE_PROPERTY_ ## NAME, SHIFT, WIDTH)
  144. enum : int32_t {
  145. KERNEL_CODE_PROPERTY(ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER, 0, 1),
  146. KERNEL_CODE_PROPERTY(ENABLE_SGPR_DISPATCH_PTR, 1, 1),
  147. KERNEL_CODE_PROPERTY(ENABLE_SGPR_QUEUE_PTR, 2, 1),
  148. KERNEL_CODE_PROPERTY(ENABLE_SGPR_KERNARG_SEGMENT_PTR, 3, 1),
  149. KERNEL_CODE_PROPERTY(ENABLE_SGPR_DISPATCH_ID, 4, 1),
  150. KERNEL_CODE_PROPERTY(ENABLE_SGPR_FLAT_SCRATCH_INIT, 5, 1),
  151. KERNEL_CODE_PROPERTY(ENABLE_SGPR_PRIVATE_SEGMENT_SIZE, 6, 1),
  152. KERNEL_CODE_PROPERTY(RESERVED0, 7, 3),
  153. KERNEL_CODE_PROPERTY(ENABLE_WAVEFRONT_SIZE32, 10, 1), // GFX10+
  154. KERNEL_CODE_PROPERTY(USES_DYNAMIC_STACK, 11, 1),
  155. KERNEL_CODE_PROPERTY(RESERVED1, 12, 4),
  156. };
  157. #undef KERNEL_CODE_PROPERTY
  158. // Kernel descriptor. Must be kept backwards compatible.
  159. struct kernel_descriptor_t {
  160. uint32_t group_segment_fixed_size;
  161. uint32_t private_segment_fixed_size;
  162. uint32_t kernarg_size;
  163. uint8_t reserved0[4];
  164. int64_t kernel_code_entry_byte_offset;
  165. uint8_t reserved1[20];
  166. uint32_t compute_pgm_rsrc3; // GFX10+ and GFX90A+
  167. uint32_t compute_pgm_rsrc1;
  168. uint32_t compute_pgm_rsrc2;
  169. uint16_t kernel_code_properties;
  170. uint8_t reserved2[6];
  171. };
  172. enum : uint32_t {
  173. GROUP_SEGMENT_FIXED_SIZE_OFFSET = 0,
  174. PRIVATE_SEGMENT_FIXED_SIZE_OFFSET = 4,
  175. KERNARG_SIZE_OFFSET = 8,
  176. RESERVED0_OFFSET = 12,
  177. KERNEL_CODE_ENTRY_BYTE_OFFSET_OFFSET = 16,
  178. RESERVED1_OFFSET = 24,
  179. COMPUTE_PGM_RSRC3_OFFSET = 44,
  180. COMPUTE_PGM_RSRC1_OFFSET = 48,
  181. COMPUTE_PGM_RSRC2_OFFSET = 52,
  182. KERNEL_CODE_PROPERTIES_OFFSET = 56,
  183. RESERVED2_OFFSET = 58,
  184. };
  185. static_assert(
  186. sizeof(kernel_descriptor_t) == 64,
  187. "invalid size for kernel_descriptor_t");
  188. static_assert(offsetof(kernel_descriptor_t, group_segment_fixed_size) ==
  189. GROUP_SEGMENT_FIXED_SIZE_OFFSET,
  190. "invalid offset for group_segment_fixed_size");
  191. static_assert(offsetof(kernel_descriptor_t, private_segment_fixed_size) ==
  192. PRIVATE_SEGMENT_FIXED_SIZE_OFFSET,
  193. "invalid offset for private_segment_fixed_size");
  194. static_assert(offsetof(kernel_descriptor_t, kernarg_size) ==
  195. KERNARG_SIZE_OFFSET,
  196. "invalid offset for kernarg_size");
  197. static_assert(offsetof(kernel_descriptor_t, reserved0) == RESERVED0_OFFSET,
  198. "invalid offset for reserved0");
  199. static_assert(offsetof(kernel_descriptor_t, kernel_code_entry_byte_offset) ==
  200. KERNEL_CODE_ENTRY_BYTE_OFFSET_OFFSET,
  201. "invalid offset for kernel_code_entry_byte_offset");
  202. static_assert(offsetof(kernel_descriptor_t, reserved1) == RESERVED1_OFFSET,
  203. "invalid offset for reserved1");
  204. static_assert(offsetof(kernel_descriptor_t, compute_pgm_rsrc3) ==
  205. COMPUTE_PGM_RSRC3_OFFSET,
  206. "invalid offset for compute_pgm_rsrc3");
  207. static_assert(offsetof(kernel_descriptor_t, compute_pgm_rsrc1) ==
  208. COMPUTE_PGM_RSRC1_OFFSET,
  209. "invalid offset for compute_pgm_rsrc1");
  210. static_assert(offsetof(kernel_descriptor_t, compute_pgm_rsrc2) ==
  211. COMPUTE_PGM_RSRC2_OFFSET,
  212. "invalid offset for compute_pgm_rsrc2");
  213. static_assert(offsetof(kernel_descriptor_t, kernel_code_properties) ==
  214. KERNEL_CODE_PROPERTIES_OFFSET,
  215. "invalid offset for kernel_code_properties");
  216. static_assert(offsetof(kernel_descriptor_t, reserved2) == RESERVED2_OFFSET,
  217. "invalid offset for reserved2");
  218. } // end namespace amdhsa
  219. } // end namespace llvm
  220. #endif // LLVM_SUPPORT_AMDHSAKERNELDESCRIPTOR_H
  221. #ifdef __GNUC__
  222. #pragma GCC diagnostic pop
  223. #endif