1 //===--- AMDHSAKernelDescriptor.h -----------------------------*- C++ -*---===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// AMDHSA kernel descriptor definitions. For more information, visit
11 /// https://llvm.org/docs/AMDGPUUsage.html#kernel-descriptor
12 ///
13 /// \warning
14 /// Any changes to this file should also be audited for corresponding changes
15 /// needed in both the assembler and disassembler, namely:
16 /// * AMDGPUAsmPrinter.{cpp,h}
17 /// * AMDGPUTargetStreamer.{cpp,h}
18 /// * AMDGPUDisassembler.{cpp,h}
19 //
20 //===----------------------------------------------------------------------===//
21 
22 #ifndef LLVM_SUPPORT_AMDHSAKERNELDESCRIPTOR_H
23 #define LLVM_SUPPORT_AMDHSAKERNELDESCRIPTOR_H
24 
25 #include <cstddef>
26 #include <cstdint>
27 
// Gets offset of specified member in specified type.
//
// Fallback definition only: it is compiled when no `offsetof` macro is already
// visible. <cstddef>, included above, normally supplies the standard macro, in
// which case this definition is skipped. The fallback yields the offset as a
// size_t by taking the address of MEMBER through a null TYPE pointer.
#ifndef offsetof
#define offsetof(TYPE, MEMBER) ((size_t)&((TYPE*)0)->MEMBER)
#endif // offsetof
32 
// Creates enumeration entries used for packing bits into integers. Enumeration
// entries include bit shift amount, bit width, and bit mask.
//
// For a field NAME that is WIDTH bits wide and starts at bit SHIFT, the
// expansion produces three enumerators:
//   NAME_SHIFT - the shift amount (SHIFT),
//   NAME_WIDTH - the bit width (WIDTH),
//   NAME       - the mask: WIDTH one-bits shifted left by SHIFT.
// The expansion has no trailing comma, so it is used as one item of an
// enumerator list. The mask is built from the int literal 1, so WIDTH must be
// smaller than the number of bits in an int.
#ifndef AMDHSA_BITS_ENUM_ENTRY
#define AMDHSA_BITS_ENUM_ENTRY(NAME, SHIFT, WIDTH) \
  NAME ## _SHIFT = (SHIFT),                        \
  NAME ## _WIDTH = (WIDTH),                        \
  NAME = (((1 << (WIDTH)) - 1) << (SHIFT))
#endif // AMDHSA_BITS_ENUM_ENTRY
41 
// Gets bits for specified bit mask from specified source.
//
// Masks SRC with MSK and shifts the result right by MSK_SHIFT, so the field
// value is returned starting at bit 0. MSK must be the bare name of a mask
// enumerator that has a matching MSK_SHIFT companion (as created by
// AMDHSA_BITS_ENUM_ENTRY), because the companion name is formed by token
// pasting. SRC is parenthesized so that an expression argument such as
// `A | B` is masked as a whole rather than being split by operator precedence.
#ifndef AMDHSA_BITS_GET
#define AMDHSA_BITS_GET(SRC, MSK) (((SRC) & MSK) >> MSK ## _SHIFT)
#endif // AMDHSA_BITS_GET
46 
// Sets bits for specified bit mask in specified destination.
//
// Clears the MSK bits of DST, then ORs in VAL shifted left by MSK_SHIFT and
// truncated to MSK; bits of DST outside MSK are left untouched. MSK must be
// the bare name of a mask enumerator that has a matching MSK_SHIFT companion
// (as created by AMDHSA_BITS_ENUM_ENTRY), because the companion name is formed
// by token pasting.
//
// The body is wrapped in do { } while (0) so the macro behaves as a single
// statement (for example under an unbraced `if`), and DST/VAL are
// parenthesized so that expression arguments are applied as a whole. DST is
// evaluated twice, so it must not have side effects. Invoke it as a statement
// followed by a semicolon.
#ifndef AMDHSA_BITS_SET
#define AMDHSA_BITS_SET(DST, MSK, VAL)           \
  do {                                           \
    (DST) &= ~MSK;                               \
    (DST) |= (((VAL) << MSK ## _SHIFT) & MSK);   \
  } while (0)
#endif // AMDHSA_BITS_SET
53 
54 namespace llvm {
55 namespace amdhsa {
56 
// Floating point rounding modes. Must match hardware definition.
// Unnamed enumeration with a uint8_t underlying type; the four enumerators
// take the consecutive values 0 through 3.
enum : uint8_t {
  FLOAT_ROUND_MODE_NEAR_EVEN = 0,
  FLOAT_ROUND_MODE_PLUS_INFINITY = 1,
  FLOAT_ROUND_MODE_MINUS_INFINITY = 2,
  FLOAT_ROUND_MODE_ZERO = 3,
};
64 
// Floating point denorm modes. Must match hardware definition.
// Unnamed enumeration with a uint8_t underlying type; the four enumerators
// take the consecutive values 0 through 3.
enum : uint8_t {
  FLOAT_DENORM_MODE_FLUSH_SRC_DST = 0,
  FLOAT_DENORM_MODE_FLUSH_DST = 1,
  FLOAT_DENORM_MODE_FLUSH_SRC = 2,
  FLOAT_DENORM_MODE_FLUSH_NONE = 3,
};
72 
// System VGPR workitem IDs. Must match hardware definition.
// Unnamed enumeration with a uint8_t underlying type; the four enumerators
// take the consecutive values 0 through 3.
enum : uint8_t {
  SYSTEM_VGPR_WORKITEM_ID_X = 0,
  SYSTEM_VGPR_WORKITEM_ID_X_Y = 1,
  SYSTEM_VGPR_WORKITEM_ID_X_Y_Z = 2,
  SYSTEM_VGPR_WORKITEM_ID_UNDEFINED = 3,
};
80 
// Compute program resource register 1. Must match hardware definition.
//
// Each helper macro below forwards to AMDHSA_BITS_ENUM_ENTRY with a name
// prefix that carries the hardware generation range named in the comment above
// it, so every field yields <PREFIX>_<NAME>_SHIFT, <PREFIX>_<NAME>_WIDTH and
// the mask <PREFIX>_<NAME>. Fields with different generation prefixes may
// cover the same bits (for example bit 21 below); the prefix keeps their
// enumerator names distinct.
//
// NOTE(review): only COMPUTE_PGM_RSRC1 is #undef'd after the enumeration; the
// generation-specific helper macros stay defined for the rest of the
// translation unit -- confirm that this is intended.
// GFX6+.
#define COMPUTE_PGM_RSRC1(NAME, SHIFT, WIDTH) \
  AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC1_ ## NAME, SHIFT, WIDTH)
// [GFX6-GFX8].
#define COMPUTE_PGM_RSRC1_GFX6_GFX8(NAME, SHIFT, WIDTH) \
  AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC1_GFX6_GFX8_ ## NAME, SHIFT, WIDTH)
// [GFX6-GFX9].
#define COMPUTE_PGM_RSRC1_GFX6_GFX9(NAME, SHIFT, WIDTH) \
  AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC1_GFX6_GFX9_ ## NAME, SHIFT, WIDTH)
// [GFX6-GFX11].
#define COMPUTE_PGM_RSRC1_GFX6_GFX11(NAME, SHIFT, WIDTH)                       \
  AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC1_GFX6_GFX11_##NAME, SHIFT, WIDTH)
// GFX9+.
#define COMPUTE_PGM_RSRC1_GFX9_PLUS(NAME, SHIFT, WIDTH) \
  AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC1_GFX9_PLUS_ ## NAME, SHIFT, WIDTH)
// GFX10+.
#define COMPUTE_PGM_RSRC1_GFX10_PLUS(NAME, SHIFT, WIDTH) \
  AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC1_GFX10_PLUS_ ## NAME, SHIFT, WIDTH)
// GFX12+.
#define COMPUTE_PGM_RSRC1_GFX12_PLUS(NAME, SHIFT, WIDTH)                       \
  AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC1_GFX12_PLUS_##NAME, SHIFT, WIDTH)
// Field table: each entry is (name, first bit, width in bits). Together the
// entries describe bits 0 through 31 of the register.
enum : int32_t {
  COMPUTE_PGM_RSRC1(GRANULATED_WORKITEM_VGPR_COUNT, 0, 6),
  COMPUTE_PGM_RSRC1(GRANULATED_WAVEFRONT_SGPR_COUNT, 6, 4),
  COMPUTE_PGM_RSRC1(PRIORITY, 10, 2),
  COMPUTE_PGM_RSRC1(FLOAT_ROUND_MODE_32, 12, 2),
  COMPUTE_PGM_RSRC1(FLOAT_ROUND_MODE_16_64, 14, 2),
  COMPUTE_PGM_RSRC1(FLOAT_DENORM_MODE_32, 16, 2),
  COMPUTE_PGM_RSRC1(FLOAT_DENORM_MODE_16_64, 18, 2),
  COMPUTE_PGM_RSRC1(PRIV, 20, 1),
  COMPUTE_PGM_RSRC1_GFX6_GFX11(ENABLE_DX10_CLAMP, 21, 1),
  COMPUTE_PGM_RSRC1_GFX12_PLUS(ENABLE_WG_RR_EN, 21, 1),
  COMPUTE_PGM_RSRC1(DEBUG_MODE, 22, 1),
  COMPUTE_PGM_RSRC1_GFX6_GFX11(ENABLE_IEEE_MODE, 23, 1),
  COMPUTE_PGM_RSRC1_GFX12_PLUS(DISABLE_PERF, 23, 1),
  COMPUTE_PGM_RSRC1(BULKY, 24, 1),
  COMPUTE_PGM_RSRC1(CDBG_USER, 25, 1),
  COMPUTE_PGM_RSRC1_GFX6_GFX8(RESERVED0, 26, 1),
  COMPUTE_PGM_RSRC1_GFX9_PLUS(FP16_OVFL, 26, 1),
  COMPUTE_PGM_RSRC1(RESERVED1, 27, 2),
  COMPUTE_PGM_RSRC1_GFX6_GFX9(RESERVED2, 29, 3),
  COMPUTE_PGM_RSRC1_GFX10_PLUS(WGP_MODE, 29, 1),
  COMPUTE_PGM_RSRC1_GFX10_PLUS(MEM_ORDERED, 30, 1),
  COMPUTE_PGM_RSRC1_GFX10_PLUS(FWD_PROGRESS, 31, 1),
};
#undef COMPUTE_PGM_RSRC1
128 
// Compute program resource register 2. Must match hardware definition.
//
// Each helper macro below forwards to AMDHSA_BITS_ENUM_ENTRY with a name
// prefix that carries the hardware generation range named in the comment above
// it, so every field yields <PREFIX>_<NAME>_SHIFT, <PREFIX>_<NAME>_WIDTH and
// the mask <PREFIX>_<NAME>. Bit 6 is described twice, once per generation
// range; the prefix keeps the two enumerator names distinct.
//
// NOTE(review): only COMPUTE_PGM_RSRC2 is #undef'd after the enumeration; the
// generation-specific helper macros stay defined for the rest of the
// translation unit -- confirm that this is intended.
// GFX6+.
#define COMPUTE_PGM_RSRC2(NAME, SHIFT, WIDTH) \
  AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC2_ ## NAME, SHIFT, WIDTH)
// [GFX6-GFX11].
#define COMPUTE_PGM_RSRC2_GFX6_GFX11(NAME, SHIFT, WIDTH)                       \
  AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC2_GFX6_GFX11_##NAME, SHIFT, WIDTH)
// GFX12+.
#define COMPUTE_PGM_RSRC2_GFX12_PLUS(NAME, SHIFT, WIDTH)                       \
  AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC2_GFX12_PLUS_##NAME, SHIFT, WIDTH)
// Field table: each entry is (name, first bit, width in bits). Together the
// entries describe bits 0 through 31 of the register.
enum : int32_t {
  COMPUTE_PGM_RSRC2(ENABLE_PRIVATE_SEGMENT, 0, 1),
  COMPUTE_PGM_RSRC2(USER_SGPR_COUNT, 1, 5),
  COMPUTE_PGM_RSRC2_GFX6_GFX11(ENABLE_TRAP_HANDLER, 6, 1),
  COMPUTE_PGM_RSRC2_GFX12_PLUS(RESERVED1, 6, 1),
  COMPUTE_PGM_RSRC2(ENABLE_SGPR_WORKGROUP_ID_X, 7, 1),
  COMPUTE_PGM_RSRC2(ENABLE_SGPR_WORKGROUP_ID_Y, 8, 1),
  COMPUTE_PGM_RSRC2(ENABLE_SGPR_WORKGROUP_ID_Z, 9, 1),
  COMPUTE_PGM_RSRC2(ENABLE_SGPR_WORKGROUP_INFO, 10, 1),
  COMPUTE_PGM_RSRC2(ENABLE_VGPR_WORKITEM_ID, 11, 2),
  COMPUTE_PGM_RSRC2(ENABLE_EXCEPTION_ADDRESS_WATCH, 13, 1),
  COMPUTE_PGM_RSRC2(ENABLE_EXCEPTION_MEMORY, 14, 1),
  COMPUTE_PGM_RSRC2(GRANULATED_LDS_SIZE, 15, 9),
  COMPUTE_PGM_RSRC2(ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, 24, 1),
  COMPUTE_PGM_RSRC2(ENABLE_EXCEPTION_FP_DENORMAL_SOURCE, 25, 1),
  COMPUTE_PGM_RSRC2(ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, 26, 1),
  COMPUTE_PGM_RSRC2(ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW, 27, 1),
  COMPUTE_PGM_RSRC2(ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW, 28, 1),
  COMPUTE_PGM_RSRC2(ENABLE_EXCEPTION_IEEE_754_FP_INEXACT, 29, 1),
  COMPUTE_PGM_RSRC2(ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO, 30, 1),
  COMPUTE_PGM_RSRC2(RESERVED0, 31, 1),
};
#undef COMPUTE_PGM_RSRC2
162 
// Compute program resource register 3 for GFX90A+. Must match hardware
// definition.
//
// The helper macro forwards to AMDHSA_BITS_ENUM_ENTRY with the
// COMPUTE_PGM_RSRC3_GFX90A_ prefix and is #undef'd once the enumeration has
// been defined. Each entry is (name, first bit, width in bits); together the
// four entries describe bits 0 through 31 of the register.
#define COMPUTE_PGM_RSRC3_GFX90A(NAME, SHIFT, WIDTH) \
  AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC3_GFX90A_ ## NAME, SHIFT, WIDTH)
enum : int32_t {
  COMPUTE_PGM_RSRC3_GFX90A(ACCUM_OFFSET, 0, 6),
  COMPUTE_PGM_RSRC3_GFX90A(RESERVED0, 6, 10),
  COMPUTE_PGM_RSRC3_GFX90A(TG_SPLIT, 16, 1),
  COMPUTE_PGM_RSRC3_GFX90A(RESERVED1, 17, 15),
};
#undef COMPUTE_PGM_RSRC3_GFX90A
174 
// Compute program resource register 3 for GFX10+. Must match hardware
// definition.
//
// Each helper macro below forwards to AMDHSA_BITS_ENUM_ENTRY with a name
// prefix that carries the hardware generation range named in the comment above
// it, so every field yields <PREFIX>_<NAME>_SHIFT, <PREFIX>_<NAME>_WIDTH and
// the mask <PREFIX>_<NAME>. Several bit ranges are described more than once
// under different generation prefixes (for example bits 4-11); the prefix
// keeps the enumerator names distinct.
//
// NOTE(review): only COMPUTE_PGM_RSRC3_GFX10_PLUS is #undef'd after the
// enumeration; the other helper macros defined here stay defined for the rest
// of the translation unit -- confirm that this is intended.
// GFX10+.
#define COMPUTE_PGM_RSRC3_GFX10_PLUS(NAME, SHIFT, WIDTH) \
  AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC3_GFX10_PLUS_ ## NAME, SHIFT, WIDTH)
// [GFX10].
#define COMPUTE_PGM_RSRC3_GFX10(NAME, SHIFT, WIDTH)                            \
  AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC3_GFX10_##NAME, SHIFT, WIDTH)
// [GFX10-GFX11].
#define COMPUTE_PGM_RSRC3_GFX10_GFX11(NAME, SHIFT, WIDTH)                      \
  AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC3_GFX10_GFX11_##NAME, SHIFT, WIDTH)
// GFX11+.
#define COMPUTE_PGM_RSRC3_GFX11_PLUS(NAME, SHIFT, WIDTH) \
  AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC3_GFX11_PLUS_ ## NAME, SHIFT, WIDTH)
// [GFX11].
#define COMPUTE_PGM_RSRC3_GFX11(NAME, SHIFT, WIDTH)                            \
  AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC3_GFX11_##NAME, SHIFT, WIDTH)
// GFX12+.
#define COMPUTE_PGM_RSRC3_GFX12_PLUS(NAME, SHIFT, WIDTH)                       \
  AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC3_GFX12_PLUS_##NAME, SHIFT, WIDTH)
// Field table: each entry is (name, first bit, width in bits).
enum : int32_t {
  COMPUTE_PGM_RSRC3_GFX10_GFX11(SHARED_VGPR_COUNT, 0, 4),
  COMPUTE_PGM_RSRC3_GFX12_PLUS(RESERVED0, 0, 4),
  COMPUTE_PGM_RSRC3_GFX10(RESERVED1, 4, 8),
  COMPUTE_PGM_RSRC3_GFX11(INST_PREF_SIZE, 4, 6),
  COMPUTE_PGM_RSRC3_GFX11(TRAP_ON_START, 10, 1),
  COMPUTE_PGM_RSRC3_GFX11(TRAP_ON_END, 11, 1),
  COMPUTE_PGM_RSRC3_GFX12_PLUS(INST_PREF_SIZE, 4, 8),
  COMPUTE_PGM_RSRC3_GFX10_PLUS(RESERVED2, 12, 1),
  COMPUTE_PGM_RSRC3_GFX10_GFX11(RESERVED3, 13, 1),
  COMPUTE_PGM_RSRC3_GFX12_PLUS(GLG_EN, 13, 1),
  COMPUTE_PGM_RSRC3_GFX10_PLUS(RESERVED4, 14, 17),
  COMPUTE_PGM_RSRC3_GFX10(RESERVED5, 31, 1),
  COMPUTE_PGM_RSRC3_GFX11_PLUS(IMAGE_OP, 31, 1),
};
#undef COMPUTE_PGM_RSRC3_GFX10_PLUS
211 
// Kernel code properties. Must be kept backwards compatible.
//
// The helper macro forwards to AMDHSA_BITS_ENUM_ENTRY with the
// KERNEL_CODE_PROPERTY_ prefix and is #undef'd once the enumeration has been
// defined. Each entry is (name, first bit, width in bits); together the
// entries describe bits 0 through 15.
#define KERNEL_CODE_PROPERTY(NAME, SHIFT, WIDTH) \
  AMDHSA_BITS_ENUM_ENTRY(KERNEL_CODE_PROPERTY_ ## NAME, SHIFT, WIDTH)
enum : int32_t {
  KERNEL_CODE_PROPERTY(ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER, 0, 1),
  KERNEL_CODE_PROPERTY(ENABLE_SGPR_DISPATCH_PTR, 1, 1),
  KERNEL_CODE_PROPERTY(ENABLE_SGPR_QUEUE_PTR, 2, 1),
  KERNEL_CODE_PROPERTY(ENABLE_SGPR_KERNARG_SEGMENT_PTR, 3, 1),
  KERNEL_CODE_PROPERTY(ENABLE_SGPR_DISPATCH_ID, 4, 1),
  KERNEL_CODE_PROPERTY(ENABLE_SGPR_FLAT_SCRATCH_INIT, 5, 1),
  KERNEL_CODE_PROPERTY(ENABLE_SGPR_PRIVATE_SEGMENT_SIZE, 6, 1),
  KERNEL_CODE_PROPERTY(RESERVED0, 7, 3),
  KERNEL_CODE_PROPERTY(ENABLE_WAVEFRONT_SIZE32, 10, 1), // GFX10+
  KERNEL_CODE_PROPERTY(USES_DYNAMIC_STACK, 11, 1),
  KERNEL_CODE_PROPERTY(RESERVED1, 12, 4),
};
#undef KERNEL_CODE_PROPERTY
229 
// Kernarg preload specification.
//
// The helper macro forwards to AMDHSA_BITS_ENUM_ENTRY with the
// KERNARG_PRELOAD_SPEC_ prefix and is #undef'd once the enumeration has been
// defined. Two fields are described: LENGTH in bits 0-6 and OFFSET in
// bits 7-15.
#define KERNARG_PRELOAD_SPEC(NAME, SHIFT, WIDTH)                               \
  AMDHSA_BITS_ENUM_ENTRY(KERNARG_PRELOAD_SPEC_##NAME, SHIFT, WIDTH)
enum : int32_t {
  KERNARG_PRELOAD_SPEC(LENGTH, 0, 7),
  KERNARG_PRELOAD_SPEC(OFFSET, 7, 9),
};
#undef KERNARG_PRELOAD_SPEC
238 
// Kernel descriptor. Must be kept backwards compatible.
//
// The structure is 64 bytes in total. Its size and the byte offset of every
// member are pinned by the static_asserts that follow the *_OFFSET enumeration
// below, so members must not be reordered or resized. There is no member
// named reserved2; the reserved areas are reserved0, reserved1 and reserved3.
struct kernel_descriptor_t {
  uint32_t group_segment_fixed_size;
  uint32_t private_segment_fixed_size;
  uint32_t kernarg_size;
  uint8_t reserved0[4];
  int64_t kernel_code_entry_byte_offset;
  uint8_t reserved1[20];
  uint32_t compute_pgm_rsrc3; // GFX10+ and GFX90A+
  uint32_t compute_pgm_rsrc1;
  uint32_t compute_pgm_rsrc2;
  uint16_t kernel_code_properties;
  uint16_t kernarg_preload;
  uint8_t reserved3[4];
};
254 
// Byte offset of each kernel_descriptor_t member from the start of the
// structure, one enumerator per member. The static_asserts that follow check
// each value against offsetof() for the corresponding member.
enum : uint32_t {
  GROUP_SEGMENT_FIXED_SIZE_OFFSET = 0,
  PRIVATE_SEGMENT_FIXED_SIZE_OFFSET = 4,
  KERNARG_SIZE_OFFSET = 8,
  RESERVED0_OFFSET = 12,
  KERNEL_CODE_ENTRY_BYTE_OFFSET_OFFSET = 16,
  RESERVED1_OFFSET = 24,
  COMPUTE_PGM_RSRC3_OFFSET = 44,
  COMPUTE_PGM_RSRC1_OFFSET = 48,
  COMPUTE_PGM_RSRC2_OFFSET = 52,
  KERNEL_CODE_PROPERTIES_OFFSET = 56,
  KERNARG_PRELOAD_OFFSET = 58,
  RESERVED3_OFFSET = 60
};
269 
// Compile-time layout checks: kernel_descriptor_t must be exactly 64 bytes,
// and every member must sit at the byte offset given by its *_OFFSET
// enumerator. A mismatch fails the build with the message naming the member.
static_assert(
    sizeof(kernel_descriptor_t) == 64,
    "invalid size for kernel_descriptor_t");
static_assert(offsetof(kernel_descriptor_t, group_segment_fixed_size) ==
                  GROUP_SEGMENT_FIXED_SIZE_OFFSET,
              "invalid offset for group_segment_fixed_size");
static_assert(offsetof(kernel_descriptor_t, private_segment_fixed_size) ==
                  PRIVATE_SEGMENT_FIXED_SIZE_OFFSET,
              "invalid offset for private_segment_fixed_size");
static_assert(offsetof(kernel_descriptor_t, kernarg_size) ==
                  KERNARG_SIZE_OFFSET,
              "invalid offset for kernarg_size");
static_assert(offsetof(kernel_descriptor_t, reserved0) == RESERVED0_OFFSET,
              "invalid offset for reserved0");
static_assert(offsetof(kernel_descriptor_t, kernel_code_entry_byte_offset) ==
                  KERNEL_CODE_ENTRY_BYTE_OFFSET_OFFSET,
              "invalid offset for kernel_code_entry_byte_offset");
static_assert(offsetof(kernel_descriptor_t, reserved1) == RESERVED1_OFFSET,
              "invalid offset for reserved1");
static_assert(offsetof(kernel_descriptor_t, compute_pgm_rsrc3) ==
                  COMPUTE_PGM_RSRC3_OFFSET,
              "invalid offset for compute_pgm_rsrc3");
static_assert(offsetof(kernel_descriptor_t, compute_pgm_rsrc1) ==
                  COMPUTE_PGM_RSRC1_OFFSET,
              "invalid offset for compute_pgm_rsrc1");
static_assert(offsetof(kernel_descriptor_t, compute_pgm_rsrc2) ==
                  COMPUTE_PGM_RSRC2_OFFSET,
              "invalid offset for compute_pgm_rsrc2");
static_assert(offsetof(kernel_descriptor_t, kernel_code_properties) ==
                  KERNEL_CODE_PROPERTIES_OFFSET,
              "invalid offset for kernel_code_properties");
static_assert(offsetof(kernel_descriptor_t, kernarg_preload) ==
                  KERNARG_PRELOAD_OFFSET,
              "invalid offset for kernarg_preload");
static_assert(offsetof(kernel_descriptor_t, reserved3) == RESERVED3_OFFSET,
              "invalid offset for reserved3");
306 
307 } // end namespace amdhsa
308 } // end namespace llvm
309 
310 #endif // LLVM_SUPPORT_AMDHSAKERNELDESCRIPTOR_H
311