1 //===--- AMDHSAKernelDescriptor.h -----------------------------*- C++ -*---===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// AMDHSA kernel descriptor definitions. For more information, visit
11 /// https://llvm.org/docs/AMDGPUUsage.html#kernel-descriptor
12 ///
13 /// \warning
14 /// Any changes to this file should also be audited for corresponding changes
15 /// needed in both the assembler and disassembler, namely:
16 /// * AMDGPUAsmPrinter.{cpp,h}
17 /// * AMDGPUTargetStreamer.{cpp,h}
18 /// * AMDGPUDisassembler.{cpp,h}
19 //
20 //===----------------------------------------------------------------------===//
21 
22 #ifndef LLVM_SUPPORT_AMDHSAKERNELDESCRIPTOR_H
23 #define LLVM_SUPPORT_AMDHSAKERNELDESCRIPTOR_H
24 
25 #include <cstddef>
26 #include <cstdint>
27 
// Fallback definition of offsetof: yields the byte offset of MEMBER within
// TYPE. Only takes effect when no offsetof macro is already defined at this
// point (the standard <cstddef> header included above normally provides one).
#ifndef offsetof
#define offsetof(TYPE, MEMBER) ((size_t)&(((TYPE *)0)->MEMBER))
#endif // offsetof
32 
// Expands to three enumerators that describe one bit field packed into an
// integer:
//   NAME_SHIFT - position of the field's lowest bit,
//   NAME_WIDTH - number of bits in the field,
//   NAME       - mask with WIDTH one-bits starting at bit SHIFT.
// Intended to be used inside an enum definition; the mask is computed from the
// two enumerators defined just before it.
#ifndef AMDHSA_BITS_ENUM_ENTRY
#define AMDHSA_BITS_ENUM_ENTRY(NAME, SHIFT, WIDTH)                             \
  NAME##_SHIFT = (SHIFT),                                                      \
  NAME##_WIDTH = (WIDTH),                                                      \
  NAME = (((1 << NAME##_WIDTH) - 1) << NAME##_SHIFT)
#endif // AMDHSA_BITS_ENUM_ENTRY
41 
// Extracts the bit field selected by MSK from SRC and returns it shifted down
// to bit 0.
//
// MSK must name a mask that has a companion MSK_SHIFT constant (as produced by
// AMDHSA_BITS_ENUM_ENTRY); a namespace-qualified name works because only the
// last token is pasted with _SHIFT. SRC and MSK are parenthesized so that
// arbitrary expressions (e.g. `a | b`) can be passed as arguments without
// operator-precedence surprises.
#ifndef AMDHSA_BITS_GET
#define AMDHSA_BITS_GET(SRC, MSK) (((SRC) & (MSK)) >> MSK ## _SHIFT)
#endif // AMDHSA_BITS_GET
46 
// Overwrites the bit field selected by MSK in DST with VAL: the field is first
// cleared, then VAL is shifted into position and masked so that excess bits of
// VAL cannot spill into neighbouring fields.
//
// MSK must name a mask that has a companion MSK_SHIFT constant (as produced by
// AMDHSA_BITS_ENUM_ENTRY). DST must be a modifiable lvalue and is evaluated
// twice, so it should not have side effects.
//
// The expansion is wrapped in do { } while (0) so that the macro behaves as a
// single statement (e.g. as the body of an unbraced `if`), and every argument
// is parenthesized so that expressions such as `a | b` can be passed as VAL.
#ifndef AMDHSA_BITS_SET
#define AMDHSA_BITS_SET(DST, MSK, VAL)                                         \
  do {                                                                         \
    (DST) &= ~(MSK);                                                           \
    (DST) |= (((VAL) << MSK ## _SHIFT) & (MSK));                               \
  } while (0)
#endif // AMDHSA_BITS_SET
53 
54 namespace llvm {
55 namespace amdhsa {
56 
// Encodings of the floating point rounding modes. The numeric values are fixed
// by the hardware definition and must not be changed.
// NOTE(review): presumably these are the values stored in the
// FLOAT_ROUND_MODE_* fields of compute_pgm_rsrc1 - confirm against users.
enum : uint8_t {
  FLOAT_ROUND_MODE_NEAR_EVEN = 0,
  FLOAT_ROUND_MODE_PLUS_INFINITY = 1,
  FLOAT_ROUND_MODE_MINUS_INFINITY = 2,
  FLOAT_ROUND_MODE_ZERO = 3,
};
64 
// Encodings of the floating point denormal handling modes. The numeric values
// are fixed by the hardware definition and must not be changed.
// NOTE(review): presumably these are the values stored in the
// FLOAT_DENORM_MODE_* fields of compute_pgm_rsrc1 - confirm against users.
enum : uint8_t {
  FLOAT_DENORM_MODE_FLUSH_SRC_DST = 0,
  FLOAT_DENORM_MODE_FLUSH_DST = 1,
  FLOAT_DENORM_MODE_FLUSH_SRC = 2,
  FLOAT_DENORM_MODE_FLUSH_NONE = 3,
};
72 
// Encodings that select which system VGPR workitem IDs are used. The numeric
// values are fixed by the hardware definition and must not be changed.
// NOTE(review): presumably these are the values stored in the
// ENABLE_VGPR_WORKITEM_ID field of compute_pgm_rsrc2 - confirm against users.
enum : uint8_t {
  SYSTEM_VGPR_WORKITEM_ID_X = 0,
  SYSTEM_VGPR_WORKITEM_ID_X_Y = 1,
  SYSTEM_VGPR_WORKITEM_ID_X_Y_Z = 2,
  SYSTEM_VGPR_WORKITEM_ID_UNDEFINED = 3,
};
80 
81 // Compute program resource register 1. Must match hardware definition.
82 #define COMPUTE_PGM_RSRC1(NAME, SHIFT, WIDTH) \
83   AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC1_ ## NAME, SHIFT, WIDTH)
84 enum : int32_t {
85   COMPUTE_PGM_RSRC1(GRANULATED_WORKITEM_VGPR_COUNT, 0, 6),
86   COMPUTE_PGM_RSRC1(GRANULATED_WAVEFRONT_SGPR_COUNT, 6, 4),
87   COMPUTE_PGM_RSRC1(PRIORITY, 10, 2),
88   COMPUTE_PGM_RSRC1(FLOAT_ROUND_MODE_32, 12, 2),
89   COMPUTE_PGM_RSRC1(FLOAT_ROUND_MODE_16_64, 14, 2),
90   COMPUTE_PGM_RSRC1(FLOAT_DENORM_MODE_32, 16, 2),
91   COMPUTE_PGM_RSRC1(FLOAT_DENORM_MODE_16_64, 18, 2),
92   COMPUTE_PGM_RSRC1(PRIV, 20, 1),
93   COMPUTE_PGM_RSRC1(ENABLE_DX10_CLAMP, 21, 1),
94   COMPUTE_PGM_RSRC1(DEBUG_MODE, 22, 1),
95   COMPUTE_PGM_RSRC1(ENABLE_IEEE_MODE, 23, 1),
96   COMPUTE_PGM_RSRC1(BULKY, 24, 1),
97   COMPUTE_PGM_RSRC1(CDBG_USER, 25, 1),
98   COMPUTE_PGM_RSRC1(FP16_OVFL, 26, 1),    // GFX9+
99   COMPUTE_PGM_RSRC1(RESERVED0, 27, 2),
100   COMPUTE_PGM_RSRC1(WGP_MODE, 29, 1),     // GFX10+
101   COMPUTE_PGM_RSRC1(MEM_ORDERED, 30, 1),  // GFX10+
102   COMPUTE_PGM_RSRC1(FWD_PROGRESS, 31, 1), // GFX10+
103 };
104 #undef COMPUTE_PGM_RSRC1
105 
106 // Compute program resource register 2. Must match hardware definition.
107 #define COMPUTE_PGM_RSRC2(NAME, SHIFT, WIDTH) \
108   AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC2_ ## NAME, SHIFT, WIDTH)
109 enum : int32_t {
110   COMPUTE_PGM_RSRC2(ENABLE_PRIVATE_SEGMENT, 0, 1),
111   COMPUTE_PGM_RSRC2(USER_SGPR_COUNT, 1, 5),
112   COMPUTE_PGM_RSRC2(ENABLE_TRAP_HANDLER, 6, 1),
113   COMPUTE_PGM_RSRC2(ENABLE_SGPR_WORKGROUP_ID_X, 7, 1),
114   COMPUTE_PGM_RSRC2(ENABLE_SGPR_WORKGROUP_ID_Y, 8, 1),
115   COMPUTE_PGM_RSRC2(ENABLE_SGPR_WORKGROUP_ID_Z, 9, 1),
116   COMPUTE_PGM_RSRC2(ENABLE_SGPR_WORKGROUP_INFO, 10, 1),
117   COMPUTE_PGM_RSRC2(ENABLE_VGPR_WORKITEM_ID, 11, 2),
118   COMPUTE_PGM_RSRC2(ENABLE_EXCEPTION_ADDRESS_WATCH, 13, 1),
119   COMPUTE_PGM_RSRC2(ENABLE_EXCEPTION_MEMORY, 14, 1),
120   COMPUTE_PGM_RSRC2(GRANULATED_LDS_SIZE, 15, 9),
121   COMPUTE_PGM_RSRC2(ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, 24, 1),
122   COMPUTE_PGM_RSRC2(ENABLE_EXCEPTION_FP_DENORMAL_SOURCE, 25, 1),
123   COMPUTE_PGM_RSRC2(ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, 26, 1),
124   COMPUTE_PGM_RSRC2(ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW, 27, 1),
125   COMPUTE_PGM_RSRC2(ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW, 28, 1),
126   COMPUTE_PGM_RSRC2(ENABLE_EXCEPTION_IEEE_754_FP_INEXACT, 29, 1),
127   COMPUTE_PGM_RSRC2(ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO, 30, 1),
128   COMPUTE_PGM_RSRC2(RESERVED0, 31, 1),
129 };
130 #undef COMPUTE_PGM_RSRC2
131 
132 // Compute program resource register 3 for GFX90A+. Must match hardware
133 // definition.
134 #define COMPUTE_PGM_RSRC3_GFX90A(NAME, SHIFT, WIDTH) \
135   AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC3_GFX90A_ ## NAME, SHIFT, WIDTH)
136 enum : int32_t {
137   COMPUTE_PGM_RSRC3_GFX90A(ACCUM_OFFSET, 0, 6),
138   COMPUTE_PGM_RSRC3_GFX90A(RESERVED0, 6, 10),
139   COMPUTE_PGM_RSRC3_GFX90A(TG_SPLIT, 16, 1),
140   COMPUTE_PGM_RSRC3_GFX90A(RESERVED1, 17, 15),
141 };
142 #undef COMPUTE_PGM_RSRC3_GFX90A
143 
144 // Compute program resource register 3 for GFX10+. Must match hardware
145 // definition.
146 #define COMPUTE_PGM_RSRC3_GFX10_PLUS(NAME, SHIFT, WIDTH) \
147   AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC3_GFX10_PLUS_ ## NAME, SHIFT, WIDTH)
148 enum : int32_t {
149   COMPUTE_PGM_RSRC3_GFX10_PLUS(SHARED_VGPR_COUNT, 0, 4), // GFX10+
150   COMPUTE_PGM_RSRC3_GFX10_PLUS(INST_PREF_SIZE, 4, 6),    // GFX11+
151   COMPUTE_PGM_RSRC3_GFX10_PLUS(TRAP_ON_START, 10, 1),    // GFX11+
152   COMPUTE_PGM_RSRC3_GFX10_PLUS(TRAP_ON_END, 11, 1),      // GFX11+
153   COMPUTE_PGM_RSRC3_GFX10_PLUS(RESERVED0, 12, 19),
154   COMPUTE_PGM_RSRC3_GFX10_PLUS(IMAGE_OP, 31, 1),         // GFX11+
155 };
156 #undef COMPUTE_PGM_RSRC3_GFX10_PLUS
157 
158 // Kernel code properties. Must be kept backwards compatible.
159 #define KERNEL_CODE_PROPERTY(NAME, SHIFT, WIDTH) \
160   AMDHSA_BITS_ENUM_ENTRY(KERNEL_CODE_PROPERTY_ ## NAME, SHIFT, WIDTH)
161 enum : int32_t {
162   KERNEL_CODE_PROPERTY(ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER, 0, 1),
163   KERNEL_CODE_PROPERTY(ENABLE_SGPR_DISPATCH_PTR, 1, 1),
164   KERNEL_CODE_PROPERTY(ENABLE_SGPR_QUEUE_PTR, 2, 1),
165   KERNEL_CODE_PROPERTY(ENABLE_SGPR_KERNARG_SEGMENT_PTR, 3, 1),
166   KERNEL_CODE_PROPERTY(ENABLE_SGPR_DISPATCH_ID, 4, 1),
167   KERNEL_CODE_PROPERTY(ENABLE_SGPR_FLAT_SCRATCH_INIT, 5, 1),
168   KERNEL_CODE_PROPERTY(ENABLE_SGPR_PRIVATE_SEGMENT_SIZE, 6, 1),
169   KERNEL_CODE_PROPERTY(RESERVED0, 7, 3),
170   KERNEL_CODE_PROPERTY(ENABLE_WAVEFRONT_SIZE32, 10, 1), // GFX10+
171   KERNEL_CODE_PROPERTY(USES_DYNAMIC_STACK, 11, 1),
172   KERNEL_CODE_PROPERTY(RESERVED1, 12, 4),
173 };
174 #undef KERNEL_CODE_PROPERTY
175 
// Kernel descriptor. Must be kept backwards compatible: the structure is
// exactly 64 bytes and every field sits at a fixed byte offset (noted on each
// field below and verified by the static_asserts that follow the structure).
struct kernel_descriptor_t {
  uint32_t group_segment_fixed_size;     // Offset 0.
  uint32_t private_segment_fixed_size;   // Offset 4.
  uint32_t kernarg_size;                 // Offset 8.
  uint8_t reserved0[4];                  // Offset 12.
  int64_t kernel_code_entry_byte_offset; // Offset 16.
  uint8_t reserved1[20];                 // Offset 24.
  uint32_t compute_pgm_rsrc3;            // Offset 44. GFX10+ and GFX90A+
  uint32_t compute_pgm_rsrc1;            // Offset 48.
  uint32_t compute_pgm_rsrc2;            // Offset 52.
  uint16_t kernel_code_properties;       // Offset 56.
  uint8_t reserved2[6];                  // Offset 58.
};
190 
// Byte offsets of the kernel_descriptor_t fields, one constant per field in
// declaration order. The static_asserts in this file check each constant
// against the actual structure layout.
enum : uint32_t {
  GROUP_SEGMENT_FIXED_SIZE_OFFSET = 0,
  PRIVATE_SEGMENT_FIXED_SIZE_OFFSET = 4,
  KERNARG_SIZE_OFFSET = 8,
  RESERVED0_OFFSET = 12,
  KERNEL_CODE_ENTRY_BYTE_OFFSET_OFFSET = 16,
  RESERVED1_OFFSET = 24,
  COMPUTE_PGM_RSRC3_OFFSET = 44,
  COMPUTE_PGM_RSRC1_OFFSET = 48,
  COMPUTE_PGM_RSRC2_OFFSET = 52,
  KERNEL_CODE_PROPERTIES_OFFSET = 56,
  RESERVED2_OFFSET = 58,
};
204 
205 static_assert(
206     sizeof(kernel_descriptor_t) == 64,
207     "invalid size for kernel_descriptor_t");
208 static_assert(offsetof(kernel_descriptor_t, group_segment_fixed_size) ==
209                   GROUP_SEGMENT_FIXED_SIZE_OFFSET,
210               "invalid offset for group_segment_fixed_size");
211 static_assert(offsetof(kernel_descriptor_t, private_segment_fixed_size) ==
212                   PRIVATE_SEGMENT_FIXED_SIZE_OFFSET,
213               "invalid offset for private_segment_fixed_size");
214 static_assert(offsetof(kernel_descriptor_t, kernarg_size) ==
215                   KERNARG_SIZE_OFFSET,
216               "invalid offset for kernarg_size");
217 static_assert(offsetof(kernel_descriptor_t, reserved0) == RESERVED0_OFFSET,
218               "invalid offset for reserved0");
219 static_assert(offsetof(kernel_descriptor_t, kernel_code_entry_byte_offset) ==
220                   KERNEL_CODE_ENTRY_BYTE_OFFSET_OFFSET,
221               "invalid offset for kernel_code_entry_byte_offset");
222 static_assert(offsetof(kernel_descriptor_t, reserved1) == RESERVED1_OFFSET,
223               "invalid offset for reserved1");
224 static_assert(offsetof(kernel_descriptor_t, compute_pgm_rsrc3) ==
225                   COMPUTE_PGM_RSRC3_OFFSET,
226               "invalid offset for compute_pgm_rsrc3");
227 static_assert(offsetof(kernel_descriptor_t, compute_pgm_rsrc1) ==
228                   COMPUTE_PGM_RSRC1_OFFSET,
229               "invalid offset for compute_pgm_rsrc1");
230 static_assert(offsetof(kernel_descriptor_t, compute_pgm_rsrc2) ==
231                   COMPUTE_PGM_RSRC2_OFFSET,
232               "invalid offset for compute_pgm_rsrc2");
233 static_assert(offsetof(kernel_descriptor_t, kernel_code_properties) ==
234                   KERNEL_CODE_PROPERTIES_OFFSET,
235               "invalid offset for kernel_code_properties");
236 static_assert(offsetof(kernel_descriptor_t, reserved2) == RESERVED2_OFFSET,
237               "invalid offset for reserved2");
238 
239 } // end namespace amdhsa
240 } // end namespace llvm
241 
242 #endif // LLVM_SUPPORT_AMDHSAKERNELDESCRIPTOR_H
243