1 ////////////////////////////////////////////////////////////////////////////////
2 //
3 // The University of Illinois/NCSA
4 // Open Source License (NCSA)
5 //
6 // Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved.
7 //
8 // Developed by:
9 //
10 //                 AMD Research and AMD HSA Software Development
11 //
12 //                 Advanced Micro Devices, Inc.
13 //
14 //                 www.amd.com
15 //
16 // Permission is hereby granted, free of charge, to any person obtaining a copy
17 // of this software and associated documentation files (the "Software"), to
18 // deal with the Software without restriction, including without limitation
19 // the rights to use, copy, modify, merge, publish, distribute, sublicense,
20 // and/or sell copies of the Software, and to permit persons to whom the
21 // Software is furnished to do so, subject to the following conditions:
22 //
23 //  - Redistributions of source code must retain the above copyright notice,
24 //    this list of conditions and the following disclaimers.
25 //  - Redistributions in binary form must reproduce the above copyright
26 //    notice, this list of conditions and the following disclaimers in
27 //    the documentation and/or other materials provided with the distribution.
28 //  - Neither the names of Advanced Micro Devices, Inc,
29 //    nor the names of its contributors may be used to endorse or promote
30 //    products derived from this Software without specific prior written
31 //    permission.
32 //
33 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
34 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
35 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
36 // THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
37 // OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
38 // ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
39 // DEALINGS WITH THE SOFTWARE.
40 //
41 ////////////////////////////////////////////////////////////////////////////////
42 
43 #ifndef AMD_HSA_KERNEL_CODE_H
44 #define AMD_HSA_KERNEL_CODE_H
45 
46 #include "amd_hsa_common.h"
47 #include "hsa.h"
48 
49 // AMD Kernel Code Version Enumeration Values.
50 typedef uint32_t amd_kernel_code_version32_t;
51 enum amd_kernel_code_version_t {
52   AMD_KERNEL_CODE_VERSION_MAJOR = 1,
53   AMD_KERNEL_CODE_VERSION_MINOR = 1
54 };
55 
56 // AMD Machine Kind Enumeration Values.
57 typedef uint16_t amd_machine_kind16_t;
58 enum amd_machine_kind_t {
59   AMD_MACHINE_KIND_UNDEFINED = 0,
60   AMD_MACHINE_KIND_AMDGPU = 1
61 };
62 
63 // AMD Machine Version.
64 typedef uint16_t amd_machine_version16_t;
65 
66 // AMD Float Round Mode Enumeration Values.
67 enum amd_float_round_mode_t {
68   AMD_FLOAT_ROUND_MODE_NEAREST_EVEN = 0,
69   AMD_FLOAT_ROUND_MODE_PLUS_INFINITY = 1,
70   AMD_FLOAT_ROUND_MODE_MINUS_INFINITY = 2,
71   AMD_FLOAT_ROUND_MODE_ZERO = 3
72 };
73 
74 // AMD Float Denorm Mode Enumeration Values.
75 enum amd_float_denorm_mode_t {
76   AMD_FLOAT_DENORM_MODE_FLUSH_SOURCE_OUTPUT = 0,
77   AMD_FLOAT_DENORM_MODE_FLUSH_OUTPUT = 1,
78   AMD_FLOAT_DENORM_MODE_FLUSH_SOURCE = 2,
79   AMD_FLOAT_DENORM_MODE_NO_FLUSH = 3
80 };
81 
82 // AMD Compute Program Resource Register One.
83 typedef uint32_t amd_compute_pgm_rsrc_one32_t;
84 enum amd_compute_pgm_rsrc_one_t {
85   AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_ONE_GRANULATED_WORKITEM_VGPR_COUNT, 0, 6),
86   AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_ONE_GRANULATED_WAVEFRONT_SGPR_COUNT, 6, 4),
87   AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_ONE_PRIORITY, 10, 2),
88   AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_ONE_FLOAT_ROUND_MODE_32, 12, 2),
89   AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_ONE_FLOAT_ROUND_MODE_16_64, 14, 2),
90   AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_ONE_FLOAT_DENORM_MODE_32, 16, 2),
91   AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_ONE_FLOAT_DENORM_MODE_16_64, 18, 2),
92   AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_ONE_PRIV, 20, 1),
93   AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_ONE_ENABLE_DX10_CLAMP, 21, 1),
94   AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_ONE_DEBUG_MODE, 22, 1),
95   AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_ONE_ENABLE_IEEE_MODE, 23, 1),
96   AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_ONE_BULKY, 24, 1),
97   AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_ONE_CDBG_USER, 25, 1),
98   AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_ONE_RESERVED1, 26, 6)
99 };
100 
101 // AMD System VGPR Workitem ID Enumeration Values.
102 enum amd_system_vgpr_workitem_id_t {
103   AMD_SYSTEM_VGPR_WORKITEM_ID_X = 0,
104   AMD_SYSTEM_VGPR_WORKITEM_ID_X_Y = 1,
105   AMD_SYSTEM_VGPR_WORKITEM_ID_X_Y_Z = 2,
106   AMD_SYSTEM_VGPR_WORKITEM_ID_UNDEFINED = 3
107 };
108 
109 // AMD Compute Program Resource Register Two.
110 typedef uint32_t amd_compute_pgm_rsrc_two32_t;
111 enum amd_compute_pgm_rsrc_two_t {
112   AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_SGPR_PRIVATE_SEGMENT_WAVE_BYTE_OFFSET, 0, 1),
113   AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_USER_SGPR_COUNT, 1, 5),
114   AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_TRAP_HANDLER, 6, 1),
115   AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_SGPR_WORKGROUP_ID_X, 7, 1),
116   AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_SGPR_WORKGROUP_ID_Y, 8, 1),
117   AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_SGPR_WORKGROUP_ID_Z, 9, 1),
118   AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_SGPR_WORKGROUP_INFO, 10, 1),
119   AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_VGPR_WORKITEM_ID, 11, 2),
120   AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_EXCEPTION_ADDRESS_WATCH, 13, 1),
121   AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_EXCEPTION_MEMORY_VIOLATION, 14, 1),
122   AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_GRANULATED_LDS_SIZE, 15, 9),
123   AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, 24, 1),
124   AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE, 25, 1),
125   AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, 26, 1),
126   AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW, 27, 1),
127   AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW, 28, 1),
128   AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT, 29, 1),
129   AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_EXCEPTION_INT_DIVISION_BY_ZERO, 30, 1),
130   AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_RESERVED1, 31, 1)
131 };
132 
133 // AMD Element Byte Size Enumeration Values.
134 enum amd_element_byte_size_t {
135   AMD_ELEMENT_BYTE_SIZE_2 = 0,
136   AMD_ELEMENT_BYTE_SIZE_4 = 1,
137   AMD_ELEMENT_BYTE_SIZE_8 = 2,
138   AMD_ELEMENT_BYTE_SIZE_16 = 3
139 };
140 
141 // AMD Kernel Code Properties.
142 typedef uint32_t amd_kernel_code_properties32_t;
143 enum amd_kernel_code_properties_t {
144   AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER, 0, 1),
145   AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_ENABLE_SGPR_DISPATCH_PTR, 1, 1),
146   AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_ENABLE_SGPR_QUEUE_PTR, 2, 1),
147   AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_ENABLE_SGPR_KERNARG_SEGMENT_PTR, 3, 1),
148   AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_ENABLE_SGPR_DISPATCH_ID, 4, 1),
149   AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_ENABLE_SGPR_FLAT_SCRATCH_INIT, 5, 1),
150   AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE, 6, 1),
151   AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_ENABLE_SGPR_GRID_WORKGROUP_COUNT_X, 7, 1),
152   AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Y, 8, 1),
153   AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Z, 9, 1),
154   AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_RESERVED1, 10, 6),
155   AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_ENABLE_ORDERED_APPEND_GDS, 16, 1),
156   AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_PRIVATE_ELEMENT_SIZE, 17, 2),
157   AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_IS_PTR64, 19, 1),
158   AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_IS_DYNAMIC_CALLSTACK, 20, 1),
159   AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_IS_DEBUG_ENABLED, 21, 1),
160   AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_IS_XNACK_ENABLED, 22, 1),
161   AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_RESERVED2, 23, 9)
162 };
163 
164 // AMD Power Of Two Enumeration Values.
165 typedef uint8_t amd_powertwo8_t;
166 enum amd_powertwo_t {
167   AMD_POWERTWO_1 = 0,
168   AMD_POWERTWO_2 = 1,
169   AMD_POWERTWO_4 = 2,
170   AMD_POWERTWO_8 = 3,
171   AMD_POWERTWO_16 = 4,
172   AMD_POWERTWO_32 = 5,
173   AMD_POWERTWO_64 = 6,
174   AMD_POWERTWO_128 = 7,
175   AMD_POWERTWO_256 = 8
176 };
177 
178 // AMD Enabled Control Directive Enumeration Values.
179 typedef uint64_t amd_enabled_control_directive64_t;
180 enum amd_enabled_control_directive_t {
181   AMD_ENABLED_CONTROL_DIRECTIVE_ENABLE_BREAK_EXCEPTIONS = 1,
182   AMD_ENABLED_CONTROL_DIRECTIVE_ENABLE_DETECT_EXCEPTIONS = 2,
183   AMD_ENABLED_CONTROL_DIRECTIVE_MAX_DYNAMIC_GROUP_SIZE = 4,
184   AMD_ENABLED_CONTROL_DIRECTIVE_MAX_FLAT_GRID_SIZE = 8,
185   AMD_ENABLED_CONTROL_DIRECTIVE_MAX_FLAT_WORKGROUP_SIZE = 16,
186   AMD_ENABLED_CONTROL_DIRECTIVE_REQUIRED_DIM = 32,
187   AMD_ENABLED_CONTROL_DIRECTIVE_REQUIRED_GRID_SIZE = 64,
188   AMD_ENABLED_CONTROL_DIRECTIVE_REQUIRED_WORKGROUP_SIZE = 128,
189   AMD_ENABLED_CONTROL_DIRECTIVE_REQUIRE_NO_PARTIAL_WORKGROUPS = 256
190 };
191 
192 // AMD Exception Kind Enumeration Values.
193 typedef uint16_t amd_exception_kind16_t;
194 enum amd_exception_kind_t {
195   AMD_EXCEPTION_KIND_INVALID_OPERATION = 1,
196   AMD_EXCEPTION_KIND_DIVISION_BY_ZERO = 2,
197   AMD_EXCEPTION_KIND_OVERFLOW = 4,
198   AMD_EXCEPTION_KIND_UNDERFLOW = 8,
199   AMD_EXCEPTION_KIND_INEXACT = 16
200 };
201 
202 // AMD Control Directives.
203 #define AMD_CONTROL_DIRECTIVES_ALIGN_BYTES 64
204 #define AMD_CONTROL_DIRECTIVES_ALIGN __ALIGNED__(AMD_CONTROL_DIRECTIVES_ALIGN_BYTES)
205 typedef AMD_CONTROL_DIRECTIVES_ALIGN struct amd_control_directives_s {
206   amd_enabled_control_directive64_t enabled_control_directives;
207   uint16_t enable_break_exceptions;
208   uint16_t enable_detect_exceptions;
209   uint32_t max_dynamic_group_size;
210   uint64_t max_flat_grid_size;
211   uint32_t max_flat_workgroup_size;
212   uint8_t required_dim;
213   uint8_t reserved1[3];
214   uint64_t required_grid_size[3];
215   uint32_t required_workgroup_size[3];
216   uint8_t reserved2[60];
217 } amd_control_directives_t;
218 
219 // AMD Kernel Code.
220 #define AMD_ISA_ALIGN_BYTES 256
221 #define AMD_KERNEL_CODE_ALIGN_BYTES 64
222 #define AMD_KERNEL_CODE_ALIGN __ALIGNED__(AMD_KERNEL_CODE_ALIGN_BYTES)
223 typedef AMD_KERNEL_CODE_ALIGN struct amd_kernel_code_s {
224   amd_kernel_code_version32_t amd_kernel_code_version_major;
225   amd_kernel_code_version32_t amd_kernel_code_version_minor;
226   amd_machine_kind16_t amd_machine_kind;
227   amd_machine_version16_t amd_machine_version_major;
228   amd_machine_version16_t amd_machine_version_minor;
229   amd_machine_version16_t amd_machine_version_stepping;
230   int64_t kernel_code_entry_byte_offset;
231   int64_t kernel_code_prefetch_byte_offset;
232   uint64_t kernel_code_prefetch_byte_size;
233   uint64_t max_scratch_backing_memory_byte_size;
234   amd_compute_pgm_rsrc_one32_t compute_pgm_rsrc1;
235   amd_compute_pgm_rsrc_two32_t compute_pgm_rsrc2;
236   amd_kernel_code_properties32_t kernel_code_properties;
237   uint32_t workitem_private_segment_byte_size;
238   uint32_t workgroup_group_segment_byte_size;
239   uint32_t gds_segment_byte_size;
240   uint64_t kernarg_segment_byte_size;
241   uint32_t workgroup_fbarrier_count;
242   uint16_t wavefront_sgpr_count;
243   uint16_t workitem_vgpr_count;
244   uint16_t reserved_vgpr_first;
245   uint16_t reserved_vgpr_count;
246   uint16_t reserved_sgpr_first;
247   uint16_t reserved_sgpr_count;
248   uint16_t debug_wavefront_private_segment_offset_sgpr;
249   uint16_t debug_private_segment_buffer_sgpr;
250   amd_powertwo8_t kernarg_segment_alignment;
251   amd_powertwo8_t group_segment_alignment;
252   amd_powertwo8_t private_segment_alignment;
253   amd_powertwo8_t wavefront_size;
254   int32_t call_convention;
255   uint8_t reserved1[12];
256   uint64_t runtime_loader_kernel_symbol;
257   amd_control_directives_t control_directives;
258 } amd_kernel_code_t;
259 
260 // TODO: this struct should be completely gone once debugger designs/implements
261 // Debugger APIs.
262 typedef struct amd_runtime_loader_debug_info_s {
263   const void* elf_raw;
264   size_t elf_size;
265   const char *kernel_name;
266   const void *owning_segment;
267 } amd_runtime_loader_debug_info_t;
268 
269 #endif // AMD_HSA_KERNEL_CODE_H
270