1 //////////////////////////////////////////////////////////////////////////////// 2 // 3 // The University of Illinois/NCSA 4 // Open Source License (NCSA) 5 // 6 // Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved. 7 // 8 // Developed by: 9 // 10 // AMD Research and AMD HSA Software Development 11 // 12 // Advanced Micro Devices, Inc. 13 // 14 // www.amd.com 15 // 16 // Permission is hereby granted, free of charge, to any person obtaining a copy 17 // of this software and associated documentation files (the "Software"), to 18 // deal with the Software without restriction, including without limitation 19 // the rights to use, copy, modify, merge, publish, distribute, sublicense, 20 // and/or sell copies of the Software, and to permit persons to whom the 21 // Software is furnished to do so, subject to the following conditions: 22 // 23 // - Redistributions of source code must retain the above copyright notice, 24 // this list of conditions and the following disclaimers. 25 // - Redistributions in binary form must reproduce the above copyright 26 // notice, this list of conditions and the following disclaimers in 27 // the documentation and/or other materials provided with the distribution. 28 // - Neither the names of Advanced Micro Devices, Inc, 29 // nor the names of its contributors may be used to endorse or promote 30 // products derived from this Software without specific prior written 31 // permission. 32 // 33 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 34 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 35 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 36 // THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR 37 // OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 38 // ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 39 // DEALINGS WITH THE SOFTWARE. 40 // 41 //////////////////////////////////////////////////////////////////////////////// 42 43 #ifndef AMD_HSA_KERNEL_CODE_H 44 #define AMD_HSA_KERNEL_CODE_H 45 46 #include "amd_hsa_common.h" 47 #include "hsa.h" 48 49 // AMD Kernel Code Version Enumeration Values. 50 typedef uint32_t amd_kernel_code_version32_t; 51 enum amd_kernel_code_version_t { 52 AMD_KERNEL_CODE_VERSION_MAJOR = 1, 53 AMD_KERNEL_CODE_VERSION_MINOR = 1 54 }; 55 56 // AMD Machine Kind Enumeration Values. 57 typedef uint16_t amd_machine_kind16_t; 58 enum amd_machine_kind_t { 59 AMD_MACHINE_KIND_UNDEFINED = 0, 60 AMD_MACHINE_KIND_AMDGPU = 1 61 }; 62 63 // AMD Machine Version. 64 typedef uint16_t amd_machine_version16_t; 65 66 // AMD Float Round Mode Enumeration Values. 67 enum amd_float_round_mode_t { 68 AMD_FLOAT_ROUND_MODE_NEAREST_EVEN = 0, 69 AMD_FLOAT_ROUND_MODE_PLUS_INFINITY = 1, 70 AMD_FLOAT_ROUND_MODE_MINUS_INFINITY = 2, 71 AMD_FLOAT_ROUND_MODE_ZERO = 3 72 }; 73 74 // AMD Float Denorm Mode Enumeration Values. 75 enum amd_float_denorm_mode_t { 76 AMD_FLOAT_DENORM_MODE_FLUSH_SOURCE_OUTPUT = 0, 77 AMD_FLOAT_DENORM_MODE_FLUSH_OUTPUT = 1, 78 AMD_FLOAT_DENORM_MODE_FLUSH_SOURCE = 2, 79 AMD_FLOAT_DENORM_MODE_NO_FLUSH = 3 80 }; 81 82 // AMD Compute Program Resource Register One. 83 typedef uint32_t amd_compute_pgm_rsrc_one32_t; 84 enum amd_compute_pgm_rsrc_one_t { 85 AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_ONE_GRANULATED_WORKITEM_VGPR_COUNT, 0, 6), 86 AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_ONE_GRANULATED_WAVEFRONT_SGPR_COUNT, 6, 4), 87 AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_ONE_PRIORITY, 10, 2), 88 AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_ONE_FLOAT_ROUND_MODE_32, 12, 2), 89 AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_ONE_FLOAT_ROUND_MODE_16_64, 14, 2), 90 AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_ONE_FLOAT_DENORM_MODE_32, 16, 2), 91 AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_ONE_FLOAT_DENORM_MODE_16_64, 18, 2), 92 AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_ONE_PRIV, 20, 1), 93 AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_ONE_ENABLE_DX10_CLAMP, 21, 1), 94 AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_ONE_DEBUG_MODE, 22, 1), 95 AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_ONE_ENABLE_IEEE_MODE, 23, 1), 96 AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_ONE_BULKY, 24, 1), 97 AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_ONE_CDBG_USER, 25, 1), 98 AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_ONE_RESERVED1, 26, 6) 99 }; 100 101 // AMD System VGPR Workitem ID Enumeration Values. 102 enum amd_system_vgpr_workitem_id_t { 103 AMD_SYSTEM_VGPR_WORKITEM_ID_X = 0, 104 AMD_SYSTEM_VGPR_WORKITEM_ID_X_Y = 1, 105 AMD_SYSTEM_VGPR_WORKITEM_ID_X_Y_Z = 2, 106 AMD_SYSTEM_VGPR_WORKITEM_ID_UNDEFINED = 3 107 }; 108 109 // AMD Compute Program Resource Register Two. 110 typedef uint32_t amd_compute_pgm_rsrc_two32_t; 111 enum amd_compute_pgm_rsrc_two_t { 112 AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_SGPR_PRIVATE_SEGMENT_WAVE_BYTE_OFFSET, 0, 1), 113 AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_USER_SGPR_COUNT, 1, 5), 114 AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_TRAP_HANDLER, 6, 1), 115 AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_SGPR_WORKGROUP_ID_X, 7, 1), 116 AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_SGPR_WORKGROUP_ID_Y, 8, 1), 117 AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_SGPR_WORKGROUP_ID_Z, 9, 1), 118 AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_SGPR_WORKGROUP_INFO, 10, 1), 119 AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_VGPR_WORKITEM_ID, 11, 2), 120 AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_EXCEPTION_ADDRESS_WATCH, 13, 1), 121 AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_EXCEPTION_MEMORY_VIOLATION, 14, 1), 122 AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_GRANULATED_LDS_SIZE, 15, 9), 123 AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, 24, 1), 124 AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE, 25, 1), 125 AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, 26, 1), 126 AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW, 27, 1), 127 AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW, 28, 1), 128 AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT, 29, 1), 129 AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_EXCEPTION_INT_DIVISION_BY_ZERO, 30, 1), 130 AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_TWO_RESERVED1, 31, 1) 131 }; 132 133 // AMD Element Byte Size Enumeration Values. 134 enum amd_element_byte_size_t { 135 AMD_ELEMENT_BYTE_SIZE_2 = 0, 136 AMD_ELEMENT_BYTE_SIZE_4 = 1, 137 AMD_ELEMENT_BYTE_SIZE_8 = 2, 138 AMD_ELEMENT_BYTE_SIZE_16 = 3 139 }; 140 141 // AMD Kernel Code Properties. 142 typedef uint32_t amd_kernel_code_properties32_t; 143 enum amd_kernel_code_properties_t { 144 AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER, 0, 1), 145 AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_ENABLE_SGPR_DISPATCH_PTR, 1, 1), 146 AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_ENABLE_SGPR_QUEUE_PTR, 2, 1), 147 AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_ENABLE_SGPR_KERNARG_SEGMENT_PTR, 3, 1), 148 AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_ENABLE_SGPR_DISPATCH_ID, 4, 1), 149 AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_ENABLE_SGPR_FLAT_SCRATCH_INIT, 5, 1), 150 AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE, 6, 1), 151 AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_ENABLE_SGPR_GRID_WORKGROUP_COUNT_X, 7, 1), 152 AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Y, 8, 1), 153 AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Z, 9, 1), 154 AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_RESERVED1, 10, 6), 155 AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_ENABLE_ORDERED_APPEND_GDS, 16, 1), 156 AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_PRIVATE_ELEMENT_SIZE, 17, 2), 157 AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_IS_PTR64, 19, 1), 158 AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_IS_DYNAMIC_CALLSTACK, 20, 1), 159 AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_IS_DEBUG_ENABLED, 21, 1), 160 AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_IS_XNACK_ENABLED, 22, 1), 161 AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTIES_RESERVED2, 23, 9) 162 }; 163 164 // AMD Power Of Two Enumeration Values. 165 typedef uint8_t amd_powertwo8_t; 166 enum amd_powertwo_t { 167 AMD_POWERTWO_1 = 0, 168 AMD_POWERTWO_2 = 1, 169 AMD_POWERTWO_4 = 2, 170 AMD_POWERTWO_8 = 3, 171 AMD_POWERTWO_16 = 4, 172 AMD_POWERTWO_32 = 5, 173 AMD_POWERTWO_64 = 6, 174 AMD_POWERTWO_128 = 7, 175 AMD_POWERTWO_256 = 8 176 }; 177 178 // AMD Enabled Control Directive Enumeration Values. 179 typedef uint64_t amd_enabled_control_directive64_t; 180 enum amd_enabled_control_directive_t { 181 AMD_ENABLED_CONTROL_DIRECTIVE_ENABLE_BREAK_EXCEPTIONS = 1, 182 AMD_ENABLED_CONTROL_DIRECTIVE_ENABLE_DETECT_EXCEPTIONS = 2, 183 AMD_ENABLED_CONTROL_DIRECTIVE_MAX_DYNAMIC_GROUP_SIZE = 4, 184 AMD_ENABLED_CONTROL_DIRECTIVE_MAX_FLAT_GRID_SIZE = 8, 185 AMD_ENABLED_CONTROL_DIRECTIVE_MAX_FLAT_WORKGROUP_SIZE = 16, 186 AMD_ENABLED_CONTROL_DIRECTIVE_REQUIRED_DIM = 32, 187 AMD_ENABLED_CONTROL_DIRECTIVE_REQUIRED_GRID_SIZE = 64, 188 AMD_ENABLED_CONTROL_DIRECTIVE_REQUIRED_WORKGROUP_SIZE = 128, 189 AMD_ENABLED_CONTROL_DIRECTIVE_REQUIRE_NO_PARTIAL_WORKGROUPS = 256 190 }; 191 192 // AMD Exception Kind Enumeration Values. 193 typedef uint16_t amd_exception_kind16_t; 194 enum amd_exception_kind_t { 195 AMD_EXCEPTION_KIND_INVALID_OPERATION = 1, 196 AMD_EXCEPTION_KIND_DIVISION_BY_ZERO = 2, 197 AMD_EXCEPTION_KIND_OVERFLOW = 4, 198 AMD_EXCEPTION_KIND_UNDERFLOW = 8, 199 AMD_EXCEPTION_KIND_INEXACT = 16 200 }; 201 202 // AMD Control Directives. 203 #define AMD_CONTROL_DIRECTIVES_ALIGN_BYTES 64 204 #define AMD_CONTROL_DIRECTIVES_ALIGN __ALIGNED__(AMD_CONTROL_DIRECTIVES_ALIGN_BYTES) 205 typedef AMD_CONTROL_DIRECTIVES_ALIGN struct amd_control_directives_s { 206 amd_enabled_control_directive64_t enabled_control_directives; 207 uint16_t enable_break_exceptions; 208 uint16_t enable_detect_exceptions; 209 uint32_t max_dynamic_group_size; 210 uint64_t max_flat_grid_size; 211 uint32_t max_flat_workgroup_size; 212 uint8_t required_dim; 213 uint8_t reserved1[3]; 214 uint64_t required_grid_size[3]; 215 uint32_t required_workgroup_size[3]; 216 uint8_t reserved2[60]; 217 } amd_control_directives_t; 218 219 // AMD Kernel Code. 220 #define AMD_ISA_ALIGN_BYTES 256 221 #define AMD_KERNEL_CODE_ALIGN_BYTES 64 222 #define AMD_KERNEL_CODE_ALIGN __ALIGNED__(AMD_KERNEL_CODE_ALIGN_BYTES) 223 typedef AMD_KERNEL_CODE_ALIGN struct amd_kernel_code_s { 224 amd_kernel_code_version32_t amd_kernel_code_version_major; 225 amd_kernel_code_version32_t amd_kernel_code_version_minor; 226 amd_machine_kind16_t amd_machine_kind; 227 amd_machine_version16_t amd_machine_version_major; 228 amd_machine_version16_t amd_machine_version_minor; 229 amd_machine_version16_t amd_machine_version_stepping; 230 int64_t kernel_code_entry_byte_offset; 231 int64_t kernel_code_prefetch_byte_offset; 232 uint64_t kernel_code_prefetch_byte_size; 233 uint64_t max_scratch_backing_memory_byte_size; 234 amd_compute_pgm_rsrc_one32_t compute_pgm_rsrc1; 235 amd_compute_pgm_rsrc_two32_t compute_pgm_rsrc2; 236 amd_kernel_code_properties32_t kernel_code_properties; 237 uint32_t workitem_private_segment_byte_size; 238 uint32_t workgroup_group_segment_byte_size; 239 uint32_t gds_segment_byte_size; 240 uint64_t kernarg_segment_byte_size; 241 uint32_t workgroup_fbarrier_count; 242 uint16_t wavefront_sgpr_count; 243 uint16_t workitem_vgpr_count; 244 uint16_t reserved_vgpr_first; 245 uint16_t reserved_vgpr_count; 246 uint16_t reserved_sgpr_first; 247 uint16_t reserved_sgpr_count; 248 uint16_t debug_wavefront_private_segment_offset_sgpr; 249 uint16_t debug_private_segment_buffer_sgpr; 250 amd_powertwo8_t kernarg_segment_alignment; 251 amd_powertwo8_t group_segment_alignment; 252 amd_powertwo8_t private_segment_alignment; 253 amd_powertwo8_t wavefront_size; 254 int32_t call_convention; 255 uint8_t reserved1[12]; 256 uint64_t runtime_loader_kernel_symbol; 257 amd_control_directives_t control_directives; 258 } amd_kernel_code_t; 259 260 // TODO: this struct should be completely gone once debugger designs/implements 261 // Debugger APIs. 262 typedef struct amd_runtime_loader_debug_info_s { 263 const void* elf_raw; 264 size_t elf_size; 265 const char *kernel_name; 266 const void *owning_segment; 267 } amd_runtime_loader_debug_info_t; 268 269 #endif // AMD_HSA_KERNEL_CODE_H 270