1 ////////////////////////////////////////////////////////////////////////////////
2 //
3 // The University of Illinois/NCSA
4 // Open Source License (NCSA)
5 //
6 // Copyright (c) 2014-2016, Advanced Micro Devices, Inc. All rights reserved.
7 //
8 // Developed by:
9 //
10 // AMD Research and AMD HSA Software Development
11 //
12 // Advanced Micro Devices, Inc.
13 //
14 // www.amd.com
15 //
16 // Permission is hereby granted, free of charge, to any person obtaining a copy
17 // of this software and associated documentation files (the "Software"), to
18 // deal with the Software without restriction, including without limitation
19 // the rights to use, copy, modify, merge, publish, distribute, sublicense,
20 // and/or sell copies of the Software, and to permit persons to whom the
21 // Software is furnished to do so, subject to the following conditions:
22 //
23 // - Redistributions of source code must retain the above copyright notice,
24 // this list of conditions and the following disclaimers.
25 // - Redistributions in binary form must reproduce the above copyright
26 // notice, this list of conditions and the following disclaimers in
27 // the documentation and/or other materials provided with the distribution.
28 // - Neither the names of Advanced Micro Devices, Inc,
29 // nor the names of its contributors may be used to endorse or promote
30 // products derived from this Software without specific prior written
31 // permission.
32 //
33 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
34 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
35 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
36 // THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
37 // OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
38 // ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
39 // DEALINGS WITH THE SOFTWARE.
40 //
41 ////////////////////////////////////////////////////////////////////////////////
42
43 #include "amd_hsa_code_util.hpp"
44 #include "libelf.h"
45 #include <fstream>
46 #include <cstring>
47 #include <iomanip>
48 #include <cassert>
49 #include <algorithm>
50 #include <sstream>
51 #ifdef _WIN32
52 #include <Windows.h>
53 #include <io.h>
54 #include <process.h>
55 #else // _WIN32
56 #include <sys/types.h>
57 #include <unistd.h>
58 #include <sys/types.h>
59 #include <sys/stat.h>
60 #include <fcntl.h>
61 #endif // _WIN32
62 #include "Brig.h"
63
64 namespace {
65 auto eq = " = ";
66
attr1(std::ostream & out)67 std::ostream& attr1(std::ostream& out)
68 {
69 out << " " << std::left << std::setw(60) << std::setfill(' ');
70 return out;
71 }
72
attr2(std::ostream & out)73 std::ostream& attr2(std::ostream& out)
74 {
75 out << " " << std::left << std::setw(58) << std::setfill(' ');
76 return out;
77 }
78 } // namespace anonymous
79
80 namespace amd {
81 namespace hsa {
82 namespace common {
83
IsAccessibleMemoryAddress(uint64_t address)84 bool IsAccessibleMemoryAddress(uint64_t address)
85 {
86 if (0 == address) {
87 return false;
88 }
89 #if defined(_WIN32) || defined(_WIN64)
90 MEMORY_BASIC_INFORMATION memory_info;
91 if (!VirtualQuery(reinterpret_cast<void*>(address), &memory_info, sizeof(memory_info))) {
92 return false;
93 }
94 int32_t is_accessible = ((memory_info.Protect & PAGE_READONLY) ||
95 (memory_info.Protect & PAGE_READWRITE) ||
96 (memory_info.Protect & PAGE_WRITECOPY) ||
97 (memory_info.Protect & PAGE_EXECUTE_READ) ||
98 (memory_info.Protect & PAGE_EXECUTE_READWRITE) ||
99 (memory_info.Protect & PAGE_EXECUTE_WRITECOPY));
100 if (memory_info.Protect & PAGE_GUARD) {
101 is_accessible = 0;
102 }
103 if (memory_info.Protect & PAGE_NOACCESS) {
104 is_accessible = 0;
105 }
106 return is_accessible > 0;
107 #else
108 int32_t random_fd = 0;
109 ssize_t bytes_written = 0;
110 if (-1 == (random_fd = open("/dev/random", O_WRONLY))) {
111 return false;
112 }
113 bytes_written = write(random_fd, (void*)address, 1);
114 if (-1 == close(random_fd)) {
115 return false;
116 }
117 return bytes_written == 1;
118 #endif // _WIN32 || _WIN64
119 }
120
121 }
122
HsaSymbolKindToString(hsa_symbol_kind_t kind)123 std::string HsaSymbolKindToString(hsa_symbol_kind_t kind)
124 {
125 switch (kind) {
126 case HSA_SYMBOL_KIND_VARIABLE: return "VARIABLE";
127 case HSA_SYMBOL_KIND_INDIRECT_FUNCTION: return "INDIRECT_FUNCTION";
128 case HSA_SYMBOL_KIND_KERNEL: return "KERNEL";
129 default: return "UNKNOWN";
130 }
131 }
132
HsaSymbolLinkageToString(hsa_symbol_linkage_t linkage)133 std::string HsaSymbolLinkageToString(hsa_symbol_linkage_t linkage)
134 {
135 switch (linkage) {
136 case HSA_SYMBOL_LINKAGE_MODULE: return "MODULE";
137 case HSA_SYMBOL_LINKAGE_PROGRAM: return "PROGRAM";
138 default: return "UNKNOWN";
139 }
140 }
141
HsaVariableAllocationToString(hsa_variable_allocation_t allocation)142 std::string HsaVariableAllocationToString(hsa_variable_allocation_t allocation)
143 {
144 switch (allocation) {
145 case HSA_VARIABLE_ALLOCATION_AGENT: return "AGENT";
146 case HSA_VARIABLE_ALLOCATION_PROGRAM: return "PROGRAM";
147 default: return "UNKNOWN";
148 }
149 }
150
HsaVariableSegmentToString(hsa_variable_segment_t segment)151 std::string HsaVariableSegmentToString(hsa_variable_segment_t segment)
152 {
153 switch (segment) {
154 case HSA_VARIABLE_SEGMENT_GLOBAL: return "GLOBAL";
155 case HSA_VARIABLE_SEGMENT_READONLY: return "READONLY";
156 default: return "UNKNOWN";
157 }
158 }
159
HsaProfileToString(hsa_profile_t profile)160 std::string HsaProfileToString(hsa_profile_t profile)
161 {
162 switch (profile) {
163 case HSA_PROFILE_BASE: return "BASE";
164 case HSA_PROFILE_FULL: return "FULL";
165 default: return "UNKNOWN";
166 }
167 }
168
HsaMachineModelToString(hsa_machine_model_t model)169 std::string HsaMachineModelToString(hsa_machine_model_t model)
170 {
171 switch (model) {
172 case HSA_MACHINE_MODEL_SMALL: return "SMALL";
173 case HSA_MACHINE_MODEL_LARGE: return "LARGE";
174 default: return "UNKNOWN";
175 }
176 }
177
HsaFloatRoundingModeToString(hsa_default_float_rounding_mode_t mode)178 std::string HsaFloatRoundingModeToString(hsa_default_float_rounding_mode_t mode)
179 {
180 switch (mode) {
181 case HSA_DEFAULT_FLOAT_ROUNDING_MODE_DEFAULT: return "DEFAULT";
182 case HSA_DEFAULT_FLOAT_ROUNDING_MODE_ZERO: return "ZERO";
183 case HSA_DEFAULT_FLOAT_ROUNDING_MODE_NEAR: return "NEAR";
184 default: return "UNKNOWN";
185 }
186 }
187
AmdMachineKindToString(amd_machine_kind16_t machine)188 std::string AmdMachineKindToString(amd_machine_kind16_t machine)
189 {
190 switch (machine) {
191 case AMD_MACHINE_KIND_UNDEFINED: return "UNDEFINED";
192 case AMD_MACHINE_KIND_AMDGPU: return "AMDGPU";
193 default: return "UNKNOWN";
194 }
195 }
196
AmdFloatRoundModeToString(amd_float_round_mode_t round_mode)197 std::string AmdFloatRoundModeToString(amd_float_round_mode_t round_mode)
198 {
199 switch (round_mode) {
200 case AMD_FLOAT_ROUND_MODE_NEAREST_EVEN: return "NEAREST_EVEN";
201 case AMD_FLOAT_ROUND_MODE_PLUS_INFINITY: return "PLUS_INFINITY";
202 case AMD_FLOAT_ROUND_MODE_MINUS_INFINITY: return "MINUS_INFINITY";
203 case AMD_FLOAT_ROUND_MODE_ZERO: return "ZERO";
204 default: return "UNKNOWN";
205 }
206 }
207
AmdFloatDenormModeToString(amd_float_denorm_mode_t denorm_mode)208 std::string AmdFloatDenormModeToString(amd_float_denorm_mode_t denorm_mode)
209 {
210 switch (denorm_mode) {
211 case AMD_FLOAT_DENORM_MODE_FLUSH_SOURCE_OUTPUT: return "FLUSH_SOURCE_OUTPUT";
212 case AMD_FLOAT_DENORM_MODE_FLUSH_OUTPUT: return "FLUSH_OUTPUT";
213 case AMD_FLOAT_DENORM_MODE_FLUSH_SOURCE: return "FLUSH_SOURCE";
214 case AMD_FLOAT_DENORM_MODE_NO_FLUSH: return "FLUSH_NONE";
215 default: return "UNKNOWN";
216 }
217 }
218
AmdSystemVgprWorkitemIdToString(amd_system_vgpr_workitem_id_t system_vgpr_workitem_id)219 std::string AmdSystemVgprWorkitemIdToString(amd_system_vgpr_workitem_id_t system_vgpr_workitem_id)
220 {
221 switch (system_vgpr_workitem_id) {
222 case AMD_SYSTEM_VGPR_WORKITEM_ID_X: return "X";
223 case AMD_SYSTEM_VGPR_WORKITEM_ID_X_Y: return "X, Y";
224 case AMD_SYSTEM_VGPR_WORKITEM_ID_X_Y_Z: return "X, Y, Z";
225 default: return "UNKNOWN";
226 }
227 }
228
AmdElementByteSizeToString(amd_element_byte_size_t element_byte_size)229 std::string AmdElementByteSizeToString(amd_element_byte_size_t element_byte_size)
230 {
231 switch (element_byte_size) {
232 case AMD_ELEMENT_BYTE_SIZE_2: return "WORD (2 bytes)";
233 case AMD_ELEMENT_BYTE_SIZE_4: return "DWORD (4 bytes)";
234 case AMD_ELEMENT_BYTE_SIZE_8: return "QWORD (8 bytes)";
235 case AMD_ELEMENT_BYTE_SIZE_16: return "16 bytes";
236 default: return "UNKNOWN";
237 }
238 }
239
AmdExceptionKindToString(amd_exception_kind16_t exceptions)240 std::string AmdExceptionKindToString(amd_exception_kind16_t exceptions)
241 {
242 std::string e;
243 if (exceptions & AMD_EXCEPTION_KIND_INVALID_OPERATION) {
244 e += ", INVALID_OPERATON";
245 exceptions &= ~AMD_EXCEPTION_KIND_INVALID_OPERATION;
246 }
247 if (exceptions & AMD_EXCEPTION_KIND_DIVISION_BY_ZERO) {
248 e += ", DIVISION_BY_ZERO";
249 exceptions &= ~AMD_EXCEPTION_KIND_DIVISION_BY_ZERO;
250 }
251 if (exceptions & AMD_EXCEPTION_KIND_OVERFLOW) {
252 e += ", OVERFLOW";
253 exceptions &= ~AMD_EXCEPTION_KIND_OVERFLOW;
254 }
255 if (exceptions & AMD_EXCEPTION_KIND_UNDERFLOW) {
256 e += ", UNDERFLOW";
257 exceptions &= ~AMD_EXCEPTION_KIND_UNDERFLOW;
258 }
259 if (exceptions & AMD_EXCEPTION_KIND_INEXACT) {
260 e += ", INEXACT";
261 exceptions &= ~AMD_EXCEPTION_KIND_INEXACT;
262 }
263 if (exceptions) {
264 e += ", UNKNOWN";
265 }
266 if (!e.empty()) {
267 e = "[" + e.erase(0, 2) + "]";
268 }
269 return e;
270 }
271
AmdPowerTwoToString(amd_powertwo8_t p)272 std::string AmdPowerTwoToString(amd_powertwo8_t p)
273 {
274 return std::to_string(1 << (unsigned) p);
275 }
276
AmdHsaElfSectionSegment(amdgpu_hsa_elf_section_t sec)277 amdgpu_hsa_elf_segment_t AmdHsaElfSectionSegment(amdgpu_hsa_elf_section_t sec)
278 {
279 switch (sec) {
280 case AMDGPU_HSA_RODATA_GLOBAL_PROGRAM:
281 case AMDGPU_HSA_DATA_GLOBAL_PROGRAM:
282 case AMDGPU_HSA_BSS_GLOBAL_PROGRAM:
283 return AMDGPU_HSA_SEGMENT_GLOBAL_PROGRAM;
284 case AMDGPU_HSA_RODATA_GLOBAL_AGENT:
285 case AMDGPU_HSA_DATA_GLOBAL_AGENT:
286 case AMDGPU_HSA_BSS_GLOBAL_AGENT:
287 return AMDGPU_HSA_SEGMENT_GLOBAL_AGENT;
288 case AMDGPU_HSA_RODATA_READONLY_AGENT:
289 case AMDGPU_HSA_DATA_READONLY_AGENT:
290 case AMDGPU_HSA_BSS_READONLY_AGENT:
291 return AMDGPU_HSA_SEGMENT_READONLY_AGENT;
292 default:
293 assert(false); return AMDGPU_HSA_SEGMENT_LAST;
294 }
295 }
296
IsAmdHsaElfSectionROData(amdgpu_hsa_elf_section_t sec)297 bool IsAmdHsaElfSectionROData(amdgpu_hsa_elf_section_t sec)
298 {
299 switch (sec) {
300 case AMDGPU_HSA_RODATA_GLOBAL_PROGRAM:
301 case AMDGPU_HSA_RODATA_GLOBAL_AGENT:
302 case AMDGPU_HSA_RODATA_READONLY_AGENT:
303 default:
304 return false;
305 }
306 }
307
AmdHsaElfSegmentToString(amdgpu_hsa_elf_segment_t seg)308 std::string AmdHsaElfSegmentToString(amdgpu_hsa_elf_segment_t seg)
309 {
310 switch (seg) {
311 case AMDGPU_HSA_SEGMENT_GLOBAL_PROGRAM: return "GLOBAL_PROGRAM";
312 case AMDGPU_HSA_SEGMENT_GLOBAL_AGENT: return "GLOBAL_AGENT";
313 case AMDGPU_HSA_SEGMENT_READONLY_AGENT: return "READONLY_AGENT";
314 case AMDGPU_HSA_SEGMENT_CODE_AGENT: return "CODE_AGENT";
315 default: return "UNKNOWN";
316 }
317 }
318
AmdPTLoadToString(uint64_t type)319 std::string AmdPTLoadToString(uint64_t type)
320 {
321 if (PT_LOOS <= type && type < PT_LOOS + AMDGPU_HSA_SEGMENT_LAST) {
322 return AmdHsaElfSegmentToString((amdgpu_hsa_elf_segment_t) (type - PT_LOOS));
323 } else {
324 return "UNKNOWN (" + std::to_string(type) + ")";
325 }
326 }
327
PrintAmdKernelCode(std::ostream & out,const amd_kernel_code_t * akc)328 void PrintAmdKernelCode(std::ostream& out, const amd_kernel_code_t *akc)
329 {
330 uint32_t is_debug_enabled = AMD_HSA_BITS_GET(akc->kernel_code_properties, AMD_KERNEL_CODE_PROPERTIES_IS_DEBUG_ENABLED);
331
332 out << attr1 << "amd_kernel_code_version_major" << eq
333 << akc->amd_kernel_code_version_major
334 << std::endl;
335 out << attr1 << "amd_kernel_code_version_minor" << eq
336 << akc->amd_kernel_code_version_minor
337 << std::endl;
338 out << attr1 << "amd_machine_kind" << eq
339 << AmdMachineKindToString(akc->amd_machine_kind)
340 << std::endl;
341 out << attr1 << "amd_machine_version_major" << eq
342 << (uint32_t)akc->amd_machine_version_major
343 << std::endl;
344 out << attr1 << "amd_machine_version_minor" << eq
345 << (uint32_t)akc->amd_machine_version_minor
346 << std::endl;
347 out << attr1 << "amd_machine_version_stepping" << eq
348 << (uint32_t)akc->amd_machine_version_stepping
349 << std::endl;
350 out << attr1 << "kernel_code_entry_byte_offset" << eq
351 << akc->kernel_code_entry_byte_offset
352 << std::endl;
353 if (akc->kernel_code_prefetch_byte_offset) {
354 out << attr1 << "kernel_code_prefetch_byte_offset" << eq
355 << akc->kernel_code_prefetch_byte_offset
356 << std::endl;
357 }
358 if (akc->kernel_code_prefetch_byte_size) {
359 out << attr1 << "kernel_code_prefetch_byte_size" << eq
360 << akc->kernel_code_prefetch_byte_size
361 << std::endl;
362 }
363 out << attr1 << "max_scratch_backing_memory_byte_size" << eq
364 << akc->max_scratch_backing_memory_byte_size
365 << std::endl;
366 PrintAmdComputePgmRsrcOne(out, akc->compute_pgm_rsrc1);
367 PrintAmdComputePgmRsrcTwo(out, akc->compute_pgm_rsrc2);
368 PrintAmdKernelCodeProperties(out, akc->kernel_code_properties);
369 if (akc->workitem_private_segment_byte_size) {
370 out << attr1 << "workitem_private_segment_byte_size" << eq
371 << akc->workitem_private_segment_byte_size
372 << std::endl;
373 }
374 if (akc->workgroup_group_segment_byte_size) {
375 out << attr1 << "workgroup_group_segment_byte_size" << eq
376 << akc->workgroup_group_segment_byte_size
377 << std::endl;
378 }
379 if (akc->gds_segment_byte_size) {
380 out << attr1 << "gds_segment_byte_size" << eq
381 << akc->gds_segment_byte_size
382 << std::endl;
383 }
384 if (akc->kernarg_segment_byte_size) {
385 out << attr1 << "kernarg_segment_byte_size" << eq
386 << akc->kernarg_segment_byte_size
387 << std::endl;
388 }
389 if (akc->workgroup_fbarrier_count) {
390 out << attr1 << "workgroup_fbarrier_count" << eq
391 << akc->workgroup_fbarrier_count
392 << std::endl;
393 }
394 out << attr1 << "wavefront_sgpr_count" << eq
395 << (uint32_t)akc->wavefront_sgpr_count
396 << std::endl;
397 out << attr1 << "workitem_vgpr_count" << eq
398 << (uint32_t)akc->workitem_vgpr_count
399 << std::endl;
400 if (akc->reserved_vgpr_count > 0) {
401 out << attr1 << "reserved_vgpr_first" << eq
402 << (uint32_t)akc->reserved_vgpr_first
403 << std::endl;
404 out << attr1 << "reserved_vgpr_count" << eq
405 << (uint32_t)akc->reserved_vgpr_count
406 << std::endl;
407 }
408 if (akc->reserved_sgpr_count > 0) {
409 out << attr1 << "reserved_sgpr_first" << eq
410 << (uint32_t)akc->reserved_sgpr_first
411 << std::endl;
412 out << attr1 << "reserved_sgpr_count" << eq
413 << (uint32_t)akc->reserved_sgpr_count
414 << std::endl;
415 }
416 if (is_debug_enabled && (akc->debug_wavefront_private_segment_offset_sgpr != uint16_t(-1))) {
417 out << attr1 << "debug_wavefront_private_segment_offset_sgpr" << eq
418 << (uint32_t)akc->debug_wavefront_private_segment_offset_sgpr
419 << std::endl;
420 }
421 if (is_debug_enabled && (akc->debug_private_segment_buffer_sgpr != uint16_t(-1))) {
422 out << attr1 << "debug_private_segment_buffer_sgpr" << eq
423 << (uint32_t)akc->debug_private_segment_buffer_sgpr
424 << ":"
425 << (uint32_t)(akc->debug_private_segment_buffer_sgpr + 3)
426 << std::endl;
427 }
428 if (akc->kernarg_segment_alignment) {
429 out << attr1 << "kernarg_segment_alignment" << eq
430 << AmdPowerTwoToString(akc->kernarg_segment_alignment)
431 << " (" << (uint32_t) akc->kernarg_segment_alignment << ")"
432 << std::endl;
433 }
434 if (akc->group_segment_alignment) {
435 out << attr1 << "group_segment_alignment" << eq
436 << AmdPowerTwoToString(akc->group_segment_alignment)
437 << " (" << (uint32_t) akc->group_segment_alignment << ")"
438 << std::endl;
439 }
440 if (akc->private_segment_alignment) {
441 out << attr1 << "private_segment_alignment" << eq
442 << AmdPowerTwoToString(akc->private_segment_alignment)
443 << " (" << (uint32_t) akc->private_segment_alignment << ")"
444 << std::endl;
445 }
446 out << attr1 << "wavefront_size" << eq
447 << AmdPowerTwoToString(akc->wavefront_size)
448 << " (" << (uint32_t) akc->wavefront_size << ")"
449 << std::endl;
450 PrintAmdControlDirectives(out, akc->control_directives);
451 }
452
PrintAmdComputePgmRsrcOne(std::ostream & out,amd_compute_pgm_rsrc_one32_t compute_pgm_rsrc1)453 void PrintAmdComputePgmRsrcOne(std::ostream& out, amd_compute_pgm_rsrc_one32_t compute_pgm_rsrc1)
454 {
455 out << " COMPUTE_PGM_RSRC1 (0x" << std::hex << std::setw(8) << std::setfill('0') << compute_pgm_rsrc1 << "):" << std::endl;
456 out << std::dec;
457
458 uint32_t granulated_workitem_vgpr_count = AMD_HSA_BITS_GET(compute_pgm_rsrc1, AMD_COMPUTE_PGM_RSRC_ONE_GRANULATED_WORKITEM_VGPR_COUNT);
459 out << attr2 << "granulated_workitem_vgpr_count" << eq
460 << granulated_workitem_vgpr_count
461 << std::endl;
462 uint32_t granulated_wavefront_sgpr_count = AMD_HSA_BITS_GET(compute_pgm_rsrc1, AMD_COMPUTE_PGM_RSRC_ONE_GRANULATED_WAVEFRONT_SGPR_COUNT);
463 out << attr2 << "granulated_wavefront_sgpr_count" << eq
464 << granulated_wavefront_sgpr_count
465 << std::endl;
466 uint32_t priority = AMD_HSA_BITS_GET(compute_pgm_rsrc1, AMD_COMPUTE_PGM_RSRC_ONE_PRIORITY);
467 out << attr2 << "priority" << eq
468 << priority
469 << std::endl;
470 uint32_t float_round_mode_32 = AMD_HSA_BITS_GET(compute_pgm_rsrc1, AMD_COMPUTE_PGM_RSRC_ONE_FLOAT_ROUND_MODE_32);
471 out << attr2 << "float_round_mode_32" << eq
472 << AmdFloatRoundModeToString((amd_float_round_mode_t)float_round_mode_32)
473 << std::endl;
474 uint32_t float_round_mode_16_64 = AMD_HSA_BITS_GET(compute_pgm_rsrc1, AMD_COMPUTE_PGM_RSRC_ONE_FLOAT_ROUND_MODE_16_64);
475 out << attr2 << "float_round_mode_16_64" << eq
476 << AmdFloatRoundModeToString((amd_float_round_mode_t)float_round_mode_16_64)
477 << std::endl;
478 uint32_t float_denorm_mode_32 = AMD_HSA_BITS_GET(compute_pgm_rsrc1, AMD_COMPUTE_PGM_RSRC_ONE_FLOAT_DENORM_MODE_32);
479 out << attr2 << "float_denorm_mode_32" << eq
480 << AmdFloatDenormModeToString((amd_float_denorm_mode_t)float_denorm_mode_32)
481 << std::endl;
482 uint32_t float_denorm_mode_16_64 = AMD_HSA_BITS_GET(compute_pgm_rsrc1, AMD_COMPUTE_PGM_RSRC_ONE_FLOAT_DENORM_MODE_16_64);
483 out << attr2 << "float_denorm_mode_16_64" << eq
484 << AmdFloatDenormModeToString((amd_float_denorm_mode_t)float_denorm_mode_16_64)
485 << std::endl;
486 if (AMD_HSA_BITS_GET(compute_pgm_rsrc1, AMD_COMPUTE_PGM_RSRC_ONE_PRIV)) {
487 out << attr2 << "priv" << eq << "TRUE"
488 << std::endl;
489 }
490 if (AMD_HSA_BITS_GET(compute_pgm_rsrc1, AMD_COMPUTE_PGM_RSRC_ONE_ENABLE_DX10_CLAMP)) {
491 out << attr2 << "enable_dx10_clamp" << eq << "TRUE"
492 << std::endl;
493 }
494 if (AMD_HSA_BITS_GET(compute_pgm_rsrc1, AMD_COMPUTE_PGM_RSRC_ONE_DEBUG_MODE)) {
495 out << attr2 << "debug_mode" << eq << "TRUE"
496 << std::endl;
497 }
498 if (AMD_HSA_BITS_GET(compute_pgm_rsrc1, AMD_COMPUTE_PGM_RSRC_ONE_ENABLE_IEEE_MODE)) {
499 out << attr2 << "enable_ieee_mode" << eq << "TRUE"
500 << std::endl;
501 }
502 if (AMD_HSA_BITS_GET(compute_pgm_rsrc1, AMD_COMPUTE_PGM_RSRC_ONE_BULKY)) {
503 out << attr2 << "bulky" << eq << "TRUE"
504 << std::endl;
505 }
506 if (AMD_HSA_BITS_GET(compute_pgm_rsrc1, AMD_COMPUTE_PGM_RSRC_ONE_CDBG_USER)) {
507 out << attr2 << "cdbg_user" << eq << "TRUE"
508 << std::endl;
509 }
510 }
511
PrintAmdComputePgmRsrcTwo(std::ostream & out,amd_compute_pgm_rsrc_two32_t compute_pgm_rsrc2)512 void PrintAmdComputePgmRsrcTwo(std::ostream& out, amd_compute_pgm_rsrc_two32_t compute_pgm_rsrc2)
513 {
514 out << " COMPUTE_PGM_RSRC2 (0x" << std::hex << std::setw(8) << std::setfill('0') << compute_pgm_rsrc2 << "):" << std::endl;
515 out << std::dec;
516
517 if (AMD_HSA_BITS_GET(compute_pgm_rsrc2, AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_SGPR_PRIVATE_SEGMENT_WAVE_BYTE_OFFSET)) {
518 out << attr2 << "enable_sgpr_private_segment_wave_byte_offset" << eq << "TRUE"
519 << std::endl;
520 }
521 uint32_t user_sgpr_count = AMD_HSA_BITS_GET(compute_pgm_rsrc2, AMD_COMPUTE_PGM_RSRC_TWO_USER_SGPR_COUNT);
522 out << attr2 << "user_sgpr_count" << eq
523 << user_sgpr_count
524 << std::endl;
525 if (AMD_HSA_BITS_GET(compute_pgm_rsrc2, AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_TRAP_HANDLER)) {
526 out << attr2 << "enable_trap_handler" << eq << "TRUE"
527 << std::endl;
528 }
529 if (AMD_HSA_BITS_GET(compute_pgm_rsrc2, AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_SGPR_WORKGROUP_ID_X)) {
530 out << attr2 << "enable_sgpr_workgroup_id_x" << eq << "TRUE"
531 << std::endl;
532 }
533 if (AMD_HSA_BITS_GET(compute_pgm_rsrc2, AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_SGPR_WORKGROUP_ID_Y)) {
534 out << attr2 << "enable_sgpr_workgroup_id_y" << eq << "TRUE"
535 << std::endl;
536 }
537 if (AMD_HSA_BITS_GET(compute_pgm_rsrc2, AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_SGPR_WORKGROUP_ID_Z)) {
538 out << attr2 << "enable_sgpr_workgroup_id_z" << eq << "TRUE"
539 << std::endl;
540 }
541 if (AMD_HSA_BITS_GET(compute_pgm_rsrc2, AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_SGPR_WORKGROUP_INFO)) {
542 out << attr2 << "enable_sgpr_workgroup_info" << eq << "TRUE"
543 << std::endl;
544 }
545 uint32_t enable_vgpr_workitem_id = AMD_HSA_BITS_GET(compute_pgm_rsrc2, AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_VGPR_WORKITEM_ID);
546 out << attr2 << "enable_vgpr_workitem_id" << eq
547 << AmdSystemVgprWorkitemIdToString((amd_system_vgpr_workitem_id_t)enable_vgpr_workitem_id)
548 << std::endl;
549 if (AMD_HSA_BITS_GET(compute_pgm_rsrc2, AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_EXCEPTION_ADDRESS_WATCH)) {
550 out << attr2 << "enable_exception_address_watch" << eq << "TRUE"
551 << std::endl;
552 }
553 if (AMD_HSA_BITS_GET(compute_pgm_rsrc2, AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_EXCEPTION_MEMORY_VIOLATION)) {
554 out << attr2 << "enable_exception_memory_violation" << eq << "TRUE"
555 << std::endl;
556 }
557 uint32_t granulated_lds_size = AMD_HSA_BITS_GET(compute_pgm_rsrc2, AMD_COMPUTE_PGM_RSRC_TWO_GRANULATED_LDS_SIZE);
558 out << attr2 << "granulated_lds_size" << eq
559 << granulated_lds_size
560 << std::endl;
561 if (AMD_HSA_BITS_GET(compute_pgm_rsrc2, AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION)) {
562 out << attr2 << "enable_exception_ieee_754_fp_invalid_operation" << eq << "TRUE"
563 << std::endl;
564 }
565 if (AMD_HSA_BITS_GET(compute_pgm_rsrc2, AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE)) {
566 out << attr2 << "enable_exception_fp_denormal_source" << eq << "TRUE"
567 << std::endl;
568 }
569 if (AMD_HSA_BITS_GET(compute_pgm_rsrc2, AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO)) {
570 out << attr2 << "enable_exception_ieee_754_fp_division_by_zero" << eq << "TRUE"
571 << std::endl;
572 }
573 if (AMD_HSA_BITS_GET(compute_pgm_rsrc2, AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW)) {
574 out << attr2 << "enable_exception_ieee_754_fp_overflow" << eq << "TRUE"
575 << std::endl;
576 }
577 if (AMD_HSA_BITS_GET(compute_pgm_rsrc2, AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW)) {
578 out << attr2 << "enable_exception_ieee_754_fp_underflow" << eq << "TRUE"
579 << std::endl;
580 }
581 if (AMD_HSA_BITS_GET(compute_pgm_rsrc2, AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT)) {
582 out << attr2 << "enable_exception_ieee_754_fp_inexact" << eq << "TRUE"
583 << std::endl;
584 }
585 if (AMD_HSA_BITS_GET(compute_pgm_rsrc2, AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_EXCEPTION_INT_DIVISION_BY_ZERO)) {
586 out << attr2 << "enable_exception_int_division_by_zero" << eq << "TRUE"
587 << std::endl;
588 }
589 }
590
PrintAmdKernelCodeProperties(std::ostream & out,amd_kernel_code_properties32_t kernel_code_properties)591 void PrintAmdKernelCodeProperties(std::ostream& out, amd_kernel_code_properties32_t kernel_code_properties)
592 {
593 out << " KERNEL_CODE_PROPERTIES (0x" << std::hex << std::setw(8) << std::setfill('0') << kernel_code_properties << "):" << std::endl;
594 out << std::dec;
595
596 if (AMD_HSA_BITS_GET(kernel_code_properties, AMD_KERNEL_CODE_PROPERTIES_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER)) {
597 out << attr2 << "enable_sgpr_private_segment_buffer" << eq << "TRUE"
598 << std::endl;
599 }
600 if (AMD_HSA_BITS_GET(kernel_code_properties, AMD_KERNEL_CODE_PROPERTIES_ENABLE_SGPR_DISPATCH_PTR)) {
601 out << attr2 << "enable_sgpr_dispatch_ptr" << eq << "TRUE"
602 << std::endl;
603 }
604 if (AMD_HSA_BITS_GET(kernel_code_properties, AMD_KERNEL_CODE_PROPERTIES_ENABLE_SGPR_QUEUE_PTR)) {
605 out << attr2 << "enable_sgpr_queue_ptr" << eq << "TRUE"
606 << std::endl;
607 }
608 if (AMD_HSA_BITS_GET(kernel_code_properties, AMD_KERNEL_CODE_PROPERTIES_ENABLE_SGPR_KERNARG_SEGMENT_PTR)) {
609 out << attr2 << "enable_sgpr_kernarg_segment_ptr" << eq << "TRUE"
610 << std::endl;
611 }
612 if (AMD_HSA_BITS_GET(kernel_code_properties, AMD_KERNEL_CODE_PROPERTIES_ENABLE_SGPR_DISPATCH_ID)) {
613 out << attr2 << "enable_sgpr_dispatch_id" << eq << "TRUE"
614 << std::endl;
615 }
616 if (AMD_HSA_BITS_GET(kernel_code_properties, AMD_KERNEL_CODE_PROPERTIES_ENABLE_SGPR_FLAT_SCRATCH_INIT)) {
617 out << attr2 << "enable_sgpr_flat_scratch_init" << eq << "TRUE"
618 << std::endl;
619 }
620 if (AMD_HSA_BITS_GET(kernel_code_properties, AMD_KERNEL_CODE_PROPERTIES_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE)) {
621 out << attr2 << "enable_sgpr_private_segment_size" << eq << "TRUE"
622 << std::endl;
623 }
624 if (AMD_HSA_BITS_GET(kernel_code_properties, AMD_KERNEL_CODE_PROPERTIES_ENABLE_SGPR_GRID_WORKGROUP_COUNT_X)) {
625 out << attr2 << "enable_sgpr_grid_workgroup_count_x" << eq << "TRUE"
626 << std::endl;
627 }
628 if (AMD_HSA_BITS_GET(kernel_code_properties, AMD_KERNEL_CODE_PROPERTIES_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Y)) {
629 out << attr2 << "enable_sgpr_grid_workgroup_count_y" << eq << "TRUE"
630 << std::endl;
631 }
632 if (AMD_HSA_BITS_GET(kernel_code_properties, AMD_KERNEL_CODE_PROPERTIES_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Z)) {
633 out << attr2 << "enable_sgpr_grid_workgroup_count_z" << eq << "TRUE"
634 << std::endl;
635 }
636 if (AMD_HSA_BITS_GET(kernel_code_properties, AMD_KERNEL_CODE_PROPERTIES_ENABLE_ORDERED_APPEND_GDS)) {
637 out << attr2 << "enable_ordered_append_gds" << eq << "TRUE"
638 << std::endl;
639 }
640 uint32_t private_element_size = AMD_HSA_BITS_GET(kernel_code_properties, AMD_KERNEL_CODE_PROPERTIES_PRIVATE_ELEMENT_SIZE);
641 out << attr2 << "private_element_size" << eq
642 << AmdElementByteSizeToString((amd_element_byte_size_t)private_element_size)
643 << std::endl;
644 if (AMD_HSA_BITS_GET(kernel_code_properties, AMD_KERNEL_CODE_PROPERTIES_IS_PTR64)) {
645 out << attr2 << "is_ptr64" << eq << "TRUE"
646 << std::endl;
647 }
648 if (AMD_HSA_BITS_GET(kernel_code_properties, AMD_KERNEL_CODE_PROPERTIES_IS_DYNAMIC_CALLSTACK)) {
649 out << attr2 << "is_dynamic_callstack" << eq << "TRUE"
650 << std::endl;
651 }
652 if (AMD_HSA_BITS_GET(kernel_code_properties, AMD_KERNEL_CODE_PROPERTIES_IS_DEBUG_ENABLED)) {
653 out << attr2 << "is_debug_enabled" << eq << "TRUE"
654 << std::endl;
655 }
656 if (AMD_HSA_BITS_GET(kernel_code_properties, AMD_KERNEL_CODE_PROPERTIES_IS_XNACK_ENABLED)) {
657 out << attr2 << "is_xnack_enabled" << eq << "TRUE"
658 << std::endl;
659 }
660 }
661
PrintAmdControlDirectives(std::ostream & out,const amd_control_directives_t & control_directives)662 void PrintAmdControlDirectives(std::ostream& out, const amd_control_directives_t &control_directives)
663 {
664 if (!control_directives.enabled_control_directives) {
665 return;
666 }
667
668 out << " CONTROL_DIRECTIVES:" << std::endl;
669
670 if (control_directives.enabled_control_directives & AMD_ENABLED_CONTROL_DIRECTIVE_ENABLE_BREAK_EXCEPTIONS) {
671 out << attr2 << "enable_break_exceptions" << eq
672 << AmdExceptionKindToString(control_directives.enable_break_exceptions).c_str()
673 << std::endl;
674 }
675 if (control_directives.enabled_control_directives & AMD_ENABLED_CONTROL_DIRECTIVE_ENABLE_DETECT_EXCEPTIONS) {
676 out << attr2 << "enable_detect_exceptions" << eq
677 << AmdExceptionKindToString(control_directives.enable_detect_exceptions).c_str()
678 << std::endl;
679 }
680 if (control_directives.enabled_control_directives & AMD_ENABLED_CONTROL_DIRECTIVE_MAX_DYNAMIC_GROUP_SIZE) {
681 out << attr2 << "max_dynamic_group_size" << eq
682 << control_directives.max_dynamic_group_size
683 << std::endl;
684 }
685 if (control_directives.enabled_control_directives & AMD_ENABLED_CONTROL_DIRECTIVE_MAX_FLAT_GRID_SIZE) {
686 out << attr2 << "max_flat_grid_size" << eq
687 << control_directives.max_flat_grid_size
688 << std::endl;
689 }
690 if (control_directives.enabled_control_directives & AMD_ENABLED_CONTROL_DIRECTIVE_MAX_FLAT_WORKGROUP_SIZE) {
691 out << attr2 << "max_flat_workgroup_size" << eq
692 << control_directives.max_flat_workgroup_size
693 << std::endl;
694 }
695 if (control_directives.enabled_control_directives & AMD_ENABLED_CONTROL_DIRECTIVE_REQUIRED_DIM) {
696 out << attr2 << "required_dim" << eq
697 << (uint32_t)control_directives.required_dim
698 << std::endl;
699 }
700 if (control_directives.enabled_control_directives & AMD_ENABLED_CONTROL_DIRECTIVE_REQUIRED_GRID_SIZE) {
701 out << attr2 << "required_grid_size" << eq
702 << "("
703 << control_directives.required_grid_size[0]
704 << ", "
705 << control_directives.required_grid_size[1]
706 << ", "
707 << control_directives.required_grid_size[2]
708 << ")"
709 << std::endl;
710 }
711 if (control_directives.enabled_control_directives & AMD_ENABLED_CONTROL_DIRECTIVE_REQUIRED_WORKGROUP_SIZE) {
712 out << attr2 << "required_workgroup_size" << eq
713 << "("
714 << control_directives.required_workgroup_size[0]
715 << ", "
716 << control_directives.required_workgroup_size[1]
717 << ", "
718 << control_directives.required_workgroup_size[2]
719 << ")"
720 << std::endl;
721 }
722 if (control_directives.enabled_control_directives & AMD_ENABLED_CONTROL_DIRECTIVE_REQUIRE_NO_PARTIAL_WORKGROUPS) {
723 out << attr2 << "require_no_partial_workgroups" << eq << "TRUE"
724 << std::endl;
725 }
726 }
727
728 namespace code_options {
729
space(std::ostream & out)730 std::ostream& space(std::ostream& out)
731 {
732 if (out.tellp()) { out << " "; }
733 return out;
734 }
735
operator <<(std::ostream & out,const control_directive & d)736 std::ostream& operator<<(std::ostream& out, const control_directive& d)
737 {
738 out << space <<
739 "-hsa_control_directive:" << d.name << "=";
740 return out;
741 }
742
BrigExceptionString(BrigExceptions32_t e)743 const char *BrigExceptionString(BrigExceptions32_t e)
744 {
745 switch (e) {
746 case BRIG_EXCEPTIONS_INVALID_OPERATION: return "INVALID_OPERATION";
747 case BRIG_EXCEPTIONS_DIVIDE_BY_ZERO: return "DIVIDE_BY_ZERO";
748 case BRIG_EXCEPTIONS_OVERFLOW: return "OVERFLOW";
749 case BRIG_EXCEPTIONS_INEXACT: return "INEXACT";
750 default:
751 assert(false); return "<unknown_BRIG_exception>";
752 }
753 }
754
operator <<(std::ostream & out,const exceptions_mask & e)755 std::ostream& operator<<(std::ostream& out, const exceptions_mask& e)
756 {
757 bool first = true;
758 for (BrigExceptions32_t be = BRIG_EXCEPTIONS_INVALID_OPERATION; be < BRIG_EXCEPTIONS_FIRST_USER_DEFINED; ++be) {
759 if (e.mask & be) {
760 if (first) { first = false; } else { out << ","; }
761 out << BrigExceptionString(be);
762 }
763 }
764 return out;
765 }
766
operator <<(std::ostream & out,const control_directives & cd)767 std::ostream& operator<<(std::ostream& out, const control_directives& cd)
768 {
769 const hsa_ext_control_directives_t& d = cd.d;
770 uint64_t mask = d.control_directives_mask;
771 if (!mask) { return out; }
772
773 if (mask & BRIG_CONTROL_ENABLEBREAKEXCEPTIONS) {
774 out <<
775 control_directive("ENABLEBREAKEXCEPTIONS") <<
776 exceptions_mask(d.break_exceptions_mask);
777 }
778 if (mask & BRIG_CONTROL_ENABLEDETECTEXCEPTIONS) {
779 out <<
780 control_directive("ENABLEDETECTEXCEPTIONS") <<
781 exceptions_mask(d.detect_exceptions_mask);
782 }
783 if (mask & BRIG_CONTROL_MAXDYNAMICGROUPSIZE) {
784 out <<
785 control_directive("MAXDYNAMICGROUPSIZE") <<
786 d.max_dynamic_group_size;
787 }
788 if (mask & BRIG_CONTROL_MAXFLATGRIDSIZE) {
789 out <<
790 control_directive("MAXFLATGRIDSIZE") <<
791 d.max_flat_grid_size;
792 }
793 if (mask & BRIG_CONTROL_MAXFLATWORKGROUPSIZE) {
794 out <<
795 control_directive("MAXFLATWORKGROUPSIZE") <<
796 d.max_flat_workgroup_size;
797 }
798 if (mask & BRIG_CONTROL_REQUIREDDIM) {
799 out <<
800 control_directive("REQUIREDDIM") <<
801 d.required_dim;
802 }
803 if (mask & BRIG_CONTROL_REQUIREDGRIDSIZE) {
804 out <<
805 control_directive("REQUIREDGRIDSIZE") <<
806 d.required_grid_size[0] << "," <<
807 d.required_grid_size[1] << "," <<
808 d.required_grid_size[2];
809 }
810 if (mask & BRIG_CONTROL_REQUIREDWORKGROUPSIZE) {
811 out <<
812 control_directive("REQUIREDWORKGROUPSIZE") <<
813 d.required_workgroup_size.x << "," <<
814 d.required_workgroup_size.y << "," <<
815 d.required_workgroup_size.z;
816 }
817 return out;
818 }
819 }
820
hsaerr2str(hsa_status_t status)821 const char* hsaerr2str(hsa_status_t status) {
822 switch ((unsigned) status) {
823 case HSA_STATUS_SUCCESS:
824 return
825 "HSA_STATUS_SUCCESS: The function has been executed successfully.";
826 case HSA_STATUS_INFO_BREAK:
827 return
828 "HSA_STATUS_INFO_BREAK: A traversal over a list of "
829 "elements has been interrupted by the application before "
830 "completing.";
831 case HSA_STATUS_ERROR:
832 return "HSA_STATUS_ERROR: A generic error has occurred.";
833 case HSA_STATUS_ERROR_INVALID_ARGUMENT:
834 return
835 "HSA_STATUS_ERROR_INVALID_ARGUMENT: One of the actual "
836 "arguments does not meet a precondition stated in the "
837 "documentation of the corresponding formal argument.";
838 case HSA_STATUS_ERROR_INVALID_QUEUE_CREATION:
839 return
840 "HSA_STATUS_ERROR_INVALID_QUEUE_CREATION: The requested "
841 "queue creation is not valid.";
842 case HSA_STATUS_ERROR_INVALID_ALLOCATION:
843 return
844 "HSA_STATUS_ERROR_INVALID_ALLOCATION: The requested "
845 "allocation is not valid.";
846 case HSA_STATUS_ERROR_INVALID_AGENT:
847 return
848 "HSA_STATUS_ERROR_INVALID_AGENT: The agent is invalid.";
849 case HSA_STATUS_ERROR_INVALID_REGION:
850 return
851 "HSA_STATUS_ERROR_INVALID_REGION: The memory region is invalid.";
852 case HSA_STATUS_ERROR_INVALID_SIGNAL:
853 return
854 "HSA_STATUS_ERROR_INVALID_SIGNAL: The signal is invalid.";
855 case HSA_STATUS_ERROR_INVALID_QUEUE:
856 return
857 "HSA_STATUS_ERROR_INVALID_QUEUE: The queue is invalid.";
858 case HSA_STATUS_ERROR_OUT_OF_RESOURCES:
859 return
860 "HSA_STATUS_ERROR_OUT_OF_RESOURCES: The runtime failed to "
861 "allocate the necessary resources. This error may also "
862 "occur when the core runtime library needs to spawn "
863 "threads or create internal OS-specific events.";
864 case HSA_STATUS_ERROR_INVALID_PACKET_FORMAT:
865 return
866 "HSA_STATUS_ERROR_INVALID_PACKET_FORMAT: The AQL packet "
867 "is malformed.";
868 case HSA_STATUS_ERROR_RESOURCE_FREE:
869 return
870 "HSA_STATUS_ERROR_RESOURCE_FREE: An error has been "
871 "detected while releasing a resource.";
872 case HSA_STATUS_ERROR_NOT_INITIALIZED:
873 return
874 "HSA_STATUS_ERROR_NOT_INITIALIZED: An API other than "
875 "hsa_init has been invoked while the reference count of "
876 "the HSA runtime is zero.";
877 case HSA_STATUS_ERROR_REFCOUNT_OVERFLOW:
878 return
879 "HSA_STATUS_ERROR_REFCOUNT_OVERFLOW: The maximum "
880 "reference count for the object has been reached.";
881 case HSA_STATUS_ERROR_INCOMPATIBLE_ARGUMENTS:
882 return
883 "HSA_STATUS_ERROR_INCOMPATIBLE_ARGUMENTS: The arguments passed to "
884 "a functions are not compatible.";
885 case HSA_STATUS_ERROR_INVALID_INDEX:
886 return "The index is invalid.";
887 case HSA_STATUS_ERROR_INVALID_ISA:
888 return "The instruction set architecture is invalid.";
889 case HSA_STATUS_ERROR_INVALID_CODE_OBJECT:
890 return "The code object is invalid.";
891 case HSA_STATUS_ERROR_INVALID_EXECUTABLE:
892 return "The executable is invalid.";
893 case HSA_STATUS_ERROR_FROZEN_EXECUTABLE:
894 return "The executable is frozen.";
895 case HSA_STATUS_ERROR_INVALID_SYMBOL_NAME:
896 return "There is no symbol with the given name.";
897 case HSA_STATUS_ERROR_VARIABLE_ALREADY_DEFINED:
898 return "The variable is already defined.";
899 case HSA_STATUS_ERROR_VARIABLE_UNDEFINED:
900 return "The variable is undefined.";
901 case HSA_EXT_STATUS_ERROR_INVALID_PROGRAM:
902 return
903 "HSA_EXT_STATUS_ERROR_INVALID_PROGRAM: Invalid program";
904 case HSA_EXT_STATUS_ERROR_INVALID_MODULE:
905 return "HSA_EXT_STATUS_ERROR_INVALID_MODULE: Invalid module";
906 case HSA_EXT_STATUS_ERROR_INCOMPATIBLE_MODULE:
907 return
908 "HSA_EXT_STATUS_ERROR_INCOMPATIBLE_MODULE: Incompatible module";
909 case HSA_EXT_STATUS_ERROR_MODULE_ALREADY_INCLUDED:
910 return
911 "HSA_EXT_STATUS_ERROR_MODULE_ALREADY_INCLUDED: Module already "
912 "included";
913 case HSA_EXT_STATUS_ERROR_SYMBOL_MISMATCH:
914 return
915 "HSA_EXT_STATUS_ERROR_SYMBOL_MISMATCH: Symbol mismatch";
916 case HSA_EXT_STATUS_ERROR_FINALIZATION_FAILED:
917 return
918 "HSA_EXT_STATUS_ERROR_FINALIZATION_FAILED: Finalization failed";
919 case HSA_EXT_STATUS_ERROR_DIRECTIVE_MISMATCH:
920 return
921 "HSA_EXT_STATUS_ERROR_DIRECTIVE_MISMATCH: Directive mismatch";
922 default:
923 return
924 "Unknown HSA status";
925 }
926 }
927
ReadFileIntoBuffer(const std::string & filename,std::vector<char> & buffer)928 bool ReadFileIntoBuffer(const std::string& filename, std::vector<char>& buffer)
929 {
930 std::ifstream file(filename, std::ios::binary);
931 if (!file) { return false; }
932 file.seekg(0, std::ios::end);
933 std::streamsize size = file.tellg();
934 file.seekg(0, std::ios::beg);
935
936 buffer.resize((size_t) size);
937 if (!file.read(buffer.data(), size)) { return false; }
938 return true;
939 }
940
941 #ifndef _WIN32
942 #define _tempnam tempnam
943 #define _close close
944 #define _getpid getpid
945 #define _open open
946 #endif // _WIN32
947
OpenTempFile(const char * prefix)948 int OpenTempFile(const char* prefix)
949 {
950 unsigned c = 0;
951 std::string tname = prefix;
952 tname += "_";
953 tname += std::to_string(_getpid());
954 tname += "_";
955 while (c++ < 20) { // Loop because several threads can generate same filename.
956 #ifdef _WIN32
957 char dir[MAX_PATH+1];
958 if (!GetTempPath(sizeof(dir), dir)) { return -1; }
959 #else // _WIN32
960 char *dir = NULL;
961 #endif // _WIN32
962 char *name = _tempnam(dir, tname.c_str());
963 if (!name) { return -1; }
964 #ifdef _WIN32
965 HANDLE h = CreateFile(
966 name,
967 GENERIC_READ | GENERIC_WRITE,
968 0, // No sharing
969 NULL,
970 CREATE_NEW,
971 FILE_ATTRIBUTE_TEMPORARY | FILE_FLAG_DELETE_ON_CLOSE,
972 NULL);
973 free(name);
974 if (h == INVALID_HANDLE_VALUE) { continue; }
975 return _open_osfhandle((intptr_t)h, 0);
976 #else // _WIN32
977 int d = _open(name, O_RDWR | O_CREAT | O_EXCL, S_IRUSR | S_IWUSR);
978 if (d < 0) { free(name); continue; }
979 if (unlink(name) < 0) { free(name); _close(d); return -1; }
980 free(name);
981 return d;
982 #endif // _WIN32
983 }
984 return -1;
985 }
986
CloseTempFile(int fd)987 void CloseTempFile(int fd)
988 {
989 _close(fd);
990 }
991
CommentTopCallBack(void * ctx,int type)992 const char * CommentTopCallBack(void *ctx, int type) {
993 static const char* amd_kernel_code_t_begin = "amd_kernel_code_t begin";
994 static const char* amd_kernel_code_t_end = "amd_kernel_code_t end";
995 static const char* isa_begin = "isa begin";
996 switch(type) {
997 case COMMENT_AMD_KERNEL_CODE_T_BEGIN:
998 return amd_kernel_code_t_begin;
999 case COMMENT_AMD_KERNEL_CODE_T_END:
1000 return amd_kernel_code_t_end;
1001 case COMMENT_KERNEL_ISA_BEGIN:
1002 return isa_begin;
1003 default:
1004 assert(false);
1005 return "";
1006 }
1007 }
CommentRightCallBack(void * ctx,int type)1008 const char * CommentRightCallBack(void *ctx, int type) {
1009 return nullptr;
1010 }
1011
ParseInstructionOffset(const std::string & instruction)1012 uint32_t ParseInstructionOffset(const std::string& instruction) {
1013 // instruction format: opcode op1, op2 ... // offset: binopcode
1014 std::string::size_type n = instruction.find("//");
1015 assert(n != std::string::npos);
1016 std::string comment = instruction.substr(n);
1017 n = comment.find(':');
1018 assert(n != std::string::npos);
1019 comment.erase(n);
1020 assert(comment.size() > 3);
1021 comment.erase(0, 3);
1022 return strtoul(comment.c_str(), nullptr, 16);
1023 }
1024
IsNotSpace(char c)1025 bool IsNotSpace(char c) {
1026 return !isspace(static_cast<int>(c));
1027 }
1028
ltrim(std::string & str)1029 void ltrim(std::string &str) {
1030 str.erase(str.begin(), std::find_if(str.begin(), str.end(), IsNotSpace));
1031 }
1032
DumpFileName(const std::string & dir,const char * prefix,const char * ext,unsigned n,unsigned i)1033 std::string DumpFileName(const std::string& dir, const char* prefix, const char* ext, unsigned n, unsigned i)
1034 {
1035 std::ostringstream ss;
1036 if (!dir.empty()) {
1037 ss << dir << "/";
1038 }
1039 ss <<
1040 prefix <<
1041 std::setfill('0') << std::setw(3) << n;
1042 if (i) { ss << "_" << i; }
1043 if (ext) { ss << "." << ext; }
1044 return ss.str();
1045 }
1046
1047
1048 }
1049 }
1050