1 ////////////////////////////////////////////////////////////////////////////////
2 //
3 // The University of Illinois/NCSA
4 // Open Source License (NCSA)
5 //
6 // Copyright (c) 2014-2016, Advanced Micro Devices, Inc. All rights reserved.
7 //
8 // Developed by:
9 //
10 //                 AMD Research and AMD HSA Software Development
11 //
12 //                 Advanced Micro Devices, Inc.
13 //
14 //                 www.amd.com
15 //
16 // Permission is hereby granted, free of charge, to any person obtaining a copy
17 // of this software and associated documentation files (the "Software"), to
18 // deal with the Software without restriction, including without limitation
19 // the rights to use, copy, modify, merge, publish, distribute, sublicense,
20 // and/or sell copies of the Software, and to permit persons to whom the
21 // Software is furnished to do so, subject to the following conditions:
22 //
23 //  - Redistributions of source code must retain the above copyright notice,
24 //    this list of conditions and the following disclaimers.
25 //  - Redistributions in binary form must reproduce the above copyright
26 //    notice, this list of conditions and the following disclaimers in
27 //    the documentation and/or other materials provided with the distribution.
28 //  - Neither the names of Advanced Micro Devices, Inc,
29 //    nor the names of its contributors may be used to endorse or promote
30 //    products derived from this Software without specific prior written
31 //    permission.
32 //
33 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
34 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
35 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
36 // THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
37 // OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
38 // ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
39 // DEALINGS WITH THE SOFTWARE.
40 //
41 ////////////////////////////////////////////////////////////////////////////////
42 
43 #include "amd_hsa_code_util.hpp"
44 #include "libelf.h"
45 #include <fstream>
46 #include <cstring>
47 #include <iomanip>
48 #include <cassert>
49 #include <algorithm>
50 #include <sstream>
51 #ifdef _WIN32
52 #include <Windows.h>
53 #include <io.h>
54 #include <process.h>
55 #else // _WIN32
56 #include <sys/types.h>
57 #include <unistd.h>
58 #include <sys/types.h>
59 #include <sys/stat.h>
60 #include <fcntl.h>
61 #endif // _WIN32
62 #include "Brig.h"
63 
64 namespace {
65 auto eq = " = ";
66 
attr1(std::ostream & out)67 std::ostream& attr1(std::ostream& out)
68 {
69   out << "  " << std::left << std::setw(60) << std::setfill(' ');
70   return out;
71 }
72 
attr2(std::ostream & out)73 std::ostream& attr2(std::ostream& out)
74 {
75   out << "    " << std::left << std::setw(58) << std::setfill(' ');
76   return out;
77 }
78 } // namespace anonymous
79 
80 namespace amd {
81 namespace hsa {
82 namespace common {
83 
IsAccessibleMemoryAddress(uint64_t address)84 bool IsAccessibleMemoryAddress(uint64_t address)
85 {
86   if (0 == address) {
87     return false;
88   }
89 #if defined(_WIN32) || defined(_WIN64)
90     MEMORY_BASIC_INFORMATION memory_info;
91     if (!VirtualQuery(reinterpret_cast<void*>(address), &memory_info, sizeof(memory_info))) {
92       return false;
93     }
94     int32_t is_accessible = ((memory_info.Protect & PAGE_READONLY) ||
95                              (memory_info.Protect & PAGE_READWRITE) ||
96                              (memory_info.Protect & PAGE_WRITECOPY) ||
97                              (memory_info.Protect & PAGE_EXECUTE_READ) ||
98                              (memory_info.Protect & PAGE_EXECUTE_READWRITE) ||
99                              (memory_info.Protect & PAGE_EXECUTE_WRITECOPY));
100     if (memory_info.Protect & PAGE_GUARD) {
101       is_accessible = 0;
102     }
103     if (memory_info.Protect & PAGE_NOACCESS) {
104       is_accessible = 0;
105     }
106     return is_accessible > 0;
107 #else
108   int32_t random_fd = 0;
109   ssize_t bytes_written = 0;
110   if (-1 == (random_fd = open("/dev/random", O_WRONLY))) {
111     return false;
112   }
113   bytes_written = write(random_fd, (void*)address, 1);
114   if (-1 == close(random_fd)) {
115     return false;
116   }
117   return bytes_written == 1;
118 #endif // _WIN32 || _WIN64
119 }
120 
121 }
122 
HsaSymbolKindToString(hsa_symbol_kind_t kind)123 std::string HsaSymbolKindToString(hsa_symbol_kind_t kind)
124 {
125   switch (kind) {
126   case HSA_SYMBOL_KIND_VARIABLE: return "VARIABLE";
127   case HSA_SYMBOL_KIND_INDIRECT_FUNCTION: return "INDIRECT_FUNCTION";
128   case HSA_SYMBOL_KIND_KERNEL: return "KERNEL";
129   default: return "UNKNOWN";
130   }
131 }
132 
HsaSymbolLinkageToString(hsa_symbol_linkage_t linkage)133 std::string HsaSymbolLinkageToString(hsa_symbol_linkage_t linkage)
134 {
135   switch (linkage) {
136   case HSA_SYMBOL_LINKAGE_MODULE: return "MODULE";
137   case HSA_SYMBOL_LINKAGE_PROGRAM: return "PROGRAM";
138   default: return "UNKNOWN";
139   }
140 }
141 
HsaVariableAllocationToString(hsa_variable_allocation_t allocation)142 std::string HsaVariableAllocationToString(hsa_variable_allocation_t allocation)
143 {
144   switch (allocation) {
145   case HSA_VARIABLE_ALLOCATION_AGENT: return "AGENT";
146   case HSA_VARIABLE_ALLOCATION_PROGRAM: return "PROGRAM";
147   default: return "UNKNOWN";
148   }
149 }
150 
HsaVariableSegmentToString(hsa_variable_segment_t segment)151 std::string HsaVariableSegmentToString(hsa_variable_segment_t segment)
152 {
153   switch (segment) {
154   case HSA_VARIABLE_SEGMENT_GLOBAL: return "GLOBAL";
155   case HSA_VARIABLE_SEGMENT_READONLY: return "READONLY";
156   default: return "UNKNOWN";
157   }
158 }
159 
HsaProfileToString(hsa_profile_t profile)160 std::string HsaProfileToString(hsa_profile_t profile)
161 {
162   switch (profile) {
163   case HSA_PROFILE_BASE: return "BASE";
164   case HSA_PROFILE_FULL: return "FULL";
165   default: return "UNKNOWN";
166   }
167 }
168 
HsaMachineModelToString(hsa_machine_model_t model)169 std::string HsaMachineModelToString(hsa_machine_model_t model)
170 {
171   switch (model) {
172   case HSA_MACHINE_MODEL_SMALL: return "SMALL";
173   case HSA_MACHINE_MODEL_LARGE: return "LARGE";
174   default: return "UNKNOWN";
175   }
176 }
177 
HsaFloatRoundingModeToString(hsa_default_float_rounding_mode_t mode)178 std::string HsaFloatRoundingModeToString(hsa_default_float_rounding_mode_t mode)
179 {
180   switch (mode) {
181   case HSA_DEFAULT_FLOAT_ROUNDING_MODE_DEFAULT: return "DEFAULT";
182   case HSA_DEFAULT_FLOAT_ROUNDING_MODE_ZERO: return "ZERO";
183   case HSA_DEFAULT_FLOAT_ROUNDING_MODE_NEAR: return "NEAR";
184   default: return "UNKNOWN";
185   }
186 }
187 
AmdMachineKindToString(amd_machine_kind16_t machine)188 std::string AmdMachineKindToString(amd_machine_kind16_t machine)
189 {
190   switch (machine) {
191   case AMD_MACHINE_KIND_UNDEFINED: return "UNDEFINED";
192   case AMD_MACHINE_KIND_AMDGPU: return "AMDGPU";
193   default: return "UNKNOWN";
194   }
195 }
196 
AmdFloatRoundModeToString(amd_float_round_mode_t round_mode)197 std::string AmdFloatRoundModeToString(amd_float_round_mode_t round_mode)
198 {
199   switch (round_mode) {
200   case AMD_FLOAT_ROUND_MODE_NEAREST_EVEN: return "NEAREST_EVEN";
201   case AMD_FLOAT_ROUND_MODE_PLUS_INFINITY: return "PLUS_INFINITY";
202   case AMD_FLOAT_ROUND_MODE_MINUS_INFINITY: return "MINUS_INFINITY";
203   case AMD_FLOAT_ROUND_MODE_ZERO: return "ZERO";
204   default: return "UNKNOWN";
205   }
206 }
207 
AmdFloatDenormModeToString(amd_float_denorm_mode_t denorm_mode)208 std::string AmdFloatDenormModeToString(amd_float_denorm_mode_t denorm_mode)
209 {
210   switch (denorm_mode) {
211   case AMD_FLOAT_DENORM_MODE_FLUSH_SOURCE_OUTPUT: return "FLUSH_SOURCE_OUTPUT";
212   case AMD_FLOAT_DENORM_MODE_FLUSH_OUTPUT: return "FLUSH_OUTPUT";
213   case AMD_FLOAT_DENORM_MODE_FLUSH_SOURCE: return "FLUSH_SOURCE";
214   case AMD_FLOAT_DENORM_MODE_NO_FLUSH: return "FLUSH_NONE";
215   default: return "UNKNOWN";
216   }
217 }
218 
AmdSystemVgprWorkitemIdToString(amd_system_vgpr_workitem_id_t system_vgpr_workitem_id)219 std::string AmdSystemVgprWorkitemIdToString(amd_system_vgpr_workitem_id_t system_vgpr_workitem_id)
220 {
221   switch (system_vgpr_workitem_id) {
222   case AMD_SYSTEM_VGPR_WORKITEM_ID_X: return "X";
223   case AMD_SYSTEM_VGPR_WORKITEM_ID_X_Y: return "X, Y";
224   case AMD_SYSTEM_VGPR_WORKITEM_ID_X_Y_Z: return "X, Y, Z";
225   default: return "UNKNOWN";
226   }
227 }
228 
AmdElementByteSizeToString(amd_element_byte_size_t element_byte_size)229 std::string AmdElementByteSizeToString(amd_element_byte_size_t element_byte_size)
230 {
231   switch (element_byte_size) {
232   case AMD_ELEMENT_BYTE_SIZE_2: return "WORD (2 bytes)";
233   case AMD_ELEMENT_BYTE_SIZE_4: return "DWORD (4 bytes)";
234   case AMD_ELEMENT_BYTE_SIZE_8: return "QWORD (8 bytes)";
235   case AMD_ELEMENT_BYTE_SIZE_16: return "16 bytes";
236   default: return "UNKNOWN";
237   }
238 }
239 
AmdExceptionKindToString(amd_exception_kind16_t exceptions)240 std::string AmdExceptionKindToString(amd_exception_kind16_t exceptions)
241 {
242   std::string e;
243   if (exceptions & AMD_EXCEPTION_KIND_INVALID_OPERATION) {
244     e += ", INVALID_OPERATON";
245     exceptions &= ~AMD_EXCEPTION_KIND_INVALID_OPERATION;
246   }
247   if (exceptions & AMD_EXCEPTION_KIND_DIVISION_BY_ZERO) {
248     e += ", DIVISION_BY_ZERO";
249     exceptions &= ~AMD_EXCEPTION_KIND_DIVISION_BY_ZERO;
250   }
251   if (exceptions & AMD_EXCEPTION_KIND_OVERFLOW) {
252     e += ", OVERFLOW";
253     exceptions &= ~AMD_EXCEPTION_KIND_OVERFLOW;
254   }
255   if (exceptions & AMD_EXCEPTION_KIND_UNDERFLOW) {
256     e += ", UNDERFLOW";
257     exceptions &= ~AMD_EXCEPTION_KIND_UNDERFLOW;
258   }
259   if (exceptions & AMD_EXCEPTION_KIND_INEXACT) {
260     e += ", INEXACT";
261     exceptions &= ~AMD_EXCEPTION_KIND_INEXACT;
262   }
263   if (exceptions) {
264     e += ", UNKNOWN";
265   }
266   if (!e.empty()) {
267     e = "[" + e.erase(0, 2) + "]";
268   }
269   return e;
270 }
271 
AmdPowerTwoToString(amd_powertwo8_t p)272 std::string AmdPowerTwoToString(amd_powertwo8_t p)
273 {
274   return std::to_string(1 << (unsigned) p);
275 }
276 
AmdHsaElfSectionSegment(amdgpu_hsa_elf_section_t sec)277 amdgpu_hsa_elf_segment_t AmdHsaElfSectionSegment(amdgpu_hsa_elf_section_t sec)
278 {
279   switch (sec) {
280   case AMDGPU_HSA_RODATA_GLOBAL_PROGRAM:
281   case AMDGPU_HSA_DATA_GLOBAL_PROGRAM:
282   case AMDGPU_HSA_BSS_GLOBAL_PROGRAM:
283     return AMDGPU_HSA_SEGMENT_GLOBAL_PROGRAM;
284   case AMDGPU_HSA_RODATA_GLOBAL_AGENT:
285   case AMDGPU_HSA_DATA_GLOBAL_AGENT:
286   case AMDGPU_HSA_BSS_GLOBAL_AGENT:
287     return AMDGPU_HSA_SEGMENT_GLOBAL_AGENT;
288   case AMDGPU_HSA_RODATA_READONLY_AGENT:
289   case AMDGPU_HSA_DATA_READONLY_AGENT:
290   case AMDGPU_HSA_BSS_READONLY_AGENT:
291     return AMDGPU_HSA_SEGMENT_READONLY_AGENT;
292   default:
293     assert(false); return AMDGPU_HSA_SEGMENT_LAST;
294   }
295 }
296 
IsAmdHsaElfSectionROData(amdgpu_hsa_elf_section_t sec)297 bool IsAmdHsaElfSectionROData(amdgpu_hsa_elf_section_t sec)
298 {
299   switch (sec) {
300   case AMDGPU_HSA_RODATA_GLOBAL_PROGRAM:
301   case AMDGPU_HSA_RODATA_GLOBAL_AGENT:
302   case AMDGPU_HSA_RODATA_READONLY_AGENT:
303   default:
304     return false;
305   }
306 }
307 
AmdHsaElfSegmentToString(amdgpu_hsa_elf_segment_t seg)308 std::string AmdHsaElfSegmentToString(amdgpu_hsa_elf_segment_t seg)
309 {
310   switch (seg) {
311   case AMDGPU_HSA_SEGMENT_GLOBAL_PROGRAM: return "GLOBAL_PROGRAM";
312   case AMDGPU_HSA_SEGMENT_GLOBAL_AGENT: return "GLOBAL_AGENT";
313   case AMDGPU_HSA_SEGMENT_READONLY_AGENT: return "READONLY_AGENT";
314   case AMDGPU_HSA_SEGMENT_CODE_AGENT: return "CODE_AGENT";
315   default: return "UNKNOWN";
316   }
317 }
318 
AmdPTLoadToString(uint64_t type)319 std::string AmdPTLoadToString(uint64_t type)
320 {
321   if (PT_LOOS <= type && type < PT_LOOS + AMDGPU_HSA_SEGMENT_LAST) {
322     return AmdHsaElfSegmentToString((amdgpu_hsa_elf_segment_t) (type - PT_LOOS));
323   } else {
324     return "UNKNOWN (" + std::to_string(type) + ")";
325   }
326 }
327 
PrintAmdKernelCode(std::ostream & out,const amd_kernel_code_t * akc)328 void PrintAmdKernelCode(std::ostream& out, const amd_kernel_code_t *akc)
329 {
330   uint32_t is_debug_enabled = AMD_HSA_BITS_GET(akc->kernel_code_properties, AMD_KERNEL_CODE_PROPERTIES_IS_DEBUG_ENABLED);
331 
332   out << attr1 << "amd_kernel_code_version_major" << eq
333       << akc->amd_kernel_code_version_major
334       << std::endl;
335   out << attr1 << "amd_kernel_code_version_minor" << eq
336       << akc->amd_kernel_code_version_minor
337       << std::endl;
338   out << attr1 << "amd_machine_kind" << eq
339       << AmdMachineKindToString(akc->amd_machine_kind)
340       << std::endl;
341   out << attr1 << "amd_machine_version_major" << eq
342       << (uint32_t)akc->amd_machine_version_major
343       << std::endl;
344   out << attr1 << "amd_machine_version_minor" << eq
345       << (uint32_t)akc->amd_machine_version_minor
346       << std::endl;
347   out << attr1 << "amd_machine_version_stepping" << eq
348       << (uint32_t)akc->amd_machine_version_stepping
349       << std::endl;
350   out << attr1 << "kernel_code_entry_byte_offset" << eq
351       << akc->kernel_code_entry_byte_offset
352       << std::endl;
353   if (akc->kernel_code_prefetch_byte_offset) {
354     out << attr1 << "kernel_code_prefetch_byte_offset" << eq
355         << akc->kernel_code_prefetch_byte_offset
356         << std::endl;
357   }
358   if (akc->kernel_code_prefetch_byte_size) {
359     out << attr1 << "kernel_code_prefetch_byte_size" << eq
360         << akc->kernel_code_prefetch_byte_size
361         << std::endl;
362   }
363   out << attr1 << "max_scratch_backing_memory_byte_size" << eq
364       << akc->max_scratch_backing_memory_byte_size
365       << std::endl;
366   PrintAmdComputePgmRsrcOne(out, akc->compute_pgm_rsrc1);
367   PrintAmdComputePgmRsrcTwo(out, akc->compute_pgm_rsrc2);
368   PrintAmdKernelCodeProperties(out, akc->kernel_code_properties);
369   if (akc->workitem_private_segment_byte_size) {
370     out << attr1 << "workitem_private_segment_byte_size" << eq
371         << akc->workitem_private_segment_byte_size
372         << std::endl;
373   }
374   if (akc->workgroup_group_segment_byte_size) {
375     out << attr1 << "workgroup_group_segment_byte_size" << eq
376         << akc->workgroup_group_segment_byte_size
377         << std::endl;
378   }
379   if (akc->gds_segment_byte_size) {
380     out << attr1 << "gds_segment_byte_size" << eq
381         << akc->gds_segment_byte_size
382         << std::endl;
383   }
384   if (akc->kernarg_segment_byte_size) {
385     out << attr1 << "kernarg_segment_byte_size" << eq
386         << akc->kernarg_segment_byte_size
387         << std::endl;
388   }
389   if (akc->workgroup_fbarrier_count) {
390     out << attr1 << "workgroup_fbarrier_count" << eq
391         << akc->workgroup_fbarrier_count
392         << std::endl;
393   }
394   out << attr1 << "wavefront_sgpr_count" << eq
395       << (uint32_t)akc->wavefront_sgpr_count
396       << std::endl;
397   out << attr1 << "workitem_vgpr_count" << eq
398       << (uint32_t)akc->workitem_vgpr_count
399       << std::endl;
400   if (akc->reserved_vgpr_count > 0) {
401     out << attr1 << "reserved_vgpr_first" << eq
402         << (uint32_t)akc->reserved_vgpr_first
403         << std::endl;
404     out << attr1 << "reserved_vgpr_count" << eq
405         << (uint32_t)akc->reserved_vgpr_count
406         << std::endl;
407   }
408   if (akc->reserved_sgpr_count > 0) {
409     out << attr1 << "reserved_sgpr_first" << eq
410         << (uint32_t)akc->reserved_sgpr_first
411         << std::endl;
412     out << attr1 << "reserved_sgpr_count" << eq
413         << (uint32_t)akc->reserved_sgpr_count
414         << std::endl;
415   }
416   if (is_debug_enabled && (akc->debug_wavefront_private_segment_offset_sgpr != uint16_t(-1))) {
417     out << attr1 << "debug_wavefront_private_segment_offset_sgpr" << eq
418         << (uint32_t)akc->debug_wavefront_private_segment_offset_sgpr
419         << std::endl;
420   }
421   if (is_debug_enabled && (akc->debug_private_segment_buffer_sgpr != uint16_t(-1))) {
422     out << attr1 << "debug_private_segment_buffer_sgpr" << eq
423         << (uint32_t)akc->debug_private_segment_buffer_sgpr
424         << ":"
425         << (uint32_t)(akc->debug_private_segment_buffer_sgpr + 3)
426         << std::endl;
427   }
428   if (akc->kernarg_segment_alignment) {
429     out << attr1 << "kernarg_segment_alignment" << eq
430         << AmdPowerTwoToString(akc->kernarg_segment_alignment)
431         << " (" << (uint32_t) akc->kernarg_segment_alignment << ")"
432         << std::endl;
433   }
434   if (akc->group_segment_alignment) {
435     out << attr1 << "group_segment_alignment" << eq
436         << AmdPowerTwoToString(akc->group_segment_alignment)
437         << " (" << (uint32_t) akc->group_segment_alignment << ")"
438         << std::endl;
439   }
440   if (akc->private_segment_alignment) {
441     out << attr1 << "private_segment_alignment" << eq
442         << AmdPowerTwoToString(akc->private_segment_alignment)
443         << " (" << (uint32_t) akc->private_segment_alignment << ")"
444         << std::endl;
445   }
446   out << attr1 << "wavefront_size" << eq
447       << AmdPowerTwoToString(akc->wavefront_size)
448       << " (" << (uint32_t) akc->wavefront_size << ")"
449       << std::endl;
450   PrintAmdControlDirectives(out, akc->control_directives);
451 }
452 
PrintAmdComputePgmRsrcOne(std::ostream & out,amd_compute_pgm_rsrc_one32_t compute_pgm_rsrc1)453 void PrintAmdComputePgmRsrcOne(std::ostream& out, amd_compute_pgm_rsrc_one32_t compute_pgm_rsrc1)
454 {
455   out << "  COMPUTE_PGM_RSRC1 (0x" << std::hex << std::setw(8) << std::setfill('0') << compute_pgm_rsrc1 << "):" << std::endl;
456   out << std::dec;
457 
458   uint32_t granulated_workitem_vgpr_count = AMD_HSA_BITS_GET(compute_pgm_rsrc1, AMD_COMPUTE_PGM_RSRC_ONE_GRANULATED_WORKITEM_VGPR_COUNT);
459   out << attr2 << "granulated_workitem_vgpr_count" << eq
460       << granulated_workitem_vgpr_count
461       << std::endl;
462   uint32_t granulated_wavefront_sgpr_count = AMD_HSA_BITS_GET(compute_pgm_rsrc1, AMD_COMPUTE_PGM_RSRC_ONE_GRANULATED_WAVEFRONT_SGPR_COUNT);
463   out << attr2 << "granulated_wavefront_sgpr_count" << eq
464       << granulated_wavefront_sgpr_count
465       << std::endl;
466   uint32_t priority = AMD_HSA_BITS_GET(compute_pgm_rsrc1, AMD_COMPUTE_PGM_RSRC_ONE_PRIORITY);
467   out << attr2 << "priority" << eq
468       << priority
469       << std::endl;
470   uint32_t float_round_mode_32 = AMD_HSA_BITS_GET(compute_pgm_rsrc1, AMD_COMPUTE_PGM_RSRC_ONE_FLOAT_ROUND_MODE_32);
471   out << attr2 << "float_round_mode_32" << eq
472       << AmdFloatRoundModeToString((amd_float_round_mode_t)float_round_mode_32)
473       << std::endl;
474   uint32_t float_round_mode_16_64 = AMD_HSA_BITS_GET(compute_pgm_rsrc1, AMD_COMPUTE_PGM_RSRC_ONE_FLOAT_ROUND_MODE_16_64);
475   out << attr2 << "float_round_mode_16_64" << eq
476       << AmdFloatRoundModeToString((amd_float_round_mode_t)float_round_mode_16_64)
477       << std::endl;
478   uint32_t float_denorm_mode_32 = AMD_HSA_BITS_GET(compute_pgm_rsrc1, AMD_COMPUTE_PGM_RSRC_ONE_FLOAT_DENORM_MODE_32);
479   out << attr2 << "float_denorm_mode_32" << eq
480       << AmdFloatDenormModeToString((amd_float_denorm_mode_t)float_denorm_mode_32)
481       << std::endl;
482   uint32_t float_denorm_mode_16_64 = AMD_HSA_BITS_GET(compute_pgm_rsrc1, AMD_COMPUTE_PGM_RSRC_ONE_FLOAT_DENORM_MODE_16_64);
483   out << attr2 << "float_denorm_mode_16_64" << eq
484       << AmdFloatDenormModeToString((amd_float_denorm_mode_t)float_denorm_mode_16_64)
485       << std::endl;
486   if (AMD_HSA_BITS_GET(compute_pgm_rsrc1, AMD_COMPUTE_PGM_RSRC_ONE_PRIV)) {
487     out << attr2 << "priv" << eq << "TRUE"
488         << std::endl;
489   }
490   if (AMD_HSA_BITS_GET(compute_pgm_rsrc1, AMD_COMPUTE_PGM_RSRC_ONE_ENABLE_DX10_CLAMP)) {
491     out << attr2 << "enable_dx10_clamp" << eq << "TRUE"
492         << std::endl;
493   }
494   if (AMD_HSA_BITS_GET(compute_pgm_rsrc1, AMD_COMPUTE_PGM_RSRC_ONE_DEBUG_MODE)) {
495     out << attr2 << "debug_mode" << eq << "TRUE"
496         << std::endl;
497   }
498   if (AMD_HSA_BITS_GET(compute_pgm_rsrc1, AMD_COMPUTE_PGM_RSRC_ONE_ENABLE_IEEE_MODE)) {
499     out << attr2 << "enable_ieee_mode" << eq << "TRUE"
500         << std::endl;
501   }
502   if (AMD_HSA_BITS_GET(compute_pgm_rsrc1, AMD_COMPUTE_PGM_RSRC_ONE_BULKY)) {
503     out << attr2 << "bulky" << eq << "TRUE"
504         << std::endl;
505   }
506   if (AMD_HSA_BITS_GET(compute_pgm_rsrc1, AMD_COMPUTE_PGM_RSRC_ONE_CDBG_USER)) {
507     out << attr2 << "cdbg_user" << eq << "TRUE"
508         << std::endl;
509   }
510 }
511 
PrintAmdComputePgmRsrcTwo(std::ostream & out,amd_compute_pgm_rsrc_two32_t compute_pgm_rsrc2)512 void PrintAmdComputePgmRsrcTwo(std::ostream& out, amd_compute_pgm_rsrc_two32_t compute_pgm_rsrc2)
513 {
514   out << "  COMPUTE_PGM_RSRC2 (0x" << std::hex << std::setw(8) << std::setfill('0') << compute_pgm_rsrc2 << "):" << std::endl;
515   out << std::dec;
516 
517   if (AMD_HSA_BITS_GET(compute_pgm_rsrc2, AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_SGPR_PRIVATE_SEGMENT_WAVE_BYTE_OFFSET)) {
518     out << attr2 << "enable_sgpr_private_segment_wave_byte_offset" << eq << "TRUE"
519         << std::endl;
520   }
521   uint32_t user_sgpr_count = AMD_HSA_BITS_GET(compute_pgm_rsrc2, AMD_COMPUTE_PGM_RSRC_TWO_USER_SGPR_COUNT);
522   out << attr2 << "user_sgpr_count" << eq
523       << user_sgpr_count
524       << std::endl;
525   if (AMD_HSA_BITS_GET(compute_pgm_rsrc2, AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_TRAP_HANDLER)) {
526     out << attr2 << "enable_trap_handler" << eq << "TRUE"
527         << std::endl;
528   }
529   if (AMD_HSA_BITS_GET(compute_pgm_rsrc2, AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_SGPR_WORKGROUP_ID_X)) {
530     out << attr2 << "enable_sgpr_workgroup_id_x" << eq << "TRUE"
531         << std::endl;
532   }
533   if (AMD_HSA_BITS_GET(compute_pgm_rsrc2, AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_SGPR_WORKGROUP_ID_Y)) {
534     out << attr2 << "enable_sgpr_workgroup_id_y" << eq << "TRUE"
535         << std::endl;
536   }
537   if (AMD_HSA_BITS_GET(compute_pgm_rsrc2, AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_SGPR_WORKGROUP_ID_Z)) {
538     out << attr2 << "enable_sgpr_workgroup_id_z" << eq << "TRUE"
539         << std::endl;
540   }
541   if (AMD_HSA_BITS_GET(compute_pgm_rsrc2, AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_SGPR_WORKGROUP_INFO)) {
542     out << attr2 << "enable_sgpr_workgroup_info" << eq << "TRUE"
543         << std::endl;
544   }
545   uint32_t enable_vgpr_workitem_id = AMD_HSA_BITS_GET(compute_pgm_rsrc2, AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_VGPR_WORKITEM_ID);
546   out << attr2 << "enable_vgpr_workitem_id" << eq
547       << AmdSystemVgprWorkitemIdToString((amd_system_vgpr_workitem_id_t)enable_vgpr_workitem_id)
548       << std::endl;
549   if (AMD_HSA_BITS_GET(compute_pgm_rsrc2, AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_EXCEPTION_ADDRESS_WATCH)) {
550     out << attr2 << "enable_exception_address_watch" << eq << "TRUE"
551         << std::endl;
552   }
553   if (AMD_HSA_BITS_GET(compute_pgm_rsrc2, AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_EXCEPTION_MEMORY_VIOLATION)) {
554     out << attr2 << "enable_exception_memory_violation" << eq << "TRUE"
555         << std::endl;
556   }
557   uint32_t granulated_lds_size = AMD_HSA_BITS_GET(compute_pgm_rsrc2, AMD_COMPUTE_PGM_RSRC_TWO_GRANULATED_LDS_SIZE);
558   out << attr2 << "granulated_lds_size" << eq
559       << granulated_lds_size
560       << std::endl;
561   if (AMD_HSA_BITS_GET(compute_pgm_rsrc2, AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION)) {
562     out << attr2 << "enable_exception_ieee_754_fp_invalid_operation" << eq << "TRUE"
563         << std::endl;
564   }
565   if (AMD_HSA_BITS_GET(compute_pgm_rsrc2, AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE)) {
566     out << attr2 << "enable_exception_fp_denormal_source" << eq << "TRUE"
567         << std::endl;
568   }
569   if (AMD_HSA_BITS_GET(compute_pgm_rsrc2, AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO)) {
570     out << attr2 << "enable_exception_ieee_754_fp_division_by_zero" << eq << "TRUE"
571         << std::endl;
572   }
573   if (AMD_HSA_BITS_GET(compute_pgm_rsrc2, AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW)) {
574     out << attr2 << "enable_exception_ieee_754_fp_overflow" << eq << "TRUE"
575         << std::endl;
576   }
577   if (AMD_HSA_BITS_GET(compute_pgm_rsrc2, AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW)) {
578     out << attr2 << "enable_exception_ieee_754_fp_underflow" << eq << "TRUE"
579         << std::endl;
580   }
581   if (AMD_HSA_BITS_GET(compute_pgm_rsrc2, AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT)) {
582     out << attr2 << "enable_exception_ieee_754_fp_inexact" << eq << "TRUE"
583         << std::endl;
584   }
585   if (AMD_HSA_BITS_GET(compute_pgm_rsrc2, AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_EXCEPTION_INT_DIVISION_BY_ZERO)) {
586     out << attr2 << "enable_exception_int_division_by_zero" << eq << "TRUE"
587         << std::endl;
588   }
589 }
590 
PrintAmdKernelCodeProperties(std::ostream & out,amd_kernel_code_properties32_t kernel_code_properties)591 void PrintAmdKernelCodeProperties(std::ostream& out, amd_kernel_code_properties32_t kernel_code_properties)
592 {
593   out << "  KERNEL_CODE_PROPERTIES (0x" << std::hex << std::setw(8) << std::setfill('0') << kernel_code_properties << "):" << std::endl;
594   out << std::dec;
595 
596   if (AMD_HSA_BITS_GET(kernel_code_properties, AMD_KERNEL_CODE_PROPERTIES_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER)) {
597     out << attr2 << "enable_sgpr_private_segment_buffer" << eq << "TRUE"
598         << std::endl;
599   }
600   if (AMD_HSA_BITS_GET(kernel_code_properties, AMD_KERNEL_CODE_PROPERTIES_ENABLE_SGPR_DISPATCH_PTR)) {
601     out << attr2 << "enable_sgpr_dispatch_ptr" << eq << "TRUE"
602         << std::endl;
603   }
604   if (AMD_HSA_BITS_GET(kernel_code_properties, AMD_KERNEL_CODE_PROPERTIES_ENABLE_SGPR_QUEUE_PTR)) {
605     out << attr2 << "enable_sgpr_queue_ptr" << eq << "TRUE"
606         << std::endl;
607   }
608   if (AMD_HSA_BITS_GET(kernel_code_properties, AMD_KERNEL_CODE_PROPERTIES_ENABLE_SGPR_KERNARG_SEGMENT_PTR)) {
609     out << attr2 << "enable_sgpr_kernarg_segment_ptr" << eq << "TRUE"
610         << std::endl;
611   }
612   if (AMD_HSA_BITS_GET(kernel_code_properties, AMD_KERNEL_CODE_PROPERTIES_ENABLE_SGPR_DISPATCH_ID)) {
613     out << attr2 << "enable_sgpr_dispatch_id" << eq << "TRUE"
614         << std::endl;
615   }
616   if (AMD_HSA_BITS_GET(kernel_code_properties, AMD_KERNEL_CODE_PROPERTIES_ENABLE_SGPR_FLAT_SCRATCH_INIT)) {
617     out << attr2 << "enable_sgpr_flat_scratch_init" << eq << "TRUE"
618         << std::endl;
619   }
620   if (AMD_HSA_BITS_GET(kernel_code_properties, AMD_KERNEL_CODE_PROPERTIES_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE)) {
621     out << attr2 << "enable_sgpr_private_segment_size" << eq << "TRUE"
622         << std::endl;
623   }
624   if (AMD_HSA_BITS_GET(kernel_code_properties, AMD_KERNEL_CODE_PROPERTIES_ENABLE_SGPR_GRID_WORKGROUP_COUNT_X)) {
625     out << attr2 << "enable_sgpr_grid_workgroup_count_x" << eq << "TRUE"
626         << std::endl;
627   }
628   if (AMD_HSA_BITS_GET(kernel_code_properties, AMD_KERNEL_CODE_PROPERTIES_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Y)) {
629     out << attr2 << "enable_sgpr_grid_workgroup_count_y" << eq << "TRUE"
630         << std::endl;
631   }
632   if (AMD_HSA_BITS_GET(kernel_code_properties, AMD_KERNEL_CODE_PROPERTIES_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Z)) {
633     out << attr2 << "enable_sgpr_grid_workgroup_count_z" << eq << "TRUE"
634         << std::endl;
635   }
636   if (AMD_HSA_BITS_GET(kernel_code_properties, AMD_KERNEL_CODE_PROPERTIES_ENABLE_ORDERED_APPEND_GDS)) {
637     out << attr2 << "enable_ordered_append_gds" << eq << "TRUE"
638         << std::endl;
639   }
640   uint32_t private_element_size = AMD_HSA_BITS_GET(kernel_code_properties, AMD_KERNEL_CODE_PROPERTIES_PRIVATE_ELEMENT_SIZE);
641   out << attr2 << "private_element_size" << eq
642       << AmdElementByteSizeToString((amd_element_byte_size_t)private_element_size)
643       << std::endl;
644   if (AMD_HSA_BITS_GET(kernel_code_properties, AMD_KERNEL_CODE_PROPERTIES_IS_PTR64)) {
645     out << attr2 << "is_ptr64" << eq << "TRUE"
646         << std::endl;
647   }
648   if (AMD_HSA_BITS_GET(kernel_code_properties, AMD_KERNEL_CODE_PROPERTIES_IS_DYNAMIC_CALLSTACK)) {
649     out << attr2 << "is_dynamic_callstack" << eq << "TRUE"
650         << std::endl;
651   }
652   if (AMD_HSA_BITS_GET(kernel_code_properties, AMD_KERNEL_CODE_PROPERTIES_IS_DEBUG_ENABLED)) {
653     out << attr2 << "is_debug_enabled" << eq << "TRUE"
654         << std::endl;
655   }
656   if (AMD_HSA_BITS_GET(kernel_code_properties, AMD_KERNEL_CODE_PROPERTIES_IS_XNACK_ENABLED)) {
657     out << attr2 << "is_xnack_enabled" << eq << "TRUE"
658         << std::endl;
659   }
660 }
661 
PrintAmdControlDirectives(std::ostream & out,const amd_control_directives_t & control_directives)662 void PrintAmdControlDirectives(std::ostream& out, const amd_control_directives_t &control_directives)
663 {
664   if (!control_directives.enabled_control_directives) {
665     return;
666   }
667 
668   out << "  CONTROL_DIRECTIVES:" << std::endl;
669 
670   if (control_directives.enabled_control_directives & AMD_ENABLED_CONTROL_DIRECTIVE_ENABLE_BREAK_EXCEPTIONS) {
671     out << attr2 << "enable_break_exceptions" << eq
672         << AmdExceptionKindToString(control_directives.enable_break_exceptions).c_str()
673         << std::endl;
674   }
675   if (control_directives.enabled_control_directives & AMD_ENABLED_CONTROL_DIRECTIVE_ENABLE_DETECT_EXCEPTIONS) {
676     out << attr2 << "enable_detect_exceptions" << eq
677         << AmdExceptionKindToString(control_directives.enable_detect_exceptions).c_str()
678         << std::endl;
679   }
680   if (control_directives.enabled_control_directives & AMD_ENABLED_CONTROL_DIRECTIVE_MAX_DYNAMIC_GROUP_SIZE) {
681     out << attr2 << "max_dynamic_group_size" << eq
682         << control_directives.max_dynamic_group_size
683         << std::endl;
684   }
685   if (control_directives.enabled_control_directives & AMD_ENABLED_CONTROL_DIRECTIVE_MAX_FLAT_GRID_SIZE) {
686     out << attr2 << "max_flat_grid_size" << eq
687         << control_directives.max_flat_grid_size
688         << std::endl;
689   }
690   if (control_directives.enabled_control_directives & AMD_ENABLED_CONTROL_DIRECTIVE_MAX_FLAT_WORKGROUP_SIZE) {
691     out << attr2 << "max_flat_workgroup_size" << eq
692         << control_directives.max_flat_workgroup_size
693         << std::endl;
694   }
695   if (control_directives.enabled_control_directives & AMD_ENABLED_CONTROL_DIRECTIVE_REQUIRED_DIM) {
696     out << attr2 << "required_dim" << eq
697         << (uint32_t)control_directives.required_dim
698         << std::endl;
699   }
700   if (control_directives.enabled_control_directives & AMD_ENABLED_CONTROL_DIRECTIVE_REQUIRED_GRID_SIZE) {
701     out << attr2 << "required_grid_size" << eq
702         << "("
703         << control_directives.required_grid_size[0]
704         << ", "
705         << control_directives.required_grid_size[1]
706         << ", "
707         << control_directives.required_grid_size[2]
708         << ")"
709         << std::endl;
710   }
711   if (control_directives.enabled_control_directives & AMD_ENABLED_CONTROL_DIRECTIVE_REQUIRED_WORKGROUP_SIZE) {
712     out << attr2 << "required_workgroup_size" << eq
713         << "("
714         << control_directives.required_workgroup_size[0]
715         << ", "
716         << control_directives.required_workgroup_size[1]
717         << ", "
718         << control_directives.required_workgroup_size[2]
719         << ")"
720         << std::endl;
721   }
722   if (control_directives.enabled_control_directives & AMD_ENABLED_CONTROL_DIRECTIVE_REQUIRE_NO_PARTIAL_WORKGROUPS) {
723     out << attr2 << "require_no_partial_workgroups" << eq << "TRUE"
724         << std::endl;
725   }
726 }
727 
728 namespace code_options {
729 
space(std::ostream & out)730   std::ostream& space(std::ostream& out)
731   {
732     if (out.tellp()) { out << " "; }
733     return out;
734   }
735 
operator <<(std::ostream & out,const control_directive & d)736   std::ostream& operator<<(std::ostream& out, const control_directive& d)
737   {
738     out << space <<
739       "-hsa_control_directive:" << d.name << "=";
740     return out;
741   }
742 
BrigExceptionString(BrigExceptions32_t e)743   const char *BrigExceptionString(BrigExceptions32_t e)
744   {
745     switch (e) {
746     case BRIG_EXCEPTIONS_INVALID_OPERATION: return "INVALID_OPERATION";
747     case BRIG_EXCEPTIONS_DIVIDE_BY_ZERO: return "DIVIDE_BY_ZERO";
748     case BRIG_EXCEPTIONS_OVERFLOW: return "OVERFLOW";
749     case BRIG_EXCEPTIONS_INEXACT: return "INEXACT";
750     default:
751       assert(false); return "<unknown_BRIG_exception>";
752     }
753   }
754 
operator <<(std::ostream & out,const exceptions_mask & e)755   std::ostream& operator<<(std::ostream& out, const exceptions_mask& e)
756   {
757     bool first = true;
758     for (BrigExceptions32_t be = BRIG_EXCEPTIONS_INVALID_OPERATION; be < BRIG_EXCEPTIONS_FIRST_USER_DEFINED; ++be) {
759       if (e.mask & be) {
760         if (first) { first = false; } else { out << ","; }
761         out << BrigExceptionString(be);
762       }
763     }
764     return out;
765   }
766 
operator <<(std::ostream & out,const control_directives & cd)767   std::ostream& operator<<(std::ostream& out, const control_directives& cd)
768   {
769     const hsa_ext_control_directives_t& d = cd.d;
770     uint64_t mask = d.control_directives_mask;
771     if (!mask) { return out; }
772 
773     if (mask & BRIG_CONTROL_ENABLEBREAKEXCEPTIONS) {
774       out <<
775         control_directive("ENABLEBREAKEXCEPTIONS") <<
776         exceptions_mask(d.break_exceptions_mask);
777     }
778     if (mask & BRIG_CONTROL_ENABLEDETECTEXCEPTIONS) {
779       out <<
780         control_directive("ENABLEDETECTEXCEPTIONS") <<
781         exceptions_mask(d.detect_exceptions_mask);
782     }
783     if (mask & BRIG_CONTROL_MAXDYNAMICGROUPSIZE) {
784       out <<
785         control_directive("MAXDYNAMICGROUPSIZE") <<
786         d.max_dynamic_group_size;
787     }
788     if (mask & BRIG_CONTROL_MAXFLATGRIDSIZE) {
789       out <<
790         control_directive("MAXFLATGRIDSIZE") <<
791         d.max_flat_grid_size;
792     }
793     if (mask & BRIG_CONTROL_MAXFLATWORKGROUPSIZE) {
794       out <<
795         control_directive("MAXFLATWORKGROUPSIZE") <<
796         d.max_flat_workgroup_size;
797     }
798     if (mask & BRIG_CONTROL_REQUIREDDIM) {
799       out <<
800         control_directive("REQUIREDDIM") <<
801         d.required_dim;
802     }
803     if (mask & BRIG_CONTROL_REQUIREDGRIDSIZE) {
804       out <<
805         control_directive("REQUIREDGRIDSIZE") <<
806         d.required_grid_size[0] << "," <<
807         d.required_grid_size[1] << "," <<
808         d.required_grid_size[2];
809     }
810     if (mask & BRIG_CONTROL_REQUIREDWORKGROUPSIZE) {
811       out <<
812         control_directive("REQUIREDWORKGROUPSIZE") <<
813         d.required_workgroup_size.x << "," <<
814         d.required_workgroup_size.y << "," <<
815         d.required_workgroup_size.z;
816     }
817     return out;
818   }
819 }
820 
hsaerr2str(hsa_status_t status)821 const char* hsaerr2str(hsa_status_t status) {
822   switch ((unsigned) status) {
823     case HSA_STATUS_SUCCESS:
824       return
825           "HSA_STATUS_SUCCESS: The function has been executed successfully.";
826     case HSA_STATUS_INFO_BREAK:
827       return
828           "HSA_STATUS_INFO_BREAK: A traversal over a list of "
829           "elements has been interrupted by the application before "
830           "completing.";
831     case HSA_STATUS_ERROR:
832       return "HSA_STATUS_ERROR: A generic error has occurred.";
833     case HSA_STATUS_ERROR_INVALID_ARGUMENT:
834       return
835           "HSA_STATUS_ERROR_INVALID_ARGUMENT: One of the actual "
836           "arguments does not meet a precondition stated in the "
837           "documentation of the corresponding formal argument.";
838     case HSA_STATUS_ERROR_INVALID_QUEUE_CREATION:
839       return
840           "HSA_STATUS_ERROR_INVALID_QUEUE_CREATION: The requested "
841           "queue creation is not valid.";
842     case HSA_STATUS_ERROR_INVALID_ALLOCATION:
843       return
844           "HSA_STATUS_ERROR_INVALID_ALLOCATION: The requested "
845           "allocation is not valid.";
846     case HSA_STATUS_ERROR_INVALID_AGENT:
847       return
848           "HSA_STATUS_ERROR_INVALID_AGENT: The agent is invalid.";
849     case HSA_STATUS_ERROR_INVALID_REGION:
850       return
851           "HSA_STATUS_ERROR_INVALID_REGION: The memory region is invalid.";
852     case HSA_STATUS_ERROR_INVALID_SIGNAL:
853       return
854           "HSA_STATUS_ERROR_INVALID_SIGNAL: The signal is invalid.";
855     case HSA_STATUS_ERROR_INVALID_QUEUE:
856       return
857           "HSA_STATUS_ERROR_INVALID_QUEUE: The queue is invalid.";
858     case HSA_STATUS_ERROR_OUT_OF_RESOURCES:
859       return
860           "HSA_STATUS_ERROR_OUT_OF_RESOURCES: The runtime failed to "
861           "allocate the necessary resources. This error may also "
862           "occur when the core runtime library needs to spawn "
863           "threads or create internal OS-specific events.";
864     case HSA_STATUS_ERROR_INVALID_PACKET_FORMAT:
865       return
866           "HSA_STATUS_ERROR_INVALID_PACKET_FORMAT: The AQL packet "
867           "is malformed.";
868     case HSA_STATUS_ERROR_RESOURCE_FREE:
869       return
870           "HSA_STATUS_ERROR_RESOURCE_FREE: An error has been "
871           "detected while releasing a resource.";
872     case HSA_STATUS_ERROR_NOT_INITIALIZED:
873       return
874           "HSA_STATUS_ERROR_NOT_INITIALIZED: An API other than "
875           "hsa_init has been invoked while the reference count of "
876           "the HSA runtime is zero.";
877     case HSA_STATUS_ERROR_REFCOUNT_OVERFLOW:
878       return
879           "HSA_STATUS_ERROR_REFCOUNT_OVERFLOW: The maximum "
880           "reference count for the object has been reached.";
881     case HSA_STATUS_ERROR_INCOMPATIBLE_ARGUMENTS:
882       return
883           "HSA_STATUS_ERROR_INCOMPATIBLE_ARGUMENTS: The arguments passed to "
884           "a functions are not compatible.";
885     case HSA_STATUS_ERROR_INVALID_INDEX:
886       return "The index is invalid.";
887     case HSA_STATUS_ERROR_INVALID_ISA:
888       return "The instruction set architecture is invalid.";
889     case HSA_STATUS_ERROR_INVALID_CODE_OBJECT:
890       return "The code object is invalid.";
891     case HSA_STATUS_ERROR_INVALID_EXECUTABLE:
892       return "The executable is invalid.";
893     case HSA_STATUS_ERROR_FROZEN_EXECUTABLE:
894       return "The executable is frozen.";
895     case HSA_STATUS_ERROR_INVALID_SYMBOL_NAME:
896       return "There is no symbol with the given name.";
897     case HSA_STATUS_ERROR_VARIABLE_ALREADY_DEFINED:
898       return "The variable is already defined.";
899     case HSA_STATUS_ERROR_VARIABLE_UNDEFINED:
900       return "The variable is undefined.";
901     case HSA_EXT_STATUS_ERROR_INVALID_PROGRAM:
902       return
903           "HSA_EXT_STATUS_ERROR_INVALID_PROGRAM: Invalid program";
904     case HSA_EXT_STATUS_ERROR_INVALID_MODULE:
905       return "HSA_EXT_STATUS_ERROR_INVALID_MODULE: Invalid module";
906     case HSA_EXT_STATUS_ERROR_INCOMPATIBLE_MODULE:
907       return
908           "HSA_EXT_STATUS_ERROR_INCOMPATIBLE_MODULE: Incompatible module";
909     case HSA_EXT_STATUS_ERROR_MODULE_ALREADY_INCLUDED:
910       return
911           "HSA_EXT_STATUS_ERROR_MODULE_ALREADY_INCLUDED: Module already "
912           "included";
913     case HSA_EXT_STATUS_ERROR_SYMBOL_MISMATCH:
914       return
915           "HSA_EXT_STATUS_ERROR_SYMBOL_MISMATCH: Symbol mismatch";
916     case HSA_EXT_STATUS_ERROR_FINALIZATION_FAILED:
917       return
918           "HSA_EXT_STATUS_ERROR_FINALIZATION_FAILED: Finalization failed";
919     case HSA_EXT_STATUS_ERROR_DIRECTIVE_MISMATCH:
920       return
921           "HSA_EXT_STATUS_ERROR_DIRECTIVE_MISMATCH: Directive mismatch";
922     default:
923       return
924           "Unknown HSA status";
925   }
926 }
927 
ReadFileIntoBuffer(const std::string & filename,std::vector<char> & buffer)928 bool ReadFileIntoBuffer(const std::string& filename, std::vector<char>& buffer)
929 {
930   std::ifstream file(filename, std::ios::binary);
931   if (!file) { return false; }
932   file.seekg(0, std::ios::end);
933   std::streamsize size = file.tellg();
934   file.seekg(0, std::ios::beg);
935 
936   buffer.resize((size_t) size);
937   if (!file.read(buffer.data(), size)) { return false; }
938   return true;
939 }
940 
941 #ifndef _WIN32
942 #define _tempnam tempnam
943 #define _close close
944 #define _getpid getpid
945 #define _open open
946 #endif // _WIN32
947 
OpenTempFile(const char * prefix)948 int OpenTempFile(const char* prefix)
949 {
950   unsigned c = 0;
951   std::string tname = prefix;
952   tname += "_";
953   tname += std::to_string(_getpid());
954   tname += "_";
955   while (c++ < 20) { // Loop because several threads can generate same filename.
956 #ifdef _WIN32
957     char dir[MAX_PATH+1];
958     if (!GetTempPath(sizeof(dir), dir)) { return -1; }
959 #else // _WIN32
960     char *dir = NULL;
961 #endif // _WIN32
962     char *name = _tempnam(dir, tname.c_str());
963     if (!name) { return -1; }
964 #ifdef _WIN32
965     HANDLE h = CreateFile(
966       name,
967       GENERIC_READ | GENERIC_WRITE,
968       0, // No sharing
969       NULL,
970       CREATE_NEW,
971       FILE_ATTRIBUTE_TEMPORARY | FILE_FLAG_DELETE_ON_CLOSE,
972       NULL);
973     free(name);
974     if (h == INVALID_HANDLE_VALUE) { continue; }
975     return _open_osfhandle((intptr_t)h, 0);
976 #else // _WIN32
977     int d = _open(name, O_RDWR | O_CREAT | O_EXCL, S_IRUSR | S_IWUSR);
978     if (d < 0) { free(name); continue; }
979     if (unlink(name) < 0) { free(name); _close(d); return -1; }
980     free(name);
981     return d;
982 #endif // _WIN32
983   }
984   return -1;
985 }
986 
CloseTempFile(int fd)987 void CloseTempFile(int fd)
988 {
989   _close(fd);
990 }
991 
CommentTopCallBack(void * ctx,int type)992 const char * CommentTopCallBack(void *ctx, int type) {
993   static const char* amd_kernel_code_t_begin = "amd_kernel_code_t begin";
994   static const char* amd_kernel_code_t_end = "amd_kernel_code_t end";
995   static const char* isa_begin = "isa begin";
996   switch(type) {
997   case COMMENT_AMD_KERNEL_CODE_T_BEGIN:
998     return amd_kernel_code_t_begin;
999   case COMMENT_AMD_KERNEL_CODE_T_END:
1000     return amd_kernel_code_t_end;
1001   case COMMENT_KERNEL_ISA_BEGIN:
1002     return isa_begin;
1003   default:
1004     assert(false);
1005     return "";
1006   }
1007 }
CommentRightCallBack(void * ctx,int type)1008 const char * CommentRightCallBack(void *ctx, int type) {
1009   return nullptr;
1010 }
1011 
ParseInstructionOffset(const std::string & instruction)1012 uint32_t ParseInstructionOffset(const std::string& instruction) {
1013   // instruction format: opcode op1, op2 ... // offset: binopcode
1014   std::string::size_type n = instruction.find("//");
1015   assert(n != std::string::npos);
1016   std::string comment = instruction.substr(n);
1017   n = comment.find(':');
1018   assert(n != std::string::npos);
1019   comment.erase(n);
1020   assert(comment.size() > 3);
1021   comment.erase(0, 3);
1022   return strtoul(comment.c_str(), nullptr, 16);
1023 }
1024 
IsNotSpace(char c)1025 bool IsNotSpace(char c) {
1026   return !isspace(static_cast<int>(c));
1027 }
1028 
ltrim(std::string & str)1029 void ltrim(std::string &str) {
1030   str.erase(str.begin(), std::find_if(str.begin(), str.end(), IsNotSpace));
1031 }
1032 
DumpFileName(const std::string & dir,const char * prefix,const char * ext,unsigned n,unsigned i)1033 std::string DumpFileName(const std::string& dir, const char* prefix, const char* ext, unsigned n, unsigned i)
1034 {
1035   std::ostringstream ss;
1036   if (!dir.empty()) {
1037     ss << dir << "/";
1038   }
1039   ss <<
1040     prefix <<
1041     std::setfill('0') << std::setw(3) << n;
1042   if (i) { ss << "_" << i; }
1043   if (ext) { ss << "." << ext; }
1044   return ss.str();
1045 }
1046 
1047 
1048 }
1049 }
1050