//////////////////////////////////////////////////////////////////////////////// // // The University of Illinois/NCSA // Open Source License (NCSA) // // Copyright (c) 2014-2016, Advanced Micro Devices, Inc. All rights reserved. // // Developed by: // // AMD Research and AMD HSA Software Development // // Advanced Micro Devices, Inc. // // www.amd.com // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to // deal with the Software without restriction, including without limitation // the rights to use, copy, modify, merge, publish, distribute, sublicense, // and/or sell copies of the Software, and to permit persons to whom the // Software is furnished to do so, subject to the following conditions: // // - Redistributions of source code must retain the above copyright notice, // this list of conditions and the following disclaimers. // - Redistributions in binary form must reproduce the above copyright // notice, this list of conditions and the following disclaimers in // the documentation and/or other materials provided with the distribution. // - Neither the names of Advanced Micro Devices, Inc, // nor the names of its contributors may be used to endorse or promote // products derived from this Software without specific prior written // permission. // // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL // THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR // OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, // ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER // DEALINGS WITH THE SOFTWARE. // //////////////////////////////////////////////////////////////////////////////// #include "amd_hsa_code_util.hpp" #include "libelf.h" #include #include #include #include #include #include #ifdef _WIN32 #include #include #include #else // _WIN32 #include #include #include #include #include #endif // _WIN32 #include "Brig.h" namespace { auto eq = " = "; std::ostream& attr1(std::ostream& out) { out << " " << std::left << std::setw(60) << std::setfill(' '); return out; } std::ostream& attr2(std::ostream& out) { out << " " << std::left << std::setw(58) << std::setfill(' '); return out; } } // namespace anonymous namespace amd { namespace hsa { namespace common { bool IsAccessibleMemoryAddress(uint64_t address) { if (0 == address) { return false; } #if defined(_WIN32) || defined(_WIN64) MEMORY_BASIC_INFORMATION memory_info; if (!VirtualQuery(reinterpret_cast(address), &memory_info, sizeof(memory_info))) { return false; } int32_t is_accessible = ((memory_info.Protect & PAGE_READONLY) || (memory_info.Protect & PAGE_READWRITE) || (memory_info.Protect & PAGE_WRITECOPY) || (memory_info.Protect & PAGE_EXECUTE_READ) || (memory_info.Protect & PAGE_EXECUTE_READWRITE) || (memory_info.Protect & PAGE_EXECUTE_WRITECOPY)); if (memory_info.Protect & PAGE_GUARD) { is_accessible = 0; } if (memory_info.Protect & PAGE_NOACCESS) { is_accessible = 0; } return is_accessible > 0; #else int32_t random_fd = 0; ssize_t bytes_written = 0; if (-1 == (random_fd = open("/dev/random", O_WRONLY))) { return false; } bytes_written = write(random_fd, (void*)address, 1); if (-1 == close(random_fd)) { return false; } return bytes_written == 1; #endif // _WIN32 || _WIN64 } } std::string HsaSymbolKindToString(hsa_symbol_kind_t kind) { switch (kind) { case HSA_SYMBOL_KIND_VARIABLE: return "VARIABLE"; case HSA_SYMBOL_KIND_INDIRECT_FUNCTION: return "INDIRECT_FUNCTION"; case HSA_SYMBOL_KIND_KERNEL: return "KERNEL"; default: return "UNKNOWN"; } } std::string HsaSymbolLinkageToString(hsa_symbol_linkage_t linkage) { switch (linkage) { case HSA_SYMBOL_LINKAGE_MODULE: return "MODULE"; case HSA_SYMBOL_LINKAGE_PROGRAM: return "PROGRAM"; default: return "UNKNOWN"; } } std::string HsaVariableAllocationToString(hsa_variable_allocation_t allocation) { switch (allocation) { case HSA_VARIABLE_ALLOCATION_AGENT: return "AGENT"; case HSA_VARIABLE_ALLOCATION_PROGRAM: return "PROGRAM"; default: return "UNKNOWN"; } } std::string HsaVariableSegmentToString(hsa_variable_segment_t segment) { switch (segment) { case HSA_VARIABLE_SEGMENT_GLOBAL: return "GLOBAL"; case HSA_VARIABLE_SEGMENT_READONLY: return "READONLY"; default: return "UNKNOWN"; } } std::string HsaProfileToString(hsa_profile_t profile) { switch (profile) { case HSA_PROFILE_BASE: return "BASE"; case HSA_PROFILE_FULL: return "FULL"; default: return "UNKNOWN"; } } std::string HsaMachineModelToString(hsa_machine_model_t model) { switch (model) { case HSA_MACHINE_MODEL_SMALL: return "SMALL"; case HSA_MACHINE_MODEL_LARGE: return "LARGE"; default: return "UNKNOWN"; } } std::string HsaFloatRoundingModeToString(hsa_default_float_rounding_mode_t mode) { switch (mode) { case HSA_DEFAULT_FLOAT_ROUNDING_MODE_DEFAULT: return "DEFAULT"; case HSA_DEFAULT_FLOAT_ROUNDING_MODE_ZERO: return "ZERO"; case HSA_DEFAULT_FLOAT_ROUNDING_MODE_NEAR: return "NEAR"; default: return "UNKNOWN"; } } std::string AmdMachineKindToString(amd_machine_kind16_t machine) { switch (machine) { case AMD_MACHINE_KIND_UNDEFINED: return "UNDEFINED"; case AMD_MACHINE_KIND_AMDGPU: return "AMDGPU"; default: return "UNKNOWN"; } } std::string AmdFloatRoundModeToString(amd_float_round_mode_t round_mode) { switch (round_mode) { case AMD_FLOAT_ROUND_MODE_NEAREST_EVEN: return "NEAREST_EVEN"; case AMD_FLOAT_ROUND_MODE_PLUS_INFINITY: return "PLUS_INFINITY"; case AMD_FLOAT_ROUND_MODE_MINUS_INFINITY: return "MINUS_INFINITY"; case AMD_FLOAT_ROUND_MODE_ZERO: return "ZERO"; default: return "UNKNOWN"; } } std::string AmdFloatDenormModeToString(amd_float_denorm_mode_t denorm_mode) { switch (denorm_mode) { case AMD_FLOAT_DENORM_MODE_FLUSH_SOURCE_OUTPUT: return "FLUSH_SOURCE_OUTPUT"; case AMD_FLOAT_DENORM_MODE_FLUSH_OUTPUT: return "FLUSH_OUTPUT"; case AMD_FLOAT_DENORM_MODE_FLUSH_SOURCE: return "FLUSH_SOURCE"; case AMD_FLOAT_DENORM_MODE_NO_FLUSH: return "FLUSH_NONE"; default: return "UNKNOWN"; } } std::string AmdSystemVgprWorkitemIdToString(amd_system_vgpr_workitem_id_t system_vgpr_workitem_id) { switch (system_vgpr_workitem_id) { case AMD_SYSTEM_VGPR_WORKITEM_ID_X: return "X"; case AMD_SYSTEM_VGPR_WORKITEM_ID_X_Y: return "X, Y"; case AMD_SYSTEM_VGPR_WORKITEM_ID_X_Y_Z: return "X, Y, Z"; default: return "UNKNOWN"; } } std::string AmdElementByteSizeToString(amd_element_byte_size_t element_byte_size) { switch (element_byte_size) { case AMD_ELEMENT_BYTE_SIZE_2: return "WORD (2 bytes)"; case AMD_ELEMENT_BYTE_SIZE_4: return "DWORD (4 bytes)"; case AMD_ELEMENT_BYTE_SIZE_8: return "QWORD (8 bytes)"; case AMD_ELEMENT_BYTE_SIZE_16: return "16 bytes"; default: return "UNKNOWN"; } } std::string AmdExceptionKindToString(amd_exception_kind16_t exceptions) { std::string e; if (exceptions & AMD_EXCEPTION_KIND_INVALID_OPERATION) { e += ", INVALID_OPERATON"; exceptions &= ~AMD_EXCEPTION_KIND_INVALID_OPERATION; } if (exceptions & AMD_EXCEPTION_KIND_DIVISION_BY_ZERO) { e += ", DIVISION_BY_ZERO"; exceptions &= ~AMD_EXCEPTION_KIND_DIVISION_BY_ZERO; } if (exceptions & AMD_EXCEPTION_KIND_OVERFLOW) { e += ", OVERFLOW"; exceptions &= ~AMD_EXCEPTION_KIND_OVERFLOW; } if (exceptions & AMD_EXCEPTION_KIND_UNDERFLOW) { e += ", UNDERFLOW"; exceptions &= ~AMD_EXCEPTION_KIND_UNDERFLOW; } if (exceptions & AMD_EXCEPTION_KIND_INEXACT) { e += ", INEXACT"; exceptions &= ~AMD_EXCEPTION_KIND_INEXACT; } if (exceptions) { e += ", UNKNOWN"; } if (!e.empty()) { e = "[" + e.erase(0, 2) + "]"; } return e; } std::string AmdPowerTwoToString(amd_powertwo8_t p) { return std::to_string(1 << (unsigned) p); } amdgpu_hsa_elf_segment_t AmdHsaElfSectionSegment(amdgpu_hsa_elf_section_t sec) { switch (sec) { case AMDGPU_HSA_RODATA_GLOBAL_PROGRAM: case AMDGPU_HSA_DATA_GLOBAL_PROGRAM: case AMDGPU_HSA_BSS_GLOBAL_PROGRAM: return AMDGPU_HSA_SEGMENT_GLOBAL_PROGRAM; case AMDGPU_HSA_RODATA_GLOBAL_AGENT: case AMDGPU_HSA_DATA_GLOBAL_AGENT: case AMDGPU_HSA_BSS_GLOBAL_AGENT: return AMDGPU_HSA_SEGMENT_GLOBAL_AGENT; case AMDGPU_HSA_RODATA_READONLY_AGENT: case AMDGPU_HSA_DATA_READONLY_AGENT: case AMDGPU_HSA_BSS_READONLY_AGENT: return AMDGPU_HSA_SEGMENT_READONLY_AGENT; default: assert(false); return AMDGPU_HSA_SEGMENT_LAST; } } bool IsAmdHsaElfSectionROData(amdgpu_hsa_elf_section_t sec) { switch (sec) { case AMDGPU_HSA_RODATA_GLOBAL_PROGRAM: case AMDGPU_HSA_RODATA_GLOBAL_AGENT: case AMDGPU_HSA_RODATA_READONLY_AGENT: default: return false; } } std::string AmdHsaElfSegmentToString(amdgpu_hsa_elf_segment_t seg) { switch (seg) { case AMDGPU_HSA_SEGMENT_GLOBAL_PROGRAM: return "GLOBAL_PROGRAM"; case AMDGPU_HSA_SEGMENT_GLOBAL_AGENT: return "GLOBAL_AGENT"; case AMDGPU_HSA_SEGMENT_READONLY_AGENT: return "READONLY_AGENT"; case AMDGPU_HSA_SEGMENT_CODE_AGENT: return "CODE_AGENT"; default: return "UNKNOWN"; } } std::string AmdPTLoadToString(uint64_t type) { if (PT_LOOS <= type && type < PT_LOOS + AMDGPU_HSA_SEGMENT_LAST) { return AmdHsaElfSegmentToString((amdgpu_hsa_elf_segment_t) (type - PT_LOOS)); } else { return "UNKNOWN (" + std::to_string(type) + ")"; } } void PrintAmdKernelCode(std::ostream& out, const amd_kernel_code_t *akc) { uint32_t is_debug_enabled = AMD_HSA_BITS_GET(akc->kernel_code_properties, AMD_KERNEL_CODE_PROPERTIES_IS_DEBUG_ENABLED); out << attr1 << "amd_kernel_code_version_major" << eq << akc->amd_kernel_code_version_major << std::endl; out << attr1 << "amd_kernel_code_version_minor" << eq << akc->amd_kernel_code_version_minor << std::endl; out << attr1 << "amd_machine_kind" << eq << AmdMachineKindToString(akc->amd_machine_kind) << std::endl; out << attr1 << "amd_machine_version_major" << eq << (uint32_t)akc->amd_machine_version_major << std::endl; out << attr1 << "amd_machine_version_minor" << eq << (uint32_t)akc->amd_machine_version_minor << std::endl; out << attr1 << "amd_machine_version_stepping" << eq << (uint32_t)akc->amd_machine_version_stepping << std::endl; out << attr1 << "kernel_code_entry_byte_offset" << eq << akc->kernel_code_entry_byte_offset << std::endl; if (akc->kernel_code_prefetch_byte_offset) { out << attr1 << "kernel_code_prefetch_byte_offset" << eq << akc->kernel_code_prefetch_byte_offset << std::endl; } if (akc->kernel_code_prefetch_byte_size) { out << attr1 << "kernel_code_prefetch_byte_size" << eq << akc->kernel_code_prefetch_byte_size << std::endl; } out << attr1 << "max_scratch_backing_memory_byte_size" << eq << akc->max_scratch_backing_memory_byte_size << std::endl; PrintAmdComputePgmRsrcOne(out, akc->compute_pgm_rsrc1); PrintAmdComputePgmRsrcTwo(out, akc->compute_pgm_rsrc2); PrintAmdKernelCodeProperties(out, akc->kernel_code_properties); if (akc->workitem_private_segment_byte_size) { out << attr1 << "workitem_private_segment_byte_size" << eq << akc->workitem_private_segment_byte_size << std::endl; } if (akc->workgroup_group_segment_byte_size) { out << attr1 << "workgroup_group_segment_byte_size" << eq << akc->workgroup_group_segment_byte_size << std::endl; } if (akc->gds_segment_byte_size) { out << attr1 << "gds_segment_byte_size" << eq << akc->gds_segment_byte_size << std::endl; } if (akc->kernarg_segment_byte_size) { out << attr1 << "kernarg_segment_byte_size" << eq << akc->kernarg_segment_byte_size << std::endl; } if (akc->workgroup_fbarrier_count) { out << attr1 << "workgroup_fbarrier_count" << eq << akc->workgroup_fbarrier_count << std::endl; } out << attr1 << "wavefront_sgpr_count" << eq << (uint32_t)akc->wavefront_sgpr_count << std::endl; out << attr1 << "workitem_vgpr_count" << eq << (uint32_t)akc->workitem_vgpr_count << std::endl; if (akc->reserved_vgpr_count > 0) { out << attr1 << "reserved_vgpr_first" << eq << (uint32_t)akc->reserved_vgpr_first << std::endl; out << attr1 << "reserved_vgpr_count" << eq << (uint32_t)akc->reserved_vgpr_count << std::endl; } if (akc->reserved_sgpr_count > 0) { out << attr1 << "reserved_sgpr_first" << eq << (uint32_t)akc->reserved_sgpr_first << std::endl; out << attr1 << "reserved_sgpr_count" << eq << (uint32_t)akc->reserved_sgpr_count << std::endl; } if (is_debug_enabled && (akc->debug_wavefront_private_segment_offset_sgpr != uint16_t(-1))) { out << attr1 << "debug_wavefront_private_segment_offset_sgpr" << eq << (uint32_t)akc->debug_wavefront_private_segment_offset_sgpr << std::endl; } if (is_debug_enabled && (akc->debug_private_segment_buffer_sgpr != uint16_t(-1))) { out << attr1 << "debug_private_segment_buffer_sgpr" << eq << (uint32_t)akc->debug_private_segment_buffer_sgpr << ":" << (uint32_t)(akc->debug_private_segment_buffer_sgpr + 3) << std::endl; } if (akc->kernarg_segment_alignment) { out << attr1 << "kernarg_segment_alignment" << eq << AmdPowerTwoToString(akc->kernarg_segment_alignment) << " (" << (uint32_t) akc->kernarg_segment_alignment << ")" << std::endl; } if (akc->group_segment_alignment) { out << attr1 << "group_segment_alignment" << eq << AmdPowerTwoToString(akc->group_segment_alignment) << " (" << (uint32_t) akc->group_segment_alignment << ")" << std::endl; } if (akc->private_segment_alignment) { out << attr1 << "private_segment_alignment" << eq << AmdPowerTwoToString(akc->private_segment_alignment) << " (" << (uint32_t) akc->private_segment_alignment << ")" << std::endl; } out << attr1 << "wavefront_size" << eq << AmdPowerTwoToString(akc->wavefront_size) << " (" << (uint32_t) akc->wavefront_size << ")" << std::endl; PrintAmdControlDirectives(out, akc->control_directives); } void PrintAmdComputePgmRsrcOne(std::ostream& out, amd_compute_pgm_rsrc_one32_t compute_pgm_rsrc1) { out << " COMPUTE_PGM_RSRC1 (0x" << std::hex << std::setw(8) << std::setfill('0') << compute_pgm_rsrc1 << "):" << std::endl; out << std::dec; uint32_t granulated_workitem_vgpr_count = AMD_HSA_BITS_GET(compute_pgm_rsrc1, AMD_COMPUTE_PGM_RSRC_ONE_GRANULATED_WORKITEM_VGPR_COUNT); out << attr2 << "granulated_workitem_vgpr_count" << eq << granulated_workitem_vgpr_count << std::endl; uint32_t granulated_wavefront_sgpr_count = AMD_HSA_BITS_GET(compute_pgm_rsrc1, AMD_COMPUTE_PGM_RSRC_ONE_GRANULATED_WAVEFRONT_SGPR_COUNT); out << attr2 << "granulated_wavefront_sgpr_count" << eq << granulated_wavefront_sgpr_count << std::endl; uint32_t priority = AMD_HSA_BITS_GET(compute_pgm_rsrc1, AMD_COMPUTE_PGM_RSRC_ONE_PRIORITY); out << attr2 << "priority" << eq << priority << std::endl; uint32_t float_round_mode_32 = AMD_HSA_BITS_GET(compute_pgm_rsrc1, AMD_COMPUTE_PGM_RSRC_ONE_FLOAT_ROUND_MODE_32); out << attr2 << "float_round_mode_32" << eq << AmdFloatRoundModeToString((amd_float_round_mode_t)float_round_mode_32) << std::endl; uint32_t float_round_mode_16_64 = AMD_HSA_BITS_GET(compute_pgm_rsrc1, AMD_COMPUTE_PGM_RSRC_ONE_FLOAT_ROUND_MODE_16_64); out << attr2 << "float_round_mode_16_64" << eq << AmdFloatRoundModeToString((amd_float_round_mode_t)float_round_mode_16_64) << std::endl; uint32_t float_denorm_mode_32 = AMD_HSA_BITS_GET(compute_pgm_rsrc1, AMD_COMPUTE_PGM_RSRC_ONE_FLOAT_DENORM_MODE_32); out << attr2 << "float_denorm_mode_32" << eq << AmdFloatDenormModeToString((amd_float_denorm_mode_t)float_denorm_mode_32) << std::endl; uint32_t float_denorm_mode_16_64 = AMD_HSA_BITS_GET(compute_pgm_rsrc1, AMD_COMPUTE_PGM_RSRC_ONE_FLOAT_DENORM_MODE_16_64); out << attr2 << "float_denorm_mode_16_64" << eq << AmdFloatDenormModeToString((amd_float_denorm_mode_t)float_denorm_mode_16_64) << std::endl; if (AMD_HSA_BITS_GET(compute_pgm_rsrc1, AMD_COMPUTE_PGM_RSRC_ONE_PRIV)) { out << attr2 << "priv" << eq << "TRUE" << std::endl; } if (AMD_HSA_BITS_GET(compute_pgm_rsrc1, AMD_COMPUTE_PGM_RSRC_ONE_ENABLE_DX10_CLAMP)) { out << attr2 << "enable_dx10_clamp" << eq << "TRUE" << std::endl; } if (AMD_HSA_BITS_GET(compute_pgm_rsrc1, AMD_COMPUTE_PGM_RSRC_ONE_DEBUG_MODE)) { out << attr2 << "debug_mode" << eq << "TRUE" << std::endl; } if (AMD_HSA_BITS_GET(compute_pgm_rsrc1, AMD_COMPUTE_PGM_RSRC_ONE_ENABLE_IEEE_MODE)) { out << attr2 << "enable_ieee_mode" << eq << "TRUE" << std::endl; } if (AMD_HSA_BITS_GET(compute_pgm_rsrc1, AMD_COMPUTE_PGM_RSRC_ONE_BULKY)) { out << attr2 << "bulky" << eq << "TRUE" << std::endl; } if (AMD_HSA_BITS_GET(compute_pgm_rsrc1, AMD_COMPUTE_PGM_RSRC_ONE_CDBG_USER)) { out << attr2 << "cdbg_user" << eq << "TRUE" << std::endl; } } void PrintAmdComputePgmRsrcTwo(std::ostream& out, amd_compute_pgm_rsrc_two32_t compute_pgm_rsrc2) { out << " COMPUTE_PGM_RSRC2 (0x" << std::hex << std::setw(8) << std::setfill('0') << compute_pgm_rsrc2 << "):" << std::endl; out << std::dec; if (AMD_HSA_BITS_GET(compute_pgm_rsrc2, AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_SGPR_PRIVATE_SEGMENT_WAVE_BYTE_OFFSET)) { out << attr2 << "enable_sgpr_private_segment_wave_byte_offset" << eq << "TRUE" << std::endl; } uint32_t user_sgpr_count = AMD_HSA_BITS_GET(compute_pgm_rsrc2, AMD_COMPUTE_PGM_RSRC_TWO_USER_SGPR_COUNT); out << attr2 << "user_sgpr_count" << eq << user_sgpr_count << std::endl; if (AMD_HSA_BITS_GET(compute_pgm_rsrc2, AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_TRAP_HANDLER)) { out << attr2 << "enable_trap_handler" << eq << "TRUE" << std::endl; } if (AMD_HSA_BITS_GET(compute_pgm_rsrc2, AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_SGPR_WORKGROUP_ID_X)) { out << attr2 << "enable_sgpr_workgroup_id_x" << eq << "TRUE" << std::endl; } if (AMD_HSA_BITS_GET(compute_pgm_rsrc2, AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_SGPR_WORKGROUP_ID_Y)) { out << attr2 << "enable_sgpr_workgroup_id_y" << eq << "TRUE" << std::endl; } if (AMD_HSA_BITS_GET(compute_pgm_rsrc2, AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_SGPR_WORKGROUP_ID_Z)) { out << attr2 << "enable_sgpr_workgroup_id_z" << eq << "TRUE" << std::endl; } if (AMD_HSA_BITS_GET(compute_pgm_rsrc2, AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_SGPR_WORKGROUP_INFO)) { out << attr2 << "enable_sgpr_workgroup_info" << eq << "TRUE" << std::endl; } uint32_t enable_vgpr_workitem_id = AMD_HSA_BITS_GET(compute_pgm_rsrc2, AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_VGPR_WORKITEM_ID); out << attr2 << "enable_vgpr_workitem_id" << eq << AmdSystemVgprWorkitemIdToString((amd_system_vgpr_workitem_id_t)enable_vgpr_workitem_id) << std::endl; if (AMD_HSA_BITS_GET(compute_pgm_rsrc2, AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_EXCEPTION_ADDRESS_WATCH)) { out << attr2 << "enable_exception_address_watch" << eq << "TRUE" << std::endl; } if (AMD_HSA_BITS_GET(compute_pgm_rsrc2, AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_EXCEPTION_MEMORY_VIOLATION)) { out << attr2 << "enable_exception_memory_violation" << eq << "TRUE" << std::endl; } uint32_t granulated_lds_size = AMD_HSA_BITS_GET(compute_pgm_rsrc2, AMD_COMPUTE_PGM_RSRC_TWO_GRANULATED_LDS_SIZE); out << attr2 << "granulated_lds_size" << eq << granulated_lds_size << std::endl; if (AMD_HSA_BITS_GET(compute_pgm_rsrc2, AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION)) { out << attr2 << "enable_exception_ieee_754_fp_invalid_operation" << eq << "TRUE" << std::endl; } if (AMD_HSA_BITS_GET(compute_pgm_rsrc2, AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE)) { out << attr2 << "enable_exception_fp_denormal_source" << eq << "TRUE" << std::endl; } if (AMD_HSA_BITS_GET(compute_pgm_rsrc2, AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO)) { out << attr2 << "enable_exception_ieee_754_fp_division_by_zero" << eq << "TRUE" << std::endl; } if (AMD_HSA_BITS_GET(compute_pgm_rsrc2, AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW)) { out << attr2 << "enable_exception_ieee_754_fp_overflow" << eq << "TRUE" << std::endl; } if (AMD_HSA_BITS_GET(compute_pgm_rsrc2, AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW)) { out << attr2 << "enable_exception_ieee_754_fp_underflow" << eq << "TRUE" << std::endl; } if (AMD_HSA_BITS_GET(compute_pgm_rsrc2, AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT)) { out << attr2 << "enable_exception_ieee_754_fp_inexact" << eq << "TRUE" << std::endl; } if (AMD_HSA_BITS_GET(compute_pgm_rsrc2, AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_EXCEPTION_INT_DIVISION_BY_ZERO)) { out << attr2 << "enable_exception_int_division_by_zero" << eq << "TRUE" << std::endl; } } void PrintAmdKernelCodeProperties(std::ostream& out, amd_kernel_code_properties32_t kernel_code_properties) { out << " KERNEL_CODE_PROPERTIES (0x" << std::hex << std::setw(8) << std::setfill('0') << kernel_code_properties << "):" << std::endl; out << std::dec; if (AMD_HSA_BITS_GET(kernel_code_properties, AMD_KERNEL_CODE_PROPERTIES_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER)) { out << attr2 << "enable_sgpr_private_segment_buffer" << eq << "TRUE" << std::endl; } if (AMD_HSA_BITS_GET(kernel_code_properties, AMD_KERNEL_CODE_PROPERTIES_ENABLE_SGPR_DISPATCH_PTR)) { out << attr2 << "enable_sgpr_dispatch_ptr" << eq << "TRUE" << std::endl; } if (AMD_HSA_BITS_GET(kernel_code_properties, AMD_KERNEL_CODE_PROPERTIES_ENABLE_SGPR_QUEUE_PTR)) { out << attr2 << "enable_sgpr_queue_ptr" << eq << "TRUE" << std::endl; } if (AMD_HSA_BITS_GET(kernel_code_properties, AMD_KERNEL_CODE_PROPERTIES_ENABLE_SGPR_KERNARG_SEGMENT_PTR)) { out << attr2 << "enable_sgpr_kernarg_segment_ptr" << eq << "TRUE" << std::endl; } if (AMD_HSA_BITS_GET(kernel_code_properties, AMD_KERNEL_CODE_PROPERTIES_ENABLE_SGPR_DISPATCH_ID)) { out << attr2 << "enable_sgpr_dispatch_id" << eq << "TRUE" << std::endl; } if (AMD_HSA_BITS_GET(kernel_code_properties, AMD_KERNEL_CODE_PROPERTIES_ENABLE_SGPR_FLAT_SCRATCH_INIT)) { out << attr2 << "enable_sgpr_flat_scratch_init" << eq << "TRUE" << std::endl; } if (AMD_HSA_BITS_GET(kernel_code_properties, AMD_KERNEL_CODE_PROPERTIES_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE)) { out << attr2 << "enable_sgpr_private_segment_size" << eq << "TRUE" << std::endl; } if (AMD_HSA_BITS_GET(kernel_code_properties, AMD_KERNEL_CODE_PROPERTIES_ENABLE_SGPR_GRID_WORKGROUP_COUNT_X)) { out << attr2 << "enable_sgpr_grid_workgroup_count_x" << eq << "TRUE" << std::endl; } if (AMD_HSA_BITS_GET(kernel_code_properties, AMD_KERNEL_CODE_PROPERTIES_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Y)) { out << attr2 << "enable_sgpr_grid_workgroup_count_y" << eq << "TRUE" << std::endl; } if (AMD_HSA_BITS_GET(kernel_code_properties, AMD_KERNEL_CODE_PROPERTIES_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Z)) { out << attr2 << "enable_sgpr_grid_workgroup_count_z" << eq << "TRUE" << std::endl; } if (AMD_HSA_BITS_GET(kernel_code_properties, AMD_KERNEL_CODE_PROPERTIES_ENABLE_ORDERED_APPEND_GDS)) { out << attr2 << "enable_ordered_append_gds" << eq << "TRUE" << std::endl; } uint32_t private_element_size = AMD_HSA_BITS_GET(kernel_code_properties, AMD_KERNEL_CODE_PROPERTIES_PRIVATE_ELEMENT_SIZE); out << attr2 << "private_element_size" << eq << AmdElementByteSizeToString((amd_element_byte_size_t)private_element_size) << std::endl; if (AMD_HSA_BITS_GET(kernel_code_properties, AMD_KERNEL_CODE_PROPERTIES_IS_PTR64)) { out << attr2 << "is_ptr64" << eq << "TRUE" << std::endl; } if (AMD_HSA_BITS_GET(kernel_code_properties, AMD_KERNEL_CODE_PROPERTIES_IS_DYNAMIC_CALLSTACK)) { out << attr2 << "is_dynamic_callstack" << eq << "TRUE" << std::endl; } if (AMD_HSA_BITS_GET(kernel_code_properties, AMD_KERNEL_CODE_PROPERTIES_IS_DEBUG_ENABLED)) { out << attr2 << "is_debug_enabled" << eq << "TRUE" << std::endl; } if (AMD_HSA_BITS_GET(kernel_code_properties, AMD_KERNEL_CODE_PROPERTIES_IS_XNACK_ENABLED)) { out << attr2 << "is_xnack_enabled" << eq << "TRUE" << std::endl; } } void PrintAmdControlDirectives(std::ostream& out, const amd_control_directives_t &control_directives) { if (!control_directives.enabled_control_directives) { return; } out << " CONTROL_DIRECTIVES:" << std::endl; if (control_directives.enabled_control_directives & AMD_ENABLED_CONTROL_DIRECTIVE_ENABLE_BREAK_EXCEPTIONS) { out << attr2 << "enable_break_exceptions" << eq << AmdExceptionKindToString(control_directives.enable_break_exceptions).c_str() << std::endl; } if (control_directives.enabled_control_directives & AMD_ENABLED_CONTROL_DIRECTIVE_ENABLE_DETECT_EXCEPTIONS) { out << attr2 << "enable_detect_exceptions" << eq << AmdExceptionKindToString(control_directives.enable_detect_exceptions).c_str() << std::endl; } if (control_directives.enabled_control_directives & AMD_ENABLED_CONTROL_DIRECTIVE_MAX_DYNAMIC_GROUP_SIZE) { out << attr2 << "max_dynamic_group_size" << eq << control_directives.max_dynamic_group_size << std::endl; } if (control_directives.enabled_control_directives & AMD_ENABLED_CONTROL_DIRECTIVE_MAX_FLAT_GRID_SIZE) { out << attr2 << "max_flat_grid_size" << eq << control_directives.max_flat_grid_size << std::endl; } if (control_directives.enabled_control_directives & AMD_ENABLED_CONTROL_DIRECTIVE_MAX_FLAT_WORKGROUP_SIZE) { out << attr2 << "max_flat_workgroup_size" << eq << control_directives.max_flat_workgroup_size << std::endl; } if (control_directives.enabled_control_directives & AMD_ENABLED_CONTROL_DIRECTIVE_REQUIRED_DIM) { out << attr2 << "required_dim" << eq << (uint32_t)control_directives.required_dim << std::endl; } if (control_directives.enabled_control_directives & AMD_ENABLED_CONTROL_DIRECTIVE_REQUIRED_GRID_SIZE) { out << attr2 << "required_grid_size" << eq << "(" << control_directives.required_grid_size[0] << ", " << control_directives.required_grid_size[1] << ", " << control_directives.required_grid_size[2] << ")" << std::endl; } if (control_directives.enabled_control_directives & AMD_ENABLED_CONTROL_DIRECTIVE_REQUIRED_WORKGROUP_SIZE) { out << attr2 << "required_workgroup_size" << eq << "(" << control_directives.required_workgroup_size[0] << ", " << control_directives.required_workgroup_size[1] << ", " << control_directives.required_workgroup_size[2] << ")" << std::endl; } if (control_directives.enabled_control_directives & AMD_ENABLED_CONTROL_DIRECTIVE_REQUIRE_NO_PARTIAL_WORKGROUPS) { out << attr2 << "require_no_partial_workgroups" << eq << "TRUE" << std::endl; } } namespace code_options { std::ostream& space(std::ostream& out) { if (out.tellp()) { out << " "; } return out; } std::ostream& operator<<(std::ostream& out, const control_directive& d) { out << space << "-hsa_control_directive:" << d.name << "="; return out; } const char *BrigExceptionString(BrigExceptions32_t e) { switch (e) { case BRIG_EXCEPTIONS_INVALID_OPERATION: return "INVALID_OPERATION"; case BRIG_EXCEPTIONS_DIVIDE_BY_ZERO: return "DIVIDE_BY_ZERO"; case BRIG_EXCEPTIONS_OVERFLOW: return "OVERFLOW"; case BRIG_EXCEPTIONS_INEXACT: return "INEXACT"; default: assert(false); return ""; } } std::ostream& operator<<(std::ostream& out, const exceptions_mask& e) { bool first = true; for (BrigExceptions32_t be = BRIG_EXCEPTIONS_INVALID_OPERATION; be < BRIG_EXCEPTIONS_FIRST_USER_DEFINED; ++be) { if (e.mask & be) { if (first) { first = false; } else { out << ","; } out << BrigExceptionString(be); } } return out; } std::ostream& operator<<(std::ostream& out, const control_directives& cd) { const hsa_ext_control_directives_t& d = cd.d; uint64_t mask = d.control_directives_mask; if (!mask) { return out; } if (mask & BRIG_CONTROL_ENABLEBREAKEXCEPTIONS) { out << control_directive("ENABLEBREAKEXCEPTIONS") << exceptions_mask(d.break_exceptions_mask); } if (mask & BRIG_CONTROL_ENABLEDETECTEXCEPTIONS) { out << control_directive("ENABLEDETECTEXCEPTIONS") << exceptions_mask(d.detect_exceptions_mask); } if (mask & BRIG_CONTROL_MAXDYNAMICGROUPSIZE) { out << control_directive("MAXDYNAMICGROUPSIZE") << d.max_dynamic_group_size; } if (mask & BRIG_CONTROL_MAXFLATGRIDSIZE) { out << control_directive("MAXFLATGRIDSIZE") << d.max_flat_grid_size; } if (mask & BRIG_CONTROL_MAXFLATWORKGROUPSIZE) { out << control_directive("MAXFLATWORKGROUPSIZE") << d.max_flat_workgroup_size; } if (mask & BRIG_CONTROL_REQUIREDDIM) { out << control_directive("REQUIREDDIM") << d.required_dim; } if (mask & BRIG_CONTROL_REQUIREDGRIDSIZE) { out << control_directive("REQUIREDGRIDSIZE") << d.required_grid_size[0] << "," << d.required_grid_size[1] << "," << d.required_grid_size[2]; } if (mask & BRIG_CONTROL_REQUIREDWORKGROUPSIZE) { out << control_directive("REQUIREDWORKGROUPSIZE") << d.required_workgroup_size.x << "," << d.required_workgroup_size.y << "," << d.required_workgroup_size.z; } return out; } } const char* hsaerr2str(hsa_status_t status) { switch ((unsigned) status) { case HSA_STATUS_SUCCESS: return "HSA_STATUS_SUCCESS: The function has been executed successfully."; case HSA_STATUS_INFO_BREAK: return "HSA_STATUS_INFO_BREAK: A traversal over a list of " "elements has been interrupted by the application before " "completing."; case HSA_STATUS_ERROR: return "HSA_STATUS_ERROR: A generic error has occurred."; case HSA_STATUS_ERROR_INVALID_ARGUMENT: return "HSA_STATUS_ERROR_INVALID_ARGUMENT: One of the actual " "arguments does not meet a precondition stated in the " "documentation of the corresponding formal argument."; case HSA_STATUS_ERROR_INVALID_QUEUE_CREATION: return "HSA_STATUS_ERROR_INVALID_QUEUE_CREATION: The requested " "queue creation is not valid."; case HSA_STATUS_ERROR_INVALID_ALLOCATION: return "HSA_STATUS_ERROR_INVALID_ALLOCATION: The requested " "allocation is not valid."; case HSA_STATUS_ERROR_INVALID_AGENT: return "HSA_STATUS_ERROR_INVALID_AGENT: The agent is invalid."; case HSA_STATUS_ERROR_INVALID_REGION: return "HSA_STATUS_ERROR_INVALID_REGION: The memory region is invalid."; case HSA_STATUS_ERROR_INVALID_SIGNAL: return "HSA_STATUS_ERROR_INVALID_SIGNAL: The signal is invalid."; case HSA_STATUS_ERROR_INVALID_QUEUE: return "HSA_STATUS_ERROR_INVALID_QUEUE: The queue is invalid."; case HSA_STATUS_ERROR_OUT_OF_RESOURCES: return "HSA_STATUS_ERROR_OUT_OF_RESOURCES: The runtime failed to " "allocate the necessary resources. This error may also " "occur when the core runtime library needs to spawn " "threads or create internal OS-specific events."; case HSA_STATUS_ERROR_INVALID_PACKET_FORMAT: return "HSA_STATUS_ERROR_INVALID_PACKET_FORMAT: The AQL packet " "is malformed."; case HSA_STATUS_ERROR_RESOURCE_FREE: return "HSA_STATUS_ERROR_RESOURCE_FREE: An error has been " "detected while releasing a resource."; case HSA_STATUS_ERROR_NOT_INITIALIZED: return "HSA_STATUS_ERROR_NOT_INITIALIZED: An API other than " "hsa_init has been invoked while the reference count of " "the HSA runtime is zero."; case HSA_STATUS_ERROR_REFCOUNT_OVERFLOW: return "HSA_STATUS_ERROR_REFCOUNT_OVERFLOW: The maximum " "reference count for the object has been reached."; case HSA_STATUS_ERROR_INCOMPATIBLE_ARGUMENTS: return "HSA_STATUS_ERROR_INCOMPATIBLE_ARGUMENTS: The arguments passed to " "a functions are not compatible."; case HSA_STATUS_ERROR_INVALID_INDEX: return "The index is invalid."; case HSA_STATUS_ERROR_INVALID_ISA: return "The instruction set architecture is invalid."; case HSA_STATUS_ERROR_INVALID_CODE_OBJECT: return "The code object is invalid."; case HSA_STATUS_ERROR_INVALID_EXECUTABLE: return "The executable is invalid."; case HSA_STATUS_ERROR_FROZEN_EXECUTABLE: return "The executable is frozen."; case HSA_STATUS_ERROR_INVALID_SYMBOL_NAME: return "There is no symbol with the given name."; case HSA_STATUS_ERROR_VARIABLE_ALREADY_DEFINED: return "The variable is already defined."; case HSA_STATUS_ERROR_VARIABLE_UNDEFINED: return "The variable is undefined."; case HSA_EXT_STATUS_ERROR_INVALID_PROGRAM: return "HSA_EXT_STATUS_ERROR_INVALID_PROGRAM: Invalid program"; case HSA_EXT_STATUS_ERROR_INVALID_MODULE: return "HSA_EXT_STATUS_ERROR_INVALID_MODULE: Invalid module"; case HSA_EXT_STATUS_ERROR_INCOMPATIBLE_MODULE: return "HSA_EXT_STATUS_ERROR_INCOMPATIBLE_MODULE: Incompatible module"; case HSA_EXT_STATUS_ERROR_MODULE_ALREADY_INCLUDED: return "HSA_EXT_STATUS_ERROR_MODULE_ALREADY_INCLUDED: Module already " "included"; case HSA_EXT_STATUS_ERROR_SYMBOL_MISMATCH: return "HSA_EXT_STATUS_ERROR_SYMBOL_MISMATCH: Symbol mismatch"; case HSA_EXT_STATUS_ERROR_FINALIZATION_FAILED: return "HSA_EXT_STATUS_ERROR_FINALIZATION_FAILED: Finalization failed"; case HSA_EXT_STATUS_ERROR_DIRECTIVE_MISMATCH: return "HSA_EXT_STATUS_ERROR_DIRECTIVE_MISMATCH: Directive mismatch"; default: return "Unknown HSA status"; } } bool ReadFileIntoBuffer(const std::string& filename, std::vector& buffer) { std::ifstream file(filename, std::ios::binary); if (!file) { return false; } file.seekg(0, std::ios::end); std::streamsize size = file.tellg(); file.seekg(0, std::ios::beg); buffer.resize((size_t) size); if (!file.read(buffer.data(), size)) { return false; } return true; } #ifndef _WIN32 #define _tempnam tempnam #define _close close #define _getpid getpid #define _open open #endif // _WIN32 int OpenTempFile(const char* prefix) { unsigned c = 0; std::string tname = prefix; tname += "_"; tname += std::to_string(_getpid()); tname += "_"; while (c++ < 20) { // Loop because several threads can generate same filename. #ifdef _WIN32 char dir[MAX_PATH+1]; if (!GetTempPath(sizeof(dir), dir)) { return -1; } #else // _WIN32 char *dir = NULL; #endif // _WIN32 char *name = _tempnam(dir, tname.c_str()); if (!name) { return -1; } #ifdef _WIN32 HANDLE h = CreateFile( name, GENERIC_READ | GENERIC_WRITE, 0, // No sharing NULL, CREATE_NEW, FILE_ATTRIBUTE_TEMPORARY | FILE_FLAG_DELETE_ON_CLOSE, NULL); free(name); if (h == INVALID_HANDLE_VALUE) { continue; } return _open_osfhandle((intptr_t)h, 0); #else // _WIN32 int d = _open(name, O_RDWR | O_CREAT | O_EXCL, S_IRUSR | S_IWUSR); if (d < 0) { free(name); continue; } if (unlink(name) < 0) { free(name); _close(d); return -1; } free(name); return d; #endif // _WIN32 } return -1; } void CloseTempFile(int fd) { _close(fd); } const char * CommentTopCallBack(void *ctx, int type) { static const char* amd_kernel_code_t_begin = "amd_kernel_code_t begin"; static const char* amd_kernel_code_t_end = "amd_kernel_code_t end"; static const char* isa_begin = "isa begin"; switch(type) { case COMMENT_AMD_KERNEL_CODE_T_BEGIN: return amd_kernel_code_t_begin; case COMMENT_AMD_KERNEL_CODE_T_END: return amd_kernel_code_t_end; case COMMENT_KERNEL_ISA_BEGIN: return isa_begin; default: assert(false); return ""; } } const char * CommentRightCallBack(void *ctx, int type) { return nullptr; } uint32_t ParseInstructionOffset(const std::string& instruction) { // instruction format: opcode op1, op2 ... // offset: binopcode std::string::size_type n = instruction.find("//"); assert(n != std::string::npos); std::string comment = instruction.substr(n); n = comment.find(':'); assert(n != std::string::npos); comment.erase(n); assert(comment.size() > 3); comment.erase(0, 3); return strtoul(comment.c_str(), nullptr, 16); } bool IsNotSpace(char c) { return !isspace(static_cast(c)); } void ltrim(std::string &str) { str.erase(str.begin(), std::find_if(str.begin(), str.end(), IsNotSpace)); } std::string DumpFileName(const std::string& dir, const char* prefix, const char* ext, unsigned n, unsigned i) { std::ostringstream ss; if (!dir.empty()) { ss << dir << "/"; } ss << prefix << std::setfill('0') << std::setw(3) << n; if (i) { ss << "_" << i; } if (ext) { ss << "." << ext; } return ss.str(); } } }