1 //////////////////////////////////////////////////////////////////////////////// 2 // 3 // The University of Illinois/NCSA 4 // Open Source License (NCSA) 5 // 6 // Copyright (c) 2014-2016, Advanced Micro Devices, Inc. All rights reserved. 7 // 8 // Developed by: 9 // 10 // AMD Research and AMD HSA Software Development 11 // 12 // Advanced Micro Devices, Inc. 13 // 14 // www.amd.com 15 // 16 // Permission is hereby granted, free of charge, to any person obtaining a copy 17 // of this software and associated documentation files (the "Software"), to 18 // deal with the Software without restriction, including without limitation 19 // the rights to use, copy, modify, merge, publish, distribute, sublicense, 20 // and/or sell copies of the Software, and to permit persons to whom the 21 // Software is furnished to do so, subject to the following conditions: 22 // 23 // - Redistributions of source code must retain the above copyright notice, 24 // this list of conditions and the following disclaimers. 25 // - Redistributions in binary form must reproduce the above copyright 26 // notice, this list of conditions and the following disclaimers in 27 // the documentation and/or other materials provided with the distribution. 28 // - Neither the names of Advanced Micro Devices, Inc, 29 // nor the names of its contributors may be used to endorse or promote 30 // products derived from this Software without specific prior written 31 // permission. 32 // 33 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 34 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 35 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 36 // THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR 37 // OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 38 // ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 39 // DEALINGS WITH THE SOFTWARE. 40 // 41 //////////////////////////////////////////////////////////////////////////////// 42 43 #include <assert.h> 44 #include <cstring> 45 #include <iomanip> 46 #include <algorithm> 47 #include "amd_hsa_code.hpp" 48 #include "amd_hsa_code_util.hpp" 49 #include <libelf.h> 50 #include "amd_hsa_elf.h" 51 #include <fstream> 52 #include <sstream> 53 #include <cstdlib> 54 #include <algorithm> 55 56 #ifdef SP3_STATIC_LIB 57 #include "sp3.h" 58 #endif // SP3_STATIC_LIB 59 60 #ifndef _WIN32 61 #define _alloca alloca 62 #endif 63 64 namespace amd { 65 namespace hsa { 66 namespace code { 67 68 using amd::elf::GetNoteString; 69 IsDeclaration() const70 bool Symbol::IsDeclaration() const 71 { 72 return elfsym->type() == STT_COMMON; 73 } 74 IsDefinition() const75 bool Symbol::IsDefinition() const 76 { 77 return !IsDeclaration(); 78 } 79 IsAgent() const80 bool Symbol::IsAgent() const 81 { 82 return elfsym->section()->flags() & SHF_AMDGPU_HSA_AGENT ? true : false; 83 } 84 Linkage() const85 hsa_symbol_linkage_t Symbol::Linkage() const 86 { 87 return elfsym->binding() == STB_GLOBAL ? HSA_SYMBOL_LINKAGE_PROGRAM : HSA_SYMBOL_LINKAGE_MODULE; 88 } 89 Allocation() const90 hsa_variable_allocation_t Symbol::Allocation() const 91 { 92 return IsAgent() ? HSA_VARIABLE_ALLOCATION_AGENT : HSA_VARIABLE_ALLOCATION_PROGRAM; 93 } 94 Segment() const95 hsa_variable_segment_t Symbol::Segment() const 96 { 97 return elfsym->section()->flags() & SHF_AMDGPU_HSA_READONLY ? HSA_VARIABLE_SEGMENT_READONLY : HSA_VARIABLE_SEGMENT_GLOBAL; 98 } 99 Size() const100 uint64_t Symbol::Size() const 101 { 102 return elfsym->size(); 103 } 104 Size32() const105 uint32_t Symbol::Size32() const 106 { 107 assert(elfsym->size() < UINT32_MAX); 108 return (uint32_t) Size(); 109 } 110 Alignment() const111 uint32_t Symbol::Alignment() const 112 { 113 assert(elfsym->section()->addralign() < UINT32_MAX); 114 return uint32_t(elfsym->section()->addralign()); 115 } 116 IsConst() const117 bool Symbol::IsConst() const 118 { 119 return elfsym->section()->flags() & SHF_WRITE ? true : false; 120 } 121 GetInfo(hsa_code_symbol_info_t attribute,void * value)122 hsa_status_t Symbol::GetInfo(hsa_code_symbol_info_t attribute, void *value) 123 { 124 assert(value); 125 126 switch (attribute) { 127 case HSA_CODE_SYMBOL_INFO_TYPE: { 128 *((hsa_symbol_kind_t*)value) = Kind(); 129 break; 130 } 131 case HSA_CODE_SYMBOL_INFO_NAME_LENGTH: { 132 *((uint32_t*)value) = GetSymbolName().size(); 133 break; 134 } 135 case HSA_CODE_SYMBOL_INFO_NAME: { 136 std::string SymbolName = GetSymbolName(); 137 memset(value, 0x0, SymbolName.size()); 138 memcpy(value, SymbolName.c_str(), SymbolName.size()); 139 break; 140 } 141 case HSA_CODE_SYMBOL_INFO_MODULE_NAME_LENGTH: { 142 *((uint32_t*)value) = GetModuleName().size(); 143 break; 144 } 145 case HSA_CODE_SYMBOL_INFO_MODULE_NAME: { 146 std::string ModuleName = GetModuleName(); 147 memset(value, 0x0, ModuleName.size()); 148 memcpy(value, ModuleName.c_str(), ModuleName.size()); 149 break; 150 } 151 case HSA_CODE_SYMBOL_INFO_LINKAGE: { 152 *((hsa_symbol_linkage_t*)value) = Linkage(); 153 break; 154 } 155 case HSA_CODE_SYMBOL_INFO_IS_DEFINITION: { 156 *((bool*)value) = IsDefinition(); 157 break; 158 } 159 default: { 160 return HSA_STATUS_ERROR_INVALID_ARGUMENT; 161 } 162 } 163 return HSA_STATUS_SUCCESS; 164 } 165 GetModuleName() const166 std::string Symbol::GetModuleName() const { 167 std::string FullName = Name(); 168 return FullName.rfind(":") != std::string::npos ? 169 FullName.substr(0, FullName.find(":")) : ""; 170 } 171 GetSymbolName() const172 std::string Symbol::GetSymbolName() const { 173 std::string FullName = Name(); 174 return FullName.rfind(":") != std::string::npos ? 175 FullName.substr(FullName.rfind(":") + 1) : FullName; 176 } 177 ToHandle(Symbol * sym)178 hsa_code_symbol_t Symbol::ToHandle(Symbol* sym) 179 { 180 hsa_code_symbol_t s; 181 s.handle = reinterpret_cast<uint64_t>(sym); 182 return s; 183 } 184 FromHandle(hsa_code_symbol_t s)185 Symbol* Symbol::FromHandle(hsa_code_symbol_t s) 186 { 187 return reinterpret_cast<Symbol*>(s.handle); 188 } 189 KernelSymbol(amd::elf::Symbol * elfsym_,const amd_kernel_code_t * akc)190 KernelSymbol::KernelSymbol(amd::elf::Symbol* elfsym_, const amd_kernel_code_t* akc) 191 : Symbol(elfsym_) 192 , kernarg_segment_size(0) 193 , kernarg_segment_alignment(0) 194 , group_segment_size(0) 195 , private_segment_size(0) 196 , is_dynamic_callstack(0) 197 { 198 if (akc) { 199 kernarg_segment_size = (uint32_t) akc->kernarg_segment_byte_size; 200 kernarg_segment_alignment = (uint32_t) (1 << akc->kernarg_segment_alignment); 201 group_segment_size = uint32_t(akc->workgroup_group_segment_byte_size); 202 private_segment_size = uint32_t(akc->workitem_private_segment_byte_size); 203 is_dynamic_callstack = 204 AMD_HSA_BITS_GET(akc->kernel_code_properties, AMD_KERNEL_CODE_PROPERTIES_IS_DYNAMIC_CALLSTACK) ? true : false; 205 } 206 } 207 GetInfo(hsa_code_symbol_info_t attribute,void * value)208 hsa_status_t KernelSymbol::GetInfo(hsa_code_symbol_info_t attribute, void *value) 209 { 210 assert(value); 211 switch (attribute) { 212 case HSA_CODE_SYMBOL_INFO_KERNEL_KERNARG_SEGMENT_SIZE: { 213 *((uint32_t*)value) = kernarg_segment_size; 214 break; 215 } 216 case HSA_CODE_SYMBOL_INFO_KERNEL_KERNARG_SEGMENT_ALIGNMENT: { 217 *((uint32_t*)value) = kernarg_segment_alignment; 218 break; 219 } 220 case HSA_CODE_SYMBOL_INFO_KERNEL_GROUP_SEGMENT_SIZE: { 221 *((uint32_t*)value) = group_segment_size; 222 break; 223 } 224 case HSA_CODE_SYMBOL_INFO_KERNEL_PRIVATE_SEGMENT_SIZE: { 225 *((uint32_t*)value) = private_segment_size; 226 break; 227 } 228 case HSA_CODE_SYMBOL_INFO_KERNEL_DYNAMIC_CALLSTACK: { 229 *((bool*)value) = is_dynamic_callstack; 230 break; 231 } 232 default: { 233 return Symbol::GetInfo(attribute, value); 234 } 235 } 236 return HSA_STATUS_SUCCESS; 237 } 238 GetInfo(hsa_code_symbol_info_t attribute,void * value)239 hsa_status_t VariableSymbol::GetInfo(hsa_code_symbol_info_t attribute, void *value) 240 { 241 assert(value); 242 switch (attribute) { 243 case HSA_CODE_SYMBOL_INFO_VARIABLE_ALLOCATION: { 244 *((hsa_variable_allocation_t*)value) = Allocation(); 245 break; 246 } 247 case HSA_CODE_SYMBOL_INFO_VARIABLE_SEGMENT: { 248 *((hsa_variable_segment_t*)value) = Segment(); 249 break; 250 } 251 case HSA_CODE_SYMBOL_INFO_VARIABLE_ALIGNMENT: { 252 *((uint32_t*)value) = Alignment(); 253 break; 254 } 255 case HSA_CODE_SYMBOL_INFO_VARIABLE_SIZE: { 256 *((uint32_t*)value) = Size(); 257 break; 258 } 259 case HSA_CODE_SYMBOL_INFO_VARIABLE_IS_CONST: { 260 *((bool*)value) = IsConst(); 261 break; 262 } 263 default: { 264 return Symbol::GetInfo(attribute, value); 265 } 266 } 267 return HSA_STATUS_SUCCESS; 268 } 269 AmdHsaCode(bool combineDataSegments_)270 AmdHsaCode::AmdHsaCode(bool combineDataSegments_) 271 : img(nullptr), 272 combineDataSegments(combineDataSegments_), 273 hsatext(0), imageInit(0), samplerInit(0), 274 debugInfo(0), debugLine(0), debugAbbrev(0) 275 { 276 for (unsigned i = 0; i < AMDGPU_HSA_SEGMENT_LAST; ++i) { 277 for (unsigned j = 0; j < 2; ++j) { 278 hsaSegments[i][j] = 0; 279 } 280 } 281 for (unsigned i = 0; i < AMDGPU_HSA_SECTION_LAST; ++i) { 282 hsaSections[i] = 0; 283 } 284 } 285 ~AmdHsaCode()286 AmdHsaCode::~AmdHsaCode() 287 { 288 for (Symbol* sym : symbols) { delete sym; } 289 } 290 PullElf()291 bool AmdHsaCode::PullElf() 292 { 293 uint32_t majorVersion, minorVersion; 294 if (!GetNoteCodeObjectVersion(&majorVersion, &minorVersion)) { 295 return false; 296 } 297 if (majorVersion >= 2) { 298 return PullElfV2(); 299 } else { 300 return PullElfV1(); 301 } 302 } 303 PullElfV1()304 bool AmdHsaCode::PullElfV1() 305 { 306 for (size_t i = 0; i < img->segmentCount(); ++i) { 307 Segment* s = img->segment(i); 308 if (s->type() == PT_AMDGPU_HSA_LOAD_GLOBAL_PROGRAM || 309 s->type() == PT_AMDGPU_HSA_LOAD_GLOBAL_AGENT || 310 s->type() == PT_AMDGPU_HSA_LOAD_READONLY_AGENT || 311 s->type() == PT_AMDGPU_HSA_LOAD_CODE_AGENT) { 312 dataSegments.push_back(s); 313 } 314 } 315 for (size_t i = 0; i < img->sectionCount(); ++i) { 316 Section* sec = img->section(i); 317 if (!sec) { continue; } 318 if ((sec->type() == SHT_PROGBITS || sec->type() == SHT_NOBITS) && 319 (sec->flags() & (SHF_AMDGPU_HSA_AGENT | SHF_AMDGPU_HSA_GLOBAL | SHF_AMDGPU_HSA_READONLY | SHF_AMDGPU_HSA_CODE))) { 320 dataSections.push_back(sec); 321 } else if (sec->type() == SHT_RELA) { 322 relocationSections.push_back(sec->asRelocationSection()); 323 } 324 if (sec->Name() == ".hsatext") { 325 hsatext = sec; 326 } 327 } 328 for (size_t i = 0; i < img->symtab()->symbolCount(); ++i) { 329 amd::elf::Symbol* elfsym = img->symtab()->symbol(i); 330 Symbol* sym = 0; 331 switch (elfsym->type()) { 332 case STT_AMDGPU_HSA_KERNEL: { 333 amd::elf::Section* sec = elfsym->section(); 334 amd_kernel_code_t akc; 335 if (!sec) { 336 out << "Failed to find section for symbol " << elfsym->name() << std::endl; 337 return false; 338 } 339 if (!(sec->flags() & (SHF_AMDGPU_HSA_AGENT | SHF_AMDGPU_HSA_CODE | SHF_EXECINSTR))) { 340 out << "Invalid code section for symbol " << elfsym->name() << std::endl; 341 return false; 342 } 343 if (!sec->getData(elfsym->value(), &akc, sizeof(amd_kernel_code_t))) { 344 out << "Failed to get AMD Kernel Code for symbol " << elfsym->name() << std::endl; 345 return false; 346 } 347 sym = new KernelSymbol(elfsym, &akc); 348 break; 349 } 350 case STT_OBJECT: 351 case STT_COMMON: 352 sym = new VariableSymbol(elfsym); 353 break; 354 default: 355 break; // Skip unknown symbols. 356 } 357 if (sym) { symbols.push_back(sym); } 358 } 359 360 return true; 361 } 362 LoadFromFile(const std::string & filename)363 bool AmdHsaCode::LoadFromFile(const std::string& filename) 364 { 365 if (!img) { img.reset(amd::elf::NewElf64Image()); } 366 if (!img->loadFromFile(filename)) { return ElfImageError(); } 367 if (!PullElf()) { return ElfImageError(); } 368 return true; 369 } 370 SaveToFile(const std::string & filename)371 bool AmdHsaCode::SaveToFile(const std::string& filename) 372 { 373 return img->saveToFile(filename) || ElfImageError(); 374 } 375 WriteToBuffer(void * buffer)376 bool AmdHsaCode::WriteToBuffer(void* buffer) 377 { 378 return img->copyToBuffer(buffer, ElfSize()) || ElfImageError(); 379 } 380 381 InitFromBuffer(const void * buffer,size_t size)382 bool AmdHsaCode::InitFromBuffer(const void* buffer, size_t size) 383 { 384 if (!img) { img.reset(amd::elf::NewElf64Image()); } 385 if (!img->initFromBuffer(buffer, size)) { return ElfImageError(); } 386 if (!PullElf()) { return ElfImageError(); } 387 return true; 388 } 389 InitAsBuffer(const void * buffer,size_t size)390 bool AmdHsaCode::InitAsBuffer(const void* buffer, size_t size) 391 { 392 if (!img) { img.reset(amd::elf::NewElf64Image()); } 393 if (!img->initAsBuffer(buffer, size)) { return ElfImageError(); } 394 if (!PullElf()) { return ElfImageError(); } 395 return true; 396 } 397 InitAsHandle(hsa_code_object_t code_object)398 bool AmdHsaCode::InitAsHandle(hsa_code_object_t code_object) 399 { 400 void *elfmemrd = reinterpret_cast<void*>(code_object.handle); 401 if (!elfmemrd) { return false; } 402 return InitAsBuffer(elfmemrd, 0); 403 } 404 InitNew(bool xnack)405 bool AmdHsaCode::InitNew(bool xnack) 406 { 407 if (!img) { 408 img.reset(amd::elf::NewElf64Image()); 409 uint32_t flags = 0; 410 if (xnack) { flags |= EF_AMDGPU_XNACK; } 411 return img->initNew(EM_AMDGPU, ET_EXEC, ELFOSABI_AMDGPU_HSA, ELFABIVERSION_AMDGPU_HSA, flags) || 412 ElfImageError(); // FIXME: elfutils libelf does not allow program headers in ET_REL file type, so change it later in finalizer. 413 } 414 return false; 415 } 416 Freeze()417 bool AmdHsaCode::Freeze() 418 { 419 return img->Freeze() || ElfImageError(); 420 } 421 GetHandle()422 hsa_code_object_t AmdHsaCode::GetHandle() 423 { 424 hsa_code_object_t code_object; 425 code_object.handle = reinterpret_cast<uint64_t>(img->data()); 426 return code_object; 427 } 428 ElfData()429 const char* AmdHsaCode::ElfData() 430 { 431 return img->data(); 432 } 433 ElfSize()434 uint64_t AmdHsaCode::ElfSize() 435 { 436 return img->size(); 437 } 438 Validate()439 bool AmdHsaCode::Validate() 440 { 441 if (!img->Validate()) { return ElfImageError(); } 442 if (img->Machine() != EM_AMDGPU) { 443 out << "ELF error: Invalid machine" << std::endl; 444 return false; 445 } 446 return true; 447 } 448 AddAmdNote(uint32_t type,const void * desc,uint32_t desc_size)449 void AmdHsaCode::AddAmdNote(uint32_t type, const void* desc, uint32_t desc_size) 450 { 451 img->note()->addNote("AMD", type, desc, desc_size); 452 } 453 AddNoteCodeObjectVersion(uint32_t major,uint32_t minor)454 void AmdHsaCode::AddNoteCodeObjectVersion(uint32_t major, uint32_t minor) 455 { 456 amdgpu_hsa_note_code_object_version_t desc; 457 desc.major_version = major; 458 desc.minor_version = minor; 459 AddAmdNote(NT_AMDGPU_HSA_CODE_OBJECT_VERSION, &desc, sizeof(desc)); 460 } 461 GetNoteCodeObjectVersion(uint32_t * major,uint32_t * minor)462 bool AmdHsaCode::GetNoteCodeObjectVersion(uint32_t* major, uint32_t* minor) 463 { 464 amdgpu_hsa_note_code_object_version_t* desc; 465 if (!GetAmdNote(NT_AMDGPU_HSA_CODE_OBJECT_VERSION, &desc)) { return false; } 466 *major = desc->major_version; 467 *minor = desc->minor_version; 468 return true; 469 } 470 GetNoteCodeObjectVersion(std::string & version)471 bool AmdHsaCode::GetNoteCodeObjectVersion(std::string& version) 472 { 473 amdgpu_hsa_note_code_object_version_t* desc; 474 if (!GetAmdNote(NT_AMDGPU_HSA_CODE_OBJECT_VERSION, &desc)) { return false; } 475 version.clear(); 476 version += std::to_string(desc->major_version); 477 version += "."; 478 version += std::to_string(desc->minor_version); 479 return true; 480 } 481 AddNoteHsail(uint32_t hsail_major,uint32_t hsail_minor,hsa_profile_t profile,hsa_machine_model_t machine_model,hsa_default_float_rounding_mode_t rounding_mode)482 void AmdHsaCode::AddNoteHsail(uint32_t hsail_major, uint32_t hsail_minor, hsa_profile_t profile, hsa_machine_model_t machine_model, hsa_default_float_rounding_mode_t rounding_mode) 483 { 484 amdgpu_hsa_note_hsail_t desc; 485 memset(&desc, 0, sizeof(desc)); 486 desc.hsail_major_version = hsail_major; 487 desc.hsail_minor_version = hsail_minor; 488 desc.profile = uint8_t(profile); 489 desc.machine_model = uint8_t(machine_model); 490 desc.default_float_round = uint8_t(rounding_mode); 491 AddAmdNote(NT_AMDGPU_HSA_HSAIL, &desc, sizeof(desc)); 492 } 493 GetNoteHsail(uint32_t * hsail_major,uint32_t * hsail_minor,hsa_profile_t * profile,hsa_machine_model_t * machine_model,hsa_default_float_rounding_mode_t * default_float_round)494 bool AmdHsaCode::GetNoteHsail(uint32_t* hsail_major, uint32_t* hsail_minor, hsa_profile_t* profile, hsa_machine_model_t* machine_model, hsa_default_float_rounding_mode_t* default_float_round) 495 { 496 amdgpu_hsa_note_hsail_t *desc; 497 if (!GetAmdNote(NT_AMDGPU_HSA_HSAIL, &desc)) { return false; } 498 *hsail_major = desc->hsail_major_version; 499 *hsail_minor = desc->hsail_minor_version; 500 *profile = (hsa_profile_t) desc->profile; 501 *machine_model = (hsa_machine_model_t) desc->machine_model; 502 *default_float_round = (hsa_default_float_rounding_mode_t) desc->default_float_round; 503 return true; 504 } 505 AddNoteIsa(const std::string & vendor_name,const std::string & architecture_name,uint32_t major,uint32_t minor,uint32_t stepping)506 void AmdHsaCode::AddNoteIsa(const std::string& vendor_name, const std::string& architecture_name, uint32_t major, uint32_t minor, uint32_t stepping) 507 { 508 size_t size = sizeof(amdgpu_hsa_note_producer_t) + vendor_name.length() + architecture_name.length() + 1; 509 amdgpu_hsa_note_isa_t* desc = (amdgpu_hsa_note_isa_t*) _alloca(size); 510 memset(desc, 0, size); 511 desc->vendor_name_size = vendor_name.length()+1; 512 desc->architecture_name_size = architecture_name.length()+1; 513 desc->major = major; 514 desc->minor = minor; 515 desc->stepping = stepping; 516 memcpy(desc->vendor_and_architecture_name, vendor_name.c_str(), vendor_name.length() + 1); 517 memcpy(desc->vendor_and_architecture_name + desc->vendor_name_size, architecture_name.c_str(), architecture_name.length() + 1); 518 AddAmdNote(NT_AMDGPU_HSA_ISA, desc, size); 519 } 520 GetNoteIsa(std::string & vendor_name,std::string & architecture_name,uint32_t * major_version,uint32_t * minor_version,uint32_t * stepping)521 bool AmdHsaCode::GetNoteIsa(std::string& vendor_name, std::string& architecture_name, uint32_t* major_version, uint32_t* minor_version, uint32_t* stepping) 522 { 523 amdgpu_hsa_note_isa_t *desc; 524 if (!GetAmdNote(NT_AMDGPU_HSA_ISA, &desc)) { return false; } 525 vendor_name = GetNoteString(desc->vendor_name_size, desc->vendor_and_architecture_name); 526 architecture_name = GetNoteString(desc->architecture_name_size, desc->vendor_and_architecture_name + vendor_name.length() + 1); 527 *major_version = desc->major; 528 *minor_version = desc->minor; 529 *stepping = desc->stepping; 530 return true; 531 } 532 GetNoteIsa(std::string & isaName)533 bool AmdHsaCode::GetNoteIsa(std::string& isaName) 534 { 535 std::string vendor_name, architecture_name; 536 uint32_t major_version, minor_version, stepping; 537 if (!GetNoteIsa(vendor_name, architecture_name, &major_version, &minor_version, &stepping)) { return false; } 538 isaName.clear(); 539 isaName += vendor_name; 540 isaName += ":"; 541 isaName += architecture_name; 542 isaName += ":"; 543 isaName += std::to_string(major_version); 544 isaName += ":"; 545 isaName += std::to_string(minor_version); 546 isaName += ":"; 547 isaName += std::to_string(stepping); 548 return true; 549 } 550 AddNoteProducer(uint32_t major,uint32_t minor,const std::string & producer)551 void AmdHsaCode::AddNoteProducer(uint32_t major, uint32_t minor, const std::string& producer) 552 { 553 size_t size = sizeof(amdgpu_hsa_note_producer_t) + producer.length(); 554 amdgpu_hsa_note_producer_t* desc = (amdgpu_hsa_note_producer_t*) _alloca(size); 555 memset(desc, 0, size); 556 desc->producer_name_size = producer.length(); 557 desc->producer_major_version = major; 558 desc->producer_minor_version = minor; 559 memcpy(desc->producer_name, producer.c_str(), producer.length() + 1); 560 AddAmdNote(NT_AMDGPU_HSA_PRODUCER, desc, size); 561 } 562 GetNoteProducer(uint32_t * major,uint32_t * minor,std::string & producer_name)563 bool AmdHsaCode::GetNoteProducer(uint32_t* major, uint32_t* minor, std::string& producer_name) 564 { 565 amdgpu_hsa_note_producer_t* desc; 566 if (!GetAmdNote(NT_AMDGPU_HSA_PRODUCER, &desc)) { return false; } 567 *major = desc->producer_major_version; 568 *minor = desc->producer_minor_version; 569 producer_name = GetNoteString(desc->producer_name_size, desc->producer_name); 570 return true; 571 } 572 AddNoteProducerOptions(const std::string & options)573 void AmdHsaCode::AddNoteProducerOptions(const std::string& options) 574 { 575 size_t size = sizeof(amdgpu_hsa_note_producer_options_t) + options.length(); 576 amdgpu_hsa_note_producer_options_t *desc = (amdgpu_hsa_note_producer_options_t*) _alloca(size); 577 desc->producer_options_size = options.length(); 578 memcpy(desc->producer_options, options.c_str(), options.length() + 1); 579 AddAmdNote(NT_AMDGPU_HSA_PRODUCER_OPTIONS, desc, size); 580 } 581 AddNoteProducerOptions(int32_t call_convention,const hsa_ext_control_directives_t & user_directives,const std::string & user_options)582 void AmdHsaCode::AddNoteProducerOptions(int32_t call_convention, const hsa_ext_control_directives_t& user_directives, const std::string& user_options) 583 { 584 using namespace code_options; 585 std::ostringstream ss; 586 ss << 587 space << "-hsa_call_convention=" << call_convention << 588 control_directives(user_directives); 589 if (!user_options.empty()) { 590 ss << space << user_options; 591 } 592 593 AddNoteProducerOptions(ss.str()); 594 } 595 GetNoteProducerOptions(std::string & options)596 bool AmdHsaCode::GetNoteProducerOptions(std::string& options) 597 { 598 amdgpu_hsa_note_producer_options_t* desc; 599 if (!GetAmdNote(NT_AMDGPU_HSA_PRODUCER_OPTIONS, &desc)) { return false; } 600 options = GetNoteString(desc->producer_options_size, desc->producer_options); 601 return true; 602 } 603 GetInfo(hsa_code_object_info_t attribute,void * value)604 hsa_status_t AmdHsaCode::GetInfo(hsa_code_object_info_t attribute, void *value) 605 { 606 assert(value); 607 switch (attribute) { 608 case HSA_CODE_OBJECT_INFO_VERSION: { 609 std::string version; 610 if (!GetNoteCodeObjectVersion(version)) { return HSA_STATUS_ERROR_INVALID_CODE_OBJECT; } 611 char *svalue = (char*)value; 612 memset(svalue, 0x0, 64); 613 memcpy(svalue, version.c_str(), (std::min)(size_t(63), version.length())); 614 break; 615 } 616 case HSA_CODE_OBJECT_INFO_ISA: { 617 // TODO: Currently returns string representation instead of hsa_isa_t 618 // which is unavailable here. 619 std::string isa; 620 if (!GetNoteIsa(isa)) { return HSA_STATUS_ERROR_INVALID_CODE_OBJECT; } 621 char *svalue = (char*)value; 622 memset(svalue, 0x0, 64); 623 memcpy(svalue, isa.c_str(), (std::min)(size_t(63), isa.length())); 624 break; 625 } 626 case HSA_CODE_OBJECT_INFO_MACHINE_MODEL: 627 case HSA_CODE_OBJECT_INFO_PROFILE: 628 case HSA_CODE_OBJECT_INFO_DEFAULT_FLOAT_ROUNDING_MODE: { 629 uint32_t hsail_major, hsail_minor; 630 hsa_profile_t profile; 631 hsa_machine_model_t machine_model; 632 hsa_default_float_rounding_mode_t default_float_round; 633 if (!GetNoteHsail(&hsail_major, &hsail_minor, &profile, &machine_model, &default_float_round)) { 634 return HSA_STATUS_ERROR_INVALID_CODE_OBJECT; 635 } 636 switch (attribute) { 637 case HSA_CODE_OBJECT_INFO_MACHINE_MODEL: 638 *((hsa_machine_model_t*)value) = machine_model; break; 639 case HSA_CODE_OBJECT_INFO_PROFILE: 640 *((hsa_profile_t*)value) = profile; break; 641 case HSA_CODE_OBJECT_INFO_DEFAULT_FLOAT_ROUNDING_MODE: 642 *((hsa_default_float_rounding_mode_t*)value) = default_float_round; break; 643 default: break; 644 } 645 break; 646 } 647 default: 648 assert(false); 649 return HSA_STATUS_ERROR_INVALID_ARGUMENT; 650 } 651 return HSA_STATUS_SUCCESS; 652 } 653 GetSymbol(const char * module_name,const char * symbol_name,hsa_code_symbol_t * s)654 hsa_status_t AmdHsaCode::GetSymbol(const char *module_name, const char *symbol_name, hsa_code_symbol_t *s) 655 { 656 std::string mname = MangleSymbolName(module_name ? module_name : "", symbol_name); 657 for (Symbol* sym : symbols) { 658 if (sym->Name() == mname) { 659 *s = Symbol::ToHandle(sym); 660 return HSA_STATUS_SUCCESS; 661 } 662 } 663 return HSA_STATUS_ERROR_INVALID_SYMBOL_NAME; 664 } 665 IterateSymbols(hsa_code_object_t code_object,hsa_status_t (* callback)(hsa_code_object_t code_object,hsa_code_symbol_t symbol,void * data),void * data)666 hsa_status_t AmdHsaCode::IterateSymbols(hsa_code_object_t code_object, 667 hsa_status_t (*callback)( 668 hsa_code_object_t code_object, 669 hsa_code_symbol_t symbol, 670 void* data), 671 void* data) 672 { 673 for (Symbol* sym : symbols) { 674 hsa_code_symbol_t s = Symbol::ToHandle(sym); 675 hsa_status_t status = callback(code_object, s, data); 676 if (status != HSA_STATUS_SUCCESS) { return status; } 677 } 678 return HSA_STATUS_SUCCESS; 679 } 680 ImageInitSection()681 Section* AmdHsaCode::ImageInitSection() 682 { 683 if (!imageInit) { 684 imageInit = img->addSection( 685 ".hsaimage_imageinit", 686 SHT_PROGBITS, 687 SHF_MERGE, 688 sizeof(amdgpu_hsa_image_descriptor_t)); 689 } 690 return imageInit; 691 } 692 AddImageInitializer(Symbol * image,uint64_t destOffset,const amdgpu_hsa_image_descriptor_t & desc)693 void AmdHsaCode::AddImageInitializer(Symbol* image, uint64_t destOffset, const amdgpu_hsa_image_descriptor_t& desc) 694 { 695 uint64_t offset = ImageInitSection()->addData(&desc, sizeof(desc), 8); 696 amd::elf::Symbol* imageInit = 697 img->symtab()->addSymbol(ImageInitSection(), "", offset, 0, STT_AMDGPU_HSA_METADATA, STB_LOCAL); 698 image->elfSym()->section()->relocationSection()->addRelocation(R_AMDGPU_INIT_IMAGE, imageInit, image->elfSym()->value() + destOffset, 0); 699 } 700 AddImageInitializer(Symbol * image,uint64_t destOffset,amdgpu_hsa_metadata_kind16_t kind,amdgpu_hsa_image_geometry8_t geometry,amdgpu_hsa_image_channel_order8_t channel_order,amdgpu_hsa_image_channel_type8_t channel_type,uint64_t width,uint64_t height,uint64_t depth,uint64_t array)701 void AmdHsaCode::AddImageInitializer( 702 Symbol* image, uint64_t destOffset, 703 amdgpu_hsa_metadata_kind16_t kind, 704 amdgpu_hsa_image_geometry8_t geometry, 705 amdgpu_hsa_image_channel_order8_t channel_order, amdgpu_hsa_image_channel_type8_t channel_type, 706 uint64_t width, uint64_t height, uint64_t depth, uint64_t array) 707 { 708 amdgpu_hsa_image_descriptor_t desc; 709 desc.size = (uint16_t) sizeof(amdgpu_hsa_image_descriptor_t); 710 desc.kind = kind; 711 desc.geometry = geometry; 712 desc.channel_order = channel_order; 713 desc.channel_type = channel_type; 714 desc.width = width; 715 desc.height = height; 716 desc.depth = depth; 717 desc.array = array; 718 AddImageInitializer(image, destOffset, desc); 719 } 720 721 SamplerInitSection()722 Section* AmdHsaCode::SamplerInitSection() 723 { 724 if (!samplerInit) { 725 samplerInit = img->addSection( 726 ".hsaimage_samplerinit", 727 SHT_PROGBITS, 728 SHF_MERGE, 729 sizeof(amdgpu_hsa_sampler_descriptor_t)); 730 } 731 return samplerInit; 732 } 733 AddSamplerInitializer(Symbol * sampler,uint64_t destOffset,const amdgpu_hsa_sampler_descriptor_t & desc)734 void AmdHsaCode::AddSamplerInitializer(Symbol* sampler, uint64_t destOffset, const amdgpu_hsa_sampler_descriptor_t& desc) 735 { 736 uint64_t offset = SamplerInitSection()->addData(&desc, sizeof(desc), 8); 737 amd::elf::Symbol* samplerInit = 738 img->symtab()->addSymbol(SamplerInitSection(), "", offset, 0, STT_AMDGPU_HSA_METADATA, STB_LOCAL); 739 sampler->elfSym()->section()->relocationSection()->addRelocation(R_AMDGPU_INIT_SAMPLER, samplerInit, sampler->elfSym()->value() + destOffset, 0); 740 } 741 AddSamplerInitializer(Symbol * sampler,uint64_t destOffset,amdgpu_hsa_sampler_coord8_t coord,amdgpu_hsa_sampler_filter8_t filter,amdgpu_hsa_sampler_addressing8_t addressing)742 void AmdHsaCode::AddSamplerInitializer(Symbol* sampler, uint64_t destOffset, 743 amdgpu_hsa_sampler_coord8_t coord, 744 amdgpu_hsa_sampler_filter8_t filter, 745 amdgpu_hsa_sampler_addressing8_t addressing) 746 { 747 amdgpu_hsa_sampler_descriptor_t desc; 748 desc.size = (uint16_t) sizeof(amdgpu_hsa_sampler_descriptor_t); 749 desc.kind = AMDGPU_HSA_METADATA_KIND_INIT_SAMP; 750 desc.coord = coord; 751 desc.filter = filter; 752 desc.addressing = addressing; 753 AddSamplerInitializer(sampler, destOffset, desc); 754 } 755 AddInitVarWithAddress(bool large,Symbol * dest,uint64_t destOffset,Symbol * addrOf,uint64_t addrAddend)756 void AmdHsaCode::AddInitVarWithAddress(bool large, Symbol* dest, uint64_t destOffset, Symbol* addrOf, uint64_t addrAddend) 757 { 758 uint32_t rtype = large ? R_AMDGPU_64 : R_AMDGPU_32_LOW; 759 dest->elfSym()->section()->relocationSection()->addRelocation(rtype, addrOf->elfSym(), dest->elfSym()->value() + destOffset, addrAddend); 760 } 761 NextKernelCodeOffset() const762 uint64_t AmdHsaCode::NextKernelCodeOffset() const 763 { 764 return HsaText()->nextDataOffset(256); 765 } 766 AddKernelCode(KernelSymbol * sym,const void * code,size_t size)767 bool AmdHsaCode::AddKernelCode(KernelSymbol* sym, const void* code, size_t size) 768 { 769 assert(nullptr != sym); 770 771 uint64_t offset = HsaText()->addData(code, size, 256); 772 sym->setValue(offset); 773 sym->setSize(size); 774 return true; 775 } 776 AddEmptySection()777 Section* AmdHsaCode::AddEmptySection() 778 { 779 dataSections.push_back(nullptr); return nullptr; 780 } 781 AddCodeSection(Segment * segment)782 Section* AmdHsaCode::AddCodeSection(Segment* segment) 783 { 784 if (nullptr == img) { return nullptr; } 785 Section *sec = img->addSection( 786 ".hsatext", 787 SHT_PROGBITS, 788 SHF_ALLOC | SHF_EXECINSTR | SHF_WRITE | SHF_AMDGPU_HSA_CODE | SHF_AMDGPU_HSA_AGENT, 789 0, 790 segment); 791 dataSections.push_back(sec); 792 hsatext = sec; 793 return sec; 794 } 795 AddDataSection(const std::string & name,uint32_t type,uint64_t flags,Segment * segment)796 Section* AmdHsaCode::AddDataSection(const std::string &name, 797 uint32_t type, 798 uint64_t flags, 799 Segment* segment) 800 { 801 if (nullptr == img) { return nullptr; } 802 Section *sec = img->addSection(name, type, flags, 0, segment); 803 dataSections.push_back(sec); 804 return sec; 805 } 806 InitHsaSectionSegment(amdgpu_hsa_elf_section_t section,bool combineSegments)807 void AmdHsaCode::InitHsaSectionSegment(amdgpu_hsa_elf_section_t section, bool combineSegments) 808 { 809 InitHsaSegment(AmdHsaElfSectionSegment(section), combineSegments || !IsAmdHsaElfSectionROData(section)); 810 } 811 HsaDataSection(amdgpu_hsa_elf_section_t sec,bool combineSegments)812 Section* AmdHsaCode::HsaDataSection(amdgpu_hsa_elf_section_t sec, bool combineSegments) 813 { 814 if (!hsaSections[sec]) { 815 bool writable = combineSegments || !IsAmdHsaElfSectionROData(sec); 816 Segment* segment = HsaSegment(AmdHsaElfSectionSegment(sec), writable); 817 assert(segment); // Expected to be init the segment via InitHsaSegment. 818 Section* section; 819 switch (sec) { 820 case AMDGPU_HSA_RODATA_GLOBAL_PROGRAM: 821 section = AddDataSection(".hsarodata_global_program", SHT_PROGBITS, SHF_ALLOC | SHF_AMDGPU_HSA_GLOBAL, segment); break; 822 case AMDGPU_HSA_RODATA_GLOBAL_AGENT: 823 section = AddDataSection(".hsarodata_global_agent", SHT_PROGBITS, SHF_ALLOC | SHF_AMDGPU_HSA_GLOBAL | SHF_AMDGPU_HSA_AGENT, segment); break; 824 case AMDGPU_HSA_RODATA_READONLY_AGENT: 825 section = AddDataSection(".hsarodata_readonly_agent", SHT_PROGBITS, SHF_ALLOC | SHF_AMDGPU_HSA_READONLY | SHF_AMDGPU_HSA_AGENT, segment); break; 826 case AMDGPU_HSA_DATA_GLOBAL_PROGRAM: 827 section = AddDataSection(".hsadata_global_program", SHT_PROGBITS, SHF_ALLOC | SHF_WRITE | SHF_AMDGPU_HSA_GLOBAL, segment); break; 828 case AMDGPU_HSA_DATA_GLOBAL_AGENT: 829 section = AddDataSection(".hsadata_global_agent", SHT_PROGBITS, SHF_ALLOC | SHF_WRITE | SHF_AMDGPU_HSA_GLOBAL | SHF_AMDGPU_HSA_AGENT, segment); break; 830 case AMDGPU_HSA_DATA_READONLY_AGENT: 831 section = AddDataSection(".hsadata_readonly_agent", SHT_PROGBITS, SHF_ALLOC | SHF_WRITE | SHF_AMDGPU_HSA_READONLY | SHF_AMDGPU_HSA_AGENT, segment); break; 832 case AMDGPU_HSA_BSS_GLOBAL_PROGRAM: 833 section = AddDataSection(".hsabss_global_program", SHT_NOBITS, SHF_ALLOC | SHF_WRITE | SHF_AMDGPU_HSA_GLOBAL, segment); break; 834 case AMDGPU_HSA_BSS_GLOBAL_AGENT: 835 section = AddDataSection(".hsabss_global_agent", SHT_NOBITS, SHF_ALLOC | SHF_WRITE | SHF_AMDGPU_HSA_GLOBAL | SHF_AMDGPU_HSA_AGENT, segment); break; 836 case AMDGPU_HSA_BSS_READONLY_AGENT: 837 section = AddDataSection(".hsabss_readonly_agent", SHT_NOBITS, SHF_ALLOC | SHF_WRITE | SHF_AMDGPU_HSA_READONLY | SHF_AMDGPU_HSA_AGENT, segment); break; 838 default: 839 assert(false); return 0; 840 } 841 hsaSections[sec] = section; 842 } 843 return hsaSections[sec]; 844 } 845 InitHsaSegment(amdgpu_hsa_elf_segment_t segment,bool writable)846 void AmdHsaCode::InitHsaSegment(amdgpu_hsa_elf_segment_t segment, bool writable) 847 { 848 if (!hsaSegments[segment][writable]) { 849 uint32_t flags = PF_R; 850 if (writable) { flags |= PF_W; } 851 if (segment == AMDGPU_HSA_SEGMENT_CODE_AGENT) { flags |= PF_X; } 852 uint32_t type = PT_LOOS + segment; 853 assert(segment < AMDGPU_HSA_SEGMENT_LAST); 854 hsaSegments[segment][writable] = img->initSegment(type, flags); 855 } 856 } 857 AddHsaSegments()858 bool AmdHsaCode::AddHsaSegments() 859 { 860 if (!img->addSegments()) { return ElfImageError(); } 861 return true; 862 } 863 HsaSegment(amdgpu_hsa_elf_segment_t segment,bool writable)864 Segment* AmdHsaCode::HsaSegment(amdgpu_hsa_elf_segment_t segment, bool writable) 865 { 866 return hsaSegments[segment][writable]; 867 } 868 AddExecutableSymbol(const std::string & name,unsigned char type,unsigned char binding,unsigned char other,Section * section)869 Symbol* AmdHsaCode::AddExecutableSymbol(const std::string &name, 870 unsigned char type, 871 unsigned char binding, 872 unsigned char other, 873 Section *section) 874 { 875 if (nullptr == img) { return nullptr; } 876 if (!section) { section = HsaText(); } 877 symbols.push_back(new KernelSymbol(img->symtab()->addSymbol(section, name, 0, 0, type, binding, other), nullptr)); 878 return symbols.back(); 879 } 880 AddVariableSymbol(const std::string & name,unsigned char type,unsigned char binding,unsigned char other,Section * section,uint64_t value,uint64_t size)881 Symbol* AmdHsaCode::AddVariableSymbol(const std::string &name, 882 unsigned char type, 883 unsigned char binding, 884 unsigned char other, 885 Section *section, 886 uint64_t value, 887 uint64_t size) 888 { 889 if (nullptr == img) { return nullptr; } 890 symbols.push_back(new VariableSymbol(img->symtab()->addSymbol(section, name, value, size, type, binding, other))); 891 return symbols.back(); 892 } 893 AddSectionSymbols()894 void AmdHsaCode::AddSectionSymbols() 895 { 896 if (nullptr == img) { return; } 897 for (size_t i = 0; i < dataSections.size(); ++i) { 898 if (dataSections[i] && dataSections[i]->flags() & SHF_ALLOC) { 899 symbols.push_back(new VariableSymbol(img->symtab()->addSymbol(dataSections[i], "__hsa_section" + dataSections[i]->Name(), 0, 0, STT_SECTION, STB_LOCAL))); 900 } 901 } 902 } 903 GetSymbolByElfIndex(size_t index)904 Symbol* AmdHsaCode::GetSymbolByElfIndex(size_t index) 905 { 906 for (auto &s : symbols) { 907 if (s && index == s->Index()) { 908 return s; 909 } 910 } 911 return nullptr; 912 } 913 FindSymbol(const std::string & n)914 Symbol* AmdHsaCode::FindSymbol(const std::string &n) 915 { 916 for (auto &s : symbols) { 917 if (s && n == s->Name()) { 918 return s; 919 } 920 } 921 return nullptr; 922 } 923 AddData(amdgpu_hsa_elf_section_t s,const void * data,size_t size)924 void AmdHsaCode::AddData(amdgpu_hsa_elf_section_t s, const void* data, size_t size) 925 { 926 // getDataSection(s)->addData(data, size); 927 } 928 DebugInfo()929 Section* AmdHsaCode::DebugInfo() 930 { 931 if (!debugInfo) { 932 debugInfo = img->addSection(".debug_info", SHT_PROGBITS); 933 } 934 return debugInfo; 935 } 936 DebugLine()937 Section* AmdHsaCode::DebugLine() 938 { 939 if (!debugLine) { 940 debugLine = img->addSection(".debug_line", SHT_PROGBITS); 941 } 942 return debugLine; 943 } 944 DebugAbbrev()945 Section* AmdHsaCode::DebugAbbrev() 946 { 947 if (!debugAbbrev) { 948 debugAbbrev = img->addSection(".debug_abbrev", SHT_PROGBITS); 949 } 950 return debugAbbrev; 951 } 952 AddHsaHlDebug(const std::string & name,const void * data,size_t size)953 Section* AmdHsaCode::AddHsaHlDebug(const std::string& name, const void* data, size_t size) 954 { 955 Section* section = img->addSection(name, SHT_PROGBITS, SHF_OS_NONCONFORMING); 956 section->addData(data, size, 1); 957 return section; 958 } 959 PrintToFile(const std::string & filename)960 bool AmdHsaCode::PrintToFile(const std::string& filename) 961 { 962 std::ofstream out(filename); 963 if (out.fail()) { return false; } 964 Print(out); 965 return out.fail(); 966 } 967 Print(std::ostream & out)968 void AmdHsaCode::Print(std::ostream& out) 969 { 970 PrintNotes(out); 971 out << std::endl; 972 PrintSegments(out); 973 out << std::endl; 974 PrintSections(out); 975 out << std::endl; 976 PrintSymbols(out); 977 out << std::endl; 978 PrintMachineCode(out); 979 out << std::endl; 980 out << "AMD HSA Code Object End" << std::endl; 981 } 982 PrintNotes(std::ostream & out)983 void AmdHsaCode::PrintNotes(std::ostream& out) 984 { 985 { 986 uint32_t major_version, minor_version; 987 if (GetNoteCodeObjectVersion(&major_version, &minor_version)) { 988 out << "AMD HSA Code Object" << std::endl 989 << " Version " << major_version << "." << minor_version << std::endl; 990 } 991 } 992 { 993 uint32_t hsail_major, hsail_minor; 994 hsa_profile_t profile; 995 hsa_machine_model_t machine_model; 996 hsa_default_float_rounding_mode_t rounding_mode; 997 if (GetNoteHsail(&hsail_major, &hsail_minor, &profile, &machine_model, &rounding_mode)) { 998 out << "HSAIL " << std::endl 999 << " Version: " << hsail_major << "." << hsail_minor << std::endl 1000 << " Profile: " << HsaProfileToString(profile) 1001 << " Machine model: " << HsaMachineModelToString(machine_model) 1002 << " Default float rounding: " << HsaFloatRoundingModeToString(rounding_mode) << std::endl; 1003 } 1004 } 1005 { 1006 std::string vendor_name, architecture_name; 1007 uint32_t major_version, minor_version, stepping; 1008 if (GetNoteIsa(vendor_name, architecture_name, &major_version, &minor_version, &stepping)) { 1009 out << "ISA" << std::endl 1010 << " Vendor " << vendor_name 1011 << " Arch " << architecture_name 1012 << " Version " << major_version << ":" << minor_version << ":" << stepping << std::endl; 1013 } 1014 } 1015 { 1016 std::string producer_name, producer_options; 1017 uint32_t major, minor; 1018 if (GetNoteProducer(&major, &minor, producer_name)) { 1019 out << "Producer '" << producer_name << "' " << "Version " << major << ":" << minor << std::endl; 1020 } 1021 } 1022 { 1023 std::string producer_options; 1024 if (GetNoteProducerOptions(producer_options)) { 1025 out << "Producer options" << std::endl 1026 << " '" << producer_options << "'" << std::endl; 1027 } 1028 } 1029 } 1030 PrintSegments(std::ostream & out)1031 void AmdHsaCode::PrintSegments(std::ostream& out) 1032 { 1033 out << "Segments (total " << DataSegmentCount() << "):" << std::endl; 1034 for (size_t i = 0; i < DataSegmentCount(); ++i) { 1035 PrintSegment(out, DataSegment(i)); 1036 } 1037 } 1038 PrintSections(std::ostream & out)1039 void AmdHsaCode::PrintSections(std::ostream& out) 1040 { 1041 out << "Data Sections (total " << DataSectionCount() << "):" << std::endl; 1042 for (size_t i = 0; i < DataSectionCount(); ++i) { 1043 PrintSection(out, DataSection(i)); 1044 } 1045 out << std::endl; 1046 out << "Relocation Sections (total " << RelocationSectionCount() << "):" << std::endl; 1047 for (size_t i = 0; i < RelocationSectionCount(); ++i) { 1048 PrintSection(out, GetRelocationSection(i)); 1049 } 1050 } 1051 PrintSymbols(std::ostream & out)1052 void AmdHsaCode::PrintSymbols(std::ostream& out) 1053 { 1054 out << "Symbols (total " << SymbolCount() << "):" << std::endl; 1055 for (size_t i = 0; i < SymbolCount(); ++i) { 1056 PrintSymbol(out, GetSymbol(i)); 1057 } 1058 } 1059 PrintMachineCode(std::ostream & out)1060 void AmdHsaCode::PrintMachineCode(std::ostream& out) 1061 { 1062 if (HasHsaText()) { 1063 out << std::dec; 1064 for (size_t i = 0; i < SymbolCount(); ++i) { 1065 Symbol* sym = GetSymbol(i); 1066 if (sym->IsKernelSymbol() && sym->IsDefinition()) { 1067 amd_kernel_code_t kernel_code; 1068 HsaText()->getData(sym->SectionOffset(), &kernel_code, sizeof(amd_kernel_code_t)); 1069 out << "AMD Kernel Code for " << sym->Name() << ": " << std::endl << std::dec; 1070 PrintAmdKernelCode(out, &kernel_code); 1071 out << std::endl; 1072 } 1073 } 1074 1075 std::vector<uint8_t> isa(HsaText()->size(), 0); 1076 HsaText()->getData(0, isa.data(), HsaText()->size()); 1077 1078 out << "Disassembly:" << std::endl; 1079 PrintDisassembly(out, isa.data(), HsaText()->size(), 0); 1080 out << std::endl << std::dec; 1081 } else { 1082 out << "Machine code section is not present" << std::endl << std::endl; 1083 } 1084 } 1085 PrintSegment(std::ostream & out,Segment * segment)1086 void AmdHsaCode::PrintSegment(std::ostream& out, Segment* segment) 1087 { 1088 out << " Segment (" << segment->getSegmentIndex() << ")" << std::endl; 1089 out << " Type: " << AmdPTLoadToString(segment->type()) 1090 << " " 1091 << " Flags: " << "0x" << std::hex << std::setw(8) << std::setfill('0') << segment->flags() << std::dec 1092 << std::endl 1093 << " Image Size: " << segment->imageSize() 1094 << " " 1095 << " Memory Size: " << segment->memSize() 1096 << " " 1097 << " Align: " << segment->align() 1098 << " " 1099 << " VAddr: " << segment->vaddr() 1100 << std::endl; 1101 out << std::dec; 1102 } 1103 PrintSection(std::ostream & out,Section * section)1104 void AmdHsaCode::PrintSection(std::ostream& out, Section* section) 1105 { 1106 out << " Section " << section->Name() << " (Index " << section->getSectionIndex() << ")" << std::endl; 1107 out << " Type: " << section->type() 1108 << " " 1109 << " Flags: " << "0x" << std::hex << std::setw(8) << std::setfill('0') << section->flags() << std::dec 1110 << std::endl 1111 << " Size: " << section->size() 1112 << " " 1113 << " Address: " << section->addr() 1114 << " " 1115 << " Align: " << section->addralign() 1116 << std::endl; 1117 out << std::dec; 1118 1119 if (section->flags() & SHF_AMDGPU_HSA_CODE) { 1120 // Printed separately. 1121 return; 1122 } 1123 1124 switch (section->type()) { 1125 case SHT_NOBITS: 1126 return; 1127 case SHT_RELA: 1128 PrintRelocationData(out, section->asRelocationSection()); 1129 return; 1130 default: 1131 PrintRawData(out, section); 1132 } 1133 } 1134 PrintRawData(std::ostream & out,Section * section)1135 void AmdHsaCode::PrintRawData(std::ostream& out, Section* section) 1136 { 1137 out << " Data:" << std::endl; 1138 unsigned char *sdata = (unsigned char*)alloca(section->size()); 1139 section->getData(0, sdata, section->size()); 1140 PrintRawData(out, sdata, section->size()); 1141 } 1142 PrintRawData(std::ostream & out,const unsigned char * data,size_t size)1143 void AmdHsaCode::PrintRawData(std::ostream& out, const unsigned char *data, size_t size) 1144 { 1145 out << std::hex << std::setfill('0'); 1146 for (size_t i = 0; i < size; i += 16) { 1147 out << " " << std::setw(7) << i << ":"; 1148 1149 for (size_t j = 0; j < 16; j += 1) { 1150 uint32_t value = i + j < size ? (uint32_t)data[i + j] : 0; 1151 if (j % 2 == 0) { out << ' '; } 1152 out << std::setw(2) << value; 1153 } 1154 out << " "; 1155 1156 for (size_t j = 0; i + j < size && j < 16; j += 1) { 1157 char value = (char)data[i + j] >= 32 && (char)data[i + j] <= 126 ? (char)data[i + j] : '.'; 1158 out << value; 1159 } 1160 out << std::endl; 1161 } 1162 out << std::dec; 1163 } 1164 PrintRelocationData(std::ostream & out,RelocationSection * section)1165 void AmdHsaCode::PrintRelocationData(std::ostream& out, RelocationSection* section) 1166 { 1167 if (section->targetSection()) { 1168 out << " Relocation Entries for " << section->targetSection()->Name() << " Section (total " << section->relocationCount() << "):" << std::endl; 1169 } else { 1170 // Dynamic relocations do not have a target section, they work with 1171 // virtual addresses. 1172 out << " Dynamic Relocation Entries (total " << section->relocationCount() << "):" << std::endl; 1173 } 1174 for (size_t i = 0; i < section->relocationCount(); ++i) { 1175 out << " Relocation (Index " << i << "):" << std::endl; 1176 out << " Type: " << section->relocation(i)->type() << std::endl; 1177 out << " Symbol: " << section->relocation(i)->symbol()->name() << std::endl; 1178 out << " Offset: " << section->relocation(i)->offset() << " Addend: " << section->relocation(i)->addend() << std::endl; 1179 } 1180 out << std::dec; 1181 } 1182 PrintSymbol(std::ostream & out,Symbol * sym)1183 void AmdHsaCode::PrintSymbol(std::ostream& out, Symbol* sym) 1184 { 1185 out << " Symbol " << sym->Name() << " (Index " << sym->Index() << "):" << std::endl; 1186 if (sym->IsKernelSymbol() || sym->IsVariableSymbol()) { 1187 out << " Section: " << sym->GetSection()->Name() << " "; 1188 out << " Section Offset: " << sym->SectionOffset() << std::endl; 1189 out << " VAddr: " << sym->VAddr() << " "; 1190 out << " Size: " << sym->Size() << " "; 1191 out << " Alignment: " << sym->Alignment() << std::endl; 1192 out << " Kind: " << HsaSymbolKindToString(sym->Kind()) << " "; 1193 out << " Linkage: " << HsaSymbolLinkageToString(sym->Linkage()) << " "; 1194 out << " Definition: " << (sym->IsDefinition() ? "TRUE" : "FALSE") << std::endl; 1195 } 1196 if (sym->IsVariableSymbol()) { 1197 out << " Allocation: " << HsaVariableAllocationToString(sym->Allocation()) << " "; 1198 out << " Segment: " << HsaVariableSegmentToString(sym->Segment()) << " "; 1199 out << " Constant: " << (sym->IsConst() ? "TRUE" : "FALSE") << std::endl; 1200 } 1201 out << std::dec; 1202 } 1203 PrintMachineCode(std::ostream & out,KernelSymbol * sym)1204 void AmdHsaCode::PrintMachineCode(std::ostream& out, KernelSymbol* sym) 1205 { 1206 assert(HsaText()); 1207 amd_kernel_code_t kernel_code; 1208 HsaText()->getData(sym->SectionOffset(), &kernel_code, sizeof(amd_kernel_code_t)); 1209 1210 out << "AMD Kernel Code for " << sym->Name() << ": " << std::endl << std::dec; 1211 PrintAmdKernelCode(out, &kernel_code); 1212 out << std::endl; 1213 1214 std::vector<uint8_t> isa(HsaText()->size(), 0); 1215 HsaText()->getData(0, isa.data(), HsaText()->size()); 1216 uint64_t isa_offset = sym->SectionOffset() + kernel_code.kernel_code_entry_byte_offset; 1217 1218 out << "Disassembly for " << sym->Name() << ": " << std::endl; 1219 PrintDisassembly(out, isa.data(), HsaText()->size(), isa_offset); 1220 out << std::endl << std::dec; 1221 } 1222 PrintDisassembly(std::ostream & out,const unsigned char * isa,size_t size,uint32_t isa_offset)1223 void AmdHsaCode::PrintDisassembly(std::ostream& out, const unsigned char *isa, size_t size, uint32_t isa_offset) 1224 { 1225 #ifdef SP3_STATIC_LIB 1226 // Default asic is ci. 1227 std::string asic = "CI"; 1228 std::string vendor_name, architecture_name; 1229 uint32_t major_version, minor_version, stepping; 1230 if (GetNoteIsa(vendor_name, architecture_name, &major_version, &minor_version, &stepping)) { 1231 if (major_version == 7) { 1232 asic = "CI"; 1233 } else if (major_version == 8) { 1234 asic = "VI"; 1235 } else if (major_version == 9) { 1236 asic = "GFX9"; 1237 } else { 1238 assert(!"unknown compute capability"); 1239 } 1240 } 1241 1242 struct sp3_context *dis_state = sp3_new(); 1243 sp3_setasic(dis_state, asic.c_str()); 1244 1245 sp3_vma *dis_vma = sp3_vm_new_ptr(0, size / 4, (const uint32_t*)isa); 1246 1247 std::vector<uint32_t> comments(HsaText()->size() / 4, 0); 1248 for (size_t i = 0; i < SymbolCount(); ++i) { 1249 Symbol* sym = GetSymbol(i); 1250 if (sym->IsKernelSymbol() && sym->IsDefinition()) { 1251 comments[sym->SectionOffset() / 4] = COMMENT_AMD_KERNEL_CODE_T_BEGIN; 1252 comments[(sym->SectionOffset() + 252) / 4] = COMMENT_AMD_KERNEL_CODE_T_END; 1253 amd_kernel_code_t kernel_code; 1254 HsaText()->getData(sym->SectionOffset(), &kernel_code, sizeof(amd_kernel_code_t)); 1255 comments[(kernel_code.kernel_code_entry_byte_offset + sym->SectionOffset()) / 4] = COMMENT_KERNEL_ISA_BEGIN; 1256 } 1257 } 1258 sp3_vma *comment_vma = sp3_vm_new_ptr(0, comments.size(), (const uint32_t*)comments.data()); 1259 sp3_setcomments(dis_state, comment_vma, CommentTopCallBack, CommentRightCallBack, this); 1260 1261 // When isa_offset == 0 disassembly full hsatext section. 1262 // Otherwise disassembly only from this offset till endpgm instruction. 1263 char *text = sp3_disasm( 1264 dis_state, 1265 dis_vma, 1266 isa_offset / 4, 1267 nullptr, 1268 SP3_SHTYPE_CS, 1269 nullptr, 1270 (unsigned)(size / 4), 1271 isa_offset == 0 ? SP3DIS_FORCEVALID | SP3DIS_COMMENTS : SP3DIS_COMMENTS); 1272 1273 enum class IsaState { 1274 UNKNOWN, 1275 AMD_KERNEL_CODE_T_BEGIN, 1276 AMD_KERNEL_CODE_T, 1277 AMD_KERNEL_CODE_T_END, 1278 ISA_BEGIN, 1279 ISA, 1280 PADDING, 1281 }; 1282 1283 std::string line; 1284 char *text_ptr = text; 1285 IsaState state = IsaState::UNKNOWN; 1286 1287 uint32_t offset = 0; 1288 uint32_t padding_end = 0; 1289 std::string padding; 1290 1291 while (text_ptr && text_ptr[0] != '\0') { 1292 line.clear(); 1293 while (text_ptr[0] != '\0' && text_ptr[0] != '\n') { 1294 line.push_back(text_ptr[0]); 1295 ++text_ptr; 1296 } 1297 ltrim(line); 1298 if (text_ptr[0] == '\n') { 1299 ++text_ptr; 1300 } 1301 switch (state) { 1302 case IsaState::UNKNOWN: 1303 assert(line != "// amd_kernel_code_t end"); 1304 padding.clear(); 1305 if (line == "// amd_kernel_code_t begin") { 1306 state = IsaState::AMD_KERNEL_CODE_T_BEGIN; 1307 } else if (line == "// isa begin") { 1308 state = IsaState::ISA_BEGIN; 1309 } else if (line == "end") { 1310 out << line << std::endl; 1311 } else if (line.find("v_cndmask_b32 v0, s0, v0, vcc") != std::string::npos) { 1312 padding += " " + line + "\n"; 1313 offset = ParseInstructionOffset(line); 1314 padding_end = ParseInstructionOffset(line); 1315 state = IsaState::PADDING; 1316 } else if (line != "shader (null)") { 1317 out << " " << line << std::endl; 1318 } 1319 break; 1320 1321 case IsaState::AMD_KERNEL_CODE_T_BEGIN: 1322 assert(line != "// amd_kernel_code_t begin"); 1323 assert(line != "// amd_kernel_code_t end"); 1324 assert(line != "// isa begin"); 1325 assert(line != "end"); 1326 padding.clear(); 1327 offset = ParseInstructionOffset(line); 1328 state = IsaState::AMD_KERNEL_CODE_T; 1329 break; 1330 1331 case IsaState::AMD_KERNEL_CODE_T: 1332 assert(line != "// amd_kernel_code_t begin"); 1333 assert(line != "// isa begin"); 1334 assert(line != "end"); 1335 assert(padding.empty()); 1336 if (line == "// amd_kernel_code_t end") { 1337 state = IsaState::AMD_KERNEL_CODE_T_END; 1338 } 1339 break; 1340 1341 case IsaState::AMD_KERNEL_CODE_T_END: 1342 assert(line != "// amd_kernel_code_t begin"); 1343 assert(line != "// amd_kernel_code_t end"); 1344 assert(line != "// isa begin"); 1345 assert(line != "end"); 1346 assert(padding.empty()); 1347 for (size_t i = 0; i < SymbolCount(); ++i) { 1348 Symbol* sym = GetSymbol(i); 1349 if (sym->IsKernelSymbol() && sym->IsDefinition() && sym->SectionOffset() == offset) { 1350 std::ostream::fmtflags flags = out.flags(); 1351 char fill = out.fill(); 1352 out << " //" << std::endl; 1353 out << " // amd_kernel_code_t for " << sym->Name() 1354 << " (" << std::hex << std::setw(12) << std::setfill('0') << std::right << offset 1355 << " - " << std::setw(12) << (offset + 256) << ')' << std::endl; 1356 out << " //" << std::endl; 1357 out << std::setfill(fill); 1358 out.flags(flags); 1359 break; 1360 } 1361 } 1362 state = IsaState::UNKNOWN; 1363 break; 1364 1365 case IsaState::ISA_BEGIN: 1366 assert(line != "// amd_kernel_code_t begin"); 1367 assert(line != "// amd_kernel_code_t end"); 1368 assert(line != "// isa begin"); 1369 padding.clear(); 1370 offset = ParseInstructionOffset(line); 1371 for (size_t i = 0; i < SymbolCount(); ++i) { 1372 Symbol* sym = GetSymbol(i); 1373 if (sym->IsKernelSymbol() && sym->IsDefinition()) { 1374 amd_kernel_code_t kernel_code; 1375 HsaText()->getData(sym->SectionOffset(), &kernel_code, sizeof(amd_kernel_code_t)); 1376 if ((sym->SectionOffset() + kernel_code.kernel_code_entry_byte_offset) == offset) { 1377 out << " //" << std::endl; 1378 out << " // " << sym->Name() << ':' << std::endl; 1379 out << " //" << std::endl; 1380 break; 1381 } 1382 } 1383 } 1384 if (line == "end") { 1385 out << line << std::endl; 1386 state = IsaState::UNKNOWN; 1387 } else { 1388 out << " " << line << std::endl; 1389 state = IsaState::ISA; 1390 } 1391 break; 1392 1393 case IsaState::ISA: 1394 assert(line != "// amd_kernel_code_t end"); 1395 if (!padding.empty()) { 1396 out << padding; 1397 out.flush(); 1398 padding.clear(); 1399 } 1400 if (line == "// amd_kernel_code_t begin") { 1401 state = IsaState::AMD_KERNEL_CODE_T_BEGIN; 1402 } else if (line == "// isa begin") { 1403 state = IsaState::ISA_BEGIN; 1404 } else if (line == "end") { 1405 out << line << std::endl; 1406 state = IsaState::UNKNOWN; 1407 } else if (line.find("v_cndmask_b32 v0, s0, v0, vcc") != std::string::npos) { 1408 padding += " " + line + "\n"; 1409 offset = ParseInstructionOffset(line); 1410 padding_end = offset; 1411 state = IsaState::PADDING; 1412 } else { 1413 out << " " << line << std::endl; 1414 } 1415 break; 1416 1417 case IsaState::PADDING: 1418 assert(line != "// amd_kernel_code_t end"); 1419 if (line.find("v_cndmask_b32 v0, s0, v0, vcc") != std::string::npos) { 1420 padding += " " + line + "\n"; 1421 padding_end = ParseInstructionOffset(line); 1422 } else if (line == "// amd_kernel_code_t begin" || line == "// isa begin" || line == "end") { 1423 padding.clear(); 1424 std::ostream::fmtflags flags = out.flags(); 1425 char fill = out.fill(); 1426 out << " //" << std::endl; 1427 out << " // padding (" 1428 << std::hex << std::setw(12) << std::setfill('0') << std::right << offset 1429 << " - " << std::setw(12) << (padding_end + 4) << ')' << std::endl; 1430 out << " //" << std::endl; 1431 out << std::setfill(fill); 1432 out.flags(flags); 1433 if (line == "// amd_kernel_code_t begin") { 1434 state = IsaState::AMD_KERNEL_CODE_T_BEGIN; 1435 } else if (line == "// isa begin") { 1436 state = IsaState::ISA_BEGIN; 1437 } else if (line == "end") { 1438 out << line << std::endl; 1439 state = IsaState::UNKNOWN; 1440 } 1441 } else { 1442 padding += " " + line + "\n"; 1443 state = IsaState::ISA; 1444 } 1445 break; 1446 1447 default: 1448 assert(false); 1449 break; 1450 } 1451 } 1452 1453 sp3_free(text); 1454 sp3_close(dis_state); 1455 sp3_vm_free(dis_vma); 1456 sp3_vm_free(comment_vma); 1457 #else 1458 PrintRawData(out, isa, size); 1459 #endif // SP3_STATIC_LIB 1460 out << std::dec; 1461 } 1462 MangleSymbolName(const std::string & module_name,const std::string symbol_name)1463 std::string AmdHsaCode::MangleSymbolName(const std::string& module_name, const std::string symbol_name) 1464 { 1465 if (module_name.empty()) { 1466 return symbol_name; 1467 } else { 1468 return module_name + "::" + symbol_name; 1469 } 1470 } 1471 ElfImageError()1472 bool AmdHsaCode::ElfImageError() 1473 { 1474 out << img->output(); 1475 return false; 1476 } 1477 FromHandle(hsa_code_object_t c)1478 AmdHsaCode* AmdHsaCodeManager::FromHandle(hsa_code_object_t c) 1479 { 1480 CodeMap::iterator i = codeMap.find(c.handle); 1481 if (i == codeMap.end()) { 1482 AmdHsaCode* code = new AmdHsaCode(); 1483 const void* buffer = reinterpret_cast<const void*>(c.handle); 1484 if (!code->InitAsBuffer(buffer, 0)) { 1485 delete code; 1486 return 0; 1487 } 1488 codeMap[c.handle] = code; 1489 return code; 1490 } 1491 return i->second; 1492 } 1493 Destroy(hsa_code_object_t c)1494 bool AmdHsaCodeManager::Destroy(hsa_code_object_t c) 1495 { 1496 CodeMap::iterator i = codeMap.find(c.handle); 1497 if (i == codeMap.end()) { 1498 // Currently, we do not always create map entry for every code object buffer. 1499 return true; 1500 } 1501 delete i->second; 1502 codeMap.erase(i); 1503 return true; 1504 } 1505 PullElfV2()1506 bool AmdHsaCode::PullElfV2() 1507 { 1508 for (size_t i = 0; i < img->segmentCount(); ++i) { 1509 Segment* s = img->segment(i); 1510 if (s->type() == PT_LOAD) { 1511 dataSegments.push_back(s); 1512 } 1513 } 1514 for (size_t i = 0; i < img->sectionCount(); ++i) { 1515 Section* sec = img->section(i); 1516 if (!sec) { continue; } 1517 if ((sec->type() == SHT_PROGBITS || sec->type() == SHT_NOBITS) && 1518 !(sec->flags() & SHF_EXECINSTR)) { 1519 dataSections.push_back(sec); 1520 } else if (sec->type() == SHT_RELA) { 1521 relocationSections.push_back(sec->asRelocationSection()); 1522 } 1523 if (sec->Name() == ".text") { 1524 hsatext = sec; 1525 } 1526 } 1527 for (size_t i = 0; i < img->symtab()->symbolCount(); ++i) { 1528 amd::elf::Symbol* elfsym = img->symtab()->symbol(i); 1529 Symbol* sym = 0; 1530 switch (elfsym->type()) { 1531 case STT_AMDGPU_HSA_KERNEL: { 1532 amd::elf::Section* sec = elfsym->section(); 1533 amd_kernel_code_t akc; 1534 if (!sec) { 1535 out << "Failed to find section for symbol " << elfsym->name() << std::endl; 1536 return false; 1537 } 1538 if (!(sec->flags() & (SHF_ALLOC | SHF_EXECINSTR))) { 1539 out << "Invalid code section for symbol " << elfsym->name() << std::endl; 1540 return false; 1541 } 1542 if (!sec->getData(elfsym->value() - sec->addr(), &akc, sizeof(amd_kernel_code_t))) { 1543 out << "Failed to get AMD Kernel Code for symbol " << elfsym->name() << std::endl; 1544 return false; 1545 } 1546 sym = new KernelSymbolV2(elfsym, &akc); 1547 break; 1548 } 1549 case STT_OBJECT: 1550 case STT_COMMON: 1551 sym = new VariableSymbolV2(elfsym); 1552 break; 1553 default: 1554 break; // Skip unknown symbols. 1555 } 1556 if (sym) { symbols.push_back(sym); } 1557 } 1558 1559 return true; 1560 } 1561 KernelSymbolV2(amd::elf::Symbol * elfsym_,const amd_kernel_code_t * akc)1562 KernelSymbolV2::KernelSymbolV2(amd::elf::Symbol* elfsym_, const amd_kernel_code_t* akc) : 1563 KernelSymbol(elfsym_, akc) { } 1564 } 1565 } 1566 } 1567