1 ////////////////////////////////////////////////////////////////////////////////
2 //
3 // The University of Illinois/NCSA
4 // Open Source License (NCSA)
5 //
6 // Copyright (c) 2014-2016, Advanced Micro Devices, Inc. All rights reserved.
7 //
8 // Developed by:
9 //
10 //                 AMD Research and AMD HSA Software Development
11 //
12 //                 Advanced Micro Devices, Inc.
13 //
14 //                 www.amd.com
15 //
16 // Permission is hereby granted, free of charge, to any person obtaining a copy
17 // of this software and associated documentation files (the "Software"), to
18 // deal with the Software without restriction, including without limitation
19 // the rights to use, copy, modify, merge, publish, distribute, sublicense,
20 // and/or sell copies of the Software, and to permit persons to whom the
21 // Software is furnished to do so, subject to the following conditions:
22 //
23 //  - Redistributions of source code must retain the above copyright notice,
24 //    this list of conditions and the following disclaimers.
25 //  - Redistributions in binary form must reproduce the above copyright
26 //    notice, this list of conditions and the following disclaimers in
27 //    the documentation and/or other materials provided with the distribution.
28 //  - Neither the names of Advanced Micro Devices, Inc,
29 //    nor the names of its contributors may be used to endorse or promote
30 //    products derived from this Software without specific prior written
31 //    permission.
32 //
33 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
34 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
35 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
36 // THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
37 // OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
38 // ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
39 // DEALINGS WITH THE SOFTWARE.
40 //
41 ////////////////////////////////////////////////////////////////////////////////
42 
43 #include <assert.h>
44 #include <cstring>
45 #include <iomanip>
46 #include <algorithm>
47 #include "amd_hsa_code.hpp"
48 #include "amd_hsa_code_util.hpp"
49 #include <libelf.h>
50 #include "amd_hsa_elf.h"
51 #include <fstream>
52 #include <sstream>
53 #include <cstdlib>
54 #include <algorithm>
55 
56 #ifdef SP3_STATIC_LIB
57 #include "sp3.h"
58 #endif // SP3_STATIC_LIB
59 
60 #ifndef _WIN32
61 #define _alloca alloca
62 #endif
63 
64 namespace amd {
65 namespace hsa {
66 namespace code {
67 
68     using amd::elf::GetNoteString;
69 
IsDeclaration() const70     bool Symbol::IsDeclaration() const
71     {
72       return elfsym->type() == STT_COMMON;
73     }
74 
IsDefinition() const75     bool Symbol::IsDefinition() const
76     {
77       return !IsDeclaration();
78     }
79 
IsAgent() const80     bool Symbol::IsAgent() const
81     {
82       return elfsym->section()->flags() & SHF_AMDGPU_HSA_AGENT ? true : false;
83     }
84 
Linkage() const85     hsa_symbol_linkage_t Symbol::Linkage() const
86     {
87       return elfsym->binding() == STB_GLOBAL ? HSA_SYMBOL_LINKAGE_PROGRAM : HSA_SYMBOL_LINKAGE_MODULE;
88     }
89 
Allocation() const90     hsa_variable_allocation_t Symbol::Allocation() const
91     {
92       return IsAgent() ? HSA_VARIABLE_ALLOCATION_AGENT : HSA_VARIABLE_ALLOCATION_PROGRAM;
93     }
94 
Segment() const95     hsa_variable_segment_t Symbol::Segment() const
96     {
97       return elfsym->section()->flags() & SHF_AMDGPU_HSA_READONLY ? HSA_VARIABLE_SEGMENT_READONLY : HSA_VARIABLE_SEGMENT_GLOBAL;
98     }
99 
Size() const100     uint64_t Symbol::Size() const
101     {
102       return elfsym->size();
103     }
104 
Size32() const105     uint32_t Symbol::Size32() const
106     {
107       assert(elfsym->size() < UINT32_MAX);
108       return (uint32_t) Size();
109     }
110 
Alignment() const111     uint32_t Symbol::Alignment() const
112     {
113       assert(elfsym->section()->addralign() < UINT32_MAX);
114       return uint32_t(elfsym->section()->addralign());
115     }
116 
IsConst() const117     bool Symbol::IsConst() const
118     {
119       return elfsym->section()->flags() & SHF_WRITE ? true : false;
120     }
121 
GetInfo(hsa_code_symbol_info_t attribute,void * value)122     hsa_status_t Symbol::GetInfo(hsa_code_symbol_info_t attribute, void *value)
123     {
124       assert(value);
125 
126       switch (attribute) {
127         case HSA_CODE_SYMBOL_INFO_TYPE: {
128           *((hsa_symbol_kind_t*)value) = Kind();
129           break;
130         }
131         case HSA_CODE_SYMBOL_INFO_NAME_LENGTH: {
132           *((uint32_t*)value) = GetSymbolName().size();
133           break;
134         }
135         case HSA_CODE_SYMBOL_INFO_NAME: {
136           std::string SymbolName = GetSymbolName();
137           memset(value, 0x0, SymbolName.size());
138           memcpy(value, SymbolName.c_str(), SymbolName.size());
139           break;
140         }
141         case HSA_CODE_SYMBOL_INFO_MODULE_NAME_LENGTH: {
142           *((uint32_t*)value) = GetModuleName().size();
143           break;
144         }
145         case HSA_CODE_SYMBOL_INFO_MODULE_NAME: {
146           std::string ModuleName = GetModuleName();
147           memset(value, 0x0, ModuleName.size());
148           memcpy(value, ModuleName.c_str(), ModuleName.size());
149           break;
150         }
151         case HSA_CODE_SYMBOL_INFO_LINKAGE: {
152           *((hsa_symbol_linkage_t*)value) = Linkage();
153           break;
154         }
155         case HSA_CODE_SYMBOL_INFO_IS_DEFINITION: {
156           *((bool*)value) = IsDefinition();
157           break;
158         }
159         default: {
160           return HSA_STATUS_ERROR_INVALID_ARGUMENT;
161         }
162       }
163       return HSA_STATUS_SUCCESS;
164     }
165 
GetModuleName() const166     std::string Symbol::GetModuleName() const {
167       std::string FullName = Name();
168       return FullName.rfind(":") != std::string::npos ?
169         FullName.substr(0, FullName.find(":")) : "";
170     }
171 
GetSymbolName() const172     std::string Symbol::GetSymbolName() const {
173       std::string FullName = Name();
174       return FullName.rfind(":") != std::string::npos ?
175         FullName.substr(FullName.rfind(":") + 1) : FullName;
176     }
177 
ToHandle(Symbol * sym)178     hsa_code_symbol_t Symbol::ToHandle(Symbol* sym)
179     {
180       hsa_code_symbol_t s;
181       s.handle = reinterpret_cast<uint64_t>(sym);
182       return s;
183     }
184 
FromHandle(hsa_code_symbol_t s)185     Symbol* Symbol::FromHandle(hsa_code_symbol_t s)
186     {
187       return reinterpret_cast<Symbol*>(s.handle);
188     }
189 
KernelSymbol(amd::elf::Symbol * elfsym_,const amd_kernel_code_t * akc)190     KernelSymbol::KernelSymbol(amd::elf::Symbol* elfsym_, const amd_kernel_code_t* akc)
191         : Symbol(elfsym_)
192         , kernarg_segment_size(0)
193         , kernarg_segment_alignment(0)
194         , group_segment_size(0)
195         , private_segment_size(0)
196         , is_dynamic_callstack(0)
197     {
198       if (akc) {
199         kernarg_segment_size = (uint32_t) akc->kernarg_segment_byte_size;
200         kernarg_segment_alignment = (uint32_t) (1 << akc->kernarg_segment_alignment);
201         group_segment_size = uint32_t(akc->workgroup_group_segment_byte_size);
202         private_segment_size = uint32_t(akc->workitem_private_segment_byte_size);
203         is_dynamic_callstack =
204           AMD_HSA_BITS_GET(akc->kernel_code_properties, AMD_KERNEL_CODE_PROPERTIES_IS_DYNAMIC_CALLSTACK) ? true : false;
205       }
206     }
207 
GetInfo(hsa_code_symbol_info_t attribute,void * value)208     hsa_status_t KernelSymbol::GetInfo(hsa_code_symbol_info_t attribute, void *value)
209     {
210       assert(value);
211       switch (attribute) {
212         case HSA_CODE_SYMBOL_INFO_KERNEL_KERNARG_SEGMENT_SIZE: {
213           *((uint32_t*)value) = kernarg_segment_size;
214           break;
215         }
216         case HSA_CODE_SYMBOL_INFO_KERNEL_KERNARG_SEGMENT_ALIGNMENT: {
217           *((uint32_t*)value) = kernarg_segment_alignment;
218           break;
219         }
220         case HSA_CODE_SYMBOL_INFO_KERNEL_GROUP_SEGMENT_SIZE: {
221           *((uint32_t*)value) = group_segment_size;
222           break;
223         }
224         case HSA_CODE_SYMBOL_INFO_KERNEL_PRIVATE_SEGMENT_SIZE: {
225           *((uint32_t*)value) = private_segment_size;
226           break;
227         }
228         case HSA_CODE_SYMBOL_INFO_KERNEL_DYNAMIC_CALLSTACK: {
229           *((bool*)value) = is_dynamic_callstack;
230           break;
231         }
232         default: {
233           return Symbol::GetInfo(attribute, value);
234         }
235       }
236       return HSA_STATUS_SUCCESS;
237     }
238 
GetInfo(hsa_code_symbol_info_t attribute,void * value)239     hsa_status_t VariableSymbol::GetInfo(hsa_code_symbol_info_t attribute, void *value)
240     {
241       assert(value);
242       switch (attribute) {
243         case HSA_CODE_SYMBOL_INFO_VARIABLE_ALLOCATION: {
244           *((hsa_variable_allocation_t*)value) = Allocation();
245           break;
246         }
247         case HSA_CODE_SYMBOL_INFO_VARIABLE_SEGMENT: {
248           *((hsa_variable_segment_t*)value) = Segment();
249           break;
250         }
251         case HSA_CODE_SYMBOL_INFO_VARIABLE_ALIGNMENT: {
252           *((uint32_t*)value) = Alignment();
253           break;
254         }
255         case HSA_CODE_SYMBOL_INFO_VARIABLE_SIZE: {
256           *((uint32_t*)value) = Size();
257           break;
258         }
259         case HSA_CODE_SYMBOL_INFO_VARIABLE_IS_CONST: {
260           *((bool*)value) = IsConst();
261           break;
262         }
263         default: {
264           return Symbol::GetInfo(attribute, value);
265         }
266       }
267       return HSA_STATUS_SUCCESS;
268     }
269 
AmdHsaCode(bool combineDataSegments_)270     AmdHsaCode::AmdHsaCode(bool combineDataSegments_)
271       : img(nullptr),
272         combineDataSegments(combineDataSegments_),
273         hsatext(0), imageInit(0), samplerInit(0),
274         debugInfo(0), debugLine(0), debugAbbrev(0)
275     {
276       for (unsigned i = 0; i < AMDGPU_HSA_SEGMENT_LAST; ++i) {
277         for (unsigned j = 0; j < 2; ++j) {
278           hsaSegments[i][j] = 0;
279         }
280       }
281       for (unsigned i = 0; i < AMDGPU_HSA_SECTION_LAST; ++i) {
282         hsaSections[i] = 0;
283       }
284     }
285 
~AmdHsaCode()286     AmdHsaCode::~AmdHsaCode()
287     {
288       for (Symbol* sym : symbols) { delete sym; }
289     }
290 
PullElf()291     bool AmdHsaCode::PullElf()
292     {
293       uint32_t majorVersion, minorVersion;
294       if (!GetNoteCodeObjectVersion(&majorVersion, &minorVersion)) {
295         return false;
296       }
297       if (majorVersion >= 2) {
298         return PullElfV2();
299       } else {
300         return PullElfV1();
301       }
302     }
303 
PullElfV1()304     bool AmdHsaCode::PullElfV1()
305     {
306       for (size_t i = 0; i < img->segmentCount(); ++i) {
307         Segment* s = img->segment(i);
308         if (s->type() == PT_AMDGPU_HSA_LOAD_GLOBAL_PROGRAM ||
309             s->type() == PT_AMDGPU_HSA_LOAD_GLOBAL_AGENT ||
310             s->type() == PT_AMDGPU_HSA_LOAD_READONLY_AGENT ||
311             s->type() == PT_AMDGPU_HSA_LOAD_CODE_AGENT) {
312           dataSegments.push_back(s);
313         }
314       }
315       for (size_t i = 0; i < img->sectionCount(); ++i) {
316         Section* sec = img->section(i);
317         if (!sec) { continue; }
318         if ((sec->type() == SHT_PROGBITS || sec->type() == SHT_NOBITS) &&
319             (sec->flags() & (SHF_AMDGPU_HSA_AGENT | SHF_AMDGPU_HSA_GLOBAL | SHF_AMDGPU_HSA_READONLY | SHF_AMDGPU_HSA_CODE))) {
320           dataSections.push_back(sec);
321         } else if (sec->type() == SHT_RELA) {
322           relocationSections.push_back(sec->asRelocationSection());
323         }
324         if (sec->Name() == ".hsatext") {
325           hsatext = sec;
326         }
327       }
328       for (size_t i = 0; i < img->symtab()->symbolCount(); ++i) {
329         amd::elf::Symbol* elfsym = img->symtab()->symbol(i);
330         Symbol* sym = 0;
331         switch (elfsym->type()) {
332         case STT_AMDGPU_HSA_KERNEL: {
333           amd::elf::Section* sec = elfsym->section();
334           amd_kernel_code_t akc;
335           if (!sec) {
336             out << "Failed to find section for symbol " << elfsym->name() << std::endl;
337             return false;
338           }
339           if (!(sec->flags() & (SHF_AMDGPU_HSA_AGENT | SHF_AMDGPU_HSA_CODE | SHF_EXECINSTR))) {
340             out << "Invalid code section for symbol " << elfsym->name() << std::endl;
341             return false;
342           }
343           if (!sec->getData(elfsym->value(), &akc, sizeof(amd_kernel_code_t))) {
344             out << "Failed to get AMD Kernel Code for symbol " << elfsym->name() << std::endl;
345             return false;
346           }
347           sym = new KernelSymbol(elfsym, &akc);
348           break;
349         }
350         case STT_OBJECT:
351         case STT_COMMON:
352           sym = new VariableSymbol(elfsym);
353           break;
354         default:
355           break; // Skip unknown symbols.
356         }
357         if (sym) { symbols.push_back(sym); }
358       }
359 
360       return true;
361     }
362 
LoadFromFile(const std::string & filename)363     bool AmdHsaCode::LoadFromFile(const std::string& filename)
364     {
365       if (!img) { img.reset(amd::elf::NewElf64Image()); }
366       if (!img->loadFromFile(filename)) { return ElfImageError(); }
367       if (!PullElf()) { return ElfImageError(); }
368       return true;
369     }
370 
SaveToFile(const std::string & filename)371     bool AmdHsaCode::SaveToFile(const std::string& filename)
372     {
373       return img->saveToFile(filename) || ElfImageError();
374     }
375 
WriteToBuffer(void * buffer)376     bool AmdHsaCode::WriteToBuffer(void* buffer)
377     {
378       return img->copyToBuffer(buffer, ElfSize()) || ElfImageError();
379     }
380 
381 
InitFromBuffer(const void * buffer,size_t size)382     bool AmdHsaCode::InitFromBuffer(const void* buffer, size_t size)
383     {
384       if (!img) { img.reset(amd::elf::NewElf64Image()); }
385       if (!img->initFromBuffer(buffer, size)) { return ElfImageError(); }
386       if (!PullElf()) { return ElfImageError(); }
387       return true;
388     }
389 
InitAsBuffer(const void * buffer,size_t size)390     bool AmdHsaCode::InitAsBuffer(const void* buffer, size_t size)
391     {
392       if (!img) { img.reset(amd::elf::NewElf64Image()); }
393       if (!img->initAsBuffer(buffer, size)) { return ElfImageError(); }
394       if (!PullElf()) { return ElfImageError(); }
395       return true;
396     }
397 
InitAsHandle(hsa_code_object_t code_object)398     bool AmdHsaCode::InitAsHandle(hsa_code_object_t code_object)
399     {
400       void *elfmemrd = reinterpret_cast<void*>(code_object.handle);
401       if (!elfmemrd) { return false; }
402       return InitAsBuffer(elfmemrd, 0);
403     }
404 
InitNew(bool xnack)405     bool AmdHsaCode::InitNew(bool xnack)
406     {
407       if (!img) {
408         img.reset(amd::elf::NewElf64Image());
409         uint32_t flags = 0;
410         if (xnack) { flags |= EF_AMDGPU_XNACK; }
411         return img->initNew(EM_AMDGPU, ET_EXEC, ELFOSABI_AMDGPU_HSA, ELFABIVERSION_AMDGPU_HSA, flags) ||
412           ElfImageError(); // FIXME: elfutils libelf does not allow program headers in ET_REL file type, so change it later in finalizer.
413       }
414       return false;
415     }
416 
Freeze()417     bool AmdHsaCode::Freeze()
418     {
419       return img->Freeze() || ElfImageError();
420     }
421 
GetHandle()422     hsa_code_object_t AmdHsaCode::GetHandle()
423     {
424       hsa_code_object_t code_object;
425       code_object.handle = reinterpret_cast<uint64_t>(img->data());
426       return code_object;
427     }
428 
ElfData()429     const char* AmdHsaCode::ElfData()
430     {
431       return img->data();
432     }
433 
ElfSize()434     uint64_t AmdHsaCode::ElfSize()
435     {
436       return img->size();
437     }
438 
Validate()439     bool AmdHsaCode::Validate()
440     {
441       if (!img->Validate()) { return ElfImageError(); }
442       if (img->Machine() != EM_AMDGPU) {
443         out << "ELF error: Invalid machine" << std::endl;
444         return false;
445       }
446       return true;
447     }
448 
AddAmdNote(uint32_t type,const void * desc,uint32_t desc_size)449     void AmdHsaCode::AddAmdNote(uint32_t type, const void* desc, uint32_t desc_size)
450     {
451       img->note()->addNote("AMD", type, desc, desc_size);
452     }
453 
AddNoteCodeObjectVersion(uint32_t major,uint32_t minor)454     void AmdHsaCode::AddNoteCodeObjectVersion(uint32_t major, uint32_t minor)
455     {
456       amdgpu_hsa_note_code_object_version_t desc;
457       desc.major_version = major;
458       desc.minor_version = minor;
459       AddAmdNote(NT_AMDGPU_HSA_CODE_OBJECT_VERSION, &desc, sizeof(desc));
460     }
461 
GetNoteCodeObjectVersion(uint32_t * major,uint32_t * minor)462     bool AmdHsaCode::GetNoteCodeObjectVersion(uint32_t* major, uint32_t* minor)
463     {
464       amdgpu_hsa_note_code_object_version_t* desc;
465       if (!GetAmdNote(NT_AMDGPU_HSA_CODE_OBJECT_VERSION, &desc)) { return false; }
466       *major = desc->major_version;
467       *minor = desc->minor_version;
468       return true;
469     }
470 
GetNoteCodeObjectVersion(std::string & version)471     bool AmdHsaCode::GetNoteCodeObjectVersion(std::string& version)
472     {
473       amdgpu_hsa_note_code_object_version_t* desc;
474       if (!GetAmdNote(NT_AMDGPU_HSA_CODE_OBJECT_VERSION, &desc)) { return false; }
475       version.clear();
476       version += std::to_string(desc->major_version);
477       version += ".";
478       version += std::to_string(desc->minor_version);
479       return true;
480     }
481 
AddNoteHsail(uint32_t hsail_major,uint32_t hsail_minor,hsa_profile_t profile,hsa_machine_model_t machine_model,hsa_default_float_rounding_mode_t rounding_mode)482     void AmdHsaCode::AddNoteHsail(uint32_t hsail_major, uint32_t hsail_minor, hsa_profile_t profile, hsa_machine_model_t machine_model, hsa_default_float_rounding_mode_t rounding_mode)
483     {
484       amdgpu_hsa_note_hsail_t desc;
485       memset(&desc, 0, sizeof(desc));
486       desc.hsail_major_version = hsail_major;
487       desc.hsail_minor_version = hsail_minor;
488       desc.profile = uint8_t(profile);
489       desc.machine_model = uint8_t(machine_model);
490       desc.default_float_round = uint8_t(rounding_mode);
491       AddAmdNote(NT_AMDGPU_HSA_HSAIL, &desc, sizeof(desc));
492     }
493 
GetNoteHsail(uint32_t * hsail_major,uint32_t * hsail_minor,hsa_profile_t * profile,hsa_machine_model_t * machine_model,hsa_default_float_rounding_mode_t * default_float_round)494     bool AmdHsaCode::GetNoteHsail(uint32_t* hsail_major, uint32_t* hsail_minor, hsa_profile_t* profile, hsa_machine_model_t* machine_model, hsa_default_float_rounding_mode_t* default_float_round)
495     {
496       amdgpu_hsa_note_hsail_t *desc;
497       if (!GetAmdNote(NT_AMDGPU_HSA_HSAIL, &desc)) { return false; }
498       *hsail_major = desc->hsail_major_version;
499       *hsail_minor = desc->hsail_minor_version;
500       *profile = (hsa_profile_t) desc->profile;
501       *machine_model = (hsa_machine_model_t) desc->machine_model;
502       *default_float_round = (hsa_default_float_rounding_mode_t) desc->default_float_round;
503       return true;
504     }
505 
AddNoteIsa(const std::string & vendor_name,const std::string & architecture_name,uint32_t major,uint32_t minor,uint32_t stepping)506     void AmdHsaCode::AddNoteIsa(const std::string& vendor_name, const std::string& architecture_name, uint32_t major, uint32_t minor, uint32_t stepping)
507     {
508       size_t size = sizeof(amdgpu_hsa_note_producer_t) + vendor_name.length() + architecture_name.length() + 1;
509       amdgpu_hsa_note_isa_t* desc = (amdgpu_hsa_note_isa_t*) _alloca(size);
510       memset(desc, 0, size);
511       desc->vendor_name_size = vendor_name.length()+1;
512       desc->architecture_name_size = architecture_name.length()+1;
513       desc->major = major;
514       desc->minor = minor;
515       desc->stepping = stepping;
516       memcpy(desc->vendor_and_architecture_name, vendor_name.c_str(), vendor_name.length() + 1);
517       memcpy(desc->vendor_and_architecture_name + desc->vendor_name_size, architecture_name.c_str(), architecture_name.length() + 1);
518       AddAmdNote(NT_AMDGPU_HSA_ISA, desc, size);
519     }
520 
GetNoteIsa(std::string & vendor_name,std::string & architecture_name,uint32_t * major_version,uint32_t * minor_version,uint32_t * stepping)521     bool AmdHsaCode::GetNoteIsa(std::string& vendor_name, std::string& architecture_name, uint32_t* major_version, uint32_t* minor_version, uint32_t* stepping)
522     {
523       amdgpu_hsa_note_isa_t *desc;
524       if (!GetAmdNote(NT_AMDGPU_HSA_ISA, &desc)) { return false; }
525       vendor_name = GetNoteString(desc->vendor_name_size, desc->vendor_and_architecture_name);
526       architecture_name = GetNoteString(desc->architecture_name_size, desc->vendor_and_architecture_name + vendor_name.length() + 1);
527       *major_version = desc->major;
528       *minor_version = desc->minor;
529       *stepping = desc->stepping;
530       return true;
531     }
532 
GetNoteIsa(std::string & isaName)533     bool AmdHsaCode::GetNoteIsa(std::string& isaName)
534     {
535       std::string vendor_name, architecture_name;
536       uint32_t major_version, minor_version, stepping;
537       if (!GetNoteIsa(vendor_name, architecture_name, &major_version, &minor_version, &stepping)) { return false; }
538       isaName.clear();
539       isaName += vendor_name;
540       isaName += ":";
541       isaName += architecture_name;
542       isaName += ":";
543       isaName += std::to_string(major_version);
544       isaName += ":";
545       isaName += std::to_string(minor_version);
546       isaName += ":";
547       isaName += std::to_string(stepping);
548       return true;
549     }
550 
AddNoteProducer(uint32_t major,uint32_t minor,const std::string & producer)551     void AmdHsaCode::AddNoteProducer(uint32_t major, uint32_t minor, const std::string& producer)
552     {
553       size_t size = sizeof(amdgpu_hsa_note_producer_t) + producer.length();
554       amdgpu_hsa_note_producer_t* desc = (amdgpu_hsa_note_producer_t*) _alloca(size);
555       memset(desc, 0, size);
556       desc->producer_name_size = producer.length();
557       desc->producer_major_version = major;
558       desc->producer_minor_version = minor;
559       memcpy(desc->producer_name, producer.c_str(), producer.length() + 1);
560       AddAmdNote(NT_AMDGPU_HSA_PRODUCER, desc, size);
561     }
562 
GetNoteProducer(uint32_t * major,uint32_t * minor,std::string & producer_name)563     bool AmdHsaCode::GetNoteProducer(uint32_t* major, uint32_t* minor, std::string& producer_name)
564     {
565       amdgpu_hsa_note_producer_t* desc;
566       if (!GetAmdNote(NT_AMDGPU_HSA_PRODUCER, &desc)) { return false; }
567       *major = desc->producer_major_version;
568       *minor = desc->producer_minor_version;
569       producer_name = GetNoteString(desc->producer_name_size, desc->producer_name);
570       return true;
571     }
572 
AddNoteProducerOptions(const std::string & options)573     void AmdHsaCode::AddNoteProducerOptions(const std::string& options)
574     {
575       size_t size = sizeof(amdgpu_hsa_note_producer_options_t) + options.length();
576       amdgpu_hsa_note_producer_options_t *desc = (amdgpu_hsa_note_producer_options_t*) _alloca(size);
577       desc->producer_options_size = options.length();
578       memcpy(desc->producer_options, options.c_str(), options.length() + 1);
579       AddAmdNote(NT_AMDGPU_HSA_PRODUCER_OPTIONS, desc, size);
580     }
581 
AddNoteProducerOptions(int32_t call_convention,const hsa_ext_control_directives_t & user_directives,const std::string & user_options)582     void AmdHsaCode::AddNoteProducerOptions(int32_t call_convention, const hsa_ext_control_directives_t& user_directives, const std::string& user_options)
583     {
584       using namespace code_options;
585       std::ostringstream ss;
586       ss <<
587         space << "-hsa_call_convention=" << call_convention <<
588         control_directives(user_directives);
589       if (!user_options.empty()) {
590         ss << space << user_options;
591       }
592 
593       AddNoteProducerOptions(ss.str());
594     }
595 
GetNoteProducerOptions(std::string & options)596     bool AmdHsaCode::GetNoteProducerOptions(std::string& options)
597     {
598       amdgpu_hsa_note_producer_options_t* desc;
599       if (!GetAmdNote(NT_AMDGPU_HSA_PRODUCER_OPTIONS, &desc)) { return false; }
600       options = GetNoteString(desc->producer_options_size, desc->producer_options);
601       return true;
602     }
603 
GetInfo(hsa_code_object_info_t attribute,void * value)604     hsa_status_t AmdHsaCode::GetInfo(hsa_code_object_info_t attribute, void *value)
605     {
606       assert(value);
607       switch (attribute) {
608       case HSA_CODE_OBJECT_INFO_VERSION: {
609         std::string version;
610         if (!GetNoteCodeObjectVersion(version)) { return HSA_STATUS_ERROR_INVALID_CODE_OBJECT; }
611         char *svalue = (char*)value;
612         memset(svalue, 0x0, 64);
613         memcpy(svalue, version.c_str(), (std::min)(size_t(63), version.length()));
614         break;
615       }
616       case HSA_CODE_OBJECT_INFO_ISA: {
617         // TODO: Currently returns string representation instead of hsa_isa_t
618         // which is unavailable here.
619         std::string isa;
620         if (!GetNoteIsa(isa)) { return HSA_STATUS_ERROR_INVALID_CODE_OBJECT; }
621         char *svalue = (char*)value;
622         memset(svalue, 0x0, 64);
623         memcpy(svalue, isa.c_str(), (std::min)(size_t(63), isa.length()));
624         break;
625       }
626       case HSA_CODE_OBJECT_INFO_MACHINE_MODEL:
627       case HSA_CODE_OBJECT_INFO_PROFILE:
628       case HSA_CODE_OBJECT_INFO_DEFAULT_FLOAT_ROUNDING_MODE: {
629         uint32_t hsail_major, hsail_minor;
630         hsa_profile_t profile;
631         hsa_machine_model_t machine_model;
632         hsa_default_float_rounding_mode_t default_float_round;
633         if (!GetNoteHsail(&hsail_major, &hsail_minor, &profile, &machine_model, &default_float_round)) {
634           return HSA_STATUS_ERROR_INVALID_CODE_OBJECT;
635         }
636         switch (attribute) {
637         case HSA_CODE_OBJECT_INFO_MACHINE_MODEL:
638            *((hsa_machine_model_t*)value) = machine_model; break;
639         case HSA_CODE_OBJECT_INFO_PROFILE:
640           *((hsa_profile_t*)value) = profile; break;
641         case HSA_CODE_OBJECT_INFO_DEFAULT_FLOAT_ROUNDING_MODE:
642           *((hsa_default_float_rounding_mode_t*)value) = default_float_round; break;
643         default: break;
644         }
645         break;
646       }
647       default:
648         assert(false);
649         return HSA_STATUS_ERROR_INVALID_ARGUMENT;
650       }
651       return HSA_STATUS_SUCCESS;
652     }
653 
GetSymbol(const char * module_name,const char * symbol_name,hsa_code_symbol_t * s)654     hsa_status_t AmdHsaCode::GetSymbol(const char *module_name, const char *symbol_name, hsa_code_symbol_t *s)
655     {
656       std::string mname = MangleSymbolName(module_name ? module_name : "", symbol_name);
657       for (Symbol* sym : symbols) {
658         if (sym->Name() == mname) {
659           *s = Symbol::ToHandle(sym);
660           return HSA_STATUS_SUCCESS;
661         }
662       }
663       return HSA_STATUS_ERROR_INVALID_SYMBOL_NAME;
664     }
665 
IterateSymbols(hsa_code_object_t code_object,hsa_status_t (* callback)(hsa_code_object_t code_object,hsa_code_symbol_t symbol,void * data),void * data)666     hsa_status_t AmdHsaCode::IterateSymbols(hsa_code_object_t code_object,
667                                   hsa_status_t (*callback)(
668                                   hsa_code_object_t code_object,
669                                   hsa_code_symbol_t symbol,
670                                   void* data),
671                                 void* data)
672     {
673       for (Symbol* sym : symbols) {
674         hsa_code_symbol_t s = Symbol::ToHandle(sym);
675         hsa_status_t status = callback(code_object, s, data);
676         if (status != HSA_STATUS_SUCCESS) { return status; }
677       }
678       return HSA_STATUS_SUCCESS;
679     }
680 
ImageInitSection()681     Section* AmdHsaCode::ImageInitSection()
682     {
683       if (!imageInit) {
684         imageInit = img->addSection(
685           ".hsaimage_imageinit",
686           SHT_PROGBITS,
687           SHF_MERGE,
688           sizeof(amdgpu_hsa_image_descriptor_t));
689       }
690       return imageInit;
691     }
692 
AddImageInitializer(Symbol * image,uint64_t destOffset,const amdgpu_hsa_image_descriptor_t & desc)693     void AmdHsaCode::AddImageInitializer(Symbol* image, uint64_t destOffset, const amdgpu_hsa_image_descriptor_t& desc)
694     {
695       uint64_t offset = ImageInitSection()->addData(&desc, sizeof(desc), 8);
696       amd::elf::Symbol* imageInit =
697         img->symtab()->addSymbol(ImageInitSection(), "", offset, 0, STT_AMDGPU_HSA_METADATA, STB_LOCAL);
698       image->elfSym()->section()->relocationSection()->addRelocation(R_AMDGPU_INIT_IMAGE, imageInit, image->elfSym()->value() + destOffset, 0);
699     }
700 
AddImageInitializer(Symbol * image,uint64_t destOffset,amdgpu_hsa_metadata_kind16_t kind,amdgpu_hsa_image_geometry8_t geometry,amdgpu_hsa_image_channel_order8_t channel_order,amdgpu_hsa_image_channel_type8_t channel_type,uint64_t width,uint64_t height,uint64_t depth,uint64_t array)701     void AmdHsaCode::AddImageInitializer(
702       Symbol* image, uint64_t destOffset,
703       amdgpu_hsa_metadata_kind16_t kind,
704       amdgpu_hsa_image_geometry8_t geometry,
705       amdgpu_hsa_image_channel_order8_t channel_order, amdgpu_hsa_image_channel_type8_t channel_type,
706       uint64_t width, uint64_t height, uint64_t depth, uint64_t array)
707     {
708       amdgpu_hsa_image_descriptor_t desc;
709       desc.size = (uint16_t) sizeof(amdgpu_hsa_image_descriptor_t);
710       desc.kind = kind;
711       desc.geometry = geometry;
712       desc.channel_order = channel_order;
713       desc.channel_type = channel_type;
714       desc.width = width;
715       desc.height = height;
716       desc.depth = depth;
717       desc.array = array;
718       AddImageInitializer(image, destOffset, desc);
719     }
720 
721 
SamplerInitSection()722     Section* AmdHsaCode::SamplerInitSection()
723     {
724       if (!samplerInit) {
725         samplerInit = img->addSection(
726           ".hsaimage_samplerinit",
727           SHT_PROGBITS,
728           SHF_MERGE,
729           sizeof(amdgpu_hsa_sampler_descriptor_t));
730       }
731       return samplerInit;
732     }
733 
AddSamplerInitializer(Symbol * sampler,uint64_t destOffset,const amdgpu_hsa_sampler_descriptor_t & desc)734     void AmdHsaCode::AddSamplerInitializer(Symbol* sampler, uint64_t destOffset, const amdgpu_hsa_sampler_descriptor_t& desc)
735     {
736       uint64_t offset = SamplerInitSection()->addData(&desc, sizeof(desc), 8);
737       amd::elf::Symbol* samplerInit =
738         img->symtab()->addSymbol(SamplerInitSection(), "", offset, 0, STT_AMDGPU_HSA_METADATA, STB_LOCAL);
739       sampler->elfSym()->section()->relocationSection()->addRelocation(R_AMDGPU_INIT_SAMPLER, samplerInit, sampler->elfSym()->value() + destOffset, 0);
740     }
741 
AddSamplerInitializer(Symbol * sampler,uint64_t destOffset,amdgpu_hsa_sampler_coord8_t coord,amdgpu_hsa_sampler_filter8_t filter,amdgpu_hsa_sampler_addressing8_t addressing)742     void AmdHsaCode::AddSamplerInitializer(Symbol* sampler, uint64_t destOffset,
743         amdgpu_hsa_sampler_coord8_t coord,
744         amdgpu_hsa_sampler_filter8_t filter,
745         amdgpu_hsa_sampler_addressing8_t addressing)
746     {
747       amdgpu_hsa_sampler_descriptor_t desc;
748       desc.size = (uint16_t) sizeof(amdgpu_hsa_sampler_descriptor_t);
749       desc.kind = AMDGPU_HSA_METADATA_KIND_INIT_SAMP;
750       desc.coord = coord;
751       desc.filter = filter;
752       desc.addressing = addressing;
753       AddSamplerInitializer(sampler, destOffset, desc);
754     }
755 
AddInitVarWithAddress(bool large,Symbol * dest,uint64_t destOffset,Symbol * addrOf,uint64_t addrAddend)756     void AmdHsaCode::AddInitVarWithAddress(bool large, Symbol* dest, uint64_t destOffset, Symbol* addrOf, uint64_t addrAddend)
757     {
758       uint32_t rtype = large ? R_AMDGPU_64 : R_AMDGPU_32_LOW;
759       dest->elfSym()->section()->relocationSection()->addRelocation(rtype, addrOf->elfSym(), dest->elfSym()->value() + destOffset, addrAddend);
760     }
761 
NextKernelCodeOffset() const762     uint64_t AmdHsaCode::NextKernelCodeOffset() const
763     {
764       return HsaText()->nextDataOffset(256);
765     }
766 
AddKernelCode(KernelSymbol * sym,const void * code,size_t size)767     bool AmdHsaCode::AddKernelCode(KernelSymbol* sym, const void* code, size_t size)
768     {
769       assert(nullptr != sym);
770 
771       uint64_t offset = HsaText()->addData(code, size, 256);
772       sym->setValue(offset);
773       sym->setSize(size);
774       return true;
775     }
776 
AddEmptySection()777     Section* AmdHsaCode::AddEmptySection()
778     {
779       dataSections.push_back(nullptr); return nullptr;
780     }
781 
AddCodeSection(Segment * segment)782     Section* AmdHsaCode::AddCodeSection(Segment* segment)
783     {
784       if (nullptr == img) { return nullptr; }
785       Section *sec = img->addSection(
786         ".hsatext",
787         SHT_PROGBITS,
788         SHF_ALLOC | SHF_EXECINSTR | SHF_WRITE | SHF_AMDGPU_HSA_CODE | SHF_AMDGPU_HSA_AGENT,
789         0,
790         segment);
791       dataSections.push_back(sec);
792       hsatext = sec;
793       return sec;
794     }
795 
AddDataSection(const std::string & name,uint32_t type,uint64_t flags,Segment * segment)796     Section* AmdHsaCode::AddDataSection(const std::string &name,
797                                         uint32_t type,
798                                         uint64_t flags,
799                                         Segment* segment)
800     {
801       if (nullptr == img) { return nullptr; }
802       Section *sec = img->addSection(name, type, flags, 0, segment);
803       dataSections.push_back(sec);
804       return sec;
805     }
806 
InitHsaSectionSegment(amdgpu_hsa_elf_section_t section,bool combineSegments)807     void AmdHsaCode::InitHsaSectionSegment(amdgpu_hsa_elf_section_t section, bool combineSegments)
808     {
809       InitHsaSegment(AmdHsaElfSectionSegment(section), combineSegments || !IsAmdHsaElfSectionROData(section));
810     }
811 
HsaDataSection(amdgpu_hsa_elf_section_t sec,bool combineSegments)812     Section* AmdHsaCode::HsaDataSection(amdgpu_hsa_elf_section_t sec, bool combineSegments)
813     {
814       if (!hsaSections[sec]) {
815         bool writable = combineSegments || !IsAmdHsaElfSectionROData(sec);
816         Segment* segment = HsaSegment(AmdHsaElfSectionSegment(sec), writable);
817         assert(segment); // Expected to be init the segment via InitHsaSegment.
818         Section* section;
819         switch (sec) {
820         case AMDGPU_HSA_RODATA_GLOBAL_PROGRAM:
821           section = AddDataSection(".hsarodata_global_program", SHT_PROGBITS, SHF_ALLOC | SHF_AMDGPU_HSA_GLOBAL, segment); break;
822         case AMDGPU_HSA_RODATA_GLOBAL_AGENT:
823           section = AddDataSection(".hsarodata_global_agent", SHT_PROGBITS, SHF_ALLOC | SHF_AMDGPU_HSA_GLOBAL | SHF_AMDGPU_HSA_AGENT, segment); break;
824         case AMDGPU_HSA_RODATA_READONLY_AGENT:
825           section = AddDataSection(".hsarodata_readonly_agent", SHT_PROGBITS, SHF_ALLOC | SHF_AMDGPU_HSA_READONLY | SHF_AMDGPU_HSA_AGENT, segment); break;
826         case AMDGPU_HSA_DATA_GLOBAL_PROGRAM:
827           section = AddDataSection(".hsadata_global_program", SHT_PROGBITS, SHF_ALLOC | SHF_WRITE | SHF_AMDGPU_HSA_GLOBAL, segment); break;
828         case AMDGPU_HSA_DATA_GLOBAL_AGENT:
829           section = AddDataSection(".hsadata_global_agent", SHT_PROGBITS, SHF_ALLOC | SHF_WRITE | SHF_AMDGPU_HSA_GLOBAL | SHF_AMDGPU_HSA_AGENT, segment); break;
830         case AMDGPU_HSA_DATA_READONLY_AGENT:
831           section = AddDataSection(".hsadata_readonly_agent", SHT_PROGBITS, SHF_ALLOC | SHF_WRITE | SHF_AMDGPU_HSA_READONLY | SHF_AMDGPU_HSA_AGENT, segment); break;
832         case AMDGPU_HSA_BSS_GLOBAL_PROGRAM:
833           section = AddDataSection(".hsabss_global_program", SHT_NOBITS, SHF_ALLOC | SHF_WRITE | SHF_AMDGPU_HSA_GLOBAL, segment); break;
834         case AMDGPU_HSA_BSS_GLOBAL_AGENT:
835           section = AddDataSection(".hsabss_global_agent", SHT_NOBITS, SHF_ALLOC | SHF_WRITE | SHF_AMDGPU_HSA_GLOBAL | SHF_AMDGPU_HSA_AGENT, segment); break;
836         case AMDGPU_HSA_BSS_READONLY_AGENT:
837           section = AddDataSection(".hsabss_readonly_agent", SHT_NOBITS, SHF_ALLOC | SHF_WRITE | SHF_AMDGPU_HSA_READONLY | SHF_AMDGPU_HSA_AGENT, segment); break;
838         default:
839           assert(false); return 0;
840         }
841         hsaSections[sec] = section;
842       }
843       return hsaSections[sec];
844     }
845 
InitHsaSegment(amdgpu_hsa_elf_segment_t segment,bool writable)846     void AmdHsaCode::InitHsaSegment(amdgpu_hsa_elf_segment_t segment, bool writable)
847     {
848       if (!hsaSegments[segment][writable]) {
849         uint32_t flags = PF_R;
850         if (writable) { flags |= PF_W; }
851         if (segment == AMDGPU_HSA_SEGMENT_CODE_AGENT) { flags |= PF_X; }
852         uint32_t type = PT_LOOS + segment;
853         assert(segment < AMDGPU_HSA_SEGMENT_LAST);
854         hsaSegments[segment][writable] = img->initSegment(type, flags);
855       }
856     }
857 
AddHsaSegments()858     bool AmdHsaCode::AddHsaSegments()
859     {
860       if (!img->addSegments()) { return ElfImageError(); }
861       return true;
862     }
863 
HsaSegment(amdgpu_hsa_elf_segment_t segment,bool writable)864     Segment* AmdHsaCode::HsaSegment(amdgpu_hsa_elf_segment_t segment, bool writable)
865     {
866       return hsaSegments[segment][writable];
867     }
868 
AddExecutableSymbol(const std::string & name,unsigned char type,unsigned char binding,unsigned char other,Section * section)869     Symbol* AmdHsaCode::AddExecutableSymbol(const std::string &name,
870                                             unsigned char type,
871                                             unsigned char binding,
872                                             unsigned char other,
873                                             Section *section)
874     {
875       if (nullptr == img) { return nullptr; }
876       if (!section) { section = HsaText(); }
877       symbols.push_back(new KernelSymbol(img->symtab()->addSymbol(section, name, 0, 0, type, binding, other), nullptr));
878       return symbols.back();
879     }
880 
AddVariableSymbol(const std::string & name,unsigned char type,unsigned char binding,unsigned char other,Section * section,uint64_t value,uint64_t size)881     Symbol* AmdHsaCode::AddVariableSymbol(const std::string &name,
882                                           unsigned char type,
883                                           unsigned char binding,
884                                           unsigned char other,
885                                           Section *section,
886                                           uint64_t value,
887                                           uint64_t size)
888     {
889       if (nullptr == img) { return nullptr; }
890       symbols.push_back(new VariableSymbol(img->symtab()->addSymbol(section, name, value, size, type, binding, other)));
891       return symbols.back();
892     }
893 
AddSectionSymbols()894     void AmdHsaCode::AddSectionSymbols()
895     {
896       if (nullptr == img) { return; }
897       for (size_t i = 0; i < dataSections.size(); ++i) {
898         if (dataSections[i] && dataSections[i]->flags() & SHF_ALLOC) {
899           symbols.push_back(new VariableSymbol(img->symtab()->addSymbol(dataSections[i], "__hsa_section" + dataSections[i]->Name(), 0, 0, STT_SECTION, STB_LOCAL)));
900         }
901       }
902     }
903 
GetSymbolByElfIndex(size_t index)904     Symbol* AmdHsaCode::GetSymbolByElfIndex(size_t index)
905     {
906       for (auto &s : symbols) {
907         if (s && index == s->Index()) {
908           return s;
909         }
910       }
911       return nullptr;
912     }
913 
FindSymbol(const std::string & n)914     Symbol* AmdHsaCode::FindSymbol(const std::string &n)
915     {
916       for (auto &s : symbols) {
917         if (s && n == s->Name()) {
918           return s;
919         }
920       }
921       return nullptr;
922     }
923 
AddData(amdgpu_hsa_elf_section_t s,const void * data,size_t size)924     void AmdHsaCode::AddData(amdgpu_hsa_elf_section_t s, const void* data, size_t size)
925     {
926 //      getDataSection(s)->addData(data, size);
927     }
928 
DebugInfo()929     Section* AmdHsaCode::DebugInfo()
930     {
931       if (!debugInfo) {
932         debugInfo = img->addSection(".debug_info", SHT_PROGBITS);
933       }
934       return debugInfo;
935     }
936 
DebugLine()937     Section* AmdHsaCode::DebugLine()
938     {
939       if (!debugLine) {
940         debugLine = img->addSection(".debug_line", SHT_PROGBITS);
941       }
942       return debugLine;
943     }
944 
DebugAbbrev()945     Section* AmdHsaCode::DebugAbbrev()
946     {
947       if (!debugAbbrev) {
948         debugAbbrev = img->addSection(".debug_abbrev", SHT_PROGBITS);
949       }
950       return debugAbbrev;
951     }
952 
AddHsaHlDebug(const std::string & name,const void * data,size_t size)953     Section* AmdHsaCode::AddHsaHlDebug(const std::string& name, const void* data, size_t size)
954     {
955       Section* section = img->addSection(name, SHT_PROGBITS, SHF_OS_NONCONFORMING);
956       section->addData(data, size, 1);
957       return section;
958     }
959 
PrintToFile(const std::string & filename)960     bool AmdHsaCode::PrintToFile(const std::string& filename)
961     {
962       std::ofstream out(filename);
963       if (out.fail()) { return false; }
964       Print(out);
965       return out.fail();
966     }
967 
Print(std::ostream & out)968     void AmdHsaCode::Print(std::ostream& out)
969     {
970       PrintNotes(out);
971       out << std::endl;
972       PrintSegments(out);
973       out << std::endl;
974       PrintSections(out);
975       out << std::endl;
976       PrintSymbols(out);
977       out << std::endl;
978       PrintMachineCode(out);
979       out << std::endl;
980       out << "AMD HSA Code Object End" << std::endl;
981     }
982 
PrintNotes(std::ostream & out)983     void AmdHsaCode::PrintNotes(std::ostream& out)
984     {
985       {
986         uint32_t major_version, minor_version;
987         if (GetNoteCodeObjectVersion(&major_version, &minor_version)) {
988           out << "AMD HSA Code Object" << std::endl
989               << "  Version " << major_version << "." << minor_version << std::endl;
990         }
991       }
992       {
993         uint32_t hsail_major, hsail_minor;
994         hsa_profile_t profile;
995         hsa_machine_model_t machine_model;
996         hsa_default_float_rounding_mode_t rounding_mode;
997         if (GetNoteHsail(&hsail_major, &hsail_minor, &profile, &machine_model, &rounding_mode)) {
998           out << "HSAIL " << std::endl
999               << "  Version: " << hsail_major << "." << hsail_minor << std::endl
1000               << "  Profile: " << HsaProfileToString(profile)
1001               << "  Machine model: " << HsaMachineModelToString(machine_model)
1002               << "  Default float rounding: " << HsaFloatRoundingModeToString(rounding_mode) << std::endl;
1003         }
1004       }
1005       {
1006         std::string vendor_name, architecture_name;
1007         uint32_t major_version, minor_version, stepping;
1008         if (GetNoteIsa(vendor_name, architecture_name, &major_version, &minor_version, &stepping)) {
1009           out << "ISA" << std::endl
1010               << "  Vendor " << vendor_name
1011               << "  Arch " << architecture_name
1012               << "  Version " << major_version << ":" << minor_version << ":" << stepping << std::endl;
1013         }
1014       }
1015       {
1016         std::string producer_name, producer_options;
1017         uint32_t major, minor;
1018         if (GetNoteProducer(&major, &minor, producer_name)) {
1019           out << "Producer '" << producer_name << "' " << "Version " << major << ":" << minor << std::endl;
1020         }
1021       }
1022       {
1023         std::string producer_options;
1024         if (GetNoteProducerOptions(producer_options)) {
1025           out << "Producer options" << std::endl
1026               << "  '" << producer_options << "'" << std::endl;
1027         }
1028       }
1029     }
1030 
PrintSegments(std::ostream & out)1031     void AmdHsaCode::PrintSegments(std::ostream& out)
1032     {
1033       out << "Segments (total " << DataSegmentCount() << "):" << std::endl;
1034       for (size_t i = 0; i < DataSegmentCount(); ++i) {
1035         PrintSegment(out, DataSegment(i));
1036       }
1037     }
1038 
PrintSections(std::ostream & out)1039     void AmdHsaCode::PrintSections(std::ostream& out)
1040     {
1041       out << "Data Sections (total " << DataSectionCount() << "):" << std::endl;
1042       for (size_t i = 0; i < DataSectionCount(); ++i) {
1043         PrintSection(out, DataSection(i));
1044       }
1045       out << std::endl;
1046       out << "Relocation Sections (total " << RelocationSectionCount() << "):" << std::endl;
1047       for (size_t i = 0; i < RelocationSectionCount(); ++i) {
1048         PrintSection(out, GetRelocationSection(i));
1049       }
1050     }
1051 
PrintSymbols(std::ostream & out)1052     void AmdHsaCode::PrintSymbols(std::ostream& out)
1053     {
1054       out << "Symbols (total " << SymbolCount() << "):" << std::endl;
1055       for (size_t i = 0; i < SymbolCount(); ++i) {
1056         PrintSymbol(out, GetSymbol(i));
1057       }
1058     }
1059 
PrintMachineCode(std::ostream & out)1060     void AmdHsaCode::PrintMachineCode(std::ostream& out)
1061     {
1062       if (HasHsaText()) {
1063         out << std::dec;
1064         for (size_t i = 0; i < SymbolCount(); ++i) {
1065           Symbol* sym = GetSymbol(i);
1066           if (sym->IsKernelSymbol() && sym->IsDefinition()) {
1067             amd_kernel_code_t kernel_code;
1068             HsaText()->getData(sym->SectionOffset(), &kernel_code, sizeof(amd_kernel_code_t));
1069             out << "AMD Kernel Code for " << sym->Name() << ": " << std::endl << std::dec;
1070             PrintAmdKernelCode(out, &kernel_code);
1071             out << std::endl;
1072           }
1073         }
1074 
1075         std::vector<uint8_t> isa(HsaText()->size(), 0);
1076         HsaText()->getData(0, isa.data(), HsaText()->size());
1077 
1078         out << "Disassembly:" << std::endl;
1079         PrintDisassembly(out, isa.data(), HsaText()->size(), 0);
1080         out << std::endl << std::dec;
1081       } else {
1082         out << "Machine code section is not present" << std::endl << std::endl;
1083       }
1084     }
1085 
PrintSegment(std::ostream & out,Segment * segment)1086     void AmdHsaCode::PrintSegment(std::ostream& out, Segment* segment)
1087     {
1088       out << "  Segment (" << segment->getSegmentIndex() << ")" << std::endl;
1089       out << "    Type: " << AmdPTLoadToString(segment->type())
1090           << " "
1091           << "    Flags: " << "0x" << std::hex << std::setw(8) << std::setfill('0') << segment->flags() << std::dec
1092           << std::endl
1093           << "    Image Size: " << segment->imageSize()
1094           << " "
1095           << "    Memory Size: " << segment->memSize()
1096           << " "
1097           << "    Align: " << segment->align()
1098           << " "
1099           << "    VAddr: " << segment->vaddr()
1100           << std::endl;
1101       out << std::dec;
1102     }
1103 
PrintSection(std::ostream & out,Section * section)1104     void AmdHsaCode::PrintSection(std::ostream& out, Section* section)
1105     {
1106       out << "  Section " << section->Name() << " (Index " << section->getSectionIndex() << ")" << std::endl;
1107       out << "    Type: " << section->type()
1108           << " "
1109           << "    Flags: " << "0x" << std::hex << std::setw(8) << std::setfill('0') << section->flags() << std::dec
1110           << std::endl
1111           << "    Size:  " << section->size()
1112           << " "
1113           << "    Address: " << section->addr()
1114           << " "
1115           << "    Align: " << section->addralign()
1116           << std::endl;
1117       out << std::dec;
1118 
1119       if (section->flags() & SHF_AMDGPU_HSA_CODE) {
1120         // Printed separately.
1121         return;
1122       }
1123 
1124       switch (section->type()) {
1125       case SHT_NOBITS:
1126         return;
1127       case SHT_RELA:
1128         PrintRelocationData(out, section->asRelocationSection());
1129         return;
1130       default:
1131         PrintRawData(out, section);
1132       }
1133     }
1134 
PrintRawData(std::ostream & out,Section * section)1135     void AmdHsaCode::PrintRawData(std::ostream& out, Section* section)
1136     {
1137       out << "    Data:" << std::endl;
1138       unsigned char *sdata = (unsigned char*)alloca(section->size());
1139       section->getData(0, sdata, section->size());
1140       PrintRawData(out, sdata, section->size());
1141     }
1142 
PrintRawData(std::ostream & out,const unsigned char * data,size_t size)1143     void AmdHsaCode::PrintRawData(std::ostream& out, const unsigned char *data, size_t size)
1144     {
1145       out << std::hex << std::setfill('0');
1146       for (size_t i = 0; i < size; i += 16) {
1147         out << "      " << std::setw(7) << i << ":";
1148 
1149         for (size_t j = 0; j < 16; j += 1) {
1150           uint32_t value = i + j < size ? (uint32_t)data[i + j] : 0;
1151           if (j % 2 == 0) { out << ' '; }
1152           out << std::setw(2) << value;
1153         }
1154         out << "  ";
1155 
1156         for (size_t j = 0; i + j < size && j < 16; j += 1) {
1157           char value = (char)data[i + j] >= 32 && (char)data[i + j] <= 126 ? (char)data[i + j] : '.';
1158           out << value;
1159         }
1160         out << std::endl;
1161       }
1162       out << std::dec;
1163     }
1164 
PrintRelocationData(std::ostream & out,RelocationSection * section)1165     void AmdHsaCode::PrintRelocationData(std::ostream& out, RelocationSection* section)
1166     {
1167       if (section->targetSection()) {
1168         out << "    Relocation Entries for " << section->targetSection()->Name() << " Section (total " << section->relocationCount() << "):" << std::endl;
1169       } else {
1170         // Dynamic relocations do not have a target section, they work with
1171         // virtual addresses.
1172         out << "    Dynamic Relocation Entries (total " << section->relocationCount() << "):" << std::endl;
1173       }
1174       for (size_t i = 0; i < section->relocationCount(); ++i) {
1175         out << "      Relocation (Index " << i << "):" << std::endl;
1176         out << "        Type: " << section->relocation(i)->type() << std::endl;
1177         out << "        Symbol: " << section->relocation(i)->symbol()->name() << std::endl;
1178         out << "        Offset: " << section->relocation(i)->offset() << " Addend: " << section->relocation(i)->addend() << std::endl;
1179       }
1180       out << std::dec;
1181     }
1182 
PrintSymbol(std::ostream & out,Symbol * sym)1183     void AmdHsaCode::PrintSymbol(std::ostream& out, Symbol* sym)
1184     {
1185       out << "  Symbol " << sym->Name() << " (Index " << sym->Index() << "):" << std::endl;
1186       if (sym->IsKernelSymbol() || sym->IsVariableSymbol()) {
1187         out << "    Section: " << sym->GetSection()->Name() << " ";
1188         out << "    Section Offset: " << sym->SectionOffset() << std::endl;
1189         out << "    VAddr: " << sym->VAddr() << " ";
1190         out << "    Size: " << sym->Size() << " ";
1191         out << "    Alignment: " << sym->Alignment() << std::endl;
1192         out << "    Kind: " << HsaSymbolKindToString(sym->Kind()) << " ";
1193         out << "    Linkage: " << HsaSymbolLinkageToString(sym->Linkage()) << " ";
1194         out << "    Definition: " << (sym->IsDefinition() ? "TRUE" : "FALSE") << std::endl;
1195       }
1196       if (sym->IsVariableSymbol()) {
1197         out << "    Allocation: " << HsaVariableAllocationToString(sym->Allocation()) << " ";
1198         out << "    Segment: " << HsaVariableSegmentToString(sym->Segment()) << " ";
1199         out << "    Constant: " << (sym->IsConst() ? "TRUE" : "FALSE") << std::endl;
1200       }
1201       out << std::dec;
1202     }
1203 
PrintMachineCode(std::ostream & out,KernelSymbol * sym)1204     void AmdHsaCode::PrintMachineCode(std::ostream& out, KernelSymbol* sym)
1205     {
1206       assert(HsaText());
1207       amd_kernel_code_t kernel_code;
1208       HsaText()->getData(sym->SectionOffset(), &kernel_code, sizeof(amd_kernel_code_t));
1209 
1210       out << "AMD Kernel Code for " << sym->Name() << ": " << std::endl << std::dec;
1211       PrintAmdKernelCode(out, &kernel_code);
1212       out << std::endl;
1213 
1214       std::vector<uint8_t> isa(HsaText()->size(), 0);
1215       HsaText()->getData(0, isa.data(), HsaText()->size());
1216       uint64_t isa_offset = sym->SectionOffset() + kernel_code.kernel_code_entry_byte_offset;
1217 
1218       out << "Disassembly for " << sym->Name() << ": " << std::endl;
1219       PrintDisassembly(out, isa.data(), HsaText()->size(), isa_offset);
1220       out << std::endl << std::dec;
1221     }
1222 
PrintDisassembly(std::ostream & out,const unsigned char * isa,size_t size,uint32_t isa_offset)1223     void AmdHsaCode::PrintDisassembly(std::ostream& out, const unsigned char *isa, size_t size, uint32_t isa_offset)
1224     {
1225     #ifdef SP3_STATIC_LIB
1226       // Default asic is ci.
1227       std::string asic = "CI";
1228       std::string vendor_name, architecture_name;
1229       uint32_t major_version, minor_version, stepping;
1230       if (GetNoteIsa(vendor_name, architecture_name, &major_version, &minor_version, &stepping)) {
1231         if (major_version == 7) {
1232           asic = "CI";
1233         } else if (major_version == 8) {
1234           asic = "VI";
1235         } else if (major_version == 9) {
1236           asic = "GFX9";
1237         } else {
1238           assert(!"unknown compute capability");
1239         }
1240       }
1241 
1242       struct sp3_context *dis_state = sp3_new();
1243       sp3_setasic(dis_state, asic.c_str());
1244 
1245       sp3_vma *dis_vma = sp3_vm_new_ptr(0, size / 4, (const uint32_t*)isa);
1246 
1247       std::vector<uint32_t> comments(HsaText()->size() / 4, 0);
1248       for (size_t i = 0; i < SymbolCount(); ++i) {
1249         Symbol* sym = GetSymbol(i);
1250         if (sym->IsKernelSymbol() && sym->IsDefinition()) {
1251           comments[sym->SectionOffset() / 4] = COMMENT_AMD_KERNEL_CODE_T_BEGIN;
1252           comments[(sym->SectionOffset() + 252) / 4] = COMMENT_AMD_KERNEL_CODE_T_END;
1253           amd_kernel_code_t kernel_code;
1254           HsaText()->getData(sym->SectionOffset(), &kernel_code, sizeof(amd_kernel_code_t));
1255           comments[(kernel_code.kernel_code_entry_byte_offset + sym->SectionOffset()) / 4] = COMMENT_KERNEL_ISA_BEGIN;
1256         }
1257       }
1258       sp3_vma *comment_vma = sp3_vm_new_ptr(0, comments.size(), (const uint32_t*)comments.data());
1259       sp3_setcomments(dis_state, comment_vma, CommentTopCallBack, CommentRightCallBack, this);
1260 
1261       // When isa_offset == 0 disassembly full hsatext section.
1262       // Otherwise disassembly only from this offset till endpgm instruction.
1263       char *text = sp3_disasm(
1264         dis_state,
1265         dis_vma,
1266         isa_offset / 4,
1267         nullptr,
1268         SP3_SHTYPE_CS,
1269         nullptr,
1270         (unsigned)(size / 4),
1271         isa_offset == 0 ? SP3DIS_FORCEVALID | SP3DIS_COMMENTS : SP3DIS_COMMENTS);
1272 
1273       enum class IsaState {
1274         UNKNOWN,
1275         AMD_KERNEL_CODE_T_BEGIN,
1276         AMD_KERNEL_CODE_T,
1277         AMD_KERNEL_CODE_T_END,
1278         ISA_BEGIN,
1279         ISA,
1280         PADDING,
1281       };
1282 
1283       std::string line;
1284       char *text_ptr = text;
1285       IsaState state = IsaState::UNKNOWN;
1286 
1287       uint32_t offset = 0;
1288       uint32_t padding_end = 0;
1289       std::string padding;
1290 
1291       while (text_ptr && text_ptr[0] != '\0') {
1292         line.clear();
1293         while (text_ptr[0] != '\0' && text_ptr[0] != '\n') {
1294           line.push_back(text_ptr[0]);
1295           ++text_ptr;
1296         }
1297         ltrim(line);
1298         if (text_ptr[0] == '\n') {
1299           ++text_ptr;
1300         }
1301         switch (state) {
1302         case IsaState::UNKNOWN:
1303           assert(line != "// amd_kernel_code_t end");
1304           padding.clear();
1305           if (line == "// amd_kernel_code_t begin") {
1306             state = IsaState::AMD_KERNEL_CODE_T_BEGIN;
1307           } else if (line == "// isa begin") {
1308             state = IsaState::ISA_BEGIN;
1309           } else if (line == "end") {
1310             out << line << std::endl;
1311           } else if (line.find("v_cndmask_b32  v0, s0, v0, vcc") != std::string::npos) {
1312             padding += "  " + line + "\n";
1313             offset = ParseInstructionOffset(line);
1314             padding_end = ParseInstructionOffset(line);
1315             state = IsaState::PADDING;
1316           } else if (line != "shader (null)") {
1317             out << "  " << line << std::endl;
1318           }
1319           break;
1320 
1321         case IsaState::AMD_KERNEL_CODE_T_BEGIN:
1322           assert(line != "// amd_kernel_code_t begin");
1323           assert(line != "// amd_kernel_code_t end");
1324           assert(line != "// isa begin");
1325           assert(line != "end");
1326           padding.clear();
1327           offset = ParseInstructionOffset(line);
1328           state = IsaState::AMD_KERNEL_CODE_T;
1329           break;
1330 
1331         case IsaState::AMD_KERNEL_CODE_T:
1332           assert(line != "// amd_kernel_code_t begin");
1333           assert(line != "// isa begin");
1334           assert(line != "end");
1335           assert(padding.empty());
1336           if (line == "// amd_kernel_code_t end") {
1337             state = IsaState::AMD_KERNEL_CODE_T_END;
1338           }
1339           break;
1340 
1341         case IsaState::AMD_KERNEL_CODE_T_END:
1342           assert(line != "// amd_kernel_code_t begin");
1343           assert(line != "// amd_kernel_code_t end");
1344           assert(line != "// isa begin");
1345           assert(line != "end");
1346           assert(padding.empty());
1347           for (size_t i = 0; i < SymbolCount(); ++i) {
1348             Symbol* sym = GetSymbol(i);
1349             if (sym->IsKernelSymbol() && sym->IsDefinition() && sym->SectionOffset() == offset) {
1350               std::ostream::fmtflags flags = out.flags();
1351               char fill = out.fill();
1352               out << "  //" << std::endl;
1353               out << "  // amd_kernel_code_t for " << sym->Name()
1354                   << " (" << std::hex << std::setw(12) << std::setfill('0') << std::right << offset
1355                   << " - " << std::setw(12) << (offset + 256) << ')' << std::endl;
1356               out << "  //" << std::endl;
1357               out << std::setfill(fill);
1358               out.flags(flags);
1359               break;
1360             }
1361           }
1362           state = IsaState::UNKNOWN;
1363           break;
1364 
1365         case IsaState::ISA_BEGIN:
1366           assert(line != "// amd_kernel_code_t begin");
1367           assert(line != "// amd_kernel_code_t end");
1368           assert(line != "// isa begin");
1369           padding.clear();
1370           offset = ParseInstructionOffset(line);
1371           for (size_t i = 0; i < SymbolCount(); ++i) {
1372             Symbol* sym = GetSymbol(i);
1373             if (sym->IsKernelSymbol() && sym->IsDefinition()) {
1374               amd_kernel_code_t kernel_code;
1375               HsaText()->getData(sym->SectionOffset(), &kernel_code, sizeof(amd_kernel_code_t));
1376               if ((sym->SectionOffset() + kernel_code.kernel_code_entry_byte_offset) == offset) {
1377                 out << "  //" << std::endl;
1378                 out << "  // " << sym->Name() << ':' << std::endl;
1379                 out << "  //" << std::endl;
1380                 break;
1381               }
1382             }
1383           }
1384           if (line == "end") {
1385             out << line << std::endl;
1386             state = IsaState::UNKNOWN;
1387           } else {
1388             out << "  " << line << std::endl;
1389             state = IsaState::ISA;
1390           }
1391           break;
1392 
1393         case IsaState::ISA:
1394           assert(line != "// amd_kernel_code_t end");
1395           if (!padding.empty()) {
1396             out << padding;
1397             out.flush();
1398             padding.clear();
1399           }
1400           if (line == "// amd_kernel_code_t begin") {
1401             state = IsaState::AMD_KERNEL_CODE_T_BEGIN;
1402           } else if (line == "// isa begin") {
1403             state = IsaState::ISA_BEGIN;
1404           } else if (line == "end") {
1405             out << line << std::endl;
1406             state = IsaState::UNKNOWN;
1407           } else if (line.find("v_cndmask_b32  v0, s0, v0, vcc") != std::string::npos) {
1408             padding += "  " + line + "\n";
1409             offset = ParseInstructionOffset(line);
1410             padding_end = offset;
1411             state = IsaState::PADDING;
1412           } else {
1413             out << "  " << line << std::endl;
1414           }
1415           break;
1416 
1417         case IsaState::PADDING:
1418           assert(line != "// amd_kernel_code_t end");
1419           if (line.find("v_cndmask_b32  v0, s0, v0, vcc") != std::string::npos) {
1420             padding += "  " + line + "\n";
1421             padding_end = ParseInstructionOffset(line);
1422           } else if (line == "// amd_kernel_code_t begin" || line == "// isa begin" || line == "end") {
1423               padding.clear();
1424               std::ostream::fmtflags flags = out.flags();
1425               char fill = out.fill();
1426               out << "  //" << std::endl;
1427               out << "  // padding ("
1428                   << std::hex << std::setw(12) << std::setfill('0') << std::right << offset
1429                   << " - " << std::setw(12) << (padding_end + 4) << ')' << std::endl;
1430               out << "  //" << std::endl;
1431               out << std::setfill(fill);
1432               out.flags(flags);
1433               if (line == "// amd_kernel_code_t begin") {
1434                 state = IsaState::AMD_KERNEL_CODE_T_BEGIN;
1435               } else if (line == "// isa begin") {
1436                 state = IsaState::ISA_BEGIN;
1437               } else if (line == "end") {
1438                 out << line << std::endl;
1439                 state = IsaState::UNKNOWN;
1440               }
1441           } else {
1442             padding += "  " + line + "\n";
1443             state = IsaState::ISA;
1444           }
1445           break;
1446 
1447         default:
1448           assert(false);
1449           break;
1450         }
1451       }
1452 
1453       sp3_free(text);
1454       sp3_close(dis_state);
1455       sp3_vm_free(dis_vma);
1456       sp3_vm_free(comment_vma);
1457     #else
1458       PrintRawData(out, isa, size);
1459     #endif // SP3_STATIC_LIB
1460       out << std::dec;
1461     }
1462 
MangleSymbolName(const std::string & module_name,const std::string symbol_name)1463     std::string AmdHsaCode::MangleSymbolName(const std::string& module_name, const std::string symbol_name)
1464     {
1465       if (module_name.empty()) {
1466         return symbol_name;
1467       } else {
1468         return module_name + "::" + symbol_name;
1469       }
1470     }
1471 
ElfImageError()1472     bool AmdHsaCode::ElfImageError()
1473     {
1474       out << img->output();
1475       return false;
1476     }
1477 
FromHandle(hsa_code_object_t c)1478       AmdHsaCode* AmdHsaCodeManager::FromHandle(hsa_code_object_t c)
1479       {
1480         CodeMap::iterator i = codeMap.find(c.handle);
1481         if (i == codeMap.end()) {
1482           AmdHsaCode* code = new AmdHsaCode();
1483           const void* buffer = reinterpret_cast<const void*>(c.handle);
1484           if (!code->InitAsBuffer(buffer, 0)) {
1485             delete code;
1486             return 0;
1487           }
1488           codeMap[c.handle] = code;
1489           return code;
1490         }
1491         return i->second;
1492       }
1493 
Destroy(hsa_code_object_t c)1494       bool AmdHsaCodeManager::Destroy(hsa_code_object_t c)
1495       {
1496         CodeMap::iterator i = codeMap.find(c.handle);
1497         if (i == codeMap.end()) {
1498           // Currently, we do not always create map entry for every code object buffer.
1499           return true;
1500         }
1501         delete i->second;
1502         codeMap.erase(i);
1503         return true;
1504       }
1505 
PullElfV2()1506     bool AmdHsaCode::PullElfV2()
1507     {
1508       for (size_t i = 0; i < img->segmentCount(); ++i) {
1509         Segment* s = img->segment(i);
1510         if (s->type() == PT_LOAD) {
1511           dataSegments.push_back(s);
1512         }
1513       }
1514       for (size_t i = 0; i < img->sectionCount(); ++i) {
1515         Section* sec = img->section(i);
1516         if (!sec) { continue; }
1517         if ((sec->type() == SHT_PROGBITS || sec->type() == SHT_NOBITS) &&
1518             !(sec->flags() & SHF_EXECINSTR)) {
1519           dataSections.push_back(sec);
1520         } else if (sec->type() == SHT_RELA) {
1521           relocationSections.push_back(sec->asRelocationSection());
1522         }
1523         if (sec->Name() == ".text") {
1524           hsatext = sec;
1525         }
1526       }
1527       for (size_t i = 0; i < img->symtab()->symbolCount(); ++i) {
1528         amd::elf::Symbol* elfsym = img->symtab()->symbol(i);
1529         Symbol* sym = 0;
1530         switch (elfsym->type()) {
1531         case STT_AMDGPU_HSA_KERNEL: {
1532           amd::elf::Section* sec = elfsym->section();
1533           amd_kernel_code_t akc;
1534           if (!sec) {
1535             out << "Failed to find section for symbol " << elfsym->name() << std::endl;
1536             return false;
1537           }
1538           if (!(sec->flags() & (SHF_ALLOC | SHF_EXECINSTR))) {
1539             out << "Invalid code section for symbol " << elfsym->name() << std::endl;
1540             return false;
1541           }
1542           if (!sec->getData(elfsym->value() - sec->addr(), &akc, sizeof(amd_kernel_code_t))) {
1543             out << "Failed to get AMD Kernel Code for symbol " << elfsym->name() << std::endl;
1544             return false;
1545           }
1546           sym = new KernelSymbolV2(elfsym, &akc);
1547           break;
1548         }
1549         case STT_OBJECT:
1550         case STT_COMMON:
1551           sym = new VariableSymbolV2(elfsym);
1552           break;
1553         default:
1554           break; // Skip unknown symbols.
1555         }
1556         if (sym) { symbols.push_back(sym); }
1557       }
1558 
1559       return true;
1560     }
1561 
    // Constructs a v2 kernel symbol by forwarding the ELF symbol and its
    // kernel code header to the KernelSymbol base constructor; no additional
    // state is set up here.
    KernelSymbolV2::KernelSymbolV2(amd::elf::Symbol* elfsym_, const amd_kernel_code_t* akc) :
      KernelSymbol(elfsym_, akc) { }
1564 }
1565 }
1566 }
1567