//===-- AMDGPUTargetStreamer.cpp - AMDGPU Target Streamer Methods ---------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file provides AMDGPU specific target streamer methods.
11 //
12 //===----------------------------------------------------------------------===//
13
14 #include "AMDGPUTargetStreamer.h"
15 #include "AMDGPU.h"
16 #include "SIDefines.h"
17 #include "Utils/AMDGPUBaseInfo.h"
18 #include "Utils/AMDKernelCodeTUtils.h"
19 #include "llvm/ADT/Twine.h"
20 #include "llvm/BinaryFormat/ELF.h"
21 #include "llvm/IR/Constants.h"
22 #include "llvm/IR/Function.h"
23 #include "llvm/IR/Metadata.h"
24 #include "llvm/IR/Module.h"
25 #include "llvm/MC/MCContext.h"
26 #include "llvm/MC/MCELFStreamer.h"
27 #include "llvm/MC/MCObjectFileInfo.h"
28 #include "llvm/MC/MCSectionELF.h"
29 #include "llvm/Support/FormattedStream.h"
30
31 namespace llvm {
32 #include "AMDGPUPTNote.h"
33 }
34
35 using namespace llvm;
36 using namespace llvm::AMDGPU;
37
38 //===----------------------------------------------------------------------===//
39 // AMDGPUTargetStreamer
40 //===----------------------------------------------------------------------===//
41
// Mapping from processor name to the ELF header EF_AMDGPU_MACH_* e_flags
// value. The table is terminated by a { nullptr, EF_AMDGPU_MACH_NONE }
// sentinel, which getMACH/getMachName rely on to stop their linear scans.
// Several marketing names alias the same gfx machine value.
static const struct {
  const char *Name;
  unsigned Mach;
} MachTable[] = {
  // Radeon HD 2000/3000 Series (R600).
  { "r600", ELF::EF_AMDGPU_MACH_R600_R600 },
  { "r630", ELF::EF_AMDGPU_MACH_R600_R630 },
  { "rs880", ELF::EF_AMDGPU_MACH_R600_RS880 },
  { "rv670", ELF::EF_AMDGPU_MACH_R600_RV670 },
  // Radeon HD 4000 Series (R700).
  { "rv710", ELF::EF_AMDGPU_MACH_R600_RV710 },
  { "rv730", ELF::EF_AMDGPU_MACH_R600_RV730 },
  { "rv770", ELF::EF_AMDGPU_MACH_R600_RV770 },
  // Radeon HD 5000 Series (Evergreen).
  { "cedar", ELF::EF_AMDGPU_MACH_R600_CEDAR },
  { "cypress", ELF::EF_AMDGPU_MACH_R600_CYPRESS },
  { "juniper", ELF::EF_AMDGPU_MACH_R600_JUNIPER },
  { "redwood", ELF::EF_AMDGPU_MACH_R600_REDWOOD },
  { "sumo", ELF::EF_AMDGPU_MACH_R600_SUMO },
  // Radeon HD 6000 Series (Northern Islands).
  { "barts", ELF::EF_AMDGPU_MACH_R600_BARTS },
  { "caicos", ELF::EF_AMDGPU_MACH_R600_CAICOS },
  { "cayman", ELF::EF_AMDGPU_MACH_R600_CAYMAN },
  { "turks", ELF::EF_AMDGPU_MACH_R600_TURKS },
  // AMDGCN GFX6.
  { "gfx600", ELF::EF_AMDGPU_MACH_AMDGCN_GFX600 },
  { "tahiti", ELF::EF_AMDGPU_MACH_AMDGCN_GFX600 },
  { "gfx601", ELF::EF_AMDGPU_MACH_AMDGCN_GFX601 },
  { "hainan", ELF::EF_AMDGPU_MACH_AMDGCN_GFX601 },
  { "oland", ELF::EF_AMDGPU_MACH_AMDGCN_GFX601 },
  { "pitcairn", ELF::EF_AMDGPU_MACH_AMDGCN_GFX601 },
  { "verde", ELF::EF_AMDGPU_MACH_AMDGCN_GFX601 },
  // AMDGCN GFX7.
  { "gfx700", ELF::EF_AMDGPU_MACH_AMDGCN_GFX700 },
  { "kaveri", ELF::EF_AMDGPU_MACH_AMDGCN_GFX700 },
  { "gfx701", ELF::EF_AMDGPU_MACH_AMDGCN_GFX701 },
  { "hawaii", ELF::EF_AMDGPU_MACH_AMDGCN_GFX701 },
  { "gfx702", ELF::EF_AMDGPU_MACH_AMDGCN_GFX702 },
  { "gfx703", ELF::EF_AMDGPU_MACH_AMDGCN_GFX703 },
  { "kabini", ELF::EF_AMDGPU_MACH_AMDGCN_GFX703 },
  { "mullins", ELF::EF_AMDGPU_MACH_AMDGCN_GFX703 },
  { "gfx704", ELF::EF_AMDGPU_MACH_AMDGCN_GFX704 },
  { "bonaire", ELF::EF_AMDGPU_MACH_AMDGCN_GFX704 },
  // AMDGCN GFX8.
  { "gfx801", ELF::EF_AMDGPU_MACH_AMDGCN_GFX801 },
  { "carrizo", ELF::EF_AMDGPU_MACH_AMDGCN_GFX801 },
  { "gfx802", ELF::EF_AMDGPU_MACH_AMDGCN_GFX802 },
  { "iceland", ELF::EF_AMDGPU_MACH_AMDGCN_GFX802 },
  { "tonga", ELF::EF_AMDGPU_MACH_AMDGCN_GFX802 },
  { "gfx803", ELF::EF_AMDGPU_MACH_AMDGCN_GFX803 },
  { "fiji", ELF::EF_AMDGPU_MACH_AMDGCN_GFX803 },
  { "polaris10", ELF::EF_AMDGPU_MACH_AMDGCN_GFX803 },
  { "polaris11", ELF::EF_AMDGPU_MACH_AMDGCN_GFX803 },
  { "gfx810", ELF::EF_AMDGPU_MACH_AMDGCN_GFX810 },
  { "stoney", ELF::EF_AMDGPU_MACH_AMDGCN_GFX810 },
  // AMDGCN GFX9.
  { "gfx900", ELF::EF_AMDGPU_MACH_AMDGCN_GFX900 },
  { "gfx902", ELF::EF_AMDGPU_MACH_AMDGCN_GFX902 },
  { "gfx904", ELF::EF_AMDGPU_MACH_AMDGCN_GFX904 },
  { "gfx906", ELF::EF_AMDGPU_MACH_AMDGCN_GFX906 },
  // Not specified processor.
  { nullptr, ELF::EF_AMDGPU_MACH_NONE }
};
105
getMACH(StringRef GPU) const106 unsigned AMDGPUTargetStreamer::getMACH(StringRef GPU) const {
107 auto Entry = MachTable;
108 for (; Entry->Name && GPU != Entry->Name; ++Entry)
109 ;
110 return Entry->Mach;
111 }
112
getMachName(unsigned Mach)113 const char *AMDGPUTargetStreamer::getMachName(unsigned Mach) {
114 auto Entry = MachTable;
115 for (; Entry->Name && Mach != Entry->Mach; ++Entry)
116 ;
117 return Entry->Name;
118 }
119
EmitHSAMetadata(StringRef HSAMetadataString)120 bool AMDGPUTargetStreamer::EmitHSAMetadata(StringRef HSAMetadataString) {
121 HSAMD::Metadata HSAMetadata;
122 if (HSAMD::fromString(HSAMetadataString, HSAMetadata))
123 return false;
124
125 return EmitHSAMetadata(HSAMetadata);
126 }
127
128 //===----------------------------------------------------------------------===//
129 // AMDGPUTargetAsmStreamer
130 //===----------------------------------------------------------------------===//
131
// Construct a textual (assembly) target streamer that prints AMDGPU
// directives to OS.
AMDGPUTargetAsmStreamer::AMDGPUTargetAsmStreamer(MCStreamer &S,
                                                 formatted_raw_ostream &OS)
    : AMDGPUTargetStreamer(S), OS(OS) { }
135
// Print the .amdgcn_target directive with the target string quoted.
void AMDGPUTargetAsmStreamer::EmitDirectiveAMDGCNTarget(StringRef Target) {
  OS << "\t.amdgcn_target \"" << Target << "\"\n";
}
139
// Print ".hsa_code_object_version <major>,<minor>".
void AMDGPUTargetAsmStreamer::EmitDirectiveHSACodeObjectVersion(
    uint32_t Major, uint32_t Minor) {
  OS << "\t.hsa_code_object_version " <<
    Twine(Major) << "," << Twine(Minor) << '\n';
}
145
// Print ".hsa_code_object_isa <major>,<minor>,<stepping>,"<vendor>","<arch>"",
// with the vendor and architecture names quoted.
void
AMDGPUTargetAsmStreamer::EmitDirectiveHSACodeObjectISA(uint32_t Major,
                                                       uint32_t Minor,
                                                       uint32_t Stepping,
                                                       StringRef VendorName,
                                                       StringRef ArchName) {
  OS << "\t.hsa_code_object_isa " <<
    Twine(Major) << "," << Twine(Minor) << "," << Twine(Stepping) <<
    ",\"" << VendorName << "\",\"" << ArchName << "\"\n";

}
157
// Print the amd_kernel_code_t header as a textual field dump bracketed by
// .amd_kernel_code_t / .end_amd_kernel_code_t directives.
void
AMDGPUTargetAsmStreamer::EmitAMDKernelCodeT(const amd_kernel_code_t &Header) {
  OS << "\t.amd_kernel_code_t\n";
  dumpAmdKernelCode(&Header, OS, "\t\t");
  OS << "\t.end_amd_kernel_code_t\n";
}
164
EmitAMDGPUSymbolType(StringRef SymbolName,unsigned Type)165 void AMDGPUTargetAsmStreamer::EmitAMDGPUSymbolType(StringRef SymbolName,
166 unsigned Type) {
167 switch (Type) {
168 default: llvm_unreachable("Invalid AMDGPU symbol type");
169 case ELF::STT_AMDGPU_HSA_KERNEL:
170 OS << "\t.amdgpu_hsa_kernel " << SymbolName << '\n' ;
171 break;
172 }
173 }
174
// Print the .amd_amdgpu_isa directive with the ISA version string quoted.
// Always succeeds in the textual streamer.
bool AMDGPUTargetAsmStreamer::EmitISAVersion(StringRef IsaVersionString) {
  OS << "\t.amd_amdgpu_isa \"" << IsaVersionString << "\"\n";
  return true;
}
179
EmitHSAMetadata(const AMDGPU::HSAMD::Metadata & HSAMetadata)180 bool AMDGPUTargetAsmStreamer::EmitHSAMetadata(
181 const AMDGPU::HSAMD::Metadata &HSAMetadata) {
182 std::string HSAMetadataString;
183 if (HSAMD::toString(HSAMetadata, HSAMetadataString))
184 return false;
185
186 OS << '\t' << HSAMD::AssemblerDirectiveBegin << '\n';
187 OS << HSAMetadataString << '\n';
188 OS << '\t' << HSAMD::AssemblerDirectiveEnd << '\n';
189 return true;
190 }
191
EmitPALMetadata(const PALMD::Metadata & PALMetadata)192 bool AMDGPUTargetAsmStreamer::EmitPALMetadata(
193 const PALMD::Metadata &PALMetadata) {
194 std::string PALMetadataString;
195 if (PALMD::toString(PALMetadata, PALMetadataString))
196 return false;
197
198 OS << '\t' << PALMD::AssemblerDirective << PALMetadataString << '\n';
199 return true;
200 }
201
// Print a .amdhsa_kernel directive block for KernelName. Fields are only
// printed when they differ from the default kernel descriptor, except for
// the always-required next-free-VGPR/SGPR directives. Some directives are
// gated on the ISA major version (flat scratch >= 7, xnack >= 8, fp16
// overflow >= 9).
void AMDGPUTargetAsmStreamer::EmitAmdhsaKernelDescriptor(
    const MCSubtargetInfo &STI, StringRef KernelName,
    const amdhsa::kernel_descriptor_t &KD, uint64_t NextVGPR, uint64_t NextSGPR,
    bool ReserveVCC, bool ReserveFlatScr, bool ReserveXNACK) {
  amdhsa::kernel_descriptor_t DefaultKD = getDefaultAmdhsaKernelDescriptor();

  IsaInfo::IsaVersion IVersion = IsaInfo::getIsaVersion(STI.getFeatureBits());

  OS << "\t.amdhsa_kernel " << KernelName << '\n';

// Print DIRECTIVE with the bitfield's value iff the field differs between
// KD and the default descriptor.
#define PRINT_IF_NOT_DEFAULT(STREAM, DIRECTIVE, KERNEL_DESC,                   \
                             DEFAULT_KERNEL_DESC, MEMBER_NAME, FIELD_NAME)     \
  if (AMDHSA_BITS_GET(KERNEL_DESC.MEMBER_NAME, FIELD_NAME) !=                  \
      AMDHSA_BITS_GET(DEFAULT_KERNEL_DESC.MEMBER_NAME, FIELD_NAME))            \
    STREAM << "\t\t" << DIRECTIVE << " "                                       \
           << AMDHSA_BITS_GET(KERNEL_DESC.MEMBER_NAME, FIELD_NAME) << '\n';

  // The segment sizes are whole members, not bitfields, so they are compared
  // directly rather than through the macro.
  if (KD.group_segment_fixed_size != DefaultKD.group_segment_fixed_size)
    OS << "\t\t.amdhsa_group_segment_fixed_size " << KD.group_segment_fixed_size
       << '\n';
  if (KD.private_segment_fixed_size != DefaultKD.private_segment_fixed_size)
    OS << "\t\t.amdhsa_private_segment_fixed_size "
       << KD.private_segment_fixed_size << '\n';

  PRINT_IF_NOT_DEFAULT(
      OS, ".amdhsa_user_sgpr_private_segment_buffer", KD, DefaultKD,
      kernel_code_properties,
      amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER);
  PRINT_IF_NOT_DEFAULT(OS, ".amdhsa_user_sgpr_dispatch_ptr", KD, DefaultKD,
                       kernel_code_properties,
                       amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR);
  PRINT_IF_NOT_DEFAULT(OS, ".amdhsa_user_sgpr_queue_ptr", KD, DefaultKD,
                       kernel_code_properties,
                       amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR);
  PRINT_IF_NOT_DEFAULT(
      OS, ".amdhsa_user_sgpr_kernarg_segment_ptr", KD, DefaultKD,
      kernel_code_properties,
      amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR);
  PRINT_IF_NOT_DEFAULT(OS, ".amdhsa_user_sgpr_dispatch_id", KD, DefaultKD,
                       kernel_code_properties,
                       amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID);
  PRINT_IF_NOT_DEFAULT(
      OS, ".amdhsa_user_sgpr_flat_scratch_init", KD, DefaultKD,
      kernel_code_properties,
      amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT);
  PRINT_IF_NOT_DEFAULT(
      OS, ".amdhsa_user_sgpr_private_segment_size", KD, DefaultKD,
      kernel_code_properties,
      amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE);
  PRINT_IF_NOT_DEFAULT(
      OS, ".amdhsa_system_sgpr_private_segment_wavefront_offset", KD, DefaultKD,
      compute_pgm_rsrc2,
      amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_PRIVATE_SEGMENT_WAVEFRONT_OFFSET);
  PRINT_IF_NOT_DEFAULT(OS, ".amdhsa_system_sgpr_workgroup_id_x", KD, DefaultKD,
                       compute_pgm_rsrc2,
                       amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X);
  PRINT_IF_NOT_DEFAULT(OS, ".amdhsa_system_sgpr_workgroup_id_y", KD, DefaultKD,
                       compute_pgm_rsrc2,
                       amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y);
  PRINT_IF_NOT_DEFAULT(OS, ".amdhsa_system_sgpr_workgroup_id_z", KD, DefaultKD,
                       compute_pgm_rsrc2,
                       amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z);
  PRINT_IF_NOT_DEFAULT(OS, ".amdhsa_system_sgpr_workgroup_info", KD, DefaultKD,
                       compute_pgm_rsrc2,
                       amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO);
  PRINT_IF_NOT_DEFAULT(OS, ".amdhsa_system_vgpr_workitem_id", KD, DefaultKD,
                       compute_pgm_rsrc2,
                       amdhsa::COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID);

  // These directives are required.
  OS << "\t\t.amdhsa_next_free_vgpr " << NextVGPR << '\n';
  OS << "\t\t.amdhsa_next_free_sgpr " << NextSGPR << '\n';

  // The reserve directives are only printed when they differ from the
  // implicit defaults (VCC reserved; xnack default follows the subtarget).
  if (!ReserveVCC)
    OS << "\t\t.amdhsa_reserve_vcc " << ReserveVCC << '\n';
  if (IVersion.Major >= 7 && !ReserveFlatScr)
    OS << "\t\t.amdhsa_reserve_flat_scratch " << ReserveFlatScr << '\n';
  if (IVersion.Major >= 8 && ReserveXNACK != hasXNACK(STI))
    OS << "\t\t.amdhsa_reserve_xnack_mask " << ReserveXNACK << '\n';

  PRINT_IF_NOT_DEFAULT(OS, ".amdhsa_float_round_mode_32", KD, DefaultKD,
                       compute_pgm_rsrc1,
                       amdhsa::COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32);
  PRINT_IF_NOT_DEFAULT(OS, ".amdhsa_float_round_mode_16_64", KD, DefaultKD,
                       compute_pgm_rsrc1,
                       amdhsa::COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64);
  PRINT_IF_NOT_DEFAULT(OS, ".amdhsa_float_denorm_mode_32", KD, DefaultKD,
                       compute_pgm_rsrc1,
                       amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32);
  PRINT_IF_NOT_DEFAULT(OS, ".amdhsa_float_denorm_mode_16_64", KD, DefaultKD,
                       compute_pgm_rsrc1,
                       amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64);
  PRINT_IF_NOT_DEFAULT(OS, ".amdhsa_dx10_clamp", KD, DefaultKD,
                       compute_pgm_rsrc1,
                       amdhsa::COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP);
  PRINT_IF_NOT_DEFAULT(OS, ".amdhsa_ieee_mode", KD, DefaultKD,
                       compute_pgm_rsrc1,
                       amdhsa::COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE);
  // FP16 overflow control only exists on GFX9+.
  if (IVersion.Major >= 9)
    PRINT_IF_NOT_DEFAULT(OS, ".amdhsa_fp16_overflow", KD, DefaultKD,
                         compute_pgm_rsrc1,
                         amdhsa::COMPUTE_PGM_RSRC1_FP16_OVFL);
  PRINT_IF_NOT_DEFAULT(
      OS, ".amdhsa_exception_fp_ieee_invalid_op", KD, DefaultKD,
      compute_pgm_rsrc2,
      amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION);
  PRINT_IF_NOT_DEFAULT(
      OS, ".amdhsa_exception_fp_denorm_src", KD, DefaultKD, compute_pgm_rsrc2,
      amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE);
  PRINT_IF_NOT_DEFAULT(
      OS, ".amdhsa_exception_fp_ieee_div_zero", KD, DefaultKD,
      compute_pgm_rsrc2,
      amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO);
  PRINT_IF_NOT_DEFAULT(
      OS, ".amdhsa_exception_fp_ieee_overflow", KD, DefaultKD,
      compute_pgm_rsrc2,
      amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW);
  PRINT_IF_NOT_DEFAULT(
      OS, ".amdhsa_exception_fp_ieee_underflow", KD, DefaultKD,
      compute_pgm_rsrc2,
      amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW);
  PRINT_IF_NOT_DEFAULT(
      OS, ".amdhsa_exception_fp_ieee_inexact", KD, DefaultKD, compute_pgm_rsrc2,
      amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT);
  PRINT_IF_NOT_DEFAULT(
      OS, ".amdhsa_exception_int_div_zero", KD, DefaultKD, compute_pgm_rsrc2,
      amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO);
#undef PRINT_IF_NOT_DEFAULT

  OS << "\t.end_amdhsa_kernel\n";
}
333
334 //===----------------------------------------------------------------------===//
335 // AMDGPUTargetELFStreamer
336 //===----------------------------------------------------------------------===//
337
// Construct an ELF target streamer and seed the ELF header e_flags with the
// machine value for the subtarget's CPU and its XNACK support bit.
AMDGPUTargetELFStreamer::AMDGPUTargetELFStreamer(
    MCStreamer &S, const MCSubtargetInfo &STI)
    : AMDGPUTargetStreamer(S), Streamer(S) {
  MCAssembler &MCA = getStreamer().getAssembler();
  unsigned EFlags = MCA.getELFHeaderEFlags();

  // Replace any existing EF_AMDGPU_MACH field with the value for this CPU.
  EFlags &= ~ELF::EF_AMDGPU_MACH;
  EFlags |= getMACH(STI.getCPU());

  // Record whether the subtarget supports XNACK.
  EFlags &= ~ELF::EF_AMDGPU_XNACK;
  if (AMDGPU::hasXNACK(STI))
    EFlags |= ELF::EF_AMDGPU_XNACK;

  MCA.setELFHeaderEFlags(EFlags);
}
353
// The underlying streamer is always an MCELFStreamer for this subclass, so
// the downcast is safe.
MCELFStreamer &AMDGPUTargetELFStreamer::getStreamer() {
  return static_cast<MCELFStreamer &>(Streamer);
}
357
// Emit one record into the AMDGPU SHT_NOTE section following the standard
// ELF note layout: namesz, descsz, type, name (padded to 4 bytes), desc
// (padded to 4 bytes). DescSZ may be a symbol-difference expression when the
// desc size is not a compile-time constant; EmitDesc writes the desc payload.
// The current section is saved and restored around the emission.
void AMDGPUTargetELFStreamer::EmitAMDGPUNote(
    const MCExpr *DescSZ, unsigned NoteType,
    function_ref<void(MCELFStreamer &)> EmitDesc) {
  auto &S = getStreamer();
  auto &Context = S.getContext();

  // sizeof includes the NUL terminator, as the note format requires.
  auto NameSZ = sizeof(ElfNote::NoteName);

  S.PushSection();
  S.SwitchSection(Context.getELFSection(
      ElfNote::SectionName, ELF::SHT_NOTE, ELF::SHF_ALLOC));
  S.EmitIntValue(NameSZ, 4);                                  // namesz
  S.EmitValue(DescSZ, 4);                                     // descsz
  S.EmitIntValue(NoteType, 4);                                // type
  S.EmitBytes(StringRef(ElfNote::NoteName, NameSZ));          // name
  S.EmitValueToAlignment(4, 0, 1, 0);                         // padding 0
  EmitDesc(S);                                                // desc
  S.EmitValueToAlignment(4, 0, 1, 0);                         // padding 0
  S.PopSection();
}
378
// Intentionally empty: in the ELF streamer the target is already encoded in
// the ELF header e_flags (set in the constructor), so there is no separate
// record to emit for this directive.
void AMDGPUTargetELFStreamer::EmitDirectiveAMDGCNTarget(StringRef Target) {}
380
// Emit the NT_AMDGPU_HSA_CODE_OBJECT_VERSION note. The desc is two 32-bit
// integers (major, minor), so its size is the constant 8.
void AMDGPUTargetELFStreamer::EmitDirectiveHSACodeObjectVersion(
    uint32_t Major, uint32_t Minor) {

  EmitAMDGPUNote(
    MCConstantExpr::create(8, getContext()),
    ElfNote::NT_AMDGPU_HSA_CODE_OBJECT_VERSION,
    [&](MCELFStreamer &OS){
      OS.EmitIntValue(Major, 4);
      OS.EmitIntValue(Minor, 4);
    }
  );
}
393
// Emit the NT_AMDGPU_HSA_ISA note: the two string sizes, the three version
// numbers, then the NUL-terminated vendor and architecture names. The desc
// size is computed up front so a constant expression can be used.
void
AMDGPUTargetELFStreamer::EmitDirectiveHSACodeObjectISA(uint32_t Major,
                                                       uint32_t Minor,
                                                       uint32_t Stepping,
                                                       StringRef VendorName,
                                                       StringRef ArchName) {
  // +1 for the NUL terminators emitted below.
  uint16_t VendorNameSize = VendorName.size() + 1;
  uint16_t ArchNameSize = ArchName.size() + 1;

  unsigned DescSZ = sizeof(VendorNameSize) + sizeof(ArchNameSize) +
    sizeof(Major) + sizeof(Minor) + sizeof(Stepping) +
    VendorNameSize + ArchNameSize;

  EmitAMDGPUNote(
    MCConstantExpr::create(DescSZ, getContext()),
    ElfNote::NT_AMDGPU_HSA_ISA,
    [&](MCELFStreamer &OS) {
      OS.EmitIntValue(VendorNameSize, 2);
      OS.EmitIntValue(ArchNameSize, 2);
      OS.EmitIntValue(Major, 4);
      OS.EmitIntValue(Minor, 4);
      OS.EmitIntValue(Stepping, 4);
      OS.EmitBytes(VendorName);
      OS.EmitIntValue(0, 1); // NULL terminate VendorName
      OS.EmitBytes(ArchName);
      OS.EmitIntValue(0, 1); // NULL terminate ArchName
    }
  );
}
423
// Emit the amd_kernel_code_t header as raw bytes at the current position in
// the current section.
void
AMDGPUTargetELFStreamer::EmitAMDKernelCodeT(const amd_kernel_code_t &Header) {

  MCStreamer &OS = getStreamer();
  OS.PushSection();
  OS.EmitBytes(StringRef((const char*)&Header, sizeof(Header)));
  OS.PopSection();
}
432
EmitAMDGPUSymbolType(StringRef SymbolName,unsigned Type)433 void AMDGPUTargetELFStreamer::EmitAMDGPUSymbolType(StringRef SymbolName,
434 unsigned Type) {
435 MCSymbolELF *Symbol = cast<MCSymbolELF>(
436 getStreamer().getContext().getOrCreateSymbol(SymbolName));
437 Symbol->setType(Type);
438 }
439
// Emit the ISA version string as an NT_AMD_AMDGPU_ISA note. Always returns
// true in the ELF streamer.
bool AMDGPUTargetELFStreamer::EmitISAVersion(StringRef IsaVersionString) {
  // Create two labels to mark the beginning and end of the desc field
  // and a MCExpr to calculate the size of the desc field.
  auto &Context = getContext();
  auto *DescBegin = Context.createTempSymbol();
  auto *DescEnd = Context.createTempSymbol();
  auto *DescSZ = MCBinaryExpr::createSub(
    MCSymbolRefExpr::create(DescEnd, Context),
    MCSymbolRefExpr::create(DescBegin, Context), Context);

  EmitAMDGPUNote(
    DescSZ,
    ELF::NT_AMD_AMDGPU_ISA,
    [&](MCELFStreamer &OS) {
      OS.EmitLabel(DescBegin);
      OS.EmitBytes(IsaVersionString);
      OS.EmitLabel(DescEnd);
    }
  );
  return true;
}
461
// Serialize the HSA metadata to a string and emit it as an
// NT_AMD_AMDGPU_HSA_METADATA note. Returns false if serialization fails,
// true once the note is emitted.
bool AMDGPUTargetELFStreamer::EmitHSAMetadata(
    const AMDGPU::HSAMD::Metadata &HSAMetadata) {
  std::string HSAMetadataString;
  if (HSAMD::toString(HSAMetadata, HSAMetadataString))
    return false;

  // Create two labels to mark the beginning and end of the desc field
  // and a MCExpr to calculate the size of the desc field.
  auto &Context = getContext();
  auto *DescBegin = Context.createTempSymbol();
  auto *DescEnd = Context.createTempSymbol();
  auto *DescSZ = MCBinaryExpr::createSub(
    MCSymbolRefExpr::create(DescEnd, Context),
    MCSymbolRefExpr::create(DescBegin, Context), Context);

  EmitAMDGPUNote(
    DescSZ,
    ELF::NT_AMD_AMDGPU_HSA_METADATA,
    [&](MCELFStreamer &OS) {
      OS.EmitLabel(DescBegin);
      OS.EmitBytes(HSAMetadataString);
      OS.EmitLabel(DescEnd);
    }
  );
  return true;
}
488
// Emit the PAL metadata registers as an NT_AMD_AMDGPU_PAL_METADATA note, one
// 32-bit little-endian word per entry; the desc size is therefore a known
// constant. Always returns true.
bool AMDGPUTargetELFStreamer::EmitPALMetadata(
    const PALMD::Metadata &PALMetadata) {
  EmitAMDGPUNote(
    MCConstantExpr::create(PALMetadata.size() * sizeof(uint32_t), getContext()),
    ELF::NT_AMD_AMDGPU_PAL_METADATA,
    [&](MCELFStreamer &OS){
      for (auto I : PALMetadata)
        OS.EmitIntValue(I, sizeof(uint32_t));
    }
  );
  return true;
}
501
// Emit the binary amdhsa kernel descriptor for KernelName into the current
// section. A global <KernelName>.kd object symbol covers the descriptor; the
// kernel_code_entry_byte_offset field in the middle of the struct is emitted
// as the expression (kernel code symbol - descriptor symbol) so the linker
// fills in the real offset, and the bytes before/after that field are copied
// verbatim from KernelDescriptor. NextVGPR/NextSGPR/Reserve* are unused here;
// their effects are already baked into the descriptor bits.
void AMDGPUTargetELFStreamer::EmitAmdhsaKernelDescriptor(
    const MCSubtargetInfo &STI, StringRef KernelName,
    const amdhsa::kernel_descriptor_t &KernelDescriptor, uint64_t NextVGPR,
    uint64_t NextSGPR, bool ReserveVCC, bool ReserveFlatScr,
    bool ReserveXNACK) {
  auto &Streamer = getStreamer();
  auto &Context = Streamer.getContext();

  // <KernelName>.kd: a global STT_OBJECT symbol spanning the descriptor.
  MCSymbolELF *KernelDescriptorSymbol = cast<MCSymbolELF>(
      Context.getOrCreateSymbol(Twine(KernelName) + Twine(".kd")));
  KernelDescriptorSymbol->setBinding(ELF::STB_GLOBAL);
  KernelDescriptorSymbol->setType(ELF::STT_OBJECT);
  KernelDescriptorSymbol->setSize(
      MCConstantExpr::create(sizeof(KernelDescriptor), Context));

  MCSymbolELF *KernelCodeSymbol = cast<MCSymbolELF>(
      Context.getOrCreateSymbol(Twine(KernelName)));
  KernelCodeSymbol->setBinding(ELF::STB_LOCAL);

  Streamer.EmitLabel(KernelDescriptorSymbol);
  // Bytes up to (but not including) kernel_code_entry_byte_offset.
  Streamer.EmitBytes(StringRef(
      (const char*)&(KernelDescriptor),
      offsetof(amdhsa::kernel_descriptor_t, kernel_code_entry_byte_offset)));
  // FIXME: Remove the use of VK_AMDGPU_REL64 in the expression below. The
  // expression being created is:
  //   (start of kernel code) - (start of kernel descriptor)
  // It implies R_AMDGPU_REL64, but ends up being R_AMDGPU_ABS64.
  Streamer.EmitValue(MCBinaryExpr::createSub(
      MCSymbolRefExpr::create(
          KernelCodeSymbol, MCSymbolRefExpr::VK_AMDGPU_REL64, Context),
      MCSymbolRefExpr::create(
          KernelDescriptorSymbol, MCSymbolRefExpr::VK_None, Context),
      Context),
      sizeof(KernelDescriptor.kernel_code_entry_byte_offset));
  // Remaining bytes after kernel_code_entry_byte_offset.
  Streamer.EmitBytes(StringRef(
      (const char*)&(KernelDescriptor) +
      offsetof(amdhsa::kernel_descriptor_t, kernel_code_entry_byte_offset) +
      sizeof(KernelDescriptor.kernel_code_entry_byte_offset),
      sizeof(KernelDescriptor) -
      offsetof(amdhsa::kernel_descriptor_t, kernel_code_entry_byte_offset) -
      sizeof(KernelDescriptor.kernel_code_entry_byte_offset)));
}
544