//===-- AMDGPUTargetStreamer.cpp - AMDGPU Target Streamer Methods ---------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file provides AMDGPU specific target streamer methods.
10 //
11 //===----------------------------------------------------------------------===//
12
13 #include "AMDGPUTargetStreamer.h"
14 #include "AMDGPUPTNote.h"
15 #include "AMDKernelCodeT.h"
16 #include "Utils/AMDGPUBaseInfo.h"
17 #include "Utils/AMDKernelCodeTUtils.h"
18 #include "llvm/BinaryFormat/AMDGPUMetadataVerifier.h"
19 #include "llvm/BinaryFormat/ELF.h"
20 #include "llvm/MC/MCContext.h"
21 #include "llvm/MC/MCELFStreamer.h"
22 #include "llvm/MC/MCSectionELF.h"
23 #include "llvm/Support/AMDGPUMetadata.h"
24 #include "llvm/Support/AMDHSAKernelDescriptor.h"
25 #include "llvm/Support/FormattedStream.h"
26
27 using namespace llvm;
28 using namespace llvm::AMDGPU;
29
30 //===----------------------------------------------------------------------===//
31 // AMDGPUTargetStreamer
32 //===----------------------------------------------------------------------===//
33
/// Fold the XNACK setting into the stepping number for the V2 code-object
/// encoding: gfx90x targets with XNACK enabled are encoded with the next
/// (odd) stepping, e.g. gfx900 -> gfx901. \p Sramecc is accepted for
/// interface symmetry but does not affect the V2 encoding.
static void convertIsaVersionV2(uint32_t &Major, uint32_t &Minor,
                                uint32_t &Stepping, bool Sramecc, bool Xnack) {
  (void)Sramecc;
  if (Major != 9 || Minor != 0)
    return;
  const bool IsEvenGfx9Stepping =
      Stepping == 0 || Stepping == 2 || Stepping == 4 || Stepping == 6;
  if (IsEvenGfx9Stepping && Xnack)
    ++Stepping;
}
47
EmitHSAMetadataV2(StringRef HSAMetadataString)48 bool AMDGPUTargetStreamer::EmitHSAMetadataV2(StringRef HSAMetadataString) {
49 HSAMD::Metadata HSAMetadata;
50 if (HSAMD::fromString(HSAMetadataString, HSAMetadata))
51 return false;
52 return EmitHSAMetadata(HSAMetadata);
53 }
54
EmitHSAMetadataV3(StringRef HSAMetadataString)55 bool AMDGPUTargetStreamer::EmitHSAMetadataV3(StringRef HSAMetadataString) {
56 msgpack::Document HSAMetadataDoc;
57 if (!HSAMetadataDoc.fromYAML(HSAMetadataString))
58 return false;
59 return EmitHSAMetadata(HSAMetadataDoc, false);
60 }
61
/// Map an ELF::EF_AMDGPU_MACH_* e_flags value back to its canonical GPU
/// name. This is the inverse of getElfMach(); keep the two tables in sync.
StringRef AMDGPUTargetStreamer::getArchNameFromElfMach(unsigned ElfMach) {
  AMDGPU::GPUKind AK;

  switch (ElfMach) {
  default: llvm_unreachable("Unhandled ELF::EF_AMDGPU type");
  // R600 family.
  case ELF::EF_AMDGPU_MACH_R600_R600:      AK = GK_R600;    break;
  case ELF::EF_AMDGPU_MACH_R600_R630:      AK = GK_R630;    break;
  case ELF::EF_AMDGPU_MACH_R600_RS880:     AK = GK_RS880;   break;
  case ELF::EF_AMDGPU_MACH_R600_RV670:     AK = GK_RV670;   break;
  case ELF::EF_AMDGPU_MACH_R600_RV710:     AK = GK_RV710;   break;
  case ELF::EF_AMDGPU_MACH_R600_RV730:     AK = GK_RV730;   break;
  case ELF::EF_AMDGPU_MACH_R600_RV770:     AK = GK_RV770;   break;
  case ELF::EF_AMDGPU_MACH_R600_CEDAR:     AK = GK_CEDAR;   break;
  case ELF::EF_AMDGPU_MACH_R600_CYPRESS:   AK = GK_CYPRESS; break;
  case ELF::EF_AMDGPU_MACH_R600_JUNIPER:   AK = GK_JUNIPER; break;
  case ELF::EF_AMDGPU_MACH_R600_REDWOOD:   AK = GK_REDWOOD; break;
  case ELF::EF_AMDGPU_MACH_R600_SUMO:      AK = GK_SUMO;    break;
  case ELF::EF_AMDGPU_MACH_R600_BARTS:     AK = GK_BARTS;   break;
  case ELF::EF_AMDGPU_MACH_R600_CAICOS:    AK = GK_CAICOS;  break;
  case ELF::EF_AMDGPU_MACH_R600_CAYMAN:    AK = GK_CAYMAN;  break;
  case ELF::EF_AMDGPU_MACH_R600_TURKS:     AK = GK_TURKS;   break;
  // AMDGCN family.
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX600:  AK = GK_GFX600;  break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX601:  AK = GK_GFX601;  break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX602:  AK = GK_GFX602;  break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX700:  AK = GK_GFX700;  break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX701:  AK = GK_GFX701;  break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX702:  AK = GK_GFX702;  break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX703:  AK = GK_GFX703;  break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX704:  AK = GK_GFX704;  break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX705:  AK = GK_GFX705;  break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX801:  AK = GK_GFX801;  break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX802:  AK = GK_GFX802;  break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX803:  AK = GK_GFX803;  break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX805:  AK = GK_GFX805;  break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX810:  AK = GK_GFX810;  break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX900:  AK = GK_GFX900;  break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX902:  AK = GK_GFX902;  break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX904:  AK = GK_GFX904;  break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX906:  AK = GK_GFX906;  break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX908:  AK = GK_GFX908;  break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX909:  AK = GK_GFX909;  break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX90A:  AK = GK_GFX90A;  break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX90C:  AK = GK_GFX90C;  break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1010: AK = GK_GFX1010; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1011: AK = GK_GFX1011; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1012: AK = GK_GFX1012; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1030: AK = GK_GFX1030; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1031: AK = GK_GFX1031; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1032: AK = GK_GFX1032; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1033: AK = GK_GFX1033; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1034: AK = GK_GFX1034; break;
  case ELF::EF_AMDGPU_MACH_NONE:           AK = GK_NONE;    break;
  }

  // Try the AMDGCN name table first; fall back to R600 for older parts.
  StringRef GPUName = getArchNameAMDGCN(AK);
  if (GPUName != "")
    return GPUName;
  return getArchNameR600(AK);
}
121
/// Map a GPU name to its ELF::EF_AMDGPU_MACH_* e_flags value. This is the
/// inverse of getArchNameFromElfMach(); keep the two tables in sync.
unsigned AMDGPUTargetStreamer::getElfMach(StringRef GPU) {
  // Try the AMDGCN namespace first, then fall back to R600 names.
  AMDGPU::GPUKind AK = parseArchAMDGCN(GPU);
  if (AK == AMDGPU::GPUKind::GK_NONE)
    AK = parseArchR600(GPU);

  switch (AK) {
  // R600 family.
  case GK_R600:    return ELF::EF_AMDGPU_MACH_R600_R600;
  case GK_R630:    return ELF::EF_AMDGPU_MACH_R600_R630;
  case GK_RS880:   return ELF::EF_AMDGPU_MACH_R600_RS880;
  case GK_RV670:   return ELF::EF_AMDGPU_MACH_R600_RV670;
  case GK_RV710:   return ELF::EF_AMDGPU_MACH_R600_RV710;
  case GK_RV730:   return ELF::EF_AMDGPU_MACH_R600_RV730;
  case GK_RV770:   return ELF::EF_AMDGPU_MACH_R600_RV770;
  case GK_CEDAR:   return ELF::EF_AMDGPU_MACH_R600_CEDAR;
  case GK_CYPRESS: return ELF::EF_AMDGPU_MACH_R600_CYPRESS;
  case GK_JUNIPER: return ELF::EF_AMDGPU_MACH_R600_JUNIPER;
  case GK_REDWOOD: return ELF::EF_AMDGPU_MACH_R600_REDWOOD;
  case GK_SUMO:    return ELF::EF_AMDGPU_MACH_R600_SUMO;
  case GK_BARTS:   return ELF::EF_AMDGPU_MACH_R600_BARTS;
  case GK_CAICOS:  return ELF::EF_AMDGPU_MACH_R600_CAICOS;
  case GK_CAYMAN:  return ELF::EF_AMDGPU_MACH_R600_CAYMAN;
  case GK_TURKS:   return ELF::EF_AMDGPU_MACH_R600_TURKS;
  // AMDGCN family.
  case GK_GFX600:  return ELF::EF_AMDGPU_MACH_AMDGCN_GFX600;
  case GK_GFX601:  return ELF::EF_AMDGPU_MACH_AMDGCN_GFX601;
  case GK_GFX602:  return ELF::EF_AMDGPU_MACH_AMDGCN_GFX602;
  case GK_GFX700:  return ELF::EF_AMDGPU_MACH_AMDGCN_GFX700;
  case GK_GFX701:  return ELF::EF_AMDGPU_MACH_AMDGCN_GFX701;
  case GK_GFX702:  return ELF::EF_AMDGPU_MACH_AMDGCN_GFX702;
  case GK_GFX703:  return ELF::EF_AMDGPU_MACH_AMDGCN_GFX703;
  case GK_GFX704:  return ELF::EF_AMDGPU_MACH_AMDGCN_GFX704;
  case GK_GFX705:  return ELF::EF_AMDGPU_MACH_AMDGCN_GFX705;
  case GK_GFX801:  return ELF::EF_AMDGPU_MACH_AMDGCN_GFX801;
  case GK_GFX802:  return ELF::EF_AMDGPU_MACH_AMDGCN_GFX802;
  case GK_GFX803:  return ELF::EF_AMDGPU_MACH_AMDGCN_GFX803;
  case GK_GFX805:  return ELF::EF_AMDGPU_MACH_AMDGCN_GFX805;
  case GK_GFX810:  return ELF::EF_AMDGPU_MACH_AMDGCN_GFX810;
  case GK_GFX900:  return ELF::EF_AMDGPU_MACH_AMDGCN_GFX900;
  case GK_GFX902:  return ELF::EF_AMDGPU_MACH_AMDGCN_GFX902;
  case GK_GFX904:  return ELF::EF_AMDGPU_MACH_AMDGCN_GFX904;
  case GK_GFX906:  return ELF::EF_AMDGPU_MACH_AMDGCN_GFX906;
  case GK_GFX908:  return ELF::EF_AMDGPU_MACH_AMDGCN_GFX908;
  case GK_GFX909:  return ELF::EF_AMDGPU_MACH_AMDGCN_GFX909;
  case GK_GFX90A:  return ELF::EF_AMDGPU_MACH_AMDGCN_GFX90A;
  case GK_GFX90C:  return ELF::EF_AMDGPU_MACH_AMDGCN_GFX90C;
  case GK_GFX1010: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1010;
  case GK_GFX1011: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1011;
  case GK_GFX1012: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1012;
  case GK_GFX1030: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1030;
  case GK_GFX1031: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1031;
  case GK_GFX1032: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1032;
  case GK_GFX1033: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1033;
  case GK_GFX1034: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1034;
  case GK_NONE:    return ELF::EF_AMDGPU_MACH_NONE;
  }

  llvm_unreachable("unknown GPU");
}
179
180 //===----------------------------------------------------------------------===//
181 // AMDGPUTargetAsmStreamer
182 //===----------------------------------------------------------------------===//
183
// Textual (assembly) streamer: directives are printed to \p OS.
AMDGPUTargetAsmStreamer::AMDGPUTargetAsmStreamer(MCStreamer &S,
                                                 formatted_raw_ostream &OS)
    : AMDGPUTargetStreamer(S), OS(OS) { }
187
188 // A hook for emitting stuff at the end.
189 // We use it for emitting the accumulated PAL metadata as directives.
190 // The PAL metadata is reset after it is emitted.
finish()191 void AMDGPUTargetAsmStreamer::finish() {
192 std::string S;
193 getPALMetadata()->toString(S);
194 OS << S;
195
196 // Reset the pal metadata so its data will not affect a compilation that
197 // reuses this object.
198 getPALMetadata()->reset();
199 }
200
// Print the target as a .amdgcn_target directive; the operand is the full
// target-id string (arch plus feature settings).
void AMDGPUTargetAsmStreamer::EmitDirectiveAMDGCNTarget() {
  OS << "\t.amdgcn_target \"" << getTargetID()->toString() << "\"\n";
}
204
EmitDirectiveHSACodeObjectVersion(uint32_t Major,uint32_t Minor)205 void AMDGPUTargetAsmStreamer::EmitDirectiveHSACodeObjectVersion(
206 uint32_t Major, uint32_t Minor) {
207 OS << "\t.hsa_code_object_version " <<
208 Twine(Major) << "," << Twine(Minor) << '\n';
209 }
210
211 void
EmitDirectiveHSACodeObjectISAV2(uint32_t Major,uint32_t Minor,uint32_t Stepping,StringRef VendorName,StringRef ArchName)212 AMDGPUTargetAsmStreamer::EmitDirectiveHSACodeObjectISAV2(uint32_t Major,
213 uint32_t Minor,
214 uint32_t Stepping,
215 StringRef VendorName,
216 StringRef ArchName) {
217 convertIsaVersionV2(Major, Minor, Stepping, TargetID->isSramEccOnOrAny(), TargetID->isXnackOnOrAny());
218 OS << "\t.hsa_code_object_isa " << Twine(Major) << "," << Twine(Minor) << ","
219 << Twine(Stepping) << ",\"" << VendorName << "\",\"" << ArchName << "\"\n";
220 }
221
// Print the amd_kernel_code_t header as a directive block, one field per
// line between .amd_kernel_code_t and .end_amd_kernel_code_t.
void
AMDGPUTargetAsmStreamer::EmitAMDKernelCodeT(const amd_kernel_code_t &Header) {
  OS << "\t.amd_kernel_code_t\n";
  dumpAmdKernelCode(&Header, OS, "\t\t");
  OS << "\t.end_amd_kernel_code_t\n";
}
228
EmitAMDGPUSymbolType(StringRef SymbolName,unsigned Type)229 void AMDGPUTargetAsmStreamer::EmitAMDGPUSymbolType(StringRef SymbolName,
230 unsigned Type) {
231 switch (Type) {
232 default: llvm_unreachable("Invalid AMDGPU symbol type");
233 case ELF::STT_AMDGPU_HSA_KERNEL:
234 OS << "\t.amdgpu_hsa_kernel " << SymbolName << '\n' ;
235 break;
236 }
237 }
238
// Print an LDS (group-segment) allocation directive: symbol, byte size,
// and alignment in bytes.
void AMDGPUTargetAsmStreamer::emitAMDGPULDS(MCSymbol *Symbol, unsigned Size,
                                            Align Alignment) {
  OS << "\t.amdgpu_lds " << Symbol->getName() << ", " << Size << ", "
     << Alignment.value() << '\n';
}
244
// Print the ISA version (full target-id string) directive. Always succeeds
// in the textual streamer.
bool AMDGPUTargetAsmStreamer::EmitISAVersion() {
  OS << "\t.amd_amdgpu_isa \"" << getTargetID()->toString() << "\"\n";
  return true;
}
249
EmitHSAMetadata(const AMDGPU::HSAMD::Metadata & HSAMetadata)250 bool AMDGPUTargetAsmStreamer::EmitHSAMetadata(
251 const AMDGPU::HSAMD::Metadata &HSAMetadata) {
252 std::string HSAMetadataString;
253 if (HSAMD::toString(HSAMetadata, HSAMetadataString))
254 return false;
255
256 OS << '\t' << HSAMD::AssemblerDirectiveBegin << '\n';
257 OS << HSAMetadataString << '\n';
258 OS << '\t' << HSAMD::AssemblerDirectiveEnd << '\n';
259 return true;
260 }
261
EmitHSAMetadata(msgpack::Document & HSAMetadataDoc,bool Strict)262 bool AMDGPUTargetAsmStreamer::EmitHSAMetadata(
263 msgpack::Document &HSAMetadataDoc, bool Strict) {
264 HSAMD::V3::MetadataVerifier Verifier(Strict);
265 if (!Verifier.verify(HSAMetadataDoc.getRoot()))
266 return false;
267
268 std::string HSAMetadataString;
269 raw_string_ostream StrOS(HSAMetadataString);
270 HSAMetadataDoc.toYAML(StrOS);
271
272 OS << '\t' << HSAMD::V3::AssemblerDirectiveBegin << '\n';
273 OS << StrOS.str() << '\n';
274 OS << '\t' << HSAMD::V3::AssemblerDirectiveEnd << '\n';
275 return true;
276 }
277
EmitCodeEnd(const MCSubtargetInfo & STI)278 bool AMDGPUTargetAsmStreamer::EmitCodeEnd(const MCSubtargetInfo &STI) {
279 const uint32_t Encoded_s_code_end = 0xbf9f0000;
280 const uint32_t Encoded_s_nop = 0xbf800000;
281 uint32_t Encoded_pad = Encoded_s_code_end;
282
283 // Instruction cache line size in bytes.
284 const unsigned Log2CacheLineSize = 6;
285 const unsigned CacheLineSize = 1u << Log2CacheLineSize;
286
287 // Extra padding amount in bytes to support prefetch mode 3.
288 unsigned FillSize = 3 * CacheLineSize;
289
290 if (AMDGPU::isGFX90A(STI)) {
291 Encoded_pad = Encoded_s_nop;
292 FillSize = 16 * CacheLineSize;
293 }
294
295 OS << "\t.p2alignl " << Log2CacheLineSize << ", " << Encoded_pad << '\n';
296 OS << "\t.fill " << (FillSize / 4) << ", 4, " << Encoded_pad << '\n';
297 return true;
298 }
299
// Print a kernel descriptor as a .amdhsa_kernel directive block. Each
// directive mirrors one field (or bit-field) of amdhsa::kernel_descriptor_t;
// several are gated on the ISA version or on architected flat-scratch
// support, matching what the assembler will accept for this target.
void AMDGPUTargetAsmStreamer::EmitAmdhsaKernelDescriptor(
    const MCSubtargetInfo &STI, StringRef KernelName,
    const amdhsa::kernel_descriptor_t &KD, uint64_t NextVGPR, uint64_t NextSGPR,
    bool ReserveVCC, bool ReserveFlatScr) {
  IsaVersion IVersion = getIsaVersion(STI.getCPU());

  OS << "\t.amdhsa_kernel " << KernelName << '\n';

// Extract FIELD_NAME from the descriptor member and print it as DIRECTIVE.
#define PRINT_FIELD(STREAM, DIRECTIVE, KERNEL_DESC, MEMBER_NAME, FIELD_NAME)   \
  STREAM << "\t\t" << DIRECTIVE << " "                                         \
         << AMDHSA_BITS_GET(KERNEL_DESC.MEMBER_NAME, FIELD_NAME) << '\n';

  // Whole-word descriptor fields.
  OS << "\t\t.amdhsa_group_segment_fixed_size " << KD.group_segment_fixed_size
     << '\n';
  OS << "\t\t.amdhsa_private_segment_fixed_size "
     << KD.private_segment_fixed_size << '\n';
  OS << "\t\t.amdhsa_kernarg_size " << KD.kernarg_size << '\n';

  // User SGPR enables (kernel_code_properties bits).
  if (!hasArchitectedFlatScratch(STI))
    PRINT_FIELD(
        OS, ".amdhsa_user_sgpr_private_segment_buffer", KD,
        kernel_code_properties,
        amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER);
  PRINT_FIELD(OS, ".amdhsa_user_sgpr_dispatch_ptr", KD,
              kernel_code_properties,
              amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR);
  PRINT_FIELD(OS, ".amdhsa_user_sgpr_queue_ptr", KD,
              kernel_code_properties,
              amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR);
  PRINT_FIELD(OS, ".amdhsa_user_sgpr_kernarg_segment_ptr", KD,
              kernel_code_properties,
              amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR);
  PRINT_FIELD(OS, ".amdhsa_user_sgpr_dispatch_id", KD,
              kernel_code_properties,
              amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID);
  if (!hasArchitectedFlatScratch(STI))
    PRINT_FIELD(OS, ".amdhsa_user_sgpr_flat_scratch_init", KD,
                kernel_code_properties,
                amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT);
  PRINT_FIELD(OS, ".amdhsa_user_sgpr_private_segment_size", KD,
              kernel_code_properties,
              amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE);
  if (IVersion.Major >= 10)
    PRINT_FIELD(OS, ".amdhsa_wavefront_size32", KD,
                kernel_code_properties,
                amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32);
  // System SGPR/VGPR enables (compute_pgm_rsrc2 bits). The first directive's
  // spelling depends on whether flat scratch is architected.
  PRINT_FIELD(OS,
              (hasArchitectedFlatScratch(STI)
                   ? ".amdhsa_enable_private_segment"
                   : ".amdhsa_system_sgpr_private_segment_wavefront_offset"),
              KD, compute_pgm_rsrc2,
              amdhsa::COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT);
  PRINT_FIELD(OS, ".amdhsa_system_sgpr_workgroup_id_x", KD,
              compute_pgm_rsrc2,
              amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X);
  PRINT_FIELD(OS, ".amdhsa_system_sgpr_workgroup_id_y", KD,
              compute_pgm_rsrc2,
              amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y);
  PRINT_FIELD(OS, ".amdhsa_system_sgpr_workgroup_id_z", KD,
              compute_pgm_rsrc2,
              amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z);
  PRINT_FIELD(OS, ".amdhsa_system_sgpr_workgroup_info", KD,
              compute_pgm_rsrc2,
              amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO);
  PRINT_FIELD(OS, ".amdhsa_system_vgpr_workitem_id", KD,
              compute_pgm_rsrc2,
              amdhsa::COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID);

  // These directives are required.
  OS << "\t\t.amdhsa_next_free_vgpr " << NextVGPR << '\n';
  OS << "\t\t.amdhsa_next_free_sgpr " << NextSGPR << '\n';

  // The directive takes the byte offset; rsrc3 stores (offset/4)-1.
  if (AMDGPU::isGFX90A(STI))
    OS << "\t\t.amdhsa_accum_offset " <<
        (AMDHSA_BITS_GET(KD.compute_pgm_rsrc3,
                         amdhsa::COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET) + 1) * 4
        << '\n';

  // Reservation overrides: only printed when they differ from the default.
  if (!ReserveVCC)
    OS << "\t\t.amdhsa_reserve_vcc " << ReserveVCC << '\n';
  if (IVersion.Major >= 7 && !ReserveFlatScr && !hasArchitectedFlatScratch(STI))
    OS << "\t\t.amdhsa_reserve_flat_scratch " << ReserveFlatScr << '\n';

  // XNACK reservation only exists in the V3/V4 code-object ABIs.
  if (Optional<uint8_t> HsaAbiVer = getHsaAbiVersion(&STI)) {
    switch (*HsaAbiVer) {
    default:
      break;
    case ELF::ELFABIVERSION_AMDGPU_HSA_V2:
      break;
    case ELF::ELFABIVERSION_AMDGPU_HSA_V3:
    case ELF::ELFABIVERSION_AMDGPU_HSA_V4:
      if (getTargetID()->isXnackSupported())
        OS << "\t\t.amdhsa_reserve_xnack_mask " << getTargetID()->isXnackOnOrAny() << '\n';
      break;
    }
  }

  // Float mode and related compute_pgm_rsrc1 bits.
  PRINT_FIELD(OS, ".amdhsa_float_round_mode_32", KD,
              compute_pgm_rsrc1,
              amdhsa::COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32);
  PRINT_FIELD(OS, ".amdhsa_float_round_mode_16_64", KD,
              compute_pgm_rsrc1,
              amdhsa::COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64);
  PRINT_FIELD(OS, ".amdhsa_float_denorm_mode_32", KD,
              compute_pgm_rsrc1,
              amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32);
  PRINT_FIELD(OS, ".amdhsa_float_denorm_mode_16_64", KD,
              compute_pgm_rsrc1,
              amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64);
  PRINT_FIELD(OS, ".amdhsa_dx10_clamp", KD,
              compute_pgm_rsrc1,
              amdhsa::COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP);
  PRINT_FIELD(OS, ".amdhsa_ieee_mode", KD,
              compute_pgm_rsrc1,
              amdhsa::COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE);
  if (IVersion.Major >= 9)
    PRINT_FIELD(OS, ".amdhsa_fp16_overflow", KD,
                compute_pgm_rsrc1,
                amdhsa::COMPUTE_PGM_RSRC1_FP16_OVFL);
  if (AMDGPU::isGFX90A(STI))
    PRINT_FIELD(OS, ".amdhsa_tg_split", KD,
                compute_pgm_rsrc3,
                amdhsa::COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT);
  if (IVersion.Major >= 10) {
    PRINT_FIELD(OS, ".amdhsa_workgroup_processor_mode", KD,
                compute_pgm_rsrc1,
                amdhsa::COMPUTE_PGM_RSRC1_WGP_MODE);
    PRINT_FIELD(OS, ".amdhsa_memory_ordered", KD,
                compute_pgm_rsrc1,
                amdhsa::COMPUTE_PGM_RSRC1_MEM_ORDERED);
    PRINT_FIELD(OS, ".amdhsa_forward_progress", KD,
                compute_pgm_rsrc1,
                amdhsa::COMPUTE_PGM_RSRC1_FWD_PROGRESS);
  }
  // Exception enables (compute_pgm_rsrc2 bits).
  PRINT_FIELD(
      OS, ".amdhsa_exception_fp_ieee_invalid_op", KD,
      compute_pgm_rsrc2,
      amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION);
  PRINT_FIELD(OS, ".amdhsa_exception_fp_denorm_src", KD,
              compute_pgm_rsrc2,
              amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE);
  PRINT_FIELD(
      OS, ".amdhsa_exception_fp_ieee_div_zero", KD,
      compute_pgm_rsrc2,
      amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO);
  PRINT_FIELD(OS, ".amdhsa_exception_fp_ieee_overflow", KD,
              compute_pgm_rsrc2,
              amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW);
  PRINT_FIELD(OS, ".amdhsa_exception_fp_ieee_underflow", KD,
              compute_pgm_rsrc2,
              amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW);
  PRINT_FIELD(OS, ".amdhsa_exception_fp_ieee_inexact", KD,
              compute_pgm_rsrc2,
              amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT);
  PRINT_FIELD(OS, ".amdhsa_exception_int_div_zero", KD,
              compute_pgm_rsrc2,
              amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO);
#undef PRINT_FIELD

  OS << "\t.end_amdhsa_kernel\n";
}
461
462 //===----------------------------------------------------------------------===//
463 // AMDGPUTargetELFStreamer
464 //===----------------------------------------------------------------------===//
465
// Object (ELF) streamer: directives become ELF notes, flags, and symbols.
AMDGPUTargetELFStreamer::AMDGPUTargetELFStreamer(MCStreamer &S,
                                                 const MCSubtargetInfo &STI)
    : AMDGPUTargetStreamer(S), STI(STI), Streamer(S) {}
469
getStreamer()470 MCELFStreamer &AMDGPUTargetELFStreamer::getStreamer() {
471 return static_cast<MCELFStreamer &>(Streamer);
472 }
473
474 // A hook for emitting stuff at the end.
475 // We use it for emitting the accumulated PAL metadata as a .note record.
476 // The PAL metadata is reset after it is emitted.
finish()477 void AMDGPUTargetELFStreamer::finish() {
478 MCAssembler &MCA = getStreamer().getAssembler();
479 MCA.setELFHeaderEFlags(getEFlags());
480
481 std::string Blob;
482 const char *Vendor = getPALMetadata()->getVendor();
483 unsigned Type = getPALMetadata()->getType();
484 getPALMetadata()->toBlob(Type, Blob);
485 if (Blob.empty())
486 return;
487 EmitNote(Vendor, MCConstantExpr::create(Blob.size(), getContext()), Type,
488 [&](MCELFStreamer &OS) { OS.emitBytes(Blob); });
489
490 // Reset the pal metadata so its data will not affect a compilation that
491 // reuses this object.
492 getPALMetadata()->reset();
493 }
494
EmitNote(StringRef Name,const MCExpr * DescSZ,unsigned NoteType,function_ref<void (MCELFStreamer &)> EmitDesc)495 void AMDGPUTargetELFStreamer::EmitNote(
496 StringRef Name, const MCExpr *DescSZ, unsigned NoteType,
497 function_ref<void(MCELFStreamer &)> EmitDesc) {
498 auto &S = getStreamer();
499 auto &Context = S.getContext();
500
501 auto NameSZ = Name.size() + 1;
502
503 unsigned NoteFlags = 0;
504 // TODO Apparently, this is currently needed for OpenCL as mentioned in
505 // https://reviews.llvm.org/D74995
506 if (STI.getTargetTriple().getOS() == Triple::AMDHSA)
507 NoteFlags = ELF::SHF_ALLOC;
508
509 S.PushSection();
510 S.SwitchSection(
511 Context.getELFSection(ElfNote::SectionName, ELF::SHT_NOTE, NoteFlags));
512 S.emitInt32(NameSZ); // namesz
513 S.emitValue(DescSZ, 4); // descz
514 S.emitInt32(NoteType); // type
515 S.emitBytes(Name); // name
516 S.emitValueToAlignment(4, 0, 1, 0); // padding 0
517 EmitDesc(S); // desc
518 S.emitValueToAlignment(4, 0, 1, 0); // padding 0
519 S.PopSection();
520 }
521
getEFlags()522 unsigned AMDGPUTargetELFStreamer::getEFlags() {
523 switch (STI.getTargetTriple().getArch()) {
524 default:
525 llvm_unreachable("Unsupported Arch");
526 case Triple::r600:
527 return getEFlagsR600();
528 case Triple::amdgcn:
529 return getEFlagsAMDGCN();
530 }
531 }
532
getEFlagsR600()533 unsigned AMDGPUTargetELFStreamer::getEFlagsR600() {
534 assert(STI.getTargetTriple().getArch() == Triple::r600);
535
536 return getElfMach(STI.getCPU());
537 }
538
getEFlagsAMDGCN()539 unsigned AMDGPUTargetELFStreamer::getEFlagsAMDGCN() {
540 assert(STI.getTargetTriple().getArch() == Triple::amdgcn);
541
542 switch (STI.getTargetTriple().getOS()) {
543 default:
544 // TODO: Why are some tests have "mingw" listed as OS?
545 // llvm_unreachable("Unsupported OS");
546 case Triple::UnknownOS:
547 return getEFlagsUnknownOS();
548 case Triple::AMDHSA:
549 return getEFlagsAMDHSA();
550 case Triple::AMDPAL:
551 return getEFlagsAMDPAL();
552 case Triple::Mesa3D:
553 return getEFlagsMesa3D();
554 }
555 }
556
getEFlagsUnknownOS()557 unsigned AMDGPUTargetELFStreamer::getEFlagsUnknownOS() {
558 // TODO: Why are some tests have "mingw" listed as OS?
559 // assert(STI.getTargetTriple().getOS() == Triple::UnknownOS);
560
561 return getEFlagsV3();
562 }
563
getEFlagsAMDHSA()564 unsigned AMDGPUTargetELFStreamer::getEFlagsAMDHSA() {
565 assert(STI.getTargetTriple().getOS() == Triple::AMDHSA);
566
567 if (Optional<uint8_t> HsaAbiVer = getHsaAbiVersion(&STI)) {
568 switch (*HsaAbiVer) {
569 case ELF::ELFABIVERSION_AMDGPU_HSA_V2:
570 case ELF::ELFABIVERSION_AMDGPU_HSA_V3:
571 return getEFlagsV3();
572 case ELF::ELFABIVERSION_AMDGPU_HSA_V4:
573 return getEFlagsV4();
574 }
575 }
576
577 llvm_unreachable("HSA OS ABI Version identification must be defined");
578 }
579
getEFlagsAMDPAL()580 unsigned AMDGPUTargetELFStreamer::getEFlagsAMDPAL() {
581 assert(STI.getTargetTriple().getOS() == Triple::AMDPAL);
582
583 return getEFlagsV3();
584 }
585
getEFlagsMesa3D()586 unsigned AMDGPUTargetELFStreamer::getEFlagsMesa3D() {
587 assert(STI.getTargetTriple().getOS() == Triple::Mesa3D);
588
589 return getEFlagsV3();
590 }
591
getEFlagsV3()592 unsigned AMDGPUTargetELFStreamer::getEFlagsV3() {
593 unsigned EFlagsV3 = 0;
594
595 // mach.
596 EFlagsV3 |= getElfMach(STI.getCPU());
597
598 // xnack.
599 if (getTargetID()->isXnackOnOrAny())
600 EFlagsV3 |= ELF::EF_AMDGPU_FEATURE_XNACK_V3;
601 // sramecc.
602 if (getTargetID()->isSramEccOnOrAny())
603 EFlagsV3 |= ELF::EF_AMDGPU_FEATURE_SRAMECC_V3;
604
605 return EFlagsV3;
606 }
607
getEFlagsV4()608 unsigned AMDGPUTargetELFStreamer::getEFlagsV4() {
609 unsigned EFlagsV4 = 0;
610
611 // mach.
612 EFlagsV4 |= getElfMach(STI.getCPU());
613
614 // xnack.
615 switch (getTargetID()->getXnackSetting()) {
616 case AMDGPU::IsaInfo::TargetIDSetting::Unsupported:
617 EFlagsV4 |= ELF::EF_AMDGPU_FEATURE_XNACK_UNSUPPORTED_V4;
618 break;
619 case AMDGPU::IsaInfo::TargetIDSetting::Any:
620 EFlagsV4 |= ELF::EF_AMDGPU_FEATURE_XNACK_ANY_V4;
621 break;
622 case AMDGPU::IsaInfo::TargetIDSetting::Off:
623 EFlagsV4 |= ELF::EF_AMDGPU_FEATURE_XNACK_OFF_V4;
624 break;
625 case AMDGPU::IsaInfo::TargetIDSetting::On:
626 EFlagsV4 |= ELF::EF_AMDGPU_FEATURE_XNACK_ON_V4;
627 break;
628 }
629 // sramecc.
630 switch (getTargetID()->getSramEccSetting()) {
631 case AMDGPU::IsaInfo::TargetIDSetting::Unsupported:
632 EFlagsV4 |= ELF::EF_AMDGPU_FEATURE_SRAMECC_UNSUPPORTED_V4;
633 break;
634 case AMDGPU::IsaInfo::TargetIDSetting::Any:
635 EFlagsV4 |= ELF::EF_AMDGPU_FEATURE_SRAMECC_ANY_V4;
636 break;
637 case AMDGPU::IsaInfo::TargetIDSetting::Off:
638 EFlagsV4 |= ELF::EF_AMDGPU_FEATURE_SRAMECC_OFF_V4;
639 break;
640 case AMDGPU::IsaInfo::TargetIDSetting::On:
641 EFlagsV4 |= ELF::EF_AMDGPU_FEATURE_SRAMECC_ON_V4;
642 break;
643 }
644
645 return EFlagsV4;
646 }
647
// No-op for the ELF streamer: the target is fully encoded in the ELF
// header e_flags (see getEFlags), so there is nothing extra to emit.
void AMDGPUTargetELFStreamer::EmitDirectiveAMDGCNTarget() {}
649
EmitDirectiveHSACodeObjectVersion(uint32_t Major,uint32_t Minor)650 void AMDGPUTargetELFStreamer::EmitDirectiveHSACodeObjectVersion(
651 uint32_t Major, uint32_t Minor) {
652
653 EmitNote(ElfNote::NoteNameV2, MCConstantExpr::create(8, getContext()),
654 ELF::NT_AMD_HSA_CODE_OBJECT_VERSION, [&](MCELFStreamer &OS) {
655 OS.emitInt32(Major);
656 OS.emitInt32(Minor);
657 });
658 }
659
660 void
EmitDirectiveHSACodeObjectISAV2(uint32_t Major,uint32_t Minor,uint32_t Stepping,StringRef VendorName,StringRef ArchName)661 AMDGPUTargetELFStreamer::EmitDirectiveHSACodeObjectISAV2(uint32_t Major,
662 uint32_t Minor,
663 uint32_t Stepping,
664 StringRef VendorName,
665 StringRef ArchName) {
666 uint16_t VendorNameSize = VendorName.size() + 1;
667 uint16_t ArchNameSize = ArchName.size() + 1;
668
669 unsigned DescSZ = sizeof(VendorNameSize) + sizeof(ArchNameSize) +
670 sizeof(Major) + sizeof(Minor) + sizeof(Stepping) +
671 VendorNameSize + ArchNameSize;
672
673 convertIsaVersionV2(Major, Minor, Stepping, TargetID->isSramEccOnOrAny(), TargetID->isXnackOnOrAny());
674 EmitNote(ElfNote::NoteNameV2, MCConstantExpr::create(DescSZ, getContext()),
675 ELF::NT_AMD_HSA_ISA_VERSION, [&](MCELFStreamer &OS) {
676 OS.emitInt16(VendorNameSize);
677 OS.emitInt16(ArchNameSize);
678 OS.emitInt32(Major);
679 OS.emitInt32(Minor);
680 OS.emitInt32(Stepping);
681 OS.emitBytes(VendorName);
682 OS.emitInt8(0); // NULL terminate VendorName
683 OS.emitBytes(ArchName);
684 OS.emitInt8(0); // NULL terminte ArchName
685 });
686 }
687
688 void
EmitAMDKernelCodeT(const amd_kernel_code_t & Header)689 AMDGPUTargetELFStreamer::EmitAMDKernelCodeT(const amd_kernel_code_t &Header) {
690
691 MCStreamer &OS = getStreamer();
692 OS.PushSection();
693 OS.emitBytes(StringRef((const char*)&Header, sizeof(Header)));
694 OS.PopSection();
695 }
696
EmitAMDGPUSymbolType(StringRef SymbolName,unsigned Type)697 void AMDGPUTargetELFStreamer::EmitAMDGPUSymbolType(StringRef SymbolName,
698 unsigned Type) {
699 MCSymbolELF *Symbol = cast<MCSymbolELF>(
700 getStreamer().getContext().getOrCreateSymbol(SymbolName));
701 Symbol->setType(Type);
702 }
703
emitAMDGPULDS(MCSymbol * Symbol,unsigned Size,Align Alignment)704 void AMDGPUTargetELFStreamer::emitAMDGPULDS(MCSymbol *Symbol, unsigned Size,
705 Align Alignment) {
706 MCSymbolELF *SymbolELF = cast<MCSymbolELF>(Symbol);
707 SymbolELF->setType(ELF::STT_OBJECT);
708
709 if (!SymbolELF->isBindingSet()) {
710 SymbolELF->setBinding(ELF::STB_GLOBAL);
711 SymbolELF->setExternal(true);
712 }
713
714 if (SymbolELF->declareCommon(Size, Alignment.value(), true)) {
715 report_fatal_error("Symbol: " + Symbol->getName() +
716 " redeclared as different type");
717 }
718
719 SymbolELF->setIndex(ELF::SHN_AMDGPU_LDS);
720 SymbolELF->setSize(MCConstantExpr::create(Size, getContext()));
721 }
722
EmitISAVersion()723 bool AMDGPUTargetELFStreamer::EmitISAVersion() {
724 // Create two labels to mark the beginning and end of the desc field
725 // and a MCExpr to calculate the size of the desc field.
726 auto &Context = getContext();
727 auto *DescBegin = Context.createTempSymbol();
728 auto *DescEnd = Context.createTempSymbol();
729 auto *DescSZ = MCBinaryExpr::createSub(
730 MCSymbolRefExpr::create(DescEnd, Context),
731 MCSymbolRefExpr::create(DescBegin, Context), Context);
732
733 EmitNote(ElfNote::NoteNameV2, DescSZ, ELF::NT_AMD_HSA_ISA_NAME,
734 [&](MCELFStreamer &OS) {
735 OS.emitLabel(DescBegin);
736 OS.emitBytes(getTargetID()->toString());
737 OS.emitLabel(DescEnd);
738 });
739 return true;
740 }
741
EmitHSAMetadata(msgpack::Document & HSAMetadataDoc,bool Strict)742 bool AMDGPUTargetELFStreamer::EmitHSAMetadata(msgpack::Document &HSAMetadataDoc,
743 bool Strict) {
744 HSAMD::V3::MetadataVerifier Verifier(Strict);
745 if (!Verifier.verify(HSAMetadataDoc.getRoot()))
746 return false;
747
748 std::string HSAMetadataString;
749 HSAMetadataDoc.writeToBlob(HSAMetadataString);
750
751 // Create two labels to mark the beginning and end of the desc field
752 // and a MCExpr to calculate the size of the desc field.
753 auto &Context = getContext();
754 auto *DescBegin = Context.createTempSymbol();
755 auto *DescEnd = Context.createTempSymbol();
756 auto *DescSZ = MCBinaryExpr::createSub(
757 MCSymbolRefExpr::create(DescEnd, Context),
758 MCSymbolRefExpr::create(DescBegin, Context), Context);
759
760 EmitNote(ElfNote::NoteNameV3, DescSZ, ELF::NT_AMDGPU_METADATA,
761 [&](MCELFStreamer &OS) {
762 OS.emitLabel(DescBegin);
763 OS.emitBytes(HSAMetadataString);
764 OS.emitLabel(DescEnd);
765 });
766 return true;
767 }
768
EmitHSAMetadata(const AMDGPU::HSAMD::Metadata & HSAMetadata)769 bool AMDGPUTargetELFStreamer::EmitHSAMetadata(
770 const AMDGPU::HSAMD::Metadata &HSAMetadata) {
771 std::string HSAMetadataString;
772 if (HSAMD::toString(HSAMetadata, HSAMetadataString))
773 return false;
774
775 // Create two labels to mark the beginning and end of the desc field
776 // and a MCExpr to calculate the size of the desc field.
777 auto &Context = getContext();
778 auto *DescBegin = Context.createTempSymbol();
779 auto *DescEnd = Context.createTempSymbol();
780 auto *DescSZ = MCBinaryExpr::createSub(
781 MCSymbolRefExpr::create(DescEnd, Context),
782 MCSymbolRefExpr::create(DescBegin, Context), Context);
783
784 EmitNote(ElfNote::NoteNameV2, DescSZ, ELF::NT_AMD_HSA_METADATA,
785 [&](MCELFStreamer &OS) {
786 OS.emitLabel(DescBegin);
787 OS.emitBytes(HSAMetadataString);
788 OS.emitLabel(DescEnd);
789 });
790 return true;
791 }
792
EmitCodeEnd(const MCSubtargetInfo & STI)793 bool AMDGPUTargetELFStreamer::EmitCodeEnd(const MCSubtargetInfo &STI) {
794 const uint32_t Encoded_s_code_end = 0xbf9f0000;
795 const uint32_t Encoded_s_nop = 0xbf800000;
796 uint32_t Encoded_pad = Encoded_s_code_end;
797
798 // Instruction cache line size in bytes.
799 const unsigned Log2CacheLineSize = 6;
800 const unsigned CacheLineSize = 1u << Log2CacheLineSize;
801
802 // Extra padding amount in bytes to support prefetch mode 3.
803 unsigned FillSize = 3 * CacheLineSize;
804
805 if (AMDGPU::isGFX90A(STI)) {
806 Encoded_pad = Encoded_s_nop;
807 FillSize = 16 * CacheLineSize;
808 }
809
810 MCStreamer &OS = getStreamer();
811 OS.PushSection();
812 OS.emitValueToAlignment(CacheLineSize, Encoded_pad, 4);
813 for (unsigned I = 0; I < FillSize; I += 4)
814 OS.emitInt32(Encoded_pad);
815 OS.PopSection();
816 return true;
817 }
818
EmitAmdhsaKernelDescriptor(const MCSubtargetInfo & STI,StringRef KernelName,const amdhsa::kernel_descriptor_t & KernelDescriptor,uint64_t NextVGPR,uint64_t NextSGPR,bool ReserveVCC,bool ReserveFlatScr)819 void AMDGPUTargetELFStreamer::EmitAmdhsaKernelDescriptor(
820 const MCSubtargetInfo &STI, StringRef KernelName,
821 const amdhsa::kernel_descriptor_t &KernelDescriptor, uint64_t NextVGPR,
822 uint64_t NextSGPR, bool ReserveVCC, bool ReserveFlatScr) {
823 auto &Streamer = getStreamer();
824 auto &Context = Streamer.getContext();
825
826 MCSymbolELF *KernelCodeSymbol = cast<MCSymbolELF>(
827 Context.getOrCreateSymbol(Twine(KernelName)));
828 MCSymbolELF *KernelDescriptorSymbol = cast<MCSymbolELF>(
829 Context.getOrCreateSymbol(Twine(KernelName) + Twine(".kd")));
830
831 // Copy kernel descriptor symbol's binding, other and visibility from the
832 // kernel code symbol.
833 KernelDescriptorSymbol->setBinding(KernelCodeSymbol->getBinding());
834 KernelDescriptorSymbol->setOther(KernelCodeSymbol->getOther());
835 KernelDescriptorSymbol->setVisibility(KernelCodeSymbol->getVisibility());
836 // Kernel descriptor symbol's type and size are fixed.
837 KernelDescriptorSymbol->setType(ELF::STT_OBJECT);
838 KernelDescriptorSymbol->setSize(
839 MCConstantExpr::create(sizeof(KernelDescriptor), Context));
840
841 // The visibility of the kernel code symbol must be protected or less to allow
842 // static relocations from the kernel descriptor to be used.
843 if (KernelCodeSymbol->getVisibility() == ELF::STV_DEFAULT)
844 KernelCodeSymbol->setVisibility(ELF::STV_PROTECTED);
845
846 Streamer.emitLabel(KernelDescriptorSymbol);
847 Streamer.emitInt32(KernelDescriptor.group_segment_fixed_size);
848 Streamer.emitInt32(KernelDescriptor.private_segment_fixed_size);
849 Streamer.emitInt32(KernelDescriptor.kernarg_size);
850
851 for (uint8_t Res : KernelDescriptor.reserved0)
852 Streamer.emitInt8(Res);
853
854 // FIXME: Remove the use of VK_AMDGPU_REL64 in the expression below. The
855 // expression being created is:
856 // (start of kernel code) - (start of kernel descriptor)
857 // It implies R_AMDGPU_REL64, but ends up being R_AMDGPU_ABS64.
858 Streamer.emitValue(MCBinaryExpr::createSub(
859 MCSymbolRefExpr::create(
860 KernelCodeSymbol, MCSymbolRefExpr::VK_AMDGPU_REL64, Context),
861 MCSymbolRefExpr::create(
862 KernelDescriptorSymbol, MCSymbolRefExpr::VK_None, Context),
863 Context),
864 sizeof(KernelDescriptor.kernel_code_entry_byte_offset));
865 for (uint8_t Res : KernelDescriptor.reserved1)
866 Streamer.emitInt8(Res);
867 Streamer.emitInt32(KernelDescriptor.compute_pgm_rsrc3);
868 Streamer.emitInt32(KernelDescriptor.compute_pgm_rsrc1);
869 Streamer.emitInt32(KernelDescriptor.compute_pgm_rsrc2);
870 Streamer.emitInt16(KernelDescriptor.kernel_code_properties);
871 for (uint8_t Res : KernelDescriptor.reserved2)
872 Streamer.emitInt8(Res);
873 }
874