//===-- AMDGPUTargetStreamer.cpp - AMDGPU Target Streamer Methods ---------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file provides AMDGPU specific target streamer methods.
10 //
11 //===----------------------------------------------------------------------===//
12
13 #include "AMDGPUTargetStreamer.h"
14 #include "AMDGPUPTNote.h"
15 #include "AMDKernelCodeT.h"
16 #include "Utils/AMDGPUBaseInfo.h"
17 #include "Utils/AMDKernelCodeTUtils.h"
18 #include "llvm/BinaryFormat/AMDGPUMetadataVerifier.h"
19 #include "llvm/BinaryFormat/ELF.h"
20 #include "llvm/MC/MCContext.h"
21 #include "llvm/MC/MCELFStreamer.h"
22 #include "llvm/MC/MCSectionELF.h"
23 #include "llvm/Support/AMDGPUMetadata.h"
24 #include "llvm/Support/AMDHSAKernelDescriptor.h"
25 #include "llvm/Support/FormattedStream.h"
26
27 using namespace llvm;
28 using namespace llvm::AMDGPU;
29
30 //===----------------------------------------------------------------------===//
31 // AMDGPUTargetStreamer
32 //===----------------------------------------------------------------------===//
33
// Adjusts an ISA version triple in place for the V2 ISA directive emitters.
//
// Only versions with Major == 9 and Minor == 0 are affected: when Xnack is
// set, steppings 0, 2, 4 and 6 are incremented by one. Every other input is
// left untouched. Sramecc is accepted but not consulted.
static void convertIsaVersionV2(uint32_t &Major, uint32_t &Minor,
                                uint32_t &Stepping, bool Sramecc, bool Xnack) {
  if (!Xnack || Major != 9 || Minor != 0)
    return;

  const bool IsAdjustableStepping =
      Stepping == 0 || Stepping == 2 || Stepping == 4 || Stepping == 6;
  if (IsAdjustableStepping)
    ++Stepping;
}
47
EmitHSAMetadataV2(StringRef HSAMetadataString)48 bool AMDGPUTargetStreamer::EmitHSAMetadataV2(StringRef HSAMetadataString) {
49 HSAMD::Metadata HSAMetadata;
50 if (HSAMD::fromString(HSAMetadataString, HSAMetadata))
51 return false;
52 return EmitHSAMetadata(HSAMetadata);
53 }
54
EmitHSAMetadataV3(StringRef HSAMetadataString)55 bool AMDGPUTargetStreamer::EmitHSAMetadataV3(StringRef HSAMetadataString) {
56 msgpack::Document HSAMetadataDoc;
57 if (!HSAMetadataDoc.fromYAML(HSAMetadataString))
58 return false;
59 return EmitHSAMetadata(HSAMetadataDoc, false);
60 }
61
getArchNameFromElfMach(unsigned ElfMach)62 StringRef AMDGPUTargetStreamer::getArchNameFromElfMach(unsigned ElfMach) {
63 AMDGPU::GPUKind AK;
64
65 switch (ElfMach) {
66 default: llvm_unreachable("Unhandled ELF::EF_AMDGPU type");
67 case ELF::EF_AMDGPU_MACH_R600_R600: AK = GK_R600; break;
68 case ELF::EF_AMDGPU_MACH_R600_R630: AK = GK_R630; break;
69 case ELF::EF_AMDGPU_MACH_R600_RS880: AK = GK_RS880; break;
70 case ELF::EF_AMDGPU_MACH_R600_RV670: AK = GK_RV670; break;
71 case ELF::EF_AMDGPU_MACH_R600_RV710: AK = GK_RV710; break;
72 case ELF::EF_AMDGPU_MACH_R600_RV730: AK = GK_RV730; break;
73 case ELF::EF_AMDGPU_MACH_R600_RV770: AK = GK_RV770; break;
74 case ELF::EF_AMDGPU_MACH_R600_CEDAR: AK = GK_CEDAR; break;
75 case ELF::EF_AMDGPU_MACH_R600_CYPRESS: AK = GK_CYPRESS; break;
76 case ELF::EF_AMDGPU_MACH_R600_JUNIPER: AK = GK_JUNIPER; break;
77 case ELF::EF_AMDGPU_MACH_R600_REDWOOD: AK = GK_REDWOOD; break;
78 case ELF::EF_AMDGPU_MACH_R600_SUMO: AK = GK_SUMO; break;
79 case ELF::EF_AMDGPU_MACH_R600_BARTS: AK = GK_BARTS; break;
80 case ELF::EF_AMDGPU_MACH_R600_CAICOS: AK = GK_CAICOS; break;
81 case ELF::EF_AMDGPU_MACH_R600_CAYMAN: AK = GK_CAYMAN; break;
82 case ELF::EF_AMDGPU_MACH_R600_TURKS: AK = GK_TURKS; break;
83 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX600: AK = GK_GFX600; break;
84 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX601: AK = GK_GFX601; break;
85 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX602: AK = GK_GFX602; break;
86 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX700: AK = GK_GFX700; break;
87 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX701: AK = GK_GFX701; break;
88 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX702: AK = GK_GFX702; break;
89 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX703: AK = GK_GFX703; break;
90 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX704: AK = GK_GFX704; break;
91 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX705: AK = GK_GFX705; break;
92 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX801: AK = GK_GFX801; break;
93 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX802: AK = GK_GFX802; break;
94 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX803: AK = GK_GFX803; break;
95 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX805: AK = GK_GFX805; break;
96 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX810: AK = GK_GFX810; break;
97 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX900: AK = GK_GFX900; break;
98 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX902: AK = GK_GFX902; break;
99 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX904: AK = GK_GFX904; break;
100 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX906: AK = GK_GFX906; break;
101 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX908: AK = GK_GFX908; break;
102 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX909: AK = GK_GFX909; break;
103 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX90A: AK = GK_GFX90A; break;
104 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX90C: AK = GK_GFX90C; break;
105 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1010: AK = GK_GFX1010; break;
106 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1011: AK = GK_GFX1011; break;
107 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1012: AK = GK_GFX1012; break;
108 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1013: AK = GK_GFX1013; break;
109 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1030: AK = GK_GFX1030; break;
110 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1031: AK = GK_GFX1031; break;
111 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1032: AK = GK_GFX1032; break;
112 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1033: AK = GK_GFX1033; break;
113 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1034: AK = GK_GFX1034; break;
114 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1035: AK = GK_GFX1035; break;
115 case ELF::EF_AMDGPU_MACH_NONE: AK = GK_NONE; break;
116 }
117
118 StringRef GPUName = getArchNameAMDGCN(AK);
119 if (GPUName != "")
120 return GPUName;
121 return getArchNameR600(AK);
122 }
123
getElfMach(StringRef GPU)124 unsigned AMDGPUTargetStreamer::getElfMach(StringRef GPU) {
125 AMDGPU::GPUKind AK = parseArchAMDGCN(GPU);
126 if (AK == AMDGPU::GPUKind::GK_NONE)
127 AK = parseArchR600(GPU);
128
129 switch (AK) {
130 case GK_R600: return ELF::EF_AMDGPU_MACH_R600_R600;
131 case GK_R630: return ELF::EF_AMDGPU_MACH_R600_R630;
132 case GK_RS880: return ELF::EF_AMDGPU_MACH_R600_RS880;
133 case GK_RV670: return ELF::EF_AMDGPU_MACH_R600_RV670;
134 case GK_RV710: return ELF::EF_AMDGPU_MACH_R600_RV710;
135 case GK_RV730: return ELF::EF_AMDGPU_MACH_R600_RV730;
136 case GK_RV770: return ELF::EF_AMDGPU_MACH_R600_RV770;
137 case GK_CEDAR: return ELF::EF_AMDGPU_MACH_R600_CEDAR;
138 case GK_CYPRESS: return ELF::EF_AMDGPU_MACH_R600_CYPRESS;
139 case GK_JUNIPER: return ELF::EF_AMDGPU_MACH_R600_JUNIPER;
140 case GK_REDWOOD: return ELF::EF_AMDGPU_MACH_R600_REDWOOD;
141 case GK_SUMO: return ELF::EF_AMDGPU_MACH_R600_SUMO;
142 case GK_BARTS: return ELF::EF_AMDGPU_MACH_R600_BARTS;
143 case GK_CAICOS: return ELF::EF_AMDGPU_MACH_R600_CAICOS;
144 case GK_CAYMAN: return ELF::EF_AMDGPU_MACH_R600_CAYMAN;
145 case GK_TURKS: return ELF::EF_AMDGPU_MACH_R600_TURKS;
146 case GK_GFX600: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX600;
147 case GK_GFX601: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX601;
148 case GK_GFX602: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX602;
149 case GK_GFX700: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX700;
150 case GK_GFX701: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX701;
151 case GK_GFX702: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX702;
152 case GK_GFX703: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX703;
153 case GK_GFX704: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX704;
154 case GK_GFX705: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX705;
155 case GK_GFX801: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX801;
156 case GK_GFX802: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX802;
157 case GK_GFX803: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX803;
158 case GK_GFX805: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX805;
159 case GK_GFX810: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX810;
160 case GK_GFX900: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX900;
161 case GK_GFX902: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX902;
162 case GK_GFX904: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX904;
163 case GK_GFX906: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX906;
164 case GK_GFX908: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX908;
165 case GK_GFX909: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX909;
166 case GK_GFX90A: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX90A;
167 case GK_GFX90C: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX90C;
168 case GK_GFX1010: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1010;
169 case GK_GFX1011: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1011;
170 case GK_GFX1012: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1012;
171 case GK_GFX1013: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1013;
172 case GK_GFX1030: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1030;
173 case GK_GFX1031: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1031;
174 case GK_GFX1032: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1032;
175 case GK_GFX1033: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1033;
176 case GK_GFX1034: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1034;
177 case GK_GFX1035: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1035;
178 case GK_NONE: return ELF::EF_AMDGPU_MACH_NONE;
179 }
180
181 llvm_unreachable("unknown GPU");
182 }
183
184 //===----------------------------------------------------------------------===//
185 // AMDGPUTargetAsmStreamer
186 //===----------------------------------------------------------------------===//
187
// Constructs the textual-assembly target streamer on top of streamer S and
// records OS, the stream that the Emit* methods of this class print to.
AMDGPUTargetAsmStreamer::AMDGPUTargetAsmStreamer(MCStreamer &S,
                                                 formatted_raw_ostream &OS)
    : AMDGPUTargetStreamer(S), OS(OS) { }
191
192 // A hook for emitting stuff at the end.
193 // We use it for emitting the accumulated PAL metadata as directives.
194 // The PAL metadata is reset after it is emitted.
finish()195 void AMDGPUTargetAsmStreamer::finish() {
196 std::string S;
197 getPALMetadata()->toString(S);
198 OS << S;
199
200 // Reset the pal metadata so its data will not affect a compilation that
201 // reuses this object.
202 getPALMetadata()->reset();
203 }
204
EmitDirectiveAMDGCNTarget()205 void AMDGPUTargetAsmStreamer::EmitDirectiveAMDGCNTarget() {
206 OS << "\t.amdgcn_target \"" << getTargetID()->toString() << "\"\n";
207 }
208
EmitDirectiveHSACodeObjectVersion(uint32_t Major,uint32_t Minor)209 void AMDGPUTargetAsmStreamer::EmitDirectiveHSACodeObjectVersion(
210 uint32_t Major, uint32_t Minor) {
211 OS << "\t.hsa_code_object_version " <<
212 Twine(Major) << "," << Twine(Minor) << '\n';
213 }
214
215 void
EmitDirectiveHSACodeObjectISAV2(uint32_t Major,uint32_t Minor,uint32_t Stepping,StringRef VendorName,StringRef ArchName)216 AMDGPUTargetAsmStreamer::EmitDirectiveHSACodeObjectISAV2(uint32_t Major,
217 uint32_t Minor,
218 uint32_t Stepping,
219 StringRef VendorName,
220 StringRef ArchName) {
221 convertIsaVersionV2(Major, Minor, Stepping, TargetID->isSramEccOnOrAny(), TargetID->isXnackOnOrAny());
222 OS << "\t.hsa_code_object_isa " << Twine(Major) << "," << Twine(Minor) << ","
223 << Twine(Stepping) << ",\"" << VendorName << "\",\"" << ArchName << "\"\n";
224 }
225
226 void
EmitAMDKernelCodeT(const amd_kernel_code_t & Header)227 AMDGPUTargetAsmStreamer::EmitAMDKernelCodeT(const amd_kernel_code_t &Header) {
228 OS << "\t.amd_kernel_code_t\n";
229 dumpAmdKernelCode(&Header, OS, "\t\t");
230 OS << "\t.end_amd_kernel_code_t\n";
231 }
232
EmitAMDGPUSymbolType(StringRef SymbolName,unsigned Type)233 void AMDGPUTargetAsmStreamer::EmitAMDGPUSymbolType(StringRef SymbolName,
234 unsigned Type) {
235 switch (Type) {
236 default: llvm_unreachable("Invalid AMDGPU symbol type");
237 case ELF::STT_AMDGPU_HSA_KERNEL:
238 OS << "\t.amdgpu_hsa_kernel " << SymbolName << '\n' ;
239 break;
240 }
241 }
242
emitAMDGPULDS(MCSymbol * Symbol,unsigned Size,Align Alignment)243 void AMDGPUTargetAsmStreamer::emitAMDGPULDS(MCSymbol *Symbol, unsigned Size,
244 Align Alignment) {
245 OS << "\t.amdgpu_lds " << Symbol->getName() << ", " << Size << ", "
246 << Alignment.value() << '\n';
247 }
248
EmitISAVersion()249 bool AMDGPUTargetAsmStreamer::EmitISAVersion() {
250 OS << "\t.amd_amdgpu_isa \"" << getTargetID()->toString() << "\"\n";
251 return true;
252 }
253
EmitHSAMetadata(const AMDGPU::HSAMD::Metadata & HSAMetadata)254 bool AMDGPUTargetAsmStreamer::EmitHSAMetadata(
255 const AMDGPU::HSAMD::Metadata &HSAMetadata) {
256 std::string HSAMetadataString;
257 if (HSAMD::toString(HSAMetadata, HSAMetadataString))
258 return false;
259
260 OS << '\t' << HSAMD::AssemblerDirectiveBegin << '\n';
261 OS << HSAMetadataString << '\n';
262 OS << '\t' << HSAMD::AssemblerDirectiveEnd << '\n';
263 return true;
264 }
265
EmitHSAMetadata(msgpack::Document & HSAMetadataDoc,bool Strict)266 bool AMDGPUTargetAsmStreamer::EmitHSAMetadata(
267 msgpack::Document &HSAMetadataDoc, bool Strict) {
268 HSAMD::V3::MetadataVerifier Verifier(Strict);
269 if (!Verifier.verify(HSAMetadataDoc.getRoot()))
270 return false;
271
272 std::string HSAMetadataString;
273 raw_string_ostream StrOS(HSAMetadataString);
274 HSAMetadataDoc.toYAML(StrOS);
275
276 OS << '\t' << HSAMD::V3::AssemblerDirectiveBegin << '\n';
277 OS << StrOS.str() << '\n';
278 OS << '\t' << HSAMD::V3::AssemblerDirectiveEnd << '\n';
279 return true;
280 }
281
EmitCodeEnd(const MCSubtargetInfo & STI)282 bool AMDGPUTargetAsmStreamer::EmitCodeEnd(const MCSubtargetInfo &STI) {
283 const uint32_t Encoded_s_code_end = 0xbf9f0000;
284 const uint32_t Encoded_s_nop = 0xbf800000;
285 uint32_t Encoded_pad = Encoded_s_code_end;
286
287 // Instruction cache line size in bytes.
288 const unsigned Log2CacheLineSize = 6;
289 const unsigned CacheLineSize = 1u << Log2CacheLineSize;
290
291 // Extra padding amount in bytes to support prefetch mode 3.
292 unsigned FillSize = 3 * CacheLineSize;
293
294 if (AMDGPU::isGFX90A(STI)) {
295 Encoded_pad = Encoded_s_nop;
296 FillSize = 16 * CacheLineSize;
297 }
298
299 OS << "\t.p2alignl " << Log2CacheLineSize << ", " << Encoded_pad << '\n';
300 OS << "\t.fill " << (FillSize / 4) << ", 4, " << Encoded_pad << '\n';
301 return true;
302 }
303
// Prints the kernel descriptor KD for KernelName as a block of .amdhsa_*
// directives, opened by .amdhsa_kernel and closed by .end_amdhsa_kernel.
//
// Most directive values are bit-fields read from KD's kernel_code_properties
// and compute_pgm_rsrc1/2/3 members. NextVGPR and NextSGPR are printed as
// given. Several directives are only printed for particular ISA major
// versions, for gfx90a, or depending on hasArchitectedFlatScratch(STI).
void AMDGPUTargetAsmStreamer::EmitAmdhsaKernelDescriptor(
    const MCSubtargetInfo &STI, StringRef KernelName,
    const amdhsa::kernel_descriptor_t &KD, uint64_t NextVGPR, uint64_t NextSGPR,
    bool ReserveVCC, bool ReserveFlatScr) {
  IsaVersion IVersion = getIsaVersion(STI.getCPU());

  OS << "\t.amdhsa_kernel " << KernelName << '\n';

// Prints one directive line whose value is the FIELD_NAME bit-field extracted
// from KERNEL_DESC.MEMBER_NAME with AMDHSA_BITS_GET.
#define PRINT_FIELD(STREAM, DIRECTIVE, KERNEL_DESC, MEMBER_NAME, FIELD_NAME)   \
  STREAM << "\t\t" << DIRECTIVE << " "                                         \
         << AMDHSA_BITS_GET(KERNEL_DESC.MEMBER_NAME, FIELD_NAME) << '\n';

  OS << "\t\t.amdhsa_group_segment_fixed_size " << KD.group_segment_fixed_size
     << '\n';
  OS << "\t\t.amdhsa_private_segment_fixed_size "
     << KD.private_segment_fixed_size << '\n';
  OS << "\t\t.amdhsa_kernarg_size " << KD.kernarg_size << '\n';

  // User SGPR enables, read from kernel_code_properties.
  if (!hasArchitectedFlatScratch(STI))
    PRINT_FIELD(
        OS, ".amdhsa_user_sgpr_private_segment_buffer", KD,
        kernel_code_properties,
        amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER);
  PRINT_FIELD(OS, ".amdhsa_user_sgpr_dispatch_ptr", KD,
              kernel_code_properties,
              amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR);
  PRINT_FIELD(OS, ".amdhsa_user_sgpr_queue_ptr", KD,
              kernel_code_properties,
              amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR);
  PRINT_FIELD(OS, ".amdhsa_user_sgpr_kernarg_segment_ptr", KD,
              kernel_code_properties,
              amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR);
  PRINT_FIELD(OS, ".amdhsa_user_sgpr_dispatch_id", KD,
              kernel_code_properties,
              amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID);
  if (!hasArchitectedFlatScratch(STI))
    PRINT_FIELD(OS, ".amdhsa_user_sgpr_flat_scratch_init", KD,
                kernel_code_properties,
                amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT);
  PRINT_FIELD(OS, ".amdhsa_user_sgpr_private_segment_size", KD,
              kernel_code_properties,
              amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE);
  if (IVersion.Major >= 10)
    PRINT_FIELD(OS, ".amdhsa_wavefront_size32", KD,
                kernel_code_properties,
                amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32);
  // The same compute_pgm_rsrc2 bit is printed under a different directive
  // name depending on whether flat scratch is architected.
  PRINT_FIELD(OS,
              (hasArchitectedFlatScratch(STI)
                   ? ".amdhsa_enable_private_segment"
                   : ".amdhsa_system_sgpr_private_segment_wavefront_offset"),
              KD, compute_pgm_rsrc2,
              amdhsa::COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT);
  PRINT_FIELD(OS, ".amdhsa_system_sgpr_workgroup_id_x", KD,
              compute_pgm_rsrc2,
              amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X);
  PRINT_FIELD(OS, ".amdhsa_system_sgpr_workgroup_id_y", KD,
              compute_pgm_rsrc2,
              amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y);
  PRINT_FIELD(OS, ".amdhsa_system_sgpr_workgroup_id_z", KD,
              compute_pgm_rsrc2,
              amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z);
  PRINT_FIELD(OS, ".amdhsa_system_sgpr_workgroup_info", KD,
              compute_pgm_rsrc2,
              amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO);
  PRINT_FIELD(OS, ".amdhsa_system_vgpr_workitem_id", KD,
              compute_pgm_rsrc2,
              amdhsa::COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID);

  // These directives are required.
  OS << "\t\t.amdhsa_next_free_vgpr " << NextVGPR << '\n';
  OS << "\t\t.amdhsa_next_free_sgpr " << NextSGPR << '\n';

  // The printed accum offset is (stored field value + 1) * 4.
  if (AMDGPU::isGFX90A(STI))
    OS << "\t\t.amdhsa_accum_offset " <<
      (AMDHSA_BITS_GET(KD.compute_pgm_rsrc3,
                       amdhsa::COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET) + 1) * 4
      << '\n';

  // The reserve directives are only printed when the corresponding flag is
  // false.
  if (!ReserveVCC)
    OS << "\t\t.amdhsa_reserve_vcc " << ReserveVCC << '\n';
  if (IVersion.Major >= 7 && !ReserveFlatScr && !hasArchitectedFlatScratch(STI))
    OS << "\t\t.amdhsa_reserve_flat_scratch " << ReserveFlatScr << '\n';

  // .amdhsa_reserve_xnack_mask is printed only for HSA ABI V3 and V4, and
  // only when the target ID reports xnack as supported.
  if (Optional<uint8_t> HsaAbiVer = getHsaAbiVersion(&STI)) {
    switch (*HsaAbiVer) {
    default:
      break;
    case ELF::ELFABIVERSION_AMDGPU_HSA_V2:
      break;
    case ELF::ELFABIVERSION_AMDGPU_HSA_V3:
    case ELF::ELFABIVERSION_AMDGPU_HSA_V4:
      if (getTargetID()->isXnackSupported())
        OS << "\t\t.amdhsa_reserve_xnack_mask " << getTargetID()->isXnackOnOrAny() << '\n';
      break;
    }
  }

  // Floating-point mode fields, read from compute_pgm_rsrc1.
  PRINT_FIELD(OS, ".amdhsa_float_round_mode_32", KD,
              compute_pgm_rsrc1,
              amdhsa::COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32);
  PRINT_FIELD(OS, ".amdhsa_float_round_mode_16_64", KD,
              compute_pgm_rsrc1,
              amdhsa::COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64);
  PRINT_FIELD(OS, ".amdhsa_float_denorm_mode_32", KD,
              compute_pgm_rsrc1,
              amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32);
  PRINT_FIELD(OS, ".amdhsa_float_denorm_mode_16_64", KD,
              compute_pgm_rsrc1,
              amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64);
  PRINT_FIELD(OS, ".amdhsa_dx10_clamp", KD,
              compute_pgm_rsrc1,
              amdhsa::COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP);
  PRINT_FIELD(OS, ".amdhsa_ieee_mode", KD,
              compute_pgm_rsrc1,
              amdhsa::COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE);
  if (IVersion.Major >= 9)
    PRINT_FIELD(OS, ".amdhsa_fp16_overflow", KD,
                compute_pgm_rsrc1,
                amdhsa::COMPUTE_PGM_RSRC1_FP16_OVFL);
  if (AMDGPU::isGFX90A(STI))
    PRINT_FIELD(OS, ".amdhsa_tg_split", KD,
                compute_pgm_rsrc3,
                amdhsa::COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT);
  if (IVersion.Major >= 10) {
    PRINT_FIELD(OS, ".amdhsa_workgroup_processor_mode", KD,
                compute_pgm_rsrc1,
                amdhsa::COMPUTE_PGM_RSRC1_WGP_MODE);
    PRINT_FIELD(OS, ".amdhsa_memory_ordered", KD,
                compute_pgm_rsrc1,
                amdhsa::COMPUTE_PGM_RSRC1_MEM_ORDERED);
    PRINT_FIELD(OS, ".amdhsa_forward_progress", KD,
                compute_pgm_rsrc1,
                amdhsa::COMPUTE_PGM_RSRC1_FWD_PROGRESS);
  }
  // Exception enable fields, read from compute_pgm_rsrc2.
  PRINT_FIELD(
      OS, ".amdhsa_exception_fp_ieee_invalid_op", KD,
      compute_pgm_rsrc2,
      amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION);
  PRINT_FIELD(OS, ".amdhsa_exception_fp_denorm_src", KD,
              compute_pgm_rsrc2,
              amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE);
  PRINT_FIELD(
      OS, ".amdhsa_exception_fp_ieee_div_zero", KD,
      compute_pgm_rsrc2,
      amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO);
  PRINT_FIELD(OS, ".amdhsa_exception_fp_ieee_overflow", KD,
              compute_pgm_rsrc2,
              amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW);
  PRINT_FIELD(OS, ".amdhsa_exception_fp_ieee_underflow", KD,
              compute_pgm_rsrc2,
              amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW);
  PRINT_FIELD(OS, ".amdhsa_exception_fp_ieee_inexact", KD,
              compute_pgm_rsrc2,
              amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT);
  PRINT_FIELD(OS, ".amdhsa_exception_int_div_zero", KD,
              compute_pgm_rsrc2,
              amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO);
#undef PRINT_FIELD

  OS << "\t.end_amdhsa_kernel\n";
}
465
466 //===----------------------------------------------------------------------===//
467 // AMDGPUTargetELFStreamer
468 //===----------------------------------------------------------------------===//
469
// Constructs the ELF target streamer on top of streamer S, recording STI and
// S in the STI and Streamer members used by the other methods of this class.
AMDGPUTargetELFStreamer::AMDGPUTargetELFStreamer(MCStreamer &S,
                                                 const MCSubtargetInfo &STI)
    : AMDGPUTargetStreamer(S), STI(STI), Streamer(S) {}
473
// Returns the Streamer member viewed as an MCELFStreamer.
// NOTE(review): the static_cast is unchecked; presumably this class is only
// ever constructed over an MCELFStreamer — confirm at the construction sites.
MCELFStreamer &AMDGPUTargetELFStreamer::getStreamer() {
  return static_cast<MCELFStreamer &>(Streamer);
}
477
478 // A hook for emitting stuff at the end.
479 // We use it for emitting the accumulated PAL metadata as a .note record.
480 // The PAL metadata is reset after it is emitted.
finish()481 void AMDGPUTargetELFStreamer::finish() {
482 MCAssembler &MCA = getStreamer().getAssembler();
483 MCA.setELFHeaderEFlags(getEFlags());
484
485 std::string Blob;
486 const char *Vendor = getPALMetadata()->getVendor();
487 unsigned Type = getPALMetadata()->getType();
488 getPALMetadata()->toBlob(Type, Blob);
489 if (Blob.empty())
490 return;
491 EmitNote(Vendor, MCConstantExpr::create(Blob.size(), getContext()), Type,
492 [&](MCELFStreamer &OS) { OS.emitBytes(Blob); });
493
494 // Reset the pal metadata so its data will not affect a compilation that
495 // reuses this object.
496 getPALMetadata()->reset();
497 }
498
EmitNote(StringRef Name,const MCExpr * DescSZ,unsigned NoteType,function_ref<void (MCELFStreamer &)> EmitDesc)499 void AMDGPUTargetELFStreamer::EmitNote(
500 StringRef Name, const MCExpr *DescSZ, unsigned NoteType,
501 function_ref<void(MCELFStreamer &)> EmitDesc) {
502 auto &S = getStreamer();
503 auto &Context = S.getContext();
504
505 auto NameSZ = Name.size() + 1;
506
507 unsigned NoteFlags = 0;
508 // TODO Apparently, this is currently needed for OpenCL as mentioned in
509 // https://reviews.llvm.org/D74995
510 if (STI.getTargetTriple().getOS() == Triple::AMDHSA)
511 NoteFlags = ELF::SHF_ALLOC;
512
513 S.PushSection();
514 S.SwitchSection(
515 Context.getELFSection(ElfNote::SectionName, ELF::SHT_NOTE, NoteFlags));
516 S.emitInt32(NameSZ); // namesz
517 S.emitValue(DescSZ, 4); // descz
518 S.emitInt32(NoteType); // type
519 S.emitBytes(Name); // name
520 S.emitValueToAlignment(4, 0, 1, 0); // padding 0
521 EmitDesc(S); // desc
522 S.emitValueToAlignment(4, 0, 1, 0); // padding 0
523 S.PopSection();
524 }
525
getEFlags()526 unsigned AMDGPUTargetELFStreamer::getEFlags() {
527 switch (STI.getTargetTriple().getArch()) {
528 default:
529 llvm_unreachable("Unsupported Arch");
530 case Triple::r600:
531 return getEFlagsR600();
532 case Triple::amdgcn:
533 return getEFlagsAMDGCN();
534 }
535 }
536
getEFlagsR600()537 unsigned AMDGPUTargetELFStreamer::getEFlagsR600() {
538 assert(STI.getTargetTriple().getArch() == Triple::r600);
539
540 return getElfMach(STI.getCPU());
541 }
542
getEFlagsAMDGCN()543 unsigned AMDGPUTargetELFStreamer::getEFlagsAMDGCN() {
544 assert(STI.getTargetTriple().getArch() == Triple::amdgcn);
545
546 switch (STI.getTargetTriple().getOS()) {
547 default:
548 // TODO: Why are some tests have "mingw" listed as OS?
549 // llvm_unreachable("Unsupported OS");
550 case Triple::UnknownOS:
551 return getEFlagsUnknownOS();
552 case Triple::AMDHSA:
553 return getEFlagsAMDHSA();
554 case Triple::AMDPAL:
555 return getEFlagsAMDPAL();
556 case Triple::Mesa3D:
557 return getEFlagsMesa3D();
558 }
559 }
560
// Returns the e_flags used when the OS is unknown or has no dedicated case in
// getEFlagsAMDGCN: the V3 flag layout.
unsigned AMDGPUTargetELFStreamer::getEFlagsUnknownOS() {
  // TODO: Why do some tests have "mingw" listed as the OS?
  // assert(STI.getTargetTriple().getOS() == Triple::UnknownOS);

  return getEFlagsV3();
}
567
getEFlagsAMDHSA()568 unsigned AMDGPUTargetELFStreamer::getEFlagsAMDHSA() {
569 assert(STI.getTargetTriple().getOS() == Triple::AMDHSA);
570
571 if (Optional<uint8_t> HsaAbiVer = getHsaAbiVersion(&STI)) {
572 switch (*HsaAbiVer) {
573 case ELF::ELFABIVERSION_AMDGPU_HSA_V2:
574 case ELF::ELFABIVERSION_AMDGPU_HSA_V3:
575 return getEFlagsV3();
576 case ELF::ELFABIVERSION_AMDGPU_HSA_V4:
577 return getEFlagsV4();
578 }
579 }
580
581 llvm_unreachable("HSA OS ABI Version identification must be defined");
582 }
583
// Returns the e_flags for the AMDPAL OS, which uses the V3 flag layout.
unsigned AMDGPUTargetELFStreamer::getEFlagsAMDPAL() {
  assert(STI.getTargetTriple().getOS() == Triple::AMDPAL);

  return getEFlagsV3();
}
589
// Returns the e_flags for the Mesa3D OS, which uses the V3 flag layout.
unsigned AMDGPUTargetELFStreamer::getEFlagsMesa3D() {
  assert(STI.getTargetTriple().getOS() == Triple::Mesa3D);

  return getEFlagsV3();
}
595
getEFlagsV3()596 unsigned AMDGPUTargetELFStreamer::getEFlagsV3() {
597 unsigned EFlagsV3 = 0;
598
599 // mach.
600 EFlagsV3 |= getElfMach(STI.getCPU());
601
602 // xnack.
603 if (getTargetID()->isXnackOnOrAny())
604 EFlagsV3 |= ELF::EF_AMDGPU_FEATURE_XNACK_V3;
605 // sramecc.
606 if (getTargetID()->isSramEccOnOrAny())
607 EFlagsV3 |= ELF::EF_AMDGPU_FEATURE_SRAMECC_V3;
608
609 return EFlagsV3;
610 }
611
getEFlagsV4()612 unsigned AMDGPUTargetELFStreamer::getEFlagsV4() {
613 unsigned EFlagsV4 = 0;
614
615 // mach.
616 EFlagsV4 |= getElfMach(STI.getCPU());
617
618 // xnack.
619 switch (getTargetID()->getXnackSetting()) {
620 case AMDGPU::IsaInfo::TargetIDSetting::Unsupported:
621 EFlagsV4 |= ELF::EF_AMDGPU_FEATURE_XNACK_UNSUPPORTED_V4;
622 break;
623 case AMDGPU::IsaInfo::TargetIDSetting::Any:
624 EFlagsV4 |= ELF::EF_AMDGPU_FEATURE_XNACK_ANY_V4;
625 break;
626 case AMDGPU::IsaInfo::TargetIDSetting::Off:
627 EFlagsV4 |= ELF::EF_AMDGPU_FEATURE_XNACK_OFF_V4;
628 break;
629 case AMDGPU::IsaInfo::TargetIDSetting::On:
630 EFlagsV4 |= ELF::EF_AMDGPU_FEATURE_XNACK_ON_V4;
631 break;
632 }
633 // sramecc.
634 switch (getTargetID()->getSramEccSetting()) {
635 case AMDGPU::IsaInfo::TargetIDSetting::Unsupported:
636 EFlagsV4 |= ELF::EF_AMDGPU_FEATURE_SRAMECC_UNSUPPORTED_V4;
637 break;
638 case AMDGPU::IsaInfo::TargetIDSetting::Any:
639 EFlagsV4 |= ELF::EF_AMDGPU_FEATURE_SRAMECC_ANY_V4;
640 break;
641 case AMDGPU::IsaInfo::TargetIDSetting::Off:
642 EFlagsV4 |= ELF::EF_AMDGPU_FEATURE_SRAMECC_OFF_V4;
643 break;
644 case AMDGPU::IsaInfo::TargetIDSetting::On:
645 EFlagsV4 |= ELF::EF_AMDGPU_FEATURE_SRAMECC_ON_V4;
646 break;
647 }
648
649 return EFlagsV4;
650 }
651
// Intentionally empty: the ELF streamer emits nothing for the .amdgcn_target
// directive.
void AMDGPUTargetELFStreamer::EmitDirectiveAMDGCNTarget() {}
653
EmitDirectiveHSACodeObjectVersion(uint32_t Major,uint32_t Minor)654 void AMDGPUTargetELFStreamer::EmitDirectiveHSACodeObjectVersion(
655 uint32_t Major, uint32_t Minor) {
656
657 EmitNote(ElfNote::NoteNameV2, MCConstantExpr::create(8, getContext()),
658 ELF::NT_AMD_HSA_CODE_OBJECT_VERSION, [&](MCELFStreamer &OS) {
659 OS.emitInt32(Major);
660 OS.emitInt32(Minor);
661 });
662 }
663
664 void
EmitDirectiveHSACodeObjectISAV2(uint32_t Major,uint32_t Minor,uint32_t Stepping,StringRef VendorName,StringRef ArchName)665 AMDGPUTargetELFStreamer::EmitDirectiveHSACodeObjectISAV2(uint32_t Major,
666 uint32_t Minor,
667 uint32_t Stepping,
668 StringRef VendorName,
669 StringRef ArchName) {
670 uint16_t VendorNameSize = VendorName.size() + 1;
671 uint16_t ArchNameSize = ArchName.size() + 1;
672
673 unsigned DescSZ = sizeof(VendorNameSize) + sizeof(ArchNameSize) +
674 sizeof(Major) + sizeof(Minor) + sizeof(Stepping) +
675 VendorNameSize + ArchNameSize;
676
677 convertIsaVersionV2(Major, Minor, Stepping, TargetID->isSramEccOnOrAny(), TargetID->isXnackOnOrAny());
678 EmitNote(ElfNote::NoteNameV2, MCConstantExpr::create(DescSZ, getContext()),
679 ELF::NT_AMD_HSA_ISA_VERSION, [&](MCELFStreamer &OS) {
680 OS.emitInt16(VendorNameSize);
681 OS.emitInt16(ArchNameSize);
682 OS.emitInt32(Major);
683 OS.emitInt32(Minor);
684 OS.emitInt32(Stepping);
685 OS.emitBytes(VendorName);
686 OS.emitInt8(0); // NULL terminate VendorName
687 OS.emitBytes(ArchName);
688 OS.emitInt8(0); // NULL terminte ArchName
689 });
690 }
691
692 void
EmitAMDKernelCodeT(const amd_kernel_code_t & Header)693 AMDGPUTargetELFStreamer::EmitAMDKernelCodeT(const amd_kernel_code_t &Header) {
694
695 MCStreamer &OS = getStreamer();
696 OS.PushSection();
697 OS.emitBytes(StringRef((const char*)&Header, sizeof(Header)));
698 OS.PopSection();
699 }
700
EmitAMDGPUSymbolType(StringRef SymbolName,unsigned Type)701 void AMDGPUTargetELFStreamer::EmitAMDGPUSymbolType(StringRef SymbolName,
702 unsigned Type) {
703 MCSymbolELF *Symbol = cast<MCSymbolELF>(
704 getStreamer().getContext().getOrCreateSymbol(SymbolName));
705 Symbol->setType(Type);
706 }
707
emitAMDGPULDS(MCSymbol * Symbol,unsigned Size,Align Alignment)708 void AMDGPUTargetELFStreamer::emitAMDGPULDS(MCSymbol *Symbol, unsigned Size,
709 Align Alignment) {
710 MCSymbolELF *SymbolELF = cast<MCSymbolELF>(Symbol);
711 SymbolELF->setType(ELF::STT_OBJECT);
712
713 if (!SymbolELF->isBindingSet()) {
714 SymbolELF->setBinding(ELF::STB_GLOBAL);
715 SymbolELF->setExternal(true);
716 }
717
718 if (SymbolELF->declareCommon(Size, Alignment.value(), true)) {
719 report_fatal_error("Symbol: " + Symbol->getName() +
720 " redeclared as different type");
721 }
722
723 SymbolELF->setIndex(ELF::SHN_AMDGPU_LDS);
724 SymbolELF->setSize(MCConstantExpr::create(Size, getContext()));
725 }
726
EmitISAVersion()727 bool AMDGPUTargetELFStreamer::EmitISAVersion() {
728 // Create two labels to mark the beginning and end of the desc field
729 // and a MCExpr to calculate the size of the desc field.
730 auto &Context = getContext();
731 auto *DescBegin = Context.createTempSymbol();
732 auto *DescEnd = Context.createTempSymbol();
733 auto *DescSZ = MCBinaryExpr::createSub(
734 MCSymbolRefExpr::create(DescEnd, Context),
735 MCSymbolRefExpr::create(DescBegin, Context), Context);
736
737 EmitNote(ElfNote::NoteNameV2, DescSZ, ELF::NT_AMD_HSA_ISA_NAME,
738 [&](MCELFStreamer &OS) {
739 OS.emitLabel(DescBegin);
740 OS.emitBytes(getTargetID()->toString());
741 OS.emitLabel(DescEnd);
742 });
743 return true;
744 }
745
EmitHSAMetadata(msgpack::Document & HSAMetadataDoc,bool Strict)746 bool AMDGPUTargetELFStreamer::EmitHSAMetadata(msgpack::Document &HSAMetadataDoc,
747 bool Strict) {
748 HSAMD::V3::MetadataVerifier Verifier(Strict);
749 if (!Verifier.verify(HSAMetadataDoc.getRoot()))
750 return false;
751
752 std::string HSAMetadataString;
753 HSAMetadataDoc.writeToBlob(HSAMetadataString);
754
755 // Create two labels to mark the beginning and end of the desc field
756 // and a MCExpr to calculate the size of the desc field.
757 auto &Context = getContext();
758 auto *DescBegin = Context.createTempSymbol();
759 auto *DescEnd = Context.createTempSymbol();
760 auto *DescSZ = MCBinaryExpr::createSub(
761 MCSymbolRefExpr::create(DescEnd, Context),
762 MCSymbolRefExpr::create(DescBegin, Context), Context);
763
764 EmitNote(ElfNote::NoteNameV3, DescSZ, ELF::NT_AMDGPU_METADATA,
765 [&](MCELFStreamer &OS) {
766 OS.emitLabel(DescBegin);
767 OS.emitBytes(HSAMetadataString);
768 OS.emitLabel(DescEnd);
769 });
770 return true;
771 }
772
EmitHSAMetadata(const AMDGPU::HSAMD::Metadata & HSAMetadata)773 bool AMDGPUTargetELFStreamer::EmitHSAMetadata(
774 const AMDGPU::HSAMD::Metadata &HSAMetadata) {
775 std::string HSAMetadataString;
776 if (HSAMD::toString(HSAMetadata, HSAMetadataString))
777 return false;
778
779 // Create two labels to mark the beginning and end of the desc field
780 // and a MCExpr to calculate the size of the desc field.
781 auto &Context = getContext();
782 auto *DescBegin = Context.createTempSymbol();
783 auto *DescEnd = Context.createTempSymbol();
784 auto *DescSZ = MCBinaryExpr::createSub(
785 MCSymbolRefExpr::create(DescEnd, Context),
786 MCSymbolRefExpr::create(DescBegin, Context), Context);
787
788 EmitNote(ElfNote::NoteNameV2, DescSZ, ELF::NT_AMD_HSA_METADATA,
789 [&](MCELFStreamer &OS) {
790 OS.emitLabel(DescBegin);
791 OS.emitBytes(HSAMetadataString);
792 OS.emitLabel(DescEnd);
793 });
794 return true;
795 }
796
EmitCodeEnd(const MCSubtargetInfo & STI)797 bool AMDGPUTargetELFStreamer::EmitCodeEnd(const MCSubtargetInfo &STI) {
798 const uint32_t Encoded_s_code_end = 0xbf9f0000;
799 const uint32_t Encoded_s_nop = 0xbf800000;
800 uint32_t Encoded_pad = Encoded_s_code_end;
801
802 // Instruction cache line size in bytes.
803 const unsigned Log2CacheLineSize = 6;
804 const unsigned CacheLineSize = 1u << Log2CacheLineSize;
805
806 // Extra padding amount in bytes to support prefetch mode 3.
807 unsigned FillSize = 3 * CacheLineSize;
808
809 if (AMDGPU::isGFX90A(STI)) {
810 Encoded_pad = Encoded_s_nop;
811 FillSize = 16 * CacheLineSize;
812 }
813
814 MCStreamer &OS = getStreamer();
815 OS.PushSection();
816 OS.emitValueToAlignment(CacheLineSize, Encoded_pad, 4);
817 for (unsigned I = 0; I < FillSize; I += 4)
818 OS.emitInt32(Encoded_pad);
819 OS.PopSection();
820 return true;
821 }
822
EmitAmdhsaKernelDescriptor(const MCSubtargetInfo & STI,StringRef KernelName,const amdhsa::kernel_descriptor_t & KernelDescriptor,uint64_t NextVGPR,uint64_t NextSGPR,bool ReserveVCC,bool ReserveFlatScr)823 void AMDGPUTargetELFStreamer::EmitAmdhsaKernelDescriptor(
824 const MCSubtargetInfo &STI, StringRef KernelName,
825 const amdhsa::kernel_descriptor_t &KernelDescriptor, uint64_t NextVGPR,
826 uint64_t NextSGPR, bool ReserveVCC, bool ReserveFlatScr) {
827 auto &Streamer = getStreamer();
828 auto &Context = Streamer.getContext();
829
830 MCSymbolELF *KernelCodeSymbol = cast<MCSymbolELF>(
831 Context.getOrCreateSymbol(Twine(KernelName)));
832 MCSymbolELF *KernelDescriptorSymbol = cast<MCSymbolELF>(
833 Context.getOrCreateSymbol(Twine(KernelName) + Twine(".kd")));
834
835 // Copy kernel descriptor symbol's binding, other and visibility from the
836 // kernel code symbol.
837 KernelDescriptorSymbol->setBinding(KernelCodeSymbol->getBinding());
838 KernelDescriptorSymbol->setOther(KernelCodeSymbol->getOther());
839 KernelDescriptorSymbol->setVisibility(KernelCodeSymbol->getVisibility());
840 // Kernel descriptor symbol's type and size are fixed.
841 KernelDescriptorSymbol->setType(ELF::STT_OBJECT);
842 KernelDescriptorSymbol->setSize(
843 MCConstantExpr::create(sizeof(KernelDescriptor), Context));
844
845 // The visibility of the kernel code symbol must be protected or less to allow
846 // static relocations from the kernel descriptor to be used.
847 if (KernelCodeSymbol->getVisibility() == ELF::STV_DEFAULT)
848 KernelCodeSymbol->setVisibility(ELF::STV_PROTECTED);
849
850 Streamer.emitLabel(KernelDescriptorSymbol);
851 Streamer.emitInt32(KernelDescriptor.group_segment_fixed_size);
852 Streamer.emitInt32(KernelDescriptor.private_segment_fixed_size);
853 Streamer.emitInt32(KernelDescriptor.kernarg_size);
854
855 for (uint8_t Res : KernelDescriptor.reserved0)
856 Streamer.emitInt8(Res);
857
858 // FIXME: Remove the use of VK_AMDGPU_REL64 in the expression below. The
859 // expression being created is:
860 // (start of kernel code) - (start of kernel descriptor)
861 // It implies R_AMDGPU_REL64, but ends up being R_AMDGPU_ABS64.
862 Streamer.emitValue(MCBinaryExpr::createSub(
863 MCSymbolRefExpr::create(
864 KernelCodeSymbol, MCSymbolRefExpr::VK_AMDGPU_REL64, Context),
865 MCSymbolRefExpr::create(
866 KernelDescriptorSymbol, MCSymbolRefExpr::VK_None, Context),
867 Context),
868 sizeof(KernelDescriptor.kernel_code_entry_byte_offset));
869 for (uint8_t Res : KernelDescriptor.reserved1)
870 Streamer.emitInt8(Res);
871 Streamer.emitInt32(KernelDescriptor.compute_pgm_rsrc3);
872 Streamer.emitInt32(KernelDescriptor.compute_pgm_rsrc1);
873 Streamer.emitInt32(KernelDescriptor.compute_pgm_rsrc2);
874 Streamer.emitInt16(KernelDescriptor.kernel_code_properties);
875 for (uint8_t Res : KernelDescriptor.reserved2)
876 Streamer.emitInt8(Res);
877 }
878