1 //===--- AMDGPUHSAMetadataStreamer.cpp --------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// AMDGPU HSA Metadata Streamer.
11 ///
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "AMDGPUHSAMetadataStreamer.h"
16 #include "AMDGPU.h"
17 #include "GCNSubtarget.h"
18 #include "MCTargetDesc/AMDGPUTargetStreamer.h"
19 #include "SIMachineFunctionInfo.h"
20 #include "SIProgramInfo.h"
21 #include "llvm/IR/Module.h"
22 using namespace llvm;
23 
getArgumentTypeAlign(const Argument & Arg,const DataLayout & DL)24 static std::pair<Type *, Align> getArgumentTypeAlign(const Argument &Arg,
25                                                      const DataLayout &DL) {
26   Type *Ty = Arg.getType();
27   MaybeAlign ArgAlign;
28   if (Arg.hasByRefAttr()) {
29     Ty = Arg.getParamByRefType();
30     ArgAlign = Arg.getParamAlign();
31   }
32 
33   if (!ArgAlign)
34     ArgAlign = DL.getABITypeAlign(Ty);
35 
36   return std::pair(Ty, *ArgAlign);
37 }
38 
39 namespace llvm {
40 
41 static cl::opt<bool> DumpHSAMetadata(
42     "amdgpu-dump-hsa-metadata",
43     cl::desc("Dump AMDGPU HSA Metadata"));
44 static cl::opt<bool> VerifyHSAMetadata(
45     "amdgpu-verify-hsa-metadata",
46     cl::desc("Verify AMDGPU HSA Metadata"));
47 
48 namespace AMDGPU {
49 namespace HSAMD {
50 
51 //===----------------------------------------------------------------------===//
52 // HSAMetadataStreamerV4
53 //===----------------------------------------------------------------------===//
54 
dump(StringRef HSAMetadataString) const55 void MetadataStreamerMsgPackV4::dump(StringRef HSAMetadataString) const {
56   errs() << "AMDGPU HSA Metadata:\n" << HSAMetadataString << '\n';
57 }
58 
verify(StringRef HSAMetadataString) const59 void MetadataStreamerMsgPackV4::verify(StringRef HSAMetadataString) const {
60   errs() << "AMDGPU HSA Metadata Parser Test: ";
61 
62   msgpack::Document FromHSAMetadataString;
63 
64   if (!FromHSAMetadataString.fromYAML(HSAMetadataString)) {
65     errs() << "FAIL\n";
66     return;
67   }
68 
69   std::string ToHSAMetadataString;
70   raw_string_ostream StrOS(ToHSAMetadataString);
71   FromHSAMetadataString.toYAML(StrOS);
72 
73   errs() << (HSAMetadataString == StrOS.str() ? "PASS" : "FAIL") << '\n';
74   if (HSAMetadataString != ToHSAMetadataString) {
75     errs() << "Original input: " << HSAMetadataString << '\n'
76            << "Produced output: " << StrOS.str() << '\n';
77   }
78 }
79 
80 std::optional<StringRef>
getAccessQualifier(StringRef AccQual) const81 MetadataStreamerMsgPackV4::getAccessQualifier(StringRef AccQual) const {
82   return StringSwitch<std::optional<StringRef>>(AccQual)
83       .Case("read_only", StringRef("read_only"))
84       .Case("write_only", StringRef("write_only"))
85       .Case("read_write", StringRef("read_write"))
86       .Default(std::nullopt);
87 }
88 
getAddressSpaceQualifier(unsigned AddressSpace) const89 std::optional<StringRef> MetadataStreamerMsgPackV4::getAddressSpaceQualifier(
90     unsigned AddressSpace) const {
91   switch (AddressSpace) {
92   case AMDGPUAS::PRIVATE_ADDRESS:
93     return StringRef("private");
94   case AMDGPUAS::GLOBAL_ADDRESS:
95     return StringRef("global");
96   case AMDGPUAS::CONSTANT_ADDRESS:
97     return StringRef("constant");
98   case AMDGPUAS::LOCAL_ADDRESS:
99     return StringRef("local");
100   case AMDGPUAS::FLAT_ADDRESS:
101     return StringRef("generic");
102   case AMDGPUAS::REGION_ADDRESS:
103     return StringRef("region");
104   default:
105     return std::nullopt;
106   }
107 }
108 
109 StringRef
getValueKind(Type * Ty,StringRef TypeQual,StringRef BaseTypeName) const110 MetadataStreamerMsgPackV4::getValueKind(Type *Ty, StringRef TypeQual,
111                                         StringRef BaseTypeName) const {
112   if (TypeQual.contains("pipe"))
113     return "pipe";
114 
115   return StringSwitch<StringRef>(BaseTypeName)
116       .Case("image1d_t", "image")
117       .Case("image1d_array_t", "image")
118       .Case("image1d_buffer_t", "image")
119       .Case("image2d_t", "image")
120       .Case("image2d_array_t", "image")
121       .Case("image2d_array_depth_t", "image")
122       .Case("image2d_array_msaa_t", "image")
123       .Case("image2d_array_msaa_depth_t", "image")
124       .Case("image2d_depth_t", "image")
125       .Case("image2d_msaa_t", "image")
126       .Case("image2d_msaa_depth_t", "image")
127       .Case("image3d_t", "image")
128       .Case("sampler_t", "sampler")
129       .Case("queue_t", "queue")
130       .Default(isa<PointerType>(Ty)
131                    ? (Ty->getPointerAddressSpace() == AMDGPUAS::LOCAL_ADDRESS
132                           ? "dynamic_shared_pointer"
133                           : "global_buffer")
134                    : "by_value");
135 }
136 
getTypeName(Type * Ty,bool Signed) const137 std::string MetadataStreamerMsgPackV4::getTypeName(Type *Ty,
138                                                    bool Signed) const {
139   switch (Ty->getTypeID()) {
140   case Type::IntegerTyID: {
141     if (!Signed)
142       return (Twine('u') + getTypeName(Ty, true)).str();
143 
144     auto BitWidth = Ty->getIntegerBitWidth();
145     switch (BitWidth) {
146     case 8:
147       return "char";
148     case 16:
149       return "short";
150     case 32:
151       return "int";
152     case 64:
153       return "long";
154     default:
155       return (Twine('i') + Twine(BitWidth)).str();
156     }
157   }
158   case Type::HalfTyID:
159     return "half";
160   case Type::FloatTyID:
161     return "float";
162   case Type::DoubleTyID:
163     return "double";
164   case Type::FixedVectorTyID: {
165     auto VecTy = cast<FixedVectorType>(Ty);
166     auto ElTy = VecTy->getElementType();
167     auto NumElements = VecTy->getNumElements();
168     return (Twine(getTypeName(ElTy, Signed)) + Twine(NumElements)).str();
169   }
170   default:
171     return "unknown";
172   }
173 }
174 
175 msgpack::ArrayDocNode
getWorkGroupDimensions(MDNode * Node) const176 MetadataStreamerMsgPackV4::getWorkGroupDimensions(MDNode *Node) const {
177   auto Dims = HSAMetadataDoc->getArrayNode();
178   if (Node->getNumOperands() != 3)
179     return Dims;
180 
181   for (auto &Op : Node->operands())
182     Dims.push_back(Dims.getDocument()->getNode(
183         uint64_t(mdconst::extract<ConstantInt>(Op)->getZExtValue())));
184   return Dims;
185 }
186 
emitVersion()187 void MetadataStreamerMsgPackV4::emitVersion() {
188   auto Version = HSAMetadataDoc->getArrayNode();
189   Version.push_back(Version.getDocument()->getNode(VersionMajorV4));
190   Version.push_back(Version.getDocument()->getNode(VersionMinorV4));
191   getRootMetadata("amdhsa.version") = Version;
192 }
193 
emitTargetID(const IsaInfo::AMDGPUTargetID & TargetID)194 void MetadataStreamerMsgPackV4::emitTargetID(
195     const IsaInfo::AMDGPUTargetID &TargetID) {
196   getRootMetadata("amdhsa.target") =
197       HSAMetadataDoc->getNode(TargetID.toString(), /*Copy=*/true);
198 }
199 
emitPrintf(const Module & Mod)200 void MetadataStreamerMsgPackV4::emitPrintf(const Module &Mod) {
201   auto Node = Mod.getNamedMetadata("llvm.printf.fmts");
202   if (!Node)
203     return;
204 
205   auto Printf = HSAMetadataDoc->getArrayNode();
206   for (auto *Op : Node->operands())
207     if (Op->getNumOperands())
208       Printf.push_back(Printf.getDocument()->getNode(
209           cast<MDString>(Op->getOperand(0))->getString(), /*Copy=*/true));
210   getRootMetadata("amdhsa.printf") = Printf;
211 }
212 
emitKernelLanguage(const Function & Func,msgpack::MapDocNode Kern)213 void MetadataStreamerMsgPackV4::emitKernelLanguage(const Function &Func,
214                                                    msgpack::MapDocNode Kern) {
215   // TODO: What about other languages?
216   auto Node = Func.getParent()->getNamedMetadata("opencl.ocl.version");
217   if (!Node || !Node->getNumOperands())
218     return;
219   auto Op0 = Node->getOperand(0);
220   if (Op0->getNumOperands() <= 1)
221     return;
222 
223   Kern[".language"] = Kern.getDocument()->getNode("OpenCL C");
224   auto LanguageVersion = Kern.getDocument()->getArrayNode();
225   LanguageVersion.push_back(Kern.getDocument()->getNode(
226       mdconst::extract<ConstantInt>(Op0->getOperand(0))->getZExtValue()));
227   LanguageVersion.push_back(Kern.getDocument()->getNode(
228       mdconst::extract<ConstantInt>(Op0->getOperand(1))->getZExtValue()));
229   Kern[".language_version"] = LanguageVersion;
230 }
231 
emitKernelAttrs(const Function & Func,msgpack::MapDocNode Kern)232 void MetadataStreamerMsgPackV4::emitKernelAttrs(const Function &Func,
233                                                 msgpack::MapDocNode Kern) {
234 
235   if (auto Node = Func.getMetadata("reqd_work_group_size"))
236     Kern[".reqd_workgroup_size"] = getWorkGroupDimensions(Node);
237   if (auto Node = Func.getMetadata("work_group_size_hint"))
238     Kern[".workgroup_size_hint"] = getWorkGroupDimensions(Node);
239   if (auto Node = Func.getMetadata("vec_type_hint")) {
240     Kern[".vec_type_hint"] = Kern.getDocument()->getNode(
241         getTypeName(
242             cast<ValueAsMetadata>(Node->getOperand(0))->getType(),
243             mdconst::extract<ConstantInt>(Node->getOperand(1))->getZExtValue()),
244         /*Copy=*/true);
245   }
246   if (Func.hasFnAttribute("runtime-handle")) {
247     Kern[".device_enqueue_symbol"] = Kern.getDocument()->getNode(
248         Func.getFnAttribute("runtime-handle").getValueAsString().str(),
249         /*Copy=*/true);
250   }
251   if (Func.hasFnAttribute("device-init"))
252     Kern[".kind"] = Kern.getDocument()->getNode("init");
253   else if (Func.hasFnAttribute("device-fini"))
254     Kern[".kind"] = Kern.getDocument()->getNode("fini");
255 }
256 
emitKernelArgs(const MachineFunction & MF,msgpack::MapDocNode Kern)257 void MetadataStreamerMsgPackV4::emitKernelArgs(const MachineFunction &MF,
258                                                msgpack::MapDocNode Kern) {
259   auto &Func = MF.getFunction();
260   unsigned Offset = 0;
261   auto Args = HSAMetadataDoc->getArrayNode();
262   for (auto &Arg : Func.args())
263     emitKernelArg(Arg, Offset, Args);
264 
265   emitHiddenKernelArgs(MF, Offset, Args);
266 
267   Kern[".args"] = Args;
268 }
269 
emitKernelArg(const Argument & Arg,unsigned & Offset,msgpack::ArrayDocNode Args)270 void MetadataStreamerMsgPackV4::emitKernelArg(const Argument &Arg,
271                                               unsigned &Offset,
272                                               msgpack::ArrayDocNode Args) {
273   auto Func = Arg.getParent();
274   auto ArgNo = Arg.getArgNo();
275   const MDNode *Node;
276 
277   StringRef Name;
278   Node = Func->getMetadata("kernel_arg_name");
279   if (Node && ArgNo < Node->getNumOperands())
280     Name = cast<MDString>(Node->getOperand(ArgNo))->getString();
281   else if (Arg.hasName())
282     Name = Arg.getName();
283 
284   StringRef TypeName;
285   Node = Func->getMetadata("kernel_arg_type");
286   if (Node && ArgNo < Node->getNumOperands())
287     TypeName = cast<MDString>(Node->getOperand(ArgNo))->getString();
288 
289   StringRef BaseTypeName;
290   Node = Func->getMetadata("kernel_arg_base_type");
291   if (Node && ArgNo < Node->getNumOperands())
292     BaseTypeName = cast<MDString>(Node->getOperand(ArgNo))->getString();
293 
294   StringRef ActAccQual;
295   // Do we really need NoAlias check here?
296   if (Arg.getType()->isPointerTy() && Arg.hasNoAliasAttr()) {
297     if (Arg.onlyReadsMemory())
298       ActAccQual = "read_only";
299     else if (Arg.hasAttribute(Attribute::WriteOnly))
300       ActAccQual = "write_only";
301   }
302 
303   StringRef AccQual;
304   Node = Func->getMetadata("kernel_arg_access_qual");
305   if (Node && ArgNo < Node->getNumOperands())
306     AccQual = cast<MDString>(Node->getOperand(ArgNo))->getString();
307 
308   StringRef TypeQual;
309   Node = Func->getMetadata("kernel_arg_type_qual");
310   if (Node && ArgNo < Node->getNumOperands())
311     TypeQual = cast<MDString>(Node->getOperand(ArgNo))->getString();
312 
313   const DataLayout &DL = Func->getParent()->getDataLayout();
314 
315   MaybeAlign PointeeAlign;
316   Type *Ty = Arg.hasByRefAttr() ? Arg.getParamByRefType() : Arg.getType();
317 
318   // FIXME: Need to distinguish in memory alignment from pointer alignment.
319   if (auto PtrTy = dyn_cast<PointerType>(Ty)) {
320     if (PtrTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS)
321       PointeeAlign = Arg.getParamAlign().valueOrOne();
322   }
323 
324   // There's no distinction between byval aggregates and raw aggregates.
325   Type *ArgTy;
326   Align ArgAlign;
327   std::tie(ArgTy, ArgAlign) = getArgumentTypeAlign(Arg, DL);
328 
329   emitKernelArg(DL, ArgTy, ArgAlign,
330                 getValueKind(ArgTy, TypeQual, BaseTypeName), Offset, Args,
331                 PointeeAlign, Name, TypeName, BaseTypeName, ActAccQual,
332                 AccQual, TypeQual);
333 }
334 
emitKernelArg(const DataLayout & DL,Type * Ty,Align Alignment,StringRef ValueKind,unsigned & Offset,msgpack::ArrayDocNode Args,MaybeAlign PointeeAlign,StringRef Name,StringRef TypeName,StringRef BaseTypeName,StringRef ActAccQual,StringRef AccQual,StringRef TypeQual)335 void MetadataStreamerMsgPackV4::emitKernelArg(
336     const DataLayout &DL, Type *Ty, Align Alignment, StringRef ValueKind,
337     unsigned &Offset, msgpack::ArrayDocNode Args, MaybeAlign PointeeAlign,
338     StringRef Name, StringRef TypeName, StringRef BaseTypeName,
339     StringRef ActAccQual, StringRef AccQual, StringRef TypeQual) {
340   auto Arg = Args.getDocument()->getMapNode();
341 
342   if (!Name.empty())
343     Arg[".name"] = Arg.getDocument()->getNode(Name, /*Copy=*/true);
344   if (!TypeName.empty())
345     Arg[".type_name"] = Arg.getDocument()->getNode(TypeName, /*Copy=*/true);
346   auto Size = DL.getTypeAllocSize(Ty);
347   Arg[".size"] = Arg.getDocument()->getNode(Size);
348   Offset = alignTo(Offset, Alignment);
349   Arg[".offset"] = Arg.getDocument()->getNode(Offset);
350   Offset += Size;
351   Arg[".value_kind"] = Arg.getDocument()->getNode(ValueKind, /*Copy=*/true);
352   if (PointeeAlign)
353     Arg[".pointee_align"] = Arg.getDocument()->getNode(PointeeAlign->value());
354 
355   if (auto PtrTy = dyn_cast<PointerType>(Ty))
356     if (auto Qualifier = getAddressSpaceQualifier(PtrTy->getAddressSpace()))
357       // Limiting address space to emit only for a certain ValueKind.
358       if (ValueKind == "global_buffer" || ValueKind == "dynamic_shared_pointer")
359         Arg[".address_space"] = Arg.getDocument()->getNode(*Qualifier,
360                                                            /*Copy=*/true);
361 
362   if (auto AQ = getAccessQualifier(AccQual))
363     Arg[".access"] = Arg.getDocument()->getNode(*AQ, /*Copy=*/true);
364 
365   if (auto AAQ = getAccessQualifier(ActAccQual))
366     Arg[".actual_access"] = Arg.getDocument()->getNode(*AAQ, /*Copy=*/true);
367 
368   SmallVector<StringRef, 1> SplitTypeQuals;
369   TypeQual.split(SplitTypeQuals, " ", -1, false);
370   for (StringRef Key : SplitTypeQuals) {
371     if (Key == "const")
372       Arg[".is_const"] = Arg.getDocument()->getNode(true);
373     else if (Key == "restrict")
374       Arg[".is_restrict"] = Arg.getDocument()->getNode(true);
375     else if (Key == "volatile")
376       Arg[".is_volatile"] = Arg.getDocument()->getNode(true);
377     else if (Key == "pipe")
378       Arg[".is_pipe"] = Arg.getDocument()->getNode(true);
379   }
380 
381   Args.push_back(Arg);
382 }
383 
emitHiddenKernelArgs(const MachineFunction & MF,unsigned & Offset,msgpack::ArrayDocNode Args)384 void MetadataStreamerMsgPackV4::emitHiddenKernelArgs(
385     const MachineFunction &MF, unsigned &Offset, msgpack::ArrayDocNode Args) {
386   auto &Func = MF.getFunction();
387   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
388 
389   unsigned HiddenArgNumBytes = ST.getImplicitArgNumBytes(Func);
390   if (!HiddenArgNumBytes)
391     return;
392 
393   const Module *M = Func.getParent();
394   auto &DL = M->getDataLayout();
395   auto Int64Ty = Type::getInt64Ty(Func.getContext());
396 
397   Offset = alignTo(Offset, ST.getAlignmentForImplicitArgPtr());
398 
399   if (HiddenArgNumBytes >= 8)
400     emitKernelArg(DL, Int64Ty, Align(8), "hidden_global_offset_x", Offset,
401                   Args);
402   if (HiddenArgNumBytes >= 16)
403     emitKernelArg(DL, Int64Ty, Align(8), "hidden_global_offset_y", Offset,
404                   Args);
405   if (HiddenArgNumBytes >= 24)
406     emitKernelArg(DL, Int64Ty, Align(8), "hidden_global_offset_z", Offset,
407                   Args);
408 
409   auto Int8PtrTy =
410       PointerType::get(Func.getContext(), AMDGPUAS::GLOBAL_ADDRESS);
411 
412   if (HiddenArgNumBytes >= 32) {
413     // We forbid the use of features requiring hostcall when compiling OpenCL
414     // before code object V5, which makes the mutual exclusion between the
415     // "printf buffer" and "hostcall buffer" here sound.
416     if (M->getNamedMetadata("llvm.printf.fmts"))
417       emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_printf_buffer", Offset,
418                     Args);
419     else if (!Func.hasFnAttribute("amdgpu-no-hostcall-ptr"))
420       emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_hostcall_buffer", Offset,
421                     Args);
422     else
423       emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_none", Offset, Args);
424   }
425 
426   // Emit "default queue" and "completion action" arguments if enqueue kernel is
427   // used, otherwise emit dummy "none" arguments.
428   if (HiddenArgNumBytes >= 40) {
429     if (!Func.hasFnAttribute("amdgpu-no-default-queue")) {
430       emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_default_queue", Offset,
431                     Args);
432     } else {
433       emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_none", Offset, Args);
434     }
435   }
436 
437   if (HiddenArgNumBytes >= 48) {
438     if (!Func.hasFnAttribute("amdgpu-no-completion-action")) {
439       emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_completion_action", Offset,
440                     Args);
441     } else {
442       emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_none", Offset, Args);
443     }
444   }
445 
446   // Emit the pointer argument for multi-grid object.
447   if (HiddenArgNumBytes >= 56) {
448     if (!Func.hasFnAttribute("amdgpu-no-multigrid-sync-arg")) {
449       emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_multigrid_sync_arg", Offset,
450                     Args);
451     } else {
452       emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_none", Offset, Args);
453     }
454   }
455 }
456 
457 msgpack::MapDocNode
getHSAKernelProps(const MachineFunction & MF,const SIProgramInfo & ProgramInfo,unsigned CodeObjectVersion) const458 MetadataStreamerMsgPackV4::getHSAKernelProps(const MachineFunction &MF,
459                                              const SIProgramInfo &ProgramInfo,
460                                              unsigned CodeObjectVersion) const {
461   const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
462   const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
463   const Function &F = MF.getFunction();
464 
465   auto Kern = HSAMetadataDoc->getMapNode();
466 
467   Align MaxKernArgAlign;
468   Kern[".kernarg_segment_size"] = Kern.getDocument()->getNode(
469       STM.getKernArgSegmentSize(F, MaxKernArgAlign));
470   Kern[".group_segment_fixed_size"] =
471       Kern.getDocument()->getNode(ProgramInfo.LDSSize);
472   Kern[".private_segment_fixed_size"] =
473       Kern.getDocument()->getNode(ProgramInfo.ScratchSize);
474   if (CodeObjectVersion >= AMDGPU::AMDHSA_COV5)
475     Kern[".uses_dynamic_stack"] =
476         Kern.getDocument()->getNode(ProgramInfo.DynamicCallStack);
477 
478   if (CodeObjectVersion >= AMDGPU::AMDHSA_COV5 && STM.supportsWGP())
479     Kern[".workgroup_processor_mode"] =
480         Kern.getDocument()->getNode(ProgramInfo.WgpMode);
481 
482   // FIXME: The metadata treats the minimum as 16?
483   Kern[".kernarg_segment_align"] =
484       Kern.getDocument()->getNode(std::max(Align(4), MaxKernArgAlign).value());
485   Kern[".wavefront_size"] =
486       Kern.getDocument()->getNode(STM.getWavefrontSize());
487   Kern[".sgpr_count"] = Kern.getDocument()->getNode(ProgramInfo.NumSGPR);
488   Kern[".vgpr_count"] = Kern.getDocument()->getNode(ProgramInfo.NumVGPR);
489 
490   // Only add AGPR count to metadata for supported devices
491   if (STM.hasMAIInsts()) {
492     Kern[".agpr_count"] = Kern.getDocument()->getNode(ProgramInfo.NumAccVGPR);
493   }
494 
495   Kern[".max_flat_workgroup_size"] =
496       Kern.getDocument()->getNode(MFI.getMaxFlatWorkGroupSize());
497   Kern[".sgpr_spill_count"] =
498       Kern.getDocument()->getNode(MFI.getNumSpilledSGPRs());
499   Kern[".vgpr_spill_count"] =
500       Kern.getDocument()->getNode(MFI.getNumSpilledVGPRs());
501 
502   return Kern;
503 }
504 
emitTo(AMDGPUTargetStreamer & TargetStreamer)505 bool MetadataStreamerMsgPackV4::emitTo(AMDGPUTargetStreamer &TargetStreamer) {
506   return TargetStreamer.EmitHSAMetadata(*HSAMetadataDoc, true);
507 }
508 
begin(const Module & Mod,const IsaInfo::AMDGPUTargetID & TargetID)509 void MetadataStreamerMsgPackV4::begin(const Module &Mod,
510                                       const IsaInfo::AMDGPUTargetID &TargetID) {
511   emitVersion();
512   emitTargetID(TargetID);
513   emitPrintf(Mod);
514   getRootMetadata("amdhsa.kernels") = HSAMetadataDoc->getArrayNode();
515 }
516 
end()517 void MetadataStreamerMsgPackV4::end() {
518   std::string HSAMetadataString;
519   raw_string_ostream StrOS(HSAMetadataString);
520   HSAMetadataDoc->toYAML(StrOS);
521 
522   if (DumpHSAMetadata)
523     dump(StrOS.str());
524   if (VerifyHSAMetadata)
525     verify(StrOS.str());
526 }
527 
emitKernel(const MachineFunction & MF,const SIProgramInfo & ProgramInfo)528 void MetadataStreamerMsgPackV4::emitKernel(const MachineFunction &MF,
529                                            const SIProgramInfo &ProgramInfo) {
530   auto &Func = MF.getFunction();
531   if (Func.getCallingConv() != CallingConv::AMDGPU_KERNEL &&
532       Func.getCallingConv() != CallingConv::SPIR_KERNEL)
533     return;
534 
535   auto CodeObjectVersion =
536       AMDGPU::getAMDHSACodeObjectVersion(*Func.getParent());
537   auto Kern = getHSAKernelProps(MF, ProgramInfo, CodeObjectVersion);
538 
539   auto Kernels =
540       getRootMetadata("amdhsa.kernels").getArray(/*Convert=*/true);
541 
542   {
543     Kern[".name"] = Kern.getDocument()->getNode(Func.getName());
544     Kern[".symbol"] = Kern.getDocument()->getNode(
545         (Twine(Func.getName()) + Twine(".kd")).str(), /*Copy=*/true);
546     emitKernelLanguage(Func, Kern);
547     emitKernelAttrs(Func, Kern);
548     emitKernelArgs(MF, Kern);
549   }
550 
551   Kernels.push_back(Kern);
552 }
553 
554 //===----------------------------------------------------------------------===//
555 // HSAMetadataStreamerV5
556 //===----------------------------------------------------------------------===//
557 
emitVersion()558 void MetadataStreamerMsgPackV5::emitVersion() {
559   auto Version = HSAMetadataDoc->getArrayNode();
560   Version.push_back(Version.getDocument()->getNode(VersionMajorV5));
561   Version.push_back(Version.getDocument()->getNode(VersionMinorV5));
562   getRootMetadata("amdhsa.version") = Version;
563 }
564 
emitHiddenKernelArgs(const MachineFunction & MF,unsigned & Offset,msgpack::ArrayDocNode Args)565 void MetadataStreamerMsgPackV5::emitHiddenKernelArgs(
566     const MachineFunction &MF, unsigned &Offset, msgpack::ArrayDocNode Args) {
567   auto &Func = MF.getFunction();
568   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
569 
570   // No implicit kernel argument is used.
571   if (ST.getImplicitArgNumBytes(Func) == 0)
572     return;
573 
574   const Module *M = Func.getParent();
575   auto &DL = M->getDataLayout();
576   const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
577 
578   auto Int64Ty = Type::getInt64Ty(Func.getContext());
579   auto Int32Ty = Type::getInt32Ty(Func.getContext());
580   auto Int16Ty = Type::getInt16Ty(Func.getContext());
581 
582   Offset = alignTo(Offset, ST.getAlignmentForImplicitArgPtr());
583   emitKernelArg(DL, Int32Ty, Align(4), "hidden_block_count_x", Offset, Args);
584   emitKernelArg(DL, Int32Ty, Align(4), "hidden_block_count_y", Offset, Args);
585   emitKernelArg(DL, Int32Ty, Align(4), "hidden_block_count_z", Offset, Args);
586 
587   emitKernelArg(DL, Int16Ty, Align(2), "hidden_group_size_x", Offset, Args);
588   emitKernelArg(DL, Int16Ty, Align(2), "hidden_group_size_y", Offset, Args);
589   emitKernelArg(DL, Int16Ty, Align(2), "hidden_group_size_z", Offset, Args);
590 
591   emitKernelArg(DL, Int16Ty, Align(2), "hidden_remainder_x", Offset, Args);
592   emitKernelArg(DL, Int16Ty, Align(2), "hidden_remainder_y", Offset, Args);
593   emitKernelArg(DL, Int16Ty, Align(2), "hidden_remainder_z", Offset, Args);
594 
595   // Reserved for hidden_tool_correlation_id.
596   Offset += 8;
597 
598   Offset += 8; // Reserved.
599 
600   emitKernelArg(DL, Int64Ty, Align(8), "hidden_global_offset_x", Offset, Args);
601   emitKernelArg(DL, Int64Ty, Align(8), "hidden_global_offset_y", Offset, Args);
602   emitKernelArg(DL, Int64Ty, Align(8), "hidden_global_offset_z", Offset, Args);
603 
604   emitKernelArg(DL, Int16Ty, Align(2), "hidden_grid_dims", Offset, Args);
605 
606   Offset += 6; // Reserved.
607   auto Int8PtrTy =
608       PointerType::get(Func.getContext(), AMDGPUAS::GLOBAL_ADDRESS);
609 
610   if (M->getNamedMetadata("llvm.printf.fmts")) {
611     emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_printf_buffer", Offset,
612                   Args);
613   } else {
614     Offset += 8; // Skipped.
615   }
616 
617   if (!Func.hasFnAttribute("amdgpu-no-hostcall-ptr")) {
618     emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_hostcall_buffer", Offset,
619                   Args);
620   } else {
621     Offset += 8; // Skipped.
622   }
623 
624   if (!Func.hasFnAttribute("amdgpu-no-multigrid-sync-arg")) {
625     emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_multigrid_sync_arg", Offset,
626                 Args);
627   } else {
628     Offset += 8; // Skipped.
629   }
630 
631   if (!Func.hasFnAttribute("amdgpu-no-heap-ptr"))
632     emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_heap_v1", Offset, Args);
633   else
634     Offset += 8; // Skipped.
635 
636   if (!Func.hasFnAttribute("amdgpu-no-default-queue")) {
637     emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_default_queue", Offset,
638                   Args);
639   } else {
640     Offset += 8; // Skipped.
641   }
642 
643   if (!Func.hasFnAttribute("amdgpu-no-completion-action")) {
644     emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_completion_action", Offset,
645                   Args);
646   } else {
647     Offset += 8; // Skipped.
648   }
649 
650   // Emit argument for hidden dynamic lds size
651   if (MFI.isDynamicLDSUsed()) {
652     emitKernelArg(DL, Int32Ty, Align(4), "hidden_dynamic_lds_size", Offset,
653                   Args);
654   } else {
655     Offset += 4; // skipped
656   }
657 
658   Offset += 68; // Reserved.
659 
660   // hidden_private_base and hidden_shared_base are only when the subtarget has
661   // ApertureRegs.
662   if (!ST.hasApertureRegs()) {
663     emitKernelArg(DL, Int32Ty, Align(4), "hidden_private_base", Offset, Args);
664     emitKernelArg(DL, Int32Ty, Align(4), "hidden_shared_base", Offset, Args);
665   } else {
666     Offset += 8; // Skipped.
667   }
668 
669   if (MFI.getUserSGPRInfo().hasQueuePtr())
670     emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_queue_ptr", Offset, Args);
671 }
672 
emitKernelAttrs(const Function & Func,msgpack::MapDocNode Kern)673 void MetadataStreamerMsgPackV5::emitKernelAttrs(const Function &Func,
674                                                 msgpack::MapDocNode Kern) {
675   MetadataStreamerMsgPackV4::emitKernelAttrs(Func, Kern);
676 
677   if (Func.getFnAttribute("uniform-work-group-size").getValueAsBool())
678     Kern[".uniform_work_group_size"] = Kern.getDocument()->getNode(1);
679 }
680 
681 
682 } // end namespace HSAMD
683 } // end namespace AMDGPU
684 } // end namespace llvm
685