1 //===--- AMDGPUHSAMetadataStreamer.cpp --------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// AMDGPU HSA Metadata Streamer.
11 ///
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "AMDGPUHSAMetadataStreamer.h"
16 #include "AMDGPU.h"
17 #include "GCNSubtarget.h"
18 #include "MCTargetDesc/AMDGPUTargetStreamer.h"
19 #include "SIMachineFunctionInfo.h"
20 #include "SIProgramInfo.h"
21 #include "llvm/IR/Module.h"
22 using namespace llvm;
23 
getArgumentTypeAlign(const Argument & Arg,const DataLayout & DL)24 static std::pair<Type *, Align> getArgumentTypeAlign(const Argument &Arg,
25                                                      const DataLayout &DL) {
26   Type *Ty = Arg.getType();
27   MaybeAlign ArgAlign;
28   if (Arg.hasByRefAttr()) {
29     Ty = Arg.getParamByRefType();
30     ArgAlign = Arg.getParamAlign();
31   }
32 
33   if (!ArgAlign)
34     ArgAlign = DL.getABITypeAlign(Ty);
35 
36   return std::make_pair(Ty, *ArgAlign);
37 }
38 
39 namespace llvm {
40 
41 static cl::opt<bool> DumpHSAMetadata(
42     "amdgpu-dump-hsa-metadata",
43     cl::desc("Dump AMDGPU HSA Metadata"));
44 static cl::opt<bool> VerifyHSAMetadata(
45     "amdgpu-verify-hsa-metadata",
46     cl::desc("Verify AMDGPU HSA Metadata"));
47 
48 namespace AMDGPU {
49 namespace HSAMD {
50 
51 //===----------------------------------------------------------------------===//
52 // HSAMetadataStreamerV2
53 //===----------------------------------------------------------------------===//
dump(StringRef HSAMetadataString) const54 void MetadataStreamerV2::dump(StringRef HSAMetadataString) const {
55   errs() << "AMDGPU HSA Metadata:\n" << HSAMetadataString << '\n';
56 }
57 
verify(StringRef HSAMetadataString) const58 void MetadataStreamerV2::verify(StringRef HSAMetadataString) const {
59   errs() << "AMDGPU HSA Metadata Parser Test: ";
60 
61   HSAMD::Metadata FromHSAMetadataString;
62   if (fromString(HSAMetadataString, FromHSAMetadataString)) {
63     errs() << "FAIL\n";
64     return;
65   }
66 
67   std::string ToHSAMetadataString;
68   if (toString(FromHSAMetadataString, ToHSAMetadataString)) {
69     errs() << "FAIL\n";
70     return;
71   }
72 
73   errs() << (HSAMetadataString == ToHSAMetadataString ? "PASS" : "FAIL")
74          << '\n';
75   if (HSAMetadataString != ToHSAMetadataString) {
76     errs() << "Original input: " << HSAMetadataString << '\n'
77            << "Produced output: " << ToHSAMetadataString << '\n';
78   }
79 }
80 
81 AccessQualifier
getAccessQualifier(StringRef AccQual) const82 MetadataStreamerV2::getAccessQualifier(StringRef AccQual) const {
83   if (AccQual.empty())
84     return AccessQualifier::Unknown;
85 
86   return StringSwitch<AccessQualifier>(AccQual)
87              .Case("read_only",  AccessQualifier::ReadOnly)
88              .Case("write_only", AccessQualifier::WriteOnly)
89              .Case("read_write", AccessQualifier::ReadWrite)
90              .Default(AccessQualifier::Default);
91 }
92 
93 AddressSpaceQualifier
getAddressSpaceQualifier(unsigned AddressSpace) const94 MetadataStreamerV2::getAddressSpaceQualifier(
95     unsigned AddressSpace) const {
96   switch (AddressSpace) {
97   case AMDGPUAS::PRIVATE_ADDRESS:
98     return AddressSpaceQualifier::Private;
99   case AMDGPUAS::GLOBAL_ADDRESS:
100     return AddressSpaceQualifier::Global;
101   case AMDGPUAS::CONSTANT_ADDRESS:
102     return AddressSpaceQualifier::Constant;
103   case AMDGPUAS::LOCAL_ADDRESS:
104     return AddressSpaceQualifier::Local;
105   case AMDGPUAS::FLAT_ADDRESS:
106     return AddressSpaceQualifier::Generic;
107   case AMDGPUAS::REGION_ADDRESS:
108     return AddressSpaceQualifier::Region;
109   default:
110     return AddressSpaceQualifier::Unknown;
111   }
112 }
113 
getValueKind(Type * Ty,StringRef TypeQual,StringRef BaseTypeName) const114 ValueKind MetadataStreamerV2::getValueKind(Type *Ty, StringRef TypeQual,
115                                            StringRef BaseTypeName) const {
116   if (TypeQual.find("pipe") != StringRef::npos)
117     return ValueKind::Pipe;
118 
119   return StringSwitch<ValueKind>(BaseTypeName)
120              .Case("image1d_t", ValueKind::Image)
121              .Case("image1d_array_t", ValueKind::Image)
122              .Case("image1d_buffer_t", ValueKind::Image)
123              .Case("image2d_t", ValueKind::Image)
124              .Case("image2d_array_t", ValueKind::Image)
125              .Case("image2d_array_depth_t", ValueKind::Image)
126              .Case("image2d_array_msaa_t", ValueKind::Image)
127              .Case("image2d_array_msaa_depth_t", ValueKind::Image)
128              .Case("image2d_depth_t", ValueKind::Image)
129              .Case("image2d_msaa_t", ValueKind::Image)
130              .Case("image2d_msaa_depth_t", ValueKind::Image)
131              .Case("image3d_t", ValueKind::Image)
132              .Case("sampler_t", ValueKind::Sampler)
133              .Case("queue_t", ValueKind::Queue)
134              .Default(isa<PointerType>(Ty) ?
135                           (Ty->getPointerAddressSpace() ==
136                            AMDGPUAS::LOCAL_ADDRESS ?
137                            ValueKind::DynamicSharedPointer :
138                            ValueKind::GlobalBuffer) :
139                       ValueKind::ByValue);
140 }
141 
getTypeName(Type * Ty,bool Signed) const142 std::string MetadataStreamerV2::getTypeName(Type *Ty, bool Signed) const {
143   switch (Ty->getTypeID()) {
144   case Type::IntegerTyID: {
145     if (!Signed)
146       return (Twine('u') + getTypeName(Ty, true)).str();
147 
148     auto BitWidth = Ty->getIntegerBitWidth();
149     switch (BitWidth) {
150     case 8:
151       return "char";
152     case 16:
153       return "short";
154     case 32:
155       return "int";
156     case 64:
157       return "long";
158     default:
159       return (Twine('i') + Twine(BitWidth)).str();
160     }
161   }
162   case Type::HalfTyID:
163     return "half";
164   case Type::FloatTyID:
165     return "float";
166   case Type::DoubleTyID:
167     return "double";
168   case Type::FixedVectorTyID: {
169     auto VecTy = cast<FixedVectorType>(Ty);
170     auto ElTy = VecTy->getElementType();
171     auto NumElements = VecTy->getNumElements();
172     return (Twine(getTypeName(ElTy, Signed)) + Twine(NumElements)).str();
173   }
174   default:
175     return "unknown";
176   }
177 }
178 
179 std::vector<uint32_t>
getWorkGroupDimensions(MDNode * Node) const180 MetadataStreamerV2::getWorkGroupDimensions(MDNode *Node) const {
181   std::vector<uint32_t> Dims;
182   if (Node->getNumOperands() != 3)
183     return Dims;
184 
185   for (auto &Op : Node->operands())
186     Dims.push_back(mdconst::extract<ConstantInt>(Op)->getZExtValue());
187   return Dims;
188 }
189 
190 Kernel::CodeProps::Metadata
getHSACodeProps(const MachineFunction & MF,const SIProgramInfo & ProgramInfo) const191 MetadataStreamerV2::getHSACodeProps(const MachineFunction &MF,
192                                     const SIProgramInfo &ProgramInfo) const {
193   const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
194   const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
195   HSAMD::Kernel::CodeProps::Metadata HSACodeProps;
196   const Function &F = MF.getFunction();
197 
198   assert(F.getCallingConv() == CallingConv::AMDGPU_KERNEL ||
199          F.getCallingConv() == CallingConv::SPIR_KERNEL);
200 
201   Align MaxKernArgAlign;
202   HSACodeProps.mKernargSegmentSize = STM.getKernArgSegmentSize(F,
203                                                                MaxKernArgAlign);
204   HSACodeProps.mGroupSegmentFixedSize = ProgramInfo.LDSSize;
205   HSACodeProps.mPrivateSegmentFixedSize = ProgramInfo.ScratchSize;
206   HSACodeProps.mKernargSegmentAlign =
207       std::max(MaxKernArgAlign, Align(4)).value();
208   HSACodeProps.mWavefrontSize = STM.getWavefrontSize();
209   HSACodeProps.mNumSGPRs = ProgramInfo.NumSGPR;
210   HSACodeProps.mNumVGPRs = ProgramInfo.NumVGPR;
211   HSACodeProps.mMaxFlatWorkGroupSize = MFI.getMaxFlatWorkGroupSize();
212   HSACodeProps.mIsDynamicCallStack = ProgramInfo.DynamicCallStack;
213   HSACodeProps.mIsXNACKEnabled = STM.isXNACKEnabled();
214   HSACodeProps.mNumSpilledSGPRs = MFI.getNumSpilledSGPRs();
215   HSACodeProps.mNumSpilledVGPRs = MFI.getNumSpilledVGPRs();
216 
217   return HSACodeProps;
218 }
219 
220 Kernel::DebugProps::Metadata
getHSADebugProps(const MachineFunction & MF,const SIProgramInfo & ProgramInfo) const221 MetadataStreamerV2::getHSADebugProps(const MachineFunction &MF,
222                                      const SIProgramInfo &ProgramInfo) const {
223   return HSAMD::Kernel::DebugProps::Metadata();
224 }
225 
emitVersion()226 void MetadataStreamerV2::emitVersion() {
227   auto &Version = HSAMetadata.mVersion;
228 
229   Version.push_back(VersionMajorV2);
230   Version.push_back(VersionMinorV2);
231 }
232 
emitPrintf(const Module & Mod)233 void MetadataStreamerV2::emitPrintf(const Module &Mod) {
234   auto &Printf = HSAMetadata.mPrintf;
235 
236   auto Node = Mod.getNamedMetadata("llvm.printf.fmts");
237   if (!Node)
238     return;
239 
240   for (auto Op : Node->operands())
241     if (Op->getNumOperands())
242       Printf.push_back(
243           std::string(cast<MDString>(Op->getOperand(0))->getString()));
244 }
245 
emitKernelLanguage(const Function & Func)246 void MetadataStreamerV2::emitKernelLanguage(const Function &Func) {
247   auto &Kernel = HSAMetadata.mKernels.back();
248 
249   // TODO: What about other languages?
250   auto Node = Func.getParent()->getNamedMetadata("opencl.ocl.version");
251   if (!Node || !Node->getNumOperands())
252     return;
253   auto Op0 = Node->getOperand(0);
254   if (Op0->getNumOperands() <= 1)
255     return;
256 
257   Kernel.mLanguage = "OpenCL C";
258   Kernel.mLanguageVersion.push_back(
259       mdconst::extract<ConstantInt>(Op0->getOperand(0))->getZExtValue());
260   Kernel.mLanguageVersion.push_back(
261       mdconst::extract<ConstantInt>(Op0->getOperand(1))->getZExtValue());
262 }
263 
emitKernelAttrs(const Function & Func)264 void MetadataStreamerV2::emitKernelAttrs(const Function &Func) {
265   auto &Attrs = HSAMetadata.mKernels.back().mAttrs;
266 
267   if (auto Node = Func.getMetadata("reqd_work_group_size"))
268     Attrs.mReqdWorkGroupSize = getWorkGroupDimensions(Node);
269   if (auto Node = Func.getMetadata("work_group_size_hint"))
270     Attrs.mWorkGroupSizeHint = getWorkGroupDimensions(Node);
271   if (auto Node = Func.getMetadata("vec_type_hint")) {
272     Attrs.mVecTypeHint = getTypeName(
273         cast<ValueAsMetadata>(Node->getOperand(0))->getType(),
274         mdconst::extract<ConstantInt>(Node->getOperand(1))->getZExtValue());
275   }
276   if (Func.hasFnAttribute("runtime-handle")) {
277     Attrs.mRuntimeHandle =
278         Func.getFnAttribute("runtime-handle").getValueAsString().str();
279   }
280 }
281 
emitKernelArgs(const Function & Func)282 void MetadataStreamerV2::emitKernelArgs(const Function &Func) {
283   for (auto &Arg : Func.args())
284     emitKernelArg(Arg);
285 
286   emitHiddenKernelArgs(Func);
287 }
288 
emitKernelArg(const Argument & Arg)289 void MetadataStreamerV2::emitKernelArg(const Argument &Arg) {
290   auto Func = Arg.getParent();
291   auto ArgNo = Arg.getArgNo();
292   const MDNode *Node;
293 
294   StringRef Name;
295   Node = Func->getMetadata("kernel_arg_name");
296   if (Node && ArgNo < Node->getNumOperands())
297     Name = cast<MDString>(Node->getOperand(ArgNo))->getString();
298   else if (Arg.hasName())
299     Name = Arg.getName();
300 
301   StringRef TypeName;
302   Node = Func->getMetadata("kernel_arg_type");
303   if (Node && ArgNo < Node->getNumOperands())
304     TypeName = cast<MDString>(Node->getOperand(ArgNo))->getString();
305 
306   StringRef BaseTypeName;
307   Node = Func->getMetadata("kernel_arg_base_type");
308   if (Node && ArgNo < Node->getNumOperands())
309     BaseTypeName = cast<MDString>(Node->getOperand(ArgNo))->getString();
310 
311   StringRef AccQual;
312   if (Arg.getType()->isPointerTy() && Arg.onlyReadsMemory() &&
313       Arg.hasNoAliasAttr()) {
314     AccQual = "read_only";
315   } else {
316     Node = Func->getMetadata("kernel_arg_access_qual");
317     if (Node && ArgNo < Node->getNumOperands())
318       AccQual = cast<MDString>(Node->getOperand(ArgNo))->getString();
319   }
320 
321   StringRef TypeQual;
322   Node = Func->getMetadata("kernel_arg_type_qual");
323   if (Node && ArgNo < Node->getNumOperands())
324     TypeQual = cast<MDString>(Node->getOperand(ArgNo))->getString();
325 
326   const DataLayout &DL = Func->getParent()->getDataLayout();
327 
328   MaybeAlign PointeeAlign;
329   if (auto PtrTy = dyn_cast<PointerType>(Arg.getType())) {
330     if (PtrTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) {
331       // FIXME: Should report this for all address spaces
332       PointeeAlign = DL.getValueOrABITypeAlignment(Arg.getParamAlign(),
333                                                    PtrTy->getElementType());
334     }
335   }
336 
337   Type *ArgTy;
338   Align ArgAlign;
339   std::tie(ArgTy, ArgAlign) = getArgumentTypeAlign(Arg, DL);
340 
341   emitKernelArg(DL, ArgTy, ArgAlign,
342                 getValueKind(ArgTy, TypeQual, BaseTypeName), PointeeAlign, Name,
343                 TypeName, BaseTypeName, AccQual, TypeQual);
344 }
345 
emitKernelArg(const DataLayout & DL,Type * Ty,Align Alignment,ValueKind ValueKind,MaybeAlign PointeeAlign,StringRef Name,StringRef TypeName,StringRef BaseTypeName,StringRef AccQual,StringRef TypeQual)346 void MetadataStreamerV2::emitKernelArg(const DataLayout &DL, Type *Ty,
347                                        Align Alignment, ValueKind ValueKind,
348                                        MaybeAlign PointeeAlign, StringRef Name,
349                                        StringRef TypeName,
350                                        StringRef BaseTypeName,
351                                        StringRef AccQual, StringRef TypeQual) {
352   HSAMetadata.mKernels.back().mArgs.push_back(Kernel::Arg::Metadata());
353   auto &Arg = HSAMetadata.mKernels.back().mArgs.back();
354 
355   Arg.mName = std::string(Name);
356   Arg.mTypeName = std::string(TypeName);
357   Arg.mSize = DL.getTypeAllocSize(Ty);
358   Arg.mAlign = Alignment.value();
359   Arg.mValueKind = ValueKind;
360   Arg.mPointeeAlign = PointeeAlign ? PointeeAlign->value() : 0;
361 
362   if (auto PtrTy = dyn_cast<PointerType>(Ty))
363     Arg.mAddrSpaceQual = getAddressSpaceQualifier(PtrTy->getAddressSpace());
364 
365   Arg.mAccQual = getAccessQualifier(AccQual);
366 
367   // TODO: Emit Arg.mActualAccQual.
368 
369   SmallVector<StringRef, 1> SplitTypeQuals;
370   TypeQual.split(SplitTypeQuals, " ", -1, false);
371   for (StringRef Key : SplitTypeQuals) {
372     auto P = StringSwitch<bool*>(Key)
373                  .Case("const",    &Arg.mIsConst)
374                  .Case("restrict", &Arg.mIsRestrict)
375                  .Case("volatile", &Arg.mIsVolatile)
376                  .Case("pipe",     &Arg.mIsPipe)
377                  .Default(nullptr);
378     if (P)
379       *P = true;
380   }
381 }
382 
emitHiddenKernelArgs(const Function & Func)383 void MetadataStreamerV2::emitHiddenKernelArgs(const Function &Func) {
384   int HiddenArgNumBytes =
385       getIntegerAttribute(Func, "amdgpu-implicitarg-num-bytes", 0);
386 
387   if (!HiddenArgNumBytes)
388     return;
389 
390   auto &DL = Func.getParent()->getDataLayout();
391   auto Int64Ty = Type::getInt64Ty(Func.getContext());
392 
393   if (HiddenArgNumBytes >= 8)
394     emitKernelArg(DL, Int64Ty, Align(8), ValueKind::HiddenGlobalOffsetX);
395   if (HiddenArgNumBytes >= 16)
396     emitKernelArg(DL, Int64Ty, Align(8), ValueKind::HiddenGlobalOffsetY);
397   if (HiddenArgNumBytes >= 24)
398     emitKernelArg(DL, Int64Ty, Align(8), ValueKind::HiddenGlobalOffsetZ);
399 
400   auto Int8PtrTy = Type::getInt8PtrTy(Func.getContext(),
401                                       AMDGPUAS::GLOBAL_ADDRESS);
402 
403   // Emit "printf buffer" argument if printf is used, otherwise emit dummy
404   // "none" argument.
405   if (HiddenArgNumBytes >= 32) {
406     if (Func.getParent()->getNamedMetadata("llvm.printf.fmts"))
407       emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenPrintfBuffer);
408     else if (Func.getParent()->getFunction("__ockl_hostcall_internal")) {
409       // The printf runtime binding pass should have ensured that hostcall and
410       // printf are not used in the same module.
411       assert(!Func.getParent()->getNamedMetadata("llvm.printf.fmts"));
412       emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenHostcallBuffer);
413     } else
414       emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenNone);
415   }
416 
417   // Emit "default queue" and "completion action" arguments if enqueue kernel is
418   // used, otherwise emit dummy "none" arguments.
419   if (HiddenArgNumBytes >= 48) {
420     if (Func.hasFnAttribute("calls-enqueue-kernel")) {
421       emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenDefaultQueue);
422       emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenCompletionAction);
423     } else {
424       emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenNone);
425       emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenNone);
426     }
427   }
428 
429   // Emit the pointer argument for multi-grid object.
430   if (HiddenArgNumBytes >= 56)
431     emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenMultiGridSyncArg);
432 }
433 
emitTo(AMDGPUTargetStreamer & TargetStreamer)434 bool MetadataStreamerV2::emitTo(AMDGPUTargetStreamer &TargetStreamer) {
435   return TargetStreamer.EmitHSAMetadata(getHSAMetadata());
436 }
437 
begin(const Module & Mod,const IsaInfo::AMDGPUTargetID & TargetID)438 void MetadataStreamerV2::begin(const Module &Mod,
439                                const IsaInfo::AMDGPUTargetID &TargetID) {
440   emitVersion();
441   emitPrintf(Mod);
442 }
443 
end()444 void MetadataStreamerV2::end() {
445   std::string HSAMetadataString;
446   if (toString(HSAMetadata, HSAMetadataString))
447     return;
448 
449   if (DumpHSAMetadata)
450     dump(HSAMetadataString);
451   if (VerifyHSAMetadata)
452     verify(HSAMetadataString);
453 }
454 
emitKernel(const MachineFunction & MF,const SIProgramInfo & ProgramInfo)455 void MetadataStreamerV2::emitKernel(const MachineFunction &MF,
456                                     const SIProgramInfo &ProgramInfo) {
457   auto &Func = MF.getFunction();
458   if (Func.getCallingConv() != CallingConv::AMDGPU_KERNEL)
459     return;
460 
461   auto CodeProps = getHSACodeProps(MF, ProgramInfo);
462   auto DebugProps = getHSADebugProps(MF, ProgramInfo);
463 
464   HSAMetadata.mKernels.push_back(Kernel::Metadata());
465   auto &Kernel = HSAMetadata.mKernels.back();
466 
467   Kernel.mName = std::string(Func.getName());
468   Kernel.mSymbolName = (Twine(Func.getName()) + Twine("@kd")).str();
469   emitKernelLanguage(Func);
470   emitKernelAttrs(Func);
471   emitKernelArgs(Func);
472   HSAMetadata.mKernels.back().mCodeProps = CodeProps;
473   HSAMetadata.mKernels.back().mDebugProps = DebugProps;
474 }
475 
476 //===----------------------------------------------------------------------===//
477 // HSAMetadataStreamerV3
478 //===----------------------------------------------------------------------===//
479 
dump(StringRef HSAMetadataString) const480 void MetadataStreamerV3::dump(StringRef HSAMetadataString) const {
481   errs() << "AMDGPU HSA Metadata:\n" << HSAMetadataString << '\n';
482 }
483 
verify(StringRef HSAMetadataString) const484 void MetadataStreamerV3::verify(StringRef HSAMetadataString) const {
485   errs() << "AMDGPU HSA Metadata Parser Test: ";
486 
487   msgpack::Document FromHSAMetadataString;
488 
489   if (!FromHSAMetadataString.fromYAML(HSAMetadataString)) {
490     errs() << "FAIL\n";
491     return;
492   }
493 
494   std::string ToHSAMetadataString;
495   raw_string_ostream StrOS(ToHSAMetadataString);
496   FromHSAMetadataString.toYAML(StrOS);
497 
498   errs() << (HSAMetadataString == StrOS.str() ? "PASS" : "FAIL") << '\n';
499   if (HSAMetadataString != ToHSAMetadataString) {
500     errs() << "Original input: " << HSAMetadataString << '\n'
501            << "Produced output: " << StrOS.str() << '\n';
502   }
503 }
504 
505 Optional<StringRef>
getAccessQualifier(StringRef AccQual) const506 MetadataStreamerV3::getAccessQualifier(StringRef AccQual) const {
507   return StringSwitch<Optional<StringRef>>(AccQual)
508       .Case("read_only", StringRef("read_only"))
509       .Case("write_only", StringRef("write_only"))
510       .Case("read_write", StringRef("read_write"))
511       .Default(None);
512 }
513 
514 Optional<StringRef>
getAddressSpaceQualifier(unsigned AddressSpace) const515 MetadataStreamerV3::getAddressSpaceQualifier(unsigned AddressSpace) const {
516   switch (AddressSpace) {
517   case AMDGPUAS::PRIVATE_ADDRESS:
518     return StringRef("private");
519   case AMDGPUAS::GLOBAL_ADDRESS:
520     return StringRef("global");
521   case AMDGPUAS::CONSTANT_ADDRESS:
522     return StringRef("constant");
523   case AMDGPUAS::LOCAL_ADDRESS:
524     return StringRef("local");
525   case AMDGPUAS::FLAT_ADDRESS:
526     return StringRef("generic");
527   case AMDGPUAS::REGION_ADDRESS:
528     return StringRef("region");
529   default:
530     return None;
531   }
532 }
533 
getValueKind(Type * Ty,StringRef TypeQual,StringRef BaseTypeName) const534 StringRef MetadataStreamerV3::getValueKind(Type *Ty, StringRef TypeQual,
535                                            StringRef BaseTypeName) const {
536   if (TypeQual.find("pipe") != StringRef::npos)
537     return "pipe";
538 
539   return StringSwitch<StringRef>(BaseTypeName)
540       .Case("image1d_t", "image")
541       .Case("image1d_array_t", "image")
542       .Case("image1d_buffer_t", "image")
543       .Case("image2d_t", "image")
544       .Case("image2d_array_t", "image")
545       .Case("image2d_array_depth_t", "image")
546       .Case("image2d_array_msaa_t", "image")
547       .Case("image2d_array_msaa_depth_t", "image")
548       .Case("image2d_depth_t", "image")
549       .Case("image2d_msaa_t", "image")
550       .Case("image2d_msaa_depth_t", "image")
551       .Case("image3d_t", "image")
552       .Case("sampler_t", "sampler")
553       .Case("queue_t", "queue")
554       .Default(isa<PointerType>(Ty)
555                    ? (Ty->getPointerAddressSpace() == AMDGPUAS::LOCAL_ADDRESS
556                           ? "dynamic_shared_pointer"
557                           : "global_buffer")
558                    : "by_value");
559 }
560 
getTypeName(Type * Ty,bool Signed) const561 std::string MetadataStreamerV3::getTypeName(Type *Ty, bool Signed) const {
562   switch (Ty->getTypeID()) {
563   case Type::IntegerTyID: {
564     if (!Signed)
565       return (Twine('u') + getTypeName(Ty, true)).str();
566 
567     auto BitWidth = Ty->getIntegerBitWidth();
568     switch (BitWidth) {
569     case 8:
570       return "char";
571     case 16:
572       return "short";
573     case 32:
574       return "int";
575     case 64:
576       return "long";
577     default:
578       return (Twine('i') + Twine(BitWidth)).str();
579     }
580   }
581   case Type::HalfTyID:
582     return "half";
583   case Type::FloatTyID:
584     return "float";
585   case Type::DoubleTyID:
586     return "double";
587   case Type::FixedVectorTyID: {
588     auto VecTy = cast<FixedVectorType>(Ty);
589     auto ElTy = VecTy->getElementType();
590     auto NumElements = VecTy->getNumElements();
591     return (Twine(getTypeName(ElTy, Signed)) + Twine(NumElements)).str();
592   }
593   default:
594     return "unknown";
595   }
596 }
597 
598 msgpack::ArrayDocNode
getWorkGroupDimensions(MDNode * Node) const599 MetadataStreamerV3::getWorkGroupDimensions(MDNode *Node) const {
600   auto Dims = HSAMetadataDoc->getArrayNode();
601   if (Node->getNumOperands() != 3)
602     return Dims;
603 
604   for (auto &Op : Node->operands())
605     Dims.push_back(Dims.getDocument()->getNode(
606         uint64_t(mdconst::extract<ConstantInt>(Op)->getZExtValue())));
607   return Dims;
608 }
609 
emitVersion()610 void MetadataStreamerV3::emitVersion() {
611   auto Version = HSAMetadataDoc->getArrayNode();
612   Version.push_back(Version.getDocument()->getNode(VersionMajorV3));
613   Version.push_back(Version.getDocument()->getNode(VersionMinorV3));
614   getRootMetadata("amdhsa.version") = Version;
615 }
616 
emitPrintf(const Module & Mod)617 void MetadataStreamerV3::emitPrintf(const Module &Mod) {
618   auto Node = Mod.getNamedMetadata("llvm.printf.fmts");
619   if (!Node)
620     return;
621 
622   auto Printf = HSAMetadataDoc->getArrayNode();
623   for (auto Op : Node->operands())
624     if (Op->getNumOperands())
625       Printf.push_back(Printf.getDocument()->getNode(
626           cast<MDString>(Op->getOperand(0))->getString(), /*Copy=*/true));
627   getRootMetadata("amdhsa.printf") = Printf;
628 }
629 
emitKernelLanguage(const Function & Func,msgpack::MapDocNode Kern)630 void MetadataStreamerV3::emitKernelLanguage(const Function &Func,
631                                             msgpack::MapDocNode Kern) {
632   // TODO: What about other languages?
633   auto Node = Func.getParent()->getNamedMetadata("opencl.ocl.version");
634   if (!Node || !Node->getNumOperands())
635     return;
636   auto Op0 = Node->getOperand(0);
637   if (Op0->getNumOperands() <= 1)
638     return;
639 
640   Kern[".language"] = Kern.getDocument()->getNode("OpenCL C");
641   auto LanguageVersion = Kern.getDocument()->getArrayNode();
642   LanguageVersion.push_back(Kern.getDocument()->getNode(
643       mdconst::extract<ConstantInt>(Op0->getOperand(0))->getZExtValue()));
644   LanguageVersion.push_back(Kern.getDocument()->getNode(
645       mdconst::extract<ConstantInt>(Op0->getOperand(1))->getZExtValue()));
646   Kern[".language_version"] = LanguageVersion;
647 }
648 
emitKernelAttrs(const Function & Func,msgpack::MapDocNode Kern)649 void MetadataStreamerV3::emitKernelAttrs(const Function &Func,
650                                          msgpack::MapDocNode Kern) {
651 
652   if (auto Node = Func.getMetadata("reqd_work_group_size"))
653     Kern[".reqd_workgroup_size"] = getWorkGroupDimensions(Node);
654   if (auto Node = Func.getMetadata("work_group_size_hint"))
655     Kern[".workgroup_size_hint"] = getWorkGroupDimensions(Node);
656   if (auto Node = Func.getMetadata("vec_type_hint")) {
657     Kern[".vec_type_hint"] = Kern.getDocument()->getNode(
658         getTypeName(
659             cast<ValueAsMetadata>(Node->getOperand(0))->getType(),
660             mdconst::extract<ConstantInt>(Node->getOperand(1))->getZExtValue()),
661         /*Copy=*/true);
662   }
663   if (Func.hasFnAttribute("runtime-handle")) {
664     Kern[".device_enqueue_symbol"] = Kern.getDocument()->getNode(
665         Func.getFnAttribute("runtime-handle").getValueAsString().str(),
666         /*Copy=*/true);
667   }
668 }
669 
emitKernelArgs(const Function & Func,msgpack::MapDocNode Kern)670 void MetadataStreamerV3::emitKernelArgs(const Function &Func,
671                                         msgpack::MapDocNode Kern) {
672   unsigned Offset = 0;
673   auto Args = HSAMetadataDoc->getArrayNode();
674   for (auto &Arg : Func.args())
675     emitKernelArg(Arg, Offset, Args);
676 
677   emitHiddenKernelArgs(Func, Offset, Args);
678 
679   Kern[".args"] = Args;
680 }
681 
emitKernelArg(const Argument & Arg,unsigned & Offset,msgpack::ArrayDocNode Args)682 void MetadataStreamerV3::emitKernelArg(const Argument &Arg, unsigned &Offset,
683                                        msgpack::ArrayDocNode Args) {
684   auto Func = Arg.getParent();
685   auto ArgNo = Arg.getArgNo();
686   const MDNode *Node;
687 
688   StringRef Name;
689   Node = Func->getMetadata("kernel_arg_name");
690   if (Node && ArgNo < Node->getNumOperands())
691     Name = cast<MDString>(Node->getOperand(ArgNo))->getString();
692   else if (Arg.hasName())
693     Name = Arg.getName();
694 
695   StringRef TypeName;
696   Node = Func->getMetadata("kernel_arg_type");
697   if (Node && ArgNo < Node->getNumOperands())
698     TypeName = cast<MDString>(Node->getOperand(ArgNo))->getString();
699 
700   StringRef BaseTypeName;
701   Node = Func->getMetadata("kernel_arg_base_type");
702   if (Node && ArgNo < Node->getNumOperands())
703     BaseTypeName = cast<MDString>(Node->getOperand(ArgNo))->getString();
704 
705   StringRef AccQual;
706   if (Arg.getType()->isPointerTy() && Arg.onlyReadsMemory() &&
707       Arg.hasNoAliasAttr()) {
708     AccQual = "read_only";
709   } else {
710     Node = Func->getMetadata("kernel_arg_access_qual");
711     if (Node && ArgNo < Node->getNumOperands())
712       AccQual = cast<MDString>(Node->getOperand(ArgNo))->getString();
713   }
714 
715   StringRef TypeQual;
716   Node = Func->getMetadata("kernel_arg_type_qual");
717   if (Node && ArgNo < Node->getNumOperands())
718     TypeQual = cast<MDString>(Node->getOperand(ArgNo))->getString();
719 
720   const DataLayout &DL = Func->getParent()->getDataLayout();
721 
722   MaybeAlign PointeeAlign;
723   Type *Ty = Arg.hasByRefAttr() ? Arg.getParamByRefType() : Arg.getType();
724 
725   // FIXME: Need to distinguish in memory alignment from pointer alignment.
726   if (auto PtrTy = dyn_cast<PointerType>(Ty)) {
727     if (PtrTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) {
728       PointeeAlign = DL.getValueOrABITypeAlignment(Arg.getParamAlign(),
729                                                    PtrTy->getElementType());
730     }
731   }
732 
733   // There's no distinction between byval aggregates and raw aggregates.
734   Type *ArgTy;
735   Align ArgAlign;
736   std::tie(ArgTy, ArgAlign) = getArgumentTypeAlign(Arg, DL);
737 
738   emitKernelArg(DL, ArgTy, ArgAlign,
739                 getValueKind(ArgTy, TypeQual, BaseTypeName), Offset, Args,
740                 PointeeAlign, Name, TypeName, BaseTypeName, AccQual, TypeQual);
741 }
742 
emitKernelArg(const DataLayout & DL,Type * Ty,Align Alignment,StringRef ValueKind,unsigned & Offset,msgpack::ArrayDocNode Args,MaybeAlign PointeeAlign,StringRef Name,StringRef TypeName,StringRef BaseTypeName,StringRef AccQual,StringRef TypeQual)743 void MetadataStreamerV3::emitKernelArg(
744     const DataLayout &DL, Type *Ty, Align Alignment, StringRef ValueKind,
745     unsigned &Offset, msgpack::ArrayDocNode Args, MaybeAlign PointeeAlign,
746     StringRef Name, StringRef TypeName, StringRef BaseTypeName,
747     StringRef AccQual, StringRef TypeQual) {
748   auto Arg = Args.getDocument()->getMapNode();
749 
750   if (!Name.empty())
751     Arg[".name"] = Arg.getDocument()->getNode(Name, /*Copy=*/true);
752   if (!TypeName.empty())
753     Arg[".type_name"] = Arg.getDocument()->getNode(TypeName, /*Copy=*/true);
754   auto Size = DL.getTypeAllocSize(Ty);
755   Arg[".size"] = Arg.getDocument()->getNode(Size);
756   Offset = alignTo(Offset, Alignment);
757   Arg[".offset"] = Arg.getDocument()->getNode(Offset);
758   Offset += Size;
759   Arg[".value_kind"] = Arg.getDocument()->getNode(ValueKind, /*Copy=*/true);
760   if (PointeeAlign)
761     Arg[".pointee_align"] = Arg.getDocument()->getNode(PointeeAlign->value());
762 
763   if (auto PtrTy = dyn_cast<PointerType>(Ty))
764     if (auto Qualifier = getAddressSpaceQualifier(PtrTy->getAddressSpace()))
765       Arg[".address_space"] = Arg.getDocument()->getNode(*Qualifier, /*Copy=*/true);
766 
767   if (auto AQ = getAccessQualifier(AccQual))
768     Arg[".access"] = Arg.getDocument()->getNode(*AQ, /*Copy=*/true);
769 
770   // TODO: Emit Arg[".actual_access"].
771 
772   SmallVector<StringRef, 1> SplitTypeQuals;
773   TypeQual.split(SplitTypeQuals, " ", -1, false);
774   for (StringRef Key : SplitTypeQuals) {
775     if (Key == "const")
776       Arg[".is_const"] = Arg.getDocument()->getNode(true);
777     else if (Key == "restrict")
778       Arg[".is_restrict"] = Arg.getDocument()->getNode(true);
779     else if (Key == "volatile")
780       Arg[".is_volatile"] = Arg.getDocument()->getNode(true);
781     else if (Key == "pipe")
782       Arg[".is_pipe"] = Arg.getDocument()->getNode(true);
783   }
784 
785   Args.push_back(Arg);
786 }
787 
emitHiddenKernelArgs(const Function & Func,unsigned & Offset,msgpack::ArrayDocNode Args)788 void MetadataStreamerV3::emitHiddenKernelArgs(const Function &Func,
789                                               unsigned &Offset,
790                                               msgpack::ArrayDocNode Args) {
791   int HiddenArgNumBytes =
792       getIntegerAttribute(Func, "amdgpu-implicitarg-num-bytes", 0);
793 
794   if (!HiddenArgNumBytes)
795     return;
796 
797   auto &DL = Func.getParent()->getDataLayout();
798   auto Int64Ty = Type::getInt64Ty(Func.getContext());
799 
800   if (HiddenArgNumBytes >= 8)
801     emitKernelArg(DL, Int64Ty, Align(8), "hidden_global_offset_x", Offset,
802                   Args);
803   if (HiddenArgNumBytes >= 16)
804     emitKernelArg(DL, Int64Ty, Align(8), "hidden_global_offset_y", Offset,
805                   Args);
806   if (HiddenArgNumBytes >= 24)
807     emitKernelArg(DL, Int64Ty, Align(8), "hidden_global_offset_z", Offset,
808                   Args);
809 
810   auto Int8PtrTy =
811       Type::getInt8PtrTy(Func.getContext(), AMDGPUAS::GLOBAL_ADDRESS);
812 
813   // Emit "printf buffer" argument if printf is used, otherwise emit dummy
814   // "none" argument.
815   if (HiddenArgNumBytes >= 32) {
816     if (Func.getParent()->getNamedMetadata("llvm.printf.fmts"))
817       emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_printf_buffer", Offset,
818                     Args);
819     else if (Func.getParent()->getFunction("__ockl_hostcall_internal")) {
820       // The printf runtime binding pass should have ensured that hostcall and
821       // printf are not used in the same module.
822       assert(!Func.getParent()->getNamedMetadata("llvm.printf.fmts"));
823       emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_hostcall_buffer", Offset,
824                     Args);
825     } else
826       emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_none", Offset, Args);
827   }
828 
829   // Emit "default queue" and "completion action" arguments if enqueue kernel is
830   // used, otherwise emit dummy "none" arguments.
831   if (HiddenArgNumBytes >= 48) {
832     if (Func.hasFnAttribute("calls-enqueue-kernel")) {
833       emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_default_queue", Offset,
834                     Args);
835       emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_completion_action", Offset,
836                     Args);
837     } else {
838       emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_none", Offset, Args);
839       emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_none", Offset, Args);
840     }
841   }
842 
843   // Emit the pointer argument for multi-grid object.
844   if (HiddenArgNumBytes >= 56)
845     emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_multigrid_sync_arg", Offset,
846                   Args);
847 }
848 
849 msgpack::MapDocNode
getHSAKernelProps(const MachineFunction & MF,const SIProgramInfo & ProgramInfo) const850 MetadataStreamerV3::getHSAKernelProps(const MachineFunction &MF,
851                                       const SIProgramInfo &ProgramInfo) const {
852   const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
853   const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
854   const Function &F = MF.getFunction();
855 
856   auto Kern = HSAMetadataDoc->getMapNode();
857 
858   Align MaxKernArgAlign;
859   Kern[".kernarg_segment_size"] = Kern.getDocument()->getNode(
860       STM.getKernArgSegmentSize(F, MaxKernArgAlign));
861   Kern[".group_segment_fixed_size"] =
862       Kern.getDocument()->getNode(ProgramInfo.LDSSize);
863   Kern[".private_segment_fixed_size"] =
864       Kern.getDocument()->getNode(ProgramInfo.ScratchSize);
865   Kern[".kernarg_segment_align"] =
866       Kern.getDocument()->getNode(std::max(Align(4), MaxKernArgAlign).value());
867   Kern[".wavefront_size"] =
868       Kern.getDocument()->getNode(STM.getWavefrontSize());
869   Kern[".sgpr_count"] = Kern.getDocument()->getNode(ProgramInfo.NumSGPR);
870   Kern[".vgpr_count"] = Kern.getDocument()->getNode(ProgramInfo.NumVGPR);
871   Kern[".max_flat_workgroup_size"] =
872       Kern.getDocument()->getNode(MFI.getMaxFlatWorkGroupSize());
873   Kern[".sgpr_spill_count"] =
874       Kern.getDocument()->getNode(MFI.getNumSpilledSGPRs());
875   Kern[".vgpr_spill_count"] =
876       Kern.getDocument()->getNode(MFI.getNumSpilledVGPRs());
877 
878   return Kern;
879 }
880 
emitTo(AMDGPUTargetStreamer & TargetStreamer)881 bool MetadataStreamerV3::emitTo(AMDGPUTargetStreamer &TargetStreamer) {
882   return TargetStreamer.EmitHSAMetadata(*HSAMetadataDoc, true);
883 }
884 
begin(const Module & Mod,const IsaInfo::AMDGPUTargetID & TargetID)885 void MetadataStreamerV3::begin(const Module &Mod,
886                                const IsaInfo::AMDGPUTargetID &TargetID) {
887   emitVersion();
888   emitPrintf(Mod);
889   getRootMetadata("amdhsa.kernels") = HSAMetadataDoc->getArrayNode();
890 }
891 
end()892 void MetadataStreamerV3::end() {
893   std::string HSAMetadataString;
894   raw_string_ostream StrOS(HSAMetadataString);
895   HSAMetadataDoc->toYAML(StrOS);
896 
897   if (DumpHSAMetadata)
898     dump(StrOS.str());
899   if (VerifyHSAMetadata)
900     verify(StrOS.str());
901 }
902 
emitKernel(const MachineFunction & MF,const SIProgramInfo & ProgramInfo)903 void MetadataStreamerV3::emitKernel(const MachineFunction &MF,
904                                     const SIProgramInfo &ProgramInfo) {
905   auto &Func = MF.getFunction();
906   auto Kern = getHSAKernelProps(MF, ProgramInfo);
907 
908   assert(Func.getCallingConv() == CallingConv::AMDGPU_KERNEL ||
909          Func.getCallingConv() == CallingConv::SPIR_KERNEL);
910 
911   auto Kernels =
912       getRootMetadata("amdhsa.kernels").getArray(/*Convert=*/true);
913 
914   {
915     Kern[".name"] = Kern.getDocument()->getNode(Func.getName());
916     Kern[".symbol"] = Kern.getDocument()->getNode(
917         (Twine(Func.getName()) + Twine(".kd")).str(), /*Copy=*/true);
918     emitKernelLanguage(Func, Kern);
919     emitKernelAttrs(Func, Kern);
920     emitKernelArgs(Func, Kern);
921   }
922 
923   Kernels.push_back(Kern);
924 }
925 
926 //===----------------------------------------------------------------------===//
927 // HSAMetadataStreamerV4
928 //===----------------------------------------------------------------------===//
929 
emitVersion()930 void MetadataStreamerV4::emitVersion() {
931   auto Version = HSAMetadataDoc->getArrayNode();
932   Version.push_back(Version.getDocument()->getNode(VersionMajorV4));
933   Version.push_back(Version.getDocument()->getNode(VersionMinorV4));
934   getRootMetadata("amdhsa.version") = Version;
935 }
936 
emitTargetID(const IsaInfo::AMDGPUTargetID & TargetID)937 void MetadataStreamerV4::emitTargetID(const IsaInfo::AMDGPUTargetID &TargetID) {
938   getRootMetadata("amdhsa.target") =
939       HSAMetadataDoc->getNode(TargetID.toString(), /*Copy=*/true);
940 }
941 
begin(const Module & Mod,const IsaInfo::AMDGPUTargetID & TargetID)942 void MetadataStreamerV4::begin(const Module &Mod,
943                                const IsaInfo::AMDGPUTargetID &TargetID) {
944   emitVersion();
945   emitTargetID(TargetID);
946   emitPrintf(Mod);
947   getRootMetadata("amdhsa.kernels") = HSAMetadataDoc->getArrayNode();
948 }
949 
950 } // end namespace HSAMD
951 } // end namespace AMDGPU
952 } // end namespace llvm
953