1 //===--- AMDGPUHSAMetadataStreamer.cpp --------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// AMDGPU HSA Metadata Streamer.
11 ///
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "AMDGPUHSAMetadataStreamer.h"
16 #include "AMDGPU.h"
17 #include "AMDGPUSubtarget.h"
18 #include "MCTargetDesc/AMDGPUTargetStreamer.h"
19 #include "SIMachineFunctionInfo.h"
20 #include "SIProgramInfo.h"
21 #include "Utils/AMDGPUBaseInfo.h"
22 #include "llvm/ADT/StringSwitch.h"
23 #include "llvm/IR/Constants.h"
24 #include "llvm/IR/Module.h"
25 #include "llvm/Support/raw_ostream.h"
26 
27 namespace llvm {
28 
29 static cl::opt<bool> DumpHSAMetadata(
30     "amdgpu-dump-hsa-metadata",
31     cl::desc("Dump AMDGPU HSA Metadata"));
32 static cl::opt<bool> VerifyHSAMetadata(
33     "amdgpu-verify-hsa-metadata",
34     cl::desc("Verify AMDGPU HSA Metadata"));
35 
36 namespace AMDGPU {
37 namespace HSAMD {
38 
39 //===----------------------------------------------------------------------===//
40 // HSAMetadataStreamerV2
41 //===----------------------------------------------------------------------===//
dump(StringRef HSAMetadataString) const42 void MetadataStreamerV2::dump(StringRef HSAMetadataString) const {
43   errs() << "AMDGPU HSA Metadata:\n" << HSAMetadataString << '\n';
44 }
45 
verify(StringRef HSAMetadataString) const46 void MetadataStreamerV2::verify(StringRef HSAMetadataString) const {
47   errs() << "AMDGPU HSA Metadata Parser Test: ";
48 
49   HSAMD::Metadata FromHSAMetadataString;
50   if (fromString(HSAMetadataString, FromHSAMetadataString)) {
51     errs() << "FAIL\n";
52     return;
53   }
54 
55   std::string ToHSAMetadataString;
56   if (toString(FromHSAMetadataString, ToHSAMetadataString)) {
57     errs() << "FAIL\n";
58     return;
59   }
60 
61   errs() << (HSAMetadataString == ToHSAMetadataString ? "PASS" : "FAIL")
62          << '\n';
63   if (HSAMetadataString != ToHSAMetadataString) {
64     errs() << "Original input: " << HSAMetadataString << '\n'
65            << "Produced output: " << ToHSAMetadataString << '\n';
66   }
67 }
68 
69 AccessQualifier
getAccessQualifier(StringRef AccQual) const70 MetadataStreamerV2::getAccessQualifier(StringRef AccQual) const {
71   if (AccQual.empty())
72     return AccessQualifier::Unknown;
73 
74   return StringSwitch<AccessQualifier>(AccQual)
75              .Case("read_only",  AccessQualifier::ReadOnly)
76              .Case("write_only", AccessQualifier::WriteOnly)
77              .Case("read_write", AccessQualifier::ReadWrite)
78              .Default(AccessQualifier::Default);
79 }
80 
81 AddressSpaceQualifier
getAddressSpaceQualifier(unsigned AddressSpace) const82 MetadataStreamerV2::getAddressSpaceQualifier(
83     unsigned AddressSpace) const {
84   switch (AddressSpace) {
85   case AMDGPUAS::PRIVATE_ADDRESS:
86     return AddressSpaceQualifier::Private;
87   case AMDGPUAS::GLOBAL_ADDRESS:
88     return AddressSpaceQualifier::Global;
89   case AMDGPUAS::CONSTANT_ADDRESS:
90     return AddressSpaceQualifier::Constant;
91   case AMDGPUAS::LOCAL_ADDRESS:
92     return AddressSpaceQualifier::Local;
93   case AMDGPUAS::FLAT_ADDRESS:
94     return AddressSpaceQualifier::Generic;
95   case AMDGPUAS::REGION_ADDRESS:
96     return AddressSpaceQualifier::Region;
97   default:
98     return AddressSpaceQualifier::Unknown;
99   }
100 }
101 
getValueKind(Type * Ty,StringRef TypeQual,StringRef BaseTypeName) const102 ValueKind MetadataStreamerV2::getValueKind(Type *Ty, StringRef TypeQual,
103                                            StringRef BaseTypeName) const {
104   if (TypeQual.find("pipe") != StringRef::npos)
105     return ValueKind::Pipe;
106 
107   return StringSwitch<ValueKind>(BaseTypeName)
108              .Case("image1d_t", ValueKind::Image)
109              .Case("image1d_array_t", ValueKind::Image)
110              .Case("image1d_buffer_t", ValueKind::Image)
111              .Case("image2d_t", ValueKind::Image)
112              .Case("image2d_array_t", ValueKind::Image)
113              .Case("image2d_array_depth_t", ValueKind::Image)
114              .Case("image2d_array_msaa_t", ValueKind::Image)
115              .Case("image2d_array_msaa_depth_t", ValueKind::Image)
116              .Case("image2d_depth_t", ValueKind::Image)
117              .Case("image2d_msaa_t", ValueKind::Image)
118              .Case("image2d_msaa_depth_t", ValueKind::Image)
119              .Case("image3d_t", ValueKind::Image)
120              .Case("sampler_t", ValueKind::Sampler)
121              .Case("queue_t", ValueKind::Queue)
122              .Default(isa<PointerType>(Ty) ?
123                           (Ty->getPointerAddressSpace() ==
124                            AMDGPUAS::LOCAL_ADDRESS ?
125                            ValueKind::DynamicSharedPointer :
126                            ValueKind::GlobalBuffer) :
127                       ValueKind::ByValue);
128 }
129 
getValueType(Type * Ty,StringRef TypeName) const130 ValueType MetadataStreamerV2::getValueType(Type *Ty, StringRef TypeName) const {
131   switch (Ty->getTypeID()) {
132   case Type::IntegerTyID: {
133     auto Signed = !TypeName.startswith("u");
134     switch (Ty->getIntegerBitWidth()) {
135     case 8:
136       return Signed ? ValueType::I8 : ValueType::U8;
137     case 16:
138       return Signed ? ValueType::I16 : ValueType::U16;
139     case 32:
140       return Signed ? ValueType::I32 : ValueType::U32;
141     case 64:
142       return Signed ? ValueType::I64 : ValueType::U64;
143     default:
144       return ValueType::Struct;
145     }
146   }
147   case Type::HalfTyID:
148     return ValueType::F16;
149   case Type::FloatTyID:
150     return ValueType::F32;
151   case Type::DoubleTyID:
152     return ValueType::F64;
153   case Type::PointerTyID:
154     return getValueType(Ty->getPointerElementType(), TypeName);
155   case Type::VectorTyID:
156     return getValueType(Ty->getVectorElementType(), TypeName);
157   default:
158     return ValueType::Struct;
159   }
160 }
161 
getTypeName(Type * Ty,bool Signed) const162 std::string MetadataStreamerV2::getTypeName(Type *Ty, bool Signed) const {
163   switch (Ty->getTypeID()) {
164   case Type::IntegerTyID: {
165     if (!Signed)
166       return (Twine('u') + getTypeName(Ty, true)).str();
167 
168     auto BitWidth = Ty->getIntegerBitWidth();
169     switch (BitWidth) {
170     case 8:
171       return "char";
172     case 16:
173       return "short";
174     case 32:
175       return "int";
176     case 64:
177       return "long";
178     default:
179       return (Twine('i') + Twine(BitWidth)).str();
180     }
181   }
182   case Type::HalfTyID:
183     return "half";
184   case Type::FloatTyID:
185     return "float";
186   case Type::DoubleTyID:
187     return "double";
188   case Type::VectorTyID: {
189     auto VecTy = cast<VectorType>(Ty);
190     auto ElTy = VecTy->getElementType();
191     auto NumElements = VecTy->getVectorNumElements();
192     return (Twine(getTypeName(ElTy, Signed)) + Twine(NumElements)).str();
193   }
194   default:
195     return "unknown";
196   }
197 }
198 
199 std::vector<uint32_t>
getWorkGroupDimensions(MDNode * Node) const200 MetadataStreamerV2::getWorkGroupDimensions(MDNode *Node) const {
201   std::vector<uint32_t> Dims;
202   if (Node->getNumOperands() != 3)
203     return Dims;
204 
205   for (auto &Op : Node->operands())
206     Dims.push_back(mdconst::extract<ConstantInt>(Op)->getZExtValue());
207   return Dims;
208 }
209 
210 Kernel::CodeProps::Metadata
getHSACodeProps(const MachineFunction & MF,const SIProgramInfo & ProgramInfo) const211 MetadataStreamerV2::getHSACodeProps(const MachineFunction &MF,
212                                     const SIProgramInfo &ProgramInfo) const {
213   const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
214   const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
215   HSAMD::Kernel::CodeProps::Metadata HSACodeProps;
216   const Function &F = MF.getFunction();
217 
218   assert(F.getCallingConv() == CallingConv::AMDGPU_KERNEL ||
219          F.getCallingConv() == CallingConv::SPIR_KERNEL);
220 
221   Align MaxKernArgAlign;
222   HSACodeProps.mKernargSegmentSize = STM.getKernArgSegmentSize(F,
223                                                                MaxKernArgAlign);
224   HSACodeProps.mGroupSegmentFixedSize = ProgramInfo.LDSSize;
225   HSACodeProps.mPrivateSegmentFixedSize = ProgramInfo.ScratchSize;
226   HSACodeProps.mKernargSegmentAlign =
227       std::max(MaxKernArgAlign, Align(4)).value();
228   HSACodeProps.mWavefrontSize = STM.getWavefrontSize();
229   HSACodeProps.mNumSGPRs = ProgramInfo.NumSGPR;
230   HSACodeProps.mNumVGPRs = ProgramInfo.NumVGPR;
231   HSACodeProps.mMaxFlatWorkGroupSize = MFI.getMaxFlatWorkGroupSize();
232   HSACodeProps.mIsDynamicCallStack = ProgramInfo.DynamicCallStack;
233   HSACodeProps.mIsXNACKEnabled = STM.isXNACKEnabled();
234   HSACodeProps.mNumSpilledSGPRs = MFI.getNumSpilledSGPRs();
235   HSACodeProps.mNumSpilledVGPRs = MFI.getNumSpilledVGPRs();
236 
237   return HSACodeProps;
238 }
239 
240 Kernel::DebugProps::Metadata
getHSADebugProps(const MachineFunction & MF,const SIProgramInfo & ProgramInfo) const241 MetadataStreamerV2::getHSADebugProps(const MachineFunction &MF,
242                                      const SIProgramInfo &ProgramInfo) const {
243   return HSAMD::Kernel::DebugProps::Metadata();
244 }
245 
emitVersion()246 void MetadataStreamerV2::emitVersion() {
247   auto &Version = HSAMetadata.mVersion;
248 
249   Version.push_back(VersionMajor);
250   Version.push_back(VersionMinor);
251 }
252 
emitPrintf(const Module & Mod)253 void MetadataStreamerV2::emitPrintf(const Module &Mod) {
254   auto &Printf = HSAMetadata.mPrintf;
255 
256   auto Node = Mod.getNamedMetadata("llvm.printf.fmts");
257   if (!Node)
258     return;
259 
260   for (auto Op : Node->operands())
261     if (Op->getNumOperands())
262       Printf.push_back(cast<MDString>(Op->getOperand(0))->getString());
263 }
264 
emitKernelLanguage(const Function & Func)265 void MetadataStreamerV2::emitKernelLanguage(const Function &Func) {
266   auto &Kernel = HSAMetadata.mKernels.back();
267 
268   // TODO: What about other languages?
269   auto Node = Func.getParent()->getNamedMetadata("opencl.ocl.version");
270   if (!Node || !Node->getNumOperands())
271     return;
272   auto Op0 = Node->getOperand(0);
273   if (Op0->getNumOperands() <= 1)
274     return;
275 
276   Kernel.mLanguage = "OpenCL C";
277   Kernel.mLanguageVersion.push_back(
278       mdconst::extract<ConstantInt>(Op0->getOperand(0))->getZExtValue());
279   Kernel.mLanguageVersion.push_back(
280       mdconst::extract<ConstantInt>(Op0->getOperand(1))->getZExtValue());
281 }
282 
emitKernelAttrs(const Function & Func)283 void MetadataStreamerV2::emitKernelAttrs(const Function &Func) {
284   auto &Attrs = HSAMetadata.mKernels.back().mAttrs;
285 
286   if (auto Node = Func.getMetadata("reqd_work_group_size"))
287     Attrs.mReqdWorkGroupSize = getWorkGroupDimensions(Node);
288   if (auto Node = Func.getMetadata("work_group_size_hint"))
289     Attrs.mWorkGroupSizeHint = getWorkGroupDimensions(Node);
290   if (auto Node = Func.getMetadata("vec_type_hint")) {
291     Attrs.mVecTypeHint = getTypeName(
292         cast<ValueAsMetadata>(Node->getOperand(0))->getType(),
293         mdconst::extract<ConstantInt>(Node->getOperand(1))->getZExtValue());
294   }
295   if (Func.hasFnAttribute("runtime-handle")) {
296     Attrs.mRuntimeHandle =
297         Func.getFnAttribute("runtime-handle").getValueAsString().str();
298   }
299 }
300 
emitKernelArgs(const Function & Func)301 void MetadataStreamerV2::emitKernelArgs(const Function &Func) {
302   for (auto &Arg : Func.args())
303     emitKernelArg(Arg);
304 
305   emitHiddenKernelArgs(Func);
306 }
307 
emitKernelArg(const Argument & Arg)308 void MetadataStreamerV2::emitKernelArg(const Argument &Arg) {
309   auto Func = Arg.getParent();
310   auto ArgNo = Arg.getArgNo();
311   const MDNode *Node;
312 
313   StringRef Name;
314   Node = Func->getMetadata("kernel_arg_name");
315   if (Node && ArgNo < Node->getNumOperands())
316     Name = cast<MDString>(Node->getOperand(ArgNo))->getString();
317   else if (Arg.hasName())
318     Name = Arg.getName();
319 
320   StringRef TypeName;
321   Node = Func->getMetadata("kernel_arg_type");
322   if (Node && ArgNo < Node->getNumOperands())
323     TypeName = cast<MDString>(Node->getOperand(ArgNo))->getString();
324 
325   StringRef BaseTypeName;
326   Node = Func->getMetadata("kernel_arg_base_type");
327   if (Node && ArgNo < Node->getNumOperands())
328     BaseTypeName = cast<MDString>(Node->getOperand(ArgNo))->getString();
329 
330   StringRef AccQual;
331   if (Arg.getType()->isPointerTy() && Arg.onlyReadsMemory() &&
332       Arg.hasNoAliasAttr()) {
333     AccQual = "read_only";
334   } else {
335     Node = Func->getMetadata("kernel_arg_access_qual");
336     if (Node && ArgNo < Node->getNumOperands())
337       AccQual = cast<MDString>(Node->getOperand(ArgNo))->getString();
338   }
339 
340   StringRef TypeQual;
341   Node = Func->getMetadata("kernel_arg_type_qual");
342   if (Node && ArgNo < Node->getNumOperands())
343     TypeQual = cast<MDString>(Node->getOperand(ArgNo))->getString();
344 
345   Type *Ty = Arg.getType();
346   const DataLayout &DL = Func->getParent()->getDataLayout();
347 
348   unsigned PointeeAlign = 0;
349   if (auto PtrTy = dyn_cast<PointerType>(Ty)) {
350     if (PtrTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) {
351       PointeeAlign = Arg.getParamAlignment();
352       if (PointeeAlign == 0)
353         PointeeAlign = DL.getABITypeAlignment(PtrTy->getElementType());
354     }
355   }
356 
357   emitKernelArg(DL, Ty, getValueKind(Arg.getType(), TypeQual, BaseTypeName),
358                 PointeeAlign, Name, TypeName, BaseTypeName, AccQual, TypeQual);
359 }
360 
emitKernelArg(const DataLayout & DL,Type * Ty,ValueKind ValueKind,unsigned PointeeAlign,StringRef Name,StringRef TypeName,StringRef BaseTypeName,StringRef AccQual,StringRef TypeQual)361 void MetadataStreamerV2::emitKernelArg(const DataLayout &DL, Type *Ty,
362                                        ValueKind ValueKind,
363                                        unsigned PointeeAlign, StringRef Name,
364                                        StringRef TypeName,
365                                        StringRef BaseTypeName,
366                                        StringRef AccQual, StringRef TypeQual) {
367   HSAMetadata.mKernels.back().mArgs.push_back(Kernel::Arg::Metadata());
368   auto &Arg = HSAMetadata.mKernels.back().mArgs.back();
369 
370   Arg.mName = Name;
371   Arg.mTypeName = TypeName;
372   Arg.mSize = DL.getTypeAllocSize(Ty);
373   Arg.mAlign = DL.getABITypeAlignment(Ty);
374   Arg.mValueKind = ValueKind;
375   Arg.mValueType = getValueType(Ty, BaseTypeName);
376   Arg.mPointeeAlign = PointeeAlign;
377 
378   if (auto PtrTy = dyn_cast<PointerType>(Ty))
379     Arg.mAddrSpaceQual = getAddressSpaceQualifier(PtrTy->getAddressSpace());
380 
381   Arg.mAccQual = getAccessQualifier(AccQual);
382 
383   // TODO: Emit Arg.mActualAccQual.
384 
385   SmallVector<StringRef, 1> SplitTypeQuals;
386   TypeQual.split(SplitTypeQuals, " ", -1, false);
387   for (StringRef Key : SplitTypeQuals) {
388     auto P = StringSwitch<bool*>(Key)
389                  .Case("const",    &Arg.mIsConst)
390                  .Case("restrict", &Arg.mIsRestrict)
391                  .Case("volatile", &Arg.mIsVolatile)
392                  .Case("pipe",     &Arg.mIsPipe)
393                  .Default(nullptr);
394     if (P)
395       *P = true;
396   }
397 }
398 
emitHiddenKernelArgs(const Function & Func)399 void MetadataStreamerV2::emitHiddenKernelArgs(const Function &Func) {
400   int HiddenArgNumBytes =
401       getIntegerAttribute(Func, "amdgpu-implicitarg-num-bytes", 0);
402 
403   if (!HiddenArgNumBytes)
404     return;
405 
406   auto &DL = Func.getParent()->getDataLayout();
407   auto Int64Ty = Type::getInt64Ty(Func.getContext());
408 
409   if (HiddenArgNumBytes >= 8)
410     emitKernelArg(DL, Int64Ty, ValueKind::HiddenGlobalOffsetX);
411   if (HiddenArgNumBytes >= 16)
412     emitKernelArg(DL, Int64Ty, ValueKind::HiddenGlobalOffsetY);
413   if (HiddenArgNumBytes >= 24)
414     emitKernelArg(DL, Int64Ty, ValueKind::HiddenGlobalOffsetZ);
415 
416   auto Int8PtrTy = Type::getInt8PtrTy(Func.getContext(),
417                                       AMDGPUAS::GLOBAL_ADDRESS);
418 
419   // Emit "printf buffer" argument if printf is used, otherwise emit dummy
420   // "none" argument.
421   if (HiddenArgNumBytes >= 32) {
422     if (Func.getParent()->getNamedMetadata("llvm.printf.fmts"))
423       emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenPrintfBuffer);
424     else if (Func.getParent()->getFunction("__ockl_hostcall_internal")) {
425       // The printf runtime binding pass should have ensured that hostcall and
426       // printf are not used in the same module.
427       assert(!Func.getParent()->getNamedMetadata("llvm.printf.fmts"));
428       emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenHostcallBuffer);
429     } else
430       emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenNone);
431   }
432 
433   // Emit "default queue" and "completion action" arguments if enqueue kernel is
434   // used, otherwise emit dummy "none" arguments.
435   if (HiddenArgNumBytes >= 48) {
436     if (Func.hasFnAttribute("calls-enqueue-kernel")) {
437       emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenDefaultQueue);
438       emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenCompletionAction);
439     } else {
440       emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenNone);
441       emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenNone);
442     }
443   }
444 
445   // Emit the pointer argument for multi-grid object.
446   if (HiddenArgNumBytes >= 56)
447     emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenMultiGridSyncArg);
448 }
449 
emitTo(AMDGPUTargetStreamer & TargetStreamer)450 bool MetadataStreamerV2::emitTo(AMDGPUTargetStreamer &TargetStreamer) {
451   return TargetStreamer.EmitHSAMetadata(getHSAMetadata());
452 }
453 
begin(const Module & Mod)454 void MetadataStreamerV2::begin(const Module &Mod) {
455   emitVersion();
456   emitPrintf(Mod);
457 }
458 
end()459 void MetadataStreamerV2::end() {
460   std::string HSAMetadataString;
461   if (toString(HSAMetadata, HSAMetadataString))
462     return;
463 
464   if (DumpHSAMetadata)
465     dump(HSAMetadataString);
466   if (VerifyHSAMetadata)
467     verify(HSAMetadataString);
468 }
469 
emitKernel(const MachineFunction & MF,const SIProgramInfo & ProgramInfo)470 void MetadataStreamerV2::emitKernel(const MachineFunction &MF,
471                                     const SIProgramInfo &ProgramInfo) {
472   auto &Func = MF.getFunction();
473   if (Func.getCallingConv() != CallingConv::AMDGPU_KERNEL)
474     return;
475 
476   auto CodeProps = getHSACodeProps(MF, ProgramInfo);
477   auto DebugProps = getHSADebugProps(MF, ProgramInfo);
478 
479   HSAMetadata.mKernels.push_back(Kernel::Metadata());
480   auto &Kernel = HSAMetadata.mKernels.back();
481 
482   Kernel.mName = Func.getName();
483   Kernel.mSymbolName = (Twine(Func.getName()) + Twine("@kd")).str();
484   emitKernelLanguage(Func);
485   emitKernelAttrs(Func);
486   emitKernelArgs(Func);
487   HSAMetadata.mKernels.back().mCodeProps = CodeProps;
488   HSAMetadata.mKernels.back().mDebugProps = DebugProps;
489 }
490 
491 //===----------------------------------------------------------------------===//
492 // HSAMetadataStreamerV3
493 //===----------------------------------------------------------------------===//
494 
dump(StringRef HSAMetadataString) const495 void MetadataStreamerV3::dump(StringRef HSAMetadataString) const {
496   errs() << "AMDGPU HSA Metadata:\n" << HSAMetadataString << '\n';
497 }
498 
verify(StringRef HSAMetadataString) const499 void MetadataStreamerV3::verify(StringRef HSAMetadataString) const {
500   errs() << "AMDGPU HSA Metadata Parser Test: ";
501 
502   msgpack::Document FromHSAMetadataString;
503 
504   if (!FromHSAMetadataString.fromYAML(HSAMetadataString)) {
505     errs() << "FAIL\n";
506     return;
507   }
508 
509   std::string ToHSAMetadataString;
510   raw_string_ostream StrOS(ToHSAMetadataString);
511   FromHSAMetadataString.toYAML(StrOS);
512 
513   errs() << (HSAMetadataString == StrOS.str() ? "PASS" : "FAIL") << '\n';
514   if (HSAMetadataString != ToHSAMetadataString) {
515     errs() << "Original input: " << HSAMetadataString << '\n'
516            << "Produced output: " << StrOS.str() << '\n';
517   }
518 }
519 
520 Optional<StringRef>
getAccessQualifier(StringRef AccQual) const521 MetadataStreamerV3::getAccessQualifier(StringRef AccQual) const {
522   return StringSwitch<Optional<StringRef>>(AccQual)
523       .Case("read_only", StringRef("read_only"))
524       .Case("write_only", StringRef("write_only"))
525       .Case("read_write", StringRef("read_write"))
526       .Default(None);
527 }
528 
529 Optional<StringRef>
getAddressSpaceQualifier(unsigned AddressSpace) const530 MetadataStreamerV3::getAddressSpaceQualifier(unsigned AddressSpace) const {
531   switch (AddressSpace) {
532   case AMDGPUAS::PRIVATE_ADDRESS:
533     return StringRef("private");
534   case AMDGPUAS::GLOBAL_ADDRESS:
535     return StringRef("global");
536   case AMDGPUAS::CONSTANT_ADDRESS:
537     return StringRef("constant");
538   case AMDGPUAS::LOCAL_ADDRESS:
539     return StringRef("local");
540   case AMDGPUAS::FLAT_ADDRESS:
541     return StringRef("generic");
542   case AMDGPUAS::REGION_ADDRESS:
543     return StringRef("region");
544   default:
545     return None;
546   }
547 }
548 
getValueKind(Type * Ty,StringRef TypeQual,StringRef BaseTypeName) const549 StringRef MetadataStreamerV3::getValueKind(Type *Ty, StringRef TypeQual,
550                                            StringRef BaseTypeName) const {
551   if (TypeQual.find("pipe") != StringRef::npos)
552     return "pipe";
553 
554   return StringSwitch<StringRef>(BaseTypeName)
555       .Case("image1d_t", "image")
556       .Case("image1d_array_t", "image")
557       .Case("image1d_buffer_t", "image")
558       .Case("image2d_t", "image")
559       .Case("image2d_array_t", "image")
560       .Case("image2d_array_depth_t", "image")
561       .Case("image2d_array_msaa_t", "image")
562       .Case("image2d_array_msaa_depth_t", "image")
563       .Case("image2d_depth_t", "image")
564       .Case("image2d_msaa_t", "image")
565       .Case("image2d_msaa_depth_t", "image")
566       .Case("image3d_t", "image")
567       .Case("sampler_t", "sampler")
568       .Case("queue_t", "queue")
569       .Default(isa<PointerType>(Ty)
570                    ? (Ty->getPointerAddressSpace() == AMDGPUAS::LOCAL_ADDRESS
571                           ? "dynamic_shared_pointer"
572                           : "global_buffer")
573                    : "by_value");
574 }
575 
getValueType(Type * Ty,StringRef TypeName) const576 StringRef MetadataStreamerV3::getValueType(Type *Ty, StringRef TypeName) const {
577   switch (Ty->getTypeID()) {
578   case Type::IntegerTyID: {
579     auto Signed = !TypeName.startswith("u");
580     switch (Ty->getIntegerBitWidth()) {
581     case 8:
582       return Signed ? "i8" : "u8";
583     case 16:
584       return Signed ? "i16" : "u16";
585     case 32:
586       return Signed ? "i32" : "u32";
587     case 64:
588       return Signed ? "i64" : "u64";
589     default:
590       return "struct";
591     }
592   }
593   case Type::HalfTyID:
594     return "f16";
595   case Type::FloatTyID:
596     return "f32";
597   case Type::DoubleTyID:
598     return "f64";
599   case Type::PointerTyID:
600     return getValueType(Ty->getPointerElementType(), TypeName);
601   case Type::VectorTyID:
602     return getValueType(Ty->getVectorElementType(), TypeName);
603   default:
604     return "struct";
605   }
606 }
607 
getTypeName(Type * Ty,bool Signed) const608 std::string MetadataStreamerV3::getTypeName(Type *Ty, bool Signed) const {
609   switch (Ty->getTypeID()) {
610   case Type::IntegerTyID: {
611     if (!Signed)
612       return (Twine('u') + getTypeName(Ty, true)).str();
613 
614     auto BitWidth = Ty->getIntegerBitWidth();
615     switch (BitWidth) {
616     case 8:
617       return "char";
618     case 16:
619       return "short";
620     case 32:
621       return "int";
622     case 64:
623       return "long";
624     default:
625       return (Twine('i') + Twine(BitWidth)).str();
626     }
627   }
628   case Type::HalfTyID:
629     return "half";
630   case Type::FloatTyID:
631     return "float";
632   case Type::DoubleTyID:
633     return "double";
634   case Type::VectorTyID: {
635     auto VecTy = cast<VectorType>(Ty);
636     auto ElTy = VecTy->getElementType();
637     auto NumElements = VecTy->getVectorNumElements();
638     return (Twine(getTypeName(ElTy, Signed)) + Twine(NumElements)).str();
639   }
640   default:
641     return "unknown";
642   }
643 }
644 
645 msgpack::ArrayDocNode
getWorkGroupDimensions(MDNode * Node) const646 MetadataStreamerV3::getWorkGroupDimensions(MDNode *Node) const {
647   auto Dims = HSAMetadataDoc->getArrayNode();
648   if (Node->getNumOperands() != 3)
649     return Dims;
650 
651   for (auto &Op : Node->operands())
652     Dims.push_back(Dims.getDocument()->getNode(
653         uint64_t(mdconst::extract<ConstantInt>(Op)->getZExtValue())));
654   return Dims;
655 }
656 
emitVersion()657 void MetadataStreamerV3::emitVersion() {
658   auto Version = HSAMetadataDoc->getArrayNode();
659   Version.push_back(Version.getDocument()->getNode(VersionMajor));
660   Version.push_back(Version.getDocument()->getNode(VersionMinor));
661   getRootMetadata("amdhsa.version") = Version;
662 }
663 
emitPrintf(const Module & Mod)664 void MetadataStreamerV3::emitPrintf(const Module &Mod) {
665   auto Node = Mod.getNamedMetadata("llvm.printf.fmts");
666   if (!Node)
667     return;
668 
669   auto Printf = HSAMetadataDoc->getArrayNode();
670   for (auto Op : Node->operands())
671     if (Op->getNumOperands())
672       Printf.push_back(Printf.getDocument()->getNode(
673           cast<MDString>(Op->getOperand(0))->getString(), /*Copy=*/true));
674   getRootMetadata("amdhsa.printf") = Printf;
675 }
676 
emitKernelLanguage(const Function & Func,msgpack::MapDocNode Kern)677 void MetadataStreamerV3::emitKernelLanguage(const Function &Func,
678                                             msgpack::MapDocNode Kern) {
679   // TODO: What about other languages?
680   auto Node = Func.getParent()->getNamedMetadata("opencl.ocl.version");
681   if (!Node || !Node->getNumOperands())
682     return;
683   auto Op0 = Node->getOperand(0);
684   if (Op0->getNumOperands() <= 1)
685     return;
686 
687   Kern[".language"] = Kern.getDocument()->getNode("OpenCL C");
688   auto LanguageVersion = Kern.getDocument()->getArrayNode();
689   LanguageVersion.push_back(Kern.getDocument()->getNode(
690       mdconst::extract<ConstantInt>(Op0->getOperand(0))->getZExtValue()));
691   LanguageVersion.push_back(Kern.getDocument()->getNode(
692       mdconst::extract<ConstantInt>(Op0->getOperand(1))->getZExtValue()));
693   Kern[".language_version"] = LanguageVersion;
694 }
695 
emitKernelAttrs(const Function & Func,msgpack::MapDocNode Kern)696 void MetadataStreamerV3::emitKernelAttrs(const Function &Func,
697                                          msgpack::MapDocNode Kern) {
698 
699   if (auto Node = Func.getMetadata("reqd_work_group_size"))
700     Kern[".reqd_workgroup_size"] = getWorkGroupDimensions(Node);
701   if (auto Node = Func.getMetadata("work_group_size_hint"))
702     Kern[".workgroup_size_hint"] = getWorkGroupDimensions(Node);
703   if (auto Node = Func.getMetadata("vec_type_hint")) {
704     Kern[".vec_type_hint"] = Kern.getDocument()->getNode(
705         getTypeName(
706             cast<ValueAsMetadata>(Node->getOperand(0))->getType(),
707             mdconst::extract<ConstantInt>(Node->getOperand(1))->getZExtValue()),
708         /*Copy=*/true);
709   }
710   if (Func.hasFnAttribute("runtime-handle")) {
711     Kern[".device_enqueue_symbol"] = Kern.getDocument()->getNode(
712         Func.getFnAttribute("runtime-handle").getValueAsString().str(),
713         /*Copy=*/true);
714   }
715 }
716 
emitKernelArgs(const Function & Func,msgpack::MapDocNode Kern)717 void MetadataStreamerV3::emitKernelArgs(const Function &Func,
718                                         msgpack::MapDocNode Kern) {
719   unsigned Offset = 0;
720   auto Args = HSAMetadataDoc->getArrayNode();
721   for (auto &Arg : Func.args())
722     emitKernelArg(Arg, Offset, Args);
723 
724   emitHiddenKernelArgs(Func, Offset, Args);
725 
726   Kern[".args"] = Args;
727 }
728 
emitKernelArg(const Argument & Arg,unsigned & Offset,msgpack::ArrayDocNode Args)729 void MetadataStreamerV3::emitKernelArg(const Argument &Arg, unsigned &Offset,
730                                        msgpack::ArrayDocNode Args) {
731   auto Func = Arg.getParent();
732   auto ArgNo = Arg.getArgNo();
733   const MDNode *Node;
734 
735   StringRef Name;
736   Node = Func->getMetadata("kernel_arg_name");
737   if (Node && ArgNo < Node->getNumOperands())
738     Name = cast<MDString>(Node->getOperand(ArgNo))->getString();
739   else if (Arg.hasName())
740     Name = Arg.getName();
741 
742   StringRef TypeName;
743   Node = Func->getMetadata("kernel_arg_type");
744   if (Node && ArgNo < Node->getNumOperands())
745     TypeName = cast<MDString>(Node->getOperand(ArgNo))->getString();
746 
747   StringRef BaseTypeName;
748   Node = Func->getMetadata("kernel_arg_base_type");
749   if (Node && ArgNo < Node->getNumOperands())
750     BaseTypeName = cast<MDString>(Node->getOperand(ArgNo))->getString();
751 
752   StringRef AccQual;
753   if (Arg.getType()->isPointerTy() && Arg.onlyReadsMemory() &&
754       Arg.hasNoAliasAttr()) {
755     AccQual = "read_only";
756   } else {
757     Node = Func->getMetadata("kernel_arg_access_qual");
758     if (Node && ArgNo < Node->getNumOperands())
759       AccQual = cast<MDString>(Node->getOperand(ArgNo))->getString();
760   }
761 
762   StringRef TypeQual;
763   Node = Func->getMetadata("kernel_arg_type_qual");
764   if (Node && ArgNo < Node->getNumOperands())
765     TypeQual = cast<MDString>(Node->getOperand(ArgNo))->getString();
766 
767   Type *Ty = Arg.getType();
768   const DataLayout &DL = Func->getParent()->getDataLayout();
769 
770   unsigned PointeeAlign = 0;
771   if (auto PtrTy = dyn_cast<PointerType>(Ty)) {
772     if (PtrTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) {
773       PointeeAlign = Arg.getParamAlignment();
774       if (PointeeAlign == 0)
775         PointeeAlign = DL.getABITypeAlignment(PtrTy->getElementType());
776     }
777   }
778 
779   emitKernelArg(Func->getParent()->getDataLayout(), Arg.getType(),
780                 getValueKind(Arg.getType(), TypeQual, BaseTypeName), Offset,
781                 Args, PointeeAlign, Name, TypeName, BaseTypeName, AccQual,
782                 TypeQual);
783 }
784 
emitKernelArg(const DataLayout & DL,Type * Ty,StringRef ValueKind,unsigned & Offset,msgpack::ArrayDocNode Args,unsigned PointeeAlign,StringRef Name,StringRef TypeName,StringRef BaseTypeName,StringRef AccQual,StringRef TypeQual)785 void MetadataStreamerV3::emitKernelArg(const DataLayout &DL, Type *Ty,
786                                        StringRef ValueKind, unsigned &Offset,
787                                        msgpack::ArrayDocNode Args,
788                                        unsigned PointeeAlign, StringRef Name,
789                                        StringRef TypeName,
790                                        StringRef BaseTypeName,
791                                        StringRef AccQual, StringRef TypeQual) {
792   auto Arg = Args.getDocument()->getMapNode();
793 
794   if (!Name.empty())
795     Arg[".name"] = Arg.getDocument()->getNode(Name, /*Copy=*/true);
796   if (!TypeName.empty())
797     Arg[".type_name"] = Arg.getDocument()->getNode(TypeName, /*Copy=*/true);
798   auto Size = DL.getTypeAllocSize(Ty);
799   auto Align = DL.getABITypeAlignment(Ty);
800   Arg[".size"] = Arg.getDocument()->getNode(Size);
801   Offset = alignTo(Offset, Align);
802   Arg[".offset"] = Arg.getDocument()->getNode(Offset);
803   Offset += Size;
804   Arg[".value_kind"] = Arg.getDocument()->getNode(ValueKind, /*Copy=*/true);
805   Arg[".value_type"] =
806       Arg.getDocument()->getNode(getValueType(Ty, BaseTypeName), /*Copy=*/true);
807   if (PointeeAlign)
808     Arg[".pointee_align"] = Arg.getDocument()->getNode(PointeeAlign);
809 
810   if (auto PtrTy = dyn_cast<PointerType>(Ty))
811     if (auto Qualifier = getAddressSpaceQualifier(PtrTy->getAddressSpace()))
812       Arg[".address_space"] = Arg.getDocument()->getNode(*Qualifier, /*Copy=*/true);
813 
814   if (auto AQ = getAccessQualifier(AccQual))
815     Arg[".access"] = Arg.getDocument()->getNode(*AQ, /*Copy=*/true);
816 
817   // TODO: Emit Arg[".actual_access"].
818 
819   SmallVector<StringRef, 1> SplitTypeQuals;
820   TypeQual.split(SplitTypeQuals, " ", -1, false);
821   for (StringRef Key : SplitTypeQuals) {
822     if (Key == "const")
823       Arg[".is_const"] = Arg.getDocument()->getNode(true);
824     else if (Key == "restrict")
825       Arg[".is_restrict"] = Arg.getDocument()->getNode(true);
826     else if (Key == "volatile")
827       Arg[".is_volatile"] = Arg.getDocument()->getNode(true);
828     else if (Key == "pipe")
829       Arg[".is_pipe"] = Arg.getDocument()->getNode(true);
830   }
831 
832   Args.push_back(Arg);
833 }
834 
emitHiddenKernelArgs(const Function & Func,unsigned & Offset,msgpack::ArrayDocNode Args)835 void MetadataStreamerV3::emitHiddenKernelArgs(const Function &Func,
836                                               unsigned &Offset,
837                                               msgpack::ArrayDocNode Args) {
838   int HiddenArgNumBytes =
839       getIntegerAttribute(Func, "amdgpu-implicitarg-num-bytes", 0);
840 
841   if (!HiddenArgNumBytes)
842     return;
843 
844   auto &DL = Func.getParent()->getDataLayout();
845   auto Int64Ty = Type::getInt64Ty(Func.getContext());
846 
847   if (HiddenArgNumBytes >= 8)
848     emitKernelArg(DL, Int64Ty, "hidden_global_offset_x", Offset, Args);
849   if (HiddenArgNumBytes >= 16)
850     emitKernelArg(DL, Int64Ty, "hidden_global_offset_y", Offset, Args);
851   if (HiddenArgNumBytes >= 24)
852     emitKernelArg(DL, Int64Ty, "hidden_global_offset_z", Offset, Args);
853 
854   auto Int8PtrTy =
855       Type::getInt8PtrTy(Func.getContext(), AMDGPUAS::GLOBAL_ADDRESS);
856 
857   // Emit "printf buffer" argument if printf is used, otherwise emit dummy
858   // "none" argument.
859   if (HiddenArgNumBytes >= 32) {
860     if (Func.getParent()->getNamedMetadata("llvm.printf.fmts"))
861       emitKernelArg(DL, Int8PtrTy, "hidden_printf_buffer", Offset, Args);
862     else if (Func.getParent()->getFunction("__ockl_hostcall_internal")) {
863       // The printf runtime binding pass should have ensured that hostcall and
864       // printf are not used in the same module.
865       assert(!Func.getParent()->getNamedMetadata("llvm.printf.fmts"));
866       emitKernelArg(DL, Int8PtrTy, "hidden_hostcall_buffer", Offset, Args);
867     } else
868       emitKernelArg(DL, Int8PtrTy, "hidden_none", Offset, Args);
869   }
870 
871   // Emit "default queue" and "completion action" arguments if enqueue kernel is
872   // used, otherwise emit dummy "none" arguments.
873   if (HiddenArgNumBytes >= 48) {
874     if (Func.hasFnAttribute("calls-enqueue-kernel")) {
875       emitKernelArg(DL, Int8PtrTy, "hidden_default_queue", Offset, Args);
876       emitKernelArg(DL, Int8PtrTy, "hidden_completion_action", Offset, Args);
877     } else {
878       emitKernelArg(DL, Int8PtrTy, "hidden_none", Offset, Args);
879       emitKernelArg(DL, Int8PtrTy, "hidden_none", Offset, Args);
880     }
881   }
882 
883   // Emit the pointer argument for multi-grid object.
884   if (HiddenArgNumBytes >= 56)
885     emitKernelArg(DL, Int8PtrTy, "hidden_multigrid_sync_arg", Offset, Args);
886 }
887 
888 msgpack::MapDocNode
getHSAKernelProps(const MachineFunction & MF,const SIProgramInfo & ProgramInfo) const889 MetadataStreamerV3::getHSAKernelProps(const MachineFunction &MF,
890                                       const SIProgramInfo &ProgramInfo) const {
891   const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
892   const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
893   const Function &F = MF.getFunction();
894 
895   auto Kern = HSAMetadataDoc->getMapNode();
896 
897   Align MaxKernArgAlign;
898   Kern[".kernarg_segment_size"] = Kern.getDocument()->getNode(
899       STM.getKernArgSegmentSize(F, MaxKernArgAlign));
900   Kern[".group_segment_fixed_size"] =
901       Kern.getDocument()->getNode(ProgramInfo.LDSSize);
902   Kern[".private_segment_fixed_size"] =
903       Kern.getDocument()->getNode(ProgramInfo.ScratchSize);
904   Kern[".kernarg_segment_align"] =
905       Kern.getDocument()->getNode(std::max(Align(4), MaxKernArgAlign).value());
906   Kern[".wavefront_size"] =
907       Kern.getDocument()->getNode(STM.getWavefrontSize());
908   Kern[".sgpr_count"] = Kern.getDocument()->getNode(ProgramInfo.NumSGPR);
909   Kern[".vgpr_count"] = Kern.getDocument()->getNode(ProgramInfo.NumVGPR);
910   Kern[".max_flat_workgroup_size"] =
911       Kern.getDocument()->getNode(MFI.getMaxFlatWorkGroupSize());
912   Kern[".sgpr_spill_count"] =
913       Kern.getDocument()->getNode(MFI.getNumSpilledSGPRs());
914   Kern[".vgpr_spill_count"] =
915       Kern.getDocument()->getNode(MFI.getNumSpilledVGPRs());
916 
917   return Kern;
918 }
919 
emitTo(AMDGPUTargetStreamer & TargetStreamer)920 bool MetadataStreamerV3::emitTo(AMDGPUTargetStreamer &TargetStreamer) {
921   return TargetStreamer.EmitHSAMetadata(*HSAMetadataDoc, true);
922 }
923 
begin(const Module & Mod)924 void MetadataStreamerV3::begin(const Module &Mod) {
925   emitVersion();
926   emitPrintf(Mod);
927   getRootMetadata("amdhsa.kernels") = HSAMetadataDoc->getArrayNode();
928 }
929 
end()930 void MetadataStreamerV3::end() {
931   std::string HSAMetadataString;
932   raw_string_ostream StrOS(HSAMetadataString);
933   HSAMetadataDoc->toYAML(StrOS);
934 
935   if (DumpHSAMetadata)
936     dump(StrOS.str());
937   if (VerifyHSAMetadata)
938     verify(StrOS.str());
939 }
940 
emitKernel(const MachineFunction & MF,const SIProgramInfo & ProgramInfo)941 void MetadataStreamerV3::emitKernel(const MachineFunction &MF,
942                                     const SIProgramInfo &ProgramInfo) {
943   auto &Func = MF.getFunction();
944   auto Kern = getHSAKernelProps(MF, ProgramInfo);
945 
946   assert(Func.getCallingConv() == CallingConv::AMDGPU_KERNEL ||
947          Func.getCallingConv() == CallingConv::SPIR_KERNEL);
948 
949   auto Kernels =
950       getRootMetadata("amdhsa.kernels").getArray(/*Convert=*/true);
951 
952   {
953     Kern[".name"] = Kern.getDocument()->getNode(Func.getName());
954     Kern[".symbol"] = Kern.getDocument()->getNode(
955         (Twine(Func.getName()) + Twine(".kd")).str(), /*Copy=*/true);
956     emitKernelLanguage(Func, Kern);
957     emitKernelAttrs(Func, Kern);
958     emitKernelArgs(Func, Kern);
959   }
960 
961   Kernels.push_back(Kern);
962 }
963 
964 } // end namespace HSAMD
965 } // end namespace AMDGPU
966 } // end namespace llvm
967