1 //===--- AMDGPUHSAMetadataStreamer.cpp --------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// AMDGPU HSA Metadata Streamer.
11 ///
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "AMDGPUHSAMetadataStreamer.h"
16 #include "AMDGPU.h"
17 #include "AMDGPUSubtarget.h"
18 #include "MCTargetDesc/AMDGPUTargetStreamer.h"
19 #include "SIMachineFunctionInfo.h"
20 #include "SIProgramInfo.h"
21 #include "Utils/AMDGPUBaseInfo.h"
22 #include "llvm/ADT/StringSwitch.h"
23 #include "llvm/IR/Constants.h"
24 #include "llvm/IR/Module.h"
25 #include "llvm/Support/raw_ostream.h"
26 
27 using namespace llvm;
28 
getArgumentTypeAlign(const Argument & Arg,const DataLayout & DL)29 static std::pair<Type *, Align> getArgumentTypeAlign(const Argument &Arg,
30                                                      const DataLayout &DL) {
31   Type *Ty = Arg.getType();
32   MaybeAlign ArgAlign;
33   if (Arg.hasByRefAttr()) {
34     Ty = Arg.getParamByRefType();
35     ArgAlign = Arg.getParamAlign();
36   }
37 
38   if (!ArgAlign)
39     ArgAlign = DL.getABITypeAlign(Ty);
40 
41   return std::make_pair(Ty, *ArgAlign);
42 }
43 
44 namespace llvm {
45 
46 static cl::opt<bool> DumpHSAMetadata(
47     "amdgpu-dump-hsa-metadata",
48     cl::desc("Dump AMDGPU HSA Metadata"));
49 static cl::opt<bool> VerifyHSAMetadata(
50     "amdgpu-verify-hsa-metadata",
51     cl::desc("Verify AMDGPU HSA Metadata"));
52 
53 namespace AMDGPU {
54 namespace HSAMD {
55 
56 //===----------------------------------------------------------------------===//
57 // HSAMetadataStreamerV2
58 //===----------------------------------------------------------------------===//
dump(StringRef HSAMetadataString) const59 void MetadataStreamerV2::dump(StringRef HSAMetadataString) const {
60   errs() << "AMDGPU HSA Metadata:\n" << HSAMetadataString << '\n';
61 }
62 
verify(StringRef HSAMetadataString) const63 void MetadataStreamerV2::verify(StringRef HSAMetadataString) const {
64   errs() << "AMDGPU HSA Metadata Parser Test: ";
65 
66   HSAMD::Metadata FromHSAMetadataString;
67   if (fromString(std::string(HSAMetadataString), FromHSAMetadataString)) {
68     errs() << "FAIL\n";
69     return;
70   }
71 
72   std::string ToHSAMetadataString;
73   if (toString(FromHSAMetadataString, ToHSAMetadataString)) {
74     errs() << "FAIL\n";
75     return;
76   }
77 
78   errs() << (HSAMetadataString == ToHSAMetadataString ? "PASS" : "FAIL")
79          << '\n';
80   if (HSAMetadataString != ToHSAMetadataString) {
81     errs() << "Original input: " << HSAMetadataString << '\n'
82            << "Produced output: " << ToHSAMetadataString << '\n';
83   }
84 }
85 
86 AccessQualifier
getAccessQualifier(StringRef AccQual) const87 MetadataStreamerV2::getAccessQualifier(StringRef AccQual) const {
88   if (AccQual.empty())
89     return AccessQualifier::Unknown;
90 
91   return StringSwitch<AccessQualifier>(AccQual)
92              .Case("read_only",  AccessQualifier::ReadOnly)
93              .Case("write_only", AccessQualifier::WriteOnly)
94              .Case("read_write", AccessQualifier::ReadWrite)
95              .Default(AccessQualifier::Default);
96 }
97 
98 AddressSpaceQualifier
getAddressSpaceQualifier(unsigned AddressSpace) const99 MetadataStreamerV2::getAddressSpaceQualifier(
100     unsigned AddressSpace) const {
101   switch (AddressSpace) {
102   case AMDGPUAS::PRIVATE_ADDRESS:
103     return AddressSpaceQualifier::Private;
104   case AMDGPUAS::GLOBAL_ADDRESS:
105     return AddressSpaceQualifier::Global;
106   case AMDGPUAS::CONSTANT_ADDRESS:
107     return AddressSpaceQualifier::Constant;
108   case AMDGPUAS::LOCAL_ADDRESS:
109     return AddressSpaceQualifier::Local;
110   case AMDGPUAS::FLAT_ADDRESS:
111     return AddressSpaceQualifier::Generic;
112   case AMDGPUAS::REGION_ADDRESS:
113     return AddressSpaceQualifier::Region;
114   default:
115     return AddressSpaceQualifier::Unknown;
116   }
117 }
118 
getValueKind(Type * Ty,StringRef TypeQual,StringRef BaseTypeName) const119 ValueKind MetadataStreamerV2::getValueKind(Type *Ty, StringRef TypeQual,
120                                            StringRef BaseTypeName) const {
121   if (TypeQual.find("pipe") != StringRef::npos)
122     return ValueKind::Pipe;
123 
124   return StringSwitch<ValueKind>(BaseTypeName)
125              .Case("image1d_t", ValueKind::Image)
126              .Case("image1d_array_t", ValueKind::Image)
127              .Case("image1d_buffer_t", ValueKind::Image)
128              .Case("image2d_t", ValueKind::Image)
129              .Case("image2d_array_t", ValueKind::Image)
130              .Case("image2d_array_depth_t", ValueKind::Image)
131              .Case("image2d_array_msaa_t", ValueKind::Image)
132              .Case("image2d_array_msaa_depth_t", ValueKind::Image)
133              .Case("image2d_depth_t", ValueKind::Image)
134              .Case("image2d_msaa_t", ValueKind::Image)
135              .Case("image2d_msaa_depth_t", ValueKind::Image)
136              .Case("image3d_t", ValueKind::Image)
137              .Case("sampler_t", ValueKind::Sampler)
138              .Case("queue_t", ValueKind::Queue)
139              .Default(isa<PointerType>(Ty) ?
140                           (Ty->getPointerAddressSpace() ==
141                            AMDGPUAS::LOCAL_ADDRESS ?
142                            ValueKind::DynamicSharedPointer :
143                            ValueKind::GlobalBuffer) :
144                       ValueKind::ByValue);
145 }
146 
getTypeName(Type * Ty,bool Signed) const147 std::string MetadataStreamerV2::getTypeName(Type *Ty, bool Signed) const {
148   switch (Ty->getTypeID()) {
149   case Type::IntegerTyID: {
150     if (!Signed)
151       return (Twine('u') + getTypeName(Ty, true)).str();
152 
153     auto BitWidth = Ty->getIntegerBitWidth();
154     switch (BitWidth) {
155     case 8:
156       return "char";
157     case 16:
158       return "short";
159     case 32:
160       return "int";
161     case 64:
162       return "long";
163     default:
164       return (Twine('i') + Twine(BitWidth)).str();
165     }
166   }
167   case Type::HalfTyID:
168     return "half";
169   case Type::FloatTyID:
170     return "float";
171   case Type::DoubleTyID:
172     return "double";
173   case Type::FixedVectorTyID: {
174     auto VecTy = cast<FixedVectorType>(Ty);
175     auto ElTy = VecTy->getElementType();
176     auto NumElements = VecTy->getNumElements();
177     return (Twine(getTypeName(ElTy, Signed)) + Twine(NumElements)).str();
178   }
179   default:
180     return "unknown";
181   }
182 }
183 
184 std::vector<uint32_t>
getWorkGroupDimensions(MDNode * Node) const185 MetadataStreamerV2::getWorkGroupDimensions(MDNode *Node) const {
186   std::vector<uint32_t> Dims;
187   if (Node->getNumOperands() != 3)
188     return Dims;
189 
190   for (auto &Op : Node->operands())
191     Dims.push_back(mdconst::extract<ConstantInt>(Op)->getZExtValue());
192   return Dims;
193 }
194 
195 Kernel::CodeProps::Metadata
getHSACodeProps(const MachineFunction & MF,const SIProgramInfo & ProgramInfo) const196 MetadataStreamerV2::getHSACodeProps(const MachineFunction &MF,
197                                     const SIProgramInfo &ProgramInfo) const {
198   const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
199   const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
200   HSAMD::Kernel::CodeProps::Metadata HSACodeProps;
201   const Function &F = MF.getFunction();
202 
203   assert(F.getCallingConv() == CallingConv::AMDGPU_KERNEL ||
204          F.getCallingConv() == CallingConv::SPIR_KERNEL);
205 
206   Align MaxKernArgAlign;
207   HSACodeProps.mKernargSegmentSize = STM.getKernArgSegmentSize(F,
208                                                                MaxKernArgAlign);
209   HSACodeProps.mGroupSegmentFixedSize = ProgramInfo.LDSSize;
210   HSACodeProps.mPrivateSegmentFixedSize = ProgramInfo.ScratchSize;
211   HSACodeProps.mKernargSegmentAlign =
212       std::max(MaxKernArgAlign, Align(4)).value();
213   HSACodeProps.mWavefrontSize = STM.getWavefrontSize();
214   HSACodeProps.mNumSGPRs = ProgramInfo.NumSGPR;
215   HSACodeProps.mNumVGPRs = ProgramInfo.NumVGPR;
216   HSACodeProps.mMaxFlatWorkGroupSize = MFI.getMaxFlatWorkGroupSize();
217   HSACodeProps.mIsDynamicCallStack = ProgramInfo.DynamicCallStack;
218   HSACodeProps.mIsXNACKEnabled = STM.isXNACKEnabled();
219   HSACodeProps.mNumSpilledSGPRs = MFI.getNumSpilledSGPRs();
220   HSACodeProps.mNumSpilledVGPRs = MFI.getNumSpilledVGPRs();
221 
222   return HSACodeProps;
223 }
224 
225 Kernel::DebugProps::Metadata
getHSADebugProps(const MachineFunction & MF,const SIProgramInfo & ProgramInfo) const226 MetadataStreamerV2::getHSADebugProps(const MachineFunction &MF,
227                                      const SIProgramInfo &ProgramInfo) const {
228   return HSAMD::Kernel::DebugProps::Metadata();
229 }
230 
emitVersion()231 void MetadataStreamerV2::emitVersion() {
232   auto &Version = HSAMetadata.mVersion;
233 
234   Version.push_back(VersionMajor);
235   Version.push_back(VersionMinor);
236 }
237 
emitPrintf(const Module & Mod)238 void MetadataStreamerV2::emitPrintf(const Module &Mod) {
239   auto &Printf = HSAMetadata.mPrintf;
240 
241   auto Node = Mod.getNamedMetadata("llvm.printf.fmts");
242   if (!Node)
243     return;
244 
245   for (auto Op : Node->operands())
246     if (Op->getNumOperands())
247       Printf.push_back(
248           std::string(cast<MDString>(Op->getOperand(0))->getString()));
249 }
250 
emitKernelLanguage(const Function & Func)251 void MetadataStreamerV2::emitKernelLanguage(const Function &Func) {
252   auto &Kernel = HSAMetadata.mKernels.back();
253 
254   // TODO: What about other languages?
255   auto Node = Func.getParent()->getNamedMetadata("opencl.ocl.version");
256   if (!Node || !Node->getNumOperands())
257     return;
258   auto Op0 = Node->getOperand(0);
259   if (Op0->getNumOperands() <= 1)
260     return;
261 
262   Kernel.mLanguage = "OpenCL C";
263   Kernel.mLanguageVersion.push_back(
264       mdconst::extract<ConstantInt>(Op0->getOperand(0))->getZExtValue());
265   Kernel.mLanguageVersion.push_back(
266       mdconst::extract<ConstantInt>(Op0->getOperand(1))->getZExtValue());
267 }
268 
emitKernelAttrs(const Function & Func)269 void MetadataStreamerV2::emitKernelAttrs(const Function &Func) {
270   auto &Attrs = HSAMetadata.mKernels.back().mAttrs;
271 
272   if (auto Node = Func.getMetadata("reqd_work_group_size"))
273     Attrs.mReqdWorkGroupSize = getWorkGroupDimensions(Node);
274   if (auto Node = Func.getMetadata("work_group_size_hint"))
275     Attrs.mWorkGroupSizeHint = getWorkGroupDimensions(Node);
276   if (auto Node = Func.getMetadata("vec_type_hint")) {
277     Attrs.mVecTypeHint = getTypeName(
278         cast<ValueAsMetadata>(Node->getOperand(0))->getType(),
279         mdconst::extract<ConstantInt>(Node->getOperand(1))->getZExtValue());
280   }
281   if (Func.hasFnAttribute("runtime-handle")) {
282     Attrs.mRuntimeHandle =
283         Func.getFnAttribute("runtime-handle").getValueAsString().str();
284   }
285 }
286 
emitKernelArgs(const Function & Func)287 void MetadataStreamerV2::emitKernelArgs(const Function &Func) {
288   for (auto &Arg : Func.args())
289     emitKernelArg(Arg);
290 
291   emitHiddenKernelArgs(Func);
292 }
293 
emitKernelArg(const Argument & Arg)294 void MetadataStreamerV2::emitKernelArg(const Argument &Arg) {
295   auto Func = Arg.getParent();
296   auto ArgNo = Arg.getArgNo();
297   const MDNode *Node;
298 
299   StringRef Name;
300   Node = Func->getMetadata("kernel_arg_name");
301   if (Node && ArgNo < Node->getNumOperands())
302     Name = cast<MDString>(Node->getOperand(ArgNo))->getString();
303   else if (Arg.hasName())
304     Name = Arg.getName();
305 
306   StringRef TypeName;
307   Node = Func->getMetadata("kernel_arg_type");
308   if (Node && ArgNo < Node->getNumOperands())
309     TypeName = cast<MDString>(Node->getOperand(ArgNo))->getString();
310 
311   StringRef BaseTypeName;
312   Node = Func->getMetadata("kernel_arg_base_type");
313   if (Node && ArgNo < Node->getNumOperands())
314     BaseTypeName = cast<MDString>(Node->getOperand(ArgNo))->getString();
315 
316   StringRef AccQual;
317   if (Arg.getType()->isPointerTy() && Arg.onlyReadsMemory() &&
318       Arg.hasNoAliasAttr()) {
319     AccQual = "read_only";
320   } else {
321     Node = Func->getMetadata("kernel_arg_access_qual");
322     if (Node && ArgNo < Node->getNumOperands())
323       AccQual = cast<MDString>(Node->getOperand(ArgNo))->getString();
324   }
325 
326   StringRef TypeQual;
327   Node = Func->getMetadata("kernel_arg_type_qual");
328   if (Node && ArgNo < Node->getNumOperands())
329     TypeQual = cast<MDString>(Node->getOperand(ArgNo))->getString();
330 
331   const DataLayout &DL = Func->getParent()->getDataLayout();
332 
333   MaybeAlign PointeeAlign;
334   if (auto PtrTy = dyn_cast<PointerType>(Arg.getType())) {
335     if (PtrTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) {
336       // FIXME: Should report this for all address spaces
337       PointeeAlign = DL.getValueOrABITypeAlignment(Arg.getParamAlign(),
338                                                    PtrTy->getElementType());
339     }
340   }
341 
342   Type *ArgTy;
343   Align ArgAlign;
344   std::tie(ArgTy, ArgAlign) = getArgumentTypeAlign(Arg, DL);
345 
346   emitKernelArg(DL, ArgTy, ArgAlign,
347                 getValueKind(ArgTy, TypeQual, BaseTypeName), PointeeAlign, Name,
348                 TypeName, BaseTypeName, AccQual, TypeQual);
349 }
350 
emitKernelArg(const DataLayout & DL,Type * Ty,Align Alignment,ValueKind ValueKind,MaybeAlign PointeeAlign,StringRef Name,StringRef TypeName,StringRef BaseTypeName,StringRef AccQual,StringRef TypeQual)351 void MetadataStreamerV2::emitKernelArg(const DataLayout &DL, Type *Ty,
352                                        Align Alignment, ValueKind ValueKind,
353                                        MaybeAlign PointeeAlign, StringRef Name,
354                                        StringRef TypeName,
355                                        StringRef BaseTypeName,
356                                        StringRef AccQual, StringRef TypeQual) {
357   HSAMetadata.mKernels.back().mArgs.push_back(Kernel::Arg::Metadata());
358   auto &Arg = HSAMetadata.mKernels.back().mArgs.back();
359 
360   Arg.mName = std::string(Name);
361   Arg.mTypeName = std::string(TypeName);
362   Arg.mSize = DL.getTypeAllocSize(Ty);
363   Arg.mAlign = Alignment.value();
364   Arg.mValueKind = ValueKind;
365   Arg.mPointeeAlign = PointeeAlign ? PointeeAlign->value() : 0;
366 
367   if (auto PtrTy = dyn_cast<PointerType>(Ty))
368     Arg.mAddrSpaceQual = getAddressSpaceQualifier(PtrTy->getAddressSpace());
369 
370   Arg.mAccQual = getAccessQualifier(AccQual);
371 
372   // TODO: Emit Arg.mActualAccQual.
373 
374   SmallVector<StringRef, 1> SplitTypeQuals;
375   TypeQual.split(SplitTypeQuals, " ", -1, false);
376   for (StringRef Key : SplitTypeQuals) {
377     auto P = StringSwitch<bool*>(Key)
378                  .Case("const",    &Arg.mIsConst)
379                  .Case("restrict", &Arg.mIsRestrict)
380                  .Case("volatile", &Arg.mIsVolatile)
381                  .Case("pipe",     &Arg.mIsPipe)
382                  .Default(nullptr);
383     if (P)
384       *P = true;
385   }
386 }
387 
emitHiddenKernelArgs(const Function & Func)388 void MetadataStreamerV2::emitHiddenKernelArgs(const Function &Func) {
389   int HiddenArgNumBytes =
390       getIntegerAttribute(Func, "amdgpu-implicitarg-num-bytes", 0);
391 
392   if (!HiddenArgNumBytes)
393     return;
394 
395   auto &DL = Func.getParent()->getDataLayout();
396   auto Int64Ty = Type::getInt64Ty(Func.getContext());
397 
398   if (HiddenArgNumBytes >= 8)
399     emitKernelArg(DL, Int64Ty, Align(8), ValueKind::HiddenGlobalOffsetX);
400   if (HiddenArgNumBytes >= 16)
401     emitKernelArg(DL, Int64Ty, Align(8), ValueKind::HiddenGlobalOffsetY);
402   if (HiddenArgNumBytes >= 24)
403     emitKernelArg(DL, Int64Ty, Align(8), ValueKind::HiddenGlobalOffsetZ);
404 
405   auto Int8PtrTy = Type::getInt8PtrTy(Func.getContext(),
406                                       AMDGPUAS::GLOBAL_ADDRESS);
407 
408   // Emit "printf buffer" argument if printf is used, otherwise emit dummy
409   // "none" argument.
410   if (HiddenArgNumBytes >= 32) {
411     if (Func.getParent()->getNamedMetadata("llvm.printf.fmts"))
412       emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenPrintfBuffer);
413     else if (Func.getParent()->getFunction("__ockl_hostcall_internal")) {
414       // The printf runtime binding pass should have ensured that hostcall and
415       // printf are not used in the same module.
416       assert(!Func.getParent()->getNamedMetadata("llvm.printf.fmts"));
417       emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenHostcallBuffer);
418     } else
419       emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenNone);
420   }
421 
422   // Emit "default queue" and "completion action" arguments if enqueue kernel is
423   // used, otherwise emit dummy "none" arguments.
424   if (HiddenArgNumBytes >= 48) {
425     if (Func.hasFnAttribute("calls-enqueue-kernel")) {
426       emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenDefaultQueue);
427       emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenCompletionAction);
428     } else {
429       emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenNone);
430       emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenNone);
431     }
432   }
433 
434   // Emit the pointer argument for multi-grid object.
435   if (HiddenArgNumBytes >= 56)
436     emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenMultiGridSyncArg);
437 }
438 
emitTo(AMDGPUTargetStreamer & TargetStreamer)439 bool MetadataStreamerV2::emitTo(AMDGPUTargetStreamer &TargetStreamer) {
440   return TargetStreamer.EmitHSAMetadata(getHSAMetadata());
441 }
442 
begin(const Module & Mod)443 void MetadataStreamerV2::begin(const Module &Mod) {
444   emitVersion();
445   emitPrintf(Mod);
446 }
447 
end()448 void MetadataStreamerV2::end() {
449   std::string HSAMetadataString;
450   if (toString(HSAMetadata, HSAMetadataString))
451     return;
452 
453   if (DumpHSAMetadata)
454     dump(HSAMetadataString);
455   if (VerifyHSAMetadata)
456     verify(HSAMetadataString);
457 }
458 
emitKernel(const MachineFunction & MF,const SIProgramInfo & ProgramInfo)459 void MetadataStreamerV2::emitKernel(const MachineFunction &MF,
460                                     const SIProgramInfo &ProgramInfo) {
461   auto &Func = MF.getFunction();
462   if (Func.getCallingConv() != CallingConv::AMDGPU_KERNEL)
463     return;
464 
465   auto CodeProps = getHSACodeProps(MF, ProgramInfo);
466   auto DebugProps = getHSADebugProps(MF, ProgramInfo);
467 
468   HSAMetadata.mKernels.push_back(Kernel::Metadata());
469   auto &Kernel = HSAMetadata.mKernels.back();
470 
471   Kernel.mName = std::string(Func.getName());
472   Kernel.mSymbolName = (Twine(Func.getName()) + Twine("@kd")).str();
473   emitKernelLanguage(Func);
474   emitKernelAttrs(Func);
475   emitKernelArgs(Func);
476   HSAMetadata.mKernels.back().mCodeProps = CodeProps;
477   HSAMetadata.mKernels.back().mDebugProps = DebugProps;
478 }
479 
480 //===----------------------------------------------------------------------===//
481 // HSAMetadataStreamerV3
482 //===----------------------------------------------------------------------===//
483 
dump(StringRef HSAMetadataString) const484 void MetadataStreamerV3::dump(StringRef HSAMetadataString) const {
485   errs() << "AMDGPU HSA Metadata:\n" << HSAMetadataString << '\n';
486 }
487 
verify(StringRef HSAMetadataString) const488 void MetadataStreamerV3::verify(StringRef HSAMetadataString) const {
489   errs() << "AMDGPU HSA Metadata Parser Test: ";
490 
491   msgpack::Document FromHSAMetadataString;
492 
493   if (!FromHSAMetadataString.fromYAML(HSAMetadataString)) {
494     errs() << "FAIL\n";
495     return;
496   }
497 
498   std::string ToHSAMetadataString;
499   raw_string_ostream StrOS(ToHSAMetadataString);
500   FromHSAMetadataString.toYAML(StrOS);
501 
502   errs() << (HSAMetadataString == StrOS.str() ? "PASS" : "FAIL") << '\n';
503   if (HSAMetadataString != ToHSAMetadataString) {
504     errs() << "Original input: " << HSAMetadataString << '\n'
505            << "Produced output: " << StrOS.str() << '\n';
506   }
507 }
508 
509 Optional<StringRef>
getAccessQualifier(StringRef AccQual) const510 MetadataStreamerV3::getAccessQualifier(StringRef AccQual) const {
511   return StringSwitch<Optional<StringRef>>(AccQual)
512       .Case("read_only", StringRef("read_only"))
513       .Case("write_only", StringRef("write_only"))
514       .Case("read_write", StringRef("read_write"))
515       .Default(None);
516 }
517 
518 Optional<StringRef>
getAddressSpaceQualifier(unsigned AddressSpace) const519 MetadataStreamerV3::getAddressSpaceQualifier(unsigned AddressSpace) const {
520   switch (AddressSpace) {
521   case AMDGPUAS::PRIVATE_ADDRESS:
522     return StringRef("private");
523   case AMDGPUAS::GLOBAL_ADDRESS:
524     return StringRef("global");
525   case AMDGPUAS::CONSTANT_ADDRESS:
526     return StringRef("constant");
527   case AMDGPUAS::LOCAL_ADDRESS:
528     return StringRef("local");
529   case AMDGPUAS::FLAT_ADDRESS:
530     return StringRef("generic");
531   case AMDGPUAS::REGION_ADDRESS:
532     return StringRef("region");
533   default:
534     return None;
535   }
536 }
537 
getValueKind(Type * Ty,StringRef TypeQual,StringRef BaseTypeName) const538 StringRef MetadataStreamerV3::getValueKind(Type *Ty, StringRef TypeQual,
539                                            StringRef BaseTypeName) const {
540   if (TypeQual.find("pipe") != StringRef::npos)
541     return "pipe";
542 
543   return StringSwitch<StringRef>(BaseTypeName)
544       .Case("image1d_t", "image")
545       .Case("image1d_array_t", "image")
546       .Case("image1d_buffer_t", "image")
547       .Case("image2d_t", "image")
548       .Case("image2d_array_t", "image")
549       .Case("image2d_array_depth_t", "image")
550       .Case("image2d_array_msaa_t", "image")
551       .Case("image2d_array_msaa_depth_t", "image")
552       .Case("image2d_depth_t", "image")
553       .Case("image2d_msaa_t", "image")
554       .Case("image2d_msaa_depth_t", "image")
555       .Case("image3d_t", "image")
556       .Case("sampler_t", "sampler")
557       .Case("queue_t", "queue")
558       .Default(isa<PointerType>(Ty)
559                    ? (Ty->getPointerAddressSpace() == AMDGPUAS::LOCAL_ADDRESS
560                           ? "dynamic_shared_pointer"
561                           : "global_buffer")
562                    : "by_value");
563 }
564 
getTypeName(Type * Ty,bool Signed) const565 std::string MetadataStreamerV3::getTypeName(Type *Ty, bool Signed) const {
566   switch (Ty->getTypeID()) {
567   case Type::IntegerTyID: {
568     if (!Signed)
569       return (Twine('u') + getTypeName(Ty, true)).str();
570 
571     auto BitWidth = Ty->getIntegerBitWidth();
572     switch (BitWidth) {
573     case 8:
574       return "char";
575     case 16:
576       return "short";
577     case 32:
578       return "int";
579     case 64:
580       return "long";
581     default:
582       return (Twine('i') + Twine(BitWidth)).str();
583     }
584   }
585   case Type::HalfTyID:
586     return "half";
587   case Type::FloatTyID:
588     return "float";
589   case Type::DoubleTyID:
590     return "double";
591   case Type::FixedVectorTyID: {
592     auto VecTy = cast<FixedVectorType>(Ty);
593     auto ElTy = VecTy->getElementType();
594     auto NumElements = VecTy->getNumElements();
595     return (Twine(getTypeName(ElTy, Signed)) + Twine(NumElements)).str();
596   }
597   default:
598     return "unknown";
599   }
600 }
601 
602 msgpack::ArrayDocNode
getWorkGroupDimensions(MDNode * Node) const603 MetadataStreamerV3::getWorkGroupDimensions(MDNode *Node) const {
604   auto Dims = HSAMetadataDoc->getArrayNode();
605   if (Node->getNumOperands() != 3)
606     return Dims;
607 
608   for (auto &Op : Node->operands())
609     Dims.push_back(Dims.getDocument()->getNode(
610         uint64_t(mdconst::extract<ConstantInt>(Op)->getZExtValue())));
611   return Dims;
612 }
613 
emitVersion()614 void MetadataStreamerV3::emitVersion() {
615   auto Version = HSAMetadataDoc->getArrayNode();
616   Version.push_back(Version.getDocument()->getNode(VersionMajor));
617   Version.push_back(Version.getDocument()->getNode(VersionMinor));
618   getRootMetadata("amdhsa.version") = Version;
619 }
620 
emitPrintf(const Module & Mod)621 void MetadataStreamerV3::emitPrintf(const Module &Mod) {
622   auto Node = Mod.getNamedMetadata("llvm.printf.fmts");
623   if (!Node)
624     return;
625 
626   auto Printf = HSAMetadataDoc->getArrayNode();
627   for (auto Op : Node->operands())
628     if (Op->getNumOperands())
629       Printf.push_back(Printf.getDocument()->getNode(
630           cast<MDString>(Op->getOperand(0))->getString(), /*Copy=*/true));
631   getRootMetadata("amdhsa.printf") = Printf;
632 }
633 
emitKernelLanguage(const Function & Func,msgpack::MapDocNode Kern)634 void MetadataStreamerV3::emitKernelLanguage(const Function &Func,
635                                             msgpack::MapDocNode Kern) {
636   // TODO: What about other languages?
637   auto Node = Func.getParent()->getNamedMetadata("opencl.ocl.version");
638   if (!Node || !Node->getNumOperands())
639     return;
640   auto Op0 = Node->getOperand(0);
641   if (Op0->getNumOperands() <= 1)
642     return;
643 
644   Kern[".language"] = Kern.getDocument()->getNode("OpenCL C");
645   auto LanguageVersion = Kern.getDocument()->getArrayNode();
646   LanguageVersion.push_back(Kern.getDocument()->getNode(
647       mdconst::extract<ConstantInt>(Op0->getOperand(0))->getZExtValue()));
648   LanguageVersion.push_back(Kern.getDocument()->getNode(
649       mdconst::extract<ConstantInt>(Op0->getOperand(1))->getZExtValue()));
650   Kern[".language_version"] = LanguageVersion;
651 }
652 
emitKernelAttrs(const Function & Func,msgpack::MapDocNode Kern)653 void MetadataStreamerV3::emitKernelAttrs(const Function &Func,
654                                          msgpack::MapDocNode Kern) {
655 
656   if (auto Node = Func.getMetadata("reqd_work_group_size"))
657     Kern[".reqd_workgroup_size"] = getWorkGroupDimensions(Node);
658   if (auto Node = Func.getMetadata("work_group_size_hint"))
659     Kern[".workgroup_size_hint"] = getWorkGroupDimensions(Node);
660   if (auto Node = Func.getMetadata("vec_type_hint")) {
661     Kern[".vec_type_hint"] = Kern.getDocument()->getNode(
662         getTypeName(
663             cast<ValueAsMetadata>(Node->getOperand(0))->getType(),
664             mdconst::extract<ConstantInt>(Node->getOperand(1))->getZExtValue()),
665         /*Copy=*/true);
666   }
667   if (Func.hasFnAttribute("runtime-handle")) {
668     Kern[".device_enqueue_symbol"] = Kern.getDocument()->getNode(
669         Func.getFnAttribute("runtime-handle").getValueAsString().str(),
670         /*Copy=*/true);
671   }
672 }
673 
emitKernelArgs(const Function & Func,msgpack::MapDocNode Kern)674 void MetadataStreamerV3::emitKernelArgs(const Function &Func,
675                                         msgpack::MapDocNode Kern) {
676   unsigned Offset = 0;
677   auto Args = HSAMetadataDoc->getArrayNode();
678   for (auto &Arg : Func.args())
679     emitKernelArg(Arg, Offset, Args);
680 
681   emitHiddenKernelArgs(Func, Offset, Args);
682 
683   Kern[".args"] = Args;
684 }
685 
emitKernelArg(const Argument & Arg,unsigned & Offset,msgpack::ArrayDocNode Args)686 void MetadataStreamerV3::emitKernelArg(const Argument &Arg, unsigned &Offset,
687                                        msgpack::ArrayDocNode Args) {
688   auto Func = Arg.getParent();
689   auto ArgNo = Arg.getArgNo();
690   const MDNode *Node;
691 
692   StringRef Name;
693   Node = Func->getMetadata("kernel_arg_name");
694   if (Node && ArgNo < Node->getNumOperands())
695     Name = cast<MDString>(Node->getOperand(ArgNo))->getString();
696   else if (Arg.hasName())
697     Name = Arg.getName();
698 
699   StringRef TypeName;
700   Node = Func->getMetadata("kernel_arg_type");
701   if (Node && ArgNo < Node->getNumOperands())
702     TypeName = cast<MDString>(Node->getOperand(ArgNo))->getString();
703 
704   StringRef BaseTypeName;
705   Node = Func->getMetadata("kernel_arg_base_type");
706   if (Node && ArgNo < Node->getNumOperands())
707     BaseTypeName = cast<MDString>(Node->getOperand(ArgNo))->getString();
708 
709   StringRef AccQual;
710   if (Arg.getType()->isPointerTy() && Arg.onlyReadsMemory() &&
711       Arg.hasNoAliasAttr()) {
712     AccQual = "read_only";
713   } else {
714     Node = Func->getMetadata("kernel_arg_access_qual");
715     if (Node && ArgNo < Node->getNumOperands())
716       AccQual = cast<MDString>(Node->getOperand(ArgNo))->getString();
717   }
718 
719   StringRef TypeQual;
720   Node = Func->getMetadata("kernel_arg_type_qual");
721   if (Node && ArgNo < Node->getNumOperands())
722     TypeQual = cast<MDString>(Node->getOperand(ArgNo))->getString();
723 
724   const DataLayout &DL = Func->getParent()->getDataLayout();
725 
726   MaybeAlign PointeeAlign;
727   Type *Ty = Arg.hasByRefAttr() ? Arg.getParamByRefType() : Arg.getType();
728 
729   // FIXME: Need to distinguish in memory alignment from pointer alignment.
730   if (auto PtrTy = dyn_cast<PointerType>(Ty)) {
731     if (PtrTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) {
732       PointeeAlign = DL.getValueOrABITypeAlignment(Arg.getParamAlign(),
733                                                    PtrTy->getElementType());
734     }
735   }
736 
737   // There's no distinction between byval aggregates and raw aggregates.
738   Type *ArgTy;
739   Align ArgAlign;
740   std::tie(ArgTy, ArgAlign) = getArgumentTypeAlign(Arg, DL);
741 
742   emitKernelArg(DL, ArgTy, ArgAlign,
743                 getValueKind(ArgTy, TypeQual, BaseTypeName), Offset, Args,
744                 PointeeAlign, Name, TypeName, BaseTypeName, AccQual, TypeQual);
745 }
746 
emitKernelArg(const DataLayout & DL,Type * Ty,Align Alignment,StringRef ValueKind,unsigned & Offset,msgpack::ArrayDocNode Args,MaybeAlign PointeeAlign,StringRef Name,StringRef TypeName,StringRef BaseTypeName,StringRef AccQual,StringRef TypeQual)747 void MetadataStreamerV3::emitKernelArg(
748     const DataLayout &DL, Type *Ty, Align Alignment, StringRef ValueKind,
749     unsigned &Offset, msgpack::ArrayDocNode Args, MaybeAlign PointeeAlign,
750     StringRef Name, StringRef TypeName, StringRef BaseTypeName,
751     StringRef AccQual, StringRef TypeQual) {
752   auto Arg = Args.getDocument()->getMapNode();
753 
754   if (!Name.empty())
755     Arg[".name"] = Arg.getDocument()->getNode(Name, /*Copy=*/true);
756   if (!TypeName.empty())
757     Arg[".type_name"] = Arg.getDocument()->getNode(TypeName, /*Copy=*/true);
758   auto Size = DL.getTypeAllocSize(Ty);
759   Arg[".size"] = Arg.getDocument()->getNode(Size);
760   Offset = alignTo(Offset, Alignment);
761   Arg[".offset"] = Arg.getDocument()->getNode(Offset);
762   Offset += Size;
763   Arg[".value_kind"] = Arg.getDocument()->getNode(ValueKind, /*Copy=*/true);
764   if (PointeeAlign)
765     Arg[".pointee_align"] = Arg.getDocument()->getNode(PointeeAlign->value());
766 
767   if (auto PtrTy = dyn_cast<PointerType>(Ty))
768     if (auto Qualifier = getAddressSpaceQualifier(PtrTy->getAddressSpace()))
769       Arg[".address_space"] = Arg.getDocument()->getNode(*Qualifier, /*Copy=*/true);
770 
771   if (auto AQ = getAccessQualifier(AccQual))
772     Arg[".access"] = Arg.getDocument()->getNode(*AQ, /*Copy=*/true);
773 
774   // TODO: Emit Arg[".actual_access"].
775 
776   SmallVector<StringRef, 1> SplitTypeQuals;
777   TypeQual.split(SplitTypeQuals, " ", -1, false);
778   for (StringRef Key : SplitTypeQuals) {
779     if (Key == "const")
780       Arg[".is_const"] = Arg.getDocument()->getNode(true);
781     else if (Key == "restrict")
782       Arg[".is_restrict"] = Arg.getDocument()->getNode(true);
783     else if (Key == "volatile")
784       Arg[".is_volatile"] = Arg.getDocument()->getNode(true);
785     else if (Key == "pipe")
786       Arg[".is_pipe"] = Arg.getDocument()->getNode(true);
787   }
788 
789   Args.push_back(Arg);
790 }
791 
emitHiddenKernelArgs(const Function & Func,unsigned & Offset,msgpack::ArrayDocNode Args)792 void MetadataStreamerV3::emitHiddenKernelArgs(const Function &Func,
793                                               unsigned &Offset,
794                                               msgpack::ArrayDocNode Args) {
795   int HiddenArgNumBytes =
796       getIntegerAttribute(Func, "amdgpu-implicitarg-num-bytes", 0);
797 
798   if (!HiddenArgNumBytes)
799     return;
800 
801   auto &DL = Func.getParent()->getDataLayout();
802   auto Int64Ty = Type::getInt64Ty(Func.getContext());
803 
804   if (HiddenArgNumBytes >= 8)
805     emitKernelArg(DL, Int64Ty, Align(8), "hidden_global_offset_x", Offset,
806                   Args);
807   if (HiddenArgNumBytes >= 16)
808     emitKernelArg(DL, Int64Ty, Align(8), "hidden_global_offset_y", Offset,
809                   Args);
810   if (HiddenArgNumBytes >= 24)
811     emitKernelArg(DL, Int64Ty, Align(8), "hidden_global_offset_z", Offset,
812                   Args);
813 
814   auto Int8PtrTy =
815       Type::getInt8PtrTy(Func.getContext(), AMDGPUAS::GLOBAL_ADDRESS);
816 
817   // Emit "printf buffer" argument if printf is used, otherwise emit dummy
818   // "none" argument.
819   if (HiddenArgNumBytes >= 32) {
820     if (Func.getParent()->getNamedMetadata("llvm.printf.fmts"))
821       emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_printf_buffer", Offset,
822                     Args);
823     else if (Func.getParent()->getFunction("__ockl_hostcall_internal")) {
824       // The printf runtime binding pass should have ensured that hostcall and
825       // printf are not used in the same module.
826       assert(!Func.getParent()->getNamedMetadata("llvm.printf.fmts"));
827       emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_hostcall_buffer", Offset,
828                     Args);
829     } else
830       emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_none", Offset, Args);
831   }
832 
833   // Emit "default queue" and "completion action" arguments if enqueue kernel is
834   // used, otherwise emit dummy "none" arguments.
835   if (HiddenArgNumBytes >= 48) {
836     if (Func.hasFnAttribute("calls-enqueue-kernel")) {
837       emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_default_queue", Offset,
838                     Args);
839       emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_completion_action", Offset,
840                     Args);
841     } else {
842       emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_none", Offset, Args);
843       emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_none", Offset, Args);
844     }
845   }
846 
847   // Emit the pointer argument for multi-grid object.
848   if (HiddenArgNumBytes >= 56)
849     emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_multigrid_sync_arg", Offset,
850                   Args);
851 }
852 
853 msgpack::MapDocNode
getHSAKernelProps(const MachineFunction & MF,const SIProgramInfo & ProgramInfo) const854 MetadataStreamerV3::getHSAKernelProps(const MachineFunction &MF,
855                                       const SIProgramInfo &ProgramInfo) const {
856   const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
857   const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
858   const Function &F = MF.getFunction();
859 
860   auto Kern = HSAMetadataDoc->getMapNode();
861 
862   Align MaxKernArgAlign;
863   Kern[".kernarg_segment_size"] = Kern.getDocument()->getNode(
864       STM.getKernArgSegmentSize(F, MaxKernArgAlign));
865   Kern[".group_segment_fixed_size"] =
866       Kern.getDocument()->getNode(ProgramInfo.LDSSize);
867   Kern[".private_segment_fixed_size"] =
868       Kern.getDocument()->getNode(ProgramInfo.ScratchSize);
869   Kern[".kernarg_segment_align"] =
870       Kern.getDocument()->getNode(std::max(Align(4), MaxKernArgAlign).value());
871   Kern[".wavefront_size"] =
872       Kern.getDocument()->getNode(STM.getWavefrontSize());
873   Kern[".sgpr_count"] = Kern.getDocument()->getNode(ProgramInfo.NumSGPR);
874   Kern[".vgpr_count"] = Kern.getDocument()->getNode(ProgramInfo.NumVGPR);
875   Kern[".max_flat_workgroup_size"] =
876       Kern.getDocument()->getNode(MFI.getMaxFlatWorkGroupSize());
877   Kern[".sgpr_spill_count"] =
878       Kern.getDocument()->getNode(MFI.getNumSpilledSGPRs());
879   Kern[".vgpr_spill_count"] =
880       Kern.getDocument()->getNode(MFI.getNumSpilledVGPRs());
881 
882   return Kern;
883 }
884 
emitTo(AMDGPUTargetStreamer & TargetStreamer)885 bool MetadataStreamerV3::emitTo(AMDGPUTargetStreamer &TargetStreamer) {
886   return TargetStreamer.EmitHSAMetadata(*HSAMetadataDoc, true);
887 }
888 
begin(const Module & Mod)889 void MetadataStreamerV3::begin(const Module &Mod) {
890   emitVersion();
891   emitPrintf(Mod);
892   getRootMetadata("amdhsa.kernels") = HSAMetadataDoc->getArrayNode();
893 }
894 
end()895 void MetadataStreamerV3::end() {
896   std::string HSAMetadataString;
897   raw_string_ostream StrOS(HSAMetadataString);
898   HSAMetadataDoc->toYAML(StrOS);
899 
900   if (DumpHSAMetadata)
901     dump(StrOS.str());
902   if (VerifyHSAMetadata)
903     verify(StrOS.str());
904 }
905 
emitKernel(const MachineFunction & MF,const SIProgramInfo & ProgramInfo)906 void MetadataStreamerV3::emitKernel(const MachineFunction &MF,
907                                     const SIProgramInfo &ProgramInfo) {
908   auto &Func = MF.getFunction();
909   auto Kern = getHSAKernelProps(MF, ProgramInfo);
910 
911   assert(Func.getCallingConv() == CallingConv::AMDGPU_KERNEL ||
912          Func.getCallingConv() == CallingConv::SPIR_KERNEL);
913 
914   auto Kernels =
915       getRootMetadata("amdhsa.kernels").getArray(/*Convert=*/true);
916 
917   {
918     Kern[".name"] = Kern.getDocument()->getNode(Func.getName());
919     Kern[".symbol"] = Kern.getDocument()->getNode(
920         (Twine(Func.getName()) + Twine(".kd")).str(), /*Copy=*/true);
921     emitKernelLanguage(Func, Kern);
922     emitKernelAttrs(Func, Kern);
923     emitKernelArgs(Func, Kern);
924   }
925 
926   Kernels.push_back(Kern);
927 }
928 
929 } // end namespace HSAMD
930 } // end namespace AMDGPU
931 } // end namespace llvm
932