1 //===--- AMDGPUHSAMetadataStreamer.cpp --------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// AMDGPU HSA Metadata Streamer.
11 ///
12 //
13 //===----------------------------------------------------------------------===//
14
15 #include "AMDGPUHSAMetadataStreamer.h"
16 #include "AMDGPU.h"
17 #include "GCNSubtarget.h"
18 #include "MCTargetDesc/AMDGPUTargetStreamer.h"
19 #include "SIMachineFunctionInfo.h"
20 #include "SIProgramInfo.h"
21 #include "llvm/IR/Module.h"
22 using namespace llvm;
23
getArgumentTypeAlign(const Argument & Arg,const DataLayout & DL)24 static std::pair<Type *, Align> getArgumentTypeAlign(const Argument &Arg,
25 const DataLayout &DL) {
26 Type *Ty = Arg.getType();
27 MaybeAlign ArgAlign;
28 if (Arg.hasByRefAttr()) {
29 Ty = Arg.getParamByRefType();
30 ArgAlign = Arg.getParamAlign();
31 }
32
33 if (!ArgAlign)
34 ArgAlign = DL.getABITypeAlign(Ty);
35
36 return std::make_pair(Ty, *ArgAlign);
37 }
38
39 namespace llvm {
40
41 static cl::opt<bool> DumpHSAMetadata(
42 "amdgpu-dump-hsa-metadata",
43 cl::desc("Dump AMDGPU HSA Metadata"));
44 static cl::opt<bool> VerifyHSAMetadata(
45 "amdgpu-verify-hsa-metadata",
46 cl::desc("Verify AMDGPU HSA Metadata"));
47
48 namespace AMDGPU {
49 namespace HSAMD {
50
51 //===----------------------------------------------------------------------===//
52 // HSAMetadataStreamerV2
53 //===----------------------------------------------------------------------===//
dump(StringRef HSAMetadataString) const54 void MetadataStreamerV2::dump(StringRef HSAMetadataString) const {
55 errs() << "AMDGPU HSA Metadata:\n" << HSAMetadataString << '\n';
56 }
57
verify(StringRef HSAMetadataString) const58 void MetadataStreamerV2::verify(StringRef HSAMetadataString) const {
59 errs() << "AMDGPU HSA Metadata Parser Test: ";
60
61 HSAMD::Metadata FromHSAMetadataString;
62 if (fromString(HSAMetadataString, FromHSAMetadataString)) {
63 errs() << "FAIL\n";
64 return;
65 }
66
67 std::string ToHSAMetadataString;
68 if (toString(FromHSAMetadataString, ToHSAMetadataString)) {
69 errs() << "FAIL\n";
70 return;
71 }
72
73 errs() << (HSAMetadataString == ToHSAMetadataString ? "PASS" : "FAIL")
74 << '\n';
75 if (HSAMetadataString != ToHSAMetadataString) {
76 errs() << "Original input: " << HSAMetadataString << '\n'
77 << "Produced output: " << ToHSAMetadataString << '\n';
78 }
79 }
80
81 AccessQualifier
getAccessQualifier(StringRef AccQual) const82 MetadataStreamerV2::getAccessQualifier(StringRef AccQual) const {
83 if (AccQual.empty())
84 return AccessQualifier::Unknown;
85
86 return StringSwitch<AccessQualifier>(AccQual)
87 .Case("read_only", AccessQualifier::ReadOnly)
88 .Case("write_only", AccessQualifier::WriteOnly)
89 .Case("read_write", AccessQualifier::ReadWrite)
90 .Default(AccessQualifier::Default);
91 }
92
93 AddressSpaceQualifier
getAddressSpaceQualifier(unsigned AddressSpace) const94 MetadataStreamerV2::getAddressSpaceQualifier(
95 unsigned AddressSpace) const {
96 switch (AddressSpace) {
97 case AMDGPUAS::PRIVATE_ADDRESS:
98 return AddressSpaceQualifier::Private;
99 case AMDGPUAS::GLOBAL_ADDRESS:
100 return AddressSpaceQualifier::Global;
101 case AMDGPUAS::CONSTANT_ADDRESS:
102 return AddressSpaceQualifier::Constant;
103 case AMDGPUAS::LOCAL_ADDRESS:
104 return AddressSpaceQualifier::Local;
105 case AMDGPUAS::FLAT_ADDRESS:
106 return AddressSpaceQualifier::Generic;
107 case AMDGPUAS::REGION_ADDRESS:
108 return AddressSpaceQualifier::Region;
109 default:
110 return AddressSpaceQualifier::Unknown;
111 }
112 }
113
getValueKind(Type * Ty,StringRef TypeQual,StringRef BaseTypeName) const114 ValueKind MetadataStreamerV2::getValueKind(Type *Ty, StringRef TypeQual,
115 StringRef BaseTypeName) const {
116 if (TypeQual.find("pipe") != StringRef::npos)
117 return ValueKind::Pipe;
118
119 return StringSwitch<ValueKind>(BaseTypeName)
120 .Case("image1d_t", ValueKind::Image)
121 .Case("image1d_array_t", ValueKind::Image)
122 .Case("image1d_buffer_t", ValueKind::Image)
123 .Case("image2d_t", ValueKind::Image)
124 .Case("image2d_array_t", ValueKind::Image)
125 .Case("image2d_array_depth_t", ValueKind::Image)
126 .Case("image2d_array_msaa_t", ValueKind::Image)
127 .Case("image2d_array_msaa_depth_t", ValueKind::Image)
128 .Case("image2d_depth_t", ValueKind::Image)
129 .Case("image2d_msaa_t", ValueKind::Image)
130 .Case("image2d_msaa_depth_t", ValueKind::Image)
131 .Case("image3d_t", ValueKind::Image)
132 .Case("sampler_t", ValueKind::Sampler)
133 .Case("queue_t", ValueKind::Queue)
134 .Default(isa<PointerType>(Ty) ?
135 (Ty->getPointerAddressSpace() ==
136 AMDGPUAS::LOCAL_ADDRESS ?
137 ValueKind::DynamicSharedPointer :
138 ValueKind::GlobalBuffer) :
139 ValueKind::ByValue);
140 }
141
getTypeName(Type * Ty,bool Signed) const142 std::string MetadataStreamerV2::getTypeName(Type *Ty, bool Signed) const {
143 switch (Ty->getTypeID()) {
144 case Type::IntegerTyID: {
145 if (!Signed)
146 return (Twine('u') + getTypeName(Ty, true)).str();
147
148 auto BitWidth = Ty->getIntegerBitWidth();
149 switch (BitWidth) {
150 case 8:
151 return "char";
152 case 16:
153 return "short";
154 case 32:
155 return "int";
156 case 64:
157 return "long";
158 default:
159 return (Twine('i') + Twine(BitWidth)).str();
160 }
161 }
162 case Type::HalfTyID:
163 return "half";
164 case Type::FloatTyID:
165 return "float";
166 case Type::DoubleTyID:
167 return "double";
168 case Type::FixedVectorTyID: {
169 auto VecTy = cast<FixedVectorType>(Ty);
170 auto ElTy = VecTy->getElementType();
171 auto NumElements = VecTy->getNumElements();
172 return (Twine(getTypeName(ElTy, Signed)) + Twine(NumElements)).str();
173 }
174 default:
175 return "unknown";
176 }
177 }
178
179 std::vector<uint32_t>
getWorkGroupDimensions(MDNode * Node) const180 MetadataStreamerV2::getWorkGroupDimensions(MDNode *Node) const {
181 std::vector<uint32_t> Dims;
182 if (Node->getNumOperands() != 3)
183 return Dims;
184
185 for (auto &Op : Node->operands())
186 Dims.push_back(mdconst::extract<ConstantInt>(Op)->getZExtValue());
187 return Dims;
188 }
189
190 Kernel::CodeProps::Metadata
getHSACodeProps(const MachineFunction & MF,const SIProgramInfo & ProgramInfo) const191 MetadataStreamerV2::getHSACodeProps(const MachineFunction &MF,
192 const SIProgramInfo &ProgramInfo) const {
193 const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
194 const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
195 HSAMD::Kernel::CodeProps::Metadata HSACodeProps;
196 const Function &F = MF.getFunction();
197
198 assert(F.getCallingConv() == CallingConv::AMDGPU_KERNEL ||
199 F.getCallingConv() == CallingConv::SPIR_KERNEL);
200
201 Align MaxKernArgAlign;
202 HSACodeProps.mKernargSegmentSize = STM.getKernArgSegmentSize(F,
203 MaxKernArgAlign);
204 HSACodeProps.mGroupSegmentFixedSize = ProgramInfo.LDSSize;
205 HSACodeProps.mPrivateSegmentFixedSize = ProgramInfo.ScratchSize;
206 HSACodeProps.mKernargSegmentAlign =
207 std::max(MaxKernArgAlign, Align(4)).value();
208 HSACodeProps.mWavefrontSize = STM.getWavefrontSize();
209 HSACodeProps.mNumSGPRs = ProgramInfo.NumSGPR;
210 HSACodeProps.mNumVGPRs = ProgramInfo.NumVGPR;
211 HSACodeProps.mMaxFlatWorkGroupSize = MFI.getMaxFlatWorkGroupSize();
212 HSACodeProps.mIsDynamicCallStack = ProgramInfo.DynamicCallStack;
213 HSACodeProps.mIsXNACKEnabled = STM.isXNACKEnabled();
214 HSACodeProps.mNumSpilledSGPRs = MFI.getNumSpilledSGPRs();
215 HSACodeProps.mNumSpilledVGPRs = MFI.getNumSpilledVGPRs();
216
217 return HSACodeProps;
218 }
219
220 Kernel::DebugProps::Metadata
getHSADebugProps(const MachineFunction & MF,const SIProgramInfo & ProgramInfo) const221 MetadataStreamerV2::getHSADebugProps(const MachineFunction &MF,
222 const SIProgramInfo &ProgramInfo) const {
223 return HSAMD::Kernel::DebugProps::Metadata();
224 }
225
emitVersion()226 void MetadataStreamerV2::emitVersion() {
227 auto &Version = HSAMetadata.mVersion;
228
229 Version.push_back(VersionMajorV2);
230 Version.push_back(VersionMinorV2);
231 }
232
emitPrintf(const Module & Mod)233 void MetadataStreamerV2::emitPrintf(const Module &Mod) {
234 auto &Printf = HSAMetadata.mPrintf;
235
236 auto Node = Mod.getNamedMetadata("llvm.printf.fmts");
237 if (!Node)
238 return;
239
240 for (auto Op : Node->operands())
241 if (Op->getNumOperands())
242 Printf.push_back(
243 std::string(cast<MDString>(Op->getOperand(0))->getString()));
244 }
245
emitKernelLanguage(const Function & Func)246 void MetadataStreamerV2::emitKernelLanguage(const Function &Func) {
247 auto &Kernel = HSAMetadata.mKernels.back();
248
249 // TODO: What about other languages?
250 auto Node = Func.getParent()->getNamedMetadata("opencl.ocl.version");
251 if (!Node || !Node->getNumOperands())
252 return;
253 auto Op0 = Node->getOperand(0);
254 if (Op0->getNumOperands() <= 1)
255 return;
256
257 Kernel.mLanguage = "OpenCL C";
258 Kernel.mLanguageVersion.push_back(
259 mdconst::extract<ConstantInt>(Op0->getOperand(0))->getZExtValue());
260 Kernel.mLanguageVersion.push_back(
261 mdconst::extract<ConstantInt>(Op0->getOperand(1))->getZExtValue());
262 }
263
emitKernelAttrs(const Function & Func)264 void MetadataStreamerV2::emitKernelAttrs(const Function &Func) {
265 auto &Attrs = HSAMetadata.mKernels.back().mAttrs;
266
267 if (auto Node = Func.getMetadata("reqd_work_group_size"))
268 Attrs.mReqdWorkGroupSize = getWorkGroupDimensions(Node);
269 if (auto Node = Func.getMetadata("work_group_size_hint"))
270 Attrs.mWorkGroupSizeHint = getWorkGroupDimensions(Node);
271 if (auto Node = Func.getMetadata("vec_type_hint")) {
272 Attrs.mVecTypeHint = getTypeName(
273 cast<ValueAsMetadata>(Node->getOperand(0))->getType(),
274 mdconst::extract<ConstantInt>(Node->getOperand(1))->getZExtValue());
275 }
276 if (Func.hasFnAttribute("runtime-handle")) {
277 Attrs.mRuntimeHandle =
278 Func.getFnAttribute("runtime-handle").getValueAsString().str();
279 }
280 }
281
emitKernelArgs(const Function & Func)282 void MetadataStreamerV2::emitKernelArgs(const Function &Func) {
283 for (auto &Arg : Func.args())
284 emitKernelArg(Arg);
285
286 emitHiddenKernelArgs(Func);
287 }
288
emitKernelArg(const Argument & Arg)289 void MetadataStreamerV2::emitKernelArg(const Argument &Arg) {
290 auto Func = Arg.getParent();
291 auto ArgNo = Arg.getArgNo();
292 const MDNode *Node;
293
294 StringRef Name;
295 Node = Func->getMetadata("kernel_arg_name");
296 if (Node && ArgNo < Node->getNumOperands())
297 Name = cast<MDString>(Node->getOperand(ArgNo))->getString();
298 else if (Arg.hasName())
299 Name = Arg.getName();
300
301 StringRef TypeName;
302 Node = Func->getMetadata("kernel_arg_type");
303 if (Node && ArgNo < Node->getNumOperands())
304 TypeName = cast<MDString>(Node->getOperand(ArgNo))->getString();
305
306 StringRef BaseTypeName;
307 Node = Func->getMetadata("kernel_arg_base_type");
308 if (Node && ArgNo < Node->getNumOperands())
309 BaseTypeName = cast<MDString>(Node->getOperand(ArgNo))->getString();
310
311 StringRef AccQual;
312 if (Arg.getType()->isPointerTy() && Arg.onlyReadsMemory() &&
313 Arg.hasNoAliasAttr()) {
314 AccQual = "read_only";
315 } else {
316 Node = Func->getMetadata("kernel_arg_access_qual");
317 if (Node && ArgNo < Node->getNumOperands())
318 AccQual = cast<MDString>(Node->getOperand(ArgNo))->getString();
319 }
320
321 StringRef TypeQual;
322 Node = Func->getMetadata("kernel_arg_type_qual");
323 if (Node && ArgNo < Node->getNumOperands())
324 TypeQual = cast<MDString>(Node->getOperand(ArgNo))->getString();
325
326 const DataLayout &DL = Func->getParent()->getDataLayout();
327
328 MaybeAlign PointeeAlign;
329 if (auto PtrTy = dyn_cast<PointerType>(Arg.getType())) {
330 if (PtrTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) {
331 // FIXME: Should report this for all address spaces
332 PointeeAlign = DL.getValueOrABITypeAlignment(Arg.getParamAlign(),
333 PtrTy->getElementType());
334 }
335 }
336
337 Type *ArgTy;
338 Align ArgAlign;
339 std::tie(ArgTy, ArgAlign) = getArgumentTypeAlign(Arg, DL);
340
341 emitKernelArg(DL, ArgTy, ArgAlign,
342 getValueKind(ArgTy, TypeQual, BaseTypeName), PointeeAlign, Name,
343 TypeName, BaseTypeName, AccQual, TypeQual);
344 }
345
emitKernelArg(const DataLayout & DL,Type * Ty,Align Alignment,ValueKind ValueKind,MaybeAlign PointeeAlign,StringRef Name,StringRef TypeName,StringRef BaseTypeName,StringRef AccQual,StringRef TypeQual)346 void MetadataStreamerV2::emitKernelArg(const DataLayout &DL, Type *Ty,
347 Align Alignment, ValueKind ValueKind,
348 MaybeAlign PointeeAlign, StringRef Name,
349 StringRef TypeName,
350 StringRef BaseTypeName,
351 StringRef AccQual, StringRef TypeQual) {
352 HSAMetadata.mKernels.back().mArgs.push_back(Kernel::Arg::Metadata());
353 auto &Arg = HSAMetadata.mKernels.back().mArgs.back();
354
355 Arg.mName = std::string(Name);
356 Arg.mTypeName = std::string(TypeName);
357 Arg.mSize = DL.getTypeAllocSize(Ty);
358 Arg.mAlign = Alignment.value();
359 Arg.mValueKind = ValueKind;
360 Arg.mPointeeAlign = PointeeAlign ? PointeeAlign->value() : 0;
361
362 if (auto PtrTy = dyn_cast<PointerType>(Ty))
363 Arg.mAddrSpaceQual = getAddressSpaceQualifier(PtrTy->getAddressSpace());
364
365 Arg.mAccQual = getAccessQualifier(AccQual);
366
367 // TODO: Emit Arg.mActualAccQual.
368
369 SmallVector<StringRef, 1> SplitTypeQuals;
370 TypeQual.split(SplitTypeQuals, " ", -1, false);
371 for (StringRef Key : SplitTypeQuals) {
372 auto P = StringSwitch<bool*>(Key)
373 .Case("const", &Arg.mIsConst)
374 .Case("restrict", &Arg.mIsRestrict)
375 .Case("volatile", &Arg.mIsVolatile)
376 .Case("pipe", &Arg.mIsPipe)
377 .Default(nullptr);
378 if (P)
379 *P = true;
380 }
381 }
382
emitHiddenKernelArgs(const Function & Func)383 void MetadataStreamerV2::emitHiddenKernelArgs(const Function &Func) {
384 int HiddenArgNumBytes =
385 getIntegerAttribute(Func, "amdgpu-implicitarg-num-bytes", 0);
386
387 if (!HiddenArgNumBytes)
388 return;
389
390 auto &DL = Func.getParent()->getDataLayout();
391 auto Int64Ty = Type::getInt64Ty(Func.getContext());
392
393 if (HiddenArgNumBytes >= 8)
394 emitKernelArg(DL, Int64Ty, Align(8), ValueKind::HiddenGlobalOffsetX);
395 if (HiddenArgNumBytes >= 16)
396 emitKernelArg(DL, Int64Ty, Align(8), ValueKind::HiddenGlobalOffsetY);
397 if (HiddenArgNumBytes >= 24)
398 emitKernelArg(DL, Int64Ty, Align(8), ValueKind::HiddenGlobalOffsetZ);
399
400 auto Int8PtrTy = Type::getInt8PtrTy(Func.getContext(),
401 AMDGPUAS::GLOBAL_ADDRESS);
402
403 // Emit "printf buffer" argument if printf is used, otherwise emit dummy
404 // "none" argument.
405 if (HiddenArgNumBytes >= 32) {
406 if (Func.getParent()->getNamedMetadata("llvm.printf.fmts"))
407 emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenPrintfBuffer);
408 else if (Func.getParent()->getFunction("__ockl_hostcall_internal")) {
409 // The printf runtime binding pass should have ensured that hostcall and
410 // printf are not used in the same module.
411 assert(!Func.getParent()->getNamedMetadata("llvm.printf.fmts"));
412 emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenHostcallBuffer);
413 } else
414 emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenNone);
415 }
416
417 // Emit "default queue" and "completion action" arguments if enqueue kernel is
418 // used, otherwise emit dummy "none" arguments.
419 if (HiddenArgNumBytes >= 48) {
420 if (Func.hasFnAttribute("calls-enqueue-kernel")) {
421 emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenDefaultQueue);
422 emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenCompletionAction);
423 } else {
424 emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenNone);
425 emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenNone);
426 }
427 }
428
429 // Emit the pointer argument for multi-grid object.
430 if (HiddenArgNumBytes >= 56)
431 emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenMultiGridSyncArg);
432 }
433
emitTo(AMDGPUTargetStreamer & TargetStreamer)434 bool MetadataStreamerV2::emitTo(AMDGPUTargetStreamer &TargetStreamer) {
435 return TargetStreamer.EmitHSAMetadata(getHSAMetadata());
436 }
437
begin(const Module & Mod,const IsaInfo::AMDGPUTargetID & TargetID)438 void MetadataStreamerV2::begin(const Module &Mod,
439 const IsaInfo::AMDGPUTargetID &TargetID) {
440 emitVersion();
441 emitPrintf(Mod);
442 }
443
end()444 void MetadataStreamerV2::end() {
445 std::string HSAMetadataString;
446 if (toString(HSAMetadata, HSAMetadataString))
447 return;
448
449 if (DumpHSAMetadata)
450 dump(HSAMetadataString);
451 if (VerifyHSAMetadata)
452 verify(HSAMetadataString);
453 }
454
emitKernel(const MachineFunction & MF,const SIProgramInfo & ProgramInfo)455 void MetadataStreamerV2::emitKernel(const MachineFunction &MF,
456 const SIProgramInfo &ProgramInfo) {
457 auto &Func = MF.getFunction();
458 if (Func.getCallingConv() != CallingConv::AMDGPU_KERNEL)
459 return;
460
461 auto CodeProps = getHSACodeProps(MF, ProgramInfo);
462 auto DebugProps = getHSADebugProps(MF, ProgramInfo);
463
464 HSAMetadata.mKernels.push_back(Kernel::Metadata());
465 auto &Kernel = HSAMetadata.mKernels.back();
466
467 Kernel.mName = std::string(Func.getName());
468 Kernel.mSymbolName = (Twine(Func.getName()) + Twine("@kd")).str();
469 emitKernelLanguage(Func);
470 emitKernelAttrs(Func);
471 emitKernelArgs(Func);
472 HSAMetadata.mKernels.back().mCodeProps = CodeProps;
473 HSAMetadata.mKernels.back().mDebugProps = DebugProps;
474 }
475
476 //===----------------------------------------------------------------------===//
477 // HSAMetadataStreamerV3
478 //===----------------------------------------------------------------------===//
479
dump(StringRef HSAMetadataString) const480 void MetadataStreamerV3::dump(StringRef HSAMetadataString) const {
481 errs() << "AMDGPU HSA Metadata:\n" << HSAMetadataString << '\n';
482 }
483
verify(StringRef HSAMetadataString) const484 void MetadataStreamerV3::verify(StringRef HSAMetadataString) const {
485 errs() << "AMDGPU HSA Metadata Parser Test: ";
486
487 msgpack::Document FromHSAMetadataString;
488
489 if (!FromHSAMetadataString.fromYAML(HSAMetadataString)) {
490 errs() << "FAIL\n";
491 return;
492 }
493
494 std::string ToHSAMetadataString;
495 raw_string_ostream StrOS(ToHSAMetadataString);
496 FromHSAMetadataString.toYAML(StrOS);
497
498 errs() << (HSAMetadataString == StrOS.str() ? "PASS" : "FAIL") << '\n';
499 if (HSAMetadataString != ToHSAMetadataString) {
500 errs() << "Original input: " << HSAMetadataString << '\n'
501 << "Produced output: " << StrOS.str() << '\n';
502 }
503 }
504
505 Optional<StringRef>
getAccessQualifier(StringRef AccQual) const506 MetadataStreamerV3::getAccessQualifier(StringRef AccQual) const {
507 return StringSwitch<Optional<StringRef>>(AccQual)
508 .Case("read_only", StringRef("read_only"))
509 .Case("write_only", StringRef("write_only"))
510 .Case("read_write", StringRef("read_write"))
511 .Default(None);
512 }
513
514 Optional<StringRef>
getAddressSpaceQualifier(unsigned AddressSpace) const515 MetadataStreamerV3::getAddressSpaceQualifier(unsigned AddressSpace) const {
516 switch (AddressSpace) {
517 case AMDGPUAS::PRIVATE_ADDRESS:
518 return StringRef("private");
519 case AMDGPUAS::GLOBAL_ADDRESS:
520 return StringRef("global");
521 case AMDGPUAS::CONSTANT_ADDRESS:
522 return StringRef("constant");
523 case AMDGPUAS::LOCAL_ADDRESS:
524 return StringRef("local");
525 case AMDGPUAS::FLAT_ADDRESS:
526 return StringRef("generic");
527 case AMDGPUAS::REGION_ADDRESS:
528 return StringRef("region");
529 default:
530 return None;
531 }
532 }
533
getValueKind(Type * Ty,StringRef TypeQual,StringRef BaseTypeName) const534 StringRef MetadataStreamerV3::getValueKind(Type *Ty, StringRef TypeQual,
535 StringRef BaseTypeName) const {
536 if (TypeQual.find("pipe") != StringRef::npos)
537 return "pipe";
538
539 return StringSwitch<StringRef>(BaseTypeName)
540 .Case("image1d_t", "image")
541 .Case("image1d_array_t", "image")
542 .Case("image1d_buffer_t", "image")
543 .Case("image2d_t", "image")
544 .Case("image2d_array_t", "image")
545 .Case("image2d_array_depth_t", "image")
546 .Case("image2d_array_msaa_t", "image")
547 .Case("image2d_array_msaa_depth_t", "image")
548 .Case("image2d_depth_t", "image")
549 .Case("image2d_msaa_t", "image")
550 .Case("image2d_msaa_depth_t", "image")
551 .Case("image3d_t", "image")
552 .Case("sampler_t", "sampler")
553 .Case("queue_t", "queue")
554 .Default(isa<PointerType>(Ty)
555 ? (Ty->getPointerAddressSpace() == AMDGPUAS::LOCAL_ADDRESS
556 ? "dynamic_shared_pointer"
557 : "global_buffer")
558 : "by_value");
559 }
560
getTypeName(Type * Ty,bool Signed) const561 std::string MetadataStreamerV3::getTypeName(Type *Ty, bool Signed) const {
562 switch (Ty->getTypeID()) {
563 case Type::IntegerTyID: {
564 if (!Signed)
565 return (Twine('u') + getTypeName(Ty, true)).str();
566
567 auto BitWidth = Ty->getIntegerBitWidth();
568 switch (BitWidth) {
569 case 8:
570 return "char";
571 case 16:
572 return "short";
573 case 32:
574 return "int";
575 case 64:
576 return "long";
577 default:
578 return (Twine('i') + Twine(BitWidth)).str();
579 }
580 }
581 case Type::HalfTyID:
582 return "half";
583 case Type::FloatTyID:
584 return "float";
585 case Type::DoubleTyID:
586 return "double";
587 case Type::FixedVectorTyID: {
588 auto VecTy = cast<FixedVectorType>(Ty);
589 auto ElTy = VecTy->getElementType();
590 auto NumElements = VecTy->getNumElements();
591 return (Twine(getTypeName(ElTy, Signed)) + Twine(NumElements)).str();
592 }
593 default:
594 return "unknown";
595 }
596 }
597
598 msgpack::ArrayDocNode
getWorkGroupDimensions(MDNode * Node) const599 MetadataStreamerV3::getWorkGroupDimensions(MDNode *Node) const {
600 auto Dims = HSAMetadataDoc->getArrayNode();
601 if (Node->getNumOperands() != 3)
602 return Dims;
603
604 for (auto &Op : Node->operands())
605 Dims.push_back(Dims.getDocument()->getNode(
606 uint64_t(mdconst::extract<ConstantInt>(Op)->getZExtValue())));
607 return Dims;
608 }
609
emitVersion()610 void MetadataStreamerV3::emitVersion() {
611 auto Version = HSAMetadataDoc->getArrayNode();
612 Version.push_back(Version.getDocument()->getNode(VersionMajorV3));
613 Version.push_back(Version.getDocument()->getNode(VersionMinorV3));
614 getRootMetadata("amdhsa.version") = Version;
615 }
616
emitPrintf(const Module & Mod)617 void MetadataStreamerV3::emitPrintf(const Module &Mod) {
618 auto Node = Mod.getNamedMetadata("llvm.printf.fmts");
619 if (!Node)
620 return;
621
622 auto Printf = HSAMetadataDoc->getArrayNode();
623 for (auto Op : Node->operands())
624 if (Op->getNumOperands())
625 Printf.push_back(Printf.getDocument()->getNode(
626 cast<MDString>(Op->getOperand(0))->getString(), /*Copy=*/true));
627 getRootMetadata("amdhsa.printf") = Printf;
628 }
629
emitKernelLanguage(const Function & Func,msgpack::MapDocNode Kern)630 void MetadataStreamerV3::emitKernelLanguage(const Function &Func,
631 msgpack::MapDocNode Kern) {
632 // TODO: What about other languages?
633 auto Node = Func.getParent()->getNamedMetadata("opencl.ocl.version");
634 if (!Node || !Node->getNumOperands())
635 return;
636 auto Op0 = Node->getOperand(0);
637 if (Op0->getNumOperands() <= 1)
638 return;
639
640 Kern[".language"] = Kern.getDocument()->getNode("OpenCL C");
641 auto LanguageVersion = Kern.getDocument()->getArrayNode();
642 LanguageVersion.push_back(Kern.getDocument()->getNode(
643 mdconst::extract<ConstantInt>(Op0->getOperand(0))->getZExtValue()));
644 LanguageVersion.push_back(Kern.getDocument()->getNode(
645 mdconst::extract<ConstantInt>(Op0->getOperand(1))->getZExtValue()));
646 Kern[".language_version"] = LanguageVersion;
647 }
648
emitKernelAttrs(const Function & Func,msgpack::MapDocNode Kern)649 void MetadataStreamerV3::emitKernelAttrs(const Function &Func,
650 msgpack::MapDocNode Kern) {
651
652 if (auto Node = Func.getMetadata("reqd_work_group_size"))
653 Kern[".reqd_workgroup_size"] = getWorkGroupDimensions(Node);
654 if (auto Node = Func.getMetadata("work_group_size_hint"))
655 Kern[".workgroup_size_hint"] = getWorkGroupDimensions(Node);
656 if (auto Node = Func.getMetadata("vec_type_hint")) {
657 Kern[".vec_type_hint"] = Kern.getDocument()->getNode(
658 getTypeName(
659 cast<ValueAsMetadata>(Node->getOperand(0))->getType(),
660 mdconst::extract<ConstantInt>(Node->getOperand(1))->getZExtValue()),
661 /*Copy=*/true);
662 }
663 if (Func.hasFnAttribute("runtime-handle")) {
664 Kern[".device_enqueue_symbol"] = Kern.getDocument()->getNode(
665 Func.getFnAttribute("runtime-handle").getValueAsString().str(),
666 /*Copy=*/true);
667 }
668 }
669
emitKernelArgs(const Function & Func,msgpack::MapDocNode Kern)670 void MetadataStreamerV3::emitKernelArgs(const Function &Func,
671 msgpack::MapDocNode Kern) {
672 unsigned Offset = 0;
673 auto Args = HSAMetadataDoc->getArrayNode();
674 for (auto &Arg : Func.args())
675 emitKernelArg(Arg, Offset, Args);
676
677 emitHiddenKernelArgs(Func, Offset, Args);
678
679 Kern[".args"] = Args;
680 }
681
emitKernelArg(const Argument & Arg,unsigned & Offset,msgpack::ArrayDocNode Args)682 void MetadataStreamerV3::emitKernelArg(const Argument &Arg, unsigned &Offset,
683 msgpack::ArrayDocNode Args) {
684 auto Func = Arg.getParent();
685 auto ArgNo = Arg.getArgNo();
686 const MDNode *Node;
687
688 StringRef Name;
689 Node = Func->getMetadata("kernel_arg_name");
690 if (Node && ArgNo < Node->getNumOperands())
691 Name = cast<MDString>(Node->getOperand(ArgNo))->getString();
692 else if (Arg.hasName())
693 Name = Arg.getName();
694
695 StringRef TypeName;
696 Node = Func->getMetadata("kernel_arg_type");
697 if (Node && ArgNo < Node->getNumOperands())
698 TypeName = cast<MDString>(Node->getOperand(ArgNo))->getString();
699
700 StringRef BaseTypeName;
701 Node = Func->getMetadata("kernel_arg_base_type");
702 if (Node && ArgNo < Node->getNumOperands())
703 BaseTypeName = cast<MDString>(Node->getOperand(ArgNo))->getString();
704
705 StringRef AccQual;
706 if (Arg.getType()->isPointerTy() && Arg.onlyReadsMemory() &&
707 Arg.hasNoAliasAttr()) {
708 AccQual = "read_only";
709 } else {
710 Node = Func->getMetadata("kernel_arg_access_qual");
711 if (Node && ArgNo < Node->getNumOperands())
712 AccQual = cast<MDString>(Node->getOperand(ArgNo))->getString();
713 }
714
715 StringRef TypeQual;
716 Node = Func->getMetadata("kernel_arg_type_qual");
717 if (Node && ArgNo < Node->getNumOperands())
718 TypeQual = cast<MDString>(Node->getOperand(ArgNo))->getString();
719
720 const DataLayout &DL = Func->getParent()->getDataLayout();
721
722 MaybeAlign PointeeAlign;
723 Type *Ty = Arg.hasByRefAttr() ? Arg.getParamByRefType() : Arg.getType();
724
725 // FIXME: Need to distinguish in memory alignment from pointer alignment.
726 if (auto PtrTy = dyn_cast<PointerType>(Ty)) {
727 if (PtrTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) {
728 PointeeAlign = DL.getValueOrABITypeAlignment(Arg.getParamAlign(),
729 PtrTy->getElementType());
730 }
731 }
732
733 // There's no distinction between byval aggregates and raw aggregates.
734 Type *ArgTy;
735 Align ArgAlign;
736 std::tie(ArgTy, ArgAlign) = getArgumentTypeAlign(Arg, DL);
737
738 emitKernelArg(DL, ArgTy, ArgAlign,
739 getValueKind(ArgTy, TypeQual, BaseTypeName), Offset, Args,
740 PointeeAlign, Name, TypeName, BaseTypeName, AccQual, TypeQual);
741 }
742
emitKernelArg(const DataLayout & DL,Type * Ty,Align Alignment,StringRef ValueKind,unsigned & Offset,msgpack::ArrayDocNode Args,MaybeAlign PointeeAlign,StringRef Name,StringRef TypeName,StringRef BaseTypeName,StringRef AccQual,StringRef TypeQual)743 void MetadataStreamerV3::emitKernelArg(
744 const DataLayout &DL, Type *Ty, Align Alignment, StringRef ValueKind,
745 unsigned &Offset, msgpack::ArrayDocNode Args, MaybeAlign PointeeAlign,
746 StringRef Name, StringRef TypeName, StringRef BaseTypeName,
747 StringRef AccQual, StringRef TypeQual) {
748 auto Arg = Args.getDocument()->getMapNode();
749
750 if (!Name.empty())
751 Arg[".name"] = Arg.getDocument()->getNode(Name, /*Copy=*/true);
752 if (!TypeName.empty())
753 Arg[".type_name"] = Arg.getDocument()->getNode(TypeName, /*Copy=*/true);
754 auto Size = DL.getTypeAllocSize(Ty);
755 Arg[".size"] = Arg.getDocument()->getNode(Size);
756 Offset = alignTo(Offset, Alignment);
757 Arg[".offset"] = Arg.getDocument()->getNode(Offset);
758 Offset += Size;
759 Arg[".value_kind"] = Arg.getDocument()->getNode(ValueKind, /*Copy=*/true);
760 if (PointeeAlign)
761 Arg[".pointee_align"] = Arg.getDocument()->getNode(PointeeAlign->value());
762
763 if (auto PtrTy = dyn_cast<PointerType>(Ty))
764 if (auto Qualifier = getAddressSpaceQualifier(PtrTy->getAddressSpace()))
765 Arg[".address_space"] = Arg.getDocument()->getNode(*Qualifier, /*Copy=*/true);
766
767 if (auto AQ = getAccessQualifier(AccQual))
768 Arg[".access"] = Arg.getDocument()->getNode(*AQ, /*Copy=*/true);
769
770 // TODO: Emit Arg[".actual_access"].
771
772 SmallVector<StringRef, 1> SplitTypeQuals;
773 TypeQual.split(SplitTypeQuals, " ", -1, false);
774 for (StringRef Key : SplitTypeQuals) {
775 if (Key == "const")
776 Arg[".is_const"] = Arg.getDocument()->getNode(true);
777 else if (Key == "restrict")
778 Arg[".is_restrict"] = Arg.getDocument()->getNode(true);
779 else if (Key == "volatile")
780 Arg[".is_volatile"] = Arg.getDocument()->getNode(true);
781 else if (Key == "pipe")
782 Arg[".is_pipe"] = Arg.getDocument()->getNode(true);
783 }
784
785 Args.push_back(Arg);
786 }
787
emitHiddenKernelArgs(const Function & Func,unsigned & Offset,msgpack::ArrayDocNode Args)788 void MetadataStreamerV3::emitHiddenKernelArgs(const Function &Func,
789 unsigned &Offset,
790 msgpack::ArrayDocNode Args) {
791 int HiddenArgNumBytes =
792 getIntegerAttribute(Func, "amdgpu-implicitarg-num-bytes", 0);
793
794 if (!HiddenArgNumBytes)
795 return;
796
797 auto &DL = Func.getParent()->getDataLayout();
798 auto Int64Ty = Type::getInt64Ty(Func.getContext());
799
800 if (HiddenArgNumBytes >= 8)
801 emitKernelArg(DL, Int64Ty, Align(8), "hidden_global_offset_x", Offset,
802 Args);
803 if (HiddenArgNumBytes >= 16)
804 emitKernelArg(DL, Int64Ty, Align(8), "hidden_global_offset_y", Offset,
805 Args);
806 if (HiddenArgNumBytes >= 24)
807 emitKernelArg(DL, Int64Ty, Align(8), "hidden_global_offset_z", Offset,
808 Args);
809
810 auto Int8PtrTy =
811 Type::getInt8PtrTy(Func.getContext(), AMDGPUAS::GLOBAL_ADDRESS);
812
813 // Emit "printf buffer" argument if printf is used, otherwise emit dummy
814 // "none" argument.
815 if (HiddenArgNumBytes >= 32) {
816 if (Func.getParent()->getNamedMetadata("llvm.printf.fmts"))
817 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_printf_buffer", Offset,
818 Args);
819 else if (Func.getParent()->getFunction("__ockl_hostcall_internal")) {
820 // The printf runtime binding pass should have ensured that hostcall and
821 // printf are not used in the same module.
822 assert(!Func.getParent()->getNamedMetadata("llvm.printf.fmts"));
823 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_hostcall_buffer", Offset,
824 Args);
825 } else
826 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_none", Offset, Args);
827 }
828
829 // Emit "default queue" and "completion action" arguments if enqueue kernel is
830 // used, otherwise emit dummy "none" arguments.
831 if (HiddenArgNumBytes >= 48) {
832 if (Func.hasFnAttribute("calls-enqueue-kernel")) {
833 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_default_queue", Offset,
834 Args);
835 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_completion_action", Offset,
836 Args);
837 } else {
838 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_none", Offset, Args);
839 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_none", Offset, Args);
840 }
841 }
842
843 // Emit the pointer argument for multi-grid object.
844 if (HiddenArgNumBytes >= 56)
845 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_multigrid_sync_arg", Offset,
846 Args);
847 }
848
849 msgpack::MapDocNode
getHSAKernelProps(const MachineFunction & MF,const SIProgramInfo & ProgramInfo) const850 MetadataStreamerV3::getHSAKernelProps(const MachineFunction &MF,
851 const SIProgramInfo &ProgramInfo) const {
852 const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
853 const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
854 const Function &F = MF.getFunction();
855
856 auto Kern = HSAMetadataDoc->getMapNode();
857
858 Align MaxKernArgAlign;
859 Kern[".kernarg_segment_size"] = Kern.getDocument()->getNode(
860 STM.getKernArgSegmentSize(F, MaxKernArgAlign));
861 Kern[".group_segment_fixed_size"] =
862 Kern.getDocument()->getNode(ProgramInfo.LDSSize);
863 Kern[".private_segment_fixed_size"] =
864 Kern.getDocument()->getNode(ProgramInfo.ScratchSize);
865 Kern[".kernarg_segment_align"] =
866 Kern.getDocument()->getNode(std::max(Align(4), MaxKernArgAlign).value());
867 Kern[".wavefront_size"] =
868 Kern.getDocument()->getNode(STM.getWavefrontSize());
869 Kern[".sgpr_count"] = Kern.getDocument()->getNode(ProgramInfo.NumSGPR);
870 Kern[".vgpr_count"] = Kern.getDocument()->getNode(ProgramInfo.NumVGPR);
871 Kern[".max_flat_workgroup_size"] =
872 Kern.getDocument()->getNode(MFI.getMaxFlatWorkGroupSize());
873 Kern[".sgpr_spill_count"] =
874 Kern.getDocument()->getNode(MFI.getNumSpilledSGPRs());
875 Kern[".vgpr_spill_count"] =
876 Kern.getDocument()->getNode(MFI.getNumSpilledVGPRs());
877
878 return Kern;
879 }
880
emitTo(AMDGPUTargetStreamer & TargetStreamer)881 bool MetadataStreamerV3::emitTo(AMDGPUTargetStreamer &TargetStreamer) {
882 return TargetStreamer.EmitHSAMetadata(*HSAMetadataDoc, true);
883 }
884
begin(const Module & Mod,const IsaInfo::AMDGPUTargetID & TargetID)885 void MetadataStreamerV3::begin(const Module &Mod,
886 const IsaInfo::AMDGPUTargetID &TargetID) {
887 emitVersion();
888 emitPrintf(Mod);
889 getRootMetadata("amdhsa.kernels") = HSAMetadataDoc->getArrayNode();
890 }
891
end()892 void MetadataStreamerV3::end() {
893 std::string HSAMetadataString;
894 raw_string_ostream StrOS(HSAMetadataString);
895 HSAMetadataDoc->toYAML(StrOS);
896
897 if (DumpHSAMetadata)
898 dump(StrOS.str());
899 if (VerifyHSAMetadata)
900 verify(StrOS.str());
901 }
902
emitKernel(const MachineFunction & MF,const SIProgramInfo & ProgramInfo)903 void MetadataStreamerV3::emitKernel(const MachineFunction &MF,
904 const SIProgramInfo &ProgramInfo) {
905 auto &Func = MF.getFunction();
906 auto Kern = getHSAKernelProps(MF, ProgramInfo);
907
908 assert(Func.getCallingConv() == CallingConv::AMDGPU_KERNEL ||
909 Func.getCallingConv() == CallingConv::SPIR_KERNEL);
910
911 auto Kernels =
912 getRootMetadata("amdhsa.kernels").getArray(/*Convert=*/true);
913
914 {
915 Kern[".name"] = Kern.getDocument()->getNode(Func.getName());
916 Kern[".symbol"] = Kern.getDocument()->getNode(
917 (Twine(Func.getName()) + Twine(".kd")).str(), /*Copy=*/true);
918 emitKernelLanguage(Func, Kern);
919 emitKernelAttrs(Func, Kern);
920 emitKernelArgs(Func, Kern);
921 }
922
923 Kernels.push_back(Kern);
924 }
925
926 //===----------------------------------------------------------------------===//
927 // HSAMetadataStreamerV4
928 //===----------------------------------------------------------------------===//
929
emitVersion()930 void MetadataStreamerV4::emitVersion() {
931 auto Version = HSAMetadataDoc->getArrayNode();
932 Version.push_back(Version.getDocument()->getNode(VersionMajorV4));
933 Version.push_back(Version.getDocument()->getNode(VersionMinorV4));
934 getRootMetadata("amdhsa.version") = Version;
935 }
936
emitTargetID(const IsaInfo::AMDGPUTargetID & TargetID)937 void MetadataStreamerV4::emitTargetID(const IsaInfo::AMDGPUTargetID &TargetID) {
938 getRootMetadata("amdhsa.target") =
939 HSAMetadataDoc->getNode(TargetID.toString(), /*Copy=*/true);
940 }
941
begin(const Module & Mod,const IsaInfo::AMDGPUTargetID & TargetID)942 void MetadataStreamerV4::begin(const Module &Mod,
943 const IsaInfo::AMDGPUTargetID &TargetID) {
944 emitVersion();
945 emitTargetID(TargetID);
946 emitPrintf(Mod);
947 getRootMetadata("amdhsa.kernels") = HSAMetadataDoc->getArrayNode();
948 }
949
950 } // end namespace HSAMD
951 } // end namespace AMDGPU
952 } // end namespace llvm
953