1 //===--- AMDGPUHSAMetadataStreamer.cpp --------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// AMDGPU HSA Metadata Streamer.
11 ///
12 //
13 //===----------------------------------------------------------------------===//
14
15 #include "AMDGPUHSAMetadataStreamer.h"
16 #include "AMDGPU.h"
17 #include "AMDGPUSubtarget.h"
18 #include "MCTargetDesc/AMDGPUTargetStreamer.h"
19 #include "SIMachineFunctionInfo.h"
20 #include "SIProgramInfo.h"
21 #include "Utils/AMDGPUBaseInfo.h"
22 #include "llvm/ADT/StringSwitch.h"
23 #include "llvm/IR/Constants.h"
24 #include "llvm/IR/Module.h"
25 #include "llvm/Support/raw_ostream.h"
26
27 using namespace llvm;
28
getArgumentTypeAlign(const Argument & Arg,const DataLayout & DL)29 static std::pair<Type *, Align> getArgumentTypeAlign(const Argument &Arg,
30 const DataLayout &DL) {
31 Type *Ty = Arg.getType();
32 MaybeAlign ArgAlign;
33 if (Arg.hasByRefAttr()) {
34 Ty = Arg.getParamByRefType();
35 ArgAlign = Arg.getParamAlign();
36 }
37
38 if (!ArgAlign)
39 ArgAlign = DL.getABITypeAlign(Ty);
40
41 return std::make_pair(Ty, *ArgAlign);
42 }
43
44 namespace llvm {
45
46 static cl::opt<bool> DumpHSAMetadata(
47 "amdgpu-dump-hsa-metadata",
48 cl::desc("Dump AMDGPU HSA Metadata"));
49 static cl::opt<bool> VerifyHSAMetadata(
50 "amdgpu-verify-hsa-metadata",
51 cl::desc("Verify AMDGPU HSA Metadata"));
52
53 namespace AMDGPU {
54 namespace HSAMD {
55
56 //===----------------------------------------------------------------------===//
57 // HSAMetadataStreamerV2
58 //===----------------------------------------------------------------------===//
dump(StringRef HSAMetadataString) const59 void MetadataStreamerV2::dump(StringRef HSAMetadataString) const {
60 errs() << "AMDGPU HSA Metadata:\n" << HSAMetadataString << '\n';
61 }
62
verify(StringRef HSAMetadataString) const63 void MetadataStreamerV2::verify(StringRef HSAMetadataString) const {
64 errs() << "AMDGPU HSA Metadata Parser Test: ";
65
66 HSAMD::Metadata FromHSAMetadataString;
67 if (fromString(std::string(HSAMetadataString), FromHSAMetadataString)) {
68 errs() << "FAIL\n";
69 return;
70 }
71
72 std::string ToHSAMetadataString;
73 if (toString(FromHSAMetadataString, ToHSAMetadataString)) {
74 errs() << "FAIL\n";
75 return;
76 }
77
78 errs() << (HSAMetadataString == ToHSAMetadataString ? "PASS" : "FAIL")
79 << '\n';
80 if (HSAMetadataString != ToHSAMetadataString) {
81 errs() << "Original input: " << HSAMetadataString << '\n'
82 << "Produced output: " << ToHSAMetadataString << '\n';
83 }
84 }
85
86 AccessQualifier
getAccessQualifier(StringRef AccQual) const87 MetadataStreamerV2::getAccessQualifier(StringRef AccQual) const {
88 if (AccQual.empty())
89 return AccessQualifier::Unknown;
90
91 return StringSwitch<AccessQualifier>(AccQual)
92 .Case("read_only", AccessQualifier::ReadOnly)
93 .Case("write_only", AccessQualifier::WriteOnly)
94 .Case("read_write", AccessQualifier::ReadWrite)
95 .Default(AccessQualifier::Default);
96 }
97
98 AddressSpaceQualifier
getAddressSpaceQualifier(unsigned AddressSpace) const99 MetadataStreamerV2::getAddressSpaceQualifier(
100 unsigned AddressSpace) const {
101 switch (AddressSpace) {
102 case AMDGPUAS::PRIVATE_ADDRESS:
103 return AddressSpaceQualifier::Private;
104 case AMDGPUAS::GLOBAL_ADDRESS:
105 return AddressSpaceQualifier::Global;
106 case AMDGPUAS::CONSTANT_ADDRESS:
107 return AddressSpaceQualifier::Constant;
108 case AMDGPUAS::LOCAL_ADDRESS:
109 return AddressSpaceQualifier::Local;
110 case AMDGPUAS::FLAT_ADDRESS:
111 return AddressSpaceQualifier::Generic;
112 case AMDGPUAS::REGION_ADDRESS:
113 return AddressSpaceQualifier::Region;
114 default:
115 return AddressSpaceQualifier::Unknown;
116 }
117 }
118
getValueKind(Type * Ty,StringRef TypeQual,StringRef BaseTypeName) const119 ValueKind MetadataStreamerV2::getValueKind(Type *Ty, StringRef TypeQual,
120 StringRef BaseTypeName) const {
121 if (TypeQual.find("pipe") != StringRef::npos)
122 return ValueKind::Pipe;
123
124 return StringSwitch<ValueKind>(BaseTypeName)
125 .Case("image1d_t", ValueKind::Image)
126 .Case("image1d_array_t", ValueKind::Image)
127 .Case("image1d_buffer_t", ValueKind::Image)
128 .Case("image2d_t", ValueKind::Image)
129 .Case("image2d_array_t", ValueKind::Image)
130 .Case("image2d_array_depth_t", ValueKind::Image)
131 .Case("image2d_array_msaa_t", ValueKind::Image)
132 .Case("image2d_array_msaa_depth_t", ValueKind::Image)
133 .Case("image2d_depth_t", ValueKind::Image)
134 .Case("image2d_msaa_t", ValueKind::Image)
135 .Case("image2d_msaa_depth_t", ValueKind::Image)
136 .Case("image3d_t", ValueKind::Image)
137 .Case("sampler_t", ValueKind::Sampler)
138 .Case("queue_t", ValueKind::Queue)
139 .Default(isa<PointerType>(Ty) ?
140 (Ty->getPointerAddressSpace() ==
141 AMDGPUAS::LOCAL_ADDRESS ?
142 ValueKind::DynamicSharedPointer :
143 ValueKind::GlobalBuffer) :
144 ValueKind::ByValue);
145 }
146
getTypeName(Type * Ty,bool Signed) const147 std::string MetadataStreamerV2::getTypeName(Type *Ty, bool Signed) const {
148 switch (Ty->getTypeID()) {
149 case Type::IntegerTyID: {
150 if (!Signed)
151 return (Twine('u') + getTypeName(Ty, true)).str();
152
153 auto BitWidth = Ty->getIntegerBitWidth();
154 switch (BitWidth) {
155 case 8:
156 return "char";
157 case 16:
158 return "short";
159 case 32:
160 return "int";
161 case 64:
162 return "long";
163 default:
164 return (Twine('i') + Twine(BitWidth)).str();
165 }
166 }
167 case Type::HalfTyID:
168 return "half";
169 case Type::FloatTyID:
170 return "float";
171 case Type::DoubleTyID:
172 return "double";
173 case Type::FixedVectorTyID: {
174 auto VecTy = cast<FixedVectorType>(Ty);
175 auto ElTy = VecTy->getElementType();
176 auto NumElements = VecTy->getNumElements();
177 return (Twine(getTypeName(ElTy, Signed)) + Twine(NumElements)).str();
178 }
179 default:
180 return "unknown";
181 }
182 }
183
184 std::vector<uint32_t>
getWorkGroupDimensions(MDNode * Node) const185 MetadataStreamerV2::getWorkGroupDimensions(MDNode *Node) const {
186 std::vector<uint32_t> Dims;
187 if (Node->getNumOperands() != 3)
188 return Dims;
189
190 for (auto &Op : Node->operands())
191 Dims.push_back(mdconst::extract<ConstantInt>(Op)->getZExtValue());
192 return Dims;
193 }
194
195 Kernel::CodeProps::Metadata
getHSACodeProps(const MachineFunction & MF,const SIProgramInfo & ProgramInfo) const196 MetadataStreamerV2::getHSACodeProps(const MachineFunction &MF,
197 const SIProgramInfo &ProgramInfo) const {
198 const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
199 const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
200 HSAMD::Kernel::CodeProps::Metadata HSACodeProps;
201 const Function &F = MF.getFunction();
202
203 assert(F.getCallingConv() == CallingConv::AMDGPU_KERNEL ||
204 F.getCallingConv() == CallingConv::SPIR_KERNEL);
205
206 Align MaxKernArgAlign;
207 HSACodeProps.mKernargSegmentSize = STM.getKernArgSegmentSize(F,
208 MaxKernArgAlign);
209 HSACodeProps.mGroupSegmentFixedSize = ProgramInfo.LDSSize;
210 HSACodeProps.mPrivateSegmentFixedSize = ProgramInfo.ScratchSize;
211 HSACodeProps.mKernargSegmentAlign =
212 std::max(MaxKernArgAlign, Align(4)).value();
213 HSACodeProps.mWavefrontSize = STM.getWavefrontSize();
214 HSACodeProps.mNumSGPRs = ProgramInfo.NumSGPR;
215 HSACodeProps.mNumVGPRs = ProgramInfo.NumVGPR;
216 HSACodeProps.mMaxFlatWorkGroupSize = MFI.getMaxFlatWorkGroupSize();
217 HSACodeProps.mIsDynamicCallStack = ProgramInfo.DynamicCallStack;
218 HSACodeProps.mIsXNACKEnabled = STM.isXNACKEnabled();
219 HSACodeProps.mNumSpilledSGPRs = MFI.getNumSpilledSGPRs();
220 HSACodeProps.mNumSpilledVGPRs = MFI.getNumSpilledVGPRs();
221
222 return HSACodeProps;
223 }
224
225 Kernel::DebugProps::Metadata
getHSADebugProps(const MachineFunction & MF,const SIProgramInfo & ProgramInfo) const226 MetadataStreamerV2::getHSADebugProps(const MachineFunction &MF,
227 const SIProgramInfo &ProgramInfo) const {
228 return HSAMD::Kernel::DebugProps::Metadata();
229 }
230
emitVersion()231 void MetadataStreamerV2::emitVersion() {
232 auto &Version = HSAMetadata.mVersion;
233
234 Version.push_back(VersionMajor);
235 Version.push_back(VersionMinor);
236 }
237
emitPrintf(const Module & Mod)238 void MetadataStreamerV2::emitPrintf(const Module &Mod) {
239 auto &Printf = HSAMetadata.mPrintf;
240
241 auto Node = Mod.getNamedMetadata("llvm.printf.fmts");
242 if (!Node)
243 return;
244
245 for (auto Op : Node->operands())
246 if (Op->getNumOperands())
247 Printf.push_back(
248 std::string(cast<MDString>(Op->getOperand(0))->getString()));
249 }
250
emitKernelLanguage(const Function & Func)251 void MetadataStreamerV2::emitKernelLanguage(const Function &Func) {
252 auto &Kernel = HSAMetadata.mKernels.back();
253
254 // TODO: What about other languages?
255 auto Node = Func.getParent()->getNamedMetadata("opencl.ocl.version");
256 if (!Node || !Node->getNumOperands())
257 return;
258 auto Op0 = Node->getOperand(0);
259 if (Op0->getNumOperands() <= 1)
260 return;
261
262 Kernel.mLanguage = "OpenCL C";
263 Kernel.mLanguageVersion.push_back(
264 mdconst::extract<ConstantInt>(Op0->getOperand(0))->getZExtValue());
265 Kernel.mLanguageVersion.push_back(
266 mdconst::extract<ConstantInt>(Op0->getOperand(1))->getZExtValue());
267 }
268
emitKernelAttrs(const Function & Func)269 void MetadataStreamerV2::emitKernelAttrs(const Function &Func) {
270 auto &Attrs = HSAMetadata.mKernels.back().mAttrs;
271
272 if (auto Node = Func.getMetadata("reqd_work_group_size"))
273 Attrs.mReqdWorkGroupSize = getWorkGroupDimensions(Node);
274 if (auto Node = Func.getMetadata("work_group_size_hint"))
275 Attrs.mWorkGroupSizeHint = getWorkGroupDimensions(Node);
276 if (auto Node = Func.getMetadata("vec_type_hint")) {
277 Attrs.mVecTypeHint = getTypeName(
278 cast<ValueAsMetadata>(Node->getOperand(0))->getType(),
279 mdconst::extract<ConstantInt>(Node->getOperand(1))->getZExtValue());
280 }
281 if (Func.hasFnAttribute("runtime-handle")) {
282 Attrs.mRuntimeHandle =
283 Func.getFnAttribute("runtime-handle").getValueAsString().str();
284 }
285 }
286
emitKernelArgs(const Function & Func)287 void MetadataStreamerV2::emitKernelArgs(const Function &Func) {
288 for (auto &Arg : Func.args())
289 emitKernelArg(Arg);
290
291 emitHiddenKernelArgs(Func);
292 }
293
emitKernelArg(const Argument & Arg)294 void MetadataStreamerV2::emitKernelArg(const Argument &Arg) {
295 auto Func = Arg.getParent();
296 auto ArgNo = Arg.getArgNo();
297 const MDNode *Node;
298
299 StringRef Name;
300 Node = Func->getMetadata("kernel_arg_name");
301 if (Node && ArgNo < Node->getNumOperands())
302 Name = cast<MDString>(Node->getOperand(ArgNo))->getString();
303 else if (Arg.hasName())
304 Name = Arg.getName();
305
306 StringRef TypeName;
307 Node = Func->getMetadata("kernel_arg_type");
308 if (Node && ArgNo < Node->getNumOperands())
309 TypeName = cast<MDString>(Node->getOperand(ArgNo))->getString();
310
311 StringRef BaseTypeName;
312 Node = Func->getMetadata("kernel_arg_base_type");
313 if (Node && ArgNo < Node->getNumOperands())
314 BaseTypeName = cast<MDString>(Node->getOperand(ArgNo))->getString();
315
316 StringRef AccQual;
317 if (Arg.getType()->isPointerTy() && Arg.onlyReadsMemory() &&
318 Arg.hasNoAliasAttr()) {
319 AccQual = "read_only";
320 } else {
321 Node = Func->getMetadata("kernel_arg_access_qual");
322 if (Node && ArgNo < Node->getNumOperands())
323 AccQual = cast<MDString>(Node->getOperand(ArgNo))->getString();
324 }
325
326 StringRef TypeQual;
327 Node = Func->getMetadata("kernel_arg_type_qual");
328 if (Node && ArgNo < Node->getNumOperands())
329 TypeQual = cast<MDString>(Node->getOperand(ArgNo))->getString();
330
331 const DataLayout &DL = Func->getParent()->getDataLayout();
332
333 MaybeAlign PointeeAlign;
334 if (auto PtrTy = dyn_cast<PointerType>(Arg.getType())) {
335 if (PtrTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) {
336 // FIXME: Should report this for all address spaces
337 PointeeAlign = DL.getValueOrABITypeAlignment(Arg.getParamAlign(),
338 PtrTy->getElementType());
339 }
340 }
341
342 Type *ArgTy;
343 Align ArgAlign;
344 std::tie(ArgTy, ArgAlign) = getArgumentTypeAlign(Arg, DL);
345
346 emitKernelArg(DL, ArgTy, ArgAlign,
347 getValueKind(ArgTy, TypeQual, BaseTypeName), PointeeAlign, Name,
348 TypeName, BaseTypeName, AccQual, TypeQual);
349 }
350
emitKernelArg(const DataLayout & DL,Type * Ty,Align Alignment,ValueKind ValueKind,MaybeAlign PointeeAlign,StringRef Name,StringRef TypeName,StringRef BaseTypeName,StringRef AccQual,StringRef TypeQual)351 void MetadataStreamerV2::emitKernelArg(const DataLayout &DL, Type *Ty,
352 Align Alignment, ValueKind ValueKind,
353 MaybeAlign PointeeAlign, StringRef Name,
354 StringRef TypeName,
355 StringRef BaseTypeName,
356 StringRef AccQual, StringRef TypeQual) {
357 HSAMetadata.mKernels.back().mArgs.push_back(Kernel::Arg::Metadata());
358 auto &Arg = HSAMetadata.mKernels.back().mArgs.back();
359
360 Arg.mName = std::string(Name);
361 Arg.mTypeName = std::string(TypeName);
362 Arg.mSize = DL.getTypeAllocSize(Ty);
363 Arg.mAlign = Alignment.value();
364 Arg.mValueKind = ValueKind;
365 Arg.mPointeeAlign = PointeeAlign ? PointeeAlign->value() : 0;
366
367 if (auto PtrTy = dyn_cast<PointerType>(Ty))
368 Arg.mAddrSpaceQual = getAddressSpaceQualifier(PtrTy->getAddressSpace());
369
370 Arg.mAccQual = getAccessQualifier(AccQual);
371
372 // TODO: Emit Arg.mActualAccQual.
373
374 SmallVector<StringRef, 1> SplitTypeQuals;
375 TypeQual.split(SplitTypeQuals, " ", -1, false);
376 for (StringRef Key : SplitTypeQuals) {
377 auto P = StringSwitch<bool*>(Key)
378 .Case("const", &Arg.mIsConst)
379 .Case("restrict", &Arg.mIsRestrict)
380 .Case("volatile", &Arg.mIsVolatile)
381 .Case("pipe", &Arg.mIsPipe)
382 .Default(nullptr);
383 if (P)
384 *P = true;
385 }
386 }
387
emitHiddenKernelArgs(const Function & Func)388 void MetadataStreamerV2::emitHiddenKernelArgs(const Function &Func) {
389 int HiddenArgNumBytes =
390 getIntegerAttribute(Func, "amdgpu-implicitarg-num-bytes", 0);
391
392 if (!HiddenArgNumBytes)
393 return;
394
395 auto &DL = Func.getParent()->getDataLayout();
396 auto Int64Ty = Type::getInt64Ty(Func.getContext());
397
398 if (HiddenArgNumBytes >= 8)
399 emitKernelArg(DL, Int64Ty, Align(8), ValueKind::HiddenGlobalOffsetX);
400 if (HiddenArgNumBytes >= 16)
401 emitKernelArg(DL, Int64Ty, Align(8), ValueKind::HiddenGlobalOffsetY);
402 if (HiddenArgNumBytes >= 24)
403 emitKernelArg(DL, Int64Ty, Align(8), ValueKind::HiddenGlobalOffsetZ);
404
405 auto Int8PtrTy = Type::getInt8PtrTy(Func.getContext(),
406 AMDGPUAS::GLOBAL_ADDRESS);
407
408 // Emit "printf buffer" argument if printf is used, otherwise emit dummy
409 // "none" argument.
410 if (HiddenArgNumBytes >= 32) {
411 if (Func.getParent()->getNamedMetadata("llvm.printf.fmts"))
412 emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenPrintfBuffer);
413 else if (Func.getParent()->getFunction("__ockl_hostcall_internal")) {
414 // The printf runtime binding pass should have ensured that hostcall and
415 // printf are not used in the same module.
416 assert(!Func.getParent()->getNamedMetadata("llvm.printf.fmts"));
417 emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenHostcallBuffer);
418 } else
419 emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenNone);
420 }
421
422 // Emit "default queue" and "completion action" arguments if enqueue kernel is
423 // used, otherwise emit dummy "none" arguments.
424 if (HiddenArgNumBytes >= 48) {
425 if (Func.hasFnAttribute("calls-enqueue-kernel")) {
426 emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenDefaultQueue);
427 emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenCompletionAction);
428 } else {
429 emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenNone);
430 emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenNone);
431 }
432 }
433
434 // Emit the pointer argument for multi-grid object.
435 if (HiddenArgNumBytes >= 56)
436 emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenMultiGridSyncArg);
437 }
438
emitTo(AMDGPUTargetStreamer & TargetStreamer)439 bool MetadataStreamerV2::emitTo(AMDGPUTargetStreamer &TargetStreamer) {
440 return TargetStreamer.EmitHSAMetadata(getHSAMetadata());
441 }
442
begin(const Module & Mod)443 void MetadataStreamerV2::begin(const Module &Mod) {
444 emitVersion();
445 emitPrintf(Mod);
446 }
447
end()448 void MetadataStreamerV2::end() {
449 std::string HSAMetadataString;
450 if (toString(HSAMetadata, HSAMetadataString))
451 return;
452
453 if (DumpHSAMetadata)
454 dump(HSAMetadataString);
455 if (VerifyHSAMetadata)
456 verify(HSAMetadataString);
457 }
458
emitKernel(const MachineFunction & MF,const SIProgramInfo & ProgramInfo)459 void MetadataStreamerV2::emitKernel(const MachineFunction &MF,
460 const SIProgramInfo &ProgramInfo) {
461 auto &Func = MF.getFunction();
462 if (Func.getCallingConv() != CallingConv::AMDGPU_KERNEL)
463 return;
464
465 auto CodeProps = getHSACodeProps(MF, ProgramInfo);
466 auto DebugProps = getHSADebugProps(MF, ProgramInfo);
467
468 HSAMetadata.mKernels.push_back(Kernel::Metadata());
469 auto &Kernel = HSAMetadata.mKernels.back();
470
471 Kernel.mName = std::string(Func.getName());
472 Kernel.mSymbolName = (Twine(Func.getName()) + Twine("@kd")).str();
473 emitKernelLanguage(Func);
474 emitKernelAttrs(Func);
475 emitKernelArgs(Func);
476 HSAMetadata.mKernels.back().mCodeProps = CodeProps;
477 HSAMetadata.mKernels.back().mDebugProps = DebugProps;
478 }
479
480 //===----------------------------------------------------------------------===//
481 // HSAMetadataStreamerV3
482 //===----------------------------------------------------------------------===//
483
dump(StringRef HSAMetadataString) const484 void MetadataStreamerV3::dump(StringRef HSAMetadataString) const {
485 errs() << "AMDGPU HSA Metadata:\n" << HSAMetadataString << '\n';
486 }
487
verify(StringRef HSAMetadataString) const488 void MetadataStreamerV3::verify(StringRef HSAMetadataString) const {
489 errs() << "AMDGPU HSA Metadata Parser Test: ";
490
491 msgpack::Document FromHSAMetadataString;
492
493 if (!FromHSAMetadataString.fromYAML(HSAMetadataString)) {
494 errs() << "FAIL\n";
495 return;
496 }
497
498 std::string ToHSAMetadataString;
499 raw_string_ostream StrOS(ToHSAMetadataString);
500 FromHSAMetadataString.toYAML(StrOS);
501
502 errs() << (HSAMetadataString == StrOS.str() ? "PASS" : "FAIL") << '\n';
503 if (HSAMetadataString != ToHSAMetadataString) {
504 errs() << "Original input: " << HSAMetadataString << '\n'
505 << "Produced output: " << StrOS.str() << '\n';
506 }
507 }
508
509 Optional<StringRef>
getAccessQualifier(StringRef AccQual) const510 MetadataStreamerV3::getAccessQualifier(StringRef AccQual) const {
511 return StringSwitch<Optional<StringRef>>(AccQual)
512 .Case("read_only", StringRef("read_only"))
513 .Case("write_only", StringRef("write_only"))
514 .Case("read_write", StringRef("read_write"))
515 .Default(None);
516 }
517
518 Optional<StringRef>
getAddressSpaceQualifier(unsigned AddressSpace) const519 MetadataStreamerV3::getAddressSpaceQualifier(unsigned AddressSpace) const {
520 switch (AddressSpace) {
521 case AMDGPUAS::PRIVATE_ADDRESS:
522 return StringRef("private");
523 case AMDGPUAS::GLOBAL_ADDRESS:
524 return StringRef("global");
525 case AMDGPUAS::CONSTANT_ADDRESS:
526 return StringRef("constant");
527 case AMDGPUAS::LOCAL_ADDRESS:
528 return StringRef("local");
529 case AMDGPUAS::FLAT_ADDRESS:
530 return StringRef("generic");
531 case AMDGPUAS::REGION_ADDRESS:
532 return StringRef("region");
533 default:
534 return None;
535 }
536 }
537
getValueKind(Type * Ty,StringRef TypeQual,StringRef BaseTypeName) const538 StringRef MetadataStreamerV3::getValueKind(Type *Ty, StringRef TypeQual,
539 StringRef BaseTypeName) const {
540 if (TypeQual.find("pipe") != StringRef::npos)
541 return "pipe";
542
543 return StringSwitch<StringRef>(BaseTypeName)
544 .Case("image1d_t", "image")
545 .Case("image1d_array_t", "image")
546 .Case("image1d_buffer_t", "image")
547 .Case("image2d_t", "image")
548 .Case("image2d_array_t", "image")
549 .Case("image2d_array_depth_t", "image")
550 .Case("image2d_array_msaa_t", "image")
551 .Case("image2d_array_msaa_depth_t", "image")
552 .Case("image2d_depth_t", "image")
553 .Case("image2d_msaa_t", "image")
554 .Case("image2d_msaa_depth_t", "image")
555 .Case("image3d_t", "image")
556 .Case("sampler_t", "sampler")
557 .Case("queue_t", "queue")
558 .Default(isa<PointerType>(Ty)
559 ? (Ty->getPointerAddressSpace() == AMDGPUAS::LOCAL_ADDRESS
560 ? "dynamic_shared_pointer"
561 : "global_buffer")
562 : "by_value");
563 }
564
getTypeName(Type * Ty,bool Signed) const565 std::string MetadataStreamerV3::getTypeName(Type *Ty, bool Signed) const {
566 switch (Ty->getTypeID()) {
567 case Type::IntegerTyID: {
568 if (!Signed)
569 return (Twine('u') + getTypeName(Ty, true)).str();
570
571 auto BitWidth = Ty->getIntegerBitWidth();
572 switch (BitWidth) {
573 case 8:
574 return "char";
575 case 16:
576 return "short";
577 case 32:
578 return "int";
579 case 64:
580 return "long";
581 default:
582 return (Twine('i') + Twine(BitWidth)).str();
583 }
584 }
585 case Type::HalfTyID:
586 return "half";
587 case Type::FloatTyID:
588 return "float";
589 case Type::DoubleTyID:
590 return "double";
591 case Type::FixedVectorTyID: {
592 auto VecTy = cast<FixedVectorType>(Ty);
593 auto ElTy = VecTy->getElementType();
594 auto NumElements = VecTy->getNumElements();
595 return (Twine(getTypeName(ElTy, Signed)) + Twine(NumElements)).str();
596 }
597 default:
598 return "unknown";
599 }
600 }
601
602 msgpack::ArrayDocNode
getWorkGroupDimensions(MDNode * Node) const603 MetadataStreamerV3::getWorkGroupDimensions(MDNode *Node) const {
604 auto Dims = HSAMetadataDoc->getArrayNode();
605 if (Node->getNumOperands() != 3)
606 return Dims;
607
608 for (auto &Op : Node->operands())
609 Dims.push_back(Dims.getDocument()->getNode(
610 uint64_t(mdconst::extract<ConstantInt>(Op)->getZExtValue())));
611 return Dims;
612 }
613
emitVersion()614 void MetadataStreamerV3::emitVersion() {
615 auto Version = HSAMetadataDoc->getArrayNode();
616 Version.push_back(Version.getDocument()->getNode(VersionMajor));
617 Version.push_back(Version.getDocument()->getNode(VersionMinor));
618 getRootMetadata("amdhsa.version") = Version;
619 }
620
emitPrintf(const Module & Mod)621 void MetadataStreamerV3::emitPrintf(const Module &Mod) {
622 auto Node = Mod.getNamedMetadata("llvm.printf.fmts");
623 if (!Node)
624 return;
625
626 auto Printf = HSAMetadataDoc->getArrayNode();
627 for (auto Op : Node->operands())
628 if (Op->getNumOperands())
629 Printf.push_back(Printf.getDocument()->getNode(
630 cast<MDString>(Op->getOperand(0))->getString(), /*Copy=*/true));
631 getRootMetadata("amdhsa.printf") = Printf;
632 }
633
emitKernelLanguage(const Function & Func,msgpack::MapDocNode Kern)634 void MetadataStreamerV3::emitKernelLanguage(const Function &Func,
635 msgpack::MapDocNode Kern) {
636 // TODO: What about other languages?
637 auto Node = Func.getParent()->getNamedMetadata("opencl.ocl.version");
638 if (!Node || !Node->getNumOperands())
639 return;
640 auto Op0 = Node->getOperand(0);
641 if (Op0->getNumOperands() <= 1)
642 return;
643
644 Kern[".language"] = Kern.getDocument()->getNode("OpenCL C");
645 auto LanguageVersion = Kern.getDocument()->getArrayNode();
646 LanguageVersion.push_back(Kern.getDocument()->getNode(
647 mdconst::extract<ConstantInt>(Op0->getOperand(0))->getZExtValue()));
648 LanguageVersion.push_back(Kern.getDocument()->getNode(
649 mdconst::extract<ConstantInt>(Op0->getOperand(1))->getZExtValue()));
650 Kern[".language_version"] = LanguageVersion;
651 }
652
emitKernelAttrs(const Function & Func,msgpack::MapDocNode Kern)653 void MetadataStreamerV3::emitKernelAttrs(const Function &Func,
654 msgpack::MapDocNode Kern) {
655
656 if (auto Node = Func.getMetadata("reqd_work_group_size"))
657 Kern[".reqd_workgroup_size"] = getWorkGroupDimensions(Node);
658 if (auto Node = Func.getMetadata("work_group_size_hint"))
659 Kern[".workgroup_size_hint"] = getWorkGroupDimensions(Node);
660 if (auto Node = Func.getMetadata("vec_type_hint")) {
661 Kern[".vec_type_hint"] = Kern.getDocument()->getNode(
662 getTypeName(
663 cast<ValueAsMetadata>(Node->getOperand(0))->getType(),
664 mdconst::extract<ConstantInt>(Node->getOperand(1))->getZExtValue()),
665 /*Copy=*/true);
666 }
667 if (Func.hasFnAttribute("runtime-handle")) {
668 Kern[".device_enqueue_symbol"] = Kern.getDocument()->getNode(
669 Func.getFnAttribute("runtime-handle").getValueAsString().str(),
670 /*Copy=*/true);
671 }
672 }
673
emitKernelArgs(const Function & Func,msgpack::MapDocNode Kern)674 void MetadataStreamerV3::emitKernelArgs(const Function &Func,
675 msgpack::MapDocNode Kern) {
676 unsigned Offset = 0;
677 auto Args = HSAMetadataDoc->getArrayNode();
678 for (auto &Arg : Func.args())
679 emitKernelArg(Arg, Offset, Args);
680
681 emitHiddenKernelArgs(Func, Offset, Args);
682
683 Kern[".args"] = Args;
684 }
685
emitKernelArg(const Argument & Arg,unsigned & Offset,msgpack::ArrayDocNode Args)686 void MetadataStreamerV3::emitKernelArg(const Argument &Arg, unsigned &Offset,
687 msgpack::ArrayDocNode Args) {
688 auto Func = Arg.getParent();
689 auto ArgNo = Arg.getArgNo();
690 const MDNode *Node;
691
692 StringRef Name;
693 Node = Func->getMetadata("kernel_arg_name");
694 if (Node && ArgNo < Node->getNumOperands())
695 Name = cast<MDString>(Node->getOperand(ArgNo))->getString();
696 else if (Arg.hasName())
697 Name = Arg.getName();
698
699 StringRef TypeName;
700 Node = Func->getMetadata("kernel_arg_type");
701 if (Node && ArgNo < Node->getNumOperands())
702 TypeName = cast<MDString>(Node->getOperand(ArgNo))->getString();
703
704 StringRef BaseTypeName;
705 Node = Func->getMetadata("kernel_arg_base_type");
706 if (Node && ArgNo < Node->getNumOperands())
707 BaseTypeName = cast<MDString>(Node->getOperand(ArgNo))->getString();
708
709 StringRef AccQual;
710 if (Arg.getType()->isPointerTy() && Arg.onlyReadsMemory() &&
711 Arg.hasNoAliasAttr()) {
712 AccQual = "read_only";
713 } else {
714 Node = Func->getMetadata("kernel_arg_access_qual");
715 if (Node && ArgNo < Node->getNumOperands())
716 AccQual = cast<MDString>(Node->getOperand(ArgNo))->getString();
717 }
718
719 StringRef TypeQual;
720 Node = Func->getMetadata("kernel_arg_type_qual");
721 if (Node && ArgNo < Node->getNumOperands())
722 TypeQual = cast<MDString>(Node->getOperand(ArgNo))->getString();
723
724 const DataLayout &DL = Func->getParent()->getDataLayout();
725
726 MaybeAlign PointeeAlign;
727 Type *Ty = Arg.hasByRefAttr() ? Arg.getParamByRefType() : Arg.getType();
728
729 // FIXME: Need to distinguish in memory alignment from pointer alignment.
730 if (auto PtrTy = dyn_cast<PointerType>(Ty)) {
731 if (PtrTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) {
732 PointeeAlign = DL.getValueOrABITypeAlignment(Arg.getParamAlign(),
733 PtrTy->getElementType());
734 }
735 }
736
737 // There's no distinction between byval aggregates and raw aggregates.
738 Type *ArgTy;
739 Align ArgAlign;
740 std::tie(ArgTy, ArgAlign) = getArgumentTypeAlign(Arg, DL);
741
742 emitKernelArg(DL, ArgTy, ArgAlign,
743 getValueKind(ArgTy, TypeQual, BaseTypeName), Offset, Args,
744 PointeeAlign, Name, TypeName, BaseTypeName, AccQual, TypeQual);
745 }
746
emitKernelArg(const DataLayout & DL,Type * Ty,Align Alignment,StringRef ValueKind,unsigned & Offset,msgpack::ArrayDocNode Args,MaybeAlign PointeeAlign,StringRef Name,StringRef TypeName,StringRef BaseTypeName,StringRef AccQual,StringRef TypeQual)747 void MetadataStreamerV3::emitKernelArg(
748 const DataLayout &DL, Type *Ty, Align Alignment, StringRef ValueKind,
749 unsigned &Offset, msgpack::ArrayDocNode Args, MaybeAlign PointeeAlign,
750 StringRef Name, StringRef TypeName, StringRef BaseTypeName,
751 StringRef AccQual, StringRef TypeQual) {
752 auto Arg = Args.getDocument()->getMapNode();
753
754 if (!Name.empty())
755 Arg[".name"] = Arg.getDocument()->getNode(Name, /*Copy=*/true);
756 if (!TypeName.empty())
757 Arg[".type_name"] = Arg.getDocument()->getNode(TypeName, /*Copy=*/true);
758 auto Size = DL.getTypeAllocSize(Ty);
759 Arg[".size"] = Arg.getDocument()->getNode(Size);
760 Offset = alignTo(Offset, Alignment);
761 Arg[".offset"] = Arg.getDocument()->getNode(Offset);
762 Offset += Size;
763 Arg[".value_kind"] = Arg.getDocument()->getNode(ValueKind, /*Copy=*/true);
764 if (PointeeAlign)
765 Arg[".pointee_align"] = Arg.getDocument()->getNode(PointeeAlign->value());
766
767 if (auto PtrTy = dyn_cast<PointerType>(Ty))
768 if (auto Qualifier = getAddressSpaceQualifier(PtrTy->getAddressSpace()))
769 Arg[".address_space"] = Arg.getDocument()->getNode(*Qualifier, /*Copy=*/true);
770
771 if (auto AQ = getAccessQualifier(AccQual))
772 Arg[".access"] = Arg.getDocument()->getNode(*AQ, /*Copy=*/true);
773
774 // TODO: Emit Arg[".actual_access"].
775
776 SmallVector<StringRef, 1> SplitTypeQuals;
777 TypeQual.split(SplitTypeQuals, " ", -1, false);
778 for (StringRef Key : SplitTypeQuals) {
779 if (Key == "const")
780 Arg[".is_const"] = Arg.getDocument()->getNode(true);
781 else if (Key == "restrict")
782 Arg[".is_restrict"] = Arg.getDocument()->getNode(true);
783 else if (Key == "volatile")
784 Arg[".is_volatile"] = Arg.getDocument()->getNode(true);
785 else if (Key == "pipe")
786 Arg[".is_pipe"] = Arg.getDocument()->getNode(true);
787 }
788
789 Args.push_back(Arg);
790 }
791
emitHiddenKernelArgs(const Function & Func,unsigned & Offset,msgpack::ArrayDocNode Args)792 void MetadataStreamerV3::emitHiddenKernelArgs(const Function &Func,
793 unsigned &Offset,
794 msgpack::ArrayDocNode Args) {
795 int HiddenArgNumBytes =
796 getIntegerAttribute(Func, "amdgpu-implicitarg-num-bytes", 0);
797
798 if (!HiddenArgNumBytes)
799 return;
800
801 auto &DL = Func.getParent()->getDataLayout();
802 auto Int64Ty = Type::getInt64Ty(Func.getContext());
803
804 if (HiddenArgNumBytes >= 8)
805 emitKernelArg(DL, Int64Ty, Align(8), "hidden_global_offset_x", Offset,
806 Args);
807 if (HiddenArgNumBytes >= 16)
808 emitKernelArg(DL, Int64Ty, Align(8), "hidden_global_offset_y", Offset,
809 Args);
810 if (HiddenArgNumBytes >= 24)
811 emitKernelArg(DL, Int64Ty, Align(8), "hidden_global_offset_z", Offset,
812 Args);
813
814 auto Int8PtrTy =
815 Type::getInt8PtrTy(Func.getContext(), AMDGPUAS::GLOBAL_ADDRESS);
816
817 // Emit "printf buffer" argument if printf is used, otherwise emit dummy
818 // "none" argument.
819 if (HiddenArgNumBytes >= 32) {
820 if (Func.getParent()->getNamedMetadata("llvm.printf.fmts"))
821 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_printf_buffer", Offset,
822 Args);
823 else if (Func.getParent()->getFunction("__ockl_hostcall_internal")) {
824 // The printf runtime binding pass should have ensured that hostcall and
825 // printf are not used in the same module.
826 assert(!Func.getParent()->getNamedMetadata("llvm.printf.fmts"));
827 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_hostcall_buffer", Offset,
828 Args);
829 } else
830 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_none", Offset, Args);
831 }
832
833 // Emit "default queue" and "completion action" arguments if enqueue kernel is
834 // used, otherwise emit dummy "none" arguments.
835 if (HiddenArgNumBytes >= 48) {
836 if (Func.hasFnAttribute("calls-enqueue-kernel")) {
837 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_default_queue", Offset,
838 Args);
839 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_completion_action", Offset,
840 Args);
841 } else {
842 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_none", Offset, Args);
843 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_none", Offset, Args);
844 }
845 }
846
847 // Emit the pointer argument for multi-grid object.
848 if (HiddenArgNumBytes >= 56)
849 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_multigrid_sync_arg", Offset,
850 Args);
851 }
852
853 msgpack::MapDocNode
getHSAKernelProps(const MachineFunction & MF,const SIProgramInfo & ProgramInfo) const854 MetadataStreamerV3::getHSAKernelProps(const MachineFunction &MF,
855 const SIProgramInfo &ProgramInfo) const {
856 const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
857 const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
858 const Function &F = MF.getFunction();
859
860 auto Kern = HSAMetadataDoc->getMapNode();
861
862 Align MaxKernArgAlign;
863 Kern[".kernarg_segment_size"] = Kern.getDocument()->getNode(
864 STM.getKernArgSegmentSize(F, MaxKernArgAlign));
865 Kern[".group_segment_fixed_size"] =
866 Kern.getDocument()->getNode(ProgramInfo.LDSSize);
867 Kern[".private_segment_fixed_size"] =
868 Kern.getDocument()->getNode(ProgramInfo.ScratchSize);
869 Kern[".kernarg_segment_align"] =
870 Kern.getDocument()->getNode(std::max(Align(4), MaxKernArgAlign).value());
871 Kern[".wavefront_size"] =
872 Kern.getDocument()->getNode(STM.getWavefrontSize());
873 Kern[".sgpr_count"] = Kern.getDocument()->getNode(ProgramInfo.NumSGPR);
874 Kern[".vgpr_count"] = Kern.getDocument()->getNode(ProgramInfo.NumVGPR);
875 Kern[".max_flat_workgroup_size"] =
876 Kern.getDocument()->getNode(MFI.getMaxFlatWorkGroupSize());
877 Kern[".sgpr_spill_count"] =
878 Kern.getDocument()->getNode(MFI.getNumSpilledSGPRs());
879 Kern[".vgpr_spill_count"] =
880 Kern.getDocument()->getNode(MFI.getNumSpilledVGPRs());
881
882 return Kern;
883 }
884
emitTo(AMDGPUTargetStreamer & TargetStreamer)885 bool MetadataStreamerV3::emitTo(AMDGPUTargetStreamer &TargetStreamer) {
886 return TargetStreamer.EmitHSAMetadata(*HSAMetadataDoc, true);
887 }
888
begin(const Module & Mod)889 void MetadataStreamerV3::begin(const Module &Mod) {
890 emitVersion();
891 emitPrintf(Mod);
892 getRootMetadata("amdhsa.kernels") = HSAMetadataDoc->getArrayNode();
893 }
894
end()895 void MetadataStreamerV3::end() {
896 std::string HSAMetadataString;
897 raw_string_ostream StrOS(HSAMetadataString);
898 HSAMetadataDoc->toYAML(StrOS);
899
900 if (DumpHSAMetadata)
901 dump(StrOS.str());
902 if (VerifyHSAMetadata)
903 verify(StrOS.str());
904 }
905
emitKernel(const MachineFunction & MF,const SIProgramInfo & ProgramInfo)906 void MetadataStreamerV3::emitKernel(const MachineFunction &MF,
907 const SIProgramInfo &ProgramInfo) {
908 auto &Func = MF.getFunction();
909 auto Kern = getHSAKernelProps(MF, ProgramInfo);
910
911 assert(Func.getCallingConv() == CallingConv::AMDGPU_KERNEL ||
912 Func.getCallingConv() == CallingConv::SPIR_KERNEL);
913
914 auto Kernels =
915 getRootMetadata("amdhsa.kernels").getArray(/*Convert=*/true);
916
917 {
918 Kern[".name"] = Kern.getDocument()->getNode(Func.getName());
919 Kern[".symbol"] = Kern.getDocument()->getNode(
920 (Twine(Func.getName()) + Twine(".kd")).str(), /*Copy=*/true);
921 emitKernelLanguage(Func, Kern);
922 emitKernelAttrs(Func, Kern);
923 emitKernelArgs(Func, Kern);
924 }
925
926 Kernels.push_back(Kern);
927 }
928
929 } // end namespace HSAMD
930 } // end namespace AMDGPU
931 } // end namespace llvm
932