1 //===--- AMDGPUHSAMetadataStreamer.cpp --------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// AMDGPU HSA Metadata Streamer.
11 ///
12 //
13 //===----------------------------------------------------------------------===//
14
15 #include "AMDGPUHSAMetadataStreamer.h"
16 #include "AMDGPU.h"
17 #include "AMDGPUSubtarget.h"
18 #include "MCTargetDesc/AMDGPUTargetStreamer.h"
19 #include "SIMachineFunctionInfo.h"
20 #include "SIProgramInfo.h"
21 #include "Utils/AMDGPUBaseInfo.h"
22 #include "llvm/ADT/StringSwitch.h"
23 #include "llvm/IR/Constants.h"
24 #include "llvm/IR/Module.h"
25 #include "llvm/Support/raw_ostream.h"
26
27 namespace llvm {
28
29 static cl::opt<bool> DumpHSAMetadata(
30 "amdgpu-dump-hsa-metadata",
31 cl::desc("Dump AMDGPU HSA Metadata"));
32 static cl::opt<bool> VerifyHSAMetadata(
33 "amdgpu-verify-hsa-metadata",
34 cl::desc("Verify AMDGPU HSA Metadata"));
35
36 namespace AMDGPU {
37 namespace HSAMD {
38
39 //===----------------------------------------------------------------------===//
40 // HSAMetadataStreamerV2
41 //===----------------------------------------------------------------------===//
dump(StringRef HSAMetadataString) const42 void MetadataStreamerV2::dump(StringRef HSAMetadataString) const {
43 errs() << "AMDGPU HSA Metadata:\n" << HSAMetadataString << '\n';
44 }
45
verify(StringRef HSAMetadataString) const46 void MetadataStreamerV2::verify(StringRef HSAMetadataString) const {
47 errs() << "AMDGPU HSA Metadata Parser Test: ";
48
49 HSAMD::Metadata FromHSAMetadataString;
50 if (fromString(HSAMetadataString, FromHSAMetadataString)) {
51 errs() << "FAIL\n";
52 return;
53 }
54
55 std::string ToHSAMetadataString;
56 if (toString(FromHSAMetadataString, ToHSAMetadataString)) {
57 errs() << "FAIL\n";
58 return;
59 }
60
61 errs() << (HSAMetadataString == ToHSAMetadataString ? "PASS" : "FAIL")
62 << '\n';
63 if (HSAMetadataString != ToHSAMetadataString) {
64 errs() << "Original input: " << HSAMetadataString << '\n'
65 << "Produced output: " << ToHSAMetadataString << '\n';
66 }
67 }
68
69 AccessQualifier
getAccessQualifier(StringRef AccQual) const70 MetadataStreamerV2::getAccessQualifier(StringRef AccQual) const {
71 if (AccQual.empty())
72 return AccessQualifier::Unknown;
73
74 return StringSwitch<AccessQualifier>(AccQual)
75 .Case("read_only", AccessQualifier::ReadOnly)
76 .Case("write_only", AccessQualifier::WriteOnly)
77 .Case("read_write", AccessQualifier::ReadWrite)
78 .Default(AccessQualifier::Default);
79 }
80
81 AddressSpaceQualifier
getAddressSpaceQualifier(unsigned AddressSpace) const82 MetadataStreamerV2::getAddressSpaceQualifier(
83 unsigned AddressSpace) const {
84 switch (AddressSpace) {
85 case AMDGPUAS::PRIVATE_ADDRESS:
86 return AddressSpaceQualifier::Private;
87 case AMDGPUAS::GLOBAL_ADDRESS:
88 return AddressSpaceQualifier::Global;
89 case AMDGPUAS::CONSTANT_ADDRESS:
90 return AddressSpaceQualifier::Constant;
91 case AMDGPUAS::LOCAL_ADDRESS:
92 return AddressSpaceQualifier::Local;
93 case AMDGPUAS::FLAT_ADDRESS:
94 return AddressSpaceQualifier::Generic;
95 case AMDGPUAS::REGION_ADDRESS:
96 return AddressSpaceQualifier::Region;
97 default:
98 return AddressSpaceQualifier::Unknown;
99 }
100 }
101
getValueKind(Type * Ty,StringRef TypeQual,StringRef BaseTypeName) const102 ValueKind MetadataStreamerV2::getValueKind(Type *Ty, StringRef TypeQual,
103 StringRef BaseTypeName) const {
104 if (TypeQual.find("pipe") != StringRef::npos)
105 return ValueKind::Pipe;
106
107 return StringSwitch<ValueKind>(BaseTypeName)
108 .Case("image1d_t", ValueKind::Image)
109 .Case("image1d_array_t", ValueKind::Image)
110 .Case("image1d_buffer_t", ValueKind::Image)
111 .Case("image2d_t", ValueKind::Image)
112 .Case("image2d_array_t", ValueKind::Image)
113 .Case("image2d_array_depth_t", ValueKind::Image)
114 .Case("image2d_array_msaa_t", ValueKind::Image)
115 .Case("image2d_array_msaa_depth_t", ValueKind::Image)
116 .Case("image2d_depth_t", ValueKind::Image)
117 .Case("image2d_msaa_t", ValueKind::Image)
118 .Case("image2d_msaa_depth_t", ValueKind::Image)
119 .Case("image3d_t", ValueKind::Image)
120 .Case("sampler_t", ValueKind::Sampler)
121 .Case("queue_t", ValueKind::Queue)
122 .Default(isa<PointerType>(Ty) ?
123 (Ty->getPointerAddressSpace() ==
124 AMDGPUAS::LOCAL_ADDRESS ?
125 ValueKind::DynamicSharedPointer :
126 ValueKind::GlobalBuffer) :
127 ValueKind::ByValue);
128 }
129
getValueType(Type * Ty,StringRef TypeName) const130 ValueType MetadataStreamerV2::getValueType(Type *Ty, StringRef TypeName) const {
131 switch (Ty->getTypeID()) {
132 case Type::IntegerTyID: {
133 auto Signed = !TypeName.startswith("u");
134 switch (Ty->getIntegerBitWidth()) {
135 case 8:
136 return Signed ? ValueType::I8 : ValueType::U8;
137 case 16:
138 return Signed ? ValueType::I16 : ValueType::U16;
139 case 32:
140 return Signed ? ValueType::I32 : ValueType::U32;
141 case 64:
142 return Signed ? ValueType::I64 : ValueType::U64;
143 default:
144 return ValueType::Struct;
145 }
146 }
147 case Type::HalfTyID:
148 return ValueType::F16;
149 case Type::FloatTyID:
150 return ValueType::F32;
151 case Type::DoubleTyID:
152 return ValueType::F64;
153 case Type::PointerTyID:
154 return getValueType(Ty->getPointerElementType(), TypeName);
155 case Type::VectorTyID:
156 return getValueType(Ty->getVectorElementType(), TypeName);
157 default:
158 return ValueType::Struct;
159 }
160 }
161
getTypeName(Type * Ty,bool Signed) const162 std::string MetadataStreamerV2::getTypeName(Type *Ty, bool Signed) const {
163 switch (Ty->getTypeID()) {
164 case Type::IntegerTyID: {
165 if (!Signed)
166 return (Twine('u') + getTypeName(Ty, true)).str();
167
168 auto BitWidth = Ty->getIntegerBitWidth();
169 switch (BitWidth) {
170 case 8:
171 return "char";
172 case 16:
173 return "short";
174 case 32:
175 return "int";
176 case 64:
177 return "long";
178 default:
179 return (Twine('i') + Twine(BitWidth)).str();
180 }
181 }
182 case Type::HalfTyID:
183 return "half";
184 case Type::FloatTyID:
185 return "float";
186 case Type::DoubleTyID:
187 return "double";
188 case Type::VectorTyID: {
189 auto VecTy = cast<VectorType>(Ty);
190 auto ElTy = VecTy->getElementType();
191 auto NumElements = VecTy->getVectorNumElements();
192 return (Twine(getTypeName(ElTy, Signed)) + Twine(NumElements)).str();
193 }
194 default:
195 return "unknown";
196 }
197 }
198
199 std::vector<uint32_t>
getWorkGroupDimensions(MDNode * Node) const200 MetadataStreamerV2::getWorkGroupDimensions(MDNode *Node) const {
201 std::vector<uint32_t> Dims;
202 if (Node->getNumOperands() != 3)
203 return Dims;
204
205 for (auto &Op : Node->operands())
206 Dims.push_back(mdconst::extract<ConstantInt>(Op)->getZExtValue());
207 return Dims;
208 }
209
210 Kernel::CodeProps::Metadata
getHSACodeProps(const MachineFunction & MF,const SIProgramInfo & ProgramInfo) const211 MetadataStreamerV2::getHSACodeProps(const MachineFunction &MF,
212 const SIProgramInfo &ProgramInfo) const {
213 const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
214 const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
215 HSAMD::Kernel::CodeProps::Metadata HSACodeProps;
216 const Function &F = MF.getFunction();
217
218 assert(F.getCallingConv() == CallingConv::AMDGPU_KERNEL ||
219 F.getCallingConv() == CallingConv::SPIR_KERNEL);
220
221 Align MaxKernArgAlign;
222 HSACodeProps.mKernargSegmentSize = STM.getKernArgSegmentSize(F,
223 MaxKernArgAlign);
224 HSACodeProps.mGroupSegmentFixedSize = ProgramInfo.LDSSize;
225 HSACodeProps.mPrivateSegmentFixedSize = ProgramInfo.ScratchSize;
226 HSACodeProps.mKernargSegmentAlign =
227 std::max(MaxKernArgAlign, Align(4)).value();
228 HSACodeProps.mWavefrontSize = STM.getWavefrontSize();
229 HSACodeProps.mNumSGPRs = ProgramInfo.NumSGPR;
230 HSACodeProps.mNumVGPRs = ProgramInfo.NumVGPR;
231 HSACodeProps.mMaxFlatWorkGroupSize = MFI.getMaxFlatWorkGroupSize();
232 HSACodeProps.mIsDynamicCallStack = ProgramInfo.DynamicCallStack;
233 HSACodeProps.mIsXNACKEnabled = STM.isXNACKEnabled();
234 HSACodeProps.mNumSpilledSGPRs = MFI.getNumSpilledSGPRs();
235 HSACodeProps.mNumSpilledVGPRs = MFI.getNumSpilledVGPRs();
236
237 return HSACodeProps;
238 }
239
240 Kernel::DebugProps::Metadata
getHSADebugProps(const MachineFunction & MF,const SIProgramInfo & ProgramInfo) const241 MetadataStreamerV2::getHSADebugProps(const MachineFunction &MF,
242 const SIProgramInfo &ProgramInfo) const {
243 return HSAMD::Kernel::DebugProps::Metadata();
244 }
245
emitVersion()246 void MetadataStreamerV2::emitVersion() {
247 auto &Version = HSAMetadata.mVersion;
248
249 Version.push_back(VersionMajor);
250 Version.push_back(VersionMinor);
251 }
252
emitPrintf(const Module & Mod)253 void MetadataStreamerV2::emitPrintf(const Module &Mod) {
254 auto &Printf = HSAMetadata.mPrintf;
255
256 auto Node = Mod.getNamedMetadata("llvm.printf.fmts");
257 if (!Node)
258 return;
259
260 for (auto Op : Node->operands())
261 if (Op->getNumOperands())
262 Printf.push_back(cast<MDString>(Op->getOperand(0))->getString());
263 }
264
emitKernelLanguage(const Function & Func)265 void MetadataStreamerV2::emitKernelLanguage(const Function &Func) {
266 auto &Kernel = HSAMetadata.mKernels.back();
267
268 // TODO: What about other languages?
269 auto Node = Func.getParent()->getNamedMetadata("opencl.ocl.version");
270 if (!Node || !Node->getNumOperands())
271 return;
272 auto Op0 = Node->getOperand(0);
273 if (Op0->getNumOperands() <= 1)
274 return;
275
276 Kernel.mLanguage = "OpenCL C";
277 Kernel.mLanguageVersion.push_back(
278 mdconst::extract<ConstantInt>(Op0->getOperand(0))->getZExtValue());
279 Kernel.mLanguageVersion.push_back(
280 mdconst::extract<ConstantInt>(Op0->getOperand(1))->getZExtValue());
281 }
282
emitKernelAttrs(const Function & Func)283 void MetadataStreamerV2::emitKernelAttrs(const Function &Func) {
284 auto &Attrs = HSAMetadata.mKernels.back().mAttrs;
285
286 if (auto Node = Func.getMetadata("reqd_work_group_size"))
287 Attrs.mReqdWorkGroupSize = getWorkGroupDimensions(Node);
288 if (auto Node = Func.getMetadata("work_group_size_hint"))
289 Attrs.mWorkGroupSizeHint = getWorkGroupDimensions(Node);
290 if (auto Node = Func.getMetadata("vec_type_hint")) {
291 Attrs.mVecTypeHint = getTypeName(
292 cast<ValueAsMetadata>(Node->getOperand(0))->getType(),
293 mdconst::extract<ConstantInt>(Node->getOperand(1))->getZExtValue());
294 }
295 if (Func.hasFnAttribute("runtime-handle")) {
296 Attrs.mRuntimeHandle =
297 Func.getFnAttribute("runtime-handle").getValueAsString().str();
298 }
299 }
300
emitKernelArgs(const Function & Func)301 void MetadataStreamerV2::emitKernelArgs(const Function &Func) {
302 for (auto &Arg : Func.args())
303 emitKernelArg(Arg);
304
305 emitHiddenKernelArgs(Func);
306 }
307
emitKernelArg(const Argument & Arg)308 void MetadataStreamerV2::emitKernelArg(const Argument &Arg) {
309 auto Func = Arg.getParent();
310 auto ArgNo = Arg.getArgNo();
311 const MDNode *Node;
312
313 StringRef Name;
314 Node = Func->getMetadata("kernel_arg_name");
315 if (Node && ArgNo < Node->getNumOperands())
316 Name = cast<MDString>(Node->getOperand(ArgNo))->getString();
317 else if (Arg.hasName())
318 Name = Arg.getName();
319
320 StringRef TypeName;
321 Node = Func->getMetadata("kernel_arg_type");
322 if (Node && ArgNo < Node->getNumOperands())
323 TypeName = cast<MDString>(Node->getOperand(ArgNo))->getString();
324
325 StringRef BaseTypeName;
326 Node = Func->getMetadata("kernel_arg_base_type");
327 if (Node && ArgNo < Node->getNumOperands())
328 BaseTypeName = cast<MDString>(Node->getOperand(ArgNo))->getString();
329
330 StringRef AccQual;
331 if (Arg.getType()->isPointerTy() && Arg.onlyReadsMemory() &&
332 Arg.hasNoAliasAttr()) {
333 AccQual = "read_only";
334 } else {
335 Node = Func->getMetadata("kernel_arg_access_qual");
336 if (Node && ArgNo < Node->getNumOperands())
337 AccQual = cast<MDString>(Node->getOperand(ArgNo))->getString();
338 }
339
340 StringRef TypeQual;
341 Node = Func->getMetadata("kernel_arg_type_qual");
342 if (Node && ArgNo < Node->getNumOperands())
343 TypeQual = cast<MDString>(Node->getOperand(ArgNo))->getString();
344
345 Type *Ty = Arg.getType();
346 const DataLayout &DL = Func->getParent()->getDataLayout();
347
348 unsigned PointeeAlign = 0;
349 if (auto PtrTy = dyn_cast<PointerType>(Ty)) {
350 if (PtrTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) {
351 PointeeAlign = Arg.getParamAlignment();
352 if (PointeeAlign == 0)
353 PointeeAlign = DL.getABITypeAlignment(PtrTy->getElementType());
354 }
355 }
356
357 emitKernelArg(DL, Ty, getValueKind(Arg.getType(), TypeQual, BaseTypeName),
358 PointeeAlign, Name, TypeName, BaseTypeName, AccQual, TypeQual);
359 }
360
emitKernelArg(const DataLayout & DL,Type * Ty,ValueKind ValueKind,unsigned PointeeAlign,StringRef Name,StringRef TypeName,StringRef BaseTypeName,StringRef AccQual,StringRef TypeQual)361 void MetadataStreamerV2::emitKernelArg(const DataLayout &DL, Type *Ty,
362 ValueKind ValueKind,
363 unsigned PointeeAlign, StringRef Name,
364 StringRef TypeName,
365 StringRef BaseTypeName,
366 StringRef AccQual, StringRef TypeQual) {
367 HSAMetadata.mKernels.back().mArgs.push_back(Kernel::Arg::Metadata());
368 auto &Arg = HSAMetadata.mKernels.back().mArgs.back();
369
370 Arg.mName = Name;
371 Arg.mTypeName = TypeName;
372 Arg.mSize = DL.getTypeAllocSize(Ty);
373 Arg.mAlign = DL.getABITypeAlignment(Ty);
374 Arg.mValueKind = ValueKind;
375 Arg.mValueType = getValueType(Ty, BaseTypeName);
376 Arg.mPointeeAlign = PointeeAlign;
377
378 if (auto PtrTy = dyn_cast<PointerType>(Ty))
379 Arg.mAddrSpaceQual = getAddressSpaceQualifier(PtrTy->getAddressSpace());
380
381 Arg.mAccQual = getAccessQualifier(AccQual);
382
383 // TODO: Emit Arg.mActualAccQual.
384
385 SmallVector<StringRef, 1> SplitTypeQuals;
386 TypeQual.split(SplitTypeQuals, " ", -1, false);
387 for (StringRef Key : SplitTypeQuals) {
388 auto P = StringSwitch<bool*>(Key)
389 .Case("const", &Arg.mIsConst)
390 .Case("restrict", &Arg.mIsRestrict)
391 .Case("volatile", &Arg.mIsVolatile)
392 .Case("pipe", &Arg.mIsPipe)
393 .Default(nullptr);
394 if (P)
395 *P = true;
396 }
397 }
398
emitHiddenKernelArgs(const Function & Func)399 void MetadataStreamerV2::emitHiddenKernelArgs(const Function &Func) {
400 int HiddenArgNumBytes =
401 getIntegerAttribute(Func, "amdgpu-implicitarg-num-bytes", 0);
402
403 if (!HiddenArgNumBytes)
404 return;
405
406 auto &DL = Func.getParent()->getDataLayout();
407 auto Int64Ty = Type::getInt64Ty(Func.getContext());
408
409 if (HiddenArgNumBytes >= 8)
410 emitKernelArg(DL, Int64Ty, ValueKind::HiddenGlobalOffsetX);
411 if (HiddenArgNumBytes >= 16)
412 emitKernelArg(DL, Int64Ty, ValueKind::HiddenGlobalOffsetY);
413 if (HiddenArgNumBytes >= 24)
414 emitKernelArg(DL, Int64Ty, ValueKind::HiddenGlobalOffsetZ);
415
416 auto Int8PtrTy = Type::getInt8PtrTy(Func.getContext(),
417 AMDGPUAS::GLOBAL_ADDRESS);
418
419 // Emit "printf buffer" argument if printf is used, otherwise emit dummy
420 // "none" argument.
421 if (HiddenArgNumBytes >= 32) {
422 if (Func.getParent()->getNamedMetadata("llvm.printf.fmts"))
423 emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenPrintfBuffer);
424 else if (Func.getParent()->getFunction("__ockl_hostcall_internal")) {
425 // The printf runtime binding pass should have ensured that hostcall and
426 // printf are not used in the same module.
427 assert(!Func.getParent()->getNamedMetadata("llvm.printf.fmts"));
428 emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenHostcallBuffer);
429 } else
430 emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenNone);
431 }
432
433 // Emit "default queue" and "completion action" arguments if enqueue kernel is
434 // used, otherwise emit dummy "none" arguments.
435 if (HiddenArgNumBytes >= 48) {
436 if (Func.hasFnAttribute("calls-enqueue-kernel")) {
437 emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenDefaultQueue);
438 emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenCompletionAction);
439 } else {
440 emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenNone);
441 emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenNone);
442 }
443 }
444
445 // Emit the pointer argument for multi-grid object.
446 if (HiddenArgNumBytes >= 56)
447 emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenMultiGridSyncArg);
448 }
449
emitTo(AMDGPUTargetStreamer & TargetStreamer)450 bool MetadataStreamerV2::emitTo(AMDGPUTargetStreamer &TargetStreamer) {
451 return TargetStreamer.EmitHSAMetadata(getHSAMetadata());
452 }
453
begin(const Module & Mod)454 void MetadataStreamerV2::begin(const Module &Mod) {
455 emitVersion();
456 emitPrintf(Mod);
457 }
458
end()459 void MetadataStreamerV2::end() {
460 std::string HSAMetadataString;
461 if (toString(HSAMetadata, HSAMetadataString))
462 return;
463
464 if (DumpHSAMetadata)
465 dump(HSAMetadataString);
466 if (VerifyHSAMetadata)
467 verify(HSAMetadataString);
468 }
469
emitKernel(const MachineFunction & MF,const SIProgramInfo & ProgramInfo)470 void MetadataStreamerV2::emitKernel(const MachineFunction &MF,
471 const SIProgramInfo &ProgramInfo) {
472 auto &Func = MF.getFunction();
473 if (Func.getCallingConv() != CallingConv::AMDGPU_KERNEL)
474 return;
475
476 auto CodeProps = getHSACodeProps(MF, ProgramInfo);
477 auto DebugProps = getHSADebugProps(MF, ProgramInfo);
478
479 HSAMetadata.mKernels.push_back(Kernel::Metadata());
480 auto &Kernel = HSAMetadata.mKernels.back();
481
482 Kernel.mName = Func.getName();
483 Kernel.mSymbolName = (Twine(Func.getName()) + Twine("@kd")).str();
484 emitKernelLanguage(Func);
485 emitKernelAttrs(Func);
486 emitKernelArgs(Func);
487 HSAMetadata.mKernels.back().mCodeProps = CodeProps;
488 HSAMetadata.mKernels.back().mDebugProps = DebugProps;
489 }
490
491 //===----------------------------------------------------------------------===//
492 // HSAMetadataStreamerV3
493 //===----------------------------------------------------------------------===//
494
dump(StringRef HSAMetadataString) const495 void MetadataStreamerV3::dump(StringRef HSAMetadataString) const {
496 errs() << "AMDGPU HSA Metadata:\n" << HSAMetadataString << '\n';
497 }
498
verify(StringRef HSAMetadataString) const499 void MetadataStreamerV3::verify(StringRef HSAMetadataString) const {
500 errs() << "AMDGPU HSA Metadata Parser Test: ";
501
502 msgpack::Document FromHSAMetadataString;
503
504 if (!FromHSAMetadataString.fromYAML(HSAMetadataString)) {
505 errs() << "FAIL\n";
506 return;
507 }
508
509 std::string ToHSAMetadataString;
510 raw_string_ostream StrOS(ToHSAMetadataString);
511 FromHSAMetadataString.toYAML(StrOS);
512
513 errs() << (HSAMetadataString == StrOS.str() ? "PASS" : "FAIL") << '\n';
514 if (HSAMetadataString != ToHSAMetadataString) {
515 errs() << "Original input: " << HSAMetadataString << '\n'
516 << "Produced output: " << StrOS.str() << '\n';
517 }
518 }
519
520 Optional<StringRef>
getAccessQualifier(StringRef AccQual) const521 MetadataStreamerV3::getAccessQualifier(StringRef AccQual) const {
522 return StringSwitch<Optional<StringRef>>(AccQual)
523 .Case("read_only", StringRef("read_only"))
524 .Case("write_only", StringRef("write_only"))
525 .Case("read_write", StringRef("read_write"))
526 .Default(None);
527 }
528
529 Optional<StringRef>
getAddressSpaceQualifier(unsigned AddressSpace) const530 MetadataStreamerV3::getAddressSpaceQualifier(unsigned AddressSpace) const {
531 switch (AddressSpace) {
532 case AMDGPUAS::PRIVATE_ADDRESS:
533 return StringRef("private");
534 case AMDGPUAS::GLOBAL_ADDRESS:
535 return StringRef("global");
536 case AMDGPUAS::CONSTANT_ADDRESS:
537 return StringRef("constant");
538 case AMDGPUAS::LOCAL_ADDRESS:
539 return StringRef("local");
540 case AMDGPUAS::FLAT_ADDRESS:
541 return StringRef("generic");
542 case AMDGPUAS::REGION_ADDRESS:
543 return StringRef("region");
544 default:
545 return None;
546 }
547 }
548
getValueKind(Type * Ty,StringRef TypeQual,StringRef BaseTypeName) const549 StringRef MetadataStreamerV3::getValueKind(Type *Ty, StringRef TypeQual,
550 StringRef BaseTypeName) const {
551 if (TypeQual.find("pipe") != StringRef::npos)
552 return "pipe";
553
554 return StringSwitch<StringRef>(BaseTypeName)
555 .Case("image1d_t", "image")
556 .Case("image1d_array_t", "image")
557 .Case("image1d_buffer_t", "image")
558 .Case("image2d_t", "image")
559 .Case("image2d_array_t", "image")
560 .Case("image2d_array_depth_t", "image")
561 .Case("image2d_array_msaa_t", "image")
562 .Case("image2d_array_msaa_depth_t", "image")
563 .Case("image2d_depth_t", "image")
564 .Case("image2d_msaa_t", "image")
565 .Case("image2d_msaa_depth_t", "image")
566 .Case("image3d_t", "image")
567 .Case("sampler_t", "sampler")
568 .Case("queue_t", "queue")
569 .Default(isa<PointerType>(Ty)
570 ? (Ty->getPointerAddressSpace() == AMDGPUAS::LOCAL_ADDRESS
571 ? "dynamic_shared_pointer"
572 : "global_buffer")
573 : "by_value");
574 }
575
getValueType(Type * Ty,StringRef TypeName) const576 StringRef MetadataStreamerV3::getValueType(Type *Ty, StringRef TypeName) const {
577 switch (Ty->getTypeID()) {
578 case Type::IntegerTyID: {
579 auto Signed = !TypeName.startswith("u");
580 switch (Ty->getIntegerBitWidth()) {
581 case 8:
582 return Signed ? "i8" : "u8";
583 case 16:
584 return Signed ? "i16" : "u16";
585 case 32:
586 return Signed ? "i32" : "u32";
587 case 64:
588 return Signed ? "i64" : "u64";
589 default:
590 return "struct";
591 }
592 }
593 case Type::HalfTyID:
594 return "f16";
595 case Type::FloatTyID:
596 return "f32";
597 case Type::DoubleTyID:
598 return "f64";
599 case Type::PointerTyID:
600 return getValueType(Ty->getPointerElementType(), TypeName);
601 case Type::VectorTyID:
602 return getValueType(Ty->getVectorElementType(), TypeName);
603 default:
604 return "struct";
605 }
606 }
607
getTypeName(Type * Ty,bool Signed) const608 std::string MetadataStreamerV3::getTypeName(Type *Ty, bool Signed) const {
609 switch (Ty->getTypeID()) {
610 case Type::IntegerTyID: {
611 if (!Signed)
612 return (Twine('u') + getTypeName(Ty, true)).str();
613
614 auto BitWidth = Ty->getIntegerBitWidth();
615 switch (BitWidth) {
616 case 8:
617 return "char";
618 case 16:
619 return "short";
620 case 32:
621 return "int";
622 case 64:
623 return "long";
624 default:
625 return (Twine('i') + Twine(BitWidth)).str();
626 }
627 }
628 case Type::HalfTyID:
629 return "half";
630 case Type::FloatTyID:
631 return "float";
632 case Type::DoubleTyID:
633 return "double";
634 case Type::VectorTyID: {
635 auto VecTy = cast<VectorType>(Ty);
636 auto ElTy = VecTy->getElementType();
637 auto NumElements = VecTy->getVectorNumElements();
638 return (Twine(getTypeName(ElTy, Signed)) + Twine(NumElements)).str();
639 }
640 default:
641 return "unknown";
642 }
643 }
644
645 msgpack::ArrayDocNode
getWorkGroupDimensions(MDNode * Node) const646 MetadataStreamerV3::getWorkGroupDimensions(MDNode *Node) const {
647 auto Dims = HSAMetadataDoc->getArrayNode();
648 if (Node->getNumOperands() != 3)
649 return Dims;
650
651 for (auto &Op : Node->operands())
652 Dims.push_back(Dims.getDocument()->getNode(
653 uint64_t(mdconst::extract<ConstantInt>(Op)->getZExtValue())));
654 return Dims;
655 }
656
emitVersion()657 void MetadataStreamerV3::emitVersion() {
658 auto Version = HSAMetadataDoc->getArrayNode();
659 Version.push_back(Version.getDocument()->getNode(VersionMajor));
660 Version.push_back(Version.getDocument()->getNode(VersionMinor));
661 getRootMetadata("amdhsa.version") = Version;
662 }
663
emitPrintf(const Module & Mod)664 void MetadataStreamerV3::emitPrintf(const Module &Mod) {
665 auto Node = Mod.getNamedMetadata("llvm.printf.fmts");
666 if (!Node)
667 return;
668
669 auto Printf = HSAMetadataDoc->getArrayNode();
670 for (auto Op : Node->operands())
671 if (Op->getNumOperands())
672 Printf.push_back(Printf.getDocument()->getNode(
673 cast<MDString>(Op->getOperand(0))->getString(), /*Copy=*/true));
674 getRootMetadata("amdhsa.printf") = Printf;
675 }
676
emitKernelLanguage(const Function & Func,msgpack::MapDocNode Kern)677 void MetadataStreamerV3::emitKernelLanguage(const Function &Func,
678 msgpack::MapDocNode Kern) {
679 // TODO: What about other languages?
680 auto Node = Func.getParent()->getNamedMetadata("opencl.ocl.version");
681 if (!Node || !Node->getNumOperands())
682 return;
683 auto Op0 = Node->getOperand(0);
684 if (Op0->getNumOperands() <= 1)
685 return;
686
687 Kern[".language"] = Kern.getDocument()->getNode("OpenCL C");
688 auto LanguageVersion = Kern.getDocument()->getArrayNode();
689 LanguageVersion.push_back(Kern.getDocument()->getNode(
690 mdconst::extract<ConstantInt>(Op0->getOperand(0))->getZExtValue()));
691 LanguageVersion.push_back(Kern.getDocument()->getNode(
692 mdconst::extract<ConstantInt>(Op0->getOperand(1))->getZExtValue()));
693 Kern[".language_version"] = LanguageVersion;
694 }
695
emitKernelAttrs(const Function & Func,msgpack::MapDocNode Kern)696 void MetadataStreamerV3::emitKernelAttrs(const Function &Func,
697 msgpack::MapDocNode Kern) {
698
699 if (auto Node = Func.getMetadata("reqd_work_group_size"))
700 Kern[".reqd_workgroup_size"] = getWorkGroupDimensions(Node);
701 if (auto Node = Func.getMetadata("work_group_size_hint"))
702 Kern[".workgroup_size_hint"] = getWorkGroupDimensions(Node);
703 if (auto Node = Func.getMetadata("vec_type_hint")) {
704 Kern[".vec_type_hint"] = Kern.getDocument()->getNode(
705 getTypeName(
706 cast<ValueAsMetadata>(Node->getOperand(0))->getType(),
707 mdconst::extract<ConstantInt>(Node->getOperand(1))->getZExtValue()),
708 /*Copy=*/true);
709 }
710 if (Func.hasFnAttribute("runtime-handle")) {
711 Kern[".device_enqueue_symbol"] = Kern.getDocument()->getNode(
712 Func.getFnAttribute("runtime-handle").getValueAsString().str(),
713 /*Copy=*/true);
714 }
715 }
716
emitKernelArgs(const Function & Func,msgpack::MapDocNode Kern)717 void MetadataStreamerV3::emitKernelArgs(const Function &Func,
718 msgpack::MapDocNode Kern) {
719 unsigned Offset = 0;
720 auto Args = HSAMetadataDoc->getArrayNode();
721 for (auto &Arg : Func.args())
722 emitKernelArg(Arg, Offset, Args);
723
724 emitHiddenKernelArgs(Func, Offset, Args);
725
726 Kern[".args"] = Args;
727 }
728
emitKernelArg(const Argument & Arg,unsigned & Offset,msgpack::ArrayDocNode Args)729 void MetadataStreamerV3::emitKernelArg(const Argument &Arg, unsigned &Offset,
730 msgpack::ArrayDocNode Args) {
731 auto Func = Arg.getParent();
732 auto ArgNo = Arg.getArgNo();
733 const MDNode *Node;
734
735 StringRef Name;
736 Node = Func->getMetadata("kernel_arg_name");
737 if (Node && ArgNo < Node->getNumOperands())
738 Name = cast<MDString>(Node->getOperand(ArgNo))->getString();
739 else if (Arg.hasName())
740 Name = Arg.getName();
741
742 StringRef TypeName;
743 Node = Func->getMetadata("kernel_arg_type");
744 if (Node && ArgNo < Node->getNumOperands())
745 TypeName = cast<MDString>(Node->getOperand(ArgNo))->getString();
746
747 StringRef BaseTypeName;
748 Node = Func->getMetadata("kernel_arg_base_type");
749 if (Node && ArgNo < Node->getNumOperands())
750 BaseTypeName = cast<MDString>(Node->getOperand(ArgNo))->getString();
751
752 StringRef AccQual;
753 if (Arg.getType()->isPointerTy() && Arg.onlyReadsMemory() &&
754 Arg.hasNoAliasAttr()) {
755 AccQual = "read_only";
756 } else {
757 Node = Func->getMetadata("kernel_arg_access_qual");
758 if (Node && ArgNo < Node->getNumOperands())
759 AccQual = cast<MDString>(Node->getOperand(ArgNo))->getString();
760 }
761
762 StringRef TypeQual;
763 Node = Func->getMetadata("kernel_arg_type_qual");
764 if (Node && ArgNo < Node->getNumOperands())
765 TypeQual = cast<MDString>(Node->getOperand(ArgNo))->getString();
766
767 Type *Ty = Arg.getType();
768 const DataLayout &DL = Func->getParent()->getDataLayout();
769
770 unsigned PointeeAlign = 0;
771 if (auto PtrTy = dyn_cast<PointerType>(Ty)) {
772 if (PtrTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) {
773 PointeeAlign = Arg.getParamAlignment();
774 if (PointeeAlign == 0)
775 PointeeAlign = DL.getABITypeAlignment(PtrTy->getElementType());
776 }
777 }
778
779 emitKernelArg(Func->getParent()->getDataLayout(), Arg.getType(),
780 getValueKind(Arg.getType(), TypeQual, BaseTypeName), Offset,
781 Args, PointeeAlign, Name, TypeName, BaseTypeName, AccQual,
782 TypeQual);
783 }
784
emitKernelArg(const DataLayout & DL,Type * Ty,StringRef ValueKind,unsigned & Offset,msgpack::ArrayDocNode Args,unsigned PointeeAlign,StringRef Name,StringRef TypeName,StringRef BaseTypeName,StringRef AccQual,StringRef TypeQual)785 void MetadataStreamerV3::emitKernelArg(const DataLayout &DL, Type *Ty,
786 StringRef ValueKind, unsigned &Offset,
787 msgpack::ArrayDocNode Args,
788 unsigned PointeeAlign, StringRef Name,
789 StringRef TypeName,
790 StringRef BaseTypeName,
791 StringRef AccQual, StringRef TypeQual) {
792 auto Arg = Args.getDocument()->getMapNode();
793
794 if (!Name.empty())
795 Arg[".name"] = Arg.getDocument()->getNode(Name, /*Copy=*/true);
796 if (!TypeName.empty())
797 Arg[".type_name"] = Arg.getDocument()->getNode(TypeName, /*Copy=*/true);
798 auto Size = DL.getTypeAllocSize(Ty);
799 auto Align = DL.getABITypeAlignment(Ty);
800 Arg[".size"] = Arg.getDocument()->getNode(Size);
801 Offset = alignTo(Offset, Align);
802 Arg[".offset"] = Arg.getDocument()->getNode(Offset);
803 Offset += Size;
804 Arg[".value_kind"] = Arg.getDocument()->getNode(ValueKind, /*Copy=*/true);
805 Arg[".value_type"] =
806 Arg.getDocument()->getNode(getValueType(Ty, BaseTypeName), /*Copy=*/true);
807 if (PointeeAlign)
808 Arg[".pointee_align"] = Arg.getDocument()->getNode(PointeeAlign);
809
810 if (auto PtrTy = dyn_cast<PointerType>(Ty))
811 if (auto Qualifier = getAddressSpaceQualifier(PtrTy->getAddressSpace()))
812 Arg[".address_space"] = Arg.getDocument()->getNode(*Qualifier, /*Copy=*/true);
813
814 if (auto AQ = getAccessQualifier(AccQual))
815 Arg[".access"] = Arg.getDocument()->getNode(*AQ, /*Copy=*/true);
816
817 // TODO: Emit Arg[".actual_access"].
818
819 SmallVector<StringRef, 1> SplitTypeQuals;
820 TypeQual.split(SplitTypeQuals, " ", -1, false);
821 for (StringRef Key : SplitTypeQuals) {
822 if (Key == "const")
823 Arg[".is_const"] = Arg.getDocument()->getNode(true);
824 else if (Key == "restrict")
825 Arg[".is_restrict"] = Arg.getDocument()->getNode(true);
826 else if (Key == "volatile")
827 Arg[".is_volatile"] = Arg.getDocument()->getNode(true);
828 else if (Key == "pipe")
829 Arg[".is_pipe"] = Arg.getDocument()->getNode(true);
830 }
831
832 Args.push_back(Arg);
833 }
834
emitHiddenKernelArgs(const Function & Func,unsigned & Offset,msgpack::ArrayDocNode Args)835 void MetadataStreamerV3::emitHiddenKernelArgs(const Function &Func,
836 unsigned &Offset,
837 msgpack::ArrayDocNode Args) {
838 int HiddenArgNumBytes =
839 getIntegerAttribute(Func, "amdgpu-implicitarg-num-bytes", 0);
840
841 if (!HiddenArgNumBytes)
842 return;
843
844 auto &DL = Func.getParent()->getDataLayout();
845 auto Int64Ty = Type::getInt64Ty(Func.getContext());
846
847 if (HiddenArgNumBytes >= 8)
848 emitKernelArg(DL, Int64Ty, "hidden_global_offset_x", Offset, Args);
849 if (HiddenArgNumBytes >= 16)
850 emitKernelArg(DL, Int64Ty, "hidden_global_offset_y", Offset, Args);
851 if (HiddenArgNumBytes >= 24)
852 emitKernelArg(DL, Int64Ty, "hidden_global_offset_z", Offset, Args);
853
854 auto Int8PtrTy =
855 Type::getInt8PtrTy(Func.getContext(), AMDGPUAS::GLOBAL_ADDRESS);
856
857 // Emit "printf buffer" argument if printf is used, otherwise emit dummy
858 // "none" argument.
859 if (HiddenArgNumBytes >= 32) {
860 if (Func.getParent()->getNamedMetadata("llvm.printf.fmts"))
861 emitKernelArg(DL, Int8PtrTy, "hidden_printf_buffer", Offset, Args);
862 else if (Func.getParent()->getFunction("__ockl_hostcall_internal")) {
863 // The printf runtime binding pass should have ensured that hostcall and
864 // printf are not used in the same module.
865 assert(!Func.getParent()->getNamedMetadata("llvm.printf.fmts"));
866 emitKernelArg(DL, Int8PtrTy, "hidden_hostcall_buffer", Offset, Args);
867 } else
868 emitKernelArg(DL, Int8PtrTy, "hidden_none", Offset, Args);
869 }
870
871 // Emit "default queue" and "completion action" arguments if enqueue kernel is
872 // used, otherwise emit dummy "none" arguments.
873 if (HiddenArgNumBytes >= 48) {
874 if (Func.hasFnAttribute("calls-enqueue-kernel")) {
875 emitKernelArg(DL, Int8PtrTy, "hidden_default_queue", Offset, Args);
876 emitKernelArg(DL, Int8PtrTy, "hidden_completion_action", Offset, Args);
877 } else {
878 emitKernelArg(DL, Int8PtrTy, "hidden_none", Offset, Args);
879 emitKernelArg(DL, Int8PtrTy, "hidden_none", Offset, Args);
880 }
881 }
882
883 // Emit the pointer argument for multi-grid object.
884 if (HiddenArgNumBytes >= 56)
885 emitKernelArg(DL, Int8PtrTy, "hidden_multigrid_sync_arg", Offset, Args);
886 }
887
888 msgpack::MapDocNode
getHSAKernelProps(const MachineFunction & MF,const SIProgramInfo & ProgramInfo) const889 MetadataStreamerV3::getHSAKernelProps(const MachineFunction &MF,
890 const SIProgramInfo &ProgramInfo) const {
891 const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
892 const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
893 const Function &F = MF.getFunction();
894
895 auto Kern = HSAMetadataDoc->getMapNode();
896
897 Align MaxKernArgAlign;
898 Kern[".kernarg_segment_size"] = Kern.getDocument()->getNode(
899 STM.getKernArgSegmentSize(F, MaxKernArgAlign));
900 Kern[".group_segment_fixed_size"] =
901 Kern.getDocument()->getNode(ProgramInfo.LDSSize);
902 Kern[".private_segment_fixed_size"] =
903 Kern.getDocument()->getNode(ProgramInfo.ScratchSize);
904 Kern[".kernarg_segment_align"] =
905 Kern.getDocument()->getNode(std::max(Align(4), MaxKernArgAlign).value());
906 Kern[".wavefront_size"] =
907 Kern.getDocument()->getNode(STM.getWavefrontSize());
908 Kern[".sgpr_count"] = Kern.getDocument()->getNode(ProgramInfo.NumSGPR);
909 Kern[".vgpr_count"] = Kern.getDocument()->getNode(ProgramInfo.NumVGPR);
910 Kern[".max_flat_workgroup_size"] =
911 Kern.getDocument()->getNode(MFI.getMaxFlatWorkGroupSize());
912 Kern[".sgpr_spill_count"] =
913 Kern.getDocument()->getNode(MFI.getNumSpilledSGPRs());
914 Kern[".vgpr_spill_count"] =
915 Kern.getDocument()->getNode(MFI.getNumSpilledVGPRs());
916
917 return Kern;
918 }
919
emitTo(AMDGPUTargetStreamer & TargetStreamer)920 bool MetadataStreamerV3::emitTo(AMDGPUTargetStreamer &TargetStreamer) {
921 return TargetStreamer.EmitHSAMetadata(*HSAMetadataDoc, true);
922 }
923
begin(const Module & Mod)924 void MetadataStreamerV3::begin(const Module &Mod) {
925 emitVersion();
926 emitPrintf(Mod);
927 getRootMetadata("amdhsa.kernels") = HSAMetadataDoc->getArrayNode();
928 }
929
end()930 void MetadataStreamerV3::end() {
931 std::string HSAMetadataString;
932 raw_string_ostream StrOS(HSAMetadataString);
933 HSAMetadataDoc->toYAML(StrOS);
934
935 if (DumpHSAMetadata)
936 dump(StrOS.str());
937 if (VerifyHSAMetadata)
938 verify(StrOS.str());
939 }
940
emitKernel(const MachineFunction & MF,const SIProgramInfo & ProgramInfo)941 void MetadataStreamerV3::emitKernel(const MachineFunction &MF,
942 const SIProgramInfo &ProgramInfo) {
943 auto &Func = MF.getFunction();
944 auto Kern = getHSAKernelProps(MF, ProgramInfo);
945
946 assert(Func.getCallingConv() == CallingConv::AMDGPU_KERNEL ||
947 Func.getCallingConv() == CallingConv::SPIR_KERNEL);
948
949 auto Kernels =
950 getRootMetadata("amdhsa.kernels").getArray(/*Convert=*/true);
951
952 {
953 Kern[".name"] = Kern.getDocument()->getNode(Func.getName());
954 Kern[".symbol"] = Kern.getDocument()->getNode(
955 (Twine(Func.getName()) + Twine(".kd")).str(), /*Copy=*/true);
956 emitKernelLanguage(Func, Kern);
957 emitKernelAttrs(Func, Kern);
958 emitKernelArgs(Func, Kern);
959 }
960
961 Kernels.push_back(Kern);
962 }
963
964 } // end namespace HSAMD
965 } // end namespace AMDGPU
966 } // end namespace llvm
967