//===- AMDGPUAttributor.cpp -----------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file This pass uses the Attributor framework to deduce AMDGPU attributes.
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "GCNSubtarget.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsR600.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/IPO/Attributor.h"

#define DEBUG_TYPE "amdgpu-attributor"

using namespace llvm;

static constexpr StringLiteral ImplicitAttrNames[] = {
    // X ids unnecessarily propagated to kernels.
    "amdgpu-work-item-id-x", "amdgpu-work-item-id-y",
    "amdgpu-work-item-id-z", "amdgpu-work-group-id-x",
    "amdgpu-work-group-id-y", "amdgpu-work-group-id-z",
    "amdgpu-dispatch-ptr", "amdgpu-dispatch-id",
    "amdgpu-queue-ptr", "amdgpu-implicitarg-ptr"};

// We do not need to note the x workitem or workgroup id because they are always
// initialized.
//
// TODO: We should not add the attributes if the known compile time workgroup
// size is 1 for y/z.
static StringRef intrinsicToAttrName(Intrinsic::ID ID, bool &NonKernelOnly,
                                     bool &IsQueuePtr) {
  switch (ID) {
  case Intrinsic::amdgcn_workitem_id_x:
    NonKernelOnly = true;
    return "amdgpu-work-item-id-x";
  case Intrinsic::amdgcn_workgroup_id_x:
    NonKernelOnly = true;
    return "amdgpu-work-group-id-x";
  case Intrinsic::amdgcn_workitem_id_y:
  case Intrinsic::r600_read_tidig_y:
    return "amdgpu-work-item-id-y";
  case Intrinsic::amdgcn_workitem_id_z:
  case Intrinsic::r600_read_tidig_z:
    return "amdgpu-work-item-id-z";
  case Intrinsic::amdgcn_workgroup_id_y:
  case Intrinsic::r600_read_tgid_y:
    return "amdgpu-work-group-id-y";
  case Intrinsic::amdgcn_workgroup_id_z:
  case Intrinsic::r600_read_tgid_z:
    return "amdgpu-work-group-id-z";
  case Intrinsic::amdgcn_dispatch_ptr:
    return "amdgpu-dispatch-ptr";
  case Intrinsic::amdgcn_dispatch_id:
    return "amdgpu-dispatch-id";
  case Intrinsic::amdgcn_kernarg_segment_ptr:
    return "amdgpu-kernarg-segment-ptr";
  case Intrinsic::amdgcn_implicitarg_ptr:
    return "amdgpu-implicitarg-ptr";
  case Intrinsic::amdgcn_queue_ptr:
  case Intrinsic::amdgcn_is_shared:
  case Intrinsic::amdgcn_is_private:
    // TODO: Does not require queue ptr on gfx9+
  case Intrinsic::trap:
  case Intrinsic::debugtrap:
    IsQueuePtr = true;
    return "amdgpu-queue-ptr";
  default:
    return "";
  }
}

static bool castRequiresQueuePtr(unsigned SrcAS) {
  return SrcAS == AMDGPUAS::LOCAL_ADDRESS || SrcAS == AMDGPUAS::PRIVATE_ADDRESS;
}

static bool isDSAddress(const Constant *C) {
  const GlobalValue *GV = dyn_cast<GlobalValue>(C);
  if (!GV)
    return false;
  unsigned AS = GV->getAddressSpace();
  return AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::REGION_ADDRESS;
}

class AMDGPUInformationCache : public InformationCache {
public:
  AMDGPUInformationCache(const Module &M, AnalysisGetter &AG,
                         BumpPtrAllocator &Allocator,
                         SetVector<Function *> *CGSCC, TargetMachine &TM)
      : InformationCache(M, AG, Allocator, CGSCC), TM(TM) {}
  TargetMachine &TM;

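  /// Bitmask describing how a constant is used: it may refer to an LDS/region
  /// (DS) global and/or contain an address space cast from a local or private
  /// pointer.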
  enum ConstantStatus { DS_GLOBAL = 1 << 0, ADDR_SPACE_CAST = 1 << 1 };

  /// Check if the subtarget has aperture regs.
  bool hasApertureRegs(Function &F) {
    const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
    return ST.hasApertureRegs();
  }

private:
  /// Check if the ConstantExpr \p CE requires queue ptr attribute.
  static bool visitConstExpr(const ConstantExpr *CE) {
    if (CE->getOpcode() == Instruction::AddrSpaceCast) {
      unsigned SrcAS = CE->getOperand(0)->getType()->getPointerAddressSpace();
      return castRequiresQueuePtr(SrcAS);
    }
    return false;
  }

  /// Get the constant access bitmap for \p C.
  uint8_t getConstantAccess(const Constant *C) {
    auto It = ConstantStatus.find(C);
    if (It != ConstantStatus.end())
      return It->second;

    uint8_t Result = 0;
    if (isDSAddress(C))
      Result = DS_GLOBAL;

    if (const auto *CE = dyn_cast<ConstantExpr>(C))
      if (visitConstExpr(CE))
        Result |= ADDR_SPACE_CAST;

    for (const Use &U : C->operands()) {
      const auto *OpC = dyn_cast<Constant>(U);
      if (!OpC)
        continue;

      Result |= getConstantAccess(OpC);
    }

    // Cache the result so constants shared between users are only walked once.
    ConstantStatus[C] = Result;
    return Result;
  }

public:
  /// Returns true if \p Fn needs a queue ptr attribute because of \p C.
  bool needsQueuePtr(const Constant *C, Function &Fn) {
    bool IsNonEntryFunc = !AMDGPU::isEntryFunctionCC(Fn.getCallingConv());
    bool HasAperture = hasApertureRegs(Fn);

    // No need to explore the constants.
    if (!IsNonEntryFunc && HasAperture)
      return false;

    uint8_t Access = getConstantAccess(C);

    // We need to trap on DS globals in non-entry functions.
    if (IsNonEntryFunc && (Access & DS_GLOBAL))
      return true;

    return !HasAperture && (Access & ADDR_SPACE_CAST);
  }

private:
  /// Used to determine if the Constant needs a queue ptr attribute.
  DenseMap<const Constant *, uint8_t> ConstantStatus;
};

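/// Abstract attribute collecting the AMDGPU function string attributes
/// (implicit argument requirements, "amdgpu-calls", "amdgpu-stack-objects",
/// ...) deduced for a function.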
struct AAAMDAttributes : public StateWrapper<BooleanState, AbstractAttribute> {
  using Base = StateWrapper<BooleanState, AbstractAttribute>;
  AAAMDAttributes(const IRPosition &IRP, Attributor &A) : Base(IRP) {}

  /// Create an abstract attribute view for the position \p IRP.
  static AAAMDAttributes &createForPosition(const IRPosition &IRP,
                                            Attributor &A);

  /// See AbstractAttribute::getName().
  const std::string getName() const override { return "AAAMDAttributes"; }

  /// See AbstractAttribute::getIdAddr().
  const char *getIdAddr() const override { return &ID; }

  /// This function should return true if the type of the \p AA is
  /// AAAMDAttributes.
  static bool classof(const AbstractAttribute *AA) {
    return (AA->getIdAddr() == &ID);
  }

  virtual const DenseSet<StringRef> &getAttributes() const = 0;

  /// Unique ID (due to the unique address)
  static const char ID;
};
const char AAAMDAttributes::ID = 0;

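/// Abstract attribute propagating "uniform-work-group-size" from callers down
/// to the functions they call.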
struct AAAMDWorkGroupSize
    : public StateWrapper<BooleanState, AbstractAttribute> {
  using Base = StateWrapper<BooleanState, AbstractAttribute>;
  AAAMDWorkGroupSize(const IRPosition &IRP, Attributor &A) : Base(IRP) {}

  /// Create an abstract attribute view for the position \p IRP.
  static AAAMDWorkGroupSize &createForPosition(const IRPosition &IRP,
                                               Attributor &A);

  /// See AbstractAttribute::getName().
  const std::string getName() const override { return "AAAMDWorkGroupSize"; }

  /// See AbstractAttribute::getIdAddr().
  const char *getIdAddr() const override { return &ID; }

  /// This function should return true if the type of the \p AA is
  /// AAAMDWorkGroupSize.
  static bool classof(const AbstractAttribute *AA) {
    return (AA->getIdAddr() == &ID);
  }

  /// Unique ID (due to the unique address)
  static const char ID;
};
const char AAAMDWorkGroupSize::ID = 0;

struct AAAMDWorkGroupSizeFunction : public AAAMDWorkGroupSize {
  AAAMDWorkGroupSizeFunction(const IRPosition &IRP, Attributor &A)
      : AAAMDWorkGroupSize(IRP, A) {}

  void initialize(Attributor &A) override {
    Function *F = getAssociatedFunction();
    CallingConv::ID CC = F->getCallingConv();

    if (CC != CallingConv::AMDGPU_KERNEL)
      return;

    bool InitialValue = false;
    if (F->hasFnAttribute("uniform-work-group-size"))
      InitialValue = F->getFnAttribute("uniform-work-group-size")
                         .getValueAsString()
                         .equals("true");

    if (InitialValue)
      indicateOptimisticFixpoint();
    else
      indicatePessimisticFixpoint();
  }

  ChangeStatus updateImpl(Attributor &A) override {
    ChangeStatus Change = ChangeStatus::UNCHANGED;

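    // "uniform-work-group-size" can only remain true if every caller also has
    // uniform work-group sizes, so clamp this state against each call site's
    // caller and give up if not all call sites are visible.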
    auto CheckCallSite = [&](AbstractCallSite CS) {
      Function *Caller = CS.getInstruction()->getFunction();
      LLVM_DEBUG(dbgs() << "[AAAMDWorkGroupSize] Call " << Caller->getName()
                        << "->" << getAssociatedFunction()->getName() << "\n");

      const auto &CallerInfo = A.getAAFor<AAAMDWorkGroupSize>(
          *this, IRPosition::function(*Caller), DepClassTy::REQUIRED);

      Change = Change | clampStateAndIndicateChange(this->getState(),
                                                    CallerInfo.getState());

      return true;
    };

    bool AllCallSitesKnown = true;
    if (!A.checkForAllCallSites(CheckCallSite, *this, true, AllCallSitesKnown))
      indicatePessimisticFixpoint();

    return Change;
  }

  ChangeStatus manifest(Attributor &A) override {
    SmallVector<Attribute, 8> AttrList;
    LLVMContext &Ctx = getAssociatedFunction()->getContext();

    AttrList.push_back(Attribute::get(Ctx, "uniform-work-group-size",
                                      getAssumed() ? "true" : "false"));
    return IRAttributeManifest::manifestAttrs(A, getIRPosition(), AttrList,
                                              /* ForceReplace */ true);
  }

  bool isValidState() const override {
    // This state is always valid, even when the assumed value is false.
    return true;
  }

  const std::string getAsStr() const override {
    return "AMDWorkGroupSize[" + std::to_string(getAssumed()) + "]";
  }

  /// See AbstractAttribute::trackStatistics()
  void trackStatistics() const override {}
};

AAAMDWorkGroupSize &AAAMDWorkGroupSize::createForPosition(const IRPosition &IRP,
                                                          Attributor &A) {
  if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
    return *new (A.Allocator) AAAMDWorkGroupSizeFunction(IRP, A);
  llvm_unreachable("AAAMDWorkGroupSize is only valid for function position");
}

struct AAAMDAttributesFunction : public AAAMDAttributes {
  AAAMDAttributesFunction(const IRPosition &IRP, Attributor &A)
      : AAAMDAttributes(IRP, A) {}

  void initialize(Attributor &A) override {
    Function *F = getAssociatedFunction();
    CallingConv::ID CC = F->getCallingConv();
    bool CallingConvSupportsAllImplicits = (CC != CallingConv::AMDGPU_Gfx);

    // Don't add attributes to intrinsics.
    if (F->isIntrinsic()) {
      indicatePessimisticFixpoint();
      return;
    }

    // Ignore functions with graphics calling conventions; these are currently
    // not allowed to have kernel arguments.
    if (AMDGPU::isGraphics(F->getCallingConv())) {
      indicatePessimisticFixpoint();
      return;
    }

    for (StringRef Attr : ImplicitAttrNames) {
      if (F->hasFnAttribute(Attr))
        Attributes.insert(Attr);
    }

    // TODO: We shouldn't need this in the future.
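    // If the function's address is taken it may be called indirectly; since
    // those call sites are not visible here, conservatively seed every
    // implicit argument attribute.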
    if (CallingConvSupportsAllImplicits &&
        F->hasAddressTaken(nullptr, true, true, true)) {
      for (StringRef AttrName : ImplicitAttrNames) {
        Attributes.insert(AttrName);
      }
    }
  }

  ChangeStatus updateImpl(Attributor &A) override {
    Function *F = getAssociatedFunction();
    ChangeStatus Change = ChangeStatus::UNCHANGED;
    bool IsNonEntryFunc = !AMDGPU::isEntryFunctionCC(F->getCallingConv());
    CallingConv::ID CC = F->getCallingConv();
    bool CallingConvSupportsAllImplicits = (CC != CallingConv::AMDGPU_Gfx);
    auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());

    auto AddAttribute = [&](StringRef AttrName) {
      if (Attributes.insert(AttrName).second)
        Change = ChangeStatus::CHANGED;
    };

    // Check for intrinsics and propagate attributes.
    const AACallEdges &AAEdges = A.getAAFor<AACallEdges>(
        *this, this->getIRPosition(), DepClassTy::REQUIRED);

    // If there is an unknown callee, we have to assume it can reach a function
    // that needs all implicit arguments. We do not consider inline assembly
    // an unknown callee.
    if (CallingConvSupportsAllImplicits && AAEdges.hasNonAsmUnknownCallee()) {
      for (StringRef AttrName : ImplicitAttrNames) {
        AddAttribute(AttrName);
      }
    }

    bool NeedsQueuePtr = false;
    bool HasCall = false;
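    // Walk the assumed call edges: intrinsic callees map directly to attribute
    // names, while regular callees propagate whatever implicit-argument
    // attributes have been deduced for them.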
    for (Function *Callee : AAEdges.getOptimisticEdges()) {
      Intrinsic::ID IID = Callee->getIntrinsicID();
      if (IID != Intrinsic::not_intrinsic) {
        if (!IsNonEntryFunc && IID == Intrinsic::amdgcn_kernarg_segment_ptr) {
          AddAttribute("amdgpu-kernarg-segment-ptr");
          continue;
        }

        bool NonKernelOnly = false;
        StringRef AttrName =
            intrinsicToAttrName(IID, NonKernelOnly, NeedsQueuePtr);

        if (!AttrName.empty() && (IsNonEntryFunc || !NonKernelOnly))
          AddAttribute(AttrName);

        continue;
      }

      HasCall = true;
      const AAAMDAttributes &AAAMD = A.getAAFor<AAAMDAttributes>(
          *this, IRPosition::function(*Callee), DepClassTy::REQUIRED);
      const DenseSet<StringRef> &CalleeAttributes = AAAMD.getAttributes();
      // Propagate implicit attributes from called function.
      for (StringRef AttrName : ImplicitAttrNames)
        if (CalleeAttributes.count(AttrName))
          AddAttribute(AttrName);
    }

    HasCall |= AAEdges.hasUnknownCallee();
    if (!IsNonEntryFunc && HasCall)
      AddAttribute("amdgpu-calls");

    // Check the function body.
    auto CheckAlloca = [&](Instruction &I) {
      AddAttribute("amdgpu-stack-objects");
      return false;
    };

    bool UsedAssumedInformation = false;
    A.checkForAllInstructions(CheckAlloca, *this, {Instruction::Alloca},
                              UsedAssumedInformation);

    // If we found that we need amdgpu-queue-ptr, there is nothing else to do.
    if (NeedsQueuePtr || Attributes.count("amdgpu-queue-ptr")) {
      AddAttribute("amdgpu-queue-ptr");
      return Change;
    }

    auto CheckAddrSpaceCasts = [&](Instruction &I) {
      unsigned SrcAS = static_cast<AddrSpaceCastInst &>(I).getSrcAddressSpace();
      if (castRequiresQueuePtr(SrcAS)) {
        NeedsQueuePtr = true;
        return false;
      }
      return true;
    };

    bool HasApertureRegs = InfoCache.hasApertureRegs(*F);

    // `checkForAllInstructions` only visits the requested opcodes, which is
    // much cheaper than walking every instruction, so try it first.

    // amdgpu-queue-ptr is not needed if aperture regs are present.
    if (!HasApertureRegs)
      A.checkForAllInstructions(CheckAddrSpaceCasts, *this,
                                {Instruction::AddrSpaceCast},
                                UsedAssumedInformation);

    // If we found that we need amdgpu-queue-ptr, there is nothing else to do.
    if (NeedsQueuePtr) {
      AddAttribute("amdgpu-queue-ptr");
      return Change;
    }

    if (!IsNonEntryFunc && HasApertureRegs)
      return Change;

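    // Finally, check every constant operand in the body: DS globals referenced
    // from non-entry functions, and address space casts from local or private
    // pointers on subtargets without aperture registers, still require the
    // queue pointer.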
    for (BasicBlock &BB : *F) {
      for (Instruction &I : BB) {
        for (const Use &U : I.operands()) {
          if (const auto *C = dyn_cast<Constant>(U)) {
            if (InfoCache.needsQueuePtr(C, *F)) {
              AddAttribute("amdgpu-queue-ptr");
              return Change;
            }
          }
        }
      }
    }

    return Change;
  }

  ChangeStatus manifest(Attributor &A) override {
    SmallVector<Attribute, 8> AttrList;
    LLVMContext &Ctx = getAssociatedFunction()->getContext();

    for (StringRef AttrName : Attributes)
      AttrList.push_back(Attribute::get(Ctx, AttrName));

    return IRAttributeManifest::manifestAttrs(A, getIRPosition(), AttrList,
                                              /* ForceReplace */ true);
  }

  const std::string getAsStr() const override {
    return "AMDInfo[" + std::to_string(Attributes.size()) + "]";
  }

  const DenseSet<StringRef> &getAttributes() const override {
    return Attributes;
  }

  /// See AbstractAttribute::trackStatistics()
  void trackStatistics() const override {}

private:
  DenseSet<StringRef> Attributes;
};

AAAMDAttributes &AAAMDAttributes::createForPosition(const IRPosition &IRP,
                                                    Attributor &A) {
  if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
    return *new (A.Allocator) AAAMDAttributesFunction(IRP, A);
  llvm_unreachable("AAAMDAttributes is only valid for function position");
}

class AMDGPUAttributor : public ModulePass {
public:
  AMDGPUAttributor() : ModulePass(ID) {}

  /// doInitialization - Virtual method overridden by subclasses to do
  /// any necessary initialization before any pass is run.
  bool doInitialization(Module &) override {
    auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
    if (!TPC)
      report_fatal_error("TargetMachine is required");

    TM = &TPC->getTM<TargetMachine>();
    return false;
  }

  bool runOnModule(Module &M) override {
    SetVector<Function *> Functions;
    AnalysisGetter AG;
    for (Function &F : M)
      Functions.insert(&F);

    CallGraphUpdater CGUpdater;
    BumpPtrAllocator Allocator;
    AMDGPUInformationCache InfoCache(M, AG, Allocator, nullptr, *TM);
    Attributor A(Functions, InfoCache, CGUpdater);

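    // Seed the AMDGPU abstract attributes for every function and let the
    // Attributor iterate them to a fixpoint.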
    for (Function &F : M) {
      A.getOrCreateAAFor<AAAMDAttributes>(IRPosition::function(F));
      A.getOrCreateAAFor<AAAMDWorkGroupSize>(IRPosition::function(F));
    }

    ChangeStatus Change = A.run();
    return Change == ChangeStatus::CHANGED;
  }

  StringRef getPassName() const override { return "AMDGPU Attributor"; }
  TargetMachine *TM;
  static char ID;
};

char AMDGPUAttributor::ID = 0;

Pass *llvm::createAMDGPUAttributorPass() { return new AMDGPUAttributor(); }
INITIALIZE_PASS(AMDGPUAttributor, DEBUG_TYPE, "AMDGPU Attributor", false, false)