1 //===-- AMDGPUMachineFunctionInfo.cpp ---------------------------------------=//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "AMDGPUMachineFunction.h"
10 #include "AMDGPU.h"
11 #include "AMDGPUPerfHintAnalysis.h"
12 #include "AMDGPUSubtarget.h"
13 #include "llvm/CodeGen/MachineModuleInfo.h"
14 #include "llvm/IR/Constants.h"
15 #include "llvm/Target/TargetMachine.h"
16 
17 using namespace llvm;
18 
19 AMDGPUMachineFunction::AMDGPUMachineFunction(const Function &F,
20                                              const AMDGPUSubtarget &ST)
21     : IsEntryFunction(AMDGPU::isEntryFunctionCC(F.getCallingConv())),
22       IsModuleEntryFunction(
23           AMDGPU::isModuleEntryFunctionCC(F.getCallingConv())),
24       NoSignedZerosFPMath(false) {
25 
26   // FIXME: Should initialize KernArgSize based on ExplicitKernelArgOffset,
27   // except reserved size is not correctly aligned.
28 
29   Attribute MemBoundAttr = F.getFnAttribute("amdgpu-memory-bound");
30   MemoryBound = MemBoundAttr.getValueAsBool();
31 
32   Attribute WaveLimitAttr = F.getFnAttribute("amdgpu-wave-limiter");
33   WaveLimiter = WaveLimitAttr.getValueAsBool();
34 
35   // FIXME: How is this attribute supposed to interact with statically known
36   // global sizes?
37   StringRef S = F.getFnAttribute("amdgpu-gds-size").getValueAsString();
38   if (!S.empty())
39     S.consumeInteger(0, GDSSize);
40 
41   // Assume the attribute allocates before any known GDS globals.
42   StaticGDSSize = GDSSize;
43 
44   CallingConv::ID CC = F.getCallingConv();
45   if (CC == CallingConv::AMDGPU_KERNEL || CC == CallingConv::SPIR_KERNEL)
46     ExplicitKernArgSize = ST.getExplicitKernArgSize(F, MaxKernArgAlign);
47 
48   // FIXME: Shouldn't be target specific
49   Attribute NSZAttr = F.getFnAttribute("no-signed-zeros-fp-math");
50   NoSignedZerosFPMath =
51       NSZAttr.isStringAttribute() && NSZAttr.getValueAsString() == "true";
52 }
53 
54 unsigned AMDGPUMachineFunction::allocateLDSGlobal(const DataLayout &DL,
55                                                   const GlobalVariable &GV,
56                                                   Align Trailing) {
57   auto Entry = LocalMemoryObjects.insert(std::pair(&GV, 0));
58   if (!Entry.second)
59     return Entry.first->second;
60 
61   Align Alignment =
62       DL.getValueOrABITypeAlignment(GV.getAlign(), GV.getValueType());
63 
64   unsigned Offset;
65   if (GV.getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) {
66     /// TODO: We should sort these to minimize wasted space due to alignment
67     /// padding. Currently the padding is decided by the first encountered use
68     /// during lowering.
69     Offset = StaticLDSSize = alignTo(StaticLDSSize, Alignment);
70 
71     StaticLDSSize += DL.getTypeAllocSize(GV.getValueType());
72 
73     // Align LDS size to trailing, e.g. for aligning dynamic shared memory
74     LDSSize = alignTo(StaticLDSSize, Trailing);
75   } else {
76     assert(GV.getAddressSpace() == AMDGPUAS::REGION_ADDRESS &&
77            "expected region address space");
78 
79     Offset = StaticGDSSize = alignTo(StaticGDSSize, Alignment);
80     StaticGDSSize += DL.getTypeAllocSize(GV.getValueType());
81 
82     // FIXME: Apply alignment of dynamic GDS
83     GDSSize = StaticGDSSize;
84   }
85 
86   Entry.first->second = Offset;
87   return Offset;
88 }
89 
90 static constexpr StringLiteral ModuleLDSName = "llvm.amdgcn.module.lds";
91 
92 bool AMDGPUMachineFunction::isKnownAddressLDSGlobal(const GlobalVariable &GV) {
93   auto name = GV.getName();
94   return (name == ModuleLDSName) ||
95          (name.startswith("llvm.amdgcn.kernel.") && name.endswith(".lds"));
96 }
97 
98 const Function *AMDGPUMachineFunction::getKernelLDSFunctionFromGlobal(
99     const GlobalVariable &GV) {
100   const Module &M = *GV.getParent();
101   StringRef N(GV.getName());
102   if (N.consume_front("llvm.amdgcn.kernel.") && N.consume_back(".lds")) {
103     return M.getFunction(N);
104   }
105   return nullptr;
106 }
107 
108 const GlobalVariable *
109 AMDGPUMachineFunction::getKernelLDSGlobalFromFunction(const Function &F) {
110   const Module *M = F.getParent();
111   std::string KernelLDSName = "llvm.amdgcn.kernel.";
112   KernelLDSName += F.getName();
113   KernelLDSName += ".lds";
114   return M->getNamedGlobal(KernelLDSName);
115 }
116 
117 // This kernel calls no functions that require the module lds struct
118 static bool canElideModuleLDS(const Function &F) {
119   return F.hasFnAttribute("amdgpu-elide-module-lds");
120 }
121 
122 unsigned AMDGPUMachineFunction::calculateKnownAddressOfLDSGlobal(
123     const GlobalVariable &GV) {
124   // module.lds, then alignment padding, then kernel.lds, then other variables
125   // if any
126 
127   assert(isKnownAddressLDSGlobal(GV));
128   unsigned Offset = 0;
129 
130   if (GV.getName() == ModuleLDSName) {
131     return 0;
132   }
133 
134   const Module *M = GV.getParent();
135   const DataLayout &DL = M->getDataLayout();
136 
137   const GlobalVariable *GVM = M->getNamedGlobal(ModuleLDSName);
138   const Function *f = getKernelLDSFunctionFromGlobal(GV);
139 
140   // Account for module.lds if allocated for this function
141   if (GVM && f && !canElideModuleLDS(*f)) {
142     // allocator aligns this to var align, but it's zero to begin with
143     Offset += DL.getTypeAllocSize(GVM->getValueType());
144   }
145 
146   // No dynamic LDS alignment done by allocateModuleLDSGlobal
147   Offset = alignTo(
148       Offset, DL.getValueOrABITypeAlignment(GV.getAlign(), GV.getValueType()));
149 
150   return Offset;
151 }
152 
153 void AMDGPUMachineFunction::allocateKnownAddressLDSGlobal(const Function &F) {
154   const Module *M = F.getParent();
155 
156   // This function is called before allocating any other LDS so that it can
157   // reliably put values at known addresses. Consequently, dynamic LDS, if
158   // present, will not yet have been allocated
159 
160   assert(getDynLDSAlign() == Align() && "dynamic LDS not yet allocated");
161 
162   if (isModuleEntryFunction()) {
163 
164     // Pointer values start from zero, memory allocated per-kernel-launch
165     // Variables can be grouped into a module level struct and a struct per
166     // kernel function by AMDGPULowerModuleLDSPass. If that is done, they
167     // are allocated at statically computable addresses here.
168     //
169     // Address 0
170     // {
171     //   llvm.amdgcn.module.lds
172     // }
173     // alignment padding
174     // {
175     //   llvm.amdgcn.kernel.some-name.lds
176     // }
177     // other variables, e.g. dynamic lds, allocated after this call
178 
179     const GlobalVariable *GV = M->getNamedGlobal(ModuleLDSName);
180     const GlobalVariable *KV = getKernelLDSGlobalFromFunction(F);
181 
182     if (GV && !canElideModuleLDS(F)) {
183       assert(isKnownAddressLDSGlobal(*GV));
184       unsigned Offset = allocateLDSGlobal(M->getDataLayout(), *GV, Align());
185       (void)Offset;
186       assert(Offset == calculateKnownAddressOfLDSGlobal(*GV) &&
187              "Module LDS expected to be allocated before other LDS");
188     }
189 
190     if (KV) {
191       // The per-kernel offset is deterministic because it is allocated
192       // before any other non-module LDS variables.
193       assert(isKnownAddressLDSGlobal(*KV));
194       unsigned Offset = allocateLDSGlobal(M->getDataLayout(), *KV, Align());
195       (void)Offset;
196       assert(Offset == calculateKnownAddressOfLDSGlobal(*KV) &&
197              "Kernel LDS expected to be immediately after module LDS");
198     }
199   }
200 }
201 
202 std::optional<uint32_t>
203 AMDGPUMachineFunction::getLDSKernelIdMetadata(const Function &F) {
204   auto MD = F.getMetadata("llvm.amdgcn.lds.kernel.id");
205   if (MD && MD->getNumOperands() == 1) {
206     ConstantInt *KnownSize = mdconst::extract<ConstantInt>(MD->getOperand(0));
207     if (KnownSize) {
208       uint64_t V = KnownSize->getZExtValue();
209       if (V <= UINT32_MAX) {
210         return V;
211       }
212     }
213   }
214   return {};
215 }
216 
217 void AMDGPUMachineFunction::setDynLDSAlign(const DataLayout &DL,
218                                            const GlobalVariable &GV) {
219   assert(DL.getTypeAllocSize(GV.getValueType()).isZero());
220 
221   Align Alignment =
222       DL.getValueOrABITypeAlignment(GV.getAlign(), GV.getValueType());
223   if (Alignment <= DynLDSAlign)
224     return;
225 
226   LDSSize = alignTo(StaticLDSSize, Alignment);
227   DynLDSAlign = Alignment;
228 }
229