//===-- AMDGPUMemoryUtils.cpp - Memory related helper functions ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "AMDGPUMemoryUtils.h"
#include "AMDGPU.h"
#include "AMDGPUBaseInfo.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/ReplaceConstant.h"

#define DEBUG_TYPE "amdgpu-memory-utils"

using namespace llvm;

namespace llvm {

namespace AMDGPU {

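// Return the alignment to use for GV: its explicit or inferred pointer
// alignment if known, otherwise the ABI alignment of its value type.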
Align getAlign(DataLayout const &DL, const GlobalVariable *GV) {
  return DL.getValueOrABITypeAlignment(GV->getPointerAlignment(DL),
                                       GV->getValueType());
}

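// Decide whether GV should be moved into an LDS struct. For kernel LDS
// lowering (F is a kernel) GV qualifies if some instruction in F uses it,
// possibly through constant expressions. For module LDS lowering (F is null)
// GV qualifies if any non-kernel function uses it.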
static bool shouldLowerLDSToStruct(const GlobalVariable &GV,
                                   const Function *F) {
  // We are not interested in kernel LDS lowering for module LDS itself.
  if (F && GV.getName() == "llvm.amdgcn.module.lds")
    return false;

  bool Ret = false;
  SmallPtrSet<const User *, 8> Visited;
  SmallVector<const User *, 16> Stack(GV.users());

  assert(!F || isKernelCC(F));

  while (!Stack.empty()) {
    const User *V = Stack.pop_back_val();
    if (!Visited.insert(V).second)
      continue; // Constants can be reached through several paths; visit once.

    if (isa<GlobalValue>(V)) {
      // This use of the LDS variable is the initializer of a global variable.
      // That is ill-formed. The address of an LDS variable is kernel dependent
      // and unknown until runtime. It can't be written to a global variable.
      continue;
    }

    if (auto *I = dyn_cast<Instruction>(V)) {
      const Function *UF = I->getFunction();
      if (UF == F) {
        // Used from this kernel, so we want to put it into the structure.
        Ret = true;
      } else if (!F) {
        // For module LDS lowering, lowering is required if the user
        // instruction is in a non-kernel function.
        Ret |= !isKernelCC(UF);
      }
      continue;
    }

    // User V should be a constant; recursively visit the users of V.
    assert(isa<Constant>(V) && "Expected a constant.");
    append_range(Stack, V->users());
  }

  return Ret;
}

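// Return true if GV is an LDS variable the lowering passes should act on.
// Each early-out below documents one class of variable that is skipped.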
bool isLDSVariableToLower(const GlobalVariable &GV) {
  if (GV.getType()->getPointerAddressSpace() != AMDGPUAS::LOCAL_ADDRESS) {
    return false;
  }
  if (!GV.hasInitializer()) {
    // An addrspace(3) variable without an initializer implies CUDA/HIP extern
    // __shared__. The semantics for such a variable appear to be that all
    // extern __shared__ variables alias one another, in which case this
    // transform is not required.
    return false;
  }
  if (!isa<UndefValue>(GV.getInitializer())) {
    // Initializers are unimplemented for the LDS address space.
    // Leave such variables in place for consistent error reporting.
    return false;
  }
  if (GV.isConstant()) {
    // A constant undef variable can't be written to, and any load is
    // undef, so it should be eliminated by the optimizer. It could be
    // dropped by the back end if not. This pass skips over it.
    return false;
  }
  return true;
}

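// Collect every LDS variable in M that both passes the filter above and, per
// shouldLowerLDSToStruct, is relevant for the given F (or for module lowering
// when F is null). A typical call, sketched here for the module-lowering case:
//
//   std::vector<GlobalVariable *> ModuleVars =
//       findLDSVariablesToLower(M, /*F=*/nullptr);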
std::vector<GlobalVariable *> findLDSVariablesToLower(Module &M,
                                                      const Function *F) {
  std::vector<llvm::GlobalVariable *> LocalVars;
  for (auto &GV : M.globals()) {
    if (!isLDSVariableToLower(GV)) {
      continue;
    }
    if (!shouldLowerLDSToStruct(GV, F)) {
      continue;
    }
    LocalVars.push_back(&GV);
  }
  return LocalVars;
}

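// MemorySSA conservatively models fences, barriers, and all atomics as
// universal clobbers. Refine that: return true only if Def may actually write
// the memory that Ptr points to.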
bool isReallyAClobber(const Value *Ptr, MemoryDef *Def, AAResults *AA) {
  Instruction *DefInst = Def->getMemoryInst();

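  // A fence orders memory operations but does not itself write memory.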
  if (isa<FenceInst>(DefInst))
    return false;

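  // Likewise, the barrier intrinsics synchronize execution without writing
  // anything to memory.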
  if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(DefInst)) {
    switch (II->getIntrinsicID()) {
    case Intrinsic::amdgcn_s_barrier:
    case Intrinsic::amdgcn_wave_barrier:
    case Intrinsic::amdgcn_sched_barrier:
    case Intrinsic::amdgcn_sched_group_barrier:
      return false;
    default:
      break;
    }
  }

  // Ignore atomics that do not alias the original load. Any atomic is a
  // universal MemoryDef from MSSA's point of view, just like a fence.
  const auto checkNoAlias = [AA, Ptr](auto I) -> bool {
    return I && AA->isNoAlias(I->getPointerOperand(), Ptr);
  };

  if (checkNoAlias(dyn_cast<AtomicCmpXchgInst>(DefInst)) ||
      checkNoAlias(dyn_cast<AtomicRMWInst>(DefInst)))
    return false;

  return true;
}

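// Walk MemorySSA upwards from Load and report whether any real clobber (per
// isReallyAClobber) of the loaded location can reach it. A sketch of the
// intended use, assuming MSSA and AA were obtained from an analysis manager:
//
//   if (!isClobberedInFunction(LI, MSSA, AA)) {
//     // The loaded value cannot change within this function.
//   }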
bool isClobberedInFunction(const LoadInst *Load, MemorySSA *MSSA,
                           AAResults *AA) {
  MemorySSAWalker *Walker = MSSA->getWalker();
  SmallVector<MemoryAccess *> WorkList{Walker->getClobberingMemoryAccess(Load)};
  SmallSet<MemoryAccess *, 8> Visited;
  MemoryLocation Loc(MemoryLocation::get(Load));

  LLVM_DEBUG(dbgs() << "Checking clobbering of: " << *Load << '\n');

  // Start with the nearest dominating clobbering access: it is either live on
  // entry (nothing to do, the load is not clobbered), a MemoryDef, or a
  // MemoryPhi if several MemoryDefs can define this memory state. In the Phi
  // case add all the Defs to the WorkList and keep walking up, checking every
  // definition of this memory location until the root. If all the Defs are
  // exhausted and we reach the entry state, there is no clobber. Along the
  // way ignore barriers and fences, which MemorySSA considers clobbers even
  // though they do not actually write anything into memory.
  while (!WorkList.empty()) {
    MemoryAccess *MA = WorkList.pop_back_val();
    if (!Visited.insert(MA).second)
      continue;

    if (MSSA->isLiveOnEntryDef(MA))
      continue;

    if (MemoryDef *Def = dyn_cast<MemoryDef>(MA)) {
      LLVM_DEBUG(dbgs() << "  Def: " << *Def->getMemoryInst() << '\n');

      if (isReallyAClobber(Load->getPointerOperand(), Def, AA)) {
        LLVM_DEBUG(dbgs() << "      -> load is clobbered\n");
        return true;
      }

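      // Not a real clobber; resume the walk from whatever clobbers the
      // access that defines this Def.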
      WorkList.push_back(
          Walker->getClobberingMemoryAccess(Def->getDefiningAccess(), Loc));
      continue;
    }

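    // A MemoryPhi merges several reaching definitions; each incoming value
    // must be checked in turn.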
    const MemoryPhi *Phi = cast<MemoryPhi>(MA);
    for (const auto &Use : Phi->incoming_values())
      WorkList.push_back(cast<MemoryAccess>(&Use));
  }

  LLVM_DEBUG(dbgs() << "      -> no clobber\n");
  return false;
}

} // end namespace AMDGPU

} // end namespace llvm