1 //===-- AMDGPUPromoteKernelArguments.cpp ----------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file This pass recursively promotes generic pointer arguments of a kernel
10 /// into the global address space.
11 ///
12 /// The pass walks kernel's pointer arguments, then loads from them. If a loaded
13 /// value is a pointer and loaded pointer is unmodified in the kernel before the
14 /// load, then promote loaded pointer to global. Then recursively continue.
15 //
16 //===----------------------------------------------------------------------===//
17 
18 #include "AMDGPU.h"
19 #include "Utils/AMDGPUMemoryUtils.h"
20 #include "llvm/ADT/SmallVector.h"
21 #include "llvm/Analysis/AliasAnalysis.h"
22 #include "llvm/Analysis/MemorySSA.h"
23 #include "llvm/IR/IRBuilder.h"
24 #include "llvm/InitializePasses.h"
25 
26 #define DEBUG_TYPE "amdgpu-promote-kernel-arguments"
27 
28 using namespace llvm;
29 
30 namespace {
31 
32 class AMDGPUPromoteKernelArguments : public FunctionPass {
33   MemorySSA *MSSA;
34 
35   AliasAnalysis *AA;
36 
37   Instruction *ArgCastInsertPt;
38 
39   SmallVector<Value *> Ptrs;
40 
41   void enqueueUsers(Value *Ptr);
42 
43   bool promotePointer(Value *Ptr);
44 
45   bool promoteLoad(LoadInst *LI);
46 
47 public:
48   static char ID;
49 
50   AMDGPUPromoteKernelArguments() : FunctionPass(ID) {}
51 
52   bool run(Function &F, MemorySSA &MSSA, AliasAnalysis &AA);
53 
54   bool runOnFunction(Function &F) override;
55 
56   void getAnalysisUsage(AnalysisUsage &AU) const override {
57     AU.addRequired<AAResultsWrapperPass>();
58     AU.addRequired<MemorySSAWrapperPass>();
59     AU.setPreservesAll();
60   }
61 };
62 
63 } // end anonymous namespace
64 
65 void AMDGPUPromoteKernelArguments::enqueueUsers(Value *Ptr) {
66   SmallVector<User *> PtrUsers(Ptr->users());
67 
68   while (!PtrUsers.empty()) {
69     Instruction *U = dyn_cast<Instruction>(PtrUsers.pop_back_val());
70     if (!U)
71       continue;
72 
73     switch (U->getOpcode()) {
74     default:
75       break;
76     case Instruction::Load: {
77       LoadInst *LD = cast<LoadInst>(U);
78       if (LD->getPointerOperand()->stripInBoundsOffsets() == Ptr &&
79           !AMDGPU::isClobberedInFunction(LD, MSSA, AA))
80         Ptrs.push_back(LD);
81 
82       break;
83     }
84     case Instruction::GetElementPtr:
85     case Instruction::AddrSpaceCast:
86     case Instruction::BitCast:
87       if (U->getOperand(0)->stripInBoundsOffsets() == Ptr)
88         PtrUsers.append(U->user_begin(), U->user_end());
89       break;
90     }
91   }
92 }
93 
94 bool AMDGPUPromoteKernelArguments::promotePointer(Value *Ptr) {
95   bool Changed = false;
96 
97   LoadInst *LI = dyn_cast<LoadInst>(Ptr);
98   if (LI)
99     Changed |= promoteLoad(LI);
100 
101   PointerType *PT = dyn_cast<PointerType>(Ptr->getType());
102   if (!PT)
103     return Changed;
104 
105   if (PT->getAddressSpace() == AMDGPUAS::FLAT_ADDRESS ||
106       PT->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS ||
107       PT->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS)
108     enqueueUsers(Ptr);
109 
110   if (PT->getAddressSpace() != AMDGPUAS::FLAT_ADDRESS)
111     return Changed;
112 
113   IRBuilder<> B(LI ? &*std::next(cast<Instruction>(Ptr)->getIterator())
114                    : ArgCastInsertPt);
115 
116   // Cast pointer to global address space and back to flat and let
117   // Infer Address Spaces pass to do all necessary rewriting.
118   PointerType *NewPT =
119       PointerType::getWithSamePointeeType(PT, AMDGPUAS::GLOBAL_ADDRESS);
120   Value *Cast =
121       B.CreateAddrSpaceCast(Ptr, NewPT, Twine(Ptr->getName(), ".global"));
122   Value *CastBack =
123       B.CreateAddrSpaceCast(Cast, PT, Twine(Ptr->getName(), ".flat"));
124   Ptr->replaceUsesWithIf(CastBack,
125                          [Cast](Use &U) { return U.getUser() != Cast; });
126 
127   return true;
128 }
129 
130 bool AMDGPUPromoteKernelArguments::promoteLoad(LoadInst *LI) {
131   if (!LI->isSimple())
132     return false;
133 
134   LI->setMetadata("amdgpu.noclobber", MDNode::get(LI->getContext(), {}));
135   return true;
136 }
137 
138 // skip allocas
139 static BasicBlock::iterator getInsertPt(BasicBlock &BB) {
140   BasicBlock::iterator InsPt = BB.getFirstInsertionPt();
141   for (BasicBlock::iterator E = BB.end(); InsPt != E; ++InsPt) {
142     AllocaInst *AI = dyn_cast<AllocaInst>(&*InsPt);
143 
144     // If this is a dynamic alloca, the value may depend on the loaded kernargs,
145     // so loads will need to be inserted before it.
146     if (!AI || !AI->isStaticAlloca())
147       break;
148   }
149 
150   return InsPt;
151 }
152 
153 bool AMDGPUPromoteKernelArguments::run(Function &F, MemorySSA &MSSA,
154                                        AliasAnalysis &AA) {
155   if (skipFunction(F))
156     return false;
157 
158   CallingConv::ID CC = F.getCallingConv();
159   if (CC != CallingConv::AMDGPU_KERNEL || F.arg_empty())
160     return false;
161 
162   ArgCastInsertPt = &*getInsertPt(*F.begin());
163   this->MSSA = &MSSA;
164   this->AA = &AA;
165 
166   for (Argument &Arg : F.args()) {
167     if (Arg.use_empty())
168       continue;
169 
170     PointerType *PT = dyn_cast<PointerType>(Arg.getType());
171     if (!PT || (PT->getAddressSpace() != AMDGPUAS::FLAT_ADDRESS &&
172                 PT->getAddressSpace() != AMDGPUAS::GLOBAL_ADDRESS &&
173                 PT->getAddressSpace() != AMDGPUAS::CONSTANT_ADDRESS))
174       continue;
175 
176     Ptrs.push_back(&Arg);
177   }
178 
179   bool Changed = false;
180   while (!Ptrs.empty()) {
181     Value *Ptr = Ptrs.pop_back_val();
182     Changed |= promotePointer(Ptr);
183   }
184 
185   return Changed;
186 }
187 
188 bool AMDGPUPromoteKernelArguments::runOnFunction(Function &F) {
189   MemorySSA &MSSA = getAnalysis<MemorySSAWrapperPass>().getMSSA();
190   AliasAnalysis &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
191   return run(F, MSSA, AA);
192 }
193 
194 INITIALIZE_PASS_BEGIN(AMDGPUPromoteKernelArguments, DEBUG_TYPE,
195                       "AMDGPU Promote Kernel Arguments", false, false)
196 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
197 INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
198 INITIALIZE_PASS_END(AMDGPUPromoteKernelArguments, DEBUG_TYPE,
199                     "AMDGPU Promote Kernel Arguments", false, false)
200 
201 char AMDGPUPromoteKernelArguments::ID = 0;
202 
203 FunctionPass *llvm::createAMDGPUPromoteKernelArgumentsPass() {
204   return new AMDGPUPromoteKernelArguments();
205 }
206 
207 PreservedAnalyses
208 AMDGPUPromoteKernelArgumentsPass::run(Function &F,
209                                       FunctionAnalysisManager &AM) {
210   MemorySSA &MSSA = AM.getResult<MemorySSAAnalysis>(F).getMSSA();
211   AliasAnalysis &AA = AM.getResult<AAManager>(F);
212   if (AMDGPUPromoteKernelArguments().run(F, MSSA, AA)) {
213     PreservedAnalyses PA;
214     PA.preserveSet<CFGAnalyses>();
215     PA.preserve<MemorySSAAnalysis>();
216     return PA;
217   }
218   return PreservedAnalyses::all();
219 }
220