1 //===-- AMDGPUAnnotateUniformValues.cpp - ---------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// This pass adds amdgpu.uniform metadata to IR values so this information
11 /// can be used during instruction selection.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "AMDGPU.h"
16 #include "Utils/AMDGPUBaseInfo.h"
17 #include "llvm/ADT/SmallSet.h"
18 #include "llvm/Analysis/AliasAnalysis.h"
19 #include "llvm/Analysis/LegacyDivergenceAnalysis.h"
20 #include "llvm/Analysis/MemorySSA.h"
21 #include "llvm/IR/InstVisitor.h"
22 #include "llvm/IR/IntrinsicsAMDGPU.h"
23 #include "llvm/InitializePasses.h"
24 
25 #define DEBUG_TYPE "amdgpu-annotate-uniform"
26 
27 using namespace llvm;
28 
29 namespace {
30 
31 class AMDGPUAnnotateUniformValues : public FunctionPass,
32                        public InstVisitor<AMDGPUAnnotateUniformValues> {
33   LegacyDivergenceAnalysis *DA;
34   MemorySSA *MSSA;
35   AliasAnalysis *AA;
36   DenseMap<Value*, GetElementPtrInst*> noClobberClones;
37   bool isEntryFunc;
38 
39 public:
40   static char ID;
41   AMDGPUAnnotateUniformValues() :
42     FunctionPass(ID) { }
43   bool doInitialization(Module &M) override;
44   bool runOnFunction(Function &F) override;
45   StringRef getPassName() const override {
46     return "AMDGPU Annotate Uniform Values";
47   }
48   void getAnalysisUsage(AnalysisUsage &AU) const override {
49     AU.addRequired<LegacyDivergenceAnalysis>();
50     AU.addRequired<MemorySSAWrapperPass>();
51     AU.addRequired<AAResultsWrapperPass>();
52     AU.setPreservesAll();
53  }
54 
55   void visitBranchInst(BranchInst &I);
56   void visitLoadInst(LoadInst &I);
57   bool isClobberedInFunction(LoadInst * Load);
58 };
59 
60 } // End anonymous namespace
61 
62 INITIALIZE_PASS_BEGIN(AMDGPUAnnotateUniformValues, DEBUG_TYPE,
63                       "Add AMDGPU uniform metadata", false, false)
64 INITIALIZE_PASS_DEPENDENCY(LegacyDivergenceAnalysis)
65 INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
66 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
67 INITIALIZE_PASS_END(AMDGPUAnnotateUniformValues, DEBUG_TYPE,
68                     "Add AMDGPU uniform metadata", false, false)
69 
70 char AMDGPUAnnotateUniformValues::ID = 0;
71 
72 static void setUniformMetadata(Instruction *I) {
73   I->setMetadata("amdgpu.uniform", MDNode::get(I->getContext(), {}));
74 }
75 static void setNoClobberMetadata(Instruction *I) {
76   I->setMetadata("amdgpu.noclobber", MDNode::get(I->getContext(), {}));
77 }
78 
79 bool AMDGPUAnnotateUniformValues::isClobberedInFunction(LoadInst *Load) {
80   MemorySSAWalker *Walker = MSSA->getWalker();
81   SmallVector<MemoryAccess *> WorkList{Walker->getClobberingMemoryAccess(Load)};
82   SmallSet<MemoryAccess *, 8> Visited;
83   MemoryLocation Loc(MemoryLocation::get(Load));
84 
85   const auto isReallyAClobber = [this, Load](MemoryDef *Def) -> bool {
86     Instruction *DefInst = Def->getMemoryInst();
87     LLVM_DEBUG(dbgs() << "  Def: " << *DefInst << '\n');
88 
89     if (isa<FenceInst>(DefInst))
90       return false;
91 
92     if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(DefInst)) {
93       switch (II->getIntrinsicID()) {
94       case Intrinsic::amdgcn_s_barrier:
95       case Intrinsic::amdgcn_wave_barrier:
96         return false;
97       default:
98         break;
99       }
100     }
101 
102     // Ignore atomics not aliasing with the original load, any atomic is a
103     // universal MemoryDef from MSSA's point of view too, just like a fence.
104     const auto checkNoAlias = [this, Load](auto I) -> bool {
105       return I && AA->isNoAlias(I->getPointerOperand(),
106                                 Load->getPointerOperand());
107     };
108 
109     if (checkNoAlias(dyn_cast<AtomicCmpXchgInst>(DefInst)) ||
110         checkNoAlias(dyn_cast<AtomicRMWInst>(DefInst)))
111       return false;
112 
113     return true;
114   };
115 
116   LLVM_DEBUG(dbgs() << "Checking clobbering of: " << *Load << '\n');
117 
118   // Start with a nearest dominating clobbering access, it will be either
119   // live on entry (nothing to do, load is not clobbered), MemoryDef, or
120   // MemoryPhi if several MemoryDefs can define this memory state. In that
121   // case add all Defs to WorkList and continue going up and checking all
122   // the definitions of this memory location until the root. When all the
123   // defs are exhausted and came to the entry state we have no clobber.
124   // Along the scan ignore barriers and fences which are considered clobbers
125   // by the MemorySSA, but not really writing anything into the memory.
126   while (!WorkList.empty()) {
127     MemoryAccess *MA = WorkList.pop_back_val();
128     if (!Visited.insert(MA).second)
129       continue;
130 
131     if (MSSA->isLiveOnEntryDef(MA))
132       continue;
133 
134     if (MemoryDef *Def = dyn_cast<MemoryDef>(MA)) {
135       if (isReallyAClobber(Def)) {
136         LLVM_DEBUG(dbgs() << "      -> load is clobbered\n");
137         return true;
138       }
139 
140       WorkList.push_back(
141           Walker->getClobberingMemoryAccess(Def->getDefiningAccess(), Loc));
142       continue;
143     }
144 
145     const MemoryPhi *Phi = cast<MemoryPhi>(MA);
146     for (auto &Use : Phi->incoming_values())
147       WorkList.push_back(cast<MemoryAccess>(&Use));
148   }
149 
150   LLVM_DEBUG(dbgs() << "      -> no clobber\n");
151   return false;
152 }
153 
154 void AMDGPUAnnotateUniformValues::visitBranchInst(BranchInst &I) {
155   if (DA->isUniform(&I))
156     setUniformMetadata(&I);
157 }
158 
159 void AMDGPUAnnotateUniformValues::visitLoadInst(LoadInst &I) {
160   Value *Ptr = I.getPointerOperand();
161   if (!DA->isUniform(Ptr))
162     return;
163   // We're tracking up to the Function boundaries, and cannot go beyond because
164   // of FunctionPass restrictions. We can ensure that is memory not clobbered
165   // for memory operations that are live in to entry points only.
166   Instruction *PtrI = dyn_cast<Instruction>(Ptr);
167 
168   if (!isEntryFunc) {
169     if (PtrI)
170       setUniformMetadata(PtrI);
171     return;
172   }
173 
174   bool NotClobbered = false;
175   bool GlobalLoad = I.getPointerAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;
176   if (PtrI)
177     NotClobbered = GlobalLoad && !isClobberedInFunction(&I);
178   else if (isa<Argument>(Ptr) || isa<GlobalValue>(Ptr)) {
179     if (GlobalLoad && !isClobberedInFunction(&I)) {
180       NotClobbered = true;
181       // Lookup for the existing GEP
182       if (noClobberClones.count(Ptr)) {
183         PtrI = noClobberClones[Ptr];
184       } else {
185         // Create GEP of the Value
186         Function *F = I.getParent()->getParent();
187         Value *Idx = Constant::getIntegerValue(
188           Type::getInt32Ty(Ptr->getContext()), APInt(64, 0));
189         // Insert GEP at the entry to make it dominate all uses
190         PtrI = GetElementPtrInst::Create(I.getType(), Ptr,
191                                          ArrayRef<Value *>(Idx), Twine(""),
192                                          F->getEntryBlock().getFirstNonPHI());
193       }
194       I.replaceUsesOfWith(Ptr, PtrI);
195     }
196   }
197 
198   if (PtrI) {
199     setUniformMetadata(PtrI);
200     if (NotClobbered)
201       setNoClobberMetadata(PtrI);
202   }
203 }
204 
205 bool AMDGPUAnnotateUniformValues::doInitialization(Module &M) {
206   return false;
207 }
208 
209 bool AMDGPUAnnotateUniformValues::runOnFunction(Function &F) {
210   if (skipFunction(F))
211     return false;
212 
213   DA = &getAnalysis<LegacyDivergenceAnalysis>();
214   MSSA = &getAnalysis<MemorySSAWrapperPass>().getMSSA();
215   AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
216   isEntryFunc = AMDGPU::isEntryFunctionCC(F.getCallingConv());
217 
218   visit(F);
219   noClobberClones.clear();
220   return true;
221 }
222 
223 FunctionPass *
224 llvm::createAMDGPUAnnotateUniformValues() {
225   return new AMDGPUAnnotateUniformValues();
226 }
227