1 //===-- AMDGPUMemoryUtils.cpp - -------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "AMDGPUMemoryUtils.h"
10 #include "AMDGPU.h"
11 #include "AMDGPUBaseInfo.h"
12 #include "llvm/ADT/SmallSet.h"
13 #include "llvm/Analysis/AliasAnalysis.h"
14 #include "llvm/Analysis/MemorySSA.h"
15 #include "llvm/IR/DataLayout.h"
16 #include "llvm/IR/Instructions.h"
17 #include "llvm/IR/IntrinsicInst.h"
18 #include "llvm/IR/IntrinsicsAMDGPU.h"
19 #include "llvm/IR/ReplaceConstant.h"
20 
21 #define DEBUG_TYPE "amdgpu-memory-utils"
22 
23 using namespace llvm;
24 
25 namespace llvm {
26 
27 namespace AMDGPU {
28 
29 Align getAlign(DataLayout const &DL, const GlobalVariable *GV) {
30   return DL.getValueOrABITypeAlignment(GV->getPointerAlignment(DL),
31                                        GV->getValueType());
32 }
33 
34 bool isDynamicLDS(const GlobalVariable &GV) {
35   // external zero size addrspace(3) without initializer implies cuda/hip extern
36   // __shared__ the semantics for such a variable appears to be that all extern
37   // __shared__ variables alias one another. This hits different handling.
38   const Module *M = GV.getParent();
39   const DataLayout &DL = M->getDataLayout();
40   if (GV.getType()->getPointerAddressSpace() != AMDGPUAS::LOCAL_ADDRESS) {
41     return false;
42   }
43   uint64_t AllocSize = DL.getTypeAllocSize(GV.getValueType());
44   return GV.hasExternalLinkage() && AllocSize == 0;
45 }
46 
47 bool isLDSVariableToLower(const GlobalVariable &GV) {
48   if (GV.getType()->getPointerAddressSpace() != AMDGPUAS::LOCAL_ADDRESS) {
49     return false;
50   }
51   if (isDynamicLDS(GV)) {
52     return true;
53   }
54   if (GV.isConstant()) {
55     // A constant undef variable can't be written to, and any load is
56     // undef, so it should be eliminated by the optimizer. It could be
57     // dropped by the back end if not. This pass skips over it.
58     return false;
59   }
60   if (GV.hasInitializer() && !isa<UndefValue>(GV.getInitializer())) {
61     // Initializers are unimplemented for LDS address space.
62     // Leave such variables in place for consistent error reporting.
63     return false;
64   }
65   return true;
66 }
67 
68 bool isReallyAClobber(const Value *Ptr, MemoryDef *Def, AAResults *AA) {
69   Instruction *DefInst = Def->getMemoryInst();
70 
71   if (isa<FenceInst>(DefInst))
72     return false;
73 
74   if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(DefInst)) {
75     switch (II->getIntrinsicID()) {
76     case Intrinsic::amdgcn_s_barrier:
77     case Intrinsic::amdgcn_wave_barrier:
78     case Intrinsic::amdgcn_sched_barrier:
79     case Intrinsic::amdgcn_sched_group_barrier:
80       return false;
81     default:
82       break;
83     }
84   }
85 
86   // Ignore atomics not aliasing with the original load, any atomic is a
87   // universal MemoryDef from MSSA's point of view too, just like a fence.
88   const auto checkNoAlias = [AA, Ptr](auto I) -> bool {
89     return I && AA->isNoAlias(I->getPointerOperand(), Ptr);
90   };
91 
92   if (checkNoAlias(dyn_cast<AtomicCmpXchgInst>(DefInst)) ||
93       checkNoAlias(dyn_cast<AtomicRMWInst>(DefInst)))
94     return false;
95 
96   return true;
97 }
98 
// Return true if \p Load may be clobbered by any store/atomic anywhere in its
// function, according to MemorySSA refined by \p AA and isReallyAClobber().
// Returns false only when every dominating memory definition is proven not to
// really write memory aliasing the load's pointer.
bool isClobberedInFunction(const LoadInst *Load, MemorySSA *MSSA,
                           AAResults *AA) {
  MemorySSAWalker *Walker = MSSA->getWalker();
  SmallVector<MemoryAccess *> WorkList{Walker->getClobberingMemoryAccess(Load)};
  SmallSet<MemoryAccess *, 8> Visited;
  MemoryLocation Loc(MemoryLocation::get(Load));

  LLVM_DEBUG(dbgs() << "Checking clobbering of: " << *Load << '\n');

  // Start with a nearest dominating clobbering access, it will be either
  // live on entry (nothing to do, load is not clobbered), MemoryDef, or
  // MemoryPhi if several MemoryDefs can define this memory state. In that
  // case add all Defs to WorkList and continue going up and checking all
  // the definitions of this memory location until the root. When all the
  // defs are exhausted and came to the entry state we have no clobber.
  // Along the scan ignore barriers and fences which are considered clobbers
  // by the MemorySSA, but not really writing anything into the memory.
  while (!WorkList.empty()) {
    MemoryAccess *MA = WorkList.pop_back_val();
    // Skip accesses already examined (the walk can revisit through phis).
    if (!Visited.insert(MA).second)
      continue;

    // Reached the function-entry state on this path: no clobber here.
    if (MSSA->isLiveOnEntryDef(MA))
      continue;

    if (MemoryDef *Def = dyn_cast<MemoryDef>(MA)) {
      LLVM_DEBUG(dbgs() << "  Def: " << *Def->getMemoryInst() << '\n');

      // A def that genuinely writes aliasing memory ends the search.
      if (isReallyAClobber(Load->getPointerOperand(), Def, AA)) {
        LLVM_DEBUG(dbgs() << "      -> load is clobbered\n");
        return true;
      }

      // False clobber (fence/barrier/non-aliasing atomic): keep walking up
      // from this def's defining access, re-querying for Loc.
      WorkList.push_back(
          Walker->getClobberingMemoryAccess(Def->getDefiningAccess(), Loc));
      continue;
    }

    // MemoryPhi: every incoming memory state must be checked.
    const MemoryPhi *Phi = cast<MemoryPhi>(MA);
    for (const auto &Use : Phi->incoming_values())
      WorkList.push_back(cast<MemoryAccess>(&Use));
  }

  LLVM_DEBUG(dbgs() << "      -> no clobber\n");
  return false;
}
145 
146 } // end namespace AMDGPU
147 
148 } // end namespace llvm
149