//===-- AMDGPUMemoryUtils.cpp - -------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "AMDGPUMemoryUtils.h"
#include "AMDGPU.h"
#include "AMDGPUBaseInfo.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/ReplaceConstant.h"

#define DEBUG_TYPE "amdgpu-memory-utils"

using namespace llvm;

namespace llvm {

namespace AMDGPU {

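// Compute the alignment to assume for GV: its pointer alignment where known,
// otherwise the ABI alignment of its value type.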
Align getAlign(const DataLayout &DL, const GlobalVariable *GV) {
  return DL.getValueOrABITypeAlignment(GV->getPointerAlignment(DL),
                                       GV->getValueType());
}

bool isDynamicLDS(const GlobalVariable &GV) {
  // An external, zero-size addrspace(3) variable without an initializer
  // implies CUDA/HIP extern __shared__. The semantics of such a variable
  // appear to be that all extern __shared__ variables alias one another.
  // These variables take a different lowering path from other LDS
  // variables.
  const Module *M = GV.getParent();
  const DataLayout &DL = M->getDataLayout();
  if (GV.getType()->getPointerAddressSpace() != AMDGPUAS::LOCAL_ADDRESS) {
    return false;
  }
  uint64_t AllocSize = DL.getTypeAllocSize(GV.getValueType());
  return GV.hasExternalLinkage() && AllocSize == 0;
}

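// Returns true if GV is an LDS (addrspace(3)) variable that the LDS
// lowering passes are expected to process.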
bool isLDSVariableToLower(const GlobalVariable &GV) {
  if (GV.getType()->getPointerAddressSpace() != AMDGPUAS::LOCAL_ADDRESS) {
    return false;
  }
  if (isDynamicLDS(GV)) {
    return true;
  }
  if (GV.isConstant()) {
    // A constant undef variable can't be written to, and any load is
    // undef, so it should be eliminated by the optimizer. If it is not,
    // the back end may drop it. This pass skips over it.
    return false;
  }
  if (GV.hasInitializer() && !isa<UndefValue>(GV.getInitializer())) {
    // Initializers are unimplemented for LDS address space.
    // Leave such variables in place for consistent error reporting.
    return false;
  }
  return true;
}

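// Given a MemoryDef that MemorySSA reports as clobbering the location of
// Ptr, check whether its instruction can actually write memory aliasing
// Ptr. MemorySSA conservatively models fences, barriers and atomics as
// universal clobbers.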
bool isReallyAClobber(const Value *Ptr, MemoryDef *Def, AAResults *AA) {
  Instruction *DefInst = Def->getMemoryInst();

  if (isa<FenceInst>(DefInst))
    return false;

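  // Barrier and scheduling intrinsics only synchronize or order execution;
  // they do not write memory, so they cannot clobber the load.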
  if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(DefInst)) {
    switch (II->getIntrinsicID()) {
    case Intrinsic::amdgcn_s_barrier:
    case Intrinsic::amdgcn_s_barrier_signal:
    case Intrinsic::amdgcn_s_barrier_signal_var:
    case Intrinsic::amdgcn_s_barrier_signal_isfirst:
    case Intrinsic::amdgcn_s_barrier_signal_isfirst_var:
    case Intrinsic::amdgcn_s_barrier_init:
    case Intrinsic::amdgcn_s_barrier_join:
    case Intrinsic::amdgcn_s_barrier_wait:
    case Intrinsic::amdgcn_s_barrier_leave:
    case Intrinsic::amdgcn_s_get_barrier_state:
    case Intrinsic::amdgcn_s_wakeup_barrier:
    case Intrinsic::amdgcn_wave_barrier:
    case Intrinsic::amdgcn_sched_barrier:
    case Intrinsic::amdgcn_sched_group_barrier:
      return false;
    default:
      break;
    }
  }

  // Ignore atomics that do not alias the original load. Like a fence, any
  // atomic is a universal MemoryDef from MemorySSA's point of view, so the
  // aliasing has to be checked explicitly here.
  const auto checkNoAlias = [AA, Ptr](auto I) -> bool {
    return I && AA->isNoAlias(I->getPointerOperand(), Ptr);
  };

  if (checkNoAlias(dyn_cast<AtomicCmpXchgInst>(DefInst)) ||
      checkNoAlias(dyn_cast<AtomicRMWInst>(DefInst)))
    return false;

  return true;
}

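// Returns true if Load may read a value written within this function, i.e.
// the load is not known to be fed by the live-on-entry memory state.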
bool isClobberedInFunction(const LoadInst *Load, MemorySSA *MSSA,
                           AAResults *AA) {
  MemorySSAWalker *Walker = MSSA->getWalker();
  SmallVector<MemoryAccess *> WorkList{Walker->getClobberingMemoryAccess(Load)};
  SmallSet<MemoryAccess *, 8> Visited;
  MemoryLocation Loc(MemoryLocation::get(Load));

  LLVM_DEBUG(dbgs() << "Checking clobbering of: " << *Load << '\n');

  // Start with the nearest dominating clobbering access. It is either live
  // on entry (nothing to do, the load is not clobbered), a MemoryDef, or a
  // MemoryPhi if several MemoryDefs can define this memory state. In the
  // MemoryPhi case add all of its defs to the WorkList and keep walking up
  // through the definitions of this memory location until the root is
  // reached. If every path ends at the live-on-entry state, there is no
  // clobber. Along the way ignore barriers and fences, which MemorySSA
  // treats as clobbers even though they do not write any memory.
  while (!WorkList.empty()) {
    MemoryAccess *MA = WorkList.pop_back_val();
    if (!Visited.insert(MA).second)
      continue;

    if (MSSA->isLiveOnEntryDef(MA))
      continue;

    if (MemoryDef *Def = dyn_cast<MemoryDef>(MA)) {
      LLVM_DEBUG(dbgs() << "  Def: " << *Def->getMemoryInst() << '\n');

      if (isReallyAClobber(Load->getPointerOperand(), Def, AA)) {
        LLVM_DEBUG(dbgs() << "      -> load is clobbered\n");
        return true;
      }

      WorkList.push_back(
          Walker->getClobberingMemoryAccess(Def->getDefiningAccess(), Loc));
      continue;
    }

    const MemoryPhi *Phi = cast<MemoryPhi>(MA);
    for (const auto &Use : Phi->incoming_values())
      WorkList.push_back(cast<MemoryAccess>(&Use));
  }

  LLVM_DEBUG(dbgs() << "      -> no clobber\n");
  return false;
}

} // end namespace AMDGPU

} // end namespace llvm