1 /*========================== begin_copyright_notice ============================
2
3 Copyright (C) 2018-2021 Intel Corporation
4
5 SPDX-License-Identifier: MIT
6
7 ============================= end_copyright_notice ===========================*/
8
9 #include "common/LLVMWarningsPush.hpp"
10 #include <llvm/ADT/PostOrderIterator.h>
11 #include <llvm/Analysis/LoopInfo.h>
12 #include <llvm/Analysis/ScalarEvolution.h>
13 #include <llvm/Analysis/ScalarEvolutionExpressions.h>
14 #include <llvm/IR/CFG.h>
15 #include <llvm/IR/PatternMatch.h>
16 #include <llvm/Pass.h>
17 #include <llvm/Support/Debug.h>
18 #include <llvm/Support/raw_ostream.h>
19 #include <llvm/Transforms/Utils/Local.h>
20 #include "llvmWrapper/Transforms/Utils/LoopUtils.h"
21 #include "common/LLVMWarningsPop.hpp"
22 #include "GenISAIntrinsics/GenIntrinsics.h"
23 #include "Compiler/CISACodeGen/ShaderCodeGen.hpp"
24 #include "Compiler/IGCPassSupport.h"
25 #include "Compiler/MetaDataUtilsWrapper.h"
26 #include "Compiler/CISACodeGen/AdvMemOpt.h"
27 #include "Compiler/CISACodeGen/WIAnalysis.hpp"
28 #include "Probe/Assertion.h"
29
30 using namespace llvm;
31 using namespace llvm::PatternMatch;
32 using namespace IGC;
33 using namespace IGC::IGCMD;
34
35 namespace {
36
37 class AdvMemOpt : public FunctionPass {
38 DominatorTree* DT;
39 LoopInfo* LI;
40 PostDominatorTree* PDT;
41 ScalarEvolution* SE;
42 WIAnalysis* WI;
43
44 public:
45 static char ID;
46
AdvMemOpt()47 AdvMemOpt() : FunctionPass(ID) {
48 initializeAdvMemOptPass(*PassRegistry::getPassRegistry());
49 }
50
51 bool runOnFunction(Function& F) override;
52
getPassName() const53 StringRef getPassName() const override { return "Advanced MemOpt"; }
54
55 private:
getAnalysisUsage(AnalysisUsage & AU) const56 void getAnalysisUsage(AnalysisUsage& AU) const override {
57 AU.setPreservesCFG();
58 AU.addPreservedID(WIAnalysis::ID);
59 AU.addRequired<CodeGenContextWrapper>();
60 AU.addRequired<MetaDataUtilsWrapper>();
61 AU.addRequired<WIAnalysis>();
62 AU.addRequired<DominatorTreeWrapperPass>();
63 AU.addRequired<LoopInfoWrapperPass>();
64 AU.addRequired<PostDominatorTreeWrapperPass>();
65 AU.addRequired<ScalarEvolutionWrapperPass>();
66 }
67
68 bool collectOperandInst(SmallPtrSetImpl<Instruction*>&,
69 Instruction*, BasicBlock*) const;
70 bool collectTrivialUser(SmallPtrSetImpl<Instruction*>&,
71 Instruction*) const;
72 bool hoistUniformLoad(LoadInst*, BasicBlock*) const;
73 bool hoistUniformLoad(ArrayRef<BasicBlock*>) const;
74
75 bool isLeadCandidate(BasicBlock*) const;
76
77 bool hasMemoryWrite(BasicBlock* BB) const;
78 bool hasMemoryWrite(BasicBlock* Entry, BasicBlock* Exit) const;
79 };
80
81 char AdvMemOpt::ID = 0;
82
83 } // End anonymous namespace
84
createAdvMemOptPass()85 FunctionPass* IGC::createAdvMemOptPass() {
86 return new AdvMemOpt();
87 }
88
89 #define PASS_FLAG "igc-advmemopt"
90 #define PASS_DESC "Advanced Memory Optimization"
91 #define PASS_CFG_ONLY false
92 #define PASS_ANALYSIS false
93 namespace IGC {
94 IGC_INITIALIZE_PASS_BEGIN(AdvMemOpt, PASS_FLAG, PASS_DESC, PASS_CFG_ONLY, PASS_ANALYSIS)
95 IGC_INITIALIZE_PASS_DEPENDENCY(WIAnalysis)
96 IGC_INITIALIZE_PASS_DEPENDENCY(CodeGenContextWrapper)
97 IGC_INITIALIZE_PASS_DEPENDENCY(MetaDataUtilsWrapper)
98 IGC_INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
99 IGC_INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
100 IGC_INITIALIZE_PASS_DEPENDENCY(PostDominatorTreeWrapperPass)
101 IGC_INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass);
102 IGC_INITIALIZE_PASS_END(AdvMemOpt, PASS_FLAG, PASS_DESC, PASS_CFG_ONLY, PASS_ANALYSIS)
103 } // End namespace IGC
104
runOnFunction(Function & F)105 bool AdvMemOpt::runOnFunction(Function& F) {
106 // Skip non-kernel function.
107 MetaDataUtils* MDU = nullptr;
108 MDU = getAnalysis<MetaDataUtilsWrapper>().getMetaDataUtils();
109 auto FII = MDU->findFunctionsInfoItem(&F);
110 if (FII == MDU->end_FunctionsInfo())
111 return false;
112
113 DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
114 PDT = &getAnalysis<PostDominatorTreeWrapperPass>().getPostDomTree();
115 LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
116 SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
117 WI = &getAnalysis<WIAnalysis>();
118
119 SmallVector<Loop*, 8> InnermostLoops;
120 for (auto I = LI->begin(), E = LI->end(); I != E; ++I)
121 for (auto DFI = df_begin(*I), DFE = df_end(*I); DFI != DFE; ++DFI) {
122 Loop* L = *DFI;
123 if (IGCLLVM::isInnermost(L))
124 InnermostLoops.push_back(L);
125 }
126
127 for (Loop* L : InnermostLoops) {
128 SmallVector<BasicBlock*, 8> Line;
129 BasicBlock* BB = L->getHeader();
130 while (BB) {
131 Line.push_back(BB);
132 BasicBlock* CurrBB = BB;
133 BB = nullptr;
134 for (auto BI = succ_begin(CurrBB),
135 BE = succ_end(CurrBB); BI != BE; ++BI) {
136 BasicBlock* OtherBB = *BI;
137 if (CurrBB == OtherBB || !L->contains(OtherBB))
138 continue;
139 if (DT->dominates(CurrBB, OtherBB) && PDT->dominates(OtherBB, CurrBB)) {
140 BB = OtherBB;
141 break;
142 }
143 }
144 }
145 hoistUniformLoad(Line);
146 }
147
148 return false;
149 }
150
isLeadCandidate(BasicBlock * BB) const151 bool AdvMemOpt::isLeadCandidate(BasicBlock* BB) const {
152 // A candidate lead should have at least one uniform loads. In addition,
153 // there's no instruction might to write memory from the last uniform loads
154 // to the end.
155 for (auto II = BB->rbegin(), IE = BB->rend(); II != IE; ++II) {
156 if (II->mayWriteToMemory())
157 return false;
158 LoadInst* LD = dyn_cast<LoadInst>(&*II);
159 if (!LD || !WI->isUniform(LD))
160 continue;
161 // Found uniform loads.
162 return true;
163 }
164 return false;
165 }
166
167 namespace {
168 class RegionSubgraph {
169 BasicBlock* Exit;
170 SmallPtrSet<BasicBlock*, 32> Visited;
171
172 public:
RegionSubgraph(BasicBlock * E)173 RegionSubgraph(BasicBlock* E) : Exit(E) {}
174
preVisit(Optional<BasicBlock * > From,BasicBlock * To)175 bool preVisit(Optional<BasicBlock*> From, BasicBlock* To) {
176 if (To == Exit)
177 return false;
178 return Visited.insert(To).second;
179 }
180 };
181 } // End anonymous namespace
182
183 namespace llvm {
184 template<>
185 class po_iterator_storage<RegionSubgraph, true> {
186 RegionSubgraph& RSG;
187
188 public:
po_iterator_storage(RegionSubgraph & G)189 po_iterator_storage(RegionSubgraph& G) : RSG(G) {}
190
insertEdge(Optional<BasicBlock * > From,BasicBlock * To)191 bool insertEdge(Optional<BasicBlock*> From, BasicBlock* To) {
192 return RSG.preVisit(From, To);
193 }
finishPostorder(BasicBlock *)194 void finishPostorder(BasicBlock*) {}
195 };
196 } // End llvm namespace
197
hasMemoryWrite(BasicBlock * BB) const198 bool AdvMemOpt::hasMemoryWrite(BasicBlock* BB) const {
199 for (auto II = BB->begin(), IE = BB->end(); II != IE; ++II)
200 if (II->mayWriteToMemory())
201 return true;
202 return false;
203 }
204
hasMemoryWrite(BasicBlock * Entry,BasicBlock * Exit) const205 bool AdvMemOpt::hasMemoryWrite(BasicBlock* Entry, BasicBlock* Exit) const {
206 // Entry and Exit must be on line of code.
207 IGC_ASSERT(nullptr != DT);
208 IGC_ASSERT(DT->dominates(Entry, Exit));
209 IGC_ASSERT(nullptr != PDT);
210 IGC_ASSERT(PDT->dominates(Exit, Entry));
211
212 RegionSubgraph RSG(Exit);
213 for (auto SI = po_ext_begin(Entry, RSG),
214 SE = po_ext_end(Entry, RSG); SI != SE; ++SI)
215 if (*SI != Entry && hasMemoryWrite(*SI))
216 return true;
217 return false;
218 }
219
collectOperandInst(SmallPtrSetImpl<Instruction * > & Set,Instruction * Inst,BasicBlock * LeadingBlock) const220 bool AdvMemOpt::collectOperandInst(SmallPtrSetImpl<Instruction*>& Set,
221 Instruction* Inst, BasicBlock* LeadingBlock) const {
222 for (Value* V : Inst->operands()) {
223 Instruction* I = dyn_cast<Instruction>(V);
224 if (!I)
225 continue;
226 else if (I->getParent() != Inst->getParent())
227 {
228 // moving load instruction can be done only if operands
229 // comes from the same basic block or a dominator of
230 // the destination basic block. The condition is required
231 // to counteract using uninitialized or wrong filled registers
232 if (DT->dominates(I->getParent(), LeadingBlock))
233 continue;
234 else
235 return true;
236 }
237 if (isa<PHINode>(I) || collectOperandInst(Set, I, LeadingBlock))
238 return true;
239 }
240 Set.insert(Inst);
241 return false;
242 }
243
collectTrivialUser(SmallPtrSetImpl<Instruction * > & Set,Instruction * Inst) const244 bool AdvMemOpt::collectTrivialUser(SmallPtrSetImpl<Instruction*>& Set,
245 Instruction* Inst) const {
246 for (auto* U : Inst->users()) {
247 Instruction* I = dyn_cast<Instruction>(U);
248 if (!I || I->getParent() != Inst->getParent())
249 continue;
250 if (!isa<BitCastInst>(I) && !isa<ExtractElementInst>(I))
251 continue;
252 if (collectTrivialUser(Set, I))
253 return true;
254 }
255 Set.insert(Inst);
256 return false;
257 }
258
hoistUniformLoad(LoadInst * LD,BasicBlock * BB) const259 bool AdvMemOpt::hoistUniformLoad(LoadInst* LD, BasicBlock* BB) const {
260 SmallPtrSet<Instruction*, 32> ToHoist;
261 if (collectOperandInst(ToHoist, LD, BB))
262 return false;
263 if (collectTrivialUser(ToHoist, LD))
264 return false;
265 BasicBlock* FromBB = LD->getParent();
266 Instruction* Pos = BB->getTerminator();
267 for (auto II = FromBB->getFirstNonPHI()->getIterator(),
268 IE = FromBB->end(); II != IE; /*EMPTY*/) {
269 Instruction* I = &*II++;
270 if (ToHoist.count(I)) {
271 I->moveBefore(Pos);
272 ToHoist.erase(I);
273 if (ToHoist.empty())
274 break;
275 }
276 }
277 return true;
278 }
279
hoistUniformLoad(ArrayRef<BasicBlock * > Line) const280 bool AdvMemOpt::hoistUniformLoad(ArrayRef<BasicBlock*> Line) const {
281 bool Changed = false;
282 // Find the lead BB where to hoist uniform load.
283 auto BI = Line.begin();
284 auto BE = Line.end();
285 while (BI != BE) {
286 if (!isLeadCandidate(*BI)) {
287 ++BI;
288 continue;
289 }
290 // Found lead.
291 BasicBlock* Lead = *BI++;
292 BasicBlock* Prev = Lead;
293 for (; BI != BE; ++BI) {
294 BasicBlock* Curr = *BI;
295 // Check whether it's safe to hoist uniform loads from Curr to Lead by
296 // checking all blocks between Prev and Curr.
297 if (hasMemoryWrite(Prev, Curr))
298 break;
299 // Hoist uniform loads from Curr into Lead.
300 for (auto II = Curr->getFirstNonPHI()->getIterator(),
301 IE = Curr->end(); II != IE; /*EMPTY*/) {
302 if (II->mayWriteToMemory())
303 break;
304 LoadInst* LD = dyn_cast<LoadInst>(&*II++);
305 if (!LD || !WI->isUniform(LD))
306 continue;
307 if (!hoistUniformLoad(LD, Lead))
308 break; // Bail out if any uniform load could not be hoisted safely.
309 // Reset iterator
310 II = Curr->getFirstNonPHI()->getIterator();
311 Changed = true;
312 }
313 // After hoisting uniform loads safely, if Curr has memory write, stop
314 // hoisting further.
315 if (hasMemoryWrite(Curr))
316 break;
317 }
318 }
319 return Changed;
320 }
321