1 /*========================== begin_copyright_notice ============================
2 
3 Copyright (C) 2018-2021 Intel Corporation
4 
5 SPDX-License-Identifier: MIT
6 
7 ============================= end_copyright_notice ===========================*/
8 
9 #include "common/LLVMWarningsPush.hpp"
10 #include <llvm/ADT/PostOrderIterator.h>
11 #include <llvm/Analysis/LoopInfo.h>
12 #include <llvm/Analysis/ScalarEvolution.h>
13 #include <llvm/Analysis/ScalarEvolutionExpressions.h>
14 #include <llvm/IR/CFG.h>
15 #include <llvm/IR/PatternMatch.h>
16 #include <llvm/Pass.h>
17 #include <llvm/Support/Debug.h>
18 #include <llvm/Support/raw_ostream.h>
19 #include <llvm/Transforms/Utils/Local.h>
20 #include "llvmWrapper/Transforms/Utils/LoopUtils.h"
21 #include "common/LLVMWarningsPop.hpp"
22 #include "GenISAIntrinsics/GenIntrinsics.h"
23 #include "Compiler/CISACodeGen/ShaderCodeGen.hpp"
24 #include "Compiler/IGCPassSupport.h"
25 #include "Compiler/MetaDataUtilsWrapper.h"
26 #include "Compiler/CISACodeGen/AdvMemOpt.h"
27 #include "Compiler/CISACodeGen/WIAnalysis.hpp"
28 #include "Probe/Assertion.h"
29 
30 using namespace llvm;
31 using namespace llvm::PatternMatch;
32 using namespace IGC;
33 using namespace IGC::IGCMD;
34 
namespace {

    // Hoists uniform (work-item invariant) loads inside innermost loops:
    // loads scattered along a straight-line chain of blocks are moved into a
    // single "lead" block so later memory optimizations can coalesce them.
    class AdvMemOpt : public FunctionPass {
        DominatorTree* DT;
        LoopInfo* LI;
        PostDominatorTree* PDT;
        ScalarEvolution* SE;
        WIAnalysis* WI;

    public:
        static char ID;

        AdvMemOpt() : FunctionPass(ID) {
            initializeAdvMemOptPass(*PassRegistry::getPassRegistry());
        }

        bool runOnFunction(Function& F) override;

        StringRef getPassName() const override { return "Advanced MemOpt"; }

    private:
        void getAnalysisUsage(AnalysisUsage& AU) const override {
            // The pass only moves instructions between existing blocks, so
            // the CFG (and WIAnalysis) remain valid.
            AU.setPreservesCFG();
            AU.addPreservedID(WIAnalysis::ID);
            AU.addRequired<CodeGenContextWrapper>();
            AU.addRequired<MetaDataUtilsWrapper>();
            AU.addRequired<WIAnalysis>();
            AU.addRequired<DominatorTreeWrapperPass>();
            AU.addRequired<LoopInfoWrapperPass>();
            AU.addRequired<PostDominatorTreeWrapperPass>();
            AU.addRequired<ScalarEvolutionWrapperPass>();
        }

        // Collects Inst plus its same-block operand chain into Set; returns
        // true when hoisting is unsafe (PHI operand or a non-dominating def).
        bool collectOperandInst(SmallPtrSetImpl<Instruction*>&,
            Instruction*, BasicBlock*) const;
        // Collects trivial same-block users (bitcast/extractelement) of Inst.
        bool collectTrivialUser(SmallPtrSetImpl<Instruction*>&,
            Instruction*) const;
        // Moves a single uniform load (and its collected companions) into BB.
        bool hoistUniformLoad(LoadInst*, BasicBlock*) const;
        // Hoists uniform loads along a straight-line chain of blocks.
        bool hoistUniformLoad(ArrayRef<BasicBlock*>) const;

        bool isLeadCandidate(BasicBlock*) const;

        bool hasMemoryWrite(BasicBlock* BB) const;
        bool hasMemoryWrite(BasicBlock* Entry, BasicBlock* Exit) const;
    };

    char AdvMemOpt::ID = 0;

} // End anonymous namespace
84 
// Factory entry point used by the pass-pipeline construction code.
FunctionPass* IGC::createAdvMemOptPass() {
    return new AdvMemOpt();
}
88 
89 #define PASS_FLAG     "igc-advmemopt"
90 #define PASS_DESC     "Advanced Memory Optimization"
91 #define PASS_CFG_ONLY false
92 #define PASS_ANALYSIS false
93 namespace IGC {
94     IGC_INITIALIZE_PASS_BEGIN(AdvMemOpt, PASS_FLAG, PASS_DESC, PASS_CFG_ONLY, PASS_ANALYSIS)
95         IGC_INITIALIZE_PASS_DEPENDENCY(WIAnalysis)
96         IGC_INITIALIZE_PASS_DEPENDENCY(CodeGenContextWrapper)
97         IGC_INITIALIZE_PASS_DEPENDENCY(MetaDataUtilsWrapper)
98         IGC_INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
99         IGC_INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
100         IGC_INITIALIZE_PASS_DEPENDENCY(PostDominatorTreeWrapperPass)
101         IGC_INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass);
102     IGC_INITIALIZE_PASS_END(AdvMemOpt, PASS_FLAG, PASS_DESC, PASS_CFG_ONLY, PASS_ANALYSIS)
103 } // End namespace IGC
104 
runOnFunction(Function & F)105 bool AdvMemOpt::runOnFunction(Function& F) {
106     // Skip non-kernel function.
107     MetaDataUtils* MDU = nullptr;
108     MDU = getAnalysis<MetaDataUtilsWrapper>().getMetaDataUtils();
109     auto FII = MDU->findFunctionsInfoItem(&F);
110     if (FII == MDU->end_FunctionsInfo())
111         return false;
112 
113     DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
114     PDT = &getAnalysis<PostDominatorTreeWrapperPass>().getPostDomTree();
115     LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
116     SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
117     WI = &getAnalysis<WIAnalysis>();
118 
119     SmallVector<Loop*, 8> InnermostLoops;
120     for (auto I = LI->begin(), E = LI->end(); I != E; ++I)
121         for (auto DFI = df_begin(*I), DFE = df_end(*I); DFI != DFE; ++DFI) {
122             Loop* L = *DFI;
123             if (IGCLLVM::isInnermost(L))
124                 InnermostLoops.push_back(L);
125         }
126 
127     for (Loop* L : InnermostLoops) {
128         SmallVector<BasicBlock*, 8> Line;
129         BasicBlock* BB = L->getHeader();
130         while (BB) {
131             Line.push_back(BB);
132             BasicBlock* CurrBB = BB;
133             BB = nullptr;
134             for (auto BI = succ_begin(CurrBB),
135                 BE = succ_end(CurrBB); BI != BE; ++BI) {
136                 BasicBlock* OtherBB = *BI;
137                 if (CurrBB == OtherBB || !L->contains(OtherBB))
138                     continue;
139                 if (DT->dominates(CurrBB, OtherBB) && PDT->dominates(OtherBB, CurrBB)) {
140                     BB = OtherBB;
141                     break;
142                 }
143             }
144         }
145         hoistUniformLoad(Line);
146     }
147 
148     return false;
149 }
150 
isLeadCandidate(BasicBlock * BB) const151 bool AdvMemOpt::isLeadCandidate(BasicBlock* BB) const {
152     // A candidate lead should have at least one uniform loads. In addition,
153     // there's no instruction might to write memory from the last uniform loads
154     // to the end.
155     for (auto II = BB->rbegin(), IE = BB->rend(); II != IE; ++II) {
156         if (II->mayWriteToMemory())
157             return false;
158         LoadInst* LD = dyn_cast<LoadInst>(&*II);
159         if (!LD || !WI->isUniform(LD))
160             continue;
161         // Found uniform loads.
162         return true;
163     }
164     return false;
165 }
166 
167 namespace {
168     class RegionSubgraph {
169         BasicBlock* Exit;
170         SmallPtrSet<BasicBlock*, 32> Visited;
171 
172     public:
RegionSubgraph(BasicBlock * E)173         RegionSubgraph(BasicBlock* E) : Exit(E) {}
174 
preVisit(Optional<BasicBlock * > From,BasicBlock * To)175         bool preVisit(Optional<BasicBlock*> From, BasicBlock* To) {
176             if (To == Exit)
177                 return false;
178             return Visited.insert(To).second;
179         }
180     };
181 } // End anonymous namespace
182 
namespace llvm {
    // Specialization of the external-storage post-order iterator traits so
    // the visited set (and the region-boundary pruning) lives inside
    // RegionSubgraph rather than in the iterator itself.
    template<>
    class po_iterator_storage<RegionSubgraph, true> {
        RegionSubgraph& RSG;

    public:
        po_iterator_storage(RegionSubgraph& G) : RSG(G) {}

        // Called before each edge is traversed; returning false prunes the
        // walk at 'To'.
        bool insertEdge(Optional<BasicBlock*> From, BasicBlock* To) {
            return RSG.preVisit(From, To);
        }
        void finishPostorder(BasicBlock*) {}
    };
} // End llvm namespace
197 
hasMemoryWrite(BasicBlock * BB) const198 bool AdvMemOpt::hasMemoryWrite(BasicBlock* BB) const {
199     for (auto II = BB->begin(), IE = BB->end(); II != IE; ++II)
200         if (II->mayWriteToMemory())
201             return true;
202     return false;
203 }
204 
bool AdvMemOpt::hasMemoryWrite(BasicBlock* Entry, BasicBlock* Exit) const {
    // Entry and Exit must lie on one straight line of code: Entry dominates
    // Exit and Exit post-dominates Entry, making the region between them a
    // single-entry / single-exit subgraph.
    IGC_ASSERT(nullptr != DT);
    IGC_ASSERT(DT->dominates(Entry, Exit));
    IGC_ASSERT(nullptr != PDT);
    IGC_ASSERT(PDT->dominates(Exit, Entry));

    // Post-order walk of the region, pruned at Exit by RegionSubgraph.
    // Entry itself is skipped: callers have already accounted for it.
    RegionSubgraph RSG(Exit);
    for (auto SI = po_ext_begin(Entry, RSG),
        SE = po_ext_end(Entry, RSG); SI != SE; ++SI)
        if (*SI != Entry && hasMemoryWrite(*SI))
            return true;
    return false;
}
219 
collectOperandInst(SmallPtrSetImpl<Instruction * > & Set,Instruction * Inst,BasicBlock * LeadingBlock) const220 bool AdvMemOpt::collectOperandInst(SmallPtrSetImpl<Instruction*>& Set,
221     Instruction* Inst, BasicBlock* LeadingBlock) const {
222     for (Value* V : Inst->operands()) {
223         Instruction* I = dyn_cast<Instruction>(V);
224         if (!I)
225             continue;
226         else if (I->getParent() != Inst->getParent())
227         {
228             // moving load instruction can be done only if operands
229             // comes from the same basic block or a dominator of
230             // the destination basic block. The condition is required
231             // to counteract using uninitialized or wrong filled registers
232             if (DT->dominates(I->getParent(), LeadingBlock))
233                 continue;
234             else
235                 return true;
236         }
237         if (isa<PHINode>(I) || collectOperandInst(Set, I, LeadingBlock))
238             return true;
239     }
240     Set.insert(Inst);
241     return false;
242 }
243 
collectTrivialUser(SmallPtrSetImpl<Instruction * > & Set,Instruction * Inst) const244 bool AdvMemOpt::collectTrivialUser(SmallPtrSetImpl<Instruction*>& Set,
245     Instruction* Inst) const {
246     for (auto* U : Inst->users()) {
247         Instruction* I = dyn_cast<Instruction>(U);
248         if (!I || I->getParent() != Inst->getParent())
249             continue;
250         if (!isa<BitCastInst>(I) && !isa<ExtractElementInst>(I))
251             continue;
252         if (collectTrivialUser(Set, I))
253             return true;
254     }
255     Set.insert(Inst);
256     return false;
257 }
258 
hoistUniformLoad(LoadInst * LD,BasicBlock * BB) const259 bool AdvMemOpt::hoistUniformLoad(LoadInst* LD, BasicBlock* BB) const {
260     SmallPtrSet<Instruction*, 32> ToHoist;
261     if (collectOperandInst(ToHoist, LD, BB))
262         return false;
263     if (collectTrivialUser(ToHoist, LD))
264         return false;
265     BasicBlock* FromBB = LD->getParent();
266     Instruction* Pos = BB->getTerminator();
267     for (auto II = FromBB->getFirstNonPHI()->getIterator(),
268         IE = FromBB->end(); II != IE; /*EMPTY*/) {
269         Instruction* I = &*II++;
270         if (ToHoist.count(I)) {
271             I->moveBefore(Pos);
272             ToHoist.erase(I);
273             if (ToHoist.empty())
274                 break;
275         }
276     }
277     return true;
278 }
279 
bool AdvMemOpt::hoistUniformLoad(ArrayRef<BasicBlock*> Line) const {
    bool Changed = false;
    // Find the lead BB where to hoist uniform loads: the first block on the
    // line that ends with a uniform load and no trailing memory write.
    auto BI = Line.begin();
    auto BE = Line.end();
    while (BI != BE) {
        if (!isLeadCandidate(*BI)) {
            ++BI;
            continue;
        }
        // Found lead.
        BasicBlock* Lead = *BI++;
        BasicBlock* Prev = Lead;
        for (; BI != BE; ++BI) {
            BasicBlock* Curr = *BI;
            // Check whether it's safe to hoist uniform loads from Curr to
            // Lead by checking all blocks between Prev and Curr for writes.
            // NOTE(review): Prev is never advanced, so the scanned region is
            // always Lead..Curr — redundant but safe, since earlier blocks
            // were already proven write-free; confirm this is intentional.
            if (hasMemoryWrite(Prev, Curr))
                break;
            // Hoist uniform loads from the leading write-free prefix of Curr
            // into Lead.
            for (auto II = Curr->getFirstNonPHI()->getIterator(),
                IE = Curr->end(); II != IE; /*EMPTY*/) {
                if (II->mayWriteToMemory())
                    break;
                LoadInst* LD = dyn_cast<LoadInst>(&*II++);
                if (!LD || !WI->isUniform(LD))
                    continue;
                if (!hoistUniformLoad(LD, Lead))
                    break; // Bail out if any uniform load could not be hoisted safely.
                // Hoisting moved instructions out of Curr and invalidated the
                // iterator; restart from the first non-PHI instruction.
                II = Curr->getFirstNonPHI()->getIterator();
                Changed = true;
            }
            // After hoisting uniform loads safely, if Curr has memory write, stop
            // hoisting further.
            if (hasMemoryWrite(Curr))
                break;
        }
    }
    return Changed;
}
321