1 /*========================== begin_copyright_notice ============================
2 
3 Copyright (C) 2017-2021 Intel Corporation
4 
5 SPDX-License-Identifier: MIT
6 
7 ============================= end_copyright_notice ===========================*/
8 
9 #pragma once
10 #include "Compiler/CISACodeGen/WIAnalysis.hpp"
11 #include "Compiler/CISACodeGen/PatternMatchPass.hpp"
12 #include "Compiler/CISACodeGen/DeSSA.hpp"
13 #include "Compiler/CISACodeGen/CoalescingEngine.hpp"
14 #include "Compiler/CISACodeGen/BlockCoalescing.hpp"
15 #include "common/LLVMWarningsPush.hpp"
16 #include "llvm/ADT/DenseMap.h"
17 #include "llvm/IR/Dominators.h"
18 #include "llvm/ADT/TinyPtrVector.h"
19 #include <llvm/IR/Function.h>
20 #include <llvm/IR/Instructions.h>
21 #include <llvm/IR/InstIterator.h>
22 #include <llvm/IR/InstVisitor.h>
23 #include "llvm/Pass.h"
24 #include "llvmWrapper/IR/DerivedTypes.h"
25 #include "llvm/Support/raw_ostream.h"
26 #include "common/LLVMWarningsPop.hpp"
27 #include "Compiler/CISACodeGen/RegisterEstimator.hpp"
28 #include <list>
29 #include <map>
30 #include <algorithm>
31 #include "Probe/Assertion.h"
32 
33 namespace IGC {
34 
35     struct SSubVecDesc
36     {
37         // Denote a subvector of BaseVector starting at StartElementOffset.
38         // StartElementOffset is in the unit of BaseVector's element type.
39         //
40         // This can potentially denote subvector and basevector relationship
41         // among vector values of different element sizes. For now, subvector
42         // and basevector have the same element size (could be differnt types,
43         // such as int32_t and float, etc). Here is the example showing the
44         // relationship among them:
45         //   Given the aliasing relation:
46         //     Aliaser[0:n] -->  BaseVector[0:m]
47         //   where (StartElementOffset + n) <= m. Then,
48         //     Aliaser = BaseVector[StartElementOffset, StartElementOffset+n]
49 
50         // Aliaser
51         //   It is a dessa node value; either scalar or subvector
52         llvm::Value* Aliaser;
53 
54         // Aliasee:
55         llvm::Value* BaseVector;
56 
57         // Valid only if this entry is for BaseVector, ie, Aliaser == BaseVector
58         llvm::SmallVector<SSubVecDesc*, 16> Aliasers;
59 
60         // In the unit of BaseVector's element size
61         short  StartElementOffset;  // in the unit of BaseVector's element type
62         short  NumElts;             // the number of element of Aliaser
63 
SSubVecDescIGC::SSubVecDesc64         SSubVecDesc(llvm::Value* V)
65             : Aliaser(V), BaseVector(V), StartElementOffset(0)
66         {
67             IGCLLVM::FixedVectorType* VTy = llvm::dyn_cast<IGCLLVM::FixedVectorType>(V->getType());
68             NumElts = VTy ? (short)VTy->getNumElements() : 1;
69         }
70 
71         // Temporary : tobedeleted
SSubVecDescIGC::SSubVecDesc72         SSubVecDesc()
73             : Aliaser(nullptr), BaseVector(nullptr),
74             StartElementOffset(0), NumElts(0)
75         {}
76     };
77 
78     //  Represent a Vector's element at index = EltIx.
79     struct SVecElement {
80         llvm::Value* Vec;
81         llvm::Value* Elem;
82         int          EltIx;
83 
SVecElementIGC::SVecElement84         SVecElement() : Vec(nullptr), Elem(nullptr), EltIx(-1) {}
85     };
86 
87     // A temporary struct for capturing vector/sub-vector relation.
88     // For example:
89     //   extElt:
90     //     s0 = extElt From, 4
91     //     s1 = extElt From, 5
92     //     ...
93     //
94     //   cast:
95     //     s0 = castinst s0
96     //     s1 = castinst s1
97     //     ...
98     //
99     //   insELt:
100     //     V0 = insElt Undef,  s0, 0
101     //     V1 = insElt V0,     s1, 1
102     //     ......
103     //     Vn = insElt Vn-1,   sn, n
104     //
105     // where s0-s1 are typically from extElt, but not necessary.
106     // And they can from different vectors. Sometimes, castInst
107     // are present between ins and ext. Here, two cases are
108     // considered:
109     //
110     //   case 1: Insert to (x0 and x1 are inserted to y)
111     //     case 1.1
112     //        int4 x0, x1;
113     //        int8 y = (x0, x1)
114     //
115     //     case 1.2
116     //        int4 y = (s0, s1, s2, s3)
117     //
118     //   case 2: Extract from (y is extracted from x)
119     //       int8 x
120     //       int4 y = x.s0123 (use half of x)
121     //
122     //
123     // A vector is used to keep this info. Each element of the
124     // vector corresponds to a single IEI. So, for vector size
125     // of N, there are N elements. The vector is defined as the
126     // following inside the class:
127     //    SmallVector<SVecInsExtInfo, 16> VecInsEltInfoTy
128     //
129     struct SVecInsEltInfo {
130         llvm::InsertElementInst* IEI;
131         llvm::Value* Elt;
132 
133         // If Elt is null, EEI must not be null, which
134         // indicates that (FromVec, FromVec_eltIx) is
135         // used as scalar operands in this IEI.
136         llvm::ExtractElementInst* EEI;
137         llvm::Value* FromVec;
138         int          FromVec_eltIx;
139 
SVecInsEltInfoIGC::SVecInsEltInfo140         SVecInsEltInfo()
141             : IEI(nullptr), Elt(nullptr),
142             EEI(nullptr), FromVec(nullptr), FromVec_eltIx(0)
143         {}
144     };
145 
146     /// RPE based analysis for querying variable reuse status.
147     ///
148     /// Let two instructions DInst and UInst be defined in the same basic block,
149     ///
150     /// DInst = ...
151     /// UInst = DInst op Other
152     ///
153     /// and assume it is legal to use the same CVariable for DInst and UInst. This
154     /// analysis determines if this reuse will be applied or not. When overall
155     /// register pressure is low, this decision could be most aggressive. When DInst
156     /// and UInst are acrossing a high pressure region (defined below), then the
157     /// reuse will only be applied less aggressively.
158     ///
159     /// Denote by RPE(x) the estimated register pressure at point x. Let Threshold
160     /// be a predefined threshold constant. We say pair (DInst, UInst) is crossing a
161     /// high register pressure region if
162     ///
163     /// (1) RPE(x) >= Threshold for any x between DInst and UInst (inclusive), or
164     /// (2) RPE(x) >= Threshold for any use x of UInst.
165     ///
166     class VariableReuseAnalysis : public llvm::FunctionPass,
167         public llvm::InstVisitor<VariableReuseAnalysis>
168     {
169     public:
170         static char ID;
171 
172         VariableReuseAnalysis();
~VariableReuseAnalysis()173         ~VariableReuseAnalysis() {}
174 
175         typedef llvm::SmallVector<SVecInsEltInfo, 32> VecInsEltInfoTy;
176         typedef std::map<llvm::Value*, SSubVecDesc*> AliasMapTy;  // ordered map
177         typedef llvm::SmallVector<llvm::Value*, 32> ValueVectorTy;
178         typedef llvm::DenseMap<llvm::Value*, llvm::Value*> Val2ValMapTy;
179 
180         // following to be deleted
181         typedef llvm::DenseMap<llvm::Value*, SSubVecDesc> ValueAliasMapTy;
182         typedef llvm::DenseMap<llvm::Value*, llvm::TinyPtrVector<llvm::Value*> > AliasRootMapTy;
183         typedef llvm::SmallVector<SVecElement, 32> VecEltTy;
184 
185         virtual bool runOnFunction(llvm::Function& F) override;
186 
187         // Need to perform this after WI/LiveVars/DeSSA/CoalescingEnging.
188         // (todo: check if coalescing can be merged into dessa completely)
getAnalysisUsage(llvm::AnalysisUsage & AU) const189         virtual void getAnalysisUsage(llvm::AnalysisUsage& AU) const override {
190             // AU.addRequired<RegisterEstimator>();
191             AU.setPreservesAll();
192             AU.addRequired<llvm::DominatorTreeWrapperPass>();
193             AU.addRequired<WIAnalysis>();
194             AU.addRequired<LiveVarsAnalysis>();
195             AU.addRequired<CodeGenPatternMatch>();
196             AU.addRequired<DeSSA>();
197             AU.addRequired<CoalescingEngine>();
198             AU.addRequired<BlockCoalescing>();
199             AU.addRequired<CodeGenContextWrapper>();
200         }
201 
getPassName() const202         llvm::StringRef getPassName() const override {
203             return "VariableReuseAnalysis";
204         }
205 
206         /// Initialize per-function states. In particular, check if the entire function
207         /// has a low pressure.
BeginFunction(llvm::Function * F,unsigned SimdSize)208         void BeginFunction(llvm::Function* F, unsigned SimdSize) {
209             m_SimdSize = (uint16_t)SimdSize;
210             if (m_RPE) {
211                 if (m_RPE->isGRFPressureLow(m_SimdSize))
212                     m_IsFunctionPressureLow = Status::True;
213                 else
214                     m_IsFunctionPressureLow = Status::False;
215             }
216         }
217 
isCurFunctionPressureLow() const218         bool isCurFunctionPressureLow() const {
219             return m_IsFunctionPressureLow == Status::True;
220         }
221 
isCurBlockPressureLow() const222         bool isCurBlockPressureLow() const {
223             return m_IsBlockPressureLow == Status::True;
224         }
225 
226         /// RAII class to initialize and cleanup basic block level cache.
227         class EnterBlockRAII {
228         public:
EnterBlockRAII(VariableReuseAnalysis * VRA,llvm::BasicBlock * BB)229             explicit EnterBlockRAII(VariableReuseAnalysis* VRA, llvm::BasicBlock* BB)
230                 : VRA(VRA) {
231                 VRA->BeginBlock(BB);
232             }
~EnterBlockRAII()233             ~EnterBlockRAII() { VRA->EndBlock(); }
234             VariableReuseAnalysis* VRA;
235         };
236         friend class EnterBlockRAII;
237 
238         // Check use instruction's legality and its pressure impact.
239         bool checkUseInst(llvm::Instruction* UInst, LiveVars* LV);
240 
241         // Check def instruction's legality and its pressure impact.
242         bool checkDefInst(llvm::Instruction* DInst, llvm::Instruction* UInst,
243             LiveVars* LV);
244 
245         // Visitor
246         void visitExtractElementInst(llvm::ExtractElementInst& I);
247 
isAliasedValue(llvm::Value * V)248         bool isAliasedValue(llvm::Value* V) {
249             if (m_pCtx->getVectorCoalescingControl() > 0) {
250                 return isAliased(V);
251             }
252             return false;
253         }
254 
255         // getRootValue():
256         //   return dessa root value; if dessa root value
257         //   is null, return itself.
258         llvm::Value* getRootValue(llvm::Value* V);
259         // getAliasRootValue()
260         //   return alias root value if it exists, itself otherwise.
261         llvm::Value* getAliasRootValue(llvm::Value* V);  // to be deleted
262 
263         /// printAlias - print value aliasing info in human readable form
264         void printAlias(llvm::raw_ostream& OS, const llvm::Function* F = nullptr) const;
265         /// dumpAalias - dump alias info to dbgs().
266         void dumpAlias() const;
267 
268         //
269         // m_aliasMap:
270         //      For mapping aliaser to aliasee:
271         //         aliaser -> aliasee
272         // where aliasee is a vector value and aliaser could be a scalar or
273         // a vector values.
274         //
275         // Properties of the map:
276         //   1. No chain aliases
277         //       No following:
278         //           Vec0 -> Vec1
279         //           v0 -> Vec0
280         //       Instead, they are represented as follows:
281         //           Vec0 -> Vec1
282         //           v0   -> Vec1
283         //   2. Aliasee is an aliaser to itself (for convenience)
284         //           Vec0 -> Vec0
285         //       When this entry is seen, we know Vec0 is an aliasee,
286         //       also called a alias root value.
287         //   3. Liveness of aliaser and aliasee are not combined
288         //      Unlike dessa alias, in which aliser's liveness is merged
289         //      into aliasee's. Here, aliaser's liveness is nerver merged
290         //      into aliasee's.
291         //
292         //  Note:
293         //     notation:
294         //         aliasCC(v) : all values that share the same alias root as v.
295         //         dessaCC(v) : all values in the same dessa congruent class as v.
296         //         subAlias(v, startIx, nelts) :
297         //                      all v in aliasCC(v) whose elements overlap
298         //                      v's baseVector[startIx : startIx+nelts-1].
299         //  For example,  V is of int4, s0, s1, s2, s3 are scalars that are aliased
300         //  to V's element at 0, 1, 2, and 2, respectively.
301         //                         s0 --> v[0]
302         //                         s1 --> v[1]
303         //                         s2 --> v[2]
304         //                         s3 --> v[3]
305         //   aliasCC(s0) = aliasCC(s1) = aliasCC(s2) = aliasCC(s3) = aliasCC(v)
306         //               = {v, s0, s1, s2, s3, s4}
307         //   subAlias(v, 2, 2) = {s2, s3, v}  // only s2&s3 overlaps V[2:3]
308         //   dessaCC(s0) = { values in the same dessa CC }
309         //
310         //   [todo] add Algo here.
311         //
312         AliasMapTy  m_aliasMap;
313 
314         // Function argument cannot be made a sub-part of another bigger
315         // value as it has been assigned a fixed physical GRF. The following
316         // map is used for checking if a value is an arg or coalesced with
317         // an argument by dessa.
318         std::list<llvm::Value*> m_ArgDeSSARoot;
isOrCoalescedWithArg(llvm::Value * V)319         bool isOrCoalescedWithArg(llvm::Value* V)
320         {
321             if (llvm::isa<llvm::Argument>(V))
322                 return true;
323             if (m_DeSSA) {
324                 if (llvm::Value * R = m_DeSSA->getRootValue(V)) {
325                     auto IE = m_ArgDeSSARoot.end();
326                     auto it = std::find(m_ArgDeSSARoot.begin(), IE, R);
327                     return it != IE;
328                 }
329             }
330             return false;
331         }
332 
333         // Add an entry V->itself if not existing yet.
334         void addVecAlias(llvm::Value* Aliaser, llvm::Value* Aliasee, int Idx);
335         SSubVecDesc* getOrCreateSubVecDesc(llvm::Value* V);
336         void getAllAliasVals(
337             ValueVectorTy& AliasVals,
338             llvm::Value* Aliaser,
339             llvm::Value* VecAliasee,
340             int    Idx);
341 
342         // No need to emit code for instructions in this map due to aliasing
343         llvm::DenseMap <llvm::Instruction*, int> m_HasBecomeNoopInsts;
344 
345         // For emitting livetime start to visa to assist liveness analysis
346         //   1. m_LifetimeAt1stDefInBB :  aliasee -> BB
347         //        Once a first def is encounted, add lifetime start and clear
348         //        this map entry afterwards.
349         //   2. m_LifetimeAtEndOfBB :  BB -> set of values
350         //        Add lifetime start for all values in the set at the end of BB.
351         llvm::DenseMap<llvm::Value*, llvm::BasicBlock*> m_LifetimeAt1stDefOfBB;
352         llvm::DenseMap<llvm::BasicBlock*, llvm::TinyPtrVector<llvm::Value*> > m_LifetimeAtEndOfBB;
353 
354     private:
reset()355         void reset() {
356             m_SimdSize = 0;
357             m_IsFunctionPressureLow = Status::Undef;
358             m_IsBlockPressureLow = Status::Undef;
359             m_aliasMap.clear();
360             m_root2AliasMap.clear();
361             m_HasBecomeNoopInsts.clear();
362             m_LifetimeAt1stDefOfBB.clear();
363             m_LifetimeAtEndOfBB.clear();
364 
365         }
366 
367         // Initialize per-block states. In particular, check if the entire block has a
368         // low pressure.
BeginBlock(llvm::BasicBlock * BB)369         void BeginBlock(llvm::BasicBlock* BB) {
370             IGC_ASSERT(m_SimdSize != 0);
371             if (m_RPE) {
372                 CodeGenContext* context = nullptr;
373                 context = getAnalysis<CodeGenContextWrapper>().getCodeGenContext();
374                 uint32_t BBPresure = m_RPE->getMaxLiveGRFAtBB(BB, m_SimdSize);
375                 if (BBPresure <= context->getNumGRFPerThread())
376                     m_IsBlockPressureLow = Status::True;
377                 else
378                     m_IsBlockPressureLow = Status::False;
379             }
380         }
381 
382         // Cleanup per-block states.
EndBlock()383         void EndBlock() { m_IsBlockPressureLow = Status::Undef; }
384 
385         void visitLiveInstructions(llvm::Function* F);
386 
387         void setLifeTimeStartPos(
388             llvm::Value* RootVal,
389             ValueVectorTy& AllVals,
390             BlockCoalescing* theBC);
391         void postProcessing();
392 
393         // Return true if this instruction can be converted to an alias
394         bool canBeAlias(llvm::CastInst* I);
395 
396         // If V has been payload-coalesced, return true.
hasBeenPayloadCoalesced(llvm::Value * V)397         bool hasBeenPayloadCoalesced(llvm::Value* V) {
398             return (m_coalescingEngine->GetValueCCTupleMapping(V) != nullptr);
399         }
400 
401         void mergeVariables(llvm::Function* F);
402         void InsertElementAliasing(llvm::Function* F);
403 
404         llvm::Value* traceAliasValue(llvm::Value* V);
405         bool getElementValue(
406             llvm::InsertElementInst* IEI, int& IEI_ix,
407             llvm::Value*& S,
408             llvm::Value*& V, int& V_ix);
409         bool getAllInsEltsIfAvailable(
410             llvm::InsertElementInst* FirstIEI,
411             VecInsEltInfoTy& AllIEIs);
412 
413         bool processExtractFrom(VecInsEltInfoTy& AllIEIs);
414         bool processInsertTo(VecInsEltInfoTy& AllIEIs);
415 
416         // Check if sub can be aliased to Base[Base_ix:size(sub)-1]
417         bool aliasInterfere(llvm::Value* Sub, llvm::Value* Base, int Base_ix);
418 
419         // DCC: DeSSA congruent class
420         // If any of V's DCC is an aliaser, return true.
421         bool hasAnyOfDCCAsAliaser(llvm::Value* V) const;
422         bool hasAnotherInDCCAsAliasee(llvm::Value* V) const;
423         bool isAliased(llvm::Value* V) const;
424 
425         // Returns true for the following pattern:
426         //   a = extractElement <vectorType> EEI_Vec, <constant EEI_ix>
427         //   b = insertElement  <vectorType> V1,  E,  <constant IEI_ix>
428         // where EEI_ix and IEI_ix are constants; Return false otherwise.
429         bool getVectorIndicesIfConstant(
430             llvm::InsertElementInst* IEI,
431             int& IEI_ix,
432             llvm::Value*& EEI_Vec,
433             int& EEI_ix);
434 
435 
436         CodeGenContext* m_pCtx;
437         WIAnalysis* m_WIA;
438         LiveVars* m_LV;
439         DeSSA* m_DeSSA;
440         CodeGenPatternMatch* m_PatternMatch;
441         CoalescingEngine* m_coalescingEngine;
442         llvm::DominatorTree* m_DT;
443         const llvm::DataLayout* m_DL;
444 
445         llvm::BumpPtrAllocator Allocator;
446 
447         /// Current Function; set on entry to runOnFunction
448         /// and unset on exit to runOnFunction
449         llvm::Function* m_F;
450 
451         // The register pressure estimator (optional).
452         RegisterEstimator* m_RPE;
453 
454         // Results may be cached at kernel level or basic block level. Use the
455         // following enum to indicate cached flag status.
456         enum class Status : int8_t {
457             Undef = -1,
458             False = 0,
459             True = 1
460         };
461 
462         // Per SIMD-compilation constant. Each compilation needs to initialize the
463         // SIMD mode.
464         uint16_t m_SimdSize;
465 
466         // When this function has low register pressure, reuse can be applied
467         // aggressively without checking each individual def-use pair.
468         Status m_IsFunctionPressureLow;
469 
470         // When this block has low register pressure, reuse can be applied
471         // aggressively without checking each individual def-use pair.
472         Status m_IsBlockPressureLow;
473 
474         // Temporaries under VATemp
475         // if a value V is in a dessa CC and V is aliased,
476         // add <V's root, V> into the map.  This is a quick
477         // way to check if any of values in a dessa CC has
478         // been aliased (either aliaser or aliasee)
479         Val2ValMapTy m_root2AliasMap;
480     };
481 
482     llvm::FunctionPass* createVariableReuseAnalysisPass();
483 
484 } // namespace IGC
485