1 /*========================== begin_copyright_notice ============================
2 
3 Copyright (C) 2017-2021 Intel Corporation
4 
5 SPDX-License-Identifier: MIT
6 
7 ============================= end_copyright_notice ===========================*/
8 
9 #ifndef __GRAPHCOLOR_H__
10 #define __GRAPHCOLOR_H__
11 
12 #include "BitSet.h"
13 #include "G4_IR.hpp"
14 #include "RegAlloc.h"
15 #include "RPE.h"
16 #include "SpillManagerGMRF.h"
17 #include "VarSplit.h"
18 
19 #include <list>
20 #include <limits>
21 #include <memory>
22 #include <map>
23 #include <unordered_set>
24 #include <vector>
25 
26 #define BITS_DWORD 32
27 #define ROUND(x,y)    ((x) + ((y - x % y) % y))
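// For illustration: ROUND(x, y) rounds x up to the next multiple of y,
// e.g. ROUND(10, 8) == 16 and ROUND(16, 8) == 16 (x is unchanged when it is
// already a multiple of y).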
28 
29 namespace vISA
30 {
31     const float MAXSPILLCOST = (std::numeric_limits<float>::max());
32     const float MINSPILLCOST = -(std::numeric_limits<float>::max());
33 
34     class BankConflictPass
35     {
36     private:
37         GlobalRA& gra;
38         bool forGlobal;
39 
40         BankConflict setupBankAccordingToSiblingOperand(BankConflict assignedBank, unsigned offset, bool oneGRFBank);
41         void setupEvenOddBankConflictsForDecls(G4_Declare * dcl_1, G4_Declare * dcl_2, unsigned offset1, unsigned offset2,
42             BankConflict &srcBC1, BankConflict &srcBC2);
43         void setupBankConflictsOneGRFOld(G4_INST* inst, int &bank1RegNum, int &bank2RegNum, float GRFRatio, unsigned &internalConflict);
44         bool isOddOffset(unsigned offset) const;
45         void setupBankConflictsforDPAS(G4_INST* inst);
46         bool hasDpasInst = false;
47 
48         void setupBankConflictsforTwoGRFs(G4_INST* inst);
49         void setupBankConflictsforMad(G4_INST* inst);
50         void setupBankConflictsForBB(G4_BB* bb, unsigned &threeSourceInstNum, unsigned &sendInstNum,
51             unsigned numRegLRA, unsigned & internalConflict);
52         void setupBankConflictsForBBTGL(G4_BB* bb, unsigned& threeSourceInstNum, unsigned& sendInstNum, unsigned numRegLRA, unsigned& internalConflict);
53         bool hasInternalConflict3Srcs(BankConflict *srcBC);
54         void setupBankForSrc0(G4_INST* inst, G4_INST* prevInst);
55         void getBanks(G4_INST* inst, BankConflict *srcBC, G4_Declare **dcls, G4_Declare **opndDcls, unsigned *offset);
56         void getPrevBanks(G4_INST* inst, BankConflict *srcBC, G4_Declare **dcls, G4_Declare **opndDcls, unsigned *offset);
57 
58     public:
59         bool setupBankConflictsForKernel(bool doLocalRR, bool &threeSourceCandidate, unsigned numRegLRA, bool &highInternalConflict);
60 
        BankConflictPass(GlobalRA& g, bool global) : gra(g), forGlobal(global)
62         {
63 
64         }
65     };
66 
67 class LiveRange final
68 {
69     G4_RegVar* const var;
70     G4_Declare* const dcl;
71     const G4_RegFileKind regKind;
72     bool* forbidden = nullptr;
73     int numForbidden = -1;
74     bool spilled = false;
75 
76     GlobalRA& gra;
77     unsigned numRegNeeded;
78     unsigned degree = 0;
79     unsigned refCount = 0;
80     unsigned parentLRID;
81     AssignedReg reg;
82     float spillCost;
83     BankConflict bc = BankConflict::BANK_CONFLICT_NONE;
84     const static unsigned UndefHint = 0xffffffff;
85     unsigned allocHint = UndefHint;
86 
87     union {
88         uint16_t bunch = 0;
89         struct {
90             uint16_t calleeSaveBias : 1; // indicates if the var is biased to get a callee-save assignment or not
91             uint16_t callerSaveBias : 1; // indicates if the var is biased to get a caller-save assignment or not
            uint16_t isEOTSrc : 1; // Gen7 only: whether the live range is the message source of an EOT send
93             uint16_t retIp : 1;   // variable is the return ip and should not be spilled
94 
95             uint16_t active : 1;
96             uint16_t isInfiniteCost : 1;
97             uint16_t isCandidate : 1;
98             uint16_t isPseudoNode : 1;
99             uint16_t isPartialDeclare : 1;
100             uint16_t isSplittedDeclare : 1;
101         };
102     };
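    // Note: because the flag bits above live in an anonymous union with
    // 'bunch', the default initializer 'bunch = 0' clears every bit-field in
    // one go.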
103 
104 public:
105 
106     LiveRange(G4_RegVar* v, GlobalRA&);
107 
    void* operator new(size_t sz, vISA::Mem_Manager& m) { return m.alloc(sz); }

    void setDegree(unsigned d) {degree = d;}
    unsigned getDegree() const {return degree;}

    unsigned getNumRegNeeded() const {return numRegNeeded;}

    void subtractDegree(unsigned d)
    {
        MUST_BE_TRUE(d <= degree, ERROR_INTERNAL_ARGUMENT);
        degree -= d;
    }

    void setActive(bool v) {active = v;}
    bool getActive() const {return active;}

    void emit(std::ostream& output, bool symbolreg = false) const
    {
        output << getVar ()->getDeclare ()->getName();
        if (reg.phyReg != NULL)
        {
            output << "(";
            reg.phyReg->emit(output);
            output << '.' << reg.subRegOff << ':';
            output << TypeSymbol(getVar()->getDeclare()->getElemType()) << ")";
        }
        output << "(size = " << getDcl()->getByteSize() <<
            ", spill cost = " << getSpillCost() << ", degree = " << getDegree() << ")";
    }

    unsigned getRefCount() const {return refCount;}
    void setRefCount(unsigned count) {refCount = count;}

    float getSpillCost() const {return spillCost;}
    void setSpillCost(float cost) {spillCost = cost;}

    bool getIsInfiniteSpillCost() const { return isInfiniteCost; }
    void checkForInfiniteSpillCost(G4_BB* bb, std::list<G4_INST*>::reverse_iterator& it);

    G4_VarBase* getPhyReg() const { return reg.phyReg; }

    unsigned getPhyRegOff() const { return reg.subRegOff; }

    void setPhyReg(G4_VarBase* pr, unsigned off)
    {
        MUST_BE_TRUE(pr->isPhyReg(), ERROR_UNKNOWN);
        reg.phyReg = pr;
        reg.subRegOff = off;
    }

    void resetPhyReg()
    {
        reg.phyReg = nullptr;
        reg.subRegOff = 0;
    }

    bool getIsPseudoNode() const { return isPseudoNode; }
    void setIsPseudoNode() { isPseudoNode = true; }
    bool getIsPartialDcl() const { return isPartialDeclare; }
    void setIsPartialDcl() { isPartialDeclare = true; }
    bool getIsSplittedDcl() const { return isSplittedDeclare; }
    void setIsSplittedDcl(bool v) { isSplittedDeclare = v; }
    BankConflict getBC() const { return bc;  }
    void setBC(BankConflict c) { bc = c; }
    void setParentLRID(int id) { parentLRID = id; }
    unsigned getParentLRID() const { return parentLRID; }

    unsigned getAllocHint() const { return allocHint; }
    bool hasAllocHint() const { return allocHint != UndefHint; }
    void setAllocHint(unsigned h);
    void resetAllocHint() { allocHint = UndefHint; }
179 
180     // From VarBasis
181     public:
182         void allocForbidden(vISA::Mem_Manager& mem, bool reserveStackCallRegs, unsigned reserveSpillSize, unsigned rerservedRegNum);
183         void allocForbiddenCallerSave(vISA::Mem_Manager& mem, G4_Kernel* kernel);
184         void allocForbiddenCalleeSave(vISA::Mem_Manager& mem, G4_Kernel* kernel);
        const bool* getForbidden() const { return forbidden; }
        void markForbidden(int reg, int numReg)
        {
            MUST_BE_TRUE(((int)getForbiddenVectorSize()) >= reg + numReg, "forbidden register is out of bound");
            for (int i = reg; i < reg + numReg; ++i)
            {
                forbidden[i] = true;
            }
            numForbidden = -1;
        }
        int getNumForbidden()
        {
            if (forbidden == nullptr)
            {
                return 0;
            }
            if (numForbidden == -1)
            {
                numForbidden = 0;
                for (int i = 0, size = getForbiddenVectorSize(); i < size; ++i)
                {
                    if (forbidden[i])
                    {
                        ++numForbidden;
                    }
                }
            }
            return numForbidden;
        }
        G4_RegVar* getVar() const { return var; }
        G4_Declare* getDcl() const { return dcl; }
        G4_RegFileKind getRegKind() const { return regKind; }
        void dump() const;

        void setCalleeSaveBias(bool v) { calleeSaveBias = v; }
        bool getCalleeSaveBias() const { return calleeSaveBias; }

        void setCallerSaveBias(bool v) { callerSaveBias = v; }
        bool getCallerSaveBias() const { return callerSaveBias; }

        void setEOTSrc() { isEOTSrc = true; }
        bool getEOTSrc() const { return isEOTSrc; }

        void setRetIp() { retIp = true; }
        bool isRetIp() const { return retIp; }

        bool isSpilled() const { return spilled; }
        void setSpilled(bool v) { spilled = v; }

private:
    //const Options *m_options;
    unsigned getForbiddenVectorSize() const;
    void allocForbiddenVector(vISA::Mem_Manager& mem);
}; // class LiveRange
239 } // vISA::
240 typedef std::list<vISA::LiveRange*> LIVERANGE_LIST;
241 typedef std::list<vISA::LiveRange*>::iterator LIVERANGE_LIST_ITER;
242 
243 // A mapping from the pseudo decl created for caller save/restore, to the ret val
244 // This is used in augmentIntfGraph to prune interference edges for fcall ret val
245 typedef std::map<vISA::G4_Declare*, vISA::G4_Declare*> FCALL_RET_MAP;
246 typedef std::map<vISA::G4_Declare*, vISA::G4_Declare*>::iterator FCALL_RET_MAP_ITER;
247 
248 typedef std::map<vISA::G4_Declare*, std::pair<vISA::G4_INST*, unsigned>> CALL_DECL_MAP;
249 typedef std::map<vISA::G4_Declare*, std::pair<vISA::G4_INST*, unsigned>>::iterator CALL_DECL_MAP_ITER;
250 
//
// A bit array records all interference information.
// (The 2D matrix is flattened to a 1D array.)
// Since the interference information is symmetric, we could use only
// half of the size; to simplify the implementation, we use the full
// size of the bit array.
//
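// For illustration, in the dense representation the bit for an ordered pair
// (v1, v2) with v1 < v2 lives at
//     matrix[v1 * rowSize + v2 / BITS_DWORD], bit position (v2 % BITS_DWORD)
// (see Interference::safeSetInterference below).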
258 namespace vISA
259 {
260     class Augmentation
261     {
262     private:
263         // pair of default mask, non-default mask
264         using MaskDeclares = std::pair<BitSet, BitSet>;
265         G4_Kernel& kernel;
266         Interference& intf;
267         GlobalRA& gra;
268         const LivenessAnalysis& liveAnalysis;
269         LiveRange* const * const lrs;
270         FCALL_RET_MAP& fcallRetMap;
271         CALL_DECL_MAP callDclMap;
272         std::unordered_map<FuncInfo*, PhyRegSummary> localSummaryOfCallee;
273         std::vector<G4_Declare*> sortedIntervals;
274         std::list<G4_Declare*> defaultMask;
275         std::list<G4_Declare*> nonDefaultMask;
276         std::unordered_map<FuncInfo*, MaskDeclares> callsiteDeclares;
277         std::unordered_map <G4_Declare*, MaskDeclares> retDeclares;
278         Mem_Manager& m;
279 
280         bool updateDstMaskForGather(G4_INST* inst, std::vector<unsigned char>& mask);
281         bool updateDstMaskForGatherRaw(G4_INST* inst, std::vector<unsigned char>& mask, const G4_SendDescRaw* raw);
282         bool updateDstMaskForGatherLdSt(G4_INST* inst, std::vector<unsigned char>& mask, const G4_SendDescLdSt *ldst);
283         void updateDstMask(G4_INST* inst, bool checkCmodOnly);
284         static unsigned getByteSizeFromMask(AugmentationMasks type);
285         bool isDefaultMaskDcl(G4_Declare* dcl, unsigned simdSize, AugmentationMasks type);
286         bool isDefaultMaskSubDeclare(unsigned char* mask, unsigned lb, unsigned rb, G4_Declare* dcl, unsigned simdSize);
287         bool verifyMaskIfInit(G4_Declare* dcl, AugmentationMasks mask);
288         bool checkGRFPattern3(G4_Declare* dcl, G4_DstRegRegion* dst, unsigned maskOff,
289             unsigned lb, unsigned rb, unsigned execSize);
290         bool checkGRFPattern2(G4_Declare* dcl, G4_DstRegRegion* dst, unsigned maskOff,
291             unsigned lb, unsigned rb, unsigned execSize);
292         bool checkGRFPattern1(G4_Declare* dcl, G4_DstRegRegion* dst, unsigned maskOff,
293             unsigned lb, unsigned rb, unsigned execSize);
294         void markNonDefaultDstRgn(G4_INST* inst, G4_Operand* opnd);
295         bool markNonDefaultMaskDef();
296         G4_BB* getTopmostBBDst(G4_BB* src, G4_BB* end, G4_BB* origSrc, unsigned traversal);
297         void updateStartIntervalForSubDcl(G4_Declare* dcl, G4_INST* curInst, G4_Operand *opnd);
298         void updateEndIntervalForSubDcl(G4_Declare* dcl, G4_INST* curInst, G4_Operand *opnd);
299         void updateStartInterval(const G4_Declare* dcl, G4_INST* curInst);
300         void updateEndInterval(const G4_Declare* dcl, G4_INST* curInst);
301         void updateStartIntervalForLocal(G4_Declare* dcl, G4_INST* curInst, G4_Operand *opnd);
302         void updateEndIntervalForLocal(G4_Declare* dcl, G4_INST* curInst, G4_Operand *opnd);
303         void buildLiveIntervals();
304         void clearIntervalInfo();
305         void sortLiveIntervals();
306         unsigned getEnd(const G4_Declare* dcl) const;
307         bool isNoMask(const G4_Declare* dcl, unsigned size) const;
308         bool isConsecutiveBits(const G4_Declare* dcl, unsigned size) const;
309         bool isCompatible(const G4_Declare* testDcl, const G4_Declare* biggerDcl) const;
310         void buildInterferenceIncompatibleMask();
311         void buildInteferenceForCallSiteOrRetDeclare(G4_Declare* newDcl, MaskDeclares* mask);
312         void buildInteferenceForCallsite(FuncInfo* func);
313         void buildInteferenceForRetDeclares();
314         void buildSummaryForCallees();
315         void expireIntervals(unsigned startIdx);
316         void buildSIMDIntfDcl(G4_Declare* newDcl, bool isCall);
317         void buildSIMDIntfAll(G4_Declare* newDcl);
318         void buildSIMDIntfAllOld(G4_Declare* newDcl);
319         void updateActiveList(G4_Declare* newDcl, std::list<G4_Declare*>* dclMaskList);
320         void handleSIMDIntf(G4_Declare* firstDcl, G4_Declare* secondDcl, bool isCall);
321         bool weakEdgeNeeded(AugmentationMasks, AugmentationMasks);
322 
323         void addSIMDIntfDclForCallSite(MaskDeclares* maskDeclares);
324         void addSIMDIntfForRetDclares(G4_Declare* newDcl);
325 
326     public:
327         Augmentation(G4_Kernel& k, Interference& i, const LivenessAnalysis& l, LiveRange* const ranges[], GlobalRA& g);
328 
329         void augmentIntfGraph();
330     };
331 
332     class Interference
333     {
334         friend class Augmentation;
335 
336         // This stores compatible ranges for each variable. Such
337         // compatible ranges will not be present in sparseIntf set.
        // We store G4_Declare* instead of id because variables
        // allocated by LRA will not have a valid id.
340         std::map<G4_Declare*, std::vector<G4_Declare*>> compatibleSparseIntf;
341 
342         GlobalRA& gra;
343         G4_Kernel& kernel;
344         LiveRange** const & lrs;
345         IR_Builder& builder;
346         const unsigned maxId;
347         const unsigned rowSize;
348         const unsigned splitStartId;
349         const unsigned splitNum;
350         unsigned* matrix = nullptr;
351         const LivenessAnalysis* const liveAnalysis;
352 
353         std::vector<std::vector<unsigned>> sparseIntf;
354 
        // Sparse interference matrix.
        // We don't update sparseIntf directly, to ensure uniqueness.
        // As with the dense matrix, the stored interference is not symmetric (that is, if v1 and v2
        // interfere and v1 < v2, we insert (v1, v2) but not (v2, v1)) for better cache behavior.
359         std::vector<std::unordered_set<uint32_t> > sparseMatrix;
360         static const uint32_t denseMatrixLimit = 0x80000;
361 
        static void updateLiveness(BitSet& live, uint32_t id, bool val)
363         {
364             live.set(id, val);
365         }
366 
367         G4_Declare* getGRFDclForHRA(int GRFNum) const;
368 
        bool useDenseMatrix() const
370         {
371             return maxId < denseMatrixLimit;
372         }
373 
374         // Only upper-half matrix is now used in intf graph.
        inline void safeSetInterference(unsigned v1, unsigned v2)
376         {
377             // Assume v1 < v2
378             if (useDenseMatrix())
379             {
380                 unsigned col = v2 / BITS_DWORD;
381                 matrix[v1 * rowSize + col] |= 1 << (v2 % BITS_DWORD);
382             }
383             else
384             {
385                 sparseMatrix[v1].emplace(v2);
386             }
387         }
388 
        inline void setBlockInterferencesOneWay(unsigned v1, unsigned col, unsigned block)
390         {
391             if (useDenseMatrix())
392             {
393 #ifdef _DEBUG
394                 MUST_BE_TRUE(sparseIntf.size() == 0, "Updating intf graph matrix after populating sparse intf graph");
395 #endif
396 
397                 matrix[v1 * rowSize + col] |= block;
398             }
399             else
400             {
401                 auto&& intfSet = sparseMatrix[v1];
402                 for (int i = 0; i < BITS_DWORD; ++i)
403                 {
404                     if (block & (1 << i))
405                     {
406                         uint32_t v2 = col * BITS_DWORD + i;
407                         intfSet.emplace(v2);
408                     }
409                 }
410             }
411         }
412 
        unsigned getInterferenceBlk(unsigned idx) const
414         {
415             assert(useDenseMatrix() && "matrix is not initialized");
416             return matrix[idx];
417         }
418 
419         void addCalleeSaveBias(const BitSet& live);
420 
421         void buildInterferenceAtBBExit(const G4_BB* bb, BitSet& live);
422         void buildInterferenceWithinBB(G4_BB* bb, BitSet& live);
423         void buildInterferenceForDst(G4_BB* bb, BitSet& live, G4_INST* inst, std::list<G4_INST*>::reverse_iterator i, G4_DstRegRegion* dst);
424         void buildInterferenceForFcall(G4_BB* bb, BitSet& live, G4_INST* inst, std::list<G4_INST*>::reverse_iterator i, const G4_VarBase* regVar);
425 
426         inline void filterSplitDclares(unsigned startIdx, unsigned endIdx, unsigned n, unsigned col, unsigned &elt, bool is_split);
427 
428         void buildInterferenceWithLive(const BitSet& live, unsigned i);
429         void buildInterferenceWithSubDcl(unsigned lr_id, G4_Operand *opnd, BitSet& live, bool setLive, bool setIntf);
430         void buildInterferenceWithAllSubDcl(unsigned v1, unsigned v2);
431 
432         void markInterferenceForSend(G4_BB* bb, G4_INST* inst, G4_DstRegRegion* dst);
433 
434         void buildInterferenceWithLocalRA(G4_BB* bb);
435 
436         void buildInterferenceAmongLiveOuts();
437         void buildInterferenceAmongLiveIns();
438 
439         void markInterferenceToAvoidDstSrcOverlap(G4_BB* bb, G4_INST* inst);
440 
441         void generateSparseIntfGraph();
442 
443     public:
444         Interference(const LivenessAnalysis* l, LiveRange** const & lr, unsigned n, unsigned ns, unsigned nm,
445             GlobalRA& g);
446 
        ~Interference()
448         {
449             if (useDenseMatrix())
450             {
451                 delete[] matrix;
452             }
453         }
454 
        const std::vector<G4_Declare*>* getCompatibleSparseIntf(G4_Declare* d) const
456         {
457             if (compatibleSparseIntf.size() > 0)
458             {
459                 auto it = compatibleSparseIntf.find(d);
460                 if (it == compatibleSparseIntf.end())
461                 {
462                     return nullptr;
463                 }
464                 return &it->second;
465             }
466             return nullptr;
467         }
468 
        void init(vISA::Mem_Manager& m)
470         {
471             if (useDenseMatrix())
472             {
473                 auto N = (size_t)rowSize * (size_t)maxId;
474                 matrix = new uint32_t[N](); // zero-initialize
475             }
476             else
477             {
478                 sparseMatrix.resize(maxId);
479             }
480         }
481 
482         void computeInterference();
483         void applyPartitionBias();
484         bool interfereBetween(unsigned v1, unsigned v2) const;
        const std::vector<unsigned>& getSparseIntfForVar(unsigned id) const { return sparseIntf[id]; }
486 
487         inline bool varSplitCheckBeforeIntf(unsigned v1, unsigned v2) const;
488 
        void checkAndSetIntf(unsigned v1, unsigned v2)
490         {
491             if (v1 < v2)
492             {
493                 safeSetInterference(v1, v2);
494             }
495             else if (v1 > v2)
496             {
497                 safeSetInterference(v2, v1);
498             }
499         }
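        // For example, checkAndSetIntf(7, 3) and checkAndSetIntf(3, 7) both
        // record the single ordered pair (3, 7); equal ids are ignored, so a
        // variable never interferes with itself.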
500 
501         void dumpInterference() const;
502         void dumpVarInterference() const;
503         bool dumpIntf(const char*) const;
504         void interferenceVerificationForSplit() const;
505 
506         bool linearScanVerify() const;
507 
508         bool isStrongEdgeBetween(const G4_Declare*, const G4_Declare*) const;
509     };
510 
    // Class to compute the reg chart and dump it to an ostream.
512     // Used only when -dumpregchart is passed.
513     class RegChartDump
514     {
515         const GlobalRA& gra;
516         std::vector<G4_Declare*> sortedLiveIntervals;
517         std::unordered_map<G4_Declare*, std::pair<G4_INST*, G4_INST*>> startEnd;
518     public:
519         void recordLiveIntervals(const std::vector<G4_Declare*>& dcls);
520         void dumpRegChart(std::ostream&, LiveRange** lrs = nullptr, unsigned numLRs = 0);
521 
        RegChartDump(const GlobalRA& g) : gra(g) {}
523     };
524 
525     class GraphColor
526     {
527         GlobalRA& gra;
528 
529         unsigned totalGRFRegCount; // .reg_count_total
530         const unsigned numVar;
531         const unsigned numSplitStartID;
532         const unsigned numSplitVar;
533         unsigned *spAddrRegSig;
534         Interference intf;
535         PhyRegPool& regPool;
536         IR_Builder& builder;
537         LiveRange** lrs = nullptr;
538         bool isHybrid;
539         LIVERANGE_LIST spilledLRs;
540         bool forceSpill;
541         vISA::Mem_Manager mem;
542         const Options *m_options;
543 
544         unsigned evenTotalDegree = 1;
545         unsigned oddTotalDegree = 1;
546         unsigned evenTotalRegNum = 1;
547         unsigned oddTotalRegNum = 1;
548         unsigned evenMaxRegNum = 1;
549         unsigned oddMaxRegNum = 1;
550 
551         G4_Kernel& kernel;
552         LivenessAnalysis& liveAnalysis;
553 
554         std::vector<LiveRange*> colorOrder;
555         LIVERANGE_LIST unconstrainedWorklist;
556         LIVERANGE_LIST constrainedWorklist;
557         unsigned numColor = 0;
558 
559         unsigned edgeWeightGRF(const LiveRange* lr1, const LiveRange* lr2);
560         unsigned edgeWeightARF(const LiveRange* lr1, const LiveRange* lr2);
561 
562         void computeDegreeForGRF();
563         void computeDegreeForARF();
564         void computeSpillCosts(bool useSplitLLRHeuristic);
565         void determineColorOrdering();
566         void removeConstrained();
567         void relaxNeighborDegreeGRF(LiveRange* lr);
568         void relaxNeighborDegreeARF(LiveRange* lr);
569         bool assignColors(ColorHeuristic heuristicGRF, bool doBankConflict, bool highInternalConflict, bool honorHints = true);
570 
        void clearSpillAddrLocSignature()
572         {
573             memset(spAddrRegSig, 0, getNumAddrRegisters() * sizeof(unsigned));
574         }
575         void pruneActiveSpillAddrLocs(G4_DstRegRegion*, unsigned, G4_Type);
576         void updateActiveSpillAddrLocs(G4_DstRegRegion*, G4_SrcRegRegion*, unsigned execSize);
577         bool redundantAddrFill(G4_DstRegRegion*, G4_SrcRegRegion*, unsigned execSize);
578 
579     public:
580         GraphColor(LivenessAnalysis& live, unsigned totalGRF, bool hybrid, bool forceSpill_);
581 
        const Options * getOptions() const { return m_options; }
583 
584         bool regAlloc(
585             bool doBankConflictReduction,
586             bool highInternalConflict,
587             bool reserveSpillReg, unsigned& spillRegSize, unsigned& indrSpillRegSize, const RPE* rpe);
        bool requireSpillCode() const { return !spilledLRs.empty(); }
        const Interference * getIntf() const { return &intf; }
        void createLiveRanges(unsigned reserveSpillSize = 0);
        LiveRange ** getLiveRanges() const { return lrs; }
        const LIVERANGE_LIST & getSpilledLiveRanges() const { return spilledLRs; }
593         void confirmRegisterAssignments();
594         void resetTemporaryRegisterAssignments();
595         void cleanupRedundantARFFillCode();
596         void getCalleeSaveRegisters();
597         void addA0SaveRestoreCode();
598         void addFlagSaveRestoreCode();
599         void getSaveRestoreRegister();
600         void getCallerSaveRegisters();
601         void dumpRegisterPressure();
        GlobalRA & getGRA() { return gra; }
        G4_SrcRegRegion* getScratchSurface() const;
        LiveRange** getLRs() const { return lrs; }
        unsigned int getNumVars() const { return numVar; }
606     };
607 
608     struct BundleConflict
609     {
610         const G4_Declare * const dcl;
611         const int offset;
        BundleConflict(const G4_Declare *dcl, int offset) : dcl(dcl), offset(offset) { }
613     };
614 
615     struct RAVarInfo
616     {
617         unsigned numSplit = 0;
618         unsigned bb_id = UINT_MAX;      // block local variable's block id.
619         G4_Declare* splittedDCL = nullptr;
620         LocalLiveRange* localLR = nullptr;
621         LSLiveRange* LSLR = nullptr;
622         unsigned numRefs = 0;
623         BankConflict conflict = BANK_CONFLICT_NONE;      // used to indicate bank that should be assigned to dcl if possible
624         G4_INST* startInterval = nullptr;
625         G4_INST* endInterval = nullptr;
626         std::vector<unsigned char> mask;
627         std::vector<const G4_Declare*> subDclList;
628         unsigned subOff = 0;
629         std::vector<BundleConflict> bundleConflicts;
630         G4_SubReg_Align subAlign = G4_SubReg_Align::Any;
631         bool isEvenAlign = false;
632     };
633 
634     class VerifyAugmentation
635     {
636     private:
637         G4_Kernel* kernel = nullptr;
638         GlobalRA* gra = nullptr;
639         std::vector<G4_Declare*> sortedLiveRanges;
640         std::unordered_map<const G4_Declare*, std::tuple<LiveRange*, AugmentationMasks, G4_INST*, G4_INST*>> masks;
641         LiveRange* const * lrs = nullptr;
642         unsigned numVars = 0;
643         const Interference* intf = nullptr;
644         std::unordered_map<G4_Declare*, LiveRange*> DclLRMap;
645         std::unordered_map<G4_BB*, std::string> bbLabels;
646         std::vector<std::tuple<G4_BB*, unsigned, unsigned>> BBLexId;
647 
        static const char* getStr(AugmentationMasks a)
649         {
650             if (a == AugmentationMasks::Default16Bit)
651                 return "Default16Bit";
652             else if (a == AugmentationMasks::Default32Bit)
653                 return "Default32Bit";
654             else if (a == AugmentationMasks::Default64Bit)
655                 return "Default64Bit";
656             else if (a == AugmentationMasks::NonDefault)
657                 return "NonDefault";
658             else if (a == AugmentationMasks::Undetermined)
659                 return "Undetermined";
660 
661             return "-----";
662         };
663         void labelBBs();
664         void populateBBLexId();
665         bool interfereBetween(G4_Declare*, G4_Declare*);
666         void verifyAlign(G4_Declare* dcl);
667 
668     public:
669         void verify();
        void reset()
671         {
672             sortedLiveRanges.clear();
673             masks.clear();
674             kernel = nullptr;
675             lrs = nullptr;
676             gra = nullptr;
677             numVars = 0;
678             intf = nullptr;
679             DclLRMap.clear();
680             bbLabels.clear();
681             BBLexId.clear();
682         }
683         void loadAugData(std::vector<G4_Declare*>& s, LiveRange* const * l, unsigned n, const Interference* i, GlobalRA& g);
684         void dump(const char* dclName);
685         bool isClobbered(LiveRange* lr, std::string& msg);
686     };
687 
688     class GlobalRA
689     {
690     private:
691         std::unordered_set<G4_INST*> EUFusionWAInsts;
692         bool m_EUFusionWANeeded;
693     public:
        bool EUFusionWANeeded() const { return m_EUFusionWANeeded; }
695         void addEUFusionWAInsts(G4_INST* inst);
        void removeEUFusionWAInst(G4_INST* inst) { EUFusionWAInsts.erase(inst); }
        const std::unordered_set<G4_INST*>& getEUFusionWAInsts() { return EUFusionWAInsts; }
698     public:
699         std::unique_ptr<VerifyAugmentation> verifyAugmentation;
700         std::unique_ptr<RegChartDump> regChart;
701         std::unique_ptr<SpillAnalysis> spillAnalysis;
        static bool useGenericAugAlign()
703         {
704             auto gen = getPlatformGeneration(getGenxPlatform());
705             if (gen == PlatformGen::GEN9 ||
706                 gen == PlatformGen::GEN8)
707                 return false;
708             return true;
709         }
710         static const char StackCallStr[];
711 
712     private:
713         template <class REGION_TYPE> static unsigned getRegionDisp(REGION_TYPE * region);
714         unsigned getRegionByteSize(G4_DstRegRegion * region, unsigned execSize);
        static bool owordAligned(unsigned offset) { return offset % 16 == 0; }
716         template <class REGION_TYPE> bool isUnalignedRegion(REGION_TYPE * region, unsigned execSize);
717         bool shouldPreloadDst(G4_INST* instContext, G4_BB* curBB);
718         bool livenessCandidate(const G4_Declare* decl) const;
719         void updateDefSet(std::set<G4_Declare*>& defs, G4_Declare* referencedDcl);
720         void detectUndefinedUses(LivenessAnalysis& liveAnalysis, G4_Kernel& kernel);
721         void markBlockLocalVar(G4_RegVar* var, unsigned bbId);
722         void markBlockLocalVars();
723         void computePhyReg();
724         void fixAlignment();
725         void expandSpillIntrinsic(G4_BB*);
726         void expandFillIntrinsic(G4_BB*);
727         void expandSpillFillIntrinsics(unsigned);
728         void saveRestoreA0(G4_BB*);
729 
730         static const RAVarInfo defaultValues;
731         std::vector<RAVarInfo> vars;
732         std::vector<AugmentationMasks> varMasks;
733         std::vector<G4_Declare *> UndeclaredVars;
734 
735         // fake declares for each GRF reg, used by HRA
736         // note only GRFs that are used by LRA get a declare
737         std::vector<G4_Declare*> GRFDclsForHRA;
738 
        // Store all LocalLiveRange instances created so they're
        // appropriately destroyed along with the instance of GlobalRA.
        // This needs to be a list because we'll take the address of
        // its elements and std::vector cannot be used due to its
        // reallocation policy.
744         std::list<LocalLiveRange> localLiveRanges;
745 
746         std::unordered_map<const G4_BB*, unsigned> subretloc;
747         // map ret location to declare for call/ret
748         std::map<uint32_t, G4_Declare*> retDecls;
749 
        // store instructions that shouldn't be rematerialized.
751         std::unordered_set<G4_INST*> dontRemat;
752 
        RAVarInfo &allocVar(const G4_Declare* dcl)
754         {
755             auto dclid = dcl->getDeclId();
756             if (dclid >= vars.size())
757                 vars.resize(dclid + 1);
758             return vars[dclid];
759         }
760 
        const RAVarInfo &getVar(const G4_Declare* dcl) const
762         {
763             auto dclid = dcl->getDeclId();
764             return dclid >= vars.size() ? defaultValues : vars[dclid];
765         }
766 
767         // temp variable storing the FP dcl's old value
768         // created in addStoreRestoreForFP
769         G4_Declare* oldFPDcl = nullptr;
770 
771         // instruction to save/restore vISA FP, only present in functions
772         G4_INST* saveBE_FPInst = nullptr;
773         G4_INST* restoreBE_FPInst = nullptr;
774 
        // instruction to update BE_FP, only present in functions
776         G4_INST* setupBE_FP = nullptr;
777 
778         // new temps for each reference of spilled address/flag decls
779         std::unordered_set<G4_Declare*> addrFlagSpillDcls;
780 
781         // store iteration number for GRA loop
782         unsigned iterNo = 0;
783 
784         uint32_t numGRFSpill = 0;
785         uint32_t numGRFFill = 0;
786 
787         bool spillFillIntrinUsesLSC(G4_INST* spillFillIntrin);
788         void expandFillLSC(G4_BB* bb, INST_LIST_ITER& instIt);
789         void expandSpillLSC(G4_BB* bb, INST_LIST_ITER& instIt);
790         void expandFillNonStackcall(uint32_t numRows, uint32_t offset, short rowOffset, G4_SrcRegRegion* header, G4_DstRegRegion* resultRgn, G4_BB* bb, INST_LIST_ITER& instIt);
791         void expandSpillNonStackcall(uint32_t numRows, uint32_t offset, short rowOffset, G4_SrcRegRegion* header, G4_SrcRegRegion* payload, G4_BB* bb, INST_LIST_ITER& instIt);
792         void expandFillStackcall(uint32_t numRows, uint32_t offset, short rowOffset, G4_SrcRegRegion* header, G4_DstRegRegion* resultRgn, G4_BB* bb, INST_LIST_ITER& instIt);
793         void expandSpillStackcall(uint32_t numRows, uint32_t offset, short rowOffset, G4_SrcRegRegion* payload, G4_BB* bb, INST_LIST_ITER& instIt);
794 
795     public:
796         static unsigned sendBlockSizeCode(unsigned owordSize);
797 
798         // For current program, store caller/callee save/restore instructions
799         std::unordered_set<G4_INST*> calleeSaveInsts;
800         std::unordered_set<G4_INST*> calleeRestoreInsts;
801         std::unordered_map<G4_INST*, std::unordered_set<G4_INST*>> callerSaveInsts;
802         std::unordered_map<G4_INST*, std::unordered_set<G4_INST*>> callerRestoreInsts;
803         std::unordered_map<G4_BB*, std::vector<bool>> callerSaveRegsMap;
804         std::unordered_map<G4_BB*, unsigned> callerSaveRegCountMap;
805         std::unordered_map<G4_BB*, std::vector<bool>> retRegsMap;
806         std::vector<bool> calleeSaveRegs;
807         unsigned calleeSaveRegCount = 0;
808 
809         std::unordered_map<G4_Declare*, SplitResults> splitResults;
810 
811     public:
812         G4_Kernel& kernel;
813         IR_Builder& builder;
814         PhyRegPool& regPool;
815         PointsToAnalysis& pointsToAnalysis;
816         FCALL_RET_MAP fcallRetMap;
817 
818         bool useLscForSpillFill = false;
819         bool useLscForNonStackCallSpillFill = false;
820 
        VarSplitPass* getVarSplitPass() const { return kernel.getVarSplitPass(); }
822 
        unsigned getSubRetLoc(const G4_BB* bb)
824         {
825             auto it = subretloc.find(bb);
826             if (it == subretloc.end())
827                 return UNDEFINED_VAL;
828             return it->second;
829         }
830 
        void setSubRetLoc(const G4_BB* bb, unsigned s) { subretloc[bb] = s; }
832 
833         bool isSubRetLocConflict(G4_BB *bb, std::vector<unsigned> &usedLoc, unsigned stackTop);
834         void assignLocForReturnAddr();
835         unsigned determineReturnAddrLoc(unsigned entryId, unsigned* retLoc, G4_BB* bb);
836         void insertCallReturnVar();
837         void insertSaveAddr(G4_BB*);
838         void insertRestoreAddr(G4_BB*);
        void setIterNo(unsigned i) { iterNo = i; }
        unsigned getIterNo() const { return iterNo; }
841         void fixSrc0IndirFcall();
842 
        G4_Declare* getRetDecl(uint32_t retLoc)
844         {
845             auto result = retDecls.find(retLoc);
846             if (result != retDecls.end())
847             {
848                 return result->second;
849             }
850 
851             const char* name = builder.getNameString(kernel.fg.mem, 24, "RET__loc%d", retLoc);
852             G4_Declare* dcl = builder.createDeclareNoLookup(name, G4_GRF, 2, 1, Type_UD);
853 
854 
855             // call destination must still be QWord aligned
856             dcl->setSubRegAlign(Four_Word);
857             setSubRegAlign(dcl, Four_Word);
858 
859             retDecls[retLoc] = dcl;
860             return dcl;
861         }
862 
        G4_INST* getSaveBE_FPInst() const { return saveBE_FPInst; };
        G4_INST* getRestoreBE_FPInst() const { return restoreBE_FPInst; };

        G4_INST* getBEFPSetupInst() { return setupBE_FP; }
        void setBEFPSetupInst(G4_INST* i) { setupBE_FP = i; }
868 
869         static unsigned owordToGRFSize(unsigned numOwords);
870         static unsigned hwordToGRFSize(unsigned numHwords);
871         static unsigned GRFToHwordSize(unsigned numGRFs);
872         static unsigned GRFSizeToOwords(unsigned numGRFs);
873         static unsigned getHWordByteSize();
874 
875         // RA specific fields
        G4_Declare* getGRFDclForHRA(int GRFNum) const { return GRFDclsForHRA[GRFNum]; }

        G4_Declare* getOldFPDcl() const { return oldFPDcl; }

        bool isAddrFlagSpillDcl(G4_Declare* dcl) const
881         {
882             return addrFlagSpillDcls.count(dcl) != 0;
883         }
884 
        void addAddrFlagSpillDcl(G4_Declare* dcl)
886         {
887             addrFlagSpillDcls.insert(dcl);
888         }
889 
        void addUndefinedDcl(G4_Declare* dcl)
891         {
892             UndeclaredVars.push_back(dcl);
893         }
894 
        bool isUndefinedDcl(const G4_Declare* dcl) const
896         {
897             return std::find(UndeclaredVars.begin(), UndeclaredVars.end(), dcl) != UndeclaredVars.end();
898         }
899 
        unsigned getSplitVarNum(const G4_Declare* dcl) const
901         {
902             return getVar(dcl).numSplit;
903         }
904 
        void setSplitVarNum(const G4_Declare* dcl, unsigned val)
906         {
907             allocVar(dcl).numSplit = val;
908         }
909 
        unsigned getBBId(const G4_Declare* dcl) const
911         {
912             return getVar(dcl).bb_id;
913         }
914 
        void setBBId(const G4_Declare* dcl, unsigned id)
916         {
917             allocVar(dcl).bb_id = id;
918         }
919 
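        // Presumably bb_id stays UINT_MAX while unset and becomes
        // UINT_MAX - 1 once the dcl is referenced in more than one BB, so any
        // value below UINT_MAX - 1 means the dcl is local to a single block.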
        bool isBlockLocal(const G4_Declare* dcl) const
921         {
922             return getBBId(dcl) < (UINT_MAX - 1);
923         }
924 
        G4_Declare* getSplittedDeclare(const G4_Declare* dcl) const
926         {
927             return getVar(dcl).splittedDCL;
928         }
929 
        void setSplittedDeclare(const G4_Declare* dcl, G4_Declare* sd)
931         {
932             allocVar(dcl).splittedDCL = sd;
933         }
934 
        LocalLiveRange* getLocalLR(const G4_Declare* dcl) const
936         {
937             return getVar(dcl).localLR;
938         }
939 
        void setLocalLR(G4_Declare* dcl, LocalLiveRange* lr)
941         {
942             RAVarInfo &var = allocVar(dcl);
943             MUST_BE_TRUE(var.localLR == NULL, "Local live range already allocated for declaration");
944             var.localLR = lr;
945             lr->setTopDcl(dcl);
946         }
947 
        LSLiveRange* getSafeLSLR(const G4_Declare* dcl) const
949         {
950             auto dclid = dcl->getDeclId();
951             assert(dclid < vars.size());
952             return vars[dclid].LSLR;
953         }
954 
        LSLiveRange* getLSLR(const G4_Declare* dcl) const
956         {
957             return getVar(dcl).LSLR;
958         }
959 
        void setLSLR(G4_Declare* dcl, LSLiveRange* lr)
961         {
962             RAVarInfo &var = allocVar(dcl);
963             MUST_BE_TRUE(var.LSLR == NULL, "Local live range already allocated for declaration");
964             var.LSLR = lr;
965             lr->setTopDcl(dcl);
966         }
967 
        void resetLSLR(const G4_Declare* dcl)
969         {
970             allocVar(dcl).LSLR = nullptr;
971         }
972 
        void resetLocalLR(const G4_Declare* dcl)
974         {
975             allocVar(dcl).localLR = nullptr;
976         }
977 
        void clearStaleLiveRanges()
979         {
980             for (auto dcl : kernel.Declares)
981             {
982                 setBBId(dcl, UINT_MAX);
983                 resetLocalLR(dcl);
984             }
985         }
986 
        void clearLocalLiveRanges()
988         {
989             for (auto dcl : kernel.Declares)
990             {
991                 resetLocalLR(dcl);
992             }
993         }
994 
        void recordRef(const G4_Declare* dcl)
996         {
997             allocVar(dcl).numRefs++;
998         }
999 
        unsigned getNumRefs(const G4_Declare* dcl) const
1001         {
1002             return getVar(dcl).numRefs;
1003         }
1004 
        void setNumRefs(const G4_Declare* dcl, unsigned refs)
1006         {
1007             allocVar(dcl).numRefs = refs;
1008         }
1009 
        BankConflict getBankConflict(const G4_Declare* dcl) const
1011         {
1012             return getVar(dcl).conflict;
1013         }
1014 
        void setBankConflict(const G4_Declare* dcl, BankConflict c)
1016         {
1017             allocVar(dcl).conflict = c;
1018         }
1019 
        G4_INST* getStartInterval(const G4_Declare* dcl) const
1021         {
1022             return getVar(dcl).startInterval;
1023         }
1024 
        void setStartInterval(const G4_Declare* dcl, G4_INST* inst)
1026         {
1027             allocVar(dcl).startInterval = inst;
1028         }
1029 
        G4_INST* getEndInterval(const G4_Declare* dcl) const
1031         {
1032             return getVar(dcl).endInterval;
1033         }
1034 
        void setEndInterval(const G4_Declare* dcl, G4_INST* inst)
1036         {
1037             allocVar(dcl).endInterval = inst;
1038         }
1039 
        const std::vector<unsigned char>& getMask(const G4_Declare* dcl) const
1041         {
1042             return getVar(dcl).mask;
1043         }
1044 
        void setMask(const G4_Declare* dcl, std::vector<unsigned char> m)
1046         {
1047             allocVar(dcl).mask = m;
1048         }
1049 
        AugmentationMasks getAugmentationMask(const G4_Declare* dcl) const
1051         {
1052             auto dclid = dcl->getDeclId();
1053             if (dclid >= varMasks.size())
1054             {
1055                 return AugmentationMasks::Undetermined;
1056             }
1057             return varMasks[dclid];
1058         }
1059 
        void setAugmentationMask(const G4_Declare* dcl, AugmentationMasks m)
1061         {
1062             auto dclid = dcl->getDeclId();
1063             if (dclid >= varMasks.size())
1064                 varMasks.resize(dclid + 1);
1065             varMasks[dclid] = m;
1066             if (dcl->getIsSplittedDcl())
1067             {
1068                 for (const G4_Declare *subDcl : getSubDclList(dcl))
1069                 {
1070                     setAugmentationMask(subDcl, m);
1071                 }
1072             }
1073         }
1074 
        bool getHasNonDefaultMaskDef(const G4_Declare* dcl) const
1076         {
1077             return (getAugmentationMask(dcl) == AugmentationMasks::NonDefault);
1078         }
1079 
        void addBundleConflictDcl(const G4_Declare *dcl, const G4_Declare* subDcl, int offset)
1081         {
1082             allocVar(dcl).bundleConflicts.emplace_back(subDcl, offset);
1083         }
1084 
        void clearBundleConflictDcl(const G4_Declare* dcl)
1086         {
1087             allocVar(dcl).bundleConflicts.clear();
1088         }
1089 
        const std::vector<BundleConflict> &getBundleConflicts(const G4_Declare* dcl) const
1091         {
1092             return getVar(dcl).bundleConflicts;
1093         }
1094 
        unsigned get_bundle(unsigned baseReg, int offset) const
1096         {
1097             if (builder.hasPartialInt64Support())
1098             {
1099                 return (((baseReg + offset) % 32) / 2);
1100             }
1101             return (((baseReg + offset) % 64) / 4);
1102         }
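        // For illustration: without partial int64 support,
        // get_bundle(3, 1) == ((3 + 1) % 64) / 4 == 1; with it, the same call
        // returns ((3 + 1) % 32) / 2 == 2.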
1103 
        unsigned get_bank(unsigned baseReg, int offset)
1105         {
1106             int bankID = (baseReg + offset) % 2;
1107 
1108             if (builder.hasTwoGRFBank16Bundles())
1109             {
1110                 bankID = ((baseReg + offset) % 4) / 2;
1111             }
1112 
1113 
1114             if (builder.hasOneGRFBank16Bundles())
1115             {
1116                 bankID = (baseReg + offset) % 2;
1117             }
1118 
1119             return bankID;
1120         }
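        // For illustration: with two-GRF 16-bundle banking,
        // get_bank(5, 1) == ((5 + 1) % 4) / 2 == 1, i.e. registers alternate
        // banks in pairs; otherwise the bank simply alternates with the
        // register number, (5 + 1) % 2 == 0.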
1121 
        void addSubDcl(const G4_Declare *dcl, G4_Declare* subDcl)
1123         {
1124             allocVar(dcl).subDclList.push_back(subDcl);
1125         }
1126 
        void clearSubDcl(const G4_Declare* dcl)
1128         {
1129             allocVar(dcl).subDclList.clear();
1130         }
1131 
        const std::vector<const G4_Declare*> &getSubDclList(const G4_Declare* dcl) const
1133         {
1134             return getVar(dcl).subDclList;
1135         }
1136 
        unsigned getSubOffset(const G4_Declare* dcl) const
1138         {
1139             return getVar(dcl).subOff;
1140         }
1141 
        void setSubOffset(const G4_Declare* dcl, unsigned offset)
1143         {
1144             allocVar(dcl).subOff = offset;
1145         }
1146 
        G4_SubReg_Align getSubRegAlign(const G4_Declare* dcl) const
1148         {
1149             return getVar(dcl).subAlign;
1150         }
1151 
        void setSubRegAlign(const G4_Declare* dcl, G4_SubReg_Align subAlg)
1153         {
1154             auto& subAlign = allocVar(dcl).subAlign;
1155             // sub reg alignment can only be more restricted than prior setting
1156             MUST_BE_TRUE(subAlign == Any || subAlign == subAlg || subAlign % 2 == 0,
1157                 ERROR_UNKNOWN);
1158             if (subAlign > subAlg)
1159             {
1160                 MUST_BE_TRUE(subAlign % subAlg == 0, "Sub reg alignment conflict");
1161                 // do nothing; keep the original alignment (more restricted)
1162             }
1163             else
1164             {
1165                 MUST_BE_TRUE(subAlg % subAlign == 0, "Sub reg alignment conflict");
1166                 subAlign = subAlg;
1167             }
1168         }
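        // For example (assuming the usual power-of-two G4_SubReg_Align
        // encoding), a dcl already set to Eight_Word keeps that alignment when
        // Four_Word is requested later, while an Any or Four_Word dcl gets
        // tightened to Eight_Word.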
1169 
        bool hasAlignSetup(const G4_Declare* dcl) const
1171         {
1172             if (getVar(dcl).subAlign == G4_SubReg_Align::Any &&
1173                 dcl->getSubRegAlign() != G4_SubReg_Align::Any)
1174                 return false;
1175             return true;
1176         }
1177 
        bool isEvenAligned(const G4_Declare* dcl) const
1179         {
1180             return getVar(dcl).isEvenAlign;
1181         }
1182 
        void setEvenAligned(const G4_Declare* dcl, bool e)
1184         {
1185             allocVar(dcl).isEvenAlign = e;
1186         }
1187 
1188         BankAlign getBankAlign(const G4_Declare*) const;
1189         bool areAllDefsNoMask(G4_Declare*);
1190         void removeUnreferencedDcls();
1191         LocalLiveRange* GetOrCreateLocalLiveRange(G4_Declare* topdcl);
1192 
        GlobalRA(G4_Kernel& k, PhyRegPool& r, PointsToAnalysis& p2a) : kernel(k), builder(*k.fg.builder), regPool(r),
1194             pointsToAnalysis(p2a)
1195         {
1196             vars.resize(k.Declares.size());
1197             varMasks.resize(k.Declares.size());
1198 
1199             if (kernel.getOptions()->getOption(vISA_VerifyAugmentation))
1200             {
1201                 verifyAugmentation = std::make_unique<VerifyAugmentation>();
1202             }
1203 
1204             // Need call WA for EU Fusion for non-entry function
1205             m_EUFusionWANeeded = builder.hasFusedEU()
1206                 && builder.getOption(vISA_fusedCallWA)
1207                 && (kernel.fg.getHasStackCalls() || kernel.hasIndirectCall())
1208                 && !builder.getIsKernel();
1209         }
1210 
1211         void emitFGWithLiveness(const LivenessAnalysis& liveAnalysis) const;
1212         void reportSpillInfo(const LivenessAnalysis& liveness, const GraphColor& coloring) const;
1213         static uint32_t getRefCount(int loopNestLevel);
1214         bool isReRAPass();
1215         void updateSubRegAlignment(G4_SubReg_Align subAlign);
1216         bool isChannelSliced();
1217         void evenAlign();
1218         bool evenAlignNeeded(G4_Declare*);
1219         void getBankAlignment(LiveRange* lr, BankAlign &align);
1220         void printLiveIntervals();
1221         void reportUndefinedUses(LivenessAnalysis& liveAnalysis, G4_BB* bb, G4_INST* inst, G4_Declare* referencedDcl, std::set<G4_Declare*>& defs, std::ofstream& optreport, Gen4_Operand_Number opndNum);
1222         void detectNeverDefinedUses();
1223         void emitVarLiveIntervals();
1224 
1225         void determineSpillRegSize(unsigned& spillRegSize, unsigned& indrSpillRegSize);
1226         G4_Imm* createMsgDesc(unsigned owordSize, bool writeType, bool isSplitSend);
1227         void stackCallProlog();
1228         void saveRegs(unsigned startReg, unsigned owordSize, G4_Declare* scratchRegDcl, G4_Declare* framePtr, unsigned frameOwordOffset, G4_BB* bb, INST_LIST_ITER insertIt, std::unordered_set<G4_INST*>& group);
1229         void saveActiveRegs(std::vector<bool>& saveRegs, unsigned startReg, unsigned frameOffset, G4_BB* bb, INST_LIST_ITER insertIt, std::unordered_set<G4_INST*>& group);
1230         void addrRegAlloc();
1231         void flagRegAlloc();
1232         bool hybridRA(bool doBankConflictReduction, bool highInternalConflict, LocalRA& lra);
1233         void assignRegForAliasDcl();
1234         void removeSplitDecl();
1235         int coloringRegAlloc();
1236         void restoreRegs(unsigned startReg, unsigned owordSize, G4_Declare* scratchRegDcl, G4_Declare* framePtr, unsigned frameOwordOffset, G4_BB* bb, INST_LIST_ITER insertIt, std::unordered_set<G4_INST*>& group, bool caller);
1237         void restoreActiveRegs(std::vector<bool>& restoreRegs, unsigned startReg, unsigned frameOffset, G4_BB* bb, INST_LIST_ITER insertIt, std::unordered_set<G4_INST*>& group, bool caller);
1238         void OptimizeActiveRegsFootprint(std::vector<bool>& saveRegs);
1239         void OptimizeActiveRegsFootprint(std::vector<bool>& saveRegs, std::vector<bool>& retRegs);
1240         void addCallerSaveRestoreCode();
1241         void addCalleeSaveRestoreCode();
1242         void addGenxMainStackSetupCode();
1243         void addCalleeStackSetupCode();
1244         void addSaveRestoreCode(unsigned localSpillAreaOwordSize);
1245         void addCallerSavePseudoCode();
1246         void addCalleeSavePseudoCode();
1247         void addStoreRestoreToReturn();
1248         void markGraphBlockLocalVars();
1249         void verifyRA(LivenessAnalysis & liveAnalysis);
1250         void resetGlobalRAStates();
1251 
1252         void insertPhyRegDecls();
1253 
        void copyMissingAlignment()
1255         {
1256             // Insert alignment for vars created in RA
1257             for (auto dcl : kernel.Declares)
1258             {
1259                 if (dcl->getAliasDeclare())
1260                     continue;
1261 
1262                 if (!hasAlignSetup(dcl))
1263                 {
1264                     // Var may be temp created in RA
1265                     setSubRegAlign(dcl, dcl->getSubRegAlign());
1266                     setEvenAligned(dcl, dcl->isEvenAlign());
1267                 }
1268             }
1269         }
1270 
        void copyAlignment(G4_Declare* dst, G4_Declare* src)
1272         {
1273             setEvenAligned(dst, isEvenAligned(src));
1274             setSubRegAlign(dst, getSubRegAlign(src));
1275         }
1276 
        void copyAlignment()
1278         {
1279             for (auto dcl : kernel.Declares)
1280             {
1281                 if (dcl->getAliasDeclare())
1282                     continue;
1283 
1284                 setSubRegAlign(dcl, dcl->getSubRegAlign());
1285                 setEvenAligned(dcl, dcl->isEvenAlign());
1286             }
1287         }
1288 
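        // dontRemat records instructions that rematerialization must not touch.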
        bool isNoRemat(G4_INST* inst)
        {
            return dontRemat.find(inst) != dontRemat.end();
        }

        void addNoRemat(G4_INST* inst)
        {
            dontRemat.insert(inst);
        }
    };

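    // Defined out of line here, presumably so that GlobalRA is a complete
    // type at the point of the call.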
    inline G4_Declare* Interference::getGRFDclForHRA(int GRFNum) const
    {
        return gra.getGRFDclForHRA(GRFNum);
    }

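    // VarSplit splits a variable into smaller sub-declares, either locally
    // within a BB or globally across the kernel, and inserts the moves needed
    // to copy data between the original declare and the split temporaries.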
    class VarSplit
    {
    private:
        G4_Kernel& kernel;
        GlobalRA& gra;

        VarRange* splitVarRange(VarRange *src1, VarRange *src2, std::stack<VarRange*> *toDelete);
        void rangeListSpliting(VAR_RANGE_LIST *rangeList, G4_Operand *opnd, std::stack<VarRange*> *toDelete);
        static void getHeightWidth(G4_Type type, unsigned numberElements, unsigned short &dclWidth, unsigned short &dclHeight, int &totalByteSize);
        void createSubDcls(G4_Kernel& kernel, G4_Declare* oldDcl, std::vector<G4_Declare*> &splitDclList);
        void insertMovesToTemp(IR_Builder& builder, G4_Declare* oldDcl, G4_Operand *dstOpnd, G4_BB* bb, INST_LIST_ITER instIter, std::vector<G4_Declare*> &splitDclList);
        void insertMovesFromTemp(G4_Kernel& kernel, G4_Declare* oldDcl, int index, G4_Operand *srcOpnd, int pos, G4_BB* bb, INST_LIST_ITER instIter, std::vector<G4_Declare*> &splitDclList);

    public:
        bool didLocalSplit = false;
        bool didGlobalSplit = false;

        void localSplit(IR_Builder& builder, G4_BB* bb);
        void globalSplit(IR_Builder& builder, G4_Kernel &kernel);
        bool canDoGlobalSplit(IR_Builder& builder, G4_Kernel &kernel, uint32_t sendSpillRefCount);

        VarSplit(GlobalRA& g) : kernel(g.kernel), gra(g)
        {

        }
    };

    //
    // Spill code clean up
    //
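    // Profiling counters for the spill/fill cleanup: how many spill and fill
    // accesses were cleaned up, bucketed into 10 slots.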
    typedef struct _CLEAN_NUM_PROFILE
    {
        unsigned spill_clean_num[10] {};
        unsigned fill_clean_num[10] {};
    } CLEAN_NUM_PROFILE;

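    // A [leftOff, rightOff] range of offsets within the scratch (spill) space.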
    typedef struct _SCRATCH_RANGE
    {
        unsigned leftOff;
        unsigned rightOff;
    } SCRATCH_RANGE;

    typedef std::vector<SCRATCH_RANGE>   SCRATCH_RANGE_VEC;
    typedef std::vector<SCRATCH_RANGE>::iterator   SCRATCH_RANGE_VEC_ITER;

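    // A linearized register byte range touched by an access; 'predicate'
    // records whether the defining instruction was predicated.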
    typedef struct _RANGE
    {
        unsigned linearizedStart;
        unsigned linearizedEnd;
        bool     predicate;
    } REG_RANGE;

    typedef std::vector<REG_RANGE>   REG_RANGE_VEC;
    typedef std::vector<REG_RANGE>::iterator   REG_RANGE_VEC_ITER;

    typedef std::pair<vISA::G4_INST*, int> RENAME_OPND;
    typedef std::vector<RENAME_OPND>  RANAME_VEC;

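    // Bookkeeping for a single spill or fill access to scratch space. The
    // cleanup pass links each access to earlier accesses of the same range
    // (preScratchAccess, etc.) and tracks which register/scratch ranges have
    // since been killed, so redundant spills and fills can be removed or
    // their uses renamed.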
    typedef struct _SCRATCH_ACCESS
    {
        // Basic info
#ifdef _DEBUG
        int            regNum;
#endif
        vISA::G4_Declare*    scratchDcl;       // The scratch access
        vISA::G4_Operand*    flagOpnd;
        INST_LIST_ITER inst_it;

        unsigned   linearizedStart; // linearized start register address
        unsigned   linearizedEnd;   // linearized end register address
        unsigned   leftOff;         // left offset in scratch space
        unsigned   rightOff;        // right offset in scratch space
        unsigned   useCount;

        bool     isSpill = false;
        bool     isBlockLocal = false;
        bool     directKill = false;

        bool     regKilled = false;
        bool     regPartialKilled = false;
        bool     regOverKilled = false;
        bool     inRangePartialKilled = false;
        bool     regInUse = false;

        bool     fillInUse = false;
        bool     removeable = false;
        bool     instKilled = false;
        bool     evicted = false;
        bool     scratchDefined = false;

        unsigned   maskFlag;

        RANAME_VEC              renameOperandVec;
        SCRATCH_RANGE_VEC       killedScratchRange;
        REG_RANGE_VEC           killedRegRange;
        struct _SCRATCH_ACCESS* preScratchAccess;
        struct _SCRATCH_ACCESS* prePreScratchAccess;
        struct _SCRATCH_ACCESS* preFillAccess;

    } SCRATCH_ACCESS;

    typedef std::vector<SCRATCH_ACCESS*> SCRATCH_PTR_VEC;

    typedef vISA::std_arena_based_allocator<SCRATCH_ACCESS*> SCRATCH_PTR_ALLOCATOR;
    typedef std::list<SCRATCH_ACCESS*, SCRATCH_PTR_ALLOCATOR>           SCRATCH_PTR_LIST;
    typedef std::list<SCRATCH_ACCESS*, SCRATCH_PTR_ALLOCATOR>::iterator SCRATCH_PTR_LIST_ITER;

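    // Post-RA cleanup that tries to remove redundant spill and fill code for
    // flag/scratch accesses by tracing scratch accesses within each BB; the
    // public entry point is spillFillCodeCleanFlag().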
    class FlagSpillCleanup
    {
    private:
        GlobalRA& gra;

        void FlagLineraizedStartAndEnd(G4_Declare* topdcl, unsigned& linearizedStart, unsigned& linearizedEnd);
        bool replaceWithPreDcl(IR_Builder& builder, SCRATCH_ACCESS* scratchAccess, SCRATCH_ACCESS* preScratchAccess);
        bool scratchKilledByPartial(SCRATCH_ACCESS* scratchAccess, SCRATCH_ACCESS* preScratchAccess);
        bool addKilledGRFRanges(unsigned linearizedStart, unsigned linearizedEnd, SCRATCH_ACCESS* scratchAccess,
            G4_Predicate* predicate);
        bool regFullyKilled(SCRATCH_ACCESS* scratchAccess, unsigned linearizedStart, unsigned linearizedEnd,
            unsigned short maskFlag);
        bool inRangePartialKilled(SCRATCH_ACCESS* scratchAccess, unsigned linearizedStart, unsigned linearizedEnd,
            unsigned short maskFlag);
        bool regDefineAnalysis(SCRATCH_ACCESS* scratchAccess, unsigned linearizedStart, unsigned linearizedEnd,
            unsigned short maskFlag, G4_Predicate* predicate);
        void regDefineFlag(SCRATCH_PTR_LIST* scratchTraceList, G4_INST* inst, G4_Operand* opnd);
        bool regUseAnalysis(SCRATCH_ACCESS* scratchAccess, unsigned linearizedStart, unsigned linearizedEnd);
        void regUseFlag(SCRATCH_PTR_LIST* scratchTraceList, G4_INST* inst, G4_Operand* opnd, int opndIndex);
        void regUseScratch(SCRATCH_PTR_LIST* scratchTraceList, G4_INST* inst, G4_Operand* opnd, Gen4_Operand_Number opndIndex);
        void initializeScratchAccess(SCRATCH_ACCESS* scratchAccess, INST_LIST_ITER inst_it);
        bool initializeFlagScratchAccess(SCRATCH_PTR_VEC* scratchAccessList, SCRATCH_ACCESS*& scratchAccess, INST_LIST_ITER inst_it);
        void freeScratchAccess(SCRATCH_PTR_VEC* scratchAccessList);
        void flagDefine(SCRATCH_PTR_LIST& scratchTraceList, G4_INST* inst);
        void scratchUse(SCRATCH_PTR_LIST& scratchTraceList, G4_INST* inst);
        void flagUse(SCRATCH_PTR_LIST& scratchTraceList, G4_INST* inst);
        bool flagScratchDefineUse(G4_BB* bb, SCRATCH_PTR_LIST* scratchTraceList, SCRATCH_PTR_VEC* candidateList,
            SCRATCH_ACCESS* scratchAccess, CLEAN_NUM_PROFILE* clean_num_profile);
        void flagSpillFillClean(G4_BB* bb, INST_LIST_ITER inst_it, SCRATCH_PTR_VEC& scratchAccessList,
            SCRATCH_PTR_LIST& scratchTraceList, SCRATCH_PTR_VEC& candidateList, CLEAN_NUM_PROFILE* clean_num_profile);
        void regFillClean(IR_Builder& builder, G4_BB* bb, SCRATCH_PTR_VEC& candidateList,
            CLEAN_NUM_PROFILE* clean_num_profile);
        void regSpillClean(IR_Builder& builder, G4_BB* bb, SCRATCH_PTR_VEC& candidateList, CLEAN_NUM_PROFILE* clean_num_profile);

    public:
        void spillFillCodeCleanFlag(IR_Builder& builder, G4_Kernel& kernel, CLEAN_NUM_PROFILE* clean_num_profile);
        FlagSpillCleanup(GlobalRA& g) : gra(g)
        {

        }

    };
}

// TODO: Refactor code so that stack call related enums,
// methods, etc. are part of this class. Right now the
// code is scattered across FlowGraph and GraphColor.
class StackCall
{
public:
    // The following enum holds the offsets of various fields in the
    // frame descriptor, as defined by the vISA ABI.
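    // Each enumerator scales a sub-register index to a byte offset; the
    // FE_FP/FE_SP fields appear to occupy 8 bytes each, while the remaining
    // fields occupy 4 bytes each.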
    enum class FrameDescriptorOfsets
    {
        FE_FP = vISA::IR_Builder::SubRegs_Stackcall::FE_FP * 8,
        FE_SP = vISA::IR_Builder::SubRegs_Stackcall::FE_SP * 8,
        Ret_IP = vISA::IR_Builder::SubRegs_Stackcall::Ret_IP * 4,
        Ret_EM = vISA::IR_Builder::SubRegs_Stackcall::Ret_EM * 4,
        BE_FP = vISA::IR_Builder::SubRegs_Stackcall::BE_FP * 4,
        BE_SP = vISA::IR_Builder::SubRegs_Stackcall::BE_SP * 4
    };
};

#endif // __GRAPHCOLOR_H__