1 /*========================== begin_copyright_notice ============================
2 
3 Copyright (C) 2017-2021 Intel Corporation
4 
5 SPDX-License-Identifier: MIT
6 
7 ============================= end_copyright_notice ===========================*/
8 
9 #ifndef _INC_LOCALRA_H_
10 #define _INC_LOCALRA_H_
11 
12 #include <list>
13 #include "G4_Opcode.h"
14 #include "FlowGraph.h"
15 #include "BuildIR.h"
16 #include "BitSet.h"
17 
18 // Forward decls
19 namespace vISA
20 {
21 class G4_Declare;
22 class G4_INST;
23 class G4_BB;
24 class LinearScan;
25 class LocalLiveRange;
26 class PhyRegLocalRA;
27 class PhyRegsManager;
28 class PhyRegSummary;
29 class BankConflictPass;
30 class GlobalRA;
31 }
32 
33 vISA::G4_Declare* GetTopDclFromRegRegion(vISA::G4_Operand* opnd);
34 
35 typedef std::map<vISA::LocalLiveRange*, std::vector<std::pair<INST_LIST_ITER, unsigned int>>> LLR_USE_MAP;
36 typedef std::map<vISA::LocalLiveRange*, std::vector<std::pair<INST_LIST_ITER, unsigned int>>>::iterator LLR_USE_MAP_ITER;
37 
38 // Each declaration will have a LocalLiveRange object allocated for it
39 namespace vISA
40 {
41     class PhyRegsLocalRA;
42     class InputLiveRange;
43 
44     class LocalRA
45     {
46     private:
47         G4_Kernel& kernel;
48         IR_Builder& builder;
49         PhyRegsLocalRA* pregs = nullptr;
50         LLR_USE_MAP LLRUseMap;
51         unsigned int numRegLRA = 0;
52         unsigned int globalLRSize = 0;
53         bool doSplitLLR = false;
54         Mem_Manager& mem;
55         std::list<InputLiveRange*, std_arena_based_allocator<InputLiveRange*>> inputIntervals;
56         BankConflictPass& bc;
57         GlobalRA& gra;
58         bool doBCR = false;
59         bool highInternalConflict = false;
60         bool hasSplitInsts = false;
61 
62         BankAlign getBankAlignForUniqueAssign(G4_Declare *dcl);
63         bool hasBackEdge();
64         void evenAlign();
65         void preLocalRAAnalysis();
66         void trivialAssignRA(bool& needGlobalRA, bool threeSourceCandidate);
67         bool localRAPass(bool doRoundRobin, bool doSplitLLR);
68         void resetMasks();
69         void blockOutputPhyRegs();
70         void removeUnrequiredLifetimeOps();
71         bool assignUniqueRegisters(bool twoBanksRA, bool twoDirectionsAssign, bool hybridWithSpill);
72         bool unassignedRangeFound();
73         void updateRegUsage(PhyRegSummary* summary, unsigned int& numRegsUsed);
74         void localRAOptReport();
75         void markReferencesInOpnd(G4_Operand* opnd, bool isEOT, INST_LIST_ITER inst_it,
76             unsigned int pos);
77         void markReferencesInInst(INST_LIST_ITER inst_it);
78         void setLexicalID(bool includePseudo);
79         void markReferences(unsigned int& numRowsEOT, bool& lifetimeOpFound);
80         void calculateInputIntervals();
81         bool hasDstSrcOverlapPotential(G4_DstRegRegion* dst, G4_SrcRegRegion* src);
82         void calculateLiveIntervals(G4_BB* bb, std::vector<LocalLiveRange*>& liveIntervals);
83         void printAddressTakenDecls();
84         void printLocalRACandidates();
85         void printLocalLiveIntervals(G4_BB* bb, std::vector<LocalLiveRange*>& liveIntervals);
86         void printInputLiveIntervals();
87         bool countLiveIntervals();
88 
89         // scratch fields used for parameter passing
90         G4_BB* curBB = nullptr;
91 
92     public:
93         static void getRowInfo(int size, int& nrows, int& lastRowSize);
94         static unsigned int convertSubRegOffFromWords(G4_Declare* dcl, int subregnuminwords);
95         static unsigned int convertSubRegOffToWords(G4_Declare* dcl, int subregnum);
96         static void countLocalLiveIntervals(std::vector<LocalLiveRange*>& liveIntervals);
97         static void printLocalLiveIntervalDistribution(unsigned int numScalars,
98             unsigned int numHalfGRF, unsigned int numOneGRF, unsigned int numTwoOrMoreGRF,
99             unsigned int numTotal);
100 
101         LocalRA(BankConflictPass&, GlobalRA&);
102         bool localRA();
103         void undoLocalRAAssignments(bool clearInterval);
doHybridBCR()104         bool doHybridBCR() const { return doBCR; }
hasHighInternalBC()105         bool hasHighInternalBC() const { return highInternalConflict; }
106     };
107 
108 class LocalLiveRange
109 {
110 private:
111     G4_Declare* topdcl;
112     G4_INST* firstRef;
113     G4_INST* lastRef;
114     unsigned int lrStartIdx, lrEndIdx;
115     bool isIndirectAccess;
116     G4_VarBase* preg;
117     // pregoff is stored in word here
118     // But subreg offset stored in regvar should be in units of dcl's element size
119     int pregoff;
120 
121     unsigned int numRefsInFG;
122     G4_BB* prevBBRef;
123     bool eot;
124 
125     bool assigned;
126     bool isSplit;
127 
128     IR_Builder& builder;
129 
130     std::unordered_set<unsigned int> forbiddenGRFs;
131     const static unsigned int UndefHint = 0xffffffff;
132     unsigned int hint = UndefHint;
133 
134 public:
LocalLiveRange(IR_Builder & b)135     LocalLiveRange(IR_Builder& b) : builder(b)
136     {
137         topdcl = NULL;
138         firstRef = lastRef = NULL;
139         lrStartIdx = lrEndIdx = 0;
140         isIndirectAccess = false;
141         numRefsInFG = 0;
142         prevBBRef = NULL;
143         preg = NULL;
144         pregoff = 0;
145         assigned = false;
146         eot = false;
147         isSplit = false;
148     }
149 
150     // A reference to this live range exists in bb basic block, record it
151     void recordRef(G4_BB*);
152 
markIndirectRef()153     void markIndirectRef() { isIndirectAccess = true; }
154 
155     // A live range is local if it is never accessed indirectly (via address taken register) and
156     // only a single basic block references the range
157     bool isLiveRangeLocal() const;
158 
159     bool isLiveRangeGlobal();
160 
161     bool isGRFRegAssigned();
162 
setTopDcl(G4_Declare * dcl)163     void setTopDcl(G4_Declare* dcl)
164     {
165         MUST_BE_TRUE(topdcl == NULL, "Redefining top dcl");
166         topdcl = dcl;
167     }
168 
getTopDcl()169     G4_Declare* getTopDcl() const { return topdcl; }
170 
new(size_t sz,Mem_Manager & m)171     void* operator new(size_t sz, Mem_Manager& m) {return m.alloc(sz);}
172 
hasIndirectAccess()173     bool hasIndirectAccess() const { return isIndirectAccess; }
174 
setFirstRef(G4_INST * inst,unsigned int idx)175     void setFirstRef(G4_INST* inst, unsigned int idx)
176     {
177         firstRef = inst;
178         lrStartIdx = idx;
179     }
180 
getFirstRef(unsigned int & idx)181     G4_INST* getFirstRef(unsigned int& idx) const
182     {
183         idx = lrStartIdx;
184         return firstRef;
185     }
186 
setLastRef(G4_INST * inst,unsigned int idx)187     void setLastRef(G4_INST* inst, unsigned int idx)
188     {
189         lastRef = inst;
190         lrEndIdx = idx;
191     }
192 
getLastRef(unsigned int & idx)193     G4_INST* getLastRef(unsigned int& idx)
194     {
195         idx = lrEndIdx;
196         return lastRef;
197     }
198 
setPhyReg(G4_VarBase * pr,int subreg)199     void setPhyReg(G4_VarBase* pr, int subreg) { preg = pr; pregoff = subreg; }
getPhyReg(int & subreg)200     G4_VarBase* getPhyReg(int& subreg) { subreg = pregoff; return preg; }
201 
202     unsigned int getSizeInWords();
203 
setAssigned(bool a)204     void setAssigned(bool a) { assigned = a; }
getAssigned()205     bool getAssigned() const { return assigned; }
206 
markEOT()207     void markEOT() { eot = true; }
isEOT()208     bool isEOT() const { return eot; }
209 
markSplit()210     void markSplit() { isSplit = true; }
getSplit()211     bool getSplit() const { return isSplit; }
212 
addForbidden(unsigned int f)213     void addForbidden(unsigned int f) { forbiddenGRFs.insert(f); }
getForbidden()214     std::unordered_set<unsigned int>& getForbidden() { return forbiddenGRFs; }
215 
setHint(unsigned int h)216     void setHint(unsigned int h) { hint = h; }
hasHint()217     bool hasHint() const { return hint != UndefHint; }
getHint()218     unsigned int getHint() const { return hint; }
219 };
220 
221 class InputLiveRange
222 {
223 private:
224     unsigned int regWordIdx;
225     unsigned int lrEndIdx;
226 
227 public:
InputLiveRange(unsigned int regId,unsigned int endId)228     InputLiveRange(unsigned int regId, unsigned int endId) : regWordIdx(regId), lrEndIdx(endId)
229     {
230 
231     }
232 
new(size_t sz,Mem_Manager & m)233     void* operator new(size_t sz, Mem_Manager& m) {return m.alloc(sz);}
234 
getRegWordIdx()235     unsigned int getRegWordIdx() { return regWordIdx; }
getLrEndIdx()236     unsigned int getLrEndIdx() { return lrEndIdx; }
237 };
238 }
239 #define SECOND_HALF_BANK_START_GRF 64
240 #define RR_HEURISTIC  0.66
241 enum
242 {
243     WORD_FREE = 0,
244     WORD_BUSY = 1,
245 };
246 
247 namespace vISA
248 {
249 class PhyRegsLocalRA
250 {
251 private:
252     IR_Builder* builder;
253     unsigned int numRegs;
254     // nth bit represents whether the register's nth word is free/busy
255     // 1 - busy, 0 - free
256     // It is possible to use bit-vector in place of this array
257     // bitvector does not provide coarse grained access to mark
258     // entire grf busy/available
259     std::vector<uint32_t> regBusyVector;
260     std::vector<int32_t> regLastUse;
261     std::vector<bool> grfAvialable;
262     int lastUseSum1;
263     int lastUseSum2;
264     int bank1AvailableRegNum;
265     int bank2AvailableRegNum;
266 
267     bool twoBanksRA;
268     bool simpleGRFAvailable;
269     bool r0Forbidden;
270     bool r1Forbidden;
271 
272     int LraFFWindowSize;
273 
274 public:
PhyRegsLocalRA(IR_Builder * _builder,uint32_t nregs)275     PhyRegsLocalRA(IR_Builder* _builder, uint32_t nregs) : builder(_builder), numRegs(nregs)
276     {
277         uint32_t grfFree = 0;
278 
279         regBusyVector.resize(numRegs);
280         regLastUse.resize(numRegs);
281         grfAvialable.resize(numRegs);
282 
283         for (int i = 0; i < (int) nregs; i++)
284         {
285             regBusyVector[i] = grfFree;
286             regLastUse[i] = 0;
287             grfAvialable[i] = true;
288         }
289 
290         lastUseSum1 = 0;
291         lastUseSum2 = 0;
292         bank1AvailableRegNum = 0;
293         bank2AvailableRegNum = 0;
294 
295         twoBanksRA = false;
296         simpleGRFAvailable = false;
297         r0Forbidden = false;
298         r1Forbidden = false;
299        LraFFWindowSize = (int)builder->getOptions()->getuInt32Option(vISA_LraFFWindowSize);
300     }
301 
new(size_t sz,Mem_Manager & m)302     void* operator new(size_t sz, Mem_Manager& m) {return m.alloc(sz);}
303 
304     void findRegisterCandiateWithAlignForward(int& i, BankAlign align, bool evenAlign);
305 
306     unsigned int get_bundle(unsigned int baseReg, int offset);
307 
308     int findBundleConflictFreeRegister(int curReg, int endReg, unsigned short occupiedBundles, BankAlign align, bool evenAlign);
309 
310     void findRegisterCandiateWithAlignBackward(int& i, BankAlign align, bool evenAlign);
311 
312     void setGRFBusy(int which);
313     void setGRFBusy(int which, bool isInput);
314     void setGRFBusy(int which, int howmany, bool isInput);
315     void setGRFBusy(int which, int howmany);
316     void setWordBusy(int whichgrf, int word, bool isInput);
317     void setGRFNotBusy(int which, int instID);
318     void setH1GRFBusy(int which);
319     void setH2GRFBusy(int which);
320     void setWordBusy(int whichgrf, int word);
321     void setWordBusy(int whichgrf, int word, int howmany, bool isInput);
322     void setWordBusy(int whichgrf, int word, int howmany);
323     void setWordNotBusy(int whichgrf, int word, int instID);
324 
isGRFBusy(int which)325     inline bool isGRFBusy(int which) const
326     {
327         MUST_BE_TRUE(isGRFAvailable(which), "Invalid register");
328         return (regBusyVector[which] != 0);
329     }
330 
isGRFBusy(int which,int howmany)331     inline bool isGRFBusy(int which, int howmany) const
332     {
333         bool retval = false;
334 
335         for (int i = 0; i < howmany && !retval; i++)
336         {
337             retval |= isGRFBusy(which + i);
338         }
339 
340         return retval;
341     }
342 
343     bool isH1GRFBusy(int which);
344     bool isH2GRFBusy(int which);
345     inline bool isWordBusy(int whichgrf, int word);
346     bool isWordBusy(int whichgrf, int word, int howmany);
347 
348     bool findFreeMultipleRegsForward(int regIdx, BankAlign align, int & regnum, int nrows, int lastRowSize, int endReg, unsigned short occupiedBundles,
349         int instID, bool isHybridAlloc, std::unordered_set<unsigned int>& forbidden, bool hintSet);
350 
351     void markPhyRegs(G4_Declare* topdcl);
352 
353     // Available/unavailable is different from busy/free
354     // Unavailable GRFs are not available for allocation
setGRFUnavailable(int which)355     void setGRFUnavailable(int which) { grfAvialable[which] = false; }
isGRFAvailable(int which)356     bool isGRFAvailable(int which) const
357     {
358 
359         if (simpleGRFAvailable)
360         {
361             if (which > 1)
362             {
363                 return true;
364             }
365             else
366             {
367                 if (r0Forbidden && which == 0)
368                 {
369                     return false;
370                 }
371 
372                 if (r1Forbidden && which <= 1)
373                 {
374                     return false;
375                 }
376                 return true;
377             }
378         }
379         else
380         {
381             MUST_BE_TRUE(which < (int) numRegs, "invalid GRF");
382             return (grfAvialable[which] == true);
383         }
384     }
385 
isGRFAvailable(int which,int howmany)386     bool isGRFAvailable(int which, int howmany) const
387     {
388         if (simpleGRFAvailable)
389         {
390             if (which > 1)
391             {
392                 return true;
393             }
394             else
395             {
396                 if (r0Forbidden && which == 0)
397                 {
398                     return false;
399                 }
400 
401                 if (r1Forbidden && which <= 1)
402                 {
403                     return false;
404                 }
405                 return true;
406             }
407         }
408         else
409         {
410             for (int i = 0; i < howmany; i++)
411             {
412                 if (!isGRFAvailable(which + i))
413                 {
414                     return false;
415                 }
416             }
417         }
418 
419         return true;
420     }
421 
422     void printBusyRegs();
getRegLastUse(int reg)423     int getRegLastUse(int reg) {return regLastUse[reg];}
424 
getLastUseSum1()425     int getLastUseSum1() {return lastUseSum1;}
getLastUseSum2()426     int getLastUseSum2() {return lastUseSum2;}
getBank1AvailableRegNum()427     int getBank1AvailableRegNum() {return bank1AvailableRegNum;}
getBank2AvailableRegNum()428     int getBank2AvailableRegNum() {return bank2AvailableRegNum;}
429 
setBank1AvailableRegNum(int avail1)430     void setBank1AvailableRegNum(int avail1) { bank1AvailableRegNum = avail1;}
setBank2AvailableRegNum(int avail2)431     void setBank2AvailableRegNum(int avail2) { bank2AvailableRegNum = avail2;}
432 
setTwoBanksRA(bool twoBanks)433     void setTwoBanksRA(bool twoBanks) { twoBanksRA = twoBanks;}
setSimpleGRFAvailable(bool simple)434     void setSimpleGRFAvailable(bool simple) {simpleGRFAvailable = simple; }
setR0Forbidden()435     void setR0Forbidden() {r0Forbidden = true;}
setR1Forbidden()436     void setR1Forbidden() {r1Forbidden = true;}
437     bool findFreeMultipleRegsBackward(int regIdx, BankAlign align, int &regnum, int nrows, int lastRowSize, int endReg, int instID,
438         bool isHybridAlloc, std::unordered_set<unsigned int>& forbidden);
439     bool findFreeSingleReg(int regIdx, G4_SubReg_Align subalign, int &regnum, int &subregnum, int size);
440     bool findFreeSingleReg(int regIdx, int size, BankAlign align, G4_SubReg_Align subalign, int &regnum, int &subregnum, int endReg,
441         int instID, bool isHybridAlloc, bool forward, std::unordered_set<unsigned int>& forbidden);
442 
443     bool findFreeMultipleRegsForward(int regIdx, BankAlign align, int& regnum, int nrows, int lastRowSize, int endReg, int instID, const bool* forbidden);
444 
445     bool findFreeSingleReg(int regIdx, int size, BankAlign align, G4_SubReg_Align subalign, int& regnum, int& subregnum, int endReg, const bool* forbidden);
446 
447 };
448 
449 class PhyRegsManager
450 {
451 private:
452     PhyRegsLocalRA availableRegs;
453     bool twoBanksRA;
454 
455 public:
PhyRegsManager(PhyRegsLocalRA pregs,bool _twoBanksRA)456     PhyRegsManager(PhyRegsLocalRA pregs, bool _twoBanksRA) : availableRegs(pregs), twoBanksRA(_twoBanksRA)
457     {
458         availableRegs.setTwoBanksRA(_twoBanksRA);
459     }
460 
461     int findFreeRegs(int size, BankAlign align, G4_SubReg_Align subalign, int & regnum, int & subregnum, int startRegNum, int endRegNum,
462         unsigned short occupiedBundles, unsigned int instID, bool isHybridAlloc, std::unordered_set<unsigned int>& forbidden, bool hasHint,
463         unsigned int hintReg);
464 
465     void freeRegs(int regnum, int subregnum, int numwords, int instID);
getAvaialableRegs()466     PhyRegsLocalRA * getAvaialableRegs() { return &availableRegs; }
467     int findFreeRegs(int size, BankAlign align, G4_SubReg_Align subalign, int& regnum, int& subregnum, int startRegNum, int endRegNum, unsigned short occupiedBundles, unsigned int instID, bool isHybridAlloc, std::unordered_set<unsigned int>& forbidden);
468     int findFreeRegs(int size, BankAlign align, G4_SubReg_Align subalign, int& regnum, int& subregnum, int startRegNum, int endRegNum, unsigned int instID, const bool* forbidden);
469 };
470 
471 class LinearScan {
472 private:
473     GlobalRA& gra;
474     IR_Builder& builder;
475     Mem_Manager& mem;
476     PhyRegsManager& pregManager;
477     PhyRegsLocalRA& initPregs;
478     std::vector<LocalLiveRange*>& liveIntervals;
479     std::list<InputLiveRange*, std_arena_based_allocator<InputLiveRange*>>& inputIntervals;
480     std::list<LocalLiveRange*> active;
481     PhyRegSummary* summary;
482 
483     void expireRanges(unsigned int);
484     void expireInputRanges(unsigned int, unsigned int, unsigned int);
485     void expireAllActive();
486     bool allocateRegsFromBanks(LocalLiveRange*);
487     bool allocateRegs(LocalLiveRange* lr, G4_BB* bb, IR_Builder& builder, LLR_USE_MAP& LLRUseMap);
488     void coalesceSplit(LocalLiveRange* lr);
489     void freeAllocedRegs(LocalLiveRange*, bool);
490     void updateActiveList(LocalLiveRange*);
491 
492     BitSet pregs;
493     unsigned int simdSize;
494 
495     unsigned int globalLRSize;
496     unsigned int* startGRFReg;
497     unsigned int numRegLRA;
498 
499     unsigned int bank1StartGRFReg;
500     unsigned int bank2StartGRFReg;
501     unsigned int bank1_start;
502     unsigned int bank1_end;
503     unsigned int bank2_start;
504     unsigned int bank2_end;
505 
506     bool useRoundRobin;
507     bool doBankConflict;
508     bool highInternalConflict;
509     bool doSplitLLR;
510 
511 public:
512     LinearScan(GlobalRA& g, std::vector<LocalLiveRange*>& localLiveIntervals,
513         std::list<InputLiveRange*, std_arena_based_allocator<InputLiveRange*>>& inputLivelIntervals,
514         PhyRegsManager& pregMgr, PhyRegsLocalRA& pregs, Mem_Manager& memmgr, PhyRegSummary* s,
515         unsigned int numReg, unsigned int glrs, bool roundRobin, bool bankConflict,
516         bool internalConflict, bool splitLLR, unsigned int simdS);
517 
518     void run(G4_BB* bb, IR_Builder& builder, LLR_USE_MAP& LLRUseMap);
519 };
520 
521 class PhyRegSummary
522 {
523 private:
524     uint32_t totalNumGRF = 0;
525     std::vector<bool> GRFUsage;
526 
527 public:
PhyRegSummary()528     PhyRegSummary() {}
529 
PhyRegSummary(uint32_t numGRF)530     PhyRegSummary(uint32_t numGRF) : totalNumGRF(numGRF)
531     {
532         GRFUsage.resize(totalNumGRF, false);
533     }
534 
getNumGRF()535     uint32_t getNumGRF() const { return totalNumGRF; }
536 
new(size_t sz,Mem_Manager & m)537     void* operator new(size_t sz, Mem_Manager& m) {return m.alloc(sz);}
538 
539     void markPhyRegs(G4_VarBase* pr, unsigned int words);
540 
isGRFBusy(int regnum)541     bool isGRFBusy(int regnum) const { return GRFUsage[regnum]; }
542 
setGRFBusy(int regnum)543     void setGRFBusy(int regnum) { GRFUsage[regnum] = true; }
544 
545     void printBusyRegs();
546 
getNumFreeGRF()547     uint32_t getNumFreeGRF() const
548     {
549         uint32_t numFreeGRF = 0;
550         for (int i = 0, size = getNumGRF(); i < size; ++i)
551         {
552             if (!isGRFBusy(i))
553             {
554                 numFreeGRF++;
555             }
556         }
557         return numFreeGRF;
558     }
559 };
560 }
561 #endif // _INC_LOCALRA_H_
562