1 /*========================== begin_copyright_notice ============================
2 
3 Copyright (C) 2017-2021 Intel Corporation
4 
5 SPDX-License-Identifier: MIT
6 
7 ============================= end_copyright_notice ===========================*/
8 
9 #ifndef __PHYREGUSAGE_H__
10 #define __PHYREGUSAGE_H__
11 
12 #include "BuildIR.h"
13 #include "G4_Opcode.h"
14 #include "G4_IR.hpp"
15 
16 enum ColorHeuristic {FIRST_FIT, ROUND_ROBIN};
17 
18 // forward declares
19 namespace vISA
20 {
21 class LiveRange;
22 class GlobalRA;
23 
24 class PhyRegUsageParms
25 {
26 public:
27     GlobalRA& gra;
28     G4_RegFileKind rFile;
29     unsigned int maxGRFCanBeUsed;
30     unsigned int& startARFReg;
31     unsigned int& startFlagReg;
32     unsigned int& startGRFReg;
33     unsigned int& bank1_start;
34     unsigned int& bank1_end;
35     unsigned int& bank2_start;
36     unsigned int& bank2_end;
37     bool doBankConflict;
38     bool* availableGregs;
39     uint32_t* availableSubRegs;
40     bool* availableAddrs;
41     bool* availableFlags;
42     uint8_t* weakEdgeUsage;
43     unsigned int totalGRF;
44     LiveRange** lrs;
45 
46     PhyRegUsageParms(GlobalRA& g, LiveRange* l[], G4_RegFileKind r, unsigned int m, unsigned int& startARF, unsigned int& startFlag, unsigned int& startGRF,
47         unsigned int& bank1_s, unsigned int& bank1_e, unsigned int& bank2_s, unsigned int& bank2_e, bool doBC, bool* avaGReg,
48         uint32_t* avaSubReg, bool* avaAddrs, bool* avaFlags, uint8_t* weakEdges);
49 };
50 
51 //
52 // track which registers are currently in use (cannot be assigned to other variables)
53 // For sub reg allocation, the granularity is UW/W (2 bytes). Doing so, we only need to
54 // handle even and odd alignment.
55 //
56 class PhyRegUsage
57 {
58     GlobalRA& gra;
59     LiveRange** lrs;
60     unsigned maxGRFCanBeUsed;
61     bool *availableGregs;             // true if the reg is available for allocation
62     uint32_t *availableSubRegs;     // each entry is a 16-bit value of the words that are free in a GRF
63     bool *availableAddrs;                        // true if the reg is available for allocation
64     bool *availableFlags;
65     uint8_t* weakEdgeUsage;
66 
67     ColorHeuristic colorHeuristic;   // perform register assignment in first-fit/round-robin for GRFs
68     G4_RegFileKind regFile;
69 
70     unsigned& startARFReg;                        // round-robin reg  start bias
71     unsigned& startFLAGReg;
72     unsigned& startGRFReg;                        // round-robin reg  start bias
73 
74 
75     unsigned &bank1_start;
76     unsigned &bank2_start;
77     unsigned &bank1_end;
78     unsigned &bank2_end;
79 
80     unsigned totalGRFNum;
81 
82     bool honorBankBias;       // whether we honor the bank bias assigned by the bank conflict avoidance heuristic
83     bool overlapTest; // set to true only when current dcl has compatible ranges marked by augmentation
84 
85     struct PhyReg
86     {
87         int reg;
88         int subreg; // in unit of words (0-15)
89     }; // return type for findGRFSubReg
90 
91     PhyReg findGRFSubReg(const bool forbidden[],
92         bool callerSaveBias,
93         bool callerSaverBias,
94         BankAlign align,
95         G4_SubReg_Align subAlign,
96         unsigned nwords);
97 
98     void findGRFSubRegFromRegs(int startReg,
99         int endReg,
100         int step,
101         PhyReg *phyReg,
102         G4_SubReg_Align subAlign,
103         unsigned nwords,
104         const bool forbidden[],
105         bool fromPartialOccupiedReg);
106 
107     PhyReg findGRFSubRegFromBanks(G4_Declare *dcl,
108         const bool forbidden[],
109         bool oneGRFBankDivision);
110 
111     void freeGRFSubReg(unsigned regNum, unsigned regOff, unsigned nwords, G4_Type ty);
112     void freeContiguous(bool availRegs[], unsigned start, unsigned numReg, unsigned maxRegs);
113     bool canGRFSubRegAlloc(G4_Declare* decl);
114     bool findContiguousNoWrapGRF(bool availRegs[], const bool forbidden[], unsigned short occupiedBundles, BankAlign align, unsigned numRegNeeded, unsigned startPos, unsigned endPos, unsigned & idx);
115 
116     bool findContiguousNoWrapAddrFlag(bool availRegs[],
117                                    const bool forbidden[],
118                                  G4_SubReg_Align subAlign,
119                                  unsigned numRegNeeded,
120                                  unsigned startPos,
121                                  unsigned endPos,
122                                  unsigned& idx);
123 
124     bool allFree(bool availRegs[], unsigned maxRegs);
125 
126     bool findFreeRegs(bool availRegs[],
127         const bool forbidden[],
128         BankAlign align,
129         unsigned numRegNeeded,
130         unsigned startRegNum,
131         unsigned endRegNum,
132         unsigned& idx,
133         bool gotoSecondBank,
134         bool oneGRFBankDivision);
135 
136 public:
137     IR_Builder& builder;
138 
139     PhyRegPool& regPool; // all Physical Reg Operands
140 
141     PhyRegUsage(PhyRegUsageParms&);
142 
143     bool isOverlapValid(unsigned int, unsigned int);
144 
setWeakEdgeUse(unsigned int reg,uint8_t index)145     void setWeakEdgeUse(unsigned int reg, uint8_t index)
146     {
147         // Consider V1 is allocated to r10, r11, r12, r13.
148         // Then following will be set eventually to model
149         // compatible ranges:
150         //  weakEdgeUsage[10] = 1;
151         //  weakEdgeUsage[11] = 2;
152         //  weakEdgeUsage[12] = 3;
153         //  weakEdgeUsage[13] = 4;
154         // This means some other compatible range cannot start
155         // at r7, r8, r9, r11, r12, r13. Another compatible range
156         // can either have no overlap at all with this range (strong
157         // edge), or it can start at r10 to have full
158         // overlap (weak edge).
159         weakEdgeUsage[reg] = index;
160     }
161 
getWeakEdgeUse(unsigned int reg)162     uint8_t getWeakEdgeUse(unsigned int reg) const
163     {
164         return weakEdgeUsage[reg];
165     }
166 
runOverlapTest(bool t)167     void runOverlapTest(bool t)
168     {
169         overlapTest = t;
170     }
171 
~PhyRegUsage()172     ~PhyRegUsage()
173     {
174 
175     }
176 
177     bool assignRegs(bool  isSIMD16,
178                     LiveRange* var,
179                     const bool* forbidden,
180                     BankAlign  align,
181                     G4_SubReg_Align subAlign,
182                     ColorHeuristic colorHeuristic,
183                     float spillCost,
184                     bool hintSet);
185 
186     bool assignGRFRegsFromBanks(LiveRange*     varBasis,
187                              BankAlign  align,
188                              const bool*     forbidden,
189                              ColorHeuristic  heuristic,
190                              bool oneGRFBankDivision);
191 
192     void markBusyForDclSplit(G4_RegFileKind kind,
193         unsigned regNum,
194         unsigned regOff,
195         unsigned nunits,
196         unsigned numRows);
197 
markBusyGRF(unsigned regNum,unsigned regOff,unsigned nunits,unsigned numRows,bool isPreDefinedVar)198     void markBusyGRF(unsigned regNum,
199         unsigned regOff,
200         unsigned nunits,
201         unsigned numRows,
202         bool isPreDefinedVar)
203     {
204         MUST_BE_TRUE(numRows > 0 && nunits > 0, ERROR_INTERNAL_ARGUMENT);
205 
206         MUST_BE_TRUE((regNum + numRows <= maxGRFCanBeUsed) || isPreDefinedVar,
207             ERROR_UNKNOWN);
208 
209         //
210         // sub reg allocation (allocation unit is word)
211         //
212         if (numRows == 1 && regOff + nunits < numEltPerGRF<Type_UW>())
213         {
214             availableGregs[regNum] = false;
215             auto subregMask = getSubregBitMask(regOff, nunits);
216             availableSubRegs[regNum] &= ~subregMask;
217         }
218         else // allocate whole registers
219         {
220             for (unsigned i = 0; i < numRows; i++)
221             {
222                 availableGregs[regNum + i] = false;
223                 if (getGRFSize() == 64)
224                     availableSubRegs[regNum + i] = 0;
225                 else
226                     availableSubRegs[regNum + i] = 0xffff0000;
227             }
228         }
229     }
230 
markBusyAddress(unsigned regNum,unsigned regOff,unsigned nunits,unsigned numRows)231     void markBusyAddress(unsigned regNum,
232                   unsigned regOff,
233                   unsigned nunits,
234         unsigned numRows)
235     {
236         MUST_BE_TRUE(regNum == 0 && regOff + nunits <= getNumAddrRegisters(),
237             ERROR_UNKNOWN);
238         for (unsigned i = regOff; i < regOff + nunits; i++)
239             availableAddrs[i] = false;
240     }
241 
markBusyFlag(unsigned regNum,unsigned regOff,unsigned nunits,unsigned numRows)242     void markBusyFlag(unsigned regNum,
243         unsigned regOff,
244         unsigned nunits,
245         unsigned numRows)
246     {
247         for (unsigned i = regOff; i < regOff + nunits; i++)
248             availableFlags[i] = false;
249     }
250 
numAllocUnit(unsigned nelems,G4_Type ty)251     static unsigned numAllocUnit(unsigned nelems, G4_Type ty)
252     {
253         //
254         // we allocate sub reg in 2-byte granularity
255         //
256         unsigned nbytes = nelems* TypeSize(ty);
257         return nbytes/G4_WSIZE + nbytes%G4_WSIZE;
258     }
259 
260     // translate offset to allocUnit
offsetAllocUnit(unsigned nelems,G4_Type ty)261     static unsigned offsetAllocUnit(unsigned nelems, G4_Type ty)
262     {
263 
264         unsigned nbytes = nelems * TypeSize(ty);
265         //RA allocate register in unit of G4_WSIZE bytes
266         //pre-assigned register may start from nbytes%G4_WSIZE != 0, i.e, within an allocUnit
267         return nbytes/G4_WSIZE;
268     }
269 
270     void updateRegUsage(LiveRange* lr);
271 
getSubregBitMask(uint32_t start,uint32_t num)272     uint32_t getSubregBitMask(uint32_t start, uint32_t num) const
273     {
274         MUST_BE_TRUE(num > 0 && start+num <= numEltPerGRF<Type_UW>(), "illegal number of words");
275         uint32_t mask = ((1 << num) - 1) << start;
276 
277         return (uint32_t) mask;
278     }
279 
emit(std::ostream & output)280     void emit(std::ostream& output)
281     {
282         output << "available GRFs: ";
283         for (unsigned int i = 0; i < totalGRFNum; i++)
284         {
285             if (availableGregs[i])
286             {
287                 output << i << " ";
288             }
289         }
290         output << "\n";
291     }
292 
293 private:
294 
295     void freeRegs(LiveRange* var);
296 
297     bool findContiguousAddrFlag(bool availRegs[],
298                             const bool forbidden[],
299                            G4_SubReg_Align subAlign,
300                            unsigned numRegNeeded,
301                            unsigned maxRegs,
302                            unsigned& startReg, // inout
303                            unsigned& idx,      // output
304                            bool isCalleeSaveBias = false,
305                            bool isEOTSrc = false);
306 
307     bool findContiguousGRFFromBanks(G4_Declare *dcl, bool availRegs[],
308                                  const bool forbidden[],
309                                  BankAlign align,
310                                  unsigned& idx,
311                                  bool oneGRFBankDivision);
312 
313     unsigned short getOccupiedBundle(const G4_Declare* dcl) const;
314 
315     // find contiguous free words in a registers
316     int findContiguousWords(uint32_t words, G4_SubReg_Align alignment, int numWord) const;
317     bool findContiguousGRF(bool availRegs[], const bool forbidden[], unsigned occupiedBundles, BankAlign align,
318         unsigned numRegNeeded, unsigned maxRegs, unsigned & startPos, unsigned & idx, bool isCalleeSaveBias, bool isEOTSrc);
319 };
320 }
321 #endif // __PHYREGUSAGE_H__
322