1 /*========================== begin_copyright_notice ============================
2 
3 Copyright (C) 2017-2021 Intel Corporation
4 
5 SPDX-License-Identifier: MIT
6 
7 ============================= end_copyright_notice ===========================*/
8 
9 #include "PhyRegUsage.h"
10 #include "FlowGraph.h"
11 #include "GraphColor.h"
12 
13 using namespace vISA;
14 
PhyRegUsage(PhyRegUsageParms & p)15 PhyRegUsage::PhyRegUsage(PhyRegUsageParms& p) :
16     gra(p.gra),
17     lrs(p.lrs),
18     availableGregs(p.availableGregs),
19     availableSubRegs(p.availableSubRegs),
20     availableAddrs(p.availableAddrs),
21     availableFlags(p.availableFlags),
22     colorHeuristic(FIRST_FIT),
23     startARFReg(p.startARFReg),
24     startFLAGReg(p.startFlagReg),
25     startGRFReg(p.startGRFReg),
26     bank1_start(p.bank1_start),
27     bank2_start(p.bank2_start),
28     bank1_end(p.bank1_end),
29     bank2_end(p.bank2_end),
30     totalGRFNum(p.totalGRF),
31     honorBankBias(p.doBankConflict),
32     builder(*p.gra.kernel.fg.builder),
33     regPool(p.gra.regPool)
34 {
35     maxGRFCanBeUsed = p.maxGRFCanBeUsed;
36     regFile = p.rFile;
37 
38     weakEdgeUsage = p.weakEdgeUsage;
39     overlapTest = false;
40 
41     if (regFile == G4_GRF)
42     {
43         memset(availableGregs, true, sizeof(bool)* totalGRFNum);
44         memset(availableSubRegs, 0xffffffff, sizeof(uint32_t)*totalGRFNum);
45         if (weakEdgeUsage)
46         {
47             memset(weakEdgeUsage, 0, sizeof(uint8_t)* totalGRFNum);
48         }
49     }
50     else if (regFile == G4_ADDRESS)
51     {
52         auto numAddrRegs = getNumAddrRegisters();
53         for (unsigned i = 0; i < numAddrRegs; i++)
54             availableAddrs[i] = true;
55     }
56     else if (regFile == G4_FLAG)
57     {
58         auto numFlags = builder.getNumFlagRegisters();
59         for (unsigned i = 0; i < numFlags; i++)
60             availableFlags[i] = true;
61     }
62 }
63 
markBusyForDclSplit(G4_RegFileKind kind,unsigned regNum,unsigned regOff,unsigned nunits,unsigned numRows)64 void PhyRegUsage::markBusyForDclSplit(G4_RegFileKind kind,
65     unsigned regNum,
66     unsigned regOff,
67     unsigned nunits,  //word units
68     unsigned numRows)
69 {
70     MUST_BE_TRUE(numRows > 0 && nunits > 0, ERROR_INTERNAL_ARGUMENT);
71     MUST_BE_TRUE(regNum + numRows <= maxGRFCanBeUsed, ERROR_UNKNOWN);
72 
73     unsigned start_GRF = (regNum * numEltPerGRF<Type_UW>() + regOff) / numEltPerGRF<Type_UW>();
74     unsigned end_GRF = (regNum * numEltPerGRF<Type_UW>() + regOff + nunits) / numEltPerGRF<Type_UW>();
75 
76     unsigned start_sub_GRF = (regNum * numEltPerGRF<Type_UW>() + regOff) % numEltPerGRF<Type_UW>();
77     unsigned end_sub_GRF = (regNum * numEltPerGRF<Type_UW>() + regOff + nunits) % numEltPerGRF<Type_UW>();
78 
79     for (unsigned i = start_GRF; i < end_GRF; i++)
80     {
81         availableGregs[i] = false;
82         if (getGRFSize() == 64)
83             availableSubRegs[i] = 0;
84         else
85             availableSubRegs[i] = 0xffff0000;
86     }
87 
88     if (end_sub_GRF)
89     {
90         availableGregs[end_GRF] = false;
91         if (start_GRF == end_GRF)
92         {
93             auto subregMask = getSubregBitMask(start_sub_GRF, nunits);
94             availableSubRegs[end_GRF] &= ~subregMask;
95         }
96         else
97         {
98             auto subregMask = getSubregBitMask(0, end_sub_GRF);
99             availableSubRegs[end_GRF] &= ~subregMask;
100         }
101     }
102 }
103 //
104 // mark availRegs[start ... start+num-1] free again
105 //
freeContiguous(bool availRegs[],unsigned start,unsigned num,unsigned maxRegs)106 void PhyRegUsage::freeContiguous(bool availRegs[],
107     unsigned start,
108     unsigned num,
109     unsigned maxRegs)
110 {
111     for (unsigned i = start; i < start + num; i++)
112     {
113         MUST_BE_TRUE(i < maxRegs && availRegs[i] == false,
114             ERROR_UNKNOWN);
115         availRegs[i] = true;
116     }
117 }
118 //
119 // return true if all entries are true
120 //
allFree(bool availRegs[],unsigned maxRegs)121 bool PhyRegUsage::allFree(bool availRegs[], unsigned maxRegs)
122 {
123     for (unsigned i = 0; i < maxRegs; i++)
124     {
125         if (availRegs[i] == false)
126             return false;
127     }
128     return true;
129 }
130 
131 //
// mark nwords words of reg regNum free, starting at element offset regOff (in units of type ty)
133 //
freeGRFSubReg(unsigned regNum,unsigned regOff,unsigned nwords,G4_Type ty)134 void PhyRegUsage::freeGRFSubReg(unsigned regNum,
135     unsigned regOff,
136     unsigned nwords,
137     G4_Type  ty)
138 {
139     //
140     // adjust regOff to its corresponding word position
141     //
142 
143     int startWord = regOff * TypeSize(ty) / G4_WSIZE;
144     auto subregMask = getSubregBitMask(startWord, nwords);
145     availableSubRegs[regNum] |= subregMask;
146 
147     //
148     // if all sub regs of regNum are free, then unlink the reg
149     //
150     if (availableSubRegs[regNum] == 0xFFFFFFFF)
151     {
152         MUST_BE_TRUE(!availableGregs[regNum],
153             ERROR_UNKNOWN);
154         availableGregs[regNum] = true;
155     }
156 }
157 
158 //
// free the registers that are held by the live range varBasis
160 //
freeRegs(LiveRange * varBasis)161 void PhyRegUsage::freeRegs(LiveRange* varBasis)
162 {
163     G4_Declare* decl = varBasis->getDcl();
164     G4_RegFileKind kind = decl->getRegFile();
165     MUST_BE_TRUE(varBasis->getPhyReg(),
166         ERROR_UNKNOWN);
167     if (decl->useGRF())
168     {
169         MUST_BE_TRUE(varBasis->getPhyReg()->isGreg(), ERROR_UNKNOWN);
170         if (canGRFSubRegAlloc(decl))
171         {
172             freeGRFSubReg(((G4_Greg*)varBasis->getPhyReg())->getRegNum(), varBasis->getPhyRegOff(),
173                 numAllocUnit(decl->getNumElems(), decl->getElemType()), decl->getElemType());
174         }
175         else
176         {
177             freeContiguous(availableGregs, ((G4_Greg*)varBasis->getPhyReg())->getRegNum(),
178                 decl->getNumRows(), totalGRFNum);
179         }
180     }
181     else if (kind == G4_ADDRESS)
182     {
183         MUST_BE_TRUE(varBasis->getPhyReg()->isAreg(), ERROR_UNKNOWN);
184         freeContiguous(availableAddrs, varBasis->getPhyRegOff(),
185             numAllocUnit(decl->getNumElems(), decl->getElemType()), getNumAddrRegisters());
186     }
187     else if (kind == G4_FLAG)
188     {
189         MUST_BE_TRUE(varBasis->getPhyReg()->isFlag(), ERROR_UNKNOWN);
190         freeContiguous(availableFlags, varBasis->getPhyRegOff(),
191             numAllocUnit(decl->getNumElems(), decl->getElemType()), builder.getNumFlagRegisters());
192     }
193     else // not yet handled
194         MUST_BE_TRUE(false, ERROR_UNKNOWN);
195 }
196 
getSubAlignInWords(G4_SubReg_Align subAlign)197 static int getSubAlignInWords(G4_SubReg_Align subAlign)
198 {
199     return static_cast<int>(subAlign);
200 }
201 
getOccupiedBundle(const G4_Declare * dcl) const202 unsigned short PhyRegUsage::getOccupiedBundle(const G4_Declare* dcl) const
203 {
204     unsigned short occupiedBundles = 0;
205     unsigned bundleNum = 0;
206     if (!builder.getOption(vISA_enableBundleCR))
207     {
208         return occupiedBundles;
209     }
210 
211     if (!builder.hasDPAS() || !builder.getOption(vISA_EnableDPASBundleConflictReduction))
212     {
213         return 0;
214     }
215 
216     for (const BundleConflict& conflict : gra.getBundleConflicts(dcl))
217     {
218         unsigned reg = -1;
219         int offset = 0;
220 
221         offset = conflict.offset;
222         const G4_RegVar* regVar = conflict.dcl->getRegVar();
223         if (regVar->isPhyRegAssigned())
224         {
225             reg = regVar->getPhyReg()->asGreg()->getRegNum();
226         }
227         else
228         {
229             LiveRange* lr = lrs[regVar->getId()];
230             if (lr && lr->getPhyReg())
231             {
232                 reg = lr->getPhyReg()->asGreg()->getRegNum();
233             }
234         }
235 
236         if (reg != -1)
237         {
238             unsigned bundle = gra.get_bundle(reg, offset);
239             unsigned bundle1 = gra.get_bundle(reg, offset + 1);
240             if (!(occupiedBundles & ((unsigned short)1 << bundle)))
241             {
242                 bundleNum++;
243             }
244             occupiedBundles |= (unsigned short)1 << bundle;
245             occupiedBundles |= (unsigned short)1 << bundle1;
246         }
247     }
248     if (bundleNum > 12)
249     {
250         occupiedBundles = 0;
251     }
252 
253     return occupiedBundles;
254 }
255 
256 // returns the starting word index if we find enough free contiguous words satisfying alignment,
257 // -1 otherwise
findContiguousWords(uint32_t words,G4_SubReg_Align subAlign,int numWords) const258 int PhyRegUsage::findContiguousWords(
259     uint32_t words,
260     G4_SubReg_Align subAlign,
261     int numWords) const
262 {
263     // early exit in (false?) hope of saving compile time
264     if (words == 0)
265     {
266         return -1;
267     }
268 
269     int step = getSubAlignInWords(subAlign);
270     int startWord = 0;
271 
272     for (int i = startWord; i + numWords <= (int)numEltPerGRF<Type_UW>(); i += step)
273     {
274         uint32_t bitMask = getSubregBitMask(i, numWords);
275         if ((bitMask & words) == bitMask)
276         {
277             return i;
278         }
279     }
280 
281     return -1;
282 }
283 
284 //
285 // look for contiguous available regs starting from startPos
286 //
findContiguousGRF(bool availRegs[],const bool forbidden[],unsigned occupiedBundles,BankAlign align,unsigned numRegNeeded,unsigned maxRegs,unsigned & startPos,unsigned & idx,bool forceCalleeSaveOnly,bool isEOTSrc)287 bool PhyRegUsage::findContiguousGRF(bool availRegs[],
288     const bool forbidden[],
289     unsigned occupiedBundles,
290     BankAlign align,
291     unsigned numRegNeeded,
292     unsigned maxRegs,
293     unsigned& startPos,
294     unsigned& idx,
295     bool forceCalleeSaveOnly,
296     bool isEOTSrc)
297 {
298     unsigned startPosRunOne = startPos;
299     unsigned endPosRunOne = maxRegs;
300 
301     if (isEOTSrc && (startPosRunOne >= maxRegs))
302     {
303         return false;
304     }
305     else
306     {
307         MUST_BE_TRUE(startPosRunOne < maxRegs, ERROR_UNKNOWN);
308     }
309     bool found =
310         findContiguousNoWrapGRF(
311         availRegs, forbidden, occupiedBundles, align, numRegNeeded, startPosRunOne, endPosRunOne, idx);
312 
313     if (startPosRunOne > 0 && found == false && !isEOTSrc && !forceCalleeSaveOnly)
314     {
315         unsigned startPosRunTwo = 0;
316         unsigned endPosRunTwo = startPos + numRegNeeded;
317         endPosRunTwo = std::min(endPosRunTwo, maxRegs);
318         MUST_BE_TRUE(endPosRunTwo > 0 && endPosRunTwo <= maxRegs, ERROR_UNKNOWN);
319         found =
320             findContiguousNoWrapGRF(
321             availRegs, forbidden, occupiedBundles, align, numRegNeeded, startPosRunTwo, endPosRunTwo, idx);
322     }
323 
324     if (found)
325     {
326         MUST_BE_TRUE(idx < maxRegs && idx + numRegNeeded <= maxRegs, ERROR_UNKNOWN);
327 
328         if (colorHeuristic == ROUND_ROBIN) {
329             startPos = (idx + numRegNeeded) % maxRegs;
330         }
331     }
332 
333     return found;
334 }
335 
336 //
337 // look for contiguous available regs starting from startPos
338 //
findContiguousAddrFlag(bool availRegs[],const bool forbidden[],G4_SubReg_Align subAlign,unsigned numRegNeeded,unsigned maxRegs,unsigned & startPos,unsigned & idx,bool isCalleeSaveBias,bool isEOTSrc)339 bool PhyRegUsage::findContiguousAddrFlag(bool availRegs[],
340     const bool forbidden[],
341     G4_SubReg_Align subAlign,
342     unsigned numRegNeeded,
343     unsigned maxRegs,
344     unsigned& startPos,
345     unsigned& idx,
346     bool isCalleeSaveBias,
347     bool isEOTSrc)
348 {
349     unsigned startPosRunOne = startPos;
350     unsigned endPosRunOne = maxRegs;
351 
352     if (isEOTSrc && (startPosRunOne >= maxRegs))
353     {
354         return false;
355     }
356     else
357     {
358         MUST_BE_TRUE(startPosRunOne < maxRegs, ERROR_UNKNOWN);
359     }
360     bool found =
361         findContiguousNoWrapAddrFlag(
362         availRegs, forbidden, subAlign, numRegNeeded, startPosRunOne, endPosRunOne, idx);
363 
364     if (startPosRunOne > 0 && found == false && !isEOTSrc && !isCalleeSaveBias)
365     {
366         unsigned startPosRunTwo = 0;
367         unsigned endPosRunTwo = startPos + numRegNeeded;
368         endPosRunTwo = std::min(endPosRunTwo, maxRegs);
369         MUST_BE_TRUE(endPosRunTwo > 0 && endPosRunTwo <= maxRegs, ERROR_UNKNOWN);
370         found =
371             findContiguousNoWrapAddrFlag(
372             availRegs, forbidden, subAlign, numRegNeeded, startPosRunTwo, endPosRunTwo, idx);
373     }
374 
375     if (found)
376     {
377         MUST_BE_TRUE(idx < maxRegs && idx + numRegNeeded <= maxRegs, ERROR_UNKNOWN);
378 
379         if (colorHeuristic == ROUND_ROBIN) {
380             startPos = (idx + numRegNeeded) % maxRegs;
381         }
382     }
383 
384     return found;
385 }
386 
findContiguousGRFFromBanks(G4_Declare * dcl,bool availRegs[],const bool forbidden[],BankAlign origAlign,unsigned & idx,bool oneGRFBankDivision)387 bool PhyRegUsage::findContiguousGRFFromBanks(G4_Declare *dcl,
388     bool availRegs[],
389     const bool forbidden[],
390     BankAlign origAlign,
391     unsigned& idx,
392     bool oneGRFBankDivision)
393 {   // EOT is not handled in this function
394     bool found = false;
395     unsigned numRegNeeded = dcl->getNumRows();
396     auto dclBC = gra.getBankConflict(dcl);
397     bool gotoSecondBank = (dclBC == BANK_CONFLICT_SECOND_HALF_EVEN ||
398         dclBC == BANK_CONFLICT_SECOND_HALF_ODD) && (dcl->getNumRows() > 1);
399 
400     BankAlign align = origAlign;
401     if (dclBC != BANK_CONFLICT_NONE && align == BankAlign::Either && dcl->getNumRows() <= 1)
402     {
403         align = gra.getBankAlign(dcl);
404     }
405 
406     ASSERT_USER(bank1_end < totalGRFNum && bank1_start < totalGRFNum && bank2_start < totalGRFNum && bank2_end < totalGRFNum,
407         "Wrong bank boundaries value");
408 
409     if (colorHeuristic == ROUND_ROBIN)
410     {
411         //For round robin, bank1_end and bank2_end are fixed.
412         if (gotoSecondBank)  //For odd aligned varaibe, we put them to a specific sections.
413         {
414             //From maxGRFCanBeUsed - 1 to bank2_end
415             ASSERT_USER(bank2_start >= bank2_end, "Second bank's start can not less than end\n");
416 
417             if ((bank2_start - bank2_end + 1) >= numRegNeeded) //3 - 2 + 1 >= 2
418             {
419                 found = findFreeRegs(
420                     availRegs, forbidden, align, numRegNeeded, bank2_start, bank2_end, idx, gotoSecondBank, oneGRFBankDivision);
421             }
422 
423             if (!found)
424             {
425                 if (maxGRFCanBeUsed - 1 >= bank2_start + numRegNeeded)
426                 {
427                     found = findFreeRegs(
428                         availRegs, forbidden, align, numRegNeeded, maxGRFCanBeUsed - 1, bank2_start + 1, idx, gotoSecondBank, oneGRFBankDivision);
429                 }
430                 else
431                 {
432                     return false;
433                 }
434             }
435 
436             if (found)
437             {
438                 bank2_start = idx - 1;
439                 if (bank2_start < bank2_end)
440                 {
441                     bank2_start = maxGRFCanBeUsed - 1;
442                 }
443             }
444         }
445         else
446         {   //From 0 to bank1_end
447             if (bank1_end - bank1_start + 1 >= numRegNeeded)
448             {
449                 found = findFreeRegs(
450                     availRegs, forbidden, BankAlign::Even, numRegNeeded, bank1_start, bank1_end, idx, gotoSecondBank, oneGRFBankDivision);
451             }
452 
453             if (!found)
454             {
455                 if (bank1_start >= numRegNeeded)
456                 {
457                     found = findFreeRegs(
458                         availRegs, forbidden, BankAlign::Even, numRegNeeded, 0, bank1_start - 2 + numRegNeeded, idx, gotoSecondBank, oneGRFBankDivision);
459                 }
460             }
461 
462             if (found)
463             {
464                 bank1_start = idx + numRegNeeded;
465                 if (bank1_start > bank1_end)
466                 {
467                     bank1_start = 0;
468                 }
469             }
470         }
471     }
472     else
473     {
474         //For first fit, the bank1_start and bank2_start are fixed. bank2_end and bank1_end are dynamically decided, but can not change in one direction (MIN or MAX).
475         if (gotoSecondBank)  //For odd aligned varaibe, we put them to a specific sections.
476         {
477             found = findFreeRegs(
478                 availRegs, forbidden, align, numRegNeeded, maxGRFCanBeUsed - 1, 0, idx, gotoSecondBank, oneGRFBankDivision);
479 
480             if (found)
481             {
482                 bank2_end = std::min(idx, bank2_end);
483             }
484         }
485         else
486         {
487             found = findFreeRegs(
488                 availRegs, forbidden, align, numRegNeeded, 0, maxGRFCanBeUsed - 1, idx, gotoSecondBank, oneGRFBankDivision);
489             if (found)
490             {
491                 bank1_end = std::max(idx + numRegNeeded - 1, bank1_end);
492             }
493         }
494 
495         if (bank2_end <= bank1_end)
496         {
497             found = false;
498         }
499 
500     }
501 
502     return found;
503 }
504 
isOverlapValid(unsigned int reg,unsigned int numRegs)505 bool PhyRegUsage::isOverlapValid(unsigned int reg, unsigned int numRegs)
506 {
507     for (unsigned int i = reg; i < (reg + numRegs); i++)
508     {
509         auto k = getWeakEdgeUse(i);
510         if (!(k == 0 ||
511             k == (i - reg + 1)))
512         {
513             // This condition will be taken when there is a partial
514             // overlap.
515             return false;
516         }
517     }
518 
519     return true;
520 }
521 
522 //
// look for contiguous available regs from startPos up to (not including) endPos
524 //
findContiguousNoWrapGRF(bool availRegs[],const bool forbidden[],unsigned short occupiedBundles,BankAlign align,unsigned numRegNeeded,unsigned startPos,unsigned endPos,unsigned & idx)525 bool PhyRegUsage::findContiguousNoWrapGRF(bool availRegs[],
526     const bool forbidden[],
527     unsigned short occupiedBundles,
528     BankAlign align,
529     unsigned numRegNeeded,
530     unsigned startPos,
531     unsigned endPos,
532     unsigned& idx)
533 {
534     unsigned i = startPos;
535     while (i < endPos)
536     {
537         if (((i & 0x1) && align == BankAlign::Even) || // i is odd but intv needs to be even aligned
538             ((i & 0x1) == 0 && align == BankAlign::Odd)) // i is even but intv needs to be odd aligned
539         {
540             i++;
541         }
542         else
543         {
544             if (align == BankAlign::Even2GRF)
545             {
546                 while ((i % 4 >= 2) || ((numRegNeeded >= 2) && (i % 2 != 0)))
547                 {
548                     i++;
549                 }
550             }
551             else if (align == BankAlign::Odd2GRF)
552             {
553                 while ((i % 4 < 2) || ((numRegNeeded >= 2) && (i % 2 != 0)))
554                 {
555                     i++;
556                 }
557             }
558 
559             if (numRegNeeded == 0 ||
560                 i + numRegNeeded > endPos)
561                 return false; // no available regs
562             //
563             // find contiguous numRegNeeded registers
564             // forbidden != NULL then check forbidden
565             //
566             unsigned j = i;
567             if (overlapTest &&
568                 !isOverlapValid(i, numRegNeeded))
569             {
570                 i++;
571             }
572             else if (occupiedBundles & (1 << gra.get_bundle(i, 0)) ||
573                      occupiedBundles & (1 << gra.get_bundle(i, 1)))
574             {
575                 i++;
576             }
577             else
578             {
579                 for (; j < i + numRegNeeded && availRegs[j] && (forbidden == NULL || !forbidden[j]); j++);
580                 if (j == i + numRegNeeded)
581                 {
582                     for (unsigned k = i; k < j; k++) availRegs[k] = false;
583                     idx = i;
584                     return true;
585                 }
586                 else
587                     i = j + 1;
588             }
589         }
590     }
591     return false; // no available regs
592 }
593 
594 //
// look for contiguous available regs from startPos up to (not including) endPos
596 //
findContiguousNoWrapAddrFlag(bool availRegs[],const bool forbidden[],G4_SubReg_Align subAlign,unsigned numRegNeeded,unsigned startPos,unsigned endPos,unsigned & idx)597 bool PhyRegUsage::findContiguousNoWrapAddrFlag(bool availRegs[],
598     const bool forbidden[],
599     G4_SubReg_Align subAlign, //Sub align is used only for Flag and Address registers
600     unsigned numRegNeeded,
601     unsigned startPos,
602     unsigned endPos,
603     unsigned& idx)
604 {
605     unsigned i = startPos;
606     while (i < endPos)
607     {
608         //
609         // some register assignments need special alignment, we check
610         // whether the alignment criteria is met.
611         //
612         if (subAlign == Sixteen_Word && i != 0)
613         {    // Sixteen_Word sub-align should have i=0
614             return false;
615         } else if ((subAlign == Eight_Word && i % 8 != 0) ||    // 8_Word align, i must be divided by 8
616             (i & 0x1 && subAlign == Even_Word) || // i is odd but intv needs to be even aligned
617             (subAlign == Four_Word && (i % 4 != 0))) // 4_word alignment
618             i++;
619         else
620         {
621             if (numRegNeeded == 0 ||
622                 i + numRegNeeded > endPos)
623             {
624                 return false; // no available regs
625             }
626             //
627             // find contiguous numRegNeeded registers
628             // forbidden != NULL then check forbidden
629             //
630             unsigned j = i;
631             for (; j < i + numRegNeeded && availRegs[j] && (forbidden == NULL || !forbidden[j]); j++);
632             if (j == i + numRegNeeded)
633             {
634                 for (unsigned k = i; k < j; k++) availRegs[k] = false;
635                 idx = i;
636                 return true;
637             } else {
638                 i = j + 1;
639             }
640         }
641     }
642     return false; // no available regs
643 }
644 
findFreeRegs(bool availRegs[],const bool forbidden[],BankAlign align,unsigned numRegNeeded,unsigned startRegNum,unsigned endRegNum,unsigned & idx,bool gotoSecondBank,bool oneGRFBankDivision)645 bool PhyRegUsage::findFreeRegs(bool availRegs[],
646     const bool forbidden[],
647     BankAlign align,
648     unsigned numRegNeeded,
649     unsigned startRegNum,  //inclusive
650     unsigned endRegNum, //inclusive: less and equal when startRegNum <= endRegNum, larger and equal when startRegNum > endRegNum
651     unsigned& idx,
652     bool gotoSecondBank,
653     bool oneGRFBankDivision)
654 {
655     bool forward = startRegNum <= endRegNum ? true : false;
656     int startReg = forward ? startRegNum : startRegNum - numRegNeeded + 1;
657     int endReg = forward ? endRegNum - numRegNeeded + 1 : endRegNum;
658     int i = startReg;
659 
660     while (1)
661     {
662         if (forward)
663         {
664             if (i > endReg)
665                 break;
666         }
667         else
668         {
669             if (i < endReg)
670                 break;
671         }
672 
673         if ((align == BankAlign::Even2GRF) && (i % 2 != 0 ||  i % 4 == 3))
674         {
675             i += forward ? 1 : -1;
676             continue;
677         }
678         else if ((align == BankAlign::Odd2GRF) && (i % 2 != 0 || i % 4 == 1))
679         {
680             i += forward ? 1 : -1;
681             continue;
682         }
683         else if ((((i & 0x1) && align == BankAlign::Even) || // i is odd but intv needs to be even aligned
684             (((i & 0x1) == 0) && (align == BankAlign::Odd)))) // i is even but intv needs to be odd aligned
685         {
686             i += forward ? 1 : -1;
687             continue;
688         }
689         else
690         {
691             if ((forward && (i > endReg)) ||
692                 (!forward && (i < endReg)))
693             {
694                 return false; // no available regs
695             }
696 
697             if (regFile == G4_GRF &&
698                 overlapTest &&
699                 !isOverlapValid(i, numRegNeeded))
700             {
701                 i += forward ? 1 : -1;
702             }
703             else
704             {
705                 // find contiguous numRegNeeded registers
706                 // forbidden != NULL then check forbidden
707                 //
708                 unsigned j = i;
709                 for (; j < i + numRegNeeded && availRegs[j] && (forbidden == NULL || !forbidden[j]); j++);
710                 if (j == i + numRegNeeded)
711                 {
712                     for (unsigned k = i; k < j; k++) availRegs[k] = false;
713                     idx = i;
714                     return true;
715                 }
716                 else
717                 {   //Jump over the register region which a poke in the end
718                     if (forward)
719                     {
720                         i = j + 1;
721                     }
722                     else
723                     {
724                         if (j > numRegNeeded)
725                         {
726                             i = j - numRegNeeded;
727                         }
728                         else
729                         {
730                             break;
731                         }
732                     }
733                 }
734             }
735         }
736     }
737 
738     return false;
739 }
740 
741 //
742 // return true, if the var can be allocated using sub reg
743 //
canGRFSubRegAlloc(G4_Declare * decl)744 bool PhyRegUsage::canGRFSubRegAlloc(G4_Declare* decl)
745 {
746     if (decl->getNumRows() != 1) // more than 1 row
747         return false;
748     if (numAllocUnit(decl->getNumElems(), decl->getElemType()) < numEltPerGRF<Type_UW>())
749         return true;
750     return false;
751 }
752 
findGRFSubRegFromRegs(int startReg,int endReg,int step,PhyReg * phyReg,G4_SubReg_Align subAlign,unsigned nwords,const bool forbidden[],bool fromPartialOccupiedReg)753 void PhyRegUsage::findGRFSubRegFromRegs(int startReg,
754     int endReg,
755     int step,
756     PhyReg *phyReg,
757     G4_SubReg_Align subAlign,
758     unsigned nwords,
759     const bool forbidden[],
760     bool fromPartialOccupiedReg)
761 {
762     int idx = startReg;
763     while (1)
764     {
765         if (step > 0)
766         {
767             if (idx > endReg)
768             {
769                 break;
770             }
771         }
772         else
773         {
774             if (idx < endReg)
775             {
776                 break;
777             }
778         }
779 
780         if (forbidden && forbidden[idx])
781         {
782             idx += step;
783             continue;
784         }
785 
786         if (fromPartialOccupiedReg && availableSubRegs[idx] == 0xFFFFFFFF)
787         {
788             // favor partially allocated GRF first
789             idx += step;
790             continue;
791         }
792 
793         int subreg = findContiguousWords(availableSubRegs[idx], subAlign, nwords);
794         if (subreg != -1)
795         {
796             phyReg->reg = idx;
797             phyReg->subreg = subreg;
798             return;
799         }
800 
801         idx += step;
802     }
803 
804     return;
805 }
806 
findGRFSubRegFromBanks(G4_Declare * dcl,const bool forbidden[],bool oneGRFBankDivision)807 PhyRegUsage::PhyReg PhyRegUsage::findGRFSubRegFromBanks(G4_Declare *dcl,
808     const bool forbidden[],
809     bool oneGRFBankDivision)
810 {
811     int startReg = 0, endReg = totalGRFNum;
812     int step = 0;
813     G4_SubReg_Align subAlign = gra.getSubRegAlign(dcl);
814     unsigned nwords = numAllocUnit(dcl->getNumElems(), dcl->getElemType());
815     auto dclBC = gra.getBankConflict(dcl);
816     bool gotoSecondBank = dclBC == BANK_CONFLICT_SECOND_HALF_EVEN ||
817         dclBC == BANK_CONFLICT_SECOND_HALF_EVEN;
818 
819     if (gotoSecondBank && oneGRFBankDivision)
820     {
821         startReg = (maxGRFCanBeUsed - 1);
822         startReg = startReg % 2 ? startReg : startReg - 1;
823         if (colorHeuristic == ROUND_ROBIN)
824         {
825             endReg = bank2_end;
826         }
827         else
828         {
829             endReg = 0;
830         }
831         step = -2;
832     }
833     else if (gotoSecondBank && !oneGRFBankDivision)  //We will depends on low high, treated as even align
834     {
835         startReg = (maxGRFCanBeUsed - 1);
836         startReg = startReg % 2 ? startReg - 1 : startReg;
837         if (colorHeuristic == ROUND_ROBIN)
838         {
839             endReg = bank2_end;
840         }
841         else
842         {
843             endReg = 0;
844         }
845         step = -1;
846     }
847     else
848     {
849         if (colorHeuristic == ROUND_ROBIN)
850         {
851             startReg = 0;
852             endReg = bank1_end;
853         }
854         else
855         {
856             startReg = 0;
857             endReg = maxGRFCanBeUsed - 1;
858         }
859         if (oneGRFBankDivision)
860         {
861             step = 2;
862         }
863         else
864         {
865             step = 1;
866         }
867     }
868 
869     PhyReg phyReg = { -1, -1 };
870 
871     //Try to find sub register from the registers which are partially occupied already.
872     findGRFSubRegFromRegs(startReg, endReg, step, &phyReg, subAlign, nwords, forbidden, true);
873 
874     //If failed or across the boundary of specified bank, try again and find from the registers which are totally free
875     if (phyReg.reg == -1 || (gotoSecondBank && ((unsigned)phyReg.reg <= bank1_end)) || (!gotoSecondBank && ((unsigned)phyReg.reg >= bank2_end)))
876     {
877         findGRFSubRegFromRegs(startReg, endReg, step, &phyReg, subAlign, nwords, forbidden, false);
878     }
879 
880     if (phyReg.reg != -1 && colorHeuristic == FIRST_FIT)
881     {
882         if (gotoSecondBank)
883         {
884             bank2_end = std::min((unsigned)phyReg.reg, bank2_end);
885         }
886         else
887         {
888             bank1_end = std::max((unsigned)phyReg.reg, bank1_end);
889         }
890         if (bank1_end >= bank2_end)
891         {
892             phyReg.reg = -1;
893         }
894     }
895 
896     return phyReg;
897 }
898 
899 //
900 // return reg and subRegOff (subr)
901 // To support sub-reg alignment
902 //
findGRFSubReg(const bool forbidden[],bool calleeSaveBias,bool callerSaveBias,BankAlign align,G4_SubReg_Align subAlign,unsigned nwords)903 PhyRegUsage::PhyReg PhyRegUsage::findGRFSubReg(const bool forbidden[],
904     bool calleeSaveBias,
905     bool callerSaveBias,
906     BankAlign align,
907     G4_SubReg_Align subAlign,
908     unsigned nwords)
909 {
910     int startReg = 0, endReg = totalGRFNum;
911     PhyReg phyReg = { -1, -1 };
912     if (calleeSaveBias)
913     {
914         startReg = builder.kernel.calleeSaveStart();
915     }
916     else if (callerSaveBias)
917     {
918         endReg = builder.kernel.calleeSaveStart();
919     }
920 
921     int step = align == BankAlign::Even ? 2 : 1;
922 
923     auto findSubGRFAlloc = [step, forbidden, this, subAlign, nwords](unsigned int startReg, unsigned int endReg)
924     {
925         PhyReg phyReg = { -1, -1 };
926         for (auto idx = startReg; idx < endReg; idx += step)
927         {
928             // forbidden GRF is not an allocation candidate
929             if (forbidden && forbidden[idx])
930             {
931                 continue;
932             }
933 
934             // check if entire GRF is available
935             if (availableSubRegs[idx] == 0xFFFFFFFF)
936             {
937                 if (phyReg.reg == -1)
938                 {
939                     // favor partially allocated GRF first so dont
940                     // return this assignment yet
941                     phyReg.reg = idx;
942                     phyReg.subreg = 0;
943                 }
944                 continue;
945             }
946 
947             int subreg = findContiguousWords(availableSubRegs[idx], subAlign, nwords);
948             if (subreg != -1)
949             {
950                 phyReg.reg = idx;
951                 phyReg.subreg = subreg;
952                 return phyReg;
953             }
954         }
955 
956         // either return {-1, -1} or an allocation where entire GRF is available
957         return phyReg;
958     };
959 
960     if (callerSaveBias || calleeSaveBias)
961     {
962         // attempt bias based assignment first
963         phyReg = findSubGRFAlloc(startReg, endReg);
964         if (phyReg.subreg != -1)
965             return phyReg;
966     }
967 
968     // Find sub-GRF allocation throughout GRF file
969     phyReg = findSubGRFAlloc(0, totalGRFNum);
970 
971     return phyReg;
972 }
973 
assignGRFRegsFromBanks(LiveRange * varBasis,BankAlign align,const bool * forbidden,ColorHeuristic heuristic,bool oneGRFBankDivision)974 bool PhyRegUsage::assignGRFRegsFromBanks(LiveRange*     varBasis,
975     BankAlign  align,
976     const bool*     forbidden,
977     ColorHeuristic  heuristic,
978     bool oneGRFBankDivision)
979 {
980     colorHeuristic = heuristic;
981     G4_Declare* decl = varBasis->getDcl();
982 
983     //
984     // if regs are allocated to intv, i is the reg number and off is the reg
985     // offset for sub reg allocation
986     //
987     unsigned i = 0;   // avail reg number
988 
989     //
990     // determine if we need to do sub reg allcoation
991     //
992     if (canGRFSubRegAlloc(decl))
993     {
994         bool retVal = false;
995 
996         PhyRegUsage::PhyReg phyReg = findGRFSubRegFromBanks(decl, forbidden, oneGRFBankDivision);
997         if (phyReg.reg != -1)
998         {
999             // based on type, adjust sub reg off accordingly
1000             // word: stay the same, dword: *2, byte: /2
1001             // assign r_i.off
1002             varBasis->setPhyReg(regPool.getGreg(phyReg.reg),
1003                 phyReg.subreg*G4_WSIZE / decl->getElemSize());
1004             retVal = true;
1005         }
1006 
1007         return retVal;
1008     }
1009     else
1010     {
1011         bool success = false;
1012         if (varBasis->getEOTSrc() && builder.hasEOTGRFBinding())
1013         {
1014             bool forceCalleeSaveAlloc = builder.kernel.fg.isPseudoVCEDcl(decl);
1015             startGRFReg = totalGRFNum - 16;
1016             success = findContiguousGRF(availableGregs, forbidden, 0, align, decl->getNumRows(), maxGRFCanBeUsed,
1017                 startGRFReg, i, forceCalleeSaveAlloc, true);
1018         }
1019         else
1020         {
1021             success = findContiguousGRFFromBanks(decl, availableGregs, forbidden, align, i, oneGRFBankDivision);
1022         }
1023 
1024         if (success)
1025         {
1026             varBasis->setPhyReg(regPool.getGreg(i), 0);
1027         }
1028 
1029         return success;
1030     }
1031 
1032     return false;
1033 }
1034 
1035 //
1036 // find registers for intv
1037 // To support sub-reg alignment
1038 //
assignRegs(bool highInternalConflict,LiveRange * varBasis,const bool * forbidden,BankAlign align,G4_SubReg_Align subAlign,ColorHeuristic heuristic,float spillCost,bool hintSet)1039 bool PhyRegUsage::assignRegs(bool  highInternalConflict,
1040     LiveRange*         varBasis,
1041     const bool*     forbidden,
1042     BankAlign        align,
1043     G4_SubReg_Align subAlign,
1044     ColorHeuristic  heuristic,
1045     float             spillCost,
1046     bool hintSet)
1047 {
1048     colorHeuristic = heuristic;
1049 
1050     G4_Declare* decl = varBasis->getDcl();
1051     G4_RegFileKind kind = decl->getRegFile();
1052     BankAlign bankAlign = align;
1053 
1054     //
1055     // if regs are allocated to intv, i is the reg number and off is the reg
1056     // offset for sub reg allocation
1057     //
1058     unsigned i = 0;   // avail reg number
1059 
1060     auto getAlignToUse = [](BankAlign align, BankAlign bankAlign)
1061     {
1062         if (GlobalRA::useGenericAugAlign())
1063             return (align != BankAlign::Either ? align : bankAlign);
1064         else
1065             return (bankAlign != BankAlign::Either ? bankAlign : align);
1066     };
1067 
1068     if (kind == G4_GRF) // general register file
1069     {
1070         //
1071         // determine if we need to do sub reg allcoation
1072         //
1073         if (canGRFSubRegAlloc(decl))
1074         {
1075             bool retVal = false;
1076             int oldStartGRFReg = startGRFReg;
1077             BankConflict varBasisBC = gra.getBankConflict(varBasis->getVar()->asRegVar()->getDeclare());
1078 
1079             if (!builder.getOptions()->getuInt32Option(vISA_ReservedGRFNum) &&
1080                 honorBankBias &&
1081                 varBasisBC != BANK_CONFLICT_NONE)
1082             {
1083                 if (highInternalConflict)
1084                 {
1085                     switch (varBasisBC)
1086                     {
1087                     case BANK_CONFLICT_FIRST_HALF_EVEN:
1088                     case BANK_CONFLICT_FIRST_HALF_ODD:
1089                         startGRFReg = 0;
1090                         break;
1091                     case BANK_CONFLICT_SECOND_HALF_EVEN:
1092                     case BANK_CONFLICT_SECOND_HALF_ODD:
1093                         startGRFReg = 64;
1094                         break;
1095                     default: break;
1096                     }
1097                 }
1098                 else
1099                 {
1100                     bankAlign = gra.getBankAlign(varBasis->getVar()->asRegVar()->getDeclare());
1101                 }
1102             }
1103 
1104             // If the var is biased to receive a callee-bias, start at r60 and wrap around.
1105             // NOTE: We are assuming a first-fit strategy when a callee-bias is present.
1106             if (varBasis->getCalleeSaveBias())
1107             {
1108                 startGRFReg = 60;
1109             }
1110 
1111             PhyRegUsage::PhyReg phyReg = findGRFSubReg(forbidden, varBasis->getCalleeSaveBias(),
1112                 varBasis->getCallerSaveBias(), getAlignToUse(align, bankAlign), subAlign,
1113                 numAllocUnit(decl->getNumElems(), decl->getElemType()));
1114             if (phyReg.reg != -1)
1115             {
1116                 // based on type, adjust sub reg off accordingly
1117                 // word: stay the same, dword: *2, byte: /2
1118                 // assign r_i.off
1119                 varBasis->setPhyReg(regPool.getGreg(phyReg.reg),
1120                     phyReg.subreg * G4_WSIZE / decl->getElemSize());
1121                 retVal = true;
1122             }
1123 
1124             if (varBasis->getCalleeSaveBias())
1125             {
1126                 startGRFReg = oldStartGRFReg;
1127             }
1128 
1129             return retVal;
1130         }
1131         else
1132         {
1133             int oldStartGRFReg = startGRFReg;
1134             unsigned endGRFReg = maxGRFCanBeUsed; // round-robin reg  start bias
1135             BankConflict varBasisBC = gra.getBankConflict(varBasis->getVar()->asRegVar()->getDeclare());
1136 
1137             if (!builder.getOptions()->getuInt32Option(vISA_ReservedGRFNum) &&
1138                 honorBankBias &&
1139                 varBasisBC != BANK_CONFLICT_NONE &&
1140                 !hintSet)
1141             {
1142                 if (highInternalConflict)
1143                 {
1144                     switch (varBasisBC)
1145                     {
1146                     case BANK_CONFLICT_FIRST_HALF_EVEN:
1147                     case BANK_CONFLICT_FIRST_HALF_ODD:
1148                         startGRFReg = 0;
1149                         break;
1150                     case BANK_CONFLICT_SECOND_HALF_EVEN:
1151                     case BANK_CONFLICT_SECOND_HALF_ODD:
1152                         startGRFReg = 64;
1153                         break;
1154                     default: break;
1155                     }
1156                 }
1157                 else
1158                 {
1159                     bankAlign = gra.getBankAlign(varBasis->getVar()->asRegVar()->getDeclare());
1160                 }
1161             }
1162 
1163             // If the var is biased to receive a callee-bias, start at r60 and wrap around.
1164             // NOTE: We are assuming a first-fit strategy when a callee-bias is present.
1165             if (varBasis->getCalleeSaveBias())
1166             {
1167                 startGRFReg = builder.kernel.calleeSaveStart();
1168             }
1169 
1170             if (varBasis->getEOTSrc() && builder.hasEOTGRFBinding())
1171             {
1172                 startGRFReg = totalGRFNum - 16;
1173             }
1174 
1175             bool forceCalleeSaveAlloc = builder.kernel.fg.isPseudoVCEDcl(decl);
1176             unsigned short occupiedBundles = getOccupiedBundle(decl);
1177             bool success = findContiguousGRF(availableGregs, forbidden, occupiedBundles,
1178                 getAlignToUse(align, bankAlign), decl->getNumRows(), endGRFReg,
1179                 startGRFReg, i, forceCalleeSaveAlloc, varBasis->getEOTSrc());
1180             if (success) {
1181                 varBasis->setPhyReg(regPool.getGreg(i), 0);
1182             }
1183 
1184             if (varBasis->getEOTSrc())
1185             {
1186                 startGRFReg = oldStartGRFReg;
1187             }
1188 
1189             if (varBasis->getCalleeSaveBias())
1190             {
1191                 startGRFReg = oldStartGRFReg;
1192             }
1193 
1194             return success;
1195         }
1196     }
1197     else if (kind == G4_ADDRESS) // address register
1198     {
1199         MUST_BE_TRUE(decl->getNumRows() == 1, ERROR_UNKNOWN);
1200         //
1201         // determine alignment
1202         // if the number of reg needed is more than 1, then we go ahead
1203         //
1204         unsigned regNeeded = numAllocUnit(decl->getNumElems(), decl->getElemType());
1205         if (findContiguousAddrFlag(availableAddrs, forbidden, subAlign, regNeeded, getNumAddrRegisters(), startARFReg, i))
1206         {
1207             // subregoffset should consider the declare data type
1208             varBasis->setPhyReg(regPool.getAddrReg(), i*G4_WSIZE / decl->getElemSize());
1209             return true;
1210         }
1211         return false;
1212     }
1213     else if (kind == G4_FLAG) // Flag register
1214     {
1215         MUST_BE_TRUE(decl->getNumRows() == 1, ERROR_UNKNOWN);
1216         //
1217         // determine alignment
1218         // if the number of reg needed is more than 1, then we go ahead
1219         //
1220         unsigned regNeeded = numAllocUnit(decl->getNumElems(), decl->getElemType());
1221         if (findContiguousAddrFlag(availableFlags, forbidden, subAlign, regNeeded, builder.getNumFlagRegisters(), startFLAGReg, i))
1222         {
1223             // subregoffset should consider the declare data type
1224             varBasis->setPhyReg(regPool.getFlagAreg(i / 2), i & 1);
1225             return true;
1226         }
1227         return false;
1228     }
1229     else // not handled yet
1230     {
1231         MUST_BE_TRUE(false, ERROR_UNKNOWN);
1232         return false;
1233     }
1234 }
1235 
1236 //
1237 // allocate forbidden vectors
1238 //
getForbiddenVectorSize() const1239 unsigned LiveRange::getForbiddenVectorSize() const
1240 {
1241     switch (regKind)
1242     {
1243     case G4_GRF:
1244     case G4_INPUT:
1245         return gra.kernel.getNumRegTotal();
1246     case G4_ADDRESS:
1247         return getNumAddrRegisters();
1248     case G4_FLAG:
1249         return gra.builder.getNumFlagRegisters();
1250     default:
1251         assert(false && "illegal reg file");
1252         return 0;
1253     }
1254 }
1255 
1256 //
1257 // allocate forbidden vectors
1258 //
allocForbiddenVector(Mem_Manager & mem)1259 void LiveRange::allocForbiddenVector(Mem_Manager& mem)
1260 {
1261     unsigned size = getForbiddenVectorSize();
1262 
1263     if (size > 0)
1264     {
1265         forbidden = (bool*)mem.alloc(sizeof(bool)*size);
1266         memset(forbidden, false, size);
1267     }
1268 }
1269 
getForbiddenGRFs(std::vector<unsigned int> & regNum,G4_Kernel & kernel,unsigned stackCallRegSize,unsigned reserveSpillSize,unsigned rerservedRegNum)1270 void getForbiddenGRFs(
1271     std::vector<unsigned int>& regNum, G4_Kernel &kernel,
1272     unsigned stackCallRegSize, unsigned reserveSpillSize, unsigned rerservedRegNum)
1273 {
1274     // Push forbidden register numbers to vector regNum
1275     //
1276     // r0 - Forbidden when platform is not 3d
1277     // rMax, rMax-1, rMax-2 - Forbidden in presence of stack call sites
1278     unsigned totalGRFNum = kernel.getNumRegTotal();
1279 
1280     if (kernel.getKernelType() != VISA_3D ||
1281         kernel.getOption(vISA_enablePreemption) ||
1282         reserveSpillSize > 0 ||
1283         kernel.getOption(vISA_ReserveR0))
1284     {
1285         regNum.push_back(0);
1286     }
1287 
1288     if (kernel.getOption(vISA_enablePreemption))
1289     {
1290         // r1 is reserved for SIP kernel
1291         regNum.push_back(1);
1292     }
1293 
1294     unsigned reservedRegSize = stackCallRegSize + reserveSpillSize;
1295     for (unsigned int i = 0; i < reservedRegSize; i++)
1296     {
1297         regNum.push_back(totalGRFNum - 1 - i);
1298     }
1299 
1300     unsigned largestNoneReservedReg = totalGRFNum - reservedRegSize - 1;
1301     if (totalGRFNum - reservedRegSize >= totalGRFNum - 16)
1302     {
1303         largestNoneReservedReg = totalGRFNum - 16 - 1;
1304     }
1305 
1306 
1307     if (totalGRFNum - reservedRegSize < rerservedRegNum)
1308     {
1309         MUST_BE_TRUE(false, "After reservation, there is not enough regiser!");
1310     }
1311 
1312     for (unsigned int i = 0; i < rerservedRegNum; i++)
1313     {
1314         regNum.push_back(largestNoneReservedReg - i);
1315     }
1316 }
1317 
getCallerSaveGRF(std::vector<unsigned int> & regNum,G4_Kernel * kernel)1318 void getCallerSaveGRF(std::vector<unsigned int>& regNum, G4_Kernel* kernel)
1319 {
1320     unsigned int startCalleeSave = kernel->calleeSaveStart();
1321     unsigned int endCalleeSave = startCalleeSave + kernel->getNumCalleeSaveRegs();
1322     // r60-r124 are caller save regs for SKL
1323     for (unsigned int i = startCalleeSave; i < endCalleeSave; i++)
1324     {
1325         regNum.push_back(i);
1326     }
1327 }
1328 
getCalleeSaveGRF(std::vector<unsigned int> & regNum,G4_Kernel * kernel)1329 void getCalleeSaveGRF(std::vector<unsigned int>& regNum, G4_Kernel* kernel)
1330 {
1331     // r1-r59 are callee save regs for SKL
1332     unsigned int numCallerSaveGRFs = kernel->getCallerSaveLastGRF() + 1;
1333     for (unsigned int i = 1; i < numCallerSaveGRFs; i++)
1334     {
1335         regNum.push_back(i);
1336     }
1337 }
1338 
1339 //
1340 // mark forbidden vectors
1341 //
allocForbidden(Mem_Manager & mem,bool reserveStackCallRegs,unsigned reserveSpillSize,unsigned rerservedRegNum)1342 void LiveRange::allocForbidden(Mem_Manager& mem, bool reserveStackCallRegs, unsigned reserveSpillSize, unsigned rerservedRegNum)
1343 {
1344     if (forbidden == NULL)
1345     {
1346         allocForbiddenVector(mem);
1347     }
1348 
1349 
1350     if (regKind == G4_GRF)
1351     {
1352         std::vector<unsigned int> forbiddenGRFs;
1353         unsigned int stackCallRegSize = reserveStackCallRegs ? gra.kernel.numReservedABIGRF() : 0;
1354         getForbiddenGRFs(forbiddenGRFs, gra.kernel, stackCallRegSize, reserveSpillSize, rerservedRegNum);
1355 
1356         for (unsigned int i = 0; i < forbiddenGRFs.size(); i++)
1357         {
1358             unsigned int regNum = forbiddenGRFs[i];
1359             forbidden[regNum] = true;
1360         }
1361     }
1362 }
1363 
1364 //
1365 // mark forbidden registers for caller-save pseudo var
1366 //
allocForbiddenCallerSave(Mem_Manager & mem,G4_Kernel * kernel)1367 void LiveRange::allocForbiddenCallerSave(Mem_Manager& mem, G4_Kernel* kernel)
1368 {
1369     if (forbidden == NULL)
1370     {
1371         allocForbiddenVector(mem);
1372     }
1373 
1374     MUST_BE_TRUE(regKind == G4_GRF, ERROR_UNKNOWN);
1375 
1376     std::vector<unsigned int> callerSaveRegs;
1377     getCallerSaveGRF(callerSaveRegs, kernel);
1378     for (unsigned int i = 0; i < callerSaveRegs.size(); i++)
1379     {
1380         unsigned int callerSaveReg = callerSaveRegs[i];
1381         forbidden[callerSaveReg] = true;
1382     }
1383 }
1384 
1385 //
1386 // mark forbidden registers for callee-save pseudo var
1387 //
allocForbiddenCalleeSave(Mem_Manager & mem,G4_Kernel * kernel)1388 void LiveRange::allocForbiddenCalleeSave(Mem_Manager& mem, G4_Kernel* kernel)
1389 {
1390     if (forbidden == NULL)
1391     {
1392         allocForbiddenVector(mem);
1393     }
1394 
1395     MUST_BE_TRUE(regKind == G4_GRF, ERROR_UNKNOWN);
1396 
1397     std::vector<unsigned int> calleeSaveRegs;
1398     getCalleeSaveGRF(calleeSaveRegs, kernel);
1399     for (unsigned int i = 0; i < calleeSaveRegs.size(); i++)
1400     {
1401         unsigned int calleeSaveReg = calleeSaveRegs[i];
1402         forbidden[calleeSaveReg] = true;
1403     }
1404 }
1405 
1406 //
1407 // print assigned reg info
1408 //
dump() const1409 void LiveRange::dump() const
1410 {
1411     G4_Declare* decl = var->getDeclare();
1412     this->emit(std::cout);
1413     std::cout << " : ";
1414     //
1415     // print alignment
1416     //
1417     std::cout << "\t";
1418     if (gra.getSubRegAlign(decl) == Any)
1419     {
1420         std::cout << "\t";
1421     }
1422     else {
1423         std::cout  << gra.getSubRegAlign(decl) << "_words SubReg_Align";
1424     }
1425     //
1426     // dump number of registers that are needed
1427     //
1428     if (decl->getRegFile() == G4_ADDRESS)
1429     {
1430         std::cout << " + " << (IS_DTYPE(decl->getElemType()) ? 2 * decl->getNumElems() : decl->getNumElems()) << " regs";
1431     }
1432     else
1433     {
1434         std::cout << "\t(" << decl->getNumRows() << "x" << decl->getNumElems() << "):"
1435             << TypeSymbol(decl->getElemType());
1436     }
1437 }
1438 
PhyRegUsageParms(GlobalRA & g,LiveRange * l[],G4_RegFileKind r,unsigned int m,unsigned int & startARF,unsigned int & startFlag,unsigned int & startGRF,unsigned int & bank1_s,unsigned int & bank1_e,unsigned int & bank2_s,unsigned int & bank2_e,bool doBC,bool * avaGReg,uint32_t * avaSubReg,bool * avaAddrs,bool * avaFlags,uint8_t * weakEdges)1439 PhyRegUsageParms::PhyRegUsageParms(
1440     GlobalRA& g, LiveRange* l[], G4_RegFileKind r, unsigned int m,
1441     unsigned int& startARF, unsigned int& startFlag, unsigned int& startGRF,
1442     unsigned int& bank1_s, unsigned int& bank1_e, unsigned int& bank2_s, unsigned int& bank2_e,
1443     bool doBC, bool* avaGReg, uint32_t* avaSubReg,
1444     bool* avaAddrs, bool* avaFlags, uint8_t* weakEdges)
1445     : gra(g), startARFReg(startARF), startFlagReg(startFlag), startGRFReg(startGRF),
1446     bank1_start(bank1_s), bank1_end(bank1_e), bank2_start(bank2_s), bank2_end(bank2_e)
1447 {
1448     doBankConflict = doBC;
1449     availableGregs = avaGReg;
1450     availableSubRegs = avaSubReg;
1451     availableAddrs = avaAddrs;
1452     availableFlags = avaFlags;
1453     weakEdgeUsage = weakEdges;
1454     maxGRFCanBeUsed = m;
1455     rFile = r;
1456     totalGRF = gra.kernel.getNumRegTotal();
1457     lrs = l;
1458 }
1459