1 /*========================== begin_copyright_notice ============================
2
3 Copyright (C) 2017-2021 Intel Corporation
4
5 SPDX-License-Identifier: MIT
6
7 ============================= end_copyright_notice ===========================*/
8
9 #include "PhyRegUsage.h"
10 #include "FlowGraph.h"
11 #include "GraphColor.h"
12
13 using namespace vISA;
14
PhyRegUsage(PhyRegUsageParms & p)15 PhyRegUsage::PhyRegUsage(PhyRegUsageParms& p) :
16 gra(p.gra),
17 lrs(p.lrs),
18 availableGregs(p.availableGregs),
19 availableSubRegs(p.availableSubRegs),
20 availableAddrs(p.availableAddrs),
21 availableFlags(p.availableFlags),
22 colorHeuristic(FIRST_FIT),
23 startARFReg(p.startARFReg),
24 startFLAGReg(p.startFlagReg),
25 startGRFReg(p.startGRFReg),
26 bank1_start(p.bank1_start),
27 bank2_start(p.bank2_start),
28 bank1_end(p.bank1_end),
29 bank2_end(p.bank2_end),
30 totalGRFNum(p.totalGRF),
31 honorBankBias(p.doBankConflict),
32 builder(*p.gra.kernel.fg.builder),
33 regPool(p.gra.regPool)
34 {
35 maxGRFCanBeUsed = p.maxGRFCanBeUsed;
36 regFile = p.rFile;
37
38 weakEdgeUsage = p.weakEdgeUsage;
39 overlapTest = false;
40
41 if (regFile == G4_GRF)
42 {
43 memset(availableGregs, true, sizeof(bool)* totalGRFNum);
44 memset(availableSubRegs, 0xffffffff, sizeof(uint32_t)*totalGRFNum);
45 if (weakEdgeUsage)
46 {
47 memset(weakEdgeUsage, 0, sizeof(uint8_t)* totalGRFNum);
48 }
49 }
50 else if (regFile == G4_ADDRESS)
51 {
52 auto numAddrRegs = getNumAddrRegisters();
53 for (unsigned i = 0; i < numAddrRegs; i++)
54 availableAddrs[i] = true;
55 }
56 else if (regFile == G4_FLAG)
57 {
58 auto numFlags = builder.getNumFlagRegisters();
59 for (unsigned i = 0; i < numFlags; i++)
60 availableFlags[i] = true;
61 }
62 }
63
markBusyForDclSplit(G4_RegFileKind kind,unsigned regNum,unsigned regOff,unsigned nunits,unsigned numRows)64 void PhyRegUsage::markBusyForDclSplit(G4_RegFileKind kind,
65 unsigned regNum,
66 unsigned regOff,
67 unsigned nunits, //word units
68 unsigned numRows)
69 {
70 MUST_BE_TRUE(numRows > 0 && nunits > 0, ERROR_INTERNAL_ARGUMENT);
71 MUST_BE_TRUE(regNum + numRows <= maxGRFCanBeUsed, ERROR_UNKNOWN);
72
73 unsigned start_GRF = (regNum * numEltPerGRF<Type_UW>() + regOff) / numEltPerGRF<Type_UW>();
74 unsigned end_GRF = (regNum * numEltPerGRF<Type_UW>() + regOff + nunits) / numEltPerGRF<Type_UW>();
75
76 unsigned start_sub_GRF = (regNum * numEltPerGRF<Type_UW>() + regOff) % numEltPerGRF<Type_UW>();
77 unsigned end_sub_GRF = (regNum * numEltPerGRF<Type_UW>() + regOff + nunits) % numEltPerGRF<Type_UW>();
78
79 for (unsigned i = start_GRF; i < end_GRF; i++)
80 {
81 availableGregs[i] = false;
82 if (getGRFSize() == 64)
83 availableSubRegs[i] = 0;
84 else
85 availableSubRegs[i] = 0xffff0000;
86 }
87
88 if (end_sub_GRF)
89 {
90 availableGregs[end_GRF] = false;
91 if (start_GRF == end_GRF)
92 {
93 auto subregMask = getSubregBitMask(start_sub_GRF, nunits);
94 availableSubRegs[end_GRF] &= ~subregMask;
95 }
96 else
97 {
98 auto subregMask = getSubregBitMask(0, end_sub_GRF);
99 availableSubRegs[end_GRF] &= ~subregMask;
100 }
101 }
102 }
103 //
104 // mark availRegs[start ... start+num-1] free again
105 //
freeContiguous(bool availRegs[],unsigned start,unsigned num,unsigned maxRegs)106 void PhyRegUsage::freeContiguous(bool availRegs[],
107 unsigned start,
108 unsigned num,
109 unsigned maxRegs)
110 {
111 for (unsigned i = start; i < start + num; i++)
112 {
113 MUST_BE_TRUE(i < maxRegs && availRegs[i] == false,
114 ERROR_UNKNOWN);
115 availRegs[i] = true;
116 }
117 }
118 //
119 // return true if all entries are true
120 //
allFree(bool availRegs[],unsigned maxRegs)121 bool PhyRegUsage::allFree(bool availRegs[], unsigned maxRegs)
122 {
123 for (unsigned i = 0; i < maxRegs; i++)
124 {
125 if (availRegs[i] == false)
126 return false;
127 }
128 return true;
129 }
130
131 //
132 // mark sub reg [regOff .. regOff + nbytes -1] of the reg regNum free
133 //
freeGRFSubReg(unsigned regNum,unsigned regOff,unsigned nwords,G4_Type ty)134 void PhyRegUsage::freeGRFSubReg(unsigned regNum,
135 unsigned regOff,
136 unsigned nwords,
137 G4_Type ty)
138 {
139 //
140 // adjust regOff to its corresponding word position
141 //
142
143 int startWord = regOff * TypeSize(ty) / G4_WSIZE;
144 auto subregMask = getSubregBitMask(startWord, nwords);
145 availableSubRegs[regNum] |= subregMask;
146
147 //
148 // if all sub regs of regNum are free, then unlink the reg
149 //
150 if (availableSubRegs[regNum] == 0xFFFFFFFF)
151 {
152 MUST_BE_TRUE(!availableGregs[regNum],
153 ERROR_UNKNOWN);
154 availableGregs[regNum] = true;
155 }
156 }
157
158 //
159 // free registers that are held by intv
160 //
freeRegs(LiveRange * varBasis)161 void PhyRegUsage::freeRegs(LiveRange* varBasis)
162 {
163 G4_Declare* decl = varBasis->getDcl();
164 G4_RegFileKind kind = decl->getRegFile();
165 MUST_BE_TRUE(varBasis->getPhyReg(),
166 ERROR_UNKNOWN);
167 if (decl->useGRF())
168 {
169 MUST_BE_TRUE(varBasis->getPhyReg()->isGreg(), ERROR_UNKNOWN);
170 if (canGRFSubRegAlloc(decl))
171 {
172 freeGRFSubReg(((G4_Greg*)varBasis->getPhyReg())->getRegNum(), varBasis->getPhyRegOff(),
173 numAllocUnit(decl->getNumElems(), decl->getElemType()), decl->getElemType());
174 }
175 else
176 {
177 freeContiguous(availableGregs, ((G4_Greg*)varBasis->getPhyReg())->getRegNum(),
178 decl->getNumRows(), totalGRFNum);
179 }
180 }
181 else if (kind == G4_ADDRESS)
182 {
183 MUST_BE_TRUE(varBasis->getPhyReg()->isAreg(), ERROR_UNKNOWN);
184 freeContiguous(availableAddrs, varBasis->getPhyRegOff(),
185 numAllocUnit(decl->getNumElems(), decl->getElemType()), getNumAddrRegisters());
186 }
187 else if (kind == G4_FLAG)
188 {
189 MUST_BE_TRUE(varBasis->getPhyReg()->isFlag(), ERROR_UNKNOWN);
190 freeContiguous(availableFlags, varBasis->getPhyRegOff(),
191 numAllocUnit(decl->getNumElems(), decl->getElemType()), builder.getNumFlagRegisters());
192 }
193 else // not yet handled
194 MUST_BE_TRUE(false, ERROR_UNKNOWN);
195 }
196
getSubAlignInWords(G4_SubReg_Align subAlign)197 static int getSubAlignInWords(G4_SubReg_Align subAlign)
198 {
199 return static_cast<int>(subAlign);
200 }
201
getOccupiedBundle(const G4_Declare * dcl) const202 unsigned short PhyRegUsage::getOccupiedBundle(const G4_Declare* dcl) const
203 {
204 unsigned short occupiedBundles = 0;
205 unsigned bundleNum = 0;
206 if (!builder.getOption(vISA_enableBundleCR))
207 {
208 return occupiedBundles;
209 }
210
211 if (!builder.hasDPAS() || !builder.getOption(vISA_EnableDPASBundleConflictReduction))
212 {
213 return 0;
214 }
215
216 for (const BundleConflict& conflict : gra.getBundleConflicts(dcl))
217 {
218 unsigned reg = -1;
219 int offset = 0;
220
221 offset = conflict.offset;
222 const G4_RegVar* regVar = conflict.dcl->getRegVar();
223 if (regVar->isPhyRegAssigned())
224 {
225 reg = regVar->getPhyReg()->asGreg()->getRegNum();
226 }
227 else
228 {
229 LiveRange* lr = lrs[regVar->getId()];
230 if (lr && lr->getPhyReg())
231 {
232 reg = lr->getPhyReg()->asGreg()->getRegNum();
233 }
234 }
235
236 if (reg != -1)
237 {
238 unsigned bundle = gra.get_bundle(reg, offset);
239 unsigned bundle1 = gra.get_bundle(reg, offset + 1);
240 if (!(occupiedBundles & ((unsigned short)1 << bundle)))
241 {
242 bundleNum++;
243 }
244 occupiedBundles |= (unsigned short)1 << bundle;
245 occupiedBundles |= (unsigned short)1 << bundle1;
246 }
247 }
248 if (bundleNum > 12)
249 {
250 occupiedBundles = 0;
251 }
252
253 return occupiedBundles;
254 }
255
256 // returns the starting word index if we find enough free contiguous words satisfying alignment,
257 // -1 otherwise
findContiguousWords(uint32_t words,G4_SubReg_Align subAlign,int numWords) const258 int PhyRegUsage::findContiguousWords(
259 uint32_t words,
260 G4_SubReg_Align subAlign,
261 int numWords) const
262 {
263 // early exit in (false?) hope of saving compile time
264 if (words == 0)
265 {
266 return -1;
267 }
268
269 int step = getSubAlignInWords(subAlign);
270 int startWord = 0;
271
272 for (int i = startWord; i + numWords <= (int)numEltPerGRF<Type_UW>(); i += step)
273 {
274 uint32_t bitMask = getSubregBitMask(i, numWords);
275 if ((bitMask & words) == bitMask)
276 {
277 return i;
278 }
279 }
280
281 return -1;
282 }
283
284 //
285 // look for contiguous available regs starting from startPos
286 //
findContiguousGRF(bool availRegs[],const bool forbidden[],unsigned occupiedBundles,BankAlign align,unsigned numRegNeeded,unsigned maxRegs,unsigned & startPos,unsigned & idx,bool forceCalleeSaveOnly,bool isEOTSrc)287 bool PhyRegUsage::findContiguousGRF(bool availRegs[],
288 const bool forbidden[],
289 unsigned occupiedBundles,
290 BankAlign align,
291 unsigned numRegNeeded,
292 unsigned maxRegs,
293 unsigned& startPos,
294 unsigned& idx,
295 bool forceCalleeSaveOnly,
296 bool isEOTSrc)
297 {
298 unsigned startPosRunOne = startPos;
299 unsigned endPosRunOne = maxRegs;
300
301 if (isEOTSrc && (startPosRunOne >= maxRegs))
302 {
303 return false;
304 }
305 else
306 {
307 MUST_BE_TRUE(startPosRunOne < maxRegs, ERROR_UNKNOWN);
308 }
309 bool found =
310 findContiguousNoWrapGRF(
311 availRegs, forbidden, occupiedBundles, align, numRegNeeded, startPosRunOne, endPosRunOne, idx);
312
313 if (startPosRunOne > 0 && found == false && !isEOTSrc && !forceCalleeSaveOnly)
314 {
315 unsigned startPosRunTwo = 0;
316 unsigned endPosRunTwo = startPos + numRegNeeded;
317 endPosRunTwo = std::min(endPosRunTwo, maxRegs);
318 MUST_BE_TRUE(endPosRunTwo > 0 && endPosRunTwo <= maxRegs, ERROR_UNKNOWN);
319 found =
320 findContiguousNoWrapGRF(
321 availRegs, forbidden, occupiedBundles, align, numRegNeeded, startPosRunTwo, endPosRunTwo, idx);
322 }
323
324 if (found)
325 {
326 MUST_BE_TRUE(idx < maxRegs && idx + numRegNeeded <= maxRegs, ERROR_UNKNOWN);
327
328 if (colorHeuristic == ROUND_ROBIN) {
329 startPos = (idx + numRegNeeded) % maxRegs;
330 }
331 }
332
333 return found;
334 }
335
336 //
337 // look for contiguous available regs starting from startPos
338 //
findContiguousAddrFlag(bool availRegs[],const bool forbidden[],G4_SubReg_Align subAlign,unsigned numRegNeeded,unsigned maxRegs,unsigned & startPos,unsigned & idx,bool isCalleeSaveBias,bool isEOTSrc)339 bool PhyRegUsage::findContiguousAddrFlag(bool availRegs[],
340 const bool forbidden[],
341 G4_SubReg_Align subAlign,
342 unsigned numRegNeeded,
343 unsigned maxRegs,
344 unsigned& startPos,
345 unsigned& idx,
346 bool isCalleeSaveBias,
347 bool isEOTSrc)
348 {
349 unsigned startPosRunOne = startPos;
350 unsigned endPosRunOne = maxRegs;
351
352 if (isEOTSrc && (startPosRunOne >= maxRegs))
353 {
354 return false;
355 }
356 else
357 {
358 MUST_BE_TRUE(startPosRunOne < maxRegs, ERROR_UNKNOWN);
359 }
360 bool found =
361 findContiguousNoWrapAddrFlag(
362 availRegs, forbidden, subAlign, numRegNeeded, startPosRunOne, endPosRunOne, idx);
363
364 if (startPosRunOne > 0 && found == false && !isEOTSrc && !isCalleeSaveBias)
365 {
366 unsigned startPosRunTwo = 0;
367 unsigned endPosRunTwo = startPos + numRegNeeded;
368 endPosRunTwo = std::min(endPosRunTwo, maxRegs);
369 MUST_BE_TRUE(endPosRunTwo > 0 && endPosRunTwo <= maxRegs, ERROR_UNKNOWN);
370 found =
371 findContiguousNoWrapAddrFlag(
372 availRegs, forbidden, subAlign, numRegNeeded, startPosRunTwo, endPosRunTwo, idx);
373 }
374
375 if (found)
376 {
377 MUST_BE_TRUE(idx < maxRegs && idx + numRegNeeded <= maxRegs, ERROR_UNKNOWN);
378
379 if (colorHeuristic == ROUND_ROBIN) {
380 startPos = (idx + numRegNeeded) % maxRegs;
381 }
382 }
383
384 return found;
385 }
386
findContiguousGRFFromBanks(G4_Declare * dcl,bool availRegs[],const bool forbidden[],BankAlign origAlign,unsigned & idx,bool oneGRFBankDivision)387 bool PhyRegUsage::findContiguousGRFFromBanks(G4_Declare *dcl,
388 bool availRegs[],
389 const bool forbidden[],
390 BankAlign origAlign,
391 unsigned& idx,
392 bool oneGRFBankDivision)
393 { // EOT is not handled in this function
394 bool found = false;
395 unsigned numRegNeeded = dcl->getNumRows();
396 auto dclBC = gra.getBankConflict(dcl);
397 bool gotoSecondBank = (dclBC == BANK_CONFLICT_SECOND_HALF_EVEN ||
398 dclBC == BANK_CONFLICT_SECOND_HALF_ODD) && (dcl->getNumRows() > 1);
399
400 BankAlign align = origAlign;
401 if (dclBC != BANK_CONFLICT_NONE && align == BankAlign::Either && dcl->getNumRows() <= 1)
402 {
403 align = gra.getBankAlign(dcl);
404 }
405
406 ASSERT_USER(bank1_end < totalGRFNum && bank1_start < totalGRFNum && bank2_start < totalGRFNum && bank2_end < totalGRFNum,
407 "Wrong bank boundaries value");
408
409 if (colorHeuristic == ROUND_ROBIN)
410 {
411 //For round robin, bank1_end and bank2_end are fixed.
412 if (gotoSecondBank) //For odd aligned varaibe, we put them to a specific sections.
413 {
414 //From maxGRFCanBeUsed - 1 to bank2_end
415 ASSERT_USER(bank2_start >= bank2_end, "Second bank's start can not less than end\n");
416
417 if ((bank2_start - bank2_end + 1) >= numRegNeeded) //3 - 2 + 1 >= 2
418 {
419 found = findFreeRegs(
420 availRegs, forbidden, align, numRegNeeded, bank2_start, bank2_end, idx, gotoSecondBank, oneGRFBankDivision);
421 }
422
423 if (!found)
424 {
425 if (maxGRFCanBeUsed - 1 >= bank2_start + numRegNeeded)
426 {
427 found = findFreeRegs(
428 availRegs, forbidden, align, numRegNeeded, maxGRFCanBeUsed - 1, bank2_start + 1, idx, gotoSecondBank, oneGRFBankDivision);
429 }
430 else
431 {
432 return false;
433 }
434 }
435
436 if (found)
437 {
438 bank2_start = idx - 1;
439 if (bank2_start < bank2_end)
440 {
441 bank2_start = maxGRFCanBeUsed - 1;
442 }
443 }
444 }
445 else
446 { //From 0 to bank1_end
447 if (bank1_end - bank1_start + 1 >= numRegNeeded)
448 {
449 found = findFreeRegs(
450 availRegs, forbidden, BankAlign::Even, numRegNeeded, bank1_start, bank1_end, idx, gotoSecondBank, oneGRFBankDivision);
451 }
452
453 if (!found)
454 {
455 if (bank1_start >= numRegNeeded)
456 {
457 found = findFreeRegs(
458 availRegs, forbidden, BankAlign::Even, numRegNeeded, 0, bank1_start - 2 + numRegNeeded, idx, gotoSecondBank, oneGRFBankDivision);
459 }
460 }
461
462 if (found)
463 {
464 bank1_start = idx + numRegNeeded;
465 if (bank1_start > bank1_end)
466 {
467 bank1_start = 0;
468 }
469 }
470 }
471 }
472 else
473 {
474 //For first fit, the bank1_start and bank2_start are fixed. bank2_end and bank1_end are dynamically decided, but can not change in one direction (MIN or MAX).
475 if (gotoSecondBank) //For odd aligned varaibe, we put them to a specific sections.
476 {
477 found = findFreeRegs(
478 availRegs, forbidden, align, numRegNeeded, maxGRFCanBeUsed - 1, 0, idx, gotoSecondBank, oneGRFBankDivision);
479
480 if (found)
481 {
482 bank2_end = std::min(idx, bank2_end);
483 }
484 }
485 else
486 {
487 found = findFreeRegs(
488 availRegs, forbidden, align, numRegNeeded, 0, maxGRFCanBeUsed - 1, idx, gotoSecondBank, oneGRFBankDivision);
489 if (found)
490 {
491 bank1_end = std::max(idx + numRegNeeded - 1, bank1_end);
492 }
493 }
494
495 if (bank2_end <= bank1_end)
496 {
497 found = false;
498 }
499
500 }
501
502 return found;
503 }
504
isOverlapValid(unsigned int reg,unsigned int numRegs)505 bool PhyRegUsage::isOverlapValid(unsigned int reg, unsigned int numRegs)
506 {
507 for (unsigned int i = reg; i < (reg + numRegs); i++)
508 {
509 auto k = getWeakEdgeUse(i);
510 if (!(k == 0 ||
511 k == (i - reg + 1)))
512 {
513 // This condition will be taken when there is a partial
514 // overlap.
515 return false;
516 }
517 }
518
519 return true;
520 }
521
522 //
523 // look for contiguous available regs from startPos to maxRegs
524 //
findContiguousNoWrapGRF(bool availRegs[],const bool forbidden[],unsigned short occupiedBundles,BankAlign align,unsigned numRegNeeded,unsigned startPos,unsigned endPos,unsigned & idx)525 bool PhyRegUsage::findContiguousNoWrapGRF(bool availRegs[],
526 const bool forbidden[],
527 unsigned short occupiedBundles,
528 BankAlign align,
529 unsigned numRegNeeded,
530 unsigned startPos,
531 unsigned endPos,
532 unsigned& idx)
533 {
534 unsigned i = startPos;
535 while (i < endPos)
536 {
537 if (((i & 0x1) && align == BankAlign::Even) || // i is odd but intv needs to be even aligned
538 ((i & 0x1) == 0 && align == BankAlign::Odd)) // i is even but intv needs to be odd aligned
539 {
540 i++;
541 }
542 else
543 {
544 if (align == BankAlign::Even2GRF)
545 {
546 while ((i % 4 >= 2) || ((numRegNeeded >= 2) && (i % 2 != 0)))
547 {
548 i++;
549 }
550 }
551 else if (align == BankAlign::Odd2GRF)
552 {
553 while ((i % 4 < 2) || ((numRegNeeded >= 2) && (i % 2 != 0)))
554 {
555 i++;
556 }
557 }
558
559 if (numRegNeeded == 0 ||
560 i + numRegNeeded > endPos)
561 return false; // no available regs
562 //
563 // find contiguous numRegNeeded registers
564 // forbidden != NULL then check forbidden
565 //
566 unsigned j = i;
567 if (overlapTest &&
568 !isOverlapValid(i, numRegNeeded))
569 {
570 i++;
571 }
572 else if (occupiedBundles & (1 << gra.get_bundle(i, 0)) ||
573 occupiedBundles & (1 << gra.get_bundle(i, 1)))
574 {
575 i++;
576 }
577 else
578 {
579 for (; j < i + numRegNeeded && availRegs[j] && (forbidden == NULL || !forbidden[j]); j++);
580 if (j == i + numRegNeeded)
581 {
582 for (unsigned k = i; k < j; k++) availRegs[k] = false;
583 idx = i;
584 return true;
585 }
586 else
587 i = j + 1;
588 }
589 }
590 }
591 return false; // no available regs
592 }
593
594 //
595 // look for contiguous available regs from startPos to maxRegs
596 //
findContiguousNoWrapAddrFlag(bool availRegs[],const bool forbidden[],G4_SubReg_Align subAlign,unsigned numRegNeeded,unsigned startPos,unsigned endPos,unsigned & idx)597 bool PhyRegUsage::findContiguousNoWrapAddrFlag(bool availRegs[],
598 const bool forbidden[],
599 G4_SubReg_Align subAlign, //Sub align is used only for Flag and Address registers
600 unsigned numRegNeeded,
601 unsigned startPos,
602 unsigned endPos,
603 unsigned& idx)
604 {
605 unsigned i = startPos;
606 while (i < endPos)
607 {
608 //
609 // some register assignments need special alignment, we check
610 // whether the alignment criteria is met.
611 //
612 if (subAlign == Sixteen_Word && i != 0)
613 { // Sixteen_Word sub-align should have i=0
614 return false;
615 } else if ((subAlign == Eight_Word && i % 8 != 0) || // 8_Word align, i must be divided by 8
616 (i & 0x1 && subAlign == Even_Word) || // i is odd but intv needs to be even aligned
617 (subAlign == Four_Word && (i % 4 != 0))) // 4_word alignment
618 i++;
619 else
620 {
621 if (numRegNeeded == 0 ||
622 i + numRegNeeded > endPos)
623 {
624 return false; // no available regs
625 }
626 //
627 // find contiguous numRegNeeded registers
628 // forbidden != NULL then check forbidden
629 //
630 unsigned j = i;
631 for (; j < i + numRegNeeded && availRegs[j] && (forbidden == NULL || !forbidden[j]); j++);
632 if (j == i + numRegNeeded)
633 {
634 for (unsigned k = i; k < j; k++) availRegs[k] = false;
635 idx = i;
636 return true;
637 } else {
638 i = j + 1;
639 }
640 }
641 }
642 return false; // no available regs
643 }
644
findFreeRegs(bool availRegs[],const bool forbidden[],BankAlign align,unsigned numRegNeeded,unsigned startRegNum,unsigned endRegNum,unsigned & idx,bool gotoSecondBank,bool oneGRFBankDivision)645 bool PhyRegUsage::findFreeRegs(bool availRegs[],
646 const bool forbidden[],
647 BankAlign align,
648 unsigned numRegNeeded,
649 unsigned startRegNum, //inclusive
650 unsigned endRegNum, //inclusive: less and equal when startRegNum <= endRegNum, larger and equal when startRegNum > endRegNum
651 unsigned& idx,
652 bool gotoSecondBank,
653 bool oneGRFBankDivision)
654 {
655 bool forward = startRegNum <= endRegNum ? true : false;
656 int startReg = forward ? startRegNum : startRegNum - numRegNeeded + 1;
657 int endReg = forward ? endRegNum - numRegNeeded + 1 : endRegNum;
658 int i = startReg;
659
660 while (1)
661 {
662 if (forward)
663 {
664 if (i > endReg)
665 break;
666 }
667 else
668 {
669 if (i < endReg)
670 break;
671 }
672
673 if ((align == BankAlign::Even2GRF) && (i % 2 != 0 || i % 4 == 3))
674 {
675 i += forward ? 1 : -1;
676 continue;
677 }
678 else if ((align == BankAlign::Odd2GRF) && (i % 2 != 0 || i % 4 == 1))
679 {
680 i += forward ? 1 : -1;
681 continue;
682 }
683 else if ((((i & 0x1) && align == BankAlign::Even) || // i is odd but intv needs to be even aligned
684 (((i & 0x1) == 0) && (align == BankAlign::Odd)))) // i is even but intv needs to be odd aligned
685 {
686 i += forward ? 1 : -1;
687 continue;
688 }
689 else
690 {
691 if ((forward && (i > endReg)) ||
692 (!forward && (i < endReg)))
693 {
694 return false; // no available regs
695 }
696
697 if (regFile == G4_GRF &&
698 overlapTest &&
699 !isOverlapValid(i, numRegNeeded))
700 {
701 i += forward ? 1 : -1;
702 }
703 else
704 {
705 // find contiguous numRegNeeded registers
706 // forbidden != NULL then check forbidden
707 //
708 unsigned j = i;
709 for (; j < i + numRegNeeded && availRegs[j] && (forbidden == NULL || !forbidden[j]); j++);
710 if (j == i + numRegNeeded)
711 {
712 for (unsigned k = i; k < j; k++) availRegs[k] = false;
713 idx = i;
714 return true;
715 }
716 else
717 { //Jump over the register region which a poke in the end
718 if (forward)
719 {
720 i = j + 1;
721 }
722 else
723 {
724 if (j > numRegNeeded)
725 {
726 i = j - numRegNeeded;
727 }
728 else
729 {
730 break;
731 }
732 }
733 }
734 }
735 }
736 }
737
738 return false;
739 }
740
741 //
742 // return true, if the var can be allocated using sub reg
743 //
canGRFSubRegAlloc(G4_Declare * decl)744 bool PhyRegUsage::canGRFSubRegAlloc(G4_Declare* decl)
745 {
746 if (decl->getNumRows() != 1) // more than 1 row
747 return false;
748 if (numAllocUnit(decl->getNumElems(), decl->getElemType()) < numEltPerGRF<Type_UW>())
749 return true;
750 return false;
751 }
752
findGRFSubRegFromRegs(int startReg,int endReg,int step,PhyReg * phyReg,G4_SubReg_Align subAlign,unsigned nwords,const bool forbidden[],bool fromPartialOccupiedReg)753 void PhyRegUsage::findGRFSubRegFromRegs(int startReg,
754 int endReg,
755 int step,
756 PhyReg *phyReg,
757 G4_SubReg_Align subAlign,
758 unsigned nwords,
759 const bool forbidden[],
760 bool fromPartialOccupiedReg)
761 {
762 int idx = startReg;
763 while (1)
764 {
765 if (step > 0)
766 {
767 if (idx > endReg)
768 {
769 break;
770 }
771 }
772 else
773 {
774 if (idx < endReg)
775 {
776 break;
777 }
778 }
779
780 if (forbidden && forbidden[idx])
781 {
782 idx += step;
783 continue;
784 }
785
786 if (fromPartialOccupiedReg && availableSubRegs[idx] == 0xFFFFFFFF)
787 {
788 // favor partially allocated GRF first
789 idx += step;
790 continue;
791 }
792
793 int subreg = findContiguousWords(availableSubRegs[idx], subAlign, nwords);
794 if (subreg != -1)
795 {
796 phyReg->reg = idx;
797 phyReg->subreg = subreg;
798 return;
799 }
800
801 idx += step;
802 }
803
804 return;
805 }
806
findGRFSubRegFromBanks(G4_Declare * dcl,const bool forbidden[],bool oneGRFBankDivision)807 PhyRegUsage::PhyReg PhyRegUsage::findGRFSubRegFromBanks(G4_Declare *dcl,
808 const bool forbidden[],
809 bool oneGRFBankDivision)
810 {
811 int startReg = 0, endReg = totalGRFNum;
812 int step = 0;
813 G4_SubReg_Align subAlign = gra.getSubRegAlign(dcl);
814 unsigned nwords = numAllocUnit(dcl->getNumElems(), dcl->getElemType());
815 auto dclBC = gra.getBankConflict(dcl);
816 bool gotoSecondBank = dclBC == BANK_CONFLICT_SECOND_HALF_EVEN ||
817 dclBC == BANK_CONFLICT_SECOND_HALF_EVEN;
818
819 if (gotoSecondBank && oneGRFBankDivision)
820 {
821 startReg = (maxGRFCanBeUsed - 1);
822 startReg = startReg % 2 ? startReg : startReg - 1;
823 if (colorHeuristic == ROUND_ROBIN)
824 {
825 endReg = bank2_end;
826 }
827 else
828 {
829 endReg = 0;
830 }
831 step = -2;
832 }
833 else if (gotoSecondBank && !oneGRFBankDivision) //We will depends on low high, treated as even align
834 {
835 startReg = (maxGRFCanBeUsed - 1);
836 startReg = startReg % 2 ? startReg - 1 : startReg;
837 if (colorHeuristic == ROUND_ROBIN)
838 {
839 endReg = bank2_end;
840 }
841 else
842 {
843 endReg = 0;
844 }
845 step = -1;
846 }
847 else
848 {
849 if (colorHeuristic == ROUND_ROBIN)
850 {
851 startReg = 0;
852 endReg = bank1_end;
853 }
854 else
855 {
856 startReg = 0;
857 endReg = maxGRFCanBeUsed - 1;
858 }
859 if (oneGRFBankDivision)
860 {
861 step = 2;
862 }
863 else
864 {
865 step = 1;
866 }
867 }
868
869 PhyReg phyReg = { -1, -1 };
870
871 //Try to find sub register from the registers which are partially occupied already.
872 findGRFSubRegFromRegs(startReg, endReg, step, &phyReg, subAlign, nwords, forbidden, true);
873
874 //If failed or across the boundary of specified bank, try again and find from the registers which are totally free
875 if (phyReg.reg == -1 || (gotoSecondBank && ((unsigned)phyReg.reg <= bank1_end)) || (!gotoSecondBank && ((unsigned)phyReg.reg >= bank2_end)))
876 {
877 findGRFSubRegFromRegs(startReg, endReg, step, &phyReg, subAlign, nwords, forbidden, false);
878 }
879
880 if (phyReg.reg != -1 && colorHeuristic == FIRST_FIT)
881 {
882 if (gotoSecondBank)
883 {
884 bank2_end = std::min((unsigned)phyReg.reg, bank2_end);
885 }
886 else
887 {
888 bank1_end = std::max((unsigned)phyReg.reg, bank1_end);
889 }
890 if (bank1_end >= bank2_end)
891 {
892 phyReg.reg = -1;
893 }
894 }
895
896 return phyReg;
897 }
898
899 //
900 // return reg and subRegOff (subr)
901 // To support sub-reg alignment
902 //
findGRFSubReg(const bool forbidden[],bool calleeSaveBias,bool callerSaveBias,BankAlign align,G4_SubReg_Align subAlign,unsigned nwords)903 PhyRegUsage::PhyReg PhyRegUsage::findGRFSubReg(const bool forbidden[],
904 bool calleeSaveBias,
905 bool callerSaveBias,
906 BankAlign align,
907 G4_SubReg_Align subAlign,
908 unsigned nwords)
909 {
910 int startReg = 0, endReg = totalGRFNum;
911 PhyReg phyReg = { -1, -1 };
912 if (calleeSaveBias)
913 {
914 startReg = builder.kernel.calleeSaveStart();
915 }
916 else if (callerSaveBias)
917 {
918 endReg = builder.kernel.calleeSaveStart();
919 }
920
921 int step = align == BankAlign::Even ? 2 : 1;
922
923 auto findSubGRFAlloc = [step, forbidden, this, subAlign, nwords](unsigned int startReg, unsigned int endReg)
924 {
925 PhyReg phyReg = { -1, -1 };
926 for (auto idx = startReg; idx < endReg; idx += step)
927 {
928 // forbidden GRF is not an allocation candidate
929 if (forbidden && forbidden[idx])
930 {
931 continue;
932 }
933
934 // check if entire GRF is available
935 if (availableSubRegs[idx] == 0xFFFFFFFF)
936 {
937 if (phyReg.reg == -1)
938 {
939 // favor partially allocated GRF first so dont
940 // return this assignment yet
941 phyReg.reg = idx;
942 phyReg.subreg = 0;
943 }
944 continue;
945 }
946
947 int subreg = findContiguousWords(availableSubRegs[idx], subAlign, nwords);
948 if (subreg != -1)
949 {
950 phyReg.reg = idx;
951 phyReg.subreg = subreg;
952 return phyReg;
953 }
954 }
955
956 // either return {-1, -1} or an allocation where entire GRF is available
957 return phyReg;
958 };
959
960 if (callerSaveBias || calleeSaveBias)
961 {
962 // attempt bias based assignment first
963 phyReg = findSubGRFAlloc(startReg, endReg);
964 if (phyReg.subreg != -1)
965 return phyReg;
966 }
967
968 // Find sub-GRF allocation throughout GRF file
969 phyReg = findSubGRFAlloc(0, totalGRFNum);
970
971 return phyReg;
972 }
973
assignGRFRegsFromBanks(LiveRange * varBasis,BankAlign align,const bool * forbidden,ColorHeuristic heuristic,bool oneGRFBankDivision)974 bool PhyRegUsage::assignGRFRegsFromBanks(LiveRange* varBasis,
975 BankAlign align,
976 const bool* forbidden,
977 ColorHeuristic heuristic,
978 bool oneGRFBankDivision)
979 {
980 colorHeuristic = heuristic;
981 G4_Declare* decl = varBasis->getDcl();
982
983 //
984 // if regs are allocated to intv, i is the reg number and off is the reg
985 // offset for sub reg allocation
986 //
987 unsigned i = 0; // avail reg number
988
989 //
990 // determine if we need to do sub reg allcoation
991 //
992 if (canGRFSubRegAlloc(decl))
993 {
994 bool retVal = false;
995
996 PhyRegUsage::PhyReg phyReg = findGRFSubRegFromBanks(decl, forbidden, oneGRFBankDivision);
997 if (phyReg.reg != -1)
998 {
999 // based on type, adjust sub reg off accordingly
1000 // word: stay the same, dword: *2, byte: /2
1001 // assign r_i.off
1002 varBasis->setPhyReg(regPool.getGreg(phyReg.reg),
1003 phyReg.subreg*G4_WSIZE / decl->getElemSize());
1004 retVal = true;
1005 }
1006
1007 return retVal;
1008 }
1009 else
1010 {
1011 bool success = false;
1012 if (varBasis->getEOTSrc() && builder.hasEOTGRFBinding())
1013 {
1014 bool forceCalleeSaveAlloc = builder.kernel.fg.isPseudoVCEDcl(decl);
1015 startGRFReg = totalGRFNum - 16;
1016 success = findContiguousGRF(availableGregs, forbidden, 0, align, decl->getNumRows(), maxGRFCanBeUsed,
1017 startGRFReg, i, forceCalleeSaveAlloc, true);
1018 }
1019 else
1020 {
1021 success = findContiguousGRFFromBanks(decl, availableGregs, forbidden, align, i, oneGRFBankDivision);
1022 }
1023
1024 if (success)
1025 {
1026 varBasis->setPhyReg(regPool.getGreg(i), 0);
1027 }
1028
1029 return success;
1030 }
1031
1032 return false;
1033 }
1034
1035 //
1036 // find registers for intv
1037 // To support sub-reg alignment
1038 //
assignRegs(bool highInternalConflict,LiveRange * varBasis,const bool * forbidden,BankAlign align,G4_SubReg_Align subAlign,ColorHeuristic heuristic,float spillCost,bool hintSet)1039 bool PhyRegUsage::assignRegs(bool highInternalConflict,
1040 LiveRange* varBasis,
1041 const bool* forbidden,
1042 BankAlign align,
1043 G4_SubReg_Align subAlign,
1044 ColorHeuristic heuristic,
1045 float spillCost,
1046 bool hintSet)
1047 {
1048 colorHeuristic = heuristic;
1049
1050 G4_Declare* decl = varBasis->getDcl();
1051 G4_RegFileKind kind = decl->getRegFile();
1052 BankAlign bankAlign = align;
1053
1054 //
1055 // if regs are allocated to intv, i is the reg number and off is the reg
1056 // offset for sub reg allocation
1057 //
1058 unsigned i = 0; // avail reg number
1059
1060 auto getAlignToUse = [](BankAlign align, BankAlign bankAlign)
1061 {
1062 if (GlobalRA::useGenericAugAlign())
1063 return (align != BankAlign::Either ? align : bankAlign);
1064 else
1065 return (bankAlign != BankAlign::Either ? bankAlign : align);
1066 };
1067
1068 if (kind == G4_GRF) // general register file
1069 {
1070 //
1071 // determine if we need to do sub reg allcoation
1072 //
1073 if (canGRFSubRegAlloc(decl))
1074 {
1075 bool retVal = false;
1076 int oldStartGRFReg = startGRFReg;
1077 BankConflict varBasisBC = gra.getBankConflict(varBasis->getVar()->asRegVar()->getDeclare());
1078
1079 if (!builder.getOptions()->getuInt32Option(vISA_ReservedGRFNum) &&
1080 honorBankBias &&
1081 varBasisBC != BANK_CONFLICT_NONE)
1082 {
1083 if (highInternalConflict)
1084 {
1085 switch (varBasisBC)
1086 {
1087 case BANK_CONFLICT_FIRST_HALF_EVEN:
1088 case BANK_CONFLICT_FIRST_HALF_ODD:
1089 startGRFReg = 0;
1090 break;
1091 case BANK_CONFLICT_SECOND_HALF_EVEN:
1092 case BANK_CONFLICT_SECOND_HALF_ODD:
1093 startGRFReg = 64;
1094 break;
1095 default: break;
1096 }
1097 }
1098 else
1099 {
1100 bankAlign = gra.getBankAlign(varBasis->getVar()->asRegVar()->getDeclare());
1101 }
1102 }
1103
1104 // If the var is biased to receive a callee-bias, start at r60 and wrap around.
1105 // NOTE: We are assuming a first-fit strategy when a callee-bias is present.
1106 if (varBasis->getCalleeSaveBias())
1107 {
1108 startGRFReg = 60;
1109 }
1110
1111 PhyRegUsage::PhyReg phyReg = findGRFSubReg(forbidden, varBasis->getCalleeSaveBias(),
1112 varBasis->getCallerSaveBias(), getAlignToUse(align, bankAlign), subAlign,
1113 numAllocUnit(decl->getNumElems(), decl->getElemType()));
1114 if (phyReg.reg != -1)
1115 {
1116 // based on type, adjust sub reg off accordingly
1117 // word: stay the same, dword: *2, byte: /2
1118 // assign r_i.off
1119 varBasis->setPhyReg(regPool.getGreg(phyReg.reg),
1120 phyReg.subreg * G4_WSIZE / decl->getElemSize());
1121 retVal = true;
1122 }
1123
1124 if (varBasis->getCalleeSaveBias())
1125 {
1126 startGRFReg = oldStartGRFReg;
1127 }
1128
1129 return retVal;
1130 }
1131 else
1132 {
1133 int oldStartGRFReg = startGRFReg;
1134 unsigned endGRFReg = maxGRFCanBeUsed; // round-robin reg start bias
1135 BankConflict varBasisBC = gra.getBankConflict(varBasis->getVar()->asRegVar()->getDeclare());
1136
1137 if (!builder.getOptions()->getuInt32Option(vISA_ReservedGRFNum) &&
1138 honorBankBias &&
1139 varBasisBC != BANK_CONFLICT_NONE &&
1140 !hintSet)
1141 {
1142 if (highInternalConflict)
1143 {
1144 switch (varBasisBC)
1145 {
1146 case BANK_CONFLICT_FIRST_HALF_EVEN:
1147 case BANK_CONFLICT_FIRST_HALF_ODD:
1148 startGRFReg = 0;
1149 break;
1150 case BANK_CONFLICT_SECOND_HALF_EVEN:
1151 case BANK_CONFLICT_SECOND_HALF_ODD:
1152 startGRFReg = 64;
1153 break;
1154 default: break;
1155 }
1156 }
1157 else
1158 {
1159 bankAlign = gra.getBankAlign(varBasis->getVar()->asRegVar()->getDeclare());
1160 }
1161 }
1162
1163 // If the var is biased to receive a callee-bias, start at r60 and wrap around.
1164 // NOTE: We are assuming a first-fit strategy when a callee-bias is present.
1165 if (varBasis->getCalleeSaveBias())
1166 {
1167 startGRFReg = builder.kernel.calleeSaveStart();
1168 }
1169
1170 if (varBasis->getEOTSrc() && builder.hasEOTGRFBinding())
1171 {
1172 startGRFReg = totalGRFNum - 16;
1173 }
1174
1175 bool forceCalleeSaveAlloc = builder.kernel.fg.isPseudoVCEDcl(decl);
1176 unsigned short occupiedBundles = getOccupiedBundle(decl);
1177 bool success = findContiguousGRF(availableGregs, forbidden, occupiedBundles,
1178 getAlignToUse(align, bankAlign), decl->getNumRows(), endGRFReg,
1179 startGRFReg, i, forceCalleeSaveAlloc, varBasis->getEOTSrc());
1180 if (success) {
1181 varBasis->setPhyReg(regPool.getGreg(i), 0);
1182 }
1183
1184 if (varBasis->getEOTSrc())
1185 {
1186 startGRFReg = oldStartGRFReg;
1187 }
1188
1189 if (varBasis->getCalleeSaveBias())
1190 {
1191 startGRFReg = oldStartGRFReg;
1192 }
1193
1194 return success;
1195 }
1196 }
1197 else if (kind == G4_ADDRESS) // address register
1198 {
1199 MUST_BE_TRUE(decl->getNumRows() == 1, ERROR_UNKNOWN);
1200 //
1201 // determine alignment
1202 // if the number of reg needed is more than 1, then we go ahead
1203 //
1204 unsigned regNeeded = numAllocUnit(decl->getNumElems(), decl->getElemType());
1205 if (findContiguousAddrFlag(availableAddrs, forbidden, subAlign, regNeeded, getNumAddrRegisters(), startARFReg, i))
1206 {
1207 // subregoffset should consider the declare data type
1208 varBasis->setPhyReg(regPool.getAddrReg(), i*G4_WSIZE / decl->getElemSize());
1209 return true;
1210 }
1211 return false;
1212 }
1213 else if (kind == G4_FLAG) // Flag register
1214 {
1215 MUST_BE_TRUE(decl->getNumRows() == 1, ERROR_UNKNOWN);
1216 //
1217 // determine alignment
1218 // if the number of reg needed is more than 1, then we go ahead
1219 //
1220 unsigned regNeeded = numAllocUnit(decl->getNumElems(), decl->getElemType());
1221 if (findContiguousAddrFlag(availableFlags, forbidden, subAlign, regNeeded, builder.getNumFlagRegisters(), startFLAGReg, i))
1222 {
1223 // subregoffset should consider the declare data type
1224 varBasis->setPhyReg(regPool.getFlagAreg(i / 2), i & 1);
1225 return true;
1226 }
1227 return false;
1228 }
1229 else // not handled yet
1230 {
1231 MUST_BE_TRUE(false, ERROR_UNKNOWN);
1232 return false;
1233 }
1234 }
1235
1236 //
1237 // allocate forbidden vectors
1238 //
getForbiddenVectorSize() const1239 unsigned LiveRange::getForbiddenVectorSize() const
1240 {
1241 switch (regKind)
1242 {
1243 case G4_GRF:
1244 case G4_INPUT:
1245 return gra.kernel.getNumRegTotal();
1246 case G4_ADDRESS:
1247 return getNumAddrRegisters();
1248 case G4_FLAG:
1249 return gra.builder.getNumFlagRegisters();
1250 default:
1251 assert(false && "illegal reg file");
1252 return 0;
1253 }
1254 }
1255
1256 //
1257 // allocate forbidden vectors
1258 //
allocForbiddenVector(Mem_Manager & mem)1259 void LiveRange::allocForbiddenVector(Mem_Manager& mem)
1260 {
1261 unsigned size = getForbiddenVectorSize();
1262
1263 if (size > 0)
1264 {
1265 forbidden = (bool*)mem.alloc(sizeof(bool)*size);
1266 memset(forbidden, false, size);
1267 }
1268 }
1269
getForbiddenGRFs(std::vector<unsigned int> & regNum,G4_Kernel & kernel,unsigned stackCallRegSize,unsigned reserveSpillSize,unsigned rerservedRegNum)1270 void getForbiddenGRFs(
1271 std::vector<unsigned int>& regNum, G4_Kernel &kernel,
1272 unsigned stackCallRegSize, unsigned reserveSpillSize, unsigned rerservedRegNum)
1273 {
1274 // Push forbidden register numbers to vector regNum
1275 //
1276 // r0 - Forbidden when platform is not 3d
1277 // rMax, rMax-1, rMax-2 - Forbidden in presence of stack call sites
1278 unsigned totalGRFNum = kernel.getNumRegTotal();
1279
1280 if (kernel.getKernelType() != VISA_3D ||
1281 kernel.getOption(vISA_enablePreemption) ||
1282 reserveSpillSize > 0 ||
1283 kernel.getOption(vISA_ReserveR0))
1284 {
1285 regNum.push_back(0);
1286 }
1287
1288 if (kernel.getOption(vISA_enablePreemption))
1289 {
1290 // r1 is reserved for SIP kernel
1291 regNum.push_back(1);
1292 }
1293
1294 unsigned reservedRegSize = stackCallRegSize + reserveSpillSize;
1295 for (unsigned int i = 0; i < reservedRegSize; i++)
1296 {
1297 regNum.push_back(totalGRFNum - 1 - i);
1298 }
1299
1300 unsigned largestNoneReservedReg = totalGRFNum - reservedRegSize - 1;
1301 if (totalGRFNum - reservedRegSize >= totalGRFNum - 16)
1302 {
1303 largestNoneReservedReg = totalGRFNum - 16 - 1;
1304 }
1305
1306
1307 if (totalGRFNum - reservedRegSize < rerservedRegNum)
1308 {
1309 MUST_BE_TRUE(false, "After reservation, there is not enough regiser!");
1310 }
1311
1312 for (unsigned int i = 0; i < rerservedRegNum; i++)
1313 {
1314 regNum.push_back(largestNoneReservedReg - i);
1315 }
1316 }
1317
getCallerSaveGRF(std::vector<unsigned int> & regNum,G4_Kernel * kernel)1318 void getCallerSaveGRF(std::vector<unsigned int>& regNum, G4_Kernel* kernel)
1319 {
1320 unsigned int startCalleeSave = kernel->calleeSaveStart();
1321 unsigned int endCalleeSave = startCalleeSave + kernel->getNumCalleeSaveRegs();
1322 // r60-r124 are caller save regs for SKL
1323 for (unsigned int i = startCalleeSave; i < endCalleeSave; i++)
1324 {
1325 regNum.push_back(i);
1326 }
1327 }
1328
getCalleeSaveGRF(std::vector<unsigned int> & regNum,G4_Kernel * kernel)1329 void getCalleeSaveGRF(std::vector<unsigned int>& regNum, G4_Kernel* kernel)
1330 {
1331 // r1-r59 are callee save regs for SKL
1332 unsigned int numCallerSaveGRFs = kernel->getCallerSaveLastGRF() + 1;
1333 for (unsigned int i = 1; i < numCallerSaveGRFs; i++)
1334 {
1335 regNum.push_back(i);
1336 }
1337 }
1338
1339 //
1340 // mark forbidden vectors
1341 //
allocForbidden(Mem_Manager & mem,bool reserveStackCallRegs,unsigned reserveSpillSize,unsigned rerservedRegNum)1342 void LiveRange::allocForbidden(Mem_Manager& mem, bool reserveStackCallRegs, unsigned reserveSpillSize, unsigned rerservedRegNum)
1343 {
1344 if (forbidden == NULL)
1345 {
1346 allocForbiddenVector(mem);
1347 }
1348
1349
1350 if (regKind == G4_GRF)
1351 {
1352 std::vector<unsigned int> forbiddenGRFs;
1353 unsigned int stackCallRegSize = reserveStackCallRegs ? gra.kernel.numReservedABIGRF() : 0;
1354 getForbiddenGRFs(forbiddenGRFs, gra.kernel, stackCallRegSize, reserveSpillSize, rerservedRegNum);
1355
1356 for (unsigned int i = 0; i < forbiddenGRFs.size(); i++)
1357 {
1358 unsigned int regNum = forbiddenGRFs[i];
1359 forbidden[regNum] = true;
1360 }
1361 }
1362 }
1363
1364 //
1365 // mark forbidden registers for caller-save pseudo var
1366 //
allocForbiddenCallerSave(Mem_Manager & mem,G4_Kernel * kernel)1367 void LiveRange::allocForbiddenCallerSave(Mem_Manager& mem, G4_Kernel* kernel)
1368 {
1369 if (forbidden == NULL)
1370 {
1371 allocForbiddenVector(mem);
1372 }
1373
1374 MUST_BE_TRUE(regKind == G4_GRF, ERROR_UNKNOWN);
1375
1376 std::vector<unsigned int> callerSaveRegs;
1377 getCallerSaveGRF(callerSaveRegs, kernel);
1378 for (unsigned int i = 0; i < callerSaveRegs.size(); i++)
1379 {
1380 unsigned int callerSaveReg = callerSaveRegs[i];
1381 forbidden[callerSaveReg] = true;
1382 }
1383 }
1384
1385 //
1386 // mark forbidden registers for callee-save pseudo var
1387 //
allocForbiddenCalleeSave(Mem_Manager & mem,G4_Kernel * kernel)1388 void LiveRange::allocForbiddenCalleeSave(Mem_Manager& mem, G4_Kernel* kernel)
1389 {
1390 if (forbidden == NULL)
1391 {
1392 allocForbiddenVector(mem);
1393 }
1394
1395 MUST_BE_TRUE(regKind == G4_GRF, ERROR_UNKNOWN);
1396
1397 std::vector<unsigned int> calleeSaveRegs;
1398 getCalleeSaveGRF(calleeSaveRegs, kernel);
1399 for (unsigned int i = 0; i < calleeSaveRegs.size(); i++)
1400 {
1401 unsigned int calleeSaveReg = calleeSaveRegs[i];
1402 forbidden[calleeSaveReg] = true;
1403 }
1404 }
1405
1406 //
1407 // print assigned reg info
1408 //
dump() const1409 void LiveRange::dump() const
1410 {
1411 G4_Declare* decl = var->getDeclare();
1412 this->emit(std::cout);
1413 std::cout << " : ";
1414 //
1415 // print alignment
1416 //
1417 std::cout << "\t";
1418 if (gra.getSubRegAlign(decl) == Any)
1419 {
1420 std::cout << "\t";
1421 }
1422 else {
1423 std::cout << gra.getSubRegAlign(decl) << "_words SubReg_Align";
1424 }
1425 //
1426 // dump number of registers that are needed
1427 //
1428 if (decl->getRegFile() == G4_ADDRESS)
1429 {
1430 std::cout << " + " << (IS_DTYPE(decl->getElemType()) ? 2 * decl->getNumElems() : decl->getNumElems()) << " regs";
1431 }
1432 else
1433 {
1434 std::cout << "\t(" << decl->getNumRows() << "x" << decl->getNumElems() << "):"
1435 << TypeSymbol(decl->getElemType());
1436 }
1437 }
1438
PhyRegUsageParms(GlobalRA & g,LiveRange * l[],G4_RegFileKind r,unsigned int m,unsigned int & startARF,unsigned int & startFlag,unsigned int & startGRF,unsigned int & bank1_s,unsigned int & bank1_e,unsigned int & bank2_s,unsigned int & bank2_e,bool doBC,bool * avaGReg,uint32_t * avaSubReg,bool * avaAddrs,bool * avaFlags,uint8_t * weakEdges)1439 PhyRegUsageParms::PhyRegUsageParms(
1440 GlobalRA& g, LiveRange* l[], G4_RegFileKind r, unsigned int m,
1441 unsigned int& startARF, unsigned int& startFlag, unsigned int& startGRF,
1442 unsigned int& bank1_s, unsigned int& bank1_e, unsigned int& bank2_s, unsigned int& bank2_e,
1443 bool doBC, bool* avaGReg, uint32_t* avaSubReg,
1444 bool* avaAddrs, bool* avaFlags, uint8_t* weakEdges)
1445 : gra(g), startARFReg(startARF), startFlagReg(startFlag), startGRFReg(startGRF),
1446 bank1_start(bank1_s), bank1_end(bank1_e), bank2_start(bank2_s), bank2_end(bank2_e)
1447 {
1448 doBankConflict = doBC;
1449 availableGregs = avaGReg;
1450 availableSubRegs = avaSubReg;
1451 availableAddrs = avaAddrs;
1452 availableFlags = avaFlags;
1453 weakEdgeUsage = weakEdges;
1454 maxGRFCanBeUsed = m;
1455 rFile = r;
1456 totalGRF = gra.kernel.getNumRegTotal();
1457 lrs = l;
1458 }
1459