1 /*========================== begin_copyright_notice ============================
2
3 Copyright (C) 2020-2021 Intel Corporation
4
5 SPDX-License-Identifier: MIT
6
7 ============================= end_copyright_notice ===========================*/
8
9 #include "DebugInfo.h"
10 #include "common.h"
11 #include "SpillManagerGMRF.h"
12 #include "LocalRA.h"
13 #include "LinearScanRA.h"
14 #include "RegAlloc.h"
15 #include "SCCAnalysis.h"
16
17 #include <fstream>
18 #include <tuple>
19
20 using namespace vISA;
21
22 extern void getForbiddenGRFs(std::vector<unsigned int>& regNum, G4_Kernel& kernel, unsigned stackCallRegSize, unsigned reserveSpillSize, unsigned reservedRegNum);
23
LinearScanRA(BankConflictPass & b,GlobalRA & g,LivenessAnalysis & liveAnalysis)24 LinearScanRA::LinearScanRA(BankConflictPass& b, GlobalRA& g, LivenessAnalysis& liveAnalysis) :
25 kernel(g.kernel), builder(g.builder), l(liveAnalysis), mem(g.builder.mem), bc(b), gra(g)
26 {
27 stackCallArgLR = nullptr;
28 stackCallRetLR = nullptr;
29 }
30
allocForbiddenVector(LSLiveRange * lr)31 void LinearScanRA::allocForbiddenVector(LSLiveRange* lr)
32 {
33 unsigned size = kernel.getNumRegTotal();
34 bool* forbidden = (bool*)mem.alloc(sizeof(bool) * size);
35 memset(forbidden, false, size);
36 lr->setForbidden(forbidden);
37 }
38
allocRetRegsVector(LSLiveRange * lr)39 void globalLinearScan::allocRetRegsVector(LSLiveRange* lr)
40 {
41 unsigned size = builder.kernel.getNumRegTotal();
42 bool* forbidden = (bool*)mem.alloc(sizeof(bool) * size);
43 memset(forbidden, false, size);
44 lr->setRegGRFs(forbidden);
45 }
46
GetOrCreateLocalLiveRange(G4_Declare * topdcl)47 LSLiveRange* LinearScanRA::GetOrCreateLocalLiveRange(G4_Declare* topdcl)
48 {
49 LSLiveRange* lr = gra.getLSLR(topdcl);
50
51 // Check topdcl of operand and setup a new live range if required
52 if (!lr)
53 {
54 lr = new (mem)LSLiveRange() ;
55 gra.setLSLR(topdcl, lr);
56 allocForbiddenVector(lr);
57 }
58
59 MUST_BE_TRUE(lr != NULL, "Local LR could not be created");
60 return lr;
61 }
62
CreateLocalLiveRange(G4_Declare * topdcl)63 LSLiveRange* LinearScanRA::CreateLocalLiveRange(G4_Declare* topdcl)
64 {
65 LSLiveRange* lr = new (mem)LSLiveRange();
66 gra.setLSLR(topdcl, lr);
67 allocForbiddenVector(lr);
68
69 MUST_BE_TRUE(lr != NULL, "Local LR could not be created");
70 return lr;
71 }
72
73 class isLifetimeOpCandidateForRemoval
74 {
75 public:
isLifetimeOpCandidateForRemoval(GlobalRA & g)76 isLifetimeOpCandidateForRemoval(GlobalRA& g) : gra(g)
77 {
78 }
79
80 GlobalRA& gra;
81
operator ()(G4_INST * inst)82 bool operator()(G4_INST* inst)
83 {
84 if (inst->isPseudoKill() ||
85 inst->isLifeTimeEnd())
86 {
87 G4_Declare* topdcl = nullptr;
88
89 if (inst->isPseudoKill())
90 {
91 topdcl = GetTopDclFromRegRegion(inst->getDst());
92 }
93 else
94 {
95 topdcl = GetTopDclFromRegRegion(inst->getSrc(0));
96 }
97
98 if (topdcl)
99 {
100 LSLiveRange* lr = gra.getLSLR(topdcl);
101 if (lr->getNumRefs() == 0 &&
102 (topdcl->getRegFile() == G4_GRF ||
103 topdcl->getRegFile() == G4_INPUT))
104 {
105 // Remove this lifetime op
106 return true;
107 }
108 }
109 }
110
111 return false;
112 }
113 };
114
removeUnrequiredLifetimeOps()115 void LinearScanRA::removeUnrequiredLifetimeOps()
116 {
117 // Iterate over all instructions and inspect only
118 // pseudo_kills/lifetime.end instructions. Remove
119 // instructions that have no other useful instruction.
120
121 for (BB_LIST_ITER bb_it = kernel.fg.begin();
122 bb_it != kernel.fg.end();
123 bb_it++)
124 {
125 G4_BB* bb = (*bb_it);
126 bb->erase(std::remove_if(bb->begin(), bb->end(), isLifetimeOpCandidateForRemoval(this->gra)),
127 bb->end());
128 }
129 }
130
setLexicalID()131 void LinearScanRA::setLexicalID()
132 {
133 unsigned int id = 1;
134 for (auto bb : kernel.fg)
135 {
136 for (auto curInst : *bb)
137 {
138 if (curInst->isPseudoKill() ||
139 curInst->isLifeTimeEnd())
140 {
141 curInst->setLexicalId(id);
142 }
143 else
144 {
145 curInst->setLexicalId(id++);
146 }
147 }
148 }
149 lastInstLexID = id;
150 }
151
hasDstSrcOverlapPotential(G4_DstRegRegion * dst,G4_SrcRegRegion * src)152 bool LinearScanRA::hasDstSrcOverlapPotential(G4_DstRegRegion* dst, G4_SrcRegRegion* src)
153 {
154 int dstOpndNumRows = 0;
155
156 if (dst->getBase()->isRegVar())
157 {
158 G4_Declare* dstDcl = dst->getBase()->asRegVar()->getDeclare();
159 if (dstDcl != nullptr)
160 {
161 int dstOffset = (dstDcl->getOffsetFromBase() + dst->getLeftBound()) / numEltPerGRF<Type_UB>();
162 G4_DstRegRegion* dstRgn = dst;
163 dstOpndNumRows = dstRgn->getSubRegOff() + dstRgn->getLinearizedEnd() - dstRgn->getLinearizedStart() + 1 > numEltPerGRF<Type_UB>();
164
165 if (src != NULL &&
166 src->isSrcRegRegion() &&
167 src->asSrcRegRegion()->getBase()->isRegVar())
168 {
169 G4_SrcRegRegion* srcRgn = src->asSrcRegRegion();
170 G4_Declare* srcDcl = src->getBase()->asRegVar()->getDeclare();
171 int srcOffset = (srcDcl->getOffsetFromBase() + src->getLeftBound()) / numEltPerGRF<Type_UB>();
172 bool srcOpndNumRows = srcRgn->getSubRegOff() + srcRgn->getLinearizedEnd() - srcRgn->getLinearizedStart() + 1 > numEltPerGRF<Type_UB>();
173
174 if (dstOpndNumRows || srcOpndNumRows)
175 {
176 if (!(gra.isEvenAligned(dstDcl) && gra.isEvenAligned(srcDcl) &&
177 srcOffset % 2 == dstOffset % 2 &&
178 dstOpndNumRows && srcOpndNumRows))
179 {
180 return true;
181 }
182 }
183 }
184 }
185 }
186
187 return false;
188 }
189
getRowInfo(int size,int & nrows,int & lastRowSize)190 void LinearScanRA::getRowInfo(int size, int& nrows, int& lastRowSize)
191 {
192 if (size <= (int)numEltPerGRF<Type_UW>())
193 {
194 nrows = 1;
195 }
196 else
197 {
198 // nrows is total number of rows, including last row even if it is partial
199 nrows = size / numEltPerGRF<Type_UW>();
200 // lastrowsize is number of words actually used in last row
201 lastRowSize = size % numEltPerGRF<Type_UW>();
202
203 if (size % numEltPerGRF<Type_UW>() != 0)
204 {
205 nrows++;
206 }
207
208 if (lastRowSize == 0)
209 {
210 lastRowSize = numEltPerGRF<Type_UW>();
211 }
212 }
213
214 return;
215 }
216
convertSubRegOffFromWords(G4_Declare * dcl,int subregnuminwords)217 unsigned int LinearScanRA::convertSubRegOffFromWords(G4_Declare* dcl, int subregnuminwords)
218 {
219 // Return subreg offset in units of dcl's element size.
220 // Input is subregnum in word units.
221 unsigned int subregnum;
222
223 subregnum = (subregnuminwords * 2) / dcl->getElemSize();
224
225 return subregnum;
226 }
227
recordRef(G4_BB * bb,bool fromEntry)228 void LSLiveRange::recordRef(G4_BB* bb, bool fromEntry)
229 {
230 if (numRefsInFG < 2)
231 {
232 if (fromEntry)
233 {
234 numRefsInFG += 2;
235 }
236 else if (bb != prevBBRef)
237 {
238 numRefsInFG++;
239 prevBBRef = bb;
240 }
241 }
242 if (!fromEntry)
243 {
244 numRefs++;
245 }
246 }
247
isGRFRegAssigned()248 bool LSLiveRange::isGRFRegAssigned()
249 {
250 MUST_BE_TRUE(topdcl != NULL, "Top dcl not set");
251 G4_RegVar* rvar = topdcl->getRegVar();
252 bool isPhyRegAssigned = false;
253
254 if (rvar)
255 {
256 if (rvar->isPhyRegAssigned())
257 isPhyRegAssigned = true;
258 }
259
260 return isPhyRegAssigned;
261 }
262
getSizeInWords()263 unsigned int LSLiveRange::getSizeInWords()
264 {
265 int nrows = getTopDcl()->getNumRows();
266 int elemsize = getTopDcl()->getElemSize();
267 int nelems = getTopDcl()->getNumElems();
268 int words = 0;
269
270 if (nrows > 1)
271 {
272 // If sizeInWords is set, use it otherwise consider entire row reserved
273 unsigned int sizeInWords = getTopDcl()->getWordSize();
274
275 if (sizeInWords > 0)
276 words = sizeInWords;
277 else
278 words = nrows * numEltPerGRF<Type_UW>();
279 }
280 else if (nrows == 1)
281 {
282 int nbytesperword = 2;
283 words = (nelems * elemsize + 1) / nbytesperword;
284 }
285
286 return words;
287 }
288
289 // Mark physical register allocated to range lr as not busy
freeAllocedRegs(LSLiveRange * lr,bool setInstID)290 void globalLinearScan::freeAllocedRegs(LSLiveRange* lr, bool setInstID)
291 {
292 int sregnum;
293
294 G4_VarBase* preg = lr->getPhyReg(sregnum);
295
296 MUST_BE_TRUE(preg != NULL,
297 "Physical register not assigned to live range. Cannot free regs.");
298
299 unsigned int idx = 0;
300 if (setInstID)
301 {
302 lr->getLastRef(idx);
303 }
304
305 if (!lr->isUseUnAvailableReg())
306 {
307 pregManager.freeRegs(preg->asGreg()->getRegNum(),
308 sregnum,
309 lr->getSizeInWords(),
310 idx);
311 }
312 }
313
printActives()314 void globalLinearScan::printActives()
315 {
316 std::cout << "====================ACTIVATE START===================" << std::endl;
317 for (auto lr : active)
318 {
319 unsigned int start, end;
320
321 lr->getFirstRef(start);
322 lr->getLastRef(end);
323
324 int startregnum, endregnum, startsregnum=0, endsregnum;
325 G4_VarBase* op;
326 op = lr->getPhyReg(startsregnum);
327
328 startregnum = endregnum = op->asGreg()->getRegNum();
329 endsregnum = startsregnum + (lr->getTopDcl()->getNumElems() * lr->getTopDcl()->getElemSize() / 2) - 1;
330
331 if (lr->getTopDcl()->getNumRows() > 1) {
332 endregnum = startregnum + lr->getTopDcl()->getNumRows() - 1;
333
334 if (lr->getTopDcl()->getWordSize() > 0)
335 {
336 endsregnum = lr->getTopDcl()->getWordSize() % numEltPerGRF<Type_UW>() - 1;
337 if (endsregnum < 0) endsregnum = 15;
338 }
339 else
340 endsregnum = 15; // last word in GRF
341 }
342 if (lr->hasIndirectAccess())
343 {
344 std::cout << "INDIR: ";
345 }
346 else
347 {
348 std::cout << "DIR : ";
349 }
350 if (lr->getPreAssigned())
351 {
352 std::cout << "\tPRE: ";
353 }
354 else
355 {
356 std::cout << "\tNOT: ";
357 }
358
359 std::cout << lr->getTopDcl()->getName() << "(" << start << ", " << end << ", " << lr->getTopDcl()->getByteSize() << ")";
360 std::cout << " (r" << startregnum << "." << startsregnum << ":w - " <<
361 "r" << endregnum << "." << endsregnum << ":w)";
362 std::cout << std::endl;
363 }
364 for (int i = 0; i < (int)(numRegLRA); i++)
365 {
366 std::cout << "\nR" << i << ":";
367
368 if (activeGRF[i].activeInput.size())
369 {
370 for (auto lr : activeGRF[i].activeLV)
371 {
372 int startregnum, endregnum, startsregnum=0, endsregnum;
373 G4_VarBase* op;
374 op = lr->getPhyReg(startsregnum);
375
376 startregnum = endregnum = op->asGreg()->getRegNum();
377 endsregnum = startsregnum + (lr->getTopDcl()->getNumElems() * lr->getTopDcl()->getElemSize() / 2) - 1;
378
379 if (lr->getTopDcl()->getNumRows() > 1) {
380 endregnum = startregnum + lr->getTopDcl()->getNumRows() - 1;
381
382 if (lr->getTopDcl()->getWordSize() > 0)
383 {
384 endsregnum = lr->getTopDcl()->getWordSize() % numEltPerGRF<Type_UW>() - 1;
385 if (endsregnum < 0) endsregnum = 15;
386 }
387 else
388 endsregnum = 15; // last word in GRF
389 }
390
391 std::cout << "\tIN: " << lr->getTopDcl()->getName();
392 std::cout << "(r" << startregnum << "." << startsregnum << ":w - " <<
393 "r" << endregnum << "." << endsregnum << ":w)";
394 //std::cout << std::endl;
395 }
396 }
397
398 if (activeGRF[i].activeLV.size())
399 {
400 // There may be multiple variables take same register with different offsets
401 for (auto lr : activeGRF[i].activeLV)
402 {
403 int startsregnum=0;
404 G4_VarBase* op = lr->getPhyReg(startsregnum);
405
406 int startregnum = op->asGreg()->getRegNum();
407 int endregnum = startregnum;
408 int endsregnum = startsregnum + (lr->getTopDcl()->getNumElems() * lr->getTopDcl()->getElemSize() / 2) - 1;
409
410 if (lr->getTopDcl()->getNumRows() > 1) {
411 endregnum = startregnum + lr->getTopDcl()->getNumRows() - 1;
412
413 if (lr->getTopDcl()->getWordSize() > 0)
414 {
415 endsregnum = lr->getTopDcl()->getWordSize() % numEltPerGRF<Type_UW>() - 1;
416 if (endsregnum < 0) endsregnum = 15;
417 }
418 else
419 endsregnum = 15; // last word in GRF
420 }
421
422 std::cout << "\t" << lr->getTopDcl()->getName();
423 std::cout << "(r" << startregnum << "." << startsregnum << ":w - " <<
424 "r" << endregnum << "." << endsregnum << ":w)";
425 }
426 }
427 }
428 std::cout << "====================ACTIVATE END===================" << std::endl;
429 }
430
expireAllActive()431 void globalLinearScan::expireAllActive()
432 {
433 if (active.size() > 0)
434 {
435 // Expire any remaining ranges
436 LSLiveRange* lastActive = active.back();
437 unsigned int endIdx;
438
439 lastActive->getLastRef(endIdx);
440
441 expireGlobalRanges(endIdx);
442 }
443 }
444
linearScanMarkReferencesInOpnd(G4_Operand * opnd,bool isEOT,bool isCall)445 void LinearScanRA::linearScanMarkReferencesInOpnd(G4_Operand* opnd, bool isEOT, bool isCall)
446 {
447 G4_Declare* topdcl = NULL;
448
449 if ((opnd->isSrcRegRegion() || opnd->isDstRegRegion()))
450 {
451 topdcl = GetTopDclFromRegRegion(opnd);
452
453 if (topdcl &&
454 (topdcl->getRegFile() == G4_GRF || topdcl->getRegFile() == G4_INPUT))
455 {
456 // Handle GRF here
457 MUST_BE_TRUE(topdcl->getAliasDeclare() == NULL, "Not topdcl");
458 LSLiveRange* lr = GetOrCreateLocalLiveRange(topdcl);
459
460 lr->recordRef(curBB_, false);
461 if (isEOT)
462 {
463 lr->markEOT();
464 }
465 if (topdcl->getRegVar()
466 && topdcl->getRegVar()->isPhyRegAssigned()
467 && topdcl->getRegVar()->getPhyReg()->isGreg())
468 {
469 lr->setPreAssigned(true);
470 }
471 if (isCall)
472 {
473 lr->setIsCall(true);
474 }
475 if (topdcl->getRegFile() == G4_INPUT)
476 {
477 BBVector[curBB_->getId()]->setRefInput(true);
478 }
479 }
480 }
481 else if (opnd->isAddrExp())
482 {
483 G4_AddrExp* addrExp = opnd->asAddrExp();
484
485 topdcl = addrExp->getRegVar()->getDeclare();
486 while (topdcl->getAliasDeclare() != NULL)
487 topdcl = topdcl->getAliasDeclare();
488
489 MUST_BE_TRUE(topdcl != NULL, "Top dcl was null for addr exp opnd");
490
491 LSLiveRange* lr = GetOrCreateLocalLiveRange(topdcl);
492
493 lr->recordRef(curBB_, false);
494 lr->markIndirectRef(true);
495 if (topdcl->getRegVar()
496 && topdcl->getRegVar()->isPhyRegAssigned()
497 && topdcl->getRegVar()->getPhyReg()->isGreg())
498 {
499 lr->setPreAssigned(true);
500 }
501 if (topdcl->getRegFile() == G4_INPUT)
502 {
503 BBVector[curBB_->getId()]->setRefInput(true);
504 }
505 }
506 }
507
linearScanMarkReferencesInInst(INST_LIST_ITER inst_it)508 void LinearScanRA::linearScanMarkReferencesInInst(INST_LIST_ITER inst_it)
509 {
510 auto inst = (*inst_it);
511
512 // Scan dst
513 G4_Operand* dst = inst->getDst();
514 if (dst)
515 {
516 linearScanMarkReferencesInOpnd(dst, false, inst->isCall());
517 }
518
519 // Scan srcs
520 for (int i = 0, nSrcs = inst->getNumSrc(); i < nSrcs; i++)
521 {
522 G4_Operand* src = inst->getSrc(i);
523
524 if (src)
525 {
526 linearScanMarkReferencesInOpnd(src, inst->isEOT(), inst->isCall());
527 }
528 }
529 }
530
linearScanMarkReferences(unsigned int & numRowsEOT)531 void LinearScanRA::linearScanMarkReferences(unsigned int& numRowsEOT)
532 {
533 // Iterate over all BBs
534 for (auto curBB : kernel.fg)
535 {
536 curBB_ = curBB;
537 // Iterate over all insts
538 for (INST_LIST_ITER inst_it = curBB->begin(), inst_end = curBB->end(); inst_it != inst_end; ++inst_it)
539 {
540 G4_INST* curInst = (*inst_it);
541
542 if (curInst->isPseudoKill() ||
543 curInst->isLifeTimeEnd())
544 {
545 if (curInst->isLifeTimeEnd())
546 {
547 linearScanMarkReferencesInInst(inst_it);
548 }
549 continue;
550 }
551
552 if (curInst->isEOT() && kernel.fg.builder->hasEOTGRFBinding())
553 {
554 numRowsEOT += curInst->getSrc(0)->getTopDcl()->getNumRows();
555
556 if (curInst->isSplitSend() && !curInst->getSrc(1)->isNullReg())
557 {
558 // both src0 and src1 have to be >=r112
559 numRowsEOT += curInst->getSrc(1)->getTopDcl()->getNumRows();
560 }
561 }
562
563 linearScanMarkReferencesInInst(inst_it);
564 }
565
566 if (BBVector[curBB->getId()]->hasBackEdgeIn()
567 || curBB->getId() == 0)
568 {
569 for (unsigned i = 0; i < kernel.Declares.size(); i++)
570 {
571 G4_Declare* dcl = kernel.Declares[i];
572 if (dcl->getAliasDeclare() != NULL)
573 {
574 continue;
575 }
576
577 if (dcl->getRegFile() != G4_GRF && dcl->getRegFile() != G4_INPUT)
578 {
579 continue;
580 }
581
582 LSLiveRange* lr = gra.getSafeLSLR(dcl);
583 if (lr == nullptr)
584 {
585 continue;
586 }
587
588 if (!l.isEmptyLiveness() &&
589 l.isLiveAtEntry(curBB, dcl->getRegVar()->getId()))
590 {
591 lr->recordRef(curBB, true);
592 }
593 }
594 }
595 }
596
597 getGlobalDeclares();
598 }
599
isLiveRangeGlobal() const600 bool LSLiveRange::isLiveRangeGlobal() const
601 {
602 if (numRefsInFG > 1 ||
603 topdcl->isOutput() == true ||
604 (topdcl->getRegVar()
605 && topdcl->getRegVar()->isPhyRegAssigned()
606 && topdcl->getRegVar()->getPhyReg()->isGreg()))
607 {
608 return true;
609 }
610
611 return false;
612 }
613
getGlobalDeclares()614 void LinearScanRA::getGlobalDeclares()
615 {
616 for (G4_Declare* dcl : kernel.Declares)
617 {
618 const LSLiveRange* lr = gra.getSafeLSLR(dcl);
619
620 if (lr && lr->isLiveRangeGlobal())
621 {
622 globalDeclares.push_back(dcl);
623 }
624 }
625
626 return;
627 }
628
markBackEdges()629 void LinearScanRA::markBackEdges()
630 {
631 unsigned numBBId = (unsigned)kernel.fg.size();
632 BBVector.resize(numBBId);
633
634 for (auto curBB : kernel.fg)
635 {
636 BBVector[curBB->getId()] = new (mem)G4_BB_LS(curBB);
637 }
638
639 for (auto curBB : kernel.fg)
640 {
641 for (auto succBB : curBB->Succs)
642 {
643 if (curBB->getId() >= succBB->getId())
644 {
645 BBVector[succBB->getId()]->setBackEdgeIn(true);
646 BBVector[curBB->getId()]->setBackEdgeOut(true);
647 }
648 }
649 }
650
651 if (!kernel.fg.isReducible())
652 {
653 //Find exit BB of SCC and map it to the head BB of SCC
654 SCCAnalysis SCCFinder(kernel.fg);
655 SCCFinder.run();
656 for (auto iter = SCCFinder.SCC_begin(), iterEnd = SCCFinder.SCC_end(); iter != iterEnd; ++iter)
657 {
658 auto&& anSCC = *iter;
659 std::unordered_set<G4_BB*> SCCSucc; // any successor BB of the SCC
660 G4_BB* headBB = anSCC.getEarliestBB();
661 for (auto BI = anSCC.body_begin(), BIEnd = anSCC.body_end(); BI != BIEnd; ++BI)
662 {
663 G4_BB* bb = *BI;
664 for (auto succ : bb->Succs)
665 {
666 if (!anSCC.isMember(succ))
667 {
668 SCCSucc.insert(succ);
669 }
670 }
671 }
672 for (auto exitBB : SCCSucc) //map the exitBB to the head of the SCC
673 {
674 loopHeadExitMap[headBB].push_back(exitBB);
675 }
676 }
677 }
678 else
679 {
680 //Find exit BB of loop and map it to the head BB of loop
681 for (auto&& iter : kernel.fg.naturalLoops)
682 {
683 auto&& backEdge = iter.first;
684 G4_BB* headBB = backEdge.second;
685 const std::set<G4_BB*>& loopBody = iter.second;
686
687 for (auto block : loopBody)
688 {
689 for (auto succBB : block->Succs)
690 {
691 if (loopBody.find(succBB) == loopBody.end())
692 {
693 G4_BB* exitBB = succBB;
694
695 unsigned latchBBId = (backEdge.first)->getId();
696 unsigned exitBBId = succBB->getId();
697 if (exitBBId < latchBBId &&
698 succBB->Succs.size() == 1)
699 {
700 exitBB = succBB->Succs.front();
701 }
702
703 loopHeadExitMap[headBB].push_back(exitBB); //map the exitBB to the head of the loop
704 }
705 }
706 }
707 }
708 }
709 }
710
createLiveIntervals()711 void LinearScanRA::createLiveIntervals()
712 {
713 for (auto dcl : gra.kernel.Declares)
714 {
715 // Mark those physical registers busy that are declared with Output attribute
716 // The live interval will gurantee they are not reused.
717 if (dcl->isOutput() &&
718 dcl->isInput())
719 {
720 pregs->markPhyRegs(dcl);
721 }
722
723 if (dcl->getAliasDeclare() != NULL)
724 {
725 continue;
726 }
727 LSLiveRange*lr = new (mem)LSLiveRange();
728 gra.setLSLR(dcl, lr);
729 allocForbiddenVector(lr);
730 }
731 }
732
preRAAnalysis()733 void LinearScanRA::preRAAnalysis()
734 {
735 int numGRF = kernel.getNumRegTotal();
736
737 // Clear LSLiveRange* computed preRA
738 gra.clearLocalLiveRanges();
739
740 createLiveIntervals();
741
742 markBackEdges();
743 // Mark references made to decls
744 linearScanMarkReferences(numRowsEOT);
745
746 // Check whether pseudo_kill/lifetime.end are only references
747 // for their respective variables. Remove them if so. Doing this
748 // helps reduce number of variables in symbol table increasing
749 // changes of skipping global RA.
750 removeUnrequiredLifetimeOps();
751
752 numRegLRA = numGRF;
753
754 unsigned int reservedGRFNum = builder.getOptions()->getuInt32Option(vISA_ReservedGRFNum);
755 bool hasStackCall = kernel.fg.getHasStackCalls() || kernel.fg.getIsStackCallFunc();
756 if (hasStackCall || reservedGRFNum || builder.getOption(vISA_Debug))
757 {
758 std::vector<unsigned int> forbiddenRegs;
759 unsigned int stackCallRegSize = hasStackCall ? gra.kernel.numReservedABIGRF() : 0;
760 getForbiddenGRFs(forbiddenRegs, kernel, stackCallRegSize, 0, reservedGRFNum);
761 for (unsigned int i = 0, size = forbiddenRegs.size(); i < size; i++)
762 {
763 unsigned int regNum = forbiddenRegs[i];
764 pregs->setGRFUnavailable(regNum); //un-available will always be there, if it's conflict with input or pre-assigned, it's still un-available.
765 }
766 }
767 else
768 {
769 pregs->setSimpleGRFAvailable(true);
770 const Options* opt = builder.getOptions();
771 if (kernel.getInt32KernelAttr(Attributes::ATTR_Target) != VISA_3D ||
772 opt->getOption(vISA_enablePreemption) ||
773 opt->getOption(vISA_ReserveR0))
774 {
775 pregs->setR0Forbidden();
776 }
777
778 if (opt->getOption(vISA_enablePreemption))
779 {
780 pregs->setR1Forbidden();
781 }
782 }
783 return;
784 }
785
getCalleeSaveRegisters()786 void LinearScanRA::getCalleeSaveRegisters()
787 {
788 unsigned int callerSaveNumGRF = builder.kernel.getCallerSaveLastGRF() + 1;
789 unsigned int numCalleeSaveRegs = builder.kernel.getNumCalleeSaveRegs();
790
791 gra.calleeSaveRegs.resize(numCalleeSaveRegs, false);
792 gra.calleeSaveRegCount = 0;
793
794 G4_Declare* dcl = builder.kernel.fg.pseudoVCEDcl;
795 LSLiveRange* lr = gra.getLSLR(dcl);
796 const bool* forbidden = lr->getForbidden();
797 unsigned int startCalleeSave = builder.kernel.getCallerSaveLastGRF() + 1;
798 unsigned int endCalleeSave = startCalleeSave + builder.kernel.getNumCalleeSaveRegs() - 1;
799 for (unsigned i = 0; i < builder.kernel.getNumRegTotal(); i++)
800 {
801 if (forbidden[i])
802 {
803 if (i >= startCalleeSave && i < endCalleeSave)
804 {
805 gra.calleeSaveRegs[i - callerSaveNumGRF] = true;
806 gra.calleeSaveRegCount++;
807 }
808 }
809 }
810 }
811
getCallerSaveRegisters()812 void LinearScanRA::getCallerSaveRegisters()
813 {
814 unsigned int callerSaveNumGRF = builder.kernel.getCallerSaveLastGRF() + 1;
815
816 for (BB_LIST_ITER it = builder.kernel.fg.begin(); it != builder.kernel.fg.end(); ++it)
817 {
818 if ((*it)->isEndWithFCall())
819 {
820 gra.callerSaveRegsMap[(*it)].resize(callerSaveNumGRF, false);
821 gra.retRegsMap[(*it)].resize(callerSaveNumGRF, false);
822 unsigned callerSaveRegCount = 0;
823 G4_INST* callInst = (*it)->back();
824 ASSERT_USER((*it)->Succs.size() == 1, "fcall basic block cannot have more than 1 successor");
825 G4_Declare* dcl = builder.kernel.fg.fcallToPseudoDclMap[callInst->asCFInst()].VCA->getRegVar()->getDeclare();
826 LSLiveRange* lr = gra.getLSLR(dcl);
827
828 const bool* forbidden = lr->getForbidden();
829 unsigned int startCalleeSave = 1;
830 unsigned int endCalleeSave = startCalleeSave + builder.kernel.getCallerSaveLastGRF();
831 for (unsigned i = 0; i < builder.kernel.getNumRegTotal(); i++)
832 {
833 if (forbidden[i])
834 {
835 if (i >= startCalleeSave && i < endCalleeSave)
836 {
837 gra.callerSaveRegsMap[(*it)][i] = true;
838 callerSaveRegCount++;
839 }
840 }
841 }
842
843 //ret
844 const bool* rRegs = lr->getRetGRFs();
845 if (rRegs != nullptr)
846 {
847 for (unsigned i = 0; i < builder.kernel.getNumRegTotal(); i++)
848 {
849 if (rRegs[i])
850 {
851 if (i >= startCalleeSave && i < endCalleeSave)
852 {
853 gra.retRegsMap[(*it)][i] = true;
854 }
855 }
856 }
857 }
858
859 gra.callerSaveRegCountMap[(*it)] = callerSaveRegCount;
860 }
861 }
862 }
863
getSaveRestoreRegister()864 void LinearScanRA::getSaveRestoreRegister()
865 {
866 if (!builder.getIsKernel())
867 {
868 getCalleeSaveRegisters();
869 }
870 getCallerSaveRegisters();
871 }
872
873 /*
874 * Calculate the last lexcial ID of executed instruction if the function is called
875 */
calculateFuncLastID()876 void LinearScanRA::calculateFuncLastID()
877 {
878 funcLastLexID.resize(kernel.fg.sortedFuncTable.size());
879 for (unsigned i = 0; i < kernel.fg.sortedFuncTable.size(); i++)
880 {
881 funcLastLexID[i] = 0;
882 }
883
884 for (auto func : kernel.fg.sortedFuncTable)
885 {
886 unsigned fid = func->getId();
887
888 if (fid == UINT_MAX)
889 {
890 // entry kernel
891 continue;
892 }
893
894 funcLastLexID[fid] = func->getExitBB()->back()->getLexicalId() * 2;
895 for (auto&& callee : func->getCallees())
896 {
897 if (funcLastLexID[fid] < funcLastLexID[callee->getId()])
898 {
899 funcLastLexID[fid] = funcLastLexID[callee->getId()];
900 }
901 }
902 }
903 }
904
linearScanRA()905 int LinearScanRA::linearScanRA()
906 {
907 if (kernel.fg.getIsStackCallFunc())
908 {
909 // Allocate space to store Frame Descriptor
910 nextSpillOffset += 32;
911 scratchOffset += 32;
912 }
913
914 std::list<LSLiveRange*> spillLRs;
915 int iterator = 0;
916 uint32_t GRFSpillFillCount = 0;
917 bool hasStackCall = kernel.fg.getHasStackCalls() || kernel.fg.getIsStackCallFunc();
918 int globalScratchOffset = kernel.getInt32KernelAttr(Attributes::ATTR_SpillMemOffset);
919 bool useScratchMsgForSpill = !hasStackCall && (globalScratchOffset < (int)(SCRATCH_MSG_LIMIT * 0.6));
920 bool enableSpillSpaceCompression = builder.getOption(vISA_SpillSpaceCompression);
921 do {
922 spillLRs.clear();
923 funcCnt = 0;
924 std::vector<LSLiveRange*> eotLiveIntervals;
925 inputIntervals.clear();
926 setLexicalID();
927 calculateFuncLastID();
928
929 #ifdef DEBUG_VERBOSE_ON
930 COUT_ERROR << "============= ITERATION: " << iterator << "============" << std::endl;
931 #endif
932
933 //Input
934 PhyRegsLocalRA initPregs = (*pregs);
935 calculateInputIntervalsGlobal(initPregs);
936 #ifdef DEBUG_VERBOSE_ON
937 COUT_ERROR << "===== printInputLiveIntervalsGlobal============" << kernel.getName() << std::endl;
938 printInputLiveIntervalsGlobal();
939 #endif
940
941 liveThroughIntervals.clear();
942 globalLiveIntervals.clear();
943 preAssignedLiveIntervals.clear();
944 eotLiveIntervals.clear();
945 unsigned latestLexID = 0;
946
947 for (auto bb : kernel.fg)
948 {
949 calculateLiveIntervalsGlobal(bb, globalLiveIntervals, eotLiveIntervals);
950 latestLexID = bb->back()->getLexicalId() * 2;
951 }
952
953 if (liveThroughIntervals.size())
954 {
955 for (auto lr : liveThroughIntervals)
956 {
957 globalLiveIntervals.insert(globalLiveIntervals.begin(), lr);
958 }
959 }
960
961 #ifdef DEBUG_VERBOSE_ON
962 COUT_ERROR << "===== globalLiveIntervals============" << std::endl;
963 printLiveIntervals(globalLiveIntervals);
964 #endif
965
966 if (eotLiveIntervals.size())
967 {
968 assignEOTLiveRanges(builder, eotLiveIntervals);
969 for (auto lr : eotLiveIntervals)
970 {
971 preAssignedLiveIntervals.push_back(lr);
972 }
973 }
974 #ifdef DEBUG_VERBOSE_ON
975 COUT_ERROR << "===== preAssignedLiveIntervals============" << std::endl;
976 printLiveIntervals(preAssignedLiveIntervals);
977 #endif
978 PhyRegsManager pregManager(initPregs, doBCR);
979 globalLinearScan ra(gra, &l, globalLiveIntervals, &preAssignedLiveIntervals, inputIntervals, pregManager,
980 mem, numRegLRA, numRowsEOT, latestLexID,
981 doBCR, highInternalConflict);
982 if (!ra.runLinearScan(builder, globalLiveIntervals, spillLRs))
983 {
984 undoLinearScanRAAssignments();
985 return VISA_FAILURE;
986 }
987
988 if (spillLRs.size())
989 {
990 if (iterator == 0 &&
991 enableSpillSpaceCompression &&
992 kernel.getInt32KernelAttr(Attributes::ATTR_Target) == VISA_3D &&
993 !(kernel.fg.getHasStackCalls() || kernel.fg.getIsStackCallFunc()))
994 {
995 unsigned int spillSize = 0;
996 for (auto lr : spillLRs)
997 {
998 spillSize += lr->getTopDcl()->getByteSize();
999 }
1000 if ((spillSize * 1.5) < (SCRATCH_MSG_LIMIT - nextSpillOffset))
1001 {
1002 enableSpillSpaceCompression = false;
1003 }
1004 }
1005
1006 SpillManagerGRF spillGRF(gra,
1007 nextSpillOffset,
1008 l.getNumSelectedVar(),
1009 &l,
1010 &spillLRs,
1011 enableSpillSpaceCompression,
1012 useScratchMsgForSpill,
1013 builder.avoidDstSrcOverlap());
1014
1015 spillGRF.spillLiveRanges(&kernel);
1016 nextSpillOffset = spillGRF.getNextOffset();;
1017 scratchOffset = std::max(scratchOffset, spillGRF.getNextScratchOffset());
1018 #ifdef DEBUG_VERBOSE_ON
1019 COUT_ERROR << "===== printSpillLiveIntervals============" << std::endl;
1020 printSpillLiveIntervals(spillLRs);
1021 #endif
1022 for (auto lr : spillLRs)
1023 {
1024 GRFSpillFillCount += lr->getNumRefs();
1025 }
1026
1027 // update jit metadata information for spill
1028 if (auto jitInfo = builder.getJitInfo())
1029 {
1030 jitInfo->isSpill = nextSpillOffset > 0;
1031 jitInfo->hasStackcalls = kernel.fg.getHasStackCalls();
1032
1033 if (builder.kernel.fg.frameSizeInOWord != 0) {
1034 // jitInfo->spillMemUsed is the entire visa stack size. Consider the caller/callee
1035 // save size if having caller/callee save
1036 // globalScratchOffset in unit of byte, others in Oword
1037 //
1038 // vISA stack
1039 // globalScratchOffset -> ---------------------
1040 // FIXME: should be 0-based | spill |
1041 // | |
1042 // calleeSaveAreaOffset -> ---------------------
1043 // | callee save |
1044 // callerSaveAreaOffset -> ---------------------
1045 // | caller save |
1046 // paramOverflowAreaOffset -> ---------------------
1047 jitInfo->spillMemUsed =
1048 builder.kernel.fg.frameSizeInOWord * 16;
1049
1050 // reserve spillMemUsed #bytes before 8kb boundary
1051 kernel.getGTPinData()->setScratchNextFree(8 * 1024 - kernel.getGTPinData()->getNumBytesScratchUse());
1052 }
1053 else {
1054 jitInfo->spillMemUsed = nextSpillOffset;
1055 kernel.getGTPinData()->setScratchNextFree(nextSpillOffset);
1056 }
1057 jitInfo->numGRFSpillFill = GRFSpillFillCount;
1058 }
1059 undoLinearScanRAAssignments();
1060 }
1061
1062 if (builder.getOption(vISA_RATrace))
1063 {
1064 std::cout << "\titeration: " << iterator << "\n";
1065 std::cout << "\t\tnextSpillOffset: " << nextSpillOffset << "\n";
1066 std::cout << "\t\tGRFSpillFillCount: " << GRFSpillFillCount << "\n";
1067 }
1068
1069 auto underSpillThreshold = [this](int numSpill, int asmCount)
1070 {
1071 int threshold = std::min(builder.getOptions()->getuInt32Option(vISA_AbortOnSpillThreshold), 200u);
1072 return (numSpill * 200) < (threshold * asmCount);
1073 };
1074
1075 int instNum = 0;
1076 for (auto bb : kernel.fg)
1077 {
1078 instNum += (int)bb->size();
1079 }
1080 if (GRFSpillFillCount && builder.getOption(vISA_AbortOnSpill) && !underSpillThreshold(GRFSpillFillCount, instNum))
1081 {
1082 // update jit metadata information
1083 if (auto jitInfo = builder.getJitInfo())
1084 {
1085 jitInfo->isSpill = true;
1086 jitInfo->spillMemUsed = 0;
1087 jitInfo->numAsmCount = instNum;
1088 jitInfo->numGRFSpillFill = GRFSpillFillCount;
1089 }
1090
1091 // Early exit when -abortonspill is passed, instead of
1092 // spending time inserting spill code and then aborting.
1093 return VISA_SPILL;
1094 }
1095
1096 iterator++;
1097 } while (spillLRs.size() && iterator < MAXIMAL_ITERATIONS);
1098
1099 if (spillLRs.size())
1100 {
1101 std::stringstream spilledVars;
1102 for (auto dcl : kernel.Declares)
1103 {
1104 if (dcl->isSpilled() && dcl->getRegFile() == G4_GRF)
1105 {
1106 spilledVars << dcl->getName() << "\t";
1107 }
1108 }
1109
1110 MUST_BE_TRUE(false,
1111 "ERROR: " << kernel.getNumRegTotal() - builder.getOptions()->getuInt32Option(vISA_ReservedGRFNum)
1112 << " GRF registers are NOT enough to compile kernel " << kernel.getName() << "!"
1113 << " The maximum register pressure in the kernel is higher"
1114 << " than the available physical registers in hardware (even"
1115 << " with spill code)."
1116 << " Please consider rewriting the kernel."
1117 << " Compiling with the symbolic register option and inspecting the"
1118 << " spilled registers may help in determining the region of high pressure.\n"
1119 << "The spilling virtual registers are as follows: "
1120 << spilledVars.str());
1121
1122 spillLRs.clear();
1123 return VISA_FAILURE;
1124 }
1125
1126 if (kernel.fg.getHasStackCalls() || kernel.fg.getIsStackCallFunc())
1127 {
1128 getSaveRestoreRegister();
1129 unsigned localSpillAreaOwordSize = ROUND(scratchOffset, 64) / 16;
1130 gra.addSaveRestoreCode(localSpillAreaOwordSize);
1131 }
1132 return VISA_SUCCESS;
1133 }
1134
doLinearScanRA()1135 int LinearScanRA::doLinearScanRA()
1136 {
1137 if (builder.getOption(vISA_RATrace))
1138 {
1139 std::cout << "--Global linear Scan RA--\n";
1140 }
1141 //Initial pregs which will be used in the preRAAnalysis
1142 PhyRegsLocalRA phyRegs(&builder, kernel.getNumRegTotal());
1143 pregs = &phyRegs;
1144 preRAAnalysis();
1145
1146 int success = linearScanRA();
1147
1148 if (success == VISA_SUCCESS)
1149 {
1150 kernel.setRAType(RA_Type::GLOBAL_LINEAR_SCAN_RA);
1151 }
1152
1153 return success;
1154 }
1155
undoLinearScanRAAssignments()1156 void LinearScanRA::undoLinearScanRAAssignments()
1157 {
1158 // Undo all assignments made by local RA
1159 for (auto dcl : kernel.Declares)
1160 {
1161 LSLiveRange* lr = gra.getLSLR(dcl);
1162 if (lr != NULL)
1163 {
1164 if (lr->getAssigned() == true)
1165 {
1166 // Undo the assignment
1167 lr->setAssigned(false);
1168 if (lr->getTopDcl()->getRegFile() != G4_INPUT &&
1169 !lr->getPreAssigned())
1170 {
1171 lr->getTopDcl()->getRegVar()->resetPhyReg();
1172 }
1173 lr->resetPhyReg();
1174 }
1175 lr->setActiveLR(false);
1176 lr->setFirstRef(NULL, 0);
1177 lr->setLastRef(NULL, 0);
1178 lr->clearForbiddenGRF(kernel.getNumRegTotal());
1179 lr->setPushed(false);
1180 if (kernel.getOption(vISA_GenerateDebugInfo))
1181 {
1182 auto lrInfo = kernel.getKernelDebugInfo()->getLiveIntervalInfo(lr->getTopDcl(), false);
1183 if (lrInfo)
1184 lrInfo->clearLiveIntervals();
1185 }
1186 }
1187 }
1188 }
1189
setPreAssignedLR(LSLiveRange * lr,std::vector<LSLiveRange * > & preAssignedLiveIntervals)1190 void LinearScanRA::setPreAssignedLR(LSLiveRange* lr, std::vector<LSLiveRange*> & preAssignedLiveIntervals)
1191 {
1192 int subreg = 0;
1193 G4_VarBase* reg = lr->getPhyReg(subreg);
1194 unsigned regnum = lr->getTopDcl()->getRegVar()->getPhyReg()->asGreg()->getRegNum();
1195 if (reg == nullptr)
1196 {
1197 unsigned int subReg = lr->getTopDcl()->getRegVar()->getPhyRegOff();
1198 unsigned int subRegInWord = subReg * lr->getTopDcl()->getRegVar()->getDeclare()->getElemSize() / 2;
1199
1200 lr->setPhyReg(builder.phyregpool.getGreg(regnum), subRegInWord);
1201 }
1202 lr->setAssigned(true);
1203
1204 //Pre assigned registers may overlap the unavailable registers
1205 lr->setUseUnAvailableReg(isUseUnAvailableRegister(regnum, lr->getTopDcl()->getNumRows()));
1206
1207 //Insert into preAssgined live intervals
1208 //If the pre-assigned register is marked as unavailable, not join the live range
1209 //FIXME: What about partial overlap?
1210 if (std::find(preAssignedLiveIntervals.begin(), preAssignedLiveIntervals.end(), lr) == preAssignedLiveIntervals.end())
1211 {
1212 preAssignedLiveIntervals.push_back(lr);
1213 }
1214
1215 return;
1216 }
1217
setDstReferences(G4_BB * bb,INST_LIST_ITER inst_it,G4_Declare * dcl,std::vector<LSLiveRange * > & liveIntervals,std::vector<LSLiveRange * > & eotLiveIntervals)1218 void LinearScanRA::setDstReferences(G4_BB* bb, INST_LIST_ITER inst_it, G4_Declare *dcl, std::vector<LSLiveRange*>& liveIntervals, std::vector<LSLiveRange*>& eotLiveIntervals)
1219 {
1220 G4_INST* curInst = (*inst_it);
1221 LSLiveRange* lr = gra.getLSLR(dcl);
1222
1223 if (!lr && dcl->getRegFile() == G4_GRF) //The new variables generated by spill/fill, mark reference should handle it
1224 {
1225 lr = CreateLocalLiveRange(dcl);
1226 }
1227
1228 if (dcl->isLiveIn() && dcl->isOutput() && !lr->isGRFRegAssigned())
1229 {
1230 if (!lr->isPushedToIntervalList())
1231 {
1232 lr->setFirstRef(curInst, 0);
1233 lr->setLastRef(curInst, lastInstLexID * 2 + 1);
1234 liveThroughIntervals.push_back(lr);
1235 lr->setPushed(true);
1236 }
1237 return;
1238 }
1239
1240 if (lr == nullptr ||
1241 (dcl->getRegFile() == G4_INPUT && dcl != kernel.fg.builder->getStackCallArg() && dcl != kernel.fg.builder->getStackCallRet())||
1242 (lr->isGRFRegAssigned() && (!dcl->getRegVar()->isGreg()))) //ARF
1243 {
1244 return;
1245 }
1246
1247 if (dcl == kernel.fg.builder->getStackCallArg())
1248 {
1249 if (stackCallArgLR == nullptr)
1250 {
1251 lr = new (mem)LSLiveRange();
1252 stackCallArgLR = lr;
1253 lr->setTopDcl(dcl);
1254 allocForbiddenVector(lr);
1255 }
1256 else
1257 {
1258 lr = stackCallArgLR;
1259 }
1260 }
1261 else if (dcl == kernel.fg.builder->getStackCallRet())
1262 {
1263 if (stackCallRetLR == nullptr)
1264 {
1265 lr = new (mem)LSLiveRange();
1266 stackCallRetLR = lr;
1267 lr->setTopDcl(dcl);
1268 allocForbiddenVector(lr);
1269 }
1270 else
1271 {
1272 lr = stackCallRetLR;
1273 }
1274 }
1275 // Check whether local LR is a candidate
1276 if (lr->isGRFRegAssigned() == false)
1277 {
1278 if (!lr->isPushedToIntervalList())
1279 {
1280 liveIntervals.push_back(lr);
1281 lr->setPushed(true);
1282 }
1283
1284 unsigned int startIdx;
1285 if (lr->getFirstRef(startIdx) == NULL && startIdx == 0)
1286 {
1287 lr->setFirstRef(curInst, curInst->getLexicalId() * 2);
1288 }
1289 lr->setLastRef(curInst, curInst->getLexicalId() * 2);
1290 }
1291 else if (dcl->getRegVar()->getPhyReg()->isGreg()) //Assigned already and is GRF
1292 { //Such as stack call varaibles
1293 unsigned int startIdx;
1294 if (!lr->isPushedToIntervalList())
1295 {
1296 if (!curInst->isFCall())
1297 {
1298 liveIntervals.push_back(lr);
1299 }
1300 lr->setPushed(true);
1301
1302 //Mark live range as assigned
1303 setPreAssignedLR(lr, preAssignedLiveIntervals);
1304 }
1305
1306 if (lr->getFirstRef(startIdx) == NULL && startIdx == 0)
1307 {
1308 lr->setFirstRef(curInst, curInst->getLexicalId() * 2);
1309 }
1310 lr->setLastRef(curInst, curInst->getLexicalId() * 2);
1311 }
1312
1313 if (lr->isEOT() && std::find(eotLiveIntervals.begin(), eotLiveIntervals.end(), lr) == eotLiveIntervals.end())
1314 {
1315 eotLiveIntervals.push_back(lr);
1316 }
1317
1318 return;
1319 }
1320
setSrcReferences(G4_BB * bb,INST_LIST_ITER inst_it,int srcIdx,G4_Declare * dcl,std::vector<LSLiveRange * > & liveIntervals,std::vector<LSLiveRange * > & eotLiveIntervals)1321 void LinearScanRA::setSrcReferences(G4_BB* bb, INST_LIST_ITER inst_it, int srcIdx, G4_Declare* dcl, std::vector<LSLiveRange*>& liveIntervals, std::vector<LSLiveRange*>& eotLiveIntervals)
1322 {
1323 G4_INST* curInst = (*inst_it);
1324 LSLiveRange* lr = gra.getLSLR(dcl);
1325
1326 if (!lr && dcl->getRegFile() == G4_GRF)
1327 {
1328 lr = CreateLocalLiveRange(dcl);
1329 }
1330
1331 if (dcl->isLiveIn() && dcl->isOutput() && !lr->isGRFRegAssigned())
1332 {
1333 if (!lr->isPushedToIntervalList())
1334 {
1335 lr->setFirstRef(curInst, 0);
1336 lr->setLastRef(curInst, lastInstLexID * 2 + 1);
1337 liveThroughIntervals.push_back(lr);
1338 lr->setPushed(true);
1339 }
1340 return;
1341 }
1342
1343 if (lr == nullptr ||
1344 (dcl->getRegFile() == G4_INPUT && dcl != kernel.fg.builder->getStackCallRet() && dcl != kernel.fg.builder->getStackCallArg()) ||
1345 (lr->isGRFRegAssigned() && (!dcl->getRegVar()->isGreg()))) //ARF
1346 {
1347 return;
1348 }
1349
1350 if (!lr->isPushedToIntervalList())
1351 {
1352 liveIntervals.push_back(lr);
1353 lr->setPushed(true);
1354 gra.addUndefinedDcl(dcl);
1355
1356 unsigned int startIdx;
1357 if (lr->getFirstRef(startIdx) == NULL && startIdx == 0)
1358 { //Since we scan from front to end, not referenced before means not defined.
1359
1360 if (lr->isGRFRegAssigned() && dcl->getRegVar()->getPhyReg()->isGreg())
1361 {
1362 lr->setFirstRef(nullptr, 1);
1363 setPreAssignedLR(lr, preAssignedLiveIntervals);
1364 }
1365 else //Not pre-asssigned, temp
1366 {
1367 lr->setFirstRef(curInst, curInst->getLexicalId() * 2);
1368 }
1369 }
1370 }
1371
1372 lr->setLastRef(curInst, curInst->getLexicalId() * 2);
1373
1374 if ((builder.WaDisableSendSrcDstOverlap() &&
1375 ((curInst->isSend() && srcIdx == 0) ||
1376 (curInst->isSplitSend() && srcIdx == 1)))
1377 || (curInst->isDpas() && srcIdx == 1) //For DPAS, as part of same instruction, src1 should not have overlap with dst.
1378 || (builder.avoidDstSrcOverlap() && curInst->getDst() != NULL && hasDstSrcOverlapPotential(curInst->getDst(), curInst->getSrc(srcIdx)->asSrcRegRegion()))
1379 )
1380 {
1381 lr->setLastRef(curInst, curInst->getLexicalId() * 2 + 1);
1382 }
1383
1384 if (lr->isEOT() && std::find(eotLiveIntervals.begin(), eotLiveIntervals.end(), lr) == eotLiveIntervals.end())
1385 {
1386 eotLiveIntervals.push_back(lr);
1387 }
1388
1389 return;
1390 }
1391
generateInputIntervals(G4_Declare * topdcl,G4_INST * inst,std::vector<uint32_t> & inputRegLastRef,PhyRegsLocalRA & initPregs,bool avoidSameInstOverlap)1392 void LinearScanRA::generateInputIntervals(G4_Declare *topdcl, G4_INST* inst, std::vector<uint32_t> &inputRegLastRef, PhyRegsLocalRA& initPregs, bool avoidSameInstOverlap)
1393 {
1394 unsigned int instID = inst->getLexicalId();
1395 G4_RegVar* var = topdcl->getRegVar();
1396 unsigned int regNum = var->getPhyReg()->asGreg()->getRegNum();
1397 unsigned int regOff = var->getPhyRegOff();
1398 unsigned int idx = regNum * numEltPerGRF<Type_UW>() +
1399 (regOff * topdcl->getElemSize()) / G4_WSIZE + topdcl->getWordSize() - 1;
1400
1401 unsigned int numWords = topdcl->getWordSize();
1402 for (int i = numWords - 1; i >= 0; --i, --idx)
1403 {
1404 if ((inputRegLastRef[idx] == UINT_MAX || inputRegLastRef[idx] < instID) &&
1405 initPregs.isGRFAvailable(idx / numEltPerGRF<Type_UW>()))
1406 {
1407 inputRegLastRef[idx] = instID;
1408 if (avoidSameInstOverlap)
1409 {
1410 inputIntervals.push_front(new (mem)LSInputLiveRange(idx, instID * 2 + 1));
1411 }
1412 else
1413 {
1414 inputIntervals.push_front(new (mem)LSInputLiveRange(idx, instID * 2));
1415 }
1416
1417 if (kernel.getOptions()->getOption(vISA_GenerateDebugInfo))
1418 {
1419 updateDebugInfo(kernel, topdcl, 0, inst->getCISAOff());
1420 }
1421 }
1422 }
1423
1424 initPregs.markPhyRegs(topdcl);
1425
1426 return;
1427 }
1428
1429 // Generate the input intervals for current BB.
1430 // The input live ranges either live through current BB or killed by current BB.
1431 // So, it's enough we check the live out of the BB and the BB it's self
calculateInputIntervalsGlobal(PhyRegsLocalRA & initPregs)1432 void LinearScanRA::calculateInputIntervalsGlobal(PhyRegsLocalRA &initPregs)
1433 {
1434 int numGRF = kernel.getNumRegTotal();
1435 std::vector<uint32_t> inputRegLastRef(numGRF * numEltPerGRF<Type_UW>(), UINT_MAX);
1436 G4_INST* lastInst = nullptr;
1437
1438 for (BB_LIST_RITER bb_it = kernel.fg.rbegin(), bb_rend = kernel.fg.rend();
1439 bb_it != bb_rend;
1440 bb_it++)
1441 {
1442 G4_BB* bb = (*bb_it);
1443
1444 //@ the end of BB
1445 if (BBVector[bb->getId()]->hasBackEdgeOut())
1446 {
1447 for (auto dcl : globalDeclares)
1448 {
1449 if (dcl->getAliasDeclare() != NULL ||
1450 dcl->isSpilled())
1451 continue;
1452
1453 if (dcl->getRegFile() == G4_INPUT &&
1454 dcl->getRegVar()->isGreg() && //Filter out the architecture registers
1455 dcl->isOutput() == false && //Input and out should be marked as unavailable
1456 !builder.isPreDefArg(dcl) && //Not stack call associated variables
1457 l.isLiveAtExit(bb, dcl->getRegVar()->getId()))
1458 {
1459 MUST_BE_TRUE(dcl->getRegVar()->isPhyRegAssigned(), "Input variable has no pre-assigned physical register");
1460 generateInputIntervals(dcl, bb->getInstList().back(), inputRegLastRef, initPregs, false);
1461 }
1462 }
1463 }
1464
1465 //@BB
1466 for (INST_LIST_RITER inst_it = bb->rbegin(), inst_rend = bb->rend();
1467 inst_it != inst_rend;
1468 inst_it++)
1469 {
1470 G4_INST* curInst = (*inst_it);
1471 G4_Declare* topdcl = NULL;
1472
1473 if (lastInst == nullptr)
1474 {
1475 lastInst = curInst;
1476 }
1477 // scan dst operand (may be unnecessary but added for safety)
1478 if (curInst->getDst() != NULL)
1479 {
1480 // Scan dst
1481 G4_DstRegRegion* dst = curInst->getDst();
1482 topdcl = GetTopDclFromRegRegion(dst);
1483
1484 if (topdcl &&
1485 topdcl->getRegFile() == G4_INPUT &&
1486 topdcl->getRegVar()->isGreg() &&
1487 topdcl->isOutput() == false &&
1488 !builder.isPreDefArg(topdcl))
1489 {
1490 generateInputIntervals(topdcl, curInst, inputRegLastRef, initPregs, false);
1491 }
1492 }
1493
1494 // Scan src operands
1495 for (int i = 0, nSrcs = curInst->getNumSrc(); i < nSrcs; i++)
1496 {
1497 G4_Operand* src = curInst->getSrc(i);
1498
1499 if (src == nullptr || src->isNullReg())
1500 {
1501 continue;
1502 }
1503
1504 if (src->getTopDcl())
1505 {
1506 topdcl = GetTopDclFromRegRegion(src);
1507
1508 if (topdcl && topdcl->getRegFile() == G4_INPUT &&
1509 (topdcl->getRegVar()->isGreg()) &&
1510 topdcl->isOutput() == false &&
1511 !builder.isPreDefArg(topdcl))
1512 {
1513 // Check whether it is input
1514 if (builder.avoidDstSrcOverlap() &&
1515 curInst->getDst() != NULL &&
1516 hasDstSrcOverlapPotential(curInst->getDst(), src->asSrcRegRegion()))
1517 {
1518 generateInputIntervals(topdcl, curInst, inputRegLastRef, initPregs, true);
1519 }
1520 else
1521 {
1522 if (l.isLiveAtEntry(bb, topdcl->getRegVar()->getId()))
1523 {
1524 generateInputIntervals(topdcl, curInst, inputRegLastRef, initPregs, false);
1525 }
1526 else //Not capture by liveness analysis
1527 {
1528 generateInputIntervals(topdcl, lastInst, inputRegLastRef, initPregs, false);
1529 }
1530 }
1531 }
1532 }
1533 else if (src->isAddrExp())
1534 {
1535 G4_AddrExp* addrExp = src->asAddrExp();
1536
1537 topdcl = addrExp->getRegVar()->getDeclare();
1538 while (topdcl->getAliasDeclare() != NULL)
1539 topdcl = topdcl->getAliasDeclare();
1540
1541 MUST_BE_TRUE(topdcl != NULL, "Top dcl was null for addr exp opnd");
1542
1543 if (topdcl->getRegFile() == G4_INPUT &&
1544 topdcl->getRegVar()->isGreg() &&
1545 topdcl->isOutput() == false &&
1546 !builder.isPreDefArg(topdcl))
1547 {
1548 generateInputIntervals(topdcl, curInst, inputRegLastRef, initPregs, false);
1549 }
1550 }
1551 }
1552 }
1553 }
1554
1555 return;
1556 }
1557
1558 //
1559 //@ the entry of BB
1560 //
calculateLiveInIntervals(G4_BB * bb,std::vector<LSLiveRange * > & liveIntervals)1561 void LinearScanRA::calculateLiveInIntervals(G4_BB* bb, std::vector<LSLiveRange*>& liveIntervals)
1562 {
1563 //FIXME: The complexity is "block_num * declare_num"
1564 std::vector<LSLiveRange*> preAssignedLiveIntervals;
1565
1566 for (auto dcl: globalDeclares)
1567 {
1568 if (dcl->getAliasDeclare() != NULL ||
1569 dcl->getRegFile() == G4_INPUT ||
1570 dcl->isSpilled())
1571 {
1572 continue;
1573 }
1574 LSLiveRange* lr = gra.getLSLR(dcl);
1575 if (lr && !l.isEmptyLiveness() &&
1576 l.isLiveAtEntry(bb, dcl->getRegVar()->getId())) //Live in current BB
1577 {
1578 if (!lr->isPushedToIntervalList())
1579 {
1580 if (lr->isGRFRegAssigned() && dcl->getRegVar()->isGreg())
1581 {
1582 setPreAssignedLR(lr, preAssignedLiveIntervals);
1583 }
1584 else
1585 {
1586 liveIntervals.push_back(lr);
1587 }
1588 lr->setPushed(true);
1589 }
1590
1591 unsigned curIdx = 0;
1592 if (lr->getFirstRef(curIdx) == NULL && curIdx == 0) //not referenced before, assigned or not assigned?
1593 {
1594 lr->setFirstRef((*bb->begin()), (*bb->begin())->getLexicalId() * 2);
1595 }
1596 }
1597 else //Live in the exist BB of a loop
1598 {
1599 //Extend the live ranges of live in variable of exit BB of the loop to the head of loop.
1600 std::map<G4_BB*, std::vector<G4_BB*>>::iterator loopHEIt = loopHeadExitMap.find(bb);
1601 if (loopHEIt != loopHeadExitMap.end()) //If current BB is the head of a loop
1602 {
1603 for (auto exitBB : loopHeadExitMap[bb]) //for all exit BBs.
1604 {
1605 if (lr && !l.isEmptyLiveness() &&
1606 l.isLiveAtEntry(exitBB, dcl->getRegVar()->getId()))
1607 {
1608 if (!lr->isPushedToIntervalList()) //Is the interval pushed or not
1609 {
1610 if (lr->isGRFRegAssigned() && dcl->getRegVar()->isGreg())
1611 {
1612 setPreAssignedLR(lr, preAssignedLiveIntervals);
1613 }
1614 else
1615 {
1616 liveIntervals.push_back(lr);
1617 }
1618 lr->setPushed(true);
1619 }
1620
1621 unsigned curIdx = 0;
1622 if (lr->getFirstRef(curIdx) == NULL && curIdx == 0) //not referenced before
1623 {
1624 lr->setFirstRef((*bb->begin()), (*bb->begin())->getLexicalId() * 2);
1625 }
1626 }
1627 }
1628 }
1629 }
1630 }
1631
1632 if (preAssignedLiveIntervals.size()&& bb->getId() == 0) //Should happen in the entry BB
1633 {
1634 liveIntervals.insert(liveIntervals.begin(), preAssignedLiveIntervals.begin(), preAssignedLiveIntervals.end());
1635 }
1636
1637 return;
1638 }
1639
calculateCurrentBBLiveIntervals(G4_BB * bb,std::vector<LSLiveRange * > & liveIntervals,std::vector<LSLiveRange * > & eotLiveIntervals)1640 void LinearScanRA::calculateCurrentBBLiveIntervals(G4_BB* bb, std::vector<LSLiveRange*>& liveIntervals, std::vector<LSLiveRange*>& eotLiveIntervals)
1641 {
1642 for (INST_LIST_ITER inst_it = bb->begin(), bbend = bb->end();
1643 inst_it != bbend;
1644 inst_it++)
1645 {
1646 G4_INST* curInst = (*inst_it);
1647 G4_Declare* topdcl = NULL;
1648
1649 if (curInst->isPseudoKill() ||
1650 curInst->isLifeTimeEnd() ||
1651 curInst->isLabel())
1652 {
1653 continue;
1654 }
1655
1656 if (curInst->isCall() == true)
1657 {
1658 const char* name = kernel.fg.builder->getNameString(kernel.fg.builder->mem, 32, "SCALL_%d", funcCnt++);
1659 G4_Declare* scallDcl = kernel.fg.builder->createDeclareNoLookup(name, G4_GRF, 1, 1, Type_UD);
1660 LSLiveRange* lr = CreateLocalLiveRange(scallDcl);
1661
1662 liveIntervals.push_back(lr);
1663 lr->setPushed(true);
1664 lr->setFirstRef(curInst, curInst->getLexicalId() * 2);
1665
1666 FuncInfo* callee = bb->getCalleeInfo();
1667 unsigned int funcId = callee->getId();
1668 lr->setLastRef(curInst, funcLastLexID[funcId]);
1669 lr->setIsCallSite(true);
1670 }
1671
1672 if (curInst->isFCall())
1673 {
1674 G4_FCALL* fcall = kernel.fg.builder->getFcallInfo(curInst);
1675 G4_Declare* arg = kernel.fg.builder->getStackCallArg();
1676 G4_Declare* ret = kernel.fg.builder->getStackCallRet();
1677 MUST_BE_TRUE(fcall != NULL, "fcall info not found");
1678
1679 uint16_t retSize = fcall->getRetSize();
1680 uint16_t argSize = fcall->getArgSize();
1681 if (ret && retSize > 0 && ret->getRegVar())
1682 {
1683 LSLiveRange* stackCallRetLR = new (mem)LSLiveRange();
1684 stackCallRetLR->setTopDcl(ret);
1685 allocForbiddenVector(stackCallRetLR);
1686 stackCallRetLR->setFirstRef(curInst, curInst->getLexicalId() * 2);
1687 liveIntervals.push_back(stackCallRetLR);
1688 stackCallRetLR->setPushed(true);
1689 }
1690 if (arg && argSize > 0 && arg->getRegVar())
1691 {
1692 assert(stackCallArgLR);
1693 stackCallArgLR->setLastRef(curInst, curInst->getLexicalId() * 2 - 1); //Minus one so that arguments will not be spilled
1694 stackCallArgLR = nullptr;
1695 }
1696 }
1697
1698 if (curInst->isFReturn())
1699 {
1700 uint16_t retSize = kernel.fg.builder->getRetVarSize();
1701 if (retSize && stackCallRetLR)
1702 {
1703 stackCallRetLR->setLastRef(curInst, curInst->getLexicalId() * 2);
1704 stackCallRetLR = nullptr;
1705 }
1706 }
1707
1708 // Scan srcs
1709 for (int i = 0, nSrcs = curInst->getNumSrc(); i < nSrcs; i++)
1710 {
1711 G4_Operand* src = curInst->getSrc(i);
1712
1713 if (src == nullptr || src->isNullReg())
1714 {
1715 continue;
1716 }
1717
1718 if (src && src->isSrcRegRegion())
1719 {
1720 if (src->asSrcRegRegion()->isIndirect())
1721 {
1722 auto pointsToSet = l.getPointsToAnalysis().getAllInPointsTo(src->getBase()->asRegVar());
1723 for (auto pt : *pointsToSet)
1724 {
1725 G4_Declare* dcl = pt.var->getDeclare()->getRootDeclare();
1726
1727 setSrcReferences(bb, inst_it, i, dcl, liveIntervals, eotLiveIntervals);
1728 }
1729 }
1730 else
1731 {
1732 // Scan all srcs
1733 topdcl = GetTopDclFromRegRegion(src);
1734 if (topdcl)
1735 {
1736 setSrcReferences(bb, inst_it, i, topdcl, liveIntervals, eotLiveIntervals);
1737 }
1738 }
1739 }
1740 }
1741
1742 // Scan dst
1743 G4_DstRegRegion* dst = curInst->getDst();
1744 if (dst)
1745 {
1746 if (dst->isIndirect())
1747 {
1748 auto pointsToSet = l.getPointsToAnalysis().getAllInPointsTo(dst->getBase()->asRegVar());
1749 for (auto pt : *pointsToSet)
1750 {
1751 G4_Declare* dcl = pt.var->getDeclare()->getRootDeclare();
1752
1753 setDstReferences(bb, inst_it, dcl, liveIntervals, eotLiveIntervals);
1754 }
1755 }
1756 else
1757 {
1758 topdcl = GetTopDclFromRegRegion(dst);
1759 if (topdcl)
1760 {
1761 setDstReferences(bb, inst_it, topdcl, liveIntervals, eotLiveIntervals);
1762 }
1763 }
1764 }
1765 }
1766
1767 return;
1768 }
1769
calculateLiveOutIntervals(G4_BB * bb,std::vector<LSLiveRange * > & liveIntervals)1770 void LinearScanRA::calculateLiveOutIntervals(G4_BB* bb, std::vector<LSLiveRange*>& liveIntervals)
1771 {
1772 for (auto dcl : globalDeclares)
1773 {
1774 if (dcl->getAliasDeclare() != NULL ||
1775 dcl->getRegFile() == G4_INPUT ||
1776 dcl->isSpilled())
1777 continue;
1778
1779 LSLiveRange* lr = gra.getLSLR(dcl);
1780 if (lr &&
1781 l.isLiveAtExit(bb, dcl->getRegVar()->getId()))
1782 {
1783 lr->setLastRef(bb->getInstList().back(), bb->getInstList().back()->getLexicalId() * 2 + 1);
1784 }
1785 }
1786
1787 return;
1788 }
1789
1790 //
1791 // Live intervals:
1792 // 1. not input variables
1793 // 2. variables without assigned value: normal Intervals.
1794 // 3. variables without assigned value, without define: wired, added by front end. Such as cmp f1.0, v11, v11. @BB only
1795 // 4. variables which are pre-defined with registers: such as stack call pre-defined varaibles. @BB only
1796 // 5. variables which are pre-defined but will not be assigned with registers: such as %null. exclusive
1797 // 6. variables which are assigned in previuos region (BB, BBs or function, ..). //@entry BB
1798 // 7. live in of region: pre-assigned, or not.
1799 // 8. live out of region: set the last reference.
1800 //
calculateLiveIntervalsGlobal(G4_BB * bb,std::vector<LSLiveRange * > & liveIntervals,std::vector<LSLiveRange * > & eotLiveIntervals)1801 void LinearScanRA::calculateLiveIntervalsGlobal(G4_BB* bb, std::vector<LSLiveRange*>& liveIntervals, std::vector<LSLiveRange*>& eotLiveIntervals)
1802 {
1803 //@ the entry of BB
1804 if (bb->getId() == 0 ||
1805 BBVector[bb->getId()]->hasBackEdgeIn())
1806 {
1807 calculateLiveInIntervals(bb, liveIntervals);
1808 }
1809
1810 //@ BB
1811 calculateCurrentBBLiveIntervals(bb, liveIntervals, eotLiveIntervals);
1812
1813 //@ the exit of BB
1814 if (BBVector[bb->getId()]->hasBackEdgeOut())
1815 {
1816 calculateLiveOutIntervals(bb, liveIntervals);
1817 }
1818
1819 return;
1820 }
1821
printLiveIntervals(std::vector<LSLiveRange * > & liveIntervals)1822 void LinearScanRA::printLiveIntervals(std::vector<LSLiveRange*>& liveIntervals)
1823 {
1824 for (auto lr : liveIntervals)
1825 {
1826 unsigned int start, end;
1827
1828 lr->getFirstRef(start);
1829 lr->getLastRef(end);
1830
1831 std::cout << lr->getTopDcl()->getName() << "(" << start << ", " << end << ", " << lr->getTopDcl()->getByteSize() << ")";
1832 std::cout << std::endl;
1833 }
1834
1835 std::cout << std::endl;
1836 }
1837
printSpillLiveIntervals(std::list<LSLiveRange * > & liveIntervals)1838 void LinearScanRA::printSpillLiveIntervals(std::list<LSLiveRange*>& liveIntervals)
1839 {
1840 for (auto lr : liveIntervals)
1841 {
1842 unsigned int start, end;
1843
1844 lr->getFirstRef(start);
1845 lr->getLastRef(end);
1846
1847 std::cout << lr->getTopDcl()->getName() << "(" << start << ", " << end << ", " << lr->getTopDcl()->getByteSize() << ")";
1848 std::cout << std::endl;
1849 }
1850
1851 std::cout << std::endl;
1852 }
1853
printInputLiveIntervalsGlobal()1854 void LinearScanRA::printInputLiveIntervalsGlobal()
1855 {
1856 COUT_ERROR << std::endl << "Input Live intervals " << std::endl;
1857
1858 for (std::list<LSInputLiveRange*>::iterator it = inputIntervals.begin();
1859 it != inputIntervals.end();
1860 it++)
1861 {
1862 unsigned int regWordIdx, lrEndIdx, regNum, subRegInWord;
1863
1864 LSInputLiveRange* lr = (*it);
1865
1866 regWordIdx = lr->getRegWordIdx();
1867 regNum = regWordIdx / numEltPerGRF<Type_UW>();
1868 subRegInWord = regWordIdx % numEltPerGRF<Type_UW>();
1869 lrEndIdx = lr->getLrEndIdx();
1870
1871 COUT_ERROR << "r" << regNum << "." << subRegInWord << " " << lrEndIdx;
1872 COUT_ERROR << std::endl;
1873 }
1874
1875 COUT_ERROR << std::endl;
1876 }
1877
printLiveInterval(LSLiveRange * lr,bool assign)1878 static inline void printLiveInterval(LSLiveRange* lr, bool assign)
1879 {
1880 int startregnum, endregnum, startsregnum=0, endsregnum;
1881 G4_VarBase* op;
1882 op = lr->getPhyReg(startsregnum);
1883
1884 startregnum = endregnum = op->asGreg()->getRegNum();
1885 endsregnum = startsregnum + (lr->getTopDcl()->getNumElems() * lr->getTopDcl()->getElemSize() / 2) - 1;
1886
1887 if (lr->getTopDcl()->getNumRows() > 1) {
1888 endregnum = startregnum + lr->getTopDcl()->getNumRows() - 1;
1889
1890 if (lr->getTopDcl()->getWordSize() > 0)
1891 {
1892 endsregnum = lr->getTopDcl()->getWordSize() % numEltPerGRF<Type_UW>() - 1;
1893 if (endsregnum < 0) endsregnum = 15;
1894 }
1895 else
1896 endsregnum = 15; // last word in GRF
1897 }
1898 if (assign)
1899 {
1900 COUT_ERROR << "Assigned physical register to ";
1901 }
1902 else
1903 {
1904 COUT_ERROR << "Free physical register of ";
1905 }
1906 COUT_ERROR << lr->getTopDcl()->getName() <<
1907 " (r" << startregnum << "." << startsregnum << ":w - " <<
1908 "r" << endregnum << "." << endsregnum << ":w)" << std::endl;
1909
1910 return;
1911 }
1912
globalLinearScan(GlobalRA & g,LivenessAnalysis * l,std::vector<LSLiveRange * > & lv,std::vector<LSLiveRange * > * assignedLiveIntervals,std::list<LSInputLiveRange *,std_arena_based_allocator<LSInputLiveRange * >> & inputLivelIntervals,PhyRegsManager & pregMgr,Mem_Manager & memmgr,unsigned int numReg,unsigned int numEOT,unsigned int lastLexID,bool bankConflict,bool internalConflict)1913 globalLinearScan::globalLinearScan(GlobalRA& g, LivenessAnalysis* l, std::vector<LSLiveRange*>& lv, std::vector<LSLiveRange*>* assignedLiveIntervals,
1914 std::list<LSInputLiveRange*, std_arena_based_allocator<LSInputLiveRange*>>& inputLivelIntervals,
1915 PhyRegsManager& pregMgr, Mem_Manager& memmgr,
1916 unsigned int numReg, unsigned int numEOT, unsigned int lastLexID, bool bankConflict,
1917 bool internalConflict)
1918 : gra(g)
1919 , builder(g.builder)
1920 , mem(memmgr)
1921 , pregManager(pregMgr)
1922 , liveIntervals(lv)
1923 , preAssignedIntervals(assignedLiveIntervals)
1924 , inputIntervals(inputLivelIntervals)
1925 , numRowsEOT(numEOT)
1926 , lastLexicalID(lastLexID)
1927 , numRegLRA(numReg)
1928 , doBankConflict(bankConflict)
1929 , highInternalConflict(internalConflict)
1930 {
1931 startGRFReg = 0;
1932 activeGRF.resize(g.kernel.getNumRegTotal());
1933 for (auto lr : inputLivelIntervals)
1934 {
1935 unsigned int regnum = lr->getRegWordIdx() / numEltPerGRF<Type_UW>();
1936 activeGRF[regnum].activeInput.push_back(lr);
1937 }
1938 }
1939
getCalleeSaveGRF(std::vector<unsigned int> & regNum,G4_Kernel * kernel)1940 void globalLinearScan::getCalleeSaveGRF(std::vector<unsigned int>& regNum, G4_Kernel* kernel)
1941 {
1942 unsigned int startCallerSave = kernel->calleeSaveStart();
1943 unsigned int endCallerSave = startCallerSave + kernel->getNumCalleeSaveRegs();
1944
1945 for (auto active_it = active.begin();
1946 active_it != active.end();
1947 active_it++)
1948 {
1949 LSLiveRange* lr = (*active_it);
1950
1951 G4_VarBase* op;
1952 int startsregnum = 0;
1953 op = lr->getPhyReg(startsregnum);
1954 unsigned startregnum = op->asGreg()->getRegNum();
1955 unsigned endregnum = startregnum + lr->getTopDcl()->getNumRows() - 1;
1956
1957 for (unsigned i = startregnum; i <= endregnum; i++)
1958 {
1959 if (i >= startCallerSave && i <= endCallerSave)
1960 {
1961 regNum.push_back(i);
1962 }
1963 }
1964 }
1965
1966 return;
1967 }
1968
getCallerSaveGRF(std::vector<unsigned int> & regNum,std::vector<unsigned int> & retRegNum,G4_Kernel * kernel)1969 void globalLinearScan::getCallerSaveGRF(
1970 std::vector<unsigned int>& regNum,
1971 std::vector<unsigned int>& retRegNum,
1972 G4_Kernel* kernel)
1973 {
1974 unsigned int startCalleeSave = 1;
1975 unsigned int endCalleeSave = startCalleeSave + kernel->getCallerSaveLastGRF();
1976
1977 for (auto active_it = active.begin();
1978 active_it != active.end();
1979 active_it++)
1980 {
1981 LSLiveRange* lr = (*active_it);
1982 G4_Declare* dcl = lr->getTopDcl();
1983
1984 if (!builder.kernel.fg.isPseudoVCEDcl(dcl) &&
1985 !builder.isPreDefArg(dcl))
1986 {
1987 G4_VarBase* op;
1988 int startsregnum = 0;
1989 op = lr->getPhyReg(startsregnum);
1990 unsigned startregnum = op->asGreg()->getRegNum();
1991 unsigned endregnum = startregnum + lr->getTopDcl()->getNumRows() - 1;
1992
1993 for (unsigned i = startregnum; i <= endregnum; i++)
1994 {
1995 if (i >= startCalleeSave && i < endCalleeSave)
1996 {
1997 if (builder.isPreDefRet(dcl))
1998 {
1999 retRegNum.push_back(i);
2000 }
2001 else
2002 {
2003 regNum.push_back(i);
2004 }
2005 }
2006 }
2007 }
2008 }
2009
2010 for (auto inputlr : inputIntervals)
2011 {
2012 unsigned int regnum = inputlr->getRegWordIdx() / numEltPerGRF<Type_UW>();
2013 std::vector<unsigned int>::iterator it = std::find(regNum.begin(), regNum.end(), regnum);
2014 if (it == regNum.end())
2015 {
2016 if (regnum >= startCalleeSave && regnum < endCalleeSave)
2017 {
2018 regNum.push_back(regnum);
2019 }
2020 }
2021 }
2022
2023 }
2024
isUseUnAvailableRegister(uint32_t startReg,uint32_t regNum)2025 bool LinearScanRA::isUseUnAvailableRegister(uint32_t startReg, uint32_t regNum)
2026 {
2027 for (uint32_t i = startReg; i < startReg + regNum; ++i)
2028 {
2029 if (!pregs->isGRFAvailable(i))
2030 {
2031 return true;
2032 }
2033 }
2034
2035 return false;
2036 }
2037
assignEOTLiveRanges(IR_Builder & builder,std::vector<LSLiveRange * > & liveIntervals)2038 bool LinearScanRA::assignEOTLiveRanges(IR_Builder& builder, std::vector<LSLiveRange*>& liveIntervals)
2039 {
2040 #ifdef DEBUG_VERBOSE_ON
2041 COUT_ERROR << "--------------------------------- " << std::endl;
2042 #endif
2043 uint32_t nextEOTGRF = numRegLRA - numRowsEOT;
2044 for (auto lr : liveIntervals)
2045 {
2046 assert(lr->isEOT());
2047 G4_Declare* dcl = lr->getTopDcl();
2048 G4_Greg* phyReg = builder.phyregpool.getGreg(nextEOTGRF);
2049 dcl->getRegVar()->setPhyReg(phyReg, 0);
2050 lr->setPhyReg(phyReg, 0);
2051 lr->setAssigned(true);
2052 lr->setUseUnAvailableReg(isUseUnAvailableRegister(nextEOTGRF, dcl->getNumRows()));
2053 nextEOTGRF += dcl->getNumRows();
2054 if (nextEOTGRF > numRegLRA)
2055 {
2056 assert(0);
2057 }
2058 #ifdef DEBUG_VERBOSE_ON
2059 printLiveInterval(lr, true);
2060 #endif
2061 }
2062
2063 return true;
2064 }
2065
updateCallSiteLiveIntervals(LSLiveRange * callSiteLR)2066 void globalLinearScan::updateCallSiteLiveIntervals(LSLiveRange* callSiteLR)
2067 {
2068 unsigned lastIdx = 0;
2069 G4_INST* inst = callSiteLR->getLastRef(lastIdx);
2070
2071 for (auto lr: active)
2072 {
2073 unsigned curLastIdx;
2074 lr->getLastRef(curLastIdx);
2075 if (curLastIdx < lastIdx)
2076 {
2077 lr->setLastRef(inst, lastIdx);
2078 }
2079 }
2080
2081 for (auto inputlr : inputIntervals)
2082 {
2083 unsigned curLastIdx = inputlr->getLrEndIdx();
2084 if (curLastIdx < lastIdx)
2085 {
2086 inputlr->setLrEndIdx(lastIdx);
2087 }
2088 }
2089
2090 return;
2091 }
2092
runLinearScan(IR_Builder & builder,std::vector<LSLiveRange * > & liveIntervals,std::list<LSLiveRange * > & spillLRs)2093 bool globalLinearScan::runLinearScan(IR_Builder& builder, std::vector<LSLiveRange*>& liveIntervals, std::list<LSLiveRange*>& spillLRs)
2094 {
2095 unsigned int idx = 0;
2096 bool allocateRegResult = false;
2097
2098 #ifdef DEBUG_VERBOSE_ON
2099 COUT_ERROR << "--------------------------------- " << std::endl;
2100 #endif
2101
2102 for (auto lr : liveIntervals)
2103 {
2104 G4_Declare* dcl = lr->getTopDcl();
2105
2106 lr->getFirstRef(idx);
2107 if (!lr->isEOT() && !lr->getAssigned())
2108 {
2109 //Add forbidden for preAssigned registers
2110 for (auto preAssginedLI : *preAssignedIntervals)
2111 {
2112 if (builder.kernel.fg.isPseudoVCADcl(lr->getTopDcl()) &&
2113 (builder.isPreDefRet(preAssginedLI->getTopDcl()) ||
2114 builder.isPreDefArg(preAssginedLI->getTopDcl())))
2115 {
2116 continue;
2117 }
2118
2119 unsigned preFirstIdx, preLastIdx;
2120 preAssginedLI->getFirstRef(preFirstIdx);
2121 preAssginedLI->getLastRef(preLastIdx);
2122
2123 unsigned lastIdx = 0;
2124 lr->getLastRef(lastIdx);
2125
2126 if (!(lastIdx < preFirstIdx || preLastIdx < idx))
2127 {
2128 G4_VarBase* preg;
2129 int subregnumword;
2130
2131 preg = preAssginedLI->getPhyReg(subregnumword);
2132 unsigned reg = preg->asGreg()->getRegNum();
2133 unsigned rowNum = preAssginedLI->getTopDcl()->getNumRows();
2134
2135 for (unsigned k = 0; k < rowNum; k++)
2136 {
2137 lr->addForbidden(reg + k);
2138 }
2139 }
2140 }
2141 }
2142
2143 #ifdef DEBUG_VERBOSE_ON
2144 COUT_ERROR << "-------- IDX: " << idx << "---------" << std::endl;
2145 #endif
2146
2147 //Expire the live ranges ended befoe idx
2148 expireGlobalRanges(idx);
2149 expireInputRanges(idx);
2150
2151 if (lr->isCallSite())
2152 {
2153 updateCallSiteLiveIntervals(lr);
2154 continue;
2155 }
2156
2157 if (builder.kernel.fg.isPseudoVCADcl(dcl))
2158 {
2159 std::vector<unsigned int> callerSaveRegs;
2160 std::vector<unsigned int> regRegs;
2161 getCallerSaveGRF(callerSaveRegs, regRegs, &gra.kernel);
2162 for (unsigned int i = 0; i < callerSaveRegs.size(); i++)
2163 {
2164 unsigned int callerSaveReg = callerSaveRegs[i];
2165 lr->addForbidden(callerSaveReg);
2166 }
2167 for (unsigned int i = 0; i < regRegs.size(); i++)
2168 {
2169 unsigned int callerSaveReg = regRegs[i];
2170 if (lr->getRetGRFs() == nullptr)
2171 {
2172 allocRetRegsVector(lr);
2173 }
2174 lr->addRetRegs(callerSaveReg);
2175 }
2176 continue;
2177 }
2178 else if (builder.kernel.fg.isPseudoVCEDcl(dcl))
2179 {
2180 calleeSaveLR = lr;
2181 continue;
2182 }
2183 else if (!lr->getAssigned())
2184 {
2185 if (dcl == gra.getOldFPDcl())
2186 {
2187 std::vector<unsigned int> callerSaveRegs;
2188 std::vector<unsigned int> regRegs;
2189 getCallerSaveGRF(callerSaveRegs, regRegs, &gra.kernel);
2190 for (unsigned int i = 0; i < callerSaveRegs.size(); i++)
2191 {
2192 unsigned int callerSaveReg = callerSaveRegs[i];
2193 lr->addForbidden(callerSaveReg);
2194 }
2195 for (unsigned int i = 0; i < regRegs.size(); i++)
2196 {
2197 unsigned int callerSaveReg = regRegs[i];
2198 if (lr->getRetGRFs() == nullptr)
2199 {
2200 allocRetRegsVector(lr);
2201 }
2202 lr->addRetRegs(callerSaveReg);
2203 }
2204 }
2205
2206 //startGRFReg = 0;
2207 allocateRegResult = allocateRegsLinearScan(lr, builder);
2208 #ifdef DEBUG_VERBOSE_ON
2209 if (allocateRegResult)
2210 {
2211 printLiveInterval(lr, true);
2212 }
2213 #endif
2214 }
2215 else
2216 {
2217 allocateRegResult = true;
2218 int startregnum, subregnum, endsregnum;
2219 G4_VarBase* op;
2220 op = lr->getPhyReg(subregnum);
2221
2222 startregnum = op->asGreg()->getRegNum();
2223 endsregnum = subregnum + (lr->getTopDcl()->getNumElems() * lr->getTopDcl()->getElemSize() / 2) - 1;
2224 int nrows = 0;
2225 int lastRowSize = 0;
2226 int size = lr->getSizeInWords();
2227 LinearScanRA::getRowInfo(size, nrows, lastRowSize);
2228
2229 if (!lr->isUseUnAvailableReg())
2230 {
2231 if ((unsigned)size >= numEltPerGRF<Type_UW>())
2232 {
2233 if (size % numEltPerGRF<Type_UW>() == 0)
2234 {
2235 pregManager.getAvaialableRegs()->setGRFBusy(startregnum, lr->getTopDcl()->getNumRows());
2236 }
2237 else
2238 {
2239 pregManager.getAvaialableRegs()->setGRFBusy(startregnum, lr->getTopDcl()->getNumRows() - 1);
2240 pregManager.getAvaialableRegs()->setWordBusy(startregnum + lr->getTopDcl()->getNumRows() - 1, 0, lastRowSize);
2241 }
2242 }
2243 else
2244 {
2245 pregManager.getAvaialableRegs()->setWordBusy(startregnum, subregnum, size);
2246 }
2247 }
2248 }
2249
2250 if (allocateRegResult)
2251 {
2252 updateGlobalActiveList(lr);
2253 }
2254 else //Spill
2255 {
2256 if (spillFromActiveList(lr, spillLRs))
2257 {
2258 //Fixme: get the start GRF already, can allocate immediately
2259 allocateRegResult = allocateRegsLinearScan(lr, builder);
2260 if (!allocateRegResult)
2261 {
2262 #ifdef DEBUG_VERBOSE_ON
2263 COUT_ERROR << "Failed assigned physical register to " << lr->getTopDcl()->getName() << ", rows :" << lr->getTopDcl()->getNumRows() << std::endl;
2264 printActives();
2265 #endif
2266 return false;
2267 }
2268 else
2269 {
2270 updateGlobalActiveList(lr);
2271 #ifdef DEBUG_VERBOSE_ON
2272 printLiveInterval(lr, true);
2273 #endif
2274 }
2275 }
2276 else
2277 {
2278 #ifdef DEBUG_VERBOSE_ON
2279 COUT_ERROR << "Failed to spill registers for " << lr->getTopDcl()->getName() << ", rows :" << lr->getTopDcl()->getNumRows() << std::endl;
2280 printActives();
2281 #endif
2282 spillLRs.push_back(lr);
2283 }
2284 }
2285 }
2286
2287 int totalGRFNum = builder.kernel.getNumRegTotal();
2288 for (int i = 0; i < totalGRFNum; i++)
2289 {
2290 activeGRF[i].activeLV.clear();
2291 activeGRF[i].activeInput.clear();
2292 }
2293
2294 //Assign the registers for the live out ones
2295 expireAllActive();
2296
2297 if (builder.kernel.getOptions()->getOption(vISA_GenerateDebugInfo))
2298 {
2299 updateDebugInfo(builder.kernel, liveIntervals);
2300 }
2301
2302 return true;
2303 }
2304
updateGlobalActiveList(LSLiveRange * lr)2305 void globalLinearScan::updateGlobalActiveList(LSLiveRange* lr)
2306 {
2307 bool done = false;
2308 unsigned int newlr_end;
2309
2310 lr->getLastRef(newlr_end);
2311
2312 for (auto active_it = active.begin();
2313 active_it != active.end();
2314 active_it++)
2315 {
2316 unsigned int end_idx;
2317 LSLiveRange* active_lr = (*active_it);
2318
2319 active_lr->getLastRef(end_idx);
2320
2321 if (end_idx > newlr_end)
2322 {
2323 active.insert(active_it, lr);
2324 done = true;
2325 break;
2326 }
2327 }
2328
2329 if (done == false)
2330 active.push_back(lr);
2331
2332 #ifdef DEBUG_VERBOSE_ON
2333 COUT_ERROR << "Add active " << lr->getTopDcl()->getName() << std::endl;
2334 #endif
2335
2336 G4_VarBase* op;
2337 int startsregnum = 0;
2338 op = lr->getPhyReg(startsregnum);
2339 unsigned startregnum = op->asGreg()->getRegNum();
2340 unsigned endregnum = startregnum + lr->getTopDcl()->getNumRows() - 1;
2341 for (unsigned i = startregnum; i <= endregnum; i++)
2342 {
2343 activeGRF[i].activeLV.push_back(lr);
2344 #ifdef DEBUG_VERBOSE_ON
2345 COUT_ERROR << "Add activeGRF " << lr->getTopDcl()->getName() << " Reg: " << i << std::endl;
2346 #endif
2347 }
2348 }
2349
insertLiveRange(std::list<LSLiveRange * > * liveIntervals,LSLiveRange * lr)2350 bool globalLinearScan::insertLiveRange(std::list<LSLiveRange*>* liveIntervals, LSLiveRange* lr)
2351 {
2352 unsigned int idx = 0;
2353 lr->getFirstRef(idx);
2354 std::list<LSLiveRange*>::iterator it = liveIntervals->begin();
2355 while (it != liveIntervals->end())
2356 {
2357 LSLiveRange* curLR = (*it);
2358 unsigned curIdx = 0;
2359 curLR->getFirstRef(curIdx);
2360 if (curIdx > idx)
2361 {
2362 liveIntervals->insert(it, lr);
2363 return true;
2364 }
2365
2366 it++;
2367 }
2368
2369 return false;
2370 }
2371
canBeSpilledLR(LSLiveRange * lr)2372 bool globalLinearScan::canBeSpilledLR(LSLiveRange* lr)
2373 {
2374 if (lr->isUseUnAvailableReg())
2375 {
2376 return false;
2377 }
2378
2379 if (lr->isEOT())
2380 {
2381 return false;
2382 }
2383
2384 if (lr->getTopDcl() == builder.getBuiltinR0())
2385 {
2386 return false;
2387 }
2388
2389 if (lr->isCall())
2390 {
2391 return false;
2392 }
2393
2394 if (lr->isGRFRegAssigned())
2395 {
2396 return false;
2397 }
2398
2399 if (lr->getTopDcl()->isSpilled())
2400 {
2401 return false;
2402 }
2403
2404 if (lr->getTopDcl()->getRegFile() == G4_INPUT)
2405 {
2406 return false;
2407 }
2408
2409 if (lr->getTopDcl()->getRegVar()->getId() == UNDEFINED_VAL)
2410 {
2411 return false;
2412 }
2413
2414 if (lr->getTopDcl()->getRegVar()->isRegVarTransient() || lr->getTopDcl()->getRegVar()->isRegVarTmp())
2415 {
2416 return false;
2417 }
2418
2419 //Stack call variables
2420 if (lr->getTopDcl() == gra.getOldFPDcl())
2421 {
2422 return false;
2423 }
2424
2425 if (builder.kernel.fg.isPseudoVCADcl(lr->getTopDcl()) ||
2426 builder.kernel.fg.isPseudoVCEDcl(lr->getTopDcl()))
2427 {
2428 return false;
2429 }
2430
2431 return true;
2432 }
2433
findSpillCandidate(LSLiveRange * tlr)2434 int globalLinearScan::findSpillCandidate(LSLiveRange* tlr)
2435 {
2436 unsigned short requiredRows = tlr->getTopDcl()->getNumRows();
2437 int referenceCount = 0;
2438 int startGRF = -1;
2439 float spillCost = (float)(int)0x7FFFFFFF;
2440 unsigned lastIdxs = 1;
2441 unsigned tStartIdx = 0;
2442
2443 tlr->getFirstRef(tStartIdx);
2444 BankAlign bankAlign = getBankAlign(tlr);
2445 for (int i = 0; i < (int)(numRegLRA - requiredRows); i++)
2446 {
2447 unsigned endIdx = 0;
2448 bool canBeFree = true;
2449 LSLiveRange* analyzedLV = nullptr;
2450
2451 pregManager.getAvaialableRegs()->findRegisterCandiateWithAlignForward(i, bankAlign, false);
2452
2453 // Check the following adjacent registers
2454 for (int k = i; k < i + requiredRows; k++)
2455 {
2456 if (activeGRF[k].activeInput.size() ||
2457 tlr->getForbidden()[k])
2458 {
2459 i = k;
2460 canBeFree = false;
2461 break;
2462 }
2463
2464 if (activeGRF[k].activeLV.size())
2465 {
2466 // There may be multiple variables take same register with different offsets
2467 for (auto lr : activeGRF[k].activeLV)
2468 {
2469 if (lr == analyzedLV) // one LV may occupy multiple registers
2470 {
2471 continue;
2472 }
2473
2474 analyzedLV = lr;
2475
2476 if (!canBeSpilledLR(lr) || lr->getForbidden()[k])
2477 {
2478 int startsregnum = 0;
2479 G4_VarBase* op = lr->getPhyReg(startsregnum);
2480 unsigned startregnum = op->asGreg()->getRegNum();
2481
2482 canBeFree = false;
2483 i = startregnum + lr->getTopDcl()->getNumRows() - 1; // Jump to k + rows - 1 to avoid unnecessory analysis.
2484
2485 break;
2486 }
2487
2488 int startsregnum = 0;
2489 G4_VarBase* op = lr->getPhyReg(startsregnum);
2490 int startregnum = op->asGreg()->getRegNum();
2491 unsigned effectGRFNum = startregnum > i ? lr->getTopDcl()->getNumRows() : lr->getTopDcl()->getNumRows() - (i - startregnum);
2492 lr->getLastRef(endIdx);
2493 lastIdxs += (endIdx - tStartIdx) * effectGRFNum;
2494 referenceCount += lr->getNumRefs();
2495 }
2496
2497 if (!canBeFree)
2498 {
2499 break;
2500 }
2501 }
2502 else if (pregManager.getAvaialableRegs()->isGRFAvailable(k) && !pregManager.getAvaialableRegs()->isGRFBusy(k))
2503 {
2504 lastIdxs += lastLexicalID - tStartIdx;
2505 }
2506 else //Reserved regsiters
2507 {
2508 i = k;
2509 canBeFree = false;
2510 break;
2511 }
2512 }
2513
2514 if (canBeFree)
2515 {
2516 //Spill cost
2517 float currentSpillCost = (float)referenceCount / lastIdxs;
2518
2519 if (currentSpillCost < spillCost)
2520 {
2521 startGRF = i;
2522 spillCost = currentSpillCost;
2523 }
2524 }
2525
2526 lastIdxs = 1;
2527 referenceCount = 0;
2528 }
2529
2530 return startGRF;
2531 }
2532
freeSelectedRegistsers(int startGRF,LSLiveRange * tlr,std::list<LSLiveRange * > & spillLRs)2533 void globalLinearScan::freeSelectedRegistsers(int startGRF, LSLiveRange* tlr, std::list<LSLiveRange*>& spillLRs)
2534 {
2535 unsigned short requiredRows = tlr->getTopDcl()->getNumRows();
2536 #ifdef DEBUG_VERBOSE_ON
2537 COUT_ERROR << "Required GRF size for spill: " << requiredRows << std::endl;
2538 #endif
2539
2540 //Free registers.
2541 for (int k = startGRF; k < startGRF + requiredRows; k++)
2542 {
2543 #ifdef DEBUG_VERBOSE_ON
2544 if (!activeGRF[k].activeLV.size())
2545 {
2546 COUT_ERROR << "Pick free GRF for spill: " << " GRF:" << k << std::endl;
2547 }
2548 #endif
2549
2550 while (activeGRF[k].activeLV.size())
2551 {
2552 LSLiveRange* lr = activeGRF[k].activeLV.front();
2553
2554 G4_VarBase* op;
2555 int startsregnum = 0;
2556 op = lr->getPhyReg(startsregnum);
2557 unsigned startregnum = op->asGreg()->getRegNum();
2558 unsigned endregnum = startregnum + lr->getTopDcl()->getNumRows() - 1;
2559
2560 assert(startregnum <= (unsigned)k);
2561 assert(lr->getTopDcl()->getRegFile() != G4_INPUT);
2562
2563 //Free from the register buckect array
2564 for (unsigned s = startregnum; s <= endregnum; s++)
2565 {
2566 std::vector<LSLiveRange*>::iterator it = std::find(activeGRF[s].activeLV.begin(), activeGRF[s].activeLV.end(), lr);
2567 if (it != activeGRF[s].activeLV.end())
2568 {
2569 #ifdef DEBUG_VERBOSE_ON
2570 COUT_ERROR << "SPILL: Free activeGRF from : " << (lr)->getTopDcl()->getName() << " GRF:" << s << std::endl;
2571 #endif
2572 activeGRF[s].activeLV.erase(it);
2573 }
2574 }
2575
2576 #ifdef DEBUG_VERBOSE_ON
2577 printLiveInterval(lr, false);
2578 #endif
2579
2580 //Free the allocated register
2581 freeAllocedRegs(lr, true);
2582
2583 //Record spilled live range
2584 if (std::find(spillLRs.begin(), spillLRs.end(), lr) == spillLRs.end())
2585 {
2586 spillLRs.push_back(lr);
2587 }
2588
2589 //Remove spilled live range from active list
2590 std::list<LSLiveRange*>::iterator activeListIter = active.begin();
2591 while (activeListIter != active.end())
2592 {
2593 std::list<LSLiveRange*>::iterator nextIt = activeListIter;
2594 nextIt++;
2595
2596 if ((*activeListIter) == lr)
2597 {
2598 #ifdef DEBUG_VERBOSE_ON
2599 COUT_ERROR << "SPILL: Free active lr: " << (*activeListIter)->getTopDcl()->getName() << std::endl;
2600 #endif
2601 active.erase(activeListIter);
2602 break;
2603 }
2604 activeListIter = nextIt;
2605 }
2606 }
2607 }
2608 }
2609
spillFromActiveList(LSLiveRange * tlr,std::list<LSLiveRange * > & spillLRs)2610 bool globalLinearScan::spillFromActiveList(LSLiveRange* tlr, std::list<LSLiveRange*>& spillLRs)
2611 {
2612 int startGRF = findSpillCandidate(tlr);
2613
2614 if (startGRF == -1)
2615 {
2616 #ifdef DEBUG_VERBOSE_ON
2617 printActives();
2618 #endif
2619 return false;
2620 }
2621
2622 freeSelectedRegistsers(startGRF, tlr, spillLRs);
2623
2624 return true;
2625 }
2626
expireGlobalRanges(unsigned int idx)2627 void globalLinearScan::expireGlobalRanges(unsigned int idx)
2628 {
2629 //active list is sorted in ascending order of starting index
2630
2631 while (active.size() > 0)
2632 {
2633 unsigned int endIdx;
2634 LSLiveRange* lr = active.front();
2635
2636 lr->getLastRef(endIdx);
2637
2638 if (endIdx <= idx)
2639 {
2640 G4_VarBase* preg;
2641 int subregnumword, subregnum;
2642
2643 preg = lr->getPhyReg(subregnumword);
2644
2645 if (preg)
2646 {
2647 subregnum = LinearScanRA::convertSubRegOffFromWords(lr->getTopDcl(), subregnumword);
2648
2649 // Mark the RegVar object of dcl as assigned to physical register
2650 lr->getTopDcl()->getRegVar()->setPhyReg(preg, subregnum);
2651 lr->setAssigned(true);
2652 }
2653
2654 #ifdef DEBUG_VERBOSE_ON
2655 printLiveInterval(lr, false);
2656 #endif
2657 if (preg)
2658 {
2659 unsigned startregnum = preg->asGreg()->getRegNum();
2660 unsigned endregnum = startregnum + lr->getTopDcl()->getNumRows() - 1;
2661 for (unsigned i = startregnum; i <= endregnum; i++)
2662 {
2663 std::vector<LSLiveRange*>::iterator activeListIter = activeGRF[i].activeLV.begin();
2664 while (activeListIter != activeGRF[i].activeLV.end())
2665 {
2666 std::vector<LSLiveRange*>::iterator nextIt = activeListIter;
2667 nextIt++;
2668 if ((*activeListIter) == lr)
2669 {
2670 activeGRF[i].activeLV.erase(activeListIter);
2671 #ifdef DEBUG_VERBOSE_ON
2672 COUT_ERROR << "Remove range " << lr->getTopDcl()->getName() << " from activeGRF: " << i << std::endl;
2673 #endif
2674 break;
2675 }
2676 activeListIter = nextIt;
2677 }
2678 }
2679
2680 if (calleeSaveLR)
2681 {
2682 unsigned int startCallerSave = builder.kernel.calleeSaveStart();
2683 unsigned int endCallerSave = startCallerSave + builder.kernel.getNumCalleeSaveRegs();
2684
2685 for (unsigned i = startregnum; i <= endregnum; i++)
2686 {
2687 if (i >= startCallerSave && i <= endCallerSave)
2688 {
2689 calleeSaveLR->addForbidden(i);
2690 }
2691 }
2692 }
2693 }
2694
2695 // Free physical regs marked for this range
2696 freeAllocedRegs(lr, true);
2697
2698 // Remove range from active list
2699 active.pop_front();
2700 }
2701 else
2702 {
2703 // As soon as we find first range that ends after ids break loop
2704 break;
2705 }
2706 }
2707 }
2708
expireInputRanges(unsigned int global_idx)2709 void globalLinearScan::expireInputRanges(unsigned int global_idx)
2710 {
2711 while (inputIntervals.size() > 0)
2712 {
2713 LSInputLiveRange* lr = inputIntervals.front();
2714 unsigned int endIdx = lr->getLrEndIdx();
2715
2716 if (endIdx <= global_idx)
2717 {
2718 unsigned int regnum = lr->getRegWordIdx() / numEltPerGRF<Type_UW>();
2719 unsigned int subRegInWord = lr->getRegWordIdx() % numEltPerGRF<Type_UW>();
2720
2721 // Free physical regs marked for this range
2722 pregManager.freeRegs(regnum, subRegInWord, 1, endIdx);
2723
2724 #ifdef DEBUG_VERBOSE_ON
2725 COUT_ERROR << "Expiring input r" << regnum << "." << subRegInWord << std::endl;
2726 #endif
2727
2728 // Remove range from inputIntervals list
2729 inputIntervals.pop_front();
2730 assert(lr == activeGRF[regnum].activeInput.front());
2731 activeGRF[regnum].activeInput.erase(activeGRF[regnum].activeInput.begin());
2732 }
2733 else
2734 {
2735 // As soon as we find first range that ends after ids break loop
2736 break;
2737 }
2738 }
2739 }
2740
getBankAlign(LSLiveRange * lr)2741 BankAlign globalLinearScan::getBankAlign(LSLiveRange* lr)
2742 {
2743 G4_Declare* dcl = lr->getTopDcl();
2744 BankAlign bankAlign = gra.isEvenAligned(dcl) ? BankAlign::Even : BankAlign::Either;
2745
2746 if (gra.getVarSplitPass()->isPartialDcl(lr->getTopDcl()))
2747 {
2748 // Special alignment is not needed for var split intrinsic
2749 bankAlign = BankAlign::Either;
2750 }
2751
2752 return bankAlign;
2753 }
2754
allocateRegsLinearScan(LSLiveRange * lr,IR_Builder & builder)2755 bool globalLinearScan::allocateRegsLinearScan(LSLiveRange* lr, IR_Builder& builder)
2756 {
2757 int regnum, subregnum;
2758 unsigned int localRABound = 0;
2759 unsigned int instID;
2760
2761 lr->getFirstRef(instID);
2762 // Let local RA allocate only those ranges that need < 10 GRFs
2763 // Larger ranges are not many and are best left to global RA
2764 // as it can make a better judgement by considering the
2765 // spill cost.
2766 int nrows = 0;
2767 int size = lr->getSizeInWords();
2768 G4_Declare* dcl = lr->getTopDcl();
2769 G4_SubReg_Align subalign = gra.getSubRegAlign(dcl);
2770 localRABound = numRegLRA - 1;
2771
2772 BankAlign bankAlign = getBankAlign(lr);
2773 nrows = pregManager.findFreeRegs(size,
2774 bankAlign,
2775 subalign,
2776 regnum,
2777 subregnum,
2778 startGRFReg,
2779 localRABound,
2780 instID,
2781 lr->getForbidden());
2782
2783 if (nrows)
2784 {
2785 #ifdef DEBUG_VERBOSE_ON
2786 COUT_ERROR << lr->getTopDcl()->getName() << ":r" << regnum << " BANK: " << (int)bankAlign << std::endl;
2787 #endif
2788 lr->setPhyReg(builder.phyregpool.getGreg(regnum), subregnum);
2789 if (!builder.getOptions()->getOption(vISA_LSFristFit))
2790 {
2791 startGRFReg = (startGRFReg + nrows) % localRABound;
2792 }
2793 else
2794 {
2795 assert(startGRFReg == 0);
2796 }
2797
2798 return true;
2799 }
2800 else if (!builder.getOptions()->getOption(vISA_LSFristFit))
2801 {
2802 startGRFReg = 0;
2803 nrows = pregManager.findFreeRegs(size,
2804 bankAlign,
2805 subalign,
2806 regnum,
2807 subregnum,
2808 startGRFReg,
2809 localRABound,
2810 instID,
2811 lr->getForbidden());
2812 if (nrows)
2813 {
2814 #ifdef DEBUG_VERBOSE_ON
2815 COUT_ERROR << lr->getTopDcl()->getName() << ":r" << regnum << " BANK: " << (int)bankAlign << std::endl;
2816 #endif
2817 lr->setPhyReg(builder.phyregpool.getGreg(regnum), subregnum);
2818 startGRFReg = (startGRFReg + nrows) % localRABound;
2819 return true;
2820 }
2821 }
2822 #ifdef DEBUG_VERBOSE_ON
2823 COUT_ERROR << lr->getTopDcl()->getName() << ": failed to allocate" << std::endl;
2824 #endif
2825
2826 return false;
2827 }
2828
findFreeMultipleRegsForward(int regIdx,BankAlign align,int & regnum,int nrows,int lastRowSize,int endReg,int instID,const bool * forbidden)2829 bool PhyRegsLocalRA::findFreeMultipleRegsForward(int regIdx, BankAlign align, int& regnum, int nrows, int lastRowSize, int endReg, int instID, const bool* forbidden)
2830 {
2831 int foundItem = 0;
2832 int startReg = 0;
2833 int i = regIdx;
2834 int grfRows = 0;
2835 bool multiSteps = nrows > 1;
2836
2837 if (lastRowSize % numEltPerGRF<Type_UW>() == 0)
2838 {
2839 grfRows = nrows;
2840 }
2841 else
2842 {
2843 grfRows = nrows - 1;
2844 }
2845
2846 findRegisterCandiateWithAlignForward(i, align, multiSteps);
2847
2848 startReg = i;
2849 while (i <= endReg + nrows - 1)
2850 {
2851 if (isGRFAvailable(i) && !forbidden[i] &&
2852 regBusyVector[i] == 0)
2853 {
2854 foundItem++;
2855 }
2856 else if (foundItem < grfRows)
2857 {
2858 foundItem = 0;
2859 i++;
2860 findRegisterCandiateWithAlignForward(i, align, multiSteps);
2861 startReg = i;
2862 continue;
2863 }
2864
2865 if (foundItem == grfRows)
2866 {
2867 if (lastRowSize % numEltPerGRF<Type_UW>() == 0)
2868 {
2869 regnum = startReg;
2870 return true;
2871 }
2872 else
2873 {
2874 if (i + 1 <= endReg + nrows - 1 &&
2875 isGRFAvailable(i + 1) && !forbidden[i + 1] &&
2876 (isWordBusy(i + 1, 0, lastRowSize) == false))
2877 {
2878 regnum = startReg;
2879 return true;
2880 }
2881 else
2882 {
2883 foundItem = 0;
2884 i++;
2885 findRegisterCandiateWithAlignForward(i, align, multiSteps);
2886 startReg = i;
2887 continue;
2888 }
2889 }
2890 }
2891
2892 i++;
2893 }
2894
2895 return false;
2896 }
2897
findFreeSingleReg(int regIdx,int size,BankAlign align,G4_SubReg_Align subalign,int & regnum,int & subregnum,int endReg,const bool * forbidden)2898 bool PhyRegsLocalRA::findFreeSingleReg(int regIdx, int size, BankAlign align, G4_SubReg_Align subalign, int& regnum, int& subregnum, int endReg, const bool* forbidden)
2899 {
2900 int i = regIdx;
2901 bool found = false;
2902
2903 while (!found)
2904 {
2905 if (i > endReg) //<= works
2906 break;
2907
2908 // Align GRF
2909 if ((align == BankAlign::Even) && (i % 2 != 0))
2910 {
2911 i++;
2912 continue;
2913 }
2914 else if ((align == BankAlign::Odd) && (i % 2 == 0))
2915 {
2916 i++;
2917 continue;
2918 }
2919 else if ((align == BankAlign::Even2GRF) && ((i % 4 >= 2)))
2920 {
2921 i++;
2922 continue;
2923 }
2924 else if ((align == BankAlign::Odd2GRF) && ((i % 4 < 2)))
2925 {
2926 i++;
2927 continue;
2928 }
2929
2930 if (isGRFAvailable(i, 1) && !forbidden[i])
2931 {
2932 found = findFreeSingleReg(i, subalign, regnum, subregnum, size);
2933 if (found)
2934 {
2935 return true;
2936 }
2937 }
2938 i++;
2939 }
2940
2941 return false;
2942 }
2943
findFreeRegs(int size,BankAlign align,G4_SubReg_Align subalign,int & regnum,int & subregnum,int startRegNum,int endRegNum,unsigned int instID,const bool * forbidden)2944 int PhyRegsManager::findFreeRegs(int size, BankAlign align, G4_SubReg_Align subalign, int& regnum, int& subregnum,
2945 int startRegNum, int endRegNum, unsigned int instID, const bool* forbidden)
2946 {
2947 int nrows = 0;
2948 int lastRowSize = 0;
2949 LocalRA::getRowInfo(size, nrows, lastRowSize);
2950
2951 int startReg = startRegNum;
2952 int endReg = endRegNum - nrows + 1;
2953
2954 bool found = false;
2955
2956 if (size >= (int)numEltPerGRF<Type_UW>())
2957 {
2958 found = availableRegs.findFreeMultipleRegsForward(startReg, align, regnum, nrows, lastRowSize, endReg, instID, forbidden);
2959 if (found)
2960 {
2961 subregnum = 0;
2962 if (size % numEltPerGRF<Type_UW>() == 0)
2963 {
2964 availableRegs.setGRFBusy(regnum, nrows);
2965 }
2966 else
2967 {
2968 availableRegs.setGRFBusy(regnum, nrows - 1);
2969 availableRegs.setWordBusy(regnum + nrows - 1, 0, lastRowSize);
2970 }
2971 }
2972 }
2973 else
2974 {
2975 found = availableRegs.findFreeSingleReg(startReg, size, align, subalign, regnum, subregnum, endReg, forbidden);
2976 if (found)
2977 {
2978 availableRegs.setWordBusy(regnum, subregnum, size);
2979 }
2980 }
2981
2982 if (found)
2983 {
2984 return nrows;
2985 }
2986
2987 return 0;
2988 }
2989