1 /*========================== begin_copyright_notice ============================
2 
3 Copyright (C) 2020-2021 Intel Corporation
4 
5 SPDX-License-Identifier: MIT
6 
7 ============================= end_copyright_notice ===========================*/
8 
9 #include "DebugInfo.h"
10 #include "common.h"
11 #include "SpillManagerGMRF.h"
12 #include "LocalRA.h"
13 #include "LinearScanRA.h"
14 #include "RegAlloc.h"
15 #include "SCCAnalysis.h"
16 
17 #include <fstream>
18 #include <tuple>
19 
20 using namespace vISA;
21 
22 extern void getForbiddenGRFs(std::vector<unsigned int>& regNum, G4_Kernel& kernel, unsigned stackCallRegSize, unsigned reserveSpillSize, unsigned reservedRegNum);
23 
LinearScanRA(BankConflictPass & b,GlobalRA & g,LivenessAnalysis & liveAnalysis)24 LinearScanRA::LinearScanRA(BankConflictPass& b, GlobalRA& g, LivenessAnalysis& liveAnalysis) :
25     kernel(g.kernel), builder(g.builder), l(liveAnalysis), mem(g.builder.mem), bc(b), gra(g)
26 {
27     stackCallArgLR = nullptr;
28     stackCallRetLR = nullptr;
29 }
30 
allocForbiddenVector(LSLiveRange * lr)31 void LinearScanRA::allocForbiddenVector(LSLiveRange* lr)
32 {
33     unsigned size = kernel.getNumRegTotal();
34     bool* forbidden = (bool*)mem.alloc(sizeof(bool) * size);
35     memset(forbidden, false, size);
36     lr->setForbidden(forbidden);
37 }
38 
allocRetRegsVector(LSLiveRange * lr)39 void globalLinearScan::allocRetRegsVector(LSLiveRange* lr)
40 {
41     unsigned size = builder.kernel.getNumRegTotal();
42     bool* forbidden = (bool*)mem.alloc(sizeof(bool) * size);
43     memset(forbidden, false, size);
44     lr->setRegGRFs(forbidden);
45 }
46 
GetOrCreateLocalLiveRange(G4_Declare * topdcl)47 LSLiveRange* LinearScanRA::GetOrCreateLocalLiveRange(G4_Declare* topdcl)
48 {
49     LSLiveRange* lr = gra.getLSLR(topdcl);
50 
51     // Check topdcl of operand and setup a new live range if required
52     if (!lr)
53     {
54         lr = new (mem)LSLiveRange() ;
55         gra.setLSLR(topdcl, lr);
56         allocForbiddenVector(lr);
57     }
58 
59     MUST_BE_TRUE(lr != NULL, "Local LR could not be created");
60     return lr;
61 }
62 
CreateLocalLiveRange(G4_Declare * topdcl)63 LSLiveRange* LinearScanRA::CreateLocalLiveRange(G4_Declare* topdcl)
64 {
65     LSLiveRange* lr = new (mem)LSLiveRange();
66     gra.setLSLR(topdcl, lr);
67     allocForbiddenVector(lr);
68 
69     MUST_BE_TRUE(lr != NULL, "Local LR could not be created");
70     return lr;
71 }
72 
73 class isLifetimeOpCandidateForRemoval
74 {
75 public:
isLifetimeOpCandidateForRemoval(GlobalRA & g)76     isLifetimeOpCandidateForRemoval(GlobalRA& g) : gra(g)
77     {
78     }
79 
80     GlobalRA& gra;
81 
operator ()(G4_INST * inst)82     bool operator()(G4_INST* inst)
83     {
84         if (inst->isPseudoKill() ||
85             inst->isLifeTimeEnd())
86         {
87             G4_Declare* topdcl = nullptr;
88 
89             if (inst->isPseudoKill())
90             {
91                 topdcl = GetTopDclFromRegRegion(inst->getDst());
92             }
93             else
94             {
95                 topdcl = GetTopDclFromRegRegion(inst->getSrc(0));
96             }
97 
98             if (topdcl)
99             {
100                 LSLiveRange* lr = gra.getLSLR(topdcl);
101                 if (lr->getNumRefs() == 0 &&
102                     (topdcl->getRegFile() == G4_GRF ||
103                         topdcl->getRegFile() == G4_INPUT))
104                 {
105                     // Remove this lifetime op
106                     return true;
107                 }
108             }
109         }
110 
111         return false;
112     }
113 };
114 
removeUnrequiredLifetimeOps()115 void LinearScanRA::removeUnrequiredLifetimeOps()
116 {
117     // Iterate over all instructions and inspect only
118     // pseudo_kills/lifetime.end instructions. Remove
119     // instructions that have no other useful instruction.
120 
121     for (BB_LIST_ITER bb_it = kernel.fg.begin();
122         bb_it != kernel.fg.end();
123         bb_it++)
124     {
125         G4_BB* bb = (*bb_it);
126         bb->erase(std::remove_if(bb->begin(), bb->end(), isLifetimeOpCandidateForRemoval(this->gra)),
127             bb->end());
128     }
129 }
130 
setLexicalID()131 void LinearScanRA::setLexicalID()
132 {
133     unsigned int id = 1;
134     for (auto bb : kernel.fg)
135     {
136         for (auto curInst : *bb)
137         {
138             if (curInst->isPseudoKill() ||
139                 curInst->isLifeTimeEnd())
140             {
141                 curInst->setLexicalId(id);
142             }
143             else
144             {
145                 curInst->setLexicalId(id++);
146             }
147         }
148     }
149     lastInstLexID = id;
150 }
151 
hasDstSrcOverlapPotential(G4_DstRegRegion * dst,G4_SrcRegRegion * src)152 bool LinearScanRA::hasDstSrcOverlapPotential(G4_DstRegRegion* dst, G4_SrcRegRegion* src)
153 {
154     int dstOpndNumRows = 0;
155 
156     if (dst->getBase()->isRegVar())
157     {
158         G4_Declare* dstDcl = dst->getBase()->asRegVar()->getDeclare();
159         if (dstDcl != nullptr)
160         {
161             int dstOffset = (dstDcl->getOffsetFromBase() + dst->getLeftBound()) / numEltPerGRF<Type_UB>();
162             G4_DstRegRegion* dstRgn = dst;
163             dstOpndNumRows = dstRgn->getSubRegOff() + dstRgn->getLinearizedEnd() - dstRgn->getLinearizedStart() + 1 > numEltPerGRF<Type_UB>();
164 
165             if (src != NULL &&
166                 src->isSrcRegRegion() &&
167                 src->asSrcRegRegion()->getBase()->isRegVar())
168             {
169                 G4_SrcRegRegion* srcRgn = src->asSrcRegRegion();
170                 G4_Declare* srcDcl = src->getBase()->asRegVar()->getDeclare();
171                 int srcOffset = (srcDcl->getOffsetFromBase() + src->getLeftBound()) / numEltPerGRF<Type_UB>();
172                 bool srcOpndNumRows = srcRgn->getSubRegOff() + srcRgn->getLinearizedEnd() - srcRgn->getLinearizedStart() + 1 > numEltPerGRF<Type_UB>();
173 
174                 if (dstOpndNumRows || srcOpndNumRows)
175                 {
176                     if (!(gra.isEvenAligned(dstDcl) && gra.isEvenAligned(srcDcl) &&
177                         srcOffset % 2 == dstOffset % 2 &&
178                         dstOpndNumRows && srcOpndNumRows))
179                     {
180                         return true;
181                     }
182                 }
183             }
184         }
185     }
186 
187     return false;
188 }
189 
getRowInfo(int size,int & nrows,int & lastRowSize)190 void LinearScanRA::getRowInfo(int size, int& nrows, int& lastRowSize)
191 {
192     if (size <= (int)numEltPerGRF<Type_UW>())
193     {
194         nrows = 1;
195     }
196     else
197     {
198         // nrows is total number of rows, including last row even if it is partial
199         nrows = size / numEltPerGRF<Type_UW>();
200         // lastrowsize is number of words actually used in last row
201         lastRowSize = size % numEltPerGRF<Type_UW>();
202 
203         if (size % numEltPerGRF<Type_UW>() != 0)
204         {
205             nrows++;
206         }
207 
208         if (lastRowSize == 0)
209         {
210             lastRowSize = numEltPerGRF<Type_UW>();
211         }
212     }
213 
214     return;
215 }
216 
convertSubRegOffFromWords(G4_Declare * dcl,int subregnuminwords)217 unsigned int LinearScanRA::convertSubRegOffFromWords(G4_Declare* dcl, int subregnuminwords)
218 {
219     // Return subreg offset in units of dcl's element size.
220     // Input is subregnum in word units.
221     unsigned int subregnum;
222 
223     subregnum = (subregnuminwords * 2) / dcl->getElemSize();
224 
225     return subregnum;
226 }
227 
recordRef(G4_BB * bb,bool fromEntry)228 void LSLiveRange::recordRef(G4_BB* bb, bool fromEntry)
229 {
230     if (numRefsInFG < 2)
231     {
232         if (fromEntry)
233         {
234             numRefsInFG += 2;
235         }
236         else if (bb != prevBBRef)
237         {
238             numRefsInFG++;
239             prevBBRef = bb;
240         }
241     }
242     if (!fromEntry)
243     {
244         numRefs++;
245     }
246 }
247 
isGRFRegAssigned()248 bool LSLiveRange::isGRFRegAssigned()
249 {
250     MUST_BE_TRUE(topdcl != NULL, "Top dcl not set");
251     G4_RegVar* rvar = topdcl->getRegVar();
252     bool isPhyRegAssigned = false;
253 
254     if (rvar)
255     {
256         if (rvar->isPhyRegAssigned())
257             isPhyRegAssigned = true;
258     }
259 
260     return isPhyRegAssigned;
261 }
262 
getSizeInWords()263 unsigned int LSLiveRange::getSizeInWords()
264 {
265     int nrows = getTopDcl()->getNumRows();
266     int elemsize = getTopDcl()->getElemSize();
267     int nelems = getTopDcl()->getNumElems();
268     int words = 0;
269 
270     if (nrows > 1)
271     {
272         // If sizeInWords is set, use it otherwise consider entire row reserved
273         unsigned int sizeInWords = getTopDcl()->getWordSize();
274 
275         if (sizeInWords > 0)
276             words = sizeInWords;
277         else
278             words = nrows * numEltPerGRF<Type_UW>();
279     }
280     else if (nrows == 1)
281     {
282         int nbytesperword = 2;
283         words = (nelems * elemsize + 1) / nbytesperword;
284     }
285 
286     return words;
287 }
288 
289 // Mark physical register allocated to range lr as not busy
freeAllocedRegs(LSLiveRange * lr,bool setInstID)290 void globalLinearScan::freeAllocedRegs(LSLiveRange* lr, bool setInstID)
291 {
292     int sregnum;
293 
294     G4_VarBase* preg = lr->getPhyReg(sregnum);
295 
296     MUST_BE_TRUE(preg != NULL,
297         "Physical register not assigned to live range. Cannot free regs.");
298 
299     unsigned int idx = 0;
300     if (setInstID)
301     {
302         lr->getLastRef(idx);
303     }
304 
305     if (!lr->isUseUnAvailableReg())
306     {
307         pregManager.freeRegs(preg->asGreg()->getRegNum(),
308             sregnum,
309             lr->getSizeInWords(),
310             idx);
311     }
312 }
313 
printActives()314 void globalLinearScan::printActives()
315 {
316     std::cout << "====================ACTIVATE START===================" << std::endl;
317     for (auto lr : active)
318     {
319         unsigned int start, end;
320 
321         lr->getFirstRef(start);
322         lr->getLastRef(end);
323 
324         int startregnum, endregnum, startsregnum=0, endsregnum;
325         G4_VarBase* op;
326         op = lr->getPhyReg(startsregnum);
327 
328         startregnum = endregnum = op->asGreg()->getRegNum();
329         endsregnum = startsregnum + (lr->getTopDcl()->getNumElems() * lr->getTopDcl()->getElemSize() / 2) - 1;
330 
331         if (lr->getTopDcl()->getNumRows() > 1) {
332             endregnum = startregnum + lr->getTopDcl()->getNumRows() - 1;
333 
334             if (lr->getTopDcl()->getWordSize() > 0)
335             {
336                 endsregnum = lr->getTopDcl()->getWordSize() % numEltPerGRF<Type_UW>() - 1;
337                 if (endsregnum < 0) endsregnum = 15;
338             }
339             else
340                 endsregnum = 15; // last word in GRF
341         }
342         if (lr->hasIndirectAccess())
343         {
344             std::cout << "INDIR: ";
345         }
346         else
347         {
348             std::cout << "DIR  : ";
349         }
350         if (lr->getPreAssigned())
351         {
352             std::cout << "\tPRE: ";
353         }
354         else
355         {
356             std::cout << "\tNOT: ";
357         }
358 
359         std::cout << lr->getTopDcl()->getName() << "(" << start << ", " << end << ", " << lr->getTopDcl()->getByteSize() << ")";
360         std::cout << " (r" << startregnum << "." << startsregnum << ":w - " <<
361             "r" << endregnum << "." << endsregnum << ":w)";
362         std::cout << std::endl;
363     }
364     for (int i = 0; i < (int)(numRegLRA); i++)
365     {
366         std::cout << "\nR" << i << ":";
367 
368         if (activeGRF[i].activeInput.size())
369         {
370             for (auto lr : activeGRF[i].activeLV)
371             {
372                 int startregnum, endregnum, startsregnum=0, endsregnum;
373                 G4_VarBase* op;
374                 op = lr->getPhyReg(startsregnum);
375 
376                 startregnum = endregnum = op->asGreg()->getRegNum();
377                 endsregnum = startsregnum + (lr->getTopDcl()->getNumElems() * lr->getTopDcl()->getElemSize() / 2) - 1;
378 
379                 if (lr->getTopDcl()->getNumRows() > 1) {
380                     endregnum = startregnum + lr->getTopDcl()->getNumRows() - 1;
381 
382                     if (lr->getTopDcl()->getWordSize() > 0)
383                     {
384                         endsregnum = lr->getTopDcl()->getWordSize() % numEltPerGRF<Type_UW>() - 1;
385                         if (endsregnum < 0) endsregnum = 15;
386                     }
387                     else
388                         endsregnum = 15; // last word in GRF
389                 }
390 
391                 std::cout << "\tIN: " << lr->getTopDcl()->getName();
392                 std::cout << "(r" << startregnum << "." << startsregnum << ":w - " <<
393                     "r" << endregnum << "." << endsregnum << ":w)";
394                 //std::cout << std::endl;
395             }
396         }
397 
398         if (activeGRF[i].activeLV.size())
399         {
400             // There may be multiple variables take same register with different offsets
401             for (auto lr : activeGRF[i].activeLV)
402             {
403                 int startsregnum=0;
404                 G4_VarBase* op = lr->getPhyReg(startsregnum);
405 
406                 int startregnum = op->asGreg()->getRegNum();
407                 int endregnum = startregnum;
408                 int endsregnum = startsregnum + (lr->getTopDcl()->getNumElems() * lr->getTopDcl()->getElemSize() / 2) - 1;
409 
410                 if (lr->getTopDcl()->getNumRows() > 1) {
411                     endregnum = startregnum + lr->getTopDcl()->getNumRows() - 1;
412 
413                     if (lr->getTopDcl()->getWordSize() > 0)
414                     {
415                         endsregnum = lr->getTopDcl()->getWordSize() % numEltPerGRF<Type_UW>() - 1;
416                         if (endsregnum < 0) endsregnum = 15;
417                     }
418                     else
419                         endsregnum = 15; // last word in GRF
420                 }
421 
422                 std::cout << "\t" << lr->getTopDcl()->getName();
423                 std::cout << "(r" << startregnum << "." << startsregnum << ":w - " <<
424                     "r" << endregnum << "." << endsregnum << ":w)";
425             }
426         }
427     }
428     std::cout << "====================ACTIVATE END===================" << std::endl;
429 }
430 
expireAllActive()431 void globalLinearScan::expireAllActive()
432 {
433     if (active.size() > 0)
434     {
435         // Expire any remaining ranges
436         LSLiveRange* lastActive = active.back();
437         unsigned int endIdx;
438 
439         lastActive->getLastRef(endIdx);
440 
441         expireGlobalRanges(endIdx);
442     }
443 }
444 
linearScanMarkReferencesInOpnd(G4_Operand * opnd,bool isEOT,bool isCall)445 void LinearScanRA::linearScanMarkReferencesInOpnd(G4_Operand* opnd, bool isEOT, bool isCall)
446 {
447     G4_Declare* topdcl = NULL;
448 
449     if ((opnd->isSrcRegRegion() || opnd->isDstRegRegion()))
450     {
451         topdcl = GetTopDclFromRegRegion(opnd);
452 
453         if (topdcl &&
454             (topdcl->getRegFile() == G4_GRF || topdcl->getRegFile() == G4_INPUT))
455         {
456             // Handle GRF here
457             MUST_BE_TRUE(topdcl->getAliasDeclare() == NULL, "Not topdcl");
458             LSLiveRange* lr = GetOrCreateLocalLiveRange(topdcl);
459 
460             lr->recordRef(curBB_, false);
461             if (isEOT)
462             {
463                 lr->markEOT();
464             }
465             if (topdcl->getRegVar()
466                 && topdcl->getRegVar()->isPhyRegAssigned()
467                 && topdcl->getRegVar()->getPhyReg()->isGreg())
468             {
469                 lr->setPreAssigned(true);
470             }
471             if (isCall)
472             {
473                 lr->setIsCall(true);
474             }
475             if (topdcl->getRegFile() == G4_INPUT)
476             {
477                 BBVector[curBB_->getId()]->setRefInput(true);
478             }
479         }
480     }
481     else if (opnd->isAddrExp())
482     {
483         G4_AddrExp* addrExp = opnd->asAddrExp();
484 
485         topdcl = addrExp->getRegVar()->getDeclare();
486         while (topdcl->getAliasDeclare() != NULL)
487             topdcl = topdcl->getAliasDeclare();
488 
489         MUST_BE_TRUE(topdcl != NULL, "Top dcl was null for addr exp opnd");
490 
491         LSLiveRange* lr = GetOrCreateLocalLiveRange(topdcl);
492 
493         lr->recordRef(curBB_, false);
494         lr->markIndirectRef(true);
495         if (topdcl->getRegVar()
496             && topdcl->getRegVar()->isPhyRegAssigned()
497             && topdcl->getRegVar()->getPhyReg()->isGreg())
498         {
499             lr->setPreAssigned(true);
500         }
501         if (topdcl->getRegFile() == G4_INPUT)
502         {
503             BBVector[curBB_->getId()]->setRefInput(true);
504         }
505     }
506 }
507 
linearScanMarkReferencesInInst(INST_LIST_ITER inst_it)508 void LinearScanRA::linearScanMarkReferencesInInst(INST_LIST_ITER inst_it)
509 {
510     auto inst = (*inst_it);
511 
512     // Scan dst
513     G4_Operand* dst = inst->getDst();
514     if (dst)
515     {
516         linearScanMarkReferencesInOpnd(dst, false, inst->isCall());
517     }
518 
519     // Scan srcs
520     for (int i = 0, nSrcs = inst->getNumSrc(); i < nSrcs; i++)
521     {
522         G4_Operand* src = inst->getSrc(i);
523 
524         if (src)
525         {
526             linearScanMarkReferencesInOpnd(src, inst->isEOT(), inst->isCall());
527         }
528     }
529 }
530 
linearScanMarkReferences(unsigned int & numRowsEOT)531 void LinearScanRA::linearScanMarkReferences(unsigned int& numRowsEOT)
532 {
533     // Iterate over all BBs
534     for (auto curBB : kernel.fg)
535     {
536         curBB_ = curBB;
537         // Iterate over all insts
538         for (INST_LIST_ITER inst_it = curBB->begin(), inst_end = curBB->end(); inst_it != inst_end; ++inst_it)
539         {
540             G4_INST* curInst = (*inst_it);
541 
542             if (curInst->isPseudoKill() ||
543                 curInst->isLifeTimeEnd())
544             {
545                 if (curInst->isLifeTimeEnd())
546                 {
547                     linearScanMarkReferencesInInst(inst_it);
548                 }
549                 continue;
550             }
551 
552             if (curInst->isEOT() && kernel.fg.builder->hasEOTGRFBinding())
553             {
554                 numRowsEOT += curInst->getSrc(0)->getTopDcl()->getNumRows();
555 
556                 if (curInst->isSplitSend() && !curInst->getSrc(1)->isNullReg())
557                 {
558                     // both src0 and src1 have to be >=r112
559                     numRowsEOT += curInst->getSrc(1)->getTopDcl()->getNumRows();
560                 }
561             }
562 
563             linearScanMarkReferencesInInst(inst_it);
564         }
565 
566         if (BBVector[curBB->getId()]->hasBackEdgeIn()
567             || curBB->getId() == 0)
568         {
569             for (unsigned i = 0; i < kernel.Declares.size(); i++)
570             {
571                 G4_Declare* dcl = kernel.Declares[i];
572                 if (dcl->getAliasDeclare() != NULL)
573                 {
574                     continue;
575                 }
576 
577                 if (dcl->getRegFile() != G4_GRF && dcl->getRegFile() != G4_INPUT)
578                 {
579                     continue;
580                 }
581 
582                 LSLiveRange* lr = gra.getSafeLSLR(dcl);
583                 if (lr == nullptr)
584                 {
585                     continue;
586                 }
587 
588                 if (!l.isEmptyLiveness() &&
589                     l.isLiveAtEntry(curBB, dcl->getRegVar()->getId()))
590                 {
591                     lr->recordRef(curBB, true);
592                 }
593             }
594         }
595     }
596 
597     getGlobalDeclares();
598 }
599 
isLiveRangeGlobal() const600 bool LSLiveRange::isLiveRangeGlobal() const
601 {
602     if (numRefsInFG > 1 ||
603         topdcl->isOutput() == true ||
604         (topdcl->getRegVar()
605         && topdcl->getRegVar()->isPhyRegAssigned()
606         && topdcl->getRegVar()->getPhyReg()->isGreg()))
607     {
608         return true;
609     }
610 
611     return false;
612 }
613 
getGlobalDeclares()614 void LinearScanRA::getGlobalDeclares()
615 {
616     for (G4_Declare* dcl : kernel.Declares)
617     {
618         const LSLiveRange* lr = gra.getSafeLSLR(dcl);
619 
620         if (lr && lr->isLiveRangeGlobal())
621         {
622             globalDeclares.push_back(dcl);
623         }
624     }
625 
626     return;
627 }
628 
markBackEdges()629 void LinearScanRA::markBackEdges()
630 {
631     unsigned numBBId = (unsigned)kernel.fg.size();
632     BBVector.resize(numBBId);
633 
634     for (auto curBB : kernel.fg)
635     {
636         BBVector[curBB->getId()] = new (mem)G4_BB_LS(curBB);
637     }
638 
639     for (auto curBB : kernel.fg)
640     {
641         for (auto succBB : curBB->Succs)
642         {
643             if (curBB->getId() >= succBB->getId())
644             {
645                 BBVector[succBB->getId()]->setBackEdgeIn(true);
646                 BBVector[curBB->getId()]->setBackEdgeOut(true);
647             }
648         }
649     }
650 
651     if (!kernel.fg.isReducible())
652     {
653         //Find exit BB of SCC and map it to the head BB of SCC
654         SCCAnalysis SCCFinder(kernel.fg);
655         SCCFinder.run();
656         for (auto iter = SCCFinder.SCC_begin(), iterEnd = SCCFinder.SCC_end(); iter != iterEnd; ++iter)
657         {
658             auto&& anSCC = *iter;
659             std::unordered_set<G4_BB*> SCCSucc; // any successor BB of the SCC
660             G4_BB* headBB = anSCC.getEarliestBB();
661             for (auto BI = anSCC.body_begin(), BIEnd = anSCC.body_end(); BI != BIEnd; ++BI)
662             {
663                 G4_BB* bb = *BI;
664                 for (auto succ : bb->Succs)
665                 {
666                     if (!anSCC.isMember(succ))
667                     {
668                         SCCSucc.insert(succ);
669                     }
670                 }
671             }
672             for (auto exitBB : SCCSucc) //map the exitBB to the head of the SCC
673             {
674                 loopHeadExitMap[headBB].push_back(exitBB);
675             }
676         }
677     }
678     else
679     {
680         //Find exit BB of loop and map it to the head BB of loop
681         for (auto&& iter : kernel.fg.naturalLoops)
682         {
683             auto&& backEdge = iter.first;
684             G4_BB* headBB = backEdge.second;
685             const std::set<G4_BB*>& loopBody = iter.second;
686 
687             for (auto block : loopBody)
688             {
689                 for (auto succBB : block->Succs)
690                 {
691                     if (loopBody.find(succBB) == loopBody.end())
692                     {
693                         G4_BB* exitBB = succBB;
694 
695                         unsigned latchBBId = (backEdge.first)->getId();
696                         unsigned exitBBId = succBB->getId();
697                         if (exitBBId < latchBBId &&
698                             succBB->Succs.size() == 1)
699                         {
700                             exitBB = succBB->Succs.front();
701                         }
702 
703                         loopHeadExitMap[headBB].push_back(exitBB); //map the exitBB to the head of the loop
704                     }
705                 }
706             }
707         }
708     }
709 }
710 
createLiveIntervals()711 void LinearScanRA::createLiveIntervals()
712 {
713     for (auto dcl : gra.kernel.Declares)
714     {
715         // Mark those physical registers busy that are declared with Output attribute
716         // The live interval will gurantee they are not reused.
717         if (dcl->isOutput() &&
718             dcl->isInput())
719         {
720             pregs->markPhyRegs(dcl);
721         }
722 
723         if (dcl->getAliasDeclare() != NULL)
724         {
725             continue;
726         }
727         LSLiveRange*lr = new (mem)LSLiveRange();
728         gra.setLSLR(dcl, lr);
729         allocForbiddenVector(lr);
730     }
731 }
732 
preRAAnalysis()733 void LinearScanRA::preRAAnalysis()
734 {
735     int numGRF = kernel.getNumRegTotal();
736 
737     // Clear LSLiveRange* computed preRA
738     gra.clearLocalLiveRanges();
739 
740     createLiveIntervals();
741 
742     markBackEdges();
743     // Mark references made to decls
744     linearScanMarkReferences(numRowsEOT);
745 
746     // Check whether pseudo_kill/lifetime.end are only references
747     // for their respective variables. Remove them if so. Doing this
748     // helps reduce number of variables in symbol table increasing
749     // changes of skipping global RA.
750     removeUnrequiredLifetimeOps();
751 
752     numRegLRA = numGRF;
753 
754     unsigned int reservedGRFNum = builder.getOptions()->getuInt32Option(vISA_ReservedGRFNum);
755     bool hasStackCall = kernel.fg.getHasStackCalls() || kernel.fg.getIsStackCallFunc();
756     if (hasStackCall || reservedGRFNum || builder.getOption(vISA_Debug))
757     {
758         std::vector<unsigned int> forbiddenRegs;
759         unsigned int stackCallRegSize = hasStackCall ? gra.kernel.numReservedABIGRF() : 0;
760         getForbiddenGRFs(forbiddenRegs, kernel, stackCallRegSize, 0, reservedGRFNum);
761         for (unsigned int i = 0, size = forbiddenRegs.size(); i < size; i++)
762         {
763             unsigned int regNum = forbiddenRegs[i];
764             pregs->setGRFUnavailable(regNum); //un-available will always be there, if it's conflict with input or pre-assigned, it's still un-available.
765         }
766     }
767     else
768     {
769         pregs->setSimpleGRFAvailable(true);
770         const Options* opt = builder.getOptions();
771         if (kernel.getInt32KernelAttr(Attributes::ATTR_Target) != VISA_3D ||
772             opt->getOption(vISA_enablePreemption) ||
773             opt->getOption(vISA_ReserveR0))
774         {
775             pregs->setR0Forbidden();
776         }
777 
778         if (opt->getOption(vISA_enablePreemption))
779         {
780             pregs->setR1Forbidden();
781         }
782     }
783     return;
784 }
785 
getCalleeSaveRegisters()786 void LinearScanRA::getCalleeSaveRegisters()
787 {
788     unsigned int callerSaveNumGRF = builder.kernel.getCallerSaveLastGRF() + 1;
789     unsigned int numCalleeSaveRegs = builder.kernel.getNumCalleeSaveRegs();
790 
791     gra.calleeSaveRegs.resize(numCalleeSaveRegs, false);
792     gra.calleeSaveRegCount = 0;
793 
794     G4_Declare* dcl = builder.kernel.fg.pseudoVCEDcl;
795     LSLiveRange* lr = gra.getLSLR(dcl);
796     const bool* forbidden = lr->getForbidden();
797     unsigned int startCalleeSave = builder.kernel.getCallerSaveLastGRF() + 1;
798     unsigned int endCalleeSave = startCalleeSave + builder.kernel.getNumCalleeSaveRegs() - 1;
799     for (unsigned i = 0; i < builder.kernel.getNumRegTotal(); i++)
800     {
801         if (forbidden[i])
802         {
803             if (i >= startCalleeSave && i < endCalleeSave)
804             {
805                 gra.calleeSaveRegs[i - callerSaveNumGRF] = true;
806                 gra.calleeSaveRegCount++;
807             }
808         }
809     }
810 }
811 
getCallerSaveRegisters()812 void LinearScanRA::getCallerSaveRegisters()
813 {
814     unsigned int callerSaveNumGRF = builder.kernel.getCallerSaveLastGRF() + 1;
815 
816     for (BB_LIST_ITER it = builder.kernel.fg.begin(); it != builder.kernel.fg.end(); ++it)
817     {
818         if ((*it)->isEndWithFCall())
819         {
820             gra.callerSaveRegsMap[(*it)].resize(callerSaveNumGRF, false);
821             gra.retRegsMap[(*it)].resize(callerSaveNumGRF, false);
822             unsigned callerSaveRegCount = 0;
823             G4_INST* callInst = (*it)->back();
824             ASSERT_USER((*it)->Succs.size() == 1, "fcall basic block cannot have more than 1 successor");
825             G4_Declare* dcl = builder.kernel.fg.fcallToPseudoDclMap[callInst->asCFInst()].VCA->getRegVar()->getDeclare();
826             LSLiveRange* lr = gra.getLSLR(dcl);
827 
828             const bool* forbidden = lr->getForbidden();
829             unsigned int startCalleeSave = 1;
830             unsigned int endCalleeSave = startCalleeSave + builder.kernel.getCallerSaveLastGRF();
831             for (unsigned i = 0; i < builder.kernel.getNumRegTotal(); i++)
832             {
833                 if (forbidden[i])
834                 {
835                     if (i >= startCalleeSave && i < endCalleeSave)
836                     {
837                         gra.callerSaveRegsMap[(*it)][i] = true;
838                         callerSaveRegCount++;
839                     }
840                 }
841             }
842 
843             //ret
844             const bool* rRegs = lr->getRetGRFs();
845             if (rRegs != nullptr)
846             {
847                 for (unsigned i = 0; i < builder.kernel.getNumRegTotal(); i++)
848                 {
849                     if (rRegs[i])
850                     {
851                         if (i >= startCalleeSave && i < endCalleeSave)
852                         {
853                             gra.retRegsMap[(*it)][i] = true;
854                         }
855                     }
856                 }
857             }
858 
859             gra.callerSaveRegCountMap[(*it)] = callerSaveRegCount;
860         }
861     }
862 }
863 
getSaveRestoreRegister()864 void LinearScanRA::getSaveRestoreRegister()
865 {
866     if (!builder.getIsKernel())
867     {
868         getCalleeSaveRegisters();
869     }
870     getCallerSaveRegisters();
871 }
872 
873 /*
874  * Calculate the last lexcial ID of executed instruction if the function is called
875  */
calculateFuncLastID()876 void LinearScanRA::calculateFuncLastID()
877 {
878     funcLastLexID.resize(kernel.fg.sortedFuncTable.size());
879     for (unsigned i = 0; i < kernel.fg.sortedFuncTable.size(); i++)
880     {
881         funcLastLexID[i] = 0;
882     }
883 
884     for (auto func : kernel.fg.sortedFuncTable)
885     {
886         unsigned fid = func->getId();
887 
888         if (fid == UINT_MAX)
889         {
890             // entry kernel
891             continue;
892         }
893 
894         funcLastLexID[fid] = func->getExitBB()->back()->getLexicalId() * 2;
895         for (auto&& callee : func->getCallees())
896         {
897             if (funcLastLexID[fid] < funcLastLexID[callee->getId()])
898             {
899                 funcLastLexID[fid] = funcLastLexID[callee->getId()];
900             }
901         }
902     }
903 }
904 
linearScanRA()905 int LinearScanRA::linearScanRA()
906 {
907     if (kernel.fg.getIsStackCallFunc())
908     {
909         // Allocate space to store Frame Descriptor
910         nextSpillOffset += 32;
911         scratchOffset += 32;
912     }
913 
914     std::list<LSLiveRange*> spillLRs;
915     int iterator = 0;
916     uint32_t GRFSpillFillCount = 0;
917     bool hasStackCall = kernel.fg.getHasStackCalls() || kernel.fg.getIsStackCallFunc();
918     int globalScratchOffset = kernel.getInt32KernelAttr(Attributes::ATTR_SpillMemOffset);
919     bool useScratchMsgForSpill = !hasStackCall && (globalScratchOffset < (int)(SCRATCH_MSG_LIMIT * 0.6));
920     bool enableSpillSpaceCompression = builder.getOption(vISA_SpillSpaceCompression);
921     do {
922         spillLRs.clear();
923         funcCnt = 0;
924         std::vector<LSLiveRange*> eotLiveIntervals;
925         inputIntervals.clear();
926         setLexicalID();
927         calculateFuncLastID();
928 
929 #ifdef DEBUG_VERBOSE_ON
930         COUT_ERROR << "=============  ITERATION: " << iterator << "============" << std::endl;
931 #endif
932 
933         //Input
934         PhyRegsLocalRA initPregs = (*pregs);
935         calculateInputIntervalsGlobal(initPregs);
936 #ifdef DEBUG_VERBOSE_ON
937         COUT_ERROR << "===== printInputLiveIntervalsGlobal============" << kernel.getName() << std::endl;
938         printInputLiveIntervalsGlobal();
939 #endif
940 
941         liveThroughIntervals.clear();
942         globalLiveIntervals.clear();
943         preAssignedLiveIntervals.clear();
944         eotLiveIntervals.clear();
945         unsigned latestLexID = 0;
946 
947         for (auto bb : kernel.fg)
948         {
949             calculateLiveIntervalsGlobal(bb, globalLiveIntervals, eotLiveIntervals);
950             latestLexID = bb->back()->getLexicalId() * 2;
951         }
952 
953         if (liveThroughIntervals.size())
954         {
955             for (auto lr : liveThroughIntervals)
956             {
957                 globalLiveIntervals.insert(globalLiveIntervals.begin(), lr);
958             }
959         }
960 
961 #ifdef DEBUG_VERBOSE_ON
962         COUT_ERROR << "===== globalLiveIntervals============" << std::endl;
963         printLiveIntervals(globalLiveIntervals);
964 #endif
965 
966         if (eotLiveIntervals.size())
967         {
968             assignEOTLiveRanges(builder, eotLiveIntervals);
969             for (auto lr : eotLiveIntervals)
970             {
971                 preAssignedLiveIntervals.push_back(lr);
972             }
973         }
974 #ifdef DEBUG_VERBOSE_ON
975         COUT_ERROR << "===== preAssignedLiveIntervals============" << std::endl;
976         printLiveIntervals(preAssignedLiveIntervals);
977 #endif
978         PhyRegsManager pregManager(initPregs, doBCR);
979         globalLinearScan ra(gra, &l, globalLiveIntervals, &preAssignedLiveIntervals, inputIntervals, pregManager,
980             mem, numRegLRA, numRowsEOT, latestLexID,
981             doBCR, highInternalConflict);
982         if (!ra.runLinearScan(builder, globalLiveIntervals, spillLRs))
983         {
984             undoLinearScanRAAssignments();
985             return VISA_FAILURE;
986         }
987 
988         if (spillLRs.size())
989         {
990             if (iterator == 0 &&
991                 enableSpillSpaceCompression &&
992                 kernel.getInt32KernelAttr(Attributes::ATTR_Target) == VISA_3D &&
993                 !(kernel.fg.getHasStackCalls() || kernel.fg.getIsStackCallFunc()))
994             {
995                 unsigned int spillSize = 0;
996                 for (auto lr : spillLRs)
997                 {
998                     spillSize += lr->getTopDcl()->getByteSize();
999                 }
1000                 if ((spillSize * 1.5) < (SCRATCH_MSG_LIMIT - nextSpillOffset))
1001                 {
1002                     enableSpillSpaceCompression = false;
1003                 }
1004             }
1005 
1006             SpillManagerGRF spillGRF(gra,
1007                 nextSpillOffset,
1008                 l.getNumSelectedVar(),
1009                 &l,
1010                 &spillLRs,
1011                 enableSpillSpaceCompression,
1012                 useScratchMsgForSpill,
1013                 builder.avoidDstSrcOverlap());
1014 
1015             spillGRF.spillLiveRanges(&kernel);
1016             nextSpillOffset = spillGRF.getNextOffset();;
1017             scratchOffset = std::max(scratchOffset, spillGRF.getNextScratchOffset());
1018 #ifdef DEBUG_VERBOSE_ON
1019             COUT_ERROR << "===== printSpillLiveIntervals============" << std::endl;
1020             printSpillLiveIntervals(spillLRs);
1021 #endif
1022             for (auto lr : spillLRs)
1023             {
1024                 GRFSpillFillCount += lr->getNumRefs();
1025             }
1026 
1027             // update jit metadata information for spill
1028             if (auto jitInfo = builder.getJitInfo())
1029             {
1030                 jitInfo->isSpill = nextSpillOffset > 0;
1031                 jitInfo->hasStackcalls = kernel.fg.getHasStackCalls();
1032 
1033                 if (builder.kernel.fg.frameSizeInOWord != 0) {
1034                     // jitInfo->spillMemUsed is the entire visa stack size. Consider the caller/callee
1035                     // save size if having caller/callee save
1036                     // globalScratchOffset in unit of byte, others in Oword
1037                     //
1038                     //                               vISA stack
1039                     //  globalScratchOffset     -> ---------------------
1040                     //  FIXME: should be 0-based   |  spill            |
1041                     //                             |                   |
1042                     //  calleeSaveAreaOffset    -> ---------------------
1043                     //                             |  callee save      |
1044                     //  callerSaveAreaOffset    -> ---------------------
1045                     //                             |  caller save      |
1046                     //  paramOverflowAreaOffset -> ---------------------
1047                     jitInfo->spillMemUsed =
1048                         builder.kernel.fg.frameSizeInOWord * 16;
1049 
1050                     // reserve spillMemUsed #bytes before 8kb boundary
1051                     kernel.getGTPinData()->setScratchNextFree(8 * 1024 - kernel.getGTPinData()->getNumBytesScratchUse());
1052                 }
1053                 else {
1054                     jitInfo->spillMemUsed = nextSpillOffset;
1055                     kernel.getGTPinData()->setScratchNextFree(nextSpillOffset);
1056                 }
1057                 jitInfo->numGRFSpillFill = GRFSpillFillCount;
1058             }
1059             undoLinearScanRAAssignments();
1060         }
1061 
1062         if (builder.getOption(vISA_RATrace))
1063         {
1064             std::cout << "\titeration: " << iterator << "\n";
1065             std::cout << "\t\tnextSpillOffset: " << nextSpillOffset << "\n";
1066             std::cout << "\t\tGRFSpillFillCount: " << GRFSpillFillCount << "\n";
1067         }
1068 
1069         auto underSpillThreshold = [this](int numSpill, int asmCount)
1070         {
1071             int threshold = std::min(builder.getOptions()->getuInt32Option(vISA_AbortOnSpillThreshold), 200u);
1072             return (numSpill * 200) < (threshold * asmCount);
1073         };
1074 
1075         int instNum = 0;
1076         for (auto bb : kernel.fg)
1077         {
1078             instNum += (int)bb->size();
1079         }
1080         if (GRFSpillFillCount && builder.getOption(vISA_AbortOnSpill) && !underSpillThreshold(GRFSpillFillCount, instNum))
1081         {
1082             // update jit metadata information
1083             if (auto jitInfo = builder.getJitInfo())
1084             {
1085                 jitInfo->isSpill = true;
1086                 jitInfo->spillMemUsed = 0;
1087                 jitInfo->numAsmCount = instNum;
1088                 jitInfo->numGRFSpillFill = GRFSpillFillCount;
1089             }
1090 
1091             // Early exit when -abortonspill is passed, instead of
1092             // spending time inserting spill code and then aborting.
1093             return VISA_SPILL;
1094         }
1095 
1096         iterator++;
1097     } while (spillLRs.size() && iterator < MAXIMAL_ITERATIONS);
1098 
1099     if (spillLRs.size())
1100     {
1101         std::stringstream spilledVars;
1102         for (auto dcl : kernel.Declares)
1103         {
1104             if (dcl->isSpilled() && dcl->getRegFile() == G4_GRF)
1105             {
1106                 spilledVars << dcl->getName() << "\t";
1107             }
1108         }
1109 
1110         MUST_BE_TRUE(false,
1111             "ERROR: " << kernel.getNumRegTotal() - builder.getOptions()->getuInt32Option(vISA_ReservedGRFNum)
1112             << " GRF registers are NOT enough to compile kernel " << kernel.getName() << "!"
1113             << " The maximum register pressure in the kernel is higher"
1114             << " than the available physical registers in hardware (even"
1115             << " with spill code)."
1116             << " Please consider rewriting the kernel."
1117             << " Compiling with the symbolic register option and inspecting the"
1118             << " spilled registers may help in determining the region of high pressure.\n"
1119             << "The spilling virtual registers are as follows: "
1120             << spilledVars.str());
1121 
1122         spillLRs.clear();
1123         return VISA_FAILURE;
1124     }
1125 
1126     if (kernel.fg.getHasStackCalls() || kernel.fg.getIsStackCallFunc())
1127     {
1128         getSaveRestoreRegister();
1129         unsigned localSpillAreaOwordSize = ROUND(scratchOffset, 64) / 16;
1130         gra.addSaveRestoreCode(localSpillAreaOwordSize);
1131     }
1132     return VISA_SUCCESS;
1133 }
1134 
doLinearScanRA()1135 int LinearScanRA::doLinearScanRA()
1136 {
1137     if (builder.getOption(vISA_RATrace))
1138     {
1139         std::cout << "--Global linear Scan RA--\n";
1140     }
1141     //Initial pregs which will be used in the preRAAnalysis
1142     PhyRegsLocalRA phyRegs(&builder, kernel.getNumRegTotal());
1143     pregs = &phyRegs;
1144     preRAAnalysis();
1145 
1146     int success = linearScanRA();
1147 
1148     if (success == VISA_SUCCESS)
1149     {
1150         kernel.setRAType(RA_Type::GLOBAL_LINEAR_SCAN_RA);
1151     }
1152 
1153     return success;
1154 }
1155 
undoLinearScanRAAssignments()1156 void LinearScanRA::undoLinearScanRAAssignments()
1157 {
1158     // Undo all assignments made by local RA
1159     for (auto dcl : kernel.Declares)
1160     {
1161         LSLiveRange* lr = gra.getLSLR(dcl);
1162         if (lr != NULL)
1163         {
1164             if (lr->getAssigned() == true)
1165             {
1166                 // Undo the assignment
1167                 lr->setAssigned(false);
1168                 if (lr->getTopDcl()->getRegFile() != G4_INPUT &&
1169                     !lr->getPreAssigned())
1170                 {
1171                     lr->getTopDcl()->getRegVar()->resetPhyReg();
1172                 }
1173                 lr->resetPhyReg();
1174             }
1175             lr->setActiveLR(false);
1176             lr->setFirstRef(NULL, 0);
1177             lr->setLastRef(NULL, 0);
1178             lr->clearForbiddenGRF(kernel.getNumRegTotal());
1179             lr->setPushed(false);
1180             if (kernel.getOption(vISA_GenerateDebugInfo))
1181             {
1182                 auto lrInfo = kernel.getKernelDebugInfo()->getLiveIntervalInfo(lr->getTopDcl(), false);
1183                 if (lrInfo)
1184                     lrInfo->clearLiveIntervals();
1185             }
1186         }
1187     }
1188 }
1189 
setPreAssignedLR(LSLiveRange * lr,std::vector<LSLiveRange * > & preAssignedLiveIntervals)1190 void LinearScanRA::setPreAssignedLR(LSLiveRange* lr, std::vector<LSLiveRange*> & preAssignedLiveIntervals)
1191 {
1192     int subreg = 0;
1193     G4_VarBase* reg = lr->getPhyReg(subreg);
1194     unsigned regnum = lr->getTopDcl()->getRegVar()->getPhyReg()->asGreg()->getRegNum();
1195     if (reg == nullptr)
1196     {
1197         unsigned int subReg = lr->getTopDcl()->getRegVar()->getPhyRegOff();
1198         unsigned int subRegInWord = subReg * lr->getTopDcl()->getRegVar()->getDeclare()->getElemSize() / 2;
1199 
1200         lr->setPhyReg(builder.phyregpool.getGreg(regnum), subRegInWord);
1201     }
1202     lr->setAssigned(true);
1203 
1204     //Pre assigned registers may overlap the unavailable registers
1205     lr->setUseUnAvailableReg(isUseUnAvailableRegister(regnum, lr->getTopDcl()->getNumRows()));
1206 
1207     //Insert into preAssgined live intervals
1208     //If the pre-assigned register is marked as unavailable, not join the live range
1209     //FIXME: What about partial overlap?
1210     if (std::find(preAssignedLiveIntervals.begin(), preAssignedLiveIntervals.end(), lr) == preAssignedLiveIntervals.end())
1211     {
1212         preAssignedLiveIntervals.push_back(lr);
1213     }
1214 
1215     return;
1216 }
1217 
setDstReferences(G4_BB * bb,INST_LIST_ITER inst_it,G4_Declare * dcl,std::vector<LSLiveRange * > & liveIntervals,std::vector<LSLiveRange * > & eotLiveIntervals)1218 void LinearScanRA::setDstReferences(G4_BB* bb, INST_LIST_ITER inst_it, G4_Declare *dcl, std::vector<LSLiveRange*>& liveIntervals, std::vector<LSLiveRange*>& eotLiveIntervals)
1219 {
1220     G4_INST* curInst = (*inst_it);
1221     LSLiveRange* lr = gra.getLSLR(dcl);
1222 
1223     if (!lr && dcl->getRegFile() == G4_GRF) //The new variables generated by spill/fill, mark reference should handle it
1224     {
1225         lr = CreateLocalLiveRange(dcl);
1226     }
1227 
1228     if (dcl->isLiveIn() && dcl->isOutput() && !lr->isGRFRegAssigned())
1229     {
1230         if (!lr->isPushedToIntervalList())
1231         {
1232             lr->setFirstRef(curInst, 0);
1233             lr->setLastRef(curInst, lastInstLexID * 2 + 1);
1234             liveThroughIntervals.push_back(lr);
1235             lr->setPushed(true);
1236         }
1237         return;
1238     }
1239 
1240     if (lr == nullptr ||
1241         (dcl->getRegFile() == G4_INPUT && dcl != kernel.fg.builder->getStackCallArg() && dcl != kernel.fg.builder->getStackCallRet())||
1242         (lr->isGRFRegAssigned() && (!dcl->getRegVar()->isGreg())))  //ARF
1243     {
1244         return;
1245     }
1246 
1247     if (dcl == kernel.fg.builder->getStackCallArg())
1248     {
1249         if (stackCallArgLR == nullptr)
1250         {
1251             lr = new (mem)LSLiveRange();
1252             stackCallArgLR = lr;
1253             lr->setTopDcl(dcl);
1254             allocForbiddenVector(lr);
1255         }
1256         else
1257         {
1258             lr = stackCallArgLR;
1259         }
1260     }
1261     else if (dcl == kernel.fg.builder->getStackCallRet())
1262     {
1263         if (stackCallRetLR == nullptr)
1264         {
1265             lr = new (mem)LSLiveRange();
1266             stackCallRetLR = lr;
1267             lr->setTopDcl(dcl);
1268             allocForbiddenVector(lr);
1269         }
1270         else
1271         {
1272             lr = stackCallRetLR;
1273         }
1274     }
1275     // Check whether local LR is a candidate
1276     if (lr->isGRFRegAssigned() == false)
1277     {
1278         if (!lr->isPushedToIntervalList())
1279         {
1280             liveIntervals.push_back(lr);
1281             lr->setPushed(true);
1282         }
1283 
1284         unsigned int startIdx;
1285         if (lr->getFirstRef(startIdx) == NULL && startIdx == 0)
1286         {
1287             lr->setFirstRef(curInst, curInst->getLexicalId() * 2);
1288         }
1289         lr->setLastRef(curInst, curInst->getLexicalId() * 2);
1290     }
1291     else if (dcl->getRegVar()->getPhyReg()->isGreg()) //Assigned already and is GRF
1292     { //Such as stack call varaibles
1293         unsigned int startIdx;
1294         if (!lr->isPushedToIntervalList())
1295         {
1296             if (!curInst->isFCall())
1297             {
1298                 liveIntervals.push_back(lr);
1299             }
1300             lr->setPushed(true);
1301 
1302             //Mark live range as assigned
1303             setPreAssignedLR(lr, preAssignedLiveIntervals);
1304         }
1305 
1306         if (lr->getFirstRef(startIdx) == NULL && startIdx == 0)
1307         {
1308             lr->setFirstRef(curInst, curInst->getLexicalId() * 2);
1309         }
1310         lr->setLastRef(curInst, curInst->getLexicalId() * 2);
1311     }
1312 
1313     if (lr->isEOT() && std::find(eotLiveIntervals.begin(), eotLiveIntervals.end(), lr) == eotLiveIntervals.end())
1314     {
1315         eotLiveIntervals.push_back(lr);
1316     }
1317 
1318     return;
1319 }
1320 
setSrcReferences(G4_BB * bb,INST_LIST_ITER inst_it,int srcIdx,G4_Declare * dcl,std::vector<LSLiveRange * > & liveIntervals,std::vector<LSLiveRange * > & eotLiveIntervals)1321 void LinearScanRA::setSrcReferences(G4_BB* bb, INST_LIST_ITER inst_it, int srcIdx, G4_Declare* dcl, std::vector<LSLiveRange*>& liveIntervals, std::vector<LSLiveRange*>& eotLiveIntervals)
1322 {
1323     G4_INST* curInst = (*inst_it);
1324     LSLiveRange* lr = gra.getLSLR(dcl);
1325 
1326     if (!lr && dcl->getRegFile() == G4_GRF)
1327     {
1328         lr = CreateLocalLiveRange(dcl);
1329     }
1330 
1331     if (dcl->isLiveIn() && dcl->isOutput() && !lr->isGRFRegAssigned())
1332     {
1333         if (!lr->isPushedToIntervalList())
1334         {
1335             lr->setFirstRef(curInst, 0);
1336             lr->setLastRef(curInst, lastInstLexID * 2 + 1);
1337             liveThroughIntervals.push_back(lr);
1338             lr->setPushed(true);
1339         }
1340         return;
1341     }
1342 
1343     if (lr == nullptr ||
1344         (dcl->getRegFile() == G4_INPUT && dcl != kernel.fg.builder->getStackCallRet() && dcl != kernel.fg.builder->getStackCallArg()) ||
1345         (lr->isGRFRegAssigned() && (!dcl->getRegVar()->isGreg())))  //ARF
1346     {
1347         return;
1348     }
1349 
1350     if (!lr->isPushedToIntervalList())
1351     {
1352         liveIntervals.push_back(lr);
1353         lr->setPushed(true);
1354         gra.addUndefinedDcl(dcl);
1355 
1356         unsigned int startIdx;
1357         if (lr->getFirstRef(startIdx) == NULL && startIdx == 0)
1358         {  //Since we scan from front to end, not referenced before means not defined.
1359 
1360             if (lr->isGRFRegAssigned() && dcl->getRegVar()->getPhyReg()->isGreg())
1361             {
1362                 lr->setFirstRef(nullptr, 1);
1363                 setPreAssignedLR(lr, preAssignedLiveIntervals);
1364             }
1365             else //Not pre-asssigned, temp
1366             {
1367                 lr->setFirstRef(curInst, curInst->getLexicalId() * 2);
1368             }
1369         }
1370     }
1371 
1372     lr->setLastRef(curInst, curInst->getLexicalId() * 2);
1373 
1374     if ((builder.WaDisableSendSrcDstOverlap() &&
1375         ((curInst->isSend() && srcIdx == 0) ||
1376             (curInst->isSplitSend() && srcIdx == 1)))
1377         || (curInst->isDpas() && srcIdx == 1)  //For DPAS, as part of same instruction, src1 should not have overlap with dst.
1378         || (builder.avoidDstSrcOverlap() && curInst->getDst() != NULL && hasDstSrcOverlapPotential(curInst->getDst(), curInst->getSrc(srcIdx)->asSrcRegRegion()))
1379         )
1380     {
1381         lr->setLastRef(curInst, curInst->getLexicalId() * 2 + 1);
1382     }
1383 
1384     if (lr->isEOT() && std::find(eotLiveIntervals.begin(), eotLiveIntervals.end(), lr) == eotLiveIntervals.end())
1385     {
1386         eotLiveIntervals.push_back(lr);
1387     }
1388 
1389     return;
1390 }
1391 
generateInputIntervals(G4_Declare * topdcl,G4_INST * inst,std::vector<uint32_t> & inputRegLastRef,PhyRegsLocalRA & initPregs,bool avoidSameInstOverlap)1392 void LinearScanRA::generateInputIntervals(G4_Declare *topdcl, G4_INST* inst, std::vector<uint32_t> &inputRegLastRef, PhyRegsLocalRA& initPregs, bool avoidSameInstOverlap)
1393 {
1394     unsigned int instID = inst->getLexicalId();
1395     G4_RegVar* var = topdcl->getRegVar();
1396     unsigned int regNum = var->getPhyReg()->asGreg()->getRegNum();
1397     unsigned int regOff = var->getPhyRegOff();
1398     unsigned int idx = regNum * numEltPerGRF<Type_UW>() +
1399         (regOff * topdcl->getElemSize()) / G4_WSIZE + topdcl->getWordSize() - 1;
1400 
1401     unsigned int numWords = topdcl->getWordSize();
1402     for (int i = numWords - 1; i >= 0; --i, --idx)
1403     {
1404         if ((inputRegLastRef[idx] == UINT_MAX || inputRegLastRef[idx] < instID) &&
1405             initPregs.isGRFAvailable(idx / numEltPerGRF<Type_UW>()))
1406         {
1407             inputRegLastRef[idx] = instID;
1408             if (avoidSameInstOverlap)
1409             {
1410                 inputIntervals.push_front(new (mem)LSInputLiveRange(idx, instID * 2 + 1));
1411             }
1412             else
1413             {
1414                 inputIntervals.push_front(new (mem)LSInputLiveRange(idx, instID * 2));
1415             }
1416 
1417             if (kernel.getOptions()->getOption(vISA_GenerateDebugInfo))
1418             {
1419                 updateDebugInfo(kernel, topdcl, 0, inst->getCISAOff());
1420             }
1421         }
1422     }
1423 
1424     initPregs.markPhyRegs(topdcl);
1425 
1426     return;
1427 }
1428 
1429 // Generate the input intervals for current BB.
1430 // The input live ranges either live through current BB or killed by current BB.
1431 // So, it's enough we check the live out of the BB and the BB it's self
calculateInputIntervalsGlobal(PhyRegsLocalRA & initPregs)1432 void LinearScanRA::calculateInputIntervalsGlobal(PhyRegsLocalRA &initPregs)
1433 {
1434     int numGRF = kernel.getNumRegTotal();
1435     std::vector<uint32_t> inputRegLastRef(numGRF * numEltPerGRF<Type_UW>(), UINT_MAX);
1436     G4_INST* lastInst = nullptr;
1437 
1438     for (BB_LIST_RITER bb_it = kernel.fg.rbegin(), bb_rend = kernel.fg.rend();
1439         bb_it != bb_rend;
1440         bb_it++)
1441     {
1442         G4_BB* bb = (*bb_it);
1443 
1444         //@ the end of BB
1445         if (BBVector[bb->getId()]->hasBackEdgeOut())
1446         {
1447             for (auto dcl : globalDeclares)
1448             {
1449                 if (dcl->getAliasDeclare() != NULL ||
1450                     dcl->isSpilled())
1451                     continue;
1452 
1453                 if (dcl->getRegFile() == G4_INPUT &&
1454                     dcl->getRegVar()->isGreg() && //Filter out the architecture registers
1455                     dcl->isOutput() == false &&  //Input and out should be marked as unavailable
1456                     !builder.isPreDefArg(dcl) &&  //Not stack call associated variables
1457                     l.isLiveAtExit(bb, dcl->getRegVar()->getId()))
1458                 {
1459                     MUST_BE_TRUE(dcl->getRegVar()->isPhyRegAssigned(), "Input variable has no pre-assigned physical register");
1460                     generateInputIntervals(dcl, bb->getInstList().back(), inputRegLastRef, initPregs, false);
1461                 }
1462             }
1463         }
1464 
1465         //@BB
1466         for (INST_LIST_RITER inst_it = bb->rbegin(), inst_rend = bb->rend();
1467             inst_it != inst_rend;
1468             inst_it++)
1469         {
1470             G4_INST* curInst = (*inst_it);
1471             G4_Declare* topdcl = NULL;
1472 
1473             if (lastInst == nullptr)
1474             {
1475                 lastInst = curInst;
1476             }
1477             // scan dst operand (may be unnecessary but added for safety)
1478             if (curInst->getDst() != NULL)
1479             {
1480                 // Scan dst
1481                 G4_DstRegRegion* dst = curInst->getDst();
1482                 topdcl = GetTopDclFromRegRegion(dst);
1483 
1484                 if (topdcl &&
1485                     topdcl->getRegFile() == G4_INPUT &&
1486                     topdcl->getRegVar()->isGreg() &&
1487                     topdcl->isOutput() == false &&
1488                     !builder.isPreDefArg(topdcl))
1489                 {
1490                     generateInputIntervals(topdcl, curInst, inputRegLastRef, initPregs, false);
1491                 }
1492             }
1493 
1494             // Scan src operands
1495             for (int i = 0, nSrcs = curInst->getNumSrc(); i < nSrcs; i++)
1496             {
1497                 G4_Operand* src = curInst->getSrc(i);
1498 
1499                 if (src == nullptr || src->isNullReg())
1500                 {
1501                     continue;
1502                 }
1503 
1504                 if (src->getTopDcl())
1505                 {
1506                     topdcl = GetTopDclFromRegRegion(src);
1507 
1508                     if (topdcl && topdcl->getRegFile() == G4_INPUT &&
1509                         (topdcl->getRegVar()->isGreg()) &&
1510                         topdcl->isOutput() == false &&
1511                         !builder.isPreDefArg(topdcl))
1512                     {
1513                         // Check whether it is input
1514                         if (builder.avoidDstSrcOverlap() &&
1515                             curInst->getDst() != NULL &&
1516                             hasDstSrcOverlapPotential(curInst->getDst(), src->asSrcRegRegion()))
1517                         {
1518                             generateInputIntervals(topdcl, curInst, inputRegLastRef, initPregs, true);
1519                         }
1520                         else
1521                         {
1522                             if (l.isLiveAtEntry(bb, topdcl->getRegVar()->getId()))
1523                             {
1524                                 generateInputIntervals(topdcl, curInst, inputRegLastRef, initPregs, false);
1525                             }
1526                             else //Not capture by liveness analysis
1527                             {
1528                                 generateInputIntervals(topdcl, lastInst, inputRegLastRef, initPregs, false);
1529                             }
1530                         }
1531                     }
1532                 }
1533                 else if (src->isAddrExp())
1534                 {
1535                     G4_AddrExp* addrExp = src->asAddrExp();
1536 
1537                     topdcl = addrExp->getRegVar()->getDeclare();
1538                     while (topdcl->getAliasDeclare() != NULL)
1539                         topdcl = topdcl->getAliasDeclare();
1540 
1541                     MUST_BE_TRUE(topdcl != NULL, "Top dcl was null for addr exp opnd");
1542 
1543                     if (topdcl->getRegFile() == G4_INPUT &&
1544                         topdcl->getRegVar()->isGreg() &&
1545                         topdcl->isOutput() == false &&
1546                         !builder.isPreDefArg(topdcl))
1547                     {
1548                         generateInputIntervals(topdcl, curInst, inputRegLastRef, initPregs, false);
1549                     }
1550                 }
1551             }
1552         }
1553     }
1554 
1555     return;
1556 }
1557 
1558 //
1559 //@ the entry of BB
1560 //
calculateLiveInIntervals(G4_BB * bb,std::vector<LSLiveRange * > & liveIntervals)1561 void LinearScanRA::calculateLiveInIntervals(G4_BB* bb, std::vector<LSLiveRange*>& liveIntervals)
1562 {
1563     //FIXME: The complexity is "block_num * declare_num"
1564     std::vector<LSLiveRange*> preAssignedLiveIntervals;
1565 
1566     for (auto dcl: globalDeclares)
1567     {
1568         if (dcl->getAliasDeclare() != NULL ||
1569             dcl->getRegFile() == G4_INPUT ||
1570             dcl->isSpilled())
1571         {
1572             continue;
1573         }
1574         LSLiveRange* lr = gra.getLSLR(dcl);
1575         if (lr && !l.isEmptyLiveness() &&
1576             l.isLiveAtEntry(bb, dcl->getRegVar()->getId()))  //Live in current BB
1577         {
1578             if (!lr->isPushedToIntervalList())
1579             {
1580                 if (lr->isGRFRegAssigned() && dcl->getRegVar()->isGreg())
1581                 {
1582                     setPreAssignedLR(lr, preAssignedLiveIntervals);
1583                 }
1584                 else
1585                 {
1586                     liveIntervals.push_back(lr);
1587                 }
1588                 lr->setPushed(true);
1589             }
1590 
1591             unsigned curIdx = 0;
1592             if (lr->getFirstRef(curIdx) == NULL && curIdx == 0) //not referenced before, assigned or not assigned?
1593             {
1594                 lr->setFirstRef((*bb->begin()), (*bb->begin())->getLexicalId() * 2);
1595             }
1596         }
1597         else //Live in the exist BB of a loop
1598         {
1599             //Extend the live ranges of live in variable of exit BB of the loop to the head of loop.
1600             std::map<G4_BB*, std::vector<G4_BB*>>::iterator loopHEIt = loopHeadExitMap.find(bb);
1601             if (loopHEIt != loopHeadExitMap.end()) //If current BB is the head of a loop
1602             {
1603                 for (auto exitBB : loopHeadExitMap[bb]) //for all exit BBs.
1604                 {
1605                     if (lr && !l.isEmptyLiveness() &&
1606                         l.isLiveAtEntry(exitBB, dcl->getRegVar()->getId()))
1607                     {
1608                         if (!lr->isPushedToIntervalList()) //Is the interval pushed or not
1609                         {
1610                             if (lr->isGRFRegAssigned() && dcl->getRegVar()->isGreg())
1611                             {
1612                                 setPreAssignedLR(lr, preAssignedLiveIntervals);
1613                             }
1614                             else
1615                             {
1616                                 liveIntervals.push_back(lr);
1617                             }
1618                             lr->setPushed(true);
1619                         }
1620 
1621                         unsigned curIdx = 0;
1622                         if (lr->getFirstRef(curIdx) == NULL && curIdx == 0) //not referenced before
1623                         {
1624                             lr->setFirstRef((*bb->begin()), (*bb->begin())->getLexicalId() * 2);
1625                         }
1626                     }
1627                 }
1628             }
1629         }
1630     }
1631 
1632     if (preAssignedLiveIntervals.size()&& bb->getId() == 0) //Should happen in the entry BB
1633     {
1634         liveIntervals.insert(liveIntervals.begin(), preAssignedLiveIntervals.begin(), preAssignedLiveIntervals.end());
1635     }
1636 
1637     return;
1638 }
1639 
calculateCurrentBBLiveIntervals(G4_BB * bb,std::vector<LSLiveRange * > & liveIntervals,std::vector<LSLiveRange * > & eotLiveIntervals)1640 void LinearScanRA::calculateCurrentBBLiveIntervals(G4_BB* bb, std::vector<LSLiveRange*>& liveIntervals, std::vector<LSLiveRange*>& eotLiveIntervals)
1641 {
1642     for (INST_LIST_ITER inst_it = bb->begin(), bbend = bb->end();
1643         inst_it != bbend;
1644         inst_it++)
1645     {
1646         G4_INST* curInst = (*inst_it);
1647         G4_Declare* topdcl = NULL;
1648 
1649         if (curInst->isPseudoKill() ||
1650             curInst->isLifeTimeEnd() ||
1651             curInst->isLabel())
1652         {
1653             continue;
1654         }
1655 
1656         if (curInst->isCall() == true)
1657         {
1658             const char* name = kernel.fg.builder->getNameString(kernel.fg.builder->mem, 32, "SCALL_%d", funcCnt++);
1659             G4_Declare* scallDcl = kernel.fg.builder->createDeclareNoLookup(name, G4_GRF, 1, 1, Type_UD);
1660             LSLiveRange* lr = CreateLocalLiveRange(scallDcl);
1661 
1662             liveIntervals.push_back(lr);
1663             lr->setPushed(true);
1664             lr->setFirstRef(curInst, curInst->getLexicalId() * 2);
1665 
1666             FuncInfo* callee = bb->getCalleeInfo();
1667             unsigned int funcId = callee->getId();
1668             lr->setLastRef(curInst, funcLastLexID[funcId]);
1669             lr->setIsCallSite(true);
1670         }
1671 
1672         if (curInst->isFCall())
1673         {
1674             G4_FCALL* fcall = kernel.fg.builder->getFcallInfo(curInst);
1675             G4_Declare* arg = kernel.fg.builder->getStackCallArg();
1676             G4_Declare* ret = kernel.fg.builder->getStackCallRet();
1677             MUST_BE_TRUE(fcall != NULL, "fcall info not found");
1678 
1679             uint16_t retSize = fcall->getRetSize();
1680             uint16_t argSize = fcall->getArgSize();
1681             if (ret && retSize > 0 && ret->getRegVar())
1682             {
1683                 LSLiveRange* stackCallRetLR = new (mem)LSLiveRange();
1684                 stackCallRetLR->setTopDcl(ret);
1685                 allocForbiddenVector(stackCallRetLR);
1686                 stackCallRetLR->setFirstRef(curInst, curInst->getLexicalId() * 2);
1687                 liveIntervals.push_back(stackCallRetLR);
1688                 stackCallRetLR->setPushed(true);
1689             }
1690             if (arg && argSize > 0 && arg->getRegVar())
1691             {
1692                 assert(stackCallArgLR);
1693                 stackCallArgLR->setLastRef(curInst, curInst->getLexicalId() * 2 - 1); //Minus one so that arguments will not be spilled
1694                 stackCallArgLR = nullptr;
1695             }
1696         }
1697 
1698         if (curInst->isFReturn())
1699         {
1700             uint16_t retSize = kernel.fg.builder->getRetVarSize();
1701             if (retSize && stackCallRetLR)
1702             {
1703                 stackCallRetLR->setLastRef(curInst, curInst->getLexicalId() * 2);
1704                 stackCallRetLR = nullptr;
1705             }
1706         }
1707 
1708         // Scan srcs
1709         for (int i = 0, nSrcs = curInst->getNumSrc(); i < nSrcs; i++)
1710         {
1711             G4_Operand* src = curInst->getSrc(i);
1712 
1713             if (src == nullptr || src->isNullReg())
1714             {
1715                 continue;
1716             }
1717 
1718             if (src && src->isSrcRegRegion())
1719             {
1720                 if (src->asSrcRegRegion()->isIndirect())
1721                 {
1722                     auto pointsToSet = l.getPointsToAnalysis().getAllInPointsTo(src->getBase()->asRegVar());
1723                     for (auto pt : *pointsToSet)
1724                     {
1725                         G4_Declare* dcl = pt.var->getDeclare()->getRootDeclare();
1726 
1727                         setSrcReferences(bb, inst_it, i, dcl, liveIntervals, eotLiveIntervals);
1728                     }
1729                 }
1730                 else
1731                 {
1732                     // Scan all srcs
1733                     topdcl = GetTopDclFromRegRegion(src);
1734                     if (topdcl)
1735                     {
1736                         setSrcReferences(bb, inst_it, i, topdcl, liveIntervals, eotLiveIntervals);
1737                     }
1738                 }
1739             }
1740         }
1741 
1742         // Scan dst
1743         G4_DstRegRegion* dst = curInst->getDst();
1744         if (dst)
1745         {
1746             if (dst->isIndirect())
1747             {
1748                 auto pointsToSet = l.getPointsToAnalysis().getAllInPointsTo(dst->getBase()->asRegVar());
1749                 for (auto pt : *pointsToSet)
1750                 {
1751                     G4_Declare* dcl = pt.var->getDeclare()->getRootDeclare();
1752 
1753                     setDstReferences(bb, inst_it, dcl, liveIntervals, eotLiveIntervals);
1754                 }
1755             }
1756             else
1757             {
1758                 topdcl = GetTopDclFromRegRegion(dst);
1759                 if (topdcl)
1760                 {
1761                     setDstReferences(bb, inst_it, topdcl, liveIntervals, eotLiveIntervals);
1762                 }
1763             }
1764         }
1765     }
1766 
1767     return;
1768 }
1769 
calculateLiveOutIntervals(G4_BB * bb,std::vector<LSLiveRange * > & liveIntervals)1770 void LinearScanRA::calculateLiveOutIntervals(G4_BB* bb, std::vector<LSLiveRange*>& liveIntervals)
1771 {
1772     for (auto dcl : globalDeclares)
1773     {
1774         if (dcl->getAliasDeclare() != NULL ||
1775             dcl->getRegFile() == G4_INPUT ||
1776             dcl->isSpilled())
1777             continue;
1778 
1779         LSLiveRange* lr = gra.getLSLR(dcl);
1780         if (lr &&
1781             l.isLiveAtExit(bb, dcl->getRegVar()->getId()))
1782         {
1783             lr->setLastRef(bb->getInstList().back(), bb->getInstList().back()->getLexicalId() * 2 + 1);
1784         }
1785     }
1786 
1787     return;
1788 }
1789 
1790 //
1791 // Live intervals:
1792 // 1. not input variables
1793 // 2. variables without assigned value: normal Intervals.
1794 // 3. variables without assigned value, without define: wired, added by front end. Such as cmp f1.0,  v11, v11. @BB only
1795 // 4. variables which are pre-defined with registers: such as stack call pre-defined varaibles. @BB only
1796 // 5. variables which are pre-defined but will not be assigned with registers: such as %null.  exclusive
1797 // 6. variables which are assigned in previuos region (BB, BBs or function, ..).  //@entry BB
1798 // 7. live in of region: pre-assigned, or not.
1799 // 8. live out of region: set the last reference.
1800 //
calculateLiveIntervalsGlobal(G4_BB * bb,std::vector<LSLiveRange * > & liveIntervals,std::vector<LSLiveRange * > & eotLiveIntervals)1801 void LinearScanRA::calculateLiveIntervalsGlobal(G4_BB* bb, std::vector<LSLiveRange*>& liveIntervals, std::vector<LSLiveRange*>& eotLiveIntervals)
1802 {
1803     //@ the entry of BB
1804     if (bb->getId() == 0 ||
1805         BBVector[bb->getId()]->hasBackEdgeIn())
1806     {
1807         calculateLiveInIntervals(bb, liveIntervals);
1808     }
1809 
1810     //@ BB
1811     calculateCurrentBBLiveIntervals(bb, liveIntervals, eotLiveIntervals);
1812 
1813     //@ the exit of BB
1814     if (BBVector[bb->getId()]->hasBackEdgeOut())
1815     {
1816         calculateLiveOutIntervals(bb, liveIntervals);
1817     }
1818 
1819     return;
1820 }
1821 
printLiveIntervals(std::vector<LSLiveRange * > & liveIntervals)1822 void LinearScanRA::printLiveIntervals(std::vector<LSLiveRange*>& liveIntervals)
1823 {
1824     for (auto lr : liveIntervals)
1825     {
1826         unsigned int start, end;
1827 
1828         lr->getFirstRef(start);
1829         lr->getLastRef(end);
1830 
1831         std::cout << lr->getTopDcl()->getName() << "(" << start << ", " << end << ", " << lr->getTopDcl()->getByteSize() << ")";
1832         std::cout << std::endl;
1833     }
1834 
1835     std::cout << std::endl;
1836 }
1837 
printSpillLiveIntervals(std::list<LSLiveRange * > & liveIntervals)1838 void LinearScanRA::printSpillLiveIntervals(std::list<LSLiveRange*>& liveIntervals)
1839 {
1840     for (auto lr : liveIntervals)
1841     {
1842         unsigned int start, end;
1843 
1844         lr->getFirstRef(start);
1845         lr->getLastRef(end);
1846 
1847         std::cout << lr->getTopDcl()->getName() << "(" << start << ", " << end << ", " << lr->getTopDcl()->getByteSize() << ")";
1848         std::cout << std::endl;
1849     }
1850 
1851     std::cout << std::endl;
1852 }
1853 
printInputLiveIntervalsGlobal()1854 void LinearScanRA::printInputLiveIntervalsGlobal()
1855 {
1856     COUT_ERROR << std::endl << "Input Live intervals " << std::endl;
1857 
1858     for (std::list<LSInputLiveRange*>::iterator it = inputIntervals.begin();
1859         it != inputIntervals.end();
1860         it++)
1861     {
1862         unsigned int regWordIdx, lrEndIdx, regNum, subRegInWord;
1863 
1864         LSInputLiveRange* lr = (*it);
1865 
1866         regWordIdx = lr->getRegWordIdx();
1867         regNum = regWordIdx / numEltPerGRF<Type_UW>();
1868         subRegInWord = regWordIdx % numEltPerGRF<Type_UW>();
1869         lrEndIdx = lr->getLrEndIdx();
1870 
1871         COUT_ERROR << "r" << regNum << "." << subRegInWord << " " << lrEndIdx;
1872         COUT_ERROR << std::endl;
1873     }
1874 
1875     COUT_ERROR << std::endl;
1876 }
1877 
printLiveInterval(LSLiveRange * lr,bool assign)1878 static inline void printLiveInterval(LSLiveRange* lr, bool assign)
1879 {
1880     int startregnum, endregnum, startsregnum=0, endsregnum;
1881     G4_VarBase* op;
1882     op = lr->getPhyReg(startsregnum);
1883 
1884     startregnum = endregnum = op->asGreg()->getRegNum();
1885     endsregnum = startsregnum + (lr->getTopDcl()->getNumElems() * lr->getTopDcl()->getElemSize() / 2) - 1;
1886 
1887     if (lr->getTopDcl()->getNumRows() > 1) {
1888         endregnum = startregnum + lr->getTopDcl()->getNumRows() - 1;
1889 
1890         if (lr->getTopDcl()->getWordSize() > 0)
1891         {
1892             endsregnum = lr->getTopDcl()->getWordSize() % numEltPerGRF<Type_UW>() - 1;
1893             if (endsregnum < 0) endsregnum = 15;
1894         }
1895         else
1896             endsregnum = 15; // last word in GRF
1897     }
1898     if (assign)
1899     {
1900         COUT_ERROR << "Assigned physical register to ";
1901     }
1902     else
1903     {
1904         COUT_ERROR << "Free physical register of ";
1905     }
1906     COUT_ERROR << lr->getTopDcl()->getName() <<
1907         " (r" << startregnum << "." << startsregnum << ":w - " <<
1908         "r" << endregnum << "." << endsregnum << ":w)" << std::endl;
1909 
1910     return;
1911 }
1912 
globalLinearScan(GlobalRA & g,LivenessAnalysis * l,std::vector<LSLiveRange * > & lv,std::vector<LSLiveRange * > * assignedLiveIntervals,std::list<LSInputLiveRange *,std_arena_based_allocator<LSInputLiveRange * >> & inputLivelIntervals,PhyRegsManager & pregMgr,Mem_Manager & memmgr,unsigned int numReg,unsigned int numEOT,unsigned int lastLexID,bool bankConflict,bool internalConflict)1913 globalLinearScan::globalLinearScan(GlobalRA& g, LivenessAnalysis* l, std::vector<LSLiveRange*>& lv, std::vector<LSLiveRange*>* assignedLiveIntervals,
1914     std::list<LSInputLiveRange*, std_arena_based_allocator<LSInputLiveRange*>>& inputLivelIntervals,
1915     PhyRegsManager& pregMgr, Mem_Manager& memmgr,
1916     unsigned int numReg, unsigned int numEOT, unsigned int lastLexID, bool bankConflict,
1917     bool internalConflict)
1918     : gra(g)
1919     , builder(g.builder)
1920     , mem(memmgr)
1921     , pregManager(pregMgr)
1922     , liveIntervals(lv)
1923     , preAssignedIntervals(assignedLiveIntervals)
1924     , inputIntervals(inputLivelIntervals)
1925     , numRowsEOT(numEOT)
1926     , lastLexicalID(lastLexID)
1927     , numRegLRA(numReg)
1928     , doBankConflict(bankConflict)
1929     , highInternalConflict(internalConflict)
1930 {
1931     startGRFReg = 0;
1932     activeGRF.resize(g.kernel.getNumRegTotal());
1933     for (auto lr : inputLivelIntervals)
1934     {
1935         unsigned int regnum = lr->getRegWordIdx() / numEltPerGRF<Type_UW>();
1936         activeGRF[regnum].activeInput.push_back(lr);
1937     }
1938 }
1939 
getCalleeSaveGRF(std::vector<unsigned int> & regNum,G4_Kernel * kernel)1940 void globalLinearScan::getCalleeSaveGRF(std::vector<unsigned int>& regNum, G4_Kernel* kernel)
1941 {
1942     unsigned int startCallerSave = kernel->calleeSaveStart();
1943     unsigned int endCallerSave = startCallerSave + kernel->getNumCalleeSaveRegs();
1944 
1945     for (auto active_it = active.begin();
1946         active_it != active.end();
1947         active_it++)
1948     {
1949         LSLiveRange* lr = (*active_it);
1950 
1951         G4_VarBase* op;
1952         int startsregnum = 0;
1953         op = lr->getPhyReg(startsregnum);
1954         unsigned startregnum = op->asGreg()->getRegNum();
1955         unsigned endregnum = startregnum + lr->getTopDcl()->getNumRows() - 1;
1956 
1957         for (unsigned i = startregnum; i <= endregnum; i++)
1958         {
1959             if (i >= startCallerSave && i <= endCallerSave)
1960             {
1961                 regNum.push_back(i);
1962             }
1963         }
1964     }
1965 
1966     return;
1967 }
1968 
getCallerSaveGRF(std::vector<unsigned int> & regNum,std::vector<unsigned int> & retRegNum,G4_Kernel * kernel)1969 void globalLinearScan::getCallerSaveGRF(
1970     std::vector<unsigned int>& regNum,
1971     std::vector<unsigned int>& retRegNum,
1972     G4_Kernel* kernel)
1973 {
1974     unsigned int startCalleeSave = 1;
1975     unsigned int endCalleeSave = startCalleeSave + kernel->getCallerSaveLastGRF();
1976 
1977     for (auto active_it = active.begin();
1978         active_it != active.end();
1979         active_it++)
1980     {
1981         LSLiveRange* lr = (*active_it);
1982         G4_Declare* dcl = lr->getTopDcl();
1983 
1984         if (!builder.kernel.fg.isPseudoVCEDcl(dcl) &&
1985             !builder.isPreDefArg(dcl))
1986         {
1987             G4_VarBase* op;
1988             int startsregnum = 0;
1989             op = lr->getPhyReg(startsregnum);
1990             unsigned startregnum = op->asGreg()->getRegNum();
1991             unsigned endregnum = startregnum + lr->getTopDcl()->getNumRows() - 1;
1992 
1993             for (unsigned i = startregnum; i <= endregnum; i++)
1994             {
1995                 if (i >= startCalleeSave && i < endCalleeSave)
1996                 {
1997                     if (builder.isPreDefRet(dcl))
1998                     {
1999                         retRegNum.push_back(i);
2000                     }
2001                     else
2002                     {
2003                         regNum.push_back(i);
2004                     }
2005                 }
2006             }
2007         }
2008     }
2009 
2010     for (auto inputlr : inputIntervals)
2011     {
2012         unsigned int regnum = inputlr->getRegWordIdx() / numEltPerGRF<Type_UW>();
2013         std::vector<unsigned int>::iterator it = std::find(regNum.begin(), regNum.end(), regnum);
2014         if (it == regNum.end())
2015         {
2016             if (regnum >= startCalleeSave && regnum < endCalleeSave)
2017             {
2018                 regNum.push_back(regnum);
2019             }
2020         }
2021     }
2022 
2023 }
2024 
isUseUnAvailableRegister(uint32_t startReg,uint32_t regNum)2025 bool LinearScanRA::isUseUnAvailableRegister(uint32_t startReg, uint32_t regNum)
2026 {
2027     for (uint32_t i = startReg; i < startReg + regNum; ++i)
2028     {
2029         if (!pregs->isGRFAvailable(i))
2030         {
2031             return true;
2032         }
2033     }
2034 
2035     return false;
2036 }
2037 
assignEOTLiveRanges(IR_Builder & builder,std::vector<LSLiveRange * > & liveIntervals)2038 bool LinearScanRA::assignEOTLiveRanges(IR_Builder& builder, std::vector<LSLiveRange*>& liveIntervals)
2039 {
2040 #ifdef DEBUG_VERBOSE_ON
2041     COUT_ERROR << "--------------------------------- " << std::endl;
2042 #endif
2043     uint32_t nextEOTGRF = numRegLRA - numRowsEOT;
2044     for (auto lr : liveIntervals)
2045     {
2046         assert(lr->isEOT());
2047         G4_Declare* dcl = lr->getTopDcl();
2048         G4_Greg* phyReg = builder.phyregpool.getGreg(nextEOTGRF);
2049         dcl->getRegVar()->setPhyReg(phyReg, 0);
2050         lr->setPhyReg(phyReg, 0);
2051         lr->setAssigned(true);
2052         lr->setUseUnAvailableReg(isUseUnAvailableRegister(nextEOTGRF, dcl->getNumRows()));
2053         nextEOTGRF += dcl->getNumRows();
2054         if (nextEOTGRF > numRegLRA)
2055         {
2056             assert(0);
2057         }
2058 #ifdef DEBUG_VERBOSE_ON
2059         printLiveInterval(lr, true);
2060 #endif
2061     }
2062 
2063     return true;
2064 }
2065 
updateCallSiteLiveIntervals(LSLiveRange * callSiteLR)2066 void globalLinearScan::updateCallSiteLiveIntervals(LSLiveRange* callSiteLR)
2067 {
2068     unsigned lastIdx = 0;
2069     G4_INST* inst = callSiteLR->getLastRef(lastIdx);
2070 
2071     for (auto lr: active)
2072     {
2073         unsigned curLastIdx;
2074         lr->getLastRef(curLastIdx);
2075         if (curLastIdx < lastIdx)
2076         {
2077             lr->setLastRef(inst, lastIdx);
2078         }
2079     }
2080 
2081     for (auto inputlr : inputIntervals)
2082     {
2083         unsigned curLastIdx = inputlr->getLrEndIdx();
2084         if (curLastIdx < lastIdx)
2085         {
2086             inputlr->setLrEndIdx(lastIdx);
2087         }
2088     }
2089 
2090     return;
2091 }
2092 
runLinearScan(IR_Builder & builder,std::vector<LSLiveRange * > & liveIntervals,std::list<LSLiveRange * > & spillLRs)2093 bool globalLinearScan::runLinearScan(IR_Builder& builder, std::vector<LSLiveRange*>& liveIntervals, std::list<LSLiveRange*>& spillLRs)
2094 {
2095     unsigned int idx = 0;
2096     bool allocateRegResult = false;
2097 
2098 #ifdef DEBUG_VERBOSE_ON
2099     COUT_ERROR << "--------------------------------- " <<  std::endl;
2100 #endif
2101 
2102     for (auto lr : liveIntervals)
2103     {
2104         G4_Declare* dcl = lr->getTopDcl();
2105 
2106         lr->getFirstRef(idx);
2107         if (!lr->isEOT() && !lr->getAssigned())
2108         {
2109             //Add forbidden for preAssigned registers
2110             for (auto preAssginedLI : *preAssignedIntervals)
2111             {
2112                 if (builder.kernel.fg.isPseudoVCADcl(lr->getTopDcl()) &&
2113                     (builder.isPreDefRet(preAssginedLI->getTopDcl()) ||
2114                      builder.isPreDefArg(preAssginedLI->getTopDcl())))
2115                 {
2116                     continue;
2117                 }
2118 
2119                 unsigned preFirstIdx, preLastIdx;
2120                 preAssginedLI->getFirstRef(preFirstIdx);
2121                 preAssginedLI->getLastRef(preLastIdx);
2122 
2123                 unsigned lastIdx = 0;
2124                 lr->getLastRef(lastIdx);
2125 
2126                 if (!(lastIdx < preFirstIdx || preLastIdx < idx))
2127                 {
2128                     G4_VarBase* preg;
2129                     int subregnumword;
2130 
2131                     preg = preAssginedLI->getPhyReg(subregnumword);
2132                     unsigned reg = preg->asGreg()->getRegNum();
2133                     unsigned rowNum = preAssginedLI->getTopDcl()->getNumRows();
2134 
2135                     for (unsigned k = 0; k < rowNum; k++)
2136                     {
2137                         lr->addForbidden(reg + k);
2138                     }
2139                 }
2140             }
2141         }
2142 
2143 #ifdef DEBUG_VERBOSE_ON
2144         COUT_ERROR << "-------- IDX: " << idx << "---------" << std::endl;
2145 #endif
2146 
2147         //Expire the live ranges ended befoe idx
2148         expireGlobalRanges(idx);
2149         expireInputRanges(idx);
2150 
2151         if (lr->isCallSite())
2152         {
2153             updateCallSiteLiveIntervals(lr);
2154             continue;
2155         }
2156 
2157         if (builder.kernel.fg.isPseudoVCADcl(dcl))
2158         {
2159             std::vector<unsigned int> callerSaveRegs;
2160             std::vector<unsigned int> regRegs;
2161             getCallerSaveGRF(callerSaveRegs, regRegs, &gra.kernel);
2162             for (unsigned int i = 0; i < callerSaveRegs.size(); i++)
2163             {
2164                 unsigned int callerSaveReg = callerSaveRegs[i];
2165                 lr->addForbidden(callerSaveReg);
2166             }
2167             for (unsigned int i = 0; i < regRegs.size(); i++)
2168             {
2169                 unsigned int callerSaveReg = regRegs[i];
2170                 if (lr->getRetGRFs() == nullptr)
2171                 {
2172                     allocRetRegsVector(lr);
2173                 }
2174                 lr->addRetRegs(callerSaveReg);
2175             }
2176             continue;
2177         }
2178         else if (builder.kernel.fg.isPseudoVCEDcl(dcl))
2179         {
2180             calleeSaveLR = lr;
2181             continue;
2182         }
2183         else if (!lr->getAssigned())
2184         {
2185             if (dcl == gra.getOldFPDcl())
2186             {
2187                 std::vector<unsigned int> callerSaveRegs;
2188                 std::vector<unsigned int> regRegs;
2189                 getCallerSaveGRF(callerSaveRegs, regRegs, &gra.kernel);
2190                 for (unsigned int i = 0; i < callerSaveRegs.size(); i++)
2191                 {
2192                     unsigned int callerSaveReg = callerSaveRegs[i];
2193                     lr->addForbidden(callerSaveReg);
2194                 }
2195                 for (unsigned int i = 0; i < regRegs.size(); i++)
2196                 {
2197                     unsigned int callerSaveReg = regRegs[i];
2198                     if (lr->getRetGRFs() == nullptr)
2199                     {
2200                         allocRetRegsVector(lr);
2201                     }
2202                     lr->addRetRegs(callerSaveReg);
2203                 }
2204             }
2205 
2206             //startGRFReg = 0;
2207             allocateRegResult = allocateRegsLinearScan(lr, builder);
2208 #ifdef DEBUG_VERBOSE_ON
2209             if (allocateRegResult)
2210             {
2211                 printLiveInterval(lr, true);
2212             }
2213 #endif
2214         }
2215         else
2216         {
2217             allocateRegResult = true;
2218             int startregnum, subregnum, endsregnum;
2219             G4_VarBase* op;
2220             op = lr->getPhyReg(subregnum);
2221 
2222             startregnum = op->asGreg()->getRegNum();
2223             endsregnum = subregnum + (lr->getTopDcl()->getNumElems() * lr->getTopDcl()->getElemSize() / 2) - 1;
2224             int nrows = 0;
2225             int lastRowSize = 0;
2226             int size = lr->getSizeInWords();
2227             LinearScanRA::getRowInfo(size, nrows, lastRowSize);
2228 
2229             if (!lr->isUseUnAvailableReg())
2230             {
2231                 if ((unsigned)size >= numEltPerGRF<Type_UW>())
2232                 {
2233                     if (size % numEltPerGRF<Type_UW>() == 0)
2234                     {
2235                         pregManager.getAvaialableRegs()->setGRFBusy(startregnum, lr->getTopDcl()->getNumRows());
2236                     }
2237                     else
2238                     {
2239                         pregManager.getAvaialableRegs()->setGRFBusy(startregnum, lr->getTopDcl()->getNumRows() - 1);
2240                         pregManager.getAvaialableRegs()->setWordBusy(startregnum + lr->getTopDcl()->getNumRows() - 1, 0, lastRowSize);
2241                     }
2242                 }
2243                 else
2244                 {
2245                     pregManager.getAvaialableRegs()->setWordBusy(startregnum, subregnum, size);
2246                 }
2247             }
2248         }
2249 
2250         if (allocateRegResult)
2251         {
2252             updateGlobalActiveList(lr);
2253         }
2254         else //Spill
2255         {
2256             if (spillFromActiveList(lr, spillLRs))
2257             {
2258                 //Fixme: get the start GRF already, can allocate immediately
2259                 allocateRegResult = allocateRegsLinearScan(lr, builder);
2260                 if (!allocateRegResult)
2261                 {
2262 #ifdef DEBUG_VERBOSE_ON
2263                     COUT_ERROR << "Failed assigned physical register to " << lr->getTopDcl()->getName() << ", rows :" << lr->getTopDcl()->getNumRows() << std::endl;
2264                     printActives();
2265 #endif
2266                     return false;
2267                 }
2268                 else
2269                 {
2270                     updateGlobalActiveList(lr);
2271 #ifdef DEBUG_VERBOSE_ON
2272                     printLiveInterval(lr, true);
2273 #endif
2274                 }
2275             }
2276             else
2277             {
2278 #ifdef DEBUG_VERBOSE_ON
2279                 COUT_ERROR << "Failed to spill registers for " << lr->getTopDcl()->getName() << ", rows :" << lr->getTopDcl()->getNumRows() << std::endl;
2280                 printActives();
2281 #endif
2282                 spillLRs.push_back(lr);
2283             }
2284         }
2285     }
2286 
2287     int totalGRFNum = builder.kernel.getNumRegTotal();
2288     for (int i = 0; i < totalGRFNum; i++)
2289     {
2290         activeGRF[i].activeLV.clear();
2291         activeGRF[i].activeInput.clear();
2292     }
2293 
2294     //Assign the registers for the live out ones
2295     expireAllActive();
2296 
2297     if (builder.kernel.getOptions()->getOption(vISA_GenerateDebugInfo))
2298     {
2299         updateDebugInfo(builder.kernel, liveIntervals);
2300     }
2301 
2302     return true;
2303 }
2304 
updateGlobalActiveList(LSLiveRange * lr)2305 void globalLinearScan::updateGlobalActiveList(LSLiveRange* lr)
2306 {
2307     bool done = false;
2308     unsigned int newlr_end;
2309 
2310     lr->getLastRef(newlr_end);
2311 
2312     for (auto active_it = active.begin();
2313         active_it != active.end();
2314         active_it++)
2315     {
2316         unsigned int end_idx;
2317         LSLiveRange* active_lr = (*active_it);
2318 
2319         active_lr->getLastRef(end_idx);
2320 
2321         if (end_idx > newlr_end)
2322         {
2323             active.insert(active_it, lr);
2324             done = true;
2325             break;
2326         }
2327     }
2328 
2329     if (done == false)
2330         active.push_back(lr);
2331 
2332 #ifdef DEBUG_VERBOSE_ON
2333     COUT_ERROR << "Add active " << lr->getTopDcl()->getName() << std::endl;
2334 #endif
2335 
2336     G4_VarBase* op;
2337     int startsregnum = 0;
2338     op = lr->getPhyReg(startsregnum);
2339     unsigned startregnum = op->asGreg()->getRegNum();
2340     unsigned endregnum = startregnum + lr->getTopDcl()->getNumRows() - 1;
2341     for (unsigned i = startregnum; i <= endregnum; i++)
2342     {
2343         activeGRF[i].activeLV.push_back(lr);
2344 #ifdef DEBUG_VERBOSE_ON
2345         COUT_ERROR << "Add activeGRF " << lr->getTopDcl()->getName() << " Reg: " << i << std::endl;
2346 #endif
2347     }
2348 }
2349 
insertLiveRange(std::list<LSLiveRange * > * liveIntervals,LSLiveRange * lr)2350 bool globalLinearScan::insertLiveRange(std::list<LSLiveRange*>* liveIntervals, LSLiveRange* lr)
2351 {
2352     unsigned int idx = 0;
2353     lr->getFirstRef(idx);
2354     std::list<LSLiveRange*>::iterator it = liveIntervals->begin();
2355     while (it != liveIntervals->end())
2356     {
2357         LSLiveRange* curLR = (*it);
2358         unsigned curIdx = 0;
2359         curLR->getFirstRef(curIdx);
2360         if (curIdx > idx)
2361         {
2362             liveIntervals->insert(it, lr);
2363             return true;
2364         }
2365 
2366         it++;
2367     }
2368 
2369     return false;
2370 }
2371 
canBeSpilledLR(LSLiveRange * lr)2372 bool globalLinearScan::canBeSpilledLR(LSLiveRange* lr)
2373 {
2374     if (lr->isUseUnAvailableReg())
2375     {
2376         return false;
2377     }
2378 
2379     if (lr->isEOT())
2380     {
2381         return false;
2382     }
2383 
2384     if (lr->getTopDcl() == builder.getBuiltinR0())
2385     {
2386         return false;
2387     }
2388 
2389     if (lr->isCall())
2390     {
2391         return false;
2392     }
2393 
2394     if (lr->isGRFRegAssigned())
2395     {
2396         return false;
2397     }
2398 
2399     if (lr->getTopDcl()->isSpilled())
2400     {
2401         return false;
2402     }
2403 
2404     if (lr->getTopDcl()->getRegFile() == G4_INPUT)
2405     {
2406         return false;
2407     }
2408 
2409     if (lr->getTopDcl()->getRegVar()->getId() == UNDEFINED_VAL)
2410     {
2411         return false;
2412     }
2413 
2414     if (lr->getTopDcl()->getRegVar()->isRegVarTransient() || lr->getTopDcl()->getRegVar()->isRegVarTmp())
2415     {
2416         return false;
2417     }
2418 
2419     //Stack call variables
2420     if (lr->getTopDcl() == gra.getOldFPDcl())
2421     {
2422         return false;
2423     }
2424 
2425     if (builder.kernel.fg.isPseudoVCADcl(lr->getTopDcl()) ||
2426         builder.kernel.fg.isPseudoVCEDcl(lr->getTopDcl()))
2427     {
2428         return false;
2429     }
2430 
2431     return true;
2432 }
2433 
findSpillCandidate(LSLiveRange * tlr)2434 int globalLinearScan::findSpillCandidate(LSLiveRange* tlr)
2435 {
2436     unsigned short requiredRows = tlr->getTopDcl()->getNumRows();
2437     int referenceCount = 0;
2438     int startGRF = -1;
2439     float spillCost = (float)(int)0x7FFFFFFF;
2440     unsigned lastIdxs = 1;
2441     unsigned tStartIdx = 0;
2442 
2443     tlr->getFirstRef(tStartIdx);
2444     BankAlign bankAlign = getBankAlign(tlr);
2445     for (int i = 0; i < (int)(numRegLRA - requiredRows); i++)
2446     {
2447         unsigned endIdx = 0;
2448         bool canBeFree = true;
2449         LSLiveRange* analyzedLV = nullptr;
2450 
2451         pregManager.getAvaialableRegs()->findRegisterCandiateWithAlignForward(i, bankAlign, false);
2452 
2453         // Check the following adjacent registers
2454         for (int k = i; k < i + requiredRows; k++)
2455         {
2456             if (activeGRF[k].activeInput.size() ||
2457                 tlr->getForbidden()[k])
2458             {
2459                 i = k;
2460                 canBeFree = false;
2461                 break;
2462             }
2463 
2464             if (activeGRF[k].activeLV.size())
2465             {
2466                 // There may be multiple variables take same register with different offsets
2467                 for (auto lr : activeGRF[k].activeLV)
2468                 {
2469                     if (lr == analyzedLV) // one LV may occupy multiple registers
2470                     {
2471                         continue;
2472                     }
2473 
2474                     analyzedLV = lr;
2475 
2476                     if (!canBeSpilledLR(lr) || lr->getForbidden()[k])
2477                     {
2478                         int startsregnum = 0;
2479                         G4_VarBase* op = lr->getPhyReg(startsregnum);
2480                         unsigned startregnum = op->asGreg()->getRegNum();
2481 
2482                         canBeFree = false;
2483                         i = startregnum + lr->getTopDcl()->getNumRows() - 1; // Jump to k + rows - 1 to avoid unnecessory analysis.
2484 
2485                         break;
2486                     }
2487 
2488                     int startsregnum = 0;
2489                     G4_VarBase* op = lr->getPhyReg(startsregnum);
2490                     int startregnum = op->asGreg()->getRegNum();
2491                     unsigned effectGRFNum = startregnum > i ? lr->getTopDcl()->getNumRows() : lr->getTopDcl()->getNumRows() - (i - startregnum);
2492                     lr->getLastRef(endIdx);
2493                     lastIdxs += (endIdx - tStartIdx) * effectGRFNum;
2494                     referenceCount += lr->getNumRefs();
2495                 }
2496 
2497                 if (!canBeFree)
2498                 {
2499                     break;
2500                 }
2501             }
2502             else if (pregManager.getAvaialableRegs()->isGRFAvailable(k) && !pregManager.getAvaialableRegs()->isGRFBusy(k))
2503             {
2504                 lastIdxs += lastLexicalID - tStartIdx;
2505             }
2506             else //Reserved regsiters
2507             {
2508                 i = k;
2509                 canBeFree = false;
2510                 break;
2511             }
2512         }
2513 
2514         if (canBeFree)
2515         {
2516             //Spill cost
2517             float currentSpillCost = (float)referenceCount / lastIdxs;
2518 
2519             if (currentSpillCost < spillCost)
2520             {
2521                 startGRF = i;
2522                 spillCost = currentSpillCost;
2523             }
2524         }
2525 
2526         lastIdxs = 1;
2527         referenceCount = 0;
2528     }
2529 
2530     return startGRF;
2531 }
2532 
freeSelectedRegistsers(int startGRF,LSLiveRange * tlr,std::list<LSLiveRange * > & spillLRs)2533 void globalLinearScan::freeSelectedRegistsers(int startGRF, LSLiveRange* tlr, std::list<LSLiveRange*>& spillLRs)
2534 {
2535     unsigned short requiredRows = tlr->getTopDcl()->getNumRows();
2536 #ifdef DEBUG_VERBOSE_ON
2537     COUT_ERROR << "Required GRF size for spill: " << requiredRows << std::endl;
2538 #endif
2539 
2540         //Free registers.
2541     for (int k = startGRF; k < startGRF + requiredRows; k++)
2542     {
2543 #ifdef DEBUG_VERBOSE_ON
2544         if (!activeGRF[k].activeLV.size())
2545         {
2546             COUT_ERROR << "Pick free GRF for spill: " << " GRF:" << k << std::endl;
2547         }
2548 #endif
2549 
2550         while (activeGRF[k].activeLV.size())
2551         {
2552             LSLiveRange* lr = activeGRF[k].activeLV.front();
2553 
2554             G4_VarBase* op;
2555             int startsregnum = 0;
2556             op = lr->getPhyReg(startsregnum);
2557             unsigned startregnum = op->asGreg()->getRegNum();
2558             unsigned endregnum = startregnum + lr->getTopDcl()->getNumRows() - 1;
2559 
2560             assert(startregnum <= (unsigned)k);
2561             assert(lr->getTopDcl()->getRegFile() != G4_INPUT);
2562 
2563             //Free from the register buckect array
2564             for (unsigned s = startregnum; s <= endregnum; s++)
2565             {
2566                 std::vector<LSLiveRange*>::iterator it = std::find(activeGRF[s].activeLV.begin(), activeGRF[s].activeLV.end(), lr);
2567                 if (it != activeGRF[s].activeLV.end())
2568                 {
2569 #ifdef DEBUG_VERBOSE_ON
2570                     COUT_ERROR << "SPILL: Free activeGRF from : " << (lr)->getTopDcl()->getName() << " GRF:" << s << std::endl;
2571 #endif
2572                     activeGRF[s].activeLV.erase(it);
2573                 }
2574             }
2575 
2576 #ifdef DEBUG_VERBOSE_ON
2577             printLiveInterval(lr, false);
2578 #endif
2579 
2580             //Free the allocated register
2581             freeAllocedRegs(lr, true);
2582 
2583             //Record spilled live range
2584             if (std::find(spillLRs.begin(), spillLRs.end(), lr) == spillLRs.end())
2585             {
2586                 spillLRs.push_back(lr);
2587             }
2588 
2589             //Remove spilled live range from active list
2590             std::list<LSLiveRange*>::iterator activeListIter = active.begin();
2591             while (activeListIter != active.end())
2592             {
2593                 std::list<LSLiveRange*>::iterator nextIt = activeListIter;
2594                 nextIt++;
2595 
2596                 if ((*activeListIter) == lr)
2597                 {
2598 #ifdef DEBUG_VERBOSE_ON
2599                     COUT_ERROR << "SPILL: Free active lr: " << (*activeListIter)->getTopDcl()->getName() << std::endl;
2600 #endif
2601                     active.erase(activeListIter);
2602                     break;
2603                 }
2604                 activeListIter = nextIt;
2605             }
2606         }
2607     }
2608 }
2609 
spillFromActiveList(LSLiveRange * tlr,std::list<LSLiveRange * > & spillLRs)2610 bool globalLinearScan::spillFromActiveList(LSLiveRange* tlr, std::list<LSLiveRange*>& spillLRs)
2611 {
2612     int startGRF = findSpillCandidate(tlr);
2613 
2614     if (startGRF == -1)
2615     {
2616 #ifdef DEBUG_VERBOSE_ON
2617         printActives();
2618 #endif
2619         return false;
2620     }
2621 
2622     freeSelectedRegistsers(startGRF, tlr, spillLRs);
2623 
2624     return true;
2625 }
2626 
expireGlobalRanges(unsigned int idx)2627 void globalLinearScan::expireGlobalRanges(unsigned int idx)
2628 {
2629     //active list is sorted in ascending order of starting index
2630 
2631     while (active.size() > 0)
2632     {
2633         unsigned int endIdx;
2634         LSLiveRange* lr = active.front();
2635 
2636         lr->getLastRef(endIdx);
2637 
2638         if (endIdx <= idx)
2639         {
2640             G4_VarBase* preg;
2641             int subregnumword, subregnum;
2642 
2643             preg = lr->getPhyReg(subregnumword);
2644 
2645             if (preg)
2646             {
2647                 subregnum = LinearScanRA::convertSubRegOffFromWords(lr->getTopDcl(), subregnumword);
2648 
2649                 // Mark the RegVar object of dcl as assigned to physical register
2650                 lr->getTopDcl()->getRegVar()->setPhyReg(preg, subregnum);
2651                 lr->setAssigned(true);
2652             }
2653 
2654 #ifdef DEBUG_VERBOSE_ON
2655             printLiveInterval(lr, false);
2656 #endif
2657             if (preg)
2658             {
2659                 unsigned startregnum = preg->asGreg()->getRegNum();
2660                 unsigned endregnum = startregnum + lr->getTopDcl()->getNumRows() - 1;
2661                 for (unsigned i = startregnum; i <= endregnum; i++)
2662                 {
2663                     std::vector<LSLiveRange*>::iterator activeListIter = activeGRF[i].activeLV.begin();
2664                     while (activeListIter != activeGRF[i].activeLV.end())
2665                     {
2666                         std::vector<LSLiveRange*>::iterator nextIt = activeListIter;
2667                         nextIt++;
2668                         if ((*activeListIter) == lr)
2669                         {
2670                             activeGRF[i].activeLV.erase(activeListIter);
2671 #ifdef DEBUG_VERBOSE_ON
2672                             COUT_ERROR << "Remove range " << lr->getTopDcl()->getName() << " from activeGRF: " << i << std::endl;
2673 #endif
2674                             break;
2675                         }
2676                         activeListIter = nextIt;
2677                     }
2678                 }
2679 
2680                 if (calleeSaveLR)
2681                 {
2682                     unsigned int startCallerSave = builder.kernel.calleeSaveStart();
2683                     unsigned int endCallerSave = startCallerSave + builder.kernel.getNumCalleeSaveRegs();
2684 
2685                     for (unsigned i = startregnum; i <= endregnum; i++)
2686                     {
2687                         if (i >= startCallerSave && i <= endCallerSave)
2688                         {
2689                             calleeSaveLR->addForbidden(i);
2690                         }
2691                     }
2692                 }
2693             }
2694 
2695             // Free physical regs marked for this range
2696             freeAllocedRegs(lr, true);
2697 
2698             // Remove range from active list
2699             active.pop_front();
2700         }
2701         else
2702         {
2703             // As soon as we find first range that ends after ids break loop
2704             break;
2705         }
2706     }
2707 }
2708 
expireInputRanges(unsigned int global_idx)2709 void globalLinearScan::expireInputRanges(unsigned int global_idx)
2710 {
2711     while (inputIntervals.size() > 0)
2712     {
2713         LSInputLiveRange* lr = inputIntervals.front();
2714         unsigned int endIdx = lr->getLrEndIdx();
2715 
2716         if (endIdx <= global_idx)
2717         {
2718             unsigned int regnum = lr->getRegWordIdx() / numEltPerGRF<Type_UW>();
2719             unsigned int subRegInWord = lr->getRegWordIdx() % numEltPerGRF<Type_UW>();
2720 
2721             // Free physical regs marked for this range
2722             pregManager.freeRegs(regnum, subRegInWord, 1, endIdx);
2723 
2724 #ifdef DEBUG_VERBOSE_ON
2725             COUT_ERROR << "Expiring input r" << regnum << "." << subRegInWord << std::endl;
2726 #endif
2727 
2728             // Remove range from inputIntervals list
2729             inputIntervals.pop_front();
2730             assert(lr == activeGRF[regnum].activeInput.front());
2731             activeGRF[regnum].activeInput.erase(activeGRF[regnum].activeInput.begin());
2732         }
2733         else
2734         {
2735             // As soon as we find first range that ends after ids break loop
2736             break;
2737         }
2738     }
2739 }
2740 
getBankAlign(LSLiveRange * lr)2741 BankAlign globalLinearScan::getBankAlign(LSLiveRange* lr)
2742 {
2743     G4_Declare* dcl = lr->getTopDcl();
2744     BankAlign bankAlign = gra.isEvenAligned(dcl) ? BankAlign::Even : BankAlign::Either;
2745 
2746     if (gra.getVarSplitPass()->isPartialDcl(lr->getTopDcl()))
2747     {
2748         // Special alignment is not needed for var split intrinsic
2749         bankAlign = BankAlign::Either;
2750     }
2751 
2752     return bankAlign;
2753 }
2754 
allocateRegsLinearScan(LSLiveRange * lr,IR_Builder & builder)2755 bool globalLinearScan::allocateRegsLinearScan(LSLiveRange* lr, IR_Builder& builder)
2756 {
2757     int regnum, subregnum;
2758     unsigned int localRABound = 0;
2759     unsigned int instID;
2760 
2761     lr->getFirstRef(instID);
2762     // Let local RA allocate only those ranges that need < 10 GRFs
2763     // Larger ranges are not many and are best left to global RA
2764     // as it can make a better judgement by considering the
2765     // spill cost.
2766     int nrows = 0;
2767     int size = lr->getSizeInWords();
2768     G4_Declare* dcl = lr->getTopDcl();
2769     G4_SubReg_Align subalign = gra.getSubRegAlign(dcl);
2770     localRABound = numRegLRA - 1;
2771 
2772     BankAlign bankAlign = getBankAlign(lr);
2773     nrows = pregManager.findFreeRegs(size,
2774         bankAlign,
2775         subalign,
2776         regnum,
2777         subregnum,
2778         startGRFReg,
2779         localRABound,
2780         instID,
2781         lr->getForbidden());
2782 
2783     if (nrows)
2784     {
2785 #ifdef DEBUG_VERBOSE_ON
2786         COUT_ERROR << lr->getTopDcl()->getName() << ":r" << regnum << "  BANK: " << (int)bankAlign << std::endl;
2787 #endif
2788         lr->setPhyReg(builder.phyregpool.getGreg(regnum), subregnum);
2789         if (!builder.getOptions()->getOption(vISA_LSFristFit))
2790         {
2791             startGRFReg = (startGRFReg + nrows) % localRABound;
2792         }
2793         else
2794         {
2795             assert(startGRFReg == 0);
2796         }
2797 
2798         return true;
2799     }
2800     else if (!builder.getOptions()->getOption(vISA_LSFristFit))
2801     {
2802         startGRFReg = 0;
2803         nrows = pregManager.findFreeRegs(size,
2804             bankAlign,
2805             subalign,
2806             regnum,
2807             subregnum,
2808             startGRFReg,
2809             localRABound,
2810             instID,
2811             lr->getForbidden());
2812         if (nrows)
2813         {
2814 #ifdef DEBUG_VERBOSE_ON
2815             COUT_ERROR << lr->getTopDcl()->getName() << ":r" << regnum << "  BANK: " << (int)bankAlign << std::endl;
2816 #endif
2817             lr->setPhyReg(builder.phyregpool.getGreg(regnum), subregnum);
2818             startGRFReg = (startGRFReg + nrows) % localRABound;
2819             return true;
2820         }
2821     }
2822 #ifdef DEBUG_VERBOSE_ON
2823     COUT_ERROR << lr->getTopDcl()->getName() << ": failed to allocate" << std::endl;
2824 #endif
2825 
2826     return false;
2827 }
2828 
findFreeMultipleRegsForward(int regIdx,BankAlign align,int & regnum,int nrows,int lastRowSize,int endReg,int instID,const bool * forbidden)2829 bool PhyRegsLocalRA::findFreeMultipleRegsForward(int regIdx, BankAlign align, int& regnum, int nrows, int lastRowSize, int endReg, int instID, const bool* forbidden)
2830 {
2831     int foundItem = 0;
2832     int startReg = 0;
2833     int i = regIdx;
2834     int grfRows = 0;
2835     bool multiSteps = nrows > 1;
2836 
2837     if (lastRowSize % numEltPerGRF<Type_UW>() == 0)
2838     {
2839         grfRows = nrows;
2840     }
2841     else
2842     {
2843         grfRows = nrows - 1;
2844     }
2845 
2846     findRegisterCandiateWithAlignForward(i, align, multiSteps);
2847 
2848     startReg = i;
2849     while (i <= endReg + nrows - 1)
2850     {
2851         if (isGRFAvailable(i) && !forbidden[i] &&
2852             regBusyVector[i] == 0)
2853         {
2854             foundItem++;
2855         }
2856         else if (foundItem < grfRows)
2857         {
2858             foundItem = 0;
2859             i++;
2860             findRegisterCandiateWithAlignForward(i, align, multiSteps);
2861             startReg = i;
2862             continue;
2863         }
2864 
2865         if (foundItem == grfRows)
2866         {
2867             if (lastRowSize % numEltPerGRF<Type_UW>() == 0)
2868             {
2869                 regnum = startReg;
2870                 return true;
2871             }
2872             else
2873             {
2874                 if (i + 1 <= endReg + nrows - 1 &&
2875                     isGRFAvailable(i + 1) && !forbidden[i + 1] &&
2876                     (isWordBusy(i + 1, 0, lastRowSize) == false))
2877                 {
2878                     regnum = startReg;
2879                     return true;
2880                 }
2881                 else
2882                 {
2883                     foundItem = 0;
2884                     i++;
2885                     findRegisterCandiateWithAlignForward(i, align, multiSteps);
2886                     startReg = i;
2887                     continue;
2888                 }
2889             }
2890         }
2891 
2892         i++;
2893     }
2894 
2895     return false;
2896 }
2897 
findFreeSingleReg(int regIdx,int size,BankAlign align,G4_SubReg_Align subalign,int & regnum,int & subregnum,int endReg,const bool * forbidden)2898 bool PhyRegsLocalRA::findFreeSingleReg(int regIdx, int size, BankAlign align, G4_SubReg_Align subalign, int& regnum, int& subregnum, int endReg, const bool* forbidden)
2899 {
2900     int i = regIdx;
2901     bool found = false;
2902 
2903     while (!found)
2904     {
2905         if (i > endReg) //<= works
2906             break;
2907 
2908         // Align GRF
2909         if ((align == BankAlign::Even) && (i % 2 != 0))
2910         {
2911             i++;
2912             continue;
2913         }
2914         else if ((align == BankAlign::Odd) && (i % 2 == 0))
2915         {
2916             i++;
2917             continue;
2918         }
2919         else if ((align == BankAlign::Even2GRF) && ((i % 4 >= 2)))
2920         {
2921             i++;
2922             continue;
2923         }
2924         else if ((align == BankAlign::Odd2GRF) && ((i % 4 < 2)))
2925         {
2926             i++;
2927             continue;
2928         }
2929 
2930         if (isGRFAvailable(i, 1) && !forbidden[i])
2931         {
2932             found = findFreeSingleReg(i, subalign, regnum, subregnum, size);
2933             if (found)
2934             {
2935                 return true;
2936             }
2937         }
2938         i++;
2939     }
2940 
2941     return false;
2942 }
2943 
findFreeRegs(int size,BankAlign align,G4_SubReg_Align subalign,int & regnum,int & subregnum,int startRegNum,int endRegNum,unsigned int instID,const bool * forbidden)2944 int PhyRegsManager::findFreeRegs(int size, BankAlign align, G4_SubReg_Align subalign, int& regnum, int& subregnum,
2945     int startRegNum, int endRegNum, unsigned int instID, const bool* forbidden)
2946 {
2947     int nrows = 0;
2948     int lastRowSize = 0;
2949     LocalRA::getRowInfo(size, nrows, lastRowSize);
2950 
2951     int startReg = startRegNum;
2952     int endReg = endRegNum - nrows + 1;
2953 
2954     bool found = false;
2955 
2956     if (size >= (int)numEltPerGRF<Type_UW>())
2957     {
2958         found = availableRegs.findFreeMultipleRegsForward(startReg, align, regnum, nrows, lastRowSize, endReg, instID, forbidden);
2959         if (found)
2960         {
2961             subregnum = 0;
2962             if (size % numEltPerGRF<Type_UW>() == 0)
2963             {
2964                 availableRegs.setGRFBusy(regnum, nrows);
2965             }
2966             else
2967             {
2968                 availableRegs.setGRFBusy(regnum, nrows - 1);
2969                 availableRegs.setWordBusy(regnum + nrows - 1, 0, lastRowSize);
2970             }
2971         }
2972     }
2973     else
2974     {
2975         found = availableRegs.findFreeSingleReg(startReg, size, align, subalign, regnum, subregnum, endReg, forbidden);
2976         if (found)
2977         {
2978             availableRegs.setWordBusy(regnum, subregnum, size);
2979         }
2980     }
2981 
2982     if (found)
2983     {
2984         return nrows;
2985     }
2986 
2987     return 0;
2988 }
2989