1 /*========================== begin_copyright_notice ============================
2 
3 Copyright (C) 2017-2021 Intel Corporation
4 
5 SPDX-License-Identifier: MIT
6 
7 ============================= end_copyright_notice ===========================*/
8 
9 #include "Mem_Manager.h"
10 #include "FlowGraph.h"
11 #include "RegAlloc.h"
12 #include "GraphColor.h"
13 #include "Timer.h"
14 #include "DebugInfo.h"
15 #include "VarSplit.h"
16 
17 #include <bitset>
18 #include <climits>
19 #include <cmath>
20 #include <fstream>
21 #include <optional>
22 #include <vector>
23 
24 using namespace vISA;
25 
26 #define GRAPH_COLOR
27 
PointsToAnalysis(const DECLARE_LIST & declares,unsigned int numBB)28 PointsToAnalysis::PointsToAnalysis(const DECLARE_LIST &declares, unsigned int numBB) :
29     numBBs(numBB), numAddrs(0), indirectUses(std::make_unique<REGVAR_VECTOR[]>(numBB))
30 {
31     for (auto decl : declares)
32     {
33         //add alias check, For Alias Dcl
34         if ((decl->getRegFile() == G4_ADDRESS || decl->getRegFile() == G4_SCALAR) &&
35             decl->getAliasDeclare() == NULL)  // It is a base declaration, not alias
36         {
37             // participate liveness analysis
38             decl->getRegVar()->setId(numAddrs++);
39         }
40         else
41         {
42             decl->getRegVar()->setId(UNDEFINED_VAL);
43         }
44     }
45 
46     // assign all addr aliases the same ID as its root
47     for (auto decl : declares)
48     {
49         if ((decl->getRegFile() == G4_ADDRESS || decl->getRegFile() == G4_SCALAR) &&
50             decl->getAliasDeclare() != NULL)
51         {
52             // participate liveness analysis
53             decl->getRegVar()->setId(decl->getRegVar()->getId());
54         }
55     }
56 
57     if (numAddrs > 0)
58     {
59         for (unsigned int i = 0; i < numAddrs; i++)
60             regVars.push_back(NULL);
61 
62         for (auto decl : declares)
63         {
64             if ((decl->getRegFile() == G4_ADDRESS || decl->getRegFile() == G4_SCALAR) &&
65                 decl->getAliasDeclare() == NULL &&
66                 decl->getRegVar()->getId() != UNDEFINED_VAL)
67             {
68                 regVars[decl->getRegVar()->getId()] = decl->getRegVar();
69             }
70         }
71 
72         pointsToSets.resize(numAddrs);
73         addrPointsToSetIndex.resize(numAddrs);
74         // initially each address variable has its own points-to set
75         for (unsigned i = 0; i < numAddrs; i++)
76         {
77             addrPointsToSetIndex[i] = i;
78         }
79     }
80 }
81 
82 
83 //
84 //  A flow-insensitive algroithm to compute the register usage for indirect accesses.
85 //  The algorithm is divided into two parts:
86 //  1. We go through every basic block computing the points-to set for each adddress
87 //     variable.  This happens when we see an instruction like
88 //     mov (8) A0 &R0
89 //
90 //  2. We go through each basic block again, and for each r[A0] expression
91 //     we mark variables in A0's points-to set as used in the block
92 //
93 //  The algorithm is conservative but should work well for our inputs since
94 //  the front end pretty much always uses a fresh address variable when taking
95 //  the address of a GRF variable, wtih the exception of call-by-reference parameters
96 //  It's performed only once at the beginning of RA, at the point where all variables
97 //  are virtual and no spill code (either for address or GRF) has been inserted.
98 //
doPointsToAnalysis(FlowGraph & fg)99 void PointsToAnalysis::doPointsToAnalysis(FlowGraph& fg)
100 {
101     if (numAddrs == 0)
102     {
103         return;
104     }
105 
106     // keep a list of address taken variables
107     std::vector<G4_RegVar*> addrTakenDsts;
108     std::map<G4_RegVar*, std::vector<std::pair<G4_RegVar*, unsigned char>> > addrTakenMapping;
109     std::vector< std::pair<G4_RegVar*, unsigned char>> addrTakenVariables;
110 
111     for (G4_BB* bb : fg)
112     {
113         for (const G4_INST* inst : *bb)
114         {
115             G4_DstRegRegion* dst = inst->getDst();
116             if (dst != NULL && dst->getRegAccess() == Direct && dst->getType() != Type_UD)
117             {
118                 G4_VarBase* ptr = dst->getBase();
119 
120                 for (int i = 0; i < G4_MAX_SRCS; i++)
121                 {
122                     G4_Operand* src = inst->getSrc(i);
123                     if (src != NULL && src->isAddrExp())
124                     {
125                         int offset = 0;
126                         if (dst && !dst->isNullReg() && dst->getBase()->asRegVar()->getDeclare()->getRegFile() == G4_SCALAR)
127                         {
128                             offset = src->asAddrExp()->getOffset();
129                         }
130                         addrTakenMapping[ptr->asRegVar()].push_back(std::make_pair(src->asAddrExp()->getRegVar(), offset));
131                         addrTakenDsts.push_back(ptr->asRegVar());
132                         addrTakenVariables.push_back(std::make_pair(src->asAddrExp()->getRegVar(), offset));
133                     }
134                 }
135             }
136         }
137     }
138 
139     // first compute the points-to set for each address variable
140     for (G4_BB* bb : fg)
141     {
142         for (G4_INST* inst : *bb)
143         {
144             if (inst->isPseudoKill() || inst->isLifeTimeEnd())
145             {
146                 // No need to consider these lifetime placeholders for points2analysis
147                 continue;
148             }
149 
150             G4_DstRegRegion* dst = inst->getDst();
151             if (dst != NULL && dst->getRegAccess() == Direct && dst->getType() != Type_UD)
152             {
153                 G4_VarBase* ptr = dst->getBase();
154                 //Dst is address variable
155                 if (ptr->isRegVar() && (ptr->asRegVar()->getDeclare()->getRegFile() == G4_ADDRESS || ptr->asRegVar()->getDeclare()->getRegFile() == G4_SCALAR) &&
156                     !ptr->asRegVar()->getDeclare()->isMsgDesc())
157                 {
158 
159                     // dst is an address variable.  ExDesc A0 may be ignored since they are never used in indirect access
160                     if (inst->isMov() || inst->isPseudoAddrMovIntrinsic())
161                     {
162                         for (int i = 0; i < inst->getNumSrc(); i++)
163                         {
164                             G4_Operand* src = inst->getSrc(i);
165                             if (!src || src->isNullReg())
166                             {
167                                 continue;
168                             }
169                             if (src->isAddrExp())
170                             {
171                                 // case 1:  mov A0 &GRF
172                                 G4_RegVar* addrTaken = src->asAddrExp()->getRegVar();
173 
174                                 if (addrTaken != NULL)
175                                 {
176                                     unsigned char offset = 0;
177                                     if (ptr->asRegVar()->getDeclare()->getRegFile() == G4_SCALAR)
178                                     {
179                                         offset = src->asAddrExp()->getOffset();
180                                     }
181                                     addToPointsToSet(ptr->asRegVar(), addrTaken, offset);
182                                 }
183                             }
184                             else
185                             {
186                                 //G4_Operand* srcPtr = src->isSrcRegRegion() ? src->asSrcRegRegion()->getBase() : src;
187                                 G4_VarBase* srcPtr = src->isSrcRegRegion() ? src->asSrcRegRegion()->getBase() : nullptr;
188 
189                                 if (srcPtr && srcPtr->isRegVar() && (srcPtr->asRegVar()->getDeclare()->getRegFile() == G4_ADDRESS))
190                                 {
191                                     // case 2:  mov A0 A1
192                                     // merge the two addr's points-to set together
193                                     if (ptr->asRegVar()->getId() != srcPtr->asRegVar()->getId())
194                                     {
195                                         mergePointsToSet(srcPtr->asRegVar(), ptr->asRegVar());
196                                     }
197                                 }
198                                 else
199                                 {
200                                     // case ?: mov v1 A0
201                                     // case ?: mov A0 v1
202                                     if (srcPtr &&
203                                         srcPtr->isRegVar() &&
204                                         addrTakenMapping[srcPtr->asRegVar()].size() != 0)
205                                     {
206                                         for (int i = 0; i < (int)addrTakenMapping[srcPtr->asRegVar()].size(); i++)
207                                         {
208                                             addToPointsToSet(ptr->asRegVar(), addrTakenMapping[srcPtr->asRegVar()][i].first, addrTakenMapping[srcPtr->asRegVar()][i].second);
209                                         }
210                                     }
211                                     else
212                                     {
213                                         // case 3: mov A0 0
214                                         // Initial of address register, igore the point to analysis
215                                         // FIXME: currently, vISA don't expect mov imm value to the address register. So, 0 is treated as initialization.
216                                         // If support mov A0 imm in future, 0 may be R0.
217                                         if (!(src->isImm() && (src->asImm()->getImm() == 0)))
218                                         {
219                                             // case 4:  mov A0 V2
220                                             // conservatively assume address can point to anything
221                                             DEBUG_MSG("unexpected addr move for pointer analysis:\n");
222                                             DEBUG_EMIT(inst);
223                                             DEBUG_MSG("\n");
224                                             for (int i = 0, size = (int)addrTakenVariables.size(); i < size; i++)
225                                             {
226                                                 addToPointsToSet(ptr->asRegVar(), addrTakenVariables[i].first, addrTakenVariables[i].second);
227                                             }
228                                         }
229                                     }
230                                 }
231                             }
232                         }
233                     }
234                     else if (inst->isArithmetic())
235                     {
236                         G4_Operand* src0 = inst->getSrc(0);
237                         G4_Operand* src1 = inst->getSrc(1);
238                         bool src0addr = false;
239                         if (src0->isAddrExp())
240                         {
241                             src0addr = true;
242                         }
243                         else if (src0->isSrcRegRegion() && src0->getRegAccess() == Direct)
244                         {
245                             if (src0->isAddress())
246                             {
247                                 src0addr = true;
248                             }
249                         }
250 
251                         bool src1addr = false;
252                         if (src1->isAddrExp())
253                         {
254                             src1addr = true;
255                         }
256                         else if (src1->isSrcRegRegion() && src1->getRegAccess() == Direct)
257                         {
258                             if (src1->isAddress())
259                             {
260                                 src1addr = true;
261                             }
262                         }
263 
264                         if (src0addr ^ src1addr)
265                         {
266                             G4_Operand* src = src0addr ? src0 : src1;
267 
268                             if (src->isAddrExp())
269                             {
270                                 // case 5:  add/mul A0 &GRF src1
271                                 G4_RegVar* addrTaken = src->asAddrExp()->getRegVar();
272                                 addToPointsToSet(ptr->asRegVar(), addrTaken, 0);
273                             }
274                             else
275                             {
276                                 G4_VarBase* srcPtr = src->isSrcRegRegion() ? src->asSrcRegRegion()->getBase() : nullptr;
277                                 // case 6:  add/mul A0 A1 src1
278                                 // merge the two addr's points-to set together
279                                 if (srcPtr && (ptr->asRegVar()->getId() != srcPtr->asRegVar()->getId()))
280                                 {
281                                     mergePointsToSet(srcPtr->asRegVar(), ptr->asRegVar());
282                                 }
283                             }
284                         }
285                         else if (ptr->isRegVar() && ptr->asRegVar()->isPhyRegAssigned())
286                         {
287                             // OK, using builtin a0 or a0.2 directly.
288                         }
289                         else
290                         {
291                             // case 7:  add/mul A0 V1 V2
292                             DEBUG_MSG("unexpected addr add/mul for pointer analysis:\n");
293                             DEBUG_EMIT(inst);
294                             DEBUG_MSG("\n");
295                             for (int i = 0; i < (int)addrTakenVariables.size(); i++)
296                             {
297                                 addToPointsToSet(ptr->asRegVar(), addrTakenVariables[i].first, 0);
298                             }
299                         }
300                     }
301                     else
302                     {
303                         // case 8: A0 = ???
304                         DEBUG_MSG("unexpected instruction with address destination:\n");
305                         DEBUG_EMIT(inst);
306                         DEBUG_MSG("\n");
307                         for (int i = 0; i < (int)addrTakenVariables.size(); i++)
308                         {
309                             addToPointsToSet(ptr->asRegVar(), addrTakenVariables[i].first, addrTakenVariables[i].second);
310                         }
311                     }
312                 }
313                 else if (ptr->isRegVar() && !ptr->asRegVar()->getDeclare()->isMsgDesc())
314                 {
315                     for (int i = 0; i < G4_MAX_SRCS; i++)
316                     {
317                         G4_Operand* src = inst->getSrc(i);
318                         G4_VarBase* srcPtr = (src && src->isSrcRegRegion()) ? src->asSrcRegRegion()->getBase() : nullptr;
319                         //We don't support using "r[a0.0]" as address expression.
320                         //For instructions like following, it's not point-to propagation for simdShuffle and add64_i_i_i_i.
321                         //(W) mov (1)              simdShuffle(0,0)<1>:d  r[A0(0,0), 0]<0;1,0>:d
322                         //    pseudo_mad (16)      add64_i_i_i_i(0,0)<1>:d  0x6:w  simdShuffle(0,0)<0;0>:d  rem_i_i_i_i(0,0)<1;0>:d // $470:
323                         //    shl (16)             add64_i_i_i_i(0,0)<1>:d  add64_i_i_i_i(0,0)<1;1,0>:d  0x2:w // $472:
324                         if (srcPtr != nullptr && srcPtr->isRegVar() && ptr != srcPtr && src->getRegAccess() != IndirGRF)
325                         {
326                             std::vector<G4_RegVar*>::iterator addrDst = std::find(addrTakenDsts.begin(), addrTakenDsts.end(), srcPtr->asRegVar());
327                             if (addrDst != addrTakenDsts.end())
328                             {
329                                 addrTakenDsts.push_back(ptr->asRegVar());
330                                 for (int i = 0; i < (int)addrTakenMapping[srcPtr->asRegVar()].size(); i++)
331                                 {
332                                     addrTakenMapping[ptr->asRegVar()].push_back(addrTakenMapping[srcPtr->asRegVar()][i]);
333                                 }
334                             }
335                         }
336                     }
337                 }
338             }
339         }
340     }
341 
342 #ifdef DEBUG_VERBOSE_ON
343     DEBUG_VERBOSE("Results of points-to analysis:\n");
344     for (unsigned int i = 0; i < numAddrs; i++)
345     {
346         DEBUG_VERBOSE("Addr " << i);
347         for (G4_RegVar *grf : pointsToSets[addrPointsToSetIndex[i]])
348         {
349             DEBUG_EMIT(grf);
350             DEBUG_VERBOSE("\t");
351         }
352         DEBUG_VERBOSE("\n");
353     }
354 #endif
355 
356     // mark GRF that may be used indirect access as live in the block
357     // This includes all GRFs in the address's points-to set
358     for (auto bb : fg)
359     {
360         for (G4_INST* inst : *bb)
361         {
362             G4_DstRegRegion* dst = inst->getDst();
363 
364             if (dst != NULL &&
365                 dst->getRegAccess() == IndirGRF)
366             {
367                 G4_VarBase* dstptr = dst->getBase();
368                 MUST_BE_TRUE(dstptr->isRegVar() && (dstptr->asRegVar()->getDeclare()->getRegFile() == G4_ADDRESS || dstptr->asRegVar()->getDeclare()->getRegFile() == G4_SCALAR),
369                     "base must be address");
370                 addPointsToSetToBB(bb->getId(), dstptr->asRegVar());
371             }
372 
373             for (unsigned j = 0; j < G4_MAX_SRCS; j++)
374             {
375                 //
376                 // look for indirect reg access r[ptr] which refers addrTaken reg var
377                 //
378                 if (inst->getSrc(j) == NULL || !inst->getSrc(j)->isSrcRegRegion()) {
379                     continue;
380                 }
381 
382                 G4_SrcRegRegion* src = inst->getSrc(j)->asSrcRegRegion();
383 
384                 if (src->getRegAccess() == IndirGRF)
385                 {
386                     G4_VarBase* srcptr = src->getBase();
387                     MUST_BE_TRUE(srcptr->isRegVar() && (srcptr->asRegVar()->getDeclare()->getRegFile() == G4_ADDRESS || srcptr->asRegVar()->getDeclare()->getRegFile() == G4_SCALAR),
388                         "base must be address");
389                     addPointsToSetToBB(bb->getId(), srcptr->asRegVar());
390                 }
391             }
392         }
393     }
394 
395 #ifndef NDEBUG
396     for (unsigned i = 0; i < numAddrs; i++)
397     {
398         REGVAR_VECTOR& vec = pointsToSets[addrPointsToSetIndex[i]];
399         for (const pointInfo cur : vec)
400         {
401             unsigned indirectVarSize = cur.var->getDeclare()->getByteSize();
402             assert((indirectVarSize <= (unsigned)getGRFSize()* fg.getKernel()->getNumRegTotal()) && "indirected variables' size is larger than GRF file size");
403         }
404     }
405 #endif
406 
407 #ifdef DEBUG_VERBOSE_ON
408     for (unsigned int i = 0; i < numBBs; i++)
409     {
410         DEBUG_VERBOSE("Indirect uses for BB" << i << "\t");
411         const REGVAR_VECTOR &grfVec = getIndrUseVectorForBB(i);
412         for (G4_RegVar* grf : grfVec)
413         {
414             DEBUG_EMIT(grf);
415             DEBUG_VERBOSE("\t");
416         }
417         DEBUG_VERBOSE("\n");
418     }
419 #endif
420 
421 }
422 
isLocalVar(G4_Declare * decl) const423 bool LivenessAnalysis::isLocalVar(G4_Declare* decl) const
424 {
425     if ((decl->isInput() == true &&
426         !(fg.builder->getFCPatchInfo() &&
427             fg.builder->getFCPatchInfo()->getFCComposableKernel() &&
428             !decl->isLiveIn())) &&
429         !(fg.builder->isPreDefArg(decl) &&
430             (fg.builder->getIsKernel() ||
431                 (fg.getIsStackCallFunc() &&
432                     fg.builder->getArgSize() == 0))))
433         return false;
434 
435     if (fg.builder->getOption(vISA_enablePreemption) &&
436         decl == fg.builder->getBuiltinR0())
437         return false;
438 
439 
440     if (decl->isOutput() == true &&
441         !(fg.builder->isPreDefRet(decl) &&
442             (fg.builder->getIsKernel() ||
443                 (fg.getIsStackCallFunc() &&
444                     fg.builder->getRetVarSize() == 0))))
445         return false;
446 
447     LocalLiveRange* dclLR = gra.getLocalLR(decl);
448     if (decl &&
449         dclLR &&
450         dclLR->isLiveRangeLocal()&&
451         decl->getRegFile() != G4_INPUT)
452     {
453         return true;
454     }
455 
456     // Since variable split is done only for local variables, there is no need to analysis for global variables.
457     if (decl->getIsSplittedDcl() || decl->getIsPartialDcl())
458     {
459         return true;
460     }
461 
462     return false;
463 }
464 
setGlobalVarIDs(bool verifyRA,bool areAllPhyRegAssigned)465 bool LivenessAnalysis::setGlobalVarIDs(bool verifyRA, bool areAllPhyRegAssigned)
466 {
467     bool phyRegAssigned = areAllPhyRegAssigned;
468 
469     for (G4_Declare* decl : gra.kernel.Declares)
470     {
471         if (!isLocalVar(decl))
472         {
473             /* Note that we only do local split, so there is no need to handle partial declare and splitted declare */
474             if (livenessCandidate(decl, verifyRA) && decl->getAliasDeclare() == NULL)
475             {
476                 decl->getRegVar()->setId(numGlobalVarId++);
477                 if (decl->getRegVar()->getPhyReg() == NULL && !decl->getIsPartialDcl())
478                     numUnassignedVarId++;
479                 if (decl->getRegVar()->isPhyRegAssigned() == false)
480                 {
481                     phyRegAssigned = false;
482                 }
483             }
484             else
485             {
486                 decl->getRegVar()->setId(UNDEFINED_VAL);
487             }
488         }
489     }
490 
491     return phyRegAssigned;
492 }
493 
setLocalVarIDs(bool verifyRA,bool areAllPhyRegAssigned)494 bool LivenessAnalysis::setLocalVarIDs(bool verifyRA, bool areAllPhyRegAssigned)
495 {
496     numVarId = numGlobalVarId;
497     bool phyRegAssigned = areAllPhyRegAssigned;
498 
499     for (G4_Declare* decl : gra.kernel.Declares)
500     {
501         if (isLocalVar(decl))
502         {
503             if (livenessCandidate(decl, verifyRA) && decl->getAliasDeclare() == NULL)
504             {
505                 if (decl->getIsSplittedDcl())
506                 {
507                     decl->setSplitVarStartID(0);
508                 }
509                 if (decl->getIsPartialDcl())
510                 {
511                     auto declSplitDcl = gra.getSplittedDeclare(decl);
512                     if (declSplitDcl && declSplitDcl->getIsSplittedDcl())
513                     {
514                         if (numSplitStartID == 0)
515                         {
516                             numSplitStartID = numVarId;
517                         }
518 
519                         if (declSplitDcl->getSplitVarStartID() == 0)
520                         {
521                             declSplitDcl->setSplitVarStartID(numVarId);
522                         }
523                         numSplitVar++;
524                     }
525                     else
526                     {
527                         assert(0 && "Found child declare without parent");
528                     }
529                 }
530 
531                 decl->getRegVar()->setId(numVarId++);
532                 if (decl->getRegVar()->getPhyReg() == NULL && !decl->getIsPartialDcl())
533                     numUnassignedVarId++;
534                 if (decl->getRegVar()->isPhyRegAssigned() == false)
535                 {
536                     phyRegAssigned = false;
537                 }
538             }
539             else
540             {
541                 decl->getRegVar()->setId(UNDEFINED_VAL);
542             }
543         }
544     }
545 
546     return phyRegAssigned;
547 }
548 
setVarIDs(bool verifyRA,bool areAllPhyRegAssigned)549 bool LivenessAnalysis::setVarIDs(bool verifyRA, bool areAllPhyRegAssigned)
550 {
551     bool phyRegAssigned = areAllPhyRegAssigned;
552     for (G4_Declare* decl : gra.kernel.Declares)
553     {
554 
555         if (livenessCandidate(decl, verifyRA) && decl->getAliasDeclare() == NULL)
556         {
557             if (decl->getIsSplittedDcl())
558             {
559                 decl->setSplitVarStartID(0);
560             }
561             if (decl->getIsPartialDcl())
562             {
563                 auto declSplitDcl = gra.getSplittedDeclare(decl);
564                 if (declSplitDcl->getIsSplittedDcl())
565                 {
566                     if (numSplitStartID == 0)
567                     {
568                         numSplitStartID = numVarId;
569                     }
570 
571                     if (declSplitDcl->getSplitVarStartID() == 0)
572                     {
573                         declSplitDcl->setSplitVarStartID(numVarId);
574                     }
575                     numSplitVar++;
576                 }
577                 else
578                 {
579                     assert(0 && "Found child declare without parent");
580                 }
581             }
582 
583             // participate liveness analysis
584             decl->getRegVar()->setId(numVarId++);
585 
586             if (decl->getRegVar()->getPhyReg() == NULL && !decl->getIsPartialDcl())
587                 numUnassignedVarId++;
588 
589             //
590             // dump Reg Var info for debugging
591             //
592 
593             if (decl->getRegVar()->isPhyRegAssigned() == false)
594             {
595                 phyRegAssigned = false;
596             }
597 #ifdef DEBUG_VERBOSE_ON
598             DEBUG_EMIT(decl->getRegVar());
599             DEBUG_VERBOSE(" id = " << decl->getRegVar()->getId() << std::endl);
600 #endif
601         }
602         //
603         // those reg vars that are not candidates, set their id to
604         // undefined value
605         //
606         else
607         {
608             decl->getRegVar()->setId(UNDEFINED_VAL);
609         }
610     }
611 
612     numGlobalVarId = numVarId;
613 
614     return phyRegAssigned;
615 }
616 
LivenessAnalysis(GlobalRA & g,unsigned char kind,bool verifyRA,bool forceRun)617 LivenessAnalysis::LivenessAnalysis(
618         GlobalRA& g,
619         unsigned char kind,
620         bool verifyRA,
621         bool forceRun) :
622         selectedRF(kind),
623         pointsToAnalysis(g.pointsToAnalysis), m(4096), gra(g), fg(g.kernel.fg)
624 {
625     //
626     // NOTE:
627     // The maydef sets are simply aliases to the mayuse sets, since their uses are
628     // mutually exclusive.
629     //
630     // Go over each reg var if it's a liveness candidate, assign id for bitset.
631     //
632     bool areAllPhyRegAssigned = !forceRun;
633     bool hasStackCall = fg.getHasStackCalls() || fg.getIsStackCallFunc();
634 
635     if ((selectedRF & G4_GRF) &&
636         !fg.builder->getOption(vISA_GlobalSendVarSplit) && !hasStackCall)
637     {
638         areAllPhyRegAssigned = setGlobalVarIDs(verifyRA, areAllPhyRegAssigned);
639         areAllPhyRegAssigned = setLocalVarIDs(verifyRA, areAllPhyRegAssigned);
640     }
641     else
642     {
643         areAllPhyRegAssigned = setVarIDs(verifyRA, areAllPhyRegAssigned);
644     }
645 
646     // For Alias Dcl
647     for (auto decl : gra.kernel.Declares)
648     {
649         if (livenessCandidate(decl, verifyRA) && decl->getAliasDeclare() != NULL)
650         {
651             // It is an alias declaration. Set its id = base declaration id
652             decl->getRegVar()->setId(decl->getAliasDeclare()->getRegVar()->getId());
653         }
654 #ifdef DEBUG_VERBOSE_ON
655         DEBUG_EMIT(decl->getRegVar());
656         DEBUG_VERBOSE(" id = " << decl->getRegVar()->getId() << std::endl);
657 #endif
658     }
659 
660     //
661     // if no chosen candidate for reg allocation return
662     //
663     if (numVarId == 0 ||
664         (verifyRA == false &&
665          areAllPhyRegAssigned == true))
666     {
667         // If all variables have physical register assignments
668         // there are no candidates for allocation
669         numVarId = 0;
670         return;
671     }
672 
673     //
674     // put selected reg vars into vars[]
675     //
676     vars.resize(numVarId);
677     for (auto dcl : gra.kernel.Declares)
678     {
679         if (livenessCandidate(dcl, verifyRA) &&
680             dcl->getAliasDeclare() == NULL)
681         {
682             G4_RegVar* var = dcl->getRegVar();
683             vars[var->getId()] = var;
684         }
685     }
686 
687     addr_taken = BitSet(numVarId, false);
688 
689     numBBId = (unsigned) fg.size();
690 
691     def_in.resize(numBBId);
692     def_out.resize(numBBId);
693     use_in.resize(numBBId);
694     use_out.resize(numBBId);
695     use_gen.resize(numBBId);
696     use_kill.resize(numBBId);
697     indr_use.resize(numBBId);
698 
699     for (unsigned i = 0; i < numBBId; i++)
700     {
701         def_in[i]  = BitSet(numVarId, false);
702         def_out[i] = BitSet(numVarId, false);
703         use_in[i]  = BitSet(numVarId, false);
704         use_out[i] = BitSet(numVarId, false);
705         use_gen[i] = BitSet(numVarId, false);
706         use_kill[i]= BitSet(numVarId, false);
707         indr_use[i]= BitSet(numVarId, false);
708     }
709 }
710 
~LivenessAnalysis()711 LivenessAnalysis::~LivenessAnalysis()
712 {
713     //
714     // if no chosen candidate for reg allocation return
715     //
716     if (numVarId == 0)
717     {
718         return;
719     }
720 
721     // Remove liveness inserted pseudo kills
722     for (auto bb : fg)
723     {
724         for (auto instIt = bb->begin(); instIt != bb->end();)
725         {
726             auto inst = (*instIt);
727             if (inst->isPseudoKill())
728             {
729                 auto src0 = inst->getSrc(0);
730                 MUST_BE_TRUE(src0 && src0->isImm(), "expecting src0 immediate for pseudo kill");
731                 if (src0->asImm()->getImm() == PseudoKillType::FromLiveness)
732                 {
733                     instIt = bb->erase(instIt);
734                     continue;
735                 }
736             }
737             ++instIt;
738         }
739     }
740 }
741 
livenessCandidate(const G4_Declare * decl,bool verifyRA) const742 bool LivenessAnalysis::livenessCandidate(const G4_Declare* decl, bool verifyRA) const
743 {
744     const LocalLiveRange* declLR = nullptr;
745     if (verifyRA == false && (declLR = gra.getLocalLR(decl)) && declLR->getAssigned() && !declLR->isEOT())
746     {
747         return false;
748     }
749     else if ((selectedRF & decl->getRegFile()))
750     {
751         if (selectedRF & G4_GRF)
752         {
753             if ((decl->getRegFile() & G4_INPUT) && decl->getRegVar()->isPhyRegAssigned() && !decl->getRegVar()->isGreg())
754             {
755                 return false;
756             }
757             if (decl->getByteSize() == 0)
758             {
759                 // regrettably, this can happen for arg/retval pre-defined variable
760                 return false;
761             }
762         }
763         return true;
764     }
765     else
766     {
767         return false;
768     }
769 }
770 
updateKillSetForDcl(G4_Declare * dcl,BitSet * curBBGen,BitSet * curBBKill,G4_BB * curBB,BitSet * entryBBGen,BitSet * entryBBKill,G4_BB * entryBB,unsigned scopeID)771 void LivenessAnalysis::updateKillSetForDcl(G4_Declare* dcl, BitSet* curBBGen, BitSet* curBBKill, G4_BB* curBB, BitSet* entryBBGen, BitSet* entryBBKill, G4_BB* entryBB, unsigned scopeID)
772 {
773     if (scopeID != 0 &&
774         scopeID != UINT_MAX &&
775         dcl->getScopeID() == scopeID)
776     {
777         entryBBKill->set(dcl->getRegVar()->getId(), true);
778         entryBBGen->set(dcl->getRegVar()->getId(), false);
779 #ifdef DEBUG_VERBOSE_ON
780         DEBUG_VERBOSE("Killed sub-routine scope " << dcl->getName() << " at bb with id = " << entryBB->getId() << std::endl);
781 #endif
782     }
783 }
784 
785 // Scoping info is stored per decl. A variable can be either global scope (default),
786 // sub-routine scope, or basic block scope. This function iterates over all
787 // instructions and their operands in curBB and if scoping for it is set in symbol
788 // table then it marks kills accordingly. A bb scope variable is killed in the bb it appears
789 // and a sub-routine local variable is killed in entry block of the sub-routine. No
790 // error check is performed currently so if variable scoping information is incorrect
791 // then generated code will be so too.
performScoping(BitSet * curBBGen,BitSet * curBBKill,G4_BB * curBB,BitSet * entryBBGen,BitSet * entryBBKill,G4_BB * entryBB)792 void LivenessAnalysis::performScoping(BitSet* curBBGen, BitSet* curBBKill, G4_BB* curBB, BitSet* entryBBGen, BitSet* entryBBKill, G4_BB* entryBB)
793 {
794     unsigned scopeID = curBB->getScopeID();
795     for (G4_INST* inst : *curBB)
796     {
797         G4_DstRegRegion* dst = inst->getDst();
798 
799         if (dst &&
800             dst->getBase()->isRegAllocPartaker())
801         {
802             G4_Declare* dcl = GetTopDclFromRegRegion(dst);
803             updateKillSetForDcl(dcl, curBBGen, curBBKill, curBB, entryBBGen, entryBBKill, entryBB, scopeID);
804         }
805 
806         for (int i = 0; i < G4_MAX_SRCS; i++)
807         {
808             G4_Operand* src = inst->getSrc(i);
809 
810             if (src)
811             {
812                 if (src->isSrcRegRegion() &&
813                     src->asSrcRegRegion()->getBase()->isRegAllocPartaker())
814                 {
815                     G4_Declare* dcl = GetTopDclFromRegRegion(src);
816                     updateKillSetForDcl(dcl, curBBGen, curBBKill, curBB, entryBBGen, entryBBKill, entryBB, scopeID);
817                 }
818                 else if (src->isAddrExp() &&
819                     src->asAddrExp()->getRegVar()->isRegAllocPartaker())
820                 {
821                     G4_Declare* dcl = src->asAddrExp()->getRegVar()->getDeclare()->getRootDeclare();
822 
823                     updateKillSetForDcl(dcl, curBBGen, curBBKill, curBB, entryBBGen, entryBBKill, entryBB, scopeID);
824                 }
825             }
826         }
827     }
828 }
829 
detectNeverDefinedVarRows()830 void LivenessAnalysis::detectNeverDefinedVarRows()
831 {
832     // This function records variables and its rows that are never defined
833     // in the kernel. This information helps detect kills for partial
834     // writes when VISA optimizer optimizes away some rows of a variable.
835     // In interest of compile time we only look for full rows that are
836     // not defined rather than sub-regs.
837     std::unordered_map<G4_Declare*, BitSet> largeDefs;
838 
839     // Populate largeDefs map with dcls > 1 GRF size
840     for (auto dcl : gra.kernel.Declares)
841     {
842         if (dcl->getAliasDeclare() || dcl->getIsPartialDcl() || dcl->getAddressed())
843             continue;
844 
845         if (dcl->getRegFile() != G4_GRF)
846             continue;
847 
848         unsigned int dclNumRows = dcl->getNumRows();
849 
850         if (dclNumRows < 2)
851             continue;
852 
853         BitSet bitset(dclNumRows, false);
854 
855         largeDefs.insert(std::make_pair(dcl, std::move(bitset)));
856     }
857 
858     if (largeDefs.empty())
859         return;
860 
861     const unsigned bytesPerGRF = numEltPerGRF<Type_UB>();
862 
863     // Update row usage of each dcl in largeDefs
864     for (auto bb : gra.kernel.fg)
865     {
866         for (auto inst : *bb)
867         {
868             auto dst = inst->getDst();
869             if (!dst)
870                 continue;
871 
872             auto dstTopDcl = dst->getTopDcl();
873 
874             if (dstTopDcl)
875             {
876                 auto it = largeDefs.find(dstTopDcl);
877 
878                 if (it == largeDefs.end())
879                 {
880                     continue;
881                 }
882 
883                 unsigned int rowStart = dst->getLeftBound() / bytesPerGRF;
884                 unsigned int rowEnd = dst->getRightBound() / bytesPerGRF;
885 
886                 it->second.set(rowStart, rowEnd);
887             }
888         }
889     }
890 
891     // Propagate largeDefs to neverDefinedRows bit vector to later bitwise OR it
892     for (auto it : largeDefs)
893     {
894         unsigned int numRows = it.first->getNumRows();
895         BitSet* undefinedRows = nullptr;
896         for (unsigned int i = 0; i < numRows; i++)
897         {
898             if (!it.second.isSet(i))
899             {
900                 if (undefinedRows == nullptr)
901                 {
902                     undefinedRows = &neverDefinedRows.emplace(it.first, BitSet(it.first->getByteSize(), false)).first->second;
903                 }
904                 undefinedRows->set(i * bytesPerGRF, i * bytesPerGRF + bytesPerGRF - 1);
905             }
906         }
907     }
908 }
909 
910 //
911 // compute liveness of reg vars
912 // Each reg var indicates a region within the register file. As such, the case in which two consecutive defs
913 // of a reg region without any use in between does not mean the second def overwrites the first one because the two defs
914 // may write different parts of the region. Def vectors are used to track which definitions of reg vars reach
915 // the entry and the end of a basic block, which tell us the first definitions of reg vars. Use vectors track which
916 // uses of reg vars are anticipated, which tell use the uses of reg vars.Def and Use vectors encapsulate the liveness
917 // of reg vars.
918 //
computeLiveness()919 void LivenessAnalysis::computeLiveness()
920 {
921     //
922     // no reg var is selected, then no need to compute liveness
923     //
924     if (getNumSelectedVar() == 0)
925     {
926         return;
927     }
928 
929     startTimer(TimerID::LIVENESS);
930 
931 #ifdef DEBUG_VERBOSE_ON
932     std::vector<FuncInfo*>& fns = fg.funcInfoTable;
933 #endif
934     //
935     // mark input arguments live at the entry of kernel
936     // mark output arguments live at the exit of kernel
937     //
938     BitSet inputDefs(numVarId, false);
939     BitSet outputUses(numVarId, false);
940 
941     for (unsigned i = 0; i < numVarId; i++)
942     {
943         bool setLiveIn = false;
944 
945         G4_Declare *decl = vars[i]->getDeclare();
946 
947         if ((decl->isInput() == true &&
948             !(fg.builder->getFCPatchInfo() &&
949                 fg.builder->getFCPatchInfo()->getFCComposableKernel() &&
950                 !decl->isLiveIn())) &&
951             !(fg.builder->isPreDefArg(decl) &&
952                 (fg.builder->getIsKernel() ||
953                     (fg.getIsStackCallFunc() &&
954                         fg.builder->getArgSize() == 0))))
955             setLiveIn = true;
956 
957         if (fg.builder->getOption(vISA_enablePreemption) &&
958             decl == fg.builder->getBuiltinR0())
959             setLiveIn = true;
960 
961         if(setLiveIn)
962         {
963             inputDefs.set(i, true);
964 #ifdef DEBUG_VERBOSE_ON
965             DEBUG_VERBOSE("First def input = " << decl->getName() << std::endl);
966 #endif
967         }
968 
969         bool setLiveOut = false;
970         if (decl->isOutput() == true &&
971             !(fg.builder->isPreDefRet(decl) &&
972                 (fg.builder->getIsKernel() ||
973                     (fg.getIsStackCallFunc() &&
974                         fg.builder->getRetVarSize() == 0))))
975             setLiveOut = true;
976 
977         if (fg.builder->getOption(vISA_enablePreemption) &&
978             decl == fg.builder->getBuiltinR0())
979             setLiveOut = true;
980 
981         if(setLiveOut)
982         {
983             outputUses.set(i, true);
984 #ifdef DEBUG_VERBOSE_ON
985             DEBUG_VERBOSE("First def output    = " << decl->getName() << std::endl);
986 #endif
987         }
988     }
989 
990     //
991     // clean up def_in & def_out that are used in markFirstDef
992     //
993     for (unsigned i = 0; i < numBBId; i++)
994     {
995         def_in[i].clear();
996         def_out[i].clear();
997     }
998 
999     if (livenessClass(G4_GRF))
1000         detectNeverDefinedVarRows();
1001 
1002     //
1003     // compute def_out and use_in vectors for each BB
1004     //
1005     for (G4_BB * bb  : fg)
1006     {
1007         unsigned id = bb->getId();
1008 
1009         computeGenKillandPseudoKill(bb, def_out[id], use_in[id], use_gen[id], use_kill[id]);
1010 
1011         //
1012         // exit block: mark output parameters live
1013         //
1014         if (bb->Succs.empty())
1015         {
1016             use_out[id] = outputUses;
1017         }
1018     }
1019 
1020     G4_BB* subEntryBB = NULL;
1021     BitSet* subEntryKill = NULL;
1022     BitSet* subEntryGen = NULL;
1023 
1024     if (fg.getKernel()->getInt32KernelAttr(Attributes::ATTR_Target) == VISA_CM)
1025     {
1026         //
1027         // Top-down order of BB list iteration guarantees that
1028         // entry BB of each sub-routine will be seen before any other
1029         // BBs belonging to that sub-routine. This assumes that BBs of
1030         // a sub-routine are laid out back to back in bb list.
1031         //
1032         for (auto bb : fg)
1033         {
1034             unsigned id = bb->getId();
1035 
1036             if (bb->getScopeID() != 0 &&
1037                 bb->getScopeID() != UINT_MAX)
1038             {
1039                 subEntryBB = fg.sortedFuncTable[bb->getScopeID() - 1]->getInitBB();
1040                 unsigned entryBBID = subEntryBB->getId();
1041                 subEntryKill = &use_kill[entryBBID];
1042                 subEntryGen = &use_gen[entryBBID];
1043             }
1044 
1045             //
1046             // Mark explicitly scoped variables as kills
1047             //
1048             performScoping(&use_gen[id], &use_kill[id], bb, subEntryGen, subEntryKill, subEntryBB);
1049         }
1050     }
1051 
1052     //
1053     // compute indr accesses
1054     //
1055     if (selectedRF & G4_GRF)
1056     {
1057         // only GRF variables can have their address taken
1058         for (auto bb : fg)
1059         {
1060             const REGVAR_VECTOR& grfVec = pointsToAnalysis.getIndrUseVectorForBB(bb->getId());
1061             for (const pointInfo addrTaken : grfVec)
1062             {
1063                 indr_use[bb->getId()].set(addrTaken.var->getId(), true);
1064                 addr_taken.set(addrTaken.var->getId(), true);
1065             }
1066         }
1067     }
1068     //
1069     // Perform inter-procedural context-sensitive flow analysis.
1070     // This is required when the CFG involves function calls with multiple calling
1071     // contexts for the same function, as peforming just a context-insensitive
1072     // analysis results in uses being propgated along paths that are not feasible
1073     // in the actual program.
1074     //
1075     if (performIPA())
1076     {
1077         hierarchicalIPA(inputDefs, outputUses);
1078         stopTimer(TimerID::LIVENESS);
1079         return;
1080     }
1081 
1082 
1083     if (fg.getKernel()->getInt32KernelAttr(Attributes::ATTR_Target) == VISA_3D &&
1084         (selectedRF & G4_GRF || selectedRF & G4_FLAG) &&
1085         (numFnId > 0))
1086     {
1087         // compute the maydef for each subroutine
1088         maydefAnalysis();
1089 
1090         //
1091         // dump vectors for debugging
1092         //
1093 #ifdef DEBUG_VERBOSE_ON
1094         dump_bb_vector("MAYDEF IN", maydef_in);
1095         dump_bb_vector("MAYDEF OUT", maydef_out);
1096         dump_fn_vector("MAYDEF", fns, maydef);
1097 #endif
1098     }
1099 
1100     //
1101     // backward flow analysis to propagate uses (locate last uses)
1102     //
1103 
1104     bool change = true;
1105 
1106     while (change)
1107     {
1108         change = false;
1109         BB_LIST::iterator rit = fg.end();
1110         do
1111         {
1112             //
1113             // use_out = use_in(s1) + use_in(s2) + ...
1114             // where s1 s2 ... are the successors of bb
1115             // use_in  = use_gen + (use_out - use_kill)
1116             //
1117             --rit;
1118             if (contextFreeUseAnalyze((*rit), change))
1119             {
1120                 change = true;
1121             }
1122 
1123         } while (rit != fg.begin());
1124     }
1125 
1126     //
1127     // forward flow analysis to propagate defs (locate first defs)
1128     //
1129 
1130     //
1131     // initialize entry block with payload input
1132     //
1133     def_in[fg.getEntryBB()->getId()] = inputDefs;
1134     change = true;
1135     while (change)
1136     {
1137         change = false;
1138         for (auto bb : fg)
1139         {
1140             //
1141             // def_in   = def_out(p1) + def_out(p2) + ... where p1 p2 ... are the predecessors of bb
1142             // def_out |= def_in
1143             //
1144             if (contextFreeDefAnalyze(bb, change))
1145             {
1146                 change = true;
1147             }
1148         }
1149     }
1150 
1151 #if 0
1152     // debug code to compare old v. new IPA
1153     {
1154         std::vector<G4_Declare*> idToDecl;
1155         idToDecl.resize(numVarId);
1156         for (auto dcl : fg.getKernel()->Declares)
1157         {
1158             auto id = dcl->getRegVar()->getId();
1159             if (id < numVarId)
1160             {
1161                 idToDecl[id] = dcl;
1162             }
1163         }
1164 
1165         fg.getKernel()->dump(std::cerr);
1166 
1167         auto printLive = [this, &idToDecl](int id)
1168         {
1169             std::cerr << "Liveness for " << idToDecl[id]->getName() << "\n";
1170             std::cerr << "Use In: ";
1171             for (int i = 0; i < (int) useInCopy.size(); ++i)
1172             {
1173                 if (useInCopy[i].isSet(id))
1174                 {
1175                     std::cerr << "BB" << i << " ";
1176                 }
1177             }
1178             std::cerr << "\n";
1179             std::cerr << "Use Out: ";
1180             for (int i = 0; i < (int) useOutCopy.size(); ++i)
1181             {
1182                 if (useOutCopy[i].isSet(id))
1183                 {
1184                     std::cerr << "BB" << i << " ";
1185                 }
1186             }
1187             std::cerr << "\n";
1188         };
1189 
1190         auto printSetDiff = [&idToDecl, this](const std::vector<BitSet>& set1,
1191             const std::vector<BitSet>& set2)
1192         {
1193             for (int i = 0, size = (int) set1.size(); i < size; ++i)
1194             {
1195                 bool printBB = true;
1196                 for (int j = 0; j < (int)numVarId; ++j)
1197                 {
1198                     if (set1[i].isSet(j) ^ set2[i].isSet(j))
1199                     {
1200                         if (printBB)
1201                         {
1202                             std::cerr << "BB" << i << ": ";
1203                             printBB = false;
1204                         }
1205                         std::cerr << idToDecl[j]->getName() << "(" << j << "):" <<
1206                             (set1[i].isSet(j) ? 1 : 0) << " ";
1207                     }
1208                 }
1209                 if (!printBB)
1210                 {
1211                     std::cerr << "\n";
1212                 }
1213             }
1214         };
1215 
1216         std::cerr << "use-in comparison:\n";
1217         printSetDiff(use_in, useInCopy);
1218 
1219         std::cerr << "use-out comparison:\n";
1220         printSetDiff(use_out, useOutCopy);
1221     }
1222 #endif
1223 
1224     //
1225     // dump vectors for debugging
1226     //
1227 #if 0
1228     {
1229         dump_bb_vector("DEF IN", def_in);
1230         dump_bb_vector("DEF OUT", def_out);
1231         dump_bb_vector("USE IN", use_in);
1232         dump_bb_vector("USE OUT", use_out);
1233     }
1234 #endif
1235 
1236     stopTimer(TimerID::LIVENESS);
1237 }
1238 
1239 //
1240 // compute the maydef set for every subroutine
1241 // This includes recursively all the variables that are defined by the
1242 // subroutine, but does not include defs in the caller
1243 // This means this must be called before we do fix-point on def_in/def_out
1244 // and destroy their original values
1245 // This is used by augmentation later to model all variables that may be defined by a call
1246 // FIXME: we should use a separate def set to represent declares defined in each BB
1247 //
maydefAnalysis()1248 void LivenessAnalysis::maydefAnalysis()
1249 {
1250     for (auto func : fg.sortedFuncTable)
1251     {
1252         unsigned fid = func->getId();
1253         if (fid == UINT_MAX)
1254         {
1255             // entry kernel
1256             continue;
1257         }
1258 
1259         auto& BV = subroutineMaydef[func];
1260         for (auto&& bb : func->getBBList())
1261         {
1262             BV |= def_out[bb->getId()];
1263         }
1264         for (auto&& callee : func->getCallees())
1265         {
1266             BV |= subroutineMaydef[callee];
1267         }
1268     }
1269 }
1270 
1271 //
1272 // Use analysis for this subroutine only
1273 // use_out[call-BB] = use_in[ret-BB]
1274 // use_out[exit-BB] should be initialized by the caller
1275 //
useAnalysis(FuncInfo * subroutine)1276 void LivenessAnalysis::useAnalysis(FuncInfo* subroutine)
1277 {
1278     bool changed = false;
1279     do
1280     {
1281         changed = false;
1282         for (auto BI = subroutine->getBBList().rbegin(), BE = subroutine->getBBList().rend(); BI != BE; ++BI)
1283         {
1284             //
1285             // use_out = use_in(s1) + use_in(s2) + ...
1286             // where s1 s2 ... are the successors of bb
1287             // use_in  = use_gen + (use_out - use_kill)
1288             //
1289             G4_BB* bb = *BI;
1290             unsigned bbid = bb->getId();
1291             if (bb->getBBType() & G4_BB_EXIT_TYPE)
1292             {
1293                 // use_out is set by caller
1294             }
1295             else if (bb->getBBType() & G4_BB_CALL_TYPE)
1296             {
1297                 use_out[bbid] |= use_in[bb->getPhysicalSucc()->getId()];
1298             }
1299             else
1300             {
1301                 for (auto succ : bb->Succs)
1302                 {
1303                     use_out[bbid] |= use_in[succ->getId()];
1304                 }
1305             }
1306 
1307             if (changed)
1308             {
1309                 // no need to update changed, save a copy
1310                 use_in[bbid] = use_out[bbid];
1311                 use_in[bbid] -= use_kill[bbid];
1312                 use_in[bbid] |= use_gen[bbid];
1313             }
1314             else
1315             {
1316                 BitSet oldUseIn = use_in[bbid];
1317 
1318                 use_in[bbid] = use_out[bbid];
1319                 use_in[bbid] -= use_kill[bbid];
1320                 use_in[bbid] |= use_gen[bbid];
1321 
1322                 if (!(bb->getBBType() & G4_BB_INIT_TYPE) && oldUseIn != use_in[bbid])
1323                 {
1324                     changed = true;
1325                 }
1326             }
1327         }
1328     } while (changed);
1329 }
1330 
1331 //
1332 // Use analysis for this subroutine only, considering both arg/retval of its callees
1333 // use_out[call-BB] = (use_in[ret-BB] | arg[callee]) - retval[callee]
1334 //
useAnalysisWithArgRetVal(FuncInfo * subroutine,const std::unordered_map<FuncInfo *,BitSet> & args,const std::unordered_map<FuncInfo *,BitSet> & retVal)1335 void LivenessAnalysis::useAnalysisWithArgRetVal(FuncInfo* subroutine,
1336     const std::unordered_map<FuncInfo*, BitSet>& args, const std::unordered_map<FuncInfo*, BitSet>& retVal)
1337 {
1338     bool changed = false;
1339     do
1340     {
1341         changed = false;
1342         for (auto BI = subroutine->getBBList().rbegin(), BE = subroutine->getBBList().rend(); BI != BE; ++BI)
1343         {
1344             //
1345             // use_out = use_in(s1) + use_in(s2) + ...
1346             // where s1 s2 ... are the successors of bb
1347             // use_in  = use_gen + (use_out - use_kill)
1348             //
1349             G4_BB* bb = *BI;
1350             unsigned bbid = bb->getId();
1351             if (bb->getBBType() & G4_BB_EXIT_TYPE)
1352             {
1353                 // use_out is set by previous analysis
1354             }
1355             else if (bb->getBBType() & G4_BB_CALL_TYPE)
1356             {
1357                 use_out[bbid] = use_in[bb->getPhysicalSucc()->getId()];
1358                 auto callee = bb->getCalleeInfo();
1359                 auto BVIt = args.find(callee);
1360                 MUST_BE_TRUE(BVIt != args.end(), "Missing entry in map");
1361                 use_out[bbid] |= (*BVIt).second;
1362                 BVIt = retVal.find(callee);
1363                 MUST_BE_TRUE(BVIt != retVal.end(), "Missing entry in map");
1364                 use_out[bbid] -= (*BVIt).second;
1365             }
1366             else
1367             {
1368                 for (auto succ : bb->Succs)
1369                 {
1370                     use_out[bbid] |= use_in[succ->getId()];
1371                 }
1372             }
1373 
1374             if (changed)
1375             {
1376                 // no need to update changed, save a copy
1377                 use_in[bbid] = use_out[bbid];
1378                 use_in[bbid] -= use_kill[bbid];
1379                 use_in[bbid] |= use_gen[bbid];
1380             }
1381             else
1382             {
1383                 BitSet oldUseIn = use_in[bbid];
1384 
1385                 use_in[bbid] = use_out[bbid];
1386                 use_in[bbid] -= use_kill[bbid];
1387                 use_in[bbid] |= use_gen[bbid];
1388 
1389                 if (!(bb->getBBType() & G4_BB_INIT_TYPE) && oldUseIn != use_in[bbid])
1390                 {
1391                     changed = true;
1392                 }
1393             }
1394         }
1395     } while (changed);
1396 }
1397 
1398 //
1399 // Def analysis for each subroutine only
1400 // at a call site, we do
1401 // def_in[ret-BB] = def_out[call-BB] U def_out[callee exit-BB]
1402 // callee's def_in/def_out is not modified
1403 //
defAnalysis(FuncInfo * subroutine)1404 void LivenessAnalysis::defAnalysis(FuncInfo* subroutine)
1405 {
1406 
1407     //def_in[bb] = null (inputs for entry BB)
1408     //def_out[bb] is initialized to all defs in the bb
1409     bool changed = false;
1410     do
1411     {
1412         changed = false;
1413         for (auto&& bb : subroutine->getBBList())
1414         {
1415             uint32_t bbid = bb->getId();
1416             std::optional<BitSet> defInOrNull = std::nullopt;
1417             if (!changed)
1418             {
1419                 defInOrNull = def_in[bbid];
1420             }
1421             auto phyPredBB = (bb == fg.getEntryBB()) ? nullptr : bb->getPhysicalPred();
1422             if (phyPredBB && (phyPredBB->getBBType() & G4_BB_CALL_TYPE))
1423             {
1424                 // this is the return BB, we take the def_out of the callBB + the predecessors
1425                 G4_BB* callBB = bb->getPhysicalPred();
1426                 def_in[bbid] |= def_out[callBB->getId()];
1427                 for (auto&& pred : bb->Preds)
1428                 {
1429                     def_in[bbid] |= def_out[pred->getId()];
1430                 }
1431             }
1432             else if (bb->getBBType() & G4_BB_INIT_TYPE)
1433             {
1434                 // do nothing as we don't want to propagate caller defs yet
1435             }
1436             else
1437             {
1438                 for (auto&& pred : bb->Preds)
1439                 {
1440                     def_in[bbid] |= def_out[pred->getId()];
1441                 }
1442             }
1443 
1444             if (!changed)
1445             {
1446                 if (def_in[bbid] != defInOrNull.value())
1447                 {
1448                     changed = true;
1449                 }
1450             }
1451             def_out[bbid] |= def_in[bbid];
1452         }
1453     } while (changed);
1454 }
1455 
hierarchicalIPA(const BitSet & kernelInput,const BitSet & kernelOutput)1456 void LivenessAnalysis::hierarchicalIPA(const BitSet& kernelInput, const BitSet& kernelOutput)
1457 {
1458 
1459     assert (fg.sortedFuncTable.size() > 0 && "topological sort must already be performed");
1460     std::unordered_map<FuncInfo*, BitSet> args;
1461     std::unordered_map<FuncInfo*, BitSet> retVal;
1462 
1463     auto initKernelLiveOut = [this, &kernelOutput]()
1464     {
1465         for (auto&& bb : fg.kernelInfo->getBBList())
1466         {
1467             if (bb->Succs.empty())
1468             {
1469                 // EOT BB
1470                 use_out[bb->getId()] = kernelOutput;
1471             }
1472         }
1473     };
1474     // reset all live-in/out sets except for the kernel live-out
1475     auto clearLiveSets = [this]()
1476     {
1477         for (auto subroutine : fg.sortedFuncTable)
1478         {
1479             for (auto bb : subroutine->getBBList())
1480             {
1481                 use_in[bb->getId()].clear();
1482                 use_out[bb->getId()].clear();
1483             }
1484         }
1485     };
1486 
1487     // top-down traversal to compute retval for each subroutine
1488     // retval[s] = live_out[s] - live_in[s],
1489     // where live_out[s] is the union of the live-in of the ret BB at each call site (hence top-down traversal).
1490     // this is not entirely accurate since we may have pass-through retVals
1491     // (e.g., A call B call C, C's retVal is pass-through in B and used in A, which
1492     //  means it won't be killed in B if we do top-down)
1493     // But for now let's trade some loss of accuracy to save one more round of fix-point
1494     initKernelLiveOut();
1495     for (auto FI = fg.sortedFuncTable.rbegin(), FE = fg.sortedFuncTable.rend(); FI != FE; ++FI)
1496     {
1497         auto subroutine = *FI;
1498         useAnalysis(subroutine);
1499         if (subroutine != fg.kernelInfo)
1500         {
1501             retVal[subroutine] = use_out[subroutine->getExitBB()->getId()];
1502             retVal[subroutine] -= use_in[subroutine->getInitBB()->getId()];
1503         }
1504         for (auto&& bb : subroutine->getBBList())
1505         {
1506             if (bb->getBBType() & G4_BB_CALL_TYPE)
1507             {
1508                 G4_BB* retBB = bb->getPhysicalSucc();
1509                 G4_BB* exitBB = bb->getCalleeInfo()->getExitBB();
1510                 assert((exitBB->getBBType() & G4_BB_EXIT_TYPE) &&
1511                     "should be a subroutine's exit BB");
1512                 use_out[exitBB->getId()] |= use_in[retBB->getId()];
1513             }
1514         }
1515     }
1516 
1517     // bottom-up traversal to compute arg for each subroutine
1518     // arg[s] = live-in[s], except retval of its callees are excluded as by definition they will not be live-in
1519     // The live-out of each subroutine is initialized to null so that
1520     // args are limited to variables actually used in this subroutine (and its callees)
1521     clearLiveSets();
1522     initKernelLiveOut();
1523     for (auto subroutine : fg.sortedFuncTable)
1524     {
1525         useAnalysisWithArgRetVal(subroutine, args, retVal);
1526         if (subroutine != fg.kernelInfo)
1527         {
1528             args[subroutine] = use_in[subroutine->getInitBB()->getId()];
1529             args[subroutine] -= use_out[subroutine->getExitBB()->getId()];
1530         }
1531     }
1532 
1533     // the real deal -- top-down traversal taking arg/retval/live-through all into consideration
1534     // again top-down traversal is needed to compute the live-out of each subroutine.
1535     clearLiveSets();
1536     initKernelLiveOut();
1537     for (auto FI = fg.sortedFuncTable.rbegin(), FE = fg.sortedFuncTable.rend(); FI != FE; ++FI)
1538     {
1539         auto subroutine = *FI;
1540         useAnalysisWithArgRetVal(subroutine, args, retVal);
1541         for (auto&& bb : subroutine->getBBList())
1542         {
1543             if (bb->getBBType() & G4_BB_CALL_TYPE)
1544             {
1545                 G4_BB* retBB = bb->getPhysicalSucc();
1546                 G4_BB* exitBB = bb->getCalleeInfo()->getExitBB();
1547                 assert((exitBB->getBBType() & G4_BB_EXIT_TYPE) &&
1548                     "should be a subroutine's exit BB");
1549                 use_out[exitBB->getId()] |= use_in[retBB->getId()];
1550             }
1551         }
1552     }
1553 
1554     maydefAnalysis();   // must be done before defAnalysis!
1555 
1556     // algorithm sketch for def-in/def-out:
1557     // In reverse topological order :
1558     //  -- Run def analysis on subroutine
1559     //  -- at each call site:
1560     //       def_in[ret-BB] |= def_out[call-BB] U def_out[exit-BB]
1561     // In topological order :
1562     //  -- At each call site:
1563     //       add def_out[call-BB] to all of callee's BBs
1564     def_in[fg.getEntryBB()->getId()] = kernelInput;
1565     for (auto subroutine : fg.sortedFuncTable)
1566     {
1567         defAnalysis(subroutine);
1568     }
1569 
1570 
1571     // FIXME: I assume we consider all caller's defs to be callee's defs too?
1572     for (auto FI = fg.sortedFuncTable.rbegin(), FE = fg.sortedFuncTable.rend(); FI != FE; ++FI)
1573     {
1574         auto subroutine = *FI;
1575         if (subroutine->getCallees().size() == 0)
1576         {
1577             continue;
1578         }
1579         for (auto&& bb : subroutine->getBBList())
1580         {
1581             if (bb->getBBType() & G4_BB_CALL_TYPE)
1582             {
1583                 auto callee = bb->getCalleeInfo();
1584                 for (auto&& calleeBB : callee->getBBList())
1585                 {
1586                     def_in[calleeBB->getId()] |= def_out[bb->getId()];
1587                     def_out[calleeBB->getId()] |= def_out[bb->getId()];
1588                 }
1589             }
1590         }
1591     }
1592 
1593 #if 0
1594     std::vector<G4_Declare*> idToDecl;
1595     idToDecl.resize(numVarId);
1596     for (auto dcl : fg.getKernel()->Declares)
1597     {
1598         auto id = dcl->getRegVar()->getId();
1599         if (id < numVarId)
1600         {
1601             idToDecl[id] = dcl;
1602         }
1603     }
1604     for (auto subroutine : fg.sortedFuncTable)
1605     {
1606         auto printVal = [&idToDecl](const BitSet& bs)
1607         {
1608             for (int i = 0, size = (int)bs.getSize(); i < size; ++i)
1609             {
1610                 if (bs.isSet(i))
1611                 {
1612                     std::cerr << idToDecl[i]->getName() << "(" << i << ") ";
1613                 }
1614             }
1615         };
1616 
1617         subroutine->dump();
1618 
1619         if (subroutine != fg.kernelInfo)
1620         {
1621             std::cerr << "\tArgs: ";
1622             printVal(args[subroutine->getId()]);
1623             std::cerr << "\n";
1624 
1625             std::cerr << "\tRetVal: ";
1626             printVal(retVal[subroutine->getId()]);
1627             std::cerr << "\n";
1628             std::cerr << "\tLiveThrough: ";
1629             BitSet liveThrough = use_in[subroutine->getInitBB()->getId()];
1630             liveThrough &= use_out[subroutine->getExitBB()->getId()];
1631             printVal(liveThrough);
1632             //std::cerr << "\n";
1633             //std::cerr << "\tDef in: ";
1634             //printVal(def_in[subroutine->getInitBB()->getId()]);
1635             //std::cerr << "\n";
1636             //std::cerr << "\tDef out: ";
1637             //printVal(def_out[subroutine->getInitBB()->getId()]);
1638             std::cerr << "\n";
1639         }
1640     }
1641 #endif
1642  }
1643 
1644 //
1645 // determine if the dst writes the whole region of target declare
1646 //
writeWholeRegion(const G4_BB * bb,const G4_INST * inst,G4_DstRegRegion * dst,const Options * opt) const1647 bool LivenessAnalysis::writeWholeRegion(const G4_BB* bb,
1648                                         const G4_INST* inst,
1649                                         G4_DstRegRegion* dst,
1650                                         const Options *opt) const
1651 {
1652     unsigned execSize = inst->getExecSize();
1653     MUST_BE_TRUE(dst->getBase()->isRegVar(), ERROR_REGALLOC);
1654 
1655     if (!bb->isAllLaneActive() && !inst->isWriteEnableInst() &&
1656         fg.getKernel()->getInt32KernelAttr(Attributes::ATTR_Target) != VISA_3D)
1657     {
1658         // conservatively assume non-nomask instructions in simd control flow
1659         // may not write the whole region
1660         return false;
1661     }
1662 
1663     if (inst->getPredicate())
1664     {
1665         return false;
1666     }
1667 
1668     if (inst->isFCall())
1669         return true;
1670 
1671     // Flags may be partially written when used as the destination
1672     // e.g., setp (M5_NM, 16) P11 V97(8,0)<0;1,0>
1673     // It can be only considered as a complete kill
1674     // if the computed bound diff matches with the number of flag elements
1675     if (dst->isFlag() == true)
1676     {
1677         if ((dst->getRightBound() - dst->getLeftBound() + 1) ==
1678             dst->getBase()->asRegVar()->getDeclare()->getNumberFlagElements())
1679         {
1680         return true;
1681     }
1682         else
1683         {
1684             return false;
1685         }
1686     }
1687 
1688     //
1689     // Find Primary Variable Declare
1690     //
1691 
1692     const G4_Declare* decl = ((const G4_RegVar*)dst->getBase())->getDeclare();
1693     const G4_Declare* primaryDcl = decl->getRootDeclare();
1694 
1695     //
1696     //  Cannot write whole register if
1697     //     * alias offset in non zero
1698     //     * reg or sub-reg offset is non zero
1699     //     * horiz stride is non zero
1700     //     * predicate is non null
1701     //
1702     if (decl->getAliasOffset() != 0 ||
1703         dst->getRegAccess() != Direct ||
1704         dst->getRegOff() != 0 ||
1705         dst->getSubRegOff() != 0 ||
1706         dst->getHorzStride() != 1 ||
1707         inst->isPartialWrite()) {
1708         return false;
1709     }
1710 
1711      //
1712     // For CISA3, pseudo-callee-save and pseudo-caller-save insts
1713     // are kills
1714     //
1715     if (fg.isPseudoDcl(primaryDcl))
1716     {
1717         return true;
1718     }
1719 
1720     //
1721     // If the region does not cover the whole declare then it does not write the whole region.
1722     //
1723     if (!primaryDcl->getRegVar()->isRegVarTransient() && (dst->getTypeSize() * execSize !=
1724         primaryDcl->getElemSize() * primaryDcl->getNumElems() * primaryDcl->getNumRows())) {
1725            return false;
1726     }
1727 
1728     return true;
1729 }
1730 
1731 //
1732 // determine if the dst writes the whole region of target declare
1733 //
writeWholeRegion(const G4_BB * bb,const G4_INST * inst,const G4_VarBase * flagReg) const1734 bool LivenessAnalysis::writeWholeRegion(const G4_BB* bb,
1735                                         const G4_INST* inst,
1736                                         const G4_VarBase* flagReg) const
1737 {
1738     if (!bb->isAllLaneActive() && !inst->isWriteEnableInst() && gra.kernel.getKernelType() != VISA_3D)
1739     {
1740         // conservatively assume non-nomask instructions in simd control flow
1741         // may not write the whole region
1742         return false;
1743     }
1744 
1745     const G4_Declare* decl = flagReg->asRegVar()->getDeclare();
1746     if (inst->getExecSize() != G4_ExecSize(decl->getNumberFlagElements()))
1747     {
1748         return false;
1749     }
1750 
1751     return true;
1752 }
1753 
1754 // Set bits in dst footprint based on dst region's left/right bound
footprintDst(const G4_BB * bb,const G4_INST * i,G4_Operand * opnd,BitSet * dstfootprint) const1755 void LivenessAnalysis::footprintDst(const G4_BB* bb,
1756     const G4_INST* i,
1757     G4_Operand* opnd,
1758     BitSet* dstfootprint) const
1759 {
1760     if (dstfootprint &&
1761         !(i->isPartialWrite()) &&
1762         ((bb->isAllLaneActive() ||
1763             i->isWriteEnableInst() == true) ||
1764             gra.kernel.getInt32KernelAttr(Attributes::ATTR_Target) == VISA_3D))
1765     {
1766         // Bitwise OR left-bound/right-bound with dst footprint to indicate
1767         // bytes that are written in to
1768         opnd->updateFootPrint(*dstfootprint, true);
1769     }
1770 }
1771 
1772 // Reset bits in srcfootprint based on src region's left/right bound
footprintSrc(const G4_INST * i,G4_Operand * opnd,BitSet * srcfootprint)1773 void LivenessAnalysis::footprintSrc(const G4_INST* i,
1774     G4_Operand *opnd,
1775     BitSet* srcfootprint)
1776 {
1777     // Reset bits in kill map footprint
1778     opnd->updateFootPrint(*srcfootprint, false);
1779 }
1780 
computeGenKillandPseudoKill(G4_BB * bb,BitSet & def_out,BitSet & use_in,BitSet & use_gen,BitSet & use_kill) const1781 void LivenessAnalysis::computeGenKillandPseudoKill(G4_BB* bb,
1782                                                    BitSet& def_out,
1783                                                    BitSet& use_in,
1784                                                    BitSet& use_gen,
1785                                                    BitSet& use_kill) const
1786 {
1787     //
1788     // Mark each fcall as using all globals and arg pre-defined var
1789     //
1790     if (bb->isEndWithFCall() && (selectedRF & G4_GRF))
1791     {
1792         const G4_Declare* arg = fg.builder->getStackCallArg();
1793         const G4_Declare* ret = fg.builder->getStackCallRet();
1794 
1795         const G4_FCALL* fcall = fg.builder->getFcallInfo(bb->back());
1796         MUST_BE_TRUE(fcall != NULL, "fcall info not found");
1797 
1798         if (arg->getByteSize() != 0)
1799         {
1800             // arg var is a use and a kill at each fcall
1801             if (fcall->getArgSize() != 0)
1802             {
1803                 use_gen.set(arg->getRegVar()->getId(), true);
1804             }
1805             use_kill.set(arg->getRegVar()->getId(), true);
1806         }
1807 
1808         if (ret->getByteSize() != 0)
1809         {
1810             // ret var is a kill at each fcall
1811             use_kill.set(ret->getRegVar()->getId(), true);
1812             if (fcall->getRetSize() != 0)
1813             {
1814                 def_out.set(ret->getRegVar()->getId(), true);
1815             }
1816         }
1817     }
1818 
1819     std::vector<BitSet> footprints(numVarId);
1820     std::vector<std::pair<G4_Declare*, INST_LIST_RITER>> pseudoKills;
1821 
1822     for (INST_LIST::reverse_iterator rit = bb->rbegin(), rend = bb->rend(); rit != rend; ++rit)
1823     {
1824         G4_INST* i = (*rit);
1825         if (i->isLifeTimeEnd())
1826         {
1827             continue;
1828         }
1829 
1830         G4_DstRegRegion* dst = i->getDst();
1831         if (dst)
1832         {
1833             G4_DstRegRegion* dstrgn = dst;
1834 
1835             if (dstrgn->getBase()->isRegAllocPartaker())
1836             {
1837                 G4_Declare* topdcl = GetTopDclFromRegRegion(dstrgn);
1838                 unsigned id = topdcl->getRegVar()->getId();
1839                 if (i->isPseudoKill())
1840                 {
1841                     // Mark kill, reset gen
1842                     use_kill.set(id, true);
1843                     use_gen.set(id, false);
1844 
1845                     continue;
1846                 }
1847 
1848                 BitSet* dstfootprint = &footprints[id];
1849 
1850                 if (dstfootprint->getSize() == 0)
1851                 {
1852                     // Write for dst was not seen before, so insert in to map
1853                     // bitsetSize is in bytes
1854                     unsigned int bitsetSize = dstrgn->isFlag() ? topdcl->getNumberFlagElements() : topdcl->getByteSize();
1855 
1856                     BitSet newBitSet(bitsetSize, false);
1857 
1858                     auto it = neverDefinedRows.find(topdcl);
1859                     if (it != neverDefinedRows.end())
1860                     {
1861                         // Bitwise OR new bitset with never defined rows
1862                         newBitSet |= it->second;
1863                     }
1864 
1865                     footprints[id] = std::move(newBitSet);
1866                     if (gra.isBlockLocal(topdcl) &&
1867                         topdcl->getAddressed() == false &&
1868                         dstrgn->getRegAccess() == Direct)
1869                     {
1870                         // Local live ranges are never live-out of the only
1871                         // basic block they are defined. So in top-down order
1872                         // the first lexical definition is a kill irrespective
1873                         // of the footprint. In cases when local live-range
1874                         // def and use have h-stride != 1, the footprint at this
1875                         // lexically first definition will not have all bits set.
1876                         // This prevents that def to be seen as a kill. A simple
1877                         // solution to this is to set all bits when initializing
1878                         // the bitvector while iterating in bottom-up order. As
1879                         // we traverse further up uses will reset bits and defs
1880                         // will set bits. So when we encounter the lexically first
1881                         // def, we will be guaranteed to find all bits set, thus
1882                         // interpreting that def as a kill.
1883                         dstfootprint->setAll();
1884                     }
1885                 }
1886 
1887                 if (dstrgn->getRegAccess() == Direct)
1888                 {
1889                     def_out.set(id, true);
1890                     //
1891                     // if the inst writes the whole region the var declared, we set use_kill
1892                     // so that use of var will not pass through (i.e., var's interval starts
1893                     // at this instruction.
1894                     //
1895                     if (writeWholeRegion(bb, i, dstrgn, fg.builder->getOptions()))
1896                     {
1897                         use_kill.set(id, true);
1898                         use_gen.set(id, false);
1899 
1900                         dstfootprint->setAll();
1901                     }
1902                     else
1903                     {
1904                         footprintDst(bb, i, dstrgn, dstfootprint);
1905 
1906                         use_gen.set(id, true);
1907                     }
1908                 }
1909                 else
1910                 {
1911                     use_gen.set(id, true);
1912                 }
1913             }
1914             else if ((selectedRF & G4_GRF) && dst->isIndirect())
1915             {
1916                 // conservatively add each variable potentially accessed by dst to gen
1917                 const REGVAR_VECTOR& pointsToSet = pointsToAnalysis.getAllInPointsToOrIndrUse(dst, bb);
1918                 for (auto pt : pointsToSet)
1919                 {
1920                     if (pt.var->isRegAllocPartaker())
1921                     {
1922                         use_gen.set(pt.var->getId(), true);
1923                     }
1924                 }
1925             }
1926         }
1927 
1928         //
1929         // process each source operand
1930         //
1931         for (unsigned j = 0; j < G4_MAX_SRCS; j++)
1932         {
1933             G4_Operand* src = i->getSrc(j);
1934 
1935             if (!src)
1936             {
1937                 continue;
1938             }
1939             if (src->isSrcRegRegion())
1940             {
1941                 G4_Declare* topdcl = GetTopDclFromRegRegion(src);
1942                 const G4_VarBase* base = (topdcl != nullptr ? topdcl->getRegVar() :
1943                     src->asSrcRegRegion()->getBase());
1944                 if (base->isRegAllocPartaker())
1945                 {
1946                     unsigned id = topdcl->getRegVar()->getId();
1947                     BitSet* srcfootprint = &footprints[id];
1948 
1949                     if (srcfootprint->getSize() != 0)
1950                     {
1951                         footprintSrc(i, src->asSrcRegRegion(), srcfootprint);
1952                     }
1953                     else
1954                     {
1955                         unsigned int bitsetSize = (src->asSrcRegRegion()->isFlag()) ? topdcl->getNumberFlagElements() : topdcl->getByteSize();
1956 
1957                         BitSet newBitSet(bitsetSize, false);
1958 
1959                         auto it = neverDefinedRows.find(topdcl);
1960                         if (it != neverDefinedRows.end())
1961                         {
1962                             // Bitwise OR new bitset with never defined rows
1963                             newBitSet |= it->second;
1964                         }
1965 
1966                         footprints[id] = std::move(newBitSet);
1967                         if (gra.isBlockLocal(topdcl) &&
1968                             topdcl->getAddressed() == false &&
1969                             (topdcl->getRegFile() == G4_ADDRESS ||
1970                                 src->asSrcRegRegion()->getRegAccess() == Direct))
1971                         {
1972                             srcfootprint->setAll();
1973                         }
1974                         footprintSrc(i, src->asSrcRegRegion(), srcfootprint);
1975                     }
1976 
1977                     use_gen.set(static_cast<const G4_RegVar*>(base)->getId(), true);
1978                 }
1979 
1980                 if ((selectedRF & G4_GRF) && src->getRegAccess() == IndirGRF)
1981                 {
1982                     int idx = 0;
1983                     G4_RegVar* grf;
1984                     G4_Declare* topdcl = GetTopDclFromRegRegion(src);
1985 
1986                     while ((grf = pointsToAnalysis.getPointsTo(topdcl->getRegVar(), idx++)) != NULL)
1987                     {
1988                         // grf is a variable that src potentially points to
1989                         // since we dont know exactly which part of grf is sourced
1990                         // assume entire grf is sourced
1991                         // Also add grf to the gen set as it may be potentially used
1992                         unsigned int id = grf->getId();
1993                         use_gen.set(id, true);
1994                         BitSet* srcfootprint = &footprints[id];
1995 
1996                         if (srcfootprint->getSize() != 0)
1997                         {
1998                             srcfootprint->clear();
1999 
2000                             DEBUG_VERBOSE("Found potential indirect use of " << grf->getDeclare()->getName() <<
2001                                 " so resetting its footprint" << std::endl);
2002                         }
2003                     }
2004                 }
2005             }
2006             //
2007             // treat the addr expr as both a use and a partial def
2008             //
2009             else if (src->isAddrExp())
2010             {
2011                 const G4_RegVar* reg = static_cast<const G4_AddrExp*>(src)->getRegVar();
2012                 if (reg->isRegAllocPartaker() && reg->isSpilled() == false)
2013                 {
2014                     unsigned srcId = reg->getId();
2015                     use_gen.set(srcId, true);
2016                     def_out.set(srcId, true);
2017                 }
2018             }
2019         }
2020 
2021         //
2022         // Process condMod
2023         //
2024         G4_CondMod* mod = i->getCondMod();
2025         if (mod) {
2026             G4_VarBase *flagReg = mod->getBase();
2027             if (flagReg)
2028             {
2029                 if (flagReg->asRegVar()->isRegAllocPartaker())
2030                 {
2031                     G4_Declare* topdcl = flagReg->asRegVar()->getDeclare();
2032                     MUST_BE_TRUE(topdcl->getAliasDeclare() == nullptr, "Invalid alias flag decl.");
2033                     unsigned id = topdcl->getRegVar()->getId();
2034 
2035                     BitSet* dstfootprint = &footprints[id];
2036 
2037                     if (dstfootprint->getSize() == 0)
2038                     {
2039                         // Write for dst was not seen before, so insert in to map
2040                         // bitsetSize is in bits for flag
2041                         unsigned int bitsetSize = topdcl->getNumberFlagElements();
2042 
2043                         BitSet newBitSet(bitsetSize, false);
2044                         footprints[id] = std::move(newBitSet);
2045 
2046                         if (gra.isBlockLocal(topdcl))
2047                         {
2048                             dstfootprint->setAll();
2049                         }
2050                     }
2051 
2052                     def_out.set(id, true);
2053 
2054                     if (writeWholeRegion(bb, i, flagReg))
2055                     {
2056                         use_kill.set(id, true);
2057                         use_gen.set(id, false);
2058 
2059                         dstfootprint->setAll();
2060                     }
2061                     else
2062                     {
2063                         footprintDst(bb, i, mod, dstfootprint);
2064                         use_gen.set(id, true);
2065                     }
2066                 }
2067             }
2068             else
2069             {
2070                 MUST_BE_TRUE((i->opcode() == G4_sel ||
2071                     i->opcode() == G4_csel) &&
2072                     i->getCondMod() != nullptr,
2073                     "Invalid CondMod");
2074             }
2075         }
2076 
2077         //
2078         // Process predicate
2079         //
2080         G4_Predicate* predicate = i->getPredicate();
2081         if (predicate) {
2082             G4_VarBase *flagReg = predicate->getBase();
2083             MUST_BE_TRUE(flagReg->asRegVar()->getDeclare()->getAliasDeclare() == nullptr, "Invalid alias flag decl.");
2084             if (flagReg->asRegVar()->isRegAllocPartaker())
2085             {
2086                 const G4_Declare* topdcl = flagReg->asRegVar()->getDeclare();
2087                 unsigned id = topdcl->getRegVar()->getId();
2088                 auto srcfootprint = &footprints[id];
2089 
2090                 if (srcfootprint->getSize() != 0)
2091                 {
2092                     footprintSrc(i, predicate, srcfootprint);
2093                 }
2094                 else
2095                 {
2096                     unsigned int bitsetSize = topdcl->getNumberFlagElements();
2097 
2098                     BitSet newBitSet(bitsetSize, false);
2099                     footprints[id] = std::move(newBitSet);
2100                     if (gra.isBlockLocal(topdcl))
2101                     {
2102                         srcfootprint->setAll();
2103                     }
2104                     footprintSrc(i, predicate, srcfootprint);
2105                 }
2106 
2107                 use_gen.set(static_cast<const G4_RegVar*>(flagReg)->getId(), true);
2108             }
2109         }
2110 
2111         //
2112         // Check whether dst can be killed at this point
2113         // A block of code is said to kill a variable when union
2114         // of all partial writes causes all elements to be written
2115         // into and any reads in the block can be sourced from
2116         // writes within that block itself
2117         //
2118         if (dst && dst->getBase()->isRegAllocPartaker())
2119         {
2120             G4_Declare* topdcl = GetTopDclFromRegRegion(dst);
2121             unsigned id = topdcl->getRegVar()->getId();
2122             auto dstfootprint = &footprints[id];
2123 
2124             if (dstfootprint->getSize() != 0)
2125             {
2126                 // Found dst in map
2127                 // Check whether all bits set
2128                 // pseudo_kill for this dst was not found in this BB yet
2129                 unsigned int first;
2130                 LocalLiveRange* topdclLR = nullptr;
2131 
2132                 if ((dstfootprint->isAllset() ||
2133                     // Check whether local RA marked this range
2134                     (topdcl &&
2135                     (topdclLR = gra.getLocalLR(topdcl)) &&
2136                     topdclLR->isLiveRangeLocal() &&
2137                     (!topdcl->isInput()) &&
2138                     topdclLR->getFirstRef(first) == i)) &&
2139                     // If single inst writes whole region then dont insert pseudo_kill
2140                     writeWholeRegion(bb, i, dst, fg.builder->getOptions()) == false)
2141                 {
2142                     bool foundKill = false;
2143                     INST_LIST::reverse_iterator nextIt = rit;
2144                     ++nextIt;
2145                     if (nextIt != bb->rend())
2146                     {
2147                         const G4_INST* nextInst = (*nextIt);
2148                         if (nextInst->isPseudoKill())
2149                         {
2150                             G4_DstRegRegion* nextDst = nextInst->getDst();
2151 
2152                             if (nextDst != NULL &&
2153                                 nextDst->isDstRegRegion() &&
2154                                 nextDst->getBase()->isRegAllocPartaker() &&
2155                                 topdcl == GetTopDclFromRegRegion(nextDst))
2156                             {
2157                                 foundKill = true;
2158                             }
2159                         }
2160                     }
2161                     if (!foundKill)
2162                     {
2163                         // All bytes of dst written at this point, so this is a good place to insert
2164                         // pseudo kill inst
2165                         pseudoKills.emplace_back(topdcl, rit);
2166                     }
2167 
2168                     // Reset gen
2169                     use_gen.set(dst->getBase()->asRegVar()->getId(), false);
2170 
2171                     // Set kill
2172                     use_kill.set(dst->getBase()->asRegVar()->getId(), true);
2173 #ifdef DEBUG_VERBOSE_ON
2174                     DEBUG_VERBOSE("Found kill at inst ");
2175                     INST_LIST_ITER fwdIter = rit.base();
2176                     fwdIter--;
2177                     (*fwdIter)->emit_inst(std::cout, false, NULL);
2178                     DEBUG_VERBOSE(" // $" << (*fwdIter)->getCISAOff());
2179                     DEBUG_VERBOSE(std::endl);
2180 #endif
2181                 }
2182             }
2183         }
2184 
2185         if (mod && mod->getBase() && mod->getBase()->asRegVar()->isRegAllocPartaker())
2186         {
2187             G4_VarBase *flagReg = mod->getBase();
2188             G4_Declare* topdcl = flagReg->asRegVar()->getDeclare();
2189             unsigned id = topdcl->getRegVar()->getId();
2190             auto dstfootprint = &footprints[id];
2191 
2192             if (dstfootprint->getSize() != 0)
2193             {
2194                 unsigned int first;
2195                 const LocalLiveRange* topdclLR = nullptr;
2196                 if ((dstfootprint->isAllset() ||
2197                     // Check whether local RA marked this range
2198                     // This may not be necessary as currently local RA is not performed for flags.
2199                     (topdcl &&
2200                     (topdclLR = gra.getLocalLR(topdcl)) &&
2201                         topdclLR->isLiveRangeLocal() &&
2202                         topdclLR->getFirstRef(first) == i)) &&
2203                         // If single inst writes whole region then dont insert pseudo_kill
2204                         writeWholeRegion(bb, i, flagReg) == false)
2205                 {
2206                     // All bytes of dst written at this point, so this is a good place to insert
2207                     // pseudo kill inst
2208                     pseudoKills.emplace_back(topdcl, rit);
2209 
2210                     // Reset gen
2211                     use_gen.set(flagReg->asRegVar()->getId(), false);
2212 
2213                     // Set kill
2214                     use_kill.set(flagReg->asRegVar()->getId(), true);
2215 #ifdef DEBUG_VERBOSE_ON
2216                     DEBUG_VERBOSE("Found kill at inst ");
2217                     INST_LIST_ITER fwdIter = rit.base();
2218                     fwdIter--;
2219                     (*fwdIter)->emit_inst(std::cout, false, NULL);
2220                     DEBUG_VERBOSE(" // $" << (*fwdIter)->getCISAOff());
2221                     DEBUG_VERBOSE(std::endl);
2222 #endif
2223                 }
2224             }
2225         }
2226     }
2227 
2228     //
2229     // Insert pseudo_kill nodes in BB
2230     //
2231     for (auto&& pseudoKill : pseudoKills)
2232     {
2233         INST_LIST_ITER iterToInsert = pseudoKill.second.base();
2234         do
2235         {
2236             --iterToInsert;
2237         } while ((*iterToInsert)->isPseudoKill());
2238         G4_INST* killInst = fg.builder->createPseudoKill(pseudoKill.first, PseudoKillType::FromLiveness);
2239         bb->insertBefore(iterToInsert, killInst);
2240     }
2241 
2242     //
2243     // initialize use_in
2244     //
2245     use_in = use_gen;
2246 }
2247 
2248 //
2249 // use_out = use_in(s1) + use_in(s2) + ... where s1 s2 ... are the successors of bb
2250 // use_in  = use_gen + (use_out - use_kill)
2251 //
contextFreeUseAnalyze(G4_BB * bb,bool isChanged)2252 bool LivenessAnalysis::contextFreeUseAnalyze(G4_BB* bb, bool isChanged)
2253 {
2254     bool changed;
2255 
2256     unsigned bbid = bb->getId();
2257 
2258     if (bb->Succs.empty()) // exit block
2259     {
2260         changed = false;
2261     }
2262     else if (isChanged)
2263     {
2264         // no need to update changed. This saves a memcpy
2265         for (auto succBB : bb->Succs)
2266         {
2267             use_out[bbid] |= use_in[succBB->getId()];
2268         }
2269         changed = true;
2270     }
2271     else
2272     {
2273         BitSet old = use_out[bbid];
2274         for (auto succBB : bb->Succs)
2275         {
2276             use_out[bbid] |= use_in[succBB->getId()];
2277         }
2278 
2279         changed = (old != use_out[bbid]);
2280     }
2281 
2282     //
2283     // in = gen + (out - kill)
2284     //
2285     use_in[bbid] = use_out[bbid];
2286     use_in[bbid] -= use_kill[bbid];
2287     use_in[bbid] |= use_gen[bbid];
2288 
2289     return changed;
2290 }
2291 
2292 //
2293 // def_in = def_out(p1) + def_out(p2) + ... where p1 p2 ... are the predecessors of bb
2294 // def_out |= def_in
2295 //
contextFreeDefAnalyze(G4_BB * bb,bool isChanged)2296 bool LivenessAnalysis::contextFreeDefAnalyze(G4_BB* bb, bool isChanged)
2297 {
2298     bool changed  = false;
2299     unsigned bbid = bb->getId();
2300 
2301     if (bb->Preds.empty())
2302     {
2303         changed = false;
2304     }
2305     else if (isChanged)
2306     {
2307         // no need to update changed. This saves a memcpy
2308         for (auto predBB : bb->Preds)
2309         {
2310             def_in[bbid] |= def_out[predBB->getId()];
2311         }
2312         changed = true;
2313     }
2314     else
2315     {
2316         BitSet old = def_in[bbid];
2317         for (auto predBB : bb->Preds)
2318         {
2319             def_in[bbid] |= def_out[predBB->getId()];
2320         }
2321         changed = (old != def_in[bbid]);
2322     }
2323 
2324      def_out[bb->getId()] |= def_in[bb->getId()];
2325 
2326      return changed;
2327 }
2328 
dump_bb_vector(char * vname,std::vector<BitSet> & vec)2329 void LivenessAnalysis::dump_bb_vector(char* vname, std::vector<BitSet>& vec)
2330 {
2331     std::cerr << vname << "\n";
2332     for (BB_LIST_ITER it = fg.begin(); it != fg.end(); it++)
2333     {
2334         G4_BB* bb = (*it);
2335         std::cerr << "    BB" << bb->getId() << "\n";
2336         const BitSet& in = vec[bb->getId()];
2337         std::cerr << "        ";
2338         for (unsigned i = 0; i < in.getSize(); i+= 10)
2339         {
2340             //
2341             // dump 10 bits a group
2342             //
2343             for (unsigned j = i; j < in.getSize() && j < i+10; j++)
2344             {
2345                 std::cerr << (in.isSet(j) ? "1" : "0");
2346             }
2347             std::cerr << " ";
2348         }
2349         std::cerr << "\n";
2350     }
2351 }
2352 
dump_fn_vector(char * vname,std::vector<FuncInfo * > & fns,std::vector<BitSet> & vec)2353 void LivenessAnalysis::dump_fn_vector(char* vname, std::vector<FuncInfo*>& fns, std::vector<BitSet>& vec)
2354 {
2355     DEBUG_VERBOSE(vname << std::endl);
2356     for (std::vector<FuncInfo*>::iterator it = fns.begin(); it != fns.end(); it++)
2357     {
2358         FuncInfo* funcInfo = (*it);
2359 
2360         DEBUG_VERBOSE("    FN" << funcInfo->getId() << std::endl);
2361         const BitSet& in = vec[funcInfo->getId()];
2362         DEBUG_VERBOSE("        ");
2363         for (unsigned i = 0; i < in.getSize(); i += 10)
2364         {
2365             //
2366             // dump 10 bits a group
2367             //
2368             for (unsigned j = i; j < in.getSize() && j < i + 10; j++)
2369             {
2370                 DEBUG_VERBOSE(in.isSet(j) ? "1" : "0");
2371             }
2372             DEBUG_VERBOSE(" ");
2373         }
2374         DEBUG_VERBOSE(std::endl);
2375     }
2376 }
2377 
2378 //
2379 // dump which vars are live at the entry of BB
2380 //
dump() const2381 void LivenessAnalysis::dump() const
2382 {
2383     for (auto bb : fg)
2384     {
2385         std::cerr << "BB" << bb->getId() << "'s live in: ";
2386         unsigned total_size = 0;
2387         auto dumpVar = [&total_size](G4_RegVar* var)
2388         {
2389             int size = var->getDeclare()->getTotalElems() * var->getDeclare()->getElemSize();
2390             std::cerr << var->getName() << "(" << size << "), ";
2391             total_size += size;
2392         };
2393 
2394         unsigned count = 0;
2395         for (auto var : vars)
2396         {
2397             if (var->isRegAllocPartaker() && isLiveAtEntry(bb, var->getId()))
2398             {
2399                 if (count++ % 10 == 0) std::cerr << "\n";
2400                 dumpVar(var);
2401             }
2402         }
2403         std::cerr << "\nBB" << bb->getId() << "'s live in size: " << total_size / numEltPerGRF<Type_UB>() << "\n\n";
2404         std::cerr << "BB" << bb->getId() << "'s live out: ";
2405         total_size = 0;
2406         count = 0;
2407         for (auto var : vars)
2408         {
2409             if (var->isRegAllocPartaker() && isLiveAtExit(bb, var->getId()))
2410             {
2411                 if (count++ % 10 == 0) std::cerr << "\n";
2412                 dumpVar(var);
2413             }
2414         }
2415         std::cerr << "\nBB" << bb->getId() << "'s live out size: " << total_size / numEltPerGRF<Type_UB>()<< "\n\n";
2416     }
2417 }
2418 
dumpBB(G4_BB * bb) const2419 void LivenessAnalysis::dumpBB(G4_BB *bb) const
2420 {
2421     std::cerr << "\n\nBB" << bb->getId() << "'s live in: ";
2422     unsigned total_size = 0;
2423     auto dumpVar = [&total_size](G4_RegVar* var)
2424     {
2425         int size = var->getDeclare()->getTotalElems() * var->getDeclare()->getElemSize();
2426         std::cerr << var->getName() << "(" << size << ")" << "[" << var->getRegAllocPartaker() << "], ";
2427         total_size += size;
2428     };
2429 
2430     unsigned count = 0;
2431     for (auto var : vars)
2432     {
2433         if (var->isRegAllocPartaker() && isLiveAtEntry(bb, var->getId()))
2434         {
2435             if (count++ % 10 == 0) std::cerr << "\n";
2436             dumpVar(var);
2437         }
2438     }
2439     std::cerr << "\n\nBB" << bb->getId() << "'s live out: ";
2440     total_size = 0;
2441     count = 0;
2442     for (auto var : vars)
2443     {
2444         if (var->isRegAllocPartaker() && isLiveAtExit(bb, var->getId()))
2445         {
2446             if (count++ % 10 == 0) std::cerr << "\n";
2447             dumpVar(var);
2448         }
2449     }
2450     std::cerr << "\n\nBB" << bb->getId() << "'s use through: ";
2451     total_size = 0;
2452     count = 0;
2453     for (auto var : vars)
2454     {
2455         if (var->isRegAllocPartaker() && isUseThrough(bb, var->getId()))
2456         {
2457             if (count++ % 10 == 0) std::cerr << "\n";
2458             dumpVar(var);
2459         }
2460     }
2461     std::cerr << "\n\nBB" << bb->getId() << "'s def through: ";
2462     total_size = 0;
2463     count = 0;
2464     for (auto var : vars)
2465     {
2466         if (var->isRegAllocPartaker() && isDefThrough(bb, var->getId()))
2467         {
2468             if (count++ % 10 == 0) std::cerr << "\n";
2469             dumpVar(var);
2470         }
2471     }
2472 }
2473 
dumpLive(BitSet & live) const2474 void LivenessAnalysis::dumpLive(BitSet& live) const
2475 {
2476     auto dumpVar = [](G4_RegVar* var)
2477     {
2478         int size = var->getDeclare()->getTotalElems() * var->getDeclare()->getElemSize();
2479         std::cerr << var->getName() << "(" << size << ")" << "[" << var->getRegAllocPartaker() << "], ";
2480     };
2481 
2482     unsigned count = 0;
2483     for (auto var : vars)
2484     {
2485         if (live.isSet(var->getId()))
2486         {
2487             if (count++ % 10 == 0) std::cerr << "\n";
2488             dumpVar(var);
2489         }
2490     }
2491 }
2492 
2493 //
2494 // dump which vars are live at the entry of BB
2495 //
dumpGlobalVarNum() const2496 void LivenessAnalysis::dumpGlobalVarNum() const
2497 {
2498     BitSet global_def_out = BitSet(numVarId, false);
2499     BitSet global_use_in = BitSet(numVarId, false);
2500 
2501     for (auto bb : fg)
2502     {
2503         BitSet global_in = use_in[bb->getId()];
2504         BitSet global_out = def_out[bb->getId()];
2505         global_in &= def_in[bb->getId()];
2506         global_use_in |= global_in;
2507         global_out &= use_out[bb->getId()];
2508         global_def_out |= global_out;
2509     }
2510 
2511     int global_var_num = 0;
2512     for (auto var : vars)
2513     {
2514         if (var->isRegAllocPartaker())
2515         {
2516             if (global_use_in.isSet(var->getId()) || global_def_out.isSet(var->getId()))
2517             {
2518                 global_var_num ++;
2519             }
2520         }
2521     }
2522     std::cerr << "total var num: " << numVarId << " global var num: " << global_var_num << "\n";
2523 }
2524 
isEmptyLiveness() const2525 bool LivenessAnalysis::isEmptyLiveness() const
2526 {
2527     return numBBId == 0;
2528 }
2529 
2530 //
2531 // return true if var is live at the entry of bb
2532 // check both use_in and def_in, if one condition fails then var is not in the live range
2533 //
isLiveAtEntry(const G4_BB * bb,unsigned var_id) const2534 bool LivenessAnalysis::isLiveAtEntry(const G4_BB* bb, unsigned var_id) const
2535 {
2536     return use_in[bb->getId()].isSet(var_id) && def_in[bb->getId()].isSet(var_id);
2537 }
2538 //
2539 // return true if var is live at the exit of bb
2540 //
isLiveAtExit(const G4_BB * bb,unsigned var_id) const2541 bool LivenessAnalysis::isLiveAtExit(const G4_BB* bb, unsigned var_id) const
2542 {
2543     return use_out[bb->getId()].isSet(var_id) && def_out[bb->getId()].isSet(var_id);
2544 }
2545 
2546 //
2547 // return true if var is user through the bb
2548 //
isUseOut(const G4_BB * bb,unsigned var_id) const2549 bool LivenessAnalysis::isUseOut(const G4_BB* bb, unsigned var_id) const
2550 {
2551     return use_out[bb->getId()].isSet(var_id);
2552 }
2553 
2554 //
2555 // return true if var is user through the bb
2556 //
isUseIn(const G4_BB * bb,unsigned var_id) const2557 bool LivenessAnalysis::isUseIn(const G4_BB* bb, unsigned var_id) const
2558 {
2559     return use_in[bb->getId()].isSet(var_id);
2560 }
2561 
2562 //
2563 // return true if var is user through the bb
2564 //
isUseThrough(const G4_BB * bb,unsigned var_id) const2565 bool LivenessAnalysis::isUseThrough(const G4_BB* bb, unsigned var_id) const
2566 {
2567     return use_in[bb->getId()].isSet(var_id) && use_out[bb->getId()].isSet(var_id);
2568 }
2569 //
2570 // return true if var is live at the exit of bb
2571 //
isDefThrough(const G4_BB * bb,unsigned var_id) const2572 bool LivenessAnalysis::isDefThrough(const G4_BB* bb, unsigned var_id) const
2573 {
2574     return def_in[bb->getId()].isSet(var_id) && def_out[bb->getId()].isSet(var_id);
2575 }
2576 
2577 
markBlockLocalVar(G4_RegVar * var,unsigned bbId)2578 void GlobalRA::markBlockLocalVar(G4_RegVar* var, unsigned bbId)
2579 {
2580     G4_Declare* dcl = var->getDeclare()->getRootDeclare();
2581 
2582     if (dcl->isInput() || dcl->isOutput())
2583     {
2584         setBBId(dcl, UINT_MAX - 1);
2585     }
2586     else
2587     {
2588         if (getBBId(dcl) == bbId)
2589         {
2590             // Do nothing.
2591         }
2592         else if (getBBId(dcl) == UINT_MAX)
2593         {
2594             setBBId(dcl, bbId);
2595         }
2596         else {
2597             setBBId(dcl, UINT_MAX - 1);
2598         }
2599     }
2600 }
2601 
markBlockLocalVars()2602 void GlobalRA::markBlockLocalVars()
2603 {
2604     for (auto bb : kernel.fg)
2605     {
2606         for (std::list<G4_INST*>::iterator it = bb->begin(); it != bb->end(); it++)
2607         {
2608             G4_INST* inst = *it;
2609 
2610             // Track direct dst references.
2611 
2612             G4_DstRegRegion* dst = inst->getDst();
2613 
2614             if (dst != NULL)
2615             {
2616                 G4_DstRegRegion* dstRgn = dst->asDstRegRegion();
2617 
2618                 if (dstRgn->getBase()->isRegVar()) {
2619                     markBlockLocalVar(dstRgn->getBase()->asRegVar(), bb->getId());
2620 
2621                     G4_Declare* topdcl = GetTopDclFromRegRegion(dst);
2622                     if (topdcl)
2623                     {
2624                         if (inst->isSend())
2625                         {
2626                             topdcl->setIsRefInSendDcl(true);
2627                         }
2628 
2629                         LocalLiveRange* lr = GetOrCreateLocalLiveRange(topdcl);
2630                         unsigned int startIdx;
2631                         if (lr->getFirstRef(startIdx) == NULL)
2632                         {
2633                             lr->setFirstRef(inst, 0);
2634                         }
2635                         lr->recordRef(bb);
2636                         recordRef(topdcl);
2637                     }
2638                 }
2639             }
2640 
2641             G4_CondMod* condMod = inst->getCondMod();
2642 
2643             if (condMod != NULL &&
2644                 condMod->getBase() != NULL)
2645             {
2646                 if (condMod->getBase() && condMod->getBase()->isRegVar())
2647                 {
2648                     markBlockLocalVar(condMod->getBase()->asRegVar(), bb->getId());
2649 
2650                     G4_Declare* topdcl = condMod->getBase()->asRegVar()->getDeclare();
2651                     if (topdcl)
2652                     {
2653                         LocalLiveRange* lr = GetOrCreateLocalLiveRange(topdcl);
2654                         unsigned int startIdx;
2655                         if (lr->getFirstRef(startIdx) == NULL)
2656                         {
2657                             lr->setFirstRef(inst, 0);
2658                         }
2659                         lr->recordRef(bb);
2660                         recordRef(topdcl);
2661                     }
2662                 }
2663             }
2664 
2665             // Track direct src references.
2666             for (unsigned j = 0; j < G4_MAX_SRCS; j++)
2667             {
2668                 G4_Operand* src = inst->getSrc(j);
2669 
2670                 if (src == NULL)
2671                 {
2672                     // Do nothing.
2673                 }
2674                 else if (src->isSrcRegRegion() && src->asSrcRegRegion()->getBase()->isRegVar())
2675                 {
2676                     G4_SrcRegRegion* srcRgn = src->asSrcRegRegion();
2677 
2678                     if (srcRgn->getBase()->isRegVar()) {
2679                         markBlockLocalVar(src->asSrcRegRegion()->getBase()->asRegVar(), bb->getId());
2680 
2681                         G4_Declare* topdcl = GetTopDclFromRegRegion(src);
2682                         if (topdcl)
2683                         {
2684                             if (inst->isSend())
2685                             {
2686                                 topdcl->setIsRefInSendDcl(true);
2687                             }
2688 
2689                             LocalLiveRange* lr = GetOrCreateLocalLiveRange(topdcl);
2690 
2691                             lr->recordRef(bb);
2692                             recordRef(topdcl);
2693                             if (inst->isEOT())
2694                             {
2695                                 lr->markEOT();
2696                             }
2697                         }
2698                     }
2699                 }
2700                 else if (src->isAddrExp())
2701                 {
2702                     G4_RegVar* addExpVar = src->asAddrExp()->getRegVar();
2703                     markBlockLocalVar(addExpVar, bb->getId());
2704 
2705                     G4_Declare* topdcl = addExpVar->getDeclare()->getRootDeclare();
2706                     MUST_BE_TRUE(topdcl != NULL, "Top dcl was null for addr exp opnd");
2707 
2708                     LocalLiveRange* lr = GetOrCreateLocalLiveRange(topdcl);
2709                     lr->recordRef(bb);
2710                     lr->markIndirectRef();
2711                     recordRef(topdcl);
2712                 }
2713             }
2714 
2715             G4_Operand* pred = inst->getPredicate();
2716 
2717             if (pred != NULL)
2718             {
2719                 if (pred->getBase() && pred->getBase()->isRegVar())
2720                 {
2721                     markBlockLocalVar(pred->getBase()->asRegVar(), bb->getId());
2722                     G4_Declare* topdcl = pred->getBase()->asRegVar()->getDeclare();
2723                     if (topdcl)
2724                     {
2725                         LocalLiveRange* lr = GetOrCreateLocalLiveRange(topdcl);
2726                         lr->recordRef(bb);
2727                         recordRef(topdcl);
2728                     }
2729                 }
2730             }
2731 
2732             // Track all indirect references.
2733             const REGVAR_VECTOR& grfVec = pointsToAnalysis.getIndrUseVectorForBB(bb->getId());
2734             for (pointInfo grf : grfVec)
2735             {
2736                 markBlockLocalVar(grf.var, bb->getId());
2737             }
2738         }
2739     }
2740 }
2741 
resetGlobalRAStates()2742 void GlobalRA::resetGlobalRAStates()
2743 {
2744     if (builder.getOption(vISA_LocalDeclareSplitInGlobalRA))
2745     {
2746         // remove partial decls
2747         auto isPartialDcl = [](G4_Declare* dcl)
2748         {
2749             return dcl->getIsPartialDcl();
2750         };
2751 
2752         kernel.Declares.erase(
2753             std::remove_if (kernel.Declares.begin(), kernel.Declares.end(), isPartialDcl),
2754             kernel.Declares.end());
2755     }
2756 
2757     for (auto dcl : kernel.Declares)
2758     {
2759         //Reset all the local live ranges
2760         resetLocalLR(dcl);
2761 
2762         if (builder.getOption(vISA_LocalDeclareSplitInGlobalRA))
2763         {
2764             //Remove the split declares
2765             if (dcl->getIsSplittedDcl())
2766             {
2767                 dcl->setIsSplittedDcl(false);
2768                 clearSubDcl(dcl);
2769             }
2770         }
2771 
2772         //Remove the bank assignment
2773         if (builder.getOption(vISA_LocalBankConflictReduction) &&
2774             builder.hasBankCollision())
2775         {
2776             setBankConflict(dcl, BANK_CONFLICT_NONE);
2777         }
2778         clearBundleConflictDcl(dcl);
2779     }
2780 
2781     return;
2782 }
2783 
2784 //
2785 // Mark block local (temporary) variables.
2786 //
markGraphBlockLocalVars()2787 void GlobalRA::markGraphBlockLocalVars()
2788 {
2789     // Clear stale LocalLiveRange* first to avoid double ref counting
2790     clearStaleLiveRanges();
2791 
2792     //Create live ranges and record the reference info
2793     markBlockLocalVars();
2794 
2795 #ifdef DEBUG_VERBOSE_ON
2796     std::cout << "\t--LOCAL VARIABLES--\n";
2797     for (auto dcl : kernel.Declares)
2798     {
2799         LocalLiveRange* topdclLR = getLocalLR(dcl);
2800 
2801         if (topdclLR != nullptr &&
2802             topdclLR->isLiveRangeLocal())
2803         {
2804             std::cout << dcl->getName() << ",\t";
2805         }
2806     }
2807     std::cout << "\n";
2808 #endif
2809 }
2810 
2811 //
2812 // Pre-assign phy regs to stack call function return variable as per ABI.
2813 //
setABIForStackCallFunctionCalls()2814 void FlowGraph::setABIForStackCallFunctionCalls()
2815 {
2816     // For each G4_pseudo_fcall inst, create dst of GRF type
2817     // with physical register 1.0 pre-assigned to it.
2818     // Similarly, for G4_pseudo_fret create src of GRF type
2819     // with physical register 1.0 pre-assigned to it.
2820     // Each will use 2 dwords of r1.0.
2821     int call_id = 0, ret_id = 0;
2822 
2823     for (auto bb : *this)
2824     {
2825         if (bb->isEndWithFCall())
2826         {
2827             const char* n = builder->getNameString(mem, 25, "FCALL_RET_LOC_%d", call_id++);
2828 
2829             G4_INST* fcall = bb->back();
2830             // Set call dst to r125.0
2831             G4_Declare* r1_dst = builder->createDeclareNoLookup(n, G4_GRF, numEltPerGRF<Type_UD>(), 1, Type_UD);
2832             r1_dst->getRegVar()->setPhyReg(builder->phyregpool.getGreg(builder->kernel.getFPSPGRF()), IR_Builder::SubRegs_Stackcall::Ret_IP);
2833             G4_DstRegRegion* dstRgn = builder->createDst(r1_dst->getRegVar(), 0, 0, 1, Type_UD);
2834             fcall->setDest(dstRgn);
2835         }
2836 
2837         if (bb->isEndWithFRet())
2838         {
2839             const char* n = builder->getNameString(mem, 25, "FRET_RET_LOC_%d", ret_id++);
2840             G4_INST* fret = bb->back();
2841             const RegionDesc* rd = builder->createRegionDesc(2, 2, 1);
2842             G4_Declare* r1_src = builder->createDeclareNoLookup(n, G4_INPUT, numEltPerGRF<Type_UD>(), 1, Type_UD);
2843             r1_src->getRegVar()->setPhyReg(builder->phyregpool.getGreg(builder->kernel.getFPSPGRF()), IR_Builder::SubRegs_Stackcall::Ret_IP);
2844             G4_Operand* srcRgn = builder->createSrc(r1_src->getRegVar(), 0, 0, rd, Type_UD);
2845             fret->setSrc(srcRgn, 0);
2846             if (fret->getExecSize() == g4::SIMD1)
2847             {
2848                 // due to <2;2,1> regioning we must update exec size as well
2849                 fret->setExecSize(g4::SIMD2);
2850             }
2851             if (builder->getOption(vISA_GenerateDebugInfo))
2852             {
2853                 pKernel->getKernelDebugInfo()->setFretVar(GetTopDclFromRegRegion(fret->getSrc(0)));
2854             }
2855         }
2856     }
2857 }
2858 
2859 // Function to verify RA results
verifyRA(LivenessAnalysis & liveAnalysis)2860 void GlobalRA::verifyRA(LivenessAnalysis & liveAnalysis)
2861 {
2862     for (auto bb : kernel.fg)
2863     {
2864         // Verify PREG assignment
2865         for (auto inst : *bb)
2866         {
2867             G4_DstRegRegion* dst = inst->getDst();
2868             if (dst != NULL &&
2869                 dst->getBase()->isRegAllocPartaker())
2870             {
2871                 MUST_BE_TRUE(dst->getBase()->asRegVar()->getPhyReg(), "RA verification error: No PREG assigned for variable " << GetTopDclFromRegRegion(dst)->getName() << "!");
2872             }
2873 
2874             for (unsigned j = 0; j < G4_MAX_SRCS; j++)
2875             {
2876                 G4_Operand* src = inst->getSrc(j);
2877                 if (src != NULL &&
2878                     src->isSrcRegRegion() &&
2879                     src->asSrcRegRegion()->getBase()->isRegAllocPartaker())
2880                 {
2881                     MUST_BE_TRUE(src->asSrcRegRegion()->getBase()->asRegVar()->getPhyReg(),
2882                         "RA verification error: No PREG assigned for variable " << GetTopDclFromRegRegion(src->asSrcRegRegion())->getName() << "!");
2883                 }
2884             }
2885         }
2886 
2887         int numGRF = kernel.getNumRegTotal();
2888         // Verify Live-in
2889         std::map<uint32_t, G4_Declare*> LiveInRegMap;
2890         std::map<uint32_t, G4_Declare*>::iterator LiveInRegMapIt;
2891         std::vector<uint32_t> liveInRegVec(numGRF * numEltPerGRF<Type_UW>(), UINT_MAX);
2892 
2893         for (G4_Declare* dcl : kernel.Declares)
2894         {
2895             if (dcl->getAliasDeclare() != nullptr)
2896                 continue;
2897 
2898             if (dcl->getRegVar()->isRegAllocPartaker())
2899             {
2900                 G4_RegVar* var = dcl->getRegVar();
2901                 uint32_t varID = var->getId();
2902                 if (liveAnalysis.isLiveAtEntry(bb, dcl->getRegVar()->getId()))
2903                 {
2904                     MUST_BE_TRUE(var->getPhyReg()->isGreg(), "RA verification error: Invalid preg assignment for variable " << dcl->getName() << "!");
2905 
2906                     uint32_t regNum = var->getPhyReg()->asGreg()->getRegNum();
2907                     uint32_t regOff = var->getPhyRegOff();
2908 
2909                     uint32_t idx = regNum * numEltPerGRF<Type_UW>() +
2910                         (regOff * dcl->getElemSize()) / G4_WSIZE;
2911                     for (uint32_t i = 0; i < dcl->getWordSize(); ++i, ++idx)
2912                     {
2913                         LiveInRegMapIt = LiveInRegMap.find(idx);
2914                         if (liveInRegVec[idx] != UINT_MAX)
2915                         {
2916                             MUST_BE_TRUE(LiveInRegMapIt != LiveInRegMap.end(), "RA verification error: Invalid entry in LiveInRegMap!");
2917                             if (dcl->isInput())
2918                             {
2919                                 DEBUG_MSG("RA verification warning: Found conflicting input variables: " << dcl->getName()
2920                                     << " and " << (*LiveInRegMapIt).second->getName() << " assigned to r" << regNum
2921                                     << "." << regOff << "!\n");
2922                                 liveInRegVec[idx] = varID;
2923                                 LiveInRegMapIt->second = dcl;
2924                             }
2925                             else
2926                             {
2927                                 DEBUG_MSG("RA verification warning: Found conflicting live-in variables: " << dcl->getName()
2928                                     << " and " << LiveInRegMapIt->second->getName() << " assigned to r" <<
2929                                     regNum << "." << regOff << "!\n");
2930                             }
2931 
2932                         }
2933                         else
2934                         {
2935                             liveInRegVec[idx] = varID;
2936                             MUST_BE_TRUE(LiveInRegMapIt == LiveInRegMap.end(), "RA verification error: Invalid entry in LiveInRegMap!");
2937                             LiveInRegMap.emplace(idx, dcl);
2938                         }
2939                     }
2940                 }
2941             }
2942         }
2943 
2944         // Verify Live-out
2945         G4_Declare *ret = kernel.fg.builder->getStackCallRet();
2946         std::map<uint32_t, G4_Declare*> liveOutRegMap;
2947         std::map<uint32_t, G4_Declare*>::iterator liveOutRegMapIt;
2948         std::vector<uint32_t> liveOutRegVec(numGRF * numEltPerGRF<Type_UW>(), UINT_MAX);
2949 
2950         for (G4_Declare* dcl : kernel.Declares)
2951         {
2952             if (dcl->getAliasDeclare() != NULL)
2953                 continue;
2954             if (dcl->getRegVar()->isRegAllocPartaker())
2955             {
2956                 G4_RegVar* var = dcl->getRegVar();
2957                 uint32_t varID = var->getId();
2958                 if (liveAnalysis.isLiveAtExit(bb, varID))
2959                 {
2960                     MUST_BE_TRUE(var->getPhyReg()->isGreg(), "RA verification error: Invalid preg assignment for variable " << dcl->getName() << "!");
2961 
2962                     uint32_t regNum = var->getPhyReg()->asGreg()->getRegNum();
2963                     uint32_t regOff = var->getPhyRegOff();
2964 
2965                     uint32_t idx = regNum * numEltPerGRF<Type_UW>() +
2966                         (regOff * dcl->getElemSize()) / G4_WSIZE;
2967                     for (uint32_t i = 0; i < dcl->getWordSize(); ++i, ++idx)
2968                     {
2969                         liveOutRegMapIt = liveOutRegMap.find(idx);
2970                         if (liveOutRegVec[idx] != UINT_MAX)
2971                         {
2972                             MUST_BE_TRUE(liveOutRegMapIt != liveOutRegMap.end(), "RA verification error: Invalid entry in liveOutRegMap!");
2973                             if (dcl->isInput())
2974                             {
2975                                 DEBUG_MSG("RA verification warning: Found conflicting input variables: " << dcl->getName()
2976                                     << " and " << liveOutRegMapIt->second->getName() << " assigned to r" << regNum
2977                                     << "." << regOff << "!\n");
2978                                 liveOutRegVec[idx] = varID;
2979                                 liveOutRegMapIt->second = dcl;
2980                             }
2981                             else
2982                             {
2983                                 DEBUG_MSG("RA verification warning: Found conflicting live-out variables: " << dcl->getName()
2984                                     << " and " << liveOutRegMapIt->second->getName() << " assigned to r" <<
2985                                     regNum << "." << regOff << "!\n");
2986                             }
2987 
2988                         }
2989                         else
2990                         {
2991                             liveOutRegVec[idx] = varID;
2992                             MUST_BE_TRUE(liveOutRegMapIt == liveOutRegMap.end(), "RA verification error: Invalid entry in liveOutRegMap!");
2993                             liveOutRegMap.emplace(idx, dcl);
2994                         }
2995                     }
2996                 }
2997             }
2998         }
2999 
3000         for (INST_LIST::reverse_iterator rit = bb->rbegin(); rit != bb->rend(); ++rit)
3001         {
3002             G4_INST* inst = (*rit);
3003             INST_LIST_RITER ritNext = rit;
3004             ritNext++;
3005             G4_INST* rNInst = nullptr;
3006             if (ritNext != bb->rend())
3007             {
3008                 rNInst = (*ritNext);
3009             }
3010 
3011             if (inst->isPseudoKill())
3012             {
3013                 continue;
3014             }
3015             G4_DstRegRegion* dst = inst->getDst();
3016             G4_DstRegRegion* rNDst = nullptr;
3017 
3018             if (rNInst && rNInst->isPseudoKill())
3019             {
3020                 rNDst = rNInst->getDst();
3021             }
3022 
3023             //
3024             // verify dst operand
3025             //
3026             if (dst != NULL)
3027             {
3028                 if (dst->getBase()->isRegAllocPartaker())
3029                 {
3030                     G4_DstRegRegion* dstrgn = dst;
3031                     G4_RegVar* var = dstrgn->getBase()->asRegVar();
3032                     uint32_t varID = var->getId();
3033                     G4_Declare* dcl = GetTopDclFromRegRegion(dstrgn);
3034                     G4_Declare* rNDcl = nullptr;
3035                     if (rNDst != nullptr)
3036                     {
3037                         rNDcl = GetTopDclFromRegRegion(rNDst);
3038                     }
3039                     MUST_BE_TRUE(dcl != nullptr, "Null declare found");
3040                     var = dcl->getRegVar();
3041 
3042                     MUST_BE_TRUE(var->getId() == varID, "RA verification error: Invalid regVar ID!");
3043                     MUST_BE_TRUE(var->getPhyReg()->isGreg(), "RA verification error: Invalid dst reg!");
3044 
3045                     uint32_t regNum = var->getPhyReg()->asGreg()->getRegNum();
3046                     uint32_t regOff = var->getPhyRegOff();
3047 
3048                     uint32_t idx = regNum * numEltPerGRF<Type_UW>() +
3049                         (regOff * dcl->getElemSize()) / G4_WSIZE;
3050                     for (uint32_t i = 0; i < dcl->getWordSize(); ++i, ++idx)
3051                     {
3052                         liveOutRegMapIt = liveOutRegMap.find(idx);
3053                         if (liveOutRegVec[idx] == UINT_MAX)
3054                         {
3055                             if (!inst->isPseudoKill())
3056                             {
3057                                 MUST_BE_TRUE(liveOutRegMapIt == liveOutRegMap.end(), "RA verification error: Invalid entry in liveOutRegMap!");
3058                                 DEBUG_MSG("RA verification warning: Found unused variable " << dcl->getName() << "!\n");
3059                             }
3060                         }
3061                         else
3062                         {
3063                             MUST_BE_TRUE(liveOutRegMapIt != liveOutRegMap.end(), "RA verification error: Invalid entry in liveOutRegMap!");
3064                             if (liveOutRegVec[idx] != varID)
3065                             {
3066                                 const BitSet& indr_use = liveAnalysis.indr_use[bb->getId()];
3067 
3068                                 if (strstr(dcl->getName(), GlobalRA::StackCallStr) != NULL)
3069                                 {
3070                                     DEBUG_MSG("RA verification warning: Found conflicting stackCall variable: " << dcl->getName()
3071                                         << " and " << liveOutRegMapIt->second->getName() << " assigned to r" <<
3072                                         regNum << "." << regOff << "!\n");
3073                                 }
3074                                 else if (indr_use.isSet(liveOutRegVec[idx]) == true)
3075                                 {
3076                                     MUST_BE_TRUE(false, "RA verification warning: Found conflicting indirect variables: " << dcl->getName()
3077                                         << " and " << liveOutRegMapIt->second->getName() << " assigned to r" <<
3078                                         regNum << "." << regOff << "!\n");
3079                                 }
3080                                 else
3081                                 {
3082                                     if (!inst->isPseudoKill())
3083                                     {
3084                                         DEBUG_MSG("RA verification error: Found conflicting variables: " << dcl->getName()
3085                                             << " and " << liveOutRegMapIt->second->getName() << " assigned to r" <<
3086                                             regNum << "." << regOff << "!\n");
3087                                     }
3088                                 }
3089                             }
3090 
3091                             if (liveAnalysis.writeWholeRegion(bb, inst, dstrgn, kernel.getOptions()) ||
3092                                 inst->isPseudoKill() || rNDcl == dcl) {
3093                                 liveOutRegVec[idx] = UINT_MAX;
3094                                 MUST_BE_TRUE(liveOutRegMapIt != liveOutRegMap.end(), "RA verification error: Invalid entry in liveOutRegMap!");
3095                                 liveOutRegMap.erase(liveOutRegMapIt);
3096                             }
3097                         }
3098                     }
3099                 }
3100                 else if (dst->getRegAccess() == IndirGRF)
3101                 {
3102                     G4_DstRegRegion* dstrgn = dst;
3103                     G4_Declare* addrdcl = GetTopDclFromRegRegion(dstrgn);
3104                     G4_RegVar* ptvar = NULL;
3105                     int vid = 0;
3106 
3107                     while ((ptvar = pointsToAnalysis.getPointsTo(addrdcl->getRegVar(), vid++)) != NULL)
3108                     {
3109                         uint32_t varID = ptvar->getId();
3110                         G4_Declare* dcl = ptvar->getDeclare();
3111                         MUST_BE_TRUE(dcl != nullptr, "Null declare found");
3112                         while (dcl->getAliasDeclare())
3113                         {
3114                             dcl = dcl->getAliasDeclare();
3115                         }
3116                         G4_RegVar* var = dcl->getRegVar();
3117 
3118                         MUST_BE_TRUE(var->getId() == varID, "RA verification error: Invalid regVar ID!");
3119                         MUST_BE_TRUE(var->getPhyReg()->isGreg(), "RA verification error: Invalid dst reg!");
3120 
3121                         uint32_t regNum = var->getPhyReg()->asGreg()->getRegNum();
3122                         uint32_t regOff = var->getPhyRegOff();
3123 
3124                         uint32_t idx = regNum * numEltPerGRF<Type_UW>() +
3125                             (regOff * dcl->getElemSize()) / G4_WSIZE;
3126                         for (uint32_t i = 0; i < dcl->getWordSize(); ++i, ++idx)
3127                         {
3128                             liveOutRegMapIt = liveOutRegMap.find(idx);
3129                             if (liveOutRegVec[idx] == UINT_MAX)
3130                             {
3131                                 MUST_BE_TRUE(liveOutRegMapIt == liveOutRegMap.end(), "RA verification error: Invalid entry in liveOutRegMap!");
3132                                 DEBUG_MSG("RA verification warning: Found unused variable " << dcl->getName() << "!\n");
3133                             }
3134                             else
3135                             {
3136                                 MUST_BE_TRUE(liveOutRegMapIt != liveOutRegMap.end(), "RA verification error: Invalid entry in liveOutRegMap!");
3137                                 if (liveOutRegVec[idx] != varID)
3138                                 {
3139                                     const BitSet& indr_use = liveAnalysis.indr_use[bb->getId()];
3140 
3141                                     if (strstr(dcl->getName(), GlobalRA::StackCallStr) != NULL)
3142                                     {
3143                                         DEBUG_MSG("RA verification warning: Found conflicting stackCall variables: " << dcl->getName()
3144                                             << " and " << liveOutRegMapIt->second->getName() << " assigned to r" <<
3145                                             regNum << "." << regOff << "!\n");
3146                                     }
3147                                     else if (indr_use.isSet(liveOutRegVec[idx]) == true)
3148                                     {
3149                                         MUST_BE_TRUE(false, "RA verification warning: Found conflicting indirect variables: " << dcl->getName()
3150                                             << " and " << liveOutRegMapIt->second->getName() << " assigned to r" <<
3151                                             regNum << "." << regOff << "!\n");
3152                                     }
3153                                     else
3154                                     {
3155                                         MUST_BE_TRUE(false, "RA verification error: Found conflicting variables: " << dcl->getName()
3156                                             << " and " << liveOutRegMapIt->second->getName() << " assigned to r" <<
3157                                             regNum << "." << regOff << "!\n");
3158                                     }
3159                                 }
3160                             }
3161                         }
3162                     }
3163                 }
3164             }
3165 
3166             if (inst->opcode() == G4_pseudo_fcall)
3167             {
3168                 if (ret != NULL && ret->getRegVar() != NULL)
3169                 {
3170                     G4_RegVar* var = ret->getRegVar();
3171                     uint32_t varID = var->getId();
3172                     MUST_BE_TRUE(var->getId() == varID, "RA verification error: Invalid regVar ID!");
3173                     MUST_BE_TRUE(var->getPhyReg()->isGreg(), "RA verification error: Invalid dst reg!");
3174 
3175                     uint32_t regNum = var->getPhyReg()->asGreg()->getRegNum();
3176                     uint32_t regOff = var->getPhyRegOff();
3177 
3178                     uint32_t idx = regNum * numEltPerGRF<Type_UW>() +
3179                         regOff * ret->getElemSize() / G4_WSIZE;
3180                     for (uint32_t i = 0; i < ret->getWordSize(); ++i, ++idx)
3181                     {
3182                         liveOutRegMapIt = liveOutRegMap.find(idx);
3183                         liveOutRegVec[idx] = UINT_MAX;
3184                         MUST_BE_TRUE(liveOutRegMapIt != liveOutRegMap.end(),
3185                             "RA verification error: Invalid entry in liveOutRegMap!");
3186                         liveOutRegMap.erase(liveOutRegMapIt);
3187                     }
3188                 }
3189             }
3190 
3191             //
3192             // verify each source operand
3193             //
3194             for (unsigned j = 0; j < G4_MAX_SRCS; j++)
3195             {
3196                 G4_Operand* src = inst->getSrc(j);
3197                 if (src == NULL)
3198                 {
3199                     continue;
3200                 }
3201                 if (src->isAddrExp() && src->asAddrExp()->isRegAllocPartaker())
3202                 {
3203                     G4_RegVar* var = src->asAddrExp()->getRegVar();
3204                     uint32_t varID = UINT_MAX;
3205                     G4_Declare* dcl = NULL;
3206 
3207                     varID = var->getId();
3208                     dcl = var->getDeclare();
3209 
3210                     uint32_t regNum = var->getPhyReg()->asGreg()->getRegNum();
3211                     uint32_t regOff = var->getPhyRegOff();
3212 
3213                     uint32_t idx = regNum * numEltPerGRF<Type_UW>() +
3214                         (regOff * dcl->getElemSize()) / G4_WSIZE;
3215                     for (uint32_t i = 0; i < dcl->getWordSize(); ++i, ++idx)
3216                     {
3217                         liveOutRegMapIt = liveOutRegMap.find(idx);
3218                         if (liveOutRegVec[idx] == UINT_MAX)
3219                         {
3220                             liveOutRegVec[idx] = varID;
3221                             MUST_BE_TRUE(liveOutRegMapIt == liveOutRegMap.end(), "RA verification error: Invalid entry in liveOutRegMap!");
3222                             liveOutRegMap.emplace(idx, dcl);
3223                         }
3224                         else
3225                         {
3226                             if (liveOutRegVec[idx] != varID)
3227                             {
3228                                 const BitSet& indr_use = liveAnalysis.indr_use[bb->getId()];
3229 
3230                                 if (dcl->isInput())
3231                                 {
3232                                     DEBUG_MSG("RA verification warning: Found conflicting input variables: " << dcl->getName()
3233                                         << " and " << liveOutRegMapIt->second->getName() << " assigned to r" << regNum
3234                                         << "." << regOff << "!\n");
3235                                     liveOutRegVec[idx] = varID;
3236                                     liveOutRegMapIt->second = dcl;
3237                                 }
3238                                 else if (strstr(dcl->getName(), GlobalRA::StackCallStr) != NULL)
3239                                 {
3240                                     DEBUG_MSG("RA verification warning: Found conflicting stackCall variables: " << dcl->getName()
3241                                         << " and " << liveOutRegMapIt->second->getName() << " assigned to r" <<
3242                                         regNum << "." << regOff << "!\n");
3243                                 }
3244                                 else if (indr_use.isSet(liveOutRegVec[idx]) == true)
3245                                 {
3246                                     MUST_BE_TRUE(false, "RA verification warning: Found conflicting indirect variables: " << dcl->getName()
3247                                         << " and " << liveOutRegMapIt->second->getName() << " assigned to r" <<
3248                                         regNum << "." << regOff << "!\n");
3249                                 }
3250                                 else
3251                                 {
3252                                     INST_LIST::reverse_iterator succ = rit;
3253                                     ++succ;
3254                                     bool idMismatch = false;
3255                                     G4_Declare* topdcl = GetTopDclFromRegRegion((*succ)->getDst());
3256                                     if (topdcl != nullptr &&
3257                                         liveOutRegVec[idx] != topdcl->getRegVar()->getId())
3258                                     {
3259                                         idMismatch = true;
3260                                     }
3261                                     if (succ == bb->rend() ||
3262                                         !(*succ)->isPseudoKill()  ||
3263                                         (*succ)->getDst() == NULL ||
3264                                         idMismatch)
3265                                     {
3266                                         MUST_BE_TRUE(false, "RA verification error: Found conflicting variables: " << dcl->getName()
3267                                             << " and " << liveOutRegMapIt->second->getName() << " assigned to r" <<
3268                                             regNum << "." << regOff << "!\n");
3269                                     }
3270                                 }
3271                             }
3272                         }
3273                     }
3274                 }
3275                 else if (src->isSrcRegRegion() && src->asSrcRegRegion()->getBase()->isRegAllocPartaker())
3276                 {
3277                     G4_SrcRegRegion* srcrgn = src->asSrcRegRegion();
3278                     G4_RegVar* var = srcrgn->getBase()->asRegVar();
3279                     uint32_t varID = var->getId();
3280                     G4_Declare* dcl = GetTopDclFromRegRegion(srcrgn);
3281                     var = dcl->getRegVar();
3282                     MUST_BE_TRUE(var->getId() == varID, "RA verification error: Invalid regVar ID!");
3283                     MUST_BE_TRUE(var->getPhyReg()->isGreg(), "RA verification error: Invalid dst reg!");
3284 
3285                     if (!inst->isLifeTimeEnd())
3286                     {
3287                         uint32_t regNum = var->getPhyReg()->asGreg()->getRegNum();
3288                         uint32_t regOff = var->getPhyRegOff();
3289 
3290                         uint32_t idx = regNum * numEltPerGRF<Type_UW>() +
3291                             (regOff * dcl->getElemSize()) / G4_WSIZE;
3292                         for (uint32_t i = 0; i < dcl->getWordSize(); ++i, ++idx)
3293                         {
3294                             liveOutRegMapIt = liveOutRegMap.find(idx);
3295                             if (liveOutRegVec[idx] == UINT_MAX)
3296                             {
3297                                 liveOutRegVec[idx] = varID;
3298                                 MUST_BE_TRUE(liveOutRegMapIt == liveOutRegMap.end(), "RA verification error: Invalid entry in liveOutRegMap!");
3299                                 liveOutRegMap.emplace(idx, dcl);
3300                             }
3301                             else
3302                             {
3303                                 if (liveOutRegVec[idx] != varID)
3304                                 {
3305                                     const BitSet& indr_use = liveAnalysis.indr_use[bb->getId()];
3306 
3307                                     if (dcl->isInput())
3308                                     {
3309                                         DEBUG_MSG("RA verification warning: Found conflicting input variables: " << dcl->getName()
3310                                             << " and " << liveOutRegMapIt->second->getName() << " assigned to r" << regNum
3311                                             << "." << regOff << "!\n");
3312                                         liveOutRegVec[idx] = varID;
3313                                         liveOutRegMapIt->second = dcl;
3314                                     }
3315                                     else if (strstr(dcl->getName(), GlobalRA::StackCallStr) != NULL)
3316                                     {
3317                                         DEBUG_MSG("RA verification warning: Found conflicting stackCall variables: " << dcl->getName()
3318                                             << " and " << liveOutRegMapIt->second->getName() << " assigned to r" <<
3319                                             regNum << "." << regOff << "!\n");
3320                                     }
3321                                     else if (indr_use.isSet(liveOutRegVec[idx]) == true)
3322                                     {
3323                                         MUST_BE_TRUE(false, "RA verification warning: Found conflicting indirect variables: " << dcl->getName()
3324                                             << " and " << liveOutRegMapIt->second->getName() << " assigned to r" <<
3325                                             regNum << "." << regOff << "!\n");
3326                                     }
3327                                     else
3328                                     {
3329                                         INST_LIST::reverse_iterator succ = rit;
3330                                         ++succ;
3331                                         bool idMismatch = false;
3332                                         G4_Declare* topdcl = GetTopDclFromRegRegion((*succ)->getDst());
3333                                         if (topdcl != nullptr &&
3334                                             liveOutRegVec[idx] != topdcl->getRegVar()->getId())
3335                                         {
3336                                             idMismatch = true;
3337                                         }
3338                                         if (succ == bb->rbegin() ||
3339                                             !(*succ)->isPseudoKill() ||
3340                                             (*succ)->getDst() == NULL ||
3341                                             idMismatch)
3342                                         {
3343                                             DEBUG_MSG("RA verification error: Found conflicting variables: " << dcl->getName()
3344                                                 << " and " << liveOutRegMapIt->second->getName() << " assigned to r" <<
3345                                                 regNum << "." << regOff << "!\n");
3346                                         }
3347                                     }
3348                                 }
3349                             }
3350                         }
3351                     }
3352                     else
3353                     {
3354                         uint32_t regNum = var->getPhyReg()->asGreg()->getRegNum();
3355                         uint32_t regOff = var->getPhyRegOff();
3356 
3357                         uint32_t idx = regNum * numEltPerGRF<Type_UW>() +
3358                             (regOff * dcl->getElemSize()) / G4_WSIZE;
3359                         for (uint32_t i = 0; i < dcl->getWordSize(); ++i, ++idx)
3360                         {
3361                             if (liveOutRegVec[idx] != UINT_MAX)
3362                             {
3363                                 liveOutRegMapIt = liveOutRegMap.find(idx);
3364                                 MUST_BE_TRUE(liveOutRegMapIt != liveOutRegMap.end(), "RA verification error: Invalid entry in liveOutRegMap!");
3365                                 MUST_BE_TRUE(false, "RA verification error: Found live variable: " << dcl->getName()
3366                                     << " after lifetime_end " << " assigned to r" << regNum << "." << regOff << "!\n");
3367                             }
3368                         }
3369                     }
3370 
3371                     // verify EOT source
3372                     if (inst->isEOT() && kernel.fg.builder->hasEOTGRFBinding())
3373                     {
3374                         uint32_t regNum = var->getPhyReg()->asGreg()->getRegNum();
3375                         MUST_BE_TRUE(regNum >= 112,
3376                             "RA verification error: EOT source: " << dcl->getName()
3377                             << " is assigned to r." << regNum << "\n");
3378                     }
3379                 }
3380                 else if (src->isSrcRegRegion() && src->getRegAccess() == IndirGRF)
3381                 {
3382                     G4_SrcRegRegion* srcrgn = src->asSrcRegRegion();
3383                     G4_Declare* addrdcl = GetTopDclFromRegRegion(srcrgn);
3384                     G4_RegVar* ptvar = NULL;
3385                     int vid = 0;
3386 
3387                     while ((ptvar = pointsToAnalysis.getPointsTo(addrdcl->getRegVar(), vid++)) != NULL)
3388                     {
3389                         uint32_t varID = ptvar->getId();
3390                         G4_Declare* dcl = ptvar->getDeclare()->getRootDeclare();
3391                         G4_RegVar* var = dcl->getRegVar();
3392 
3393                         uint32_t regNum = var->getPhyReg()->asGreg()->getRegNum();
3394                         uint32_t regOff = var->getPhyRegOff();
3395 
3396                         uint32_t idx = regNum * numEltPerGRF<Type_UW>() +
3397                             (regOff * dcl->getElemSize()) / G4_WSIZE;
3398                         for (uint32_t i = 0; i < dcl->getWordSize(); ++i, ++idx)
3399                         {
3400                             liveOutRegMapIt = liveOutRegMap.find(idx);
3401                             if (liveOutRegVec[idx] == UINT_MAX)
3402                             {
3403                                 liveOutRegVec[idx] = varID;
3404                                 MUST_BE_TRUE(liveOutRegMapIt == liveOutRegMap.end(), "RA verification error: Invalid entry in liveOutRegMap!");
3405                                 liveOutRegMap.emplace(idx, dcl);
3406                             }
3407                             else
3408                             {
3409                                 if (liveOutRegVec[idx] != varID)
3410                                 {
3411                                     const BitSet& indr_use = liveAnalysis.indr_use[bb->getId()];
3412 
3413                                     if (dcl->isInput())
3414                                     {
3415                                         DEBUG_MSG("RA verification warning: Found conflicting input variables: " << dcl->getName()
3416                                             << " and " << liveOutRegMapIt->second->getName() << " assigned to r" << regNum
3417                                             << "." << regOff << "!\n");
3418                                         liveOutRegVec[idx] = varID;
3419                                         liveOutRegMapIt->second = dcl;
3420                                     }
3421                                     else if (indr_use.isSet(liveOutRegVec[idx]) == true)
3422                                     {
3423                                         MUST_BE_TRUE(false, "RA verification warning: Found conflicting indirect variables: " << dcl->getName()
3424                                             << " and " << liveOutRegMapIt->second->getName() << " assigned to r" <<
3425                                             regNum << "." << regOff << "!\n");
3426                                     }
3427                                     else
3428                                     {
3429                                         MUST_BE_TRUE(false, "RA verification error: Found conflicting variables: " << dcl->getName()
3430                                             << " and " << liveOutRegMapIt->second->getName() << " assigned to r" <<
3431                                             regNum << "." << regOff << "!\n");
3432                                     }
3433                                 }
3434                             }
3435                         }
3436                     }
3437                 }
3438             }
3439         }
3440     }
3441 }
3442 
recordRAStats(IR_Builder & builder,G4_Kernel & kernel,int RAStatus)3443 static void recordRAStats(IR_Builder& builder,
3444                           G4_Kernel& kernel,
3445                           int RAStatus)
3446 {
3447 #if COMPILER_STATS_ENABLE
3448     CompilerStats &Stats = builder.getcompilerStats();
3449     int SimdSize = kernel.getSimdSize();
3450     if (RAStatus == VISA_SUCCESS)
3451     {
3452         Stats.SetFlag("IsRAsuccessful", SimdSize);
3453         switch (kernel.getRAType())
3454         {
3455         case RA_Type::TRIVIAL_BC_RA:
3456         case RA_Type::TRIVIAL_RA:
3457             Stats.SetFlag("IsTrivialRA", SimdSize);
3458             break;
3459         case RA_Type::LOCAL_ROUND_ROBIN_BC_RA:
3460         case RA_Type::LOCAL_ROUND_ROBIN_RA:
3461         case RA_Type::LOCAL_FIRST_FIT_BC_RA:
3462         case RA_Type::LOCAL_FIRST_FIT_RA:
3463             Stats.SetFlag("IsLocalRA", SimdSize);
3464             break;
3465         case RA_Type::HYBRID_BC_RA:
3466         case RA_Type::HYBRID_RA:
3467             Stats.SetFlag("IsHybridRA", SimdSize);
3468             break;
3469         case RA_Type::GRAPH_COLORING_RR_RA:
3470         case RA_Type::GRAPH_COLORING_FF_RA:
3471         case RA_Type::GRAPH_COLORING_RR_BC_RA:
3472         case RA_Type::GRAPH_COLORING_FF_BC_RA:
3473         case RA_Type::GRAPH_COLORING_SPILL_RR_RA:
3474         case RA_Type::GRAPH_COLORING_SPILL_FF_RA:
3475         case RA_Type::GRAPH_COLORING_SPILL_RR_BC_RA:
3476         case RA_Type::GRAPH_COLORING_SPILL_FF_BC_RA:
3477         case RA_Type::GLOBAL_LINEAR_SCAN_RA:
3478         case RA_Type::GLOBAL_LINEAR_SCAN_BC_RA:
3479             Stats.SetFlag("IsGlobalRA", SimdSize);
3480             break;
3481         case RA_Type::UNKNOWN_RA:
3482             break;
3483         default:
3484             assert(0 && "Incorrect RA type");
3485         }
3486     }
3487 #endif // COMPILER_STATS_ENABLE
3488 }
3489 
replaceSSO(G4_Kernel & kernel)3490 static void replaceSSO(G4_Kernel& kernel)
3491 {
3492     // Invoke function only for XeHP_SDV+
3493     // Replace SSO with r126.7 (scratch reg)
3494 
3495     auto dst = kernel.fg.builder->createDst(
3496         kernel.fg.getScratchRegDcl()->getRegVar(), 0, 7, 1, Type_UD);
3497     for (auto bb : kernel.fg)
3498     {
3499         for (auto instIt = bb->begin(); instIt != bb->end(); instIt++)
3500         {
3501             auto inst = (*instIt);
3502             if (inst->getDst() &&
3503                 inst->getDst()->getTopDcl() == kernel.fg.builder->getSpillSurfaceOffset())
3504             {
3505                 if (kernel.fg.getIsStackCallFunc())
3506                 {
3507                     bb->erase(instIt);
3508                 }
3509                 else
3510                     inst->setDest(dst);
3511 
3512                 // Also update scratch msg dcl to be an alias
3513                 kernel.fg.builder->getSpillSurfaceOffset()->setAliasDeclare(
3514                     kernel.fg.getScratchRegDcl(), 7 * TypeSize(Type_UD));
3515 
3516                 return;
3517             }
3518         }
3519     }
3520 }
3521 
3522 
regAlloc(IR_Builder & builder,PhyRegPool & regPool,G4_Kernel & kernel)3523 int regAlloc(IR_Builder& builder, PhyRegPool& regPool, G4_Kernel& kernel)
3524 {
3525     kernel.fg.callerSaveAreaOffset = kernel.fg.calleeSaveAreaOffset = kernel.fg.frameSizeInOWord = 0;
3526 
3527     // This must be done before Points-to analysis as it may modify CFG and add new BB!
3528     if (kernel.fg.getHasStackCalls() || kernel.fg.getIsStackCallFunc())
3529     {
3530         kernel.fg.setABIForStackCallFunctionCalls();
3531         kernel.fg.addFrameSetupDeclares(builder, regPool);
3532         kernel.fg.normalizeFlowGraph();
3533         if (builder.getPlatform() >= XeHP_SDV)
3534             replaceSSO(kernel);
3535     }
3536 
3537     if (kernel.getOption(vISA_DoSplitOnSpill))
3538     {
3539         // loop computation is done here because we may need to add
3540         // new preheader BBs. later parts of RA assume no change
3541         // to CFG structure.
3542         kernel.fg.getLoops().computePreheaders();
3543     }
3544 
3545     kernel.fg.reassignBlockIDs();
3546     // do it once for whole RA pass. Assumption is RA should not modify CFG at all
3547     kernel.fg.setPhysicalPredSucc();
3548 
3549     if (kernel.getInt32KernelAttr(Attributes::ATTR_Target) == VISA_3D)
3550     {
3551         kernel.fg.findNaturalLoops();
3552 
3553 #ifdef DEBUG_VERBOSE_ON
3554         for (auto backedge : kernel.fg.backEdges)
3555         {
3556             DEBUG_VERBOSE("\n===> Found natural loop: ");
3557             for (auto block : kernel.fg.naturalLoops[backedge])
3558             {
3559                 DEBUG_VERBOSE("\tBB" << block->getId());
3560             }
3561             DEBUG_VERBOSE(std::endl);
3562         }
3563 #endif
3564     }
3565 
3566     //
3567     // Perform flow-insensitive points-to-analysis.
3568     //
3569     PointsToAnalysis pointsToAnalysis(kernel.Declares, kernel.fg.getNumBB());
3570     pointsToAnalysis.doPointsToAnalysis(kernel.fg);
3571 
3572     GlobalRA gra(kernel, regPool, pointsToAnalysis);
3573 
3574     //
3575     // insert pseudo save/restore return address so that reg alloc
3576     // can assign registers to hold the return addresses
3577     //
3578     gra.assignLocForReturnAddr();
3579 
3580     //FIXME: here is a temp WA
3581     bool hybridWithSpill = builder.getOption(vISA_HybridRAWithSpill) && !(kernel.fg.getHasStackCalls() || kernel.fg.getIsStackCallFunc());
3582     if (kernel.fg.funcInfoTable.size() > 0 &&
3583         kernel.getInt32KernelAttr(Attributes::ATTR_Target) == VISA_3D && !hybridWithSpill)
3584     {
3585         kernel.getOptions()->setOption(vISAOptions::vISA_LocalRA, false);
3586     }
3587 
3588     //
3589     // Mark block local variables for the whole graph prior to performing liveness analysis.
3590     // 1. Is required for flag/address register allocation
3591     // 2. We must make sure the reference number, reference BB(which will be identified in local RA as well) happens only one time.
3592        // Otherwise, there will be correctness issue
3593     gra.markGraphBlockLocalVars();
3594 
3595     //Remove the un-referenced declares
3596     gra.removeUnreferencedDcls();
3597 
3598     if (kernel.getInt32KernelAttr(Attributes::ATTR_Target) == VISA_CM)
3599     {
3600         kernel.fg.markScope();
3601     }
3602 
3603     //
3604     // perform graph coloring for whole program
3605     //
3606 
3607     if (kernel.fg.getHasStackCalls() || kernel.fg.getIsStackCallFunc())
3608     {
3609         kernel.fg.addSaveRestorePseudoDeclares(builder);
3610         gra.fixSrc0IndirFcall();
3611     }
3612 
3613     int status = gra.coloringRegAlloc();
3614 
3615     if (auto jitInfo = builder.getJitInfo())
3616     {
3617         jitInfo->numBytesScratchGtpin = kernel.getGTPinData()->getNumBytesScratchUse();
3618     }
3619 
3620     recordRAStats(builder, kernel, status);
3621     if (status != VISA_SUCCESS)
3622     {
3623         return status;
3624     }
3625 
3626     if (auto sp = kernel.getVarSplitPass())
3627     {
3628         sp->replaceIntrinsics();
3629     }
3630     if (builder.getOption(vISA_VerifyRA))
3631     {
3632         LivenessAnalysis liveAnalysis(gra,
3633             G4_GRF | G4_INPUT, true);
3634         liveAnalysis.computeLiveness();
3635         gra.verifyRA(liveAnalysis);
3636     }
3637 
3638     // printf("EU Fusion WA insts for func: %s\n", kernel.getName());
3639     for (auto inst : gra.getEUFusionWAInsts())
3640     {
3641         kernel.setMaskOffset(inst, InstOpt_M16);
3642         // inst->dump();
3643     }
3644 
3645     return status;
3646 }
3647