1 /*========================== begin_copyright_notice ============================
2 
3 Copyright (C) 2017-2021 Intel Corporation
4 
5 SPDX-License-Identifier: MIT
6 
7 ============================= end_copyright_notice ===========================*/
8 
9 #ifndef _SWSB_H_
10 #define _SWSB_H_
11 #include <string>
12 #include <set>
13 #include <bitset>
14 #include "../Mem_Manager.h"
15 #include "../FlowGraph.h"
16 #include "../G4_IR.hpp"
17 #include "../Timer.h"
18 #include "../RegAlloc.h"
19 #include <vector>
20 #include "../BitSet.h"
21 #include "LocalScheduler_G4IR.h"
22 #include <utility>
23 
24 namespace vISA
25 {
26     class SBNode;
27     struct SBBucketNode;
28     class SBDDD;
29     class G4_BB_SB;
30     class SWSB;
31 }
32 
33 //FIXME, build a table for different platforms
34 #define SWSB_MAX_ALU_DEPENDENCE_DISTANCE 11
35 #define SWSB_MAX_ALU_DEPENDENCE_DISTANCE_64BIT 15
36 #define SWSB_MAX_MATH_DEPENDENCE_DISTANCE 18
37 #define SWSB_MAX_ALU_DEPENDENCE_DISTANCE_VALUE 7u
38 
39 #define TOKEN_AFTER_READ_DPAS_CYCLE 8
40 #define SWSB_MAX_DPAS_DEPENDENCE_DISTANCE 4
41 #define FOUR_THREAD_TOKEN_NUM  32
42 
43 #define DEPENCENCE_ATTR(DO) \
44     DO(DEP_EXPLICT) \
45     DO(DEP_IMPLICIT)
46 
47 enum SBDependenceAttr
48 {
49     DEPENCENCE_ATTR(MAKE_ENUM)
50 };
51 
52 #define DPAS_BLOCK_SIZE 8
53 #define DPAS_8x8_CYCLE 28
54 #define DPAS_8x4_CYCLE 24
55 #define DPAS_8x2_CYCLE 22
56 #define DPAS_8x1_CYCLE 21
57 
58 #define DPAS_CYCLES_BEFORE_GRF_READ 8
59 #define DPAS_8x8_GRFREAD_CYCLE 8
60 #define DPAS_8x4_GRFREAD_CYCLE 4
61 #define DPAS_8x2_GRFREAD_CYCLE 2
62 #define DPAS_8x1_GRFREAD_CYCLE 1
63 
64 //1 GRF map to 1 flag: bytes per bit
65 #define FLAG_TO_GRF_MAP  (numEltPerGRF<Type_UB>() / 16)
66 
67 typedef enum  _DPAS_DEP
68 {
69     REP_1 = 1,
70     REP_2 = 2,
71     REP_4 = 4,
72     REP_8 = 8
73 } DPAS_DEP;
74 
75 #define UNKNOWN_TOKEN         -1
76 #define UNINIT_BUCKET         -1
77 
78 namespace vISA {
79     struct SBDep {
SBDepSBDep80         SBDep(SBNode *SBNode, DepType Type, SBDependenceAttr Attr) {
81             node = SBNode;
82             type = Type;
83             attr = Attr;
84             exclusiveNodes.clear();
85         }
86         SBNode* node;
87         DepType type;
88         SBDependenceAttr attr;
89         std::vector<vISA::SBNode*> exclusiveNodes;
90     };
91 
92    struct SBDISTDep {
93        SB_INST_PIPE liveNodePipe;
94        SB_INST_PIPE nodePipe;
95        SB_INST_PIPE operandType;
96        bool dstDep;
97    };
98 
99     using SWSBTokenType = G4_INST::SWSBTokenType;
100     void singleInstStallSWSB(G4_Kernel *kernel, uint32_t instID, uint32_t endInstID, bool is_barrier);
101     void forceDebugSWSB(G4_Kernel *kernel);
102     void forceFCSWSB(G4_Kernel *kernel);
103 }
104 
105 typedef vISA::SBDep SBDEP_ITEM;
106 typedef std::vector< SBDEP_ITEM> SBDEP_VECTOR;
107 typedef SBDEP_VECTOR::iterator SBDEP_VECTOR_ITER;
108 
109 typedef vISA::SBDISTDep SBDISTDEP_ITEM;
110 typedef std::vector< SBDISTDEP_ITEM> SBDISTDEP_VECTOR;
111 
112 namespace vISA
113 {
114     typedef enum  _FOOTPRINT_TYPE
115     {
116         GRF_T = 1,
117         ACC_T = 2,
118         FLAG_T = 4,
119     } FOOTPRINT_TYPE;
120 
121     struct SBFootprint {
122         const FOOTPRINT_TYPE fType;
123         const G4_Type type;
124         const unsigned short LeftB;
125         const unsigned short RightB;
126         unsigned short offset = 0;
127         G4_INST*       inst;
128         struct SBFootprint *next = nullptr; //The ranges linked together to represent the possible register ranges may be occupied by an operand.
129                                          //For indirect access, non-contiguous ranges exist.
130 
SBFootprintSBFootprint131         SBFootprint() : fType(GRF_T), type (Type_UNDEF), LeftB(0), RightB(0), inst(nullptr) { ; }
SBFootprintSBFootprint132         SBFootprint(FOOTPRINT_TYPE ft, G4_Type t, unsigned short LB, unsigned short RB, G4_INST *i)
133             : fType(ft), type(t), LeftB(LB), RightB(RB), inst(i) {
134             ;
135         }
~SBFootprintSBFootprint136         ~SBFootprint()
137         {
138         }
139 
setOffsetSBFootprint140         void setOffset(unsigned short o) { offset = o; }
141 
hasOverlapSBFootprint142         bool hasOverlap(const SBFootprint *liveFootprint, unsigned short &internalOffset) const
143         {
144             for (const SBFootprint *curFootprint2Ptr = liveFootprint; curFootprint2Ptr; curFootprint2Ptr = curFootprint2Ptr->next)
145             {
146                 // Negative of no overlap: !(LeftB > curFootprint2Ptr->RightB || RightB < curFootprint2Ptr->LeftB)
147                 if (fType == curFootprint2Ptr->fType &&
148                     LeftB <= curFootprint2Ptr->RightB && RightB >= curFootprint2Ptr->LeftB)
149                 {
150                     internalOffset = curFootprint2Ptr->offset;
151                     return true;
152                 }
153             }
154 
155             //Overlap with other ranges.
156 
157             for (const SBFootprint *curFootprintPtr = next; curFootprintPtr; curFootprintPtr = curFootprintPtr->next)
158             {
159                 FOOTPRINT_TYPE curFType = curFootprintPtr->fType;
160                 for (const SBFootprint *curFootprint2Ptr = liveFootprint; curFootprint2Ptr; curFootprint2Ptr = curFootprint2Ptr->next)
161                 {
162                     if (curFType == curFootprint2Ptr->fType &&
163                            curFootprintPtr->LeftB <= curFootprint2Ptr->RightB && curFootprintPtr->RightB >= curFootprint2Ptr->LeftB)
164                     {
165                         internalOffset = curFootprint2Ptr->offset;
166                         return true;
167                     }
168                 }
169             }
170 
171             return false;
172         }
173 
hasOverlapSBFootprint174         bool hasOverlap(const SBFootprint *liveFootprint, bool &isRMWOverlap, unsigned short &internalOffset) const
175         {
176             for (const SBFootprint *curFootprint2Ptr = liveFootprint; curFootprint2Ptr; curFootprint2Ptr = curFootprint2Ptr->next)
177             {
178                 // Negative of no overlap: !(LeftB > curFootprint2Ptr->RightB || RightB < curFootprint2Ptr->LeftB)
179                 if (fType == curFootprint2Ptr->fType)
180                 {
181                     if (LeftB <= curFootprint2Ptr->RightB && RightB >= curFootprint2Ptr->LeftB)
182                     {
183                         internalOffset = curFootprint2Ptr->offset;
184                         if (fType == GRF_T
185                             && (type == Type_UB || type == Type_B))
186                         {
187                             isRMWOverlap = true;
188                         }
189                         return true;
190                     }
191                     else if (fType == GRF_T
192                                 && (type == Type_UB || type == Type_B))
193                     {
194                         unsigned short w_LeftB = LeftB/2;
195                         unsigned short w_RightB = RightB/2;
196                         unsigned short w_curLeftB = curFootprint2Ptr->LeftB/2;
197                         unsigned short w_curRightB = curFootprint2Ptr->RightB/2;
198                         if (w_LeftB <= w_curRightB && w_RightB >= w_curLeftB)
199                         {
200                             isRMWOverlap = true;
201                             return true;
202                         }
203                     }
204                 }
205             }
206 
207             //Overlap with other ranges.
208             for (const SBFootprint *curFootprintPtr = next; curFootprintPtr; curFootprintPtr = curFootprintPtr->next)
209             {
210                 FOOTPRINT_TYPE curFType = curFootprintPtr->fType;
211                 for (const SBFootprint *curFootprint2Ptr = liveFootprint; curFootprint2Ptr; curFootprint2Ptr = curFootprint2Ptr->next)
212                 {
213                     if (curFType == curFootprint2Ptr->fType)
214                     {
215                         if (curFootprintPtr->LeftB <= curFootprint2Ptr->RightB && curFootprintPtr->RightB >= curFootprint2Ptr->LeftB)
216                         {
217                             internalOffset = curFootprint2Ptr->offset;
218                             if (curFType == GRF_T
219                                 && (type == Type_UB || type == Type_B))
220                             {
221                                 isRMWOverlap = true;
222                             }
223                             return true;
224                         }
225                         else if (curFType == GRF_T
226                                     && (type == Type_UB || type == Type_B))
227                         {
228                             unsigned short w_LeftB = curFootprintPtr->LeftB/2;
229                             unsigned short w_RightB = curFootprintPtr->RightB/2;
230                             unsigned short w_curLeftB = curFootprint2Ptr->LeftB/2;
231                             unsigned short w_curRightB = curFootprint2Ptr->RightB/2;
232                             if (w_LeftB <= w_curRightB && w_RightB >= w_curLeftB)
233                             {
234                                 isRMWOverlap = true;
235                                 return true;
236                             }
237                         }
238                     }
239                 }
240             }
241 
242             return false;
243         }
244 
hasGRFGrainOverlapSBFootprint245         bool hasGRFGrainOverlap(const SBFootprint *liveFootprint) const
246         {
247             for (const SBFootprint *curFootprint2Ptr = liveFootprint; curFootprint2Ptr; curFootprint2Ptr = curFootprint2Ptr->next)
248             {
249                 if (fType == curFootprint2Ptr->fType &&
250                     ((LeftB / numEltPerGRF<Type_UB>()) <= (curFootprint2Ptr->RightB / numEltPerGRF<Type_UB>())) &&
251                     ((RightB / numEltPerGRF<Type_UB>()) >= (curFootprint2Ptr->LeftB / numEltPerGRF<Type_UB>())))
252                 {
253                     return true;
254                 }
255             }
256 
257             //Overlap with other ranges.
258             for (const SBFootprint *curFootprintPtr = next; curFootprintPtr; curFootprintPtr = curFootprintPtr->next)
259             {
260                 for (const SBFootprint *curFootprint2Ptr = liveFootprint; curFootprint2Ptr; curFootprint2Ptr = curFootprint2Ptr->next)
261                 {
262                     if (fType == curFootprint2Ptr->fType &&
263                         ((curFootprintPtr->LeftB  / numEltPerGRF<Type_UB>()) <= (curFootprint2Ptr->RightB  / numEltPerGRF<Type_UB>())) &&
264                         ((curFootprintPtr->RightB  / numEltPerGRF<Type_UB>()) >= (curFootprint2Ptr->LeftB  / numEltPerGRF<Type_UB>())))
265                     {
266                         return true;
267                     }
268                 }
269             }
270 
271             return false;
272         }
273 
274 
275         //Check if current footprint overlaps footprint2
276         //FIXME: it's conservative. Because for the indirect, the ranges may be contiguous?
isWholeOverlapSBFootprint277         bool isWholeOverlap(const SBFootprint *liveFootprint) const
278         {
279             bool findOverlap = false;
280 
281             for (const SBFootprint *footprint2Ptr = liveFootprint; footprint2Ptr; footprint2Ptr = footprint2Ptr->next)
282             {
283                 findOverlap = false;
284 
285                 if (fType == footprint2Ptr->fType &&
286                     LeftB <= footprint2Ptr->LeftB && RightB >= footprint2Ptr->RightB)
287                 {
288                     findOverlap = true;
289                     continue;
290                 }
291                 for (const SBFootprint *curFootprintPtr = next; curFootprintPtr; curFootprintPtr = curFootprintPtr->next)
292                 {
293                     FOOTPRINT_TYPE curFType = curFootprintPtr->fType;
294                     if (curFType == footprint2Ptr->fType &&
295                             curFootprintPtr->LeftB <= footprint2Ptr->LeftB && curFootprintPtr->RightB >= footprint2Ptr->RightB)
296                     {
297                         findOverlap = true;
298                         break;
299                     }
300                 }
301 
302                 if (!findOverlap)
303                 {
304                     return false;
305                 }
306             }
307 
308             return findOverlap;
309         }
310 
311     };
312 
313     // Bit set which is used for global dependence analysis for SBID.
314     // Since dependencies may come from dst and src and there may be dependence kill between dst and src dependencies,
315     // we use internal bit set to track the live of dst and src separately.
316     // Each bit map to one global SBID node according to the node's global ID.
317     struct SBBitSets {
318         BitSet dst;
319         BitSet src;
320 
SBBitSetsSBBitSets321         SBBitSets()
322         {
323         }
324 
SBBitSetsSBBitSets325         SBBitSets(unsigned size) : dst(size, false), src(size, false)
326         {
327         }
328 
~SBBitSetsSBBitSets329         ~SBBitSets()
330         {
331         }
332 
setDstSBBitSets333         void setDst(int ID, bool value)
334         {
335             dst.set(ID, value);
336         }
setSrcSBBitSets337         void setSrc(int ID, bool value)
338         {
339             src.set(ID, value);
340         }
341 
getSizeSBBitSets342         unsigned getSize() const
343         {
344             assert(dst.getSize() == src.getSize());
345             return dst.getSize();
346         }
347 
isEmptySBBitSets348         bool isEmpty() const
349         {
350             return dst.isEmpty() && src.isEmpty();
351         }
352 
isDstEmptySBBitSets353         bool isDstEmpty() const
354         {
355             return dst.isEmpty();
356         }
357 
isSrcEmptySBBitSets358         bool isSrcEmpty() const
359         {
360             return src.isEmpty();
361         }
362 
isDstSetSBBitSets363         bool isDstSet(unsigned i) const
364         {
365             return dst.isSet(i);
366         }
367 
isSrcSetSBBitSets368         bool isSrcSet(unsigned i) const
369         {
370             return src.isSet(i);
371         }
372 
373         SBBitSets& operator= (const SBBitSets& other)
374         {
375             dst = other.dst;
376             src = other.src;
377             return *this;
378         }
379 
380         SBBitSets& operator= (SBBitSets&& other) noexcept = default;
381 
382         SBBitSets& operator|= (const SBBitSets& other)
383         {
384             dst |= other.dst;
385             src |= other.src;
386             return *this;
387         }
388 
389         SBBitSets& operator&= (const SBBitSets& other)
390         {
391             dst &= other.dst;
392             src &= other.src;
393             return *this;
394         }
395 
396         SBBitSets& operator-= (const SBBitSets& other)
397         {
398             dst -= other.dst;
399             src -= other.src;
400             return *this;
401         }
402 
403         bool operator!= (const SBBitSets& other) const
404         {
405             return (dst != other.dst) || (src != other.src);
406         }
407 
newSBBitSets408         void* operator new(size_t sz, vISA::Mem_Manager& m) { return m.alloc(sz); }
409     };
410 
411     class SBNode {
412     private:
413         std::vector<SBFootprint*>  footprints;  // The coarse grained footprint of operands
414         unsigned      nodeID = -1;          // Unique ID of the node
415         unsigned      BBID = 0;           // ID of basic block
416         int      ALUID = 0;          // The ID for in-order instructions. The out-of-order instructions are not counted.
417         unsigned      liveStartID = 0; // The start ID of live range
418         unsigned      liveEndID = 0;   // The end ID of live range
419         unsigned      liveStartBBID = -1; // The start BB ID of live range
420         unsigned      liveEndBBID = -1; // The start BB ID of live range
421         int      integerID = -1;
422         int      floatID = -1;
423         int      longID = -1;
424         int      mathID = -1;
425         int      DPASID = -1;
426         unsigned short DPASSize = 0;
427         bool          instKilled = false; // Used for global analysis, if the node generated dependencies have all been resolved.
428         bool          sourceKilled = false; // If the dependencies caused by source operands have all been resolved.
429         bool          hasAW = false;      // Used for global analysis, if has AW (RAW or WAW) dependencies from the node
430         bool          hasAR = false;      // Used for global analysis, if has AR (WAR) dependencies from the node
431         bool          hasFollowDistOneAReg = false;
432         bool          followDistOneAReg = false;
433         unsigned depDelay = 0;
434 
435         struct DepToken {
436             unsigned short token;
437             SWSBTokenType  type;
438             SBNode*        depNode;
439         };
440         std::vector <DepToken> depTokens;
441 
442     public:
443         std::vector<G4_INST *> instVec;
444         SBDEP_VECTOR   succs;          // A list of node's successors in dependence graph
445         SBDEP_VECTOR   preds;          // A list of node's predecessors in dependence graph
446         int            globalID = -1;  // ID of global send instructions
447         int            sendID = -1;
448         int            sendUseID = -1;
449         SBNode *       tokenReusedNode = nullptr;  // For global token reuse optimization, the node whose token is reused by current one.
450         SB_INST_PIPE   ALUPipe;
451         SBDISTDEP_VECTOR  distDep;
452         SBBitSets reachingSends;
453         SBBitSets reachedUses;
454         unsigned reuseOverhead = 0;
455 
456         /* Constructor */
SBNode()457         SBNode()
458         {
459 
460         }
461 
SBNode(uint32_t id,int ALUId,uint32_t BBId,G4_INST * i)462         SBNode(uint32_t id, int ALUId, uint32_t BBId, G4_INST *i)
463             : nodeID(id), ALUID(ALUId), BBID(BBId),
464             footprints(Opnd_total_num, nullptr)
465         {
466             ALUPipe = PIPE_NONE;
467             if (i->isDpas())
468             {
469                 G4_InstDpas* dpasInst = i->asDpasInst();
470                 DPASSize = dpasInst->getRepeatCount(); //Set the size of the current dpas inst, if there is a macro, following will be added.
471             }
472 
473             instVec.push_back(i);
474         }
475 
~SBNode()476         ~SBNode()
477         {
478             for (SBFootprint *sm : footprints)
479             {
480                 sm->~SBFootprint();
481             }
482         }
483 
setSendID(int ID)484         void setSendID(int ID)
485         {
486             sendID = ID;
487         }
488 
setSendUseID(int ID)489         void setSendUseID(int ID)
490         {
491             sendUseID = ID;
492         }
493 
getSendID()494         int getSendID() const
495         {
496             return sendID;
497         }
498 
getSendUseID()499         int getSendUseID() const
500         {
501             return sendUseID;
502         }
503 
504         /* Member functions */
GetInstruction()505         G4_INST*  GetInstruction() const { return instVec.front(); }
addInstruction(G4_INST * i)506         void addInstruction(G4_INST *i) { instVec.push_back(i); }
getLastInstruction()507         G4_INST*  getLastInstruction() const { return instVec.back(); }
setDepDelay(unsigned val)508         void setDepDelay(unsigned val) { depDelay = val; }
getDepDelay()509         unsigned getDepDelay() const { return depDelay; }
510 
getALUID()511         int getALUID() const { return ALUID; }
getNodeID()512         unsigned getNodeID() const { return nodeID; };
getBBID()513         unsigned getBBID() const { return BBID; };
getIntegerID()514         int getIntegerID() const { return integerID; }
getFloatID()515         int getFloatID() const { return floatID; }
getLongID()516         int getLongID() const { return longID; }
getMathID()517         int getMathID() const {return mathID;}
getDPASID()518         int getDPASID() const { return DPASID; }
getDPASSize()519         unsigned short getDPASSize() const { return DPASSize; }
520 
setIntegerID(int id)521         void setIntegerID(int id) { integerID = id; }
setFloatID(int id)522         void setFloatID(int id) { floatID = id; }
setLongID(int id)523         void setLongID(int id) { longID = id; }
setMathID(int id)524         void setMathID(int id) { mathID = id; }
setDPASID(int id)525         void setDPASID(int id) { DPASID = id; }
addDPASSize(unsigned short size)526         void addDPASSize(unsigned short size) { DPASSize += size; }
527 
getLiveStartID()528         unsigned getLiveStartID() const { return liveStartID; }
getLiveEndID()529         unsigned getLiveEndID() const { return liveEndID; }
530 
getLiveStartBBID()531         unsigned getLiveStartBBID() const { return liveStartBBID; }
getLiveEndBBID()532         unsigned getLiveEndBBID() const { return liveEndBBID; }
533 
setLiveEarliestID(unsigned id,unsigned startBBID)534         void setLiveEarliestID(unsigned id, unsigned startBBID)
535         {
536             if (!liveStartID)
537             {
538                 liveStartID = id;
539                 liveStartBBID = startBBID;
540             }
541             else if (liveStartID > id)
542             {
543                 liveStartID = id;
544                 liveStartBBID = startBBID;
545             }
546         }
setLiveLatestID(unsigned id,unsigned endBBID)547         void setLiveLatestID(unsigned id, unsigned endBBID)
548         {
549             if (!liveEndID)
550             {
551                 liveEndID = id;
552                 liveEndBBID = endBBID;
553             }
554             else if (liveEndID < id)
555             {
556                 liveEndID = id;
557                 liveEndBBID = endBBID;
558             }
559         }
560 
setLiveEarliestID(unsigned id)561         void setLiveEarliestID(unsigned id)
562         {
563             liveStartID = id;
564         }
565 
setLiveLatestID(unsigned id)566         void setLiveLatestID(unsigned id)
567         {
568             liveEndID = id;
569         }
570 
setInstKilled(bool value)571         void setInstKilled(bool value)
572         {
573             instKilled = value;
574             if (value)
575             {
576                 hasAW = true;
577             }
578         }
579 
isInstKilled()580         bool isInstKilled() const
581         {
582             return instKilled;
583         }
584 
setSourceKilled(bool value)585         void setSourceKilled(bool value)
586         {
587             sourceKilled = value;
588             if (value)
589             {
590                 hasAR = true;
591             }
592         }
593 
isSourceKilled()594         bool isSourceKilled() const
595         {
596             return sourceKilled;
597         }
598 
setAW()599         void setAW()
600         {
601             hasAW = true;
602         }
603 
setAR()604         void setAR()
605         {
606             hasAR = true;
607         }
608 
setDistOneAReg()609         void setDistOneAReg()
610         {
611             hasFollowDistOneAReg = true;
612         }
setFollowDistOneAReg()613         void setFollowDistOneAReg()
614         {
615             followDistOneAReg = true;
616         }
617 
hasAWDep()618         bool hasAWDep() const { return hasAW; }
hasARDep()619         bool hasARDep() const { return hasAR; }
hasDistOneAreg()620         bool hasDistOneAreg() const { return hasFollowDistOneAReg; }
followDistOneAreg()621         bool followDistOneAreg() const { return followDistOneAReg; }
622 
setFootprint(SBFootprint * footprint,Gen4_Operand_Number opndNum)623         void setFootprint(SBFootprint *footprint, Gen4_Operand_Number opndNum)
624         {
625             if (footprints[opndNum] == nullptr)
626             {
627                 footprints[opndNum] = footprint;
628             }
629             else
630             {
631                 footprint->next = footprints[opndNum];
632                 footprints[opndNum] = footprint;
633             }
634         }
635 
getFirstFootprint(Gen4_Operand_Number opndNum)636         SBFootprint* getFirstFootprint(Gen4_Operand_Number opndNum) const
637         {
638             return footprints[opndNum];
639         }
640 
setDistance(unsigned distance)641         void setDistance(unsigned distance)
642         {
643             distance = std::min(distance, SWSB_MAX_ALU_DEPENDENCE_DISTANCE_VALUE);
644             unsigned curDistance = (unsigned)instVec.front()->getDistance();
645             if (curDistance == 0 ||
646                 curDistance > distance)
647             {
648                 instVec.front()->setDistance((unsigned char)distance);
649             }
650         }
651 
setAccurateDistType(SB_INST_PIPE depPipe)652         void setAccurateDistType(SB_INST_PIPE depPipe)
653         {
654             switch (depPipe)
655             {
656             case PIPE_INT:
657                 instVec.front()->setDistanceTypeXe(G4_INST::DistanceType::DISTINT);
658                 break;
659             case PIPE_FLOAT:
660                 instVec.front()->setDistanceTypeXe(G4_INST::DistanceType::DISTFLOAT);
661                 break;
662             case PIPE_LONG:
663                 instVec.front()->setDistanceTypeXe(G4_INST::DistanceType::DISTLONG);
664                 break;
665             case PIPE_MATH:
666                 instVec.front()->setDistanceTypeXe(G4_INST::DistanceType::DISTMATH);
667                 break;
668             case PIPE_SEND:
669                 instVec.front()->setDistanceTypeXe(G4_INST::DistanceType::DISTALL);
670                 break;
671             default:
672                 assert(0 && "Wrong ALU PIPE");
673                 break;
674             }
675         }
676 
677         //This is to reduce the sync instructions.
678         //When A@1 is used to replace I@1, make sure it will not cause the stall for float or long pipelines
isClosedALUType(std::vector<unsigned> ** latestInstID,unsigned distance,SB_INST_PIPE type)679         bool isClosedALUType(std::vector<unsigned>** latestInstID, unsigned distance, SB_INST_PIPE type)
680         {
681             int instDist = SWSB_MAX_MATH_DEPENDENCE_DISTANCE;
682             SB_INST_PIPE curType = PIPE_NONE;
683 
684             for (int i = 1; i < PIPE_DPAS; i++)
685             {
686                 int dist = SWSB_MAX_MATH_DEPENDENCE_DISTANCE + 1;
687                 if (i == PIPE_INT || i == PIPE_FLOAT)
688                 {
689                     if (latestInstID[i]->size() >= distance)
690                     {
691                         dist = (int)nodeID - (*latestInstID[i])[latestInstID[i]->size() - distance] - SWSB_MAX_ALU_DEPENDENCE_DISTANCE;
692                     }
693                 }
694                 if (i == PIPE_MATH)
695                 {
696                     if (latestInstID[i]->size() >= distance)
697                     {
698                         dist = (int)nodeID - (*latestInstID[i])[latestInstID[i]->size() - distance] - SWSB_MAX_MATH_DEPENDENCE_DISTANCE;
699                     }
700                 }
701                 if (i == PIPE_LONG)
702                 {
703                     if (latestInstID[i]->size() >= distance)
704                     {
705                         dist = (int)nodeID - (*latestInstID[i])[latestInstID[i]->size() - distance] - SWSB_MAX_ALU_DEPENDENCE_DISTANCE_64BIT;
706                     }
707                 }
708 
709                 if (dist < instDist)
710                 {
711                     instDist = dist;
712                     curType = (SB_INST_PIPE)i;
713                 }
714             }
715 
716             return type == curType;
717         }
718 
finalizeDistanceType1(IR_Builder & builder,std::vector<unsigned> ** latestInstID)719         void finalizeDistanceType1(IR_Builder& builder, std::vector<unsigned>** latestInstID)
720         {
721             if (instVec.front()->getDistanceTypeXe() != G4_INST::DistanceType::DIST_NONE)
722             {  //Forced to a type already
723                 return;
724             }
725 
726             if (builder.loadThreadPayload() && nodeID <= 8 &&
727                 GetInstruction()->isSend() && distDep.size())
728             {
729                 instVec.front()->setDistanceTypeXe(G4_INST::DistanceType::DISTALL);
730                 return;
731             }
732 
733             if (builder.hasA0WARHWissue() && builder.hasFourALUPipes())
734             {
735                 G4_INST* inst = GetInstruction();
736 
737                 if (inst->getDst() && inst->getDst()->isA0())
738                 {
739                     instVec.front()->setDistance(1);
740                     instVec.front()->setDistanceTypeXe(G4_INST::DistanceType::DISTALL);
741 
742                     return;
743                 }
744             }
745 
746             unsigned curDistance = (unsigned)instVec.front()->getDistance();
747             if (distDep.size())
748             {
749                 SB_INST_PIPE depPipe = PIPE_NONE;
750                 SB_INST_PIPE sameOpndTypeDepPipe = PIPE_NONE;
751                 bool sameOperandType = true;
752                 int diffPipes = 0;
753                 bool depLongPipe = false;
754 
755                 //Check if there is type conversion
756                 for (int i = 0; i < (int)(distDep.size()); i++)
757                 {
758                     SBDISTDEP_ITEM& it = distDep[i];
759 
760                     if (it.operandType != it.liveNodePipe || it.dstDep)
761                     {
762                         sameOperandType = false;
763                     }
764                 }
765 
766                 //If the dependent instructions belong to different pipelines
767                 for (int i = 0; i < (int)(distDep.size()); i++)
768                 {
769                     SBDISTDEP_ITEM& it = distDep[i];
770 
771                     if (it.liveNodePipe != it.nodePipe)
772                     {
773                         diffPipes++;
774                         depPipe = it.liveNodePipe;
775                     }
776 
777                     depLongPipe = it.liveNodePipe == PIPE_LONG;
778                 }
779 
780                 //In some platforms, A@ need be used for following case when the regDist cannot be used (due to HW issue), and inst depends on different pipes
781                 //mov(8 | M0)               r63.0 < 2 > :ud   r15.0 < 1; 1, 0 > : ud{ I@7 }
782                 //...
783                 //add(8 | M0)               r95.0 < 1 > : q    r87.0 < 1; 1, 0 > : q    r10.0 < 0; 1, 0 > : q{ I@7 }
784                 //...
785                 //add(8 | M0)               r55.0 < 2 > : d    r95.0 < 1; 1, 0 > : q    r63.0 < 2; 1, 0 > : ud{ A@4 }
786                 //In some platforms I@ accurate dependence can used for following case when the regDist cannot be used (due to HW issue), because inst depends on same pipe
787                 //The example code piece :
788                 //mov(16 | M0) r88.0 < 2 > : d r84.0 < 1; 1, 0 > : d{ F@2 } // $79&103
789                 //mov(16 | M16) r90.0 < 2 > : d r85.0 < 1; 1, 0 > : d // $80&104
790                 //mov(16 | M0) r88.1 < 2 > : d r86.0 < 1; 1, 0 > : d{ F@1 } // $81&105
791                 //mov(16 | M16) r90.1 < 2 > : d r87.0 < 1; 1, 0 > : d // $82&106
792                 //mov(16 | M0) r32.0 < 1 > : df r88.0 < 1; 1, 0 > : q{ @2/I@2 } // $83&107
793                 //Since:q and : d all go to integer pipeline in, @2 should work.However, due to HW bug, I@2 is good
794                 bool mulitpleSamePipe = false;
795                 if (distDep.size() > 1 && sameOperandType)
796                 {
797                     sameOpndTypeDepPipe = distDep[0].liveNodePipe;
798                     for (int i = 1; i < (int)(distDep.size()); i++)
799                     {
800                         SBDISTDEP_ITEM& it = distDep[i];
801 
802                         if (it.liveNodePipe != sameOpndTypeDepPipe)
803                         {
804                             mulitpleSamePipe = true;
805                         }
806                     }
807                 }
808 
809                 if (builder.hasLongOperandTypeDepIssue() && depLongPipe)
810                 {
811                     sameOperandType = false;
812                 }
813 
814                 if (!builder.getOptions()->getOption(vISA_disableRegDistDep) &&
815                     !builder.hasRegDistDepIssue())
816                 {
817                     instVec.front()->setOperandTypeIndicated(sameOperandType);
818                 }
819 
820                 //Multiple dependences
821                 if (distDep.size() > 1)
822                 {
823                     if (diffPipes) //Dependence instructions belong to different ALU pipelines.
824                     {
825                         if (sameOperandType) //But the operand type is from same instruction pipeline.
826                         {
827                             assert(!GetInstruction()->isSend()); //Send operand has no type, sameOperandType is always false
828                             if (mulitpleSamePipe)
829                             {
830                                 instVec.front()->setDistanceTypeXe(G4_INST::DistanceType::DISTALL);
831                             }
832                             else
833                             {
834                                 setAccurateDistType(depPipe);
835                             }
836                             instVec.front()->setOperandTypeIndicated(false); //DPAS distance will be in sync inst
837                         }
838                         else // For send, we will set it to DISTALL if there are multiple dependences. For non-send, DIST
839                         {
840                             instVec.front()->setDistanceTypeXe(G4_INST::DistanceType::DISTALL);
841                             instVec.front()->setOperandTypeIndicated(false);
842                         }
843                     }
844                     else //From same pipeline
845                     {
846                         setAccurateDistType(ALUPipe);
847                         instVec.front()->setIsClosestALUType(isClosedALUType(latestInstID, curDistance, ALUPipe));
848                     }
849                 }
850                 else //Only one dependency
851                 {
852                     if (depPipe != PIPE_NONE) // From different pipeline
853                     {
854                         setAccurateDistType(depPipe);
855                         if (sameOperandType || GetInstruction()->isSend())
856                         {
857                             instVec.front()->setIsClosestALUType(isClosedALUType(latestInstID, curDistance, depPipe));
858                         }
859                     }
860                     else // From same pipeline
861                     {
862                         setAccurateDistType(ALUPipe);
863                         if (sameOperandType)
864                         {
865                             instVec.front()->setIsClosestALUType(isClosedALUType(latestInstID, curDistance, ALUPipe));
866                         }
867                     }
868                 }
869             }
870         }
871 
finalizeDistanceType2(IR_Builder & builder,std::vector<unsigned> ** latestInstID)872         void finalizeDistanceType2(IR_Builder& builder, std::vector<unsigned> **latestInstID)
873         {
874             if (instVec.front()->getDistanceTypeXe() != G4_INST::DistanceType::DIST_NONE)
875             {  //Forced to a type already
876                 return;
877             }
878 
879             if (builder.loadThreadPayload() && nodeID <= 8 &&
880                 GetInstruction()->isSend() && distDep.size())
881             {
882                 instVec.front()->setDistanceTypeXe(G4_INST::DistanceType::DISTALL);
883                 return;
884             }
885 
886             if (builder.hasA0WARHWissue() && builder.hasFourALUPipes())
887             {
888                 G4_INST* inst = GetInstruction();
889 
890                 if (inst->getDst() && inst->getDst()->isA0())
891                 {
892                     instVec.front()->setDistance(1);
893                     instVec.front()->setDistanceTypeXe(G4_INST::DistanceType::DISTALL);
894 
895                     return;
896                 }
897             }
898 
899             unsigned curDistance = (unsigned)instVec.front()->getDistance();
900             if (distDep.size())
901             {
902                 SB_INST_PIPE depPipe = PIPE_NONE;
903                 SB_INST_PIPE sameOpndTypeDepPipe = PIPE_NONE;
904                 bool sameOperandType = true;
905                 bool depSamePipe = true;
906                 bool depLongPipe = false;
907 
908                 //Check the potential to use regDist
909                 for (int i = 0; i < (int)(distDep.size()); i++)
910                 {
911                     SBDISTDEP_ITEM& it = distDep[i];
912 
913                     //For regDist, the operand type is same as pipeline type of dependent instruction
914                     if (it.operandType != it.liveNodePipe || it.dstDep)
915                     {
916                         sameOperandType = false;
917                     }
918                 }
919 
920                 //If the dependent instructions belong to different pipelines
921                 for (int i = 0; i < (int)(distDep.size()); i++)
922                 {
923                     SBDISTDEP_ITEM& it = distDep[i];
924 
925                     if (it.liveNodePipe != it.nodePipe)
926                     {
927                         depSamePipe = false; //Current instruction and dependence instruction belong to different pipeline
928                     }
929 
930                     if (depPipe == PIPE_NONE)
931                     {
932                         depPipe = it.liveNodePipe;
933                     }
934 
935                     depLongPipe = it.liveNodePipe == PIPE_LONG;
936                 }
937 
938                 //In some platforms, A@ need be used for following case when the regDist cannot be used (due to HW issue), and inst depends on different pipes
939                 //mov(8 | M0)               r63.0 < 2 > :ud   r15.0 < 1; 1, 0 > : ud{ I@7 }
940                 //...
941                 //add(8 | M0)               r95.0 < 1 > : q    r87.0 < 1; 1, 0 > : q    r10.0 < 0; 1, 0 > : q{ I@7 }
942                 //...
943                 //add(8 | M0)               r55.0 < 2 > : d    r95.0 < 1; 1, 0 > : q    r63.0 < 2; 1, 0 > : ud{ A@4 }
944                 //In some platforms I@ accurate dependence can used for following case when the regDist cannot be used (due to HW issue), because inst depends on same pipe
945                 //The example code piece :
946                 //mov(16 | M0) r88.0 < 2 > : d r84.0 < 1; 1, 0 > : d{ F@2 } // $79&103
947                 //mov(16 | M16) r90.0 < 2 > : d r85.0 < 1; 1, 0 > : d // $80&104
948                 //mov(16 | M0) r88.1 < 2 > : d r86.0 < 1; 1, 0 > : d{ F@1 } // $81&105
949                 //mov(16 | M16) r90.1 < 2 > : d r87.0 < 1; 1, 0 > : d // $82&106
950                 //mov(16 | M0) r32.0 < 1 > : df r88.0 < 1; 1, 0 > : q{ @2/I@2 } // $83&107
951                 //Since:q and : d all go to integer pipeline in, @2 should work.However, due to HW bug, we can only set to I@2
952                 //Depends on multiple pipelines but same pipeline
953                 bool mulitpleSamePipe = true;
954                 if (distDep.size() > 1)
955                 {
956                     sameOpndTypeDepPipe = distDep[0].liveNodePipe;
957                     for (int i = 1; i < (int)(distDep.size()); i++)
958                     {
959                         SBDISTDEP_ITEM& it = distDep[i];
960 
961                         if (it.liveNodePipe != sameOpndTypeDepPipe)
962                         {
963                             mulitpleSamePipe = false;
964                         }
965                     }
966                 }
967 
968                 if (builder.hasLongOperandTypeDepIssue() && depLongPipe)
969                 {
970                     sameOperandType = false;
971                 }
972 
973                 if (!builder.getOptions()->getOption(vISA_disableRegDistDep) &&
974                     !builder.hasRegDistDepIssue())
975                 {
976                     instVec.front()->setOperandTypeIndicated(sameOperandType);
977                 }
978 
979                 //Multiple dependences
980                 if (distDep.size() > 1)
981                 {
982                     if (!depSamePipe) //Dependent instruction and current instruction belong to different ALU pipelines.
983                     {
984                         if (sameOperandType) //But the operand type and dependence instruction are from same instruction pipeline.
985                         {
986                             assert(!GetInstruction()->isSend()); //Send operand has no type, sameOperandType is always false
987                             if (mulitpleSamePipe)
988                             {
989                                 setAccurateDistType(depPipe);
990                             }
991                             else
992                             {
993                                 instVec.front()->setDistanceTypeXe(G4_INST::DistanceType::DISTALL);
994                             }
995                             instVec.front()->setOperandTypeIndicated(false); //DPAS distance will be in sync inst
996                         }
997                         else if (mulitpleSamePipe)
998                         {
999                             setAccurateDistType(depPipe);
1000                             instVec.front()->setOperandTypeIndicated(false);
1001                         }
1002                         else // set to DISTALL
1003                         {
1004                             instVec.front()->setDistanceTypeXe(G4_INST::DistanceType::DISTALL);
1005                             instVec.front()->setOperandTypeIndicated(false);
1006                         }
1007                     }
1008                     else //From same pipeline
1009                     {
1010                         setAccurateDistType(depPipe);
1011                         instVec.front()->setIsClosestALUType(isClosedALUType(latestInstID, curDistance, depPipe));
1012                     }
1013                 }
1014                 else //Only one dependency
1015                 {
1016                     if (depPipe != PIPE_NONE) // From different pipeline
1017                     {
1018                         setAccurateDistType(depPipe);
1019                         if (sameOperandType || GetInstruction()->isSend())
1020                         {
1021                             instVec.front()->setIsClosestALUType(isClosedALUType(latestInstID, curDistance, depPipe));
1022                         }
1023                     }
1024                     else // From same pipeline
1025                     {
1026                         setAccurateDistType(ALUPipe);
1027                         if (sameOperandType)
1028                         {
1029                             instVec.front()->setIsClosestALUType(isClosedALUType(latestInstID, curDistance, ALUPipe));
1030                         }
1031                     }
1032                 }
1033             }
1034         }
1035 
getMaxDepDistance()1036         int getMaxDepDistance() const
1037         {
1038             auto inst = GetInstruction();
1039             if (inst->getDst() && inst->getDst()->getTypeSize() == 8)
1040             {  // Note that for XeLP, there are no 8 bytes ALU instruction.
1041                 return SWSB_MAX_ALU_DEPENDENCE_DISTANCE_64BIT;
1042             }
1043             else
1044             {
1045                 return SWSB_MAX_ALU_DEPENDENCE_DISTANCE;
1046             }
1047         }
1048 
setDepToken(unsigned short token,SWSBTokenType type,SBNode * node)1049         void setDepToken(unsigned short token, SWSBTokenType type, SBNode *node)
1050         {
1051             for (DepToken& depToken : depTokens)
1052             {
1053                 if (depToken.token == token)
1054                 {
1055                     if (depToken.type == SWSBTokenType::AFTER_WRITE)
1056                     {
1057                         return;
1058                     }
1059                     if (type == SWSBTokenType::AFTER_WRITE)
1060                     {
1061                         depToken.type = type;
1062                         depToken.depNode = node;
1063                     }
1064                     return;
1065                 }
1066             }
1067 
1068             struct DepToken dt;
1069             dt.token = token;
1070             dt.type = type;
1071             dt.depNode = node;
1072             depTokens.push_back(dt);
1073         }
eraseDepToken(unsigned i)1074         void eraseDepToken(unsigned i)
1075         {
1076             depTokens.erase(depTokens.begin() + i);
1077         }
1078 
getDepTokenNum()1079         size_t getDepTokenNum() const { return depTokens.size(); }
getDepToken(unsigned int i,SWSBTokenType & type)1080         unsigned short getDepToken(unsigned int i, SWSBTokenType& type) const
1081         {
1082             type = depTokens[i].type;
1083             return depTokens[i].token;
1084         }
1085 
getDepTokenNodeID(unsigned int i)1086         unsigned short getDepTokenNodeID(unsigned int i) const
1087         {
1088             return depTokens[i].depNode->getNodeID();;
1089         }
1090 
setTokenReuseNode(SBNode * node)1091         void setTokenReuseNode(SBNode *node)
1092         {
1093             tokenReusedNode = node;
1094         }
1095 
new(size_t sz,vISA::Mem_Manager & m)1096         void *operator new(size_t sz, vISA::Mem_Manager& m) { return m.alloc(sz); }
1097 
dumpInterval()1098         void dumpInterval() const
1099         {
1100             std::cerr << "#" << nodeID << ": " << liveStartID << "-" << liveEndID << "\n";
1101         }
dumpAssignedTokens()1102         void dumpAssignedTokens() const
1103         {
1104             std::cerr << "#" << nodeID << ": " << liveStartID << "-" << liveEndID << "\n";
1105             std::cerr << "Token: " << getLastInstruction()->getToken();
1106             if (tokenReusedNode != nullptr)
1107             {
1108                 std::cerr << "\t\tReuse:" << tokenReusedNode->getNodeID();
1109             }
1110             std::cerr << "\n";
1111         }
1112     };
1113 }
1114 typedef std::vector<vISA::SBNode*>             SBNODE_VECT;
1115 typedef std::vector<vISA::SBNode*>::iterator SBNODE_VECT_ITER;
1116 typedef std::list<vISA::SBNode*>             SBNODE_LIST;
1117 typedef std::list<vISA::SBNode*>::iterator     SBNODE_LIST_ITER;
1118 
1119 namespace vISA
1120 {
1121     //Similar as SBBucketNode, but it's used for the bucket descriptions got from each operands.
1122     struct SBBucketDesc {
1123         const int bucket;
1124         const Gen4_Operand_Number opndNum;
1125         SBNode* const node;
1126         const SBFootprint* footprint;
1127 
SBBucketDescSBBucketDesc1128         SBBucketDesc(int Bucket, Gen4_Operand_Number opnd_num, SBNode *sNode, const SBFootprint *f)
1129             : bucket(Bucket), opndNum(opnd_num), node(sNode), footprint(f) {
1130             ;
1131         }
1132     };
1133 
1134     //The node in the node vector of each bucket.
1135     struct SBBucketNode
1136     {
1137         SBNode*             node;
1138         Gen4_Operand_Number opndNum;
1139         int                 sendID = -1;
1140         const SBFootprint*  footprint;
1141 
SBBucketNodeSBBucketNode1142         SBBucketNode(SBNode *node1, Gen4_Operand_Number opndNum1, const SBFootprint *f)
1143             : node(node1), opndNum(opndNum1), footprint(f)
1144         {
1145         }
1146 
~SBBucketNodeSBBucketNode1147         ~SBBucketNode()
1148         {
1149         }
1150 
setSendIDSBBucketNode1151         void setSendID(int ID)
1152         {
1153             sendID = ID;
1154         }
1155 
getSendIDSBBucketNode1156         int getSendID() const
1157         {
1158             return sendID;
1159         }
1160 
dumpSBBucketNode1161         void dump() const
1162         {
1163             std::cerr << "#" << node->getNodeID() << "-" << opndNum << ",";
1164         }
1165     };
1166 
1167     typedef std::vector<SBBucketNode *> SBBUCKET_VECTOR;
1168     typedef SBBUCKET_VECTOR::iterator SBBUCKET_VECTOR_ITER;
1169 
1170     // This class hides the internals of dependence tracking using buckets
1171     class LiveGRFBuckets
1172     {
1173         std::vector<SBBUCKET_VECTOR *> nodeBucketsArray;
1174         G4_Kernel &k;
1175         vISA::Mem_Manager &mem;
1176         const int numOfBuckets;
1177 
1178     public:
LiveGRFBuckets(vISA::Mem_Manager & m,int TOTAL_BUCKETS,G4_Kernel & k)1179         LiveGRFBuckets(vISA::Mem_Manager& m, int TOTAL_BUCKETS, G4_Kernel& k)
1180             : nodeBucketsArray(TOTAL_BUCKETS), k(k), mem(m), numOfBuckets(TOTAL_BUCKETS)
1181         {
1182             // Initialize a vector for each bucket
1183             for (auto& bucket : nodeBucketsArray)
1184             {
1185                 void* allocedMem = mem.alloc(sizeof(SBBUCKET_VECTOR));
1186                 bucket = new (allocedMem) SBBUCKET_VECTOR();
1187             }
1188         }
1189 
~LiveGRFBuckets()1190         ~LiveGRFBuckets()
1191         {
1192             for (int i = 0; i < numOfBuckets; i++)
1193             {
1194                 SBBUCKET_VECTOR *bucketVec = nodeBucketsArray[i];
1195                 if (bucketVec)
1196                 {
1197                     bucketVec->~SBBUCKET_VECTOR();
1198                 }
1199             }
1200         }
1201 
getNumOfBuckets()1202         int getNumOfBuckets() const
1203         {
1204             return numOfBuckets;
1205         }
1206 
1207         //The iterator which is used to scan the node vector of each bucket
1208         class BN_iterator
1209         {
1210         public:
1211             const LiveGRFBuckets *LB;
1212             SBBUCKET_VECTOR_ITER node_it;
1213             int bucket;
1214 
BN_iterator(const LiveGRFBuckets * LB1,SBBUCKET_VECTOR_ITER It,int Bucket)1215             BN_iterator(const LiveGRFBuckets *LB1, SBBUCKET_VECTOR_ITER It, int Bucket)
1216                 : LB(LB1), node_it(It), bucket(Bucket)
1217             {
1218             }
1219 
1220             BN_iterator &operator++()
1221             {
1222                 ++node_it;
1223                 return *this;
1224             }
1225 
1226             bool operator!=(const BN_iterator &it2) const
1227             {
1228                 assert(LB == it2.LB);
1229                 return (!(bucket == it2.bucket && node_it == it2.node_it));
1230             }
1231 
1232             SBBucketNode *operator*()
1233             {
1234                 assert(node_it != LB->nodeBucketsArray[bucket]->end());
1235                 return *node_it;
1236             }
1237         };
1238 
begin(int bucket)1239         BN_iterator begin(int bucket) const
1240         {
1241             return BN_iterator(this, nodeBucketsArray[bucket]->begin(), bucket);
1242         }
1243 
end(int bucket)1244         BN_iterator end(int bucket) const
1245         {
1246             return BN_iterator(this, nodeBucketsArray[bucket]->end(), bucket);
1247         }
1248 
1249         //Scan the node vector of the bucket, and kill the bucket node with the specified node and operand
bucketKill(int bucket,SBNode * node,Gen4_Operand_Number opnd)1250         void bucketKill(int bucket, SBNode *node, Gen4_Operand_Number opnd)
1251         {
1252             SBBUCKET_VECTOR &vec = *nodeBucketsArray[bucket];
1253             for (unsigned int i = 0; i < vec.size(); i++)
1254             {
1255                 SBBucketNode *bNode = vec[i];
1256 
1257                 //Same node and same operand
1258                 if (bNode->node == node &&
1259                     bNode->opndNum == opnd)
1260                 {
1261                     vec.erase(vec.begin() + i);
1262                     break;
1263                 }
1264             }
1265         }
1266         //Kill the bucket node specified by bn_it
killSingleOperand(BN_iterator & bn_it)1267         void killSingleOperand(BN_iterator &bn_it)
1268         {
1269             SBBUCKET_VECTOR &vec = *nodeBucketsArray[bn_it.bucket];
1270             SBBUCKET_VECTOR_ITER &node_it = bn_it.node_it;
1271 
1272             //Kill current node
1273             if (*node_it == vec.back())
1274             {
1275                 vec.pop_back();
1276                 node_it = vec.end();
1277             }
1278             else
1279             {
1280                 //Current node is assigned with the last one
1281                 //For caller, same iterator position need be handled again,
1282                 //Because a new node is copied here
1283                 *node_it = vec.back();
1284                 vec.pop_back();
1285             }
1286         }
1287 
1288         //Kill the bucket node specified by bn_it, also kill the same node in other buckets
killOperand(BN_iterator & bn_it)1289         void killOperand(BN_iterator &bn_it)
1290         {
1291             SBBUCKET_VECTOR &vec = *nodeBucketsArray[bn_it.bucket];
1292             SBBUCKET_VECTOR_ITER &node_it = bn_it.node_it;
1293             SBBucketNode *bucketNode = *node_it; //Get the node before it is destroyed
1294             int aregOffset = k.getNumRegTotal();
1295 
1296             //Kill current node
1297             if (*node_it == vec.back())
1298             {
1299                 vec.pop_back();
1300                 node_it = vec.end();
1301             }
1302             else
1303             {
1304                 //Current node is assigned with the last one
1305                 //For caller, same iterator position need be handled again,
1306                 //Because a new node is copied here
1307                 *node_it = vec.back();
1308                 vec.pop_back();
1309             }
1310 
1311             //Kill the same node in other bucket.
1312             for (const SBFootprint *footprint = bucketNode->node->getFirstFootprint(bucketNode->opndNum); footprint; footprint = footprint->next)
1313             {
1314                 unsigned int startBucket = footprint->LeftB / numEltPerGRF<Type_UB>();
1315                 unsigned int endBucket = footprint->RightB / numEltPerGRF<Type_UB>();
1316                 if (footprint->fType == ACC_T)
1317                 {
1318                     startBucket = startBucket + aregOffset;
1319                     endBucket = endBucket + aregOffset;
1320                 }
1321 
1322                 if (footprint->inst == bucketNode->footprint->inst)
1323                 {
1324                     for (unsigned int i = startBucket; (i < endBucket + 1) && (i < nodeBucketsArray.size()); i++)
1325                     {
1326                         if (i == bn_it.bucket)
1327                         {
1328                             continue;
1329                         }
1330                         bucketKill(i, bucketNode->node, bucketNode->opndNum);
1331                     }
1332                 }
1333             }
1334         }
1335 
1336         //Add a node into bucket
add(SBBucketNode * bucketNode,int bucket)1337         void add(SBBucketNode *bucketNode, int bucket)
1338         {
1339             assert(nodeBucketsArray[bucket] != nullptr);
1340             SBBUCKET_VECTOR& nodeVec = *(nodeBucketsArray[bucket]);
1341             if (std::find(nodeVec.begin(), nodeVec.end(), bucketNode) == nodeVec.end())
1342             {
1343                 nodeVec.push_back(bucketNode);
1344             }
1345         }
1346 
new(size_t sz,vISA::Mem_Manager & m)1347         void *operator new(size_t sz, vISA::Mem_Manager& m) { return m.alloc(sz); }
1348 
dumpLives()1349         void dumpLives() const
1350         {
1351             for (int curBucket = 0; curBucket < numOfBuckets; curBucket++)
1352             {
1353                 if (nodeBucketsArray[curBucket]->size())
1354                 {
1355                     std::cerr << " GRF" << curBucket << ":";
1356                     for (SBBucketNode *liveBN : *nodeBucketsArray[curBucket])
1357                     {
1358                         std::cerr << " " << liveBN->node->getNodeID() << "(" << liveBN->opndNum << ")";
1359                     }
1360                     std::cerr << "\t";
1361                     if ((curBucket + 1) % 8 == 0)
1362                     {
1363                         std::cerr << "\n";
1364                     }
1365                 }
1366             }
1367             std::cerr << "\n";
1368         }
1369     };
1370 
1371     typedef std::list<G4_BB_SB *> BB_SWSB_LIST;
1372     typedef BB_SWSB_LIST::iterator BB_SWSB_LIST_ITER;
1373 
1374     typedef struct _SWSB_INDEXES {
1375         int instIndex = 0;
1376         int ALUIndex = 0;
1377         int integerIndex = 0;
1378         int floatIndex = 0;
1379         int longIndex = 0;
1380         int DPASIndex = 0;
1381         int mathIndex = 0;
1382         unsigned latestDepALUID[PIPE_DPAS] = {0};
1383         std::vector<unsigned> latestInstID[PIPE_DPAS];
1384     } SWSB_INDEXES;
1385 
1386     typedef struct _SWSB_LOOP {
1387         int entryBBID = -1 ;
1388         int endBBID = -1;
1389     } SWSB_LOOP;
1390 
1391     class G4_BB_SB {
1392     private:
1393         const SWSB& swsb;
1394         IR_Builder& builder;
1395         vISA::Mem_Manager &mem;
1396         G4_BB             *bb;
1397         G4_Label*         BBLabel;
1398         int nodeID;
1399         int ALUID;
1400 
1401         SBNode*           dpasNode = nullptr;
1402         SBNode*           lastDpasNode = nullptr;
1403         int integerID;
1404         int floatID;
1405         int longID;
1406         int mathID;
1407         int DPASID;
1408         unsigned latestDepALUID[PIPE_DPAS];
1409         std::vector<unsigned> *latestInstID[PIPE_DPAS];
1410 
1411         int totalGRFNum;
1412         int tokenAfterDPASCycle;
1413 
1414     public:
1415         LiveGRFBuckets *send_use_kills;
1416         BB_SWSB_LIST      Preds;
1417         BB_SWSB_LIST      Succs;
1418 
1419         BB_SWSB_LIST      domPreds;
1420         BB_SWSB_LIST      domSuccs;
1421 
1422         int first_node = -1;
1423         int last_node = -1;
1424 
1425         int first_send_node;
1426         int last_send_node;
1427 
1428         bool tokenAssigned = false;
1429 
1430         int send_start = -1;
1431         int send_end = -1;
1432 
1433         unsigned      loopStartBBID = -1;    // The start BB ID of live range
1434         unsigned      loopEndBBID = -1;    // The start BB ID of live range
1435 
1436         SBBitSets send_def_out;
1437         SBBitSets send_live_in;
1438         SBBitSets send_live_out;
1439         SBBitSets send_may_kill;
1440         SBBitSets send_live_in_scalar;
1441         SBBitSets send_live_out_scalar;
1442         SBBitSets send_kill_scalar;
1443 
1444         BitSet dominators;
1445         BitSet send_WAW_may_kill;
1446 
1447         //For token reduction
1448         BitSet   liveInTokenNodes;
1449         BitSet   liveOutTokenNodes;
1450         BitSet   killedTokens;
1451         std::vector<BitSet> tokeNodesMap;
1452         int first_DPASID = 0;
1453         int last_DPASID = 0;
1454         unsigned    *tokenLiveInDist;
1455         unsigned    *tokenLiveOutDist;
1456         SBBitSets localReachingSends;
1457 
1458         //BB local data dependence analysis
G4_BB_SB(const SWSB & sb,IR_Builder & b,Mem_Manager & m,G4_BB * block,SBNODE_VECT * SBNodes,SBNODE_VECT * SBSendNodes,SBBUCKET_VECTOR * globalSendOpndList,SWSB_INDEXES * indexes,uint32_t & globalSendNum,LiveGRFBuckets * lb,LiveGRFBuckets * globalLB,PointsToAnalysis & p,std::map<G4_Label *,G4_BB_SB * > * LabelToBlockMap,const unsigned dpasLatency)1459         G4_BB_SB(const SWSB& sb, IR_Builder& b, Mem_Manager &m, G4_BB *block, SBNODE_VECT *SBNodes, SBNODE_VECT* SBSendNodes,
1460             SBBUCKET_VECTOR *globalSendOpndList,  SWSB_INDEXES *indexes, uint32_t &globalSendNum, LiveGRFBuckets *lb,
1461             LiveGRFBuckets *globalLB, PointsToAnalysis& p,
1462             std::map<G4_Label*, G4_BB_SB*> *LabelToBlockMap, const unsigned dpasLatency) : swsb(sb), builder(b), mem(m), bb(block), tokenAfterDPASCycle(dpasLatency)
1463         {
1464             for (int i = 0; i < PIPE_DPAS; i++)
1465             {
1466                 latestDepALUID[i] = 0;
1467                 latestInstID[i] = nullptr;
1468             }
1469             first_send_node = -1;
1470             last_send_node = -1;
1471             totalGRFNum = block->getKernel().getNumRegTotal();
1472             SBDDD(bb, lb, globalLB, SBNodes, SBSendNodes, globalSendOpndList,  indexes, globalSendNum, p, LabelToBlockMap);
1473         }
1474 
~G4_BB_SB()1475         ~G4_BB_SB()
1476         {
1477         }
1478 
getBB()1479         G4_BB* getBB() const { return bb; }
getLabel()1480         G4_Label* getLabel() const { return BBLabel; }
1481 
1482         static bool isGRFEdgeAdded(const SBNode* pred, const SBNode* succ, DepType d, SBDependenceAttr a);
1483         void createAddGRFEdge(SBNode* pred, SBNode* succ, DepType d, SBDependenceAttr a);
1484 
1485         //Bucket and range analysis
1486         SBFootprint* getFootprintForGRF(G4_Operand * opnd,
1487             Gen4_Operand_Number opnd_num,
1488             G4_INST *inst,
1489             int startingBucket,
1490             bool mustBeWholeGRF);
1491         SBFootprint * getFootprintForACC(G4_Operand * opnd,
1492             Gen4_Operand_Number opnd_num,
1493             G4_INST *inst);
1494         SBFootprint* getFootprintForFlag(G4_Operand* opnd,
1495             Gen4_Operand_Number opnd_num,
1496             G4_INST* inst);
1497         bool isLongPipeType(G4_Type type) const;
1498         bool isIntegerPipeType(G4_Type type) const;
1499         bool isLongPipeInstructionXe(const G4_INST* inst) const;
1500         bool isIntegerPipeInstructionXe(const G4_INST* inst) const;
1501         bool isFloatPipeInstructionXe(const G4_INST* inst) const;
1502         SB_INST_PIPE getInstructionPipeXe(const G4_INST* inst);
1503         bool getFootprintForOperand(SBNode *node,
1504             G4_INST *inst,
1505             G4_Operand* opnd,
1506             Gen4_Operand_Number opnd_num);
1507         void getGRFBuckets(SBNode* node, const SBFootprint* footprint, Gen4_Operand_Number opndNum, std::vector<SBBucketDesc>& BDvec, bool GRFOnly);
1508         bool getGRFFootPrintOperands(SBNode *node,
1509             G4_INST *inst,
1510             Gen4_Operand_Number first_opnd,
1511             Gen4_Operand_Number last_opnd,
1512             PointsToAnalysis& p);
1513         void getGRFFootprintForIndirect(SBNode* node,
1514             Gen4_Operand_Number opnd_num,
1515             G4_Operand* opnd,
1516             PointsToAnalysis& p);
1517         void getGRFBucketsForOperands(SBNode *node,
1518             Gen4_Operand_Number first_opnd,
1519             Gen4_Operand_Number last_opnd,
1520             std::vector<SBBucketDesc>& BDvec,
1521             bool GRFOnly);
1522 
1523         bool getGRFFootPrint(SBNode *node,
1524             PointsToAnalysis &p);
1525 
1526         void getGRFBucketDescs(SBNode *node,
1527             std::vector<SBBucketDesc>& BDvec,
1528             bool GRFOnly);
1529 
1530         void clearSLMWARWAissue(SBNode* curNode, LiveGRFBuckets* LB);
1531 
1532         void setDistance(const SBFootprint * footprint, SBNode *node, SBNode *liveNode, bool dstDep);
1533         void setSpecialDistance(SBNode* node);
1534         static void footprintMerge(SBNode * node, const SBNode * nextNode);
1535 
1536         void pushItemToQueue(std::vector<unsigned>* nodeIDQueue, unsigned nodeID);
1537 
1538         bool hasInternalDependence(SBNode* nodeFirst, SBNode* nodeNext);
1539 
1540         bool is2xDPBlockCandidate(G4_INST* inst, bool accDST);
1541         //Local distance dependence analysis and assignment
1542         void SBDDD(G4_BB* bb,
1543             LiveGRFBuckets* &LB,
1544             LiveGRFBuckets* &globalSendsLB,
1545             SBNODE_VECT *SBNodes,
1546             SBNODE_VECT *SBSendNodes,
1547             SBBUCKET_VECTOR *globalSendOpndList,
1548             SWSB_INDEXES *indexes,
1549             uint32_t &globalSendNum,
1550             PointsToAnalysis& p,
1551             std::map<G4_Label*, G4_BB_SB*> *LabelToBlockMap);
1552 
1553         //Global SBID dependence analysis
1554         void setSendOpndMayKilled(LiveGRFBuckets *globalSendsLB, SBNODE_VECT *SBNodes, PointsToAnalysis &p);
1555         void dumpTokenLiveInfo(SBNODE_VECT * SBSendNodes);
1556         void getLiveBucketsFromFootprint(const SBFootprint *firstFootprint, SBBucketNode* sBucketNode, LiveGRFBuckets *send_use_kills) const;
1557         void addGlobalDependence(unsigned globalSendNum, SBBUCKET_VECTOR *globalSendOpndList, SBNODE_VECT *SBNodes, PointsToAnalysis &p, bool afterWrite);
1558         void clearKilledBucketNodeXeLP(LiveGRFBuckets * LB, int ALUID);
1559 
1560         void clearKilledBucketNodeXeHP(LiveGRFBuckets* LB, int integerID, int floatID, int longID, int mathID);
1561 
1562         bool hasInternalDependenceWithinDPAS(SBNode *node);
1563         bool hasDependenceBetweenDPASNodes(SBNode * node, SBNode * nextNode);
1564         bool src2FootPrintCachePVC(SBNode* curNode, SBNode* nextNode) const;
1565         bool src2SameFootPrintDiffType(SBNode* curNode, SBNode* nextNode) const;
1566         bool isLastDpas(SBNode * curNode, SBNode * nextNode);
1567 
1568 
1569         void getLiveOutToken(unsigned allSendNum, const SBNODE_VECT *SBNodes);
1570 
1571 
getLoopStartBBID()1572         unsigned getLoopStartBBID() const { return loopStartBBID; }
getLoopEndBBID()1573         unsigned getLoopEndBBID() const { return loopEndBBID; }
1574 
setLoopStartBBID(unsigned id)1575         void setLoopStartBBID(unsigned id) { loopStartBBID = id; }
setLoopEndBBID(unsigned id)1576         void setLoopEndBBID(unsigned id) { loopEndBBID = id; }
1577 
new(size_t sz,vISA::Mem_Manager & m)1578         void *operator new(size_t sz, vISA::Mem_Manager& m) { return m.alloc(sz); }
1579 
1580         void dumpLiveInfo(const SBBUCKET_VECTOR *globalSendOpndList, unsigned globalSendNum, const SBBitSets *send_kill) const;
1581     };
1582 
1583     typedef std::vector<G4_BB_SB *> BB_SWSB_VECTOR;
1584 
1585     class SWSB_TOKEN_PROFILE {
1586         uint32_t tokenInstructionCount = 0;
1587         uint32_t tokenReuseCount = 0;
1588         uint32_t AWTokenReuseCount = 0;
1589         uint32_t ARTokenReuseCount = 0;
1590         uint32_t AATokenReuseCount = 0;
1591         uint32_t mathInstCount = 0;
1592         uint32_t syncInstCount = 0;
1593         uint32_t mathReuseCount = 0;
1594         uint32_t ARSyncInstCount = 0;
1595         uint32_t AWSyncInstCount = 0;
1596         uint32_t ARSyncAllCount = 0;
1597         uint32_t AWSyncAllCount = 0;
1598         uint32_t prunedDepEdges = 0;
1599         uint32_t prunedGlobalEdgeNum = 0;
1600         uint32_t prunedDiffBBEdgeNum = 0;
1601         uint32_t prunedDiffBBSameTokenEdgeNum = 0;
1602     public:
SWSB_TOKEN_PROFILE()1603         SWSB_TOKEN_PROFILE() {
1604             ;
1605         }
1606 
~SWSB_TOKEN_PROFILE()1607         ~SWSB_TOKEN_PROFILE()
1608         {
1609         }
new(size_t sz,vISA::Mem_Manager & m)1610         void* operator new(size_t sz, vISA::Mem_Manager& m) { return m.alloc(sz); }
1611 
setTokenInstructionCount(int count)1612         void setTokenInstructionCount(int count) { tokenInstructionCount = count; }
getTokenInstructionCount()1613         uint32_t getTokenInstructionCount() const { return tokenInstructionCount; }
1614 
setTokenReuseCount(int count)1615         void setTokenReuseCount(int count) { tokenReuseCount = count; }
getTokenReuseCount()1616         uint32_t getTokenReuseCount() const { return tokenReuseCount; }
1617 
setAWTokenReuseCount(int count)1618         void setAWTokenReuseCount(int count) { AWTokenReuseCount = count; }
getAWTokenReuseCount()1619         uint32_t getAWTokenReuseCount() const { return AWTokenReuseCount; }
1620 
setARTokenReuseCount(int count)1621         void setARTokenReuseCount(int count) { ARTokenReuseCount = count; }
getARTokenReuseCount()1622         uint32_t getARTokenReuseCount() const { return ARTokenReuseCount; }
1623 
setAATokenReuseCount(int count)1624         void setAATokenReuseCount(int count) { AATokenReuseCount = count; }
getAATokenReuseCount()1625         uint32_t getAATokenReuseCount() const { return AATokenReuseCount; }
1626 
setMathInstCount(int count)1627         void setMathInstCount(int count) { mathInstCount = count; }
getMathInstCount()1628         uint32_t getMathInstCount() const { return mathInstCount; }
1629 
setSyncInstCount(int count)1630         void setSyncInstCount(int count) { syncInstCount = count; }
getSyncInstCount()1631         uint32_t getSyncInstCount() const { return syncInstCount; }
1632 
setMathReuseCount(int count)1633         void setMathReuseCount(int count) { mathReuseCount = count; }
getMathReuseCount()1634         uint32_t getMathReuseCount() const { return mathReuseCount; }
1635 
setARSyncInstCount(int count)1636         void setARSyncInstCount(int count) { ARSyncInstCount = count; }
getARSyncInstCount()1637         uint32_t getARSyncInstCount() const { return ARSyncInstCount; }
1638 
setAWSyncInstCount(int count)1639         void setAWSyncInstCount(int count) { AWSyncInstCount = count; }
getAWSyncInstCount()1640         uint32_t getAWSyncInstCount() const { return AWSyncInstCount; }
1641 
setARSyncAllCount(int count)1642         void setARSyncAllCount(int count) { ARSyncAllCount = count; }
getARSyncAllCount()1643         uint32_t getARSyncAllCount() const { return ARSyncAllCount; }
1644 
setAWSyncAllCount(int count)1645         void setAWSyncAllCount(int count) { AWSyncAllCount = count; }
getAWSyncAllCount()1646         uint32_t getAWSyncAllCount() const { return AWSyncAllCount; }
1647 
setPrunedEdgeNum(int num)1648         void setPrunedEdgeNum(int num) { prunedDepEdges = num; }
getPrunedEdgeNum()1649         uint32_t getPrunedEdgeNum() const { return prunedDepEdges; }
1650 
setPrunedGlobalEdgeNum(int num)1651         void setPrunedGlobalEdgeNum(int num) { prunedGlobalEdgeNum = num; }
getPrunedGlobalEdgeNum()1652         uint32_t getPrunedGlobalEdgeNum() const { return prunedGlobalEdgeNum; }
1653 
setPrunedDiffBBEdgeNum(int num)1654         void setPrunedDiffBBEdgeNum(int num) { prunedDiffBBEdgeNum = num; }
getPrunedDiffBBEdgeNum()1655         uint32_t getPrunedDiffBBEdgeNum() const { return prunedDiffBBEdgeNum; }
1656 
setPrunedDiffBBSameTokenEdgeNum(int num)1657         void setPrunedDiffBBSameTokenEdgeNum(int num) { prunedDiffBBSameTokenEdgeNum = num; }
getPrunedDiffBBSameTokenEdgeNum()1658         uint32_t getPrunedDiffBBSameTokenEdgeNum() const { return prunedDiffBBSameTokenEdgeNum; }
1659     };
1660 
1661     class SWSB {
1662         G4_Kernel &kernel;
1663         FlowGraph&  fg;
1664         vISA::Mem_Manager& mem;
1665 
1666         BB_SWSB_VECTOR BBVector;    // The basic block vector, ordered with ID of the BB
1667         SBNODE_VECT SBNodes;        // All instruction nodes
1668         SBNODE_VECT SBSendNodes;    // All out-of-order instruction nodes
1669         SBNODE_VECT SBSendUses;    // All out-of-order instruction nodes
1670 #ifdef DEBUG_VERBOSE_ON
1671         SBNODE_VECT globalSBNodes;        // All instruction nodes
1672 #endif
1673         SWSB_INDEXES indexes;         // To pass ALU ID  from previous BB to current.
1674         uint32_t  globalSendNum = 0;  // The number of out-of-order instructions which generate global dependencies.
1675         SBBUCKET_VECTOR globalSendOpndList;  //All send operands which live out their instructions' BBs. No redundant.
1676         const uint32_t totalTokenNum;
1677         static constexpr unsigned TOKEN_AFTER_READ_CYCLE = 4;
1678         const unsigned tokenAfterWriteMathCycle;
1679         const unsigned tokenAfterWriteSendSlmCycle;
1680         const unsigned tokenAfterWriteSendMemoryCycle;
1681         const unsigned tokenAfterWriteSendSamplerCycle;
1682         int tokenAfterDPASCycle;
1683 
1684         //For profiling
1685         uint32_t syncInstCount = 0;
1686         uint32_t AWSyncInstCount = 0;
1687         uint32_t ARSyncInstCount = 0;
1688         uint32_t mathReuseCount = 0;
1689         uint32_t ARSyncAllCount = 0;
1690         uint32_t AWSyncAllCount = 0;
1691         uint32_t tokenReuseCount = 0;
1692 
1693         bool hasFCall = false;
1694         //Linear scan data structures for token allocation
1695         SBNODE_LIST linearScanLiveNodes;
1696 
1697         std::vector<SBNode *> freeTokenList;
1698 
1699         std::vector<SBNODE_VECT *> reachTokenArray;
1700         std::vector<SBNODE_VECT *> reachUseArray;
1701         SBNODE_VECT localTokenUsage;
1702 
1703         int topIndex = -1;
1704 
1705         std::map<G4_Label*, G4_BB_SB*> labelToBlockMap;
1706 
1707         // TokenAllocation uses a BitSet to track nodes assigned by marking the
1708         // send IDs of nodes, so that it's possible to get a SBNode using the
1709         // send ID to index into SBSendNodes.
1710         // Also add a filed to record the max send ID being assigned.
1711         struct TokenAllocation
1712         {
1713             BitSet bitset;
1714             int maxSendID = 0;
setTokenAllocation1715             void set(int sendID)
1716             {
1717                 bitset.set(sendID, true);
1718                 maxSendID = std::max(sendID, maxSendID);
1719             }
1720         };
1721         std::vector<TokenAllocation> allTokenNodesMap;
1722         SWSB_TOKEN_PROFILE tokenProfile;
1723 
1724         //Global dependence analysis
1725         bool globalDependenceDefReachAnalysis(G4_BB* bb);
1726         bool globalDependenceUseReachAnalysis(G4_BB* bb);
1727         void addGlobalDependence(unsigned globalSendNum, SBBUCKET_VECTOR *globalSendOpndList, SBNODE_VECT *SBNodes, PointsToAnalysis &p, bool afterWrite);
1728         void tokenEdgePrune(unsigned& prunedEdgeNum, unsigned& prunedGlobalEdgeNum, unsigned& prunedDiffBBEdgeNum, unsigned& prunedDiffBBSameTokenEdgeNum);
1729         void dumpTokenLiveInfo();
1730 
1731         void removePredsEdges(SBNode * node, SBNode * pred);
1732 
1733         void dumpImmDom(ImmDominator* dom) const;
1734 
1735         void setDefaultDistanceAtFirstInstruction();
1736 
1737         void quickTokenAllocation();
1738 
1739         //Token allocation
1740         void tokenAllocation();
1741         void buildLiveIntervals();
1742         void expireIntervals(unsigned startID);
1743         void addToLiveList(SBNode *node);
1744 
1745         std::pair<int /*reuseDelay*/, int /*curDistance*/> examineNodeForTokenReuse(unsigned nodeID, unsigned nodeDelay, const SBNode *curNode, unsigned char nestLoopLevel, unsigned curLoopStartBB, unsigned curLoopEndBB) const;
1746         SBNode * reuseTokenSelection(const SBNode * node) const;
1747         unsigned short reuseTokenSelectionGlobal(SBNode* node, G4_BB* bb, SBNode*& candidateNode, bool& fromUse);
1748         void addReachingDefineSet(SBNode* node, SBBitSets* globalLiveSet, SBBitSets* localLiveSet);
1749         void addReachingUseSet(SBNode* node, SBNode* use);
1750         void addGlobalDependenceWithReachingDef(unsigned globalSendNum, SBBUCKET_VECTOR* globalSendOpndList, SBNODE_VECT* SBNodes, PointsToAnalysis& p, bool afterWrite);
1751         void expireLocalIntervals(unsigned startID, unsigned BBID);
1752         void assignTokenToPred(SBNode* node, SBNode* pred, G4_BB* bb);
1753         bool assignTokenWithPred(SBNode* node, G4_BB* bb);
1754         void allocateToken(G4_BB* bb);
1755         void tokenAllocationBB(G4_BB* bb);
1756         void buildExclusiveForCoalescing();
1757         void tokenAllocationGlobalWithPropogation();
1758         void tokenAllocationGlobal();
1759         bool propogateDist(G4_BB* bb);
1760         void tokenAllocationWithDistPropogationPerBB(G4_BB* bb);
1761         void tokenAllocationWithDistPropogation();
1762         void calculateDist();
1763 
1764 
1765         //Assign Token
1766         void assignToken(SBNode *node, unsigned short token, uint32_t &AWTokenReuseCount, uint32_t &ARTokenReuseCount, uint32_t &AATokenReuseCount);
1767         void assignDepToken(SBNode *node);
1768         void assignDepTokens();
1769         bool insertSyncTokenPVC(G4_BB * bb, SBNode * node, G4_INST * inst, INST_LIST_ITER inst_it, int newInstID, BitSet * dstTokens, BitSet * srcTokens, bool removeAllToken);
1770         bool insertSyncPVC(G4_BB * bb, SBNode * node, G4_INST * inst, INST_LIST_ITER inst_it, int newInstID, BitSet * dstTokens, BitSet * srcTokens);
1771         bool insertSyncXe(G4_BB* bb, SBNode* node, G4_INST* inst, INST_LIST_ITER inst_it, int newInstID, BitSet* dstTokens, BitSet* srcTokens);
1772         void insertSync(G4_BB* bb, SBNode* node, G4_INST* inst, INST_LIST_ITER inst_it, int newInstID, BitSet* dstTokens, BitSet* srcTokens);
1773         void insertTest();
1774 
1775         //Insert sync instructions
1776         G4_INST *insertTestInstruction(G4_BB *bb, INST_LIST_ITER nextIter, int CISAOff, int lineNo, bool countSyns);  //FIXME: Please remove it when meta is not needed anymore
1777         G4_INST *insertSyncInstruction(G4_BB *bb, INST_LIST_ITER nextIter, int CISAOff, int lineNo);
1778         G4_INST *insertSyncInstructionAfter(G4_BB * bb, INST_LIST_ITER nextIter, int CISAOff, int lineNo);
1779         G4_INST* insertSyncAllRDInstruction(G4_BB *bb, unsigned int SBIDs, INST_LIST_ITER nextIter, int CISAOff, int lineNo);
1780         G4_INST *insertSyncAllWRInstruction(G4_BB *bb, unsigned int SBIDs, INST_LIST_ITER nextIter, int CISAOff, int lineNo);
1781 
1782         bool insertSyncToken(G4_BB* bb, SBNode* node, G4_INST* inst, INST_LIST_ITER inst_it, int newInstID, BitSet* dstTokens, BitSet* srcTokens, bool& keepDst, bool removeAllToken);
1783 
1784         void SWSBDepDistanceGenerator(PointsToAnalysis& p, LiveGRFBuckets &LB, LiveGRFBuckets &globalSendsLB);
1785         void handleFuncCall();
1786         void SWSBGlobalTokenGenerator(PointsToAnalysis& p, LiveGRFBuckets &LB, LiveGRFBuckets &globalSendsLB);
1787         void SWSBBuildSIMDCFG();
1788         void addSIMDEdge(G4_BB_SB *pred, G4_BB_SB* succ);
1789         void SWSBGlobalScalarCFGReachAnalysis();
1790         void SWSBGlobalSIMDCFGReachAnalysis();
1791 
1792         void setTopTokenIndex();
1793 
1794         //Optimizations
1795         void tokenDepReduction(SBNode* node1, SBNode *node2);
1796         bool cycleExpired(const SBNode * node, int currentID) const;
1797 
1798         void shareToken(const SBNode *node, const SBNode *succ, unsigned short token);
1799 
1800         void SWSBGlobalTokenAnalysis();
1801         bool globalTokenReachAnalysis(G4_BB *bb);
1802 
1803 
1804         //Dump
1805         void dumpDepInfo() const;
1806         void dumpLiveIntervals() const;
1807         void dumpTokeAssignResult() const;
1808         void dumpSync(const SBNode * tokenNode, const SBNode * syncNode, unsigned short token, SWSBTokenType type) const;
1809 
1810         // Fast-composite support.
1811         void genSWSBPatchInfo();
1812 
1813         void getDominators(ImmDominator* dom);
1814 
1815 
1816     public:
SWSB(G4_Kernel & k,vISA::Mem_Manager & m)1817         SWSB(G4_Kernel &k, vISA::Mem_Manager& m)
1818             : kernel(k), fg(k.fg), mem(m),
1819             totalTokenNum(k.fg.builder->kernel.getNumSWSBTokens()),
1820             tokenAfterWriteMathCycle(k.fg.builder->isXeLP() ? 20u : 17u),
1821             tokenAfterWriteSendSlmCycle(k.fg.builder->isXeLP() ? 33u : 25u), //unlocaled 25
1822             tokenAfterWriteSendMemoryCycle(k.fg.builder->getOptions()->getOption(vISA_USEL3HIT)
1823                 ? (k.fg.builder->isXeLP() ? 106u : 150u) // TOKEN_AFTER_WRITE_SEND_L3_MEMORY_CYCLE
1824                 : (k.fg.builder->isXeLP() ? 65u : 50u)), // TOKEN_AFTER_WRITE_SEND_L1_MEMORY_CYCLE
1825             tokenAfterWriteSendSamplerCycle(k.fg.builder->getOptions()->getOption(vISA_USEL3HIT)
1826                 ? (k.fg.builder->isXeLP() ? 175u : 210u) // TOKEN_AFTER_WRITE_SEND_L3_SAMPLER_CYCLE
1827                 : 60u)                                   // TOKEN_AFTER_WRITE_SEND_L1_SAMPLER_CYCLE
1828         {
1829             indexes.instIndex = 0;
1830             indexes.ALUIndex = 0;
1831             indexes.integerIndex = 0;
1832             indexes.floatIndex = 0;
1833             indexes.longIndex = 0;
1834             indexes.DPASIndex = 0;
1835             indexes.mathIndex = 0;
1836             LatencyTable LT(k.fg.builder);
1837             tokenAfterDPASCycle = LT.getDPAS8x8Latency();
1838         }
~SWSB()1839         ~SWSB()
1840         {
1841             for (SBNode *node : SBNodes)
1842             {
1843                 node->~SBNode();
1844             }
1845 
1846             for (G4_BB_SB *bb : BBVector)
1847             {
1848                 bb->~G4_BB_SB();
1849             }
1850 
1851             for (int i = 0; i != (int)reachTokenArray.size(); ++i)
1852             {
1853                  reachTokenArray[i]->~SBNODE_VECT();
1854                  reachUseArray[i]->~SBNODE_VECT();
1855             }
1856         }
1857         void SWSBGenerator();
1858         unsigned calcDepDelayForNode(const SBNode *node) const;
1859     };
1860 }
1861 #endif // _SWSB_H_
1862