/*========================== begin_copyright_notice ============================

Copyright (C) 2017-2021 Intel Corporation

SPDX-License-Identifier: MIT

============================= end_copyright_notice ===========================*/
8 
#ifndef FLOWGRAPH_H
#define FLOWGRAPH_H

#include "VISADefines.h"
#include "G4_BB.hpp"
#include "G4_IR.hpp"
#include "RelocationInfo.h"
#include "LoopAnalysis.h"
17 
#include <algorithm>
#include <iostream>
#include <list>
#include <map>
#include <ostream>
#include <set>
#include <string>
#include <unordered_map>
#include <unordered_set>
#include <vector>
26 
namespace vISA
{
class FlowGraph;
class G4_BB;
class G4_Kernel;
class IR_Builder;
class PhyRegSummary;
class VarSplitPass;

//
// FuncInfo - Function CFG information
//    This class maintains a CFG summary of the function (its INIT block, EXIT block and
//    number of call sites). The function's INIT block will contain a pointer to its
//    related FuncInfo object. The FuncInfo definition is used for inter-procedural liveness
//    analysis (IPA).
42 class FuncInfo
43 {
44 private:
45     unsigned id;        // the function id
46     G4_BB*   initBB;    // the init node
47     G4_BB*   exitBB;    // the exit node
48     unsigned callCount; // the number of call sites
49 
50     std::vector<G4_BB*>  BBList;      // the list of BBs
51     std::list<FuncInfo *>  callees; // the list of callees
52     unsigned scopeID;               // the function scope ID
53 
54     bool visited;
55     unsigned preID;
56     unsigned postID;
57 
58 public:
59 
FuncInfo(unsigned p_id,G4_BB * p_initBB,G4_BB * p_exitBB)60     FuncInfo(unsigned p_id, G4_BB* p_initBB, G4_BB* p_exitBB)
61         : id(p_id), initBB(p_initBB), exitBB(p_exitBB), callCount(1)
62         , scopeID(0), visited(false), preID(0), postID(0)
63     {
64     }
65 
~FuncInfo()66     ~FuncInfo()
67     {
68         BBList.clear();
69         callees.clear();
70     }
71 
new(size_t sz,Mem_Manager & m)72     void *operator new(size_t sz, Mem_Manager& m)    {return m.alloc(sz);}
73 
doIPA()74     bool doIPA() const {return callCount > 1;}
75 
getId()76     unsigned getId() const {return id;}
setId(unsigned val)77     void setId(unsigned val) {id = val;}
78 
getInitBB()79     G4_BB* getInitBB() const {return initBB;}
getExitBB()80     G4_BB* getExitBB() const {return exitBB;}
81 
incrementCallCount()82     void incrementCallCount() {++callCount;}
83 
updateInitBB(G4_BB * p_initBB)84     void updateInitBB(G4_BB* p_initBB) {initBB = p_initBB;}
updateExitBB(G4_BB * p_exitBB)85     void updateExitBB(G4_BB* p_exitBB) {exitBB = p_exitBB;}
86 
addCallee(FuncInfo * fn)87     void                    addCallee(FuncInfo *fn) {callees.push_back(fn);}
getCallees()88     std::list<FuncInfo *>&  getCallees() {return callees;}
89 
addBB(G4_BB * bb)90     void                  addBB(G4_BB* bb) {BBList.push_back(bb);}
getBBList()91     std::vector<G4_BB*>&  getBBList() {return BBList;}
92 
getScopeID()93     unsigned getScopeID() const {return scopeID;}
setScopeID(unsigned id)94     void     setScopeID(unsigned id) {scopeID = id;}
95 
getVisited()96     bool     getVisited() const {return visited;}
setVisited()97     void     setVisited() {visited = true;}
98 
getPreID()99     unsigned getPreID() const {return preID;}
setPreID(unsigned id)100     void     setPreID(unsigned id) {preID = id;}
101 
getPostID()102     unsigned getPostID() const {return postID;}
setPostID(unsigned id)103     void     setPostID(unsigned id) {postID = id;}
104 
105     void dump() const;
106 }; // FuncInfo


///
/// A hashtable of <declare, node> where every node is a vector of
/// {LB, RB} (left-bounds and right-bounds)
/// A source operand (either SrcRegRegion or Predicate) is considered to be global
/// if it is not fully defined in one BB
///
116 class GlobalOpndHashTable
117 {
118     Mem_Manager& mem;
119     std_arena_based_allocator<uint32_t> private_arena_allocator;
120 
packBound(uint16_t lb,uint16_t rb)121     static uint32_t packBound(uint16_t lb, uint16_t rb)
122     {
123         return (rb << 16) + lb;
124     }
125 
getLB(uint32_t value)126     static uint16_t getLB(uint32_t value)
127     {
128         return (uint16_t) (value & 0xFFFF);
129     }
getRB(uint32_t value)130     static uint16_t getRB(uint32_t value)
131     {
132         return (uint16_t) (value >> 16);
133     }
134 
135     struct HashNode
136     {
137         // each elements is {LB, RB} pair where [0:15] is LB and [16:31] is RB
138         std::vector<uint32_t, std_arena_based_allocator<uint32_t>> bounds;
139 
HashNodeHashNode140         HashNode(uint16_t lb, uint16_t rb, std_arena_based_allocator<uint32_t>& m)
141             : bounds(m)
142         {
143             bounds.push_back(packBound(lb, rb));
144         }
145 
newHashNode146         void *operator new(size_t sz, Mem_Manager& m) {return m.alloc(sz);}
147 
148         void insert(uint16_t newLB, uint16_t newRB);
149         bool isInNode(uint16_t lb, uint16_t rb) const;
150     };
151 
152     // "global" refers to declares with elements that are used without a preceding define in the same BB
153     std::map<G4_Declare*, HashNode*> globalVars;
154     // for debugging it's often useful to dump out the global operands, not just declares.
155     // Note that this may not be an exhaustive list, for example it does not cover dst global operands;
156     // for accuracy one should use isOpndGlobal()
157     std::vector<G4_Operand*> globalOpnds;
158 
159 public:
GlobalOpndHashTable(Mem_Manager & m)160     GlobalOpndHashTable(Mem_Manager& m) : mem(m) { }
161 
162     void addGlobalOpnd(G4_Operand * opnd);
163     // returns true if def may possibly define a global variable
164     bool isOpndGlobal(G4_Operand * def) const;
165     void clearHashTable();
166 
167     void dump(std::ostream &os = std::cerr) const;
168 }; // GlobalOpndHashTable
169 
170 //
171 // A table mapping the subroutine (INIT) block id's to their FuncInfo nodes.
172 //
173 typedef std::unordered_map<int, FuncInfo*> FuncInfoHashTable;
174 typedef std::unordered_map<G4_Label*, G4_BB*> Label_BB_Map;
175 
176 class FlowGraph
177 {
178     // This list maintains the ordering of the basic blocks (i.e., asm and binary emission will output
179     // the blocks in list oder.
180     // Important: Due to the nature of SIMD CF, it is unsafe to change the order of basic blocks
181     // Once the list is populated in constructFlowGraph(), the only changes allowed are
182     // 1. insertion of new exit BBs due to handleExit/handleReturn/handleFRet.  The exit BB
183     //    must be the last BB for the kernel/subroutine/function it belongs to
184     // 2. deletion of unreachable blocks
185     // 3. merging of blocks that only contain one label with its (single) successor
186     // If you need to change the block ordering for any reason, create another data structure instead of
187     // modifying this one
188     BB_LIST BBs;
189 
190     unsigned traversalNum;                      // used for flow graph traversals
191     unsigned numBBId;                            // number of basic blocks
192     bool     reducible;                            // reducibility of the graph
193     bool     doIPA;                             // requires inter-procedural liveness analysis
194     bool     hasStackCalls;                     // indicates that the flowgraph contains STACK_CALL calls
195     bool     isStackCallFunc;                    // indicates the function itself is a STACK_CALL function
196     unsigned autoLabelId;
197     G4_Kernel* pKernel;                         // back pointer to the kernel object
198 
199     // map each BB to its local RA GRF usage summary, populated in local RA
200     std::map<G4_BB*, PhyRegSummary*> bbLocalRAMap;
201     // vector of summaries created for each BB, needed for deallocation later
202     std::vector<PhyRegSummary*> localRASummaries;
203 
204     // list of all BBs ever created
205     // This list only grows and is freed when the FlowGraph is destroyed
206     std::vector<G4_BB*> BBAllocList;
207 
208     // stores all INST that may be target of indirect jump. Currently these inst must be jmpi themselves
209     std::unordered_set<G4_INST*> indirectJmpTarget;
210 
211     // stores all endift inst that have labels associated with it
212     std::unordered_map<G4_INST*, G4_Label*> endifWithLabels;
213 
214     // label to subroutine BB's map. This is used to add edges between subroutine caller/callee
215     // ToDo: We should use FuncInfo instead, but at the time it was needed FuncInfo was not constructed yet..
216     std::unordered_map<G4_Label*, std::vector<G4_BB*>> subroutines;
217 
218     vISA::Dominator dom;
219     vISA::ImmDominator immDom;
220     vISA::PostDom pDom;
221     vISA::LoopDetection loops;
222 
223 public:
224     typedef std::pair<G4_BB*, G4_BB*> Edge;
225     typedef std::set<G4_BB*> Blocks;
226     typedef std::map<Edge, Blocks> Loop;
227 
228     Mem_Manager& mem;                            // mem mananger for creating BBs & starting IP table
229     INST_LIST_NODE_ALLOCATOR& instListAlloc;     // a reference to dedicated mem allocator for holding instruction list nodes
230 
231     std::list<Edge> backEdges;                  // list of all backedges (tail->head)
232     Loop naturalLoops;
233 
234     // function info nodes. entry function is not included.
235     std::vector<FuncInfo*> funcInfoTable;
236 
237     std::vector<FuncInfo*> sortedFuncTable;     // subroutines in reverse topographical order (leaf at top)
238                                                 // kernelInfo is the last element with invalid func id
239 
240     FuncInfo* kernelInfo;                       // the call info for the kernel function
241 
242     IR_Builder *builder;                        // needed to create new instructions (mainly labels)
243     GlobalOpndHashTable globalOpndHT;
244 
245     G4_Declare *            framePtrDcl;
246     G4_Declare *            stackPtrDcl;
247     G4_Declare *            scratchRegDcl;
248     G4_Declare *            pseudoVCEDcl;
249 
250     // pseudo declares used by RA to model the save/restore variables at each call site
251     struct PseudoDcls
252     {
253         G4_Declare* VCA;
254         G4_Declare* A0;
255         G4_Declare* Flag;
256     };
257 
258     std::unordered_map<G4_InstCF*, struct PseudoDcls> fcallToPseudoDclMap;
259 
260     // offset in unit of OW
261     unsigned                    callerSaveAreaOffset = 0;
262     unsigned                    calleeSaveAreaOffset = 0;
263     unsigned                    frameSizeInOWord = 0;
264 
265     // Bank conflict statistics.
266     struct BankConflictStatistics
267     {
268         unsigned NumOfGoodInsts = 0;
269         unsigned NumOfBadInsts = 0;
270         unsigned NumOfOKInsts = 0;
271 
addGoodBankConflictStatistics272         void addGood() { ++NumOfGoodInsts; }
addBadBankConflictStatistics273         void addBad() { ++NumOfBadInsts; }
addOKBankConflictStatistics274         void addOK() { ++NumOfOKInsts; }
clearBankConflictStatistics275         void clear()
276         {
277             NumOfGoodInsts = 0;
278             NumOfBadInsts = 0;
279             NumOfOKInsts = 0;
280         }
281     } BCStats;
282 
283     struct XeBankConflictStatistics
284     {
285         unsigned simd8 = 0;    //The number of simd8 instructions, one simd16 is treated as two simd8 if it's not HF
286         unsigned BCNum = 0;  //The number of band conflict.
287         int sameBankConflicts = 0;
288         int simd16ReadSuppression = 0;
289         int twoSrcBC = 0;
290 
clearXeBankConflictStatistics291         void clear()
292         {
293             simd8 = 0;
294             BCNum = 0;
295             sameBankConflicts = 0;
296             simd16ReadSuppression = 0;
297             twoSrcBC = 0;
298         }
addBCXeBankConflictStatistics299         void addBC(unsigned num) { BCNum += num; }
addSameBankBCXeBankConflictStatistics300         void addSameBankBC(unsigned num) { sameBankConflicts += num; }
addSimd16RSBCXeBankConflictStatistics301         void addSimd16RSBC(unsigned num) { simd16ReadSuppression += num; }
add2SrcBCXeBankConflictStatistics302         void add2SrcBC(unsigned num) { twoSrcBC += num; }
addSIMD8XeBankConflictStatistics303         void addSIMD8() { ++simd8; }
304 
305     } XeBCStats;
306     unsigned numRMWs = 0;    // counting the number of read-modify-write
307 public:
308     // forwarding functions to the BBs list
begin()309     BB_LIST_ITER begin() { return BBs.begin(); }
end()310     BB_LIST_ITER end() { return BBs.end(); }
rbegin()311     BB_LIST::reverse_iterator rbegin() { return BBs.rbegin(); }
rend()312     BB_LIST::reverse_iterator rend() { return BBs.rend(); }
cbegin()313     BB_LIST::const_iterator cbegin() const { return BBs.cbegin(); }
cend()314     BB_LIST::const_iterator cend() const { return BBs.cend(); }
crbegin()315     BB_LIST::const_reverse_iterator crbegin() const { return BBs.crbegin(); }
crend()316     BB_LIST::const_reverse_iterator crend() const { return BBs.crend(); }
317 
size()318     size_t size() { return BBs.size(); }
empty()319     bool empty() const { return BBs.empty(); }
back()320     G4_BB* back() const {return BBs.back(); }
321 
322     static void setPhysicalLink(G4_BB* pred, G4_BB* succ);
323 
324     BB_LIST_ITER insert(BB_LIST_ITER iter, G4_BB* bb);
325 
326 
push_back(G4_BB * bb)327     void push_back(G4_BB* bb) {insert(BBs.end(), bb);}
328 
329     void erase(BB_LIST_ITER iter);
330 
getBBList()331     BB_LIST& getBBList() { return BBs; }
332 
333     // add BB to be the first BB
334     void addPrologBB(G4_BB* BB);
335 
336 
337     // append another CFG's BBs to this CFG.
338     // note that we don't add additional CFG edges as its purpose is just to
339     // stitch the two binaries togather
340     void append(const FlowGraph& otherFG);
341 
342     G4_BB* getLabelBB(Label_BB_Map& map, G4_Label* label);
343     G4_BB* beginBB(Label_BB_Map& map, G4_INST* first);
344 
performIPA()345     bool performIPA() const {return doIPA;}
346 
getHasStackCalls()347     bool getHasStackCalls() const { return hasStackCalls; }
setHasStackCalls()348     void setHasStackCalls() { hasStackCalls = true; }
resetHasStackCalls()349     void resetHasStackCalls() { hasStackCalls = false;}
350 
getIsStackCallFunc()351     bool getIsStackCallFunc() const {return isStackCallFunc;}
setIsStackCallFunc()352     void setIsStackCallFunc() {isStackCallFunc = true;}
353 
getKernel()354     G4_Kernel* getKernel() {return pKernel;}
355 
356     void mergeFReturns();
357 
getFramePtrDcl()358     G4_Declare*& getFramePtrDcl()                   {return framePtrDcl;}
getStackPtrDcl()359     G4_Declare*& getStackPtrDcl()                   {return stackPtrDcl;}
getScratchRegDcl()360     G4_Declare*& getScratchRegDcl()                 {return scratchRegDcl;}
361 
isPseudoVCEDcl(const G4_Declare * dcl)362     bool isPseudoVCEDcl(const G4_Declare* dcl) const { return dcl == pseudoVCEDcl; }
isPseudoVCADcl(const G4_Declare * dcl)363     bool isPseudoVCADcl(const G4_Declare* dcl) const
364     {
365         for (auto iter : fcallToPseudoDclMap)
366         {
367             if (iter.second.VCA == dcl)
368             {
369                 return true;
370             }
371         }
372         return false;
373     }
isPseudoA0Dcl(const G4_Declare * dcl)374     bool isPseudoA0Dcl(const G4_Declare* dcl) const
375     {
376         for (auto iter : fcallToPseudoDclMap)
377         {
378             if (iter.second.A0 == dcl)
379             {
380                 return true;
381             }
382         }
383         return false;
384     }
isPseudoFlagDcl(const G4_Declare * dcl)385     bool isPseudoFlagDcl(const G4_Declare* dcl) const
386     {
387         for (auto iter : fcallToPseudoDclMap)
388         {
389             if (iter.second.Flag == dcl)
390             {
391                 return true;
392             }
393         }
394         return false;
395     }
isPseudoDcl(const G4_Declare * dcl)396     bool isPseudoDcl(const G4_Declare* dcl) const
397     {
398         if (!getHasStackCalls() && !getIsStackCallFunc())
399         {
400             return false;
401         }
402         if (isPseudoVCEDcl(dcl))
403         {
404             return true;
405         }
406         for (auto iter : fcallToPseudoDclMap)
407         {
408             if (iter.second.A0 == dcl || iter.second.Flag == dcl || iter.second.VCA == dcl)
409             {
410                 return true;
411             }
412         }
413         return false;
414     }
415 
416     //
417     // Merge multiple returns into one, prepare for spill code insertion
418     //
419     void mergeReturn(FuncInfoHashTable& funcInfoTable);
420     G4_BB* mergeSubRoutineReturn(G4_Label* subroutine);
421     void normalizeSubRoutineBB(FuncInfoHashTable& funcInfoTable);
422     void processGoto(bool HasSIMDCF);
423     void processSCF(FuncInfoHashTable& FuncInfoMap);
424     void insertJoinToBB(G4_BB* bb, G4_ExecSize execSize, G4_Label* jip);
425 
426     // functions for structure analysis
getKernel()427     G4_Kernel *getKernel() const { return pKernel; }
428     G4_Label* insertEndif(G4_BB* bb, G4_ExecSize execSize, bool createLabel);
429     void setJIPForEndif(G4_INST* endif, G4_INST* target, G4_BB* targetBB);
430     void convertGotoToJmpi(G4_INST *gotoInst);
431     G4_BB* getSinglePredecessor(G4_BB* BB, G4_BB* ExcludedPred) const;
432     bool convertJmpiToGoto();
433 
getNumFuncs()434     unsigned getNumFuncs() const {return unsigned(funcInfoTable.size());}
435 
436     void handleReturn(Label_BB_Map& map, FuncInfoHashTable& funcInfoTable);
437     void linkReturnAddr(G4_BB* bb, G4_BB* returnAddr);
438 
439     void handleExit(G4_BB* lastKernelBB);
440     void handleWait();
441 
442     void preprocess(INST_LIST& instlist);
443 
444     FlowGraph() = delete;
445     FlowGraph(const FlowGraph&) = delete;
446     FlowGraph& operator=(const FlowGraph&) = delete;
447 
FlowGraph(INST_LIST_NODE_ALLOCATOR & alloc,G4_Kernel * kernel,Mem_Manager & m)448     FlowGraph(INST_LIST_NODE_ALLOCATOR& alloc, G4_Kernel* kernel, Mem_Manager& m) :
449       traversalNum(0), numBBId(0), reducible(true),
450       doIPA(false), hasStackCalls(false), isStackCallFunc(false), autoLabelId(0),
451       pKernel(kernel), mem(m), instListAlloc(alloc),
452       kernelInfo(NULL), builder(NULL), globalOpndHT(m), framePtrDcl(NULL),
453       stackPtrDcl(NULL), scratchRegDcl(NULL), pseudoVCEDcl(NULL),
454       dom(*kernel), immDom(*kernel), pDom(*kernel), loops(*kernel) {}
455 
456     ~FlowGraph();
457 
setBuilder(IR_Builder * pBuilder)458     void setBuilder(IR_Builder *pBuilder) {builder = pBuilder;}
459 
460     void addPredSuccEdges(G4_BB* pred, G4_BB* succ, bool tofront=true)
461     {
462         markStale();
463 
464         if (tofront)
465             pred->Succs.push_front(succ);
466         else
467             pred->Succs.push_back(succ);
468 
469         succ->Preds.push_front(pred);
470     }
471 
472     void addUniquePredSuccEdges(G4_BB* pred, G4_BB* succ, bool tofront=true)
473     {
474         // like above, but check for duplicate edges
475         auto iter = std::find(pred->Succs.begin(), pred->Succs.end(), succ);
476         if (iter == pred->Succs.end())
477         {
478             addPredSuccEdges(pred, succ, tofront);
479         }
480     }
481 
482     void removePredSuccEdges(G4_BB* pred, G4_BB* succ);
483 
484     G4_INST* createNewLabelInst(G4_Label* label, int lineNo = 0, int CISAOff = -1);
485 
486     G4_BB* createNewBB(bool insertInFG = true);
487     G4_BB* createNewBBWithLabel(const char* LabelPrefix, int Lineno = 0, int CISAoff = -1);
488     int64_t insertDummyUUIDMov();
489     //
490     // Increase by one so that all BBs' traversal are less than traversalNum
491     //
prepareTraversal()492     void prepareTraversal()    {traversalNum++;}
getTraversalNum()493     unsigned getTraversalNum() {return traversalNum;}
494 
495     //
496     // Check if the graph is reducible
497     //
isReducible()498     bool isReducible() { return reducible; }
499 
500     //
501     // Remove any placeholder empty blocks that could have been inserted to aid analysis
502     //
503     void removeRedundantLabels();
504     //
505     // remove any mov with the same src and dst opnds
506     //
507     void removeRedundMov();
508     //
509     // Remove any placeholder empty blocks that could have been inserted to aid analysis.
510     //
511     void removeEmptyBlocks();
512     //
513     // Add a dummy BB for multiple-exit flow graph
514     //
515     void linkDummyBB();
516     //
517     // Re-assign block ID so that we can use id to determine the ordering of two blocks in
518     // the code layout
519     //
520     void reassignBlockIDs();
521 
522     //
523     // Remove blocks that are unreachable via control flow of program
524     //
525     void removeUnreachableBlocks(FuncInfoHashTable& funcInfoHT);
526 
527     void constructFlowGraph(INST_LIST& instlist);
528     bool matchBranch(int &sn, INST_LIST& instlist, INST_LIST_ITER &it);
529 
530     void localDataFlowAnalysis();
531     void resetLocalDataFlowData();
532 
getNumBB()533     unsigned getNumBB() const {return numBBId;}
getEntryBB()534     G4_BB* getEntryBB()       {return BBs.front();}
535 
536     void addFrameSetupDeclares(IR_Builder& builder, PhyRegPool& regPool);
537     void addSaveRestorePseudoDeclares(IR_Builder& builder);
538     void markDivergentBBs();
539 
540     // Used for CISA 3.0
incrementNumBBs()541     void incrementNumBBs() { numBBId++ ; }
542     G4_BB* getUniqueReturnBlock();
543 
544     void normalizeFlowGraph();
545 
546     // This is mainly used to link subroutine call-return BBs
547     // ToDo: maintain this during BB add/delete instead of having to call it explicitly
548     void setPhysicalPredSucc();
549 
550     void markRPOTraversal();
551 
552     void findBackEdges();
553     void findNaturalLoops();
554 
555     void traverseFunc(FuncInfo* func, unsigned *ptr);
556     void topologicalSortCallGraph();
557     void findDominators(std::map<FuncInfo*, std::set<FuncInfo*>>& domMap);
558     unsigned resolveVarScope(G4_Declare* dcl, FuncInfo* func);
559     void markVarScope(std::vector<G4_BB*>& BBList, FuncInfo* func);
560     void markScope();
561 
562     void addSIMDEdges();
563 
addBBLRASummary(G4_BB * bb,PhyRegSummary * summary)564     void addBBLRASummary(G4_BB* bb, PhyRegSummary* summary)
565     {
566         bbLocalRAMap.insert(std::make_pair(bb, summary));
567         localRASummaries.push_back(summary);
568     }
569 
clearBBLRASummaries()570     void clearBBLRASummaries()
571     {
572         bbLocalRAMap.clear();
573     }
574 
getBBLRASummary(G4_BB * bb)575     PhyRegSummary* getBBLRASummary(G4_BB* bb) const
576     {
577         auto&& iter = bbLocalRAMap.find(bb);
578         return iter != bbLocalRAMap.end() ? iter->second : nullptr;
579     }
580 
getNumCalls()581     uint32_t getNumCalls() const
582     {
583         uint32_t numCalls = 0;
584         for (auto bb : BBs)
585         {
586             if (bb->isEndWithCall())
587             {
588                 ++numCalls;
589             }
590         }
591         return numCalls;
592     }
593 
isIndirectJmpTarget(G4_INST * inst)594     bool isIndirectJmpTarget(G4_INST* inst) const
595     {
596         return indirectJmpTarget.count(inst) > 0;
597     }
598 
getLabelForEndif(G4_INST * inst)599     G4_Label* getLabelForEndif(G4_INST* inst) const
600     {
601         auto iter = endifWithLabels.find(inst);
602         if (iter != endifWithLabels.end())
603         {
604             return iter->second;
605         }
606         else
607         {
608             return nullptr;
609         }
610     }
611 
endWithGotoInLastBB()612     bool endWithGotoInLastBB() const
613     {
614         if (BBs.empty())
615         {
616             return false;
617         }
618         G4_BB* lastBB = back();
619         return lastBB->isEndWithGoto();
620     }
621 
622     /// Return true if PredBB->SuccBB is a backward branch goto/jmpi/while.
isBackwardBranch(G4_BB * PredBB,G4_BB * SuccBB)623     bool isBackwardBranch(G4_BB* PredBB, G4_BB* SuccBB) const
624     {
625         if (PredBB->size() == 0) return false;
626         G4_INST* bInst = PredBB->back();
627         G4_BB* targetBB = PredBB->Succs.size() > 0 ? PredBB->Succs.back() : nullptr;
628         bool isBr = (bInst->opcode() == G4_goto || bInst->opcode() == G4_jmpi);
629         // Note that isBackward() should return true for while as well.
630         return targetBB == SuccBB &&
631             ((isBr && bInst->asCFInst()->isBackward()) || bInst->opcode() == G4_while);
632     }
633 
634     void setABIForStackCallFunctionCalls();
635 
636     // This is for TGL WA
637     void findNestedDivergentBBs(std::unordered_map<G4_BB*, int>& nestedDivergentBBs);
638 
639     void print(std::ostream& OS) const;
640     void dump() const;  // used in debugger
641 
getDominator()642     Dominator& getDominator() { return dom; }
getImmDominator()643     ImmDominator& getImmDominator() { return immDom; }
getPostDominator()644     PostDom& getPostDominator() { return pDom; }
getLoops()645     LoopDetection& getLoops() { return loops; }
646     void markStale();
647 
648 private:
649     // Use normalized region descriptors for each source operand if possible.
650     void normalizeRegionDescriptors();
651 
652     // Find the BB that has the given label from the range [StartIter, EndIter).
653     static G4_BB* findLabelBB(
654         BB_LIST_ITER StartIter,
655         BB_LIST_ITER EndIter,
656         const char* Label);
657 
658     void decoupleReturnBlock(G4_BB*);
659     void decoupleInitBlock(G4_BB*, FuncInfoHashTable& funcInfoTable);
660     void DFSTraverse(G4_BB* bb, unsigned& preId, unsigned& postId, FuncInfo* fn);
661 
662 }; // FlowGraph

} // vISA::
#endif // FLOWGRAPH_H
666