1 /*========================== begin_copyright_notice ============================
2 
3 Copyright (C) 2017-2021 Intel Corporation
4 
5 SPDX-License-Identifier: MIT
6 
7 ============================= end_copyright_notice ===========================*/
8 
9 #include "FlowGraph.h"
10 #include "BuildIR.h"
11 
12 using namespace vISA;
13 // This is the debugging code.
14 
15 // Type for ANode (Abstract Node)
16 typedef enum
17 {
18     AN_UNUSED,
19 
20     // corresponding to each BB, not necessarily a hammock
21     AN_BB,
22 
23     // The following are hammock nodes
24     AN_HAMMOCK,   // temporarily used
25     AN_IF_THEN_ENDIF,
26     AN_IF_THEN_ELSE_ENDIF,
27     AN_DO_WHILE,
28     AN_COMPOSITE,
29     AN_SEQUENCE
30 } ANodeType;
31 
32 // Kind of ANode: indicate the final conversion choice of this ANode.
33 typedef enum {
34     ANKIND_GOTOJOIN,     // goto/join
35     ANKIND_JMPI,         // scalar jump (uniform branch)
36     ANKIND_SCF           // structured CF
37 } ANodeKind;
38 
39 namespace {
40 
41      enum ANodeBBType
42     {
43         ANODEBB_NORMAL,
44         ANODEBB_CALL,
45         ANODEBB_RETURN
46     };
47 
48     class ControlGraph
49     {
50     public:
51         // forward goto :  goto in beginBB, endBB is target of goto
52         // backward goto:  goto in endBB, beginBB is target of goto
53         G4_BB *beginBB;
54         G4_BB *endBB;
55         G4_INST *gotoInst;
56 
57         bool isBackward;
58         bool isLoopCandidate;  // Used if isBackward is true
59 
ControlGraph(G4_BB * begin,G4_BB * end,G4_INST * inst,bool backward)60         ControlGraph(G4_BB *begin, G4_BB *end, G4_INST* inst, bool backward) :
61             beginBB(begin), endBB(end), gotoInst(inst), isBackward(backward),
62             isLoopCandidate(false)
63         {}
64     };
65 
66     // forward declaration
67     class CFGStructurizer;
68     class ANode;
69     class ANodeBB;
70 
71     typedef std::vector<G4_BB*> BBVector;
72     typedef std::vector<ANode*> ANVector;
73     typedef std::list<ANode*> ANList;
74     typedef std::vector<ControlGraph*> CGVector;
75     typedef std::list<ControlGraph*> CGList;
76     typedef std::map<G4_BB*, G4_BB*> BBToBBMap;
77     typedef std::map<G4_BB*, ANodeBB*> BBToANodeBBMap;
78     typedef std::map<ANode*, ANode*> ANodeMap;
79 
80     // ANode base, abstract class
81     class ANode
82     {
83     public:
84         uint32_t  anodeId;
85         ANodeType type;
86         ANList    preds;
87         ANList    succs;
88         ANode     *parent;
89         ANodeKind kind;
90 
91 
92         // Indicate if this node is in ACFG. ACFG changes as BB is
93         // processed, and once a group of nodes are condensed and
94         // represented by a new, single node, the new node will be
95         // in ACFG and all condensed nodes are no longer in ACFG.
96         bool      inACFG;
97 
98         // True if structured CF is allowed for this node (for example,
99         // do_while ANode has break inst that we cannot handle for now.
100         // Then, set this field to false so we has to use goto/join.)
101         bool      allowSCF;
102 
103         // True if it has break. Note that break isn't represented by
104         // a separate ANode type.  So, if AN_BB has this field set to
105         // true, its goto is a break. For other ANode, it means it
106         // contains some AN_BB that has break.
107         bool      hasBreak;
108 
109         bool      visited;
110 
111         virtual G4_BB* getBeginBB() const = 0;
112         virtual G4_BB* getEndBB() const = 0;
113         virtual G4_BB* getExitBB() const = 0;
114         virtual void setBeginBB(G4_BB *bb) = 0;
115         virtual void setEndBB(G4_BB *bb) = 0;
116         virtual void setExitBB(G4_BB *bb) = 0;
117 
ANode()118         explicit ANode() :
119             type(AN_UNUSED), preds(), succs(),  parent(nullptr),
120             kind(ANKIND_GOTOJOIN), inACFG(true), allowSCF(true),
121             hasBreak(false), visited(false) {}
ANode(ANodeType Ty)122         explicit ANode(ANodeType Ty) :
123             type(Ty), preds(), succs(),  parent(nullptr),
124             kind(ANKIND_GOTOJOIN), inACFG(true), allowSCF(true),
125             hasBreak(false), visited(false) {}
~ANode()126         virtual ~ANode() {}
127 
getType() const128         ANodeType getType() const { return type; }
setType(ANodeType t)129         void setType(ANodeType t) { type = t; }
getKind() const130         ANodeKind getKind() const { return kind; }
setKind(ANodeKind t)131         void setKind(ANodeKind t) { kind = t; }
132 
isInACFG() const133         bool isInACFG() const { return inACFG; }
setInACFG(bool v)134         void setInACFG(bool v) { inACFG = v; }
135 
136         // Return true if this node's type is a SCF type.
137         bool isSCF() const;
138         bool isHammock() const;
139 
140         ANode* getCurrentANode() const;
141         ANode* getChildANode(ANode *parent);
142         bool isMember(ANode *nd) const;
143         bool isExitPhySucc() const;
getHasBreak() const144         bool getHasBreak() const { return hasBreak; }
setHasBreak(bool v)145         void setHasBreak(bool v) { hasBreak = v; }
getAllowSCF() const146         bool getAllowSCF() const { return allowSCF; }
setAllowSCF(bool v)147         void setAllowSCF(bool v) { allowSCF = v; }
isVisited() const148         bool isVisited() const { return visited; }
setVisited(bool v)149         void setVisited(bool v) {visited = v; }
150 
151     private:
152         // copy-ctor and assignment operator are not allowed
153         ANode(const ANode&);
154         ANode& operator= (const ANode&);
155     };
156 
157     // ANodeBB : ANode for a single BB
158     class ANodeBB : public ANode
159     {
160     public:
161         G4_BB *bb;
162         ANodeBBType type;
163         // Used during convertPST() to tell if a BB's label has been used as jmpi's target. As we
164         // convert PST from the outer to inner. If this is true, endif/join etc (to this label) must
165         // need a new BB (new label).
166         // For example:
167         //       goto label0                    jmpi label0
168         //       ...                            ...
169         //       (p) goto label0   --->         if (goto)
170         //       ...                            ...
171         //     label0:                        label_new:
172         //                                      endif (join)
173         //                                    label0:
174         //
175         bool isJmpiTarget;
176 
ANodeBB()177         explicit ANodeBB() : ANode(AN_BB), bb(nullptr), type(ANODEBB_NORMAL), isJmpiTarget(false) {}
ANodeBB(G4_BB * b)178         explicit ANodeBB(G4_BB *b) :
179             ANode(AN_BB), bb(b), type(ANODEBB_NORMAL) {}
~ANodeBB()180         virtual ~ANodeBB() {}
181 
setANodeBBType(ANodeBBType v)182         void setANodeBBType(ANodeBBType v) { type = v; }
183 
184         // ExitBB is the target BB for BB with goto inst or
185         // next BB otherwise. If no successor, return nullptr.
getExitBB() const186         virtual G4_BB* getExitBB() const
187         {
188             if (type == ANODEBB_CALL)
189             {
190                 return bb->getPhysicalSucc();
191             }
192             return (type == ANODEBB_RETURN || bb->Succs.size() == 0)
193                 ? nullptr : bb->Succs.back();
194         }
getBeginBB() const195         virtual G4_BB* getBeginBB() const { return bb; }
getEndBB() const196         virtual G4_BB* getEndBB()   const { return bb; }
197 
setExitBB(G4_BB * b)198         virtual void setExitBB(G4_BB *b)  { MUST_BE_TRUE(false, "setExitBB() not allowed for ANodeBB"); }
setBeginBB(G4_BB * b)199         virtual void setBeginBB(G4_BB *b) { MUST_BE_TRUE(false, "setBeginBB() not allowed for ANodeBB"); }
setEndBB(G4_BB * b)200         virtual void setEndBB(G4_BB *b)   { MUST_BE_TRUE(false, "setEndBB() not allowed for ANodeBB"); }
201     };
202 
203     // Hammock Graph
204     class ANodeHG : public ANode
205     {
206     public:
207         ANList children; // in order, 1st one is entryNode.
208 
209         // This node has BBs from beginBB to endBB, inclusive; and
210         // every outgoing edges will go to exitBB. exitBB is not
211         // part of this node.
212         G4_BB* beginBB;
213         G4_BB* endBB;
214         G4_BB* exitBB;   // not part of this ANode
215         ANode* exitNode; // not used yet
216 
217         /* Temporary variables used during ANodeHG constructions */
218         bool isLoopCandidate;
219 
220         // Index to ANStack (vector) when this node is on ANStack.
221         // -1 means it is not on ANStack.
222         int32_t ANStackIx;
223 
ANodeHG(ANodeType t)224         explicit ANodeHG(ANodeType t) :
225             ANode(t), beginBB(nullptr), endBB(nullptr), exitBB(nullptr),
226             exitNode(nullptr), isLoopCandidate(false), ANStackIx(-1)
227         { }
228         explicit ANodeHG(ControlGraph *cg, ANodeBB *nodebb);
~ANodeHG()229         virtual ~ANodeHG() {}
230 
getEntryNode() const231         ANode* getEntryNode() const { return children.size() > 0 ? children.front() : nullptr; }
getEndNode() const232         ANode* getEndNode() const { return children.size() > 0 ? children.back() : nullptr; }
233         ANode* getExitNode(ANodeBB *exitANBB);
setExitNode(ANode * nd)234         void   setExitNode(ANode *nd) { exitNode = nd; }
235 
getBeginBB() const236         virtual G4_BB* getBeginBB() const { return beginBB; }
getEndBB() const237         virtual G4_BB* getEndBB() const { return endBB; }
getExitBB() const238         virtual G4_BB* getExitBB() const { return exitBB; }
setBeginBB(G4_BB * b)239         virtual void setBeginBB(G4_BB* b) { beginBB = b; }
setEndBB(G4_BB * b)240         virtual void setEndBB(G4_BB *b) { endBB = b; }
setExitBB(G4_BB * b)241         virtual void setExitBB(G4_BB* b) { exitBB = b; }
242     };
243 
244     // CFG Structurizer
245     //
246     // [TODO] algo description
247     //
248     // For a CFG that has gotos, convert it to structurized control flow
249     // so that we can use structured control-flow instructions, jmpi,
250     // and goto/join to replace them.
251     //
252     // This algorithm uses the existing order of BBs and will not reorder
253     // them.
254     class CFGStructurizer
255     {
256     private:
257         FlowGraph  *CFG;
258         uint32_t   numOfBBs;
259 
260         // Used to assign unique id to each ANode
261         uint32_t numOfANodes;
262 
263         // For new BB, map its ID to its insertion BB,
264         // ie, the original BB after which it is inserted.
265         BBToBBMap  newBBToInsertAfterBB;
266 
267         // Given a BB, find its ANodeBB.
268         // Access them via setANodeBB()/getANodeBB().
269         struct {
270             ANodeBB *IDToANodeBB;
271             BBToANodeBBMap newBBToANodeBB;
272         } anodeBBs;
273 
274         // caching the flags
275         bool doScalarJmp;
276         bool doStructCF;
277 
278         // Assume that the entire CFG is the single ANode: Root ANode for
279         // short (We don't actually build it, but imagine we have one). A root
280         // ANodes are the ANodes whose parent is this Root ANode and who
281         // are also hammock. In another word, any BB with goto should belong
282         // to one of topANodes(only one of them, not more than 1).
283         ANVector topANodes;
284 
285         // Used during constructing hammock ANodes.
286         ANVector  ANStack;
287 
288         // BBs is the list of BBs that shows the layout order. Originally,
289         // it is the same as the input CFG->BBs. As the algo goes, new BBs
290         // can be inserted in the list.
291         BB_LIST* BBs;               // ptr to CFG->BBs
292         G4_ExecSize kernelExecSize;     // default execSize
293 
294         void init();
295         void fini();
296         void deletePST(ANodeHG* node);
297         void preProcess();
298 
299         // return true if bb is the start of ControlGraph.
300         bool getCGBegin(G4_BB *bb, CGList &cgs);
301         void insertAtBegin(G4_BB* aBB, G4_INST *anInst);
302 
303         // Insert 'cg' in the ordered list 'cgs'
304         void cg_oinsert(CGList &cgs, ControlGraph *cg);
305         void bb_oinsert(BB_LIST &bblist, G4_BB *bb);
306         bool isGotoScalarJmp(G4_INST *gotoInst);
307         bool isJoinBB(G4_BB* bb);
308         bool canbeJoinBB(G4_BB *bb);
309         bool isBBLabelAvailable(G4_BB *bb);
310         ANode *ANListGetOther(ANList &anlist, ANode *node);
311         void reConnectAllPreds(ANode *from, ANode *to);
312         void reConnectAllSuccs(ANode *from, ANode *to);
313 
314         // Use ANStack_push/ANStack_pop to push/pop ANStack
ANStack_push(ANodeHG * AN)315         void ANStack_push(ANodeHG* AN)
316         {
317             AN->ANStackIx = (uint32_t) ANStack.size();
318             ANStack.push_back(AN);
319         }
320 
ANStack_pop()321         ANodeHG *ANStack_pop()
322         {
323             ANodeHG *AN = (ANodeHG *)ANStack.back();
324             ANStack.pop_back();
325             AN->ANStackIx = (uint32_t)(-1);
326             return AN;
327         }
328 
getNextANodeId()329         uint32_t getNextANodeId() { return numOfANodes++; }
330         void constructPST(BB_LIST_ITER IB, BB_LIST_ITER IE);
331         void reConstructDoWhileforBreak(ANodeHG *whileNode);
332         void ANStack_reconstruct(ANodeHG *& node, ANodeBB *ndbb);
333         ANodeHG *finalizeHG(ANodeHG *node);
334         void condenseNode(ANodeHG *node);
335 
336         // Return true if bb0 appears before bb1, false otherwise.
337         // (If bb0 is the same as bb1, return false.)
338         bool isBefore(G4_BB *bb0, G4_BB *bb1);
339 
340         ANodeBB *getANodeBB(G4_BB *b);
341         void setANodeBB(ANodeBB *a, G4_BB *b);
342         G4_BB *createBBWithLabel();
343         G4_BB *getInsertAfterBB(G4_BB *bb);
344         void setInsertAfterBB(G4_BB *newbb, G4_BB *insertAfter);
345         G4_BB* findInsertAfterBB(G4_BB *bb);
346         void adjustBBId(G4_BB *newbb);
347 
348         void getNewRange(G4_BB*& end, G4_BB*& exit, G4_BB* end1, G4_BB* exit1);
349         void extendNode(ANode *Node, ControlGraph *CG);
350         ANodeHG* getEnclosingANodeHG(ANode *AN);
351         ANode* mergeWithPred(ANode *AN);
352         ANodeBB* addLandingBB(
353             ANodeHG *node, BB_LIST_ITER insertAfterIter, bool updateACFG);
354         ANode *addSplitBBAtBegin(G4_BB *splitBB);
355         ANode* addSplitBBAtEnd(G4_BB *splitBB);
356         bool isANodeOnlyPred(G4_BB *abb, ANode *node);
357         void PSTAddANode(
358             ANodeHG *parent, ANode *node, ANode *newNode, bool isAfter);
359 
360         bool convertPST(ANode *node, G4_BB *nextJoinBB);
361         void setJoinJIP(G4_BB* joinBB, G4_BB* jipBB);
362         void convertChildren(ANodeHG *nodehg, G4_BB *nextJoinBB);
363         void convertIf(ANodeHG *node, G4_BB *nextJoinBB);
364         void convertDoWhile(ANodeHG *node, G4_BB *nextJoinBB);
365         void convertGoto(ANodeBB *node, G4_BB *nextJoinBB);
366         void generateGotoJoin(G4_BB *gotoBB, G4_BB *jibBB, G4_BB *targetBB);
367 
368         // helper
369         // return goto inst if any; null otherwise
370         G4_INST *getGotoInst(G4_BB *bb);
371         G4_INST *getJmpiInst(G4_BB *bb);
372         void updateInstDI(G4_INST* I, G4_BB* DIFromBB);
373 
374     private:
375         // Don't implement
376         CFGStructurizer(const CFGStructurizer&);
377         CFGStructurizer& operator=(const CFGStructurizer&);
378 
379     public:
380 
381         static void ANListReplace(ANList &anlist, ANode *from, ANode *to);
382         static void BBListReplace(BB_LIST &bblist, G4_BB *from, G4_BB *to);
383         static BB_LIST_ITER findBB(BB_LIST *bblist, G4_BB *bb);
384         static ANList::iterator findANode(ANList &anlist, ANode *nd);
385         static void ANListErase(ANList &anlist, ANode *toBeErased);
386         static void BBListErase(BB_LIST &bblist, G4_BB *toBeErased);
387 
CFGStructurizer(FlowGraph * cfg)388         CFGStructurizer(FlowGraph *cfg) : CFG(cfg)
389         {
390             init();
391         }
392 
~CFGStructurizer()393         ~CFGStructurizer()
394         {
395             fini();
396         }
397 
398         void run();
399 
400 #if     0
401         // Forward definition
402         class DebugSelector;
403         DebugSelector *dbgSel;
404 #endif
405 
406 #ifdef _DEBUG
407         void doIndent(uint32_t numOfSpaces); // Generate indent
408         void dump();                         // Dump the program(kernel/shader)
409         void dumpcfg();                      // Dump the CFG only without instructions
410         void dump(G4_BB *bb);                // Dump the given bb
411         void dump(uint32_t BBId);            // Dump BB with the given BBId
412         void dump(char *labelString);        // Dump BB with the given labelString
413         void dump(ANode *node, uint32_t indentSpaces);  // Dump node recursively
414         void dumpacfg();                     // Dump ACFG.
415 #endif
416     };
417 }  // namespace
418 
419 #ifdef _DEBUG
420 
421 // 0:  default, dumps only CF related instructions (CF instr, label)
422 // 1:  All instructions
423 static int dump_level = 0;
424 static const char* currFileName = nullptr;
425 static std::ofstream dump_ofs;
426 static std::ostream* dumpOut = &std::cout;  // default
427 
428 // use this func or debugger to set dump_level
setDumpLevel(int l)429 void setDumpLevel(int l) { dump_level = l;}
430 
431 // Direct output to a given file or std::cout
resetoutput()432 void resetoutput() { dumpOut = & std::cout; }  // restore default
setoutputfile(const char * filename)433 void setoutputfile(const char* filename)
434 {
435     if (dump_ofs.is_open()) dump_ofs.close();
436 
437     currFileName = filename;
438     dump_ofs.open(filename,
439               std::ofstream::out | std::ofstream::ate | std::ofstream::trunc);
440     dump_ofs.flush();
441     dumpOut = &dump_ofs;
442 }
forceFlush()443 void forceFlush()
444 {
445     if (dump_ofs.is_open())
446     {
447         dump_ofs.close();
448         dump_ofs.open(currFileName,
449             std::ofstream::out | std::ofstream::app);
450     }
451 }
452 
453 //
454 // CFG dump routines
455 //
dumpbbbase(G4_BB * bb)456 void dumpbbbase(G4_BB *bb)
457 {
458     if (dump_level == 0)
459     {
460         for (INST_LIST_ITER it = bb->begin(), ie = bb->end();
461             it != ie; ++it)
462         {
463             G4_INST* inst = *it;
464             if (!inst->isLabel() && !inst->isFlowControl())
465             {
466                 continue;
467             }
468             bb->emitInstruction(*dumpOut, it);
469         }
470     }
471     else
472     {
473         bb->emit(*dumpOut);
474     }
475     dumpOut->flush();
476 }
477 
dumpbb(G4_BB * bb)478 void dumpbb(G4_BB *bb)
479 {
480     (*dumpOut) << "\n[BB" << bb->getId() << "] ";
481     dumpbbbase(bb);
482 }
483 
dumpbbid(BB_LIST * bblist,uint32_t BBId)484 void dumpbbid(BB_LIST *bblist, uint32_t BBId)
485 {
486     for (BB_LIST_ITER I = bblist->begin(), E = bblist->end();
487         I != E; ++I)
488     {
489         G4_BB *bb = *I;
490         if (bb->getId() == BBId)
491         {
492             dumpbb(bb);
493             break;
494         }
495     }
496 }
497 
dumpbblabel(BB_LIST * bblist,char * labelString)498 void dumpbblabel(BB_LIST *bblist, char* labelString)
499 {
500     for (BB_LIST_ITER I = bblist->begin(), E = bblist->end();
501         I != E; ++I)
502     {
503         G4_BB *bb = *I;
504         G4_Label *labelInst = bb->getLabel();
505         if (labelInst && strcmp(labelInst->getLabel(), labelString) == 0)
506         {
507             dumpbb(bb);
508             break;
509         }
510     }
511 }
512 
dumpallbbs(BB_LIST * bblist)513 void dumpallbbs(BB_LIST *bblist)
514 {
515     (*dumpOut) << "Program Dump\n";
516 
517     for (BB_LIST_ITER I = bblist->begin(), E = bblist->end();
518         I != E; ++I)
519     {
520         G4_BB *bb = *I;
521         dumpbb(bb);
522     }
523     dumpOut->flush();
524 }
525 
dumpbblist(BB_LIST * bblist)526 void dumpbblist(BB_LIST* bblist)
527 {
528     (*dumpOut) << "\nCFG dump\n\n";
529 
530     for (BB_LIST_ITER I = bblist->begin(), E = bblist->end();
531         I != E; ++I)
532     {
533         G4_BB *bb = *I;
534         G4_Label *labelInst = bb->getLabel();
535         (*dumpOut) << "  BB(" << bb->getId() << ")";
536         if (labelInst)
537         {
538             (*dumpOut) << " " << labelInst->getLabel() << ",";
539         }
540         (*dumpOut) << "  Preds:";
541         for (BB_LIST_ITER I = bb->Preds.begin(), E = bb->Preds.end();
542             I != E; ++I)
543         {
544             G4_BB *pred = *I;
545             (*dumpOut) << " " << pred->getId();
546         }
547         (*dumpOut) << "    Succs:";
548         for (BB_LIST_ITER I = bb->Succs.begin(), E = bb->Succs.end();
549             I != E; ++I)
550         {
551             G4_BB *succ = *I;
552             (*dumpOut) << " " << succ->getId();
553         }
554         (*dumpOut) << "\n";
555     }
556     dumpOut->flush();
557 }
558 
559 
560 // CFGStructurizer::dump(), etc cannot be invoked in MSVC's immediate window.
561 // Use file-scope functions with cfgs prefix as work-around.
cfgsdump(CFGStructurizer * p)562 void cfgsdump(CFGStructurizer *p)
563 {
564     p->dump();
565 }
566 
cfgsdumpcfg(CFGStructurizer * p)567 void cfgsdumpcfg(CFGStructurizer *p)
568 {
569     p->dumpcfg();
570 }
571 
cfgsdump(CFGStructurizer * p,uint32_t BBId)572 void cfgsdump(CFGStructurizer *p, uint32_t BBId)
573 {
574     p->dump(BBId);
575 }
576 
cfgsdump(CFGStructurizer * p,G4_BB * bb)577 void cfgsdump(CFGStructurizer *p, G4_BB *bb)
578 {
579     p->dump(bb);
580 }
581 
cfgsdump(CFGStructurizer * p,char * labelName)582 void cfgsdump(CFGStructurizer *p, char *labelName)
583 {
584     p->dump(labelName);
585 }
586 
cfgsdump(CFGStructurizer * p,ANode * nd)587 void cfgsdump(CFGStructurizer *p, ANode *nd)
588 {
589     p->dump(nd, 0);
590 }
591 
cfgsdumpacfg(CFGStructurizer * p)592 void cfgsdumpacfg(CFGStructurizer *p)
593 {
594     p->dumpacfg();
595 }
596 
597 // Debugging functions
getANodeTypeString(ANodeType t)598 const char *getANodeTypeString(ANodeType t)
599 {
600     switch (t)
601     {
602     case AN_BB: return "BB";
603     case AN_HAMMOCK: return "HAMMOCK";
604     case AN_IF_THEN_ENDIF: return "IF_THEN_ENDIF";
605     case AN_IF_THEN_ELSE_ENDIF: return "IF_THEN_ELSE_ENDIF";
606     case AN_DO_WHILE: return "DO_WHILE";
607     case AN_COMPOSITE: return "COMPOSITE";
608     case AN_SEQUENCE: return "SEQUENCE";
609     default:
610         return " ";
611     }
612 }
613 
getANodeKindString(ANodeKind t)614 const char *getANodeKindString(ANodeKind t)
615 {
616     switch (t)
617     {
618     case ANKIND_GOTOJOIN: return "gotojoin";
619     case ANKIND_JMPI: return "jmpi";
620     case ANKIND_SCF: return "structuredCF";
621     }
622     return " ";
623 }
624 
doIndent(uint32_t numOfSpaces)625 void CFGStructurizer::doIndent(uint32_t numOfSpaces)
626 {
627     for (uint32_t ix = 0; ix < numOfSpaces; ++ix)
628     {
629         (*dumpOut) << " ";
630     }
631 }
632 
dump(ANode * node,uint32_t level)633 void CFGStructurizer::dump(ANode *node, uint32_t level)
634 {
635     // Just show the single digit nesting level
636     uint32_t showLevel = level % 10;
637     uint32_t indentSpaces = level * 4;
638 
639     ANodeType Ty = node->getType();
640     doIndent(indentSpaces);
641     (*dumpOut) << "  "
642               << showLevel
643               << "-node("
644               << node->anodeId << ") ["
645               << getANodeTypeString(Ty)
646               << "]  begin=BB" << node->getBeginBB()->getId()
647               << "  end=BB" << node->getEndBB()->getId();
648     G4_BB *exitbb = node->getExitBB();
649     if (exitbb)
650     {
651         (*dumpOut) << "  exit=BB" << exitbb->getId();
652     }
653     else
654     {
655         (*dumpOut) << "  exit=NULL";
656     }
657     (*dumpOut) << "\n";
658     doIndent(indentSpaces);
659     (*dumpOut) << "    Attr:";
660     if (node->getType() == AN_BB && getGotoInst(node->getEndBB()))
661     {
662         (*dumpOut) << " " << getANodeKindString(node->getKind());
663     }
664     if (node->getHasBreak())
665     {
666         (*dumpOut) << " hasBreak";
667     }
668     if (node->isSCF())
669     {
670         (*dumpOut) << (node->getAllowSCF() ? " allowSCF" : " notAllowSCF");
671     }
672     (*dumpOut) << "\n";
673 
674     doIndent(indentSpaces);
675     (*dumpOut) << "    preds:";
676     for (ANList::iterator I = node->preds.begin(), E = node->preds.end();
677         I != E; ++I)
678     {
679         ANode *nd = *I;
680         (*dumpOut) << " " << nd->anodeId;
681     }
682     (*dumpOut) << "  succs:";
683     for (ANList::iterator I = node->succs.begin(), E = node->succs.end();
684         I != E; ++I)
685     {
686         ANode *nd = *I;
687         (*dumpOut) << " " << nd->anodeId;
688     }
689     (*dumpOut) << "\n";
690     doIndent(indentSpaces);
691     if (node->parent)
692     {
693         (*dumpOut) << "    parent: " << node->parent->anodeId << "\n";
694     }
695     else
696     {
697         (*dumpOut) << "    parent:\n";
698     }
699     if (node->isHammock())
700     {
701         ANodeHG *ndhg = (ANodeHG*)node;
702         doIndent(indentSpaces);
703         (*dumpOut) << "    children:\n";
704         for (ANList::iterator I = ndhg->children.begin(), E = ndhg->children.end();
705             I != E; ++I)
706         {
707             ANode *nd = *I;
708             dump(nd, level + 1);
709         }
710     }
711     dumpOut->flush();
712 }
713 
dump(G4_BB * bb)714 void CFGStructurizer::dump(G4_BB *bb)
715 {
716     dumpbbbase(bb);
717 }
718 
dump(uint32_t BBId)719 void CFGStructurizer::dump(uint32_t BBId)
720 {
721     dumpbbid(BBs, BBId);
722 }
723 
dump(char * labelString)724 void CFGStructurizer::dump(char* labelString)
725 {
726     dumpbblabel(BBs, labelString);
727 }
728 
dump()729 void CFGStructurizer::dump()
730 {
731     (*dumpOut) << "Program Dump\n\n";
732     dumpallbbs(BBs);
733 }
734 
dumpcfg()735 void CFGStructurizer::dumpcfg()
736 {
737     dumpbblist(BBs);
738 }
739 
dumpacfg()740 void CFGStructurizer::dumpacfg()
741 {
742     (*dumpOut) << "\nACFG dump\n\n";
743 
744     BB_LIST_ITER I = BBs->begin();
745     BB_LIST_ITER E = BBs->end();
746     while (I != E)
747     {
748         BB_LIST_ITER nextI = I;
749         ++nextI;
750         G4_BB *bb = *I;
751         ANodeBB *ndbb = getANodeBB(bb);
752         if (ndbb)
753         {
754             ANode *nd = ndbb->getCurrentANode();
755             dump(nd, 0);
756             G4_BB *endbb = nd->getEndBB();
757             G4_BB *phyNext = endbb->getPhysicalSucc();
758             nextI = findBB(BBs, phyNext);
759         }
760         I = nextI;
761     }
762     dumpOut->flush();
763 }
764 
765 #endif
766 
767 
isSCF() const768 inline bool ANode::isSCF() const
769 {
770     return (type == AN_IF_THEN_ENDIF ||
771             type == AN_IF_THEN_ELSE_ENDIF ||
772             type == AN_DO_WHILE);
773 }
774 
isHammock() const775 inline bool ANode::isHammock() const
776 {
777     return (type == AN_IF_THEN_ENDIF ||
778             type == AN_IF_THEN_ELSE_ENDIF ||
779             type == AN_DO_WHILE ||
780             type == AN_COMPOSITE ||
781             type == AN_SEQUENCE);
782 }
783 
getCurrentANode() const784 inline ANode* ANode::getCurrentANode() const
785 {
786     const ANode *nd = this;
787     while (nd && !nd->isInACFG())
788     {
789         nd = nd->parent;
790     }
791     MUST_BE_TRUE(nd && nd->isInACFG(),
792                  "ACFG flag must be set incorrectly");
793     return const_cast<ANode*>(nd);
794 }
795 
isMember(ANode * nd) const796 inline bool ANode::isMember(ANode *nd) const
797 {
798     while (nd && nd != this)
799     {
800         nd = nd->parent;
801     }
802     return nd == this;
803 }
804 
isExitPhySucc() const805 inline bool ANode::isExitPhySucc() const
806 {
807     const G4_BB *endbb = getEndBB();
808     const G4_BB *exitbb = getExitBB();
809     return exitbb && endbb && endbb->getPhysicalSucc() == exitbb;
810 }
811 
ANodeHG(ControlGraph * cg,ANodeBB * nodebb)812 ANodeHG::ANodeHG(ControlGraph *cg, ANodeBB* nodebb)
813     : ANode(AN_HAMMOCK), children()
814 {
815     if (nodebb)
816     {
817         children.push_back(nodebb);
818         nodebb->parent = this;
819     }
820     exitNode = nullptr;
821     isLoopCandidate = false;
822     ANStackIx = -1;
823 
824     // AN_HAMMOCK is temporary. Its succs/preds won't be availabe until
825     // it is converted to a final hammock node, ie, finalized. So, it
826     // is actually not in ACFG yet.
827 
828     bool isCondGoto = (cg->gotoInst->getPredicate() != NULL);
829     setBeginBB(cg->beginBB);
830 
831     if (cg->isBackward)
832     {
833         // set up loop
834         setEndBB(cg->endBB);
835         G4_BB*  succ0 = cg->endBB->Succs.front();
836         G4_BB *bb = isCondGoto ? succ0 : nullptr;
837         setExitBB(bb);
838     }
839     else
840     {
841         setEndBB(cg->beginBB);
842         setExitBB(cg->endBB);
843     }
844 }
845 
getExitNode(ANodeBB * exitANBB)846 ANode* ANodeHG::getExitNode(ANodeBB *exitANBB)
847 {
848 #if 0
849     if (exitNode)
850     {
851         return exitNode;
852     }
853 #endif
854 
855     ANode *nd = exitANBB;
856     if (nd)
857     {
858         nd = nd->getCurrentANode();
859     }
860     return nd;
861 }
862 
getInnerMostWhile(ANode * node)863 ANodeHG *getInnerMostWhile(ANode *node)
864 {
865     ANode *tmp = node;
866     while (tmp && tmp->getType() != AN_DO_WHILE)
867     {
868         tmp = tmp->parent;
869     }
870     return (ANodeHG *)tmp;
871 }
872 
873 // If bb is inside this ANode, return the top node (direct child
874 // node), otherwise, return nullptr.
getChildANode(ANode * parent)875 ANode *ANode::getChildANode(ANode *parent)
876 {
877     ANode *nd = this;
878     while (nd && nd->parent != parent)
879     {
880         nd = nd->parent;
881     }
882     return nd;
883 }
884 
885 //  Preprocess CFG so that the structurizer can be simpler without considering
886 /// those corner cases. Currently, it does:
887 //  1) avoid sharing target label among forward gotos and backward gotos
888 //     For example,
889 //          goto L
890 //       L:
891 //          goto L
892 //
893 //     changed to
894 //           goto L0
895 //       L0:      (new empty BB)
896 //       L1:
897 //           goto L1
898 //  2) avoid a fall-thru BB of a backward goto BB is the target BB of another
899 //     backward goto (two back-to-back loops). For example,
900 //          L0:
901 //            (p0) goto L0
902 //          L1 :
903 //            (p1) goto L1
904 //    changed to
905 //          L0:
906 //            (p0) goto L0
907 //          L :         (new empty BB)
908 //          L1 :
909 //            (p1) goto L1
910 // 3) make sure the last BB's pred is its physical predecessor.
911 //    In another word, if there is a case:
912 //          B0 : goto L1
913 //          ...
914 //        L0:
915 //          ...
916 //          B1:  goto L0
917 //        L1:
918 //           ret
919 //   changed to
920 //          B0 : goto L
921 //          ...
922 //       L0:
923 //          ...
924 //          B1: goto L0
925 //       L:            (new empty BB)
926 //       L1:
927 //          ret
928 //   This case is to guarantee that the last HG has its valid, non-null exit BB
929 //   except Basic block ANode. (Without this, the last HG isn't handled completely
930 //   with the current algo.)
preProcess()931 void CFGStructurizer::preProcess()
932 {
933     bool CFGChanged = false;
934     for (BB_LIST_ITER BI = CFG->begin(), BE = CFG->end();
935         BI != BE; ++BI)
936     {
937         G4_BB* B = *BI;
938         if (B->getBBType() & (G4_BB_RETURN_TYPE | G4_BB_INIT_TYPE))
939         {
940             // If B is RETURN_BB, don't insert as all cases should not happen,
941             // so is INIT BB.
942             continue;
943         }
944 
945         bool insertEmptyBBBefore = false;
946         // Both entry's end (with or without subroutine and subroutine's end.
947         G4_BB* phySucc = B->getPhysicalSucc();
948         bool isLastBB = ((std::next(BI) == BE)
949             || (B->Succs.empty() && phySucc && (phySucc->getBBType() & G4_BB_INIT_TYPE))
950             || (B->getBBType() & G4_BB_EXIT_TYPE));
951         if (isLastBB)
952         {
953             for (auto IT = B->Preds.begin(), IE = B->Preds.end(); IT != IE; IT++)
954             {
955                 G4_BB* P = *IT;
956                 if (P == B->getPhysicalPred())
957                 {
958                     // If pred is B's physicalPred, skip. Even if pred actually
959                     // gotos/jmpi to B, the goto/jmpi shall be removed later.
960                     continue;
961                 }
962                 G4_INST* gotoInstP = getGotoInst(P);
963                 G4_INST* jmpiInstP = getJmpiInst(P);
964                 if (!gotoInstP && !jmpiInstP)
965                 {
966                     continue;
967                 }
968                 // case 3
969                 // P must have a forward-goto/jmpi to B (B is the last BB).
970                 insertEmptyBBBefore = true;
971                 break;
972             }
973         }
974 
975         if (!insertEmptyBBBefore && B->Preds.size() >= 2)
976         {
977             // case 1 & 2
978             // Check if this B is a successor of both backward and forward branches.
979             bool isForwardTarget = false;
980             bool isBackwardTarget = false;
981             bool isFallThruOfBackwardGotoBB = false;
982             for (auto IT = B->Preds.begin(), IE = B->Preds.end(); IT != IE; IT++)
983             {
984                 G4_BB* P = *IT;
985                 G4_INST* gotoInstP = getGotoInst(P);
986                 if (!gotoInstP)
987                 {
988                     continue;
989                 }
990                 if (P->getId() >= B->getId())
991                 {
992                     isBackwardTarget = true;
993                     continue;
994                 }
995                 // P is a BB before B
996                 if (P->getPhysicalSucc() != B)
997                 {
998                     isForwardTarget = true;
999                 }
1000                 else if (P->getPhysicalSucc() == B
1001                     && gotoInstP->asCFInst()->isBackward())
1002                 {
1003                     isFallThruOfBackwardGotoBB = true;
1004                 }
1005             }
1006 
1007             if (   (isBackwardTarget && isForwardTarget)              // case 1
1008                 || (isBackwardTarget && isFallThruOfBackwardGotoBB))  // case 2
1009             {
1010                 insertEmptyBBBefore = true;
1011             }
1012         }
1013 
1014         if (!insertEmptyBBBefore)
1015         {
1016             continue;
1017         }
1018 
1019         // Now, create an empty BB right before "B", and adjust all forward
1020         // branching to this new BB and leave all backward branching unchanged.
1021         G4_BB *newBB = createBBWithLabel();
1022         G4_Label *newLabel = newBB->getLabel();
1023 
1024         // Adjust BB's pred/succs
1025         BB_LIST_ITER NextIT;
1026         BB_LIST_ITER IT = B->Preds.begin();
1027         BB_LIST_ITER IE = B->Preds.end();
1028         for (NextIT = IT; IT != IE; IT = NextIT)
1029         {
1030             ++NextIT;
1031             G4_BB* P = *IT;
1032             if (P->getId() >= B->getId())
1033             {
1034                 // keep the backward branch unchanged.
1035                 continue;
1036             }
1037             G4_INST* gotoInst = getGotoInst(P);
1038             G4_INST* jmpiInst = getJmpiInst(P);
1039             if (   !gotoInst && !jmpiInst       // not jump to B
1040                 && P->getPhysicalSucc() != B)   // not fall-thru to B
1041             {
1042                 // Possible mixed goto/if-endif. Skip non-goto/non-jmpi edges.
1043                 continue;
1044             }
1045 
1046             // forward branching/fall-thru "P->B" is changed to "P->newBB"
1047             BBListReplace(P->Succs, B, newBB);
1048             newBB->Preds.push_back(P);
1049 
1050             // Change this branching
1051             if (gotoInst && gotoInst->asCFInst()->getUip() == B->getLabel())
1052             {
1053                 gotoInst->asCFInst()->setUip(newLabel);
1054             }
1055             else if (jmpiInst && jmpiInst->getSrc(0) == B->getLabel())
1056             {
1057                 jmpiInst->setSrc(newLabel, 0);
1058             }
1059 
1060             // the edge from B's Preds
1061             B->Preds.erase(IT);
1062         }
1063         newBB->Succs.push_back(B);
1064         B->Preds.push_front(newBB);
1065 
1066         // insert it into BBs
1067         CFG->insert(BI, newBB);
1068         G4_BB *phyPred = B->getPhysicalPred();
1069         assert(phyPred && "B should have physical pred!");
1070         phyPred->setPhysicalSucc(newBB);
1071         newBB->setPhysicalPred(phyPred);
1072         newBB->setPhysicalSucc(B);
1073         B->setPhysicalPred(newBB);
1074 
1075         CFGChanged = true;
1076     }
1077 
1078     if (CFGChanged)
1079     {
1080         // New BBs generated, reassign block Ids.
1081         CFG->reassignBlockIDs();
1082     }
1083 }
1084 
init()1085 void CFGStructurizer::init()
1086 {
1087 #if _DEBUG
1088     // Default dump level.
1089     // To change dump level while debugging, using setDumpLevel().
1090     dump_level = 0;
1091 #endif
1092 
1093     // Assume G4_BB's id is set from 0 to numOfBBs - 1. This id
1094     // is used to map G4_BB to ANodeBB.
1095     // (Note that removeUnreachableBlocks() right before invoking this
1096     //  pass will reassign block IDs.)
1097 
1098     // First, preprocess CFG so that the structurizer does not need to handle corner cases.
1099     // Currently, it does:
1100     //    1. no forward gotos and backward gotos share the same label (but allow that
1101     //       all forward gotos can share the same label, so can all backward gotos);
1102     //    2. No fall-thru BB of a backward goto BB is also the target BB of another
1103     //       backward goto.
1104     preProcess();
1105 
1106     BBs = &(CFG->getBBList());
1107     numOfBBs = CFG->getNumBB();
1108     numOfANodes = 0;
1109     kernelExecSize = CFG->getKernel()->getSimdSize();
1110 
1111     // caching the flags
1112     doScalarJmp = !CFG->builder->noScalarJmp();
1113     doStructCF = CFG->builder->getOption(vISA_StructurizerCF);
1114 
1115     if (numOfBBs == 0)
1116     {
1117         anodeBBs.IDToANodeBB = nullptr;
1118         return;
1119     }
1120 
1121     anodeBBs.IDToANodeBB = new ANodeBB [numOfBBs];
1122 
1123     // CFG at this moment has subroutine calls explicitly linked (between call and
1124     // callee and callee's return to call's succ). For structurizing purpose, those
1125     // edges should not be considered. Here ANodeBB will have those edges ignored.
1126     for (G4_BB *bb : *CFG)
1127     {
1128         uint32_t id = bb->getId();
1129         ANodeBB *node = &(anodeBBs.IDToANodeBB[id]);
1130         node->anodeId = getNextANodeId();
1131         node->bb = bb;
1132         bool isCallBB = (bb->isEndWithCall() || bb->isEndWithFCall());
1133         bool isReturnBB = (bb->getLastOpcode() == G4_return || bb->isEndWithFRet());
1134         if (isCallBB)
1135         {
1136             G4_BB *returnBB = bb->getPhysicalSucc();
1137             assert(returnBB && "Call BB must have a return BB.");
1138             ANode *succNode = &(anodeBBs.IDToANodeBB[returnBB->getId()]);
1139             node->succs.push_back(succNode);
1140             succNode->preds.push_back(node);
1141 
1142             node->setANodeBBType(ANODEBB_CALL);
1143         }
1144         else if (isReturnBB)
1145         {
1146             node->setANodeBBType(ANODEBB_RETURN);
1147         }
1148         else
1149         {
1150             for (G4_BB *succ : bb->Succs)
1151             {
1152                 ANode *succNode = &(anodeBBs.IDToANodeBB[succ->getId()]);
1153                 node->succs.push_back(succNode);
1154                 succNode->preds.push_back(node);
1155             }
1156         }
1157     }
1158 }
1159 
getANodeBB(G4_BB * bb)1160 ANodeBB* CFGStructurizer::getANodeBB(G4_BB *bb)
1161 {
1162     uint32_t id = bb->getId();
1163     if (id < numOfBBs)
1164     {
1165         return &(anodeBBs.IDToANodeBB[id]);
1166     }
1167 
1168     BBToANodeBBMap::iterator I = anodeBBs.newBBToANodeBB.find(bb);
1169     MUST_BE_TRUE(I != anodeBBs.newBBToANodeBB.end(),
1170                  "Corresponding ANodeBB isn't set up yet");
1171     ANodeBB *ndbb = I->second;
1172     return ndbb;
1173 }
1174 
setANodeBB(ANodeBB * ndbb,G4_BB * bb)1175 void CFGStructurizer::setANodeBB(ANodeBB *ndbb, G4_BB *bb)
1176 {
1177     uint32_t id = bb->getId();
1178     if (id < numOfBBs)
1179     {
1180         MUST_BE_TRUE(false, "ANodeBB has been set up already");
1181         return;
1182     }
1183     MUST_BE_TRUE(anodeBBs.newBBToANodeBB.find(bb) == anodeBBs.newBBToANodeBB.end(),
1184                  "ANodeBB has been in map already");
1185     anodeBBs.newBBToANodeBB[bb] = ndbb;
1186 }
1187 
getInsertAfterBB(G4_BB * bb)1188 G4_BB *CFGStructurizer::getInsertAfterBB(G4_BB *bb)
1189 {
1190     MUST_BE_TRUE(newBBToInsertAfterBB.find(bb) != newBBToInsertAfterBB.end(),
1191                  "The BB isn't a new BB or something else is wrong");
1192     MUST_BE_TRUE(bb->getId() >= numOfBBs, "The BB isn't a new BB");
1193     return newBBToInsertAfterBB[bb];
1194 }
1195 
setInsertAfterBB(G4_BB * newbb,G4_BB * insertAfter)1196 void CFGStructurizer::setInsertAfterBB(G4_BB *newbb, G4_BB *insertAfter)
1197 {
1198     MUST_BE_TRUE(newbb->getId() >= numOfBBs, "The BB isn't a new BB");
1199 
1200     if (insertAfter->getId() < numOfBBs)
1201     {
1202         newBBToInsertAfterBB[newbb] = insertAfter;
1203     }
1204     else
1205     {
1206         newBBToInsertAfterBB[newbb] = getInsertAfterBB(insertAfter);
1207     }
1208 }
1209 
deletePST(ANodeHG * nodehg)1210 void CFGStructurizer::deletePST(ANodeHG *nodehg)
1211 {
1212     for (ANList::iterator I = nodehg->children.begin(),
1213         E = nodehg->children.end();
1214         I != E; ++I)
1215     {
1216         ANode *tmp = *I;
1217         if (tmp->getType() != AN_BB)
1218         {
1219             deletePST((ANodeHG*)tmp);
1220         }
1221     }
1222     nodehg->children.clear();
1223     delete nodehg;
1224 }
1225 
1226 // Return goto inst if bb has one, null otherwise.
getGotoInst(G4_BB * bb)1227 G4_INST* CFGStructurizer::getGotoInst(G4_BB *bb)
1228 {
1229     G4_INST *inst = bb->size() > 0 ? bb->back() : nullptr;
1230     return (inst && inst->opcode() == G4_goto) ? inst : nullptr;
1231 }
1232 
1233 // Return either jmpi inst if bb has one, null otherwise.
getJmpiInst(G4_BB * bb)1234 G4_INST* CFGStructurizer::getJmpiInst(G4_BB *bb)
1235 {
1236     G4_INST *inst = bb->size() > 0 ? bb->back() : nullptr;
1237     return (inst && inst->opcode() == G4_jmpi) ? inst : nullptr;
1238 }
1239 
fini()1240 void CFGStructurizer::fini()
1241 {
1242     for (uint32_t i = 0, size = topANodes.size(); i< size; ++i)
1243     {
1244         deletePST((ANodeHG*)topANodes[i]);
1245     }
1246     topANodes.clear();
1247 
1248     if (numOfBBs > 0)
1249     {
1250         delete[] anodeBBs.IDToANodeBB;
1251         anodeBBs.IDToANodeBB = nullptr;
1252 
1253         for (BBToANodeBBMap::iterator I = anodeBBs.newBBToANodeBB.begin(),
1254                                       E = anodeBBs.newBBToANodeBB.end();
1255             I != E; ++I)
1256         {
1257             ANodeBB *tmp = I->second;
1258             delete tmp;
1259         }
1260         anodeBBs.newBBToANodeBB.clear();
1261 
1262         newBBToInsertAfterBB.clear();
1263     }
1264 }
1265 
ANListGetOther(ANList & anlist,ANode * node)1266 ANode *CFGStructurizer::ANListGetOther(ANList &anlist, ANode *node)
1267 {
1268     if (anlist.size() == 0)
1269     {
1270         return nullptr;
1271     }
1272     ANode *nd = anlist.back();
1273     if (nd == node)
1274     {
1275         nd = anlist.front();
1276     }
1277     return (nd == node) ? nullptr : nd;
1278 }
1279 
1280 // Insert cg into the ordered CGList in the increasing order
1281 // of cg's endBB.
cg_oinsert(CGList & cgs,ControlGraph * cg)1282 void CFGStructurizer::cg_oinsert(CGList &cgs, ControlGraph *cg)
1283 {
1284     bool hasInserted = false;
1285     CGList::iterator I = cgs.begin();
1286     CGList::iterator E = cgs.end();
1287     G4_BB *end = cg->endBB;
1288     while (I != E)
1289     {
1290         CGList::iterator Iter = I;
1291         ++I;
1292         ControlGraph *tmp = *Iter;
1293         if (isBefore(tmp->endBB, end))
1294         {
1295             continue;
1296         }
1297         cgs.insert(Iter, cg);
1298         hasInserted = true;
1299         break;
1300     }
1301     if (!hasInserted)
1302     {
1303         cgs.push_back(cg);
1304     }
1305 }
1306 
1307 // Insert BB into the ordered BB_List in the increasing order of BB
bb_oinsert(BB_LIST & bblist,G4_BB * bb)1308 void CFGStructurizer::bb_oinsert(BB_LIST &bblist, G4_BB *bb)
1309 {
1310     bool hasInserted = false;
1311     BB_LIST_ITER I = bblist.begin();
1312     BB_LIST_ITER E = bblist.end();
1313     while (I != E)
1314     {
1315         BB_LIST_ITER Iter = I;
1316         ++I;
1317         G4_BB *tmp = *Iter;
1318         if (isBefore(tmp, bb))
1319         {
1320             continue;
1321         }
1322         bblist.insert(Iter, bb);
1323         hasInserted = true;
1324         break;
1325     }
1326     if (!hasInserted)
1327     {
1328         bblist.push_back(bb);
1329     }
1330 }
1331 
1332 // Created all CGs whose beginBB is bb. If any, return true and
1333 // all CGs are returned as cgs. If not, return false.
1334 //
1335 // cgs is in order so that the last one will be outmost block
1336 // and the first one is the innermost block. If bb has goto
1337 // inst, the CG for this goto will be the first one in cgs.
getCGBegin(G4_BB * bb,CGList & cgs)1338 bool CFGStructurizer::getCGBegin(G4_BB *bb, CGList &cgs)
1339 {
1340     if (bb->size() == 0)
1341     {
1342         return false;
1343     }
1344 
1345     // If bb's goto is backward to a BB prior to bb, this bb cannot
1346     // be a loop candidate.  For example,
1347     //
1348     //       B0:
1349     //          ......
1350     //       bb: ..
1351     //         (p) goto B0
1352     //       ...
1353     //       B1:
1354     //         (p) goto bb
1355     //  In this case, CG for bb is marked with non loop candidate.
1356     //
1357     G4_INST *bbgotoInst = getGotoInst(bb);
1358     bool isLoopCandidate = true;
1359     if (bbgotoInst && bbgotoInst->asCFInst()->isBackward() &&
1360         bb->Succs.size() > 0 && bb->Succs.back() != bb)
1361     {
1362         isLoopCandidate = false;
1363     }
1364 
1365     ControlGraph *cg;
1366     bool isBegin = false;
1367     // Check if bb is a loop head
1368     for (BB_LIST_ITER I = bb->Preds.begin(), E = bb->Preds.end(); I != E; ++I)
1369     {
1370         G4_BB *pred = *I;
1371         G4_INST *gotoInst = getGotoInst(pred);
1372         if (gotoInst && gotoInst->asCFInst()->isBackward() && pred->Succs.back() == bb)
1373         {
1374             cg = new ControlGraph(bb, pred, gotoInst, true);
1375             cg_oinsert(cgs, cg);
1376             cg->isLoopCandidate = isLoopCandidate;
1377             isBegin = true;
1378         }
1379     }
1380 
1381     // check if its last inst is a forward goto. Note that all backward
1382     // gotos have been processed when the target BB of this backward goto
1383     // is processed.
1384     G4_INST *gotoInst = getGotoInst(bb);
1385     if (gotoInst && !gotoInst->asCFInst()->isBackward())
1386     {
1387         G4_BB *succ = bb->Succs.back();
1388         cg = new ControlGraph(bb, succ, gotoInst, false);
1389         // Add this cg into the beginning of cgs.
1390         cgs.push_front(cg);
1391         isBegin = true;
1392     }
1393     return isBegin;
1394 }
1395 
isGotoScalarJmp(G4_INST * gotoInst)1396 inline bool CFGStructurizer::isGotoScalarJmp(G4_INST *gotoInst)
1397 {
1398     MUST_BE_TRUE(gotoInst->opcode() == G4_goto, "It should be a goto inst");
1399     return gotoInst->getExecSize() == g4::SIMD1 ||
1400            gotoInst->getPredicate() == nullptr ||
1401            gotoInst->asCFInst()->isUniform();
1402 }
1403 
isJoinBB(G4_BB * bb)1404 inline bool CFGStructurizer::isJoinBB(G4_BB* bb)
1405 {
1406     G4_INST* joinInst = bb->getFirstInst();
1407     return joinInst && (joinInst->opcode() == G4_join);
1408 }
1409 
canbeJoinBB(G4_BB * bb)1410 inline bool CFGStructurizer::canbeJoinBB(G4_BB *bb)
1411 {
1412     // We treat while/else/endif as the first inst
1413     // of BB. Whenever a BB has any of them, join
1414     // cannot be inserted as the first inst.
1415     G4_INST* firstInst = bb->getFirstInst();
1416     if (!firstInst)
1417     {
1418         // Emptry BB can always have a join
1419         return true;
1420     }
1421     G4_opcode opc = firstInst->opcode();
1422     return opc != G4_while && opc != G4_endif && opc != G4_else;
1423 }
1424 
1425 // return true if bb's label has not been used by while/else/endif/join
isBBLabelAvailable(G4_BB * bb)1426 bool CFGStructurizer::isBBLabelAvailable(G4_BB *bb)
1427 {
1428     G4_INST* firstInst = bb->getFirstInst();
1429     if (!firstInst)
1430     {
1431         return true;
1432     }
1433     G4_opcode opc = firstInst->opcode();
1434     return opc != G4_while && opc != G4_endif &&
1435            opc != G4_else && opc != G4_join;
1436 }
1437 
ANListReplace(ANList & anlist,ANode * from,ANode * to)1438 void CFGStructurizer::ANListReplace(ANList &anlist, ANode *from, ANode *to)
1439 {
1440     for (ANList::iterator I = anlist.begin(), E = anlist.end(); I != E; ++I)
1441     {
1442         ANode *tmp = *I;
1443         if (tmp == from)
1444         {
1445             (*I) = to;
1446             return;
1447         }
1448     }
1449 }
1450 
BBListReplace(BB_LIST & bblist,G4_BB * from,G4_BB * to)1451 void CFGStructurizer::BBListReplace(BB_LIST &bblist, G4_BB *from, G4_BB *to)
1452 {
1453     for (BB_LIST_ITER I = bblist.begin(), E = bblist.end(); I != E; ++I)
1454     {
1455         G4_BB *tmp = *I;
1456         if (tmp == from)
1457         {
1458             (*I) = to;
1459             return;
1460         }
1461     }
1462     MUST_BE_TRUE(false, "BBList should have to-be-replaced element");
1463 }
1464 
ANListErase(ANList & anlist,ANode * toBeErased)1465 void CFGStructurizer::ANListErase(ANList &anlist, ANode *toBeErased)
1466 {
1467     ANList::iterator I = findANode(anlist, toBeErased);
1468     if (I != anlist.end())
1469     {
1470         anlist.erase(I);
1471     }
1472 }
1473 
BBListErase(BB_LIST & bblist,G4_BB * toBeErased)1474 void CFGStructurizer::BBListErase(BB_LIST &bblist, G4_BB *toBeErased)
1475 {
1476     BB_LIST_ITER I = findBB(&bblist, toBeErased);
1477     if (I != bblist.end())
1478     {
1479         bblist.erase(I);
1480     }
1481 }
1482 
1483 // Given ranges [end, exit] and [end1, exit1], and
1484 //      end < exit && end1 < exit1
1485 // return the last two BBs of them. Note that the order is the
1486 // BB order as given in CFG's BBs list.
1487 //
1488 // Note that it's possible that exit could be nullptr, but end
1489 // will never be nullptr. And under some cases, return exit is
1490 // unknown (set to nullptr).
getNewRange(G4_BB * & end,G4_BB * & exit,G4_BB * end1,G4_BB * exit1)1491 void CFGStructurizer::getNewRange(
1492     G4_BB*& end, G4_BB*& exit, G4_BB* end1, G4_BB* exit1)
1493 {
1494     G4_BB *e0, *e1;
1495     G4_BB* newEnd = isBefore(end, end1) ? end1 : end;
1496 
1497     if (exit && exit1)
1498     {
1499         if (exit == exit1)
1500         {
1501             // exit is still the new exit
1502             end = newEnd;
1503             return;
1504         }
1505 
1506         if (isBefore(exit, exit1))
1507         {
1508             e0 = exit1;
1509             e1 = exit;
1510         }
1511         else
1512         {
1513             e0 = exit;
1514             e1 = exit1;
1515         }
1516 
1517         if (isBefore(newEnd, e1))
1518         {
1519             newEnd = e1;
1520         }
1521         exit = e0;
1522         end = newEnd;
1523         return;
1524     }
1525 
1526     e0 = exit ? exit : exit1;
1527     if (e0 && (e0 == newEnd || isBefore(e0, newEnd)))
1528     {
1529         e0 = nullptr;
1530     }
1531 
1532     exit = e0;
1533     end = newEnd;
1534 }
1535 
1536 // Given a cg, if it definitely does not start a HG, no new HG should
1537 // be created.  Instead, the range covered by this cg will be used to
1538 // extend the current Node's range. This function is used to extend
1539 // Node's range from this cg.
extendNode(ANode * Node,ControlGraph * CG)1540 void CFGStructurizer::extendNode(ANode *Node, ControlGraph *CG)
1541 {
1542     if (!Node)
1543     {
1544         return;
1545     }
1546     // Extending end & exit
1547     G4_BB *begin = CG->beginBB;
1548     G4_BB* end = CG->endBB;
1549     G4_BB* exit;
1550     if (CG->isBackward)
1551     {
1552         G4_BB*  succ0 = end->Succs.front();
1553         bool isCondGoto = (CG->gotoInst->getPredicate() != NULL);
1554         exit = isCondGoto ? succ0 : nullptr;
1555     }
1556     else
1557     {
1558         exit = end;
1559         end = begin;
1560     }
1561 
1562     getNewRange(end, exit, Node->getEndBB(), Node->getExitBB());
1563     Node->setEndBB(end);
1564     Node->setExitBB(exit);
1565 }
1566 
1567 
1568 // Get the immediate enclosing ANodeHG that contains all the preds of AN
1569 // and all AN's backward succs (due to backward gotos).
getEnclosingANodeHG(ANode * AN)1570 ANodeHG* CFGStructurizer::getEnclosingANodeHG(ANode *AN)
1571 {
1572     MUST_BE_TRUE(!ANStack.empty(), "ANStack should not be empty");
1573 
1574     G4_BB *begin = AN->getBeginBB();
1575     int32_t ix = (int32_t) ANStack.size() - 1;
1576     if (ANStack.size() > 1)
1577     {
1578         // As loop's end and begin must be in the same HG. Here, check
1579         // if AN has a backward goto, if so, merge ANode of the loop's
1580         // begin with AN.  However, it is possible that the ANode for
1581         // the loop's begin is not in ANStack anymore (due to previous
1582         // merging); and in this case, find ANode that contains loop's
1583         // begin and merge with AN.
1584         if (AN->getType() == AN_BB)
1585         {
1586             G4_INST *gotoInst = getGotoInst(begin);
1587             if (gotoInst && gotoInst->asCFInst()->isBackward())
1588             {
1589                 uint32_t j;
1590                 ANodeHG *hg = nullptr;
1591                 for (j = (uint32_t) ANStack.size(); j != 0; --j)
1592                 {
1593                     ANodeHG *tmp = (ANodeHG*)ANStack[j - 1];
1594                     if (tmp->isLoopCandidate && tmp->endBB == begin)
1595                     {
1596                         // ANode for innermost loop
1597                         hg = tmp;
1598                         break;
1599                     }
1600                 }
1601                 if (j == 0)
1602                 {
1603                     // loop's begin ANode is no longer on ANStack. Find ANStack's node
1604                     // that enclose the loop's begin.
1605                     G4_BB *loopBeginBB = begin->Succs.back();
1606                     if (loopBeginBB != begin)
1607                     {
1608                         hg = (ANodeHG *)(getANodeBB(loopBeginBB)->getCurrentANode());
1609                         // The parent of ACFG node should be on ANStack
1610                         hg = (ANodeHG *)hg->parent;
1611                     }
1612                 }
1613 
1614                 if (hg)
1615                 {
1616                     MUST_BE_TRUE(hg->ANStackIx >= 0, "ANodeHG's index is wrong");
1617                     if (ix > hg->ANStackIx)
1618                     {
1619                         ix = hg->ANStackIx;
1620                     }
1621                 }
1622             }
1623         }
1624 
1625         for (ANList::iterator II = AN->preds.begin(), IE = AN->preds.end();
1626             II != IE; ++II)
1627         {
1628             ANode *pred = *II;
1629             if (pred == AN || isBefore(begin, pred->getBeginBB()))
1630             {
1631                 // Skip. For pred == AN, they are already in AN;
1632                 // for backward goto, its pred will be processed later.
1633                 continue;
1634             }
1635 
1636             ANodeHG *hg = (ANodeHG *)pred->parent;
1637             if (hg)
1638             {
1639                 MUST_BE_TRUE(hg->ANStackIx >= 0, "ANodeHG's index is wrong");
1640                 if (ix > hg->ANStackIx)
1641                 {
1642                     ix = hg->ANStackIx;
1643                 }
1644             }
1645             else
1646             {
1647                 // Error message
1648                 G4_BB* predBB = pred->getBeginBB();
1649                 G4_INST* predGoto = getGotoInst(predBB);
1650 
1651                 // If predGoto is null, it means that a non-goto branching inst like jmpi jumps
1652                 // into a range of goto instrutions, such as the following:
1653                 //     jmp  A
1654                 //     goto B
1655                 //  A:
1656                 //     ...
1657                 //  B:
1658                 //
1659                 MUST_BE_TRUE(predGoto != nullptr, "Error: Non-goto (like jmp) and goto crossing !");
1660                 MUST_BE_TRUE(false, "Error: unknown control flow in the program.");
1661             }
1662         }
1663         // No need to check succs as forward goto will be handled later
1664         // and backward goto has been handled already!
1665     }
1666 
1667     return (ANodeHG *)ANStack[ix];
1668 }
1669 
1670 // Merge AN with its immediate pred if possible.
1671 //   1. If pred is AN_SEQUENCE, just add AN into pred
1672 //   2. otherwise, create AN_SEQUENCE and include both
1673 // Return the new merged node if merged, or AN otherwise.
mergeWithPred(ANode * AN)1674 ANode *CFGStructurizer::mergeWithPred(ANode *AN)
1675 {
1676     if (AN->parent == nullptr ||
1677         AN->succs.size() != 1 || AN->preds.size() != 1)
1678     {
1679         return AN;
1680     }
1681 
1682     // check if it can be merged with the previous ANode. Note that we
1683     // only need to check its immediate predecessor under the same parent
1684     // as it is the only candidate for possible merging.
1685     ANode *node = AN;
1686     ANodeHG *parent = (ANodeHG *)AN->parent;
1687     ANode *pred = AN->preds.back();
1688     if (pred->succs.size() != 1 || (ANodeHG*)pred->parent != parent)
1689     {
1690         return AN;
1691     }
1692 
1693     // Get AN's physical previous ANode.
1694     ANList::iterator E = parent->children.end();
1695     ANList::iterator I = findANode(parent->children, pred);
1696     MUST_BE_TRUE(I != E, "Child Node isn't in Parent's children list");
1697     ++I;
1698     ANode* predPhySucc = (I != E) ? *I : nullptr;
1699 
1700     ANode *succ = AN->succs.back();
1701     if (predPhySucc == AN)
1702     {
1703         // check:  do we allow self-looping on SEQ node ?
1704         if (pred->getType() == AN_SEQUENCE)
1705         {
1706             ANodeHG *predhg = (ANodeHG *)pred;
1707             ANode *lastNode = predhg->children.back();
1708             AN->parent = predhg;
1709             predhg->children.push_back(AN);
1710             predhg->succs.clear();
1711             predhg->succs.push_back(succ);
1712             ANListReplace(succ->preds, AN, predhg);
1713 
1714             lastNode->succs.clear();
1715             lastNode->succs.push_back(AN);
1716             AN->preds.clear();
1717             AN->preds.push_back(lastNode);
1718 
1719             predhg->setEndBB(AN->getEndBB());
1720             predhg->setExitBB(AN->getExitBB());
1721             if (succ != pred)
1722             {
1723                 predhg->setExitNode(succ);
1724             }
1725 
1726             ANListErase(parent->children, AN);
1727             AN->setInACFG(false);
1728             node = predhg;
1729         }
1730         else
1731         {
1732             ANodeHG *newSeqNode = new ANodeHG(AN_SEQUENCE);
1733             newSeqNode->anodeId = getNextANodeId();
1734             newSeqNode->parent = parent;
1735             pred->parent = newSeqNode;
1736             AN->parent = newSeqNode;
1737             ANList::iterator II = pred->preds.begin();
1738             ANList::iterator IE = pred->preds.end();
1739             while (II != IE)
1740             {
1741                 ANList::iterator Iter = II;
1742                 ++II;
1743                 ANode *tmp = *Iter;
1744                 if (tmp == AN || AN->isMember(tmp))
1745                 {
1746                     newSeqNode->succs.push_back(newSeqNode);
1747                     newSeqNode->preds.push_back(newSeqNode);
1748                 }
1749                 else
1750                 {
1751                     ANListReplace(tmp->succs, pred, newSeqNode);
1752                     newSeqNode->preds.push_back(tmp);
1753                 }
1754             }
1755 
1756             // The above loop has handled case of succ == pred already
1757             if (succ != pred)
1758             {
1759                 ANListReplace(succ->preds, AN, newSeqNode);
1760                 newSeqNode->succs.push_back(succ);
1761             }
1762             newSeqNode->children.push_back(pred);
1763             newSeqNode->children.push_back(AN);
1764             pred->setInACFG(false);
1765             AN->setInACFG(false);
1766 
1767             newSeqNode->setBeginBB(pred->getBeginBB());
1768             newSeqNode->setEndBB(AN->getEndBB());
1769             newSeqNode->setExitBB(AN->getExitBB());
1770 
1771             if (succ != pred)
1772             {
1773                 newSeqNode->setExitNode(succ);
1774             }
1775 
1776             ANListErase(parent->children, pred);
1777             ANListErase(parent->children, AN);
1778             parent->children.push_back(newSeqNode);
1779 
1780             node = newSeqNode;
1781         }
1782     }
1783     return node;
1784 }
1785 
1786 // Condensing a HG into a single node in ACFG. The children of node is still
1787 // a valid partial ACFG. The current ACFG is the all ANode whose inACFG is true.
condenseNode(ANodeHG * node)1788 void CFGStructurizer::condenseNode(ANodeHG *node)
1789 {
1790     ANode *entryNode = node->getEntryNode();
1791     G4_BB *exit = node->getExitBB();
1792     ANList::iterator I = entryNode->preds.begin();
1793     ANList::iterator E = entryNode->preds.end();
1794     while (I != E)
1795     {
1796         ANList::iterator Iter = I;
1797         ++I;
1798         ANode *pred = *Iter;
1799         if (node->isMember(pred))
1800         {
1801             continue;
1802         }
1803 
1804         node->preds.push_back(pred);
1805         ANListReplace(pred->succs, entryNode, node);
1806     }
1807 
1808     if (exit) {
1809         ANode *exitNode = node->getExitNode(getANodeBB(exit));
1810         I = exitNode->preds.begin();
1811         E = exitNode->preds.end();
1812         while (I != E)
1813         {
1814             ANList::iterator iter = I;
1815             ++I;
1816             ANode *pred = *iter;
1817             if (node->isMember(pred))
1818             {
1819                 exitNode->preds.erase(iter);
1820             }
1821         }
1822         exitNode->preds.push_back(node);
1823         node->succs.push_back(exitNode);
1824     }
1825 
1826     // Set children nodes' inAFCG flag to false.
1827     E = node->children.end();
1828     for (I = node->children.begin(); I != E; ++I)
1829     {
1830         ANode* nd = *I;
1831         nd->setInACFG(false);
1832     }
1833 }
1834 
finalizeHG(ANodeHG * node)1835 ANodeHG *CFGStructurizer::finalizeHG(ANodeHG *node)
1836 {
1837     // Identify the type of this node
1838     ANode *entryNode = node->getEntryNode();
1839     ANode *endNode = node->getEndNode();
1840     G4_BB *exitBB = node->getExitBB();
1841     ANode *exitNode = nullptr;
1842     if (exitBB) {
1843         exitNode = node->getExitNode(getANodeBB(exitBB));
1844         if (exitNode)
1845         {
1846             // Set exitNode when node is finalized
1847             node->setExitNode(exitNode);
1848         }
1849     }
1850     G4_BB *begin = node->getBeginBB();
1851     G4_INST *gotoInst = begin->back();
1852     if (endNode->succs.size() > 0 && endNode->succs.back() == entryNode)
1853     {
1854         node->setType(AN_DO_WHILE);
1855     }
1856     else if (entryNode->getType() == AN_BB && gotoInst &&
1857              gotoInst->getPredicate() && !gotoInst->asCFInst()->isBackward())
1858     {
1859         ANodeType ty = AN_COMPOSITE;
1860         if (entryNode->succs.size() == 2)
1861         {
1862             ANode *thenNode = entryNode->succs.front();
1863             ANode *elseNode = entryNode->succs.back();
1864             if (thenNode == elseNode)
1865             {
1866                 // The CFG is not well formed and has the following
1867                 //     BB0: succs=BB1, BB1
1868                 // Simply make it COMPOSITE as an work-around. Will
1869                 // fix igc to not generate such a case.
1870                 ty = AN_COMPOSITE;
1871             }
1872             else if (thenNode->succs.size() == 1 && thenNode->succs.front() == elseNode &&
1873                 elseNode == exitNode)
1874             {
1875                 ty = AN_IF_THEN_ENDIF;
1876             }
1877             else if (thenNode->succs.size() == 1 && elseNode->succs.size() == 1 &&
1878                 thenNode->succs.front() == elseNode->succs.front() &&
1879                 thenNode->succs.front() == exitNode)
1880             {
1881                 ty = AN_IF_THEN_ELSE_ENDIF;
1882             }
1883         }
1884         node->setType(ty);
1885     }
1886     else
1887     {
1888         node->setType(AN_COMPOSITE);
1889     }
1890 
1891     condenseNode(node);
1892 
1893     if (node->getType() == AN_DO_WHILE)
1894     {
1895         reConstructDoWhileforBreak(node);
1896     }
1897 
1898     return node;
1899 }
1900 
findBB(BB_LIST * bblist,G4_BB * bb)1901 BB_LIST_ITER CFGStructurizer::findBB(BB_LIST* bblist, G4_BB *bb)
1902 {
1903     for (BB_LIST_ITER I = bblist->begin(), E = bblist->end(); I != E; ++I)
1904     {
1905         G4_BB *tmp = *I;
1906         if (tmp == bb)
1907         {
1908             return I;
1909         }
1910     }
1911     return bblist->end();
1912 }
1913 
findANode(ANList & anlist,ANode * nd)1914 ANList::iterator CFGStructurizer::findANode(ANList& anlist, ANode *nd)
1915 {
1916     for (ANList::iterator I = anlist.begin(), E = anlist.end(); I != E; ++I)
1917     {
1918         ANode *tmp = *I;
1919         if (tmp == nd)
1920         {
1921             return I;
1922         }
1923     }
1924     return anlist.end();
1925 }
1926 
1927 // Return true if all preds of abb is inside node; false otherwise.
isANodeOnlyPred(G4_BB * abb,ANode * node)1928 bool CFGStructurizer::isANodeOnlyPred(G4_BB *abb, ANode *node)
1929 {
1930     for (BB_LIST_ITER II = abb->Preds.begin(), IE = abb->Preds.end();
1931         II != IE; ++II)
1932     {
1933         G4_BB *tmp = *II;
1934         ANodeBB *ndbb = getANodeBB(tmp);
1935 
1936         // Note that isMember() requires PST is up to date.
1937         if (!node->isMember(ndbb))
1938         {
1939             return false;
1940         }
1941     }
1942     return true;
1943 }
1944 
1945 //
1946 //  This function creates a new BB as node's exit BB. Node must be
1947 //  finalized before invoking this function. InsertAfterIter is the
1948 //  iterator to node->getEndBB(), which is provided if the calling
1949 //  context already has it; otherwise, this function will compute
1950 //  this iterator.
1951 //
1952 //  If updateACFG is true, ACFG will be updated. This is used during
1953 //  the construction of ACFG and the new BB will always in ACFG
1954 //  (as ANodeBB).  Note that PST is up to its caller to update.
1955 //
addLandingBB(ANodeHG * node,BB_LIST_ITER insertAfterIter,bool updateACFG)1956 ANodeBB* CFGStructurizer::addLandingBB(
1957     ANodeHG *node, BB_LIST_ITER insertAfterIter, bool updateACFG)
1958 {
1959     G4_BB *endbb = node->getEndBB();
1960     if (insertAfterIter == BBs->end())
1961     {
1962         insertAfterIter = findBB(BBs, endbb);
1963     }
1964     G4_BB *insertAfterBB = *insertAfterIter;
1965     G4_BB *exitBB = node->getExitBB();
1966 
1967     G4_BB *newBB = createBBWithLabel();
1968     G4_Label *newLabel = newBB->getLabel();
1969     G4_Label *targetLabel = exitBB->getLabel();
1970     setInsertAfterBB(newBB, insertAfterBB);
1971 
1972     // Adjust BB's pred/succs
1973     BB_LIST_ITER BE = exitBB->Preds.end();
1974     BB_LIST_ITER BI = exitBB->Preds.begin();
1975     while (BI != BE)
1976     {
1977         BB_LIST_ITER curr = BI;
1978         ++BI;
1979         G4_BB *bb = *curr;
1980         ANode *tmp = getANodeBB(bb);
1981         if (node->isMember(tmp))
1982         {
1983             BBListErase(exitBB->Preds, bb);
1984             BBListReplace(bb->Succs, exitBB, newBB);
1985             newBB->Preds.push_back(bb);
1986             G4_INST *gotoInst = getGotoInst(bb);
1987             if (gotoInst)
1988             {
1989                 if (gotoInst->asCFInst()->getUip() == targetLabel)
1990                 {
1991                     if (bb == insertAfterBB)
1992                     {
1993                         // remove goto
1994                         bb->pop_back();
1995                     }
1996                     else
1997                     {
1998                         // modify the goto to branch to the new target
1999                         gotoInst->asCFInst()->setUip(newLabel);
2000                     }
2001                 }
2002             }
2003             else
2004             {
2005                 G4_INST *jmpiInst = getJmpiInst(bb);
2006                 if (jmpiInst)
2007                 {
2008                     if (jmpiInst->getSrc(0) == targetLabel)
2009                     {
2010                         if (bb == insertAfterBB)
2011                         {
2012                             // remove jmpi
2013                             bb->pop_back();
2014                         }
2015                         else
2016                         {
2017                             // modify the jmpi to branch to the new target
2018                             jmpiInst->setSrc(newLabel, 0);
2019                         }
2020                     }
2021                 }
2022                 else if (bb != insertAfterBB)
2023                 {
2024                     MUST_BE_TRUE(false, "BB has unknown non-goto/jmpi branch");
2025                 }
2026             }
2027 
2028             // Update exit nodes
2029             tmp = tmp->parent;
2030             while (tmp != node)
2031             {
2032                 if (tmp->getExitBB() == exitBB)
2033                 {
2034                     tmp->setExitBB(newBB);
2035                 }
2036                 tmp = tmp->parent;
2037             }
2038         }
2039     }
2040 
2041     if (insertAfterBB->getPhysicalSucc() != exitBB)
2042     {
2043         G4_INST *gotoInst = CFG->builder->createInternalCFInst(
2044                 NULL, G4_goto, g4::SIMD1, NULL, targetLabel, InstOpt_NoOpt);
2045         newBB->push_back(gotoInst);
2046 
2047         // Consider newBB is part of exitBB, thus use exitBB's DI
2048         updateInstDI(gotoInst, exitBB);
2049     }
2050     newBB->Succs.push_back(exitBB);
2051     exitBB->Preds.push_back(newBB);
2052 
2053     // insert it into BBs
2054     ++insertAfterIter;
2055     BBs->insert(insertAfterIter, newBB);
2056     G4_BB *phySucc = insertAfterBB->getPhysicalSucc();
2057     insertAfterBB->setPhysicalSucc(newBB);
2058     newBB->setPhysicalPred(insertAfterBB);
2059     newBB->setPhysicalSucc(phySucc);
2060     if (phySucc)
2061     {
2062         phySucc->setPhysicalPred(newBB);
2063     }
2064 
2065     ANodeBB *ndbb = new ANodeBB(newBB);
2066     ndbb->anodeId = getNextANodeId();
2067     setANodeBB(ndbb, newBB);
2068 
2069     if (updateACFG)
2070     {
2071         // Node has been finalized. Its children's ACFG is no longer
2072         // up to date. But within this node, acfg is still good;
2073         // only edges outside of this node is no longer valid!
2074         // We will rely on begin/end/exit when doing conversion.
2075         ANode *exitNode = getANodeBB(exitBB)->getCurrentANode();
2076         ANListReplace(exitNode->preds, node, ndbb);
2077         ANListReplace(node->succs, exitNode, ndbb);
2078         ndbb->succs.push_back(exitNode);
2079         ndbb->preds.push_back(node);
2080     }
2081 
2082     node->setExitBB(newBB);
2083     adjustBBId(newBB);
2084 
2085     return ndbb;
2086 }
2087 
2088 // Add a new empty BB as a predecessor of splitBB.
addSplitBBAtBegin(G4_BB * splitBB)2089 ANode *CFGStructurizer::addSplitBBAtBegin(G4_BB *splitBB)
2090 {
2091     G4_BB *newBB = createBBWithLabel();
2092     G4_Label *newLabel = newBB->getLabel();
2093     G4_BB *insertAfter = findInsertAfterBB(splitBB->getPhysicalPred());
2094     setInsertAfterBB(newBB, insertAfter);
2095 
2096     // Adjust BB's pred/succs
2097     newBB->Preds.splice(newBB->Preds.end(), splitBB->Preds);
2098     splitBB->Preds.clear();
2099     splitBB->Preds.push_back(newBB);
2100     newBB->Succs.push_back(splitBB);
2101     BB_LIST_ITER BE = newBB->Preds.end();
2102     BB_LIST_ITER BI = newBB->Preds.begin();
2103     while (BI != BE)
2104     {
2105         BB_LIST_ITER curr = BI;
2106         ++BI;
2107         G4_BB *pred = *curr;
2108         BBListReplace(pred->Succs, splitBB, newBB);
2109         G4_INST *gotoInst = getGotoInst(pred);
2110         if (gotoInst)
2111         {
2112             if (gotoInst->asCFInst()->getUip() == splitBB->getLabel())
2113             {
2114                 // modify the goto to branch to the new target
2115                 gotoInst->asCFInst()->setUip(newLabel);
2116             }
2117         }
2118 
2119         // adjust exit BB
2120         ANode *nd = getANodeBB(pred)->parent;
2121         while (nd)
2122         {
2123             if (nd->getExitBB() == splitBB)
2124             {
2125                 nd->setExitBB(newBB);
2126             }
2127             nd = nd->parent;
2128         }
2129     }
2130 
2131     // insert it into BBs
2132     BB_LIST_ITER iter = findBB(BBs, splitBB);
2133     BBs->insert(iter, newBB);
2134     G4_BB *phyPred = splitBB->getPhysicalPred();
2135     phyPred->setPhysicalSucc(newBB);
2136     newBB->setPhysicalPred(phyPred);
2137     newBB->setPhysicalSucc(splitBB);
2138     splitBB->setPhysicalPred(newBB);
2139 
2140     ANodeBB *ndbb = new ANodeBB(newBB);
2141     ndbb->anodeId = getNextANodeId();
2142     setANodeBB(ndbb, newBB);
2143 
2144     // newBB's id is the largest at this moment. Make sure it is in order
2145     adjustBBId(newBB);
2146     return ndbb;
2147 }
2148 
2149 // Add a new BB that only holds the branch instruction of splitBB
2150 // if it has one, otherwise, the new BB is empty.
addSplitBBAtEnd(G4_BB * splitBB)2151 ANode* CFGStructurizer::addSplitBBAtEnd(G4_BB *splitBB)
2152 {
2153     G4_BB *newBB = createBBWithLabel();
2154     setInsertAfterBB(newBB, splitBB);
2155 
2156     // Adjust splitBB's pred/succs
2157     BB_LIST_ITER BE = splitBB->Succs.end();
2158     BB_LIST_ITER BI = splitBB->Succs.begin();
2159     for (; BI != BE; ++BI)
2160     {
2161         G4_BB *bb = *BI;
2162         BBListReplace(bb->Preds, splitBB, newBB);
2163     }
2164     newBB->Succs.splice(newBB->Succs.end(), splitBB->Succs);
2165     newBB->Preds.push_back(splitBB);
2166     splitBB->Succs.clear();
2167     splitBB->Succs.push_back(newBB);
2168     G4_INST *gotoInst = getGotoInst(splitBB);
2169     if (gotoInst)
2170     {
2171         splitBB->pop_back();
2172         newBB->push_back(gotoInst);
2173     }
2174 
2175     // insert it into BBs
2176     BI = findBB(BBs, splitBB);
2177     ++BI;
2178     BBs->insert(BI, newBB);
2179     G4_BB *phySucc = splitBB->getPhysicalSucc();
2180     splitBB->setPhysicalSucc(newBB);
2181     newBB->setPhysicalPred(splitBB);
2182     newBB->setPhysicalSucc(phySucc);
2183     if (phySucc)
2184     {
2185         phySucc->setPhysicalPred(newBB);
2186     }
2187 
2188     ANodeBB *ndbb = new ANodeBB(newBB);
2189     ndbb->anodeId = getNextANodeId();
2190     setANodeBB(ndbb, newBB);
2191 
2192     ANode *splitNode = getANodeBB(splitBB);
2193     ANode *tmp = splitNode->parent;
2194     while (tmp)
2195     {
2196         if (tmp->getEndBB() == splitBB)
2197         {
2198             tmp->setEndBB(newBB);
2199         }
2200         tmp = tmp->parent;
2201     }
2202 
2203     adjustBBId(newBB);
2204 
2205     return ndbb;
2206 }
2207 
2208 // Start from bb (inclusive), search backward to find the first original BB.
findInsertAfterBB(G4_BB * bb)2209 G4_BB *CFGStructurizer::findInsertAfterBB(G4_BB *bb)
2210 {
2211     G4_BB *insertAfter = bb;
2212     while (insertAfter && insertAfter->getId() >= numOfBBs)
2213     {
2214         insertAfter = insertAfter->getPhysicalPred();
2215     }
2216     return insertAfter;
2217 }
2218 
2219 // Re-connect every predecessors of "from" to its new successor "to". Update
2220 // "to"'s preds and its new predecessors' succs. Note that "from"'s preds are
2221 // left unchanged.
reConnectAllPreds(ANode * from,ANode * to)2222 void CFGStructurizer::reConnectAllPreds(ANode *from, ANode *to)
2223 {
2224     for (ANList::iterator I = from->preds.begin(), E = from->preds.end();
2225         I != E; ++I)
2226     {
2227         ANode *pred = *I;
2228         ANListReplace(pred->succs, from, to);
2229         to->preds.push_back(pred);
2230     }
2231 }
2232 
2233 // Re-connect every successors of "from" to its new predecesor "to". Update
2234 // "to"'s succs and its new successors' preds. Note that "from"'s succs are
2235 // left unchanged.
reConnectAllSuccs(ANode * from,ANode * to)2236 void CFGStructurizer::reConnectAllSuccs(ANode *from, ANode *to)
2237 {
2238     for (ANList::iterator I = from->succs.begin(), E = from->succs.end();
2239         I != E; ++I)
2240     {
2241         ANode *succ = *I;
2242         ANListReplace(succ->preds, from, to);
2243         to->succs.push_back(succ);
2244     }
2245 }
2246 
2247 // This function is to merge ANStack nodes into the new node during HG
2248 // construction.
ANStack_reconstruct(ANodeHG * & node,ANodeBB * ndbb)2249 void CFGStructurizer::ANStack_reconstruct(ANodeHG *& node, ANodeBB *ndbb)
2250 {
2251     ANodeHG *newNode = getEnclosingANodeHG(ndbb);
2252     if (newNode->ANStackIx == ANStack.size() - 1)
2253     {
2254         // No need to do ANStack reconstruction
2255         return;
2256     }
2257 
2258     // Merge all nodes in ANStack[newNode->ANStackIx+1 : ANStack.size() - 1] into newNode
2259     G4_BB *exit = newNode->getExitBB();
2260     G4_BB *end = newNode->getEndBB();
2261 
2262     // Merging from ANStackIx to the top of ANStack is needed to make
2263     // sure the order of children is correct.
2264     for (uint32_t i = newNode->ANStackIx + 1, size = ANStack.size(); i < size; ++i)
2265     {
2266         ANodeHG *nd = (ANodeHG *)ANStack[i];
2267         getNewRange(end, exit, nd->getEndBB(), nd->getExitBB());
2268         for (ANList::iterator I = nd->children.begin(), E = nd->children.end();
2269             I != E; ++I)
2270         {
2271             ANode *tmp = *I;
2272             tmp->parent = newNode;
2273             newNode->children.push_back(tmp);
2274         }
2275     }
2276     newNode->isLoopCandidate = false;
2277     newNode->setExitBB(exit);
2278     newNode->setEndBB(end);
2279     uint32_t n = (uint32_t) ANStack.size() - newNode->ANStackIx - 1;
2280     if (n > 0)
2281     {
2282         for (uint32_t i = 0; i < n; ++i)
2283         {
2284             ANode *nd = ANStack.back();
2285 
2286             // Make sure to delete from newNode' children
2287             ANListErase(newNode->children, nd);
2288 
2289             ANStack.pop_back();
2290             delete nd;
2291         }
2292     }
2293 
2294     node = newNode;
2295     return;
2296 }
2297 
2298 // For case like:
2299 //   do: ...
2300 //       if (p) {
2301 //         break;
2302 //       else
2303 //         ...
2304 //       endif
2305 //       .....
2306 //   while
2307 //
2308 //   if-else-endif does not form a hammock because of break (it has two exits,
2309 //   one is the target of break, the other is the BB right after endif). we
2310 //   will need to make break as "no-op" instruction and then re-run
2311 //   constructPST() on the body of the while loop.
2312 //
2313 //  For now, implement a poor-man's break construction. More general code
2314 //  will come after.
2315 //
reConstructDoWhileforBreak(ANodeHG * whileNode)2316 void CFGStructurizer::reConstructDoWhileforBreak(ANodeHG *whileNode)
2317 {
2318     G4_BB *exitbb = whileNode->getExitBB();
2319     ANList::iterator IB = whileNode->children.begin();
2320     ANList::iterator IE = whileNode->children.end();
2321     ANList::iterator II = IB;
2322     BB_LIST activeJoinBBs;
2323     while (II != IE)
2324     {
2325         ANode *nd = *II;
2326         while (!activeJoinBBs.empty())
2327         {
2328             G4_BB *bb = activeJoinBBs.front();
2329             if (isBefore(nd->getBeginBB(), bb))
2330             {
2331                 break;
2332             }
2333             activeJoinBBs.pop_front();
2334         }
2335 
2336         G4_BB *ebb = nd->getExitBB();
2337         if (ebb != exitbb)
2338         {
2339             if (ebb)
2340             {
2341                 bb_oinsert(activeJoinBBs, ebb);
2342             }
2343             ++II;
2344             continue;
2345         }
2346 
2347         if (nd == whileNode->children.back())
2348         {
2349             ++II;
2350             continue;
2351         }
2352 
2353         whileNode->setHasBreak(true);
2354         if (nd->getType() != AN_BB)
2355         {
2356             // don't handle it.
2357             whileNode->setAllowSCF(false);
2358             break;
2359         }
2360 
2361         ANodeBB *ndbb = (ANodeBB*)nd;
2362         if (II == IB || activeJoinBBs.empty())
2363         {
2364             // Break isn't inside any possible gotos (conservative)
2365             ndbb->setHasBreak(true);
2366             ++II;
2367             continue;
2368         }
2369 
2370         if (ndbb->preds.size() != 1)
2371         {
2372             whileNode->setAllowSCF(false);
2373             break;
2374         }
2375 
2376         ANode *exitNode = ndbb->succs.back();
2377         if (whileNode->isMember(exitNode))
2378         {
2379             exitNode = ndbb->succs.front();
2380         }
2381         ANode *pred = ndbb->preds.back();
2382         ANList::iterator IPred = II;
2383         --IPred;
2384         ANode *phyPred = *IPred;
2385         ANList::iterator IPredPred = IPred;
2386         ANode *phyPredPred = nullptr;
2387         if (IPredPred != IB)
2388         {
2389             --IPredPred;
2390             phyPredPred = *IPredPred;
2391         }
2392 
2393         ANList::iterator ISucc = II;
2394         ++ISucc;
2395         ANode *phySucc = (ISucc != IE) ? *ISucc : nullptr;
2396         ANode *predTarget = ANListGetOther(pred->succs, ndbb);
2397 
2398         // Handle two cases:
2399         //  Case 1 (if-then-endif with break)
2400         //    BB0:
2401         //       [p] goto BB2
2402         //    BB1:
2403         //       break;
2404         //    BB2:
2405         //
2406         // Case 2: (if-then-else-endif with break)
2407         //    BB0:
2408         //       [p] goto BB2
2409         //    BB1:
2410         //       goto BB3;
2411         //    BB2:
2412         //       break;
2413         //    BB3:
2414         //
2415 
2416         // case 1
2417         if (phyPred == pred && pred->getType() == AN_BB &&
2418             predTarget && predTarget == phySucc)
2419         {
2420             bool calNewIB = (IPred == IB);
2421 
2422             ANodeHG *newIf = new ANodeHG(AN_IF_THEN_ENDIF);
2423             newIf->anodeId = getNextANodeId();
2424 
2425             newIf->parent = whileNode;
2426             whileNode->children.erase(IPred);
2427             whileNode->children.erase(II);
2428             whileNode->children.insert(ISucc, newIf);
2429             reConnectAllPreds(pred, newIf);
2430             newIf->succs.push_back(predTarget);
2431             newIf->succs.push_back(exitNode);
2432             ANListReplace(exitNode->preds, ndbb, newIf);
2433             ANListReplace(predTarget->preds, pred, newIf);
2434 
2435             newIf->children.push_back(pred);
2436             newIf->children.push_back(ndbb);
2437             newIf->setBeginBB(pred->getBeginBB());
2438             newIf->setEndBB(ndbb->getEndBB());
2439             newIf->setExitBB(predTarget->getBeginBB());
2440 
2441             ndbb->setHasBreak(true);
2442             newIf->setHasBreak(true);
2443             ndbb->setAllowSCF(true);
2444             newIf->setAllowSCF(true);
2445 
2446             if (calNewIB)
2447             {
2448                 IB = whileNode->children.begin();
2449             }
2450             II = ISucc;
2451         }
2452         // Case 2
2453         else if (predTarget && phyPredPred && phyPredPred == pred &&
2454             phyPred == predTarget && pred->getType() == AN_BB &&
2455             phyPred->succs.size() == 1 && phyPred->succs.front() == phySucc)
2456         {
2457             bool calNewIB = (IPredPred == IB);
2458 
2459             ANodeHG *newIf = new ANodeHG(AN_IF_THEN_ELSE_ENDIF);
2460             newIf->anodeId = getNextANodeId();
2461 
2462             newIf->parent = whileNode;
2463             whileNode->children.erase(IPredPred);
2464             whileNode->children.erase(IPred);
2465             whileNode->children.erase(II);
2466             whileNode->children.insert(ISucc, newIf);
2467             reConnectAllPreds(pred, newIf);
2468             newIf->succs.push_back(phySucc);
2469             newIf->succs.push_back(exitNode);
2470             ANListReplace(exitNode->preds, ndbb, newIf);
2471             ANListReplace(predTarget->preds, pred, newIf);
2472 
2473             newIf->children.push_back(pred);
2474             newIf->children.push_back(phyPred);
2475             newIf->children.push_back(ndbb);
2476             newIf->setBeginBB(pred->getBeginBB());
2477             newIf->setEndBB(ndbb->getEndBB());
2478             newIf->setExitBB(phySucc->getBeginBB());
2479 
2480             ndbb->setHasBreak(true);
2481             newIf->setHasBreak(true);
2482             ndbb->setAllowSCF(true);
2483             newIf->setAllowSCF(true);
2484 
2485             if (calNewIB)
2486             {
2487                 IB = whileNode->children.begin();
2488             }
2489             II = ISucc;
2490         }
2491         else
2492         {
2493             whileNode->setAllowSCF(false);
2494             break;
2495         }
2496     }
2497 
2498     if (!whileNode->getAllowSCF())
2499     {
2500         // If not allowed, propagate this attribute to all
2501         // its children with break inside.
2502         ANList::iterator II = IB;
2503         for (II = IB; II != IE; ++II)
2504         {
2505             ANode *nd = *II;
2506             if (nd->getHasBreak())
2507             {
2508                 nd->setAllowSCF(false);
2509             }
2510         }
2511     }
2512 }
2513 
2514 //
2515 // PST : program structure tree.
2516 //         ANode -- PST's node. It's an abstract class.
2517 //           ANodeBB : ANode -- one for each BB (leaf node)
2518 //           ANodeHG : ANode -- Hammock graph (group of ANodeBB)
2519 //                              (non-leaf node)
2520 //   ANodeBB is for BB, while ANodeHG is a Hammock graph that is
2521 //   composed of seveal BBs (mostly more than one, it could be one).
2522 //   The Hammock graph defined in this implementation is the minimum
2523 //   hammock graph for any goto BB (BB that has goto inst). Nesting
2524 //   relation is represented with ANode's parent field and ANodeHG's
2525 //   children (ANodeBB cannot have children, it is a leaf node). Given
2526 //   two hammock graphs, they either have no common ANodes or one is
2527 //   completely enclosed within the other. The Whole program is a Hammock,
2528 //   and this node is the PST's root node.
2529 //
2530 //   In the implementation, we don't really create ANode for the PST's
2531 //   root. Instead, we construct only hammock children of the Root,
2532 //   denoted by topANodes. This topANodes are refered to as top-level
2533 //   nodes. (Note that for any BB that is b/w any goto BB and its target
2534 //   BB in the program order, an ANode is created for it; otherwise, it
2535 //   has no ANode.)
2536 //
2537 //   Note that the children of each ANodeHG is the ordered list of ANode
2538 //   (in BB order).
2539 //
2540 // ACFG : Abstract Control Flow Graph of ANode (ACFG):
2541 //   This is a CFG of ANode, used in the process of constructing PST.
2542 //   It can be viewed as the top-level PST on-the-fly (ignoring any nodes
2543 //   that are children of the top level nodes). By "on-the-fly", it means
2544 //   that the ACFG is constantly changing. Once a group of nodes in ACFG
2545 //   is formed into a single ANodeHG, this group is replaced by this new
2546 //   single ANodeHG, and the group of nodes is no longer in the ACFG. Such
2547 //   node condensing will make finding interesting program structures (if,
2548 //   while, etc.) easier. The process continues until all BBs are processed.
2549 //   All the nodes in the final ACFG is the actually top-level nodes of the PST.
2550 //   (In another word, ACFG will always refect the control flow among the
2551 //    outmost parent nodes of all ANodeBBs.)
2552 //
2553 //   ACFG's control-flow edges : represented by ANode's preds & succs.
2554 //
2555 //   Note that ACFG edges within any ANodeHG via preds & succs should be still
2556 //   valid, but outgoing/incoming edges are no longer valid. To get a right
2557 //   ANode target, using BB's preds/succs as they are always up to date.
2558 //
2559 // input
2560 //    [IB, IE) : denotes BB from IB to IE, containing IB, but not IE.
2561 // Results
2562 //    topANodes
2563 //
constructPST(BB_LIST_ITER IB,BB_LIST_ITER IE)2564 void CFGStructurizer::constructPST(BB_LIST_ITER IB, BB_LIST_ITER IE)
2565 {
2566     // This algorithm assumes that IE is loop-invariant. For the code
2567     // that might insert/delete BB, pay attention to this rule!
2568 
2569     // ANStack is used to keep all pending ANodes. It also reflects
2570     // possible nesting relation (ANStack[i] could be the parent of
2571     // ANStack[i+1]).
2572     ANodeHG *node = nullptr;
2573     BB_LIST_ITER II = IB;
2574     while (II != IE)
2575     {
2576         G4_BB *bb = *II;
2577         ANodeBB *ndbb = getANodeBB(bb);
2578 
2579         // Do the following cases in order. (Note that bb should be part of
2580         // the current pending ANode (top of ANStack), although it could
2581         // be the beginning node of the new inner ANode.)
2582         //
2583         // 1: If bb is the target of some gotos, that is, the end of
2584         //    some CGs, merge the current bb into ANStack nodes of
2585         //    which those CGs belong to (as they should be in the same
2586         //    hammock).
2587         //    Also, any backward goto's begin and end BB should be in
2588         //    the same hammock. Once merging is done (if any), go on to
2589         //    the next step in the loop.
2590         //
2591         // 2: If bb is the begin of CGs, need to create new HG for each
2592         //    cg that is associated with a backward goto, a forward
2593         //    conditional goto, and the root (first) forward unconditional
2594         //    goto.  No new HG is created for a non-root unconditional
2595         //    goto (see details in code). This is where an inner HG is formed.)
2596         //
2597         // 3: Add bb into this node as it is part of node. If bb can
2598         //    be merged with its pred, merge it. The new node's type
2599         //    must be type AN_SEQUENCE.
2600         //
2601         // 4: Check if bb is the end of the node.  If it is, finalize the
2602         //    node. If it is not, go on to process the next BB in the next
2603         //    iteration (This implies that if the next BB should be part
2604         //    of the current pending HG in ANStack).
2605         //
2606 
2607         // case 1.
2608         if (node)
2609         {
2610             ANStack_reconstruct(node, ndbb);
2611         }
2612 
2613         // case 2
2614         CGList cgs;
2615         if (getCGBegin(bb, cgs))
2616         {
2617             // If bb is the begin of more than one ANode on ANStack, ndbb
2618             // should be added only into innermost ANode. Keep the
2619             // following in mind:
2620             //   1. (a) bb's last inst is a goto.
2621             //          ndbb should be added to the current node (ANStack's
2622             //          top) if it exists and the goto is a unconditional goto;
2623             //          otherwise, a new hammock node is created and ndbb
2624             //          is added into this new hammock node (thus for both
2625             //          conditional goto and a top node).
2626             //      (b) bb's last inst is not a goto
2627             //          add ndbb in the current node.
2628             //   2. If there are several backward gotos with bb as the begin
2629             //      BB, ndbb is added only into the ANode corresponding to the
2630             //      innermost backward goto. (All backward gotos should have their
2631             //      new ANodes created.)
2632             ControlGraph *cg;
2633 
2634             // For backward cg, we will only create ANodeHG if the backward cg
2635             // is a loop candidate. If it is not candidate, extend the current
2636             // node with this cg.
2637             for (uint32_t i = (uint32_t) cgs.size() - 1; i > 0; --i)
2638             {
2639                 // This must be non-innermost backward goto, don't add ndbb to
2640                 // the new node, ie. passing nullptr(2nd arg) to ANodeHG's ctor.
2641                 cg = cgs.back();
2642                 MUST_BE_TRUE(cg->isBackward,
2643                     "Additional CG must be for backward gotos");
2644                 if (!cg->isLoopCandidate)
2645                 {
2646                     assert(node && "Node should not be empty here!");
2647                     extendNode(node, cg);
2648                 }
2649                 else
2650                 {
2651                     ANodeHG *parent = node;
2652                     node = new ANodeHG(cg, nullptr);
2653                     node->anodeId = getNextANodeId();
2654                     node->parent = parent;
2655                     node->isLoopCandidate = true;
2656                     if (parent)
2657                     {
2658                         parent->children.push_back(node);
2659                     }
2660                     ANStack_push(node);
2661                 }
2662                 cgs.pop_back();
2663                 delete cg;
2664             }
2665 
2666             // First one in cgs, it is either an innermost backward goto or a forward goto
2667             cg = cgs.back();
2668             cgs.pop_back();
2669             if (cg->isBackward || cg->gotoInst->getPredicate() != nullptr || node == nullptr)
2670             {
2671                 if (cg->isBackward && !cg->isLoopCandidate)
2672                 {
2673                     assert(node && "Node should not be empty at this point!");
2674                     extendNode(node, cg);
2675                 }
2676                 else
2677                 {
2678                     // New HG node created and pushed on ANStack
2679                     ANodeHG *parent = node;
2680                     node = new ANodeHG(cg, ndbb);
2681                     node->anodeId = getNextANodeId();
2682                     node->parent = parent;
2683                     if (cg->isBackward)
2684                     {
2685                         node->isLoopCandidate = true;
2686                     }
2687                     if (parent)
2688                     {
2689                         parent->children.push_back(node);
2690                     }
2691                     ANStack_push(node);
2692                 }
2693             }
2694             else
2695             {
2696                 // For an unconditional goto, just add it into the current ANodeHG.
2697                 G4_BB *exit = node->getExitBB();
2698                 G4_BB *end = node->getEndBB();
2699                 getNewRange(end, exit, ndbb->getEndBB(), ndbb->getExitBB());
2700                 node->setExitBB(exit);
2701                 node->setEndBB(end);
2702             }
2703             delete cg;
2704         }
2705         else if (!node)
2706         {
2707             // Not a goto BB, skip.
2708             ++II;
2709             continue;
2710         }
2711 
2712         // case 3
2713         // Add this new ANodeBB into node and also check if it can be
2714         // merged with the previous one to form a new SEQUENCE hammock.
2715         if (ndbb->parent != node)
2716         {
2717             node->children.push_back(ndbb);
2718             ndbb->parent = node;
2719         }
2720         (void)mergeWithPred(ndbb);
2721 
2722         // Step 4
2723         if (isBefore(bb, node->getEndBB()))
2724         {
2725             ++II;
2726             continue;
2727         }
2728 
2729         // At this point, bb == node->getEndBB().
2730         // Found a HG if
2731         //    1. bb is a unconditional backward goto, or
2732         //    2. bb's fall-thru is the same as node's exitBB
2733         G4_INST *gotoInst = getGotoInst(bb);
2734         bool uncondBackwardGoto =
2735             gotoInst && bb->Succs.size() == 1 && gotoInst->asCFInst()->isBackward();
2736         if (!(uncondBackwardGoto ||
2737               ndbb->succs.size() == 0 || node->getExitBB() == nullptr ||
2738               ndbb->succs.front()->getBeginBB() == node->getExitBB()))
2739         {
2740             G4_BB *next = bb->getPhysicalSucc();
2741             if (next)
2742             {
2743                 node->setEndBB(next);
2744             }
2745             ++II;
2746             continue;
2747         }
2748 
2749         // At this point, node is a MHG.
2750 
2751         // The top of ANStack is an MHG, poping and condensing it into a new
2752         // ANodeHG. After condensing, it might expose merging opportunity,
2753         // if so, merge the newly condensed node with its predecessor. Once
2754         // merging and/or condensing is performed, check if the new top of
2755         // ANStack is an MHG, if so, doing the above again. Continue this
2756         // until the top of ANStack is not a HG.
2757         //
2758         // At this stage, if a MHG's exit isn't its physical next,  add a
2759         // landingBB as its next physical BB.  Note that the landingBB is
2760         // the new exit BB, which is not part of this MHG. For now, we will
2761         // only generate a single BB at most after each original BB. In
2762         // another word, if more than one MHG share the same exit,  it will
2763         // share this same landingBB (creating more than one BB could make
2764         // jmpi/goto inefficient - two goto/jmpi instead of one.
2765         // (May need a cfg simplification after structurizer.)
2766         //
2767         bool isTopHG = true;
2768 
2769         // Keep track of the outmost SCF so that we can add a landingBB
2770         // only for the oustmost SCF if needed.
2771         ANodeHG *outmostSCF = nullptr;
2772         ANodeHG *lastHG = nullptr;
2773 
2774         G4_BB *exitBB = node->getExitBB();
2775         while (isTopHG)
2776         {
2777             // a Hammock has been detected. Set HG to the right type.
2778             finalizeHG(node);
2779 
2780             // record the outmost structured CF MHG
2781             if (node->isSCF())
2782             {
2783                 outmostSCF = node;
2784             }
2785 
2786             // check if it can be merged with the previous ANode. Note that
2787             // we only need to check its immediate predecessor !
2788             ANode *nd = mergeWithPred(node);
2789             lastHG = (ANodeHG*)nd;
2790 
2791             isTopHG = false;
2792             ANStack_pop();
2793             if (!ANStack.empty())
2794             {
2795                 node = (ANodeHG *)ANStack.back();
2796                 G4_BB *exit = node->getExitBB();
2797                 G4_BB *end = node->getEndBB();
2798                 getNewRange(end, exit, nd->getEndBB(), nd->getExitBB());
2799                 node->setEndBB(end);
2800                 node->setExitBB(exit);
2801 
2802                 if (isBefore(bb, end) || exit != exitBB)
2803                 {
2804                     // No more HG that ends with this exitBB
2805                     continue;
2806                 }
2807                 isTopHG = true;
2808             }
2809         }  // while (istopHG)
2810 
2811         if (outmostSCF)
2812         {
2813             // Add a landingBB and adjust control flows (both CFG and ACFG).
2814             // The new BB will be processed later (so no PST updating is
2815             // needed here).
2816 #if 0
2817             // TODO: for performance, we should only add landingBB for outmostSCF,
2818             // however, doing so would have to special-handling the landingBB as
2819             // it should merge with outmostSCF, which is not in ACFG anymore (not
2820             // a top node anymore). So far, the this function is able to handle
2821             // node in ACFG only.
2822             if (!outmostSCF->isExitPhySucc())
2823             {
2824                 (void)addLandingBB(outmostSCF, II, true);
2825             }
2826 #else
2827             if (!lastHG->isExitPhySucc() && lastHG->getExitBB())
2828             {
2829                 (void)addLandingBB(lastHG, II, true);
2830             }
2831 #endif
2832          }
2833 
2834         if (ANStack.empty() && node)
2835         {
2836             // This is a top ANode, add it.
2837             topANodes.push_back(node);
2838             node = nullptr;
2839         }
2840         ++II;
2841     }  // while (!ANStack.empty())
2842 
2843     MUST_BE_TRUE(ANStack.empty(), "ANodeHG stack is incorrectly formed");
2844 #ifdef _DEBUG
2845 #if  0
2846         dumpcfg();
2847         dumpacfg();
2848 #endif
2849 #endif
2850     return;
2851 }
2852 
2853 // return true if bb0 is before bb1 in the BB list.
isBefore(G4_BB * bb0,G4_BB * bb1)2854 bool CFGStructurizer::isBefore(G4_BB *bb0, G4_BB *bb1)
2855 {
2856     MUST_BE_TRUE(bb0 && bb1, "BB ptrs should not be null");
2857     if (bb0 == bb1)
2858     {
2859         return false;
2860     }
2861 
2862     // common case first
2863     uint32_t id0 = bb0->getId();
2864     uint32_t id1 = bb1->getId();
2865     MUST_BE_TRUE(id0 != id1, "Two different BBs must have different Ids");
2866 
2867     if (id0 < numOfBBs && id1 < numOfBBs)
2868     {
2869         return id0 < id1;
2870     }
2871 
2872     uint32_t insertAtId0 = id0 >= numOfBBs ? getInsertAfterBB(bb0)->getId() : id0;
2873     uint32_t insertAtId1 = id1 >= numOfBBs ? getInsertAfterBB(bb1)->getId() : id1;
2874     if (insertAtId0 != insertAtId1)
2875     {
2876         return insertAtId0 < insertAtId1;
2877     }
2878 
2879     // Now, the case for a BB and its new BB or two new BBs with the same
2880     // insertion position.
2881     //
2882     // Since we make sure the larger the id, the later the position during
2883     // inserting new BBs. We will simply check their ids
2884     return id0 < id1;
2885 }
2886 
2887 
2888 // Insert newNode into parent and insertion place is after node
2889 // if isAfter is true, or before node if isAfter is false.
PSTAddANode(ANodeHG * parent,ANode * node,ANode * newNode,bool isAfter)2890 void CFGStructurizer::PSTAddANode(
2891     ANodeHG *parent, ANode *node, ANode *newNode, bool isAfter)
2892 {
2893     newNode->parent = parent;
2894     if (parent)
2895     {
2896         ANList::iterator I1 = findANode(parent->children, node);
2897         MUST_BE_TRUE(I1 != parent->children.end(),
2898             "Child node should be in parent's children list");
2899         if (isAfter)
2900         {
2901             ++I1;
2902         }
2903         parent->children.insert(I1, newNode);
2904     }
2905 }
2906 
2907 // adjustBBId will make sure that all the new BB's with the same insertion
2908 // position should have ids in the increasing order.
2909 //
2910 // Before calling this function, newBB has to be inserted into
2911 // the BBs and its physicalSucc/Pred has been set up correctly.
adjustBBId(G4_BB * newbb)2912 void CFGStructurizer::adjustBBId(G4_BB *newbb)
2913 {
2914     // newBB's id is the largest at this moment. Make sure it is in order
2915     uint32_t id = newbb->getId();
2916     G4_BB *pred = newbb;
2917     G4_BB *next = newbb->getPhysicalSucc();
2918     while (next && next->getId() >= numOfBBs)
2919     {
2920         pred->setId(next->getId());
2921         pred = next;
2922         next = next->getPhysicalSucc();
2923     }
2924     pred->setId(id);
2925 }
2926 
createBBWithLabel()2927 G4_BB *CFGStructurizer::createBBWithLabel()
2928 {
2929     G4_BB *newBB = CFG->createNewBB();
2930 
2931     // Create a label for the new BB
2932     std::string str = std::string(CFG->builder->kernel.getName()) + "_cf_" + std::to_string(newBB->getId());
2933     G4_Label* lab = CFG->builder->createLabel(str, LABEL_BLOCK);
2934     G4_INST *labelInst = CFG->builder->createLabelInst(lab, false);
2935     newBB->push_front(labelInst);
2936     return newBB;
2937 }
2938 
2939 // Insert an inst at the begin after a label instruction.
insertAtBegin(G4_BB * aBB,G4_INST * anInst)2940 void CFGStructurizer::insertAtBegin(G4_BB* aBB, G4_INST *anInst)
2941 {
2942     INST_LIST_ITER I = aBB->begin();
2943     MUST_BE_TRUE((*I)->opcode() == G4_label,
2944                  "The 1st inst of BB must be a label inst");
2945     ++I;
2946     aBB->insertBefore(I, anInst);
2947 }
2948 
setJoinJIP(G4_BB * joinBB,G4_BB * jipBB)2949 void CFGStructurizer::setJoinJIP(G4_BB* joinBB, G4_BB* jipBB)
2950 {
2951     G4_INST *firstInst = joinBB->getFirstInst();
2952     MUST_BE_TRUE(firstInst, "Missing a join inst in join BB!");
2953     if (firstInst &&
2954         (firstInst->opcode() == G4_join || firstInst->opcode() == G4_endif))
2955     {
2956         G4_Label *jipLabel = jipBB ? jipBB->getLabel() : nullptr;
2957         firstInst->asCFInst()->setJip(jipLabel);
2958         return;
2959     }
2960     MUST_BE_TRUE(false, "Missing Join/endif instruction in join BB!");
2961 }
2962 
convertChildren(ANodeHG * nodehg,G4_BB * nextJoinBB)2963 void CFGStructurizer::convertChildren(ANodeHG *nodehg, G4_BB *nextJoinBB)
2964 {
2965     // JIP setting of Join instruction:
2966     //   activeJoinBBs : a list of join BBs in the increasing order of BB layout,
2967     //                   which are after the BB being processed currently.
2968     //
2969     //   The join BB is the BB with join instruction as its first instruction.
2970     //   If the current BB is the first join BB (if exists) in acticveJoinBBs,
2971     //   its JIP will be set to the 2nd join in activeJoinBBs. Once its JIP is
2972     //   set, this BB is removed from activeJoinBBs.
2973     //
2974     //   activeJoinBBs keeps a list of join BBs whose jips needs to be set.
2975     //   It works like this: for each child ANode, if its exit BB is a join BB,
2976     //   add it into activeJoinBBs (we check if a BB is a join BB by checking
2977     //   if the BB has join instruction in it). As conversion is done bottom-up,
2978     //   all children must have been converted before their parents. This means
2979     //   that a join should be inserted in a child's exit BB if that child does
2980     //   need join. But that join's JIP is not known until the next join is found.
2981     //   Adding unknown-JIP join into activeJoinBBs so that it will get set when
2982     //   next next join is reached.
2983     //
2984     BB_LIST activeJoinBBs;
2985     if (nextJoinBB)
2986     {
2987         // The incoming joinBB is a special one, it could
2988         // be a BB with its else/endif/while/join.
2989         activeJoinBBs.push_back(nextJoinBB);
2990     }
2991 
2992     // If nodehg is DO_WHILE, its end should have been processed in convertDoWhile()
2993     // before invoking this function, hence, skip the end BB.
2994     ANList::iterator II = nodehg->children.begin();
2995     ANList::iterator IE = nodehg->children.end();
2996     if (nodehg->getKind() == ANKIND_SCF && nodehg->getType() == AN_DO_WHILE)
2997     {
2998         if (nodehg->children.size() > 1)
2999         {
3000             --IE;
3001             ANode *loopWhileNode = *IE;
3002             loopWhileNode->setVisited(true);
3003         }
3004         else
3005         {
3006             II = IE;
3007         }
3008     }
3009 
3010     for (; II != IE; ++II)
3011     {
3012         ANode *nd = *II;
3013         G4_BB *begin = nd->getBeginBB();
3014 
3015         G4_BB *nextjoin = nullptr;
3016         while (!activeJoinBBs.empty())
3017         {
3018             nextjoin = activeJoinBBs.front();
3019             if (begin == nextjoin || isBefore(nextjoin, begin))
3020             {
3021                 // nextjoin has been reached. Set its JIP.
3022                 activeJoinBBs.pop_front();
3023                 G4_BB *tmp =
3024                     !activeJoinBBs.empty() ? activeJoinBBs.front() : nullptr;
3025                 setJoinJIP(nextjoin, tmp);
3026             }
3027             else
3028             {
3029                 break;
3030             }
3031         }
3032 
3033         // For loops, need to handle the backward goto and may insert join
3034         for (BB_LIST_ITER I1 = begin->Preds.begin(), E1 = begin->Preds.end();
3035             I1 != E1; ++I1)
3036         {
3037             // Don't process loopWhileNode if it is as it has been processed
3038             G4_BB *bb = *I1;
3039             G4_INST *gotoInst = getGotoInst(bb);
3040             ANodeBB *ndbb = getANodeBB(bb);
3041             ANode *nd1 = ndbb->getChildANode(nodehg);
3042             if (nd1 && nd != nd1 && gotoInst && !nd1->isVisited() &&
3043                 gotoInst->asCFInst()->isBackward() &&
3044                 bb->Succs.back() == begin)
3045             {
3046                 convertPST(nd1, nullptr);
3047                 nd1->setVisited(true);
3048                 G4_BB *loopJoinBB = bb->getPhysicalSucc();
3049                 if (loopJoinBB && isJoinBB(loopJoinBB))
3050                 {
3051                     // Add loop's join bb into activeJoinBBs
3052                     if (findBB(&activeJoinBBs, loopJoinBB) == activeJoinBBs.end())
3053                     {
3054                         bb_oinsert(activeJoinBBs, loopJoinBB);
3055                     }
3056                 }
3057             }
3058         }
3059 
3060         // Skip backward goto ANode, as it has been handled already
3061         if (nd->isVisited())
3062         {
3063             continue;
3064         }
3065 
3066         nextjoin = activeJoinBBs.empty() ? nullptr : activeJoinBBs.front();
3067 
3068         (void)convertPST(nd, nextjoin);
3069         nd->setVisited(true);
3070 
3071         G4_BB *joinbb = nd->getExitBB();
3072         // Special case for ANodeBB of a break.
3073         //   As its exit BB is still the original target (loop exit), there is
3074         //   no join instruction needed at the target as it is implicit loop
3075         //   join place. Here, just set joinbb to nullptr.
3076         //   (need to refactor the code as SCF (except SEQUENCE) should not
3077         //    have a join in its exit.)
3078         if (nd->getType() == AN_BB && nd->getKind() == ANKIND_SCF && nd->getHasBreak())
3079         {
3080             joinbb = nullptr;
3081         }
3082 
3083 #if 0
3084         if (nd->getType() == AN_BB && begin->size() > 0)
3085         {
3086             G4_INST *gotoInst = begin->back();
3087             MUST_BE_TRUE(false, "Backward goto should have ben processed");
3088             if (gotoInst->opcode() == G4_goto && gotoInst->isBackward())
3089             {
3090                 //
3091                 MUST_BE_TRUE(false, "Backward goto should have ben processed");
3092                 joinbb = begin->Succs.front();
3093             }
3094         }
3095 #endif
3096         if (joinbb && isJoinBB(joinbb))
3097         {
3098             // Add exit into activeJoinBBs
3099             if (findBB(&activeJoinBBs, joinbb) == activeJoinBBs.end())
3100             {
3101                 bb_oinsert(activeJoinBBs, joinbb);
3102             }
3103         }
3104     }
3105 
3106     uint32_t sz = (uint32_t)activeJoinBBs.size();
3107 #if 0
3108     // temporarily turn off assert as it is too conservative and miss
3109     // some cases, thus genrate false assertion!
3110 
3111     // activeJoinBBs should have the following at this moment:
3112     //    1. nextJoinBB
3113     //    2. the nodehg's exit(s).
3114     //       nodehg is a hammock, so it should have a single exit. However,
3115     //       for something like "if (c) break endif", and nodehg is somehow
3116     //       decided to be non-SCF (ie converted to goto), nodehg will have
3117     //       two exits.
3118     //  [todo: have a simple/better way to handle this]
3119     //  Note that the outer node of PST may recalculate JIP.
3120     uint32_t NEntries = nextJoinBB ? 2 : 1;
3121     if (nodehg->getHasBreak() && !nodehg->getAllowSCF())
3122     {
3123         ++NEntries;
3124     }
3125     assert(sz <= NEntries && "Something is wrong!");
3126 #endif
3127     if (sz > 0)
3128     {
3129         // lastJoin will never be nullptr.
3130         G4_BB *lastJoin = activeJoinBBs.front();
3131         activeJoinBBs.pop_front();
3132         for (--sz; sz > 0; --sz)
3133         {
3134             if (lastJoin == nextJoinBB)
3135             {
3136                 // don't need to process it further as the outer
3137                 // ANode will handle nextJoinBB and nodes after it.
3138                 break;
3139             }
3140             G4_BB *JipBB = activeJoinBBs.front();
3141             activeJoinBBs.pop_front();
3142             setJoinJIP(lastJoin, JipBB);
3143             lastJoin = JipBB;
3144         }
3145         // Handle the last one.
3146         if (lastJoin != nextJoinBB)
3147         {
3148             // set the last to nullptr
3149             setJoinJIP(lastJoin, nullptr);
3150         }
3151     }
3152     return;
3153 }
3154 
3155 // TODO: more comments
3156 // If then block and else block may have gotos, we will insert join at the end of
3157 // then or else blocks as the new join exit.
convertIf(ANodeHG * node,G4_BB * nextJoinBB)3158 void CFGStructurizer::convertIf(ANodeHG *node, G4_BB *nextJoinBB)
3159 {
3160 
3161 //#if defined(_DEBUG)
3162 //    useDebugEnv();
3163 //#endif
3164 
3165     // nextJoinLabel is used to set JIP for endif instruction
3166     G4_Label *nextJoinLabel = nextJoinBB ? nextJoinBB->getLabel() : nullptr;
3167     G4_BB *begin = node->getBeginBB();
3168     G4_INST *gotoInst = begin->back();
3169     G4_ExecSize execSize = gotoInst->getExecSize();
3170     execSize = execSize > 1 ? execSize : kernelExecSize;
3171     ANList::iterator II = node->children.begin();
3172     ANode *thenNode = *(++II);  // children[1]
3173     G4_BB *end = node->getEndBB();
3174     G4_BB *exit = node->getExitBB();
3175 
3176     MUST_BE_TRUE(end->getPhysicalSucc() == exit,
3177         "Landing BB should have been inserted during construction of hammock graph");
3178 
3179     bool isUniform = isGotoScalarJmp(gotoInst);
3180     ANodeKind kind = ANKIND_GOTOJOIN;
3181     if (doScalarJmp && isUniform)
3182     {
3183         kind = ANKIND_JMPI;
3184     }
3185     else if (doStructCF && node->getAllowSCF())
3186     {
3187         if (node->getHasBreak())
3188         {
3189             ANodeHG *innermostWhile = getInnerMostWhile(node);
3190             MUST_BE_TRUE(innermostWhile, "if-endif with break isn't inside a while");
3191             if (innermostWhile->getKind() == ANKIND_SCF)
3192             {
3193                 kind = ANKIND_SCF;
3194             }
3195         }
3196         else
3197         {
3198             kind = ANKIND_SCF;
3199         }
3200     }
3201     node->setKind(kind);
3202 
3203 
3204     // If this IF-ENDIF does not have break, there must be at least
3205     // one active channel enterning IF-ENDIF and at least one active
3206     // channel at the end of "IF-ENDIF". Thus endif's JIP will never
3207     // be used.
3208     if (!node->getHasBreak())
3209     {
3210         nextJoinLabel = nullptr;
3211     }
3212 
3213     if (node->getHasBreak() && kind == ANKIND_GOTOJOIN)
3214     {
3215         // if break isn't honored, if-endif should not be
3216         // honored and should be treated as non if/endif node.
3217         convertChildren(node, nextJoinBB);
3218         return;
3219     }
3220 
3221     ANodeBB *exitndbb = getANodeBB(exit);
3222     if (kind != ANKIND_JMPI && !isANodeOnlyPred(exit, node) &&
3223         (exitndbb->isJmpiTarget ||
3224          (kind == ANKIND_GOTOJOIN && !isJoinBB(exit)) ||
3225          (kind == ANKIND_SCF && !isBBLabelAvailable(exit))))
3226     {
3227         // To have a new BB for either endif or join
3228         exitndbb = addLandingBB(node, BBs->end(), false);
3229 
3230         // Add it into PST. Will need {[SEQUENCE] Node, exitndbb} as
3231         // a new ANode to replace node.
3232         PSTAddANode((ANodeHG*)node->parent, node, exitndbb, true);
3233 
3234         exit = exitndbb->getBeginBB();
3235     }
3236     G4_Label *endifLabel = exit->getLabel();
3237 
3238     switch (node->type)
3239     {
3240     case AN_IF_THEN_ENDIF:
3241     {
3242         if (kind == ANKIND_JMPI)
3243         {
3244             CFG->convertGotoToJmpi(gotoInst);
3245             exitndbb->isJmpiTarget = true;
3246             (void)convertPST(thenNode, nextJoinBB);
3247             return;
3248         }
3249         else if (kind == ANKIND_GOTOJOIN)
3250         {
3251             // Generate goto/join
3252             generateGotoJoin(begin, exit, exit);
3253             setJoinJIP(exit, nextJoinBB);
3254             (void)convertPST(thenNode, node->getHasBreak() ? exit : nullptr);
3255             return;
3256         }
3257 
3258         G4_INST* endifInst = CFG->builder->createInternalCFInst(
3259             NULL, G4_endif, execSize, nextJoinLabel, NULL, gotoInst->getOption());
3260         // Set CISA index link here explicitly since not doing so causes
3261         // endif to merge with immediately following line. This prevents
3262         // debugger from setting bp on src line immediately following
3263         // the endif.
3264         endifInst->inheritDIFrom(gotoInst);
3265         insertAtBegin(exit, endifInst);
3266 
3267         // jip = uip = endif
3268         G4_Predicate* pred = gotoInst->getPredicate();
3269         MUST_BE_TRUE(pred != NULL, "if must have non-null predicate");
3270         pred->setState(pred->getState() == PredState_Plus ? PredState_Minus : PredState_Plus);
3271         G4_INST* ifInst = CFG->builder->createInternalCFInst(
3272             pred, G4_if, execSize, endifLabel, endifLabel, gotoInst->getOption());
3273         ifInst->inheritDIFrom(endifInst);
3274         begin->pop_back();
3275         begin->push_back(ifInst);
3276         if (isUniform)
3277         {
3278             ifInst->asCFInst()->setUniform(true);
3279         }
3280 
3281         if (thenNode->isHammock())
3282         {
3283             //  if:
3284             //       goto endif
3285             //       ...
3286             //       goto endif
3287             //  else
3288             //       ...
3289             //  endif:
3290             //
3291             //  changed to:
3292             //  if:
3293             //      goto joinbb
3294             //      ...
3295             //      goto joinbb
3296             // joinbb:   <--- insert join instruction here
3297             // else
3298             //      ...
3299             // endif:
3300             //
3301             ANodeBB *ndbb = addLandingBB((ANodeHG*)thenNode, BBs->end(), false);
3302 
3303             // For now, just insert it into node (should create SEQ)
3304             PSTAddANode(node, thenNode, ndbb, true);
3305         }
3306         (void)convertPST(thenNode, node->getHasBreak() ? exit  : nullptr);
3307 
3308         break;
3309     }
3310 
3311     case AN_IF_THEN_ELSE_ENDIF:
3312     {
3313         ANode *elseNode = *(++II); // children[2];
3314         G4_BB *thenLastBB = thenNode->getEndBB();
3315         G4_INST *thenLastInst = thenLastBB->back();
3316 
3317         // May have the case in which thenLastInst isn't goto
3318         //  if (uniformCond)
3319         //     return;
3320         //  else
3321         //     ...
3322         //  endif
3323         MUST_BE_TRUE(
3324             (thenLastInst->opcode() != G4_goto &&
3325              thenLastBB->Succs.size() == 0) ||
3326             (thenLastInst->opcode() == G4_goto &&
3327              thenLastInst->getPredicate() == nullptr),
3328             "Goto in then block should be unconditional");
3329 
3330         if (kind == ANKIND_JMPI)
3331         {
3332             // goto's target is elseBB, and no need to create a new label.
3333             CFG->convertGotoToJmpi(gotoInst);
3334             {
3335                 G4_BB *elsebeginbb = elseNode->getBeginBB();
3336                 ANodeBB *elsendbb = getANodeBB(elsebeginbb);
3337                 elsendbb->isJmpiTarget = true;
3338             }
3339             if (thenLastInst->opcode() == G4_goto)
3340             {
3341                 CFG->convertGotoToJmpi(thenLastInst);
3342                 exitndbb->isJmpiTarget = true;
3343             }
3344 
3345             (void)convertPST(thenNode, nextJoinBB);
3346             (void)convertPST(elseNode, nextJoinBB);
3347             break;
3348         }
3349         else if (kind == ANKIND_GOTOJOIN)
3350         {
3351             // Generate goto/join
3352             G4_BB *elseFirstBB = elseNode->getBeginBB();
3353             generateGotoJoin(begin, elseFirstBB, elseFirstBB);
3354 
3355             // convertPST(thenNode, elseFirstBB) will handle this
3356             //   generateGotoJoin(thenLastBB, elseFirstBB, exit);
3357             (void)convertPST(thenNode, elseFirstBB);
3358             (void)convertPST(elseNode, exit);
3359             setJoinJIP(elseFirstBB, exit);
3360             setJoinJIP(exit, nextJoinBB);
3361 
3362             return;
3363         }
3364 
3365         // Add endif instruction into exit BB, else into the beginning
3366         // BB of else part,  and replace goto in the begin BB with if
3367         // instruction.
3368         auto endifInst = CFG->builder->createInternalCFInst(
3369             NULL, G4_endif, execSize, nextJoinLabel, NULL, gotoInst->getOption());
3370         endifInst->inheritDIFrom(gotoInst);
3371         insertAtBegin(exit, endifInst);
3372 
3373         G4_BB *elseFirstBB = elseNode->getBeginBB();
3374         G4_Label* elseLabel = elseFirstBB->getLabel();
3375         G4_Predicate* pred = gotoInst->getPredicate();
3376         MUST_BE_TRUE(pred != NULL, "if must have non-null predicate");
3377         pred->setState(pred->getState() == PredState_Plus ? PredState_Minus : PredState_Plus);
3378 
3379         // if instruction : jip = else_label, uip = endif
3380         G4_INST* ifInst = CFG->builder->createInternalCFInst(
3381             pred, G4_if, execSize, elseLabel, endifLabel, gotoInst->getOption());
3382         ifInst->inheritDIFrom(endifInst);
3383         begin->pop_back();
3384         begin->push_back(ifInst);
3385 
3386         // else instruction: jip = uip = endif
3387         G4_BB *newThenLastBB = thenLastBB;
3388         if (thenNode->isHammock())
3389         {
3390             ANodeBB *ndbb = addLandingBB((ANodeHG*)thenNode, BBs->end(), false);
3391 
3392             // Correct way to update PST is as {[SEQ]thenNode, newThenLastBB}
3393             // and replace thenNode with this new SEQ node.
3394             // For now, it does not matter much, just use the following
3395             PSTAddANode(node, thenNode, ndbb, true);
3396 
3397             newThenLastBB = ndbb->getBeginBB();
3398         }
3399         G4_INST *thenGoto = newThenLastBB->back();
3400         G4_INST* elseInst = CFG->builder->createInternalCFInst(
3401             NULL, G4_else, execSize, endifLabel, endifLabel, gotoInst->getOption());
3402         elseInst->inheritDIFrom(endifInst);
3403         if (thenGoto->opcode() == G4_goto)
3404         {
3405             newThenLastBB->pop_back();
3406         }
3407         newThenLastBB->push_back(elseInst);
3408         if (isUniform)
3409         {
3410             ifInst->asCFInst()->setUniform(true);
3411         }
3412 
3413         (void)convertPST(thenNode, node->getHasBreak() ? newThenLastBB : nullptr);
3414 
3415         if (elseNode->isHammock())
3416         {
3417             // Insert a join bb
3418             ANode *ndbb = addLandingBB((ANodeHG*)elseNode, BBs->end(), false);
3419             PSTAddANode(node, thenNode, ndbb, true);
3420         }
3421         // exit must be the physical next of this if-endif
3422         (void)convertPST(elseNode, node->getHasBreak() ? exit : nullptr);
3423         break;
3424     }
3425     default:
3426         MUST_BE_TRUE(false, "Unreachable, must be a wrong node type");
3427         break;
3428     }
3429     return;
3430 }
3431 
convertDoWhile(ANodeHG * node,G4_BB * nextJoinBB)3432 void CFGStructurizer::convertDoWhile(ANodeHG *node, G4_BB *nextJoinBB)
3433 {
3434 
3435 //#if defined(_DEBUG)
3436 //    useDebugEnv();
3437 //#endif
3438 
3439     G4_BB *end = node->getEndBB();
3440     G4_INST *gotoInst = end->back();
3441     G4_ExecSize execSize = gotoInst->getExecSize();
3442     execSize = execSize > g4::SIMD1 ? execSize : kernelExecSize;
3443     G4_BB *head = node->getBeginBB();
3444     G4_BB *exit = node->getExitBB();
3445 
3446     // For break/continue(no continue for now), unfortunately hammock does
3447     // not work. Need to specialize it. A generic approach is to
3448     // change all break/continue into a single ANode with its targets ignored
3449     // and just use its physical next as its next. Then re-do hammock only for
3450     // loop's children (no while BB). If every single break/continue are
3451     // within the structured no-loop CF, then it can be break/continue.
3452     // Note that mix of goto-join with break/continue may be not working, so
3453     // need to check it out to avoid generating them.
3454     //
3455 
3456     bool isUniform = isGotoScalarJmp(gotoInst);
3457     ANodeKind ndkind = ANKIND_GOTOJOIN;
3458     if (doScalarJmp && isUniform &&
3459         (!doStructCF || !node->getHasBreak()))
3460     {
3461         // If loop has break, favor structured CF, not jmpi
3462         ndkind = ANKIND_JMPI;
3463     }
3464     else if (doStructCF && node->getAllowSCF())
3465     {
3466         ndkind = ANKIND_SCF;
3467     }
3468     node->setKind(ndkind);
3469     if (ndkind == ANKIND_JMPI)
3470     {
3471         // goto's target is head.
3472         CFG->convertGotoToJmpi(gotoInst);
3473         convertChildren(node, nextJoinBB);
3474     }
3475     else if (ndkind == ANKIND_SCF)
3476     {
3477         G4_Label* doLabel = head->getLabel();
3478 
3479         // Create a new BB to keep while. This new BB
3480         // would be in the same PST ANode as the original end.
3481         ANode *ndbb = addSplitBBAtEnd(end);
3482         node->children.push_back(ndbb);
3483         ndbb->parent = node;
3484 
3485         end = ndbb->getBeginBB();
3486 
3487         // jip = uip = do
3488         G4_INST *whileInst = CFG->builder->createInternalCFInst(
3489             gotoInst->getPredicate(), G4_while, execSize, doLabel, doLabel, InstOpt_NoOpt);
3490         whileInst->inheritDIFrom(end->back());
3491         end->pop_back();
3492         end->push_back(whileInst);
3493         if (isUniform)
3494         {
3495             whileInst->asCFInst()->setUniform(true);
3496         }
3497 
3498         convertChildren(node, node->getHasBreak() ? end : nullptr);
3499     }
3500     else
3501     {
3502         if (!isJoinBB(exit) && !canbeJoinBB(exit))
3503         {
3504             ANode *ndbb = addLandingBB(node, BBs->end(), false);
3505 
3506             // Make it simply by adding newexitbb into node->parent.
3507             // (May use {[SEQ] node, newexitbb} to replace.)
3508             PSTAddANode((ANodeHG*)node->parent, node, ndbb, true);
3509 
3510             exit = ndbb->getBeginBB();
3511         }
3512         generateGotoJoin(end, exit, exit);
3513         setJoinJIP(exit, nextJoinBB);
3514         getANodeBB(end)->setVisited(true);
3515 
3516         convertChildren(node, exit);
3517     }
3518 }
3519 
generateGotoJoin(G4_BB * gotoBB,G4_BB * jibBB,G4_BB * joinBB)3520 void CFGStructurizer::generateGotoJoin(G4_BB *gotoBB, G4_BB *jibBB, G4_BB *joinBB)
3521 {
3522     G4_INST *gotoInst = gotoBB->back();
3523     MUST_BE_TRUE(gotoInst && gotoInst->opcode() == G4_goto,
3524                  "gotoBB should have goto instruction");
3525     G4_Label *nextJoinLabel = jibBB ? jibBB->getLabel() : nullptr;
3526     G4_ExecSize execSize = gotoInst->getExecSize();
3527     execSize = execSize > g4::SIMD1 ? execSize : kernelExecSize;
3528 
3529     if (gotoInst->getExecSize() == g4::SIMD1)
3530     {   // For simd1 goto, convert it to a goto with the right execSize.
3531         gotoInst->setExecSize(execSize);
3532         gotoInst->setOptions(InstOpt_M0);
3533     }
3534     gotoInst->asCFInst()->setJip(nextJoinLabel);
3535 
3536     // set join's JIP to null as it will be handled later
3537     CFG->insertJoinToBB(joinBB, execSize, nullptr);
3538 
3539     if (!gotoInst->asCFInst()->isBackward() && !CFG->builder->gotoJumpOnTrue())
3540     {
3541         // For BDW/SKL, need to flip goto's pred as false condition will jump.
3542         G4_Predicate *pred = gotoInst->getPredicate();
3543         if (pred)
3544         {
3545             pred->setState(pred->getState() == PredState_Plus ? PredState_Minus : PredState_Plus);
3546         }
3547         else
3548         {
3549             // if there is no predicate, generate a predicate with all 0s.
3550             // if predicate is SIMD32, we have to use a :ud dst type for the move
3551             uint8_t numFlags = gotoInst->getExecSize() > g4::SIMD16 ? 2 : 1;
3552             G4_Declare* tmpFlagDcl = CFG->builder->createTempFlag(numFlags);
3553             G4_DstRegRegion* newPredDef = CFG->builder->createDst(tmpFlagDcl->getRegVar(), 0, 0, 1,
3554                 numFlags == 2 ? Type_UD : Type_UW);
3555             G4_INST *predInst = CFG->builder->createMov(
3556                 g4::SIMD1,
3557                 newPredDef,
3558                 CFG->builder->createImm(0, Type_UW),
3559                 InstOpt_WriteEnable, false);
3560             INST_LIST_ITER iter = gotoBB->end();
3561             iter--;
3562             gotoBB->insertBefore(iter, predInst);
3563 
3564             pred = CFG->builder->createPredicate(
3565                 PredState_Plus,
3566                 tmpFlagDcl->getRegVar(),
3567                 0);
3568             gotoInst->setPredicate(pred);
3569         }
3570     }
3571 }
3572 
convertGoto(ANodeBB * node,G4_BB * nextJoinBB)3573 void CFGStructurizer::convertGoto(ANodeBB *node, G4_BB *nextJoinBB)
3574 {
3575     G4_BB *beginbb = node->getEndBB();
3576     G4_INST *gotoInst = getGotoInst(beginbb);
3577     if (!gotoInst || node->isVisited())
3578     {
3579         return;
3580     }
3581     node->setVisited(true);
3582 
3583     bool isUniform = isGotoScalarJmp(gotoInst);
3584     G4_BB *exitbb = node->getExitBB();
3585     ANodeHG *innermostWhile = getInnerMostWhile(node);
3586     ANodeKind kind = ANKIND_GOTOJOIN;
3587     MUST_BE_TRUE(innermostWhile || (!innermostWhile && !node->getHasBreak()),
3588                  "Break isn't inside a while");
3589     if (innermostWhile && node->getHasBreak() &&
3590         innermostWhile->getKind() == ANKIND_SCF)
3591     {
3592         kind = ANKIND_SCF;
3593     }
3594     else
3595     {
3596         // nextJoinBB, if any, must be after getBeginBB(). So, if nextJoinBB
3597         // is before exitbb, nextJoinBB is in the middle of [beginbb, exitbb]
3598         bool hasJoinInMiddle = nextJoinBB && isBefore(nextJoinBB, exitbb);
3599         if (doScalarJmp && isUniform &&
3600             (gotoInst->asCFInst()->isBackward() || !hasJoinInMiddle))
3601         {
3602             kind = ANKIND_JMPI;
3603         }
3604     }
3605     node->setKind(kind);
3606 
3607     //
3608     // generate goto/join
3609     //
3610     G4_ExecSize execSize = gotoInst->getExecSize();
3611     execSize = execSize > g4::SIMD1 ? execSize : kernelExecSize;
3612 
3613     if (kind == ANKIND_JMPI)
3614     {
3615         CFG->convertGotoToJmpi(gotoInst);
3616         {
3617             ANodeBB *exitndbb = getANodeBB(exitbb);
3618             exitndbb->isJmpiTarget = true;
3619         }
3620 
3621         if (beginbb == exitbb || isBefore(exitbb, beginbb))
3622         {
3623             // TODO: Should find the 1st join BB out of this loop instead
3624             //       of using the next BB as join BB (see comments in
3625             //       convertChildren().
3626             G4_BB *joinbb = beginbb->getPhysicalSucc();
3627 
3628             // set join's JIP to null as it will be handled later
3629             CFG->insertJoinToBB(joinbb, execSize, nullptr);
3630         }
3631         return;
3632     }
3633 
3634     if (kind == ANKIND_SCF)
3635     {
3636         G4_BB *whilebb = innermostWhile->getEndBB();
3637         G4_BB *innerBlock = nextJoinBB ? nextJoinBB : whilebb;
3638 
3639         G4_INST* breakInst = CFG->builder->createInternalCFInst(
3640             gotoInst->getPredicate(), G4_break, execSize,
3641             innerBlock->getLabel(), whilebb->getLabel(), InstOpt_NoOpt);
3642         breakInst->inheritDIFrom(gotoInst);
3643         beginbb->pop_back();
3644         beginbb->push_back(breakInst);
3645         if (isUniform)
3646         {
3647             breakInst->asCFInst()->setUniform(true);
3648         }
3649         return;
3650     }
3651 
3652     G4_BB *joinbb = exitbb;
3653     G4_BB *JipBB = joinbb;
3654     if (beginbb == exitbb || isBefore(exitbb, beginbb))
3655     {
3656         joinbb = beginbb->getPhysicalSucc();
3657         JipBB = joinbb;
3658     }
3659     else{
3660         if (nextJoinBB && isBefore(nextJoinBB, JipBB))
3661         {
3662             JipBB = nextJoinBB;
3663         }
3664     }
3665     generateGotoJoin(beginbb, JipBB, joinbb);
3666 }
3667 
convertPST(ANode * node,G4_BB * nextJoinBB)3668 bool CFGStructurizer::convertPST(ANode *node, G4_BB *nextJoinBB)
3669 {
3670     bool change = false;
3671     ANodeType ty = node->getType();
3672     if (ty == AN_IF_THEN_ENDIF ||
3673         ty == AN_IF_THEN_ELSE_ENDIF ||
3674         ty == AN_DO_WHILE ||
3675         ty == AN_SEQUENCE ||
3676         ty == AN_COMPOSITE)
3677     {
3678         ANodeHG *nodehg = (ANodeHG*)node;
3679         if (node->type == AN_DO_WHILE)
3680         {
3681             // For a while loop, it must have at least one active
3682             // channel at exit. Thus, passing nullptr as nextJoinBB!
3683             convertDoWhile(nodehg, nullptr);
3684         }
3685         else if (node->type == AN_IF_THEN_ENDIF ||
3686                  node->type == AN_IF_THEN_ELSE_ENDIF)
3687         {
3688             convertIf(nodehg, nextJoinBB);
3689         }
3690         else
3691         {
3692             convertChildren(nodehg, nextJoinBB);
3693         }
3694         change = true;
3695     }
3696     else if (ty == AN_BB)
3697     {
3698         convertGoto((ANodeBB*)node, nextJoinBB);
3699         change = true;
3700     }
3701     else
3702     {
3703         MUST_BE_TRUE(false, "Wrong ANode type");
3704     }
3705 
3706     return change;
3707 }
3708 
3709 // Get the debug info from DIFromBB and use it as I's debug info
updateInstDI(G4_INST * I,G4_BB * DIFromBB)3710 void CFGStructurizer::updateInstDI(G4_INST* I, G4_BB* DIFromBB)
3711 {
3712     // sanity check
3713     if (DIFromBB == nullptr) return;
3714 
3715     // If DIFromBB is empty, skip setting.
3716     G4_INST* DIFromInst = nullptr;
3717     for (auto II = DIFromBB->begin(), IE = DIFromBB->end(); II != IE; ++II)
3718     {
3719         G4_INST* Inst = (*II);
3720         if (Inst->isLabel() || Inst->isLifeTimeEnd() || Inst->isPseudoKill() ||
3721             Inst->isPseudoUse() || Inst->isSpillIntrinsic() || Inst->isFillIntrinsic())
3722         {
3723             continue;
3724         }
3725         DIFromInst = Inst;
3726         break;
3727     }
3728     if (DIFromInst)
3729     {
3730         I->inheritDIFrom(DIFromInst);
3731     }
3732 }
3733 
run()3734 void CFGStructurizer::run()
3735 {
3736     constructPST(CFG->begin(), CFG->end());
3737 
3738     if (topANodes.size() == 0)
3739     {
3740         return;
3741     }
3742 
3743     // Now, convert ANodes
3744     if (topANodes.size() > 0)
3745     {
3746         for (uint32_t i = 0, size = topANodes.size(); i < size; ++i)
3747         {
3748             (void)convertPST(topANodes[i], nullptr);
3749         }
3750     }
3751 
3752     if (anodeBBs.newBBToANodeBB.size() > 0)
3753     {
3754         // New BBs generated, reassign block Ids.
3755         CFG->reassignBlockIDs();
3756     }
3757 }
3758 
doCFGStructurize(FlowGraph * FG)3759 void doCFGStructurize(FlowGraph *FG)
3760 {
3761     CFGStructurizer S(FG);
3762 
3763     S.run();
3764 }
3765