1 /*========================== begin_copyright_notice ============================
2
3 Copyright (C) 2017-2021 Intel Corporation
4
5 SPDX-License-Identifier: MIT
6
7 ============================= end_copyright_notice ===========================*/
8
9 #include "FlowGraph.h"
10 #include "BuildIR.h"
11
12 using namespace vISA;
13 // This is the debugging code.
14
15 // Type for ANode (Abstract Node)
16 typedef enum
17 {
18 AN_UNUSED,
19
20 // corresponding to each BB, not necessarily a hammock
21 AN_BB,
22
23 // The following are hammock nodes
24 AN_HAMMOCK, // temporarily used
25 AN_IF_THEN_ENDIF,
26 AN_IF_THEN_ELSE_ENDIF,
27 AN_DO_WHILE,
28 AN_COMPOSITE,
29 AN_SEQUENCE
30 } ANodeType;
31
32 // Kind of ANode: indicate the final conversion choice of this ANode.
33 typedef enum {
34 ANKIND_GOTOJOIN, // goto/join
35 ANKIND_JMPI, // scalar jump (uniform branch)
36 ANKIND_SCF // structured CF
37 } ANodeKind;
38
39 namespace {
40
41 enum ANodeBBType
42 {
43 ANODEBB_NORMAL,
44 ANODEBB_CALL,
45 ANODEBB_RETURN
46 };
47
48 class ControlGraph
49 {
50 public:
51 // forward goto : goto in beginBB, endBB is target of goto
52 // backward goto: goto in endBB, beginBB is target of goto
53 G4_BB *beginBB;
54 G4_BB *endBB;
55 G4_INST *gotoInst;
56
57 bool isBackward;
58 bool isLoopCandidate; // Used if isBackward is true
59
ControlGraph(G4_BB * begin,G4_BB * end,G4_INST * inst,bool backward)60 ControlGraph(G4_BB *begin, G4_BB *end, G4_INST* inst, bool backward) :
61 beginBB(begin), endBB(end), gotoInst(inst), isBackward(backward),
62 isLoopCandidate(false)
63 {}
64 };
65
66 // forward declaration
67 class CFGStructurizer;
68 class ANode;
69 class ANodeBB;
70
71 typedef std::vector<G4_BB*> BBVector;
72 typedef std::vector<ANode*> ANVector;
73 typedef std::list<ANode*> ANList;
74 typedef std::vector<ControlGraph*> CGVector;
75 typedef std::list<ControlGraph*> CGList;
76 typedef std::map<G4_BB*, G4_BB*> BBToBBMap;
77 typedef std::map<G4_BB*, ANodeBB*> BBToANodeBBMap;
78 typedef std::map<ANode*, ANode*> ANodeMap;
79
80 // ANode base, abstract class
81 class ANode
82 {
83 public:
84 uint32_t anodeId;
85 ANodeType type;
86 ANList preds;
87 ANList succs;
88 ANode *parent;
89 ANodeKind kind;
90
91
92 // Indicate if this node is in ACFG. ACFG changes as BB is
93 // processed, and once a group of nodes are condensed and
94 // represented by a new, single node, the new node will be
95 // in ACFG and all condensed nodes are no longer in ACFG.
96 bool inACFG;
97
98 // True if structured CF is allowed for this node (for example,
99 // do_while ANode has break inst that we cannot handle for now.
100 // Then, set this field to false so we has to use goto/join.)
101 bool allowSCF;
102
103 // True if it has break. Note that break isn't represented by
104 // a separate ANode type. So, if AN_BB has this field set to
105 // true, its goto is a break. For other ANode, it means it
106 // contains some AN_BB that has break.
107 bool hasBreak;
108
109 bool visited;
110
111 virtual G4_BB* getBeginBB() const = 0;
112 virtual G4_BB* getEndBB() const = 0;
113 virtual G4_BB* getExitBB() const = 0;
114 virtual void setBeginBB(G4_BB *bb) = 0;
115 virtual void setEndBB(G4_BB *bb) = 0;
116 virtual void setExitBB(G4_BB *bb) = 0;
117
ANode()118 explicit ANode() :
119 type(AN_UNUSED), preds(), succs(), parent(nullptr),
120 kind(ANKIND_GOTOJOIN), inACFG(true), allowSCF(true),
121 hasBreak(false), visited(false) {}
ANode(ANodeType Ty)122 explicit ANode(ANodeType Ty) :
123 type(Ty), preds(), succs(), parent(nullptr),
124 kind(ANKIND_GOTOJOIN), inACFG(true), allowSCF(true),
125 hasBreak(false), visited(false) {}
~ANode()126 virtual ~ANode() {}
127
getType() const128 ANodeType getType() const { return type; }
setType(ANodeType t)129 void setType(ANodeType t) { type = t; }
getKind() const130 ANodeKind getKind() const { return kind; }
setKind(ANodeKind t)131 void setKind(ANodeKind t) { kind = t; }
132
isInACFG() const133 bool isInACFG() const { return inACFG; }
setInACFG(bool v)134 void setInACFG(bool v) { inACFG = v; }
135
136 // Return true if this node's type is a SCF type.
137 bool isSCF() const;
138 bool isHammock() const;
139
140 ANode* getCurrentANode() const;
141 ANode* getChildANode(ANode *parent);
142 bool isMember(ANode *nd) const;
143 bool isExitPhySucc() const;
getHasBreak() const144 bool getHasBreak() const { return hasBreak; }
setHasBreak(bool v)145 void setHasBreak(bool v) { hasBreak = v; }
getAllowSCF() const146 bool getAllowSCF() const { return allowSCF; }
setAllowSCF(bool v)147 void setAllowSCF(bool v) { allowSCF = v; }
isVisited() const148 bool isVisited() const { return visited; }
setVisited(bool v)149 void setVisited(bool v) {visited = v; }
150
151 private:
152 // copy-ctor and assignment operator are not allowed
153 ANode(const ANode&);
154 ANode& operator= (const ANode&);
155 };
156
157 // ANodeBB : ANode for a single BB
158 class ANodeBB : public ANode
159 {
160 public:
161 G4_BB *bb;
162 ANodeBBType type;
163 // Used during convertPST() to tell if a BB's label has been used as jmpi's target. As we
164 // convert PST from the outer to inner. If this is true, endif/join etc (to this label) must
165 // need a new BB (new label).
166 // For example:
167 // goto label0 jmpi label0
168 // ... ...
169 // (p) goto label0 ---> if (goto)
170 // ... ...
171 // label0: label_new:
172 // endif (join)
173 // label0:
174 //
175 bool isJmpiTarget;
176
ANodeBB()177 explicit ANodeBB() : ANode(AN_BB), bb(nullptr), type(ANODEBB_NORMAL), isJmpiTarget(false) {}
ANodeBB(G4_BB * b)178 explicit ANodeBB(G4_BB *b) :
179 ANode(AN_BB), bb(b), type(ANODEBB_NORMAL) {}
~ANodeBB()180 virtual ~ANodeBB() {}
181
setANodeBBType(ANodeBBType v)182 void setANodeBBType(ANodeBBType v) { type = v; }
183
184 // ExitBB is the target BB for BB with goto inst or
185 // next BB otherwise. If no successor, return nullptr.
getExitBB() const186 virtual G4_BB* getExitBB() const
187 {
188 if (type == ANODEBB_CALL)
189 {
190 return bb->getPhysicalSucc();
191 }
192 return (type == ANODEBB_RETURN || bb->Succs.size() == 0)
193 ? nullptr : bb->Succs.back();
194 }
getBeginBB() const195 virtual G4_BB* getBeginBB() const { return bb; }
getEndBB() const196 virtual G4_BB* getEndBB() const { return bb; }
197
setExitBB(G4_BB * b)198 virtual void setExitBB(G4_BB *b) { MUST_BE_TRUE(false, "setExitBB() not allowed for ANodeBB"); }
setBeginBB(G4_BB * b)199 virtual void setBeginBB(G4_BB *b) { MUST_BE_TRUE(false, "setBeginBB() not allowed for ANodeBB"); }
setEndBB(G4_BB * b)200 virtual void setEndBB(G4_BB *b) { MUST_BE_TRUE(false, "setEndBB() not allowed for ANodeBB"); }
201 };
202
203 // Hammock Graph
204 class ANodeHG : public ANode
205 {
206 public:
207 ANList children; // in order, 1st one is entryNode.
208
209 // This node has BBs from beginBB to endBB, inclusive; and
210 // every outgoing edges will go to exitBB. exitBB is not
211 // part of this node.
212 G4_BB* beginBB;
213 G4_BB* endBB;
214 G4_BB* exitBB; // not part of this ANode
215 ANode* exitNode; // not used yet
216
217 /* Temporary variables used during ANodeHG constructions */
218 bool isLoopCandidate;
219
220 // Index to ANStack (vector) when this node is on ANStack.
221 // -1 means it is not on ANStack.
222 int32_t ANStackIx;
223
ANodeHG(ANodeType t)224 explicit ANodeHG(ANodeType t) :
225 ANode(t), beginBB(nullptr), endBB(nullptr), exitBB(nullptr),
226 exitNode(nullptr), isLoopCandidate(false), ANStackIx(-1)
227 { }
228 explicit ANodeHG(ControlGraph *cg, ANodeBB *nodebb);
~ANodeHG()229 virtual ~ANodeHG() {}
230
getEntryNode() const231 ANode* getEntryNode() const { return children.size() > 0 ? children.front() : nullptr; }
getEndNode() const232 ANode* getEndNode() const { return children.size() > 0 ? children.back() : nullptr; }
233 ANode* getExitNode(ANodeBB *exitANBB);
setExitNode(ANode * nd)234 void setExitNode(ANode *nd) { exitNode = nd; }
235
getBeginBB() const236 virtual G4_BB* getBeginBB() const { return beginBB; }
getEndBB() const237 virtual G4_BB* getEndBB() const { return endBB; }
getExitBB() const238 virtual G4_BB* getExitBB() const { return exitBB; }
setBeginBB(G4_BB * b)239 virtual void setBeginBB(G4_BB* b) { beginBB = b; }
setEndBB(G4_BB * b)240 virtual void setEndBB(G4_BB *b) { endBB = b; }
setExitBB(G4_BB * b)241 virtual void setExitBB(G4_BB* b) { exitBB = b; }
242 };
243
244 // CFG Structurizer
245 //
246 // [TODO] algo description
247 //
248 // For a CFG that has gotos, convert it to structurized control flow
249 // so that we can use structured control-flow instructions, jmpi,
250 // and goto/join to replace them.
251 //
252 // This algorithm uses the existing order of BBs and will not reorder
253 // them.
254 class CFGStructurizer
255 {
256 private:
257 FlowGraph *CFG;
258 uint32_t numOfBBs;
259
260 // Used to assign unique id to each ANode
261 uint32_t numOfANodes;
262
263 // For new BB, map its ID to its insertion BB,
264 // ie, the original BB after which it is inserted.
265 BBToBBMap newBBToInsertAfterBB;
266
267 // Given a BB, find its ANodeBB.
268 // Access them via setANodeBB()/getANodeBB().
269 struct {
270 ANodeBB *IDToANodeBB;
271 BBToANodeBBMap newBBToANodeBB;
272 } anodeBBs;
273
274 // caching the flags
275 bool doScalarJmp;
276 bool doStructCF;
277
278 // Assume that the entire CFG is the single ANode: Root ANode for
279 // short (We don't actually build it, but imagine we have one). A root
280 // ANodes are the ANodes whose parent is this Root ANode and who
281 // are also hammock. In another word, any BB with goto should belong
282 // to one of topANodes(only one of them, not more than 1).
283 ANVector topANodes;
284
285 // Used during constructing hammock ANodes.
286 ANVector ANStack;
287
288 // BBs is the list of BBs that shows the layout order. Originally,
289 // it is the same as the input CFG->BBs. As the algo goes, new BBs
290 // can be inserted in the list.
291 BB_LIST* BBs; // ptr to CFG->BBs
292 G4_ExecSize kernelExecSize; // default execSize
293
294 void init();
295 void fini();
296 void deletePST(ANodeHG* node);
297 void preProcess();
298
299 // return true if bb is the start of ControlGraph.
300 bool getCGBegin(G4_BB *bb, CGList &cgs);
301 void insertAtBegin(G4_BB* aBB, G4_INST *anInst);
302
303 // Insert 'cg' in the ordered list 'cgs'
304 void cg_oinsert(CGList &cgs, ControlGraph *cg);
305 void bb_oinsert(BB_LIST &bblist, G4_BB *bb);
306 bool isGotoScalarJmp(G4_INST *gotoInst);
307 bool isJoinBB(G4_BB* bb);
308 bool canbeJoinBB(G4_BB *bb);
309 bool isBBLabelAvailable(G4_BB *bb);
310 ANode *ANListGetOther(ANList &anlist, ANode *node);
311 void reConnectAllPreds(ANode *from, ANode *to);
312 void reConnectAllSuccs(ANode *from, ANode *to);
313
314 // Use ANStack_push/ANStack_pop to push/pop ANStack
ANStack_push(ANodeHG * AN)315 void ANStack_push(ANodeHG* AN)
316 {
317 AN->ANStackIx = (uint32_t) ANStack.size();
318 ANStack.push_back(AN);
319 }
320
ANStack_pop()321 ANodeHG *ANStack_pop()
322 {
323 ANodeHG *AN = (ANodeHG *)ANStack.back();
324 ANStack.pop_back();
325 AN->ANStackIx = (uint32_t)(-1);
326 return AN;
327 }
328
getNextANodeId()329 uint32_t getNextANodeId() { return numOfANodes++; }
330 void constructPST(BB_LIST_ITER IB, BB_LIST_ITER IE);
331 void reConstructDoWhileforBreak(ANodeHG *whileNode);
332 void ANStack_reconstruct(ANodeHG *& node, ANodeBB *ndbb);
333 ANodeHG *finalizeHG(ANodeHG *node);
334 void condenseNode(ANodeHG *node);
335
336 // Return true if bb0 appears before bb1, false otherwise.
337 // (If bb0 is the same as bb1, return false.)
338 bool isBefore(G4_BB *bb0, G4_BB *bb1);
339
340 ANodeBB *getANodeBB(G4_BB *b);
341 void setANodeBB(ANodeBB *a, G4_BB *b);
342 G4_BB *createBBWithLabel();
343 G4_BB *getInsertAfterBB(G4_BB *bb);
344 void setInsertAfterBB(G4_BB *newbb, G4_BB *insertAfter);
345 G4_BB* findInsertAfterBB(G4_BB *bb);
346 void adjustBBId(G4_BB *newbb);
347
348 void getNewRange(G4_BB*& end, G4_BB*& exit, G4_BB* end1, G4_BB* exit1);
349 void extendNode(ANode *Node, ControlGraph *CG);
350 ANodeHG* getEnclosingANodeHG(ANode *AN);
351 ANode* mergeWithPred(ANode *AN);
352 ANodeBB* addLandingBB(
353 ANodeHG *node, BB_LIST_ITER insertAfterIter, bool updateACFG);
354 ANode *addSplitBBAtBegin(G4_BB *splitBB);
355 ANode* addSplitBBAtEnd(G4_BB *splitBB);
356 bool isANodeOnlyPred(G4_BB *abb, ANode *node);
357 void PSTAddANode(
358 ANodeHG *parent, ANode *node, ANode *newNode, bool isAfter);
359
360 bool convertPST(ANode *node, G4_BB *nextJoinBB);
361 void setJoinJIP(G4_BB* joinBB, G4_BB* jipBB);
362 void convertChildren(ANodeHG *nodehg, G4_BB *nextJoinBB);
363 void convertIf(ANodeHG *node, G4_BB *nextJoinBB);
364 void convertDoWhile(ANodeHG *node, G4_BB *nextJoinBB);
365 void convertGoto(ANodeBB *node, G4_BB *nextJoinBB);
366 void generateGotoJoin(G4_BB *gotoBB, G4_BB *jibBB, G4_BB *targetBB);
367
368 // helper
369 // return goto inst if any; null otherwise
370 G4_INST *getGotoInst(G4_BB *bb);
371 G4_INST *getJmpiInst(G4_BB *bb);
372 void updateInstDI(G4_INST* I, G4_BB* DIFromBB);
373
374 private:
375 // Don't implement
376 CFGStructurizer(const CFGStructurizer&);
377 CFGStructurizer& operator=(const CFGStructurizer&);
378
379 public:
380
381 static void ANListReplace(ANList &anlist, ANode *from, ANode *to);
382 static void BBListReplace(BB_LIST &bblist, G4_BB *from, G4_BB *to);
383 static BB_LIST_ITER findBB(BB_LIST *bblist, G4_BB *bb);
384 static ANList::iterator findANode(ANList &anlist, ANode *nd);
385 static void ANListErase(ANList &anlist, ANode *toBeErased);
386 static void BBListErase(BB_LIST &bblist, G4_BB *toBeErased);
387
CFGStructurizer(FlowGraph * cfg)388 CFGStructurizer(FlowGraph *cfg) : CFG(cfg)
389 {
390 init();
391 }
392
~CFGStructurizer()393 ~CFGStructurizer()
394 {
395 fini();
396 }
397
398 void run();
399
400 #if 0
401 // Forward definition
402 class DebugSelector;
403 DebugSelector *dbgSel;
404 #endif
405
406 #ifdef _DEBUG
407 void doIndent(uint32_t numOfSpaces); // Generate indent
408 void dump(); // Dump the program(kernel/shader)
409 void dumpcfg(); // Dump the CFG only without instructions
410 void dump(G4_BB *bb); // Dump the given bb
411 void dump(uint32_t BBId); // Dump BB with the given BBId
412 void dump(char *labelString); // Dump BB with the given labelString
413 void dump(ANode *node, uint32_t indentSpaces); // Dump node recursively
414 void dumpacfg(); // Dump ACFG.
415 #endif
416 };
417 } // namespace
418
419 #ifdef _DEBUG
420
421 // 0: default, dumps only CF related instructions (CF instr, label)
422 // 1: All instructions
423 static int dump_level = 0;
424 static const char* currFileName = nullptr;
425 static std::ofstream dump_ofs;
426 static std::ostream* dumpOut = &std::cout; // default
427
428 // use this func or debugger to set dump_level
setDumpLevel(int l)429 void setDumpLevel(int l) { dump_level = l;}
430
431 // Direct output to a given file or std::cout
resetoutput()432 void resetoutput() { dumpOut = & std::cout; } // restore default
setoutputfile(const char * filename)433 void setoutputfile(const char* filename)
434 {
435 if (dump_ofs.is_open()) dump_ofs.close();
436
437 currFileName = filename;
438 dump_ofs.open(filename,
439 std::ofstream::out | std::ofstream::ate | std::ofstream::trunc);
440 dump_ofs.flush();
441 dumpOut = &dump_ofs;
442 }
forceFlush()443 void forceFlush()
444 {
445 if (dump_ofs.is_open())
446 {
447 dump_ofs.close();
448 dump_ofs.open(currFileName,
449 std::ofstream::out | std::ofstream::app);
450 }
451 }
452
453 //
454 // CFG dump routines
455 //
dumpbbbase(G4_BB * bb)456 void dumpbbbase(G4_BB *bb)
457 {
458 if (dump_level == 0)
459 {
460 for (INST_LIST_ITER it = bb->begin(), ie = bb->end();
461 it != ie; ++it)
462 {
463 G4_INST* inst = *it;
464 if (!inst->isLabel() && !inst->isFlowControl())
465 {
466 continue;
467 }
468 bb->emitInstruction(*dumpOut, it);
469 }
470 }
471 else
472 {
473 bb->emit(*dumpOut);
474 }
475 dumpOut->flush();
476 }
477
dumpbb(G4_BB * bb)478 void dumpbb(G4_BB *bb)
479 {
480 (*dumpOut) << "\n[BB" << bb->getId() << "] ";
481 dumpbbbase(bb);
482 }
483
dumpbbid(BB_LIST * bblist,uint32_t BBId)484 void dumpbbid(BB_LIST *bblist, uint32_t BBId)
485 {
486 for (BB_LIST_ITER I = bblist->begin(), E = bblist->end();
487 I != E; ++I)
488 {
489 G4_BB *bb = *I;
490 if (bb->getId() == BBId)
491 {
492 dumpbb(bb);
493 break;
494 }
495 }
496 }
497
dumpbblabel(BB_LIST * bblist,char * labelString)498 void dumpbblabel(BB_LIST *bblist, char* labelString)
499 {
500 for (BB_LIST_ITER I = bblist->begin(), E = bblist->end();
501 I != E; ++I)
502 {
503 G4_BB *bb = *I;
504 G4_Label *labelInst = bb->getLabel();
505 if (labelInst && strcmp(labelInst->getLabel(), labelString) == 0)
506 {
507 dumpbb(bb);
508 break;
509 }
510 }
511 }
512
dumpallbbs(BB_LIST * bblist)513 void dumpallbbs(BB_LIST *bblist)
514 {
515 (*dumpOut) << "Program Dump\n";
516
517 for (BB_LIST_ITER I = bblist->begin(), E = bblist->end();
518 I != E; ++I)
519 {
520 G4_BB *bb = *I;
521 dumpbb(bb);
522 }
523 dumpOut->flush();
524 }
525
dumpbblist(BB_LIST * bblist)526 void dumpbblist(BB_LIST* bblist)
527 {
528 (*dumpOut) << "\nCFG dump\n\n";
529
530 for (BB_LIST_ITER I = bblist->begin(), E = bblist->end();
531 I != E; ++I)
532 {
533 G4_BB *bb = *I;
534 G4_Label *labelInst = bb->getLabel();
535 (*dumpOut) << " BB(" << bb->getId() << ")";
536 if (labelInst)
537 {
538 (*dumpOut) << " " << labelInst->getLabel() << ",";
539 }
540 (*dumpOut) << " Preds:";
541 for (BB_LIST_ITER I = bb->Preds.begin(), E = bb->Preds.end();
542 I != E; ++I)
543 {
544 G4_BB *pred = *I;
545 (*dumpOut) << " " << pred->getId();
546 }
547 (*dumpOut) << " Succs:";
548 for (BB_LIST_ITER I = bb->Succs.begin(), E = bb->Succs.end();
549 I != E; ++I)
550 {
551 G4_BB *succ = *I;
552 (*dumpOut) << " " << succ->getId();
553 }
554 (*dumpOut) << "\n";
555 }
556 dumpOut->flush();
557 }
558
559
560 // CFGStructurizer::dump(), etc cannot be invoked in MSVC's immediate window.
561 // Use file-scope functions with cfgs prefix as work-around.
cfgsdump(CFGStructurizer * p)562 void cfgsdump(CFGStructurizer *p)
563 {
564 p->dump();
565 }
566
cfgsdumpcfg(CFGStructurizer * p)567 void cfgsdumpcfg(CFGStructurizer *p)
568 {
569 p->dumpcfg();
570 }
571
cfgsdump(CFGStructurizer * p,uint32_t BBId)572 void cfgsdump(CFGStructurizer *p, uint32_t BBId)
573 {
574 p->dump(BBId);
575 }
576
cfgsdump(CFGStructurizer * p,G4_BB * bb)577 void cfgsdump(CFGStructurizer *p, G4_BB *bb)
578 {
579 p->dump(bb);
580 }
581
cfgsdump(CFGStructurizer * p,char * labelName)582 void cfgsdump(CFGStructurizer *p, char *labelName)
583 {
584 p->dump(labelName);
585 }
586
cfgsdump(CFGStructurizer * p,ANode * nd)587 void cfgsdump(CFGStructurizer *p, ANode *nd)
588 {
589 p->dump(nd, 0);
590 }
591
cfgsdumpacfg(CFGStructurizer * p)592 void cfgsdumpacfg(CFGStructurizer *p)
593 {
594 p->dumpacfg();
595 }
596
597 // Debugging functions
getANodeTypeString(ANodeType t)598 const char *getANodeTypeString(ANodeType t)
599 {
600 switch (t)
601 {
602 case AN_BB: return "BB";
603 case AN_HAMMOCK: return "HAMMOCK";
604 case AN_IF_THEN_ENDIF: return "IF_THEN_ENDIF";
605 case AN_IF_THEN_ELSE_ENDIF: return "IF_THEN_ELSE_ENDIF";
606 case AN_DO_WHILE: return "DO_WHILE";
607 case AN_COMPOSITE: return "COMPOSITE";
608 case AN_SEQUENCE: return "SEQUENCE";
609 default:
610 return " ";
611 }
612 }
613
getANodeKindString(ANodeKind t)614 const char *getANodeKindString(ANodeKind t)
615 {
616 switch (t)
617 {
618 case ANKIND_GOTOJOIN: return "gotojoin";
619 case ANKIND_JMPI: return "jmpi";
620 case ANKIND_SCF: return "structuredCF";
621 }
622 return " ";
623 }
624
doIndent(uint32_t numOfSpaces)625 void CFGStructurizer::doIndent(uint32_t numOfSpaces)
626 {
627 for (uint32_t ix = 0; ix < numOfSpaces; ++ix)
628 {
629 (*dumpOut) << " ";
630 }
631 }
632
dump(ANode * node,uint32_t level)633 void CFGStructurizer::dump(ANode *node, uint32_t level)
634 {
635 // Just show the single digit nesting level
636 uint32_t showLevel = level % 10;
637 uint32_t indentSpaces = level * 4;
638
639 ANodeType Ty = node->getType();
640 doIndent(indentSpaces);
641 (*dumpOut) << " "
642 << showLevel
643 << "-node("
644 << node->anodeId << ") ["
645 << getANodeTypeString(Ty)
646 << "] begin=BB" << node->getBeginBB()->getId()
647 << " end=BB" << node->getEndBB()->getId();
648 G4_BB *exitbb = node->getExitBB();
649 if (exitbb)
650 {
651 (*dumpOut) << " exit=BB" << exitbb->getId();
652 }
653 else
654 {
655 (*dumpOut) << " exit=NULL";
656 }
657 (*dumpOut) << "\n";
658 doIndent(indentSpaces);
659 (*dumpOut) << " Attr:";
660 if (node->getType() == AN_BB && getGotoInst(node->getEndBB()))
661 {
662 (*dumpOut) << " " << getANodeKindString(node->getKind());
663 }
664 if (node->getHasBreak())
665 {
666 (*dumpOut) << " hasBreak";
667 }
668 if (node->isSCF())
669 {
670 (*dumpOut) << (node->getAllowSCF() ? " allowSCF" : " notAllowSCF");
671 }
672 (*dumpOut) << "\n";
673
674 doIndent(indentSpaces);
675 (*dumpOut) << " preds:";
676 for (ANList::iterator I = node->preds.begin(), E = node->preds.end();
677 I != E; ++I)
678 {
679 ANode *nd = *I;
680 (*dumpOut) << " " << nd->anodeId;
681 }
682 (*dumpOut) << " succs:";
683 for (ANList::iterator I = node->succs.begin(), E = node->succs.end();
684 I != E; ++I)
685 {
686 ANode *nd = *I;
687 (*dumpOut) << " " << nd->anodeId;
688 }
689 (*dumpOut) << "\n";
690 doIndent(indentSpaces);
691 if (node->parent)
692 {
693 (*dumpOut) << " parent: " << node->parent->anodeId << "\n";
694 }
695 else
696 {
697 (*dumpOut) << " parent:\n";
698 }
699 if (node->isHammock())
700 {
701 ANodeHG *ndhg = (ANodeHG*)node;
702 doIndent(indentSpaces);
703 (*dumpOut) << " children:\n";
704 for (ANList::iterator I = ndhg->children.begin(), E = ndhg->children.end();
705 I != E; ++I)
706 {
707 ANode *nd = *I;
708 dump(nd, level + 1);
709 }
710 }
711 dumpOut->flush();
712 }
713
dump(G4_BB * bb)714 void CFGStructurizer::dump(G4_BB *bb)
715 {
716 dumpbbbase(bb);
717 }
718
dump(uint32_t BBId)719 void CFGStructurizer::dump(uint32_t BBId)
720 {
721 dumpbbid(BBs, BBId);
722 }
723
dump(char * labelString)724 void CFGStructurizer::dump(char* labelString)
725 {
726 dumpbblabel(BBs, labelString);
727 }
728
dump()729 void CFGStructurizer::dump()
730 {
731 (*dumpOut) << "Program Dump\n\n";
732 dumpallbbs(BBs);
733 }
734
dumpcfg()735 void CFGStructurizer::dumpcfg()
736 {
737 dumpbblist(BBs);
738 }
739
dumpacfg()740 void CFGStructurizer::dumpacfg()
741 {
742 (*dumpOut) << "\nACFG dump\n\n";
743
744 BB_LIST_ITER I = BBs->begin();
745 BB_LIST_ITER E = BBs->end();
746 while (I != E)
747 {
748 BB_LIST_ITER nextI = I;
749 ++nextI;
750 G4_BB *bb = *I;
751 ANodeBB *ndbb = getANodeBB(bb);
752 if (ndbb)
753 {
754 ANode *nd = ndbb->getCurrentANode();
755 dump(nd, 0);
756 G4_BB *endbb = nd->getEndBB();
757 G4_BB *phyNext = endbb->getPhysicalSucc();
758 nextI = findBB(BBs, phyNext);
759 }
760 I = nextI;
761 }
762 dumpOut->flush();
763 }
764
765 #endif
766
767
isSCF() const768 inline bool ANode::isSCF() const
769 {
770 return (type == AN_IF_THEN_ENDIF ||
771 type == AN_IF_THEN_ELSE_ENDIF ||
772 type == AN_DO_WHILE);
773 }
774
isHammock() const775 inline bool ANode::isHammock() const
776 {
777 return (type == AN_IF_THEN_ENDIF ||
778 type == AN_IF_THEN_ELSE_ENDIF ||
779 type == AN_DO_WHILE ||
780 type == AN_COMPOSITE ||
781 type == AN_SEQUENCE);
782 }
783
getCurrentANode() const784 inline ANode* ANode::getCurrentANode() const
785 {
786 const ANode *nd = this;
787 while (nd && !nd->isInACFG())
788 {
789 nd = nd->parent;
790 }
791 MUST_BE_TRUE(nd && nd->isInACFG(),
792 "ACFG flag must be set incorrectly");
793 return const_cast<ANode*>(nd);
794 }
795
isMember(ANode * nd) const796 inline bool ANode::isMember(ANode *nd) const
797 {
798 while (nd && nd != this)
799 {
800 nd = nd->parent;
801 }
802 return nd == this;
803 }
804
isExitPhySucc() const805 inline bool ANode::isExitPhySucc() const
806 {
807 const G4_BB *endbb = getEndBB();
808 const G4_BB *exitbb = getExitBB();
809 return exitbb && endbb && endbb->getPhysicalSucc() == exitbb;
810 }
811
ANodeHG(ControlGraph * cg,ANodeBB * nodebb)812 ANodeHG::ANodeHG(ControlGraph *cg, ANodeBB* nodebb)
813 : ANode(AN_HAMMOCK), children()
814 {
815 if (nodebb)
816 {
817 children.push_back(nodebb);
818 nodebb->parent = this;
819 }
820 exitNode = nullptr;
821 isLoopCandidate = false;
822 ANStackIx = -1;
823
824 // AN_HAMMOCK is temporary. Its succs/preds won't be availabe until
825 // it is converted to a final hammock node, ie, finalized. So, it
826 // is actually not in ACFG yet.
827
828 bool isCondGoto = (cg->gotoInst->getPredicate() != NULL);
829 setBeginBB(cg->beginBB);
830
831 if (cg->isBackward)
832 {
833 // set up loop
834 setEndBB(cg->endBB);
835 G4_BB* succ0 = cg->endBB->Succs.front();
836 G4_BB *bb = isCondGoto ? succ0 : nullptr;
837 setExitBB(bb);
838 }
839 else
840 {
841 setEndBB(cg->beginBB);
842 setExitBB(cg->endBB);
843 }
844 }
845
getExitNode(ANodeBB * exitANBB)846 ANode* ANodeHG::getExitNode(ANodeBB *exitANBB)
847 {
848 #if 0
849 if (exitNode)
850 {
851 return exitNode;
852 }
853 #endif
854
855 ANode *nd = exitANBB;
856 if (nd)
857 {
858 nd = nd->getCurrentANode();
859 }
860 return nd;
861 }
862
getInnerMostWhile(ANode * node)863 ANodeHG *getInnerMostWhile(ANode *node)
864 {
865 ANode *tmp = node;
866 while (tmp && tmp->getType() != AN_DO_WHILE)
867 {
868 tmp = tmp->parent;
869 }
870 return (ANodeHG *)tmp;
871 }
872
873 // If bb is inside this ANode, return the top node (direct child
874 // node), otherwise, return nullptr.
getChildANode(ANode * parent)875 ANode *ANode::getChildANode(ANode *parent)
876 {
877 ANode *nd = this;
878 while (nd && nd->parent != parent)
879 {
880 nd = nd->parent;
881 }
882 return nd;
883 }
884
885 // Preprocess CFG so that the structurizer can be simpler without considering
886 /// those corner cases. Currently, it does:
887 // 1) avoid sharing target label among forward gotos and backward gotos
888 // For example,
889 // goto L
890 // L:
891 // goto L
892 //
893 // changed to
894 // goto L0
895 // L0: (new empty BB)
896 // L1:
897 // goto L1
898 // 2) avoid a fall-thru BB of a backward goto BB is the target BB of another
899 // backward goto (two back-to-back loops). For example,
900 // L0:
901 // (p0) goto L0
902 // L1 :
903 // (p1) goto L1
904 // changed to
905 // L0:
906 // (p0) goto L0
907 // L : (new empty BB)
908 // L1 :
909 // (p1) goto L1
910 // 3) make sure the last BB's pred is its physical predecessor.
911 // In another word, if there is a case:
912 // B0 : goto L1
913 // ...
914 // L0:
915 // ...
916 // B1: goto L0
917 // L1:
918 // ret
919 // changed to
920 // B0 : goto L
921 // ...
922 // L0:
923 // ...
924 // B1: goto L0
925 // L: (new empty BB)
926 // L1:
927 // ret
928 // This case is to guarantee that the last HG has its valid, non-null exit BB
929 // except Basic block ANode. (Without this, the last HG isn't handled completely
930 // with the current algo.)
preProcess()931 void CFGStructurizer::preProcess()
932 {
933 bool CFGChanged = false;
934 for (BB_LIST_ITER BI = CFG->begin(), BE = CFG->end();
935 BI != BE; ++BI)
936 {
937 G4_BB* B = *BI;
938 if (B->getBBType() & (G4_BB_RETURN_TYPE | G4_BB_INIT_TYPE))
939 {
940 // If B is RETURN_BB, don't insert as all cases should not happen,
941 // so is INIT BB.
942 continue;
943 }
944
945 bool insertEmptyBBBefore = false;
946 // Both entry's end (with or without subroutine and subroutine's end.
947 G4_BB* phySucc = B->getPhysicalSucc();
948 bool isLastBB = ((std::next(BI) == BE)
949 || (B->Succs.empty() && phySucc && (phySucc->getBBType() & G4_BB_INIT_TYPE))
950 || (B->getBBType() & G4_BB_EXIT_TYPE));
951 if (isLastBB)
952 {
953 for (auto IT = B->Preds.begin(), IE = B->Preds.end(); IT != IE; IT++)
954 {
955 G4_BB* P = *IT;
956 if (P == B->getPhysicalPred())
957 {
958 // If pred is B's physicalPred, skip. Even if pred actually
959 // gotos/jmpi to B, the goto/jmpi shall be removed later.
960 continue;
961 }
962 G4_INST* gotoInstP = getGotoInst(P);
963 G4_INST* jmpiInstP = getJmpiInst(P);
964 if (!gotoInstP && !jmpiInstP)
965 {
966 continue;
967 }
968 // case 3
969 // P must have a forward-goto/jmpi to B (B is the last BB).
970 insertEmptyBBBefore = true;
971 break;
972 }
973 }
974
975 if (!insertEmptyBBBefore && B->Preds.size() >= 2)
976 {
977 // case 1 & 2
978 // Check if this B is a successor of both backward and forward branches.
979 bool isForwardTarget = false;
980 bool isBackwardTarget = false;
981 bool isFallThruOfBackwardGotoBB = false;
982 for (auto IT = B->Preds.begin(), IE = B->Preds.end(); IT != IE; IT++)
983 {
984 G4_BB* P = *IT;
985 G4_INST* gotoInstP = getGotoInst(P);
986 if (!gotoInstP)
987 {
988 continue;
989 }
990 if (P->getId() >= B->getId())
991 {
992 isBackwardTarget = true;
993 continue;
994 }
995 // P is a BB before B
996 if (P->getPhysicalSucc() != B)
997 {
998 isForwardTarget = true;
999 }
1000 else if (P->getPhysicalSucc() == B
1001 && gotoInstP->asCFInst()->isBackward())
1002 {
1003 isFallThruOfBackwardGotoBB = true;
1004 }
1005 }
1006
1007 if ( (isBackwardTarget && isForwardTarget) // case 1
1008 || (isBackwardTarget && isFallThruOfBackwardGotoBB)) // case 2
1009 {
1010 insertEmptyBBBefore = true;
1011 }
1012 }
1013
1014 if (!insertEmptyBBBefore)
1015 {
1016 continue;
1017 }
1018
1019 // Now, create an empty BB right before "B", and adjust all forward
1020 // branching to this new BB and leave all backward branching unchanged.
1021 G4_BB *newBB = createBBWithLabel();
1022 G4_Label *newLabel = newBB->getLabel();
1023
1024 // Adjust BB's pred/succs
1025 BB_LIST_ITER NextIT;
1026 BB_LIST_ITER IT = B->Preds.begin();
1027 BB_LIST_ITER IE = B->Preds.end();
1028 for (NextIT = IT; IT != IE; IT = NextIT)
1029 {
1030 ++NextIT;
1031 G4_BB* P = *IT;
1032 if (P->getId() >= B->getId())
1033 {
1034 // keep the backward branch unchanged.
1035 continue;
1036 }
1037 G4_INST* gotoInst = getGotoInst(P);
1038 G4_INST* jmpiInst = getJmpiInst(P);
1039 if ( !gotoInst && !jmpiInst // not jump to B
1040 && P->getPhysicalSucc() != B) // not fall-thru to B
1041 {
1042 // Possible mixed goto/if-endif. Skip non-goto/non-jmpi edges.
1043 continue;
1044 }
1045
1046 // forward branching/fall-thru "P->B" is changed to "P->newBB"
1047 BBListReplace(P->Succs, B, newBB);
1048 newBB->Preds.push_back(P);
1049
1050 // Change this branching
1051 if (gotoInst && gotoInst->asCFInst()->getUip() == B->getLabel())
1052 {
1053 gotoInst->asCFInst()->setUip(newLabel);
1054 }
1055 else if (jmpiInst && jmpiInst->getSrc(0) == B->getLabel())
1056 {
1057 jmpiInst->setSrc(newLabel, 0);
1058 }
1059
1060 // the edge from B's Preds
1061 B->Preds.erase(IT);
1062 }
1063 newBB->Succs.push_back(B);
1064 B->Preds.push_front(newBB);
1065
1066 // insert it into BBs
1067 CFG->insert(BI, newBB);
1068 G4_BB *phyPred = B->getPhysicalPred();
1069 assert(phyPred && "B should have physical pred!");
1070 phyPred->setPhysicalSucc(newBB);
1071 newBB->setPhysicalPred(phyPred);
1072 newBB->setPhysicalSucc(B);
1073 B->setPhysicalPred(newBB);
1074
1075 CFGChanged = true;
1076 }
1077
1078 if (CFGChanged)
1079 {
1080 // New BBs generated, reassign block Ids.
1081 CFG->reassignBlockIDs();
1082 }
1083 }
1084
init()1085 void CFGStructurizer::init()
1086 {
1087 #if _DEBUG
1088 // Default dump level.
1089 // To change dump level while debugging, using setDumpLevel().
1090 dump_level = 0;
1091 #endif
1092
1093 // Assume G4_BB's id is set from 0 to numOfBBs - 1. This id
1094 // is used to map G4_BB to ANodeBB.
1095 // (Note that removeUnreachableBlocks() right before invoking this
1096 // pass will reassign block IDs.)
1097
1098 // First, preprocess CFG so that the structurizer does not need to handle corner cases.
1099 // Currently, it does:
1100 // 1. no forward gotos and backward gotos share the same label (but allow that
1101 // all forward gotos can share the same label, so can all backward gotos);
1102 // 2. No fall-thru BB of a backward goto BB is also the target BB of another
1103 // backward goto.
1104 preProcess();
1105
1106 BBs = &(CFG->getBBList());
1107 numOfBBs = CFG->getNumBB();
1108 numOfANodes = 0;
1109 kernelExecSize = CFG->getKernel()->getSimdSize();
1110
1111 // caching the flags
1112 doScalarJmp = !CFG->builder->noScalarJmp();
1113 doStructCF = CFG->builder->getOption(vISA_StructurizerCF);
1114
1115 if (numOfBBs == 0)
1116 {
1117 anodeBBs.IDToANodeBB = nullptr;
1118 return;
1119 }
1120
1121 anodeBBs.IDToANodeBB = new ANodeBB [numOfBBs];
1122
1123 // CFG at this moment has subroutine calls explicitly linked (between call and
1124 // callee and callee's return to call's succ). For structurizing purpose, those
1125 // edges should not be considered. Here ANodeBB will have those edges ignored.
1126 for (G4_BB *bb : *CFG)
1127 {
1128 uint32_t id = bb->getId();
1129 ANodeBB *node = &(anodeBBs.IDToANodeBB[id]);
1130 node->anodeId = getNextANodeId();
1131 node->bb = bb;
1132 bool isCallBB = (bb->isEndWithCall() || bb->isEndWithFCall());
1133 bool isReturnBB = (bb->getLastOpcode() == G4_return || bb->isEndWithFRet());
1134 if (isCallBB)
1135 {
1136 G4_BB *returnBB = bb->getPhysicalSucc();
1137 assert(returnBB && "Call BB must have a return BB.");
1138 ANode *succNode = &(anodeBBs.IDToANodeBB[returnBB->getId()]);
1139 node->succs.push_back(succNode);
1140 succNode->preds.push_back(node);
1141
1142 node->setANodeBBType(ANODEBB_CALL);
1143 }
1144 else if (isReturnBB)
1145 {
1146 node->setANodeBBType(ANODEBB_RETURN);
1147 }
1148 else
1149 {
1150 for (G4_BB *succ : bb->Succs)
1151 {
1152 ANode *succNode = &(anodeBBs.IDToANodeBB[succ->getId()]);
1153 node->succs.push_back(succNode);
1154 succNode->preds.push_back(node);
1155 }
1156 }
1157 }
1158 }
1159
getANodeBB(G4_BB * bb)1160 ANodeBB* CFGStructurizer::getANodeBB(G4_BB *bb)
1161 {
1162 uint32_t id = bb->getId();
1163 if (id < numOfBBs)
1164 {
1165 return &(anodeBBs.IDToANodeBB[id]);
1166 }
1167
1168 BBToANodeBBMap::iterator I = anodeBBs.newBBToANodeBB.find(bb);
1169 MUST_BE_TRUE(I != anodeBBs.newBBToANodeBB.end(),
1170 "Corresponding ANodeBB isn't set up yet");
1171 ANodeBB *ndbb = I->second;
1172 return ndbb;
1173 }
1174
setANodeBB(ANodeBB * ndbb,G4_BB * bb)1175 void CFGStructurizer::setANodeBB(ANodeBB *ndbb, G4_BB *bb)
1176 {
1177 uint32_t id = bb->getId();
1178 if (id < numOfBBs)
1179 {
1180 MUST_BE_TRUE(false, "ANodeBB has been set up already");
1181 return;
1182 }
1183 MUST_BE_TRUE(anodeBBs.newBBToANodeBB.find(bb) == anodeBBs.newBBToANodeBB.end(),
1184 "ANodeBB has been in map already");
1185 anodeBBs.newBBToANodeBB[bb] = ndbb;
1186 }
1187
getInsertAfterBB(G4_BB * bb)1188 G4_BB *CFGStructurizer::getInsertAfterBB(G4_BB *bb)
1189 {
1190 MUST_BE_TRUE(newBBToInsertAfterBB.find(bb) != newBBToInsertAfterBB.end(),
1191 "The BB isn't a new BB or something else is wrong");
1192 MUST_BE_TRUE(bb->getId() >= numOfBBs, "The BB isn't a new BB");
1193 return newBBToInsertAfterBB[bb];
1194 }
1195
setInsertAfterBB(G4_BB * newbb,G4_BB * insertAfter)1196 void CFGStructurizer::setInsertAfterBB(G4_BB *newbb, G4_BB *insertAfter)
1197 {
1198 MUST_BE_TRUE(newbb->getId() >= numOfBBs, "The BB isn't a new BB");
1199
1200 if (insertAfter->getId() < numOfBBs)
1201 {
1202 newBBToInsertAfterBB[newbb] = insertAfter;
1203 }
1204 else
1205 {
1206 newBBToInsertAfterBB[newbb] = getInsertAfterBB(insertAfter);
1207 }
1208 }
1209
deletePST(ANodeHG * nodehg)1210 void CFGStructurizer::deletePST(ANodeHG *nodehg)
1211 {
1212 for (ANList::iterator I = nodehg->children.begin(),
1213 E = nodehg->children.end();
1214 I != E; ++I)
1215 {
1216 ANode *tmp = *I;
1217 if (tmp->getType() != AN_BB)
1218 {
1219 deletePST((ANodeHG*)tmp);
1220 }
1221 }
1222 nodehg->children.clear();
1223 delete nodehg;
1224 }
1225
1226 // Return goto inst if bb has one, null otherwise.
getGotoInst(G4_BB * bb)1227 G4_INST* CFGStructurizer::getGotoInst(G4_BB *bb)
1228 {
1229 G4_INST *inst = bb->size() > 0 ? bb->back() : nullptr;
1230 return (inst && inst->opcode() == G4_goto) ? inst : nullptr;
1231 }
1232
1233 // Return either jmpi inst if bb has one, null otherwise.
getJmpiInst(G4_BB * bb)1234 G4_INST* CFGStructurizer::getJmpiInst(G4_BB *bb)
1235 {
1236 G4_INST *inst = bb->size() > 0 ? bb->back() : nullptr;
1237 return (inst && inst->opcode() == G4_jmpi) ? inst : nullptr;
1238 }
1239
fini()1240 void CFGStructurizer::fini()
1241 {
1242 for (uint32_t i = 0, size = topANodes.size(); i< size; ++i)
1243 {
1244 deletePST((ANodeHG*)topANodes[i]);
1245 }
1246 topANodes.clear();
1247
1248 if (numOfBBs > 0)
1249 {
1250 delete[] anodeBBs.IDToANodeBB;
1251 anodeBBs.IDToANodeBB = nullptr;
1252
1253 for (BBToANodeBBMap::iterator I = anodeBBs.newBBToANodeBB.begin(),
1254 E = anodeBBs.newBBToANodeBB.end();
1255 I != E; ++I)
1256 {
1257 ANodeBB *tmp = I->second;
1258 delete tmp;
1259 }
1260 anodeBBs.newBBToANodeBB.clear();
1261
1262 newBBToInsertAfterBB.clear();
1263 }
1264 }
1265
ANListGetOther(ANList & anlist,ANode * node)1266 ANode *CFGStructurizer::ANListGetOther(ANList &anlist, ANode *node)
1267 {
1268 if (anlist.size() == 0)
1269 {
1270 return nullptr;
1271 }
1272 ANode *nd = anlist.back();
1273 if (nd == node)
1274 {
1275 nd = anlist.front();
1276 }
1277 return (nd == node) ? nullptr : nd;
1278 }
1279
1280 // Insert cg into the ordered CGList in the increasing order
1281 // of cg's endBB.
cg_oinsert(CGList & cgs,ControlGraph * cg)1282 void CFGStructurizer::cg_oinsert(CGList &cgs, ControlGraph *cg)
1283 {
1284 bool hasInserted = false;
1285 CGList::iterator I = cgs.begin();
1286 CGList::iterator E = cgs.end();
1287 G4_BB *end = cg->endBB;
1288 while (I != E)
1289 {
1290 CGList::iterator Iter = I;
1291 ++I;
1292 ControlGraph *tmp = *Iter;
1293 if (isBefore(tmp->endBB, end))
1294 {
1295 continue;
1296 }
1297 cgs.insert(Iter, cg);
1298 hasInserted = true;
1299 break;
1300 }
1301 if (!hasInserted)
1302 {
1303 cgs.push_back(cg);
1304 }
1305 }
1306
1307 // Insert BB into the ordered BB_List in the increasing order of BB
bb_oinsert(BB_LIST & bblist,G4_BB * bb)1308 void CFGStructurizer::bb_oinsert(BB_LIST &bblist, G4_BB *bb)
1309 {
1310 bool hasInserted = false;
1311 BB_LIST_ITER I = bblist.begin();
1312 BB_LIST_ITER E = bblist.end();
1313 while (I != E)
1314 {
1315 BB_LIST_ITER Iter = I;
1316 ++I;
1317 G4_BB *tmp = *Iter;
1318 if (isBefore(tmp, bb))
1319 {
1320 continue;
1321 }
1322 bblist.insert(Iter, bb);
1323 hasInserted = true;
1324 break;
1325 }
1326 if (!hasInserted)
1327 {
1328 bblist.push_back(bb);
1329 }
1330 }
1331
1332 // Created all CGs whose beginBB is bb. If any, return true and
1333 // all CGs are returned as cgs. If not, return false.
1334 //
1335 // cgs is in order so that the last one will be outmost block
1336 // and the first one is the innermost block. If bb has goto
1337 // inst, the CG for this goto will be the first one in cgs.
getCGBegin(G4_BB * bb,CGList & cgs)1338 bool CFGStructurizer::getCGBegin(G4_BB *bb, CGList &cgs)
1339 {
1340 if (bb->size() == 0)
1341 {
1342 return false;
1343 }
1344
1345 // If bb's goto is backward to a BB prior to bb, this bb cannot
1346 // be a loop candidate. For example,
1347 //
1348 // B0:
1349 // ......
1350 // bb: ..
1351 // (p) goto B0
1352 // ...
1353 // B1:
1354 // (p) goto bb
1355 // In this case, CG for bb is marked with non loop candidate.
1356 //
1357 G4_INST *bbgotoInst = getGotoInst(bb);
1358 bool isLoopCandidate = true;
1359 if (bbgotoInst && bbgotoInst->asCFInst()->isBackward() &&
1360 bb->Succs.size() > 0 && bb->Succs.back() != bb)
1361 {
1362 isLoopCandidate = false;
1363 }
1364
1365 ControlGraph *cg;
1366 bool isBegin = false;
1367 // Check if bb is a loop head
1368 for (BB_LIST_ITER I = bb->Preds.begin(), E = bb->Preds.end(); I != E; ++I)
1369 {
1370 G4_BB *pred = *I;
1371 G4_INST *gotoInst = getGotoInst(pred);
1372 if (gotoInst && gotoInst->asCFInst()->isBackward() && pred->Succs.back() == bb)
1373 {
1374 cg = new ControlGraph(bb, pred, gotoInst, true);
1375 cg_oinsert(cgs, cg);
1376 cg->isLoopCandidate = isLoopCandidate;
1377 isBegin = true;
1378 }
1379 }
1380
1381 // check if its last inst is a forward goto. Note that all backward
1382 // gotos have been processed when the target BB of this backward goto
1383 // is processed.
1384 G4_INST *gotoInst = getGotoInst(bb);
1385 if (gotoInst && !gotoInst->asCFInst()->isBackward())
1386 {
1387 G4_BB *succ = bb->Succs.back();
1388 cg = new ControlGraph(bb, succ, gotoInst, false);
1389 // Add this cg into the beginning of cgs.
1390 cgs.push_front(cg);
1391 isBegin = true;
1392 }
1393 return isBegin;
1394 }
1395
isGotoScalarJmp(G4_INST * gotoInst)1396 inline bool CFGStructurizer::isGotoScalarJmp(G4_INST *gotoInst)
1397 {
1398 MUST_BE_TRUE(gotoInst->opcode() == G4_goto, "It should be a goto inst");
1399 return gotoInst->getExecSize() == g4::SIMD1 ||
1400 gotoInst->getPredicate() == nullptr ||
1401 gotoInst->asCFInst()->isUniform();
1402 }
1403
isJoinBB(G4_BB * bb)1404 inline bool CFGStructurizer::isJoinBB(G4_BB* bb)
1405 {
1406 G4_INST* joinInst = bb->getFirstInst();
1407 return joinInst && (joinInst->opcode() == G4_join);
1408 }
1409
canbeJoinBB(G4_BB * bb)1410 inline bool CFGStructurizer::canbeJoinBB(G4_BB *bb)
1411 {
1412 // We treat while/else/endif as the first inst
1413 // of BB. Whenever a BB has any of them, join
1414 // cannot be inserted as the first inst.
1415 G4_INST* firstInst = bb->getFirstInst();
1416 if (!firstInst)
1417 {
1418 // Emptry BB can always have a join
1419 return true;
1420 }
1421 G4_opcode opc = firstInst->opcode();
1422 return opc != G4_while && opc != G4_endif && opc != G4_else;
1423 }
1424
1425 // return true if bb's label has not been used by while/else/endif/join
isBBLabelAvailable(G4_BB * bb)1426 bool CFGStructurizer::isBBLabelAvailable(G4_BB *bb)
1427 {
1428 G4_INST* firstInst = bb->getFirstInst();
1429 if (!firstInst)
1430 {
1431 return true;
1432 }
1433 G4_opcode opc = firstInst->opcode();
1434 return opc != G4_while && opc != G4_endif &&
1435 opc != G4_else && opc != G4_join;
1436 }
1437
ANListReplace(ANList & anlist,ANode * from,ANode * to)1438 void CFGStructurizer::ANListReplace(ANList &anlist, ANode *from, ANode *to)
1439 {
1440 for (ANList::iterator I = anlist.begin(), E = anlist.end(); I != E; ++I)
1441 {
1442 ANode *tmp = *I;
1443 if (tmp == from)
1444 {
1445 (*I) = to;
1446 return;
1447 }
1448 }
1449 }
1450
BBListReplace(BB_LIST & bblist,G4_BB * from,G4_BB * to)1451 void CFGStructurizer::BBListReplace(BB_LIST &bblist, G4_BB *from, G4_BB *to)
1452 {
1453 for (BB_LIST_ITER I = bblist.begin(), E = bblist.end(); I != E; ++I)
1454 {
1455 G4_BB *tmp = *I;
1456 if (tmp == from)
1457 {
1458 (*I) = to;
1459 return;
1460 }
1461 }
1462 MUST_BE_TRUE(false, "BBList should have to-be-replaced element");
1463 }
1464
ANListErase(ANList & anlist,ANode * toBeErased)1465 void CFGStructurizer::ANListErase(ANList &anlist, ANode *toBeErased)
1466 {
1467 ANList::iterator I = findANode(anlist, toBeErased);
1468 if (I != anlist.end())
1469 {
1470 anlist.erase(I);
1471 }
1472 }
1473
BBListErase(BB_LIST & bblist,G4_BB * toBeErased)1474 void CFGStructurizer::BBListErase(BB_LIST &bblist, G4_BB *toBeErased)
1475 {
1476 BB_LIST_ITER I = findBB(&bblist, toBeErased);
1477 if (I != bblist.end())
1478 {
1479 bblist.erase(I);
1480 }
1481 }
1482
1483 // Given ranges [end, exit] and [end1, exit1], and
1484 // end < exit && end1 < exit1
1485 // return the last two BBs of them. Note that the order is the
1486 // BB order as given in CFG's BBs list.
1487 //
1488 // Note that it's possible that exit could be nullptr, but end
1489 // will never be nullptr. And under some cases, return exit is
1490 // unknown (set to nullptr).
getNewRange(G4_BB * & end,G4_BB * & exit,G4_BB * end1,G4_BB * exit1)1491 void CFGStructurizer::getNewRange(
1492 G4_BB*& end, G4_BB*& exit, G4_BB* end1, G4_BB* exit1)
1493 {
1494 G4_BB *e0, *e1;
1495 G4_BB* newEnd = isBefore(end, end1) ? end1 : end;
1496
1497 if (exit && exit1)
1498 {
1499 if (exit == exit1)
1500 {
1501 // exit is still the new exit
1502 end = newEnd;
1503 return;
1504 }
1505
1506 if (isBefore(exit, exit1))
1507 {
1508 e0 = exit1;
1509 e1 = exit;
1510 }
1511 else
1512 {
1513 e0 = exit;
1514 e1 = exit1;
1515 }
1516
1517 if (isBefore(newEnd, e1))
1518 {
1519 newEnd = e1;
1520 }
1521 exit = e0;
1522 end = newEnd;
1523 return;
1524 }
1525
1526 e0 = exit ? exit : exit1;
1527 if (e0 && (e0 == newEnd || isBefore(e0, newEnd)))
1528 {
1529 e0 = nullptr;
1530 }
1531
1532 exit = e0;
1533 end = newEnd;
1534 }
1535
1536 // Given a cg, if it definitely does not start a HG, no new HG should
1537 // be created. Instead, the range covered by this cg will be used to
1538 // extend the current Node's range. This function is used to extend
1539 // Node's range from this cg.
extendNode(ANode * Node,ControlGraph * CG)1540 void CFGStructurizer::extendNode(ANode *Node, ControlGraph *CG)
1541 {
1542 if (!Node)
1543 {
1544 return;
1545 }
1546 // Extending end & exit
1547 G4_BB *begin = CG->beginBB;
1548 G4_BB* end = CG->endBB;
1549 G4_BB* exit;
1550 if (CG->isBackward)
1551 {
1552 G4_BB* succ0 = end->Succs.front();
1553 bool isCondGoto = (CG->gotoInst->getPredicate() != NULL);
1554 exit = isCondGoto ? succ0 : nullptr;
1555 }
1556 else
1557 {
1558 exit = end;
1559 end = begin;
1560 }
1561
1562 getNewRange(end, exit, Node->getEndBB(), Node->getExitBB());
1563 Node->setEndBB(end);
1564 Node->setExitBB(exit);
1565 }
1566
1567
1568 // Get the immediate enclosing ANodeHG that contains all the preds of AN
1569 // and all AN's backward succs (due to backward gotos).
getEnclosingANodeHG(ANode * AN)1570 ANodeHG* CFGStructurizer::getEnclosingANodeHG(ANode *AN)
1571 {
1572 MUST_BE_TRUE(!ANStack.empty(), "ANStack should not be empty");
1573
1574 G4_BB *begin = AN->getBeginBB();
1575 int32_t ix = (int32_t) ANStack.size() - 1;
1576 if (ANStack.size() > 1)
1577 {
1578 // As loop's end and begin must be in the same HG. Here, check
1579 // if AN has a backward goto, if so, merge ANode of the loop's
1580 // begin with AN. However, it is possible that the ANode for
1581 // the loop's begin is not in ANStack anymore (due to previous
1582 // merging); and in this case, find ANode that contains loop's
1583 // begin and merge with AN.
1584 if (AN->getType() == AN_BB)
1585 {
1586 G4_INST *gotoInst = getGotoInst(begin);
1587 if (gotoInst && gotoInst->asCFInst()->isBackward())
1588 {
1589 uint32_t j;
1590 ANodeHG *hg = nullptr;
1591 for (j = (uint32_t) ANStack.size(); j != 0; --j)
1592 {
1593 ANodeHG *tmp = (ANodeHG*)ANStack[j - 1];
1594 if (tmp->isLoopCandidate && tmp->endBB == begin)
1595 {
1596 // ANode for innermost loop
1597 hg = tmp;
1598 break;
1599 }
1600 }
1601 if (j == 0)
1602 {
1603 // loop's begin ANode is no longer on ANStack. Find ANStack's node
1604 // that enclose the loop's begin.
1605 G4_BB *loopBeginBB = begin->Succs.back();
1606 if (loopBeginBB != begin)
1607 {
1608 hg = (ANodeHG *)(getANodeBB(loopBeginBB)->getCurrentANode());
1609 // The parent of ACFG node should be on ANStack
1610 hg = (ANodeHG *)hg->parent;
1611 }
1612 }
1613
1614 if (hg)
1615 {
1616 MUST_BE_TRUE(hg->ANStackIx >= 0, "ANodeHG's index is wrong");
1617 if (ix > hg->ANStackIx)
1618 {
1619 ix = hg->ANStackIx;
1620 }
1621 }
1622 }
1623 }
1624
1625 for (ANList::iterator II = AN->preds.begin(), IE = AN->preds.end();
1626 II != IE; ++II)
1627 {
1628 ANode *pred = *II;
1629 if (pred == AN || isBefore(begin, pred->getBeginBB()))
1630 {
1631 // Skip. For pred == AN, they are already in AN;
1632 // for backward goto, its pred will be processed later.
1633 continue;
1634 }
1635
1636 ANodeHG *hg = (ANodeHG *)pred->parent;
1637 if (hg)
1638 {
1639 MUST_BE_TRUE(hg->ANStackIx >= 0, "ANodeHG's index is wrong");
1640 if (ix > hg->ANStackIx)
1641 {
1642 ix = hg->ANStackIx;
1643 }
1644 }
1645 else
1646 {
1647 // Error message
1648 G4_BB* predBB = pred->getBeginBB();
1649 G4_INST* predGoto = getGotoInst(predBB);
1650
1651 // If predGoto is null, it means that a non-goto branching inst like jmpi jumps
1652 // into a range of goto instrutions, such as the following:
1653 // jmp A
1654 // goto B
1655 // A:
1656 // ...
1657 // B:
1658 //
1659 MUST_BE_TRUE(predGoto != nullptr, "Error: Non-goto (like jmp) and goto crossing !");
1660 MUST_BE_TRUE(false, "Error: unknown control flow in the program.");
1661 }
1662 }
1663 // No need to check succs as forward goto will be handled later
1664 // and backward goto has been handled already!
1665 }
1666
1667 return (ANodeHG *)ANStack[ix];
1668 }
1669
1670 // Merge AN with its immediate pred if possible.
1671 // 1. If pred is AN_SEQUENCE, just add AN into pred
1672 // 2. otherwise, create AN_SEQUENCE and include both
1673 // Return the new merged node if merged, or AN otherwise.
mergeWithPred(ANode * AN)1674 ANode *CFGStructurizer::mergeWithPred(ANode *AN)
1675 {
1676 if (AN->parent == nullptr ||
1677 AN->succs.size() != 1 || AN->preds.size() != 1)
1678 {
1679 return AN;
1680 }
1681
1682 // check if it can be merged with the previous ANode. Note that we
1683 // only need to check its immediate predecessor under the same parent
1684 // as it is the only candidate for possible merging.
1685 ANode *node = AN;
1686 ANodeHG *parent = (ANodeHG *)AN->parent;
1687 ANode *pred = AN->preds.back();
1688 if (pred->succs.size() != 1 || (ANodeHG*)pred->parent != parent)
1689 {
1690 return AN;
1691 }
1692
1693 // Get AN's physical previous ANode.
1694 ANList::iterator E = parent->children.end();
1695 ANList::iterator I = findANode(parent->children, pred);
1696 MUST_BE_TRUE(I != E, "Child Node isn't in Parent's children list");
1697 ++I;
1698 ANode* predPhySucc = (I != E) ? *I : nullptr;
1699
1700 ANode *succ = AN->succs.back();
1701 if (predPhySucc == AN)
1702 {
1703 // check: do we allow self-looping on SEQ node ?
1704 if (pred->getType() == AN_SEQUENCE)
1705 {
1706 ANodeHG *predhg = (ANodeHG *)pred;
1707 ANode *lastNode = predhg->children.back();
1708 AN->parent = predhg;
1709 predhg->children.push_back(AN);
1710 predhg->succs.clear();
1711 predhg->succs.push_back(succ);
1712 ANListReplace(succ->preds, AN, predhg);
1713
1714 lastNode->succs.clear();
1715 lastNode->succs.push_back(AN);
1716 AN->preds.clear();
1717 AN->preds.push_back(lastNode);
1718
1719 predhg->setEndBB(AN->getEndBB());
1720 predhg->setExitBB(AN->getExitBB());
1721 if (succ != pred)
1722 {
1723 predhg->setExitNode(succ);
1724 }
1725
1726 ANListErase(parent->children, AN);
1727 AN->setInACFG(false);
1728 node = predhg;
1729 }
1730 else
1731 {
1732 ANodeHG *newSeqNode = new ANodeHG(AN_SEQUENCE);
1733 newSeqNode->anodeId = getNextANodeId();
1734 newSeqNode->parent = parent;
1735 pred->parent = newSeqNode;
1736 AN->parent = newSeqNode;
1737 ANList::iterator II = pred->preds.begin();
1738 ANList::iterator IE = pred->preds.end();
1739 while (II != IE)
1740 {
1741 ANList::iterator Iter = II;
1742 ++II;
1743 ANode *tmp = *Iter;
1744 if (tmp == AN || AN->isMember(tmp))
1745 {
1746 newSeqNode->succs.push_back(newSeqNode);
1747 newSeqNode->preds.push_back(newSeqNode);
1748 }
1749 else
1750 {
1751 ANListReplace(tmp->succs, pred, newSeqNode);
1752 newSeqNode->preds.push_back(tmp);
1753 }
1754 }
1755
1756 // The above loop has handled case of succ == pred already
1757 if (succ != pred)
1758 {
1759 ANListReplace(succ->preds, AN, newSeqNode);
1760 newSeqNode->succs.push_back(succ);
1761 }
1762 newSeqNode->children.push_back(pred);
1763 newSeqNode->children.push_back(AN);
1764 pred->setInACFG(false);
1765 AN->setInACFG(false);
1766
1767 newSeqNode->setBeginBB(pred->getBeginBB());
1768 newSeqNode->setEndBB(AN->getEndBB());
1769 newSeqNode->setExitBB(AN->getExitBB());
1770
1771 if (succ != pred)
1772 {
1773 newSeqNode->setExitNode(succ);
1774 }
1775
1776 ANListErase(parent->children, pred);
1777 ANListErase(parent->children, AN);
1778 parent->children.push_back(newSeqNode);
1779
1780 node = newSeqNode;
1781 }
1782 }
1783 return node;
1784 }
1785
1786 // Condensing a HG into a single node in ACFG. The children of node is still
1787 // a valid partial ACFG. The current ACFG is the all ANode whose inACFG is true.
condenseNode(ANodeHG * node)1788 void CFGStructurizer::condenseNode(ANodeHG *node)
1789 {
1790 ANode *entryNode = node->getEntryNode();
1791 G4_BB *exit = node->getExitBB();
1792 ANList::iterator I = entryNode->preds.begin();
1793 ANList::iterator E = entryNode->preds.end();
1794 while (I != E)
1795 {
1796 ANList::iterator Iter = I;
1797 ++I;
1798 ANode *pred = *Iter;
1799 if (node->isMember(pred))
1800 {
1801 continue;
1802 }
1803
1804 node->preds.push_back(pred);
1805 ANListReplace(pred->succs, entryNode, node);
1806 }
1807
1808 if (exit) {
1809 ANode *exitNode = node->getExitNode(getANodeBB(exit));
1810 I = exitNode->preds.begin();
1811 E = exitNode->preds.end();
1812 while (I != E)
1813 {
1814 ANList::iterator iter = I;
1815 ++I;
1816 ANode *pred = *iter;
1817 if (node->isMember(pred))
1818 {
1819 exitNode->preds.erase(iter);
1820 }
1821 }
1822 exitNode->preds.push_back(node);
1823 node->succs.push_back(exitNode);
1824 }
1825
1826 // Set children nodes' inAFCG flag to false.
1827 E = node->children.end();
1828 for (I = node->children.begin(); I != E; ++I)
1829 {
1830 ANode* nd = *I;
1831 nd->setInACFG(false);
1832 }
1833 }
1834
finalizeHG(ANodeHG * node)1835 ANodeHG *CFGStructurizer::finalizeHG(ANodeHG *node)
1836 {
1837 // Identify the type of this node
1838 ANode *entryNode = node->getEntryNode();
1839 ANode *endNode = node->getEndNode();
1840 G4_BB *exitBB = node->getExitBB();
1841 ANode *exitNode = nullptr;
1842 if (exitBB) {
1843 exitNode = node->getExitNode(getANodeBB(exitBB));
1844 if (exitNode)
1845 {
1846 // Set exitNode when node is finalized
1847 node->setExitNode(exitNode);
1848 }
1849 }
1850 G4_BB *begin = node->getBeginBB();
1851 G4_INST *gotoInst = begin->back();
1852 if (endNode->succs.size() > 0 && endNode->succs.back() == entryNode)
1853 {
1854 node->setType(AN_DO_WHILE);
1855 }
1856 else if (entryNode->getType() == AN_BB && gotoInst &&
1857 gotoInst->getPredicate() && !gotoInst->asCFInst()->isBackward())
1858 {
1859 ANodeType ty = AN_COMPOSITE;
1860 if (entryNode->succs.size() == 2)
1861 {
1862 ANode *thenNode = entryNode->succs.front();
1863 ANode *elseNode = entryNode->succs.back();
1864 if (thenNode == elseNode)
1865 {
1866 // The CFG is not well formed and has the following
1867 // BB0: succs=BB1, BB1
1868 // Simply make it COMPOSITE as an work-around. Will
1869 // fix igc to not generate such a case.
1870 ty = AN_COMPOSITE;
1871 }
1872 else if (thenNode->succs.size() == 1 && thenNode->succs.front() == elseNode &&
1873 elseNode == exitNode)
1874 {
1875 ty = AN_IF_THEN_ENDIF;
1876 }
1877 else if (thenNode->succs.size() == 1 && elseNode->succs.size() == 1 &&
1878 thenNode->succs.front() == elseNode->succs.front() &&
1879 thenNode->succs.front() == exitNode)
1880 {
1881 ty = AN_IF_THEN_ELSE_ENDIF;
1882 }
1883 }
1884 node->setType(ty);
1885 }
1886 else
1887 {
1888 node->setType(AN_COMPOSITE);
1889 }
1890
1891 condenseNode(node);
1892
1893 if (node->getType() == AN_DO_WHILE)
1894 {
1895 reConstructDoWhileforBreak(node);
1896 }
1897
1898 return node;
1899 }
1900
findBB(BB_LIST * bblist,G4_BB * bb)1901 BB_LIST_ITER CFGStructurizer::findBB(BB_LIST* bblist, G4_BB *bb)
1902 {
1903 for (BB_LIST_ITER I = bblist->begin(), E = bblist->end(); I != E; ++I)
1904 {
1905 G4_BB *tmp = *I;
1906 if (tmp == bb)
1907 {
1908 return I;
1909 }
1910 }
1911 return bblist->end();
1912 }
1913
findANode(ANList & anlist,ANode * nd)1914 ANList::iterator CFGStructurizer::findANode(ANList& anlist, ANode *nd)
1915 {
1916 for (ANList::iterator I = anlist.begin(), E = anlist.end(); I != E; ++I)
1917 {
1918 ANode *tmp = *I;
1919 if (tmp == nd)
1920 {
1921 return I;
1922 }
1923 }
1924 return anlist.end();
1925 }
1926
1927 // Return true if all preds of abb is inside node; false otherwise.
isANodeOnlyPred(G4_BB * abb,ANode * node)1928 bool CFGStructurizer::isANodeOnlyPred(G4_BB *abb, ANode *node)
1929 {
1930 for (BB_LIST_ITER II = abb->Preds.begin(), IE = abb->Preds.end();
1931 II != IE; ++II)
1932 {
1933 G4_BB *tmp = *II;
1934 ANodeBB *ndbb = getANodeBB(tmp);
1935
1936 // Note that isMember() requires PST is up to date.
1937 if (!node->isMember(ndbb))
1938 {
1939 return false;
1940 }
1941 }
1942 return true;
1943 }
1944
1945 //
1946 // This function creates a new BB as node's exit BB. Node must be
1947 // finalized before invoking this function. InsertAfterIter is the
1948 // iterator to node->getEndBB(), which is provided if the calling
1949 // context already has it; otherwise, this function will compute
1950 // this iterator.
1951 //
1952 // If updateACFG is true, ACFG will be updated. This is used during
1953 // the construction of ACFG and the new BB will always in ACFG
1954 // (as ANodeBB). Note that PST is up to its caller to update.
1955 //
addLandingBB(ANodeHG * node,BB_LIST_ITER insertAfterIter,bool updateACFG)1956 ANodeBB* CFGStructurizer::addLandingBB(
1957 ANodeHG *node, BB_LIST_ITER insertAfterIter, bool updateACFG)
1958 {
1959 G4_BB *endbb = node->getEndBB();
1960 if (insertAfterIter == BBs->end())
1961 {
1962 insertAfterIter = findBB(BBs, endbb);
1963 }
1964 G4_BB *insertAfterBB = *insertAfterIter;
1965 G4_BB *exitBB = node->getExitBB();
1966
1967 G4_BB *newBB = createBBWithLabel();
1968 G4_Label *newLabel = newBB->getLabel();
1969 G4_Label *targetLabel = exitBB->getLabel();
1970 setInsertAfterBB(newBB, insertAfterBB);
1971
1972 // Adjust BB's pred/succs
1973 BB_LIST_ITER BE = exitBB->Preds.end();
1974 BB_LIST_ITER BI = exitBB->Preds.begin();
1975 while (BI != BE)
1976 {
1977 BB_LIST_ITER curr = BI;
1978 ++BI;
1979 G4_BB *bb = *curr;
1980 ANode *tmp = getANodeBB(bb);
1981 if (node->isMember(tmp))
1982 {
1983 BBListErase(exitBB->Preds, bb);
1984 BBListReplace(bb->Succs, exitBB, newBB);
1985 newBB->Preds.push_back(bb);
1986 G4_INST *gotoInst = getGotoInst(bb);
1987 if (gotoInst)
1988 {
1989 if (gotoInst->asCFInst()->getUip() == targetLabel)
1990 {
1991 if (bb == insertAfterBB)
1992 {
1993 // remove goto
1994 bb->pop_back();
1995 }
1996 else
1997 {
1998 // modify the goto to branch to the new target
1999 gotoInst->asCFInst()->setUip(newLabel);
2000 }
2001 }
2002 }
2003 else
2004 {
2005 G4_INST *jmpiInst = getJmpiInst(bb);
2006 if (jmpiInst)
2007 {
2008 if (jmpiInst->getSrc(0) == targetLabel)
2009 {
2010 if (bb == insertAfterBB)
2011 {
2012 // remove jmpi
2013 bb->pop_back();
2014 }
2015 else
2016 {
2017 // modify the jmpi to branch to the new target
2018 jmpiInst->setSrc(newLabel, 0);
2019 }
2020 }
2021 }
2022 else if (bb != insertAfterBB)
2023 {
2024 MUST_BE_TRUE(false, "BB has unknown non-goto/jmpi branch");
2025 }
2026 }
2027
2028 // Update exit nodes
2029 tmp = tmp->parent;
2030 while (tmp != node)
2031 {
2032 if (tmp->getExitBB() == exitBB)
2033 {
2034 tmp->setExitBB(newBB);
2035 }
2036 tmp = tmp->parent;
2037 }
2038 }
2039 }
2040
2041 if (insertAfterBB->getPhysicalSucc() != exitBB)
2042 {
2043 G4_INST *gotoInst = CFG->builder->createInternalCFInst(
2044 NULL, G4_goto, g4::SIMD1, NULL, targetLabel, InstOpt_NoOpt);
2045 newBB->push_back(gotoInst);
2046
2047 // Consider newBB is part of exitBB, thus use exitBB's DI
2048 updateInstDI(gotoInst, exitBB);
2049 }
2050 newBB->Succs.push_back(exitBB);
2051 exitBB->Preds.push_back(newBB);
2052
2053 // insert it into BBs
2054 ++insertAfterIter;
2055 BBs->insert(insertAfterIter, newBB);
2056 G4_BB *phySucc = insertAfterBB->getPhysicalSucc();
2057 insertAfterBB->setPhysicalSucc(newBB);
2058 newBB->setPhysicalPred(insertAfterBB);
2059 newBB->setPhysicalSucc(phySucc);
2060 if (phySucc)
2061 {
2062 phySucc->setPhysicalPred(newBB);
2063 }
2064
2065 ANodeBB *ndbb = new ANodeBB(newBB);
2066 ndbb->anodeId = getNextANodeId();
2067 setANodeBB(ndbb, newBB);
2068
2069 if (updateACFG)
2070 {
2071 // Node has been finalized. Its children's ACFG is no longer
2072 // up to date. But within this node, acfg is still good;
2073 // only edges outside of this node is no longer valid!
2074 // We will rely on begin/end/exit when doing conversion.
2075 ANode *exitNode = getANodeBB(exitBB)->getCurrentANode();
2076 ANListReplace(exitNode->preds, node, ndbb);
2077 ANListReplace(node->succs, exitNode, ndbb);
2078 ndbb->succs.push_back(exitNode);
2079 ndbb->preds.push_back(node);
2080 }
2081
2082 node->setExitBB(newBB);
2083 adjustBBId(newBB);
2084
2085 return ndbb;
2086 }
2087
2088 // Add a new empty BB as a predecessor of splitBB.
addSplitBBAtBegin(G4_BB * splitBB)2089 ANode *CFGStructurizer::addSplitBBAtBegin(G4_BB *splitBB)
2090 {
2091 G4_BB *newBB = createBBWithLabel();
2092 G4_Label *newLabel = newBB->getLabel();
2093 G4_BB *insertAfter = findInsertAfterBB(splitBB->getPhysicalPred());
2094 setInsertAfterBB(newBB, insertAfter);
2095
2096 // Adjust BB's pred/succs
2097 newBB->Preds.splice(newBB->Preds.end(), splitBB->Preds);
2098 splitBB->Preds.clear();
2099 splitBB->Preds.push_back(newBB);
2100 newBB->Succs.push_back(splitBB);
2101 BB_LIST_ITER BE = newBB->Preds.end();
2102 BB_LIST_ITER BI = newBB->Preds.begin();
2103 while (BI != BE)
2104 {
2105 BB_LIST_ITER curr = BI;
2106 ++BI;
2107 G4_BB *pred = *curr;
2108 BBListReplace(pred->Succs, splitBB, newBB);
2109 G4_INST *gotoInst = getGotoInst(pred);
2110 if (gotoInst)
2111 {
2112 if (gotoInst->asCFInst()->getUip() == splitBB->getLabel())
2113 {
2114 // modify the goto to branch to the new target
2115 gotoInst->asCFInst()->setUip(newLabel);
2116 }
2117 }
2118
2119 // adjust exit BB
2120 ANode *nd = getANodeBB(pred)->parent;
2121 while (nd)
2122 {
2123 if (nd->getExitBB() == splitBB)
2124 {
2125 nd->setExitBB(newBB);
2126 }
2127 nd = nd->parent;
2128 }
2129 }
2130
2131 // insert it into BBs
2132 BB_LIST_ITER iter = findBB(BBs, splitBB);
2133 BBs->insert(iter, newBB);
2134 G4_BB *phyPred = splitBB->getPhysicalPred();
2135 phyPred->setPhysicalSucc(newBB);
2136 newBB->setPhysicalPred(phyPred);
2137 newBB->setPhysicalSucc(splitBB);
2138 splitBB->setPhysicalPred(newBB);
2139
2140 ANodeBB *ndbb = new ANodeBB(newBB);
2141 ndbb->anodeId = getNextANodeId();
2142 setANodeBB(ndbb, newBB);
2143
2144 // newBB's id is the largest at this moment. Make sure it is in order
2145 adjustBBId(newBB);
2146 return ndbb;
2147 }
2148
2149 // Add a new BB that only holds the branch instruction of splitBB
2150 // if it has one, otherwise, the new BB is empty.
addSplitBBAtEnd(G4_BB * splitBB)2151 ANode* CFGStructurizer::addSplitBBAtEnd(G4_BB *splitBB)
2152 {
2153 G4_BB *newBB = createBBWithLabel();
2154 setInsertAfterBB(newBB, splitBB);
2155
2156 // Adjust splitBB's pred/succs
2157 BB_LIST_ITER BE = splitBB->Succs.end();
2158 BB_LIST_ITER BI = splitBB->Succs.begin();
2159 for (; BI != BE; ++BI)
2160 {
2161 G4_BB *bb = *BI;
2162 BBListReplace(bb->Preds, splitBB, newBB);
2163 }
2164 newBB->Succs.splice(newBB->Succs.end(), splitBB->Succs);
2165 newBB->Preds.push_back(splitBB);
2166 splitBB->Succs.clear();
2167 splitBB->Succs.push_back(newBB);
2168 G4_INST *gotoInst = getGotoInst(splitBB);
2169 if (gotoInst)
2170 {
2171 splitBB->pop_back();
2172 newBB->push_back(gotoInst);
2173 }
2174
2175 // insert it into BBs
2176 BI = findBB(BBs, splitBB);
2177 ++BI;
2178 BBs->insert(BI, newBB);
2179 G4_BB *phySucc = splitBB->getPhysicalSucc();
2180 splitBB->setPhysicalSucc(newBB);
2181 newBB->setPhysicalPred(splitBB);
2182 newBB->setPhysicalSucc(phySucc);
2183 if (phySucc)
2184 {
2185 phySucc->setPhysicalPred(newBB);
2186 }
2187
2188 ANodeBB *ndbb = new ANodeBB(newBB);
2189 ndbb->anodeId = getNextANodeId();
2190 setANodeBB(ndbb, newBB);
2191
2192 ANode *splitNode = getANodeBB(splitBB);
2193 ANode *tmp = splitNode->parent;
2194 while (tmp)
2195 {
2196 if (tmp->getEndBB() == splitBB)
2197 {
2198 tmp->setEndBB(newBB);
2199 }
2200 tmp = tmp->parent;
2201 }
2202
2203 adjustBBId(newBB);
2204
2205 return ndbb;
2206 }
2207
2208 // Start from bb (inclusive), search backward to find the first original BB.
findInsertAfterBB(G4_BB * bb)2209 G4_BB *CFGStructurizer::findInsertAfterBB(G4_BB *bb)
2210 {
2211 G4_BB *insertAfter = bb;
2212 while (insertAfter && insertAfter->getId() >= numOfBBs)
2213 {
2214 insertAfter = insertAfter->getPhysicalPred();
2215 }
2216 return insertAfter;
2217 }
2218
2219 // Re-connect every predecessors of "from" to its new successor "to". Update
2220 // "to"'s preds and its new predecessors' succs. Note that "from"'s preds are
2221 // left unchanged.
reConnectAllPreds(ANode * from,ANode * to)2222 void CFGStructurizer::reConnectAllPreds(ANode *from, ANode *to)
2223 {
2224 for (ANList::iterator I = from->preds.begin(), E = from->preds.end();
2225 I != E; ++I)
2226 {
2227 ANode *pred = *I;
2228 ANListReplace(pred->succs, from, to);
2229 to->preds.push_back(pred);
2230 }
2231 }
2232
2233 // Re-connect every successors of "from" to its new predecesor "to". Update
2234 // "to"'s succs and its new successors' preds. Note that "from"'s succs are
2235 // left unchanged.
reConnectAllSuccs(ANode * from,ANode * to)2236 void CFGStructurizer::reConnectAllSuccs(ANode *from, ANode *to)
2237 {
2238 for (ANList::iterator I = from->succs.begin(), E = from->succs.end();
2239 I != E; ++I)
2240 {
2241 ANode *succ = *I;
2242 ANListReplace(succ->preds, from, to);
2243 to->succs.push_back(succ);
2244 }
2245 }
2246
2247 // This function is to merge ANStack nodes into the new node during HG
2248 // construction.
ANStack_reconstruct(ANodeHG * & node,ANodeBB * ndbb)2249 void CFGStructurizer::ANStack_reconstruct(ANodeHG *& node, ANodeBB *ndbb)
2250 {
2251 ANodeHG *newNode = getEnclosingANodeHG(ndbb);
2252 if (newNode->ANStackIx == ANStack.size() - 1)
2253 {
2254 // No need to do ANStack reconstruction
2255 return;
2256 }
2257
2258 // Merge all nodes in ANStack[newNode->ANStackIx+1 : ANStack.size() - 1] into newNode
2259 G4_BB *exit = newNode->getExitBB();
2260 G4_BB *end = newNode->getEndBB();
2261
2262 // Merging from ANStackIx to the top of ANStack is needed to make
2263 // sure the order of children is correct.
2264 for (uint32_t i = newNode->ANStackIx + 1, size = ANStack.size(); i < size; ++i)
2265 {
2266 ANodeHG *nd = (ANodeHG *)ANStack[i];
2267 getNewRange(end, exit, nd->getEndBB(), nd->getExitBB());
2268 for (ANList::iterator I = nd->children.begin(), E = nd->children.end();
2269 I != E; ++I)
2270 {
2271 ANode *tmp = *I;
2272 tmp->parent = newNode;
2273 newNode->children.push_back(tmp);
2274 }
2275 }
2276 newNode->isLoopCandidate = false;
2277 newNode->setExitBB(exit);
2278 newNode->setEndBB(end);
2279 uint32_t n = (uint32_t) ANStack.size() - newNode->ANStackIx - 1;
2280 if (n > 0)
2281 {
2282 for (uint32_t i = 0; i < n; ++i)
2283 {
2284 ANode *nd = ANStack.back();
2285
2286 // Make sure to delete from newNode' children
2287 ANListErase(newNode->children, nd);
2288
2289 ANStack.pop_back();
2290 delete nd;
2291 }
2292 }
2293
2294 node = newNode;
2295 return;
2296 }
2297
2298 // For case like:
2299 // do: ...
2300 // if (p) {
2301 // break;
2302 // else
2303 // ...
2304 // endif
2305 // .....
2306 // while
2307 //
2308 // if-else-endif does not form a hammock because of break (it has two exits,
2309 // one is the target of break, the other is the BB right after endif). we
2310 // will need to make break as "no-op" instruction and then re-run
2311 // constructPST() on the body of the while loop.
2312 //
2313 // For now, implement a poor-man's break construction. More general code
2314 // will come after.
2315 //
reConstructDoWhileforBreak(ANodeHG * whileNode)2316 void CFGStructurizer::reConstructDoWhileforBreak(ANodeHG *whileNode)
2317 {
2318 G4_BB *exitbb = whileNode->getExitBB();
2319 ANList::iterator IB = whileNode->children.begin();
2320 ANList::iterator IE = whileNode->children.end();
2321 ANList::iterator II = IB;
2322 BB_LIST activeJoinBBs;
2323 while (II != IE)
2324 {
2325 ANode *nd = *II;
2326 while (!activeJoinBBs.empty())
2327 {
2328 G4_BB *bb = activeJoinBBs.front();
2329 if (isBefore(nd->getBeginBB(), bb))
2330 {
2331 break;
2332 }
2333 activeJoinBBs.pop_front();
2334 }
2335
2336 G4_BB *ebb = nd->getExitBB();
2337 if (ebb != exitbb)
2338 {
2339 if (ebb)
2340 {
2341 bb_oinsert(activeJoinBBs, ebb);
2342 }
2343 ++II;
2344 continue;
2345 }
2346
2347 if (nd == whileNode->children.back())
2348 {
2349 ++II;
2350 continue;
2351 }
2352
2353 whileNode->setHasBreak(true);
2354 if (nd->getType() != AN_BB)
2355 {
2356 // don't handle it.
2357 whileNode->setAllowSCF(false);
2358 break;
2359 }
2360
2361 ANodeBB *ndbb = (ANodeBB*)nd;
2362 if (II == IB || activeJoinBBs.empty())
2363 {
2364 // Break isn't inside any possible gotos (conservative)
2365 ndbb->setHasBreak(true);
2366 ++II;
2367 continue;
2368 }
2369
2370 if (ndbb->preds.size() != 1)
2371 {
2372 whileNode->setAllowSCF(false);
2373 break;
2374 }
2375
2376 ANode *exitNode = ndbb->succs.back();
2377 if (whileNode->isMember(exitNode))
2378 {
2379 exitNode = ndbb->succs.front();
2380 }
2381 ANode *pred = ndbb->preds.back();
2382 ANList::iterator IPred = II;
2383 --IPred;
2384 ANode *phyPred = *IPred;
2385 ANList::iterator IPredPred = IPred;
2386 ANode *phyPredPred = nullptr;
2387 if (IPredPred != IB)
2388 {
2389 --IPredPred;
2390 phyPredPred = *IPredPred;
2391 }
2392
2393 ANList::iterator ISucc = II;
2394 ++ISucc;
2395 ANode *phySucc = (ISucc != IE) ? *ISucc : nullptr;
2396 ANode *predTarget = ANListGetOther(pred->succs, ndbb);
2397
2398 // Handle two cases:
2399 // Case 1 (if-then-endif with break)
2400 // BB0:
2401 // [p] goto BB2
2402 // BB1:
2403 // break;
2404 // BB2:
2405 //
2406 // Case 2: (if-then-else-endif with break)
2407 // BB0:
2408 // [p] goto BB2
2409 // BB1:
2410 // goto BB3;
2411 // BB2:
2412 // break;
2413 // BB3:
2414 //
2415
2416 // case 1
2417 if (phyPred == pred && pred->getType() == AN_BB &&
2418 predTarget && predTarget == phySucc)
2419 {
2420 bool calNewIB = (IPred == IB);
2421
2422 ANodeHG *newIf = new ANodeHG(AN_IF_THEN_ENDIF);
2423 newIf->anodeId = getNextANodeId();
2424
2425 newIf->parent = whileNode;
2426 whileNode->children.erase(IPred);
2427 whileNode->children.erase(II);
2428 whileNode->children.insert(ISucc, newIf);
2429 reConnectAllPreds(pred, newIf);
2430 newIf->succs.push_back(predTarget);
2431 newIf->succs.push_back(exitNode);
2432 ANListReplace(exitNode->preds, ndbb, newIf);
2433 ANListReplace(predTarget->preds, pred, newIf);
2434
2435 newIf->children.push_back(pred);
2436 newIf->children.push_back(ndbb);
2437 newIf->setBeginBB(pred->getBeginBB());
2438 newIf->setEndBB(ndbb->getEndBB());
2439 newIf->setExitBB(predTarget->getBeginBB());
2440
2441 ndbb->setHasBreak(true);
2442 newIf->setHasBreak(true);
2443 ndbb->setAllowSCF(true);
2444 newIf->setAllowSCF(true);
2445
2446 if (calNewIB)
2447 {
2448 IB = whileNode->children.begin();
2449 }
2450 II = ISucc;
2451 }
2452 // Case 2
2453 else if (predTarget && phyPredPred && phyPredPred == pred &&
2454 phyPred == predTarget && pred->getType() == AN_BB &&
2455 phyPred->succs.size() == 1 && phyPred->succs.front() == phySucc)
2456 {
2457 bool calNewIB = (IPredPred == IB);
2458
2459 ANodeHG *newIf = new ANodeHG(AN_IF_THEN_ELSE_ENDIF);
2460 newIf->anodeId = getNextANodeId();
2461
2462 newIf->parent = whileNode;
2463 whileNode->children.erase(IPredPred);
2464 whileNode->children.erase(IPred);
2465 whileNode->children.erase(II);
2466 whileNode->children.insert(ISucc, newIf);
2467 reConnectAllPreds(pred, newIf);
2468 newIf->succs.push_back(phySucc);
2469 newIf->succs.push_back(exitNode);
2470 ANListReplace(exitNode->preds, ndbb, newIf);
2471 ANListReplace(predTarget->preds, pred, newIf);
2472
2473 newIf->children.push_back(pred);
2474 newIf->children.push_back(phyPred);
2475 newIf->children.push_back(ndbb);
2476 newIf->setBeginBB(pred->getBeginBB());
2477 newIf->setEndBB(ndbb->getEndBB());
2478 newIf->setExitBB(phySucc->getBeginBB());
2479
2480 ndbb->setHasBreak(true);
2481 newIf->setHasBreak(true);
2482 ndbb->setAllowSCF(true);
2483 newIf->setAllowSCF(true);
2484
2485 if (calNewIB)
2486 {
2487 IB = whileNode->children.begin();
2488 }
2489 II = ISucc;
2490 }
2491 else
2492 {
2493 whileNode->setAllowSCF(false);
2494 break;
2495 }
2496 }
2497
2498 if (!whileNode->getAllowSCF())
2499 {
2500 // If not allowed, propagate this attribute to all
2501 // its children with break inside.
2502 ANList::iterator II = IB;
2503 for (II = IB; II != IE; ++II)
2504 {
2505 ANode *nd = *II;
2506 if (nd->getHasBreak())
2507 {
2508 nd->setAllowSCF(false);
2509 }
2510 }
2511 }
2512 }
2513
2514 //
2515 // PST : program structure tree.
2516 // ANode -- PST's node. It's an abstract class.
2517 // ANodeBB : ANode -- one for each BB (leaf node)
2518 // ANodeHG : ANode -- Hammock graph (group of ANodeBB)
2519 // (non-leaf node)
2520 // ANodeBB is for BB, while ANodeHG is a Hammock graph that is
2521 // composed of seveal BBs (mostly more than one, it could be one).
2522 // The Hammock graph defined in this implementation is the minimum
2523 // hammock graph for any goto BB (BB that has goto inst). Nesting
2524 // relation is represented with ANode's parent field and ANodeHG's
2525 // children (ANodeBB cannot have children, it is a leaf node). Given
2526 // two hammock graphs, they either have no common ANodes or one is
2527 // completely enclosed within the other. The Whole program is a Hammock,
2528 // and this node is the PST's root node.
2529 //
2530 // In the implementation, we don't really create ANode for the PST's
2531 // root. Instead, we construct only hammock children of the Root,
2532 // denoted by topANodes. This topANodes are refered to as top-level
2533 // nodes. (Note that for any BB that is b/w any goto BB and its target
2534 // BB in the program order, an ANode is created for it; otherwise, it
2535 // has no ANode.)
2536 //
2537 // Note that the children of each ANodeHG is the ordered list of ANode
2538 // (in BB order).
2539 //
2540 // ACFG : Abstract Control Flow Graph of ANode (ACFG):
2541 // This is a CFG of ANode, used in the process of constructing PST.
2542 // It can be viewed as the top-level PST on-the-fly (ignoring any nodes
2543 // that are children of the top level nodes). By "on-the-fly", it means
2544 // that the ACFG is constantly changing. Once a group of nodes in ACFG
2545 // is formed into a single ANodeHG, this group is replaced by this new
2546 // single ANodeHG, and the group of nodes is no longer in the ACFG. Such
2547 // node condensing will make finding interesting program structures (if,
2548 // while, etc.) easier. The process continues until all BBs are processed.
2549 // All the nodes in the final ACFG is the actually top-level nodes of the PST.
2550 // (In another word, ACFG will always refect the control flow among the
2551 // outmost parent nodes of all ANodeBBs.)
2552 //
2553 // ACFG's control-flow edges : represented by ANode's preds & succs.
2554 //
2555 // Note that ACFG edges within any ANodeHG via preds & succs should be still
2556 // valid, but outgoing/incoming edges are no longer valid. To get a right
2557 // ANode target, using BB's preds/succs as they are always up to date.
2558 //
2559 // input
2560 // [IB, IE) : denotes BB from IB to IE, containing IB, but not IE.
2561 // Results
2562 // topANodes
2563 //
constructPST(BB_LIST_ITER IB,BB_LIST_ITER IE)2564 void CFGStructurizer::constructPST(BB_LIST_ITER IB, BB_LIST_ITER IE)
2565 {
2566 // This algorithm assumes that IE is loop-invariant. For the code
2567 // that might insert/delete BB, pay attention to this rule!
2568
2569 // ANStack is used to keep all pending ANodes. It also reflects
2570 // possible nesting relation (ANStack[i] could be the parent of
2571 // ANStack[i+1]).
2572 ANodeHG *node = nullptr;
2573 BB_LIST_ITER II = IB;
2574 while (II != IE)
2575 {
2576 G4_BB *bb = *II;
2577 ANodeBB *ndbb = getANodeBB(bb);
2578
2579 // Do the following cases in order. (Note that bb should be part of
2580 // the current pending ANode (top of ANStack), although it could
2581 // be the beginning node of the new inner ANode.)
2582 //
2583 // 1: If bb is the target of some gotos, that is, the end of
2584 // some CGs, merge the current bb into ANStack nodes of
2585 // which those CGs belong to (as they should be in the same
2586 // hammock).
2587 // Also, any backward goto's begin and end BB should be in
2588 // the same hammock. Once merging is done (if any), go on to
2589 // the next step in the loop.
2590 //
2591 // 2: If bb is the begin of CGs, need to create new HG for each
2592 // cg that is associated with a backward goto, a forward
2593 // conditional goto, and the root (first) forward unconditional
2594 // goto. No new HG is created for a non-root unconditional
2595 // goto (see details in code). This is where an inner HG is formed.)
2596 //
2597 // 3: Add bb into this node as it is part of node. If bb can
2598 // be merged with its pred, merge it. The new node's type
2599 // must be type AN_SEQUENCE.
2600 //
2601 // 4: Check if bb is the end of the node. If it is, finalize the
2602 // node. If it is not, go on to process the next BB in the next
2603 // iteration (This implies that if the next BB should be part
2604 // of the current pending HG in ANStack).
2605 //
2606
2607 // case 1.
2608 if (node)
2609 {
2610 ANStack_reconstruct(node, ndbb);
2611 }
2612
2613 // case 2
2614 CGList cgs;
2615 if (getCGBegin(bb, cgs))
2616 {
2617 // If bb is the begin of more than one ANode on ANStack, ndbb
2618 // should be added only into innermost ANode. Keep the
2619 // following in mind:
2620 // 1. (a) bb's last inst is a goto.
2621 // ndbb should be added to the current node (ANStack's
2622 // top) if it exists and the goto is a unconditional goto;
2623 // otherwise, a new hammock node is created and ndbb
2624 // is added into this new hammock node (thus for both
2625 // conditional goto and a top node).
2626 // (b) bb's last inst is not a goto
2627 // add ndbb in the current node.
2628 // 2. If there are several backward gotos with bb as the begin
2629 // BB, ndbb is added only into the ANode corresponding to the
2630 // innermost backward goto. (All backward gotos should have their
2631 // new ANodes created.)
2632 ControlGraph *cg;
2633
2634 // For backward cg, we will only create ANodeHG if the backward cg
2635 // is a loop candidate. If it is not candidate, extend the current
2636 // node with this cg.
2637 for (uint32_t i = (uint32_t) cgs.size() - 1; i > 0; --i)
2638 {
2639 // This must be non-innermost backward goto, don't add ndbb to
2640 // the new node, ie. passing nullptr(2nd arg) to ANodeHG's ctor.
2641 cg = cgs.back();
2642 MUST_BE_TRUE(cg->isBackward,
2643 "Additional CG must be for backward gotos");
2644 if (!cg->isLoopCandidate)
2645 {
2646 assert(node && "Node should not be empty here!");
2647 extendNode(node, cg);
2648 }
2649 else
2650 {
2651 ANodeHG *parent = node;
2652 node = new ANodeHG(cg, nullptr);
2653 node->anodeId = getNextANodeId();
2654 node->parent = parent;
2655 node->isLoopCandidate = true;
2656 if (parent)
2657 {
2658 parent->children.push_back(node);
2659 }
2660 ANStack_push(node);
2661 }
2662 cgs.pop_back();
2663 delete cg;
2664 }
2665
2666 // First one in cgs, it is either an innermost backward goto or a forward goto
2667 cg = cgs.back();
2668 cgs.pop_back();
2669 if (cg->isBackward || cg->gotoInst->getPredicate() != nullptr || node == nullptr)
2670 {
2671 if (cg->isBackward && !cg->isLoopCandidate)
2672 {
2673 assert(node && "Node should not be empty at this point!");
2674 extendNode(node, cg);
2675 }
2676 else
2677 {
2678 // New HG node created and pushed on ANStack
2679 ANodeHG *parent = node;
2680 node = new ANodeHG(cg, ndbb);
2681 node->anodeId = getNextANodeId();
2682 node->parent = parent;
2683 if (cg->isBackward)
2684 {
2685 node->isLoopCandidate = true;
2686 }
2687 if (parent)
2688 {
2689 parent->children.push_back(node);
2690 }
2691 ANStack_push(node);
2692 }
2693 }
2694 else
2695 {
2696 // For an unconditional goto, just add it into the current ANodeHG.
2697 G4_BB *exit = node->getExitBB();
2698 G4_BB *end = node->getEndBB();
2699 getNewRange(end, exit, ndbb->getEndBB(), ndbb->getExitBB());
2700 node->setExitBB(exit);
2701 node->setEndBB(end);
2702 }
2703 delete cg;
2704 }
2705 else if (!node)
2706 {
2707 // Not a goto BB, skip.
2708 ++II;
2709 continue;
2710 }
2711
2712 // case 3
2713 // Add this new ANodeBB into node and also check if it can be
2714 // merged with the previous one to form a new SEQUENCE hammock.
2715 if (ndbb->parent != node)
2716 {
2717 node->children.push_back(ndbb);
2718 ndbb->parent = node;
2719 }
2720 (void)mergeWithPred(ndbb);
2721
2722 // Step 4
2723 if (isBefore(bb, node->getEndBB()))
2724 {
2725 ++II;
2726 continue;
2727 }
2728
2729 // At this point, bb == node->getEndBB().
2730 // Found a HG if
2731 // 1. bb is a unconditional backward goto, or
2732 // 2. bb's fall-thru is the same as node's exitBB
2733 G4_INST *gotoInst = getGotoInst(bb);
2734 bool uncondBackwardGoto =
2735 gotoInst && bb->Succs.size() == 1 && gotoInst->asCFInst()->isBackward();
2736 if (!(uncondBackwardGoto ||
2737 ndbb->succs.size() == 0 || node->getExitBB() == nullptr ||
2738 ndbb->succs.front()->getBeginBB() == node->getExitBB()))
2739 {
2740 G4_BB *next = bb->getPhysicalSucc();
2741 if (next)
2742 {
2743 node->setEndBB(next);
2744 }
2745 ++II;
2746 continue;
2747 }
2748
2749 // At this point, node is a MHG.
2750
2751 // The top of ANStack is an MHG, poping and condensing it into a new
2752 // ANodeHG. After condensing, it might expose merging opportunity,
2753 // if so, merge the newly condensed node with its predecessor. Once
2754 // merging and/or condensing is performed, check if the new top of
2755 // ANStack is an MHG, if so, doing the above again. Continue this
2756 // until the top of ANStack is not a HG.
2757 //
2758 // At this stage, if a MHG's exit isn't its physical next, add a
2759 // landingBB as its next physical BB. Note that the landingBB is
2760 // the new exit BB, which is not part of this MHG. For now, we will
2761 // only generate a single BB at most after each original BB. In
2762 // another word, if more than one MHG share the same exit, it will
2763 // share this same landingBB (creating more than one BB could make
2764 // jmpi/goto inefficient - two goto/jmpi instead of one.
2765 // (May need a cfg simplification after structurizer.)
2766 //
2767 bool isTopHG = true;
2768
2769 // Keep track of the outmost SCF so that we can add a landingBB
2770 // only for the oustmost SCF if needed.
2771 ANodeHG *outmostSCF = nullptr;
2772 ANodeHG *lastHG = nullptr;
2773
2774 G4_BB *exitBB = node->getExitBB();
2775 while (isTopHG)
2776 {
2777 // a Hammock has been detected. Set HG to the right type.
2778 finalizeHG(node);
2779
2780 // record the outmost structured CF MHG
2781 if (node->isSCF())
2782 {
2783 outmostSCF = node;
2784 }
2785
2786 // check if it can be merged with the previous ANode. Note that
2787 // we only need to check its immediate predecessor !
2788 ANode *nd = mergeWithPred(node);
2789 lastHG = (ANodeHG*)nd;
2790
2791 isTopHG = false;
2792 ANStack_pop();
2793 if (!ANStack.empty())
2794 {
2795 node = (ANodeHG *)ANStack.back();
2796 G4_BB *exit = node->getExitBB();
2797 G4_BB *end = node->getEndBB();
2798 getNewRange(end, exit, nd->getEndBB(), nd->getExitBB());
2799 node->setEndBB(end);
2800 node->setExitBB(exit);
2801
2802 if (isBefore(bb, end) || exit != exitBB)
2803 {
2804 // No more HG that ends with this exitBB
2805 continue;
2806 }
2807 isTopHG = true;
2808 }
2809 } // while (istopHG)
2810
2811 if (outmostSCF)
2812 {
2813 // Add a landingBB and adjust control flows (both CFG and ACFG).
2814 // The new BB will be processed later (so no PST updating is
2815 // needed here).
2816 #if 0
2817 // TODO: for performance, we should only add landingBB for outmostSCF,
2818 // however, doing so would have to special-handling the landingBB as
2819 // it should merge with outmostSCF, which is not in ACFG anymore (not
2820 // a top node anymore). So far, the this function is able to handle
2821 // node in ACFG only.
2822 if (!outmostSCF->isExitPhySucc())
2823 {
2824 (void)addLandingBB(outmostSCF, II, true);
2825 }
2826 #else
2827 if (!lastHG->isExitPhySucc() && lastHG->getExitBB())
2828 {
2829 (void)addLandingBB(lastHG, II, true);
2830 }
2831 #endif
2832 }
2833
2834 if (ANStack.empty() && node)
2835 {
2836 // This is a top ANode, add it.
2837 topANodes.push_back(node);
2838 node = nullptr;
2839 }
2840 ++II;
2841 } // while (!ANStack.empty())
2842
2843 MUST_BE_TRUE(ANStack.empty(), "ANodeHG stack is incorrectly formed");
2844 #ifdef _DEBUG
2845 #if 0
2846 dumpcfg();
2847 dumpacfg();
2848 #endif
2849 #endif
2850 return;
2851 }
2852
2853 // return true if bb0 is before bb1 in the BB list.
isBefore(G4_BB * bb0,G4_BB * bb1)2854 bool CFGStructurizer::isBefore(G4_BB *bb0, G4_BB *bb1)
2855 {
2856 MUST_BE_TRUE(bb0 && bb1, "BB ptrs should not be null");
2857 if (bb0 == bb1)
2858 {
2859 return false;
2860 }
2861
2862 // common case first
2863 uint32_t id0 = bb0->getId();
2864 uint32_t id1 = bb1->getId();
2865 MUST_BE_TRUE(id0 != id1, "Two different BBs must have different Ids");
2866
2867 if (id0 < numOfBBs && id1 < numOfBBs)
2868 {
2869 return id0 < id1;
2870 }
2871
2872 uint32_t insertAtId0 = id0 >= numOfBBs ? getInsertAfterBB(bb0)->getId() : id0;
2873 uint32_t insertAtId1 = id1 >= numOfBBs ? getInsertAfterBB(bb1)->getId() : id1;
2874 if (insertAtId0 != insertAtId1)
2875 {
2876 return insertAtId0 < insertAtId1;
2877 }
2878
2879 // Now, the case for a BB and its new BB or two new BBs with the same
2880 // insertion position.
2881 //
2882 // Since we make sure the larger the id, the later the position during
2883 // inserting new BBs. We will simply check their ids
2884 return id0 < id1;
2885 }
2886
2887
2888 // Insert newNode into parent and insertion place is after node
2889 // if isAfter is true, or before node if isAfter is false.
PSTAddANode(ANodeHG * parent,ANode * node,ANode * newNode,bool isAfter)2890 void CFGStructurizer::PSTAddANode(
2891 ANodeHG *parent, ANode *node, ANode *newNode, bool isAfter)
2892 {
2893 newNode->parent = parent;
2894 if (parent)
2895 {
2896 ANList::iterator I1 = findANode(parent->children, node);
2897 MUST_BE_TRUE(I1 != parent->children.end(),
2898 "Child node should be in parent's children list");
2899 if (isAfter)
2900 {
2901 ++I1;
2902 }
2903 parent->children.insert(I1, newNode);
2904 }
2905 }
2906
2907 // adjustBBId will make sure that all the new BB's with the same insertion
2908 // position should have ids in the increasing order.
2909 //
2910 // Before calling this function, newBB has to be inserted into
2911 // the BBs and its physicalSucc/Pred has been set up correctly.
adjustBBId(G4_BB * newbb)2912 void CFGStructurizer::adjustBBId(G4_BB *newbb)
2913 {
2914 // newBB's id is the largest at this moment. Make sure it is in order
2915 uint32_t id = newbb->getId();
2916 G4_BB *pred = newbb;
2917 G4_BB *next = newbb->getPhysicalSucc();
2918 while (next && next->getId() >= numOfBBs)
2919 {
2920 pred->setId(next->getId());
2921 pred = next;
2922 next = next->getPhysicalSucc();
2923 }
2924 pred->setId(id);
2925 }
2926
createBBWithLabel()2927 G4_BB *CFGStructurizer::createBBWithLabel()
2928 {
2929 G4_BB *newBB = CFG->createNewBB();
2930
2931 // Create a label for the new BB
2932 std::string str = std::string(CFG->builder->kernel.getName()) + "_cf_" + std::to_string(newBB->getId());
2933 G4_Label* lab = CFG->builder->createLabel(str, LABEL_BLOCK);
2934 G4_INST *labelInst = CFG->builder->createLabelInst(lab, false);
2935 newBB->push_front(labelInst);
2936 return newBB;
2937 }
2938
2939 // Insert an inst at the begin after a label instruction.
insertAtBegin(G4_BB * aBB,G4_INST * anInst)2940 void CFGStructurizer::insertAtBegin(G4_BB* aBB, G4_INST *anInst)
2941 {
2942 INST_LIST_ITER I = aBB->begin();
2943 MUST_BE_TRUE((*I)->opcode() == G4_label,
2944 "The 1st inst of BB must be a label inst");
2945 ++I;
2946 aBB->insertBefore(I, anInst);
2947 }
2948
setJoinJIP(G4_BB * joinBB,G4_BB * jipBB)2949 void CFGStructurizer::setJoinJIP(G4_BB* joinBB, G4_BB* jipBB)
2950 {
2951 G4_INST *firstInst = joinBB->getFirstInst();
2952 MUST_BE_TRUE(firstInst, "Missing a join inst in join BB!");
2953 if (firstInst &&
2954 (firstInst->opcode() == G4_join || firstInst->opcode() == G4_endif))
2955 {
2956 G4_Label *jipLabel = jipBB ? jipBB->getLabel() : nullptr;
2957 firstInst->asCFInst()->setJip(jipLabel);
2958 return;
2959 }
2960 MUST_BE_TRUE(false, "Missing Join/endif instruction in join BB!");
2961 }
2962
convertChildren(ANodeHG * nodehg,G4_BB * nextJoinBB)2963 void CFGStructurizer::convertChildren(ANodeHG *nodehg, G4_BB *nextJoinBB)
2964 {
2965 // JIP setting of Join instruction:
2966 // activeJoinBBs : a list of join BBs in the increasing order of BB layout,
2967 // which are after the BB being processed currently.
2968 //
2969 // The join BB is the BB with join instruction as its first instruction.
2970 // If the current BB is the first join BB (if exists) in acticveJoinBBs,
2971 // its JIP will be set to the 2nd join in activeJoinBBs. Once its JIP is
2972 // set, this BB is removed from activeJoinBBs.
2973 //
2974 // activeJoinBBs keeps a list of join BBs whose jips needs to be set.
2975 // It works like this: for each child ANode, if its exit BB is a join BB,
2976 // add it into activeJoinBBs (we check if a BB is a join BB by checking
2977 // if the BB has join instruction in it). As conversion is done bottom-up,
2978 // all children must have been converted before their parents. This means
2979 // that a join should be inserted in a child's exit BB if that child does
2980 // need join. But that join's JIP is not known until the next join is found.
2981 // Adding unknown-JIP join into activeJoinBBs so that it will get set when
2982 // next next join is reached.
2983 //
2984 BB_LIST activeJoinBBs;
2985 if (nextJoinBB)
2986 {
2987 // The incoming joinBB is a special one, it could
2988 // be a BB with its else/endif/while/join.
2989 activeJoinBBs.push_back(nextJoinBB);
2990 }
2991
2992 // If nodehg is DO_WHILE, its end should have been processed in convertDoWhile()
2993 // before invoking this function, hence, skip the end BB.
2994 ANList::iterator II = nodehg->children.begin();
2995 ANList::iterator IE = nodehg->children.end();
2996 if (nodehg->getKind() == ANKIND_SCF && nodehg->getType() == AN_DO_WHILE)
2997 {
2998 if (nodehg->children.size() > 1)
2999 {
3000 --IE;
3001 ANode *loopWhileNode = *IE;
3002 loopWhileNode->setVisited(true);
3003 }
3004 else
3005 {
3006 II = IE;
3007 }
3008 }
3009
3010 for (; II != IE; ++II)
3011 {
3012 ANode *nd = *II;
3013 G4_BB *begin = nd->getBeginBB();
3014
3015 G4_BB *nextjoin = nullptr;
3016 while (!activeJoinBBs.empty())
3017 {
3018 nextjoin = activeJoinBBs.front();
3019 if (begin == nextjoin || isBefore(nextjoin, begin))
3020 {
3021 // nextjoin has been reached. Set its JIP.
3022 activeJoinBBs.pop_front();
3023 G4_BB *tmp =
3024 !activeJoinBBs.empty() ? activeJoinBBs.front() : nullptr;
3025 setJoinJIP(nextjoin, tmp);
3026 }
3027 else
3028 {
3029 break;
3030 }
3031 }
3032
3033 // For loops, need to handle the backward goto and may insert join
3034 for (BB_LIST_ITER I1 = begin->Preds.begin(), E1 = begin->Preds.end();
3035 I1 != E1; ++I1)
3036 {
3037 // Don't process loopWhileNode if it is as it has been processed
3038 G4_BB *bb = *I1;
3039 G4_INST *gotoInst = getGotoInst(bb);
3040 ANodeBB *ndbb = getANodeBB(bb);
3041 ANode *nd1 = ndbb->getChildANode(nodehg);
3042 if (nd1 && nd != nd1 && gotoInst && !nd1->isVisited() &&
3043 gotoInst->asCFInst()->isBackward() &&
3044 bb->Succs.back() == begin)
3045 {
3046 convertPST(nd1, nullptr);
3047 nd1->setVisited(true);
3048 G4_BB *loopJoinBB = bb->getPhysicalSucc();
3049 if (loopJoinBB && isJoinBB(loopJoinBB))
3050 {
3051 // Add loop's join bb into activeJoinBBs
3052 if (findBB(&activeJoinBBs, loopJoinBB) == activeJoinBBs.end())
3053 {
3054 bb_oinsert(activeJoinBBs, loopJoinBB);
3055 }
3056 }
3057 }
3058 }
3059
3060 // Skip backward goto ANode, as it has been handled already
3061 if (nd->isVisited())
3062 {
3063 continue;
3064 }
3065
3066 nextjoin = activeJoinBBs.empty() ? nullptr : activeJoinBBs.front();
3067
3068 (void)convertPST(nd, nextjoin);
3069 nd->setVisited(true);
3070
3071 G4_BB *joinbb = nd->getExitBB();
3072 // Special case for ANodeBB of a break.
3073 // As its exit BB is still the original target (loop exit), there is
3074 // no join instruction needed at the target as it is implicit loop
3075 // join place. Here, just set joinbb to nullptr.
3076 // (need to refactor the code as SCF (except SEQUENCE) should not
3077 // have a join in its exit.)
3078 if (nd->getType() == AN_BB && nd->getKind() == ANKIND_SCF && nd->getHasBreak())
3079 {
3080 joinbb = nullptr;
3081 }
3082
3083 #if 0
3084 if (nd->getType() == AN_BB && begin->size() > 0)
3085 {
3086 G4_INST *gotoInst = begin->back();
3087 MUST_BE_TRUE(false, "Backward goto should have ben processed");
3088 if (gotoInst->opcode() == G4_goto && gotoInst->isBackward())
3089 {
3090 //
3091 MUST_BE_TRUE(false, "Backward goto should have ben processed");
3092 joinbb = begin->Succs.front();
3093 }
3094 }
3095 #endif
3096 if (joinbb && isJoinBB(joinbb))
3097 {
3098 // Add exit into activeJoinBBs
3099 if (findBB(&activeJoinBBs, joinbb) == activeJoinBBs.end())
3100 {
3101 bb_oinsert(activeJoinBBs, joinbb);
3102 }
3103 }
3104 }
3105
3106 uint32_t sz = (uint32_t)activeJoinBBs.size();
3107 #if 0
3108 // temporarily turn off assert as it is too conservative and miss
3109 // some cases, thus genrate false assertion!
3110
3111 // activeJoinBBs should have the following at this moment:
3112 // 1. nextJoinBB
3113 // 2. the nodehg's exit(s).
3114 // nodehg is a hammock, so it should have a single exit. However,
3115 // for something like "if (c) break endif", and nodehg is somehow
3116 // decided to be non-SCF (ie converted to goto), nodehg will have
3117 // two exits.
3118 // [todo: have a simple/better way to handle this]
3119 // Note that the outer node of PST may recalculate JIP.
3120 uint32_t NEntries = nextJoinBB ? 2 : 1;
3121 if (nodehg->getHasBreak() && !nodehg->getAllowSCF())
3122 {
3123 ++NEntries;
3124 }
3125 assert(sz <= NEntries && "Something is wrong!");
3126 #endif
3127 if (sz > 0)
3128 {
3129 // lastJoin will never be nullptr.
3130 G4_BB *lastJoin = activeJoinBBs.front();
3131 activeJoinBBs.pop_front();
3132 for (--sz; sz > 0; --sz)
3133 {
3134 if (lastJoin == nextJoinBB)
3135 {
3136 // don't need to process it further as the outer
3137 // ANode will handle nextJoinBB and nodes after it.
3138 break;
3139 }
3140 G4_BB *JipBB = activeJoinBBs.front();
3141 activeJoinBBs.pop_front();
3142 setJoinJIP(lastJoin, JipBB);
3143 lastJoin = JipBB;
3144 }
3145 // Handle the last one.
3146 if (lastJoin != nextJoinBB)
3147 {
3148 // set the last to nullptr
3149 setJoinJIP(lastJoin, nullptr);
3150 }
3151 }
3152 return;
3153 }
3154
3155 // TODO: more comments
3156 // If then block and else block may have gotos, we will insert join at the end of
3157 // then or else blocks as the new join exit.
convertIf(ANodeHG * node,G4_BB * nextJoinBB)3158 void CFGStructurizer::convertIf(ANodeHG *node, G4_BB *nextJoinBB)
3159 {
3160
3161 //#if defined(_DEBUG)
3162 // useDebugEnv();
3163 //#endif
3164
3165 // nextJoinLabel is used to set JIP for endif instruction
3166 G4_Label *nextJoinLabel = nextJoinBB ? nextJoinBB->getLabel() : nullptr;
3167 G4_BB *begin = node->getBeginBB();
3168 G4_INST *gotoInst = begin->back();
3169 G4_ExecSize execSize = gotoInst->getExecSize();
3170 execSize = execSize > 1 ? execSize : kernelExecSize;
3171 ANList::iterator II = node->children.begin();
3172 ANode *thenNode = *(++II); // children[1]
3173 G4_BB *end = node->getEndBB();
3174 G4_BB *exit = node->getExitBB();
3175
3176 MUST_BE_TRUE(end->getPhysicalSucc() == exit,
3177 "Landing BB should have been inserted during construction of hammock graph");
3178
3179 bool isUniform = isGotoScalarJmp(gotoInst);
3180 ANodeKind kind = ANKIND_GOTOJOIN;
3181 if (doScalarJmp && isUniform)
3182 {
3183 kind = ANKIND_JMPI;
3184 }
3185 else if (doStructCF && node->getAllowSCF())
3186 {
3187 if (node->getHasBreak())
3188 {
3189 ANodeHG *innermostWhile = getInnerMostWhile(node);
3190 MUST_BE_TRUE(innermostWhile, "if-endif with break isn't inside a while");
3191 if (innermostWhile->getKind() == ANKIND_SCF)
3192 {
3193 kind = ANKIND_SCF;
3194 }
3195 }
3196 else
3197 {
3198 kind = ANKIND_SCF;
3199 }
3200 }
3201 node->setKind(kind);
3202
3203
3204 // If this IF-ENDIF does not have break, there must be at least
3205 // one active channel enterning IF-ENDIF and at least one active
3206 // channel at the end of "IF-ENDIF". Thus endif's JIP will never
3207 // be used.
3208 if (!node->getHasBreak())
3209 {
3210 nextJoinLabel = nullptr;
3211 }
3212
3213 if (node->getHasBreak() && kind == ANKIND_GOTOJOIN)
3214 {
3215 // if break isn't honored, if-endif should not be
3216 // honored and should be treated as non if/endif node.
3217 convertChildren(node, nextJoinBB);
3218 return;
3219 }
3220
3221 ANodeBB *exitndbb = getANodeBB(exit);
3222 if (kind != ANKIND_JMPI && !isANodeOnlyPred(exit, node) &&
3223 (exitndbb->isJmpiTarget ||
3224 (kind == ANKIND_GOTOJOIN && !isJoinBB(exit)) ||
3225 (kind == ANKIND_SCF && !isBBLabelAvailable(exit))))
3226 {
3227 // To have a new BB for either endif or join
3228 exitndbb = addLandingBB(node, BBs->end(), false);
3229
3230 // Add it into PST. Will need {[SEQUENCE] Node, exitndbb} as
3231 // a new ANode to replace node.
3232 PSTAddANode((ANodeHG*)node->parent, node, exitndbb, true);
3233
3234 exit = exitndbb->getBeginBB();
3235 }
3236 G4_Label *endifLabel = exit->getLabel();
3237
3238 switch (node->type)
3239 {
3240 case AN_IF_THEN_ENDIF:
3241 {
3242 if (kind == ANKIND_JMPI)
3243 {
3244 CFG->convertGotoToJmpi(gotoInst);
3245 exitndbb->isJmpiTarget = true;
3246 (void)convertPST(thenNode, nextJoinBB);
3247 return;
3248 }
3249 else if (kind == ANKIND_GOTOJOIN)
3250 {
3251 // Generate goto/join
3252 generateGotoJoin(begin, exit, exit);
3253 setJoinJIP(exit, nextJoinBB);
3254 (void)convertPST(thenNode, node->getHasBreak() ? exit : nullptr);
3255 return;
3256 }
3257
3258 G4_INST* endifInst = CFG->builder->createInternalCFInst(
3259 NULL, G4_endif, execSize, nextJoinLabel, NULL, gotoInst->getOption());
3260 // Set CISA index link here explicitly since not doing so causes
3261 // endif to merge with immediately following line. This prevents
3262 // debugger from setting bp on src line immediately following
3263 // the endif.
3264 endifInst->inheritDIFrom(gotoInst);
3265 insertAtBegin(exit, endifInst);
3266
3267 // jip = uip = endif
3268 G4_Predicate* pred = gotoInst->getPredicate();
3269 MUST_BE_TRUE(pred != NULL, "if must have non-null predicate");
3270 pred->setState(pred->getState() == PredState_Plus ? PredState_Minus : PredState_Plus);
3271 G4_INST* ifInst = CFG->builder->createInternalCFInst(
3272 pred, G4_if, execSize, endifLabel, endifLabel, gotoInst->getOption());
3273 ifInst->inheritDIFrom(endifInst);
3274 begin->pop_back();
3275 begin->push_back(ifInst);
3276 if (isUniform)
3277 {
3278 ifInst->asCFInst()->setUniform(true);
3279 }
3280
3281 if (thenNode->isHammock())
3282 {
3283 // if:
3284 // goto endif
3285 // ...
3286 // goto endif
3287 // else
3288 // ...
3289 // endif:
3290 //
3291 // changed to:
3292 // if:
3293 // goto joinbb
3294 // ...
3295 // goto joinbb
3296 // joinbb: <--- insert join instruction here
3297 // else
3298 // ...
3299 // endif:
3300 //
3301 ANodeBB *ndbb = addLandingBB((ANodeHG*)thenNode, BBs->end(), false);
3302
3303 // For now, just insert it into node (should create SEQ)
3304 PSTAddANode(node, thenNode, ndbb, true);
3305 }
3306 (void)convertPST(thenNode, node->getHasBreak() ? exit : nullptr);
3307
3308 break;
3309 }
3310
3311 case AN_IF_THEN_ELSE_ENDIF:
3312 {
3313 ANode *elseNode = *(++II); // children[2];
3314 G4_BB *thenLastBB = thenNode->getEndBB();
3315 G4_INST *thenLastInst = thenLastBB->back();
3316
3317 // May have the case in which thenLastInst isn't goto
3318 // if (uniformCond)
3319 // return;
3320 // else
3321 // ...
3322 // endif
3323 MUST_BE_TRUE(
3324 (thenLastInst->opcode() != G4_goto &&
3325 thenLastBB->Succs.size() == 0) ||
3326 (thenLastInst->opcode() == G4_goto &&
3327 thenLastInst->getPredicate() == nullptr),
3328 "Goto in then block should be unconditional");
3329
3330 if (kind == ANKIND_JMPI)
3331 {
3332 // goto's target is elseBB, and no need to create a new label.
3333 CFG->convertGotoToJmpi(gotoInst);
3334 {
3335 G4_BB *elsebeginbb = elseNode->getBeginBB();
3336 ANodeBB *elsendbb = getANodeBB(elsebeginbb);
3337 elsendbb->isJmpiTarget = true;
3338 }
3339 if (thenLastInst->opcode() == G4_goto)
3340 {
3341 CFG->convertGotoToJmpi(thenLastInst);
3342 exitndbb->isJmpiTarget = true;
3343 }
3344
3345 (void)convertPST(thenNode, nextJoinBB);
3346 (void)convertPST(elseNode, nextJoinBB);
3347 break;
3348 }
3349 else if (kind == ANKIND_GOTOJOIN)
3350 {
3351 // Generate goto/join
3352 G4_BB *elseFirstBB = elseNode->getBeginBB();
3353 generateGotoJoin(begin, elseFirstBB, elseFirstBB);
3354
3355 // convertPST(thenNode, elseFirstBB) will handle this
3356 // generateGotoJoin(thenLastBB, elseFirstBB, exit);
3357 (void)convertPST(thenNode, elseFirstBB);
3358 (void)convertPST(elseNode, exit);
3359 setJoinJIP(elseFirstBB, exit);
3360 setJoinJIP(exit, nextJoinBB);
3361
3362 return;
3363 }
3364
3365 // Add endif instruction into exit BB, else into the beginning
3366 // BB of else part, and replace goto in the begin BB with if
3367 // instruction.
3368 auto endifInst = CFG->builder->createInternalCFInst(
3369 NULL, G4_endif, execSize, nextJoinLabel, NULL, gotoInst->getOption());
3370 endifInst->inheritDIFrom(gotoInst);
3371 insertAtBegin(exit, endifInst);
3372
3373 G4_BB *elseFirstBB = elseNode->getBeginBB();
3374 G4_Label* elseLabel = elseFirstBB->getLabel();
3375 G4_Predicate* pred = gotoInst->getPredicate();
3376 MUST_BE_TRUE(pred != NULL, "if must have non-null predicate");
3377 pred->setState(pred->getState() == PredState_Plus ? PredState_Minus : PredState_Plus);
3378
3379 // if instruction : jip = else_label, uip = endif
3380 G4_INST* ifInst = CFG->builder->createInternalCFInst(
3381 pred, G4_if, execSize, elseLabel, endifLabel, gotoInst->getOption());
3382 ifInst->inheritDIFrom(endifInst);
3383 begin->pop_back();
3384 begin->push_back(ifInst);
3385
3386 // else instruction: jip = uip = endif
3387 G4_BB *newThenLastBB = thenLastBB;
3388 if (thenNode->isHammock())
3389 {
3390 ANodeBB *ndbb = addLandingBB((ANodeHG*)thenNode, BBs->end(), false);
3391
3392 // Correct way to update PST is as {[SEQ]thenNode, newThenLastBB}
3393 // and replace thenNode with this new SEQ node.
3394 // For now, it does not matter much, just use the following
3395 PSTAddANode(node, thenNode, ndbb, true);
3396
3397 newThenLastBB = ndbb->getBeginBB();
3398 }
3399 G4_INST *thenGoto = newThenLastBB->back();
3400 G4_INST* elseInst = CFG->builder->createInternalCFInst(
3401 NULL, G4_else, execSize, endifLabel, endifLabel, gotoInst->getOption());
3402 elseInst->inheritDIFrom(endifInst);
3403 if (thenGoto->opcode() == G4_goto)
3404 {
3405 newThenLastBB->pop_back();
3406 }
3407 newThenLastBB->push_back(elseInst);
3408 if (isUniform)
3409 {
3410 ifInst->asCFInst()->setUniform(true);
3411 }
3412
3413 (void)convertPST(thenNode, node->getHasBreak() ? newThenLastBB : nullptr);
3414
3415 if (elseNode->isHammock())
3416 {
3417 // Insert a join bb
3418 ANode *ndbb = addLandingBB((ANodeHG*)elseNode, BBs->end(), false);
3419 PSTAddANode(node, thenNode, ndbb, true);
3420 }
3421 // exit must be the physical next of this if-endif
3422 (void)convertPST(elseNode, node->getHasBreak() ? exit : nullptr);
3423 break;
3424 }
3425 default:
3426 MUST_BE_TRUE(false, "Unreachable, must be a wrong node type");
3427 break;
3428 }
3429 return;
3430 }
3431
convertDoWhile(ANodeHG * node,G4_BB * nextJoinBB)3432 void CFGStructurizer::convertDoWhile(ANodeHG *node, G4_BB *nextJoinBB)
3433 {
3434
3435 //#if defined(_DEBUG)
3436 // useDebugEnv();
3437 //#endif
3438
3439 G4_BB *end = node->getEndBB();
3440 G4_INST *gotoInst = end->back();
3441 G4_ExecSize execSize = gotoInst->getExecSize();
3442 execSize = execSize > g4::SIMD1 ? execSize : kernelExecSize;
3443 G4_BB *head = node->getBeginBB();
3444 G4_BB *exit = node->getExitBB();
3445
3446 // For break/continue(no continue for now), unfortunately hammock does
3447 // not work. Need to specialize it. A generic approach is to
3448 // change all break/continue into a single ANode with its targets ignored
3449 // and just use its physical next as its next. Then re-do hammock only for
3450 // loop's children (no while BB). If every single break/continue are
3451 // within the structured no-loop CF, then it can be break/continue.
3452 // Note that mix of goto-join with break/continue may be not working, so
3453 // need to check it out to avoid generating them.
3454 //
3455
3456 bool isUniform = isGotoScalarJmp(gotoInst);
3457 ANodeKind ndkind = ANKIND_GOTOJOIN;
3458 if (doScalarJmp && isUniform &&
3459 (!doStructCF || !node->getHasBreak()))
3460 {
3461 // If loop has break, favor structured CF, not jmpi
3462 ndkind = ANKIND_JMPI;
3463 }
3464 else if (doStructCF && node->getAllowSCF())
3465 {
3466 ndkind = ANKIND_SCF;
3467 }
3468 node->setKind(ndkind);
3469 if (ndkind == ANKIND_JMPI)
3470 {
3471 // goto's target is head.
3472 CFG->convertGotoToJmpi(gotoInst);
3473 convertChildren(node, nextJoinBB);
3474 }
3475 else if (ndkind == ANKIND_SCF)
3476 {
3477 G4_Label* doLabel = head->getLabel();
3478
3479 // Create a new BB to keep while. This new BB
3480 // would be in the same PST ANode as the original end.
3481 ANode *ndbb = addSplitBBAtEnd(end);
3482 node->children.push_back(ndbb);
3483 ndbb->parent = node;
3484
3485 end = ndbb->getBeginBB();
3486
3487 // jip = uip = do
3488 G4_INST *whileInst = CFG->builder->createInternalCFInst(
3489 gotoInst->getPredicate(), G4_while, execSize, doLabel, doLabel, InstOpt_NoOpt);
3490 whileInst->inheritDIFrom(end->back());
3491 end->pop_back();
3492 end->push_back(whileInst);
3493 if (isUniform)
3494 {
3495 whileInst->asCFInst()->setUniform(true);
3496 }
3497
3498 convertChildren(node, node->getHasBreak() ? end : nullptr);
3499 }
3500 else
3501 {
3502 if (!isJoinBB(exit) && !canbeJoinBB(exit))
3503 {
3504 ANode *ndbb = addLandingBB(node, BBs->end(), false);
3505
3506 // Make it simply by adding newexitbb into node->parent.
3507 // (May use {[SEQ] node, newexitbb} to replace.)
3508 PSTAddANode((ANodeHG*)node->parent, node, ndbb, true);
3509
3510 exit = ndbb->getBeginBB();
3511 }
3512 generateGotoJoin(end, exit, exit);
3513 setJoinJIP(exit, nextJoinBB);
3514 getANodeBB(end)->setVisited(true);
3515
3516 convertChildren(node, exit);
3517 }
3518 }
3519
generateGotoJoin(G4_BB * gotoBB,G4_BB * jibBB,G4_BB * joinBB)3520 void CFGStructurizer::generateGotoJoin(G4_BB *gotoBB, G4_BB *jibBB, G4_BB *joinBB)
3521 {
3522 G4_INST *gotoInst = gotoBB->back();
3523 MUST_BE_TRUE(gotoInst && gotoInst->opcode() == G4_goto,
3524 "gotoBB should have goto instruction");
3525 G4_Label *nextJoinLabel = jibBB ? jibBB->getLabel() : nullptr;
3526 G4_ExecSize execSize = gotoInst->getExecSize();
3527 execSize = execSize > g4::SIMD1 ? execSize : kernelExecSize;
3528
3529 if (gotoInst->getExecSize() == g4::SIMD1)
3530 { // For simd1 goto, convert it to a goto with the right execSize.
3531 gotoInst->setExecSize(execSize);
3532 gotoInst->setOptions(InstOpt_M0);
3533 }
3534 gotoInst->asCFInst()->setJip(nextJoinLabel);
3535
3536 // set join's JIP to null as it will be handled later
3537 CFG->insertJoinToBB(joinBB, execSize, nullptr);
3538
3539 if (!gotoInst->asCFInst()->isBackward() && !CFG->builder->gotoJumpOnTrue())
3540 {
3541 // For BDW/SKL, need to flip goto's pred as false condition will jump.
3542 G4_Predicate *pred = gotoInst->getPredicate();
3543 if (pred)
3544 {
3545 pred->setState(pred->getState() == PredState_Plus ? PredState_Minus : PredState_Plus);
3546 }
3547 else
3548 {
3549 // if there is no predicate, generate a predicate with all 0s.
3550 // if predicate is SIMD32, we have to use a :ud dst type for the move
3551 uint8_t numFlags = gotoInst->getExecSize() > g4::SIMD16 ? 2 : 1;
3552 G4_Declare* tmpFlagDcl = CFG->builder->createTempFlag(numFlags);
3553 G4_DstRegRegion* newPredDef = CFG->builder->createDst(tmpFlagDcl->getRegVar(), 0, 0, 1,
3554 numFlags == 2 ? Type_UD : Type_UW);
3555 G4_INST *predInst = CFG->builder->createMov(
3556 g4::SIMD1,
3557 newPredDef,
3558 CFG->builder->createImm(0, Type_UW),
3559 InstOpt_WriteEnable, false);
3560 INST_LIST_ITER iter = gotoBB->end();
3561 iter--;
3562 gotoBB->insertBefore(iter, predInst);
3563
3564 pred = CFG->builder->createPredicate(
3565 PredState_Plus,
3566 tmpFlagDcl->getRegVar(),
3567 0);
3568 gotoInst->setPredicate(pred);
3569 }
3570 }
3571 }
3572
convertGoto(ANodeBB * node,G4_BB * nextJoinBB)3573 void CFGStructurizer::convertGoto(ANodeBB *node, G4_BB *nextJoinBB)
3574 {
3575 G4_BB *beginbb = node->getEndBB();
3576 G4_INST *gotoInst = getGotoInst(beginbb);
3577 if (!gotoInst || node->isVisited())
3578 {
3579 return;
3580 }
3581 node->setVisited(true);
3582
3583 bool isUniform = isGotoScalarJmp(gotoInst);
3584 G4_BB *exitbb = node->getExitBB();
3585 ANodeHG *innermostWhile = getInnerMostWhile(node);
3586 ANodeKind kind = ANKIND_GOTOJOIN;
3587 MUST_BE_TRUE(innermostWhile || (!innermostWhile && !node->getHasBreak()),
3588 "Break isn't inside a while");
3589 if (innermostWhile && node->getHasBreak() &&
3590 innermostWhile->getKind() == ANKIND_SCF)
3591 {
3592 kind = ANKIND_SCF;
3593 }
3594 else
3595 {
3596 // nextJoinBB, if any, must be after getBeginBB(). So, if nextJoinBB
3597 // is before exitbb, nextJoinBB is in the middle of [beginbb, exitbb]
3598 bool hasJoinInMiddle = nextJoinBB && isBefore(nextJoinBB, exitbb);
3599 if (doScalarJmp && isUniform &&
3600 (gotoInst->asCFInst()->isBackward() || !hasJoinInMiddle))
3601 {
3602 kind = ANKIND_JMPI;
3603 }
3604 }
3605 node->setKind(kind);
3606
3607 //
3608 // generate goto/join
3609 //
3610 G4_ExecSize execSize = gotoInst->getExecSize();
3611 execSize = execSize > g4::SIMD1 ? execSize : kernelExecSize;
3612
3613 if (kind == ANKIND_JMPI)
3614 {
3615 CFG->convertGotoToJmpi(gotoInst);
3616 {
3617 ANodeBB *exitndbb = getANodeBB(exitbb);
3618 exitndbb->isJmpiTarget = true;
3619 }
3620
3621 if (beginbb == exitbb || isBefore(exitbb, beginbb))
3622 {
3623 // TODO: Should find the 1st join BB out of this loop instead
3624 // of using the next BB as join BB (see comments in
3625 // convertChildren().
3626 G4_BB *joinbb = beginbb->getPhysicalSucc();
3627
3628 // set join's JIP to null as it will be handled later
3629 CFG->insertJoinToBB(joinbb, execSize, nullptr);
3630 }
3631 return;
3632 }
3633
3634 if (kind == ANKIND_SCF)
3635 {
3636 G4_BB *whilebb = innermostWhile->getEndBB();
3637 G4_BB *innerBlock = nextJoinBB ? nextJoinBB : whilebb;
3638
3639 G4_INST* breakInst = CFG->builder->createInternalCFInst(
3640 gotoInst->getPredicate(), G4_break, execSize,
3641 innerBlock->getLabel(), whilebb->getLabel(), InstOpt_NoOpt);
3642 breakInst->inheritDIFrom(gotoInst);
3643 beginbb->pop_back();
3644 beginbb->push_back(breakInst);
3645 if (isUniform)
3646 {
3647 breakInst->asCFInst()->setUniform(true);
3648 }
3649 return;
3650 }
3651
3652 G4_BB *joinbb = exitbb;
3653 G4_BB *JipBB = joinbb;
3654 if (beginbb == exitbb || isBefore(exitbb, beginbb))
3655 {
3656 joinbb = beginbb->getPhysicalSucc();
3657 JipBB = joinbb;
3658 }
3659 else{
3660 if (nextJoinBB && isBefore(nextJoinBB, JipBB))
3661 {
3662 JipBB = nextJoinBB;
3663 }
3664 }
3665 generateGotoJoin(beginbb, JipBB, joinbb);
3666 }
3667
convertPST(ANode * node,G4_BB * nextJoinBB)3668 bool CFGStructurizer::convertPST(ANode *node, G4_BB *nextJoinBB)
3669 {
3670 bool change = false;
3671 ANodeType ty = node->getType();
3672 if (ty == AN_IF_THEN_ENDIF ||
3673 ty == AN_IF_THEN_ELSE_ENDIF ||
3674 ty == AN_DO_WHILE ||
3675 ty == AN_SEQUENCE ||
3676 ty == AN_COMPOSITE)
3677 {
3678 ANodeHG *nodehg = (ANodeHG*)node;
3679 if (node->type == AN_DO_WHILE)
3680 {
3681 // For a while loop, it must have at least one active
3682 // channel at exit. Thus, passing nullptr as nextJoinBB!
3683 convertDoWhile(nodehg, nullptr);
3684 }
3685 else if (node->type == AN_IF_THEN_ENDIF ||
3686 node->type == AN_IF_THEN_ELSE_ENDIF)
3687 {
3688 convertIf(nodehg, nextJoinBB);
3689 }
3690 else
3691 {
3692 convertChildren(nodehg, nextJoinBB);
3693 }
3694 change = true;
3695 }
3696 else if (ty == AN_BB)
3697 {
3698 convertGoto((ANodeBB*)node, nextJoinBB);
3699 change = true;
3700 }
3701 else
3702 {
3703 MUST_BE_TRUE(false, "Wrong ANode type");
3704 }
3705
3706 return change;
3707 }
3708
3709 // Get the debug info from DIFromBB and use it as I's debug info
updateInstDI(G4_INST * I,G4_BB * DIFromBB)3710 void CFGStructurizer::updateInstDI(G4_INST* I, G4_BB* DIFromBB)
3711 {
3712 // sanity check
3713 if (DIFromBB == nullptr) return;
3714
3715 // If DIFromBB is empty, skip setting.
3716 G4_INST* DIFromInst = nullptr;
3717 for (auto II = DIFromBB->begin(), IE = DIFromBB->end(); II != IE; ++II)
3718 {
3719 G4_INST* Inst = (*II);
3720 if (Inst->isLabel() || Inst->isLifeTimeEnd() || Inst->isPseudoKill() ||
3721 Inst->isPseudoUse() || Inst->isSpillIntrinsic() || Inst->isFillIntrinsic())
3722 {
3723 continue;
3724 }
3725 DIFromInst = Inst;
3726 break;
3727 }
3728 if (DIFromInst)
3729 {
3730 I->inheritDIFrom(DIFromInst);
3731 }
3732 }
3733
run()3734 void CFGStructurizer::run()
3735 {
3736 constructPST(CFG->begin(), CFG->end());
3737
3738 if (topANodes.size() == 0)
3739 {
3740 return;
3741 }
3742
3743 // Now, convert ANodes
3744 if (topANodes.size() > 0)
3745 {
3746 for (uint32_t i = 0, size = topANodes.size(); i < size; ++i)
3747 {
3748 (void)convertPST(topANodes[i], nullptr);
3749 }
3750 }
3751
3752 if (anodeBBs.newBBToANodeBB.size() > 0)
3753 {
3754 // New BBs generated, reassign block Ids.
3755 CFG->reassignBlockIDs();
3756 }
3757 }
3758
doCFGStructurize(FlowGraph * FG)3759 void doCFGStructurize(FlowGraph *FG)
3760 {
3761 CFGStructurizer S(FG);
3762
3763 S.run();
3764 }
3765