1 /*========================== begin_copyright_notice ============================ 2 3 Copyright (C) 2017-2021 Intel Corporation 4 5 SPDX-License-Identifier: MIT 6 7 ============================= end_copyright_notice ===========================*/ 8 9 #ifndef FLOWGRAPH_H 10 #define FLOWGRAPH_H 11 12 #include "VISADefines.h" 13 #include "G4_BB.hpp" 14 #include "G4_IR.hpp" 15 #include "RelocationInfo.h" 16 #include "LoopAnalysis.h" 17 18 #include <list> 19 #include <map> 20 #include <ostream> 21 #include <set> 22 #include <string> 23 #include <unordered_set> 24 #include <unordered_map> 25 #include <vector> 26 27 namespace vISA 28 { 29 class FlowGraph; 30 class G4_BB; 31 class G4_Kernel; 32 class IR_Builder; 33 class PhyRegSummary; 34 class VarSplitPass; 35 36 // 37 // FuncInfo - Function CFG information 38 // This class maintains a CFG summary of the function (its INIT block, EXIT block and 39 // number of call sites). The functions's INIT block will contain a pointer to its 40 // related FuncInfo object. The FuncInfo definition is used for inter-procedural liveness 41 // analysis (IPA). 42 class FuncInfo 43 { 44 private: 45 unsigned id; // the function id 46 G4_BB* initBB; // the init node 47 G4_BB* exitBB; // the exit node 48 unsigned callCount; // the number of call sites 49 50 std::vector<G4_BB*> BBList; // the list of BBs 51 std::list<FuncInfo *> callees; // the list of callees 52 unsigned scopeID; // the function scope ID 53 54 bool visited; 55 unsigned preID; 56 unsigned postID; 57 58 public: 59 FuncInfo(unsigned p_id,G4_BB * p_initBB,G4_BB * p_exitBB)60 FuncInfo(unsigned p_id, G4_BB* p_initBB, G4_BB* p_exitBB) 61 : id(p_id), initBB(p_initBB), exitBB(p_exitBB), callCount(1) 62 , scopeID(0), visited(false), preID(0), postID(0) 63 { 64 } 65 ~FuncInfo()66 ~FuncInfo() 67 { 68 BBList.clear(); 69 callees.clear(); 70 } 71 new(size_t sz,Mem_Manager & m)72 void *operator new(size_t sz, Mem_Manager& m) {return m.alloc(sz);} 73 doIPA()74 bool doIPA() const {return callCount > 1;} 75 getId()76 unsigned getId() const {return id;} setId(unsigned val)77 void setId(unsigned val) {id = val;} 78 getInitBB()79 G4_BB* getInitBB() const {return initBB;} getExitBB()80 G4_BB* getExitBB() const {return exitBB;} 81 incrementCallCount()82 void incrementCallCount() {++callCount;} 83 updateInitBB(G4_BB * p_initBB)84 void updateInitBB(G4_BB* p_initBB) {initBB = p_initBB;} updateExitBB(G4_BB * p_exitBB)85 void updateExitBB(G4_BB* p_exitBB) {exitBB = p_exitBB;} 86 addCallee(FuncInfo * fn)87 void addCallee(FuncInfo *fn) {callees.push_back(fn);} getCallees()88 std::list<FuncInfo *>& getCallees() {return callees;} 89 addBB(G4_BB * bb)90 void addBB(G4_BB* bb) {BBList.push_back(bb);} getBBList()91 std::vector<G4_BB*>& getBBList() {return BBList;} 92 getScopeID()93 unsigned getScopeID() const {return scopeID;} setScopeID(unsigned id)94 void setScopeID(unsigned id) {scopeID = id;} 95 getVisited()96 bool getVisited() const {return visited;} setVisited()97 void setVisited() {visited = true;} 98 getPreID()99 unsigned getPreID() const {return preID;} setPreID(unsigned id)100 void setPreID(unsigned id) {preID = id;} 101 getPostID()102 unsigned getPostID() const {return postID;} setPostID(unsigned id)103 void setPostID(unsigned id) {postID = id;} 104 105 void dump() const; 106 }; // FuncInfo 107 108 109 110 /// 111 /// A hashtable of <declare, node> where every node is a vector of 112 /// {LB, RB} (left-bounds and right-bounds) 113 /// A source operand (either SrcRegRegion or Predicate) is considered to be global 114 /// if it is not fully defined in one BB 115 /// 116 class GlobalOpndHashTable 117 { 118 Mem_Manager& mem; 119 std_arena_based_allocator<uint32_t> private_arena_allocator; 120 packBound(uint16_t lb,uint16_t rb)121 static uint32_t packBound(uint16_t lb, uint16_t rb) 122 { 123 return (rb << 16) + lb; 124 } 125 getLB(uint32_t value)126 static uint16_t getLB(uint32_t value) 127 { 128 return (uint16_t) (value & 0xFFFF); 129 } getRB(uint32_t value)130 static uint16_t getRB(uint32_t value) 131 { 132 return (uint16_t) (value >> 16); 133 } 134 135 struct HashNode 136 { 137 // each elements is {LB, RB} pair where [0:15] is LB and [16:31] is RB 138 std::vector<uint32_t, std_arena_based_allocator<uint32_t>> bounds; 139 HashNodeHashNode140 HashNode(uint16_t lb, uint16_t rb, std_arena_based_allocator<uint32_t>& m) 141 : bounds(m) 142 { 143 bounds.push_back(packBound(lb, rb)); 144 } 145 newHashNode146 void *operator new(size_t sz, Mem_Manager& m) {return m.alloc(sz);} 147 148 void insert(uint16_t newLB, uint16_t newRB); 149 bool isInNode(uint16_t lb, uint16_t rb) const; 150 }; 151 152 // "global" refers to declares with elements that are used without a preceding define in the same BB 153 std::map<G4_Declare*, HashNode*> globalVars; 154 // for debugging it's often useful to dump out the global operands, not just declares. 155 // Note that this may not be an exhaustive list, for example it does not cover dst global operands; 156 // for accuracy one should use isOpndGlobal() 157 std::vector<G4_Operand*> globalOpnds; 158 159 public: GlobalOpndHashTable(Mem_Manager & m)160 GlobalOpndHashTable(Mem_Manager& m) : mem(m) { } 161 162 void addGlobalOpnd(G4_Operand * opnd); 163 // returns true if def may possibly define a global variable 164 bool isOpndGlobal(G4_Operand * def) const; 165 void clearHashTable(); 166 167 void dump(std::ostream &os = std::cerr) const; 168 }; // GlobalOpndHashTable 169 170 // 171 // A table mapping the subroutine (INIT) block id's to their FuncInfo nodes. 172 // 173 typedef std::unordered_map<int, FuncInfo*> FuncInfoHashTable; 174 typedef std::unordered_map<G4_Label*, G4_BB*> Label_BB_Map; 175 176 class FlowGraph 177 { 178 // This list maintains the ordering of the basic blocks (i.e., asm and binary emission will output 179 // the blocks in list oder. 180 // Important: Due to the nature of SIMD CF, it is unsafe to change the order of basic blocks 181 // Once the list is populated in constructFlowGraph(), the only changes allowed are 182 // 1. insertion of new exit BBs due to handleExit/handleReturn/handleFRet. The exit BB 183 // must be the last BB for the kernel/subroutine/function it belongs to 184 // 2. deletion of unreachable blocks 185 // 3. merging of blocks that only contain one label with its (single) successor 186 // If you need to change the block ordering for any reason, create another data structure instead of 187 // modifying this one 188 BB_LIST BBs; 189 190 unsigned traversalNum; // used for flow graph traversals 191 unsigned numBBId; // number of basic blocks 192 bool reducible; // reducibility of the graph 193 bool doIPA; // requires inter-procedural liveness analysis 194 bool hasStackCalls; // indicates that the flowgraph contains STACK_CALL calls 195 bool isStackCallFunc; // indicates the function itself is a STACK_CALL function 196 unsigned autoLabelId; 197 G4_Kernel* pKernel; // back pointer to the kernel object 198 199 // map each BB to its local RA GRF usage summary, populated in local RA 200 std::map<G4_BB*, PhyRegSummary*> bbLocalRAMap; 201 // vector of summaries created for each BB, needed for deallocation later 202 std::vector<PhyRegSummary*> localRASummaries; 203 204 // list of all BBs ever created 205 // This list only grows and is freed when the FlowGraph is destroyed 206 std::vector<G4_BB*> BBAllocList; 207 208 // stores all INST that may be target of indirect jump. Currently these inst must be jmpi themselves 209 std::unordered_set<G4_INST*> indirectJmpTarget; 210 211 // stores all endift inst that have labels associated with it 212 std::unordered_map<G4_INST*, G4_Label*> endifWithLabels; 213 214 // label to subroutine BB's map. This is used to add edges between subroutine caller/callee 215 // ToDo: We should use FuncInfo instead, but at the time it was needed FuncInfo was not constructed yet.. 216 std::unordered_map<G4_Label*, std::vector<G4_BB*>> subroutines; 217 218 vISA::Dominator dom; 219 vISA::ImmDominator immDom; 220 vISA::PostDom pDom; 221 vISA::LoopDetection loops; 222 223 public: 224 typedef std::pair<G4_BB*, G4_BB*> Edge; 225 typedef std::set<G4_BB*> Blocks; 226 typedef std::map<Edge, Blocks> Loop; 227 228 Mem_Manager& mem; // mem mananger for creating BBs & starting IP table 229 INST_LIST_NODE_ALLOCATOR& instListAlloc; // a reference to dedicated mem allocator for holding instruction list nodes 230 231 std::list<Edge> backEdges; // list of all backedges (tail->head) 232 Loop naturalLoops; 233 234 // function info nodes. entry function is not included. 235 std::vector<FuncInfo*> funcInfoTable; 236 237 std::vector<FuncInfo*> sortedFuncTable; // subroutines in reverse topographical order (leaf at top) 238 // kernelInfo is the last element with invalid func id 239 240 FuncInfo* kernelInfo; // the call info for the kernel function 241 242 IR_Builder *builder; // needed to create new instructions (mainly labels) 243 GlobalOpndHashTable globalOpndHT; 244 245 G4_Declare * framePtrDcl; 246 G4_Declare * stackPtrDcl; 247 G4_Declare * scratchRegDcl; 248 G4_Declare * pseudoVCEDcl; 249 250 // pseudo declares used by RA to model the save/restore variables at each call site 251 struct PseudoDcls 252 { 253 G4_Declare* VCA; 254 G4_Declare* A0; 255 G4_Declare* Flag; 256 }; 257 258 std::unordered_map<G4_InstCF*, struct PseudoDcls> fcallToPseudoDclMap; 259 260 // offset in unit of OW 261 unsigned callerSaveAreaOffset = 0; 262 unsigned calleeSaveAreaOffset = 0; 263 unsigned frameSizeInOWord = 0; 264 265 // Bank conflict statistics. 266 struct BankConflictStatistics 267 { 268 unsigned NumOfGoodInsts = 0; 269 unsigned NumOfBadInsts = 0; 270 unsigned NumOfOKInsts = 0; 271 addGoodBankConflictStatistics272 void addGood() { ++NumOfGoodInsts; } addBadBankConflictStatistics273 void addBad() { ++NumOfBadInsts; } addOKBankConflictStatistics274 void addOK() { ++NumOfOKInsts; } clearBankConflictStatistics275 void clear() 276 { 277 NumOfGoodInsts = 0; 278 NumOfBadInsts = 0; 279 NumOfOKInsts = 0; 280 } 281 } BCStats; 282 283 struct XeBankConflictStatistics 284 { 285 unsigned simd8 = 0; //The number of simd8 instructions, one simd16 is treated as two simd8 if it's not HF 286 unsigned BCNum = 0; //The number of band conflict. 287 int sameBankConflicts = 0; 288 int simd16ReadSuppression = 0; 289 int twoSrcBC = 0; 290 clearXeBankConflictStatistics291 void clear() 292 { 293 simd8 = 0; 294 BCNum = 0; 295 sameBankConflicts = 0; 296 simd16ReadSuppression = 0; 297 twoSrcBC = 0; 298 } addBCXeBankConflictStatistics299 void addBC(unsigned num) { BCNum += num; } addSameBankBCXeBankConflictStatistics300 void addSameBankBC(unsigned num) { sameBankConflicts += num; } addSimd16RSBCXeBankConflictStatistics301 void addSimd16RSBC(unsigned num) { simd16ReadSuppression += num; } add2SrcBCXeBankConflictStatistics302 void add2SrcBC(unsigned num) { twoSrcBC += num; } addSIMD8XeBankConflictStatistics303 void addSIMD8() { ++simd8; } 304 305 } XeBCStats; 306 unsigned numRMWs = 0; // counting the number of read-modify-write 307 public: 308 // forwarding functions to the BBs list begin()309 BB_LIST_ITER begin() { return BBs.begin(); } end()310 BB_LIST_ITER end() { return BBs.end(); } rbegin()311 BB_LIST::reverse_iterator rbegin() { return BBs.rbegin(); } rend()312 BB_LIST::reverse_iterator rend() { return BBs.rend(); } cbegin()313 BB_LIST::const_iterator cbegin() const { return BBs.cbegin(); } cend()314 BB_LIST::const_iterator cend() const { return BBs.cend(); } crbegin()315 BB_LIST::const_reverse_iterator crbegin() const { return BBs.crbegin(); } crend()316 BB_LIST::const_reverse_iterator crend() const { return BBs.crend(); } 317 size()318 size_t size() { return BBs.size(); } empty()319 bool empty() const { return BBs.empty(); } back()320 G4_BB* back() const {return BBs.back(); } 321 322 static void setPhysicalLink(G4_BB* pred, G4_BB* succ); 323 324 BB_LIST_ITER insert(BB_LIST_ITER iter, G4_BB* bb); 325 326 push_back(G4_BB * bb)327 void push_back(G4_BB* bb) {insert(BBs.end(), bb);} 328 329 void erase(BB_LIST_ITER iter); 330 getBBList()331 BB_LIST& getBBList() { return BBs; } 332 333 // add BB to be the first BB 334 void addPrologBB(G4_BB* BB); 335 336 337 // append another CFG's BBs to this CFG. 338 // note that we don't add additional CFG edges as its purpose is just to 339 // stitch the two binaries togather 340 void append(const FlowGraph& otherFG); 341 342 G4_BB* getLabelBB(Label_BB_Map& map, G4_Label* label); 343 G4_BB* beginBB(Label_BB_Map& map, G4_INST* first); 344 performIPA()345 bool performIPA() const {return doIPA;} 346 getHasStackCalls()347 bool getHasStackCalls() const { return hasStackCalls; } setHasStackCalls()348 void setHasStackCalls() { hasStackCalls = true; } resetHasStackCalls()349 void resetHasStackCalls() { hasStackCalls = false;} 350 getIsStackCallFunc()351 bool getIsStackCallFunc() const {return isStackCallFunc;} setIsStackCallFunc()352 void setIsStackCallFunc() {isStackCallFunc = true;} 353 getKernel()354 G4_Kernel* getKernel() {return pKernel;} 355 356 void mergeFReturns(); 357 getFramePtrDcl()358 G4_Declare*& getFramePtrDcl() {return framePtrDcl;} getStackPtrDcl()359 G4_Declare*& getStackPtrDcl() {return stackPtrDcl;} getScratchRegDcl()360 G4_Declare*& getScratchRegDcl() {return scratchRegDcl;} 361 isPseudoVCEDcl(const G4_Declare * dcl)362 bool isPseudoVCEDcl(const G4_Declare* dcl) const { return dcl == pseudoVCEDcl; } isPseudoVCADcl(const G4_Declare * dcl)363 bool isPseudoVCADcl(const G4_Declare* dcl) const 364 { 365 for (auto iter : fcallToPseudoDclMap) 366 { 367 if (iter.second.VCA == dcl) 368 { 369 return true; 370 } 371 } 372 return false; 373 } isPseudoA0Dcl(const G4_Declare * dcl)374 bool isPseudoA0Dcl(const G4_Declare* dcl) const 375 { 376 for (auto iter : fcallToPseudoDclMap) 377 { 378 if (iter.second.A0 == dcl) 379 { 380 return true; 381 } 382 } 383 return false; 384 } isPseudoFlagDcl(const G4_Declare * dcl)385 bool isPseudoFlagDcl(const G4_Declare* dcl) const 386 { 387 for (auto iter : fcallToPseudoDclMap) 388 { 389 if (iter.second.Flag == dcl) 390 { 391 return true; 392 } 393 } 394 return false; 395 } isPseudoDcl(const G4_Declare * dcl)396 bool isPseudoDcl(const G4_Declare* dcl) const 397 { 398 if (!getHasStackCalls() && !getIsStackCallFunc()) 399 { 400 return false; 401 } 402 if (isPseudoVCEDcl(dcl)) 403 { 404 return true; 405 } 406 for (auto iter : fcallToPseudoDclMap) 407 { 408 if (iter.second.A0 == dcl || iter.second.Flag == dcl || iter.second.VCA == dcl) 409 { 410 return true; 411 } 412 } 413 return false; 414 } 415 416 // 417 // Merge multiple returns into one, prepare for spill code insertion 418 // 419 void mergeReturn(FuncInfoHashTable& funcInfoTable); 420 G4_BB* mergeSubRoutineReturn(G4_Label* subroutine); 421 void normalizeSubRoutineBB(FuncInfoHashTable& funcInfoTable); 422 void processGoto(bool HasSIMDCF); 423 void processSCF(FuncInfoHashTable& FuncInfoMap); 424 void insertJoinToBB(G4_BB* bb, G4_ExecSize execSize, G4_Label* jip); 425 426 // functions for structure analysis getKernel()427 G4_Kernel *getKernel() const { return pKernel; } 428 G4_Label* insertEndif(G4_BB* bb, G4_ExecSize execSize, bool createLabel); 429 void setJIPForEndif(G4_INST* endif, G4_INST* target, G4_BB* targetBB); 430 void convertGotoToJmpi(G4_INST *gotoInst); 431 G4_BB* getSinglePredecessor(G4_BB* BB, G4_BB* ExcludedPred) const; 432 bool convertJmpiToGoto(); 433 getNumFuncs()434 unsigned getNumFuncs() const {return unsigned(funcInfoTable.size());} 435 436 void handleReturn(Label_BB_Map& map, FuncInfoHashTable& funcInfoTable); 437 void linkReturnAddr(G4_BB* bb, G4_BB* returnAddr); 438 439 void handleExit(G4_BB* lastKernelBB); 440 void handleWait(); 441 442 void preprocess(INST_LIST& instlist); 443 444 FlowGraph() = delete; 445 FlowGraph(const FlowGraph&) = delete; 446 FlowGraph& operator=(const FlowGraph&) = delete; 447 FlowGraph(INST_LIST_NODE_ALLOCATOR & alloc,G4_Kernel * kernel,Mem_Manager & m)448 FlowGraph(INST_LIST_NODE_ALLOCATOR& alloc, G4_Kernel* kernel, Mem_Manager& m) : 449 traversalNum(0), numBBId(0), reducible(true), 450 doIPA(false), hasStackCalls(false), isStackCallFunc(false), autoLabelId(0), 451 pKernel(kernel), mem(m), instListAlloc(alloc), 452 kernelInfo(NULL), builder(NULL), globalOpndHT(m), framePtrDcl(NULL), 453 stackPtrDcl(NULL), scratchRegDcl(NULL), pseudoVCEDcl(NULL), 454 dom(*kernel), immDom(*kernel), pDom(*kernel), loops(*kernel) {} 455 456 ~FlowGraph(); 457 setBuilder(IR_Builder * pBuilder)458 void setBuilder(IR_Builder *pBuilder) {builder = pBuilder;} 459 460 void addPredSuccEdges(G4_BB* pred, G4_BB* succ, bool tofront=true) 461 { 462 markStale(); 463 464 if (tofront) 465 pred->Succs.push_front(succ); 466 else 467 pred->Succs.push_back(succ); 468 469 succ->Preds.push_front(pred); 470 } 471 472 void addUniquePredSuccEdges(G4_BB* pred, G4_BB* succ, bool tofront=true) 473 { 474 // like above, but check for duplicate edges 475 auto iter = std::find(pred->Succs.begin(), pred->Succs.end(), succ); 476 if (iter == pred->Succs.end()) 477 { 478 addPredSuccEdges(pred, succ, tofront); 479 } 480 } 481 482 void removePredSuccEdges(G4_BB* pred, G4_BB* succ); 483 484 G4_INST* createNewLabelInst(G4_Label* label, int lineNo = 0, int CISAOff = -1); 485 486 G4_BB* createNewBB(bool insertInFG = true); 487 G4_BB* createNewBBWithLabel(const char* LabelPrefix, int Lineno = 0, int CISAoff = -1); 488 int64_t insertDummyUUIDMov(); 489 // 490 // Increase by one so that all BBs' traversal are less than traversalNum 491 // prepareTraversal()492 void prepareTraversal() {traversalNum++;} getTraversalNum()493 unsigned getTraversalNum() {return traversalNum;} 494 495 // 496 // Check if the graph is reducible 497 // isReducible()498 bool isReducible() { return reducible; } 499 500 // 501 // Remove any placeholder empty blocks that could have been inserted to aid analysis 502 // 503 void removeRedundantLabels(); 504 // 505 // remove any mov with the same src and dst opnds 506 // 507 void removeRedundMov(); 508 // 509 // Remove any placeholder empty blocks that could have been inserted to aid analysis. 510 // 511 void removeEmptyBlocks(); 512 // 513 // Add a dummy BB for multiple-exit flow graph 514 // 515 void linkDummyBB(); 516 // 517 // Re-assign block ID so that we can use id to determine the ordering of two blocks in 518 // the code layout 519 // 520 void reassignBlockIDs(); 521 522 // 523 // Remove blocks that are unreachable via control flow of program 524 // 525 void removeUnreachableBlocks(FuncInfoHashTable& funcInfoHT); 526 527 void constructFlowGraph(INST_LIST& instlist); 528 bool matchBranch(int &sn, INST_LIST& instlist, INST_LIST_ITER &it); 529 530 void localDataFlowAnalysis(); 531 void resetLocalDataFlowData(); 532 getNumBB()533 unsigned getNumBB() const {return numBBId;} getEntryBB()534 G4_BB* getEntryBB() {return BBs.front();} 535 536 void addFrameSetupDeclares(IR_Builder& builder, PhyRegPool& regPool); 537 void addSaveRestorePseudoDeclares(IR_Builder& builder); 538 void markDivergentBBs(); 539 540 // Used for CISA 3.0 incrementNumBBs()541 void incrementNumBBs() { numBBId++ ; } 542 G4_BB* getUniqueReturnBlock(); 543 544 void normalizeFlowGraph(); 545 546 // This is mainly used to link subroutine call-return BBs 547 // ToDo: maintain this during BB add/delete instead of having to call it explicitly 548 void setPhysicalPredSucc(); 549 550 void markRPOTraversal(); 551 552 void findBackEdges(); 553 void findNaturalLoops(); 554 555 void traverseFunc(FuncInfo* func, unsigned *ptr); 556 void topologicalSortCallGraph(); 557 void findDominators(std::map<FuncInfo*, std::set<FuncInfo*>>& domMap); 558 unsigned resolveVarScope(G4_Declare* dcl, FuncInfo* func); 559 void markVarScope(std::vector<G4_BB*>& BBList, FuncInfo* func); 560 void markScope(); 561 562 void addSIMDEdges(); 563 addBBLRASummary(G4_BB * bb,PhyRegSummary * summary)564 void addBBLRASummary(G4_BB* bb, PhyRegSummary* summary) 565 { 566 bbLocalRAMap.insert(std::make_pair(bb, summary)); 567 localRASummaries.push_back(summary); 568 } 569 clearBBLRASummaries()570 void clearBBLRASummaries() 571 { 572 bbLocalRAMap.clear(); 573 } 574 getBBLRASummary(G4_BB * bb)575 PhyRegSummary* getBBLRASummary(G4_BB* bb) const 576 { 577 auto&& iter = bbLocalRAMap.find(bb); 578 return iter != bbLocalRAMap.end() ? iter->second : nullptr; 579 } 580 getNumCalls()581 uint32_t getNumCalls() const 582 { 583 uint32_t numCalls = 0; 584 for (auto bb : BBs) 585 { 586 if (bb->isEndWithCall()) 587 { 588 ++numCalls; 589 } 590 } 591 return numCalls; 592 } 593 isIndirectJmpTarget(G4_INST * inst)594 bool isIndirectJmpTarget(G4_INST* inst) const 595 { 596 return indirectJmpTarget.count(inst) > 0; 597 } 598 getLabelForEndif(G4_INST * inst)599 G4_Label* getLabelForEndif(G4_INST* inst) const 600 { 601 auto iter = endifWithLabels.find(inst); 602 if (iter != endifWithLabels.end()) 603 { 604 return iter->second; 605 } 606 else 607 { 608 return nullptr; 609 } 610 } 611 endWithGotoInLastBB()612 bool endWithGotoInLastBB() const 613 { 614 if (BBs.empty()) 615 { 616 return false; 617 } 618 G4_BB* lastBB = back(); 619 return lastBB->isEndWithGoto(); 620 } 621 622 /// Return true if PredBB->SuccBB is a backward branch goto/jmpi/while. isBackwardBranch(G4_BB * PredBB,G4_BB * SuccBB)623 bool isBackwardBranch(G4_BB* PredBB, G4_BB* SuccBB) const 624 { 625 if (PredBB->size() == 0) return false; 626 G4_INST* bInst = PredBB->back(); 627 G4_BB* targetBB = PredBB->Succs.size() > 0 ? PredBB->Succs.back() : nullptr; 628 bool isBr = (bInst->opcode() == G4_goto || bInst->opcode() == G4_jmpi); 629 // Note that isBackward() should return true for while as well. 630 return targetBB == SuccBB && 631 ((isBr && bInst->asCFInst()->isBackward()) || bInst->opcode() == G4_while); 632 } 633 634 void setABIForStackCallFunctionCalls(); 635 636 // This is for TGL WA 637 void findNestedDivergentBBs(std::unordered_map<G4_BB*, int>& nestedDivergentBBs); 638 639 void print(std::ostream& OS) const; 640 void dump() const; // used in debugger 641 getDominator()642 Dominator& getDominator() { return dom; } getImmDominator()643 ImmDominator& getImmDominator() { return immDom; } getPostDominator()644 PostDom& getPostDominator() { return pDom; } getLoops()645 LoopDetection& getLoops() { return loops; } 646 void markStale(); 647 648 private: 649 // Use normalized region descriptors for each source operand if possible. 650 void normalizeRegionDescriptors(); 651 652 // Find the BB that has the given label from the range [StartIter, EndIter). 653 static G4_BB* findLabelBB( 654 BB_LIST_ITER StartIter, 655 BB_LIST_ITER EndIter, 656 const char* Label); 657 658 void decoupleReturnBlock(G4_BB*); 659 void decoupleInitBlock(G4_BB*, FuncInfoHashTable& funcInfoTable); 660 void DFSTraverse(G4_BB* bb, unsigned& preId, unsigned& postId, FuncInfo* fn); 661 662 }; // FlowGraph 663 664 } // vISA:: 665 #endif // FLOWGRAPH_H 666