1 /*========================== begin_copyright_notice ============================ 2 3 Copyright (C) 2017-2021 Intel Corporation 4 5 SPDX-License-Identifier: MIT 6 7 ============================= end_copyright_notice ===========================*/ 8 9 #ifndef _INC_LOCALRA_H_ 10 #define _INC_LOCALRA_H_ 11 12 #include <list> 13 #include "G4_Opcode.h" 14 #include "FlowGraph.h" 15 #include "BuildIR.h" 16 #include "BitSet.h" 17 18 // Forward decls 19 namespace vISA 20 { 21 class G4_Declare; 22 class G4_INST; 23 class G4_BB; 24 class LinearScan; 25 class LocalLiveRange; 26 class PhyRegLocalRA; 27 class PhyRegsManager; 28 class PhyRegSummary; 29 class BankConflictPass; 30 class GlobalRA; 31 } 32 33 vISA::G4_Declare* GetTopDclFromRegRegion(vISA::G4_Operand* opnd); 34 35 typedef std::map<vISA::LocalLiveRange*, std::vector<std::pair<INST_LIST_ITER, unsigned int>>> LLR_USE_MAP; 36 typedef std::map<vISA::LocalLiveRange*, std::vector<std::pair<INST_LIST_ITER, unsigned int>>>::iterator LLR_USE_MAP_ITER; 37 38 // Each declaration will have a LocalLiveRange object allocated for it 39 namespace vISA 40 { 41 class PhyRegsLocalRA; 42 class InputLiveRange; 43 44 class LocalRA 45 { 46 private: 47 G4_Kernel& kernel; 48 IR_Builder& builder; 49 PhyRegsLocalRA* pregs = nullptr; 50 LLR_USE_MAP LLRUseMap; 51 unsigned int numRegLRA = 0; 52 unsigned int globalLRSize = 0; 53 bool doSplitLLR = false; 54 Mem_Manager& mem; 55 std::list<InputLiveRange*, std_arena_based_allocator<InputLiveRange*>> inputIntervals; 56 BankConflictPass& bc; 57 GlobalRA& gra; 58 bool doBCR = false; 59 bool highInternalConflict = false; 60 bool hasSplitInsts = false; 61 62 BankAlign getBankAlignForUniqueAssign(G4_Declare *dcl); 63 bool hasBackEdge(); 64 void evenAlign(); 65 void preLocalRAAnalysis(); 66 void trivialAssignRA(bool& needGlobalRA, bool threeSourceCandidate); 67 bool localRAPass(bool doRoundRobin, bool doSplitLLR); 68 void resetMasks(); 69 void blockOutputPhyRegs(); 70 void removeUnrequiredLifetimeOps(); 71 bool assignUniqueRegisters(bool twoBanksRA, bool twoDirectionsAssign, bool hybridWithSpill); 72 bool unassignedRangeFound(); 73 void updateRegUsage(PhyRegSummary* summary, unsigned int& numRegsUsed); 74 void localRAOptReport(); 75 void markReferencesInOpnd(G4_Operand* opnd, bool isEOT, INST_LIST_ITER inst_it, 76 unsigned int pos); 77 void markReferencesInInst(INST_LIST_ITER inst_it); 78 void setLexicalID(bool includePseudo); 79 void markReferences(unsigned int& numRowsEOT, bool& lifetimeOpFound); 80 void calculateInputIntervals(); 81 bool hasDstSrcOverlapPotential(G4_DstRegRegion* dst, G4_SrcRegRegion* src); 82 void calculateLiveIntervals(G4_BB* bb, std::vector<LocalLiveRange*>& liveIntervals); 83 void printAddressTakenDecls(); 84 void printLocalRACandidates(); 85 void printLocalLiveIntervals(G4_BB* bb, std::vector<LocalLiveRange*>& liveIntervals); 86 void printInputLiveIntervals(); 87 bool countLiveIntervals(); 88 89 // scratch fields used for parameter passing 90 G4_BB* curBB = nullptr; 91 92 public: 93 static void getRowInfo(int size, int& nrows, int& lastRowSize); 94 static unsigned int convertSubRegOffFromWords(G4_Declare* dcl, int subregnuminwords); 95 static unsigned int convertSubRegOffToWords(G4_Declare* dcl, int subregnum); 96 static void countLocalLiveIntervals(std::vector<LocalLiveRange*>& liveIntervals); 97 static void printLocalLiveIntervalDistribution(unsigned int numScalars, 98 unsigned int numHalfGRF, unsigned int numOneGRF, unsigned int numTwoOrMoreGRF, 99 unsigned int numTotal); 100 101 LocalRA(BankConflictPass&, GlobalRA&); 102 bool localRA(); 103 void undoLocalRAAssignments(bool clearInterval); doHybridBCR()104 bool doHybridBCR() const { return doBCR; } hasHighInternalBC()105 bool hasHighInternalBC() const { return highInternalConflict; } 106 }; 107 108 class LocalLiveRange 109 { 110 private: 111 G4_Declare* topdcl; 112 G4_INST* firstRef; 113 G4_INST* lastRef; 114 unsigned int lrStartIdx, lrEndIdx; 115 bool isIndirectAccess; 116 G4_VarBase* preg; 117 // pregoff is stored in word here 118 // But subreg offset stored in regvar should be in units of dcl's element size 119 int pregoff; 120 121 unsigned int numRefsInFG; 122 G4_BB* prevBBRef; 123 bool eot; 124 125 bool assigned; 126 bool isSplit; 127 128 IR_Builder& builder; 129 130 std::unordered_set<unsigned int> forbiddenGRFs; 131 const static unsigned int UndefHint = 0xffffffff; 132 unsigned int hint = UndefHint; 133 134 public: LocalLiveRange(IR_Builder & b)135 LocalLiveRange(IR_Builder& b) : builder(b) 136 { 137 topdcl = NULL; 138 firstRef = lastRef = NULL; 139 lrStartIdx = lrEndIdx = 0; 140 isIndirectAccess = false; 141 numRefsInFG = 0; 142 prevBBRef = NULL; 143 preg = NULL; 144 pregoff = 0; 145 assigned = false; 146 eot = false; 147 isSplit = false; 148 } 149 150 // A reference to this live range exists in bb basic block, record it 151 void recordRef(G4_BB*); 152 markIndirectRef()153 void markIndirectRef() { isIndirectAccess = true; } 154 155 // A live range is local if it is never accessed indirectly (via address taken register) and 156 // only a single basic block references the range 157 bool isLiveRangeLocal() const; 158 159 bool isLiveRangeGlobal(); 160 161 bool isGRFRegAssigned(); 162 setTopDcl(G4_Declare * dcl)163 void setTopDcl(G4_Declare* dcl) 164 { 165 MUST_BE_TRUE(topdcl == NULL, "Redefining top dcl"); 166 topdcl = dcl; 167 } 168 getTopDcl()169 G4_Declare* getTopDcl() const { return topdcl; } 170 new(size_t sz,Mem_Manager & m)171 void* operator new(size_t sz, Mem_Manager& m) {return m.alloc(sz);} 172 hasIndirectAccess()173 bool hasIndirectAccess() const { return isIndirectAccess; } 174 setFirstRef(G4_INST * inst,unsigned int idx)175 void setFirstRef(G4_INST* inst, unsigned int idx) 176 { 177 firstRef = inst; 178 lrStartIdx = idx; 179 } 180 getFirstRef(unsigned int & idx)181 G4_INST* getFirstRef(unsigned int& idx) const 182 { 183 idx = lrStartIdx; 184 return firstRef; 185 } 186 setLastRef(G4_INST * inst,unsigned int idx)187 void setLastRef(G4_INST* inst, unsigned int idx) 188 { 189 lastRef = inst; 190 lrEndIdx = idx; 191 } 192 getLastRef(unsigned int & idx)193 G4_INST* getLastRef(unsigned int& idx) 194 { 195 idx = lrEndIdx; 196 return lastRef; 197 } 198 setPhyReg(G4_VarBase * pr,int subreg)199 void setPhyReg(G4_VarBase* pr, int subreg) { preg = pr; pregoff = subreg; } getPhyReg(int & subreg)200 G4_VarBase* getPhyReg(int& subreg) { subreg = pregoff; return preg; } 201 202 unsigned int getSizeInWords(); 203 setAssigned(bool a)204 void setAssigned(bool a) { assigned = a; } getAssigned()205 bool getAssigned() const { return assigned; } 206 markEOT()207 void markEOT() { eot = true; } isEOT()208 bool isEOT() const { return eot; } 209 markSplit()210 void markSplit() { isSplit = true; } getSplit()211 bool getSplit() const { return isSplit; } 212 addForbidden(unsigned int f)213 void addForbidden(unsigned int f) { forbiddenGRFs.insert(f); } getForbidden()214 std::unordered_set<unsigned int>& getForbidden() { return forbiddenGRFs; } 215 setHint(unsigned int h)216 void setHint(unsigned int h) { hint = h; } hasHint()217 bool hasHint() const { return hint != UndefHint; } getHint()218 unsigned int getHint() const { return hint; } 219 }; 220 221 class InputLiveRange 222 { 223 private: 224 unsigned int regWordIdx; 225 unsigned int lrEndIdx; 226 227 public: InputLiveRange(unsigned int regId,unsigned int endId)228 InputLiveRange(unsigned int regId, unsigned int endId) : regWordIdx(regId), lrEndIdx(endId) 229 { 230 231 } 232 new(size_t sz,Mem_Manager & m)233 void* operator new(size_t sz, Mem_Manager& m) {return m.alloc(sz);} 234 getRegWordIdx()235 unsigned int getRegWordIdx() { return regWordIdx; } getLrEndIdx()236 unsigned int getLrEndIdx() { return lrEndIdx; } 237 }; 238 } 239 #define SECOND_HALF_BANK_START_GRF 64 240 #define RR_HEURISTIC 0.66 241 enum 242 { 243 WORD_FREE = 0, 244 WORD_BUSY = 1, 245 }; 246 247 namespace vISA 248 { 249 class PhyRegsLocalRA 250 { 251 private: 252 IR_Builder* builder; 253 unsigned int numRegs; 254 // nth bit represents whether the register's nth word is free/busy 255 // 1 - busy, 0 - free 256 // It is possible to use bit-vector in place of this array 257 // bitvector does not provide coarse grained access to mark 258 // entire grf busy/available 259 std::vector<uint32_t> regBusyVector; 260 std::vector<int32_t> regLastUse; 261 std::vector<bool> grfAvialable; 262 int lastUseSum1; 263 int lastUseSum2; 264 int bank1AvailableRegNum; 265 int bank2AvailableRegNum; 266 267 bool twoBanksRA; 268 bool simpleGRFAvailable; 269 bool r0Forbidden; 270 bool r1Forbidden; 271 272 int LraFFWindowSize; 273 274 public: PhyRegsLocalRA(IR_Builder * _builder,uint32_t nregs)275 PhyRegsLocalRA(IR_Builder* _builder, uint32_t nregs) : builder(_builder), numRegs(nregs) 276 { 277 uint32_t grfFree = 0; 278 279 regBusyVector.resize(numRegs); 280 regLastUse.resize(numRegs); 281 grfAvialable.resize(numRegs); 282 283 for (int i = 0; i < (int) nregs; i++) 284 { 285 regBusyVector[i] = grfFree; 286 regLastUse[i] = 0; 287 grfAvialable[i] = true; 288 } 289 290 lastUseSum1 = 0; 291 lastUseSum2 = 0; 292 bank1AvailableRegNum = 0; 293 bank2AvailableRegNum = 0; 294 295 twoBanksRA = false; 296 simpleGRFAvailable = false; 297 r0Forbidden = false; 298 r1Forbidden = false; 299 LraFFWindowSize = (int)builder->getOptions()->getuInt32Option(vISA_LraFFWindowSize); 300 } 301 new(size_t sz,Mem_Manager & m)302 void* operator new(size_t sz, Mem_Manager& m) {return m.alloc(sz);} 303 304 void findRegisterCandiateWithAlignForward(int& i, BankAlign align, bool evenAlign); 305 306 unsigned int get_bundle(unsigned int baseReg, int offset); 307 308 int findBundleConflictFreeRegister(int curReg, int endReg, unsigned short occupiedBundles, BankAlign align, bool evenAlign); 309 310 void findRegisterCandiateWithAlignBackward(int& i, BankAlign align, bool evenAlign); 311 312 void setGRFBusy(int which); 313 void setGRFBusy(int which, bool isInput); 314 void setGRFBusy(int which, int howmany, bool isInput); 315 void setGRFBusy(int which, int howmany); 316 void setWordBusy(int whichgrf, int word, bool isInput); 317 void setGRFNotBusy(int which, int instID); 318 void setH1GRFBusy(int which); 319 void setH2GRFBusy(int which); 320 void setWordBusy(int whichgrf, int word); 321 void setWordBusy(int whichgrf, int word, int howmany, bool isInput); 322 void setWordBusy(int whichgrf, int word, int howmany); 323 void setWordNotBusy(int whichgrf, int word, int instID); 324 isGRFBusy(int which)325 inline bool isGRFBusy(int which) const 326 { 327 MUST_BE_TRUE(isGRFAvailable(which), "Invalid register"); 328 return (regBusyVector[which] != 0); 329 } 330 isGRFBusy(int which,int howmany)331 inline bool isGRFBusy(int which, int howmany) const 332 { 333 bool retval = false; 334 335 for (int i = 0; i < howmany && !retval; i++) 336 { 337 retval |= isGRFBusy(which + i); 338 } 339 340 return retval; 341 } 342 343 bool isH1GRFBusy(int which); 344 bool isH2GRFBusy(int which); 345 inline bool isWordBusy(int whichgrf, int word); 346 bool isWordBusy(int whichgrf, int word, int howmany); 347 348 bool findFreeMultipleRegsForward(int regIdx, BankAlign align, int & regnum, int nrows, int lastRowSize, int endReg, unsigned short occupiedBundles, 349 int instID, bool isHybridAlloc, std::unordered_set<unsigned int>& forbidden, bool hintSet); 350 351 void markPhyRegs(G4_Declare* topdcl); 352 353 // Available/unavailable is different from busy/free 354 // Unavailable GRFs are not available for allocation setGRFUnavailable(int which)355 void setGRFUnavailable(int which) { grfAvialable[which] = false; } isGRFAvailable(int which)356 bool isGRFAvailable(int which) const 357 { 358 359 if (simpleGRFAvailable) 360 { 361 if (which > 1) 362 { 363 return true; 364 } 365 else 366 { 367 if (r0Forbidden && which == 0) 368 { 369 return false; 370 } 371 372 if (r1Forbidden && which <= 1) 373 { 374 return false; 375 } 376 return true; 377 } 378 } 379 else 380 { 381 MUST_BE_TRUE(which < (int) numRegs, "invalid GRF"); 382 return (grfAvialable[which] == true); 383 } 384 } 385 isGRFAvailable(int which,int howmany)386 bool isGRFAvailable(int which, int howmany) const 387 { 388 if (simpleGRFAvailable) 389 { 390 if (which > 1) 391 { 392 return true; 393 } 394 else 395 { 396 if (r0Forbidden && which == 0) 397 { 398 return false; 399 } 400 401 if (r1Forbidden && which <= 1) 402 { 403 return false; 404 } 405 return true; 406 } 407 } 408 else 409 { 410 for (int i = 0; i < howmany; i++) 411 { 412 if (!isGRFAvailable(which + i)) 413 { 414 return false; 415 } 416 } 417 } 418 419 return true; 420 } 421 422 void printBusyRegs(); getRegLastUse(int reg)423 int getRegLastUse(int reg) {return regLastUse[reg];} 424 getLastUseSum1()425 int getLastUseSum1() {return lastUseSum1;} getLastUseSum2()426 int getLastUseSum2() {return lastUseSum2;} getBank1AvailableRegNum()427 int getBank1AvailableRegNum() {return bank1AvailableRegNum;} getBank2AvailableRegNum()428 int getBank2AvailableRegNum() {return bank2AvailableRegNum;} 429 setBank1AvailableRegNum(int avail1)430 void setBank1AvailableRegNum(int avail1) { bank1AvailableRegNum = avail1;} setBank2AvailableRegNum(int avail2)431 void setBank2AvailableRegNum(int avail2) { bank2AvailableRegNum = avail2;} 432 setTwoBanksRA(bool twoBanks)433 void setTwoBanksRA(bool twoBanks) { twoBanksRA = twoBanks;} setSimpleGRFAvailable(bool simple)434 void setSimpleGRFAvailable(bool simple) {simpleGRFAvailable = simple; } setR0Forbidden()435 void setR0Forbidden() {r0Forbidden = true;} setR1Forbidden()436 void setR1Forbidden() {r1Forbidden = true;} 437 bool findFreeMultipleRegsBackward(int regIdx, BankAlign align, int ®num, int nrows, int lastRowSize, int endReg, int instID, 438 bool isHybridAlloc, std::unordered_set<unsigned int>& forbidden); 439 bool findFreeSingleReg(int regIdx, G4_SubReg_Align subalign, int ®num, int &subregnum, int size); 440 bool findFreeSingleReg(int regIdx, int size, BankAlign align, G4_SubReg_Align subalign, int ®num, int &subregnum, int endReg, 441 int instID, bool isHybridAlloc, bool forward, std::unordered_set<unsigned int>& forbidden); 442 443 bool findFreeMultipleRegsForward(int regIdx, BankAlign align, int& regnum, int nrows, int lastRowSize, int endReg, int instID, const bool* forbidden); 444 445 bool findFreeSingleReg(int regIdx, int size, BankAlign align, G4_SubReg_Align subalign, int& regnum, int& subregnum, int endReg, const bool* forbidden); 446 447 }; 448 449 class PhyRegsManager 450 { 451 private: 452 PhyRegsLocalRA availableRegs; 453 bool twoBanksRA; 454 455 public: PhyRegsManager(PhyRegsLocalRA pregs,bool _twoBanksRA)456 PhyRegsManager(PhyRegsLocalRA pregs, bool _twoBanksRA) : availableRegs(pregs), twoBanksRA(_twoBanksRA) 457 { 458 availableRegs.setTwoBanksRA(_twoBanksRA); 459 } 460 461 int findFreeRegs(int size, BankAlign align, G4_SubReg_Align subalign, int & regnum, int & subregnum, int startRegNum, int endRegNum, 462 unsigned short occupiedBundles, unsigned int instID, bool isHybridAlloc, std::unordered_set<unsigned int>& forbidden, bool hasHint, 463 unsigned int hintReg); 464 465 void freeRegs(int regnum, int subregnum, int numwords, int instID); getAvaialableRegs()466 PhyRegsLocalRA * getAvaialableRegs() { return &availableRegs; } 467 int findFreeRegs(int size, BankAlign align, G4_SubReg_Align subalign, int& regnum, int& subregnum, int startRegNum, int endRegNum, unsigned short occupiedBundles, unsigned int instID, bool isHybridAlloc, std::unordered_set<unsigned int>& forbidden); 468 int findFreeRegs(int size, BankAlign align, G4_SubReg_Align subalign, int& regnum, int& subregnum, int startRegNum, int endRegNum, unsigned int instID, const bool* forbidden); 469 }; 470 471 class LinearScan { 472 private: 473 GlobalRA& gra; 474 IR_Builder& builder; 475 Mem_Manager& mem; 476 PhyRegsManager& pregManager; 477 PhyRegsLocalRA& initPregs; 478 std::vector<LocalLiveRange*>& liveIntervals; 479 std::list<InputLiveRange*, std_arena_based_allocator<InputLiveRange*>>& inputIntervals; 480 std::list<LocalLiveRange*> active; 481 PhyRegSummary* summary; 482 483 void expireRanges(unsigned int); 484 void expireInputRanges(unsigned int, unsigned int, unsigned int); 485 void expireAllActive(); 486 bool allocateRegsFromBanks(LocalLiveRange*); 487 bool allocateRegs(LocalLiveRange* lr, G4_BB* bb, IR_Builder& builder, LLR_USE_MAP& LLRUseMap); 488 void coalesceSplit(LocalLiveRange* lr); 489 void freeAllocedRegs(LocalLiveRange*, bool); 490 void updateActiveList(LocalLiveRange*); 491 492 BitSet pregs; 493 unsigned int simdSize; 494 495 unsigned int globalLRSize; 496 unsigned int* startGRFReg; 497 unsigned int numRegLRA; 498 499 unsigned int bank1StartGRFReg; 500 unsigned int bank2StartGRFReg; 501 unsigned int bank1_start; 502 unsigned int bank1_end; 503 unsigned int bank2_start; 504 unsigned int bank2_end; 505 506 bool useRoundRobin; 507 bool doBankConflict; 508 bool highInternalConflict; 509 bool doSplitLLR; 510 511 public: 512 LinearScan(GlobalRA& g, std::vector<LocalLiveRange*>& localLiveIntervals, 513 std::list<InputLiveRange*, std_arena_based_allocator<InputLiveRange*>>& inputLivelIntervals, 514 PhyRegsManager& pregMgr, PhyRegsLocalRA& pregs, Mem_Manager& memmgr, PhyRegSummary* s, 515 unsigned int numReg, unsigned int glrs, bool roundRobin, bool bankConflict, 516 bool internalConflict, bool splitLLR, unsigned int simdS); 517 518 void run(G4_BB* bb, IR_Builder& builder, LLR_USE_MAP& LLRUseMap); 519 }; 520 521 class PhyRegSummary 522 { 523 private: 524 uint32_t totalNumGRF = 0; 525 std::vector<bool> GRFUsage; 526 527 public: PhyRegSummary()528 PhyRegSummary() {} 529 PhyRegSummary(uint32_t numGRF)530 PhyRegSummary(uint32_t numGRF) : totalNumGRF(numGRF) 531 { 532 GRFUsage.resize(totalNumGRF, false); 533 } 534 getNumGRF()535 uint32_t getNumGRF() const { return totalNumGRF; } 536 new(size_t sz,Mem_Manager & m)537 void* operator new(size_t sz, Mem_Manager& m) {return m.alloc(sz);} 538 539 void markPhyRegs(G4_VarBase* pr, unsigned int words); 540 isGRFBusy(int regnum)541 bool isGRFBusy(int regnum) const { return GRFUsage[regnum]; } 542 setGRFBusy(int regnum)543 void setGRFBusy(int regnum) { GRFUsage[regnum] = true; } 544 545 void printBusyRegs(); 546 getNumFreeGRF()547 uint32_t getNumFreeGRF() const 548 { 549 uint32_t numFreeGRF = 0; 550 for (int i = 0, size = getNumGRF(); i < size; ++i) 551 { 552 if (!isGRFBusy(i)) 553 { 554 numFreeGRF++; 555 } 556 } 557 return numFreeGRF; 558 } 559 }; 560 } 561 #endif // _INC_LOCALRA_H_ 562