/*========================== begin_copyright_notice ============================

Copyright (C) 2017-2021 Intel Corporation

SPDX-License-Identifier: MIT

============================= end_copyright_notice ===========================*/

#ifndef __GRAPHCOLOR_H__
#define __GRAPHCOLOR_H__

#include "BitSet.h"
#include "G4_IR.hpp"
#include "RegAlloc.h"
#include "RPE.h"
#include "SpillManagerGMRF.h"
#include "VarSplit.h"

#include <list>
#include <limits>
#include <memory>
#include <map>
#include <unordered_set>
#include <vector>

#define BITS_DWORD 32
#define ROUND(x,y) ((x) + (((y) - (x) % (y)) % (y)))

namespace vISA
{
    const float MAXSPILLCOST = (std::numeric_limits<float>::max());
    const float MINSPILLCOST = -(std::numeric_limits<float>::max());

    class BankConflictPass
    {
    private:
        GlobalRA& gra;
        bool forGlobal;

        BankConflict setupBankAccordingToSiblingOperand(BankConflict assignedBank, unsigned offset, bool oneGRFBank);
        void setupEvenOddBankConflictsForDecls(G4_Declare* dcl_1, G4_Declare* dcl_2, unsigned offset1, unsigned offset2,
            BankConflict& srcBC1, BankConflict& srcBC2);
        void setupBankConflictsOneGRFOld(G4_INST* inst, int& bank1RegNum, int& bank2RegNum, float GRFRatio, unsigned& internalConflict);
        bool isOddOffset(unsigned offset) const;
        void setupBankConflictsforDPAS(G4_INST* inst);
        bool hasDpasInst = false;

        void setupBankConflictsforTwoGRFs(G4_INST* inst);
        void setupBankConflictsforMad(G4_INST* inst);
        void setupBankConflictsForBB(G4_BB* bb, unsigned& threeSourceInstNum, unsigned& sendInstNum,
            unsigned numRegLRA, unsigned& internalConflict);
        void setupBankConflictsForBBTGL(G4_BB* bb, unsigned& threeSourceInstNum, unsigned& sendInstNum, unsigned numRegLRA, unsigned& internalConflict);
        bool hasInternalConflict3Srcs(BankConflict* srcBC);
        void setupBankForSrc0(G4_INST* inst, G4_INST* prevInst);
        void getBanks(G4_INST* inst, BankConflict* srcBC, G4_Declare** dcls, G4_Declare** opndDcls, unsigned* offset);
        void getPrevBanks(G4_INST* inst, BankConflict* srcBC, G4_Declare** dcls, G4_Declare** opndDcls, unsigned* offset);

    public:
        bool setupBankConflictsForKernel(bool doLocalRR, bool& threeSourceCandidate, unsigned numRegLRA, bool& highInternalConflict);

        BankConflictPass(GlobalRA& g, bool global) : gra(g), forGlobal(global)
        {
        }
    };

    class LiveRange final
    {
        G4_RegVar* const var;
        G4_Declare* const dcl;
        const G4_RegFileKind regKind;
        bool* forbidden = nullptr;
        int numForbidden = -1;
        bool spilled = false;

        GlobalRA& gra;
        unsigned numRegNeeded;
        unsigned degree = 0;
        unsigned refCount = 0;
        unsigned parentLRID;
        AssignedReg reg;
        float spillCost;
        BankConflict bc = BankConflict::BANK_CONFLICT_NONE;
        const static unsigned UndefHint = 0xffffffff;
        unsigned allocHint = UndefHint;

        union {
            uint16_t bunch = 0;
            struct {
                uint16_t calleeSaveBias : 1; // indicates if the var is biased to get a callee-save assignment or not
                uint16_t callerSaveBias : 1; // indicates if the var is biased to get a caller-save assignment or not
                uint16_t isEOTSrc : 1; // Gen7 only; whether the live range is the message source of an EOT send
                uint16_t retIp : 1; // variable is the return ip and should not be spilled

                uint16_t active : 1;
                uint16_t isInfiniteCost : 1;
                uint16_t isCandidate : 1;
                uint16_t isPseudoNode : 1;
                uint16_t isPartialDeclare : 1;
                uint16_t isSplittedDeclare : 1;
            };
        };
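
        // Note: 'bunch' overlays all of the flag bits above, so the whole set
        // of flags can be zero-initialized or copied as a single 16-bit value.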

    public:

        LiveRange(G4_RegVar* v, GlobalRA&);

        void* operator new(size_t sz, vISA::Mem_Manager& m) { return m.alloc(sz); }

        void setDegree(unsigned d) { degree = d; }
        unsigned getDegree() const { return degree; }

        unsigned getNumRegNeeded() const { return numRegNeeded; }

        void subtractDegree(unsigned d)
        {
            MUST_BE_TRUE(d <= degree, ERROR_INTERNAL_ARGUMENT);
            degree -= d;
        }

        void setActive(bool v) { active = v; }
        bool getActive() const { return active; }

        void emit(std::ostream& output, bool symbolreg = false) const
        {
            output << getVar()->getDeclare()->getName();
            if (reg.phyReg != NULL)
            {
                output << "(";
                reg.phyReg->emit(output);
                output << '.' << reg.subRegOff << ':';
                output << TypeSymbol(getVar()->getDeclare()->getElemType()) << ")";
            }
            output << "(size = " << getDcl()->getByteSize() <<
                ", spill cost = " << getSpillCost() << ", degree = " << getDegree() << ")";
        }

        unsigned getRefCount() const { return refCount; }
        void setRefCount(unsigned count) { refCount = count; }

        float getSpillCost() const { return spillCost; }
        void setSpillCost(float cost) { spillCost = cost; }

        bool getIsInfiniteSpillCost() const { return isInfiniteCost; }
        void checkForInfiniteSpillCost(G4_BB* bb, std::list<G4_INST*>::reverse_iterator& it);

        G4_VarBase* getPhyReg() const { return reg.phyReg; }

        unsigned getPhyRegOff() const { return reg.subRegOff; }

        void setPhyReg(G4_VarBase* pr, unsigned off)
        {
            MUST_BE_TRUE(pr->isPhyReg(), ERROR_UNKNOWN);
            reg.phyReg = pr;
            reg.subRegOff = off;
        }

        void resetPhyReg()
        {
            reg.phyReg = nullptr;
            reg.subRegOff = 0;
        }

        bool getIsPseudoNode() const { return isPseudoNode; }
        void setIsPseudoNode() { isPseudoNode = true; }
        bool getIsPartialDcl() const { return isPartialDeclare; }
        void setIsPartialDcl() { isPartialDeclare = true; }
        bool getIsSplittedDcl() const { return isSplittedDeclare; }
        void setIsSplittedDcl(bool v) { isSplittedDeclare = v; }
        BankConflict getBC() const { return bc; }
        void setBC(BankConflict c) { bc = c; }
        void setParentLRID(int id) { parentLRID = id; }
        unsigned getParentLRID() const { return parentLRID; }

        unsigned getAllocHint() const { return allocHint; }
        bool hasAllocHint() const { return allocHint != UndefHint; }
        void setAllocHint(unsigned h);
        void resetAllocHint() { allocHint = UndefHint; }

        // From VarBasis
    public:
        void allocForbidden(vISA::Mem_Manager& mem, bool reserveStackCallRegs, unsigned reserveSpillSize, unsigned reservedRegNum);
        void allocForbiddenCallerSave(vISA::Mem_Manager& mem, G4_Kernel* kernel);
        void allocForbiddenCalleeSave(vISA::Mem_Manager& mem, G4_Kernel* kernel);
        const bool* getForbidden() const { return forbidden; }
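
        // forbidden[i] == true means physical register i must not be assigned
        // to this live range (e.g., registers reserved for spill space or
        // stack calls; see the allocForbidden* helpers above). numForbidden
        // caches the population count; markForbidden() resets it to -1 so
        // getNumForbidden() recounts lazily.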
        void markForbidden(int reg, int numReg)
        {
            MUST_BE_TRUE(((int)getForbiddenVectorSize()) >= reg + numReg, "forbidden register is out of bound");
            for (int i = reg; i < reg + numReg; ++i)
            {
                forbidden[i] = true;
            }
            numForbidden = -1;
        }

        int getNumForbidden()
        {
            if (forbidden == nullptr)
            {
                return 0;
            }
            if (numForbidden == -1)
            {
                numForbidden = 0;
                for (int i = 0, size = getForbiddenVectorSize(); i < size; ++i)
                {
                    if (forbidden[i])
                    {
                        ++numForbidden;
                    }
                }
            }
            return numForbidden;
        }

        G4_RegVar* getVar() const { return var; }
        G4_Declare* getDcl() const { return dcl; }
        G4_RegFileKind getRegKind() const { return regKind; }
        void dump() const;

        void setCalleeSaveBias(bool v) { calleeSaveBias = v; }
        bool getCalleeSaveBias() const { return calleeSaveBias; }

        void setCallerSaveBias(bool v) { callerSaveBias = v; }
        bool getCallerSaveBias() const { return callerSaveBias; }

        void setEOTSrc() { isEOTSrc = true; }
        bool getEOTSrc() const { return isEOTSrc; }

        void setRetIp() { retIp = true; }
        bool isRetIp() const { return retIp; }

        bool isSpilled() const { return spilled; }
        void setSpilled(bool v) { spilled = v; }

    private:
        //const Options *m_options;
        unsigned getForbiddenVectorSize() const;
        void allocForbiddenVector(vISA::Mem_Manager& mem);
    }; // class LiveRange
} // namespace vISA

typedef std::list<vISA::LiveRange*> LIVERANGE_LIST;
typedef std::list<vISA::LiveRange*>::iterator LIVERANGE_LIST_ITER;

// A mapping from the pseudo decl created for caller save/restore to the ret val.
// This is used in augmentIntfGraph to prune interference edges for the fcall ret val.
typedef std::map<vISA::G4_Declare*, vISA::G4_Declare*> FCALL_RET_MAP;
typedef std::map<vISA::G4_Declare*, vISA::G4_Declare*>::iterator FCALL_RET_MAP_ITER;

typedef std::map<vISA::G4_Declare*, std::pair<vISA::G4_INST*, unsigned>> CALL_DECL_MAP;
typedef std::map<vISA::G4_Declare*, std::pair<vISA::G4_INST*, unsigned>>::iterator CALL_DECL_MAP_ITER;

//
// A bit array records all interference information.
// (The 2D matrix is flattened to a 1D array.)
// Since the interference information is symmetric, we could use only
// half of the size. To simplify the implementation, we use the full
// size of the bit array.
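//
// Addressing sketch (assuming rowSize = ceil(maxId / BITS_DWORD), which is
// what the arithmetic in safeSetInterference() implies): the edge bit for a
// pair v1 < v2 lives in dword matrix[v1 * rowSize + v2 / 32] at bit v2 % 32.
// E.g., with maxId = 100 and rowSize = 4, the pair (3, 70) maps to
// matrix[3 * 4 + 70 / 32] = matrix[14], bit 70 % 32 = 6. Only the v1 < v2
// half is populated.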
//

namespace vISA
{
    class Augmentation
    {
    private:
        // pair of default mask, non-default mask
        using MaskDeclares = std::pair<BitSet, BitSet>;
        G4_Kernel& kernel;
        Interference& intf;
        GlobalRA& gra;
        const LivenessAnalysis& liveAnalysis;
        LiveRange* const * const lrs;
        FCALL_RET_MAP& fcallRetMap;
        CALL_DECL_MAP callDclMap;
        std::unordered_map<FuncInfo*, PhyRegSummary> localSummaryOfCallee;
        std::vector<G4_Declare*> sortedIntervals;
        std::list<G4_Declare*> defaultMask;
        std::list<G4_Declare*> nonDefaultMask;
        std::unordered_map<FuncInfo*, MaskDeclares> callsiteDeclares;
        std::unordered_map<G4_Declare*, MaskDeclares> retDeclares;
        Mem_Manager& m;

        bool updateDstMaskForGather(G4_INST* inst, std::vector<unsigned char>& mask);
        bool updateDstMaskForGatherRaw(G4_INST* inst, std::vector<unsigned char>& mask, const G4_SendDescRaw* raw);
        bool updateDstMaskForGatherLdSt(G4_INST* inst, std::vector<unsigned char>& mask, const G4_SendDescLdSt* ldst);
        void updateDstMask(G4_INST* inst, bool checkCmodOnly);
        static unsigned getByteSizeFromMask(AugmentationMasks type);
        bool isDefaultMaskDcl(G4_Declare* dcl, unsigned simdSize, AugmentationMasks type);
        bool isDefaultMaskSubDeclare(unsigned char* mask, unsigned lb, unsigned rb, G4_Declare* dcl, unsigned simdSize);
        bool verifyMaskIfInit(G4_Declare* dcl, AugmentationMasks mask);
        bool checkGRFPattern3(G4_Declare* dcl, G4_DstRegRegion* dst, unsigned maskOff,
            unsigned lb, unsigned rb, unsigned execSize);
        bool checkGRFPattern2(G4_Declare* dcl, G4_DstRegRegion* dst, unsigned maskOff,
            unsigned lb, unsigned rb, unsigned execSize);
        bool checkGRFPattern1(G4_Declare* dcl, G4_DstRegRegion* dst, unsigned maskOff,
            unsigned lb, unsigned rb, unsigned execSize);
        void markNonDefaultDstRgn(G4_INST* inst, G4_Operand* opnd);
        bool markNonDefaultMaskDef();
        G4_BB* getTopmostBBDst(G4_BB* src, G4_BB* end, G4_BB* origSrc, unsigned traversal);
        void updateStartIntervalForSubDcl(G4_Declare* dcl, G4_INST* curInst, G4_Operand* opnd);
        void updateEndIntervalForSubDcl(G4_Declare* dcl, G4_INST* curInst, G4_Operand* opnd);
        void updateStartInterval(const G4_Declare* dcl, G4_INST* curInst);
        void updateEndInterval(const G4_Declare* dcl, G4_INST* curInst);
        void updateStartIntervalForLocal(G4_Declare* dcl, G4_INST* curInst, G4_Operand* opnd);
        void updateEndIntervalForLocal(G4_Declare* dcl, G4_INST* curInst, G4_Operand* opnd);
        void buildLiveIntervals();
        void clearIntervalInfo();
        void sortLiveIntervals();
        unsigned getEnd(const G4_Declare* dcl) const;
        bool isNoMask(const G4_Declare* dcl, unsigned size) const;
        bool isConsecutiveBits(const G4_Declare* dcl, unsigned size) const;
        bool isCompatible(const G4_Declare* testDcl, const G4_Declare* biggerDcl) const;
        void buildInterferenceIncompatibleMask();
        void buildInteferenceForCallSiteOrRetDeclare(G4_Declare* newDcl, MaskDeclares* mask);
        void buildInteferenceForCallsite(FuncInfo* func);
        void buildInteferenceForRetDeclares();
        void buildSummaryForCallees();
        void expireIntervals(unsigned startIdx);
        void buildSIMDIntfDcl(G4_Declare* newDcl, bool isCall);
        void buildSIMDIntfAll(G4_Declare* newDcl);
        void buildSIMDIntfAllOld(G4_Declare* newDcl);
        void updateActiveList(G4_Declare* newDcl, std::list<G4_Declare*>* dclMaskList);
        void handleSIMDIntf(G4_Declare* firstDcl, G4_Declare* secondDcl, bool isCall);
        bool weakEdgeNeeded(AugmentationMasks, AugmentationMasks);

        void addSIMDIntfDclForCallSite(MaskDeclares* maskDeclares);
        void addSIMDIntfForRetDclares(G4_Declare* newDcl);

    public:
        Augmentation(G4_Kernel& k, Interference& i, const LivenessAnalysis& l, LiveRange* const ranges[], GlobalRA& g);

        void augmentIntfGraph();
    };

    class Interference
    {
        friend class Augmentation;

        // This stores compatible ranges for each variable. Such
        // compatible ranges will not be present in the sparseIntf set.
        // We store G4_Declare* instead of an id because variables
        // allocated by LRA will not have a valid id.
        std::map<G4_Declare*, std::vector<G4_Declare*>> compatibleSparseIntf;

        GlobalRA& gra;
        G4_Kernel& kernel;
        LiveRange** const & lrs;
        IR_Builder& builder;
        const unsigned maxId;
        const unsigned rowSize;
        const unsigned splitStartId;
        const unsigned splitNum;
        unsigned* matrix = nullptr;
        const LivenessAnalysis* const liveAnalysis;

        std::vector<std::vector<unsigned>> sparseIntf;

        // Sparse interference matrix.
        // We don't update sparseIntf directly, to ensure uniqueness.
        // Like the dense matrix, the stored interference is not symmetric
        // (that is, if v1 and v2 interfere and v1 < v2, we insert (v1, v2)
        // but not (v2, v1)) for better cache behavior.
        std::vector<std::unordered_set<uint32_t>> sparseMatrix;
        static const uint32_t denseMatrixLimit = 0x80000;

        static void updateLiveness(BitSet& live, uint32_t id, bool val)
        {
            live.set(id, val);
        }

        G4_Declare* getGRFDclForHRA(int GRFNum) const;

        bool useDenseMatrix() const
        {
            return maxId < denseMatrixLimit;
        }

        // Only the upper half of the matrix is used in the interference graph.
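        // safeSetInterference() below assumes v1 < v2; callers that cannot
        // guarantee that ordering should go through checkAndSetIntf() instead.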
        inline void safeSetInterference(unsigned v1, unsigned v2)
        {
            // Assume v1 < v2
            if (useDenseMatrix())
            {
                unsigned col = v2 / BITS_DWORD;
                matrix[v1 * rowSize + col] |= 1 << (v2 % BITS_DWORD);
            }
            else
            {
                sparseMatrix[v1].emplace(v2);
            }
        }

        inline void setBlockInterferencesOneWay(unsigned v1, unsigned col, unsigned block)
        {
            if (useDenseMatrix())
            {
#ifdef _DEBUG
                MUST_BE_TRUE(sparseIntf.size() == 0, "Updating intf graph matrix after populating sparse intf graph");
#endif

                matrix[v1 * rowSize + col] |= block;
            }
            else
            {
                auto&& intfSet = sparseMatrix[v1];
                for (int i = 0; i < BITS_DWORD; ++i)
                {
                    if (block & (1 << i))
                    {
                        uint32_t v2 = col * BITS_DWORD + i;
                        intfSet.emplace(v2);
                    }
                }
            }
        }

        unsigned getInterferenceBlk(unsigned idx) const
        {
            assert(useDenseMatrix() && "matrix is not initialized");
            return matrix[idx];
        }

        void addCalleeSaveBias(const BitSet& live);

        void buildInterferenceAtBBExit(const G4_BB* bb, BitSet& live);
        void buildInterferenceWithinBB(G4_BB* bb, BitSet& live);
        void buildInterferenceForDst(G4_BB* bb, BitSet& live, G4_INST* inst, std::list<G4_INST*>::reverse_iterator i, G4_DstRegRegion* dst);
        void buildInterferenceForFcall(G4_BB* bb, BitSet& live, G4_INST* inst, std::list<G4_INST*>::reverse_iterator i, const G4_VarBase* regVar);

        inline void filterSplitDclares(unsigned startIdx, unsigned endIdx, unsigned n, unsigned col, unsigned& elt, bool is_split);

        void buildInterferenceWithLive(const BitSet& live, unsigned i);
        void buildInterferenceWithSubDcl(unsigned lr_id, G4_Operand* opnd, BitSet& live, bool setLive, bool setIntf);
        void buildInterferenceWithAllSubDcl(unsigned v1, unsigned v2);

        void markInterferenceForSend(G4_BB* bb, G4_INST* inst, G4_DstRegRegion* dst);

        void buildInterferenceWithLocalRA(G4_BB* bb);

        void buildInterferenceAmongLiveOuts();
        void buildInterferenceAmongLiveIns();

        void markInterferenceToAvoidDstSrcOverlap(G4_BB* bb, G4_INST* inst);

        void generateSparseIntfGraph();

    public:
        Interference(const LivenessAnalysis* l, LiveRange** const & lr, unsigned n, unsigned ns, unsigned nm,
            GlobalRA& g);

        ~Interference()
        {
            if (useDenseMatrix())
            {
                delete[] matrix;
            }
        }

        const std::vector<G4_Declare*>* getCompatibleSparseIntf(G4_Declare* d) const
        {
            if (compatibleSparseIntf.size() > 0)
            {
                auto it = compatibleSparseIntf.find(d);
                if (it == compatibleSparseIntf.end())
                {
                    return nullptr;
                }
                return &it->second;
            }
            return nullptr;
        }

        void init(vISA::Mem_Manager& m)
        {
            if (useDenseMatrix())
            {
                auto N = (size_t)rowSize * (size_t)maxId;
                matrix = new uint32_t[N](); // zero-initialize
            }
            else
            {
                sparseMatrix.resize(maxId);
            }
        }

        void computeInterference();
        void applyPartitionBias();
        bool interfereBetween(unsigned v1, unsigned v2) const;
        const std::vector<unsigned>& getSparseIntfForVar(unsigned id) const { return sparseIntf[id]; }

        inline bool varSplitCheckBeforeIntf(unsigned v1, unsigned v2) const;
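
        // Records interference between v1 and v2 in canonical (smaller id
        // first) order; v1 == v2 is a no-op since a variable does not
        // interfere with itself.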
        void checkAndSetIntf(unsigned v1, unsigned v2)
        {
            if (v1 < v2)
            {
                safeSetInterference(v1, v2);
            }
            else if (v1 > v2)
            {
                safeSetInterference(v2, v1);
            }
        }

        void dumpInterference() const;
        void dumpVarInterference() const;
        bool dumpIntf(const char*) const;
        void interferenceVerificationForSplit() const;

        bool linearScanVerify() const;

        bool isStrongEdgeBetween(const G4_Declare*, const G4_Declare*) const;
    };

    // Class to compute the register chart and dump it to an ostream.
    // Used only when -dumpregchart is passed.
    class RegChartDump
    {
        const GlobalRA& gra;
        std::vector<G4_Declare*> sortedLiveIntervals;
        std::unordered_map<G4_Declare*, std::pair<G4_INST*, G4_INST*>> startEnd;
    public:
        void recordLiveIntervals(const std::vector<G4_Declare*>& dcls);
        void dumpRegChart(std::ostream&, LiveRange** lrs = nullptr, unsigned numLRs = 0);

        RegChartDump(const GlobalRA& g) : gra(g) {}
    };

    class GraphColor
    {
        GlobalRA& gra;

        unsigned totalGRFRegCount; // .reg_count_total
        const unsigned numVar;
        const unsigned numSplitStartID;
        const unsigned numSplitVar;
        unsigned* spAddrRegSig;
        Interference intf;
        PhyRegPool& regPool;
        IR_Builder& builder;
        LiveRange** lrs = nullptr;
        bool isHybrid;
        LIVERANGE_LIST spilledLRs;
        bool forceSpill;
        vISA::Mem_Manager mem;
        const Options* m_options;

        unsigned evenTotalDegree = 1;
        unsigned oddTotalDegree = 1;
        unsigned evenTotalRegNum = 1;
        unsigned oddTotalRegNum = 1;
        unsigned evenMaxRegNum = 1;
        unsigned oddMaxRegNum = 1;

        G4_Kernel& kernel;
        LivenessAnalysis& liveAnalysis;

        std::vector<LiveRange*> colorOrder;
        LIVERANGE_LIST unconstrainedWorklist;
        LIVERANGE_LIST constrainedWorklist;
        unsigned numColor = 0;

        unsigned edgeWeightGRF(const LiveRange* lr1, const LiveRange* lr2);
        unsigned edgeWeightARF(const LiveRange* lr1, const LiveRange* lr2);

        void computeDegreeForGRF();
        void computeDegreeForARF();
        void computeSpillCosts(bool useSplitLLRHeuristic);
        void determineColorOrdering();
        void removeConstrained();
        void relaxNeighborDegreeGRF(LiveRange* lr);
        void relaxNeighborDegreeARF(LiveRange* lr);
        bool assignColors(ColorHeuristic heuristicGRF, bool doBankConflict, bool highInternalConflict, bool honorHints = true);

        void clearSpillAddrLocSignature()
        {
            memset(spAddrRegSig, 0, getNumAddrRegisters() * sizeof(unsigned));
        }
        void pruneActiveSpillAddrLocs(G4_DstRegRegion*, unsigned, G4_Type);
        void updateActiveSpillAddrLocs(G4_DstRegRegion*, G4_SrcRegRegion*, unsigned execSize);
        bool redundantAddrFill(G4_DstRegRegion*, G4_SrcRegRegion*, unsigned execSize);

    public:
        GraphColor(LivenessAnalysis& live, unsigned totalGRF, bool hybrid, bool forceSpill_);

        const Options* getOptions() const { return m_options; }

        bool regAlloc(
            bool doBankConflictReduction,
            bool highInternalConflict,
            bool reserveSpillReg, unsigned& spillRegSize, unsigned& indrSpillRegSize, const RPE* rpe);
        bool requireSpillCode() const { return !spilledLRs.empty(); }
        const Interference* getIntf() const { return &intf; }
        void createLiveRanges(unsigned reserveSpillSize = 0);
        LiveRange** getLiveRanges() const { return lrs; }
        const LIVERANGE_LIST& getSpilledLiveRanges() const { return spilledLRs; }
        void confirmRegisterAssignments();
        void resetTemporaryRegisterAssignments();
        void cleanupRedundantARFFillCode();
        void getCalleeSaveRegisters();
        void addA0SaveRestoreCode();
        void addFlagSaveRestoreCode();
        void getSaveRestoreRegister();
        void getCallerSaveRegisters();
        void dumpRegisterPressure();
        GlobalRA& getGRA() { return gra; }
        G4_SrcRegRegion* getScratchSurface() const;
        LiveRange** getLRs() const { return lrs; }
        unsigned int getNumVars() const { return numVar; }
    };

    struct BundleConflict
    {
        const G4_Declare* const dcl;
        const int offset;
        BundleConflict(const G4_Declare* dcl, int offset) : dcl(dcl), offset(offset) {}
    };

    struct RAVarInfo
    {
        unsigned numSplit = 0;
        unsigned bb_id = UINT_MAX; // block-local variable's block id
        G4_Declare* splittedDCL = nullptr;
        LocalLiveRange* localLR = nullptr;
        LSLiveRange* LSLR = nullptr;
        unsigned numRefs = 0;
        BankConflict conflict = BANK_CONFLICT_NONE; // the bank that should be assigned to dcl if possible
        G4_INST* startInterval = nullptr;
        G4_INST* endInterval = nullptr;
        std::vector<unsigned char> mask;
        std::vector<const G4_Declare*> subDclList;
        unsigned subOff = 0;
        std::vector<BundleConflict> bundleConflicts;
        G4_SubReg_Align subAlign = G4_SubReg_Align::Any;
        bool isEvenAlign = false;
    };

    class VerifyAugmentation
    {
    private:
        G4_Kernel* kernel = nullptr;
        GlobalRA* gra = nullptr;
        std::vector<G4_Declare*> sortedLiveRanges;
        std::unordered_map<const G4_Declare*, std::tuple<LiveRange*, AugmentationMasks, G4_INST*, G4_INST*>> masks;
        LiveRange* const * lrs = nullptr;
        unsigned numVars = 0;
        const Interference* intf = nullptr;
        std::unordered_map<G4_Declare*, LiveRange*> DclLRMap;
        std::unordered_map<G4_BB*, std::string> bbLabels;
        std::vector<std::tuple<G4_BB*, unsigned, unsigned>> BBLexId;

        static const char* getStr(AugmentationMasks a)
        {
            if (a == AugmentationMasks::Default16Bit)
                return "Default16Bit";
            else if (a == AugmentationMasks::Default32Bit)
                return "Default32Bit";
            else if (a == AugmentationMasks::Default64Bit)
                return "Default64Bit";
            else if (a == AugmentationMasks::NonDefault)
                return "NonDefault";
            else if (a == AugmentationMasks::Undetermined)
                return "Undetermined";

            return "-----";
        }
        void labelBBs();
        void populateBBLexId();
        bool interfereBetween(G4_Declare*, G4_Declare*);
        void verifyAlign(G4_Declare* dcl);

    public:
        void verify();
        void reset()
        {
            sortedLiveRanges.clear();
            masks.clear();
            kernel = nullptr;
            lrs = nullptr;
            gra = nullptr;
            numVars = 0;
            intf = nullptr;
            DclLRMap.clear();
            bbLabels.clear();
            BBLexId.clear();
        }
        void loadAugData(std::vector<G4_Declare*>& s, LiveRange* const * l, unsigned n, const Interference* i, GlobalRA& g);
        void dump(const char* dclName);
        bool isClobbered(LiveRange* lr, std::string& msg);
    };

    class GlobalRA
    {
    private:
        std::unordered_set<G4_INST*> EUFusionWAInsts;
        bool m_EUFusionWANeeded;
    public:
        bool EUFusionWANeeded() const { return m_EUFusionWANeeded; }
        void addEUFusionWAInsts(G4_INST* inst);
        void removeEUFusionWAInst(G4_INST* inst) { EUFusionWAInsts.erase(inst); }
        const std::unordered_set<G4_INST*>& getEUFusionWAInsts() { return EUFusionWAInsts; }
    public:
        std::unique_ptr<VerifyAugmentation> verifyAugmentation;
        std::unique_ptr<RegChartDump> regChart;
        std::unique_ptr<SpillAnalysis> spillAnalysis;

        static bool useGenericAugAlign()
        {
            auto gen = getPlatformGeneration(getGenxPlatform());
            if (gen == PlatformGen::GEN9 ||
                gen == PlatformGen::GEN8)
                return false;
            return true;
        }
        static const char StackCallStr[];

    private:
        template <class REGION_TYPE> static unsigned getRegionDisp(REGION_TYPE* region);
        unsigned getRegionByteSize(G4_DstRegRegion* region, unsigned execSize);
        static bool owordAligned(unsigned offset) { return offset % 16 == 0; }
        template <class REGION_TYPE> bool isUnalignedRegion(REGION_TYPE* region, unsigned execSize);
        bool shouldPreloadDst(G4_INST* instContext, G4_BB* curBB);
        bool livenessCandidate(const G4_Declare* decl) const;
        void updateDefSet(std::set<G4_Declare*>& defs, G4_Declare* referencedDcl);
        void detectUndefinedUses(LivenessAnalysis& liveAnalysis, G4_Kernel& kernel);
        void markBlockLocalVar(G4_RegVar* var, unsigned bbId);
        void markBlockLocalVars();
        void computePhyReg();
        void fixAlignment();
        void expandSpillIntrinsic(G4_BB*);
        void expandFillIntrinsic(G4_BB*);
        void expandSpillFillIntrinsics(unsigned);
        void saveRestoreA0(G4_BB*);

        static const RAVarInfo defaultValues;
        std::vector<RAVarInfo> vars;
        std::vector<AugmentationMasks> varMasks;
        std::vector<G4_Declare*> UndeclaredVars;

        // fake declares for each GRF reg, used by HRA;
        // note that only GRFs used by LRA get a declare
        std::vector<G4_Declare*> GRFDclsForHRA;

        // Store all LocalLiveRange instances created, so they're
        // appropriately destroyed along with the GlobalRA instance.
        // This needs to be a list because we take the address of
        // its elements; std::vector cannot be used due to its
        // reallocation policy.
        std::list<LocalLiveRange> localLiveRanges;

        std::unordered_map<const G4_BB*, unsigned> subretloc;
        // map ret location to declare for call/ret
        std::map<uint32_t, G4_Declare*> retDecls;

        // store instructions that shouldn't be rematerialized
        std::unordered_set<G4_INST*> dontRemat;

        RAVarInfo& allocVar(const G4_Declare* dcl)
        {
            auto dclid = dcl->getDeclId();
            if (dclid >= vars.size())
                vars.resize(dclid + 1);
            return vars[dclid];
        }
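
        // Read-only counterpart of allocVar(): it never grows the table and
        // falls back to defaultValues for declares that have no RA info yet.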
        const RAVarInfo& getVar(const G4_Declare* dcl) const
        {
            auto dclid = dcl->getDeclId();
            return dclid >= vars.size() ? defaultValues : vars[dclid];
        }

        // temp variable storing the FP dcl's old value,
        // created in addStoreRestoreForFP
        G4_Declare* oldFPDcl = nullptr;

        // instructions to save/restore the vISA FP, only present in functions
        G4_INST* saveBE_FPInst = nullptr;
        G4_INST* restoreBE_FPInst = nullptr;

        // instruction to update BE_FP, only present in functions
        G4_INST* setupBE_FP = nullptr;

        // new temps for each reference of spilled address/flag decls
        std::unordered_set<G4_Declare*> addrFlagSpillDcls;

        // store the iteration number of the GRA loop
        unsigned iterNo = 0;

        uint32_t numGRFSpill = 0;
        uint32_t numGRFFill = 0;

        bool spillFillIntrinUsesLSC(G4_INST* spillFillIntrin);
        void expandFillLSC(G4_BB* bb, INST_LIST_ITER& instIt);
        void expandSpillLSC(G4_BB* bb, INST_LIST_ITER& instIt);
        void expandFillNonStackcall(uint32_t numRows, uint32_t offset, short rowOffset, G4_SrcRegRegion* header, G4_DstRegRegion* resultRgn, G4_BB* bb, INST_LIST_ITER& instIt);
        void expandSpillNonStackcall(uint32_t numRows, uint32_t offset, short rowOffset, G4_SrcRegRegion* header, G4_SrcRegRegion* payload, G4_BB* bb, INST_LIST_ITER& instIt);
        void expandFillStackcall(uint32_t numRows, uint32_t offset, short rowOffset, G4_SrcRegRegion* header, G4_DstRegRegion* resultRgn, G4_BB* bb, INST_LIST_ITER& instIt);
        void expandSpillStackcall(uint32_t numRows, uint32_t offset, short rowOffset, G4_SrcRegRegion* payload, G4_BB* bb, INST_LIST_ITER& instIt);

    public:
        static unsigned sendBlockSizeCode(unsigned owordSize);

        // For the current program, store caller/callee save/restore instructions
        std::unordered_set<G4_INST*> calleeSaveInsts;
        std::unordered_set<G4_INST*> calleeRestoreInsts;
        std::unordered_map<G4_INST*, std::unordered_set<G4_INST*>> callerSaveInsts;
        std::unordered_map<G4_INST*, std::unordered_set<G4_INST*>> callerRestoreInsts;
        std::unordered_map<G4_BB*, std::vector<bool>> callerSaveRegsMap;
        std::unordered_map<G4_BB*, unsigned> callerSaveRegCountMap;
        std::unordered_map<G4_BB*, std::vector<bool>> retRegsMap;
        std::vector<bool> calleeSaveRegs;
        unsigned calleeSaveRegCount = 0;

        std::unordered_map<G4_Declare*, SplitResults> splitResults;

    public:
        G4_Kernel& kernel;
        IR_Builder& builder;
        PhyRegPool& regPool;
        PointsToAnalysis& pointsToAnalysis;
        FCALL_RET_MAP fcallRetMap;

        bool useLscForSpillFill = false;
        bool useLscForNonStackCallSpillFill = false;

        VarSplitPass* getVarSplitPass() const { return kernel.getVarSplitPass(); }

        unsigned getSubRetLoc(const G4_BB* bb)
        {
            auto it = subretloc.find(bb);
            if (it == subretloc.end())
                return UNDEFINED_VAL;
            return it->second;
        }

        void setSubRetLoc(const G4_BB* bb, unsigned s) { subretloc[bb] = s; }

        bool isSubRetLocConflict(G4_BB* bb, std::vector<unsigned>& usedLoc, unsigned stackTop);
        void assignLocForReturnAddr();
        unsigned determineReturnAddrLoc(unsigned entryId, unsigned* retLoc, G4_BB* bb);
        void insertCallReturnVar();
        void insertSaveAddr(G4_BB*);
        void insertRestoreAddr(G4_BB*);
        void setIterNo(unsigned i) { iterNo = i; }
        unsigned getIterNo() const { return iterNo; }
        void fixSrc0IndirFcall();
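
        // Returns (creating on first use) the declare that holds the return
        // address for return location retLoc.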
        G4_Declare* getRetDecl(uint32_t retLoc)
        {
            auto result = retDecls.find(retLoc);
            if (result != retDecls.end())
            {
                return result->second;
            }

            const char* name = builder.getNameString(kernel.fg.mem, 24, "RET__loc%d", retLoc);
            G4_Declare* dcl = builder.createDeclareNoLookup(name, G4_GRF, 2, 1, Type_UD);

            // the call destination must still be QWord aligned
            dcl->setSubRegAlign(Four_Word);
            setSubRegAlign(dcl, Four_Word);

            retDecls[retLoc] = dcl;
            return dcl;
        }

        G4_INST* getSaveBE_FPInst() const { return saveBE_FPInst; }
        G4_INST* getRestoreBE_FPInst() const { return restoreBE_FPInst; }

        G4_INST* getBEFPSetupInst() { return setupBE_FP; }
        void setBEFPSetupInst(G4_INST* i) { setupBE_FP = i; }

        static unsigned owordToGRFSize(unsigned numOwords);
        static unsigned hwordToGRFSize(unsigned numHwords);
        static unsigned GRFToHwordSize(unsigned numGRFs);
        static unsigned GRFSizeToOwords(unsigned numGRFs);
        static unsigned getHWordByteSize();

        // RA specific fields
        G4_Declare* getGRFDclForHRA(int GRFNum) const { return GRFDclsForHRA[GRFNum]; }

        G4_Declare* getOldFPDcl() const { return oldFPDcl; }

        bool isAddrFlagSpillDcl(G4_Declare* dcl) const
        {
            return addrFlagSpillDcls.count(dcl) != 0;
        }

        void addAddrFlagSpillDcl(G4_Declare* dcl)
        {
            addrFlagSpillDcls.insert(dcl);
        }

        void addUndefinedDcl(G4_Declare* dcl)
        {
            UndeclaredVars.push_back(dcl);
        }

        bool isUndefinedDcl(const G4_Declare* dcl) const
        {
            return std::find(UndeclaredVars.begin(), UndeclaredVars.end(), dcl) != UndeclaredVars.end();
        }

        unsigned getSplitVarNum(const G4_Declare* dcl) const
        {
            return getVar(dcl).numSplit;
        }

        void setSplitVarNum(const G4_Declare* dcl, unsigned val)
        {
            allocVar(dcl).numSplit = val;
        }

        unsigned getBBId(const G4_Declare* dcl) const
        {
            return getVar(dcl).bb_id;
        }

        void setBBId(const G4_Declare* dcl, unsigned id)
        {
            allocVar(dcl).bb_id = id;
        }

        bool isBlockLocal(const G4_Declare* dcl) const
        {
            return getBBId(dcl) < (UINT_MAX - 1);
        }

        G4_Declare* getSplittedDeclare(const G4_Declare* dcl) const
        {
            return getVar(dcl).splittedDCL;
        }

        void setSplittedDeclare(const G4_Declare* dcl, G4_Declare* sd)
        {
            allocVar(dcl).splittedDCL = sd;
        }

        LocalLiveRange* getLocalLR(const G4_Declare* dcl) const
        {
            return getVar(dcl).localLR;
        }

        void setLocalLR(G4_Declare* dcl, LocalLiveRange* lr)
        {
            RAVarInfo& var = allocVar(dcl);
            MUST_BE_TRUE(var.localLR == NULL, "Local live range already allocated for declaration");
            var.localLR = lr;
            lr->setTopDcl(dcl);
        }

        LSLiveRange* getSafeLSLR(const G4_Declare* dcl) const
        {
            auto dclid = dcl->getDeclId();
            assert(dclid < vars.size());
            return vars[dclid].LSLR;
        }
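
        // Unlike getSafeLSLR() above, getLSLR() tolerates declares that have
        // no RA info yet and returns the default (null) entry for them.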
        LSLiveRange* getLSLR(const G4_Declare* dcl) const
        {
            return getVar(dcl).LSLR;
        }

        void setLSLR(G4_Declare* dcl, LSLiveRange* lr)
        {
            RAVarInfo& var = allocVar(dcl);
            MUST_BE_TRUE(var.LSLR == NULL, "Local live range already allocated for declaration");
            var.LSLR = lr;
            lr->setTopDcl(dcl);
        }

        void resetLSLR(const G4_Declare* dcl)
        {
            allocVar(dcl).LSLR = nullptr;
        }

        void resetLocalLR(const G4_Declare* dcl)
        {
            allocVar(dcl).localLR = nullptr;
        }

        void clearStaleLiveRanges()
        {
            for (auto dcl : kernel.Declares)
            {
                setBBId(dcl, UINT_MAX);
                resetLocalLR(dcl);
            }
        }

        void clearLocalLiveRanges()
        {
            for (auto dcl : kernel.Declares)
            {
                resetLocalLR(dcl);
            }
        }

        void recordRef(const G4_Declare* dcl)
        {
            allocVar(dcl).numRefs++;
        }

        unsigned getNumRefs(const G4_Declare* dcl) const
        {
            return getVar(dcl).numRefs;
        }

        void setNumRefs(const G4_Declare* dcl, unsigned refs)
        {
            allocVar(dcl).numRefs = refs;
        }

        BankConflict getBankConflict(const G4_Declare* dcl) const
        {
            return getVar(dcl).conflict;
        }

        void setBankConflict(const G4_Declare* dcl, BankConflict c)
        {
            allocVar(dcl).conflict = c;
        }

        G4_INST* getStartInterval(const G4_Declare* dcl) const
        {
            return getVar(dcl).startInterval;
        }

        void setStartInterval(const G4_Declare* dcl, G4_INST* inst)
        {
            allocVar(dcl).startInterval = inst;
        }

        G4_INST* getEndInterval(const G4_Declare* dcl) const
        {
            return getVar(dcl).endInterval;
        }

        void setEndInterval(const G4_Declare* dcl, G4_INST* inst)
        {
            allocVar(dcl).endInterval = inst;
        }

        const std::vector<unsigned char>& getMask(const G4_Declare* dcl) const
        {
            return getVar(dcl).mask;
        }

        void setMask(const G4_Declare* dcl, std::vector<unsigned char> m)
        {
            allocVar(dcl).mask = m;
        }

        AugmentationMasks getAugmentationMask(const G4_Declare* dcl) const
        {
            auto dclid = dcl->getDeclId();
            if (dclid >= varMasks.size())
            {
                return AugmentationMasks::Undetermined;
            }
            return varMasks[dclid];
        }

        void setAugmentationMask(const G4_Declare* dcl, AugmentationMasks m)
        {
            auto dclid = dcl->getDeclId();
            if (dclid >= varMasks.size())
                varMasks.resize(dclid + 1);
            varMasks[dclid] = m;
            if (dcl->getIsSplittedDcl())
            {
                for (const G4_Declare* subDcl : getSubDclList(dcl))
                {
                    setAugmentationMask(subDcl, m);
                }
            }
        }
        bool getHasNonDefaultMaskDef(const G4_Declare* dcl) const
        {
            return (getAugmentationMask(dcl) == AugmentationMasks::NonDefault);
        }

        void addBundleConflictDcl(const G4_Declare* dcl, const G4_Declare* subDcl, int offset)
        {
            allocVar(dcl).bundleConflicts.emplace_back(subDcl, offset);
        }

        void clearBundleConflictDcl(const G4_Declare* dcl)
        {
            allocVar(dcl).bundleConflicts.clear();
        }

        const std::vector<BundleConflict>& getBundleConflicts(const G4_Declare* dcl) const
        {
            return getVar(dcl).bundleConflicts;
        }

        unsigned get_bundle(unsigned baseReg, int offset) const
        {
            if (builder.hasPartialInt64Support())
            {
                return (((baseReg + offset) % 32) / 2);
            }
            return (((baseReg + offset) % 64) / 4);
        }

        unsigned get_bank(unsigned baseReg, int offset)
        {
            int bankID = (baseReg + offset) % 2;

            if (builder.hasTwoGRFBank16Bundles())
            {
                bankID = ((baseReg + offset) % 4) / 2;
            }

            if (builder.hasOneGRFBank16Bundles())
            {
                bankID = (baseReg + offset) % 2;
            }

            return bankID;
        }

        void addSubDcl(const G4_Declare* dcl, G4_Declare* subDcl)
        {
            allocVar(dcl).subDclList.push_back(subDcl);
        }

        void clearSubDcl(const G4_Declare* dcl)
        {
            allocVar(dcl).subDclList.clear();
        }

        const std::vector<const G4_Declare*>& getSubDclList(const G4_Declare* dcl) const
        {
            return getVar(dcl).subDclList;
        }

        unsigned getSubOffset(const G4_Declare* dcl) const
        {
            return getVar(dcl).subOff;
        }

        void setSubOffset(const G4_Declare* dcl, unsigned offset)
        {
            allocVar(dcl).subOff = offset;
        }

        G4_SubReg_Align getSubRegAlign(const G4_Declare* dcl) const
        {
            return getVar(dcl).subAlign;
        }
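
        // Sub-register alignment can only be tightened, never relaxed. A
        // sketch of the rule, assuming the alignment enumerators encode word
        // counts (e.g., Even_Word == 2, Four_Word == 4): a declare already at
        // Four_Word stays at Four_Word when a weaker Even_Word request
        // arrives (4 % 2 == 0), while a declare at Even_Word is promoted to
        // Four_Word; requests where neither value divides the other trip the
        // MUST_BE_TRUE checks below.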
        void setSubRegAlign(const G4_Declare* dcl, G4_SubReg_Align subAlg)
        {
            auto& subAlign = allocVar(dcl).subAlign;
            // sub-reg alignment can only become more restrictive than the prior setting
            MUST_BE_TRUE(subAlign == Any || subAlign == subAlg || subAlign % 2 == 0,
                ERROR_UNKNOWN);
            if (subAlign > subAlg)
            {
                MUST_BE_TRUE(subAlign % subAlg == 0, "Sub reg alignment conflict");
                // do nothing; keep the original (more restrictive) alignment
            }
            else
            {
                MUST_BE_TRUE(subAlg % subAlign == 0, "Sub reg alignment conflict");
                subAlign = subAlg;
            }
        }

        bool hasAlignSetup(const G4_Declare* dcl) const
        {
            if (getVar(dcl).subAlign == G4_SubReg_Align::Any &&
                dcl->getSubRegAlign() != G4_SubReg_Align::Any)
                return false;
            return true;
        }

        bool isEvenAligned(const G4_Declare* dcl) const
        {
            return getVar(dcl).isEvenAlign;
        }

        void setEvenAligned(const G4_Declare* dcl, bool e)
        {
            allocVar(dcl).isEvenAlign = e;
        }

        BankAlign getBankAlign(const G4_Declare*) const;
        bool areAllDefsNoMask(G4_Declare*);
        void removeUnreferencedDcls();
        LocalLiveRange* GetOrCreateLocalLiveRange(G4_Declare* topdcl);

        GlobalRA(G4_Kernel& k, PhyRegPool& r, PointsToAnalysis& p2a) : kernel(k), builder(*k.fg.builder), regPool(r),
            pointsToAnalysis(p2a)
        {
            vars.resize(k.Declares.size());
            varMasks.resize(k.Declares.size());

            if (kernel.getOptions()->getOption(vISA_VerifyAugmentation))
            {
                verifyAugmentation = std::make_unique<VerifyAugmentation>();
            }

            // need the call WA for EU fusion for non-entry functions
            m_EUFusionWANeeded = builder.hasFusedEU()
                && builder.getOption(vISA_fusedCallWA)
                && (kernel.fg.getHasStackCalls() || kernel.hasIndirectCall())
                && !builder.getIsKernel();
        }

        void emitFGWithLiveness(const LivenessAnalysis& liveAnalysis) const;
        void reportSpillInfo(const LivenessAnalysis& liveness, const GraphColor& coloring) const;
        static uint32_t getRefCount(int loopNestLevel);
        bool isReRAPass();
        void updateSubRegAlignment(G4_SubReg_Align subAlign);
        bool isChannelSliced();
        void evenAlign();
        bool evenAlignNeeded(G4_Declare*);
        void getBankAlignment(LiveRange* lr, BankAlign& align);
        void printLiveIntervals();
        void reportUndefinedUses(LivenessAnalysis& liveAnalysis, G4_BB* bb, G4_INST* inst, G4_Declare* referencedDcl, std::set<G4_Declare*>& defs, std::ofstream& optreport, Gen4_Operand_Number opndNum);
        void detectNeverDefinedUses();
        void emitVarLiveIntervals();

        void determineSpillRegSize(unsigned& spillRegSize, unsigned& indrSpillRegSize);
        G4_Imm* createMsgDesc(unsigned owordSize, bool writeType, bool isSplitSend);
        void stackCallProlog();
        void saveRegs(unsigned startReg, unsigned owordSize, G4_Declare* scratchRegDcl, G4_Declare* framePtr, unsigned frameOwordOffset, G4_BB* bb, INST_LIST_ITER insertIt, std::unordered_set<G4_INST*>& group);
        void saveActiveRegs(std::vector<bool>& saveRegs, unsigned startReg, unsigned frameOffset, G4_BB* bb, INST_LIST_ITER insertIt, std::unordered_set<G4_INST*>& group);
        void addrRegAlloc();
        void flagRegAlloc();
        bool hybridRA(bool doBankConflictReduction, bool highInternalConflict, LocalRA& lra);
        void assignRegForAliasDcl();
        void removeSplitDecl();
        int coloringRegAlloc();
        void restoreRegs(unsigned startReg, unsigned owordSize, G4_Declare* scratchRegDcl, G4_Declare* framePtr, unsigned frameOwordOffset, G4_BB* bb, INST_LIST_ITER insertIt, std::unordered_set<G4_INST*>& group, bool caller);
        void restoreActiveRegs(std::vector<bool>& restoreRegs, unsigned startReg, unsigned frameOffset, G4_BB* bb, INST_LIST_ITER insertIt, std::unordered_set<G4_INST*>& group, bool caller);
        void OptimizeActiveRegsFootprint(std::vector<bool>& saveRegs);
        void OptimizeActiveRegsFootprint(std::vector<bool>& saveRegs, std::vector<bool>& retRegs);
        void addCallerSaveRestoreCode();
        void addCalleeSaveRestoreCode();
        void addGenxMainStackSetupCode();
        void addCalleeStackSetupCode();
        void addSaveRestoreCode(unsigned localSpillAreaOwordSize);
        void addCallerSavePseudoCode();
        void addCalleeSavePseudoCode();
        void addStoreRestoreToReturn();
        void markGraphBlockLocalVars();
        void verifyRA(LivenessAnalysis& liveAnalysis);
        void resetGlobalRAStates();

        void insertPhyRegDecls();

        void copyMissingAlignment()
        {
            // insert alignment for vars created in RA
            for (auto dcl : kernel.Declares)
            {
                if (dcl->getAliasDeclare())
                    continue;

                if (!hasAlignSetup(dcl))
                {
                    // var may be a temp created in RA
                    setSubRegAlign(dcl, dcl->getSubRegAlign());
                    setEvenAligned(dcl, dcl->isEvenAlign());
                }
            }
        }

        void copyAlignment(G4_Declare* dst, G4_Declare* src)
        {
            setEvenAligned(dst, isEvenAligned(src));
            setSubRegAlign(dst, getSubRegAlign(src));
        }

        void copyAlignment()
        {
            for (auto dcl : kernel.Declares)
            {
                if (dcl->getAliasDeclare())
                    continue;

                setSubRegAlign(dcl, dcl->getSubRegAlign());
                setEvenAligned(dcl, dcl->isEvenAlign());
            }
        }

        bool isNoRemat(G4_INST* inst)
        {
            return dontRemat.find(inst) != dontRemat.end();
        }

        void addNoRemat(G4_INST* inst)
        {
            dontRemat.insert(inst);
        }
    };

    inline G4_Declare* Interference::getGRFDclForHRA(int GRFNum) const
    {
        return gra.getGRFDclForHRA(GRFNum);
    }

    class VarSplit
    {
    private:
        G4_Kernel& kernel;
        GlobalRA& gra;

        VarRange* splitVarRange(VarRange* src1, VarRange* src2, std::stack<VarRange*>* toDelete);
        void rangeListSpliting(VAR_RANGE_LIST* rangeList, G4_Operand* opnd, std::stack<VarRange*>* toDelete);
        static void getHeightWidth(G4_Type type, unsigned numberElements, unsigned short& dclWidth, unsigned short& dclHeight, int& totalByteSize);
        void createSubDcls(G4_Kernel& kernel, G4_Declare* oldDcl, std::vector<G4_Declare*>& splitDclList);
        void insertMovesToTemp(IR_Builder& builder, G4_Declare* oldDcl, G4_Operand* dstOpnd, G4_BB* bb, INST_LIST_ITER instIter, std::vector<G4_Declare*>& splitDclList);
        void insertMovesFromTemp(G4_Kernel& kernel, G4_Declare* oldDcl, int index, G4_Operand* srcOpnd, int pos, G4_BB* bb, INST_LIST_ITER instIter, std::vector<G4_Declare*>& splitDclList);

    public:
        bool didLocalSplit = false;
        bool didGlobalSplit = false;

        void localSplit(IR_Builder& builder, G4_BB* bb);
        void globalSplit(IR_Builder& builder, G4_Kernel& kernel);
        bool canDoGlobalSplit(IR_Builder& builder, G4_Kernel& kernel, uint32_t sendSpillRefCount);

        VarSplit(GlobalRA& g) : kernel(g.kernel), gra(g)
        {
        }
    };

    //
    // Spill code cleanup
    //
    typedef struct _CLEAN_NUM_PROFILE
    {
        unsigned spill_clean_num[10] {};
        unsigned fill_clean_num[10] {};
    } CLEAN_NUM_PROFILE;

    typedef struct _SCRATCH_RANGE
    {
        unsigned leftOff;
        unsigned rightOff;
    } SCRATCH_RANGE;

    typedef std::vector<SCRATCH_RANGE> SCRATCH_RANGE_VEC;
    typedef std::vector<SCRATCH_RANGE>::iterator SCRATCH_RANGE_VEC_ITER;

    typedef struct _RANGE
    {
        unsigned linearizedStart;
        unsigned linearizedEnd;
        bool predicate;
    } REG_RANGE;

    typedef std::vector<REG_RANGE> REG_RANGE_VEC;
    typedef std::vector<REG_RANGE>::iterator REG_RANGE_VEC_ITER;

    typedef std::pair<vISA::G4_INST*, int> RENAME_OPND;
    typedef std::vector<RENAME_OPND> RANAME_VEC;
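
    // Bookkeeping for a single scratch (spill/fill) access. FlagSpillCleanup
    // links accesses that touch the same scratch range through the pre*Access
    // pointers and uses the kill/use flags below to decide when a fill or
    // spill is redundant and can be removed.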
    typedef struct _SCRATCH_ACCESS
    {
        // basic info
#ifdef _DEBUG
        int regNum;
#endif
        vISA::G4_Declare* scratchDcl;    // the scratch access
        vISA::G4_Operand* flagOpnd;
        INST_LIST_ITER inst_it;

        unsigned linearizedStart;        // linearized start register address
        unsigned linearizedEnd;          // linearized end register address
        unsigned leftOff;                // left offset in scratch space
        unsigned rightOff;               // right offset in the scratch space
        unsigned useCount;

        bool isSpill = false;
        bool isBlockLocal = false;
        bool directKill = false;

        bool regKilled = false;
        bool regPartialKilled = false;
        bool regOverKilled = false;
        bool inRangePartialKilled = false;
        bool regInUse = false;

        bool fillInUse = false;
        bool removeable = false;
        bool instKilled = false;
        bool evicted = false;
        bool scratchDefined = false;

        unsigned maskFlag;

        RANAME_VEC renameOperandVec;
        SCRATCH_RANGE_VEC killedScratchRange;
        REG_RANGE_VEC killedRegRange;
        struct _SCRATCH_ACCESS* preScratchAccess;
        struct _SCRATCH_ACCESS* prePreScratchAccess;
        struct _SCRATCH_ACCESS* preFillAccess;

    } SCRATCH_ACCESS;

    typedef std::vector<SCRATCH_ACCESS*> SCRATCH_PTR_VEC;

    typedef vISA::std_arena_based_allocator<SCRATCH_ACCESS*> SCRATCH_PTR_ALLOCATOR;
    typedef std::list<SCRATCH_ACCESS*, SCRATCH_PTR_ALLOCATOR> SCRATCH_PTR_LIST;
    typedef std::list<SCRATCH_ACCESS*, SCRATCH_PTR_ALLOCATOR>::iterator SCRATCH_PTR_LIST_ITER;

    class FlagSpillCleanup
    {
    private:
        GlobalRA& gra;

        void FlagLineraizedStartAndEnd(G4_Declare* topdcl, unsigned& linearizedStart, unsigned& linearizedEnd);
        bool replaceWithPreDcl(IR_Builder& builder, SCRATCH_ACCESS* scratchAccess, SCRATCH_ACCESS* preScratchAccess);
        bool scratchKilledByPartial(SCRATCH_ACCESS* scratchAccess, SCRATCH_ACCESS* preScratchAccess);
        bool addKilledGRFRanges(unsigned linearizedStart, unsigned linearizedEnd, SCRATCH_ACCESS* scratchAccess,
            G4_Predicate* predicate);
        bool regFullyKilled(SCRATCH_ACCESS* scratchAccess, unsigned linearizedStart, unsigned linearizedEnd,
            unsigned short maskFlag);
        bool inRangePartialKilled(SCRATCH_ACCESS* scratchAccess, unsigned linearizedStart, unsigned linearizedEnd,
            unsigned short maskFlag);
        bool regDefineAnalysis(SCRATCH_ACCESS* scratchAccess, unsigned linearizedStart, unsigned linearizedEnd,
            unsigned short maskFlag, G4_Predicate* predicate);
        void regDefineFlag(SCRATCH_PTR_LIST* scratchTraceList, G4_INST* inst, G4_Operand* opnd);
        bool regUseAnalysis(SCRATCH_ACCESS* scratchAccess, unsigned linearizedStart, unsigned linearizedEnd);
        void regUseFlag(SCRATCH_PTR_LIST* scratchTraceList, G4_INST* inst, G4_Operand* opnd, int opndIndex);
        void regUseScratch(SCRATCH_PTR_LIST* scratchTraceList, G4_INST* inst, G4_Operand* opnd, Gen4_Operand_Number opndIndex);
        void initializeScratchAccess(SCRATCH_ACCESS* scratchAccess, INST_LIST_ITER inst_it);
        bool initializeFlagScratchAccess(SCRATCH_PTR_VEC* scratchAccessList, SCRATCH_ACCESS*& scratchAccess, INST_LIST_ITER inst_it);
        void freeScratchAccess(SCRATCH_PTR_VEC* scratchAccessList);
        void flagDefine(SCRATCH_PTR_LIST& scratchTraceList, G4_INST* inst);
        void scratchUse(SCRATCH_PTR_LIST& scratchTraceList, G4_INST* inst);
        void flagUse(SCRATCH_PTR_LIST& scratchTraceList, G4_INST* inst);
        bool flagScratchDefineUse(G4_BB* bb, SCRATCH_PTR_LIST* scratchTraceList, SCRATCH_PTR_VEC* candidateList,
            SCRATCH_ACCESS* scratchAccess, CLEAN_NUM_PROFILE* clean_num_profile);
        void flagSpillFillClean(G4_BB* bb, INST_LIST_ITER inst_it, SCRATCH_PTR_VEC& scratchAccessList,
            SCRATCH_PTR_LIST& scratchTraceList, SCRATCH_PTR_VEC& candidateList, CLEAN_NUM_PROFILE* clean_num_profile);
        void regFillClean(IR_Builder& builder, G4_BB* bb, SCRATCH_PTR_VEC& candidateList,
            CLEAN_NUM_PROFILE* clean_num_profile);
        void regSpillClean(IR_Builder& builder, G4_BB* bb, SCRATCH_PTR_VEC& candidateList, CLEAN_NUM_PROFILE* clean_num_profile);

    public:
        void spillFillCodeCleanFlag(IR_Builder& builder, G4_Kernel& kernel, CLEAN_NUM_PROFILE* clean_num_profile);

        FlagSpillCleanup(GlobalRA& g) : gra(g)
        {
        }
    };
} // namespace vISA

// TODO: Refactor code so that stack call related enums,
// methods, etc. become part of this class. Right now
// the code is scattered across FlowGraph and GraphColor.
class StackCall
{
public:
    // The following enum holds offsets of various fields in
    // the frame descriptor as per the vISA ABI.
    enum class FrameDescriptorOfsets
    {
        FE_FP = vISA::IR_Builder::SubRegs_Stackcall::FE_FP * 8,
        FE_SP = vISA::IR_Builder::SubRegs_Stackcall::FE_SP * 8,
        Ret_IP = vISA::IR_Builder::SubRegs_Stackcall::Ret_IP * 4,
        Ret_EM = vISA::IR_Builder::SubRegs_Stackcall::Ret_EM * 4,
        BE_FP = vISA::IR_Builder::SubRegs_Stackcall::BE_FP * 4,
        BE_SP = vISA::IR_Builder::SubRegs_Stackcall::BE_SP * 4
    };
};

#endif // __GRAPHCOLOR_H__