1 /*========================== begin_copyright_notice ============================ 2 3 Copyright (C) 2017-2021 Intel Corporation 4 5 SPDX-License-Identifier: MIT 6 7 ============================= end_copyright_notice ===========================*/ 8 9 #ifndef __PHYREGUSAGE_H__ 10 #define __PHYREGUSAGE_H__ 11 12 #include "BuildIR.h" 13 #include "G4_Opcode.h" 14 #include "G4_IR.hpp" 15 16 enum ColorHeuristic {FIRST_FIT, ROUND_ROBIN}; 17 18 // forward declares 19 namespace vISA 20 { 21 class LiveRange; 22 class GlobalRA; 23 24 class PhyRegUsageParms 25 { 26 public: 27 GlobalRA& gra; 28 G4_RegFileKind rFile; 29 unsigned int maxGRFCanBeUsed; 30 unsigned int& startARFReg; 31 unsigned int& startFlagReg; 32 unsigned int& startGRFReg; 33 unsigned int& bank1_start; 34 unsigned int& bank1_end; 35 unsigned int& bank2_start; 36 unsigned int& bank2_end; 37 bool doBankConflict; 38 bool* availableGregs; 39 uint32_t* availableSubRegs; 40 bool* availableAddrs; 41 bool* availableFlags; 42 uint8_t* weakEdgeUsage; 43 unsigned int totalGRF; 44 LiveRange** lrs; 45 46 PhyRegUsageParms(GlobalRA& g, LiveRange* l[], G4_RegFileKind r, unsigned int m, unsigned int& startARF, unsigned int& startFlag, unsigned int& startGRF, 47 unsigned int& bank1_s, unsigned int& bank1_e, unsigned int& bank2_s, unsigned int& bank2_e, bool doBC, bool* avaGReg, 48 uint32_t* avaSubReg, bool* avaAddrs, bool* avaFlags, uint8_t* weakEdges); 49 }; 50 51 // 52 // track which registers are currently in use (cannot be assigned to other variables) 53 // For sub reg allocation, the granularity is UW/W (2 bytes). Doing so, we only need to 54 // handle even and odd alignment. 55 // 56 class PhyRegUsage 57 { 58 GlobalRA& gra; 59 LiveRange** lrs; 60 unsigned maxGRFCanBeUsed; 61 bool *availableGregs; // true if the reg is available for allocation 62 uint32_t *availableSubRegs; // each entry is a 16-bit value of the words that are free in a GRF 63 bool *availableAddrs; // true if the reg is available for allocation 64 bool *availableFlags; 65 uint8_t* weakEdgeUsage; 66 67 ColorHeuristic colorHeuristic; // perform register assignment in first-fit/round-robin for GRFs 68 G4_RegFileKind regFile; 69 70 unsigned& startARFReg; // round-robin reg start bias 71 unsigned& startFLAGReg; 72 unsigned& startGRFReg; // round-robin reg start bias 73 74 75 unsigned &bank1_start; 76 unsigned &bank2_start; 77 unsigned &bank1_end; 78 unsigned &bank2_end; 79 80 unsigned totalGRFNum; 81 82 bool honorBankBias; // whether we honor the bank bias assigned by the bank conflict avoidance heuristic 83 bool overlapTest; // set to true only when current dcl has compatible ranges marked by augmentation 84 85 struct PhyReg 86 { 87 int reg; 88 int subreg; // in unit of words (0-15) 89 }; // return type for findGRFSubReg 90 91 PhyReg findGRFSubReg(const bool forbidden[], 92 bool callerSaveBias, 93 bool callerSaverBias, 94 BankAlign align, 95 G4_SubReg_Align subAlign, 96 unsigned nwords); 97 98 void findGRFSubRegFromRegs(int startReg, 99 int endReg, 100 int step, 101 PhyReg *phyReg, 102 G4_SubReg_Align subAlign, 103 unsigned nwords, 104 const bool forbidden[], 105 bool fromPartialOccupiedReg); 106 107 PhyReg findGRFSubRegFromBanks(G4_Declare *dcl, 108 const bool forbidden[], 109 bool oneGRFBankDivision); 110 111 void freeGRFSubReg(unsigned regNum, unsigned regOff, unsigned nwords, G4_Type ty); 112 void freeContiguous(bool availRegs[], unsigned start, unsigned numReg, unsigned maxRegs); 113 bool canGRFSubRegAlloc(G4_Declare* decl); 114 bool findContiguousNoWrapGRF(bool availRegs[], const bool forbidden[], unsigned short occupiedBundles, BankAlign align, unsigned numRegNeeded, unsigned startPos, unsigned endPos, unsigned & idx); 115 116 bool findContiguousNoWrapAddrFlag(bool availRegs[], 117 const bool forbidden[], 118 G4_SubReg_Align subAlign, 119 unsigned numRegNeeded, 120 unsigned startPos, 121 unsigned endPos, 122 unsigned& idx); 123 124 bool allFree(bool availRegs[], unsigned maxRegs); 125 126 bool findFreeRegs(bool availRegs[], 127 const bool forbidden[], 128 BankAlign align, 129 unsigned numRegNeeded, 130 unsigned startRegNum, 131 unsigned endRegNum, 132 unsigned& idx, 133 bool gotoSecondBank, 134 bool oneGRFBankDivision); 135 136 public: 137 IR_Builder& builder; 138 139 PhyRegPool& regPool; // all Physical Reg Operands 140 141 PhyRegUsage(PhyRegUsageParms&); 142 143 bool isOverlapValid(unsigned int, unsigned int); 144 setWeakEdgeUse(unsigned int reg,uint8_t index)145 void setWeakEdgeUse(unsigned int reg, uint8_t index) 146 { 147 // Consider V1 is allocated to r10, r11, r12, r13. 148 // Then following will be set eventually to model 149 // compatible ranges: 150 // weakEdgeUsage[10] = 1; 151 // weakEdgeUsage[11] = 2; 152 // weakEdgeUsage[12] = 3; 153 // weakEdgeUsage[13] = 4; 154 // This means some other compatible range cannot start 155 // at r7, r8, r9, r11, r12, r13. Another compatible range 156 // can either have no overlap at all with this range (strong 157 // edge), or it can start at r10 to have full 158 // overlap (weak edge). 159 weakEdgeUsage[reg] = index; 160 } 161 getWeakEdgeUse(unsigned int reg)162 uint8_t getWeakEdgeUse(unsigned int reg) const 163 { 164 return weakEdgeUsage[reg]; 165 } 166 runOverlapTest(bool t)167 void runOverlapTest(bool t) 168 { 169 overlapTest = t; 170 } 171 ~PhyRegUsage()172 ~PhyRegUsage() 173 { 174 175 } 176 177 bool assignRegs(bool isSIMD16, 178 LiveRange* var, 179 const bool* forbidden, 180 BankAlign align, 181 G4_SubReg_Align subAlign, 182 ColorHeuristic colorHeuristic, 183 float spillCost, 184 bool hintSet); 185 186 bool assignGRFRegsFromBanks(LiveRange* varBasis, 187 BankAlign align, 188 const bool* forbidden, 189 ColorHeuristic heuristic, 190 bool oneGRFBankDivision); 191 192 void markBusyForDclSplit(G4_RegFileKind kind, 193 unsigned regNum, 194 unsigned regOff, 195 unsigned nunits, 196 unsigned numRows); 197 markBusyGRF(unsigned regNum,unsigned regOff,unsigned nunits,unsigned numRows,bool isPreDefinedVar)198 void markBusyGRF(unsigned regNum, 199 unsigned regOff, 200 unsigned nunits, 201 unsigned numRows, 202 bool isPreDefinedVar) 203 { 204 MUST_BE_TRUE(numRows > 0 && nunits > 0, ERROR_INTERNAL_ARGUMENT); 205 206 MUST_BE_TRUE((regNum + numRows <= maxGRFCanBeUsed) || isPreDefinedVar, 207 ERROR_UNKNOWN); 208 209 // 210 // sub reg allocation (allocation unit is word) 211 // 212 if (numRows == 1 && regOff + nunits < numEltPerGRF<Type_UW>()) 213 { 214 availableGregs[regNum] = false; 215 auto subregMask = getSubregBitMask(regOff, nunits); 216 availableSubRegs[regNum] &= ~subregMask; 217 } 218 else // allocate whole registers 219 { 220 for (unsigned i = 0; i < numRows; i++) 221 { 222 availableGregs[regNum + i] = false; 223 if (getGRFSize() == 64) 224 availableSubRegs[regNum + i] = 0; 225 else 226 availableSubRegs[regNum + i] = 0xffff0000; 227 } 228 } 229 } 230 markBusyAddress(unsigned regNum,unsigned regOff,unsigned nunits,unsigned numRows)231 void markBusyAddress(unsigned regNum, 232 unsigned regOff, 233 unsigned nunits, 234 unsigned numRows) 235 { 236 MUST_BE_TRUE(regNum == 0 && regOff + nunits <= getNumAddrRegisters(), 237 ERROR_UNKNOWN); 238 for (unsigned i = regOff; i < regOff + nunits; i++) 239 availableAddrs[i] = false; 240 } 241 markBusyFlag(unsigned regNum,unsigned regOff,unsigned nunits,unsigned numRows)242 void markBusyFlag(unsigned regNum, 243 unsigned regOff, 244 unsigned nunits, 245 unsigned numRows) 246 { 247 for (unsigned i = regOff; i < regOff + nunits; i++) 248 availableFlags[i] = false; 249 } 250 numAllocUnit(unsigned nelems,G4_Type ty)251 static unsigned numAllocUnit(unsigned nelems, G4_Type ty) 252 { 253 // 254 // we allocate sub reg in 2-byte granularity 255 // 256 unsigned nbytes = nelems* TypeSize(ty); 257 return nbytes/G4_WSIZE + nbytes%G4_WSIZE; 258 } 259 260 // translate offset to allocUnit offsetAllocUnit(unsigned nelems,G4_Type ty)261 static unsigned offsetAllocUnit(unsigned nelems, G4_Type ty) 262 { 263 264 unsigned nbytes = nelems * TypeSize(ty); 265 //RA allocate register in unit of G4_WSIZE bytes 266 //pre-assigned register may start from nbytes%G4_WSIZE != 0, i.e, within an allocUnit 267 return nbytes/G4_WSIZE; 268 } 269 270 void updateRegUsage(LiveRange* lr); 271 getSubregBitMask(uint32_t start,uint32_t num)272 uint32_t getSubregBitMask(uint32_t start, uint32_t num) const 273 { 274 MUST_BE_TRUE(num > 0 && start+num <= numEltPerGRF<Type_UW>(), "illegal number of words"); 275 uint32_t mask = ((1 << num) - 1) << start; 276 277 return (uint32_t) mask; 278 } 279 emit(std::ostream & output)280 void emit(std::ostream& output) 281 { 282 output << "available GRFs: "; 283 for (unsigned int i = 0; i < totalGRFNum; i++) 284 { 285 if (availableGregs[i]) 286 { 287 output << i << " "; 288 } 289 } 290 output << "\n"; 291 } 292 293 private: 294 295 void freeRegs(LiveRange* var); 296 297 bool findContiguousAddrFlag(bool availRegs[], 298 const bool forbidden[], 299 G4_SubReg_Align subAlign, 300 unsigned numRegNeeded, 301 unsigned maxRegs, 302 unsigned& startReg, // inout 303 unsigned& idx, // output 304 bool isCalleeSaveBias = false, 305 bool isEOTSrc = false); 306 307 bool findContiguousGRFFromBanks(G4_Declare *dcl, bool availRegs[], 308 const bool forbidden[], 309 BankAlign align, 310 unsigned& idx, 311 bool oneGRFBankDivision); 312 313 unsigned short getOccupiedBundle(const G4_Declare* dcl) const; 314 315 // find contiguous free words in a registers 316 int findContiguousWords(uint32_t words, G4_SubReg_Align alignment, int numWord) const; 317 bool findContiguousGRF(bool availRegs[], const bool forbidden[], unsigned occupiedBundles, BankAlign align, 318 unsigned numRegNeeded, unsigned maxRegs, unsigned & startPos, unsigned & idx, bool isCalleeSaveBias, bool isEOTSrc); 319 }; 320 } 321 #endif // __PHYREGUSAGE_H__ 322