1 /*========================== begin_copyright_notice ============================ 2 3 Copyright (C) 2017-2021 Intel Corporation 4 5 SPDX-License-Identifier: MIT 6 7 ============================= end_copyright_notice ===========================*/ 8 9 #ifndef _IGA_REGDEPS_HPP 10 #define _IGA_REGDEPS_HPP 11 12 #include "BitSet.hpp" 13 #include "Instruction.hpp" 14 #include "Kernel.hpp" // BlockList TODO: move to Blocks.hpp (elide Encoder def) 15 16 #include <ostream> 17 #include <vector> 18 #include <map> 19 20 21 namespace iga 22 { 23 24 enum class DEP_TYPE 25 { 26 NONE, 27 READ, 28 WRITE, 29 WRITE_ALWAYS_INTERFERE, 30 READ_ALWAYS_INTERFERE 31 }; 32 33 enum class DEP_PIPE 34 { 35 NONE, 36 // TGL 37 SHORT, 38 LONG, 39 CONTROL_FLOW, // TGL only 40 SEND, 41 MATH, 42 // XeHP+ 43 FLOAT, 44 INTEGER, 45 LONG64, 46 DPAS, 47 SEND_SLM, // XeHPG LSC SLM message 48 SEND_UNKNOWN, // XeHPG LSC desc is indirect, not sure if it's SLM 49 MATH_INORDER // XeHPC 50 }; 51 52 enum class DEP_CLASS 53 { 54 NONE, 55 IN_ORDER, 56 OUT_OF_ORDER, 57 OTHER 58 }; 59 struct SBID 60 { 61 uint32_t sbid = 0; 62 DEP_TYPE dType = DEP_TYPE::NONE; 63 bool isFree = true; 64 SBIDiga::SBID65 SBID() {} 66 SBIDiga::SBID67 SBID(uint32_t id, bool isfree, DEP_TYPE type) 68 : sbid(id), dType(type), isFree(isfree) { } 69 resetiga::SBID70 void reset() 71 { 72 sbid = 0; 73 dType = DEP_TYPE::NONE; 74 isFree = true; 75 } 76 }; 77 78 class DepSetBuilder; 79 80 // A sort of bit set for representing 81 // BITs per 32-byte GRF register 82 // 1 would be register granularity only 83 // 8 would be DWORD granularity 84 // 85 class DepSet { 86 friend DepSetBuilder; 87 public: 88 struct InstIDs { 89 public: InstIDsiga::DepSet::InstIDs90 InstIDs() {} InstIDsiga::DepSet::InstIDs91 InstIDs(uint32_t global_id, 92 uint32_t in_order_id) 93 : global(global_id), 94 inOrder(in_order_id) 95 {} 96 InstIDsiga::DepSet::InstIDs97 InstIDs(uint32_t global_id, 98 uint32_t in_order_id, 99 uint32_t float_pipe_id, 100 uint32_t int_pipe_id, 101 uint32_t long_pipe_id, 102 uint32_t math_pipe_id) 103 : global(global_id), 104 inOrder(in_order_id), 105 floatPipe(float_pipe_id), 106 intPipe(int_pipe_id), 107 longPipe(long_pipe_id), 108 mathPipe(math_pipe_id) 109 { } 110 111 // unique id for all instructions 112 uint32_t global = 0; 113 // id counter for all in-order instructions 114 uint32_t inOrder = 0; 115 // id coutner for float pipe 116 uint32_t floatPipe = 0; 117 // id counter for short pipe 118 uint32_t intPipe = 0; 119 // id counter for Long pipe 120 uint32_t longPipe = 0; 121 // id counter for in-order math pipe 122 uint32_t mathPipe = 0; 123 }; 124 125 private: 126 DepSet(const InstIDs& inst_id_counter, const DepSetBuilder& dsb); ~DepSet()127 ~DepSet() 128 { 129 delete bits; 130 } 131 DepSet(const DepSet& ds) = delete; 132 DepSet& operator=(DepSet const&) = delete; 133 134 public: 135 // FIXME: shold be moved to DepSetBuilder 136 uint32_t addressOf(RegName rn, const RegRef &rr, uint32_t typeSizeBytes) const; 137 bool isRegTracked(RegName rn) const; 138 139 void setDstRegion( 140 RegName rn, 141 RegRef rr, 142 Region r, 143 uint32_t execSize, 144 uint32_t typeSizeBits); 145 void setSrcRegion( 146 RegName rn, 147 RegRef rr, 148 Region r, 149 uint32_t execSize, 150 uint32_t typeSizeBits); 151 setDepType(DEP_TYPE type)152 void setDepType(DEP_TYPE type){ m_dType = type; } setHasIndirect()153 void setHasIndirect() { m_hasIndirect = true; } setHasSR()154 void setHasSR() { m_hasSR = true; } 155 156 void addGrf(size_t reg); 157 void addGrfBytes(size_t reg, size_t subRegBytes, size_t bytes); addA_W(RegRef rr)158 void addA_W(RegRef rr) { addABytes(rr.regNum, 2* (size_t)rr.subRegNum, 2); } addA_D(RegRef rr)159 void addA_D(RegRef rr) { addABytes(rr.regNum, 4* (size_t)rr.subRegNum, 4); } 160 void addABytes(size_t reg, size_t subregBytes, size_t bytes); 161 void addFBytes(size_t fByteOff, size_t bytes); addToBucket(uint32_t regNum)162 void addToBucket(uint32_t regNum){ m_bucketList.push_back(regNum); } setDepPipe(DEP_PIPE pipe)163 void setDepPipe(DEP_PIPE pipe) { m_dPipe = pipe; } setDepClass(DEP_CLASS cls)164 void setDepClass(DEP_CLASS cls) { m_dClass = cls; } setSBID(SBID & sw)165 void setSBID(SBID &sw) { m_sbid = sw; } 166 empty() const167 bool empty() const { return bits->empty(); } reset()168 void reset() { bits->reset(); } intersects(const DepSet & rhs) const169 bool intersects(const DepSet &rhs) const { return bits->intersects(*rhs.bits); } getDepType() const170 DEP_TYPE getDepType() const { return m_dType; } hasIndirect() const171 bool hasIndirect() const { return m_hasIndirect; } hasSR() const172 bool hasSR() const { return m_hasSR; } getInstruction() const173 const Instruction* getInstruction() const { return m_instruction; } getInstIDs() const174 const InstIDs& getInstIDs() const { return m_InstIDs; } getInstGlobalID() const175 size_t getInstGlobalID() const { return m_InstIDs.global; } 176 getDepPipe() const177 DEP_PIPE getDepPipe() const { return m_dPipe; } getDepClass() const178 DEP_CLASS getDepClass() const { return m_dClass; } getSBID() const179 SBID getSBID() const { return m_sbid; } 180 void str(std::ostream &os) const; 181 std::string str() const; 182 bool destructiveSubtract(const DepSet &rhs); 183 getBuckets() const184 const std::vector<size_t>& getBuckets() const { return m_bucketList; } getBitSet() const185 const BitSet<>& getBitSet() const { return *bits; } getBitSetVol()186 BitSet<>& getBitSetVol() { return *bits; } 187 getCompanion() const188 const DepSet* getCompanion() const { return m_companion; } getCompanion()189 DepSet* getCompanion() { return m_companion; } 190 setCompanion(DepSet * companion)191 void setCompanion(DepSet* companion) { m_companion = companion; } 192 193 private: 194 void setInputsFlagDep(); 195 void setInputsSrcDep(); 196 197 void setOutputsFlagDep(); 198 void setOutputsDstcDep(); 199 void setMathWAOutputsDstcDep(); 200 201 typedef std::pair<uint32_t, uint32_t> RegRangeType; 202 typedef std::vector<RegRangeType> RegRangeListType; 203 204 // Set the bits to this DepSet with the given reg_range 205 void addDependency(const RegRangeType& reg_range); 206 void addDependency(const RegRangeListType& reg_range); 207 208 // get the used src registers of given dpas inst 209 // Return: in reg_range each pair denotes start and upper grf reg number of each src 210 // extra_regs - the extra register footprint required by HW workaround: treat Src2 as dpas.8x8 211 // when calculating register footpring. extra_regs only affect external dependency 212 // and will not apply to calculating dpas/macro internal dependency 213 void getDpasSrcDependency(const Instruction &inst, RegRangeListType& reg_range, 214 RegRangeListType& extra_regs, const Model& model); 215 216 // get the used dst registers of given dpas inst 217 // Return: in reg_range denotes the start and upper grf reg number 218 void getDpasDstDependency(const Instruction &inst, RegRangeType& reg_range); 219 220 // helper function to get dpas src register footpring upper bound 221 uint32_t getDPASSrcDepUpBound(unsigned idx, Type srcType, uint32_t execSize, uint32_t lowBound, 222 uint32_t systolicDepth, uint32_t repeatCount, uint32_t opsPerChan); 223 // helper function to get dpas OPS_PER_CHAN 224 uint32_t getDPASOpsPerChan(Type src1_ty, Type src2_ty); 225 private: 226 const Instruction* m_instruction; 227 228 // track the inst id counters when it reach to this instruction 229 const InstIDs m_InstIDs; 230 231 const DepSetBuilder& m_DB; 232 233 DEP_TYPE m_dType; 234 DEP_PIPE m_dPipe; 235 DEP_CLASS m_dClass; 236 BitSet<>* bits; 237 std::vector<size_t> m_bucketList; 238 SBID m_sbid; 239 bool m_hasIndirect; 240 // set true if the instruction has access the special registers: CR, CE, SR 241 // In this case we cannot be sure which register it is actually affect, 242 // will need to sync all pipes 243 bool m_hasSR; 244 void formatShortReg( 245 std::ostream &os, 246 bool &first, 247 const char *reg_name, 248 size_t reg_num, 249 size_t reg_start, 250 size_t reg_len) const; 251 252 // There are always two DepSet (input and output) for an instruction. Here 253 // record the compnion DepSet that create for the same instruction with this 254 // DepSet. This is for the use of when we clear an in-order instruction's 255 // dependency, we'd like to clear its both input and output DepSets 256 DepSet* m_companion = nullptr; 257 }; 258 259 /// DepSetBuilder - create the DepSet, also keep track of Model dependend register info 260 class DepSetBuilder { 261 public: 262 typedef DepSet::InstIDs InstIDs; 263 DepSetBuilder(const Model & model)264 DepSetBuilder(const Model& model) 265 : GRF_REGS(model.getNumGRF()), 266 GRF_BYTES_PER_REG(model.getGRFByteSize()), 267 ARF_F_REGS(model.getNumFlagReg()), 268 mPlatformModel(model) 269 {} 270 ~DepSetBuilder()271 ~DepSetBuilder() 272 { 273 for (auto ds : mAllDepSet) 274 delete ds; 275 } 276 277 public: 278 // DepSet creater 279 /// createSrcDepSet - create DepSet for src operands of instruction i 280 DepSet* createSrcDepSet(const Instruction &i, const InstIDs& inst_id_counter, 281 SWSB_ENCODE_MODE enc_mode); 282 /// createDstDepSet - create DepSet for dst operands of instruction i 283 DepSet* createDstDepSet(const Instruction &i, const InstIDs& inst_id_counter, 284 SWSB_ENCODE_MODE enc_mode); 285 286 /// createSrcDstDepSetForDpas - Find the DPAS macro and set the dependency for input and output 287 /// return the created DepSets for input(src) and output(dst) 288 std::pair<DepSet*, DepSet*> createDPASSrcDstDepSet( 289 const InstList& insList, InstListIterator instIt, const InstIDs& inst_id_counter, 290 size_t& dpasCnt, SWSB_ENCODE_MODE enc_mode); 291 292 /// mathDstWA - this will return the DepSet with math's dst region, and force to occupy the 293 /// entire registers no matter what the region and channel are. e.g. if dst is r1.3, it'll 294 /// occupy the entire r1 295 /// This is the WA to fix the HW read suppression issue 296 DepSet* createMathDstWADepSet(const Instruction &i, const InstIDs& inst_id_counter, 297 SWSB_ENCODE_MODE enc_mode); 298 299 300 // Register File Size Info getGRF_REGS() const301 uint32_t getGRF_REGS() const { return GRF_REGS; } getGRF_BYTES_PER_REG() const302 uint32_t getGRF_BYTES_PER_REG() const { return GRF_BYTES_PER_REG; } getGRF_LEN() const303 uint32_t getGRF_LEN() const { return GRF_REGS * GRF_BYTES_PER_REG; } 304 getARF_A_BYTES_PER_REG() const305 uint32_t getARF_A_BYTES_PER_REG() const { return ARF_A_BYTES_PER_REG; } getARF_A_REGS() const306 uint32_t getARF_A_REGS() const { return ARF_A_REGS; } getARF_A_LEN() const307 uint32_t getARF_A_LEN() const { return ARF_A_REGS * ARF_A_BYTES_PER_REG; } 308 getARF_ACC_REGS() const309 uint32_t getARF_ACC_REGS() const { return ARF_ACC_REGS; } getARF_ACC_BYTES_PER_REG() const310 uint32_t getARF_ACC_BYTES_PER_REG() const { return ARF_ACC_BYTES_PER_REG; } getARF_ACC_LEN() const311 uint32_t getARF_ACC_LEN() const { return ARF_ACC_REGS * ARF_ACC_BYTES_PER_REG; } 312 getARF_F_REGS() const313 uint32_t getARF_F_REGS() const { return ARF_F_REGS; } getARF_F_BYTES_PER_REG() const314 uint32_t getARF_F_BYTES_PER_REG() const { return ARF_F_BYTES_PER_REG; } getARF_F_LEN() const315 uint32_t getARF_F_LEN() const { return ARF_F_REGS * ARF_F_BYTES_PER_REG; } 316 getARF_SPECIAL_REGS() const317 uint32_t getARF_SPECIAL_REGS() const { return ARF_SPECIAL_REGS; } getARF_SPECIAL_BYTES_PER_REG() const318 uint32_t getARF_SPECIAL_BYTES_PER_REG() const { return ARF_SPECIAL_BYTES_PER_REG; } getARF_SPECIAL_LEN() const319 uint32_t getARF_SPECIAL_LEN() const { return ARF_SPECIAL_REGS * ARF_SPECIAL_BYTES_PER_REG; } 320 getGRF_START() const321 uint32_t getGRF_START() const { return 0; } getARF_A_START() const322 uint32_t getARF_A_START() const { return ALIGN_UP_TO(32, getGRF_START() + getGRF_LEN()); } getARF_ACC_START() const323 uint32_t getARF_ACC_START() const { return ALIGN_UP_TO(32, getARF_A_START() + getARF_A_LEN()); } getARF_F_START() const324 uint32_t getARF_F_START() const { return ALIGN_UP_TO(32, getARF_ACC_START() + getARF_ACC_LEN()); } getARF_SPECIAL_START() const325 uint32_t getARF_SPECIAL_START() const { return ALIGN_UP_TO(32, getARF_F_START() + getARF_F_LEN()); } getTOTAL_END() const326 uint32_t getTOTAL_END() const { return ALIGN_UP_TO(32, getARF_SPECIAL_START() + getARF_SPECIAL_LEN()); } 327 getTOTAL_BITS() const328 uint32_t getTOTAL_BITS() const { return getTOTAL_END(); } getBYTES_PER_BUCKET() const329 uint32_t getBYTES_PER_BUCKET() const { return getGRF_BYTES_PER_REG(); } getTOTAL_BUCKETS() const330 uint32_t getTOTAL_BUCKETS() const { return (getTOTAL_BITS() / getBYTES_PER_BUCKET()) + 1; } 331 getBucketStart(RegName regname) const332 uint32_t getBucketStart(RegName regname) const 333 { 334 uint32_t bucket = 0; 335 switch (regname) 336 { 337 case iga::RegName::GRF_R: 338 bucket = getGRF_START() / getBYTES_PER_BUCKET(); 339 break; 340 case iga::RegName::ARF_A: 341 bucket = getARF_A_START() / getBYTES_PER_BUCKET(); 342 break; 343 case iga::RegName::ARF_ACC: 344 bucket = getARF_ACC_START() / getBYTES_PER_BUCKET(); 345 break; 346 case iga::RegName::ARF_F: 347 bucket = getARF_F_START() / getBYTES_PER_BUCKET(); 348 break; 349 case RegName::ARF_CR: 350 case RegName::ARF_SR: 351 bucket = getARF_SPECIAL_START() / getBYTES_PER_BUCKET(); 352 break; 353 default: 354 //putting rest of archtecture registers in to same bucket 355 bucket = getARF_F_START() / 32; 356 break; 357 } 358 return bucket; 359 } 360 361 private: 362 // ASSUMES: byte-level tracking for all elements 363 364 // FIXME: Some info taken from model, some are hardcoded 365 const uint32_t GRF_REGS; 366 const uint32_t GRF_BYTES_PER_REG; 367 368 const uint32_t ARF_A_BYTES_PER_REG = 32; 369 const uint32_t ARF_A_REGS = 1; 370 371 const uint32_t ARF_ACC_REGS = 12; 372 const uint32_t ARF_ACC_BYTES_PER_REG = 32; 373 374 const uint32_t ARF_F_REGS; 375 const uint32_t ARF_F_BYTES_PER_REG = 4; 376 377 //for registers that migh thave indirect dependence like CR and SR 378 const uint32_t ARF_SPECIAL_REGS = 2; 379 const uint32_t ARF_SPECIAL_BYTES_PER_REG = 4; 380 381 private: 382 // Track all the created DepSet for deletion 383 std::vector<DepSet*> mAllDepSet; 384 385 const Model &mPlatformModel; 386 }; 387 388 } 389 #endif // _IGA_REGDEPS_HPP 390