1 /*========================== begin_copyright_notice ============================
2 
3 Copyright (C) 2017-2021 Intel Corporation
4 
5 SPDX-License-Identifier: MIT
6 
7 ============================= end_copyright_notice ===========================*/
8 
9 #ifndef _IGA_REGDEPS_HPP
10 #define _IGA_REGDEPS_HPP
11 
12 #include "BitSet.hpp"
13 #include "Instruction.hpp"
14 #include "Kernel.hpp" // BlockList TODO: move to Blocks.hpp (elide Encoder def)
15 
16 #include <ostream>
17 #include <vector>
18 #include <map>
19 
20 
21 namespace iga
22 {
23 
/// Dependency-type tag stored on a DepSet (m_dType) and on an SBID (dType).
/// The enumerators rely on their implicit values (0..4); keep the order.
enum class DEP_TYPE {
    NONE,                    // initial value used by SBID before a type is set
    READ,
    WRITE,
    WRITE_ALWAYS_INTERFERE,
    READ_ALWAYS_INTERFERE,
};
32 
/// Pipe tag stored on a DepSet (m_dPipe).
/// The enumerators rely on their implicit values (0..12); keep the order.
enum class DEP_PIPE {
    NONE,

    // ---- TGL ----
    SHORT,
    LONG,
    CONTROL_FLOW,   // TGL only
    SEND,
    MATH,

    // ---- XeHP+ ----
    FLOAT,
    INTEGER,
    LONG64,
    DPAS,
    SEND_SLM,       // XeHPG LSC SLM message
    SEND_UNKNOWN,   // XeHPG LSC desc is indirect, not sure if it's SLM
    MATH_INORDER,   // XeHPC
};
51 
/// Class tag stored on a DepSet (m_dClass).
/// The enumerators rely on their implicit values (0..3); keep the order.
enum class DEP_CLASS {
    NONE,
    IN_ORDER,
    OUT_OF_ORDER,
    OTHER,
};
59 struct SBID
60 {
61     uint32_t sbid = 0;
62     DEP_TYPE dType = DEP_TYPE::NONE;
63     bool isFree = true;
64 
SBIDiga::SBID65     SBID() {}
66 
SBIDiga::SBID67     SBID(uint32_t id, bool isfree, DEP_TYPE type)
68         : sbid(id), dType(type), isFree(isfree) { }
69 
resetiga::SBID70     void reset()
71     {
72         sbid = 0;
73         dType = DEP_TYPE::NONE;
74         isFree = true;
75     }
76 };
77 
78 class DepSetBuilder;
79 
80 // A sort of bit set for representing
81 // BITs per 32-byte GRF register
82 //    1 would be register granularity only
83 //    8 would be DWORD granularity
84 //
85 class DepSet {
86     friend DepSetBuilder;
87 public:
88     struct InstIDs {
89     public:
InstIDsiga::DepSet::InstIDs90         InstIDs() {}
InstIDsiga::DepSet::InstIDs91         InstIDs(uint32_t global_id,
92                 uint32_t in_order_id)
93             : global(global_id),
94               inOrder(in_order_id)
95         {}
96 
InstIDsiga::DepSet::InstIDs97         InstIDs(uint32_t global_id,
98                 uint32_t in_order_id,
99                 uint32_t float_pipe_id,
100                 uint32_t int_pipe_id,
101                 uint32_t long_pipe_id,
102                 uint32_t math_pipe_id)
103           : global(global_id),
104             inOrder(in_order_id),
105             floatPipe(float_pipe_id),
106             intPipe(int_pipe_id),
107             longPipe(long_pipe_id),
108             mathPipe(math_pipe_id)
109         { }
110 
111         // unique id for all instructions
112         uint32_t global = 0;
113         // id counter for all in-order instructions
114         uint32_t inOrder = 0;
115         // id coutner for float pipe
116         uint32_t floatPipe = 0;
117         // id counter for short pipe
118         uint32_t intPipe = 0;
119         // id counter for Long pipe
120         uint32_t longPipe = 0;
121         // id counter for in-order math pipe
122         uint32_t mathPipe = 0;
123     };
124 
125 private:
126     DepSet(const InstIDs& inst_id_counter, const DepSetBuilder& dsb);
~DepSet()127     ~DepSet()
128     {
129         delete bits;
130     }
131     DepSet(const DepSet& ds) = delete;
132     DepSet& operator=(DepSet const&) = delete;
133 
134 public:
135     // FIXME: shold be moved to DepSetBuilder
136     uint32_t addressOf(RegName rn, const RegRef &rr, uint32_t typeSizeBytes) const;
137     bool isRegTracked(RegName rn) const;
138 
139     void setDstRegion(
140         RegName rn,
141         RegRef rr,
142         Region r,
143         uint32_t execSize,
144         uint32_t typeSizeBits);
145     void setSrcRegion(
146         RegName rn,
147         RegRef rr,
148         Region r,
149         uint32_t execSize,
150         uint32_t typeSizeBits);
151 
setDepType(DEP_TYPE type)152     void setDepType(DEP_TYPE type){ m_dType = type; }
setHasIndirect()153     void setHasIndirect() { m_hasIndirect = true; }
setHasSR()154     void setHasSR() { m_hasSR = true; }
155 
156     void addGrf(size_t reg);
157     void addGrfBytes(size_t reg, size_t subRegBytes, size_t bytes);
addA_W(RegRef rr)158     void addA_W(RegRef rr) { addABytes(rr.regNum, 2* (size_t)rr.subRegNum, 2); }
addA_D(RegRef rr)159     void addA_D(RegRef rr) { addABytes(rr.regNum, 4* (size_t)rr.subRegNum, 4); }
160     void addABytes(size_t reg, size_t subregBytes, size_t bytes);
161     void addFBytes(size_t fByteOff, size_t bytes);
addToBucket(uint32_t regNum)162     void addToBucket(uint32_t regNum){ m_bucketList.push_back(regNum); }
setDepPipe(DEP_PIPE pipe)163     void setDepPipe(DEP_PIPE pipe) { m_dPipe = pipe; }
setDepClass(DEP_CLASS cls)164     void setDepClass(DEP_CLASS cls) { m_dClass = cls; }
setSBID(SBID & sw)165     void setSBID(SBID &sw) { m_sbid = sw; }
166 
empty() const167     bool                    empty()                         const { return bits->empty(); }
reset()168     void                    reset() { bits->reset(); }
intersects(const DepSet & rhs) const169     bool                    intersects(const DepSet &rhs)   const { return bits->intersects(*rhs.bits); }
getDepType() const170     DEP_TYPE                getDepType()                    const { return m_dType; }
hasIndirect() const171     bool                    hasIndirect()                   const { return m_hasIndirect; }
hasSR() const172     bool                    hasSR()                         const { return m_hasSR; }
getInstruction() const173     const Instruction*      getInstruction()                const { return m_instruction; }
getInstIDs() const174     const InstIDs&          getInstIDs()                    const { return m_InstIDs; }
getInstGlobalID() const175     size_t                  getInstGlobalID()               const { return m_InstIDs.global; }
176 
getDepPipe() const177     DEP_PIPE                getDepPipe()                    const { return m_dPipe; }
getDepClass() const178     DEP_CLASS               getDepClass()                   const { return m_dClass; }
getSBID() const179     SBID                    getSBID()                       const { return m_sbid; }
180     void                    str(std::ostream &os) const;
181     std::string             str() const;
182     bool                    destructiveSubtract(const DepSet &rhs);
183 
getBuckets() const184     const std::vector<size_t>&  getBuckets() const { return m_bucketList; }
getBitSet() const185     const BitSet<>&   getBitSet() const { return *bits; }
getBitSetVol()186     BitSet<>&         getBitSetVol() { return *bits; }
187 
getCompanion() const188     const DepSet* getCompanion() const { return m_companion; }
getCompanion()189     DepSet* getCompanion() { return m_companion; }
190 
setCompanion(DepSet * companion)191     void setCompanion(DepSet* companion) { m_companion = companion; }
192 
193 private:
194     void setInputsFlagDep();
195     void setInputsSrcDep();
196 
197     void setOutputsFlagDep();
198     void setOutputsDstcDep();
199     void setMathWAOutputsDstcDep();
200 
201     typedef std::pair<uint32_t, uint32_t> RegRangeType;
202     typedef std::vector<RegRangeType> RegRangeListType;
203 
204     // Set the bits to this DepSet with the given reg_range
205     void addDependency(const RegRangeType& reg_range);
206     void addDependency(const RegRangeListType& reg_range);
207 
208     // get the used src registers of given dpas inst
209     // Return: in reg_range each pair denotes start and upper grf reg number of each src
210     // extra_regs - the extra register footprint required by HW workaround: treat Src2 as dpas.8x8
211     //              when calculating register footpring. extra_regs only affect external dependency
212     //              and will not apply to calculating dpas/macro internal dependency
213     void getDpasSrcDependency(const Instruction &inst, RegRangeListType& reg_range,
214         RegRangeListType& extra_regs, const Model& model);
215 
216     // get the used dst registers of given dpas inst
217     // Return: in reg_range denotes the start and upper grf reg number
218     void getDpasDstDependency(const Instruction &inst, RegRangeType& reg_range);
219 
220     // helper function to get dpas src register footpring upper bound
221     uint32_t getDPASSrcDepUpBound(unsigned idx, Type srcType, uint32_t execSize, uint32_t lowBound,
222         uint32_t systolicDepth, uint32_t repeatCount, uint32_t opsPerChan);
223     // helper function to get dpas OPS_PER_CHAN
224     uint32_t getDPASOpsPerChan(Type src1_ty, Type src2_ty);
225 private:
226     const Instruction* m_instruction;
227 
228     // track the inst id counters when it reach to this instruction
229     const InstIDs m_InstIDs;
230 
231     const DepSetBuilder& m_DB;
232 
233     DEP_TYPE m_dType;
234     DEP_PIPE m_dPipe;
235     DEP_CLASS m_dClass;
236     BitSet<>* bits;
237     std::vector<size_t> m_bucketList;
238     SBID m_sbid;
239     bool m_hasIndirect;
240     // set true if the instruction has access the special registers: CR, CE, SR
241     // In this case we cannot be sure which register it is actually affect,
242     // will need to sync all pipes
243     bool m_hasSR;
244     void formatShortReg(
245         std::ostream &os,
246         bool &first,
247         const char *reg_name,
248         size_t reg_num,
249         size_t reg_start,
250         size_t reg_len) const;
251 
252     // There are always two DepSet (input and output) for an instruction. Here
253     // record the compnion DepSet that create for the same instruction with this
254     // DepSet. This is for the use of when we clear an in-order instruction's
255     // dependency, we'd like to clear its both input and output DepSets
256     DepSet* m_companion = nullptr;
257 };
258 
259 /// DepSetBuilder - create the DepSet, also keep track of Model dependend register info
260 class DepSetBuilder {
261 public:
262     typedef DepSet::InstIDs InstIDs;
263 
DepSetBuilder(const Model & model)264     DepSetBuilder(const Model& model)
265         : GRF_REGS(model.getNumGRF()),
266           GRF_BYTES_PER_REG(model.getGRFByteSize()),
267           ARF_F_REGS(model.getNumFlagReg()),
268           mPlatformModel(model)
269     {}
270 
~DepSetBuilder()271     ~DepSetBuilder()
272     {
273         for (auto ds : mAllDepSet)
274             delete ds;
275     }
276 
277 public:
278     // DepSet creater
279     /// createSrcDepSet - create DepSet for src operands of instruction i
280     DepSet* createSrcDepSet(const Instruction &i, const InstIDs& inst_id_counter,
281         SWSB_ENCODE_MODE enc_mode);
282     /// createDstDepSet - create DepSet for dst operands of instruction i
283     DepSet* createDstDepSet(const Instruction &i, const InstIDs& inst_id_counter,
284         SWSB_ENCODE_MODE enc_mode);
285 
286     /// createSrcDstDepSetForDpas - Find the DPAS macro and set the dependency for input and output
287     /// return the created DepSets for input(src) and output(dst)
288     std::pair<DepSet*, DepSet*> createDPASSrcDstDepSet(
289         const InstList& insList, InstListIterator instIt, const InstIDs& inst_id_counter,
290         size_t& dpasCnt, SWSB_ENCODE_MODE enc_mode);
291 
292     /// mathDstWA - this will return the DepSet with math's dst region, and force to occupy the
293     /// entire registers no matter what the region and channel are. e.g. if dst is r1.3, it'll
294     /// occupy the entire r1
295     /// This is the WA to fix the HW read suppression issue
296     DepSet* createMathDstWADepSet(const Instruction &i, const InstIDs& inst_id_counter,
297         SWSB_ENCODE_MODE enc_mode);
298 
299 
300     // Register File Size Info
getGRF_REGS() const301     uint32_t getGRF_REGS()                  const { return GRF_REGS; }
getGRF_BYTES_PER_REG() const302     uint32_t getGRF_BYTES_PER_REG()         const { return GRF_BYTES_PER_REG; }
getGRF_LEN() const303     uint32_t getGRF_LEN()                   const { return GRF_REGS * GRF_BYTES_PER_REG; }
304 
getARF_A_BYTES_PER_REG() const305     uint32_t getARF_A_BYTES_PER_REG()       const { return ARF_A_BYTES_PER_REG; }
getARF_A_REGS() const306     uint32_t getARF_A_REGS()                const { return ARF_A_REGS; }
getARF_A_LEN() const307     uint32_t getARF_A_LEN()                 const { return ARF_A_REGS * ARF_A_BYTES_PER_REG; }
308 
getARF_ACC_REGS() const309     uint32_t getARF_ACC_REGS()              const { return ARF_ACC_REGS; }
getARF_ACC_BYTES_PER_REG() const310     uint32_t getARF_ACC_BYTES_PER_REG()     const { return ARF_ACC_BYTES_PER_REG; }
getARF_ACC_LEN() const311     uint32_t getARF_ACC_LEN()               const { return ARF_ACC_REGS * ARF_ACC_BYTES_PER_REG; }
312 
getARF_F_REGS() const313     uint32_t getARF_F_REGS()                const { return ARF_F_REGS; }
getARF_F_BYTES_PER_REG() const314     uint32_t getARF_F_BYTES_PER_REG()       const { return ARF_F_BYTES_PER_REG; }
getARF_F_LEN() const315     uint32_t getARF_F_LEN()                 const { return ARF_F_REGS * ARF_F_BYTES_PER_REG; }
316 
getARF_SPECIAL_REGS() const317     uint32_t getARF_SPECIAL_REGS()          const { return ARF_SPECIAL_REGS; }
getARF_SPECIAL_BYTES_PER_REG() const318     uint32_t getARF_SPECIAL_BYTES_PER_REG() const { return ARF_SPECIAL_BYTES_PER_REG; }
getARF_SPECIAL_LEN() const319     uint32_t getARF_SPECIAL_LEN()           const { return ARF_SPECIAL_REGS * ARF_SPECIAL_BYTES_PER_REG; }
320 
getGRF_START() const321     uint32_t getGRF_START()                 const { return 0; }
getARF_A_START() const322     uint32_t getARF_A_START()               const { return ALIGN_UP_TO(32, getGRF_START() + getGRF_LEN()); }
getARF_ACC_START() const323     uint32_t getARF_ACC_START()             const { return ALIGN_UP_TO(32, getARF_A_START() + getARF_A_LEN()); }
getARF_F_START() const324     uint32_t getARF_F_START()               const { return ALIGN_UP_TO(32, getARF_ACC_START() + getARF_ACC_LEN()); }
getARF_SPECIAL_START() const325     uint32_t getARF_SPECIAL_START()         const { return ALIGN_UP_TO(32, getARF_F_START() + getARF_F_LEN()); }
getTOTAL_END() const326     uint32_t getTOTAL_END()                 const { return ALIGN_UP_TO(32, getARF_SPECIAL_START() + getARF_SPECIAL_LEN()); }
327 
getTOTAL_BITS() const328     uint32_t getTOTAL_BITS()                const { return getTOTAL_END(); }
getBYTES_PER_BUCKET() const329     uint32_t getBYTES_PER_BUCKET()          const { return getGRF_BYTES_PER_REG(); }
getTOTAL_BUCKETS() const330     uint32_t getTOTAL_BUCKETS()             const { return (getTOTAL_BITS() / getBYTES_PER_BUCKET()) + 1; }
331 
getBucketStart(RegName regname) const332     uint32_t getBucketStart(RegName regname) const
333     {
334         uint32_t bucket = 0;
335         switch (regname)
336         {
337         case iga::RegName::GRF_R:
338             bucket = getGRF_START() / getBYTES_PER_BUCKET();
339             break;
340         case iga::RegName::ARF_A:
341             bucket = getARF_A_START() / getBYTES_PER_BUCKET();
342             break;
343         case iga::RegName::ARF_ACC:
344             bucket = getARF_ACC_START() / getBYTES_PER_BUCKET();
345             break;
346         case iga::RegName::ARF_F:
347             bucket = getARF_F_START() / getBYTES_PER_BUCKET();
348             break;
349         case RegName::ARF_CR:
350         case RegName::ARF_SR:
351             bucket = getARF_SPECIAL_START() / getBYTES_PER_BUCKET();
352             break;
353         default:
354             //putting rest of archtecture registers in to same bucket
355             bucket = getARF_F_START() / 32;
356             break;
357         }
358         return bucket;
359     }
360 
361 private:
362     // ASSUMES: byte-level tracking for all elements
363 
364     // FIXME: Some info taken from model, some are hardcoded
365     const uint32_t GRF_REGS;
366     const uint32_t GRF_BYTES_PER_REG;
367 
368     const uint32_t ARF_A_BYTES_PER_REG = 32;
369     const uint32_t ARF_A_REGS = 1;
370 
371     const uint32_t ARF_ACC_REGS = 12;
372     const uint32_t ARF_ACC_BYTES_PER_REG = 32;
373 
374     const uint32_t ARF_F_REGS;
375     const uint32_t ARF_F_BYTES_PER_REG = 4;
376 
377     //for registers that migh thave indirect dependence like CR and SR
378     const uint32_t ARF_SPECIAL_REGS = 2;
379     const uint32_t ARF_SPECIAL_BYTES_PER_REG = 4;
380 
381 private:
382     // Track all the created DepSet for deletion
383     std::vector<DepSet*> mAllDepSet;
384 
385     const Model &mPlatformModel;
386 };
387 
388 }
389 #endif // _IGA_REGDEPS_HPP
390