1 /*========================== begin_copyright_notice ============================
2 
3 Copyright (C) 2017-2021 Intel Corporation
4 
5 SPDX-License-Identifier: MIT
6 
7 ============================= end_copyright_notice ===========================*/
8 
9 #ifndef IGA_MODELS_HPP
10 #define IGA_MODELS_HPP
11 
12 #include "../asserts.hpp"
13 #include "../IR/Types.hpp"
14 #include "OpSpec.hpp"
15 
16 #include <cstddef>
17 #include <cstdint>
18 
19 namespace iga
20 {
21 
22     struct RegInfo {
23         // the register name corresponding to this platform
24         RegName     regName;
25         // the lexical identifier for this register
26         const char *syntax;
27         // a description of this register
28         // e.g. "State Register"
29         const char *desc;
30 
31         // For GRF this is nothing.
32         // For ARF this is RegNum[7:4].
33         // The value is unshifted.  I.e. only 4 bits.
34         //
35         // RegNum[3:0] usually holds the register number itself for
36         // the particular ARF.  E.g. acc1 has 0001b there.
37         // The exception is mme, which maps to acc2-9 on some platforms and
38         // other accumulators elsewhere.
39         uint32_t    regNum7_4;
40         // The amount to add to the register number given to set
41         // RegNum[3:0].  For most this will be 0, but a MMR will be offset
42         // within the ACC space (since they are shared)
43         int         regNumBase;
44 
45         // platform where this was first introduced
46         Platform    platIntrd;
47         // platform where this was last used
48         Platform    platLast;
49         // access granularity (in bytes)
50         int         accGran;
51 
52         // number of registers
53         // Zero 0 means no reg. number and the register has only 1
54         // e.g. "ce" instead of "ce0"
55         int         numRegs;
56         // The number of bytes in each subregister
57         // Certain registers are kind of wonky and have uneven sized registers
58         int         numBytesPerReg[16];
59 
isRegNumberValidiga::RegInfo60         bool isRegNumberValid(int reg) const {
61             // wonky because null and sp have "0" registers (meaning 1 implied)
62             // so reg==0 is alway valid for everyone
63             return reg == 0 ||
64                 (reg >= 0 && reg < numRegs); // otherwise: one of several registers
65         }
isSubRegByteOffsetValidiga::RegInfo66         bool isSubRegByteOffsetValid(
67             int regNum, int subregByte, int grfSize) const
68         {
69             int regBytes = 0;
70             // special handling null register that we may use sub-register number for other
71             // purposes. At those cases the sub register number of null should not excceed GRF bound.
72             if (regName == RegName::ARF_NULL) {
73                 regBytes = grfSize;
74             } else {
75                 regBytes = regName == RegName::GRF_R ? grfSize :
76                     numBytesPerReg[regNum];
77             }
78             return subregByte < regBytes;
79         }
80 
supportsRegioningiga::RegInfo81         bool supportsRegioning() const {
82             // needs to be more liberal than before
83             // context save and restore seems to region some of the
84             // non-regionable registers
85             return regName == RegName::ARF_NULL || hasSubregs();
86         }
supportedOniga::RegInfo87         bool supportedOn(Platform p) const {
88             return platIntrd <= p && p <= platLast;
89         }
hasRegNumiga::RegInfo90         bool hasRegNum() const {
91             return numRegs > 0; // e.g. "cr0" or "r13" VS "ce" or "null"
92         }
hasSubregsiga::RegInfo93         bool hasSubregs() const {
94             switch (regName) {
95             case RegName::ARF_IP:
96             case RegName::ARF_CE:
97             case RegName::ARF_NULL:
98                 return false;
99             default:
100                 return true;
101             }
102         }
103 
getNumRegiga::RegInfo104         int getNumReg() const { return numRegs; }
105 
106         bool encode(int reg, uint8_t &regNumBits) const;
107         bool decode(uint8_t regNumBits, int &reg) const;
108     }; // RegInfo
109 
110     // returns the table for all platforms.
111     // Most users should try and use one of the Model::lookupXXX methods
112     const RegInfo *GetRegisterSpecificationTable(int &len);
113 
114     // See IRChecker.cpp / checkDst (these ARFs need {Switch})
arfNeedsSwitch(RegName rn)115     static inline bool arfNeedsSwitch(RegName rn) {
116         // ARFs without a scoreboard need {Switch} before the write
117         //  Registers with scoreboard and no switch required -
118         //    Accumulator/address register/flag register/notify register
119         //  Registers without scoreboard and switch required -
120         //    Control Register/State Register/Stack Pointer/Timestamp/Pause/IP register
121         //  CE is read only.
122         //  FC does not require switch. It is read/write only in CSR SIP Routine.
123         //  TDR does not require switch. It is read/write only in CSR SIP Routine.
124 
125         switch (rn) {
126         case RegName::ARF_CR:
127         case RegName::ARF_DBG:
128         case RegName::ARF_IP:
129         case RegName::ARF_SP:
130         case RegName::ARF_SR:
131         case RegName::ARF_TM:
132             return true;
133         default:
134             return false;
135         }
136     }
137 
IsRegisterScaled(RegName regName,const Platform platform)138     static inline bool IsRegisterScaled(RegName regName, const Platform platform)
139     {
140         switch (regName)
141         {
142         case RegName::GRF_R:
143         case RegName::ARF_NULL:
144         case RegName::ARF_A:
145         case RegName::ARF_ACC:
146         case RegName::ARF_MME:
147         case RegName::ARF_TM:
148         case RegName::ARF_CR:
149         case RegName::ARF_SP:
150         case RegName::ARF_F:
151         case RegName::ARF_N:
152         case RegName::ARF_DBG:
153         case RegName::ARF_SR:
154         case RegName::ARF_TDR:
155             return true;
156 
157         case RegName::ARF_FC:
158         case RegName::ARF_MSG:
159             return false;
160 
161         default:
162             return false;
163         }
164     }
165 
166     // helper function to translate offset in binary to subReg Num
BinaryOffsetToSubReg(uint32_t offset,RegName regName,Type type,const Platform platform)167     static inline uint8_t BinaryOffsetToSubReg(
168         uint32_t offset, RegName regName, Type type, const Platform platform)
169     {
170         if (!IsRegisterScaled(regName, platform) || type == Type::INVALID) {
171             // special handle non-scaled ARF on XeHPC+:
172             // the sub reg number should be taken from Src0.SubReg[5:1]
173             if (platform >= Platform::XE_HPC)
174                 offset = offset >> 1;
175             return (uint8_t)offset;
176         }
177         auto tsh = TypeSizeShiftsOffsetToSubreg(type);
178         return (uint8_t)((offset << std::get<0>(tsh)) >> std::get<1>(tsh));
179     }
180 
181     // helper functions to translate subReg number to Offset in binary
182     // for regular cases (grf, flag, ...), the offset in binary is the byte offst of the sub-reg
183     // for the special cases (fc), the offset depends.
SubRegToBinaryOffset(int subRegNum,RegName regName,Type type,const Platform platform)184     static inline uint32_t SubRegToBinaryOffset(
185         int subRegNum, RegName regName, Type type, const Platform platform)
186     {
187         if (!IsRegisterScaled(regName, platform) || type == Type::INVALID) {
188             // special handle non-scaled ARF on XeHPC+: Src0.SubReg[0] must be 0
189             // the given sub reg number should be set to Src0.SubReg[5:1]
190             if (platform >= Platform::XE_HPC)
191                 subRegNum = subRegNum << 1;
192             return subRegNum;
193         }
194         auto tsh = TypeSizeShiftsOffsetToSubreg(type);
195         // NOTE: flipped tuple access (1 <-> 0) since we are unscaling
196         return (subRegNum << std::get<1>(tsh)) >> std::get<0>(tsh);
197     }
198 
199     // enables abstract iteration of all OpSpecs in the Model
200     // see Model::ops()
201     class OpSpecTableIterator {
202         Op curr;
203         const OpSpec *const opsArray;
advanceToNextValid()204         void advanceToNextValid() {
205             advance(); // advance at least one
206             while (curr <= Op::LAST_OP && !currValid()) {
207                 advance();
208             }
209         }
currValid() const210         bool currValid() const { return opsArray[(int)curr].isValid(); }
advance()211         void advance() {
212             if (curr <= Op::LAST_OP) {
213                 curr = (Op)((int)curr + 1);
214             }
215         }
216     public:
OpSpecTableIterator(const OpSpec * const ops,Op from)217         OpSpecTableIterator(const OpSpec *const ops, Op from)
218             : curr(from), opsArray(ops)
219         {
220             if (!currValid()) { // if FIRST_OP is bogus, go to next
221                 advanceToNextValid();
222             }
223         }
operator ==(const OpSpecTableIterator & rhs) const224         bool operator==(const OpSpecTableIterator &rhs) const {
225             return curr == rhs.curr;
226         }
operator !=(const OpSpecTableIterator & rhs) const227         bool operator!=(const OpSpecTableIterator &rhs) const {
228             return !(*this == rhs);
229         }
230         // OpSpecTableIterator  operator++(int); // post-increment
operator ++()231         OpSpecTableIterator&  operator++() { // pre-increment
232             advanceToNextValid();
233             return *this;
234         }
operator *() const235         const OpSpec *operator *() const {
236             return opsArray + (int)curr;
237         }
238     };
239     class OpSpecTableWalker {
240     private:
241         const OpSpec *const opsArray;
242         OpSpecTableIterator end_itr;
243     public:
OpSpecTableWalker(const OpSpec * const ops)244         OpSpecTableWalker(const OpSpec *const ops)
245             : opsArray(ops)
246             , end_itr(ops, (Op)((int)Op::LAST_OP + 1))
247         {
248         }
begin() const249         OpSpecTableIterator begin() const {
250             return OpSpecTableIterator(opsArray, Op::FIRST_OP);
251         }
end() const252         const OpSpecTableIterator &end() const { return end_itr; }
253     };
254 
255 
256     // error info if we fail to resolve the OpSpec
257     // only valid if `decoodeOpSpec` returns nullptr
258     struct OpSpecMissInfo
259     {
260         uint64_t     opcode;  // the opcode bits we tried to lookup
261                               // for subfunctions (e.g. math.*), this'll be
262                               // the subfunction bits we were looking for
263     };
264 
265 
266     // Corresponds to a platform model (e.g. GEN9)
267     // Has methods to lookup the various operations (OpSpec's) by name
268     // opcode value (7 bit encoding), and enumeration value (Op).
269     struct Model {
270         Platform             platform;
271 
272         // the table of supported ops for this model indexed by iga::Op
273         const OpSpec        *const opsArray;
274         //
275         // file ext (e.g. "12p1" for TGL)
276         ModelString          extension;
277         //
278         // various platform names we match for this model (e.g. "TGL")
279         static const size_t  MAX_NAMES = 6;
280         ModelString          names[MAX_NAMES];
281 
Modeliga::Model282         constexpr Model(
283             Platform p,
284             const OpSpec *const opsArr,
285             ModelString ext,
286             ModelString name0,
287             ModelString name1 = ModelString(),
288             ModelString name2 = ModelString(),
289             ModelString name3 = ModelString(),
290             ModelString name4 = ModelString(),
291             ModelString name5 = ModelString())
292             : platform(p), opsArray(opsArr), extension(ext)
293             , names{name0, name1, name2, name3, name4, name5}
294         {
295         }
296 
297         /*
298          * Enables iteration of all valid ops in the table in a for all loop
299          * E.g. one would write:
300          *   iga::Model model = ...
301          *   for (const OpSpec *os : model.ops()) {
302          *      IGA_ASSERT(os->isValid(), "all ops walked will be valid");
303          *   }
304          */
opsiga::Model305         OpSpecTableWalker ops() const {return OpSpecTableWalker(opsArray);}
306 
307         const OpSpec&        lookupOpSpec(Op op) const;
308         const OpSpec&        lookupOpSpecByCode(unsigned opcode) const;
309         const OpSpec&        lookupOpSpecFromBits(const void *bits, OpSpecMissInfo &missInfo) const;
310         const RegInfo*       lookupArfRegInfoByRegNum(uint8_t regNum7_0) const;
311         const RegInfo*       lookupRegInfoByRegName(RegName name) const;
312 
313 
314         static const Model  *LookupModel(Platform platform);
315         //
316         // same as above, but asserts if nullptr
317         // useful in cases where we know the platform is valid
318         static const Model  &LookupModelRef(Platform platform);
319 
320 
supportsHwDepsiga::Model321         bool supportsHwDeps() const
322         {
323             return platform <= Platform::GEN11;
324         }
325         // send is unary (sends is binary)
supportsUnarySendiga::Model326         bool supportsUnarySend() const {
327             return supportsHwDeps();
328         }
329         // sends merged with send (send is binary)
supportsUnifiedSendiga::Model330         bool supportsUnifiedSend() const {
331             return !supportsHwDeps();
332         }
333         // registers in control flow is stored in src1 for
334         // certain instructions
supportsSrc1CtrlFlowiga::Model335         bool supportsSrc1CtrlFlow() const {
336             return supportsUnarySend();
337         }
338 
339         // the wait instruction exists
supportsWaitiga::Model340         bool supportsWait() const { return supportsHwDeps(); }
341 
342         // ImplAcc must be Align16
supportsAlign16ImplicitAcciga::Model343         bool supportsAlign16ImplicitAcc() const {
344             return platform <= Platform::GEN10;
345         }
346         // If the GED_ACCESS_MODE is supported
supportsAccessModeiga::Model347         bool supportsAccessMode() const { return supportsAlign16ImplicitAcc(); }
348 
349         // {NoSrcDepSet} allowed
supportNoSrcDepSetiga::Model350         bool supportNoSrcDepSet() const {
351             return platform >= Platform::GEN9 && !supportsUnifiedSend();
352         }
353         // {NoPreempt} allowed
supportsNoPreemptiga::Model354         bool supportsNoPreempt() const {
355             return platform >= Platform::GEN10 && !supportsUnifiedSend();
356         }
357         // implies that:
358         //  - branches don't have types
359         //  - the pc is always relative to pre-inc (even jmpi)
supportsSimplifiedBranchesiga::Model360         bool supportsSimplifiedBranches() const {
361             return platform >= Platform::XE;
362         }
363 
supportsAlign16iga::Model364         bool supportsAlign16() const { return platform <= Platform::GEN10; }
supportsAlign16MacroOnlyiga::Model365         bool supportsAlign16MacroOnly() const { return platform == Platform::GEN10; }
supportsAlign16Ternaryiga::Model366         bool supportsAlign16Ternary() const { return platform < Platform::GEN10; }
supportsAlign16MacroInstiga::Model367         bool supportsAlign16MacroInst() const { return platform <= Platform::GEN10; }
368 
369         uint32_t getNumGRF()      const;
370         uint32_t getNumFlagReg()  const;
371         uint32_t getGRFByteSize() const;
372 
373         uint32_t getRegCount(RegName rn) const;
374         uint32_t getBytesPerReg(RegName rn) const;
375 
376         /// getSWSBEncodeMode - get the default swsb encoding mode derived from platform
getSWSBEncodeModeiga::Model377         SWSB_ENCODE_MODE getSWSBEncodeMode() const {
378             if (platform == Platform::XE)
379                 return SWSB_ENCODE_MODE::SingleDistPipe;
380             else if (platform == Platform::XE_HP)
381                 return SWSB_ENCODE_MODE::ThreeDistPipe;
382             else if (platform == Platform::XE_HPG)
383                 return SWSB_ENCODE_MODE::ThreeDistPipe;
384             else if (platform == Platform::XE_HPC)
385                 return SWSB_ENCODE_MODE::FourDistPipeReduction; // XE_HPC is XeHPC-Bstep (PVC-XT)
386             return SWSB_ENCODE_MODE::SWSBInvalidMode;
387         }
388 
389         // Get the max number of swsb id
getMaxSWSBTokenNumiga::Model390         uint32_t getMaxSWSBTokenNum() const {
391             switch(getSWSBEncodeMode()) {
392             case SWSB_ENCODE_MODE::SingleDistPipe:
393             case SWSB_ENCODE_MODE::ThreeDistPipe:
394                 return 16;
395 
396             case SWSB_ENCODE_MODE::FourDistPipe:
397             case SWSB_ENCODE_MODE::FourDistPipeReduction:
398                 return 32;
399             default:
400                 break;
401             }
402 
403             return 16;
404         }
405 
getSWSBMaxValidDistanceiga::Model406         uint32_t getSWSBMaxValidDistance() const {
407             return 7;
408         }
409 
410         // hasReadModifiedWriteOnByteDst - on the platform having 64-byte
411         // size GRF, we're not able to perform byte write to a GRF.
412         // The platform will read the other half and perform word write.
413         // This will affect swsb setting behavior.
hasReadModifiedWriteOnByteDstiga::Model414         bool hasReadModifiedWriteOnByteDst() const {
415             return getGRFByteSize() == 64;
416         }
417     }; // class Model
418 
419     ///////////////////////////////////////////////////////////////////////////
420     // In rare cases where one must iterate all models
421     extern const Model * const ALL_MODELS[];
422     extern const size_t ALL_MODELS_LEN;
423 } // namespace iga::*
424 
425 #endif // IGA_MODELS_HPP
426