1 /*========================== begin_copyright_notice ============================
2 
3 Copyright (C) 2017-2021 Intel Corporation
4 
5 SPDX-License-Identifier: MIT
6 
7 ============================= end_copyright_notice ===========================*/
8 
9 #include "Models.hpp"
10 
11 // this must precede model*.hpp inclusion below
12 #include "../strings.hpp"
13 
14 
// for bxml/Model operand type mappings
// TYPE(T) expands to ENUM_BITSET_VALUE(T, uint32_t).  It is defined here,
// ahead of the bxml/Model*.hpp includes that follow.
// NOTE(review): presumably those generated headers use TYPE(...) in their
// operand type tables - confirm before moving or renaming this macro.
#define TYPE(T) \
    ENUM_BITSET_VALUE(T, uint32_t)
18 
19 #include "bxml/ModelGen7p5.hpp"
20 #include "bxml/ModelGen8.hpp"
21 #include "bxml/ModelGen9.hpp"
22 #include "bxml/ModelGen10.hpp"
23 #include "bxml/ModelGen11.hpp"
24 #include "bxml/ModelXe.hpp"
25 #include "bxml/ModelXeHP.hpp"
26 #include "bxml/ModelXeHPG.hpp"
27 #include "bxml/ModelXeHPC.hpp"
28 #include "../asserts.hpp"
29 #include "../bits.hpp"
30 #include "../Backend/Native/MInst.hpp"
31 
32 #include <sstream>
33 #include <iostream>
34 
35 using namespace iga;
36 
37 
// Helper macros that build one RegInfo aggregate initializer for the
// REGISTER_SPECIFICATIONS table below.
//
// UNWRAP_TUPLE turns a parenthesized list into a braced initializer:
// NUM_BYTE_PER_REG is passed as e.g. (32,32), and
// "UNWRAP_TUPLE (32,32)" expands to {32,32}.
//
// full "constructor"
// Note that the macro takes the platform range (PLAT_LO,PLAT_HI) first, but
// emits it after REGNUM_BASE in the initializer list; the emitted order is
//   REGNAME, SYNTAX, DESCRIPTION, REGNUM7_4, REGNUM_BASE,
//   PLAT_LO, PLAT_HI, ACC_GRAN, NUM_REGS, {NUM_BYTE_PER_REG...}
#define UNWRAP_TUPLE(...) {__VA_ARGS__}
#define IGA_REGISTER_SPEC(\
    PLAT_LO,PLAT_HI,\
    REGNAME,SYNTAX,DESCRIPTION,\
    REGNUM7_4,REGNUM_BASE,\
    ACC_GRAN,\
    NUM_REGS,NUM_BYTE_PER_REG) \
    {REGNAME,SYNTAX,DESCRIPTION,REGNUM7_4,REGNUM_BASE,PLAT_LO,PLAT_HI,ACC_GRAN,NUM_REGS,UNWRAP_TUPLE NUM_BYTE_PER_REG}
// for <= some platform (whatever our lowest platform is);
// the lower bound is fixed to Platform::GEN6
#define IGA_REGISTER_SPEC_LE(PLAT_HI,REGNAME,SYNTAX,DESCRIPTION,REGNUM7_4,REGNUM_BASE,ACC_GRAN,NUM_REGS,NUM_BYTE_PER_REG) \
    IGA_REGISTER_SPEC(Platform::GEN6,PLAT_HI,REGNAME,SYNTAX,DESCRIPTION,REGNUM7_4,REGNUM_BASE,ACC_GRAN,NUM_REGS,NUM_BYTE_PER_REG)
// for >= some platform (up to the highest);
// the upper bound is fixed to Platform::FUTURE
#define IGA_REGISTER_SPEC_GE(PLAT_LO,REGNAME,SYNTAX,DESCRIPTION,REGNUM7_4,REGNUM_BASE,ACC_GRAN,NUM_REGS,NUM_BYTE_PER_REG) \
    IGA_REGISTER_SPEC(PLAT_LO,Platform::FUTURE,REGNAME,SYNTAX,DESCRIPTION,REGNUM7_4,REGNUM_BASE,ACC_GRAN,NUM_REGS,NUM_BYTE_PER_REG)
// a specification valid on all platforms (Platform::GEN6 .. Platform::FUTURE)
#define IGA_REGISTER_SPEC_UNIFORM(REGNAME,SYNTAX,DESCRIPTION,REGNUM7_4,REGNUM_BASE,ACC_GRAN,NUM_REGS,NUM_BYTE_PER_REG) \
    IGA_REGISTER_SPEC(Platform::GEN6,Platform::FUTURE,REGNAME,SYNTAX,DESCRIPTION,REGNUM7_4,REGNUM_BASE,ACC_GRAN,NUM_REGS,NUM_BYTE_PER_REG)
56 
57 
// Table of register specifications, one entry per (register file, platform
// range).  It is scanned linearly by Model::lookupRegInfoByRegName and
// Model::lookupArfRegInfoByRegNum, and exposed through
// iga::GetRegisterSpecificationTable.
//
// Argument order of each entry (after the platform bound(s) taken by the
// _LE/_GE/plain macro variants):
//   RegName, syntax, description,
//   RegNum[7:4], regNumBase,
//   accGran,
//   number of registers, (bytes per register, ...)
// A register count of 0 is used for registers such as ce, sp and ip;
// Model::getRegCount bumps that to 1.
//
// ordered by encoding of RegNum[7:4]
// newest platforms first
// NOTE(review): within a RegNum[7:4] group most entries are actually listed
// oldest platform first (tm is the exception) - confirm which order is meant.
// The acc entries must stay ahead of the mme entries:
// Model::lookupArfRegInfoByRegNum remembers a rejected acc entry before it
// reaches the mme entry that shares the same RegNum[7:4].
static const struct RegInfo REGISTER_SPECIFICATIONS[] = {
    IGA_REGISTER_SPEC_LE(
        Platform::GEN11,
        RegName::GRF_R, "r", "General",
        0, 0,
        1,
        128,(0)),

    IGA_REGISTER_SPEC_GE(
        Platform::XE,
        RegName::GRF_R, "r", "General",
        0,0, // regNum7_4, regNumBase
        1,   // accGran
        256,(0)),

    IGA_REGISTER_SPEC_UNIFORM(
        RegName::ARF_NULL, "null", "Null",
        0x0, 0,
        0,
        0, (32)),
    IGA_REGISTER_SPEC_UNIFORM(RegName::ARF_A, "a", "Index",
        0x1, 0,
        2,
        1, (32)),

    // acc and mme share same RegNum[7:4], mme gets the high registers
    IGA_REGISTER_SPEC_LE(
        Platform::GEN11,
        RegName::ARF_ACC, "acc", "Accumulator",
        0x2, 0,
        1,
        2, (32,32)),
    IGA_REGISTER_SPEC(Platform::XE, Platform::XE,
        RegName::ARF_ACC, "acc", "Accumulator",
        0x2, 0,
        1,
        8, (32,32,32,32,32,32,32,32)),
    IGA_REGISTER_SPEC(Platform::XE_HP, Platform::XE_HP,
        RegName::ARF_ACC, "acc", "Accumulator",
        0x2, 0,
        1,
        8, (32,32,32,32,32,32,32,32)),
    IGA_REGISTER_SPEC(Platform::XE_HPG, Platform::XE_HPG,
        RegName::ARF_ACC, "acc", "Accumulator",
        0x2, 0,
        1,
        8, (32,32,32,32,32,32,32,32)),
    // NOTE(review): 16 acc registers starting at base 0 cover encodings 0-15,
    // which overlaps the XE_HPC mme entry below (base 8).  Presumably
    // isRegNumberValid bounds-checks against the register count, in which
    // case encodings 8-15 would always resolve to acc on this platform -
    // confirm this is intended.
    IGA_REGISTER_SPEC_GE(
        Platform::XE_HPC,
        RegName::ARF_ACC, "acc", "Accumulator",
        0x2, 0,
        1,
        16, (64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64)),
    IGA_REGISTER_SPEC_LE(
        Platform::GEN11,
        RegName::ARF_MME, "mme", "Math Macro",
        0x2, 2, // offset by 2 "acc2-9"
        4,
        8, (32,32,32,32,32,32,32,32)),
    IGA_REGISTER_SPEC(Platform::XE, Platform::XE,
        RegName::ARF_MME, "mme", "Math Macro",
        0x2, 8, // offset by 8 "acc8-15"
        4,
        8, (32,32,32,32,32,32,32,32)),
    IGA_REGISTER_SPEC(Platform::XE_HP, Platform::XE_HP,
        RegName::ARF_MME, "mme", "Math Macro",
        0x2, 8, // offset by 8 "acc8-15"
        4,
        8, (32,32,32,32,32,32,32,32)),
    IGA_REGISTER_SPEC(Platform::XE_HPG, Platform::XE_HPG,
        RegName::ARF_MME, "mme", "Math Macro",
        0x2, 8, // offset by 8 "acc8-15"
        4,
        8, (32,32,32,32,32,32,32,32)),
    IGA_REGISTER_SPEC_GE(
        Platform::XE_HPC,
        RegName::ARF_MME, "mme", "Math Macro",
        0x2, 8, // offset by 8 "acc8-15"
        4,
        8, (64,64,64,64,64,64,64,64)),

    IGA_REGISTER_SPEC_LE(
        Platform::XE_HPG,
        RegName::ARF_F, "f", "Flag Register",
        0x3, 0,
        2,
        2, (4,4)),
    IGA_REGISTER_SPEC_GE(
        Platform::XE_HPC,
        RegName::ARF_F, "f", "Flag Register",
        0x3, 0,
        2,
        4, (4,4,4,4)),

    IGA_REGISTER_SPEC_GE(
        Platform::GEN7P5,
        RegName::ARF_CE, "ce", "Channel Enable",
        0x4, 0,
        4,
        0, (4)),

    IGA_REGISTER_SPEC_LE(
        Platform::XE_HPC,
        RegName::ARF_MSG, "msg", "Message Control",
        0x5, 0,
        4,
        8, (4,4,4,4,4,4,4,4)),
    IGA_REGISTER_SPEC(
        Platform::GEN7P5, Platform::GEN7P5,
        RegName::ARF_SP, "sp", "Stack Pointer",
        0x6, 0,
        4,
        0, (2*4)), // two subregisters of 4 bytes each
    IGA_REGISTER_SPEC(
        Platform::GEN8, Platform::XE_HPC,
        RegName::ARF_SP, "sp", "Stack Pointer",
        0x6, 0,
        4,
        0, (2*8)), // two subregisters of 8 bytes each


    IGA_REGISTER_SPEC_UNIFORM(
        RegName::ARF_SR, "sr", "State Register",
        0x7, 0,
        1,
        2, (16,16)), // sr{0,1}.{0..3}:d
    IGA_REGISTER_SPEC_UNIFORM(
        RegName::ARF_CR, "cr", "Control Register",
        0x8, 0,
        4,
        1, (3*4)), // cr0.{0..2}:d

        // with SWSB wait n{0,1} replaced by sync.{bar,host}, which
        // implicitly reference notification registers;
        // not sure if these are needed in CSR though, so leaving for now

    IGA_REGISTER_SPEC_UNIFORM(
        RegName::ARF_N, "n", "Notification Register",
        0x9, 0,
        4,
        1, (3*4)), // n0.{0..2}:d

    IGA_REGISTER_SPEC_UNIFORM(
        RegName::ARF_IP, "ip", "Instruction Pointer",
        0xA, 0,
        4,
        0, (4)), // ip
    IGA_REGISTER_SPEC_UNIFORM(
        RegName::ARF_TDR, "tdr", "Thread Dependency Register",
        0xB, 0,
        2,
        1, (16)), // tdr0.*
    IGA_REGISTER_SPEC_GE(
        Platform::GEN10,
        RegName::ARF_TM, "tm", "Timestamp Register",
        0xC, 0,
        4,
        1, (5*4)), // tm0.{0..4}:d
    IGA_REGISTER_SPEC_LE(
        Platform::GEN9,
        RegName::ARF_TM, "tm", "Timestamp Register",
        0xC, 0,
        4,
        1, (4*4)), // tm0.{0..3}:d

    // fc0.0-31  stack-entry 0-31
    // fc1.0     global counts
    // fc2.0     top of stack pointers
    // fc3.0-3   per channel counts
    // fc4.0     call mask
    IGA_REGISTER_SPEC(Platform::GEN7P5, Platform::GEN11,
        RegName::ARF_FC, "fc", "Flow Control",
        0xD, 0,
        4,
        5, (4*32,4*1,4*1,4*4,4*1)),
    //  EU GOTO/JOIN instruction latency improvement HAS397165 removes two flow control registers
    // fc0.0-31  per-channel IP
    // fc1.0     channel enables
    // fc2       call mask
    // fc3       JEU fused mask
    IGA_REGISTER_SPEC(Platform::XE, Platform::XE_HPC,
        RegName::ARF_FC, "fc", "Flow Control",
        0xD, 0,
        4,
        4, (4*32,4*1,4*1,4*1)),
    IGA_REGISTER_SPEC(Platform::GEN7, Platform::GEN7P5,
        RegName::ARF_DBG, "dbg", "Debug",
        0xF, 0,
        4,
        1, (4)), // dbg0.0:ud
    IGA_REGISTER_SPEC_GE(Platform::GEN8,
        RegName::ARF_DBG, "dbg", "Debug",
        0xF, 0,
        4,
        1, (2*4)), // dbg0.{0,1}:ud
};
256 
lookupOpSpec(Op op) const257 const OpSpec& Model::lookupOpSpec(Op op) const
258 {
259     if (op < Op::FIRST_OP || op > Op::LAST_OP) {
260         // external opspec API can reach this
261         // IGA_ASSERT_FALSE("op out of bounds");
262         return opsArray[(int)Op::INVALID]; // return invalid if assertions are off
263     }
264     return opsArray[(int)op];
265 }
266 
lookupOpSpecByCode(unsigned opcode) const267 const OpSpec& Model::lookupOpSpecByCode(unsigned opcode) const
268 {
269     // if (!opsByCodeValid) {
270     //     for (int i = (int)Op::FIRST_OP; i <= (int)Op::LAST_OP; i++) {
271     //         const OpSpec &os = lookupOpSpec((Op)i);
272     //         if (!os.isSubop()) {
273     //             opsByCode[os.code] = &os;
274     //         }
275     //     }
276     //     opsByCodeValid = true;
277     // }
278     for (int i = (int)Op::FIRST_OP; i <= (int)Op::LAST_OP; i++) {
279         if (opsArray[i].op != Op::INVALID &&
280             opsArray[i].opcode == opcode)
281         {
282             return opsArray[i];
283         }
284     }
285     return opsArray[static_cast<int>(Op::INVALID)];
286 }
287 
288 template <int N>
getBitsFromFragments(const uint64_t * qws,const Fragment ff[N])289 static unsigned getBitsFromFragments(const uint64_t *qws, const Fragment ff[N])
290 {
291     unsigned bits = 0;
292 
293     int off = 0;
294     for (int i = 0; i < N; i++) {
295         if (ff[i].length == 0) {
296             break;
297         }
298         auto frag = (unsigned)getBits(qws, ff[i].offset, ff[i].length);
299         bits |= frag << off;
300         off += ff[i].length;
301     }
302 
303     return bits;
304 }
305 
306 
lookupOpSpecFromBits(const void * bits,OpSpecMissInfo & missInfo) const307 const OpSpec& Model::lookupOpSpecFromBits(
308     const void *bits,
309     OpSpecMissInfo &missInfo) const
310 {
311     constexpr static Fragment F_OPCODE("Opcode", 0, 7);
312     //
313     const MInst *mi = (const MInst *)bits;
314     auto opc = mi->getFragment(F_OPCODE);
315     missInfo.opcode = opc;
316     const OpSpec *os = &lookupOpSpecByCode((unsigned)opc);
317     return *os;
318 }
319 
320 
lookupRegInfoByRegName(RegName name) const321 const RegInfo *Model::lookupRegInfoByRegName(RegName name) const
322 {
323     // static tester should check this
324     for (const RegInfo &ri : REGISTER_SPECIFICATIONS) {
325         if (ri.regName == name && ri.supportedOn(platform)) {
326             return &ri;
327         }
328     }
329     return nullptr;
330 }
331 
getNumGRF() const332 uint32_t Model::getNumGRF() const
333 {
334     return getRegCount(RegName::GRF_R);
335 }
336 
getNumFlagReg() const337 uint32_t Model::getNumFlagReg() const
338 {
339     return getRegCount(RegName::ARF_F);
340 }
341 
getGRFByteSize() const342 uint32_t Model::getGRFByteSize() const
343 {
344     return platform >= Platform::XE_HPC ? 64 : 32;
345 }
346 
getRegCount(RegName rn) const347 uint32_t Model::getRegCount(RegName rn) const {
348     const RegInfo* ri = lookupRegInfoByRegName(rn);
349     IGA_ASSERT(ri, "invalid register for platform");
350     // for getNumReg 0 means single register (like ce); bump to 1
351     int n = std::max(ri->getNumReg(), 1);
352     return n;
353 }
354 
getBytesPerReg(RegName rn) const355 uint32_t Model::getBytesPerReg(RegName rn) const {
356     const RegInfo* ri = lookupRegInfoByRegName(rn);
357     IGA_ASSERT(ri, "invalid register for platform");
358     if (rn == RegName::GRF_R) {
359         // GRF has 0's in numBytesPerReg[..]
360         if (platform >= Platform::XE_HPC)
361             return 64;
362         return 32;
363     }
364     // we assume they are all equal length
365     return ri->numBytesPerReg[0];
366 }
367 
368 
GetRegisterSpecificationTable(int & len)369 const RegInfo *iga::GetRegisterSpecificationTable(int &len)
370 {
371     len = sizeof(REGISTER_SPECIFICATIONS)/sizeof(REGISTER_SPECIFICATIONS[0]);
372     return REGISTER_SPECIFICATIONS;
373 }
374 
lookupArfRegInfoByRegNum(uint8_t regNum7_0) const375 const RegInfo* Model::lookupArfRegInfoByRegNum(uint8_t regNum7_0) const
376 {
377     const RegInfo *arfAcc = nullptr;
378     int regNum = (int)(regNum7_0 & 0xF);
379     for (const RegInfo &ri : REGISTER_SPECIFICATIONS) {
380         if (ri.regName == RegName::GRF_R) {
381             continue; // GRF will be in the table as 0000b
382         } else if (ri.regNum7_4 == ((uint32_t)regNum7_0 >> 4) && // RegNum[7:4] matches AND
383             ri.supportedOn(platform))           // platform matches
384         {
385             int shiftedRegNum = regNum - ri.regNumBase;
386             if (ri.regName == RegName::ARF_MME &&
387                 !ri.isRegNumberValid(shiftedRegNum) &&
388                 arfAcc != nullptr)
389             {
390                 // they picked an invalid register in the acc# space
391                 // (which is shared with mme#)
392                 // since acc# is far more likely, favor that so the error
393                 // message about the register number being out of range
394                 // refers to acc# instead of mme#
395                 return arfAcc;
396             } else if (ri.regName == RegName::ARF_ACC &&
397                 !ri.isRegNumberValid(shiftedRegNum))
398             {
399                 // not really acc#, but mme#, continue the loop until we find
400                 // that one, but at least save acc# for the error case above
401                 arfAcc = &ri;
402             } else {
403                 // - it's acc# (value reg)
404                 // - its mme#
405                 // - it's some other ARF
406                 return &ri;
407             }
408         }
409     }
410     // if we get here, we didn't find a matching register for this platform
411     // it is possible we found and rejected acc# because the reg num was out
412     // of bounds and we were hoping it was an mme# register, so we return
413     // that reg specification so that the error message will favor
414     // acc# over mme# (since the latter is far less likely)
415     return arfAcc;
416 }
417 
encode(int reg,uint8_t & regNumBits) const418 bool RegInfo::encode(int reg, uint8_t &regNumBits) const
419 {
420     if (!isRegNumberValid(reg)) {
421         return false;
422     }
423 
424     if (regName == RegName::GRF_R) {
425         regNumBits = (uint8_t)reg;
426     } else {
427         // ARF
428         // RegNum[7:4] = high bits from the spec
429         reg += regNumBase;
430         // this assert would suggest that something is busted in
431         // the RegInfo table
432         IGA_ASSERT(reg <= 0xF, "ARF encoding overflowed");
433         regNumBits = (uint8_t)(regNum7_4 << 4);
434         regNumBits |= (uint8_t)reg;
435     }
436     return true;
437 }
438 
439 
decode(uint8_t regNumBits,int & reg) const440 bool RegInfo::decode(uint8_t regNumBits, int &reg) const
441 {
442     if (regName == RegName::GRF_R) {
443         reg = (int)regNumBits;
444     } else {
445         reg = (int)(regNumBits & 0xF) - regNumBase; // acc2 -> mme0
446     }
447     return isRegNumberValid(reg);
448 }
449 
450 
// Per-platform Model instances.  Each one is constructed from its Platform
// value, a pointer to the first element of that platform's
// MODEL_*_OPSPECS array, and one or more name strings.
// NOTE(review): the first string appears to be the default file extension
// (see the XE_HPG entry) and any further strings platform name aliases -
// confirm against Model's constructor.
//
// Gen7 (ivb) is currently disabled:
// static const iga::Model MODEL_GEN7(
//    Platform::GEN7P5, &MODEL_GEN7_OPSPECS[0], "7", "ivb");
static constexpr Model MODEL_GEN7P5(
    Platform::GEN7P5, &MODEL_GEN7P5_OPSPECS[0], "7p5", "hsw");
static constexpr Model MODEL_GEN8(
    Platform::GEN8, &MODEL_GEN8_OPSPECS[0], "8", "bdw");
static constexpr Model MODEL_GEN9(
    Platform::GEN9, &MODEL_GEN9_OPSPECS[0], "9", "skl");
static constexpr Model MODEL_GEN10(
    Platform::GEN10, &MODEL_GEN10_OPSPECS[0], "10", "cnl");
static constexpr Model MODEL_GEN11(
    Platform::GEN11, &MODEL_GEN11_OPSPECS[0], "11", "icl");
static constexpr Model MODEL_XE(
    Platform::XE, &MODEL_XE_OPSPECS[0], "12p1",
    "xe", "xelp", "tgl", "tgllp", "dg1");
static constexpr Model MODEL_XE_HP(
    Platform::XE_HP, &MODEL_XE_HP_OPSPECS[0], "12p5", "xehp"
    );
static constexpr Model MODEL_XE_HPG(
    Platform::XE_HPG, &MODEL_XE_HPG_OPSPECS[0],
    "12p71", // default file extension
    "xehpg"
    );
static constexpr Model MODEL_XE_HPC(
    Platform::XE_HPC, &MODEL_XE_HPC_OPSPECS[0],
    "12p72",
    "xehpc"
    );
479 
// Pointers to every Model instance defined in this file, listed in the same
// order as their definitions.
const Model * const iga::ALL_MODELS[] {
    &MODEL_GEN7P5,
    &MODEL_GEN8,
    &MODEL_GEN9,
    &MODEL_GEN10,
    &MODEL_GEN11,
    &MODEL_XE,
    &MODEL_XE_HP,
    &MODEL_XE_HPG,
    &MODEL_XE_HPC,
};
// Number of entries in ALL_MODELS.
const size_t iga::ALL_MODELS_LEN = sizeof(ALL_MODELS)/sizeof(ALL_MODELS[0]);
492 
LookupModel(Platform p)493 const Model *Model::LookupModel(Platform p)
494 {
495     switch (p) {
496     case Platform::GEN7P5:
497         return &MODEL_GEN7P5;
498     case Platform::GEN8:
499     case Platform::GEN8LP:
500         return &MODEL_GEN8;
501     case Platform::GEN9:
502     case Platform::GEN9LP:
503     case Platform::GEN9P5:
504         return &MODEL_GEN9;
505     case Platform::GEN10:
506         return &MODEL_GEN10;
507     case Platform::GEN11:
508         return &MODEL_GEN11;
509     case Platform::XE:
510         return &MODEL_XE;
511     case Platform::XE_HP:
512         return &MODEL_XE_HP;
513     case Platform::XE_HPG:
514         return &MODEL_XE_HPG;
515     case Platform::XE_HPC:
516         return &MODEL_XE_HPC;
517     default:
518         return nullptr;
519     }
520 }
521 
LookupModelRef(Platform platform)522 const Model &Model::LookupModelRef(Platform platform)
523 {
524     const Model *m = Model::LookupModel(platform);
525     IGA_ASSERT(m, "invalid platform");
526     return *m;
527 }
528 
529