1 /*========================== begin_copyright_notice ============================
2
3 Copyright (C) 2017-2021 Intel Corporation
4
5 SPDX-License-Identifier: MIT
6
7 ============================= end_copyright_notice ===========================*/
8
9 #include "Models.hpp"
10
11 // this must precede model*.hpp inclusion below
12 #include "../strings.hpp"
13
14
// Operand type mapping helper for the bxml/Model*.hpp headers included below:
// wraps a type token T as a uint32_t enum-bitset value.
#define TYPE(T) ENUM_BITSET_VALUE(T, uint32_t)
18
19 #include "bxml/ModelGen7p5.hpp"
20 #include "bxml/ModelGen8.hpp"
21 #include "bxml/ModelGen9.hpp"
22 #include "bxml/ModelGen10.hpp"
23 #include "bxml/ModelGen11.hpp"
24 #include "bxml/ModelXe.hpp"
25 #include "bxml/ModelXeHP.hpp"
26 #include "bxml/ModelXeHPG.hpp"
27 #include "bxml/ModelXeHPC.hpp"
28 #include "../asserts.hpp"
29 #include "../bits.hpp"
30 #include "../Backend/Native/MInst.hpp"
31
32 #include <sstream>
33 #include <iostream>
34
35 using namespace iga;
36
37
// Turns a parenthesized list "(a,b,c)" into a braced initializer "{a,b,c}";
// used for the per-register byte-size list of a register specification.
#define UNWRAP_TUPLE(...) {__VA_ARGS__}

// Full "constructor": builds one brace-initialized register specification.
// Note that the initializer order differs from the macro parameter order:
// the platform bounds are emitted after REGNUM_BASE.
#define IGA_REGISTER_SPEC( \
    PLAT_LO, PLAT_HI, \
    REGNAME, SYNTAX, DESCRIPTION, \
    REGNUM7_4, REGNUM_BASE, \
    ACC_GRAN, \
    NUM_REGS, NUM_BYTE_PER_REG) \
    { \
        REGNAME, SYNTAX, DESCRIPTION, \
        REGNUM7_4, REGNUM_BASE, \
        PLAT_LO, PLAT_HI, \
        ACC_GRAN, NUM_REGS, \
        UNWRAP_TUPLE NUM_BYTE_PER_REG \
    }

// For <= some platform (the lower bound is our lowest platform, GEN6).
#define IGA_REGISTER_SPEC_LE( \
    PLAT_HI, \
    REGNAME, SYNTAX, DESCRIPTION, \
    REGNUM7_4, REGNUM_BASE, \
    ACC_GRAN, \
    NUM_REGS, NUM_BYTE_PER_REG) \
    IGA_REGISTER_SPEC( \
        Platform::GEN6, PLAT_HI, \
        REGNAME, SYNTAX, DESCRIPTION, \
        REGNUM7_4, REGNUM_BASE, \
        ACC_GRAN, \
        NUM_REGS, NUM_BYTE_PER_REG)

// For >= some platform (up to the highest, Platform::FUTURE).
#define IGA_REGISTER_SPEC_GE( \
    PLAT_LO, \
    REGNAME, SYNTAX, DESCRIPTION, \
    REGNUM7_4, REGNUM_BASE, \
    ACC_GRAN, \
    NUM_REGS, NUM_BYTE_PER_REG) \
    IGA_REGISTER_SPEC( \
        PLAT_LO, Platform::FUTURE, \
        REGNAME, SYNTAX, DESCRIPTION, \
        REGNUM7_4, REGNUM_BASE, \
        ACC_GRAN, \
        NUM_REGS, NUM_BYTE_PER_REG)

// A specification valid on all platforms (GEN6 through FUTURE).
#define IGA_REGISTER_SPEC_UNIFORM( \
    REGNAME, SYNTAX, DESCRIPTION, \
    REGNUM7_4, REGNUM_BASE, \
    ACC_GRAN, \
    NUM_REGS, NUM_BYTE_PER_REG) \
    IGA_REGISTER_SPEC( \
        Platform::GEN6, Platform::FUTURE, \
        REGNAME, SYNTAX, DESCRIPTION, \
        REGNUM7_4, REGNUM_BASE, \
        ACC_GRAN, \
        NUM_REGS, NUM_BYTE_PER_REG)
56
57
58 // ordered by encoding of RegNum[7:4]
59 // newest platforms first
60 static const struct RegInfo REGISTER_SPECIFICATIONS[] = {
61 IGA_REGISTER_SPEC_LE(
62 Platform::GEN11,
63 RegName::GRF_R, "r", "General",
64 0, 0,
65 1,
66 128,(0)),
67
68 IGA_REGISTER_SPEC_GE(
69 Platform::XE,
70 RegName::GRF_R, "r", "General",
71 0,0, // regNum7_4, regNumBase
72 1, // accGran
73 256,(0)),
74
75 IGA_REGISTER_SPEC_UNIFORM(
76 RegName::ARF_NULL, "null", "Null",
77 0x0, 0,
78 0,
79 0, (32)),
80 IGA_REGISTER_SPEC_UNIFORM(RegName::ARF_A, "a", "Index",
81 0x1, 0,
82 2,
83 1, (32)),
84
85 // acc and mme share same RegNum[7:4], mme gets the high registers
86 IGA_REGISTER_SPEC_LE(
87 Platform::GEN11,
88 RegName::ARF_ACC, "acc", "Accumulator",
89 0x2, 0,
90 1,
91 2, (32,32)),
92 IGA_REGISTER_SPEC(Platform::XE, Platform::XE,
93 RegName::ARF_ACC, "acc", "Accumulator",
94 0x2, 0,
95 1,
96 8, (32,32,32,32,32,32,32,32)),
97 IGA_REGISTER_SPEC(Platform::XE_HP, Platform::XE_HP,
98 RegName::ARF_ACC, "acc", "Accumulator",
99 0x2, 0,
100 1,
101 8, (32,32,32,32,32,32,32,32)),
102 IGA_REGISTER_SPEC(Platform::XE_HPG, Platform::XE_HPG,
103 RegName::ARF_ACC, "acc", "Accumulator",
104 0x2, 0,
105 1,
106 8, (32,32,32,32,32,32,32,32)),
107 IGA_REGISTER_SPEC_GE(
108 Platform::XE_HPC,
109 RegName::ARF_ACC, "acc", "Accumulator",
110 0x2, 0,
111 1,
112 16, (64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64)),
113 IGA_REGISTER_SPEC_LE(
114 Platform::GEN11,
115 RegName::ARF_MME, "mme", "Math Macro",
116 0x2, 2, // offset by 2 "acc2-9"
117 4,
118 8, (32,32,32,32,32,32,32,32)),
119 IGA_REGISTER_SPEC(Platform::XE, Platform::XE,
120 RegName::ARF_MME, "mme", "Math Macro",
121 0x2, 8, // offset by 8 "acc8-15"
122 4,
123 8, (32,32,32,32,32,32,32,32)),
124 IGA_REGISTER_SPEC(Platform::XE_HP, Platform::XE_HP,
125 RegName::ARF_MME, "mme", "Math Macro",
126 0x2, 8, // offset by 8 "acc8-15"
127 4,
128 8, (32,32,32,32,32,32,32,32)),
129 IGA_REGISTER_SPEC(Platform::XE_HPG, Platform::XE_HPG,
130 RegName::ARF_MME, "mme", "Math Macro",
131 0x2, 8, // offset by 8 "acc8-15"
132 4,
133 8, (32,32,32,32,32,32,32,32)),
134 IGA_REGISTER_SPEC_GE(
135 Platform::XE_HPC,
136 RegName::ARF_MME, "mme", "Math Macro",
137 0x2, 8, // offset by 8 "acc8-15"
138 4,
139 8, (64,64,64,64,64,64,64,64)),
140
141 IGA_REGISTER_SPEC_LE(
142 Platform::XE_HPG,
143 RegName::ARF_F, "f", "Flag Register",
144 0x3, 0,
145 2,
146 2, (4,4)),
147 IGA_REGISTER_SPEC_GE(
148 Platform::XE_HPC,
149 RegName::ARF_F, "f", "Flag Register",
150 0x3, 0,
151 2,
152 4, (4,4,4,4)),
153
154 IGA_REGISTER_SPEC_GE(
155 Platform::GEN7P5,
156 RegName::ARF_CE, "ce", "Channel Enable",
157 0x4, 0,
158 4,
159 0, (4)),
160
161 IGA_REGISTER_SPEC_LE(
162 Platform::XE_HPC,
163 RegName::ARF_MSG, "msg", "Message Control",
164 0x5, 0,
165 4,
166 8, (4,4,4,4,4,4,4,4)),
167 IGA_REGISTER_SPEC(
168 Platform::GEN7P5, Platform::GEN7P5,
169 RegName::ARF_SP, "sp", "Stack Pointer",
170 0x6, 0,
171 4,
172 0, (2*4)), // two subregisters of 4 bytes each
173 IGA_REGISTER_SPEC(
174 Platform::GEN8, Platform::XE_HPC,
175 RegName::ARF_SP, "sp", "Stack Pointer",
176 0x6, 0,
177 4,
178 0, (2*8)), // two subregisters of 8 bytes each
179
180
181 IGA_REGISTER_SPEC_UNIFORM(
182 RegName::ARF_SR, "sr", "State Register",
183 0x7, 0,
184 1,
185 2, (16,16)), // sr{0,1}.{0..3}:d
186 IGA_REGISTER_SPEC_UNIFORM(
187 RegName::ARF_CR, "cr", "Control Register",
188 0x8, 0,
189 4,
190 1, (3*4)), // cr0.{0..2}:d
191
192 // with SWSB wait n{0,1} replaced by sync.{bar,host}, which
193 // implicitly reference notification registers;
194 // not sure if these are needed in CSR though, so leaving for now
195
196 IGA_REGISTER_SPEC_UNIFORM(
197 RegName::ARF_N, "n", "Notification Register",
198 0x9, 0,
199 4,
200 1, (3*4)), // n0.{0..2}:d
201
202 IGA_REGISTER_SPEC_UNIFORM(
203 RegName::ARF_IP, "ip", "Instruction Pointer",
204 0xA, 0,
205 4,
206 0, (4)), // ip
207 IGA_REGISTER_SPEC_UNIFORM(
208 RegName::ARF_TDR, "tdr", "Thread Dependency Register",
209 0xB, 0,
210 2,
211 1, (16)), // tdr0.*
212 IGA_REGISTER_SPEC_GE(
213 Platform::GEN10,
214 RegName::ARF_TM, "tm", "Timestamp Register",
215 0xC, 0,
216 4,
217 1, (5*4)), // tm0.{0..4}:d
218 IGA_REGISTER_SPEC_LE(
219 Platform::GEN9,
220 RegName::ARF_TM, "tm", "Timestamp Register",
221 0xC, 0,
222 4,
223 1, (4*4)), // tm0.{0..3}:d
224
225 // fc0.0-31 stack-entry 0-31
226 // fc1.0 global counts
227 // fc2.0 top of stack pointers
228 // fc3.0-3 per channel counts
229 // fc4.0 call mask
230 IGA_REGISTER_SPEC(Platform::GEN7P5, Platform::GEN11,
231 RegName::ARF_FC, "fc", "Flow Control",
232 0xD, 0,
233 4,
234 5, (4*32,4*1,4*1,4*4,4*1)),
235 // EU GOTO/JOIN instruction latency improvement HAS397165 removes two flow control registers
236 // fc0.0-31 per-channel IP
237 // fc1.0 channel enables
238 // fc2 call mask
239 // fc3 JEU fused mask
240 IGA_REGISTER_SPEC(Platform::XE, Platform::XE_HPC,
241 RegName::ARF_FC, "fc", "Flow Control",
242 0xD, 0,
243 4,
244 4, (4*32,4*1,4*1,4*1)),
245 IGA_REGISTER_SPEC(Platform::GEN7, Platform::GEN7P5,
246 RegName::ARF_DBG, "dbg", "Debug",
247 0xF, 0,
248 4,
249 1, (4)), // dbg0.0:ud
250 IGA_REGISTER_SPEC_GE(Platform::GEN8,
251 RegName::ARF_DBG, "dbg", "Debug",
252 0xF, 0,
253 4,
254 1, (2*4)), // dbg0.{0,1}:ud
255 };
256
lookupOpSpec(Op op) const257 const OpSpec& Model::lookupOpSpec(Op op) const
258 {
259 if (op < Op::FIRST_OP || op > Op::LAST_OP) {
260 // external opspec API can reach this
261 // IGA_ASSERT_FALSE("op out of bounds");
262 return opsArray[(int)Op::INVALID]; // return invalid if assertions are off
263 }
264 return opsArray[(int)op];
265 }
266
lookupOpSpecByCode(unsigned opcode) const267 const OpSpec& Model::lookupOpSpecByCode(unsigned opcode) const
268 {
269 // if (!opsByCodeValid) {
270 // for (int i = (int)Op::FIRST_OP; i <= (int)Op::LAST_OP; i++) {
271 // const OpSpec &os = lookupOpSpec((Op)i);
272 // if (!os.isSubop()) {
273 // opsByCode[os.code] = &os;
274 // }
275 // }
276 // opsByCodeValid = true;
277 // }
278 for (int i = (int)Op::FIRST_OP; i <= (int)Op::LAST_OP; i++) {
279 if (opsArray[i].op != Op::INVALID &&
280 opsArray[i].opcode == opcode)
281 {
282 return opsArray[i];
283 }
284 }
285 return opsArray[static_cast<int>(Op::INVALID)];
286 }
287
288 template <int N>
getBitsFromFragments(const uint64_t * qws,const Fragment ff[N])289 static unsigned getBitsFromFragments(const uint64_t *qws, const Fragment ff[N])
290 {
291 unsigned bits = 0;
292
293 int off = 0;
294 for (int i = 0; i < N; i++) {
295 if (ff[i].length == 0) {
296 break;
297 }
298 auto frag = (unsigned)getBits(qws, ff[i].offset, ff[i].length);
299 bits |= frag << off;
300 off += ff[i].length;
301 }
302
303 return bits;
304 }
305
306
lookupOpSpecFromBits(const void * bits,OpSpecMissInfo & missInfo) const307 const OpSpec& Model::lookupOpSpecFromBits(
308 const void *bits,
309 OpSpecMissInfo &missInfo) const
310 {
311 constexpr static Fragment F_OPCODE("Opcode", 0, 7);
312 //
313 const MInst *mi = (const MInst *)bits;
314 auto opc = mi->getFragment(F_OPCODE);
315 missInfo.opcode = opc;
316 const OpSpec *os = &lookupOpSpecByCode((unsigned)opc);
317 return *os;
318 }
319
320
lookupRegInfoByRegName(RegName name) const321 const RegInfo *Model::lookupRegInfoByRegName(RegName name) const
322 {
323 // static tester should check this
324 for (const RegInfo &ri : REGISTER_SPECIFICATIONS) {
325 if (ri.regName == name && ri.supportedOn(platform)) {
326 return &ri;
327 }
328 }
329 return nullptr;
330 }
331
getNumGRF() const332 uint32_t Model::getNumGRF() const
333 {
334 return getRegCount(RegName::GRF_R);
335 }
336
getNumFlagReg() const337 uint32_t Model::getNumFlagReg() const
338 {
339 return getRegCount(RegName::ARF_F);
340 }
341
getGRFByteSize() const342 uint32_t Model::getGRFByteSize() const
343 {
344 return platform >= Platform::XE_HPC ? 64 : 32;
345 }
346
getRegCount(RegName rn) const347 uint32_t Model::getRegCount(RegName rn) const {
348 const RegInfo* ri = lookupRegInfoByRegName(rn);
349 IGA_ASSERT(ri, "invalid register for platform");
350 // for getNumReg 0 means single register (like ce); bump to 1
351 int n = std::max(ri->getNumReg(), 1);
352 return n;
353 }
354
getBytesPerReg(RegName rn) const355 uint32_t Model::getBytesPerReg(RegName rn) const {
356 const RegInfo* ri = lookupRegInfoByRegName(rn);
357 IGA_ASSERT(ri, "invalid register for platform");
358 if (rn == RegName::GRF_R) {
359 // GRF has 0's in numBytesPerReg[..]
360 if (platform >= Platform::XE_HPC)
361 return 64;
362 return 32;
363 }
364 // we assume they are all equal length
365 return ri->numBytesPerReg[0];
366 }
367
368
GetRegisterSpecificationTable(int & len)369 const RegInfo *iga::GetRegisterSpecificationTable(int &len)
370 {
371 len = sizeof(REGISTER_SPECIFICATIONS)/sizeof(REGISTER_SPECIFICATIONS[0]);
372 return REGISTER_SPECIFICATIONS;
373 }
374
lookupArfRegInfoByRegNum(uint8_t regNum7_0) const375 const RegInfo* Model::lookupArfRegInfoByRegNum(uint8_t regNum7_0) const
376 {
377 const RegInfo *arfAcc = nullptr;
378 int regNum = (int)(regNum7_0 & 0xF);
379 for (const RegInfo &ri : REGISTER_SPECIFICATIONS) {
380 if (ri.regName == RegName::GRF_R) {
381 continue; // GRF will be in the table as 0000b
382 } else if (ri.regNum7_4 == ((uint32_t)regNum7_0 >> 4) && // RegNum[7:4] matches AND
383 ri.supportedOn(platform)) // platform matches
384 {
385 int shiftedRegNum = regNum - ri.regNumBase;
386 if (ri.regName == RegName::ARF_MME &&
387 !ri.isRegNumberValid(shiftedRegNum) &&
388 arfAcc != nullptr)
389 {
390 // they picked an invalid register in the acc# space
391 // (which is shared with mme#)
392 // since acc# is far more likely, favor that so the error
393 // message about the register number being out of range
394 // refers to acc# instead of mme#
395 return arfAcc;
396 } else if (ri.regName == RegName::ARF_ACC &&
397 !ri.isRegNumberValid(shiftedRegNum))
398 {
399 // not really acc#, but mme#, continue the loop until we find
400 // that one, but at least save acc# for the error case above
401 arfAcc = &ri;
402 } else {
403 // - it's acc# (value reg)
404 // - its mme#
405 // - it's some other ARF
406 return &ri;
407 }
408 }
409 }
410 // if we get here, we didn't find a matching register for this platform
411 // it is possible we found and rejected acc# because the reg num was out
412 // of bounds and we were hoping it was an mme# register, so we return
413 // that reg specification so that the error message will favor
414 // acc# over mme# (since the latter is far less likely)
415 return arfAcc;
416 }
417
encode(int reg,uint8_t & regNumBits) const418 bool RegInfo::encode(int reg, uint8_t ®NumBits) const
419 {
420 if (!isRegNumberValid(reg)) {
421 return false;
422 }
423
424 if (regName == RegName::GRF_R) {
425 regNumBits = (uint8_t)reg;
426 } else {
427 // ARF
428 // RegNum[7:4] = high bits from the spec
429 reg += regNumBase;
430 // this assert would suggest that something is busted in
431 // the RegInfo table
432 IGA_ASSERT(reg <= 0xF, "ARF encoding overflowed");
433 regNumBits = (uint8_t)(regNum7_4 << 4);
434 regNumBits |= (uint8_t)reg;
435 }
436 return true;
437 }
438
439
decode(uint8_t regNumBits,int & reg) const440 bool RegInfo::decode(uint8_t regNumBits, int ®) const
441 {
442 if (regName == RegName::GRF_R) {
443 reg = (int)regNumBits;
444 } else {
445 reg = (int)(regNumBits & 0xF) - regNumBase; // acc2 -> mme0
446 }
447 return isRegNumberValid(reg);
448 }
449
450
451 // static const iga::Model MODEL_GEN7(
452 // Platform::GEN7P5, &MODEL_GEN7_OPSPECS[0], "7", "ivb");
453 static constexpr Model MODEL_GEN7P5(
454 Platform::GEN7P5, &MODEL_GEN7P5_OPSPECS[0], "7p5", "hsw");
455 static constexpr Model MODEL_GEN8(
456 Platform::GEN8, &MODEL_GEN8_OPSPECS[0], "8", "bdw");
457 static constexpr Model MODEL_GEN9(
458 Platform::GEN9, &MODEL_GEN9_OPSPECS[0], "9", "skl");
459 static constexpr Model MODEL_GEN10(
460 Platform::GEN10, &MODEL_GEN10_OPSPECS[0], "10", "cnl");
461 static constexpr Model MODEL_GEN11(
462 Platform::GEN11, &MODEL_GEN11_OPSPECS[0], "11", "icl");
463 static constexpr Model MODEL_XE(
464 Platform::XE, &MODEL_XE_OPSPECS[0], "12p1",
465 "xe", "xelp", "tgl", "tgllp", "dg1");
466 static constexpr Model MODEL_XE_HP(
467 Platform::XE_HP, &MODEL_XE_HP_OPSPECS[0], "12p5", "xehp"
468 );
469 static constexpr Model MODEL_XE_HPG(
470 Platform::XE_HPG, &MODEL_XE_HPG_OPSPECS[0],
471 "12p71", // default file extension
472 "xehpg"
473 );
474 static constexpr Model MODEL_XE_HPC(
475 Platform::XE_HPC, &MODEL_XE_HPC_OPSPECS[0],
476 "12p72",
477 "xehpc"
478 );
479
480 const Model * const iga::ALL_MODELS[] {
481 &MODEL_GEN7P5,
482 &MODEL_GEN8,
483 &MODEL_GEN9,
484 &MODEL_GEN10,
485 &MODEL_GEN11,
486 &MODEL_XE,
487 &MODEL_XE_HP,
488 &MODEL_XE_HPG,
489 &MODEL_XE_HPC,
490 };
491 const size_t iga::ALL_MODELS_LEN = sizeof(ALL_MODELS)/sizeof(ALL_MODELS[0]);
492
LookupModel(Platform p)493 const Model *Model::LookupModel(Platform p)
494 {
495 switch (p) {
496 case Platform::GEN7P5:
497 return &MODEL_GEN7P5;
498 case Platform::GEN8:
499 case Platform::GEN8LP:
500 return &MODEL_GEN8;
501 case Platform::GEN9:
502 case Platform::GEN9LP:
503 case Platform::GEN9P5:
504 return &MODEL_GEN9;
505 case Platform::GEN10:
506 return &MODEL_GEN10;
507 case Platform::GEN11:
508 return &MODEL_GEN11;
509 case Platform::XE:
510 return &MODEL_XE;
511 case Platform::XE_HP:
512 return &MODEL_XE_HP;
513 case Platform::XE_HPG:
514 return &MODEL_XE_HPG;
515 case Platform::XE_HPC:
516 return &MODEL_XE_HPC;
517 default:
518 return nullptr;
519 }
520 }
521
LookupModelRef(Platform platform)522 const Model &Model::LookupModelRef(Platform platform)
523 {
524 const Model *m = Model::LookupModel(platform);
525 IGA_ASSERT(m, "invalid platform");
526 return *m;
527 }
528
529