1 /*========================== begin_copyright_notice ============================
2
3 Copyright (C) 2020-2021 Intel Corporation
4
5 SPDX-License-Identifier: MIT
6
7 ============================= end_copyright_notice ===========================*/
8
9 #include "MessageDecoder.hpp"
10
11 #include <utility>
12
13 using namespace iga;
14
// Opcode encodings for LSC messages. tryDecodeLsc() reads the opcode from
// Desc[5:0] and compares it against these values; the encoder ORs them into
// the descriptor. The numeric values are part of the encoding.
enum LscOp : uint32_t {
    // loads and stores
    LSC_LOAD          = 0x00,
    LSC_LOAD_STRIDED  = 0x01,
    LSC_LOAD_QUAD     = 0x02, // aka load_cmask
    LSC_LOAD_BLOCK2D  = 0x03,
    LSC_STORE         = 0x04,
    LSC_STORE_STRIDED = 0x05,
    LSC_STORE_QUAD    = 0x06, // aka store_cmask
    LSC_STORE_BLOCK2D = 0x07,

    // atomics
    LSC_ATOMIC_IINC  = 0x08,
    LSC_ATOMIC_IDEC  = 0x09,
    LSC_ATOMIC_LOAD  = 0x0A,
    LSC_ATOMIC_STORE = 0x0B,
    LSC_ATOMIC_IADD  = 0x0C,
    LSC_ATOMIC_ISUB  = 0x0D,
    LSC_ATOMIC_SMIN  = 0x0E,
    LSC_ATOMIC_SMAX  = 0x0F,
    LSC_ATOMIC_UMIN  = 0x10,
    LSC_ATOMIC_UMAX  = 0x11,
    LSC_ATOMIC_ICAS  = 0x12,
    LSC_ATOMIC_FADD  = 0x13,
    LSC_ATOMIC_FSUB  = 0x14,
    LSC_ATOMIC_FMIN  = 0x15,
    LSC_ATOMIC_FMAX  = 0x16,
    LSC_ATOMIC_FCAS  = 0x17,
    LSC_ATOMIC_AND   = 0x18,
    LSC_ATOMIC_OR    = 0x19,
    LSC_ATOMIC_XOR   = 0x1A,

    // status, uncompressed store, and compression-state control
    LSC_LOAD_STATUS        = 0x1B,
    LSC_STORE_UNCOMPRESSED = 0x1C,
    LSC_CCS                = 0x1D,

    // read state info and fence
    LSC_RSI   = 0x1E,
    LSC_FENCE = 0x1F,

    LSC_STORE_UNCOMPRESSED_QUAD = 0x20,

    // sentinel; does not fit in the 6-bit opcode field
    LSC_INVALID = 0xFFFFFFFF,
};
57
// Address type encodings (2-bit field; see decodeLscAddrType)
static constexpr uint32_t LSC_AT_FLAT = 0x0;
static constexpr uint32_t LSC_AT_BSS  = 0x1;
static constexpr uint32_t LSC_AT_SS   = 0x2;
static constexpr uint32_t LSC_AT_BTI  = 0x3;

// Address size encodings (0x0 has no name here)
static constexpr uint32_t LSC_A16 = 0x1;
static constexpr uint32_t LSC_A32 = 0x2;
static constexpr uint32_t LSC_A64 = 0x3;

// Data size encodings (see decodeLscDataSize)
static constexpr uint32_t LSC_D8      = 0x0;
static constexpr uint32_t LSC_D16     = 0x1;
static constexpr uint32_t LSC_D32     = 0x2;
static constexpr uint32_t LSC_D64     = 0x3;
static constexpr uint32_t LSC_D8U32   = 0x4;
static constexpr uint32_t LSC_D16U32  = 0x5;
static constexpr uint32_t LSC_D16U32H = 0x6;

// Vector size encodings (see decodeLscVecSizeNormal)
static constexpr uint32_t LSC_V1  = 0x0;
static constexpr uint32_t LSC_V2  = 0x1;
static constexpr uint32_t LSC_V3  = 0x2;
static constexpr uint32_t LSC_V4  = 0x3;
static constexpr uint32_t LSC_V8  = 0x4;
static constexpr uint32_t LSC_V16 = 0x5;
static constexpr uint32_t LSC_V32 = 0x6;
static constexpr uint32_t LSC_V64 = 0x7;

// Address scaling encodings
// NOTE(review): not referenced in the visible part of this file
static constexpr uint32_t LSC_SCALE_NONE = 0x0;
static constexpr uint32_t LSC_SCALE_1X   = 0x1;
static constexpr uint32_t LSC_SCALE_2X   = 0x2;
static constexpr uint32_t LSC_SCALE_4X   = 0x3;
88
///////////////////////////////////////////////////////
// Cache control encodings.
//
// Each name is <L1>_<L3>. Where two names share a value, the first is the
// load spelling and the second is the store spelling of the same encoding.
//
// Values for bits[19:17]
static constexpr uint32_t LSC_DF_DF = 0x0;

static constexpr uint32_t LSC_UC_UC = 0x1;

static constexpr uint32_t LSC_UC_CA = 0x2;
static constexpr uint32_t LSC_UC_WB = 0x2;

static constexpr uint32_t LSC_CA_UC = 0x3;
static constexpr uint32_t LSC_WT_UC = 0x3;

static constexpr uint32_t LSC_CA_CA = 0x4;
static constexpr uint32_t LSC_WT_WB = 0x4;

static constexpr uint32_t LSC_ST_UC = 0x5;

static constexpr uint32_t LSC_ST_CA = 0x6;
static constexpr uint32_t LSC_ST_WB = 0x6;

static constexpr uint32_t LSC_RI_CA = 0x7;
static constexpr uint32_t LSC_WB_WB = 0x7;

// Values for bits [19:16]
// NOTE(review): not referenced in the visible part of this file
static constexpr uint32_t LSC_UC_CC = 0x5;
static constexpr uint32_t LSC_CA_CC = 0x9;
static constexpr uint32_t LSC_RI_RI = 0xE;
117
118
// Disabled code: the `#if 0` guard excludes everything up to the matching
// `#endif` from compilation. It sketches a per-opcode format record
// (mnemonic, description, mask, op) and an empty 32-entry table of them.
#if 0
struct LscMessageFormat {
    const char *mnemonic;
    const char *description;
    uint32_t mask;
    uint32_t op;
    //
    // std::pair<Platform,const char *> docs[2];
};

//
static LscMessageFormat OPS[32] {
};
#endif
133
134
135 // This handles LSC messages only
136 struct MessageDecoderLSC : MessageDecoder {
MessageDecoderLSCMessageDecoderLSC137 MessageDecoderLSC(
138 Platform _platform,
139 SFID _sfid,
140 ExecSize _execSize,
141 SendDesc _exDesc,
142 SendDesc _desc,
143 DecodeResult &_result)
144 : MessageDecoder(
145 _platform, _sfid, _execSize,
146 _exDesc, _desc, _result)
147 {
148 }
149
150 // used by decodeLscMessage and subchildren
151 std::string dataTypePrefixSyntax; // e.g. d32 or d16 or d32
152 std::string vectorSuffixSyntax; // e.g. x16t (for d16x16t) or .yzw
153 std::string addrSizeSyntax; // e.g. a32
154 std::string cacheControlSyntax; // e.g. ca.ca
155
156 SendOp op = SendOp::INVALID;
157 //
158 int expectedExecSize = 1;
159 //
160 int addrSizeBits = 0;
161 int dataSizeRegBits = 0, dataSizeMemBits = 0;
162 int vectorSize = 1;
163 MessageInfo::Attr extraAttrs = MessageInfo::Attr::NONE;
164
165 // the symbol to return in the MessageInfo structure
symbolFromSyntaxMessageDecoderLSC166 std::string symbolFromSyntax() const {
167 std::stringstream sym;
168 sym << result.syntax.mnemonic;
169 if (!result.syntax.controls.empty())
170 sym << result.syntax.controls;
171 sym << " ";
172 if (!result.syntax.surface.empty())
173 sym << result.syntax.surface;
174 sym << "[";
175 if (!result.syntax.scale.empty()) {
176 sym << result.syntax.scale;
177 }
178 sym << "A";
179 if (!result.syntax.immOffset.empty()) {
180 sym << result.syntax.immOffset;
181 }
182 sym << "]";
183 return sym.str();
184 }
185
186 ///////////////////////////////////////////////////////////////////////////
hasPayloadSizesInDescMessageDecoderLSC187 bool hasPayloadSizesInDesc() const {
188 return true;
189 }
190
lscAddrTypeOffsetMessageDecoderLSC191 int lscAddrTypeOffset() const {
192 int off = 29;
193
194 return off;
195 }
196
setCacheOptsMessageDecoderLSC197 void setCacheOpts(std::stringstream& sym, std::stringstream& descs,
198 CacheOpt &l1, CacheOpt &l3, CacheOpt _l1, CacheOpt _l3) {
199 l1 = _l1;
200 l3 = _l3;
201 if (_l1 == CacheOpt::DEFAULT && _l3 == CacheOpt::DEFAULT) {
202 descs << "use state settings for both L1 and L3";
203 return;
204 }
205 auto emitCacheOpt = [&] (CacheOpt c) {
206 sym << '.';
207 switch (c) {
208 case CacheOpt::DEFAULT:
209 sym << "df";
210 descs << " uses default state settings";
211 break;
212 case CacheOpt::READINVALIDATE:
213 sym << "ri";
214 descs << " read-invalidate (last use)";
215 break;
216 case CacheOpt::CACHED:
217 sym << "ca";
218 descs << " cached";
219 break;
220 case CacheOpt::STREAMING:
221 sym << "st";
222 descs << " streaming";
223 break;
224 case CacheOpt::UNCACHED:
225 sym << "uc";
226 descs << " uncached (bypass)";
227 break;
228 case CacheOpt::WRITETHROUGH:
229 sym << "wt";
230 descs << " writethrough";
231 break;
232 case CacheOpt::WRITEBACK:
233 sym << "wb";
234 descs << " writeback";
235 break;
236 default:
237 sym << "?";
238 descs << " invalid";
239 break;
240 }
241 };
242 descs << "L1"; emitCacheOpt(_l1);
243 descs << "; L3"; emitCacheOpt(_l3); descs << "";
244 }
245
246
decodeLscCacheControlMessageDecoderLSC247 void decodeLscCacheControl(
248 SendOp sop,
249 CacheOpt &l1,
250 CacheOpt &l3)
251 {
252
253 if (!decodeLscCacheControlBits17_19(sop, l1, l3))
254 error(17, 3, "invalid cache options");
255 }
256
257 // Descriptor Bits[19:17]: 3 bits of cache control
decodeLscCacheControlBits17_19MessageDecoderLSC258 bool decodeLscCacheControlBits17_19(
259 SendOp sop,
260 CacheOpt &l1,
261 CacheOpt &l3)
262 {
263 std::stringstream sym, descs;
264 l1 = l3 = CacheOpt::DEFAULT;
265 bool isLoad = lookupSendOp(sop).isLoad();
266 auto ccBits = getDescBits(17, 3);
267 auto setCacheOptsWrapper = [&](CacheOpt _l1, CacheOpt _l3) {
268 return setCacheOpts(sym, descs, l1, l3, _l1, _l3);
269 };
270 switch (ccBits) {
271 case LSC_DF_DF:
272 setCacheOptsWrapper(CacheOpt::DEFAULT, CacheOpt::DEFAULT);
273 break;
274 case LSC_UC_UC:
275 setCacheOptsWrapper(CacheOpt::UNCACHED, CacheOpt::UNCACHED);
276 break;
277 case LSC_UC_CA: // == LSC_UC_WB
278 if (isLoad)
279 setCacheOptsWrapper(CacheOpt::UNCACHED, CacheOpt::CACHED);
280 else
281 setCacheOptsWrapper(CacheOpt::UNCACHED, CacheOpt::WRITEBACK);
282 break;
283 case LSC_CA_UC: // == LSC_WT_UC
284 if (isLoad)
285 setCacheOptsWrapper(CacheOpt::CACHED, CacheOpt::UNCACHED);
286 else
287 setCacheOptsWrapper(CacheOpt::WRITETHROUGH, CacheOpt::UNCACHED);
288 break;
289 case LSC_CA_CA: // == LSC_WT_WB
290 if (isLoad)
291 setCacheOptsWrapper(CacheOpt::CACHED, CacheOpt::CACHED);
292 else
293 setCacheOptsWrapper(CacheOpt::WRITETHROUGH, CacheOpt::WRITEBACK);
294 break;
295 case LSC_ST_UC:
296 setCacheOptsWrapper(CacheOpt::STREAMING, CacheOpt::UNCACHED);
297 break;
298 case LSC_ST_CA: // == LSC_ST_WB
299 if (isLoad)
300 setCacheOptsWrapper(CacheOpt::STREAMING, CacheOpt::CACHED);
301 else
302 setCacheOptsWrapper(CacheOpt::STREAMING, CacheOpt::WRITEBACK);
303 break;
304 case LSC_RI_CA:
305 if (isLoad) {
306 // atomic follows store semantics, so compare against load
307 setCacheOptsWrapper(CacheOpt::READINVALIDATE, CacheOpt::CACHED);
308 } else {
309 setCacheOptsWrapper(CacheOpt::WRITEBACK, CacheOpt::WRITEBACK);
310 }
311 break;
312 default:
313 return false;
314 }
315 //
316 cacheControlSyntax = sym.str();
317 //
318 addField("Caching", 17, 3, ccBits, descs.str());
319 return true;
320 }
321
322
// Hook for decoding an immediate address offset; intentionally does nothing
// here. Called by decodeLscAddrType with the raw address type bits.
void decodeLscImmOff(uint32_t atBits)
{
    (void)atBits; // unused
}
325
decodeLscAddrTypeMessageDecoderLSC326 AddrType decodeLscAddrType(SendDesc& surfId, bool allowsFlat = true)
327 {
328 surfId = 0;
329 AddrType addrType = AddrType::FLAT;
330 //
331 int addrTypeLoc = lscAddrTypeOffset();
332 const char *addrTypeMeaning = "?";
333 //
334 const auto atBits = getDescBits(addrTypeLoc, 2);
335 //
336 std::stringstream surfSyntax;
337 switch (atBits) {
338 case LSC_AT_FLAT:
339 addrTypeMeaning = "Flat";
340 addrType = AddrType::FLAT;
341 if (!allowsFlat)
342 error(addrTypeLoc, 2,
343 "this message may not use FLAT address type");
344 break;
345 case LSC_AT_BSS:
346 case LSC_AT_SS:
347 if (atBits == LSC_AT_BSS) {
348 addrTypeMeaning = "BSS";
349 addrType = AddrType::BSS;
350 surfSyntax << "bss";
351 } else {
352 addrTypeMeaning = "SS";
353 addrType = AddrType::SS;
354 surfSyntax << "ss";
355 }
356 if (exDesc.isImm()) {
357 // XeHPG/XeHPC: we can pull this value out of ExDesc[31:11]
358 int exDescOff = 11, len = 31 - exDescOff + 1;
359 surfId = getDescBits(32 + exDescOff, len) << exDescOff;
360 addField(
361 "SurfaceStateOffset", exDescOff, len,
362 surfId.imm, "immediate surface state offset");
363 surfSyntax << "[" << iga::fmtHex(surfId.imm) << "]";
364 } else {
365 // XeHPG/XeHPC with reg surface state offset
366 surfSyntax << "[a0." << (int)exDesc.reg.subRegNum << "]";
367 surfId = exDesc;
368 }
369 break;
370 case LSC_AT_BTI:
371 addrTypeMeaning = "BTI";
372 addrType = AddrType::BTI;
373 if (exDesc.isImm()) {
374 uint32_t bti = decodeExDescField("BTI", 24, 8,
375 [&] (std::stringstream &ss, uint32_t bti) {
376 ss << "bti[" << bti << "]";
377 });
378 surfSyntax << "bti[" << bti << "]";
379 surfId = bti;
380 } else {
381 surfSyntax << "bti[a0." << (int)exDesc.reg.subRegNum << "]";
382 surfId = exDesc;
383 }
384 break;
385 default:
386 addrTypeMeaning = "INVALID AddrType";
387 addrType = AddrType::FLAT;
388 surfSyntax << "?";
389 error(addrTypeLoc, 2, "invalid address type");
390 break;
391 }
392 result.syntax.surface = surfSyntax.str();
393
394 /////////////////////////////
395 // immediate offset
396 decodeLscImmOff(atBits);
397 //
398 addField("AddrType", lscAddrTypeOffset(), 2, atBits, addrTypeMeaning);
399 //
400 return addrType;
401 }
402
decodeLscAddrSizeMessageDecoderLSC403 void decodeLscAddrSize() {
404 int addrSzBits = getDescBits(7, 2); // [8:7]
405 std::stringstream asym;
406 const char *aDesc = "";
407 switch (addrSzBits) {
408 case 1:
409 asym << "a16";
410 aDesc = "addresses are 16b";
411 addrSizeBits = 16;
412 break;
413 case 2:
414 asym << "a32";
415 aDesc = "addresses are 32b";
416 addrSizeBits = 32;
417 break;
418 case 3:
419 asym << "a64";
420 aDesc = "addresses are 64b";
421 addrSizeBits = 64;
422 break;
423 default:
424 asym << "a???";
425 aDesc = "address size is invalid";
426 error(7, 2, "invalid address size");
427 break;
428 }
429 // result.syntax.addressType = ":" + asym.str();
430
431 addrSizeSyntax = asym.str();
432 //
433 addField("AddrSize", 7, 2, addrSzBits, aDesc);
434 }
435
decodeLscDataSizeMessageDecoderLSC436 void decodeLscDataSize() {
437 std::stringstream dsym;
438 dataSizeRegBits = dataSizeMemBits = 0;
439 std::string meaning;
440 auto dtBits = getDescBits(9,3);
441 switch (dtBits) { // dat size [11:9]
442 case LSC_D8:
443 dataSizeRegBits = dataSizeMemBits = 8;
444 dsym << "d8";
445 meaning = "8b per data element";
446 break;
447 case LSC_D16:
448 dataSizeRegBits = dataSizeMemBits = 16;
449 meaning = "16b per data element";
450 dsym << "d16";
451 break;
452 case LSC_D32:
453 dataSizeRegBits = dataSizeMemBits = 32;
454 dsym << "d32";
455 meaning = "32b per data element";
456 break;
457 case LSC_D64:
458 dataSizeRegBits = dataSizeMemBits = 64;
459 dsym << "d64";
460 meaning = "64b per data element";
461 break;
462 case LSC_D8U32:
463 dataSizeRegBits = 32; dataSizeMemBits = 8;
464 dsym << "d8u32";
465 meaning = "load 8b into the low 8b of 32b register elements "
466 "(upper bits are undefined)";
467 break;
468 case LSC_D16U32:
469 dataSizeRegBits = 32; dataSizeMemBits = 16;
470 dsym << "d16u32";
471 meaning = "load 16b into the low 16b of 32b register elements "
472 "(upper bits are undefined)";
473 break;
474 case LSC_D16U32H:
475 dataSizeRegBits = 32; dataSizeMemBits = 16;
476 extraAttrs |= MessageInfo::Attr::EXPAND_HIGH;
477 dsym << "d16u32h";
478 meaning =
479 "load 16b into the high half of 32b register elements";
480 break;
481 default:
482 dsym << "0x" << std::uppercase << std::hex << dtBits;
483 meaning = "???";
484 }
485 //
486 // result.syntax.dataType = ":" + dsym.str();
487 dataTypePrefixSyntax = dsym.str();
488
489 addField("DataSize", 9, 3, dtBits, meaning);
490 }
491
decodeLscVecSizeMessageDecoderLSC492 void decodeLscVecSize()
493 {
494 if (lookupSendOp(op).hasChMask()) {
495 decodeLscVecSizeQuad();
496 } else {
497 decodeLscVecSizeNormal();
498 }
499 }
500
decodeLscVecSizeNormalMessageDecoderLSC501 void decodeLscVecSizeNormal() {
502 std::stringstream vsym;
503
504 uint32_t vecSzEncd = getDescBits(12, 3); // [14:12]
505 switch (vecSzEncd) {
506 case LSC_V1: vectorSize = 1; break;
507 case LSC_V2: vectorSize = 2; break;
508 case LSC_V3: vectorSize = 3; break;
509 case LSC_V4: vectorSize = 4; break;
510 case LSC_V8: vectorSize = 8; break;
511 case LSC_V16: vectorSize = 16; break;
512 case LSC_V32: vectorSize = 32; break;
513 case LSC_V64: vectorSize = 64; break;
514 default:
515 vsym << "x?";
516 }
517 bool opIsBlock2d =
518 op == SendOp::LOAD_BLOCK2D || op == SendOp::STORE_BLOCK2D;
519 auto transposed =
520 decodeDescBitField(
521 "DataOrder", 15,
522 "non-transposed (vector elements are in successive registers)",
523 "transposed (vector elements are in the same register)");
524 if (vectorSize > 1 || transposed && !opIsBlock2d) {
525 vsym << 'x' << vectorSize;
526 }
527 if (transposed && op == SendOp::LOAD_STATUS) {
528 error(15, 1, "data order must be non-transposed for this op");
529 }
530 std::stringstream vdesc;
531 vdesc << "each address accesses " << vectorSize << " element";
532 if (vectorSize != 1)
533 vdesc << "s";
534 if (!opIsBlock2d)
535 addField("VecSize", 12, 3, vecSzEncd, vdesc.str());
536 if (transposed) {
537 vsym << 't';
538 extraAttrs |= MessageInfo::Attr::TRANSPOSED;
539 expectedExecSize = 1; // all transpose messages are SIMD1
540 }
541
542 if (op == SendOp::LOAD_BLOCK2D) {
543 bool vnni = decodeDescBitField(
544 "Block2dVnniTransform", 7, "disabled", "enabled");
545 if (vnni)
546 vsym << 'v';
547 }
548
549 vectorSuffixSyntax = vsym.str();
550 }
551
decodeLscVecSizeQuadMessageDecoderLSC552 void decodeLscVecSizeQuad() {
553 // LSC channels *enabled* is the inverse of the old messages
554 // because the old ChMask used in untyped old (scatter4/gather4)
555 // was really a channel "disable" mask
556 auto chEn = getDescBits(12, 4);
557 vectorSize = 0;
558 for (int i = 0; i < 4; ++i) {
559 if ((1<<i) & chEn) {
560 vectorSize++;
561 }
562 }
563 extraAttrs |= MessageInfo::Attr::HAS_CHMASK;
564
565 std::stringstream vsym;
566 vsym << ".";
567 if (chEn & 1)
568 vsym << "x";
569 if (chEn & 2)
570 vsym << "y";
571 if (chEn & 4)
572 vsym << "z";
573 if (chEn & 8)
574 vsym << "w";
575 vectorSuffixSyntax = vsym.str();
576
577 addField("CompEn", 12, 4, chEn, vsym.str());
578 }
579
580
581 ///////////////////////////////////////////////////////////////////////////
decodeLscMessageMessageDecoderLSC582 void decodeLscMessage(
583 const char *doc,
584 std::string msgDesc,
585 SendOp lscOp)
586 {
587 const std::string symbol = ToSyntax(lscOp);
588 op = lscOp;
589
590 bool opSupportsUvr =
591 lscOp == SendOp::LOAD_QUAD ||
592 lscOp == SendOp::STORE_QUAD ||
593 lscOp == SendOp::STORE_UNCOMPRESSED_QUAD;
594 if (sfid == SFID::TGM && opSupportsUvr) {
595 extraAttrs |= MessageInfo::Attr::HAS_UVRLOD;
596 }
597
598 addField("Opcode", 0, 6, getDescBits(0, 6), symbol);
599
600 setDoc(doc);
601 //
602 if (hasPayloadSizesInDesc() &&
603 exDesc.isImm() && (exDesc.imm & 0x7FF))
604 {
605 // bit 11 may or may not be available
606 error(0, 12, "ExDesc[11:0] must be 0 on this platform");
607 }
608 //
609 SendDesc surfaceId(0x0);
610 AddrType addrType = decodeLscAddrType(surfaceId);
611 //
612 if (op == SendOp::LOAD_BLOCK2D || op == SendOp::STORE_BLOCK2D) {
613 addrSizeBits = 64;
614 addrSizeSyntax = "a64";
615 } else {
616 decodeLscAddrSize();
617 }
618 //
619 decodeLscDataSize();
620 //
621 expectedExecSize =
622 op == SendOp::LOAD_BLOCK2D || op == SendOp::STORE_BLOCK2D ? 1 :
623 sfid == SFID::TGM ? DEFAULT_EXEC_SIZE/2 : DEFAULT_EXEC_SIZE;
624 decodeLscVecSize();
625 //
626 if (sfid == SFID::TGM)
627 extraAttrs |= MessageInfo::Attr::TYPED;
628 if (sfid == SFID::SLM)
629 extraAttrs |= MessageInfo::Attr::SLM;
630 //
631 CacheOpt l1 = CacheOpt::DEFAULT, l3 = CacheOpt::DEFAULT;
632 const auto &opInfo = lookupSendOp(op);
633 bool hasCc =
634 opInfo.isLoad() || opInfo.isStore() || opInfo.isAtomic();
635 if (sfid != SFID::SLM && hasCc) {
636 decodeLscCacheControl(op, l1, l3);
637 }
638 //
639 result.syntax.mnemonic = symbol;
640 //
641 result.syntax.controls += '.';
642 result.syntax.controls += dataTypePrefixSyntax;
643 result.syntax.controls += vectorSuffixSyntax;
644 if (!addrSizeSyntax.empty()) {
645 result.syntax.controls += '.';
646 result.syntax.controls += addrSizeSyntax;
647 }
648 if (!cacheControlSyntax.empty()) {
649 result.syntax.controls += cacheControlSyntax;
650 }
651 //
652 setScatterGatherOpX(
653 symbolFromSyntax(),
654 msgDesc,
655 op,
656 addrType,
657 surfaceId,
658 l1,
659 l3,
660 addrSizeBits,
661 dataSizeRegBits,
662 dataSizeMemBits,
663 vectorSize,
664 int(instExecSize),
665 extraAttrs);
666 if (lookupSendOp(op).hasChMask()) {
667 result.info.channelsEnabled = getDescBits(12, 4);
668 if (result.info.channelsEnabled == 0)
669 error(12, 4, "no channels enabled on quad message");
670 }
671 }
672
673
setLscAtomicMessageMessageDecoderLSC674 void setLscAtomicMessage(
675 const char *doc,
676 std::string msgDesc,
677 SendOp atOp)
678 {
679 extraAttrs |=
680 getDescBits(20, 5) != 0 ?
681 MessageInfo::Attr::ATOMIC_RETURNS : MessageInfo::Attr::NONE;
682 if (sfid == SFID::TGM)
683 extraAttrs |= MessageInfo::Attr::HAS_UVRLOD;
684 decodeLscMessage(doc, msgDesc, atOp);
685 }
686
687
tryDecodeLscMessageDecoderLSC688 void tryDecodeLsc() {
689 int lscOp = getDescBits(0, 6); // Opcode[5:0]
690 switch (lscOp) {
691 case LSC_LOAD:
692 decodeLscMessage(
693 chooseDoc(nullptr, "53523", "63970"),
694 "gathering load",
695 SendOp::LOAD);
696 break;
697 case LSC_STORE:
698 decodeLscMessage(
699 chooseDoc(nullptr, "53523", "63980"),
700 "scattering store",
701 SendOp::STORE);
702 break;
703 case LSC_STORE_UNCOMPRESSED:
704 decodeLscMessage(
705 chooseDoc(nullptr, "53532", "63984"),
706 "scattering store uncompressed",
707 SendOp::STORE_UNCOMPRESSED);
708 break;
709 case LSC_STORE_UNCOMPRESSED_QUAD:
710 decodeLscMessage(
711 chooseDoc(nullptr, "55224", "63985"),
712 "store quad uncompressed",
713 SendOp::STORE_UNCOMPRESSED_QUAD);
714 break;
715 case LSC_LOAD_QUAD:
716 decodeLscMessage(
717 chooseDoc(nullptr, "53527", "63977"),
718 "quad load (a.k.a. load_cmask)",
719 SendOp::LOAD_QUAD);
720 break;
721 case LSC_STORE_QUAD:
722 decodeLscMessage(
723 chooseDoc(nullptr, "53527", "63983"),
724 "quad store (a.k.a. store_cmask)",
725 SendOp::STORE_QUAD);
726 break;
727 case LSC_LOAD_STRIDED:
728 decodeLscMessage(
729 chooseDoc(nullptr, "53525", "63976"),
730 "strided load (a.k.a load_block)",
731 SendOp::LOAD_STRIDED);
732 break;
733 case LSC_STORE_STRIDED:
734 decodeLscMessage(
735 chooseDoc(nullptr, "53526", "63982"),
736 "strided store (a.k.a store_block)",
737 SendOp::STORE_STRIDED);
738 break;
739 case LSC_LOAD_BLOCK2D:
740 decodeLscMessage(
741 chooseDoc(nullptr, "53680", "63972"),
742 "block2d load",
743 SendOp::LOAD_BLOCK2D);
744 break;
745 case LSC_STORE_BLOCK2D:
746 decodeLscMessage(
747 chooseDoc(nullptr, "53530", "63981"),
748 "block2d store",
749 SendOp::STORE_BLOCK2D);
750 break;
751 case LSC_ATOMIC_IINC:
752 setLscAtomicMessage(
753 chooseDoc(nullptr, "53538", "63955"),
754 "atomic integer increment",
755 SendOp::ATOMIC_IINC);
756 break;
757 case LSC_ATOMIC_IDEC:
758 setLscAtomicMessage(
759 chooseDoc(nullptr, "53539", "63949"),
760 "atomic integer decrement",
761 SendOp::ATOMIC_IDEC);
762 break;
763 case LSC_ATOMIC_LOAD:
764 setLscAtomicMessage(
765 chooseDoc(nullptr, "53540", "63956"),
766 "atomic load",
767 SendOp::ATOMIC_LOAD);
768 break;
769 case LSC_ATOMIC_STORE:
770 setLscAtomicMessage(
771 chooseDoc(nullptr, "53541", "63960"),
772 "atomic store",
773 SendOp::ATOMIC_STORE);
774 break;
775 case LSC_ATOMIC_IADD:
776 setLscAtomicMessage(
777 chooseDoc(nullptr, "53542", "63946"),
778 "atomic integer add",
779 SendOp::ATOMIC_IADD);
780 break;
781 case LSC_ATOMIC_ISUB:
782 setLscAtomicMessage(
783 chooseDoc(nullptr, "53543", "63961"),
784 "atomic integer subtract",
785 SendOp::ATOMIC_ISUB);
786 break;
787 case LSC_ATOMIC_SMIN:
788 setLscAtomicMessage(
789 chooseDoc(nullptr, "53544", "63958"),
790 "atomic signed-integer minimum",
791 SendOp::ATOMIC_SMIN);
792 break;
793 case LSC_ATOMIC_SMAX:
794 setLscAtomicMessage(
795 chooseDoc(nullptr, "53545", "63957"),
796 "atomic signed-integer maximum",
797 SendOp::ATOMIC_SMAX);
798 break;
799 case LSC_ATOMIC_UMIN:
800 setLscAtomicMessage(
801 chooseDoc(nullptr, "53546", "63963"),
802 "atomic unsigned-integer minimum",
803 SendOp::ATOMIC_UMIN);
804 break;
805 case LSC_ATOMIC_UMAX:
806 setLscAtomicMessage(
807 chooseDoc(nullptr, "53547", "63962"),
808 "atomic unsigned-integer maximum",
809 SendOp::ATOMIC_UMAX);
810 break;
811 case LSC_ATOMIC_ICAS:
812 setLscAtomicMessage(
813 chooseDoc(nullptr, "53555", "63948"),
814 "atomic integer compare and swap",
815 SendOp::ATOMIC_ICAS);
816 break;
817 case LSC_ATOMIC_FADD:
818 setLscAtomicMessage(
819 chooseDoc(nullptr, "53548", "63950"),
820 "atomic float add",
821 SendOp::ATOMIC_FADD);
822 break;
823 case LSC_ATOMIC_FSUB:
824 setLscAtomicMessage(
825 chooseDoc(nullptr, "53549", "63954"),
826 "atomic float subtract",
827 SendOp::ATOMIC_FSUB);
828 break;
829 case LSC_ATOMIC_FMIN:
830 setLscAtomicMessage(
831 chooseDoc(nullptr, "53550", "63953"),
832 "atomic float minimum",
833 SendOp::ATOMIC_FMIN);
834 break;
835 case LSC_ATOMIC_FMAX:
836 setLscAtomicMessage(
837 chooseDoc(nullptr, "53551", "63952"),
838 "atomic float maximum",
839 SendOp::ATOMIC_FMAX);
840 break;
841 case LSC_ATOMIC_FCAS:
842 setLscAtomicMessage(
843 chooseDoc(nullptr, "53556", "63951"),
844 "atomic float compare and swap",
845 SendOp::ATOMIC_FCAS);
846 break;
847 case LSC_ATOMIC_AND:
848 setLscAtomicMessage(
849 chooseDoc(nullptr, "53552", "63947"),
850 "atomic logical and",
851 SendOp::ATOMIC_AND);
852 break;
853 case LSC_ATOMIC_OR:
854 setLscAtomicMessage(
855 chooseDoc(nullptr, "53553", "63959"),
856 "atomic logical or",
857 SendOp::ATOMIC_OR);
858 break;
859 case LSC_ATOMIC_XOR:
860 setLscAtomicMessage(
861 chooseDoc(nullptr, "53554", "63964"),
862 "atomic logical xor",
863 SendOp::ATOMIC_XOR);
864 break;
865 case LSC_CCS:
866 decodeLscCcs();
867 break;
868 case LSC_RSI: {
869 addField("Opcode", 0, 6, getDescBits(0, 6), "read_state");
870 setDoc(nullptr, "54000", "63979");
871 //
872 std::stringstream descs;
873 descs << "read state information";
874 result.syntax.mnemonic = "read_state";
875 //
876 SendDesc surfId = 0;
877 auto at = decodeLscAddrType(surfId, false);
878 //
879 // XeHPG returns 2 GRF, XeHPC+ only 1
880 // #54152
881 int rlen = platform() == Platform::XE_HPG ? 2 : 1;
882 setSpecialOpX(
883 result.syntax.mnemonic,
884 descs.str(),
885 SendOp::READ_STATE,
886 at,
887 surfId,
888 1, // mlen = 1 (U,V,R,LOD)
889 rlen);
890 result.info.addrSizeBits = 64;
891 result.info.execWidth = 1;
892 result.info.attributeSet |= MessageInfo::Attr::HAS_UVRLOD;
893 result.info.attributeSet |= MessageInfo::Attr::TRANSPOSED;
894 break;
895 }
896 case LSC_FENCE:
897 decodeLscFence();
898 break;
899 case LSC_LOAD_STATUS:
900 if (getDescBit(15)) {
901 error(15, 1, "transpose forbidden on load_status");
902 }
903 if (getDescBits(20, 5) != 1) {
904 error(20, 5, "load_status must have rlen (Desc[24:20] == 1)");
905 }
906 decodeLscMessage(
907 chooseDoc(nullptr, "53531", "63978"),
908 "load status",
909 SendOp::LOAD_STATUS);
910 break;
911 default:
912 addField("Opcode", 0, 6,
913 getDescBits(0, 6), "invalid message opcode");
914 error(0, 6, "unsupported message opcode");
915 return;
916 }
917 }
918
decodeLscCcsMessageDecoderLSC919 void decodeLscCcs() {
920 addField("Opcode", 0, 6,
921 static_cast<uint32_t>(LSC_CCS),
922 "compression-state control");
923 //
924 std::stringstream descs;
925 result.syntax.mnemonic = "ccs";
926 descs << "compression-state control";
927 auto ccsOpBits = getDescBits(17, 3);
928 SendOp sop = SendOp::INVALID;
929 std::string opDesc;
930 switch (ccsOpBits) {
931 case 0:
932 sop = SendOp::CCS_PC;
933 result.syntax.mnemonic += "_pc";
934 opDesc = " page clear (64k)";
935 setDoc(nullptr, "53536", "63965");
936 break;
937 case 1:
938 sop = SendOp::CCS_SC;
939 result.syntax.mnemonic += "_sc";
940 opDesc = " sector clear (2-cachelines)";
941 setDoc(nullptr, "53534", "63967");
942 result.syntax.controls += vectorSuffixSyntax;
943 break;
944 case 2:
945 sop = SendOp::CCS_PU;
946 result.syntax.mnemonic += "_pu";
947 opDesc = " page uncompress (64k)";
948 setDoc(nullptr, "53537", "63966");
949 break;
950 case 3:
951 sop = SendOp::CCS_SU;
952 result.syntax.mnemonic += "_su";
953 opDesc = " sector uncompress (2-cachelines)";
954 setDoc(nullptr, "53535", "63968");
955 result.syntax.controls += vectorSuffixSyntax;
956 break;
957 default: {
958 std::stringstream ss;
959 ss << ".0x" << std::hex << std::uppercase << ccsOpBits;
960 result.syntax.controls += ss.str();
961 opDesc = "invalid ccs sop";
962 error(17, 3, "invalid ccs sop");
963 }
964 } // switch
965 descs << opDesc;
966 //
967 addField("CcsOp", 17, 3, ccsOpBits, opDesc);
968 //
969 SendDesc surfId = 0;
970 auto at = decodeLscAddrType(surfId);
971 if (ccsOpBits == 0 || ccsOpBits == 2) {
972 // page operations: pc, pu
973 if (at != AddrType::FLAT)
974 error(29, 2, "ccs_{pcc,pcu} requires FLAT address type");
975 std::stringstream dummy;
976 decodeLscAddrSize();
977 if (addrSizeBits != 64)
978 error(7, 2, "AddrSize must be A64");
979 result.info.execWidth = 1;
980 expectedExecSize = 1;
981 // sector uncompress has addresses
982 // FIXME: I could derive this via exec size and a64
983 int mlen =
984 ccsOpBits == 1 || ccsOpBits == 3 ?
985 4 : // A64_PAYLOAD_SIMT32 = 4 regs
986 1; // A64_PAYLOAD_SIMT1 = 1 reg
987 int rlen = 0; // always 0
988 setSpecialOpX(
989 symbolFromSyntax(),
990 descs.str(),
991 sop,
992 at,
993 surfId,
994 mlen,
995 rlen);
996 } else {
997 // sector operations
998 ///
999 // these are vector messages
1000 expectedExecSize = DEFAULT_EXEC_SIZE;
1001 // const int SECTOR_SIZE_BITS = 128*8;
1002 // result.syntax.controls += ".d1024";
1003 result.syntax.controls += vectorSuffixSyntax;
1004 result.syntax.controls +=
1005 addrSizeBits == 64 ? ".a64" : ".a32";
1006 //
1007 setScatterGatherOp(
1008 symbolFromSyntax(),
1009 descs.str(),
1010 sop,
1011 at,
1012 surfId,
1013 addrSizeBits,
1014 0, // dateSize = 0; nothing returned
1015 vectorSize,
1016 DEFAULT_EXEC_SIZE,
1017 extraAttrs);
1018 }
1019 }
1020
decodeLscFenceMessageDecoderLSC1021 void decodeLscFence() {
1022 addField("Opcode", 0, 6, getDescBits(0, 6), "fence");
1023 setDoc(nullptr, "53533", "63969");
1024 //
1025 std::stringstream descs;
1026 result.syntax.mnemonic = "fence";
1027 descs << "fence";
1028 //
1029 std::stringstream fenceOpts;
1030 addLscFenceFields(fenceOpts, descs);
1031 result.syntax.controls += fenceOpts.str();
1032 //
1033 setSpecialOpX(
1034 symbolFromSyntax(),
1035 descs.str(),
1036 SendOp::FENCE,
1037 AddrType::FLAT,
1038 0, // no surface
1039 1, // mlen = 1
1040 0); // rlen = 0
1041 }
1042 }; // MessageDecoderLSC
1043
1044
decodeDescriptorsLSC(Platform platform,SFID sfid,ExecSize execSize,SendDesc exDesc,SendDesc desc,DecodeResult & result)1045 void iga::decodeDescriptorsLSC(
1046 Platform platform, SFID sfid, ExecSize execSize,
1047 SendDesc exDesc, SendDesc desc,
1048 DecodeResult &result)
1049 {
1050 MessageDecoderLSC md(
1051 platform, sfid, execSize,
1052 exDesc, desc,
1053 result);
1054 md.tryDecodeLsc();
1055 }
1056
1057 // descriptor bits [19:17]: cache control
encLdStVecCachingBits17_19(SendOp op,CacheOpt cachingL1,CacheOpt cachingL3,SendDesc & desc)1058 static bool encLdStVecCachingBits17_19(
1059 SendOp op,
1060 CacheOpt cachingL1, CacheOpt cachingL3,
1061 SendDesc &desc)
1062 {
1063 const auto &opInfo = lookupSendOp(op);
1064 bool isLd = opInfo.isLoad();
1065 bool isSt = opInfo.isStore();
1066 bool isAt = opInfo.isAtomic();
1067 bool isStAt = isSt || isAt;
1068 auto ccMatches = [&](CacheOpt l1, CacheOpt l3, uint32_t enc) {
1069 if (l1 == cachingL1 && l3 == cachingL3) {
1070 desc.imm |= enc << 17;
1071 return true;
1072 }
1073 return false;
1074 };
1075 bool matched =
1076 ccMatches(CacheOpt::DEFAULT, CacheOpt::DEFAULT, LSC_DF_DF) ||
1077 //
1078 ccMatches(CacheOpt::UNCACHED, CacheOpt::UNCACHED, LSC_UC_UC) ||
1079 //
1080 (isLd &&
1081 ccMatches(CacheOpt::UNCACHED, CacheOpt::CACHED, LSC_UC_CA)) ||
1082 (isStAt &&
1083 ccMatches(CacheOpt::UNCACHED, CacheOpt::WRITEBACK, LSC_UC_WB)) ||
1084 //
1085 (isLd &&
1086 ccMatches(CacheOpt::CACHED, CacheOpt::UNCACHED, LSC_CA_UC)) ||
1087 (isSt &&
1088 ccMatches(
1089 CacheOpt::WRITETHROUGH, CacheOpt::UNCACHED, LSC_WT_UC)) ||
1090 //
1091 (isLd &&
1092 ccMatches(CacheOpt::CACHED, CacheOpt::CACHED, LSC_CA_CA)) ||
1093 (isSt &&
1094 ccMatches(
1095 CacheOpt::WRITETHROUGH, CacheOpt::WRITEBACK, LSC_WT_WB)) ||
1096 //
1097 ccMatches(CacheOpt::STREAMING, CacheOpt::UNCACHED, LSC_ST_UC) ||
1098 //
1099 (isLd &&
1100 ccMatches(CacheOpt::STREAMING, CacheOpt::CACHED, LSC_ST_CA)) ||
1101 (isSt &&
1102 ccMatches(CacheOpt::STREAMING, CacheOpt::WRITEBACK, LSC_ST_WB)) ||
1103 //
1104 (isLd &&
1105 ccMatches(
1106 CacheOpt::READINVALIDATE, CacheOpt::CACHED, LSC_RI_CA)) ||
1107 (isSt &&
1108 ccMatches(CacheOpt::WRITEBACK, CacheOpt::WRITEBACK, LSC_WB_WB));
1109 return matched;
1110 }
1111
1112
encLdStVecCaching(const Platform & p,SendOp op,CacheOpt cachingL1,CacheOpt cachingL3,SendDesc & desc)1113 static bool encLdStVecCaching(
1114 const Platform& p,
1115 SendOp op,
1116 CacheOpt cachingL1, CacheOpt cachingL3,
1117 SendDesc &desc)
1118 {
1119
1120 return encLdStVecCachingBits17_19(op, cachingL1, cachingL3, desc);
1121 }
1122
encLdStVec(Platform p,const VectorMessageArgs & vma,SendDesc & exDesc,SendDesc & desc,std::string & err)1123 static bool encLdStVec(
1124 Platform p,
1125 const VectorMessageArgs &vma,
1126 SendDesc &exDesc,
1127 SendDesc &desc,
1128 std::string &err)
1129 {
1130 desc = 0x0;
1131 exDesc = 0x0;
1132 //
1133 bool hasCMask = false;
1134 switch (vma.op) {
1135 case SendOp::LOAD: desc.imm |= LSC_LOAD; break;
1136 case SendOp::LOAD_STRIDED: desc.imm |= LSC_LOAD_STRIDED; break;
1137 case SendOp::LOAD_QUAD:
1138 desc.imm |= LSC_LOAD_QUAD;
1139 hasCMask = true;
1140 break;
1141 case SendOp::LOAD_BLOCK2D: desc.imm |= LSC_LOAD_BLOCK2D; break;
1142 //
1143 case SendOp::STORE: desc.imm |= LSC_STORE; break;
1144 case SendOp::STORE_STRIDED: desc.imm |= LSC_STORE_STRIDED; break;
1145 case SendOp::STORE_QUAD:
1146 desc.imm |= LSC_STORE_QUAD;
1147 hasCMask = true;
1148 break;
1149 case SendOp::STORE_UNCOMPRESSED:
1150 desc.imm |= LSC_STORE_UNCOMPRESSED;
1151 break;
1152 case SendOp::STORE_UNCOMPRESSED_QUAD:
1153 desc.imm |= LSC_STORE_UNCOMPRESSED_QUAD;
1154 hasCMask = true;
1155 break;
1156 case SendOp::STORE_BLOCK2D: desc.imm |= LSC_STORE_BLOCK2D; break;
1157 //
1158 case SendOp::ATOMIC_AND: desc.imm |= LSC_ATOMIC_AND; break;
1159 case SendOp::ATOMIC_FADD: desc.imm |= LSC_ATOMIC_FADD; break;
1160 case SendOp::ATOMIC_FCAS: desc.imm |= LSC_ATOMIC_FCAS; break;
1161 case SendOp::ATOMIC_FMAX: desc.imm |= LSC_ATOMIC_FMAX; break;
1162 case SendOp::ATOMIC_FMIN: desc.imm |= LSC_ATOMIC_FMIN; break;
1163 case SendOp::ATOMIC_FSUB: desc.imm |= LSC_ATOMIC_FSUB; break;
1164 case SendOp::ATOMIC_IADD: desc.imm |= LSC_ATOMIC_IADD; break;
1165 case SendOp::ATOMIC_ICAS: desc.imm |= LSC_ATOMIC_ICAS; break;
1166 case SendOp::ATOMIC_IDEC: desc.imm |= LSC_ATOMIC_IDEC; break;
1167 case SendOp::ATOMIC_IINC: desc.imm |= LSC_ATOMIC_IINC; break;
1168 case SendOp::ATOMIC_ISUB: desc.imm |= LSC_ATOMIC_ISUB; break;
1169 case SendOp::ATOMIC_LOAD: desc.imm |= LSC_ATOMIC_LOAD; break;
1170 case SendOp::ATOMIC_OR: desc.imm |= LSC_ATOMIC_OR; break;
1171 case SendOp::ATOMIC_SMAX: desc.imm |= LSC_ATOMIC_SMAX; break;
1172 case SendOp::ATOMIC_SMIN: desc.imm |= LSC_ATOMIC_SMIN; break;
1173 case SendOp::ATOMIC_STORE: desc.imm |= LSC_ATOMIC_STORE; break;
1174 case SendOp::ATOMIC_UMAX: desc.imm |= LSC_ATOMIC_UMAX; break;
1175 case SendOp::ATOMIC_UMIN: desc.imm |= LSC_ATOMIC_UMIN; break;
1176 case SendOp::ATOMIC_XOR: desc.imm |= LSC_ATOMIC_XOR; break;
1177 default:
1178 err = "unsupported op";
1179 return false;
1180 }
1181 bool isBlock2d =
1182 vma.op == SendOp::LOAD_BLOCK2D || vma.op == SendOp::STORE_BLOCK2D;
1183 bool isBlock2dTyped = isBlock2d && vma.sfid == SFID::TGM;
1184 bool isBlock2dUntyped = isBlock2d && vma.sfid != SFID::TGM;
1185 bool hasAddrSizeField = !isBlock2d;
1186
1187 //
1188 ////////////////////////////////////////
1189 // data size
1190 uint32_t dszEnc = LSC_D8;
1191 if (isBlock2dTyped &&
1192 (vma.dataSizeReg != 32 || vma.dataSizeMem != 32))
1193 {
1194 err = "block2d.tgm must be d32";
1195 return false;
1196 }
1197 if (vma.dataSizeMem == vma.dataSizeReg) {
1198 switch (vma.dataSizeMem) {
1199 case 8: dszEnc = LSC_D8; break;
1200 case 16: dszEnc = LSC_D16; break;
1201 case 32: dszEnc = LSC_D32; break;
1202 case 64: dszEnc = LSC_D64; break;
1203 default: err = "invalid data size"; return false;
1204 }
1205 } else if (vma.dataSizeMem == 8 && vma.dataSizeReg == 32) {
1206 dszEnc = LSC_D8U32;
1207 } else if (vma.dataSizeMem == 16 && vma.dataSizeReg == 32) {
1208 if (vma.dataSizeExpandHigh) {
1209 dszEnc = LSC_D16U32H;
1210 } else {
1211 dszEnc = LSC_D16U32;
1212 }
1213 } else {
1214 err = "invalid data type";
1215 return false;
1216 }
1217 if (!isBlock2dTyped)
1218 desc.imm |= dszEnc << 9;
1219 //
1220 ////////////////////////////////////////
1221 // vector size
1222 if (hasCMask) {
1223 if (vma.dataComponentMask & ~0xF) {
1224 err = "invalid component mask";
1225 return false;
1226 }
1227 desc.imm |= vma.dataComponentMask << 12;
1228 } else if (isBlock2d) {
1229 if (isBlock2dTyped && vma.dataVnni) {
1230 err = "block2d.tgm forbids VNNI";
1231 return false;
1232 } else if (isBlock2dTyped && vma.dataTranspose) {
1233 err = "block2d.tgm forbids transpose data order";
1234 return false;
1235 }
1236 if (vma.dataVnni)
1237 desc.imm |= 1 << 7;
1238 if (vma.dataTranspose)
1239 desc.imm |= 1 << 15;
1240 } else {
1241 uint32_t vecEnc = LSC_V1;
1242 switch (vma.dataVectorSize) {
1243 case 1: vecEnc = LSC_V1; break;
1244 case 2: vecEnc = LSC_V2; break;
1245 case 3: vecEnc = LSC_V3; break;
1246 case 4: vecEnc = LSC_V4; break;
1247 case 8: vecEnc = LSC_V8; break;
1248 case 16: vecEnc = LSC_V16; break;
1249 case 32: vecEnc = LSC_V32; break;
1250 case 64: vecEnc = LSC_V64; break;
1251 default: err = "invalid vector size"; break;
1252 }
1253 if (vma.isAtomic() && vma.dataVectorSize != 1) {
1254 err = "atomics do not support vector operations";
1255 return false;
1256 }
1257 if (vma.dataVnni) {
1258 err = "vnni only valid on block2d operations";
1259 return false;
1260 }
1261 //
1262 desc.imm |= vecEnc << 12;
1263 //
1264 if (vma.dataTranspose) {
1265 desc.imm |= 1 << 15;
1266 //
1267 if (vma.isAtomic()) {
1268 err = "atomics do not support transpose operations";
1269 return false;
1270 }
1271 }
1272 } // end vec non-cmask case
1273 //
1274 ////////////////////////////////////////
1275 // caching options
1276 if (vma.isAtomic() &&
1277 vma.cachingL1 != CacheOpt::DEFAULT &&
1278 vma.cachingL1 != CacheOpt::UNCACHED)
1279 {
1280 err = "atomic L1 must be an uncached option";
1281 return false;
1282 } else {
1283 if (!encLdStVecCaching(p, vma.op, vma.cachingL1, vma.cachingL3, desc)) {
1284 err = "invalid cache-control combination";
1285 return false;
1286 }
1287 }
1288 //
1289 ////////////////////////////////////////
1290 // address size
1291 uint32_t asEnc = 0x0;
1292 switch (vma.addrSize) {
1293 case 16: asEnc = LSC_A16; break;
1294 case 32: asEnc = LSC_A32; break;
1295 case 64: asEnc = LSC_A64; break;
1296 default:
1297 err = "unsupported address size";
1298 return false;
1299 }
1300 if (isBlock2dTyped && vma.addrSize != 32) {
1301 err = "block2d.typed address size must be A32";
1302 return false;
1303 }
1304 if (isBlock2dUntyped && vma.addrSize != 64) {
1305 err = "block2d untyped address size must be A64";
1306 return false;
1307 }
1308 if (hasAddrSizeField) {
1309 desc.imm |= asEnc << 7;
1310 }
1311 //
1312 ////////////////////////////////////////
1313 // address type
1314 uint32_t atEnc = 0x0;
1315 switch (vma.addrType) {
1316 case AddrType::FLAT: atEnc = LSC_AT_FLAT; break;
1317 case AddrType::BSS: atEnc = LSC_AT_BSS; break;
1318 case AddrType::SS: atEnc = LSC_AT_SS; break;
1319 case AddrType::BTI: atEnc = LSC_AT_BTI; break;
1320 default:
1321 err = "unsupported address type";
1322 return false;
1323 }
1324 if (isBlock2dTyped && vma.addrType == AddrType::FLAT) {
1325 err = "block2d.typed forbids flat address";
1326 return false;
1327 }
1328 desc.imm |= atEnc << 29;
1329 //
1330 // store the surface
1331 if (vma.addrType != AddrType::FLAT) {
1332 // use exDesc
1333 if (vma.addrType == AddrType::BTI && !vma.addrSurface.isReg()) {
1334 exDesc = vma.addrSurface.imm << 24;
1335 } else {
1336 exDesc = vma.addrSurface;
1337 }
1338 }
1339 //
1340 if (vma.addrType != AddrType::FLAT && vma.sfid == SFID::SLM) {
1341 err = "SLM requires flat address type";
1342 return false;
1343 }
1344 ////////////////////////////////////////
1345 // address scale factor
1346 if (vma.addrScale != 1) {
1347 if (true) { // disable if address scaling is ever added
1348 err = "address scaling not supported on this platform";
1349 return false;
1350 }
1351 int vlen = vma.elementsPerAddress();
1352 int bytesPerElem = vma.dataSizeMem * vlen / 8;
1353 uint32_t addrScEnc = LSC_SCALE_NONE;
1354 if (vma.addrScale > 32) {
1355 err = "scale value is too large";
1356 return false;
1357 } else if (vma.addrScale == bytesPerElem) {
1358 addrScEnc = LSC_SCALE_1X;
1359 } else if (vma.addrScale == 2*bytesPerElem) {
1360 addrScEnc = LSC_SCALE_2X;
1361 } else if (vma.addrScale == 4*bytesPerElem) {
1362 addrScEnc = LSC_SCALE_4X;
1363 } else {
1364 std::stringstream ss;
1365 ss <<
1366 "invalid scaling factor (must be " <<
1367 1*bytesPerElem << ", " <<
1368 2*bytesPerElem << ", or " <<
1369 4*bytesPerElem << ")";
1370 err = ss.str();
1371 return false;
1372 }
1373 desc.imm |= addrScEnc << 22;
1374 }
1375 //
1376 ////////////////////////////////////////
1377 // address immediate offset
1378 bool hasAddrImmOffset = vma.addrOffset != 0;
1379 hasAddrImmOffset |= vma.addrOffsetX != 0;
1380 hasAddrImmOffset |= vma.addrOffsetY != 0;
1381 if (hasAddrImmOffset) {
1382 bool platformSupportsAddrOff = false;
1383 if (platformSupportsAddrOff) {
1384 err = "address immediate offset not supported on this platform";
1385 return false;
1386 }
1387
1388 } // else: addrOffset == 0
1389
1390 ////////////////////////////////////////
1391 // set the surface object
1392 if (vma.addrType == AddrType::FLAT) {
1393 // IR normalization
1394 if (!vma.addrSurface.isImm() || vma.addrSurface.imm != 0) {
1395 err = "malformed IR: flat address model must have surface = 0";
1396 return false;
1397 }
1398 }
1399
1400 // XeHPG+ have surface in ExDesc
1401 if (vma.addrType == AddrType::BTI && vma.addrSurface.isImm()) {
1402 // BTI takes the high byte
1403 if (vma.addrSurface.imm > 0xFF) {
1404 err = "surface index too large for BTI";
1405 return false;
1406 }
1407 exDesc.imm |= vma.addrSurface.imm << 24;
1408 } else if (vma.addrType != AddrType::FLAT) {
1409 uint32_t ZERO_MASK = 0xFFF;
1410 std::string highBit = "11";
1411
1412 // if BTI reg or BSS/SS reg/imm with just copy
1413 // BSS/SS with imm, value is already aligned
1414 if (vma.addrType != AddrType::BTI &&
1415 vma.addrSurface.isImm() &&
1416 (vma.addrSurface.imm & ZERO_MASK) != 0)
1417 {
1418 err = "BSS/SS with immediate descriptor require "
1419 "ExDesc[" + highBit + ":0] to be 0";
1420 return false;
1421 }
1422 exDesc = vma.addrSurface;
1423 }
1424 //
1425 return true;
1426 }
1427
1428
encodeDescriptorsLSC(Platform p,const VectorMessageArgs & vma,SendDesc & exDesc,SendDesc & desc,std::string & err)1429 bool iga::encodeDescriptorsLSC(
1430 Platform p,
1431 const VectorMessageArgs &vma,
1432 SendDesc &exDesc,
1433 SendDesc &desc,
1434 std::string &err)
1435 {
1436 if (!sendOpSupportsSyntax(p, vma.op, vma.sfid)) {
1437 err = "unsupported message for SFID";
1438 return false;
1439 }
1440 return encLdStVec(p, vma,
1441 exDesc, desc, err);
1442 }
1443