1 /*========================== begin_copyright_notice ============================
2 
3 Copyright (C) 2020-2021 Intel Corporation
4 
5 SPDX-License-Identifier: MIT
6 
7 ============================= end_copyright_notice ===========================*/
8 
9 #include "MessageDecoder.hpp"
10 
11 #include <utility>
12 
13 using namespace iga;
14 
// Opcode encodings for LSC messages.  tryDecodeLsc reads the opcode from
// Desc[5:0] and switches on these values.
enum LscOp : uint32_t {
    // plain, strided, quad (channel-mask) and 2d-block loads and stores
    LSC_LOAD            = 0x00,
    LSC_LOAD_STRIDED    = 0x01,
    LSC_LOAD_QUAD       = 0x02, // aka load_cmask
    LSC_LOAD_BLOCK2D    = 0x03,
    LSC_STORE           = 0x04,
    LSC_STORE_STRIDED   = 0x05,
    LSC_STORE_QUAD      = 0x06, // aka store_cmask
    LSC_STORE_BLOCK2D   = 0x07,
    // atomics: integer, floating point, then bitwise
    LSC_ATOMIC_IINC     = 0x08,
    LSC_ATOMIC_IDEC     = 0x09,
    LSC_ATOMIC_LOAD     = 0x0A,
    LSC_ATOMIC_STORE    = 0x0B,
    LSC_ATOMIC_IADD     = 0x0C,
    LSC_ATOMIC_ISUB     = 0x0D,
    LSC_ATOMIC_SMIN     = 0x0E,
    LSC_ATOMIC_SMAX     = 0x0F,
    LSC_ATOMIC_UMIN     = 0x10,
    LSC_ATOMIC_UMAX     = 0x11,
    LSC_ATOMIC_ICAS     = 0x12,
    LSC_ATOMIC_FADD     = 0x13,
    LSC_ATOMIC_FSUB     = 0x14,
    LSC_ATOMIC_FMIN     = 0x15,
    LSC_ATOMIC_FMAX     = 0x16,
    LSC_ATOMIC_FCAS     = 0x17,
    LSC_ATOMIC_AND      = 0x18,
    LSC_ATOMIC_OR       = 0x19,
    LSC_ATOMIC_XOR      = 0x1A,
    // status query, uncompressed store and CCS operations
    LSC_LOAD_STATUS     = 0x1B,
    LSC_STORE_UNCOMPRESSED = 0x1C,
    LSC_CCS             = 0x1D,
    // read state info (decoded as "read_state") and fence
    LSC_RSI             = 0x1E,
    LSC_FENCE           = 0x1F,
    // quad (channel-mask) flavor of the uncompressed store
    LSC_STORE_UNCOMPRESSED_QUAD = 0x20,
    //
    // sentinel; larger than anything a 6-bit opcode field can hold
    LSC_INVALID         = 0xFFFFFFFF,
};
57 
// Address type (surface model) encodings; decodeLscAddrType reads these from
// the two descriptor bits that start at lscAddrTypeOffset().
static constexpr uint32_t LSC_AT_FLAT = 0x0;
static constexpr uint32_t LSC_AT_BSS  = 0x1;
static constexpr uint32_t LSC_AT_SS   = 0x2;
static constexpr uint32_t LSC_AT_BTI  = 0x3;

// Address size encodings (the values decodeLscAddrSize matches in Desc[8:7]).
static constexpr uint32_t LSC_A16 = 0x1;
static constexpr uint32_t LSC_A32 = 0x2;
static constexpr uint32_t LSC_A64 = 0x3;

// Data size encodings (Desc[11:9], see decodeLscDataSize).
static constexpr uint32_t LSC_D8      = 0x0;
static constexpr uint32_t LSC_D16     = 0x1;
static constexpr uint32_t LSC_D32     = 0x2;
static constexpr uint32_t LSC_D64     = 0x3;
static constexpr uint32_t LSC_D8U32   = 0x4;
static constexpr uint32_t LSC_D16U32  = 0x5;
static constexpr uint32_t LSC_D16U32H = 0x6;

// Vector size encodings (Desc[14:12], see decodeLscVecSizeNormal).
static constexpr uint32_t LSC_V1  = 0x0;
static constexpr uint32_t LSC_V2  = 0x1;
static constexpr uint32_t LSC_V3  = 0x2;
static constexpr uint32_t LSC_V4  = 0x3;
static constexpr uint32_t LSC_V8  = 0x4;
static constexpr uint32_t LSC_V16 = 0x5;
static constexpr uint32_t LSC_V32 = 0x6;
static constexpr uint32_t LSC_V64 = 0x7;

// Address scaling encodings.
static constexpr uint32_t LSC_SCALE_NONE = 0x0;
static constexpr uint32_t LSC_SCALE_1X   = 0x1;
static constexpr uint32_t LSC_SCALE_2X   = 0x2;
static constexpr uint32_t LSC_SCALE_4X   = 0x3;

///////////////////////////////////////////////////////
// Cache Opt
// Value for bits[19:17]
// Names read LSC_<L1>_<L3>.  Where two names share one value, the first is
// the meaning for loads and the second the meaning for every other op
// (see decodeLscCacheControlBits17_19).
static constexpr uint32_t LSC_DF_DF = 0x0;
// uncached in both levels
static constexpr uint32_t LSC_UC_UC = 0x1;
// L1 uncached; L3 cached (load) or writeback (store)
static constexpr uint32_t LSC_UC_CA = 0x2;
static constexpr uint32_t LSC_UC_WB = 0x2;
// L1 cached (load) or writethrough (store); L3 uncached
static constexpr uint32_t LSC_CA_UC = 0x3;
static constexpr uint32_t LSC_WT_UC = 0x3;
// L1 cached/writethrough; L3 cached/writeback
static constexpr uint32_t LSC_CA_CA = 0x4;
static constexpr uint32_t LSC_WT_WB = 0x4;
// L1 streaming; L3 uncached
static constexpr uint32_t LSC_ST_UC = 0x5;
// L1 streaming; L3 cached (load) or writeback (store)
static constexpr uint32_t LSC_ST_CA = 0x6;
static constexpr uint32_t LSC_ST_WB = 0x6;
// L1 read-invalidate + L3 cached (load), or writeback in both (store)
static constexpr uint32_t LSC_RI_CA = 0x7;
static constexpr uint32_t LSC_WB_WB = 0x7;

// Value for bits [19:16]
// (a wider encoding; 0x5 here is unrelated to LSC_ST_UC above)
static constexpr uint32_t LSC_UC_CC = 0x5;
static constexpr uint32_t LSC_CA_CC = 0x9;
static constexpr uint32_t LSC_RI_RI = 0xE;
117 
118 
// NOTE: everything between "#if 0" and "#endif" is compiled out.
#if 0
// Unused sketch of a per-opcode format record.
struct LscMessageFormat {
    const char *mnemonic;
    const char *description;
    uint32_t    mask;
    uint32_t    op;
//
//    std::pair<Platform,const char *> docs[2];
};

// Unused (and empty) table of the records above.
static LscMessageFormat OPS[32] {
};
#endif
133 
134 
135 // This handles LSC messages only
136 struct MessageDecoderLSC : MessageDecoder {
MessageDecoderLSCMessageDecoderLSC137     MessageDecoderLSC(
138         Platform _platform,
139         SFID _sfid,
140         ExecSize _execSize,
141         SendDesc _exDesc,
142         SendDesc _desc,
143         DecodeResult &_result)
144         : MessageDecoder(
145             _platform, _sfid, _execSize,
146             _exDesc, _desc, _result)
147     {
148     }
149 
    // Syntax fragments filled in by the decodeLsc* helpers and stitched
    // together into result.syntax.controls by decodeLscMessage.
    std::string dataTypePrefixSyntax; // e.g. d32 or d16 or d8u32
    std::string vectorSuffixSyntax; // e.g. x16t (for d16x16t) or .yzw
    std::string addrSizeSyntax; // e.g. a32
    std::string cacheControlSyntax; // e.g. .ca.ca (includes the leading dots)

    // the operation being decoded (set at the top of decodeLscMessage)
    SendOp op = SendOp::INVALID;
    // exec size this message form is expected to use
    int expectedExecSize = 1;
    // decoded sizes, in bits; register and memory element sizes differ for
    // the widening data types (e.g. d8u32)
    int addrSizeBits = 0;
    int dataSizeRegBits = 0, dataSizeMemBits = 0;
    // elements per address (or number of enabled channels for quad ops)
    int vectorSize = 1;
    // attributes accumulated during decoding and passed to the result
    MessageInfo::Attr extraAttrs = MessageInfo::Attr::NONE;
164 
165     // the symbol to return in the MessageInfo structure
symbolFromSyntaxMessageDecoderLSC166     std::string symbolFromSyntax() const {
167         std::stringstream sym;
168         sym << result.syntax.mnemonic;
169         if (!result.syntax.controls.empty())
170             sym << result.syntax.controls;
171         sym << "  ";
172         if (!result.syntax.surface.empty())
173             sym << result.syntax.surface;
174         sym << "[";
175         if (!result.syntax.scale.empty()) {
176             sym << result.syntax.scale;
177         }
178         sym << "A";
179         if (!result.syntax.immOffset.empty()) {
180             sym << result.syntax.immOffset;
181         }
182         sym << "]";
183         return sym.str();
184     }
185 
186     ///////////////////////////////////////////////////////////////////////////
    // Always true here.  decodeLscMessage uses it to gate the check that the
    // low bits of an immediate ExDesc are zero.
    bool hasPayloadSizesInDesc() const {
        return true;
    }
190 
lscAddrTypeOffsetMessageDecoderLSC191     int lscAddrTypeOffset() const {
192         int off = 29;
193 
194         return off;
195     }
196 
setCacheOptsMessageDecoderLSC197     void setCacheOpts(std::stringstream& sym, std::stringstream& descs,
198         CacheOpt &l1, CacheOpt &l3, CacheOpt _l1, CacheOpt _l3) {
199         l1 = _l1;
200         l3 = _l3;
201         if (_l1 == CacheOpt::DEFAULT && _l3 == CacheOpt::DEFAULT) {
202             descs << "use state settings for both L1 and L3";
203             return;
204         }
205         auto emitCacheOpt = [&] (CacheOpt c) {
206             sym << '.';
207             switch (c) {
208             case CacheOpt::DEFAULT:
209                 sym << "df";
210                 descs << " uses default state settings";
211                 break;
212             case CacheOpt::READINVALIDATE:
213                 sym << "ri";
214                 descs << " read-invalidate (last use)";
215                 break;
216             case CacheOpt::CACHED:
217                 sym << "ca";
218                 descs << " cached";
219                 break;
220             case CacheOpt::STREAMING:
221                 sym << "st";
222                 descs << " streaming";
223                 break;
224             case CacheOpt::UNCACHED:
225                 sym << "uc";
226                 descs << " uncached (bypass)";
227                 break;
228             case CacheOpt::WRITETHROUGH:
229                 sym << "wt";
230                 descs << " writethrough";
231                 break;
232             case CacheOpt::WRITEBACK:
233                 sym << "wb";
234                 descs << " writeback";
235                 break;
236             default:
237                 sym << "?";
238                 descs << " invalid";
239                 break;
240             }
241         };
242         descs << "L1"; emitCacheOpt(_l1);
243         descs << "; L3"; emitCacheOpt(_l3); descs << "";
244     }
245 
246 
decodeLscCacheControlMessageDecoderLSC247     void decodeLscCacheControl(
248         SendOp sop,
249         CacheOpt &l1,
250         CacheOpt &l3)
251     {
252 
253         if (!decodeLscCacheControlBits17_19(sop, l1, l3))
254             error(17, 3, "invalid cache options");
255     }
256 
257     // Descriptor Bits[19:17]: 3 bits of cache control
decodeLscCacheControlBits17_19MessageDecoderLSC258     bool decodeLscCacheControlBits17_19(
259         SendOp sop,
260         CacheOpt &l1,
261         CacheOpt &l3)
262     {
263         std::stringstream sym, descs;
264         l1 = l3 = CacheOpt::DEFAULT;
265         bool isLoad = lookupSendOp(sop).isLoad();
266         auto ccBits = getDescBits(17, 3);
267         auto setCacheOptsWrapper = [&](CacheOpt _l1, CacheOpt _l3) {
268             return setCacheOpts(sym, descs, l1, l3, _l1, _l3);
269         };
270         switch (ccBits) {
271         case LSC_DF_DF:
272             setCacheOptsWrapper(CacheOpt::DEFAULT, CacheOpt::DEFAULT);
273             break;
274         case LSC_UC_UC:
275             setCacheOptsWrapper(CacheOpt::UNCACHED, CacheOpt::UNCACHED);
276             break;
277         case LSC_UC_CA: // == LSC_UC_WB
278             if (isLoad)
279                 setCacheOptsWrapper(CacheOpt::UNCACHED, CacheOpt::CACHED);
280             else
281                 setCacheOptsWrapper(CacheOpt::UNCACHED, CacheOpt::WRITEBACK);
282             break;
283         case LSC_CA_UC: // == LSC_WT_UC
284             if (isLoad)
285                 setCacheOptsWrapper(CacheOpt::CACHED, CacheOpt::UNCACHED);
286             else
287                 setCacheOptsWrapper(CacheOpt::WRITETHROUGH, CacheOpt::UNCACHED);
288             break;
289         case LSC_CA_CA: // == LSC_WT_WB
290             if (isLoad)
291                 setCacheOptsWrapper(CacheOpt::CACHED, CacheOpt::CACHED);
292             else
293                 setCacheOptsWrapper(CacheOpt::WRITETHROUGH, CacheOpt::WRITEBACK);
294             break;
295         case LSC_ST_UC:
296             setCacheOptsWrapper(CacheOpt::STREAMING, CacheOpt::UNCACHED);
297             break;
298         case LSC_ST_CA: // == LSC_ST_WB
299             if (isLoad)
300                 setCacheOptsWrapper(CacheOpt::STREAMING, CacheOpt::CACHED);
301             else
302                 setCacheOptsWrapper(CacheOpt::STREAMING, CacheOpt::WRITEBACK);
303             break;
304         case LSC_RI_CA:
305             if (isLoad) {
306                 // atomic follows store semantics, so compare against load
307                 setCacheOptsWrapper(CacheOpt::READINVALIDATE, CacheOpt::CACHED);
308             } else {
309                 setCacheOptsWrapper(CacheOpt::WRITEBACK, CacheOpt::WRITEBACK);
310             }
311             break;
312         default:
313             return false;
314         }
315         //
316         cacheControlSyntax = sym.str();
317         //
318         addField("Caching", 17, 3, ccBits, descs.str());
319         return true;
320     }
321 
322 
    // Hook for immediate-offset decoding, called from decodeLscAddrType with
    // the raw address type bits.  The body is empty: atBits is ignored and
    // nothing is decoded.
    void decodeLscImmOff(uint32_t atBits) {
    }
325 
decodeLscAddrTypeMessageDecoderLSC326     AddrType decodeLscAddrType(SendDesc& surfId, bool allowsFlat = true)
327     {
328         surfId = 0;
329         AddrType addrType = AddrType::FLAT;
330         //
331         int addrTypeLoc = lscAddrTypeOffset();
332         const char *addrTypeMeaning = "?";
333         //
334         const auto atBits = getDescBits(addrTypeLoc, 2);
335         //
336         std::stringstream surfSyntax;
337         switch (atBits) {
338         case LSC_AT_FLAT:
339             addrTypeMeaning = "Flat";
340             addrType = AddrType::FLAT;
341             if (!allowsFlat)
342                 error(addrTypeLoc, 2,
343                     "this message may not use FLAT address type");
344             break;
345         case LSC_AT_BSS:
346         case LSC_AT_SS:
347             if (atBits == LSC_AT_BSS) {
348                 addrTypeMeaning = "BSS";
349                 addrType = AddrType::BSS;
350                 surfSyntax << "bss";
351             } else {
352                 addrTypeMeaning = "SS";
353                 addrType = AddrType::SS;
354                 surfSyntax << "ss";
355             }
356             if (exDesc.isImm()) {
357                 // XeHPG/XeHPC: we can pull this value out of ExDesc[31:11]
358                 int exDescOff = 11, len = 31 - exDescOff + 1;
359                 surfId = getDescBits(32 + exDescOff, len) << exDescOff;
360                 addField(
361                     "SurfaceStateOffset", exDescOff, len,
362                     surfId.imm, "immediate surface state offset");
363                 surfSyntax << "[" << iga::fmtHex(surfId.imm) << "]";
364             } else {
365                 // XeHPG/XeHPC with reg surface state offset
366                 surfSyntax << "[a0." << (int)exDesc.reg.subRegNum << "]";
367                 surfId = exDesc;
368             }
369             break;
370         case LSC_AT_BTI:
371             addrTypeMeaning = "BTI";
372             addrType = AddrType::BTI;
373             if (exDesc.isImm()) {
374                 uint32_t bti = decodeExDescField("BTI", 24, 8,
375                     [&] (std::stringstream &ss, uint32_t bti) {
376                         ss << "bti[" << bti << "]";
377                     });
378                 surfSyntax << "bti[" << bti << "]";
379                 surfId = bti;
380             } else {
381                 surfSyntax << "bti[a0." << (int)exDesc.reg.subRegNum << "]";
382                 surfId = exDesc;
383             }
384             break;
385         default:
386             addrTypeMeaning = "INVALID AddrType";
387             addrType = AddrType::FLAT;
388             surfSyntax << "?";
389             error(addrTypeLoc, 2, "invalid address type");
390             break;
391         }
392         result.syntax.surface = surfSyntax.str();
393 
394         /////////////////////////////
395         // immediate offset
396         decodeLscImmOff(atBits);
397         //
398         addField("AddrType", lscAddrTypeOffset(), 2, atBits, addrTypeMeaning);
399         //
400         return addrType;
401     }
402 
decodeLscAddrSizeMessageDecoderLSC403     void decodeLscAddrSize() {
404         int addrSzBits = getDescBits(7, 2); // [8:7]
405         std::stringstream asym;
406         const char *aDesc = "";
407         switch (addrSzBits) {
408         case 1:
409             asym << "a16";
410             aDesc = "addresses are 16b";
411             addrSizeBits = 16;
412             break;
413         case 2:
414             asym << "a32";
415             aDesc = "addresses are 32b";
416             addrSizeBits = 32;
417             break;
418         case 3:
419             asym << "a64";
420             aDesc = "addresses are 64b";
421             addrSizeBits = 64;
422             break;
423         default:
424             asym << "a???";
425             aDesc = "address size is invalid";
426             error(7, 2, "invalid address size");
427             break;
428         }
429         // result.syntax.addressType = ":" + asym.str();
430 
431         addrSizeSyntax = asym.str();
432         //
433         addField("AddrSize", 7, 2, addrSzBits, aDesc);
434     }
435 
decodeLscDataSizeMessageDecoderLSC436     void decodeLscDataSize() {
437         std::stringstream dsym;
438         dataSizeRegBits = dataSizeMemBits = 0;
439         std::string meaning;
440         auto dtBits = getDescBits(9,3);
441         switch (dtBits) { // dat size [11:9]
442         case LSC_D8:
443             dataSizeRegBits = dataSizeMemBits = 8;
444             dsym << "d8";
445             meaning = "8b per data element";
446             break;
447         case LSC_D16:
448             dataSizeRegBits = dataSizeMemBits = 16;
449             meaning = "16b per data element";
450             dsym << "d16";
451             break;
452         case LSC_D32:
453             dataSizeRegBits = dataSizeMemBits = 32;
454             dsym << "d32";
455             meaning = "32b per data element";
456             break;
457         case LSC_D64:
458             dataSizeRegBits = dataSizeMemBits = 64;
459             dsym << "d64";
460             meaning = "64b per data element";
461             break;
462         case LSC_D8U32:
463             dataSizeRegBits = 32; dataSizeMemBits = 8;
464             dsym << "d8u32";
465             meaning = "load 8b into the low 8b of 32b register elements "
466                 "(upper bits are undefined)";
467             break;
468         case LSC_D16U32:
469             dataSizeRegBits = 32; dataSizeMemBits = 16;
470             dsym << "d16u32";
471             meaning = "load 16b into the low 16b of 32b register elements "
472                 "(upper bits are undefined)";
473             break;
474         case LSC_D16U32H:
475             dataSizeRegBits = 32; dataSizeMemBits = 16;
476             extraAttrs |= MessageInfo::Attr::EXPAND_HIGH;
477             dsym << "d16u32h";
478             meaning =
479                 "load 16b into the high half of 32b register elements";
480             break;
481         default:
482             dsym << "0x" << std::uppercase << std::hex << dtBits;
483             meaning = "???";
484         }
485         //
486         // result.syntax.dataType = ":" + dsym.str();
487         dataTypePrefixSyntax = dsym.str();
488 
489         addField("DataSize", 9, 3, dtBits, meaning);
490     }
491 
decodeLscVecSizeMessageDecoderLSC492     void decodeLscVecSize()
493     {
494         if (lookupSendOp(op).hasChMask()) {
495             decodeLscVecSizeQuad();
496         } else {
497             decodeLscVecSizeNormal();
498         }
499     }
500 
decodeLscVecSizeNormalMessageDecoderLSC501     void decodeLscVecSizeNormal() {
502         std::stringstream vsym;
503 
504         uint32_t vecSzEncd = getDescBits(12, 3); // [14:12]
505         switch (vecSzEncd) {
506         case LSC_V1:  vectorSize =  1; break;
507         case LSC_V2:  vectorSize =  2; break;
508         case LSC_V3:  vectorSize =  3; break;
509         case LSC_V4:  vectorSize =  4; break;
510         case LSC_V8:  vectorSize =  8; break;
511         case LSC_V16: vectorSize = 16; break;
512         case LSC_V32: vectorSize = 32; break;
513         case LSC_V64: vectorSize = 64; break;
514         default:
515             vsym << "x?";
516         }
517         bool opIsBlock2d =
518             op == SendOp::LOAD_BLOCK2D || op == SendOp::STORE_BLOCK2D;
519         auto transposed =
520             decodeDescBitField(
521                 "DataOrder", 15,
522                 "non-transposed (vector elements are in successive registers)",
523                 "transposed (vector elements are in the same register)");
524         if (vectorSize > 1 || transposed && !opIsBlock2d) {
525             vsym << 'x' << vectorSize;
526         }
527         if (transposed && op == SendOp::LOAD_STATUS) {
528             error(15, 1, "data order must be non-transposed for this op");
529         }
530         std::stringstream vdesc;
531         vdesc << "each address accesses " << vectorSize << " element";
532         if (vectorSize != 1)
533             vdesc << "s";
534         if (!opIsBlock2d)
535             addField("VecSize", 12, 3, vecSzEncd, vdesc.str());
536         if (transposed) {
537             vsym << 't';
538             extraAttrs |= MessageInfo::Attr::TRANSPOSED;
539             expectedExecSize = 1; // all transpose messages are SIMD1
540         }
541 
542         if (op == SendOp::LOAD_BLOCK2D) {
543             bool vnni = decodeDescBitField(
544                 "Block2dVnniTransform", 7, "disabled", "enabled");
545             if (vnni)
546                 vsym << 'v';
547         }
548 
549         vectorSuffixSyntax = vsym.str();
550     }
551 
decodeLscVecSizeQuadMessageDecoderLSC552     void decodeLscVecSizeQuad() {
553         // LSC channels *enabled* is the inverse of the old messages
554         // because the old ChMask used in untyped old (scatter4/gather4)
555         // was really a channel "disable" mask
556         auto chEn = getDescBits(12, 4);
557         vectorSize = 0;
558         for (int i = 0; i < 4; ++i) {
559             if ((1<<i) & chEn) {
560                 vectorSize++;
561             }
562         }
563         extraAttrs |= MessageInfo::Attr::HAS_CHMASK;
564 
565         std::stringstream vsym;
566         vsym << ".";
567         if (chEn & 1)
568             vsym << "x";
569         if (chEn & 2)
570             vsym << "y";
571         if (chEn & 4)
572             vsym << "z";
573         if (chEn & 8)
574             vsym << "w";
575         vectorSuffixSyntax = vsym.str();
576 
577         addField("CompEn", 12, 4, chEn, vsym.str());
578     }
579 
580 
581     ///////////////////////////////////////////////////////////////////////////
decodeLscMessageMessageDecoderLSC582     void decodeLscMessage(
583         const char *doc,
584         std::string msgDesc,
585         SendOp lscOp)
586     {
587         const std::string symbol = ToSyntax(lscOp);
588         op = lscOp;
589 
590         bool opSupportsUvr =
591             lscOp == SendOp::LOAD_QUAD ||
592             lscOp == SendOp::STORE_QUAD ||
593             lscOp == SendOp::STORE_UNCOMPRESSED_QUAD;
594         if (sfid == SFID::TGM && opSupportsUvr) {
595             extraAttrs |= MessageInfo::Attr::HAS_UVRLOD;
596         }
597 
598         addField("Opcode", 0, 6, getDescBits(0, 6), symbol);
599 
600         setDoc(doc);
601         //
602         if (hasPayloadSizesInDesc() &&
603             exDesc.isImm() && (exDesc.imm & 0x7FF))
604         {
605             // bit 11 may or may not be available
606             error(0, 12, "ExDesc[11:0] must be 0 on this platform");
607         }
608         //
609         SendDesc surfaceId(0x0);
610         AddrType addrType = decodeLscAddrType(surfaceId);
611         //
612         if (op == SendOp::LOAD_BLOCK2D || op == SendOp::STORE_BLOCK2D) {
613             addrSizeBits = 64;
614             addrSizeSyntax = "a64";
615         } else {
616             decodeLscAddrSize();
617         }
618         //
619         decodeLscDataSize();
620         //
621         expectedExecSize =
622             op == SendOp::LOAD_BLOCK2D || op == SendOp::STORE_BLOCK2D ? 1 :
623                 sfid == SFID::TGM ? DEFAULT_EXEC_SIZE/2 : DEFAULT_EXEC_SIZE;
624         decodeLscVecSize();
625         //
626         if (sfid == SFID::TGM)
627             extraAttrs |= MessageInfo::Attr::TYPED;
628         if (sfid == SFID::SLM)
629             extraAttrs |= MessageInfo::Attr::SLM;
630         //
631         CacheOpt l1 = CacheOpt::DEFAULT, l3 = CacheOpt::DEFAULT;
632         const auto &opInfo = lookupSendOp(op);
633         bool hasCc =
634             opInfo.isLoad() || opInfo.isStore() || opInfo.isAtomic();
635         if (sfid != SFID::SLM && hasCc) {
636             decodeLscCacheControl(op, l1, l3);
637         }
638         //
639         result.syntax.mnemonic = symbol;
640         //
641         result.syntax.controls += '.';
642         result.syntax.controls += dataTypePrefixSyntax;
643         result.syntax.controls += vectorSuffixSyntax;
644         if (!addrSizeSyntax.empty()) {
645             result.syntax.controls += '.';
646             result.syntax.controls += addrSizeSyntax;
647         }
648         if (!cacheControlSyntax.empty()) {
649             result.syntax.controls += cacheControlSyntax;
650         }
651         //
652         setScatterGatherOpX(
653             symbolFromSyntax(),
654             msgDesc,
655             op,
656             addrType,
657             surfaceId,
658             l1,
659             l3,
660             addrSizeBits,
661             dataSizeRegBits,
662             dataSizeMemBits,
663             vectorSize,
664             int(instExecSize),
665             extraAttrs);
666         if (lookupSendOp(op).hasChMask()) {
667             result.info.channelsEnabled = getDescBits(12, 4);
668             if (result.info.channelsEnabled == 0)
669                 error(12, 4, "no channels enabled on quad message");
670         }
671     }
672 
673 
setLscAtomicMessageMessageDecoderLSC674     void setLscAtomicMessage(
675         const char *doc,
676         std::string msgDesc,
677         SendOp atOp)
678     {
679         extraAttrs |=
680             getDescBits(20, 5) != 0 ?
681                 MessageInfo::Attr::ATOMIC_RETURNS : MessageInfo::Attr::NONE;
682         if (sfid == SFID::TGM)
683             extraAttrs |= MessageInfo::Attr::HAS_UVRLOD;
684         decodeLscMessage(doc, msgDesc, atOp);
685     }
686 
687 
tryDecodeLscMessageDecoderLSC688     void tryDecodeLsc() {
689         int lscOp = getDescBits(0, 6); // Opcode[5:0]
690         switch (lscOp) {
691         case LSC_LOAD:
692             decodeLscMessage(
693                 chooseDoc(nullptr, "53523", "63970"),
694                 "gathering load",
695                 SendOp::LOAD);
696             break;
697         case LSC_STORE:
698             decodeLscMessage(
699                 chooseDoc(nullptr, "53523", "63980"),
700                 "scattering store",
701                 SendOp::STORE);
702             break;
703         case LSC_STORE_UNCOMPRESSED:
704             decodeLscMessage(
705                 chooseDoc(nullptr, "53532", "63984"),
706                 "scattering store uncompressed",
707                 SendOp::STORE_UNCOMPRESSED);
708             break;
709         case LSC_STORE_UNCOMPRESSED_QUAD:
710             decodeLscMessage(
711                 chooseDoc(nullptr, "55224", "63985"),
712                 "store quad uncompressed",
713                 SendOp::STORE_UNCOMPRESSED_QUAD);
714             break;
715         case LSC_LOAD_QUAD:
716             decodeLscMessage(
717                 chooseDoc(nullptr, "53527", "63977"),
718                 "quad load (a.k.a. load_cmask)",
719                 SendOp::LOAD_QUAD);
720             break;
721         case LSC_STORE_QUAD:
722             decodeLscMessage(
723                 chooseDoc(nullptr, "53527", "63983"),
724                 "quad store (a.k.a. store_cmask)",
725                 SendOp::STORE_QUAD);
726             break;
727         case LSC_LOAD_STRIDED:
728             decodeLscMessage(
729                 chooseDoc(nullptr, "53525", "63976"),
730                 "strided load (a.k.a load_block)",
731                 SendOp::LOAD_STRIDED);
732             break;
733         case LSC_STORE_STRIDED:
734             decodeLscMessage(
735                 chooseDoc(nullptr, "53526", "63982"),
736                 "strided store (a.k.a store_block)",
737                 SendOp::STORE_STRIDED);
738             break;
739         case LSC_LOAD_BLOCK2D:
740             decodeLscMessage(
741                 chooseDoc(nullptr, "53680", "63972"),
742                 "block2d load",
743                 SendOp::LOAD_BLOCK2D);
744             break;
745         case LSC_STORE_BLOCK2D:
746             decodeLscMessage(
747                 chooseDoc(nullptr, "53530", "63981"),
748                 "block2d store",
749                 SendOp::STORE_BLOCK2D);
750             break;
751         case LSC_ATOMIC_IINC:
752             setLscAtomicMessage(
753                 chooseDoc(nullptr, "53538", "63955"),
754                 "atomic integer increment",
755                 SendOp::ATOMIC_IINC);
756             break;
757         case LSC_ATOMIC_IDEC:
758             setLscAtomicMessage(
759                 chooseDoc(nullptr, "53539", "63949"),
760                 "atomic integer decrement",
761                 SendOp::ATOMIC_IDEC);
762             break;
763         case LSC_ATOMIC_LOAD:
764             setLscAtomicMessage(
765                 chooseDoc(nullptr, "53540", "63956"),
766                 "atomic load",
767                 SendOp::ATOMIC_LOAD);
768             break;
769         case LSC_ATOMIC_STORE:
770             setLscAtomicMessage(
771                 chooseDoc(nullptr, "53541", "63960"),
772                 "atomic store",
773                 SendOp::ATOMIC_STORE);
774             break;
775         case LSC_ATOMIC_IADD:
776             setLscAtomicMessage(
777                 chooseDoc(nullptr, "53542", "63946"),
778                 "atomic integer add",
779                 SendOp::ATOMIC_IADD);
780             break;
781         case LSC_ATOMIC_ISUB:
782             setLscAtomicMessage(
783                 chooseDoc(nullptr, "53543", "63961"),
784                 "atomic integer subtract",
785                 SendOp::ATOMIC_ISUB);
786             break;
787         case LSC_ATOMIC_SMIN:
788             setLscAtomicMessage(
789                 chooseDoc(nullptr, "53544", "63958"),
790                 "atomic signed-integer minimum",
791                 SendOp::ATOMIC_SMIN);
792             break;
793         case LSC_ATOMIC_SMAX:
794             setLscAtomicMessage(
795                 chooseDoc(nullptr, "53545", "63957"),
796                 "atomic signed-integer maximum",
797                 SendOp::ATOMIC_SMAX);
798             break;
799         case LSC_ATOMIC_UMIN:
800             setLscAtomicMessage(
801                 chooseDoc(nullptr, "53546", "63963"),
802                 "atomic unsigned-integer minimum",
803                 SendOp::ATOMIC_UMIN);
804             break;
805         case LSC_ATOMIC_UMAX:
806             setLscAtomicMessage(
807                 chooseDoc(nullptr, "53547", "63962"),
808                 "atomic unsigned-integer maximum",
809                 SendOp::ATOMIC_UMAX);
810             break;
811         case LSC_ATOMIC_ICAS:
812             setLscAtomicMessage(
813                 chooseDoc(nullptr, "53555", "63948"),
814                 "atomic integer compare and swap",
815                 SendOp::ATOMIC_ICAS);
816             break;
817         case LSC_ATOMIC_FADD:
818             setLscAtomicMessage(
819                 chooseDoc(nullptr, "53548", "63950"),
820                 "atomic float add",
821                 SendOp::ATOMIC_FADD);
822             break;
823         case LSC_ATOMIC_FSUB:
824             setLscAtomicMessage(
825                 chooseDoc(nullptr, "53549", "63954"),
826                 "atomic float subtract",
827                 SendOp::ATOMIC_FSUB);
828             break;
829         case LSC_ATOMIC_FMIN:
830             setLscAtomicMessage(
831                 chooseDoc(nullptr, "53550", "63953"),
832                 "atomic float minimum",
833                 SendOp::ATOMIC_FMIN);
834             break;
835         case LSC_ATOMIC_FMAX:
836             setLscAtomicMessage(
837                 chooseDoc(nullptr, "53551", "63952"),
838                 "atomic float maximum",
839                 SendOp::ATOMIC_FMAX);
840             break;
841         case LSC_ATOMIC_FCAS:
842             setLscAtomicMessage(
843                 chooseDoc(nullptr, "53556", "63951"),
844                 "atomic float compare and swap",
845                 SendOp::ATOMIC_FCAS);
846             break;
847         case LSC_ATOMIC_AND:
848             setLscAtomicMessage(
849                 chooseDoc(nullptr, "53552", "63947"),
850                 "atomic logical and",
851                 SendOp::ATOMIC_AND);
852             break;
853         case LSC_ATOMIC_OR:
854             setLscAtomicMessage(
855                 chooseDoc(nullptr, "53553", "63959"),
856                 "atomic logical or",
857                 SendOp::ATOMIC_OR);
858             break;
859         case LSC_ATOMIC_XOR:
860             setLscAtomicMessage(
861                 chooseDoc(nullptr, "53554", "63964"),
862                 "atomic logical xor",
863                 SendOp::ATOMIC_XOR);
864             break;
865         case LSC_CCS:
866             decodeLscCcs();
867             break;
868         case LSC_RSI: {
869             addField("Opcode", 0, 6, getDescBits(0, 6), "read_state");
870             setDoc(nullptr, "54000", "63979");
871             //
872             std::stringstream descs;
873             descs << "read state information";
874             result.syntax.mnemonic = "read_state";
875             //
876             SendDesc surfId = 0;
877             auto at = decodeLscAddrType(surfId, false);
878             //
879             // XeHPG returns 2 GRF, XeHPC+ only 1
880             // #54152
881             int rlen = platform() == Platform::XE_HPG ? 2 : 1;
882             setSpecialOpX(
883                 result.syntax.mnemonic,
884                 descs.str(),
885                 SendOp::READ_STATE,
886                 at,
887                 surfId,
888                 1, // mlen = 1 (U,V,R,LOD)
889                 rlen);
890             result.info.addrSizeBits = 64;
891             result.info.execWidth = 1;
892             result.info.attributeSet |= MessageInfo::Attr::HAS_UVRLOD;
893             result.info.attributeSet |= MessageInfo::Attr::TRANSPOSED;
894             break;
895         }
896         case LSC_FENCE:
897             decodeLscFence();
898             break;
899         case LSC_LOAD_STATUS:
900             if (getDescBit(15)) {
901                 error(15, 1, "transpose forbidden on load_status");
902             }
903             if (getDescBits(20, 5) != 1) {
904                 error(20, 5, "load_status must have rlen (Desc[24:20] == 1)");
905             }
906             decodeLscMessage(
907                 chooseDoc(nullptr, "53531", "63978"),
908                 "load status",
909                 SendOp::LOAD_STATUS);
910             break;
911         default:
912             addField("Opcode", 0, 6,
913                 getDescBits(0, 6), "invalid message opcode");
914             error(0, 6, "unsupported message opcode");
915             return;
916         }
917     }
918 
decodeLscCcsMessageDecoderLSC919     void decodeLscCcs() {
920         addField("Opcode", 0, 6,
921             static_cast<uint32_t>(LSC_CCS),
922             "compression-state control");
923         //
924         std::stringstream descs;
925         result.syntax.mnemonic = "ccs";
926         descs << "compression-state control";
927         auto ccsOpBits = getDescBits(17, 3);
928         SendOp sop = SendOp::INVALID;
929         std::string opDesc;
930         switch (ccsOpBits) {
931         case 0:
932             sop = SendOp::CCS_PC;
933             result.syntax.mnemonic += "_pc";
934             opDesc = " page clear (64k)";
935             setDoc(nullptr, "53536", "63965");
936             break;
937         case 1:
938             sop = SendOp::CCS_SC;
939             result.syntax.mnemonic += "_sc";
940             opDesc = " sector clear (2-cachelines)";
941             setDoc(nullptr, "53534", "63967");
942             result.syntax.controls += vectorSuffixSyntax;
943             break;
944         case 2:
945             sop = SendOp::CCS_PU;
946             result.syntax.mnemonic += "_pu";
947             opDesc = " page uncompress (64k)";
948             setDoc(nullptr, "53537", "63966");
949             break;
950         case 3:
951             sop = SendOp::CCS_SU;
952             result.syntax.mnemonic += "_su";
953             opDesc = " sector uncompress (2-cachelines)";
954             setDoc(nullptr, "53535", "63968");
955             result.syntax.controls += vectorSuffixSyntax;
956             break;
957         default: {
958             std::stringstream ss;
959             ss << ".0x" << std::hex << std::uppercase << ccsOpBits;
960             result.syntax.controls += ss.str();
961             opDesc = "invalid ccs sop";
962             error(17, 3, "invalid ccs sop");
963         }
964         } // switch
965         descs << opDesc;
966         //
967         addField("CcsOp", 17, 3, ccsOpBits, opDesc);
968         //
969         SendDesc surfId = 0;
970         auto at = decodeLscAddrType(surfId);
971         if (ccsOpBits == 0 || ccsOpBits == 2) {
972             // page operations: pc, pu
973             if (at != AddrType::FLAT)
974                 error(29, 2, "ccs_{pcc,pcu} requires FLAT address type");
975             std::stringstream dummy;
976             decodeLscAddrSize();
977             if (addrSizeBits != 64)
978                 error(7, 2, "AddrSize must be A64");
979             result.info.execWidth = 1;
980             expectedExecSize = 1;
981             // sector uncompress has addresses
982             // FIXME: I could derive this via exec size and a64
983             int mlen =
984                 ccsOpBits == 1 || ccsOpBits == 3 ?
985                     4 : // A64_PAYLOAD_SIMT32 = 4 regs
986                     1;  // A64_PAYLOAD_SIMT1  = 1 reg
987             int rlen = 0;  // always 0
988             setSpecialOpX(
989                 symbolFromSyntax(),
990                 descs.str(),
991                 sop,
992                 at,
993                 surfId,
994                 mlen,
995                 rlen);
996         } else {
997             // sector operations
998             ///
999             // these are vector messages
1000             expectedExecSize = DEFAULT_EXEC_SIZE;
1001             // const int SECTOR_SIZE_BITS = 128*8;
1002             // result.syntax.controls += ".d1024";
1003             result.syntax.controls += vectorSuffixSyntax;
1004             result.syntax.controls +=
1005                 addrSizeBits == 64 ? ".a64" : ".a32";
1006             //
1007             setScatterGatherOp(
1008                 symbolFromSyntax(),
1009                 descs.str(),
1010                 sop,
1011                 at,
1012                 surfId,
1013                 addrSizeBits,
1014                 0, // dateSize = 0; nothing returned
1015                 vectorSize,
1016                 DEFAULT_EXEC_SIZE,
1017                 extraAttrs);
1018         }
1019     }
1020 
decodeLscFenceMessageDecoderLSC1021     void decodeLscFence() {
1022         addField("Opcode", 0, 6, getDescBits(0, 6), "fence");
1023         setDoc(nullptr, "53533", "63969");
1024         //
1025         std::stringstream descs;
1026         result.syntax.mnemonic = "fence";
1027         descs << "fence";
1028         //
1029         std::stringstream fenceOpts;
1030         addLscFenceFields(fenceOpts, descs);
1031         result.syntax.controls += fenceOpts.str();
1032         //
1033         setSpecialOpX(
1034             symbolFromSyntax(),
1035             descs.str(),
1036             SendOp::FENCE,
1037             AddrType::FLAT,
1038             0, // no surface
1039             1, // mlen = 1
1040             0); // rlen = 0
1041     }
1042 }; // MessageDecoderLSC
1043 
1044 
decodeDescriptorsLSC(Platform platform,SFID sfid,ExecSize execSize,SendDesc exDesc,SendDesc desc,DecodeResult & result)1045 void iga::decodeDescriptorsLSC(
1046     Platform platform, SFID sfid, ExecSize execSize,
1047     SendDesc exDesc, SendDesc desc,
1048     DecodeResult &result)
1049 {
1050     MessageDecoderLSC md(
1051         platform, sfid, execSize,
1052         exDesc, desc,
1053         result);
1054     md.tryDecodeLsc();
1055 }
1056 
1057 // descriptor bits [19:17]: cache control
encLdStVecCachingBits17_19(SendOp op,CacheOpt cachingL1,CacheOpt cachingL3,SendDesc & desc)1058 static bool encLdStVecCachingBits17_19(
1059     SendOp op,
1060     CacheOpt cachingL1, CacheOpt cachingL3,
1061     SendDesc &desc)
1062 {
1063     const auto &opInfo = lookupSendOp(op);
1064     bool isLd = opInfo.isLoad();
1065     bool isSt = opInfo.isStore();
1066     bool isAt = opInfo.isAtomic();
1067     bool isStAt = isSt || isAt;
1068     auto ccMatches = [&](CacheOpt l1, CacheOpt l3, uint32_t enc) {
1069         if (l1 == cachingL1 && l3 == cachingL3) {
1070             desc.imm |= enc << 17;
1071             return true;
1072         }
1073         return false;
1074     };
1075     bool matched =
1076         ccMatches(CacheOpt::DEFAULT, CacheOpt::DEFAULT, LSC_DF_DF) ||
1077         //
1078         ccMatches(CacheOpt::UNCACHED, CacheOpt::UNCACHED, LSC_UC_UC) ||
1079         //
1080         (isLd &&
1081             ccMatches(CacheOpt::UNCACHED, CacheOpt::CACHED, LSC_UC_CA)) ||
1082         (isStAt &&
1083             ccMatches(CacheOpt::UNCACHED, CacheOpt::WRITEBACK, LSC_UC_WB)) ||
1084         //
1085         (isLd &&
1086             ccMatches(CacheOpt::CACHED, CacheOpt::UNCACHED, LSC_CA_UC)) ||
1087         (isSt &&
1088             ccMatches(
1089                 CacheOpt::WRITETHROUGH, CacheOpt::UNCACHED, LSC_WT_UC)) ||
1090         //
1091         (isLd &&
1092             ccMatches(CacheOpt::CACHED, CacheOpt::CACHED, LSC_CA_CA)) ||
1093         (isSt &&
1094             ccMatches(
1095                 CacheOpt::WRITETHROUGH, CacheOpt::WRITEBACK, LSC_WT_WB)) ||
1096         //
1097         ccMatches(CacheOpt::STREAMING, CacheOpt::UNCACHED, LSC_ST_UC) ||
1098         //
1099         (isLd &&
1100             ccMatches(CacheOpt::STREAMING, CacheOpt::CACHED, LSC_ST_CA)) ||
1101         (isSt &&
1102             ccMatches(CacheOpt::STREAMING, CacheOpt::WRITEBACK, LSC_ST_WB)) ||
1103         //
1104         (isLd &&
1105             ccMatches(
1106                 CacheOpt::READINVALIDATE, CacheOpt::CACHED, LSC_RI_CA)) ||
1107         (isSt &&
1108             ccMatches(CacheOpt::WRITEBACK, CacheOpt::WRITEBACK, LSC_WB_WB));
1109     return matched;
1110 }
1111 
1112 
encLdStVecCaching(const Platform & p,SendOp op,CacheOpt cachingL1,CacheOpt cachingL3,SendDesc & desc)1113 static bool encLdStVecCaching(
1114     const Platform& p,
1115     SendOp op,
1116     CacheOpt cachingL1, CacheOpt cachingL3,
1117     SendDesc &desc)
1118 {
1119 
1120     return encLdStVecCachingBits17_19(op, cachingL1, cachingL3, desc);
1121 }
1122 
encLdStVec(Platform p,const VectorMessageArgs & vma,SendDesc & exDesc,SendDesc & desc,std::string & err)1123 static bool encLdStVec(
1124     Platform p,
1125     const VectorMessageArgs &vma,
1126     SendDesc &exDesc,
1127     SendDesc &desc,
1128     std::string &err)
1129 {
1130     desc = 0x0;
1131     exDesc = 0x0;
1132     //
1133     bool hasCMask = false;
1134     switch (vma.op) {
1135     case SendOp::LOAD:            desc.imm |= LSC_LOAD; break;
1136     case SendOp::LOAD_STRIDED:    desc.imm |= LSC_LOAD_STRIDED; break;
1137     case SendOp::LOAD_QUAD:
1138         desc.imm |= LSC_LOAD_QUAD;
1139         hasCMask = true;
1140         break;
1141     case SendOp::LOAD_BLOCK2D:    desc.imm |= LSC_LOAD_BLOCK2D; break;
1142     //
1143     case SendOp::STORE:           desc.imm |= LSC_STORE; break;
1144     case SendOp::STORE_STRIDED:   desc.imm |= LSC_STORE_STRIDED; break;
1145     case SendOp::STORE_QUAD:
1146         desc.imm |= LSC_STORE_QUAD;
1147         hasCMask = true;
1148         break;
1149     case SendOp::STORE_UNCOMPRESSED:
1150         desc.imm |= LSC_STORE_UNCOMPRESSED;
1151         break;
1152     case SendOp::STORE_UNCOMPRESSED_QUAD:
1153         desc.imm |= LSC_STORE_UNCOMPRESSED_QUAD;
1154         hasCMask = true;
1155         break;
1156     case SendOp::STORE_BLOCK2D:   desc.imm |= LSC_STORE_BLOCK2D; break;
1157     //
1158     case SendOp::ATOMIC_AND:      desc.imm |= LSC_ATOMIC_AND;   break;
1159     case SendOp::ATOMIC_FADD:     desc.imm |= LSC_ATOMIC_FADD;  break;
1160     case SendOp::ATOMIC_FCAS:     desc.imm |= LSC_ATOMIC_FCAS;  break;
1161     case SendOp::ATOMIC_FMAX:     desc.imm |= LSC_ATOMIC_FMAX;  break;
1162     case SendOp::ATOMIC_FMIN:     desc.imm |= LSC_ATOMIC_FMIN;  break;
1163     case SendOp::ATOMIC_FSUB:     desc.imm |= LSC_ATOMIC_FSUB;  break;
1164     case SendOp::ATOMIC_IADD:     desc.imm |= LSC_ATOMIC_IADD;  break;
1165     case SendOp::ATOMIC_ICAS:     desc.imm |= LSC_ATOMIC_ICAS;  break;
1166     case SendOp::ATOMIC_IDEC:     desc.imm |= LSC_ATOMIC_IDEC;  break;
1167     case SendOp::ATOMIC_IINC:     desc.imm |= LSC_ATOMIC_IINC;  break;
1168     case SendOp::ATOMIC_ISUB:     desc.imm |= LSC_ATOMIC_ISUB;  break;
1169     case SendOp::ATOMIC_LOAD:     desc.imm |= LSC_ATOMIC_LOAD;  break;
1170     case SendOp::ATOMIC_OR:       desc.imm |= LSC_ATOMIC_OR;    break;
1171     case SendOp::ATOMIC_SMAX:     desc.imm |= LSC_ATOMIC_SMAX;  break;
1172     case SendOp::ATOMIC_SMIN:     desc.imm |= LSC_ATOMIC_SMIN;  break;
1173     case SendOp::ATOMIC_STORE:    desc.imm |= LSC_ATOMIC_STORE; break;
1174     case SendOp::ATOMIC_UMAX:     desc.imm |= LSC_ATOMIC_UMAX;  break;
1175     case SendOp::ATOMIC_UMIN:     desc.imm |= LSC_ATOMIC_UMIN;  break;
1176     case SendOp::ATOMIC_XOR:      desc.imm |= LSC_ATOMIC_XOR;   break;
1177     default:
1178         err = "unsupported op";
1179         return false;
1180     }
1181     bool isBlock2d =
1182         vma.op == SendOp::LOAD_BLOCK2D || vma.op == SendOp::STORE_BLOCK2D;
1183     bool isBlock2dTyped = isBlock2d && vma.sfid == SFID::TGM;
1184     bool isBlock2dUntyped = isBlock2d && vma.sfid != SFID::TGM;
1185     bool hasAddrSizeField = !isBlock2d;
1186 
1187     //
1188     ////////////////////////////////////////
1189     // data size
1190     uint32_t dszEnc = LSC_D8;
1191     if (isBlock2dTyped &&
1192         (vma.dataSizeReg != 32 || vma.dataSizeMem != 32))
1193     {
1194         err = "block2d.tgm must be d32";
1195         return false;
1196     }
1197     if (vma.dataSizeMem == vma.dataSizeReg) {
1198         switch (vma.dataSizeMem) {
1199         case  8: dszEnc = LSC_D8; break;
1200         case 16: dszEnc = LSC_D16; break;
1201         case 32: dszEnc = LSC_D32; break;
1202         case 64: dszEnc = LSC_D64; break;
1203         default: err = "invalid data size"; return false;
1204         }
1205     } else if (vma.dataSizeMem == 8 && vma.dataSizeReg == 32) {
1206         dszEnc = LSC_D8U32;
1207     } else if (vma.dataSizeMem == 16 && vma.dataSizeReg == 32) {
1208         if (vma.dataSizeExpandHigh) {
1209             dszEnc = LSC_D16U32H;
1210         } else {
1211             dszEnc = LSC_D16U32;
1212         }
1213     } else {
1214         err = "invalid data type";
1215         return false;
1216     }
1217     if (!isBlock2dTyped)
1218         desc.imm |= dszEnc << 9;
1219     //
1220     ////////////////////////////////////////
1221     // vector size
1222     if (hasCMask) {
1223         if (vma.dataComponentMask & ~0xF) {
1224             err = "invalid component mask";
1225             return false;
1226         }
1227         desc.imm |= vma.dataComponentMask << 12;
1228     } else if (isBlock2d) {
1229         if (isBlock2dTyped && vma.dataVnni) {
1230             err = "block2d.tgm forbids VNNI";
1231             return false;
1232         } else if (isBlock2dTyped && vma.dataTranspose) {
1233             err = "block2d.tgm forbids transpose data order";
1234             return false;
1235         }
1236         if (vma.dataVnni)
1237             desc.imm |= 1 << 7;
1238         if (vma.dataTranspose)
1239             desc.imm |= 1 << 15;
1240     } else {
1241         uint32_t vecEnc = LSC_V1;
1242         switch (vma.dataVectorSize) {
1243         case  1: vecEnc = LSC_V1; break;
1244         case  2: vecEnc = LSC_V2; break;
1245         case  3: vecEnc = LSC_V3; break;
1246         case  4: vecEnc = LSC_V4; break;
1247         case  8: vecEnc = LSC_V8; break;
1248         case 16: vecEnc = LSC_V16; break;
1249         case 32: vecEnc = LSC_V32; break;
1250         case 64: vecEnc = LSC_V64; break;
1251         default: err = "invalid vector size"; break;
1252         }
1253         if (vma.isAtomic() && vma.dataVectorSize != 1) {
1254             err = "atomics do not support vector operations";
1255             return false;
1256         }
1257         if (vma.dataVnni) {
1258             err = "vnni only valid on block2d operations";
1259             return false;
1260         }
1261         //
1262         desc.imm |= vecEnc << 12;
1263         //
1264         if (vma.dataTranspose) {
1265             desc.imm |= 1 << 15;
1266             //
1267             if (vma.isAtomic()) {
1268                 err = "atomics do not support transpose operations";
1269                 return false;
1270             }
1271         }
1272     } // end vec non-cmask case
1273     //
1274     ////////////////////////////////////////
1275     // caching options
1276     if (vma.isAtomic() &&
1277         vma.cachingL1 != CacheOpt::DEFAULT &&
1278         vma.cachingL1 != CacheOpt::UNCACHED)
1279     {
1280         err = "atomic L1 must be an uncached option";
1281         return false;
1282     } else {
1283         if (!encLdStVecCaching(p, vma.op, vma.cachingL1, vma.cachingL3, desc)) {
1284             err = "invalid cache-control combination";
1285             return false;
1286         }
1287     }
1288     //
1289     ////////////////////////////////////////
1290     // address size
1291     uint32_t asEnc = 0x0;
1292     switch (vma.addrSize) {
1293     case 16: asEnc = LSC_A16; break;
1294     case 32: asEnc = LSC_A32; break;
1295     case 64: asEnc = LSC_A64; break;
1296     default:
1297         err = "unsupported address size";
1298         return false;
1299     }
1300     if (isBlock2dTyped && vma.addrSize != 32) {
1301         err = "block2d.typed address size must be A32";
1302         return false;
1303     }
1304     if (isBlock2dUntyped && vma.addrSize != 64) {
1305         err = "block2d untyped address size must be A64";
1306         return false;
1307     }
1308     if (hasAddrSizeField) {
1309         desc.imm |= asEnc << 7;
1310     }
1311     //
1312     ////////////////////////////////////////
1313     // address type
1314     uint32_t atEnc = 0x0;
1315     switch (vma.addrType) {
1316     case AddrType::FLAT: atEnc = LSC_AT_FLAT; break;
1317     case AddrType::BSS:  atEnc = LSC_AT_BSS; break;
1318     case AddrType::SS:   atEnc = LSC_AT_SS; break;
1319     case AddrType::BTI:  atEnc = LSC_AT_BTI; break;
1320     default:
1321         err = "unsupported address type";
1322         return false;
1323     }
1324     if (isBlock2dTyped && vma.addrType == AddrType::FLAT) {
1325         err = "block2d.typed forbids flat address";
1326         return false;
1327     }
1328     desc.imm |= atEnc << 29;
1329     //
1330     // store the surface
1331     if (vma.addrType != AddrType::FLAT) {
1332         // use exDesc
1333         if (vma.addrType == AddrType::BTI && !vma.addrSurface.isReg()) {
1334             exDesc = vma.addrSurface.imm << 24;
1335         } else {
1336             exDesc = vma.addrSurface;
1337         }
1338     }
1339     //
1340     if (vma.addrType != AddrType::FLAT && vma.sfid == SFID::SLM) {
1341         err = "SLM requires flat address type";
1342         return false;
1343     }
1344     ////////////////////////////////////////
1345     // address scale factor
1346     if (vma.addrScale != 1) {
1347         if (true) { // disable if address scaling is ever added
1348             err = "address scaling not supported on this platform";
1349             return false;
1350         }
1351         int vlen = vma.elementsPerAddress();
1352         int bytesPerElem = vma.dataSizeMem * vlen / 8;
1353         uint32_t addrScEnc = LSC_SCALE_NONE;
1354         if (vma.addrScale > 32) {
1355             err = "scale value is too large";
1356             return false;
1357         } else if (vma.addrScale == bytesPerElem) {
1358             addrScEnc = LSC_SCALE_1X;
1359         } else if (vma.addrScale == 2*bytesPerElem) {
1360             addrScEnc = LSC_SCALE_2X;
1361         } else if (vma.addrScale == 4*bytesPerElem) {
1362             addrScEnc = LSC_SCALE_4X;
1363         } else {
1364             std::stringstream ss;
1365             ss <<
1366                 "invalid scaling factor (must be " <<
1367                 1*bytesPerElem << ", " <<
1368                 2*bytesPerElem << ", or " <<
1369                 4*bytesPerElem << ")";
1370             err = ss.str();
1371             return false;
1372         }
1373         desc.imm |= addrScEnc << 22;
1374     }
1375     //
1376     ////////////////////////////////////////
1377     // address immediate offset
1378     bool hasAddrImmOffset = vma.addrOffset != 0;
1379     hasAddrImmOffset |= vma.addrOffsetX != 0;
1380     hasAddrImmOffset |= vma.addrOffsetY != 0;
1381     if (hasAddrImmOffset) {
1382         bool platformSupportsAddrOff = false;
1383         if (platformSupportsAddrOff) {
1384             err = "address immediate offset not supported on this platform";
1385             return false;
1386         }
1387 
1388     } // else: addrOffset == 0
1389 
1390     ////////////////////////////////////////
1391     // set the surface object
1392     if (vma.addrType == AddrType::FLAT) {
1393         // IR normalization
1394         if (!vma.addrSurface.isImm() || vma.addrSurface.imm != 0) {
1395             err = "malformed IR: flat address model must have surface = 0";
1396             return false;
1397         }
1398     }
1399 
1400     // XeHPG+ have surface in ExDesc
1401     if (vma.addrType == AddrType::BTI && vma.addrSurface.isImm()) {
1402         // BTI takes the high byte
1403         if (vma.addrSurface.imm > 0xFF) {
1404             err = "surface index too large for BTI";
1405             return false;
1406         }
1407         exDesc.imm |= vma.addrSurface.imm << 24;
1408     } else if (vma.addrType != AddrType::FLAT) {
1409         uint32_t ZERO_MASK = 0xFFF;
1410         std::string highBit = "11";
1411 
1412         // if BTI reg or BSS/SS reg/imm with just copy
1413         // BSS/SS with imm, value is already aligned
1414         if (vma.addrType != AddrType::BTI &&
1415             vma.addrSurface.isImm() &&
1416             (vma.addrSurface.imm & ZERO_MASK) != 0)
1417         {
1418             err = "BSS/SS with immediate descriptor require "
1419                 "ExDesc[" + highBit + ":0] to be 0";
1420             return false;
1421         }
1422         exDesc = vma.addrSurface;
1423     }
1424     //
1425     return true;
1426 }
1427 
1428 
encodeDescriptorsLSC(Platform p,const VectorMessageArgs & vma,SendDesc & exDesc,SendDesc & desc,std::string & err)1429 bool iga::encodeDescriptorsLSC(
1430     Platform p,
1431     const VectorMessageArgs &vma,
1432     SendDesc &exDesc,
1433     SendDesc &desc,
1434     std::string &err)
1435 {
1436     if (!sendOpSupportsSyntax(p, vma.op, vma.sfid)) {
1437         err = "unsupported message for SFID";
1438         return false;
1439     }
1440     return encLdStVec(p, vma,
1441         exDesc, desc, err);
1442 }
1443