1 /*========================== begin_copyright_notice ============================
2 
3 Copyright (C) 2021 Intel Corporation
4 
5 SPDX-License-Identifier: MIT
6 
7 ============================= end_copyright_notice ===========================*/
8 
9 #include "IGC/common/StringMacros.hpp"
10 #include "G4_IR.hpp"
11 #include "BuildIR.h"
12 
13 #include <sstream>
14 #include <iomanip>
15 
16 using namespace vISA;
17 
18 
19 ///////////////////////////////////////////////////////////////////////////////
20 // LdSt data type support
ToSymbol(LdStOp op)21 std::string vISA::ToSymbol(LdStOp op)
22 {
23     switch (op) {
24     case LdStOp::LOAD:         return "load";
25     case LdStOp::LOAD_QUAD:    return "load_quad";
26     case LdStOp::LOAD_STRIDED: return "load_strided";
27     case LdStOp::LOAD_BLOCK2D: return "load_block2d";
28     case LdStOp::STORE:         return "store";
29     case LdStOp::STORE_QUAD:    return "store_quad";
30     case LdStOp::STORE_STRIDED: return "store_strided";
31     case LdStOp::STORE_BLOCK2D: return "store_block2d";
32     // general atomics
33     case LdStOp::ATOMIC_LOAD:   return "atomic_load";
34     case LdStOp::ATOMIC_STORE:  return "atomic_store";
35     // floating point
36     case LdStOp::ATOMIC_FADD:   return "atomic_fadd";
37     case LdStOp::ATOMIC_FSUB:   return "atomic_fsub";
38     case LdStOp::ATOMIC_FMIN:   return "atomic_fmin";
39     case LdStOp::ATOMIC_FMAX:   return "atomic_fmax";
40     case LdStOp::ATOMIC_FCAS:   return "atomic_fcas";
41     // integer
42     case LdStOp::ATOMIC_IINC:   return "atomic_iinc";
43     case LdStOp::ATOMIC_IDEC:   return "atomic_idec";
44     case LdStOp::ATOMIC_IADD:   return "atomic_iadd";
45     case LdStOp::ATOMIC_ISUB:   return "atomic_isub";
46     case LdStOp::ATOMIC_ICAS:   return "atomic_icas";
47     case LdStOp::ATOMIC_SMIN:   return "atomic_smin";
48     case LdStOp::ATOMIC_SMAX:   return "atomic_smax";
49     case LdStOp::ATOMIC_UMIN:   return "atomic_umin";
50     case LdStOp::ATOMIC_UMAX:   return "atomic_umax";
51     // integer bitwise
52     case LdStOp::ATOMIC_AND:    return "atomic_and";
53     case LdStOp::ATOMIC_XOR:    return "atomic_xor";
54     case LdStOp::ATOMIC_OR:     return "atomic_or";
55     default:
56         break;
57     }
58     return "???";
59 }
60 
ToSymbol(Caching c)61 std::string vISA::ToSymbol(Caching c)
62 {
63     switch (c)
64     {
65     case Caching::CA: return ".ca";
66     case Caching::DF: return ".df";
67     case Caching::RI: return ".ri";
68     case Caching::ST: return ".st";
69     case Caching::WB: return ".wb";
70     case Caching::WT: return ".wt";
71     case Caching::UC: return ".uc";
72     default: return "?";
73     }
74 }
75 
ToSymbol(Caching l1,Caching l3)76 std::string vISA::ToSymbol(Caching l1, Caching l3)
77 {
78     if (l1 == Caching::DF && l3 == Caching::DF)
79         return "";
80     else
81         return ToSymbol(l1) + ToSymbol(l3);
82 }
83 
getCount() const84 int ElemsPerAddr::getCount() const
85 {
86     if (!isChannelMask())
87         return count;
88     return
89         ((int(channels) & int(Chs::X)) ? 1 : 0) +
90         ((int(channels) & int(Chs::Y)) ? 1 : 0) +
91         ((int(channels) & int(Chs::Z)) ? 1 : 0) +
92         ((int(channels) & int(Chs::W)) ? 1 : 0);
93 }
94 
getMask() const95 ElemsPerAddr::Chs ElemsPerAddr::getMask() const
96 {
97     MUST_BE_TRUE(isChannelMask(), "must be a channel mask vector");
98     return channels;
99 }
100 
str() const101 std::string ElemsPerAddr::str() const
102 {
103     if (isChannelMask()) {
104         if (channels == Chs::INVALID)
105             return ".?";
106         // e.g. .xyz
107         std::string s = ".";
108         if (int(channels) & int(Chs::X))
109             s += 'x';
110         if (int(channels) & int(Chs::Y))
111             s += 'y';
112         if (int(channels) & int(Chs::Z))
113             s += 'z';
114         if (int(channels) & int(Chs::W))
115             s += 'w';
116         return s;
117     } else {
118         // e.g. x4 (note absence of a dot)
119         return "x" + std::to_string(count);
120     }
121 }
122 
123 ///////////////////////////////////////////////////////////////////////////////
124 // G4_SendDesc implementations
125 ///////////////////////////////////////////////////////////////////////////////
roundUpToGrf(int bytes)126 static inline int roundUpToGrf(int bytes) {
127     return g4::alignUp((int)getGRFSize(), bytes) / (int)getGRFSize();
128 }
129 
getSrc0LenRegs() const130 size_t G4_SendDesc::getSrc0LenRegs() const {
131     return roundUpToGrf(getSrc0LenBytes());
132 }
133 
getDstLenRegs() const134 size_t G4_SendDesc::getDstLenRegs() const {
135     return roundUpToGrf(getDstLenBytes());
136 }
137 
getSrc1LenRegs() const138 size_t G4_SendDesc::getSrc1LenRegs() const {
139     return roundUpToGrf(getSrc1LenBytes());
140 }
141 
isHDC() const142 bool G4_SendDesc::isHDC() const
143 {
144     auto funcID = getSFID();
145     return
146         funcID == SFID::DP_DC0 ||
147         funcID == SFID::DP_DC1 ||
148         funcID == SFID::DP_DC2 ||
149         funcID == SFID::DP_CC;
150 }
151 
isLSC() const152 bool G4_SendDesc::isLSC() const
153 {
154     switch (getSFID()) {
155     case SFID::UGM:
156     case SFID::UGML:
157     case SFID::TGM:
158     case SFID::SLM:
159         return true;
160     default:
161         break;
162     }
163     return false;
164 }
165 
166 
167 
168 ///////////////////////////////////////////////////////////////////////////////
169 // G4_SendDescLdSt implementations
170 ///////////////////////////////////////////////////////////////////////////////
G4_SendDescLdSt(SFID sfid,LdStOp _op,G4_ExecSize _execSize,AddrType at,int _addrBits,int _addrDims,int elemBitsMem,int elemBitsReg,int elemsPerAddr,LdStOrder _order,Caching _l1,Caching _l3,G4_Operand * surf,ImmOff _immOff,LdStAttrs _attrs)171 G4_SendDescLdSt::G4_SendDescLdSt(
172     SFID sfid,
173     LdStOp _op,
174     G4_ExecSize _execSize,
175     //
176     // addr params
177     AddrType at, int _addrBits, int _addrDims,
178     //
179     // data params
180     int elemBitsMem, int elemBitsReg, int elemsPerAddr,
181     LdStOrder _order,
182     //
183     // ext info
184     Caching _l1, Caching _l3,
185     G4_Operand *surf,
186     ImmOff _immOff,
187     LdStAttrs _attrs)
188     : G4_SendDesc(G4_SendDesc::Kind::LDST, sfid, _execSize),
189     op(_op),
190     //
191     addrType(at), addrBits(_addrBits), addrDims(_addrDims),
192     //
193     elemBitsMem(elemBitsMem), elemBitsReg(elemBitsReg),
194     elemPerAddr(elemsPerAddr), order(_order),
195     l1(_l1), l3(_l3),
196     surface(surf), immOff(_immOff),
197     attrs(_attrs)
198 {
199 }
200 
toExecSlots(const G4_SendDescLdSt & d)201 static size_t toExecSlots(const G4_SendDescLdSt &d)
202 {
203     int minExecSize = 8;
204     if (getGenxPlatform() >= TARGET_PLATFORM::GENX_PVC)
205         minExecSize = 16;
206     MUST_BE_TRUE(false, "TODO: needs to deal with half size LSC messages");
207     MUST_BE_TRUE(false, "TODO: need to deal with varying typed message sizes");
208     // e.g. deal with
209     //   SIMD4 typed ...
210     //   SIMD4 untyped...
211     // (or we make the descriptor creator just pass the right exec size in)
212     int execSlots = std::max((int)d.getExecSize(), minExecSize);
213     return (size_t)execSlots;
214 }
215 
getSrc0LenBytes() const216 size_t G4_SendDescLdSt::getSrc0LenBytes() const
217 {
218     if (overrideSrc0LengthBytesValue >= 0) {
219         return (size_t)overrideSrc0LengthBytesValue;
220     }
221     switch (op) {
222     case LdStOp::LOAD_STRIDED:
223     case LdStOp::STORE_STRIDED:
224         return 8 + 4;  // address field is 64b (even for A32) + pitch is 32b
225     case LdStOp::LOAD_BLOCK2D:
226     case LdStOp::STORE_BLOCK2D:
227         // [243:0] ~ 256b = 32B
228         return 32;
229     default:
230         break; // fallthrough to other logic
231     }
232     if (order == LdStOrder::SCALAR) {
233         // transpose messages send one address only
234         return elemPerAddr / 8;
235     } else {
236         MUST_BE_TRUE(false, "TODO: needs to deal with half HDC gunk");
237         MUST_BE_TRUE(false, "TODO: needs to deal with addrDims");
238         int execSlots = std::max((int)execSize, 16);
239         return (size_t)(execSlots * elemPerAddr / 8);
240     }
241 }
242 
getSrc1LenBytes() const243 size_t G4_SendDescLdSt::getSrc1LenBytes() const
244 {
245     if (overrideSrc1LengthBytesValue >= 0) {
246         return (size_t)overrideSrc1LengthBytesValue;
247     }
248     if (order == LdStOrder::SCALAR) {
249         // transpose messages send one address only
250         return elemPerAddr / 8;
251     } else {
252         return toExecSlots(*this) * elemBitsReg;
253     }
254     MUST_BE_TRUE(false, "TODO: compute data bytes sent");
255     return (size_t)-1;
256 }
257 
getDstLenBytes() const258 size_t G4_SendDescLdSt::getDstLenBytes() const
259 {
260     if (overrideDstLengthBytesValue >= 0) {
261         return (size_t)overrideDstLengthBytesValue;
262     }
263     MUST_BE_TRUE(false, "TODO: compute bytes received");
264     return (size_t)-1;
265 }
setCaching(Caching _l1,Caching _l3)266 void G4_SendDescLdSt::setCaching(Caching _l1, Caching _l3)
267 {
268     l1 = _l1;
269     l3 = _l3;
270 }
isSLM() const271 bool G4_SendDescLdSt::isSLM() const
272 {
273     if (getSFID() == SFID::SLM)
274         return true;
275     MUST_BE_TRUE(!isHDC(), "HDC SLM not supported (yet)");
276     return false;
277 }
278 
getAccessType() const279 SendAccess G4_SendDescLdSt::getAccessType() const
280 {
281     if ((int(op) & int(LDST_LOAD_GROUP)) != 0)
282         return SendAccess::READ_ONLY;
283     else if ((int(op) & int(LDST_STORE_GROUP)) != 0)
284         return SendAccess::WRITE_ONLY;
285     else if ((int(op) & int(LDST_ATOMIC_GROUP)) != 0)
286         return hasAttrs(LdStAttrs::ATOMIC_RETURN) ?
287         SendAccess::READ_WRITE : SendAccess::WRITE_ONLY;
288 
289     MUST_BE_TRUE(false, "unsupported op group");
290     return SendAccess::INVALID;
291 }
292 
isAtomic() const293 bool G4_SendDescLdSt::isAtomic() const
294 {
295     return (int(op) & int(LDST_ATOMIC_GROUP)) != 0;
296 }
297 
isTyped() const298 bool G4_SendDescLdSt::isTyped() const
299 {
300     if (getSFID() == SFID::TGM)
301         return true;
302     return false;
303 }
304 
ToSymbol(vISA::SFID sfid)305 static std::string ToSymbol(vISA::SFID sfid)
306 {
307     switch (sfid) {
308     case SFID::UGM:  return ".ugm";
309     case SFID::UGML: return ".ugml";
310     case SFID::SLM:  return ".slm";
311     case SFID::TGM:  return ".tgm";
312         // these aren't necessarily supported yet
313     case SFID::DP_DC0:  return ".dc0";
314     case SFID::DP_DC1:  return ".dc1";
315     case SFID::DP_DC2:  return ".dc2";
316     case SFID::DP_CC: return ".dcro";
317     case SFID::URB:  return ".urb";
318         // others not needed
319     default: break;
320     }
321     return ".?";
322 }
323 
// Formats a data-size symbol from the register and memory element widths.
//   reg == mem : "d<reg>"          e.g. d32
//   otherwise  : "d<mem>a<reg>"    e.g. d8a32
static std::string ToSymbolDataSize(int reg, int mem)
{
    std::string symbol = "d";
    symbol += std::to_string(mem); // equals reg when the widths match
    if (reg != mem) {
        symbol += "a";
        symbol += std::to_string(reg);
    }
    return symbol;
}
330 
ToSymbol(AddrType at)331 static std::string ToSymbol(AddrType at)
332 {
333     switch (at) {
334     case AddrType::FLAT: return "";
335     case AddrType::BSS:  return "bss";
336     case AddrType::SS:   return "ss";
337     case AddrType::BTI:  return "bti";
338     default: break;
339     }
340     return "?";
341 }
342 
GetMnemonicPart(std::ostream & os,const G4_SendDescLdSt & m)343 static void GetMnemonicPart(std::ostream &os, const G4_SendDescLdSt &m)
344 {
345     os << ToSymbol(m.op);
346     os << "." << ::ToSymbol(m.getSFID());
347     os << "." << ToSymbolDataSize(m.elemBitsReg, m.elemBitsReg);
348     os << ".a" << std::to_string(m.addrBits);
349     os << ToSymbol(m.l1, m.l3);
350 }
351 
str() const352 std::string G4_SendDescLdSt::str() const
353 {
354     std::stringstream ss;
355     str(ss);
356     return ss.str();
357 }
358 
str(std::ostream & os) const359 void G4_SendDescLdSt::str(std::ostream &os) const
360 {
361     auto fmtHex =
362         [] (int64_t x) {
363         std::stringstream ss;
364         ss << "0x" << std::hex << std::uppercase << x;
365         return ss.str();
366     };
367 
368     GetMnemonicPart(os, *this);
369     os << " ";
370     os << ::ToSymbol(addrType);
371     if (addrType != AddrType::FLAT) {
372         os << "[";
373         if (const G4_Operand *surf = getSurface()) {
374             if (surf->isImm()) {
375                 os << fmtHex(surf->asImm()->getImm());
376             } else if (surf->isA0() ){
377                 os << "a0.?"; // how to find subreg
378             } else {
379                 os << "A?";
380             }
381         } else {
382             os << "???";
383         }
384         os << "]";
385     }
386 }
387 
388 
389 ///////////////////////////////////////////////////////////////////////////////
390 // G4_SendDescRaw implementations
391 ///////////////////////////////////////////////////////////////////////////////
G4_SendDescRaw(uint32_t fCtrl,uint32_t regs2rcv,uint32_t regs2snd,SFID fID,uint16_t extMsgLen,uint32_t extFCtrl,SendAccess access,G4_Operand * bti,G4_Operand * sti,IR_Builder & builder)392 G4_SendDescRaw::G4_SendDescRaw(
393     uint32_t fCtrl, uint32_t regs2rcv,
394     uint32_t regs2snd, SFID fID, uint16_t extMsgLen,
395     uint32_t extFCtrl, SendAccess access,
396     G4_Operand *bti, G4_Operand *sti,
397     IR_Builder& builder)
398     : G4_SendDesc(G4_SendDesc::Kind::RAW, fID)
399 {
400     // All unnamed bits should be passed with those control bits.
401     // Otherwise, need to be set individually.
402     desc.value = fCtrl;
403 
404     desc.layout.rspLength = regs2rcv;
405     desc.layout.msgLength = regs2snd;
406 
407     extDesc.value = 0;
408     extDesc.layout.funcID = SFIDtoInt(fID);
409     extDesc.layout.extMsgLength = extMsgLen;
410     extDesc.layout.extFuncCtrl = extFCtrl;
411 
412     src1Len = extMsgLen; // [10:6]
413     eotAfterMessage = false; // [5]
414     sfid = fID;
415 
416     accessType = access;
417     funcCtrlValid = true;
418 
419     m_bti = bti;
420     m_sti = sti;
421 
422     if (m_bti && m_bti->isImm())
423     {
424         setBindingTableIdx((unsigned)m_bti->asImm()->getInt());
425     }
426     if (m_sti && m_sti->isImm())
427     {
428         desc.value |= (((unsigned)m_sti->asImm()->getInt()) << 8); // [11:8]
429     }
430 
431     uint32_t totalMaxLength = builder.getMaxSendMessageLength();
432     MUST_BE_TRUE(extDesc.layout.extMsgLength + desc.layout.msgLength < totalMaxLength,
433         "combined message length may not exceed the maximum");
434 }
435 
G4_SendDescRaw(uint32_t descBits,uint32_t extDescBits,SendAccess access,G4_Operand * bti,G4_Operand * sti)436 G4_SendDescRaw::G4_SendDescRaw(
437     uint32_t descBits, uint32_t extDescBits,
438     SendAccess access,
439     G4_Operand *bti,
440     G4_Operand *sti)
441     : G4_SendDesc(G4_SendDesc::Kind::RAW,
442         intToSFID(extDescBits & 0xF)), // [3:0]
443     accessType(access), m_sti(sti), m_bti(bti), funcCtrlValid(true)
444 {
445     desc.value = descBits;
446     extDesc.value = extDescBits;
447     src1Len = (extDescBits >> 6) & 0x1F; // [10:6]
448     eotAfterMessage = extDesc.layout.eot; // [5]
449 
450 
451     if (bti && bti->isImm())
452     {
453         setBindingTableIdx((unsigned)bti->asImm()->getInt());
454     }
455     if (sti && sti->isImm())
456     {
457         desc.value |= (((unsigned)m_sti->asImm()->getInt()) << 8); // [11:8]
458     }
459 }
460 
G4_SendDescRaw(SFID _sfid,uint32_t _desc,uint32_t _extDesc,int _src1Len,SendAccess access,G4_Operand * bti,bool isValidFuncCtrl)461 G4_SendDescRaw::G4_SendDescRaw(
462     SFID _sfid,
463     uint32_t _desc,
464     uint32_t _extDesc,
465     int _src1Len,
466     SendAccess access,
467     G4_Operand* bti,
468     bool isValidFuncCtrl)
469     : G4_SendDescRaw(_sfid, _desc, _extDesc, _src1Len, access, bti,
470                      g4::SIMD_UNDEFINED, isValidFuncCtrl)
471 {}
472 
G4_SendDescRaw(SFID _sfid,uint32_t _desc,uint32_t _extDesc,int _src1Len,SendAccess access,G4_Operand * bti,G4_ExecSize execSize,bool isValidFuncCtrl)473 G4_SendDescRaw::G4_SendDescRaw(
474     SFID _sfid,
475     uint32_t _desc,
476     uint32_t _extDesc,
477     int _src1Len,
478     SendAccess access,
479     G4_Operand *bti,
480     G4_ExecSize execSize,
481     bool isValidFuncCtrl)
482     : G4_SendDesc(G4_SendDesc::Kind::RAW, _sfid, execSize),
483     accessType(access), m_sti(nullptr), m_bti(bti), funcCtrlValid(isValidFuncCtrl)
484 {
485     isLscDescriptor =
486         _sfid == SFID::UGM || _sfid == SFID::UGML ||
487         _sfid == SFID::SLM || _sfid == SFID::TGM;
488 
489     if (!isLscDescriptor && bti && bti->isImm()) {
490         setBindingTableIdx((unsigned)bti->asImm()->getInt());
491     }
492     // ensure ExDesc[10:6] also holds src1Len
493     // see the note above (other constructor) about DG2 descriptors and
494     // ExDesc[10:6]
495     _extDesc |= ((_src1Len & 0x1F) << 6);
496     desc.value = _desc;
497     extDesc.value = _extDesc;
498     src1Len = _src1Len;
499     eotAfterMessage = false;
500 }
501 
getHdcMessageType() const502 uint32_t G4_SendDescRaw::getHdcMessageType() const
503 {
504     MUST_BE_TRUE(isHDC(),"not an HDC message");
505     return (desc.value >> 14) & 0x1F;
506 }
507 
getLscAddrType() const508 LSC_ADDR_TYPE G4_SendDescRaw::getLscAddrType() const
509 {
510     MUST_BE_TRUE(isLscOp(), "must be LSC op");
511     const int LSC_ADDR_TYPE_OFFSET = 29;
512     const uint32_t LSC_ADDR_TYPE_MASK = 0x3;
513     const uint32_t rawDescBits = getDesc();
514     auto addrTypeBits = ((rawDescBits >> LSC_ADDR_TYPE_OFFSET) & LSC_ADDR_TYPE_MASK);
515     return LSC_ADDR_TYPE(addrTypeBits + 1);
516 }
517 
getLscAddrSizeBytes() const518 int G4_SendDescRaw::getLscAddrSizeBytes() const
519 {
520     MUST_BE_TRUE(isLscOp(), "must be LSC op");
521     auto op = getLscOp();
522     switch (op) {
523     case LSC_LOAD:
524     case LSC_LOAD_STRIDED:
525     case LSC_LOAD_QUAD:
526     case LSC_STORE:
527     case LSC_STORE_STRIDED:
528     case LSC_STORE_QUAD:
529         break;
530     case LSC_LOAD_BLOCK2D:
531     case LSC_STORE_BLOCK2D:
532         return getSFID() == SFID::TGM ? 4 : 8;
533     default:
534         if (op < LSC_ATOMIC_IINC && op > LSC_ATOMIC_XOR) {
535             return 0;
536         }
537     }
538     // it's a good op with an AddrType field in [8:7]
539     switch ((getDesc() >> 7) & 0x3) {
540     case 1: return 2;
541     case 2: return 4;
542     case 3: return 8;
543     default: break;
544     }
545     return 0;
546 }
547 
getLscDataOrder() const548 LSC_DATA_ORDER G4_SendDescRaw::getLscDataOrder() const
549 {
550     MUST_BE_TRUE(isLscOp(), "must be LSC op");
551     auto op = getLscOp();
552     if (op == LSC_LOAD_QUAD || op == LSC_STORE_QUAD)
553         return LSC_DATA_ORDER_NONTRANSPOSE;
554     if ((getDesc() >> 15) & 0x1) {
555         return LSC_DATA_ORDER_TRANSPOSE;
556     } else {
557         return LSC_DATA_ORDER_NONTRANSPOSE;
558     }
559 }
560 
561 
setEOT()562 void G4_SendDescRaw::setEOT() {
563     eotAfterMessage = true;
564 
565     if (isLscOp())
566         return;
567 
568     extDesc.layout.eot = true;
569 }
570 
isHdcIntAtomicMessage(SFID funcID,uint16_t msgType)571 static bool isHdcIntAtomicMessage(SFID funcID, uint16_t msgType)
572 {
573     if (funcID != SFID::DP_DC1)
574         return false;
575 
576     if (msgType == DC1_UNTYPED_ATOMIC || msgType == DC1_A64_ATOMIC)
577     {
578         return true;
579     }
580     if (getGenxPlatform() >= GENX_SKL)
581     {
582         if (msgType == DC1_TYPED_ATOMIC)
583             return true;
584     }
585     if (getPlatformGeneration(getGenxPlatform()) >= PlatformGen::XE)
586     {
587         if (msgType == DC1_TYPED_HALF_INTEGER_ATOMIC ||
588             msgType == DC1_TYPED_HALF_COUNTER_ATOMIC ||
589             msgType == DC1_UNTYPED_HALF_INTEGER_ATOMIC ||
590             msgType == DC1_A64_UNTYPED_HALF_INTEGER_ATOMIC)
591             return true;
592     }
593     return false;
594 }
595 
isHdcFloatAtomicMessage(SFID funcID,uint16_t msgType)596 static bool isHdcFloatAtomicMessage(SFID funcID, uint16_t msgType)
597 {
598     if (funcID != SFID::DP_DC1)
599         return false;
600 
601     if (getGenxPlatform() >= GENX_SKL)
602     {
603         if (msgType == DC1_UNTYPED_FLOAT_ATOMIC ||
604             msgType == DC1_A64_UNTYPED_FLOAT_ATOMIC)
605             return true;
606     }
607     if (getPlatformGeneration(getGenxPlatform()) >= PlatformGen::XE)
608     {
609         if (msgType == DC1_UNTYPED_HALF_FLOAT_ATOMIC ||
610             msgType == DC1_A64_UNTYPED_HALF_FLOAT_ATOMIC)
611             return true;
612     }
613     return false;
614 }
615 
isAtomicMessage() const616 bool G4_SendDescRaw::isAtomicMessage() const
617 {
618     if (isLscOp() &&
619         (desc.value & 0x3F) >= LSC_ATOMIC_IINC &&
620         (desc.value & 0x3F) <= LSC_ATOMIC_XOR)
621     {
622         return true;
623     }
624 
625     auto funcID = getSFID();
626     if (!isHDC())
627         return false; // guard getMessageType() on SFID without a message type
628     uint16_t msgType = getHdcMessageType();
629     return isHdcIntAtomicMessage(funcID,msgType) ||
630         isHdcFloatAtomicMessage(funcID,msgType);
631 }
632 
getHdcAtomicOp() const633 uint16_t G4_SendDescRaw::getHdcAtomicOp() const
634 {
635     MUST_BE_TRUE(isHDC(), "must be HDC message");
636     MUST_BE_TRUE(isAtomicMessage(), "getting atomicOp from non-atomic message!");
637     uint32_t funcCtrl = getFuncCtrl();
638     if (isHdcIntAtomicMessage(getSFID(), getHdcMessageType()))
639     {
640         // bits: 11:8
641         return (uint16_t)((funcCtrl >> 8) & 0xF);
642     }
643 
644     // must be float Atomic
645     // bits: 10:8
646     return (int16_t)((funcCtrl >> 8) & 0x7);
647 }
648 
isSLMMessage() const649 bool G4_SendDescRaw::isSLMMessage() const
650 {
651     if (getSFID() == SFID::DP_DC2)
652     {
653         uint32_t msgType = getHdcMessageType();
654         if ((msgType == DC2_UNTYPED_SURFACE_WRITE || msgType == DC2_BYTE_SCATTERED_WRITE) &&
655             (getFuncCtrl() & 0x80))
656         {
657             return true;
658         }
659     }
660 
661     if (getSFID() == SFID::DP_DC2 ||
662         getSFID() == SFID::DP_DC1 ||
663         getSFID() == SFID::DP_DC0)
664     {
665         if ((getDesc() & 0xFF) == SLMIndex)
666         {
667             return true;
668         }
669     }
670 
671     if (m_bti && m_bti->isImm() && m_bti->asImm()->getInt() == SLMIndex)
672     {
673         return true;
674     }
675 
676     return getSFID() == SFID::SLM;
677 }
678 
679 
ResponseLength() const680 uint16_t G4_SendDescRaw::ResponseLength() const
681 {
682     // the loadblock2DArray message may return up to 32 GRF.
683     // Since we don't have enough bits to encode 32, block2d creates an exception where 31 means 31 or 32 (HW detects).
684     // SW must know the actual size is 32 for data-flow/RA/SWSB to function correctly though.
685     // fortunately it doesn't look like 31 is a valid value for this message, we just treat 31 as 32
686     bool isLoadBlock2DArray = isLscOp() && getLscOp() == LSC_LOAD_BLOCK2D;
687     if (desc.layout.rspLength == 31 && isLoadBlock2DArray)
688     {
689         return 32;
690     }
691     return desc.layout.rspLength;
692 }
693 
694 
isHeaderPresent() const695 bool G4_SendDescRaw::isHeaderPresent() const {
696     if (isLscOp())
697         return false;
698 
699     return desc.layout.headerPresent == 1;
700 }
701 
setHeaderPresent(bool val)702 void G4_SendDescRaw::setHeaderPresent(bool val)
703 {
704     MUST_BE_TRUE(!isLscOp(), "LSC ops don't have headers");
705     desc.layout.headerPresent = val;
706 }
707 
setBindingTableIdx(unsigned idx)708 void G4_SendDescRaw::setBindingTableIdx(unsigned idx)
709 {
710     if (isLscOp()) {
711         extDesc.value |= (idx << 24);
712         return;
713     }
714     desc.value |= idx;
715 }
716 
getSamplerMessageType() const717 uint32_t G4_SendDescRaw::getSamplerMessageType() const
718 {
719     MUST_BE_TRUE(isSampler(), "wrong descriptor type for method");
720     return (getFuncCtrl() >> 12) & 0x1f;
721 }
722 
is16BitInput() const723 bool G4_SendDescRaw::is16BitInput() const
724 {
725     MUST_BE_TRUE(!isLscOp(), "wrong descriptor type for method");
726     // TODO: could use this for LSC messages too potentially
727     return desc.layout.simdMode2 == 1;
728 }
729 
is16BitReturn() const730 bool G4_SendDescRaw::is16BitReturn() const
731 {
732     MUST_BE_TRUE(!isLscOp(), "wrong descriptor type for method");
733     return desc.layout.returnFormat == 1;
734 }
735 
isByteScatterRW() const736 bool G4_SendDescRaw::isByteScatterRW() const
737 {
738     auto funcID = getSFID();
739     switch (funcID) {
740     case SFID::DP_DC0:
741         switch (getHdcMessageType()) {
742         case DC_BYTE_SCATTERED_READ:
743         case DC_BYTE_SCATTERED_WRITE:
744             return true;
745         default:
746             break;
747         }
748         break;
749     case SFID::DP_DC1:
750         switch (getHdcMessageType()) {
751         case DC1_A64_SCATTERED_READ:
752         case DC1_A64_SCATTERED_WRITE:
753             return (getBlockSize() == 1);
754         default:
755             break;
756         }
757         break;
758     case SFID::DP_DC2:
759         switch (getHdcMessageType()) {
760         case DC2_A64_SCATTERED_READ:
761         case DC2_A64_SCATTERED_WRITE:
762             return (getBlockSize() == 1);
763         case DC2_BYTE_SCATTERED_READ:
764         case DC2_BYTE_SCATTERED_WRITE:
765             return true;
766         default:
767             break;
768         }
769         break;
770     default:
771         break;
772     }
773     return false;
774 }
775 
isDWScatterRW() const776 bool G4_SendDescRaw::isDWScatterRW() const
777 {
778     auto funcID = getSFID();
779     switch (funcID) {
780     case SFID::DP_DC0:
781         switch (getHdcMessageType()) {
782         case DC_DWORD_SCATTERED_READ:
783         case DC_DWORD_SCATTERED_WRITE:
784             return true;
785         default:
786             break;
787         }
788         break;
789     case SFID::DP_DC1:
790         switch (getHdcMessageType()) {
791         case DC1_A64_SCATTERED_READ:
792         case DC1_A64_SCATTERED_WRITE:
793             return (getBlockSize() == 4);
794         default:
795             break;
796         }
797         break;
798     case SFID::DP_DC2:
799         switch (getHdcMessageType()) {
800         case DC2_A64_SCATTERED_READ:
801         case DC2_A64_SCATTERED_WRITE:
802             return (getBlockSize() == 4);
803         default:
804             break;
805         }
806         break;
807     default:
808         break;
809     }
810     return false;
811 }
812 
isQWScatterRW() const813 bool G4_SendDescRaw::isQWScatterRW() const
814 {
815     auto funcID = getSFID();
816     switch (funcID) {
817     case SFID::DP_DC0:
818         switch (getHdcMessageType()) {
819         case DC_QWORD_SCATTERED_READ:
820         case DC_QWORD_SCATTERED_WRITE:
821             return true;
822         default:
823             break;
824         }
825         break;
826     case SFID::DP_DC1:
827         switch (getHdcMessageType()) {
828         case DC1_A64_SCATTERED_READ:
829         case DC1_A64_SCATTERED_WRITE:
830             return (getBlockSize() == 8);
831         default:
832             break;
833         }
834         break;
835     case SFID::DP_DC2:
836         switch (getHdcMessageType()) {
837         case DC2_A64_SCATTERED_READ:
838         case DC2_A64_SCATTERED_WRITE:
839             return (getBlockSize() == 8);
840         default:
841             break;
842         }
843         break;
844     default:
845         break;
846     }
847     return false;
848 }
849 
isUntypedRW() const850 bool G4_SendDescRaw::isUntypedRW() const
851 {
852     auto funcID = getSFID();
853     switch (funcID) {
854     case SFID::DP_DC1:
855         switch (getHdcMessageType()) {
856         case DC1_UNTYPED_SURFACE_READ:
857         case DC1_UNTYPED_SURFACE_WRITE:
858         case DC1_A64_UNTYPED_SURFACE_READ:
859         case DC1_A64_UNTYPED_SURFACE_WRITE:
860             return true;
861         default:
862             break;
863         }
864         break;
865     case SFID::DP_DC2:
866         switch (getHdcMessageType()) {
867         case DC2_UNTYPED_SURFACE_READ:
868         case DC2_UNTYPED_SURFACE_WRITE:
869         case DC2_A64_UNTYPED_SURFACE_READ:
870         case DC2_A64_UNTYPED_SURFACE_WRITE:
871             return true;
872         default:
873             break;
874         }
875         break;
876     default:
877         break;
878     }
879     return false;
880 }
881 
isA64Message() const882 bool G4_SendDescRaw::isA64Message() const
883 {
884     if (!isHDC()) {
885         return false;
886     }
887 
888     uint32_t msgType = getHdcMessageType();
889     auto funcID = getSFID();
890     switch (funcID) {
891     case SFID::DP_DC1:
892     {
893         switch (msgType) {
894         default:
895             break;
896         case DC1_A64_SCATTERED_READ:
897         case DC1_A64_UNTYPED_SURFACE_READ:
898         case DC1_A64_ATOMIC:
899         case DC1_A64_BLOCK_READ :
900         case DC1_A64_BLOCK_WRITE:
901         case DC1_A64_UNTYPED_SURFACE_WRITE:
902         case DC1_A64_SCATTERED_WRITE:
903         case DC1_A64_UNTYPED_FLOAT_ATOMIC:
904         case DC1_A64_UNTYPED_HALF_INTEGER_ATOMIC:
905         case DC1_A64_UNTYPED_HALF_FLOAT_ATOMIC:
906             return true;
907         }
908         break;
909     }
910     case SFID::DP_DC2 :
911     {
912         switch (msgType) {
913         default:
914             break;
915         case DC2_A64_SCATTERED_READ:
916         case DC2_A64_UNTYPED_SURFACE_READ:
917         case DC2_A64_UNTYPED_SURFACE_WRITE:
918         case DC2_A64_SCATTERED_WRITE:
919             return true;
920         }
921         break;
922     }
923     default:
924         break;
925     }
926     return false;
927 }
928 
getNumEnabledChannels(uint32_t chDisableBits)929 static int getNumEnabledChannels(uint32_t chDisableBits)
930 {
931     switch (chDisableBits)
932     {
933     case 0x7:
934     case 0xB:
935     case 0xD:
936     case 0xE: return 1;
937     case 0x3:
938     case 0x5:
939     case 0x6:
940     case 0x9:
941     case 0xA:
942     case 0xC: return 2;
943     case 0x1:
944     case 0x2:
945     case 0x4:
946     case 0x8: return 3;
947     case 0x0: return 4;
948     case 0xF: return 0;
949     default: MUST_BE_TRUE(false, "Illegal Channel Mask Number");
950     }
951     return 0;
952 }
953 
954 #define MSG_BLOCK_SIZE_OFFSET   8
getEnabledChannelNum() const955 unsigned G4_SendDescRaw::getEnabledChannelNum() const
956 {
957     // TODO: should further scope this to typed/untyped
958     MUST_BE_TRUE(isHDC(), "message does not have field ChannelEnable");
959     uint32_t funcCtrl = getFuncCtrl();
960     return getNumEnabledChannels((funcCtrl >> MSG_BLOCK_SIZE_OFFSET) & 0xF);
961 }
962 
getBlockNum() const963 unsigned G4_SendDescRaw::getBlockNum() const
964 {
965     MUST_BE_TRUE(isHDC(), "not an HDC message");
966 
967     uint32_t funcCtrl = getFuncCtrl();
968 
969 #define MSG_BLOCK_NUMBER_OFFSET 10
970     funcCtrl =  (funcCtrl >> MSG_BLOCK_NUMBER_OFFSET) & 0x3;
971     switch (funcCtrl)
972     {
973     case SVM_BLOCK_NUM_1: return 1;
974     case SVM_BLOCK_NUM_2: return 2;
975     case SVM_BLOCK_NUM_4: return 4;
976     case SVM_BLOCK_NUM_8: return 8;
977     default: MUST_BE_TRUE(false, "Illegal SVM block number (should be 1, 2, 4, or 8).");
978     }
979 
980     return 0;
981 }
982 
getBlockSize() const983 unsigned G4_SendDescRaw::getBlockSize() const
984 {
985     MUST_BE_TRUE(isHDC(), "not an HDC message");
986 
987     uint32_t funcCtrl = getFuncCtrl();
988 
989     funcCtrl =  (funcCtrl >> MSG_BLOCK_SIZE_OFFSET) & 0x3;
990     switch (funcCtrl)
991     {
992     case SVM_BLOCK_TYPE_BYTE: return 1;
993     case SVM_BLOCK_TYPE_DWORD: return 4;
994     case SVM_BLOCK_TYPE_QWORD: return 8;
995     default: MUST_BE_TRUE(false, "Illegal SVM block size (should be 1, 4, or 8).");
996     }
997     return 0;
998 }
999 
isOwordLoad() const1000 bool G4_SendDescRaw::isOwordLoad() const
1001 {
1002     if (!isHDC()) {
1003         return false;
1004     }
1005     uint32_t funcCtrl = getFuncCtrl();
1006     auto funcID = getSFID();
1007     static int DC0_MSG_TYPE_OFFSET = 14;
1008     static int DC1_MSG_SUBTYPE_OFFSET = 12; // [31:12]
1009     uint16_t msgType = (funcCtrl >> DC0_MSG_TYPE_OFFSET) & 0x1F;
1010     uint16_t dc1MsgSubType = (funcCtrl >> DC1_MSG_SUBTYPE_OFFSET) & 0x3;
1011     // bits [18:14] are message type
1012     // (included 18 because that is set for scratch)
1013     static const uint32_t MSD0R_OWAB  = 0x0; // DC0
1014     static const uint32_t MSD0R_OWB   = 0x0; // DC0
1015     static const uint32_t MSD_CC_OWAB = 0x1; // DC_CC
1016     static const uint32_t MSD_CC_OWB = 0x0; // DC_CC
1017     static const uint32_t MSD1R_A64_OWB = 0x14; // DC1 A64 [13:12] == 1
1018     static const uint32_t MSD1R_A64_OWAB = 0x14; // DC1 A64 [13:12] == 0
1019     bool isDc0Owb = funcID == SFID::DP_DC0 && (msgType == MSD0R_OWAB || msgType == MSD0R_OWB);
1020     bool isCcOwb = funcID == SFID::DP_CC && (msgType == MSD_CC_OWAB || msgType == MSD_CC_OWB);
1021     bool isDc1A64Owb =
1022         funcID == SFID::DP_DC1 && (msgType == MSD1R_A64_OWB) &&
1023         // st==2, 3 don't have mappings that I can find, but just to be
1024         // safe force 0 or 1 (which are unalgined vs aligned)
1025         (dc1MsgSubType == 0 || dc1MsgSubType == 1);
1026     return isDc0Owb || isCcOwb || isDc1A64Owb;
1027 }
1028 
getOwordsAccessed() const1029 unsigned G4_SendDescRaw::getOwordsAccessed() const
1030 {
1031     MUST_BE_TRUE(isOwordLoad(), "must be OWord message");
1032     // This encoding holds for the DP_DC0, DP_CC, and DP_DC1 (A64 block)
1033     // element count.
1034     auto owEnc = (getFuncCtrl() >> 8) & 0x7; // Desc[10:8] is OW count
1035     if (owEnc == 0) {
1036         return 1; // OW1L (low half of GRF)
1037     } else if (owEnc == 1) {
1038         // for OW1H (high half of GRF): treat as full 32B
1039         // (this control probably isn't ever be used and was removed in Xe)
1040         return 1;
1041     } else {
1042         // 2 = OW2, 3 == OW4, 4 == OW8, 5 == OW16
1043         return 2 << (owEnc - 2);
1044     }
1045 }
1046 
isHdcTypedSurfaceWrite() const1047 bool G4_SendDescRaw::isHdcTypedSurfaceWrite() const
1048 {
1049     return isHDC() && getHdcMessageType() == DC1_TYPED_SURFACE_WRITE;
1050 }
1051 
getDescription() const1052 std::string G4_SendDescRaw::getDescription() const
1053 {
1054     // Return plain text string of type of msg, ie "oword read", "oword write",
1055     // "media rd", etc.
1056     const G4_SendDescRaw* msgDesc = this;
1057     unsigned int category;
1058 
1059     switch (msgDesc->getSFID())
1060     {
1061     case SFID::SAMPLER: return "sampler";
1062     case SFID::GATEWAY: return "gateway";
1063     case SFID::DP_DC2:
1064         switch (getHdcMessageType())
1065         {
1066         case DC2_UNTYPED_SURFACE_READ: return "scaled untyped surface read";
1067         case DC2_A64_SCATTERED_READ: return "scaled A64 scatter read";
1068         case DC2_A64_UNTYPED_SURFACE_READ: return "scaled A64 untyped surface read";
1069         case DC2_BYTE_SCATTERED_READ: return "scaled byte scattered read";
1070         case DC2_UNTYPED_SURFACE_WRITE: return "scaled untyped surface write";
1071         case DC2_A64_UNTYPED_SURFACE_WRITE: return "scaled A64 untyped surface write";
1072         case DC2_A64_SCATTERED_WRITE: return "scaled A64 scattered write";
1073         case DC2_BYTE_SCATTERED_WRITE: return "scaled byte scattede write";
1074         default: return "unrecognized DC2 message";
1075         }
1076     case SFID::DP_WRITE:
1077         switch ((getFuncCtrl() >> 14) & 0x1F)
1078         {
1079         case 0xc: return "render target write";
1080         case 0xd: return "render target read";
1081         default: return "unrecognized RT message";
1082         }
1083         break;
1084     case SFID::URB: return "urb";
1085     case SFID::SPAWNER: return "thread spawner";
1086     case SFID::VME: return "vme";
1087     case SFID::DP_CC:
1088         switch (getHdcMessageType())
1089         {
1090         case 0x0: return "oword block read";
1091         case 0x1: return "unaligned oword block read";
1092         case 0x2: return "oword dual block read";
1093         case 0x3: return "dword scattered read";
1094         default: return "unrecognized DCC message";
1095         }
1096     case SFID::DP_DC0:
1097         category = (msgDesc->getFuncCtrl() >> 18) & 0x1;
1098         if (category == 0)
1099         {
1100             // legacy data port
1101             bool hword = (msgDesc->getFuncCtrl() >> 13) & 0x1;
1102             switch (getHdcMessageType())
1103             {
1104             case 0x0: return hword ? "hword block read" : "oword block read";
1105             case 0x1: return hword ? "hword aligned block read" : "unaligned oword block read";
1106             case 0x2: return "oword dual block read";
1107             case 0x3: return "dword scattered read";
1108             case 0x4: return "byte scattered read";
1109             case 0x7: return "memory fence";
1110             case 0x8: return hword ? "hword block write" : "oword block write";
1111             case 0x9: return "hword aligned block write";
1112             case 0xa: return "oword dual block write";
1113             case 0xb: return "dword scattered write";
1114             case 0xc: return "byte scattered write";
1115             case 0x5: return "qword gather";
1116             case 0xd: return "qword scatter";
1117             default: return "unrecognized DC0 message";
1118             }
1119         }
1120         else
1121         {
1122             // scratch
1123             int bits = (msgDesc->getFuncCtrl() >> 17) & 0x1;
1124 
1125             if (bits == 0)
1126                 return "scratch read";
1127             else
1128                 return "scratch write";
1129         }
1130         break;
1131     case SFID::DP_PI: return "dp_pi";
1132     case SFID::DP_DC1:
1133         switch (getHdcMessageType())
1134         {
1135         case 0x0: return "transpose read";
1136         case 0x1: return "untyped surface read";
1137         case 0x2: return "untyped atomic operation";
1138         case 0x3: return "untyped atomic operation simd4x2";
1139         case 0x4: return "media block read";
1140         case 0x5: return "typed surface read";
1141         case 0x6: return "typed atomic operation";
1142         case 0x7: return "typed atomic operation simd4x2";
1143         case 0x8: return "untyped atomic float add";
1144         case 0x9: return "untyped surface write";
1145         case 0xa: return "media block write (non-iecp)";
1146         case 0xb: return "atomic counter operation";
1147         case 0xc: return "atomic counter operation simd4x2";
1148         case 0xd: return "typed surface write";
1149         case 0x10: return "a64 gathering read";
1150         case 0x11: return "a64 untyped surface read";
1151         case 0x12: return "a64 untyped atomic operation";
1152         case 0x13: return "a64 untyped atomic operation simd4x2";
1153         case 0x14: return "a64 block read";
1154         case 0x15: return "a64 block write";
1155         case 0x18: return "a64 untyped atomic float add";
1156         case 0x19: return "a64 untyped surface write";
1157         case 0x1a: return "a64 scattered write";
1158         default: return "unrecognized DC1 message";
1159         }
1160         break;
1161     case SFID::CRE: return "cre";
1162     case SFID::SLM:
1163     case SFID::TGM:
1164     case SFID::UGM:
1165     case SFID::UGML:
1166     {
1167         LscOpInfo opInfo { };
1168         if (LscOpInfoFind((LSC_OP)(desc.value & 0x3F), opInfo)) { // Desc[5:0]
1169             return opInfo.mnemonic;
1170         } else {
1171             const char* invalid = "lsc (invalid operation)";
1172             return invalid;
1173         }
1174     }
1175     default: return "--";
1176     }
1177     return NULL;
1178 }
1179 
getSrc0LenBytes() const1180 size_t G4_SendDescRaw::getSrc0LenBytes() const
1181 {
1182     return MessageLength() * (size_t)getGRFSize();
1183 }
1184 
getDstLenBytes() const1185 size_t G4_SendDescRaw::getDstLenBytes() const
1186 {
1187     if (isScratchRW()) {
1188         return 32 * getScratchRWSize(); // HWords
1189     } else if (isOwordLoad()) {
1190         return 16 * getOwordsAccessed(); // OWords
1191 #if 0
1192     // Due to VMIT-9224, comment this out!
1193     // Use macro fo easy testing.
1194     } else if (isByteScatterRW() && isDataPortRead()) {
1195         assert(getExecSize() != g4::SIMD_UNDEFINED);
1196         uint16_t nbytes = getBlockNum();
1197         // assume 4 at least
1198         nbytes = (nbytes >= 4 ? nbytes : 4);
1199         size_t sz = nbytes * getExecSize();
1200         return sz;
1201     } else if (isDWScatterRW() && isDataPortRead()) {
1202         assert(getExecSize() != g4::SIMD_UNDEFINED);
1203         size_t sz = 4 * getBlockNum() * getExecSize();
1204         return sz;
1205     } else if (isQWScatterRW() && isDataPortRead()) {
1206         assert(getExecSize() != g4::SIMD_UNDEFINED);
1207         size_t sz = 8 * getBlockNum() * getExecSize();
1208         return sz;
1209     } else if (isUntypedRW() && isDataPortRead()) {
1210         assert(getExecSize() != g4::SIMD_UNDEFINED);
1211         size_t sz = 4 * getEnabledChannelNum() * getExecSize();
1212         return sz;
1213 #endif
1214     } else {
1215         // fallback to the raw GRF count
1216         return ResponseLength() * (size_t)getGRFSize();
1217     }
1218 }
1219 
getSrc1LenBytes() const1220 size_t G4_SendDescRaw::getSrc1LenBytes() const
1221 {
1222     if (isScratchRW()) {
1223         return 32 * getScratchRWSize(); // HWords
1224     }
1225     // we could support OW store here, but no one seems to need that and
1226     // we are phasing this class out; so ignore it for now
1227 
1228     return extMessageLength() * (size_t)getGRFSize();
1229 }
1230 
isFence() const1231 bool G4_SendDescRaw::isFence() const
1232 {
1233     if (isLscOp())
1234         return (desc.value & 0x3F) == LSC_FENCE;
1235 
1236     SFID sfid = getSFID();
1237     unsigned FC = getFuncCtrl();
1238 
1239     // Memory Fence
1240     if (sfid == SFID::DP_DC0 && ((FC >> 14) & 0x1F) == DC_MEMORY_FENCE)
1241     {
1242         return true;
1243     }
1244 
1245     // Sampler cache flush
1246     if (sfid == SFID::SAMPLER && ((FC >> 12) & 0x1F) == 0x1F)
1247     {
1248         return true;
1249     }
1250 
1251     return false;
1252 }
isBarrier() const1253 bool G4_SendDescRaw::isBarrier() const
1254 {
1255     auto funcID = getSFID();
1256     uint32_t funcCtrl = getFuncCtrl();
1257     return funcID == SFID::GATEWAY && (funcCtrl & 0xFF) == 0x4;
1258 }
1259 
getOffset() const1260 int G4_SendDescRaw::getOffset() const
1261 {
1262     if (isLscOp()) {
1263         MUST_BE_TRUE(false, "need to do some work here...");
1264     }
1265     if (isScratchRW())
1266         return getScratchRWOffset() * 32;
1267     return 0;
1268 }
1269 
cachingToG4(LSC_CACHE_OPT co)1270 static Caching cachingToG4(LSC_CACHE_OPT co)
1271 {
1272     switch (co) {
1273     case LSC_CACHING_DEFAULT:        return Caching::DF;
1274     case LSC_CACHING_CACHED:         return Caching::CA;
1275     case LSC_CACHING_READINVALIDATE: return Caching::RI;
1276     case LSC_CACHING_WRITEBACK:      return Caching::WB;
1277     case LSC_CACHING_UNCACHED:       return Caching::UC;
1278     case LSC_CACHING_STREAMING:      return Caching::ST;
1279     case LSC_CACHING_WRITETHROUGH:   return Caching::WT;
1280     default: break;
1281     }
1282     return Caching::INVALID;
1283 }
1284 
1285 // decode caching from Desc[19:17]
decodeCaching3(bool isLoad,uint32_t descBits)1286 static std::pair<Caching,Caching> decodeCaching3(
1287     bool isLoad, uint32_t descBits)
1288 {
1289     auto mk = [&](Caching l1IfLd, Caching l3IfLd,
1290         Caching l1IfStAt, Caching l3IfStAt)
1291     {
1292         return isLoad ?
1293             std::make_pair(l1IfLd, l3IfLd) :
1294             std::make_pair(l1IfStAt, l3IfStAt);
1295     };
1296 
1297     // Decode caching field from in [19:17]
1298     uint32_t ccBits = (descBits >> 17) & 0x7;
1299     switch (ccBits) {
1300     case 0: return mk(
1301         Caching::DF, Caching::DF,
1302         Caching::DF, Caching::DF);
1303     case 1: return mk(
1304         Caching::UC, Caching::UC,
1305         Caching::UC, Caching::UC);
1306     case 2: return mk(
1307         Caching::UC, Caching::CA,
1308         Caching::UC, Caching::WB);
1309     case 3: return mk(
1310         Caching::CA, Caching::UC,
1311         Caching::WT, Caching::UC);
1312     case 4: return mk(
1313         Caching::CA, Caching::CA,
1314         Caching::WT, Caching::WB);
1315     case 5: return mk(
1316         Caching::ST, Caching::UC,
1317         Caching::ST, Caching::UC);
1318     case 6: return mk(
1319         Caching::ST, Caching::CA,
1320         Caching::ST, Caching::WB);
1321     case 7: return mk(
1322         Caching::RI, Caching::CA,
1323         Caching::WB, Caching::WB);
1324     }
1325     return std::make_pair(Caching::INVALID,Caching::INVALID);
1326 }
1327 
1328 
getCaching() const1329 std::pair<Caching,Caching> G4_SendDescRaw::getCaching() const {
1330     if (!isLscOp()) {
1331         return std::make_pair(Caching::INVALID, Caching::INVALID);
1332     }
1333     const auto opInfo = LscOpInfoGet(getLscOp());
1334     if (opInfo.isOther()) {
1335         return std::make_pair(Caching::INVALID, Caching::INVALID);
1336     }
1337 
1338     auto ccPair =
1339         decodeCaching3(opInfo.isLoad(), getDesc());
1340     MUST_BE_TRUE(
1341         ccPair.first != Caching::INVALID &&
1342         ccPair.second != Caching::INVALID,
1343         "unexpected invalid caching options (corrupt descriptor?)");
1344     return ccPair;
1345 }
1346 
1347 
toVisaCachingOpt(Caching c)1348 static LSC_CACHE_OPT toVisaCachingOpt(Caching c) {
1349     switch (c) {
1350     case Caching::DF: return LSC_CACHING_DEFAULT;
1351     case Caching::UC: return LSC_CACHING_UNCACHED;
1352     case Caching::CA: return LSC_CACHING_CACHED;
1353     case Caching::WB: return LSC_CACHING_WRITEBACK;
1354     case Caching::WT: return LSC_CACHING_WRITETHROUGH;
1355     case Caching::ST: return LSC_CACHING_STREAMING;
1356     case Caching::RI: return LSC_CACHING_READINVALIDATE;
1357     default:
1358         MUST_BE_TRUE(false, "invalid cache option");
1359         return (LSC_CACHE_OPT)-1;
1360     }
1361 }
1362 
setCaching(Caching l1,Caching l3)1363 void G4_SendDescRaw::setCaching(Caching l1, Caching l3)
1364 {
1365     if (!isLscOp()) {
1366         MUST_BE_TRUE(
1367             (l1 == Caching::INVALID && l3 == Caching::INVALID) ||
1368             (l1 == Caching::DF && l3 == Caching::DF),
1369             "invalid caching options for platform*SFID");
1370     }
1371     const auto opInfo = LscOpInfoGet(getLscOp());
1372     MUST_BE_TRUE(!opInfo.isOther(), "invalid LSC message kind for caching op");
1373     LSC_CACHE_OPTS visaCopts { };
1374     visaCopts.l1 = toVisaCachingOpt(l1);
1375     visaCopts.l3 = toVisaCachingOpt(l3);
1376 
1377     uint32_t cacheEnc = 0;
1378     uint32_t fieldMask = (0x7 << 17);
1379     bool isBits17_19 = true;
1380     bool success =
1381         LscTryEncodeCacheOpts(opInfo, visaCopts, cacheEnc, isBits17_19);
1382     MUST_BE_TRUE(success, "failed to set caching options");
1383     desc.value &= ~fieldMask;
1384     desc.value |= cacheEnc;
1385 }
1386 
isDc1OpTyped(uint32_t desc)1387 static bool isDc1OpTyped(uint32_t desc)
1388 {
1389     uint32_t mty = (desc >> 14) & 0x1F;
1390     switch (mty) {
1391     case DC1_TYPED_SURFACE_WRITE:
1392     case DC1_TYPED_SURFACE_READ:
1393     case DC1_TYPED_ATOMIC:
1394     case DC1_TYPED_HALF_INTEGER_ATOMIC:
1395         return true;
1396     default:
1397         break;
1398     }
1399     return false;
1400 }
1401 
isTyped() const1402 bool G4_SendDescRaw::isTyped() const
1403 {
1404     return getSFID() == SFID::DP_DC1 && isDc1OpTyped(getDesc());
1405 }
1406 
1407