1 /*========================== begin_copyright_notice ============================
2
3 Copyright (C) 2021 Intel Corporation
4
5 SPDX-License-Identifier: MIT
6
7 ============================= end_copyright_notice ===========================*/
8
9 #include "IGC/common/StringMacros.hpp"
10 #include "G4_IR.hpp"
11 #include "BuildIR.h"
12
13 #include <sstream>
14 #include <iomanip>
15
16 using namespace vISA;
17
18
19 ///////////////////////////////////////////////////////////////////////////////
20 // LdSt data type support
ToSymbol(LdStOp op)21 std::string vISA::ToSymbol(LdStOp op)
22 {
23 switch (op) {
24 case LdStOp::LOAD: return "load";
25 case LdStOp::LOAD_QUAD: return "load_quad";
26 case LdStOp::LOAD_STRIDED: return "load_strided";
27 case LdStOp::LOAD_BLOCK2D: return "load_block2d";
28 case LdStOp::STORE: return "store";
29 case LdStOp::STORE_QUAD: return "store_quad";
30 case LdStOp::STORE_STRIDED: return "store_strided";
31 case LdStOp::STORE_BLOCK2D: return "store_block2d";
32 // general atomics
33 case LdStOp::ATOMIC_LOAD: return "atomic_load";
34 case LdStOp::ATOMIC_STORE: return "atomic_store";
35 // floating point
36 case LdStOp::ATOMIC_FADD: return "atomic_fadd";
37 case LdStOp::ATOMIC_FSUB: return "atomic_fsub";
38 case LdStOp::ATOMIC_FMIN: return "atomic_fmin";
39 case LdStOp::ATOMIC_FMAX: return "atomic_fmax";
40 case LdStOp::ATOMIC_FCAS: return "atomic_fcas";
41 // integer
42 case LdStOp::ATOMIC_IINC: return "atomic_iinc";
43 case LdStOp::ATOMIC_IDEC: return "atomic_idec";
44 case LdStOp::ATOMIC_IADD: return "atomic_iadd";
45 case LdStOp::ATOMIC_ISUB: return "atomic_isub";
46 case LdStOp::ATOMIC_ICAS: return "atomic_icas";
47 case LdStOp::ATOMIC_SMIN: return "atomic_smin";
48 case LdStOp::ATOMIC_SMAX: return "atomic_smax";
49 case LdStOp::ATOMIC_UMIN: return "atomic_umin";
50 case LdStOp::ATOMIC_UMAX: return "atomic_umax";
51 // integer bitwise
52 case LdStOp::ATOMIC_AND: return "atomic_and";
53 case LdStOp::ATOMIC_XOR: return "atomic_xor";
54 case LdStOp::ATOMIC_OR: return "atomic_or";
55 default:
56 break;
57 }
58 return "???";
59 }
60
ToSymbol(Caching c)61 std::string vISA::ToSymbol(Caching c)
62 {
63 switch (c)
64 {
65 case Caching::CA: return ".ca";
66 case Caching::DF: return ".df";
67 case Caching::RI: return ".ri";
68 case Caching::ST: return ".st";
69 case Caching::WB: return ".wb";
70 case Caching::WT: return ".wt";
71 case Caching::UC: return ".uc";
72 default: return "?";
73 }
74 }
75
ToSymbol(Caching l1,Caching l3)76 std::string vISA::ToSymbol(Caching l1, Caching l3)
77 {
78 if (l1 == Caching::DF && l3 == Caching::DF)
79 return "";
80 else
81 return ToSymbol(l1) + ToSymbol(l3);
82 }
83
getCount() const84 int ElemsPerAddr::getCount() const
85 {
86 if (!isChannelMask())
87 return count;
88 return
89 ((int(channels) & int(Chs::X)) ? 1 : 0) +
90 ((int(channels) & int(Chs::Y)) ? 1 : 0) +
91 ((int(channels) & int(Chs::Z)) ? 1 : 0) +
92 ((int(channels) & int(Chs::W)) ? 1 : 0);
93 }
94
getMask() const95 ElemsPerAddr::Chs ElemsPerAddr::getMask() const
96 {
97 MUST_BE_TRUE(isChannelMask(), "must be a channel mask vector");
98 return channels;
99 }
100
str() const101 std::string ElemsPerAddr::str() const
102 {
103 if (isChannelMask()) {
104 if (channels == Chs::INVALID)
105 return ".?";
106 // e.g. .xyz
107 std::string s = ".";
108 if (int(channels) & int(Chs::X))
109 s += 'x';
110 if (int(channels) & int(Chs::Y))
111 s += 'y';
112 if (int(channels) & int(Chs::Z))
113 s += 'z';
114 if (int(channels) & int(Chs::W))
115 s += 'w';
116 return s;
117 } else {
118 // e.g. x4 (note absence of a dot)
119 return "x" + std::to_string(count);
120 }
121 }
122
123 ///////////////////////////////////////////////////////////////////////////////
124 // G4_SendDesc implementations
125 ///////////////////////////////////////////////////////////////////////////////
roundUpToGrf(int bytes)126 static inline int roundUpToGrf(int bytes) {
127 return g4::alignUp((int)getGRFSize(), bytes) / (int)getGRFSize();
128 }
129
getSrc0LenRegs() const130 size_t G4_SendDesc::getSrc0LenRegs() const {
131 return roundUpToGrf(getSrc0LenBytes());
132 }
133
getDstLenRegs() const134 size_t G4_SendDesc::getDstLenRegs() const {
135 return roundUpToGrf(getDstLenBytes());
136 }
137
getSrc1LenRegs() const138 size_t G4_SendDesc::getSrc1LenRegs() const {
139 return roundUpToGrf(getSrc1LenBytes());
140 }
141
isHDC() const142 bool G4_SendDesc::isHDC() const
143 {
144 auto funcID = getSFID();
145 return
146 funcID == SFID::DP_DC0 ||
147 funcID == SFID::DP_DC1 ||
148 funcID == SFID::DP_DC2 ||
149 funcID == SFID::DP_CC;
150 }
151
isLSC() const152 bool G4_SendDesc::isLSC() const
153 {
154 switch (getSFID()) {
155 case SFID::UGM:
156 case SFID::UGML:
157 case SFID::TGM:
158 case SFID::SLM:
159 return true;
160 default:
161 break;
162 }
163 return false;
164 }
165
166
167
168 ///////////////////////////////////////////////////////////////////////////////
169 // G4_SendDescLdSt implementations
170 ///////////////////////////////////////////////////////////////////////////////
G4_SendDescLdSt(SFID sfid,LdStOp _op,G4_ExecSize _execSize,AddrType at,int _addrBits,int _addrDims,int elemBitsMem,int elemBitsReg,int elemsPerAddr,LdStOrder _order,Caching _l1,Caching _l3,G4_Operand * surf,ImmOff _immOff,LdStAttrs _attrs)171 G4_SendDescLdSt::G4_SendDescLdSt(
172 SFID sfid,
173 LdStOp _op,
174 G4_ExecSize _execSize,
175 //
176 // addr params
177 AddrType at, int _addrBits, int _addrDims,
178 //
179 // data params
180 int elemBitsMem, int elemBitsReg, int elemsPerAddr,
181 LdStOrder _order,
182 //
183 // ext info
184 Caching _l1, Caching _l3,
185 G4_Operand *surf,
186 ImmOff _immOff,
187 LdStAttrs _attrs)
188 : G4_SendDesc(G4_SendDesc::Kind::LDST, sfid, _execSize),
189 op(_op),
190 //
191 addrType(at), addrBits(_addrBits), addrDims(_addrDims),
192 //
193 elemBitsMem(elemBitsMem), elemBitsReg(elemBitsReg),
194 elemPerAddr(elemsPerAddr), order(_order),
195 l1(_l1), l3(_l3),
196 surface(surf), immOff(_immOff),
197 attrs(_attrs)
198 {
199 }
200
toExecSlots(const G4_SendDescLdSt & d)201 static size_t toExecSlots(const G4_SendDescLdSt &d)
202 {
203 int minExecSize = 8;
204 if (getGenxPlatform() >= TARGET_PLATFORM::GENX_PVC)
205 minExecSize = 16;
206 MUST_BE_TRUE(false, "TODO: needs to deal with half size LSC messages");
207 MUST_BE_TRUE(false, "TODO: need to deal with varying typed message sizes");
208 // e.g. deal with
209 // SIMD4 typed ...
210 // SIMD4 untyped...
211 // (or we make the descriptor creator just pass the right exec size in)
212 int execSlots = std::max((int)d.getExecSize(), minExecSize);
213 return (size_t)execSlots;
214 }
215
getSrc0LenBytes() const216 size_t G4_SendDescLdSt::getSrc0LenBytes() const
217 {
218 if (overrideSrc0LengthBytesValue >= 0) {
219 return (size_t)overrideSrc0LengthBytesValue;
220 }
221 switch (op) {
222 case LdStOp::LOAD_STRIDED:
223 case LdStOp::STORE_STRIDED:
224 return 8 + 4; // address field is 64b (even for A32) + pitch is 32b
225 case LdStOp::LOAD_BLOCK2D:
226 case LdStOp::STORE_BLOCK2D:
227 // [243:0] ~ 256b = 32B
228 return 32;
229 default:
230 break; // fallthrough to other logic
231 }
232 if (order == LdStOrder::SCALAR) {
233 // transpose messages send one address only
234 return elemPerAddr / 8;
235 } else {
236 MUST_BE_TRUE(false, "TODO: needs to deal with half HDC gunk");
237 MUST_BE_TRUE(false, "TODO: needs to deal with addrDims");
238 int execSlots = std::max((int)execSize, 16);
239 return (size_t)(execSlots * elemPerAddr / 8);
240 }
241 }
242
getSrc1LenBytes() const243 size_t G4_SendDescLdSt::getSrc1LenBytes() const
244 {
245 if (overrideSrc1LengthBytesValue >= 0) {
246 return (size_t)overrideSrc1LengthBytesValue;
247 }
248 if (order == LdStOrder::SCALAR) {
249 // transpose messages send one address only
250 return elemPerAddr / 8;
251 } else {
252 return toExecSlots(*this) * elemBitsReg;
253 }
254 MUST_BE_TRUE(false, "TODO: compute data bytes sent");
255 return (size_t)-1;
256 }
257
getDstLenBytes() const258 size_t G4_SendDescLdSt::getDstLenBytes() const
259 {
260 if (overrideDstLengthBytesValue >= 0) {
261 return (size_t)overrideDstLengthBytesValue;
262 }
263 MUST_BE_TRUE(false, "TODO: compute bytes received");
264 return (size_t)-1;
265 }
setCaching(Caching _l1,Caching _l3)266 void G4_SendDescLdSt::setCaching(Caching _l1, Caching _l3)
267 {
268 l1 = _l1;
269 l3 = _l3;
270 }
isSLM() const271 bool G4_SendDescLdSt::isSLM() const
272 {
273 if (getSFID() == SFID::SLM)
274 return true;
275 MUST_BE_TRUE(!isHDC(), "HDC SLM not supported (yet)");
276 return false;
277 }
278
getAccessType() const279 SendAccess G4_SendDescLdSt::getAccessType() const
280 {
281 if ((int(op) & int(LDST_LOAD_GROUP)) != 0)
282 return SendAccess::READ_ONLY;
283 else if ((int(op) & int(LDST_STORE_GROUP)) != 0)
284 return SendAccess::WRITE_ONLY;
285 else if ((int(op) & int(LDST_ATOMIC_GROUP)) != 0)
286 return hasAttrs(LdStAttrs::ATOMIC_RETURN) ?
287 SendAccess::READ_WRITE : SendAccess::WRITE_ONLY;
288
289 MUST_BE_TRUE(false, "unsupported op group");
290 return SendAccess::INVALID;
291 }
292
isAtomic() const293 bool G4_SendDescLdSt::isAtomic() const
294 {
295 return (int(op) & int(LDST_ATOMIC_GROUP)) != 0;
296 }
297
isTyped() const298 bool G4_SendDescLdSt::isTyped() const
299 {
300 if (getSFID() == SFID::TGM)
301 return true;
302 return false;
303 }
304
ToSymbol(vISA::SFID sfid)305 static std::string ToSymbol(vISA::SFID sfid)
306 {
307 switch (sfid) {
308 case SFID::UGM: return ".ugm";
309 case SFID::UGML: return ".ugml";
310 case SFID::SLM: return ".slm";
311 case SFID::TGM: return ".tgm";
312 // these aren't necessarily supported yet
313 case SFID::DP_DC0: return ".dc0";
314 case SFID::DP_DC1: return ".dc1";
315 case SFID::DP_DC2: return ".dc2";
316 case SFID::DP_CC: return ".dcro";
317 case SFID::URB: return ".urb";
318 // others not needed
319 default: break;
320 }
321 return ".?";
322 }
323
/// Formats a data-size symbol from the register and memory element widths.
///   equal widths     -> "d<width>"      (e.g. "d32")
///   differing widths -> "d<mem>a<reg>"  (e.g. "d8a32")
static std::string ToSymbolDataSize(int reg, int mem)
{
    std::string symbol = "d" + std::to_string(mem);
    if (reg != mem) {
        symbol += "a";
        symbol += std::to_string(reg);
    }
    return symbol;
}
330
ToSymbol(AddrType at)331 static std::string ToSymbol(AddrType at)
332 {
333 switch (at) {
334 case AddrType::FLAT: return "";
335 case AddrType::BSS: return "bss";
336 case AddrType::SS: return "ss";
337 case AddrType::BTI: return "bti";
338 default: break;
339 }
340 return "?";
341 }
342
GetMnemonicPart(std::ostream & os,const G4_SendDescLdSt & m)343 static void GetMnemonicPart(std::ostream &os, const G4_SendDescLdSt &m)
344 {
345 os << ToSymbol(m.op);
346 os << "." << ::ToSymbol(m.getSFID());
347 os << "." << ToSymbolDataSize(m.elemBitsReg, m.elemBitsReg);
348 os << ".a" << std::to_string(m.addrBits);
349 os << ToSymbol(m.l1, m.l3);
350 }
351
str() const352 std::string G4_SendDescLdSt::str() const
353 {
354 std::stringstream ss;
355 str(ss);
356 return ss.str();
357 }
358
str(std::ostream & os) const359 void G4_SendDescLdSt::str(std::ostream &os) const
360 {
361 auto fmtHex =
362 [] (int64_t x) {
363 std::stringstream ss;
364 ss << "0x" << std::hex << std::uppercase << x;
365 return ss.str();
366 };
367
368 GetMnemonicPart(os, *this);
369 os << " ";
370 os << ::ToSymbol(addrType);
371 if (addrType != AddrType::FLAT) {
372 os << "[";
373 if (const G4_Operand *surf = getSurface()) {
374 if (surf->isImm()) {
375 os << fmtHex(surf->asImm()->getImm());
376 } else if (surf->isA0() ){
377 os << "a0.?"; // how to find subreg
378 } else {
379 os << "A?";
380 }
381 } else {
382 os << "???";
383 }
384 os << "]";
385 }
386 }
387
388
389 ///////////////////////////////////////////////////////////////////////////////
390 // G4_SendDescRaw implementations
391 ///////////////////////////////////////////////////////////////////////////////
G4_SendDescRaw(uint32_t fCtrl,uint32_t regs2rcv,uint32_t regs2snd,SFID fID,uint16_t extMsgLen,uint32_t extFCtrl,SendAccess access,G4_Operand * bti,G4_Operand * sti,IR_Builder & builder)392 G4_SendDescRaw::G4_SendDescRaw(
393 uint32_t fCtrl, uint32_t regs2rcv,
394 uint32_t regs2snd, SFID fID, uint16_t extMsgLen,
395 uint32_t extFCtrl, SendAccess access,
396 G4_Operand *bti, G4_Operand *sti,
397 IR_Builder& builder)
398 : G4_SendDesc(G4_SendDesc::Kind::RAW, fID)
399 {
400 // All unnamed bits should be passed with those control bits.
401 // Otherwise, need to be set individually.
402 desc.value = fCtrl;
403
404 desc.layout.rspLength = regs2rcv;
405 desc.layout.msgLength = regs2snd;
406
407 extDesc.value = 0;
408 extDesc.layout.funcID = SFIDtoInt(fID);
409 extDesc.layout.extMsgLength = extMsgLen;
410 extDesc.layout.extFuncCtrl = extFCtrl;
411
412 src1Len = extMsgLen; // [10:6]
413 eotAfterMessage = false; // [5]
414 sfid = fID;
415
416 accessType = access;
417 funcCtrlValid = true;
418
419 m_bti = bti;
420 m_sti = sti;
421
422 if (m_bti && m_bti->isImm())
423 {
424 setBindingTableIdx((unsigned)m_bti->asImm()->getInt());
425 }
426 if (m_sti && m_sti->isImm())
427 {
428 desc.value |= (((unsigned)m_sti->asImm()->getInt()) << 8); // [11:8]
429 }
430
431 uint32_t totalMaxLength = builder.getMaxSendMessageLength();
432 MUST_BE_TRUE(extDesc.layout.extMsgLength + desc.layout.msgLength < totalMaxLength,
433 "combined message length may not exceed the maximum");
434 }
435
G4_SendDescRaw(uint32_t descBits,uint32_t extDescBits,SendAccess access,G4_Operand * bti,G4_Operand * sti)436 G4_SendDescRaw::G4_SendDescRaw(
437 uint32_t descBits, uint32_t extDescBits,
438 SendAccess access,
439 G4_Operand *bti,
440 G4_Operand *sti)
441 : G4_SendDesc(G4_SendDesc::Kind::RAW,
442 intToSFID(extDescBits & 0xF)), // [3:0]
443 accessType(access), m_sti(sti), m_bti(bti), funcCtrlValid(true)
444 {
445 desc.value = descBits;
446 extDesc.value = extDescBits;
447 src1Len = (extDescBits >> 6) & 0x1F; // [10:6]
448 eotAfterMessage = extDesc.layout.eot; // [5]
449
450
451 if (bti && bti->isImm())
452 {
453 setBindingTableIdx((unsigned)bti->asImm()->getInt());
454 }
455 if (sti && sti->isImm())
456 {
457 desc.value |= (((unsigned)m_sti->asImm()->getInt()) << 8); // [11:8]
458 }
459 }
460
G4_SendDescRaw(SFID _sfid,uint32_t _desc,uint32_t _extDesc,int _src1Len,SendAccess access,G4_Operand * bti,bool isValidFuncCtrl)461 G4_SendDescRaw::G4_SendDescRaw(
462 SFID _sfid,
463 uint32_t _desc,
464 uint32_t _extDesc,
465 int _src1Len,
466 SendAccess access,
467 G4_Operand* bti,
468 bool isValidFuncCtrl)
469 : G4_SendDescRaw(_sfid, _desc, _extDesc, _src1Len, access, bti,
470 g4::SIMD_UNDEFINED, isValidFuncCtrl)
471 {}
472
G4_SendDescRaw(SFID _sfid,uint32_t _desc,uint32_t _extDesc,int _src1Len,SendAccess access,G4_Operand * bti,G4_ExecSize execSize,bool isValidFuncCtrl)473 G4_SendDescRaw::G4_SendDescRaw(
474 SFID _sfid,
475 uint32_t _desc,
476 uint32_t _extDesc,
477 int _src1Len,
478 SendAccess access,
479 G4_Operand *bti,
480 G4_ExecSize execSize,
481 bool isValidFuncCtrl)
482 : G4_SendDesc(G4_SendDesc::Kind::RAW, _sfid, execSize),
483 accessType(access), m_sti(nullptr), m_bti(bti), funcCtrlValid(isValidFuncCtrl)
484 {
485 isLscDescriptor =
486 _sfid == SFID::UGM || _sfid == SFID::UGML ||
487 _sfid == SFID::SLM || _sfid == SFID::TGM;
488
489 if (!isLscDescriptor && bti && bti->isImm()) {
490 setBindingTableIdx((unsigned)bti->asImm()->getInt());
491 }
492 // ensure ExDesc[10:6] also holds src1Len
493 // see the note above (other constructor) about DG2 descriptors and
494 // ExDesc[10:6]
495 _extDesc |= ((_src1Len & 0x1F) << 6);
496 desc.value = _desc;
497 extDesc.value = _extDesc;
498 src1Len = _src1Len;
499 eotAfterMessage = false;
500 }
501
getHdcMessageType() const502 uint32_t G4_SendDescRaw::getHdcMessageType() const
503 {
504 MUST_BE_TRUE(isHDC(),"not an HDC message");
505 return (desc.value >> 14) & 0x1F;
506 }
507
getLscAddrType() const508 LSC_ADDR_TYPE G4_SendDescRaw::getLscAddrType() const
509 {
510 MUST_BE_TRUE(isLscOp(), "must be LSC op");
511 const int LSC_ADDR_TYPE_OFFSET = 29;
512 const uint32_t LSC_ADDR_TYPE_MASK = 0x3;
513 const uint32_t rawDescBits = getDesc();
514 auto addrTypeBits = ((rawDescBits >> LSC_ADDR_TYPE_OFFSET) & LSC_ADDR_TYPE_MASK);
515 return LSC_ADDR_TYPE(addrTypeBits + 1);
516 }
517
getLscAddrSizeBytes() const518 int G4_SendDescRaw::getLscAddrSizeBytes() const
519 {
520 MUST_BE_TRUE(isLscOp(), "must be LSC op");
521 auto op = getLscOp();
522 switch (op) {
523 case LSC_LOAD:
524 case LSC_LOAD_STRIDED:
525 case LSC_LOAD_QUAD:
526 case LSC_STORE:
527 case LSC_STORE_STRIDED:
528 case LSC_STORE_QUAD:
529 break;
530 case LSC_LOAD_BLOCK2D:
531 case LSC_STORE_BLOCK2D:
532 return getSFID() == SFID::TGM ? 4 : 8;
533 default:
534 if (op < LSC_ATOMIC_IINC && op > LSC_ATOMIC_XOR) {
535 return 0;
536 }
537 }
538 // it's a good op with an AddrType field in [8:7]
539 switch ((getDesc() >> 7) & 0x3) {
540 case 1: return 2;
541 case 2: return 4;
542 case 3: return 8;
543 default: break;
544 }
545 return 0;
546 }
547
getLscDataOrder() const548 LSC_DATA_ORDER G4_SendDescRaw::getLscDataOrder() const
549 {
550 MUST_BE_TRUE(isLscOp(), "must be LSC op");
551 auto op = getLscOp();
552 if (op == LSC_LOAD_QUAD || op == LSC_STORE_QUAD)
553 return LSC_DATA_ORDER_NONTRANSPOSE;
554 if ((getDesc() >> 15) & 0x1) {
555 return LSC_DATA_ORDER_TRANSPOSE;
556 } else {
557 return LSC_DATA_ORDER_NONTRANSPOSE;
558 }
559 }
560
561
setEOT()562 void G4_SendDescRaw::setEOT() {
563 eotAfterMessage = true;
564
565 if (isLscOp())
566 return;
567
568 extDesc.layout.eot = true;
569 }
570
isHdcIntAtomicMessage(SFID funcID,uint16_t msgType)571 static bool isHdcIntAtomicMessage(SFID funcID, uint16_t msgType)
572 {
573 if (funcID != SFID::DP_DC1)
574 return false;
575
576 if (msgType == DC1_UNTYPED_ATOMIC || msgType == DC1_A64_ATOMIC)
577 {
578 return true;
579 }
580 if (getGenxPlatform() >= GENX_SKL)
581 {
582 if (msgType == DC1_TYPED_ATOMIC)
583 return true;
584 }
585 if (getPlatformGeneration(getGenxPlatform()) >= PlatformGen::XE)
586 {
587 if (msgType == DC1_TYPED_HALF_INTEGER_ATOMIC ||
588 msgType == DC1_TYPED_HALF_COUNTER_ATOMIC ||
589 msgType == DC1_UNTYPED_HALF_INTEGER_ATOMIC ||
590 msgType == DC1_A64_UNTYPED_HALF_INTEGER_ATOMIC)
591 return true;
592 }
593 return false;
594 }
595
isHdcFloatAtomicMessage(SFID funcID,uint16_t msgType)596 static bool isHdcFloatAtomicMessage(SFID funcID, uint16_t msgType)
597 {
598 if (funcID != SFID::DP_DC1)
599 return false;
600
601 if (getGenxPlatform() >= GENX_SKL)
602 {
603 if (msgType == DC1_UNTYPED_FLOAT_ATOMIC ||
604 msgType == DC1_A64_UNTYPED_FLOAT_ATOMIC)
605 return true;
606 }
607 if (getPlatformGeneration(getGenxPlatform()) >= PlatformGen::XE)
608 {
609 if (msgType == DC1_UNTYPED_HALF_FLOAT_ATOMIC ||
610 msgType == DC1_A64_UNTYPED_HALF_FLOAT_ATOMIC)
611 return true;
612 }
613 return false;
614 }
615
isAtomicMessage() const616 bool G4_SendDescRaw::isAtomicMessage() const
617 {
618 if (isLscOp() &&
619 (desc.value & 0x3F) >= LSC_ATOMIC_IINC &&
620 (desc.value & 0x3F) <= LSC_ATOMIC_XOR)
621 {
622 return true;
623 }
624
625 auto funcID = getSFID();
626 if (!isHDC())
627 return false; // guard getMessageType() on SFID without a message type
628 uint16_t msgType = getHdcMessageType();
629 return isHdcIntAtomicMessage(funcID,msgType) ||
630 isHdcFloatAtomicMessage(funcID,msgType);
631 }
632
getHdcAtomicOp() const633 uint16_t G4_SendDescRaw::getHdcAtomicOp() const
634 {
635 MUST_BE_TRUE(isHDC(), "must be HDC message");
636 MUST_BE_TRUE(isAtomicMessage(), "getting atomicOp from non-atomic message!");
637 uint32_t funcCtrl = getFuncCtrl();
638 if (isHdcIntAtomicMessage(getSFID(), getHdcMessageType()))
639 {
640 // bits: 11:8
641 return (uint16_t)((funcCtrl >> 8) & 0xF);
642 }
643
644 // must be float Atomic
645 // bits: 10:8
646 return (int16_t)((funcCtrl >> 8) & 0x7);
647 }
648
isSLMMessage() const649 bool G4_SendDescRaw::isSLMMessage() const
650 {
651 if (getSFID() == SFID::DP_DC2)
652 {
653 uint32_t msgType = getHdcMessageType();
654 if ((msgType == DC2_UNTYPED_SURFACE_WRITE || msgType == DC2_BYTE_SCATTERED_WRITE) &&
655 (getFuncCtrl() & 0x80))
656 {
657 return true;
658 }
659 }
660
661 if (getSFID() == SFID::DP_DC2 ||
662 getSFID() == SFID::DP_DC1 ||
663 getSFID() == SFID::DP_DC0)
664 {
665 if ((getDesc() & 0xFF) == SLMIndex)
666 {
667 return true;
668 }
669 }
670
671 if (m_bti && m_bti->isImm() && m_bti->asImm()->getInt() == SLMIndex)
672 {
673 return true;
674 }
675
676 return getSFID() == SFID::SLM;
677 }
678
679
ResponseLength() const680 uint16_t G4_SendDescRaw::ResponseLength() const
681 {
682 // the loadblock2DArray message may return up to 32 GRF.
683 // Since we don't have enough bits to encode 32, block2d creates an exception where 31 means 31 or 32 (HW detects).
684 // SW must know the actual size is 32 for data-flow/RA/SWSB to function correctly though.
685 // fortunately it doesn't look like 31 is a valid value for this message, we just treat 31 as 32
686 bool isLoadBlock2DArray = isLscOp() && getLscOp() == LSC_LOAD_BLOCK2D;
687 if (desc.layout.rspLength == 31 && isLoadBlock2DArray)
688 {
689 return 32;
690 }
691 return desc.layout.rspLength;
692 }
693
694
isHeaderPresent() const695 bool G4_SendDescRaw::isHeaderPresent() const {
696 if (isLscOp())
697 return false;
698
699 return desc.layout.headerPresent == 1;
700 }
701
setHeaderPresent(bool val)702 void G4_SendDescRaw::setHeaderPresent(bool val)
703 {
704 MUST_BE_TRUE(!isLscOp(), "LSC ops don't have headers");
705 desc.layout.headerPresent = val;
706 }
707
setBindingTableIdx(unsigned idx)708 void G4_SendDescRaw::setBindingTableIdx(unsigned idx)
709 {
710 if (isLscOp()) {
711 extDesc.value |= (idx << 24);
712 return;
713 }
714 desc.value |= idx;
715 }
716
getSamplerMessageType() const717 uint32_t G4_SendDescRaw::getSamplerMessageType() const
718 {
719 MUST_BE_TRUE(isSampler(), "wrong descriptor type for method");
720 return (getFuncCtrl() >> 12) & 0x1f;
721 }
722
is16BitInput() const723 bool G4_SendDescRaw::is16BitInput() const
724 {
725 MUST_BE_TRUE(!isLscOp(), "wrong descriptor type for method");
726 // TODO: could use this for LSC messages too potentially
727 return desc.layout.simdMode2 == 1;
728 }
729
is16BitReturn() const730 bool G4_SendDescRaw::is16BitReturn() const
731 {
732 MUST_BE_TRUE(!isLscOp(), "wrong descriptor type for method");
733 return desc.layout.returnFormat == 1;
734 }
735
isByteScatterRW() const736 bool G4_SendDescRaw::isByteScatterRW() const
737 {
738 auto funcID = getSFID();
739 switch (funcID) {
740 case SFID::DP_DC0:
741 switch (getHdcMessageType()) {
742 case DC_BYTE_SCATTERED_READ:
743 case DC_BYTE_SCATTERED_WRITE:
744 return true;
745 default:
746 break;
747 }
748 break;
749 case SFID::DP_DC1:
750 switch (getHdcMessageType()) {
751 case DC1_A64_SCATTERED_READ:
752 case DC1_A64_SCATTERED_WRITE:
753 return (getBlockSize() == 1);
754 default:
755 break;
756 }
757 break;
758 case SFID::DP_DC2:
759 switch (getHdcMessageType()) {
760 case DC2_A64_SCATTERED_READ:
761 case DC2_A64_SCATTERED_WRITE:
762 return (getBlockSize() == 1);
763 case DC2_BYTE_SCATTERED_READ:
764 case DC2_BYTE_SCATTERED_WRITE:
765 return true;
766 default:
767 break;
768 }
769 break;
770 default:
771 break;
772 }
773 return false;
774 }
775
isDWScatterRW() const776 bool G4_SendDescRaw::isDWScatterRW() const
777 {
778 auto funcID = getSFID();
779 switch (funcID) {
780 case SFID::DP_DC0:
781 switch (getHdcMessageType()) {
782 case DC_DWORD_SCATTERED_READ:
783 case DC_DWORD_SCATTERED_WRITE:
784 return true;
785 default:
786 break;
787 }
788 break;
789 case SFID::DP_DC1:
790 switch (getHdcMessageType()) {
791 case DC1_A64_SCATTERED_READ:
792 case DC1_A64_SCATTERED_WRITE:
793 return (getBlockSize() == 4);
794 default:
795 break;
796 }
797 break;
798 case SFID::DP_DC2:
799 switch (getHdcMessageType()) {
800 case DC2_A64_SCATTERED_READ:
801 case DC2_A64_SCATTERED_WRITE:
802 return (getBlockSize() == 4);
803 default:
804 break;
805 }
806 break;
807 default:
808 break;
809 }
810 return false;
811 }
812
isQWScatterRW() const813 bool G4_SendDescRaw::isQWScatterRW() const
814 {
815 auto funcID = getSFID();
816 switch (funcID) {
817 case SFID::DP_DC0:
818 switch (getHdcMessageType()) {
819 case DC_QWORD_SCATTERED_READ:
820 case DC_QWORD_SCATTERED_WRITE:
821 return true;
822 default:
823 break;
824 }
825 break;
826 case SFID::DP_DC1:
827 switch (getHdcMessageType()) {
828 case DC1_A64_SCATTERED_READ:
829 case DC1_A64_SCATTERED_WRITE:
830 return (getBlockSize() == 8);
831 default:
832 break;
833 }
834 break;
835 case SFID::DP_DC2:
836 switch (getHdcMessageType()) {
837 case DC2_A64_SCATTERED_READ:
838 case DC2_A64_SCATTERED_WRITE:
839 return (getBlockSize() == 8);
840 default:
841 break;
842 }
843 break;
844 default:
845 break;
846 }
847 return false;
848 }
849
isUntypedRW() const850 bool G4_SendDescRaw::isUntypedRW() const
851 {
852 auto funcID = getSFID();
853 switch (funcID) {
854 case SFID::DP_DC1:
855 switch (getHdcMessageType()) {
856 case DC1_UNTYPED_SURFACE_READ:
857 case DC1_UNTYPED_SURFACE_WRITE:
858 case DC1_A64_UNTYPED_SURFACE_READ:
859 case DC1_A64_UNTYPED_SURFACE_WRITE:
860 return true;
861 default:
862 break;
863 }
864 break;
865 case SFID::DP_DC2:
866 switch (getHdcMessageType()) {
867 case DC2_UNTYPED_SURFACE_READ:
868 case DC2_UNTYPED_SURFACE_WRITE:
869 case DC2_A64_UNTYPED_SURFACE_READ:
870 case DC2_A64_UNTYPED_SURFACE_WRITE:
871 return true;
872 default:
873 break;
874 }
875 break;
876 default:
877 break;
878 }
879 return false;
880 }
881
isA64Message() const882 bool G4_SendDescRaw::isA64Message() const
883 {
884 if (!isHDC()) {
885 return false;
886 }
887
888 uint32_t msgType = getHdcMessageType();
889 auto funcID = getSFID();
890 switch (funcID) {
891 case SFID::DP_DC1:
892 {
893 switch (msgType) {
894 default:
895 break;
896 case DC1_A64_SCATTERED_READ:
897 case DC1_A64_UNTYPED_SURFACE_READ:
898 case DC1_A64_ATOMIC:
899 case DC1_A64_BLOCK_READ :
900 case DC1_A64_BLOCK_WRITE:
901 case DC1_A64_UNTYPED_SURFACE_WRITE:
902 case DC1_A64_SCATTERED_WRITE:
903 case DC1_A64_UNTYPED_FLOAT_ATOMIC:
904 case DC1_A64_UNTYPED_HALF_INTEGER_ATOMIC:
905 case DC1_A64_UNTYPED_HALF_FLOAT_ATOMIC:
906 return true;
907 }
908 break;
909 }
910 case SFID::DP_DC2 :
911 {
912 switch (msgType) {
913 default:
914 break;
915 case DC2_A64_SCATTERED_READ:
916 case DC2_A64_UNTYPED_SURFACE_READ:
917 case DC2_A64_UNTYPED_SURFACE_WRITE:
918 case DC2_A64_SCATTERED_WRITE:
919 return true;
920 }
921 break;
922 }
923 default:
924 break;
925 }
926 return false;
927 }
928
getNumEnabledChannels(uint32_t chDisableBits)929 static int getNumEnabledChannels(uint32_t chDisableBits)
930 {
931 switch (chDisableBits)
932 {
933 case 0x7:
934 case 0xB:
935 case 0xD:
936 case 0xE: return 1;
937 case 0x3:
938 case 0x5:
939 case 0x6:
940 case 0x9:
941 case 0xA:
942 case 0xC: return 2;
943 case 0x1:
944 case 0x2:
945 case 0x4:
946 case 0x8: return 3;
947 case 0x0: return 4;
948 case 0xF: return 0;
949 default: MUST_BE_TRUE(false, "Illegal Channel Mask Number");
950 }
951 return 0;
952 }
953
954 #define MSG_BLOCK_SIZE_OFFSET 8
getEnabledChannelNum() const955 unsigned G4_SendDescRaw::getEnabledChannelNum() const
956 {
957 // TODO: should further scope this to typed/untyped
958 MUST_BE_TRUE(isHDC(), "message does not have field ChannelEnable");
959 uint32_t funcCtrl = getFuncCtrl();
960 return getNumEnabledChannels((funcCtrl >> MSG_BLOCK_SIZE_OFFSET) & 0xF);
961 }
962
getBlockNum() const963 unsigned G4_SendDescRaw::getBlockNum() const
964 {
965 MUST_BE_TRUE(isHDC(), "not an HDC message");
966
967 uint32_t funcCtrl = getFuncCtrl();
968
969 #define MSG_BLOCK_NUMBER_OFFSET 10
970 funcCtrl = (funcCtrl >> MSG_BLOCK_NUMBER_OFFSET) & 0x3;
971 switch (funcCtrl)
972 {
973 case SVM_BLOCK_NUM_1: return 1;
974 case SVM_BLOCK_NUM_2: return 2;
975 case SVM_BLOCK_NUM_4: return 4;
976 case SVM_BLOCK_NUM_8: return 8;
977 default: MUST_BE_TRUE(false, "Illegal SVM block number (should be 1, 2, 4, or 8).");
978 }
979
980 return 0;
981 }
982
getBlockSize() const983 unsigned G4_SendDescRaw::getBlockSize() const
984 {
985 MUST_BE_TRUE(isHDC(), "not an HDC message");
986
987 uint32_t funcCtrl = getFuncCtrl();
988
989 funcCtrl = (funcCtrl >> MSG_BLOCK_SIZE_OFFSET) & 0x3;
990 switch (funcCtrl)
991 {
992 case SVM_BLOCK_TYPE_BYTE: return 1;
993 case SVM_BLOCK_TYPE_DWORD: return 4;
994 case SVM_BLOCK_TYPE_QWORD: return 8;
995 default: MUST_BE_TRUE(false, "Illegal SVM block size (should be 1, 4, or 8).");
996 }
997 return 0;
998 }
999
isOwordLoad() const1000 bool G4_SendDescRaw::isOwordLoad() const
1001 {
1002 if (!isHDC()) {
1003 return false;
1004 }
1005 uint32_t funcCtrl = getFuncCtrl();
1006 auto funcID = getSFID();
1007 static int DC0_MSG_TYPE_OFFSET = 14;
1008 static int DC1_MSG_SUBTYPE_OFFSET = 12; // [31:12]
1009 uint16_t msgType = (funcCtrl >> DC0_MSG_TYPE_OFFSET) & 0x1F;
1010 uint16_t dc1MsgSubType = (funcCtrl >> DC1_MSG_SUBTYPE_OFFSET) & 0x3;
1011 // bits [18:14] are message type
1012 // (included 18 because that is set for scratch)
1013 static const uint32_t MSD0R_OWAB = 0x0; // DC0
1014 static const uint32_t MSD0R_OWB = 0x0; // DC0
1015 static const uint32_t MSD_CC_OWAB = 0x1; // DC_CC
1016 static const uint32_t MSD_CC_OWB = 0x0; // DC_CC
1017 static const uint32_t MSD1R_A64_OWB = 0x14; // DC1 A64 [13:12] == 1
1018 static const uint32_t MSD1R_A64_OWAB = 0x14; // DC1 A64 [13:12] == 0
1019 bool isDc0Owb = funcID == SFID::DP_DC0 && (msgType == MSD0R_OWAB || msgType == MSD0R_OWB);
1020 bool isCcOwb = funcID == SFID::DP_CC && (msgType == MSD_CC_OWAB || msgType == MSD_CC_OWB);
1021 bool isDc1A64Owb =
1022 funcID == SFID::DP_DC1 && (msgType == MSD1R_A64_OWB) &&
1023 // st==2, 3 don't have mappings that I can find, but just to be
1024 // safe force 0 or 1 (which are unalgined vs aligned)
1025 (dc1MsgSubType == 0 || dc1MsgSubType == 1);
1026 return isDc0Owb || isCcOwb || isDc1A64Owb;
1027 }
1028
getOwordsAccessed() const1029 unsigned G4_SendDescRaw::getOwordsAccessed() const
1030 {
1031 MUST_BE_TRUE(isOwordLoad(), "must be OWord message");
1032 // This encoding holds for the DP_DC0, DP_CC, and DP_DC1 (A64 block)
1033 // element count.
1034 auto owEnc = (getFuncCtrl() >> 8) & 0x7; // Desc[10:8] is OW count
1035 if (owEnc == 0) {
1036 return 1; // OW1L (low half of GRF)
1037 } else if (owEnc == 1) {
1038 // for OW1H (high half of GRF): treat as full 32B
1039 // (this control probably isn't ever be used and was removed in Xe)
1040 return 1;
1041 } else {
1042 // 2 = OW2, 3 == OW4, 4 == OW8, 5 == OW16
1043 return 2 << (owEnc - 2);
1044 }
1045 }
1046
isHdcTypedSurfaceWrite() const1047 bool G4_SendDescRaw::isHdcTypedSurfaceWrite() const
1048 {
1049 return isHDC() && getHdcMessageType() == DC1_TYPED_SURFACE_WRITE;
1050 }
1051
getDescription() const1052 std::string G4_SendDescRaw::getDescription() const
1053 {
1054 // Return plain text string of type of msg, ie "oword read", "oword write",
1055 // "media rd", etc.
1056 const G4_SendDescRaw* msgDesc = this;
1057 unsigned int category;
1058
1059 switch (msgDesc->getSFID())
1060 {
1061 case SFID::SAMPLER: return "sampler";
1062 case SFID::GATEWAY: return "gateway";
1063 case SFID::DP_DC2:
1064 switch (getHdcMessageType())
1065 {
1066 case DC2_UNTYPED_SURFACE_READ: return "scaled untyped surface read";
1067 case DC2_A64_SCATTERED_READ: return "scaled A64 scatter read";
1068 case DC2_A64_UNTYPED_SURFACE_READ: return "scaled A64 untyped surface read";
1069 case DC2_BYTE_SCATTERED_READ: return "scaled byte scattered read";
1070 case DC2_UNTYPED_SURFACE_WRITE: return "scaled untyped surface write";
1071 case DC2_A64_UNTYPED_SURFACE_WRITE: return "scaled A64 untyped surface write";
1072 case DC2_A64_SCATTERED_WRITE: return "scaled A64 scattered write";
1073 case DC2_BYTE_SCATTERED_WRITE: return "scaled byte scattede write";
1074 default: return "unrecognized DC2 message";
1075 }
1076 case SFID::DP_WRITE:
1077 switch ((getFuncCtrl() >> 14) & 0x1F)
1078 {
1079 case 0xc: return "render target write";
1080 case 0xd: return "render target read";
1081 default: return "unrecognized RT message";
1082 }
1083 break;
1084 case SFID::URB: return "urb";
1085 case SFID::SPAWNER: return "thread spawner";
1086 case SFID::VME: return "vme";
1087 case SFID::DP_CC:
1088 switch (getHdcMessageType())
1089 {
1090 case 0x0: return "oword block read";
1091 case 0x1: return "unaligned oword block read";
1092 case 0x2: return "oword dual block read";
1093 case 0x3: return "dword scattered read";
1094 default: return "unrecognized DCC message";
1095 }
1096 case SFID::DP_DC0:
1097 category = (msgDesc->getFuncCtrl() >> 18) & 0x1;
1098 if (category == 0)
1099 {
1100 // legacy data port
1101 bool hword = (msgDesc->getFuncCtrl() >> 13) & 0x1;
1102 switch (getHdcMessageType())
1103 {
1104 case 0x0: return hword ? "hword block read" : "oword block read";
1105 case 0x1: return hword ? "hword aligned block read" : "unaligned oword block read";
1106 case 0x2: return "oword dual block read";
1107 case 0x3: return "dword scattered read";
1108 case 0x4: return "byte scattered read";
1109 case 0x7: return "memory fence";
1110 case 0x8: return hword ? "hword block write" : "oword block write";
1111 case 0x9: return "hword aligned block write";
1112 case 0xa: return "oword dual block write";
1113 case 0xb: return "dword scattered write";
1114 case 0xc: return "byte scattered write";
1115 case 0x5: return "qword gather";
1116 case 0xd: return "qword scatter";
1117 default: return "unrecognized DC0 message";
1118 }
1119 }
1120 else
1121 {
1122 // scratch
1123 int bits = (msgDesc->getFuncCtrl() >> 17) & 0x1;
1124
1125 if (bits == 0)
1126 return "scratch read";
1127 else
1128 return "scratch write";
1129 }
1130 break;
1131 case SFID::DP_PI: return "dp_pi";
1132 case SFID::DP_DC1:
1133 switch (getHdcMessageType())
1134 {
1135 case 0x0: return "transpose read";
1136 case 0x1: return "untyped surface read";
1137 case 0x2: return "untyped atomic operation";
1138 case 0x3: return "untyped atomic operation simd4x2";
1139 case 0x4: return "media block read";
1140 case 0x5: return "typed surface read";
1141 case 0x6: return "typed atomic operation";
1142 case 0x7: return "typed atomic operation simd4x2";
1143 case 0x8: return "untyped atomic float add";
1144 case 0x9: return "untyped surface write";
1145 case 0xa: return "media block write (non-iecp)";
1146 case 0xb: return "atomic counter operation";
1147 case 0xc: return "atomic counter operation simd4x2";
1148 case 0xd: return "typed surface write";
1149 case 0x10: return "a64 gathering read";
1150 case 0x11: return "a64 untyped surface read";
1151 case 0x12: return "a64 untyped atomic operation";
1152 case 0x13: return "a64 untyped atomic operation simd4x2";
1153 case 0x14: return "a64 block read";
1154 case 0x15: return "a64 block write";
1155 case 0x18: return "a64 untyped atomic float add";
1156 case 0x19: return "a64 untyped surface write";
1157 case 0x1a: return "a64 scattered write";
1158 default: return "unrecognized DC1 message";
1159 }
1160 break;
1161 case SFID::CRE: return "cre";
1162 case SFID::SLM:
1163 case SFID::TGM:
1164 case SFID::UGM:
1165 case SFID::UGML:
1166 {
1167 LscOpInfo opInfo { };
1168 if (LscOpInfoFind((LSC_OP)(desc.value & 0x3F), opInfo)) { // Desc[5:0]
1169 return opInfo.mnemonic;
1170 } else {
1171 const char* invalid = "lsc (invalid operation)";
1172 return invalid;
1173 }
1174 }
1175 default: return "--";
1176 }
1177 return NULL;
1178 }
1179
getSrc0LenBytes() const1180 size_t G4_SendDescRaw::getSrc0LenBytes() const
1181 {
1182 return MessageLength() * (size_t)getGRFSize();
1183 }
1184
getDstLenBytes() const1185 size_t G4_SendDescRaw::getDstLenBytes() const
1186 {
1187 if (isScratchRW()) {
1188 return 32 * getScratchRWSize(); // HWords
1189 } else if (isOwordLoad()) {
1190 return 16 * getOwordsAccessed(); // OWords
1191 #if 0
1192 // Due to VMIT-9224, comment this out!
1193 // Use macro fo easy testing.
1194 } else if (isByteScatterRW() && isDataPortRead()) {
1195 assert(getExecSize() != g4::SIMD_UNDEFINED);
1196 uint16_t nbytes = getBlockNum();
1197 // assume 4 at least
1198 nbytes = (nbytes >= 4 ? nbytes : 4);
1199 size_t sz = nbytes * getExecSize();
1200 return sz;
1201 } else if (isDWScatterRW() && isDataPortRead()) {
1202 assert(getExecSize() != g4::SIMD_UNDEFINED);
1203 size_t sz = 4 * getBlockNum() * getExecSize();
1204 return sz;
1205 } else if (isQWScatterRW() && isDataPortRead()) {
1206 assert(getExecSize() != g4::SIMD_UNDEFINED);
1207 size_t sz = 8 * getBlockNum() * getExecSize();
1208 return sz;
1209 } else if (isUntypedRW() && isDataPortRead()) {
1210 assert(getExecSize() != g4::SIMD_UNDEFINED);
1211 size_t sz = 4 * getEnabledChannelNum() * getExecSize();
1212 return sz;
1213 #endif
1214 } else {
1215 // fallback to the raw GRF count
1216 return ResponseLength() * (size_t)getGRFSize();
1217 }
1218 }
1219
getSrc1LenBytes() const1220 size_t G4_SendDescRaw::getSrc1LenBytes() const
1221 {
1222 if (isScratchRW()) {
1223 return 32 * getScratchRWSize(); // HWords
1224 }
1225 // we could support OW store here, but no one seems to need that and
1226 // we are phasing this class out; so ignore it for now
1227
1228 return extMessageLength() * (size_t)getGRFSize();
1229 }
1230
isFence() const1231 bool G4_SendDescRaw::isFence() const
1232 {
1233 if (isLscOp())
1234 return (desc.value & 0x3F) == LSC_FENCE;
1235
1236 SFID sfid = getSFID();
1237 unsigned FC = getFuncCtrl();
1238
1239 // Memory Fence
1240 if (sfid == SFID::DP_DC0 && ((FC >> 14) & 0x1F) == DC_MEMORY_FENCE)
1241 {
1242 return true;
1243 }
1244
1245 // Sampler cache flush
1246 if (sfid == SFID::SAMPLER && ((FC >> 12) & 0x1F) == 0x1F)
1247 {
1248 return true;
1249 }
1250
1251 return false;
1252 }
isBarrier() const1253 bool G4_SendDescRaw::isBarrier() const
1254 {
1255 auto funcID = getSFID();
1256 uint32_t funcCtrl = getFuncCtrl();
1257 return funcID == SFID::GATEWAY && (funcCtrl & 0xFF) == 0x4;
1258 }
1259
getOffset() const1260 int G4_SendDescRaw::getOffset() const
1261 {
1262 if (isLscOp()) {
1263 MUST_BE_TRUE(false, "need to do some work here...");
1264 }
1265 if (isScratchRW())
1266 return getScratchRWOffset() * 32;
1267 return 0;
1268 }
1269
cachingToG4(LSC_CACHE_OPT co)1270 static Caching cachingToG4(LSC_CACHE_OPT co)
1271 {
1272 switch (co) {
1273 case LSC_CACHING_DEFAULT: return Caching::DF;
1274 case LSC_CACHING_CACHED: return Caching::CA;
1275 case LSC_CACHING_READINVALIDATE: return Caching::RI;
1276 case LSC_CACHING_WRITEBACK: return Caching::WB;
1277 case LSC_CACHING_UNCACHED: return Caching::UC;
1278 case LSC_CACHING_STREAMING: return Caching::ST;
1279 case LSC_CACHING_WRITETHROUGH: return Caching::WT;
1280 default: break;
1281 }
1282 return Caching::INVALID;
1283 }
1284
1285 // decode caching from Desc[19:17]
decodeCaching3(bool isLoad,uint32_t descBits)1286 static std::pair<Caching,Caching> decodeCaching3(
1287 bool isLoad, uint32_t descBits)
1288 {
1289 auto mk = [&](Caching l1IfLd, Caching l3IfLd,
1290 Caching l1IfStAt, Caching l3IfStAt)
1291 {
1292 return isLoad ?
1293 std::make_pair(l1IfLd, l3IfLd) :
1294 std::make_pair(l1IfStAt, l3IfStAt);
1295 };
1296
1297 // Decode caching field from in [19:17]
1298 uint32_t ccBits = (descBits >> 17) & 0x7;
1299 switch (ccBits) {
1300 case 0: return mk(
1301 Caching::DF, Caching::DF,
1302 Caching::DF, Caching::DF);
1303 case 1: return mk(
1304 Caching::UC, Caching::UC,
1305 Caching::UC, Caching::UC);
1306 case 2: return mk(
1307 Caching::UC, Caching::CA,
1308 Caching::UC, Caching::WB);
1309 case 3: return mk(
1310 Caching::CA, Caching::UC,
1311 Caching::WT, Caching::UC);
1312 case 4: return mk(
1313 Caching::CA, Caching::CA,
1314 Caching::WT, Caching::WB);
1315 case 5: return mk(
1316 Caching::ST, Caching::UC,
1317 Caching::ST, Caching::UC);
1318 case 6: return mk(
1319 Caching::ST, Caching::CA,
1320 Caching::ST, Caching::WB);
1321 case 7: return mk(
1322 Caching::RI, Caching::CA,
1323 Caching::WB, Caching::WB);
1324 }
1325 return std::make_pair(Caching::INVALID,Caching::INVALID);
1326 }
1327
1328
getCaching() const1329 std::pair<Caching,Caching> G4_SendDescRaw::getCaching() const {
1330 if (!isLscOp()) {
1331 return std::make_pair(Caching::INVALID, Caching::INVALID);
1332 }
1333 const auto opInfo = LscOpInfoGet(getLscOp());
1334 if (opInfo.isOther()) {
1335 return std::make_pair(Caching::INVALID, Caching::INVALID);
1336 }
1337
1338 auto ccPair =
1339 decodeCaching3(opInfo.isLoad(), getDesc());
1340 MUST_BE_TRUE(
1341 ccPair.first != Caching::INVALID &&
1342 ccPair.second != Caching::INVALID,
1343 "unexpected invalid caching options (corrupt descriptor?)");
1344 return ccPair;
1345 }
1346
1347
toVisaCachingOpt(Caching c)1348 static LSC_CACHE_OPT toVisaCachingOpt(Caching c) {
1349 switch (c) {
1350 case Caching::DF: return LSC_CACHING_DEFAULT;
1351 case Caching::UC: return LSC_CACHING_UNCACHED;
1352 case Caching::CA: return LSC_CACHING_CACHED;
1353 case Caching::WB: return LSC_CACHING_WRITEBACK;
1354 case Caching::WT: return LSC_CACHING_WRITETHROUGH;
1355 case Caching::ST: return LSC_CACHING_STREAMING;
1356 case Caching::RI: return LSC_CACHING_READINVALIDATE;
1357 default:
1358 MUST_BE_TRUE(false, "invalid cache option");
1359 return (LSC_CACHE_OPT)-1;
1360 }
1361 }
1362
setCaching(Caching l1,Caching l3)1363 void G4_SendDescRaw::setCaching(Caching l1, Caching l3)
1364 {
1365 if (!isLscOp()) {
1366 MUST_BE_TRUE(
1367 (l1 == Caching::INVALID && l3 == Caching::INVALID) ||
1368 (l1 == Caching::DF && l3 == Caching::DF),
1369 "invalid caching options for platform*SFID");
1370 }
1371 const auto opInfo = LscOpInfoGet(getLscOp());
1372 MUST_BE_TRUE(!opInfo.isOther(), "invalid LSC message kind for caching op");
1373 LSC_CACHE_OPTS visaCopts { };
1374 visaCopts.l1 = toVisaCachingOpt(l1);
1375 visaCopts.l3 = toVisaCachingOpt(l3);
1376
1377 uint32_t cacheEnc = 0;
1378 uint32_t fieldMask = (0x7 << 17);
1379 bool isBits17_19 = true;
1380 bool success =
1381 LscTryEncodeCacheOpts(opInfo, visaCopts, cacheEnc, isBits17_19);
1382 MUST_BE_TRUE(success, "failed to set caching options");
1383 desc.value &= ~fieldMask;
1384 desc.value |= cacheEnc;
1385 }
1386
isDc1OpTyped(uint32_t desc)1387 static bool isDc1OpTyped(uint32_t desc)
1388 {
1389 uint32_t mty = (desc >> 14) & 0x1F;
1390 switch (mty) {
1391 case DC1_TYPED_SURFACE_WRITE:
1392 case DC1_TYPED_SURFACE_READ:
1393 case DC1_TYPED_ATOMIC:
1394 case DC1_TYPED_HALF_INTEGER_ATOMIC:
1395 return true;
1396 default:
1397 break;
1398 }
1399 return false;
1400 }
1401
isTyped() const1402 bool G4_SendDescRaw::isTyped() const
1403 {
1404 return getSFID() == SFID::DP_DC1 && isDc1OpTyped(getDesc());
1405 }
1406
1407