1 /*========================== begin_copyright_notice ============================
2
3 Copyright (C) 2017-2021 Intel Corporation
4
5 SPDX-License-Identifier: MIT
6
7 ============================= end_copyright_notice ===========================*/
8
9 #include "Decoder.hpp"
10 #include "IGAToGEDTranslation.hpp"
11 #include "GEDToIGATranslation.hpp"
12 #include "../../asserts.hpp"
13 #include "../../strings.hpp"
14 #include "../../Frontend/Formatter.hpp"
15 #include "../../Frontend/IRToString.hpp"
16 #include "../../IR/Checker/IRChecker.hpp"
17 #include "../../IR/Messages.hpp"
18 #include "../../IR/SWSBSetter.hpp"
19 #include "../../MemManager/MemManager.hpp"
20
21 #include <sstream>
22 #include <cstring>
23
24
25
// Used to label expressions that need to be removed once GED is fixed
#define GED_WORKAROUND(X) (X)

using namespace ::iga;

// Per-source-operand GED field accessors. Each line pairs a value type with
// a GED field name and expands a helper macro defined outside this section.
// NOTE(review): the _01 / _012 suffix presumably selects which source
// operands (src0..src1 vs. src0..src2) receive an accessor -- confirm
// against the macro definitions.
DEFINE_GED_SOURCE_ACCESSORS_01(GED_ADDR_MODE, AddrMode)

DEFINE_GED_SOURCE_ACCESSORS_012(GED_REG_FILE, RegFile)
DEFINE_GED_SOURCE_ACCESSORS_01(int32_t, AddrImm)
DEFINE_GED_SOURCE_ACCESSORS_01(uint32_t, AddrSubRegNum)
DEFINE_GED_SOURCE_ACCESSORS_01(uint32_t, Width)

// DEFINE_GED_SOURCE_ACCESSORS_INLINE_012(GED_REG_FILE, RegFile)
DEFINE_GED_SOURCE_ACCESSORS_012(GED_DATA_TYPE, DataType)
DEFINE_GED_SOURCE_ACCESSORS_012(uint32_t, RegNum)
DEFINE_GED_SOURCE_ACCESSORS_012(uint32_t, SubRegNum)
DEFINE_GED_SOURCE_ACCESSORS_012(GED_MATH_MACRO_EXT, MathMacroExt)
DEFINE_GED_SOURCE_ACCESSORS_012(uint32_t, VertStride)
DEFINE_GED_SOURCE_ACCESSORS_012(uint32_t, ChanSel)
DEFINE_GED_SOURCE_ACCESSORS_012(GED_REP_CTRL, RepCtrl)
DEFINE_GED_SOURCE_ACCESSORS_012(GED_SRC_MOD, SrcMod)
DEFINE_GED_SOURCE_ACCESSORS_012(uint32_t, HorzStride)
48
49 static void setImmValKind(Type t, ImmVal &val)
50 {
51 switch (t) {
52 case Type::B: val.kind = ImmVal::Kind::S8; break;
53 case Type::UB: val.kind = ImmVal::Kind::U8; break;
54 case Type::W: val.kind = ImmVal::Kind::S16; break;
55 case Type::UW: val.kind = ImmVal::Kind::U16; break;
56 case Type::D: val.kind = ImmVal::Kind::S32; break;
57 case Type::UD: val.kind = ImmVal::Kind::U32; break;
58 case Type::Q: val.kind = ImmVal::Kind::S64; break;
59 case Type::UQ: val.kind = ImmVal::Kind::U64; break;
60 case Type::HF: val.kind = ImmVal::Kind::F16; break;
61 case Type::F: val.kind = ImmVal::Kind::F32; break;
62 case Type::DF: val.kind = ImmVal::Kind::F64; break;
63 case Type::V: // fallthrough for the packed vector types
64 case Type::UV:
65 case Type::VF:
66 val.kind = ImmVal::Kind::U32;
67 break;
68 default:
69 break;
70 }
71 }
72
macroDefaultSourceRegion(int srcOpIx,const OpSpec & os,Platform p,ExecSize execSize)73 static Region macroDefaultSourceRegion(
74 int srcOpIx, const OpSpec &os, Platform p, ExecSize execSize)
75 {
76 if (os.hasImplicitSrcRegion(srcOpIx, execSize, true)) {
77 return os.implicitSrcRegion(srcOpIx, execSize, true);
78 } else if (srcOpIx == 2) {
79 return Region::SRCXX1;
80 } else {
81 if (p >= Platform::XE) {
82 return os.isTernary() ?
83 Region::SRC1X0 : // XE ternary packed region
84 Region::SRC110;
85 }
86 return os.isTernary() ?
87 Region::SRC2X1 :
88 Region::SRC441;
89 }
90 }
91
92
Decoder(const Model & model,ErrorHandler & errHandler)93 Decoder::Decoder(const Model &model, ErrorHandler &errHandler) :
94 GEDBitProcessor(model,errHandler),
95 m_gedModel(lowerPlatform(model.platform)),
96 m_SWSBEncodeMode(model.getSWSBEncodeMode()),
97 m_kernel(nullptr),
98 m_opSpec(nullptr),
99 m_binary(nullptr)
100 {
101 IGA_ASSERT(m_gedModel != GED_MODEL_INVALID, "invalid GED model");
102 }
103
decodeSWSB(Instruction * inst)104 void Decoder::decodeSWSB(Instruction* inst)
105 {
106 if (platform() >= Platform::XE) {
107 uint32_t swsbBits = 0;
108 if (inst->getOp() != Op::INVALID &&
109 inst->getOp() != Op::ILLEGAL)
110 {
111 GED_DECODE_RAW_TO(SWSB, swsbBits);
112 }
113 // must convert the raw encoding bits to our SWSB IR
114 SWSB::InstType instType = inst->getSWSBInstType(m_SWSBEncodeMode);
115 SWSB sw;
116 SWSB_STATUS status = sw.decode(swsbBits, m_SWSBEncodeMode, instType);
117
118 switch (status)
119 {
120 case SWSB_STATUS::SUCCESS:
121 break;
122 case SWSB_STATUS::ERROR_SET_ON_VARIABLE_LENGTH_ONLY:
123 errorT("SBID set is only allowed on variable latency ops");
124 break;
125 case SWSB_STATUS::ERROR_INVALID_SBID_VALUE:
126 errorT("invalid SBID value 0x%x", swsbBits);
127 break;
128 case SWSB_STATUS::ERROR_ENCODE_MODE:
129 errorT("invalid encoding mode for platform");
130 break;
131 default:
132 errorT("unknown error decoding SBID value 0x%x", swsbBits);
133 break;
134 }
135 inst->setSWSB(sw);
136 }
137 }
138
decodeKernelBlocks(const void * binary,size_t binarySize)139 Kernel *Decoder::decodeKernelBlocks(
140 const void *binary,
141 size_t binarySize)
142 {
143 return decodeKernel(binary, binarySize, false);
144 }
145
146
decodeKernelNumeric(const void * binary,size_t binarySize)147 Kernel *Decoder::decodeKernelNumeric(
148 const void *binary,
149 size_t binarySize)
150 {
151 return decodeKernel(binary, binarySize, true);
152 }
153
isMacro() const154 bool Decoder::isMacro() const {
155 return
156 m_opSpec->is(Op::MADM) ||
157 (m_opSpec->is(Op::MATH) && IsMacro(m_subfunc.math));
158 }
159
decodeKernel(const void * binary,size_t binarySize,bool numericLabels)160 Kernel *Decoder::decodeKernel(
161 const void *binary,
162 size_t binarySize,
163 bool numericLabels)
164 {
165 m_binary = binary;
166 if (binarySize == 0) {
167 // edge case: empty kernel is okay
168 return new Kernel(m_model);
169 }
170 if (binarySize < 8) {
171 // bail if we don't have at least a compact instruction
172 errorT("binary size is too small");
173 return nullptr;
174 }
175 Kernel *kernel = new Kernel(m_model);
176
177 InstList insts;
178 // NOTE: we could pre-allocate instruction list here
179 // (this would block allocate everything)
180 // insts.reserve(binarySize / 8 + 1);
181
182 // Pass 1. decode them all into Instruction objects
183 decodeInstructions(
184 *kernel,
185 binary,
186 binarySize,
187 insts);
188
189 if (numericLabels) {
190 Block *block = kernel->createBlock();
191 block->setPC(0);
192 block->setID(1);
193 for (Instruction *inst : insts) {
194 block->appendInstruction(inst);
195 }
196 kernel->appendBlock(block);
197 } else {
198 auto blockStarts = Block::inferBlocks(
199 errorHandler(),
200 kernel->getMemManager(),
201 insts);
202 int id = 1;
203 for (auto bitr : blockStarts) {
204 bitr.second->setID(id++);
205 kernel->appendBlock(bitr.second);
206 }
207 }
208 return kernel;
209 }
210
decodeSubfunction()211 Subfunction Decoder::decodeSubfunction()
212 {
213 Subfunction sf = InvalidFC::INVALID;
214
215 switch (m_opSpec->op) {
216 case Op::IF:
217 case Op::ELSE:
218 case Op::GOTO:
219 {
220 BranchCntrl bc = BranchCntrl::OFF;
221 if (m_opSpec->supportsBranchCtrl()) {
222 // HSW doesn't support this
223 GED_DECODE_TO(BranchCtrl, translate, bc);
224 }
225 sf = bc;
226 break;
227 }
228 case Op::MATH: {
229 GED_DECODE(MathFC, GED_MATH_FC, mathFc, MathFC);
230 sf = mathFc;
231 if (mathFc == MathFC::INVALID) {
232 errorT("invalid MathFC");
233 }
234 break;
235 }
236 case Op::BFN: {
237 GED_DECODE_RAW(uint32_t, lut8, BfnFC);
238 sf = BfnFC((uint8_t)lut8);
239 break;
240 }
241 case Op::DPAS:
242 case Op::DPASW:
243 {
244 // GED splits this field up; we fuse it as a single subfunction
245 uint32_t systolicDepth;
246 GED_DECODE_RAW_TO_SRC(systolicDepth, uint32_t, SystolicDepth);
247 uint32_t repeatCount;
248 GED_DECODE_RAW_TO_SRC(repeatCount, uint32_t, RepeatCount);
249 sf = GetDpasFC(systolicDepth, repeatCount);
250 break;
251 }
252 case Op::SYNC: {
253 GED_DECODE(SyncFC, GED_SYNC_FC, syncFC, SyncFC);
254 sf = syncFC;
255 break;
256 }
257 case Op::SENDSC:
258 case Op::SENDS:
259 // handled in descriptor decoding
260 break;
261 case Op::SENDC:
262 case Op::SEND:
263 if (platform() >= Platform::XE) {
264 GED_DECODE(SFID, GED_SFID, sfid, SFID);
265 sf = sfid;
266 } // else handled in descriptor decoding
267 break;
268 default:
269 IGA_ASSERT(!m_opSpec->supportsSubfunction(),
270 "we need to decode a subfunction here");
271 sf = InvalidFC::INVALID;
272 break;
273 }
274 return sf;
275 }
276
decodeOpSpec(Op op)277 const OpSpec *Decoder::decodeOpSpec(Op op)
278 {
279 auto os = &m_model.lookupOpSpec(op);
280 return os;
281 }
282
// Pass 1. decode every instruction in the binary into an Instruction object
decodeInstructions(Kernel & kernel,const void * binaryStart,size_t binarySize,InstList & insts)284 void Decoder::decodeInstructions(
285 Kernel &kernel,
286 const void *binaryStart,
287 size_t binarySize,
288 InstList &insts)
289 {
290 restart();
291 uint32_t nextId = 1;
292 const unsigned char *binary = (const unsigned char *)binaryStart;
293
294 int32_t bytesLeft = (int32_t)binarySize;
295 while (bytesLeft > 0)
296 {
297 // need at least 4 bytes to check compaction control
298 if (bytesLeft < 4) {
299 warningT("unexpected padding at end of kernel");
300 break;
301 }
302 // ensure there's enough buffer left
303 int32_t iLen = getBitField(COMPACTION_CONTROL,1) != 0 ?
304 COMPACTED_SIZE :
305 UNCOMPACTED_SIZE;
306 if (bytesLeft < iLen) {
307 warningT("unexpected padding at end of kernel");
308 break;
309 }
310 memset(&m_currGedInst, 0, sizeof(m_currGedInst));
311 GED_RETURN_VALUE status =
312 GED_DecodeIns(m_gedModel, binary, (uint32_t)binarySize, &m_currGedInst);
313 Instruction *inst = nullptr;
314 if (status == GED_RETURN_VALUE_NO_COMPACT_FORM) {
315 errorT("error decoding instruction (no compacted form)");
316 inst = createErrorInstruction(
317 kernel,
318 "unable to decompact",
319 binary,
320 iLen);
321 // fall through: GED can sort of decode some things here
322 } else if (status != GED_RETURN_VALUE_SUCCESS) {
323 errorT("error decoding instruction");
324 inst = createErrorInstruction(
325 kernel,
326 "GED error decoding instruction",
327 binary,
328 iLen);
329 } else {
330 const auto gedOp = GED_GetOpcode(&m_currGedInst);
331 const Op op = translate(gedOp);
332 m_opSpec = decodeOpSpec(op);
333 if (!m_opSpec->isValid()) {
334 // figure out if we failed to resolve the primary op
335 // or if it's an unmapped subfunction (e.g. math function)
336 auto os = m_model.lookupOpSpec(op);
337 std::stringstream ss;
338 ss << "0x" << std::hex << (unsigned)op <<
339 ": unsupported opcode on this platform";
340 std::string str = ss.str();
341 errorT(str);
342 inst = createErrorInstruction(
343 kernel,
344 str.c_str(),
345 binary,
346 iLen);
347 } else {
348 m_subfunc = decodeSubfunction();
349 try {
350 inst = decodeNextInstruction(kernel);
351 } catch (const FatalError &fe) {
352 // error is already logged
353 inst = createErrorInstruction(
354 kernel,
355 fe.what(),
356 binary,
357 iLen);
358 }
359 }
360 }
361 inst->setPC(currentPc());
362 inst->setID(nextId++);
363 inst->setLoc(currentPc());
364 insts.emplace_back(inst);
365 #if _DEBUG
366 if (!errorHandler().hasErrors()) {
367 // only validate if there weren't errors
368 inst->validate();
369 }
370 #endif
371 advancePc(iLen);
372 binary += iLen;
373 bytesLeft -= iLen;
374 }
375 }
376
decodeNextInstructionEpilog(Instruction * inst)377 void Decoder::decodeNextInstructionEpilog(Instruction *inst)
378 {
379 decodeSWSB(inst);
380 }
381
382 // Decodes a GED instruction to IGA IR and appends it to a given block
decodeNextInstruction(Kernel & kernel)383 Instruction *Decoder::decodeNextInstruction(Kernel &kernel)
384 {
385 Instruction *inst = nullptr;
386
387 switch (m_opSpec->format)
388 {
389 case OpSpec::NULLARY:
390 if (m_opSpec->op == Op::ILLEGAL) {
391 inst = kernel.createIllegalInstruction();
392 } else if (m_opSpec->op == Op::NOP) {
393 inst = kernel.createNopInstruction();
394 } else {
395 std::stringstream ss;
396 ss << "at pc " << currentPc() << ": invalid operation format";
397 IGA_ASSERT_FALSE(ss.str().c_str());
398 return kernel.createIllegalInstruction();
399 }
400 break;
401 case OpSpec::BASIC_UNARY_REG:
402 case OpSpec::BASIC_UNARY_REGIMM:
403 case OpSpec::BASIC_BINARY_REG_IMM:
404 case OpSpec::BASIC_BINARY_REG_REG:
405 case OpSpec::BASIC_BINARY_REG_REGIMM:
406 case OpSpec::MATH_BINARY_REG_REGIMM:
407 inst = decodeBasicInstruction(kernel);
408 break;
409 case OpSpec::JUMP_UNARY_REG:
410 case OpSpec::JUMP_UNARY_IMM:
411 case OpSpec::JUMP_UNARY_REGIMM:
412 case OpSpec::JUMP_UNARY_CALL_REGIMM:
413 case OpSpec::JUMP_BINARY_BRC:
414 case OpSpec::JUMP_BINARY_IMM_IMM:
415 if (m_model.supportsSimplifiedBranches()) {
416 inst = decodeBranchSimplifiedInstruction(kernel);
417 } else {
418 inst = decodeBranchInstruction(kernel);
419 }
420 break;
421 case OpSpec::TERNARY_REGIMM_REG_REGIMM:
422 inst = decodeTernaryInstruction(kernel);
423 break;
424 case OpSpec::SEND_UNARY:
425 case OpSpec::SEND_BINARY:
426 inst = decodeSendInstruction(kernel);
427 break;
428 case OpSpec::SYNC_UNARY:
429 if (platform() < Platform::XE) {
430 inst = decodeWaitInstruction(kernel);
431 } else {
432 inst = decodeSyncInstruction(kernel);
433 }
434 break;
435 default: {
436 std::stringstream ss;
437 ss << "at pc " << currentPc() << ": invalid operation format\n";
438 ss << FormatOpBits(m_model, (const uint8_t*)m_binary + currentPc());
439
440 IGA_ASSERT_FALSE(ss.str().c_str());
441 return kernel.createIllegalInstruction();
442 } // default:
443 } // switch
444
445 decodeOptions(inst);
446 decodeNextInstructionEpilog(inst);
447
448 return inst;
449 }
450
hasImm64Src0Overlap()451 bool Decoder::hasImm64Src0Overlap()
452 {
453 if (platform() < Platform::XE)
454 return false;
455
456 // SWSB overlaps the flag modifier with src0
457 // check if it's 64 bit imm
458 GED_REG_FILE regFile = decodeSrcRegFile<SourceIndex::SRC0>();
459 Type t = decodeSrcType<SourceIndex::SRC0>();
460 return (TypeIs64b(t) && (regFile == GED_REG_FILE_IMM));
461 }
462
463 ///////////////////////////////////////////////////////////////////////
464 // BASIC INSTRUCTIONS
465 ///////////////////////////////////////////////////////////////////////
466
decodeBasicInstruction(Kernel & kernel)467 Instruction *Decoder::decodeBasicInstruction(Kernel &kernel)
468 {
469 FlagRegInfo fri = decodeFlagRegInfo(hasImm64Src0Overlap());
470 Instruction *inst = kernel.createBasicInstruction(
471 *m_opSpec,
472 fri.pred,
473 fri.reg,
474 decodeExecSize(),
475 decodeChannelOffset(),
476 decodeMaskCtrl(),
477 fri.modifier,
478 m_subfunc);
479
480 GED_ACCESS_MODE accessMode = decodeAccessMode();
481 if (m_opSpec->supportsDestination()) {
482 decodeBasicDestination(inst, accessMode);
483 }
484 switch (m_opSpec->format) {
485 case OpSpec::BASIC_UNARY_REG:
486 case OpSpec::BASIC_UNARY_REGIMM:
487 decodeBasicUnaryInstruction(inst, accessMode);
488 break;
489 case OpSpec::BASIC_BINARY_REG_IMM:
490 case OpSpec::BASIC_BINARY_REG_REG:
491 case OpSpec::BASIC_BINARY_REG_REGIMM:
492 case OpSpec::MATH_BINARY_REG_REGIMM:
493 decodeSourceBasic<SourceIndex::SRC0>(inst, accessMode);
494 if (inst->getSourceCount() > 1) {
495 // math can have one or two ops
496 decodeSourceBasic<SourceIndex::SRC1>(inst, accessMode);
497 }
498 break;
499 default:
500 std::stringstream ss;
501 ss << "IGA INTERNAL ERROR: ";
502 ss << FormatOpBits(m_model, (const char *)m_binary + currentPc());
503 ss << ": unexpected format for basic instruction";
504 IGA_ASSERT_FALSE(ss.str().c_str());
505 errorT(ss.str());
506 inst = kernel.createIllegalInstruction();
507 }
508
509 return inst;
510 }
511
decodeBasicUnaryInstruction(Instruction * inst,GED_ACCESS_MODE accessMode)512 void Decoder::decodeBasicUnaryInstruction(
513 Instruction *inst, GED_ACCESS_MODE accessMode)
514 {
515 decodeSourceBasic<SourceIndex::SRC0>(inst, accessMode);
516 if (m_opSpec->op == Op::MOVI && platform() >= Platform::GEN10) {
517 // movi can takes two parameters on on this platform
518 // movi (..) reg reg (imm|null)
519 decodeSourceBasic<SourceIndex::SRC1>(inst, accessMode);
520 }
521 }
522
decodeBasicDestinationAlign16(Instruction * inst)523 void Decoder::decodeBasicDestinationAlign16(Instruction *inst)
524 {
525 GED_DECODE_RAW(GED_ADDR_MODE, addrMode, DstAddrMode);
526
527 DstModifier dstMod = DstModifier::NONE;
528 if (inst->getOpSpec().supportsSaturation()) {
529 GED_DECODE_RAW(GED_SATURATE, mod, Saturate);
530 dstMod = translate(mod);
531 }
532
533 GED_DECODE(Type, GED_DATA_TYPE, type, DstDataType);
534
535 switch (addrMode)
536 {
537 case GED_ADDR_MODE_Direct: {
538 DirRegOpInfo dri = decodeDstDirRegInfo();
539 if (inst->isMacro()) {
540 MathMacroExt MathMacroReg = decodeDestinationMathMacroRegFromChEn();
541 inst->setMacroDestination(dstMod,
542 dri.regName, dri.regRef, MathMacroReg, Region::Horz::HZ_1, type);
543 } else {
544 // normal Align16 destination
545 uint32_t hStride = 1;
546 GED_DECODE_RAW(GED_DST_CHAN_EN, chEn, DstChanEn);
547 if (dri.regName == RegName::ARF_MME &&
548 isAlign16MathMacroRegisterCsrPlatform())
549 {
550 // special access to acc2-acc9 via ChEn encoding
551 // (for context save and restore)
552 dri.regRef.regNum = (uint16_t)decodeDestinationRegNumAccBitsFromChEn();
553 } else if (chEn == GED_DST_CHAN_EN_xyzw) {
554 hStride = 1;
555 } else {
556 fatalT("dst: unsupported Align16 ChEn; only <1> (.xyzw) supported");
557 }
558
559 GED_DECODE_RAW(uint32_t, subRegNum, DstSubRegNum);
560 inst->setDirectDestination(
561 dstMod, dri.regName, dri.regRef,
562 translateRgnH(hStride), type);
563 }
564 break;
565 }
566 case GED_ADDR_MODE_Indirect: {
567 GED_DECODE_RAW(GED_DST_CHAN_EN, chEn, DstChanEn);
568 if (chEn == GED_DST_CHAN_EN_xyzw) {
569 warningT("converting unary/binary Align16 dst to equivalent Align1");
570 } else {
571 fatalT("unsupported Align16 Dst.ChEn (only .xyzw supported)");
572 }
573
574 GED_DECODE_RAW(int32_t, addrImm, DstAddrImm);
575 GED_DECODE_RAW(uint32_t, subRegNum, DstAddrSubRegNum);
576 RegRef a0(0u, subRegNum);
577 inst->setInidirectDestination(
578 dstMod, a0, (uint16_t)addrImm, Region::Horz::HZ_1, type);
579 break;
580 }
581 default:
582 fatalT("invalid addressing mode on dst");
583 break;
584 } // switch
585 }
586
decodeBasicDestinationAlign1(Instruction * inst)587 void Decoder::decodeBasicDestinationAlign1(Instruction *inst) {
588 GED_ADDR_MODE addrMode = GED_ADDR_MODE_Direct;
589 GED_DECODE_RAW(GED_ADDR_MODE, taddrMode, DstAddrMode);
590 addrMode = taddrMode;
591
592 DstModifier dstMod = DstModifier::NONE;
593 if (inst->getOpSpec().supportsSaturation()) {
594 GED_DECODE_RAW(GED_SATURATE, mod, Saturate);
595 dstMod = translate(mod);
596 }
597
598 GED_DECODE_RAW(uint32_t, hStride, DstHorzStride);
599 Region::Horz rgnHzDec = translateRgnH(hStride);
600 if (inst->getOpSpec().hasImplicitDstRegion(isMacro())) {
601 Region::Horz rgnHzImpl =
602 inst->getOpSpec().implicitDstRegion(isMacro()).getHz();
603 if (rgnHzImpl != rgnHzDec) {
604 warningT("dst has wrong region for binary normal form");
605 }
606 }
607
608 GED_DECODE(Type, GED_DATA_TYPE, type, DstDataType);
609
610 switch (addrMode)
611 {
612 case GED_ADDR_MODE_Direct: {
613 GED_DECODE_RAW(GED_REG_FILE, regFile, DstRegFile);
614 if (regFile != GED_REG_FILE_ARF && regFile != GED_REG_FILE_GRF) {
615 errorT("invalid reg file on dst");
616 }
617
618 DirRegOpInfo dri = decodeDstDirRegInfo();
619 if (inst->isMacro()) {
620 GED_DECODE(MathMacroExt, GED_MATH_MACRO_EXT, mme, DstMathMacroExt);
621 inst->setMacroDestination(
622 dstMod, dri.regName, dri.regRef, mme, rgnHzDec, type);
623 } else {
624 // normal Align1 destination
625 // it's a normal Align1 destination
626 GED_DECODE_RAW(uint32_t, subRegNum, DstSubRegNum);
627 inst->setDirectDestination(
628 dstMod, dri.regName, dri.regRef, rgnHzDec, type);
629 }
630 break;
631 }
632 case GED_ADDR_MODE_Indirect: {
633 GED_DECODE_RAW(int32_t, addrImm, DstAddrImm);
634 GED_DECODE_RAW(uint32_t, subRegNum, DstAddrSubRegNum);
635 RegRef a0(0u, subRegNum);
636 inst->setInidirectDestination(
637 dstMod, a0, (uint16_t)addrImm, rgnHzDec, type);
638 break;
639 }
640 default:
641 fatalT("invalid addressing mode on dst");
642 break;
643 } // switch
644 }
645
646
647
648 ///////////////////////////////////////////////////////////////////////
649 // TERNARY INSTRUCTIONS
650 ///////////////////////////////////////////////////////////////////////
decodeTernaryInstruction(Kernel & kernel)651 Instruction *Decoder::decodeTernaryInstruction(Kernel& kernel)
652 {
653 FlagRegInfo fri = decodeFlagRegInfo();
654 Instruction *inst = kernel.createBasicInstruction(
655 *m_opSpec,
656 fri.pred,
657 fri.reg,
658 decodeExecSize(),
659 decodeChannelOffset(),
660 decodeMaskCtrl(),
661 fri.modifier,
662 m_subfunc);
663
664 GED_ACCESS_MODE accessMode = decodeAccessMode();
665 decodeTernaryInstructionOperands(kernel, inst, accessMode);
666
667 return inst;
668 }
669
decodeTernaryInstructionOperands(Kernel & kernel,Instruction * inst,GED_ACCESS_MODE accessMode)670 void Decoder::decodeTernaryInstructionOperands(
671 Kernel& kernel, Instruction *inst, GED_ACCESS_MODE accessMode)
672 {
673 if (accessMode == GED_ACCESS_MODE_Align16) {
674 if (m_opSpec->supportsDestination()) {
675 decodeTernaryDestinationAlign16(inst);
676 }
677 decodeTernarySourceAlign16<SourceIndex::SRC0>(inst);
678 decodeTernarySourceAlign16<SourceIndex::SRC1>(inst);
679 decodeTernarySourceAlign16<SourceIndex::SRC2>(inst);
680 } else {
681 if (platform() >= Platform::GEN10) {
682 if (m_opSpec->supportsDestination()) {
683 decodeTernaryDestinationAlign1(inst);
684 }
685 decodeTernarySourceAlign1<SourceIndex::SRC0>(inst);
686 decodeTernarySourceAlign1<SourceIndex::SRC1>(inst);
687 decodeTernarySourceAlign1<SourceIndex::SRC2>(inst);
688 } else {
689 errorT("unexpected Align1 Ternary in current platform");
690 inst = kernel.createIllegalInstruction();
691 }
692 }
693 }
694
decodeTernaryDestinationAlign16(Instruction * inst)695 void Decoder::decodeTernaryDestinationAlign16(Instruction *inst)
696 {
697 GED_DECODE_RAW(uint32_t, regNumBits, DstRegNum);
698 DstModifier dstMod = DstModifier::NONE;
699 if (m_opSpec->supportsSaturation()) {
700 GED_DECODE_RAW(GED_SATURATE, mod, Saturate);
701 dstMod = translate(mod);
702 }
703 GED_DECODE(Type, GED_DATA_TYPE, type, DstDataType);
704 GED_DECODE_RAW(GED_REG_FILE, regFile, DstRegFile);
705 GED_DECODE_RAW(GED_DST_CHAN_EN, chEn, DstChanEn);
706
707 RegName regName = RegName::INVALID;
708 RegRef regRef;
709 decodeReg(-1, regFile, regNumBits, regName, regRef);
710
711 if (inst->isMacro()) {
712 MathMacroExt MathMacroReg = decodeDestinationMathMacroRegFromChEn();
713 inst->setMacroDestination(
714 dstMod, regName, regRef, MathMacroReg, Region::Horz::HZ_1, type);
715 } else {
716 // We have to translate Align16 ternary instructions to equivalent
717 // Align1 where posssible. The goal of these translations is to
718 // capture everything the IGC compiler generates. There will be
719 // valid Align16 sequences that we choose not to represent in Align1.
720 //
721 // CASES:
722 // SIMD1: illegal (hardware disallows this, we use a SIMD4 with ChEn to emulate)
723 // SIMD2: We accept .xy, .zw only if the type is :df. E.g.
724 // mad (2) r5.0.xy:df ... // means r5.0<1>
725 // mad (2) r5.0.zw:df ... // means r5.1<1>
726 // *** we convert the exec size to SIMD1 ***
727 // SIMD4: This can be a true SIMD4 or an emulation of a SIMD1 scalar.
728 // We decide based on the ChEn mask.
729 // .xyzw is a SIMD4, but .{x,y,z,w} means scalar SIMD1
730 // (since other lanes are masked out)
731 // *** we convert the exec size to SIMD1 for the scalar case ***
732 // I.e.
733 // mad (4) r5.0.xyzw:f ... // gets converted cleanly
734 // to
735 // mad (4) r5.0<1>:f
736 // but
737 // mad (4) r5.0.x:f ... {NoMask) // gets treated as scalar and
738 // translates to (W) mad (1) r5.0.x
739 //
740 // SIMD8, SIMD16: we only accept .xyzw and .r as packed and scalar
741 // this seems to capture everything used in practice
742 //
743 // NOTE: this is an appalling hack, but creates the least technical
744 // debt for the project. These problems all go away in GEN10 when
745 // we get Align1 ternary and Align16 fades into the sunset.
746 uint8_t subregOffAlign16Elems = 0; // in elements not bytes (add after conversion)
747 switch (inst->getExecSize()) {
748 case ExecSize::SIMD2:
749 if (chEn != GED_DST_CHAN_EN_xyzw) {
750 // this is a special case of below for DF and Q
751 inst->setExecSize(ExecSize::SIMD1);
752 if (chEn == GED_DST_CHAN_EN_xy && type == Type::DF) {
753 subregOffAlign16Elems = 0; // dst.k.xy:df => dst.(k+0)<1>:df
754 } else if (chEn == GED_DST_CHAN_EN_zw && type == Type::DF) {
755 subregOffAlign16Elems = 1; // dst.k.xy:df => dst.(k+1)<1>:df
756 } else {
757 errorT("unsupported Align16 ternary destination for SIMD2"
758 " (must be .xywz or .{xy,zw} for :df)");
759 }
760 }
761 break;
762 case ExecSize::SIMD4:
763 if (chEn != GED_DST_CHAN_EN_xyzw) {
764 // with Align16, we emulate a scalar (SIMD1) by masking out
765 // all, but one of the channels.
766 // we must translate a SIMD4
767 // mad (4) r5.0.x:f ... {NoMask}
768 // to the following
769 // (W) mad (1) r5.0<1>:f ...
770 // we have to twiddle the execution size here too
771 inst->setExecSize(ExecSize::SIMD1);
772 switch (chEn) {
773 case GED_DST_CHAN_EN_x:
774 subregOffAlign16Elems = 0; // subregister is already aligned
775 break;
776 case GED_DST_CHAN_EN_y:
777 subregOffAlign16Elems = 1; // dst.k.y => dst.(k+1)<1> (e.g. dst.4.y => dst.5<1>)
778 break;
779 case GED_DST_CHAN_EN_z:
780 subregOffAlign16Elems = 2; // dst.k.z => dst.(k+2)<1>
781 break;
782 case GED_DST_CHAN_EN_w:
783 subregOffAlign16Elems = 3; // dst.k.w => dst.(k+3)<1>
784 break;
785 default:
786 errorT("unsupported Align16 ternary destination for SIMD4"
787 " (must be .xywz or .{x,y,z,w})");
788 break;
789 }
790 } // else { it's an .xyzw ChEn: we can leave the subregister alone }
791 break;
792 case ExecSize::SIMD8:
793 case ExecSize::SIMD16:
794 case ExecSize::SIMD32: // can appear for things like :hf, :w, or :b
795 if (chEn != GED_DST_CHAN_EN_xyzw) {
796 errorT("unsupported Align16 ternary destination for SIMD{8,16}"
797 " (must be .xywz)");
798 }
799 // the access must already be aligned for .xyzw
800 break;
801 default:
802 // SIMD1 is illegal
803 errorT("unsupported Align16 ternary destination (unsupported SIMD)");
804 break;
805 }
806
807 GED_DECODE_RAW(uint32_t, subRegNumBytes, DstSubRegNum);
808 uint16_t subRegNumber =
809 type == Type::INVALID ? 0 :
810 (uint16_t)BinaryOffsetToSubReg(subRegNumBytes, regName, type, m_model.platform);
811 regRef.subRegNum = (uint16_t)(subRegNumber + subregOffAlign16Elems);
812 inst->setDirectDestination(
813 dstMod,
814 regName,
815 regRef,
816 Region::Horz::HZ_1,
817 type);
818 }
819 }
820
821 template <SourceIndex S>
decodeTernarySourceAlign16(Instruction * inst)822 void Decoder::decodeTernarySourceAlign16(Instruction *inst)
823 {
824 bool isMacro = inst->isMacro(); // madm or math.invm or math.rsqrt
825
826 if (!isMacro && m_model.supportsAlign16MacroOnly()) {
827 warningT("src", (int)S, ": converting Align16 to Align1 "
828 "(bits will re-assemble to Align1)");
829 }
830
831 SrcModifier srcMod = decodeSrcModifier<S>();
832
833 ///////////////////////////////////////////////////////////////////////////
834 // register name, number and and register file
835 // (will be GRF)
836 uint32_t regNum = decodeSrcRegNum<S>();
837
838 ///////////////////////////////////////////////////////////////////////////
839 // swizzling / region
840 GED_DATA_TYPE gedType;
841 GED_DECODE_RAW_TO(SrcDataType, gedType);
842 if (platform() >= Platform::GEN8LP &&
843 (gedType == GED_DATA_TYPE_f || gedType == GED_DATA_TYPE_hf) &&
844 S > SourceIndex::SRC0)
845 {
846 // CHV+ mixed mode
847 gedType = decodeSrcDataType<S>();
848 }
849 Type type = translate(gedType);
850
851 if (isMacro) {
852 MathMacroExt MathMacroReg = decodeSrcMathMacroReg<S>();
853 RegRef rr(regNum, 0u);
854 Region macroDftSrcRgn = macroDefaultSourceRegion(
855 (int)S, inst->getOpSpec(), platform(), inst->getExecSize());
856 inst->setMacroSource(
857 S,
858 srcMod,
859 RegName::GRF_R,
860 rr,
861 MathMacroReg,
862 macroDftSrcRgn,
863 type);
864 } else {
865 int subReg = type == Type::INVALID ?
866 0 : BinaryOffsetToSubReg(decodeSrcSubRegNum<S>(), RegName::GRF_R, type, m_model.platform);
867 RegRef reg = RegRef(regNum, (uint32_t)subReg);
868 Region rgn;
869 if (decodeSrcRepCtrl<S>() == GED_REP_CTRL_NoRep) {
870 GED_SWIZZLE swizzle[4];
871 decodeChSelToSwizzle(decodeSrcChanSel<S>(), swizzle);
872 bool isFullSwizzle = (swizzle[0] == GED_SWIZZLE_x && swizzle[1] == GED_SWIZZLE_y &&
873 swizzle[2] == GED_SWIZZLE_z && swizzle[3] == GED_SWIZZLE_w);
874
875 bool isXYSwizzle = (swizzle[0] == GED_SWIZZLE_x && swizzle[1] == GED_SWIZZLE_y &&
876 swizzle[2] == GED_SWIZZLE_x && swizzle[3] == GED_SWIZZLE_y);
877
878 bool isYZSwizzle = (swizzle[0] == GED_SWIZZLE_z && swizzle[1] == GED_SWIZZLE_w &&
879 swizzle[2] == GED_SWIZZLE_z && swizzle[3] == GED_SWIZZLE_w);
880
881 bool invalidSwizzle = false;
882 if (TypeIs64b(type)) {
883 invalidSwizzle = !isFullSwizzle && !isXYSwizzle && !isYZSwizzle;
884 } else {
885 invalidSwizzle = !isFullSwizzle;
886 }
887
888 if (invalidSwizzle) {
889 fatalT("unconvertible ternary align16 operand");
890 }
891
892 // mad (8) r46.0.xyzw:df r46.0.xyzw:df r50.0.xyzw:df r48.0.xyzw:df {Align16, Q1}
893 // mad (2) r5.0.xy:df r5.0.xyxy:df r92.2.xyxy:df r93.0.xyxy:df {Align16, Q1, NoMask}
894 // a HW hack for scalar operation on DF
895 if (type == Type::DF && (isXYSwizzle || isYZSwizzle)) {
896 if (S == SourceIndex::SRC2) {
897 rgn = Region::SRCXX0;
898 } else {
899 rgn = Region::SRC0X0;
900 }
901 if (isYZSwizzle) {
902 reg.subRegNum += 1;
903 }
904 } else {
905 // we accept r#.xyzw as r#<2;1>
906 if (S == SourceIndex::SRC2) {
907 rgn = Region::SRCXX1;
908 } else {
909 rgn = Region::SRC2X1;
910 }
911 }
912 } else {
913 // r#.r is the same as r#<0;0>
914 if (S == SourceIndex::SRC2) {
915 rgn = Region::SRCXX0;
916 } else {
917 rgn = Region::SRC0X0;
918 }
919 }
920 inst->setDirectSource(S, srcMod, RegName::GRF_R, reg, rgn, type);
921 }
922 }
923
ternaryDstOmitsHzStride(const OpSpec & os)924 static bool ternaryDstOmitsHzStride(const OpSpec &os)
925 {
926 if (os.isDpasFamily())
927 return true;
928
929 return false;
930 }
931
decodeTernaryDestinationAlign1(Instruction * inst)932 void Decoder::decodeTernaryDestinationAlign1(Instruction *inst)
933 {
934 const OpSpec &os = inst->getOpSpec();
935
936 DstModifier dstMod = DstModifier::NONE;
937 if (os.supportsSaturation()) {
938 GED_DECODE_RAW(GED_SATURATE, mod, Saturate);
939 dstMod = translate(mod);
940 }
941
942 DirRegOpInfo dri = decodeDstDirRegInfo();
943
944 if (inst->isMacro()) {
945 GED_DECODE(MathMacroExt, GED_MATH_MACRO_EXT, mme, DstMathMacroExt);
946 inst->setMacroDestination(
947 dstMod, dri.regName, dri.regRef, mme, Region::Horz::HZ_1, dri.type);
948 } else {
949 if (ternaryDstOmitsHzStride(inst->getOpSpec())) {
950 Region::Horz dftRgnHz = os.hasImplicitDstRegion(isMacro()) ?
951 os.implicitDstRegion(isMacro()).getHz() : Region::Horz::HZ_1;
952 inst->setDirectDestination(dstMod,
953 dri.regName,
954 dri.regRef,
955 dftRgnHz,
956 dri.type);
957 } else {
958 GED_DECODE_RAW(uint32_t, hStride, DstHorzStride);
959
960 inst->setDirectDestination(dstMod,
961 dri.regName,
962 dri.regRef,
963 translateRgnH(hStride),
964 dri.type);
965 }
966 }
967 }
968
969 template <SourceIndex S>
decodeSrcRegionTernaryAlign1(const OpSpec & os)970 Region Decoder::decodeSrcRegionTernaryAlign1(const OpSpec &os)
971 {
972 uint32_t rgnVt = static_cast<uint32_t>(Region::Vert::VT_INVALID);
973 bool hasRgnVt = S != SourceIndex::SRC2;
974 if (hasRgnVt) {
975 rgnVt = decodeSrcVertStride<S>();
976 }
977 //
978 uint32_t rgnHz = decodeSrcHorzStride<S>();
979 //
980 return transateGEDtoIGARegion(
981 rgnVt, static_cast<uint32_t>(Region::Width::WI_INVALID), rgnHz);
982 }
983
984 template <SourceIndex S>
decodeTernarySourceAlign1(Instruction * inst)985 void Decoder::decodeTernarySourceAlign1(Instruction *inst)
986 {
987 if (platform() < Platform::GEN10) {
988 fatalT("Align1 not available on this platform");
989 }
990
991 GED_REG_FILE regFile = decodeSrcRegFile<S>();
992 const OpSpec &os = inst->getOpSpec();
993 if (os.isDpasFamily()) {
994 // DPAS specific
995 // DPAS allowed src0 as null:
996 // When Src0 is specified as null, it is treated as an immediate value of +0
997 if (!(S == SourceIndex::SRC0 && regFile == GED_REG_FILE_ARF)) {
998 if (regFile != GED_REG_FILE_GRF) {
999 fatalT("invalid register file in src", (int)S);
1000 }
1001 }
1002
1003 RegRef regRef;
1004 RegName regName = decodeSourceReg<S>(regRef);
1005
1006 // only ARF_NULL is allowed at src0 if it's not GRF
1007 if (S == SourceIndex::SRC0 && regFile == GED_REG_FILE_ARF)
1008 if (regName != RegName::ARF_NULL)
1009 fatalT("non grf src0 register file must be null for this op");
1010
1011 Type ty = decodeSrcType<S>();
1012 if (S == SourceIndex::SRC1) {
1013 GED_DECODE_TO(Src1Precision, translate, ty);
1014 } else if (S == SourceIndex::SRC2) {
1015 GED_DECODE_TO(Src2Precision, translate, ty);
1016 } // else ty is valid already for dst/src0
1017 regRef.subRegNum =
1018 (uint16_t)BinaryOffsetToSubReg(regRef.subRegNum, regName, ty, m_model.platform);
1019 Region dftRgn = os.implicitSrcRegion(
1020 (int)S, inst->getExecSize(), isMacro());
1021 inst->setDirectSource(
1022 S,
1023 decodeSrcModifier<S>(),
1024 regName,
1025 regRef,
1026 dftRgn,
1027 ty);
1028 return;
1029 } // DPAS
1030
1031 // regular ternary align1 source operand
1032 if (regFile == GED_REG_FILE_IMM) {
1033 Type type = decodeSrcType<S>();
1034
1035 ImmVal val;
1036 if (platform() < Platform::GEN10) {
1037 val = decodeSrcImmVal(type);
1038 } else {
1039 val = decodeTernarySrcImmVal<S>(type);
1040 }
1041
1042 inst->setImmediateSource(S, val, type);
1043 } else if (regFile == GED_REG_FILE_GRF || regFile == GED_REG_FILE_ARF) {
1044 // addressing mode is always direct in Align1 ternary
1045 if (inst->isMacro()) {
1046 if (m_model.supportsAlign16ImplicitAcc()) {
1047 fatalT("src", (int)S, ": macro instructions must be Align16 "
1048 "for this platform");
1049 }
1050 RegRef regRef;
1051 RegName regName = decodeSourceReg<S>(regRef);
1052 Region macroDftSrcRgn = macroDefaultSourceRegion(
1053 (int)S, inst->getOpSpec(), platform(), inst->getExecSize());
1054 inst->setMacroSource(
1055 S,
1056 decodeSrcModifier<S>(),
1057 regName,
1058 regRef,
1059 decodeSrcMathMacroReg<S>(),
1060 macroDftSrcRgn,
1061 decodeSrcType<S>());
1062 } else {
1063 // normal access
1064 Region rgn = decodeSrcRegionTernaryAlign1<S>(inst->getOpSpec());
1065 DirRegOpInfo opInfo = decodeSrcDirRegOpInfo<S>();
1066 inst->setDirectSource(
1067 S,
1068 decodeSrcModifier<S>(),
1069 opInfo.regName,
1070 opInfo.regRef,
1071 rgn,
1072 opInfo.type);
1073 }
1074 } else { // GED_REG_FILE_INVALID
1075 fatalT("invalid register file in src", (int)S);
1076 }
1077 }
1078
1079
1080 ///////////////////////////////////////////////////////////////////////
1081 // SEND INSTRUCTIONS
1082 ///////////////////////////////////////////////////////////////////////
decodeSendExDesc()1083 SendDesc Decoder::decodeSendExDesc()
1084 {
1085 // ex_desc
1086 GED_REG_FILE exDescRegFile = GED_REG_FILE_IMM;
1087 if (m_opSpec->format & OpSpec::Format::SEND_BINARY) {
1088 // only sends/sendsc has ExDescRegFile
1089 GED_DECODE_RAW_TO(ExDescRegFile, exDescRegFile);
1090 }
1091
1092 SendDesc exDesc;
1093 if (exDescRegFile == GED_REG_FILE_IMM) {
1094 exDesc.type = SendDesc::Kind::IMM;
1095 GED_DECODE_RAW_TO(ExMsgDesc, exDesc.imm);
1096 } else {
1097 // For sends GED interprets SelReg32ExDesc and returns default values
1098 GED_DECODE_RAW(uint32_t, subRegNum, ExDescAddrSubRegNum);
1099 exDesc.type = SendDesc::Kind::REG32A;
1100 exDesc.reg.regNum = 0; // a0 is implied
1101 exDesc.reg.subRegNum = (uint16_t)(subRegNum / 2);
1102 }
1103 return exDesc;
1104 }
1105
decodeSendDesc()1106 SendDesc Decoder::decodeSendDesc()
1107 {
1108 GED_REG_FILE descRegFile = GED_REG_FILE_IMM;
1109 GED_DECODE_RAW_TO(DescRegFile, descRegFile);
1110 SendDesc desc;
1111 if (descRegFile == GED_REG_FILE_IMM) {
1112 desc.type = SendDesc::Kind::IMM;
1113 GED_DECODE_RAW_TO(MsgDesc, desc.imm);
1114 } else {
1115 // desc register is hardwired to a0.0 (ex-desc can vary)
1116 desc.type = SendDesc::Kind::REG32A;
1117 desc.reg.regNum = 0;
1118 desc.reg.subRegNum = 0;
1119 }
1120 return desc;
1121 }
1122
decodeMLenRlenFromDesc(const SendDesc & desc,int & src0Len,int & dstLen)1123 static void decodeMLenRlenFromDesc(
1124 const SendDesc &desc, int &src0Len, int &dstLen)
1125 {
1126 if (desc.isImm()) {
1127 src0Len = (int)(desc.imm >> 25) & 0xF;
1128 dstLen = (int)(desc.imm >> 20) & 0x1F;
1129 }
1130 }
1131
decodeSendInfoPreXe(SendDescodeInfo & sdi)1132 void Decoder::decodeSendInfoPreXe(SendDescodeInfo &sdi)
1133 {
1134 if (sdi.exDesc.isImm()) {
1135 // in <=GEN11, it's ExDesc[3:0]
1136 // if the extended descriptor is immediate, we can extract it
1137 // from that
1138 sdi.sfid = sfidFromEncoding(platform(), sdi.exDesc.imm);
1139 } else if (sdi.exDesc.isReg()) {
1140 // given <=GEN11 and reg exdesc
1141 sdi.sfid = SFID::A0REG;
1142 }
1143 if (sdi.exDesc.isImm()) {
1144 sdi.src1Len = (int)(sdi.exDesc.imm >> 6) & 0x1F;
1145 }
1146 decodeMLenRlenFromDesc(sdi.desc, sdi.src0Len, sdi.dstLen);
1147 }
decodeSendInfoXe(SendDescodeInfo & sdi)1148 void Decoder::decodeSendInfoXe(SendDescodeInfo &sdi)
1149 {
1150 sdi.sfid = m_subfunc.send;
1151 if (sdi.exDesc.isImm()) {
1152 sdi.src1Len = (int)(sdi.exDesc.imm >> 6) & 0x1F;
1153 }
1154 decodeMLenRlenFromDesc(sdi.desc, sdi.src0Len, sdi.dstLen);
1155 }
1156
decodeSendInfoXeHP(SendDescodeInfo & sdi)1157 void Decoder::decodeSendInfoXeHP(SendDescodeInfo &sdi)
1158 {
1159 sdi.sfid = m_subfunc.send;
1160 if (sdi.exDesc.isImm()) {
1161 sdi.src1Len = (int)(sdi.exDesc.imm >> 6) & 0x1F;
1162 }
1163 decodeMLenRlenFromDesc(sdi.desc, sdi.src0Len, sdi.dstLen);
1164
1165 if (sdi.exDesc.isReg()) {
1166 // if ExBSO is set, decode Src1Length and CPS
1167 GED_DECODE_RAW(uint32_t, exBSO, ExBSO);
1168 sdi.hasExBSO = exBSO != 0;
1169 if (sdi.hasExBSO) {
1170 GED_DECODE_RAW(uint32_t, cps, CPS);
1171 sdi.hasCps = cps != 0;
1172 GED_DECODE_RAW_TO(Src1Length, sdi.src1Len);
1173 }
1174 }
1175 }
1176
decodeSendInfoXeHPG(SendDescodeInfo & sdi)1177 void Decoder::decodeSendInfoXeHPG(SendDescodeInfo &sdi)
1178 {
1179 // This is exactly the same as XeHP except that:
1180 // - all immediate descriptors encode Src1Len in the EU bits
1181 decodeSendInfoXeHP(sdi);
1182 if (sdi.exDesc.isImm()) {
1183 // >=XeHPG all immediate descriptors also have Src1Length
1184 // clobber the value XeHP decoding set
1185 GED_DECODE_RAW_TO(Src1Length, sdi.src1Len);
1186 }
1187 }
1188
1189
decodeSendInstruction(Kernel & kernel)1190 Instruction *Decoder::decodeSendInstruction(Kernel& kernel)
1191 {
1192 SendDescodeInfo sdi;
1193 sdi.desc = decodeSendDesc();
1194 sdi.exDesc = decodeSendExDesc();
1195 if (platform() < Platform::XE) {
1196 decodeSendInfoPreXe(sdi);
1197 } else if (platform() == Platform::XE) {
1198 decodeSendInfoXe(sdi);
1199 } else if (platform() == Platform::XE_HP) {
1200 decodeSendInfoXeHP(sdi);
1201 } else if (platform() == Platform::XE_HPG ||
1202 platform() == Platform::XE_HPC)
1203 {
1204 decodeSendInfoXeHPG(sdi);
1205 } else {
1206 IGA_ASSERT_FALSE("unsupported platform");
1207 }
1208
1209 FlagRegInfo fri = decodeFlagRegInfo();
1210 Instruction *inst = kernel.createSendInstruction(
1211 *m_opSpec,
1212 sdi.sfid,
1213 fri.pred,
1214 fri.reg,
1215 decodeExecSize(),
1216 decodeChannelOffset(),
1217 decodeMaskCtrl(),
1218 sdi.exDesc,
1219 sdi.desc
1220 );
1221
1222 if ((m_opSpec->format & OpSpec::Format::SEND_BINARY) ==
1223 OpSpec::Format::SEND_BINARY)
1224 { // send is binary
1225 decodeSendDestination(inst);
1226 decodeSendSource0(inst);
1227 decodeSendSource1(inst);
1228 if (sdi.src1Len < 0 && inst->getSource(SourceIndex::SRC1).isNull()) {
1229 // if src1Len comes from a0.#[24:20], but src1 is null, then
1230 // we can still assume it's 0.
1231 sdi.src1Len = 0;
1232 }
1233 } else { // if (m_opSpec->isSendFamily()) {
1234 decodeSendDestination(inst);
1235 decodeSendSource0(inst);
1236 }
1237
1238 // No fusion in XeHPC+
1239 bool hasFusionCtrl = platform() >= Platform::XE && platform() < Platform::XE_HPC;
1240 if (hasFusionCtrl) {
1241 GED_FUSION_CTRL fusionCtrl = GED_FUSION_CTRL_Normal;
1242 GED_DECODE_RAW_TO(FusionCtrl, fusionCtrl);
1243 if (fusionCtrl == GED_FUSION_CTRL_Serialized) {
1244 inst->addInstOpt(InstOpt::SERIALIZE);
1245 }
1246 }
1247
1248 if (sdi.hasExBSO)
1249 inst->addInstOpt(InstOpt::EXBSO);
1250 if (sdi.hasCps)
1251 inst->addInstOpt(InstOpt::CPS);
1252
1253 // in case the operand lengths come from a seprate source
1254 if (inst->getSrc0Length() < 0)
1255 inst->setSrc0Length(sdi.src0Len);
1256 if (inst->getSrc1Length() < 0)
1257 inst->setSrc1Length(sdi.src1Len);
1258
1259 return inst;
1260 }
1261
decodeSendDestination(Instruction * inst)1262 void Decoder::decodeSendDestination(Instruction *inst)
1263 {
1264 GED_ACCESS_MODE accessMode = decodeAccessMode();
1265 GED_DECODE_RAW(GED_REG_FILE, regFile, DstRegFile);
1266 GED_ADDR_MODE addrMode = GED_ADDR_MODE_Direct;
1267
1268 if (platform() <= Platform::GEN11) {
1269 GED_DECODE_RAW_TO(DstAddrMode, addrMode);
1270 }
1271
1272 if (addrMode == GED_ADDR_MODE_Indirect) {
1273 if (regFile == GED_REG_FILE_GRF) {
1274 decodeBasicDestination(inst, accessMode);
1275 } else {
1276 errorT("error decoding instruction: SEND dst ARF");
1277 }
1278 } else {
1279 DirRegOpInfo dri = decodeDstDirRegInfo();
1280
1281 Region::Horz rgnHz = Region::Horz::HZ_1;
1282 if (m_opSpec->hasImplicitDstRegion(isMacro())) {
1283 rgnHz = m_opSpec->implicitDstRegion(isMacro()).getHz();
1284 }
1285
1286 inst->setDirectDestination(
1287 DstModifier::NONE,
1288 dri.regName,
1289 dri.regRef,
1290 rgnHz,
1291 dri.type);
1292 }
1293 }
1294
decodeSendSource0AddressMode()1295 GED_ADDR_MODE Decoder::decodeSendSource0AddressMode()
1296 {
1297 GED_ADDR_MODE addrMode = GED_ADDR_MODE_Direct;
1298 if (platform() <= Platform::GEN11) {
1299 addrMode = decodeSrcAddrMode<SourceIndex::SRC0>();
1300 }
1301 return addrMode;
1302 }
1303
decodeSendSource0(Instruction * inst)1304 void Decoder::decodeSendSource0(Instruction *inst)
1305 {
1306 GED_ACCESS_MODE accessMode = decodeAccessMode();
1307 GED_REG_FILE regFile = decodeSrcRegFile<SourceIndex::SRC0>();
1308
1309 GED_ADDR_MODE addrMode = decodeSendSource0AddressMode();
1310
1311 if (regFile == GED_REG_FILE_GRF && addrMode == GED_ADDR_MODE_Indirect) {
1312 decodeSourceBasic<SourceIndex::SRC0>(inst, accessMode);
1313 } else {
1314 DirRegOpInfo dri = decodeSrcDirRegOpInfo<SourceIndex::SRC0>();
1315
1316 Region rgn = inst->getOpSpec().implicitSrcRegion(
1317 0,
1318 inst->getExecSize(),
1319 isMacro());
1320 bool hasSrcRgnEncoding = inst->getOpSpec().isSendFamily()
1321 && platform() < Platform::GEN9;
1322
1323 hasSrcRgnEncoding &= platform() <= Platform::GEN11;
1324
1325 if (hasSrcRgnEncoding) {
1326 // these bits are implicitly set by GED on SKL, and they disallow access
1327 rgn = decodeSrcRegionVWH<SourceIndex::SRC0>();
1328 }
1329
1330 inst->setDirectSource(
1331 SourceIndex::SRC0,
1332 SrcModifier::NONE,
1333 dri.regName,
1334 dri.regRef,
1335 rgn,
1336 dri.type);
1337 }
1338 }
1339
1340
decodeSendSource1(Instruction * inst)1341 void Decoder::decodeSendSource1(Instruction *inst)
1342 {
1343 RegRef regRef;
1344 RegName regName = decodeSourceReg<SourceIndex::SRC1>(regRef);
1345 const OpSpec &os = inst->getOpSpec();
1346 Region rgn = os.implicitSrcRegion(1, inst->getExecSize(), isMacro());
1347 Type implSrcType = os.implicitSrcType(1, false);
1348 inst->setDirectSource(
1349 SourceIndex::SRC1,
1350 SrcModifier::NONE,
1351 regName,
1352 regRef,
1353 rgn,
1354 implSrcType);
1355 }
1356
1357
1358 ///////////////////////////////////////////////////////////////////////
1359 // BRANCH INSTRUCTIONS
1360 ///////////////////////////////////////////////////////////////////////
decodeBranchInstruction(Kernel & kernel)1361 Instruction *Decoder::decodeBranchInstruction(Kernel& kernel)
1362 {
1363 if (decodeAccessMode() == GED_ACCESS_MODE_Align16) {
1364 errorT("Align16 branches not supported");
1365 return kernel.createIllegalInstruction();
1366 }
1367
1368 FlagRegInfo fri = decodeFlagRegInfo();
1369 Instruction *inst = kernel.createBranchInstruction(
1370 *m_opSpec,
1371 fri.pred,
1372 fri.reg,
1373 decodeExecSize(),
1374 decodeChannelOffset(),
1375 decodeMaskCtrl(),
1376 m_subfunc);
1377
1378 if (m_opSpec->op == Op::JMPI) {
1379 // jmpi (1) JIP
1380 // is encoded as:
1381 // jmpi (1) ip ip JIP
1382 GED_REG_FILE regFile = GED_REG_FILE_INVALID;
1383
1384 GED_DECODE_RAW(GED_REG_FILE, regTFile, Src1RegFile);
1385 regFile = regTFile;
1386
1387 // TODO: make and use m_opSpec->hasImplicit{Source,Destination}()
1388 if (regFile != GED_REG_FILE_IMM) {
1389 if (m_model.supportsSrc1CtrlFlow()) {
1390 decodeSourceBasic<SourceIndex::SRC1>(
1391 inst, SourceIndex::SRC0, GED_ACCESS_MODE_Align1);
1392 } else {
1393 Region rgn = decodeSrcRegionVWH<SourceIndex::SRC0>();
1394 DirRegOpInfo opInfo = decodeSrcDirRegOpInfo<SourceIndex::SRC0>();
1395 inst->setDirectSource(
1396 SourceIndex::SRC0, SrcModifier::NONE, opInfo.regName, opInfo.regRef, rgn, opInfo.type);
1397 }
1398 } else {
1399 GED_DECODE_RAW(int32_t, jip, JIP);
1400 // jmpi is stored post-increment; normalize it to pre-increment
1401 jip += GED_InsSize(&m_currGedInst);
1402 Type dataType = Type::INVALID;
1403 if (m_model.supportsSrc1CtrlFlow()) {
1404 dataType = decodeSrcType<SourceIndex::SRC1>();
1405 } else {
1406 dataType = decodeSrcType<SourceIndex::SRC0>();
1407 }
1408 inst->setLabelSource(
1409 SourceIndex::SRC0,
1410 jip,
1411 dataType);
1412 }
1413 } else if (m_opSpec->op == Op::RET) {
1414 // ret encodes as:
1415 // ret (..) null src0
1416 // we leave then null implicit
1417 decodeSourceBasicAlign1<SourceIndex::SRC0>(inst);
1418 } else if (m_opSpec->op == Op::CALL || m_opSpec->op == Op::CALLA) {
1419 // calla (..) reg imm32
1420 // call (..) reg imm32
1421 // call (..) reg reg32
1422 //
1423 // call can take register or immediate (jip)
1424 // call stores register info in src1
1425 decodeBasicDestinationAlign1(inst);
1426 GED_REG_FILE regFile = GED_REG_FILE_INVALID;
1427 Type srcType = Type::INVALID;
1428
1429 GED_DECODE_RAW(GED_REG_FILE, regTFile, Src1RegFile);
1430 regFile = regTFile;
1431 srcType = decodeSrcType<SourceIndex::SRC1>();
1432
1433 if (regFile == GED_REG_FILE_IMM) {
1434 // calla (..) reg imm32
1435 // call (..) reg imm32
1436 decodeJipToSrc(inst,
1437 SourceIndex::SRC0,
1438 srcType);
1439 } else {
1440 // call (..) reg reg32
1441 decodeSourceBasicAlign1<SourceIndex::SRC1>(
1442 inst,
1443 SourceIndex::SRC0);
1444 }
1445 } else if (m_opSpec->op == Op::BRC || m_opSpec->op == Op::BRD) {
1446 // brc (..) lbl16 lbl16 [PreBDW]
1447 // brc (..) lbl32 lbl32 [BDW+]
1448 // brc (..) reg32 [PreHSW]
1449 // brc (..) reg64 [HSW]
1450 // brc (..) reg64 [BDW+]
1451 //
1452 // brd (..) imm16 [IVB,HSW]
1453 // brd (..) reg32 [IVB,HSW]
1454 // brd (..) lbl32 [BDW+]
1455 // brd (..) reg32 [BDW+]
1456 GED_DECODE_RAW(GED_REG_FILE, regFile, Src0RegFile);
1457 if (regFile == GED_REG_FILE_IMM) {
1458 Type type = decodeSrcType<SourceIndex::SRC0>();
1459 decodeJipToSrc(inst,
1460 SourceIndex::SRC0,
1461 type);
1462 if (m_opSpec->op == Op::BRC) {
1463 decodeUipToSrc1(inst, type);
1464 }
1465 } else {
1466 // register argument
1467 decodeSourceBasicAlign1<SourceIndex::SRC0>(inst);
1468 if (m_opSpec->op == Op::BRC) {
1469 // add an implicit null parameter
1470 inst->setSource(SourceIndex::SRC1, Operand::SRC_REG_NULL_UD);
1471 }
1472 }
1473 } else {
1474 // e.g. if, else, endif, while, cont, break, ...
1475 decodeJipToSrc(inst);
1476 if (m_opSpec->format != OpSpec::Format::JUMP_UNARY_IMM) {
1477 decodeUipToSrc1(inst, Type::INVALID);
1478 }
1479 }
1480
1481 return inst;
1482 }
1483
decodeBranchSimplifiedInstruction(Kernel & kernel)1484 Instruction *Decoder::decodeBranchSimplifiedInstruction(Kernel& kernel)
1485 {
1486 BranchCntrl branchCtrl = BranchCntrl::OFF;
1487 if (m_opSpec->supportsBranchCtrl()) {
1488 GED_DECODE_TO(BranchCtrl, translate, branchCtrl);
1489 }
1490 FlagRegInfo fri = decodeFlagRegInfo();
1491 Instruction *inst = kernel.createBranchInstruction(
1492 *m_opSpec,
1493 fri.pred,
1494 fri.reg,
1495 decodeExecSize(),
1496 decodeChannelOffset(),
1497 decodeMaskCtrl(),
1498 branchCtrl);
1499
1500 if (inst->getOpSpec().supportsDestination()) {
1501 decodeBranchDestination(inst);
1502 }
1503
1504 GED_DECODE_RAW(GED_REG_FILE, regFile, Src0RegFile);
1505 if (regFile != GED_REG_FILE_IMM) {
1506 Region rgn =
1507 m_opSpec->implicitSrcRegion(0, inst->getExecSize(), isMacro());
1508 DirRegOpInfo opInfo = decodeSrcDirRegOpInfo<SourceIndex::SRC0>();
1509 inst->setDirectSource(
1510 SourceIndex::SRC0,
1511 SrcModifier::NONE,
1512 opInfo.regName,
1513 opInfo.regRef,
1514 rgn,
1515 opInfo.type);
1516 } else {
1517 decodeJipToSrc(inst,
1518 SourceIndex::SRC0,
1519 m_opSpec->implicitSrcType(
1520 static_cast<int>(SourceIndex::SRC0), false));
1521 }
1522 // brc/brd read both UIP and JIP from one register (64-bits)
1523 bool isReg64 = ((m_opSpec->op == Op::BRC || m_opSpec->op == Op::BRD) &&
1524 regFile == GED_REG_FILE_GRF);
1525 bool isUnary = (m_opSpec->format & OpSpec::Format::UNARY) != 0;
1526 if (!isReg64 && !isUnary) {
1527 decodeUipToSrc1(inst, Type::INVALID);
1528 }
1529 return inst;
1530 }
1531
decodeBranchDestination(Instruction * inst)1532 void Decoder::decodeBranchDestination(Instruction *inst)
1533 {
1534 DirRegOpInfo dri = decodeDstDirRegInfo();
1535 Type dty = Type::UD;
1536 if (inst->getOpSpec().hasImplicitDstType()) {
1537 dty = m_opSpec->implicitDstType();
1538 }
1539 inst->setDirectDestination(
1540 DstModifier::NONE, dri.regName, dri.regRef, Region::Horz::HZ_1, dty);
1541 }
1542
1543 ///////////////////////////////////////////////////////////////////////
1544 // OTHER INSTRUCTIONS
1545 ///////////////////////////////////////////////////////////////////////
decodeWaitInstruction(Kernel & kernel)1546 Instruction *Decoder::decodeWaitInstruction(Kernel &kernel)
1547 {
1548 // wait encodes as
1549 // wait (..) nreg nreg null
1550 GED_ACCESS_MODE accessMode = decodeAccessMode();
1551 FlagRegInfo fri = decodeFlagRegInfo();
1552 Instruction *inst =
1553 kernel.createBasicInstruction(
1554 *m_opSpec,
1555 fri.pred,
1556 fri.reg,
1557 decodeExecSize(),
1558 decodeChannelOffset(),
1559 decodeMaskCtrl(),
1560 fri.modifier,
1561 m_subfunc);
1562 decodeSourceBasic<SourceIndex::SRC0>(inst, accessMode);
1563 return inst;
1564 }
1565
decodeSyncInstruction(Kernel & kernel)1566 Instruction *Decoder::decodeSyncInstruction(Kernel &kernel)
1567 {
1568 FlagRegInfo fri = decodeFlagRegInfo();
1569 Instruction *inst =
1570 kernel.createBasicInstruction(
1571 *m_opSpec,
1572 fri.pred,
1573 fri.reg,
1574 decodeExecSize(),
1575 decodeChannelOffset(),
1576 decodeMaskCtrl(),
1577 fri.modifier,
1578 m_subfunc);
1579 GED_REG_FILE regFile = decodeSrcRegFile<SourceIndex::SRC0>();
1580
1581 if (regFile == GED_REG_FILE_ARF) {
1582 // e.g.
1583 // sync.nop null
1584 // sync.allrd null
1585 // ...
1586 // Since XeHPC, sync.bar supports flag src0
1587 if (platform() >= Platform::XE_HPC) {
1588 decodeSourceBasic<SourceIndex::SRC0>(inst, GED_ACCESS_MODE_Align1);
1589 } else {
1590 inst->setSource(SourceIndex::SRC0, Operand::SRC_REG_NULL_UB);
1591 }
1592 } else {
1593 // e.g.
1594 // sync.allrd 0x15
1595 // ...
1596 decodeSourceBasic<SourceIndex::SRC0>(inst, GED_ACCESS_MODE_Align1);
1597 }
1598 return inst;
1599 }
1600
decodePredication()1601 Predication Decoder::decodePredication()
1602 {
1603 Predication pred = {PredCtrl::NONE, false};
1604 GED_DECODE_RAW(GED_PRED_CTRL, pc, PredCtrl);
1605 pred.function = translate(pc);
1606 return pred;
1607 }
1608
decodePredInv(Predication & pred)1609 void Decoder::decodePredInv(Predication& pred)
1610 {
1611 GED_DECODE_RAW(GED_PRED_INV, pi, PredInv);
1612 pred.inverse = (pi == GED_PRED_INV_Invert);
1613 }
1614
decodeMaskCtrl()1615 MaskCtrl Decoder::decodeMaskCtrl()
1616 {
1617 GED_DECODE(MaskCtrl, GED_MASK_CTRL, ctrl, MaskCtrl);
1618 return ctrl;
1619 }
1620
decodeFlagRegInfo(bool imm64Src0Overlaps)1621 FlagRegInfo Decoder::decodeFlagRegInfo(bool imm64Src0Overlaps) {
1622
1623 FlagRegInfo fri = {
1624 {PredCtrl::NONE, false},
1625 FlagModifier::NONE,
1626 REGREF_ZERO_ZERO};
1627 if (m_opSpec->supportsPredication()) {
1628 fri.pred = decodePredication();
1629 }
1630 if (m_opSpec->supportsFlagModifier() && !imm64Src0Overlaps) {
1631 // XE SWSB overlaps CondModifier and Imm64 values
1632 GED_DECODE_RAW(GED_COND_MODIFIER, condMod, CondModifier);
1633 fri.modifier = translate(condMod);
1634 } else if (m_opSpec->is(Op::MATH) && isMacro()) {
1635 // math.inv and math.rsqrtm both implicitly support EO
1636 // currently math is the only case, and its flagModifier must be EO
1637 fri.modifier = FlagModifier::EO;
1638 }
1639
1640 // For XeHPC PredIvn field only exists when
1641 // PredCtrl or CondCtrl (flag modifier) exits
1642 if (platform() >= Platform::XE_HPC) {
1643 if (fri.pred.function != PredCtrl::NONE ||
1644 fri.modifier != FlagModifier::NONE)
1645 decodePredInv(fri.pred);
1646 }
1647 else if (m_opSpec->supportsPredication())
1648 {
1649 decodePredInv(fri.pred);
1650 }
1651
1652 if (fri.pred.function != PredCtrl::NONE ||
1653 fri.modifier != FlagModifier::NONE)
1654 {
1655 GED_DECODE_RAW(uint32_t, flagRegNum, FlagRegNum);
1656 fri.reg.regNum = (uint16_t)flagRegNum;
1657 GED_DECODE_RAW(uint32_t, flagSubRegNum, FlagSubRegNum);
1658 fri.reg.subRegNum = (uint16_t)flagSubRegNum;
1659 }
1660
1661 return fri;
1662 }
1663
decodeExecSize()1664 ExecSize Decoder::decodeExecSize()
1665 {
1666 GED_DECODE_RAW(uint32_t, execSize, ExecSize);
1667 return translateExecSize(execSize);
1668 }
1669
decodeChannelOffset()1670 ChannelOffset Decoder::decodeChannelOffset()
1671 {
1672 if (m_opSpec->supportsQtrCtrl()) {
1673 GED_DECODE(ChannelOffset, GED_CHANNEL_OFFSET, em, ChannelOffset);
1674 return em;
1675 } else {
1676 return ChannelOffset::M0;
1677 }
1678 }
1679
decodeAccessMode()1680 GED_ACCESS_MODE Decoder::decodeAccessMode()
1681 {
1682 if (m_model.supportsAccessMode()) {
1683 GED_DECODE_RAW(GED_ACCESS_MODE, accessMode, AccessMode);
1684 return accessMode;
1685 }
1686 return GED_ACCESS_MODE_Align1;
1687 }
1688
decodeJipToSrc(Instruction * inst,SourceIndex s,Type type)1689 void Decoder::decodeJipToSrc(Instruction *inst, SourceIndex s, Type type) {
1690 inst->setLabelSource(s, decodeJip(), type);
1691 }
decodeUipToSrc1(Instruction * inst,Type type)1692 void Decoder::decodeUipToSrc1(Instruction *inst, Type type) {
1693 inst->setLabelSource(SourceIndex::SRC1, decodeUip(), type);
1694 }
1695
// Scale factor applied to decoded JIP and UIP values.
// Before GEN8 (pre-BDW) JIP and UIP are in QWORDS, so they are scaled by 8;
// the exceptions are call, calla, and jmpi, which (like every instruction
// on GEN8 and later) use a scale of 1.
#define PC_SCALE \
    ((platform() >= Platform::GEN8 || \
        m_opSpec->op == Op::CALL || \
        m_opSpec->op == Op::CALLA || \
        m_opSpec->op == Op::JMPI) ? 1 : 8)
decodeJip()1703 int32_t Decoder::decodeJip() {
1704 GED_DECODE_RAW(int32_t, jip, JIP);
1705 return jip * PC_SCALE;
1706 }
decodeUip()1707 int32_t Decoder::decodeUip() {
1708 GED_DECODE_RAW(int32_t, uip, UIP);
1709 return uip * PC_SCALE;
1710 }
1711
decodeDestinationRegNumAccBitsFromChEn()1712 int Decoder::decodeDestinationRegNumAccBitsFromChEn()
1713 {
1714 // this is used by the math macro register (implicit accumulator access)
1715 // and for context save and restore access to those registers
1716 GED_DECODE_RAW(GED_DST_CHAN_EN, chEn, DstChanEn);
1717 switch (chEn) {
1718 case GED_DST_CHAN_EN_None: return 0; // 0000b => mme0 (acc2)
1719 case GED_DST_CHAN_EN_x: return 1; // 0001b => mme1 (acc3)
1720 case GED_DST_CHAN_EN_y: return 2; // 0010b => mme2 (acc4)
1721 case GED_DST_CHAN_EN_xy: return 3; // 0011b => mme3 (acc5)
1722 case GED_DST_CHAN_EN_z: return 4; // 0100b => mme4 (acc6)
1723 case GED_DST_CHAN_EN_xz: return 5; // 0101b => mme5 (acc7)
1724 case GED_DST_CHAN_EN_yz: return 6; // 0110b => mme6 (acc8)
1725 case GED_DST_CHAN_EN_xyz: return 7; // 0111b => mme7 (acc9)
1726 //
1727 // every thing else unreachable because this is an explicit operand
1728 // not an implicit math macro acc reference
1729 //
1730 case GED_DST_CHAN_EN_w: return 0; // 1000b => noacc
1731 //
1732 // HACK: for context save and restore, acc9 encodes as .xyzw
1733 // I think this is because of
1734 // mov(8) acc2:ud r103:ud {NoMask, Align16} //acc9
1735 // ^.xyzw implied
1736 // Seems like it should just be .xyz
1737 case GED_DST_CHAN_EN_xyzw: return 7; // 1111b => mme7 (acc9)
1738 default:
1739 errorT("dst: invalid math macro register (from ChEn)");
1740 return -1;
1741 }
1742 }
1743
1744
decodeDestinationMathMacroRegFromChEn()1745 MathMacroExt Decoder::decodeDestinationMathMacroRegFromChEn()
1746 {
1747 // this is used by the math macro register (implicit accumulator) access
1748 // and for context save and restore access to those registers
1749 GED_DECODE_RAW(GED_DST_CHAN_EN, chEn, DstChanEn);
1750 switch (chEn) {
1751 case GED_DST_CHAN_EN_None: return MathMacroExt::MME0; // 0000b => mme0 (acc2)
1752 case GED_DST_CHAN_EN_x: return MathMacroExt::MME1; // 0001b => mme1 (acc3)
1753 case GED_DST_CHAN_EN_y: return MathMacroExt::MME2; // 0010b => mme2 (acc4)
1754 case GED_DST_CHAN_EN_xy: return MathMacroExt::MME3; // 0011b => mme3 (acc5)
1755 case GED_DST_CHAN_EN_z: return MathMacroExt::MME4; // 0100b => mme4 (acc6)
1756 case GED_DST_CHAN_EN_xz: return MathMacroExt::MME5; // 0101b => mme5 (acc7)
1757 case GED_DST_CHAN_EN_yz: return MathMacroExt::MME6; // 0110b => mme6 (acc8)
1758 case GED_DST_CHAN_EN_xyz: return MathMacroExt::MME7; // 0111b => mme7 (acc9)
1759 case GED_DST_CHAN_EN_w: return MathMacroExt::NOMME; // 1000b => nomme (noacc)
1760 default:
1761 errorT("invalid dst implicit accumulator reference (in ChEn)");
1762 return MathMacroExt::INVALID;
1763 }
1764 }
1765
decodeDstDirSubRegNum(DirRegOpInfo & dri)1766 void Decoder::decodeDstDirSubRegNum(DirRegOpInfo& dri)
1767 {
1768 if (isMacro() || m_opSpec->isSendOrSendsFamily()) {
1769 dri.regRef.subRegNum = 0;
1770 } else {
1771 Type scalingType = dri.type;
1772 if (scalingType == Type::INVALID)
1773 scalingType = m_opSpec->isBranching() ? Type::D : Type::UB;
1774
1775 GED_DECODE_RAW(uint32_t, subRegNum, DstSubRegNum);
1776 dri.regRef.subRegNum =
1777 (uint16_t)BinaryOffsetToSubReg(subRegNum, dri.regName, scalingType, m_model.platform);
1778 }
1779 }
1780
decodeReg(int opIx,GED_REG_FILE regFile,uint32_t regNumBits,RegName & regName,RegRef & regRef)1781 void Decoder::decodeReg(
1782 int opIx,
1783 GED_REG_FILE regFile,
1784 uint32_t regNumBits,
1785 RegName ®Name,
1786 RegRef ®Ref) // works for src or dst
1787 {
1788 const char *opName =
1789 opIx == 0 ? "src0" :
1790 opIx == 1 ? "src1" :
1791 opIx == 2 ? "src2" :
1792 "dst";
1793 if (regFile == GED_REG_FILE_GRF) {
1794 regName = RegName::GRF_R;
1795 regRef.regNum = (uint16_t)regNumBits;
1796 } else if (regFile == GED_REG_FILE_ARF) { // ARF
1797 regName = RegName::INVALID;
1798 int arfRegNum = 0;
1799 const RegInfo *ri = m_model.lookupArfRegInfoByRegNum((uint8_t)regNumBits);
1800 if (ri == nullptr) {
1801 errorT(opName, ": ", iga::fmtHex(regNumBits, 2),
1802 ": invalid arf register");
1803 } else {
1804 regName = ri->regName;
1805 if (!ri->decode((uint8_t)regNumBits, arfRegNum)) {
1806 errorT(opName, ": ", ri->syntax, arfRegNum,
1807 ": invalid register number ");
1808 }
1809 }
1810 regRef.regNum = (uint16_t)arfRegNum;
1811 } else { // e.g. 10b
1812 errorT(opName, ": invalid register file");
1813 }
1814 }
1815
decodeDstDirRegInfo()1816 DirRegOpInfo Decoder::decodeDstDirRegInfo() {
1817 DirRegOpInfo dri;
1818 dri.type = m_opSpec->implicitDstType();
1819 bool hasDstType = true;
1820 if (platform() >= Platform::XE) {
1821 hasDstType &= !m_opSpec->isSendOrSendsFamily();
1822 hasDstType &= !m_opSpec->isBranching();
1823 }
1824 if (hasDstType) {
1825 dri.type = decodeDstType();
1826 }
1827
1828 GED_DECODE_RAW(GED_REG_FILE, gedRegFile, DstRegFile);
1829 GED_DECODE_RAW(uint32_t, regNumBits, DstRegNum);
1830 decodeReg(-1,gedRegFile,regNumBits,dri.regName,dri.regRef);
1831 decodeDstDirSubRegNum(dri);
1832
1833 return dri;
1834 }
1835
decodeDstType()1836 Type Decoder::decodeDstType() {
1837 GED_DECODE(Type, GED_DATA_TYPE, t, DstDataType);
1838 return t;
1839 }
1840
hasImplicitScalingType(Type & type,DirRegOpInfo & dri)1841 bool Decoder::hasImplicitScalingType(Type& type, DirRegOpInfo& dri)
1842 {
1843 // FIXME: when entering this function, assuming it MUST NOT be imm or label src
1844 if (platform() >= Platform::XE &&
1845 (m_opSpec->isSendFamily() || m_opSpec->isBranching()))
1846 {
1847 dri.type = m_opSpec->implicitSrcType(
1848 static_cast<int>(SourceIndex::SRC0), false);
1849 type = Type::D;
1850 return true;
1851 }
1852 return false;
1853 }
1854
decodeSrcImmVal(Type t)1855 ImmVal Decoder::decodeSrcImmVal(Type t) {
1856 ImmVal val;
1857 val.kind = ImmVal::Kind::UNDEF;
1858 memset(&val, 0, sizeof(val)); // zero value in case GED only sets bottom bits
1859
1860 GED_DECODE_RAW_TO(Imm, val.u64);
1861 setImmValKind(t, val);
1862 return val;
1863 }
1864
1865 template <SourceIndex S>
decodeSourceBasicAlign1(Instruction * inst,SourceIndex toSrcIxE)1866 void Decoder::decodeSourceBasicAlign1(
1867 Instruction *inst, SourceIndex toSrcIxE)
1868 {
1869 const int toSrcIx = static_cast<int>(toSrcIxE);
1870 GED_REG_FILE regFile = decodeSrcRegFile<S>();
1871 if (regFile == GED_REG_FILE_IMM) {
1872 // immediate operand
1873 Type type = decodeSrcType<S>();
1874 inst->setImmediateSource(toSrcIxE, decodeSrcImmVal(type), type);
1875 } else if (regFile == GED_REG_FILE_ARF || regFile == GED_REG_FILE_GRF) {
1876 // register operand
1877 GED_ADDR_MODE addrMode = GED_ADDR_MODE_Direct;
1878 addrMode = decodeSrcAddrMode<S>();
1879
1880 SrcModifier srcMod = decodeSrcModifier<S>();
1881 // region (implicit accumulator if Align16 and <GEN11)
1882 Region implRgn = Region::INVALID;
1883 if (inst->getOpSpec().hasImplicitSrcRegion(
1884 toSrcIx, inst->getExecSize(), isMacro()))
1885 {
1886 implRgn = inst->getOpSpec().implicitSrcRegion(
1887 toSrcIx, inst->getExecSize(), isMacro());
1888 }
1889 Region decRgn = Region::INVALID;
1890 if (m_opSpec->isSendOrSendsFamily()) {
1891 decRgn = implRgn;
1892 } else {
1893 decRgn = decodeSrcRegionVWH<S>();
1894 }
1895 // ensure the region matches any implicit region rules
1896 if (!m_opSpec->isSendOrSendsFamily() &&
1897 inst->getOpSpec().hasImplicitSrcRegion(
1898 toSrcIx, inst->getExecSize(), isMacro()))
1899 {
1900 if (implRgn != decRgn) {
1901 warningT("src", (int)S, ".Rgn should have ",
1902 ToSyntax(implRgn), " for binary normal form");
1903 }
1904 }
1905
1906 if (addrMode == GED_ADDR_MODE_Direct) {
1907 if (inst->isMacro()) {
1908 // GEN11 macros are stored in the subregister
1909 if (m_model.supportsAlign16()) {
1910 fatalT("src", (int)S, ": macro instructions must be "
1911 "Align16 for this platform");
1912 }
1913 MathMacroExt mme = decodeSrcMathMacroReg<S>();
1914 RegRef regRef {0,0};
1915 RegName regName = decodeSourceReg<S>(regRef);
1916 inst->setMacroSource(
1917 toSrcIxE, srcMod, regName, regRef, mme, decRgn, decodeSrcType<S>());
1918 } else {
1919 // normal access
1920 DirRegOpInfo opInfo = decodeSrcDirRegOpInfo<S>();
1921 inst->setDirectSource(
1922 toSrcIxE,
1923 srcMod,
1924 opInfo.regName,
1925 opInfo.regRef,
1926 decRgn,
1927 opInfo.type);
1928 }
1929 } else if (addrMode == GED_ADDR_MODE_Indirect) {
1930 RegRef a0(0u, decodeSrcAddrSubRegNum<S>());
1931 int16_t addrImm = (uint16_t)decodeSrcAddrImm<S>();
1932 inst->setInidirectSource(
1933 toSrcIxE,
1934 srcMod,
1935 RegName::GRF_R, // set to GRF for indirect register access
1936 a0,
1937 addrImm,
1938 decRgn,
1939 decodeSrcType<S>());
1940 } else { // == GED_ADDR_MODE_INVALID
1941 fatalT("invalid addressing mode in src", (int)S);
1942 }
1943 } else { // GED_REG_FILE_INVALID
1944 fatalT("invalid register file in src", (int)S);
1945 }
1946 }
1947
1948
1949 template <SourceIndex S>
decodeSourceBasicAlign16(Instruction * inst,SourceIndex toSrcIx)1950 void Decoder::decodeSourceBasicAlign16(
1951 Instruction *inst, SourceIndex toSrcIx)
1952 {
1953 GED_REG_FILE regFile = decodeSrcRegFile<S>();
1954 if (regFile == GED_REG_FILE_IMM) {
1955 // immediate operand
1956 Type type = decodeSrcType<S>();
1957 inst->setImmediateSource(toSrcIx, decodeSrcImmVal(type), type);
1958 } else if (regFile == GED_REG_FILE_ARF || regFile == GED_REG_FILE_GRF) {
1959 // register operand (direct or indirect)
1960 SrcModifier srcMod = decodeSrcModifier<S>();
1961
1962 // reg and subreg (if direct)
1963 GED_ADDR_MODE addrMode = decodeSrcAddrMode<S>();
1964
1965 // special context save/restore access to acc2-acc9
1966 uint32_t vs = decodeSrcVertStride<S>();
1967
1968 if (addrMode == GED_ADDR_MODE_Direct) {
1969 DirRegOpInfo opInfo = decodeSrcDirRegOpInfo<S>();
1970 if (inst->isMacro()) { // math macro operand (macro inst)
1971 if (!((vs == 2 && opInfo.type == Type::DF) ||
1972 (vs == 4 && opInfo.type != Type::DF)))
1973 {
1974 fatalT("src", (int)S, ": inconvertible align16 operand");
1975 }
1976 // <GEN11 macros are stored in swizzle bits
1977 MathMacroExt MathMacroReg = decodeSrcMathMacroReg<S>();
1978 Region macroDftSrcRgn = macroDefaultSourceRegion(
1979 (int)S,
1980 inst->getOpSpec(),
1981 platform(),
1982 inst->getExecSize());
1983 inst->setMacroSource(
1984 toSrcIx,
1985 srcMod,
1986 opInfo.regName,
1987 opInfo.regRef,
1988 MathMacroReg,
1989 macroDftSrcRgn,
1990 opInfo.type);
1991 } else {
1992 if (vs != 4) {
1993 fatalT("src", (int)S, ": inconvertible align16 operand");
1994 }
1995 Region rgn = Region::SRC110;
1996 if (opInfo.regName == RegName::ARF_MME &&
1997 isAlign16MathMacroRegisterCsrPlatform())
1998 {
1999 // GEN8-9: context save and restore of acc3-9 hack
2000 // (remember acc2 is Align1 and misses this path)
2001 // So if we are here, it'll look like we just
2002 // decoded "mme0" (acc2), but really we need to consult
2003 // ChSel for the real mme# (acc#+2).
2004 uint32_t chanSel = decodeSrcChanSel<S>() & 0xF;
2005 // We do have to strip off the top bits of ChSel since
2006 // it's really only ChSel[3:0].
2007 //
2008 // the ChSel[3:0] value is taken as interpreted as the
2009 // mme register (e.g. we used to map 0 to 2 for acc2)
2010 // mme's start at 0 (mme0 is acc2).
2011 opInfo.regRef.regNum = (uint8_t)chanSel;
2012 opInfo.regRef.subRegNum = 0;
2013 // In GEN10, this all gets cleaned up and acc3 really is
2014 // RegName[7:4] = 0101b ("acc")
2015 // RegName[3:0] = 0011b ("3")
2016 // (Which we map back to mme1.) Moreover, that'll be
2017 // Align1 code and this path won't be hit.
2018 } else {
2019 // conversion of some other Align16 (e.g math macros)
2020 if (isChanSelPacked<S>()) {
2021 fatalT("src", (int)S, ": inconvertible align16 operand");
2022 }
2023 }
2024 inst->setDirectSource(
2025 toSrcIx, srcMod, opInfo.regName, opInfo.regRef, rgn, opInfo.type);
2026 }
2027 } else if (addrMode == GED_ADDR_MODE_Indirect) {
2028 if (!isChanSelPacked<S>() && vs == 4) {
2029 fatalT("src", (int)S, ": inconvertible align16 operand");
2030 }
2031 int32_t subRegNum = decodeSrcAddrSubRegNum<S>();
2032 int32_t addrImm = decodeSrcAddrImm<S>();
2033 RegRef indReg = {0, (uint8_t)subRegNum};
2034 inst->setInidirectSource(
2035 toSrcIx, srcMod, RegName::GRF_R, indReg,
2036 (int16_t)addrImm, Region::SRC110, decodeSrcType<S>());
2037 } else {
2038 // == GED_ADDR_MODE_INVALID
2039 fatalT("src", (int)S, ": invalid addressing mode");
2040 }
2041 } else { // GED_REG_FILE_INVALID
2042 fatalT("invalid register file in src", (int)S);
2043 }
2044 }
2045
2046
decodeChSelToSwizzle(uint32_t chanSel,GED_SWIZZLE swizzle[4])2047 void Decoder::decodeChSelToSwizzle(uint32_t chanSel, GED_SWIZZLE swizzle[4])
2048 {
2049 GED_RETURN_VALUE status = GED_RETURN_VALUE_INVALID_FIELD;
2050
2051 swizzle[0] = GED_GetSwizzleX(chanSel, m_gedModel, &status);
2052 if (status != GED_RETURN_VALUE_SUCCESS) {
2053 fatalT("swizzle X could not be retrieved");
2054 }
2055 swizzle[1] = GED_GetSwizzleY(chanSel, m_gedModel, &status);
2056 if (status != GED_RETURN_VALUE_SUCCESS) {
2057 fatalT("swizzle Y could not be retrieved");
2058 }
2059 swizzle[2] = GED_GetSwizzleZ(chanSel, m_gedModel, &status);
2060 if (status != GED_RETURN_VALUE_SUCCESS) {
2061 fatalT("swizzle Z could not be retrieved");
2062 }
2063 swizzle[3] = GED_GetSwizzleW(chanSel, m_gedModel, &status);
2064 if (status != GED_RETURN_VALUE_SUCCESS) {
2065 fatalT("swizzle W could not be retrieved");
2066 }
2067 }
2068
2069 template <SourceIndex S>
isChanSelPacked()2070 bool Decoder::isChanSelPacked()
2071 {
2072 uint32_t chanSel = decodeSrcChanSel<S>();
2073 GED_SWIZZLE swizzle[4];
2074 decodeChSelToSwizzle(chanSel, swizzle);
2075 return swizzle[0] != GED_SWIZZLE_x && swizzle[1] != GED_SWIZZLE_y &&
2076 swizzle[2] != GED_SWIZZLE_z && swizzle[3] != GED_SWIZZLE_w;
2077 }
2078
decodeThreadOptions(Instruction * inst,GED_THREAD_CTRL trdCntrl)2079 void Decoder::decodeThreadOptions(Instruction *inst, GED_THREAD_CTRL trdCntrl)
2080 {
2081 switch (trdCntrl) {
2082 case GED_THREAD_CTRL_Atomic:
2083 inst->addInstOpt(InstOpt::ATOMIC);
2084 break;
2085 case GED_THREAD_CTRL_Switch:
2086 inst->addInstOpt(InstOpt::SWITCH);
2087 break;
2088 case GED_THREAD_CTRL_NoPreempt:
2089 inst->addInstOpt(InstOpt::NOPREEMPT);
2090 break;
2091 case GED_THREAD_CTRL_INVALID:
2092 default:
2093 break;
2094 }
2095 }
2096
2097 template <SourceIndex S> ImmVal
decodeTernarySrcImmVal(Type t)2098 Decoder::decodeTernarySrcImmVal(Type t)
2099 {
2100 ImmVal val;
2101 val.kind = ImmVal::Kind::UNDEF;
2102 memset(&val, 0, sizeof(val)); // zero value in case GED only sets bottom bits
2103
2104 if (S == SourceIndex::SRC0) {
2105 GED_DECODE_RAW_TO(Src0TernaryImm, val.u64);
2106 } else if (S == SourceIndex::SRC2) {
2107 GED_DECODE_RAW_TO(Src2TernaryImm, val.u64);
2108 } else {
2109 errorT("src1: no immediate supported here on ternary instruction");
2110 }
2111
2112 setImmValKind(t, val);
2113
2114 return val;
2115 }
2116
decodeOptions(Instruction * inst)2117 void Decoder::decodeOptions(Instruction *inst)
2118 {
2119 const OpSpec &os = inst->getOpSpec();
2120 if (os.supportsAccWrEn()) {
2121 // * GED doesn't allow AccWrEn on send's
2122 // * BrnchCtrl overlaps AccWrEn, so anything using that is out
2123 GED_ACC_WR_CTRL accWrEn = GED_ACC_WR_CTRL_Normal;
2124 GED_DECODE_RAW_TO(AccWrCtrl, accWrEn);
2125 if (accWrEn == GED_ACC_WR_CTRL_AccWrEn) {
2126 inst->addInstOpt(InstOpt::ACCWREN);
2127 }
2128 }
2129
2130 if (os.supportsDebugCtrl()) {
2131 GED_DEBUG_CTRL debugCtrl = GED_DEBUG_CTRL_Normal;
2132 GED_DECODE_RAW_TO(DebugCtrl, debugCtrl);
2133 if (debugCtrl == GED_DEBUG_CTRL_Breakpoint) {
2134 inst->addInstOpt(InstOpt::BREAKPOINT);
2135 }
2136 }
2137
2138 if (os.isSendOrSendsFamily()) {
2139 GED_EOT eot = GED_EOT_None;
2140 GED_DECODE_RAW_TO(EOT, eot);
2141 if (eot == GED_EOT_EOT) {
2142 inst->addInstOpt(InstOpt::EOT);
2143 }
2144 }
2145
2146 if (os.supportsDepCtrl()) {
2147 GED_DEP_CTRL dpCtrl = GED_DEP_CTRL_Normal;
2148 GED_DECODE_RAW_TO(DepCtrl, dpCtrl);
2149 if (dpCtrl == GED_DEP_CTRL_NoDDClr) {
2150 inst->addInstOpt(InstOpt::NODDCLR);
2151 } else if (dpCtrl == GED_DEP_CTRL_NoDDChk) {
2152 inst->addInstOpt(InstOpt::NODDCHK);
2153 } else if (dpCtrl == GED_DEP_CTRL_NoDDClr_NoDDChk) {
2154 inst->addInstOpt(InstOpt::NODDCLR);
2155 inst->addInstOpt(InstOpt::NODDCHK);
2156 }
2157 }
2158
2159 if (GED_WORKAROUND(
2160 /* really need to get GED to support ThrCtrl on GEN7-8 send's */
2161 (!os.isSendOrSendsFamily() && os.supportsThreadCtrl()) ||
2162 (os.isSendOrSendsFamily() && platform() >= Platform::GEN9)))
2163 {
2164 GED_THREAD_CTRL trdCntrl = GED_THREAD_CTRL_Normal;
2165 GED_DECODE_RAW_TO(ThreadCtrl, trdCntrl);
2166 decodeThreadOptions(inst, trdCntrl);
2167 }
2168
2169 if (m_model.supportNoSrcDepSet() &&
2170 os.isSendOrSendsFamily())
2171 {
2172 GED_NO_SRC_DEP_SET srcDep;
2173 GED_DECODE_RAW_TO(NoSrcDepSet, srcDep);
2174 if (srcDep == GED_NO_SRC_DEP_SET_NoSrcDepSet) {
2175 inst->addInstOpt(InstOpt::NOSRCDEPSET);
2176 }
2177 }
2178
2179 if (GED_IsCompact(&m_currGedInst)) {
2180 inst->addInstOpt(InstOpt::COMPACTED);
2181 }
2182 }
2183
2184
createErrorInstruction(Kernel & kernel,const char * message,const void * binary,int32_t iLen)2185 Instruction *Decoder::createErrorInstruction(
2186 Kernel& kernel,
2187 const char *message,
2188 const void *binary,
2189 int32_t iLen)
2190 {
2191 Instruction *inst = kernel.createIllegalInstruction();
2192
2193 std::stringstream ss;
2194 ss << FormatOpBits(m_model, binary);
2195 if (*message) {
2196 ss << ": " << message;
2197 }
2198 size_t bufLen = (size_t)ss.tellp() + 1;
2199 char *buf = (char *)kernel.getMemManager().alloc(bufLen);
2200 ss.read(buf, bufLen - 1);
2201 buf[bufLen - 1] = 0;
2202
2203 if (iLen == 8) {
2204 inst->addInstOpt(InstOpt::COMPACTED);
2205 }
2206 inst->setComment(buf);
2207 return inst;
2208 }
2209
2210
getBitField(int ix,int len) const2211 uint32_t Decoder::getBitField(int ix, int len) const {
2212 const uint32_t *ws = (const uint32_t *)((const char *)m_binary + currentPc());
2213 // shift is only well-defined for values <32, use 0xFFFFFFFF
2214 uint32_t mask = len >= 32 ? 0xFFFFFFFF : (1<<(uint32_t)len) - 1;
2215 IGA_ASSERT(len <= 32 && ((ix + len - 1)/32 == ix/32),
2216 "getBitField: bitfield spans DWord");
2217 return (ws[ix / 32] >> (ix % 32)) & mask;
2218 }
2219
2220
handleGedDecoderError(int line,const char * field,GED_RETURN_VALUE status)2221 void Decoder::handleGedDecoderError(
2222 int line,
2223 const char *field,
2224 GED_RETURN_VALUE status)
2225 {
2226 std::stringstream ss;
2227 ss << "GED reports ";
2228 if (status == GED_RETURN_VALUE_INVALID_VALUE) {
2229 // bad user bits -> report a warning
2230 ss << "invalid value";
2231 } else if (status == GED_RETURN_VALUE_INVALID_FIELD) {
2232 // our bad -> take it seriously
2233 ss << "invalid field";
2234 } else if (status != GED_RETURN_VALUE_SUCCESS) {
2235 // some other error -> our bad -> take it seriously and assert!
2236 ss << "error (" << (int)status << ")";
2237 }
2238 ss << " for field " << field << " (line " << line << ")\n";
2239 ss << FormatOpBits(m_model, (const char *)m_binary + currentPc());
2240 // std::cout << "pc[" << currentPc() << "] " << ss.str() << std::endl;
2241 if (status == GED_RETURN_VALUE_INVALID_VALUE) {
2242 // indicates something wrong with the bits given, but we can
2243 // continue trying to decode things
2244 errorT(ss.str());
2245 } else {
2246 // indicates IGA is totally wrong and we should probably bail out
2247 fatalT(ss.str());
2248 }
2249 }
2250
2251 // These template class member functions are not defined in header (only
2252 // declared in header), thus their definitions are not available to other
2253 // .cpp. We need to explicitly instantiate those template functions so
2254 // the other .cpp can reference them.
2255 template
2256 void Decoder::decodeSourceBasicAlign16<SourceIndex::SRC0>(
2257 Instruction *inst, SourceIndex toSrcIx);
2258 template
2259 void Decoder::decodeSourceBasicAlign16<SourceIndex::SRC1>(
2260 Instruction *inst, SourceIndex toSrcIx);
2261 template
2262 void Decoder::decodeSourceBasicAlign1<SourceIndex::SRC0>(
2263 Instruction *inst, SourceIndex toSrcIx);
2264 template
2265 void Decoder::decodeSourceBasicAlign1<SourceIndex::SRC1>(
2266 Instruction *inst, SourceIndex toSrcIx);
2267