1 /*========================== begin_copyright_notice ============================
2
3 Copyright (C) 2017-2021 Intel Corporation
4
5 SPDX-License-Identifier: MIT
6
7 ============================= end_copyright_notice ===========================*/
8
9 #include "Encoder.hpp"
10 #include "IGAToGEDTranslation.hpp"
11 #include "../../strings.hpp"
12 #include "../../Frontend/IRToString.hpp"
13 #include "../../IR/Kernel.hpp"
14 #include "../../IR/SWSBSetter.hpp"
15 #include "../../Models/Models.hpp"
16 #include "../../Timer/Timer.hpp"
17
18 #include <cstring>
19
20 using namespace iga;
21
22
23
24
gedReturnValueToString(GED_RETURN_VALUE rv)25 static const char *gedReturnValueToString(GED_RETURN_VALUE rv)
26 {
27 switch(rv) {
28 case GED_RETURN_VALUE_SUCCESS: return "success";
29 case GED_RETURN_VALUE_CYCLIC_DEPENDENCY: return "cyclic dependency";
30 case GED_RETURN_VALUE_NULL_POINTER: return "null pointer";
31 case GED_RETURN_VALUE_OPCODE_NOT_SUPPORTED: return "unsupported opcode";
32 case GED_RETURN_VALUE_NO_COMPACT_FORM: return "no compact form";
33 case GED_RETURN_VALUE_INVALID_FIELD: return "invalid field";
34 case GED_RETURN_VALUE_INVALID_VALUE: return "invalid value";
35 case GED_RETURN_VALUE_INVALID_INTERPRETER: return "invalid interpreter";
36 default: return "other error";
37 }
38 }
handleGedError(int line,const char * setter,GED_RETURN_VALUE status)39 void Encoder::handleGedError(
40 int line, const char *setter, GED_RETURN_VALUE status)
41 {
42 errorT("IGALibrary/GED/Encoder.cpp:", line, ": GED_Set", setter, ": ",
43 gedReturnValueToString(status));
44 }
45
46
47
Encoder(const Model & model,ErrorHandler & errHandler,const EncoderOpts & opts)48 Encoder::Encoder(
49 const Model &model,
50 ErrorHandler &errHandler,
51 const EncoderOpts &opts)
52 : GEDBitProcessor(model, errHandler)
53 , m_opts(opts)
54 , m_numberInstructionsEncoded(0)
55 , m_mem(nullptr)
56 {
57 // derive the swsb encoding mode from platform if not set
58 if (opts.swsbEncodeMode == SWSB_ENCODE_MODE::SWSBInvalidMode) {
59 m_opts.swsbEncodeMode = model.getSWSBEncodeMode();
60 }
61 }
62
encodeKernelPreProcess(Kernel & k)63 void Encoder::encodeKernelPreProcess(Kernel &k)
64 {
65 doEncodeKernelPreProcess(k);
66 }
67
doEncodeKernelPreProcess(Kernel & k)68 void Encoder::doEncodeKernelPreProcess(Kernel &k)
69 {
70 if (m_opts.autoDepSet && platform() >= Platform::XE) {
71 SWSBAnalyzer swsbAnalyzer(
72 k, errorHandler(), m_opts.swsbEncodeMode, m_opts.sbidCount);
73 swsbAnalyzer.run();
74 }
75 }
76
getElapsedTimeMicros(unsigned int idx)77 double Encoder::getElapsedTimeMicros(unsigned int idx)
78 {
79 return getIGATimerUS(idx);
80 }
81
getElapsedTimeTicks(unsigned int idx)82 int64_t Encoder::getElapsedTimeTicks(unsigned int idx)
83 {
84 return getIGATimerTicks(idx);
85 }
86
getTimerName(unsigned int idx)87 std::string Encoder::getTimerName(unsigned int idx)
88 {
89 return getIGATimerNames(idx);
90 }
91
getNumInstructionsEncoded() const92 size_t Encoder::getNumInstructionsEncoded() const
93 {
94 return m_numberInstructionsEncoded;
95 }
96
encodeKernel(Kernel & k,MemManager & mem,void * & bits,uint32_t & bitsLen)97 void Encoder::encodeKernel(
98 Kernel &k,
99 MemManager &mem,
100 void *&bits,
101 uint32_t &bitsLen)
102 {
103 #ifndef IGA_DISABLE_ENCODER_EXCEPTIONS
104 try {
105 #endif
106 initIGATimer();
107 setIGAKernelName("test");
108 IGA_ASSERT(k.getModel().platform == platform(),
109 "kernel/encoder model mismatch");
110
111 encodeKernelPreProcess(k);
112 m_needToPatch.clear();
113 m_mem = &mem;
114 m_numberInstructionsEncoded = k.getInstructionCount();
115 size_t allocLen = m_numberInstructionsEncoded * UNCOMPACTED_SIZE;
116 if (allocLen == 0) // for empty kernel case
117 allocLen = 4;
118 m_instBuf = (uint8_t *)mem.alloc(allocLen);
119 if (!m_instBuf) {
120 fatalAtT(0, "failed to allocate memory for kernel binary");
121 return;
122 }
123
124 for (auto blk : k.getBlockList()) {
125 START_ENCODER_TIMER();
126 encodeBlock(blk);
127 STOP_ENCODER_TIMER();
128 if (hasFatalError()) {
129 return;
130 }
131 }
132 START_ENCODER_TIMER();
133 patchJumpOffsets();
134 STOP_ENCODER_TIMER();
135
136 // setting actual size
137 bitsLen = currentPc();
138 bits = m_instBuf;
139
140 applyGedWorkarounds(k, currentPc());
141
142 // clear any padding
143 memset(m_instBuf + bitsLen, 0, allocLen - bitsLen);
144 #ifndef IGA_DISABLE_ENCODER_EXCEPTIONS
145 } catch (const iga::FatalError&) {
146 // error is already reported
147 }
148 #endif
149 }
150
encodeBlock(Block * blk)151 void Encoder::encodeBlock(Block *blk)
152 {
153 m_blockToOffsetMap[blk] = currentPc();
154 for (const auto inst : blk->getInstList()) {
155 setCurrInst(inst);
156 encodeInstruction(*inst);
157 if (hasFatalError()) {
158 return;
159 }
160 setEncodedPC(inst, currentPc());
161
162 GED_RETURN_VALUE status = GED_RETURN_VALUE_SIZE;
163
164 // If -Xforce-no-compact is set, do not compact any insruction
165 // Otherwise, if {NoCompact} is set, do not compact the instruction
166 // Otherwise, if {Copmacted} is set on the instructionm, try to compact it and throw error on fail
167 // Otherwise, if no compaction setting on the instruction, try to compact the instruction if -Xauto-compact
168 // Otherwise, do not compact the instruction
169 bool mustCompact = inst->hasInstOpt(InstOpt::COMPACTED);
170 bool mustNotCompact = inst->hasInstOpt(InstOpt::NOCOMPACT);
171 if (m_opts.forceNoCompact) {
172 mustCompact = false;
173 mustNotCompact = true;
174 }
175
176 int32_t iLen = 16;
177 if (mustCompact || (!mustNotCompact && m_opts.autoCompact)) {
178 // try compact first
179 status = GED_EncodeIns(
180 &m_gedInst, GED_INS_TYPE_COMPACT, m_instBuf + currentPc());
181 if (status == GED_RETURN_VALUE_SUCCESS) {
182 //If auto compation is turned on, in case we need to patch later.
183 inst->addInstOpt(InstOpt::COMPACTED);
184 iLen = 8;
185 } else if (status == GED_RETURN_VALUE_NO_COMPACT_FORM) {
186 if (mustCompact) {
187 if (m_opts.explicitCompactMissIsWarning) {
188 warningAtT(inst->getLoc(), "GED unable to compact instruction");
189 } else {
190 errorAtT(inst->getLoc(), "GED unable to compact instruction");
191 }
192 }
193 } // else: some other error (unreachable?)
194 }
195
196 // try native encoding if compaction failed
197 if (status != GED_RETURN_VALUE_SUCCESS) {
198 inst->removeInstOpt(InstOpt::COMPACTED);
199 status = GED_EncodeIns(
200 &m_gedInst, GED_INS_TYPE_NATIVE, m_instBuf + currentPc());
201 if (status != GED_RETURN_VALUE_SUCCESS) {
202 errorAtT(inst->getLoc(), "GED unable to encode instruction: ",
203 gedReturnValueToString(status));
204 }
205 }
206
207 advancePc(iLen);
208 }
209 }
210
getBlockOffset(const Block * b,uint32_t & pc)211 bool Encoder::getBlockOffset(const Block *b, uint32_t &pc)
212 {
213 auto iter = m_blockToOffsetMap.find(b);
214 if (iter != m_blockToOffsetMap.end())
215 {
216 pc = iter->second;
217 return true;
218 }
219 return false;
220 }
221
setEncodedPC(Instruction * inst,int32_t encodedPC)222 void Encoder::setEncodedPC(Instruction *inst, int32_t encodedPC)
223 {
224 #if 0
225 auto iter = m_instPcs.find(inst);
226 IGA_ASSERT(iter == m_instPcs.end(), "resetting encode PC");
227 ((void)iter); // dummy use where ASSERT_USER compiles out
228 m_instPcs[inst] = encodedPC;
229 #else
230 inst->setPC(encodedPC);
231 #endif
232 }
233
getEncodedPC(const Instruction * inst) const234 int32_t Encoder::getEncodedPC(const Instruction *inst) const
235 {
236 #if 0
237 auto iter = m_instPcs.find(inst);
238 if (iter == m_instPcs.end()) {
239 IGA_ASSERT_FALSE("inst PC not found");
240 return 0;
241 }
242 return iter->second;
243 #else
244 return inst->getPC();
245 #endif
246 }
247
encodeFC(const Instruction & i)248 void Encoder::encodeFC(const Instruction &i)
249 {
250 const OpSpec &os = i.getOpSpec();
251
252 if (os.is(Op::MATH)) {
253 GED_MATH_FC mfc = lowerMathFC(i.getMathFc());
254 GED_ENCODE(MathFC, mfc);
255 } else if (os.is(Op::BFN)) {
256 GED_ENCODE(BfnFC, i.getBfnFc().value);
257 } else if (os.isDpasFamily()) {
258 auto sf = i.getDpasFc();
259 GED_ENCODE(SystolicDepth, GetDpasSystolicDepth(sf));
260 GED_ENCODE(RepeatCount, GetDpasRepeatCount(sf));
261 } else if (os.isSendOrSendsFamily()) {
262 if (platform() >= Platform::XE) {
263 // on earlier platforms this is stowed in ExDesc
264 auto sfid = lowerSFID(i.getSendFc());
265 GED_ENCODE(SFID, sfid);
266 }
267 } else if (os.is(Op::SYNC)) {
268 GED_SYNC_FC wfc = lowerSyncFC(i.getSyncFc());
269 GED_ENCODE(SyncFC, wfc);
270 } else if (os.supportsBranchCtrl()) {
271 GED_ENCODE(BranchCtrl,
272 lowerBranchCntrl(i.getBranchCtrl()));
273 } else if (os.supportsSubfunction()) {
274 IGA_ASSERT_FALSE("encoder needs to encode subfunction for this op");
275 }
276 }
277
encodeInstruction(Instruction & inst)278 void Encoder::encodeInstruction(Instruction& inst)
279 {
280 m_opcode = inst.getOp();
281 const auto gedPlat = lowerPlatform(platform());
282 const auto gedOp = lowerOpcode(m_opcode);
283 GED_RETURN_VALUE status = GED_InitEmptyIns(
284 gedPlat,
285 &m_gedInst,
286 gedOp);
287 if (status != GED_RETURN_VALUE_SUCCESS) {
288 fatalAtT(inst.getLoc(), "GED failed to create instruction template");
289 return;
290 }
291
292 if (m_opcode == Op::ILLEGAL) {
293 // GED does all the work for this instruction
294 return;
295 } else if (m_opcode == Op::NOP) {
296 // nop supports {Breakpoint}
297 encodeOptions(inst);
298 return;
299 }
300 const OpSpec &os = inst.getOpSpec();
301
302 // Dwindling cases where we must use Align16
303 // Pre-GEN10 ternary ops are all align16
304 bool isTernary = platform() < Platform::GEN10 && os.isTernary();
305 bool contextSaveRestoreNeedsAlign16 =
306 isAlign16MathMacroRegisterCsrOperand(inst.getDestination()) ||
307 isAlign16MathMacroRegisterCsrOperand(inst.getSource(0));
308 // IEEE macro instructions (math.invm and math.rsqrtm)
309 bool align16MacroInst = m_model.supportsAlign16MacroInst() && inst.isMacro();
310 m_encodeAlign16 = isTernary || contextSaveRestoreNeedsAlign16 || align16MacroInst;
311 GED_ACCESS_MODE accessMode = m_encodeAlign16 ?
312 GED_ACCESS_MODE_Align16 : GED_ACCESS_MODE_Align1;
313 if (m_model.supportsAccessMode()) {
314 GED_ENCODE(AccessMode, accessMode);
315 } // else GED will crash given this call (even given Align1)
316
317 ExecSize execSize = inst.getExecSize();
318 if (os.isTernary() &&
319 m_model.supportsAlign16Ternary() &&
320 inst.getExecSize() == ExecSize::SIMD1)
321 {
322 // scalar ternary workaround for Align16
323 // (c.f. Encoder::encodeTernaryDestinationAlign16)
324 execSize = inst.getDestination().getType() == Type::DF ?
325 ExecSize::SIMD2 : ExecSize::SIMD4;
326 }
327 GED_ENCODE(ExecSize, lowerExecSize(execSize));
328
329 encodeFC(inst);
330
331 if (os.supportsQtrCtrl()) {
332 // use ExecSize from above since it may have been modified
333 GED_CHANNEL_OFFSET qtrCtrl = lowerQtrCtrl(inst.getChannelOffset());
334 GED_ENCODE(ChannelOffset, qtrCtrl);
335 }
336
337 GED_ENCODE(MaskCtrl, lowerEmask(inst.getMaskCtrl()));
338
339 // Predicate
340 const Predication &pred = inst.getPredication();
341 if (os.supportsPredication()) {
342 GED_ENCODE(PredCtrl, lowerPredCtrl(pred.function));
343 } else {
344 GED_ENCODE(PredCtrl, GED_PRED_CTRL_Normal);
345 }
346
347 bool isImm64Src0Overlap =
348 platform() >= Platform::XE &&
349 inst.getSource(0).getKind() == Operand::Kind::IMMEDIATE &&
350 TypeIs64b(inst.getSource(0).getType());
351
352 if (!isImm64Src0Overlap && inst.getOpSpec().supportsFlagModifier()) {
353 if (os.op == Op::BFN) {
354 switch (inst.getFlagModifier()) {
355 case FlagModifier::NONE:
356 case FlagModifier::EQ:
357 case FlagModifier::GT:
358 case FlagModifier::LT:
359 // GED does the special mapping to CondMod2
360 // only a subset of cond modifiers are supported on this op
361 GED_ENCODE(CondModifier, lowerCondModifier(inst.getFlagModifier()));
362 break;
363 default:
364 errorT("this instruction format only supports "
365 "(eq), (gt), and (lt) conditional modifiers");
366 }
367 } else {
368 GED_ENCODE(CondModifier, lowerCondModifier(inst.getFlagModifier()));
369 }
370 }
371
372 bool hasFlagRegField = true;
373 // For >= XE_HPC, Some fields only exist when having CondCtrl or PredCtrl:
374 // PredInv, FlagRegNum, FlagSubRegNum
375 // In GED, either CondCtrl or PredCtrl have to be set to non-zero before
376 // these fields can be set
377 if (platform() >= Platform::XE_HPC) {
378 hasFlagRegField = (inst.getFlagModifier() != FlagModifier::NONE) ||
379 (pred.function != PredCtrl::NONE) ||
380 inst.isBranching();
381 }
382
383 if (os.supportsPredication() && hasFlagRegField)
384 GED_ENCODE(PredInv, pred.inverse ? GED_PRED_INV_Invert : GED_PRED_INV_Normal);
385
386 // GED_ExecutionDataType
387 RegRef flagReg = inst.getFlagReg();
388 if (hasFlagRegField && (flagReg != REGREF_INVALID)) {
389 GED_ENCODE(FlagRegNum, static_cast<uint32_t>(inst.getFlagReg().regNum));
390 GED_ENCODE(FlagSubRegNum, inst.getFlagReg().subRegNum);
391 }
392
393 // set AccWrEn where supported
394 if (inst.hasInstOpt(InstOpt::ACCWREN)) {
395 GED_ENCODE(AccWrCtrl, GED_ACC_WR_CTRL_AccWrEn);
396 }
397
398 if (os.isBranching()) {
399 if (m_model.supportsSimplifiedBranches()) {
400 encodeBranchingInstructionSimplified(inst);
401 } else {
402 encodeBranchingInstruction(inst);
403 }
404 // options encoded internally
405 } else if (os.isTernary()) {
406 encodeTernaryInstruction(inst, accessMode);
407 } else if (os.isSendOrSendsFamily()) {
408 encodeSendInstruction(inst);
409 } else if (os.is(Op::SYNC)) {
410 encodeSyncInstruction(inst);
411 } else {
412 encodeBasicInstruction(inst, accessMode);
413 }
414
415 if (!hasFatalError()) {
416 encodeOptions(inst);
417
418 // setup for back patching on branching ops
419 if (os.isBranching() || inst.isMovWithLabel()) {
420 bool src0IsLabel = inst.getSource(0).isImm();
421 bool src1IsLabel = inst.getSourceCount() > 1 && inst.getSource(1).isImm();
422 if (src0IsLabel || src1IsLabel) {
423 m_needToPatch.emplace_back(&inst, m_gedInst, m_instBuf + currentPc());
424 // Force not to compact label instructions to avoid the compaction error
425 // when auto-compaction is enabled. We could set this inst to compactable during
426 // Encoder::encodeBlock when the value is unknown (and assume to be 0). But we can
427 // only compact imm values use up to 12 bits.
428 inst.addInstOpt(InstOpt::NOCOMPACT);
429 }
430 }
431 }
432 }
433
encodeBasicInstruction(const Instruction & inst,GED_ACCESS_MODE accessMode)434 void Encoder::encodeBasicInstruction(
435 const Instruction& inst,
436 GED_ACCESS_MODE accessMode)
437 {
438 const OpSpec& os = inst.getOpSpec();
439 if (os.supportsDestination()) {
440 encodeBasicDestination(inst, inst.getDestination(), accessMode);
441 } else if (os.op == Op::WAIT ) {
442 // wait has an implicit destination (same as first source)
443 // but with dst region of <1>
444 Operand copy(inst.getSource(0));
445 copy.setRegion(Region::DST1);
446 encodeBasicDestination(inst, copy);
447 }
448
449 switch (inst.getSourceCount())
450 {
451 case 2:
452 encodeBasicSource<SourceIndex::SRC1>(inst, inst.getSource(1), accessMode);
453 // vvvv fall through vvvv
454 case 1:
455 encodeBasicSource<SourceIndex::SRC0>(inst, inst.getSource(0), accessMode);
456 }
457 }
458
encodeTernaryDestinationAlign1(const Instruction & inst)459 void Encoder::encodeTernaryDestinationAlign1(const Instruction& inst)
460 {
461 const Operand& dst = inst.getDestination();
462
463 if (inst.getOpSpec().supportsSaturation()) {
464 GED_ENCODE(Saturate, lowerSaturate(dst.getDstModifier()));
465 }
466 GED_ENCODE(DstDataType, lowerDataType(dst.getType()));
467 GED_ENCODE(DstRegFile, lowerRegFile(dst.getDirRegName()));
468 encodeDstReg(dst.getDirRegName(), dst.getDirRegRef().regNum);
469
470 if (inst.isMacro()) {
471 GED_ENCODE(DstMathMacroExt, lowerMathMacroReg(dst.getMathMacroExt()));
472 // GED_ENCODE(DstHorzStride, 1);
473 } else {
474 GED_ENCODE(DstSubRegNum, SubRegToBinaryOffset(
475 dst.getDirRegRef().subRegNum, dst.getDirRegName(), dst.getType(), m_model.platform));
476 bool hasDstRgnHz = true;
477 // dpas does not have a dst region
478 hasDstRgnHz = !inst.getOpSpec().isDpasFamily();
479 if (hasDstRgnHz) {
480 GED_ENCODE(DstHorzStride, static_cast<int>(dst.getRegion().getHz()));
481 }
482 }
483 }
484
485 template <SourceIndex S>
encodeTernarySourceAlign1(const Instruction & inst)486 void Encoder::encodeTernarySourceAlign1(const Instruction& inst)
487 {
488 // CNL+ align1 ternary
489 if (platform() < Platform::GEN10) {
490 fatalT("src", (int)S, ": align1 ternary is not supported on this "
491 "platform");
492 return;
493 }
494
495 const Operand& src = inst.getSource(S);
496 Type srcType = src.getType();
497 // DPAS
498 if (inst.getOpSpec().isDpasFamily()) {
499 // src0's type is the type for all sources
500
501 if (S == SourceIndex::SRC0) {
502 GED_ENCODE(Src0DataType, lowerDataType(srcType));
503 // GED: src0 HS = 0, VS=3
504 } else if (S == SourceIndex::SRC1) {
505 GED_ENCODE(Src1Precision, lowerSubBytePrecision(srcType));
506 // GED sets both the type and the precision at the same time for us
507 // GED: src1 HS = 1, VS=3
508 // via this higher-level API
509 } else if (S == SourceIndex::SRC2) {
510 GED_ENCODE(Src2Precision, lowerSubBytePrecision(srcType));
511 // GED: src2 HS = 3
512 // GED sets both the type and the precision at the same time for us
513 // via this higher-level API
514 }
515 encodeSrcRegFile<S>(lowerRegFile(src.getDirRegName()));
516 encodeSrcReg<S>(src.getDirRegName(), src.getDirRegRef().regNum);
517 encodeSrcSubRegNum<S>(SubRegToBinaryOffset(
518 src.getDirRegRef().subRegNum, src.getDirRegName(), srcType, m_model.platform));
519
520 return;
521 }
522
523 // GED will catch any mismatch between float and int (illegal mixed mode)
524 encodeSrcType<S>(srcType); // GED dependency requires type before reg file
525
526 switch (src.getKind()) {
527 case Operand::Kind::DIRECT:
528 case Operand::Kind::MACRO: {
529 encodeSrcRegFile<S>(lowerRegFile(src.getDirRegName()));
530
531 if (platform() <= Platform::GEN11) {
532 encodeSrcAddrMode<S>(GED_ADDR_MODE_Direct);
533 }
534
535 // source modifiers
536 if (inst.getOpSpec().supportsSourceModifiers()) {
537 encodeSrcModifier<S>(src.getSrcModifier());
538 }
539
540 // regioning
541 //
542 // ternary align1 puts SpcAcc into subreg, so regions may be set
543 // in all cases
544 auto rgn = src.getRegion();
545 // * madm doesn't have a region in GEN9 ...
546 // it does in GEN10+, but we haven't supported it in syntax yet
547 // and leave it to GED to set it
548 // * src0 and src1 only has <w;h>, src2 only has <h>
549 bool hasSrcRgnHz = !inst.isMacro();
550 bool hasSrcRgnVt = !inst.isMacro() && S < SourceIndex::SRC2;
551 if (hasSrcRgnHz) {
552 encodeSrcRegionHorz<S>(rgn.getHz());
553 }
554 if (hasSrcRgnVt) {
555 encodeTernarySrcRegionVert(S, rgn.getVt());
556 }
557 // register and subregister
558 encodeSrcReg<S>(src.getDirRegName(), src.getDirRegRef().regNum);
559 if (inst.isMacro()) {
560 if (platform() < Platform::GEN11) {
561 fatalT("src", (int)S, ": math macro operands require Align16");
562 return;
563 }
564 encodeSrcMathMacroReg<S>(src.getMathMacroExt());
565 if (S != SourceIndex::SRC2) {
566 encodeTernarySrcRegionVert(S, Region::Vert::VT_4);
567 }
568 encodeSrcRegionHorz<S>(Region::Horz::HZ_1);
569
570 } else {
571 auto subReg = SubRegToBinaryOffset(
572 src.getDirRegRef().subRegNum, src.getDirRegName(), src.getType(), m_model.platform);
573 encodeSrcSubRegNum<S>(subReg);
574 }
575 break;
576 }
577 case Operand::Kind::IMMEDIATE:
578 if (S == SourceIndex::SRC1) {
579 fatalT("src1: immediate operand in ternary align1 must be "
580 "src0 or src2");
581 return;
582 }
583 encodeSrcRegFile<S>(GED_REG_FILE_IMM);
584 if (platform() < Platform::GEN10) {
585 encodeImmVal(src.getImmediateValue(), src.getType());
586 } else {
587 encodeTernaryImmVal<S>(src.getImmediateValue(), src.getType());
588 }
589 break;
590 default:
591 fatalT("src", (int)S, ": invalid operand kind");
592 return;
593 }
594 }
595
596
encodeTernaryInstruction(const Instruction & inst,GED_ACCESS_MODE accessMode)597 void Encoder::encodeTernaryInstruction(
598 const Instruction& inst,
599 GED_ACCESS_MODE accessMode)
600 {
601 if (accessMode == GED_ACCESS_MODE_Align1) {
602 encodeTernaryAlign1Instruction(inst);
603 } else {
604 encodeTernaryAlign16Instruction(inst);
605 }
606 }
encodeTernaryAlign16Instruction(const Instruction & inst)607 void Encoder::encodeTernaryAlign16Instruction(const Instruction& inst)
608 {
609 if (inst.getOpSpec().supportsDestination()) {
610 encodeTernaryDestinationAlign16(inst);
611 }
612 encodeTernarySourceAlign16<SourceIndex::SRC0>(inst);
613 encodeTernarySourceAlign16<SourceIndex::SRC1>(inst);
614 encodeTernarySourceAlign16<SourceIndex::SRC2>(inst);
615 }
encodeTernaryAlign1Instruction(const Instruction & inst)616 void Encoder::encodeTernaryAlign1Instruction(const Instruction& inst)
617 {
618 // set ExecutionDataType (integral or floating)
619 // the operands must be part of the same type set
620 Type src0Type = inst.getSource(0).getType();
621 GED_EXECUTION_DATA_TYPE execDataType;
622 if (isTernaryAlign1Floating(src0Type)) {
623 execDataType = GED_EXECUTION_DATA_TYPE_Float;
624 } else if (isTernaryAlign1Integral(src0Type)) {
625 execDataType = GED_EXECUTION_DATA_TYPE_Integer;
626 } else {
627 fatalT("src0: unsupported type for ternary align1 encoding");
628 return;
629 }
630 GED_ENCODE(ExecutionDataType, execDataType);
631
632 if (inst.getOpSpec().supportsDestination()) {
633 encodeTernaryDestinationAlign1(inst);
634 }
635 encodeTernarySourceAlign1<SourceIndex::SRC0>(inst);
636 encodeTernarySourceAlign1<SourceIndex::SRC1>(inst);
637 encodeTernarySourceAlign1<SourceIndex::SRC2>(inst);
638 }
639
encodeBranchingInstruction(const Instruction & inst)640 void Encoder::encodeBranchingInstruction(const Instruction& inst)
641 {
642 // the destination stride is always 1 for all control flow
643 GED_ENCODE(DstHorzStride, 1);
644
645 // control flow instructions require patching later if any operand is a label
646 bool src0IsLabel = inst.getSource(0).getKind() == Operand::Kind::LABEL;
647
648 // break up instructions into various classes
649 // - stuff with implicit operands: jmpi
650 // - stuff that can take register operands: call, calla, return
651 // - everything else: if, else, while, ..., goto, join, ...
652 if (m_opcode == Op::JMPI)
653 {
654 // jmpi encodes the syntax
655 // jmpi (1) LABEL
656 // jmpi (1) reg32
657 // as
658 // jmpi (1) ip ip LABEL
659 // jmpi (1) ip ip reg32
660 //
661 // "Restriction: The index data type must be D (Signed DWord Integer)."
662 //
663 // implicit IP ...
664 encodeBasicDestination(inst, Operand::DST_REG_IP_UD);
665 encodeBasicSource<SourceIndex::SRC0>(inst, Operand::SRC_REG_IP_UD);
666 GED_ENCODE(Src1DataType, GED_DATA_TYPE_d);
667 if (src0IsLabel) {
668 // jmpi (1) LABEL (encodes into Src1)
669 GED_ENCODE(Src1RegFile, GED_REG_FILE_IMM);
670 } else {
671 // jmpi (1) reg32 (encodes into Src1)
672 encodeBasicSource<SourceIndex::SRC1>(inst, inst.getSource(0));
673 }
674 }
675 else if (m_opcode == Op::CALL ||
676 m_opcode == Op::CALLA ||
677 m_opcode == Op::RET)
678 {
679 // e.g. call, calla, ret
680 // call (..) imm32
681 // call (..) reg32
682 // calla (..) imm32
683 // ret (...) reg32 => encodes as ret (...) null reg
684 //
685 if (m_opcode == Op::CALL || m_opcode == Op::CALLA) {
686 encodeBasicDestination(inst, inst.getDestination());
687 } else if (m_opcode == Op::RET) {
688 encodeBasicDestination(inst, Operand::DST_REG_NULL_UD);
689 encodeBasicSource<SourceIndex::SRC0>(inst, inst.getSource(0));
690 }
691
692 if (m_opcode == Op::CALL || m_opcode == Op::CALLA) {
693 if (src0IsLabel) {
694 // op == CALL (since it's a label), hence we have
695 // call (..) imm32 => which uses src1
696 GED_ENCODE(Src1RegFile, GED_REG_FILE_IMM);
697 GED_ENCODE(Src1DataType, GED_DATA_TYPE_d);
698 } else {
699 // call (..) reg32
700 // calla (..) imm32
701 encodeBasicSource<SourceIndex::SRC1>(inst, inst.getSource(0));
702 }
703
704 // <2;2,1> restriction for CALL and CALLA restriction is only for
705 // IVB+HSW, but simulator has it until CNL. So we have to support it
706 // until we get CNL HW validation moves to it
707 if (callNeedsSrc0Region221(inst)) {
708 GED_ENCODE(Src0VertStride, 2);
709 GED_ENCODE(Src0Width, 2);
710 GED_ENCODE(Src0HorzStride, 1);
711 }
712 // though it's not state in the spec, ICL requires src0 region be set to <2;4,1>
713 else if (callNeedsSrc0Region241(inst)) {
714 GED_ENCODE(Src0VertStride, 2);
715 GED_ENCODE(Src0Width, 4);
716 GED_ENCODE(Src0HorzStride, 1);
717 }
718 }
719 } else if (m_opcode == Op::BRD || m_opcode == Op::BRC) {
720 // [brd/brc]: The ip register must be used (for example, by the assembler) as dst.
721 encodeBasicDestination(inst, Operand::DST_REG_IP_D);
722 if (!src0IsLabel) {
723 encodeBasicSource<SourceIndex::SRC0>(inst, inst.getSource(0));
724 }
725 GED_DATA_TYPE ty =
726 platform() < Platform::GEN8 ? GED_DATA_TYPE_w : GED_DATA_TYPE_d;
727 GED_ENCODE(Src0RegFile,
728 src0IsLabel ? GED_REG_FILE_IMM : GED_REG_FILE_GRF);
729 GED_ENCODE(Src0DataType, ty);
730 // GED automatically sets?
731 // if (m_opcode == Op::BRC && src0IsLabel) {
732 // GED_ENCODE(Src1DataType, ty);
733 // }
734 // if (m_opcode == Op::BRD && m_inst->getOpSpec().hasImplicitSrcRegion(0)) {
735 // encodeSrcRegion(SourceIndex::SRC0,
736 // m_inst->getOpSpec().implicitSrcRegion(0));
737 // }
738 } else {
739 // regular control flow that only accepts immediate values
740 // e.g. if, else, endif, while, cont, break, goto, join, halt
741
742 // Apparently, the implicit destination on these instructions
743 // is null instead of ip (unlike jmpi etc)
744 // destination is ip<1>:ud
745 //
746 // encodeDestination(&Operand::DST_IP);
747 encodeBasicDestination(inst, Operand::DST_REG_NULL_UD);
748
749 //UIP
750 if (m_opcode != Op::ENDIF &&
751 m_opcode != Op::WHILE &&
752 m_opcode != Op::JOIN)
753 {
754 // if/else/halt/brk/cont.... all require :d on operands
755 GED_DATA_TYPE ty =
756 platform() < Platform::GEN8 ? GED_DATA_TYPE_w : GED_DATA_TYPE_d;
757 GED_ENCODE(Src0RegFile, GED_REG_FILE_IMM);
758 GED_ENCODE(Src0DataType, ty);
759 }
760 //before XE don't need to set JIP for control flow instructions that have UIP
761 //JIP
762 if (m_opcode == Op::WHILE ||
763 m_opcode == Op::ENDIF ||
764 m_opcode == Op::JOIN)
765 {
766 GED_ENCODE(Src1RegFile, GED_REG_FILE_IMM);
767 GED_ENCODE(Src1DataType, GED_DATA_TYPE_d);
768 }
769 }
770 }
771
encodeBranchingInstructionSimplified(const Instruction & inst)772 void Encoder::encodeBranchingInstructionSimplified(const Instruction& inst)
773 {
774 const OpSpec& os = inst.getOpSpec();
775
776 // set branch control
777 if (os.supportsBranchCtrl()) {
778 GED_ENCODE(BranchCtrl, lowerBranchCntrl(inst.getBranchCtrl()));
779 }
780
781 // control flow instructions require patching later if any operand is a label
782 const Operand& src0 = inst.getSource(0);
783 bool src0IsLabel = src0.getKind() == Operand::Kind::LABEL;
784
785 // for jmpi HW will take care of IP so don't need to encode it for dst/src0
786 if (inst.getOpSpec().supportsDestination()) {
787 encodeBranchDestination(inst.getDestination());
788 } else {
789 encodeBranchDestination(Operand::DST_REG_NULL_UD);
790 }
791 // regualar control flow that only accepts immediate values
792 // e.g. if, else, endif, while, cont, break, goto, join, halt
793
794 // encoding JIP
795 if (src0IsLabel) {
796 GED_ENCODE(Src0RegFile, GED_REG_FILE_IMM);
797 // if (src0.getTargetBlock() == nullptr) {
798 // // the input value is immediate; use m_immVal as the value
799 // encodeBranchSource(src0);
800 // }
801 } else {
802 // jmpi, call, brc, ...
803 if (src0.getKind() == Operand::Kind::INDIRECT)
804 errorT("branch instructions forbid indirect register mode");
805 encodeBranchSource(src0);
806 }
807
808 if (inst.getSourceCount() == 2) {
809 // encoding UIP always IMM except for brc with a register argument
810 if (inst.getOp() != Op::BRC || src0.isImm()) {
811 GED_ENCODE(Src1RegFile, GED_REG_FILE_IMM);
812 }
813 }
814 }
815
encodeSendInstruction(const Instruction & i)816 void Encoder::encodeSendInstruction(const Instruction& i)
817 {
818 ////////////////////////////////////////////
819 // send operands
820 const OpSpec& os = i.getOpSpec();
821 if (os.isSendFamily()) {
822 encodeSendDestination(i.getDestination());
823 encodeSendSource0(i.getSource(0));
824 if (m_model.supportsUnifiedSend()) {
825 encodeSendsSource1(i.getSource(1));
826 }
827 } else if (os.isSendsFamily()) {
828 encodeSendDestination(i.getDestination());
829 encodeSendsSource0(i.getSource(0));
830 encodeSendsSource1(i.getSource(1));
831 }
832
833 ////////////////////////////////////////////
834 // send descriptors and other gunk
835 encodeSendDescs(i);
836
837 ////////////////////////////////////////////
838 // send options
839
840 // FusionCtrl is removed from XeHPC+
841 bool hasFusion =
842 platform() >= Platform::XE && platform() < Platform::XE_HPC;
843 if (hasFusion) {
844 GED_ENCODE(FusionCtrl,
845 i.hasInstOpt(InstOpt::SERIALIZE) ?
846 GED_FUSION_CTRL_Serialized : GED_FUSION_CTRL_Normal);
847 }
848
849 if (i.hasInstOpt(InstOpt::EOT)) {
850 GED_ENCODE(EOT, GED_EOT_EOT);
851 }
852 } //end: encodeSendInstruction
853
854
encodeSendDescs(const Instruction & i)855 void Encoder::encodeSendDescs(const Instruction& i)
856 {
857 if (platform() < Platform::XE) {
858 encodeSendDescsPreXe(i);
859 } else if (platform() == Platform::XE) {
860 encodeSendDescsXe(i);
861 } else if (platform() == Platform::XE_HP) {
862 encodeSendDescsXeHP(i);
863 } else if (platform() == Platform::XE_HPG ||
864 platform() == Platform::XE_HPC)
865 {
866 encodeSendDescsXeHPG(i);
867 } else {
868 errorT("unsupported platform");
869 }
870
871 bool noEOTinExDesc = m_model.supportsUnifiedSend();
872 if (noEOTinExDesc &&
873 i.getExtMsgDescriptor().isImm() &&
874 (i.getExtMsgDescriptor().imm & 1 << 5))
875 errorT("Encoder: Send exDesc[5] must not be set (the legacy EOT bit)");
876 }
877
encodeSendDescsPreXe(const Instruction & i)878 void Encoder::encodeSendDescsPreXe(const Instruction& i)
879 {
880 SendDesc exDesc = i.getExtMsgDescriptor();
881 const OpSpec& os = i.getOpSpec();
882 if (exDesc.isReg()) {
883 if (os.isSendFamily()) {
884 errorT("unary send forbids register ExDesc");
885 }
886 GED_ENCODE(ExDescRegFile, GED_REG_FILE_ARF);
887 GED_ENCODE(ExDescAddrSubRegNum, 2 * exDesc.reg.subRegNum);
888 } else {
889 GED_ENCODE(ExDescRegFile, GED_REG_FILE_IMM);
890 GED_ENCODE(ExMsgDesc, exDesc.imm);
891 }
892
893 SendDesc desc = i.getMsgDescriptor();
894 if (desc.isReg()) {
895 if (platform() == Platform::GEN9) {
896 uint32_t msgDescriptor = 0;
897 // There is a HW bug on SKL where HW will only copy bits 0-28 from
898 // the address register (descriptor register) and will miss bit 30
899 // of the descriptor. Hence, even in the case of an register
900 // descriptor we must program bit 30 as immediate (it will be
901 // taken from the encoding and OR'd in correctly)
902 //
903 // E.g. (old syntax)
904 // sends (8) r74:hf r16 r73 0x42:ud a0.0 {Align1, Q1, NoMask}
905 // // sampler, resLen=3, msgLen=1, extMsgLen=1
906 // On SKL, HW will copy bits 29-31 from the actual immediate
907 // descriptor bits. Hence, we must set immediate descriptor
908 // bit 30 even in the case of a register descriptor. (For SKL).
909 //
910 // For 3D sampler bit 30 indicates HF/F return format.
911 // For render target write bit 30 indicates HF/F input...
912 // Thankfully for SKL the 3D sampler doesn't support HF input.
913 // For CNL it does, and that will be bit 29.
914 // But this bug should be fixed in CNL.
915 if (platform() == Platform::GEN9 && desc.isReg()) {
916 if (i.getDestination().getType() == Type::HF ||
917 i.getSource(0).getType() == Type::HF)
918 {
919 msgDescriptor |= (1 << 30);
920 }
921 }
922 GED_ENCODE(DescRegFile, GED_REG_FILE_IMM);
923 GED_ENCODE(MsgDesc, msgDescriptor);
924 }
925 GED_ENCODE(DescRegFile, GED_REG_FILE_ARF);
926 uint8_t regNumBits;
927 const RegInfo *ri = m_model.lookupRegInfoByRegName(RegName::ARF_A);
928 IGA_ASSERT(ri, "failed to find a0 register");
929 ri->encode((int)desc.reg.regNum, regNumBits);
930 GED_ENCODE(DescRegNum, regNumBits);
931 } else if (desc.isImm()) {
932 GED_ENCODE(DescRegFile, GED_REG_FILE_IMM);
933 GED_ENCODE(MsgDesc, desc.imm);
934 }
935 }
encodeSendDescsXe(const Instruction & i)936 void Encoder::encodeSendDescsXe(const Instruction& i)
937 {
938 SendDesc exDesc = i.getExtMsgDescriptor();
939 if (exDesc.isReg()) {
940 GED_ENCODE(ExDescRegFile, GED_REG_FILE_ARF);
941 GED_ENCODE(ExDescAddrSubRegNum, 2 * exDesc.reg.subRegNum);
942 } else {
943 GED_ENCODE(ExDescRegFile, GED_REG_FILE_IMM);
944 GED_ENCODE(ExMsgDesc, exDesc.imm);
945 }
946
947 SendDesc desc = i.getMsgDescriptor();
948 if (desc.isReg()) {
949 GED_ENCODE(DescRegFile, GED_REG_FILE_ARF);
950 // a0.0 is implied (there's no field)
951 if (desc.reg.subRegNum != 0) {
952 errorT("send with reg desc must be a0.0");
953 }
954 } else {
955 GED_ENCODE(DescRegFile, GED_REG_FILE_IMM);
956 GED_ENCODE(MsgDesc, desc.imm);
957 }
958 }
959
// A bit harder than Xe
//   * If ExBSO is set then Src1Length holds xlen
//   * CPS has its own field (ExDesc[11]) only if ExDesc.IsReg
encodeSendDescsXeHP(const Instruction & i)963 void Encoder::encodeSendDescsXeHP(const Instruction& i)
964 {
965 SendDesc exDesc = i.getExtMsgDescriptor();
966 if (exDesc.isReg()) {
967 GED_ENCODE(ExDescRegFile, GED_REG_FILE_ARF);
968 GED_ENCODE(ExDescAddrSubRegNum, 2 * exDesc.reg.subRegNum);
969 GED_ENCODE(ExBSO, i.hasInstOpt(InstOpt::EXBSO) ? 1 : 0);
970 if (i.hasInstOpt(InstOpt::EXBSO)) {
971 GED_ENCODE(CPS, i.hasInstOpt(InstOpt::CPS) ? 1 : 0);
972 GED_ENCODE(Src1Length, (uint32_t)i.getSrc1Length());
973 } else if (i.hasInstOpt(InstOpt::CPS)) {
974 errorT("{CPS} requires {ExBSO}");
975 }
976 } else {
977 if (i.hasInstOpt(InstOpt::CPS)) {
978 warningT("when ExDesc is immediate use ExDesc[11] rather than {CPS}");
979 exDesc.imm |= 1 << 11;
980 }
981 GED_ENCODE(ExDescRegFile, GED_REG_FILE_IMM);
982 GED_ENCODE(ExMsgDesc, exDesc.imm);
983 }
984
985 SendDesc desc = i.getMsgDescriptor();
986 if (desc.isReg()) {
987 GED_ENCODE(DescRegFile, GED_REG_FILE_ARF);
988 if (desc.reg.subRegNum != 0) { // a0.0 is implied (there's no field)
989 errorT("send with reg desc must be a0.0");
990 }
991 } else {
992 GED_ENCODE(DescRegFile, GED_REG_FILE_IMM);
993 GED_ENCODE(MsgDesc, desc.imm);
994 }
995 }
996
997 // Similar to XeHP, except
998 // * ExDesc.IsImm implies use of Src1Length (Src.Length is in EU bits)
encodeSendDescsXeHPG(const Instruction & i)999 void Encoder::encodeSendDescsXeHPG(const Instruction& i)
1000 {
1001 SendDesc exDesc = i.getExtMsgDescriptor();
1002 if (exDesc.isReg()) {
1003 GED_ENCODE(ExDescRegFile, GED_REG_FILE_ARF);
1004 GED_ENCODE(ExDescAddrSubRegNum, 2 * exDesc.reg.subRegNum);
1005 GED_ENCODE(ExBSO, i.hasInstOpt(InstOpt::EXBSO) ? 1 : 0);
1006 if (i.hasInstOpt(InstOpt::EXBSO)) {
1007 GED_ENCODE(CPS, i.hasInstOpt(InstOpt::CPS) ? 1 : 0);
1008 GED_ENCODE(Src1Length, (uint32_t)i.getSrc1Length());
1009 } else if (i.hasInstOpt(InstOpt::CPS)) {
1010 errorT("{CPS} requires {ExBSO}");
1011 }
1012 } else {
1013 if (i.hasInstOpt(InstOpt::CPS)) {
1014 warningT("when ExDesc is immediate use ExDesc[11] rather than {CPS}");
1015 exDesc.imm |= 1 << 11;
1016 }
1017 GED_ENCODE(ExDescRegFile, GED_REG_FILE_IMM);
1018 GED_ENCODE(ExMsgDesc, exDesc.imm);
1019 GED_ENCODE(Src1Length, (uint32_t)i.getSrc1Length());
1020 }
1021
1022 SendDesc desc = i.getMsgDescriptor();
1023 if (desc.isReg()) {
1024 GED_ENCODE(DescRegFile, GED_REG_FILE_ARF);
1025 if (desc.reg.subRegNum != 0) { // a0.0 is implied (there's no field)
1026 errorT("send with reg desc must be a0.0");
1027 }
1028 } else {
1029 GED_ENCODE(DescRegFile, GED_REG_FILE_IMM);
1030 GED_ENCODE(MsgDesc, desc.imm);
1031 }
1032 }
1033
1034
1035
1036
encodeSyncInstruction(const Instruction & inst)1037 void Encoder::encodeSyncInstruction(const Instruction& inst)
1038 {
1039 // Set the Dst.HorStride to 1 so that "sync.bar null" can be compacted
1040 GED_ENCODE(DstHorzStride, 1);
1041
1042 const Operand &src = inst.getSource(0);
1043 if (src.getKind() == Operand::Kind::IMMEDIATE) {
1044 encodeSrcRegFile<SourceIndex::SRC0>(GED_REG_FILE_IMM);
1045 encodeSrcType<SourceIndex::SRC0>(src.getType());
1046 encodeImmVal(src.getImmediateValue(), src.getType());
1047 } else {
1048 if (platform() <= Platform::XE_HPG) {
1049 encodeSrcRegFile<SourceIndex::SRC0>(GED_REG_FILE_ARF);
1050 } else {
1051 // XeHPC+ supports sync with reg32. For earlier platforms encode it to the null reg anyway.
1052 // If not doing so we'll encounter some weird behavior on validation. Suspect it's
1053 // becuase on some previous platforms' testcase there are reg32 those are not valid,
1054 // but IGA workaround (set it to NULL) them
1055 if (src.isNull()) {
1056 encodeSrcRegFile<SourceIndex::SRC0>(GED_REG_FILE_ARF);
1057 } else {
1058 // currently only flag register is supported in sync.bar
1059 encodeSrcRegFile<SourceIndex::SRC0>(lowerRegFile(src.getDirRegName()));
1060 encodeSrcReg<SourceIndex::SRC0>(src.getDirRegName(), src.getDirRegRef().regNum);
1061 encodeSrcType<SourceIndex::SRC0>(src.getType());
1062 // must be flag register (otherwise GED will return error), encode the subreg directly.
1063 GED_ENCODE(Src0SubRegNum, SubRegToBinaryOffset(
1064 src.getDirRegRef().subRegNum, src.getDirRegName(), src.getType(), m_model.platform));
1065 }
1066 }
1067 }
1068 }
1069
encodeBranchDestination(const Operand & dst)1070 void Encoder::encodeBranchDestination(const Operand& dst) {
1071 GED_ENCODE(DstRegFile,
1072 lowerRegFile(dst.getDirRegName()));
1073 encodeDstReg(dst.getDirRegName(), dst.getDirRegRef().regNum);
1074 GED_ENCODE(DstSubRegNum, SubRegToBinaryOffset(
1075 dst.getDirRegRef().subRegNum, dst.getDirRegName(), dst.getType(), m_model.platform));
1076 }
1077
encodeBasicDestination(const Instruction & inst,const Operand & dst,GED_ACCESS_MODE accessMode)1078 void Encoder::encodeBasicDestination(
1079 const Instruction& inst,
1080 const Operand& dst,
1081 GED_ACCESS_MODE accessMode)
1082 {
1083 IGA_ASSERT(accessMode != GED_ACCESS_MODE_Align16 ||
1084 m_model.supportsAlign16(),
1085 "Align16 not supported on this platform.");
1086
1087 GED_ENCODE(DstRegFile,
1088 lowerRegFile(dst.getDirRegName()));
1089 switch (dst.getKind())
1090 {
1091 case Operand::Kind::DIRECT:
1092 case Operand::Kind::MACRO:
1093 GED_ENCODE(DstAddrMode, GED_ADDR_MODE_Direct);
1094 GED_ENCODE(DstDataType,
1095 lowerDataType(dst.getType()));
1096 if (inst.getOpSpec().supportsSaturation()) {
1097 GED_ENCODE(Saturate,
1098 lowerSaturate(dst.getDstModifier()));
1099 }
1100 // VVVVV fallthrough VVVVV
1101 default: break;
1102 }
1103
1104 switch (dst.getKind())
1105 {
1106 case Operand::Kind::DIRECT:
1107 if (accessMode == GED_ACCESS_MODE_Align16) {
1108 if (dst.getRegion() != Region::DST1) {
1109 fatalT("dst has inconvertible region for Align16 encoding");
1110 return;
1111 }
1112 if (isAlign16MathMacroRegisterCsrOperand(dst)) {
1113 // acc2.XXXX on BDW .. SKL is context save and restore
1114 // This is really mme0
1115 encodeDstReg(RegName::ARF_MME, 0);
1116 // on GEN8 and GEN9 all encode as acc2, but the mux varies
1117 // to distinguish which acc it really is.
1118 GED_DST_CHAN_EN chEn;
1119 switch (dst.getDirRegRef().regNum) {
1120 /// case 0: ... acc2 actually uses Align1!
1121 // old-style for acc2 would be:
1122 // mov(8) r113:ud acc2:ud {NoMask} // acc2
1123 //
1124 // acc3-9 are Align16
1125 case 1: chEn = GED_DST_CHAN_EN_x; break; // mme0/acc3 -> acc2.x (0001b)
1126 case 2: chEn = GED_DST_CHAN_EN_y; break; // mme1/acc4 -> acc2.y (0010b)
1127 case 3: chEn = GED_DST_CHAN_EN_xy; break;
1128 case 4: chEn = GED_DST_CHAN_EN_z; break;
1129 case 5: chEn = GED_DST_CHAN_EN_xz; break;
1130 case 6: chEn = GED_DST_CHAN_EN_yz; break;
1131 case 7: chEn = GED_DST_CHAN_EN_xyzw; break; // mme7/acc9 -> acc2.xyzw (0111b)
1132 default: IGA_ASSERT_FALSE("unreachable"); chEn = GED_DST_CHAN_EN_x;
1133 }
1134 GED_ENCODE(DstChanEn, chEn);
1135 } else {
1136 // normal align16 destination (this still might be a
1137 // CSR work around op if the src is "acc2")
1138 encodeDstReg(dst.getDirRegName(), dst.getDirRegRef().regNum);
1139 GED_ENCODE(DstChanEn, GED_DST_CHAN_EN_xyzw);
1140 }
1141 GED_ENCODE(DstSubRegNum, SubRegToBinaryOffset(
1142 dst.getDirRegRef().subRegNum, dst.getDirRegName(), dst.getType(), m_model.platform));
1143 } else { // Align1
1144 encodeDstReg(dst.getDirRegName(), dst.getDirRegRef().regNum);
1145 GED_ENCODE(DstSubRegNum, SubRegToBinaryOffset(
1146 dst.getDirRegRef().subRegNum, dst.getDirRegName(), dst.getType(), m_model.platform));
1147 }
1148 break;
1149 case Operand::Kind::MACRO:
1150 encodeDstReg(dst.getDirRegName(), dst.getDirRegRef().regNum);
1151 GED_ENCODE(DstMathMacroExt,
1152 lowerSpecialAcc(dst.getMathMacroExt()));
1153 if (accessMode == GED_ACCESS_MODE_Align1 &&
1154 m_model.supportsAlign16ImplicitAcc())
1155 {
1156 fatalT("Align1 dst math macro unsupported on this platform.");
1157 return;
1158 }
1159 break;
1160 case Operand::Kind::INDIRECT:
1161 GED_ENCODE(DstAddrMode, GED_ADDR_MODE_Indirect);
1162 GED_ENCODE(DstDataType,
1163 lowerDataType(dst.getType()));
1164 if (inst.getOpSpec().supportsSaturation()) {
1165 GED_ENCODE(Saturate,
1166 lowerSaturate(dst.getDstModifier()));
1167 }
1168
1169 GED_ENCODE(DstAddrImm, dst.getIndImmAddr());
1170 GED_ENCODE(DstAddrSubRegNum, dst.getIndAddrReg().subRegNum);
1171 break;
1172 default:
1173 IGA_ASSERT_FALSE("unsupported operand kind");
1174 break;
1175 }
1176
1177 if (accessMode == GED_ACCESS_MODE_Align1) {
1178 auto dstRgn = dst.getRegion();
1179 if (inst.getOpSpec().hasImplicitDstRegion(inst.isMacro())) {
1180 auto dstRgnImpl = inst.getOpSpec().implicitDstRegion(inst.isMacro());
1181 if (dstRgn != dstRgnImpl) {
1182 warningT("dst region should be ", ToSyntax(dstRgnImpl));
1183 }
1184 }
1185 GED_ENCODE(DstHorzStride, lowerRegionHorz(dstRgn.getHz()));
1186 }
1187 }
1188
createChSelForCtxSavRst(GED_SWIZZLE * chSel,GED_SWIZZLE x,GED_SWIZZLE y)1189 static void createChSelForCtxSavRst(
1190 GED_SWIZZLE *chSel,
1191 GED_SWIZZLE x,
1192 GED_SWIZZLE y)
1193 {
1194 // following IsaAsm rules here
1195 // reg.ab expands to reg.abbb
1196 chSel[0] = x;
1197 chSel[1] = chSel[2] = chSel[3] = y;
1198 }
1199
1200
encodeBranchSource(const Operand & src)1201 void Encoder::encodeBranchSource(const Operand& src)
1202 {
1203 encodeSrcRegFile<SourceIndex::SRC0>(lowerRegFile(src.getDirRegName()));
1204 encodeSrcReg<SourceIndex::SRC0>(src.getDirRegName(),src.getDirRegRef().regNum);
1205 auto subReg = SubRegToBinaryOffset(
1206 src.getDirRegRef().subRegNum, src.getDirRegName(), Type::D, m_model.platform);
1207 encodeSrcSubRegNum<SourceIndex::SRC0>(subReg);
1208 }
1209
1210 template <SourceIndex S>
encodeBasicSource(const Instruction & inst,const Operand & src,GED_ACCESS_MODE accessMode)1211 void Encoder::encodeBasicSource(
1212 const Instruction& inst,
1213 const Operand& src,
1214 GED_ACCESS_MODE accessMode)
1215 {
1216 // setting the reg file must precede must precede setting the type in GED
1217 switch (src.getKind()) {
1218 case Operand::Kind::DIRECT:
1219 case Operand::Kind::MACRO:
1220 case Operand::Kind::INDIRECT:
1221 encodeSrcRegFile<S>(
1222 lowerRegFile(src.getDirRegName()));
1223 if (inst.getOpSpec().supportsSourceModifiers()) {
1224 encodeSrcModifier<S>(src.getSrcModifier());
1225 } else if (src.getSrcModifier() != SrcModifier::NONE) {
1226 // better be invalid in the IR if unsupported
1227 errorT("src", (int)S, " source modifier not supported (invalid IR)");
1228 }
1229 break;
1230 case Operand::Kind::IMMEDIATE:
1231 encodeSrcRegFile<S>(GED_REG_FILE_IMM);
1232 break;
1233 default:
1234 break;
1235 }
1236
1237 encodeSrcType<S>(src.getType());
1238
1239 switch (src.getKind()) {
1240 case Operand::Kind::DIRECT:
1241 case Operand::Kind::MACRO: {
1242 encodeSrcAddrMode<S>(GED_ADDR_MODE_Direct);
1243 if (src.getKind() == Operand::Kind::DIRECT) {
1244 if (isAlign16MathMacroRegisterCsrOperand(src)) {
1245 // BDW..SKL context save and restore of acc3...acc9
1246 // encode as acc2.####, ChSel will be changed in regioning code
1247 // recall acc2 is remapped to mme0
1248 encodeSrcReg<S>(RegName::ARF_MME, 0);
1249 } else {
1250 encodeSrcReg<S>(src.getDirRegName(), src.getDirRegRef().regNum);
1251 auto subReg = SubRegToBinaryOffset(
1252 src.getDirRegRef().subRegNum,
1253 src.getDirRegName(),
1254 src.getType(),
1255 m_model.platform);
1256 encodeSrcSubRegNum<S>(subReg);
1257 }
1258 } else { // (src.getKind() == Operand::Kind::MACRO)
1259 encodeSrcReg<S>(RegName::GRF_R,src.getDirRegRef().regNum);
1260 encodeSrcMathMacroReg<S>(src.getMathMacroExt());
1261 if (accessMode == GED_ACCESS_MODE_Align16) {
1262 // vertical stride has to be halved for 8B types
1263 if (src.getType() == Type::DF) {
1264 encodeSrcRegionVert<S>(Region::Vert::VT_2);
1265 } else {
1266 encodeSrcRegionVert<S>(Region::Vert::VT_4);
1267 }
1268 }
1269 }
1270 break;
1271 }
1272 case Operand::Kind::INDIRECT:
1273 encodeSrcAddrMode<S>(GED_ADDR_MODE_Indirect);
1274 encodeSrcAddrImm<S>(src.getIndImmAddr());
1275 encodeSrcAddrSubRegNum<S>(src.getIndAddrReg().subRegNum);
1276 break;
1277 case Operand::Kind::IMMEDIATE:
1278 encodeImmVal(src.getImmediateValue(), src.getType());
1279 break;
1280 default:
1281 // support mov label
1282 if (static_cast<int>(S) == 0 && inst.isMovWithLabel()) {
1283 GED_ENCODE(Src0RegFile, GED_REG_FILE_IMM);
1284 } else {
1285 fatalT("src", (int)S, ": unsupported source operand kind "
1286 "(malformed IR)");
1287 return;
1288 }
1289 break;
1290 }
1291
1292 // sets stuff found in all register accesses (not macros)
1293 // - region
1294 switch (src.getKind()) {
1295 case Operand::Kind::DIRECT:
1296 case Operand::Kind::INDIRECT:
1297 if (accessMode == GED_ACCESS_MODE_Align16) {
1298 // r13.0<4>.xyzw is the only supported ChEn
1299 // ^^^
1300 encodeSrcRegionVert<S>(Region::Vert::VT_4);
1301 GED_SWIZZLE chSel[4] =
1302 {GED_SWIZZLE_x, GED_SWIZZLE_y, GED_SWIZZLE_z, GED_SWIZZLE_w};
1303 if (isAlign16MathMacroRegisterCsrOperand(src)) {
1304 // context save and restore workaround on GEN8 and GEN9
1305 switch (src.getDirRegRef().regNum) {
1306 case 1: // acc2.yx = mme1 (acc3)
1307 createChSelForCtxSavRst(chSel, GED_SWIZZLE_y, GED_SWIZZLE_x);
1308 break;
1309 case 2: // acc2.zx = mme2 (acc4)
1310 createChSelForCtxSavRst(chSel, GED_SWIZZLE_z, GED_SWIZZLE_x);
1311 break;
1312 case 3: // acc2.wx = mme3 (acc5)
1313 createChSelForCtxSavRst(chSel, GED_SWIZZLE_w, GED_SWIZZLE_x);
1314 break;
1315 case 4: // acc2.xy = mme4 (acc6)
1316 createChSelForCtxSavRst(chSel, GED_SWIZZLE_x, GED_SWIZZLE_y);
1317 break;
1318 case 5: // acc2.yy = mme5 (acc7)
1319 createChSelForCtxSavRst(chSel, GED_SWIZZLE_y, GED_SWIZZLE_y);
1320 break;
1321 case 6: // acc2.zy = mme6 (acc8)
1322 createChSelForCtxSavRst(chSel, GED_SWIZZLE_z, GED_SWIZZLE_y);
1323 break;
1324 case 7: // acc2.wy = mme7 (acc9)
1325 createChSelForCtxSavRst(chSel, GED_SWIZZLE_w, GED_SWIZZLE_y);
1326 break;
1327 }
1328 } else {
1329 // normal Align16 that we are converting to Align1
1330 if (src.getRegion() != Region::SRC110 &&
1331 // supports legacy bits that may use <K;K,1> for "block"
1332 // access; this allows us to assemble/reassemble similar bits
1333 src.getRegion() != Region::SRC221 &&
1334 src.getRegion() != Region::SRC441 &&
1335 src.getRegion() != Region::SRC881 &&
1336 src.getRegion() != Region::SRCFF1)
1337 {
1338 fatalT("src", (int)S, ": unsupported region for "
1339 "translation to align16 encoding");
1340 return;
1341 }
1342 // TODO: we could permit SIMD4 with .x to mean broadcast read
1343 // of subreg 0, but I don't think any System Routine code uses
1344 // this.
1345 //
1346 // NOTE: technically we could convert
1347 // r13.0<0>.xxxx to r13.0<0;1,0>
1348 // r13.0<0>.yyyy to r13.1<0;1,0>
1349 // r13.0<0>.zzzz to r13.2<0;1,0>
1350 // r13.0<0>.wwww to r13.3<0;1,0>
1351 // Also be sure to handle stuff like:
1352 // r13.4<0>.zzzz (would be r13.7<0;1,0>)
1353 //
1354 // Let's wait until we need this though.
1355 }
1356 encodeSrcChanSel<S>(chSel[0], chSel[1], chSel[2], chSel[3]);
1357 } else { // Align1
1358 bool hasRgnWi = true;
1359 encodeSrcRegion<S>(src.getRegion(), hasRgnWi);
1360 }
1361 break;
1362 case Operand::Kind::MACRO:
1363 if (accessMode == GED_ACCESS_MODE_Align1) {
1364 encodeSrcRegion<S>(src.getRegion());
1365 } // else {align16 macros use the regioning bits, don't clobber them}
1366 break;
1367 default:
1368 break;
1369 }
1370 }
1371
encodeSendDirectDestination(const Operand & dst)1372 void Encoder::encodeSendDirectDestination(const Operand& dst)
1373 {
1374 if (platform() >= Platform::XE) {
1375 //auto t = dst.getType() == Type::INVALID ? Type::UD : dst.getType();
1376 //GED_ENCODE(DstDataType, lowerDataType(t));
1377 GED_ENCODE(DstRegNum, dst.getDirRegRef().regNum);
1378 } else {
1379 auto t = dst.getType() == Type::INVALID ? Type::UD : dst.getType();
1380 GED_ENCODE(DstDataType, lowerDataType(t));
1381
1382 //GED_ENCODE(Saturate, lowerSaturate(dst->getDstModifier()));
1383 if (m_opcode != Op::SENDS && m_opcode != Op::SENDSC) {
1384 GED_ENCODE(DstHorzStride, static_cast<uint32_t>(dst.getRegion().getHz())); // not used for sends
1385 }
1386
1387 GED_ENCODE(DstRegNum, dst.getDirRegRef().regNum);
1388 // GED_ENCODE(DstSubRegNum,
1389 // SubRegToBinaryOffset(dst.getDirRegRef().subRegNum, RegName::GRF_R, dst.getType(), m_model.platform));
1390 }
1391 }
1392
encodeSendDestinationDataType(const Operand & dst)1393 void Encoder::encodeSendDestinationDataType(const Operand& dst)
1394 {
1395 if (platform() >= Platform::XE)
1396 return;
1397
1398 auto t = dst.getType() == Type::INVALID ? Type::UD : dst.getType();
1399 GED_ENCODE(DstDataType, lowerDataType(t));
1400 }
1401
encodeSendDestination(const Operand & dst)1402 void Encoder::encodeSendDestination(const Operand& dst)
1403 {
1404 if (m_model.supportsUnarySend()) {
1405 switch (dst.getKind())
1406 {
1407 case Operand::Kind::DIRECT:
1408 GED_ENCODE(DstAddrMode, GED_ADDR_MODE_Direct);
1409 break;
1410 case Operand::Kind::INDIRECT:
1411 GED_ENCODE(DstAddrMode, GED_ADDR_MODE_Indirect);
1412 break;
1413 default:
1414 fatalT("dst: unsupported destination operand kind/addrMode "
1415 "(malformed IR)");
1416 return;
1417 }
1418 }
1419
1420 GED_ENCODE(DstRegFile,
1421 lowerRegFile(dst.getDirRegName()));
1422
1423 if (dst.getKind() == Operand::Kind::DIRECT) {
1424 encodeSendDirectDestination(dst);
1425 } else if (dst.getKind() == Operand::Kind::INDIRECT) {
1426 encodeSendDestinationDataType(dst);
1427 if (m_opcode != Op::SENDS && m_opcode != Op::SENDSC) {
1428 GED_ENCODE(DstHorzStride, static_cast<uint32_t>(dst.getRegion().getHz())); // not used for sends
1429 }
1430 GED_ENCODE(DstAddrImm, dst.getIndImmAddr());
1431 GED_ENCODE(DstAddrSubRegNum, dst.getIndAddrReg().subRegNum);
1432 }
1433 }
1434
encodeSendSource0(const Operand & src)1435 void Encoder::encodeSendSource0(const Operand& src)
1436 {
1437 if (m_model.supportsUnarySend()) {
1438 switch(src.getKind())
1439 {
1440 case Operand::Kind::DIRECT:
1441 GED_ENCODE(Src0AddrMode, GED_ADDR_MODE_Direct);
1442 break;
1443 case Operand::Kind::INDIRECT:
1444 GED_ENCODE(Src0AddrMode, GED_ADDR_MODE_Indirect);
1445 break;
1446 default:
1447 fatalT("src0: unsupported source operand kind/addrMode "
1448 "(malformed IR)");
1449 return;
1450 break;
1451 }
1452 }
1453
1454 GED_REG_FILE gedRegFile = lowerRegFile(src.getDirRegName());
1455 GED_ENCODE(Src0RegFile, gedRegFile);
1456
1457 auto t = src.getType() == Type::INVALID ? Type::UD : src.getType();
1458
1459 if (src.getKind() == Operand::Kind::DIRECT)
1460 {
1461 if (m_model.supportsUnifiedSend()){
1462 GED_ENCODE(Src0RegNum, src.getDirRegRef().regNum);
1463 } else {
1464 GED_ENCODE(Src0DataType, lowerDataType(t));
1465 GED_ENCODE(Src0RegNum, src.getDirRegRef().regNum);
1466 GED_ENCODE(Src0SubRegNum, src.getDirRegRef().subRegNum);
1467 }
1468 }
1469 else if (src.getKind() == Operand::Kind::INDIRECT)
1470 {
1471 {
1472 GED_ENCODE(Src0DataType, lowerDataType(t));
1473 GED_ENCODE(Src0AddrSubRegNum, src.getIndAddrReg().subRegNum);
1474 // For platform >= XeHPC, the ImmAddr is represented in Word Offset in bianry,
1475 // platform < XeHPC, the ImmAddr is represented in Byte Offset in bianry
1476 // And for all platforms, the ImmAddr is represented in Byet Offset in assembly
1477 if (platform() >= Platform::XE_HPC) {
1478 GED_ENCODE(Src0AddrImm, src.getIndImmAddr() / 2);
1479 } else {
1480 GED_ENCODE(Src0AddrImm, src.getIndImmAddr());
1481 }
1482 }
1483 }
1484 }
1485
1486 // The sends opCode exists on gen9+. There is no sends opcode on pre-gen9.
1487 // Starting from XE, send opcode can have two sources, so the sends opcode
1488 // is not needed.
1489
encodeSendsSource0(const Operand & src)1490 void Encoder::encodeSendsSource0(const Operand& src)
1491 {
1492 // "...for sends/sendsc instructions Src0.SrcMod, ... and Src0.SrcType are not used."
1493 // "Src0.RegFile[1], Src1.RegFile[1] are implicitly set to 0,
1494 // and Src0.RegFile[0] is implicitly set as 1 for sends/sendsc instructions."
1495 switch (src.getKind())
1496 {
1497 case Operand::Kind::DIRECT:
1498 GED_ENCODE(Src0AddrMode, GED_ADDR_MODE_Direct);
1499 break;
1500 case Operand::Kind::INDIRECT:
1501 GED_ENCODE(Src0AddrMode, GED_ADDR_MODE_Indirect);
1502 break;
1503 default:
1504 fatalT("src0: unsupported source operand kind/addrMode (malformed IR)");
1505 return;
1506 break;
1507 }
1508
1509 if (src.getKind() == Operand::Kind::DIRECT)
1510 {
1511 GED_ENCODE(Src0RegNum, src.getDirRegRef().regNum);
1512 GED_ENCODE(Src0SubRegNum, src.getDirRegRef().subRegNum);
1513 }
1514 else if (src.getKind() == Operand::Kind::INDIRECT)
1515 {
1516 auto immAddr = src.getIndImmAddr();
1517 // For platforms >= XeHPC, ImmAddr is encoded as words,
1518 // platforms < XeHPC, ImmAddr is encoded as bytes
1519 // For all platforms, ImmAddr is represented in Byte Offset in syntax
1520 if (platform() >= Platform::XE_HPC) {
1521 immAddr /= 2;
1522 }
1523 GED_ENCODE(Src0AddrImm, immAddr);
1524 GED_ENCODE(Src0AddrSubRegNum, src.getIndAddrReg().subRegNum);
1525 }
1526 }
1527
1528
encodeSendsSource1(const Operand & src)1529 void Encoder::encodeSendsSource1(const Operand& src)
1530 {
1531 //GED_ENCODE(Src1AddrMode, GED_ADDR_MODE_Direct);
1532 GED_REG_FILE gedRegFile = lowerRegFile(src.getDirRegName());
1533 GED_ENCODE(Src1RegFile, gedRegFile);
1534 GED_ENCODE(Src1RegNum, src.getDirRegRef().regNum);
1535 }
1536
encodeSendsDestination(const Operand & dst)1537 void Encoder::encodeSendsDestination(const Operand& dst)
1538 {
1539 GED_ENCODE(DstAddrMode, GED_ADDR_MODE_Direct);
1540 GED_ENCODE(DstRegFile, lowerRegFile(dst.getDirRegName()));
1541 // send types use :ud where possible
1542 auto t = dst.getType() == Type::INVALID ? Type::UD : dst.getType();
1543 GED_ENCODE(DstDataType, lowerDataType(t));
1544
1545 //GED_ENCODE(Saturate, lowerSaturate(dst->getDstModifier()));
1546 //GED_ENCODE(DstHorzStride, static_cast<uint32_t>(dst->getHz()));
1547
1548 GED_ENCODE(DstRegNum, dst.getDirRegRef().regNum);
1549 // TODO: set correct regType
1550 GED_ENCODE(DstSubRegNum, SubRegToBinaryOffset(
1551 dst.getDirRegRef().subRegNum, RegName::GRF_R, dst.getType(), m_model.platform));
1552 }
1553
1554 template <SourceIndex S>
encodeTernarySourceAlign16(const Instruction & inst)1555 void Encoder::encodeTernarySourceAlign16(const Instruction& inst)
1556 {
1557 // PreCNL Align16
1558 // GRF-only
1559 encodeSrcAddrMode<S>(GED_ADDR_MODE_Direct);
1560
1561 const Operand& src = inst.getSource(S);
1562
1563 if (inst.getOpSpec().supportsSourceModifiers()) {
1564 encodeSrcModifier<S>(src.getSrcModifier());
1565 }
1566
1567 // set the data type
1568 GED_DATA_TYPE gedType = lowerDataType(src.getType());
1569 if (S == SourceIndex::SRC0) {
1570 GED_ENCODE(SrcDataType, gedType);
1571 } else {
1572 const Operand &src0 = inst.getSource(SourceIndex::SRC0);
1573 bool src0IsFloating = src0.getType() == Type::F || src0.getType() == Type::HF;
1574 if (platform() >= Platform::GEN8LP && src0IsFloating) {
1575 bool srcNIsFloating = src.getType() == Type::F || src.getType() == Type::HF;
1576 if (src0IsFloating && srcNIsFloating) {
1577 encodeSrcType<S>(src.getType());
1578 } else {
1579 fatalT("src", (int)S, ": mixed types require :f and :hf "
1580 "(or vice versa)");
1581 return;
1582 }
1583 }
1584 }
1585
1586 if (!inst.isMacro()) {
1587 const Region& rgn = src.getRegion();
1588 const RegRef& reg = src.getDirRegRef();
1589 // Adjusting sub register when going from align1 to align16 representation.
1590 // in align 16 subregister is always 16 byte alligned, but we can play
1591 // with swizzle to access none aligned sub register
1592 uint16_t subRegNumber = reg.subRegNum;
1593 // mad (8) r46.0.xyzw:df r46.0.xyzw:df r50.0.xyzw:df r48.0.xyzw:df {Align16, Q1}
1594 // mad (2) r5.0.xy:df r5.0.xyxy:df r92.2.xyxy:df r93.0.xyxy:df {Align16, Q1, NoMask} // BDW,SKL
1595 if (S != SourceIndex::SRC2) {
1596 if (rgn == Region::SRC8X1 ||
1597 rgn == Region::SRC4X1 ||
1598 rgn == Region::SRC2X1) {
1599 encodeSrcRepCtrl<S>(GED_REP_CTRL_NoRep);
1600 encodeSrcChanSel<S>(GED_SWIZZLE_x, GED_SWIZZLE_y, GED_SWIZZLE_z, GED_SWIZZLE_w);
1601 } else if (rgn == Region::SRC0X0) {
1602 if (src.getType() == Type::DF) {
1603 if (reg.subRegNum % 2 == 0) {
1604 encodeSrcChanSel<S>(GED_SWIZZLE_x, GED_SWIZZLE_y, GED_SWIZZLE_x, GED_SWIZZLE_y);
1605 } else {
1606 encodeSrcChanSel<S>(GED_SWIZZLE_z, GED_SWIZZLE_w, GED_SWIZZLE_z, GED_SWIZZLE_w);
1607 subRegNumber -= 1;
1608 }
1609 } else {
1610 encodeSrcRepCtrl<S>(GED_REP_CTRL_Rep);
1611 }
1612 } else {
1613 fatalT("src", (int)S, ": unsupported region for Align16 encoding");
1614 return;
1615 }
1616 } else {
1617 if (rgn == Region::SRCXX1) {
1618 encodeSrcRepCtrl<S>(GED_REP_CTRL_NoRep);
1619 encodeSrcChanSel<S>(GED_SWIZZLE_x, GED_SWIZZLE_y, GED_SWIZZLE_z, GED_SWIZZLE_w);
1620 } else if (rgn == Region::SRCXX0) {
1621 if (src.getType() == Type::DF) {
1622 if (src.getDirRegRef().subRegNum % 2 == 0) {
1623 encodeSrcChanSel<S>(GED_SWIZZLE_x, GED_SWIZZLE_y, GED_SWIZZLE_x, GED_SWIZZLE_y);
1624 } else {
1625 encodeSrcChanSel<S>(GED_SWIZZLE_z, GED_SWIZZLE_w, GED_SWIZZLE_z, GED_SWIZZLE_w);
1626 subRegNumber -= 1;
1627 }
1628 } else {
1629 encodeSrcRepCtrl<S>(GED_REP_CTRL_Rep);
1630 }
1631 }
1632 else if (rgn == Region::SRC0X0 && src.getType() == Type::DF) {
1633 encodeSrcChanSel<S>(GED_SWIZZLE_x, GED_SWIZZLE_y, GED_SWIZZLE_x, GED_SWIZZLE_y);
1634 }
1635 else {
1636 fatalT("src", (int)S, ": unsupported region for Align16 encoding");
1637 return;
1638 }
1639 }
1640 uint32_t regNum = reg.regNum;
1641 encodeSrcReg<S>(RegName::GRF_R, (uint16_t)regNum);
1642 auto subReg = SubRegToBinaryOffset(subRegNumber, src.getDirRegName(), src.getType(), m_model.platform);
1643 encodeSrcSubRegNum<S>(subReg);
1644 } else {
1645 // implicit operand accumulator
1646 // e.g. madm (4) ... -r14.acc3
1647 encodeSrcReg<S>(RegName::GRF_R,src.getDirRegRef().regNum);
1648 encodeSrcMathMacroReg<S>(src.getMathMacroExt());
1649 }
1650 }
1651
encodeTernaryDestinationAlign16(const Instruction & inst)1652 void Encoder::encodeTernaryDestinationAlign16(const Instruction& inst)
1653 {
1654 const Operand& dst = inst.getDestination();
1655 if (inst.getOpSpec().supportsSaturation()) {
1656 GED_ENCODE(Saturate,
1657 lowerSaturate(dst.getDstModifier()));
1658 }
1659 GED_ENCODE(DstDataType, lowerDataType(dst.getType()));
1660 if (dst.getDirRegName() != RegName::GRF_R) {
1661 fatalT("align16 ternary dst must be to GRF");
1662 return;
1663 }
1664
1665 // register / info (must be GRF)
1666 GED_ENCODE(DstRegFile, lowerRegFile(dst.getDirRegName()));
1667 uint32_t regNum = dst.getDirRegRef().regNum;
1668 GED_ENCODE(DstRegNum, regNum);
1669 if (inst.isMacro()) {
1670 // macro only
1671 GED_DST_CHAN_EN chanEn = mathMacroRegToChEn(dst.getMathMacroExt());
1672 GED_ENCODE(DstChanEn, chanEn);
1673 } else {
1674 // Align16 instruction (we must convert from Align1)
1675 //
1676 // As long as the Align1 sequences are packed (.xyzw), this is
1677 // straightforward. However, "scalar" (braoadcast) sequences are
1678 // a bit harder as we must carefully choose the ChEn based on the
1679 // subregister that would be used in Align1
1680 // (See also Decoder::decodeDestinationTernaryAlign16)
1681 GED_DST_CHAN_EN chanEn = GED_DST_CHAN_EN_xyzw;
1682 auto reg = dst.getDirRegRef();
1683 if (inst.getExecSize() == ExecSize::SIMD1) {
1684 // SIMD1 MAD is not allowed, so MDF (and IGC) are generating use
1685 // SIMD4 and SIMD2 with specific channel masks to selectively
1686 // enable just the bottom channel.
1687 if (dst.getType() == Type::DF) {
1688 // For 64-bit types we use a mad (2) ...
1689 // Note, only :df is needed since :q and :uq are not supported
1690 //
1691 // e.g. mad (2) r5.0.xy:df ... {Align16, Q1, NoMask} //
1692 if (dst.getDirRegRef().subRegNum % 2 == 0) {
1693 chanEn = GED_DST_CHAN_EN_xy;
1694 } else {
1695 // e.g. mad (1) r5.1<1>:df
1696 // encodes as
1697 // mad (2) r5.0.zw:df
1698 // ^ SIMD2 and .zw (~= .1)
1699 chanEn = GED_DST_CHAN_EN_zw;
1700 reg.subRegNum -= 1;
1701 }
1702 } else {
1703 // 32-bit or 16-bit type (:hf). We use a SIMD4
1704 //
1705 // one channel enabled. E.g. we'll parse
1706 // mad (1|M0) r53.6<1>:f ...
1707 // and encode it as
1708 // mad (4) r53.4.z:f
1709 // ^ SIMD4 ^^^ aligned subreg .4.z == subreg .6:f Align1
1710 switch (reg.subRegNum % 4) {
1711 case 0: chanEn = GED_DST_CHAN_EN_x; break;
1712 case 1: chanEn = GED_DST_CHAN_EN_y; break;
1713 case 2: chanEn = GED_DST_CHAN_EN_z; break;
1714 case 3: chanEn = GED_DST_CHAN_EN_w; break;
1715 }
1716 // align the subregister
1717 reg.subRegNum -= (reg.subRegNum % 4);
1718 }
1719 }
1720 GED_ENCODE(DstChanEn, chanEn);
1721 GED_ENCODE(DstSubRegNum, SubRegToBinaryOffset(
1722 reg.subRegNum, dst.getDirRegName(), dst.getType(), m_model.platform));
1723 }
1724 }
1725
encodeDstReg(RegName regName,uint16_t regNum)1726 void Encoder::encodeDstReg(RegName regName, uint16_t regNum)
1727 {
1728 // encodes ARF or GRF
1729 uint32_t gedBits = translateRegNum(-1, regName, regNum);
1730 GED_ENCODE(DstRegNum, gedBits);
1731 }
1732
1733
encodeImmVal(const ImmVal & val,Type type)1734 void Encoder::encodeImmVal(const ImmVal &val, Type type) {
1735 GED_ENCODE(Imm, typeConvesionHelper(val, type));
1736 }
1737
1738 template <SourceIndex S>
encodeSrcRepCtrl(GED_REP_CTRL rep)1739 void Encoder::encodeSrcRepCtrl(GED_REP_CTRL rep)
1740 {
1741 if (S == SourceIndex::SRC0) {
1742 GED_ENCODE(Src0RepCtrl, rep);
1743 } else if (S == SourceIndex::SRC1) {
1744 GED_ENCODE(Src1RepCtrl, rep);
1745 } else {
1746 GED_ENCODE(Src2RepCtrl, rep);
1747 }
1748 }
1749
encodeSrcChanSel(GED_SWIZZLE chSelX,GED_SWIZZLE chSelY,GED_SWIZZLE chSelZ,GED_SWIZZLE chSelW)1750 template <SourceIndex S> void Encoder::encodeSrcChanSel(
1751 GED_SWIZZLE chSelX,
1752 GED_SWIZZLE chSelY,
1753 GED_SWIZZLE chSelZ,
1754 GED_SWIZZLE chSelW)
1755 {
1756 uint32_t chSelBits =
1757 createChanSel(chSelX, chSelY, chSelZ, chSelW);
1758 if (S == SourceIndex::SRC0) {
1759 GED_ENCODE(Src0ChanSel, chSelBits);
1760 } else if (S == SourceIndex::SRC1) {
1761 GED_ENCODE(Src1ChanSel, chSelBits);
1762 } else {
1763 GED_ENCODE(Src2ChanSel, chSelBits);
1764 }
1765 }
1766
translateRegNum(int opIx,RegName regName,uint16_t regNum)1767 uint32_t Encoder::translateRegNum(
1768 int opIx, RegName regName, uint16_t regNum)
1769 {
1770 uint8_t regNumBits = 0;
1771
1772 const char *whichOp =
1773 opIx == 0 ? "src0" :
1774 opIx == 1 ? "src1" :
1775 opIx == 2 ? "src2" :
1776 "dst";
1777
1778 const RegInfo *ri = m_model.lookupRegInfoByRegName(regName);
1779 if (ri == nullptr) {
1780 errorT(whichOp, ": invalid register name for this platform");
1781 } else if (!ri->isRegNumberValid((int)regNum)) {
1782 errorT(whichOp, ": ", ri->syntax, regNum, " number out of range");
1783 } else {
1784 ri->encode((int)regNum, regNumBits);
1785 }
1786 return regNumBits; // widen for GED
1787 }
1788
mathMacroRegToBits(int src,MathMacroExt implAcc)1789 uint32_t Encoder::mathMacroRegToBits(int src, MathMacroExt implAcc) {
1790 uint32_t bits = 8; // NOACC
1791 switch (implAcc) {
1792 /// or 00000000b (GEN11)
1793 case MathMacroExt::MME0: bits = 0; break; // 0000b
1794 case MathMacroExt::MME1: bits = 1; break;
1795 case MathMacroExt::MME2: bits = 2; break;
1796 case MathMacroExt::MME3: bits = 3; break;
1797 case MathMacroExt::MME4: bits = 4; break;
1798 case MathMacroExt::MME5: bits = 5; break;
1799 case MathMacroExt::MME6: bits = 6; break;
1800 case MathMacroExt::MME7: bits = 7; break;
1801 /// or 00008000b (GEN11)
1802 case MathMacroExt::NOMME: bits = 8; break; // 1000b
1803 default:
1804 if (src < 0) {
1805 fatalT("dst operand has invalid math macro register");
1806 } else {
1807 fatalT("src", src, " operand has invalid math macro register");
1808 }
1809 return bits;
1810 }
1811 return bits;
1812 }
mathMacroRegToChEn(MathMacroExt implAcc)1813 GED_DST_CHAN_EN Encoder::mathMacroRegToChEn(MathMacroExt implAcc) {
1814 GED_DST_CHAN_EN bits = GED_DST_CHAN_EN_w; // NOACC
1815 switch (implAcc) {
1816 case MathMacroExt::MME0: bits = GED_DST_CHAN_EN_None; break; // 0000b
1817 case MathMacroExt::MME1: bits = GED_DST_CHAN_EN_x; break;
1818 case MathMacroExt::MME2: bits = GED_DST_CHAN_EN_y; break;
1819 case MathMacroExt::MME3: bits = GED_DST_CHAN_EN_xy; break;
1820 case MathMacroExt::MME4: bits = GED_DST_CHAN_EN_z; break; // 0100b
1821 case MathMacroExt::MME5: bits = GED_DST_CHAN_EN_xz; break;
1822 case MathMacroExt::MME6: bits = GED_DST_CHAN_EN_yz; break;
1823 case MathMacroExt::MME7: bits = GED_DST_CHAN_EN_xyz; break;
1824 case MathMacroExt::NOMME: bits = GED_DST_CHAN_EN_w; break; // 1000b
1825 default: fatalT("operand has invalid math macro register");
1826 }
1827 return bits;
1828 }
1829
encodeOptionsThreadControl(const Instruction & inst)1830 void Encoder::encodeOptionsThreadControl(const Instruction& inst)
1831 {
1832 if (inst.hasInstOpt(InstOpt::NOPREEMPT)) {
1833 if (m_model.supportsNoPreempt()) {
1834 GED_ENCODE(ThreadCtrl, GED_THREAD_CTRL_NoPreempt);
1835 }
1836 else {
1837 warningT("NoPreempt not supported on this platform (dropping)");
1838 }
1839 }
1840 }
1841
encodeOptions(const Instruction & inst)1842 void Encoder::encodeOptions(const Instruction& inst)
1843 {
1844 GED_ENCODE(DebugCtrl,
1845 inst.hasInstOpt(InstOpt::BREAKPOINT) ?
1846 GED_DEBUG_CTRL_Breakpoint : GED_DEBUG_CTRL_Normal);
1847
1848 auto &os = inst.getOpSpec();
1849 if (os.supportsDepCtrl()) {
1850 if (inst.hasInstOpt(InstOpt::NODDCHK) &&
1851 !inst.hasInstOpt(InstOpt::NODDCLR))
1852 {
1853 GED_ENCODE(DepCtrl, GED_DEP_CTRL_NoDDChk);
1854 }
1855 else if (!inst.hasInstOpt(InstOpt::NODDCHK) &&
1856 inst.hasInstOpt(InstOpt::NODDCLR))
1857 {
1858 GED_ENCODE(DepCtrl, GED_DEP_CTRL_NoDDClr);
1859 }
1860 else if (inst.hasInstOpt(InstOpt::NODDCHK) &&
1861 inst.hasInstOpt(InstOpt::NODDCLR))
1862 {
1863 GED_ENCODE(DepCtrl, GED_DEP_CTRL_NoDDClr_NoDDChk);
1864 }
1865 else if (!inst.getOpSpec().isSendOrSendsFamily() && inst.getOp() != Op::NOP)
1866 {
1867 GED_ENCODE(DepCtrl, GED_DEP_CTRL_Normal);
1868 }
1869 }
1870
1871 if (inst.hasInstOpt(InstOpt::ATOMIC))
1872 {
1873 GED_ENCODE(ThreadCtrl, GED_THREAD_CTRL_Atomic);
1874 }
1875
1876 if (inst.hasInstOpt(InstOpt::SWITCH) && m_model.supportsHwDeps())
1877 {
1878 if (inst.getOp() == Op::NOP) {
1879 warningT("nop doesn't support Switch option (dropping)");
1880 } else {
1881 GED_ENCODE(ThreadCtrl, GED_THREAD_CTRL_Switch);
1882 }
1883 }
1884 encodeOptionsThreadControl(inst);
1885
1886 if (!inst.hasInstOpt(InstOpt::ATOMIC) &&
1887 !inst.hasInstOpt(InstOpt::SWITCH) &&
1888 !inst.hasInstOpt(InstOpt::NOPREEMPT) &&
1889 !inst.getOpSpec().isSendOrSendsFamily() &&
1890 inst.getOp() != Op::NOP)
1891 {
1892 GED_ENCODE(ThreadCtrl, GED_THREAD_CTRL_Normal);
1893 }
1894
1895 if (inst.hasInstOpt(InstOpt::NOSRCDEPSET))
1896 {
1897 GED_ENCODE(NoSrcDepSet, GED_NO_SRC_DEP_SET_NoSrcDepSet);
1898 }
1899 else if (inst.getOpSpec().isSendOrSendsFamily() &&
1900 m_model.supportNoSrcDepSet())
1901 {
1902 GED_ENCODE(NoSrcDepSet, GED_NO_SRC_DEP_SET_Normal);
1903 }
1904
1905 if (platform() >= Platform::XE && m_opcode != Op::ILLEGAL) {
1906 SWSB::InstType inst_type = inst.getSWSBInstType(m_opts.swsbEncodeMode);
1907 uint32_t swsbBinary = inst.getSWSB().encode(m_opts.swsbEncodeMode, inst_type);
1908 IGA_ASSERT(inst.getSWSB().verify(m_opts.swsbEncodeMode, inst_type),
1909 "INTERNAL ERROR: invalid SWSB (parser/IR-creator should have prevented this)");
1910
1911 GED_ENCODE(SWSB, swsbBinary);
1912 }
1913 }
1914
1915
// Resolves the jump targets of every entry in m_needToPatch and re-encodes
// the corresponding instruction.
//
// For each pending patch this:
//   1. computes JIP from the target block's offset (or from the immediate in
//      src0 when the instruction has no JIP block),
//   2. writes it into the patch's GED instruction, as an immediate for a
//      mov-with-label and as the JIP field otherwise,
//   3. does the same for UIP when the instruction has a second source that
//      qualifies (see the condition below), and
//   4. calls GED_EncodeIns again to regenerate the bits in jp.bits.
//
// Unresolvable labels and GED encoding failures are reported via fatalAtT at
// the instruction's location.
void Encoder::patchJumpOffsets()
{
    for (JumpPatch &jp : m_needToPatch)
    {
        const Instruction *inst = jp.inst;
        IGA_ASSERT(
            inst->getOpSpec().isBranching() || inst->isMovWithLabel(),
            "patching non-control-flow/non-mov instruction");

        // on some platforms jmpi is post-increment
        uint32_t jmpiExtraOffset = 0;
        bool isPostIncrementJmpi =
            inst->getOp() == Op::JMPI && !m_model.supportsSimplifiedBranches();
        if (isPostIncrementJmpi) {
            // jmpi is relative to the incremented PC, hence we must add
            // the size of the instruction here.  jmpi probably will never
            // compact, but we'll be careful here
            jmpiExtraOffset = inst->hasInstOpt(InstOpt::COMPACTED) ? 8 : 16;
            IGA_ASSERT(inst->getSource(0).getKind() == Operand::Kind::LABEL,
                "patching non label op");
            // skip registers
        }

        // calla and mov is an absolute offset
        // (a base PC of 0 makes the subtraction below yield the target itself)
        uint32_t encodePC =
            (inst->getOpSpec().isJipAbsolute()) || (inst->getOp() == Op::MOV) ?
            0 : getEncodedPC(inst);

        uint32_t jumpPC = 0;
        const Block *jipBlk = inst->getJIP();
        if (jipBlk == nullptr) {
            // immediate offset: we have to treat this as a relative offset
            jumpPC = inst->getSource(0).getImmediateValue().s32 + encodePC;
        } else if (!getBlockOffset(jipBlk, jumpPC)) {
            // For call, its target symbol may not be resolvable until in the
            // link stage when other kernels are available.
            // In that case no error is raised here and encoding continues.
            if (inst->getOp() != Op::CALL && inst->getOp() != Op::CALLA) {
                fatalAtT(inst->getLoc(), "jip label invalid");
            }
        }

        // the difference is formed in uint32_t arithmetic and then
        // converted to a signed 32-bit offset
        int32_t jip = jumpPC - encodePC - jmpiExtraOffset;
        // JIP and UIP are in QWORDS for most ops on PreBDW
        // (pcUnscale is the divisor applied to the byte offsets below)
        int32_t pcUnscale = arePcsInQWords(inst->getOpSpec()) ? 8 : 1;

        if (inst->isMovWithLabel()) {
            // encode mov label
            // (the offset goes into the immediate field, unscaled)
            GED_DATA_TYPE src0_ty = lowerDataType(inst->getSource(0).getType());
            GED_ENCODE_TO(Src0DataType, src0_ty, &jp.gedInst);
            GED_ENCODE_TO(Imm, jip, &jp.gedInst);
        } else {
            // encode other branch instructions
            GED_ENCODE_TO(JIP, jip / pcUnscale, &jp.gedInst);
        }

        // UIP is patched for two-source instructions, except a brc whose
        // second source is not an immediate
        if (inst->getSourceCount() == 2 &&
            (inst->getOp() != Op::BRC || inst->getSource(1).isImm()))
        {
            // No need to set src1 regFile and type,
            // it will be over written by UIP
            const Block *uipBlk = inst->getUIP();
            if (uipBlk == nullptr) {
                // immediate UIP: offset by the same base PC used for JIP
                jumpPC = inst->getSource(1).getImmediateValue().s32 + encodePC;
            } else if (!getBlockOffset(uipBlk, jumpPC)) {
                fatalAtT(inst->getLoc(), "uip label invalid");
            }
            // UIP is always made relative to this instruction's encoded PC,
            // regardless of the base PC chosen for JIP above
            encodePC = getEncodedPC(inst);
            int32_t uip = jumpPC - encodePC;
            GED_ENCODE_TO(UIP, uip/pcUnscale, &jp.gedInst);
        }

        // re-encode branch
        // (compact or native form, matching the instruction's COMPACTED option)
        START_GED_TIMER();
        GED_RETURN_VALUE status = GED_EncodeIns(&jp.gedInst,
            inst->hasInstOpt(InstOpt::COMPACTED) ?
            GED_INS_TYPE_COMPACT : GED_INS_TYPE_NATIVE,
            jp.bits);
        STOP_GED_TIMER();
        if (status != GED_RETURN_VALUE_SUCCESS) {
            fatalAtT(inst->getLoc(),
                "GED_EncodeIns failed: ", gedReturnValueToString(status));
        }
    }
}
2000
2001
arePcsInQWords(const OpSpec & os) const2002 bool Encoder::arePcsInQWords(const OpSpec &os) const
2003 {
2004 // everything is in bytes except:
2005 // HSW calla, call, and jmpi
2006 return platform() < Platform::GEN8 &&
2007 os.op != Op::JMPI &&
2008 os.op != Op::CALL &&
2009 os.op != Op::CALLA;
2010 }
2011
2012
callNeedsSrc0Region221(const Instruction & inst) const2013 bool Encoder::callNeedsSrc0Region221(const Instruction &inst) const
2014 {
2015 // [call]: "Restriction: The src0 regioning control must be <2;2,1>"
2016 // [calla]: "Restriction: The src0 regioning control must be <2;2,1>"
2017 return (inst.getOp() == Op::CALL && platform() < Platform::GEN8) ||
2018 (inst.getOp() == Op::CALL && platform() == Platform::GEN9) ||
2019 (inst.getOp() == Op::CALLA && platform() <= Platform::GEN10);
2020 }
2021
callNeedsSrc0Region241(const Instruction & inst) const2022 bool Encoder::callNeedsSrc0Region241(const Instruction &inst) const
2023 {
2024 return (inst.getOp() == Op::CALL && platform() == Platform::GEN11);
2025 }
2026
encodeTernarySrcRegionVert(SourceIndex S,Region::Vert v)2027 void Encoder::encodeTernarySrcRegionVert(SourceIndex S, Region::Vert v) {
2028 if (S == SourceIndex::SRC0) {
2029 GED_ENCODE(Src0VertStride, lowerRegionVert(v));
2030 } else { // (S == SourceIndex::SRC1)
2031 GED_ENCODE(Src1VertStride, lowerRegionVert(v));
2032 } // S != SRC2 since ternary Align1 doesn't have bits for that
2033 }
2034
// Hook for fixing up cases where GED just ignores a field or where it
// refuses to allow us to set bits.  The body is intentionally empty and
// should stay that way unless GED fixes are in flight.
//
// Both parameters (the kernel and a size_t the original comment names
// bitsLen) are currently unused.
void Encoder::applyGedWorkarounds(
    const Kernel& /* k */, size_t /* bitsLen */)
{
    // NOTE: there should be a GED raw bits setter (we can use this for
    // workarounds...)
}
2043