1 /*========================== begin_copyright_notice ============================
2 
3 Copyright (C) 2017-2021 Intel Corporation
4 
5 SPDX-License-Identifier: MIT
6 
7 ============================= end_copyright_notice ===========================*/
8 
9 #include "Encoder.hpp"
10 #include "IGAToGEDTranslation.hpp"
11 #include "../../strings.hpp"
12 #include "../../Frontend/IRToString.hpp"
13 #include "../../IR/Kernel.hpp"
14 #include "../../IR/SWSBSetter.hpp"
15 #include "../../Models/Models.hpp"
16 #include "../../Timer/Timer.hpp"
17 
18 #include <cstring>
19 
20 using namespace iga;
21 
22 
23 
24 
gedReturnValueToString(GED_RETURN_VALUE rv)25 static const char *gedReturnValueToString(GED_RETURN_VALUE rv)
26 {
27     switch(rv) {
28     case GED_RETURN_VALUE_SUCCESS:              return "success";
29     case GED_RETURN_VALUE_CYCLIC_DEPENDENCY:    return "cyclic dependency";
30     case GED_RETURN_VALUE_NULL_POINTER:         return "null pointer";
31     case GED_RETURN_VALUE_OPCODE_NOT_SUPPORTED: return "unsupported opcode";
32     case GED_RETURN_VALUE_NO_COMPACT_FORM:      return "no compact form";
33     case GED_RETURN_VALUE_INVALID_FIELD:        return "invalid field";
34     case GED_RETURN_VALUE_INVALID_VALUE:        return "invalid value";
35     case GED_RETURN_VALUE_INVALID_INTERPRETER:  return "invalid interpreter";
36     default:                                    return "other error";
37     }
38 }
handleGedError(int line,const char * setter,GED_RETURN_VALUE status)39 void Encoder::handleGedError(
40     int line, const char *setter, GED_RETURN_VALUE status)
41 {
42     errorT("IGALibrary/GED/Encoder.cpp:", line, ": GED_Set", setter, ": ",
43         gedReturnValueToString(status));
44 }
45 
46 
47 
Encoder(const Model & model,ErrorHandler & errHandler,const EncoderOpts & opts)48 Encoder::Encoder(
49     const Model &model,
50     ErrorHandler &errHandler,
51     const EncoderOpts &opts)
52     : GEDBitProcessor(model, errHandler)
53     , m_opts(opts)
54     , m_numberInstructionsEncoded(0)
55     , m_mem(nullptr)
56 {
57     // derive the swsb encoding mode from platform if not set
58     if (opts.swsbEncodeMode == SWSB_ENCODE_MODE::SWSBInvalidMode) {
59         m_opts.swsbEncodeMode = model.getSWSBEncodeMode();
60     }
61 }
62 
encodeKernelPreProcess(Kernel & k)63 void Encoder::encodeKernelPreProcess(Kernel &k)
64 {
65     doEncodeKernelPreProcess(k);
66 }
67 
doEncodeKernelPreProcess(Kernel & k)68 void Encoder::doEncodeKernelPreProcess(Kernel &k)
69 {
70     if (m_opts.autoDepSet && platform() >= Platform::XE) {
71         SWSBAnalyzer swsbAnalyzer(
72             k, errorHandler(), m_opts.swsbEncodeMode, m_opts.sbidCount);
73         swsbAnalyzer.run();
74     }
75 }
76 
getElapsedTimeMicros(unsigned int idx)77 double Encoder::getElapsedTimeMicros(unsigned int idx)
78 {
79     return getIGATimerUS(idx);
80 }
81 
getElapsedTimeTicks(unsigned int idx)82 int64_t Encoder::getElapsedTimeTicks(unsigned int idx)
83 {
84     return getIGATimerTicks(idx);
85 }
86 
getTimerName(unsigned int idx)87 std::string Encoder::getTimerName(unsigned int idx)
88 {
89     return getIGATimerNames(idx);
90 }
91 
getNumInstructionsEncoded() const92 size_t Encoder::getNumInstructionsEncoded() const
93 {
94     return m_numberInstructionsEncoded;
95 }
96 
encodeKernel(Kernel & k,MemManager & mem,void * & bits,uint32_t & bitsLen)97 void Encoder::encodeKernel(
98     Kernel &k,
99     MemManager &mem,
100     void *&bits,
101     uint32_t &bitsLen)
102 {
103 #ifndef IGA_DISABLE_ENCODER_EXCEPTIONS
104     try {
105 #endif
106         initIGATimer();
107         setIGAKernelName("test");
108         IGA_ASSERT(k.getModel().platform == platform(),
109             "kernel/encoder model mismatch");
110 
111         encodeKernelPreProcess(k);
112         m_needToPatch.clear();
113         m_mem = &mem;
114         m_numberInstructionsEncoded = k.getInstructionCount();
115         size_t allocLen = m_numberInstructionsEncoded * UNCOMPACTED_SIZE;
116         if (allocLen == 0) // for empty kernel case
117             allocLen = 4;
118         m_instBuf = (uint8_t *)mem.alloc(allocLen);
119         if (!m_instBuf) {
120             fatalAtT(0, "failed to allocate memory for kernel binary");
121             return;
122         }
123 
124         for (auto blk : k.getBlockList()) {
125             START_ENCODER_TIMER();
126             encodeBlock(blk);
127             STOP_ENCODER_TIMER();
128             if (hasFatalError()) {
129                 return;
130             }
131         }
132         START_ENCODER_TIMER();
133         patchJumpOffsets();
134         STOP_ENCODER_TIMER();
135 
136         // setting actual size
137         bitsLen = currentPc();
138         bits = m_instBuf;
139 
140         applyGedWorkarounds(k, currentPc());
141 
142         // clear any padding
143         memset(m_instBuf + bitsLen, 0, allocLen - bitsLen);
144 #ifndef IGA_DISABLE_ENCODER_EXCEPTIONS
145     } catch (const iga::FatalError&) {
146         // error is already reported
147     }
148 #endif
149 }
150 
encodeBlock(Block * blk)151 void Encoder::encodeBlock(Block *blk)
152 {
153     m_blockToOffsetMap[blk] = currentPc();
154     for (const auto inst : blk->getInstList()) {
155         setCurrInst(inst);
156         encodeInstruction(*inst);
157         if (hasFatalError()) {
158             return;
159         }
160         setEncodedPC(inst, currentPc());
161 
162         GED_RETURN_VALUE status = GED_RETURN_VALUE_SIZE;
163 
164         // If -Xforce-no-compact is set, do not compact any insruction
165         // Otherwise, if {NoCompact} is set, do not compact the instruction
166         // Otherwise, if {Copmacted} is set on the instructionm, try to compact it and throw error on fail
167         // Otherwise, if no compaction setting on the instruction, try to compact the instruction if -Xauto-compact
168         // Otherwise, do not compact the instruction
169         bool mustCompact = inst->hasInstOpt(InstOpt::COMPACTED);
170         bool mustNotCompact = inst->hasInstOpt(InstOpt::NOCOMPACT);
171         if (m_opts.forceNoCompact) {
172             mustCompact = false;
173             mustNotCompact = true;
174         }
175 
176         int32_t iLen = 16;
177         if (mustCompact || (!mustNotCompact && m_opts.autoCompact)) {
178             // try compact first
179             status = GED_EncodeIns(
180               &m_gedInst, GED_INS_TYPE_COMPACT, m_instBuf + currentPc());
181             if (status == GED_RETURN_VALUE_SUCCESS) {
182                 //If auto compation is turned on, in case we need to patch later.
183                 inst->addInstOpt(InstOpt::COMPACTED);
184                 iLen = 8;
185             } else if (status == GED_RETURN_VALUE_NO_COMPACT_FORM) {
186                 if (mustCompact) {
187                     if (m_opts.explicitCompactMissIsWarning) {
188                         warningAtT(inst->getLoc(), "GED unable to compact instruction");
189                     } else {
190                         errorAtT(inst->getLoc(), "GED unable to compact instruction");
191                     }
192                 }
193             } // else: some other error (unreachable?)
194         }
195 
196         // try native encoding if compaction failed
197         if (status != GED_RETURN_VALUE_SUCCESS) {
198             inst->removeInstOpt(InstOpt::COMPACTED);
199             status = GED_EncodeIns(
200               &m_gedInst, GED_INS_TYPE_NATIVE,  m_instBuf + currentPc());
201             if (status != GED_RETURN_VALUE_SUCCESS) {
202                 errorAtT(inst->getLoc(), "GED unable to encode instruction: ",
203                     gedReturnValueToString(status));
204             }
205         }
206 
207         advancePc(iLen);
208     }
209 }
210 
getBlockOffset(const Block * b,uint32_t & pc)211 bool Encoder::getBlockOffset(const Block *b, uint32_t &pc)
212 {
213     auto iter = m_blockToOffsetMap.find(b);
214     if (iter != m_blockToOffsetMap.end())
215     {
216         pc = iter->second;
217         return true;
218     }
219     return false;
220 }
221 
setEncodedPC(Instruction * inst,int32_t encodedPC)222 void Encoder::setEncodedPC(Instruction *inst, int32_t encodedPC)
223 {
224 #if 0
225     auto iter = m_instPcs.find(inst);
226     IGA_ASSERT(iter == m_instPcs.end(), "resetting encode PC");
227     ((void)iter); // dummy use where ASSERT_USER compiles out
228     m_instPcs[inst] = encodedPC;
229 #else
230     inst->setPC(encodedPC);
231 #endif
232 }
233 
getEncodedPC(const Instruction * inst) const234 int32_t Encoder::getEncodedPC(const Instruction *inst) const
235 {
236 #if 0
237     auto iter = m_instPcs.find(inst);
238     if (iter == m_instPcs.end()) {
239         IGA_ASSERT_FALSE("inst PC not found");
240         return 0;
241     }
242     return iter->second;
243 #else
244     return inst->getPC();
245 #endif
246 }
247 
encodeFC(const Instruction & i)248 void Encoder::encodeFC(const Instruction &i)
249 {
250     const OpSpec &os = i.getOpSpec();
251 
252     if (os.is(Op::MATH)) {
253         GED_MATH_FC mfc = lowerMathFC(i.getMathFc());
254         GED_ENCODE(MathFC, mfc);
255     } else if (os.is(Op::BFN)) {
256         GED_ENCODE(BfnFC, i.getBfnFc().value);
257     } else if (os.isDpasFamily()) {
258         auto sf = i.getDpasFc();
259         GED_ENCODE(SystolicDepth, GetDpasSystolicDepth(sf));
260         GED_ENCODE(RepeatCount, GetDpasRepeatCount(sf));
261     } else if (os.isSendOrSendsFamily()) {
262         if (platform() >= Platform::XE) {
263             // on earlier platforms this is stowed in ExDesc
264             auto sfid = lowerSFID(i.getSendFc());
265             GED_ENCODE(SFID, sfid);
266         }
267     } else if (os.is(Op::SYNC)) {
268         GED_SYNC_FC wfc = lowerSyncFC(i.getSyncFc());
269         GED_ENCODE(SyncFC, wfc);
270     } else if (os.supportsBranchCtrl()) {
271         GED_ENCODE(BranchCtrl,
272             lowerBranchCntrl(i.getBranchCtrl()));
273     } else if (os.supportsSubfunction()) {
274         IGA_ASSERT_FALSE("encoder needs to encode subfunction for this op");
275     }
276 }
277 
encodeInstruction(Instruction & inst)278 void Encoder::encodeInstruction(Instruction& inst)
279 {
280     m_opcode = inst.getOp();
281     const auto gedPlat = lowerPlatform(platform());
282     const auto gedOp = lowerOpcode(m_opcode);
283     GED_RETURN_VALUE status = GED_InitEmptyIns(
284         gedPlat,
285         &m_gedInst,
286         gedOp);
287     if (status != GED_RETURN_VALUE_SUCCESS) {
288         fatalAtT(inst.getLoc(), "GED failed to create instruction template");
289         return;
290     }
291 
292     if (m_opcode == Op::ILLEGAL) {
293         // GED does all the work for this instruction
294         return;
295     } else if (m_opcode == Op::NOP) {
296         // nop supports {Breakpoint}
297         encodeOptions(inst);
298         return;
299     }
300     const OpSpec &os = inst.getOpSpec();
301 
302     // Dwindling cases where we must use Align16
303     // Pre-GEN10 ternary ops are all align16
304     bool isTernary = platform() < Platform::GEN10 && os.isTernary();
305     bool contextSaveRestoreNeedsAlign16 =
306         isAlign16MathMacroRegisterCsrOperand(inst.getDestination()) ||
307         isAlign16MathMacroRegisterCsrOperand(inst.getSource(0));
308     // IEEE macro instructions (math.invm and math.rsqrtm)
309     bool align16MacroInst = m_model.supportsAlign16MacroInst() && inst.isMacro();
310     m_encodeAlign16 = isTernary || contextSaveRestoreNeedsAlign16 || align16MacroInst;
311     GED_ACCESS_MODE accessMode = m_encodeAlign16 ?
312         GED_ACCESS_MODE_Align16 : GED_ACCESS_MODE_Align1;
313     if (m_model.supportsAccessMode()) {
314         GED_ENCODE(AccessMode, accessMode);
315     } // else GED will crash given this call (even given Align1)
316 
317     ExecSize execSize = inst.getExecSize();
318     if (os.isTernary() &&
319         m_model.supportsAlign16Ternary() &&
320         inst.getExecSize() == ExecSize::SIMD1)
321     {
322         // scalar ternary workaround for Align16
323         // (c.f. Encoder::encodeTernaryDestinationAlign16)
324         execSize = inst.getDestination().getType() == Type::DF ?
325             ExecSize::SIMD2 : ExecSize::SIMD4;
326     }
327     GED_ENCODE(ExecSize, lowerExecSize(execSize));
328 
329     encodeFC(inst);
330 
331     if (os.supportsQtrCtrl()) {
332         // use ExecSize from above since it may  have been modified
333         GED_CHANNEL_OFFSET qtrCtrl = lowerQtrCtrl(inst.getChannelOffset());
334         GED_ENCODE(ChannelOffset, qtrCtrl);
335     }
336 
337     GED_ENCODE(MaskCtrl, lowerEmask(inst.getMaskCtrl()));
338 
339     // Predicate
340     const Predication &pred = inst.getPredication();
341     if (os.supportsPredication()) {
342         GED_ENCODE(PredCtrl, lowerPredCtrl(pred.function));
343     } else {
344         GED_ENCODE(PredCtrl, GED_PRED_CTRL_Normal);
345     }
346 
347     bool isImm64Src0Overlap =
348         platform() >= Platform::XE &&
349         inst.getSource(0).getKind() == Operand::Kind::IMMEDIATE &&
350         TypeIs64b(inst.getSource(0).getType());
351 
352     if (!isImm64Src0Overlap && inst.getOpSpec().supportsFlagModifier()) {
353         if (os.op == Op::BFN) {
354             switch (inst.getFlagModifier()) {
355             case FlagModifier::NONE:
356             case FlagModifier::EQ:
357             case FlagModifier::GT:
358             case FlagModifier::LT:
359                 // GED does the special mapping to CondMod2
360                 // only a subset of cond modifiers are supported on this op
361                 GED_ENCODE(CondModifier, lowerCondModifier(inst.getFlagModifier()));
362                 break;
363             default:
364                 errorT("this instruction format only supports "
365                     "(eq), (gt), and (lt) conditional modifiers");
366             }
367         } else {
368             GED_ENCODE(CondModifier, lowerCondModifier(inst.getFlagModifier()));
369         }
370     }
371 
372     bool hasFlagRegField = true;
373     // For >= XE_HPC, Some fields only exist when having CondCtrl or PredCtrl:
374     // PredInv, FlagRegNum, FlagSubRegNum
375     // In GED, either CondCtrl or PredCtrl have to be set to non-zero before
376     // these fields can be set
377     if (platform() >= Platform::XE_HPC) {
378         hasFlagRegField = (inst.getFlagModifier() != FlagModifier::NONE) ||
379                           (pred.function != PredCtrl::NONE) ||
380                           inst.isBranching();
381     }
382 
383     if (os.supportsPredication() && hasFlagRegField)
384         GED_ENCODE(PredInv, pred.inverse ? GED_PRED_INV_Invert : GED_PRED_INV_Normal);
385 
386     // GED_ExecutionDataType
387     RegRef flagReg = inst.getFlagReg();
388     if (hasFlagRegField && (flagReg != REGREF_INVALID)) {
389         GED_ENCODE(FlagRegNum, static_cast<uint32_t>(inst.getFlagReg().regNum));
390         GED_ENCODE(FlagSubRegNum, inst.getFlagReg().subRegNum);
391     }
392 
393     // set AccWrEn where supported
394     if (inst.hasInstOpt(InstOpt::ACCWREN)) {
395         GED_ENCODE(AccWrCtrl, GED_ACC_WR_CTRL_AccWrEn);
396     }
397 
398     if (os.isBranching()) {
399         if (m_model.supportsSimplifiedBranches()) {
400             encodeBranchingInstructionSimplified(inst);
401         } else {
402             encodeBranchingInstruction(inst);
403         }
404         // options encoded internally
405     } else if (os.isTernary()) {
406         encodeTernaryInstruction(inst, accessMode);
407     } else if (os.isSendOrSendsFamily()) {
408         encodeSendInstruction(inst);
409     } else if (os.is(Op::SYNC)) {
410         encodeSyncInstruction(inst);
411     } else {
412         encodeBasicInstruction(inst, accessMode);
413     }
414 
415     if (!hasFatalError()) {
416         encodeOptions(inst);
417 
418         // setup for back patching on branching ops
419         if (os.isBranching() || inst.isMovWithLabel()) {
420             bool src0IsLabel = inst.getSource(0).isImm();
421             bool src1IsLabel = inst.getSourceCount() > 1 && inst.getSource(1).isImm();
422             if (src0IsLabel || src1IsLabel) {
423                 m_needToPatch.emplace_back(&inst, m_gedInst, m_instBuf + currentPc());
424                 // Force not to compact label instructions to avoid the compaction error
425                 // when auto-compaction is enabled. We could set this inst to compactable during
426                 // Encoder::encodeBlock when the value is unknown (and assume to be 0). But we can
427                 // only compact imm values use up to 12 bits.
428                 inst.addInstOpt(InstOpt::NOCOMPACT);
429             }
430         }
431     }
432 }
433 
encodeBasicInstruction(const Instruction & inst,GED_ACCESS_MODE accessMode)434 void Encoder::encodeBasicInstruction(
435     const Instruction& inst,
436     GED_ACCESS_MODE accessMode)
437 {
438     const OpSpec& os = inst.getOpSpec();
439     if (os.supportsDestination()) {
440         encodeBasicDestination(inst, inst.getDestination(), accessMode);
441     } else if (os.op == Op::WAIT ) {
442         // wait has an implicit destination (same as first source)
443         // but with dst region of <1>
444         Operand copy(inst.getSource(0));
445         copy.setRegion(Region::DST1);
446         encodeBasicDestination(inst, copy);
447     }
448 
449     switch (inst.getSourceCount())
450     {
451     case 2:
452         encodeBasicSource<SourceIndex::SRC1>(inst, inst.getSource(1), accessMode);
453         // vvvv fall through vvvv
454     case 1:
455         encodeBasicSource<SourceIndex::SRC0>(inst, inst.getSource(0), accessMode);
456     }
457 }
458 
encodeTernaryDestinationAlign1(const Instruction & inst)459 void Encoder::encodeTernaryDestinationAlign1(const Instruction& inst)
460 {
461     const Operand& dst = inst.getDestination();
462 
463     if (inst.getOpSpec().supportsSaturation()) {
464         GED_ENCODE(Saturate, lowerSaturate(dst.getDstModifier()));
465     }
466     GED_ENCODE(DstDataType, lowerDataType(dst.getType()));
467     GED_ENCODE(DstRegFile, lowerRegFile(dst.getDirRegName()));
468     encodeDstReg(dst.getDirRegName(), dst.getDirRegRef().regNum);
469 
470     if (inst.isMacro()) {
471         GED_ENCODE(DstMathMacroExt, lowerMathMacroReg(dst.getMathMacroExt()));
472         // GED_ENCODE(DstHorzStride, 1);
473     } else {
474         GED_ENCODE(DstSubRegNum, SubRegToBinaryOffset(
475             dst.getDirRegRef().subRegNum, dst.getDirRegName(), dst.getType(), m_model.platform));
476         bool hasDstRgnHz = true;
477         // dpas does not have a dst region
478         hasDstRgnHz = !inst.getOpSpec().isDpasFamily();
479         if (hasDstRgnHz) {
480             GED_ENCODE(DstHorzStride, static_cast<int>(dst.getRegion().getHz()));
481         }
482     }
483 }
484 
485 template <SourceIndex S>
encodeTernarySourceAlign1(const Instruction & inst)486 void Encoder::encodeTernarySourceAlign1(const Instruction& inst)
487 {
488     // CNL+ align1 ternary
489     if (platform() < Platform::GEN10) {
490         fatalT("src", (int)S, ": align1 ternary is not supported on this "
491             "platform");
492         return;
493     }
494 
495     const Operand& src = inst.getSource(S);
496     Type srcType = src.getType();
497     // DPAS
498     if (inst.getOpSpec().isDpasFamily()) {
499         // src0's type is the type for all sources
500 
501         if (S == SourceIndex::SRC0) {
502             GED_ENCODE(Src0DataType, lowerDataType(srcType));
503             // GED: src0 HS = 0, VS=3
504         } else if (S == SourceIndex::SRC1) {
505             GED_ENCODE(Src1Precision, lowerSubBytePrecision(srcType));
506             // GED sets both the type and the precision at the same time for us
507             // GED: src1 HS = 1, VS=3
508             // via this higher-level API
509         } else if (S == SourceIndex::SRC2) {
510             GED_ENCODE(Src2Precision, lowerSubBytePrecision(srcType));
511             // GED: src2 HS = 3
512             // GED sets both the type and the precision at the same time for us
513             // via this higher-level API
514         }
515         encodeSrcRegFile<S>(lowerRegFile(src.getDirRegName()));
516         encodeSrcReg<S>(src.getDirRegName(), src.getDirRegRef().regNum);
517         encodeSrcSubRegNum<S>(SubRegToBinaryOffset(
518             src.getDirRegRef().subRegNum, src.getDirRegName(), srcType, m_model.platform));
519 
520         return;
521     }
522 
523     // GED will catch any mismatch between float and int (illegal mixed mode)
524     encodeSrcType<S>(srcType); // GED dependency requires type before reg file
525 
526     switch (src.getKind()) {
527     case Operand::Kind::DIRECT:
528     case Operand::Kind::MACRO: {
529         encodeSrcRegFile<S>(lowerRegFile(src.getDirRegName()));
530 
531         if (platform() <= Platform::GEN11) {
532             encodeSrcAddrMode<S>(GED_ADDR_MODE_Direct);
533         }
534 
535         // source modifiers
536         if (inst.getOpSpec().supportsSourceModifiers()) {
537             encodeSrcModifier<S>(src.getSrcModifier());
538         }
539 
540         // regioning
541         //
542         // ternary align1 puts SpcAcc into subreg, so regions may be set
543         // in all cases
544         auto rgn = src.getRegion();
545         // * madm doesn't have a region in GEN9 ...
546         //   it does in GEN10+, but we haven't supported it in syntax yet
547         //   and leave it to GED to set it
548         // * src0 and src1 only has <w;h>, src2 only has <h>
549         bool hasSrcRgnHz = !inst.isMacro();
550         bool hasSrcRgnVt = !inst.isMacro() && S < SourceIndex::SRC2;
551         if (hasSrcRgnHz) {
552             encodeSrcRegionHorz<S>(rgn.getHz());
553         }
554         if (hasSrcRgnVt) {
555             encodeTernarySrcRegionVert(S, rgn.getVt());
556         }
557         // register and subregister
558         encodeSrcReg<S>(src.getDirRegName(), src.getDirRegRef().regNum);
559         if (inst.isMacro()) {
560             if (platform() < Platform::GEN11) {
561                 fatalT("src", (int)S, ": math macro operands require Align16");
562                 return;
563             }
564             encodeSrcMathMacroReg<S>(src.getMathMacroExt());
565             if (S != SourceIndex::SRC2) {
566                 encodeTernarySrcRegionVert(S, Region::Vert::VT_4);
567             }
568             encodeSrcRegionHorz<S>(Region::Horz::HZ_1);
569 
570         } else {
571             auto subReg = SubRegToBinaryOffset(
572                 src.getDirRegRef().subRegNum, src.getDirRegName(), src.getType(), m_model.platform);
573             encodeSrcSubRegNum<S>(subReg);
574         }
575         break;
576     }
577     case Operand::Kind::IMMEDIATE:
578         if (S == SourceIndex::SRC1) {
579             fatalT("src1: immediate operand in ternary align1 must be "
580                 "src0 or src2");
581             return;
582         }
583         encodeSrcRegFile<S>(GED_REG_FILE_IMM);
584         if (platform() < Platform::GEN10) {
585             encodeImmVal(src.getImmediateValue(), src.getType());
586         } else {
587             encodeTernaryImmVal<S>(src.getImmediateValue(), src.getType());
588         }
589         break;
590     default:
591         fatalT("src", (int)S, ": invalid operand kind");
592         return;
593     }
594 }
595 
596 
encodeTernaryInstruction(const Instruction & inst,GED_ACCESS_MODE accessMode)597 void Encoder::encodeTernaryInstruction(
598     const Instruction& inst,
599     GED_ACCESS_MODE accessMode)
600 {
601     if (accessMode == GED_ACCESS_MODE_Align1) {
602         encodeTernaryAlign1Instruction(inst);
603     } else {
604         encodeTernaryAlign16Instruction(inst);
605     }
606 }
encodeTernaryAlign16Instruction(const Instruction & inst)607 void Encoder::encodeTernaryAlign16Instruction(const Instruction& inst)
608 {
609     if (inst.getOpSpec().supportsDestination()) {
610         encodeTernaryDestinationAlign16(inst);
611     }
612     encodeTernarySourceAlign16<SourceIndex::SRC0>(inst);
613     encodeTernarySourceAlign16<SourceIndex::SRC1>(inst);
614     encodeTernarySourceAlign16<SourceIndex::SRC2>(inst);
615 }
encodeTernaryAlign1Instruction(const Instruction & inst)616 void Encoder::encodeTernaryAlign1Instruction(const Instruction& inst)
617 {
618     // set ExecutionDataType (integral or floating)
619     // the operands must be part of the same type set
620     Type src0Type = inst.getSource(0).getType();
621     GED_EXECUTION_DATA_TYPE execDataType;
622     if (isTernaryAlign1Floating(src0Type)) {
623         execDataType = GED_EXECUTION_DATA_TYPE_Float;
624     } else if (isTernaryAlign1Integral(src0Type)) {
625         execDataType = GED_EXECUTION_DATA_TYPE_Integer;
626     } else {
627         fatalT("src0: unsupported type for ternary align1 encoding");
628         return;
629     }
630     GED_ENCODE(ExecutionDataType, execDataType);
631 
632     if (inst.getOpSpec().supportsDestination()) {
633         encodeTernaryDestinationAlign1(inst);
634     }
635     encodeTernarySourceAlign1<SourceIndex::SRC0>(inst);
636     encodeTernarySourceAlign1<SourceIndex::SRC1>(inst);
637     encodeTernarySourceAlign1<SourceIndex::SRC2>(inst);
638 }
639 
encodeBranchingInstruction(const Instruction & inst)640 void Encoder::encodeBranchingInstruction(const Instruction& inst)
641 {
642     // the destination stride is always 1 for all control flow
643     GED_ENCODE(DstHorzStride, 1);
644 
645     // control flow instructions require patching later if any operand is a label
646     bool src0IsLabel = inst.getSource(0).getKind() == Operand::Kind::LABEL;
647 
648     // break up instructions into various classes
649     //   - stuff with implicit operands: jmpi
650     //   - stuff that can take register operands: call, calla, return
651     //   - everything else: if, else, while, ..., goto, join, ...
652     if (m_opcode == Op::JMPI)
653     {
654         // jmpi encodes the syntax
655         //   jmpi (1) LABEL
656         //   jmpi (1) reg32
657         // as
658         //   jmpi (1) ip ip LABEL
659         //   jmpi (1) ip ip reg32
660         //
661         // "Restriction: The index data type must be D (Signed DWord Integer)."
662         //
663         // implicit IP ...
664         encodeBasicDestination(inst, Operand::DST_REG_IP_UD);
665         encodeBasicSource<SourceIndex::SRC0>(inst, Operand::SRC_REG_IP_UD);
666         GED_ENCODE(Src1DataType, GED_DATA_TYPE_d);
667         if (src0IsLabel) {
668             // jmpi (1) LABEL   (encodes into Src1)
669             GED_ENCODE(Src1RegFile, GED_REG_FILE_IMM);
670         } else {
671             // jmpi (1) reg32   (encodes into Src1)
672             encodeBasicSource<SourceIndex::SRC1>(inst, inst.getSource(0));
673         }
674     }
675     else if (m_opcode == Op::CALL ||
676         m_opcode == Op::CALLA ||
677         m_opcode == Op::RET)
678     {
679         // e.g. call, calla, ret
680         //   call  (..)   imm32
681         //   call  (..)   reg32
682         //   calla (..)   imm32
683         //   ret   (...)  reg32       => encodes as ret (...) null reg
684         //
685         if (m_opcode == Op::CALL || m_opcode == Op::CALLA) {
686             encodeBasicDestination(inst, inst.getDestination());
687         } else if (m_opcode == Op::RET) {
688             encodeBasicDestination(inst, Operand::DST_REG_NULL_UD);
689             encodeBasicSource<SourceIndex::SRC0>(inst, inst.getSource(0));
690         }
691 
692         if (m_opcode == Op::CALL || m_opcode == Op::CALLA) {
693             if (src0IsLabel) {
694                 // op == CALL (since it's a label), hence we have
695                 // call (..) imm32 => which uses src1
696                 GED_ENCODE(Src1RegFile, GED_REG_FILE_IMM);
697                 GED_ENCODE(Src1DataType, GED_DATA_TYPE_d);
698             } else {
699                 // call  (..) reg32
700                 // calla (..) imm32
701                 encodeBasicSource<SourceIndex::SRC1>(inst, inst.getSource(0));
702             }
703 
704             // <2;2,1> restriction for CALL and CALLA restriction is only for
705             // IVB+HSW, but simulator has it until CNL.  So we have to support it
706             // until we get CNL HW validation moves to it
707             if (callNeedsSrc0Region221(inst)) {
708                 GED_ENCODE(Src0VertStride, 2);
709                 GED_ENCODE(Src0Width,      2);
710                 GED_ENCODE(Src0HorzStride, 1);
711             }
712             // though it's not state in the spec, ICL requires src0 region be set to <2;4,1>
713             else if (callNeedsSrc0Region241(inst)) {
714                 GED_ENCODE(Src0VertStride, 2);
715                 GED_ENCODE(Src0Width,      4);
716                 GED_ENCODE(Src0HorzStride, 1);
717             }
718         }
719     } else if (m_opcode == Op::BRD || m_opcode == Op::BRC) {
720         // [brd/brc]: The ip register must be used (for example, by the assembler) as dst.
721         encodeBasicDestination(inst, Operand::DST_REG_IP_D);
722         if (!src0IsLabel) {
723             encodeBasicSource<SourceIndex::SRC0>(inst, inst.getSource(0));
724         }
725         GED_DATA_TYPE ty =
726             platform() < Platform::GEN8 ? GED_DATA_TYPE_w : GED_DATA_TYPE_d;
727         GED_ENCODE(Src0RegFile,
728             src0IsLabel ? GED_REG_FILE_IMM : GED_REG_FILE_GRF);
729         GED_ENCODE(Src0DataType, ty);
730         // GED automatically sets?
731         // if (m_opcode == Op::BRC && src0IsLabel) {
732         //    GED_ENCODE(Src1DataType, ty);
733         // }
734         // if (m_opcode == Op::BRD && m_inst->getOpSpec().hasImplicitSrcRegion(0)) {
735         //     encodeSrcRegion(SourceIndex::SRC0,
736         //         m_inst->getOpSpec().implicitSrcRegion(0));
737         // }
738     } else {
739         // regular control flow that only accepts immediate values
740         // e.g. if, else, endif, while, cont, break, goto, join, halt
741 
742         // Apparently, the implicit destination on these instructions
743         // is null instead of ip (unlike jmpi etc)
744         // destination is ip<1>:ud
745         //
746         // encodeDestination(&Operand::DST_IP);
747         encodeBasicDestination(inst, Operand::DST_REG_NULL_UD);
748 
749         //UIP
750         if (m_opcode != Op::ENDIF &&
751             m_opcode != Op::WHILE &&
752             m_opcode != Op::JOIN)
753         {
754             // if/else/halt/brk/cont.... all require :d on operands
755             GED_DATA_TYPE ty =
756                 platform() < Platform::GEN8 ? GED_DATA_TYPE_w : GED_DATA_TYPE_d;
757             GED_ENCODE(Src0RegFile, GED_REG_FILE_IMM);
758             GED_ENCODE(Src0DataType, ty);
759         }
760         //before XE don't need to set JIP for control flow instructions that have UIP
761         //JIP
762         if (m_opcode == Op::WHILE ||
763             m_opcode == Op::ENDIF ||
764             m_opcode == Op::JOIN)
765         {
766             GED_ENCODE(Src1RegFile, GED_REG_FILE_IMM);
767             GED_ENCODE(Src1DataType, GED_DATA_TYPE_d);
768         }
769     }
770 }
771 
encodeBranchingInstructionSimplified(const Instruction & inst)772 void Encoder::encodeBranchingInstructionSimplified(const Instruction& inst)
773 {
774     const OpSpec& os = inst.getOpSpec();
775 
776     // set branch control
777     if (os.supportsBranchCtrl()) {
778         GED_ENCODE(BranchCtrl, lowerBranchCntrl(inst.getBranchCtrl()));
779     }
780 
781     // control flow instructions require patching later if any operand is a label
782     const Operand& src0 = inst.getSource(0);
783     bool src0IsLabel = src0.getKind() == Operand::Kind::LABEL;
784 
785     // for jmpi HW will take care of IP so don't need to encode it for dst/src0
786     if (inst.getOpSpec().supportsDestination()) {
787         encodeBranchDestination(inst.getDestination());
788     } else {
789         encodeBranchDestination(Operand::DST_REG_NULL_UD);
790     }
791     // regualar control flow that only accepts immediate values
792     // e.g. if, else, endif, while, cont, break, goto, join, halt
793 
794     // encoding JIP
795     if (src0IsLabel) {
796         GED_ENCODE(Src0RegFile, GED_REG_FILE_IMM);
797         // if (src0.getTargetBlock() == nullptr) {
798         //    // the input value is immediate; use m_immVal as the value
799         //    encodeBranchSource(src0);
800         // }
801     } else {
802         // jmpi, call, brc, ...
803         if (src0.getKind() == Operand::Kind::INDIRECT)
804             errorT("branch instructions forbid indirect register mode");
805         encodeBranchSource(src0);
806     }
807 
808     if (inst.getSourceCount() == 2) {
809         // encoding UIP always IMM except for brc with a register argument
810         if (inst.getOp() != Op::BRC || src0.isImm()) {
811             GED_ENCODE(Src1RegFile, GED_REG_FILE_IMM);
812         }
813     }
814 }
815 
encodeSendInstruction(const Instruction & i)816 void Encoder::encodeSendInstruction(const Instruction& i)
817 {
818     ////////////////////////////////////////////
819     // send operands
820     const OpSpec& os = i.getOpSpec();
821     if (os.isSendFamily()) {
822         encodeSendDestination(i.getDestination());
823         encodeSendSource0(i.getSource(0));
824         if (m_model.supportsUnifiedSend()) {
825             encodeSendsSource1(i.getSource(1));
826         }
827     } else if (os.isSendsFamily()) {
828         encodeSendDestination(i.getDestination());
829         encodeSendsSource0(i.getSource(0));
830         encodeSendsSource1(i.getSource(1));
831     }
832 
833     ////////////////////////////////////////////
834     // send descriptors and other gunk
835     encodeSendDescs(i);
836 
837     ////////////////////////////////////////////
838     // send options
839 
840     // FusionCtrl is removed from XeHPC+
841     bool hasFusion =
842         platform() >= Platform::XE && platform() < Platform::XE_HPC;
843     if (hasFusion) {
844         GED_ENCODE(FusionCtrl,
845             i.hasInstOpt(InstOpt::SERIALIZE) ?
846                 GED_FUSION_CTRL_Serialized : GED_FUSION_CTRL_Normal);
847     }
848 
849     if (i.hasInstOpt(InstOpt::EOT)) {
850         GED_ENCODE(EOT, GED_EOT_EOT);
851     }
852 } //end: encodeSendInstruction
853 
854 
encodeSendDescs(const Instruction & i)855 void Encoder::encodeSendDescs(const Instruction& i)
856 {
857     if (platform() < Platform::XE) {
858         encodeSendDescsPreXe(i);
859     } else if (platform() == Platform::XE) {
860         encodeSendDescsXe(i);
861     } else if (platform() == Platform::XE_HP) {
862         encodeSendDescsXeHP(i);
863     } else if (platform() == Platform::XE_HPG ||
864         platform() == Platform::XE_HPC)
865     {
866         encodeSendDescsXeHPG(i);
867     } else {
868         errorT("unsupported platform");
869     }
870 
871     bool noEOTinExDesc = m_model.supportsUnifiedSend();
872     if (noEOTinExDesc &&
873         i.getExtMsgDescriptor().isImm() &&
874         (i.getExtMsgDescriptor().imm & 1 << 5))
875         errorT("Encoder: Send exDesc[5] must not be set (the legacy EOT bit)");
876 }
877 
encodeSendDescsPreXe(const Instruction & i)878 void Encoder::encodeSendDescsPreXe(const Instruction& i)
879 {
880     SendDesc exDesc = i.getExtMsgDescriptor();
881     const OpSpec& os = i.getOpSpec();
882     if (exDesc.isReg()) {
883         if (os.isSendFamily()) {
884             errorT("unary send forbids register ExDesc");
885         }
886         GED_ENCODE(ExDescRegFile, GED_REG_FILE_ARF);
887         GED_ENCODE(ExDescAddrSubRegNum, 2 * exDesc.reg.subRegNum);
888     } else {
889         GED_ENCODE(ExDescRegFile, GED_REG_FILE_IMM);
890         GED_ENCODE(ExMsgDesc, exDesc.imm);
891     }
892 
893     SendDesc desc = i.getMsgDescriptor();
894     if (desc.isReg()) {
895         if (platform() == Platform::GEN9) {
896             uint32_t msgDescriptor = 0;
897             // There is a HW bug on SKL where HW will only copy bits 0-28 from
898             // the address register (descriptor register) and will miss bit 30
899             // of the descriptor.  Hence, even in the case of an register
900             // descriptor we must program bit 30 as immediate (it will be
901             // taken from the encoding and OR'd in correctly)
902             //
903             // E.g. (old syntax)
904             //   sends (8) r74:hf r16 r73 0x42:ud a0.0 {Align1, Q1, NoMask}
905             //       // sampler, resLen=3, msgLen=1, extMsgLen=1
906             // On SKL, HW will copy bits 29-31 from the actual immediate
907             // descriptor bits.  Hence, we must set immediate descriptor
908             // bit 30 even in the case of a register descriptor. (For SKL).
909             //
910             // For 3D sampler bit 30 indicates HF/F return format.
911             // For render target write bit 30 indicates HF/F input...
912             // Thankfully for SKL the 3D sampler doesn't support HF input.
913             // For CNL it does, and that will be bit 29.
914             // But this bug should be fixed in CNL.
915             if (platform() == Platform::GEN9 && desc.isReg()) {
916                 if (i.getDestination().getType() == Type::HF ||
917                     i.getSource(0).getType() == Type::HF)
918                 {
919                     msgDescriptor |= (1 << 30);
920                 }
921             }
922             GED_ENCODE(DescRegFile, GED_REG_FILE_IMM);
923             GED_ENCODE(MsgDesc, msgDescriptor);
924         }
925         GED_ENCODE(DescRegFile, GED_REG_FILE_ARF);
926         uint8_t regNumBits;
927         const RegInfo *ri = m_model.lookupRegInfoByRegName(RegName::ARF_A);
928         IGA_ASSERT(ri, "failed to find a0 register");
929         ri->encode((int)desc.reg.regNum, regNumBits);
930         GED_ENCODE(DescRegNum, regNumBits);
931     } else if (desc.isImm()) {
932         GED_ENCODE(DescRegFile, GED_REG_FILE_IMM);
933         GED_ENCODE(MsgDesc, desc.imm);
934     }
935 }
encodeSendDescsXe(const Instruction & i)936 void Encoder::encodeSendDescsXe(const Instruction& i)
937 {
938     SendDesc exDesc = i.getExtMsgDescriptor();
939     if (exDesc.isReg()) {
940         GED_ENCODE(ExDescRegFile, GED_REG_FILE_ARF);
941         GED_ENCODE(ExDescAddrSubRegNum, 2 * exDesc.reg.subRegNum);
942     } else {
943         GED_ENCODE(ExDescRegFile, GED_REG_FILE_IMM);
944         GED_ENCODE(ExMsgDesc, exDesc.imm);
945     }
946 
947     SendDesc desc = i.getMsgDescriptor();
948     if (desc.isReg()) {
949         GED_ENCODE(DescRegFile, GED_REG_FILE_ARF);
950         // a0.0 is implied (there's no field)
951         if (desc.reg.subRegNum != 0) {
952             errorT("send with reg desc must be a0.0");
953         }
954     } else {
955         GED_ENCODE(DescRegFile, GED_REG_FILE_IMM);
956         GED_ENCODE(MsgDesc, desc.imm);
957     }
958 }
959 
960 // A bit harder than Xe
961 //   * If ExBSO is set then Src1Length holds xlen
962 //   * CPS has it's own field (ExDesc[11]) only if ExDesc.IsReg
encodeSendDescsXeHP(const Instruction & i)963 void Encoder::encodeSendDescsXeHP(const Instruction& i)
964 {
965     SendDesc exDesc = i.getExtMsgDescriptor();
966     if (exDesc.isReg()) {
967         GED_ENCODE(ExDescRegFile, GED_REG_FILE_ARF);
968         GED_ENCODE(ExDescAddrSubRegNum, 2 * exDesc.reg.subRegNum);
969         GED_ENCODE(ExBSO, i.hasInstOpt(InstOpt::EXBSO) ? 1 : 0);
970         if (i.hasInstOpt(InstOpt::EXBSO)) {
971             GED_ENCODE(CPS, i.hasInstOpt(InstOpt::CPS) ? 1 : 0);
972             GED_ENCODE(Src1Length, (uint32_t)i.getSrc1Length());
973         } else if (i.hasInstOpt(InstOpt::CPS)) {
974             errorT("{CPS} requires {ExBSO}");
975         }
976     } else {
977         if (i.hasInstOpt(InstOpt::CPS)) {
978             warningT("when ExDesc is immediate use ExDesc[11] rather than {CPS}");
979             exDesc.imm |= 1 << 11;
980         }
981         GED_ENCODE(ExDescRegFile, GED_REG_FILE_IMM);
982         GED_ENCODE(ExMsgDesc, exDesc.imm);
983     }
984 
985     SendDesc desc = i.getMsgDescriptor();
986     if (desc.isReg()) {
987         GED_ENCODE(DescRegFile, GED_REG_FILE_ARF);
988         if (desc.reg.subRegNum != 0) { // a0.0 is implied (there's no field)
989             errorT("send with reg desc must be a0.0");
990         }
991     } else {
992         GED_ENCODE(DescRegFile, GED_REG_FILE_IMM);
993         GED_ENCODE(MsgDesc, desc.imm);
994     }
995 }
996 
997 // Similar to XeHP, except
998 //    * ExDesc.IsImm implies use of Src1Length (Src.Length is in EU bits)
encodeSendDescsXeHPG(const Instruction & i)999 void Encoder::encodeSendDescsXeHPG(const Instruction& i)
1000 {
1001     SendDesc exDesc = i.getExtMsgDescriptor();
1002     if (exDesc.isReg()) {
1003         GED_ENCODE(ExDescRegFile, GED_REG_FILE_ARF);
1004         GED_ENCODE(ExDescAddrSubRegNum, 2 * exDesc.reg.subRegNum);
1005         GED_ENCODE(ExBSO, i.hasInstOpt(InstOpt::EXBSO) ? 1 : 0);
1006         if (i.hasInstOpt(InstOpt::EXBSO)) {
1007             GED_ENCODE(CPS, i.hasInstOpt(InstOpt::CPS) ? 1 : 0);
1008             GED_ENCODE(Src1Length, (uint32_t)i.getSrc1Length());
1009         } else if (i.hasInstOpt(InstOpt::CPS)) {
1010             errorT("{CPS} requires {ExBSO}");
1011         }
1012     } else {
1013         if (i.hasInstOpt(InstOpt::CPS)) {
1014             warningT("when ExDesc is immediate use ExDesc[11] rather than {CPS}");
1015             exDesc.imm |= 1 << 11;
1016         }
1017         GED_ENCODE(ExDescRegFile, GED_REG_FILE_IMM);
1018         GED_ENCODE(ExMsgDesc, exDesc.imm);
1019         GED_ENCODE(Src1Length, (uint32_t)i.getSrc1Length());
1020     }
1021 
1022     SendDesc desc = i.getMsgDescriptor();
1023     if (desc.isReg()) {
1024         GED_ENCODE(DescRegFile, GED_REG_FILE_ARF);
1025         if (desc.reg.subRegNum != 0) { // a0.0 is implied (there's no field)
1026             errorT("send with reg desc must be a0.0");
1027         }
1028     } else {
1029         GED_ENCODE(DescRegFile, GED_REG_FILE_IMM);
1030         GED_ENCODE(MsgDesc, desc.imm);
1031     }
1032 }
1033 
1034 
1035 
1036 
encodeSyncInstruction(const Instruction & inst)1037 void Encoder::encodeSyncInstruction(const Instruction& inst)
1038 {
1039     // Set the Dst.HorStride to 1 so that "sync.bar null" can be compacted
1040     GED_ENCODE(DstHorzStride, 1);
1041 
1042     const Operand &src = inst.getSource(0);
1043     if (src.getKind() == Operand::Kind::IMMEDIATE) {
1044         encodeSrcRegFile<SourceIndex::SRC0>(GED_REG_FILE_IMM);
1045         encodeSrcType<SourceIndex::SRC0>(src.getType());
1046         encodeImmVal(src.getImmediateValue(), src.getType());
1047     } else {
1048         if (platform() <= Platform::XE_HPG) {
1049             encodeSrcRegFile<SourceIndex::SRC0>(GED_REG_FILE_ARF);
1050         } else {
1051             // XeHPC+ supports sync with reg32. For earlier platforms encode it to the null reg anyway.
1052             // If not doing so we'll encounter some weird behavior on validation. Suspect it's
1053             // becuase on some previous platforms' testcase there are reg32 those are not valid,
1054             // but IGA workaround (set it to NULL) them
1055             if (src.isNull()) {
1056                 encodeSrcRegFile<SourceIndex::SRC0>(GED_REG_FILE_ARF);
1057             } else {
1058                 // currently only flag register is supported in sync.bar
1059                 encodeSrcRegFile<SourceIndex::SRC0>(lowerRegFile(src.getDirRegName()));
1060                 encodeSrcReg<SourceIndex::SRC0>(src.getDirRegName(), src.getDirRegRef().regNum);
1061                 encodeSrcType<SourceIndex::SRC0>(src.getType());
1062                 // must be flag register (otherwise GED will return error), encode the subreg directly.
1063                 GED_ENCODE(Src0SubRegNum, SubRegToBinaryOffset(
1064                     src.getDirRegRef().subRegNum, src.getDirRegName(), src.getType(), m_model.platform));
1065             }
1066         }
1067     }
1068 }
1069 
encodeBranchDestination(const Operand & dst)1070 void Encoder::encodeBranchDestination(const Operand& dst) {
1071     GED_ENCODE(DstRegFile,
1072         lowerRegFile(dst.getDirRegName()));
1073     encodeDstReg(dst.getDirRegName(), dst.getDirRegRef().regNum);
1074     GED_ENCODE(DstSubRegNum, SubRegToBinaryOffset(
1075         dst.getDirRegRef().subRegNum, dst.getDirRegName(), dst.getType(), m_model.platform));
1076 }
1077 
encodeBasicDestination(const Instruction & inst,const Operand & dst,GED_ACCESS_MODE accessMode)1078 void Encoder::encodeBasicDestination(
1079     const Instruction& inst,
1080     const Operand& dst,
1081     GED_ACCESS_MODE accessMode)
1082 {
1083     IGA_ASSERT(accessMode != GED_ACCESS_MODE_Align16 ||
1084         m_model.supportsAlign16(),
1085         "Align16 not supported on this platform.");
1086 
1087     GED_ENCODE(DstRegFile,
1088         lowerRegFile(dst.getDirRegName()));
1089     switch (dst.getKind())
1090     {
1091     case Operand::Kind::DIRECT:
1092     case Operand::Kind::MACRO:
1093         GED_ENCODE(DstAddrMode, GED_ADDR_MODE_Direct);
1094         GED_ENCODE(DstDataType,
1095             lowerDataType(dst.getType()));
1096         if (inst.getOpSpec().supportsSaturation()) {
1097             GED_ENCODE(Saturate,
1098                 lowerSaturate(dst.getDstModifier()));
1099         }
1100         // VVVVV   fallthrough  VVVVV
1101     default: break;
1102     }
1103 
1104     switch (dst.getKind())
1105     {
1106     case Operand::Kind::DIRECT:
1107         if (accessMode == GED_ACCESS_MODE_Align16) {
1108             if (dst.getRegion() != Region::DST1) {
1109                 fatalT("dst has inconvertible region for Align16 encoding");
1110                 return;
1111             }
1112             if (isAlign16MathMacroRegisterCsrOperand(dst)) {
1113                 // acc2.XXXX on BDW .. SKL is context save and restore
1114                 // This is really mme0
1115                 encodeDstReg(RegName::ARF_MME, 0);
1116                 // on GEN8 and GEN9 all encode as acc2, but the mux varies
1117                 // to distinguish which acc it really is.
1118                 GED_DST_CHAN_EN chEn;
1119                 switch (dst.getDirRegRef().regNum) {
1120                 /// case 0: ... acc2 actually uses Align1!
1121                 // old-style for acc2 would be:
1122                 // mov(8) r113:ud acc2:ud  {NoMask} // acc2
1123                 //
1124                 // acc3-9 are Align16
1125                 case 1: chEn = GED_DST_CHAN_EN_x;    break; // mme0/acc3 -> acc2.x (0001b)
1126                 case 2: chEn = GED_DST_CHAN_EN_y;    break; // mme1/acc4 -> acc2.y (0010b)
1127                 case 3: chEn = GED_DST_CHAN_EN_xy;   break;
1128                 case 4: chEn = GED_DST_CHAN_EN_z;    break;
1129                 case 5: chEn = GED_DST_CHAN_EN_xz;   break;
1130                 case 6: chEn = GED_DST_CHAN_EN_yz;   break;
1131                 case 7: chEn = GED_DST_CHAN_EN_xyzw; break; // mme7/acc9 -> acc2.xyzw (0111b)
1132                 default: IGA_ASSERT_FALSE("unreachable"); chEn = GED_DST_CHAN_EN_x;
1133                 }
1134                 GED_ENCODE(DstChanEn, chEn);
1135             } else {
1136                 // normal align16 destination (this still might be a
1137                 // CSR work around op if the src is "acc2")
1138                 encodeDstReg(dst.getDirRegName(), dst.getDirRegRef().regNum);
1139                 GED_ENCODE(DstChanEn, GED_DST_CHAN_EN_xyzw);
1140             }
1141             GED_ENCODE(DstSubRegNum, SubRegToBinaryOffset(
1142                 dst.getDirRegRef().subRegNum, dst.getDirRegName(), dst.getType(), m_model.platform));
1143         } else { // Align1
1144             encodeDstReg(dst.getDirRegName(), dst.getDirRegRef().regNum);
1145             GED_ENCODE(DstSubRegNum, SubRegToBinaryOffset(
1146                 dst.getDirRegRef().subRegNum, dst.getDirRegName(), dst.getType(), m_model.platform));
1147         }
1148         break;
1149     case Operand::Kind::MACRO:
1150         encodeDstReg(dst.getDirRegName(), dst.getDirRegRef().regNum);
1151         GED_ENCODE(DstMathMacroExt,
1152             lowerSpecialAcc(dst.getMathMacroExt()));
1153         if (accessMode == GED_ACCESS_MODE_Align1 &&
1154             m_model.supportsAlign16ImplicitAcc())
1155         {
1156             fatalT("Align1 dst math macro unsupported on this platform.");
1157             return;
1158         }
1159         break;
1160     case Operand::Kind::INDIRECT:
1161         GED_ENCODE(DstAddrMode, GED_ADDR_MODE_Indirect);
1162         GED_ENCODE(DstDataType,
1163             lowerDataType(dst.getType()));
1164         if (inst.getOpSpec().supportsSaturation()) {
1165             GED_ENCODE(Saturate,
1166                 lowerSaturate(dst.getDstModifier()));
1167         }
1168 
1169         GED_ENCODE(DstAddrImm, dst.getIndImmAddr());
1170         GED_ENCODE(DstAddrSubRegNum, dst.getIndAddrReg().subRegNum);
1171         break;
1172     default:
1173         IGA_ASSERT_FALSE("unsupported operand kind");
1174         break;
1175     }
1176 
1177     if (accessMode == GED_ACCESS_MODE_Align1) {
1178         auto dstRgn = dst.getRegion();
1179         if (inst.getOpSpec().hasImplicitDstRegion(inst.isMacro())) {
1180             auto dstRgnImpl = inst.getOpSpec().implicitDstRegion(inst.isMacro());
1181             if (dstRgn != dstRgnImpl) {
1182                 warningT("dst region should be ", ToSyntax(dstRgnImpl));
1183             }
1184         }
1185         GED_ENCODE(DstHorzStride, lowerRegionHorz(dstRgn.getHz()));
1186     }
1187 }
1188 
createChSelForCtxSavRst(GED_SWIZZLE * chSel,GED_SWIZZLE x,GED_SWIZZLE y)1189 static void createChSelForCtxSavRst(
1190     GED_SWIZZLE *chSel,
1191     GED_SWIZZLE x,
1192     GED_SWIZZLE y)
1193 {
1194     // following IsaAsm rules here
1195     // reg.ab expands to reg.abbb
1196     chSel[0] = x;
1197     chSel[1] = chSel[2] = chSel[3] = y;
1198 }
1199 
1200 
encodeBranchSource(const Operand & src)1201 void Encoder::encodeBranchSource(const Operand& src)
1202 {
1203     encodeSrcRegFile<SourceIndex::SRC0>(lowerRegFile(src.getDirRegName()));
1204     encodeSrcReg<SourceIndex::SRC0>(src.getDirRegName(),src.getDirRegRef().regNum);
1205     auto subReg = SubRegToBinaryOffset(
1206         src.getDirRegRef().subRegNum, src.getDirRegName(), Type::D, m_model.platform);
1207     encodeSrcSubRegNum<SourceIndex::SRC0>(subReg);
1208 }
1209 
// Encodes source operand S of a basic instruction.
//
//   inst        the instruction being encoded (consulted for source
//               modifier support and for the "mov with label" case)
//   src         the source operand
//   accessMode  Align1 or Align16
//
// Done in three passes over the operand kind:
//   1. register file and source modifier,
//   2. type, addressing mode and register/immediate fields,
//   3. region (Align1) or vertical stride + channel select (Align16).
// Unsupported operand kinds or regions are reported via fatalT() and
// abort the encoding of this operand.
template <SourceIndex S>
void Encoder::encodeBasicSource(
    const Instruction& inst,
    const Operand& src,
    GED_ACCESS_MODE accessMode)
{
    // setting the reg file must precede setting the type in GED
    switch (src.getKind()) {
    case Operand::Kind::DIRECT:
    case Operand::Kind::MACRO:
    case Operand::Kind::INDIRECT:
        encodeSrcRegFile<S>(
            lowerRegFile(src.getDirRegName()));
        if (inst.getOpSpec().supportsSourceModifiers()) {
            encodeSrcModifier<S>(src.getSrcModifier());
        } else if (src.getSrcModifier() != SrcModifier::NONE) {
            // better be invalid in the IR if unsupported
            errorT("src", (int)S, " source modifier not supported (invalid IR)");
        }
        break;
    case Operand::Kind::IMMEDIATE:
        encodeSrcRegFile<S>(GED_REG_FILE_IMM);
        break;
    default:
        // other kinds (e.g. a label) are handled in the second switch
        break;
    }

    encodeSrcType<S>(src.getType());

    switch (src.getKind()) {
    case Operand::Kind::DIRECT:
    case Operand::Kind::MACRO: {
        encodeSrcAddrMode<S>(GED_ADDR_MODE_Direct);
        if (src.getKind() == Operand::Kind::DIRECT) {
            if (isAlign16MathMacroRegisterCsrOperand(src)) {
                // BDW..SKL context save and restore of acc3...acc9
                // encode as acc2.####, ChSel will be changed in regioning code
                // recall acc2 is remapped to mme0
                encodeSrcReg<S>(RegName::ARF_MME, 0);
            } else {
                encodeSrcReg<S>(src.getDirRegName(), src.getDirRegRef().regNum);
                auto subReg = SubRegToBinaryOffset(
                    src.getDirRegRef().subRegNum,
                    src.getDirRegName(),
                    src.getType(),
                    m_model.platform);
                encodeSrcSubRegNum<S>(subReg);
            }
        } else { // (src.getKind() == Operand::Kind::MACRO)
            // macro sources are always encoded against the GRF here
            encodeSrcReg<S>(RegName::GRF_R,src.getDirRegRef().regNum);
            encodeSrcMathMacroReg<S>(src.getMathMacroExt());
            if (accessMode == GED_ACCESS_MODE_Align16) {
                // vertical stride has to be halved for 8B types
                if (src.getType() == Type::DF) {
                    encodeSrcRegionVert<S>(Region::Vert::VT_2);
                } else {
                    encodeSrcRegionVert<S>(Region::Vert::VT_4);
                }
            }
        }
        break;
    }
    case Operand::Kind::INDIRECT:
        encodeSrcAddrMode<S>(GED_ADDR_MODE_Indirect);
        encodeSrcAddrImm<S>(src.getIndImmAddr());
        encodeSrcAddrSubRegNum<S>(src.getIndAddrReg().subRegNum);
        break;
    case Operand::Kind::IMMEDIATE:
        encodeImmVal(src.getImmediateValue(), src.getType());
        break;
    default:
        // support mov label: only src0 of a "mov with label" may be a
        // non-register, non-immediate operand; it is encoded as immediate
        if (static_cast<int>(S) == 0 && inst.isMovWithLabel()) {
            GED_ENCODE(Src0RegFile, GED_REG_FILE_IMM);
        } else {
            fatalT("src", (int)S, ": unsupported source operand kind "
                "(malformed IR)");
            return;
        }
        break;
    }

    // sets stuff found in all register accesses (not macros)
    //   - region
    switch (src.getKind()) {
    case Operand::Kind::DIRECT:
    case Operand::Kind::INDIRECT:
        if (accessMode == GED_ACCESS_MODE_Align16) {
            // r13.0<4>.xyzw is the only supported ChEn
            //      ^^^
            encodeSrcRegionVert<S>(Region::Vert::VT_4);
            // default swizzle; replaced below for the CSR workaround
            GED_SWIZZLE chSel[4] =
                {GED_SWIZZLE_x, GED_SWIZZLE_y, GED_SWIZZLE_z, GED_SWIZZLE_w};
            if (isAlign16MathMacroRegisterCsrOperand(src)) {
                // context save and restore workaround on GEN8 and GEN9
                // (any other register number keeps the default .xyzw)
                switch (src.getDirRegRef().regNum) {
                case 1: // acc2.yx = mme1 (acc3)
                    createChSelForCtxSavRst(chSel, GED_SWIZZLE_y, GED_SWIZZLE_x);
                    break;
                case 2: // acc2.zx = mme2 (acc4)
                    createChSelForCtxSavRst(chSel, GED_SWIZZLE_z, GED_SWIZZLE_x);
                    break;
                case 3: // acc2.wx = mme3 (acc5)
                    createChSelForCtxSavRst(chSel, GED_SWIZZLE_w, GED_SWIZZLE_x);
                    break;
                case 4: // acc2.xy = mme4 (acc6)
                    createChSelForCtxSavRst(chSel, GED_SWIZZLE_x, GED_SWIZZLE_y);
                    break;
                case 5: // acc2.yy = mme5 (acc7)
                    createChSelForCtxSavRst(chSel, GED_SWIZZLE_y, GED_SWIZZLE_y);
                    break;
                case 6: // acc2.zy = mme6 (acc8)
                    createChSelForCtxSavRst(chSel, GED_SWIZZLE_z, GED_SWIZZLE_y);
                    break;
                case 7: // acc2.wy = mme7 (acc9)
                    createChSelForCtxSavRst(chSel, GED_SWIZZLE_w, GED_SWIZZLE_y);
                    break;
                }
            } else {
                // normal Align1 operand being translated to Align16: only
                // <1;1,0> and the "block" regions below can be converted
                if (src.getRegion() != Region::SRC110 &&
                    // supports legacy bits that may use <K;K,1> for "block"
                    // access; this allows us to assemble/reassemble similar bits
                    src.getRegion() != Region::SRC221 &&
                    src.getRegion() != Region::SRC441 &&
                    src.getRegion() != Region::SRC881 &&
                    src.getRegion() != Region::SRCFF1)
                {
                    fatalT("src", (int)S, ": unsupported region for "
                        "translation to align16 encoding");
                    return;
                }
                // TODO: we could permit SIMD4 with .x to mean broadcast read
                // of subreg 0, but I don't think any System Routine code uses
                // this.
                //
                // NOTE: technically we could convert
                //   r13.0<0>.xxxx to r13.0<0;1,0>
                //   r13.0<0>.yyyy to r13.1<0;1,0>
                //   r13.0<0>.zzzz to r13.2<0;1,0>
                //   r13.0<0>.wwww to r13.3<0;1,0>
                // Also be sure to handle stuff like:
                //   r13.4<0>.zzzz (would be r13.7<0;1,0>)
                //
                // Let's wait until we need this though.
            }
            encodeSrcChanSel<S>(chSel[0], chSel[1], chSel[2], chSel[3]);
        } else { // Align1
            bool hasRgnWi = true;
            encodeSrcRegion<S>(src.getRegion(), hasRgnWi);
        }
        break;
    case Operand::Kind::MACRO:
        if (accessMode == GED_ACCESS_MODE_Align1) {
            encodeSrcRegion<S>(src.getRegion());
        } // else {align16 macros use the regioning bits, don't clobber them}
        break;
    default:
        break;
    }
}
1371 
encodeSendDirectDestination(const Operand & dst)1372 void Encoder::encodeSendDirectDestination(const Operand& dst)
1373 {
1374     if (platform() >= Platform::XE) {
1375         //auto t = dst.getType() == Type::INVALID ? Type::UD : dst.getType();
1376         //GED_ENCODE(DstDataType, lowerDataType(t));
1377         GED_ENCODE(DstRegNum, dst.getDirRegRef().regNum);
1378     } else {
1379         auto t = dst.getType() == Type::INVALID ? Type::UD : dst.getType();
1380         GED_ENCODE(DstDataType, lowerDataType(t));
1381 
1382         //GED_ENCODE(Saturate, lowerSaturate(dst->getDstModifier()));
1383         if (m_opcode != Op::SENDS && m_opcode != Op::SENDSC) {
1384             GED_ENCODE(DstHorzStride, static_cast<uint32_t>(dst.getRegion().getHz())); // not used for sends
1385         }
1386 
1387         GED_ENCODE(DstRegNum, dst.getDirRegRef().regNum);
1388         // GED_ENCODE(DstSubRegNum,
1389         //    SubRegToBinaryOffset(dst.getDirRegRef().subRegNum, RegName::GRF_R, dst.getType(), m_model.platform));
1390     }
1391 }
1392 
encodeSendDestinationDataType(const Operand & dst)1393 void Encoder::encodeSendDestinationDataType(const Operand& dst)
1394 {
1395     if (platform() >= Platform::XE)
1396         return;
1397 
1398     auto t = dst.getType() == Type::INVALID ? Type::UD : dst.getType();
1399     GED_ENCODE(DstDataType, lowerDataType(t));
1400 }
1401 
encodeSendDestination(const Operand & dst)1402 void Encoder::encodeSendDestination(const Operand& dst)
1403 {
1404     if (m_model.supportsUnarySend()) {
1405         switch (dst.getKind())
1406         {
1407         case Operand::Kind::DIRECT:
1408             GED_ENCODE(DstAddrMode, GED_ADDR_MODE_Direct);
1409             break;
1410         case Operand::Kind::INDIRECT:
1411             GED_ENCODE(DstAddrMode, GED_ADDR_MODE_Indirect);
1412             break;
1413         default:
1414             fatalT("dst: unsupported destination operand kind/addrMode "
1415                 "(malformed IR)");
1416             return;
1417         }
1418     }
1419 
1420     GED_ENCODE(DstRegFile,
1421         lowerRegFile(dst.getDirRegName()));
1422 
1423     if (dst.getKind() ==  Operand::Kind::DIRECT) {
1424         encodeSendDirectDestination(dst);
1425     } else if (dst.getKind() ==  Operand::Kind::INDIRECT) {
1426         encodeSendDestinationDataType(dst);
1427         if (m_opcode != Op::SENDS && m_opcode != Op::SENDSC) {
1428             GED_ENCODE(DstHorzStride, static_cast<uint32_t>(dst.getRegion().getHz())); // not used for sends
1429         }
1430         GED_ENCODE(DstAddrImm, dst.getIndImmAddr());
1431         GED_ENCODE(DstAddrSubRegNum, dst.getIndAddrReg().subRegNum);
1432     }
1433 }
1434 
encodeSendSource0(const Operand & src)1435 void Encoder::encodeSendSource0(const Operand& src)
1436 {
1437     if (m_model.supportsUnarySend()) {
1438         switch(src.getKind())
1439         {
1440         case Operand::Kind::DIRECT:
1441             GED_ENCODE(Src0AddrMode, GED_ADDR_MODE_Direct);
1442             break;
1443         case Operand::Kind::INDIRECT:
1444             GED_ENCODE(Src0AddrMode, GED_ADDR_MODE_Indirect);
1445             break;
1446         default:
1447             fatalT("src0: unsupported source operand kind/addrMode "
1448                 "(malformed IR)");
1449             return;
1450             break;
1451         }
1452     }
1453 
1454     GED_REG_FILE gedRegFile = lowerRegFile(src.getDirRegName());
1455     GED_ENCODE(Src0RegFile, gedRegFile);
1456 
1457     auto t = src.getType() == Type::INVALID ? Type::UD : src.getType();
1458 
1459     if (src.getKind() ==  Operand::Kind::DIRECT)
1460     {
1461         if (m_model.supportsUnifiedSend()){
1462             GED_ENCODE(Src0RegNum, src.getDirRegRef().regNum);
1463         } else {
1464             GED_ENCODE(Src0DataType, lowerDataType(t));
1465             GED_ENCODE(Src0RegNum,    src.getDirRegRef().regNum);
1466             GED_ENCODE(Src0SubRegNum, src.getDirRegRef().subRegNum);
1467         }
1468     }
1469     else if (src.getKind() ==  Operand::Kind::INDIRECT)
1470     {
1471         {
1472             GED_ENCODE(Src0DataType, lowerDataType(t));
1473             GED_ENCODE(Src0AddrSubRegNum, src.getIndAddrReg().subRegNum);
1474             // For platform >= XeHPC, the ImmAddr is represented in Word Offset in bianry,
1475             //     platform <  XeHPC, the ImmAddr is represented in Byte Offset in bianry
1476             // And for all platforms, the ImmAddr is represented in Byet Offset in assembly
1477             if (platform() >= Platform::XE_HPC) {
1478                 GED_ENCODE(Src0AddrImm, src.getIndImmAddr() / 2);
1479             } else {
1480                 GED_ENCODE(Src0AddrImm, src.getIndImmAddr());
1481             }
1482         }
1483     }
1484 }
1485 
1486 // The sends opCode exists on gen9+.  There is no sends opcode on pre-gen9.
1487 // Starting from XE, send opcode can have two sources, so the sends opcode
1488 // is not needed.
1489 
encodeSendsSource0(const Operand & src)1490 void Encoder::encodeSendsSource0(const Operand& src)
1491 {
1492     // "...for sends/sendsc instructions Src0.SrcMod, ... and Src0.SrcType are not used."
1493     // "Src0.RegFile[1], Src1.RegFile[1] are implicitly set to 0,
1494     //  and Src0.RegFile[0] is implicitly set as 1 for sends/sendsc instructions."
1495     switch (src.getKind())
1496     {
1497     case Operand::Kind::DIRECT:
1498         GED_ENCODE(Src0AddrMode, GED_ADDR_MODE_Direct);
1499         break;
1500     case Operand::Kind::INDIRECT:
1501         GED_ENCODE(Src0AddrMode, GED_ADDR_MODE_Indirect);
1502         break;
1503     default:
1504         fatalT("src0: unsupported source operand kind/addrMode (malformed IR)");
1505         return;
1506         break;
1507     }
1508 
1509     if (src.getKind() ==  Operand::Kind::DIRECT)
1510     {
1511         GED_ENCODE(Src0RegNum,    src.getDirRegRef().regNum);
1512         GED_ENCODE(Src0SubRegNum, src.getDirRegRef().subRegNum);
1513     }
1514     else if (src.getKind() ==  Operand::Kind::INDIRECT)
1515     {
1516         auto immAddr = src.getIndImmAddr();
1517         // For platforms >= XeHPC, ImmAddr is encoded as words,
1518         //     platforms <  XeHPC, ImmAddr is encoded as bytes
1519         // For all platforms, ImmAddr is represented in Byte Offset in syntax
1520         if (platform() >= Platform::XE_HPC) {
1521             immAddr /= 2;
1522         }
1523         GED_ENCODE(Src0AddrImm, immAddr);
1524         GED_ENCODE(Src0AddrSubRegNum, src.getIndAddrReg().subRegNum);
1525     }
1526 }
1527 
1528 
encodeSendsSource1(const Operand & src)1529 void Encoder::encodeSendsSource1(const Operand& src)
1530 {
1531     //GED_ENCODE(Src1AddrMode, GED_ADDR_MODE_Direct);
1532     GED_REG_FILE gedRegFile = lowerRegFile(src.getDirRegName());
1533     GED_ENCODE(Src1RegFile, gedRegFile);
1534     GED_ENCODE(Src1RegNum, src.getDirRegRef().regNum);
1535 }
1536 
encodeSendsDestination(const Operand & dst)1537 void Encoder::encodeSendsDestination(const Operand& dst)
1538 {
1539     GED_ENCODE(DstAddrMode, GED_ADDR_MODE_Direct);
1540     GED_ENCODE(DstRegFile, lowerRegFile(dst.getDirRegName()));
1541     // send types use :ud where possible
1542     auto t = dst.getType() == Type::INVALID ? Type::UD : dst.getType();
1543     GED_ENCODE(DstDataType, lowerDataType(t));
1544 
1545     //GED_ENCODE(Saturate, lowerSaturate(dst->getDstModifier()));
1546     //GED_ENCODE(DstHorzStride, static_cast<uint32_t>(dst->getHz()));
1547 
1548     GED_ENCODE(DstRegNum, dst.getDirRegRef().regNum);
1549     // TODO: set correct regType
1550     GED_ENCODE(DstSubRegNum, SubRegToBinaryOffset(
1551         dst.getDirRegRef().subRegNum, RegName::GRF_R, dst.getType(), m_model.platform));
1552 }
1553 
// Encodes source operand S of a ternary instruction in Align16 form.
// The addressing mode is always direct.  For non-macro instructions the
// region stored in the IR is translated into Align16 replicate-control and
// channel-select (swizzle) fields; for macro instructions the math macro
// register is encoded instead.  A fatal error is reported (and encoding of
// this operand stops) when the type combination or region is unsupported.
template <SourceIndex S>
void Encoder::encodeTernarySourceAlign16(const Instruction& inst)
{
    // PreCNL Align16
    // GRF-only
    encodeSrcAddrMode<S>(GED_ADDR_MODE_Direct);

    const Operand& src = inst.getSource(S);

    if (inst.getOpSpec().supportsSourceModifiers()) {
        encodeSrcModifier<S>(src.getSrcModifier());
    }

    // set the data type:
    //  - src0 writes the SrcDataType field
    //  - src1/src2 only get a per-source type on GEN8LP+ when src0 is :f or
    //    :hf; in that case the source itself must also be :f or :hf
    GED_DATA_TYPE gedType = lowerDataType(src.getType());
    if (S == SourceIndex::SRC0) {
        GED_ENCODE(SrcDataType, gedType);
    } else {
        const Operand &src0 = inst.getSource(SourceIndex::SRC0);
        bool src0IsFloating = src0.getType() == Type::F || src0.getType() == Type::HF;
        if (platform() >= Platform::GEN8LP && src0IsFloating) {
            bool srcNIsFloating = src.getType() == Type::F || src.getType() == Type::HF;
            if (src0IsFloating && srcNIsFloating) {
                encodeSrcType<S>(src.getType());
            } else {
                fatalT("src", (int)S, ": mixed types require :f and :hf "
                    "(or vice versa)");
                return;
            }
        }
    }

    if (!inst.isMacro()) {
        const Region& rgn = src.getRegion();
        const RegRef& reg = src.getDirRegRef();
        // Adjusting the subregister when going from the Align1 to the Align16
        // representation: in Align16 the subregister is always 16-byte
        // aligned, but we can use the swizzle to access a non-aligned
        // subregister.
        uint16_t subRegNumber = reg.subRegNum;
        // mad (8) r46.0.xyzw:df r46.0.xyzw:df r50.0.xyzw:df r48.0.xyzw:df {Align16, Q1}
        // mad (2) r5.0.xy:df r5.0.xyxy:df r92.2.xyxy:df r93.0.xyxy:df {Align16, Q1, NoMask} // BDW,SKL
        if (S != SourceIndex::SRC2) {
            // src0 and src1
            if (rgn == Region::SRC8X1 ||
                rgn == Region::SRC4X1 ||
                rgn == Region::SRC2X1) {
                // these regions use no replication and the .xyzw swizzle
                encodeSrcRepCtrl<S>(GED_REP_CTRL_NoRep);
                encodeSrcChanSel<S>(GED_SWIZZLE_x, GED_SWIZZLE_y, GED_SWIZZLE_z, GED_SWIZZLE_w);
            } else if (rgn == Region::SRC0X0) {
                if (src.getType() == Type::DF) {
                    // :df: an even subregister is selected with .xyxy; an odd
                    // one with .zwzw after stepping back to the even subregister
                    if (reg.subRegNum % 2 == 0) {
                        encodeSrcChanSel<S>(GED_SWIZZLE_x, GED_SWIZZLE_y, GED_SWIZZLE_x, GED_SWIZZLE_y);
                    } else {
                        encodeSrcChanSel<S>(GED_SWIZZLE_z, GED_SWIZZLE_w, GED_SWIZZLE_z, GED_SWIZZLE_w);
                        subRegNumber -= 1;
                    }
                } else {
                    // other types use replicate control
                    encodeSrcRepCtrl<S>(GED_REP_CTRL_Rep);
                }
            } else {
                fatalT("src", (int)S, ": unsupported region for Align16 encoding");
                return;
            }
        } else {
            // src2: same scheme, keyed on the SRCXX1 / SRCXX0 regions
            if (rgn == Region::SRCXX1) {
                encodeSrcRepCtrl<S>(GED_REP_CTRL_NoRep);
                encodeSrcChanSel<S>(GED_SWIZZLE_x, GED_SWIZZLE_y, GED_SWIZZLE_z, GED_SWIZZLE_w);
            } else if (rgn == Region::SRCXX0) {
                if (src.getType() == Type::DF) {
                    if (src.getDirRegRef().subRegNum % 2 == 0) {
                        encodeSrcChanSel<S>(GED_SWIZZLE_x, GED_SWIZZLE_y, GED_SWIZZLE_x, GED_SWIZZLE_y);
                    } else {
                        encodeSrcChanSel<S>(GED_SWIZZLE_z, GED_SWIZZLE_w, GED_SWIZZLE_z, GED_SWIZZLE_w);
                        subRegNumber -= 1;
                    }
                } else {
                    encodeSrcRepCtrl<S>(GED_REP_CTRL_Rep);
                }
            }
            // a :df src2 given with region SRC0X0 is also accepted; it always
            // gets .xyxy and the subregister is left untouched
            else if (rgn == Region::SRC0X0 && src.getType() == Type::DF) {
                encodeSrcChanSel<S>(GED_SWIZZLE_x, GED_SWIZZLE_y, GED_SWIZZLE_x, GED_SWIZZLE_y);
            }
            else {
                fatalT("src", (int)S, ": unsupported region for Align16 encoding");
                return;
            }
        }
        // the register is always encoded as a GRF; the subregister offset is
        // computed from the (possibly adjusted) subregister number
        uint32_t regNum = reg.regNum;
        encodeSrcReg<S>(RegName::GRF_R, (uint16_t)regNum);
        auto subReg = SubRegToBinaryOffset(subRegNumber, src.getDirRegName(), src.getType(), m_model.platform);
        encodeSrcSubRegNum<S>(subReg);
    } else {
        // implicit operand accumulator
        // e.g. madm (4) ... -r14.acc3
        encodeSrcReg<S>(RegName::GRF_R,src.getDirRegRef().regNum);
        encodeSrcMathMacroReg<S>(src.getMathMacroExt());
    }
}
1651 
encodeTernaryDestinationAlign16(const Instruction & inst)1652 void Encoder::encodeTernaryDestinationAlign16(const Instruction& inst)
1653 {
1654     const Operand& dst = inst.getDestination();
1655     if (inst.getOpSpec().supportsSaturation()) {
1656         GED_ENCODE(Saturate,
1657             lowerSaturate(dst.getDstModifier()));
1658     }
1659     GED_ENCODE(DstDataType, lowerDataType(dst.getType()));
1660     if (dst.getDirRegName() != RegName::GRF_R) {
1661         fatalT("align16 ternary dst must be to GRF");
1662         return;
1663     }
1664 
1665     // register / info (must be GRF)
1666     GED_ENCODE(DstRegFile, lowerRegFile(dst.getDirRegName()));
1667     uint32_t regNum = dst.getDirRegRef().regNum;
1668     GED_ENCODE(DstRegNum, regNum);
1669     if (inst.isMacro()) {
1670         // macro only
1671         GED_DST_CHAN_EN chanEn = mathMacroRegToChEn(dst.getMathMacroExt());
1672         GED_ENCODE(DstChanEn, chanEn);
1673     } else {
1674         // Align16 instruction (we must convert from Align1)
1675         //
1676         // As long as the Align1 sequences are packed (.xyzw), this is
1677         // straightforward.  However, "scalar" (braoadcast) sequences are
1678         // a bit harder as we must carefully choose the ChEn based on the
1679         // subregister that would be used in Align1
1680         // (See also Decoder::decodeDestinationTernaryAlign16)
1681         GED_DST_CHAN_EN chanEn = GED_DST_CHAN_EN_xyzw;
1682         auto reg = dst.getDirRegRef();
1683         if (inst.getExecSize() == ExecSize::SIMD1) {
1684             // SIMD1 MAD is not allowed, so MDF (and IGC) are generating use
1685             // SIMD4 and SIMD2 with specific channel masks to selectively
1686             // enable just the bottom channel.
1687             if (dst.getType() == Type::DF) {
1688                 // For 64-bit types we use a mad (2) ...
1689                 // Note, only :df is needed since :q and :uq are not supported
1690                 //
1691                 // e.g. mad (2)  r5.0.xy:df     ... {Align16, Q1, NoMask} //
1692                 if (dst.getDirRegRef().subRegNum % 2 == 0) {
1693                     chanEn = GED_DST_CHAN_EN_xy;
1694                 } else {
1695                     // e.g. mad (1) r5.1<1>:df
1696                     //  encodes as
1697                     //      mad (2) r5.0.zw:df
1698                     //           ^ SIMD2 and .zw (~= .1)
1699                     chanEn = GED_DST_CHAN_EN_zw;
1700                     reg.subRegNum -= 1;
1701                 }
1702             } else {
1703                 // 32-bit or 16-bit type (:hf).  We use a SIMD4
1704                 //
1705                 // one channel enabled. E.g. we'll parse
1706                 //   mad (1|M0)  r53.6<1>:f  ...
1707                 // and encode it as
1708                 //   mad (4)     r53.4.z:f
1709                 //        ^ SIMD4    ^^^ aligned subreg .4.z == subreg .6:f Align1
1710                 switch (reg.subRegNum % 4) {
1711                 case 0: chanEn = GED_DST_CHAN_EN_x; break;
1712                 case 1: chanEn = GED_DST_CHAN_EN_y; break;
1713                 case 2: chanEn = GED_DST_CHAN_EN_z; break;
1714                 case 3: chanEn = GED_DST_CHAN_EN_w; break;
1715                 }
1716                 // align the subregister
1717                 reg.subRegNum -= (reg.subRegNum % 4);
1718             }
1719         }
1720         GED_ENCODE(DstChanEn, chanEn);
1721         GED_ENCODE(DstSubRegNum, SubRegToBinaryOffset(
1722             reg.subRegNum, dst.getDirRegName(), dst.getType(), m_model.platform));
1723     }
1724 }
1725 
encodeDstReg(RegName regName,uint16_t regNum)1726 void Encoder::encodeDstReg(RegName regName, uint16_t regNum)
1727 {
1728     // encodes ARF or GRF
1729     uint32_t gedBits = translateRegNum(-1, regName, regNum);
1730     GED_ENCODE(DstRegNum, gedBits);
1731 }
1732 
1733 
encodeImmVal(const ImmVal & val,Type type)1734 void Encoder::encodeImmVal(const ImmVal &val, Type type) {
1735     GED_ENCODE(Imm, typeConvesionHelper(val, type));
1736 }
1737 
1738 template <SourceIndex S>
encodeSrcRepCtrl(GED_REP_CTRL rep)1739 void Encoder::encodeSrcRepCtrl(GED_REP_CTRL rep)
1740 {
1741     if (S == SourceIndex::SRC0) {
1742         GED_ENCODE(Src0RepCtrl, rep);
1743     } else if (S == SourceIndex::SRC1) {
1744         GED_ENCODE(Src1RepCtrl, rep);
1745     } else {
1746         GED_ENCODE(Src2RepCtrl, rep);
1747     }
1748 }
1749 
encodeSrcChanSel(GED_SWIZZLE chSelX,GED_SWIZZLE chSelY,GED_SWIZZLE chSelZ,GED_SWIZZLE chSelW)1750 template <SourceIndex S> void Encoder::encodeSrcChanSel(
1751     GED_SWIZZLE chSelX,
1752     GED_SWIZZLE chSelY,
1753     GED_SWIZZLE chSelZ,
1754     GED_SWIZZLE chSelW)
1755 {
1756     uint32_t chSelBits =
1757         createChanSel(chSelX, chSelY, chSelZ, chSelW);
1758     if (S == SourceIndex::SRC0) {
1759         GED_ENCODE(Src0ChanSel, chSelBits);
1760     } else if (S == SourceIndex::SRC1) {
1761         GED_ENCODE(Src1ChanSel, chSelBits);
1762     } else {
1763         GED_ENCODE(Src2ChanSel, chSelBits);
1764     }
1765 }
1766 
translateRegNum(int opIx,RegName regName,uint16_t regNum)1767 uint32_t Encoder::translateRegNum(
1768     int opIx, RegName regName, uint16_t regNum)
1769 {
1770     uint8_t regNumBits = 0;
1771 
1772     const char *whichOp =
1773         opIx == 0 ? "src0" :
1774             opIx == 1 ? "src1" :
1775             opIx == 2 ? "src2" :
1776             "dst";
1777 
1778     const RegInfo *ri = m_model.lookupRegInfoByRegName(regName);
1779     if (ri == nullptr) {
1780         errorT(whichOp, ": invalid register name for this platform");
1781     } else if (!ri->isRegNumberValid((int)regNum)) {
1782         errorT(whichOp, ": ", ri->syntax, regNum, " number out of range");
1783     } else {
1784         ri->encode((int)regNum, regNumBits);
1785     }
1786     return regNumBits; // widen for GED
1787 }
1788 
mathMacroRegToBits(int src,MathMacroExt implAcc)1789 uint32_t Encoder::mathMacroRegToBits(int src, MathMacroExt implAcc) {
1790     uint32_t bits = 8; // NOACC
1791     switch (implAcc) {
1792     /// or 00000000b (GEN11)
1793     case MathMacroExt::MME0:  bits = 0; break; // 0000b
1794     case MathMacroExt::MME1:  bits = 1; break;
1795     case MathMacroExt::MME2:  bits = 2; break;
1796     case MathMacroExt::MME3:  bits = 3; break;
1797     case MathMacroExt::MME4:  bits = 4; break;
1798     case MathMacroExt::MME5:  bits = 5; break;
1799     case MathMacroExt::MME6:  bits = 6; break;
1800     case MathMacroExt::MME7:  bits = 7; break;
1801     /// or 00008000b (GEN11)
1802     case MathMacroExt::NOMME: bits = 8; break; // 1000b
1803     default:
1804         if (src < 0) {
1805             fatalT("dst operand has invalid math macro register");
1806         } else {
1807             fatalT("src", src, " operand has invalid math macro register");
1808         }
1809         return bits;
1810     }
1811     return bits;
1812 }
mathMacroRegToChEn(MathMacroExt implAcc)1813 GED_DST_CHAN_EN Encoder::mathMacroRegToChEn(MathMacroExt implAcc) {
1814     GED_DST_CHAN_EN bits = GED_DST_CHAN_EN_w; // NOACC
1815     switch (implAcc) {
1816     case MathMacroExt::MME0:   bits = GED_DST_CHAN_EN_None; break; // 0000b
1817     case MathMacroExt::MME1:   bits = GED_DST_CHAN_EN_x;    break;
1818     case MathMacroExt::MME2:   bits = GED_DST_CHAN_EN_y;    break;
1819     case MathMacroExt::MME3:   bits = GED_DST_CHAN_EN_xy;   break;
1820     case MathMacroExt::MME4:   bits = GED_DST_CHAN_EN_z;    break; // 0100b
1821     case MathMacroExt::MME5:   bits = GED_DST_CHAN_EN_xz;   break;
1822     case MathMacroExt::MME6:   bits = GED_DST_CHAN_EN_yz;   break;
1823     case MathMacroExt::MME7:   bits = GED_DST_CHAN_EN_xyz;  break;
1824     case MathMacroExt::NOMME:  bits = GED_DST_CHAN_EN_w;    break; // 1000b
1825     default: fatalT("operand has invalid math macro register");
1826     }
1827     return bits;
1828 }
1829 
encodeOptionsThreadControl(const Instruction & inst)1830 void Encoder::encodeOptionsThreadControl(const Instruction& inst)
1831 {
1832     if (inst.hasInstOpt(InstOpt::NOPREEMPT)) {
1833         if (m_model.supportsNoPreempt()) {
1834             GED_ENCODE(ThreadCtrl, GED_THREAD_CTRL_NoPreempt);
1835         }
1836         else {
1837             warningT("NoPreempt not supported on this platform (dropping)");
1838         }
1839     }
1840 }
1841 
encodeOptions(const Instruction & inst)1842 void Encoder::encodeOptions(const Instruction& inst)
1843 {
1844     GED_ENCODE(DebugCtrl,
1845         inst.hasInstOpt(InstOpt::BREAKPOINT) ?
1846             GED_DEBUG_CTRL_Breakpoint : GED_DEBUG_CTRL_Normal);
1847 
1848     auto &os = inst.getOpSpec();
1849     if (os.supportsDepCtrl()) {
1850         if (inst.hasInstOpt(InstOpt::NODDCHK) &&
1851             !inst.hasInstOpt(InstOpt::NODDCLR))
1852         {
1853             GED_ENCODE(DepCtrl, GED_DEP_CTRL_NoDDChk);
1854         }
1855         else if (!inst.hasInstOpt(InstOpt::NODDCHK) &&
1856                   inst.hasInstOpt(InstOpt::NODDCLR))
1857         {
1858             GED_ENCODE(DepCtrl, GED_DEP_CTRL_NoDDClr);
1859         }
1860         else if (inst.hasInstOpt(InstOpt::NODDCHK) &&
1861                  inst.hasInstOpt(InstOpt::NODDCLR))
1862         {
1863             GED_ENCODE(DepCtrl, GED_DEP_CTRL_NoDDClr_NoDDChk);
1864         }
1865         else if (!inst.getOpSpec().isSendOrSendsFamily() && inst.getOp() != Op::NOP)
1866         {
1867             GED_ENCODE(DepCtrl, GED_DEP_CTRL_Normal);
1868         }
1869     }
1870 
1871     if (inst.hasInstOpt(InstOpt::ATOMIC))
1872     {
1873         GED_ENCODE(ThreadCtrl, GED_THREAD_CTRL_Atomic);
1874     }
1875 
1876     if (inst.hasInstOpt(InstOpt::SWITCH) && m_model.supportsHwDeps())
1877     {
1878         if (inst.getOp() == Op::NOP) {
1879             warningT("nop doesn't support Switch option (dropping)");
1880         } else {
1881             GED_ENCODE(ThreadCtrl, GED_THREAD_CTRL_Switch);
1882         }
1883     }
1884     encodeOptionsThreadControl(inst);
1885 
1886     if (!inst.hasInstOpt(InstOpt::ATOMIC) &&
1887         !inst.hasInstOpt(InstOpt::SWITCH) &&
1888         !inst.hasInstOpt(InstOpt::NOPREEMPT) &&
1889         !inst.getOpSpec().isSendOrSendsFamily() &&
1890         inst.getOp() != Op::NOP)
1891     {
1892         GED_ENCODE(ThreadCtrl, GED_THREAD_CTRL_Normal);
1893     }
1894 
1895     if (inst.hasInstOpt(InstOpt::NOSRCDEPSET))
1896     {
1897         GED_ENCODE(NoSrcDepSet, GED_NO_SRC_DEP_SET_NoSrcDepSet);
1898     }
1899     else if (inst.getOpSpec().isSendOrSendsFamily() &&
1900              m_model.supportNoSrcDepSet())
1901     {
1902         GED_ENCODE(NoSrcDepSet, GED_NO_SRC_DEP_SET_Normal);
1903     }
1904 
1905     if (platform() >= Platform::XE && m_opcode != Op::ILLEGAL) {
1906         SWSB::InstType inst_type = inst.getSWSBInstType(m_opts.swsbEncodeMode);
1907         uint32_t swsbBinary = inst.getSWSB().encode(m_opts.swsbEncodeMode, inst_type);
1908         IGA_ASSERT(inst.getSWSB().verify(m_opts.swsbEncodeMode, inst_type),
1909             "INTERNAL ERROR: invalid SWSB (parser/IR-creator should have prevented this)");
1910 
1911         GED_ENCODE(SWSB, swsbBinary);
1912     }
1913 }
1914 
1915 
// Resolves the branch targets of every instruction recorded in
// m_needToPatch.  For each entry it computes JIP (and UIP where applicable),
// writes the offsets into the entry's GED instruction, and re-encodes that
// instruction into the entry's bits.  Unresolvable labels and GED encoding
// failures are reported as fatal errors at the instruction's location.
void Encoder::patchJumpOffsets()
{
    for (JumpPatch &jp : m_needToPatch)
    {
        const Instruction *inst = jp.inst;
        IGA_ASSERT(
            inst->getOpSpec().isBranching() || inst->isMovWithLabel(),
            "patching non-control-flow/non-mov instruction");

        // on some platforms jmpi is post-increment
        uint32_t jmpiExtraOffset = 0;
        bool isPostIncrementJmpi =
            inst->getOp() == Op::JMPI && !m_model.supportsSimplifiedBranches();
        if (isPostIncrementJmpi) {
            // jmpi is relative to the incremented PC, hence we must add
            // the size of the instruction here.  jmpi probably will never
            // compact, but we'll be careful here
            jmpiExtraOffset = inst->hasInstOpt(InstOpt::COMPACTED) ? 8 : 16;
            IGA_ASSERT(inst->getSource(0).getKind() == Operand::Kind::LABEL,
                "patching non label op");
            // skip registers
        }

        // calla and mov use an absolute offset, so their base PC is 0;
        // everything else is relative to the instruction's encoded PC
        uint32_t encodePC =
            (inst->getOpSpec().isJipAbsolute()) || (inst->getOp() == Op::MOV) ?
            0 : getEncodedPC(inst);

        uint32_t jumpPC = 0;
        const Block *jipBlk = inst->getJIP();
        if (jipBlk == nullptr) {
            // immediate offset: we have to treat this as a relative offset
            // (encodePC is added here and subtracted again below)
            jumpPC = inst->getSource(0).getImmediateValue().s32 + encodePC;
        } else if (!getBlockOffset(jipBlk, jumpPC)) {
            // For call, its target symbol may not be resolvable until in the
            // link stage when other kernels are available.
            if (inst->getOp() != Op::CALL && inst->getOp() != Op::CALLA) {
                fatalAtT(inst->getLoc(), "jip label invalid");
            }
        }

        int32_t jip = jumpPC - encodePC - jmpiExtraOffset;
        // JIP and UIP are in QWORDS for most ops on PreBDW
        int32_t pcUnscale = arePcsInQWords(inst->getOpSpec()) ? 8 : 1;

        if (inst->isMovWithLabel()) {
            // encode mov label: the offset goes into the immediate field
            // (unscaled), along with the src0 data type
            GED_DATA_TYPE src0_ty = lowerDataType(inst->getSource(0).getType());
            GED_ENCODE_TO(Src0DataType, src0_ty, &jp.gedInst);
            GED_ENCODE_TO(Imm, jip, &jp.gedInst);
        } else {
            // encode other branch instructions
            GED_ENCODE_TO(JIP, jip / pcUnscale, &jp.gedInst);
        }

        // UIP is patched for two-source instructions, except brc with a
        // non-immediate src1
        if (inst->getSourceCount() == 2 &&
            (inst->getOp() != Op::BRC || inst->getSource(1).isImm()))
        {
            // No need to set src1 regFile and type,
            // it will be overwritten by UIP
            const Block *uipBlk = inst->getUIP();
            if (uipBlk == nullptr) {
                jumpPC = inst->getSource(1).getImmediateValue().s32 + encodePC;
            } else if (!getBlockOffset(uipBlk, jumpPC)) {
                fatalAtT(inst->getLoc(), "uip label invalid");
            }
            // UIP is always computed relative to the instruction's encoded PC
            encodePC = getEncodedPC(inst);
            int32_t uip = jumpPC - encodePC;
            GED_ENCODE_TO(UIP, uip/pcUnscale, &jp.gedInst);
        }

        // re-encode branch (compacted or native, matching the instruction)
        START_GED_TIMER();
        GED_RETURN_VALUE status = GED_EncodeIns(&jp.gedInst,
            inst->hasInstOpt(InstOpt::COMPACTED) ?
                GED_INS_TYPE_COMPACT : GED_INS_TYPE_NATIVE,
            jp.bits);
        STOP_GED_TIMER();
        if (status != GED_RETURN_VALUE_SUCCESS) {
            fatalAtT(inst->getLoc(),
                "GED_EncodeIns failed: ", gedReturnValueToString(status));
        }
    }
}
2000 
2001 
arePcsInQWords(const OpSpec & os) const2002 bool Encoder::arePcsInQWords(const OpSpec &os) const
2003 {
2004     // everything is in bytes except:
2005     // HSW calla, call, and jmpi
2006     return platform() < Platform::GEN8 &&
2007         os.op != Op::JMPI &&
2008         os.op != Op::CALL &&
2009         os.op != Op::CALLA;
2010 }
2011 
2012 
callNeedsSrc0Region221(const Instruction & inst) const2013 bool Encoder::callNeedsSrc0Region221(const Instruction &inst) const
2014 {
2015     // [call]: "Restriction: The src0 regioning control must be <2;2,1>"
2016     // [calla]: "Restriction: The src0 regioning control must be <2;2,1>"
2017     return (inst.getOp() == Op::CALL && platform() < Platform::GEN8) ||
2018         (inst.getOp() == Op::CALL && platform() == Platform::GEN9) ||
2019         (inst.getOp() == Op::CALLA && platform() <= Platform::GEN10);
2020 }
2021 
callNeedsSrc0Region241(const Instruction & inst) const2022 bool Encoder::callNeedsSrc0Region241(const Instruction &inst) const
2023 {
2024     return (inst.getOp() == Op::CALL && platform() == Platform::GEN11);
2025 }
2026 
encodeTernarySrcRegionVert(SourceIndex S,Region::Vert v)2027 void Encoder::encodeTernarySrcRegionVert(SourceIndex S, Region::Vert v) {
2028     if (S == SourceIndex::SRC0) {
2029         GED_ENCODE(Src0VertStride, lowerRegionVert(v));
2030     } else { // (S == SourceIndex::SRC1)
2031         GED_ENCODE(Src1VertStride, lowerRegionVert(v));
2032     } // S != SRC2 since ternary Align1 doesn't have bits for that
2033 }
2034 
// Hook for fixing up bits that GED either ignores or refuses to let us set.
// This should be empty unless GED fixes are in flight.  Both parameters are
// currently unused and the body intentionally does nothing.
void Encoder::applyGedWorkarounds(
    const Kernel& /* k */, size_t /* bitsLen */)
{
    // NOTE: there should be a GED raw bits setter (we can use this for
    // workarounds...)
}
2043