1 /*******************************************************************************
2 * Copyright 2019-2021 Intel Corporation
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *     http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 *******************************************************************************/
16 
17 #ifndef NGEN_ASM_HPP
18 #define NGEN_ASM_HPP
19 
20 #include "ngen_config.hpp"
21 
22 #include <array>
23 #include <cstdint>
24 #include <sstream>
25 #include <string>
26 
27 #define NGEN_ASM
28 #include "ngen.hpp"
29 
30 
31 namespace ngen {
32 
33 
outputText(std::ostream & str,PrintDetail detail,LabelManager & man) const34 inline void RegData::outputText(std::ostream &str, PrintDetail detail, LabelManager &man) const
35 {
36 #ifdef NGEN_SAFE
37     if (isInvalid()) throw invalid_object_exception();
38 #endif
39     auto vs = getVS();
40     if (detail == PrintDetail::vs_hs)
41         if (vs > 8 && (getHS() != 0))
42             vs = 8;
43 
44     if (getNeg()) str << '-';
45     if (getAbs()) str << "(abs)";
46 
47     if (isARF()) {
48         str << getARFType();
49         switch (getARFType()) {
50             case ARFType::null:
51             case ARFType::sp:
52             case ARFType::ip:
53                 break;
54             default:
55                 str << getARFBase();
56         }
57     } else if (isIndirect()) {
58         str << "r[a" << getIndirectBase() << '.' << getIndirectOff();
59         if (getOffset())
60             str << ',' << getOffset();
61         str << ']';
62     } else
63         str << 'r' << base;
64 
65     if (detail <= PrintDetail::base) return;
66 
67     if (!isIndirect() && !isNull())
68         str << '.' << getOffset();
69 
70     if (detail <= PrintDetail::sub_no_type) return;
71 
72     if (detail >= PrintDetail::hs && !isNull()) {
73         str << '<';
74         if (detail >= PrintDetail::vs_hs && !isVxIndirect())
75             str << vs << ';';
76         if (detail == PrintDetail::full)
77             str << getWidth() << ',';
78         str << getHS();
79         str << '>';
80     }
81 
82     str << ':' << getType();
83 }
84 
operator <<(std::ostream & str,const RegData & r)85 static inline std::ostream& operator<<(std::ostream &str, const RegData &r)
86 {
87     LabelManager man;
88     r.outputText(str, PrintDetail::full, man);
89     return str;
90 }
91 
outputText(std::ostream & str,PrintDetail detail,LabelManager & man) const92 inline void Immediate::outputText(std::ostream &str, PrintDetail detail, LabelManager &man) const
93 {
94     uint64_t nbytes = getBytes(getType());
95     uint64_t val;
96 
97     if (nbytes == 8)
98         val = payload;
99     else
100         val = payload & ((uint64_t(1) << (nbytes * 8)) - 1);
101 
102     str << "0x" << std::hex << val << std::dec;
103     if (!hiddenType && detail >= PrintDetail::sub)
104         str << ':' << type;
105 }
106 
outputText(std::ostream & str,PrintDetail detail,LabelManager & man) const107 inline void ExtendedReg::outputText(std::ostream &str, PrintDetail detail, LabelManager &man) const
108 {
109 #ifdef NGEN_SAFE
110     if (isInvalid()) throw invalid_object_exception();
111 #endif
112 
113     if (base.getNeg()) str << '-';
114     if (base.getAbs()) str << "(abs)";
115 
116     str << 'r' << base.getBase() << '.';
117     if (mmeNum == 8)
118         str << "nomme";
119     else
120         str << "mme" << int(mmeNum);
121 
122     if (detail >= PrintDetail::sub)
123         str << ':' << base.getType();
124 }
125 
outputText(std::ostream & str,PrintDetail detail,LabelManager & man) const126 inline void Align16Operand::outputText(std::ostream &str, PrintDetail detail, LabelManager &man) const
127 {
128 #ifdef NGEN_SAFE
129     if (isInvalid()) throw invalid_object_exception();
130     throw iga_align16_exception();
131 #else
132     str << "<unsupported Align16 operand>";
133 #endif
134 }
135 
outputText(std::ostream & str,PrintDetail detail,LabelManager & man)136 inline void Label::outputText(std::ostream &str, PrintDetail detail, LabelManager &man) {
137     str << 'L' << getID(man);
138 }
139 
140 struct NoOperand {
141     static const bool emptyOp = true;
fixupngen::NoOperand142     void fixup(int esize, DataType defaultType, bool isDest, int arity) const {}
isScalarngen::NoOperand143     constexpr bool isScalar() const { return false; }
144 
outputTextngen::NoOperand145     void outputText(std::ostream &str, PrintDetail detail, LabelManager &man) const {}
146 };
147 
148 struct AsmOperand {
149     union {
150         RegData reg;
151         ExtendedReg ereg;
152         Immediate imm;
153         Label label;
154         GRFRange range;
155     };
156     enum class Type : uint8_t {
157         none = 0,
158         reg = 1,
159         ereg = 2,
160         imm = 3,
161         label = 4,
162         range = 5
163     } type;
164 
AsmOperandngen::AsmOperand165     AsmOperand()                  : type{Type::none} {}
AsmOperandngen::AsmOperand166     AsmOperand(NoOperand)         : AsmOperand() {}
AsmOperandngen::AsmOperand167     AsmOperand(RegData reg_)      : reg{reg_}, type{Type::reg} {}
AsmOperandngen::AsmOperand168     AsmOperand(ExtendedReg ereg_) : ereg{ereg_}, type{Type::ereg} {}
AsmOperandngen::AsmOperand169     AsmOperand(Immediate imm_)    : imm{imm_}, type{Type::imm} {}
AsmOperandngen::AsmOperand170     AsmOperand(Label label_)      : label{label_}, type{Type::label} {}
AsmOperandngen::AsmOperand171     AsmOperand(GRFRange range_)   : range{range_}, type{Type::range} {}
AsmOperandngen::AsmOperand172     AsmOperand(uint32_t imm_)     : imm{imm_}, type{Type::imm} {}
173 
outputTextngen::AsmOperand174     void outputText(std::ostream &str, PrintDetail detail, LabelManager &man) const {
175         switch (type) {
176             case Type::none:    break;
177             case Type::ereg:    ereg.outputText(str, detail, man); break;
178             case Type::reg:     reg.outputText(str, detail, man); break;
179             case Type::imm:     imm.outputText(str, detail, man); break;
180             case Type::label: {
181                 auto clone = label;
182                 clone.outputText(str, detail, man);
183                 break;
184             }
185             case Type::range:   break;  /* not used for output */
186         }
187     }
188 };
189 
190 struct AsmInstruction {
191     Opcode op;
192     uint16_t ext;
193     uint32_t inum;
194     InstructionModifier mod;
195     AsmOperand dst, src[4];
196     LabelManager *labelManager;
197     std::string comment;
198 
AsmInstructionngen::AsmInstruction199     AsmInstruction(Opcode op_, uint16_t ext_, uint32_t inum_, InstructionModifier mod_, AsmOperand dst_,
200         AsmOperand src0, AsmOperand src1, AsmOperand src2, AsmOperand src3, LabelManager *man)
201             : op(op_), ext(ext_), inum(inum_), mod(mod_), dst(dst_), src{src0, src1, src2, src3}, labelManager{man}, comment{} {}
AsmInstructionngen::AsmInstruction202     explicit AsmInstruction(uint32_t inum_, const std::string &comment_)
203             : op(Opcode::illegal), ext(0), inum(inum_), mod{}, dst{}, src{}, labelManager{nullptr}, comment{comment_} {}
204     inline AsmInstruction(const autoswsb::SyncInsertion &si);
205 
isLabelngen::AsmInstruction206     bool isLabel() const   { return (op == Opcode::illegal) && (dst.type == AsmOperand::Type::label); }
isCommentngen::AsmInstruction207     bool isComment() const { return (op == Opcode::illegal) && !comment.empty(); }
208 
209     // Auto-SWSB interface.
autoSWSBngen::AsmInstruction210     bool autoSWSB() const       { return mod.isAutoSWSB(); }
swsbngen::AsmInstruction211     SWSBInfo swsb() const       { return mod.getSWSB(); }
setSWSBngen::AsmInstruction212     void setSWSB(SWSBInfo swsb) { mod.setSWSB(swsb); }
clearAutoSWSBngen::AsmInstruction213     void clearAutoSWSB()        { mod.setAutoSWSB(false); }
opcodengen::AsmInstruction214     Opcode opcode() const       { return op; }
syncFCngen::AsmInstruction215     SyncFunction syncFC() const { return static_cast<SyncFunction>(ext & 0xF); }
sfidngen::AsmInstruction216     SharedFunction sfid() const { return static_cast<SharedFunction>(ext & 0xF); }
eotngen::AsmInstruction217     bool eot() const            { return mod.isEOT(); }
predicatedngen::AsmInstruction218     bool predicated() const     { return !mod.isWrEn() || (mod.getPredCtrl() != PredCtrl::None); }
atomicngen::AsmInstruction219     bool atomic() const         { return mod.isAtomic(); }
220 
dstTypecodengen::AsmInstruction221     inline unsigned dstTypecode()  const { return getTypecode(dst); }
src0Typecodengen::AsmInstruction222     inline unsigned src0Typecode() const { return getTypecode(src[0]); }
src1Typecodengen::AsmInstruction223     inline unsigned src1Typecode() const { return getTypecode(src[1]); }
224     inline autoswsb::DestinationMask destinations(int &jip, int &uip) const;
225     inline bool getOperandRegion(autoswsb::DependencyRegion &region, int opNum) const;
226 
shiftJIPngen::AsmInstruction227     void shiftJIP(int32_t shift) const {}
shiftUIPngen::AsmInstruction228     void shiftUIP(int32_t shift) const {}
229 
getImm32ngen::AsmInstruction230     bool getImm32(uint32_t &imm, int opNum = 0) const {
231         if (src[opNum].type == AsmOperand::Type::imm) {
232             imm = uint32_t(static_cast<uint64_t>(src[opNum].imm));
233             return true;
234         } else
235             return false;
236     }
getARFTypengen::AsmInstruction237     bool getARFType(ARFType &arfType, int opNum) const {
238         auto &opd = (opNum < 0) ? dst : src[opNum];
239         if (opd.type == AsmOperand::Type::reg && opd.reg.isARF()) {
240             arfType = opd.reg.getARFType();
241             return true;
242         } else
243             return false;
244     }
getSendDescngen::AsmInstruction245     bool getSendDesc(MessageDescriptor &desc) const { return getImm32(desc.all, 3); }
246 
247 protected:
248     static inline unsigned getTypecode(const AsmOperand &op);
249 };
250 
AsmInstruction(const autoswsb::SyncInsertion & si)251 AsmInstruction::AsmInstruction(const autoswsb::SyncInsertion &si)
252 {
253     op = Opcode::sync;
254     ext = static_cast<uint8_t>(si.fc);
255     mod = InstructionModifier::createMaskCtrl(true);
256     mod.setSWSB(si.swsb);
257     dst = NoOperand();
258     for (auto n = 0; n < 4; n++)
259         src[n] = NoOperand();
260     if (si.mask)
261         src[0] = Immediate::ud(si.mask);
262     else
263         src[0] = NullRegister();
264 }
265 
getTypecode(const AsmOperand & op)266 unsigned AsmInstruction::getTypecode(const AsmOperand &op)
267 {
268     DataType dt = DataType::invalid;
269 
270     switch (op.type) {
271         case AsmOperand::Type::reg:  dt = op.reg.getType(); break;
272         case AsmOperand::Type::ereg: dt = op.ereg.getType(); break;
273         default: break;
274     }
275 
276     return getTypecode12(dt);
277 }
278 
destinations(int & jip,int & uip) const279 autoswsb::DestinationMask AsmInstruction::destinations(int &jip, int &uip) const
280 {
281     using namespace autoswsb;
282 
283     if (!isBranch(op))
284         return eot() ? DestNone : DestNextIP;
285 
286     if (src[0].type == AsmOperand::Type::reg)
287         return DestUnknown;
288 
289     DestinationMask mask = DestNextIP;
290     if (src[0].type == AsmOperand::Type::label) {
291         auto label = src[0].label;
292         mask |= DestJIP;
293         jip = labelManager->getTarget(label.getID(*labelManager)) - inum;
294     }
295 
296     if (src[1].type == AsmOperand::Type::label) {
297         auto label = src[1].label;
298         mask |= DestUIP;
299         uip = labelManager->getTarget(label.getID(*labelManager)) - inum;
300     }
301 
302     if (op == Opcode::jmpi && mod.getPredCtrl() == PredCtrl::None)
303         mask &= ~DestNextIP;
304 
305     return mask;
306 }
307 
getOperandRegion(autoswsb::DependencyRegion & region,int opNum) const308 bool AsmInstruction::getOperandRegion(autoswsb::DependencyRegion &region, int opNum) const
309 {
310     using namespace autoswsb;
311     const AsmOperand &operand = (opNum < 0) ? dst : src[opNum];
312     RegData rd;
313     auto hw = region.hw;
314 
315     switch (operand.type) {
316         case AsmOperand::Type::reg:    rd = operand.reg; break;
317         case AsmOperand::Type::ereg:   rd = operand.ereg.getBase(); break;
318         case AsmOperand::Type::range:  region = DependencyRegion(hw, operand.range); return true;
319         default: return false;
320     }
321 
322     if (rd.isARF())
323         return false;
324 
325     if (rd.isIndirect())
326         region = DependencyRegion();
327     else if (op == Opcode::send || op == Opcode::sendc) {
328         int len = 0;
329         if (opNum <= 0) {
330             if (src[3].type == AsmOperand::Type::imm) {
331                 MessageDescriptor desc;
332                 desc.all = static_cast<uint64_t>(src[3].imm);
333                 len = (opNum < 0) ? desc.parts.responseLen : desc.parts.messageLen;
334                 if (len == 31) len++;       // 32 GRF responses are encoded as 31. Conservatively use the higher value.
335             } else
336                 len = -1;
337         } else if (opNum == 1) {
338             bool exdescImm = (src[2].type == AsmOperand::Type::imm);
339             if (exdescImm && (hw >= HW::XeHPG))
340                 len = ext >> 8;
341             else
342             if (exdescImm) {
343                 ExtendedMessageDescriptor exdesc;
344                 exdesc.all = static_cast<uint64_t>(src[2].imm);
345                 len = exdesc.parts.extMessageLen;
346             } else
347                 len = -1;
348         }
349         if (len == 0)
350             return false;
351         else if (len == -1)
352             region = DependencyRegion();
353         else
354             region = DependencyRegion(hw, GRFRange(rd.getBase(), len));
355     } else if (op == Opcode::dpas || op == Opcode::dpasw) {
356         unsigned sdepth = ext >> 8;
357         unsigned rcount = ext & 0xFF;
358         unsigned len;
359 
360         switch (opNum) {
361             case -1:
362             case 0: len = rcount; break;
363             case 1: len = sdepth; break;
364             case 2:
365                 if (op == Opcode::dpasw) rcount = (rcount + 1) >> 1;
366                 len = (operand.reg.getByteOffset() + sdepth * rcount * 4 + 31) >> 5;
367                 break;
368             default: return false;
369         }
370 
371         region = DependencyRegion(hw, GRFRange(operand.reg.getBase(), len));
372     } else
373         region = DependencyRegion(hw, mod.getExecSize(), rd);
374 
375     return true;
376 }
377 
378 #if defined(NGEN_GLOBAL_REGS) && !defined(NGEN_GLOBAL_REGS_DEFINED)
379 #include "ngen_registers.hpp"
380 #endif
381 
382 class AsmCodeGenerator {
383 private:
384 #include "ngen_compiler_fix.hpp"
385 public:
AsmCodeGenerator(HW hardware_)386     AsmCodeGenerator(HW hardware_) : hardware(hardware_), isGen12(hardware_ >= HW::Gen12LP),
387             defaultOutput{nullptr}, sync{this}, load{this}, store{this}, atomic{this} {
388         _workaround_();
389         streamStack.push_back(new InstructionStream());
390     }
AsmCodeGenerator(HW hardware_,std::ostream & defaultOutput_)391     AsmCodeGenerator(HW hardware_, std::ostream &defaultOutput_) : AsmCodeGenerator(hardware_) {
392         defaultOutput = &defaultOutput_;
393     }
~AsmCodeGenerator()394     ~AsmCodeGenerator() noexcept(false) {
395         if (defaultOutput != nullptr)
396             getCode(*defaultOutput);
397         for (auto &s : streamStack)
398             delete s;
399     }
400     inline void getCode(std::ostream &out);
enableLineNumbers(bool enable=true)401     void enableLineNumbers(bool enable = true) { lineNumbers = enable; }
402 
403 protected:
404     struct InstructionStream {
405         std::vector<AsmInstruction> buffer;
406         std::vector<uint32_t> labels;
407 
408         template <typename... Remaining>
appendngen::AsmCodeGenerator::InstructionStream409         AsmInstruction &append(Opcode op, uint16_t ext, Remaining&&... args) {
410             buffer.emplace_back(op, ext, 0, std::forward<Remaining>(args)...);
411             return buffer.back();
412         }
413 
appendCommentngen::AsmCodeGenerator::InstructionStream414         void appendComment(const std::string &str) { buffer.emplace_back(0, str); }
415 
markngen::AsmCodeGenerator::InstructionStream416         void mark(Label &label, LabelManager &man) {
417             uint32_t id = label.getID(man);
418 
419             man.setTarget(id, buffer.size());
420             labels.push_back(id);
421             buffer.emplace_back(Opcode::illegal, 0, 0, InstructionModifier(), label, NoOperand(), NoOperand(), NoOperand(), NoOperand(), &man);
422         }
423 
appendngen::AsmCodeGenerator::InstructionStream424         void append(InstructionStream &other, LabelManager &man) {
425             for (uint32_t id : other.labels)
426                 man.offsetTarget(id, buffer.size());
427 
428             buffer.insert(buffer.end(), other.buffer.begin(), other.buffer.end());
429             labels.insert(labels.end(), other.labels.begin(), other.labels.end());
430         }
431     };
432 
433     HW hardware;
434     bool isGen12;
435     std::ostream *defaultOutput;
436     bool lineNumbers = false;
437 
438     Label _labelLocalIDsLoaded;
439     Label _labelArgsLoaded;
440 
441 private:
442     InstructionModifier defaultModifier;
443     LabelManager labelManager;
444     std::vector<InstructionStream*> streamStack;
445 
446     inline void unsupported();
447 
448     // Output functions.
449     template <typename D, typename S0, typename S1, typename S2>
450     inline void opX(Opcode op, DataType defaultType, const InstructionModifier &mod, D dst, S0 src0, S1 src1, S2 src2, uint16_t ext);
451 
opX(Opcode op,DataType defaultType,const InstructionModifier & mod,D dst,S0 src0,S1 src1,S2 src2)452     template <typename D, typename S0, typename S1, typename S2> void opX(Opcode op, DataType defaultType, const InstructionModifier &mod, D dst, S0 src0, S1 src1, S2 src2) {
453         opX(op, defaultType, mod, dst, src0, src1, src2, 0);
454     }
opX(Opcode op,DataType defaultType,const InstructionModifier & mod,D dst,S0 src0,S1 src1)455     template <typename D, typename S0, typename S1> void opX(Opcode op, DataType defaultType, const InstructionModifier &mod, D dst, S0 src0, S1 src1) {
456         opX(op, defaultType, mod, dst, src0, src1, NoOperand());
457     }
opX(Opcode op,const InstructionModifier & mod,D dst,S0 src0,S1 src1)458     template <typename D, typename S0, typename S1> void opX(Opcode op, const InstructionModifier &mod, D dst, S0 src0, S1 src1) {
459         opX(op, DataType::invalid, mod, dst, src0, src1);
460     }
opX(Opcode op,DataType defaultType,const InstructionModifier & mod,D dst,S0 src0)461     template <typename D, typename S0> void opX(Opcode op, DataType defaultType, const InstructionModifier &mod, D dst, S0 src0) {
462         opX(op, defaultType, mod, dst, src0, NoOperand());
463     }
opX(Opcode op,const InstructionModifier & mod,D dst,S0 src0)464     template <typename D, typename S0> void opX(Opcode op, const InstructionModifier &mod, D dst, S0 src0) {
465         opX(op, DataType::invalid, mod, dst, src0);
466     }
opX(Opcode op,DataType defaultType,const InstructionModifier & mod,D dst)467     template <typename D> void opX(Opcode op, DataType defaultType, const InstructionModifier &mod, D dst) {
468         opX(op, defaultType, mod, dst, NoOperand());
469     }
opX(Opcode op,const InstructionModifier & mod,D dst)470     template <typename D> void opX(Opcode op, const InstructionModifier &mod, D dst) {
471         opX(op, DataType::invalid, mod, dst);
472     }
opX(Opcode op)473     void opX(Opcode op) {
474         opX(op, InstructionModifier(), NoOperand());
475     }
opX(Opcode op,const InstructionModifier & mod,Label & jip)476     void opX(Opcode op, const InstructionModifier &mod, Label &jip) {
477         (void) jip.getID(labelManager);
478         opX(op, DataType::invalid, mod, NoOperand(), jip);
479     }
opX(Opcode op,const InstructionModifier & mod,Label & jip,Label & uip)480     void opX(Opcode op, const InstructionModifier &mod, Label &jip, Label &uip) {
481         (void) jip.getID(labelManager);
482         (void) uip.getID(labelManager);
483         opX(op, DataType::invalid, mod, NoOperand(), jip, uip, NoOperand());
484     }
485 
486     template <typename S1, typename ED, typename D>
opSend(Opcode op,const InstructionModifier & mod,SharedFunction sf,RegData dst,RegData src0,S1 src1,ED exdesc,D desc)487     void opSend(Opcode op, const InstructionModifier &mod, SharedFunction sf, RegData dst, RegData src0, S1 src1, ED exdesc, D desc) {
488         auto &i = streamStack.back()->append(op, static_cast<uint8_t>(sf), mod | defaultModifier, dst, src0, src1, exdesc, desc, &labelManager);
489         if (i.src[2].type == AsmOperand::Type::imm) {
490             uint32_t exdesc = static_cast<uint64_t>(i.src[2].imm);
491             if (isGen12) {
492                 if (hardware >= HW::XeHPG) {
493                     i.ext |= 0x80 | (((exdesc >> 6) & 0x1F) << 8);
494                     i.src[2].imm = uint32_t(exdesc & ~0x7EF);
495                 } else
496                 i.src[2].imm = uint32_t(exdesc & ~0x2F);
497             } else
498                 i.src[2].imm = uint32_t(exdesc | static_cast<uint8_t>(sf));
499         }
500     }
opDpas(Opcode op,const InstructionModifier & mod,int sdepth,int rcount,const RegData & dst,const RegData & src0,const RegData & src1,const RegData & src2)501     void opDpas(Opcode op, const InstructionModifier &mod, int sdepth, int rcount, const RegData &dst, const RegData &src0, const RegData &src1, const RegData &src2) {
502         (void) streamStack.back()->append(op, (sdepth << 8) | rcount, mod | defaultModifier, dst, src0, src1, src2, NoOperand(), &labelManager);
503     }
opCall(Opcode op,const InstructionModifier & mod,D dst,S0 src0)504     template <typename D, typename S0> void opCall(Opcode op, const InstructionModifier &mod, D dst, S0 src0) {
505         (void) streamStack.back()->append(op, 0, mod | defaultModifier | NoMask, dst, src0, NoOperand(), NoOperand(), NoOperand(), &labelManager);
506     }
opJmpi(Opcode op,const InstructionModifier & mod,S1 src1)507     template <typename S1> void opJmpi(Opcode op, const InstructionModifier &mod, S1 src1) {
508         (void) streamStack.back()->append(op, 0, mod | defaultModifier | NoMask, NoOperand(), src1, NoOperand(), NoOperand(), NoOperand(), &labelManager);
509     }
opSync(Opcode op,SyncFunction fc,const InstructionModifier & mod,S0 src0)510     template <typename S0> void opSync(Opcode op, SyncFunction fc, const InstructionModifier &mod, S0 src0) {
511         (void) streamStack.back()->append(op, static_cast<uint8_t>(fc), mod | defaultModifier, NoOperand(), src0, NoOperand(), NoOperand(), NoOperand(), &labelManager);
512     }
513 
514     inline void finalize();
515 
516     enum class ModPlacementType {Pre, Mid, Post};
517     inline void outX(std::ostream &out, const AsmInstruction &i, int lineNo);
518     inline void outExt(std::ostream &out, const AsmInstruction &i);
519     inline void outMods(std::ostream &out, const InstructionModifier &mod, Opcode op, ModPlacementType location);
520     inline void outSync(std::ostream &out, const autoswsb::SyncInsertion &si);
521 
522 protected:
523     // Configuration.
setDefaultNoMask(bool def=true)524     void setDefaultNoMask(bool def = true)          { defaultModifier.setWrEn(def); }
setDefaultAutoSWSB(bool def=true)525     void setDefaultAutoSWSB(bool def = true)        { defaultModifier.setAutoSWSB(def); }
getDefaultNoMask() const526     bool getDefaultNoMask() const                   { return defaultModifier.isWrEn(); }
getDefaultAutoSWSB() const527     bool getDefaultAutoSWSB() const                 { return defaultModifier.isAutoSWSB(); }
528 
529     // Stream handling.
pushStream()530     void pushStream()                               { pushStream(new InstructionStream()); }
pushStream(InstructionStream & s)531     void pushStream(InstructionStream &s)           { pushStream(&s); }
pushStream(InstructionStream * s)532     void pushStream(InstructionStream *s)           { streamStack.push_back(s); }
533 
534     inline InstructionStream *popStream();
535 
appendStream(InstructionStream * s)536     void appendStream(InstructionStream *s)         { appendStream(*s); }
appendStream(InstructionStream & s)537     void appendStream(InstructionStream &s)         { streamStack.back()->append(s, labelManager); }
appendCurrentStream()538     void appendCurrentStream()                      { InstructionStream *s = popStream(); appendStream(s); delete s; }
539 
discardStream()540     void discardStream()                            { delete popStream(); }
541 
comment(const std::string & str)542     void comment(const std::string &str)            { streamStack.back()->appendComment(str); }
543 
544     // Instructions.
545     template <typename DT = void>
add(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1)546     void add(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
547         opX(Opcode::add, getDataType<DT>(), mod, dst, src0, src1);
548     }
549     template <typename DT = void>
add(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const Immediate & src1)550     void add(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) {
551         opX(Opcode::add, getDataType<DT>(), mod, dst, src0, src1);
552     }
553     template <typename DT = void>
addc(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1)554     void addc(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
555         opX(Opcode::addc, getDataType<DT>(), (hardware >= HW::XeHPC) ? mod : (mod | AccWrEn), dst, src0, src1);
556     }
557     template <typename DT = void>
addc(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const Immediate & src1)558     void addc(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) {
559         opX(Opcode::addc, getDataType<DT>(), (hardware >= HW::XeHPC) ? mod : (mod | AccWrEn), dst, src0, src1);
560     }
561     template <typename DT = void>
add3(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1,const RegData & src2)562     void add3(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1, const RegData &src2) {
563         opX(Opcode::add3, getDataType<DT>(), mod, dst, src0, src1, src2);
564     }
565     template <typename DT = void>
add3(const InstructionModifier & mod,const RegData & dst,const Immediate & src0,const RegData & src1,const RegData & src2)566     void add3(const InstructionModifier &mod, const RegData &dst, const Immediate &src0, const RegData &src1, const RegData &src2) {
567         opX(Opcode::add3, getDataType<DT>(), mod, dst, src0, src1, src2);
568     }
569     template <typename DT = void>
add3(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1,const Immediate & src2)570     void add3(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1, const Immediate &src2) {
571         opX(Opcode::add3, getDataType<DT>(), mod, dst, src0, src1, src2);
572     }
573     template <typename DT = void>
add3(const InstructionModifier & mod,const RegData & dst,const Immediate & src0,const RegData & src1,const Immediate & src2)574     void add3(const InstructionModifier &mod, const RegData &dst, const Immediate &src0, const RegData &src1, const Immediate &src2) {
575         opX(Opcode::add3, getDataType<DT>(), mod, dst, src0, src1, src2);
576     }
577     template <typename DT = void>
and_(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1)578     void and_(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
579         opX(isGen12 ? Opcode::and_gen12 : Opcode::and_, getDataType<DT>(), mod, dst, src0, src1);
580     }
581     template <typename DT = void>
and_(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const Immediate & src1)582     void and_(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) {
583         opX(isGen12 ? Opcode::and_gen12 : Opcode::and_, getDataType<DT>(), mod, dst, src0, src1);
584     }
585 #ifndef NGEN_NO_OP_NAMES
586     template <typename DT = void>
and(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1)587     void and(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
588         and_<DT>(mod, dst, src0, src1);
589     }
590     template <typename DT = void>
and(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const Immediate & src1)591     void and(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) {
592         and_<DT>(mod, dst, src0, src1);
593     }
594 #endif
595     template <typename DT = void>
asr(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1)596     void asr(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
597         opX(isGen12 ? Opcode::asr_gen12 : Opcode::asr, getDataType<DT>(), mod, dst, src0, src1);
598     }
599     template <typename DT = void>
asr(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const Immediate & src1)600     void asr(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) {
601         opX(isGen12 ? Opcode::asr_gen12 : Opcode::asr, getDataType<DT>(), mod, dst, src0, src1);
602     }
603     template <typename DT = void>
avg(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1)604     void avg(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
605         opX(Opcode::avg, getDataType<DT>(), mod, dst, src0, src1);
606     }
607     template <typename DT = void>
avg(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const Immediate & src1)608     void avg(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) {
609         opX(Opcode::avg, getDataType<DT>(), mod, dst, src0, src1);
610     }
611     template <typename DT = void>
bfe(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1,const RegData & src2)612     void bfe(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1, const RegData &src2) {
613         opX(isGen12 ? Opcode::bfe_gen12 : Opcode::bfe, getDataType<DT>(), mod, dst, src0, src1, src2);
614     }
615     template <typename DT = void>
bfe(const InstructionModifier & mod,const RegData & dst,const Immediate & src0,const RegData & src1,const RegData & src2)616     void bfe(const InstructionModifier &mod, const RegData &dst, const Immediate &src0, const RegData &src1, const RegData &src2) {
617         opX(isGen12 ? Opcode::bfe_gen12 : Opcode::bfe, getDataType<DT>(), mod, dst, src0, src1, src2);
618     }
619     template <typename DT = void>
bfe(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1,const Immediate & src2)620     void bfe(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1, const Immediate &src2) {
621         opX(isGen12 ? Opcode::bfe_gen12 : Opcode::bfe, getDataType<DT>(), mod, dst, src0, src1, src2);
622     }
623     template <typename DT = void>
bfe(const InstructionModifier & mod,const RegData & dst,const Immediate & src0,const RegData & src1,const Immediate & src2)624     void bfe(const InstructionModifier &mod, const RegData &dst, const Immediate &src0, const RegData &src1, const Immediate &src2) {
625         opX(isGen12 ? Opcode::bfe_gen12 : Opcode::bfe, getDataType<DT>(), mod, dst, src0, src1, src2);
626     }
627     template <typename DT = void>
bfi1(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1)628     void bfi1(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
629         opX(isGen12 ? Opcode::bfi1_gen12 : Opcode::bfi1, getDataType<DT>(), mod, dst, src0, src1);
630     }
631     template <typename DT = void>
bfi1(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const Immediate & src1)632     void bfi1(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) {
633         opX(isGen12 ? Opcode::bfi1_gen12 : Opcode::bfi1, getDataType<DT>(), mod, dst, src0, src1);
634     }
635     template <typename DT = void>
bfi2(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1,const RegData & src2)636     void bfi2(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1, const RegData &src2) {
637         opX(isGen12 ? Opcode::bfi2_gen12 : Opcode::bfi2, getDataType<DT>(), mod, dst, src0, src1, src2);
638     }
639     template <typename DT = void>
bfi2(const InstructionModifier & mod,const RegData & dst,const Immediate & src0,const RegData & src1,const RegData & src2)640     void bfi2(const InstructionModifier &mod, const RegData &dst, const Immediate &src0, const RegData &src1, const RegData &src2) {
641         opX(isGen12 ? Opcode::bfi2_gen12 : Opcode::bfi2, getDataType<DT>(), mod, dst, src0, src1, src2);
642     }
643     template <typename DT = void>
bfi2(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1,const Immediate & src2)644     void bfi2(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1, const Immediate &src2) {
645         opX(isGen12 ? Opcode::bfi2_gen12 : Opcode::bfi2, getDataType<DT>(), mod, dst, src0, src1, src2);
646     }
647     template <typename DT = void>
bfi2(const InstructionModifier & mod,const RegData & dst,const Immediate & src0,const RegData & src1,const Immediate & src2)648     void bfi2(const InstructionModifier &mod, const RegData &dst, const Immediate &src0, const RegData &src1, const Immediate &src2) {
649         opX(isGen12 ? Opcode::bfi2_gen12 : Opcode::bfi2, getDataType<DT>(), mod, dst, src0, src1, src2);
650     }
651     template <typename DT = void>
bfn(const InstructionModifier & mod,uint8_t ctrl,const RegData & dst,const RegData & src0,const RegData & src1,const RegData & src2)652     void bfn(const InstructionModifier &mod, uint8_t ctrl, const RegData &dst, const RegData &src0, const RegData &src1, const RegData &src2) {
653         opX(Opcode::bfn, getDataType<DT>(), mod, dst, src0, src1, src2, ctrl);
654     }
655     template <typename DT = void>
bfn(const InstructionModifier & mod,uint8_t ctrl,const RegData & dst,const Immediate & src0,const RegData & src1,const RegData & src2)656     void bfn(const InstructionModifier &mod, uint8_t ctrl, const RegData &dst, const Immediate &src0, const RegData &src1, const RegData &src2) {
657         opX(Opcode::bfn, getDataType<DT>(), mod, dst, src0, src1, src2, ctrl);
658     }
659     template <typename DT = void>
bfn(const InstructionModifier & mod,uint8_t ctrl,const RegData & dst,const RegData & src0,const RegData & src1,const Immediate & src2)660     void bfn(const InstructionModifier &mod, uint8_t ctrl, const RegData &dst, const RegData &src0, const RegData &src1, const Immediate &src2) {
661         opX(Opcode::bfn, getDataType<DT>(), mod, dst, src0, src1, src2, ctrl);
662     }
663     template <typename DT = void>
bfn(const InstructionModifier & mod,uint8_t ctrl,const RegData & dst,const Immediate & src0,const RegData & src1,const Immediate & src2)664     void bfn(const InstructionModifier &mod, uint8_t ctrl, const RegData &dst, const Immediate &src0, const RegData &src1, const Immediate &src2) {
665         opX(Opcode::bfn, getDataType<DT>(), mod, dst, src0, src1, src2, ctrl);
666     }
667     template <typename DT = void>
bfrev(const InstructionModifier & mod,const RegData & dst,const RegData & src0)668     void bfrev(const InstructionModifier &mod, const RegData &dst, const RegData &src0) {
669         opX(isGen12 ? Opcode::bfrev_gen12 : Opcode::bfrev, getDataType<DT>(), mod, dst, src0);
670     }
671     template <typename DT = void>
bfrev(const InstructionModifier & mod,const RegData & dst,const Immediate & src0)672     void bfrev(const InstructionModifier &mod, const RegData &dst, const Immediate &src0) {
673         opX(isGen12 ? Opcode::bfrev_gen12 : Opcode::bfrev, getDataType<DT>(), mod, dst, src0);
674     }
brc(const InstructionModifier & mod,Label & jip,Label & uip)675     void brc(const InstructionModifier &mod, Label &jip, Label &uip) {
676         (void) jip.getID(labelManager);
677         (void) uip.getID(labelManager);
678         opX(Opcode::brc, mod, jip, uip);
679     }
brc(const InstructionModifier & mod,const RegData & src0)680     void brc(const InstructionModifier &mod, const RegData &src0) {
681         opCall(Opcode::brc, mod, NoOperand(), src0);
682     }
brd(const InstructionModifier & mod,Label & jip)683     void brd(const InstructionModifier &mod, Label &jip) {
684         (void) jip.getID(labelManager);
685         opX(Opcode::brd, mod, jip);
686     }
brd(const InstructionModifier & mod,const RegData & src0)687     void brd(const InstructionModifier &mod, const RegData &src0) {
688         opCall(Opcode::brd, mod, NoOperand(), src0);
689     }
break_(const InstructionModifier & mod,Label & jip,Label & uip)690     void break_(const InstructionModifier &mod, Label &jip, Label &uip) {
691         (void) jip.getID(labelManager);
692         (void) uip.getID(labelManager);
693         opX(Opcode::break_, mod, jip, uip);
694     }
call(const InstructionModifier & mod,const RegData & dst,Label & jip)695     void call(const InstructionModifier &mod, const RegData &dst, Label &jip) {
696         (void) jip.getID(labelManager);
697         opCall(Opcode::call, mod, dst, jip);
698     }
call(const InstructionModifier & mod,const RegData & dst,const RegData & jip)699     void call(const InstructionModifier &mod, const RegData &dst, const RegData &jip) {
700         opCall(Opcode::call, mod, dst, jip);
701     }
calla(const InstructionModifier & mod,const RegData & dst,int32_t jip)702     void calla(const InstructionModifier &mod, const RegData &dst, int32_t jip) {
703         opCall(Opcode::calla, mod, dst, Immediate::ud(jip));
704     }
calla(const InstructionModifier & mod,const RegData & dst,const RegData & jip)705     void calla(const InstructionModifier &mod, const RegData &dst, const RegData &jip) {
706         opCall(Opcode::calla, mod, dst, jip);
707     }
708     template <typename DT = void>
cbit(const InstructionModifier & mod,const RegData & dst,const RegData & src0)709     void cbit(const InstructionModifier &mod, const RegData &dst, const RegData &src0) {
710         opX(Opcode::cbit, getDataType<DT>(), mod, dst, src0);
711     }
712     template <typename DT = void>
cbit(const InstructionModifier & mod,const RegData & dst,const Immediate & src0)713     void cbit(const InstructionModifier &mod, const RegData &dst, const Immediate &src0) {
714         opX(Opcode::cbit, getDataType<DT>(), mod, dst, src0);
715     }
716     template <typename DT = void>
cmp(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1)717     void cmp(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
718         opX(isGen12 ? Opcode::cmp_gen12 : Opcode::cmp, getDataType<DT>(), mod, dst, src0, src1);
719     }
720     template <typename DT = void>
cmp(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const Immediate & src1)721     void cmp(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) {
722         opX(isGen12 ? Opcode::cmp_gen12 : Opcode::cmp, getDataType<DT>(), mod, dst, src0, src1);
723     }
724     template <typename DT = void>
cmpn(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1)725     void cmpn(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
726         opX(isGen12 ? Opcode::cmpn_gen12 : Opcode::cmpn, getDataType<DT>(), mod, dst, src0, src1);
727     }
728     template <typename DT = void>
csel(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1,const RegData & src2)729     void csel(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1, const RegData &src2) {
730         opX(isGen12 ? Opcode::csel_gen12 : Opcode::csel, getDataType<DT>(), mod, dst, src0, src1, src2);
731     }
732     template <typename DT = void>
csel(const InstructionModifier & mod,const RegData & dst,const Immediate & src0,const RegData & src1,const RegData & src2)733     void csel(const InstructionModifier &mod, const RegData &dst, const Immediate &src0, const RegData &src1, const RegData &src2) {
734         opX(isGen12 ? Opcode::csel_gen12 : Opcode::csel, getDataType<DT>(), mod, dst, src0, src1, src2);
735     }
736     template <typename DT = void>
csel(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1,const Immediate & src2)737     void csel(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1, const Immediate &src2) {
738         opX(isGen12 ? Opcode::csel_gen12 : Opcode::csel, getDataType<DT>(), mod, dst, src0, src1, src2);
739     }
740     template <typename DT = void>
csel(const InstructionModifier & mod,const RegData & dst,const Immediate & src0,const RegData & src1,const Immediate & src2)741     void csel(const InstructionModifier &mod, const RegData &dst, const Immediate &src0, const RegData &src1, const Immediate &src2) {
742         opX(isGen12 ? Opcode::csel_gen12 : Opcode::csel, getDataType<DT>(), mod, dst, src0, src1, src2);
743     }
cont(const InstructionModifier & mod,Label & jip,Label & uip)744     void cont(const InstructionModifier &mod, Label &jip, Label &uip) {
745         (void) jip.getID(labelManager);
746         (void) uip.getID(labelManager);
747         opX(Opcode::cont, mod, jip, uip);
748     }
749     template <typename DT = void>
dp2(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1)750     void dp2(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
751         opX(Opcode::dp2, getDataType<DT>(), mod, dst, src0, src1);
752     }
753     template <typename DT = void>
dp2(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const Immediate & src1)754     void dp2(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) {
755         opX(Opcode::dp2, getDataType<DT>(), mod, dst, src0, src1);
756     }
757     template <typename DT = void>
dp3(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1)758     void dp3(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
759         opX(Opcode::dp3, getDataType<DT>(), mod, dst, src0, src1);
760     }
761     template <typename DT = void>
dp3(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const Immediate & src1)762     void dp3(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) {
763         opX(Opcode::dp3, getDataType<DT>(), mod, dst, src0, src1);
764     }
765     template <typename DT = void>
dp4(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1)766     void dp4(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
767         opX(Opcode::dp4, getDataType<DT>(), mod, dst, src0, src1);
768     }
769     template <typename DT = void>
dp4(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const Immediate & src1)770     void dp4(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) {
771         opX(Opcode::dp4, getDataType<DT>(), mod, dst, src0, src1);
772     }
773     template <typename DT = void>
dp4a(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1,const RegData & src2)774     void dp4a(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1, const RegData &src2) {
775         opX(Opcode::dp4a, getDataType<DT>(), mod, dst, src0, src1, src2);
776     }
777     template <typename DT = void>
dp4a(const InstructionModifier & mod,const RegData & dst,const Immediate & src0,const RegData & src1,const RegData & src2)778     void dp4a(const InstructionModifier &mod, const RegData &dst, const Immediate &src0, const RegData &src1, const RegData &src2) {
779         opX(Opcode::dp4a, getDataType<DT>(), mod, dst, src0, src1, src2);
780     }
781     template <typename DT = void>
dp4a(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1,const Immediate & src2)782     void dp4a(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1, const Immediate &src2) {
783         opX(Opcode::dp4a, getDataType<DT>(), mod, dst, src0, src1, src2);
784     }
785     template <typename DT = void>
dp4a(const InstructionModifier & mod,const RegData & dst,const Immediate & src0,const RegData & src1,const Immediate & src2)786     void dp4a(const InstructionModifier &mod, const RegData &dst, const Immediate &src0, const RegData &src1, const Immediate &src2) {
787         opX(Opcode::dp4a, getDataType<DT>(), mod, dst, src0, src1, src2);
788     }
dpas(const InstructionModifier & mod,uint8_t sdepth,uint8_t rcount,const RegData & dst,const RegData & src0,const RegData & src1,const RegData & src2)789     void dpas(const InstructionModifier &mod, uint8_t sdepth, uint8_t rcount, const RegData &dst, const RegData &src0, const RegData &src1, const RegData &src2) {
790         opDpas(Opcode::dpas, mod, sdepth, rcount, dst, src0, src1, src2);
791     }
dpasw(const InstructionModifier & mod,uint8_t sdepth,uint8_t rcount,const RegData & dst,const RegData & src0,const RegData & src1,const RegData & src2)792     void dpasw(const InstructionModifier &mod, uint8_t sdepth, uint8_t rcount, const RegData &dst, const RegData &src0, const RegData &src1, const RegData &src2) {
793         opDpas(Opcode::dpasw, mod, sdepth, rcount, dst, src0, src1, src2);
794     }
795     template <typename DT = void>
dph(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1)796     void dph(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
797         opX(Opcode::dph, getDataType<DT>(), mod, dst, src0, src1);
798     }
799     template <typename DT = void>
dph(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const Immediate & src1)800     void dph(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) {
801         opX(Opcode::dph, getDataType<DT>(), mod, dst, src0, src1);
802     }
else_(const InstructionModifier & mod,Label & jip,Label & uip,bool branchCtrl=false)803     void else_(const InstructionModifier &mod, Label &jip, Label &uip, bool branchCtrl = false) {
804         (void) jip.getID(labelManager);
805         (void) uip.getID(labelManager);
806         opX(Opcode::else_, DataType::invalid, mod, NoOperand(), jip, uip, NoOperand(), branchCtrl);
807     }
else_(InstructionModifier mod,Label & jip)808     void else_(InstructionModifier mod, Label &jip) {
809         else_(mod, jip, jip);
810     }
endif(const InstructionModifier & mod,Label & jip)811     void endif(const InstructionModifier &mod, Label &jip) {
812         (void) jip.getID(labelManager);
813         opX(Opcode::endif, mod, NoOperand(), jip);
814     }
endif(const InstructionModifier & mod)815     void endif(const InstructionModifier &mod) {
816         Label next;
817         endif(mod, next);
818         mark(next);
819     }
820     template <typename DT = void>
fbh(const InstructionModifier & mod,const RegData & dst,const RegData & src0)821     void fbh(const InstructionModifier &mod, const RegData &dst, const RegData &src0) {
822         opX(Opcode::fbh, getDataType<DT>(), mod, dst, src0);
823     }
824     template <typename DT = void>
fbh(const InstructionModifier & mod,const RegData & dst,const Immediate & src0)825     void fbh(const InstructionModifier &mod, const RegData &dst, const Immediate &src0) {
826         opX(Opcode::fbh, getDataType<DT>(), mod, dst, src0);
827     }
828     template <typename DT = void>
fbl(const InstructionModifier & mod,const RegData & dst,const RegData & src0)829     void fbl(const InstructionModifier &mod, const RegData &dst, const RegData &src0) {
830         opX(Opcode::fbl, getDataType<DT>(), mod, dst, src0);
831     }
832     template <typename DT = void>
fbl(const InstructionModifier & mod,const RegData & dst,const Immediate & src0)833     void fbl(const InstructionModifier &mod, const RegData &dst, const Immediate &src0) {
834         opX(Opcode::fbl, getDataType<DT>(), mod, dst, src0);
835     }
836     template <typename DT = void>
frc(const InstructionModifier & mod,const RegData & dst,const RegData & src0)837     void frc(const InstructionModifier &mod, const RegData &dst, const RegData &src0) {
838         opX(Opcode::frc, getDataType<DT>(), mod, dst, src0);
839     }
goto_(const InstructionModifier & mod,Label & jip,Label & uip,bool branchCtrl=false)840     void goto_(const InstructionModifier &mod, Label &jip, Label &uip, bool branchCtrl = false) {
841         (void) jip.getID(labelManager);
842         (void) uip.getID(labelManager);
843         opX(Opcode::goto_, DataType::invalid, mod, NoOperand(), jip, uip, NoOperand(), branchCtrl);
844     }
goto_(const InstructionModifier & mod,Label & jip)845     void goto_(const InstructionModifier &mod, Label &jip) {
846         goto_(mod, jip, jip);
847     }
halt(const InstructionModifier & mod,Label & jip,Label & uip)848     void halt(const InstructionModifier &mod, Label &jip, Label &uip) {
849         (void) jip.getID(labelManager);
850         (void) uip.getID(labelManager);
851         opX(Opcode::halt, mod, jip, uip);
852     }
halt(const InstructionModifier & mod,Label & jip)853     void halt(const InstructionModifier &mod, Label &jip) {
854         halt(mod, jip, jip);
855     }
if_(const InstructionModifier & mod,Label & jip,Label & uip,bool branchCtrl=false)856     void if_(const InstructionModifier &mod, Label &jip, Label &uip, bool branchCtrl = false) {
857         (void) jip.getID(labelManager);
858         (void) uip.getID(labelManager);
859         opX(Opcode::if_, DataType::invalid, mod, NoOperand(), jip, uip, NoOperand(), branchCtrl);
860     }
if_(const InstructionModifier & mod,Label & jip)861     void if_(const InstructionModifier &mod, Label &jip) {
862         if_(mod, jip, jip);
863     }
illegal()864     void illegal() {
865         opX(Opcode::illegal);
866     }
join(const InstructionModifier & mod,Label & jip)867     void join(const InstructionModifier &mod, Label &jip) {
868         opX(Opcode::join, mod, jip);
869     }
join(const InstructionModifier & mod)870     void join(const InstructionModifier &mod) {
871         Label next;
872         join(mod, next);
873         mark(next);
874     }
jmpi(const InstructionModifier & mod,Label & jip)875     void jmpi(const InstructionModifier &mod, Label &jip) {
876         (void) jip.getID(labelManager);
877         opJmpi(Opcode::jmpi, mod, jip);
878     }
jmpi(const InstructionModifier & mod,const RegData & jip)879     void jmpi(const InstructionModifier &mod, const RegData &jip) {
880         opJmpi(Opcode::jmpi, mod, jip);
881     }
882     template <typename DT = void>
line(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1)883     void line(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
884         opX(Opcode::line, getDataType<DT>(), mod, dst, src0, src1);
885     }
886     template <typename DT = void>
line(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const Immediate & src1)887     void line(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) {
888         opX(Opcode::line, getDataType<DT>(), mod, dst, src0, src1);
889     }
890     template <typename DT = void>
lrp(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1,const RegData & src2)891     void lrp(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1, const RegData &src2) {
892         opX(Opcode::lrp, getDataType<DT>(), mod, dst, src0, src1, src2);
893     }
894     template <typename DT = void>
lzd(const InstructionModifier & mod,const RegData & dst,const RegData & src0)895     void lzd(const InstructionModifier &mod, const RegData &dst, const RegData &src0) {
896         opX(Opcode::lzd, getDataType<DT>(), mod, dst, src0);
897     }
898     template <typename DT = void>
lzd(const InstructionModifier & mod,const RegData & dst,const Immediate & src0)899     void lzd(const InstructionModifier &mod, const RegData &dst, const Immediate &src0) {
900         opX(Opcode::lzd, getDataType<DT>(), mod, dst, src0);
901     }
902     template <typename DT = void>
mac(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1)903     void mac(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
904         opX(Opcode::mac, getDataType<DT>(), mod, dst, src0, src1);
905     }
906     template <typename DT = void>
mac(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const Immediate & src1)907     void mac(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) {
908         opX(Opcode::mac, getDataType<DT>(), mod, dst, src0, src1);
909     }
910     template <typename DT = void>
mach(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1)911     void mach(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
912         opX(Opcode::mach, getDataType<DT>(), (hardware >= HW::XeHPC) ? mod : (mod | AccWrEn), dst, src0, src1);
913     }
914     template <typename DT = void>
mach(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const Immediate & src1)915     void mach(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) {
916         opX(Opcode::mach, getDataType<DT>(), (hardware >= HW::XeHPC) ? mod : (mod | AccWrEn), dst, src0, src1);
917     }
918     template <typename DT = void>
macl(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1)919     void macl(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
920 #ifdef NGEN_SAFE
921         if (hardware < HW::Gen10) unsupported();
922 #endif
923         opX((hardware >= HW::XeHPC) ? Opcode::macl : Opcode::mach, getDataType<DT>(), mod, dst, src0, src1);
924     }
925     template <typename DT = void>
macl(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const Immediate & src1)926     void macl(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) {
927 #ifdef NGEN_SAFE
928         if (hardware < HW::Gen10) unsupported();
929 #endif
930         opX((hardware >= HW::XeHPC) ? Opcode::macl : Opcode::mach, getDataType<DT>(), mod, dst, src0, src1);
931     }
932     template <typename DT = void>
mad(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1,const RegData & src2)933     void mad(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1, const RegData &src2) {
934         opX(Opcode::mad, getDataType<DT>(), mod, dst, src0, src1, src2);
935     }
936     template <typename DT = void>
mad(const InstructionModifier & mod,const Align16Operand & dst,const Align16Operand & src0,const Align16Operand & src1,const Align16Operand & src2)937     void mad(const InstructionModifier &mod, const Align16Operand &dst, const Align16Operand &src0, const Align16Operand &src1, const Align16Operand &src2) {
938         opX(Opcode::mad, getDataType<DT>(), mod, dst, src0, src1, src2);
939     }
940     template <typename DT = void>
mad(const InstructionModifier & mod,const RegData & dst,const Immediate & src0,const RegData & src1,const RegData & src2)941     void mad(const InstructionModifier &mod, const RegData &dst, const Immediate &src0, const RegData &src1, const RegData &src2) {
942         opX(Opcode::mad, getDataType<DT>(), mod, dst, src0, src1, src2);
943     }
944     template <typename DT = void>
mad(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1,const Immediate & src2)945     void mad(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1, const Immediate &src2) {
946         opX(Opcode::mad, getDataType<DT>(), mod, dst, src0, src1, src2);
947     }
948     template <typename DT = void>
mad(const InstructionModifier & mod,const RegData & dst,const Immediate & src0,const RegData & src1,const Immediate & src2)949     void mad(const InstructionModifier &mod, const RegData &dst, const Immediate &src0, const RegData &src1, const Immediate &src2) {
950         opX(Opcode::mad, getDataType<DT>(), mod, dst, src0, src1, src2);
951     }
952     template <typename DT = void>
madm(const InstructionModifier & mod,const ExtendedReg & dst,const ExtendedReg & src0,const ExtendedReg & src1,const ExtendedReg & src2)953     void madm(const InstructionModifier &mod, const ExtendedReg &dst, const ExtendedReg &src0, const ExtendedReg &src1, const ExtendedReg &src2) {
954         opX(Opcode::madm, getDataType<DT>(), mod, dst, src0, src1, src2);
955     }
956     template <typename DT = void>
math(const InstructionModifier & mod,MathFunction fc,const RegData & dst,const RegData & src0)957     void math(const InstructionModifier &mod, MathFunction fc, const RegData &dst, const RegData &src0) {
958 #ifdef NGEN_SAFE
959         if (mathArgCount(fc) != 1) throw invalid_operand_count_exception();
960 #endif
961         if (fc == MathFunction::rsqtm)
962             math<DT>(mod, fc, dst | nomme, src0 | nomme);
963         else
964             opX(Opcode::math, getDataType<DT>(), mod, dst, src0, NoOperand(), NoOperand(), static_cast<uint8_t>(fc));
965     }
966     template <typename DT = void>
math(const InstructionModifier & mod,MathFunction fc,const RegData & dst,const RegData & src0,const RegData & src1)967     void math(const InstructionModifier &mod, MathFunction fc, const RegData &dst, const RegData &src0, const RegData &src1) {
968 #ifdef NGEN_SAFE
969         if (mathArgCount(fc) != 2) throw invalid_operand_count_exception();
970 #endif
971         if (fc == MathFunction::invm)
972             math<DT>(mod, fc, dst | nomme, src0 | nomme, src1 | nomme);
973         else
974             opX(Opcode::math, getDataType<DT>(), mod, dst, src0, src1, NoOperand(), static_cast<uint8_t>(fc));
975     }
976     template <typename DT = void>
math(const InstructionModifier & mod,MathFunction fc,const RegData & dst,const RegData & src0,const Immediate & src1)977     void math(const InstructionModifier &mod, MathFunction fc, const RegData &dst, const RegData &src0, const Immediate &src1) {
978 #ifdef NGEN_SAFE
979         if (fc == MathFunction::invm || fc == MathFunction::rsqtm) throw invalid_operand_exception();
980 #endif
981         opX(Opcode::math, getDataType<DT>(), mod, dst, src0, src1.forceInt32(), NoOperand(), static_cast<uint8_t>(fc));
982     }
983     template <typename DT = void>
math(InstructionModifier mod,MathFunction fc,const ExtendedReg & dst,const ExtendedReg & src0)984     void math(InstructionModifier mod, MathFunction fc, const ExtendedReg &dst, const ExtendedReg &src0) {
985 #ifdef NGEN_SAFE
986         if (fc != MathFunction::rsqtm) throw invalid_operand_exception();
987 #endif
988         mod.setCMod(ConditionModifier::eo);
989         opX(Opcode::math, getDataType<DT>(), mod, dst, src0, NoOperand(), NoOperand(), static_cast<uint8_t>(fc));
990     }
991     template <typename DT = void>
math(InstructionModifier mod,MathFunction fc,const ExtendedReg & dst,const ExtendedReg & src0,const ExtendedReg & src1)992     void math(InstructionModifier mod, MathFunction fc, const ExtendedReg &dst, const ExtendedReg &src0, const ExtendedReg &src1) {
993 #ifdef NGEN_SAFE
994         if (fc != MathFunction::invm) throw invalid_operand_exception();
995 #endif
996         mod.setCMod(ConditionModifier::eo);
997         opX(Opcode::math, getDataType<DT>(), mod, dst, src0, src1, NoOperand(), static_cast<uint8_t>(fc));
998     }
999     template <typename DT = void>
mov(const InstructionModifier & mod,const RegData & dst,const RegData & src0)1000     void mov(const InstructionModifier &mod, const RegData &dst, const RegData &src0) {
1001         opX(isGen12 ? Opcode::mov_gen12 : Opcode::mov, getDataType<DT>(), mod, dst, src0);
1002     }
1003     template <typename DT = void>
mov(const InstructionModifier & mod,const RegData & dst,const Immediate & src0)1004     void mov(const InstructionModifier &mod, const RegData &dst, const Immediate &src0) {
1005         opX(isGen12 ? Opcode::mov_gen12 : Opcode::mov, getDataType<DT>(), mod, dst, src0);
1006     }
1007     template <typename DT = void>
movi(const InstructionModifier & mod,const RegData & dst,const RegData & src0)1008     void movi(const InstructionModifier &mod, const RegData &dst, const RegData &src0) {
1009         if (hardware >= HW::Gen10)
1010             movi<DT>(mod, dst, src0, null);
1011         else
1012             opX(isGen12 ? Opcode::movi_gen12 : Opcode::movi, getDataType<DT>(), mod, dst, src0);
1013     }
1014     template <typename DT = void>
movi(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const Immediate & src1)1015     void movi(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) {
1016 #ifdef NGEN_SAFE
1017         if (hardware < HW::Gen10) throw unsupported_instruction();
1018 #endif
1019         opX(isGen12 ? Opcode::movi_gen12 : Opcode::movi, getDataType<DT>(), mod, dst, src0, src1);
1020     }
1021     template <typename DT = void>
movi(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1)1022     void movi(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
1023 #ifdef NGEN_SAFE
1024         if (hardware < HW::Gen10) throw unsupported_instruction();
1025 #endif
1026         opX(isGen12 ? Opcode::movi_gen12 : Opcode::movi, getDataType<DT>(), mod, dst, src0, src1);
1027     }
1028     template <typename DT = void>
mul(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1)1029     void mul(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
1030         opX(Opcode::mul, getDataType<DT>(), mod, dst, src0, src1);
1031     }
1032     template <typename DT = void>
mul(const InstructionModifier & mod,const RegData & dst,const RegData & src0,Immediate src1)1033     void mul(const InstructionModifier &mod, const RegData &dst, const RegData &src0, Immediate src1) {
1034         if (dst.getBytes() == 8)
1035             src1 = src1.forceInt32();
1036         opX(Opcode::mul, getDataType<DT>(), mod, dst, src0, src1);
1037     }
nop()1038     void nop() {
1039         opX(isGen12 ? Opcode::nop_gen12 : Opcode::nop);
1040     }
1041     template <typename DT = void>
not_(const InstructionModifier & mod,const RegData & dst,const RegData & src0)1042     void not_(const InstructionModifier &mod, const RegData &dst, const RegData &src0) {
1043         opX(isGen12 ? Opcode::not_gen12 : Opcode::not_, getDataType<DT>(), mod, dst, src0);
1044     }
1045     template <typename DT = void>
not_(const InstructionModifier & mod,const RegData & dst,const Immediate & src0)1046     void not_(const InstructionModifier &mod, const RegData &dst, const Immediate &src0) {
1047         opX(isGen12 ? Opcode::not_gen12 : Opcode::not_, getDataType<DT>(), mod, dst, src0);
1048     }
1049 #ifndef NGEN_NO_OP_NAMES
1050     template <typename DT = void>
not(const InstructionModifier & mod,const RegData & dst,const RegData & src0)1051     void not(const InstructionModifier &mod, const RegData &dst, const RegData &src0) {
1052         not_<DT>(mod, dst, src0);
1053     }
1054     template <typename DT = void>
not(const InstructionModifier & mod,const RegData & dst,const Immediate & src0)1055     void not(const InstructionModifier &mod, const RegData &dst, const Immediate &src0) {
1056         not_<DT>(mod, dst, src0);
1057     }
1058 #endif
1059     template <typename DT = void>
or_(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1)1060     void or_(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
1061         opX(isGen12 ? Opcode::or_gen12 : Opcode::or_, getDataType<DT>(), mod, dst, src0, src1);
1062     }
1063     template <typename DT = void>
or_(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const Immediate & src1)1064     void or_(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) {
1065         opX(isGen12 ? Opcode::or_gen12 : Opcode::or_, getDataType<DT>(), mod, dst, src0, src1);
1066     }
1067 #ifndef NGEN_NO_OP_NAMES
1068     template <typename DT = void>
or(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1)1069     void or(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
1070         or_<DT>(mod, dst, src0, src1);
1071     }
1072     template <typename DT = void>
or(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const Immediate & src1)1073     void or(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) {
1074         or_<DT>(mod, dst, src0, src1);
1075     }
1076 #endif
1077     template <typename DT = void>
pln(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1)1078     void pln(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
1079         opX(Opcode::pln, getDataType<DT>(), mod, dst, src0, src1);
1080     }
ret(const InstructionModifier & mod,const RegData & src0)1081     void ret(const InstructionModifier &mod, const RegData &src0) {
1082         opJmpi(Opcode::ret, mod, src0);
1083     }
1084     template <typename DT = void>
rndd(const InstructionModifier & mod,const RegData & dst,const RegData & src0)1085     void rndd(const InstructionModifier &mod, const RegData &dst, const RegData &src0) {
1086         opX(Opcode::rndd, getDataType<DT>(), mod, dst, src0);
1087     }
1088     template <typename DT = void>
rndd(const InstructionModifier & mod,const RegData & dst,const Immediate & src0)1089     void rndd(const InstructionModifier &mod, const RegData &dst, const Immediate &src0) {
1090         opX(Opcode::rndd, getDataType<DT>(), mod, dst, src0);
1091     }
1092     template <typename DT = void>
rnde(const InstructionModifier & mod,const RegData & dst,const RegData & src0)1093     void rnde(const InstructionModifier &mod, const RegData &dst, const RegData &src0) {
1094         opX(Opcode::rnde, getDataType<DT>(), mod, dst, src0);
1095     }
1096     template <typename DT = void>
rnde(const InstructionModifier & mod,const RegData & dst,const Immediate & src0)1097     void rnde(const InstructionModifier &mod, const RegData &dst, const Immediate &src0) {
1098         opX(Opcode::rnde, getDataType<DT>(), mod, dst, src0);
1099     }
1100     template <typename DT = void>
rndu(const InstructionModifier & mod,const RegData & dst,const RegData & src0)1101     void rndu(const InstructionModifier &mod, const RegData &dst, const RegData &src0) {
1102         opX(Opcode::rndu, getDataType<DT>(), mod, dst, src0);
1103     }
1104     template <typename DT = void>
rndu(const InstructionModifier & mod,const RegData & dst,const Immediate & src0)1105     void rndu(const InstructionModifier &mod, const RegData &dst, const Immediate &src0) {
1106         opX(Opcode::rndu, getDataType<DT>(), mod, dst, src0);
1107     }
1108     template <typename DT = void>
rndz(const InstructionModifier & mod,const RegData & dst,const RegData & src0)1109     void rndz(const InstructionModifier &mod, const RegData &dst, const RegData &src0) {
1110         opX(Opcode::rndz, getDataType<DT>(), mod, dst, src0);
1111     }
1112     template <typename DT = void>
rndz(const InstructionModifier & mod,const RegData & dst,const Immediate & src0)1113     void rndz(const InstructionModifier &mod, const RegData &dst, const Immediate &src0) {
1114         opX(Opcode::rndz, getDataType<DT>(), mod, dst, src0);
1115     }
1116     template <typename DT = void>
rol(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1)1117     void rol(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
1118         opX(isGen12 ? Opcode::rol_gen12 : Opcode::rol, getDataType<DT>(), mod, dst, src0, src1);
1119     }
1120     template <typename DT = void>
rol(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const Immediate & src1)1121     void rol(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) {
1122         opX(isGen12 ? Opcode::rol_gen12 : Opcode::rol, getDataType<DT>(), mod, dst, src0, src1);
1123     }
1124     template <typename DT = void>
ror(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1)1125     void ror(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
1126         opX(isGen12 ? Opcode::ror_gen12 : Opcode::ror, getDataType<DT>(), mod, dst, src0, src1);
1127     }
1128     template <typename DT = void>
ror(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const Immediate & src1)1129     void ror(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) {
1130         opX(isGen12 ? Opcode::ror_gen12 : Opcode::ror, getDataType<DT>(), mod, dst, src0, src1);
1131     }
1132     template <typename DT = void>
sad2(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1)1133     void sad2(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
1134         opX(Opcode::sad2, getDataType<DT>(), mod, dst, src0, src1);
1135     }
1136     template <typename DT = void>
sad2(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const Immediate & src1)1137     void sad2(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) {
1138         opX(Opcode::sad2, getDataType<DT>(), mod, dst, src0, src1);
1139     }
1140     template <typename DT = void>
sada2(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1)1141     void sada2(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
1142         opX(Opcode::sada2, getDataType<DT>(), mod, dst, src0, src1);
1143     }
1144     template <typename DT = void>
sada2(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const Immediate & src1)1145     void sada2(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) {
1146         opX(Opcode::sada2, getDataType<DT>(), mod, dst, src0, src1);
1147     }
1148     template <typename DT = void>
sel(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1)1149     void sel(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
1150         opX(isGen12 ? Opcode::sel_gen12 : Opcode::sel, getDataType<DT>(), mod, dst, src0, src1);
1151     }
1152     template <typename DT = void>
sel(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const Immediate & src1)1153     void sel(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) {
1154         opX(isGen12 ? Opcode::sel_gen12 : Opcode::sel, getDataType<DT>(), mod, dst, src0, src1);
1155     }
1156 
1157     /* Gen12-style sends */
send(const InstructionModifier & mod,SharedFunction sf,const RegData & dst,const RegData & src0,const RegData & src1,uint32_t exdesc,uint32_t desc)1158     void send(const InstructionModifier &mod, SharedFunction sf, const RegData &dst, const RegData &src0, const RegData &src1, uint32_t exdesc, uint32_t desc) {
1159         opSend(isGen12 ? Opcode::send : Opcode::sends, mod, sf, dst, src0, src1, Immediate::ud(exdesc), Immediate::ud(desc));
1160     }
send(const InstructionModifier & mod,SharedFunction sf,const RegData & dst,const RegData & src0,const RegData & src1,const RegData & exdesc,uint32_t desc)1161     void send(const InstructionModifier &mod, SharedFunction sf, const RegData &dst, const RegData &src0, const RegData &src1, const RegData &exdesc, uint32_t desc) {
1162         opSend(isGen12 ? Opcode::send : Opcode::sends, mod, sf, dst, src0, src1, exdesc, Immediate::ud(desc));
1163     }
send(const InstructionModifier & mod,SharedFunction sf,const RegData & dst,const RegData & src0,const RegData & src1,uint32_t exdesc,const RegData & desc)1164     void send(const InstructionModifier &mod, SharedFunction sf, const RegData &dst, const RegData &src0, const RegData &src1, uint32_t exdesc, const RegData &desc) {
1165         opSend(isGen12 ? Opcode::send : Opcode::sends, mod, sf, dst, src0, src1, Immediate::ud(exdesc), desc);
1166     }
send(const InstructionModifier & mod,SharedFunction sf,const RegData & dst,const RegData & src0,const RegData & src1,const RegData & exdesc,const RegData & desc)1167     void send(const InstructionModifier &mod, SharedFunction sf, const RegData &dst, const RegData &src0, const RegData &src1, const RegData &exdesc, const RegData &desc) {
1168         opSend(isGen12 ? Opcode::send : Opcode::sends, mod, sf, dst, src0, src1, exdesc, desc);
1169     }
sendc(const InstructionModifier & mod,SharedFunction sf,const RegData & dst,const RegData & src0,const RegData & src1,uint32_t exdesc,uint32_t desc)1170     void sendc(const InstructionModifier &mod, SharedFunction sf, const RegData &dst, const RegData &src0, const RegData &src1, uint32_t exdesc, uint32_t desc) {
1171         opSend(isGen12 ? Opcode::sendc : Opcode::sendsc, mod, sf, dst, src0, src1, Immediate::ud(exdesc), Immediate::ud(desc));
1172     }
sendc(const InstructionModifier & mod,SharedFunction sf,const RegData & dst,const RegData & src0,const RegData & src1,const RegData & exdesc,uint32_t desc)1173     void sendc(const InstructionModifier &mod, SharedFunction sf, const RegData &dst, const RegData &src0, const RegData &src1, const RegData &exdesc, uint32_t desc) {
1174         opSend(isGen12 ? Opcode::sendc : Opcode::sendsc, mod, sf, dst, src0, src1, exdesc, Immediate::ud(desc));
1175     }
sendc(const InstructionModifier & mod,SharedFunction sf,const RegData & dst,const RegData & src0,const RegData & src1,uint32_t exdesc,const RegData & desc)1176     void sendc(const InstructionModifier &mod, SharedFunction sf, const RegData &dst, const RegData &src0, const RegData &src1, uint32_t exdesc, const RegData &desc) {
1177         opSend(isGen12 ? Opcode::sendc : Opcode::sendsc, mod, sf, dst, src0, src1, Immediate::ud(exdesc), desc);
1178     }
sendc(const InstructionModifier & mod,SharedFunction sf,const RegData & dst,const RegData & src0,const RegData & src1,const RegData & exdesc,const RegData & desc)1179     void sendc(const InstructionModifier &mod, SharedFunction sf, const RegData &dst, const RegData &src0, const RegData &src1, const RegData &exdesc, const RegData &desc) {
1180         opSend(isGen12 ? Opcode::sendc : Opcode::sendsc, mod, sf, dst, src0, src1, exdesc, desc);
1181     }
send(const InstructionModifier & mod,SharedFunction sf,const RegData & dst,const RegData & src0,NoOperand src1,T1 exdesc,T2 desc)1182     template <typename T1, typename T2> void send(const InstructionModifier &mod, SharedFunction sf, const RegData &dst, const RegData &src0, NoOperand src1, T1 exdesc, T2 desc) {
1183         opSend(Opcode::send, mod, sf, dst, src0, src1, exdesc, desc);
1184     }
sendc(const InstructionModifier & mod,SharedFunction sf,const RegData & dst,const RegData & src0,NoOperand src1,T1 exdesc,T2 desc)1185     template <typename T1, typename T2> void sendc(const InstructionModifier &mod, SharedFunction sf, const RegData &dst, const RegData &src0, NoOperand src1, T1 exdesc, T2 desc) {
1186         opSend(Opcode::sendc, mod, sf, dst, src0, src1, exdesc, desc);
1187     }
1188     /* Pre-Gen12 style sends */
send(const InstructionModifier & mod,const RegData & dst,const RegData & src0,uint32_t exdesc,uint32_t desc)1189     void send(const InstructionModifier &mod, const RegData &dst, const RegData &src0, uint32_t exdesc, uint32_t desc) {
1190         if (isGen12)
1191             send(mod, static_cast<SharedFunction>(exdesc & 0xF), dst, src0, null, exdesc, desc);
1192         else
1193             send(mod, SharedFunction::null, dst, src0, NoOperand(), Immediate::ud(exdesc), Immediate::ud(desc));
1194     }
send(const InstructionModifier & mod,const RegData & dst,const RegData & src0,uint32_t exdesc,const RegData & desc)1195     void send(const InstructionModifier &mod, const RegData &dst, const RegData &src0, uint32_t exdesc, const RegData &desc) {
1196         if (isGen12)
1197             send(mod, static_cast<SharedFunction>(exdesc & 0xF), dst, src0, null, exdesc, desc);
1198         else
1199             send(mod, SharedFunction::null, dst, src0, NoOperand(), Immediate::ud(exdesc), desc);
1200     }
sendc(const InstructionModifier & mod,const RegData & dst,const RegData & src0,uint32_t exdesc,uint32_t desc)1201     void sendc(const InstructionModifier &mod, const RegData &dst, const RegData &src0, uint32_t exdesc, uint32_t desc) {
1202         if (isGen12)
1203             sendc(mod, static_cast<SharedFunction>(exdesc & 0xF), dst, src0, null, exdesc, desc);
1204         else
1205             sendc(mod, SharedFunction::null, dst, src0, NoOperand(), Immediate::ud(exdesc), Immediate::ud(desc));
1206     }
sendc(const InstructionModifier & mod,const RegData & dst,const RegData & src0,uint32_t exdesc,const RegData & desc)1207     void sendc(const InstructionModifier &mod, const RegData &dst, const RegData &src0, uint32_t exdesc, const RegData &desc) {
1208         if (isGen12)
1209             sendc(mod, static_cast<SharedFunction>(exdesc & 0xF), dst, src0, null, exdesc, desc);
1210         else
1211             sendc(mod, SharedFunction::null, dst, src0, NoOperand(), Immediate::ud(exdesc), desc);
1212     }
sends(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1,uint32_t exdesc,uint32_t desc)1213     void sends(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1, uint32_t exdesc, uint32_t desc) {
1214         send(mod, static_cast<SharedFunction>(exdesc & 0xF), dst, src0, src1, exdesc, desc);
1215     }
sends(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1,uint32_t exdesc,const RegData & desc)1216     void sends(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1, uint32_t exdesc, const RegData &desc) {
1217         send(mod, static_cast<SharedFunction>(exdesc & 0xF), dst, src0, src1, exdesc, desc);
1218     }
sends(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1,const RegData & exdesc,uint32_t desc)1219     void sends(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1, const RegData &exdesc, uint32_t desc) {
1220 #ifdef NGEN_SAFE
1221         if (isGen12) throw sfid_needed_exception();
1222 #endif
1223         send(mod, static_cast<SharedFunction>(0), dst, src0, src1, exdesc, desc);
1224     }
sends(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1,const RegData & exdesc,const RegData & desc)1225     void sends(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1, const RegData &exdesc, const RegData &desc) {
1226 #ifdef NGEN_SAFE
1227         if (isGen12) throw sfid_needed_exception();
1228 #endif
1229         send(mod, static_cast<SharedFunction>(0), dst, src0, src1, exdesc, desc);
1230     }
sendsc(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1,uint32_t exdesc,uint32_t desc)1231     void sendsc(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1, uint32_t exdesc, uint32_t desc) {
1232         sendc(mod, static_cast<SharedFunction>(exdesc & 0xF), dst, src0, src1, exdesc, desc);
1233     }
sendsc(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1,uint32_t exdesc,const RegData & desc)1234     void sendsc(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1, uint32_t exdesc, const RegData &desc) {
1235         sendc(mod, static_cast<SharedFunction>(exdesc & 0xF), dst, src0, src1, exdesc, desc);
1236     }
sendsc(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1,const RegData & exdesc,uint32_t desc)1237     void sendsc(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1, const RegData &exdesc, uint32_t desc) {
1238 #ifdef NGEN_SAFE
1239         if (isGen12) throw sfid_needed_exception();
1240 #endif
1241         sendc(mod, static_cast<SharedFunction>(0), dst, src0, src1, exdesc, desc);
1242     }
sendsc(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1,const RegData & exdesc,const RegData & desc)1243     void sendsc(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1, const RegData &exdesc, const RegData &desc) {
1244 #ifdef NGEN_SAFE
1245         if (isGen12) throw sfid_needed_exception();
1246 #endif
1247         sendc(mod, static_cast<SharedFunction>(0), dst, src0, src1, exdesc, desc);
1248     }
1249 
1250     template <typename DT = void>
shl(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1)1251     void shl(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
1252         opX(isGen12 ? Opcode::shl_gen12 : Opcode::shl, getDataType<DT>(), mod, dst, src0, src1);
1253     }
1254     template <typename DT = void>
shl(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const Immediate & src1)1255     void shl(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) {
1256         opX(isGen12 ? Opcode::shl_gen12 : Opcode::shl, getDataType<DT>(), mod, dst, src0, src1);
1257     }
1258     template <typename DT = void>
shr(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1)1259     void shr(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
1260         opX(isGen12 ? Opcode::shr_gen12 : Opcode::shr, getDataType<DT>(), mod, dst, src0, src1);
1261     }
1262     template <typename DT = void>
shr(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const Immediate & src1)1263     void shr(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) {
1264         opX(isGen12 ? Opcode::shr_gen12 : Opcode::shr, getDataType<DT>(), mod, dst, src0, src1);
1265     }
1266     template <typename DT = void>
smov(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const Immediate & src1)1267     void smov(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) {
1268         opX(isGen12 ? Opcode::smov_gen12 : Opcode::smov, getDataType<DT>(), mod, dst, src0, src1);
1269     }
1270     template <typename DT = void>
srnd(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1)1271     void srnd(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
1272         opX(Opcode::srnd, getDataType<DT>(), mod, dst, src0, src1);
1273     }
1274     template <typename DT = void>
srnd(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const Immediate & src1)1275     void srnd(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) {
1276         opX(Opcode::srnd, getDataType<DT>(), mod, dst, src0, src1);
1277     }
1278     template <typename DT = void>
subb(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1)1279     void subb(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
1280         opX(Opcode::subb, getDataType<DT>(), (hardware >= HW::XeHPC) ? mod : (mod | AccWrEn), dst, src0, src1);
1281     }
1282     template <typename DT = void>
subb(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const Immediate & src1)1283     void subb(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) {
1284         opX(Opcode::subb, getDataType<DT>(), (hardware >= HW::XeHPC) ? mod : (mod | AccWrEn), dst, src0, src1);
1285     }
wait(const InstructionModifier & mod,const RegData & nreg)1286     void wait(const InstructionModifier &mod, const RegData &nreg) {
1287         opX(Opcode::wait, mod, NoOperand(), nreg);
1288     }
while_(const InstructionModifier & mod,Label & jip)1289     void while_(const InstructionModifier &mod, Label &jip) {
1290         (void) jip.getID(labelManager);
1291         opX(Opcode::while_, mod, jip);
1292     }
1293     template <typename DT = void>
xor_(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1)1294     void xor_(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
1295         opX(isGen12 ? Opcode::xor_gen12 : Opcode::xor_, getDataType<DT>(), mod, dst, src0, src1);
1296     }
1297     template <typename DT = void>
xor_(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const Immediate & src1)1298     void xor_(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) {
1299         opX(isGen12 ? Opcode::xor_gen12 : Opcode::xor_, getDataType<DT>(), mod, dst, src0, src1);
1300     }
1301 #ifndef NGEN_NO_OP_NAMES
1302     template <typename DT = void>
xor(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1)1303     void xor(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
1304         xor_<DT>(mod, dst, src0, src1);
1305     }
1306     template <typename DT = void>
xor(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const Immediate & src1)1307     void xor(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) {
1308         xor_<DT>(mod, dst, src0, src1);
1309     }
1310 #endif
1311 
1312 private:
1313     struct Sync {
1314         AsmCodeGenerator &parent;
1315 
Syncngen::AsmCodeGenerator::Sync1316         Sync(AsmCodeGenerator *parent_) : parent(*parent_) {}
1317 
operator ()ngen::AsmCodeGenerator::Sync1318         void operator()(SyncFunction fc, const InstructionModifier &mod = InstructionModifier()) {
1319             parent.opSync(Opcode::sync, fc, mod, null);
1320         }
operator ()ngen::AsmCodeGenerator::Sync1321         void operator()(SyncFunction fc, const RegData &src0) {
1322             this->operator()(fc, InstructionModifier(), src0);
1323         }
operator ()ngen::AsmCodeGenerator::Sync1324         void operator()(SyncFunction fc, const InstructionModifier &mod, const RegData &src0) {
1325             parent.opSync(Opcode::sync, fc, mod, src0);
1326         }
operator ()ngen::AsmCodeGenerator::Sync1327         void operator()(SyncFunction fc, int src0) {
1328             this->operator()(fc, InstructionModifier(), src0);
1329         }
operator ()ngen::AsmCodeGenerator::Sync1330         void operator()(SyncFunction fc, const InstructionModifier &mod, int src0) {
1331             parent.opSync(Opcode::sync, fc, mod, Immediate::ud(src0));
1332         }
allrdngen::AsmCodeGenerator::Sync1333         void allrd() {
1334             allrd(null);
1335         }
allrdngen::AsmCodeGenerator::Sync1336         void allrd(const InstructionModifier &mod) {
1337             allrd(mod, null);
1338         }
allrdngen::AsmCodeGenerator::Sync1339         void allrd(const RegData &src0) {
1340             allrd(InstructionModifier(), src0);
1341         }
allrdngen::AsmCodeGenerator::Sync1342         void allrd(const InstructionModifier &mod, const RegData &src0) {
1343             this->operator()(SyncFunction::allrd, mod, src0);
1344         }
allrdngen::AsmCodeGenerator::Sync1345         void allrd(uint32_t src0) {
1346             allrd(InstructionModifier(), src0);
1347         }
allrdngen::AsmCodeGenerator::Sync1348         void allrd(const InstructionModifier &mod, uint32_t src0) {
1349             this->operator()(SyncFunction::allrd, mod, src0);
1350         }
allwrngen::AsmCodeGenerator::Sync1351         void allwr() {
1352             allwr(null);
1353         }
allwrngen::AsmCodeGenerator::Sync1354         void allwr(const InstructionModifier &mod) {
1355             allwr(mod, null);
1356         }
allwrngen::AsmCodeGenerator::Sync1357         void allwr(const RegData &src0) {
1358             allwr(InstructionModifier(), src0);
1359         }
allwrngen::AsmCodeGenerator::Sync1360         void allwr(const InstructionModifier &mod, const RegData &src0) {
1361             this->operator()(SyncFunction::allwr, mod, src0);
1362         }
allwrngen::AsmCodeGenerator::Sync1363         void allwr(uint32_t src0) {
1364             allwr(InstructionModifier(), src0);
1365         }
allwrngen::AsmCodeGenerator::Sync1366         void allwr(const InstructionModifier &mod, uint32_t src0) {
1367             this->operator()(SyncFunction::allwr, mod, src0);
1368         }
barngen::AsmCodeGenerator::Sync1369         void bar(const InstructionModifier &mod = InstructionModifier()) {
1370             this->operator()(SyncFunction::bar, mod);
1371         }
barngen::AsmCodeGenerator::Sync1372         void bar(const InstructionModifier &mod, uint32_t src0) {
1373             this->operator()(SyncFunction::bar, mod, src0);
1374         }
barngen::AsmCodeGenerator::Sync1375         void bar(const InstructionModifier &mod, const RegData &src0) {
1376             this->operator()(SyncFunction::bar, mod, src0);
1377         }
barngen::AsmCodeGenerator::Sync1378         void bar(uint32_t src0) {
1379             this->operator()(SyncFunction::bar, InstructionModifier(), src0);
1380         }
barngen::AsmCodeGenerator::Sync1381         void bar(const RegData &src0) {
1382             this->operator()(SyncFunction::bar, InstructionModifier(), src0);
1383         }
hostngen::AsmCodeGenerator::Sync1384         void host(const InstructionModifier &mod = InstructionModifier()) {
1385             this->operator()(SyncFunction::host, mod);
1386         }
nopngen::AsmCodeGenerator::Sync1387         void nop(const InstructionModifier &mod = InstructionModifier()) {
1388             this->operator()(SyncFunction::nop, mod);
1389         }
1390     };
1391 public:
1392     Sync sync;
1393 
1394 private:
1395     struct Load {
1396         AsmCodeGenerator &parent;
1397 
Loadngen::AsmCodeGenerator::Load1398         Load(AsmCodeGenerator *parent_) : parent(*parent_) {}
1399 
1400         template <typename DataSpec>
operator ()ngen::AsmCodeGenerator::Load1401         void operator()(const InstructionModifier &mod, const RegData &dst, const DataSpec &spec, AddressBase base, const RegData &addr)
1402         {
1403             MessageDescriptor desc;
1404             ExtendedMessageDescriptor exdesc;
1405 
1406             encodeLoadDescriptors(parent.hardware, desc, exdesc, mod, dst, spec, base, addr);
1407             parent.send(mod, dst, addr, exdesc.all, desc.all);
1408         }
1409 
1410         template <typename DataSpec>
operator ()ngen::AsmCodeGenerator::Load1411         void operator()(const InstructionModifier &mod, const RegData &dst, const DataSpec &spec, AddressBase base, const GRFDisp &addr)
1412         {
1413             MessageDescriptor desc;
1414             ExtendedMessageDescriptor exdesc;
1415 
1416             encodeLoadDescriptors(parent.hardware, desc, exdesc, mod, dst, spec, base, addr);
1417             parent.send(mod, dst, addr.getBase(), exdesc.all, desc.all);
1418         }
1419 
1420         template <typename DataSpec>
operator ()ngen::AsmCodeGenerator::Load1421         void operator()(SharedFunction sfid, const InstructionModifier &mod, const RegData &dst, const DataSpec &spec, AddressBase base, const GRFDisp &addr)
1422         {
1423             MessageDescriptor desc;
1424             ExtendedMessageDescriptor exdesc;
1425 
1426             exdesc.parts.sfid = static_cast<unsigned>(sfid);
1427             encodeLoadDescriptors(parent.hardware, desc, exdesc, mod, dst, spec, base, addr);
1428             exdesc.parts.sfid = static_cast<unsigned>(sfid);
1429             parent.send(mod, dst, addr.getBase(), exdesc.all, desc.all);
1430         }
1431 
ugmngen::AsmCodeGenerator::Load1432         void ugm(const InstructionModifier &mod, const RegData &dst, DataSpecLSC spec, AddressBase base, const GRFDisp &addr)
1433         {
1434             this->operator()(SharedFunction::ugm, mod, dst, spec, base, addr);
1435         }
ugmlngen::AsmCodeGenerator::Load1436         void ugml(const InstructionModifier &mod, const RegData &dst, DataSpecLSC spec, AddressBase base, const GRFDisp &addr)
1437         {
1438             this->operator()(SharedFunction::ugml, mod, dst, spec, base, addr);
1439         }
tgmngen::AsmCodeGenerator::Load1440         void tgm(const InstructionModifier &mod, const RegData &dst, DataSpecLSC spec, AddressBase base, const GRFDisp &addr)
1441         {
1442             this->operator()(SharedFunction::tgm, mod, dst, spec, base, addr);
1443         }
slmngen::AsmCodeGenerator::Load1444         void slm(const InstructionModifier &mod, const RegData &dst, DataSpecLSC spec, AddressBase base, const GRFDisp &addr)
1445         {
1446             this->operator()(SharedFunction::slm, mod, dst, spec, base, addr);
1447         }
1448     };
1449 
1450     struct Store {
1451         AsmCodeGenerator &parent;
1452 
Storengen::AsmCodeGenerator::Store1453         Store(AsmCodeGenerator *parent_) : parent(*parent_) {}
1454 
1455         template <typename DataSpec>
operator ()ngen::AsmCodeGenerator::Store1456         void operator()(const InstructionModifier &mod, const DataSpec &spec, AddressBase base, const RegData &addr, const RegData &data)
1457         {
1458             MessageDescriptor desc;
1459             ExtendedMessageDescriptor exdesc;
1460 
1461             encodeStoreDescriptors(parent.hardware, desc, exdesc, mod, spec, base, addr);
1462             parent.sends(mod, NullRegister(), addr, data, exdesc.all, desc.all);
1463         }
1464 
1465         template <typename DataSpec>
operator ()ngen::AsmCodeGenerator::Store1466         void operator()(const InstructionModifier &mod, const DataSpec &spec, AddressBase base, const GRFDisp &addr, const RegData &data)
1467         {
1468             MessageDescriptor desc;
1469             ExtendedMessageDescriptor exdesc;
1470 
1471             encodeStoreDescriptors(parent.hardware, desc, exdesc, mod, spec, base, addr);
1472             parent.sends(mod, NullRegister(), addr.getBase(), data, exdesc.all, desc.all);
1473         }
1474 
1475         template <typename DataSpec>
operator ()ngen::AsmCodeGenerator::Store1476         void operator()(SharedFunction sfid, const InstructionModifier &mod, const DataSpec &spec, AddressBase base, const GRFDisp &addr, const RegData &data)
1477         {
1478             MessageDescriptor desc;
1479             ExtendedMessageDescriptor exdesc;
1480 
1481             exdesc.parts.sfid = static_cast<unsigned>(sfid);
1482             encodeStoreDescriptors(parent.hardware, desc, exdesc, mod, spec, base, addr);
1483             exdesc.parts.sfid = static_cast<unsigned>(sfid);
1484             parent.sends(mod, NullRegister(), addr.getBase(), data, exdesc.all, desc.all);
1485         }
1486 
ugmngen::AsmCodeGenerator::Store1487         void ugm(const InstructionModifier &mod, DataSpecLSC spec, AddressBase base, const GRFDisp &addr, const RegData &data)
1488         {
1489             this->operator()(SharedFunction::ugm, mod, spec, base, addr, data);
1490         }
ugmlngen::AsmCodeGenerator::Store1491         void ugml(const InstructionModifier &mod, DataSpecLSC spec, AddressBase base, const GRFDisp &addr, const RegData &data)
1492         {
1493             this->operator()(SharedFunction::ugml, mod, spec, base, addr, data);
1494         }
tgmngen::AsmCodeGenerator::Store1495         void tgm(const InstructionModifier &mod, DataSpecLSC spec, AddressBase base, const GRFDisp &addr, const RegData &data)
1496         {
1497             this->operator()(SharedFunction::tgm, mod, spec, base, addr, data);
1498         }
slmngen::AsmCodeGenerator::Store1499         void slm(const InstructionModifier &mod, DataSpecLSC spec, AddressBase base, const GRFDisp &addr, const RegData &data)
1500         {
1501             this->operator()(SharedFunction::slm, mod, spec, base, addr, data);
1502         }
1503     };
1504 
1505     struct Atomic {
1506         AsmCodeGenerator &parent;
1507 
Atomicngen::AsmCodeGenerator::Atomic1508         Atomic(AsmCodeGenerator *parent_) : parent(*parent_) {}
1509 
1510         template <typename DataSpec>
operator ()ngen::AsmCodeGenerator::Atomic1511         void operator()(AtomicOp op, const InstructionModifier &mod, const RegData &dst, const DataSpec &spec, AddressBase base, const RegData &addr, const RegData &data = NullRegister())
1512         {
1513             MessageDescriptor desc;
1514             ExtendedMessageDescriptor exdesc;
1515 
1516             encodeAtomicDescriptors(parent.hardware, desc, exdesc, op, mod, dst, spec, base, addr);
1517             if (data.isNull())
1518                 parent.send(mod, dst, addr, exdesc.all, desc.all);
1519             else
1520                 parent.sends(mod, dst, addr, data, exdesc.all, desc.all);
1521         }
1522         template <typename DataSpec>
operator ()ngen::AsmCodeGenerator::Atomic1523         void operator()(AtomicOp op, const InstructionModifier &mod, const DataSpec &spec, AddressBase base, const RegData &addr, const RegData &data = NullRegister())
1524         {
1525             (*this)(op, mod, NullRegister(), spec, base, addr, data);
1526         }
1527 
1528         template <typename DataSpec>
operator ()ngen::AsmCodeGenerator::Atomic1529         void operator()(AtomicOp op, const InstructionModifier &mod, const RegData &dst, const DataSpec &spec, AddressBase base, const GRFDisp &addr, const RegData &data = NullRegister())
1530         {
1531             MessageDescriptor desc;
1532             ExtendedMessageDescriptor exdesc;
1533 
1534             encodeAtomicDescriptors(parent.hardware, desc, exdesc, op, mod, dst, spec, base, addr);
1535             parent.sends(mod, dst, addr.getBase(), data, exdesc.all, desc.all);
1536         }
1537         template <typename DataSpec>
operator ()ngen::AsmCodeGenerator::Atomic1538         void operator()(AtomicOp op, const InstructionModifier &mod, const DataSpec &spec, AddressBase base, const GRFDisp &addr, const RegData &data = NullRegister())
1539         {
1540             (*this)(op, mod, NullRegister(), spec, base, addr, data);
1541         }
1542         template <typename DataSpec>
operator ()ngen::AsmCodeGenerator::Atomic1543         void operator()(SharedFunction sfid, AtomicOp op, const InstructionModifier &mod, const RegData &dst, const DataSpec &spec, AddressBase base, const GRFDisp &addr, const RegData &data)
1544         {
1545             MessageDescriptor desc;
1546             ExtendedMessageDescriptor exdesc;
1547 
1548             exdesc.parts.sfid = static_cast<unsigned>(sfid);
1549             encodeAtomicDescriptors(parent.hardware, desc, exdesc, op, mod, dst, spec, base, addr);
1550             exdesc.parts.sfid = static_cast<unsigned>(sfid);
1551             parent.sends(mod, dst, addr.getBase(), data, exdesc.all, desc.all);
1552         }
1553 
ugmngen::AsmCodeGenerator::Atomic1554         void ugm(AtomicOp op, const InstructionModifier &mod, const RegData &dst, DataSpecLSC spec, AddressBase base, const GRFDisp &addr, const RegData &data = NullRegister())
1555         {
1556             this->operator()(SharedFunction::ugm, op, mod, dst, spec, base, addr, data);
1557         }
ugmngen::AsmCodeGenerator::Atomic1558         void ugm(AtomicOp op, const InstructionModifier &mod, DataSpecLSC spec, AddressBase base, const GRFDisp &addr, const RegData &data = NullRegister())
1559         {
1560             this->operator()(SharedFunction::ugm, op, mod, NullRegister(), spec, base, addr, data);
1561         }
ugmlngen::AsmCodeGenerator::Atomic1562         void ugml(AtomicOp op, const InstructionModifier &mod, const RegData &dst, DataSpecLSC spec, AddressBase base, const GRFDisp &addr, const RegData &data = NullRegister())
1563         {
1564             this->operator()(SharedFunction::ugml, op, mod, dst, spec, base, addr, data);
1565         }
ugmlngen::AsmCodeGenerator::Atomic1566         void ugml(AtomicOp op, const InstructionModifier &mod, DataSpecLSC spec, AddressBase base, const GRFDisp &addr, const RegData &data = NullRegister())
1567         {
1568             this->operator()(SharedFunction::ugml, op, mod, NullRegister(), spec, base, addr, data);
1569         }
tgmngen::AsmCodeGenerator::Atomic1570         void tgm(AtomicOp op, const InstructionModifier &mod, const RegData &dst, DataSpecLSC spec, AddressBase base, const GRFDisp &addr, const RegData &data = NullRegister())
1571         {
1572             this->operator()(SharedFunction::tgm, op, mod, dst, spec, base, addr, data);
1573         }
tgmngen::AsmCodeGenerator::Atomic1574         void tgm(AtomicOp op, const InstructionModifier &mod, DataSpecLSC spec, AddressBase base, const GRFDisp &addr, const RegData &data = NullRegister())
1575         {
1576             this->operator()(SharedFunction::tgm, op, mod, NullRegister(), spec, base, addr, data);
1577         }
slmngen::AsmCodeGenerator::Atomic1578         void slm(AtomicOp op, const InstructionModifier &mod, const RegData &dst, DataSpecLSC spec, AddressBase base, const GRFDisp &addr, const RegData &data = NullRegister())
1579         {
1580             this->operator()(SharedFunction::slm, op, mod, dst, spec, base, addr, data);
1581         }
slmngen::AsmCodeGenerator::Atomic1582         void slm(AtomicOp op, const InstructionModifier &mod, DataSpecLSC spec, AddressBase base, const GRFDisp &addr, const RegData &data = NullRegister())
1583         {
1584             this->operator()(SharedFunction::slm, op, mod, NullRegister(), spec, base, addr, data);
1585         }
1586     };
1587 public:
1588     Load load;
1589     Store store;
1590     Atomic atomic;
1591 
wrdep(const GRFRange & r)1592     void wrdep(const GRFRange &r) {
1593         opX(Opcode::wrdep, DataType::ud, InstructionModifier::createAutoSWSB(), null, r);
1594     }
// Single-register convenience form of wrdep.
// NOTE(review): `r - r` presumably yields the one-register GRFRange [r, r],
// which is then forwarded to the range overload -- confirm against
// GRF::operator-.
void wrdep(const GRF &r)
{
    wrdep(r - r);
}
1598 
mark(Label & label)1599     inline void mark(Label &label)          { streamStack.back()->mark(label, labelManager); }
1600 
1601 #include "ngen_pseudo.hpp"
1602 #ifndef NGEN_GLOBAL_REGS
1603 #include "ngen_registers.hpp"
1604 #endif
1605 };
1606 
1607 
unsupported()1608 void AsmCodeGenerator::unsupported()
1609 {
1610 #ifdef NGEN_SAFE
1611     throw unsupported_instruction();
1612 #endif
1613 }
1614 
popStream()1615 AsmCodeGenerator::InstructionStream *AsmCodeGenerator::popStream()
1616 {
1617 #ifdef NGEN_SAFE
1618     if (streamStack.size() <= 1) throw stream_stack_underflow();
1619 #endif
1620 
1621     InstructionStream *result = streamStack.back();
1622     streamStack.pop_back();
1623     return result;
1624 }
1625 
finalize()1626 void AsmCodeGenerator::finalize()
1627 {
1628 #ifdef NGEN_SAFE
1629     if (streamStack.size() > 1) throw unfinished_stream_exception();
1630 #endif
1631     auto &buffer = streamStack.back()->buffer;
1632     int inum = 0;
1633     for (auto &i : buffer)
1634         i.inum = inum++;
1635 }
1636 
getCode(std::ostream & out)1637 void AsmCodeGenerator::getCode(std::ostream &out)
1638 {
1639     finalize();
1640 
1641     autoswsb::BasicBlockList analysis = autoswsb::autoSWSB(hardware, streamStack.back()->buffer);
1642     std::multimap<int32_t, autoswsb::SyncInsertion*> syncs;      // Syncs inserted by auto-SWSB.
1643 
1644     for (auto &bb : analysis)
1645         for (auto &sync : bb.syncs)
1646             syncs.insert(std::make_pair(sync.inum, &sync));
1647 
1648     auto nextSync = syncs.begin();
1649     int lineNo = 0;
1650 
1651     for (auto &i : streamStack.back()->buffer) {
1652         if (i.isLabel()) {
1653             i.dst.label.outputText(out, PrintDetail::full, labelManager);
1654             out << ':' << std::endl;
1655         } else if (i.isComment()) {
1656             out << "// " << i.comment << std::endl;
1657         } else if (i.op != Opcode::wrdep) {
1658             while ((nextSync != syncs.end()) && (nextSync->second->inum == i.inum))
1659                 outX(out, *(nextSync++)->second, lineNo++);
1660             outX(out, i, lineNo++);
1661         }
1662     }
1663 }
1664 
1665 template <typename D, typename S0, typename S1, typename S2>
opX(Opcode op,DataType defaultType,const InstructionModifier & mod,D dst,S0 src0,S1 src1,S2 src2,uint16_t ext)1666 void AsmCodeGenerator::opX(Opcode op, DataType defaultType, const InstructionModifier &mod, D dst, S0 src0, S1 src1, S2 src2, uint16_t ext)
1667 {
1668     bool is2Src = !S1::emptyOp;
1669     bool is3Src = !S2::emptyOp;
1670     int arity = 1 + is2Src + is3Src;
1671 
1672     InstructionModifier emod = mod | defaultModifier;
1673     auto esize = emod.getExecSize();
1674 
1675     if (is3Src && hardware < HW::Gen10)
1676         esize = std::min<int>(esize, 8);        // WA for IGA Align16 emulation issue
1677 
1678 #ifdef NGEN_SAFE
1679     if (esize > 1 && dst.isScalar())
1680         throw invalid_execution_size_exception();
1681 #endif
1682 
1683     dst.fixup(esize, defaultType, true, arity);
1684     src0.fixup(esize, defaultType, false, arity);
1685     src1.fixup(esize, defaultType, false, arity);
1686     src2.fixup(esize, defaultType, false, arity);
1687 
1688     streamStack.back()->append(op, ext, emod, dst, src0, src1, src2, NoOperand{}, &labelManager);
1689 }
1690 
outX(std::ostream & out,const AsmInstruction & i,int lineNo)1691 void AsmCodeGenerator::outX(std::ostream &out, const AsmInstruction &i, int lineNo)
1692 {
1693     bool ternary = (i.src[2].type != AsmOperand::Type::none);
1694     PrintDetail ddst = PrintDetail::hs;
1695     PrintDetail dsrc01 = ternary ? PrintDetail::vs_hs : PrintDetail::full;
1696     PrintDetail dsrc[4] = {dsrc01, dsrc01, PrintDetail::hs, PrintDetail::base};
1697 
1698     switch (i.op) {
1699         case Opcode::send:
1700         case Opcode::sends:
1701         case Opcode::sendc:
1702         case Opcode::sendsc:
1703             ddst = dsrc[0] = dsrc[1] = PrintDetail::base;
1704             dsrc[2] = dsrc[3] = PrintDetail::sub_no_type;
1705             break;
1706         case Opcode::brc:
1707         case Opcode::brd:
1708         case Opcode::call:
1709         case Opcode::calla:
1710             ddst = PrintDetail::sub;
1711             dsrc[0] = PrintDetail::sub_no_type;
1712             break;
1713         case Opcode::jmpi:
1714         case Opcode::ret:
1715             dsrc[0] = PrintDetail::sub_no_type;
1716             break;
1717         case Opcode::dpas:
1718         case Opcode::dpasw:
1719             if (isGen12) ddst = dsrc[0] = dsrc[1] = dsrc[2] = PrintDetail::sub;
1720             break;
1721         case Opcode::sync:
1722             if (isGen12) {
1723                 if (i.src[0].type == AsmOperand::Type::reg)
1724                     dsrc[0] = PrintDetail::sub;
1725                 else
1726                     dsrc[0] = PrintDetail::sub_no_type;
1727             }
1728             break;
1729         default: break;
1730     }
1731 
1732     outMods(out, i.mod, i.op, ModPlacementType::Pre);
1733 
1734     out << getMnemonic(i.op, hardware);
1735     outExt(out, i);
1736     out << '\t';
1737 
1738     outMods(out, i.mod, i.op, ModPlacementType::Mid);
1739 
1740     i.dst.outputText(out, ddst, labelManager); out << '\t';
1741     for (int n = 0; n < 4; n++) {
1742         i.src[n].outputText(out, dsrc[n], labelManager);
1743         if (hardware >= HW::XeHPG && n == 1 && (i.op == Opcode::send || i.op == Opcode::sendc) && (i.ext & 0x80))
1744             out << ':' << (i.ext >> 8);
1745         out << '\t';
1746     }
1747 
1748     outMods(out, i.mod, i.op, ModPlacementType::Post);
1749     if (lineNumbers)
1750         out << "\t// " << lineNo * 2;
1751     out << std::endl;
1752 }
1753 
outExt(std::ostream & out,const AsmInstruction & i)1754 void AsmCodeGenerator::outExt(std::ostream &out, const AsmInstruction &i)
1755 {
1756     switch (i.opcode()) {
1757         case Opcode::else_:
1758         case Opcode::goto_:
1759         case Opcode::if_:       if (i.ext) out << ".b";                         break;
1760         case Opcode::math:      out << '.' << static_cast<MathFunction>(i.ext); break;
1761         default: break;
1762     }
1763 
1764     if (isGen12) switch (i.opcode()) {
1765         case Opcode::send:
1766         case Opcode::sends:     out << '.' << getMnemonic(static_cast<SharedFunction>(i.ext & 0xF), hardware); break;
1767         case Opcode::sync:      out << '.' << static_cast<SyncFunction>(i.ext);                                break;
1768         case Opcode::bfn:       out << ".0x" << std::hex << i.ext << std::dec;                                 break;
1769         case Opcode::dpas:
1770         case Opcode::dpasw: {
1771             int sdepth = i.ext >> 8;
1772             int rcount = i.ext & 0xFF;
1773             out << '.' << sdepth << 'x' << rcount;
1774         }
1775         default: break;
1776     }
1777 }
1778 
outMods(std::ostream & out,const InstructionModifier & mod,Opcode op,AsmCodeGenerator::ModPlacementType location)1779 void AsmCodeGenerator::outMods(std::ostream &out,const InstructionModifier &mod, Opcode op, AsmCodeGenerator::ModPlacementType location)
1780 {
1781     ConditionModifier cmod = mod.getCMod();
1782     PredCtrl ctrl = mod.getPredCtrl();
1783     bool wrEn = mod.isWrEn();
1784     bool havePred = (ctrl != PredCtrl::None) && (cmod != ConditionModifier::eo);
1785 
1786     switch (location) {
1787         case ModPlacementType::Pre:
1788             if (wrEn || havePred) {
1789                 out << '(';
1790                 if (wrEn) {
1791                     out << 'W';
1792                     if (havePred) out << '&';
1793                 }
1794                 if (havePred) {
1795                     if (mod.isPredInv()) out << '~';
1796                     mod.getFlagReg().outputText(out, PrintDetail::sub_no_type, labelManager);
1797                     if (ctrl != PredCtrl::Normal)
1798                         out << '.' << toText(ctrl, mod.isAlign16());
1799                 }
1800                 out << ')';
1801             }
1802             out << '\t';
1803             break;
1804         case ModPlacementType::Mid:
1805             if (mod.getExecSize() > 0)
1806                 out << '(' << mod.getExecSize() << "|M" << mod.getChannelOffset() << ')' << '\t';
1807 
1808             if (cmod != ConditionModifier::none) {
1809                 out << '(' << cmod << ')';
1810                 mod.getFlagReg().outputText(out, PrintDetail::sub_no_type, labelManager);
1811                 out << '\t';
1812             }
1813 
1814             if (mod.isSaturate()) out << "(sat)";
1815             break;
1816         case ModPlacementType::Post:
1817         {
1818             bool havePostMod = false;
1819             auto startPostMod = [&]() {
1820                 out << (havePostMod ? ',' : '{');
1821                 havePostMod = true;
1822             };
1823             auto printPostMod = [&](const char *name) {
1824                 startPostMod(); out << name;
1825             };
1826 
1827             SWSBInfo swsb = mod.getSWSB();
1828             if (swsb.hasToken()) {
1829                 startPostMod(); out << '$' << swsb.parts.token;
1830                 if (swsb.parts.src && !swsb.parts.dst) out << ".src";
1831                 if (swsb.parts.dst && !swsb.parts.src) out << ".dst";
1832             }
1833             if (swsb.hasDist()) {
1834                 startPostMod();
1835                 if (hardware > HW::Gen12LP && (op == Opcode::send || op == Opcode::sendc) && swsb.getPipe() == Pipe::Default)
1836                     out << Pipe::A;
1837                 else if (hardware > HW::Gen12LP || !swsb.hasToken())
1838                     out << swsb.getPipe();
1839                 out << '@' << swsb.parts.dist;
1840             }
1841 
1842             if (mod.isAlign16())                                          printPostMod("Align16");
1843             if (mod.isNoDDClr())                                          printPostMod("NoDDClr");
1844             if (mod.isNoDDChk())                                          printPostMod("NoDDChk");
1845             if (mod.getThreadCtrl() == ThreadCtrl::Atomic)                printPostMod("Atomic");
1846             if (!isGen12 && mod.getThreadCtrl() == ThreadCtrl::Switch)    printPostMod("Switch");
1847             if (!isGen12 && mod.getThreadCtrl() == ThreadCtrl::NoPreempt) printPostMod("NoPreempt");
1848             if (mod.isAccWrEn())                                          printPostMod("AccWrEn");
1849             if (mod.isCompact())                                          printPostMod("Compact");
1850             if (mod.isBreakpoint())                                       printPostMod("Breakpoint");
1851             if (mod.isSerialized())                                       printPostMod("Serialize");
1852             if (mod.isEOT())                                              printPostMod("EOT");
1853 
1854             if (havePostMod) out << '}';
1855         }
1856         break;
1857     }
1858 }
1859 
1860 } /* namespace ngen */
1861 
1862 #endif
1863