1 /*******************************************************************************
2 * Copyright 2019-2021 Intel Corporation
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 *******************************************************************************/
16
17 #ifndef NGEN_ASM_HPP
18 #define NGEN_ASM_HPP
19
20 #include "ngen_config.hpp"
21
22 #include <array>
23 #include <cstdint>
24 #include <sstream>
25 #include <string>
26
27 #define NGEN_ASM
28 #include "ngen.hpp"
29
30
31 namespace ngen {
32
33
outputText(std::ostream & str,PrintDetail detail,LabelManager & man) const34 inline void RegData::outputText(std::ostream &str, PrintDetail detail, LabelManager &man) const
35 {
36 #ifdef NGEN_SAFE
37 if (isInvalid()) throw invalid_object_exception();
38 #endif
39 auto vs = getVS();
40 if (detail == PrintDetail::vs_hs)
41 if (vs > 8 && (getHS() != 0))
42 vs = 8;
43
44 if (getNeg()) str << '-';
45 if (getAbs()) str << "(abs)";
46
47 if (isARF()) {
48 str << getARFType();
49 switch (getARFType()) {
50 case ARFType::null:
51 case ARFType::sp:
52 case ARFType::ip:
53 break;
54 default:
55 str << getARFBase();
56 }
57 } else if (isIndirect()) {
58 str << "r[a" << getIndirectBase() << '.' << getIndirectOff();
59 if (getOffset())
60 str << ',' << getOffset();
61 str << ']';
62 } else
63 str << 'r' << base;
64
65 if (detail <= PrintDetail::base) return;
66
67 if (!isIndirect() && !isNull())
68 str << '.' << getOffset();
69
70 if (detail <= PrintDetail::sub_no_type) return;
71
72 if (detail >= PrintDetail::hs && !isNull()) {
73 str << '<';
74 if (detail >= PrintDetail::vs_hs && !isVxIndirect())
75 str << vs << ';';
76 if (detail == PrintDetail::full)
77 str << getWidth() << ',';
78 str << getHS();
79 str << '>';
80 }
81
82 str << ':' << getType();
83 }
84
operator <<(std::ostream & str,const RegData & r)85 static inline std::ostream& operator<<(std::ostream &str, const RegData &r)
86 {
87 LabelManager man;
88 r.outputText(str, PrintDetail::full, man);
89 return str;
90 }
91
outputText(std::ostream & str,PrintDetail detail,LabelManager & man) const92 inline void Immediate::outputText(std::ostream &str, PrintDetail detail, LabelManager &man) const
93 {
94 uint64_t nbytes = getBytes(getType());
95 uint64_t val;
96
97 if (nbytes == 8)
98 val = payload;
99 else
100 val = payload & ((uint64_t(1) << (nbytes * 8)) - 1);
101
102 str << "0x" << std::hex << val << std::dec;
103 if (!hiddenType && detail >= PrintDetail::sub)
104 str << ':' << type;
105 }
106
outputText(std::ostream & str,PrintDetail detail,LabelManager & man) const107 inline void ExtendedReg::outputText(std::ostream &str, PrintDetail detail, LabelManager &man) const
108 {
109 #ifdef NGEN_SAFE
110 if (isInvalid()) throw invalid_object_exception();
111 #endif
112
113 if (base.getNeg()) str << '-';
114 if (base.getAbs()) str << "(abs)";
115
116 str << 'r' << base.getBase() << '.';
117 if (mmeNum == 8)
118 str << "nomme";
119 else
120 str << "mme" << int(mmeNum);
121
122 if (detail >= PrintDetail::sub)
123 str << ':' << base.getType();
124 }
125
outputText(std::ostream & str,PrintDetail detail,LabelManager & man) const126 inline void Align16Operand::outputText(std::ostream &str, PrintDetail detail, LabelManager &man) const
127 {
128 #ifdef NGEN_SAFE
129 if (isInvalid()) throw invalid_object_exception();
130 throw iga_align16_exception();
131 #else
132 str << "<unsupported Align16 operand>";
133 #endif
134 }
135
outputText(std::ostream & str,PrintDetail detail,LabelManager & man)136 inline void Label::outputText(std::ostream &str, PrintDetail detail, LabelManager &man) {
137 str << 'L' << getID(man);
138 }
139
140 struct NoOperand {
141 static const bool emptyOp = true;
fixupngen::NoOperand142 void fixup(int esize, DataType defaultType, bool isDest, int arity) const {}
isScalarngen::NoOperand143 constexpr bool isScalar() const { return false; }
144
outputTextngen::NoOperand145 void outputText(std::ostream &str, PrintDetail detail, LabelManager &man) const {}
146 };
147
148 struct AsmOperand {
149 union {
150 RegData reg;
151 ExtendedReg ereg;
152 Immediate imm;
153 Label label;
154 GRFRange range;
155 };
156 enum class Type : uint8_t {
157 none = 0,
158 reg = 1,
159 ereg = 2,
160 imm = 3,
161 label = 4,
162 range = 5
163 } type;
164
AsmOperandngen::AsmOperand165 AsmOperand() : type{Type::none} {}
AsmOperandngen::AsmOperand166 AsmOperand(NoOperand) : AsmOperand() {}
AsmOperandngen::AsmOperand167 AsmOperand(RegData reg_) : reg{reg_}, type{Type::reg} {}
AsmOperandngen::AsmOperand168 AsmOperand(ExtendedReg ereg_) : ereg{ereg_}, type{Type::ereg} {}
AsmOperandngen::AsmOperand169 AsmOperand(Immediate imm_) : imm{imm_}, type{Type::imm} {}
AsmOperandngen::AsmOperand170 AsmOperand(Label label_) : label{label_}, type{Type::label} {}
AsmOperandngen::AsmOperand171 AsmOperand(GRFRange range_) : range{range_}, type{Type::range} {}
AsmOperandngen::AsmOperand172 AsmOperand(uint32_t imm_) : imm{imm_}, type{Type::imm} {}
173
outputTextngen::AsmOperand174 void outputText(std::ostream &str, PrintDetail detail, LabelManager &man) const {
175 switch (type) {
176 case Type::none: break;
177 case Type::ereg: ereg.outputText(str, detail, man); break;
178 case Type::reg: reg.outputText(str, detail, man); break;
179 case Type::imm: imm.outputText(str, detail, man); break;
180 case Type::label: {
181 auto clone = label;
182 clone.outputText(str, detail, man);
183 break;
184 }
185 case Type::range: break; /* not used for output */
186 }
187 }
188 };
189
190 struct AsmInstruction {
191 Opcode op;
192 uint16_t ext;
193 uint32_t inum;
194 InstructionModifier mod;
195 AsmOperand dst, src[4];
196 LabelManager *labelManager;
197 std::string comment;
198
AsmInstructionngen::AsmInstruction199 AsmInstruction(Opcode op_, uint16_t ext_, uint32_t inum_, InstructionModifier mod_, AsmOperand dst_,
200 AsmOperand src0, AsmOperand src1, AsmOperand src2, AsmOperand src3, LabelManager *man)
201 : op(op_), ext(ext_), inum(inum_), mod(mod_), dst(dst_), src{src0, src1, src2, src3}, labelManager{man}, comment{} {}
AsmInstructionngen::AsmInstruction202 explicit AsmInstruction(uint32_t inum_, const std::string &comment_)
203 : op(Opcode::illegal), ext(0), inum(inum_), mod{}, dst{}, src{}, labelManager{nullptr}, comment{comment_} {}
204 inline AsmInstruction(const autoswsb::SyncInsertion &si);
205
isLabelngen::AsmInstruction206 bool isLabel() const { return (op == Opcode::illegal) && (dst.type == AsmOperand::Type::label); }
isCommentngen::AsmInstruction207 bool isComment() const { return (op == Opcode::illegal) && !comment.empty(); }
208
209 // Auto-SWSB interface.
autoSWSBngen::AsmInstruction210 bool autoSWSB() const { return mod.isAutoSWSB(); }
swsbngen::AsmInstruction211 SWSBInfo swsb() const { return mod.getSWSB(); }
setSWSBngen::AsmInstruction212 void setSWSB(SWSBInfo swsb) { mod.setSWSB(swsb); }
clearAutoSWSBngen::AsmInstruction213 void clearAutoSWSB() { mod.setAutoSWSB(false); }
opcodengen::AsmInstruction214 Opcode opcode() const { return op; }
syncFCngen::AsmInstruction215 SyncFunction syncFC() const { return static_cast<SyncFunction>(ext & 0xF); }
sfidngen::AsmInstruction216 SharedFunction sfid() const { return static_cast<SharedFunction>(ext & 0xF); }
eotngen::AsmInstruction217 bool eot() const { return mod.isEOT(); }
predicatedngen::AsmInstruction218 bool predicated() const { return !mod.isWrEn() || (mod.getPredCtrl() != PredCtrl::None); }
atomicngen::AsmInstruction219 bool atomic() const { return mod.isAtomic(); }
220
dstTypecodengen::AsmInstruction221 inline unsigned dstTypecode() const { return getTypecode(dst); }
src0Typecodengen::AsmInstruction222 inline unsigned src0Typecode() const { return getTypecode(src[0]); }
src1Typecodengen::AsmInstruction223 inline unsigned src1Typecode() const { return getTypecode(src[1]); }
224 inline autoswsb::DestinationMask destinations(int &jip, int &uip) const;
225 inline bool getOperandRegion(autoswsb::DependencyRegion ®ion, int opNum) const;
226
shiftJIPngen::AsmInstruction227 void shiftJIP(int32_t shift) const {}
shiftUIPngen::AsmInstruction228 void shiftUIP(int32_t shift) const {}
229
getImm32ngen::AsmInstruction230 bool getImm32(uint32_t &imm, int opNum = 0) const {
231 if (src[opNum].type == AsmOperand::Type::imm) {
232 imm = uint32_t(static_cast<uint64_t>(src[opNum].imm));
233 return true;
234 } else
235 return false;
236 }
getARFTypengen::AsmInstruction237 bool getARFType(ARFType &arfType, int opNum) const {
238 auto &opd = (opNum < 0) ? dst : src[opNum];
239 if (opd.type == AsmOperand::Type::reg && opd.reg.isARF()) {
240 arfType = opd.reg.getARFType();
241 return true;
242 } else
243 return false;
244 }
getSendDescngen::AsmInstruction245 bool getSendDesc(MessageDescriptor &desc) const { return getImm32(desc.all, 3); }
246
247 protected:
248 static inline unsigned getTypecode(const AsmOperand &op);
249 };
250
AsmInstruction(const autoswsb::SyncInsertion & si)251 AsmInstruction::AsmInstruction(const autoswsb::SyncInsertion &si)
252 {
253 op = Opcode::sync;
254 ext = static_cast<uint8_t>(si.fc);
255 mod = InstructionModifier::createMaskCtrl(true);
256 mod.setSWSB(si.swsb);
257 dst = NoOperand();
258 for (auto n = 0; n < 4; n++)
259 src[n] = NoOperand();
260 if (si.mask)
261 src[0] = Immediate::ud(si.mask);
262 else
263 src[0] = NullRegister();
264 }
265
getTypecode(const AsmOperand & op)266 unsigned AsmInstruction::getTypecode(const AsmOperand &op)
267 {
268 DataType dt = DataType::invalid;
269
270 switch (op.type) {
271 case AsmOperand::Type::reg: dt = op.reg.getType(); break;
272 case AsmOperand::Type::ereg: dt = op.ereg.getType(); break;
273 default: break;
274 }
275
276 return getTypecode12(dt);
277 }
278
destinations(int & jip,int & uip) const279 autoswsb::DestinationMask AsmInstruction::destinations(int &jip, int &uip) const
280 {
281 using namespace autoswsb;
282
283 if (!isBranch(op))
284 return eot() ? DestNone : DestNextIP;
285
286 if (src[0].type == AsmOperand::Type::reg)
287 return DestUnknown;
288
289 DestinationMask mask = DestNextIP;
290 if (src[0].type == AsmOperand::Type::label) {
291 auto label = src[0].label;
292 mask |= DestJIP;
293 jip = labelManager->getTarget(label.getID(*labelManager)) - inum;
294 }
295
296 if (src[1].type == AsmOperand::Type::label) {
297 auto label = src[1].label;
298 mask |= DestUIP;
299 uip = labelManager->getTarget(label.getID(*labelManager)) - inum;
300 }
301
302 if (op == Opcode::jmpi && mod.getPredCtrl() == PredCtrl::None)
303 mask &= ~DestNextIP;
304
305 return mask;
306 }
307
getOperandRegion(autoswsb::DependencyRegion & region,int opNum) const308 bool AsmInstruction::getOperandRegion(autoswsb::DependencyRegion ®ion, int opNum) const
309 {
310 using namespace autoswsb;
311 const AsmOperand &operand = (opNum < 0) ? dst : src[opNum];
312 RegData rd;
313 auto hw = region.hw;
314
315 switch (operand.type) {
316 case AsmOperand::Type::reg: rd = operand.reg; break;
317 case AsmOperand::Type::ereg: rd = operand.ereg.getBase(); break;
318 case AsmOperand::Type::range: region = DependencyRegion(hw, operand.range); return true;
319 default: return false;
320 }
321
322 if (rd.isARF())
323 return false;
324
325 if (rd.isIndirect())
326 region = DependencyRegion();
327 else if (op == Opcode::send || op == Opcode::sendc) {
328 int len = 0;
329 if (opNum <= 0) {
330 if (src[3].type == AsmOperand::Type::imm) {
331 MessageDescriptor desc;
332 desc.all = static_cast<uint64_t>(src[3].imm);
333 len = (opNum < 0) ? desc.parts.responseLen : desc.parts.messageLen;
334 if (len == 31) len++; // 32 GRF responses are encoded as 31. Conservatively use the higher value.
335 } else
336 len = -1;
337 } else if (opNum == 1) {
338 bool exdescImm = (src[2].type == AsmOperand::Type::imm);
339 if (exdescImm && (hw >= HW::XeHPG))
340 len = ext >> 8;
341 else
342 if (exdescImm) {
343 ExtendedMessageDescriptor exdesc;
344 exdesc.all = static_cast<uint64_t>(src[2].imm);
345 len = exdesc.parts.extMessageLen;
346 } else
347 len = -1;
348 }
349 if (len == 0)
350 return false;
351 else if (len == -1)
352 region = DependencyRegion();
353 else
354 region = DependencyRegion(hw, GRFRange(rd.getBase(), len));
355 } else if (op == Opcode::dpas || op == Opcode::dpasw) {
356 unsigned sdepth = ext >> 8;
357 unsigned rcount = ext & 0xFF;
358 unsigned len;
359
360 switch (opNum) {
361 case -1:
362 case 0: len = rcount; break;
363 case 1: len = sdepth; break;
364 case 2:
365 if (op == Opcode::dpasw) rcount = (rcount + 1) >> 1;
366 len = (operand.reg.getByteOffset() + sdepth * rcount * 4 + 31) >> 5;
367 break;
368 default: return false;
369 }
370
371 region = DependencyRegion(hw, GRFRange(operand.reg.getBase(), len));
372 } else
373 region = DependencyRegion(hw, mod.getExecSize(), rd);
374
375 return true;
376 }
377
378 #if defined(NGEN_GLOBAL_REGS) && !defined(NGEN_GLOBAL_REGS_DEFINED)
379 #include "ngen_registers.hpp"
380 #endif
381
382 class AsmCodeGenerator {
383 private:
384 #include "ngen_compiler_fix.hpp"
385 public:
// Creates a generator for `hardware_` with no default output stream.
// isGen12 is set for Gen12LP and later, and an initial, empty instruction
// stream is pushed on the stream stack.
AsmCodeGenerator(HW hardware_)
    : hardware(hardware_),
      isGen12(hardware_ >= HW::Gen12LP),
      defaultOutput{nullptr},
      sync{this},
      load{this},
      store{this},
      atomic{this}
{
    _workaround_();
    streamStack.push_back(new InstructionStream());
}
// Creates a generator for `hardware_` that writes its code to `defaultOutput_`
// when destroyed. Only the address of the stream is kept, so the stream must
// remain valid until the generator's destructor has run.
AsmCodeGenerator(HW hardware_, std::ostream &defaultOutput_)
    : AsmCodeGenerator(hardware_)
{
    defaultOutput = &defaultOutput_;
}
// Destructor. If a default output stream was supplied at construction, the
// generated code is written to it first; noexcept(false) lets an exception
// from getCode propagate to the caller. Every stream still on the stack is
// freed on both the normal and the exceptional path, so a failure while
// emitting code does not leak the streams.
~AsmCodeGenerator() noexcept(false) {
    auto releaseStreams = [this]() {
        for (auto *s : streamStack)
            delete s;
        streamStack.clear();
    };

    try {
        if (defaultOutput != nullptr)
            getCode(*defaultOutput);
    } catch (...) {
        releaseStreams();
        throw;
    }

    releaseStreams();
}
400 inline void getCode(std::ostream &out);
enableLineNumbers(bool enable=true)401 void enableLineNumbers(bool enable = true) { lineNumbers = enable; }
402
403 protected:
404 struct InstructionStream {
405 std::vector<AsmInstruction> buffer;
406 std::vector<uint32_t> labels;
407
408 template <typename... Remaining>
appendngen::AsmCodeGenerator::InstructionStream409 AsmInstruction &append(Opcode op, uint16_t ext, Remaining&&... args) {
410 buffer.emplace_back(op, ext, 0, std::forward<Remaining>(args)...);
411 return buffer.back();
412 }
413
appendCommentngen::AsmCodeGenerator::InstructionStream414 void appendComment(const std::string &str) { buffer.emplace_back(0, str); }
415
markngen::AsmCodeGenerator::InstructionStream416 void mark(Label &label, LabelManager &man) {
417 uint32_t id = label.getID(man);
418
419 man.setTarget(id, buffer.size());
420 labels.push_back(id);
421 buffer.emplace_back(Opcode::illegal, 0, 0, InstructionModifier(), label, NoOperand(), NoOperand(), NoOperand(), NoOperand(), &man);
422 }
423
appendngen::AsmCodeGenerator::InstructionStream424 void append(InstructionStream &other, LabelManager &man) {
425 for (uint32_t id : other.labels)
426 man.offsetTarget(id, buffer.size());
427
428 buffer.insert(buffer.end(), other.buffer.begin(), other.buffer.end());
429 labels.insert(labels.end(), other.labels.begin(), other.labels.end());
430 }
431 };
432
433 HW hardware;
434 bool isGen12;
435 std::ostream *defaultOutput;
436 bool lineNumbers = false;
437
438 Label _labelLocalIDsLoaded;
439 Label _labelArgsLoaded;
440
441 private:
442 InstructionModifier defaultModifier;
443 LabelManager labelManager;
444 std::vector<InstructionStream*> streamStack;
445
446 inline void unsupported();
447
448 // Output functions.
449 template <typename D, typename S0, typename S1, typename S2>
450 inline void opX(Opcode op, DataType defaultType, const InstructionModifier &mod, D dst, S0 src0, S1 src1, S2 src2, uint16_t ext);
451
opX(Opcode op,DataType defaultType,const InstructionModifier & mod,D dst,S0 src0,S1 src1,S2 src2)452 template <typename D, typename S0, typename S1, typename S2> void opX(Opcode op, DataType defaultType, const InstructionModifier &mod, D dst, S0 src0, S1 src1, S2 src2) {
453 opX(op, defaultType, mod, dst, src0, src1, src2, 0);
454 }
opX(Opcode op,DataType defaultType,const InstructionModifier & mod,D dst,S0 src0,S1 src1)455 template <typename D, typename S0, typename S1> void opX(Opcode op, DataType defaultType, const InstructionModifier &mod, D dst, S0 src0, S1 src1) {
456 opX(op, defaultType, mod, dst, src0, src1, NoOperand());
457 }
opX(Opcode op,const InstructionModifier & mod,D dst,S0 src0,S1 src1)458 template <typename D, typename S0, typename S1> void opX(Opcode op, const InstructionModifier &mod, D dst, S0 src0, S1 src1) {
459 opX(op, DataType::invalid, mod, dst, src0, src1);
460 }
opX(Opcode op,DataType defaultType,const InstructionModifier & mod,D dst,S0 src0)461 template <typename D, typename S0> void opX(Opcode op, DataType defaultType, const InstructionModifier &mod, D dst, S0 src0) {
462 opX(op, defaultType, mod, dst, src0, NoOperand());
463 }
opX(Opcode op,const InstructionModifier & mod,D dst,S0 src0)464 template <typename D, typename S0> void opX(Opcode op, const InstructionModifier &mod, D dst, S0 src0) {
465 opX(op, DataType::invalid, mod, dst, src0);
466 }
opX(Opcode op,DataType defaultType,const InstructionModifier & mod,D dst)467 template <typename D> void opX(Opcode op, DataType defaultType, const InstructionModifier &mod, D dst) {
468 opX(op, defaultType, mod, dst, NoOperand());
469 }
opX(Opcode op,const InstructionModifier & mod,D dst)470 template <typename D> void opX(Opcode op, const InstructionModifier &mod, D dst) {
471 opX(op, DataType::invalid, mod, dst);
472 }
opX(Opcode op)473 void opX(Opcode op) {
474 opX(op, InstructionModifier(), NoOperand());
475 }
opX(Opcode op,const InstructionModifier & mod,Label & jip)476 void opX(Opcode op, const InstructionModifier &mod, Label &jip) {
477 (void) jip.getID(labelManager);
478 opX(op, DataType::invalid, mod, NoOperand(), jip);
479 }
opX(Opcode op,const InstructionModifier & mod,Label & jip,Label & uip)480 void opX(Opcode op, const InstructionModifier &mod, Label &jip, Label &uip) {
481 (void) jip.getID(labelManager);
482 (void) uip.getID(labelManager);
483 opX(op, DataType::invalid, mod, NoOperand(), jip, uip, NoOperand());
484 }
485
486 template <typename S1, typename ED, typename D>
opSend(Opcode op,const InstructionModifier & mod,SharedFunction sf,RegData dst,RegData src0,S1 src1,ED exdesc,D desc)487 void opSend(Opcode op, const InstructionModifier &mod, SharedFunction sf, RegData dst, RegData src0, S1 src1, ED exdesc, D desc) {
488 auto &i = streamStack.back()->append(op, static_cast<uint8_t>(sf), mod | defaultModifier, dst, src0, src1, exdesc, desc, &labelManager);
489 if (i.src[2].type == AsmOperand::Type::imm) {
490 uint32_t exdesc = static_cast<uint64_t>(i.src[2].imm);
491 if (isGen12) {
492 if (hardware >= HW::XeHPG) {
493 i.ext |= 0x80 | (((exdesc >> 6) & 0x1F) << 8);
494 i.src[2].imm = uint32_t(exdesc & ~0x7EF);
495 } else
496 i.src[2].imm = uint32_t(exdesc & ~0x2F);
497 } else
498 i.src[2].imm = uint32_t(exdesc | static_cast<uint8_t>(sf));
499 }
500 }
opDpas(Opcode op,const InstructionModifier & mod,int sdepth,int rcount,const RegData & dst,const RegData & src0,const RegData & src1,const RegData & src2)501 void opDpas(Opcode op, const InstructionModifier &mod, int sdepth, int rcount, const RegData &dst, const RegData &src0, const RegData &src1, const RegData &src2) {
502 (void) streamStack.back()->append(op, (sdepth << 8) | rcount, mod | defaultModifier, dst, src0, src1, src2, NoOperand(), &labelManager);
503 }
opCall(Opcode op,const InstructionModifier & mod,D dst,S0 src0)504 template <typename D, typename S0> void opCall(Opcode op, const InstructionModifier &mod, D dst, S0 src0) {
505 (void) streamStack.back()->append(op, 0, mod | defaultModifier | NoMask, dst, src0, NoOperand(), NoOperand(), NoOperand(), &labelManager);
506 }
opJmpi(Opcode op,const InstructionModifier & mod,S1 src1)507 template <typename S1> void opJmpi(Opcode op, const InstructionModifier &mod, S1 src1) {
508 (void) streamStack.back()->append(op, 0, mod | defaultModifier | NoMask, NoOperand(), src1, NoOperand(), NoOperand(), NoOperand(), &labelManager);
509 }
opSync(Opcode op,SyncFunction fc,const InstructionModifier & mod,S0 src0)510 template <typename S0> void opSync(Opcode op, SyncFunction fc, const InstructionModifier &mod, S0 src0) {
511 (void) streamStack.back()->append(op, static_cast<uint8_t>(fc), mod | defaultModifier, NoOperand(), src0, NoOperand(), NoOperand(), NoOperand(), &labelManager);
512 }
513
514 inline void finalize();
515
516 enum class ModPlacementType {Pre, Mid, Post};
517 inline void outX(std::ostream &out, const AsmInstruction &i, int lineNo);
518 inline void outExt(std::ostream &out, const AsmInstruction &i);
519 inline void outMods(std::ostream &out, const InstructionModifier &mod, Opcode op, ModPlacementType location);
520 inline void outSync(std::ostream &out, const autoswsb::SyncInsertion &si);
521
522 protected:
523 // Configuration.
setDefaultNoMask(bool def=true)524 void setDefaultNoMask(bool def = true) { defaultModifier.setWrEn(def); }
setDefaultAutoSWSB(bool def=true)525 void setDefaultAutoSWSB(bool def = true) { defaultModifier.setAutoSWSB(def); }
getDefaultNoMask() const526 bool getDefaultNoMask() const { return defaultModifier.isWrEn(); }
getDefaultAutoSWSB() const527 bool getDefaultAutoSWSB() const { return defaultModifier.isAutoSWSB(); }
528
529 // Stream handling.
pushStream()530 void pushStream() { pushStream(new InstructionStream()); }
pushStream(InstructionStream & s)531 void pushStream(InstructionStream &s) { pushStream(&s); }
pushStream(InstructionStream * s)532 void pushStream(InstructionStream *s) { streamStack.push_back(s); }
533
534 inline InstructionStream *popStream();
535
appendStream(InstructionStream * s)536 void appendStream(InstructionStream *s) { appendStream(*s); }
appendStream(InstructionStream & s)537 void appendStream(InstructionStream &s) { streamStack.back()->append(s, labelManager); }
appendCurrentStream()538 void appendCurrentStream() { InstructionStream *s = popStream(); appendStream(s); delete s; }
539
discardStream()540 void discardStream() { delete popStream(); }
541
comment(const std::string & str)542 void comment(const std::string &str) { streamStack.back()->appendComment(str); }
543
544 // Instructions.
545 template <typename DT = void>
add(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1)546 void add(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
547 opX(Opcode::add, getDataType<DT>(), mod, dst, src0, src1);
548 }
549 template <typename DT = void>
add(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const Immediate & src1)550 void add(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) {
551 opX(Opcode::add, getDataType<DT>(), mod, dst, src0, src1);
552 }
553 template <typename DT = void>
addc(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1)554 void addc(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
555 opX(Opcode::addc, getDataType<DT>(), (hardware >= HW::XeHPC) ? mod : (mod | AccWrEn), dst, src0, src1);
556 }
557 template <typename DT = void>
addc(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const Immediate & src1)558 void addc(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) {
559 opX(Opcode::addc, getDataType<DT>(), (hardware >= HW::XeHPC) ? mod : (mod | AccWrEn), dst, src0, src1);
560 }
561 template <typename DT = void>
add3(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1,const RegData & src2)562 void add3(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1, const RegData &src2) {
563 opX(Opcode::add3, getDataType<DT>(), mod, dst, src0, src1, src2);
564 }
565 template <typename DT = void>
add3(const InstructionModifier & mod,const RegData & dst,const Immediate & src0,const RegData & src1,const RegData & src2)566 void add3(const InstructionModifier &mod, const RegData &dst, const Immediate &src0, const RegData &src1, const RegData &src2) {
567 opX(Opcode::add3, getDataType<DT>(), mod, dst, src0, src1, src2);
568 }
569 template <typename DT = void>
add3(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1,const Immediate & src2)570 void add3(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1, const Immediate &src2) {
571 opX(Opcode::add3, getDataType<DT>(), mod, dst, src0, src1, src2);
572 }
573 template <typename DT = void>
add3(const InstructionModifier & mod,const RegData & dst,const Immediate & src0,const RegData & src1,const Immediate & src2)574 void add3(const InstructionModifier &mod, const RegData &dst, const Immediate &src0, const RegData &src1, const Immediate &src2) {
575 opX(Opcode::add3, getDataType<DT>(), mod, dst, src0, src1, src2);
576 }
577 template <typename DT = void>
and_(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1)578 void and_(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
579 opX(isGen12 ? Opcode::and_gen12 : Opcode::and_, getDataType<DT>(), mod, dst, src0, src1);
580 }
581 template <typename DT = void>
and_(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const Immediate & src1)582 void and_(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) {
583 opX(isGen12 ? Opcode::and_gen12 : Opcode::and_, getDataType<DT>(), mod, dst, src0, src1);
584 }
585 #ifndef NGEN_NO_OP_NAMES
586 template <typename DT = void>
and(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1)587 void and(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
588 and_<DT>(mod, dst, src0, src1);
589 }
590 template <typename DT = void>
and(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const Immediate & src1)591 void and(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) {
592 and_<DT>(mod, dst, src0, src1);
593 }
594 #endif
595 template <typename DT = void>
asr(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1)596 void asr(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
597 opX(isGen12 ? Opcode::asr_gen12 : Opcode::asr, getDataType<DT>(), mod, dst, src0, src1);
598 }
599 template <typename DT = void>
asr(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const Immediate & src1)600 void asr(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) {
601 opX(isGen12 ? Opcode::asr_gen12 : Opcode::asr, getDataType<DT>(), mod, dst, src0, src1);
602 }
603 template <typename DT = void>
avg(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1)604 void avg(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
605 opX(Opcode::avg, getDataType<DT>(), mod, dst, src0, src1);
606 }
607 template <typename DT = void>
avg(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const Immediate & src1)608 void avg(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) {
609 opX(Opcode::avg, getDataType<DT>(), mod, dst, src0, src1);
610 }
611 template <typename DT = void>
bfe(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1,const RegData & src2)612 void bfe(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1, const RegData &src2) {
613 opX(isGen12 ? Opcode::bfe_gen12 : Opcode::bfe, getDataType<DT>(), mod, dst, src0, src1, src2);
614 }
615 template <typename DT = void>
bfe(const InstructionModifier & mod,const RegData & dst,const Immediate & src0,const RegData & src1,const RegData & src2)616 void bfe(const InstructionModifier &mod, const RegData &dst, const Immediate &src0, const RegData &src1, const RegData &src2) {
617 opX(isGen12 ? Opcode::bfe_gen12 : Opcode::bfe, getDataType<DT>(), mod, dst, src0, src1, src2);
618 }
619 template <typename DT = void>
bfe(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1,const Immediate & src2)620 void bfe(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1, const Immediate &src2) {
621 opX(isGen12 ? Opcode::bfe_gen12 : Opcode::bfe, getDataType<DT>(), mod, dst, src0, src1, src2);
622 }
623 template <typename DT = void>
bfe(const InstructionModifier & mod,const RegData & dst,const Immediate & src0,const RegData & src1,const Immediate & src2)624 void bfe(const InstructionModifier &mod, const RegData &dst, const Immediate &src0, const RegData &src1, const Immediate &src2) {
625 opX(isGen12 ? Opcode::bfe_gen12 : Opcode::bfe, getDataType<DT>(), mod, dst, src0, src1, src2);
626 }
627 template <typename DT = void>
bfi1(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1)628 void bfi1(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
629 opX(isGen12 ? Opcode::bfi1_gen12 : Opcode::bfi1, getDataType<DT>(), mod, dst, src0, src1);
630 }
631 template <typename DT = void>
bfi1(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const Immediate & src1)632 void bfi1(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) {
633 opX(isGen12 ? Opcode::bfi1_gen12 : Opcode::bfi1, getDataType<DT>(), mod, dst, src0, src1);
634 }
635 template <typename DT = void>
bfi2(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1,const RegData & src2)636 void bfi2(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1, const RegData &src2) {
637 opX(isGen12 ? Opcode::bfi2_gen12 : Opcode::bfi2, getDataType<DT>(), mod, dst, src0, src1, src2);
638 }
639 template <typename DT = void>
bfi2(const InstructionModifier & mod,const RegData & dst,const Immediate & src0,const RegData & src1,const RegData & src2)640 void bfi2(const InstructionModifier &mod, const RegData &dst, const Immediate &src0, const RegData &src1, const RegData &src2) {
641 opX(isGen12 ? Opcode::bfi2_gen12 : Opcode::bfi2, getDataType<DT>(), mod, dst, src0, src1, src2);
642 }
643 template <typename DT = void>
bfi2(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1,const Immediate & src2)644 void bfi2(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1, const Immediate &src2) {
645 opX(isGen12 ? Opcode::bfi2_gen12 : Opcode::bfi2, getDataType<DT>(), mod, dst, src0, src1, src2);
646 }
647 template <typename DT = void>
bfi2(const InstructionModifier & mod,const RegData & dst,const Immediate & src0,const RegData & src1,const Immediate & src2)648 void bfi2(const InstructionModifier &mod, const RegData &dst, const Immediate &src0, const RegData &src1, const Immediate &src2) {
649 opX(isGen12 ? Opcode::bfi2_gen12 : Opcode::bfi2, getDataType<DT>(), mod, dst, src0, src1, src2);
650 }
651 template <typename DT = void>
bfn(const InstructionModifier & mod,uint8_t ctrl,const RegData & dst,const RegData & src0,const RegData & src1,const RegData & src2)652 void bfn(const InstructionModifier &mod, uint8_t ctrl, const RegData &dst, const RegData &src0, const RegData &src1, const RegData &src2) {
653 opX(Opcode::bfn, getDataType<DT>(), mod, dst, src0, src1, src2, ctrl);
654 }
655 template <typename DT = void>
bfn(const InstructionModifier & mod,uint8_t ctrl,const RegData & dst,const Immediate & src0,const RegData & src1,const RegData & src2)656 void bfn(const InstructionModifier &mod, uint8_t ctrl, const RegData &dst, const Immediate &src0, const RegData &src1, const RegData &src2) {
657 opX(Opcode::bfn, getDataType<DT>(), mod, dst, src0, src1, src2, ctrl);
658 }
659 template <typename DT = void>
bfn(const InstructionModifier & mod,uint8_t ctrl,const RegData & dst,const RegData & src0,const RegData & src1,const Immediate & src2)660 void bfn(const InstructionModifier &mod, uint8_t ctrl, const RegData &dst, const RegData &src0, const RegData &src1, const Immediate &src2) {
661 opX(Opcode::bfn, getDataType<DT>(), mod, dst, src0, src1, src2, ctrl);
662 }
663 template <typename DT = void>
bfn(const InstructionModifier & mod,uint8_t ctrl,const RegData & dst,const Immediate & src0,const RegData & src1,const Immediate & src2)664 void bfn(const InstructionModifier &mod, uint8_t ctrl, const RegData &dst, const Immediate &src0, const RegData &src1, const Immediate &src2) {
665 opX(Opcode::bfn, getDataType<DT>(), mod, dst, src0, src1, src2, ctrl);
666 }
667 template <typename DT = void>
bfrev(const InstructionModifier & mod,const RegData & dst,const RegData & src0)668 void bfrev(const InstructionModifier &mod, const RegData &dst, const RegData &src0) {
669 opX(isGen12 ? Opcode::bfrev_gen12 : Opcode::bfrev, getDataType<DT>(), mod, dst, src0);
670 }
671 template <typename DT = void>
bfrev(const InstructionModifier & mod,const RegData & dst,const Immediate & src0)672 void bfrev(const InstructionModifier &mod, const RegData &dst, const Immediate &src0) {
673 opX(isGen12 ? Opcode::bfrev_gen12 : Opcode::bfrev, getDataType<DT>(), mod, dst, src0);
674 }
brc(const InstructionModifier & mod,Label & jip,Label & uip)675 void brc(const InstructionModifier &mod, Label &jip, Label &uip) {
676 (void) jip.getID(labelManager);
677 (void) uip.getID(labelManager);
678 opX(Opcode::brc, mod, jip, uip);
679 }
brc(const InstructionModifier & mod,const RegData & src0)680 void brc(const InstructionModifier &mod, const RegData &src0) {
681 opCall(Opcode::brc, mod, NoOperand(), src0);
682 }
brd(const InstructionModifier & mod,Label & jip)683 void brd(const InstructionModifier &mod, Label &jip) {
684 (void) jip.getID(labelManager);
685 opX(Opcode::brd, mod, jip);
686 }
brd(const InstructionModifier & mod,const RegData & src0)687 void brd(const InstructionModifier &mod, const RegData &src0) {
688 opCall(Opcode::brd, mod, NoOperand(), src0);
689 }
break_(const InstructionModifier & mod,Label & jip,Label & uip)690 void break_(const InstructionModifier &mod, Label &jip, Label &uip) {
691 (void) jip.getID(labelManager);
692 (void) uip.getID(labelManager);
693 opX(Opcode::break_, mod, jip, uip);
694 }
call(const InstructionModifier & mod,const RegData & dst,Label & jip)695 void call(const InstructionModifier &mod, const RegData &dst, Label &jip) {
696 (void) jip.getID(labelManager);
697 opCall(Opcode::call, mod, dst, jip);
698 }
call(const InstructionModifier & mod,const RegData & dst,const RegData & jip)699 void call(const InstructionModifier &mod, const RegData &dst, const RegData &jip) {
700 opCall(Opcode::call, mod, dst, jip);
701 }
calla(const InstructionModifier & mod,const RegData & dst,int32_t jip)702 void calla(const InstructionModifier &mod, const RegData &dst, int32_t jip) {
703 opCall(Opcode::calla, mod, dst, Immediate::ud(jip));
704 }
calla(const InstructionModifier & mod,const RegData & dst,const RegData & jip)705 void calla(const InstructionModifier &mod, const RegData &dst, const RegData &jip) {
706 opCall(Opcode::calla, mod, dst, jip);
707 }
708 template <typename DT = void>
cbit(const InstructionModifier & mod,const RegData & dst,const RegData & src0)709 void cbit(const InstructionModifier &mod, const RegData &dst, const RegData &src0) {
710 opX(Opcode::cbit, getDataType<DT>(), mod, dst, src0);
711 }
712 template <typename DT = void>
cbit(const InstructionModifier & mod,const RegData & dst,const Immediate & src0)713 void cbit(const InstructionModifier &mod, const RegData &dst, const Immediate &src0) {
714 opX(Opcode::cbit, getDataType<DT>(), mod, dst, src0);
715 }
716 template <typename DT = void>
cmp(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1)717 void cmp(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
718 opX(isGen12 ? Opcode::cmp_gen12 : Opcode::cmp, getDataType<DT>(), mod, dst, src0, src1);
719 }
720 template <typename DT = void>
cmp(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const Immediate & src1)721 void cmp(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) {
722 opX(isGen12 ? Opcode::cmp_gen12 : Opcode::cmp, getDataType<DT>(), mod, dst, src0, src1);
723 }
724 template <typename DT = void>
cmpn(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1)725 void cmpn(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
726 opX(isGen12 ? Opcode::cmpn_gen12 : Opcode::cmpn, getDataType<DT>(), mod, dst, src0, src1);
727 }
728 template <typename DT = void>
csel(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1,const RegData & src2)729 void csel(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1, const RegData &src2) {
730 opX(isGen12 ? Opcode::csel_gen12 : Opcode::csel, getDataType<DT>(), mod, dst, src0, src1, src2);
731 }
732 template <typename DT = void>
csel(const InstructionModifier & mod,const RegData & dst,const Immediate & src0,const RegData & src1,const RegData & src2)733 void csel(const InstructionModifier &mod, const RegData &dst, const Immediate &src0, const RegData &src1, const RegData &src2) {
734 opX(isGen12 ? Opcode::csel_gen12 : Opcode::csel, getDataType<DT>(), mod, dst, src0, src1, src2);
735 }
736 template <typename DT = void>
csel(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1,const Immediate & src2)737 void csel(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1, const Immediate &src2) {
738 opX(isGen12 ? Opcode::csel_gen12 : Opcode::csel, getDataType<DT>(), mod, dst, src0, src1, src2);
739 }
740 template <typename DT = void>
csel(const InstructionModifier & mod,const RegData & dst,const Immediate & src0,const RegData & src1,const Immediate & src2)741 void csel(const InstructionModifier &mod, const RegData &dst, const Immediate &src0, const RegData &src1, const Immediate &src2) {
742 opX(isGen12 ? Opcode::csel_gen12 : Opcode::csel, getDataType<DT>(), mod, dst, src0, src1, src2);
743 }
cont(const InstructionModifier & mod,Label & jip,Label & uip)744 void cont(const InstructionModifier &mod, Label &jip, Label &uip) {
745 (void) jip.getID(labelManager);
746 (void) uip.getID(labelManager);
747 opX(Opcode::cont, mod, jip, uip);
748 }
749 template <typename DT = void>
dp2(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1)750 void dp2(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
751 opX(Opcode::dp2, getDataType<DT>(), mod, dst, src0, src1);
752 }
753 template <typename DT = void>
dp2(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const Immediate & src1)754 void dp2(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) {
755 opX(Opcode::dp2, getDataType<DT>(), mod, dst, src0, src1);
756 }
757 template <typename DT = void>
dp3(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1)758 void dp3(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
759 opX(Opcode::dp3, getDataType<DT>(), mod, dst, src0, src1);
760 }
761 template <typename DT = void>
dp3(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const Immediate & src1)762 void dp3(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) {
763 opX(Opcode::dp3, getDataType<DT>(), mod, dst, src0, src1);
764 }
765 template <typename DT = void>
dp4(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1)766 void dp4(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
767 opX(Opcode::dp4, getDataType<DT>(), mod, dst, src0, src1);
768 }
769 template <typename DT = void>
dp4(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const Immediate & src1)770 void dp4(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) {
771 opX(Opcode::dp4, getDataType<DT>(), mod, dst, src0, src1);
772 }
773 template <typename DT = void>
dp4a(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1,const RegData & src2)774 void dp4a(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1, const RegData &src2) {
775 opX(Opcode::dp4a, getDataType<DT>(), mod, dst, src0, src1, src2);
776 }
777 template <typename DT = void>
dp4a(const InstructionModifier & mod,const RegData & dst,const Immediate & src0,const RegData & src1,const RegData & src2)778 void dp4a(const InstructionModifier &mod, const RegData &dst, const Immediate &src0, const RegData &src1, const RegData &src2) {
779 opX(Opcode::dp4a, getDataType<DT>(), mod, dst, src0, src1, src2);
780 }
781 template <typename DT = void>
dp4a(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1,const Immediate & src2)782 void dp4a(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1, const Immediate &src2) {
783 opX(Opcode::dp4a, getDataType<DT>(), mod, dst, src0, src1, src2);
784 }
785 template <typename DT = void>
dp4a(const InstructionModifier & mod,const RegData & dst,const Immediate & src0,const RegData & src1,const Immediate & src2)786 void dp4a(const InstructionModifier &mod, const RegData &dst, const Immediate &src0, const RegData &src1, const Immediate &src2) {
787 opX(Opcode::dp4a, getDataType<DT>(), mod, dst, src0, src1, src2);
788 }
dpas(const InstructionModifier & mod,uint8_t sdepth,uint8_t rcount,const RegData & dst,const RegData & src0,const RegData & src1,const RegData & src2)789 void dpas(const InstructionModifier &mod, uint8_t sdepth, uint8_t rcount, const RegData &dst, const RegData &src0, const RegData &src1, const RegData &src2) {
790 opDpas(Opcode::dpas, mod, sdepth, rcount, dst, src0, src1, src2);
791 }
dpasw(const InstructionModifier & mod,uint8_t sdepth,uint8_t rcount,const RegData & dst,const RegData & src0,const RegData & src1,const RegData & src2)792 void dpasw(const InstructionModifier &mod, uint8_t sdepth, uint8_t rcount, const RegData &dst, const RegData &src0, const RegData &src1, const RegData &src2) {
793 opDpas(Opcode::dpasw, mod, sdepth, rcount, dst, src0, src1, src2);
794 }
795 template <typename DT = void>
dph(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1)796 void dph(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
797 opX(Opcode::dph, getDataType<DT>(), mod, dst, src0, src1);
798 }
799 template <typename DT = void>
dph(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const Immediate & src1)800 void dph(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) {
801 opX(Opcode::dph, getDataType<DT>(), mod, dst, src0, src1);
802 }
else_(const InstructionModifier & mod,Label & jip,Label & uip,bool branchCtrl=false)803 void else_(const InstructionModifier &mod, Label &jip, Label &uip, bool branchCtrl = false) {
804 (void) jip.getID(labelManager);
805 (void) uip.getID(labelManager);
806 opX(Opcode::else_, DataType::invalid, mod, NoOperand(), jip, uip, NoOperand(), branchCtrl);
807 }
else_(InstructionModifier mod,Label & jip)808 void else_(InstructionModifier mod, Label &jip) {
809 else_(mod, jip, jip);
810 }
endif(const InstructionModifier & mod,Label & jip)811 void endif(const InstructionModifier &mod, Label &jip) {
812 (void) jip.getID(labelManager);
813 opX(Opcode::endif, mod, NoOperand(), jip);
814 }
endif(const InstructionModifier & mod)815 void endif(const InstructionModifier &mod) {
816 Label next;
817 endif(mod, next);
818 mark(next);
819 }
820 template <typename DT = void>
fbh(const InstructionModifier & mod,const RegData & dst,const RegData & src0)821 void fbh(const InstructionModifier &mod, const RegData &dst, const RegData &src0) {
822 opX(Opcode::fbh, getDataType<DT>(), mod, dst, src0);
823 }
824 template <typename DT = void>
fbh(const InstructionModifier & mod,const RegData & dst,const Immediate & src0)825 void fbh(const InstructionModifier &mod, const RegData &dst, const Immediate &src0) {
826 opX(Opcode::fbh, getDataType<DT>(), mod, dst, src0);
827 }
828 template <typename DT = void>
fbl(const InstructionModifier & mod,const RegData & dst,const RegData & src0)829 void fbl(const InstructionModifier &mod, const RegData &dst, const RegData &src0) {
830 opX(Opcode::fbl, getDataType<DT>(), mod, dst, src0);
831 }
832 template <typename DT = void>
fbl(const InstructionModifier & mod,const RegData & dst,const Immediate & src0)833 void fbl(const InstructionModifier &mod, const RegData &dst, const Immediate &src0) {
834 opX(Opcode::fbl, getDataType<DT>(), mod, dst, src0);
835 }
836 template <typename DT = void>
frc(const InstructionModifier & mod,const RegData & dst,const RegData & src0)837 void frc(const InstructionModifier &mod, const RegData &dst, const RegData &src0) {
838 opX(Opcode::frc, getDataType<DT>(), mod, dst, src0);
839 }
goto_(const InstructionModifier & mod,Label & jip,Label & uip,bool branchCtrl=false)840 void goto_(const InstructionModifier &mod, Label &jip, Label &uip, bool branchCtrl = false) {
841 (void) jip.getID(labelManager);
842 (void) uip.getID(labelManager);
843 opX(Opcode::goto_, DataType::invalid, mod, NoOperand(), jip, uip, NoOperand(), branchCtrl);
844 }
goto_(const InstructionModifier & mod,Label & jip)845 void goto_(const InstructionModifier &mod, Label &jip) {
846 goto_(mod, jip, jip);
847 }
halt(const InstructionModifier & mod,Label & jip,Label & uip)848 void halt(const InstructionModifier &mod, Label &jip, Label &uip) {
849 (void) jip.getID(labelManager);
850 (void) uip.getID(labelManager);
851 opX(Opcode::halt, mod, jip, uip);
852 }
halt(const InstructionModifier & mod,Label & jip)853 void halt(const InstructionModifier &mod, Label &jip) {
854 halt(mod, jip, jip);
855 }
if_(const InstructionModifier & mod,Label & jip,Label & uip,bool branchCtrl=false)856 void if_(const InstructionModifier &mod, Label &jip, Label &uip, bool branchCtrl = false) {
857 (void) jip.getID(labelManager);
858 (void) uip.getID(labelManager);
859 opX(Opcode::if_, DataType::invalid, mod, NoOperand(), jip, uip, NoOperand(), branchCtrl);
860 }
if_(const InstructionModifier & mod,Label & jip)861 void if_(const InstructionModifier &mod, Label &jip) {
862 if_(mod, jip, jip);
863 }
illegal()864 void illegal() {
865 opX(Opcode::illegal);
866 }
join(const InstructionModifier & mod,Label & jip)867 void join(const InstructionModifier &mod, Label &jip) {
868 opX(Opcode::join, mod, jip);
869 }
join(const InstructionModifier & mod)870 void join(const InstructionModifier &mod) {
871 Label next;
872 join(mod, next);
873 mark(next);
874 }
jmpi(const InstructionModifier & mod,Label & jip)875 void jmpi(const InstructionModifier &mod, Label &jip) {
876 (void) jip.getID(labelManager);
877 opJmpi(Opcode::jmpi, mod, jip);
878 }
jmpi(const InstructionModifier & mod,const RegData & jip)879 void jmpi(const InstructionModifier &mod, const RegData &jip) {
880 opJmpi(Opcode::jmpi, mod, jip);
881 }
882 template <typename DT = void>
line(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1)883 void line(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
884 opX(Opcode::line, getDataType<DT>(), mod, dst, src0, src1);
885 }
886 template <typename DT = void>
line(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const Immediate & src1)887 void line(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) {
888 opX(Opcode::line, getDataType<DT>(), mod, dst, src0, src1);
889 }
890 template <typename DT = void>
lrp(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1,const RegData & src2)891 void lrp(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1, const RegData &src2) {
892 opX(Opcode::lrp, getDataType<DT>(), mod, dst, src0, src1, src2);
893 }
894 template <typename DT = void>
lzd(const InstructionModifier & mod,const RegData & dst,const RegData & src0)895 void lzd(const InstructionModifier &mod, const RegData &dst, const RegData &src0) {
896 opX(Opcode::lzd, getDataType<DT>(), mod, dst, src0);
897 }
898 template <typename DT = void>
lzd(const InstructionModifier & mod,const RegData & dst,const Immediate & src0)899 void lzd(const InstructionModifier &mod, const RegData &dst, const Immediate &src0) {
900 opX(Opcode::lzd, getDataType<DT>(), mod, dst, src0);
901 }
902 template <typename DT = void>
mac(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1)903 void mac(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
904 opX(Opcode::mac, getDataType<DT>(), mod, dst, src0, src1);
905 }
906 template <typename DT = void>
mac(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const Immediate & src1)907 void mac(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) {
908 opX(Opcode::mac, getDataType<DT>(), mod, dst, src0, src1);
909 }
910 template <typename DT = void>
mach(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1)911 void mach(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
912 opX(Opcode::mach, getDataType<DT>(), (hardware >= HW::XeHPC) ? mod : (mod | AccWrEn), dst, src0, src1);
913 }
914 template <typename DT = void>
mach(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const Immediate & src1)915 void mach(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) {
916 opX(Opcode::mach, getDataType<DT>(), (hardware >= HW::XeHPC) ? mod : (mod | AccWrEn), dst, src0, src1);
917 }
918 template <typename DT = void>
macl(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1)919 void macl(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
920 #ifdef NGEN_SAFE
921 if (hardware < HW::Gen10) unsupported();
922 #endif
923 opX((hardware >= HW::XeHPC) ? Opcode::macl : Opcode::mach, getDataType<DT>(), mod, dst, src0, src1);
924 }
925 template <typename DT = void>
macl(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const Immediate & src1)926 void macl(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) {
927 #ifdef NGEN_SAFE
928 if (hardware < HW::Gen10) unsupported();
929 #endif
930 opX((hardware >= HW::XeHPC) ? Opcode::macl : Opcode::mach, getDataType<DT>(), mod, dst, src0, src1);
931 }
932 template <typename DT = void>
mad(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1,const RegData & src2)933 void mad(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1, const RegData &src2) {
934 opX(Opcode::mad, getDataType<DT>(), mod, dst, src0, src1, src2);
935 }
936 template <typename DT = void>
mad(const InstructionModifier & mod,const Align16Operand & dst,const Align16Operand & src0,const Align16Operand & src1,const Align16Operand & src2)937 void mad(const InstructionModifier &mod, const Align16Operand &dst, const Align16Operand &src0, const Align16Operand &src1, const Align16Operand &src2) {
938 opX(Opcode::mad, getDataType<DT>(), mod, dst, src0, src1, src2);
939 }
940 template <typename DT = void>
mad(const InstructionModifier & mod,const RegData & dst,const Immediate & src0,const RegData & src1,const RegData & src2)941 void mad(const InstructionModifier &mod, const RegData &dst, const Immediate &src0, const RegData &src1, const RegData &src2) {
942 opX(Opcode::mad, getDataType<DT>(), mod, dst, src0, src1, src2);
943 }
944 template <typename DT = void>
mad(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1,const Immediate & src2)945 void mad(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1, const Immediate &src2) {
946 opX(Opcode::mad, getDataType<DT>(), mod, dst, src0, src1, src2);
947 }
948 template <typename DT = void>
mad(const InstructionModifier & mod,const RegData & dst,const Immediate & src0,const RegData & src1,const Immediate & src2)949 void mad(const InstructionModifier &mod, const RegData &dst, const Immediate &src0, const RegData &src1, const Immediate &src2) {
950 opX(Opcode::mad, getDataType<DT>(), mod, dst, src0, src1, src2);
951 }
952 template <typename DT = void>
madm(const InstructionModifier & mod,const ExtendedReg & dst,const ExtendedReg & src0,const ExtendedReg & src1,const ExtendedReg & src2)953 void madm(const InstructionModifier &mod, const ExtendedReg &dst, const ExtendedReg &src0, const ExtendedReg &src1, const ExtendedReg &src2) {
954 opX(Opcode::madm, getDataType<DT>(), mod, dst, src0, src1, src2);
955 }
956 template <typename DT = void>
math(const InstructionModifier & mod,MathFunction fc,const RegData & dst,const RegData & src0)957 void math(const InstructionModifier &mod, MathFunction fc, const RegData &dst, const RegData &src0) {
958 #ifdef NGEN_SAFE
959 if (mathArgCount(fc) != 1) throw invalid_operand_count_exception();
960 #endif
961 if (fc == MathFunction::rsqtm)
962 math<DT>(mod, fc, dst | nomme, src0 | nomme);
963 else
964 opX(Opcode::math, getDataType<DT>(), mod, dst, src0, NoOperand(), NoOperand(), static_cast<uint8_t>(fc));
965 }
966 template <typename DT = void>
math(const InstructionModifier & mod,MathFunction fc,const RegData & dst,const RegData & src0,const RegData & src1)967 void math(const InstructionModifier &mod, MathFunction fc, const RegData &dst, const RegData &src0, const RegData &src1) {
968 #ifdef NGEN_SAFE
969 if (mathArgCount(fc) != 2) throw invalid_operand_count_exception();
970 #endif
971 if (fc == MathFunction::invm)
972 math<DT>(mod, fc, dst | nomme, src0 | nomme, src1 | nomme);
973 else
974 opX(Opcode::math, getDataType<DT>(), mod, dst, src0, src1, NoOperand(), static_cast<uint8_t>(fc));
975 }
976 template <typename DT = void>
math(const InstructionModifier & mod,MathFunction fc,const RegData & dst,const RegData & src0,const Immediate & src1)977 void math(const InstructionModifier &mod, MathFunction fc, const RegData &dst, const RegData &src0, const Immediate &src1) {
978 #ifdef NGEN_SAFE
979 if (fc == MathFunction::invm || fc == MathFunction::rsqtm) throw invalid_operand_exception();
980 #endif
981 opX(Opcode::math, getDataType<DT>(), mod, dst, src0, src1.forceInt32(), NoOperand(), static_cast<uint8_t>(fc));
982 }
983 template <typename DT = void>
math(InstructionModifier mod,MathFunction fc,const ExtendedReg & dst,const ExtendedReg & src0)984 void math(InstructionModifier mod, MathFunction fc, const ExtendedReg &dst, const ExtendedReg &src0) {
985 #ifdef NGEN_SAFE
986 if (fc != MathFunction::rsqtm) throw invalid_operand_exception();
987 #endif
988 mod.setCMod(ConditionModifier::eo);
989 opX(Opcode::math, getDataType<DT>(), mod, dst, src0, NoOperand(), NoOperand(), static_cast<uint8_t>(fc));
990 }
991 template <typename DT = void>
math(InstructionModifier mod,MathFunction fc,const ExtendedReg & dst,const ExtendedReg & src0,const ExtendedReg & src1)992 void math(InstructionModifier mod, MathFunction fc, const ExtendedReg &dst, const ExtendedReg &src0, const ExtendedReg &src1) {
993 #ifdef NGEN_SAFE
994 if (fc != MathFunction::invm) throw invalid_operand_exception();
995 #endif
996 mod.setCMod(ConditionModifier::eo);
997 opX(Opcode::math, getDataType<DT>(), mod, dst, src0, src1, NoOperand(), static_cast<uint8_t>(fc));
998 }
999 template <typename DT = void>
mov(const InstructionModifier & mod,const RegData & dst,const RegData & src0)1000 void mov(const InstructionModifier &mod, const RegData &dst, const RegData &src0) {
1001 opX(isGen12 ? Opcode::mov_gen12 : Opcode::mov, getDataType<DT>(), mod, dst, src0);
1002 }
1003 template <typename DT = void>
mov(const InstructionModifier & mod,const RegData & dst,const Immediate & src0)1004 void mov(const InstructionModifier &mod, const RegData &dst, const Immediate &src0) {
1005 opX(isGen12 ? Opcode::mov_gen12 : Opcode::mov, getDataType<DT>(), mod, dst, src0);
1006 }
1007 template <typename DT = void>
movi(const InstructionModifier & mod,const RegData & dst,const RegData & src0)1008 void movi(const InstructionModifier &mod, const RegData &dst, const RegData &src0) {
1009 if (hardware >= HW::Gen10)
1010 movi<DT>(mod, dst, src0, null);
1011 else
1012 opX(isGen12 ? Opcode::movi_gen12 : Opcode::movi, getDataType<DT>(), mod, dst, src0);
1013 }
1014 template <typename DT = void>
movi(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const Immediate & src1)1015 void movi(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) {
1016 #ifdef NGEN_SAFE
1017 if (hardware < HW::Gen10) throw unsupported_instruction();
1018 #endif
1019 opX(isGen12 ? Opcode::movi_gen12 : Opcode::movi, getDataType<DT>(), mod, dst, src0, src1);
1020 }
1021 template <typename DT = void>
movi(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1)1022 void movi(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
1023 #ifdef NGEN_SAFE
1024 if (hardware < HW::Gen10) throw unsupported_instruction();
1025 #endif
1026 opX(isGen12 ? Opcode::movi_gen12 : Opcode::movi, getDataType<DT>(), mod, dst, src0, src1);
1027 }
1028 template <typename DT = void>
mul(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1)1029 void mul(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
1030 opX(Opcode::mul, getDataType<DT>(), mod, dst, src0, src1);
1031 }
1032 template <typename DT = void>
mul(const InstructionModifier & mod,const RegData & dst,const RegData & src0,Immediate src1)1033 void mul(const InstructionModifier &mod, const RegData &dst, const RegData &src0, Immediate src1) {
1034 if (dst.getBytes() == 8)
1035 src1 = src1.forceInt32();
1036 opX(Opcode::mul, getDataType<DT>(), mod, dst, src0, src1);
1037 }
nop()1038 void nop() {
1039 opX(isGen12 ? Opcode::nop_gen12 : Opcode::nop);
1040 }
1041 template <typename DT = void>
not_(const InstructionModifier & mod,const RegData & dst,const RegData & src0)1042 void not_(const InstructionModifier &mod, const RegData &dst, const RegData &src0) {
1043 opX(isGen12 ? Opcode::not_gen12 : Opcode::not_, getDataType<DT>(), mod, dst, src0);
1044 }
1045 template <typename DT = void>
not_(const InstructionModifier & mod,const RegData & dst,const Immediate & src0)1046 void not_(const InstructionModifier &mod, const RegData &dst, const Immediate &src0) {
1047 opX(isGen12 ? Opcode::not_gen12 : Opcode::not_, getDataType<DT>(), mod, dst, src0);
1048 }
1049 #ifndef NGEN_NO_OP_NAMES
1050 template <typename DT = void>
not(const InstructionModifier & mod,const RegData & dst,const RegData & src0)1051 void not(const InstructionModifier &mod, const RegData &dst, const RegData &src0) {
1052 not_<DT>(mod, dst, src0);
1053 }
1054 template <typename DT = void>
not(const InstructionModifier & mod,const RegData & dst,const Immediate & src0)1055 void not(const InstructionModifier &mod, const RegData &dst, const Immediate &src0) {
1056 not_<DT>(mod, dst, src0);
1057 }
1058 #endif
1059 template <typename DT = void>
or_(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1)1060 void or_(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
1061 opX(isGen12 ? Opcode::or_gen12 : Opcode::or_, getDataType<DT>(), mod, dst, src0, src1);
1062 }
1063 template <typename DT = void>
or_(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const Immediate & src1)1064 void or_(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) {
1065 opX(isGen12 ? Opcode::or_gen12 : Opcode::or_, getDataType<DT>(), mod, dst, src0, src1);
1066 }
1067 #ifndef NGEN_NO_OP_NAMES
1068 template <typename DT = void>
or(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1)1069 void or(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
1070 or_<DT>(mod, dst, src0, src1);
1071 }
1072 template <typename DT = void>
or(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const Immediate & src1)1073 void or(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) {
1074 or_<DT>(mod, dst, src0, src1);
1075 }
1076 #endif
1077 template <typename DT = void>
pln(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1)1078 void pln(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
1079 opX(Opcode::pln, getDataType<DT>(), mod, dst, src0, src1);
1080 }
ret(const InstructionModifier & mod,const RegData & src0)1081 void ret(const InstructionModifier &mod, const RegData &src0) {
1082 opJmpi(Opcode::ret, mod, src0);
1083 }
1084 template <typename DT = void>
rndd(const InstructionModifier & mod,const RegData & dst,const RegData & src0)1085 void rndd(const InstructionModifier &mod, const RegData &dst, const RegData &src0) {
1086 opX(Opcode::rndd, getDataType<DT>(), mod, dst, src0);
1087 }
1088 template <typename DT = void>
rndd(const InstructionModifier & mod,const RegData & dst,const Immediate & src0)1089 void rndd(const InstructionModifier &mod, const RegData &dst, const Immediate &src0) {
1090 opX(Opcode::rndd, getDataType<DT>(), mod, dst, src0);
1091 }
1092 template <typename DT = void>
rnde(const InstructionModifier & mod,const RegData & dst,const RegData & src0)1093 void rnde(const InstructionModifier &mod, const RegData &dst, const RegData &src0) {
1094 opX(Opcode::rnde, getDataType<DT>(), mod, dst, src0);
1095 }
1096 template <typename DT = void>
rnde(const InstructionModifier & mod,const RegData & dst,const Immediate & src0)1097 void rnde(const InstructionModifier &mod, const RegData &dst, const Immediate &src0) {
1098 opX(Opcode::rnde, getDataType<DT>(), mod, dst, src0);
1099 }
1100 template <typename DT = void>
rndu(const InstructionModifier & mod,const RegData & dst,const RegData & src0)1101 void rndu(const InstructionModifier &mod, const RegData &dst, const RegData &src0) {
1102 opX(Opcode::rndu, getDataType<DT>(), mod, dst, src0);
1103 }
1104 template <typename DT = void>
rndu(const InstructionModifier & mod,const RegData & dst,const Immediate & src0)1105 void rndu(const InstructionModifier &mod, const RegData &dst, const Immediate &src0) {
1106 opX(Opcode::rndu, getDataType<DT>(), mod, dst, src0);
1107 }
1108 template <typename DT = void>
rndz(const InstructionModifier & mod,const RegData & dst,const RegData & src0)1109 void rndz(const InstructionModifier &mod, const RegData &dst, const RegData &src0) {
1110 opX(Opcode::rndz, getDataType<DT>(), mod, dst, src0);
1111 }
1112 template <typename DT = void>
rndz(const InstructionModifier & mod,const RegData & dst,const Immediate & src0)1113 void rndz(const InstructionModifier &mod, const RegData &dst, const Immediate &src0) {
1114 opX(Opcode::rndz, getDataType<DT>(), mod, dst, src0);
1115 }
1116 template <typename DT = void>
rol(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1)1117 void rol(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
1118 opX(isGen12 ? Opcode::rol_gen12 : Opcode::rol, getDataType<DT>(), mod, dst, src0, src1);
1119 }
1120 template <typename DT = void>
rol(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const Immediate & src1)1121 void rol(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) {
1122 opX(isGen12 ? Opcode::rol_gen12 : Opcode::rol, getDataType<DT>(), mod, dst, src0, src1);
1123 }
1124 template <typename DT = void>
ror(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1)1125 void ror(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
1126 opX(isGen12 ? Opcode::ror_gen12 : Opcode::ror, getDataType<DT>(), mod, dst, src0, src1);
1127 }
1128 template <typename DT = void>
ror(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const Immediate & src1)1129 void ror(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) {
1130 opX(isGen12 ? Opcode::ror_gen12 : Opcode::ror, getDataType<DT>(), mod, dst, src0, src1);
1131 }
1132 template <typename DT = void>
sad2(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1)1133 void sad2(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
1134 opX(Opcode::sad2, getDataType<DT>(), mod, dst, src0, src1);
1135 }
1136 template <typename DT = void>
sad2(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const Immediate & src1)1137 void sad2(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) {
1138 opX(Opcode::sad2, getDataType<DT>(), mod, dst, src0, src1);
1139 }
1140 template <typename DT = void>
sada2(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1)1141 void sada2(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
1142 opX(Opcode::sada2, getDataType<DT>(), mod, dst, src0, src1);
1143 }
1144 template <typename DT = void>
sada2(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const Immediate & src1)1145 void sada2(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) {
1146 opX(Opcode::sada2, getDataType<DT>(), mod, dst, src0, src1);
1147 }
1148 template <typename DT = void>
sel(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1)1149 void sel(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
1150 opX(isGen12 ? Opcode::sel_gen12 : Opcode::sel, getDataType<DT>(), mod, dst, src0, src1);
1151 }
1152 template <typename DT = void>
sel(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const Immediate & src1)1153 void sel(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) {
1154 opX(isGen12 ? Opcode::sel_gen12 : Opcode::sel, getDataType<DT>(), mod, dst, src0, src1);
1155 }
1156
1157 /* Gen12-style sends */
send(const InstructionModifier & mod,SharedFunction sf,const RegData & dst,const RegData & src0,const RegData & src1,uint32_t exdesc,uint32_t desc)1158 void send(const InstructionModifier &mod, SharedFunction sf, const RegData &dst, const RegData &src0, const RegData &src1, uint32_t exdesc, uint32_t desc) {
1159 opSend(isGen12 ? Opcode::send : Opcode::sends, mod, sf, dst, src0, src1, Immediate::ud(exdesc), Immediate::ud(desc));
1160 }
send(const InstructionModifier & mod,SharedFunction sf,const RegData & dst,const RegData & src0,const RegData & src1,const RegData & exdesc,uint32_t desc)1161 void send(const InstructionModifier &mod, SharedFunction sf, const RegData &dst, const RegData &src0, const RegData &src1, const RegData &exdesc, uint32_t desc) {
1162 opSend(isGen12 ? Opcode::send : Opcode::sends, mod, sf, dst, src0, src1, exdesc, Immediate::ud(desc));
1163 }
send(const InstructionModifier & mod,SharedFunction sf,const RegData & dst,const RegData & src0,const RegData & src1,uint32_t exdesc,const RegData & desc)1164 void send(const InstructionModifier &mod, SharedFunction sf, const RegData &dst, const RegData &src0, const RegData &src1, uint32_t exdesc, const RegData &desc) {
1165 opSend(isGen12 ? Opcode::send : Opcode::sends, mod, sf, dst, src0, src1, Immediate::ud(exdesc), desc);
1166 }
send(const InstructionModifier & mod,SharedFunction sf,const RegData & dst,const RegData & src0,const RegData & src1,const RegData & exdesc,const RegData & desc)1167 void send(const InstructionModifier &mod, SharedFunction sf, const RegData &dst, const RegData &src0, const RegData &src1, const RegData &exdesc, const RegData &desc) {
1168 opSend(isGen12 ? Opcode::send : Opcode::sends, mod, sf, dst, src0, src1, exdesc, desc);
1169 }
sendc(const InstructionModifier & mod,SharedFunction sf,const RegData & dst,const RegData & src0,const RegData & src1,uint32_t exdesc,uint32_t desc)1170 void sendc(const InstructionModifier &mod, SharedFunction sf, const RegData &dst, const RegData &src0, const RegData &src1, uint32_t exdesc, uint32_t desc) {
1171 opSend(isGen12 ? Opcode::sendc : Opcode::sendsc, mod, sf, dst, src0, src1, Immediate::ud(exdesc), Immediate::ud(desc));
1172 }
sendc(const InstructionModifier & mod,SharedFunction sf,const RegData & dst,const RegData & src0,const RegData & src1,const RegData & exdesc,uint32_t desc)1173 void sendc(const InstructionModifier &mod, SharedFunction sf, const RegData &dst, const RegData &src0, const RegData &src1, const RegData &exdesc, uint32_t desc) {
1174 opSend(isGen12 ? Opcode::sendc : Opcode::sendsc, mod, sf, dst, src0, src1, exdesc, Immediate::ud(desc));
1175 }
sendc(const InstructionModifier & mod,SharedFunction sf,const RegData & dst,const RegData & src0,const RegData & src1,uint32_t exdesc,const RegData & desc)1176 void sendc(const InstructionModifier &mod, SharedFunction sf, const RegData &dst, const RegData &src0, const RegData &src1, uint32_t exdesc, const RegData &desc) {
1177 opSend(isGen12 ? Opcode::sendc : Opcode::sendsc, mod, sf, dst, src0, src1, Immediate::ud(exdesc), desc);
1178 }
sendc(const InstructionModifier & mod,SharedFunction sf,const RegData & dst,const RegData & src0,const RegData & src1,const RegData & exdesc,const RegData & desc)1179 void sendc(const InstructionModifier &mod, SharedFunction sf, const RegData &dst, const RegData &src0, const RegData &src1, const RegData &exdesc, const RegData &desc) {
1180 opSend(isGen12 ? Opcode::sendc : Opcode::sendsc, mod, sf, dst, src0, src1, exdesc, desc);
1181 }
send(const InstructionModifier & mod,SharedFunction sf,const RegData & dst,const RegData & src0,NoOperand src1,T1 exdesc,T2 desc)1182 template <typename T1, typename T2> void send(const InstructionModifier &mod, SharedFunction sf, const RegData &dst, const RegData &src0, NoOperand src1, T1 exdesc, T2 desc) {
1183 opSend(Opcode::send, mod, sf, dst, src0, src1, exdesc, desc);
1184 }
sendc(const InstructionModifier & mod,SharedFunction sf,const RegData & dst,const RegData & src0,NoOperand src1,T1 exdesc,T2 desc)1185 template <typename T1, typename T2> void sendc(const InstructionModifier &mod, SharedFunction sf, const RegData &dst, const RegData &src0, NoOperand src1, T1 exdesc, T2 desc) {
1186 opSend(Opcode::sendc, mod, sf, dst, src0, src1, exdesc, desc);
1187 }
1188 /* Pre-Gen12 style sends */
send(const InstructionModifier & mod,const RegData & dst,const RegData & src0,uint32_t exdesc,uint32_t desc)1189 void send(const InstructionModifier &mod, const RegData &dst, const RegData &src0, uint32_t exdesc, uint32_t desc) {
1190 if (isGen12)
1191 send(mod, static_cast<SharedFunction>(exdesc & 0xF), dst, src0, null, exdesc, desc);
1192 else
1193 send(mod, SharedFunction::null, dst, src0, NoOperand(), Immediate::ud(exdesc), Immediate::ud(desc));
1194 }
send(const InstructionModifier & mod,const RegData & dst,const RegData & src0,uint32_t exdesc,const RegData & desc)1195 void send(const InstructionModifier &mod, const RegData &dst, const RegData &src0, uint32_t exdesc, const RegData &desc) {
1196 if (isGen12)
1197 send(mod, static_cast<SharedFunction>(exdesc & 0xF), dst, src0, null, exdesc, desc);
1198 else
1199 send(mod, SharedFunction::null, dst, src0, NoOperand(), Immediate::ud(exdesc), desc);
1200 }
sendc(const InstructionModifier & mod,const RegData & dst,const RegData & src0,uint32_t exdesc,uint32_t desc)1201 void sendc(const InstructionModifier &mod, const RegData &dst, const RegData &src0, uint32_t exdesc, uint32_t desc) {
1202 if (isGen12)
1203 sendc(mod, static_cast<SharedFunction>(exdesc & 0xF), dst, src0, null, exdesc, desc);
1204 else
1205 sendc(mod, SharedFunction::null, dst, src0, NoOperand(), Immediate::ud(exdesc), Immediate::ud(desc));
1206 }
sendc(const InstructionModifier & mod,const RegData & dst,const RegData & src0,uint32_t exdesc,const RegData & desc)1207 void sendc(const InstructionModifier &mod, const RegData &dst, const RegData &src0, uint32_t exdesc, const RegData &desc) {
1208 if (isGen12)
1209 sendc(mod, static_cast<SharedFunction>(exdesc & 0xF), dst, src0, null, exdesc, desc);
1210 else
1211 sendc(mod, SharedFunction::null, dst, src0, NoOperand(), Immediate::ud(exdesc), desc);
1212 }
sends(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1,uint32_t exdesc,uint32_t desc)1213 void sends(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1, uint32_t exdesc, uint32_t desc) {
1214 send(mod, static_cast<SharedFunction>(exdesc & 0xF), dst, src0, src1, exdesc, desc);
1215 }
sends(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1,uint32_t exdesc,const RegData & desc)1216 void sends(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1, uint32_t exdesc, const RegData &desc) {
1217 send(mod, static_cast<SharedFunction>(exdesc & 0xF), dst, src0, src1, exdesc, desc);
1218 }
sends(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1,const RegData & exdesc,uint32_t desc)1219 void sends(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1, const RegData &exdesc, uint32_t desc) {
1220 #ifdef NGEN_SAFE
1221 if (isGen12) throw sfid_needed_exception();
1222 #endif
1223 send(mod, static_cast<SharedFunction>(0), dst, src0, src1, exdesc, desc);
1224 }
sends(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1,const RegData & exdesc,const RegData & desc)1225 void sends(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1, const RegData &exdesc, const RegData &desc) {
1226 #ifdef NGEN_SAFE
1227 if (isGen12) throw sfid_needed_exception();
1228 #endif
1229 send(mod, static_cast<SharedFunction>(0), dst, src0, src1, exdesc, desc);
1230 }
sendsc(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1,uint32_t exdesc,uint32_t desc)1231 void sendsc(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1, uint32_t exdesc, uint32_t desc) {
1232 sendc(mod, static_cast<SharedFunction>(exdesc & 0xF), dst, src0, src1, exdesc, desc);
1233 }
sendsc(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1,uint32_t exdesc,const RegData & desc)1234 void sendsc(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1, uint32_t exdesc, const RegData &desc) {
1235 sendc(mod, static_cast<SharedFunction>(exdesc & 0xF), dst, src0, src1, exdesc, desc);
1236 }
sendsc(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1,const RegData & exdesc,uint32_t desc)1237 void sendsc(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1, const RegData &exdesc, uint32_t desc) {
1238 #ifdef NGEN_SAFE
1239 if (isGen12) throw sfid_needed_exception();
1240 #endif
1241 sendc(mod, static_cast<SharedFunction>(0), dst, src0, src1, exdesc, desc);
1242 }
sendsc(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1,const RegData & exdesc,const RegData & desc)1243 void sendsc(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1, const RegData &exdesc, const RegData &desc) {
1244 #ifdef NGEN_SAFE
1245 if (isGen12) throw sfid_needed_exception();
1246 #endif
1247 sendc(mod, static_cast<SharedFunction>(0), dst, src0, src1, exdesc, desc);
1248 }
1249
1250 template <typename DT = void>
shl(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1)1251 void shl(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
1252 opX(isGen12 ? Opcode::shl_gen12 : Opcode::shl, getDataType<DT>(), mod, dst, src0, src1);
1253 }
1254 template <typename DT = void>
shl(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const Immediate & src1)1255 void shl(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) {
1256 opX(isGen12 ? Opcode::shl_gen12 : Opcode::shl, getDataType<DT>(), mod, dst, src0, src1);
1257 }
1258 template <typename DT = void>
shr(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1)1259 void shr(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
1260 opX(isGen12 ? Opcode::shr_gen12 : Opcode::shr, getDataType<DT>(), mod, dst, src0, src1);
1261 }
1262 template <typename DT = void>
shr(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const Immediate & src1)1263 void shr(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) {
1264 opX(isGen12 ? Opcode::shr_gen12 : Opcode::shr, getDataType<DT>(), mod, dst, src0, src1);
1265 }
1266 template <typename DT = void>
smov(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const Immediate & src1)1267 void smov(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) {
1268 opX(isGen12 ? Opcode::smov_gen12 : Opcode::smov, getDataType<DT>(), mod, dst, src0, src1);
1269 }
1270 template <typename DT = void>
srnd(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1)1271 void srnd(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
1272 opX(Opcode::srnd, getDataType<DT>(), mod, dst, src0, src1);
1273 }
1274 template <typename DT = void>
srnd(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const Immediate & src1)1275 void srnd(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) {
1276 opX(Opcode::srnd, getDataType<DT>(), mod, dst, src0, src1);
1277 }
1278 template <typename DT = void>
subb(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1)1279 void subb(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
1280 opX(Opcode::subb, getDataType<DT>(), (hardware >= HW::XeHPC) ? mod : (mod | AccWrEn), dst, src0, src1);
1281 }
1282 template <typename DT = void>
subb(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const Immediate & src1)1283 void subb(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) {
1284 opX(Opcode::subb, getDataType<DT>(), (hardware >= HW::XeHPC) ? mod : (mod | AccWrEn), dst, src0, src1);
1285 }
wait(const InstructionModifier & mod,const RegData & nreg)1286 void wait(const InstructionModifier &mod, const RegData &nreg) {
1287 opX(Opcode::wait, mod, NoOperand(), nreg);
1288 }
while_(const InstructionModifier & mod,Label & jip)1289 void while_(const InstructionModifier &mod, Label &jip) {
1290 (void) jip.getID(labelManager);
1291 opX(Opcode::while_, mod, jip);
1292 }
1293 template <typename DT = void>
xor_(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1)1294 void xor_(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
1295 opX(isGen12 ? Opcode::xor_gen12 : Opcode::xor_, getDataType<DT>(), mod, dst, src0, src1);
1296 }
1297 template <typename DT = void>
xor_(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const Immediate & src1)1298 void xor_(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) {
1299 opX(isGen12 ? Opcode::xor_gen12 : Opcode::xor_, getDataType<DT>(), mod, dst, src0, src1);
1300 }
1301 #ifndef NGEN_NO_OP_NAMES
1302 template <typename DT = void>
xor(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1)1303 void xor(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
1304 xor_<DT>(mod, dst, src0, src1);
1305 }
1306 template <typename DT = void>
xor(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const Immediate & src1)1307 void xor(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) {
1308 xor_<DT>(mod, dst, src0, src1);
1309 }
1310 #endif
1311
1312 private:
1313 struct Sync {
1314 AsmCodeGenerator &parent;
1315
Syncngen::AsmCodeGenerator::Sync1316 Sync(AsmCodeGenerator *parent_) : parent(*parent_) {}
1317
operator ()ngen::AsmCodeGenerator::Sync1318 void operator()(SyncFunction fc, const InstructionModifier &mod = InstructionModifier()) {
1319 parent.opSync(Opcode::sync, fc, mod, null);
1320 }
operator ()ngen::AsmCodeGenerator::Sync1321 void operator()(SyncFunction fc, const RegData &src0) {
1322 this->operator()(fc, InstructionModifier(), src0);
1323 }
operator ()ngen::AsmCodeGenerator::Sync1324 void operator()(SyncFunction fc, const InstructionModifier &mod, const RegData &src0) {
1325 parent.opSync(Opcode::sync, fc, mod, src0);
1326 }
operator ()ngen::AsmCodeGenerator::Sync1327 void operator()(SyncFunction fc, int src0) {
1328 this->operator()(fc, InstructionModifier(), src0);
1329 }
operator ()ngen::AsmCodeGenerator::Sync1330 void operator()(SyncFunction fc, const InstructionModifier &mod, int src0) {
1331 parent.opSync(Opcode::sync, fc, mod, Immediate::ud(src0));
1332 }
allrdngen::AsmCodeGenerator::Sync1333 void allrd() {
1334 allrd(null);
1335 }
allrdngen::AsmCodeGenerator::Sync1336 void allrd(const InstructionModifier &mod) {
1337 allrd(mod, null);
1338 }
allrdngen::AsmCodeGenerator::Sync1339 void allrd(const RegData &src0) {
1340 allrd(InstructionModifier(), src0);
1341 }
allrdngen::AsmCodeGenerator::Sync1342 void allrd(const InstructionModifier &mod, const RegData &src0) {
1343 this->operator()(SyncFunction::allrd, mod, src0);
1344 }
allrdngen::AsmCodeGenerator::Sync1345 void allrd(uint32_t src0) {
1346 allrd(InstructionModifier(), src0);
1347 }
allrdngen::AsmCodeGenerator::Sync1348 void allrd(const InstructionModifier &mod, uint32_t src0) {
1349 this->operator()(SyncFunction::allrd, mod, src0);
1350 }
allwrngen::AsmCodeGenerator::Sync1351 void allwr() {
1352 allwr(null);
1353 }
allwrngen::AsmCodeGenerator::Sync1354 void allwr(const InstructionModifier &mod) {
1355 allwr(mod, null);
1356 }
allwrngen::AsmCodeGenerator::Sync1357 void allwr(const RegData &src0) {
1358 allwr(InstructionModifier(), src0);
1359 }
allwrngen::AsmCodeGenerator::Sync1360 void allwr(const InstructionModifier &mod, const RegData &src0) {
1361 this->operator()(SyncFunction::allwr, mod, src0);
1362 }
allwrngen::AsmCodeGenerator::Sync1363 void allwr(uint32_t src0) {
1364 allwr(InstructionModifier(), src0);
1365 }
allwrngen::AsmCodeGenerator::Sync1366 void allwr(const InstructionModifier &mod, uint32_t src0) {
1367 this->operator()(SyncFunction::allwr, mod, src0);
1368 }
barngen::AsmCodeGenerator::Sync1369 void bar(const InstructionModifier &mod = InstructionModifier()) {
1370 this->operator()(SyncFunction::bar, mod);
1371 }
barngen::AsmCodeGenerator::Sync1372 void bar(const InstructionModifier &mod, uint32_t src0) {
1373 this->operator()(SyncFunction::bar, mod, src0);
1374 }
barngen::AsmCodeGenerator::Sync1375 void bar(const InstructionModifier &mod, const RegData &src0) {
1376 this->operator()(SyncFunction::bar, mod, src0);
1377 }
barngen::AsmCodeGenerator::Sync1378 void bar(uint32_t src0) {
1379 this->operator()(SyncFunction::bar, InstructionModifier(), src0);
1380 }
barngen::AsmCodeGenerator::Sync1381 void bar(const RegData &src0) {
1382 this->operator()(SyncFunction::bar, InstructionModifier(), src0);
1383 }
hostngen::AsmCodeGenerator::Sync1384 void host(const InstructionModifier &mod = InstructionModifier()) {
1385 this->operator()(SyncFunction::host, mod);
1386 }
nopngen::AsmCodeGenerator::Sync1387 void nop(const InstructionModifier &mod = InstructionModifier()) {
1388 this->operator()(SyncFunction::nop, mod);
1389 }
1390 };
1391 public:
1392 Sync sync;
1393
1394 private:
1395 struct Load {
1396 AsmCodeGenerator &parent;
1397
Loadngen::AsmCodeGenerator::Load1398 Load(AsmCodeGenerator *parent_) : parent(*parent_) {}
1399
1400 template <typename DataSpec>
operator ()ngen::AsmCodeGenerator::Load1401 void operator()(const InstructionModifier &mod, const RegData &dst, const DataSpec &spec, AddressBase base, const RegData &addr)
1402 {
1403 MessageDescriptor desc;
1404 ExtendedMessageDescriptor exdesc;
1405
1406 encodeLoadDescriptors(parent.hardware, desc, exdesc, mod, dst, spec, base, addr);
1407 parent.send(mod, dst, addr, exdesc.all, desc.all);
1408 }
1409
1410 template <typename DataSpec>
operator ()ngen::AsmCodeGenerator::Load1411 void operator()(const InstructionModifier &mod, const RegData &dst, const DataSpec &spec, AddressBase base, const GRFDisp &addr)
1412 {
1413 MessageDescriptor desc;
1414 ExtendedMessageDescriptor exdesc;
1415
1416 encodeLoadDescriptors(parent.hardware, desc, exdesc, mod, dst, spec, base, addr);
1417 parent.send(mod, dst, addr.getBase(), exdesc.all, desc.all);
1418 }
1419
1420 template <typename DataSpec>
operator ()ngen::AsmCodeGenerator::Load1421 void operator()(SharedFunction sfid, const InstructionModifier &mod, const RegData &dst, const DataSpec &spec, AddressBase base, const GRFDisp &addr)
1422 {
1423 MessageDescriptor desc;
1424 ExtendedMessageDescriptor exdesc;
1425
1426 exdesc.parts.sfid = static_cast<unsigned>(sfid);
1427 encodeLoadDescriptors(parent.hardware, desc, exdesc, mod, dst, spec, base, addr);
1428 exdesc.parts.sfid = static_cast<unsigned>(sfid);
1429 parent.send(mod, dst, addr.getBase(), exdesc.all, desc.all);
1430 }
1431
ugmngen::AsmCodeGenerator::Load1432 void ugm(const InstructionModifier &mod, const RegData &dst, DataSpecLSC spec, AddressBase base, const GRFDisp &addr)
1433 {
1434 this->operator()(SharedFunction::ugm, mod, dst, spec, base, addr);
1435 }
ugmlngen::AsmCodeGenerator::Load1436 void ugml(const InstructionModifier &mod, const RegData &dst, DataSpecLSC spec, AddressBase base, const GRFDisp &addr)
1437 {
1438 this->operator()(SharedFunction::ugml, mod, dst, spec, base, addr);
1439 }
tgmngen::AsmCodeGenerator::Load1440 void tgm(const InstructionModifier &mod, const RegData &dst, DataSpecLSC spec, AddressBase base, const GRFDisp &addr)
1441 {
1442 this->operator()(SharedFunction::tgm, mod, dst, spec, base, addr);
1443 }
slmngen::AsmCodeGenerator::Load1444 void slm(const InstructionModifier &mod, const RegData &dst, DataSpecLSC spec, AddressBase base, const GRFDisp &addr)
1445 {
1446 this->operator()(SharedFunction::slm, mod, dst, spec, base, addr);
1447 }
1448 };
1449
1450 struct Store {
1451 AsmCodeGenerator &parent;
1452
Storengen::AsmCodeGenerator::Store1453 Store(AsmCodeGenerator *parent_) : parent(*parent_) {}
1454
1455 template <typename DataSpec>
operator ()ngen::AsmCodeGenerator::Store1456 void operator()(const InstructionModifier &mod, const DataSpec &spec, AddressBase base, const RegData &addr, const RegData &data)
1457 {
1458 MessageDescriptor desc;
1459 ExtendedMessageDescriptor exdesc;
1460
1461 encodeStoreDescriptors(parent.hardware, desc, exdesc, mod, spec, base, addr);
1462 parent.sends(mod, NullRegister(), addr, data, exdesc.all, desc.all);
1463 }
1464
1465 template <typename DataSpec>
operator ()ngen::AsmCodeGenerator::Store1466 void operator()(const InstructionModifier &mod, const DataSpec &spec, AddressBase base, const GRFDisp &addr, const RegData &data)
1467 {
1468 MessageDescriptor desc;
1469 ExtendedMessageDescriptor exdesc;
1470
1471 encodeStoreDescriptors(parent.hardware, desc, exdesc, mod, spec, base, addr);
1472 parent.sends(mod, NullRegister(), addr.getBase(), data, exdesc.all, desc.all);
1473 }
1474
1475 template <typename DataSpec>
operator ()ngen::AsmCodeGenerator::Store1476 void operator()(SharedFunction sfid, const InstructionModifier &mod, const DataSpec &spec, AddressBase base, const GRFDisp &addr, const RegData &data)
1477 {
1478 MessageDescriptor desc;
1479 ExtendedMessageDescriptor exdesc;
1480
1481 exdesc.parts.sfid = static_cast<unsigned>(sfid);
1482 encodeStoreDescriptors(parent.hardware, desc, exdesc, mod, spec, base, addr);
1483 exdesc.parts.sfid = static_cast<unsigned>(sfid);
1484 parent.sends(mod, NullRegister(), addr.getBase(), data, exdesc.all, desc.all);
1485 }
1486
ugmngen::AsmCodeGenerator::Store1487 void ugm(const InstructionModifier &mod, DataSpecLSC spec, AddressBase base, const GRFDisp &addr, const RegData &data)
1488 {
1489 this->operator()(SharedFunction::ugm, mod, spec, base, addr, data);
1490 }
ugmlngen::AsmCodeGenerator::Store1491 void ugml(const InstructionModifier &mod, DataSpecLSC spec, AddressBase base, const GRFDisp &addr, const RegData &data)
1492 {
1493 this->operator()(SharedFunction::ugml, mod, spec, base, addr, data);
1494 }
tgmngen::AsmCodeGenerator::Store1495 void tgm(const InstructionModifier &mod, DataSpecLSC spec, AddressBase base, const GRFDisp &addr, const RegData &data)
1496 {
1497 this->operator()(SharedFunction::tgm, mod, spec, base, addr, data);
1498 }
slmngen::AsmCodeGenerator::Store1499 void slm(const InstructionModifier &mod, DataSpecLSC spec, AddressBase base, const GRFDisp &addr, const RegData &data)
1500 {
1501 this->operator()(SharedFunction::slm, mod, spec, base, addr, data);
1502 }
1503 };
1504
1505 struct Atomic {
1506 AsmCodeGenerator &parent;
1507
Atomicngen::AsmCodeGenerator::Atomic1508 Atomic(AsmCodeGenerator *parent_) : parent(*parent_) {}
1509
1510 template <typename DataSpec>
operator ()ngen::AsmCodeGenerator::Atomic1511 void operator()(AtomicOp op, const InstructionModifier &mod, const RegData &dst, const DataSpec &spec, AddressBase base, const RegData &addr, const RegData &data = NullRegister())
1512 {
1513 MessageDescriptor desc;
1514 ExtendedMessageDescriptor exdesc;
1515
1516 encodeAtomicDescriptors(parent.hardware, desc, exdesc, op, mod, dst, spec, base, addr);
1517 if (data.isNull())
1518 parent.send(mod, dst, addr, exdesc.all, desc.all);
1519 else
1520 parent.sends(mod, dst, addr, data, exdesc.all, desc.all);
1521 }
1522 template <typename DataSpec>
operator ()ngen::AsmCodeGenerator::Atomic1523 void operator()(AtomicOp op, const InstructionModifier &mod, const DataSpec &spec, AddressBase base, const RegData &addr, const RegData &data = NullRegister())
1524 {
1525 (*this)(op, mod, NullRegister(), spec, base, addr, data);
1526 }
1527
1528 template <typename DataSpec>
operator ()ngen::AsmCodeGenerator::Atomic1529 void operator()(AtomicOp op, const InstructionModifier &mod, const RegData &dst, const DataSpec &spec, AddressBase base, const GRFDisp &addr, const RegData &data = NullRegister())
1530 {
1531 MessageDescriptor desc;
1532 ExtendedMessageDescriptor exdesc;
1533
1534 encodeAtomicDescriptors(parent.hardware, desc, exdesc, op, mod, dst, spec, base, addr);
1535 parent.sends(mod, dst, addr.getBase(), data, exdesc.all, desc.all);
1536 }
1537 template <typename DataSpec>
operator ()ngen::AsmCodeGenerator::Atomic1538 void operator()(AtomicOp op, const InstructionModifier &mod, const DataSpec &spec, AddressBase base, const GRFDisp &addr, const RegData &data = NullRegister())
1539 {
1540 (*this)(op, mod, NullRegister(), spec, base, addr, data);
1541 }
1542 template <typename DataSpec>
operator ()ngen::AsmCodeGenerator::Atomic1543 void operator()(SharedFunction sfid, AtomicOp op, const InstructionModifier &mod, const RegData &dst, const DataSpec &spec, AddressBase base, const GRFDisp &addr, const RegData &data)
1544 {
1545 MessageDescriptor desc;
1546 ExtendedMessageDescriptor exdesc;
1547
1548 exdesc.parts.sfid = static_cast<unsigned>(sfid);
1549 encodeAtomicDescriptors(parent.hardware, desc, exdesc, op, mod, dst, spec, base, addr);
1550 exdesc.parts.sfid = static_cast<unsigned>(sfid);
1551 parent.sends(mod, dst, addr.getBase(), data, exdesc.all, desc.all);
1552 }
1553
ugmngen::AsmCodeGenerator::Atomic1554 void ugm(AtomicOp op, const InstructionModifier &mod, const RegData &dst, DataSpecLSC spec, AddressBase base, const GRFDisp &addr, const RegData &data = NullRegister())
1555 {
1556 this->operator()(SharedFunction::ugm, op, mod, dst, spec, base, addr, data);
1557 }
ugmngen::AsmCodeGenerator::Atomic1558 void ugm(AtomicOp op, const InstructionModifier &mod, DataSpecLSC spec, AddressBase base, const GRFDisp &addr, const RegData &data = NullRegister())
1559 {
1560 this->operator()(SharedFunction::ugm, op, mod, NullRegister(), spec, base, addr, data);
1561 }
ugmlngen::AsmCodeGenerator::Atomic1562 void ugml(AtomicOp op, const InstructionModifier &mod, const RegData &dst, DataSpecLSC spec, AddressBase base, const GRFDisp &addr, const RegData &data = NullRegister())
1563 {
1564 this->operator()(SharedFunction::ugml, op, mod, dst, spec, base, addr, data);
1565 }
ugmlngen::AsmCodeGenerator::Atomic1566 void ugml(AtomicOp op, const InstructionModifier &mod, DataSpecLSC spec, AddressBase base, const GRFDisp &addr, const RegData &data = NullRegister())
1567 {
1568 this->operator()(SharedFunction::ugml, op, mod, NullRegister(), spec, base, addr, data);
1569 }
tgmngen::AsmCodeGenerator::Atomic1570 void tgm(AtomicOp op, const InstructionModifier &mod, const RegData &dst, DataSpecLSC spec, AddressBase base, const GRFDisp &addr, const RegData &data = NullRegister())
1571 {
1572 this->operator()(SharedFunction::tgm, op, mod, dst, spec, base, addr, data);
1573 }
tgmngen::AsmCodeGenerator::Atomic1574 void tgm(AtomicOp op, const InstructionModifier &mod, DataSpecLSC spec, AddressBase base, const GRFDisp &addr, const RegData &data = NullRegister())
1575 {
1576 this->operator()(SharedFunction::tgm, op, mod, NullRegister(), spec, base, addr, data);
1577 }
slmngen::AsmCodeGenerator::Atomic1578 void slm(AtomicOp op, const InstructionModifier &mod, const RegData &dst, DataSpecLSC spec, AddressBase base, const GRFDisp &addr, const RegData &data = NullRegister())
1579 {
1580 this->operator()(SharedFunction::slm, op, mod, dst, spec, base, addr, data);
1581 }
slmngen::AsmCodeGenerator::Atomic1582 void slm(AtomicOp op, const InstructionModifier &mod, DataSpecLSC spec, AddressBase base, const GRFDisp &addr, const RegData &data = NullRegister())
1583 {
1584 this->operator()(SharedFunction::slm, op, mod, NullRegister(), spec, base, addr, data);
1585 }
1586 };
1587 public:
1588 Load load;
1589 Store store;
1590 Atomic atomic;
1591
wrdep(const GRFRange & r)1592 void wrdep(const GRFRange &r) {
1593 opX(Opcode::wrdep, DataType::ud, InstructionModifier::createAutoSWSB(), null, r);
1594 }
// Single-register convenience form of wrdep: builds `r - r` (presumably the
// one-register range starting and ending at `r` -- confirm against the
// GRF operator-) and delegates to the overload taking that type.
void wrdep(const GRF &r) {
    const auto single = r - r;
    wrdep(single);
}
1598
mark(Label & label)1599 inline void mark(Label &label) { streamStack.back()->mark(label, labelManager); }
1600
1601 #include "ngen_pseudo.hpp"
1602 #ifndef NGEN_GLOBAL_REGS
1603 #include "ngen_registers.hpp"
1604 #endif
1605 };
1606
1607
unsupported()1608 void AsmCodeGenerator::unsupported()
1609 {
1610 #ifdef NGEN_SAFE
1611 throw unsupported_instruction();
1612 #endif
1613 }
1614
popStream()1615 AsmCodeGenerator::InstructionStream *AsmCodeGenerator::popStream()
1616 {
1617 #ifdef NGEN_SAFE
1618 if (streamStack.size() <= 1) throw stream_stack_underflow();
1619 #endif
1620
1621 InstructionStream *result = streamStack.back();
1622 streamStack.pop_back();
1623 return result;
1624 }
1625
finalize()1626 void AsmCodeGenerator::finalize()
1627 {
1628 #ifdef NGEN_SAFE
1629 if (streamStack.size() > 1) throw unfinished_stream_exception();
1630 #endif
1631 auto &buffer = streamStack.back()->buffer;
1632 int inum = 0;
1633 for (auto &i : buffer)
1634 i.inum = inum++;
1635 }
1636
getCode(std::ostream & out)1637 void AsmCodeGenerator::getCode(std::ostream &out)
1638 {
1639 finalize();
1640
1641 autoswsb::BasicBlockList analysis = autoswsb::autoSWSB(hardware, streamStack.back()->buffer);
1642 std::multimap<int32_t, autoswsb::SyncInsertion*> syncs; // Syncs inserted by auto-SWSB.
1643
1644 for (auto &bb : analysis)
1645 for (auto &sync : bb.syncs)
1646 syncs.insert(std::make_pair(sync.inum, &sync));
1647
1648 auto nextSync = syncs.begin();
1649 int lineNo = 0;
1650
1651 for (auto &i : streamStack.back()->buffer) {
1652 if (i.isLabel()) {
1653 i.dst.label.outputText(out, PrintDetail::full, labelManager);
1654 out << ':' << std::endl;
1655 } else if (i.isComment()) {
1656 out << "// " << i.comment << std::endl;
1657 } else if (i.op != Opcode::wrdep) {
1658 while ((nextSync != syncs.end()) && (nextSync->second->inum == i.inum))
1659 outX(out, *(nextSync++)->second, lineNo++);
1660 outX(out, i, lineNo++);
1661 }
1662 }
1663 }
1664
1665 template <typename D, typename S0, typename S1, typename S2>
opX(Opcode op,DataType defaultType,const InstructionModifier & mod,D dst,S0 src0,S1 src1,S2 src2,uint16_t ext)1666 void AsmCodeGenerator::opX(Opcode op, DataType defaultType, const InstructionModifier &mod, D dst, S0 src0, S1 src1, S2 src2, uint16_t ext)
1667 {
1668 bool is2Src = !S1::emptyOp;
1669 bool is3Src = !S2::emptyOp;
1670 int arity = 1 + is2Src + is3Src;
1671
1672 InstructionModifier emod = mod | defaultModifier;
1673 auto esize = emod.getExecSize();
1674
1675 if (is3Src && hardware < HW::Gen10)
1676 esize = std::min<int>(esize, 8); // WA for IGA Align16 emulation issue
1677
1678 #ifdef NGEN_SAFE
1679 if (esize > 1 && dst.isScalar())
1680 throw invalid_execution_size_exception();
1681 #endif
1682
1683 dst.fixup(esize, defaultType, true, arity);
1684 src0.fixup(esize, defaultType, false, arity);
1685 src1.fixup(esize, defaultType, false, arity);
1686 src2.fixup(esize, defaultType, false, arity);
1687
1688 streamStack.back()->append(op, ext, emod, dst, src0, src1, src2, NoOperand{}, &labelManager);
1689 }
1690
outX(std::ostream & out,const AsmInstruction & i,int lineNo)1691 void AsmCodeGenerator::outX(std::ostream &out, const AsmInstruction &i, int lineNo)
1692 {
1693 bool ternary = (i.src[2].type != AsmOperand::Type::none);
1694 PrintDetail ddst = PrintDetail::hs;
1695 PrintDetail dsrc01 = ternary ? PrintDetail::vs_hs : PrintDetail::full;
1696 PrintDetail dsrc[4] = {dsrc01, dsrc01, PrintDetail::hs, PrintDetail::base};
1697
1698 switch (i.op) {
1699 case Opcode::send:
1700 case Opcode::sends:
1701 case Opcode::sendc:
1702 case Opcode::sendsc:
1703 ddst = dsrc[0] = dsrc[1] = PrintDetail::base;
1704 dsrc[2] = dsrc[3] = PrintDetail::sub_no_type;
1705 break;
1706 case Opcode::brc:
1707 case Opcode::brd:
1708 case Opcode::call:
1709 case Opcode::calla:
1710 ddst = PrintDetail::sub;
1711 dsrc[0] = PrintDetail::sub_no_type;
1712 break;
1713 case Opcode::jmpi:
1714 case Opcode::ret:
1715 dsrc[0] = PrintDetail::sub_no_type;
1716 break;
1717 case Opcode::dpas:
1718 case Opcode::dpasw:
1719 if (isGen12) ddst = dsrc[0] = dsrc[1] = dsrc[2] = PrintDetail::sub;
1720 break;
1721 case Opcode::sync:
1722 if (isGen12) {
1723 if (i.src[0].type == AsmOperand::Type::reg)
1724 dsrc[0] = PrintDetail::sub;
1725 else
1726 dsrc[0] = PrintDetail::sub_no_type;
1727 }
1728 break;
1729 default: break;
1730 }
1731
1732 outMods(out, i.mod, i.op, ModPlacementType::Pre);
1733
1734 out << getMnemonic(i.op, hardware);
1735 outExt(out, i);
1736 out << '\t';
1737
1738 outMods(out, i.mod, i.op, ModPlacementType::Mid);
1739
1740 i.dst.outputText(out, ddst, labelManager); out << '\t';
1741 for (int n = 0; n < 4; n++) {
1742 i.src[n].outputText(out, dsrc[n], labelManager);
1743 if (hardware >= HW::XeHPG && n == 1 && (i.op == Opcode::send || i.op == Opcode::sendc) && (i.ext & 0x80))
1744 out << ':' << (i.ext >> 8);
1745 out << '\t';
1746 }
1747
1748 outMods(out, i.mod, i.op, ModPlacementType::Post);
1749 if (lineNumbers)
1750 out << "\t// " << lineNo * 2;
1751 out << std::endl;
1752 }
1753
outExt(std::ostream & out,const AsmInstruction & i)1754 void AsmCodeGenerator::outExt(std::ostream &out, const AsmInstruction &i)
1755 {
1756 switch (i.opcode()) {
1757 case Opcode::else_:
1758 case Opcode::goto_:
1759 case Opcode::if_: if (i.ext) out << ".b"; break;
1760 case Opcode::math: out << '.' << static_cast<MathFunction>(i.ext); break;
1761 default: break;
1762 }
1763
1764 if (isGen12) switch (i.opcode()) {
1765 case Opcode::send:
1766 case Opcode::sends: out << '.' << getMnemonic(static_cast<SharedFunction>(i.ext & 0xF), hardware); break;
1767 case Opcode::sync: out << '.' << static_cast<SyncFunction>(i.ext); break;
1768 case Opcode::bfn: out << ".0x" << std::hex << i.ext << std::dec; break;
1769 case Opcode::dpas:
1770 case Opcode::dpasw: {
1771 int sdepth = i.ext >> 8;
1772 int rcount = i.ext & 0xFF;
1773 out << '.' << sdepth << 'x' << rcount;
1774 }
1775 default: break;
1776 }
1777 }
1778
outMods(std::ostream & out,const InstructionModifier & mod,Opcode op,AsmCodeGenerator::ModPlacementType location)1779 void AsmCodeGenerator::outMods(std::ostream &out,const InstructionModifier &mod, Opcode op, AsmCodeGenerator::ModPlacementType location)
1780 {
1781 ConditionModifier cmod = mod.getCMod();
1782 PredCtrl ctrl = mod.getPredCtrl();
1783 bool wrEn = mod.isWrEn();
1784 bool havePred = (ctrl != PredCtrl::None) && (cmod != ConditionModifier::eo);
1785
1786 switch (location) {
1787 case ModPlacementType::Pre:
1788 if (wrEn || havePred) {
1789 out << '(';
1790 if (wrEn) {
1791 out << 'W';
1792 if (havePred) out << '&';
1793 }
1794 if (havePred) {
1795 if (mod.isPredInv()) out << '~';
1796 mod.getFlagReg().outputText(out, PrintDetail::sub_no_type, labelManager);
1797 if (ctrl != PredCtrl::Normal)
1798 out << '.' << toText(ctrl, mod.isAlign16());
1799 }
1800 out << ')';
1801 }
1802 out << '\t';
1803 break;
1804 case ModPlacementType::Mid:
1805 if (mod.getExecSize() > 0)
1806 out << '(' << mod.getExecSize() << "|M" << mod.getChannelOffset() << ')' << '\t';
1807
1808 if (cmod != ConditionModifier::none) {
1809 out << '(' << cmod << ')';
1810 mod.getFlagReg().outputText(out, PrintDetail::sub_no_type, labelManager);
1811 out << '\t';
1812 }
1813
1814 if (mod.isSaturate()) out << "(sat)";
1815 break;
1816 case ModPlacementType::Post:
1817 {
1818 bool havePostMod = false;
1819 auto startPostMod = [&]() {
1820 out << (havePostMod ? ',' : '{');
1821 havePostMod = true;
1822 };
1823 auto printPostMod = [&](const char *name) {
1824 startPostMod(); out << name;
1825 };
1826
1827 SWSBInfo swsb = mod.getSWSB();
1828 if (swsb.hasToken()) {
1829 startPostMod(); out << '$' << swsb.parts.token;
1830 if (swsb.parts.src && !swsb.parts.dst) out << ".src";
1831 if (swsb.parts.dst && !swsb.parts.src) out << ".dst";
1832 }
1833 if (swsb.hasDist()) {
1834 startPostMod();
1835 if (hardware > HW::Gen12LP && (op == Opcode::send || op == Opcode::sendc) && swsb.getPipe() == Pipe::Default)
1836 out << Pipe::A;
1837 else if (hardware > HW::Gen12LP || !swsb.hasToken())
1838 out << swsb.getPipe();
1839 out << '@' << swsb.parts.dist;
1840 }
1841
1842 if (mod.isAlign16()) printPostMod("Align16");
1843 if (mod.isNoDDClr()) printPostMod("NoDDClr");
1844 if (mod.isNoDDChk()) printPostMod("NoDDChk");
1845 if (mod.getThreadCtrl() == ThreadCtrl::Atomic) printPostMod("Atomic");
1846 if (!isGen12 && mod.getThreadCtrl() == ThreadCtrl::Switch) printPostMod("Switch");
1847 if (!isGen12 && mod.getThreadCtrl() == ThreadCtrl::NoPreempt) printPostMod("NoPreempt");
1848 if (mod.isAccWrEn()) printPostMod("AccWrEn");
1849 if (mod.isCompact()) printPostMod("Compact");
1850 if (mod.isBreakpoint()) printPostMod("Breakpoint");
1851 if (mod.isSerialized()) printPostMod("Serialize");
1852 if (mod.isEOT()) printPostMod("EOT");
1853
1854 if (havePostMod) out << '}';
1855 }
1856 break;
1857 }
1858 }
1859
1860 } /* namespace ngen */
1861
1862 #endif
1863