1 /*******************************************************************************
2 * Copyright 2019-2021 Intel Corporation
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *     http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 *******************************************************************************/
16 
17 // nGEN: a C++ library for runtime Gen assembly generation.
18 //
19 // Macros that control nGEN's interface:
20 //    NGEN_SAFE             if defined, enables run-time safety checks. Exceptions will be thrown if checks fail.
21 //    NGEN_SHORT_NAMES      if defined, enables some short names (r[...] for indirect addressing, W for NoMask)
22 //    NGEN_GLOBAL_REGS      if defined, register names and instruction modifiers (r7, cr0, Switch, etc.) are
23 //                           global variables in the ngen namespace. Otherwise, they are members of the code
24 //                           generator classes
25 //    NGEN_CPP11            if defined, ngen is C++11-compatible (C++17 not required)
26 
27 #ifndef NGEN_HPP
28 #define NGEN_HPP
29 
30 #include "ngen_config.hpp"
31 
32 #include <array>
33 #include <cstring>
34 #include <type_traits>
35 #include <vector>
36 
37 #include "ngen_core.hpp"
38 #include "ngen_auto_swsb.hpp"
39 
40 namespace ngen {
41 
42 // Forward declarations.
43 template <HW hw> class BinaryCodeGenerator;
44 template <HW hw> class ELFCodeGenerator;
45 
46 // MSVC v140 workaround for enum comparison in template arguments.
hwLT(HW hw1,HW hw2)47 static constexpr bool hwLT(HW hw1, HW hw2) { return hw1 < hw2; }
hwLE(HW hw1,HW hw2)48 static constexpr bool hwLE(HW hw1, HW hw2) { return hw1 <= hw2; }
hwGE(HW hw1,HW hw2)49 static constexpr bool hwGE(HW hw1, HW hw2) { return hw1 >= hw2; }
hwGT(HW hw1,HW hw2)50 static constexpr bool hwGT(HW hw1, HW hw2) { return hw1 > hw2; }
51 
52 // -----------------------------------------------------------------------
53 
// Register file selectors, as returned by the getRegFile() overloads.
enum RegFiles : unsigned {
    RegFileARF = 0,     // Returned for a RegData whose isARF() is true.
    RegFileGRF = 1,     // Returned for any other RegData.
    RegFileIMM = 3,     // Returned for Immediate operands.
};
59 
getRegFile(const RegData & rd)60 inline unsigned getRegFile(const RegData &rd)          { return rd.isARF() ? RegFileARF : RegFileGRF; }
getRegFile(const Align16Operand & o)61 inline unsigned getRegFile(const Align16Operand &o)    { return getRegFile(o.getReg()); }
getRegFile(const ExtendedReg & reg)62 inline unsigned getRegFile(const ExtendedReg &reg)     { return getRegFile(reg.getBase()); }
getRegFile(const Immediate & imm)63 inline unsigned getRegFile(const Immediate &imm)       { return RegFileIMM; }
64 
65 // -----------------------------------------------------------------------
66 // Binary formats, split between pre-Gen12 and post-Gen12.
67 
68 #include "ngen_gen8.hpp"
69 #include "ngen_gen12.hpp"
70 
71 // -----------------------------------------------------------------------
72 
73 
// One pending reference to a label from within an instruction stream.
//   labelID  ID of the referenced label.
//   anchor   starts at 0; InstructionStream::addFixup() overwrites it with the
//            stream length (in bytes) at the time the fixup is recorded.
//   offset   byte displacement, relative to anchor, of the 32-bit field that
//            InstructionStream::fixLabels() patches.
class LabelFixup {
public:
    // Predefined displacements; presumably passed as `offset_` by the branch
    // emitters (their use is outside this section).
    static constexpr auto JIPOffset = 12;
    static constexpr auto JIPOffsetJMPI = -4;
    static constexpr auto UIPOffset = 8;

    uint32_t labelID;
    int32_t anchor = 0;
    int32_t offset;

    LabelFixup(uint32_t labelID_, int32_t offset_)
        : labelID{labelID_}, offset{offset_}
    {
    }
};
86 
87 #if defined(NGEN_GLOBAL_REGS) && !defined(NGEN_GLOBAL_REGS_DEFINED)
88 #define NGEN_GLOBAL_REGS_DEFINED
89 #include "ngen_registers.hpp"
90 #endif
91 
92 template <HW hw>
93 class BinaryCodeGenerator
94 {
95     friend class ELFCodeGenerator<hw>;
96 
97 public:
98     static constexpr HW hardware = hw;
99 
100 protected:
101     class InstructionStream {
102         friend class BinaryCodeGenerator;
103 
104         std::vector<LabelFixup> fixups;
105         std::vector<uint32_t> labels;
106         std::vector<uint64_t> code;
107         bool appended = false;
108 
length() const109         int length() const { return int(code.size() * sizeof(uint64_t)); }
110 
db(const Instruction8 & i)111         void db(const Instruction8 &i) {
112             code.push_back(i.qword[0]);
113             code.push_back(i.qword[1]);
114         }
115 
db(const Instruction12 & i)116         void db(const Instruction12 &i) {
117             code.push_back(i.qword[0]);
118             code.push_back(i.qword[1]);
119         }
120 
addFixup(LabelFixup fixup)121         void addFixup(LabelFixup fixup) {
122             fixup.anchor = length();
123             fixups.push_back(fixup);
124         }
125 
mark(Label & label,LabelManager & man)126         void mark(Label &label, LabelManager &man) {
127             uint32_t id = label.getID(man);
128 
129             man.setTarget(id, length());
130             labels.push_back(id);
131         }
132 
fixLabels(LabelManager & man)133         void fixLabels(LabelManager &man) {
134             for (const auto &fixup : fixups) {
135                 int32_t target = man.getTarget(fixup.labelID);
136                 uint8_t *field = ((uint8_t *) code.data()) + fixup.anchor + fixup.offset;
137                 *((int32_t *) field) = target - fixup.anchor;
138             }
139         }
140 
append(InstructionStream & other,LabelManager & man)141         void append(InstructionStream &other, LabelManager &man) {
142             auto offset = length();
143             auto sz = code.size();
144 
145             code.resize(sz + other.code.size());
146             std::copy(other.code.begin(), other.code.end(), code.begin() + sz);
147 
148             sz = labels.size();
149             labels.resize(sz + other.labels.size());
150             std::copy(other.labels.begin(), other.labels.end(), labels.begin() + sz);
151 
152             for (LabelFixup fixup : other.fixups) {
153                 fixup.anchor += offset;
154                 fixups.push_back(fixup);
155             }
156 
157 #ifdef NGEN_SAFE
158             if (other.appended && !other.labels.empty())
159                 throw multiple_label_exception();
160 #endif
161 
162             for (uint32_t id : other.labels)
163                 man.offsetTarget(id, offset);
164 
165             other.appended = true;
166         }
167 
InstructionStream()168         InstructionStream() {}
169     };
170 
171     class Program {
172         friend class BinaryCodeGenerator;
173         using Instruction = typename std::conditional<(hw >= HW::XeHPC), InstructionXeHPC, Instruction12>::type;
174         std::vector<uint64_t> &code;
175 
Program(InstructionStream & stream)176         Program(InstructionStream &stream) : code(stream.code) {};
177 
178     public:
size() const179         size_t size() const                               { return code.size() >> 1; }
operator [](size_t index)180         Instruction &operator[](size_t index)             { return *reinterpret_cast<Instruction *>(&code[index * 2]); }
operator [](size_t index) const181         const Instruction &operator[](size_t index) const { return *reinterpret_cast<Instruction *>(&code[index * 2]); }
182     };
183 
    // True when the target hardware compares >= HW::Gen12LP.
    static constexpr bool isGen12 = (hw >= HW::Gen12LP);

    // NOTE(review): how these two labels are used is not visible in this section.
    Label _labelLocalIDsLoaded;
    Label _labelArgsLoaded;

private:
    // Modifier whose WrEn / AutoSWSB flags are exposed through
    // set/getDefaultNoMask() and set/getDefaultAutoSWSB().
    InstructionModifier defaultModifier;

    // Handed to the streams whenever labels are marked or streams are appended.
    LabelManager labelManager;
    // Pushed onto streamStack by the constructor.
    InstructionStream rootStream;
    // back() is the stream that db(), addFixup() and mark() operate on; the
    // destructor deletes every entry except index 0.
    std::vector<InstructionStream*> streamStack;
195 
db(const Instruction8 & i)196     void db(const Instruction8 &i)  { streamStack.back()->db(i); }
db(const Instruction12 & i)197     void db(const Instruction12 &i) { streamStack.back()->db(i); }
addFixup(LabelFixup fixup)198     void addFixup(LabelFixup fixup) { streamStack.back()->addFixup(fixup); }
199 
    // opX: generic instruction emitters (declarations only; definitions are outside this section).
    // Each overload is enabled through std::enable_if on hw_ (which defaults to hw):
    // hwLT(hw_, HW::Gen12LP) variants apply to hardware before Gen12LP,
    // hwGE(hw_, HW::Gen12LP) variants to Gen12LP and later.

    // One source operand (generic source, then Immediate source).
    template <bool forceWE = false, typename D, typename S0, HW hw_ = hw>
    typename std::enable_if<hwLT(hw_, HW::Gen12LP)>::type opX(Opcode op, DataType defaultType, const InstructionModifier &mod, D dst, S0 src0);
    template <bool forceWE = false, typename D, typename S0, HW hw_ = hw>
    typename std::enable_if<hwGE(hw_, HW::Gen12LP)>::type opX(Opcode op, DataType defaultType, const InstructionModifier &mod, D dst, S0 src0);
    template <bool forceWE = false, typename D, HW hw_ = hw>
    typename std::enable_if<hwLT(hw_, HW::Gen12LP)>::type opX(Opcode op, DataType defaultType, const InstructionModifier &mod, D dst, const Immediate &src0);
    template <bool forceWE = false, typename D, HW hw_ = hw>
    typename std::enable_if<hwGE(hw_, HW::Gen12LP)>::type opX(Opcode op, DataType defaultType, const InstructionModifier &mod, D dst, const Immediate &src0);

    // Two source operands (generic src1, then Immediate src1).
    template <bool forceWE = false, typename D, typename S0, typename S1, HW hw_ = hw>
    typename std::enable_if<hwLT(hw_, HW::Gen12LP)>::type opX(Opcode op, DataType defaultType, const InstructionModifier &mod, D dst, S0 src0, S1 src1);
    template <bool forceWE = false, typename D, typename S0, typename S1, HW hw_ = hw>
    typename std::enable_if<hwGE(hw_, HW::Gen12LP)>::type opX(Opcode op, DataType defaultType, const InstructionModifier &mod, D dst, S0 src0, S1 src1);
    template <bool forceWE = false, typename D, typename S0, HW hw_ = hw>
    typename std::enable_if<hwLT(hw_, HW::Gen12LP)>::type opX(Opcode op, DataType defaultType, const InstructionModifier &mod, D dst, S0 src0, const Immediate &src1);
    template <bool forceWE = false, typename D, typename S0, HW hw_ = hw>
    typename std::enable_if<hwGE(hw_, HW::Gen12LP)>::type opX(Opcode op, DataType defaultType, const InstructionModifier &mod, D dst, S0 src0, const Immediate &src1);

    // Three source operands. The all-RegData form is enabled only for hw_ <= HW::Gen9,
    // the Align16Operand form only before Gen12LP.
    template <HW hw_ = hw>
    typename std::enable_if<hwLE(hw_, HW::Gen9)>::type opX(Opcode op, DataType defaultType, const InstructionModifier &mod, RegData dst, RegData src0, RegData src1, RegData src2);
    template <HW hw_ = hw>
    typename std::enable_if<hwLT(hw_, HW::Gen12LP)>::type opX(Opcode op, DataType defaultType, const InstructionModifier &mod, Align16Operand dst, Align16Operand src0, Align16Operand src1, Align16Operand src2);
    template <typename D, typename S0, typename S1, typename S2, HW hw_ = hw>
    typename std::enable_if<hwLT(hw_, HW::Gen12LP)>::type opX(Opcode op, DataType defaultType, const InstructionModifier &mod, D dst, S0 src0, S1 src1, S2 src2);
    template <typename D, typename S0, typename S1, typename S2, HW hw_ = hw>
    typename std::enable_if<hwGE(hw_, HW::Gen12LP)>::type opX(Opcode op, DataType defaultType, const InstructionModifier &mod, D dst, S0 src0, S1 src1, S2 src2);
226 
    // opMath: emitters taking a MathFunction selector; dst and src0 share one
    // template type (DS0). Declarations only; defined outside this section.
    template <typename DS0>
    void opMath(Opcode op, DataType defaultType, const InstructionModifier &mod, MathFunction fc, DS0 dst, DS0 src0);
    template <typename DS0, typename S1>
    void opMath(Opcode op, DataType defaultType, const InstructionModifier &mod, MathFunction fc, DS0 dst, DS0 src0, S1 src1);

    // opBfn: emitter taking an extra integer bfnCtrl argument; src1 is always a RegData.
    template <typename D, typename S0, typename S2>
    void opBfn(Opcode op, DataType defaultType, const InstructionModifier &mod, int bfnCtrl, D dst, S0 src0, RegData src1, S2 src2);
    // opDpas: emitter taking sdepth and rcount integers plus four RegData operands.
    void opDpas(Opcode op, DataType defaultType, const InstructionModifier &mod, int sdepth, int rcount, RegData dst, RegData src0, RegData src1, RegData src2);
235 
    // opSend: send-style emitters (declarations only; defined outside this section).
    // Overloads are selected by std::enable_if on hw_: hwLT(hw_, HW::Gen12LP) for
    // hardware before Gen12LP, hwGE(hw_, HW::Gen12LP) for Gen12LP and later.

    // Forms taking an explicit SharedFunction and two sources; exdesc is either
    // a 32-bit value or a register before Gen12LP, and a template type afterwards.
    template <typename D, HW hw_ = hw>
    typename std::enable_if<hwLT(hw_, HW::Gen12LP)>::type opSend(Opcode op, const InstructionModifier &mod, SharedFunction sfid, const RegData &dst, const RegData &src0, const RegData &src1, uint32_t exdesc, D desc);
    template <typename D, HW hw_ = hw>
    typename std::enable_if<hwLT(hw_, HW::Gen12LP)>::type opSend(Opcode op, const InstructionModifier &mod, SharedFunction sfid, const RegData &dst, const RegData &src0, const RegData &src1, const RegData &exdesc, D desc);
    template <typename ED, typename D, HW hw_ = hw>
    typename std::enable_if<hwGE(hw_, HW::Gen12LP)>::type opSend(Opcode op, const InstructionModifier &mod, SharedFunction sfid, const RegData &dst, const RegData &src0, const RegData &src1, ED exdesc, D desc);

    // Forms without a SharedFunction argument, taking a single source.
    template <HW hw_ = hw>
    typename std::enable_if<hwLT(hw_, HW::Gen12LP)>::type opSend(Opcode op, const InstructionModifier &mod, const RegData &dst, const RegData &src0, uint32_t exdesc, uint32_t desc);
    template <HW hw_ = hw>
    typename std::enable_if<hwLT(hw_, HW::Gen12LP)>::type opSend(Opcode op, const InstructionModifier &mod, const RegData &dst, const RegData &src0, uint32_t exdesc, const RegData &desc);
    template <typename D, HW hw_ = hw>
    typename std::enable_if<hwGE(hw_, HW::Gen12LP)>::type opSend(Opcode op, const InstructionModifier &mod, const RegData &dst, const RegData &src0, uint32_t exdesc, D desc);
249 
    // opSends: two-source send emitters without a SharedFunction argument
    // (declarations only). Before Gen12LP exdesc is a template type; from
    // Gen12LP on it is either a 32-bit value or a RegData.
    template <typename ED, typename D, HW hw_ = hw>
    typename std::enable_if<hwLT(hw_, HW::Gen12LP)>::type opSends(Opcode op, const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1, ED exdesc, D desc);
    template <typename D, HW hw_ = hw>
    typename std::enable_if<hwGE(hw_, HW::Gen12LP)>::type opSends(Opcode op, const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1, uint32_t exdesc, D desc);
    template <typename D, HW hw_ = hw>
    typename std::enable_if<hwGE(hw_, HW::Gen12LP)>::type opSends(Opcode op, const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1, RegData exdesc, D desc);
256 
    // opBranch: branch emitters (declarations only; defined outside this section).
    // The numeric and register forms come in pairs selected by std::enable_if on
    // hw_ (before Gen12LP vs. Gen12LP and later).

    // Numeric jip and uip.
    template <HW hw_ = hw>
    typename std::enable_if<hwLT(hw_, HW::Gen12LP)>::type opBranch(Opcode op, const InstructionModifier &mod, const RegData &dst, int32_t jip, int32_t uip);
    template <HW hw_ = hw>
    typename std::enable_if<hwGE(hw_, HW::Gen12LP)>::type opBranch(Opcode op, const InstructionModifier &mod, const RegData &dst, int32_t jip, int32_t uip);
    // Numeric jip only.
    template <bool forceWE = false, HW hw_ = hw>
    typename std::enable_if<hwLT(hw_, HW::Gen12LP)>::type opBranch(Opcode op, const InstructionModifier &mod, const RegData &dst, int32_t jip);
    template <bool forceWE = false, HW hw_ = hw>
    typename std::enable_if<hwGE(hw_, HW::Gen12LP)>::type opBranch(Opcode op, const InstructionModifier &mod, const RegData &dst, int32_t jip);
    // Register source operand.
    template <bool forceWE = false, bool small12 = true, HW hw_ = hw>
    typename std::enable_if<hwLT(hw_, HW::Gen12LP)>::type opBranch(Opcode op, const InstructionModifier &mod, const RegData &dst, const RegData &src0);
    template <bool forceWE = false, bool small12 = true, HW hw_ = hw>
    typename std::enable_if<hwGE(hw_, HW::Gen12LP)>::type opBranch(Opcode op, const InstructionModifier &mod, const RegData &dst, const RegData &src0);

    // Label-based forms (not split by hardware at the declaration level).
    void opBranch(Opcode op, const InstructionModifier &mod, const RegData &dst, Label &jip, Label &uip);
    template <bool forceWE = false>
    void opBranch(Opcode op, const InstructionModifier &mod, const RegData &dst, Label &jip);
    void opCall(Opcode op, const InstructionModifier &mod, const RegData &dst, Label &jip);
274 
    // opJmpi: jmpi emitters (declarations only). The numeric-jip form is split by
    // std::enable_if on hw_ (before Gen12LP vs. Gen12LP and later); the last
    // overload takes a Label instead.
    template <HW hw_ = hw>
    typename std::enable_if<hwLT(hw_, HW::Gen12LP)>::type opJmpi(Opcode op, const InstructionModifier &mod, const RegData &dst, RegData src0, uint32_t jip);
    template <HW hw_ = hw>
    typename std::enable_if<hwGE(hw_, HW::Gen12LP)>::type opJmpi(Opcode op, const InstructionModifier &mod, const RegData &dst, RegData src0, uint32_t jip);
    void opJmpi(Opcode op, const InstructionModifier &mod, const RegData &dst, const RegData &src0, Label &jip);
280 
    // opSync: emitters taking a SyncFunction selector, with no source, a
    // register source, or an immediate source (declarations only).
    void opSync(Opcode op, SyncFunction fc, const InstructionModifier &mod);
    void opSync(Opcode op, SyncFunction fc, const InstructionModifier &mod, RegData src0);
    void opSync(Opcode op, SyncFunction fc, const InstructionModifier &mod, const Immediate &src0);

    // opNop: emitter taking only an opcode (declaration only).
    void opNop(Opcode op);

    // Called by instruction wrappers when the target hardware lacks the
    // instruction (see add3/bfn); its definition is outside this section.
    inline void unsupported();
288 
289 #include "ngen_compiler_fix.hpp"
290 
291 public:
BinaryCodeGenerator()292     BinaryCodeGenerator() : defaultModifier{}, labelManager{}, sync{this}, load{this}, store{this}, atomic{this} {
293         _workaround_();
294         pushStream(rootStream);
295     }
296 
~BinaryCodeGenerator()297     ~BinaryCodeGenerator() {
298         for (size_t sn = 1; sn < streamStack.size(); sn++)
299             delete streamStack[sn];
300     }
301 
302     std::vector<uint8_t> getCode();
getRootStreamLength() const303     size_t getRootStreamLength() const { return rootStream.length(); };
304 
305 protected:
306     // Configuration.
setDefaultNoMask(bool def=true)307     void setDefaultNoMask(bool def = true)          { defaultModifier.setWrEn(def); }
setDefaultAutoSWSB(bool def=true)308     void setDefaultAutoSWSB(bool def = true)        { defaultModifier.setAutoSWSB(def); }
getDefaultNoMask() const309     bool getDefaultNoMask() const                   { return defaultModifier.isWrEn(); }
getDefaultAutoSWSB() const310     bool getDefaultAutoSWSB() const                 { return defaultModifier.isAutoSWSB(); }
311 
312     // Stream handling.
pushStream()313     void pushStream()                               { pushStream(new InstructionStream()); }
pushStream(InstructionStream * s)314     void pushStream(InstructionStream *s)           { streamStack.push_back(s); }
pushStream(InstructionStream & s)315     void pushStream(InstructionStream &s)           { pushStream(&s); }
316 
317     InstructionStream *popStream();
318 
appendStream(InstructionStream * s)319     void appendStream(InstructionStream *s)         { appendStream(*s); }
appendStream(InstructionStream & s)320     void appendStream(InstructionStream &s)         { streamStack.back()->append(s, labelManager); }
appendCurrentStream()321     void appendCurrentStream()                      { InstructionStream *s = popStream(); appendStream(s); delete s; }
322 
discardStream()323     void discardStream()                            { delete popStream(); }
324 
325     template <typename String>
comment(String)326     void comment(String)                            {}
327 
328     // Registers.
329 #ifndef NGEN_GLOBAL_REGS
330 #include "ngen_registers.hpp"
331 #endif
332 
333     // Labels.
mark(Label & label)334     inline void mark(Label &label)          { streamStack.back()->mark(label, labelManager); }
335 
336     // Instructions.
337     template <typename DT = void>
add(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1)338     void add(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
339         opX(Opcode::add, getDataType<DT>(), mod, dst, src0, src1);
340     }
341     template <typename DT = void>
add(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const Immediate & src1)342     void add(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) {
343         opX(Opcode::add, getDataType<DT>(), mod, dst, src0, src1);
344     }
345     template <typename DT = void>
addc(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1)346     void addc(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
347         opX(Opcode::addc, getDataType<DT>(), mod | AccWrEn, dst, src0, src1);
348     }
349     template <typename DT = void>
addc(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const Immediate & src1)350     void addc(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) {
351         opX(Opcode::addc, getDataType<DT>(), mod | AccWrEn, dst, src0, src1);
352     }
353     template <typename DT = void>
add3(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1,const RegData & src2)354     void add3(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1, const RegData &src2) {
355         if (hw < HW::XeHP) unsupported();
356         opX(Opcode::add3, getDataType<DT>(), mod, dst, src0, src1, src2);
357     }
358     template <typename DT = void>
add3(const InstructionModifier & mod,const RegData & dst,const Immediate & src0,const RegData & src1,const RegData & src2)359     void add3(const InstructionModifier &mod, const RegData &dst, const Immediate &src0, const RegData &src1, const RegData &src2) {
360         if (hw < HW::XeHP) unsupported();
361         opX(Opcode::add3, getDataType<DT>(), mod, dst, src0, src1, src2);
362     }
363     template <typename DT = void>
add3(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1,const Immediate & src2)364     void add3(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1, const Immediate &src2) {
365         if (hw < HW::XeHP) unsupported();
366         opX(Opcode::add3, getDataType<DT>(), mod, dst, src0, src1, src2);
367     }
368     template <typename DT = void>
add3(const InstructionModifier & mod,const RegData & dst,const Immediate & src0,const RegData & src1,const Immediate & src2)369     void add3(const InstructionModifier &mod, const RegData &dst, const Immediate &src0, const RegData &src1, const Immediate &src2) {
370         if (hw < HW::XeHP) unsupported();
371         opX(Opcode::add3, getDataType<DT>(), mod, dst, src0, src1, src2);
372     }
373     template <typename DT = void>
and_(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1)374     void and_(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
375         opX(isGen12 ? Opcode::and_gen12 : Opcode::and_, getDataType<DT>(), mod, dst, src0, src1);
376     }
377     template <typename DT = void>
and_(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const Immediate & src1)378     void and_(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) {
379         opX(isGen12 ? Opcode::and_gen12 : Opcode::and_, getDataType<DT>(), mod, dst, src0, src1);
380     }
381 #ifndef NGEN_NO_OP_NAMES
382     template <typename DT = void>
and(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1)383     void and(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
384         and_<DT>(mod, dst, src0, src1);
385     }
386     template <typename DT = void>
and(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const Immediate & src1)387     void and(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) {
388         and_<DT>(mod, dst, src0, src1);
389     }
390 #endif
391     template <typename DT = void>
asr(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1)392     void asr(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
393         opX(isGen12 ? Opcode::asr_gen12 : Opcode::asr, getDataType<DT>(), mod, dst, src0, src1);
394     }
395     template <typename DT = void>
asr(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const Immediate & src1)396     void asr(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) {
397         opX(isGen12 ? Opcode::asr_gen12 : Opcode::asr, getDataType<DT>(), mod, dst, src0, src1);
398     }
399     template <typename DT = void>
avg(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1)400     void avg(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
401         opX(Opcode::avg, getDataType<DT>(), mod, dst, src0, src1);
402     }
403     template <typename DT = void>
avg(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const Immediate & src1)404     void avg(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) {
405         opX(Opcode::avg, getDataType<DT>(), mod, dst, src0, src1);
406     }
407     template <typename DT = void>
bfe(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1,const RegData & src2)408     void bfe(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1, const RegData &src2) {
409         opX(isGen12 ? Opcode::bfe_gen12 : Opcode::bfe, getDataType<DT>(), mod, dst, src0, src1, src2);
410     }
411     template <typename DT = void>
bfe(const InstructionModifier & mod,const RegData & dst,const Immediate & src0,const RegData & src1,const RegData & src2)412     void bfe(const InstructionModifier &mod, const RegData &dst, const Immediate &src0, const RegData &src1, const RegData &src2) {
413         opX(isGen12 ? Opcode::bfe_gen12 : Opcode::bfe, getDataType<DT>(), mod, dst, src0, src1, src2);
414     }
415     template <typename DT = void>
bfe(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1,const Immediate & src2)416     void bfe(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1, const Immediate &src2) {
417         opX(isGen12 ? Opcode::bfe_gen12 : Opcode::bfe, getDataType<DT>(), mod, dst, src0, src1, src2);
418     }
419     template <typename DT = void>
bfe(const InstructionModifier & mod,const RegData & dst,const Immediate & src0,const RegData & src1,const Immediate & src2)420     void bfe(const InstructionModifier &mod, const RegData &dst, const Immediate &src0, const RegData &src1, const Immediate &src2) {
421         opX(isGen12 ? Opcode::bfe_gen12 : Opcode::bfe, getDataType<DT>(), mod, dst, src0, src1, src2);
422     }
423     template <typename DT = void>
bfi1(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1)424     void bfi1(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
425         opX(isGen12 ? Opcode::bfi1_gen12 : Opcode::bfi1, getDataType<DT>(), mod, dst, src0, src1);
426     }
427     template <typename DT = void>
bfi1(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const Immediate & src1)428     void bfi1(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) {
429         opX(isGen12 ? Opcode::bfi1_gen12 : Opcode::bfi1, getDataType<DT>(), mod, dst, src0, src1);
430     }
431     template <typename DT = void>
bfi2(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1,const RegData & src2)432     void bfi2(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1, const RegData &src2) {
433         opX(isGen12 ? Opcode::bfi2_gen12 : Opcode::bfi2, getDataType<DT>(), mod, dst, src0, src1, src2);
434     }
435     template <typename DT = void>
bfi2(const InstructionModifier & mod,const RegData & dst,const Immediate & src0,const RegData & src1,const RegData & src2)436     void bfi2(const InstructionModifier &mod, const RegData &dst, const Immediate &src0, const RegData &src1, const RegData &src2) {
437         opX(isGen12 ? Opcode::bfi2_gen12 : Opcode::bfi2, getDataType<DT>(), mod, dst, src0, src1, src2);
438     }
439     template <typename DT = void>
bfi2(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1,const Immediate & src2)440     void bfi2(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1, const Immediate &src2) {
441         opX(isGen12 ? Opcode::bfi2_gen12 : Opcode::bfi2, getDataType<DT>(), mod, dst, src0, src1, src2);
442     }
443     template <typename DT = void>
bfi2(const InstructionModifier & mod,const RegData & dst,const Immediate & src0,const RegData & src1,const Immediate & src2)444     void bfi2(const InstructionModifier &mod, const RegData &dst, const Immediate &src0, const RegData &src1, const Immediate &src2) {
445         opX(isGen12 ? Opcode::bfi2_gen12 : Opcode::bfi2, getDataType<DT>(), mod, dst, src0, src1, src2);
446     }
447     template <typename DT = void>
bfn(const InstructionModifier & mod,uint8_t ctrl,const RegData & dst,const RegData & src0,const RegData & src1,const RegData & src2)448     void bfn(const InstructionModifier &mod, uint8_t ctrl, const RegData &dst, const RegData &src0, const RegData &src1, const RegData &src2) {
449         if (hw < HW::XeHP) unsupported();
450         opBfn(Opcode::bfn, getDataType<DT>(), mod, ctrl, dst, src0, src1, src2);
451     }
452     template <typename DT = void>
bfn(const InstructionModifier & mod,uint8_t ctrl,const RegData & dst,const Immediate & src0,const RegData & src1,const RegData & src2)453     void bfn(const InstructionModifier &mod, uint8_t ctrl, const RegData &dst, const Immediate &src0, const RegData &src1, const RegData &src2) {
454         if (hw < HW::XeHP) unsupported();
455         opBfn(Opcode::bfn, getDataType<DT>(), mod, ctrl, dst, src0, src1, src2);
456     }
457     template <typename DT = void>
bfn(const InstructionModifier & mod,uint8_t ctrl,const RegData & dst,const RegData & src0,const RegData & src1,const Immediate & src2)458     void bfn(const InstructionModifier &mod, uint8_t ctrl, const RegData &dst, const RegData &src0, const RegData &src1, const Immediate &src2) {
459         if (hw < HW::XeHP) unsupported();
460         opBfn(Opcode::bfn, getDataType<DT>(), mod, ctrl, dst, src0, src1, src2);
461     }
462     template <typename DT = void>
bfn(const InstructionModifier & mod,uint8_t ctrl,const RegData & dst,const Immediate & src0,const RegData & src1,const Immediate & src2)463     void bfn(const InstructionModifier &mod, uint8_t ctrl, const RegData &dst, const Immediate &src0, const RegData &src1, const Immediate &src2) {
464         if (hw < HW::XeHP) unsupported();
465         opBfn(Opcode::bfn, getDataType<DT>(), mod, ctrl, dst, src0, src1, src2);
466     }
467     template <typename DT = void>
bfrev(const InstructionModifier & mod,const RegData & dst,const RegData & src0)468     void bfrev(const InstructionModifier &mod, const RegData &dst, const RegData &src0) {
469         opX(isGen12 ? Opcode::bfrev_gen12 : Opcode::bfrev, getDataType<DT>(), mod, dst, src0);
470     }
471     template <typename DT = void>
bfrev(const InstructionModifier & mod,const RegData & dst,const Immediate & src0)472     void bfrev(const InstructionModifier &mod, const RegData &dst, const Immediate &src0) {
473         opX(isGen12 ? Opcode::bfrev_gen12 : Opcode::bfrev, getDataType<DT>(), mod, dst, src0);
474     }
brc(const InstructionModifier & mod,Label & jip,Label & uip)475     void brc(const InstructionModifier &mod, Label &jip, Label &uip) {
476         opBranch(Opcode::brc, mod, isGen12 ? null.ud() : ip.d(), jip, uip);
477     }
brc(const InstructionModifier & mod,RegData src0)478     void brc(const InstructionModifier &mod, RegData src0) {
479         src0.setRegion(2, 2, 1);
480         opBranch<true, true>(Opcode::brc, mod, isGen12 ? null.ud() : ip.d(), src0);
481     }
brd(const InstructionModifier & mod,Label & jip)482     void brd(const InstructionModifier &mod, Label &jip) {
483         opBranch(Opcode::brd, mod, isGen12 ? null.ud() : ip.d(), jip);
484     }
brd(const InstructionModifier & mod,RegData src0)485     void brd(const InstructionModifier &mod, RegData src0) {
486         src0.setRegion(2, 2, 1);
487         opBranch<true, true>(Opcode::brd, mod, isGen12 ? null.ud() : ip.d(), src0);
488     }
break_(const InstructionModifier & mod,Label & jip,Label & uip)489     void break_(const InstructionModifier &mod, Label &jip, Label &uip) {
490         opBranch(Opcode::break_, mod, null, jip, uip);
491     }
call(const InstructionModifier & mod,const RegData & dst,Label & jip)492     void call(const InstructionModifier &mod, const RegData &dst, Label &jip) {
493         opCall(Opcode::call, mod, dst, jip);
494     }
call(const InstructionModifier & mod,const RegData & dst,RegData jip)495     void call(const InstructionModifier &mod, const RegData &dst, RegData jip) {
496         if (isGen12)
497             opBranch<true, true>(Opcode::call, mod, dst, jip);
498         else {
499             jip.setRegion(0, 1, 0);
500             opX<true>(Opcode::call, DataType::d, mod, dst, null.ud(0)(0, 1, 0), jip);
501         }
502     }
calla(const InstructionModifier & mod,const RegData & dst,int32_t jip)503     void calla(const InstructionModifier &mod, const RegData &dst, int32_t jip) {
504         if (isGen12)
505             opBranch<true>(Opcode::calla, mod, dst, jip);
506         else
507             opX<true>(Opcode::calla, DataType::d, mod, dst, (hw <= HW::Gen9) ? null.ud(0)(2,2,1) : null.ud(0)(0,1,0), Immediate::d(jip));
508     }
calla(const InstructionModifier & mod,const RegData & dst,RegData jip)509     void calla(const InstructionModifier &mod, const RegData &dst, RegData jip) {
510         if (isGen12)
511             opBranch<true, true>(Opcode::calla, mod, dst, jip);
512         else {
513             jip.setRegion(0, 1, 0);
514             opX<true>(Opcode::calla, DataType::d, mod, dst, null.ud(0)(0, 1, 0), jip);
515         }
516     }
517     template <typename DT = void>
cbit(const InstructionModifier & mod,const RegData & dst,const RegData & src0)518     void cbit(const InstructionModifier &mod, const RegData &dst, const RegData &src0) {
519         opX(Opcode::cbit, getDataType<DT>(), mod, dst, src0);
520     }
521     template <typename DT = void>
cbit(const InstructionModifier & mod,const RegData & dst,const Immediate & src0)522     void cbit(const InstructionModifier &mod, const RegData &dst, const Immediate &src0) {
523         opX(Opcode::cbit, getDataType<DT>(), mod, dst, src0);
524     }
525     template <typename DT = void>
cmp(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1)526     void cmp(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
527         opX(isGen12 ? Opcode::cmp_gen12 : Opcode::cmp, getDataType<DT>(), mod, dst, src0, src1);
528     }
529     template <typename DT = void>
cmp(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const Immediate & src1)530     void cmp(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) {
531         opX(isGen12 ? Opcode::cmp_gen12 : Opcode::cmp, getDataType<DT>(), mod, dst, src0, src1);
532     }
533     template <typename DT = void>
cmpn(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1)534     void cmpn(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
535         opX(isGen12 ? Opcode::cmpn_gen12 : Opcode::cmpn, getDataType<DT>(), mod, dst, src0, src1);
536     }
537     template <typename DT = void>
csel(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1,const RegData & src2)538     void csel(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1, const RegData &src2) {
539         opX(isGen12 ? Opcode::csel_gen12 : Opcode::csel, getDataType<DT>(), mod, dst, src0, src1, src2);
540     }
541     template <typename DT = void>
csel(const InstructionModifier & mod,const RegData & dst,const Immediate & src0,const RegData & src1,const RegData & src2)542     void csel(const InstructionModifier &mod, const RegData &dst, const Immediate &src0, const RegData &src1, const RegData &src2) {
543         opX(isGen12 ? Opcode::csel_gen12 : Opcode::csel, getDataType<DT>(), mod, dst, src0, src1, src2);
544     }
545     template <typename DT = void>
csel(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1,const Immediate & src2)546     void csel(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1, const Immediate &src2) {
547         opX(isGen12 ? Opcode::csel_gen12 : Opcode::csel, getDataType<DT>(), mod, dst, src0, src1, src2);
548     }
549     template <typename DT = void>
csel(const InstructionModifier & mod,const RegData & dst,const Immediate & src0,const RegData & src1,const Immediate & src2)550     void csel(const InstructionModifier &mod, const RegData &dst, const Immediate &src0, const RegData &src1, const Immediate &src2) {
551         opX(isGen12 ? Opcode::csel_gen12 : Opcode::csel, getDataType<DT>(), mod, dst, src0, src1, src2);
552     }
cont(const InstructionModifier & mod,Label & jip,Label & uip)553     void cont(const InstructionModifier &mod, Label &jip, Label &uip) {
554         opBranch(Opcode::cont, mod, null, jip, uip);
555     }
556     template <typename DT = void>
dp2(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1)557     void dp2(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
558         opX(Opcode::dp2, getDataType<DT>(), mod, dst, src0, src1);
559     }
560     template <typename DT = void>
dp2(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const Immediate & src1)561     void dp2(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) {
562         opX(Opcode::dp2, getDataType<DT>(), mod, dst, src0, src1);
563     }
564     template <typename DT = void>
dp3(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1)565     void dp3(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
566         opX(Opcode::dp3, getDataType<DT>(), mod, dst, src0, src1);
567     }
568     template <typename DT = void>
dp3(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const Immediate & src1)569     void dp3(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) {
570         opX(Opcode::dp3, getDataType<DT>(), mod, dst, src0, src1);
571     }
572     template <typename DT = void>
dp4(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1)573     void dp4(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
574         opX(Opcode::dp4, getDataType<DT>(), mod, dst, src0, src1);
575     }
576     template <typename DT = void>
dp4(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const Immediate & src1)577     void dp4(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) {
578         opX(Opcode::dp4, getDataType<DT>(), mod, dst, src0, src1);
579     }
580     template <typename DT = void>
dp4a(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1,const RegData & src2)581     void dp4a(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1, const RegData &src2) {
582         if (hw < HW::Gen12LP) unsupported();
583         opX(Opcode::dp4a, getDataType<DT>(), mod, dst, src0, src1, src2);
584     }
585     template <typename DT = void>
dp4a(const InstructionModifier & mod,const RegData & dst,const Immediate & src0,const RegData & src1,const RegData & src2)586     void dp4a(const InstructionModifier &mod, const RegData &dst, const Immediate &src0, const RegData &src1, const RegData &src2) {
587         if (hw < HW::Gen12LP) unsupported();
588         opX(Opcode::dp4a, getDataType<DT>(), mod, dst, src0, src1, src2);
589     }
590     template <typename DT = void>
dp4a(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1,const Immediate & src2)591     void dp4a(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1, const Immediate &src2) {
592         if (hw < HW::Gen12LP) unsupported();
593         opX(Opcode::dp4a, getDataType<DT>(), mod, dst, src0, src1, src2);
594     }
595     template <typename DT = void>
dp4a(const InstructionModifier & mod,const RegData & dst,const Immediate & src0,const RegData & src1,const Immediate & src2)596     void dp4a(const InstructionModifier &mod, const RegData &dst, const Immediate &src0, const RegData &src1, const Immediate &src2) {
597         if (hw < HW::Gen12LP) unsupported();
598         opX(Opcode::dp4a, getDataType<DT>(), mod, dst, src0, src1, src2);
599     }
600     template <typename DT = void>
dpas(const InstructionModifier & mod,uint8_t sdepth,uint8_t rcount,const RegData & dst,const RegData & src0,const RegData & src1,const RegData & src2)601     void dpas(const InstructionModifier &mod, uint8_t sdepth, uint8_t rcount, const RegData &dst, const RegData &src0, const RegData &src1, const RegData &src2) {
602         opDpas(Opcode::dpas, getDataType<DT>(), mod, sdepth, rcount, dst, src0, src1, src2);
603     }
604     template <typename DT = void>
dpasw(const InstructionModifier & mod,uint8_t sdepth,uint8_t rcount,const RegData & dst,const RegData & src0,const RegData & src1,const RegData & src2)605     void dpasw(const InstructionModifier &mod, uint8_t sdepth, uint8_t rcount, const RegData &dst, const RegData &src0, const RegData &src1, const RegData &src2) {
606         opDpas(Opcode::dpasw, getDataType<DT>(), mod, sdepth, rcount, dst, src0, src1, src2);
607     }
608     template <typename DT = void>
dph(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1)609     void dph(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
610         opX(Opcode::dph, getDataType<DT>(), mod, dst, src0, src1);
611     }
612     template <typename DT = void>
dph(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const Immediate & src1)613     void dph(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) {
614         opX(Opcode::dph, getDataType<DT>(), mod, dst, src0, src1);
615     }
else_(InstructionModifier mod,Label & jip,Label & uip,bool branchCtrl=false)616     void else_(InstructionModifier mod, Label &jip, Label &uip, bool branchCtrl = false) {
617         mod.setBranchCtrl(branchCtrl);
618         opBranch(Opcode::else_, mod, null, jip, uip);
619     }
else_(InstructionModifier mod,Label & jip)620     void else_(InstructionModifier mod, Label &jip) {
621         else_(mod, jip, jip);
622     }
endif(const InstructionModifier & mod,Label & jip)623     void endif(const InstructionModifier &mod, Label &jip) {
624         opBranch(Opcode::endif, mod, null, jip);
625     }
endif(const InstructionModifier & mod)626     void endif(const InstructionModifier &mod) {
627         opBranch(Opcode::endif, mod, null, sizeof(Instruction8));
628     }
629     template <typename DT = void>
fbh(const InstructionModifier & mod,const RegData & dst,const RegData & src0)630     void fbh(const InstructionModifier &mod, const RegData &dst, const RegData &src0) {
631         opX(Opcode::fbh, getDataType<DT>(), mod, dst, src0);
632     }
633     template <typename DT = void>
fbh(const InstructionModifier & mod,const RegData & dst,const Immediate & src0)634     void fbh(const InstructionModifier &mod, const RegData &dst, const Immediate &src0) {
635         opX(Opcode::fbh, getDataType<DT>(), mod, dst, src0);
636     }
637     template <typename DT = void>
fbl(const InstructionModifier & mod,const RegData & dst,const RegData & src0)638     void fbl(const InstructionModifier &mod, const RegData &dst, const RegData &src0) {
639         opX(Opcode::fbl, getDataType<DT>(), mod, dst, src0);
640     }
641     template <typename DT = void>
fbl(const InstructionModifier & mod,const RegData & dst,const Immediate & src0)642     void fbl(const InstructionModifier &mod, const RegData &dst, const Immediate &src0) {
643         opX(Opcode::fbl, getDataType<DT>(), mod, dst, src0);
644     }
645     template <typename DT = void>
frc(const InstructionModifier & mod,const RegData & dst,const RegData & src0)646     void frc(const InstructionModifier &mod, const RegData &dst, const RegData &src0) {
647         opX(Opcode::frc, getDataType<DT>(), mod, dst, src0);
648     }
goto_(InstructionModifier mod,Label & jip,Label & uip,bool branchCtrl=false)649     void goto_(InstructionModifier mod, Label &jip, Label &uip, bool branchCtrl = false) {
650         mod.setBranchCtrl(branchCtrl);
651         opBranch(Opcode::goto_, mod, null, jip, uip);
652     }
goto_(const InstructionModifier & mod,Label & jip)653     void goto_(const InstructionModifier &mod, Label &jip) {
654         goto_(mod, jip, jip);
655     }
halt(const InstructionModifier & mod,Label & jip,Label & uip)656     void halt(const InstructionModifier &mod, Label &jip, Label &uip) {
657         opBranch(Opcode::halt, mod, null, jip, uip);
658     }
halt(const InstructionModifier & mod,Label & jip)659     void halt(const InstructionModifier &mod, Label &jip) {
660         halt(mod, jip, jip);
661     }
if_(InstructionModifier mod,Label & jip,Label & uip,bool branchCtrl=false)662     void if_(InstructionModifier mod, Label &jip, Label &uip, bool branchCtrl = false) {
663         mod.setBranchCtrl(branchCtrl);
664         opBranch(Opcode::if_, mod, null, jip, uip);
665     }
if_(const InstructionModifier & mod,Label & jip)666     void if_(const InstructionModifier &mod, Label &jip) {
667         if_(mod, jip, jip);
668     }
illegal()669     void illegal() {
670         opX(Opcode::illegal, DataType::invalid, InstructionModifier(), null, null, null);
671     }
join(InstructionModifier mod,Label & jip)672     void join(InstructionModifier mod, Label &jip) {
673         opBranch(Opcode::join, mod, null, jip);
674     }
join(InstructionModifier mod)675     void join(InstructionModifier mod) {
676         opBranch(Opcode::join, mod, null, sizeof(Instruction8));
677     }
jmpi(const InstructionModifier & mod,Label & jip)678     void jmpi(const InstructionModifier &mod, Label &jip) {
679         auto dst = isGen12 ? ARF(null) : ARF(ip);
680         opJmpi(Opcode::jmpi, mod, dst, dst, jip);
681     }
jmpi(const InstructionModifier & mod,const RegData & jip)682     void jmpi(const InstructionModifier &mod, const RegData &jip) {
683 #ifdef NGEN_SAFE
684         if (!isGen12 && jip.getType() != DataType::d && jip.getType() != DataType::invalid)
685             throw invalid_type_exception();
686 #endif
687         if (isGen12)
688             opBranch<true, false>(Opcode::jmpi, mod, null, jip);
689         else
690             opX(Opcode::jmpi, DataType::d, mod, ip, ip, jip);
691     }
692     template <typename DT = void>
line(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1)693     void line(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
694         if (hw >= HW::Gen11) unsupported();
695         opX(Opcode::line, getDataType<DT>(), mod, dst, src0, src1);
696     }
697     template <typename DT = void>
line(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const Immediate & src1)698     void line(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) {
699         if (hw >= HW::Gen11) unsupported();
700         opX(Opcode::line, getDataType<DT>(), mod, dst, src0, src1);
701     }
702     template <typename DT = void>
lrp(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1,const RegData & src2)703     void lrp(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1, const RegData &src2) {
704         opX(Opcode::lrp, getDataType<DT>(), mod, dst, src0, src1, src2);
705     }
706     template <typename DT = void>
lzd(const InstructionModifier & mod,const RegData & dst,const RegData & src0)707     void lzd(const InstructionModifier &mod, const RegData &dst, const RegData &src0) {
708         opX(Opcode::lzd, getDataType<DT>(), mod, dst, src0);
709     }
710     template <typename DT = void>
lzd(const InstructionModifier & mod,const RegData & dst,const Immediate & src0)711     void lzd(const InstructionModifier &mod, const RegData &dst, const Immediate &src0) {
712         opX(Opcode::lzd, getDataType<DT>(), mod, dst, src0);
713     }
714     template <typename DT = void>
mac(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1)715     void mac(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
716         opX(Opcode::mac, getDataType<DT>(), mod, dst, src0, src1);
717     }
718     template <typename DT = void>
mac(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const Immediate & src1)719     void mac(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) {
720         opX(Opcode::mac, getDataType<DT>(), mod, dst, src0, src1);
721     }
722     template <typename DT = void>
mach(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1)723     void mach(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
724         opX(Opcode::mach, getDataType<DT>(), (hw >= HW::XeHPC) ? mod : (mod | AccWrEn), dst, src0, src1);
725     }
726     template <typename DT = void>
mach(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const Immediate & src1)727     void mach(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) {
728         opX(Opcode::mach, getDataType<DT>(), (hw >= HW::XeHPC) ? mod : (mod | AccWrEn), dst, src0, src1);
729     }
730     template <typename DT = void>
macl(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1)731     void macl(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
732 #ifdef NGEN_SAFE
733         if (hw < HW::Gen10) unsupported();
734 #endif
735         opX((hw >= HW::XeHPC) ? Opcode::macl : Opcode::mach, getDataType<DT>(), mod, dst, src0, src1);
736     }
737     template <typename DT = void>
macl(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const Immediate & src1)738     void macl(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) {
739 #ifdef NGEN_SAFE
740         if (hw < HW::Gen10) unsupported();
741 #endif
742         opX((hw >= HW::XeHPC) ? Opcode::macl : Opcode::mach, getDataType<DT>(), mod, dst, src0, src1);
743     }
744     template <typename DT = void>
mad(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1,const RegData & src2)745     void mad(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1, const RegData &src2) {
746         opX(Opcode::mad, getDataType<DT>(), mod, dst, src0, src1, src2);
747     }
748     template <typename DT = void>
mad(const InstructionModifier & mod,const RegData & dst,const Immediate & src0,const RegData & src1,const RegData & src2)749     void mad(const InstructionModifier &mod, const RegData &dst, const Immediate &src0, const RegData &src1, const RegData &src2) {
750         opX(Opcode::mad, getDataType<DT>(), mod, dst, src0, src1, src2);
751     }
752     template <typename DT = void>
mad(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1,const Immediate & src2)753     void mad(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1, const Immediate &src2) {
754         opX(Opcode::mad, getDataType<DT>(), mod, dst, src0, src1, src2);
755     }
756     template <typename DT = void>
mad(const InstructionModifier & mod,const RegData & dst,const Immediate & src0,const RegData & src1,const Immediate & src2)757     void mad(const InstructionModifier &mod, const RegData &dst, const Immediate &src0, const RegData &src1, const Immediate &src2) {
758         opX(Opcode::mad, getDataType<DT>(), mod, dst, src0, src1, src2);
759     }
760     template <typename DT = void, HW hw_ = hw>
761     typename std::enable_if<hwLE(hw_, HW::Gen9)>::type
madm(const InstructionModifier & mod,const ExtendedReg & dst,const ExtendedReg & src0,const ExtendedReg & src1,const ExtendedReg & src2)762     madm(const InstructionModifier &mod, const ExtendedReg &dst, const ExtendedReg &src0, const ExtendedReg &src1, const ExtendedReg &src2) {
763         opX(Opcode::madm, getDataType<DT>(), mod, extToAlign16(dst), extToAlign16(src0), extToAlign16(src1), extToAlign16(src2));
764     }
765     template <typename DT = void, HW hw_ = hw>
766     typename std::enable_if<hwGT(hw_, HW::Gen9)>::type
madm(const InstructionModifier & mod,const ExtendedReg & dst,ExtendedReg src0,ExtendedReg src1,const ExtendedReg & src2)767     madm(const InstructionModifier &mod, const ExtendedReg &dst, ExtendedReg src0, ExtendedReg src1, const ExtendedReg &src2) {
768         src0.getBase().setRegion(4,4,1);
769         src1.getBase().setRegion(4,4,1);
770         opX(Opcode::madm, getDataType<DT>(), mod, dst, src0, src1, src2);
771     }
772     template <typename DT = void>
math(const InstructionModifier & mod,MathFunction fc,const RegData & dst,const RegData & src0)773     void math(const InstructionModifier &mod, MathFunction fc, const RegData &dst, const RegData &src0) {
774 #ifdef NGEN_SAFE
775         if (mathArgCount(fc) != 1) throw invalid_operand_count_exception();
776 #endif
777         opMath(Opcode::math, getDataType<DT>(), mod, fc, dst, src0);
778     }
779     template <typename DT = void>
math(const InstructionModifier & mod,MathFunction fc,const RegData & dst,const RegData & src0,const RegData & src1)780     void math(const InstructionModifier &mod, MathFunction fc, const RegData &dst, const RegData &src0, const RegData &src1) {
781 #ifdef NGEN_SAFE
782         if (mathArgCount(fc) != 2) throw invalid_operand_count_exception();
783 #endif
784         opMath(Opcode::math, getDataType<DT>(), mod, fc, dst, src0, src1);
785     }
786     template <typename DT = void>
math(const InstructionModifier & mod,MathFunction fc,const RegData & dst,const RegData & src0,const Immediate & src1)787     void math(const InstructionModifier &mod, MathFunction fc, const RegData &dst, const RegData &src0, const Immediate &src1) {
788 #ifdef NGEN_SAFE
789         if (fc == MathFunction::invm || fc == MathFunction::rsqtm) throw invalid_operand_exception();
790 #endif
791         opMath(Opcode::math, getDataType<DT>(), mod, fc, dst, src0, src1.forceInt32());
792     }
793     template <typename DT = void, HW hw_ = hw>
794     typename std::enable_if<hwLT(hw_, HW::Gen11)>::type
math(const InstructionModifier & mod,MathFunction fc,const ExtendedReg & dst,const ExtendedReg & src0)795     math(const InstructionModifier &mod, MathFunction fc, const ExtendedReg &dst, const ExtendedReg &src0) {
796 #ifdef NGEN_SAFE
797         if (fc != MathFunction::rsqtm) throw invalid_operand_exception();
798 #endif
799         opMath(Opcode::math, getDataType<DT>(), mod, fc, extToAlign16(dst), extToAlign16(src0));
800     }
801     template <typename DT = void, HW hw_ = hw>
802     typename std::enable_if<hwGE(hw_, HW::Gen11)>::type
math(const InstructionModifier & mod,MathFunction fc,const ExtendedReg & dst,ExtendedReg src0)803     math(const InstructionModifier &mod, MathFunction fc, const ExtendedReg &dst, ExtendedReg src0) {
804 #ifdef NGEN_SAFE
805         if (fc != MathFunction::rsqtm) throw invalid_operand_exception();
806 #endif
807         if (hw == HW::Gen11)
808             src0.getBase().setRegion(2,2,1);
809         else
810             src0.getBase().setRegion(1,1,0);
811         opMath(Opcode::math, getDataType<DT>(), mod, fc, dst, src0);
812     }
813     template <typename DT = void, HW hw_ = hw>
814     typename std::enable_if<hwLT(hw_, HW::Gen11)>::type
math(const InstructionModifier & mod,MathFunction fc,const ExtendedReg & dst,const ExtendedReg & src0,const ExtendedReg & src1)815     math(const InstructionModifier &mod, MathFunction fc, const ExtendedReg &dst, const ExtendedReg &src0, const ExtendedReg &src1) {
816 #ifdef NGEN_SAFE
817         if (fc != MathFunction::invm) throw invalid_operand_exception();
818 #endif
819         opMath(Opcode::math, getDataType<DT>(), mod, fc, extToAlign16(dst), extToAlign16(src0), extToAlign16(src1));
820     }
821     template <typename DT = void, HW hw_ = hw>
822     typename std::enable_if<hwGE(hw_, HW::Gen11)>::type
math(const InstructionModifier & mod,MathFunction fc,const ExtendedReg & dst,ExtendedReg src0,ExtendedReg src1)823     math(const InstructionModifier &mod, MathFunction fc, const ExtendedReg &dst, ExtendedReg src0, ExtendedReg src1) {
824 #ifdef NGEN_SAFE
825         if (fc != MathFunction::invm) throw invalid_operand_exception();
826 #endif
827         if (hw == HW::Gen11) {
828             src0.getBase().setRegion(2,2,1);
829             src1.getBase().setRegion(2,2,1);
830         } else {
831             src0.getBase().setRegion(1,1,0);
832             src1.getBase().setRegion(1,1,0);
833         }
834         opMath(Opcode::math, getDataType<DT>(), mod, fc, dst, src0, src1);
835     }
836     template <typename DT = void>
mov(const InstructionModifier & mod,const RegData & dst,const RegData & src0)837     void mov(const InstructionModifier &mod, const RegData &dst, const RegData &src0) {
838         opX(isGen12 ? Opcode::mov_gen12 : Opcode::mov, getDataType<DT>(), mod, dst, src0);
839     }
840     template <typename DT = void>
mov(const InstructionModifier & mod,const RegData & dst,const Immediate & src0)841     void mov(const InstructionModifier &mod, const RegData &dst, const Immediate &src0) {
842         opX(isGen12 ? Opcode::mov_gen12 : Opcode::mov, getDataType<DT>(), mod, dst, src0);
843     }
844     template <typename DT = void>
movi(const InstructionModifier & mod,const RegData & dst,const RegData & src0)845     void movi(const InstructionModifier &mod, const RegData &dst, const RegData &src0) {
846         if (hardware >= HW::Gen10)
847             movi<DT>(mod, dst, src0, null.ud(0)(1,1,0));
848         else
849             opX(isGen12 ? Opcode::movi_gen12 : Opcode::movi, getDataType<DT>(), mod, dst, src0);
850     }
851     template <typename DT = void>
movi(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const Immediate & src1)852     void movi(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) {
853 #ifdef NGEN_SAFE
854         if (hardware < HW::Gen10) throw unsupported_instruction();
855 #endif
856         opX(isGen12 ? Opcode::movi_gen12 : Opcode::movi, getDataType<DT>(), mod, dst, src0, src1);
857     }
858     template <typename DT = void>
movi(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1)859     void movi(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
860 #ifdef NGEN_SAFE
861         if (hardware < HW::Gen10) throw unsupported_instruction();
862 #endif
863         opX(isGen12 ? Opcode::movi_gen12 : Opcode::movi, getDataType<DT>(), mod, dst, src0, src1);
864     }
865     template <typename DT = void>
mul(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1)866     void mul(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
867         opX(Opcode::mul, getDataType<DT>(), mod, dst, src0, src1);
868     }
869     template <typename DT = void>
mul(const InstructionModifier & mod,const RegData & dst,const RegData & src0,Immediate src1)870     void mul(const InstructionModifier &mod, const RegData &dst, const RegData &src0, Immediate src1) {
871         if (dst.getBytes() == 8)
872             src1 = src1.forceInt32();
873         opX(Opcode::mul, getDataType<DT>(), mod, dst, src0, src1);
874     }
nop()875     void nop() {
876         opNop(isGen12 ? Opcode::nop_gen12 : Opcode::nop);
877     }
nop(const InstructionModifier & mod)878     void nop(const InstructionModifier &mod) {
879         opX(isGen12 ? Opcode::nop_gen12 : Opcode::nop, DataType::invalid, mod, null, null, null);
880     }
881     template <typename DT = void>
not_(const InstructionModifier & mod,const RegData & dst,const RegData & src0)882     void not_(const InstructionModifier &mod, const RegData &dst, const RegData &src0) {
883         opX(isGen12 ? Opcode::not_gen12 : Opcode::not_, getDataType<DT>(), mod, dst, src0);
884     }
885     template <typename DT = void>
not_(const InstructionModifier & mod,const RegData & dst,const Immediate & src0)886     void not_(const InstructionModifier &mod, const RegData &dst, const Immediate &src0) {
887         opX(isGen12 ? Opcode::not_gen12 : Opcode::not_, getDataType<DT>(), mod, dst, src0);
888     }
889 #ifndef NGEN_NO_OP_NAMES
890     template <typename DT = void>
not(const InstructionModifier & mod,const RegData & dst,const RegData & src0)891     void not(const InstructionModifier &mod, const RegData &dst, const RegData &src0) {
892         not_<DT>(mod, dst, src0);
893     }
894     template <typename DT = void>
not(const InstructionModifier & mod,const RegData & dst,const Immediate & src0)895     void not(const InstructionModifier &mod, const RegData &dst, const Immediate &src0) {
896         not_<DT>(mod, dst, src0);
897     }
898 #endif
899     template <typename DT = void>
or_(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1)900     void or_(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
901         opX(isGen12 ? Opcode::or_gen12 : Opcode::or_, getDataType<DT>(), mod, dst, src0, src1);
902     }
903     template <typename DT = void>
or_(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const Immediate & src1)904     void or_(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) {
905         opX(isGen12 ? Opcode::or_gen12 : Opcode::or_, getDataType<DT>(), mod, dst, src0, src1);
906     }
907 #ifndef NGEN_NO_OP_NAMES
908     template <typename DT = void>
or(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1)909     void or(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
910         or_<DT>(mod, dst, src0, src1);
911     }
912     template <typename DT = void>
or(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const Immediate & src1)913     void or(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) {
914         or_<DT>(mod, dst, src0, src1);
915     }
916 #endif
917     template <typename DT = void>
pln(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1)918     void pln(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
919         if (hw >= HW::Gen11) unsupported();
920         opX(Opcode::pln, getDataType<DT>(), mod, dst, src0, src1);
921     }
ret(const InstructionModifier & mod,RegData src0)922     void ret(const InstructionModifier &mod, RegData src0) {
923         src0.setRegion(2,2,1);
924         if (isGen12)
925             opBranch<true, true>(Opcode::ret, mod, null, src0);
926         else
927             opX<true>(Opcode::ret, DataType::ud, mod, null, src0);
928     }
929     template <typename DT = void>
rndd(const InstructionModifier & mod,const RegData & dst,const RegData & src0)930     void rndd(const InstructionModifier &mod, const RegData &dst, const RegData &src0) {
931         opX(Opcode::rndd, getDataType<DT>(), mod, dst, src0);
932     }
933     template <typename DT = void>
rndd(const InstructionModifier & mod,const RegData & dst,const Immediate & src0)934     void rndd(const InstructionModifier &mod, const RegData &dst, const Immediate &src0) {
935         opX(Opcode::rndd, getDataType<DT>(), mod, dst, src0);
936     }
937     template <typename DT = void>
rnde(const InstructionModifier & mod,const RegData & dst,const RegData & src0)938     void rnde(const InstructionModifier &mod, const RegData &dst, const RegData &src0) {
939         opX(Opcode::rnde, getDataType<DT>(), mod, dst, src0);
940     }
941     template <typename DT = void>
rnde(const InstructionModifier & mod,const RegData & dst,const Immediate & src0)942     void rnde(const InstructionModifier &mod, const RegData &dst, const Immediate &src0) {
943         opX(Opcode::rnde, getDataType<DT>(), mod, dst, src0);
944     }
945     template <typename DT = void>
rndu(const InstructionModifier & mod,const RegData & dst,const RegData & src0)946     void rndu(const InstructionModifier &mod, const RegData &dst, const RegData &src0) {
947         opX(Opcode::rndu, getDataType<DT>(), mod, dst, src0);
948     }
949     template <typename DT = void>
rndu(const InstructionModifier & mod,const RegData & dst,const Immediate & src0)950     void rndu(const InstructionModifier &mod, const RegData &dst, const Immediate &src0) {
951         opX(Opcode::rndu, getDataType<DT>(), mod, dst, src0);
952     }
953     template <typename DT = void>
rndz(const InstructionModifier & mod,const RegData & dst,const RegData & src0)954     void rndz(const InstructionModifier &mod, const RegData &dst, const RegData &src0) {
955         opX(Opcode::rndz, getDataType<DT>(), mod, dst, src0);
956     }
957     template <typename DT = void>
rndz(const InstructionModifier & mod,const RegData & dst,const Immediate & src0)958     void rndz(const InstructionModifier &mod, const RegData &dst, const Immediate &src0) {
959         opX(Opcode::rndz, getDataType<DT>(), mod, dst, src0);
960     }
961     template <typename DT = void>
rol(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1)962     void rol(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
963         opX(isGen12 ? Opcode::rol_gen12 : Opcode::rol, getDataType<DT>(), mod, dst, src0, src1);
964     }
965     template <typename DT = void>
rol(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const Immediate & src1)966     void rol(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) {
967         opX(isGen12 ? Opcode::rol_gen12 : Opcode::rol, getDataType<DT>(), mod, dst, src0, src1);
968     }
969     template <typename DT = void>
ror(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1)970     void ror(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
971         opX(isGen12 ? Opcode::ror_gen12 : Opcode::ror, getDataType<DT>(), mod, dst, src0, src1);
972     }
973     template <typename DT = void>
ror(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const Immediate & src1)974     void ror(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) {
975         opX(isGen12 ? Opcode::ror_gen12 : Opcode::ror, getDataType<DT>(), mod, dst, src0, src1);
976     }
977     template <typename DT = void>
sad2(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1)978     void sad2(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
979         if (hw >= HW::Gen12LP) unsupported();
980         opX(Opcode::sad2, getDataType<DT>(), mod, dst, src0, src1);
981     }
982     template <typename DT = void>
sad2(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const Immediate & src1)983     void sad2(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) {
984         if (hw >= HW::Gen12LP) unsupported();
985         opX(Opcode::sad2, getDataType<DT>(), mod, dst, src0, src1);
986     }
987     template <typename DT = void>
sada2(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1)988     void sada2(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
989         if (hw >= HW::Gen12LP) unsupported();
990         opX(Opcode::sada2, getDataType<DT>(), mod, dst, src0, src1);
991     }
992     template <typename DT = void>
sada2(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const Immediate & src1)993     void sada2(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) {
994         if (hw >= HW::Gen12LP) unsupported();
995         opX(Opcode::sada2, getDataType<DT>(), mod, dst, src0, src1);
996     }
997     template <typename DT = void>
sel(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1)998     void sel(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
999         opX(isGen12 ? Opcode::sel_gen12 : Opcode::sel, getDataType<DT>(), mod, dst, src0, src1);
1000     }
1001     template <typename DT = void>
sel(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const Immediate & src1)1002     void sel(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) {
1003         opX(isGen12 ? Opcode::sel_gen12 : Opcode::sel, getDataType<DT>(), mod, dst, src0, src1);
1004     }
1005 
1006     /* Gen12-style sends */
send(const InstructionModifier & mod,SharedFunction sf,const RegData & dst,const RegData & src0,const RegData & src1,uint32_t exdesc,uint32_t desc)1007     void send(const InstructionModifier &mod, SharedFunction sf, const RegData &dst, const RegData &src0, const RegData &src1, uint32_t exdesc, uint32_t desc) {
1008         opSend(Opcode::send, mod, sf, dst, src0, src1, exdesc, desc);
1009     }
send(const InstructionModifier & mod,SharedFunction sf,const RegData & dst,const RegData & src0,const RegData & src1,const RegData & exdesc,uint32_t desc)1010     void send(const InstructionModifier &mod, SharedFunction sf, const RegData &dst, const RegData &src0, const RegData &src1, const RegData &exdesc, uint32_t desc) {
1011         opSend(Opcode::send, mod, sf, dst, src0, src1, exdesc, desc);
1012     }
send(const InstructionModifier & mod,SharedFunction sf,const RegData & dst,const RegData & src0,const RegData & src1,uint32_t exdesc,const RegData & desc)1013     void send(const InstructionModifier &mod, SharedFunction sf, const RegData &dst, const RegData &src0, const RegData &src1, uint32_t exdesc, const RegData &desc) {
1014         opSend(Opcode::send, mod, sf, dst, src0, src1, exdesc, desc);
1015     }
send(const InstructionModifier & mod,SharedFunction sf,const RegData & dst,const RegData & src0,const RegData & src1,const RegData & exdesc,const RegData & desc)1016     void send(const InstructionModifier &mod, SharedFunction sf, const RegData &dst, const RegData &src0, const RegData &src1, const RegData &exdesc, const RegData &desc) {
1017         opSend(Opcode::send, mod, sf, dst, src0, src1, exdesc, desc);
1018     }
sendc(const InstructionModifier & mod,SharedFunction sf,const RegData & dst,const RegData & src0,const RegData & src1,uint32_t exdesc,uint32_t desc)1019     void sendc(const InstructionModifier &mod, SharedFunction sf, const RegData &dst, const RegData &src0, const RegData &src1, uint32_t exdesc, uint32_t desc) {
1020         opSend(Opcode::sendc, mod, sf, dst, src0, src1, exdesc, desc);
1021     }
sendc(const InstructionModifier & mod,SharedFunction sf,const RegData & dst,const RegData & src0,const RegData & src1,const RegData & exdesc,uint32_t desc)1022     void sendc(const InstructionModifier &mod, SharedFunction sf, const RegData &dst, const RegData &src0, const RegData &src1, const RegData &exdesc, uint32_t desc) {
1023         opSend(Opcode::sendc, mod, sf, dst, src0, src1, exdesc, desc);
1024     }
sendc(const InstructionModifier & mod,SharedFunction sf,const RegData & dst,const RegData & src0,const RegData & src1,uint32_t exdesc,const RegData & desc)1025     void sendc(const InstructionModifier &mod, SharedFunction sf, const RegData &dst, const RegData &src0, const RegData &src1, uint32_t exdesc, const RegData &desc) {
1026         opSend(Opcode::sendc, mod, sf, dst, src0, src1, exdesc, desc);
1027     }
sendc(const InstructionModifier & mod,SharedFunction sf,const RegData & dst,const RegData & src0,const RegData & src1,const RegData & exdesc,const RegData & desc)1028     void sendc(const InstructionModifier &mod, SharedFunction sf, const RegData &dst, const RegData &src0, const RegData &src1, const RegData &exdesc, const RegData &desc) {
1029         opSend(Opcode::sendc, mod, sf, dst, src0, src1, exdesc, desc);
1030     }
1031     /* Pre-Gen12-style sends; also supported on Gen12. */
send(const InstructionModifier & mod,const RegData & dst,const RegData & src0,uint32_t exdesc,uint32_t desc)1032     void send(const InstructionModifier &mod, const RegData &dst, const RegData &src0, uint32_t exdesc, uint32_t desc) {
1033         opSend(Opcode::send, mod, dst, src0, exdesc, desc);
1034     }
send(const InstructionModifier & mod,const RegData & dst,const RegData & src0,uint32_t exdesc,const RegData & desc)1035     void send(const InstructionModifier &mod, const RegData &dst, const RegData &src0, uint32_t exdesc, const RegData &desc) {
1036         opSend(Opcode::send, mod, dst, src0, exdesc, desc);
1037     }
sendc(const InstructionModifier & mod,const RegData & dst,const RegData & src0,uint32_t exdesc,uint32_t desc)1038     void sendc(const InstructionModifier &mod, const RegData &dst, const RegData &src0, uint32_t exdesc, uint32_t desc) {
1039         opSend(Opcode::sendc, mod, dst, src0, exdesc, desc);
1040     }
sendc(const InstructionModifier & mod,const RegData & dst,const RegData & src0,uint32_t exdesc,const RegData & desc)1041     void sendc(const InstructionModifier &mod, const RegData &dst, const RegData &src0, uint32_t exdesc, const RegData &desc) {
1042         opSend(Opcode::sendc, mod, dst, src0, exdesc, desc);
1043     }
sends(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1,uint32_t exdesc,uint32_t desc)1044     void sends(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1, uint32_t exdesc, uint32_t desc) {
1045         opSends(Opcode::sends, mod, dst, src0, src1, exdesc, desc);
1046     }
sends(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1,uint32_t exdesc,const RegData & desc)1047     void sends(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1, uint32_t exdesc, const RegData &desc) {
1048         opSends(Opcode::sends, mod, dst, src0, src1, exdesc, desc);
1049     }
sends(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1,const RegData & exdesc,uint32_t desc)1050     void sends(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1, const RegData &exdesc, uint32_t desc) {
1051         opSends(Opcode::sends, mod, dst, src0, src1, exdesc, desc);
1052     }
sends(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1,const RegData & exdesc,const RegData & desc)1053     void sends(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1, const RegData &exdesc, const RegData &desc) {
1054         opSends(Opcode::sends, mod, dst, src0, src1, exdesc, desc);
1055     }
sendsc(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1,uint32_t exdesc,uint32_t desc)1056     void sendsc(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1, uint32_t exdesc, uint32_t desc) {
1057         opSends(Opcode::sendsc, mod, dst, src0, src1, exdesc, desc);
1058     }
sendsc(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1,uint32_t exdesc,const RegData & desc)1059     void sendsc(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1, uint32_t exdesc, const RegData &desc) {
1060         opSends(Opcode::sendsc, mod, dst, src0, src1, exdesc, desc);
1061     }
sendsc(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1,const RegData & exdesc,uint32_t desc)1062     void sendsc(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1, const RegData &exdesc, uint32_t desc) {
1063         opSends(Opcode::sendsc, mod, dst, src0, src1, exdesc, desc);
1064     }
sendsc(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1,const RegData & exdesc,const RegData & desc)1065     void sendsc(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1, const RegData &exdesc, const RegData &desc) {
1066         opSends(Opcode::sendsc, mod, dst, src0, src1, exdesc, desc);
1067     }
1068 
1069     template <typename DT = void>
shl(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1)1070     void shl(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
1071         opX(isGen12 ? Opcode::shl_gen12 : Opcode::shl, getDataType<DT>(), mod, dst, src0, src1);
1072     }
1073     template <typename DT = void>
shl(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const Immediate & src1)1074     void shl(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) {
1075         opX(isGen12 ? Opcode::shl_gen12 : Opcode::shl, getDataType<DT>(), mod, dst, src0, src1);
1076     }
1077     template <typename DT = void>
shr(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1)1078     void shr(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
1079         opX(isGen12 ? Opcode::shr_gen12 : Opcode::shr, getDataType<DT>(), mod, dst, src0, src1);
1080     }
1081     template <typename DT = void>
shr(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const Immediate & src1)1082     void shr(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) {
1083         opX(isGen12 ? Opcode::shr_gen12 : Opcode::shr, getDataType<DT>(), mod, dst, src0, src1);
1084     }
1085     template <typename DT = void>
smov(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const Immediate & src1)1086     void smov(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) {
1087         opX(isGen12 ? Opcode::smov_gen12 : Opcode::smov, getDataType<DT>(), mod, dst, src0, src1);
1088     }
1089     template <typename DT = void>
srnd(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1)1090     void srnd(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
1091         opX(Opcode::srnd, getDataType<DT>(), mod, dst, src0, src1);
1092     }
1093     template <typename DT = void>
srnd(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const Immediate & src1)1094     void srnd(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) {
1095         opX(Opcode::srnd, getDataType<DT>(), mod, dst, src0, src1);
1096     }
1097     template <typename DT = void>
subb(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1)1098     void subb(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
1099         opX(Opcode::subb, getDataType<DT>(), mod | AccWrEn, dst, src0, src1);
1100     }
1101     template <typename DT = void>
subb(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const Immediate & src1)1102     void subb(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) {
1103         opX(Opcode::subb, getDataType<DT>(), mod | AccWrEn, dst, src0, src1);
1104     }
wait(const InstructionModifier & mod,const RegData & nreg)1105     void wait(const InstructionModifier &mod, const RegData &nreg) {
1106 #ifdef NGEN_SAFE
1107         if (!nreg.isARF() || nreg.getARFType() != ARFType::n) throw invalid_arf_exception();
1108 #endif
1109         opX(Opcode::wait, DataType::invalid, mod, nreg, nreg);
1110     }
while_(const InstructionModifier & mod,Label & jip)1111     void while_(const InstructionModifier &mod, Label &jip) {
1112         opBranch(Opcode::while_, mod, null, jip);
1113     }
1114     template <typename DT = void>
xor_(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1)1115     void xor_(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
1116         opX(isGen12 ? Opcode::xor_gen12 : Opcode::xor_, getDataType<DT>(), mod, dst, src0, src1);
1117     }
1118     template <typename DT = void>
xor_(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const Immediate & src1)1119     void xor_(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) {
1120         opX(isGen12 ? Opcode::xor_gen12 : Opcode::xor_, getDataType<DT>(), mod, dst, src0, src1);
1121     }
1122 #ifndef NGEN_NO_OP_NAMES
1123     template <typename DT = void>
xor(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1)1124     void xor(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
1125         xor_<DT>(mod, dst, src0, src1);
1126     }
1127     template <typename DT = void>
xor(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const Immediate & src1)1128     void xor(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) {
1129         xor_<DT>(mod, dst, src0, src1);
1130     }
1131 #endif
1132 
1133 private:
1134     struct Sync {
1135         BinaryCodeGenerator<hw> &parent;
1136 
Syncngen::BinaryCodeGenerator::Sync1137         Sync(BinaryCodeGenerator<hw> *parent_) : parent(*parent_) {}
1138 
operator ()ngen::BinaryCodeGenerator::Sync1139         void operator()(SyncFunction fc, const InstructionModifier &mod = InstructionModifier()) {
1140             parent.opSync(Opcode::sync, fc, mod);
1141         }
operator ()ngen::BinaryCodeGenerator::Sync1142         void operator()(SyncFunction fc, const RegData &src0) {
1143             this->operator()(fc, InstructionModifier(), src0);
1144         }
operator ()ngen::BinaryCodeGenerator::Sync1145         void operator()(SyncFunction fc, const InstructionModifier &mod, const RegData &src0) {
1146             parent.opSync(Opcode::sync, fc, mod, src0);
1147         }
operator ()ngen::BinaryCodeGenerator::Sync1148         void operator()(SyncFunction fc, int src0) {
1149             this->operator()(fc, InstructionModifier(), src0);
1150         }
operator ()ngen::BinaryCodeGenerator::Sync1151         void operator()(SyncFunction fc, const InstructionModifier &mod, uint32_t src0) {
1152             parent.opSync(Opcode::sync, fc, mod, Immediate::ud(src0));
1153         }
allrdngen::BinaryCodeGenerator::Sync1154         void allrd() {
1155             allrd(null.ud(0)(0, 1, 1));
1156         }
allrdngen::BinaryCodeGenerator::Sync1157         void allrd(const InstructionModifier &mod) {
1158             allrd(mod, null.ud(0)(0, 1, 1));
1159         }
allrdngen::BinaryCodeGenerator::Sync1160         void allrd(const RegData &src0) {
1161             allrd(InstructionModifier(), src0);
1162         }
allrdngen::BinaryCodeGenerator::Sync1163         void allrd(const InstructionModifier &mod, const RegData &src0) {
1164             this->operator()(SyncFunction::allrd, mod, src0);
1165         }
allrdngen::BinaryCodeGenerator::Sync1166         void allrd(uint32_t src0) {
1167             allrd(InstructionModifier(), src0);
1168         }
allrdngen::BinaryCodeGenerator::Sync1169         void allrd(const InstructionModifier &mod, uint32_t src0) {
1170             this->operator()(SyncFunction::allrd, mod, src0);
1171         }
allwrngen::BinaryCodeGenerator::Sync1172         void allwr() {
1173             allwr(null);
1174         }
allwrngen::BinaryCodeGenerator::Sync1175         void allwr(const InstructionModifier &mod) {
1176             allwr(mod, null);
1177         }
allwrngen::BinaryCodeGenerator::Sync1178         void allwr(const RegData &src0) {
1179             allwr(InstructionModifier(), src0);
1180         }
allwrngen::BinaryCodeGenerator::Sync1181         void allwr(const InstructionModifier &mod, const RegData &src0) {
1182             this->operator()(SyncFunction::allwr, mod, src0);
1183         }
allwrngen::BinaryCodeGenerator::Sync1184         void allwr(uint32_t src0) {
1185             allwr(InstructionModifier(), src0);
1186         }
allwrngen::BinaryCodeGenerator::Sync1187         void allwr(const InstructionModifier &mod, uint32_t src0) {
1188             this->operator()(SyncFunction::allwr, mod, src0);
1189         }
barngen::BinaryCodeGenerator::Sync1190         void bar(const InstructionModifier &mod = InstructionModifier()) {
1191             this->operator()(SyncFunction::bar, mod);
1192         }
barngen::BinaryCodeGenerator::Sync1193         void bar(const InstructionModifier &mod, uint32_t src0) {
1194             this->operator()(SyncFunction::bar, mod, src0);
1195         }
barngen::BinaryCodeGenerator::Sync1196         void bar(const InstructionModifier &mod, const RegData &src0) {
1197             this->operator()(SyncFunction::bar, mod, src0);
1198         }
barngen::BinaryCodeGenerator::Sync1199         void bar(uint32_t src0) {
1200             this->operator()(SyncFunction::bar, InstructionModifier(), src0);
1201         }
barngen::BinaryCodeGenerator::Sync1202         void bar(const RegData &src0) {
1203             this->operator()(SyncFunction::bar, InstructionModifier(), src0);
1204         }
hostngen::BinaryCodeGenerator::Sync1205         void host(const InstructionModifier &mod = InstructionModifier()) {
1206             this->operator()(SyncFunction::host, mod);
1207         }
nopngen::BinaryCodeGenerator::Sync1208         void nop(const InstructionModifier &mod = InstructionModifier()) {
1209             this->operator()(SyncFunction::nop, mod);
1210         }
1211     };
1212 public:
1213     Sync sync;
1214 
1215 
1216 private:
1217     struct Load {
1218         BinaryCodeGenerator<hw> &parent;
1219 
Loadngen::BinaryCodeGenerator::Load1220         Load(BinaryCodeGenerator<hw> *parent_) : parent(*parent_) {}
1221 
1222         template <typename DataSpec>
operator ()ngen::BinaryCodeGenerator::Load1223         void operator()(const InstructionModifier &mod, const RegData &dst, const DataSpec &spec, AddressBase base, const RegData &addr)
1224         {
1225             MessageDescriptor desc;
1226             ExtendedMessageDescriptor exdesc;
1227 
1228             encodeLoadDescriptors(hw, desc, exdesc, mod, dst, spec, base, addr);
1229             parent.send(mod, dst, addr, exdesc.all, desc.all);
1230         }
1231 
1232         template <typename DataSpec>
operator ()ngen::BinaryCodeGenerator::Load1233         void operator()(const InstructionModifier &mod, const RegData &dst, const DataSpec &spec, AddressBase base, const GRFDisp &addr)
1234         {
1235             MessageDescriptor desc;
1236             ExtendedMessageDescriptor exdesc;
1237 
1238             encodeLoadDescriptors(hw, desc, exdesc, mod, dst, spec, base, addr);
1239             parent.send(mod, dst, addr.getBase(), exdesc.all, desc.all);
1240         }
1241 
1242         template <typename DataSpec>
operator ()ngen::BinaryCodeGenerator::Load1243         void operator()(SharedFunction sfid, const InstructionModifier &mod, const RegData &dst, const DataSpec &spec, AddressBase base, const GRFDisp &addr)
1244         {
1245             MessageDescriptor desc;
1246             ExtendedMessageDescriptor exdesc;
1247 
1248             exdesc.parts.sfid = static_cast<unsigned>(sfid);
1249             encodeLoadDescriptors(hw, desc, exdesc, mod, dst, spec, base, addr);
1250             exdesc.parts.sfid = static_cast<unsigned>(sfid);
1251             parent.send(mod, dst, addr.getBase(), exdesc.all, desc.all);
1252         }
1253 
ugmngen::BinaryCodeGenerator::Load1254         void ugm(const InstructionModifier &mod, const RegData &dst, DataSpecLSC spec, AddressBase base, const GRFDisp &addr)
1255         {
1256             this->operator()(SharedFunction::ugm, mod, dst, spec, base, addr);
1257         }
ugmlngen::BinaryCodeGenerator::Load1258         void ugml(const InstructionModifier &mod, const RegData &dst, DataSpecLSC spec, AddressBase base, const GRFDisp &addr)
1259         {
1260             this->operator()(SharedFunction::ugml, mod, dst, spec, base, addr);
1261         }
tgmngen::BinaryCodeGenerator::Load1262         void tgm(const InstructionModifier &mod, const RegData &dst, DataSpecLSC spec, AddressBase base, const GRFDisp &addr)
1263         {
1264             this->operator()(SharedFunction::tgm, mod, dst, spec, base, addr);
1265         }
slmngen::BinaryCodeGenerator::Load1266         void slm(const InstructionModifier &mod, const RegData &dst, DataSpecLSC spec, AddressBase base, const GRFDisp &addr)
1267         {
1268             this->operator()(SharedFunction::slm, mod, dst, spec, base, addr);
1269         }
1270     };
1271 
1272     struct Store {
1273         BinaryCodeGenerator<hw> &parent;
1274 
Storengen::BinaryCodeGenerator::Store1275         Store(BinaryCodeGenerator<hw> *parent_) : parent(*parent_) {}
1276 
1277         template <typename DataSpec>
operator ()ngen::BinaryCodeGenerator::Store1278         void operator()(const InstructionModifier &mod, const DataSpec &spec, AddressBase base, const RegData &addr, const RegData &data)
1279         {
1280             MessageDescriptor desc;
1281             ExtendedMessageDescriptor exdesc;
1282 
1283             encodeStoreDescriptors(hw, desc, exdesc, mod, spec, base, addr);
1284             parent.sends(mod, NullRegister(), addr, data, exdesc.all, desc.all);
1285         }
1286 
1287         template <typename DataSpec>
operator ()ngen::BinaryCodeGenerator::Store1288         void operator()(const InstructionModifier &mod, const DataSpec &spec, AddressBase base, const GRFDisp &addr, const RegData &data)
1289         {
1290             MessageDescriptor desc;
1291             ExtendedMessageDescriptor exdesc;
1292 
1293             encodeStoreDescriptors(hw, desc, exdesc, mod, spec, base, addr);
1294             parent.sends(mod, NullRegister(), addr.getBase(), data, exdesc.all, desc.all);
1295         }
1296 
1297         template <typename DataSpec>
operator ()ngen::BinaryCodeGenerator::Store1298         void operator()(SharedFunction sfid, const InstructionModifier &mod, const DataSpec &spec, AddressBase base, const GRFDisp &addr, const RegData &data)
1299         {
1300             MessageDescriptor desc;
1301             ExtendedMessageDescriptor exdesc;
1302 
1303             exdesc.parts.sfid = static_cast<unsigned>(sfid);
1304             encodeStoreDescriptors(hw, desc, exdesc, mod, spec, base, addr);
1305             exdesc.parts.sfid = static_cast<unsigned>(sfid);
1306             parent.sends(mod, NullRegister(), addr.getBase(), data, exdesc.all, desc.all);
1307         }
1308 
ugmngen::BinaryCodeGenerator::Store1309         void ugm(const InstructionModifier &mod, DataSpecLSC spec, AddressBase base, const GRFDisp &addr, const RegData &data)
1310         {
1311             this->operator()(SharedFunction::ugm, mod, spec, base, addr, data);
1312         }
ugmlngen::BinaryCodeGenerator::Store1313         void ugml(const InstructionModifier &mod, DataSpecLSC spec, AddressBase base, const GRFDisp &addr, const RegData &data)
1314         {
1315             this->operator()(SharedFunction::ugml, mod, spec, base, addr, data);
1316         }
tgmngen::BinaryCodeGenerator::Store1317         void tgm(const InstructionModifier &mod, DataSpecLSC spec, AddressBase base, const GRFDisp &addr, const RegData &data)
1318         {
1319             this->operator()(SharedFunction::tgm, mod, spec, base, addr, data);
1320         }
slmngen::BinaryCodeGenerator::Store1321         void slm(const InstructionModifier &mod, DataSpecLSC spec, AddressBase base, const GRFDisp &addr, const RegData &data)
1322         {
1323             this->operator()(SharedFunction::slm, mod, spec, base, addr, data);
1324         }
1325     };
1326 
1327     struct Atomic_ {
1328         BinaryCodeGenerator<hw> &parent;
1329 
Atomic_ngen::BinaryCodeGenerator::Atomic_1330         Atomic_(BinaryCodeGenerator<hw> *parent_) : parent(*parent_) {}
1331 
1332         template <typename DataSpec>
operator ()ngen::BinaryCodeGenerator::Atomic_1333         void operator()(AtomicOp op, const InstructionModifier &mod, const RegData &dst, const DataSpec &spec, AddressBase base, const RegData &addr, const RegData &data = NullRegister())
1334         {
1335             MessageDescriptor desc;
1336             ExtendedMessageDescriptor exdesc;
1337 
1338             encodeAtomicDescriptors(hw, desc, exdesc, op, mod, dst, spec, base, addr);
1339             if (data.isNull())
1340                 parent.send(mod, dst, addr, exdesc.all, desc.all);
1341             else
1342                 parent.sends(mod, dst, addr, data, exdesc.all, desc.all);
1343         }
1344         template <typename DataSpec>
operator ()ngen::BinaryCodeGenerator::Atomic_1345         void operator()(AtomicOp op, const InstructionModifier &mod, const DataSpec &spec, AddressBase base, const RegData &addr, const RegData &data = NullRegister())
1346         {
1347             (*this)(op, mod, NullRegister(), spec, base, addr, data);
1348         }
1349 
1350         template <typename DataSpec>
operator ()ngen::BinaryCodeGenerator::Atomic_1351         void operator()(AtomicOp op, const InstructionModifier &mod, const RegData &dst, const DataSpec &spec, AddressBase base, const GRFDisp &addr, const RegData &data = NullRegister())
1352         {
1353             MessageDescriptor desc;
1354             ExtendedMessageDescriptor exdesc;
1355 
1356             encodeAtomicDescriptors(hw, desc, exdesc, op, mod, dst, spec, base, addr);
1357             parent.sends(mod, dst, addr.getBase(), data, exdesc.all, desc.all);
1358         }
1359         template <typename DataSpec>
operator ()ngen::BinaryCodeGenerator::Atomic_1360         void operator()(AtomicOp op, const InstructionModifier &mod, const DataSpec &spec, AddressBase base, const GRFDisp &addr, const RegData &data = NullRegister())
1361         {
1362             (*this)(op, mod, NullRegister(), spec, base, addr, data);
1363         }
1364         template <typename DataSpec>
operator ()ngen::BinaryCodeGenerator::Atomic_1365         void operator()(SharedFunction sfid, AtomicOp op, const InstructionModifier &mod, const RegData &dst, const DataSpec &spec, AddressBase base, const GRFDisp &addr, const RegData &data)
1366         {
1367             MessageDescriptor desc;
1368             ExtendedMessageDescriptor exdesc;
1369 
1370             exdesc.parts.sfid = static_cast<unsigned>(sfid);
1371             encodeAtomicDescriptors(hw, desc, exdesc, op, mod, dst, spec, base, addr);
1372             exdesc.parts.sfid = static_cast<unsigned>(sfid);
1373             parent.sends(mod, dst, addr.getBase(), data, exdesc.all, desc.all);
1374         }
1375 
ugmngen::BinaryCodeGenerator::Atomic_1376         void ugm(AtomicOp op, const InstructionModifier &mod, const RegData &dst, DataSpecLSC spec, AddressBase base, const GRFDisp &addr, const RegData &data = NullRegister())
1377         {
1378             this->operator()(SharedFunction::ugm, op, mod, dst, spec, base, addr, data);
1379         }
ugmngen::BinaryCodeGenerator::Atomic_1380         void ugm(AtomicOp op, const InstructionModifier &mod, DataSpecLSC spec, AddressBase base, const GRFDisp &addr, const RegData &data = NullRegister())
1381         {
1382             this->operator()(SharedFunction::ugm, op, mod, NullRegister(), spec, base, addr, data);
1383         }
ugmlngen::BinaryCodeGenerator::Atomic_1384         void ugml(AtomicOp op, const InstructionModifier &mod, const RegData &dst, DataSpecLSC spec, AddressBase base, const GRFDisp &addr, const RegData &data = NullRegister())
1385         {
1386             this->operator()(SharedFunction::ugml, op, mod, dst, spec, base, addr, data);
1387         }
ugmlngen::BinaryCodeGenerator::Atomic_1388         void ugml(AtomicOp op, const InstructionModifier &mod, DataSpecLSC spec, AddressBase base, const GRFDisp &addr, const RegData &data = NullRegister())
1389         {
1390             this->operator()(SharedFunction::ugml, op, mod, NullRegister(), spec, base, addr, data);
1391         }
tgmngen::BinaryCodeGenerator::Atomic_1392         void tgm(AtomicOp op, const InstructionModifier &mod, const RegData &dst, DataSpecLSC spec, AddressBase base, const GRFDisp &addr, const RegData &data = NullRegister())
1393         {
1394             this->operator()(SharedFunction::tgm, op, mod, dst, spec, base, addr, data);
1395         }
tgmngen::BinaryCodeGenerator::Atomic_1396         void tgm(AtomicOp op, const InstructionModifier &mod, DataSpecLSC spec, AddressBase base, const GRFDisp &addr, const RegData &data = NullRegister())
1397         {
1398             this->operator()(SharedFunction::tgm, op, mod, NullRegister(), spec, base, addr, data);
1399         }
slmngen::BinaryCodeGenerator::Atomic_1400         void slm(AtomicOp op, const InstructionModifier &mod, const RegData &dst, DataSpecLSC spec, AddressBase base, const GRFDisp &addr, const RegData &data = NullRegister())
1401         {
1402             this->operator()(SharedFunction::slm, op, mod, dst, spec, base, addr, data);
1403         }
slmngen::BinaryCodeGenerator::Atomic_1404         void slm(AtomicOp op, const InstructionModifier &mod, DataSpecLSC spec, AddressBase base, const GRFDisp &addr, const RegData &data = NullRegister())
1405         {
1406             this->operator()(SharedFunction::slm, op, mod, NullRegister(), spec, base, addr, data);
1407         }
1408     };
public:
    // Callable member objects used to issue memory messages.
    // `atomic` is an Atomic_ instance: it offers operator() overloads plus the
    // ugm/ugml/tgm/slm helpers, all of which emit through parent.send/sends.
    // NOTE(review): Load and Store are defined outside this excerpt; presumably
    // they follow the same functor pattern -- confirm.
    Load load;
    Store store;
    Atomic_ atomic;
1413 
wrdep(const GRFRange & r)1414     void wrdep(const GRFRange &r) {
1415         opX(Opcode::wrdep, DataType::ud, InstructionModifier::createAutoSWSB(), null, r[0], r[r.getLen() - 1]);
1416     }
wrdep(const GRF & r)1417     void wrdep(const GRF &r) {
1418         wrdep(r-r);
1419     }
1420 
1421 #include "ngen_pseudo.hpp"
1422 };
1423 
// NGEN_FORWARD(hw): expands to a set of member declarations that forward to
// ngen::BinaryCodeGenerator<hw>.
//   - Most entries are variadic member templates that perfectly forward their
//     arguments to the BinaryCodeGenerator<hw> member of the same name; those
//     taking a leading DT template parameter pass it through as <DT>.
//   - isGen12, sync, load, store and atomic are brought in with
//     using-declarations instead of wrappers.
//   - epilogue, getDefaultNoMask and getDefaultAutoSWSB use fixed signatures.
//   - The expansion ends with NGEN_FORWARD_EXTRA, NGEN_FORWARD_OP_NAMES,
//     NGEN_FORWARD_MIN_MAX and NGEN_FORWARD_REGISTERS.
// NOTE(review): presumably intended for the body of a class template derived
// from BinaryCodeGenerator<hw>, where names from the dependent base are not
// found by unqualified lookup -- confirm against users of this macro.
#define NGEN_FORWARD(hw) \
using InstructionStream = typename ngen::BinaryCodeGenerator<hw>::InstructionStream; \
using ngen::BinaryCodeGenerator<hw>::isGen12; \
template <typename DT = void, typename... Targs> void add(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::template add<DT>(std::forward<Targs>(args)...); } \
template <typename DT = void, typename... Targs> void addc(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::template addc<DT>(std::forward<Targs>(args)...); } \
template <typename DT = void, typename... Targs> void and_(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::template and_<DT>(std::forward<Targs>(args)...); } \
template <typename DT = void, typename... Targs> void asr(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::template asr<DT>(std::forward<Targs>(args)...); } \
template <typename DT = void, typename... Targs> void avg(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::template avg<DT>(std::forward<Targs>(args)...); } \
template <typename DT = void, typename... Targs> void bfe(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::template bfe<DT>(std::forward<Targs>(args)...); } \
template <typename DT = void, typename... Targs> void bfi1(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::template bfi1<DT>(std::forward<Targs>(args)...); } \
template <typename DT = void, typename... Targs> void bfi2(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::template bfi2<DT>(std::forward<Targs>(args)...); } \
template <typename DT = void, typename... Targs> void bfrev(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::template bfrev<DT>(std::forward<Targs>(args)...); } \
template <typename DT = void, typename... Targs> void cbit(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::template cbit<DT>(std::forward<Targs>(args)...); } \
template <typename DT = void, typename... Targs> void cmp(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::template cmp<DT>(std::forward<Targs>(args)...); } \
template <typename DT = void, typename... Targs> void cmpn(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::template cmpn<DT>(std::forward<Targs>(args)...); } \
template <typename DT = void, typename... Targs> void csel(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::template csel<DT>(std::forward<Targs>(args)...); } \
template <typename DT = void, typename... Targs> void dp2(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::template dp2<DT>(std::forward<Targs>(args)...); } \
template <typename DT = void, typename... Targs> void dp3(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::template dp3<DT>(std::forward<Targs>(args)...); } \
template <typename DT = void, typename... Targs> void dp4(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::template dp4<DT>(std::forward<Targs>(args)...); } \
template <typename DT = void, typename... Targs> void dph(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::template dph<DT>(std::forward<Targs>(args)...); } \
template <typename DT = void, typename... Targs> void fbh(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::template fbh<DT>(std::forward<Targs>(args)...); } \
template <typename DT = void, typename... Targs> void fbl(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::template fbl<DT>(std::forward<Targs>(args)...); } \
template <typename DT = void, typename... Targs> void frc(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::template frc<DT>(std::forward<Targs>(args)...); } \
template <typename DT = void, typename... Targs> void line(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::template line<DT>(std::forward<Targs>(args)...); } \
template <typename DT = void, typename... Targs> void lrp(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::template lrp<DT>(std::forward<Targs>(args)...); } \
template <typename DT = void, typename... Targs> void lzd(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::template lzd<DT>(std::forward<Targs>(args)...); } \
template <typename DT = void, typename... Targs> void mac(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::template mac<DT>(std::forward<Targs>(args)...); } \
template <typename DT = void, typename... Targs> void macl(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::template macl<DT>(std::forward<Targs>(args)...); } \
template <typename DT = void, typename... Targs> void mach(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::template mach<DT>(std::forward<Targs>(args)...); } \
template <typename DT = void, typename... Targs> void mad(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::template mad<DT>(std::forward<Targs>(args)...); } \
template <typename DT = void, typename... Targs> void madm(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::template madm<DT>(std::forward<Targs>(args)...); } \
template <typename DT = void, typename... Targs> void math(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::template math<DT>(std::forward<Targs>(args)...); } \
template <typename DT = void, typename... Targs> void mov(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::template mov<DT>(std::forward<Targs>(args)...); } \
template <typename DT = void, typename... Targs> void movi(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::template movi<DT>(std::forward<Targs>(args)...); } \
template <typename DT = void, typename... Targs> void mul(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::template mul<DT>(std::forward<Targs>(args)...); } \
template <typename DT = void, typename... Targs> void not_(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::template not_<DT>(std::forward<Targs>(args)...); } \
template <typename DT = void, typename... Targs> void or_(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::template or_<DT>(std::forward<Targs>(args)...); } \
template <typename DT = void, typename... Targs> void pln(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::template pln<DT>(std::forward<Targs>(args)...); } \
template <typename DT = void, typename... Targs> void rndd(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::template rndd<DT>(std::forward<Targs>(args)...); } \
template <typename DT = void, typename... Targs> void rnde(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::template rnde<DT>(std::forward<Targs>(args)...); } \
template <typename DT = void, typename... Targs> void rndu(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::template rndu<DT>(std::forward<Targs>(args)...); } \
template <typename DT = void, typename... Targs> void rndz(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::template rndz<DT>(std::forward<Targs>(args)...); } \
template <typename DT = void, typename... Targs> void rol(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::template rol<DT>(std::forward<Targs>(args)...); } \
template <typename DT = void, typename... Targs> void ror(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::template ror<DT>(std::forward<Targs>(args)...); } \
template <typename DT = void, typename... Targs> void sad2(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::template sad2<DT>(std::forward<Targs>(args)...); } \
template <typename DT = void, typename... Targs> void sada2(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::template sada2<DT>(std::forward<Targs>(args)...); } \
template <typename DT = void, typename... Targs> void sel(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::template sel<DT>(std::forward<Targs>(args)...); } \
template <typename DT = void, typename... Targs> void shl(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::template shl<DT>(std::forward<Targs>(args)...); } \
template <typename DT = void, typename... Targs> void shr(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::template shr<DT>(std::forward<Targs>(args)...); } \
template <typename DT = void, typename... Targs> void smov(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::template smov<DT>(std::forward<Targs>(args)...); } \
template <typename DT = void, typename... Targs> void subb(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::template subb<DT>(std::forward<Targs>(args)...); } \
template <typename DT = void, typename... Targs> void xor_(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::template xor_<DT>(std::forward<Targs>(args)...); } \
template <typename... Targs> void brc(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::brc(std::forward<Targs>(args)...); } \
template <typename... Targs> void brd(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::brd(std::forward<Targs>(args)...); } \
template <typename... Targs> void break_(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::break_(std::forward<Targs>(args)...); } \
template <typename... Targs> void call(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::call(std::forward<Targs>(args)...); } \
template <typename... Targs> void calla(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::calla(std::forward<Targs>(args)...); } \
template <typename... Targs> void cont(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::cont(std::forward<Targs>(args)...); } \
template <typename... Targs> void else_(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::else_(std::forward<Targs>(args)...); } \
template <typename... Targs> void endif(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::endif(std::forward<Targs>(args)...); } \
template <typename... Targs> void goto_(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::goto_(std::forward<Targs>(args)...); } \
template <typename... Targs> void halt(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::halt(std::forward<Targs>(args)...); } \
template <typename... Targs> void if_(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::if_(std::forward<Targs>(args)...); } \
template <typename... Targs> void illegal(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::illegal(std::forward<Targs>(args)...); } \
template <typename... Targs> void join(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::join(std::forward<Targs>(args)...); } \
template <typename... Targs> void jmpi(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::jmpi(std::forward<Targs>(args)...); } \
template <typename... Targs> void nop(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::nop(std::forward<Targs>(args)...); } \
template <typename... Targs> void ret(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::ret(std::forward<Targs>(args)...); } \
template <typename... Targs> void send(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::send(std::forward<Targs>(args)...); } \
template <typename... Targs> void sendc(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::sendc(std::forward<Targs>(args)...); } \
template <typename... Targs> void sends(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::sends(std::forward<Targs>(args)...); } \
template <typename... Targs> void sendsc(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::sendsc(std::forward<Targs>(args)...); } \
using ngen::BinaryCodeGenerator<hw>::sync; \
template <typename... Targs> void wait(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::wait(std::forward<Targs>(args)...); } \
template <typename... Targs> void while_(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::while_(std::forward<Targs>(args)...); } \
template <typename... Targs> void wrdep(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::wrdep(std::forward<Targs>(args)...); } \
template <typename DT = void, typename... Targs> void min_(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::template min_<DT>(std::forward<Targs>(args)...); } \
template <typename DT = void, typename... Targs> void max_(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::template max_<DT>(std::forward<Targs>(args)...); } \
template <typename DT = void, typename... Targs> void bfi(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::template bfi<DT>(std::forward<Targs>(args)...); } \
template <typename DT = void, typename... Targs> void cos(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::template cos<DT>(std::forward<Targs>(args)...); } \
template <typename DT = void, typename... Targs> void exp(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::template exp<DT>(std::forward<Targs>(args)...); } \
template <typename DT = void, typename... Targs> void fdiv(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::template fdiv<DT>(std::forward<Targs>(args)...); } \
template <typename DT = void, typename... Targs> void idiv(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::template idiv<DT>(std::forward<Targs>(args)...); } \
template <typename DT = void, typename... Targs> void inv(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::template inv<DT>(std::forward<Targs>(args)...); } \
template <typename DT = void, typename... Targs> void invm(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::template invm<DT>(std::forward<Targs>(args)...); } \
template <typename DT = void, typename... Targs> void iqot(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::template iqot<DT>(std::forward<Targs>(args)...); } \
template <typename DT = void, typename... Targs> void irem(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::template irem<DT>(std::forward<Targs>(args)...); } \
template <typename DT = void, typename... Targs> void log(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::template log<DT>(std::forward<Targs>(args)...); } \
template <typename DT = void, typename... Targs> void pow(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::template pow<DT>(std::forward<Targs>(args)...); } \
template <typename DT = void, typename... Targs> void rsqt(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::template rsqt<DT>(std::forward<Targs>(args)...); } \
template <typename DT = void, typename... Targs> void rsqtm(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::template rsqtm<DT>(std::forward<Targs>(args)...); } \
template <typename DT = void, typename... Targs> void sin(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::template sin<DT>(std::forward<Targs>(args)...); } \
template <typename DT = void, typename... Targs> void sqt(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::template sqt<DT>(std::forward<Targs>(args)...); } \
template <typename DT = void, typename... Targs> void fdiv_ieee(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::template fdiv_ieee<DT>(std::forward<Targs>(args)...); } \
template <typename DT = void, typename... Targs> void inv_ieee(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::template inv_ieee<DT>(std::forward<Targs>(args)...); } \
template <typename DT = void, typename... Targs> void sqt_ieee(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::template sqt_ieee<DT>(std::forward<Targs>(args)...); } \
template <typename... Targs> void threadend(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::threadend(std::forward<Targs>(args)...); } \
template <typename... Targs> void barrierheader(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::barrierheader(std::forward<Targs>(args)...); } \
template <typename... Targs> void barriermsg(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::barriermsg(std::forward<Targs>(args)...); } \
template <typename... Targs> void barriersignal(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::barriersignal(std::forward<Targs>(args)...); } \
template <typename... Targs> void barrierwait(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::barrierwait(std::forward<Targs>(args)...); } \
template <typename... Targs> void barrier(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::barrier(std::forward<Targs>(args)...); } \
using ngen::BinaryCodeGenerator<hw>::load; \
using ngen::BinaryCodeGenerator<hw>::store; \
using ngen::BinaryCodeGenerator<hw>::atomic; \
template <typename... Targs> void memfence(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::memfence(std::forward<Targs>(args)...); } \
template <typename... Targs> void slmfence(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::slmfence(std::forward<Targs>(args)...); } \
template <typename... Targs> void epilogue(int GRFCount, bool hasSLM, const ngen::RegData &r0_info) { ngen::BinaryCodeGenerator<hw>::epilogue(GRFCount, hasSLM, r0_info); } \
template <typename... Targs> void pushStream(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::pushStream(std::forward<Targs>(args)...); } \
template <typename... Targs> InstructionStream *popStream(Targs&&... args) { return ngen::BinaryCodeGenerator<hw>::popStream(std::forward<Targs>(args)...); } \
template <typename... Targs> void appendStream(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::appendStream(std::forward<Targs>(args)...); } \
template <typename... Targs> void appendCurrentStream(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::appendCurrentStream(std::forward<Targs>(args)...); } \
template <typename... Targs> void discardStream(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::discardStream(std::forward<Targs>(args)...); } \
template <typename... Targs> void mark(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::mark(std::forward<Targs>(args)...); } \
template <typename... Targs> void comment(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::comment(std::forward<Targs>(args)...); } \
template <typename... Targs> void setDefaultNoMask(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::setDefaultNoMask(std::forward<Targs>(args)...); } \
template <typename... Targs> void setDefaultAutoSWSB(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::setDefaultAutoSWSB(std::forward<Targs>(args)...); } \
bool getDefaultNoMask() { return ngen::BinaryCodeGenerator<hw>::getDefaultNoMask(); } \
bool getDefaultAutoSWSB() { return ngen::BinaryCodeGenerator<hw>::getDefaultAutoSWSB(); } \
NGEN_FORWARD_EXTRA \
NGEN_FORWARD_OP_NAMES \
NGEN_FORWARD_MIN_MAX \
NGEN_FORWARD_REGISTERS
1547 
// NGEN_FORWARD_EXTRA: forwarding wrappers for add3, bfn, dp4a, dpas and dpasw,
// written in the same perfect-forwarding style as the entries of NGEN_FORWARD.
// Relies on the `hw` macro argument of NGEN_FORWARD being in effect at the
// point of expansion (it is only referenced from inside NGEN_FORWARD here).
#define NGEN_FORWARD_EXTRA \
template <typename DT = void, typename... Targs> void add3(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::template add3<DT>(std::forward<Targs>(args)...); } \
template <typename DT = void, typename... Targs> void bfn(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::template bfn<DT>(std::forward<Targs>(args)...); } \
template <typename DT = void, typename... Targs> void dp4a(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::template dp4a<DT>(std::forward<Targs>(args)...); } \
template <typename DT = void, typename... Targs> void dpas(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::template dpas<DT>(std::forward<Targs>(args)...); } \
template <typename DT = void, typename... Targs> void dpasw(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::template dpasw<DT>(std::forward<Targs>(args)...); }
1554 
// NGEN_FORWARD_OP_NAMES: wrappers named and/not/or/xor that forward to the
// and_/not_/or_/xor_ members of BinaryCodeGenerator<hw>.
// When NGEN_NO_OP_NAMES is defined the macro expands to nothing.
// NOTE(review): and/not/or/xor are alternative operator tokens in standard
// C++, so these names presumably only compile where the toolchain does not
// treat them as such -- confirm which configurations define NGEN_NO_OP_NAMES.
#ifdef NGEN_NO_OP_NAMES
#define NGEN_FORWARD_OP_NAMES
#else
#define NGEN_FORWARD_OP_NAMES \
template <typename DT = void, typename... Targs> void and(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::template and_<DT>(std::forward<Targs>(args)...); } \
template <typename DT = void, typename... Targs> void not(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::template not_<DT>(std::forward<Targs>(args)...); } \
template <typename DT = void, typename... Targs> void or(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::template or_<DT>(std::forward<Targs>(args)...); } \
template <typename DT = void, typename... Targs> void xor(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::template xor_<DT>(std::forward<Targs>(args)...); }
#endif
1564 
// NGEN_FORWARD_MIN_MAX: wrappers named min/max that forward to the min/max
// members of BinaryCodeGenerator<hw>. The underscore-suffixed min_/max_
// wrappers are always provided by NGEN_FORWARD itself.
// When NGEN_WINDOWS_COMPAT is defined the macro expands to nothing.
// NOTE(review): presumably this avoids clashing with min/max macros from
// Windows headers -- confirm.
#ifdef NGEN_WINDOWS_COMPAT
#define NGEN_FORWARD_MIN_MAX
#else
#define NGEN_FORWARD_MIN_MAX \
template <typename DT = void, typename... Targs> void min(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::template min<DT>(std::forward<Targs>(args)...); } \
template <typename DT = void, typename... Targs> void max(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::template max<DT>(std::forward<Targs>(args)...); }
#endif
1572 
1573 #ifdef NGEN_GLOBAL_REGS
1574 #define NGEN_FORWARD_REGISTERS
1575 #else
1576 #define NGEN_FORWARD_REGISTERS_BASE \
1577 using ngen::BinaryCodeGenerator<hw>::indirect; \
1578 using ngen::BinaryCodeGenerator<hw>::r0; using ngen::BinaryCodeGenerator<hw>::r1; using ngen::BinaryCodeGenerator<hw>::r2; using ngen::BinaryCodeGenerator<hw>::r3; \
1579 using ngen::BinaryCodeGenerator<hw>::r4; using ngen::BinaryCodeGenerator<hw>::r5; using ngen::BinaryCodeGenerator<hw>::r6; using ngen::BinaryCodeGenerator<hw>::r7; \
1580 using ngen::BinaryCodeGenerator<hw>::r8; using ngen::BinaryCodeGenerator<hw>::r9; using ngen::BinaryCodeGenerator<hw>::r10; using ngen::BinaryCodeGenerator<hw>::r11; \
1581 using ngen::BinaryCodeGenerator<hw>::r12; using ngen::BinaryCodeGenerator<hw>::r13; using ngen::BinaryCodeGenerator<hw>::r14; using ngen::BinaryCodeGenerator<hw>::r15; \
1582 using ngen::BinaryCodeGenerator<hw>::r16; using ngen::BinaryCodeGenerator<hw>::r17; using ngen::BinaryCodeGenerator<hw>::r18; using ngen::BinaryCodeGenerator<hw>::r19; \
1583 using ngen::BinaryCodeGenerator<hw>::r20; using ngen::BinaryCodeGenerator<hw>::r21; using ngen::BinaryCodeGenerator<hw>::r22; using ngen::BinaryCodeGenerator<hw>::r23; \
1584 using ngen::BinaryCodeGenerator<hw>::r24; using ngen::BinaryCodeGenerator<hw>::r25; using ngen::BinaryCodeGenerator<hw>::r26; using ngen::BinaryCodeGenerator<hw>::r27; \
1585 using ngen::BinaryCodeGenerator<hw>::r28; using ngen::BinaryCodeGenerator<hw>::r29; using ngen::BinaryCodeGenerator<hw>::r30; using ngen::BinaryCodeGenerator<hw>::r31; \
1586 using ngen::BinaryCodeGenerator<hw>::r32; using ngen::BinaryCodeGenerator<hw>::r33; using ngen::BinaryCodeGenerator<hw>::r34; using ngen::BinaryCodeGenerator<hw>::r35; \
1587 using ngen::BinaryCodeGenerator<hw>::r36; using ngen::BinaryCodeGenerator<hw>::r37; using ngen::BinaryCodeGenerator<hw>::r38; using ngen::BinaryCodeGenerator<hw>::r39; \
1588 using ngen::BinaryCodeGenerator<hw>::r40; using ngen::BinaryCodeGenerator<hw>::r41; using ngen::BinaryCodeGenerator<hw>::r42; using ngen::BinaryCodeGenerator<hw>::r43; \
1589 using ngen::BinaryCodeGenerator<hw>::r44; using ngen::BinaryCodeGenerator<hw>::r45; using ngen::BinaryCodeGenerator<hw>::r46; using ngen::BinaryCodeGenerator<hw>::r47; \
1590 using ngen::BinaryCodeGenerator<hw>::r48; using ngen::BinaryCodeGenerator<hw>::r49; using ngen::BinaryCodeGenerator<hw>::r50; using ngen::BinaryCodeGenerator<hw>::r51; \
1591 using ngen::BinaryCodeGenerator<hw>::r52; using ngen::BinaryCodeGenerator<hw>::r53; using ngen::BinaryCodeGenerator<hw>::r54; using ngen::BinaryCodeGenerator<hw>::r55; \
1592 using ngen::BinaryCodeGenerator<hw>::r56; using ngen::BinaryCodeGenerator<hw>::r57; using ngen::BinaryCodeGenerator<hw>::r58; using ngen::BinaryCodeGenerator<hw>::r59; \
1593 using ngen::BinaryCodeGenerator<hw>::r60; using ngen::BinaryCodeGenerator<hw>::r61; using ngen::BinaryCodeGenerator<hw>::r62; using ngen::BinaryCodeGenerator<hw>::r63; \
1594 using ngen::BinaryCodeGenerator<hw>::r64; using ngen::BinaryCodeGenerator<hw>::r65; using ngen::BinaryCodeGenerator<hw>::r66; using ngen::BinaryCodeGenerator<hw>::r67; \
1595 using ngen::BinaryCodeGenerator<hw>::r68; using ngen::BinaryCodeGenerator<hw>::r69; using ngen::BinaryCodeGenerator<hw>::r70; using ngen::BinaryCodeGenerator<hw>::r71; \
1596 using ngen::BinaryCodeGenerator<hw>::r72; using ngen::BinaryCodeGenerator<hw>::r73; using ngen::BinaryCodeGenerator<hw>::r74; using ngen::BinaryCodeGenerator<hw>::r75; \
1597 using ngen::BinaryCodeGenerator<hw>::r76; using ngen::BinaryCodeGenerator<hw>::r77; using ngen::BinaryCodeGenerator<hw>::r78; using ngen::BinaryCodeGenerator<hw>::r79; \
1598 using ngen::BinaryCodeGenerator<hw>::r80; using ngen::BinaryCodeGenerator<hw>::r81; using ngen::BinaryCodeGenerator<hw>::r82; using ngen::BinaryCodeGenerator<hw>::r83; \
1599 using ngen::BinaryCodeGenerator<hw>::r84; using ngen::BinaryCodeGenerator<hw>::r85; using ngen::BinaryCodeGenerator<hw>::r86; using ngen::BinaryCodeGenerator<hw>::r87; \
1600 using ngen::BinaryCodeGenerator<hw>::r88; using ngen::BinaryCodeGenerator<hw>::r89; using ngen::BinaryCodeGenerator<hw>::r90; using ngen::BinaryCodeGenerator<hw>::r91; \
1601 using ngen::BinaryCodeGenerator<hw>::r92; using ngen::BinaryCodeGenerator<hw>::r93; using ngen::BinaryCodeGenerator<hw>::r94; using ngen::BinaryCodeGenerator<hw>::r95; \
1602 using ngen::BinaryCodeGenerator<hw>::r96; using ngen::BinaryCodeGenerator<hw>::r97; using ngen::BinaryCodeGenerator<hw>::r98; using ngen::BinaryCodeGenerator<hw>::r99; \
1603 using ngen::BinaryCodeGenerator<hw>::r100; using ngen::BinaryCodeGenerator<hw>::r101; using ngen::BinaryCodeGenerator<hw>::r102; using ngen::BinaryCodeGenerator<hw>::r103; \
1604 using ngen::BinaryCodeGenerator<hw>::r104; using ngen::BinaryCodeGenerator<hw>::r105; using ngen::BinaryCodeGenerator<hw>::r106; using ngen::BinaryCodeGenerator<hw>::r107; \
1605 using ngen::BinaryCodeGenerator<hw>::r108; using ngen::BinaryCodeGenerator<hw>::r109; using ngen::BinaryCodeGenerator<hw>::r110; using ngen::BinaryCodeGenerator<hw>::r111; \
1606 using ngen::BinaryCodeGenerator<hw>::r112; using ngen::BinaryCodeGenerator<hw>::r113; using ngen::BinaryCodeGenerator<hw>::r114; using ngen::BinaryCodeGenerator<hw>::r115; \
1607 using ngen::BinaryCodeGenerator<hw>::r116; using ngen::BinaryCodeGenerator<hw>::r117; using ngen::BinaryCodeGenerator<hw>::r118; using ngen::BinaryCodeGenerator<hw>::r119; \
1608 using ngen::BinaryCodeGenerator<hw>::r120; using ngen::BinaryCodeGenerator<hw>::r121; using ngen::BinaryCodeGenerator<hw>::r122; using ngen::BinaryCodeGenerator<hw>::r123; \
1609 using ngen::BinaryCodeGenerator<hw>::r124; using ngen::BinaryCodeGenerator<hw>::r125; using ngen::BinaryCodeGenerator<hw>::r126; using ngen::BinaryCodeGenerator<hw>::r127; \
1610 using ngen::BinaryCodeGenerator<hw>::null; \
1611 using ngen::BinaryCodeGenerator<hw>::a0; \
1612 using ngen::BinaryCodeGenerator<hw>::acc0; using ngen::BinaryCodeGenerator<hw>::acc1; using ngen::BinaryCodeGenerator<hw>::acc2; using ngen::BinaryCodeGenerator<hw>::acc3; \
1613 using ngen::BinaryCodeGenerator<hw>::acc4; using ngen::BinaryCodeGenerator<hw>::acc5; using ngen::BinaryCodeGenerator<hw>::acc6; using ngen::BinaryCodeGenerator<hw>::acc7; \
1614 using ngen::BinaryCodeGenerator<hw>::acc8; using ngen::BinaryCodeGenerator<hw>::acc9; \
1615 using ngen::BinaryCodeGenerator<hw>::mme0; using ngen::BinaryCodeGenerator<hw>::mme1; using ngen::BinaryCodeGenerator<hw>::mme2; using ngen::BinaryCodeGenerator<hw>::mme3; \
1616 using ngen::BinaryCodeGenerator<hw>::mme4; using ngen::BinaryCodeGenerator<hw>::mme5; using ngen::BinaryCodeGenerator<hw>::mme6; using ngen::BinaryCodeGenerator<hw>::mme7; \
1617 using ngen::BinaryCodeGenerator<hw>::noacc; using ngen::BinaryCodeGenerator<hw>::nomme; \
1618 using ngen::BinaryCodeGenerator<hw>::f0; using ngen::BinaryCodeGenerator<hw>::f1; \
1619 using ngen::BinaryCodeGenerator<hw>::ce0; using ngen::BinaryCodeGenerator<hw>::sp; using ngen::BinaryCodeGenerator<hw>::sr0; using ngen::BinaryCodeGenerator<hw>::sr1; \
1620 using ngen::BinaryCodeGenerator<hw>::cr0; using ngen::BinaryCodeGenerator<hw>::n0; using ngen::BinaryCodeGenerator<hw>::ip; using ngen::BinaryCodeGenerator<hw>::tdr0; \
1621 using ngen::BinaryCodeGenerator<hw>::tm0; using ngen::BinaryCodeGenerator<hw>::tm1; using ngen::BinaryCodeGenerator<hw>::tm2; using ngen::BinaryCodeGenerator<hw>::tm3; \
1622 using ngen::BinaryCodeGenerator<hw>::tm4; using ngen::BinaryCodeGenerator<hw>::pm0; using ngen::BinaryCodeGenerator<hw>::tp0; using ngen::BinaryCodeGenerator<hw>::dbg0; \
1623 using ngen::BinaryCodeGenerator<hw>::fc0; using ngen::BinaryCodeGenerator<hw>::fc1; using ngen::BinaryCodeGenerator<hw>::fc2; using ngen::BinaryCodeGenerator<hw>::fc3; \
1624 using ngen::BinaryCodeGenerator<hw>::NoDDClr; using ngen::BinaryCodeGenerator<hw>::NoDDChk; \
1625 using ngen::BinaryCodeGenerator<hw>::AccWrEn; using ngen::BinaryCodeGenerator<hw>::NoSrcDepSet; using ngen::BinaryCodeGenerator<hw>::Breakpoint; using ngen::BinaryCodeGenerator<hw>::sat; \
1626 using ngen::BinaryCodeGenerator<hw>::NoMask; \
1627 using ngen::BinaryCodeGenerator<hw>::Serialize; using ngen::BinaryCodeGenerator<hw>::EOT; \
1628 using ngen::BinaryCodeGenerator<hw>::Atomic; using ngen::BinaryCodeGenerator<hw>::Switch; using ngen::BinaryCodeGenerator<hw>::NoPreempt; \
1629 using ngen::BinaryCodeGenerator<hw>::anyv; using ngen::BinaryCodeGenerator<hw>::allv; using ngen::BinaryCodeGenerator<hw>::any2h; using ngen::BinaryCodeGenerator<hw>::all2h; \
1630 using ngen::BinaryCodeGenerator<hw>::any4h; using ngen::BinaryCodeGenerator<hw>::all4h; using ngen::BinaryCodeGenerator<hw>::any8h; using ngen::BinaryCodeGenerator<hw>::all8h; \
1631 using ngen::BinaryCodeGenerator<hw>::any16h; using ngen::BinaryCodeGenerator<hw>::all16h; using ngen::BinaryCodeGenerator<hw>::any32h; using ngen::BinaryCodeGenerator<hw>::all32h; \
1632 using ngen::BinaryCodeGenerator<hw>::x_repl; using ngen::BinaryCodeGenerator<hw>::y_repl; using ngen::BinaryCodeGenerator<hw>::z_repl; using ngen::BinaryCodeGenerator<hw>::w_repl; \
1633 using ngen::BinaryCodeGenerator<hw>::ze; using ngen::BinaryCodeGenerator<hw>::eq; using ngen::BinaryCodeGenerator<hw>::nz; using ngen::BinaryCodeGenerator<hw>::ne; \
1634 using ngen::BinaryCodeGenerator<hw>::gt; using ngen::BinaryCodeGenerator<hw>::ge; using ngen::BinaryCodeGenerator<hw>::lt; using ngen::BinaryCodeGenerator<hw>::le; \
1635 using ngen::BinaryCodeGenerator<hw>::ov; using ngen::BinaryCodeGenerator<hw>::un; using ngen::BinaryCodeGenerator<hw>::eo; \
1636 using ngen::BinaryCodeGenerator<hw>::M0; using ngen::BinaryCodeGenerator<hw>::M4; using ngen::BinaryCodeGenerator<hw>::M8; using ngen::BinaryCodeGenerator<hw>::M12; \
1637 using ngen::BinaryCodeGenerator<hw>::M16; using ngen::BinaryCodeGenerator<hw>::M20; using ngen::BinaryCodeGenerator<hw>::M24; using ngen::BinaryCodeGenerator<hw>::M28; \
1638 using ngen::BinaryCodeGenerator<hw>::sb0; using ngen::BinaryCodeGenerator<hw>::sb1; using ngen::BinaryCodeGenerator<hw>::sb2; using ngen::BinaryCodeGenerator<hw>::sb3; \
1639 using ngen::BinaryCodeGenerator<hw>::sb4; using ngen::BinaryCodeGenerator<hw>::sb5; using ngen::BinaryCodeGenerator<hw>::sb6; using ngen::BinaryCodeGenerator<hw>::sb7; \
1640 using ngen::BinaryCodeGenerator<hw>::sb8; using ngen::BinaryCodeGenerator<hw>::sb9; using ngen::BinaryCodeGenerator<hw>::sb10; using ngen::BinaryCodeGenerator<hw>::sb11; \
1641 using ngen::BinaryCodeGenerator<hw>::sb12; using ngen::BinaryCodeGenerator<hw>::sb13; using ngen::BinaryCodeGenerator<hw>::sb14; using ngen::BinaryCodeGenerator<hw>::sb15; \
1642 using ngen::BinaryCodeGenerator<hw>::A32; using ngen::BinaryCodeGenerator<hw>::A32NC; using ngen::BinaryCodeGenerator<hw>::A64; using ngen::BinaryCodeGenerator<hw>::A64NC; \
1643 using ngen::BinaryCodeGenerator<hw>::SLM; \
1644 template <typename... Targs> ngen::InstructionModifier ExecutionOffset(Targs&&... args) { return ngen::BinaryCodeGenerator<hw>::ExecutionOffset(std::forward<Targs>(args)...); } \
1645 template <typename... Targs> ngen::AddressBase Surface(Targs&&... args) { return ngen::BinaryCodeGenerator<hw>::Surface(std::forward<Targs>(args)...); } \
1646 template <typename... Targs> ngen::AddressBase CC(Targs&&... args) { return ngen::BinaryCodeGenerator<hw>::CC(std::forward<Targs>(args)...); } \
1647 template <typename... Targs> ngen::AddressBase SC(Targs&&... args) { return ngen::BinaryCodeGenerator<hw>::SC(std::forward<Targs>(args)...); }
1648 #define NGEN_FORWARD_REGISTERS_EXTRA1 \
1649 using ngen::BinaryCodeGenerator<hw>::r128; using ngen::BinaryCodeGenerator<hw>::r129; using ngen::BinaryCodeGenerator<hw>::r130; using ngen::BinaryCodeGenerator<hw>::r131; \
1650 using ngen::BinaryCodeGenerator<hw>::r132; using ngen::BinaryCodeGenerator<hw>::r133; using ngen::BinaryCodeGenerator<hw>::r134; using ngen::BinaryCodeGenerator<hw>::r135; \
1651 using ngen::BinaryCodeGenerator<hw>::r136; using ngen::BinaryCodeGenerator<hw>::r137; using ngen::BinaryCodeGenerator<hw>::r138; using ngen::BinaryCodeGenerator<hw>::r139; \
1652 using ngen::BinaryCodeGenerator<hw>::r140; using ngen::BinaryCodeGenerator<hw>::r141; using ngen::BinaryCodeGenerator<hw>::r142; using ngen::BinaryCodeGenerator<hw>::r143; \
1653 using ngen::BinaryCodeGenerator<hw>::r144; using ngen::BinaryCodeGenerator<hw>::r145; using ngen::BinaryCodeGenerator<hw>::r146; using ngen::BinaryCodeGenerator<hw>::r147; \
1654 using ngen::BinaryCodeGenerator<hw>::r148; using ngen::BinaryCodeGenerator<hw>::r149; using ngen::BinaryCodeGenerator<hw>::r150; using ngen::BinaryCodeGenerator<hw>::r151; \
1655 using ngen::BinaryCodeGenerator<hw>::r152; using ngen::BinaryCodeGenerator<hw>::r153; using ngen::BinaryCodeGenerator<hw>::r154; using ngen::BinaryCodeGenerator<hw>::r155; \
1656 using ngen::BinaryCodeGenerator<hw>::r156; using ngen::BinaryCodeGenerator<hw>::r157; using ngen::BinaryCodeGenerator<hw>::r158; using ngen::BinaryCodeGenerator<hw>::r159; \
1657 using ngen::BinaryCodeGenerator<hw>::r160; using ngen::BinaryCodeGenerator<hw>::r161; using ngen::BinaryCodeGenerator<hw>::r162; using ngen::BinaryCodeGenerator<hw>::r163; \
1658 using ngen::BinaryCodeGenerator<hw>::r164; using ngen::BinaryCodeGenerator<hw>::r165; using ngen::BinaryCodeGenerator<hw>::r166; using ngen::BinaryCodeGenerator<hw>::r167; \
1659 using ngen::BinaryCodeGenerator<hw>::r168; using ngen::BinaryCodeGenerator<hw>::r169; using ngen::BinaryCodeGenerator<hw>::r170; using ngen::BinaryCodeGenerator<hw>::r171; \
1660 using ngen::BinaryCodeGenerator<hw>::r172; using ngen::BinaryCodeGenerator<hw>::r173; using ngen::BinaryCodeGenerator<hw>::r174; using ngen::BinaryCodeGenerator<hw>::r175; \
1661 using ngen::BinaryCodeGenerator<hw>::r176; using ngen::BinaryCodeGenerator<hw>::r177; using ngen::BinaryCodeGenerator<hw>::r178; using ngen::BinaryCodeGenerator<hw>::r179; \
1662 using ngen::BinaryCodeGenerator<hw>::r180; using ngen::BinaryCodeGenerator<hw>::r181; using ngen::BinaryCodeGenerator<hw>::r182; using ngen::BinaryCodeGenerator<hw>::r183; \
1663 using ngen::BinaryCodeGenerator<hw>::r184; using ngen::BinaryCodeGenerator<hw>::r185; using ngen::BinaryCodeGenerator<hw>::r186; using ngen::BinaryCodeGenerator<hw>::r187; \
1664 using ngen::BinaryCodeGenerator<hw>::r188; using ngen::BinaryCodeGenerator<hw>::r189; using ngen::BinaryCodeGenerator<hw>::r190; using ngen::BinaryCodeGenerator<hw>::r191; \
1665 using ngen::BinaryCodeGenerator<hw>::r192; using ngen::BinaryCodeGenerator<hw>::r193; using ngen::BinaryCodeGenerator<hw>::r194; using ngen::BinaryCodeGenerator<hw>::r195; \
1666 using ngen::BinaryCodeGenerator<hw>::r196; using ngen::BinaryCodeGenerator<hw>::r197; using ngen::BinaryCodeGenerator<hw>::r198; using ngen::BinaryCodeGenerator<hw>::r199; \
1667 using ngen::BinaryCodeGenerator<hw>::r200; using ngen::BinaryCodeGenerator<hw>::r201; using ngen::BinaryCodeGenerator<hw>::r202; using ngen::BinaryCodeGenerator<hw>::r203; \
1668 using ngen::BinaryCodeGenerator<hw>::r204; using ngen::BinaryCodeGenerator<hw>::r205; using ngen::BinaryCodeGenerator<hw>::r206; using ngen::BinaryCodeGenerator<hw>::r207; \
1669 using ngen::BinaryCodeGenerator<hw>::r208; using ngen::BinaryCodeGenerator<hw>::r209; using ngen::BinaryCodeGenerator<hw>::r210; using ngen::BinaryCodeGenerator<hw>::r211; \
1670 using ngen::BinaryCodeGenerator<hw>::r212; using ngen::BinaryCodeGenerator<hw>::r213; using ngen::BinaryCodeGenerator<hw>::r214; using ngen::BinaryCodeGenerator<hw>::r215; \
1671 using ngen::BinaryCodeGenerator<hw>::r216; using ngen::BinaryCodeGenerator<hw>::r217; using ngen::BinaryCodeGenerator<hw>::r218; using ngen::BinaryCodeGenerator<hw>::r219; \
1672 using ngen::BinaryCodeGenerator<hw>::r220; using ngen::BinaryCodeGenerator<hw>::r221; using ngen::BinaryCodeGenerator<hw>::r222; using ngen::BinaryCodeGenerator<hw>::r223; \
1673 using ngen::BinaryCodeGenerator<hw>::r224; using ngen::BinaryCodeGenerator<hw>::r225; using ngen::BinaryCodeGenerator<hw>::r226; using ngen::BinaryCodeGenerator<hw>::r227; \
1674 using ngen::BinaryCodeGenerator<hw>::r228; using ngen::BinaryCodeGenerator<hw>::r229; using ngen::BinaryCodeGenerator<hw>::r230; using ngen::BinaryCodeGenerator<hw>::r231; \
1675 using ngen::BinaryCodeGenerator<hw>::r232; using ngen::BinaryCodeGenerator<hw>::r233; using ngen::BinaryCodeGenerator<hw>::r234; using ngen::BinaryCodeGenerator<hw>::r235; \
1676 using ngen::BinaryCodeGenerator<hw>::r236; using ngen::BinaryCodeGenerator<hw>::r237; using ngen::BinaryCodeGenerator<hw>::r238; using ngen::BinaryCodeGenerator<hw>::r239; \
1677 using ngen::BinaryCodeGenerator<hw>::r240; using ngen::BinaryCodeGenerator<hw>::r241; using ngen::BinaryCodeGenerator<hw>::r242; using ngen::BinaryCodeGenerator<hw>::r243; \
1678 using ngen::BinaryCodeGenerator<hw>::r244; using ngen::BinaryCodeGenerator<hw>::r245; using ngen::BinaryCodeGenerator<hw>::r246; using ngen::BinaryCodeGenerator<hw>::r247; \
1679 using ngen::BinaryCodeGenerator<hw>::r248; using ngen::BinaryCodeGenerator<hw>::r249; using ngen::BinaryCodeGenerator<hw>::r250; using ngen::BinaryCodeGenerator<hw>::r251; \
1680 using ngen::BinaryCodeGenerator<hw>::r252; using ngen::BinaryCodeGenerator<hw>::r253; using ngen::BinaryCodeGenerator<hw>::r254; using ngen::BinaryCodeGenerator<hw>::r255;
1681 #define NGEN_FORWARD_REGISTERS_EXTRA2 \
1682 using ngen::BinaryCodeGenerator<hw>::D8; using ngen::BinaryCodeGenerator<hw>::D16; using ngen::BinaryCodeGenerator<hw>::D32; using ngen::BinaryCodeGenerator<hw>::D64; \
1683 using ngen::BinaryCodeGenerator<hw>::D8U32; using ngen::BinaryCodeGenerator<hw>::D16U32; \
1684 using ngen::BinaryCodeGenerator<hw>::D8T; using ngen::BinaryCodeGenerator<hw>::D16T; using ngen::BinaryCodeGenerator<hw>::D32T; using ngen::BinaryCodeGenerator<hw>::D64T; \
1685 using ngen::BinaryCodeGenerator<hw>::D8U32T; using ngen::BinaryCodeGenerator<hw>::D16U32T; \
1686 using ngen::BinaryCodeGenerator<hw>::V1; using ngen::BinaryCodeGenerator<hw>::V2; using ngen::BinaryCodeGenerator<hw>::V3; using ngen::BinaryCodeGenerator<hw>::V4; \
1687 using ngen::BinaryCodeGenerator<hw>::V8; using ngen::BinaryCodeGenerator<hw>::V16; using ngen::BinaryCodeGenerator<hw>::V32; using ngen::BinaryCodeGenerator<hw>::V64; \
1688 using ngen::BinaryCodeGenerator<hw>::V1T; using ngen::BinaryCodeGenerator<hw>::V2T; using ngen::BinaryCodeGenerator<hw>::V3T; using ngen::BinaryCodeGenerator<hw>::V4T; \
1689 using ngen::BinaryCodeGenerator<hw>::V8T; using ngen::BinaryCodeGenerator<hw>::V16T; using ngen::BinaryCodeGenerator<hw>::V32T; using ngen::BinaryCodeGenerator<hw>::V64T; \
1690 using ngen::BinaryCodeGenerator<hw>::transpose; \
1691 using ngen::BinaryCodeGenerator<hw>::L1UC_L3UC; using ngen::BinaryCodeGenerator<hw>::L1UC_L3C; using ngen::BinaryCodeGenerator<hw>::L1C_L3UC; using ngen::BinaryCodeGenerator<hw>::L1C_L3C; \
1692 using ngen::BinaryCodeGenerator<hw>::L1S_L3UC; using ngen::BinaryCodeGenerator<hw>::L1S_L3C; using ngen::BinaryCodeGenerator<hw>::L1IAR_L3C; using ngen::BinaryCodeGenerator<hw>::L1UC_L3WB; \
1693 using ngen::BinaryCodeGenerator<hw>::L1WT_L3UC; using ngen::BinaryCodeGenerator<hw>::L1WT_L3WB; using ngen::BinaryCodeGenerator<hw>::L1S_L3WB; using ngen::BinaryCodeGenerator<hw>::L1WB_L3WB;
1694 #define NGEN_FORWARD_REGISTERS_EXTRA3 \
1695 using ngen::BinaryCodeGenerator<hw>::any; using ngen::BinaryCodeGenerator<hw>::all; \
1696 using ngen::BinaryCodeGenerator<hw>::f2; using ngen::BinaryCodeGenerator<hw>::f3; \
1697 using ngen::BinaryCodeGenerator<hw>::sb16; using ngen::BinaryCodeGenerator<hw>::sb17; using ngen::BinaryCodeGenerator<hw>::sb18; using ngen::BinaryCodeGenerator<hw>::sb19; \
1698 using ngen::BinaryCodeGenerator<hw>::sb20; using ngen::BinaryCodeGenerator<hw>::sb21; using ngen::BinaryCodeGenerator<hw>::sb22; using ngen::BinaryCodeGenerator<hw>::sb23; \
1699 using ngen::BinaryCodeGenerator<hw>::sb24; using ngen::BinaryCodeGenerator<hw>::sb25; using ngen::BinaryCodeGenerator<hw>::sb26; using ngen::BinaryCodeGenerator<hw>::sb27; \
1700 using ngen::BinaryCodeGenerator<hw>::sb28; using ngen::BinaryCodeGenerator<hw>::sb29; using ngen::BinaryCodeGenerator<hw>::sb30; using ngen::BinaryCodeGenerator<hw>::sb31; \
1701 using ngen::BinaryCodeGenerator<hw>::vnni;
1702 #define NGEN_FORWARD_REGISTERS NGEN_FORWARD_REGISTERS_BASE NGEN_FORWARD_REGISTERS_EXTRA1 NGEN_FORWARD_REGISTERS_EXTRA2 NGEN_FORWARD_REGISTERS_EXTRA3
1703 #endif
1704 
1705 template <HW hw>
unsupported()1706 inline void BinaryCodeGenerator<hw>::unsupported()
1707 {
1708 #ifdef NGEN_SAFE
1709     throw unsupported_instruction();
1710 #endif
1711 }
1712 
1713 template <HW hw>
popStream()1714 typename BinaryCodeGenerator<hw>::InstructionStream *BinaryCodeGenerator<hw>::popStream()
1715 {
1716 #ifdef NGEN_SAFE
1717     if (streamStack.size() <= 1) throw stream_stack_underflow();
1718 #endif
1719 
1720     InstructionStream *result = streamStack.back();
1721     streamStack.pop_back();
1722     return result;
1723 }
1724 
1725 template <HW hw>
encodeSyncInsertion(autoswsb::SyncInsertion & si)1726 static inline Instruction12 encodeSyncInsertion(autoswsb::SyncInsertion &si)
1727 {
1728     Instruction12 i;
1729 
1730     i.common.opcode = static_cast<int>(Opcode::sync);
1731     i.common.swsb = (hw >= HW::XeHPC) ? SWSBInfoXeHPC(si.swsb, Opcode::sync).raw()
1732                                       :   SWSBInfo12(si.swsb, Opcode::sync).raw();
1733     i.common.maskCtrl = true;
1734     i.binary.cmod = static_cast<int>(si.fc);
1735 
1736     if (si.mask) {
1737         i.binary.src0Type = getTypecode12(DataType::ud);
1738         i.binary.src0Imm = true;
1739         i.imm32.value = si.mask;
1740     }
1741     i.binary.dst = 1;
1742 
1743     return i;
1744 }
1745 
1746 template <HW hw>
getCode()1747 std::vector<uint8_t> BinaryCodeGenerator<hw>::getCode()
1748 {
1749 #ifdef NGEN_SAFE
1750     if (streamStack.size() > 1) throw unfinished_stream_exception();
1751 #endif
1752     rootStream.fixLabels(labelManager);
1753 
1754     Program program(rootStream);
1755     autoswsb::BasicBlockList analysis = autoswsb::autoSWSB(hw, program);
1756     std::vector<uint8_t> result;
1757 
1758     if (analysis.empty()) {
1759         result.resize(rootStream.length());
1760         std::memmove(result.data(), rootStream.code.data(), rootStream.length());
1761     } else {
1762         std::multimap<int32_t, autoswsb::SyncInsertion*> syncs;
1763 
1764         for (auto &bb : analysis)
1765             for (auto &sync : bb.syncs)
1766                 syncs.insert(std::make_pair(sync.inum, &sync));
1767 
1768         result.resize(rootStream.length() + syncs.size() * sizeof(Instruction12));
1769 
1770         auto *psrc = reinterpret_cast<const Instruction12 *>(rootStream.code.data());
1771         auto *pdst = reinterpret_cast<Instruction12 *>(result.data());
1772         auto nextSync = syncs.begin();
1773 
1774         for (uint32_t isrc = 0; isrc < program.size(); isrc++, psrc++) {
1775             if (psrc->opcode() == Opcode::wrdep)
1776                 continue;
1777             while ((nextSync != syncs.end()) && (nextSync->second->inum == isrc))
1778                 *pdst++ = encodeSyncInsertion<hw>(*(nextSync++)->second);
1779             *pdst++ = *psrc;
1780         }
1781 
1782         result.resize(reinterpret_cast<uint8_t *>(pdst) - result.data());
1783     }
1784 
1785     return result;
1786 }
1787 
1788 template <HW hw>
1789 template <bool forceWE, typename D, typename S0, HW hw_>
1790 typename std::enable_if<hwLT(hw_, HW::Gen12LP)>::type
opX(Opcode op,DataType defaultType,const InstructionModifier & mod,D dst,S0 src0)1791 BinaryCodeGenerator<hw>::opX(Opcode op, DataType defaultType, const InstructionModifier &mod, D dst, S0 src0)
1792 {
1793     Instruction8 i{};
1794     InstructionModifier emod = mod | defaultModifier;
1795     if (forceWE)
1796         emod |= NoMask;
1797 
1798     dst.fixup(emod.getExecSize(), defaultType, true, 1);
1799     src0.fixup(emod.getExecSize(), defaultType, false, 1);
1800 
1801     encodeCommon8(i, op, emod);
1802     i.common.accessMode = std::is_base_of<Align16Operand, D>::value;
1803 
1804     i.binary.dst = encodeBinaryOperand8<true>(dst).bits;
1805     i.binary.src0 = encodeBinaryOperand8<false>(src0).bits;
1806 
1807     if (dst.isIndirect())  i.binary.dstAddrImm9 = dst.getOffset() >> 9;
1808     if (src0.isIndirect()) i.binary.src0AddrImm9 = src0.getOffset() >> 9;
1809 
1810     i.binary.dstType = getTypecode<hw>(dst.getType());
1811     i.binary.src0Type = getTypecode<hw>(src0.getType());
1812 
1813     i.binary.dstRegFile = getRegFile(dst);
1814     i.binary.src0RegFile = getRegFile(src0);
1815 
1816     db(i);
1817 }
1818 
1819 template <HW hw>
1820 template <bool forceWE, typename D, typename S0, HW hw_>
1821 typename std::enable_if<hwGE(hw_, HW::Gen12LP)>::type
opX(Opcode op,DataType defaultType,const InstructionModifier & mod,D dst,S0 src0)1822 BinaryCodeGenerator<hw>::opX(Opcode op, DataType defaultType, const InstructionModifier &mod, D dst, S0 src0)
1823 {
1824     typename EncodingTag12Dispatch<hw>::tag tag;
1825     Instruction12 i{};
1826 
1827     InstructionModifier emod = mod | defaultModifier;
1828     if (forceWE)
1829         emod |= NoMask;
1830 
1831     dst.fixup(emod.getExecSize(), defaultType, true, 1);
1832     src0.fixup(emod.getExecSize(), defaultType, false, 1);
1833 
1834     encodeCommon12(i, op, emod, dst, tag);
1835 
1836     i.binary.dst  = encodeBinaryOperand12<true>(dst, tag).bits;
1837     i.binary.src0 = encodeBinaryOperand12<false>(src0, tag).bits;
1838 
1839     i.binary.dstAddrMode = dst.isIndirect();
1840     i.binary.dstType  = getTypecode12(dst.getType());
1841     i.binary.src0Type = getTypecode12(src0.getType());
1842 
1843     i.binary.src0Mods = src0.getMods();
1844 
1845     i.binary.cmod = static_cast<int>(mod.getCMod());
1846 
1847     db(i);
1848 }
1849 
1850 template <HW hw>
1851 template <bool forceWE, typename D, HW hw_>
1852 typename std::enable_if<hwLT(hw_, HW::Gen12LP)>::type
opX(Opcode op,DataType defaultType,const InstructionModifier & mod,D dst,const Immediate & src0)1853 BinaryCodeGenerator<hw>::opX(Opcode op, DataType defaultType, const InstructionModifier &mod, D dst, const Immediate &src0)
1854 {
1855     Instruction8 i{};
1856     InstructionModifier emod = mod | defaultModifier;
1857     if (forceWE)
1858         emod |= NoMask;
1859 
1860     dst.fixup(emod.getExecSize(), defaultType, true, 1);
1861     src0.fixup(emod.getExecSize(), defaultType, false, 1);
1862 
1863     encodeCommon8(i, op, emod);
1864     i.common.accessMode = std::is_base_of<Align16Operand, D>::value;
1865 
1866     i.binary.dst = encodeBinaryOperand8<true>(dst).bits;
1867 
1868     i.binary.dstType = getTypecode<hw>(dst.getType());
1869     i.binary.src0Type = getImmediateTypecode<hw>(src0.getType());
1870 
1871     i.binary.dstRegFile = getRegFile(dst);
1872     i.binary.src0RegFile = getRegFile(src0);
1873 
1874     if (dst.isIndirect())  i.binary.dstAddrImm9 = dst.getOffset() >> 9;
1875 
1876     if (getBytes(src0.getType()) == 8)
1877         i.imm64.value = static_cast<uint64_t>(src0);
1878     else
1879         i.imm32.value = static_cast<uint64_t>(src0);
1880 
1881     db(i);
1882 }
1883 
1884 template <HW hw>
1885 template <bool forceWE, typename D, HW hw_>
1886 typename std::enable_if<hwGE(hw_, HW::Gen12LP)>::type
opX(Opcode op,DataType defaultType,const InstructionModifier & mod,D dst,const Immediate & src0)1887 BinaryCodeGenerator<hw>::opX(Opcode op, DataType defaultType, const InstructionModifier &mod, D dst, const Immediate &src0)
1888 {
1889     typename EncodingTag12Dispatch<hw>::tag tag;
1890     Instruction12 i{};
1891 
1892     InstructionModifier emod = mod | defaultModifier;
1893     if (forceWE)
1894         emod |= NoMask;
1895 
1896     dst.fixup(emod.getExecSize(), defaultType, true, 1);
1897     src0.fixup(emod.getExecSize(), defaultType, false, 1);
1898 
1899     encodeCommon12(i, op, emod, dst, tag);
1900 
1901     i.binary.dst = encodeBinaryOperand12<true>(dst, tag).bits;
1902 
1903     i.binary.dstAddrMode = dst.isIndirect();
1904 
1905     i.binary.dstType  = getTypecode12(dst.getType());
1906     i.binary.src0Type = getTypecode12(src0.getType());
1907 
1908     i.binary.src0Imm = true;
1909 
1910     i.binary.cmod = static_cast<int>(mod.getCMod());
1911 
1912     auto val = static_cast<uint64_t>(src0);
1913     i.imm32.value = val;
1914     if (getBytes(src0.getType()) == 8) {
1915 #ifdef NGEN_SAFE
1916         if (mod.getCMod() != ConditionModifier::none) throw invalid_modifiers_exception();
1917 #endif
1918         i.imm64.high = val >> 32;
1919     }
1920 
1921     db(i);
1922 }
1923 
1924 template <HW hw>
1925 template <bool forceWE, typename D, typename S0, typename S1, HW hw_>
1926 typename std::enable_if<hwLT(hw_, HW::Gen12LP)>::type
opX(Opcode op,DataType defaultType,const InstructionModifier & mod,D dst,S0 src0,S1 src1)1927 BinaryCodeGenerator<hw>::opX(Opcode op, DataType defaultType, const InstructionModifier &mod, D dst, S0 src0, S1 src1)
1928 {
1929     Instruction8 i{};
1930 
1931     InstructionModifier emod = mod | defaultModifier;
1932     if (forceWE)
1933         emod |= NoMask;
1934 
1935     dst.fixup(emod.getExecSize(), defaultType, true, 2);
1936     src0.fixup(emod.getExecSize(), defaultType, false, 2);
1937     src1.fixup(emod.getExecSize(), defaultType, false, 2);
1938 
1939     encodeCommon8(i, op, emod);
1940     i.common.accessMode = std::is_base_of<Align16Operand, D>::value;
1941 
1942     i.binary.dst  = encodeBinaryOperand8<true>(dst).bits;
1943     i.binary.src0 = encodeBinaryOperand8<false>(src0).bits;
1944     i.binary.src1 = encodeBinaryOperand8<false>(src1).bits;
1945 
1946     if (dst.isIndirect())  i.binary.dstAddrImm9 = dst.getOffset() >> 9;
1947     if (src0.isIndirect()) i.binary.src0AddrImm9 = src0.getOffset() >> 9;
1948     if (src1.isIndirect()) i.binary.src1AddrImm9 = src1.getOffset() >> 9;
1949 
1950     i.binary.dstType  = getTypecode<hw>(dst.getType());
1951     i.binary.src0Type = getTypecode<hw>(src0.getType());
1952     i.binary.src1Type = getTypecode<hw>(src1.getType());
1953 
1954     i.binary.dstRegFile = getRegFile(dst);
1955     i.binary.src0RegFile = getRegFile(src0);
1956     i.binary.src1RegFile = getRegFile(src1);
1957 
1958 #ifdef NGEN_SAFE
1959     if (src1.isARF() && op != Opcode::illegal && op != Opcode::movi) throw grf_expected_exception();
1960 #endif
1961 
1962     db(i);
1963 }
1964 
1965 template <HW hw>
1966 template <bool forceWE, typename D, typename S0, typename S1, HW hw_>
1967 typename std::enable_if<hwGE(hw_, HW::Gen12LP)>::type
opX(Opcode op,DataType defaultType,const InstructionModifier & mod,D dst,S0 src0,S1 src1)1968 BinaryCodeGenerator<hw>::opX(Opcode op, DataType defaultType, const InstructionModifier &mod, D dst, S0 src0, S1 src1)
1969 {
1970     typename EncodingTag12Dispatch<hw>::tag tag;
1971     Instruction12 i{};
1972 
1973     InstructionModifier emod = mod | defaultModifier;
1974     if (forceWE)
1975         emod |= NoMask;
1976 
1977     dst.fixup(emod.getExecSize(), defaultType, true, 2);
1978     src0.fixup(emod.getExecSize(), defaultType, false, 2);
1979     src1.fixup(emod.getExecSize(), defaultType, false, 2);
1980 
1981     encodeCommon12(i, op, emod, dst, tag);
1982 
1983     i.binary.dst  = encodeBinaryOperand12<true>(dst, tag).bits;
1984     i.binary.src0 = encodeBinaryOperand12<false>(src0, tag).bits;
1985     i.binary.src1 = encodeBinaryOperand12<false>(src1, tag).bits;
1986 
1987     i.binary.dstAddrMode = dst.isIndirect();
1988     i.binary.dstType  = getTypecode12(dst.getType());
1989     i.binary.src0Type = getTypecode12(src0.getType());
1990     i.binary.src1Type = getTypecode12(src1.getType());
1991 
1992     i.binary.src0Mods = src0.getMods();
1993     i.binary.src1Mods = src1.getMods();
1994 
1995     i.binary.cmod = static_cast<int>(mod.getCMod());
1996 
1997     db(i);
1998 }
1999 
2000 template <HW hw>
2001 template <bool forceWE, typename D, typename S0, HW hw_>
2002 typename std::enable_if<hwLT(hw_, HW::Gen12LP)>::type
opX(Opcode op,DataType defaultType,const InstructionModifier & mod,D dst,S0 src0,const Immediate & src1)2003 BinaryCodeGenerator<hw>::opX(Opcode op, DataType defaultType, const InstructionModifier &mod, D dst, S0 src0, const Immediate &src1)
2004 {
2005     Instruction8 i{};
2006     InstructionModifier emod = mod | defaultModifier;
2007     if (forceWE)
2008         emod |= NoMask;
2009 
2010     dst.fixup(emod.getExecSize(), defaultType, true, 2);
2011     src0.fixup(emod.getExecSize(), defaultType, false, 2);
2012     src1.fixup(emod.getExecSize(), defaultType, false, 2);
2013 
2014     encodeCommon8(i, op, emod);
2015     i.common.accessMode = std::is_base_of<Align16Operand, D>::value;
2016 
2017     i.binary.dst = encodeBinaryOperand8<true>(dst).bits;
2018     i.binary.src0 = encodeBinaryOperand8<false>(src0).bits;
2019 
2020     if (dst.isIndirect())  i.binary.dstAddrImm9 = dst.getOffset() >> 9;
2021     if (src0.isIndirect()) i.binary.src0AddrImm9 = src0.getOffset() >> 9;
2022 
2023     i.binary.dstType = getTypecode<hw>(dst.getType());
2024     i.binary.src0Type = getTypecode<hw>(src0.getType());
2025     i.binary.src1Type = getImmediateTypecode<hw>(src1.getType());
2026 
2027     i.binary.dstRegFile = getRegFile(dst);
2028     i.binary.src0RegFile = getRegFile(src0);
2029     i.binary.src1RegFile = getRegFile(src1);
2030 
2031     i.imm32.value = static_cast<uint64_t>(src1);
2032 
2033     db(i);
2034 }
2035 
2036 template <HW hw>
2037 template <bool forceWE, typename D, typename S0, HW hw_>
2038 typename std::enable_if<hwGE(hw_, HW::Gen12LP)>::type
opX(Opcode op,DataType defaultType,const InstructionModifier & mod,D dst,S0 src0,const Immediate & src1)2039 BinaryCodeGenerator<hw>::opX(Opcode op, DataType defaultType, const InstructionModifier &mod, D dst, S0 src0, const Immediate &src1)
2040 {
2041     typename EncodingTag12Dispatch<hw>::tag tag;
2042     Instruction12 i{};
2043 
2044     InstructionModifier emod = mod | defaultModifier;
2045     if (forceWE)
2046         emod |= NoMask;
2047 
2048     dst.fixup(emod.getExecSize(), defaultType, true, 2);
2049     src0.fixup(emod.getExecSize(), defaultType, false, 2);
2050     src1.fixup(emod.getExecSize(), defaultType, false, 2);
2051 
2052     encodeCommon12(i, op, emod, dst, tag);
2053 
2054     i.binary.dst  = encodeBinaryOperand12<true>(dst, tag).bits;
2055     i.binary.src0 = encodeBinaryOperand12<false>(src0, tag).bits;
2056     i.binary.src1 = static_cast<uint64_t>(src1);
2057 
2058     i.binary.dstAddrMode = dst.isIndirect();
2059     i.binary.dstType  = getTypecode12(dst.getType());
2060     i.binary.src0Type = getTypecode12(src0.getType());
2061     i.binary.src1Type = getTypecode12(src1.getType());
2062 
2063     i.binary.src0Mods = src0.getMods();
2064 
2065     i.binary.cmod = static_cast<int>(mod.getCMod());
2066 
2067     i.binary.src1Imm = true;
2068     i.imm32.value = static_cast<uint64_t>(src1);
2069 
2070     db(i);
2071 }
2072 
2073 template <HW hw>
2074 template <HW hw_>
2075 typename std::enable_if<hwLE(hw_, HW::Gen9)>::type
opX(Opcode op,DataType defaultType,const InstructionModifier & mod,RegData dst,RegData src0,RegData src1,RegData src2)2076 BinaryCodeGenerator<hw>::opX(Opcode op, DataType defaultType, const InstructionModifier &mod, RegData dst, RegData src0, RegData src1, RegData src2)
2077 {
2078     opX(op, defaultType, mod, emulateAlign16Dst(dst),  emulateAlign16Src(src0),
2079                               emulateAlign16Src(src1), emulateAlign16Src(src2));
2080 }
2081 
2082 
2083 template <HW hw>
2084 template <HW hw_>
2085 typename std::enable_if<hwLT(hw_, HW::Gen12LP)>::type
opX(Opcode op,DataType defaultType,const InstructionModifier & mod,Align16Operand dst,Align16Operand src0,Align16Operand src1,Align16Operand src2)2086 BinaryCodeGenerator<hw>::opX(Opcode op, DataType defaultType, const InstructionModifier &mod, Align16Operand dst, Align16Operand src0, Align16Operand src1, Align16Operand src2)
2087 {
2088 #ifdef NGEN_SAFE
2089     if (dst.getReg().isARF())  throw grf_expected_exception();
2090     if (src0.getReg().isARF()) throw grf_expected_exception();
2091     if (src1.getReg().isARF()) throw grf_expected_exception();
2092     if (src2.getReg().isARF()) throw grf_expected_exception();
2093 #endif
2094 
2095     Instruction8 i{};
2096     InstructionModifier emod = mod | defaultModifier | Align16;
2097 
2098     dst.getReg().fixup(emod.getExecSize(), defaultType, true, 3);
2099     src0.getReg().fixup(emod.getExecSize(), defaultType, false, 3);
2100     src1.getReg().fixup(emod.getExecSize(), defaultType, false, 3);
2101     src2.getReg().fixup(emod.getExecSize(), defaultType, false, 3);
2102 
2103     encodeCommon8(i, op, emod);
2104 
2105     i.ternary16.dstChanEn = dst.getChanEn();
2106     i.ternary16.dstRegNum = dst.getReg().getBase();
2107     i.ternary16.dstSubregNum2_4 = dst.getReg().getByteOffset() >> 2;
2108     i.ternary16.dstType = getTernary16Typecode8(dst.getReg().getType());
2109 
2110     i.ternary16.srcType = getTernary16Typecode8(src0.getReg().getType());
2111 
2112     bool isFOrHF = (src0.getReg().getType() == DataType::f
2113                  || src0.getReg().getType() == DataType::hf);
2114 
2115     i.ternary16.src1Type = isFOrHF && (src1.getReg().getType() == DataType::hf);
2116     i.ternary16.src2Type = isFOrHF && (src1.getReg().getType() == DataType::hf);
2117 
2118     encodeTernaryCommon8(i, src0, src1, src2);
2119 
2120     db(i);
2121 }
2122 
2123 template <HW hw>
2124 template <typename D, typename S0, typename S1, typename S2, HW hw_>
2125 typename std::enable_if<hwLT(hw_, HW::Gen12LP)>::type
opX(Opcode op,DataType defaultType,const InstructionModifier & mod,D dst,S0 src0,S1 src1,S2 src2)2126 BinaryCodeGenerator<hw>::opX(Opcode op, DataType defaultType, const InstructionModifier &mod, D dst, S0 src0, S1 src1, S2 src2)
2127 {
2128     if (hw < HW::Gen10)
2129         unsupported();
2130 
2131 #ifdef NGEN_SAFE
2132     if (src0.isARF()) throw grf_expected_exception();
2133     if (src2.isARF()) throw grf_expected_exception();
2134 #endif
2135 
2136     Instruction8 i{};
2137     InstructionModifier emod = mod | defaultModifier;
2138 
2139     dst.fixup(emod.getExecSize(), defaultType, true, 3);
2140     src0.fixup(emod.getExecSize(), defaultType, false, 3);
2141     src1.fixup(emod.getExecSize(), defaultType, false, 3);
2142     src2.fixup(emod.getExecSize(), defaultType, false, 3);
2143 
2144     encodeCommon8(i, op, emod);
2145 
2146     i.ternary1.src0RegFile = std::is_base_of<Immediate, S0>::value;
2147     i.ternary1.src1RegFile = src1.isARF();
2148     i.ternary1.src2RegFile = std::is_base_of<Immediate, S2>::value;
2149 
2150     encodeTernaryCommon8(i, src0, src1, src2);
2151     encodeTernary1Dst10(i, dst);
2152 
2153     db(i);
2154 }
2155 
2156 template <HW hw>
2157 template <typename D, typename S0,typename S1, typename S2, HW hw_>
2158 typename std::enable_if<hwGE(hw_, HW::Gen12LP)>::type
opX(Opcode op,DataType defaultType,const InstructionModifier & mod,D dst,S0 src0,S1 src1,S2 src2)2159 BinaryCodeGenerator<hw>::opX(Opcode op, DataType defaultType, const InstructionModifier &mod, D dst, S0 src0, S1 src1, S2 src2)
2160 {
2161     typename EncodingTag12Dispatch<hw>::tag tag;
2162     Instruction12 i{};
2163     InstructionModifier emod = mod | defaultModifier;
2164 
2165     dst.fixup(emod.getExecSize(), defaultType, true, 3);
2166     src0.fixup(emod.getExecSize(), defaultType, false, 3);
2167     src1.fixup(emod.getExecSize(), defaultType, false, 3);
2168     src2.fixup(emod.getExecSize(), defaultType, false, 3);
2169 
2170     encodeCommon12(i, op, emod, dst, tag);
2171 
2172     i.ternary.dst  = encodeTernaryOperand12<true>(dst, tag).bits;
2173     encodeTernarySrc0(i, src0, tag);
2174     encodeTernarySrc1(i, src1, tag);
2175     encodeTernarySrc2(i, src2, tag);
2176     encodeTernaryTypes(i, dst, src0, src1, src2);
2177 
2178     i.ternary.cmod = static_cast<int>(mod.getCMod());
2179 
2180     db(i);
2181 }
2182 
2183 template <HW hw>
2184 template <typename DS0>
opMath(Opcode op,DataType defaultType,const InstructionModifier & mod,MathFunction fc,DS0 dst,DS0 src0)2185 void BinaryCodeGenerator<hw>::opMath(Opcode op, DataType defaultType, const InstructionModifier &mod, MathFunction fc, DS0 dst, DS0 src0)
2186 {
2187     InstructionModifier mmod = mod;
2188 
2189     mmod.setCMod(static_cast<ConditionModifier>(fc));
2190     opX(op, defaultType, mmod, dst, src0);
2191 }
2192 
2193 template <HW hw>
2194 template <typename DS0, typename S1>
opMath(Opcode op,DataType defaultType,const InstructionModifier & mod,MathFunction fc,DS0 dst,DS0 src0,S1 src1)2195 void BinaryCodeGenerator<hw>::opMath(Opcode op, DataType defaultType, const InstructionModifier &mod, MathFunction fc, DS0 dst, DS0 src0, S1 src1)
2196 {
2197     InstructionModifier mmod = mod;
2198 
2199     mmod.setCMod(static_cast<ConditionModifier>(fc));
2200     opX(op, defaultType, mmod, dst, src0, src1);
2201 }
2202 
2203 template <HW hw>
2204 template <typename D, typename S0, typename S2>
opBfn(Opcode op,DataType defaultType,const InstructionModifier & mod,int bfnCtrl,D dst,S0 src0,RegData src1,S2 src2)2205 void BinaryCodeGenerator<hw>::opBfn(Opcode op, DataType defaultType, const InstructionModifier &mod, int bfnCtrl, D dst, S0 src0, RegData src1, S2 src2)
2206 {
2207     if (hw < HW::XeHP)
2208         unsupported();
2209 
2210     typename EncodingTag12Dispatch<hw>::tag tag;
2211     Instruction12 i{};
2212     InstructionModifier emod = mod | defaultModifier;
2213 
2214     dst.fixup(emod.getExecSize(), defaultType, true, 3);
2215     src0.fixup(emod.getExecSize(), defaultType, false, 3);
2216     src1.fixup(emod.getExecSize(), defaultType, false, 3);
2217     src2.fixup(emod.getExecSize(), defaultType, false, 3);
2218 
2219     encodeCommon12(i, op, emod, dst, tag);
2220 
2221     i.ternary.dst  = encodeTernaryOperand12<true>(dst, tag).bits;
2222     encodeTernarySrc0(i, src0, tag);
2223     encodeTernarySrc1(i, src1, tag);
2224     encodeTernarySrc2(i, src2, tag);
2225     encodeTernaryTypes(i, dst, src0, src1, src2);
2226 
2227     i.ternary.cmod = static_cast<int>(mod.getCMod());
2228 
2229     i.bfn.bfnCtrl03 = (bfnCtrl >> 0);
2230     i.bfn.bfnCtrl47 = (bfnCtrl >> 4);
2231 
2232     db(i);
2233 }
2234 
2235 template <HW hw>
opDpas(Opcode op,DataType defaultType,const InstructionModifier & mod,int sdepth,int rcount,RegData dst,RegData src0,RegData src1,RegData src2)2236 void BinaryCodeGenerator<hw>::opDpas(Opcode op, DataType defaultType, const InstructionModifier &mod, int sdepth, int rcount, RegData dst, RegData src0, RegData src1, RegData src2)
2237 {
2238     if (hw < HW::XeHP)
2239         unsupported();
2240 
2241     typename EncodingTag12Dispatch<hw>::tag tag;
2242     Instruction12 i{};
2243     InstructionModifier emod = mod | defaultModifier;
2244 
2245     dst.fixup(emod.getExecSize(), defaultType, true, 3);
2246     src0.fixup(emod.getExecSize(), defaultType, false, 3);
2247     src1.fixup(emod.getExecSize(), defaultType, false, 3);
2248     src2.fixup(emod.getExecSize(), defaultType, false, 3);
2249 
2250     encodeCommon12(i, op, emod, dst, tag);
2251 
2252     i.ternary.dst  = encodeTernaryOperand12<true,  false>(dst,  tag).bits;
2253     i.ternary.src0 = encodeTernaryOperand12<false, false>(src0, tag).bits;
2254     i.ternary.src1 = encodeTernaryOperand12<false, false>(src1, tag).bits;
2255     i.ternary.src2 = encodeTernaryOperand12<false, false>(src2, tag).bits;
2256 
2257     encodeTernaryTypes(i, dst, src0, src1, src2);
2258 
2259     i.dpas.rcount = rcount - 1;
2260     i.dpas.sdepth = utils::log2(sdepth);
2261 
2262     // i.dpas.src1SubBytePrecision = 0;     // TODO: 0 -> (none), 1 -> u4/s4, 2 -> u2/s2
2263     // i.dpas.src2SubBytePrecision = 0;
2264 
2265     i.ternary.cmod = static_cast<int>(mod.getCMod());
2266 
2267     db(i);
2268 }
2269 
2270 template <HW hw>
2271 template <typename D, HW hw_>
2272 typename std::enable_if<hwLT(hw_, HW::Gen12LP)>::type
opSend(Opcode op,const InstructionModifier & mod,SharedFunction sfid,const RegData & dst,const RegData & src0,const RegData & src1,uint32_t exdesc,D desc)2273 BinaryCodeGenerator<hw>::opSend(Opcode op, const InstructionModifier &mod, SharedFunction sfid, const RegData &dst, const RegData &src0, const RegData &src1, uint32_t exdesc, D desc)
2274 {
2275     exdesc |= uint32_t(static_cast<uint8_t>(sfid));
2276     opSends(static_cast<Opcode>(static_cast<uint8_t>(op) | 2), mod, dst, src0, src1, exdesc, desc);
2277 }
2278 
2279 template <HW hw>
2280 template <typename D, HW hw_>
2281 typename std::enable_if<hwLT(hw_, HW::Gen12LP)>::type
opSend(Opcode op,const InstructionModifier & mod,SharedFunction sfid,const RegData & dst,const RegData & src0,const RegData & src1,const RegData & exdesc,D desc)2282 BinaryCodeGenerator<hw>::opSend(Opcode op, const InstructionModifier &mod, SharedFunction sfid, const RegData &dst, const RegData &src0, const RegData &src1, const RegData &exdesc, D desc)
2283 {
2284     opSends(static_cast<Opcode>(static_cast<uint8_t>(op) | 2), mod, dst, src0, src1, exdesc, desc);
2285 }
2286 
2287 template <HW hw>
2288 template <typename ED, typename D, HW hw_>
2289 typename std::enable_if<hwGE(hw_, HW::Gen12LP)>::type
opSend(Opcode op,const InstructionModifier & mod,SharedFunction sfid,const RegData & dst,const RegData & src0,const RegData & src1,ED exdesc,D desc)2290 BinaryCodeGenerator<hw>::opSend(Opcode op, const InstructionModifier &mod, SharedFunction sfid, const RegData &dst, const RegData &src0, const RegData &src1, ED exdesc, D desc)
2291 {
2292     typename EncodingTag12Dispatch<hw>::tag tag;
2293     Instruction12 i{};
2294     InstructionModifier emod = mod | defaultModifier;
2295 
2296     encodeCommon12(i, op, emod, dst, tag);
2297 
2298     i.send.fusionCtrl = emod.isSerialized();
2299 
2300     i.send.dstReg = dst.getBase();
2301     i.send.src0Reg = src0.getBase();
2302     i.send.src1Reg = src1.getBase();
2303 
2304     i.send.dstRegFile = getRegFile(dst);
2305     i.send.src0RegFile = getRegFile(src0);
2306     i.send.src1RegFile = getRegFile(src1);
2307 
2308     i.send.sfid = static_cast<int>(sfid) & 0xF;
2309 
2310     encodeSendDesc(i, desc);
2311     encodeSendExDesc(i, exdesc);
2312 
2313     db(i);
2314 }
2315 
// Emit a single-source send with immediate descriptor and extended descriptor,
// using the Instruction8 encoding (enabled when hwLT(hw_, HW::Gen12LP) holds).
//   exdesc - bits 0-3 are stored as sfid, bit 5 as eot, and bits 16-31 are
//            split into four 4-bit exDesc fields
//   desc   - stored whole in the sendsGen9.desc field
// NOTE(review): fields are written through several views (binary, sendsGen9,
// sendGen8) of the same instruction; these presumably alias each other, so
// keep the statement order unless the Instruction8 layout confirms otherwise.
template <HW hw>
template <HW hw_>
typename std::enable_if<hwLT(hw_, HW::Gen12LP)>::type
BinaryCodeGenerator<hw>::opSend(Opcode op, const InstructionModifier &mod, const RegData &dst, const RegData &src0, uint32_t exdesc, uint32_t desc)
{
    Instruction8 i{};
    InstructionModifier emod = mod | defaultModifier;

    encodeCommon8(i, op, emod);

    i.binary.dst  = encodeBinaryOperand8<true>(dst).bits;
    i.binary.src0 = encodeBinaryOperand8<false>(src0).bits;

    // The src1 slot is marked as an immediate; the descriptor is written below.
    i.sendsGen9.dstRegFile = getRegFile(dst);
    i.binary.src0RegFile = getRegFile(src0);
    i.binary.src1RegFile = RegFileIMM;

    i.binary.dstType = getTypecode<hw>(dst.getType());

    // Scatter the extended descriptor into its fields, then store the descriptor.
    i.sendsGen9.sfid = exdesc & 0xF;
    i.sendGen8.zero = 0;
    i.sendGen8.exDesc16_19 = (exdesc >> 16) & 0xF;
    i.sendGen8.exDesc20_23 = (exdesc >> 20) & 0xF;
    i.sendGen8.exDesc24_27 = (exdesc >> 24) & 0xF;
    i.sendGen8.exDesc28_31 = (exdesc >> 28) & 0xF;
    i.sendsGen9.desc = desc;

    // Bit 5 of exdesc is the end-of-thread flag.
    i.sendsGen9.eot = (exdesc >> 5) & 1;
    // For an indirect destination, store the offset shifted right by 9.
    if (dst.isIndirect()) i.sendsGen9.dstAddrImm9 = dst.getOffset() >> 9;

    db(i);
}
2348 
// Emit a single-source send whose descriptor comes from a register, using the
// Instruction8 encoding (enabled when hwLT(hw_, HW::Gen12LP) holds).
//   exdesc - immediate; bits 0-3 are stored as sfid, bit 5 as eot, and bits
//            16-31 are split into four 4-bit exDesc fields
//   desc   - register operand, encoded in the src1 slot
// With NGEN_SAFE defined, throws invalid_arf_exception unless desc is an ARF
// of type `a` with ARF base 0 and offset 0.
// NOTE(review): fields are written through several views (binary, sendsGen9,
// sendGen8) of the same instruction; these presumably alias each other, so
// keep the statement order unless the Instruction8 layout confirms otherwise.
template <HW hw>
template <HW hw_>
typename std::enable_if<hwLT(hw_, HW::Gen12LP)>::type
BinaryCodeGenerator<hw>::opSend(Opcode op, const InstructionModifier &mod, const RegData &dst, const RegData &src0, uint32_t exdesc, const RegData &desc)
{
#ifdef NGEN_SAFE
    // Only a0.0:ud is allowed for desc.
    // NOTE(review): the check below does not inspect the data type of desc.
    if (!desc.isARF() || desc.getARFType() != ARFType::a || desc.getARFBase() != 0 || desc.getOffset() != 0)
        throw invalid_arf_exception();
#endif
    Instruction8 i{};
    InstructionModifier emod = mod | defaultModifier;

    encodeCommon8(i, op, emod);

    // The descriptor register is encoded as the src1 operand.
    i.binary.dst = encodeBinaryOperand8<true>(dst).bits;
    i.binary.src0 = encodeBinaryOperand8<false>(src0).bits;
    i.binary.src1 = encodeBinaryOperand8<false>(desc).bits;

    i.sendsGen9.dstRegFile = getRegFile(dst);
    i.binary.src0RegFile = getRegFile(src0);
    i.binary.src1RegFile = getRegFile(desc);
    i.binary.src1Type = getTypecode<hw>(desc.getType());

    // Scatter the extended descriptor into its fields.
    i.sendsGen9.sfid = exdesc & 0xF;
    i.sendGen8.zero = 0;
    i.sendGen8.exDesc16_19 = (exdesc >> 16) & 0xF;
    i.sendGen8.exDesc20_23 = (exdesc >> 20) & 0xF;
    i.sendGen8.exDesc24_27 = (exdesc >> 24) & 0xF;
    i.sendGen8.exDesc28_31 = (exdesc >> 28) & 0xF;

    // Bit 5 of exdesc is the end-of-thread flag.
    i.sendsGen9.eot = (exdesc >> 5) & 1;
    // For an indirect destination, store the offset shifted right by 9.
    if (dst.isIndirect()) i.sendsGen9.dstAddrImm9 = dst.getOffset() >> 9;

    db(i);
}
2385 
2386 template <HW hw>
2387 template <typename D, HW hw_>
2388 typename std::enable_if<hwGE(hw_, HW::Gen12LP)>::type
opSend(Opcode op,const InstructionModifier & mod,const RegData & dst,const RegData & src0,uint32_t exdesc,D desc)2389 BinaryCodeGenerator<hw>::opSend(Opcode op, const InstructionModifier &mod, const RegData &dst, const RegData &src0, uint32_t exdesc, D desc)
2390 {
2391     opSends(op, mod, dst, src0, null, exdesc, desc);
2392 }
2393 
2394 template <HW hw>
2395 template <typename ED, typename D, HW hw_>
2396 typename std::enable_if<hwLT(hw_, HW::Gen12LP)>::type
opSends(Opcode op,const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1,ED exdesc,D desc)2397 BinaryCodeGenerator<hw>::opSends(Opcode op, const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1, ED exdesc, D desc)
2398 {
2399     Instruction8 i{};
2400     InstructionModifier emod = mod | defaultModifier;
2401 
2402     encodeCommon8(i, op, emod);
2403 
2404     i.binary.dst = encodeBinaryOperand8<true>(dst).bits;
2405     i.binary.src0 = encodeBinaryOperand8<false>(src0).bits;
2406 
2407     i.binary.src0RegFile = 0;                   // ?
2408     i.sendsGen9.dstRegFile = getRegFile(dst);
2409     i.sendsGen9.src1RegFile = getRegFile(src1);
2410     i.sendsGen9.src1RegNum = src1.getBase();
2411 
2412     if (dst.isIndirect())  i.sendsGen9.dstAddrImm9  =  dst.getOffset() >> 9;
2413     if (src0.isIndirect()) i.sendsGen9.src0AddrImm9 = src0.getOffset() >> 9;
2414 
2415     encodeSendsDesc(i, desc);
2416     encodeSendsExDesc(i, exdesc);
2417 
2418     db(i);
2419 }
2420 
// sends with a register extended descriptor
// (enabled when hwGE(hw_, HW::Gen12LP) holds).
// This overload never encodes an instruction: with NGEN_SAFE defined it throws
// sfid_needed_exception; without NGEN_SAFE the body is empty and nothing is emitted.
template <HW hw>
template <typename D, HW hw_>
typename std::enable_if<hwGE(hw_, HW::Gen12LP)>::type
BinaryCodeGenerator<hw>::opSends(Opcode op, const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1, RegData exdesc, D desc)
{
#ifdef NGEN_SAFE
    throw sfid_needed_exception();
#endif
}
2430 
2431 template <HW hw>
2432 template <typename D, HW hw_>
2433 typename std::enable_if<hwGE(hw_, HW::Gen12LP)>::type
opSends(Opcode op,const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1,uint32_t exdesc,D desc)2434 BinaryCodeGenerator<hw>::opSends(Opcode op, const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1, uint32_t exdesc, D desc)
2435 {
2436     Opcode mop = static_cast<Opcode>(static_cast<int>(op) & ~2);
2437     opSend(mop, mod, static_cast<SharedFunction>(exdesc & 0x1F), dst, src0, src1, exdesc, desc);
2438 }
2439 
2440 template <HW hw>
2441 template <HW hw_>
2442 typename std::enable_if<hwLT(hw_, HW::Gen12LP)>::type
opBranch(Opcode op,const InstructionModifier & mod,const RegData & dst,int32_t jip,int32_t uip)2443 BinaryCodeGenerator<hw>::opBranch(Opcode op, const InstructionModifier &mod, const RegData &dst, int32_t jip, int32_t uip)
2444 {
2445     Instruction8 i{};
2446     InstructionModifier emod = mod | defaultModifier;
2447 
2448     encodeCommon8(i, op, emod);
2449 
2450     i.binary.dst = encodeBinaryOperand8<true>(dst).bits;
2451     i.binary.dstRegFile = getRegFile(dst);
2452     i.binary.dstType = getTypecode<hw>(dst.getType());
2453     i.binary.src0RegFile = getRegFile(Immediate());
2454     i.binary.src0Type = getTypecode<hw>(DataType::d);
2455     i.branches.jip = jip;
2456     i.branches.uip = uip;
2457 
2458     db(i);
2459 }
2460 
2461 template <HW hw>
2462 template <HW hw_>
2463 typename std::enable_if<hwGE(hw_, HW::Gen12LP)>::type
opBranch(Opcode op,const InstructionModifier & mod,const RegData & dst,int32_t jip,int32_t uip)2464 BinaryCodeGenerator<hw>::opBranch(Opcode op, const InstructionModifier &mod, const RegData &dst, int32_t jip, int32_t uip)
2465 {
2466     typename EncodingTag12Dispatch<hw>::tag tag;
2467     Instruction12 i{};
2468     InstructionModifier emod = mod | defaultModifier;
2469 
2470     encodeCommon12(i, op, emod, dst, tag);
2471 
2472     i.branches.branchCtrl = emod.getBranchCtrl();
2473 
2474     i.binary.dst = encodeBinaryOperand12<true, false>(dst, tag).bits;
2475 
2476     i.binary.src0Imm = true;
2477     i.binary.src1Imm = true;
2478 
2479     i.branches.jip = jip;
2480     i.branches.uip = uip;
2481 
2482     db(i);
2483 }
2484 
2485 template <HW hw>
2486 template <bool forceWE, HW hw_>
2487 typename std::enable_if<hwLT(hw_, HW::Gen12LP)>::type
opBranch(Opcode op,const InstructionModifier & mod,const RegData & dst,int32_t jip)2488 BinaryCodeGenerator<hw>::opBranch(Opcode op, const InstructionModifier &mod, const RegData &dst, int32_t jip)
2489 {
2490     Instruction8 i{};
2491     InstructionModifier emod = mod | defaultModifier;
2492     if (forceWE)
2493         emod |= NoMask;
2494 
2495     encodeCommon8(i, op, emod);
2496 
2497     i.binary.dst = encodeBinaryOperand8<true>(dst).bits;
2498     i.binary.dstRegFile = getRegFile(dst);
2499     i.binary.dstType = getTypecode<hw>(dst.getType());
2500     i.binary.src1RegFile = RegFileIMM;
2501     i.binary.src1Type = getTypecode<hw>(DataType::d);
2502     i.branches.jip = jip;
2503 
2504     db(i);
2505 }
2506 
2507 template <HW hw>
2508 template <bool forceWE, HW hw_>
2509 typename std::enable_if<hwGE(hw_, HW::Gen12LP)>::type
opBranch(Opcode op,const InstructionModifier & mod,const RegData & dst,int32_t jip)2510 BinaryCodeGenerator<hw>::opBranch(Opcode op, const InstructionModifier &mod, const RegData &dst, int32_t jip)
2511 {
2512     typename EncodingTag12Dispatch<hw>::tag tag;
2513     Instruction12 i{};
2514     InstructionModifier emod = mod | defaultModifier;
2515     if (forceWE)
2516         emod |= NoMask;
2517 
2518     encodeCommon12(i, op, emod, dst, tag);
2519 
2520     i.branches.branchCtrl = emod.getBranchCtrl();
2521 
2522     i.binary.dst = encodeBinaryOperand12<true, false>(dst, tag).bits;
2523     i.binary.src0Imm = true;
2524     i.branches.jip = jip;
2525 
2526     db(i);
2527 }
2528 
2529 template <HW hw>
2530 template <bool forceWE, bool small12, HW hw_>
2531 typename std::enable_if<hwLT(hw_, HW::Gen12LP)>::type
opBranch(Opcode op,const InstructionModifier & mod,const RegData & dst,const RegData & src0)2532 BinaryCodeGenerator<hw>::opBranch(Opcode op, const InstructionModifier &mod, const RegData &dst, const RegData &src0)
2533 {
2534     Instruction8 i{};
2535     InstructionModifier emod = mod | defaultModifier;
2536     if (forceWE)
2537         emod |= NoMask;
2538 
2539     encodeCommon8(i, op, emod);
2540 
2541     i.binary.dst = encodeBinaryOperand8<true>(dst).bits;
2542     i.binary.dstRegFile = getRegFile(dst);
2543     i.binary.dstType = getTypecode<hw>(DataType::d);
2544     i.binary.src0RegFile = getRegFile(src0);
2545     i.binary.src0Type = getTypecode<hw>(DataType::d);
2546     i.binary.src0 = encodeBinaryOperand8<false>(src0).bits;
2547 
2548     db(i);
2549 }
2550 
2551 template <HW hw>
2552 template <bool forceWE, bool small12, HW hw_>
2553 typename std::enable_if<hwGE(hw_, HW::Gen12LP)>::type
opBranch(Opcode op,const InstructionModifier & mod,const RegData & dst,const RegData & src0)2554 BinaryCodeGenerator<hw>::opBranch(Opcode op, const InstructionModifier &mod, const RegData &dst, const RegData &src0)
2555 {
2556     typename EncodingTag12Dispatch<hw>::tag tag;
2557     Instruction12 i{};
2558     InstructionModifier emod = mod | defaultModifier;
2559     if (forceWE)
2560         emod |= NoMask;
2561 
2562     encodeCommon12(i, op, emod, dst, tag);
2563 
2564     i.branches.branchCtrl = emod.getBranchCtrl();
2565 
2566     i.binary.dst = encodeBinaryOperand12<true, false>(dst, tag).bits;
2567     i.binary.src0 = encodeBinaryOperand12<false, false>(src0, tag).bits;
2568     if (small12)
2569         i.binary.src0 &= 0xFFFF;
2570 
2571     db(i);
2572 }
2573 
2574 template <HW hw>
opBranch(Opcode op,const InstructionModifier & mod,const RegData & dst,Label & jip,Label & uip)2575 void BinaryCodeGenerator<hw>::opBranch(Opcode op, const InstructionModifier &mod, const RegData &dst, Label &jip, Label &uip)
2576 {
2577     addFixup(LabelFixup(jip.getID(labelManager), LabelFixup::JIPOffset));
2578     addFixup(LabelFixup(uip.getID(labelManager), LabelFixup::UIPOffset));
2579     opBranch(op, mod, dst, 0, 0);
2580 }
2581 
2582 template <HW hw>
2583 template <bool forceWE>
opBranch(Opcode op,const InstructionModifier & mod,const RegData & dst,Label & jip)2584 void BinaryCodeGenerator<hw>::opBranch(Opcode op, const InstructionModifier &mod, const RegData &dst, Label &jip)
2585 {
2586     addFixup(LabelFixup(jip.getID(labelManager), LabelFixup::JIPOffset));
2587     opBranch<forceWE>(op, mod, dst, 0);
2588 }
2589 
2590 template <HW hw>
opCall(Opcode op,const InstructionModifier & mod,const RegData & dst,Label & jip)2591 void BinaryCodeGenerator<hw>::opCall(Opcode op, const InstructionModifier &mod, const RegData &dst, Label &jip)
2592 {
2593     addFixup(LabelFixup(jip.getID(labelManager), LabelFixup::JIPOffset));
2594     if (isGen12)
2595         opBranch<true>(op, mod, dst, 0);
2596     else
2597         opX<true>(op, DataType::d, mod, dst, null.ud(0)(0, 1, 0), Immediate::d(0));
2598 }
2599 
2600 template <HW hw>
2601 template <HW hw_>
2602 typename std::enable_if<hwLT(hw_, HW::Gen12LP)>::type
opJmpi(Opcode op,const InstructionModifier & mod,const RegData & dst,RegData src0,uint32_t jip)2603 BinaryCodeGenerator<hw>::opJmpi(Opcode op, const InstructionModifier &mod, const RegData &dst, RegData src0, uint32_t jip)
2604 {
2605     Instruction8 i{};
2606     InstructionModifier emod = mod | defaultModifier | NoMask;
2607 
2608     encodeCommon8(i, op, emod);
2609 
2610     src0.fixup(emod.getExecSize(), DataType::d, false, 2);
2611 
2612     i.binary.dst = encodeBinaryOperand8<true>(dst).bits;
2613     i.binary.src0 = encodeBinaryOperand8<false>(src0).bits;
2614     i.binary.src0RegFile = getRegFile(src0);
2615     i.binary.src1RegFile = RegFileIMM;
2616     i.binary.src1Type = getTypecode<hw>(DataType::d);
2617 
2618     i.branches.jip = jip;
2619 
2620     db(i);
2621 }
2622 
2623 template <HW hw>
2624 template <HW hw_>
2625 typename std::enable_if<hwGE(hw_, HW::Gen12LP)>::type
opJmpi(Opcode op,const InstructionModifier & mod,const RegData & dst,RegData src0,uint32_t jip)2626 BinaryCodeGenerator<hw>::opJmpi(Opcode op, const InstructionModifier &mod, const RegData &dst, RegData src0, uint32_t jip)
2627 {
2628     opBranch<true>(op, mod, dst, jip);
2629 }
2630 
2631 template <HW hw>
opJmpi(Opcode op,const InstructionModifier & mod,const RegData & dst,const RegData & src0,Label & jip)2632 void BinaryCodeGenerator<hw>::opJmpi(Opcode op, const InstructionModifier &mod, const RegData &dst, const RegData &src0, Label &jip)
2633 {
2634     if (hw >= HW::Gen12LP)
2635         addFixup(LabelFixup(jip.getID(labelManager), LabelFixup::JIPOffset));
2636     opJmpi(op, mod, dst, src0, 0);
2637     if (hw < HW::Gen12LP)
2638         addFixup(LabelFixup(jip.getID(labelManager), LabelFixup::JIPOffsetJMPI));
2639 }
2640 
2641 template <HW hw>
opSync(Opcode op,SyncFunction fc,const InstructionModifier & mod)2642 void BinaryCodeGenerator<hw>::opSync(Opcode op, SyncFunction fc, const InstructionModifier &mod)
2643 {
2644     if (hw < HW::Gen12LP)
2645         unsupported();
2646 
2647     typename EncodingTag12Dispatch<hw>::tag tag;
2648     Instruction12 i{};
2649     InstructionModifier emod = mod | defaultModifier;
2650 
2651     encodeCommon12(i, op, emod, null, tag);
2652 
2653     i.binary.dst = 0x1;
2654     i.binary.cmod = static_cast<int>(fc);
2655 
2656     db(i);
2657 }
2658 
2659 template <HW hw>
opSync(Opcode op,SyncFunction fc,const InstructionModifier & mod,RegData src0)2660 void BinaryCodeGenerator<hw>::opSync(Opcode op, SyncFunction fc, const InstructionModifier &mod, RegData src0)
2661 {
2662     typename EncodingTag12Dispatch<hw>::tag tag;
2663     if (hw < HW::Gen12LP)
2664         unsupported();
2665 
2666     Instruction12 i{};
2667     InstructionModifier emod = mod | defaultModifier;
2668 
2669     encodeCommon12(i, op, emod, null, tag);
2670 
2671     i.binary.dst = 0x1;
2672     if (!src0.isNull()) {
2673         src0.setRegion(0, 1, 0);
2674         i.binary.src0 = encodeBinaryOperand12<false>(src0, tag).bits;
2675         i.binary.src0Type = getTypecode12(src0.getType());
2676     }
2677     i.binary.cmod = static_cast<int>(fc);
2678 
2679     db(i);
2680 }
2681 
2682 template <HW hw>
opSync(Opcode op,SyncFunction fc,const InstructionModifier & mod,const Immediate & src0)2683 void BinaryCodeGenerator<hw>::opSync(Opcode op, SyncFunction fc, const InstructionModifier &mod, const Immediate &src0)
2684 {
2685     if (hw < HW::Gen12LP)
2686         unsupported();
2687 
2688     typename EncodingTag12Dispatch<hw>::tag tag;
2689     Instruction12 i{};
2690     InstructionModifier emod = mod | defaultModifier;
2691 
2692     encodeCommon12(i, op, emod, null, tag);
2693 
2694     i.binary.dst = 0x1;
2695     i.binary.src0Type = getTypecode12(src0.getType());
2696     i.binary.src0Imm = true;
2697     i.binary.cmod = static_cast<int>(fc);
2698 
2699     i.imm32.value = static_cast<uint64_t>(src0);
2700 
2701     db(i);
2702 }
2703 
2704 template <HW hw>
opNop(Opcode op)2705 void BinaryCodeGenerator<hw>::opNop(Opcode op)
2706 {
2707     Instruction8 i{};
2708 
2709     i.qword[0] = static_cast<int>(op);
2710     i.qword[1] = 0;
2711 
2712     db(i);
2713 }
2714 
2715 } /* namespace ngen */
2716 
2717 #endif /* header guard */
2718