1 /*******************************************************************************
2 * Copyright 2019-2021 Intel Corporation
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *     http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 *******************************************************************************/
16 
17 // nGEN: a C++ library for runtime Gen assembly generation.
18 //
19 // Macros that control nGEN's interface:
20 //    NGEN_SAFE             if defined, enables run-time safety checks. Exceptions will be thrown if checks fail.
21 //    NGEN_SHORT_NAMES      if defined, enables some short names (r[...] for indirect addressing, W for NoMask)
22 //    NGEN_GLOBAL_REGS      if defined, register names and instruction modifiers (r7, cr0, Switch, etc.) are
23 //                           global variables in the ngen namespace. Otherwise, they are members of the code
24 //                           generator classes
25 //    NGEN_CPP11            if defined, ngen is C++11-compatible (C++17 not required)
26 
27 #ifndef NGEN_HPP
28 #define NGEN_HPP
29 
#include <array>
#include <cstring>
#include <memory>
#include <type_traits>
#include <vector>
34 
35 #include "ngen_core.hpp"
36 #include "ngen_auto_swsb.hpp"
37 
38 namespace ngen {
39 
40 // Forward declarations.
41 template <HW hw> class BinaryCodeGenerator;
42 template <HW hw> class ELFCodeGenerator;
43 
44 // MSVC v140 workaround for enum comparison in template arguments.
hwLT(HW hw1,HW hw2)45 static constexpr bool hwLT(HW hw1, HW hw2) { return hw1 < hw2; }
hwLE(HW hw1,HW hw2)46 static constexpr bool hwLE(HW hw1, HW hw2) { return hw1 <= hw2; }
hwGE(HW hw1,HW hw2)47 static constexpr bool hwGE(HW hw1, HW hw2) { return hw1 >= hw2; }
hwGT(HW hw1,HW hw2)48 static constexpr bool hwGT(HW hw1, HW hw2) { return hw1 > hw2; }
49 
50 // -----------------------------------------------------------------------
51 
// Register file selector values returned by getRegFile().
// The values are not contiguous: 2 is not assigned here.
enum RegFiles : unsigned {
    RegFileARF = 0x0,   // chosen for operands whose isARF() is true
    RegFileGRF = 0x1,   // chosen for all other register operands
    RegFileIMM = 0x3,   // chosen for Immediate operands
};
57 
getRegFile(const RegData & rd)58 inline unsigned getRegFile(const RegData &rd)          { return rd.isARF() ? RegFileARF : RegFileGRF; }
getRegFile(const Align16Operand & o)59 inline unsigned getRegFile(const Align16Operand &o)    { return getRegFile(o.getReg()); }
getRegFile(const ExtendedReg & reg)60 inline unsigned getRegFile(const ExtendedReg &reg)     { return getRegFile(reg.getBase()); }
getRegFile(const Immediate & imm)61 inline unsigned getRegFile(const Immediate &imm)       { return RegFileIMM; }
62 
63 // -----------------------------------------------------------------------
64 // Binary formats, split between pre-Xe and post-Xe.
65 
66 #include "ngen_gen8.hpp"
67 #include "ngen_xe.hpp"
68 
69 // -----------------------------------------------------------------------
70 
71 
// Records one jump field that must be patched once its label's target is known.
class LabelFixup {
public:
    // Byte offsets, relative to the anchor, of the fields that can be patched.
    static constexpr int JIPOffset = 12;
    static constexpr int JIPOffsetJMPI = -4;
    static constexpr int UIPOffset = 8;

    uint32_t labelID;       // ID of the label this fixup refers to.
    int32_t anchor = 0;     // Starts at zero; the owning stream fills it in when the fixup is added.
    int32_t offset;         // Position of the field to patch, relative to the anchor.

    LabelFixup(uint32_t labelID_, int32_t offset_)
        : labelID(labelID_), offset(offset_)
    {}
};
84 
85 #if defined(NGEN_GLOBAL_REGS) && !defined(NGEN_GLOBAL_REGS_DEFINED)
86 #define NGEN_GLOBAL_REGS_DEFINED
87 #include "ngen_registers.hpp"
88 #endif
89 
90 template <HW hw>
91 class BinaryCodeGenerator
92 {
93     friend class ELFCodeGenerator<hw>;
94 
95 protected:
96     class InstructionStream {
97         friend class BinaryCodeGenerator;
98 
99         std::vector<LabelFixup> fixups;
100         std::vector<uint32_t> labels;
101         std::vector<uint64_t> code;
102         bool appended = false;
103 
length() const104         int length() const { return int(code.size() * sizeof(uint64_t)); }
105 
db(const Instruction8 & i)106         void db(const Instruction8 &i) {
107             code.push_back(i.qword[0]);
108             code.push_back(i.qword[1]);
109         }
110 
db(const Instruction12 & i)111         void db(const Instruction12 &i) {
112             code.push_back(i.qword[0]);
113             code.push_back(i.qword[1]);
114         }
115 
addFixup(LabelFixup fixup)116         void addFixup(LabelFixup fixup) {
117             fixup.anchor = length();
118             fixups.push_back(fixup);
119         }
120 
mark(Label & label,LabelManager & man)121         void mark(Label &label, LabelManager &man) {
122             uint32_t id = label.getID(man);
123 
124             man.setTarget(id, length());
125             labels.push_back(id);
126         }
127 
fixLabels(LabelManager & man)128         void fixLabels(LabelManager &man) {
129             for (const auto &fixup : fixups) {
130                 int32_t target = man.getTarget(fixup.labelID);
131                 uint8_t *field = ((uint8_t *) code.data()) + fixup.anchor + fixup.offset;
132                 *((int32_t *) field) = target - fixup.anchor;
133             }
134         }
135 
append(InstructionStream & other,LabelManager & man)136         void append(InstructionStream &other, LabelManager &man) {
137             auto offset = length();
138             auto sz = code.size();
139 
140             code.resize(sz + other.code.size());
141             std::copy(other.code.begin(), other.code.end(), code.begin() + sz);
142 
143             sz = labels.size();
144             labels.resize(sz + other.labels.size());
145             std::copy(other.labels.begin(), other.labels.end(), labels.begin() + sz);
146 
147             for (LabelFixup fixup : other.fixups) {
148                 fixup.anchor += offset;
149                 fixups.push_back(fixup);
150             }
151 
152 #ifdef NGEN_SAFE
153             if (other.appended && !other.labels.empty())
154                 throw multiple_label_exception();
155 #endif
156 
157             for (uint32_t id : other.labels)
158                 man.offsetTarget(id, offset);
159 
160             other.appended = true;
161         }
162 
InstructionStream()163         InstructionStream() {}
164     };
165 
166     class Program {
167         friend class BinaryCodeGenerator;
168         using Instruction = Instruction12;
169         std::vector<uint64_t> &code;
170 
Program(InstructionStream & stream)171         Program(InstructionStream &stream) : code(stream.code) {};
172 
173     public:
size() const174         size_t size() const                               { return code.size() >> 1; }
operator [](size_t index)175         Instruction &operator[](size_t index)             { return *reinterpret_cast<Instruction *>(&code[index * 2]); }
operator [](size_t index) const176         const Instruction &operator[](size_t index) const { return *reinterpret_cast<Instruction *>(&code[index * 2]); }
177     };
178 
179     static constexpr HW hardware = hw;
180     static constexpr bool isXe = (hw >= HW::Xe_LP);
181 
182     Label _labelLocalIDsLoaded;
183     Label _labelArgsLoaded;
184 
185 private:
186     InstructionModifier defaultModifier;
187 
188     LabelManager labelManager;
189     InstructionStream rootStream;
190     std::vector<InstructionStream*> streamStack;
191 
db(const Instruction8 & i)192     void db(const Instruction8 &i)  { streamStack.back()->db(i); }
db(const Instruction12 & i)193     void db(const Instruction12 &i) { streamStack.back()->db(i); }
addFixup(LabelFixup fixup)194     void addFixup(LabelFixup fixup) { streamStack.back()->addFixup(fixup); }
195 
196     template <bool forceWE = false, typename D, typename S0, HW hw_ = hw>
197     typename std::enable_if<hwLT(hw_, HW::Xe_LP)>::type opX(Opcode op, DataType defaultType, const InstructionModifier &mod, D dst, S0 src0);
198     template <bool forceWE = false, typename D, typename S0, HW hw_ = hw>
199     typename std::enable_if<hwGE(hw_, HW::Xe_LP)>::type opX(Opcode op, DataType defaultType, const InstructionModifier &mod, D dst, S0 src0);
200     template <bool forceWE = false, typename D, HW hw_ = hw>
201     typename std::enable_if<hwLT(hw_, HW::Xe_LP)>::type opX(Opcode op, DataType defaultType, const InstructionModifier &mod, D dst, const Immediate &src0);
202     template <bool forceWE = false, typename D, HW hw_ = hw>
203     typename std::enable_if<hwGE(hw_, HW::Xe_LP)>::type opX(Opcode op, DataType defaultType, const InstructionModifier &mod, D dst, const Immediate &src0);
204 
205     template <bool forceWE = false, typename D, typename S0, typename S1, HW hw_ = hw>
206     typename std::enable_if<hwLT(hw_, HW::Xe_LP)>::type opX(Opcode op, DataType defaultType, const InstructionModifier &mod, D dst, S0 src0, S1 src1);
207     template <bool forceWE = false, typename D, typename S0, typename S1, HW hw_ = hw>
208     typename std::enable_if<hwGE(hw_, HW::Xe_LP)>::type opX(Opcode op, DataType defaultType, const InstructionModifier &mod, D dst, S0 src0, S1 src1);
209     template <bool forceWE = false, typename D, typename S0, HW hw_ = hw>
210     typename std::enable_if<hwLT(hw_, HW::Xe_LP)>::type opX(Opcode op, DataType defaultType, const InstructionModifier &mod, D dst, S0 src0, const Immediate &src1);
211     template <bool forceWE = false, typename D, typename S0, HW hw_ = hw>
212     typename std::enable_if<hwGE(hw_, HW::Xe_LP)>::type opX(Opcode op, DataType defaultType, const InstructionModifier &mod, D dst, S0 src0, const Immediate &src1);
213 
214     template <HW hw_ = hw>
215     typename std::enable_if<hwLE(hw_, HW::Gen9)>::type opX(Opcode op, DataType defaultType, const InstructionModifier &mod, RegData dst, RegData src0, RegData src1, RegData src2);
216     template <HW hw_ = hw>
217     typename std::enable_if<hwLT(hw_, HW::Xe_LP)>::type opX(Opcode op, DataType defaultType, const InstructionModifier &mod, Align16Operand dst, Align16Operand src0, Align16Operand src1, Align16Operand src2);
218     template <typename D, typename S0, typename S1, typename S2, HW hw_ = hw>
219     typename std::enable_if<hwLT(hw_, HW::Xe_LP)>::type opX(Opcode op, DataType defaultType, const InstructionModifier &mod, D dst, S0 src0, S1 src1, S2 src2);
220     template <typename D, typename S0, typename S1, typename S2, HW hw_ = hw>
221     typename std::enable_if<hwGE(hw_, HW::Xe_LP)>::type opX(Opcode op, DataType defaultType, const InstructionModifier &mod, D dst, S0 src0, S1 src1, S2 src2);
222 
223     template <typename DS0>
224     void opMath(Opcode op, DataType defaultType, const InstructionModifier &mod, MathFunction fc, DS0 dst, DS0 src0);
225     template <typename DS0, typename S1>
226     void opMath(Opcode op, DataType defaultType, const InstructionModifier &mod, MathFunction fc, DS0 dst, DS0 src0, S1 src1);
227 
228     template <typename D, HW hw_ = hw>
229     typename std::enable_if<hwLT(hw_, HW::Xe_LP)>::type opSend(Opcode op, const InstructionModifier &mod, SharedFunction sfid, const RegData &dst, const RegData &src0, const RegData &src1, uint32_t exdesc, D desc);
230     template <typename D, HW hw_ = hw>
231     typename std::enable_if<hwLT(hw_, HW::Xe_LP)>::type opSend(Opcode op, const InstructionModifier &mod, SharedFunction sfid, const RegData &dst, const RegData &src0, const RegData &src1, const RegData &exdesc, D desc);
232     template <typename ED, typename D, HW hw_ = hw>
233     typename std::enable_if<hwGE(hw_, HW::Xe_LP)>::type opSend(Opcode op, const InstructionModifier &mod, SharedFunction sfid, const RegData &dst, const RegData &src0, const RegData &src1, ED exdesc, D desc);
234 
235     template <HW hw_ = hw>
236     typename std::enable_if<hwLT(hw_, HW::Xe_LP)>::type opSend(Opcode op, const InstructionModifier &mod, const RegData &dst, const RegData &src0, uint32_t exdesc, uint32_t desc);
237     template <HW hw_ = hw>
238     typename std::enable_if<hwLT(hw_, HW::Xe_LP)>::type opSend(Opcode op, const InstructionModifier &mod, const RegData &dst, const RegData &src0, uint32_t exdesc, const RegData &desc);
239     template <typename D, HW hw_ = hw>
240     typename std::enable_if<hwGE(hw_, HW::Xe_LP)>::type opSend(Opcode op, const InstructionModifier &mod, const RegData &dst, const RegData &src0, uint32_t exdesc, D desc);
241 
242     template <typename ED, typename D, HW hw_ = hw>
243     typename std::enable_if<hwLT(hw_, HW::Xe_LP)>::type opSends(Opcode op, const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1, ED exdesc, D desc);
244     template <typename D, HW hw_ = hw>
245     typename std::enable_if<hwGE(hw_, HW::Xe_LP)>::type opSends(Opcode op, const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1, uint32_t exdesc, D desc);
246     template <typename D, HW hw_ = hw>
247     typename std::enable_if<hwGE(hw_, HW::Xe_LP)>::type opSends(Opcode op, const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1, RegData exdesc, D desc);
248 
249     template <HW hw_ = hw>
250     typename std::enable_if<hwLT(hw_, HW::Xe_LP)>::type opBranch(Opcode op, const InstructionModifier &mod, const RegData &dst, int32_t jip, int32_t uip);
251     template <HW hw_ = hw>
252     typename std::enable_if<hwGE(hw_, HW::Xe_LP)>::type opBranch(Opcode op, const InstructionModifier &mod, const RegData &dst, int32_t jip, int32_t uip);
253     template <bool forceWE = false, HW hw_ = hw>
254     typename std::enable_if<hwLT(hw_, HW::Xe_LP)>::type opBranch(Opcode op, const InstructionModifier &mod, const RegData &dst, int32_t jip);
255     template <bool forceWE = false, HW hw_ = hw>
256     typename std::enable_if<hwGE(hw_, HW::Xe_LP)>::type opBranch(Opcode op, const InstructionModifier &mod, const RegData &dst, int32_t jip);
257     template <bool forceWE = false, bool small12 = true, HW hw_ = hw>
258     typename std::enable_if<hwLT(hw_, HW::Xe_LP)>::type opBranch(Opcode op, const InstructionModifier &mod, const RegData &dst, const RegData &src0);
259     template <bool forceWE = false, bool small12 = true, HW hw_ = hw>
260     typename std::enable_if<hwGE(hw_, HW::Xe_LP)>::type opBranch(Opcode op, const InstructionModifier &mod, const RegData &dst, const RegData &src0);
261 
262     void opBranch(Opcode op, const InstructionModifier &mod, const RegData &dst, Label &jip, Label &uip);
263     template <bool forceWE = false>
264     void opBranch(Opcode op, const InstructionModifier &mod, const RegData &dst, Label &jip);
265     void opCall(Opcode op, const InstructionModifier &mod, const RegData &dst, Label &jip);
266 
267     template <HW hw_ = hw>
268     typename std::enable_if<hwLT(hw_, HW::Xe_LP)>::type opJmpi(Opcode op, const InstructionModifier &mod, const RegData &dst, RegData src0, uint32_t jip);
269     template <HW hw_ = hw>
270     typename std::enable_if<hwGE(hw_, HW::Xe_LP)>::type opJmpi(Opcode op, const InstructionModifier &mod, const RegData &dst, RegData src0, uint32_t jip);
271     void opJmpi(Opcode op, const InstructionModifier &mod, const RegData &dst, const RegData &src0, Label &jip);
272 
273     void opSync(Opcode op, SyncFunction fc, const InstructionModifier &mod);
274     void opSync(Opcode op, SyncFunction fc, const InstructionModifier &mod, RegData src0);
275     void opSync(Opcode op, SyncFunction fc, const InstructionModifier &mod, const Immediate &src0);
276 
277     void opNop(Opcode op);
278 
279     inline void unsupported();
280 
281 #include "ngen_compiler_fix.hpp"
282 
283 public:
BinaryCodeGenerator()284     BinaryCodeGenerator() : defaultModifier{}, labelManager{}, sync{this}, load{this}, store{this}, atomic{this} {
285         _workaround_();
286         pushStream(rootStream);
287     }
288 
~BinaryCodeGenerator()289     ~BinaryCodeGenerator() {
290         for (size_t sn = 1; sn < streamStack.size(); sn++)
291             delete streamStack[sn];
292     }
293 
294     std::vector<uint8_t> getCode();
295 
296 protected:
297     // Configuration.
setDefaultNoMask(bool def=true)298     void setDefaultNoMask(bool def = true)          { defaultModifier.setWrEn(def); }
setDefaultAutoSWSB(bool def=true)299     void setDefaultAutoSWSB(bool def = true)        { defaultModifier.setAutoSWSB(def); }
getDefaultNoMask() const300     bool getDefaultNoMask() const                   { return defaultModifier.isWrEn(); }
getDefaultAutoSWSB() const301     bool getDefaultAutoSWSB() const                 { return defaultModifier.isAutoSWSB(); }
302 
303     // Stream handling.
pushStream()304     void pushStream()                               { pushStream(new InstructionStream()); }
pushStream(InstructionStream * s)305     void pushStream(InstructionStream *s)           { streamStack.push_back(s); }
pushStream(InstructionStream & s)306     void pushStream(InstructionStream &s)           { pushStream(&s); }
307 
308     InstructionStream *popStream();
309 
appendStream(InstructionStream * s)310     void appendStream(InstructionStream *s)         { appendStream(*s); }
appendStream(InstructionStream & s)311     void appendStream(InstructionStream &s)         { streamStack.back()->append(s, labelManager); }
appendCurrentStream()312     void appendCurrentStream()                      { InstructionStream *s = popStream(); appendStream(s); delete s; }
313 
discardStream()314     void discardStream()                            { delete popStream(); }
315 
316     template <typename String>
comment(String)317     void comment(String)                            {}
318 
319     // Registers.
320 #ifndef NGEN_GLOBAL_REGS
321 #include "ngen_registers.hpp"
322 #endif
323 
324     // Labels.
mark(Label & label)325     inline void mark(Label &label)          { streamStack.back()->mark(label, labelManager); }
326 
327     // Instructions.
328     template <typename DT = void>
add(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1)329     void add(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
330         opX(Opcode::add, getDataType<DT>(), mod, dst, src0, src1);
331     }
332     template <typename DT = void>
add(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const Immediate & src1)333     void add(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) {
334         opX(Opcode::add, getDataType<DT>(), mod, dst, src0, src1);
335     }
336     template <typename DT = void>
addc(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1)337     void addc(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
338         opX(Opcode::addc, getDataType<DT>(), mod | AccWrEn, dst, src0, src1);
339     }
340     template <typename DT = void>
addc(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const Immediate & src1)341     void addc(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) {
342         opX(Opcode::addc, getDataType<DT>(), mod | AccWrEn, dst, src0, src1);
343     }
344     template <typename DT = void>
and_(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1)345     void and_(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
346         opX(isXe ? Opcode::and_xe : Opcode::and_, getDataType<DT>(), mod, dst, src0, src1);
347     }
348     template <typename DT = void>
and_(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const Immediate & src1)349     void and_(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) {
350         opX(isXe ? Opcode::and_xe : Opcode::and_, getDataType<DT>(), mod, dst, src0, src1);
351     }
352 #ifndef NGEN_NO_OP_NAMES
353     template <typename DT = void>
and(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1)354     void and(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
355         and_<DT>(mod, dst, src0, src1);
356     }
357     template <typename DT = void>
and(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const Immediate & src1)358     void and(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) {
359         and_<DT>(mod, dst, src0, src1);
360     }
361 #endif
362     template <typename DT = void>
asr(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1)363     void asr(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
364         opX(isXe ? Opcode::asr_xe : Opcode::asr, getDataType<DT>(), mod, dst, src0, src1);
365     }
366     template <typename DT = void>
asr(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const Immediate & src1)367     void asr(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) {
368         opX(isXe ? Opcode::asr_xe : Opcode::asr, getDataType<DT>(), mod, dst, src0, src1);
369     }
370     template <typename DT = void>
avg(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1)371     void avg(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
372         opX(Opcode::avg, getDataType<DT>(), mod, dst, src0, src1);
373     }
374     template <typename DT = void>
avg(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const Immediate & src1)375     void avg(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) {
376         opX(Opcode::avg, getDataType<DT>(), mod, dst, src0, src1);
377     }
378     template <typename DT = void>
bfe(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1,const RegData & src2)379     void bfe(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1, const RegData &src2) {
380         opX(isXe ? Opcode::bfe_xe : Opcode::bfe, getDataType<DT>(), mod, dst, src0, src1, src2);
381     }
382     template <typename DT = void>
bfe(const InstructionModifier & mod,const RegData & dst,const Immediate & src0,const RegData & src1,const RegData & src2)383     void bfe(const InstructionModifier &mod, const RegData &dst, const Immediate &src0, const RegData &src1, const RegData &src2) {
384         opX(isXe ? Opcode::bfe_xe : Opcode::bfe, getDataType<DT>(), mod, dst, src0, src1, src2);
385     }
386     template <typename DT = void>
bfe(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1,const Immediate & src2)387     void bfe(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1, const Immediate &src2) {
388         opX(isXe ? Opcode::bfe_xe : Opcode::bfe, getDataType<DT>(), mod, dst, src0, src1, src2);
389     }
390     template <typename DT = void>
bfe(const InstructionModifier & mod,const RegData & dst,const Immediate & src0,const RegData & src1,const Immediate & src2)391     void bfe(const InstructionModifier &mod, const RegData &dst, const Immediate &src0, const RegData &src1, const Immediate &src2) {
392         opX(isXe ? Opcode::bfe_xe : Opcode::bfe, getDataType<DT>(), mod, dst, src0, src1, src2);
393     }
394     template <typename DT = void>
bfi1(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1)395     void bfi1(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
396         opX(isXe ? Opcode::bfi1_xe : Opcode::bfi1, getDataType<DT>(), mod, dst, src0, src1);
397     }
398     template <typename DT = void>
bfi1(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const Immediate & src1)399     void bfi1(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) {
400         opX(isXe ? Opcode::bfi1_xe : Opcode::bfi1, getDataType<DT>(), mod, dst, src0, src1);
401     }
402     template <typename DT = void>
bfi2(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1,const RegData & src2)403     void bfi2(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1, const RegData &src2) {
404         opX(isXe ? Opcode::bfi2_xe : Opcode::bfi2, getDataType<DT>(), mod, dst, src0, src1, src2);
405     }
406     template <typename DT = void>
bfi2(const InstructionModifier & mod,const RegData & dst,const Immediate & src0,const RegData & src1,const RegData & src2)407     void bfi2(const InstructionModifier &mod, const RegData &dst, const Immediate &src0, const RegData &src1, const RegData &src2) {
408         opX(isXe ? Opcode::bfi2_xe : Opcode::bfi2, getDataType<DT>(), mod, dst, src0, src1, src2);
409     }
410     template <typename DT = void>
bfi2(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1,const Immediate & src2)411     void bfi2(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1, const Immediate &src2) {
412         opX(isXe ? Opcode::bfi2_xe : Opcode::bfi2, getDataType<DT>(), mod, dst, src0, src1, src2);
413     }
414     template <typename DT = void>
bfi2(const InstructionModifier & mod,const RegData & dst,const Immediate & src0,const RegData & src1,const Immediate & src2)415     void bfi2(const InstructionModifier &mod, const RegData &dst, const Immediate &src0, const RegData &src1, const Immediate &src2) {
416         opX(isXe ? Opcode::bfi2_xe : Opcode::bfi2, getDataType<DT>(), mod, dst, src0, src1, src2);
417     }
418     template <typename DT = void>
bfrev(const InstructionModifier & mod,const RegData & dst,const RegData & src0)419     void bfrev(const InstructionModifier &mod, const RegData &dst, const RegData &src0) {
420         opX(isXe ? Opcode::bfrev_xe : Opcode::bfrev, getDataType<DT>(), mod, dst, src0);
421     }
422     template <typename DT = void>
bfrev(const InstructionModifier & mod,const RegData & dst,const Immediate & src0)423     void bfrev(const InstructionModifier &mod, const RegData &dst, const Immediate &src0) {
424         opX(isXe ? Opcode::bfrev_xe : Opcode::bfrev, getDataType<DT>(), mod, dst, src0);
425     }
brc(const InstructionModifier & mod,Label & jip,Label & uip)426     void brc(const InstructionModifier &mod, Label &jip, Label &uip) {
427         opBranch(Opcode::brc, mod, isXe ? null.ud() : ip.d(), jip, uip);
428     }
brc(const InstructionModifier & mod,RegData src0)429     void brc(const InstructionModifier &mod, RegData src0) {
430         src0.setRegion(2, 2, 1);
431         opBranch<true, true>(Opcode::brc, mod, isXe ? null.ud() : ip.d(), src0);
432     }
brd(const InstructionModifier & mod,Label & jip)433     void brd(const InstructionModifier &mod, Label &jip) {
434         opBranch(Opcode::brd, mod, isXe ? null.ud() : ip.d(), jip);
435     }
brd(const InstructionModifier & mod,RegData src0)436     void brd(const InstructionModifier &mod, RegData src0) {
437         src0.setRegion(2, 2, 1);
438         opBranch<true, true>(Opcode::brd, mod, isXe ? null.ud() : ip.d(), src0);
439     }
break_(const InstructionModifier & mod,Label & jip,Label & uip)440     void break_(const InstructionModifier &mod, Label &jip, Label &uip) {
441         opBranch(Opcode::break_, mod, null, jip, uip);
442     }
call(const InstructionModifier & mod,const RegData & dst,Label & jip)443     void call(const InstructionModifier &mod, const RegData &dst, Label &jip) {
444         opCall(Opcode::call, mod, dst, jip);
445     }
call(const InstructionModifier & mod,const RegData & dst,RegData jip)446     void call(const InstructionModifier &mod, const RegData &dst, RegData jip) {
447         if (isXe)
448             opBranch<true, true>(Opcode::call, mod, dst, jip);
449         else {
450             jip.setRegion(0, 1, 0);
451             opX<true>(Opcode::call, DataType::d, mod, dst, null.ud(0)(0, 1, 0), jip);
452         }
453     }
calla(const InstructionModifier & mod,const RegData & dst,int32_t jip)454     void calla(const InstructionModifier &mod, const RegData &dst, int32_t jip) {
455         if (isXe)
456             opBranch<true>(Opcode::calla, mod, dst, jip);
457         else
458             opX<true>(Opcode::calla, DataType::d, mod, dst, (hw <= HW::Gen9) ? null.ud(0)(2,2,1) : null.ud(0)(0,1,0), Immediate::d(jip));
459     }
calla(const InstructionModifier & mod,const RegData & dst,RegData jip)460     void calla(const InstructionModifier &mod, const RegData &dst, RegData jip) {
461         if (isXe)
462             opBranch<true, true>(Opcode::calla, mod, dst, jip);
463         else {
464             jip.setRegion(0, 1, 0);
465             opX<true>(Opcode::calla, DataType::d, mod, dst, null.ud(0)(0, 1, 0), jip);
466         }
467     }
468     template <typename DT = void>
cbit(const InstructionModifier & mod,const RegData & dst,const RegData & src0)469     void cbit(const InstructionModifier &mod, const RegData &dst, const RegData &src0) {
470         opX(Opcode::cbit, getDataType<DT>(), mod, dst, src0);
471     }
472     template <typename DT = void>
cbit(const InstructionModifier & mod,const RegData & dst,const Immediate & src0)473     void cbit(const InstructionModifier &mod, const RegData &dst, const Immediate &src0) {
474         opX(Opcode::cbit, getDataType<DT>(), mod, dst, src0);
475     }
476     template <typename DT = void>
cmp(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1)477     void cmp(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
478         opX(isXe ? Opcode::cmp_xe : Opcode::cmp, getDataType<DT>(), mod, dst, src0, src1);
479     }
480     template <typename DT = void>
cmp(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const Immediate & src1)481     void cmp(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) {
482         opX(isXe ? Opcode::cmp_xe : Opcode::cmp, getDataType<DT>(), mod, dst, src0, src1);
483     }
484     template <typename DT = void>
cmpn(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1)485     void cmpn(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
486         opX(isXe ? Opcode::cmpn_xe : Opcode::cmpn, getDataType<DT>(), mod, dst, src0, src1);
487     }
488     template <typename DT = void>
csel(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1,const RegData & src2)489     void csel(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1, const RegData &src2) {
490         opX(isXe ? Opcode::csel_xe : Opcode::csel, getDataType<DT>(), mod, dst, src0, src1, src2);
491     }
492     template <typename DT = void>
csel(const InstructionModifier & mod,const RegData & dst,const Immediate & src0,const RegData & src1,const RegData & src2)493     void csel(const InstructionModifier &mod, const RegData &dst, const Immediate &src0, const RegData &src1, const RegData &src2) {
494         opX(isXe ? Opcode::csel_xe : Opcode::csel, getDataType<DT>(), mod, dst, src0, src1, src2);
495     }
496     template <typename DT = void>
csel(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1,const Immediate & src2)497     void csel(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1, const Immediate &src2) {
498         opX(isXe ? Opcode::csel_xe : Opcode::csel, getDataType<DT>(), mod, dst, src0, src1, src2);
499     }
500     template <typename DT = void>
csel(const InstructionModifier & mod,const RegData & dst,const Immediate & src0,const RegData & src1,const Immediate & src2)501     void csel(const InstructionModifier &mod, const RegData &dst, const Immediate &src0, const RegData &src1, const Immediate &src2) {
502         opX(isXe ? Opcode::csel_xe : Opcode::csel, getDataType<DT>(), mod, dst, src0, src1, src2);
503     }
cont(const InstructionModifier & mod,Label & jip,Label & uip)504     void cont(const InstructionModifier &mod, Label &jip, Label &uip) {
505         opBranch(Opcode::cont, mod, null, jip, uip);
506     }
507     template <typename DT = void>
dp2(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1)508     void dp2(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
509         opX(Opcode::dp2, getDataType<DT>(), mod, dst, src0, src1);
510     }
511     template <typename DT = void>
dp2(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const Immediate & src1)512     void dp2(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) {
513         opX(Opcode::dp2, getDataType<DT>(), mod, dst, src0, src1);
514     }
515     template <typename DT = void>
dp3(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1)516     void dp3(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
517         opX(Opcode::dp3, getDataType<DT>(), mod, dst, src0, src1);
518     }
519     template <typename DT = void>
dp3(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const Immediate & src1)520     void dp3(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) {
521         opX(Opcode::dp3, getDataType<DT>(), mod, dst, src0, src1);
522     }
523     template <typename DT = void>
dp4(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1)524     void dp4(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
525         opX(Opcode::dp4, getDataType<DT>(), mod, dst, src0, src1);
526     }
527     template <typename DT = void>
dp4(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const Immediate & src1)528     void dp4(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) {
529         opX(Opcode::dp4, getDataType<DT>(), mod, dst, src0, src1);
530     }
531     template <typename DT = void>
dp4a(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1,const RegData & src2)532     void dp4a(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1, const RegData &src2) {
533         if (hw < HW::Xe_LP) unsupported();
534         opX(Opcode::dp4a, getDataType<DT>(), mod, dst, src0, src1, src2);
535     }
536     template <typename DT = void>
dp4a(const InstructionModifier & mod,const RegData & dst,const Immediate & src0,const RegData & src1,const RegData & src2)537     void dp4a(const InstructionModifier &mod, const RegData &dst, const Immediate &src0, const RegData &src1, const RegData &src2) {
538         if (hw < HW::Xe_LP) unsupported();
539         opX(Opcode::dp4a, getDataType<DT>(), mod, dst, src0, src1, src2);
540     }
541     template <typename DT = void>
dp4a(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1,const Immediate & src2)542     void dp4a(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1, const Immediate &src2) {
543         if (hw < HW::Xe_LP) unsupported();
544         opX(Opcode::dp4a, getDataType<DT>(), mod, dst, src0, src1, src2);
545     }
546     template <typename DT = void>
dp4a(const InstructionModifier & mod,const RegData & dst,const Immediate & src0,const RegData & src1,const Immediate & src2)547     void dp4a(const InstructionModifier &mod, const RegData &dst, const Immediate &src0, const RegData &src1, const Immediate &src2) {
548         if (hw < HW::Xe_LP) unsupported();
549         opX(Opcode::dp4a, getDataType<DT>(), mod, dst, src0, src1, src2);
550     }
551     template <typename DT = void>
dph(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1)552     void dph(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
553         opX(Opcode::dph, getDataType<DT>(), mod, dst, src0, src1);
554     }
555     template <typename DT = void>
dph(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const Immediate & src1)556     void dph(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) {
557         opX(Opcode::dph, getDataType<DT>(), mod, dst, src0, src1);
558     }
else_(InstructionModifier mod,Label & jip,Label & uip,bool branchCtrl=false)559     void else_(InstructionModifier mod, Label &jip, Label &uip, bool branchCtrl = false) {
560         mod.setBranchCtrl(branchCtrl);
561         opBranch(Opcode::else_, mod, null, jip, uip);
562     }
else_(InstructionModifier mod,Label & jip)563     void else_(InstructionModifier mod, Label &jip) {
564         else_(mod, jip, jip);
565     }
endif(const InstructionModifier & mod,Label & jip)566     void endif(const InstructionModifier &mod, Label &jip) {
567         opBranch(Opcode::endif, mod, null, jip);
568     }
endif(const InstructionModifier & mod)569     void endif(const InstructionModifier &mod) {
570         opBranch(Opcode::endif, mod, null, sizeof(Instruction8));
571     }
572     template <typename DT = void>
fbh(const InstructionModifier & mod,const RegData & dst,const RegData & src0)573     void fbh(const InstructionModifier &mod, const RegData &dst, const RegData &src0) {
574         opX(Opcode::fbh, getDataType<DT>(), mod, dst, src0);
575     }
576     template <typename DT = void>
fbh(const InstructionModifier & mod,const RegData & dst,const Immediate & src0)577     void fbh(const InstructionModifier &mod, const RegData &dst, const Immediate &src0) {
578         opX(Opcode::fbh, getDataType<DT>(), mod, dst, src0);
579     }
580     template <typename DT = void>
fbl(const InstructionModifier & mod,const RegData & dst,const RegData & src0)581     void fbl(const InstructionModifier &mod, const RegData &dst, const RegData &src0) {
582         opX(Opcode::fbl, getDataType<DT>(), mod, dst, src0);
583     }
584     template <typename DT = void>
fbl(const InstructionModifier & mod,const RegData & dst,const Immediate & src0)585     void fbl(const InstructionModifier &mod, const RegData &dst, const Immediate &src0) {
586         opX(Opcode::fbl, getDataType<DT>(), mod, dst, src0);
587     }
588     template <typename DT = void>
frc(const InstructionModifier & mod,const RegData & dst,const RegData & src0)589     void frc(const InstructionModifier &mod, const RegData &dst, const RegData &src0) {
590         opX(Opcode::frc, getDataType<DT>(), mod, dst, src0);
591     }
goto_(InstructionModifier mod,Label & jip,Label & uip,bool branchCtrl=false)592     void goto_(InstructionModifier mod, Label &jip, Label &uip, bool branchCtrl = false) {
593         mod.setBranchCtrl(branchCtrl);
594         opBranch(Opcode::goto_, mod, null, jip, uip);
595     }
goto_(const InstructionModifier & mod,Label & jip)596     void goto_(const InstructionModifier &mod, Label &jip) {
597         goto_(mod, jip, jip);
598     }
halt(const InstructionModifier & mod,Label & jip,Label & uip)599     void halt(const InstructionModifier &mod, Label &jip, Label &uip) {
600         opBranch(Opcode::halt, mod, null, jip, uip);
601     }
halt(const InstructionModifier & mod,Label & jip)602     void halt(const InstructionModifier &mod, Label &jip) {
603         halt(mod, jip, jip);
604     }
if_(InstructionModifier mod,Label & jip,Label & uip,bool branchCtrl=false)605     void if_(InstructionModifier mod, Label &jip, Label &uip, bool branchCtrl = false) {
606         mod.setBranchCtrl(branchCtrl);
607         opBranch(Opcode::if_, mod, null, jip, uip);
608     }
if_(const InstructionModifier & mod,Label & jip)609     void if_(const InstructionModifier &mod, Label &jip) {
610         if_(mod, jip, jip);
611     }
illegal()612     void illegal() {
613         opX(Opcode::illegal, DataType::invalid, InstructionModifier(), null, null, null);
614     }
join(InstructionModifier mod,Label & jip)615     void join(InstructionModifier mod, Label &jip) {
616         opBranch(Opcode::join, mod, null, jip);
617     }
join(InstructionModifier mod)618     void join(InstructionModifier mod) {
619         opBranch(Opcode::join, mod, null, sizeof(Instruction8));
620     }
jmpi(const InstructionModifier & mod,Label & jip)621     void jmpi(const InstructionModifier &mod, Label &jip) {
622         auto dst = isXe ? ARF(null) : ARF(ip);
623         opJmpi(Opcode::jmpi, mod, dst, dst, jip);
624     }
jmpi(const InstructionModifier & mod,const RegData & jip)625     void jmpi(const InstructionModifier &mod, const RegData &jip) {
626 #ifdef NGEN_SAFE
627         if (!isXe && jip.getType() != DataType::d && jip.getType() != DataType::invalid)
628             throw invalid_type_exception();
629 #endif
630         if (isXe)
631             opBranch<true, false>(Opcode::jmpi, mod, null, jip);
632         else
633             opX(Opcode::jmpi, DataType::d, mod, ip, ip, jip);
634     }
635     template <typename DT = void>
line(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1)636     void line(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
637         if (hw >= HW::Gen11) unsupported();
638         opX(Opcode::line, getDataType<DT>(), mod, dst, src0, src1);
639     }
640     template <typename DT = void>
line(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const Immediate & src1)641     void line(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) {
642         if (hw >= HW::Gen11) unsupported();
643         opX(Opcode::line, getDataType<DT>(), mod, dst, src0, src1);
644     }
645     template <typename DT = void>
lrp(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1,const RegData & src2)646     void lrp(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1, const RegData &src2) {
647         opX(Opcode::lrp, getDataType<DT>(), mod, dst, src0, src1, src2);
648     }
649     template <typename DT = void>
lzd(const InstructionModifier & mod,const RegData & dst,const RegData & src0)650     void lzd(const InstructionModifier &mod, const RegData &dst, const RegData &src0) {
651         opX(Opcode::lzd, getDataType<DT>(), mod, dst, src0);
652     }
653     template <typename DT = void>
lzd(const InstructionModifier & mod,const RegData & dst,const Immediate & src0)654     void lzd(const InstructionModifier &mod, const RegData &dst, const Immediate &src0) {
655         opX(Opcode::lzd, getDataType<DT>(), mod, dst, src0);
656     }
657     template <typename DT = void>
mac(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1)658     void mac(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
659         opX(Opcode::mac, getDataType<DT>(), mod, dst, src0, src1);
660     }
661     template <typename DT = void>
mac(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const Immediate & src1)662     void mac(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) {
663         opX(Opcode::mac, getDataType<DT>(), mod, dst, src0, src1);
664     }
665     template <typename DT = void>
mach(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1)666     void mach(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
667         opX(Opcode::mach, getDataType<DT>(), mod | AccWrEn, dst, src0, src1);
668     }
669     template <typename DT = void>
mach(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const Immediate & src1)670     void mach(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) {
671         opX(Opcode::mach, getDataType<DT>(), mod | AccWrEn, dst, src0, src1);
672     }
673     template <typename DT = void>
macl(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1)674     void macl(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
675 #ifdef NGEN_SAFE
676         if (hw < HW::Gen10) unsupported();
677 #endif
678         opX(Opcode::mach, getDataType<DT>(), mod, dst, src0, src1);
679     }
680     template <typename DT = void>
macl(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const Immediate & src1)681     void macl(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) {
682 #ifdef NGEN_SAFE
683         if (hw < HW::Gen10) unsupported();
684 #endif
685         opX(Opcode::mach, getDataType<DT>(), mod, dst, src0, src1);
686     }
687     template <typename DT = void>
mad(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1,const RegData & src2)688     void mad(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1, const RegData &src2) {
689         opX(Opcode::mad, getDataType<DT>(), mod, dst, src0, src1, src2);
690     }
691     template <typename DT = void>
mad(const InstructionModifier & mod,const RegData & dst,const Immediate & src0,const RegData & src1,const RegData & src2)692     void mad(const InstructionModifier &mod, const RegData &dst, const Immediate &src0, const RegData &src1, const RegData &src2) {
693         opX(Opcode::mad, getDataType<DT>(), mod, dst, src0, src1, src2);
694     }
695     template <typename DT = void>
mad(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1,const Immediate & src2)696     void mad(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1, const Immediate &src2) {
697         opX(Opcode::mad, getDataType<DT>(), mod, dst, src0, src1, src2);
698     }
699     template <typename DT = void>
mad(const InstructionModifier & mod,const RegData & dst,const Immediate & src0,const RegData & src1,const Immediate & src2)700     void mad(const InstructionModifier &mod, const RegData &dst, const Immediate &src0, const RegData &src1, const Immediate &src2) {
701         opX(Opcode::mad, getDataType<DT>(), mod, dst, src0, src1, src2);
702     }
703     template <typename DT = void, HW hw_ = hw>
704     typename std::enable_if<hwLE(hw_, HW::Gen9)>::type
madm(const InstructionModifier & mod,const ExtendedReg & dst,const ExtendedReg & src0,const ExtendedReg & src1,const ExtendedReg & src2)705     madm(const InstructionModifier &mod, const ExtendedReg &dst, const ExtendedReg &src0, const ExtendedReg &src1, const ExtendedReg &src2) {
706         opX(Opcode::madm, getDataType<DT>(), mod, extToAlign16(dst), extToAlign16(src0), extToAlign16(src1), extToAlign16(src2));
707     }
708     template <typename DT = void, HW hw_ = hw>
709     typename std::enable_if<hwGT(hw_, HW::Gen9)>::type
madm(const InstructionModifier & mod,const ExtendedReg & dst,ExtendedReg src0,ExtendedReg src1,const ExtendedReg & src2)710     madm(const InstructionModifier &mod, const ExtendedReg &dst, ExtendedReg src0, ExtendedReg src1, const ExtendedReg &src2) {
711         src0.getBase().setRegion(4,4,1);
712         src1.getBase().setRegion(4,4,1);
713         opX(Opcode::madm, getDataType<DT>(), mod, dst, src0, src1, src2);
714     }
715     template <typename DT = void>
math(const InstructionModifier & mod,MathFunction fc,const RegData & dst,const RegData & src0)716     void math(const InstructionModifier &mod, MathFunction fc, const RegData &dst, const RegData &src0) {
717 #ifdef NGEN_SAFE
718         if (mathArgCount(fc) != 1) throw invalid_operand_count_exception();
719 #endif
720         opMath(Opcode::math, getDataType<DT>(), mod, fc, dst, src0);
721     }
722     template <typename DT = void>
math(const InstructionModifier & mod,MathFunction fc,const RegData & dst,const RegData & src0,const RegData & src1)723     void math(const InstructionModifier &mod, MathFunction fc, const RegData &dst, const RegData &src0, const RegData &src1) {
724 #ifdef NGEN_SAFE
725         if (mathArgCount(fc) != 2) throw invalid_operand_count_exception();
726 #endif
727         opMath(Opcode::math, getDataType<DT>(), mod, fc, dst, src0, src1);
728     }
729     template <typename DT = void>
math(const InstructionModifier & mod,MathFunction fc,const RegData & dst,const RegData & src0,const Immediate & src1)730     void math(const InstructionModifier &mod, MathFunction fc, const RegData &dst, const RegData &src0, const Immediate &src1) {
731 #ifdef NGEN_SAFE
732         if (fc == MathFunction::invm || fc == MathFunction::rsqtm) throw invalid_operand_exception();
733 #endif
734         opMath(Opcode::math, getDataType<DT>(), mod, fc, dst, src0, src1.forceInt32());
735     }
736     template <typename DT = void, HW hw_ = hw>
737     typename std::enable_if<hwLT(hw_, HW::Gen11)>::type
math(const InstructionModifier & mod,MathFunction fc,const ExtendedReg & dst,const ExtendedReg & src0)738     math(const InstructionModifier &mod, MathFunction fc, const ExtendedReg &dst, const ExtendedReg &src0) {
739 #ifdef NGEN_SAFE
740         if (fc != MathFunction::rsqtm) throw invalid_operand_exception();
741 #endif
742         opMath(Opcode::math, getDataType<DT>(), mod, fc, extToAlign16(dst), extToAlign16(src0));
743     }
744     template <typename DT = void, HW hw_ = hw>
745     typename std::enable_if<hwGE(hw_, HW::Gen11)>::type
math(const InstructionModifier & mod,MathFunction fc,const ExtendedReg & dst,ExtendedReg src0)746     math(const InstructionModifier &mod, MathFunction fc, const ExtendedReg &dst, ExtendedReg src0) {
747 #ifdef NGEN_SAFE
748         if (fc != MathFunction::rsqtm) throw invalid_operand_exception();
749 #endif
750         if (hw == HW::Gen11)
751             src0.getBase().setRegion(2,2,1);
752         else
753             src0.getBase().setRegion(1,1,0);
754         opMath(Opcode::math, getDataType<DT>(), mod, fc, dst, src0);
755     }
756     template <typename DT = void, HW hw_ = hw>
757     typename std::enable_if<hwLT(hw_, HW::Gen11)>::type
math(const InstructionModifier & mod,MathFunction fc,const ExtendedReg & dst,const ExtendedReg & src0,const ExtendedReg & src1)758     math(const InstructionModifier &mod, MathFunction fc, const ExtendedReg &dst, const ExtendedReg &src0, const ExtendedReg &src1) {
759 #ifdef NGEN_SAFE
760         if (fc != MathFunction::invm) throw invalid_operand_exception();
761 #endif
762         opMath(Opcode::math, getDataType<DT>(), mod, fc, extToAlign16(dst), extToAlign16(src0), extToAlign16(src1));
763     }
764     template <typename DT = void, HW hw_ = hw>
765     typename std::enable_if<hwGE(hw_, HW::Gen11)>::type
math(const InstructionModifier & mod,MathFunction fc,const ExtendedReg & dst,ExtendedReg src0,ExtendedReg src1)766     math(const InstructionModifier &mod, MathFunction fc, const ExtendedReg &dst, ExtendedReg src0, ExtendedReg src1) {
767 #ifdef NGEN_SAFE
768         if (fc != MathFunction::invm) throw invalid_operand_exception();
769 #endif
770         if (hw == HW::Gen11) {
771             src0.getBase().setRegion(2,2,1);
772             src1.getBase().setRegion(2,2,1);
773         } else {
774             src0.getBase().setRegion(1,1,0);
775             src1.getBase().setRegion(1,1,0);
776         }
777         opMath(Opcode::math, getDataType<DT>(), mod, fc, dst, src0, src1);
778     }
779     template <typename DT = void>
mov(const InstructionModifier & mod,const RegData & dst,const RegData & src0)780     void mov(const InstructionModifier &mod, const RegData &dst, const RegData &src0) {
781         opX(isXe ? Opcode::mov_xe : Opcode::mov, getDataType<DT>(), mod, dst, src0);
782     }
783     template <typename DT = void>
mov(const InstructionModifier & mod,const RegData & dst,const Immediate & src0)784     void mov(const InstructionModifier &mod, const RegData &dst, const Immediate &src0) {
785         opX(isXe ? Opcode::mov_xe : Opcode::mov, getDataType<DT>(), mod, dst, src0);
786     }
787     template <typename DT = void>
movi(const InstructionModifier & mod,const RegData & dst,const RegData & src0)788     void movi(const InstructionModifier &mod, const RegData &dst, const RegData &src0) {
789         opX(isXe ? Opcode::movi_xe : Opcode::movi, getDataType<DT>(), mod, dst, src0);
790     }
791     template <typename DT = void>
mul(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1)792     void mul(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
793         opX(Opcode::mul, getDataType<DT>(), mod, dst, src0, src1);
794     }
795     template <typename DT = void>
mul(const InstructionModifier & mod,const RegData & dst,const RegData & src0,Immediate src1)796     void mul(const InstructionModifier &mod, const RegData &dst, const RegData &src0, Immediate src1) {
797         if (dst.getBytes() == 8)
798             src1 = src1.forceInt32();
799         opX(Opcode::mul, getDataType<DT>(), mod, dst, src0, src1);
800     }
nop()801     void nop() {
802         opNop(isXe ? Opcode::nop_xe : Opcode::nop);
803     }
nop(const InstructionModifier & mod)804     void nop(const InstructionModifier &mod) {
805         opX(isXe ? Opcode::nop_xe : Opcode::nop, DataType::invalid, mod, null, null, null);
806     }
807     template <typename DT = void>
not_(const InstructionModifier & mod,const RegData & dst,const RegData & src0)808     void not_(const InstructionModifier &mod, const RegData &dst, const RegData &src0) {
809         opX(isXe ? Opcode::not_xe : Opcode::not_, getDataType<DT>(), mod, dst, src0);
810     }
811     template <typename DT = void>
not_(const InstructionModifier & mod,const RegData & dst,const Immediate & src0)812     void not_(const InstructionModifier &mod, const RegData &dst, const Immediate &src0) {
813         opX(isXe ? Opcode::not_xe : Opcode::not_, getDataType<DT>(), mod, dst, src0);
814     }
815 #ifndef NGEN_NO_OP_NAMES
816     template <typename DT = void>
not(const InstructionModifier & mod,const RegData & dst,const RegData & src0)817     void not(const InstructionModifier &mod, const RegData &dst, const RegData &src0) {
818         not_<DT>(mod, dst, src0);
819     }
820     template <typename DT = void>
not(const InstructionModifier & mod,const RegData & dst,const Immediate & src0)821     void not(const InstructionModifier &mod, const RegData &dst, const Immediate &src0) {
822         not_<DT>(mod, dst, src0);
823     }
824 #endif
825     template <typename DT = void>
or_(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1)826     void or_(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
827         opX(isXe ? Opcode::or_xe : Opcode::or_, getDataType<DT>(), mod, dst, src0, src1);
828     }
829     template <typename DT = void>
or_(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const Immediate & src1)830     void or_(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) {
831         opX(isXe ? Opcode::or_xe : Opcode::or_, getDataType<DT>(), mod, dst, src0, src1);
832     }
833 #ifndef NGEN_NO_OP_NAMES
834     template <typename DT = void>
or(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1)835     void or(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
836         or_<DT>(mod, dst, src0, src1);
837     }
838     template <typename DT = void>
or(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const Immediate & src1)839     void or(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) {
840         or_<DT>(mod, dst, src0, src1);
841     }
842 #endif
843     template <typename DT = void>
pln(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1)844     void pln(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
845         if (hw >= HW::Gen11) unsupported();
846         opX(Opcode::pln, getDataType<DT>(), mod, dst, src0, src1);
847     }
ret(const InstructionModifier & mod,RegData src0)848     void ret(const InstructionModifier &mod, RegData src0) {
849         src0.setRegion(2,2,1);
850         if (isXe)
851             opBranch<true, true>(Opcode::ret, mod, null, src0);
852         else
853             opX<true>(Opcode::ret, DataType::ud, mod, null, src0);
854     }
855     template <typename DT = void>
rndd(const InstructionModifier & mod,const RegData & dst,const RegData & src0)856     void rndd(const InstructionModifier &mod, const RegData &dst, const RegData &src0) {
857         opX(Opcode::rndd, getDataType<DT>(), mod, dst, src0);
858     }
859     template <typename DT = void>
rndd(const InstructionModifier & mod,const RegData & dst,const Immediate & src0)860     void rndd(const InstructionModifier &mod, const RegData &dst, const Immediate &src0) {
861         opX(Opcode::rndd, getDataType<DT>(), mod, dst, src0);
862     }
863     template <typename DT = void>
rnde(const InstructionModifier & mod,const RegData & dst,const RegData & src0)864     void rnde(const InstructionModifier &mod, const RegData &dst, const RegData &src0) {
865         opX(Opcode::rnde, getDataType<DT>(), mod, dst, src0);
866     }
867     template <typename DT = void>
rnde(const InstructionModifier & mod,const RegData & dst,const Immediate & src0)868     void rnde(const InstructionModifier &mod, const RegData &dst, const Immediate &src0) {
869         opX(Opcode::rnde, getDataType<DT>(), mod, dst, src0);
870     }
871     template <typename DT = void>
rndu(const InstructionModifier & mod,const RegData & dst,const RegData & src0)872     void rndu(const InstructionModifier &mod, const RegData &dst, const RegData &src0) {
873         opX(Opcode::rndu, getDataType<DT>(), mod, dst, src0);
874     }
875     template <typename DT = void>
rndu(const InstructionModifier & mod,const RegData & dst,const Immediate & src0)876     void rndu(const InstructionModifier &mod, const RegData &dst, const Immediate &src0) {
877         opX(Opcode::rndu, getDataType<DT>(), mod, dst, src0);
878     }
879     template <typename DT = void>
rndz(const InstructionModifier & mod,const RegData & dst,const RegData & src0)880     void rndz(const InstructionModifier &mod, const RegData &dst, const RegData &src0) {
881         opX(Opcode::rndz, getDataType<DT>(), mod, dst, src0);
882     }
883     template <typename DT = void>
rndz(const InstructionModifier & mod,const RegData & dst,const Immediate & src0)884     void rndz(const InstructionModifier &mod, const RegData &dst, const Immediate &src0) {
885         opX(Opcode::rndz, getDataType<DT>(), mod, dst, src0);
886     }
887     template <typename DT = void>
rol(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1)888     void rol(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
889         opX(isXe ? Opcode::rol_xe : Opcode::rol, getDataType<DT>(), mod, dst, src0, src1);
890     }
891     template <typename DT = void>
rol(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const Immediate & src1)892     void rol(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) {
893         opX(isXe ? Opcode::rol_xe : Opcode::rol, getDataType<DT>(), mod, dst, src0, src1);
894     }
895     template <typename DT = void>
ror(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1)896     void ror(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
897         opX(isXe ? Opcode::ror_xe : Opcode::ror, getDataType<DT>(), mod, dst, src0, src1);
898     }
899     template <typename DT = void>
ror(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const Immediate & src1)900     void ror(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) {
901         opX(isXe ? Opcode::ror_xe : Opcode::ror, getDataType<DT>(), mod, dst, src0, src1);
902     }
903     template <typename DT = void>
sad2(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1)904     void sad2(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
905         if (hw >= HW::Xe_LP) unsupported();
906         opX(Opcode::sad2, getDataType<DT>(), mod, dst, src0, src1);
907     }
908     template <typename DT = void>
sad2(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const Immediate & src1)909     void sad2(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) {
910         if (hw >= HW::Xe_LP) unsupported();
911         opX(Opcode::sad2, getDataType<DT>(), mod, dst, src0, src1);
912     }
913     template <typename DT = void>
sada2(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1)914     void sada2(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
915         if (hw >= HW::Xe_LP) unsupported();
916         opX(Opcode::sada2, getDataType<DT>(), mod, dst, src0, src1);
917     }
918     template <typename DT = void>
sada2(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const Immediate & src1)919     void sada2(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) {
920         if (hw >= HW::Xe_LP) unsupported();
921         opX(Opcode::sada2, getDataType<DT>(), mod, dst, src0, src1);
922     }
923     template <typename DT = void>
sel(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1)924     void sel(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
925         opX(isXe ? Opcode::sel_xe : Opcode::sel, getDataType<DT>(), mod, dst, src0, src1);
926     }
927     template <typename DT = void>
sel(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const Immediate & src1)928     void sel(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) {
929         opX(isXe ? Opcode::sel_xe : Opcode::sel, getDataType<DT>(), mod, dst, src0, src1);
930     }
931 
932     /* Xe-style sends */
send(const InstructionModifier & mod,SharedFunction sf,const RegData & dst,const RegData & src0,const RegData & src1,uint32_t exdesc,uint32_t desc)933     void send(const InstructionModifier &mod, SharedFunction sf, const RegData &dst, const RegData &src0, const RegData &src1, uint32_t exdesc, uint32_t desc) {
934         opSend(Opcode::send, mod, sf, dst, src0, src1, exdesc, desc);
935     }
send(const InstructionModifier & mod,SharedFunction sf,const RegData & dst,const RegData & src0,const RegData & src1,const RegData & exdesc,uint32_t desc)936     void send(const InstructionModifier &mod, SharedFunction sf, const RegData &dst, const RegData &src0, const RegData &src1, const RegData &exdesc, uint32_t desc) {
937         opSend(Opcode::send, mod, sf, dst, src0, src1, exdesc, desc);
938     }
send(const InstructionModifier & mod,SharedFunction sf,const RegData & dst,const RegData & src0,const RegData & src1,uint32_t exdesc,const RegData & desc)939     void send(const InstructionModifier &mod, SharedFunction sf, const RegData &dst, const RegData &src0, const RegData &src1, uint32_t exdesc, const RegData &desc) {
940         opSend(Opcode::send, mod, sf, dst, src0, src1, exdesc, desc);
941     }
send(const InstructionModifier & mod,SharedFunction sf,const RegData & dst,const RegData & src0,const RegData & src1,const RegData & exdesc,const RegData & desc)942     void send(const InstructionModifier &mod, SharedFunction sf, const RegData &dst, const RegData &src0, const RegData &src1, const RegData &exdesc, const RegData &desc) {
943         opSend(Opcode::send, mod, sf, dst, src0, src1, exdesc, desc);
944     }
sendc(const InstructionModifier & mod,SharedFunction sf,const RegData & dst,const RegData & src0,const RegData & src1,uint32_t exdesc,uint32_t desc)945     void sendc(const InstructionModifier &mod, SharedFunction sf, const RegData &dst, const RegData &src0, const RegData &src1, uint32_t exdesc, uint32_t desc) {
946         opSend(Opcode::sendc, mod, sf, dst, src0, src1, exdesc, desc);
947     }
sendc(const InstructionModifier & mod,SharedFunction sf,const RegData & dst,const RegData & src0,const RegData & src1,const RegData & exdesc,uint32_t desc)948     void sendc(const InstructionModifier &mod, SharedFunction sf, const RegData &dst, const RegData &src0, const RegData &src1, const RegData &exdesc, uint32_t desc) {
949         opSend(Opcode::sendc, mod, sf, dst, src0, src1, exdesc, desc);
950     }
sendc(const InstructionModifier & mod,SharedFunction sf,const RegData & dst,const RegData & src0,const RegData & src1,uint32_t exdesc,const RegData & desc)951     void sendc(const InstructionModifier &mod, SharedFunction sf, const RegData &dst, const RegData &src0, const RegData &src1, uint32_t exdesc, const RegData &desc) {
952         opSend(Opcode::sendc, mod, sf, dst, src0, src1, exdesc, desc);
953     }
sendc(const InstructionModifier & mod,SharedFunction sf,const RegData & dst,const RegData & src0,const RegData & src1,const RegData & exdesc,const RegData & desc)954     void sendc(const InstructionModifier &mod, SharedFunction sf, const RegData &dst, const RegData &src0, const RegData &src1, const RegData &exdesc, const RegData &desc) {
955         opSend(Opcode::sendc, mod, sf, dst, src0, src1, exdesc, desc);
956     }
957     /* Pre-Xe-style sends; also supported on Xe. */
send(const InstructionModifier & mod,const RegData & dst,const RegData & src0,uint32_t exdesc,uint32_t desc)958     void send(const InstructionModifier &mod, const RegData &dst, const RegData &src0, uint32_t exdesc, uint32_t desc) {
959         opSend(Opcode::send, mod, dst, src0, exdesc, desc);
960     }
send(const InstructionModifier & mod,const RegData & dst,const RegData & src0,uint32_t exdesc,const RegData & desc)961     void send(const InstructionModifier &mod, const RegData &dst, const RegData &src0, uint32_t exdesc, const RegData &desc) {
962         opSend(Opcode::send, mod, dst, src0, exdesc, desc);
963     }
sendc(const InstructionModifier & mod,const RegData & dst,const RegData & src0,uint32_t exdesc,uint32_t desc)964     void sendc(const InstructionModifier &mod, const RegData &dst, const RegData &src0, uint32_t exdesc, uint32_t desc) {
965         opSend(Opcode::sendc, mod, dst, src0, exdesc, desc);
966     }
sendc(const InstructionModifier & mod,const RegData & dst,const RegData & src0,uint32_t exdesc,const RegData & desc)967     void sendc(const InstructionModifier &mod, const RegData &dst, const RegData &src0, uint32_t exdesc, const RegData &desc) {
968         opSend(Opcode::sendc, mod, dst, src0, exdesc, desc);
969     }
sends(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1,uint32_t exdesc,uint32_t desc)970     void sends(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1, uint32_t exdesc, uint32_t desc) {
971         opSends(Opcode::sends, mod, dst, src0, src1, exdesc, desc);
972     }
sends(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1,uint32_t exdesc,const RegData & desc)973     void sends(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1, uint32_t exdesc, const RegData &desc) {
974         opSends(Opcode::sends, mod, dst, src0, src1, exdesc, desc);
975     }
sends(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1,const RegData & exdesc,uint32_t desc)976     void sends(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1, const RegData &exdesc, uint32_t desc) {
977         opSends(Opcode::sends, mod, dst, src0, src1, exdesc, desc);
978     }
sends(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1,const RegData & exdesc,const RegData & desc)979     void sends(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1, const RegData &exdesc, const RegData &desc) {
980         opSends(Opcode::sends, mod, dst, src0, src1, exdesc, desc);
981     }
sendsc(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1,uint32_t exdesc,uint32_t desc)982     void sendsc(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1, uint32_t exdesc, uint32_t desc) {
983         opSends(Opcode::sendsc, mod, dst, src0, src1, exdesc, desc);
984     }
sendsc(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1,uint32_t exdesc,const RegData & desc)985     void sendsc(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1, uint32_t exdesc, const RegData &desc) {
986         opSends(Opcode::sendsc, mod, dst, src0, src1, exdesc, desc);
987     }
sendsc(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1,const RegData & exdesc,uint32_t desc)988     void sendsc(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1, const RegData &exdesc, uint32_t desc) {
989         opSends(Opcode::sendsc, mod, dst, src0, src1, exdesc, desc);
990     }
sendsc(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1,const RegData & exdesc,const RegData & desc)991     void sendsc(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1, const RegData &exdesc, const RegData &desc) {
992         opSends(Opcode::sendsc, mod, dst, src0, src1, exdesc, desc);
993     }
994 
995     template <typename DT = void>
shl(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1)996     void shl(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
997         opX(isXe ? Opcode::shl_xe : Opcode::shl, getDataType<DT>(), mod, dst, src0, src1);
998     }
999     template <typename DT = void>
shl(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const Immediate & src1)1000     void shl(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) {
1001         opX(isXe ? Opcode::shl_xe : Opcode::shl, getDataType<DT>(), mod, dst, src0, src1);
1002     }
1003     template <typename DT = void>
shr(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1)1004     void shr(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
1005         opX(isXe ? Opcode::shr_xe : Opcode::shr, getDataType<DT>(), mod, dst, src0, src1);
1006     }
1007     template <typename DT = void>
shr(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const Immediate & src1)1008     void shr(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) {
1009         opX(isXe ? Opcode::shr_xe : Opcode::shr, getDataType<DT>(), mod, dst, src0, src1);
1010     }
1011     template <typename DT = void>
smov(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const Immediate & src1)1012     void smov(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) {
1013         opX(isXe ? Opcode::smov_xe : Opcode::smov, getDataType<DT>(), mod, dst, src0, src1);
1014     }
1015     template <typename DT = void>
subb(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1)1016     void subb(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
1017         opX(Opcode::subb, getDataType<DT>(), mod | AccWrEn, dst, src0, src1);
1018     }
1019     template <typename DT = void>
subb(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const Immediate & src1)1020     void subb(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) {
1021         opX(Opcode::subb, getDataType<DT>(), mod | AccWrEn, dst, src0, src1);
1022     }
wait(const InstructionModifier & mod,const RegData & nreg)1023     void wait(const InstructionModifier &mod, const RegData &nreg) {
1024 #ifdef NGEN_SAFE
1025         if (!nreg.isARF() || nreg.getARFType() != ARFType::n) throw invalid_arf_exception();
1026 #endif
1027         opX(Opcode::wait, DataType::invalid, mod, nreg, nreg);
1028     }
while_(const InstructionModifier & mod,Label & jip)1029     void while_(const InstructionModifier &mod, Label &jip) {
1030         opBranch(Opcode::while_, mod, null, jip);
1031     }
1032     template <typename DT = void>
xor_(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1)1033     void xor_(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
1034         opX(isXe ? Opcode::xor_xe : Opcode::xor_, getDataType<DT>(), mod, dst, src0, src1);
1035     }
1036     template <typename DT = void>
xor_(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const Immediate & src1)1037     void xor_(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) {
1038         opX(isXe ? Opcode::xor_xe : Opcode::xor_, getDataType<DT>(), mod, dst, src0, src1);
1039     }
1040 #ifndef NGEN_NO_OP_NAMES
1041     template <typename DT = void>
xor(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1)1042     void xor(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
1043         xor_<DT>(mod, dst, src0, src1);
1044     }
1045     template <typename DT = void>
xor(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const Immediate & src1)1046     void xor(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) {
1047         xor_<DT>(mod, dst, src0, src1);
1048     }
1049 #endif
1050 
1051 private:
1052     struct Sync {
1053         BinaryCodeGenerator<hw> &parent;
1054 
Syncngen::BinaryCodeGenerator::Sync1055         Sync(BinaryCodeGenerator<hw> *parent_) : parent(*parent_) {}
1056 
operator ()ngen::BinaryCodeGenerator::Sync1057         void operator()(SyncFunction fc, const InstructionModifier &mod = InstructionModifier()) {
1058             parent.opSync(Opcode::sync, fc, mod);
1059         }
operator ()ngen::BinaryCodeGenerator::Sync1060         void operator()(SyncFunction fc, const RegData &src0) {
1061             this->operator()(fc, InstructionModifier(), src0);
1062         }
operator ()ngen::BinaryCodeGenerator::Sync1063         void operator()(SyncFunction fc, const InstructionModifier &mod, const RegData &src0) {
1064             parent.opSync(Opcode::sync, fc, mod, src0);
1065         }
operator ()ngen::BinaryCodeGenerator::Sync1066         void operator()(SyncFunction fc, int src0) {
1067             this->operator()(fc, InstructionModifier(), src0);
1068         }
operator ()ngen::BinaryCodeGenerator::Sync1069         void operator()(SyncFunction fc, const InstructionModifier &mod, uint32_t src0) {
1070             parent.opSync(Opcode::sync, fc, mod, Immediate::ud(src0));
1071         }
allrdngen::BinaryCodeGenerator::Sync1072         void allrd() {
1073             allrd(null.ud(0)(0, 1, 1));
1074         }
allrdngen::BinaryCodeGenerator::Sync1075         void allrd(const InstructionModifier &mod) {
1076             allrd(mod, null.ud(0)(0, 1, 1));
1077         }
allrdngen::BinaryCodeGenerator::Sync1078         void allrd(const RegData &src0) {
1079             allrd(InstructionModifier(), src0);
1080         }
allrdngen::BinaryCodeGenerator::Sync1081         void allrd(const InstructionModifier &mod, const RegData &src0) {
1082             this->operator()(SyncFunction::allrd, mod, src0);
1083         }
allrdngen::BinaryCodeGenerator::Sync1084         void allrd(uint32_t src0) {
1085             allrd(InstructionModifier(), src0);
1086         }
allrdngen::BinaryCodeGenerator::Sync1087         void allrd(const InstructionModifier &mod, uint32_t src0) {
1088             this->operator()(SyncFunction::allrd, mod, src0);
1089         }
allwrngen::BinaryCodeGenerator::Sync1090         void allwr() {
1091             allwr(null);
1092         }
allwrngen::BinaryCodeGenerator::Sync1093         void allwr(const InstructionModifier &mod) {
1094             allwr(mod, null);
1095         }
allwrngen::BinaryCodeGenerator::Sync1096         void allwr(const RegData &src0) {
1097             allwr(InstructionModifier(), src0);
1098         }
allwrngen::BinaryCodeGenerator::Sync1099         void allwr(const InstructionModifier &mod, const RegData &src0) {
1100             this->operator()(SyncFunction::allwr, mod, src0);
1101         }
allwrngen::BinaryCodeGenerator::Sync1102         void allwr(uint32_t src0) {
1103             allwr(InstructionModifier(), src0);
1104         }
allwrngen::BinaryCodeGenerator::Sync1105         void allwr(const InstructionModifier &mod, uint32_t src0) {
1106             this->operator()(SyncFunction::allwr, mod, src0);
1107         }
barngen::BinaryCodeGenerator::Sync1108         void bar(const InstructionModifier &mod = InstructionModifier()) {
1109             this->operator()(SyncFunction::bar, mod);
1110         }
hostngen::BinaryCodeGenerator::Sync1111         void host(const InstructionModifier &mod = InstructionModifier()) {
1112             this->operator()(SyncFunction::host, mod);
1113         }
nopngen::BinaryCodeGenerator::Sync1114         void nop(const InstructionModifier &mod = InstructionModifier()) {
1115             this->operator()(SyncFunction::nop, mod);
1116         }
1117     };
1118 public:
1119     Sync sync;
1120 
1121 
1122 private:
1123     struct Load {
1124         BinaryCodeGenerator<hw> &parent;
1125 
Loadngen::BinaryCodeGenerator::Load1126         Load(BinaryCodeGenerator<hw> *parent_) : parent(*parent_) {}
1127 
1128         template <typename DataSpec>
operator ()ngen::BinaryCodeGenerator::Load1129         void operator()(const InstructionModifier &mod, const RegData &dst, const DataSpec &spec, AddressBase base, const RegData &addr)
1130         {
1131             MessageDescriptor desc;
1132             ExtendedMessageDescriptor exdesc;
1133 
1134             encodeLoadDescriptors(hw, desc, exdesc, mod, dst, spec, base, addr);
1135             parent.send(mod, dst, addr, exdesc.all, desc.all);
1136         }
1137 
1138     };
1139 
1140     struct Store {
1141         BinaryCodeGenerator<hw> &parent;
1142 
Storengen::BinaryCodeGenerator::Store1143         Store(BinaryCodeGenerator<hw> *parent_) : parent(*parent_) {}
1144 
1145         template <typename DataSpec>
operator ()ngen::BinaryCodeGenerator::Store1146         void operator()(const InstructionModifier &mod, const DataSpec &spec, AddressBase base, const RegData &addr, const RegData &data)
1147         {
1148             MessageDescriptor desc;
1149             ExtendedMessageDescriptor exdesc;
1150 
1151             encodeStoreDescriptors(hw, desc, exdesc, mod, spec, base, addr);
1152             parent.sends(mod, NullRegister(), addr, data, exdesc.all, desc.all);
1153         }
1154 
1155     };
1156 
1157     struct Atomic_ {
1158         BinaryCodeGenerator<hw> &parent;
1159 
Atomic_ngen::BinaryCodeGenerator::Atomic_1160         Atomic_(BinaryCodeGenerator<hw> *parent_) : parent(*parent_) {}
1161 
1162         template <typename DataSpec>
operator ()ngen::BinaryCodeGenerator::Atomic_1163         void operator()(AtomicOp op, const InstructionModifier &mod, const RegData &dst, const DataSpec &spec, AddressBase base, const RegData &addr, const RegData &data = NullRegister())
1164         {
1165             MessageDescriptor desc;
1166             ExtendedMessageDescriptor exdesc;
1167 
1168             encodeAtomicDescriptors(hw, desc, exdesc, op, mod, dst, spec, base, addr);
1169             if (data.isNull())
1170                 parent.send(mod, dst, addr, exdesc.all, desc.all);
1171             else
1172                 parent.sends(mod, dst, addr, data, exdesc.all, desc.all);
1173         }
1174         template <typename DataSpec>
operator ()ngen::BinaryCodeGenerator::Atomic_1175         void operator()(AtomicOp op, const InstructionModifier &mod, const DataSpec &spec, AddressBase base, const RegData &addr, const RegData &data = NullRegister())
1176         {
1177             (*this)(op, mod, NullRegister(), spec, base, addr, data);
1178         }
1179 
1180     };
1181 public:
1182     Load load;
1183     Store store;
1184     Atomic_ atomic;
1185 
1186 #include "ngen_pseudo.hpp"
1187 };
1188 
1189 #define NGEN_FORWARD(hw) \
1190 using InstructionStream = typename ngen::BinaryCodeGenerator<hw>::InstructionStream; \
1191 using ngen::BinaryCodeGenerator<hw>::isXe; \
1192 template <typename DT = void, typename... Targs> void add(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::template add<DT>(std::forward<Targs>(args)...); } \
1193 template <typename DT = void, typename... Targs> void addc(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::template addc<DT>(std::forward<Targs>(args)...); } \
1194 template <typename DT = void, typename... Targs> void and_(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::template and_<DT>(std::forward<Targs>(args)...); } \
1195 template <typename DT = void, typename... Targs> void asr(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::template asr<DT>(std::forward<Targs>(args)...); } \
1196 template <typename DT = void, typename... Targs> void avg(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::template avg<DT>(std::forward<Targs>(args)...); } \
1197 template <typename DT = void, typename... Targs> void bfe(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::template bfe<DT>(std::forward<Targs>(args)...); } \
1198 template <typename DT = void, typename... Targs> void bfi1(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::template bfi1<DT>(std::forward<Targs>(args)...); } \
1199 template <typename DT = void, typename... Targs> void bfi2(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::template bfi2<DT>(std::forward<Targs>(args)...); } \
1200 template <typename DT = void, typename... Targs> void bfrev(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::template bfrev<DT>(std::forward<Targs>(args)...); } \
1201 template <typename DT = void, typename... Targs> void cbit(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::template cbit<DT>(std::forward<Targs>(args)...); } \
1202 template <typename DT = void, typename... Targs> void cmp(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::template cmp<DT>(std::forward<Targs>(args)...); } \
1203 template <typename DT = void, typename... Targs> void cmpn(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::template cmpn<DT>(std::forward<Targs>(args)...); } \
1204 template <typename DT = void, typename... Targs> void csel(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::template csel<DT>(std::forward<Targs>(args)...); } \
1205 template <typename DT = void, typename... Targs> void dp2(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::template dp2<DT>(std::forward<Targs>(args)...); } \
1206 template <typename DT = void, typename... Targs> void dp3(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::template dp3<DT>(std::forward<Targs>(args)...); } \
1207 template <typename DT = void, typename... Targs> void dp4(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::template dp4<DT>(std::forward<Targs>(args)...); } \
1208 template <typename DT = void, typename... Targs> void dph(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::template dph<DT>(std::forward<Targs>(args)...); } \
1209 template <typename DT = void, typename... Targs> void fbh(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::template fbh<DT>(std::forward<Targs>(args)...); } \
1210 template <typename DT = void, typename... Targs> void fbl(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::template fbl<DT>(std::forward<Targs>(args)...); } \
1211 template <typename DT = void, typename... Targs> void frc(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::template frc<DT>(std::forward<Targs>(args)...); } \
1212 template <typename DT = void, typename... Targs> void line(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::template line<DT>(std::forward<Targs>(args)...); } \
1213 template <typename DT = void, typename... Targs> void lrp(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::template lrp<DT>(std::forward<Targs>(args)...); } \
1214 template <typename DT = void, typename... Targs> void lzd(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::template lzd<DT>(std::forward<Targs>(args)...); } \
1215 template <typename DT = void, typename... Targs> void mac(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::template mac<DT>(std::forward<Targs>(args)...); } \
1216 template <typename DT = void, typename... Targs> void macl(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::template macl<DT>(std::forward<Targs>(args)...); } \
1217 template <typename DT = void, typename... Targs> void mach(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::template mach<DT>(std::forward<Targs>(args)...); } \
1218 template <typename DT = void, typename... Targs> void mad(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::template mad<DT>(std::forward<Targs>(args)...); } \
1219 template <typename DT = void, typename... Targs> void madm(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::template madm<DT>(std::forward<Targs>(args)...); } \
1220 template <typename DT = void, typename... Targs> void math(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::template math<DT>(std::forward<Targs>(args)...); } \
1221 template <typename DT = void, typename... Targs> void mov(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::template mov<DT>(std::forward<Targs>(args)...); } \
1222 template <typename DT = void, typename... Targs> void movi(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::template movi<DT>(std::forward<Targs>(args)...); } \
1223 template <typename DT = void, typename... Targs> void mul(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::template mul<DT>(std::forward<Targs>(args)...); } \
1224 template <typename DT = void, typename... Targs> void not_(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::template not_<DT>(std::forward<Targs>(args)...); } \
1225 template <typename DT = void, typename... Targs> void or_(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::template or_<DT>(std::forward<Targs>(args)...); } \
1226 template <typename DT = void, typename... Targs> void pln(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::template pln<DT>(std::forward<Targs>(args)...); } \
1227 template <typename DT = void, typename... Targs> void rndd(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::template rndd<DT>(std::forward<Targs>(args)...); } \
1228 template <typename DT = void, typename... Targs> void rnde(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::template rnde<DT>(std::forward<Targs>(args)...); } \
1229 template <typename DT = void, typename... Targs> void rndu(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::template rndu<DT>(std::forward<Targs>(args)...); } \
1230 template <typename DT = void, typename... Targs> void rndz(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::template rndz<DT>(std::forward<Targs>(args)...); } \
1231 template <typename DT = void, typename... Targs> void rol(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::template rol<DT>(std::forward<Targs>(args)...); } \
1232 template <typename DT = void, typename... Targs> void ror(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::template ror<DT>(std::forward<Targs>(args)...); } \
1233 template <typename DT = void, typename... Targs> void sad2(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::template sad2<DT>(std::forward<Targs>(args)...); } \
1234 template <typename DT = void, typename... Targs> void sada2(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::template sada2<DT>(std::forward<Targs>(args)...); } \
1235 template <typename DT = void, typename... Targs> void sel(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::template sel<DT>(std::forward<Targs>(args)...); } \
1236 template <typename DT = void, typename... Targs> void shl(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::template shl<DT>(std::forward<Targs>(args)...); } \
1237 template <typename DT = void, typename... Targs> void shr(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::template shr<DT>(std::forward<Targs>(args)...); } \
1238 template <typename DT = void, typename... Targs> void smov(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::template smov<DT>(std::forward<Targs>(args)...); } \
1239 template <typename DT = void, typename... Targs> void subb(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::template subb<DT>(std::forward<Targs>(args)...); } \
1240 template <typename DT = void, typename... Targs> void xor_(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::template xor_<DT>(std::forward<Targs>(args)...); } \
1241 template <typename... Targs> void brc(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::brc(std::forward<Targs>(args)...); } \
1242 template <typename... Targs> void brd(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::brd(std::forward<Targs>(args)...); } \
1243 template <typename... Targs> void break_(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::break_(std::forward<Targs>(args)...); } \
1244 template <typename... Targs> void call(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::call(std::forward<Targs>(args)...); } \
1245 template <typename... Targs> void calla(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::calla(std::forward<Targs>(args)...); } \
1246 template <typename... Targs> void cont(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::cont(std::forward<Targs>(args)...); } \
1247 template <typename... Targs> void else_(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::else_(std::forward<Targs>(args)...); } \
1248 template <typename... Targs> void endif(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::endif(std::forward<Targs>(args)...); } \
1249 template <typename... Targs> void goto_(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::goto_(std::forward<Targs>(args)...); } \
1250 template <typename... Targs> void halt(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::halt(std::forward<Targs>(args)...); } \
1251 template <typename... Targs> void if_(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::if_(std::forward<Targs>(args)...); } \
1252 template <typename... Targs> void illegal(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::illegal(std::forward<Targs>(args)...); } \
1253 template <typename... Targs> void join(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::join(std::forward<Targs>(args)...); } \
1254 template <typename... Targs> void jmpi(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::jmpi(std::forward<Targs>(args)...); } \
1255 template <typename... Targs> void nop(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::nop(std::forward<Targs>(args)...); } \
1256 template <typename... Targs> void ret(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::ret(std::forward<Targs>(args)...); } \
1257 template <typename... Targs> void send(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::send(std::forward<Targs>(args)...); } \
1258 template <typename... Targs> void sendc(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::sendc(std::forward<Targs>(args)...); } \
1259 template <typename... Targs> void sends(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::sends(std::forward<Targs>(args)...); } \
1260 template <typename... Targs> void sendsc(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::sendsc(std::forward<Targs>(args)...); } \
1261 using ngen::BinaryCodeGenerator<hw>::sync; \
1262 template <typename... Targs> void wait(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::wait(std::forward<Targs>(args)...); } \
1263 template <typename... Targs> void while_(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::while_(std::forward<Targs>(args)...); } \
1264 template <typename DT = void, typename... Targs> void min_(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::template min_<DT>(std::forward<Targs>(args)...); } \
1265 template <typename DT = void, typename... Targs> void max_(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::template max_<DT>(std::forward<Targs>(args)...); } \
1266 template <typename DT = void, typename... Targs> void bfi(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::template bfi<DT>(std::forward<Targs>(args)...); } \
1267 template <typename DT = void, typename... Targs> void cos(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::template cos<DT>(std::forward<Targs>(args)...); } \
1268 template <typename DT = void, typename... Targs> void exp(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::template exp<DT>(std::forward<Targs>(args)...); } \
1269 template <typename DT = void, typename... Targs> void fdiv(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::template fdiv<DT>(std::forward<Targs>(args)...); } \
1270 template <typename DT = void, typename... Targs> void idiv(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::template idiv<DT>(std::forward<Targs>(args)...); } \
1271 template <typename DT = void, typename... Targs> void inv(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::template inv<DT>(std::forward<Targs>(args)...); } \
1272 template <typename DT = void, typename... Targs> void invm(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::template invm<DT>(std::forward<Targs>(args)...); } \
1273 template <typename DT = void, typename... Targs> void iqot(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::template iqot<DT>(std::forward<Targs>(args)...); } \
1274 template <typename DT = void, typename... Targs> void irem(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::template irem<DT>(std::forward<Targs>(args)...); } \
1275 template <typename DT = void, typename... Targs> void log(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::template log<DT>(std::forward<Targs>(args)...); } \
1276 template <typename DT = void, typename... Targs> void pow(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::template pow<DT>(std::forward<Targs>(args)...); } \
1277 template <typename DT = void, typename... Targs> void rsqt(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::template rsqt<DT>(std::forward<Targs>(args)...); } \
1278 template <typename DT = void, typename... Targs> void rsqtm(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::template rsqtm<DT>(std::forward<Targs>(args)...); } \
1279 template <typename DT = void, typename... Targs> void sin(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::template sin<DT>(std::forward<Targs>(args)...); } \
1280 template <typename DT = void, typename... Targs> void sqt(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::template sqt<DT>(std::forward<Targs>(args)...); } \
1281 template <typename DT = void, typename... Targs> void fdiv_ieee(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::template fdiv_ieee<DT>(std::forward<Targs>(args)...); } \
1282 template <typename DT = void, typename... Targs> void inv_ieee(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::template inv_ieee<DT>(std::forward<Targs>(args)...); } \
1283 template <typename DT = void, typename... Targs> void sqt_ieee(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::template sqt_ieee<DT>(std::forward<Targs>(args)...); } \
1284 template <typename... Targs> void threadend(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::threadend(std::forward<Targs>(args)...); } \
1285 template <typename... Targs> void barriermsg(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::barriermsg(std::forward<Targs>(args)...); } \
1286 template <typename... Targs> void barriersignal(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::barriersignal(std::forward<Targs>(args)...); } \
1287 template <typename... Targs> void barrierwait(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::barrierwait(std::forward<Targs>(args)...); } \
1288 template <typename... Targs> void barrier(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::barrier(std::forward<Targs>(args)...); } \
1289 using ngen::BinaryCodeGenerator<hw>::load; \
1290 using ngen::BinaryCodeGenerator<hw>::store; \
1291 using ngen::BinaryCodeGenerator<hw>::atomic; \
1292 template <typename... Targs> void memfence(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::memfence(std::forward<Targs>(args)...); } \
1293 template <typename... Targs> void slmfence(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::slmfence(std::forward<Targs>(args)...); } \
1294 template <typename... Targs> void pushStream(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::pushStream(std::forward<Targs>(args)...); } \
1295 template <typename... Targs> InstructionStream *popStream(Targs&&... args) { return ngen::BinaryCodeGenerator<hw>::popStream(std::forward<Targs>(args)...); } \
1296 template <typename... Targs> void appendStream(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::appendStream(std::forward<Targs>(args)...); } \
1297 template <typename... Targs> void appendCurrentStream(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::appendCurrentStream(std::forward<Targs>(args)...); } \
1298 template <typename... Targs> void discardStream(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::discardStream(std::forward<Targs>(args)...); } \
1299 template <typename... Targs> void mark(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::mark(std::forward<Targs>(args)...); } \
1300 template <typename... Targs> void comment(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::comment(std::forward<Targs>(args)...); } \
1301 template <typename... Targs> void setDefaultNoMask(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::setDefaultNoMask(std::forward<Targs>(args)...); } \
1302 template <typename... Targs> void setDefaultAutoSWSB(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::setDefaultAutoSWSB(std::forward<Targs>(args)...); } \
1303 bool getDefaultNoMask() { return ngen::BinaryCodeGenerator<hw>::getDefaultNoMask(); } \
1304 bool getDefaultAutoSWSB() { return ngen::BinaryCodeGenerator<hw>::getDefaultAutoSWSB(); } \
1305 NGEN_FORWARD_EXTRA \
1306 NGEN_FORWARD_OP_NAMES \
1307 NGEN_FORWARD_MIN_MAX \
1308 NGEN_FORWARD_REGISTERS
1309 
// NGEN_FORWARD_EXTRA: extra instruction forwarders expanded at the end of the
// forwarding macro defined above. Currently this is only dp4a, which forwards all
// arguments to BinaryCodeGenerator<hw>::dp4a<DT>.
#define NGEN_FORWARD_EXTRA \
template <typename DT = void, typename... Targs> void dp4a(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::template dp4a<DT>(std::forward<Targs>(args)...); }
1312 
// NGEN_FORWARD_OP_NAMES: forwarders named and/not/or/xor that call the underscore-suffixed
// members (and_, not_, or_, xor_) of BinaryCodeGenerator<hw>.
// These four names are alternative operator tokens in standard C++, so the forwarders
// only compile on toolchains/settings that treat them as ordinary identifiers.
// Define NGEN_NO_OP_NAMES to omit them; the underscore-suffixed members are what these forward to.
#ifdef NGEN_NO_OP_NAMES
#define NGEN_FORWARD_OP_NAMES
#else
#define NGEN_FORWARD_OP_NAMES \
template <typename DT = void, typename... Targs> void and(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::template and_<DT>(std::forward<Targs>(args)...); } \
template <typename DT = void, typename... Targs> void not(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::template not_<DT>(std::forward<Targs>(args)...); } \
template <typename DT = void, typename... Targs> void or(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::template or_<DT>(std::forward<Targs>(args)...); } \
template <typename DT = void, typename... Targs> void xor(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::template xor_<DT>(std::forward<Targs>(args)...); }
#endif
1322 
// NGEN_FORWARD_MIN_MAX: forwarders for the min/max instructions.
// They are omitted when NGEN_WINDOWS_COMPAT is defined.
// NOTE(review): presumably this avoids clashes with the min/max macros from Windows
// headers -- confirm against the build configuration.
#ifdef NGEN_WINDOWS_COMPAT
#define NGEN_FORWARD_MIN_MAX
#else
#define NGEN_FORWARD_MIN_MAX \
template <typename DT = void, typename... Targs> void min(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::template min<DT>(std::forward<Targs>(args)...); } \
template <typename DT = void, typename... Targs> void max(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::template max<DT>(std::forward<Targs>(args)...); }
#endif
1330 
// NGEN_FORWARD_REGISTERS: brings register names, instruction modifiers, condition
// modifiers, SWSB tokens and address-model helpers of BinaryCodeGenerator<hw> into the
// scope of the class that expands the forwarding macro.
//
// With NGEN_GLOBAL_REGS these names are global variables in the ngen namespace (see the
// macro summary at the top of this file), so nothing needs to be forwarded and the macro
// is empty. Otherwise they are members of the code generator class and are re-exported
// here with using-declarations; the function-style helpers (ExecutionOffset, Surface,
// CC, SC) get forwarding wrappers instead.
//
// NGEN_FORWARD_REGISTERS_EXTRA1..3 are defined empty here and are appended to the base
// list when NGEN_FORWARD_REGISTERS is expanded.
#ifdef NGEN_GLOBAL_REGS
#define NGEN_FORWARD_REGISTERS
#else
#define NGEN_FORWARD_REGISTERS_BASE \
using ngen::BinaryCodeGenerator<hw>::indirect; \
using ngen::BinaryCodeGenerator<hw>::r0; using ngen::BinaryCodeGenerator<hw>::r1; using ngen::BinaryCodeGenerator<hw>::r2; using ngen::BinaryCodeGenerator<hw>::r3; \
using ngen::BinaryCodeGenerator<hw>::r4; using ngen::BinaryCodeGenerator<hw>::r5; using ngen::BinaryCodeGenerator<hw>::r6; using ngen::BinaryCodeGenerator<hw>::r7; \
using ngen::BinaryCodeGenerator<hw>::r8; using ngen::BinaryCodeGenerator<hw>::r9; using ngen::BinaryCodeGenerator<hw>::r10; using ngen::BinaryCodeGenerator<hw>::r11; \
using ngen::BinaryCodeGenerator<hw>::r12; using ngen::BinaryCodeGenerator<hw>::r13; using ngen::BinaryCodeGenerator<hw>::r14; using ngen::BinaryCodeGenerator<hw>::r15; \
using ngen::BinaryCodeGenerator<hw>::r16; using ngen::BinaryCodeGenerator<hw>::r17; using ngen::BinaryCodeGenerator<hw>::r18; using ngen::BinaryCodeGenerator<hw>::r19; \
using ngen::BinaryCodeGenerator<hw>::r20; using ngen::BinaryCodeGenerator<hw>::r21; using ngen::BinaryCodeGenerator<hw>::r22; using ngen::BinaryCodeGenerator<hw>::r23; \
using ngen::BinaryCodeGenerator<hw>::r24; using ngen::BinaryCodeGenerator<hw>::r25; using ngen::BinaryCodeGenerator<hw>::r26; using ngen::BinaryCodeGenerator<hw>::r27; \
using ngen::BinaryCodeGenerator<hw>::r28; using ngen::BinaryCodeGenerator<hw>::r29; using ngen::BinaryCodeGenerator<hw>::r30; using ngen::BinaryCodeGenerator<hw>::r31; \
using ngen::BinaryCodeGenerator<hw>::r32; using ngen::BinaryCodeGenerator<hw>::r33; using ngen::BinaryCodeGenerator<hw>::r34; using ngen::BinaryCodeGenerator<hw>::r35; \
using ngen::BinaryCodeGenerator<hw>::r36; using ngen::BinaryCodeGenerator<hw>::r37; using ngen::BinaryCodeGenerator<hw>::r38; using ngen::BinaryCodeGenerator<hw>::r39; \
using ngen::BinaryCodeGenerator<hw>::r40; using ngen::BinaryCodeGenerator<hw>::r41; using ngen::BinaryCodeGenerator<hw>::r42; using ngen::BinaryCodeGenerator<hw>::r43; \
using ngen::BinaryCodeGenerator<hw>::r44; using ngen::BinaryCodeGenerator<hw>::r45; using ngen::BinaryCodeGenerator<hw>::r46; using ngen::BinaryCodeGenerator<hw>::r47; \
using ngen::BinaryCodeGenerator<hw>::r48; using ngen::BinaryCodeGenerator<hw>::r49; using ngen::BinaryCodeGenerator<hw>::r50; using ngen::BinaryCodeGenerator<hw>::r51; \
using ngen::BinaryCodeGenerator<hw>::r52; using ngen::BinaryCodeGenerator<hw>::r53; using ngen::BinaryCodeGenerator<hw>::r54; using ngen::BinaryCodeGenerator<hw>::r55; \
using ngen::BinaryCodeGenerator<hw>::r56; using ngen::BinaryCodeGenerator<hw>::r57; using ngen::BinaryCodeGenerator<hw>::r58; using ngen::BinaryCodeGenerator<hw>::r59; \
using ngen::BinaryCodeGenerator<hw>::r60; using ngen::BinaryCodeGenerator<hw>::r61; using ngen::BinaryCodeGenerator<hw>::r62; using ngen::BinaryCodeGenerator<hw>::r63; \
using ngen::BinaryCodeGenerator<hw>::r64; using ngen::BinaryCodeGenerator<hw>::r65; using ngen::BinaryCodeGenerator<hw>::r66; using ngen::BinaryCodeGenerator<hw>::r67; \
using ngen::BinaryCodeGenerator<hw>::r68; using ngen::BinaryCodeGenerator<hw>::r69; using ngen::BinaryCodeGenerator<hw>::r70; using ngen::BinaryCodeGenerator<hw>::r71; \
using ngen::BinaryCodeGenerator<hw>::r72; using ngen::BinaryCodeGenerator<hw>::r73; using ngen::BinaryCodeGenerator<hw>::r74; using ngen::BinaryCodeGenerator<hw>::r75; \
using ngen::BinaryCodeGenerator<hw>::r76; using ngen::BinaryCodeGenerator<hw>::r77; using ngen::BinaryCodeGenerator<hw>::r78; using ngen::BinaryCodeGenerator<hw>::r79; \
using ngen::BinaryCodeGenerator<hw>::r80; using ngen::BinaryCodeGenerator<hw>::r81; using ngen::BinaryCodeGenerator<hw>::r82; using ngen::BinaryCodeGenerator<hw>::r83; \
using ngen::BinaryCodeGenerator<hw>::r84; using ngen::BinaryCodeGenerator<hw>::r85; using ngen::BinaryCodeGenerator<hw>::r86; using ngen::BinaryCodeGenerator<hw>::r87; \
using ngen::BinaryCodeGenerator<hw>::r88; using ngen::BinaryCodeGenerator<hw>::r89; using ngen::BinaryCodeGenerator<hw>::r90; using ngen::BinaryCodeGenerator<hw>::r91; \
using ngen::BinaryCodeGenerator<hw>::r92; using ngen::BinaryCodeGenerator<hw>::r93; using ngen::BinaryCodeGenerator<hw>::r94; using ngen::BinaryCodeGenerator<hw>::r95; \
using ngen::BinaryCodeGenerator<hw>::r96; using ngen::BinaryCodeGenerator<hw>::r97; using ngen::BinaryCodeGenerator<hw>::r98; using ngen::BinaryCodeGenerator<hw>::r99; \
using ngen::BinaryCodeGenerator<hw>::r100; using ngen::BinaryCodeGenerator<hw>::r101; using ngen::BinaryCodeGenerator<hw>::r102; using ngen::BinaryCodeGenerator<hw>::r103; \
using ngen::BinaryCodeGenerator<hw>::r104; using ngen::BinaryCodeGenerator<hw>::r105; using ngen::BinaryCodeGenerator<hw>::r106; using ngen::BinaryCodeGenerator<hw>::r107; \
using ngen::BinaryCodeGenerator<hw>::r108; using ngen::BinaryCodeGenerator<hw>::r109; using ngen::BinaryCodeGenerator<hw>::r110; using ngen::BinaryCodeGenerator<hw>::r111; \
using ngen::BinaryCodeGenerator<hw>::r112; using ngen::BinaryCodeGenerator<hw>::r113; using ngen::BinaryCodeGenerator<hw>::r114; using ngen::BinaryCodeGenerator<hw>::r115; \
using ngen::BinaryCodeGenerator<hw>::r116; using ngen::BinaryCodeGenerator<hw>::r117; using ngen::BinaryCodeGenerator<hw>::r118; using ngen::BinaryCodeGenerator<hw>::r119; \
using ngen::BinaryCodeGenerator<hw>::r120; using ngen::BinaryCodeGenerator<hw>::r121; using ngen::BinaryCodeGenerator<hw>::r122; using ngen::BinaryCodeGenerator<hw>::r123; \
using ngen::BinaryCodeGenerator<hw>::r124; using ngen::BinaryCodeGenerator<hw>::r125; using ngen::BinaryCodeGenerator<hw>::r126; using ngen::BinaryCodeGenerator<hw>::r127; \
using ngen::BinaryCodeGenerator<hw>::null; \
using ngen::BinaryCodeGenerator<hw>::a0; \
using ngen::BinaryCodeGenerator<hw>::acc0; using ngen::BinaryCodeGenerator<hw>::acc1; using ngen::BinaryCodeGenerator<hw>::acc2; using ngen::BinaryCodeGenerator<hw>::acc3; \
using ngen::BinaryCodeGenerator<hw>::acc4; using ngen::BinaryCodeGenerator<hw>::acc5; using ngen::BinaryCodeGenerator<hw>::acc6; using ngen::BinaryCodeGenerator<hw>::acc7; \
using ngen::BinaryCodeGenerator<hw>::acc8; using ngen::BinaryCodeGenerator<hw>::acc9; \
using ngen::BinaryCodeGenerator<hw>::mme0; using ngen::BinaryCodeGenerator<hw>::mme1; using ngen::BinaryCodeGenerator<hw>::mme2; using ngen::BinaryCodeGenerator<hw>::mme3; \
using ngen::BinaryCodeGenerator<hw>::mme4; using ngen::BinaryCodeGenerator<hw>::mme5; using ngen::BinaryCodeGenerator<hw>::mme6; using ngen::BinaryCodeGenerator<hw>::mme7; \
using ngen::BinaryCodeGenerator<hw>::noacc; using ngen::BinaryCodeGenerator<hw>::nomme; \
using ngen::BinaryCodeGenerator<hw>::f0; using ngen::BinaryCodeGenerator<hw>::f1; \
using ngen::BinaryCodeGenerator<hw>::ce0; using ngen::BinaryCodeGenerator<hw>::sp; using ngen::BinaryCodeGenerator<hw>::sr0; using ngen::BinaryCodeGenerator<hw>::sr1; \
using ngen::BinaryCodeGenerator<hw>::cr0; using ngen::BinaryCodeGenerator<hw>::n0; using ngen::BinaryCodeGenerator<hw>::ip; using ngen::BinaryCodeGenerator<hw>::tdr0; \
using ngen::BinaryCodeGenerator<hw>::tm0; using ngen::BinaryCodeGenerator<hw>::tm1; using ngen::BinaryCodeGenerator<hw>::tm2; using ngen::BinaryCodeGenerator<hw>::tm3; \
using ngen::BinaryCodeGenerator<hw>::tm4; using ngen::BinaryCodeGenerator<hw>::pm0; using ngen::BinaryCodeGenerator<hw>::tp0; using ngen::BinaryCodeGenerator<hw>::dbg0; \
using ngen::BinaryCodeGenerator<hw>::fc0; using ngen::BinaryCodeGenerator<hw>::fc1; using ngen::BinaryCodeGenerator<hw>::fc2; using ngen::BinaryCodeGenerator<hw>::fc3; \
using ngen::BinaryCodeGenerator<hw>::NoDDClr; using ngen::BinaryCodeGenerator<hw>::NoDDChk; \
using ngen::BinaryCodeGenerator<hw>::AccWrEn; using ngen::BinaryCodeGenerator<hw>::NoSrcDepSet; using ngen::BinaryCodeGenerator<hw>::Breakpoint; using ngen::BinaryCodeGenerator<hw>::sat; \
using ngen::BinaryCodeGenerator<hw>::NoMask; \
using ngen::BinaryCodeGenerator<hw>::Serialize; using ngen::BinaryCodeGenerator<hw>::EOT; \
using ngen::BinaryCodeGenerator<hw>::Atomic; using ngen::BinaryCodeGenerator<hw>::Switch; using ngen::BinaryCodeGenerator<hw>::NoPreempt; \
using ngen::BinaryCodeGenerator<hw>::anyv; using ngen::BinaryCodeGenerator<hw>::allv; using ngen::BinaryCodeGenerator<hw>::any2h; using ngen::BinaryCodeGenerator<hw>::all2h; \
using ngen::BinaryCodeGenerator<hw>::any4h; using ngen::BinaryCodeGenerator<hw>::all4h; using ngen::BinaryCodeGenerator<hw>::any8h; using ngen::BinaryCodeGenerator<hw>::all8h; \
using ngen::BinaryCodeGenerator<hw>::any16h; using ngen::BinaryCodeGenerator<hw>::all16h; using ngen::BinaryCodeGenerator<hw>::any32h; using ngen::BinaryCodeGenerator<hw>::all32h; \
using ngen::BinaryCodeGenerator<hw>::x_repl; using ngen::BinaryCodeGenerator<hw>::y_repl; using ngen::BinaryCodeGenerator<hw>::z_repl; using ngen::BinaryCodeGenerator<hw>::w_repl; \
using ngen::BinaryCodeGenerator<hw>::ze; using ngen::BinaryCodeGenerator<hw>::eq; using ngen::BinaryCodeGenerator<hw>::nz; using ngen::BinaryCodeGenerator<hw>::ne; \
using ngen::BinaryCodeGenerator<hw>::gt; using ngen::BinaryCodeGenerator<hw>::ge; using ngen::BinaryCodeGenerator<hw>::lt; using ngen::BinaryCodeGenerator<hw>::le; \
using ngen::BinaryCodeGenerator<hw>::ov; using ngen::BinaryCodeGenerator<hw>::un; using ngen::BinaryCodeGenerator<hw>::eo; \
using ngen::BinaryCodeGenerator<hw>::M0; using ngen::BinaryCodeGenerator<hw>::M4; using ngen::BinaryCodeGenerator<hw>::M8; using ngen::BinaryCodeGenerator<hw>::M12; \
using ngen::BinaryCodeGenerator<hw>::M16; using ngen::BinaryCodeGenerator<hw>::M20; using ngen::BinaryCodeGenerator<hw>::M24; using ngen::BinaryCodeGenerator<hw>::M28; \
using ngen::BinaryCodeGenerator<hw>::sb0; using ngen::BinaryCodeGenerator<hw>::sb1; using ngen::BinaryCodeGenerator<hw>::sb2; using ngen::BinaryCodeGenerator<hw>::sb3; \
using ngen::BinaryCodeGenerator<hw>::sb4; using ngen::BinaryCodeGenerator<hw>::sb5; using ngen::BinaryCodeGenerator<hw>::sb6; using ngen::BinaryCodeGenerator<hw>::sb7; \
using ngen::BinaryCodeGenerator<hw>::sb8; using ngen::BinaryCodeGenerator<hw>::sb9; using ngen::BinaryCodeGenerator<hw>::sb10; using ngen::BinaryCodeGenerator<hw>::sb11; \
using ngen::BinaryCodeGenerator<hw>::sb12; using ngen::BinaryCodeGenerator<hw>::sb13; using ngen::BinaryCodeGenerator<hw>::sb14; using ngen::BinaryCodeGenerator<hw>::sb15; \
using ngen::BinaryCodeGenerator<hw>::A32; using ngen::BinaryCodeGenerator<hw>::A32NC; using ngen::BinaryCodeGenerator<hw>::A64; using ngen::BinaryCodeGenerator<hw>::A64NC; \
using ngen::BinaryCodeGenerator<hw>::SLM; \
template <typename... Targs> ngen::InstructionModifier ExecutionOffset(Targs&&... args) { return ngen::BinaryCodeGenerator<hw>::ExecutionOffset(std::forward<Targs>(args)...); } \
template <typename... Targs> ngen::AddressBase Surface(Targs&&... args) { return ngen::BinaryCodeGenerator<hw>::Surface(std::forward<Targs>(args)...); } \
template <typename... Targs> ngen::AddressBase CC(Targs&&... args) { return ngen::BinaryCodeGenerator<hw>::CC(std::forward<Targs>(args)...); } \
template <typename... Targs> ngen::AddressBase SC(Targs&&... args) { return ngen::BinaryCodeGenerator<hw>::SC(std::forward<Targs>(args)...); }
#define NGEN_FORWARD_REGISTERS_EXTRA1
#define NGEN_FORWARD_REGISTERS_EXTRA2
#define NGEN_FORWARD_REGISTERS_EXTRA3
#define NGEN_FORWARD_REGISTERS NGEN_FORWARD_REGISTERS_BASE NGEN_FORWARD_REGISTERS_EXTRA1 NGEN_FORWARD_REGISTERS_EXTRA2 NGEN_FORWARD_REGISTERS_EXTRA3
#endif
1411 
1412 template <HW hw>
unsupported()1413 inline void BinaryCodeGenerator<hw>::unsupported()
1414 {
1415 #ifdef NGEN_SAFE
1416     throw unsupported_instruction();
1417 #endif
1418 }
1419 
1420 template <HW hw>
popStream()1421 typename BinaryCodeGenerator<hw>::InstructionStream *BinaryCodeGenerator<hw>::popStream()
1422 {
1423 #ifdef NGEN_SAFE
1424     if (streamStack.size() <= 1) throw stream_stack_underflow();
1425 #endif
1426 
1427     InstructionStream *result = streamStack.back();
1428     streamStack.pop_back();
1429     return result;
1430 }
1431 
1432 template <HW hw>
encodeSyncInsertion(autoswsb::SyncInsertion & si)1433 static inline Instruction12 encodeSyncInsertion(autoswsb::SyncInsertion &si)
1434 {
1435     Instruction12 i;
1436 
1437     i.common.opcode = static_cast<int>(Opcode::sync);
1438     i.common.swsb = SWSBInfo12(si.swsb, Opcode::sync).raw();
1439     i.common.maskCtrl = true;
1440     i.binary.cmod = static_cast<int>(si.fc);
1441 
1442     if (si.mask) {
1443         i.binary.src0Type = getTypecode12(DataType::ud);
1444         i.binary.src0Imm = true;
1445         i.imm32.value = si.mask;
1446     }
1447     i.binary.dst = 1;
1448 
1449     return i;
1450 }
1451 
1452 template <HW hw>
getCode()1453 std::vector<uint8_t> BinaryCodeGenerator<hw>::getCode()
1454 {
1455 #ifdef NGEN_SAFE
1456     if (streamStack.size() > 1) throw unfinished_stream_exception();
1457 #endif
1458     rootStream.fixLabels(labelManager);
1459 
1460     Program program(rootStream);
1461     autoswsb::BasicBlockList analysis = autoswsb::autoSWSB(hw, program);
1462     std::vector<uint8_t> result;
1463 
1464     if (analysis.empty()) {
1465         result.resize(rootStream.length());
1466         std::memmove(result.data(), rootStream.code.data(), rootStream.length());
1467     } else {
1468         std::multimap<int32_t, autoswsb::SyncInsertion*> syncs;
1469 
1470         for (auto &bb : analysis)
1471             for (auto &sync : bb.syncs)
1472                 syncs.insert(std::make_pair(sync.inum, &sync));
1473 
1474         result.resize(rootStream.length() + syncs.size() * sizeof(Instruction12));
1475 
1476         auto *psrc = reinterpret_cast<const Instruction12 *>(rootStream.code.data());
1477         auto *pdst = reinterpret_cast<Instruction12 *>(result.data());
1478         auto nextSync = syncs.begin();
1479 
1480         for (uint32_t isrc = 0; isrc < program.size(); isrc++) {
1481             while ((nextSync != syncs.end()) && (nextSync->second->inum == isrc))
1482                 *pdst++ = encodeSyncInsertion<hw>(*(nextSync++)->second);
1483             *pdst++ = *psrc++;
1484         }
1485     }
1486 
1487     return result;
1488 }
1489 
1490 template <HW hw>
1491 template <bool forceWE, typename D, typename S0, HW hw_>
1492 typename std::enable_if<hwLT(hw_, HW::Xe_LP)>::type
opX(Opcode op,DataType defaultType,const InstructionModifier & mod,D dst,S0 src0)1493 BinaryCodeGenerator<hw>::opX(Opcode op, DataType defaultType, const InstructionModifier &mod, D dst, S0 src0)
1494 {
1495     Instruction8 i{};
1496     InstructionModifier emod = mod | defaultModifier;
1497     if (forceWE)
1498         emod |= NoMask;
1499 
1500     dst.fixup(emod.getExecSize(), defaultType, true, 1);
1501     src0.fixup(emod.getExecSize(), defaultType, false, 1);
1502 
1503     encodeCommon8(i, op, emod);
1504     i.common.accessMode = std::is_base_of<Align16Operand, D>::value;
1505 
1506     i.binary.dst = encodeBinaryOperand8<true>(dst).bits;
1507     i.binary.src0 = encodeBinaryOperand8<false>(src0).bits;
1508 
1509     if (dst.isIndirect())  i.binary.dstAddrImm9 = dst.getOffset() >> 9;
1510     if (src0.isIndirect()) i.binary.src0AddrImm9 = src0.getOffset() >> 9;
1511 
1512     i.binary.dstType = getTypecode<hw>(dst.getType());
1513     i.binary.src0Type = getTypecode<hw>(src0.getType());
1514 
1515     i.binary.dstRegFile = getRegFile(dst);
1516     i.binary.src0RegFile = getRegFile(src0);
1517 
1518     db(i);
1519 }
1520 
1521 template <HW hw>
1522 template <bool forceWE, typename D, typename S0, HW hw_>
1523 typename std::enable_if<hwGE(hw_, HW::Xe_LP)>::type
opX(Opcode op,DataType defaultType,const InstructionModifier & mod,D dst,S0 src0)1524 BinaryCodeGenerator<hw>::opX(Opcode op, DataType defaultType, const InstructionModifier &mod, D dst, S0 src0)
1525 {
1526     typename EncodingTag12Dispatch<hw>::tag tag;
1527     Instruction12 i{};
1528 
1529     InstructionModifier emod = mod | defaultModifier;
1530     if (forceWE)
1531         emod |= NoMask;
1532 
1533     dst.fixup(emod.getExecSize(), defaultType, true, 1);
1534     src0.fixup(emod.getExecSize(), defaultType, false, 1);
1535 
1536     encodeCommon12(i, op, emod, dst, tag);
1537 
1538     i.binary.dst  = encodeBinaryOperand12<true>(dst, tag).bits;
1539     i.binary.src0 = encodeBinaryOperand12<false>(src0, tag).bits;
1540 
1541     i.binary.dstAddrMode = dst.isIndirect();
1542     i.binary.dstType  = getTypecode12(dst.getType());
1543     i.binary.src0Type = getTypecode12(src0.getType());
1544 
1545     i.binary.src0Mods = src0.getMods();
1546 
1547     i.binary.cmod = static_cast<int>(mod.getCMod());
1548 
1549     db(i);
1550 }
1551 
1552 template <HW hw>
1553 template <bool forceWE, typename D, HW hw_>
1554 typename std::enable_if<hwLT(hw_, HW::Xe_LP)>::type
opX(Opcode op,DataType defaultType,const InstructionModifier & mod,D dst,const Immediate & src0)1555 BinaryCodeGenerator<hw>::opX(Opcode op, DataType defaultType, const InstructionModifier &mod, D dst, const Immediate &src0)
1556 {
1557     Instruction8 i{};
1558     InstructionModifier emod = mod | defaultModifier;
1559     if (forceWE)
1560         emod |= NoMask;
1561 
1562     dst.fixup(emod.getExecSize(), defaultType, true, 1);
1563     src0.fixup(emod.getExecSize(), defaultType, false, 1);
1564 
1565     encodeCommon8(i, op, emod);
1566     i.common.accessMode = std::is_base_of<Align16Operand, D>::value;
1567 
1568     i.binary.dst = encodeBinaryOperand8<true>(dst).bits;
1569 
1570     i.binary.dstType = getTypecode<hw>(dst.getType());
1571     i.binary.src0Type = getImmediateTypecode<hw>(src0.getType());
1572 
1573     i.binary.dstRegFile = getRegFile(dst);
1574     i.binary.src0RegFile = getRegFile(src0);
1575 
1576     if (dst.isIndirect())  i.binary.dstAddrImm9 = dst.getOffset() >> 9;
1577 
1578     if (getBytes(src0.getType()) == 8)
1579         i.imm64.value = static_cast<uint64_t>(src0);
1580     else
1581         i.imm32.value = static_cast<uint64_t>(src0);
1582 
1583     db(i);
1584 }
1585 
1586 template <HW hw>
1587 template <bool forceWE, typename D, HW hw_>
1588 typename std::enable_if<hwGE(hw_, HW::Xe_LP)>::type
opX(Opcode op,DataType defaultType,const InstructionModifier & mod,D dst,const Immediate & src0)1589 BinaryCodeGenerator<hw>::opX(Opcode op, DataType defaultType, const InstructionModifier &mod, D dst, const Immediate &src0)
1590 {
1591     typename EncodingTag12Dispatch<hw>::tag tag;
1592     Instruction12 i{};
1593 
1594     InstructionModifier emod = mod | defaultModifier;
1595     if (forceWE)
1596         emod |= NoMask;
1597 
1598     dst.fixup(emod.getExecSize(), defaultType, true, 1);
1599     src0.fixup(emod.getExecSize(), defaultType, false, 1);
1600 
1601     encodeCommon12(i, op, emod, dst, tag);
1602 
1603     i.binary.dst = encodeBinaryOperand12<true>(dst, tag).bits;
1604 
1605     i.binary.dstAddrMode = dst.isIndirect();
1606 
1607     i.binary.dstType  = getTypecode12(dst.getType());
1608     i.binary.src0Type = getTypecode12(src0.getType());
1609 
1610     i.binary.src0Imm = true;
1611 
1612     i.binary.cmod = static_cast<int>(mod.getCMod());
1613 
1614     auto val = static_cast<uint64_t>(src0);
1615     i.imm32.value = val;
1616     if (getBytes(src0.getType()) == 8) {
1617 #ifdef NGEN_SAFE
1618         if (mod.getCMod() != ConditionModifier::none) throw invalid_modifiers_exception();
1619 #endif
1620         i.imm64.high = val >> 32;
1621     }
1622 
1623     db(i);
1624 }
1625 
1626 template <HW hw>
1627 template <bool forceWE, typename D, typename S0, typename S1, HW hw_>
1628 typename std::enable_if<hwLT(hw_, HW::Xe_LP)>::type
opX(Opcode op,DataType defaultType,const InstructionModifier & mod,D dst,S0 src0,S1 src1)1629 BinaryCodeGenerator<hw>::opX(Opcode op, DataType defaultType, const InstructionModifier &mod, D dst, S0 src0, S1 src1)
1630 {
1631     Instruction8 i{};
1632 
1633     InstructionModifier emod = mod | defaultModifier;
1634     if (forceWE)
1635         emod |= NoMask;
1636 
1637     dst.fixup(emod.getExecSize(), defaultType, true, 2);
1638     src0.fixup(emod.getExecSize(), defaultType, false, 2);
1639     src1.fixup(emod.getExecSize(), defaultType, false, 2);
1640 
1641     encodeCommon8(i, op, emod);
1642     i.common.accessMode = std::is_base_of<Align16Operand, D>::value;
1643 
1644     i.binary.dst  = encodeBinaryOperand8<true>(dst).bits;
1645     i.binary.src0 = encodeBinaryOperand8<false>(src0).bits;
1646     i.binary.src1 = encodeBinaryOperand8<false>(src1).bits;
1647 
1648     if (dst.isIndirect())  i.binary.dstAddrImm9 = dst.getOffset() >> 9;
1649     if (src0.isIndirect()) i.binary.src0AddrImm9 = src0.getOffset() >> 9;
1650     if (src1.isIndirect()) i.binary.src1AddrImm9 = src1.getOffset() >> 9;
1651 
1652     i.binary.dstType  = getTypecode<hw>(dst.getType());
1653     i.binary.src0Type = getTypecode<hw>(src0.getType());
1654     i.binary.src1Type = getTypecode<hw>(src1.getType());
1655 
1656     i.binary.dstRegFile = getRegFile(dst);
1657     i.binary.src0RegFile = getRegFile(src0);
1658     i.binary.src1RegFile = RegFileGRF;
1659 
1660 #ifdef NGEN_SAFE
1661     if (src1.isARF() && op != Opcode::illegal) throw grf_expected_exception();
1662 #endif
1663 
1664     db(i);
1665 }
1666 
1667 template <HW hw>
1668 template <bool forceWE, typename D, typename S0, typename S1, HW hw_>
1669 typename std::enable_if<hwGE(hw_, HW::Xe_LP)>::type
opX(Opcode op,DataType defaultType,const InstructionModifier & mod,D dst,S0 src0,S1 src1)1670 BinaryCodeGenerator<hw>::opX(Opcode op, DataType defaultType, const InstructionModifier &mod, D dst, S0 src0, S1 src1)
1671 {
1672     typename EncodingTag12Dispatch<hw>::tag tag;
1673     Instruction12 i{};
1674 
1675     InstructionModifier emod = mod | defaultModifier;
1676     if (forceWE)
1677         emod |= NoMask;
1678 
1679     dst.fixup(emod.getExecSize(), defaultType, true, 2);
1680     src0.fixup(emod.getExecSize(), defaultType, false, 2);
1681     src1.fixup(emod.getExecSize(), defaultType, false, 2);
1682 
1683     encodeCommon12(i, op, emod, dst, tag);
1684 
1685     i.binary.dst  = encodeBinaryOperand12<true>(dst, tag).bits;
1686     i.binary.src0 = encodeBinaryOperand12<false>(src0, tag).bits;
1687     i.binary.src1 = encodeBinaryOperand12<false>(src1, tag).bits;
1688 
1689     i.binary.dstAddrMode = dst.isIndirect();
1690     i.binary.dstType  = getTypecode12(dst.getType());
1691     i.binary.src0Type = getTypecode12(src0.getType());
1692     i.binary.src1Type = getTypecode12(src1.getType());
1693 
1694     i.binary.src0Mods = src0.getMods();
1695     i.binary.src1Mods = src1.getMods();
1696 
1697     i.binary.cmod = static_cast<int>(mod.getCMod());
1698 
1699     db(i);
1700 }
1701 
1702 template <HW hw>
1703 template <bool forceWE, typename D, typename S0, HW hw_>
1704 typename std::enable_if<hwLT(hw_, HW::Xe_LP)>::type
opX(Opcode op,DataType defaultType,const InstructionModifier & mod,D dst,S0 src0,const Immediate & src1)1705 BinaryCodeGenerator<hw>::opX(Opcode op, DataType defaultType, const InstructionModifier &mod, D dst, S0 src0, const Immediate &src1)
1706 {
1707     Instruction8 i{};
1708     InstructionModifier emod = mod | defaultModifier;
1709     if (forceWE)
1710         emod |= NoMask;
1711 
1712     dst.fixup(emod.getExecSize(), defaultType, true, 2);
1713     src0.fixup(emod.getExecSize(), defaultType, false, 2);
1714     src1.fixup(emod.getExecSize(), defaultType, false, 2);
1715 
1716     encodeCommon8(i, op, emod);
1717     i.common.accessMode = std::is_base_of<Align16Operand, D>::value;
1718 
1719     i.binary.dst = encodeBinaryOperand8<true>(dst).bits;
1720     i.binary.src0 = encodeBinaryOperand8<false>(src0).bits;
1721 
1722     if (dst.isIndirect())  i.binary.dstAddrImm9 = dst.getOffset() >> 9;
1723     if (src0.isIndirect()) i.binary.src0AddrImm9 = src0.getOffset() >> 9;
1724 
1725     i.binary.dstType = getTypecode<hw>(dst.getType());
1726     i.binary.src0Type = getTypecode<hw>(src0.getType());
1727     i.binary.src1Type = getImmediateTypecode<hw>(src1.getType());
1728 
1729     i.binary.dstRegFile = getRegFile(dst);
1730     i.binary.src0RegFile = getRegFile(src0);
1731     i.binary.src1RegFile = getRegFile(src1);
1732 
1733     i.imm32.value = static_cast<uint64_t>(src1);
1734 
1735     db(i);
1736 }
1737 
1738 template <HW hw>
1739 template <bool forceWE, typename D, typename S0, HW hw_>
1740 typename std::enable_if<hwGE(hw_, HW::Xe_LP)>::type
opX(Opcode op,DataType defaultType,const InstructionModifier & mod,D dst,S0 src0,const Immediate & src1)1741 BinaryCodeGenerator<hw>::opX(Opcode op, DataType defaultType, const InstructionModifier &mod, D dst, S0 src0, const Immediate &src1)
1742 {
1743     typename EncodingTag12Dispatch<hw>::tag tag;
1744     Instruction12 i{};
1745 
1746     InstructionModifier emod = mod | defaultModifier;
1747     if (forceWE)
1748         emod |= NoMask;
1749 
1750     dst.fixup(emod.getExecSize(), defaultType, true, 2);
1751     src0.fixup(emod.getExecSize(), defaultType, false, 2);
1752     src1.fixup(emod.getExecSize(), defaultType, false, 2);
1753 
1754     encodeCommon12(i, op, emod, dst, tag);
1755 
1756     i.binary.dst  = encodeBinaryOperand12<true>(dst, tag).bits;
1757     i.binary.src0 = encodeBinaryOperand12<false>(src0, tag).bits;
1758     i.binary.src1 = static_cast<uint64_t>(src1);
1759 
1760     i.binary.dstAddrMode = dst.isIndirect();
1761     i.binary.dstType  = getTypecode12(dst.getType());
1762     i.binary.src0Type = getTypecode12(src0.getType());
1763     i.binary.src1Type = getTypecode12(src1.getType());
1764 
1765     i.binary.src0Mods = src0.getMods();
1766 
1767     i.binary.cmod = static_cast<int>(mod.getCMod());
1768 
1769     i.binary.src1Imm = true;
1770     i.imm32.value = static_cast<uint64_t>(src1);
1771 
1772     db(i);
1773 }
1774 
1775 template <HW hw>
1776 template <HW hw_>
1777 typename std::enable_if<hwLE(hw_, HW::Gen9)>::type
opX(Opcode op,DataType defaultType,const InstructionModifier & mod,RegData dst,RegData src0,RegData src1,RegData src2)1778 BinaryCodeGenerator<hw>::opX(Opcode op, DataType defaultType, const InstructionModifier &mod, RegData dst, RegData src0, RegData src1, RegData src2)
1779 {
1780     opX(op, defaultType, mod, emulateAlign16Dst(dst),  emulateAlign16Src(src0),
1781                               emulateAlign16Src(src1), emulateAlign16Src(src2));
1782 }
1783 
1784 
1785 template <HW hw>
1786 template <HW hw_>
1787 typename std::enable_if<hwLT(hw_, HW::Xe_LP)>::type
opX(Opcode op,DataType defaultType,const InstructionModifier & mod,Align16Operand dst,Align16Operand src0,Align16Operand src1,Align16Operand src2)1788 BinaryCodeGenerator<hw>::opX(Opcode op, DataType defaultType, const InstructionModifier &mod, Align16Operand dst, Align16Operand src0, Align16Operand src1, Align16Operand src2)
1789 {
1790 #ifdef NGEN_SAFE
1791     if (dst.getReg().isARF())  throw grf_expected_exception();
1792     if (src0.getReg().isARF()) throw grf_expected_exception();
1793     if (src1.getReg().isARF()) throw grf_expected_exception();
1794     if (src2.getReg().isARF()) throw grf_expected_exception();
1795 #endif
1796 
1797     Instruction8 i{};
1798     InstructionModifier emod = mod | defaultModifier | Align16;
1799 
1800     dst.getReg().fixup(emod.getExecSize(), defaultType, true, 3);
1801     src0.getReg().fixup(emod.getExecSize(), defaultType, false, 3);
1802     src1.getReg().fixup(emod.getExecSize(), defaultType, false, 3);
1803     src2.getReg().fixup(emod.getExecSize(), defaultType, false, 3);
1804 
1805     encodeCommon8(i, op, emod);
1806 
1807     i.ternary16.dstChanEn = dst.getChanEn();
1808     i.ternary16.dstRegNum = dst.getReg().getBase();
1809     i.ternary16.dstSubregNum2_4 = dst.getReg().getByteOffset() >> 2;
1810     i.ternary16.dstType = getTernary16Typecode8(dst.getReg().getType());
1811 
1812     i.ternary16.srcType = getTernary16Typecode8(src0.getReg().getType());
1813 
1814     bool isFOrHF = (src0.getReg().getType() == DataType::f
1815                  || src0.getReg().getType() == DataType::hf);
1816 
1817     i.ternary16.src1Type = isFOrHF && (src1.getReg().getType() == DataType::hf);
1818     i.ternary16.src2Type = isFOrHF && (src1.getReg().getType() == DataType::hf);
1819 
1820     encodeTernaryCommon8(i, src0, src1, src2);
1821 
1822     db(i);
1823 }
1824 
1825 template <HW hw>
1826 template <typename D, typename S0, typename S1, typename S2, HW hw_>
1827 typename std::enable_if<hwLT(hw_, HW::Xe_LP)>::type
opX(Opcode op,DataType defaultType,const InstructionModifier & mod,D dst,S0 src0,S1 src1,S2 src2)1828 BinaryCodeGenerator<hw>::opX(Opcode op, DataType defaultType, const InstructionModifier &mod, D dst, S0 src0, S1 src1, S2 src2)
1829 {
1830     if (hw < HW::Gen10)
1831         unsupported();
1832 
1833 #ifdef NGEN_SAFE
1834     if (src0.isARF()) throw grf_expected_exception();
1835     if (src2.isARF()) throw grf_expected_exception();
1836 #endif
1837 
1838     Instruction8 i{};
1839     InstructionModifier emod = mod | defaultModifier;
1840 
1841     dst.fixup(emod.getExecSize(), defaultType, true, 3);
1842     src0.fixup(emod.getExecSize(), defaultType, false, 3);
1843     src1.fixup(emod.getExecSize(), defaultType, false, 3);
1844     src2.fixup(emod.getExecSize(), defaultType, false, 3);
1845 
1846     encodeCommon8(i, op, emod);
1847 
1848     i.ternary1.src0RegFile = std::is_base_of<Immediate, S0>::value;
1849     i.ternary1.src1RegFile = src1.isARF();
1850     i.ternary1.src2RegFile = std::is_base_of<Immediate, S2>::value;
1851 
1852     encodeTernaryCommon8(i, src0, src1, src2);
1853     encodeTernary1Dst10(i, dst);
1854 
1855     db(i);
1856 }
1857 
1858 template <HW hw>
1859 template <typename D, typename S0,typename S1, typename S2, HW hw_>
1860 typename std::enable_if<hwGE(hw_, HW::Xe_LP)>::type
opX(Opcode op,DataType defaultType,const InstructionModifier & mod,D dst,S0 src0,S1 src1,S2 src2)1861 BinaryCodeGenerator<hw>::opX(Opcode op, DataType defaultType, const InstructionModifier &mod, D dst, S0 src0, S1 src1, S2 src2)
1862 {
1863     typename EncodingTag12Dispatch<hw>::tag tag;
1864     Instruction12 i{};
1865     InstructionModifier emod = mod | defaultModifier;
1866 
1867     dst.fixup(emod.getExecSize(), defaultType, true, 3);
1868     src0.fixup(emod.getExecSize(), defaultType, false, 3);
1869     src1.fixup(emod.getExecSize(), defaultType, false, 3);
1870     src2.fixup(emod.getExecSize(), defaultType, false, 3);
1871 
1872     encodeCommon12(i, op, emod, dst, tag);
1873 
1874     i.ternary.dst  = encodeTernaryOperand12<true>(dst, tag).bits;
1875     encodeTernarySrc0(i, src0, tag);
1876     encodeTernarySrc1(i, src1, tag);
1877     encodeTernarySrc2(i, src2, tag);
1878     encodeTernaryTypes(i, dst, src0, src1, src2);
1879 
1880     i.ternary.cmod = static_cast<int>(mod.getCMod());
1881 
1882     db(i);
1883 }
1884 
1885 template <HW hw>
1886 template <typename DS0>
opMath(Opcode op,DataType defaultType,const InstructionModifier & mod,MathFunction fc,DS0 dst,DS0 src0)1887 void BinaryCodeGenerator<hw>::opMath(Opcode op, DataType defaultType, const InstructionModifier &mod, MathFunction fc, DS0 dst, DS0 src0)
1888 {
1889     InstructionModifier mmod = mod;
1890 
1891     mmod.setCMod(static_cast<ConditionModifier>(fc));
1892     opX(op, defaultType, mmod, dst, src0);
1893 }
1894 
1895 template <HW hw>
1896 template <typename DS0, typename S1>
opMath(Opcode op,DataType defaultType,const InstructionModifier & mod,MathFunction fc,DS0 dst,DS0 src0,S1 src1)1897 void BinaryCodeGenerator<hw>::opMath(Opcode op, DataType defaultType, const InstructionModifier &mod, MathFunction fc, DS0 dst, DS0 src0, S1 src1)
1898 {
1899     InstructionModifier mmod = mod;
1900 
1901     mmod.setCMod(static_cast<ConditionModifier>(fc));
1902     opX(op, defaultType, mmod, dst, src0, src1);
1903 }
1904 
1905 template <HW hw>
1906 template <typename D, HW hw_>
1907 typename std::enable_if<hwLT(hw_, HW::Xe_LP)>::type
opSend(Opcode op,const InstructionModifier & mod,SharedFunction sfid,const RegData & dst,const RegData & src0,const RegData & src1,uint32_t exdesc,D desc)1908 BinaryCodeGenerator<hw>::opSend(Opcode op, const InstructionModifier &mod, SharedFunction sfid, const RegData &dst, const RegData &src0, const RegData &src1, uint32_t exdesc, D desc)
1909 {
1910     exdesc |= uint32_t(static_cast<uint8_t>(sfid));
1911     opSends(static_cast<Opcode>(static_cast<uint8_t>(op) | 2), mod, dst, src0, src1, exdesc, desc);
1912 }
1913 
1914 template <HW hw>
1915 template <typename D, HW hw_>
1916 typename std::enable_if<hwLT(hw_, HW::Xe_LP)>::type
opSend(Opcode op,const InstructionModifier & mod,SharedFunction sfid,const RegData & dst,const RegData & src0,const RegData & src1,const RegData & exdesc,D desc)1917 BinaryCodeGenerator<hw>::opSend(Opcode op, const InstructionModifier &mod, SharedFunction sfid, const RegData &dst, const RegData &src0, const RegData &src1, const RegData &exdesc, D desc)
1918 {
1919     opSends(static_cast<Opcode>(static_cast<uint8_t>(op) | 2), mod, dst, src0, src1, exdesc, desc);
1920 }
1921 
1922 template <HW hw>
1923 template <typename ED, typename D, HW hw_>
1924 typename std::enable_if<hwGE(hw_, HW::Xe_LP)>::type
opSend(Opcode op,const InstructionModifier & mod,SharedFunction sfid,const RegData & dst,const RegData & src0,const RegData & src1,ED exdesc,D desc)1925 BinaryCodeGenerator<hw>::opSend(Opcode op, const InstructionModifier &mod, SharedFunction sfid, const RegData &dst, const RegData &src0, const RegData &src1, ED exdesc, D desc)
1926 {
1927     typename EncodingTag12Dispatch<hw>::tag tag;
1928     Instruction12 i{};
1929     InstructionModifier emod = mod | defaultModifier;
1930 
1931     encodeCommon12(i, op, emod, dst, tag);
1932 
1933     i.send.fusionCtrl = emod.isSerialized();
1934 
1935     i.send.dstReg = dst.getBase();
1936     i.send.src0Reg = src0.getBase();
1937     i.send.src1Reg = src1.getBase();
1938 
1939     i.send.dstRegFile = getRegFile(dst);
1940     i.send.src0RegFile = getRegFile(src0);
1941     i.send.src1RegFile = getRegFile(src1);
1942 
1943     i.send.sfid = static_cast<int>(sfid) & 0xF;
1944 
1945     encodeSendDesc(i, desc);
1946     encodeSendExDesc(i, exdesc);
1947 
1948     db(i);
1949 }
1950 
1951 template <HW hw>
1952 template <HW hw_>
1953 typename std::enable_if<hwLT(hw_, HW::Xe_LP)>::type
opSend(Opcode op,const InstructionModifier & mod,const RegData & dst,const RegData & src0,uint32_t exdesc,uint32_t desc)1954 BinaryCodeGenerator<hw>::opSend(Opcode op, const InstructionModifier &mod, const RegData &dst, const RegData &src0, uint32_t exdesc, uint32_t desc)
1955 {
1956     Instruction8 i{};
1957     InstructionModifier emod = mod | defaultModifier;
1958 
1959     encodeCommon8(i, op, emod);
1960 
1961     i.binary.dst  = encodeBinaryOperand8<true>(dst).bits;
1962     i.binary.src0 = encodeBinaryOperand8<false>(src0).bits;
1963 
1964     i.sendsGen9.dstRegFile = getRegFile(dst);
1965     i.binary.src0RegFile = getRegFile(src0);
1966     i.binary.src1RegFile = RegFileIMM;
1967 
1968     i.binary.dstType = getTypecode<hw>(dst.getType());
1969 
1970     i.sendsGen9.sfid = exdesc & 0xF;
1971     i.sendGen8.zero = 0;
1972     i.sendGen8.exDesc16_19 = (exdesc >> 16) & 0xF;
1973     i.sendGen8.exDesc20_23 = (exdesc >> 20) & 0xF;
1974     i.sendGen8.exDesc24_27 = (exdesc >> 24) & 0xF;
1975     i.sendGen8.exDesc28_31 = (exdesc >> 28) & 0xF;
1976     i.sendsGen9.desc = desc;
1977 
1978     i.sendsGen9.eot = (exdesc >> 5) & 1;
1979     if (dst.isIndirect()) i.sendsGen9.dstAddrImm9 = dst.getOffset() >> 9;
1980 
1981     db(i);
1982 }
1983 
1984 template <HW hw>
1985 template <HW hw_>
1986 typename std::enable_if<hwLT(hw_, HW::Xe_LP)>::type
opSend(Opcode op,const InstructionModifier & mod,const RegData & dst,const RegData & src0,uint32_t exdesc,const RegData & desc)1987 BinaryCodeGenerator<hw>::opSend(Opcode op, const InstructionModifier &mod, const RegData &dst, const RegData &src0, uint32_t exdesc, const RegData &desc)
1988 {
1989 #ifdef NGEN_SAFE
1990     // Only a0.0:ud is allowed for desc.
1991     if (!desc.isARF() || desc.getARFType() != ARFType::a || desc.getARFBase() != 0 || desc.getOffset() != 0)
1992         throw invalid_arf_exception();
1993 #endif
1994     Instruction8 i{};
1995     InstructionModifier emod = mod | defaultModifier;
1996 
1997     encodeCommon8(i, op, emod);
1998 
1999     i.binary.dst = encodeBinaryOperand8<true>(dst).bits;
2000     i.binary.src0 = encodeBinaryOperand8<false>(src0).bits;
2001     i.binary.src1 = encodeBinaryOperand8<false>(desc).bits;
2002 
2003     i.sendsGen9.dstRegFile = getRegFile(dst);
2004     i.binary.src0RegFile = getRegFile(src0);
2005     i.binary.src1RegFile = getRegFile(desc);
2006     i.binary.src1Type = getTypecode<hw>(desc.getType());
2007 
2008     i.sendsGen9.sfid = exdesc & 0xF;
2009     i.sendGen8.zero = 0;
2010     i.sendGen8.exDesc16_19 = (exdesc >> 16) & 0xF;
2011     i.sendGen8.exDesc20_23 = (exdesc >> 20) & 0xF;
2012     i.sendGen8.exDesc24_27 = (exdesc >> 24) & 0xF;
2013     i.sendGen8.exDesc28_31 = (exdesc >> 28) & 0xF;
2014 
2015     i.sendsGen9.eot = (exdesc >> 5) & 1;
2016     if (dst.isIndirect()) i.sendsGen9.dstAddrImm9 = dst.getOffset() >> 9;
2017 
2018     db(i);
2019 }
2020 
2021 template <HW hw>
2022 template <typename D, HW hw_>
2023 typename std::enable_if<hwGE(hw_, HW::Xe_LP)>::type
opSend(Opcode op,const InstructionModifier & mod,const RegData & dst,const RegData & src0,uint32_t exdesc,D desc)2024 BinaryCodeGenerator<hw>::opSend(Opcode op, const InstructionModifier &mod, const RegData &dst, const RegData &src0, uint32_t exdesc, D desc)
2025 {
2026     opSends(op, mod, dst, src0, null, exdesc, desc);
2027 }
2028 
2029 template <HW hw>
2030 template <typename ED, typename D, HW hw_>
2031 typename std::enable_if<hwLT(hw_, HW::Xe_LP)>::type
opSends(Opcode op,const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1,ED exdesc,D desc)2032 BinaryCodeGenerator<hw>::opSends(Opcode op, const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1, ED exdesc, D desc)
2033 {
2034     Instruction8 i{};
2035     InstructionModifier emod = mod | defaultModifier;
2036 
2037     encodeCommon8(i, op, emod);
2038 
2039     i.binary.dst = encodeBinaryOperand8<true>(dst).bits;
2040     i.binary.src0 = encodeBinaryOperand8<false>(src0).bits;
2041 
2042     i.binary.src0RegFile = 0;                   // ?
2043     i.sendsGen9.dstRegFile = getRegFile(dst);
2044     i.sendsGen9.src1RegFile = getRegFile(src1);
2045     i.sendsGen9.src1RegNum = src1.getBase();
2046 
2047     if (dst.isIndirect())  i.sendsGen9.dstAddrImm9  =  dst.getOffset() >> 9;
2048     if (src0.isIndirect()) i.sendsGen9.src0AddrImm9 = src0.getOffset() >> 9;
2049 
2050     encodeSendsDesc(i, desc);
2051     encodeSendsExDesc(i, exdesc);
2052 
2053     db(i);
2054 }
2055 
2056 template <HW hw>
2057 template <typename D, HW hw_>
2058 typename std::enable_if<hwGE(hw_, HW::Xe_LP)>::type
opSends(Opcode op,const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1,RegData exdesc,D desc)2059 BinaryCodeGenerator<hw>::opSends(Opcode op, const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1, RegData exdesc, D desc)
2060 {
2061 #ifdef NGEN_SAFE
2062     throw sfid_needed_exception();
2063 #endif
2064 }
2065 
2066 template <HW hw>
2067 template <typename D, HW hw_>
2068 typename std::enable_if<hwGE(hw_, HW::Xe_LP)>::type
opSends(Opcode op,const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1,uint32_t exdesc,D desc)2069 BinaryCodeGenerator<hw>::opSends(Opcode op, const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1, uint32_t exdesc, D desc)
2070 {
2071     Opcode mop = static_cast<Opcode>(static_cast<int>(op) & ~2);
2072     opSend(mop, mod, static_cast<SharedFunction>(exdesc & 0x1F), dst, src0, src1, exdesc, desc);
2073 }
2074 
2075 template <HW hw>
2076 template <HW hw_>
2077 typename std::enable_if<hwLT(hw_, HW::Xe_LP)>::type
opBranch(Opcode op,const InstructionModifier & mod,const RegData & dst,int32_t jip,int32_t uip)2078 BinaryCodeGenerator<hw>::opBranch(Opcode op, const InstructionModifier &mod, const RegData &dst, int32_t jip, int32_t uip)
2079 {
2080     Instruction8 i{};
2081     InstructionModifier emod = mod | defaultModifier;
2082 
2083     encodeCommon8(i, op, emod);
2084 
2085     i.binary.dst = encodeBinaryOperand8<true>(dst).bits;
2086     i.binary.dstRegFile = getRegFile(dst);
2087     i.binary.dstType = getTypecode<hw>(dst.getType());
2088     i.binary.src0RegFile = getRegFile(Immediate());
2089     i.binary.src0Type = getTypecode<hw>(DataType::d);
2090     i.branches.jip = jip;
2091     i.branches.uip = uip;
2092 
2093     db(i);
2094 }
2095 
2096 template <HW hw>
2097 template <HW hw_>
2098 typename std::enable_if<hwGE(hw_, HW::Xe_LP)>::type
opBranch(Opcode op,const InstructionModifier & mod,const RegData & dst,int32_t jip,int32_t uip)2099 BinaryCodeGenerator<hw>::opBranch(Opcode op, const InstructionModifier &mod, const RegData &dst, int32_t jip, int32_t uip)
2100 {
2101     typename EncodingTag12Dispatch<hw>::tag tag;
2102     Instruction12 i{};
2103     InstructionModifier emod = mod | defaultModifier;
2104 
2105     encodeCommon12(i, op, emod, dst, tag);
2106 
2107     i.binary.dst = encodeBinaryOperand12<true, false>(dst, tag).bits;
2108 
2109     i.binary.src0Imm = true;
2110     i.binary.src1Imm = true;
2111 
2112     i.branches.jip = jip;
2113     i.branches.uip = uip;
2114 
2115     db(i);
2116 }
2117 
2118 template <HW hw>
2119 template <bool forceWE, HW hw_>
2120 typename std::enable_if<hwLT(hw_, HW::Xe_LP)>::type
opBranch(Opcode op,const InstructionModifier & mod,const RegData & dst,int32_t jip)2121 BinaryCodeGenerator<hw>::opBranch(Opcode op, const InstructionModifier &mod, const RegData &dst, int32_t jip)
2122 {
2123     Instruction8 i{};
2124     InstructionModifier emod = mod | defaultModifier;
2125     if (forceWE)
2126         emod |= NoMask;
2127 
2128     encodeCommon8(i, op, emod);
2129 
2130     i.binary.dst = encodeBinaryOperand8<true>(dst).bits;
2131     i.binary.dstRegFile = getRegFile(dst);
2132     i.binary.dstType = getTypecode<hw>(dst.getType());
2133     i.binary.src1RegFile = RegFileIMM;
2134     i.binary.src1Type = getTypecode<hw>(DataType::d);
2135     i.branches.jip = jip;
2136 
2137     db(i);
2138 }
2139 
2140 template <HW hw>
2141 template <bool forceWE, HW hw_>
2142 typename std::enable_if<hwGE(hw_, HW::Xe_LP)>::type
opBranch(Opcode op,const InstructionModifier & mod,const RegData & dst,int32_t jip)2143 BinaryCodeGenerator<hw>::opBranch(Opcode op, const InstructionModifier &mod, const RegData &dst, int32_t jip)
2144 {
2145     typename EncodingTag12Dispatch<hw>::tag tag;
2146     Instruction12 i{};
2147     InstructionModifier emod = mod | defaultModifier;
2148     if (forceWE)
2149         emod |= NoMask;
2150 
2151     encodeCommon12(i, op, emod, dst, tag);
2152 
2153     i.binary.dst = encodeBinaryOperand12<true, false>(dst, tag).bits;
2154     i.binary.src0Imm = true;
2155     i.branches.jip = jip;
2156 
2157     db(i);
2158 }
2159 
2160 template <HW hw>
2161 template <bool forceWE, bool small12, HW hw_>
2162 typename std::enable_if<hwLT(hw_, HW::Xe_LP)>::type
opBranch(Opcode op,const InstructionModifier & mod,const RegData & dst,const RegData & src0)2163 BinaryCodeGenerator<hw>::opBranch(Opcode op, const InstructionModifier &mod, const RegData &dst, const RegData &src0)
2164 {
2165     Instruction8 i{};
2166     InstructionModifier emod = mod | defaultModifier;
2167     if (forceWE)
2168         emod |= NoMask;
2169 
2170     encodeCommon8(i, op, emod);
2171 
2172     i.binary.dst = encodeBinaryOperand8<true>(dst).bits;
2173     i.binary.dstRegFile = getRegFile(dst);
2174     i.binary.dstType = getTypecode<hw>(DataType::d);
2175     i.binary.src0RegFile = getRegFile(src0);
2176     i.binary.src0Type = getTypecode<hw>(DataType::d);
2177     i.binary.src0 = encodeBinaryOperand8<false>(src0).bits;
2178 
2179     db(i);
2180 }
2181 
2182 template <HW hw>
2183 template <bool forceWE, bool small12, HW hw_>
2184 typename std::enable_if<hwGE(hw_, HW::Xe_LP)>::type
opBranch(Opcode op,const InstructionModifier & mod,const RegData & dst,const RegData & src0)2185 BinaryCodeGenerator<hw>::opBranch(Opcode op, const InstructionModifier &mod, const RegData &dst, const RegData &src0)
2186 {
2187     typename EncodingTag12Dispatch<hw>::tag tag;
2188     Instruction12 i{};
2189     InstructionModifier emod = mod | defaultModifier;
2190     if (forceWE)
2191         emod |= NoMask;
2192 
2193     encodeCommon12(i, op, emod, dst, tag);
2194 
2195     i.binary.dst = encodeBinaryOperand12<true, false>(dst, tag).bits;
2196     i.binary.src0 = encodeBinaryOperand12<false, false>(src0, tag).bits;
2197     if (small12)
2198         i.binary.src0 &= 0xFFFF;
2199 
2200     db(i);
2201 }
2202 
2203 template <HW hw>
opBranch(Opcode op,const InstructionModifier & mod,const RegData & dst,Label & jip,Label & uip)2204 void BinaryCodeGenerator<hw>::opBranch(Opcode op, const InstructionModifier &mod, const RegData &dst, Label &jip, Label &uip)
2205 {
2206     addFixup(LabelFixup(jip.getID(labelManager), LabelFixup::JIPOffset));
2207     addFixup(LabelFixup(uip.getID(labelManager), LabelFixup::UIPOffset));
2208     opBranch(op, mod, dst, 0, 0);
2209 }
2210 
2211 template <HW hw>
2212 template <bool forceWE>
opBranch(Opcode op,const InstructionModifier & mod,const RegData & dst,Label & jip)2213 void BinaryCodeGenerator<hw>::opBranch(Opcode op, const InstructionModifier &mod, const RegData &dst, Label &jip)
2214 {
2215     addFixup(LabelFixup(jip.getID(labelManager), LabelFixup::JIPOffset));
2216     opBranch<forceWE>(op, mod, dst, 0);
2217 }
2218 
2219 template <HW hw>
opCall(Opcode op,const InstructionModifier & mod,const RegData & dst,Label & jip)2220 void BinaryCodeGenerator<hw>::opCall(Opcode op, const InstructionModifier &mod, const RegData &dst, Label &jip)
2221 {
2222     addFixup(LabelFixup(jip.getID(labelManager), LabelFixup::JIPOffset));
2223     if (isXe)
2224         opBranch<true>(op, mod, dst, 0);
2225     else
2226         opX<true>(op, DataType::d, mod, dst, null.ud(0)(0, 1, 0), Immediate::d(0));
2227 }
2228 
2229 template <HW hw>
2230 template <HW hw_>
2231 typename std::enable_if<hwLT(hw_, HW::Xe_LP)>::type
opJmpi(Opcode op,const InstructionModifier & mod,const RegData & dst,RegData src0,uint32_t jip)2232 BinaryCodeGenerator<hw>::opJmpi(Opcode op, const InstructionModifier &mod, const RegData &dst, RegData src0, uint32_t jip)
2233 {
2234     Instruction8 i{};
2235     InstructionModifier emod = mod | defaultModifier | NoMask;
2236 
2237     encodeCommon8(i, op, emod);
2238 
2239     src0.fixup(emod.getExecSize(), DataType::d, false, 2);
2240 
2241     i.binary.dst = encodeBinaryOperand8<true>(dst).bits;
2242     i.binary.src0 = encodeBinaryOperand8<false>(src0).bits;
2243     i.binary.src0RegFile = getRegFile(src0);
2244     i.binary.src1RegFile = RegFileIMM;
2245     i.binary.src1Type = getTypecode<hw>(DataType::d);
2246 
2247     i.branches.jip = jip;
2248 
2249     db(i);
2250 }
2251 
2252 template <HW hw>
2253 template <HW hw_>
2254 typename std::enable_if<hwGE(hw_, HW::Xe_LP)>::type
opJmpi(Opcode op,const InstructionModifier & mod,const RegData & dst,RegData src0,uint32_t jip)2255 BinaryCodeGenerator<hw>::opJmpi(Opcode op, const InstructionModifier &mod, const RegData &dst, RegData src0, uint32_t jip)
2256 {
2257     opBranch<true>(op, mod, dst, jip);
2258 }
2259 
2260 template <HW hw>
opJmpi(Opcode op,const InstructionModifier & mod,const RegData & dst,const RegData & src0,Label & jip)2261 void BinaryCodeGenerator<hw>::opJmpi(Opcode op, const InstructionModifier &mod, const RegData &dst, const RegData &src0, Label &jip)
2262 {
2263     if (hw >= HW::Xe_LP)
2264         addFixup(LabelFixup(jip.getID(labelManager), LabelFixup::JIPOffset));
2265     opJmpi(op, mod, dst, src0, 0);
2266     if (hw < HW::Xe_LP)
2267         addFixup(LabelFixup(jip.getID(labelManager), LabelFixup::JIPOffsetJMPI));
2268 }
2269 
2270 template <HW hw>
opSync(Opcode op,SyncFunction fc,const InstructionModifier & mod)2271 void BinaryCodeGenerator<hw>::opSync(Opcode op, SyncFunction fc, const InstructionModifier &mod)
2272 {
2273     if (hw < HW::Xe_LP)
2274         unsupported();
2275 
2276     typename EncodingTag12Dispatch<hw>::tag tag;
2277     Instruction12 i{};
2278     InstructionModifier emod = mod | defaultModifier;
2279 
2280     encodeCommon12(i, op, emod, null, tag);
2281 
2282     i.binary.dst = 0x1;
2283     i.binary.cmod = static_cast<int>(fc);
2284 
2285     db(i);
2286 }
2287 
2288 template <HW hw>
opSync(Opcode op,SyncFunction fc,const InstructionModifier & mod,RegData src0)2289 void BinaryCodeGenerator<hw>::opSync(Opcode op, SyncFunction fc, const InstructionModifier &mod, RegData src0)
2290 {
2291     typename EncodingTag12Dispatch<hw>::tag tag;
2292     if (hw < HW::Xe_LP)
2293         unsupported();
2294 
2295     Instruction12 i{};
2296     InstructionModifier emod = mod | defaultModifier;
2297 
2298     encodeCommon12(i, op, emod, null, tag);
2299 
2300     i.binary.dst = 0x1;
2301     if (!src0.isNull()) {
2302         src0.setRegion(0, 1, 0);
2303         i.binary.src0 = encodeBinaryOperand12<false>(src0, tag).bits;
2304         i.binary.src0Type = getTypecode12(src0.getType());
2305     }
2306     i.binary.cmod = static_cast<int>(fc);
2307 
2308     db(i);
2309 }
2310 
2311 template <HW hw>
opSync(Opcode op,SyncFunction fc,const InstructionModifier & mod,const Immediate & src0)2312 void BinaryCodeGenerator<hw>::opSync(Opcode op, SyncFunction fc, const InstructionModifier &mod, const Immediate &src0)
2313 {
2314     if (hw < HW::Xe_LP)
2315         unsupported();
2316 
2317     typename EncodingTag12Dispatch<hw>::tag tag;
2318     Instruction12 i{};
2319     InstructionModifier emod = mod | defaultModifier;
2320 
2321     encodeCommon12(i, op, emod, null, tag);
2322 
2323     i.binary.dst = 0x1;
2324     i.binary.src0Type = getTypecode12(src0.getType());
2325     i.binary.src0Imm = true;
2326     i.binary.cmod = static_cast<int>(fc);
2327 
2328     i.imm32.value = static_cast<uint64_t>(src0);
2329 
2330     db(i);
2331 }
2332 
2333 template <HW hw>
opNop(Opcode op)2334 void BinaryCodeGenerator<hw>::opNop(Opcode op)
2335 {
2336     Instruction8 i{};
2337 
2338     i.qword[0] = static_cast<int>(op);
2339     i.qword[1] = 0;
2340 
2341     db(i);
2342 }
2343 
2344 } /* namespace ngen */
2345 
2346 #endif /* header guard */
2347