1 /*******************************************************************************
2 * Copyright 2019-2021 Intel Corporation
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 *******************************************************************************/
16
17 // nGEN: a C++ library for runtime Gen assembly generation.
18 //
19 // Macros that control nGEN's interface:
20 // NGEN_SAFE if defined, enables run-time safety checks. Exceptions will be thrown if checks fail.
21 // NGEN_SHORT_NAMES if defined, enables some short names (r[...] for indirect addressing, W for NoMask)
22 // NGEN_GLOBAL_REGS if defined, register names and instruction modifiers (r7, cr0, Switch, etc.) are
23 // global variables in the ngen namespace. Otherwise, they are members of the code
24 // generator classes
25 // NGEN_CPP11 if defined, ngen is C++11-compatible (C++17 not required)
26
27 #ifndef NGEN_HPP
28 #define NGEN_HPP
29
#include <array>
#include <cstring>
#include <memory>
#include <type_traits>
#include <vector>
34
35 #include "ngen_core.hpp"
36 #include "ngen_auto_swsb.hpp"
37
38 namespace ngen {
39
40 // Forward declarations.
41 template <HW hw> class BinaryCodeGenerator;
42 template <HW hw> class ELFCodeGenerator;
43
44 // MSVC v140 workaround for enum comparison in template arguments.
hwLT(HW hw1,HW hw2)45 static constexpr bool hwLT(HW hw1, HW hw2) { return hw1 < hw2; }
hwLE(HW hw1,HW hw2)46 static constexpr bool hwLE(HW hw1, HW hw2) { return hw1 <= hw2; }
hwGE(HW hw1,HW hw2)47 static constexpr bool hwGE(HW hw1, HW hw2) { return hw1 >= hw2; }
hwGT(HW hw1,HW hw2)48 static constexpr bool hwGT(HW hw1, HW hw2) { return hw1 > hw2; }
49
50 // -----------------------------------------------------------------------
51
// Register-file selector values returned by the getRegFile() overloads.
// NOTE(review): the value 2 is not assigned here; presumably these numbers
// mirror a hardware operand encoding -- confirm against the encoders.
enum RegFiles : unsigned {
    RegFileARF = 0,     // RegData for which isARF() is true
    RegFileGRF = 1,     // any other RegData
    RegFileIMM = 3,     // Immediate operands
};
57
getRegFile(const RegData & rd)58 inline unsigned getRegFile(const RegData &rd) { return rd.isARF() ? RegFileARF : RegFileGRF; }
getRegFile(const Align16Operand & o)59 inline unsigned getRegFile(const Align16Operand &o) { return getRegFile(o.getReg()); }
getRegFile(const ExtendedReg & reg)60 inline unsigned getRegFile(const ExtendedReg ®) { return getRegFile(reg.getBase()); }
getRegFile(const Immediate & imm)61 inline unsigned getRegFile(const Immediate &imm) { return RegFileIMM; }
62
63 // -----------------------------------------------------------------------
64 // Binary formats, split between pre-Xe and post-Xe.
65
66 #include "ngen_gen8.hpp"
67 #include "ngen_xe.hpp"
68
69 // -----------------------------------------------------------------------
70
71
// Record of one label reference that must be patched after label targets
// are known. `anchor` starts at zero and is filled in by whoever registers
// the fixup; `offset` is the displacement from the anchor to the patched field.
class LabelFixup {
public:
    // Displacements available for use as `offset`.
    // NOTE(review): presumably the positions of the JIP/UIP fields relative
    // to the anchor -- confirm against the instruction encoders.
    static constexpr auto JIPOffset = 12;
    static constexpr auto JIPOffsetJMPI = -4;
    static constexpr auto UIPOffset = 8;

    uint32_t labelID;
    int32_t anchor = 0;
    int32_t offset;

    LabelFixup(uint32_t labelID_, int32_t offset_)
        : labelID(labelID_), offset(offset_)
    {}
};
84
85 #if defined(NGEN_GLOBAL_REGS) && !defined(NGEN_GLOBAL_REGS_DEFINED)
86 #define NGEN_GLOBAL_REGS_DEFINED
87 #include "ngen_registers.hpp"
88 #endif
89
90 template <HW hw>
91 class BinaryCodeGenerator
92 {
93 friend class ELFCodeGenerator<hw>;
94
95 protected:
96 class InstructionStream {
97 friend class BinaryCodeGenerator;
98
99 std::vector<LabelFixup> fixups;
100 std::vector<uint32_t> labels;
101 std::vector<uint64_t> code;
102 bool appended = false;
103
length() const104 int length() const { return int(code.size() * sizeof(uint64_t)); }
105
db(const Instruction8 & i)106 void db(const Instruction8 &i) {
107 code.push_back(i.qword[0]);
108 code.push_back(i.qword[1]);
109 }
110
db(const Instruction12 & i)111 void db(const Instruction12 &i) {
112 code.push_back(i.qword[0]);
113 code.push_back(i.qword[1]);
114 }
115
addFixup(LabelFixup fixup)116 void addFixup(LabelFixup fixup) {
117 fixup.anchor = length();
118 fixups.push_back(fixup);
119 }
120
mark(Label & label,LabelManager & man)121 void mark(Label &label, LabelManager &man) {
122 uint32_t id = label.getID(man);
123
124 man.setTarget(id, length());
125 labels.push_back(id);
126 }
127
fixLabels(LabelManager & man)128 void fixLabels(LabelManager &man) {
129 for (const auto &fixup : fixups) {
130 int32_t target = man.getTarget(fixup.labelID);
131 uint8_t *field = ((uint8_t *) code.data()) + fixup.anchor + fixup.offset;
132 *((int32_t *) field) = target - fixup.anchor;
133 }
134 }
135
append(InstructionStream & other,LabelManager & man)136 void append(InstructionStream &other, LabelManager &man) {
137 auto offset = length();
138 auto sz = code.size();
139
140 code.resize(sz + other.code.size());
141 std::copy(other.code.begin(), other.code.end(), code.begin() + sz);
142
143 sz = labels.size();
144 labels.resize(sz + other.labels.size());
145 std::copy(other.labels.begin(), other.labels.end(), labels.begin() + sz);
146
147 for (LabelFixup fixup : other.fixups) {
148 fixup.anchor += offset;
149 fixups.push_back(fixup);
150 }
151
152 #ifdef NGEN_SAFE
153 if (other.appended && !other.labels.empty())
154 throw multiple_label_exception();
155 #endif
156
157 for (uint32_t id : other.labels)
158 man.offsetTarget(id, offset);
159
160 other.appended = true;
161 }
162
InstructionStream()163 InstructionStream() {}
164 };
165
166 class Program {
167 friend class BinaryCodeGenerator;
168 using Instruction = Instruction12;
169 std::vector<uint64_t> &code;
170
Program(InstructionStream & stream)171 Program(InstructionStream &stream) : code(stream.code) {};
172
173 public:
size() const174 size_t size() const { return code.size() >> 1; }
operator [](size_t index)175 Instruction &operator[](size_t index) { return *reinterpret_cast<Instruction *>(&code[index * 2]); }
operator [](size_t index) const176 const Instruction &operator[](size_t index) const { return *reinterpret_cast<Instruction *>(&code[index * 2]); }
177 };
178
// Hardware generation this generator is instantiated for.
static constexpr HW hardware = hw;
// True for HW::Xe_LP and later; the instruction wrappers below use it to
// pick between the *_xe opcodes and the older ones.
static constexpr bool isXe = (hw >= HW::Xe_LP);

// NOTE(review): not referenced in the visible part of this file; presumably
// marked by prologue code elsewhere -- confirm.
Label _labelLocalIDsLoaded;
Label _labelArgsLoaded;
184
private:
// Modifier state edited through setDefaultNoMask()/setDefaultAutoSWSB().
InstructionModifier defaultModifier;

// Label state shared by all streams (passed to mark() and append()).
LabelManager labelManager;
// Bottom stream; the constructor pushes it onto streamStack first.
InstructionStream rootStream;
// Stack of active streams; new instructions go to streamStack.back().
std::vector<InstructionStream*> streamStack;
191
db(const Instruction8 & i)192 void db(const Instruction8 &i) { streamStack.back()->db(i); }
db(const Instruction12 & i)193 void db(const Instruction12 &i) { streamStack.back()->db(i); }
addFixup(LabelFixup fixup)194 void addFixup(LabelFixup fixup) { streamStack.back()->addFixup(fixup); }
195
// opX: generic instruction emitters, defined outside this class body.
// Each signature comes in hardware-specific variants; std::enable_if on the
// defaulted hw_ parameter keeps exactly the matching variant viable.

// One source operand (register-like or immediate).
template <bool forceWE = false, typename D, typename S0, HW hw_ = hw>
typename std::enable_if<hwLT(hw_, HW::Xe_LP)>::type opX(Opcode op, DataType defaultType, const InstructionModifier &mod, D dst, S0 src0);
template <bool forceWE = false, typename D, typename S0, HW hw_ = hw>
typename std::enable_if<hwGE(hw_, HW::Xe_LP)>::type opX(Opcode op, DataType defaultType, const InstructionModifier &mod, D dst, S0 src0);
template <bool forceWE = false, typename D, HW hw_ = hw>
typename std::enable_if<hwLT(hw_, HW::Xe_LP)>::type opX(Opcode op, DataType defaultType, const InstructionModifier &mod, D dst, const Immediate &src0);
template <bool forceWE = false, typename D, HW hw_ = hw>
typename std::enable_if<hwGE(hw_, HW::Xe_LP)>::type opX(Opcode op, DataType defaultType, const InstructionModifier &mod, D dst, const Immediate &src0);

// Two source operands (src1 register-like or immediate).
template <bool forceWE = false, typename D, typename S0, typename S1, HW hw_ = hw>
typename std::enable_if<hwLT(hw_, HW::Xe_LP)>::type opX(Opcode op, DataType defaultType, const InstructionModifier &mod, D dst, S0 src0, S1 src1);
template <bool forceWE = false, typename D, typename S0, typename S1, HW hw_ = hw>
typename std::enable_if<hwGE(hw_, HW::Xe_LP)>::type opX(Opcode op, DataType defaultType, const InstructionModifier &mod, D dst, S0 src0, S1 src1);
template <bool forceWE = false, typename D, typename S0, HW hw_ = hw>
typename std::enable_if<hwLT(hw_, HW::Xe_LP)>::type opX(Opcode op, DataType defaultType, const InstructionModifier &mod, D dst, S0 src0, const Immediate &src1);
template <bool forceWE = false, typename D, typename S0, HW hw_ = hw>
typename std::enable_if<hwGE(hw_, HW::Xe_LP)>::type opX(Opcode op, DataType defaultType, const InstructionModifier &mod, D dst, S0 src0, const Immediate &src1);

// Three source operands. The all-RegData form exists only up to HW::Gen9 and
// the Align16Operand form only before HW::Xe_LP.
template <HW hw_ = hw>
typename std::enable_if<hwLE(hw_, HW::Gen9)>::type opX(Opcode op, DataType defaultType, const InstructionModifier &mod, RegData dst, RegData src0, RegData src1, RegData src2);
template <HW hw_ = hw>
typename std::enable_if<hwLT(hw_, HW::Xe_LP)>::type opX(Opcode op, DataType defaultType, const InstructionModifier &mod, Align16Operand dst, Align16Operand src0, Align16Operand src1, Align16Operand src2);
template <typename D, typename S0, typename S1, typename S2, HW hw_ = hw>
typename std::enable_if<hwLT(hw_, HW::Xe_LP)>::type opX(Opcode op, DataType defaultType, const InstructionModifier &mod, D dst, S0 src0, S1 src1, S2 src2);
template <typename D, typename S0, typename S1, typename S2, HW hw_ = hw>
typename std::enable_if<hwGE(hw_, HW::Xe_LP)>::type opX(Opcode op, DataType defaultType, const InstructionModifier &mod, D dst, S0 src0, S1 src1, S2 src2);
222
// opMath: emitters taking a MathFunction selector `fc`, with one or two
// sources. dst and src0 share the same type DS0. Defined outside this class body.
template <typename DS0>
void opMath(Opcode op, DataType defaultType, const InstructionModifier &mod, MathFunction fc, DS0 dst, DS0 src0);
template <typename DS0, typename S1>
void opMath(Opcode op, DataType defaultType, const InstructionModifier &mod, MathFunction fc, DS0 dst, DS0 src0, S1 src1);
227
// opSend: send-instruction emitters, defined outside this class body.
// Variants are selected by hardware generation through std::enable_if.

// Forms taking an explicit SharedFunction and two sources; before HW::Xe_LP
// the extended descriptor is either a uint32_t or a RegData.
template <typename D, HW hw_ = hw>
typename std::enable_if<hwLT(hw_, HW::Xe_LP)>::type opSend(Opcode op, const InstructionModifier &mod, SharedFunction sfid, const RegData &dst, const RegData &src0, const RegData &src1, uint32_t exdesc, D desc);
template <typename D, HW hw_ = hw>
typename std::enable_if<hwLT(hw_, HW::Xe_LP)>::type opSend(Opcode op, const InstructionModifier &mod, SharedFunction sfid, const RegData &dst, const RegData &src0, const RegData &src1, const RegData &exdesc, D desc);
template <typename ED, typename D, HW hw_ = hw>
typename std::enable_if<hwGE(hw_, HW::Xe_LP)>::type opSend(Opcode op, const InstructionModifier &mod, SharedFunction sfid, const RegData &dst, const RegData &src0, const RegData &src1, ED exdesc, D desc);

// Single-source forms without an explicit SharedFunction argument.
template <HW hw_ = hw>
typename std::enable_if<hwLT(hw_, HW::Xe_LP)>::type opSend(Opcode op, const InstructionModifier &mod, const RegData &dst, const RegData &src0, uint32_t exdesc, uint32_t desc);
template <HW hw_ = hw>
typename std::enable_if<hwLT(hw_, HW::Xe_LP)>::type opSend(Opcode op, const InstructionModifier &mod, const RegData &dst, const RegData &src0, uint32_t exdesc, const RegData &desc);
template <typename D, HW hw_ = hw>
typename std::enable_if<hwGE(hw_, HW::Xe_LP)>::type opSend(Opcode op, const InstructionModifier &mod, const RegData &dst, const RegData &src0, uint32_t exdesc, D desc);
241
// opSends: two-source send emitters without a SharedFunction argument,
// defined outside this class body. From HW::Xe_LP on, the extended
// descriptor is either a uint32_t or a RegData.
template <typename ED, typename D, HW hw_ = hw>
typename std::enable_if<hwLT(hw_, HW::Xe_LP)>::type opSends(Opcode op, const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1, ED exdesc, D desc);
template <typename D, HW hw_ = hw>
typename std::enable_if<hwGE(hw_, HW::Xe_LP)>::type opSends(Opcode op, const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1, uint32_t exdesc, D desc);
template <typename D, HW hw_ = hw>
typename std::enable_if<hwGE(hw_, HW::Xe_LP)>::type opSends(Opcode op, const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1, RegData exdesc, D desc);
248
// opBranch: branch emitters, defined outside this class body.

// Numeric jip + uip targets, per hardware generation.
template <HW hw_ = hw>
typename std::enable_if<hwLT(hw_, HW::Xe_LP)>::type opBranch(Opcode op, const InstructionModifier &mod, const RegData &dst, int32_t jip, int32_t uip);
template <HW hw_ = hw>
typename std::enable_if<hwGE(hw_, HW::Xe_LP)>::type opBranch(Opcode op, const InstructionModifier &mod, const RegData &dst, int32_t jip, int32_t uip);
// Numeric jip target only.
template <bool forceWE = false, HW hw_ = hw>
typename std::enable_if<hwLT(hw_, HW::Xe_LP)>::type opBranch(Opcode op, const InstructionModifier &mod, const RegData &dst, int32_t jip);
template <bool forceWE = false, HW hw_ = hw>
typename std::enable_if<hwGE(hw_, HW::Xe_LP)>::type opBranch(Opcode op, const InstructionModifier &mod, const RegData &dst, int32_t jip);
// Target supplied in a register (src0).
template <bool forceWE = false, bool small12 = true, HW hw_ = hw>
typename std::enable_if<hwLT(hw_, HW::Xe_LP)>::type opBranch(Opcode op, const InstructionModifier &mod, const RegData &dst, const RegData &src0);
template <bool forceWE = false, bool small12 = true, HW hw_ = hw>
typename std::enable_if<hwGE(hw_, HW::Xe_LP)>::type opBranch(Opcode op, const InstructionModifier &mod, const RegData &dst, const RegData &src0);

// Label targets; not constrained by hardware generation at declaration level.
void opBranch(Opcode op, const InstructionModifier &mod, const RegData &dst, Label &jip, Label &uip);
template <bool forceWE = false>
void opBranch(Opcode op, const InstructionModifier &mod, const RegData &dst, Label &jip);
// Call emitter with a Label target; used by call(mod, dst, Label&). Defined outside this class body.
void opCall(Opcode op, const InstructionModifier &mod, const RegData &dst, Label &jip);
266
// opJmpi: emitters with a numeric jip (per hardware generation) or a Label
// target. Defined outside this class body.
template <HW hw_ = hw>
typename std::enable_if<hwLT(hw_, HW::Xe_LP)>::type opJmpi(Opcode op, const InstructionModifier &mod, const RegData &dst, RegData src0, uint32_t jip);
template <HW hw_ = hw>
typename std::enable_if<hwGE(hw_, HW::Xe_LP)>::type opJmpi(Opcode op, const InstructionModifier &mod, const RegData &dst, RegData src0, uint32_t jip);
void opJmpi(Opcode op, const InstructionModifier &mod, const RegData &dst, const RegData &src0, Label &jip);
272
// opSync: emitters taking a SyncFunction selector `fc`, with no source, a
// register source, or an immediate source. Defined outside this class body.
void opSync(Opcode op, SyncFunction fc, const InstructionModifier &mod);
void opSync(Opcode op, SyncFunction fc, const InstructionModifier &mod, RegData src0);
void opSync(Opcode op, SyncFunction fc, const InstructionModifier &mod, const Immediate &src0);
276
// Emitter taking only an opcode. Defined outside this class body.
void opNop(Opcode op);
278
// Called by wrappers (e.g. dp4a) when the instruction is not available on
// `hw`. Defined outside this class body.
// NOTE(review): presumably throws only when NGEN_SAFE is defined -- confirm.
inline void unsupported();
280
281 #include "ngen_compiler_fix.hpp"
282
283 public:
BinaryCodeGenerator()284 BinaryCodeGenerator() : defaultModifier{}, labelManager{}, sync{this}, load{this}, store{this}, atomic{this} {
285 _workaround_();
286 pushStream(rootStream);
287 }
288
// Frees every stream still on the stack except entry 0 (rootStream, which is
// a member and is never heap-allocated by this class).
// NOTE(review): entries pushed via pushStream(InstructionStream &) are
// deleted here as well if still on the stack -- callers must pop them first.
~BinaryCodeGenerator() {
    size_t sn = 1;
    while (sn < streamStack.size()) {
        delete streamStack[sn];
        ++sn;
    }
}
293
// Returns the generated code as a byte vector. Defined outside this class body.
// NOTE(review): presumably resolves label fixups before copying -- confirm.
std::vector<uint8_t> getCode();
295
296 protected:
297 // Configuration.
setDefaultNoMask(bool def=true)298 void setDefaultNoMask(bool def = true) { defaultModifier.setWrEn(def); }
setDefaultAutoSWSB(bool def=true)299 void setDefaultAutoSWSB(bool def = true) { defaultModifier.setAutoSWSB(def); }
getDefaultNoMask() const300 bool getDefaultNoMask() const { return defaultModifier.isWrEn(); }
getDefaultAutoSWSB() const301 bool getDefaultAutoSWSB() const { return defaultModifier.isAutoSWSB(); }
302
303 // Stream handling.
pushStream()304 void pushStream() { pushStream(new InstructionStream()); }
pushStream(InstructionStream * s)305 void pushStream(InstructionStream *s) { streamStack.push_back(s); }
pushStream(InstructionStream & s)306 void pushStream(InstructionStream &s) { pushStream(&s); }
307
// Removes and returns the current stream. Defined outside this class body.
// Callers in this file either delete the result (discardStream) or append
// and then delete it (appendCurrentStream).
InstructionStream *popStream();
309
appendStream(InstructionStream * s)310 void appendStream(InstructionStream *s) { appendStream(*s); }
appendStream(InstructionStream & s)311 void appendStream(InstructionStream &s) { streamStack.back()->append(s, labelManager); }
appendCurrentStream()312 void appendCurrentStream() { InstructionStream *s = popStream(); appendStream(s); delete s; }
313
discardStream()314 void discardStream() { delete popStream(); }
315
// Accepts a comment of any type and ignores it: this generator emits nothing for it.
template <typename String>
void comment(String /*unused*/)
{
}
318
319 // Registers.
320 #ifndef NGEN_GLOBAL_REGS
321 #include "ngen_registers.hpp"
322 #endif
323
324 // Labels.
mark(Label & label)325 inline void mark(Label &label) { streamStack.back()->mark(label, labelManager); }
326
327 // Instructions.
328 template <typename DT = void>
add(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1)329 void add(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
330 opX(Opcode::add, getDataType<DT>(), mod, dst, src0, src1);
331 }
332 template <typename DT = void>
add(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const Immediate & src1)333 void add(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) {
334 opX(Opcode::add, getDataType<DT>(), mod, dst, src0, src1);
335 }
336 template <typename DT = void>
addc(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1)337 void addc(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
338 opX(Opcode::addc, getDataType<DT>(), mod | AccWrEn, dst, src0, src1);
339 }
340 template <typename DT = void>
addc(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const Immediate & src1)341 void addc(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) {
342 opX(Opcode::addc, getDataType<DT>(), mod | AccWrEn, dst, src0, src1);
343 }
344 template <typename DT = void>
and_(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1)345 void and_(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
346 opX(isXe ? Opcode::and_xe : Opcode::and_, getDataType<DT>(), mod, dst, src0, src1);
347 }
348 template <typename DT = void>
and_(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const Immediate & src1)349 void and_(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) {
350 opX(isXe ? Opcode::and_xe : Opcode::and_, getDataType<DT>(), mod, dst, src0, src1);
351 }
352 #ifndef NGEN_NO_OP_NAMES
353 template <typename DT = void>
and(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1)354 void and(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
355 and_<DT>(mod, dst, src0, src1);
356 }
357 template <typename DT = void>
and(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const Immediate & src1)358 void and(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) {
359 and_<DT>(mod, dst, src0, src1);
360 }
361 #endif
362 template <typename DT = void>
asr(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1)363 void asr(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
364 opX(isXe ? Opcode::asr_xe : Opcode::asr, getDataType<DT>(), mod, dst, src0, src1);
365 }
366 template <typename DT = void>
asr(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const Immediate & src1)367 void asr(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) {
368 opX(isXe ? Opcode::asr_xe : Opcode::asr, getDataType<DT>(), mod, dst, src0, src1);
369 }
370 template <typename DT = void>
avg(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1)371 void avg(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
372 opX(Opcode::avg, getDataType<DT>(), mod, dst, src0, src1);
373 }
374 template <typename DT = void>
avg(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const Immediate & src1)375 void avg(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) {
376 opX(Opcode::avg, getDataType<DT>(), mod, dst, src0, src1);
377 }
378 template <typename DT = void>
bfe(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1,const RegData & src2)379 void bfe(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1, const RegData &src2) {
380 opX(isXe ? Opcode::bfe_xe : Opcode::bfe, getDataType<DT>(), mod, dst, src0, src1, src2);
381 }
382 template <typename DT = void>
bfe(const InstructionModifier & mod,const RegData & dst,const Immediate & src0,const RegData & src1,const RegData & src2)383 void bfe(const InstructionModifier &mod, const RegData &dst, const Immediate &src0, const RegData &src1, const RegData &src2) {
384 opX(isXe ? Opcode::bfe_xe : Opcode::bfe, getDataType<DT>(), mod, dst, src0, src1, src2);
385 }
386 template <typename DT = void>
bfe(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1,const Immediate & src2)387 void bfe(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1, const Immediate &src2) {
388 opX(isXe ? Opcode::bfe_xe : Opcode::bfe, getDataType<DT>(), mod, dst, src0, src1, src2);
389 }
390 template <typename DT = void>
bfe(const InstructionModifier & mod,const RegData & dst,const Immediate & src0,const RegData & src1,const Immediate & src2)391 void bfe(const InstructionModifier &mod, const RegData &dst, const Immediate &src0, const RegData &src1, const Immediate &src2) {
392 opX(isXe ? Opcode::bfe_xe : Opcode::bfe, getDataType<DT>(), mod, dst, src0, src1, src2);
393 }
394 template <typename DT = void>
bfi1(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1)395 void bfi1(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
396 opX(isXe ? Opcode::bfi1_xe : Opcode::bfi1, getDataType<DT>(), mod, dst, src0, src1);
397 }
398 template <typename DT = void>
bfi1(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const Immediate & src1)399 void bfi1(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) {
400 opX(isXe ? Opcode::bfi1_xe : Opcode::bfi1, getDataType<DT>(), mod, dst, src0, src1);
401 }
402 template <typename DT = void>
bfi2(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1,const RegData & src2)403 void bfi2(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1, const RegData &src2) {
404 opX(isXe ? Opcode::bfi2_xe : Opcode::bfi2, getDataType<DT>(), mod, dst, src0, src1, src2);
405 }
406 template <typename DT = void>
bfi2(const InstructionModifier & mod,const RegData & dst,const Immediate & src0,const RegData & src1,const RegData & src2)407 void bfi2(const InstructionModifier &mod, const RegData &dst, const Immediate &src0, const RegData &src1, const RegData &src2) {
408 opX(isXe ? Opcode::bfi2_xe : Opcode::bfi2, getDataType<DT>(), mod, dst, src0, src1, src2);
409 }
410 template <typename DT = void>
bfi2(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1,const Immediate & src2)411 void bfi2(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1, const Immediate &src2) {
412 opX(isXe ? Opcode::bfi2_xe : Opcode::bfi2, getDataType<DT>(), mod, dst, src0, src1, src2);
413 }
414 template <typename DT = void>
bfi2(const InstructionModifier & mod,const RegData & dst,const Immediate & src0,const RegData & src1,const Immediate & src2)415 void bfi2(const InstructionModifier &mod, const RegData &dst, const Immediate &src0, const RegData &src1, const Immediate &src2) {
416 opX(isXe ? Opcode::bfi2_xe : Opcode::bfi2, getDataType<DT>(), mod, dst, src0, src1, src2);
417 }
418 template <typename DT = void>
bfrev(const InstructionModifier & mod,const RegData & dst,const RegData & src0)419 void bfrev(const InstructionModifier &mod, const RegData &dst, const RegData &src0) {
420 opX(isXe ? Opcode::bfrev_xe : Opcode::bfrev, getDataType<DT>(), mod, dst, src0);
421 }
422 template <typename DT = void>
bfrev(const InstructionModifier & mod,const RegData & dst,const Immediate & src0)423 void bfrev(const InstructionModifier &mod, const RegData &dst, const Immediate &src0) {
424 opX(isXe ? Opcode::bfrev_xe : Opcode::bfrev, getDataType<DT>(), mod, dst, src0);
425 }
brc(const InstructionModifier & mod,Label & jip,Label & uip)426 void brc(const InstructionModifier &mod, Label &jip, Label &uip) {
427 opBranch(Opcode::brc, mod, isXe ? null.ud() : ip.d(), jip, uip);
428 }
brc(const InstructionModifier & mod,RegData src0)429 void brc(const InstructionModifier &mod, RegData src0) {
430 src0.setRegion(2, 2, 1);
431 opBranch<true, true>(Opcode::brc, mod, isXe ? null.ud() : ip.d(), src0);
432 }
brd(const InstructionModifier & mod,Label & jip)433 void brd(const InstructionModifier &mod, Label &jip) {
434 opBranch(Opcode::brd, mod, isXe ? null.ud() : ip.d(), jip);
435 }
brd(const InstructionModifier & mod,RegData src0)436 void brd(const InstructionModifier &mod, RegData src0) {
437 src0.setRegion(2, 2, 1);
438 opBranch<true, true>(Opcode::brd, mod, isXe ? null.ud() : ip.d(), src0);
439 }
break_(const InstructionModifier & mod,Label & jip,Label & uip)440 void break_(const InstructionModifier &mod, Label &jip, Label &uip) {
441 opBranch(Opcode::break_, mod, null, jip, uip);
442 }
call(const InstructionModifier & mod,const RegData & dst,Label & jip)443 void call(const InstructionModifier &mod, const RegData &dst, Label &jip) {
444 opCall(Opcode::call, mod, dst, jip);
445 }
call(const InstructionModifier & mod,const RegData & dst,RegData jip)446 void call(const InstructionModifier &mod, const RegData &dst, RegData jip) {
447 if (isXe)
448 opBranch<true, true>(Opcode::call, mod, dst, jip);
449 else {
450 jip.setRegion(0, 1, 0);
451 opX<true>(Opcode::call, DataType::d, mod, dst, null.ud(0)(0, 1, 0), jip);
452 }
453 }
calla(const InstructionModifier & mod,const RegData & dst,int32_t jip)454 void calla(const InstructionModifier &mod, const RegData &dst, int32_t jip) {
455 if (isXe)
456 opBranch<true>(Opcode::calla, mod, dst, jip);
457 else
458 opX<true>(Opcode::calla, DataType::d, mod, dst, (hw <= HW::Gen9) ? null.ud(0)(2,2,1) : null.ud(0)(0,1,0), Immediate::d(jip));
459 }
calla(const InstructionModifier & mod,const RegData & dst,RegData jip)460 void calla(const InstructionModifier &mod, const RegData &dst, RegData jip) {
461 if (isXe)
462 opBranch<true, true>(Opcode::calla, mod, dst, jip);
463 else {
464 jip.setRegion(0, 1, 0);
465 opX<true>(Opcode::calla, DataType::d, mod, dst, null.ud(0)(0, 1, 0), jip);
466 }
467 }
468 template <typename DT = void>
cbit(const InstructionModifier & mod,const RegData & dst,const RegData & src0)469 void cbit(const InstructionModifier &mod, const RegData &dst, const RegData &src0) {
470 opX(Opcode::cbit, getDataType<DT>(), mod, dst, src0);
471 }
472 template <typename DT = void>
cbit(const InstructionModifier & mod,const RegData & dst,const Immediate & src0)473 void cbit(const InstructionModifier &mod, const RegData &dst, const Immediate &src0) {
474 opX(Opcode::cbit, getDataType<DT>(), mod, dst, src0);
475 }
476 template <typename DT = void>
cmp(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1)477 void cmp(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
478 opX(isXe ? Opcode::cmp_xe : Opcode::cmp, getDataType<DT>(), mod, dst, src0, src1);
479 }
480 template <typename DT = void>
cmp(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const Immediate & src1)481 void cmp(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) {
482 opX(isXe ? Opcode::cmp_xe : Opcode::cmp, getDataType<DT>(), mod, dst, src0, src1);
483 }
484 template <typename DT = void>
cmpn(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1)485 void cmpn(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
486 opX(isXe ? Opcode::cmpn_xe : Opcode::cmpn, getDataType<DT>(), mod, dst, src0, src1);
487 }
488 template <typename DT = void>
csel(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1,const RegData & src2)489 void csel(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1, const RegData &src2) {
490 opX(isXe ? Opcode::csel_xe : Opcode::csel, getDataType<DT>(), mod, dst, src0, src1, src2);
491 }
492 template <typename DT = void>
csel(const InstructionModifier & mod,const RegData & dst,const Immediate & src0,const RegData & src1,const RegData & src2)493 void csel(const InstructionModifier &mod, const RegData &dst, const Immediate &src0, const RegData &src1, const RegData &src2) {
494 opX(isXe ? Opcode::csel_xe : Opcode::csel, getDataType<DT>(), mod, dst, src0, src1, src2);
495 }
496 template <typename DT = void>
csel(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1,const Immediate & src2)497 void csel(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1, const Immediate &src2) {
498 opX(isXe ? Opcode::csel_xe : Opcode::csel, getDataType<DT>(), mod, dst, src0, src1, src2);
499 }
500 template <typename DT = void>
csel(const InstructionModifier & mod,const RegData & dst,const Immediate & src0,const RegData & src1,const Immediate & src2)501 void csel(const InstructionModifier &mod, const RegData &dst, const Immediate &src0, const RegData &src1, const Immediate &src2) {
502 opX(isXe ? Opcode::csel_xe : Opcode::csel, getDataType<DT>(), mod, dst, src0, src1, src2);
503 }
cont(const InstructionModifier & mod,Label & jip,Label & uip)504 void cont(const InstructionModifier &mod, Label &jip, Label &uip) {
505 opBranch(Opcode::cont, mod, null, jip, uip);
506 }
507 template <typename DT = void>
dp2(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1)508 void dp2(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
509 opX(Opcode::dp2, getDataType<DT>(), mod, dst, src0, src1);
510 }
511 template <typename DT = void>
dp2(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const Immediate & src1)512 void dp2(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) {
513 opX(Opcode::dp2, getDataType<DT>(), mod, dst, src0, src1);
514 }
515 template <typename DT = void>
dp3(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1)516 void dp3(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
517 opX(Opcode::dp3, getDataType<DT>(), mod, dst, src0, src1);
518 }
519 template <typename DT = void>
dp3(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const Immediate & src1)520 void dp3(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) {
521 opX(Opcode::dp3, getDataType<DT>(), mod, dst, src0, src1);
522 }
523 template <typename DT = void>
dp4(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1)524 void dp4(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
525 opX(Opcode::dp4, getDataType<DT>(), mod, dst, src0, src1);
526 }
527 template <typename DT = void>
dp4(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const Immediate & src1)528 void dp4(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) {
529 opX(Opcode::dp4, getDataType<DT>(), mod, dst, src0, src1);
530 }
531 template <typename DT = void>
dp4a(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1,const RegData & src2)532 void dp4a(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1, const RegData &src2) {
533 if (hw < HW::Xe_LP) unsupported();
534 opX(Opcode::dp4a, getDataType<DT>(), mod, dst, src0, src1, src2);
535 }
536 template <typename DT = void>
dp4a(const InstructionModifier & mod,const RegData & dst,const Immediate & src0,const RegData & src1,const RegData & src2)537 void dp4a(const InstructionModifier &mod, const RegData &dst, const Immediate &src0, const RegData &src1, const RegData &src2) {
538 if (hw < HW::Xe_LP) unsupported();
539 opX(Opcode::dp4a, getDataType<DT>(), mod, dst, src0, src1, src2);
540 }
541 template <typename DT = void>
dp4a(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1,const Immediate & src2)542 void dp4a(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1, const Immediate &src2) {
543 if (hw < HW::Xe_LP) unsupported();
544 opX(Opcode::dp4a, getDataType<DT>(), mod, dst, src0, src1, src2);
545 }
546 template <typename DT = void>
dp4a(const InstructionModifier & mod,const RegData & dst,const Immediate & src0,const RegData & src1,const Immediate & src2)547 void dp4a(const InstructionModifier &mod, const RegData &dst, const Immediate &src0, const RegData &src1, const Immediate &src2) {
548 if (hw < HW::Xe_LP) unsupported();
549 opX(Opcode::dp4a, getDataType<DT>(), mod, dst, src0, src1, src2);
550 }
551 template <typename DT = void>
dph(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1)552 void dph(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
553 opX(Opcode::dph, getDataType<DT>(), mod, dst, src0, src1);
554 }
555 template <typename DT = void>
dph(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const Immediate & src1)556 void dph(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) {
557 opX(Opcode::dph, getDataType<DT>(), mod, dst, src0, src1);
558 }
else_(InstructionModifier mod,Label & jip,Label & uip,bool branchCtrl=false)559 void else_(InstructionModifier mod, Label &jip, Label &uip, bool branchCtrl = false) {
560 mod.setBranchCtrl(branchCtrl);
561 opBranch(Opcode::else_, mod, null, jip, uip);
562 }
else_(InstructionModifier mod,Label & jip)563 void else_(InstructionModifier mod, Label &jip) {
564 else_(mod, jip, jip);
565 }
endif(const InstructionModifier & mod,Label & jip)566 void endif(const InstructionModifier &mod, Label &jip) {
567 opBranch(Opcode::endif, mod, null, jip);
568 }
endif(const InstructionModifier & mod)569 void endif(const InstructionModifier &mod) {
570 opBranch(Opcode::endif, mod, null, sizeof(Instruction8));
571 }
572 template <typename DT = void>
fbh(const InstructionModifier & mod,const RegData & dst,const RegData & src0)573 void fbh(const InstructionModifier &mod, const RegData &dst, const RegData &src0) {
574 opX(Opcode::fbh, getDataType<DT>(), mod, dst, src0);
575 }
576 template <typename DT = void>
fbh(const InstructionModifier & mod,const RegData & dst,const Immediate & src0)577 void fbh(const InstructionModifier &mod, const RegData &dst, const Immediate &src0) {
578 opX(Opcode::fbh, getDataType<DT>(), mod, dst, src0);
579 }
580 template <typename DT = void>
fbl(const InstructionModifier & mod,const RegData & dst,const RegData & src0)581 void fbl(const InstructionModifier &mod, const RegData &dst, const RegData &src0) {
582 opX(Opcode::fbl, getDataType<DT>(), mod, dst, src0);
583 }
584 template <typename DT = void>
fbl(const InstructionModifier & mod,const RegData & dst,const Immediate & src0)585 void fbl(const InstructionModifier &mod, const RegData &dst, const Immediate &src0) {
586 opX(Opcode::fbl, getDataType<DT>(), mod, dst, src0);
587 }
588 template <typename DT = void>
frc(const InstructionModifier & mod,const RegData & dst,const RegData & src0)589 void frc(const InstructionModifier &mod, const RegData &dst, const RegData &src0) {
590 opX(Opcode::frc, getDataType<DT>(), mod, dst, src0);
591 }
goto_(InstructionModifier mod,Label & jip,Label & uip,bool branchCtrl=false)592 void goto_(InstructionModifier mod, Label &jip, Label &uip, bool branchCtrl = false) {
593 mod.setBranchCtrl(branchCtrl);
594 opBranch(Opcode::goto_, mod, null, jip, uip);
595 }
goto_(const InstructionModifier & mod,Label & jip)596 void goto_(const InstructionModifier &mod, Label &jip) {
597 goto_(mod, jip, jip);
598 }
halt(const InstructionModifier & mod,Label & jip,Label & uip)599 void halt(const InstructionModifier &mod, Label &jip, Label &uip) {
600 opBranch(Opcode::halt, mod, null, jip, uip);
601 }
halt(const InstructionModifier & mod,Label & jip)602 void halt(const InstructionModifier &mod, Label &jip) {
603 halt(mod, jip, jip);
604 }
if_(InstructionModifier mod,Label & jip,Label & uip,bool branchCtrl=false)605 void if_(InstructionModifier mod, Label &jip, Label &uip, bool branchCtrl = false) {
606 mod.setBranchCtrl(branchCtrl);
607 opBranch(Opcode::if_, mod, null, jip, uip);
608 }
if_(const InstructionModifier & mod,Label & jip)609 void if_(const InstructionModifier &mod, Label &jip) {
610 if_(mod, jip, jip);
611 }
illegal()612 void illegal() {
613 opX(Opcode::illegal, DataType::invalid, InstructionModifier(), null, null, null);
614 }
join(InstructionModifier mod,Label & jip)615 void join(InstructionModifier mod, Label &jip) {
616 opBranch(Opcode::join, mod, null, jip);
617 }
join(InstructionModifier mod)618 void join(InstructionModifier mod) {
619 opBranch(Opcode::join, mod, null, sizeof(Instruction8));
620 }
jmpi(const InstructionModifier & mod,Label & jip)621 void jmpi(const InstructionModifier &mod, Label &jip) {
622 auto dst = isXe ? ARF(null) : ARF(ip);
623 opJmpi(Opcode::jmpi, mod, dst, dst, jip);
624 }
jmpi(const InstructionModifier & mod,const RegData & jip)625 void jmpi(const InstructionModifier &mod, const RegData &jip) {
626 #ifdef NGEN_SAFE
627 if (!isXe && jip.getType() != DataType::d && jip.getType() != DataType::invalid)
628 throw invalid_type_exception();
629 #endif
630 if (isXe)
631 opBranch<true, false>(Opcode::jmpi, mod, null, jip);
632 else
633 opX(Opcode::jmpi, DataType::d, mod, ip, ip, jip);
634 }
635 template <typename DT = void>
line(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1)636 void line(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
637 if (hw >= HW::Gen11) unsupported();
638 opX(Opcode::line, getDataType<DT>(), mod, dst, src0, src1);
639 }
640 template <typename DT = void>
line(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const Immediate & src1)641 void line(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) {
642 if (hw >= HW::Gen11) unsupported();
643 opX(Opcode::line, getDataType<DT>(), mod, dst, src0, src1);
644 }
645 template <typename DT = void>
lrp(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1,const RegData & src2)646 void lrp(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1, const RegData &src2) {
647 opX(Opcode::lrp, getDataType<DT>(), mod, dst, src0, src1, src2);
648 }
649 template <typename DT = void>
lzd(const InstructionModifier & mod,const RegData & dst,const RegData & src0)650 void lzd(const InstructionModifier &mod, const RegData &dst, const RegData &src0) {
651 opX(Opcode::lzd, getDataType<DT>(), mod, dst, src0);
652 }
653 template <typename DT = void>
lzd(const InstructionModifier & mod,const RegData & dst,const Immediate & src0)654 void lzd(const InstructionModifier &mod, const RegData &dst, const Immediate &src0) {
655 opX(Opcode::lzd, getDataType<DT>(), mod, dst, src0);
656 }
657 template <typename DT = void>
mac(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1)658 void mac(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
659 opX(Opcode::mac, getDataType<DT>(), mod, dst, src0, src1);
660 }
661 template <typename DT = void>
mac(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const Immediate & src1)662 void mac(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) {
663 opX(Opcode::mac, getDataType<DT>(), mod, dst, src0, src1);
664 }
665 template <typename DT = void>
mach(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1)666 void mach(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
667 opX(Opcode::mach, getDataType<DT>(), mod | AccWrEn, dst, src0, src1);
668 }
669 template <typename DT = void>
mach(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const Immediate & src1)670 void mach(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) {
671 opX(Opcode::mach, getDataType<DT>(), mod | AccWrEn, dst, src0, src1);
672 }
673 template <typename DT = void>
macl(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1)674 void macl(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
675 #ifdef NGEN_SAFE
676 if (hw < HW::Gen10) unsupported();
677 #endif
678 opX(Opcode::mach, getDataType<DT>(), mod, dst, src0, src1);
679 }
680 template <typename DT = void>
macl(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const Immediate & src1)681 void macl(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) {
682 #ifdef NGEN_SAFE
683 if (hw < HW::Gen10) unsupported();
684 #endif
685 opX(Opcode::mach, getDataType<DT>(), mod, dst, src0, src1);
686 }
687 template <typename DT = void>
mad(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1,const RegData & src2)688 void mad(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1, const RegData &src2) {
689 opX(Opcode::mad, getDataType<DT>(), mod, dst, src0, src1, src2);
690 }
691 template <typename DT = void>
mad(const InstructionModifier & mod,const RegData & dst,const Immediate & src0,const RegData & src1,const RegData & src2)692 void mad(const InstructionModifier &mod, const RegData &dst, const Immediate &src0, const RegData &src1, const RegData &src2) {
693 opX(Opcode::mad, getDataType<DT>(), mod, dst, src0, src1, src2);
694 }
695 template <typename DT = void>
mad(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1,const Immediate & src2)696 void mad(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1, const Immediate &src2) {
697 opX(Opcode::mad, getDataType<DT>(), mod, dst, src0, src1, src2);
698 }
699 template <typename DT = void>
mad(const InstructionModifier & mod,const RegData & dst,const Immediate & src0,const RegData & src1,const Immediate & src2)700 void mad(const InstructionModifier &mod, const RegData &dst, const Immediate &src0, const RegData &src1, const Immediate &src2) {
701 opX(Opcode::mad, getDataType<DT>(), mod, dst, src0, src1, src2);
702 }
703 template <typename DT = void, HW hw_ = hw>
704 typename std::enable_if<hwLE(hw_, HW::Gen9)>::type
madm(const InstructionModifier & mod,const ExtendedReg & dst,const ExtendedReg & src0,const ExtendedReg & src1,const ExtendedReg & src2)705 madm(const InstructionModifier &mod, const ExtendedReg &dst, const ExtendedReg &src0, const ExtendedReg &src1, const ExtendedReg &src2) {
706 opX(Opcode::madm, getDataType<DT>(), mod, extToAlign16(dst), extToAlign16(src0), extToAlign16(src1), extToAlign16(src2));
707 }
708 template <typename DT = void, HW hw_ = hw>
709 typename std::enable_if<hwGT(hw_, HW::Gen9)>::type
madm(const InstructionModifier & mod,const ExtendedReg & dst,ExtendedReg src0,ExtendedReg src1,const ExtendedReg & src2)710 madm(const InstructionModifier &mod, const ExtendedReg &dst, ExtendedReg src0, ExtendedReg src1, const ExtendedReg &src2) {
711 src0.getBase().setRegion(4,4,1);
712 src1.getBase().setRegion(4,4,1);
713 opX(Opcode::madm, getDataType<DT>(), mod, dst, src0, src1, src2);
714 }
715 template <typename DT = void>
math(const InstructionModifier & mod,MathFunction fc,const RegData & dst,const RegData & src0)716 void math(const InstructionModifier &mod, MathFunction fc, const RegData &dst, const RegData &src0) {
717 #ifdef NGEN_SAFE
718 if (mathArgCount(fc) != 1) throw invalid_operand_count_exception();
719 #endif
720 opMath(Opcode::math, getDataType<DT>(), mod, fc, dst, src0);
721 }
722 template <typename DT = void>
math(const InstructionModifier & mod,MathFunction fc,const RegData & dst,const RegData & src0,const RegData & src1)723 void math(const InstructionModifier &mod, MathFunction fc, const RegData &dst, const RegData &src0, const RegData &src1) {
724 #ifdef NGEN_SAFE
725 if (mathArgCount(fc) != 2) throw invalid_operand_count_exception();
726 #endif
727 opMath(Opcode::math, getDataType<DT>(), mod, fc, dst, src0, src1);
728 }
729 template <typename DT = void>
math(const InstructionModifier & mod,MathFunction fc,const RegData & dst,const RegData & src0,const Immediate & src1)730 void math(const InstructionModifier &mod, MathFunction fc, const RegData &dst, const RegData &src0, const Immediate &src1) {
731 #ifdef NGEN_SAFE
732 if (fc == MathFunction::invm || fc == MathFunction::rsqtm) throw invalid_operand_exception();
733 #endif
734 opMath(Opcode::math, getDataType<DT>(), mod, fc, dst, src0, src1.forceInt32());
735 }
736 template <typename DT = void, HW hw_ = hw>
737 typename std::enable_if<hwLT(hw_, HW::Gen11)>::type
math(const InstructionModifier & mod,MathFunction fc,const ExtendedReg & dst,const ExtendedReg & src0)738 math(const InstructionModifier &mod, MathFunction fc, const ExtendedReg &dst, const ExtendedReg &src0) {
739 #ifdef NGEN_SAFE
740 if (fc != MathFunction::rsqtm) throw invalid_operand_exception();
741 #endif
742 opMath(Opcode::math, getDataType<DT>(), mod, fc, extToAlign16(dst), extToAlign16(src0));
743 }
744 template <typename DT = void, HW hw_ = hw>
745 typename std::enable_if<hwGE(hw_, HW::Gen11)>::type
math(const InstructionModifier & mod,MathFunction fc,const ExtendedReg & dst,ExtendedReg src0)746 math(const InstructionModifier &mod, MathFunction fc, const ExtendedReg &dst, ExtendedReg src0) {
747 #ifdef NGEN_SAFE
748 if (fc != MathFunction::rsqtm) throw invalid_operand_exception();
749 #endif
750 if (hw == HW::Gen11)
751 src0.getBase().setRegion(2,2,1);
752 else
753 src0.getBase().setRegion(1,1,0);
754 opMath(Opcode::math, getDataType<DT>(), mod, fc, dst, src0);
755 }
756 template <typename DT = void, HW hw_ = hw>
757 typename std::enable_if<hwLT(hw_, HW::Gen11)>::type
math(const InstructionModifier & mod,MathFunction fc,const ExtendedReg & dst,const ExtendedReg & src0,const ExtendedReg & src1)758 math(const InstructionModifier &mod, MathFunction fc, const ExtendedReg &dst, const ExtendedReg &src0, const ExtendedReg &src1) {
759 #ifdef NGEN_SAFE
760 if (fc != MathFunction::invm) throw invalid_operand_exception();
761 #endif
762 opMath(Opcode::math, getDataType<DT>(), mod, fc, extToAlign16(dst), extToAlign16(src0), extToAlign16(src1));
763 }
764 template <typename DT = void, HW hw_ = hw>
765 typename std::enable_if<hwGE(hw_, HW::Gen11)>::type
math(const InstructionModifier & mod,MathFunction fc,const ExtendedReg & dst,ExtendedReg src0,ExtendedReg src1)766 math(const InstructionModifier &mod, MathFunction fc, const ExtendedReg &dst, ExtendedReg src0, ExtendedReg src1) {
767 #ifdef NGEN_SAFE
768 if (fc != MathFunction::invm) throw invalid_operand_exception();
769 #endif
770 if (hw == HW::Gen11) {
771 src0.getBase().setRegion(2,2,1);
772 src1.getBase().setRegion(2,2,1);
773 } else {
774 src0.getBase().setRegion(1,1,0);
775 src1.getBase().setRegion(1,1,0);
776 }
777 opMath(Opcode::math, getDataType<DT>(), mod, fc, dst, src0, src1);
778 }
779 template <typename DT = void>
mov(const InstructionModifier & mod,const RegData & dst,const RegData & src0)780 void mov(const InstructionModifier &mod, const RegData &dst, const RegData &src0) {
781 opX(isXe ? Opcode::mov_xe : Opcode::mov, getDataType<DT>(), mod, dst, src0);
782 }
783 template <typename DT = void>
mov(const InstructionModifier & mod,const RegData & dst,const Immediate & src0)784 void mov(const InstructionModifier &mod, const RegData &dst, const Immediate &src0) {
785 opX(isXe ? Opcode::mov_xe : Opcode::mov, getDataType<DT>(), mod, dst, src0);
786 }
787 template <typename DT = void>
movi(const InstructionModifier & mod,const RegData & dst,const RegData & src0)788 void movi(const InstructionModifier &mod, const RegData &dst, const RegData &src0) {
789 opX(isXe ? Opcode::movi_xe : Opcode::movi, getDataType<DT>(), mod, dst, src0);
790 }
791 template <typename DT = void>
mul(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1)792 void mul(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
793 opX(Opcode::mul, getDataType<DT>(), mod, dst, src0, src1);
794 }
795 template <typename DT = void>
mul(const InstructionModifier & mod,const RegData & dst,const RegData & src0,Immediate src1)796 void mul(const InstructionModifier &mod, const RegData &dst, const RegData &src0, Immediate src1) {
797 if (dst.getBytes() == 8)
798 src1 = src1.forceInt32();
799 opX(Opcode::mul, getDataType<DT>(), mod, dst, src0, src1);
800 }
nop()801 void nop() {
802 opNop(isXe ? Opcode::nop_xe : Opcode::nop);
803 }
nop(const InstructionModifier & mod)804 void nop(const InstructionModifier &mod) {
805 opX(isXe ? Opcode::nop_xe : Opcode::nop, DataType::invalid, mod, null, null, null);
806 }
807 template <typename DT = void>
not_(const InstructionModifier & mod,const RegData & dst,const RegData & src0)808 void not_(const InstructionModifier &mod, const RegData &dst, const RegData &src0) {
809 opX(isXe ? Opcode::not_xe : Opcode::not_, getDataType<DT>(), mod, dst, src0);
810 }
811 template <typename DT = void>
not_(const InstructionModifier & mod,const RegData & dst,const Immediate & src0)812 void not_(const InstructionModifier &mod, const RegData &dst, const Immediate &src0) {
813 opX(isXe ? Opcode::not_xe : Opcode::not_, getDataType<DT>(), mod, dst, src0);
814 }
815 #ifndef NGEN_NO_OP_NAMES
816 template <typename DT = void>
not(const InstructionModifier & mod,const RegData & dst,const RegData & src0)817 void not(const InstructionModifier &mod, const RegData &dst, const RegData &src0) {
818 not_<DT>(mod, dst, src0);
819 }
820 template <typename DT = void>
not(const InstructionModifier & mod,const RegData & dst,const Immediate & src0)821 void not(const InstructionModifier &mod, const RegData &dst, const Immediate &src0) {
822 not_<DT>(mod, dst, src0);
823 }
824 #endif
825 template <typename DT = void>
or_(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1)826 void or_(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
827 opX(isXe ? Opcode::or_xe : Opcode::or_, getDataType<DT>(), mod, dst, src0, src1);
828 }
829 template <typename DT = void>
or_(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const Immediate & src1)830 void or_(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) {
831 opX(isXe ? Opcode::or_xe : Opcode::or_, getDataType<DT>(), mod, dst, src0, src1);
832 }
833 #ifndef NGEN_NO_OP_NAMES
834 template <typename DT = void>
or(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1)835 void or(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
836 or_<DT>(mod, dst, src0, src1);
837 }
838 template <typename DT = void>
or(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const Immediate & src1)839 void or(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) {
840 or_<DT>(mod, dst, src0, src1);
841 }
842 #endif
843 template <typename DT = void>
pln(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1)844 void pln(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
845 if (hw >= HW::Gen11) unsupported();
846 opX(Opcode::pln, getDataType<DT>(), mod, dst, src0, src1);
847 }
ret(const InstructionModifier & mod,RegData src0)848 void ret(const InstructionModifier &mod, RegData src0) {
849 src0.setRegion(2,2,1);
850 if (isXe)
851 opBranch<true, true>(Opcode::ret, mod, null, src0);
852 else
853 opX<true>(Opcode::ret, DataType::ud, mod, null, src0);
854 }
855 template <typename DT = void>
rndd(const InstructionModifier & mod,const RegData & dst,const RegData & src0)856 void rndd(const InstructionModifier &mod, const RegData &dst, const RegData &src0) {
857 opX(Opcode::rndd, getDataType<DT>(), mod, dst, src0);
858 }
859 template <typename DT = void>
rndd(const InstructionModifier & mod,const RegData & dst,const Immediate & src0)860 void rndd(const InstructionModifier &mod, const RegData &dst, const Immediate &src0) {
861 opX(Opcode::rndd, getDataType<DT>(), mod, dst, src0);
862 }
863 template <typename DT = void>
rnde(const InstructionModifier & mod,const RegData & dst,const RegData & src0)864 void rnde(const InstructionModifier &mod, const RegData &dst, const RegData &src0) {
865 opX(Opcode::rnde, getDataType<DT>(), mod, dst, src0);
866 }
867 template <typename DT = void>
rnde(const InstructionModifier & mod,const RegData & dst,const Immediate & src0)868 void rnde(const InstructionModifier &mod, const RegData &dst, const Immediate &src0) {
869 opX(Opcode::rnde, getDataType<DT>(), mod, dst, src0);
870 }
871 template <typename DT = void>
rndu(const InstructionModifier & mod,const RegData & dst,const RegData & src0)872 void rndu(const InstructionModifier &mod, const RegData &dst, const RegData &src0) {
873 opX(Opcode::rndu, getDataType<DT>(), mod, dst, src0);
874 }
875 template <typename DT = void>
rndu(const InstructionModifier & mod,const RegData & dst,const Immediate & src0)876 void rndu(const InstructionModifier &mod, const RegData &dst, const Immediate &src0) {
877 opX(Opcode::rndu, getDataType<DT>(), mod, dst, src0);
878 }
879 template <typename DT = void>
rndz(const InstructionModifier & mod,const RegData & dst,const RegData & src0)880 void rndz(const InstructionModifier &mod, const RegData &dst, const RegData &src0) {
881 opX(Opcode::rndz, getDataType<DT>(), mod, dst, src0);
882 }
883 template <typename DT = void>
rndz(const InstructionModifier & mod,const RegData & dst,const Immediate & src0)884 void rndz(const InstructionModifier &mod, const RegData &dst, const Immediate &src0) {
885 opX(Opcode::rndz, getDataType<DT>(), mod, dst, src0);
886 }
887 template <typename DT = void>
rol(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1)888 void rol(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
889 opX(isXe ? Opcode::rol_xe : Opcode::rol, getDataType<DT>(), mod, dst, src0, src1);
890 }
891 template <typename DT = void>
rol(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const Immediate & src1)892 void rol(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) {
893 opX(isXe ? Opcode::rol_xe : Opcode::rol, getDataType<DT>(), mod, dst, src0, src1);
894 }
895 template <typename DT = void>
ror(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1)896 void ror(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
897 opX(isXe ? Opcode::ror_xe : Opcode::ror, getDataType<DT>(), mod, dst, src0, src1);
898 }
899 template <typename DT = void>
ror(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const Immediate & src1)900 void ror(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) {
901 opX(isXe ? Opcode::ror_xe : Opcode::ror, getDataType<DT>(), mod, dst, src0, src1);
902 }
903 template <typename DT = void>
sad2(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1)904 void sad2(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
905 if (hw >= HW::Xe_LP) unsupported();
906 opX(Opcode::sad2, getDataType<DT>(), mod, dst, src0, src1);
907 }
908 template <typename DT = void>
sad2(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const Immediate & src1)909 void sad2(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) {
910 if (hw >= HW::Xe_LP) unsupported();
911 opX(Opcode::sad2, getDataType<DT>(), mod, dst, src0, src1);
912 }
913 template <typename DT = void>
sada2(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1)914 void sada2(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
915 if (hw >= HW::Xe_LP) unsupported();
916 opX(Opcode::sada2, getDataType<DT>(), mod, dst, src0, src1);
917 }
918 template <typename DT = void>
sada2(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const Immediate & src1)919 void sada2(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) {
920 if (hw >= HW::Xe_LP) unsupported();
921 opX(Opcode::sada2, getDataType<DT>(), mod, dst, src0, src1);
922 }
923 template <typename DT = void>
sel(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1)924 void sel(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
925 opX(isXe ? Opcode::sel_xe : Opcode::sel, getDataType<DT>(), mod, dst, src0, src1);
926 }
927 template <typename DT = void>
sel(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const Immediate & src1)928 void sel(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) {
929 opX(isXe ? Opcode::sel_xe : Opcode::sel, getDataType<DT>(), mod, dst, src0, src1);
930 }
931
932 /* Xe-style sends */
send(const InstructionModifier & mod,SharedFunction sf,const RegData & dst,const RegData & src0,const RegData & src1,uint32_t exdesc,uint32_t desc)933 void send(const InstructionModifier &mod, SharedFunction sf, const RegData &dst, const RegData &src0, const RegData &src1, uint32_t exdesc, uint32_t desc) {
934 opSend(Opcode::send, mod, sf, dst, src0, src1, exdesc, desc);
935 }
send(const InstructionModifier & mod,SharedFunction sf,const RegData & dst,const RegData & src0,const RegData & src1,const RegData & exdesc,uint32_t desc)936 void send(const InstructionModifier &mod, SharedFunction sf, const RegData &dst, const RegData &src0, const RegData &src1, const RegData &exdesc, uint32_t desc) {
937 opSend(Opcode::send, mod, sf, dst, src0, src1, exdesc, desc);
938 }
send(const InstructionModifier & mod,SharedFunction sf,const RegData & dst,const RegData & src0,const RegData & src1,uint32_t exdesc,const RegData & desc)939 void send(const InstructionModifier &mod, SharedFunction sf, const RegData &dst, const RegData &src0, const RegData &src1, uint32_t exdesc, const RegData &desc) {
940 opSend(Opcode::send, mod, sf, dst, src0, src1, exdesc, desc);
941 }
send(const InstructionModifier & mod,SharedFunction sf,const RegData & dst,const RegData & src0,const RegData & src1,const RegData & exdesc,const RegData & desc)942 void send(const InstructionModifier &mod, SharedFunction sf, const RegData &dst, const RegData &src0, const RegData &src1, const RegData &exdesc, const RegData &desc) {
943 opSend(Opcode::send, mod, sf, dst, src0, src1, exdesc, desc);
944 }
sendc(const InstructionModifier & mod,SharedFunction sf,const RegData & dst,const RegData & src0,const RegData & src1,uint32_t exdesc,uint32_t desc)945 void sendc(const InstructionModifier &mod, SharedFunction sf, const RegData &dst, const RegData &src0, const RegData &src1, uint32_t exdesc, uint32_t desc) {
946 opSend(Opcode::sendc, mod, sf, dst, src0, src1, exdesc, desc);
947 }
sendc(const InstructionModifier & mod,SharedFunction sf,const RegData & dst,const RegData & src0,const RegData & src1,const RegData & exdesc,uint32_t desc)948 void sendc(const InstructionModifier &mod, SharedFunction sf, const RegData &dst, const RegData &src0, const RegData &src1, const RegData &exdesc, uint32_t desc) {
949 opSend(Opcode::sendc, mod, sf, dst, src0, src1, exdesc, desc);
950 }
sendc(const InstructionModifier & mod,SharedFunction sf,const RegData & dst,const RegData & src0,const RegData & src1,uint32_t exdesc,const RegData & desc)951 void sendc(const InstructionModifier &mod, SharedFunction sf, const RegData &dst, const RegData &src0, const RegData &src1, uint32_t exdesc, const RegData &desc) {
952 opSend(Opcode::sendc, mod, sf, dst, src0, src1, exdesc, desc);
953 }
sendc(const InstructionModifier & mod,SharedFunction sf,const RegData & dst,const RegData & src0,const RegData & src1,const RegData & exdesc,const RegData & desc)954 void sendc(const InstructionModifier &mod, SharedFunction sf, const RegData &dst, const RegData &src0, const RegData &src1, const RegData &exdesc, const RegData &desc) {
955 opSend(Opcode::sendc, mod, sf, dst, src0, src1, exdesc, desc);
956 }
957 /* Pre-Xe-style sends; also supported on Xe. */
send(const InstructionModifier & mod,const RegData & dst,const RegData & src0,uint32_t exdesc,uint32_t desc)958 void send(const InstructionModifier &mod, const RegData &dst, const RegData &src0, uint32_t exdesc, uint32_t desc) {
959 opSend(Opcode::send, mod, dst, src0, exdesc, desc);
960 }
send(const InstructionModifier & mod,const RegData & dst,const RegData & src0,uint32_t exdesc,const RegData & desc)961 void send(const InstructionModifier &mod, const RegData &dst, const RegData &src0, uint32_t exdesc, const RegData &desc) {
962 opSend(Opcode::send, mod, dst, src0, exdesc, desc);
963 }
sendc(const InstructionModifier & mod,const RegData & dst,const RegData & src0,uint32_t exdesc,uint32_t desc)964 void sendc(const InstructionModifier &mod, const RegData &dst, const RegData &src0, uint32_t exdesc, uint32_t desc) {
965 opSend(Opcode::sendc, mod, dst, src0, exdesc, desc);
966 }
sendc(const InstructionModifier & mod,const RegData & dst,const RegData & src0,uint32_t exdesc,const RegData & desc)967 void sendc(const InstructionModifier &mod, const RegData &dst, const RegData &src0, uint32_t exdesc, const RegData &desc) {
968 opSend(Opcode::sendc, mod, dst, src0, exdesc, desc);
969 }
sends(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1,uint32_t exdesc,uint32_t desc)970 void sends(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1, uint32_t exdesc, uint32_t desc) {
971 opSends(Opcode::sends, mod, dst, src0, src1, exdesc, desc);
972 }
sends(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1,uint32_t exdesc,const RegData & desc)973 void sends(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1, uint32_t exdesc, const RegData &desc) {
974 opSends(Opcode::sends, mod, dst, src0, src1, exdesc, desc);
975 }
sends(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1,const RegData & exdesc,uint32_t desc)976 void sends(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1, const RegData &exdesc, uint32_t desc) {
977 opSends(Opcode::sends, mod, dst, src0, src1, exdesc, desc);
978 }
sends(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1,const RegData & exdesc,const RegData & desc)979 void sends(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1, const RegData &exdesc, const RegData &desc) {
980 opSends(Opcode::sends, mod, dst, src0, src1, exdesc, desc);
981 }
sendsc(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1,uint32_t exdesc,uint32_t desc)982 void sendsc(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1, uint32_t exdesc, uint32_t desc) {
983 opSends(Opcode::sendsc, mod, dst, src0, src1, exdesc, desc);
984 }
sendsc(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1,uint32_t exdesc,const RegData & desc)985 void sendsc(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1, uint32_t exdesc, const RegData &desc) {
986 opSends(Opcode::sendsc, mod, dst, src0, src1, exdesc, desc);
987 }
sendsc(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1,const RegData & exdesc,uint32_t desc)988 void sendsc(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1, const RegData &exdesc, uint32_t desc) {
989 opSends(Opcode::sendsc, mod, dst, src0, src1, exdesc, desc);
990 }
sendsc(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1,const RegData & exdesc,const RegData & desc)991 void sendsc(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1, const RegData &exdesc, const RegData &desc) {
992 opSends(Opcode::sendsc, mod, dst, src0, src1, exdesc, desc);
993 }
994
995 template <typename DT = void>
shl(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1)996 void shl(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
997 opX(isXe ? Opcode::shl_xe : Opcode::shl, getDataType<DT>(), mod, dst, src0, src1);
998 }
999 template <typename DT = void>
shl(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const Immediate & src1)1000 void shl(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) {
1001 opX(isXe ? Opcode::shl_xe : Opcode::shl, getDataType<DT>(), mod, dst, src0, src1);
1002 }
1003 template <typename DT = void>
shr(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1)1004 void shr(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
1005 opX(isXe ? Opcode::shr_xe : Opcode::shr, getDataType<DT>(), mod, dst, src0, src1);
1006 }
1007 template <typename DT = void>
shr(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const Immediate & src1)1008 void shr(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) {
1009 opX(isXe ? Opcode::shr_xe : Opcode::shr, getDataType<DT>(), mod, dst, src0, src1);
1010 }
1011 template <typename DT = void>
smov(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const Immediate & src1)1012 void smov(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) {
1013 opX(isXe ? Opcode::smov_xe : Opcode::smov, getDataType<DT>(), mod, dst, src0, src1);
1014 }
1015 template <typename DT = void>
subb(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1)1016 void subb(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
1017 opX(Opcode::subb, getDataType<DT>(), mod | AccWrEn, dst, src0, src1);
1018 }
1019 template <typename DT = void>
subb(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const Immediate & src1)1020 void subb(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) {
1021 opX(Opcode::subb, getDataType<DT>(), mod | AccWrEn, dst, src0, src1);
1022 }
wait(const InstructionModifier & mod,const RegData & nreg)1023 void wait(const InstructionModifier &mod, const RegData &nreg) {
1024 #ifdef NGEN_SAFE
1025 if (!nreg.isARF() || nreg.getARFType() != ARFType::n) throw invalid_arf_exception();
1026 #endif
1027 opX(Opcode::wait, DataType::invalid, mod, nreg, nreg);
1028 }
while_(const InstructionModifier & mod,Label & jip)1029 void while_(const InstructionModifier &mod, Label &jip) {
1030 opBranch(Opcode::while_, mod, null, jip);
1031 }
1032 template <typename DT = void>
xor_(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1)1033 void xor_(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
1034 opX(isXe ? Opcode::xor_xe : Opcode::xor_, getDataType<DT>(), mod, dst, src0, src1);
1035 }
1036 template <typename DT = void>
xor_(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const Immediate & src1)1037 void xor_(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) {
1038 opX(isXe ? Opcode::xor_xe : Opcode::xor_, getDataType<DT>(), mod, dst, src0, src1);
1039 }
1040 #ifndef NGEN_NO_OP_NAMES
1041 template <typename DT = void>
xor(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1)1042 void xor(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
1043 xor_<DT>(mod, dst, src0, src1);
1044 }
1045 template <typename DT = void>
xor(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const Immediate & src1)1046 void xor(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) {
1047 xor_<DT>(mod, dst, src0, src1);
1048 }
1049 #endif
1050
1051 private:
1052 struct Sync {
1053 BinaryCodeGenerator<hw> &parent;
1054
Syncngen::BinaryCodeGenerator::Sync1055 Sync(BinaryCodeGenerator<hw> *parent_) : parent(*parent_) {}
1056
operator ()ngen::BinaryCodeGenerator::Sync1057 void operator()(SyncFunction fc, const InstructionModifier &mod = InstructionModifier()) {
1058 parent.opSync(Opcode::sync, fc, mod);
1059 }
operator ()ngen::BinaryCodeGenerator::Sync1060 void operator()(SyncFunction fc, const RegData &src0) {
1061 this->operator()(fc, InstructionModifier(), src0);
1062 }
operator ()ngen::BinaryCodeGenerator::Sync1063 void operator()(SyncFunction fc, const InstructionModifier &mod, const RegData &src0) {
1064 parent.opSync(Opcode::sync, fc, mod, src0);
1065 }
operator ()ngen::BinaryCodeGenerator::Sync1066 void operator()(SyncFunction fc, int src0) {
1067 this->operator()(fc, InstructionModifier(), src0);
1068 }
operator ()ngen::BinaryCodeGenerator::Sync1069 void operator()(SyncFunction fc, const InstructionModifier &mod, uint32_t src0) {
1070 parent.opSync(Opcode::sync, fc, mod, Immediate::ud(src0));
1071 }
allrdngen::BinaryCodeGenerator::Sync1072 void allrd() {
1073 allrd(null.ud(0)(0, 1, 1));
1074 }
allrdngen::BinaryCodeGenerator::Sync1075 void allrd(const InstructionModifier &mod) {
1076 allrd(mod, null.ud(0)(0, 1, 1));
1077 }
allrdngen::BinaryCodeGenerator::Sync1078 void allrd(const RegData &src0) {
1079 allrd(InstructionModifier(), src0);
1080 }
allrdngen::BinaryCodeGenerator::Sync1081 void allrd(const InstructionModifier &mod, const RegData &src0) {
1082 this->operator()(SyncFunction::allrd, mod, src0);
1083 }
allrdngen::BinaryCodeGenerator::Sync1084 void allrd(uint32_t src0) {
1085 allrd(InstructionModifier(), src0);
1086 }
allrdngen::BinaryCodeGenerator::Sync1087 void allrd(const InstructionModifier &mod, uint32_t src0) {
1088 this->operator()(SyncFunction::allrd, mod, src0);
1089 }
allwrngen::BinaryCodeGenerator::Sync1090 void allwr() {
1091 allwr(null);
1092 }
allwrngen::BinaryCodeGenerator::Sync1093 void allwr(const InstructionModifier &mod) {
1094 allwr(mod, null);
1095 }
allwrngen::BinaryCodeGenerator::Sync1096 void allwr(const RegData &src0) {
1097 allwr(InstructionModifier(), src0);
1098 }
allwrngen::BinaryCodeGenerator::Sync1099 void allwr(const InstructionModifier &mod, const RegData &src0) {
1100 this->operator()(SyncFunction::allwr, mod, src0);
1101 }
allwrngen::BinaryCodeGenerator::Sync1102 void allwr(uint32_t src0) {
1103 allwr(InstructionModifier(), src0);
1104 }
allwrngen::BinaryCodeGenerator::Sync1105 void allwr(const InstructionModifier &mod, uint32_t src0) {
1106 this->operator()(SyncFunction::allwr, mod, src0);
1107 }
barngen::BinaryCodeGenerator::Sync1108 void bar(const InstructionModifier &mod = InstructionModifier()) {
1109 this->operator()(SyncFunction::bar, mod);
1110 }
hostngen::BinaryCodeGenerator::Sync1111 void host(const InstructionModifier &mod = InstructionModifier()) {
1112 this->operator()(SyncFunction::host, mod);
1113 }
nopngen::BinaryCodeGenerator::Sync1114 void nop(const InstructionModifier &mod = InstructionModifier()) {
1115 this->operator()(SyncFunction::nop, mod);
1116 }
1117 };
1118 public:
1119 Sync sync;
1120
1121
1122 private:
1123 struct Load {
1124 BinaryCodeGenerator<hw> &parent;
1125
Loadngen::BinaryCodeGenerator::Load1126 Load(BinaryCodeGenerator<hw> *parent_) : parent(*parent_) {}
1127
1128 template <typename DataSpec>
operator ()ngen::BinaryCodeGenerator::Load1129 void operator()(const InstructionModifier &mod, const RegData &dst, const DataSpec &spec, AddressBase base, const RegData &addr)
1130 {
1131 MessageDescriptor desc;
1132 ExtendedMessageDescriptor exdesc;
1133
1134 encodeLoadDescriptors(hw, desc, exdesc, mod, dst, spec, base, addr);
1135 parent.send(mod, dst, addr, exdesc.all, desc.all);
1136 }
1137
1138 };
1139
1140 struct Store {
1141 BinaryCodeGenerator<hw> &parent;
1142
Storengen::BinaryCodeGenerator::Store1143 Store(BinaryCodeGenerator<hw> *parent_) : parent(*parent_) {}
1144
1145 template <typename DataSpec>
operator ()ngen::BinaryCodeGenerator::Store1146 void operator()(const InstructionModifier &mod, const DataSpec &spec, AddressBase base, const RegData &addr, const RegData &data)
1147 {
1148 MessageDescriptor desc;
1149 ExtendedMessageDescriptor exdesc;
1150
1151 encodeStoreDescriptors(hw, desc, exdesc, mod, spec, base, addr);
1152 parent.sends(mod, NullRegister(), addr, data, exdesc.all, desc.all);
1153 }
1154
1155 };
1156
1157 struct Atomic_ {
1158 BinaryCodeGenerator<hw> &parent;
1159
Atomic_ngen::BinaryCodeGenerator::Atomic_1160 Atomic_(BinaryCodeGenerator<hw> *parent_) : parent(*parent_) {}
1161
1162 template <typename DataSpec>
operator ()ngen::BinaryCodeGenerator::Atomic_1163 void operator()(AtomicOp op, const InstructionModifier &mod, const RegData &dst, const DataSpec &spec, AddressBase base, const RegData &addr, const RegData &data = NullRegister())
1164 {
1165 MessageDescriptor desc;
1166 ExtendedMessageDescriptor exdesc;
1167
1168 encodeAtomicDescriptors(hw, desc, exdesc, op, mod, dst, spec, base, addr);
1169 if (data.isNull())
1170 parent.send(mod, dst, addr, exdesc.all, desc.all);
1171 else
1172 parent.sends(mod, dst, addr, data, exdesc.all, desc.all);
1173 }
1174 template <typename DataSpec>
operator ()ngen::BinaryCodeGenerator::Atomic_1175 void operator()(AtomicOp op, const InstructionModifier &mod, const DataSpec &spec, AddressBase base, const RegData &addr, const RegData &data = NullRegister())
1176 {
1177 (*this)(op, mod, NullRegister(), spec, base, addr, data);
1178 }
1179
1180 };
1181 public:
1182 Load load;
1183 Store store;
1184 Atomic_ atomic;
1185
1186 #include "ngen_pseudo.hpp"
1187 };
1188
// Building blocks for NGEN_FORWARD.
//
// NGEN_DETAIL_FORWARD_DT(hw, op): declares a variadic member template `op`
//   with a leading data-type parameter DT (default void) that forwards its
//   arguments to ngen::BinaryCodeGenerator<hw>::op<DT>(...).
// NGEN_DETAIL_FORWARD(hw, op): declares a variadic member template `op`
//   that forwards its arguments to ngen::BinaryCodeGenerator<hw>::op(...).
#define NGEN_DETAIL_FORWARD_DT(hw, op) \
template <typename DT = void, typename... Targs> void op(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::template op<DT>(std::forward<Targs>(args)...); }
#define NGEN_DETAIL_FORWARD(hw, op) \
template <typename... Targs> void op(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::op(std::forward<Targs>(args)...); }

// NGEN_FORWARD(hw): re-declares the members of ngen::BinaryCodeGenerator<hw>
// (instructions, sync/load/store/atomic helpers, stream management and
// default-setting accessors) in the scope where the macro is expanded, each
// one forwarding to the BinaryCodeGenerator<hw> implementation.
//
// NOTE(review): the trailing NGEN_FORWARD_EXTRA / NGEN_FORWARD_OP_NAMES /
// NGEN_FORWARD_MIN_MAX / NGEN_FORWARD_REGISTERS macros take no parameter and
// refer to the identifier `hw` literally, so the argument passed here has to
// be spelled `hw` for those parts to name the same base class.
#define NGEN_FORWARD(hw) \
using InstructionStream = typename ngen::BinaryCodeGenerator<hw>::InstructionStream; \
using ngen::BinaryCodeGenerator<hw>::isXe; \
NGEN_DETAIL_FORWARD_DT(hw, add) \
NGEN_DETAIL_FORWARD_DT(hw, addc) \
NGEN_DETAIL_FORWARD_DT(hw, and_) \
NGEN_DETAIL_FORWARD_DT(hw, asr) \
NGEN_DETAIL_FORWARD_DT(hw, avg) \
NGEN_DETAIL_FORWARD_DT(hw, bfe) \
NGEN_DETAIL_FORWARD_DT(hw, bfi1) \
NGEN_DETAIL_FORWARD_DT(hw, bfi2) \
NGEN_DETAIL_FORWARD_DT(hw, bfrev) \
NGEN_DETAIL_FORWARD_DT(hw, cbit) \
NGEN_DETAIL_FORWARD_DT(hw, cmp) \
NGEN_DETAIL_FORWARD_DT(hw, cmpn) \
NGEN_DETAIL_FORWARD_DT(hw, csel) \
NGEN_DETAIL_FORWARD_DT(hw, dp2) \
NGEN_DETAIL_FORWARD_DT(hw, dp3) \
NGEN_DETAIL_FORWARD_DT(hw, dp4) \
NGEN_DETAIL_FORWARD_DT(hw, dph) \
NGEN_DETAIL_FORWARD_DT(hw, fbh) \
NGEN_DETAIL_FORWARD_DT(hw, fbl) \
NGEN_DETAIL_FORWARD_DT(hw, frc) \
NGEN_DETAIL_FORWARD_DT(hw, line) \
NGEN_DETAIL_FORWARD_DT(hw, lrp) \
NGEN_DETAIL_FORWARD_DT(hw, lzd) \
NGEN_DETAIL_FORWARD_DT(hw, mac) \
NGEN_DETAIL_FORWARD_DT(hw, macl) \
NGEN_DETAIL_FORWARD_DT(hw, mach) \
NGEN_DETAIL_FORWARD_DT(hw, mad) \
NGEN_DETAIL_FORWARD_DT(hw, madm) \
NGEN_DETAIL_FORWARD_DT(hw, math) \
NGEN_DETAIL_FORWARD_DT(hw, mov) \
NGEN_DETAIL_FORWARD_DT(hw, movi) \
NGEN_DETAIL_FORWARD_DT(hw, mul) \
NGEN_DETAIL_FORWARD_DT(hw, not_) \
NGEN_DETAIL_FORWARD_DT(hw, or_) \
NGEN_DETAIL_FORWARD_DT(hw, pln) \
NGEN_DETAIL_FORWARD_DT(hw, rndd) \
NGEN_DETAIL_FORWARD_DT(hw, rnde) \
NGEN_DETAIL_FORWARD_DT(hw, rndu) \
NGEN_DETAIL_FORWARD_DT(hw, rndz) \
NGEN_DETAIL_FORWARD_DT(hw, rol) \
NGEN_DETAIL_FORWARD_DT(hw, ror) \
NGEN_DETAIL_FORWARD_DT(hw, sad2) \
NGEN_DETAIL_FORWARD_DT(hw, sada2) \
NGEN_DETAIL_FORWARD_DT(hw, sel) \
NGEN_DETAIL_FORWARD_DT(hw, shl) \
NGEN_DETAIL_FORWARD_DT(hw, shr) \
NGEN_DETAIL_FORWARD_DT(hw, smov) \
NGEN_DETAIL_FORWARD_DT(hw, subb) \
NGEN_DETAIL_FORWARD_DT(hw, xor_) \
NGEN_DETAIL_FORWARD(hw, brc) \
NGEN_DETAIL_FORWARD(hw, brd) \
NGEN_DETAIL_FORWARD(hw, break_) \
NGEN_DETAIL_FORWARD(hw, call) \
NGEN_DETAIL_FORWARD(hw, calla) \
NGEN_DETAIL_FORWARD(hw, cont) \
NGEN_DETAIL_FORWARD(hw, else_) \
NGEN_DETAIL_FORWARD(hw, endif) \
NGEN_DETAIL_FORWARD(hw, goto_) \
NGEN_DETAIL_FORWARD(hw, halt) \
NGEN_DETAIL_FORWARD(hw, if_) \
NGEN_DETAIL_FORWARD(hw, illegal) \
NGEN_DETAIL_FORWARD(hw, join) \
NGEN_DETAIL_FORWARD(hw, jmpi) \
NGEN_DETAIL_FORWARD(hw, nop) \
NGEN_DETAIL_FORWARD(hw, ret) \
NGEN_DETAIL_FORWARD(hw, send) \
NGEN_DETAIL_FORWARD(hw, sendc) \
NGEN_DETAIL_FORWARD(hw, sends) \
NGEN_DETAIL_FORWARD(hw, sendsc) \
using ngen::BinaryCodeGenerator<hw>::sync; \
NGEN_DETAIL_FORWARD(hw, wait) \
NGEN_DETAIL_FORWARD(hw, while_) \
NGEN_DETAIL_FORWARD_DT(hw, min_) \
NGEN_DETAIL_FORWARD_DT(hw, max_) \
NGEN_DETAIL_FORWARD_DT(hw, bfi) \
NGEN_DETAIL_FORWARD_DT(hw, cos) \
NGEN_DETAIL_FORWARD_DT(hw, exp) \
NGEN_DETAIL_FORWARD_DT(hw, fdiv) \
NGEN_DETAIL_FORWARD_DT(hw, idiv) \
NGEN_DETAIL_FORWARD_DT(hw, inv) \
NGEN_DETAIL_FORWARD_DT(hw, invm) \
NGEN_DETAIL_FORWARD_DT(hw, iqot) \
NGEN_DETAIL_FORWARD_DT(hw, irem) \
NGEN_DETAIL_FORWARD_DT(hw, log) \
NGEN_DETAIL_FORWARD_DT(hw, pow) \
NGEN_DETAIL_FORWARD_DT(hw, rsqt) \
NGEN_DETAIL_FORWARD_DT(hw, rsqtm) \
NGEN_DETAIL_FORWARD_DT(hw, sin) \
NGEN_DETAIL_FORWARD_DT(hw, sqt) \
NGEN_DETAIL_FORWARD_DT(hw, fdiv_ieee) \
NGEN_DETAIL_FORWARD_DT(hw, inv_ieee) \
NGEN_DETAIL_FORWARD_DT(hw, sqt_ieee) \
NGEN_DETAIL_FORWARD(hw, threadend) \
NGEN_DETAIL_FORWARD(hw, barriermsg) \
NGEN_DETAIL_FORWARD(hw, barriersignal) \
NGEN_DETAIL_FORWARD(hw, barrierwait) \
NGEN_DETAIL_FORWARD(hw, barrier) \
using ngen::BinaryCodeGenerator<hw>::load; \
using ngen::BinaryCodeGenerator<hw>::store; \
using ngen::BinaryCodeGenerator<hw>::atomic; \
NGEN_DETAIL_FORWARD(hw, memfence) \
NGEN_DETAIL_FORWARD(hw, slmfence) \
NGEN_DETAIL_FORWARD(hw, pushStream) \
template <typename... Targs> InstructionStream *popStream(Targs&&... args) { return ngen::BinaryCodeGenerator<hw>::popStream(std::forward<Targs>(args)...); } \
NGEN_DETAIL_FORWARD(hw, appendStream) \
NGEN_DETAIL_FORWARD(hw, appendCurrentStream) \
NGEN_DETAIL_FORWARD(hw, discardStream) \
NGEN_DETAIL_FORWARD(hw, mark) \
NGEN_DETAIL_FORWARD(hw, comment) \
NGEN_DETAIL_FORWARD(hw, setDefaultNoMask) \
NGEN_DETAIL_FORWARD(hw, setDefaultAutoSWSB) \
bool getDefaultNoMask() { return ngen::BinaryCodeGenerator<hw>::getDefaultNoMask(); } \
bool getDefaultAutoSWSB() { return ngen::BinaryCodeGenerator<hw>::getDefaultAutoSWSB(); } \
NGEN_FORWARD_EXTRA \
NGEN_FORWARD_OP_NAMES \
NGEN_FORWARD_MIN_MAX \
NGEN_FORWARD_REGISTERS
1309
// Additional forwarder pulled in at the end of NGEN_FORWARD: dp4a<DT>(...).
// This macro has no parameter; `hw` is taken from the scope in which it is
// expanded.
#define NGEN_FORWARD_EXTRA \
template <typename DT = void, typename... Targs> \
void dp4a(Targs&&... args) { \
    ngen::BinaryCodeGenerator<hw>::template dp4a<DT>(std::forward<Targs>(args)...); \
}
1312
// Forwarders for the unsuffixed names and/not/or/xor, each mapped onto the
// underscore-suffixed member (and_, not_, or_, xor_) of
// ngen::BinaryCodeGenerator<hw>. Expands to nothing when NGEN_NO_OP_NAMES is
// defined. `hw` is taken from the scope in which the macro is expanded.
#ifndef NGEN_NO_OP_NAMES
#define NGEN_FORWARD_OP_NAMES \
template <typename DT = void, typename... Targs> \
void and(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::template and_<DT>(std::forward<Targs>(args)...); } \
template <typename DT = void, typename... Targs> \
void not(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::template not_<DT>(std::forward<Targs>(args)...); } \
template <typename DT = void, typename... Targs> \
void or(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::template or_<DT>(std::forward<Targs>(args)...); } \
template <typename DT = void, typename... Targs> \
void xor(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::template xor_<DT>(std::forward<Targs>(args)...); }
#else
#define NGEN_FORWARD_OP_NAMES
#endif
1322
// Forwarders for min<DT>(...) and max<DT>(...). Expands to nothing when
// NGEN_WINDOWS_COMPAT is defined (presumably to stay clear of min/max macros
// from Windows headers -- TODO confirm). `hw` is taken from the scope in
// which the macro is expanded.
#ifndef NGEN_WINDOWS_COMPAT
#define NGEN_FORWARD_MIN_MAX \
template <typename DT = void, typename... Targs> \
void min(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::template min<DT>(std::forward<Targs>(args)...); } \
template <typename DT = void, typename... Targs> \
void max(Targs&&... args) { ngen::BinaryCodeGenerator<hw>::template max<DT>(std::forward<Targs>(args)...); }
#else
#define NGEN_FORWARD_MIN_MAX
#endif
1330
// NGEN_FORWARD_REGISTERS: using-declarations (plus a few forwarding member
// templates) that make the register names, instruction modifiers, condition
// codes, SWSB tokens and address-model names of ngen::BinaryCodeGenerator<hw>
// visible inside a class that places this macro in its body.
//
// When NGEN_GLOBAL_REGS is defined, NGEN_FORWARD_REGISTERS expands to nothing.
//
// NGEN_FORWARD_REGISTERS_EXTRA1..3 are defined empty here and are appended to
// the base list when NGEN_FORWARD_REGISTERS is expanded.
#ifndef NGEN_GLOBAL_REGS
// Helper: a single using-declaration for `name` from ngen::BinaryCodeGenerator<hw>.
#define NGEN_FWD_REG(name) using ngen::BinaryCodeGenerator<hw>::name;
#define NGEN_FORWARD_REGISTERS_BASE \
NGEN_FWD_REG(indirect) \
NGEN_FWD_REG(r0) NGEN_FWD_REG(r1) NGEN_FWD_REG(r2) NGEN_FWD_REG(r3) NGEN_FWD_REG(r4) NGEN_FWD_REG(r5) NGEN_FWD_REG(r6) NGEN_FWD_REG(r7) \
NGEN_FWD_REG(r8) NGEN_FWD_REG(r9) NGEN_FWD_REG(r10) NGEN_FWD_REG(r11) NGEN_FWD_REG(r12) NGEN_FWD_REG(r13) NGEN_FWD_REG(r14) NGEN_FWD_REG(r15) \
NGEN_FWD_REG(r16) NGEN_FWD_REG(r17) NGEN_FWD_REG(r18) NGEN_FWD_REG(r19) NGEN_FWD_REG(r20) NGEN_FWD_REG(r21) NGEN_FWD_REG(r22) NGEN_FWD_REG(r23) \
NGEN_FWD_REG(r24) NGEN_FWD_REG(r25) NGEN_FWD_REG(r26) NGEN_FWD_REG(r27) NGEN_FWD_REG(r28) NGEN_FWD_REG(r29) NGEN_FWD_REG(r30) NGEN_FWD_REG(r31) \
NGEN_FWD_REG(r32) NGEN_FWD_REG(r33) NGEN_FWD_REG(r34) NGEN_FWD_REG(r35) NGEN_FWD_REG(r36) NGEN_FWD_REG(r37) NGEN_FWD_REG(r38) NGEN_FWD_REG(r39) \
NGEN_FWD_REG(r40) NGEN_FWD_REG(r41) NGEN_FWD_REG(r42) NGEN_FWD_REG(r43) NGEN_FWD_REG(r44) NGEN_FWD_REG(r45) NGEN_FWD_REG(r46) NGEN_FWD_REG(r47) \
NGEN_FWD_REG(r48) NGEN_FWD_REG(r49) NGEN_FWD_REG(r50) NGEN_FWD_REG(r51) NGEN_FWD_REG(r52) NGEN_FWD_REG(r53) NGEN_FWD_REG(r54) NGEN_FWD_REG(r55) \
NGEN_FWD_REG(r56) NGEN_FWD_REG(r57) NGEN_FWD_REG(r58) NGEN_FWD_REG(r59) NGEN_FWD_REG(r60) NGEN_FWD_REG(r61) NGEN_FWD_REG(r62) NGEN_FWD_REG(r63) \
NGEN_FWD_REG(r64) NGEN_FWD_REG(r65) NGEN_FWD_REG(r66) NGEN_FWD_REG(r67) NGEN_FWD_REG(r68) NGEN_FWD_REG(r69) NGEN_FWD_REG(r70) NGEN_FWD_REG(r71) \
NGEN_FWD_REG(r72) NGEN_FWD_REG(r73) NGEN_FWD_REG(r74) NGEN_FWD_REG(r75) NGEN_FWD_REG(r76) NGEN_FWD_REG(r77) NGEN_FWD_REG(r78) NGEN_FWD_REG(r79) \
NGEN_FWD_REG(r80) NGEN_FWD_REG(r81) NGEN_FWD_REG(r82) NGEN_FWD_REG(r83) NGEN_FWD_REG(r84) NGEN_FWD_REG(r85) NGEN_FWD_REG(r86) NGEN_FWD_REG(r87) \
NGEN_FWD_REG(r88) NGEN_FWD_REG(r89) NGEN_FWD_REG(r90) NGEN_FWD_REG(r91) NGEN_FWD_REG(r92) NGEN_FWD_REG(r93) NGEN_FWD_REG(r94) NGEN_FWD_REG(r95) \
NGEN_FWD_REG(r96) NGEN_FWD_REG(r97) NGEN_FWD_REG(r98) NGEN_FWD_REG(r99) NGEN_FWD_REG(r100) NGEN_FWD_REG(r101) NGEN_FWD_REG(r102) NGEN_FWD_REG(r103) \
NGEN_FWD_REG(r104) NGEN_FWD_REG(r105) NGEN_FWD_REG(r106) NGEN_FWD_REG(r107) NGEN_FWD_REG(r108) NGEN_FWD_REG(r109) NGEN_FWD_REG(r110) NGEN_FWD_REG(r111) \
NGEN_FWD_REG(r112) NGEN_FWD_REG(r113) NGEN_FWD_REG(r114) NGEN_FWD_REG(r115) NGEN_FWD_REG(r116) NGEN_FWD_REG(r117) NGEN_FWD_REG(r118) NGEN_FWD_REG(r119) \
NGEN_FWD_REG(r120) NGEN_FWD_REG(r121) NGEN_FWD_REG(r122) NGEN_FWD_REG(r123) NGEN_FWD_REG(r124) NGEN_FWD_REG(r125) NGEN_FWD_REG(r126) NGEN_FWD_REG(r127) \
NGEN_FWD_REG(null) \
NGEN_FWD_REG(a0) \
NGEN_FWD_REG(acc0) NGEN_FWD_REG(acc1) NGEN_FWD_REG(acc2) NGEN_FWD_REG(acc3) NGEN_FWD_REG(acc4) \
NGEN_FWD_REG(acc5) NGEN_FWD_REG(acc6) NGEN_FWD_REG(acc7) NGEN_FWD_REG(acc8) NGEN_FWD_REG(acc9) \
NGEN_FWD_REG(mme0) NGEN_FWD_REG(mme1) NGEN_FWD_REG(mme2) NGEN_FWD_REG(mme3) \
NGEN_FWD_REG(mme4) NGEN_FWD_REG(mme5) NGEN_FWD_REG(mme6) NGEN_FWD_REG(mme7) \
NGEN_FWD_REG(noacc) NGEN_FWD_REG(nomme) \
NGEN_FWD_REG(f0) NGEN_FWD_REG(f1) \
NGEN_FWD_REG(ce0) NGEN_FWD_REG(sp) NGEN_FWD_REG(sr0) NGEN_FWD_REG(sr1) \
NGEN_FWD_REG(cr0) NGEN_FWD_REG(n0) NGEN_FWD_REG(ip) NGEN_FWD_REG(tdr0) \
NGEN_FWD_REG(tm0) NGEN_FWD_REG(tm1) NGEN_FWD_REG(tm2) NGEN_FWD_REG(tm3) \
NGEN_FWD_REG(tm4) NGEN_FWD_REG(pm0) NGEN_FWD_REG(tp0) NGEN_FWD_REG(dbg0) \
NGEN_FWD_REG(fc0) NGEN_FWD_REG(fc1) NGEN_FWD_REG(fc2) NGEN_FWD_REG(fc3) \
NGEN_FWD_REG(NoDDClr) NGEN_FWD_REG(NoDDChk) \
NGEN_FWD_REG(AccWrEn) NGEN_FWD_REG(NoSrcDepSet) NGEN_FWD_REG(Breakpoint) NGEN_FWD_REG(sat) \
NGEN_FWD_REG(NoMask) \
NGEN_FWD_REG(Serialize) NGEN_FWD_REG(EOT) \
NGEN_FWD_REG(Atomic) NGEN_FWD_REG(Switch) NGEN_FWD_REG(NoPreempt) \
NGEN_FWD_REG(anyv) NGEN_FWD_REG(allv) NGEN_FWD_REG(any2h) NGEN_FWD_REG(all2h) \
NGEN_FWD_REG(any4h) NGEN_FWD_REG(all4h) NGEN_FWD_REG(any8h) NGEN_FWD_REG(all8h) \
NGEN_FWD_REG(any16h) NGEN_FWD_REG(all16h) NGEN_FWD_REG(any32h) NGEN_FWD_REG(all32h) \
NGEN_FWD_REG(x_repl) NGEN_FWD_REG(y_repl) NGEN_FWD_REG(z_repl) NGEN_FWD_REG(w_repl) \
NGEN_FWD_REG(ze) NGEN_FWD_REG(eq) NGEN_FWD_REG(nz) NGEN_FWD_REG(ne) \
NGEN_FWD_REG(gt) NGEN_FWD_REG(ge) NGEN_FWD_REG(lt) NGEN_FWD_REG(le) \
NGEN_FWD_REG(ov) NGEN_FWD_REG(un) NGEN_FWD_REG(eo) \
NGEN_FWD_REG(M0) NGEN_FWD_REG(M4) NGEN_FWD_REG(M8) NGEN_FWD_REG(M12) \
NGEN_FWD_REG(M16) NGEN_FWD_REG(M20) NGEN_FWD_REG(M24) NGEN_FWD_REG(M28) \
NGEN_FWD_REG(sb0) NGEN_FWD_REG(sb1) NGEN_FWD_REG(sb2) NGEN_FWD_REG(sb3) \
NGEN_FWD_REG(sb4) NGEN_FWD_REG(sb5) NGEN_FWD_REG(sb6) NGEN_FWD_REG(sb7) \
NGEN_FWD_REG(sb8) NGEN_FWD_REG(sb9) NGEN_FWD_REG(sb10) NGEN_FWD_REG(sb11) \
NGEN_FWD_REG(sb12) NGEN_FWD_REG(sb13) NGEN_FWD_REG(sb14) NGEN_FWD_REG(sb15) \
NGEN_FWD_REG(A32) NGEN_FWD_REG(A32NC) NGEN_FWD_REG(A64) NGEN_FWD_REG(A64NC) \
NGEN_FWD_REG(SLM) \
template <typename... Targs> ngen::InstructionModifier ExecutionOffset(Targs&&... args) { return ngen::BinaryCodeGenerator<hw>::ExecutionOffset(std::forward<Targs>(args)...); } \
template <typename... Targs> ngen::AddressBase Surface(Targs&&... args) { return ngen::BinaryCodeGenerator<hw>::Surface(std::forward<Targs>(args)...); } \
template <typename... Targs> ngen::AddressBase CC(Targs&&... args) { return ngen::BinaryCodeGenerator<hw>::CC(std::forward<Targs>(args)...); } \
template <typename... Targs> ngen::AddressBase SC(Targs&&... args) { return ngen::BinaryCodeGenerator<hw>::SC(std::forward<Targs>(args)...); }
#define NGEN_FORWARD_REGISTERS_EXTRA1
#define NGEN_FORWARD_REGISTERS_EXTRA2
#define NGEN_FORWARD_REGISTERS_EXTRA3
#define NGEN_FORWARD_REGISTERS NGEN_FORWARD_REGISTERS_BASE NGEN_FORWARD_REGISTERS_EXTRA1 NGEN_FORWARD_REGISTERS_EXTRA2 NGEN_FORWARD_REGISTERS_EXTRA3
#else
#define NGEN_FORWARD_REGISTERS
#endif
1411
1412 template <HW hw>
unsupported()1413 inline void BinaryCodeGenerator<hw>::unsupported()
1414 {
1415 #ifdef NGEN_SAFE
1416 throw unsupported_instruction();
1417 #endif
1418 }
1419
1420 template <HW hw>
popStream()1421 typename BinaryCodeGenerator<hw>::InstructionStream *BinaryCodeGenerator<hw>::popStream()
1422 {
1423 #ifdef NGEN_SAFE
1424 if (streamStack.size() <= 1) throw stream_stack_underflow();
1425 #endif
1426
1427 InstructionStream *result = streamStack.back();
1428 streamStack.pop_back();
1429 return result;
1430 }
1431
1432 template <HW hw>
encodeSyncInsertion(autoswsb::SyncInsertion & si)1433 static inline Instruction12 encodeSyncInsertion(autoswsb::SyncInsertion &si)
1434 {
1435 Instruction12 i;
1436
1437 i.common.opcode = static_cast<int>(Opcode::sync);
1438 i.common.swsb = SWSBInfo12(si.swsb, Opcode::sync).raw();
1439 i.common.maskCtrl = true;
1440 i.binary.cmod = static_cast<int>(si.fc);
1441
1442 if (si.mask) {
1443 i.binary.src0Type = getTypecode12(DataType::ud);
1444 i.binary.src0Imm = true;
1445 i.imm32.value = si.mask;
1446 }
1447 i.binary.dst = 1;
1448
1449 return i;
1450 }
1451
1452 template <HW hw>
getCode()1453 std::vector<uint8_t> BinaryCodeGenerator<hw>::getCode()
1454 {
1455 #ifdef NGEN_SAFE
1456 if (streamStack.size() > 1) throw unfinished_stream_exception();
1457 #endif
1458 rootStream.fixLabels(labelManager);
1459
1460 Program program(rootStream);
1461 autoswsb::BasicBlockList analysis = autoswsb::autoSWSB(hw, program);
1462 std::vector<uint8_t> result;
1463
1464 if (analysis.empty()) {
1465 result.resize(rootStream.length());
1466 std::memmove(result.data(), rootStream.code.data(), rootStream.length());
1467 } else {
1468 std::multimap<int32_t, autoswsb::SyncInsertion*> syncs;
1469
1470 for (auto &bb : analysis)
1471 for (auto &sync : bb.syncs)
1472 syncs.insert(std::make_pair(sync.inum, &sync));
1473
1474 result.resize(rootStream.length() + syncs.size() * sizeof(Instruction12));
1475
1476 auto *psrc = reinterpret_cast<const Instruction12 *>(rootStream.code.data());
1477 auto *pdst = reinterpret_cast<Instruction12 *>(result.data());
1478 auto nextSync = syncs.begin();
1479
1480 for (uint32_t isrc = 0; isrc < program.size(); isrc++) {
1481 while ((nextSync != syncs.end()) && (nextSync->second->inum == isrc))
1482 *pdst++ = encodeSyncInsertion<hw>(*(nextSync++)->second);
1483 *pdst++ = *psrc++;
1484 }
1485 }
1486
1487 return result;
1488 }
1489
1490 template <HW hw>
1491 template <bool forceWE, typename D, typename S0, HW hw_>
1492 typename std::enable_if<hwLT(hw_, HW::Xe_LP)>::type
opX(Opcode op,DataType defaultType,const InstructionModifier & mod,D dst,S0 src0)1493 BinaryCodeGenerator<hw>::opX(Opcode op, DataType defaultType, const InstructionModifier &mod, D dst, S0 src0)
1494 {
1495 Instruction8 i{};
1496 InstructionModifier emod = mod | defaultModifier;
1497 if (forceWE)
1498 emod |= NoMask;
1499
1500 dst.fixup(emod.getExecSize(), defaultType, true, 1);
1501 src0.fixup(emod.getExecSize(), defaultType, false, 1);
1502
1503 encodeCommon8(i, op, emod);
1504 i.common.accessMode = std::is_base_of<Align16Operand, D>::value;
1505
1506 i.binary.dst = encodeBinaryOperand8<true>(dst).bits;
1507 i.binary.src0 = encodeBinaryOperand8<false>(src0).bits;
1508
1509 if (dst.isIndirect()) i.binary.dstAddrImm9 = dst.getOffset() >> 9;
1510 if (src0.isIndirect()) i.binary.src0AddrImm9 = src0.getOffset() >> 9;
1511
1512 i.binary.dstType = getTypecode<hw>(dst.getType());
1513 i.binary.src0Type = getTypecode<hw>(src0.getType());
1514
1515 i.binary.dstRegFile = getRegFile(dst);
1516 i.binary.src0RegFile = getRegFile(src0);
1517
1518 db(i);
1519 }
1520
1521 template <HW hw>
1522 template <bool forceWE, typename D, typename S0, HW hw_>
1523 typename std::enable_if<hwGE(hw_, HW::Xe_LP)>::type
opX(Opcode op,DataType defaultType,const InstructionModifier & mod,D dst,S0 src0)1524 BinaryCodeGenerator<hw>::opX(Opcode op, DataType defaultType, const InstructionModifier &mod, D dst, S0 src0)
1525 {
1526 typename EncodingTag12Dispatch<hw>::tag tag;
1527 Instruction12 i{};
1528
1529 InstructionModifier emod = mod | defaultModifier;
1530 if (forceWE)
1531 emod |= NoMask;
1532
1533 dst.fixup(emod.getExecSize(), defaultType, true, 1);
1534 src0.fixup(emod.getExecSize(), defaultType, false, 1);
1535
1536 encodeCommon12(i, op, emod, dst, tag);
1537
1538 i.binary.dst = encodeBinaryOperand12<true>(dst, tag).bits;
1539 i.binary.src0 = encodeBinaryOperand12<false>(src0, tag).bits;
1540
1541 i.binary.dstAddrMode = dst.isIndirect();
1542 i.binary.dstType = getTypecode12(dst.getType());
1543 i.binary.src0Type = getTypecode12(src0.getType());
1544
1545 i.binary.src0Mods = src0.getMods();
1546
1547 i.binary.cmod = static_cast<int>(mod.getCMod());
1548
1549 db(i);
1550 }
1551
1552 template <HW hw>
1553 template <bool forceWE, typename D, HW hw_>
1554 typename std::enable_if<hwLT(hw_, HW::Xe_LP)>::type
opX(Opcode op,DataType defaultType,const InstructionModifier & mod,D dst,const Immediate & src0)1555 BinaryCodeGenerator<hw>::opX(Opcode op, DataType defaultType, const InstructionModifier &mod, D dst, const Immediate &src0)
1556 {
1557 Instruction8 i{};
1558 InstructionModifier emod = mod | defaultModifier;
1559 if (forceWE)
1560 emod |= NoMask;
1561
1562 dst.fixup(emod.getExecSize(), defaultType, true, 1);
1563 src0.fixup(emod.getExecSize(), defaultType, false, 1);
1564
1565 encodeCommon8(i, op, emod);
1566 i.common.accessMode = std::is_base_of<Align16Operand, D>::value;
1567
1568 i.binary.dst = encodeBinaryOperand8<true>(dst).bits;
1569
1570 i.binary.dstType = getTypecode<hw>(dst.getType());
1571 i.binary.src0Type = getImmediateTypecode<hw>(src0.getType());
1572
1573 i.binary.dstRegFile = getRegFile(dst);
1574 i.binary.src0RegFile = getRegFile(src0);
1575
1576 if (dst.isIndirect()) i.binary.dstAddrImm9 = dst.getOffset() >> 9;
1577
1578 if (getBytes(src0.getType()) == 8)
1579 i.imm64.value = static_cast<uint64_t>(src0);
1580 else
1581 i.imm32.value = static_cast<uint64_t>(src0);
1582
1583 db(i);
1584 }
1585
1586 template <HW hw>
1587 template <bool forceWE, typename D, HW hw_>
1588 typename std::enable_if<hwGE(hw_, HW::Xe_LP)>::type
opX(Opcode op,DataType defaultType,const InstructionModifier & mod,D dst,const Immediate & src0)1589 BinaryCodeGenerator<hw>::opX(Opcode op, DataType defaultType, const InstructionModifier &mod, D dst, const Immediate &src0)
1590 {
1591 typename EncodingTag12Dispatch<hw>::tag tag;
1592 Instruction12 i{};
1593
1594 InstructionModifier emod = mod | defaultModifier;
1595 if (forceWE)
1596 emod |= NoMask;
1597
1598 dst.fixup(emod.getExecSize(), defaultType, true, 1);
1599 src0.fixup(emod.getExecSize(), defaultType, false, 1);
1600
1601 encodeCommon12(i, op, emod, dst, tag);
1602
1603 i.binary.dst = encodeBinaryOperand12<true>(dst, tag).bits;
1604
1605 i.binary.dstAddrMode = dst.isIndirect();
1606
1607 i.binary.dstType = getTypecode12(dst.getType());
1608 i.binary.src0Type = getTypecode12(src0.getType());
1609
1610 i.binary.src0Imm = true;
1611
1612 i.binary.cmod = static_cast<int>(mod.getCMod());
1613
1614 auto val = static_cast<uint64_t>(src0);
1615 i.imm32.value = val;
1616 if (getBytes(src0.getType()) == 8) {
1617 #ifdef NGEN_SAFE
1618 if (mod.getCMod() != ConditionModifier::none) throw invalid_modifiers_exception();
1619 #endif
1620 i.imm64.high = val >> 32;
1621 }
1622
1623 db(i);
1624 }
1625
1626 template <HW hw>
1627 template <bool forceWE, typename D, typename S0, typename S1, HW hw_>
1628 typename std::enable_if<hwLT(hw_, HW::Xe_LP)>::type
opX(Opcode op,DataType defaultType,const InstructionModifier & mod,D dst,S0 src0,S1 src1)1629 BinaryCodeGenerator<hw>::opX(Opcode op, DataType defaultType, const InstructionModifier &mod, D dst, S0 src0, S1 src1)
1630 {
1631 Instruction8 i{};
1632
1633 InstructionModifier emod = mod | defaultModifier;
1634 if (forceWE)
1635 emod |= NoMask;
1636
1637 dst.fixup(emod.getExecSize(), defaultType, true, 2);
1638 src0.fixup(emod.getExecSize(), defaultType, false, 2);
1639 src1.fixup(emod.getExecSize(), defaultType, false, 2);
1640
1641 encodeCommon8(i, op, emod);
1642 i.common.accessMode = std::is_base_of<Align16Operand, D>::value;
1643
1644 i.binary.dst = encodeBinaryOperand8<true>(dst).bits;
1645 i.binary.src0 = encodeBinaryOperand8<false>(src0).bits;
1646 i.binary.src1 = encodeBinaryOperand8<false>(src1).bits;
1647
1648 if (dst.isIndirect()) i.binary.dstAddrImm9 = dst.getOffset() >> 9;
1649 if (src0.isIndirect()) i.binary.src0AddrImm9 = src0.getOffset() >> 9;
1650 if (src1.isIndirect()) i.binary.src1AddrImm9 = src1.getOffset() >> 9;
1651
1652 i.binary.dstType = getTypecode<hw>(dst.getType());
1653 i.binary.src0Type = getTypecode<hw>(src0.getType());
1654 i.binary.src1Type = getTypecode<hw>(src1.getType());
1655
1656 i.binary.dstRegFile = getRegFile(dst);
1657 i.binary.src0RegFile = getRegFile(src0);
1658 i.binary.src1RegFile = RegFileGRF;
1659
1660 #ifdef NGEN_SAFE
1661 if (src1.isARF() && op != Opcode::illegal) throw grf_expected_exception();
1662 #endif
1663
1664 db(i);
1665 }
1666
1667 template <HW hw>
1668 template <bool forceWE, typename D, typename S0, typename S1, HW hw_>
1669 typename std::enable_if<hwGE(hw_, HW::Xe_LP)>::type
opX(Opcode op,DataType defaultType,const InstructionModifier & mod,D dst,S0 src0,S1 src1)1670 BinaryCodeGenerator<hw>::opX(Opcode op, DataType defaultType, const InstructionModifier &mod, D dst, S0 src0, S1 src1)
1671 {
1672 typename EncodingTag12Dispatch<hw>::tag tag;
1673 Instruction12 i{};
1674
1675 InstructionModifier emod = mod | defaultModifier;
1676 if (forceWE)
1677 emod |= NoMask;
1678
1679 dst.fixup(emod.getExecSize(), defaultType, true, 2);
1680 src0.fixup(emod.getExecSize(), defaultType, false, 2);
1681 src1.fixup(emod.getExecSize(), defaultType, false, 2);
1682
1683 encodeCommon12(i, op, emod, dst, tag);
1684
1685 i.binary.dst = encodeBinaryOperand12<true>(dst, tag).bits;
1686 i.binary.src0 = encodeBinaryOperand12<false>(src0, tag).bits;
1687 i.binary.src1 = encodeBinaryOperand12<false>(src1, tag).bits;
1688
1689 i.binary.dstAddrMode = dst.isIndirect();
1690 i.binary.dstType = getTypecode12(dst.getType());
1691 i.binary.src0Type = getTypecode12(src0.getType());
1692 i.binary.src1Type = getTypecode12(src1.getType());
1693
1694 i.binary.src0Mods = src0.getMods();
1695 i.binary.src1Mods = src1.getMods();
1696
1697 i.binary.cmod = static_cast<int>(mod.getCMod());
1698
1699 db(i);
1700 }
1701
1702 template <HW hw>
1703 template <bool forceWE, typename D, typename S0, HW hw_>
1704 typename std::enable_if<hwLT(hw_, HW::Xe_LP)>::type
opX(Opcode op,DataType defaultType,const InstructionModifier & mod,D dst,S0 src0,const Immediate & src1)1705 BinaryCodeGenerator<hw>::opX(Opcode op, DataType defaultType, const InstructionModifier &mod, D dst, S0 src0, const Immediate &src1)
1706 {
1707 Instruction8 i{};
1708 InstructionModifier emod = mod | defaultModifier;
1709 if (forceWE)
1710 emod |= NoMask;
1711
1712 dst.fixup(emod.getExecSize(), defaultType, true, 2);
1713 src0.fixup(emod.getExecSize(), defaultType, false, 2);
1714 src1.fixup(emod.getExecSize(), defaultType, false, 2);
1715
1716 encodeCommon8(i, op, emod);
1717 i.common.accessMode = std::is_base_of<Align16Operand, D>::value;
1718
1719 i.binary.dst = encodeBinaryOperand8<true>(dst).bits;
1720 i.binary.src0 = encodeBinaryOperand8<false>(src0).bits;
1721
1722 if (dst.isIndirect()) i.binary.dstAddrImm9 = dst.getOffset() >> 9;
1723 if (src0.isIndirect()) i.binary.src0AddrImm9 = src0.getOffset() >> 9;
1724
1725 i.binary.dstType = getTypecode<hw>(dst.getType());
1726 i.binary.src0Type = getTypecode<hw>(src0.getType());
1727 i.binary.src1Type = getImmediateTypecode<hw>(src1.getType());
1728
1729 i.binary.dstRegFile = getRegFile(dst);
1730 i.binary.src0RegFile = getRegFile(src0);
1731 i.binary.src1RegFile = getRegFile(src1);
1732
1733 i.imm32.value = static_cast<uint64_t>(src1);
1734
1735 db(i);
1736 }
1737
1738 template <HW hw>
1739 template <bool forceWE, typename D, typename S0, HW hw_>
1740 typename std::enable_if<hwGE(hw_, HW::Xe_LP)>::type
opX(Opcode op,DataType defaultType,const InstructionModifier & mod,D dst,S0 src0,const Immediate & src1)1741 BinaryCodeGenerator<hw>::opX(Opcode op, DataType defaultType, const InstructionModifier &mod, D dst, S0 src0, const Immediate &src1)
1742 {
1743 typename EncodingTag12Dispatch<hw>::tag tag;
1744 Instruction12 i{};
1745
1746 InstructionModifier emod = mod | defaultModifier;
1747 if (forceWE)
1748 emod |= NoMask;
1749
1750 dst.fixup(emod.getExecSize(), defaultType, true, 2);
1751 src0.fixup(emod.getExecSize(), defaultType, false, 2);
1752 src1.fixup(emod.getExecSize(), defaultType, false, 2);
1753
1754 encodeCommon12(i, op, emod, dst, tag);
1755
1756 i.binary.dst = encodeBinaryOperand12<true>(dst, tag).bits;
1757 i.binary.src0 = encodeBinaryOperand12<false>(src0, tag).bits;
1758 i.binary.src1 = static_cast<uint64_t>(src1);
1759
1760 i.binary.dstAddrMode = dst.isIndirect();
1761 i.binary.dstType = getTypecode12(dst.getType());
1762 i.binary.src0Type = getTypecode12(src0.getType());
1763 i.binary.src1Type = getTypecode12(src1.getType());
1764
1765 i.binary.src0Mods = src0.getMods();
1766
1767 i.binary.cmod = static_cast<int>(mod.getCMod());
1768
1769 i.binary.src1Imm = true;
1770 i.imm32.value = static_cast<uint64_t>(src1);
1771
1772 db(i);
1773 }
1774
1775 template <HW hw>
1776 template <HW hw_>
1777 typename std::enable_if<hwLE(hw_, HW::Gen9)>::type
opX(Opcode op,DataType defaultType,const InstructionModifier & mod,RegData dst,RegData src0,RegData src1,RegData src2)1778 BinaryCodeGenerator<hw>::opX(Opcode op, DataType defaultType, const InstructionModifier &mod, RegData dst, RegData src0, RegData src1, RegData src2)
1779 {
1780 opX(op, defaultType, mod, emulateAlign16Dst(dst), emulateAlign16Src(src0),
1781 emulateAlign16Src(src1), emulateAlign16Src(src2));
1782 }
1783
1784
1785 template <HW hw>
1786 template <HW hw_>
1787 typename std::enable_if<hwLT(hw_, HW::Xe_LP)>::type
opX(Opcode op,DataType defaultType,const InstructionModifier & mod,Align16Operand dst,Align16Operand src0,Align16Operand src1,Align16Operand src2)1788 BinaryCodeGenerator<hw>::opX(Opcode op, DataType defaultType, const InstructionModifier &mod, Align16Operand dst, Align16Operand src0, Align16Operand src1, Align16Operand src2)
1789 {
1790 #ifdef NGEN_SAFE
1791 if (dst.getReg().isARF()) throw grf_expected_exception();
1792 if (src0.getReg().isARF()) throw grf_expected_exception();
1793 if (src1.getReg().isARF()) throw grf_expected_exception();
1794 if (src2.getReg().isARF()) throw grf_expected_exception();
1795 #endif
1796
1797 Instruction8 i{};
1798 InstructionModifier emod = mod | defaultModifier | Align16;
1799
1800 dst.getReg().fixup(emod.getExecSize(), defaultType, true, 3);
1801 src0.getReg().fixup(emod.getExecSize(), defaultType, false, 3);
1802 src1.getReg().fixup(emod.getExecSize(), defaultType, false, 3);
1803 src2.getReg().fixup(emod.getExecSize(), defaultType, false, 3);
1804
1805 encodeCommon8(i, op, emod);
1806
1807 i.ternary16.dstChanEn = dst.getChanEn();
1808 i.ternary16.dstRegNum = dst.getReg().getBase();
1809 i.ternary16.dstSubregNum2_4 = dst.getReg().getByteOffset() >> 2;
1810 i.ternary16.dstType = getTernary16Typecode8(dst.getReg().getType());
1811
1812 i.ternary16.srcType = getTernary16Typecode8(src0.getReg().getType());
1813
1814 bool isFOrHF = (src0.getReg().getType() == DataType::f
1815 || src0.getReg().getType() == DataType::hf);
1816
1817 i.ternary16.src1Type = isFOrHF && (src1.getReg().getType() == DataType::hf);
1818 i.ternary16.src2Type = isFOrHF && (src1.getReg().getType() == DataType::hf);
1819
1820 encodeTernaryCommon8(i, src0, src1, src2);
1821
1822 db(i);
1823 }
1824
1825 template <HW hw>
1826 template <typename D, typename S0, typename S1, typename S2, HW hw_>
1827 typename std::enable_if<hwLT(hw_, HW::Xe_LP)>::type
opX(Opcode op,DataType defaultType,const InstructionModifier & mod,D dst,S0 src0,S1 src1,S2 src2)1828 BinaryCodeGenerator<hw>::opX(Opcode op, DataType defaultType, const InstructionModifier &mod, D dst, S0 src0, S1 src1, S2 src2)
1829 {
1830 if (hw < HW::Gen10)
1831 unsupported();
1832
1833 #ifdef NGEN_SAFE
1834 if (src0.isARF()) throw grf_expected_exception();
1835 if (src2.isARF()) throw grf_expected_exception();
1836 #endif
1837
1838 Instruction8 i{};
1839 InstructionModifier emod = mod | defaultModifier;
1840
1841 dst.fixup(emod.getExecSize(), defaultType, true, 3);
1842 src0.fixup(emod.getExecSize(), defaultType, false, 3);
1843 src1.fixup(emod.getExecSize(), defaultType, false, 3);
1844 src2.fixup(emod.getExecSize(), defaultType, false, 3);
1845
1846 encodeCommon8(i, op, emod);
1847
1848 i.ternary1.src0RegFile = std::is_base_of<Immediate, S0>::value;
1849 i.ternary1.src1RegFile = src1.isARF();
1850 i.ternary1.src2RegFile = std::is_base_of<Immediate, S2>::value;
1851
1852 encodeTernaryCommon8(i, src0, src1, src2);
1853 encodeTernary1Dst10(i, dst);
1854
1855 db(i);
1856 }
1857
1858 template <HW hw>
1859 template <typename D, typename S0,typename S1, typename S2, HW hw_>
1860 typename std::enable_if<hwGE(hw_, HW::Xe_LP)>::type
opX(Opcode op,DataType defaultType,const InstructionModifier & mod,D dst,S0 src0,S1 src1,S2 src2)1861 BinaryCodeGenerator<hw>::opX(Opcode op, DataType defaultType, const InstructionModifier &mod, D dst, S0 src0, S1 src1, S2 src2)
1862 {
1863 typename EncodingTag12Dispatch<hw>::tag tag;
1864 Instruction12 i{};
1865 InstructionModifier emod = mod | defaultModifier;
1866
1867 dst.fixup(emod.getExecSize(), defaultType, true, 3);
1868 src0.fixup(emod.getExecSize(), defaultType, false, 3);
1869 src1.fixup(emod.getExecSize(), defaultType, false, 3);
1870 src2.fixup(emod.getExecSize(), defaultType, false, 3);
1871
1872 encodeCommon12(i, op, emod, dst, tag);
1873
1874 i.ternary.dst = encodeTernaryOperand12<true>(dst, tag).bits;
1875 encodeTernarySrc0(i, src0, tag);
1876 encodeTernarySrc1(i, src1, tag);
1877 encodeTernarySrc2(i, src2, tag);
1878 encodeTernaryTypes(i, dst, src0, src1, src2);
1879
1880 i.ternary.cmod = static_cast<int>(mod.getCMod());
1881
1882 db(i);
1883 }
1884
1885 template <HW hw>
1886 template <typename DS0>
opMath(Opcode op,DataType defaultType,const InstructionModifier & mod,MathFunction fc,DS0 dst,DS0 src0)1887 void BinaryCodeGenerator<hw>::opMath(Opcode op, DataType defaultType, const InstructionModifier &mod, MathFunction fc, DS0 dst, DS0 src0)
1888 {
1889 InstructionModifier mmod = mod;
1890
1891 mmod.setCMod(static_cast<ConditionModifier>(fc));
1892 opX(op, defaultType, mmod, dst, src0);
1893 }
1894
1895 template <HW hw>
1896 template <typename DS0, typename S1>
opMath(Opcode op,DataType defaultType,const InstructionModifier & mod,MathFunction fc,DS0 dst,DS0 src0,S1 src1)1897 void BinaryCodeGenerator<hw>::opMath(Opcode op, DataType defaultType, const InstructionModifier &mod, MathFunction fc, DS0 dst, DS0 src0, S1 src1)
1898 {
1899 InstructionModifier mmod = mod;
1900
1901 mmod.setCMod(static_cast<ConditionModifier>(fc));
1902 opX(op, defaultType, mmod, dst, src0, src1);
1903 }
1904
1905 template <HW hw>
1906 template <typename D, HW hw_>
1907 typename std::enable_if<hwLT(hw_, HW::Xe_LP)>::type
opSend(Opcode op,const InstructionModifier & mod,SharedFunction sfid,const RegData & dst,const RegData & src0,const RegData & src1,uint32_t exdesc,D desc)1908 BinaryCodeGenerator<hw>::opSend(Opcode op, const InstructionModifier &mod, SharedFunction sfid, const RegData &dst, const RegData &src0, const RegData &src1, uint32_t exdesc, D desc)
1909 {
1910 exdesc |= uint32_t(static_cast<uint8_t>(sfid));
1911 opSends(static_cast<Opcode>(static_cast<uint8_t>(op) | 2), mod, dst, src0, src1, exdesc, desc);
1912 }
1913
1914 template <HW hw>
1915 template <typename D, HW hw_>
1916 typename std::enable_if<hwLT(hw_, HW::Xe_LP)>::type
opSend(Opcode op,const InstructionModifier & mod,SharedFunction sfid,const RegData & dst,const RegData & src0,const RegData & src1,const RegData & exdesc,D desc)1917 BinaryCodeGenerator<hw>::opSend(Opcode op, const InstructionModifier &mod, SharedFunction sfid, const RegData &dst, const RegData &src0, const RegData &src1, const RegData &exdesc, D desc)
1918 {
1919 opSends(static_cast<Opcode>(static_cast<uint8_t>(op) | 2), mod, dst, src0, src1, exdesc, desc);
1920 }
1921
1922 template <HW hw>
1923 template <typename ED, typename D, HW hw_>
1924 typename std::enable_if<hwGE(hw_, HW::Xe_LP)>::type
opSend(Opcode op,const InstructionModifier & mod,SharedFunction sfid,const RegData & dst,const RegData & src0,const RegData & src1,ED exdesc,D desc)1925 BinaryCodeGenerator<hw>::opSend(Opcode op, const InstructionModifier &mod, SharedFunction sfid, const RegData &dst, const RegData &src0, const RegData &src1, ED exdesc, D desc)
1926 {
1927 typename EncodingTag12Dispatch<hw>::tag tag;
1928 Instruction12 i{};
1929 InstructionModifier emod = mod | defaultModifier;
1930
1931 encodeCommon12(i, op, emod, dst, tag);
1932
1933 i.send.fusionCtrl = emod.isSerialized();
1934
1935 i.send.dstReg = dst.getBase();
1936 i.send.src0Reg = src0.getBase();
1937 i.send.src1Reg = src1.getBase();
1938
1939 i.send.dstRegFile = getRegFile(dst);
1940 i.send.src0RegFile = getRegFile(src0);
1941 i.send.src1RegFile = getRegFile(src1);
1942
1943 i.send.sfid = static_cast<int>(sfid) & 0xF;
1944
1945 encodeSendDesc(i, desc);
1946 encodeSendExDesc(i, exdesc);
1947
1948 db(i);
1949 }
1950
1951 template <HW hw>
1952 template <HW hw_>
1953 typename std::enable_if<hwLT(hw_, HW::Xe_LP)>::type
opSend(Opcode op,const InstructionModifier & mod,const RegData & dst,const RegData & src0,uint32_t exdesc,uint32_t desc)1954 BinaryCodeGenerator<hw>::opSend(Opcode op, const InstructionModifier &mod, const RegData &dst, const RegData &src0, uint32_t exdesc, uint32_t desc)
1955 {
1956 Instruction8 i{};
1957 InstructionModifier emod = mod | defaultModifier;
1958
1959 encodeCommon8(i, op, emod);
1960
1961 i.binary.dst = encodeBinaryOperand8<true>(dst).bits;
1962 i.binary.src0 = encodeBinaryOperand8<false>(src0).bits;
1963
1964 i.sendsGen9.dstRegFile = getRegFile(dst);
1965 i.binary.src0RegFile = getRegFile(src0);
1966 i.binary.src1RegFile = RegFileIMM;
1967
1968 i.binary.dstType = getTypecode<hw>(dst.getType());
1969
1970 i.sendsGen9.sfid = exdesc & 0xF;
1971 i.sendGen8.zero = 0;
1972 i.sendGen8.exDesc16_19 = (exdesc >> 16) & 0xF;
1973 i.sendGen8.exDesc20_23 = (exdesc >> 20) & 0xF;
1974 i.sendGen8.exDesc24_27 = (exdesc >> 24) & 0xF;
1975 i.sendGen8.exDesc28_31 = (exdesc >> 28) & 0xF;
1976 i.sendsGen9.desc = desc;
1977
1978 i.sendsGen9.eot = (exdesc >> 5) & 1;
1979 if (dst.isIndirect()) i.sendsGen9.dstAddrImm9 = dst.getOffset() >> 9;
1980
1981 db(i);
1982 }
1983
1984 template <HW hw>
1985 template <HW hw_>
1986 typename std::enable_if<hwLT(hw_, HW::Xe_LP)>::type
opSend(Opcode op,const InstructionModifier & mod,const RegData & dst,const RegData & src0,uint32_t exdesc,const RegData & desc)1987 BinaryCodeGenerator<hw>::opSend(Opcode op, const InstructionModifier &mod, const RegData &dst, const RegData &src0, uint32_t exdesc, const RegData &desc)
1988 {
1989 #ifdef NGEN_SAFE
1990 // Only a0.0:ud is allowed for desc.
1991 if (!desc.isARF() || desc.getARFType() != ARFType::a || desc.getARFBase() != 0 || desc.getOffset() != 0)
1992 throw invalid_arf_exception();
1993 #endif
1994 Instruction8 i{};
1995 InstructionModifier emod = mod | defaultModifier;
1996
1997 encodeCommon8(i, op, emod);
1998
1999 i.binary.dst = encodeBinaryOperand8<true>(dst).bits;
2000 i.binary.src0 = encodeBinaryOperand8<false>(src0).bits;
2001 i.binary.src1 = encodeBinaryOperand8<false>(desc).bits;
2002
2003 i.sendsGen9.dstRegFile = getRegFile(dst);
2004 i.binary.src0RegFile = getRegFile(src0);
2005 i.binary.src1RegFile = getRegFile(desc);
2006 i.binary.src1Type = getTypecode<hw>(desc.getType());
2007
2008 i.sendsGen9.sfid = exdesc & 0xF;
2009 i.sendGen8.zero = 0;
2010 i.sendGen8.exDesc16_19 = (exdesc >> 16) & 0xF;
2011 i.sendGen8.exDesc20_23 = (exdesc >> 20) & 0xF;
2012 i.sendGen8.exDesc24_27 = (exdesc >> 24) & 0xF;
2013 i.sendGen8.exDesc28_31 = (exdesc >> 28) & 0xF;
2014
2015 i.sendsGen9.eot = (exdesc >> 5) & 1;
2016 if (dst.isIndirect()) i.sendsGen9.dstAddrImm9 = dst.getOffset() >> 9;
2017
2018 db(i);
2019 }
2020
2021 template <HW hw>
2022 template <typename D, HW hw_>
2023 typename std::enable_if<hwGE(hw_, HW::Xe_LP)>::type
opSend(Opcode op,const InstructionModifier & mod,const RegData & dst,const RegData & src0,uint32_t exdesc,D desc)2024 BinaryCodeGenerator<hw>::opSend(Opcode op, const InstructionModifier &mod, const RegData &dst, const RegData &src0, uint32_t exdesc, D desc)
2025 {
2026 opSends(op, mod, dst, src0, null, exdesc, desc);
2027 }
2028
2029 template <HW hw>
2030 template <typename ED, typename D, HW hw_>
2031 typename std::enable_if<hwLT(hw_, HW::Xe_LP)>::type
opSends(Opcode op,const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1,ED exdesc,D desc)2032 BinaryCodeGenerator<hw>::opSends(Opcode op, const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1, ED exdesc, D desc)
2033 {
2034 Instruction8 i{};
2035 InstructionModifier emod = mod | defaultModifier;
2036
2037 encodeCommon8(i, op, emod);
2038
2039 i.binary.dst = encodeBinaryOperand8<true>(dst).bits;
2040 i.binary.src0 = encodeBinaryOperand8<false>(src0).bits;
2041
2042 i.binary.src0RegFile = 0; // ?
2043 i.sendsGen9.dstRegFile = getRegFile(dst);
2044 i.sendsGen9.src1RegFile = getRegFile(src1);
2045 i.sendsGen9.src1RegNum = src1.getBase();
2046
2047 if (dst.isIndirect()) i.sendsGen9.dstAddrImm9 = dst.getOffset() >> 9;
2048 if (src0.isIndirect()) i.sendsGen9.src0AddrImm9 = src0.getOffset() >> 9;
2049
2050 encodeSendsDesc(i, desc);
2051 encodeSendsExDesc(i, exdesc);
2052
2053 db(i);
2054 }
2055
2056 template <HW hw>
2057 template <typename D, HW hw_>
2058 typename std::enable_if<hwGE(hw_, HW::Xe_LP)>::type
opSends(Opcode op,const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1,RegData exdesc,D desc)2059 BinaryCodeGenerator<hw>::opSends(Opcode op, const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1, RegData exdesc, D desc)
2060 {
2061 #ifdef NGEN_SAFE
2062 throw sfid_needed_exception();
2063 #endif
2064 }
2065
2066 template <HW hw>
2067 template <typename D, HW hw_>
2068 typename std::enable_if<hwGE(hw_, HW::Xe_LP)>::type
opSends(Opcode op,const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1,uint32_t exdesc,D desc)2069 BinaryCodeGenerator<hw>::opSends(Opcode op, const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1, uint32_t exdesc, D desc)
2070 {
2071 Opcode mop = static_cast<Opcode>(static_cast<int>(op) & ~2);
2072 opSend(mop, mod, static_cast<SharedFunction>(exdesc & 0x1F), dst, src0, src1, exdesc, desc);
2073 }
2074
2075 template <HW hw>
2076 template <HW hw_>
2077 typename std::enable_if<hwLT(hw_, HW::Xe_LP)>::type
opBranch(Opcode op,const InstructionModifier & mod,const RegData & dst,int32_t jip,int32_t uip)2078 BinaryCodeGenerator<hw>::opBranch(Opcode op, const InstructionModifier &mod, const RegData &dst, int32_t jip, int32_t uip)
2079 {
2080 Instruction8 i{};
2081 InstructionModifier emod = mod | defaultModifier;
2082
2083 encodeCommon8(i, op, emod);
2084
2085 i.binary.dst = encodeBinaryOperand8<true>(dst).bits;
2086 i.binary.dstRegFile = getRegFile(dst);
2087 i.binary.dstType = getTypecode<hw>(dst.getType());
2088 i.binary.src0RegFile = getRegFile(Immediate());
2089 i.binary.src0Type = getTypecode<hw>(DataType::d);
2090 i.branches.jip = jip;
2091 i.branches.uip = uip;
2092
2093 db(i);
2094 }
2095
2096 template <HW hw>
2097 template <HW hw_>
2098 typename std::enable_if<hwGE(hw_, HW::Xe_LP)>::type
opBranch(Opcode op,const InstructionModifier & mod,const RegData & dst,int32_t jip,int32_t uip)2099 BinaryCodeGenerator<hw>::opBranch(Opcode op, const InstructionModifier &mod, const RegData &dst, int32_t jip, int32_t uip)
2100 {
2101 typename EncodingTag12Dispatch<hw>::tag tag;
2102 Instruction12 i{};
2103 InstructionModifier emod = mod | defaultModifier;
2104
2105 encodeCommon12(i, op, emod, dst, tag);
2106
2107 i.binary.dst = encodeBinaryOperand12<true, false>(dst, tag).bits;
2108
2109 i.binary.src0Imm = true;
2110 i.binary.src1Imm = true;
2111
2112 i.branches.jip = jip;
2113 i.branches.uip = uip;
2114
2115 db(i);
2116 }
2117
2118 template <HW hw>
2119 template <bool forceWE, HW hw_>
2120 typename std::enable_if<hwLT(hw_, HW::Xe_LP)>::type
opBranch(Opcode op,const InstructionModifier & mod,const RegData & dst,int32_t jip)2121 BinaryCodeGenerator<hw>::opBranch(Opcode op, const InstructionModifier &mod, const RegData &dst, int32_t jip)
2122 {
2123 Instruction8 i{};
2124 InstructionModifier emod = mod | defaultModifier;
2125 if (forceWE)
2126 emod |= NoMask;
2127
2128 encodeCommon8(i, op, emod);
2129
2130 i.binary.dst = encodeBinaryOperand8<true>(dst).bits;
2131 i.binary.dstRegFile = getRegFile(dst);
2132 i.binary.dstType = getTypecode<hw>(dst.getType());
2133 i.binary.src1RegFile = RegFileIMM;
2134 i.binary.src1Type = getTypecode<hw>(DataType::d);
2135 i.branches.jip = jip;
2136
2137 db(i);
2138 }
2139
2140 template <HW hw>
2141 template <bool forceWE, HW hw_>
2142 typename std::enable_if<hwGE(hw_, HW::Xe_LP)>::type
opBranch(Opcode op,const InstructionModifier & mod,const RegData & dst,int32_t jip)2143 BinaryCodeGenerator<hw>::opBranch(Opcode op, const InstructionModifier &mod, const RegData &dst, int32_t jip)
2144 {
2145 typename EncodingTag12Dispatch<hw>::tag tag;
2146 Instruction12 i{};
2147 InstructionModifier emod = mod | defaultModifier;
2148 if (forceWE)
2149 emod |= NoMask;
2150
2151 encodeCommon12(i, op, emod, dst, tag);
2152
2153 i.binary.dst = encodeBinaryOperand12<true, false>(dst, tag).bits;
2154 i.binary.src0Imm = true;
2155 i.branches.jip = jip;
2156
2157 db(i);
2158 }
2159
2160 template <HW hw>
2161 template <bool forceWE, bool small12, HW hw_>
2162 typename std::enable_if<hwLT(hw_, HW::Xe_LP)>::type
opBranch(Opcode op,const InstructionModifier & mod,const RegData & dst,const RegData & src0)2163 BinaryCodeGenerator<hw>::opBranch(Opcode op, const InstructionModifier &mod, const RegData &dst, const RegData &src0)
2164 {
2165 Instruction8 i{};
2166 InstructionModifier emod = mod | defaultModifier;
2167 if (forceWE)
2168 emod |= NoMask;
2169
2170 encodeCommon8(i, op, emod);
2171
2172 i.binary.dst = encodeBinaryOperand8<true>(dst).bits;
2173 i.binary.dstRegFile = getRegFile(dst);
2174 i.binary.dstType = getTypecode<hw>(DataType::d);
2175 i.binary.src0RegFile = getRegFile(src0);
2176 i.binary.src0Type = getTypecode<hw>(DataType::d);
2177 i.binary.src0 = encodeBinaryOperand8<false>(src0).bits;
2178
2179 db(i);
2180 }
2181
2182 template <HW hw>
2183 template <bool forceWE, bool small12, HW hw_>
2184 typename std::enable_if<hwGE(hw_, HW::Xe_LP)>::type
opBranch(Opcode op,const InstructionModifier & mod,const RegData & dst,const RegData & src0)2185 BinaryCodeGenerator<hw>::opBranch(Opcode op, const InstructionModifier &mod, const RegData &dst, const RegData &src0)
2186 {
2187 typename EncodingTag12Dispatch<hw>::tag tag;
2188 Instruction12 i{};
2189 InstructionModifier emod = mod | defaultModifier;
2190 if (forceWE)
2191 emod |= NoMask;
2192
2193 encodeCommon12(i, op, emod, dst, tag);
2194
2195 i.binary.dst = encodeBinaryOperand12<true, false>(dst, tag).bits;
2196 i.binary.src0 = encodeBinaryOperand12<false, false>(src0, tag).bits;
2197 if (small12)
2198 i.binary.src0 &= 0xFFFF;
2199
2200 db(i);
2201 }
2202
2203 template <HW hw>
opBranch(Opcode op,const InstructionModifier & mod,const RegData & dst,Label & jip,Label & uip)2204 void BinaryCodeGenerator<hw>::opBranch(Opcode op, const InstructionModifier &mod, const RegData &dst, Label &jip, Label &uip)
2205 {
2206 addFixup(LabelFixup(jip.getID(labelManager), LabelFixup::JIPOffset));
2207 addFixup(LabelFixup(uip.getID(labelManager), LabelFixup::UIPOffset));
2208 opBranch(op, mod, dst, 0, 0);
2209 }
2210
2211 template <HW hw>
2212 template <bool forceWE>
opBranch(Opcode op,const InstructionModifier & mod,const RegData & dst,Label & jip)2213 void BinaryCodeGenerator<hw>::opBranch(Opcode op, const InstructionModifier &mod, const RegData &dst, Label &jip)
2214 {
2215 addFixup(LabelFixup(jip.getID(labelManager), LabelFixup::JIPOffset));
2216 opBranch<forceWE>(op, mod, dst, 0);
2217 }
2218
2219 template <HW hw>
opCall(Opcode op,const InstructionModifier & mod,const RegData & dst,Label & jip)2220 void BinaryCodeGenerator<hw>::opCall(Opcode op, const InstructionModifier &mod, const RegData &dst, Label &jip)
2221 {
2222 addFixup(LabelFixup(jip.getID(labelManager), LabelFixup::JIPOffset));
2223 if (isXe)
2224 opBranch<true>(op, mod, dst, 0);
2225 else
2226 opX<true>(op, DataType::d, mod, dst, null.ud(0)(0, 1, 0), Immediate::d(0));
2227 }
2228
2229 template <HW hw>
2230 template <HW hw_>
2231 typename std::enable_if<hwLT(hw_, HW::Xe_LP)>::type
opJmpi(Opcode op,const InstructionModifier & mod,const RegData & dst,RegData src0,uint32_t jip)2232 BinaryCodeGenerator<hw>::opJmpi(Opcode op, const InstructionModifier &mod, const RegData &dst, RegData src0, uint32_t jip)
2233 {
2234 Instruction8 i{};
2235 InstructionModifier emod = mod | defaultModifier | NoMask;
2236
2237 encodeCommon8(i, op, emod);
2238
2239 src0.fixup(emod.getExecSize(), DataType::d, false, 2);
2240
2241 i.binary.dst = encodeBinaryOperand8<true>(dst).bits;
2242 i.binary.src0 = encodeBinaryOperand8<false>(src0).bits;
2243 i.binary.src0RegFile = getRegFile(src0);
2244 i.binary.src1RegFile = RegFileIMM;
2245 i.binary.src1Type = getTypecode<hw>(DataType::d);
2246
2247 i.branches.jip = jip;
2248
2249 db(i);
2250 }
2251
2252 template <HW hw>
2253 template <HW hw_>
2254 typename std::enable_if<hwGE(hw_, HW::Xe_LP)>::type
opJmpi(Opcode op,const InstructionModifier & mod,const RegData & dst,RegData src0,uint32_t jip)2255 BinaryCodeGenerator<hw>::opJmpi(Opcode op, const InstructionModifier &mod, const RegData &dst, RegData src0, uint32_t jip)
2256 {
2257 opBranch<true>(op, mod, dst, jip);
2258 }
2259
2260 template <HW hw>
opJmpi(Opcode op,const InstructionModifier & mod,const RegData & dst,const RegData & src0,Label & jip)2261 void BinaryCodeGenerator<hw>::opJmpi(Opcode op, const InstructionModifier &mod, const RegData &dst, const RegData &src0, Label &jip)
2262 {
2263 if (hw >= HW::Xe_LP)
2264 addFixup(LabelFixup(jip.getID(labelManager), LabelFixup::JIPOffset));
2265 opJmpi(op, mod, dst, src0, 0);
2266 if (hw < HW::Xe_LP)
2267 addFixup(LabelFixup(jip.getID(labelManager), LabelFixup::JIPOffsetJMPI));
2268 }
2269
2270 template <HW hw>
opSync(Opcode op,SyncFunction fc,const InstructionModifier & mod)2271 void BinaryCodeGenerator<hw>::opSync(Opcode op, SyncFunction fc, const InstructionModifier &mod)
2272 {
2273 if (hw < HW::Xe_LP)
2274 unsupported();
2275
2276 typename EncodingTag12Dispatch<hw>::tag tag;
2277 Instruction12 i{};
2278 InstructionModifier emod = mod | defaultModifier;
2279
2280 encodeCommon12(i, op, emod, null, tag);
2281
2282 i.binary.dst = 0x1;
2283 i.binary.cmod = static_cast<int>(fc);
2284
2285 db(i);
2286 }
2287
2288 template <HW hw>
opSync(Opcode op,SyncFunction fc,const InstructionModifier & mod,RegData src0)2289 void BinaryCodeGenerator<hw>::opSync(Opcode op, SyncFunction fc, const InstructionModifier &mod, RegData src0)
2290 {
2291 typename EncodingTag12Dispatch<hw>::tag tag;
2292 if (hw < HW::Xe_LP)
2293 unsupported();
2294
2295 Instruction12 i{};
2296 InstructionModifier emod = mod | defaultModifier;
2297
2298 encodeCommon12(i, op, emod, null, tag);
2299
2300 i.binary.dst = 0x1;
2301 if (!src0.isNull()) {
2302 src0.setRegion(0, 1, 0);
2303 i.binary.src0 = encodeBinaryOperand12<false>(src0, tag).bits;
2304 i.binary.src0Type = getTypecode12(src0.getType());
2305 }
2306 i.binary.cmod = static_cast<int>(fc);
2307
2308 db(i);
2309 }
2310
2311 template <HW hw>
opSync(Opcode op,SyncFunction fc,const InstructionModifier & mod,const Immediate & src0)2312 void BinaryCodeGenerator<hw>::opSync(Opcode op, SyncFunction fc, const InstructionModifier &mod, const Immediate &src0)
2313 {
2314 if (hw < HW::Xe_LP)
2315 unsupported();
2316
2317 typename EncodingTag12Dispatch<hw>::tag tag;
2318 Instruction12 i{};
2319 InstructionModifier emod = mod | defaultModifier;
2320
2321 encodeCommon12(i, op, emod, null, tag);
2322
2323 i.binary.dst = 0x1;
2324 i.binary.src0Type = getTypecode12(src0.getType());
2325 i.binary.src0Imm = true;
2326 i.binary.cmod = static_cast<int>(fc);
2327
2328 i.imm32.value = static_cast<uint64_t>(src0);
2329
2330 db(i);
2331 }
2332
2333 template <HW hw>
opNop(Opcode op)2334 void BinaryCodeGenerator<hw>::opNop(Opcode op)
2335 {
2336 Instruction8 i{};
2337
2338 i.qword[0] = static_cast<int>(op);
2339 i.qword[1] = 0;
2340
2341 db(i);
2342 }
2343
2344 } /* namespace ngen */
2345
2346 #endif /* header guard */
2347