1 /*******************************************************************************
2 * Copyright 2019-2021 Intel Corporation
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 *******************************************************************************/
16
17 #ifndef NGEN_CORE_HPP
18 #define NGEN_CORE_HPP
19
20
#include <algorithm>
#include <cstdint>
#include <cstring>
#include <type_traits>
#include <vector>
25
26 #include "ngen_utils.hpp"
27
28 #ifndef NGEN_NO_OP_NAMES
29 #if not +0
30 #error Compile with -fno-operator-names [Linux/OS X] or without /Za [Windows] if you want to use and(), or(), xor(), or define NGEN_NO_OP_NAMES and use and_(), or_(), xor_().
31 #endif
32 #endif
33
34 #ifdef NGEN_ASM
35 #include <ostream>
36 #endif
37
38 #ifdef NGEN_SAFE
39 #include <stdexcept>
40 #endif
41
42 /*
43 Syntax
44 ------
45
46 Register Syntax Overview
47 r17 Plain register
48 r17.f(4) -> r17.4:f
49 In fact, r17.4<0;1,0>:f, as subregisters default to
50 being scalar
51 r17.sub<float>(4) Same as above, allowing for C++ templating.
52 r17.f() -> r17.0:f (defaults to offset 0)
53 r17.sub<float>() Same as above
54 r17.df(3)(8,8,1) Register regioning (vertical stride, width, horizontal stride)
55 r17.df(3)(8,1) (Width, horiz. stride): vertical stride is inferred
56 r17.df(3)(1) Horizontal stride only: width, vertical stride inferred from execution size.
57 r[a0.w(8)].f(4,4,1) Indirect addressing: VxH (if NGEN_SHORT_NAMES defined otherwise use indirect[a0...])
58 r[a0.w(8)].f(4,1) Indirect addressing: Vx1
59 -r17.q(1) Source modifier: negation
60 abs(r17) Source modifier: absolute value. Note that abs is defined in namespace ngen.
61 -abs(r3)
62 ~r17 Alternative syntax to -r17 for logical operations.
63 r17 + 3 ...is r20. Operators ++ and += are defined similarly.
64
65 Command Syntax Overview
    add(8, r3.f(0)(8,8,1), r9.f(0)(8,8,1), r12.f(0)(0,1,0))         ->   add (8)  r3.0<8;8,1>:f  r9.0<8;8,1>:f  r12.0<0;1,0>:f
67 add(8, r3.f(), r9.f(), r12.f()) Same as above. Register regions default to unit stride.
68 add<float>(8, r3, r9, r12) A default operand data type can be provided.
69 add<uint32_t>(8, r3, r9, r12.uw(8)(0,1,0)) Default operand types can be overridden.
70 add<float>(8, r3, r9, 3.14159f) The data type of scalar immediate values is inferred.
71 add<int32_t>(8, r3, r9, int16_t(12)) Here an int16_t immediate is mapped to the :w data type.
72 mul<float>(8, r3, r9, Immediate::vf(-1.0,1.0,-1.0,1.25)) Vector immediates require helper functions.
73 mov(8, r2.d(), Immediate::uv(7,6,5,4,3,2,1,0))
74 mov(8, r2.d(), Immediate::v(7,-6,5,-4,3,-2,1,0))
75
76 All modifiers for an instruction go in the first parameter, OR'ed together.
77 add(8 | M0, ...)
78 add(8 | W | ~f0.w(0) | sat, ...) Use NoMask instead of W if NGEN_SHORT_NAMES not defined.
79 add(8 | lt | f1_0, ...)
80 add(8 | ~any2h | f1, ...)
81 */
82
83 namespace ngen {
84
// Compile-time mirror of the NGEN_SAFE macro: true when it is defined, false otherwise.
#if defined(NGEN_SAFE)
static constexpr bool _safe_ = true;
#else
static constexpr bool _safe_ = false;
#endif
90
91 // Forward declarations.
92 class RegData;
93 class Register;
94 class GRFDisp;
95 class Subregister;
96 class RegisterRegion;
97 class NullRegister;
98 class InstructionModifier;
99 struct Instruction12;
100 enum class Opcode;
101
102 struct EncodingTag12;
103 static inline void encodeCommon12(Instruction12 &i, Opcode opcode, const InstructionModifier &mod, const RegData &dst, EncodingTag12 tag);
104 struct EncodingTagXeHPC;
105 static inline void encodeCommon12(Instruction12 &i, Opcode opcode, const InstructionModifier &mod, const RegData &dst, EncodingTagXeHPC tag);
106
107 // Exceptions, used when NGEN_SAFE is defined.
108
109 #ifdef NGEN_SAFE
// Each exception below derives from std::runtime_error, is default-constructible,
// and carries a fixed message retrievable through what().

class invalid_type_exception : public std::runtime_error {
public:
    invalid_type_exception()
        : std::runtime_error{"Instruction does not support this type or combination of types"} {}
};

class invalid_object_exception : public std::runtime_error {
public:
    invalid_object_exception()
        : std::runtime_error{"Object is invalid"} {}
};

class invalid_immediate_exception : public std::runtime_error {
public:
    invalid_immediate_exception()
        : std::runtime_error{"Invalid immediate value"} {}
};

class invalid_modifiers_exception : public std::runtime_error {
public:
    invalid_modifiers_exception()
        : std::runtime_error{"Invalid or conflicting modifiers"} {}
};

class invalid_operand_exception : public std::runtime_error {
public:
    invalid_operand_exception()
        : std::runtime_error{"Invalid operand to instruction"} {}
};

class invalid_operand_count_exception : public std::runtime_error {
public:
    invalid_operand_count_exception()
        : std::runtime_error{"Invalid operand count"} {}
};

class invalid_arf_exception : public std::runtime_error {
public:
    invalid_arf_exception()
        : std::runtime_error{"Invalid ARF specified"} {}
};

class grf_expected_exception : public std::runtime_error {
public:
    grf_expected_exception()
        : std::runtime_error{"GRF expected, but found an ARF"} {}
};

class invalid_model_exception : public std::runtime_error {
public:
    invalid_model_exception()
        : std::runtime_error{"Invalid addressing model specified"} {}
};

class invalid_load_store_exception : public std::runtime_error {
public:
    invalid_load_store_exception()
        : std::runtime_error{"Invalid operands for load/store/atomic"} {}
};

class invalid_range_exception : public std::runtime_error {
public:
    invalid_range_exception()
        : std::runtime_error{"Invalid register range"} {}
};

class invalid_region_exception : public std::runtime_error {
public:
    invalid_region_exception()
        : std::runtime_error{"Unsupported register region"} {}
};

class missing_type_exception : public std::runtime_error {
public:
    missing_type_exception()
        : std::runtime_error{"Operand is missing its type"} {}
};

class read_only_exception : public std::runtime_error {
public:
    read_only_exception()
        : std::runtime_error{"Memory model is read-only"} {}
};

class stream_stack_underflow : public std::runtime_error {
public:
    stream_stack_underflow()
        : std::runtime_error{"Stream stack underflow occurred"} {}
};

class unfinished_stream_exception : public std::runtime_error {
public:
    unfinished_stream_exception()
        : std::runtime_error{"An unfinished instruction stream is still active"} {}
};

class dangling_label_exception : public std::runtime_error {
public:
    dangling_label_exception()
        : std::runtime_error{"A label was referenced, but its location was not defined"} {}
};

class multiple_label_exception : public std::runtime_error {
public:
    multiple_label_exception()
        : std::runtime_error{"Label already has a location"} {}
};

class unsupported_instruction : public std::runtime_error {
public:
    unsupported_instruction()
        : std::runtime_error{"Instruction is not supported by the chosen hardware"} {}
};

class unsupported_message : public std::runtime_error {
public:
    unsupported_message()
        : std::runtime_error{"Message is not supported by the chosen hardware"} {}
};

class iga_align16_exception : public std::runtime_error {
public:
    iga_align16_exception()
        : std::runtime_error{"Align16 not supported by the IGA assembler; use binary output"} {}
};

class sfid_needed_exception : public std::runtime_error {
public:
    sfid_needed_exception()
        : std::runtime_error{"SFID must be specified on Gen12+"} {}
};

class invalid_execution_size_exception : public std::runtime_error {
public:
    invalid_execution_size_exception()
        : std::runtime_error{"Invalid execution size"} {}
};
202 #endif
203
// Gen hardware generations.
// Enumerators are listed in increasing order and code in this file compares them
// relationally (e.g. `hw >= HW::XeHPG` in getMnemonic), so the declaration order matters.
enum class HW {
    Unknown,
    Gen9,
    Gen10,
    Gen11,
    XeLP,
    Gen12LP = XeLP,     // Alias: same enumerator value as XeLP.
    XeHP,
    XeHPG,
    XeHPC,
};
216
// Data types. Bits[0:4] are the ID, bits[5:7] hold log2(width in bytes).
// getLog2Bytes() recovers the size by shifting the value right by 5; the DataType
// stream operator (NGEN_ASM builds) uses the low five bits as an index into its name table.
enum class DataType : uint8_t {
    ud = 0x40,
    d  = 0x41,
    uw = 0x22,
    w  = 0x23,
    ub = 0x04,
    b  = 0x05,
    df = 0x66,
    f  = 0x47,
    uq = 0x68,
    q  = 0x69,
    hf = 0x2A,
    bf = 0x2B,
    uv = 0x4D,
    v  = 0x4E,
    vf = 0x4F,
    bf8 = 0x0C,
    tf32 = 0x50,
    invalid = 0x00      // All-zero encoding; RegData::fixup() treats it as "no type assigned yet".
};
238
239 #ifdef NGEN_ASM
// Writes the textual name of a data type to the stream and returns the stream.
static inline std::ostream &operator<<(std::ostream &str, DataType type)
{
    // One slot per possible 5-bit type ID; IDs without a name print as an empty string.
    static const char *typeNames[32] = {
        "ud",   "d",  "uw", "w",  "ub", "b",  "df",  "f",
        "uq",   "q",  "hf", "bf", "bf8", "uv", "v",  "vf",
        "tf32", "",   "",   "",   "",   "",   "",    "",
        "",     "",   "",   "",   "",   "",   "",    ""
    };
    const unsigned id = static_cast<uint8_t>(type) & 0x1F;
    return str << typeNames[id];
}
247 #endif
248
getLog2Bytes(DataType type)249 static inline constexpr int getLog2Bytes(DataType type) { return static_cast<int>(type) >> 5; }
getBytes(DataType type)250 static inline constexpr int getBytes(DataType type) { return 1 << getLog2Bytes(type); }
getDwords(DataType type)251 static inline constexpr14 int getDwords(DataType type) { return std::max<int>(getBytes(type) >> 2, 1); }
252
isSigned(DataType type)253 static inline constexpr bool isSigned(DataType type)
254 {
255 return !(type == DataType::ub || type == DataType::uw || type == DataType::ud || type == DataType::uq);
256 }
257
// Maps a C++ type to the corresponding nGEN DataType.
// The primary template returns DataType::invalid for any type that has no explicit
// specialization below.
template <typename T> static inline DataType getDataType() { return DataType::invalid; }

// Specializations for the fixed-width integer types and for double/float.
template <> inline DataType getDataType<uint64_t>() { return DataType::uq; }
template <> inline DataType getDataType<int64_t>() { return DataType::q; }
template <> inline DataType getDataType<uint32_t>() { return DataType::ud; }
template <> inline DataType getDataType<int32_t>() { return DataType::d; }
template <> inline DataType getDataType<uint16_t>() { return DataType::uw; }
template <> inline DataType getDataType<int16_t>() { return DataType::w; }
template <> inline DataType getDataType<uint8_t>() { return DataType::ub; }
template <> inline DataType getDataType<int8_t>() { return DataType::b; }
template <> inline DataType getDataType<double>() { return DataType::df; }
template <> inline DataType getDataType<float>() { return DataType::f; }
// Optional specializations, each compiled only when its macro is defined. The half,
// bfloat16, bfloat8 and tfloat32 types are not declared in this part of the file.
#ifdef NGEN_HALF_TYPE
template <> inline DataType getDataType<half>() { return DataType::hf; }
#endif
#ifdef NGEN_BFLOAT16_TYPE
template <> inline DataType getDataType<bfloat16>() { return DataType::bf; }
#endif
#ifdef NGEN_BFLOAT8_TYPE
template <> inline DataType getDataType<bfloat8>() { return DataType::bf8; }
#endif
#ifdef NGEN_TFLOAT32_TYPE
template <> inline DataType getDataType<tfloat32>() { return DataType::tf32; }
#endif
282
// Math function codes.
// The values (1-15; 0 and 8 are not assigned) are used directly as indices into the
// 16-entry tables in mathArgCount() and in the MathFunction stream operator.
enum class MathFunction : uint8_t {
    inv   = 1,
    log   = 2,
    exp   = 3,
    sqt   = 4,
    rsqt  = 5,
    sin   = 6,
    cos   = 7,
    fdiv  = 9,
    pow   = 10,
    idiv  = 11,
    iqot  = 12,
    irem  = 13,
    invm  = 14,
    rsqtm = 15
};
300
mathArgCount(MathFunction func)301 static inline int mathArgCount(MathFunction func)
302 {
303 static const char argCounts[16] = {0, 1, 1, 1, 1, 1, 1, 1, 0, 2, 2, 2, 2, 2, 2, 1};
304 return argCounts[static_cast<uint8_t>(func) & 0xF];
305 }
306
307 #ifdef NGEN_ASM
// Writes the name of a math function to the stream and returns the stream.
// Unassigned codes (0 and 8) print as an empty string.
static inline std::ostream &operator<<(std::ostream &str, MathFunction func)
{
    static const char *funcNames[16] = {
        "",     "inv",  "log", "exp",  "sqt",  "rsqt", "sin",  "cos",
        "",     "fdiv", "pow", "idiv", "iqot", "irem", "invm", "rsqtm"
    };
    const unsigned code = static_cast<uint8_t>(func) & 0xF;
    return str << funcNames[code];
}
314 #endif
315
hasIEEEMacro(HW hw)316 static inline bool hasIEEEMacro(HW hw) {
317 if (hw == HW::Gen12LP) return false;
318 if (hw == HW::XeHPG) return false;
319 return true;
320 }
321
// Sync function codes.
// The values are used as indices into the 16-entry name table of the SyncFunction
// stream operator; codes not listed here have no name.
enum class SyncFunction : uint8_t {
    nop   = 0,
    allrd = 2,
    allwr = 3,
    bar   = 14,
    host  = 15
};
330
331 #ifdef NGEN_ASM
// Writes the name of a sync function to the stream and returns the stream.
// Codes without an assigned name print as an empty string.
static inline std::ostream &operator<<(std::ostream &str, SyncFunction func)
{
    static const char *funcNames[16] = {
        "nop", "", "allrd", "allwr", "", "", "",    "",
        "",    "", "",      "",      "", "", "bar", "host"
    };
    const unsigned code = static_cast<uint8_t>(func) & 0xF;
    return str << funcNames[code];
}
338 #endif
339
// Shared function IDs (SFIDs).
// Several enumerators deliberately share a value (ts/btd = 0x7, vme/rta = 0x8,
// cre/tgm = 0xD); getMnemonic() chooses which name to print from the HW generation.
enum class SharedFunction : uint8_t {
    null = 0x0,
    smpl = 0x2,
    gtwy = 0x3,
    dc2 = 0x4,
    rc = 0x5,
    urb = 0x6,
    ts = 0x7,
    vme = 0x8,
    dcro = 0x9,
    dc0 = 0xA,
    pixi = 0xB,
    dc1 = 0xC,
    cre = 0xD,
    btd = 0x7,
    rta = 0x8,
    ugml = 0x1,
    tgm = 0xD,
    slm = 0xE,
    ugm = 0xF,

    // alias
    sampler = smpl,
    gateway = gtwy,
    spawner = ts,
};
367
368 #ifdef NGEN_ASM
getMnemonic(SharedFunction sfid,HW hw)369 static inline const char *getMnemonic(SharedFunction sfid, HW hw)
370 {
371 static const char *names[16] = {
372 "null", "" , "smpl", "gtwy", "dc2", "rc" , "urb", "ts" ,
373 "vme" , "dcro", "dc0" , "pixi", "dc1", "cre", "" , "" ,
374 };
375 static const char *namesLSC[16] = {
376 "null", "ugml", "smpl", "gtwy", "dc2", "rc" , "urb", "btd",
377 "rta" , "dcro", "dc0" , "pixi", "dc1", "tgm", "slm", "ugm",
378 };
379 const auto &table = (hw >= HW::XeHPG) ? namesLSC : names;
380 return table[static_cast<uint8_t>(sfid) & 0xF];
381 }
382 #endif
383
// ARFs: high nybble of register # specifies type
// (RegData::getARFType() recovers it as base >> 4). Value 14 is not assigned.
enum class ARFType : uint8_t {
    null = 0,
    a    = 1,
    acc  = 2,
    f    = 3,
    ce   = 4,
    msg  = 5,
    sp   = 6,
    sr   = 7,
    cr   = 8,
    n    = 9,
    ip   = 10,
    tdr  = 11,
    tm   = 12,
    fc   = 13,
    dbg  = 15,
};
402
403 #ifdef NGEN_ASM
// Writes the name of an ARF type to the stream and returns the stream.
// The unassigned code 14 prints as an empty string.
static inline std::ostream &operator<<(std::ostream &str, ARFType type)
{
    static const char *arfNames[16] = {
        "null", "a", "acc", "f",   "ce", "msg", "sp", "sr",
        "cr",   "n", "ip",  "tdr", "tm", "fc",  "",   "dbg"
    };
    const unsigned code = static_cast<uint8_t>(type) & 0xF;
    return str << arfNames[code];
}
410
// Detail level passed to the outputText() members declared in this file.
// NOTE(review): the enumerator names suggest increasing amounts of printed operand
// detail (base register only ... full region); confirm against the outputText() definitions.
enum class PrintDetail {base = 0, sub_no_type = 1, sub = 2, hs = 3, vs_hs = 4, full = 5};
412 #endif
413
// Invalid singleton class. Can be assigned to nGEN objects to invalidate them.
// Classes in this file such as RegData, RegisterRegion, Subregister and Align16Operand
// provide operator=(const Invalid &), which calls invalidate() on the object.
static constexpr class Invalid {} invalid{};
416
// Hands out sequential label IDs and records one 32-bit target per ID.
// A freshly issued ID has no target until setTarget() is called for it.
// When NGEN_SAFE is defined, misuse is reported with exceptions:
//   - setTarget() on an ID that already has a target -> multiple_label_exception
//   - offsetTarget()/getTarget() on an ID with no target -> dangling_label_exception
// IDs are used as indices into the target list without any bounds check.
class LabelManager {
protected:
    uint32_t nextID;                    // ID that the next getNewID() call will return.
    std::vector<uint32_t> targets;      // Target per ID; noTarget marks "not set yet".

    enum TargetConstants : uint32_t {
        noTarget = uint32_t(-1),
    };

public:
    LabelManager() : nextID(0) {}

    // Reserves a slot for a new label (initially without target) and returns its ID.
    uint32_t getNewID() {
        const uint32_t issued = nextID;
        targets.push_back(noTarget);
        ++nextID;
        return issued;
    }

    // True once a target has been stored for this ID.
    bool hasTarget(uint32_t id) const {
        return !(targets[id] == noTarget);
    }

    // Stores the target for an ID.
    void setTarget(uint32_t id, uint32_t target) {
#ifdef NGEN_SAFE
        if (hasTarget(id))
            throw multiple_label_exception();
#endif
        targets[id] = target;
    }

    // Adds offset to the target already stored for an ID.
    void offsetTarget(uint32_t id, uint32_t offset) {
#ifdef NGEN_SAFE
        if (!hasTarget(id))
            throw dangling_label_exception();
#endif
        targets[id] = targets[id] + offset;
    }

    // Returns the target stored for an ID.
    uint32_t getTarget(uint32_t id) const {
#ifdef NGEN_SAFE
        if (!hasTarget(id))
            throw dangling_label_exception();
#endif
        return targets[id];
    }
};
459
460 // An object representing a label.
461 class Label {
462 protected:
463 unsigned id : 31;
464 unsigned uninit : 1;
465
466 public:
Label()467 Label() : id(0), uninit(true) {}
468
getID(LabelManager & man)469 uint32_t getID(LabelManager &man) {
470 if (uninit) {
471 id = man.getNewID();
472 uninit = false;
473 }
474 return id;
475 }
476
477 /* for compatibility with RegData */
fixup(int execSize,DataType defaultType,bool isDest,int arity)478 void fixup(int execSize, DataType defaultType, bool isDest, int arity) {}
isScalar() const479 constexpr14 bool isScalar() const { return false; }
480
481 #ifdef NGEN_ASM
482 static const bool emptyOp = false;
483 inline void outputText(std::ostream &str, PrintDetail detail, LabelManager &man);
484 #endif
485 };
486
487 static inline bool operator==(const RegData &r1, const RegData &r2);
488 static inline bool operator!=(const RegData &r1, const RegData &r2);
489
490 // Superclass for registers, subregisters, and register regions, possibly
491 // with source modifiers.
492 class RegData {
493 protected:
494 unsigned base : 8;
495 unsigned arf : 1;
496 signed off : 11;
497 unsigned mods : 2;
498 unsigned type : 8;
499 unsigned indirect : 1;
500 unsigned _pad1 : 1;
501 unsigned vs : 7;
502 unsigned width : 5;
503 unsigned hs : 6;
504 unsigned _pad2 : 13;
505 unsigned invalid : 1;
506
RegData(int base_,bool arf_,int off_,bool indirect_,DataType type_,int vs_,int width_,int hs_)507 constexpr RegData(int base_, bool arf_, int off_, bool indirect_, DataType type_, int vs_, int width_, int hs_)
508 : base(base_), arf(arf_), off(off_), mods(0), type(static_cast<int>(type_)), indirect(indirect_), _pad1(0), vs(vs_), width(width_), hs(hs_), _pad2(0), invalid(0) {}
509
510 public:
511 #ifdef NGEN_ASM
512 static const bool emptyOp = false;
513 #endif
514
RegData()515 constexpr RegData()
516 : base(0), arf(0), off(0), mods(0), type(0), indirect(0), _pad1(0), vs(0), width(0), hs(0), _pad2(0), invalid(1) {}
517
getBase() const518 constexpr int getBase() const { return base; }
isARF() const519 constexpr bool isARF() const { return arf; }
getARFBase() const520 constexpr int getARFBase() const { return base & 0xF; }
getARFType() const521 constexpr ARFType getARFType() const { return static_cast<ARFType>(base >> 4); }
isIndirect() const522 constexpr bool isIndirect() const { return indirect; }
isVxIndirect() const523 constexpr bool isVxIndirect() const { return indirect && (vs == 0x7F); }
getIndirectBase() const524 constexpr int getIndirectBase() const { return base >> 4; }
getIndirectOff() const525 constexpr int getIndirectOff() const { return base & 0xF; }
isNull() const526 constexpr bool isNull() const { return isARF() && (getARFType() == ARFType::null); }
isInvalid() const527 constexpr bool isInvalid() const { return invalid; }
isValid() const528 constexpr bool isValid() const { return !invalid; }
getOffset() const529 constexpr int getOffset() const { return off; }
getByteOffset() const530 constexpr int getByteOffset() const { return off * getBytes(); }
getType() const531 constexpr DataType getType() const { return static_cast<DataType>(type); }
getVS() const532 constexpr int getVS() const { return vs; }
getWidth() const533 constexpr int getWidth() const { return width; }
getHS() const534 constexpr int getHS() const { return hs; }
getNeg() const535 constexpr bool getNeg() const { return mods & 2; }
getAbs() const536 constexpr bool getAbs() const { return mods & 1; }
getMods() const537 constexpr int getMods() const { return mods; }
getBytes() const538 constexpr int getBytes() const { return ngen::getBytes(getType()); }
getDwords() const539 constexpr14 int getDwords() const { return ngen::getDwords(getType()); }
isScalar() const540 constexpr bool isScalar() const { return hs == 0 && vs == 0 && width == 1; }
541
setBase(int base_)542 constexpr14 RegData &setBase(int base_) { base = base_; return *this; }
setOffset(int off_)543 constexpr14 RegData &setOffset(int off_) { off = off_; return *this; }
setType(DataType newType)544 constexpr14 RegData &setType(DataType newType) { type = static_cast<unsigned>(newType); return *this; }
setMods(int mods_)545 constexpr14 RegData &setMods(int mods_) { mods = mods_; return *this; }
setRegion(int vs_,int width_,int hs_)546 constexpr14 RegData &setRegion(int vs_, int width_, int hs_) { vs = vs_; width = width_; hs = hs_; return *this; }
547
invalidate()548 void invalidate() { invalid = true; }
operator =(const Invalid & i)549 RegData &operator=(const Invalid &i) { this->invalidate(); return *this; }
550
551 inline void fixup(int execSize, DataType defaultType, bool isDest, int arity); // Adjust automatically-computed strides given ESize.
552
operator +() const553 constexpr RegData operator+() const { return *this; }
operator -() const554 constexpr14 RegData operator-() const {
555 auto result = *this;
556 result.negate();
557 return result;
558 }
operator ~() const559 constexpr14 RegData operator~() const { return -*this; }
negate()560 constexpr14 void negate() { mods = mods ^ 2; }
561
562 friend inline bool operator==(const RegData &r1, const RegData &r2);
563 friend inline bool operator!=(const RegData &r1, const RegData &r2);
564
565 friend inline RegData abs(const RegData &r);
566
567 #ifdef NGEN_ASM
568 inline void outputText(std::ostream &str, PrintDetail detail, LabelManager &man) const;
569 #endif
570 };
571
572 static_assert(sizeof(RegData) == 8, "RegData structure is not laid out correctly in memory.");
573
operator ==(const RegData & r1,const RegData & r2)574 static inline bool operator==(const RegData &r1, const RegData &r2) {
575 return *((uint64_t *) &r1) == *((uint64_t *) &r2);
576 }
577
operator !=(const RegData & r1,const RegData & r2)578 static inline bool operator!=(const RegData &r1, const RegData &r2) {
579 return !(r1 == r2);
580 }
581
// Returns a copy of r whose modifier field is set to 1: the abs bit is set and any
// existing negate bit is cleared, so abs(-r) carries only the abs modifier.
inline RegData abs(const RegData &r)
{
    RegData copy(r);
    copy.setMods(1);
    return copy;
}
587
fixup(int execSize,DataType defaultType,bool isDest,int arity)588 inline void RegData::fixup(int execSize, DataType defaultType, bool isDest, int arity)
589 {
590 #ifdef NGEN_SAFE
591 if (isInvalid()) throw invalid_object_exception();
592 #endif
593
594 if (getType() == DataType::invalid) {
595 #ifdef NGEN_SAFE
596 if (defaultType == DataType::invalid)
597 throw missing_type_exception();
598 #endif
599 setType(defaultType);
600 }
601 if (!isVxIndirect()) {
602 if (execSize == 1) {
603 vs = hs = 0;
604 width = 1;
605 } else if (width == 0) {
606 int maxWidth = 32 / getBytes();
607 width = (hs == 0) ? 1 : std::min<int>({int(maxWidth / hs), execSize, 16});
608 vs = width * hs;
609 }
610 if (isDest && hs == 0)
611 hs = 1;
612 }
613 }
614
615 // Operands for Align16 instructions
616 class Align16Operand {
617 protected:
618 RegData rd;
619 unsigned chanSel : 8;
620 unsigned chanEn : 4;
621 bool rep : 1;
622
623 public:
Align16Operand(RegData rd_,int chanEn_)624 constexpr Align16Operand(RegData rd_, int chanEn_) : rd(rd_), chanSel(0b11100100), chanEn(chanEn_), rep(false) {}
Align16Operand(RegData rd_,int s0,int s1,int s2,int s3)625 constexpr Align16Operand(RegData rd_, int s0, int s1, int s2, int s3) : rd(rd_),
626 chanSel((s0 & 3) | ((s1 & 3) << 2) | ((s2 & 3) << 4) | ((s3 & 3) << 6)), chanEn(0xF), rep(false) {}
627
createBroadcast(RegData rd_)628 static constexpr14 Align16Operand createBroadcast(RegData rd_) {
629 Align16Operand op{rd_, 0xF};
630 op.rep = true;
631 return op;
632 }
633
createWithMME(RegData rd_,int mme)634 static constexpr14 Align16Operand createWithMME(RegData rd_, int mme) {
635 Align16Operand op{rd_, mme};
636 op.chanSel = mme;
637 return op;
638 }
639
getReg()640 RegData &getReg() { return rd; }
getReg() const641 constexpr const RegData &getReg() const { return rd; }
getChanSel() const642 constexpr uint8_t getChanSel() const { return chanSel; }
getChanEn() const643 constexpr uint8_t getChanEn() const { return chanEn; }
isRep() const644 constexpr bool isRep() const { return rep; }
645
isIndirect() const646 constexpr bool isIndirect() const { return rd.isIndirect(); }
getType() const647 constexpr DataType getType() const { return rd.getType(); }
getOffset() const648 constexpr int getOffset() const { return rd.getOffset(); }
getMods() const649 constexpr int getMods() const { return rd.getMods(); }
isARF() const650 constexpr bool isARF() const { return rd.isARF(); }
651
invalidate()652 void invalidate() { rd.invalidate(); }
operator =(const Invalid & i)653 Align16Operand &operator=(const Invalid &i) { this->invalidate(); return *this; }
isInvalid() const654 bool isInvalid() const { return rd.isInvalid(); }
isValid() const655 bool isValid() const { return !rd.isInvalid(); }
isScalar() const656 constexpr bool isScalar() const { return rd.isScalar(); }
657
fixup(int execSize,DataType defaultType,bool isDest,int arity)658 void fixup(int execSize, DataType defaultType, bool isDest, int arity) {
659 rd.fixup(execSize, defaultType, isDest, arity);
660 }
661
662 #ifdef NGEN_ASM
663 inline void outputText(std::ostream &str, PrintDetail detail, LabelManager &man) const;
664 static const bool emptyOp = false;
665 #endif
666 };
667
668 // Register regions.
669 class RegisterRegion : public RegData
670 {
671 public:
RegisterRegion()672 constexpr RegisterRegion() : RegData() {}
RegisterRegion(RegData rdata_,int vs_,int width_,int hs_)673 constexpr14 RegisterRegion(RegData rdata_, int vs_, int width_, int hs_) {
674 *static_cast<RegData *>(this) = rdata_;
675 vs = vs_;
676 width = width_;
677 hs = hs_;
678 }
679
operator =(const Invalid & i)680 RegisterRegion &operator=(const Invalid &i) { this->invalidate(); return *this; }
681
operator +() const682 constexpr RegisterRegion operator+() const { return *this; }
operator -() const683 constexpr14 RegisterRegion operator-() const {
684 auto result = *this;
685 result.negate();
686 return result;
687 }
operator ~() const688 constexpr14 RegisterRegion operator~() const { return -*this; }
689 };
690
691 // Subregister; always associated with a specific data type.
692 class Subregister : public RegData
693 {
694 protected:
checkGRF() const695 void checkGRF() const {
696 #ifdef NGEN_SAFE
697 if (isARF()) throw grf_expected_exception();
698 #endif
699 }
700
701 public:
Subregister()702 constexpr Subregister() : RegData() {}
Subregister(RegData reg_,int offset_,DataType type_)703 constexpr14 Subregister(RegData reg_, int offset_, DataType type_) {
704 *static_cast<RegData *>(this) = reg_;
705 off = offset_;
706 type = static_cast<int>(type_);
707 hs = vs = 0;
708 width = 1;
709 }
Subregister(RegData reg_,DataType type_)710 constexpr14 Subregister(RegData reg_, DataType type_) {
711 *static_cast<RegData *>(this) = reg_;
712 off = 0;
713 type = static_cast<int>(type_);
714 }
715
716 inline RegisterRegion operator()(int vs, int width, int hs) const;
717 inline RegisterRegion operator()(int vs, int hs) const;
718 inline RegisterRegion operator()(int hs) const;
719
operator =(const Invalid & i)720 Subregister &operator=(const Invalid &i) { this->invalidate(); return *this; }
721
operator +() const722 constexpr Subregister operator+() const { return *this; }
operator -() const723 constexpr14 Subregister operator-() const {
724 auto result = *this;
725 result.negate();
726 return result;
727 }
operator ~() const728 constexpr14 Subregister operator~() const { return -*this; }
729
swizzle(int s0,int s1,int s2,int s3) const730 Align16Operand swizzle(int s0, int s1, int s2, int s3) const { checkGRF(); return Align16Operand(*this, s0, s1, s2, s3); }
broadcast() const731 Align16Operand broadcast() const { checkGRF(); return Align16Operand::createBroadcast(*this); }
enable(bool c0,bool c1,bool c2,bool c3) const732 Align16Operand enable(bool c0, bool c1, bool c2, bool c3) const { checkGRF(); return Align16Operand(*this, (int(c3) << 3) | (int(c2) << 2) | (int(c1) << 1) | int(c0)); }
noSwizzle() const733 Align16Operand noSwizzle() const { return swizzle(0, 1, 2, 3); }
enableAll() const734 Align16Operand enableAll() const { return enable(true, true, true, true); }
735
736 inline Subregister reinterpret(int offset, DataType type_) const;
reinterpret(int offset=0) const737 template <typename T> Subregister reinterpret(int offset = 0) const { return reinterpret(offset, getDataType<T>()); }
738
offset(int off) const739 inline Subregister offset(int off) const { return reinterpret(off, getType()); }
740
uq(int offset=0) const741 Subregister uq(int offset = 0) const { return reinterpret(offset, DataType::uq); }
q(int offset=0) const742 Subregister q(int offset = 0) const { return reinterpret(offset, DataType::q); }
ud(int offset=0) const743 Subregister ud(int offset = 0) const { return reinterpret(offset, DataType::ud); }
d(int offset=0) const744 Subregister d(int offset = 0) const { return reinterpret(offset, DataType::d); }
uw(int offset=0) const745 Subregister uw(int offset = 0) const { return reinterpret(offset, DataType::uw); }
w(int offset=0) const746 Subregister w(int offset = 0) const { return reinterpret(offset, DataType::w); }
ub(int offset=0) const747 Subregister ub(int offset = 0) const { return reinterpret(offset, DataType::ub); }
b(int offset=0) const748 Subregister b(int offset = 0) const { return reinterpret(offset, DataType::b); }
df(int offset=0) const749 Subregister df(int offset = 0) const { return reinterpret(offset, DataType::df); }
f(int offset=0) const750 Subregister f(int offset = 0) const { return reinterpret(offset, DataType::f); }
hf(int offset=0) const751 Subregister hf(int offset = 0) const { return reinterpret(offset, DataType::hf); }
bf(int offset=0) const752 Subregister bf(int offset = 0) const { return reinterpret(offset, DataType::bf); }
tf32(int offset=0) const753 Subregister tf32(int offset = 0) const { return reinterpret(offset, DataType::tf32); }
bf8(int offset=0) const754 Subregister bf8(int offset = 0) const { return reinterpret(offset, DataType::bf8); }
755 };
756
757 // Single register.
758 class Register : public RegData
759 {
760 public:
Register()761 constexpr Register() : RegData() {}
Register(int reg_,bool arf_,DataType defaultType=DataType::invalid,int off_=0)762 constexpr Register(int reg_, bool arf_, DataType defaultType = DataType::invalid, int off_ = 0)
763 : RegData(reg_, arf_, off_, false, defaultType, 0, 0, 1) {}
764
operator +() const765 constexpr Register operator+() const { return *this; }
operator -() const766 constexpr14 Register operator-() const {
767 auto result = *this;
768 result.negate();
769 return result;
770 }
operator ~() const771 constexpr14 Register operator~() const { return -*this; }
772
sub(int offset,DataType type_) const773 constexpr14 Subregister sub(int offset, DataType type_) const { return Subregister(*this, offset, type_); }
sub(int offset) const774 template <typename T> constexpr14 Subregister sub(int offset) const { return sub(offset, getDataType<T>()); }
775
retype(DataType type_) const776 constexpr14 Register retype(DataType type_) const { auto clone = *this; clone.setType(type_); return clone; }
retype() const777 template <typename T> constexpr14 Register retype() const { return retype(getDataType<T>()); }
778
uq(int offset) const779 constexpr14 Subregister uq(int offset) const { return sub(offset, DataType::uq); }
q(int offset) const780 constexpr14 Subregister q(int offset) const { return sub(offset, DataType::q); }
ud(int offset) const781 constexpr14 Subregister ud(int offset) const { return sub(offset, DataType::ud); }
d(int offset) const782 constexpr14 Subregister d(int offset) const { return sub(offset, DataType::d); }
uw(int offset) const783 constexpr14 Subregister uw(int offset) const { return sub(offset, DataType::uw); }
w(int offset) const784 constexpr14 Subregister w(int offset) const { return sub(offset, DataType::w); }
ub(int offset) const785 constexpr14 Subregister ub(int offset) const { return sub(offset, DataType::ub); }
b(int offset) const786 constexpr14 Subregister b(int offset) const { return sub(offset, DataType::b); }
df(int offset) const787 constexpr14 Subregister df(int offset) const { return sub(offset, DataType::df); }
f(int offset) const788 constexpr14 Subregister f(int offset) const { return sub(offset, DataType::f); }
hf(int offset) const789 constexpr14 Subregister hf(int offset) const { return sub(offset, DataType::hf); }
bf(int offset) const790 constexpr14 Subregister bf(int offset) const { return sub(offset, DataType::bf); }
tf32(int offset) const791 constexpr14 Subregister tf32(int offset) const { return sub(offset, DataType::tf32); }
bf8(int offset) const792 constexpr14 Subregister bf8(int offset) const { return sub(offset, DataType::bf8); }
793
uq() const794 constexpr14 Register uq() const { return retype(DataType::uq); }
q() const795 constexpr14 Register q() const { return retype(DataType::q); }
ud() const796 constexpr14 Register ud() const { return retype(DataType::ud); }
d() const797 constexpr14 Register d() const { return retype(DataType::d); }
uw() const798 constexpr14 Register uw() const { return retype(DataType::uw); }
w() const799 constexpr14 Register w() const { return retype(DataType::w); }
ub() const800 constexpr14 Register ub() const { return retype(DataType::ub); }
b() const801 constexpr14 Register b() const { return retype(DataType::b); }
df() const802 constexpr14 Register df() const { return retype(DataType::df); }
f() const803 constexpr14 Register f() const { return retype(DataType::f); }
hf() const804 constexpr14 Register hf() const { return retype(DataType::hf); }
bf() const805 constexpr14 Register bf() const { return retype(DataType::bf); }
tf32() const806 constexpr14 Register tf32() const { return retype(DataType::tf32); }
bf8() const807 constexpr14 Register bf8() const { return retype(DataType::bf8); }
808
operator [](int offset) const809 constexpr14 Subregister operator[](int offset) const { return sub(offset, getType()); }
810
operator =(const Invalid & i)811 Register &operator=(const Invalid &i) { this->invalidate(); return *this; }
812 };
813
814 class GRF : public Register
815 {
816 public:
GRF()817 GRF() : Register() {}
GRF(int reg_)818 explicit constexpr GRF(int reg_) : Register(reg_, false) {}
819
operator +() const820 constexpr GRF operator+() const { return *this; }
operator -() const821 constexpr14 GRF operator-() const {
822 auto result = *this;
823 result.negate();
824 return result;
825 }
operator ~() const826 constexpr14 GRF operator~() const { return -*this; }
827
retype(DataType type_) const828 constexpr14 GRF retype(DataType type_) const { auto clone = *this; clone.setType(type_); return clone; }
retype() const829 template <typename T> constexpr14 Register retype() const { return retype(getDataType<T>()); }
830
uq(int offset) const831 constexpr14 Subregister uq(int offset) const { return sub(offset, DataType::uq); }
q(int offset) const832 constexpr14 Subregister q(int offset) const { return sub(offset, DataType::q); }
ud(int offset) const833 constexpr14 Subregister ud(int offset) const { return sub(offset, DataType::ud); }
d(int offset) const834 constexpr14 Subregister d(int offset) const { return sub(offset, DataType::d); }
uw(int offset) const835 constexpr14 Subregister uw(int offset) const { return sub(offset, DataType::uw); }
w(int offset) const836 constexpr14 Subregister w(int offset) const { return sub(offset, DataType::w); }
ub(int offset) const837 constexpr14 Subregister ub(int offset) const { return sub(offset, DataType::ub); }
b(int offset) const838 constexpr14 Subregister b(int offset) const { return sub(offset, DataType::b); }
df(int offset) const839 constexpr14 Subregister df(int offset) const { return sub(offset, DataType::df); }
f(int offset) const840 constexpr14 Subregister f(int offset) const { return sub(offset, DataType::f); }
hf(int offset) const841 constexpr14 Subregister hf(int offset) const { return sub(offset, DataType::hf); }
bf(int offset) const842 constexpr14 Subregister bf(int offset) const { return sub(offset, DataType::bf); }
bf8(int offset) const843 constexpr14 Subregister bf8(int offset) const { return sub(offset, DataType::bf8); }
tf32(int offset) const844 constexpr14 Subregister tf32(int offset) const { return sub(offset, DataType::tf32); }
845
uq() const846 constexpr14 GRF uq() const { return retype(DataType::uq); }
q() const847 constexpr14 GRF q() const { return retype(DataType::q); }
ud() const848 constexpr14 GRF ud() const { return retype(DataType::ud); }
d() const849 constexpr14 GRF d() const { return retype(DataType::d); }
uw() const850 constexpr14 GRF uw() const { return retype(DataType::uw); }
w() const851 constexpr14 GRF w() const { return retype(DataType::w); }
ub() const852 constexpr14 GRF ub() const { return retype(DataType::ub); }
b() const853 constexpr14 GRF b() const { return retype(DataType::b); }
df() const854 constexpr14 GRF df() const { return retype(DataType::df); }
f() const855 constexpr14 GRF f() const { return retype(DataType::f); }
hf() const856 constexpr14 GRF hf() const { return retype(DataType::hf); }
bf() const857 constexpr14 GRF bf() const { return retype(DataType::bf); }
bf8() const858 constexpr14 GRF bf8() const { return retype(DataType::bf8); }
tf32() const859 constexpr14 GRF tf32() const { return retype(DataType::tf32); }
860
swizzle(int s0,int s1,int s2,int s3) const861 Align16Operand swizzle(int s0, int s1, int s2, int s3) const { return Align16Operand(*this, s0, s1, s2, s3); }
enable(bool c0,bool c1,bool c2,bool c3) const862 Align16Operand enable(bool c0, bool c1, bool c2, bool c3) const { return Align16Operand(*this, (int(c3) << 3) | (int(c2) << 2) | (int(c1) << 1) | int(c0)); }
noSwizzle() const863 Align16Operand noSwizzle() const { return swizzle(0, 1, 2, 3); }
enableAll() const864 Align16Operand enableAll() const { return enable(true, true, true, true); }
865
operator =(const Invalid & i)866 GRF &operator=(const Invalid &i) { this->invalidate(); return *this; }
867
operator +=(const int & inc)868 GRF &operator+=(const int &inc) {
869 base += inc;
870 return *this;
871 }
872
operator ++(int i)873 GRF operator++(int i) {
874 GRF old = *this;
875 ++*this;
876 return old;
877 }
878
operator ++()879 GRF &operator++() {
880 *this += 1;
881 return *this;
882 }
883
advance(int inc)884 GRF advance(int inc) {
885 auto result = *this;
886 result += inc;
887 return result;
888 }
889
890 inline GRFDisp operator+(int offset) const;
891 inline GRFDisp operator-(int offset) const;
892
log2Bytes(HW hw)893 static constexpr int log2Bytes(HW hw) { return (hw == HW::XeHPC) ? 6 : 5; }
bytes(HW hw)894 static constexpr int bytes(HW hw) { return (1 << log2Bytes(hw)); }
bytesToGRFs(HW hw,unsigned x)895 static constexpr int bytesToGRFs(HW hw, unsigned x) { return (x + bytes(hw) - 1) >> log2Bytes(hw); }
896 };
897
898 class GRFDisp {
899 protected:
900 GRF base;
901 int32_t disp;
902
903 public:
GRFDisp(const GRF & base_,int32_t disp_)904 GRFDisp(const GRF &base_, int32_t disp_) : base(base_), disp(disp_) {}
GRFDisp(const RegData & rd)905 /* implicit */ GRFDisp(const RegData &rd) : base(reinterpret_cast<const GRF &>(rd)), disp(0) {}
906
getBase() const907 constexpr GRF getBase() const { return base; }
getDisp() const908 constexpr int32_t getDisp() const { return disp; }
909 };
910
operator +(int offset) const911 GRFDisp GRF::operator+(int offset) const { return GRFDisp(*this, offset); }
operator -(int offset) const912 GRFDisp GRF::operator-(int offset) const { return *this + (-offset); }
913
914 class ARF : public Register
915 {
916 public:
ARF()917 constexpr ARF() : Register() {}
ARF(ARFType type_,int reg_,DataType defaultType=DataType::invalid,int off_=0)918 constexpr ARF(ARFType type_, int reg_, DataType defaultType = DataType::invalid, int off_ = 0)
919 : Register((static_cast<int>(type_) << 4) | (reg_ & 0xF), true, defaultType, off_) {}
920
operator =(const Invalid & i)921 ARF &operator=(const Invalid &i) { this->invalidate(); return *this; }
922 };
923
924 class NullRegister : public ARF
925 {
926 public:
NullRegister()927 constexpr NullRegister() : ARF(ARFType::null, 0, DataType::ud) {}
928 };
929
930 class AddressRegister : public ARF
931 {
932 public:
AddressRegister()933 constexpr AddressRegister() : ARF() {}
AddressRegister(int reg_)934 explicit constexpr AddressRegister(int reg_) : ARF(ARFType::a, reg_, DataType::uw) {}
935
operator =(const Invalid & i)936 AddressRegister &operator=(const Invalid &i) { this->invalidate(); return *this; }
937 };
938
939 class AccumulatorRegister : public ARF
940 {
941 public:
AccumulatorRegister()942 constexpr AccumulatorRegister() : ARF() {}
AccumulatorRegister(int reg_)943 explicit constexpr AccumulatorRegister(int reg_) : ARF(ARFType::acc, reg_) {}
944
operator =(const Invalid & i)945 AccumulatorRegister &operator=(const Invalid &i) { this->invalidate(); return *this; }
946
count(HW hw)947 static constexpr int count(HW hw) { return (hw >= HW::XeHP) ? 4 : 2; }
948 };
949
950 class SpecialAccumulatorRegister : public AccumulatorRegister
951 {
952 uint8_t mmeNum;
953
954 public:
SpecialAccumulatorRegister()955 constexpr SpecialAccumulatorRegister() : AccumulatorRegister(), mmeNum(0) {}
SpecialAccumulatorRegister(int reg_,int mmeNum_)956 constexpr SpecialAccumulatorRegister(int reg_, int mmeNum_) : AccumulatorRegister(reg_), mmeNum(mmeNum_) {}
957
createNoMME()958 static constexpr SpecialAccumulatorRegister createNoMME() { return SpecialAccumulatorRegister(0, 8); }
959
getMME() const960 constexpr uint8_t getMME() const { return mmeNum; }
961
operator =(const Invalid & i)962 SpecialAccumulatorRegister &operator=(const Invalid &i) { this->invalidate(); return *this; }
963 };
964
965 // An "extended register" is a combination of a regular GRF and some extra accumulator bits, used for math macro operations.
966 class ExtendedReg {
967 RegData base;
968 uint8_t mmeNum;
969
970 public:
ExtendedReg(RegData base_,uint8_t mmeNum_)971 constexpr ExtendedReg(RegData base_, uint8_t mmeNum_) : base(base_), mmeNum(mmeNum_) {}
ExtendedReg(RegData base_,SpecialAccumulatorRegister acc)972 constexpr ExtendedReg(RegData base_, SpecialAccumulatorRegister acc) : base(base_), mmeNum(acc.getMME()) {}
973
fixup(int execSize,DataType defaultType,bool isDest,int arity)974 void fixup(int execSize, DataType defaultType, bool isDest, int arity) {
975 base.fixup(execSize, defaultType, isDest, arity);
976 }
977
getMods() const978 constexpr int getMods() const { return base.getMods(); }
getType() const979 constexpr DataType getType() const { return base.getType(); }
getOffset() const980 constexpr int getOffset() const { return base.getOffset(); }
isIndirect() const981 constexpr bool isIndirect() const { return base.isIndirect(); }
isInvalid() const982 constexpr bool isInvalid() const { return base.isInvalid(); }
isValid() const983 constexpr bool isValid() const { return !base.isInvalid(); }
isScalar() const984 constexpr bool isScalar() const { return base.isScalar(); }
isARF() const985 constexpr bool isARF() const { return base.isARF(); }
986
getBase()987 constexpr14 RegData &getBase() { return base; }
getBase() const988 constexpr RegData getBase() const { return base; }
getMMENum() const989 constexpr uint8_t getMMENum() const { return mmeNum; }
990
991 #ifdef NGEN_ASM
992 inline void outputText(std::ostream &str, PrintDetail detail, LabelManager &man) const;
993 static const bool emptyOp = false;
994 #endif
995 };
996
operator |(const RegData & base,const SpecialAccumulatorRegister & acc)997 static inline ExtendedReg operator|(const RegData &base, const SpecialAccumulatorRegister &acc)
998 {
999 return ExtendedReg(base, acc);
1000 }
1001
1002 class FlagRegister : public ARF
1003 {
1004 public:
FlagRegister()1005 constexpr FlagRegister() : ARF() {}
FlagRegister(int reg_)1006 explicit constexpr FlagRegister(int reg_) : ARF(ARFType::f, reg_, DataType::ud, 0) {}
FlagRegister(int reg_,int off_)1007 constexpr FlagRegister(int reg_, int off_) : ARF(ARFType::f, reg_, DataType::uw, off_) {}
1008
createFromIndex(int index)1009 static FlagRegister createFromIndex(int index) {
1010 return FlagRegister(index >> 1, index & 1);
1011 }
1012
operator ~() const1013 FlagRegister operator~() const {
1014 FlagRegister result = *this;
1015 result.mods = result.mods ^ 2;
1016 return result;
1017 }
1018
operator =(const Invalid & i)1019 FlagRegister &operator=(const Invalid &i) { this->invalidate(); return *this; }
1020
operator [](int offset) const1021 constexpr FlagRegister operator[](int offset) const { return FlagRegister(getARFBase(), getOffset() + offset); }
1022
index() const1023 int index() const { return (getARFBase() << 1) + getOffset(); }
1024
count(HW hw)1025 static inline constexpr int count(HW hw) {
1026 return (hw == HW::XeHPC) ? 4 : 2;
1027 }
subcount(HW hw)1028 static inline constexpr int subcount(HW hw) { return count(hw) * 2; }
1029 };
1030
1031 class ChannelEnableRegister : public ARF
1032 {
1033 public:
ChannelEnableRegister(int reg_=0)1034 explicit constexpr ChannelEnableRegister(int reg_ = 0) : ARF(ARFType::ce, reg_, DataType::ud) {}
1035 };
1036
1037 class StackPointerRegister : public ARF
1038 {
1039 public:
StackPointerRegister(int reg_=0)1040 explicit constexpr StackPointerRegister(int reg_ = 0) : ARF(ARFType::sp, reg_, DataType::uq) {}
1041 };
1042
1043 class StateRegister : public ARF
1044 {
1045 public:
StateRegister(int reg_=0)1046 explicit constexpr StateRegister(int reg_ = 0) : ARF(ARFType::sr, reg_, DataType::ud) {}
1047 };
1048
1049 class ControlRegister : public ARF
1050 {
1051 public:
ControlRegister(int reg_=0)1052 explicit constexpr ControlRegister(int reg_ = 0) : ARF(ARFType::cr, reg_, DataType::ud) {}
1053 };
1054
1055 class NotificationRegister : public ARF
1056 {
1057 public:
NotificationRegister(int reg_=0)1058 explicit constexpr NotificationRegister(int reg_ = 0) : ARF(ARFType::n, reg_, DataType::ud) {}
1059 };
1060
1061 class InstructionPointerRegister : public ARF
1062 {
1063 public:
InstructionPointerRegister()1064 constexpr InstructionPointerRegister() : ARF(ARFType::ip, 0, DataType::ud) {}
1065 };
1066
1067 class ThreadDependencyRegister : public ARF
1068 {
1069 public:
ThreadDependencyRegister(int reg_=0)1070 explicit constexpr ThreadDependencyRegister(int reg_ = 0) : ARF(ARFType::tdr, reg_, DataType::uw) {}
1071 };
1072
1073 class PerformanceRegister : public ARF
1074 {
1075 public:
PerformanceRegister(int reg_=0,int off_=0)1076 explicit constexpr PerformanceRegister(int reg_ = 0, int off_ = 0) : ARF(ARFType::tm, reg_, DataType::ud, off_) {}
1077 };
1078
1079 class DebugRegister : public ARF
1080 {
1081 public:
DebugRegister(int reg_=0)1082 explicit constexpr DebugRegister(int reg_ = 0) : ARF(ARFType::dbg, reg_, DataType::ud) {}
1083 };
1084
1085 class FlowControlRegister : public ARF
1086 {
1087 public:
FlowControlRegister(int reg_=0)1088 explicit constexpr FlowControlRegister(int reg_ = 0) : ARF(ARFType::fc, reg_, DataType::ud) {}
1089 };
1090
operator ()(int vs,int width,int hs) const1091 inline RegisterRegion Subregister::operator()(int vs, int width, int hs) const
1092 {
1093 RegisterRegion rr(*this, vs, width, hs);
1094 return rr;
1095 }
1096
operator ()(int vs_or_width,int hs) const1097 inline RegisterRegion Subregister::operator()(int vs_or_width, int hs) const
1098 {
1099 int vs, width;
1100
1101 if (isIndirect()) {
1102 vs = -1;
1103 width = vs_or_width;
1104 } else {
1105 vs = vs_or_width;
1106 width = (hs == 0) ? ((vs == 0) ? 1 : vs) : vs / hs;
1107 }
1108
1109 return operator()(vs, width, hs);
1110 }
1111
operator ()(int hs) const1112 inline RegisterRegion Subregister::operator()(int hs) const
1113 {
1114 return operator()(0, 0, hs);
1115 }
1116
reinterpret(int offset,DataType type_) const1117 inline Subregister Subregister::reinterpret(int offset, DataType type_) const
1118 {
1119 Subregister r = *this;
1120 r.setType(type_);
1121
1122 int o = getOffset();
1123 int oldbytes = getBytes(), newbytes = r.getBytes();
1124 int bitdiff = (oldbytes == 0) ? 0
1125 : (utils::log2(newbytes) - utils::log2(oldbytes));
1126
1127 if (newbytes < oldbytes)
1128 r.setOffset((o << -bitdiff) + offset);
1129 else
1130 r.setOffset((o >> bitdiff) + offset);
1131
1132 return r;
1133 }
1134
1135 // Indirect register and frames for making them.
1136 class IndirectRegister : public Register {
1137 protected:
IndirectRegister(const RegData & reg)1138 explicit constexpr14 IndirectRegister(const RegData ®) : Register((reg.getARFBase() << 4) | reg.getOffset(), false) {
1139 indirect = true;
1140 }
1141 friend class IndirectRegisterFrame;
1142
operator =(const Invalid & i)1143 IndirectRegister &operator=(const Invalid &i) { this->invalidate(); return *this; }
1144 };
1145
1146 class IndirectRegisterFrame {
1147 public:
operator [](const RegData & reg) const1148 IndirectRegister operator[](const RegData ®) const {
1149 #ifdef NGEN_SAFE
1150 if (!reg.isARF() || reg.getARFType() != ARFType::a)
1151 throw invalid_arf_exception();
1152 #endif
1153 return IndirectRegister(reg);
1154 }
1155 };
1156
1157 // GRFRange represents a contiguous range of GRF registers.
1158 class GRFRange {
1159 protected:
1160 uint8_t base;
1161 uint8_t len;
1162
1163 static constexpr uint8_t invalidLen = 0xFF;
1164
1165 public:
GRFRange()1166 GRFRange() : GRFRange(0, invalidLen) {}
GRFRange(int base_,int len_)1167 GRFRange(int base_, int len_) : base(base_), len(len_) {}
GRFRange(GRF base_,int len_)1168 GRFRange(GRF base_, int len_) : GRFRange(base_.getBase(), len_) {}
1169
getBase() const1170 int getBase() const { return base; }
getLen() const1171 int getLen() const { return len; }
isEmpty() const1172 bool isEmpty() const { return len == 0; }
isNull() const1173 bool isNull() const { return false; }
1174
invalidate()1175 void invalidate() { len = invalidLen; }
isInvalid() const1176 bool isInvalid() const { return len == invalidLen; }
isValid() const1177 bool isValid() const { return !isInvalid(); }
1178
operator =(const Invalid & i)1179 GRFRange &operator=(const Invalid &i) { this->invalidate(); return *this; }
1180
operator [](int i) const1181 GRF operator[](int i) const {
1182 #ifdef NGEN_SAFE
1183 if (isInvalid()) throw invalid_object_exception();
1184 #endif
1185 return GRF(base + i);
1186 }
1187
operator GRF() const1188 operator GRF() const { return (*this)[0]; }
1189
fixup(int execSize,DataType defaultType,bool isDest,int arity)1190 void fixup(int execSize, DataType defaultType, bool isDest, int arity) {}
1191 };
1192
operator -(const GRF & reg1,const GRF & reg2)1193 static inline GRFRange operator-(const GRF ®1, const GRF ®2)
1194 {
1195 uint8_t b1 = reg1.getBase(), b2 = reg2.getBase();
1196 int len = int(b2) + 1 - int(b1);
1197
1198 #ifdef NGEN_SAFE
1199 if (len < 0) throw invalid_range_exception();
1200 #endif
1201
1202 return GRFRange(reg1, len);
1203 }
1204
operator ==(const GRFRange & r1,const GRFRange & r2)1205 static inline bool operator==(const GRFRange &r1, const GRFRange &r2)
1206 {
1207 return (r1.getBase() == r2.getBase()) && (r1.getLen() == r2.getLen());
1208 }
1209
operator !=(const GRFRange & r1,const GRFRange & r2)1210 static inline bool operator!=(const GRFRange &r1, const GRFRange &r2)
1211 {
1212 return !(r1 == r2);
1213 }
1214
// Condition modifier encodings. `ze`/`eq` share one value, as do `nz`/`ne`.
enum class ConditionModifier {
    none = 0x0,
    ze = 0x1, eq = ze,
    nz = 0x2, ne = nz,
    gt = 0x3,
    ge = 0x4,
    lt = 0x5,
    le = 0x6,
    ov = 0x8,
    un = 0x9,
    eo = 0xF
};
1229
#ifdef NGEN_ASM
// Writes the assembly name of a condition modifier, indexed by its low four
// bits; encodings without a name print as an empty string.
static inline std::ostream &operator<<(std::ostream &str, ConditionModifier cmod)
{
    static const char *names[16] = {
        "",   "eq", "ne", "gt", "ge", "lt", "le", "",
        "ov", "un", "",   "",   "",   "",   "",   "eo"
    };
    const auto slot = static_cast<uint8_t>(cmod) & 0xF;
    return str << names[slot];
}
#endif
1238
// Channel mask encodings, named after the channels that remain.
//
// NOTE(review): for every enumerator except `bga` and `ga` the value reads as
// a 4-bit mask of *dropped* channels (bit 0 = r, bit 1 = g, bit 2 = b,
// bit 3 = a). Under that reading 5 would be "ga" and 6 would be "ra"; the
// names `bga = 5` / `ga = 6` do not fit -- confirm against the hardware
// documentation before relying on them.
enum class ChannelMask {
    rgba = 0x0,
    gba  = 0x1,
    rba  = 0x2,
    ba   = 0x3,
    rga  = 0x4,
    bga  = 0x5,
    ga   = 0x6,
    a    = 0x7,
    rgb  = 0x8,
    gb   = 0x9,
    rb   = 0xA,
    b    = 0xB,
    rg   = 0xC,
    g    = 0xD,
    r    = 0xE,
};
1256
// Predication control encodings.
enum class PredCtrl {
    None   = 0,
    Normal = 1,

    // Any/all reductions.
    anyv   = 2,
    allv   = 3,
    any2h  = 4,
    all2h  = 5,
    any4h  = 6,
    all4h  = 7,
    any8h  = 8,
    all8h  = 9,
    any16h = 10,
    all16h = 11,
    any32h = 12,
    all32h = 13,
    any    = 14,
    all    = 15,

    // Component selectors; these reuse encodings 2..5 from the list above.
    x = 2,
    y = 3,
    z = 4,
    w = 5,
};
1279
#ifdef NGEN_ASM
// Returns the assembly text for a predication control.
//
// `align16` selects which name set is used: row 1 holds the x/y/z/w names,
// row 0 the any*/all* names. Only the low four bits of `ctrl` are used;
// encodings without a name yield "".
//
// The table is `static const` so it is not rebuilt on every call, and the
// function is `static inline` like the other header-level helpers, so
// translation units that never call it do not get an unused-function warning.
static inline const char *toText(PredCtrl ctrl, bool align16) {
    static const char *const names[2][16] = {{"", "", "anyv", "allv", "any2h", "all2h", "any4h", "all4h", "any8h", "all8h", "any16h", "all16h", "any32h", "all32h", "any", "all"},
                                             {"", "", "x",    "y",    "z",     "w",     "",      "",      "",      "",      "",       "",       "",       "",       "",    ""}};
    return names[align16 ? 1 : 0][static_cast<int>(ctrl) & 0xF];
}
#endif
1287
// Thread control encodings (values 0..3).
enum class ThreadCtrl {
    Normal    = 0x0,
    Atomic    = 0x1,
    Switch    = 0x2,
    NoPreempt = 0x3
};
1294
// Instruction opcodes.
//
// Several encodings carry two enumerators (sync/mov, srnd/dp4, line/dpas,
// pln/dpasw). The *_gen12 enumerators in 0x60..0x7A repeat operations that
// also appear in 0x01..0x1A under their plain names.
enum class Opcode {
    illegal     = 0x00,
    sync        = 0x01,
    mov         = 0x01,     // same encoding as sync
    sel         = 0x02,
    movi        = 0x03,
    not_        = 0x04,
    and_        = 0x05,
    or_         = 0x06,
    xor_        = 0x07,
    shr         = 0x08,
    shl         = 0x09,
    smov        = 0x0A,
    asr         = 0x0C,
    ror         = 0x0E,
    rol         = 0x0F,

    cmp         = 0x10,
    cmpn        = 0x11,
    csel        = 0x12,
    bfrev       = 0x17,
    bfe         = 0x18,
    bfi1        = 0x19,
    bfi2        = 0x1A,

    // 0x20..0x2F: the range isBranch() recognises.
    jmpi        = 0x20,
    brd         = 0x21,
    if_         = 0x22,
    brc         = 0x23,
    else_       = 0x24,
    endif       = 0x25,
    while_      = 0x27,
    break_      = 0x28,
    cont        = 0x29,
    halt        = 0x2A,
    calla       = 0x2B,
    call        = 0x2C,
    ret         = 0x2D,
    goto_       = 0x2E,
    join        = 0x2F,

    wait        = 0x30,
    send        = 0x31,
    sendc       = 0x32,
    sends       = 0x33,
    sendsc      = 0x34,
    math        = 0x38,

    add         = 0x40,
    mul         = 0x41,
    avg         = 0x42,
    frc         = 0x43,
    rndu        = 0x44,
    rndd        = 0x45,
    rnde        = 0x46,
    rndz        = 0x47,
    mac         = 0x48,
    mach        = 0x49,
    lzd         = 0x4A,
    fbh         = 0x4B,
    fbl         = 0x4C,
    cbit        = 0x4D,
    addc        = 0x4E,
    subb        = 0x4F,

    sad2        = 0x50,
    sada2       = 0x51,
    add3        = 0x52,
    macl        = 0x53,
    srnd        = 0x54,
    dp4         = 0x54,     // same encoding as srnd
    dph         = 0x55,
    dp3         = 0x56,
    dp2         = 0x57,
    dp4a        = 0x58,
    line        = 0x59,
    dpas        = 0x59,     // same encoding as line
    pln         = 0x5A,
    dpasw       = 0x5A,     // same encoding as pln
    mad         = 0x5B,
    lrp         = 0x5C,
    madm        = 0x5D,

    nop_gen12   = 0x60,
    mov_gen12   = 0x61,
    sel_gen12   = 0x62,
    movi_gen12  = 0x63,
    not_gen12   = 0x64,
    and_gen12   = 0x65,
    or_gen12    = 0x66,
    xor_gen12   = 0x67,
    shr_gen12   = 0x68,
    shl_gen12   = 0x69,
    smov_gen12  = 0x6A,
    bfn         = 0x6B,
    asr_gen12   = 0x6C,
    ror_gen12   = 0x6E,
    rol_gen12   = 0x6F,

    cmp_gen12   = 0x70,
    cmpn_gen12  = 0x71,
    csel_gen12  = 0x72,
    bfrev_gen12 = 0x77,
    bfe_gen12   = 0x78,
    bfi1_gen12  = 0x79,
    bfi2_gen12  = 0x7A,

    nop         = 0x7E,
    wrdep       = 0x7F,     /* not a valid opcode; used internally by nGEN */
};
1397
isVariableLatency(HW hw,Opcode op)1398 static inline bool isVariableLatency(HW hw, Opcode op)
1399 {
1400 switch (op) {
1401 case Opcode::math:
1402 if (hw >= HW::XeHPC) return false;
1403 case Opcode::send:
1404 case Opcode::sendc:
1405 case Opcode::dpas:
1406 case Opcode::dpasw:
1407 return true;
1408 default:
1409 return false;
1410 }
1411 }
1412
isBranch(Opcode op)1413 static inline bool isBranch(Opcode op)
1414 {
1415 return (static_cast<int>(op) >> 4) == 2;
1416 }
1417
#ifdef NGEN_ASM
// Returns the assembly mnemonic for `op` on hardware `hw`.
//
// The table is indexed by the low seven bits of the opcode. For encodings
// shared by two opcodes, the table holds the name returned when `hw` is
// Gen12LP or later; for earlier hardware the switch below substitutes the
// other name (mov, line, pln, dp4). Unassigned encodings yield "".
//
// The table is `static const` so the 128 entries are not rebuilt on every
// call, and the function is `static inline` like the other header-level
// helpers, so translation units that never call it do not get an
// unused-function warning.
static inline const char *getMnemonic(Opcode op, HW hw)
{
    static const char *const names[0x80] = {
        "illegal", "sync",   "sel",    "movi",   "not",    "and",    "or",     "xor",
        "shr",     "shl",    "smov",   "",       "asr",    "",       "ror",    "rol",
        "cmp",     "cmpn",   "csel",   "",       "",       "",       "",       "bfrev",
        "bfe",     "bfi1",   "bfi2",   "",       "",       "",       "",       "",
        "jmpi",    "brd",    "if",     "brc",    "else",   "endif",  "",       "while",
        "break",   "cont",   "halt",   "calla",  "call",   "ret",    "goto",   "join",
        "wait",    "send",   "sendc",  "sends",  "sendsc", "",       "",       "",
        "math",    "",       "",       "",       "",       "",       "",       "",
        "add",     "mul",    "avg",    "frc",    "rndu",   "rndd",   "rnde",   "rndz",
        "mac",     "mach",   "lzd",    "fbh",    "fbl",    "cbit",   "addc",   "subb",
        "sad2",    "sada2",  "add3",   "macl",   "srnd",   "dph",    "dp3",    "dp2",
        "dp4a",    "dpas",   "dpasw",  "mad",    "lrp",    "madm",   "",       "",
        "nop",     "mov",    "sel",    "movi",   "not",    "and",    "or",     "xor",
        "shr",     "shl",    "smov",   "bfn",    "asr",    "",       "ror",    "rol",
        "cmp",     "cmpn",   "csel",   "",       "",       "",       "",       "bfrev",
        "bfe",     "bfi1",   "bfi2",   "",       "",       "",       "nop",    ""
    };

    const char *mnemonic = names[static_cast<int>(op) & 0x7F];

    // Before Gen12LP, the shared encodings take their other names.
    if (hw < HW::Gen12LP) switch (op) {
        case Opcode::mov:  mnemonic = "mov";  break;
        case Opcode::line: mnemonic = "line"; break;
        case Opcode::pln:  mnemonic = "pln";  break;
        case Opcode::dp4:  mnemonic = "dp4";  break;
        default: break;
    }

    return mnemonic;
}
#endif
1453
// Tag type; getPipe<AllPipes>() maps it to Pipe::A.
class AllPipes {};

// Execution pipe identifiers. Each pipe has a one-letter name and a
// descriptive alias with the same value.
enum class Pipe : uint8_t {
    Default = 0,

    A = 1,
    F = 2,
    I = 3,
    L = 4,
    M = 5,

    All     = A,
    Float   = F,
    Integer = I,
    Long    = L,
    Math    = M,
};
1463
#ifdef NGEN_ASM
// Writes the one-letter pipe name, indexed by the low three bits of `pipe`;
// Pipe::Default and unassigned encodings print as an empty string.
static inline std::ostream &operator<<(std::ostream &str, Pipe pipe)
{
    static const char *names[8] = {"", "A", "F", "I", "L", "M", "", ""};
    const auto slot = static_cast<uint8_t>(pipe) & 7;
    return str << names[slot];
}
#endif
1472
1473 class SWSBInfo
1474 {
1475 friend class InstructionModifier;
1476
1477 public:
1478 union {
1479 struct {
1480 unsigned token : 6;
1481 unsigned src : 1;
1482 unsigned dst : 1;
1483 unsigned dist : 4;
1484 unsigned pipe : 4;
1485 } parts;
1486 uint16_t all;
1487 };
1488
hasDist() const1489 constexpr bool hasDist() const { return parts.dist > 0; }
hasToken() const1490 constexpr bool hasToken() const { return parts.src || parts.dst; }
hasTokenSet() const1491 constexpr bool hasTokenSet() const { return parts.src && parts.dst; }
getToken() const1492 constexpr int getToken() const { return hasToken() ? parts.token : 0; }
tokenMode() const1493 constexpr unsigned tokenMode() const { return (parts.src << 1) | parts.dst; }
getPipe() const1494 constexpr Pipe getPipe() const { return static_cast<Pipe>(parts.pipe); }
setPipe(Pipe pipe)1495 void setPipe(Pipe pipe) { parts.pipe = static_cast<unsigned>(pipe); }
empty() const1496 constexpr bool empty() const { return (all == 0); }
1497
1498 protected:
SWSBInfo(uint16_t all_)1499 explicit constexpr SWSBInfo(uint16_t all_) : all(all_) {}
1500
1501 public:
SWSBInfo()1502 constexpr SWSBInfo() : all(0) {}
SWSBInfo(Pipe pipe_,int dist_)1503 constexpr SWSBInfo(Pipe pipe_, int dist_) : all(((dist_ & 0xF) << 8) | (static_cast<unsigned>(pipe_) << 12)) {}
SWSBInfo(int id_,bool src_,bool dst_)1504 constexpr SWSBInfo(int id_, bool src_, bool dst_) : all(id_ | (uint16_t(src_) << 6) | (uint16_t(dst_) << 7)) {}
1505
operator |(const SWSBInfo & i1,const SWSBInfo & i2)1506 friend constexpr SWSBInfo operator|(const SWSBInfo &i1, const SWSBInfo &i2) { return SWSBInfo(i1.all | i2.all); }
1507 };
1508
1509 // Token count.
tokenCount(HW hw)1510 constexpr inline int tokenCount(HW hw)
1511 {
1512 return (hw >= HW::XeHPC) ? 32 : 16;
1513 }
1514
1515 class SBID
1516 {
1517 public:
1518 SWSBInfo set;
1519 SWSBInfo src;
1520 SWSBInfo dst;
1521
SBID(int id)1522 constexpr SBID(int id) : set(id, true, true), src(id, true, false), dst(id, false, true) {}
operator SWSBInfo() const1523 constexpr operator SWSBInfo() const { return set; }
1524
getID() const1525 constexpr int getID() const { return set.getToken(); }
1526 };
1527
getPipe()1528 template <typename T> static constexpr Pipe getPipe() { return (sizeof(T) == 8) ? Pipe::L : Pipe::I; }
getPipe()1529 template <> constexpr Pipe getPipe<float>() { return Pipe::F; }
getPipe()1530 template <> constexpr Pipe getPipe<void>() { return Pipe::Default; }
getPipe()1531 template <> constexpr Pipe getPipe<AllPipes>() { return Pipe::A; }
1532
SWSB(SWSBInfo info)1533 constexpr SWSBInfo SWSB(SWSBInfo info) { return info; }
SWSB(Pipe pipe,int dist)1534 constexpr SWSBInfo SWSB(Pipe pipe, int dist) { return SWSBInfo(pipe, dist); }
SWSB(int dist)1535 template <typename T = void> constexpr SWSBInfo SWSB(int dist) { return SWSB(getPipe<T>(), dist); }
SWSB(SWSBInfo info,int dist)1536 template <typename T = void> constexpr SWSBInfo SWSB(SWSBInfo info, int dist) { return SWSB<T>(dist) | info; }
1537
1538 class InstructionModifier {
1539 protected:
1540 union {
1541 struct {
1542 unsigned execSize : 8; // Execution size as integer (for internal use).
1543 unsigned accessMode : 1; // From here on matches the low 64-bits of the binary format for Gen8-11
1544 unsigned noDDClr : 1;
1545 unsigned noDDChk : 1;
1546 unsigned chanOff : 3;
1547 unsigned threadCtrl : 2;
1548 unsigned predCtrl : 4;
1549 unsigned predInv : 1;
1550 unsigned eSizeField : 3;
1551 unsigned cmod : 4; // Also stores channel mask temporarily for surface r/w
1552 unsigned accWrCtrl : 1; // = noSrcDepSet for send, = branchCtrl for branch instructions
1553 unsigned cmptCtrl : 1;
1554 unsigned debugCtrl : 1;
1555 unsigned saturate : 1;
1556 unsigned flagSubRegNum : 1;
1557 unsigned flagRegNum : 1;
1558 unsigned maskCtrl : 1;
1559 unsigned _zeros_: 9;
1560 unsigned flagRegNum1 : 1;
1561 unsigned autoSWSB : 1;
1562 unsigned fusionCtrl : 1; // Gen12
1563 unsigned eot : 1;
1564 unsigned swsb : 16;
1565 } parts;
1566 uint64_t all;
1567 };
1568
InstructionModifier(uint64_t all_)1569 constexpr InstructionModifier(uint64_t all_) : all(all_) {}
1570
1571 friend inline void encodeCommon12(Instruction12 &i, Opcode opcode, const InstructionModifier &mod, const RegData &dst, EncodingTag12 tag);
1572 friend inline void encodeCommon12(Instruction12 &i, Opcode opcode, const InstructionModifier &mod, const RegData &dst, EncodingTagXeHPC tag);
1573
1574 public:
getExecSize() const1575 constexpr int getExecSize() const { return parts.execSize; }
isAlign16() const1576 constexpr bool isAlign16() const { return parts.accessMode; }
isNoDDClr() const1577 constexpr bool isNoDDClr() const { return parts.noDDClr; }
isNoDDChk() const1578 constexpr bool isNoDDChk() const { return parts.noDDChk; }
getChannelOffset() const1579 constexpr int getChannelOffset() const { return parts.chanOff << 2; }
getThreadCtrl() const1580 constexpr ThreadCtrl getThreadCtrl() const { return static_cast<ThreadCtrl>(parts.threadCtrl); }
isAtomic() const1581 constexpr bool isAtomic() const { return getThreadCtrl() == ThreadCtrl::Atomic; }
getPredCtrl() const1582 constexpr PredCtrl getPredCtrl() const { return static_cast<PredCtrl>(parts.predCtrl); }
isPredInv() const1583 constexpr bool isPredInv() const { return parts.predInv; }
getCMod() const1584 constexpr ConditionModifier getCMod() const { return static_cast<ConditionModifier>(parts.cmod); }
isAccWrEn() const1585 constexpr bool isAccWrEn() const { return parts.accWrCtrl; }
getBranchCtrl() const1586 constexpr bool getBranchCtrl() const { return parts.accWrCtrl; }
isCompact() const1587 constexpr bool isCompact() const { return parts.cmptCtrl; }
isBreakpoint() const1588 constexpr bool isBreakpoint() const { return parts.debugCtrl; }
isSaturate() const1589 constexpr bool isSaturate() const { return parts.saturate; }
getFlagReg() const1590 constexpr14 FlagRegister getFlagReg() const { return FlagRegister((parts.flagRegNum1 << 1) | parts.flagRegNum, parts.flagSubRegNum); }
isWrEn() const1591 constexpr bool isWrEn() const { return parts.maskCtrl; }
isAutoSWSB() const1592 constexpr bool isAutoSWSB() const { return parts.autoSWSB; }
isSerialized() const1593 constexpr bool isSerialized() const { return parts.fusionCtrl; }
isEOT() const1594 constexpr bool isEOT() const { return parts.eot; }
getSWSB() const1595 constexpr SWSBInfo getSWSB() const { return SWSBInfo(parts.swsb); }
getAll() const1596 constexpr uint64_t getAll() const { return all; }
1597
setExecSize(int execSize_)1598 constexpr14 void setExecSize(int execSize_) { parts.execSize = execSize_; parts.eSizeField = utils::log2(execSize_); }
setPredCtrl(PredCtrl predCtrl_)1599 constexpr14 void setPredCtrl(PredCtrl predCtrl_) { parts.predCtrl = static_cast<unsigned>(predCtrl_); }
setPredInv(bool predInv_)1600 constexpr14 void setPredInv(bool predInv_) { parts.predInv = predInv_; }
setCMod(const ConditionModifier & cmod_)1601 constexpr14 void setCMod(const ConditionModifier &cmod_) { parts.cmod = static_cast<unsigned>(cmod_); }
setBranchCtrl(bool branchCtrl)1602 constexpr14 void setBranchCtrl(bool branchCtrl) { parts.accWrCtrl = branchCtrl; }
setFlagReg(FlagRegister & flag)1603 constexpr14 void setFlagReg(FlagRegister &flag) { parts.flagRegNum1 = flag.getBase() >> 1; parts.flagRegNum = flag.getBase() & 1; parts.flagSubRegNum = flag.getOffset(); }
setWrEn(bool maskCtrl_)1604 constexpr14 void setWrEn(bool maskCtrl_) { parts.maskCtrl = maskCtrl_; }
setAutoSWSB(bool autoSWSB_)1605 constexpr14 void setAutoSWSB(bool autoSWSB_) { parts.autoSWSB = autoSWSB_; }
setSWSB(SWSBInfo swsb_)1606 constexpr14 void setSWSB(SWSBInfo swsb_) { parts.swsb = swsb_.all; }
setSWSB(uint16_t swsb_)1607 constexpr14 void setSWSB(uint16_t swsb_) { parts.swsb = swsb_; }
1608
InstructionModifier()1609 constexpr InstructionModifier() : all(0) {}
1610
1611 // Hardcoded shift counts are a workaround for MSVC v140 bug.
InstructionModifier(const PredCtrl & predCtrl_)1612 constexpr /* implicit */ InstructionModifier(const PredCtrl &predCtrl_)
1613 : all{static_cast<uint64_t>(predCtrl_) << 16} {}
1614
InstructionModifier(const ThreadCtrl & threadCtrl_)1615 constexpr /* implicit */ InstructionModifier(const ThreadCtrl &threadCtrl_)
1616 : all{static_cast<uint64_t>(threadCtrl_) << 14} {}
1617
InstructionModifier(const ConditionModifier & cmod_)1618 constexpr /* implicit */ InstructionModifier(const ConditionModifier &cmod_)
1619 : all{static_cast<uint64_t>(cmod_) << 24} {}
1620
InstructionModifier(const int & execSize_)1621 constexpr14 /* implicit */ InstructionModifier(const int &execSize_) : InstructionModifier() {
1622 setExecSize(execSize_);
1623 }
InstructionModifier(const SWSBInfo & swsb)1624 constexpr14 /* implicit */ InstructionModifier(const SWSBInfo &swsb) : InstructionModifier() {
1625 parts.swsb = swsb.all;
1626 }
InstructionModifier(const SBID & sb)1627 constexpr14 /* implicit */ InstructionModifier(const SBID &sb) : InstructionModifier(SWSB(sb)) {}
1628
1629 protected:
InstructionModifier(bool accessMode_,bool noDDClr_,bool noDDChk_,unsigned chanOff_,bool accWrCtrl_,bool debugCtrl_,bool saturate_,bool maskCtrl_,bool autoSWSB_,bool fusionCtrl_,bool eot_)1630 constexpr InstructionModifier(bool accessMode_, bool noDDClr_, bool noDDChk_, unsigned chanOff_, bool accWrCtrl_,
1631 bool debugCtrl_, bool saturate_, bool maskCtrl_, bool autoSWSB_, bool fusionCtrl_, bool eot_)
1632 : all{(uint64_t(accessMode_) << 8) | (uint64_t(noDDClr_) << 9) | (uint64_t(noDDChk_) << 10) | (uint64_t(chanOff_ >> 2) << 11)
1633 | (uint64_t(accWrCtrl_) << 28) | (uint64_t(debugCtrl_) << 30) | (uint64_t(saturate_) << 31)
1634 | (uint64_t(maskCtrl_) << 34) | (uint64_t(autoSWSB_) << 45) | (uint64_t(fusionCtrl_) << 46) | (uint64_t(eot_) << 47)} {}
1635
1636 public:
createAccessMode(int accessMode_)1637 static constexpr InstructionModifier createAccessMode(int accessMode_) {
1638 return InstructionModifier(accessMode_, false, false, 0, false, false, false, false, false, false, false);
1639 }
createNoDDClr()1640 static constexpr InstructionModifier createNoDDClr() {
1641 return InstructionModifier(false, true, false, 0, false, false, false, false, false, false, false);
1642 }
createNoDDChk()1643 static constexpr InstructionModifier createNoDDChk() {
1644 return InstructionModifier(false, false, true, 0, false, false, false, false, false, false, false);
1645 }
createChanOff(int offset)1646 static constexpr InstructionModifier createChanOff(int offset) {
1647 return InstructionModifier(false, false, false, offset, false, false, false, false, false, false, false);
1648 }
createAccWrCtrl()1649 static constexpr InstructionModifier createAccWrCtrl() {
1650 return InstructionModifier(false, false, false, 0, true, false, false, false, false, false, false);
1651 }
createDebugCtrl()1652 static constexpr InstructionModifier createDebugCtrl() {
1653 return InstructionModifier(false, false, false, 0, false, true, false, false, false, false, false);
1654 }
createSaturate()1655 static constexpr InstructionModifier createSaturate() {
1656 return InstructionModifier(false, false, false, 0, false, false, true, false, false, false, false);
1657 }
createMaskCtrl(bool maskCtrl_)1658 static constexpr InstructionModifier createMaskCtrl(bool maskCtrl_) {
1659 return InstructionModifier(false, false, false, 0, false, false, false, maskCtrl_, false, false, false);
1660 }
createAutoSWSB()1661 static constexpr InstructionModifier createAutoSWSB() {
1662 return InstructionModifier(false, false, false, 0, false, false, false, false, true, false, false);
1663 }
createSerialized()1664 static constexpr InstructionModifier createSerialized() {
1665 return InstructionModifier(false, false, false, 0, false, false, false, false, false, true, false);
1666 }
createEOT()1667 static constexpr InstructionModifier createEOT() {
1668 return InstructionModifier(false, false, false, 0, false, false, false, false, false, false, true);
1669 }
1670
1671 friend constexpr14 InstructionModifier operator|(const InstructionModifier &mod1, const InstructionModifier &mod2);
1672 friend constexpr14 InstructionModifier operator|(const InstructionModifier &mod1, const FlagRegister &mod2);
1673 friend constexpr14 InstructionModifier operator|(const InstructionModifier &mod1, const PredCtrl &mod2);
1674
1675 friend constexpr14 InstructionModifier operator^(const InstructionModifier &mod1, const InstructionModifier &mod2);
1676
operator ~()1677 constexpr14 InstructionModifier operator~() {
1678 InstructionModifier mod = *this;
1679 mod.parts.predInv = ~mod.parts.predInv;
1680 return mod;
1681 }
1682
1683 template <typename T>
operator |=(const T & mod)1684 InstructionModifier &operator|=(const T &mod) {
1685 *this = *this | mod;
1686 return *this;
1687 }
1688
// Compound form of operator^: XORs another modifier into this one in place.
InstructionModifier &operator^=(const InstructionModifier &mod)
{
    InstructionModifier toggled = *this ^ mod;
    *this = toggled;
    return *this;
}
1693 };
1694
operator |(const InstructionModifier & mod1,const InstructionModifier & mod2)1695 inline constexpr14 InstructionModifier operator|(const InstructionModifier &mod1, const InstructionModifier &mod2)
1696 {
1697 return InstructionModifier(mod1.all | mod2.all);
1698 }
1699
1700
operator |(const InstructionModifier & mod1,const FlagRegister & flag)1701 inline constexpr14 InstructionModifier operator|(const InstructionModifier &mod1, const FlagRegister &flag)
1702 {
1703 InstructionModifier mod = mod1;
1704
1705 mod.parts.flagRegNum1 = flag.getBase() >> 1;
1706 mod.parts.flagRegNum = flag.getBase() & 1;
1707 mod.parts.flagSubRegNum = flag.getOffset();
1708
1709 if (mod.getCMod() == ConditionModifier::none) {
1710 mod.parts.predInv = flag.getNeg();
1711 mod.parts.predCtrl = static_cast<int>(PredCtrl::Normal);
1712 }
1713
1714 return mod;
1715 }
1716
operator |(const InstructionModifier & mod1,const PredCtrl & mod2)1717 inline constexpr14 InstructionModifier operator|(const InstructionModifier &mod1, const PredCtrl &mod2)
1718 {
1719 InstructionModifier mod = mod1;
1720 mod.parts.predCtrl = static_cast<int>(mod2);
1721 return mod;
1722 }
1723
operator ^(const InstructionModifier & mod1,const InstructionModifier & mod2)1724 inline constexpr14 InstructionModifier operator^(const InstructionModifier &mod1, const InstructionModifier &mod2)
1725 {
1726 return InstructionModifier(mod1.all ^ mod2.all);
1727 }
1728
1729 class Immediate {
1730 protected:
1731 uint64_t payload;
1732 DataType type;
1733 bool hiddenType = false;
1734
Immediate(uint64_t payload_,DataType type_)1735 Immediate(uint64_t payload_, DataType type_) : payload(payload_), type(type_) {}
1736
setPayload(T imm)1737 template <typename T> typename std::enable_if<sizeof(T) == 2>::type setPayload(T imm) {
1738 uint32_t ximm = utils::bitcast<T, uint16_t>(imm);
1739 payload = ximm | (ximm << 16);
1740 }
setPayload(T imm)1741 template <typename T> typename std::enable_if<sizeof(T) == 4>::type setPayload(T imm) {
1742 payload = utils::bitcast<T, uint32_t>(imm);
1743 }
setPayload(T imm)1744 template <typename T> typename std::enable_if<sizeof(T) == 8>::type setPayload(T imm) {
1745 payload = utils::bitcast<T, uint64_t>(imm);
1746 }
1747
set(T imm)1748 template <typename T> void set(T imm) {
1749 setPayload<T>(imm);
1750 type = getDataType<T>();
1751 }
1752
shrinkSigned(T imm)1753 template <typename T> void shrinkSigned(T imm) {
1754 if (imm == T(int16_t(imm))) set<int16_t>(imm);
1755 else if (imm == T(uint16_t(imm))) set<uint16_t>(imm);
1756 else if (imm == T(int32_t(imm))) set<int32_t>(imm);
1757 else if (imm == T(uint32_t(imm))) set<uint32_t>(imm);
1758 else set(imm);
1759 }
1760
shrinkUnsigned(T imm)1761 template <typename T> void shrinkUnsigned(T imm) {
1762 if (imm == T(uint16_t(imm))) set<uint16_t>(imm);
1763 else if (imm == T(uint32_t(imm))) set<uint32_t>(imm);
1764 else set(imm);
1765 }
1766
1767 public:
Immediate()1768 Immediate() : payload(0), type(DataType::invalid) {}
1769
1770 #ifdef NGEN_ASM
1771 static const bool emptyOp = false;
1772 #endif
1773
getType() const1774 constexpr14 DataType getType() const { return type; }
operator uint64_t() const1775 explicit constexpr14 operator uint64_t() const { return payload; }
getMods() const1776 constexpr14 int getMods() const { return 0; }
isARF() const1777 constexpr14 bool isARF() const { return false; }
1778
setType(DataType type_)1779 Immediate &setType(DataType type_) { type = type_; return *this; }
1780
Immediate(uint16_t imm)1781 Immediate(uint16_t imm) { set(imm); }
Immediate(int16_t imm)1782 Immediate(int16_t imm) { set(imm); }
Immediate(uint32_t imm)1783 Immediate(uint32_t imm) { shrinkUnsigned(imm); }
Immediate(int32_t imm)1784 Immediate(int32_t imm) { shrinkSigned(imm); }
Immediate(uint64_t imm)1785 Immediate(uint64_t imm) { shrinkUnsigned(imm); }
Immediate(int64_t imm)1786 Immediate(int64_t imm) { shrinkSigned(imm); }
1787
Immediate(float imm)1788 Immediate(float imm) { set(imm); }
Immediate(double imm)1789 Immediate(double imm) { set(imm); }
1790 #ifdef NGEN_HALF_TYPE
Immediate(half imm)1791 Immediate(half imm) { set(imm); }
1792 #endif
1793 #ifdef NGEN_BFLOAT16_TYPE
Immediate(bfloat16 imm)1794 Immediate(bfloat16 imm) { set(imm); }
1795 #endif
1796
hideType() const1797 Immediate hideType() const {
1798 Immediate result = *this;
1799 result.hiddenType = true;
1800 return result;
1801 }
1802
uw(uint16_t imm)1803 static inline Immediate uw(uint16_t imm) { return Immediate(imm); }
w(int16_t imm)1804 static inline Immediate w(int16_t imm) { return Immediate(imm); }
ud(uint32_t imm)1805 static inline Immediate ud(uint32_t imm) { Immediate i; i.set(imm); return i; }
d(int32_t imm)1806 static inline Immediate d(int32_t imm) { Immediate i; i.set(imm); return i; }
uq(uint64_t imm)1807 static inline Immediate uq(uint64_t imm) { Immediate i; i.set(imm); return i; }
q(int64_t imm)1808 static inline Immediate q(int64_t imm) { Immediate i; i.set(imm); return i; }
f(float imm)1809 static inline Immediate f(float imm) { return Immediate(imm); }
df(double imm)1810 static inline Immediate df(double imm) { return Immediate(imm); }
1811
hf(uint16_t f)1812 static inline Immediate hf(uint16_t f) {
1813 uint32_t fimm = f;
1814 fimm |= (fimm << 16);
1815 return Immediate(fimm, DataType::hf);
1816 }
1817
bf(uint16_t f)1818 static inline Immediate bf(uint16_t f) {
1819 uint32_t fimm = f;
1820 fimm |= (fimm << 16);
1821 return Immediate(fimm, DataType::bf);
1822 }
1823
1824 protected:
toUV(int8_t i)1825 static inline uint32_t toUV(int8_t i) {
1826 #ifdef NGEN_SAFE
1827 if (i & 0xF0) throw invalid_immediate_exception();
1828 #endif
1829 return i;
1830 }
1831
1832 public:
uv(uint32_t i)1833 static inline Immediate uv(uint32_t i) {
1834 return Immediate(i, DataType::uv);
1835 }
1836
uv(uint8_t i0,uint8_t i1,uint8_t i2,uint8_t i3,uint8_t i4,uint8_t i5,uint8_t i6,uint8_t i7)1837 static inline Immediate uv(uint8_t i0, uint8_t i1, uint8_t i2, uint8_t i3, uint8_t i4, uint8_t i5, uint8_t i6, uint8_t i7) {
1838 uint32_t payload = (toUV(i0) << 0)
1839 | (toUV(i1) << 4)
1840 | (toUV(i2) << 8)
1841 | (toUV(i3) << 12)
1842 | (toUV(i4) << 16)
1843 | (toUV(i5) << 20)
1844 | (toUV(i6) << 24)
1845 | (toUV(i7) << 28);
1846 return uv(payload);
1847 }
1848
1849 protected:
toV(int8_t i)1850 static inline uint32_t toV(int8_t i) {
1851 #ifdef NGEN_SAFE
1852 if (i < -8 || i > 7) throw invalid_immediate_exception();
1853 #endif
1854 return (i & 0x7) | ((i >> 4) & 0x8);
1855 }
1856
1857 public:
v(uint32_t i)1858 static inline Immediate v(uint32_t i) {
1859 return Immediate(i, DataType::v);
1860 }
1861
v(int8_t i0,int8_t i1,int8_t i2,int8_t i3,int8_t i4,int8_t i5,int8_t i6,int8_t i7)1862 static inline Immediate v(int8_t i0, int8_t i1, int8_t i2, int8_t i3, int8_t i4, int8_t i5, int8_t i6, int8_t i7) {
1863 uint32_t payload = (toV(i0) << 0)
1864 | (toV(i1) << 4)
1865 | (toV(i2) << 8)
1866 | (toV(i3) << 12)
1867 | (toV(i4) << 16)
1868 | (toV(i5) << 20)
1869 | (toV(i6) << 24)
1870 | (toV(i7) << 28);
1871 return v(payload);
1872 }
1873
toVF(float f)1874 static inline uint32_t toVF(float f) {
1875 uint32_t fi = utils::bitcast<float, uint32_t>(f);
1876 int exp = (fi >> 23) & 0xFF;
1877 int new_exp = exp - 127 + 3;
1878
1879 if (f == 0.) new_exp = 0;
1880
1881 #ifdef NGEN_SAFE
1882 if ((new_exp & ~7) || (fi & 0x0007FFFF))
1883 throw invalid_immediate_exception();
1884 #endif
1885
1886 return ((fi >> 24) & 0x80)
1887 | ((new_exp & 0x7) << 4)
1888 | ((fi >> 19) & 0xF);
1889 }
1890
vf(float f0,float f1,float f2,float f3)1891 static inline Immediate vf(float f0, float f1, float f2, float f3) {
1892 uint32_t payload = (toVF(f0) << 0)
1893 | (toVF(f1) << 8)
1894 | (toVF(f2) << 16)
1895 | (toVF(f3) << 24);
1896
1897 return Immediate(payload, DataType::vf);
1898 }
1899
fixup(int execSize,DataType defaultType,bool isDest,int arity) const1900 void fixup(int execSize, DataType defaultType, bool isDest, int arity) const {
1901 #ifdef NGEN_SAFE
1902 if (getBytes(type) > (16 >> arity))
1903 throw invalid_immediate_exception();
1904 #endif
1905 }
1906
isScalar() const1907 constexpr14 bool isScalar() const {
1908 switch (type) {
1909 case DataType::uv:
1910 case DataType::v:
1911 case DataType::vf:
1912 return false;
1913 default:
1914 return true;
1915 }
1916 }
1917
forceInt32() const1918 Immediate forceInt32() const {
1919 auto result = *this;
1920 if (result.type == DataType::uw)
1921 result.set<uint32_t>(uint16_t(payload));
1922 else if (result.type == DataType::w)
1923 result.set<int32_t>(int16_t(payload));
1924 return result;
1925 }
1926
1927 #ifdef NGEN_ASM
1928 inline void outputText(std::ostream &str, PrintDetail detail, LabelManager &man) const;
1929 #endif
1930 };
1931
// Compute ctrl field for bfn instruction.
//  e.g. ctrl = getBFNCtrl([](uint8_t a, uint8_t b, uint8_t c) { return (a & b) | (c & ~b); });
//
// func is evaluated once on the bit patterns 0xAA, 0xCC and 0xF0, and the
// result is narrowed to 8 bits.
template <typename F>
inline uint8_t getBFNCtrl(F func)
{
    return static_cast<uint8_t>(func(0xAA, 0xCC, 0xF0));
}
1936
// Barrier kinds, stored in a single byte. The numeric values are assigned
// explicitly (presumably because they are written into an encoding -- confirm
// against the users of this enum), so do not renumber them.
enum class BarrierType : uint8_t
{
    ProducerConsumer = 0,
    Producer         = 1,
    Consumer         = 2,
};
1942
1943 /********************************************************************/
1944 /* HDC sends */
1945 /********************************************************************/
1946 union MessageDescriptor {
1947 uint32_t all;
1948 struct {
1949 unsigned funcCtrl : 19; /* SF-dependent */
1950 unsigned header : 1; /* is a header present? */
1951 unsigned responseLen : 5; /* # of GRFs returned: valid range 0-16 */
1952 unsigned messageLen : 4; /* # of GRFs sent in src0: valid range 1-15 */
1953 unsigned : 3;
1954 } parts;
1955 struct {
1956 unsigned index : 8;
1957 unsigned rest : 24;
1958 } bti;
1959 struct {
1960 unsigned index : 8;
1961 unsigned elements : 3;
1962 unsigned subtype : 2;
1963 unsigned subtype2 : 1;
1964 unsigned messageType : 5;
1965 unsigned header : 1;
1966 unsigned responseLen : 5;
1967 unsigned messageLen : 4;
1968 unsigned : 3;
1969 } block;
1970 struct {
1971 unsigned index : 8;
1972 unsigned simd16 : 1;
1973 unsigned legacySIMD : 1;
1974 unsigned elements : 2;
1975 unsigned : 1;
1976 unsigned : 1;
1977 unsigned messageType : 5;
1978 unsigned header : 1;
1979 unsigned responseLen : 5;
1980 unsigned messageLen : 4;
1981 unsigned : 3;
1982 } scattered;
1983 struct {
1984 unsigned index : 8;
1985 unsigned subtype : 2;
1986 unsigned elements : 2;
1987 unsigned simd16 : 1;
1988 unsigned : 1;
1989 unsigned messageType : 5;
1990 unsigned header : 1;
1991 unsigned responseLen : 5;
1992 unsigned messageLen : 4;
1993 unsigned : 3;
1994 } a64_scattered;
1995 struct {
1996 unsigned index : 8;
1997 unsigned atomicOp : 4;
1998 unsigned simd8 : 1; // or data width.
1999 unsigned returnData : 1;
2000 unsigned messageType : 5;
2001 unsigned header : 1;
2002 unsigned responseLen : 5;
2003 unsigned messageLen : 4;
2004 unsigned : 3;
2005 } atomic;
2006 struct {
2007 unsigned index : 8;
2008 unsigned cmask : 4;
2009 unsigned simdMode : 2;
2010 unsigned messageType : 5;
2011 unsigned header : 1;
2012 unsigned responseLen : 5;
2013 unsigned messageLen : 4;
2014 unsigned : 3;
2015 } surface;
2016 struct {
2017 unsigned opcode : 6;
2018 unsigned : 1;
2019 unsigned addrSize : 2;
2020 unsigned dataSize : 3;
2021 unsigned vectSize : 3;
2022 unsigned transpose : 1;
2023 unsigned : 1;
2024 unsigned cache : 3;
2025 unsigned : 9;
2026 unsigned model : 2;
2027 unsigned : 1;
2028 } standardLSC;
2029 struct {
2030 unsigned : 12;
2031 unsigned cmask : 4;
2032 unsigned : 16;
2033 } cmask;
2034 struct {
2035 unsigned : 7;
2036 unsigned vnni : 1;
2037 unsigned : 24;
2038 } block2D;
2039
MessageDescriptor()2040 MessageDescriptor() : all(0) {}
MessageDescriptor(uint32_t all_)2041