1 /*******************************************************************************
2 * Copyright 2019-2021 Intel Corporation
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *     http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 *******************************************************************************/
16 
17 #ifndef NGEN_CORE_HPP
18 #define NGEN_CORE_HPP
19 
20 
#include <algorithm>
#include <cstdint>
#include <cstring>
#include <type_traits>
#include <vector>
25 
26 #include "ngen_utils.hpp"
27 
28 #ifndef NGEN_NO_OP_NAMES
29 #if not +0
30 #error Compile with -fno-operator-names [Linux/OS X] or without /Za [Windows] if you want to use and(), or(), xor(), or define NGEN_NO_OP_NAMES and use and_(), or_(), xor_().
31 #endif
32 #endif
33 
34 #ifdef NGEN_ASM
35 #include <ostream>
36 #endif
37 
38 #ifdef NGEN_SAFE
39 #include <stdexcept>
40 #endif
41 
42 /*
43   Syntax
44   ------
45 
46   Register Syntax Overview
47     r17                 Plain register
48     r17.f(4)            -> r17.4:f
49                         In fact, r17.4<0;1,0>:f, as subregisters default to
50                           being scalar
51     r17.sub<float>(4)   Same as above, allowing for C++ templating.
52     r17.f()             -> r17.0:f (defaults to offset 0)
53     r17.sub<float>()    Same as above
54     r17.df(3)(8,8,1)    Register regioning (vertical stride, width, horizontal stride)
55     r17.df(3)(8,1)      (Width, horiz. stride): vertical stride is inferred
56     r17.df(3)(1)        Horizontal stride only: width, vertical stride inferred from execution size.
57     r[a0.w(8)].f(4,4,1) Indirect addressing: VxH (if NGEN_SHORT_NAMES defined otherwise use indirect[a0...])
58     r[a0.w(8)].f(4,1)   Indirect addressing: Vx1
59     -r17.q(1)           Source modifier: negation
60     abs(r17)            Source modifier: absolute value. Note that abs is defined in namespace ngen.
61     -abs(r3)
62     ~r17                Alternative syntax to -r17 for logical operations.
63     r17 + 3             ...is r20. Operators ++ and += are defined similarly.
64 
65   Command Syntax Overview
    add(8, r3.f(0)(8,8,1), r9.f(0)(8,8,1), r12.f(0)(0,1,0))         ->   add (8) r3.0<8;8,1>:f r9.0<8;8,1>:f r12.0<0;1,0>:f
67     add(8, r3.f(), r9.f(), r12.f())                                 Same as above. Register regions default to unit stride.
68     add<float>(8, r3, r9, r12)                                      A default operand data type can be provided.
69     add<uint32_t>(8, r3, r9, r12.uw(8)(0,1,0))                      Default operand types can be overridden.
70     add<float>(8, r3, r9, 3.14159f)                                 The data type of scalar immediate values is inferred.
71     add<int32_t>(8, r3, r9, int16_t(12))                            Here an int16_t immediate is mapped to the :w data type.
72     mul<float>(8, r3, r9, Immediate::vf(-1.0,1.0,-1.0,1.25))        Vector immediates require helper functions.
73     mov(8, r2.d(), Immediate::uv(7,6,5,4,3,2,1,0))
74     mov(8, r2.d(), Immediate::v(7,-6,5,-4,3,-2,1,0))
75 
76   All modifiers for an instruction go in the first parameter, OR'ed together.
77     add(8 | M0, ...)
78     add(8 | W | ~f0.w(0) | sat, ...)            Use NoMask instead of W if NGEN_SHORT_NAMES not defined.
79     add(8 | lt | f1_0, ...)
80     add(8 | ~any2h | f1, ...)
81  */
82 
83 namespace ngen {
84 
// Compile-time flag: true when NGEN_SAFE is defined, false otherwise.
#ifdef NGEN_SAFE
static constexpr bool _safe_ = true;
#else
static constexpr bool _safe_ = false;
#endif
90 
// Forward declarations.
// These types are referenced in this header before their full definitions appear.
class RegData;
class Register;
class GRFDisp;
class Subregister;
class RegisterRegion;
class NullRegister;
class InstructionModifier;
struct Instruction12;
enum class Opcode;

// Two overloads of encodeCommon12, distinguished only by the type of the trailing tag argument.
struct EncodingTag12;
static inline void encodeCommon12(Instruction12 &i, Opcode opcode, const InstructionModifier &mod, const RegData &dst, EncodingTag12 tag);
struct EncodingTagXeHPC;
static inline void encodeCommon12(Instruction12 &i, Opcode opcode, const InstructionModifier &mod, const RegData &dst, EncodingTagXeHPC tag);
106 
107 // Exceptions, used when NGEN_SAFE is defined.
108 
109 #ifdef NGEN_SAFE
// Runtime error whose message reports an unsupported type or type combination.
class invalid_type_exception : public std::runtime_error {
public:
    invalid_type_exception()
        : std::runtime_error{"Instruction does not support this type or combination of types"}
    {}
};
// Runtime error whose message reports an invalid object.
class invalid_object_exception : public std::runtime_error {
public:
    invalid_object_exception()
        : std::runtime_error{"Object is invalid"}
    {}
};
// Runtime error whose message reports an invalid immediate value.
class invalid_immediate_exception : public std::runtime_error {
public:
    invalid_immediate_exception()
        : std::runtime_error{"Invalid immediate value"}
    {}
};
// Runtime error whose message reports invalid or conflicting modifiers.
class invalid_modifiers_exception : public std::runtime_error {
public:
    invalid_modifiers_exception()
        : std::runtime_error{"Invalid or conflicting modifiers"}
    {}
};
// Runtime error whose message reports an invalid instruction operand.
class invalid_operand_exception : public std::runtime_error {
public:
    invalid_operand_exception()
        : std::runtime_error{"Invalid operand to instruction"}
    {}
};
// Runtime error whose message reports an invalid operand count.
class invalid_operand_count_exception : public std::runtime_error {
public:
    invalid_operand_count_exception()
        : std::runtime_error{"Invalid operand count"}
    {}
};
// Runtime error whose message reports an invalid ARF.
class invalid_arf_exception : public std::runtime_error {
public:
    invalid_arf_exception()
        : std::runtime_error{"Invalid ARF specified"}
    {}
};
// Runtime error whose message reports that an ARF was found where a GRF was expected.
class grf_expected_exception : public std::runtime_error {
public:
    grf_expected_exception()
        : std::runtime_error{"GRF expected, but found an ARF"}
    {}
};
// Runtime error whose message reports an invalid addressing model.
class invalid_model_exception : public std::runtime_error {
public:
    invalid_model_exception()
        : std::runtime_error{"Invalid addressing model specified"}
    {}
};
// Runtime error whose message reports invalid load/store/atomic operands.
class invalid_load_store_exception : public std::runtime_error {
public:
    invalid_load_store_exception()
        : std::runtime_error{"Invalid operands for load/store/atomic"}
    {}
};
// Runtime error whose message reports an invalid register range.
class invalid_range_exception : public std::runtime_error {
public:
    invalid_range_exception()
        : std::runtime_error{"Invalid register range"}
    {}
};
// Runtime error whose message reports an unsupported register region.
class invalid_region_exception : public std::runtime_error {
public:
    invalid_region_exception()
        : std::runtime_error{"Unsupported register region"}
    {}
};
// Runtime error whose message reports an operand without a type.
class missing_type_exception : public std::runtime_error {
public:
    missing_type_exception()
        : std::runtime_error{"Operand is missing its type"}
    {}
};
// Runtime error whose message reports a read-only memory model.
class read_only_exception : public std::runtime_error {
public:
    read_only_exception()
        : std::runtime_error{"Memory model is read-only"}
    {}
};
// Runtime error whose message reports a stream stack underflow.
class stream_stack_underflow : public std::runtime_error {
public:
    stream_stack_underflow()
        : std::runtime_error{"Stream stack underflow occurred"}
    {}
};
// Runtime error whose message reports an instruction stream that is still active.
class unfinished_stream_exception : public std::runtime_error {
public:
    unfinished_stream_exception()
        : std::runtime_error{"An unfinished instruction stream is still active"}
    {}
};
// Runtime error whose message reports a referenced label with no defined location.
class dangling_label_exception : public std::runtime_error {
public:
    dangling_label_exception()
        : std::runtime_error{"A label was referenced, but its location was not defined"}
    {}
};
// Runtime error whose message reports a label that already has a location.
class multiple_label_exception : public std::runtime_error {
public:
    multiple_label_exception()
        : std::runtime_error{"Label already has a location"}
    {}
};
// Runtime error whose message reports an instruction unsupported by the chosen hardware.
class unsupported_instruction : public std::runtime_error {
public:
    unsupported_instruction()
        : std::runtime_error{"Instruction is not supported by the chosen hardware"}
    {}
};
// Runtime error whose message reports a message unsupported by the chosen hardware.
class unsupported_message : public std::runtime_error {
public:
    unsupported_message()
        : std::runtime_error{"Message is not supported by the chosen hardware"}
    {}
};
// Runtime error whose message reports that Align16 is unavailable through the IGA assembler.
class iga_align16_exception : public std::runtime_error {
public:
    iga_align16_exception()
        : std::runtime_error{"Align16 not supported by the IGA assembler; use binary output"}
    {}
};
// Runtime error whose message reports a missing SFID.
class sfid_needed_exception : public std::runtime_error {
public:
    sfid_needed_exception()
        : std::runtime_error{"SFID must be specified on Gen12+"}
    {}
};
// Runtime error whose message reports an invalid execution size.
class invalid_execution_size_exception : public std::runtime_error {
public:
    invalid_execution_size_exception()
        : std::runtime_error{"Invalid execution size"}
    {}
};
202 #endif
203 
204 // Gen hardware generations.
// Hardware generations. Enumerators are ordered, so relational comparisons
// (e.g. hw >= HW::XeHPG) follow the order listed here.
enum class HW {
    Unknown = 0,
    Gen9    = 1,
    Gen10   = 2,
    Gen11   = 3,
    XeLP    = 4,
    Gen12LP = XeLP,     // Second name for XeLP.
    XeHP    = 5,
    XeHPG   = 6,
    XeHPC   = 7,
};
216 
217 // Data types. Bits[0:4] are the ID, bits[5:7] hold log2(width in bytes).
// Operand data types.
// Encoding: bits[0:4] are the type ID, bits[5:7] hold log2(width in bytes).
enum class DataType : uint8_t {
    invalid = 0x00,
    ud      = 0x40,     // ID  0, 4 bytes
    d       = 0x41,     // ID  1, 4 bytes
    uw      = 0x22,     // ID  2, 2 bytes
    w       = 0x23,     // ID  3, 2 bytes
    ub      = 0x04,     // ID  4, 1 byte
    b       = 0x05,     // ID  5, 1 byte
    df      = 0x66,     // ID  6, 8 bytes
    f       = 0x47,     // ID  7, 4 bytes
    uq      = 0x68,     // ID  8, 8 bytes
    q       = 0x69,     // ID  9, 8 bytes
    hf      = 0x2A,     // ID 10, 2 bytes
    bf      = 0x2B,     // ID 11, 2 bytes
    bf8     = 0x0C,     // ID 12, 1 byte
    uv      = 0x4D,     // ID 13, 4 bytes
    v       = 0x4E,     // ID 14, 4 bytes
    vf      = 0x4F,     // ID 15, 4 bytes
    tf32    = 0x50      // ID 16, 4 bytes
};
238 
239 #ifdef NGEN_ASM
// Writes the mnemonic of a data type to the stream and returns the stream.
static inline std::ostream &operator<<(std::ostream &str, DataType type)
{
    // Mnemonics indexed by the low five bits of the enumerator value.
    static const char *names[32] = {
        "ud",   "d", "uw", "w",  "ub",  "b",  "df", "f",
        "uq",   "q", "hf", "bf", "bf8", "uv", "v",  "vf",
        "tf32", "",  "",   "",   "",    "",   "",   "",
        "",     "",  "",   "",   "",    "",   "",   "",
    };
    const auto id = static_cast<uint8_t>(type) & 0x1F;
    return str << names[id];
}
247 #endif
248 
getLog2Bytes(DataType type)249 static inline constexpr   int getLog2Bytes(DataType type)              { return static_cast<int>(type) >> 5; }
getBytes(DataType type)250 static inline constexpr   int getBytes(DataType type)                  { return 1 << getLog2Bytes(type); }
getDwords(DataType type)251 static inline constexpr14 int getDwords(DataType type)                 { return std::max<int>(getBytes(type) >> 2, 1); }
252 
isSigned(DataType type)253 static inline constexpr bool isSigned(DataType type)
254 {
255     return !(type == DataType::ub || type == DataType::uw || type == DataType::ud || type == DataType::uq);
256 }
257 
getDataType()258 template <typename T> static inline DataType getDataType() { return DataType::invalid; }
259 
getDataType()260 template <> inline DataType getDataType<uint64_t>() { return DataType::uq; }
getDataType()261 template <> inline DataType getDataType<int64_t>()  { return DataType::q;  }
getDataType()262 template <> inline DataType getDataType<uint32_t>() { return DataType::ud; }
getDataType()263 template <> inline DataType getDataType<int32_t>()  { return DataType::d;  }
getDataType()264 template <> inline DataType getDataType<uint16_t>() { return DataType::uw; }
getDataType()265 template <> inline DataType getDataType<int16_t>()  { return DataType::w;  }
getDataType()266 template <> inline DataType getDataType<uint8_t>()  { return DataType::ub; }
getDataType()267 template <> inline DataType getDataType<int8_t>()   { return DataType::b;  }
getDataType()268 template <> inline DataType getDataType<double>()   { return DataType::df; }
getDataType()269 template <> inline DataType getDataType<float>()    { return DataType::f;  }
270 #ifdef NGEN_HALF_TYPE
getDataType()271 template <> inline DataType getDataType<half>()     { return DataType::hf; }
272 #endif
273 #ifdef NGEN_BFLOAT16_TYPE
getDataType()274 template <> inline DataType getDataType<bfloat16>() { return DataType::bf; }
275 #endif
276 #ifdef NGEN_BFLOAT8_TYPE
getDataType()277 template <> inline DataType getDataType<bfloat8>() { return DataType::bf8; }
278 #endif
279 #ifdef NGEN_TFLOAT32_TYPE
getDataType()280 template <> inline DataType getDataType<tfloat32>() { return DataType::tf32; }
281 #endif
282 
283 // Math function codes.
// Math function codes. Codes 0 and 8 are not assigned to any enumerator.
enum class MathFunction : uint8_t {
    inv   = 0x1,
    log   = 0x2,
    exp   = 0x3,
    sqt   = 0x4,
    rsqt  = 0x5,
    sin   = 0x6,
    cos   = 0x7,
    fdiv  = 0x9,
    pow   = 0xA,
    idiv  = 0xB,
    iqot  = 0xC,
    irem  = 0xD,
    invm  = 0xE,
    rsqtm = 0xF
};
300 
mathArgCount(MathFunction func)301 static inline int mathArgCount(MathFunction func)
302 {
303     static const char argCounts[16] = {0, 1, 1, 1, 1, 1, 1, 1, 0, 2, 2, 2, 2, 2, 2, 1};
304     return argCounts[static_cast<uint8_t>(func) & 0xF];
305 }
306 
307 #ifdef NGEN_ASM
// Writes the mnemonic of a math function to the stream and returns the stream.
static inline std::ostream &operator<<(std::ostream &str, MathFunction func)
{
    // Mnemonics indexed by function code; unassigned codes print as empty strings.
    static const char *names[16] = {
        "",  "inv",  "log", "exp",  "sqt",  "rsqt", "sin",  "cos",
        "",  "fdiv", "pow", "idiv", "iqot", "irem", "invm", "rsqtm",
    };
    const auto code = static_cast<uint8_t>(func) & 0xF;
    return str << names[code];
}
314 #endif
315 
hasIEEEMacro(HW hw)316 static inline bool hasIEEEMacro(HW hw) {
317     if (hw == HW::Gen12LP) return false;
318     if (hw == HW::XeHPG) return false;
319     return true;
320 }
321 
322 // Sync function codes.
// Sync function codes.
enum class SyncFunction : uint8_t {
    nop   = 0x0,
    allrd = 0x2,
    allwr = 0x3,
    bar   = 0xE,
    host  = 0xF
};
330 
331 #ifdef NGEN_ASM
// Writes the mnemonic of a sync function to the stream and returns the stream.
static inline std::ostream &operator<<(std::ostream &str, SyncFunction func)
{
    // Mnemonics indexed by function code; unassigned codes print as empty strings.
    static const char *names[16] = {
        "nop", "", "allrd", "allwr", "", "", "",    "",
        "",    "", "",      "",      "", "", "bar", "host",
    };
    const auto code = static_cast<uint8_t>(func) & 0xF;
    return str << names[code];
}
338 #endif
339 
340 // Shared function IDs (SFIDs).
// Shared function IDs (SFIDs), listed in numeric order.
// Some values carry two names (0x7, 0x8, 0xD).
enum class SharedFunction : uint8_t {
    null = 0x0,
    ugml = 0x1,
    smpl = 0x2,
    gtwy = 0x3,
    dc2  = 0x4,
    rc   = 0x5,
    urb  = 0x6,
    ts   = 0x7,
    btd  = 0x7,
    vme  = 0x8,
    rta  = 0x8,
    dcro = 0x9,
    dc0  = 0xA,
    pixi = 0xB,
    dc1  = 0xC,
    cre  = 0xD,
    tgm  = 0xD,
    slm  = 0xE,
    ugm  = 0xF,

    // alias
    sampler = smpl,
    gateway = gtwy,
    spawner = ts,
};
367 
368 #ifdef NGEN_ASM
getMnemonic(SharedFunction sfid,HW hw)369 static inline const char *getMnemonic(SharedFunction sfid, HW hw)
370 {
371     static const char *names[16] = {
372         "null", ""    , "smpl", "gtwy", "dc2", "rc" , "urb", "ts" ,
373         "vme" , "dcro", "dc0" , "pixi", "dc1", "cre", ""   , ""   ,
374     };
375     static const char *namesLSC[16] = {
376         "null", "ugml", "smpl", "gtwy", "dc2", "rc" , "urb", "btd",
377         "rta" , "dcro", "dc0" , "pixi", "dc1", "tgm", "slm", "ugm",
378     };
379     const auto &table = (hw >= HW::XeHPG) ? namesLSC : names;
380     return table[static_cast<uint8_t>(sfid) & 0xF];
381 }
382 #endif
383 
384 // ARFs: high nybble of register # specifies type
// ARF types. The value occupies the high nybble of the register number.
// Code 14 is not assigned to any enumerator.
enum class ARFType : uint8_t {
    null = 0x0,
    a    = 0x1,
    acc  = 0x2,
    f    = 0x3,
    ce   = 0x4,
    msg  = 0x5,
    sp   = 0x6,
    sr   = 0x7,
    cr   = 0x8,
    n    = 0x9,
    ip   = 0xA,
    tdr  = 0xB,
    tm   = 0xC,
    fc   = 0xD,
    dbg  = 0xF,
};
402 
403 #ifdef NGEN_ASM
// Writes the mnemonic of an ARF type to the stream and returns the stream.
static inline std::ostream &operator<<(std::ostream &str, ARFType type)
{
    // Mnemonics indexed by ARF type code; the unassigned code prints as an empty string.
    static const char *names[16] = {
        "null", "a", "acc", "f",   "ce", "msg", "sp", "sr",
        "cr",   "n", "ip",  "tdr", "tm", "fc",  "",   "dbg",
    };
    const auto code = static_cast<uint8_t>(type) & 0xF;
    return str << names[code];
}
410 
// Detail levels passed to the outputText() methods.
enum class PrintDetail {
    base        = 0,
    sub_no_type = 1,
    sub         = 2,
    hs          = 3,
    vs_hs       = 4,
    full        = 5
};
412 #endif
413 
414 // Invalid singleton class. Can be assigned to nGEN objects to invalidate them.
// Empty tag type; its single instance `invalid` is accepted by the
// operator=(const Invalid &) overloads declared on nGEN objects.
class Invalid {};
static constexpr Invalid invalid{};
416 
// Hands out sequential label IDs and stores one target value per ID.
// An ID whose stored value is noTarget has no target yet.
class LabelManager {
protected:
    uint32_t nextID;
    std::vector<uint32_t> targets;

    enum TargetConstants : uint32_t {
        noTarget = uint32_t(-1),
    };

public:
    LabelManager() : nextID(0) {}

    // Allocates a new ID with no target and returns it.
    uint32_t getNewID() {
        const uint32_t allocated = nextID;
        targets.push_back(TargetConstants::noTarget);
        ++nextID;
        return allocated;
    }

    // True once a target has been stored for this ID.
    bool hasTarget(uint32_t id) const {
        return !(targets[id] == TargetConstants::noTarget);
    }

    // Stores the target for an ID. With NGEN_SAFE, throws
    // multiple_label_exception if the ID already has one.
    void setTarget(uint32_t id, uint32_t target) {
#ifdef NGEN_SAFE
        if (hasTarget(id))
            throw multiple_label_exception();
#endif
        targets[id] = target;
    }

    // Adds offset to the stored target. With NGEN_SAFE, throws
    // dangling_label_exception if the ID has no target.
    void offsetTarget(uint32_t id, uint32_t offset) {
#ifdef NGEN_SAFE
        if (!hasTarget(id))
            throw dangling_label_exception();
#endif
        targets[id] = targets[id] + offset;
    }

    // Returns the stored target. With NGEN_SAFE, throws
    // dangling_label_exception if the ID has no target.
    uint32_t getTarget(uint32_t id) const {
#ifdef NGEN_SAFE
        if (!hasTarget(id))
            throw dangling_label_exception();
#endif
        return targets[id];
    }
};
459 
460 // An object representing a label.
461 class Label {
462 protected:
463     unsigned id : 31;
464     unsigned uninit : 1;
465 
466 public:
Label()467     Label() : id(0), uninit(true) {}
468 
getID(LabelManager & man)469     uint32_t getID(LabelManager &man) {
470         if (uninit) {
471             id = man.getNewID();
472             uninit = false;
473         }
474         return id;
475     }
476 
477     /* for compatibility with RegData */
fixup(int execSize,DataType defaultType,bool isDest,int arity)478     void fixup(int execSize, DataType defaultType, bool isDest, int arity) {}
isScalar() const479     constexpr14 bool isScalar() const { return false; }
480 
481 #ifdef NGEN_ASM
482     static const bool emptyOp = false;
483     inline void outputText(std::ostream &str, PrintDetail detail, LabelManager &man);
484 #endif
485 };
486 
// Comparison operators for RegData, declared ahead of the class so it can befriend them.
static inline bool operator==(const RegData &r1, const RegData &r2);
static inline bool operator!=(const RegData &r1, const RegData &r2);
489 
490 // Superclass for registers, subregisters, and register regions, possibly
491 // with source modifiers.
492 class RegData {
493 protected:
494     unsigned base : 8;
495     unsigned arf : 1;
496       signed off : 11;
497     unsigned mods : 2;
498     unsigned type : 8;
499     unsigned indirect : 1;
500     unsigned _pad1 : 1;
501     unsigned vs : 7;
502     unsigned width : 5;
503     unsigned hs : 6;
504     unsigned _pad2 : 13;
505     unsigned invalid : 1;
506 
RegData(int base_,bool arf_,int off_,bool indirect_,DataType type_,int vs_,int width_,int hs_)507     constexpr RegData(int base_, bool arf_, int off_, bool indirect_, DataType type_, int vs_, int width_, int hs_)
508         : base(base_), arf(arf_), off(off_), mods(0), type(static_cast<int>(type_)), indirect(indirect_), _pad1(0), vs(vs_), width(width_), hs(hs_), _pad2(0), invalid(0) {}
509 
510 public:
511 #ifdef NGEN_ASM
512     static const bool emptyOp = false;
513 #endif
514 
RegData()515     constexpr RegData()
516         : base(0), arf(0), off(0), mods(0), type(0), indirect(0), _pad1(0), vs(0), width(0), hs(0), _pad2(0), invalid(1) {}
517 
getBase() const518     constexpr int getBase()         const { return base; }
isARF() const519     constexpr bool isARF()          const { return arf; }
getARFBase() const520     constexpr int getARFBase()      const { return base & 0xF; }
getARFType() const521     constexpr ARFType getARFType()  const { return static_cast<ARFType>(base >> 4); }
isIndirect() const522     constexpr bool isIndirect()     const { return indirect; }
isVxIndirect() const523     constexpr bool isVxIndirect()   const { return indirect && (vs == 0x7F); }
getIndirectBase() const524     constexpr int getIndirectBase() const { return base >> 4; }
getIndirectOff() const525     constexpr int getIndirectOff()  const { return base & 0xF; }
isNull() const526     constexpr bool isNull()         const { return isARF() && (getARFType() == ARFType::null); }
isInvalid() const527     constexpr bool isInvalid()      const { return invalid; }
isValid() const528     constexpr bool isValid()        const { return !invalid; }
getOffset() const529     constexpr int getOffset()       const { return off; }
getByteOffset() const530     constexpr int getByteOffset()   const { return off * getBytes(); }
getType() const531     constexpr DataType getType()    const { return static_cast<DataType>(type); }
getVS() const532     constexpr int getVS()           const { return vs; }
getWidth() const533     constexpr int getWidth()        const { return width; }
getHS() const534     constexpr int getHS()           const { return hs; }
getNeg() const535     constexpr bool getNeg()         const { return mods & 2; }
getAbs() const536     constexpr bool getAbs()         const { return mods & 1; }
getMods() const537     constexpr int getMods()         const { return mods; }
getBytes() const538     constexpr int getBytes()        const { return ngen::getBytes(getType()); }
getDwords() const539     constexpr14 int getDwords()     const { return ngen::getDwords(getType()); }
isScalar() const540     constexpr bool isScalar()       const { return hs == 0 && vs == 0 && width == 1; }
541 
setBase(int base_)542     constexpr14 RegData &setBase(int base_)                      { base = base_; return *this; }
setOffset(int off_)543     constexpr14 RegData &setOffset(int off_)                     { off = off_; return *this; }
setType(DataType newType)544     constexpr14 RegData &setType(DataType newType)               { type = static_cast<unsigned>(newType); return *this; }
setMods(int mods_)545     constexpr14 RegData &setMods(int mods_)                      { mods = mods_; return *this; }
setRegion(int vs_,int width_,int hs_)546     constexpr14 RegData &setRegion(int vs_, int width_, int hs_) { vs = vs_; width = width_; hs = hs_; return *this; }
547 
invalidate()548     void invalidate()                     { invalid = true; }
operator =(const Invalid & i)549     RegData &operator=(const Invalid &i)  { this->invalidate(); return *this; }
550 
551     inline void fixup(int execSize, DataType defaultType, bool isDest, int arity);                    // Adjust automatically-computed strides given ESize.
552 
operator +() const553     constexpr RegData operator+() const { return *this; }
operator -() const554     constexpr14 RegData operator-() const {
555         auto result = *this;
556         result.negate();
557         return result;
558     }
operator ~() const559     constexpr14 RegData operator~() const { return -*this; }
negate()560     constexpr14 void negate()             { mods = mods ^ 2; }
561 
562     friend inline bool operator==(const RegData &r1, const RegData &r2);
563     friend inline bool operator!=(const RegData &r1, const RegData &r2);
564 
565     friend inline RegData abs(const RegData &r);
566 
567 #ifdef NGEN_ASM
568     inline void outputText(std::ostream &str, PrintDetail detail, LabelManager &man) const;
569 #endif
570 };
571 
// The bitfields above must pack into exactly 8 bytes; operator== below compares RegData objects as raw 8-byte values.
static_assert(sizeof(RegData) == 8, "RegData structure is not laid out correctly in memory.");
573 
operator ==(const RegData & r1,const RegData & r2)574 static inline bool operator==(const RegData &r1, const RegData &r2) {
575     return *((uint64_t *) &r1) == *((uint64_t *) &r2);
576 }
577 
operator !=(const RegData & r1,const RegData & r2)578 static inline bool operator!=(const RegData &r1, const RegData &r2) {
579     return !(r1 == r2);
580 }
581 
// Returns a copy of r whose modifier bits are set to 1 (absolute value only;
// any negation on r is not carried over).
inline RegData abs(const RegData &r)
{
    RegData absolute(r);
    absolute.setMods(1);
    return absolute;
}
587 
fixup(int execSize,DataType defaultType,bool isDest,int arity)588 inline void RegData::fixup(int execSize, DataType defaultType, bool isDest, int arity)
589 {
590 #ifdef NGEN_SAFE
591     if (isInvalid()) throw invalid_object_exception();
592 #endif
593 
594     if (getType() == DataType::invalid) {
595 #ifdef NGEN_SAFE
596         if (defaultType == DataType::invalid)
597             throw missing_type_exception();
598 #endif
599         setType(defaultType);
600     }
601     if (!isVxIndirect()) {
602         if (execSize == 1) {
603             vs = hs = 0;
604             width = 1;
605         } else if (width == 0) {
606             int maxWidth = 32 / getBytes();
607             width = (hs == 0) ? 1 : std::min<int>({int(maxWidth / hs), execSize, 16});
608             vs = width * hs;
609         }
610         if (isDest && hs == 0)
611             hs = 1;
612     }
613 }
614 
615 // Operands for Align16 instructions
616 class Align16Operand {
617 protected:
618     RegData rd;
619     unsigned chanSel : 8;
620     unsigned chanEn : 4;
621     bool rep : 1;
622 
623 public:
Align16Operand(RegData rd_,int chanEn_)624     constexpr Align16Operand(RegData rd_, int chanEn_) : rd(rd_), chanSel(0b11100100), chanEn(chanEn_), rep(false) {}
Align16Operand(RegData rd_,int s0,int s1,int s2,int s3)625     constexpr Align16Operand(RegData rd_, int s0, int s1, int s2, int s3) : rd(rd_),
626         chanSel((s0 & 3) | ((s1 & 3) << 2) | ((s2 & 3) << 4) | ((s3 & 3) << 6)), chanEn(0xF), rep(false) {}
627 
createBroadcast(RegData rd_)628     static constexpr14 Align16Operand createBroadcast(RegData rd_) {
629         Align16Operand op{rd_, 0xF};
630         op.rep = true;
631         return op;
632     }
633 
createWithMME(RegData rd_,int mme)634     static constexpr14 Align16Operand createWithMME(RegData rd_, int mme) {
635         Align16Operand op{rd_, mme};
636         op.chanSel = mme;
637         return op;
638     }
639 
getReg()640     RegData &getReg()                           { return rd; }
getReg() const641     constexpr const RegData &getReg()     const { return rd; }
getChanSel() const642     constexpr uint8_t getChanSel()        const { return chanSel; }
getChanEn() const643     constexpr uint8_t getChanEn()         const { return chanEn; }
isRep() const644     constexpr bool isRep()                const { return rep; }
645 
isIndirect() const646     constexpr bool isIndirect()           const { return rd.isIndirect(); }
getType() const647     constexpr DataType getType()          const { return rd.getType(); }
getOffset() const648     constexpr int getOffset()             const { return rd.getOffset(); }
getMods() const649     constexpr int getMods()               const { return rd.getMods(); }
isARF() const650     constexpr bool isARF()                const { return rd.isARF(); }
651 
invalidate()652     void invalidate() { rd.invalidate(); }
operator =(const Invalid & i)653     Align16Operand &operator=(const Invalid &i) { this->invalidate(); return *this; }
isInvalid() const654     bool isInvalid()                      const { return rd.isInvalid(); }
isValid() const655     bool isValid()                        const { return !rd.isInvalid(); }
isScalar() const656     constexpr bool isScalar()             const { return rd.isScalar(); }
657 
fixup(int execSize,DataType defaultType,bool isDest,int arity)658     void fixup(int execSize, DataType defaultType, bool isDest, int arity) {
659         rd.fixup(execSize, defaultType, isDest, arity);
660     }
661 
662 #ifdef NGEN_ASM
663     inline void outputText(std::ostream &str, PrintDetail detail, LabelManager &man) const;
664     static const bool emptyOp = false;
665 #endif
666 };
667 
668 // Register regions.
669 class RegisterRegion : public RegData
670 {
671 public:
RegisterRegion()672     constexpr RegisterRegion() : RegData() {}
RegisterRegion(RegData rdata_,int vs_,int width_,int hs_)673     constexpr14 RegisterRegion(RegData rdata_, int vs_, int width_, int hs_) {
674         *static_cast<RegData *>(this) = rdata_;
675         vs = vs_;
676         width = width_;
677         hs = hs_;
678     }
679 
operator =(const Invalid & i)680     RegisterRegion &operator=(const Invalid &i) { this->invalidate(); return *this; }
681 
operator +() const682     constexpr RegisterRegion operator+() const { return *this; }
operator -() const683     constexpr14 RegisterRegion operator-() const {
684         auto result = *this;
685         result.negate();
686         return result;
687     }
operator ~() const688     constexpr14 RegisterRegion operator~() const { return -*this; }
689 };
690 
691 // Subregister; always associated with a specific data type.
692 class Subregister : public RegData
693 {
694 protected:
checkGRF() const695     void checkGRF() const {
696 #ifdef NGEN_SAFE
697         if (isARF()) throw grf_expected_exception();
698 #endif
699     }
700 
701 public:
Subregister()702     constexpr Subregister() : RegData() {}
Subregister(RegData reg_,int offset_,DataType type_)703     constexpr14 Subregister(RegData reg_, int offset_, DataType type_) {
704         *static_cast<RegData *>(this) = reg_;
705         off = offset_;
706         type = static_cast<int>(type_);
707         hs = vs = 0;
708         width = 1;
709     }
Subregister(RegData reg_,DataType type_)710     constexpr14 Subregister(RegData reg_, DataType type_) {
711         *static_cast<RegData *>(this) = reg_;
712         off = 0;
713         type = static_cast<int>(type_);
714     }
715 
716     inline RegisterRegion operator()(int vs, int width, int hs) const;
717     inline RegisterRegion operator()(int vs, int hs) const;
718     inline RegisterRegion operator()(int hs) const;
719 
operator =(const Invalid & i)720     Subregister &operator=(const Invalid &i) { this->invalidate(); return *this; }
721 
operator +() const722     constexpr Subregister operator+() const { return *this; }
operator -() const723     constexpr14 Subregister operator-() const {
724         auto result = *this;
725         result.negate();
726         return result;
727     }
operator ~() const728     constexpr14 Subregister operator~() const { return -*this; }
729 
swizzle(int s0,int s1,int s2,int s3) const730     Align16Operand swizzle(int s0, int s1, int s2, int s3)    const { checkGRF(); return Align16Operand(*this, s0, s1, s2, s3); }
broadcast() const731     Align16Operand broadcast()                                const { checkGRF(); return Align16Operand::createBroadcast(*this); }
enable(bool c0,bool c1,bool c2,bool c3) const732     Align16Operand enable(bool c0, bool c1, bool c2, bool c3) const { checkGRF(); return Align16Operand(*this, (int(c3) << 3) | (int(c2) << 2) | (int(c1) << 1) | int(c0)); }
noSwizzle() const733     Align16Operand noSwizzle()                                const { return swizzle(0, 1, 2, 3); }
enableAll() const734     Align16Operand enableAll()                                const { return enable(true, true, true, true); }
735 
736     inline Subregister reinterpret(int offset, DataType type_) const;
reinterpret(int offset=0) const737     template <typename T> Subregister reinterpret(int offset = 0) const { return reinterpret(offset, getDataType<T>()); }
738 
offset(int off) const739     inline Subregister offset(int off) const { return reinterpret(off, getType()); }
740 
uq(int offset=0) const741     Subregister uq(int offset = 0) const { return reinterpret(offset, DataType::uq); }
q(int offset=0) const742     Subregister  q(int offset = 0) const { return reinterpret(offset, DataType::q);  }
ud(int offset=0) const743     Subregister ud(int offset = 0) const { return reinterpret(offset, DataType::ud); }
d(int offset=0) const744     Subregister  d(int offset = 0) const { return reinterpret(offset, DataType::d);  }
uw(int offset=0) const745     Subregister uw(int offset = 0) const { return reinterpret(offset, DataType::uw); }
w(int offset=0) const746     Subregister  w(int offset = 0) const { return reinterpret(offset, DataType::w);  }
ub(int offset=0) const747     Subregister ub(int offset = 0) const { return reinterpret(offset, DataType::ub); }
b(int offset=0) const748     Subregister  b(int offset = 0) const { return reinterpret(offset, DataType::b);  }
df(int offset=0) const749     Subregister df(int offset = 0) const { return reinterpret(offset, DataType::df); }
f(int offset=0) const750     Subregister  f(int offset = 0) const { return reinterpret(offset, DataType::f);  }
hf(int offset=0) const751     Subregister hf(int offset = 0) const { return reinterpret(offset, DataType::hf); }
bf(int offset=0) const752     Subregister bf(int offset = 0) const { return reinterpret(offset, DataType::bf); }
tf32(int offset=0) const753     Subregister tf32(int offset = 0) const { return reinterpret(offset, DataType::tf32); }
bf8(int offset=0) const754     Subregister bf8(int offset = 0)  const { return reinterpret(offset, DataType::bf8); }
755 };
756 
757 // Single register.
758 class Register : public RegData
759 {
760 public:
Register()761     constexpr Register() : RegData() {}
Register(int reg_,bool arf_,DataType defaultType=DataType::invalid,int off_=0)762     constexpr Register(int reg_, bool arf_, DataType defaultType = DataType::invalid, int off_ = 0)
763         : RegData(reg_, arf_, off_, false, defaultType, 0, 0, 1) {}
764 
operator +() const765     constexpr Register operator+() const { return *this; }
operator -() const766     constexpr14 Register operator-() const {
767         auto result = *this;
768         result.negate();
769         return result;
770     }
operator ~() const771     constexpr14 Register operator~() const { return -*this; }
772 
sub(int offset,DataType type_) const773     constexpr14 Subregister sub(int offset, DataType type_)        const { return Subregister(*this, offset, type_); }
sub(int offset) const774     template <typename T> constexpr14 Subregister sub(int offset)  const { return sub(offset, getDataType<T>()); }
775 
retype(DataType type_) const776     constexpr14 Register retype(DataType type_)         const { auto clone = *this; clone.setType(type_); return clone; }
retype() const777     template <typename T> constexpr14 Register retype() const { return retype(getDataType<T>()); }
778 
uq(int offset) const779     constexpr14 Subregister uq(int offset) const { return sub(offset, DataType::uq); }
q(int offset) const780     constexpr14 Subregister  q(int offset) const { return sub(offset, DataType::q);  }
ud(int offset) const781     constexpr14 Subregister ud(int offset) const { return sub(offset, DataType::ud); }
d(int offset) const782     constexpr14 Subregister  d(int offset) const { return sub(offset, DataType::d);  }
uw(int offset) const783     constexpr14 Subregister uw(int offset) const { return sub(offset, DataType::uw); }
w(int offset) const784     constexpr14 Subregister  w(int offset) const { return sub(offset, DataType::w);  }
ub(int offset) const785     constexpr14 Subregister ub(int offset) const { return sub(offset, DataType::ub); }
b(int offset) const786     constexpr14 Subregister  b(int offset) const { return sub(offset, DataType::b);  }
df(int offset) const787     constexpr14 Subregister df(int offset) const { return sub(offset, DataType::df); }
f(int offset) const788     constexpr14 Subregister  f(int offset) const { return sub(offset, DataType::f);  }
hf(int offset) const789     constexpr14 Subregister hf(int offset) const { return sub(offset, DataType::hf); }
bf(int offset) const790     constexpr14 Subregister bf(int offset) const { return sub(offset, DataType::bf); }
tf32(int offset) const791     constexpr14 Subregister tf32(int offset) const { return sub(offset, DataType::tf32); }
bf8(int offset) const792     constexpr14 Subregister bf8(int offset)  const { return sub(offset, DataType::bf8); }
793 
uq() const794     constexpr14 Register uq() const { return retype(DataType::uq); }
q() const795     constexpr14 Register  q() const { return retype(DataType::q);  }
ud() const796     constexpr14 Register ud() const { return retype(DataType::ud); }
d() const797     constexpr14 Register  d() const { return retype(DataType::d);  }
uw() const798     constexpr14 Register uw() const { return retype(DataType::uw); }
w() const799     constexpr14 Register  w() const { return retype(DataType::w);  }
ub() const800     constexpr14 Register ub() const { return retype(DataType::ub); }
b() const801     constexpr14 Register  b() const { return retype(DataType::b);  }
df() const802     constexpr14 Register df() const { return retype(DataType::df); }
f() const803     constexpr14 Register  f() const { return retype(DataType::f);  }
hf() const804     constexpr14 Register hf() const { return retype(DataType::hf); }
bf() const805     constexpr14 Register bf() const { return retype(DataType::bf); }
tf32() const806     constexpr14 Register tf32() const { return retype(DataType::tf32); }
bf8() const807     constexpr14 Register bf8()  const { return retype(DataType::bf8); }
808 
operator [](int offset) const809     constexpr14 Subregister operator[](int offset) const { return sub(offset, getType()); }
810 
operator =(const Invalid & i)811     Register &operator=(const Invalid &i) { this->invalidate(); return *this; }
812 };
813 
814 class GRF : public Register
815 {
816 public:
GRF()817     GRF() : Register() {}
GRF(int reg_)818     explicit constexpr GRF(int reg_) : Register(reg_, false) {}
819 
operator +() const820     constexpr GRF operator+() const { return *this; }
operator -() const821     constexpr14 GRF operator-() const {
822         auto result = *this;
823         result.negate();
824         return result;
825     }
operator ~() const826     constexpr14 GRF operator~() const { return -*this; }
827 
retype(DataType type_) const828     constexpr14 GRF retype(DataType type_)              const { auto clone = *this; clone.setType(type_); return clone; }
retype() const829     template <typename T> constexpr14 Register retype() const { return retype(getDataType<T>()); }
830 
uq(int offset) const831     constexpr14 Subregister uq(int offset) const { return sub(offset, DataType::uq); }
q(int offset) const832     constexpr14 Subregister  q(int offset) const { return sub(offset, DataType::q);  }
ud(int offset) const833     constexpr14 Subregister ud(int offset) const { return sub(offset, DataType::ud); }
d(int offset) const834     constexpr14 Subregister  d(int offset) const { return sub(offset, DataType::d);  }
uw(int offset) const835     constexpr14 Subregister uw(int offset) const { return sub(offset, DataType::uw); }
w(int offset) const836     constexpr14 Subregister  w(int offset) const { return sub(offset, DataType::w);  }
ub(int offset) const837     constexpr14 Subregister ub(int offset) const { return sub(offset, DataType::ub); }
b(int offset) const838     constexpr14 Subregister  b(int offset) const { return sub(offset, DataType::b);  }
df(int offset) const839     constexpr14 Subregister df(int offset) const { return sub(offset, DataType::df); }
f(int offset) const840     constexpr14 Subregister  f(int offset) const { return sub(offset, DataType::f);  }
hf(int offset) const841     constexpr14 Subregister hf(int offset) const { return sub(offset, DataType::hf); }
bf(int offset) const842     constexpr14 Subregister bf(int offset) const { return sub(offset, DataType::bf); }
bf8(int offset) const843     constexpr14 Subregister bf8(int offset)  const { return sub(offset, DataType::bf8); }
tf32(int offset) const844     constexpr14 Subregister tf32(int offset) const { return sub(offset, DataType::tf32); }
845 
uq() const846     constexpr14 GRF uq() const { return retype(DataType::uq); }
q() const847     constexpr14 GRF  q() const { return retype(DataType::q);  }
ud() const848     constexpr14 GRF ud() const { return retype(DataType::ud); }
d() const849     constexpr14 GRF  d() const { return retype(DataType::d);  }
uw() const850     constexpr14 GRF uw() const { return retype(DataType::uw); }
w() const851     constexpr14 GRF  w() const { return retype(DataType::w);  }
ub() const852     constexpr14 GRF ub() const { return retype(DataType::ub); }
b() const853     constexpr14 GRF  b() const { return retype(DataType::b);  }
df() const854     constexpr14 GRF df() const { return retype(DataType::df); }
f() const855     constexpr14 GRF  f() const { return retype(DataType::f);  }
hf() const856     constexpr14 GRF hf() const { return retype(DataType::hf); }
bf() const857     constexpr14 GRF bf() const { return retype(DataType::bf); }
bf8() const858     constexpr14 GRF bf8()  const { return retype(DataType::bf8); }
tf32() const859     constexpr14 GRF tf32() const { return retype(DataType::tf32); }
860 
swizzle(int s0,int s1,int s2,int s3) const861     Align16Operand swizzle(int s0, int s1, int s2, int s3)    const { return Align16Operand(*this, s0, s1, s2, s3); }
enable(bool c0,bool c1,bool c2,bool c3) const862     Align16Operand enable(bool c0, bool c1, bool c2, bool c3) const { return Align16Operand(*this, (int(c3) << 3) | (int(c2) << 2) | (int(c1) << 1) | int(c0)); }
noSwizzle() const863     Align16Operand noSwizzle()                                const { return swizzle(0, 1, 2, 3); }
enableAll() const864     Align16Operand enableAll()                                const { return enable(true, true, true, true); }
865 
operator =(const Invalid & i)866     GRF &operator=(const Invalid &i) { this->invalidate(); return *this; }
867 
operator +=(const int & inc)868     GRF &operator+=(const int &inc) {
869         base += inc;
870         return *this;
871     }
872 
operator ++(int i)873     GRF operator++(int i) {
874         GRF old = *this;
875         ++*this;
876         return old;
877     }
878 
operator ++()879     GRF &operator++() {
880         *this += 1;
881         return *this;
882     }
883 
advance(int inc)884     GRF advance(int inc) {
885         auto result = *this;
886         result += inc;
887         return result;
888     }
889 
890     inline GRFDisp operator+(int offset) const;
891     inline GRFDisp operator-(int offset) const;
892 
log2Bytes(HW hw)893     static constexpr int log2Bytes(HW hw)                  { return (hw == HW::XeHPC) ? 6 : 5;  }
bytes(HW hw)894     static constexpr int bytes(HW hw)                      { return (1 << log2Bytes(hw)); }
bytesToGRFs(HW hw,unsigned x)895     static constexpr int bytesToGRFs(HW hw, unsigned x)    { return (x + bytes(hw) - 1) >> log2Bytes(hw); }
896 };
897 
898 class GRFDisp {
899 protected:
900     GRF base;
901     int32_t disp;
902 
903 public:
GRFDisp(const GRF & base_,int32_t disp_)904     GRFDisp(const GRF &base_, int32_t disp_) : base(base_), disp(disp_) {}
GRFDisp(const RegData & rd)905     /* implicit */ GRFDisp(const RegData &rd) : base(reinterpret_cast<const GRF &>(rd)), disp(0) {}
906 
getBase() const907     constexpr GRF     getBase() const { return base; }
getDisp() const908     constexpr int32_t getDisp() const { return disp; }
909 };
910 
operator +(int offset) const911 GRFDisp GRF::operator+(int offset) const { return GRFDisp(*this, offset); }
operator -(int offset) const912 GRFDisp GRF::operator-(int offset) const { return *this + (-offset); }
913 
914 class ARF : public Register
915 {
916 public:
ARF()917     constexpr ARF() : Register() {}
ARF(ARFType type_,int reg_,DataType defaultType=DataType::invalid,int off_=0)918     constexpr ARF(ARFType type_, int reg_, DataType defaultType = DataType::invalid, int off_ = 0)
919         : Register((static_cast<int>(type_) << 4) | (reg_ & 0xF), true, defaultType, off_) {}
920 
operator =(const Invalid & i)921     ARF &operator=(const Invalid &i) { this->invalidate(); return *this; }
922 };
923 
924 class NullRegister : public ARF
925 {
926 public:
NullRegister()927     constexpr NullRegister() : ARF(ARFType::null, 0, DataType::ud) {}
928 };
929 
930 class AddressRegister : public ARF
931 {
932 public:
AddressRegister()933     constexpr AddressRegister() : ARF() {}
AddressRegister(int reg_)934     explicit constexpr AddressRegister(int reg_) : ARF(ARFType::a, reg_, DataType::uw) {}
935 
operator =(const Invalid & i)936     AddressRegister &operator=(const Invalid &i) { this->invalidate(); return *this; }
937 };
938 
939 class AccumulatorRegister : public ARF
940 {
941 public:
AccumulatorRegister()942     constexpr AccumulatorRegister() : ARF() {}
AccumulatorRegister(int reg_)943     explicit constexpr AccumulatorRegister(int reg_) : ARF(ARFType::acc, reg_) {}
944 
operator =(const Invalid & i)945     AccumulatorRegister &operator=(const Invalid &i) { this->invalidate(); return *this; }
946 
count(HW hw)947     static constexpr int count(HW hw) { return (hw >= HW::XeHP) ? 4 : 2; }
948 };
949 
950 class SpecialAccumulatorRegister : public AccumulatorRegister
951 {
952     uint8_t mmeNum;
953 
954 public:
SpecialAccumulatorRegister()955     constexpr SpecialAccumulatorRegister() : AccumulatorRegister(), mmeNum(0) {}
SpecialAccumulatorRegister(int reg_,int mmeNum_)956     constexpr SpecialAccumulatorRegister(int reg_, int mmeNum_) : AccumulatorRegister(reg_), mmeNum(mmeNum_) {}
957 
createNoMME()958     static constexpr SpecialAccumulatorRegister createNoMME() { return SpecialAccumulatorRegister(0, 8); }
959 
getMME() const960     constexpr uint8_t getMME() const { return mmeNum; }
961 
operator =(const Invalid & i)962     SpecialAccumulatorRegister &operator=(const Invalid &i) { this->invalidate(); return *this; }
963 };
964 
965 // An "extended register" is a combination of a regular GRF and some extra accumulator bits, used for math macro operations.
966 class ExtendedReg {
967     RegData base;
968     uint8_t mmeNum;
969 
970 public:
ExtendedReg(RegData base_,uint8_t mmeNum_)971     constexpr ExtendedReg(RegData base_, uint8_t mmeNum_) : base(base_), mmeNum(mmeNum_) {}
ExtendedReg(RegData base_,SpecialAccumulatorRegister acc)972     constexpr ExtendedReg(RegData base_, SpecialAccumulatorRegister acc) : base(base_), mmeNum(acc.getMME()) {}
973 
fixup(int execSize,DataType defaultType,bool isDest,int arity)974     void fixup(int execSize, DataType defaultType, bool isDest, int arity) {
975         base.fixup(execSize, defaultType, isDest, arity);
976     }
977 
getMods() const978     constexpr int getMods()         const { return base.getMods(); }
getType() const979     constexpr DataType getType()    const { return base.getType(); }
getOffset() const980     constexpr int getOffset()       const { return base.getOffset(); }
isIndirect() const981     constexpr bool isIndirect()     const { return base.isIndirect(); }
isInvalid() const982     constexpr bool isInvalid()      const { return base.isInvalid(); }
isValid() const983     constexpr bool isValid()        const { return !base.isInvalid(); }
isScalar() const984     constexpr bool isScalar()       const { return base.isScalar(); }
isARF() const985     constexpr bool isARF()          const { return base.isARF(); }
986 
getBase()987     constexpr14 RegData &getBase()        { return base; }
getBase() const988     constexpr RegData getBase()     const { return base; }
getMMENum() const989     constexpr uint8_t getMMENum()   const { return mmeNum; }
990 
991 #ifdef NGEN_ASM
992     inline void outputText(std::ostream &str, PrintDetail detail, LabelManager &man) const;
993     static const bool emptyOp = false;
994 #endif
995 };
996 
operator |(const RegData & base,const SpecialAccumulatorRegister & acc)997 static inline ExtendedReg operator|(const RegData &base, const SpecialAccumulatorRegister &acc)
998 {
999     return ExtendedReg(base, acc);
1000 }
1001 
1002 class FlagRegister : public ARF
1003 {
1004 public:
FlagRegister()1005     constexpr FlagRegister() : ARF() {}
FlagRegister(int reg_)1006     explicit constexpr FlagRegister(int reg_)  : ARF(ARFType::f, reg_, DataType::ud, 0) {}
FlagRegister(int reg_,int off_)1007     constexpr FlagRegister(int reg_, int off_) : ARF(ARFType::f, reg_, DataType::uw, off_) {}
1008 
createFromIndex(int index)1009     static FlagRegister createFromIndex(int index) {
1010         return FlagRegister(index >> 1, index & 1);
1011     }
1012 
operator ~() const1013     FlagRegister operator~() const {
1014         FlagRegister result = *this;
1015         result.mods = result.mods ^ 2;
1016         return result;
1017     }
1018 
operator =(const Invalid & i)1019     FlagRegister &operator=(const Invalid &i) { this->invalidate(); return *this; }
1020 
operator [](int offset) const1021     constexpr FlagRegister operator[](int offset) const { return FlagRegister(getARFBase(), getOffset() + offset); }
1022 
index() const1023     int index() const { return (getARFBase() << 1) + getOffset(); }
1024 
count(HW hw)1025     static inline constexpr int count(HW hw) {
1026         return (hw == HW::XeHPC) ? 4 : 2;
1027     }
subcount(HW hw)1028     static inline constexpr int subcount(HW hw) { return count(hw) * 2; }
1029 };
1030 
1031 class ChannelEnableRegister : public ARF
1032 {
1033 public:
ChannelEnableRegister(int reg_=0)1034     explicit constexpr ChannelEnableRegister(int reg_ = 0) : ARF(ARFType::ce, reg_, DataType::ud) {}
1035 };
1036 
1037 class StackPointerRegister : public ARF
1038 {
1039 public:
StackPointerRegister(int reg_=0)1040     explicit constexpr StackPointerRegister(int reg_ = 0) : ARF(ARFType::sp, reg_, DataType::uq) {}
1041 };
1042 
1043 class StateRegister : public ARF
1044 {
1045 public:
StateRegister(int reg_=0)1046     explicit constexpr StateRegister(int reg_ = 0) : ARF(ARFType::sr, reg_, DataType::ud) {}
1047 };
1048 
1049 class ControlRegister : public ARF
1050 {
1051 public:
ControlRegister(int reg_=0)1052     explicit constexpr ControlRegister(int reg_ = 0) : ARF(ARFType::cr, reg_, DataType::ud) {}
1053 };
1054 
1055 class NotificationRegister : public ARF
1056 {
1057 public:
NotificationRegister(int reg_=0)1058     explicit constexpr NotificationRegister(int reg_ = 0) : ARF(ARFType::n, reg_, DataType::ud) {}
1059 };
1060 
1061 class InstructionPointerRegister : public ARF
1062 {
1063 public:
InstructionPointerRegister()1064     constexpr InstructionPointerRegister() : ARF(ARFType::ip, 0, DataType::ud) {}
1065 };
1066 
1067 class ThreadDependencyRegister : public ARF
1068 {
1069 public:
ThreadDependencyRegister(int reg_=0)1070     explicit constexpr ThreadDependencyRegister(int reg_ = 0) : ARF(ARFType::tdr, reg_, DataType::uw) {}
1071 };
1072 
1073 class PerformanceRegister : public ARF
1074 {
1075 public:
PerformanceRegister(int reg_=0,int off_=0)1076     explicit constexpr PerformanceRegister(int reg_ = 0, int off_ = 0) : ARF(ARFType::tm, reg_, DataType::ud, off_) {}
1077 };
1078 
1079 class DebugRegister : public ARF
1080 {
1081 public:
DebugRegister(int reg_=0)1082     explicit constexpr DebugRegister(int reg_ = 0) : ARF(ARFType::dbg, reg_, DataType::ud) {}
1083 };
1084 
1085 class FlowControlRegister : public ARF
1086 {
1087 public:
FlowControlRegister(int reg_=0)1088     explicit constexpr FlowControlRegister(int reg_ = 0) : ARF(ARFType::fc, reg_, DataType::ud) {}
1089 };
1090 
operator ()(int vs,int width,int hs) const1091 inline RegisterRegion Subregister::operator()(int vs, int width, int hs) const
1092 {
1093     RegisterRegion rr(*this, vs, width, hs);
1094     return rr;
1095 }
1096 
operator ()(int vs_or_width,int hs) const1097 inline RegisterRegion Subregister::operator()(int vs_or_width, int hs) const
1098 {
1099     int vs, width;
1100 
1101     if (isIndirect()) {
1102         vs = -1;
1103         width = vs_or_width;
1104     } else {
1105         vs = vs_or_width;
1106         width = (hs == 0) ? ((vs == 0) ? 1 : vs) : vs / hs;
1107     }
1108 
1109     return operator()(vs, width, hs);
1110 }
1111 
operator ()(int hs) const1112 inline RegisterRegion Subregister::operator()(int hs) const
1113 {
1114     return operator()(0, 0, hs);
1115 }
1116 
reinterpret(int offset,DataType type_) const1117 inline Subregister Subregister::reinterpret(int offset, DataType type_) const
1118 {
1119     Subregister r = *this;
1120     r.setType(type_);
1121 
1122     int o = getOffset();
1123     int oldbytes = getBytes(), newbytes = r.getBytes();
1124     int bitdiff = (oldbytes == 0) ? 0
1125                                   : (utils::log2(newbytes) - utils::log2(oldbytes));
1126 
1127     if (newbytes < oldbytes)
1128         r.setOffset((o << -bitdiff) + offset);
1129     else
1130         r.setOffset((o >>  bitdiff) + offset);
1131 
1132     return r;
1133 }
1134 
1135 // Indirect register and frames for making them.
1136 class IndirectRegister : public Register {
1137 protected:
IndirectRegister(const RegData & reg)1138     explicit constexpr14 IndirectRegister(const RegData &reg) : Register((reg.getARFBase() << 4) | reg.getOffset(), false) {
1139         indirect = true;
1140     }
1141     friend class IndirectRegisterFrame;
1142 
operator =(const Invalid & i)1143     IndirectRegister &operator=(const Invalid &i) { this->invalidate(); return *this; }
1144 };
1145 
1146 class IndirectRegisterFrame {
1147 public:
operator [](const RegData & reg) const1148     IndirectRegister operator[](const RegData &reg) const {
1149 #ifdef NGEN_SAFE
1150         if (!reg.isARF() || reg.getARFType() != ARFType::a)
1151             throw invalid_arf_exception();
1152 #endif
1153         return IndirectRegister(reg);
1154     }
1155 };
1156 
1157 // GRFRange represents a contiguous range of GRF registers.
1158 class GRFRange {
1159 protected:
1160     uint8_t base;
1161     uint8_t len;
1162 
1163     static constexpr uint8_t invalidLen = 0xFF;
1164 
1165 public:
GRFRange()1166     GRFRange() : GRFRange(0, invalidLen) {}
GRFRange(int base_,int len_)1167     GRFRange(int base_, int len_) : base(base_), len(len_) {}
GRFRange(GRF base_,int len_)1168     GRFRange(GRF base_, int len_) : GRFRange(base_.getBase(), len_) {}
1169 
getBase() const1170     int getBase()    const { return base; }
getLen() const1171     int getLen()     const { return len; }
isEmpty() const1172     bool isEmpty()   const { return len == 0; }
isNull() const1173     bool isNull()    const { return false; }
1174 
invalidate()1175     void invalidate()      { len = invalidLen; }
isInvalid() const1176     bool isInvalid() const { return len == invalidLen; }
isValid() const1177     bool isValid()   const { return !isInvalid(); }
1178 
operator =(const Invalid & i)1179     GRFRange &operator=(const Invalid &i) { this->invalidate(); return *this; }
1180 
operator [](int i) const1181     GRF operator[](int i) const {
1182 #ifdef NGEN_SAFE
1183         if (isInvalid()) throw invalid_object_exception();
1184 #endif
1185         return GRF(base + i);
1186     }
1187 
operator GRF() const1188     operator GRF() const { return (*this)[0]; }
1189 
fixup(int execSize,DataType defaultType,bool isDest,int arity)1190     void fixup(int execSize, DataType defaultType, bool isDest, int arity) {}
1191 };
1192 
operator -(const GRF & reg1,const GRF & reg2)1193 static inline GRFRange operator-(const GRF &reg1, const GRF &reg2)
1194 {
1195     uint8_t b1 = reg1.getBase(), b2 = reg2.getBase();
1196     int len = int(b2) + 1 - int(b1);
1197 
1198 #ifdef NGEN_SAFE
1199     if (len < 0) throw invalid_range_exception();
1200 #endif
1201 
1202     return GRFRange(reg1, len);
1203 }
1204 
operator ==(const GRFRange & r1,const GRFRange & r2)1205 static inline bool operator==(const GRFRange &r1, const GRFRange &r2)
1206 {
1207     return (r1.getBase() == r2.getBase()) && (r1.getLen() == r2.getLen());
1208 }
1209 
operator !=(const GRFRange & r1,const GRFRange & r2)1210 static inline bool operator!=(const GRFRange &r1, const GRFRange &r2)
1211 {
1212     return !(r1 == r2);
1213 }
1214 
// Condition modifier encodings. `ze`/`eq` and `nz`/`ne` are aliases for the
// same value.
enum class ConditionModifier {
    none = 0x0,
    ze   = 0x1,
    eq   = 0x1,
    nz   = 0x2,
    ne   = 0x2,
    gt   = 0x3,
    ge   = 0x4,
    lt   = 0x5,
    le   = 0x6,
    ov   = 0x8,
    un   = 0x9,
    eo   = 0xF
};
1229 
1230 #ifdef NGEN_ASM
// Streams the textual name of a condition modifier; encodings without a name
// (including `none`) print as the empty string.
static inline std::ostream &operator<<(std::ostream &str, ConditionModifier cmod)
{
    static const char *names[16] = {
        "",   "eq", "ne", "gt", "ge", "lt", "le", "",
        "ov", "un", "",   "",   "",   "",   "",   "eo"
    };
    const int slot = static_cast<uint8_t>(cmod) & 0xF;
    return str << names[slot];
}
1237 #endif
1238 
// Channel mask encodings. Apart from 5 and 6, each name lists the channels
// whose bit is clear, with bit 0 = r, bit 1 = g, bit 2 = b, bit 3 = a.
// NOTE(review): under that pattern value 5 would be named `ga` and value 6
// `ra`; the entries `bga = 5` and `ga = 6` do not fit it -- confirm against
// the hardware documentation before relying on these two.
enum class ChannelMask {
    rgba = 0x0,
    gba  = 0x1,
    rba  = 0x2,
    ba   = 0x3,
    rga  = 0x4,
    bga  = 0x5,
    ga   = 0x6,
    a    = 0x7,
    rgb  = 0x8,
    gb   = 0x9,
    rb   = 0xA,
    b    = 0xB,
    rg   = 0xC,
    g    = 0xD,
    r    = 0xE,
};
1256 
// Predication control encodings. The names x/y/z/w reuse values 2..5; they
// are the names toText() prints for those values in align16 mode.
enum class PredCtrl {
    None   = 0,
    Normal = 1,

    anyv   = 2,
    allv   = 3,
    any2h  = 4,
    all2h  = 5,
    any4h  = 6,
    all4h  = 7,
    any8h  = 8,
    all8h  = 9,
    any16h = 10,
    all16h = 11,
    any32h = 12,
    all32h = 13,
    any    = 14,
    all    = 15,

    // Aliases of values 2..5.
    x      = 2,
    y      = 3,
    z      = 4,
    w      = 5,
};
1279 
1280 #ifdef NGEN_ASM
toText(PredCtrl ctrl,bool align16)1281 static const char *toText(PredCtrl ctrl, bool align16) {
1282     const char *names[2][16] = {{"", "", "anyv", "allv", "any2h", "all2h", "any4h", "all4h", "any8h", "all8h", "any16h", "all16h", "any32h", "all32h", "any", "all"},
1283                                 {"", "", "x",    "y",    "z",     "w",     "",      "",      "",      "",      "",       "",       "",       "",       "",    ""}};
1284     return names[align16][static_cast<int>(ctrl) & 0xF];
1285 }
1286 #endif
1287 
// Thread control encodings (values 0..3).
enum class ThreadCtrl {
    Normal    = 0x0,
    Atomic    = 0x1,
    Switch    = 0x2,
    NoPreempt = 0x3
};
1294 
// Instruction opcode encodings (7-bit values).
// Several encodings carry two names (sync/mov, srnd/dp4, line/dpas,
// pln/dpasw); getMnemonic() picks the printed name based on the hardware
// generation. The *_gen12 enumerators occupy the 0x60-0x7A rows.
enum class Opcode {
    // 0x00 - 0x0F
    illegal     = 0x00,
    sync        = 0x01,
    mov         = 0x01,
    sel         = 0x02,
    movi        = 0x03,
    not_        = 0x04,
    and_        = 0x05,
    or_         = 0x06,
    xor_        = 0x07,
    shr         = 0x08,
    shl         = 0x09,
    smov        = 0x0A,
    asr         = 0x0C,
    ror         = 0x0E,
    rol         = 0x0F,

    // 0x10 - 0x1F
    cmp         = 0x10,
    cmpn        = 0x11,
    csel        = 0x12,
    bfrev       = 0x17,
    bfe         = 0x18,
    bfi1        = 0x19,
    bfi2        = 0x1A,

    // 0x20 - 0x2F (the row isBranch() tests for)
    jmpi        = 0x20,
    brd         = 0x21,
    if_         = 0x22,
    brc         = 0x23,
    else_       = 0x24,
    endif       = 0x25,
    while_      = 0x27,
    break_      = 0x28,
    cont        = 0x29,
    halt        = 0x2A,
    calla       = 0x2B,
    call        = 0x2C,
    ret         = 0x2D,
    goto_       = 0x2E,
    join        = 0x2F,

    // 0x30 - 0x3F
    wait        = 0x30,
    send        = 0x31,
    sendc       = 0x32,
    sends       = 0x33,
    sendsc      = 0x34,
    math        = 0x38,

    // 0x40 - 0x4F
    add         = 0x40,
    mul         = 0x41,
    avg         = 0x42,
    frc         = 0x43,
    rndu        = 0x44,
    rndd        = 0x45,
    rnde        = 0x46,
    rndz        = 0x47,
    mac         = 0x48,
    mach        = 0x49,
    lzd         = 0x4A,
    fbh         = 0x4B,
    fbl         = 0x4C,
    cbit        = 0x4D,
    addc        = 0x4E,
    subb        = 0x4F,

    // 0x50 - 0x5F
    sad2        = 0x50,
    sada2       = 0x51,
    add3        = 0x52,
    macl        = 0x53,
    srnd        = 0x54,
    dp4         = 0x54,
    dph         = 0x55,
    dp3         = 0x56,
    dp2         = 0x57,
    dp4a        = 0x58,
    line        = 0x59,
    dpas        = 0x59,
    pln         = 0x5A,
    dpasw       = 0x5A,
    mad         = 0x5B,
    lrp         = 0x5C,
    madm        = 0x5D,

    // 0x60 - 0x6F
    nop_gen12   = 0x60,
    mov_gen12   = 0x61,
    sel_gen12   = 0x62,
    movi_gen12  = 0x63,
    not_gen12   = 0x64,
    and_gen12   = 0x65,
    or_gen12    = 0x66,
    xor_gen12   = 0x67,
    shr_gen12   = 0x68,
    shl_gen12   = 0x69,
    smov_gen12  = 0x6A,
    bfn         = 0x6B,
    asr_gen12   = 0x6C,
    ror_gen12   = 0x6E,
    rol_gen12   = 0x6F,

    // 0x70 - 0x7F
    cmp_gen12   = 0x70,
    cmpn_gen12  = 0x71,
    csel_gen12  = 0x72,
    bfrev_gen12 = 0x77,
    bfe_gen12   = 0x78,
    bfi1_gen12  = 0x79,
    bfi2_gen12  = 0x7A,
    nop         = 0x7E,
    wrdep       = 0x7F,   /* not a valid opcode; used internally by nGEN */
};
1397 
isVariableLatency(HW hw,Opcode op)1398 static inline bool isVariableLatency(HW hw, Opcode op)
1399 {
1400     switch (op) {
1401         case Opcode::math:
1402             if (hw >= HW::XeHPC) return false;
1403         case Opcode::send:
1404         case Opcode::sendc:
1405         case Opcode::dpas:
1406         case Opcode::dpasw:
1407             return true;
1408         default:
1409             return false;
1410     }
1411 }
1412 
isBranch(Opcode op)1413 static inline bool isBranch(Opcode op)
1414 {
1415     return (static_cast<int>(op) >> 4) == 2;
1416 }
1417 
1418 #ifdef NGEN_ASM
getMnemonic(Opcode op,HW hw)1419 static const char *getMnemonic(Opcode op, HW hw)
1420 {
1421     const char *names[0x80] = {
1422         "illegal", "sync", "sel", "movi", "not", "and", "or", "xor",
1423         "shr", "shl", "smov", "", "asr", "", "ror", "rol",
1424         "cmp", "cmpn", "csel", "", "", "", "", "bfrev",
1425         "bfe", "bfi1", "bfi2", "", "", "", "", "",
1426         "jmpi", "brd", "if", "brc", "else", "endif", "", "while",
1427         "break", "cont", "halt", "calla", "call", "ret", "goto", "join",
1428         "wait", "send", "sendc", "sends", "sendsc", "", "", "",
1429         "math", "", "", "", "", "", "", "",
1430         "add", "mul", "avg", "frc", "rndu", "rndd", "rnde", "rndz",
1431         "mac", "mach", "lzd", "fbh", "fbl", "cbit", "addc", "subb",
1432         "sad2", "sada2", "add3", "macl", "srnd", "dph", "dp3", "dp2",
1433         "dp4a", "dpas", "dpasw", "mad", "lrp", "madm", "", "",
1434         "nop", "mov", "sel", "movi", "not", "and", "or", "xor",
1435         "shr", "shl", "smov", "bfn", "asr", "", "ror", "rol",
1436         "cmp", "cmpn", "csel", "", "", "", "", "bfrev",
1437         "bfe", "bfi1", "bfi2", "", "", "", "nop", ""
1438     };
1439 
1440     const char *mnemonic = names[static_cast<int>(op) & 0x7F];
1441 
1442     if (hw < HW::Gen12LP) switch (op) {
1443         case Opcode::mov:   mnemonic = "mov";   break;
1444         case Opcode::line:  mnemonic = "line";  break;
1445         case Opcode::pln:   mnemonic = "pln";   break;
1446         case Opcode::dp4:   mnemonic = "dp4";   break;
1447         default: break;
1448     }
1449 
1450     return mnemonic;
1451 }
1452 #endif
1453 
// Empty tag type.
class AllPipes {};

// Pipe identifiers, stored in 8 bits. Every pipe has a one-letter name and a
// long alias with the same value.
enum class Pipe : uint8_t {
    Default = 0,
    A       = 1,
    All     = A,
    F       = 2,
    Float   = F,
    I       = 3,
    Integer = I,
    L       = 4,
    Long    = L,
    M       = 5,
    Math    = M,
};
1463 
1464 #ifdef NGEN_ASM
// Streams the one-letter name of a pipe; Default and unnamed values print as
// the empty string. Only the low 3 bits of the value are used.
static inline std::ostream &operator<<(std::ostream &str, Pipe pipe)
{
    static const char *names[8] = {"", "A", "F", "I", "L", "M", "", ""};
    const int slot = static_cast<uint8_t>(pipe) & 7;
    return str << names[slot];
}
1471 #endif
1472 
1473 class SWSBInfo
1474 {
1475     friend class InstructionModifier;
1476 
1477 public:
1478     union {
1479         struct {
1480             unsigned token : 6;
1481             unsigned src : 1;
1482             unsigned dst : 1;
1483             unsigned dist : 4;
1484             unsigned pipe : 4;
1485         } parts;
1486         uint16_t all;
1487     };
1488 
hasDist() const1489     constexpr bool hasDist() const       { return parts.dist > 0; }
hasToken() const1490     constexpr bool hasToken() const      { return parts.src || parts.dst; }
hasTokenSet() const1491     constexpr bool hasTokenSet() const   { return parts.src && parts.dst; }
getToken() const1492     constexpr int getToken() const       { return hasToken() ? parts.token : 0; }
tokenMode() const1493     constexpr unsigned tokenMode() const { return (parts.src << 1) | parts.dst; }
getPipe() const1494     constexpr Pipe getPipe() const       { return static_cast<Pipe>(parts.pipe); }
setPipe(Pipe pipe)1495     void setPipe(Pipe pipe)              { parts.pipe = static_cast<unsigned>(pipe); }
empty() const1496     constexpr bool empty() const         { return (all == 0); }
1497 
1498 protected:
SWSBInfo(uint16_t all_)1499     explicit constexpr SWSBInfo(uint16_t all_) : all(all_) {}
1500 
1501 public:
SWSBInfo()1502     constexpr SWSBInfo() : all(0) {}
SWSBInfo(Pipe pipe_,int dist_)1503     constexpr SWSBInfo(Pipe pipe_, int dist_) : all(((dist_ & 0xF) << 8) | (static_cast<unsigned>(pipe_) << 12)) {}
SWSBInfo(int id_,bool src_,bool dst_)1504     constexpr SWSBInfo(int id_, bool src_, bool dst_) : all(id_ | (uint16_t(src_) << 6) | (uint16_t(dst_) << 7)) {}
1505 
operator |(const SWSBInfo & i1,const SWSBInfo & i2)1506     friend constexpr SWSBInfo operator|(const SWSBInfo &i1, const SWSBInfo &i2) { return SWSBInfo(i1.all | i2.all); }
1507 };
1508 
1509 // Token count.
tokenCount(HW hw)1510 constexpr inline int tokenCount(HW hw)
1511 {
1512     return (hw >= HW::XeHPC) ? 32 : 16;
1513 }
1514 
1515 class SBID
1516 {
1517 public:
1518     SWSBInfo set;
1519     SWSBInfo src;
1520     SWSBInfo dst;
1521 
SBID(int id)1522     constexpr SBID(int id) : set(id, true, true), src(id, true, false), dst(id, false, true) {}
operator SWSBInfo() const1523     constexpr operator SWSBInfo() const { return set; }
1524 
getID() const1525     constexpr int getID() const { return set.getToken(); }
1526 };
1527 
getPipe()1528 template <typename T> static constexpr Pipe getPipe() { return (sizeof(T) == 8) ? Pipe::L : Pipe::I; }
getPipe()1529 template <> constexpr Pipe getPipe<float>()           { return Pipe::F; }
getPipe()1530 template <> constexpr Pipe getPipe<void>()            { return Pipe::Default; }
getPipe()1531 template <> constexpr Pipe getPipe<AllPipes>()        { return Pipe::A; }
1532 
SWSB(SWSBInfo info)1533 constexpr SWSBInfo SWSB(SWSBInfo info)                                        { return info; }
SWSB(Pipe pipe,int dist)1534 constexpr SWSBInfo SWSB(Pipe pipe, int dist)                                  { return SWSBInfo(pipe, dist); }
SWSB(int dist)1535 template <typename T = void> constexpr SWSBInfo SWSB(int dist)                { return SWSB(getPipe<T>(), dist); }
SWSB(SWSBInfo info,int dist)1536 template <typename T = void> constexpr SWSBInfo SWSB(SWSBInfo info, int dist) { return SWSB<T>(dist) | info; }
1537 
1538 class InstructionModifier {
1539 protected:
1540     union {
1541         struct {
1542             unsigned execSize : 8;          // Execution size as integer (for internal use).
1543             unsigned accessMode : 1;        // From here on matches the low 64-bits of the binary format for Gen8-11
1544             unsigned noDDClr : 1;
1545             unsigned noDDChk : 1;
1546             unsigned chanOff : 3;
1547             unsigned threadCtrl : 2;
1548             unsigned predCtrl : 4;
1549             unsigned predInv : 1;
1550             unsigned eSizeField : 3;
1551             unsigned cmod : 4;              // Also stores channel mask temporarily for surface r/w
1552             unsigned accWrCtrl : 1;         // = noSrcDepSet for send, = branchCtrl for branch instructions
1553             unsigned cmptCtrl : 1;
1554             unsigned debugCtrl : 1;
1555             unsigned saturate : 1;
1556             unsigned flagSubRegNum : 1;
1557             unsigned flagRegNum : 1;
1558             unsigned maskCtrl : 1;
1559             unsigned _zeros_: 9;
1560             unsigned flagRegNum1 : 1;
1561             unsigned autoSWSB : 1;
1562             unsigned fusionCtrl : 1;        // Gen12
1563             unsigned eot : 1;
1564             unsigned swsb : 16;
1565         } parts;
1566         uint64_t all;
1567     };
1568 
InstructionModifier(uint64_t all_)1569     constexpr InstructionModifier(uint64_t all_) : all(all_) {}
1570 
1571     friend inline void encodeCommon12(Instruction12 &i, Opcode opcode, const InstructionModifier &mod, const RegData &dst, EncodingTag12 tag);
1572     friend inline void encodeCommon12(Instruction12 &i, Opcode opcode, const InstructionModifier &mod, const RegData &dst, EncodingTagXeHPC tag);
1573 
1574 public:
getExecSize() const1575     constexpr int getExecSize()            const { return parts.execSize; }
isAlign16() const1576     constexpr bool isAlign16()             const { return parts.accessMode; }
isNoDDClr() const1577     constexpr bool isNoDDClr()             const { return parts.noDDClr; }
isNoDDChk() const1578     constexpr bool isNoDDChk()             const { return parts.noDDChk; }
getChannelOffset() const1579     constexpr int getChannelOffset()       const { return parts.chanOff << 2; }
getThreadCtrl() const1580     constexpr ThreadCtrl getThreadCtrl()   const { return static_cast<ThreadCtrl>(parts.threadCtrl); }
isAtomic() const1581     constexpr bool isAtomic()              const { return getThreadCtrl() == ThreadCtrl::Atomic; }
getPredCtrl() const1582     constexpr PredCtrl getPredCtrl()       const { return static_cast<PredCtrl>(parts.predCtrl); }
isPredInv() const1583     constexpr bool isPredInv()             const { return parts.predInv; }
getCMod() const1584     constexpr ConditionModifier getCMod()  const { return static_cast<ConditionModifier>(parts.cmod); }
isAccWrEn() const1585     constexpr bool isAccWrEn()             const { return parts.accWrCtrl; }
getBranchCtrl() const1586     constexpr bool getBranchCtrl()         const { return parts.accWrCtrl; }
isCompact() const1587     constexpr bool isCompact()             const { return parts.cmptCtrl; }
isBreakpoint() const1588     constexpr bool isBreakpoint()          const { return parts.debugCtrl; }
isSaturate() const1589     constexpr bool isSaturate()            const { return parts.saturate; }
getFlagReg() const1590     constexpr14 FlagRegister getFlagReg()  const { return FlagRegister((parts.flagRegNum1 << 1) | parts.flagRegNum, parts.flagSubRegNum); }
isWrEn() const1591     constexpr bool isWrEn()                const { return parts.maskCtrl; }
isAutoSWSB() const1592     constexpr bool isAutoSWSB()            const { return parts.autoSWSB; }
isSerialized() const1593     constexpr bool isSerialized()          const { return parts.fusionCtrl; }
isEOT() const1594     constexpr bool isEOT()                 const { return parts.eot; }
getSWSB() const1595     constexpr SWSBInfo getSWSB()           const { return SWSBInfo(parts.swsb); }
getAll() const1596     constexpr uint64_t getAll()            const { return all; }
1597 
setExecSize(int execSize_)1598     constexpr14 void setExecSize(int execSize_)              { parts.execSize = execSize_; parts.eSizeField = utils::log2(execSize_); }
setPredCtrl(PredCtrl predCtrl_)1599     constexpr14 void setPredCtrl(PredCtrl predCtrl_)         { parts.predCtrl = static_cast<unsigned>(predCtrl_); }
setPredInv(bool predInv_)1600     constexpr14 void setPredInv(bool predInv_)               { parts.predInv = predInv_; }
setCMod(const ConditionModifier & cmod_)1601     constexpr14 void setCMod(const ConditionModifier &cmod_) { parts.cmod = static_cast<unsigned>(cmod_); }
setBranchCtrl(bool branchCtrl)1602     constexpr14 void setBranchCtrl(bool branchCtrl)          { parts.accWrCtrl = branchCtrl; }
setFlagReg(FlagRegister & flag)1603     constexpr14 void setFlagReg(FlagRegister &flag)          { parts.flagRegNum1 = flag.getBase() >> 1; parts.flagRegNum = flag.getBase() & 1; parts.flagSubRegNum = flag.getOffset(); }
setWrEn(bool maskCtrl_)1604     constexpr14 void setWrEn(bool maskCtrl_)                 { parts.maskCtrl = maskCtrl_; }
setAutoSWSB(bool autoSWSB_)1605     constexpr14 void setAutoSWSB(bool autoSWSB_)             { parts.autoSWSB = autoSWSB_; }
setSWSB(SWSBInfo swsb_)1606     constexpr14 void setSWSB(SWSBInfo swsb_)                 { parts.swsb = swsb_.all; }
setSWSB(uint16_t swsb_)1607     constexpr14 void setSWSB(uint16_t swsb_)                 { parts.swsb = swsb_; }
1608 
InstructionModifier()1609     constexpr InstructionModifier() : all(0) {}
1610 
1611     // Hardcoded shift counts are a workaround for MSVC v140 bug.
InstructionModifier(const PredCtrl & predCtrl_)1612     constexpr /* implicit */ InstructionModifier(const PredCtrl &predCtrl_)
1613         : all{static_cast<uint64_t>(predCtrl_) << 16} {}
1614 
InstructionModifier(const ThreadCtrl & threadCtrl_)1615     constexpr /* implicit */ InstructionModifier(const ThreadCtrl &threadCtrl_)
1616         : all{static_cast<uint64_t>(threadCtrl_) << 14} {}
1617 
InstructionModifier(const ConditionModifier & cmod_)1618     constexpr /* implicit */ InstructionModifier(const ConditionModifier &cmod_)
1619         : all{static_cast<uint64_t>(cmod_) << 24} {}
1620 
InstructionModifier(const int & execSize_)1621     constexpr14 /* implicit */ InstructionModifier(const int &execSize_) : InstructionModifier() {
1622         setExecSize(execSize_);
1623     }
InstructionModifier(const SWSBInfo & swsb)1624     constexpr14 /* implicit */ InstructionModifier(const SWSBInfo &swsb) : InstructionModifier() {
1625         parts.swsb = swsb.all;
1626     }
InstructionModifier(const SBID & sb)1627     constexpr14 /* implicit */ InstructionModifier(const SBID &sb)   : InstructionModifier(SWSB(sb)) {}
1628 
1629 protected:
InstructionModifier(bool accessMode_,bool noDDClr_,bool noDDChk_,unsigned chanOff_,bool accWrCtrl_,bool debugCtrl_,bool saturate_,bool maskCtrl_,bool autoSWSB_,bool fusionCtrl_,bool eot_)1630     constexpr InstructionModifier(bool accessMode_, bool noDDClr_, bool noDDChk_, unsigned chanOff_, bool accWrCtrl_,
1631                                   bool debugCtrl_, bool saturate_, bool maskCtrl_, bool autoSWSB_, bool fusionCtrl_, bool eot_)
1632         : all{(uint64_t(accessMode_) << 8) | (uint64_t(noDDClr_) << 9) | (uint64_t(noDDChk_) << 10) | (uint64_t(chanOff_ >> 2) << 11)
1633             | (uint64_t(accWrCtrl_) << 28) | (uint64_t(debugCtrl_) << 30) | (uint64_t(saturate_) << 31)
1634             | (uint64_t(maskCtrl_) << 34) | (uint64_t(autoSWSB_) << 45) | (uint64_t(fusionCtrl_) << 46) | (uint64_t(eot_) << 47)} {}
1635 
1636 public:
createAccessMode(int accessMode_)1637     static constexpr InstructionModifier createAccessMode(int accessMode_) {
1638         return InstructionModifier(accessMode_, false, false, 0, false, false, false, false, false, false, false);
1639     }
createNoDDClr()1640     static constexpr InstructionModifier createNoDDClr() {
1641         return InstructionModifier(false, true, false, 0, false, false, false, false, false, false, false);
1642     }
createNoDDChk()1643     static constexpr InstructionModifier createNoDDChk() {
1644         return InstructionModifier(false, false, true, 0, false, false, false, false, false, false, false);
1645     }
createChanOff(int offset)1646     static constexpr InstructionModifier createChanOff(int offset) {
1647         return InstructionModifier(false, false, false, offset, false, false, false, false, false, false, false);
1648     }
createAccWrCtrl()1649     static constexpr InstructionModifier createAccWrCtrl() {
1650         return InstructionModifier(false, false, false, 0, true, false, false, false, false, false, false);
1651     }
createDebugCtrl()1652     static constexpr InstructionModifier createDebugCtrl() {
1653         return InstructionModifier(false, false, false, 0, false, true, false, false, false, false, false);
1654     }
createSaturate()1655     static constexpr InstructionModifier createSaturate() {
1656         return InstructionModifier(false, false, false, 0, false, false, true, false, false, false, false);
1657     }
createMaskCtrl(bool maskCtrl_)1658     static constexpr InstructionModifier createMaskCtrl(bool maskCtrl_) {
1659         return InstructionModifier(false, false, false, 0, false, false, false, maskCtrl_, false, false, false);
1660     }
createAutoSWSB()1661     static constexpr InstructionModifier createAutoSWSB() {
1662         return InstructionModifier(false, false, false, 0, false, false, false, false, true, false, false);
1663     }
createSerialized()1664     static constexpr InstructionModifier createSerialized() {
1665         return InstructionModifier(false, false, false, 0, false, false, false, false, false, true, false);
1666     }
createEOT()1667     static constexpr InstructionModifier createEOT() {
1668         return InstructionModifier(false, false, false, 0, false, false, false, false, false, false, true);
1669     }
1670 
1671     friend constexpr14 InstructionModifier operator|(const InstructionModifier &mod1, const InstructionModifier &mod2);
1672     friend constexpr14 InstructionModifier operator|(const InstructionModifier &mod1, const FlagRegister &mod2);
1673     friend constexpr14 InstructionModifier operator|(const InstructionModifier &mod1, const PredCtrl &mod2);
1674 
1675     friend constexpr14 InstructionModifier operator^(const InstructionModifier &mod1, const InstructionModifier &mod2);
1676 
operator ~()1677     constexpr14 InstructionModifier operator~() {
1678         InstructionModifier mod = *this;
1679         mod.parts.predInv = ~mod.parts.predInv;
1680         return mod;
1681     }
1682 
1683     template <typename T>
operator |=(const T & mod)1684     InstructionModifier &operator|=(const T &mod) {
1685         *this = *this | mod;
1686         return *this;
1687     }
1688 
operator ^=(const InstructionModifier & mod)1689     InstructionModifier &operator^=(const InstructionModifier &mod) {
1690         *this = *this ^ mod;
1691         return *this;
1692     }
1693 };
1694 
operator |(const InstructionModifier & mod1,const InstructionModifier & mod2)1695 inline constexpr14 InstructionModifier operator|(const InstructionModifier &mod1, const InstructionModifier &mod2)
1696 {
1697     return InstructionModifier(mod1.all | mod2.all);
1698 }
1699 
1700 
operator |(const InstructionModifier & mod1,const FlagRegister & flag)1701 inline constexpr14 InstructionModifier operator|(const InstructionModifier &mod1, const FlagRegister &flag)
1702 {
1703     InstructionModifier mod = mod1;
1704 
1705     mod.parts.flagRegNum1 = flag.getBase() >> 1;
1706     mod.parts.flagRegNum = flag.getBase() & 1;
1707     mod.parts.flagSubRegNum = flag.getOffset();
1708 
1709     if (mod.getCMod() == ConditionModifier::none) {
1710         mod.parts.predInv = flag.getNeg();
1711         mod.parts.predCtrl = static_cast<int>(PredCtrl::Normal);
1712     }
1713 
1714     return mod;
1715 }
1716 
operator |(const InstructionModifier & mod1,const PredCtrl & mod2)1717 inline constexpr14 InstructionModifier operator|(const InstructionModifier &mod1, const PredCtrl &mod2)
1718 {
1719     InstructionModifier mod = mod1;
1720     mod.parts.predCtrl = static_cast<int>(mod2);
1721     return mod;
1722 }
1723 
operator ^(const InstructionModifier & mod1,const InstructionModifier & mod2)1724 inline constexpr14 InstructionModifier operator^(const InstructionModifier &mod1, const InstructionModifier &mod2)
1725 {
1726     return InstructionModifier(mod1.all ^ mod2.all);
1727 }
1728 
1729 class Immediate {
1730 protected:
1731     uint64_t payload;
1732     DataType type;
1733     bool hiddenType = false;
1734 
Immediate(uint64_t payload_,DataType type_)1735     Immediate(uint64_t payload_, DataType type_) : payload(payload_), type(type_) {}
1736 
setPayload(T imm)1737     template <typename T> typename std::enable_if<sizeof(T) == 2>::type setPayload(T imm) {
1738         uint32_t ximm = utils::bitcast<T, uint16_t>(imm);
1739         payload = ximm | (ximm << 16);
1740     }
setPayload(T imm)1741     template <typename T> typename std::enable_if<sizeof(T) == 4>::type setPayload(T imm) {
1742         payload = utils::bitcast<T, uint32_t>(imm);
1743     }
setPayload(T imm)1744     template <typename T> typename std::enable_if<sizeof(T) == 8>::type setPayload(T imm) {
1745         payload = utils::bitcast<T, uint64_t>(imm);
1746     }
1747 
set(T imm)1748     template <typename T> void set(T imm) {
1749         setPayload<T>(imm);
1750         type = getDataType<T>();
1751     }
1752 
shrinkSigned(T imm)1753     template <typename T> void shrinkSigned(T imm) {
1754         if (imm == T(int16_t(imm)))       set<int16_t>(imm);
1755         else if (imm == T(uint16_t(imm))) set<uint16_t>(imm);
1756         else if (imm == T(int32_t(imm)))  set<int32_t>(imm);
1757         else if (imm == T(uint32_t(imm))) set<uint32_t>(imm);
1758         else                              set(imm);
1759     }
1760 
shrinkUnsigned(T imm)1761     template <typename T> void shrinkUnsigned(T imm) {
1762         if (imm == T(uint16_t(imm)))      set<uint16_t>(imm);
1763         else if (imm == T(uint32_t(imm))) set<uint32_t>(imm);
1764         else                              set(imm);
1765     }
1766 
1767 public:
Immediate()1768     Immediate() : payload(0), type(DataType::invalid) {}
1769 
1770 #ifdef NGEN_ASM
1771     static const bool emptyOp = false;
1772 #endif
1773 
getType() const1774     constexpr14 DataType getType()           const { return type; }
operator uint64_t() const1775     explicit constexpr14 operator uint64_t() const { return payload; }
getMods() const1776     constexpr14 int getMods()                const { return 0; }
isARF() const1777     constexpr14 bool isARF()                 const { return false; }
1778 
setType(DataType type_)1779     Immediate &setType(DataType type_)             { type = type_; return *this; }
1780 
Immediate(uint16_t imm)1781     Immediate(uint16_t imm) { set(imm); }
Immediate(int16_t imm)1782     Immediate(int16_t  imm) { set(imm); }
Immediate(uint32_t imm)1783     Immediate(uint32_t imm) { shrinkUnsigned(imm); }
Immediate(int32_t imm)1784     Immediate(int32_t  imm) { shrinkSigned(imm); }
Immediate(uint64_t imm)1785     Immediate(uint64_t imm) { shrinkUnsigned(imm); }
Immediate(int64_t imm)1786     Immediate(int64_t  imm) { shrinkSigned(imm); }
1787 
Immediate(float imm)1788     Immediate(float    imm) { set(imm); }
Immediate(double imm)1789     Immediate(double   imm) { set(imm); }
1790 #ifdef NGEN_HALF_TYPE
Immediate(half imm)1791     Immediate(half     imm) { set(imm); }
1792 #endif
1793 #ifdef NGEN_BFLOAT16_TYPE
Immediate(bfloat16 imm)1794     Immediate(bfloat16 imm) { set(imm); }
1795 #endif
1796 
hideType() const1797     Immediate hideType() const {
1798         Immediate result = *this;
1799         result.hiddenType = true;
1800         return result;
1801     }
1802 
uw(uint16_t imm)1803     static inline Immediate uw(uint16_t imm) { return Immediate(imm); }
w(int16_t imm)1804     static inline Immediate  w(int16_t  imm) { return Immediate(imm); }
ud(uint32_t imm)1805     static inline Immediate ud(uint32_t imm) { Immediate i; i.set(imm); return i; }
d(int32_t imm)1806     static inline Immediate  d(int32_t  imm) { Immediate i; i.set(imm); return i; }
uq(uint64_t imm)1807     static inline Immediate uq(uint64_t imm) { Immediate i; i.set(imm); return i; }
q(int64_t imm)1808     static inline Immediate  q(int64_t  imm) { Immediate i; i.set(imm); return i; }
f(float imm)1809     static inline Immediate  f(float    imm) { return Immediate(imm); }
df(double imm)1810     static inline Immediate df(double   imm) { return Immediate(imm); }
1811 
hf(uint16_t f)1812     static inline Immediate hf(uint16_t f) {
1813         uint32_t fimm = f;
1814         fimm |= (fimm << 16);
1815         return Immediate(fimm, DataType::hf);
1816     }
1817 
bf(uint16_t f)1818     static inline Immediate bf(uint16_t f) {
1819         uint32_t fimm = f;
1820         fimm |= (fimm << 16);
1821         return Immediate(fimm, DataType::bf);
1822     }
1823 
1824 protected:
toUV(int8_t i)1825     static inline uint32_t toUV(int8_t i) {
1826 #ifdef NGEN_SAFE
1827         if (i & 0xF0) throw invalid_immediate_exception();
1828 #endif
1829         return i;
1830     }
1831 
1832 public:
uv(uint32_t i)1833     static inline Immediate uv(uint32_t i) {
1834         return Immediate(i, DataType::uv);
1835     }
1836 
uv(uint8_t i0,uint8_t i1,uint8_t i2,uint8_t i3,uint8_t i4,uint8_t i5,uint8_t i6,uint8_t i7)1837     static inline Immediate uv(uint8_t i0, uint8_t i1, uint8_t i2, uint8_t i3, uint8_t i4, uint8_t i5, uint8_t i6, uint8_t i7) {
1838         uint32_t payload = (toUV(i0) << 0)
1839                          | (toUV(i1) << 4)
1840                          | (toUV(i2) << 8)
1841                          | (toUV(i3) << 12)
1842                          | (toUV(i4) << 16)
1843                          | (toUV(i5) << 20)
1844                          | (toUV(i6) << 24)
1845                          | (toUV(i7) << 28);
1846         return uv(payload);
1847     }
1848 
1849 protected:
toV(int8_t i)1850     static inline uint32_t toV(int8_t i) {
1851 #ifdef NGEN_SAFE
1852         if (i < -8 || i > 7) throw invalid_immediate_exception();
1853 #endif
1854         return (i & 0x7) | ((i >> 4) & 0x8);
1855     }
1856 
1857 public:
v(uint32_t i)1858     static inline Immediate v(uint32_t i) {
1859         return Immediate(i, DataType::v);
1860     }
1861 
v(int8_t i0,int8_t i1,int8_t i2,int8_t i3,int8_t i4,int8_t i5,int8_t i6,int8_t i7)1862     static inline Immediate v(int8_t i0, int8_t i1, int8_t i2, int8_t i3, int8_t i4, int8_t i5, int8_t i6, int8_t i7) {
1863         uint32_t payload = (toV(i0) << 0)
1864                          | (toV(i1) << 4)
1865                          | (toV(i2) << 8)
1866                          | (toV(i3) << 12)
1867                          | (toV(i4) << 16)
1868                          | (toV(i5) << 20)
1869                          | (toV(i6) << 24)
1870                          | (toV(i7) << 28);
1871         return v(payload);
1872     }
1873 
toVF(float f)1874     static inline uint32_t toVF(float f) {
1875         uint32_t fi = utils::bitcast<float, uint32_t>(f);
1876         int exp = (fi >> 23) & 0xFF;
1877         int new_exp = exp - 127 + 3;
1878 
1879         if (f == 0.) new_exp = 0;
1880 
1881 #ifdef NGEN_SAFE
1882         if ((new_exp & ~7) || (fi & 0x0007FFFF))
1883             throw invalid_immediate_exception();
1884 #endif
1885 
1886         return ((fi >> 24) & 0x80)
1887              | ((new_exp & 0x7) << 4)
1888              | ((fi >> 19) & 0xF);
1889     }
1890 
vf(float f0,float f1,float f2,float f3)1891     static inline Immediate vf(float f0, float f1, float f2, float f3) {
1892         uint32_t payload = (toVF(f0) << 0)
1893                          | (toVF(f1) << 8)
1894                          | (toVF(f2) << 16)
1895                          | (toVF(f3) << 24);
1896 
1897         return Immediate(payload, DataType::vf);
1898     }
1899 
fixup(int execSize,DataType defaultType,bool isDest,int arity) const1900     void fixup(int execSize, DataType defaultType, bool isDest, int arity) const {
1901 #ifdef NGEN_SAFE
1902         if (getBytes(type) > (16 >> arity))
1903             throw invalid_immediate_exception();
1904 #endif
1905     }
1906 
isScalar() const1907     constexpr14 bool isScalar() const {
1908         switch (type) {
1909             case DataType::uv:
1910             case DataType::v:
1911             case DataType::vf:
1912                 return false;
1913             default:
1914                 return true;
1915         }
1916     }
1917 
forceInt32() const1918     Immediate forceInt32() const {
1919         auto result = *this;
1920         if (result.type == DataType::uw)
1921             result.set<uint32_t>(uint16_t(payload));
1922         else if (result.type == DataType::w)
1923             result.set<int32_t>(int16_t(payload));
1924         return result;
1925     }
1926 
1927 #ifdef NGEN_ASM
1928     inline void outputText(std::ostream &str, PrintDetail detail, LabelManager &man) const;
1929 #endif
1930 };
1931 
// Compute the ctrl field for a bfn instruction from a three-input boolean
// function. The function is evaluated once on the bit patterns 0xAA, 0xCC and
// 0xF0; the low eight bits of its result form the ctrl value.
// e.g. ctrl = getBFNCtrl([](uint8_t a, uint8_t b, uint8_t c) { return (a & b) | (c & ~b); });
template <typename F>
inline uint8_t getBFNCtrl(F func)
{
    const uint8_t ctrl = func(0xAA, 0xCC, 0xF0);
    return ctrl;
}
1936 
// Barrier participation kinds, with fixed 8-bit numeric values.
enum class BarrierType : uint8_t {
    ProducerConsumer = 0,
    Producer         = 1,
    Consumer         = 2,
};
1942 
1943 /********************************************************************/
1944 /* HDC sends                                                        */
1945 /********************************************************************/
// 32-bit send message descriptor. `all` is the raw value; every other member
// is an alternative bit-field view of the same 32 bits.
// Bit positions in the comments below assume bit-fields are allocated starting
// from the least significant bit, which is what the `all` overlay relies on.
union MessageDescriptor {
    uint32_t all;

    // Generic view.
    struct {
        unsigned funcCtrl : 19;     /* [18:0]  SF-dependent */
        unsigned header : 1;        /* [19]    is a header present? */
        unsigned responseLen : 5;   /* [24:20] # of GRFs returned: valid range 0-16 */
        unsigned messageLen : 4;    /* [28:25] # of GRFs sent in src0: valid range 1-15 */
        unsigned : 3;               /* [31:29] */
    } parts;

    // `bti` view: 8-bit index in the low byte.
    struct {
        unsigned index : 8;         /* [7:0]   */
        unsigned rest : 24;         /* [31:8]  */
    } bti;

    // `block` view.
    struct {
        unsigned index : 8;         /* [7:0]   */
        unsigned elements : 3;      /* [10:8]  */
        unsigned subtype : 2;       /* [12:11] */
        unsigned subtype2 : 1;      /* [13]    */
        unsigned messageType : 5;   /* [18:14] */
        unsigned header : 1;        /* [19]    */
        unsigned responseLen : 5;   /* [24:20] */
        unsigned messageLen : 4;    /* [28:25] */
        unsigned : 3;               /* [31:29] */
    } block;

    // `scattered` view.
    struct {
        unsigned index : 8;         /* [7:0]   */
        unsigned simd16 : 1;        /* [8]     */
        unsigned legacySIMD : 1;    /* [9]     */
        unsigned elements : 2;      /* [11:10] */
        unsigned : 1;               /* [12]    */
        unsigned : 1;               /* [13]    */
        unsigned messageType : 5;   /* [18:14] */
        unsigned header : 1;        /* [19]    */
        unsigned responseLen : 5;   /* [24:20] */
        unsigned messageLen : 4;    /* [28:25] */
        unsigned : 3;               /* [31:29] */
    } scattered;

    // `a64_scattered` view.
    struct {
        unsigned index : 8;         /* [7:0]   */
        unsigned subtype : 2;       /* [9:8]   */
        unsigned elements : 2;      /* [11:10] */
        unsigned simd16 : 1;        /* [12]    */
        unsigned : 1;               /* [13]    */
        unsigned messageType : 5;   /* [18:14] */
        unsigned header : 1;        /* [19]    */
        unsigned responseLen : 5;   /* [24:20] */
        unsigned messageLen : 4;    /* [28:25] */
        unsigned : 3;               /* [31:29] */
    } a64_scattered;

    // `atomic` view.
    struct {
        unsigned index : 8;         /* [7:0]   */
        unsigned atomicOp : 4;      /* [11:8]  */
        unsigned simd8 : 1;         /* [12]    or data width. */
        unsigned returnData : 1;    /* [13]    */
        unsigned messageType : 5;   /* [18:14] */
        unsigned header : 1;        /* [19]    */
        unsigned responseLen : 5;   /* [24:20] */
        unsigned messageLen : 4;    /* [28:25] */
        unsigned : 3;               /* [31:29] */
    } atomic;

    // `surface` view.
    struct {
        unsigned index : 8;         /* [7:0]   */
        unsigned cmask : 4;         /* [11:8]  */
        unsigned simdMode : 2;      /* [13:12] */
        unsigned messageType : 5;   /* [18:14] */
        unsigned header : 1;        /* [19]    */
        unsigned responseLen : 5;   /* [24:20] */
        unsigned messageLen : 4;    /* [28:25] */
        unsigned : 3;               /* [31:29] */
    } surface;

    // `standardLSC` view.
    struct {
        unsigned opcode : 6;        /* [5:0]   */
        unsigned : 1;               /* [6]     */
        unsigned addrSize : 2;      /* [8:7]   */
        unsigned dataSize : 3;      /* [11:9]  */
        unsigned vectSize : 3;      /* [14:12] */
        unsigned transpose : 1;     /* [15]    */
        unsigned : 1;               /* [16]    */
        unsigned cache : 3;         /* [19:17] */
        unsigned : 9;               /* [28:20] */
        unsigned model : 2;         /* [30:29] */
        unsigned : 1;               /* [31]    */
    } standardLSC;

    // `cmask` view: exposes only a 4-bit channel mask.
    struct {
        unsigned : 12;              /* [11:0]  */
        unsigned cmask : 4;         /* [15:12] */
        unsigned : 16;              /* [31:16] */
    } cmask;

    // `block2D` view: exposes only the vnni bit.
    struct {
        unsigned : 7;               /* [6:0]   */
        unsigned vnni : 1;          /* [7]     */
        unsigned : 24;              /* [31:8]  */
    } block2D;

    // All bits zero.
    MessageDescriptor() : all(0) {}

    // Build from a raw 32-bit value.
    explicit constexpr MessageDescriptor(uint32_t all_) : all(all_) {}
};
2043 
// Merge two message descriptors by OR-ing their raw 32-bit values.
inline constexpr MessageDescriptor operator|(const MessageDescriptor &desc1, const MessageDescriptor &desc2)
{
    return MessageDescriptor(desc1.all | desc2.all);
}
2047 
2048 union ExtendedMessageDescriptor {
2049     uint32_t all;
2050     struct {
2051         unsigned sfid : 5;
2052         unsigned eot : 1;
2053         unsigned extMessageLen : 5;    /* # of GRFs sent in src1: valid range 0-15 (pre-Gen12) */
2054         unsigned : 1;
2055         unsigned : 4;                  /* Part of exFuncCtrl for non-immediate sends */
2056         unsigned exFuncCtrl : 16;
2057     } parts;
2058     struct {
2059         unsigned : 12;
2060         signed offset : 20;
2061     } flat;
2062     struct {
2063         unsigned : 12;
2064         signed offset : 12;
2065         unsigned index : 8;
2066     } bti;
2067     struct {
2068         unsigned : 6;
2069         unsigned index : 26;
2070     } surface;
2071 
ExtendedMessageDescriptor()2072     ExtendedMessageDescriptor() : all(0) {}
operator =(SharedFunction sfid_)2073     ExtendedMessageDescriptor& operator=(SharedFunction sfid_) { parts.sfid = static_cast<int>(sfid_); return *this; }
2074 };
2075 
// Atomic operations, each with a fixed 16-bit code.
// Bit 4 of the code is what isFloatAtomicOp() tests to recognise the
// floating-point operations.
enum class AtomicOp : uint16_t {
    // Integer / bitwise operations.
    cmpwr_2w = 0x0000,
    and_     = 0x1801,
    or_      = 0x1902,
    xor_     = 0x1A03,
    mov      = 0x0B04,
    inc      = 0x0805,
    dec      = 0x0906,
    add      = 0x0C07,
    sub      = 0x0D08,
    revsub   = 0x0009,
    imax     = 0x0F0A,
    imin     = 0x0E0B,
    umax     = 0x110C,
    umin     = 0x100D,
    cmpwr    = 0x120E,
    predec   = 0x000F,

    // Floating-point operations.
    fmax     = 0x1611,
    fmin     = 0x1512,
    fcmpwr   = 0x1713,
    fadd     = 0x1314,
    fsub     = 0x1415,
    fadd_64b = 0x1316,
    fsub_64b = 0x1417,

    load     = 0x0A00,

    // Aliases.
    store    = mov,
    cmpxchg  = cmpwr,
    fcmpxchg = fcmpwr,
};
2105 
operandCount(AtomicOp op)2106 static inline int operandCount(AtomicOp op) {
2107     switch (op) {
2108     case AtomicOp::inc:
2109     case AtomicOp::dec:
2110     case AtomicOp::predec:
2111     case AtomicOp::load:
2112         return 1;
2113     case AtomicOp::cmpwr_2w:
2114     case AtomicOp::cmpwr:
2115     case AtomicOp::fcmpwr:
2116         return 3;
2117     default:
2118         return 2;
2119     }
2120 }
2121 
isFloatAtomicOp(AtomicOp op)2122 static inline constexpr bool isFloatAtomicOp(AtomicOp op) {
2123     return static_cast<int>(op) & 0x10;
2124 }
2125 
// Access types.
enum class Access {
    Read,
    Write,
    AtomicInteger,
    AtomicFloat
};
2128 
// Address models.
// Most values are distinct single bits so that a set of allowed models can be
// OR-ed together and tested with a bitwise AND (see AddressBase::checkModel).
// Note that ModelBSS (0x81) shares bits with both ModelSS and ModelBTS.
enum AddressModel : uint8_t {
    ModelInvalid = 0,
    ModelBTS = 1,
    ModelA32 = 2,
    ModelA64 = 4,
    ModelSLM = 8,
    ModelCC = 0x10,
    ModelSC = 0x20,
    ModelScratch = 0x40,
    ModelSS = 0x80,
    ModelBSS = 0x81,
};

// Base of a memory access: an address model plus an index whose meaning
// depends on the model. Instances are created through the static create*()
// factories; a default-constructed AddressBase is invalid.
class AddressBase {
protected:
    uint32_t index;
    AddressModel model;

    // The index is taken at full 32-bit width, matching the `index` field:
    // createSS()/createBSS() accept 32-bit indices that must not be truncated.
    constexpr AddressBase(uint32_t index_, AddressModel model_) : index(index_), model(model_) {}

    static const uint8_t invalidIndex = 0xF0;

public:
    // Invalid address base (ModelInvalid, index 0xF0).
    constexpr AddressBase() : AddressBase(invalidIndex, ModelInvalid) {}

    constexpr uint32_t getIndex()     const { return index; }
    constexpr AddressModel getModel() const { return model; }

    // Replace the index, keeping the model. Accepts the full 32-bit range.
    void setIndex(uint32_t newIndex)        { index = newIndex; }

    static constexpr AddressBase createBTS(uint8_t index) {
        return AddressBase(index, ModelBTS);
    }
    // A32/A64 use index 0xFF when coherent and 0xFD otherwise.
    static constexpr AddressBase createA32(bool coherent) {
        return AddressBase(coherent ? 0xFF : 0xFD, ModelA32);
    }
    static constexpr AddressBase createA64(bool coherent) {
        return AddressBase(coherent ? 0xFF : 0xFD, ModelA64);
    }
    // SLM always uses index 0xFE.
    static constexpr AddressBase createSLM() {
        return AddressBase(0xFE, ModelSLM);
    }
    static constexpr AddressBase createCC(uint8_t index) {
        return AddressBase(index, ModelCC);
    }
    static constexpr AddressBase createSC(uint8_t index) {
        return AddressBase(index, ModelSC);
    }
    static constexpr AddressBase createSS(uint32_t index) {
        return AddressBase(index, ModelSS);
    }
    static constexpr AddressBase createBSS(uint32_t index) {
        return AddressBase(index, ModelBSS);
    }

    // True for the SC and CC models.
    inline constexpr bool isRO() const {
        return (getModel() == ModelSC || getModel() == ModelCC);
    }
    // True for the A32 and A64 models.
    inline constexpr bool isStateless() const {
        return model & (ModelA32 | ModelA64);
    }

    // With NGEN_SAFE defined, throws invalid_model_exception unless this
    // base's model shares at least one bit with `allowed`; otherwise a no-op.
    void checkModel(uint8_t allowed) const { checkModel(static_cast<AddressModel>(allowed)); }
    void checkModel(AddressModel allowed) const {
#ifdef NGEN_SAFE
        if (!(model & allowed))
            throw invalid_model_exception();
#endif
    }
};
2200 
2201 
2202 class block_hword {
2203 protected:
2204     uint8_t count;
2205 
2206 public:
block_hword(int count_=1)2207     block_hword(int count_ = 1) : count(count_) {};
2208 
getDescriptors(HW hw,const InstructionModifier & mod,AddressBase base,MessageDescriptor & desc,ExtendedMessageDescriptor & exdesc,const RegData & addr) const2209     template <Access access> void getDescriptors(HW hw, const InstructionModifier &mod, AddressBase base, MessageDescriptor &desc, ExtendedMessageDescriptor &exdesc, const RegData &addr) const
2210     {
2211         int dataGRFCount = count;
2212         if (GRF::bytes(hw) == 64) dataGRFCount = (dataGRFCount + 1) >> 1;
2213 
2214         base.checkModel(ModelA64 | ModelBTS | ModelA32 | ModelSLM);
2215         desc.all = 0;
2216         desc.bti.index = base.getIndex();
2217         desc.block.elements = 1 + utils::log2(count);
2218         desc.block.header = true;
2219         desc.block.messageLen = 1;
2220         desc.block.responseLen = dataGRFCount;
2221 
2222         if (base.getModel() == ModelA64) {
2223             exdesc = SharedFunction::dc1;
2224             desc.block.subtype = 0x3;
2225             desc.block.messageType = (access == Access::Write) ? 0x15 : 0x14;
2226         } else {
2227             exdesc = SharedFunction::dc0;
2228             desc.block.messageType = 0x1;
2229             desc.block.subtype2 = 1;
2230         }
2231     }
2232 };
2233 
2234 class block_oword {
2235 protected:
2236     uint8_t count;
2237     uint8_t highHalf;
2238 
block_oword(uint8_t count_,bool highHalf_)2239     constexpr block_oword(uint8_t count_, bool highHalf_) : count(count_), highHalf(highHalf_) {}
2240 
2241 public:
block_oword(int count_=1)2242     block_oword(int count_ = 1) : count(count_), highHalf(false) {}
high()2243     static block_oword high() { return block_oword(1, true); }
2244 
getDescriptors(HW hw,const InstructionModifier & mod,AddressBase base,MessageDescriptor & desc,ExtendedMessageDescriptor & exdesc,const RegData & addr) const2245     template <Access access> void getDescriptors(HW hw, const InstructionModifier &mod, AddressBase base, MessageDescriptor &desc, ExtendedMessageDescriptor &exdesc, const RegData &addr) const
2246     {
2247         int dataGRFCount = (GRF::bytes(hw) == 64) ? (count + 3) >> 2 : (count + 1) >> 1;
2248 
2249         base.checkModel(ModelA32 | ModelA64 | ModelBTS | ModelCC | ModelSLM);
2250         exdesc = (base.getModel() == ModelCC)  ? SharedFunction::dcro :
2251                  (base.getModel() == ModelA64) ? SharedFunction::dc1  :
2252                                                  SharedFunction::dc0;
2253 
2254         desc.all = 0;
2255         desc.bti.index = base.getIndex();
2256         desc.parts.header = true;
2257         desc.parts.messageLen = 1;
2258         desc.parts.responseLen = dataGRFCount;
2259         desc.block.elements = (count == 1) ? highHalf : (1 + utils::log2(count));
2260 
2261         if (base.getModel() == ModelA64)
2262             desc.block.messageType = (access == Access::Write) ? 0x15 : 0x14;
2263         else
2264             desc.block.messageType = (access == Access::Write) << 3;
2265     }
2266 };
2267 
2268 class aligned_block_oword {
2269 protected:
2270     uint8_t count;
2271     uint8_t highHalf;
2272 
aligned_block_oword(uint8_t count_,bool highHalf_)2273     constexpr aligned_block_oword(uint8_t count_, bool highHalf_) : count(count_), highHalf(highHalf_) {}
2274 
2275 public:
aligned_block_oword(int count_=1)2276     aligned_block_oword(int count_ = 1) : count(count_), highHalf(false) {}
high()2277     static aligned_block_oword high() { return aligned_block_oword(1, true); }
2278 
getDescriptors(HW hw,const InstructionModifier & mod,AddressBase base,MessageDescriptor & desc,ExtendedMessageDescriptor & exdesc,const RegData & addr) const2279     template <Access access> void getDescriptors(HW hw, const InstructionModifier &mod, AddressBase base, MessageDescriptor &desc, ExtendedMessageDescriptor &exdesc, const RegData &addr) const
2280     {
2281         int dataGRFCount = (GRF::bytes(hw) == 64) ? (count + 3) >> 2 : (count + 1) >> 1;
2282 
2283         base.checkModel(ModelA32 | ModelA64 | ModelBTS | ModelCC | ModelSLM | ModelSC);
2284         exdesc = (base.getModel() == ModelCC || base.getModel() == ModelSC) ? SharedFunction::dcro :
2285                                               (base.getModel() == ModelA64) ? SharedFunction::dc1 :
2286                                                                               SharedFunction::dc0;
2287 
2288         desc.all = 0;
2289         desc.bti.index = base.getIndex();
2290         desc.parts.header = true;
2291         desc.parts.messageLen = 1;
2292         desc.parts.responseLen = dataGRFCount;
2293         desc.block.elements = (count == 1) ? highHalf : (1 + utils::log2(count));
2294 
2295         if (base.getModel() == ModelA64) {
2296             desc.block.messageType = (access == Access::Write) ? 0x15 : 0x14;
2297             desc.block.subtype = 1;
2298         } else if (base.getModel() == ModelSC)
2299             desc.block.messageType = 4;
2300         else
2301             desc.block.messageType = ((access == Access::Write) << 3) + 1;
2302     }
2303 };
2304 
2305 class scattered_byte {
2306 protected:
2307     uint8_t count;
2308 
2309 public:
scattered_byte(int count_=1)2310     scattered_byte(int count_ = 1) : count(count_) {}
2311 
getDescriptors(HW hw,const InstructionModifier & mod,AddressBase base,MessageDescriptor & desc,ExtendedMessageDescriptor & exdesc,const RegData & addr) const2312     template <Access access> void getDescriptors(HW hw, const InstructionModifier &mod, AddressBase base, MessageDescriptor &desc, ExtendedMessageDescriptor &exdesc, const RegData &addr) const
2313     {
2314         bool a64 = (base.getModel() == ModelA64);
2315         int simd16 = mod.getExecSize() >> 4;
2316         int dataGRFCount = 1 + simd16;
2317         int addrGRFCount = dataGRFCount << int(a64);
2318         if (GRF::bytes(hw) == 64) {
2319             dataGRFCount = 1;
2320             addrGRFCount = 1 << int(a64);
2321             simd16 = 1;
2322         }
2323 
2324         base.checkModel(ModelA32 | ModelA64 | ModelBTS | ModelSLM);
2325         desc.all = 0;
2326         desc.bti.index = base.getIndex();
2327         desc.parts.header = false;
2328         desc.parts.messageLen = addrGRFCount;
2329         desc.parts.responseLen = dataGRFCount;
2330 
2331         if (a64) {
2332             exdesc = SharedFunction::dc1;
2333             desc.a64_scattered.elements = utils::log2(count);
2334             desc.a64_scattered.simd16 = simd16;
2335             desc.a64_scattered.subtype = 0;
2336         } else {
2337             exdesc = SharedFunction::dc0;
2338             desc.scattered.elements = utils::log2(count);
2339             desc.scattered.simd16 = simd16;
2340         }
2341 
2342         if (access == Access::Write)
2343             desc.scattered.messageType = a64 ? 0x1A : 0xC;
2344         else
2345             desc.scattered.messageType = a64 ? 0x10 : 0x4;
2346     }
2347 };
2348 
2349 class scattered_atomic {
2350 public:
applyAtomicOp(AtomicOp op,const RegData & dst,MessageDescriptor & desc) const2351     void applyAtomicOp(AtomicOp op, const RegData &dst, MessageDescriptor &desc) const
2352     {
2353         desc.atomic.returnData = !dst.isNull();
2354         desc.atomic.atomicOp = static_cast<int>(op) & 0xF;
2355     }
2356 };
2357 
2358 class scattered_word : public scattered_atomic {
2359 public:
getDescriptors(HW hw,const InstructionModifier & mod,AddressBase base,MessageDescriptor & desc,ExtendedMessageDescriptor & exdesc,const RegData & addr) const2360     template <Access access> void getDescriptors(HW hw, const InstructionModifier &mod, AddressBase base, MessageDescriptor &desc, ExtendedMessageDescriptor &exdesc, const RegData &addr) const
2361     {
2362         bool a64 = (base.getModel() == ModelA64);
2363         int simd16 = mod.getExecSize() >> 4;
2364         int addrGRFCount = (1 + simd16) << int(a64);
2365         int dataGRFCount = 1 + simd16;
2366         if (GRF::bytes(hw) == 64) {
2367             addrGRFCount = 1 << int(a64);
2368             dataGRFCount = 1;
2369             simd16 = 1;
2370         }
2371 
2372 #ifdef NGEN_SAFE
2373         if (!(access == Access::AtomicInteger || access == Access::AtomicFloat))
2374             throw invalid_load_store_exception();
2375 #endif
2376         base.checkModel(ModelA32 | ModelA64 | ModelBTS | ModelSLM);
2377         exdesc = SharedFunction::dc1;
2378         desc.all = 0;
2379         desc.bti.index = base.getIndex();
2380         desc.parts.header = false;
2381         desc.parts.messageLen = addrGRFCount;
2382         desc.parts.responseLen = dataGRFCount;
2383 
2384         if (access == Access::AtomicFloat)
2385             desc.atomic.messageType = a64 ? 0x1E : 0x1C;
2386         else
2387             desc.atomic.messageType = a64 ? 0x13 : 0x03;
2388 
2389         desc.atomic.simd8 = a64 ? 0 : !simd16;
2390     }
2391 };
2392 
2393 class scattered_dword : public scattered_atomic {
2394 protected:
2395     uint8_t count;
2396 
2397 public:
scattered_dword(int count_=1)2398     scattered_dword(int count_ = 1) : count(count_) {}
2399 
getDescriptors(HW hw,const InstructionModifier & mod,AddressBase base,MessageDescriptor & desc,ExtendedMessageDescriptor & exdesc,const RegData & addr) const2400     template <Access access> void getDescriptors(HW hw, const InstructionModifier &mod, AddressBase base, MessageDescriptor &desc, ExtendedMessageDescriptor &exdesc, const RegData &addr) const
2401     {
2402         bool a64 = (base.getModel() == ModelA64);
2403         int simd16 = mod.getExecSize() >> 4;
2404         int addrGRFCount = (1 + simd16) << int(a64);
2405         int dataGRFCount = count * (1 + simd16);
2406         if (GRF::bytes(hw) == 64) {
2407             addrGRFCount = 1 << int(a64);
2408             dataGRFCount = count;
2409             simd16 = 1;
2410         }
2411 
2412         desc.all = 0;
2413         desc.bti.index = base.getIndex();
2414         desc.parts.header = false;
2415         desc.parts.messageLen = addrGRFCount;
2416         desc.parts.responseLen = dataGRFCount;
2417 
2418         if (access == Access::AtomicInteger || access == Access::AtomicFloat) {
2419             base.checkModel(ModelA32 | ModelA64 | ModelBTS | ModelSLM);
2420             exdesc = SharedFunction::dc1;
2421             if (access == Access::AtomicFloat)
2422                 desc.atomic.messageType = a64 ? 0x1D : 0x1B;
2423             else
2424                 desc.atomic.messageType = a64 ? 0x12 : 0x02;
2425             desc.atomic.simd8 = a64 ? 0 : !simd16;
2426         } else if (a64) {
2427             exdesc = SharedFunction::dc1;
2428             desc.a64_scattered.elements = utils::log2(count);
2429             desc.a64_scattered.simd16 = simd16;
2430             desc.a64_scattered.subtype = 0x1;
2431             desc.a64_scattered.messageType = (access == Access::Write) ? 0x1A : 0x10;
2432         } else {
2433             base.checkModel(ModelA32 | ModelBTS | ModelCC);
2434             exdesc = (base.getModel() == ModelCC) ? SharedFunction::dcro : SharedFunction::dc0;
2435             desc.scattered.elements = utils::log2(count);
2436             desc.scattered.legacySIMD = 1;
2437             desc.scattered.simd16 = simd16;
2438             desc.scattered.messageType = (access == Access::Write) ? 0xB : 0x3;
2439         }
2440     }
2441 };
2442 
2443 class scattered_qword : public scattered_atomic {
2444 protected:
2445     uint8_t count;
2446 
2447 public:
scattered_qword(int count_=1)2448     scattered_qword(int count_ = 1) : count(count_) {}
2449 
getDescriptors(HW hw,const InstructionModifier & mod,AddressBase base,MessageDescriptor & desc,ExtendedMessageDescriptor & exdesc,const RegData & addr) const2450     template <Access access> void getDescriptors(HW hw, const InstructionModifier &mod, AddressBase base, MessageDescriptor &desc, ExtendedMessageDescriptor &exdesc, const RegData &addr) const
2451     {
2452         bool a64 = (base.getModel() == ModelA64);
2453         int simd16 = mod.getExecSize() >> 4;
2454         int addrGRFCount = (1 + simd16) << int(a64);
2455         int dataGRFCount = count * 2 * (1 + simd16);
2456         if (GRF::bytes(hw) == 64) {
2457             addrGRFCount = 1 << int(a64);
2458             dataGRFCount = count * 2;
2459             simd16 = 1;
2460         }
2461 
2462         base.checkModel(ModelA32 | ModelA64 | ModelBTS | ModelSLM);
2463         desc.all = 0;
2464         desc.bti.index = base.getIndex();
2465         desc.parts.header = false;
2466         desc.parts.messageLen = addrGRFCount;
2467         desc.parts.responseLen = dataGRFCount;
2468 
2469         if (access == Access::AtomicInteger || access == Access::AtomicFloat) {
2470             // Note: atomics have same encoding as scattered dword. The atomic operation type
2471             //   determines the length. The one exception is A64 atomic float.
2472             exdesc = SharedFunction::dc1;
2473             if (access == Access::AtomicFloat) {
2474                 desc.atomic.messageType = a64 ? 0x1D : 0x1B;
2475                 desc.atomic.simd8 = a64 ? 0 : !simd16;
2476             } else {
2477                 desc.atomic.messageType = a64 ? 0x12 : 0x02;
2478                 desc.atomic.simd8 = a64 ? 1 : !simd16;
2479             }
2480         } else if (a64) {
2481             exdesc = SharedFunction::dc1;
2482             desc.a64_scattered.elements = utils::log2(count);
2483             desc.a64_scattered.simd16 = simd16;
2484             desc.a64_scattered.subtype = 0x2;
2485             desc.a64_scattered.messageType = (access == Access::Write) ? 0x1A : 0x10;
2486         } else {
2487             exdesc = SharedFunction::dc0;
2488             desc.scattered.elements = utils::log2(count);
2489             desc.scattered.legacySIMD = 1;
2490             desc.scattered.simd16 = simd16;
2491             desc.scattered.messageType = (access == Access::Write) ? 0xD : 0x5;
2492         }
2493     }
2494 };
2495 
2496 class surface_dword {
2497 protected:
2498     ChannelMask cmask;
2499     bool structured;
2500 
2501 public:
surface_dword(ChannelMask cmask_=ChannelMask::r,bool structured_=false)2502     surface_dword(ChannelMask cmask_ = ChannelMask::r, bool structured_ = false) : cmask(cmask_), structured(structured_) {}
2503 
getDescriptors(HW hw,const InstructionModifier & mod,AddressBase base,MessageDescriptor & desc,ExtendedMessageDescriptor & exdesc,const RegData & addr) const2504     template <Access access> void getDescriptors(HW hw, const InstructionModifier &mod, AddressBase base, MessageDescriptor &desc, ExtendedMessageDescriptor &exdesc, const RegData &addr) const
2505     {
2506         int simd16 = mod.getExecSize() >> 4;
2507         if (GRF::bytes(hw) == 64) simd16 = 1;
2508         int nChannels = utils::popcnt(0xF ^ static_cast<int8_t>(cmask));
2509         bool isA64 = base.getModel() == ModelA64;
2510         int addrGRFCount = (1 + simd16) << int(isA64) << int(structured);
2511         int dataGRFCount = nChannels * (1 + simd16);
2512         if (GRF::bytes(hw) == 64) {
2513             addrGRFCount = (addrGRFCount + 1) >> 1;
2514             dataGRFCount = (dataGRFCount + 1) >> 1;
2515         }
2516 
2517         base.checkModel(ModelBTS | ModelA32 | ModelA64 | ModelSLM);
2518 
2519         exdesc = SharedFunction::dc1;
2520 
2521         desc.all = 0;
2522         desc.bti.index = base.getIndex();
2523         desc.parts.header = false;
2524         desc.parts.messageLen = addrGRFCount;
2525         desc.parts.responseLen = dataGRFCount;
2526         desc.surface.messageType = (isA64 << 4) | ((access == Access::Write) << 3) | 0x01;
2527         desc.surface.cmask = static_cast<int>(cmask);
2528         desc.surface.simdMode = 2 - simd16;
2529     }
2530 };
2531 
2532 class media_block {
2533 protected:
2534     bool vls_override;
2535     uint8_t vls_offset;
2536     uint8_t width;
2537     uint8_t height;
2538 
2539 public:
media_block(int width_,int height_)2540     media_block(int width_, int height_) : vls_override(false), vls_offset(0),
2541         width(width_), height(height_) {}
media_block(int width_,int height_,int vls_offset_)2542     media_block(int width_, int height_, int vls_offset_) : vls_override(true),
2543         vls_offset(vls_offset_), width(width_), height(height_) {}
media_block()2544     media_block() : media_block(0, 0) {}
2545 
getDescriptors(HW hw,const InstructionModifier & mod,AddressBase base,MessageDescriptor & desc,ExtendedMessageDescriptor & exdesc,const RegData & addr) const2546     template <Access access> void getDescriptors(HW hw, const InstructionModifier &mod, AddressBase base, MessageDescriptor &desc, ExtendedMessageDescriptor &exdesc, const RegData &addr) const
2547     {
2548         exdesc = SharedFunction::dc1;
2549         desc.all = 0;
2550         desc.bti.index = base.getIndex();
2551         desc.block.messageType = (base.getModel() == ModelSC) ? 0x05 :
2552                                     (access == Access::Write) ? 0x0A :
2553                                                                 0x04;
2554         desc.block.elements = (vls_override << 2) | (vls_offset & 1);
2555         desc.block.header = true;
2556 
2557         int dataGRFCount = 0;
2558         if (width > 0) {
2559             int lg2_rows_per_2grf = std::min<int>(4, 6 - utils::bsr(width));
2560             dataGRFCount = utils::roundup_pow2((height + (1 << lg2_rows_per_2grf) - 1) >> lg2_rows_per_2grf);
2561         }
2562 
2563         desc.parts.responseLen = dataGRFCount;
2564         desc.parts.messageLen = 1;
2565     }
2566 };
2567 
2568 /********************************************************************/
2569 /* New dataport messages.                                           */
2570 /********************************************************************/
// Opcode values for the new dataport (LSC) messages.
//   Each store opcode equals the matching load opcode with the value 4 added
//   (load/store = 0/4, load_cmask/store_cmask = 2/6, load_block/store_block =
//   1/5, load_2dblock/store_2dblock = 3/7). DataSpecLSC::getDescriptors relies
//   on this when it ORs LSCOpcode::store into the opcode for write accesses.
//   The four block opcodes are listed last, out of numeric order.
enum class LSCOpcode : uint8_t {
    load = 0,
    load_cmask = 2,
    store = 4,
    store_cmask = 6,
    atomic_inc = 8,
    atomic_dec = 9,
    atomic_load = 0xA,
    atomic_store = 0xB,
    atomic_add = 0xC,
    atomic_sub = 0xD,
    atomic_min = 0xE,
    atomic_max = 0xF,
    atomic_umin = 0x10,
    atomic_umax = 0x11,
    atomic_cmpxchg = 0x12,
    atomic_fadd = 0x13,
    atomic_fsub = 0x14,
    atomic_fmin = 0x15,
    atomic_fmax = 0x16,
    atomic_fcmpxchg = 0x17,
    atomic_and = 0x18,
    atomic_or = 0x19,
    atomic_xor = 0x1A,
    load_status = 0x1B,
    store_uncompressed = 0x1C,
    ccs_update = 0x1D,
    rsi = 0x1E,
    fence = 0x1F,
    load_block = 1,
    load_2dblock = 3,
    store_block = 5,
    store_2dblock = 7,
};
2605 
// Data sizes for LSC messages. Each value packs two fields:
//   - low byte:  the code that DataSpecLSC places in the descriptor (its low
//                three bits, shifted left by 9);
//   - high byte: the width returned by getRegisterWidth(), which DataSpecLSC
//                stores as `dbytes`. D8U32 and D16U32 share the width of D32.
enum class DataSizeLSC : uint16_t {
    D8 = 0x0100,
    D16 = 0x0201,
    D32 = 0x0402,
    D64 = 0x0803,
    D8U32 = 0x0404,
    D16U32 = 0x0405,
};
2614 
getRegisterWidth(DataSizeLSC dsize)2615 static inline constexpr unsigned getRegisterWidth(DataSizeLSC dsize) {
2616     return static_cast<uint16_t>(dsize) >> 8;
2617 }
2618 
// Cache settings for LSC messages; the value is written to the descriptor's
// cache field by DataSpecLSC. Two names on one line are aliases for the same
// value.
// NOTE(review): the paired names look like load-side (left) and store-side
// (right) spellings of the same setting -- confirm against the hardware spec.
enum class CacheSettingsLSC : uint8_t {
    Default   = 0,
    L1UC_L3UC = 1,
    L1UC_L3C  = 2,    L1UC_L3WB = 2,
    L1C_L3UC  = 3,    L1WT_L3UC = 3,
    L1C_L3C   = 4,    L1WT_L3WB = 4,
    L1S_L3UC  = 5,
    L1S_L3C   = 6,    L1S_L3WB  = 6,
    L1IAR_L3C = 7,    L1WB_L3WB = 7,
};
2629 
// Data spec for LSC messages. A spec is a partially filled message descriptor
// plus two bookkeeping values; specs are combined with operator| (which ORs
// all three members) and turned into final descriptors by getDescriptors().
struct DataSpecLSC {
    MessageDescriptor desc;     // Descriptor bits accumulated so far.
    uint8_t vcount = 0;         // Vector count; 0 is treated as 1 when sizing messages.
    uint8_t dbytes = 0;         // Width taken from getRegisterWidth() of the data size.

    // Values written to the descriptor's addrSize and model fields.
    enum { AddrSize16 = 1, AddrSize32 = 2, AddrSize64 = 3 };
    enum { AddrFlat = 0, AddrSS = 1, AddrBSS = 2, AddrBTI = 3 };

    explicit constexpr DataSpecLSC(MessageDescriptor desc_, uint8_t vcount_ = 0, uint8_t dbytes_ = 0) : desc(desc_), vcount(vcount_), dbytes(dbytes_) {}
    // From a channel mask: selects the load_cmask opcode, stores the inverted
    // low nibble of the mask, and sets vcount to the number of bits set in it.
    /* implicit */ DataSpecLSC(ChannelMask m) {
        desc.standardLSC.opcode = static_cast<uint8_t>(LSCOpcode::load_cmask);
        desc.cmask.cmask = static_cast<uint8_t>(m) ^ 0xF;
        vcount = utils::popcnt(desc.cmask.cmask);
    }
    // From a cache setting: only the descriptor's cache field is set.
    /* implicit */ DataSpecLSC(CacheSettingsLSC s) {
        desc.standardLSC.cache = static_cast<unsigned>(s);
    }
    // From a data size: the low three bits of the value go to descriptor bits
    // 9 and up, and the upper byte becomes dbytes.
    /* implicit */ constexpr DataSpecLSC(DataSizeLSC d) : desc((static_cast<uint32_t>(d) & 0x7) << 9), dbytes(getRegisterWidth(d)) {}

    // Returns this spec combined with a vector count. Counts up to 4 are
    // encoded as vcount - 1; larger counts as log2(vcount) + 1.
    DataSpecLSC operator()(int vcount) const {
        auto vsEncoded = (vcount <= 4) ? (vcount - 1) : (utils::log2(vcount) + 1);
        return *this | createV(vcount, vsEncoded);
    }
    friend inline constexpr DataSpecLSC operator|(const DataSpecLSC &s1, const DataSpecLSC &s2);
    constexpr14 DataSpecLSC &operator|=(const DataSpecLSC &other) {
        *this = *this | other;
        return *this;
    }

    // Building blocks: vector count (encoding at bit 12), transpose flag
    // (bit 15) and VNNI flag (bit 7).
    static constexpr DataSpecLSC createV(unsigned vcount, unsigned venc) { return DataSpecLSC{MessageDescriptor(venc << 12), uint8_t(vcount), 0}; }
    static constexpr DataSpecLSC createTranspose()                       { return DataSpecLSC{MessageDescriptor(1 << 15)}; }
    static constexpr DataSpecLSC createVNNI()                            { return DataSpecLSC{MessageDescriptor(1 << 7)}; }

    // Produces the final descriptor pair for an access through `base` at
    // `addr`. Starts from the accumulated descriptor, then fills in the
    // address size, address model, message/response lengths and, for writes,
    // the store opcode bit.
    template <Access access> void getDescriptors(HW hw, const InstructionModifier &mod, AddressBase base, MessageDescriptor &desc, ExtendedMessageDescriptor &exdesc, const GRFDisp &addr) const
    {
        bool a64 = (base.getModel() == ModelA64);
        desc = this->desc;
        exdesc = (base.getModel() == ModelSLM) ? SharedFunction::slm : SharedFunction::ugm;

        desc.standardLSC.addrSize = a64 ? AddrSize64 : AddrSize32;

        // A32 accesses are re-expressed as BTS accesses with index 0xFF.
        if (base.getModel() == ModelA32) base = AddressBase::createBTS(0xFF);

        switch (base.getModel()) {
            case ModelA64:
            case ModelSLM:
                desc.standardLSC.model = AddrFlat;
                exdesc.flat.offset = addr.getDisp();
                break;
            case ModelBTS:
                desc.standardLSC.model = AddrBTI;
                exdesc.bti.index = base.getIndex();
                exdesc.bti.offset = addr.getDisp();
                break;
            case ModelSS:
            case ModelBSS:
                // Note: the displacement of addr is not used for SS/BSS.
                desc.standardLSC.model = (base.getModel() == ModelSS ? AddrSS : AddrBSS);
                exdesc.surface.index = base.getIndex();
                break;
            default:
                // Any other model is rejected under NGEN_SAFE and otherwise
                // leaves the model field as it was in the accumulated descriptor.
#ifdef NGEN_SAFE
                throw invalid_model_exception();
#endif
                break;
        }

        auto vc = std::max<unsigned>(vcount, 1);
        // This test runs before the store bit is ORed in below, so it sees
        // opcode 0 (LSCOpcode::load) for plain reads and plain writes alike.
        if (this->desc.standardLSC.transpose && !desc.standardLSC.opcode) {
            // Transposed: a single address register; the data length depends
            // only on dbytes * vc, not on the execution size.
            desc.parts.messageLen = 1;
            desc.parts.responseLen = GRF::bytesToGRFs(hw, dbytes * vc);
        } else {
            // Otherwise lengths scale with the execution size; A64 addresses
            // take twice the registers, and dbytes of 8 or more doubles the data.
            auto effSIMDGRFs = 1 + ((mod.getExecSize()) >> (GRF::log2Bytes(hw) - 1));
            desc.parts.messageLen = effSIMDGRFs * (a64 ? 2 : 1);
            desc.parts.responseLen = effSIMDGRFs * vc * (1 + (dbytes >> 3));
        }

        if (access == Access::Write)
            desc.standardLSC.opcode |= static_cast<uint8_t>(LSCOpcode::store);
    }

    // Replaces the opcode with the upper byte of the atomic operation's 16-bit
    // value. `dst` is not consulted.
    void applyAtomicOp(AtomicOp op, const RegData &dst, MessageDescriptor &desc) const
    {
        desc.standardLSC.opcode = static_cast<uint16_t>(op) >> 8;
    }
};
2715 
// Returns dtype with the vector count vsize applied (default 1).
static inline DataSpecLSC scattered(const DataSpecLSC &dtype, int vsize = 1) {
    return dtype.operator()(vsize);
}
// Returns dtype with the vector count vsize applied (default 1) and the
// transpose flag set.
static inline DataSpecLSC block(const DataSpecLSC &dtype, int vsize = 1) {
    DataSpecLSC spec = dtype(vsize);
    spec |= DataSpecLSC::createTranspose();
    return spec;
}
2718 
// Combines two data specs by ORing their descriptors, vector counts and data
// widths member by member.
inline constexpr DataSpecLSC operator|(const DataSpecLSC &s1, const DataSpecLSC &s2) {
    return DataSpecLSC(s1.desc | s2.desc,
                       static_cast<uint8_t>(s1.vcount | s2.vcount),
                       static_cast<uint8_t>(s1.dbytes | s2.dbytes));
}
2722 
2723 class block_2d : public DataSpecLSC {
2724 protected:
2725     uint8_t width, height, count;
2726 
2727 public:
block_2d(const DataSpecLSC & dtype_,int width_,int height_,int count_=1)2728     block_2d(const DataSpecLSC &dtype_, int width_, int height_, int count_ = 1) : DataSpecLSC(dtype_), width(width_), height(height_), count(count_) {}
2729 
operator |(block_2d left,const DataSpecLSC & right)2730     friend block_2d operator|(block_2d left, const DataSpecLSC &right) {
2731         left.DataSpecLSC::operator|=(right);
2732         return left;
2733     }
2734 
getDescriptors(HW hw,const InstructionModifier & mod,AddressBase base,MessageDescriptor & desc,ExtendedMessageDescriptor & exdesc,const GRFDisp & addr) const2735     template <Access access> void getDescriptors(HW hw, const InstructionModifier &mod, AddressBase base, MessageDescriptor &desc, ExtendedMessageDescriptor &exdesc, const GRFDisp &addr) const
2736     {
2737         base.checkModel(ModelA64);
2738 
2739         desc = this->desc;
2740 
2741         desc.standardLSC.opcode = static_cast<uint8_t>((access == Access::Write) ? LSCOpcode::store_2dblock : LSCOpcode::load_2dblock);
2742         desc.standardLSC.model = AddrFlat;
2743 
2744         auto w = width, h = height;
2745         if (this->desc.standardLSC.transpose) std::swap(w, h);
2746         desc.parts.messageLen = 1;
2747         desc.parts.responseLen = std::min(count * GRF::bytesToGRFs(hw, utils::roundup_pow2(w) * h * this->dbytes), 31);
2748 
2749         exdesc = SharedFunction::ugm;
2750         exdesc.flat.offset = addr.getDisp();
2751     }
2752 };
2753 
2754 // Generate descriptors for a load operation.
2755 template <typename DataSpec, typename Addr>
encodeLoadDescriptors(HW hw,MessageDescriptor & desc,ExtendedMessageDescriptor & exdesc,const InstructionModifier & mod,const RegData & dst,const DataSpec & spec,AddressBase base,const Addr & addr)2756 static inline void encodeLoadDescriptors(HW hw, MessageDescriptor &desc, ExtendedMessageDescriptor &exdesc,
2757     const InstructionModifier &mod, const RegData &dst, const DataSpec &spec, AddressBase base, const Addr &addr)
2758 {
2759     spec.template getDescriptors<Access::Read>(hw, mod, base, desc, exdesc, addr);
2760     if (dst.isNull())
2761         desc.parts.responseLen = 0;
2762 }
2763 
2764 // Generate descriptors for a store operation. Requires split send for pre-Gen12.
2765 template <typename DataSpec, typename Addr>
encodeStoreDescriptors(HW hw,MessageDescriptor & desc,ExtendedMessageDescriptor & exdesc,const InstructionModifier & mod,const DataSpec & spec,AddressBase base,const Addr & addr)2766 static inline void encodeStoreDescriptors(HW hw, MessageDescriptor &desc, ExtendedMessageDescriptor &exdesc,
2767     const InstructionModifier &mod, const DataSpec &spec, AddressBase base, const Addr &addr)
2768 {
2769 #ifdef NGEN_SAFE
2770     if (base.isRO()) throw read_only_exception();
2771 #endif
2772 
2773     spec.template getDescriptors<Access::Write>(hw, mod, base, desc, exdesc, addr);
2774     exdesc.parts.extMessageLen = desc.parts.responseLen;
2775     desc.parts.responseLen = 0;
2776 }
2777 
2778 // Generate descriptors for an atomic operation. Requires split send for binary and ternary atomics pre-Gen12.
2779 template <typename DataSpec, typename Addr>
encodeAtomicDescriptors(HW hw,MessageDescriptor & desc,ExtendedMessageDescriptor & exdesc,AtomicOp op,const InstructionModifier & mod,const RegData & dst,const DataSpec & spec,AddressBase base,const Addr & addr)2780 static inline void encodeAtomicDescriptors(HW hw, MessageDescriptor &desc, ExtendedMessageDescriptor &exdesc,
2781     AtomicOp op, const InstructionModifier &mod, const RegData &dst, const DataSpec &spec, AddressBase base, const Addr &addr)
2782 {
2783     if (isFloatAtomicOp(op))
2784         spec.template getDescriptors<Access::AtomicFloat>(hw, mod, base, desc, exdesc, addr);
2785     else
2786         spec.template getDescriptors<Access::AtomicInteger>(hw, mod, base, desc, exdesc, addr);
2787 
2788     spec.applyAtomicOp(op, dst, desc);
2789 
2790     exdesc.parts.extMessageLen = desc.parts.responseLen * (operandCount(op) - 1);
2791     if (dst.isNull())
2792         desc.parts.responseLen = 0;
2793 }
2794 
2795 } /* namespace ngen */
2796 
2797 
2798 #endif /* header guard */
2799