1 /*******************************************************************************
2 * Copyright 2019-2021 Intel Corporation
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *     http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 *******************************************************************************/
16 
17 /*
18  * Do not #include this file directly; ngen uses it internally.
19  */
20 
21 
22 // Pseudo-instructions and macros.
23 template <typename DT = void>
min_(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1)24 void min_(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
25     sel(mod | lt | f0[0], dst, src0, src1);
26 }
27 template <typename DT = void>
min_(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const Immediate & src1)28 void min_(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) {
29     sel(mod | lt | f0[0], dst, src0, src1);
30 }
31 #ifndef NGEN_WINDOWS_COMPAT
32 template <typename DT = void>
min(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1)33 void min(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
34     sel(mod | lt | f0[0], dst, src0, src1);
35 }
36 template <typename DT = void>
min(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const Immediate & src1)37 void min(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) {
38     sel(mod | lt | f0[0], dst, src0, src1);
39 }
40 #endif
41 template <typename DT = void>
max_(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1)42 void max_(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
43     sel(mod | ge | f0[0], dst, src0, src1);
44 }
45 template <typename DT = void>
max_(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const Immediate & src1)46 void max_(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) {
47     sel(mod | ge | f0[0], dst, src0, src1);
48 }
49 #ifndef NGEN_WINDOWS_COMPAT
50 template <typename DT = void>
max(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1)51 void max(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
52     sel(mod | ge | f0[0], dst, src0, src1);
53 }
54 template <typename DT = void>
max(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const Immediate & src1)55 void max(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) {
56     sel(mod | ge | f0[0], dst, src0, src1);
57 }
58 #endif
59 
60 template <typename DT = void>
bfi(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1,const RegData & src2,const RegData & src3)61 void bfi(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1, const RegData &src2, const RegData &src3) {
62     bfi1(mod, dst, src0, src1);
63     bfi2(mod, dst, dst, src2, src3);
64 }
65 
66 // Brief compare instructions.
67 template <typename DT = void>
cmp(const InstructionModifier & mod,const RegData & src0,const RegData & src1)68 void cmp(const InstructionModifier &mod, const RegData &src0, const RegData &src1) {
69     auto dt = getDataType<DT>();
70     if (dt == DataType::invalid)
71         dt = src0.getType();
72     cmp<DT>(mod, null.retype(dt), src0, src1);
73 }
74 template <typename DT = void>
cmp(const InstructionModifier & mod,const RegData & src0,const Immediate & src1)75 void cmp(const InstructionModifier &mod, const RegData &src0, const Immediate &src1) {
76     auto dt = getDataType<DT>();
77     if (dt == DataType::invalid)
78         dt = src0.getType();
79     cmp<DT>(mod, null.retype(dt), src0, src1);
80 }
81 
82 // Brief math instructions.
83 template <typename DT = void>
cos(const InstructionModifier & mod,const RegData & dst,const RegData & src0)84 void cos(const InstructionModifier &mod, const RegData &dst, const RegData &src0) {
85     math<DT>(mod, MathFunction::cos, dst, src0);
86 }
87 template <typename DT = void>
exp(const InstructionModifier & mod,const RegData & dst,const RegData & src0)88 void exp(const InstructionModifier &mod, const RegData &dst, const RegData &src0) {
89     math<DT>(mod, MathFunction::exp, dst, src0);
90 }
91 template <typename DT = void>
fdiv(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1)92 void fdiv(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
93     math<DT>(mod, MathFunction::fdiv, dst, src0, src1);
94 }
95 template <typename DT = void>
fdiv(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const Immediate & src1)96 void fdiv(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) {
97     math<DT>(mod, MathFunction::fdiv, dst, src0, src1);
98 }
99 template <typename DT = void>
idiv(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1)100 void idiv(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
101     math<DT>(mod, MathFunction::idiv, dst, src0, src1);
102 }
103 template <typename DT = void>
idiv(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const Immediate & src1)104 void idiv(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) {
105     math<DT>(mod, MathFunction::idiv, dst, src0, src1);
106 }
107 template <typename DT = void>
inv(const InstructionModifier & mod,const RegData & dst,const RegData & src0)108 void inv(const InstructionModifier &mod, const RegData &dst, const RegData &src0) {
109     math<DT>(mod, MathFunction::inv, dst, src0);
110 }
111 template <typename DT = void>
invm(const InstructionModifier & mod,const ExtendedReg & dst,const ExtendedReg & src0,const ExtendedReg & src1)112 void invm(const InstructionModifier &mod, const ExtendedReg &dst, const ExtendedReg &src0, const ExtendedReg &src1) {
113     math<DT>(mod, MathFunction::invm, dst, src0, src1);
114 }
115 template <typename DT = void>
iqot(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1)116 void iqot(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
117     math<DT>(mod, MathFunction::iqot, dst, src0, src1);
118 }
119 template <typename DT = void>
iqot(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const Immediate & src1)120 void iqot(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) {
121     math<DT>(mod, MathFunction::iqot, dst, src0, src1);
122 }
123 template <typename DT = void>
irem(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1)124 void irem(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
125     math<DT>(mod, MathFunction::irem, dst, src0, src1);
126 }
127 template <typename DT = void>
irem(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const Immediate & src1)128 void irem(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) {
129     math<DT>(mod, MathFunction::irem, dst, src0, src1);
130 }
131 template <typename DT = void>
log(const InstructionModifier & mod,const RegData & dst,const RegData & src0)132 void log(const InstructionModifier &mod, const RegData &dst, const RegData &src0) {
133     math<DT>(mod, MathFunction::log, dst, src0);
134 }
135 template <typename DT = void>
pow(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1)136 void pow(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
137     math<DT>(mod, MathFunction::pow, dst, src0, src1);
138 }
139 template <typename DT = void>
pow(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const Immediate & src1)140 void pow(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) {
141     math<DT>(mod, MathFunction::pow, dst, src0, src1);
142 }
143 template <typename DT = void>
rsqt(const InstructionModifier & mod,const RegData & dst,const RegData & src0)144 void rsqt(const InstructionModifier &mod, const RegData &dst, const RegData &src0) {
145     math<DT>(mod, MathFunction::rsqt, dst, src0);
146 }
147 template <typename DT = void>
rsqtm(const InstructionModifier & mod,const ExtendedReg & dst,const ExtendedReg & src0)148 void rsqtm(const InstructionModifier &mod, const ExtendedReg &dst, const ExtendedReg &src0) {
149     math<DT>(mod, MathFunction::rsqtm, dst, src0);
150 }
151 template <typename DT = void>
sin(const InstructionModifier & mod,const RegData & dst,const RegData & src0)152 void sin(const InstructionModifier &mod, const RegData &dst, const RegData &src0) {
153     math<DT>(mod, MathFunction::sin, dst, src0);
154 }
155 template <typename DT = void>
sqt(const InstructionModifier & mod,const RegData & dst,const RegData & src0)156 void sqt(const InstructionModifier &mod, const RegData &dst, const RegData &src0) {
157     math<DT>(mod, MathFunction::sqt, dst, src0);
158 }
159 
160 #define TMP(n) tmp[n].retype(dst.getType())
161 
162 // IEEE 754-compliant divide math macro sequence.
163 //   Requires GRFs initialized with 0.0 and 1.0, as well as temporary GRFs (4 for single precision, 5 for double precision).
164 //   dst, num, denom must be distinct GRFs.
165 template <typename DT = void, typename A>
fdiv_ieee(const InstructionModifier & mod,FlagRegister flag,RegData dst,RegData num,RegData denom,RegData zero,RegData one,const A & tmp,InstructionModifier cfmod=InstructionModifier ())166 void fdiv_ieee(const InstructionModifier &mod, FlagRegister flag, RegData dst, RegData num, RegData denom,
167                RegData zero, RegData one, const A &tmp, InstructionModifier cfmod = InstructionModifier())
168 {
169     DataType dt = getDataType<DT>();
170     if (dt == DataType::invalid)
171         dt = dst.getType();
172     if (cfmod.getExecSize() == 0)
173         cfmod = mod;
174 
175     Label labelSkip;
176 
177     switch (dt) {
178         case DataType::hf:
179             fdiv<DT>(mod, dst, num, denom);
180             break;
181         case DataType::f:
182             invm<DT>(mod | eo | flag,         dst | mme0,      num | nomme,   denom | nomme);
183             if_(cfmod | ~flag, labelSkip);
184 
185             madm<DT>(mod, TMP(0) | mme1,     zero | nomme,     num | nomme,     dst | mme0);
186             madm<DT>(mod, TMP(1) | mme2,      one | nomme,  -denom | nomme,     dst | mme0);
187             madm<DT>(mod, TMP(2) | mme3,      dst | mme0,   TMP(1) | mme2,      dst | mme0);
188             madm<DT>(mod, TMP(3) | mme4,      num | nomme,  -denom | nomme,  TMP(0) | mme1);
189             madm<DT>(mod, TMP(0) | mme5,   TMP(0) | mme1,   TMP(3) | mme4,   TMP(2) | mme3);
190             madm<DT>(mod, TMP(1) | mme6,      num | nomme,  -denom | nomme,  TMP(0) | mme5);
191             madm<DT>(mod,    dst | nomme,  TMP(0) | mme5,   TMP(1) | mme6,   TMP(2) | mme3);
192 
193             mark(labelSkip);
194             endif(cfmod);
195             break;
196         case DataType::df:
197             invm<DT>(mod | eo | flag,         dst | mme0,      num | nomme,   denom | nomme);
198             if_(cfmod | ~flag, labelSkip);
199 
200             madm<DT>(mod, TMP(0) | mme1,     zero | nomme,     num | nomme,     dst | mme0);
201             madm<DT>(mod, TMP(1) | mme2,      one | nomme,  -denom | nomme,     dst | mme0);
202             madm<DT>(mod, TMP(2) | mme3,      num | nomme,  -denom | nomme,  TMP(0) | mme1);
203             madm<DT>(mod, TMP(3) | mme4,      dst | mme0,   TMP(1) | mme2,      dst | mme0);
204             madm<DT>(mod, TMP(4) | mme5,      one | nomme,  -denom | nomme,  TMP(3) | mme4);
205             madm<DT>(mod,    dst | mme6,      dst | mme0,   TMP(1) | mme2,   TMP(3) | mme4);
206             madm<DT>(mod, TMP(0) | mme7,   TMP(0) | mme1,   TMP(2) | mme3,   TMP(3) | mme4);
207             madm<DT>(mod, TMP(3) | mme0,   TMP(3) | mme4,      dst | mme6,   TMP(4) | mme5);
208             madm<DT>(mod, TMP(2) | mme1,      num | nomme,  -denom | nomme,  TMP(0) | mme7);
209             madm<DT>(mod,    dst | nomme,  TMP(0) | mme7,   TMP(2) | mme1,   TMP(3) | mme0);
210 
211             mark(labelSkip);
212             endif(cfmod);
213             break;
214         default:
215 #ifdef NGEN_SAFE
216             throw invalid_type_exception();
217 #endif
218             break;
219     }
220 }
221 
222 // IEEE 754-compliant reciprocal math macro sequence.
223 //   Requires GRF initialized with 1.0, as well as 3 temporary GRFs.
224 //   dst and src must be distinct GRFs.
225 template <typename DT = void, typename A>
inv_ieee(const InstructionModifier & mod,FlagRegister flag,RegData dst,RegData src,RegData one,const A & tmp,InstructionModifier cfmod=InstructionModifier ())226 void inv_ieee(const InstructionModifier &mod, FlagRegister flag, RegData dst, RegData src, RegData one,
227               const A &tmp, InstructionModifier cfmod = InstructionModifier())
228 {
229     DataType dt = getDataType<DT>();
230     if (dt == DataType::invalid)
231         dt = dst.getType();
232     if (cfmod.getExecSize() == 0)
233         cfmod = mod;
234 
235     Label labelSkip;
236 
237     switch (dt) {
238         case DataType::hf:
239             inv<DT>(mod, dst, src);
240             break;
241         case DataType::f:
242             invm<DT>(mod | eo | flag,         dst | mme0,      one | nomme,     src | nomme);
243             if_(cfmod | ~flag, labelSkip);
244 
245             madm<DT>(mod, TMP(1) | mme2,      one | nomme,    -src | nomme,     dst | mme0);
246             madm<DT>(mod, TMP(2) | mme3,      dst | mme0,   TMP(1) | mme2,      dst | mme0);
247             madm<DT>(mod, TMP(0) | mme5,      dst | mme0,   TMP(1) | mme2,   TMP(2) | mme3);
248             madm<DT>(mod, TMP(1) | mme6,      one | nomme,    -src | nomme,  TMP(0) | mme5);
249             madm<DT>(mod,    dst | nomme,  TMP(0) | mme5,   TMP(1) | mme6,   TMP(2) | mme3);
250 
251             mark(labelSkip);
252             endif(cfmod);
253             break;
254         case DataType::df:
255             invm<DT>(mod | eo | flag,        dst | mme0,      one | nomme,     src | nomme);
256             if_(cfmod | ~flag, labelSkip);
257 
258             madm<DT>(mod, TMP(0) | mme2,     one | nomme,    -src | nomme,     dst | mme0);
259             madm<DT>(mod, TMP(1) | mme4,     dst | mme0,   TMP(0) | mme2,      dst | mme0);
260             madm<DT>(mod, TMP(2) | mme5,     one | nomme,    -src | nomme,  TMP(1) | mme4);
261             madm<DT>(mod,    dst | mme6,     dst | mme0,   TMP(0) | mme2,   TMP(1) | mme4);
262             madm<DT>(mod, TMP(1) | mme0,  TMP(1) | mme4,      dst | mme6,   TMP(2) | mme5);
263             madm<DT>(mod, TMP(0) | mme1,     one | nomme,    -src | nomme,     dst | mme6);
264             madm<DT>(mod,    dst | nomme,    dst | mme6,   TMP(0) | mme1,   TMP(1) | mme0);
265 
266             mark(labelSkip);
267             endif(cfmod);
268             break;
269         default:
270 #ifdef NGEN_SAFE
271             throw invalid_type_exception();
272 #endif
273             break;
274     }
275 }
276 
277 // IEEE 754-compliant square root macro sequence.
278 //   Requires GRFs initialized with 0.0 and 0.5 (also 1.0 for double precision),
279 //     and temporary GRFs (3 for single precision, 4 for double precision).
280 //   dst and src must be distinct GRFs.
281 template <typename DT = void, typename A>
sqt_ieee(const InstructionModifier & mod,FlagRegister flag,RegData dst,RegData src,RegData zero,RegData oneHalf,RegData one,const A & tmp,InstructionModifier cfmod=InstructionModifier ())282 void sqt_ieee(const InstructionModifier &mod, FlagRegister flag, RegData dst, RegData src,
283                RegData zero, RegData oneHalf, RegData one, const A &tmp, InstructionModifier cfmod = InstructionModifier())
284 {
285     DataType dt = getDataType<DT>();
286     if (dt == DataType::invalid)
287         dt = dst.getType();
288     if (cfmod.getExecSize() == 0)
289         cfmod = mod;
290 
291     Label labelSkip;
292 
293     switch (dt) {
294         case DataType::hf:
295             sqt<DT>(mod, dst, src);
296             break;
297         case DataType::f:
298             rsqtm<DT>(mod | eo | flag,        dst | mme0,       src | nomme);
299             if_(cfmod | ~flag, labelSkip);
300 
301             madm<DT>(mod, TMP(0) | mme1,     zero | nomme,  oneHalf | nomme,     dst | mme0);
302             madm<DT>(mod, TMP(1) | mme2,     zero | nomme,      src | nomme,     dst | mme0);
303             madm<DT>(mod, TMP(2) | mme3,  oneHalf | nomme,  -TMP(1) | mme2,   TMP(0) | mme1);
304             madm<DT>(mod, TMP(0) | mme4,   TMP(0) | mme1,    TMP(2) | mme3,   TMP(0) | mme1);
305             madm<DT>(mod,    dst | mme5,   TMP(1) | mme2,    TMP(2) | mme3,   TMP(1) | mme2);
306             madm<DT>(mod, TMP(2) | mme6,      src | nomme,     -dst | mme5,      dst | mme5);
307             madm<DT>(mod,    dst | nomme,     dst | mme5,    TMP(0) | mme4,   TMP(2) | mme6);
308 
309             mark(labelSkip);
310             endif(cfmod);
311             break;
312         case DataType::df:
313             rsqtm<DT>(mod | eo | flag,        dst | mme0,       src | nomme);
314             if_(cfmod | ~flag, labelSkip);
315 
316             madm<DT>(mod, TMP(0) | mme1,     zero | mme0,   oneHalf | nomme,     dst | mme0);
317             madm<DT>(mod, TMP(1) | mme2,     zero | mme0,       src | nomme,     dst | mme0);
318             madm<DT>(mod, TMP(2) | mme3,  oneHalf | nomme,  -TMP(1) | mme2,   TMP(0) | mme1);
319             madm<DT>(mod, TMP(3) | mme4,      one | nomme,  oneHalf | nomme,     dst | nomme);
320             madm<DT>(mod, TMP(3) | mme5,      one | nomme,   TMP(3) | mme4,   TMP(2) | mme3);
321             madm<DT>(mod,    dst | mme6,     zero | mme0,    TMP(2) | mme3,   TMP(1) | mme2);
322             madm<DT>(mod, TMP(2) | mme7,     zero | mme0,    TMP(2) | mme3,   TMP(0) | mme1);
323             madm<DT>(mod,    dst | mme6,   TMP(1) | mme2,    TMP(3) | mme5,      dst | mme6);
324             madm<DT>(mod, TMP(3) | mme5,   TMP(0) | mme1,    TMP(3) | mme5,   TMP(2) | mme7);
325             madm<DT>(mod, TMP(0) | mme1,      src | nomme,     -dst | mme6,      dst | mme6);
326             madm<DT>(mod,    dst | nomme,     dst | mme6,    TMP(0) | mme1,   TMP(3) | mme5);
327 
328             mark(labelSkip);
329             endif(cfmod);
330             break;
331         default:
332 #ifdef NGEN_SAFE
333             throw invalid_type_exception();
334 #endif
335             break;
336     }
337 }
338 
339 #undef TMP
340 
341 // Thread spawner messages.
threadend(const InstructionModifier & mod,const RegData & r0_info)342 void threadend(const InstructionModifier &mod, const RegData &r0_info) {
343     auto sf = SharedFunction::ts;
344     uint32_t exdesc = 0x20 | (static_cast<int>(sf) & 0xF);
345     send(8 | EOT | mod | NoMask, null, r0_info, exdesc, 0x2000010);
346 }
347 
threadend(const RegData & r0_info)348 void threadend(const RegData &r0_info) { threadend(InstructionModifier(), r0_info); }
349 
350 // Gateway messages.
barriermsg(const InstructionModifier & mod,const GRF & header)351 void barriermsg(const InstructionModifier &mod, const GRF &header)
352 {
353     uint32_t exdesc = static_cast<int>(SharedFunction::gtwy) & 0xF;
354     send(1 | mod | NoMask, null, header, exdesc, 0x2000004);
355 }
356 
barriermsg(const GRF & header)357 void barriermsg(const GRF &header) { barriermsg(InstructionModifier(), header); }
358 
barriersignal(const InstructionModifier & mod,const GRF & temp,const GRF & r0_info=r0)359 void barriersignal(const InstructionModifier &mod, const GRF &temp, const GRF &r0_info = r0)
360 {
361         and_(8 | NoMask, temp.ud(), r0_info.ud(2), uint32_t((hardware >= HW::Gen11) ? 0x7F000000 : 0x8F000000));
362     barriermsg(mod, temp);
363 }
364 
barriersignal(const InstructionModifier & mod,const GRF & temp,uint32_t threadCount,const GRF & r0_info=r0)365 void barriersignal(const InstructionModifier &mod, const GRF &temp, uint32_t threadCount, const GRF &r0_info = r0)
366 {
367     {
368         and_(8 | NoMask, temp.ud(), r0_info.ud(2), uint32_t((hardware >= HW::Gen11) ? 0x7F000000 : 0x8F000000));
369         mov(1 | NoMask, temp.ub(9), 0x80 | (threadCount & 0x7F));
370     }
371     barriermsg(mod, temp);
372 }
373 
barriersignal(const GRF & temp,const GRF & r0_info=r0)374 void barriersignal(const GRF &temp, const GRF &r0_info = r0) { barriersignal(InstructionModifier(), temp, r0_info); }
barriersignal(const GRF & temp,uint32_t threadCount,const GRF & r0_info=r0)375 void barriersignal(const GRF &temp, uint32_t threadCount, const GRF &r0_info = r0) { barriersignal(InstructionModifier(), temp, threadCount, r0_info); }
376 
barrierwait()377 void barrierwait()
378 {
379     if (isXe)
380         sync.bar(NoMask);
381     else
382         wait(NoMask, n0[0]);
383 }
384 
385 template <typename... Targs>
barrier(const Targs &...barrierArgs)386 void barrier(const Targs &...barrierArgs)
387 {
388     barriersignal(barrierArgs...);
389     barrierwait();
390 }
391 
392 // Global memory fence.
memfence(const InstructionModifier & mod,const RegData & dst,const RegData & header=GRF (0))393 void memfence(const InstructionModifier &mod, const RegData &dst, const RegData &header = GRF(0))
394 {
395     const uint32_t exdesc = static_cast<int>(SharedFunction::dc0) & 0xF;
396     send(8 | mod | NoMask, dst, header, exdesc, 0x219E000);
397 }
398 
memfence(const RegData & dst,const RegData & header=GRF (0))399 void memfence(const RegData &dst, const RegData &header = GRF(0)) { memfence(InstructionModifier(), dst, header); }
400 
401 // SLM-only memory fence.
slmfence(const InstructionModifier & mod,const RegData & dst,const RegData & header=GRF (0))402 void slmfence(const InstructionModifier &mod, const RegData &dst, const RegData &header = GRF(0))
403 {
404     const uint32_t exdesc = static_cast<int>(SharedFunction::dc0) & 0xF;
405     send(8 | mod | NoMask, dst, header, exdesc, 0x219E0FE);
406 }
407 
slmfence(const RegData & dst,const RegData & header=GRF (0))408 void slmfence(const RegData &dst, const RegData &header = GRF(0)) { slmfence(InstructionModifier(), dst, header); }
409 
410