1 /*******************************************************************************
2 * Copyright 2019-2021 Intel Corporation
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 *******************************************************************************/
16
17 /*
18 * Do not #include this file directly; ngen uses it internally.
19 */
20
21
22 // Pseudo-instructions and macros.
23 template <typename DT = void>
min_(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1)24 void min_(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
25 sel(mod | lt | f0[0], dst, src0, src1);
26 }
27 template <typename DT = void>
min_(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const Immediate & src1)28 void min_(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) {
29 sel(mod | lt | f0[0], dst, src0, src1);
30 }
31 #ifndef NGEN_WINDOWS_COMPAT
32 template <typename DT = void>
min(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1)33 void min(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
34 sel(mod | lt | f0[0], dst, src0, src1);
35 }
36 template <typename DT = void>
min(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const Immediate & src1)37 void min(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) {
38 sel(mod | lt | f0[0], dst, src0, src1);
39 }
40 #endif
41 template <typename DT = void>
max_(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1)42 void max_(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
43 sel(mod | ge | f0[0], dst, src0, src1);
44 }
45 template <typename DT = void>
max_(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const Immediate & src1)46 void max_(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) {
47 sel(mod | ge | f0[0], dst, src0, src1);
48 }
49 #ifndef NGEN_WINDOWS_COMPAT
50 template <typename DT = void>
max(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1)51 void max(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
52 sel(mod | ge | f0[0], dst, src0, src1);
53 }
54 template <typename DT = void>
max(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const Immediate & src1)55 void max(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) {
56 sel(mod | ge | f0[0], dst, src0, src1);
57 }
58 #endif
59
60 template <typename DT = void>
bfi(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1,const RegData & src2,const RegData & src3)61 void bfi(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1, const RegData &src2, const RegData &src3) {
62 bfi1(mod, dst, src0, src1);
63 bfi2(mod, dst, dst, src2, src3);
64 }
65
66 // Brief compare instructions.
67 template <typename DT = void>
cmp(const InstructionModifier & mod,const RegData & src0,const RegData & src1)68 void cmp(const InstructionModifier &mod, const RegData &src0, const RegData &src1) {
69 auto dt = getDataType<DT>();
70 if (dt == DataType::invalid)
71 dt = src0.getType();
72 cmp<DT>(mod, null.retype(dt), src0, src1);
73 }
74 template <typename DT = void>
cmp(const InstructionModifier & mod,const RegData & src0,const Immediate & src1)75 void cmp(const InstructionModifier &mod, const RegData &src0, const Immediate &src1) {
76 auto dt = getDataType<DT>();
77 if (dt == DataType::invalid)
78 dt = src0.getType();
79 cmp<DT>(mod, null.retype(dt), src0, src1);
80 }
81
82 // Brief math instructions.
83 template <typename DT = void>
cos(const InstructionModifier & mod,const RegData & dst,const RegData & src0)84 void cos(const InstructionModifier &mod, const RegData &dst, const RegData &src0) {
85 math<DT>(mod, MathFunction::cos, dst, src0);
86 }
87 template <typename DT = void>
exp(const InstructionModifier & mod,const RegData & dst,const RegData & src0)88 void exp(const InstructionModifier &mod, const RegData &dst, const RegData &src0) {
89 math<DT>(mod, MathFunction::exp, dst, src0);
90 }
91 template <typename DT = void>
fdiv(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1)92 void fdiv(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
93 math<DT>(mod, MathFunction::fdiv, dst, src0, src1);
94 }
95 template <typename DT = void>
fdiv(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const Immediate & src1)96 void fdiv(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) {
97 math<DT>(mod, MathFunction::fdiv, dst, src0, src1);
98 }
99 template <typename DT = void>
idiv(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1)100 void idiv(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
101 math<DT>(mod, MathFunction::idiv, dst, src0, src1);
102 }
103 template <typename DT = void>
idiv(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const Immediate & src1)104 void idiv(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) {
105 math<DT>(mod, MathFunction::idiv, dst, src0, src1);
106 }
107 template <typename DT = void>
inv(const InstructionModifier & mod,const RegData & dst,const RegData & src0)108 void inv(const InstructionModifier &mod, const RegData &dst, const RegData &src0) {
109 math<DT>(mod, MathFunction::inv, dst, src0);
110 }
111 template <typename DT = void>
invm(const InstructionModifier & mod,const ExtendedReg & dst,const ExtendedReg & src0,const ExtendedReg & src1)112 void invm(const InstructionModifier &mod, const ExtendedReg &dst, const ExtendedReg &src0, const ExtendedReg &src1) {
113 math<DT>(mod, MathFunction::invm, dst, src0, src1);
114 }
115 template <typename DT = void>
iqot(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1)116 void iqot(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
117 math<DT>(mod, MathFunction::iqot, dst, src0, src1);
118 }
119 template <typename DT = void>
iqot(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const Immediate & src1)120 void iqot(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) {
121 math<DT>(mod, MathFunction::iqot, dst, src0, src1);
122 }
123 template <typename DT = void>
irem(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1)124 void irem(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
125 math<DT>(mod, MathFunction::irem, dst, src0, src1);
126 }
127 template <typename DT = void>
irem(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const Immediate & src1)128 void irem(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) {
129 math<DT>(mod, MathFunction::irem, dst, src0, src1);
130 }
131 template <typename DT = void>
log(const InstructionModifier & mod,const RegData & dst,const RegData & src0)132 void log(const InstructionModifier &mod, const RegData &dst, const RegData &src0) {
133 math<DT>(mod, MathFunction::log, dst, src0);
134 }
135 template <typename DT = void>
pow(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const RegData & src1)136 void pow(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const RegData &src1) {
137 math<DT>(mod, MathFunction::pow, dst, src0, src1);
138 }
139 template <typename DT = void>
pow(const InstructionModifier & mod,const RegData & dst,const RegData & src0,const Immediate & src1)140 void pow(const InstructionModifier &mod, const RegData &dst, const RegData &src0, const Immediate &src1) {
141 math<DT>(mod, MathFunction::pow, dst, src0, src1);
142 }
143 template <typename DT = void>
rsqt(const InstructionModifier & mod,const RegData & dst,const RegData & src0)144 void rsqt(const InstructionModifier &mod, const RegData &dst, const RegData &src0) {
145 math<DT>(mod, MathFunction::rsqt, dst, src0);
146 }
147 template <typename DT = void>
rsqtm(const InstructionModifier & mod,const ExtendedReg & dst,const ExtendedReg & src0)148 void rsqtm(const InstructionModifier &mod, const ExtendedReg &dst, const ExtendedReg &src0) {
149 math<DT>(mod, MathFunction::rsqtm, dst, src0);
150 }
151 template <typename DT = void>
sin(const InstructionModifier & mod,const RegData & dst,const RegData & src0)152 void sin(const InstructionModifier &mod, const RegData &dst, const RegData &src0) {
153 math<DT>(mod, MathFunction::sin, dst, src0);
154 }
155 template <typename DT = void>
sqt(const InstructionModifier & mod,const RegData & dst,const RegData & src0)156 void sqt(const InstructionModifier &mod, const RegData &dst, const RegData &src0) {
157 math<DT>(mod, MathFunction::sqt, dst, src0);
158 }
159
160 #define TMP(n) tmp[n].retype(dst.getType())
161
162 // IEEE 754-compliant divide math macro sequence.
163 // Requires GRFs initialized with 0.0 and 1.0, as well as temporary GRFs (4 for single precision, 5 for double precision).
164 // dst, num, denom must be distinct GRFs.
165 template <typename DT = void, typename A>
fdiv_ieee(const InstructionModifier & mod,FlagRegister flag,RegData dst,RegData num,RegData denom,RegData zero,RegData one,const A & tmp,InstructionModifier cfmod=InstructionModifier ())166 void fdiv_ieee(const InstructionModifier &mod, FlagRegister flag, RegData dst, RegData num, RegData denom,
167 RegData zero, RegData one, const A &tmp, InstructionModifier cfmod = InstructionModifier())
168 {
169 DataType dt = getDataType<DT>();
170 if (dt == DataType::invalid)
171 dt = dst.getType();
172 if (cfmod.getExecSize() == 0)
173 cfmod = mod;
174
175 Label labelSkip;
176
177 switch (dt) {
178 case DataType::hf:
179 fdiv<DT>(mod, dst, num, denom);
180 break;
181 case DataType::f:
182 invm<DT>(mod | eo | flag, dst | mme0, num | nomme, denom | nomme);
183 if_(cfmod | ~flag, labelSkip);
184
185 madm<DT>(mod, TMP(0) | mme1, zero | nomme, num | nomme, dst | mme0);
186 madm<DT>(mod, TMP(1) | mme2, one | nomme, -denom | nomme, dst | mme0);
187 madm<DT>(mod, TMP(2) | mme3, dst | mme0, TMP(1) | mme2, dst | mme0);
188 madm<DT>(mod, TMP(3) | mme4, num | nomme, -denom | nomme, TMP(0) | mme1);
189 madm<DT>(mod, TMP(0) | mme5, TMP(0) | mme1, TMP(3) | mme4, TMP(2) | mme3);
190 madm<DT>(mod, TMP(1) | mme6, num | nomme, -denom | nomme, TMP(0) | mme5);
191 madm<DT>(mod, dst | nomme, TMP(0) | mme5, TMP(1) | mme6, TMP(2) | mme3);
192
193 mark(labelSkip);
194 endif(cfmod);
195 break;
196 case DataType::df:
197 invm<DT>(mod | eo | flag, dst | mme0, num | nomme, denom | nomme);
198 if_(cfmod | ~flag, labelSkip);
199
200 madm<DT>(mod, TMP(0) | mme1, zero | nomme, num | nomme, dst | mme0);
201 madm<DT>(mod, TMP(1) | mme2, one | nomme, -denom | nomme, dst | mme0);
202 madm<DT>(mod, TMP(2) | mme3, num | nomme, -denom | nomme, TMP(0) | mme1);
203 madm<DT>(mod, TMP(3) | mme4, dst | mme0, TMP(1) | mme2, dst | mme0);
204 madm<DT>(mod, TMP(4) | mme5, one | nomme, -denom | nomme, TMP(3) | mme4);
205 madm<DT>(mod, dst | mme6, dst | mme0, TMP(1) | mme2, TMP(3) | mme4);
206 madm<DT>(mod, TMP(0) | mme7, TMP(0) | mme1, TMP(2) | mme3, TMP(3) | mme4);
207 madm<DT>(mod, TMP(3) | mme0, TMP(3) | mme4, dst | mme6, TMP(4) | mme5);
208 madm<DT>(mod, TMP(2) | mme1, num | nomme, -denom | nomme, TMP(0) | mme7);
209 madm<DT>(mod, dst | nomme, TMP(0) | mme7, TMP(2) | mme1, TMP(3) | mme0);
210
211 mark(labelSkip);
212 endif(cfmod);
213 break;
214 default:
215 #ifdef NGEN_SAFE
216 throw invalid_type_exception();
217 #endif
218 break;
219 }
220 }
221
222 // IEEE 754-compliant reciprocal math macro sequence.
223 // Requires GRF initialized with 1.0, as well as 3 temporary GRFs.
224 // dst and src must be distinct GRFs.
225 template <typename DT = void, typename A>
inv_ieee(const InstructionModifier & mod,FlagRegister flag,RegData dst,RegData src,RegData one,const A & tmp,InstructionModifier cfmod=InstructionModifier ())226 void inv_ieee(const InstructionModifier &mod, FlagRegister flag, RegData dst, RegData src, RegData one,
227 const A &tmp, InstructionModifier cfmod = InstructionModifier())
228 {
229 DataType dt = getDataType<DT>();
230 if (dt == DataType::invalid)
231 dt = dst.getType();
232 if (cfmod.getExecSize() == 0)
233 cfmod = mod;
234
235 Label labelSkip;
236
237 switch (dt) {
238 case DataType::hf:
239 inv<DT>(mod, dst, src);
240 break;
241 case DataType::f:
242 invm<DT>(mod | eo | flag, dst | mme0, one | nomme, src | nomme);
243 if_(cfmod | ~flag, labelSkip);
244
245 madm<DT>(mod, TMP(1) | mme2, one | nomme, -src | nomme, dst | mme0);
246 madm<DT>(mod, TMP(2) | mme3, dst | mme0, TMP(1) | mme2, dst | mme0);
247 madm<DT>(mod, TMP(0) | mme5, dst | mme0, TMP(1) | mme2, TMP(2) | mme3);
248 madm<DT>(mod, TMP(1) | mme6, one | nomme, -src | nomme, TMP(0) | mme5);
249 madm<DT>(mod, dst | nomme, TMP(0) | mme5, TMP(1) | mme6, TMP(2) | mme3);
250
251 mark(labelSkip);
252 endif(cfmod);
253 break;
254 case DataType::df:
255 invm<DT>(mod | eo | flag, dst | mme0, one | nomme, src | nomme);
256 if_(cfmod | ~flag, labelSkip);
257
258 madm<DT>(mod, TMP(0) | mme2, one | nomme, -src | nomme, dst | mme0);
259 madm<DT>(mod, TMP(1) | mme4, dst | mme0, TMP(0) | mme2, dst | mme0);
260 madm<DT>(mod, TMP(2) | mme5, one | nomme, -src | nomme, TMP(1) | mme4);
261 madm<DT>(mod, dst | mme6, dst | mme0, TMP(0) | mme2, TMP(1) | mme4);
262 madm<DT>(mod, TMP(1) | mme0, TMP(1) | mme4, dst | mme6, TMP(2) | mme5);
263 madm<DT>(mod, TMP(0) | mme1, one | nomme, -src | nomme, dst | mme6);
264 madm<DT>(mod, dst | nomme, dst | mme6, TMP(0) | mme1, TMP(1) | mme0);
265
266 mark(labelSkip);
267 endif(cfmod);
268 break;
269 default:
270 #ifdef NGEN_SAFE
271 throw invalid_type_exception();
272 #endif
273 break;
274 }
275 }
276
277 // IEEE 754-compliant square root macro sequence.
278 // Requires GRFs initialized with 0.0 and 0.5 (also 1.0 for double precision),
279 // and temporary GRFs (3 for single precision, 4 for double precision).
280 // dst and src must be distinct GRFs.
281 template <typename DT = void, typename A>
sqt_ieee(const InstructionModifier & mod,FlagRegister flag,RegData dst,RegData src,RegData zero,RegData oneHalf,RegData one,const A & tmp,InstructionModifier cfmod=InstructionModifier ())282 void sqt_ieee(const InstructionModifier &mod, FlagRegister flag, RegData dst, RegData src,
283 RegData zero, RegData oneHalf, RegData one, const A &tmp, InstructionModifier cfmod = InstructionModifier())
284 {
285 DataType dt = getDataType<DT>();
286 if (dt == DataType::invalid)
287 dt = dst.getType();
288 if (cfmod.getExecSize() == 0)
289 cfmod = mod;
290
291 Label labelSkip;
292
293 switch (dt) {
294 case DataType::hf:
295 sqt<DT>(mod, dst, src);
296 break;
297 case DataType::f:
298 rsqtm<DT>(mod | eo | flag, dst | mme0, src | nomme);
299 if_(cfmod | ~flag, labelSkip);
300
301 madm<DT>(mod, TMP(0) | mme1, zero | nomme, oneHalf | nomme, dst | mme0);
302 madm<DT>(mod, TMP(1) | mme2, zero | nomme, src | nomme, dst | mme0);
303 madm<DT>(mod, TMP(2) | mme3, oneHalf | nomme, -TMP(1) | mme2, TMP(0) | mme1);
304 madm<DT>(mod, TMP(0) | mme4, TMP(0) | mme1, TMP(2) | mme3, TMP(0) | mme1);
305 madm<DT>(mod, dst | mme5, TMP(1) | mme2, TMP(2) | mme3, TMP(1) | mme2);
306 madm<DT>(mod, TMP(2) | mme6, src | nomme, -dst | mme5, dst | mme5);
307 madm<DT>(mod, dst | nomme, dst | mme5, TMP(0) | mme4, TMP(2) | mme6);
308
309 mark(labelSkip);
310 endif(cfmod);
311 break;
312 case DataType::df:
313 rsqtm<DT>(mod | eo | flag, dst | mme0, src | nomme);
314 if_(cfmod | ~flag, labelSkip);
315
316 madm<DT>(mod, TMP(0) | mme1, zero | mme0, oneHalf | nomme, dst | mme0);
317 madm<DT>(mod, TMP(1) | mme2, zero | mme0, src | nomme, dst | mme0);
318 madm<DT>(mod, TMP(2) | mme3, oneHalf | nomme, -TMP(1) | mme2, TMP(0) | mme1);
319 madm<DT>(mod, TMP(3) | mme4, one | nomme, oneHalf | nomme, dst | nomme);
320 madm<DT>(mod, TMP(3) | mme5, one | nomme, TMP(3) | mme4, TMP(2) | mme3);
321 madm<DT>(mod, dst | mme6, zero | mme0, TMP(2) | mme3, TMP(1) | mme2);
322 madm<DT>(mod, TMP(2) | mme7, zero | mme0, TMP(2) | mme3, TMP(0) | mme1);
323 madm<DT>(mod, dst | mme6, TMP(1) | mme2, TMP(3) | mme5, dst | mme6);
324 madm<DT>(mod, TMP(3) | mme5, TMP(0) | mme1, TMP(3) | mme5, TMP(2) | mme7);
325 madm<DT>(mod, TMP(0) | mme1, src | nomme, -dst | mme6, dst | mme6);
326 madm<DT>(mod, dst | nomme, dst | mme6, TMP(0) | mme1, TMP(3) | mme5);
327
328 mark(labelSkip);
329 endif(cfmod);
330 break;
331 default:
332 #ifdef NGEN_SAFE
333 throw invalid_type_exception();
334 #endif
335 break;
336 }
337 }
338
339 #undef TMP
340
341 // Thread spawner messages.
threadend(const InstructionModifier & mod,const RegData & r0_info)342 void threadend(const InstructionModifier &mod, const RegData &r0_info) {
343 auto sf = SharedFunction::ts;
344 uint32_t exdesc = 0x20 | (static_cast<int>(sf) & 0xF);
345 send(8 | EOT | mod | NoMask, null, r0_info, exdesc, 0x2000010);
346 }
347
threadend(const RegData & r0_info)348 void threadend(const RegData &r0_info) { threadend(InstructionModifier(), r0_info); }
349
350 // Gateway messages.
barriermsg(const InstructionModifier & mod,const GRF & header)351 void barriermsg(const InstructionModifier &mod, const GRF &header)
352 {
353 uint32_t exdesc = static_cast<int>(SharedFunction::gtwy) & 0xF;
354 send(1 | mod | NoMask, null, header, exdesc, 0x2000004);
355 }
356
barriermsg(const GRF & header)357 void barriermsg(const GRF &header) { barriermsg(InstructionModifier(), header); }
358
barriersignal(const InstructionModifier & mod,const GRF & temp,const GRF & r0_info=r0)359 void barriersignal(const InstructionModifier &mod, const GRF &temp, const GRF &r0_info = r0)
360 {
361 and_(8 | NoMask, temp.ud(), r0_info.ud(2), uint32_t((hardware >= HW::Gen11) ? 0x7F000000 : 0x8F000000));
362 barriermsg(mod, temp);
363 }
364
barriersignal(const InstructionModifier & mod,const GRF & temp,uint32_t threadCount,const GRF & r0_info=r0)365 void barriersignal(const InstructionModifier &mod, const GRF &temp, uint32_t threadCount, const GRF &r0_info = r0)
366 {
367 {
368 and_(8 | NoMask, temp.ud(), r0_info.ud(2), uint32_t((hardware >= HW::Gen11) ? 0x7F000000 : 0x8F000000));
369 mov(1 | NoMask, temp.ub(9), 0x80 | (threadCount & 0x7F));
370 }
371 barriermsg(mod, temp);
372 }
373
barriersignal(const GRF & temp,const GRF & r0_info=r0)374 void barriersignal(const GRF &temp, const GRF &r0_info = r0) { barriersignal(InstructionModifier(), temp, r0_info); }
barriersignal(const GRF & temp,uint32_t threadCount,const GRF & r0_info=r0)375 void barriersignal(const GRF &temp, uint32_t threadCount, const GRF &r0_info = r0) { barriersignal(InstructionModifier(), temp, threadCount, r0_info); }
376
barrierwait()377 void barrierwait()
378 {
379 if (isXe)
380 sync.bar(NoMask);
381 else
382 wait(NoMask, n0[0]);
383 }
384
385 template <typename... Targs>
barrier(const Targs &...barrierArgs)386 void barrier(const Targs &...barrierArgs)
387 {
388 barriersignal(barrierArgs...);
389 barrierwait();
390 }
391
392 // Global memory fence.
memfence(const InstructionModifier & mod,const RegData & dst,const RegData & header=GRF (0))393 void memfence(const InstructionModifier &mod, const RegData &dst, const RegData &header = GRF(0))
394 {
395 const uint32_t exdesc = static_cast<int>(SharedFunction::dc0) & 0xF;
396 send(8 | mod | NoMask, dst, header, exdesc, 0x219E000);
397 }
398
memfence(const RegData & dst,const RegData & header=GRF (0))399 void memfence(const RegData &dst, const RegData &header = GRF(0)) { memfence(InstructionModifier(), dst, header); }
400
401 // SLM-only memory fence.
slmfence(const InstructionModifier & mod,const RegData & dst,const RegData & header=GRF (0))402 void slmfence(const InstructionModifier &mod, const RegData &dst, const RegData &header = GRF(0))
403 {
404 const uint32_t exdesc = static_cast<int>(SharedFunction::dc0) & 0xF;
405 send(8 | mod | NoMask, dst, header, exdesc, 0x219E0FE);
406 }
407
slmfence(const RegData & dst,const RegData & header=GRF (0))408 void slmfence(const RegData &dst, const RegData &header = GRF(0)) { slmfence(InstructionModifier(), dst, header); }
409
410