1 /*========================== begin_copyright_notice ============================
2
3 Copyright (C) 2020-2021 Intel Corporation
4
5 SPDX-License-Identifier: MIT
6
7 ============================= end_copyright_notice ===========================*/
8
9 #include "BuildIR.h"
10 #include "../Timer.h"
11
12 using namespace vISA;
13
14
translateLscFence(SFID sfid,LSC_FENCE_OP fenceOp,LSC_SCOPE scope,int & status)15 G4_INST* IR_Builder::translateLscFence(
16 SFID sfid,
17 LSC_FENCE_OP fenceOp,
18 LSC_SCOPE scope,
19 int &status)
20 {
21 TIME_SCOPE(VISA_BUILDER_IR_CONSTRUCTION);
22
23 status = VISA_SUCCESS;
24 auto check =
25 [&] (bool z, const char *what) {
26 if (!z) {
27 MUST_BE_TRUE(false, what);
28 status = VISA_FAILURE;
29 }
30 };
31
32 // NOTE: fence requires 1 register sent and 1 returned for some foolish
33 // reason (synchronization requires it), so we must create dummy registers.
34 // I'd prefer to use the same register, but vISA blows up
35 // if we dare use the same dst as src (old? hardware restriction?),
36 // so we'll splurge and use two.
37 const RegionDesc *rd = getRegionStride1();
38
39 // G4_Declare *src0DummyRegDecl = createSendPayloadDcl(getGRFSize()/4, Type_UD);
40 G4_Declare *src0DummyRegDecl = getBuiltinR0();
41 G4_SrcRegRegion *src0Dummy = createSrc(
42 src0DummyRegDecl->getRegVar(),
43 0, 0, rd, Type_UD);
44 //
45 // I don't think vISA permits same dst as src0
46 // G4_Declare *dstDummyRegDecl = getBuiltinR0();
47 G4_DstRegRegion* dstDummy = nullptr;
48 if (!hasFenceControl())
49 {
50 G4_Declare* dstDummyRegDecl = createSendPayloadDcl(getGRFSize() / 4, Type_UD);
51 dstDummy = createDstRegRegion(dstDummyRegDecl, 1);
52 }
53 else
54 {
55 dstDummy = createNullDst(Type_UD);
56 }
57
58 G4_SrcRegRegion *src1NullReg = createNullSrc(Type_UD);
59 //
60 const int src1Len = 0; // no data needed in src1
61
62 const G4_ExecSize execSize = g4::SIMD1;
63 const G4_InstOpts instOpt = Get_Gen4_Emask(vISA_EMASK_M1_NM, execSize);
64
65 ///////////////////////////////////////////////////////////////////////////
66 uint32_t desc = 0, exDesc = 0;
67 // fence requires 1 non-null register sent and 1 non-null received,
68 // but the contents are undefined
69 const uint32_t LSC_FENCE_OPCODE = 0x1F;
70 desc |= LSC_FENCE_OPCODE; // LSC_FENCE
71 desc |= 1 << 25;
72 desc |= (hasFenceControl() ? 0 : 1) << 20;
73 //
74 switch (fenceOp) {
75 case LSC_FENCE_OP_NONE: desc |= 0 << 12; break;
76 case LSC_FENCE_OP_EVICT: desc |= 1 << 12; break;
77 case LSC_FENCE_OP_INVALIDATE: desc |= 2 << 12; break;
78 case LSC_FENCE_OP_DISCARD: desc |= 3 << 12; break;
79 case LSC_FENCE_OP_CLEAN: desc |= 4 << 12; break;
80 case LSC_FENCE_OP_FLUSHL3: desc |= 5 << 12; break;
81 case LSC_FENCE_OP_TYPE6: desc |= 6 << 12; break;
82 default: check(false, "invalid fence op");
83 }
84 switch (scope) {
85 case LSC_SCOPE_GROUP: desc |= 0 << 9; break;
86 case LSC_SCOPE_LOCAL: desc |= 1 << 9; break;
87 case LSC_SCOPE_TILE: desc |= 2 << 9; break;
88 case LSC_SCOPE_GPU: desc |= 3 << 9; break;
89 case LSC_SCOPE_GPUS: desc |= 4 << 9; break;
90 case LSC_SCOPE_SYSREL: desc |= 5 << 9; break;
91 case LSC_SCOPE_SYSACQ: desc |= 6 << 9; break;
92 default: check(false, "invalid fence scope");
93 }
94
95 if (sfid == SFID::UGM)
96 {
97 // special token telling EU to route the UGM fence to LSC even in
98 // backup mode. Without bit 18 set, the default behavior is for
99 // the UGM fence to be rerouted to HDC when the backup mode chicken
100 // bit is set.
101 desc |= getOption(vISA_LSCBackupMode) << 18;
102 }
103
104 (void) lscEncodeAddrSize(LSC_ADDR_SIZE_32b, desc, status);
105 G4_SendDescRaw *msgDesc = createSendMsgDesc(
106 sfid,
107 desc,
108 exDesc,
109 src1Len,
110 SendAccess::READ_WRITE,
111 nullptr);
112 G4_InstSend *fenceInst = createLscSendInst(
113 nullptr,
114 dstDummy,
115 src0Dummy,
116 src1NullReg,
117 execSize,
118 msgDesc,
119 instOpt,
120 LSC_ADDR_TYPE_FLAT,
121 true);
122 (void)fenceInst;
123
124 return fenceInst;
125 }
126
generateNamedBarrier(int numProducer,int numConsumer,NamedBarrierType type,G4_Operand * barrierId)127 void IR_Builder::generateNamedBarrier(
128 int numProducer, int numConsumer,
129 NamedBarrierType type, G4_Operand* barrierId)
130 {
131 struct NamedBarrierPayload
132 {
133 uint32_t id : 8;
134 uint32_t fence : 4;
135 uint32_t padding : 2;
136 uint32_t type : 2;
137 uint32_t consumer : 8;
138 uint32_t producer: 8;
139 };
140
141 union
142 {
143 NamedBarrierPayload payload;
144 uint32_t data;
145 } payload;
146
147 payload.data = 0;
148 payload.payload.consumer = numConsumer;
149 payload.payload.producer = numProducer;
150
151 auto getVal = [](NamedBarrierType type)
152 {
153 switch (type)
154 {
155 case NamedBarrierType::BOTH:
156 return 0;
157 case NamedBarrierType::PRODUCER:
158 return 1;
159 case NamedBarrierType::CONSUMER:
160 return 2;
161 default:
162 assert(false && "unrecognized NM barreir type");
163 return -1;
164 }
165 };
166 payload.payload.type = getVal(type);
167
168 G4_Declare* header = createTempVar(8, Type_UD, GRFALIGN);
169 if (barrierId->isImm())
170 {
171 payload.payload.id = (uint8_t)barrierId->asImm()->getInt();
172 auto dst = createDst(header->getRegVar(), 0, 2, 1, Type_UD);
173 auto src = createImm(payload.data, Type_UD);
174 createMov(g4::SIMD1, dst, src, InstOpt_WriteEnable, true);
175 }
176 else
177 {
178 // barrier id should be a srcRegion with int type
179 // and (1) Hdr.2:ud barrierId 0xFF
180 // or (1) Hdr.2:ud Hdr.2 payload.data
181 assert(barrierId->isSrcRegRegion() && IS_INT(barrierId->getType()) && "expect barrier id to be int");
182 auto dst = createDst(header->getRegVar(), 0, 2, 1, Type_UD);
183 auto src1 = createImm(0xFF, Type_UD);
184 createBinOp(G4_and, g4::SIMD1, dst, barrierId, src1, InstOpt_WriteEnable, true);
185 dst = createDst(header->getRegVar(), 0, 2, 1, Type_UD);
186 auto orSrc0 = createSrc(header->getRegVar(), 0, 2,
187 getRegionScalar(), Type_UD);
188 auto orSrc1 = createImm(payload.data, Type_UD);
189 createBinOp(G4_or, g4::SIMD1, dst, orSrc0, orSrc1, InstOpt_WriteEnable, true);
190 }
191
192 // 1 message length, 0 response length, no header, no ack
193 int desc = (0x1 << 25) + 0x4;
194
195 auto msgDesc = createSyncMsgDesc(SFID::GATEWAY, desc);
196 createSendInst(
197 nullptr,
198 G4_send,
199 g4::SIMD1,
200 createNullDst(Type_UD),
201 createSrcRegRegion(header, getRegionStride1()),
202 createImm(desc, Type_UD),
203 InstOpt_WriteEnable,
204 msgDesc,
205 true);
206 }
207
generateNamedBarrier(G4_Operand * barrierId,G4_SrcRegRegion * threadCount)208 void IR_Builder::generateNamedBarrier(G4_Operand* barrierId, G4_SrcRegRegion* threadCount)
209 {
210 G4_Declare* header = createTempVar(8, Type_UD, GRFALIGN);
211
212 // mov (1) Hdr.2<1>:ud 0x0
213 // mov (2) Hdr.10<1>:ub threadcount:ub
214 // mov (1) Hdr.8<1>:ub barrierId:ub
215 auto dst = createDst(header->getRegVar(), 0, 2, 1, Type_UD);
216 auto src = createImm(0, Type_UD);
217 createMov(g4::SIMD1, dst, src, InstOpt_WriteEnable, true);
218 dst = createDst(header->getRegVar(), 0, 10, 1, Type_UB);
219 createMov(g4::SIMD2, dst, threadCount, InstOpt_WriteEnable, true);
220 dst = createDst(header->getRegVar(), 0, 8, 1, Type_UB);
221 createMov(g4::SIMD1, dst, barrierId, InstOpt_WriteEnable, true);
222
223 // 1 message length, 0 response length, no header, no ack
224 int desc = (0x1 << 25) + 0x4;
225
226 auto msgDesc = createSyncMsgDesc(SFID::GATEWAY, desc);
227 createSendInst(
228 nullptr,
229 G4_send,
230 g4::SIMD1,
231 createNullDst(Type_UD),
232 createSrcRegRegion(header, getRegionStride1()),
233 createImm(desc, Type_UD),
234 InstOpt_WriteEnable,
235 msgDesc,
236 true);
237 }
238
generateSingleBarrier()239 void IR_Builder::generateSingleBarrier()
240 {
241 // single barrier: # producer = # consumer = # threads, barrier id = 0
242 // For now produce no fence
243 // Number of threads per threadgroup is r0.2[31:24]
244 // mov (1) Hdr.2<1>:ud 0x0
245 // mov (2) Hdr.10<1>:ub R0.11<0;1,0>:ub
246 // This SIMD2 byte move is broadcasting the thread group size
247 // from the r0 header into both the producer and consumer slots.
248 // Hdr.2:d[31:24,23:16]
249 G4_Declare* header = createTempVar(8, Type_UD, GRFALIGN);
250 auto dst = createDst(header->getRegVar(), 0, 2, 1, Type_UD);
251 auto src = createImm(0, Type_UD);
252 createMov(g4::SIMD1, dst, src, InstOpt_WriteEnable, true);
253 dst = createDst(header->getRegVar(), 0 , 10, 1, Type_UB);
254 auto src0 = createSrc(getBuiltinR0()->getRegVar(), 0, 11,
255 getRegionScalar(), Type_UB);
256 createMov(g4::SIMD2, dst, src0, InstOpt_WriteEnable, true);
257 // 1 message length, 0 response length, no header, no ack
258 int desc = (0x1 << 25) + 0x4;
259
260 auto msgDesc = createSyncMsgDesc(SFID::GATEWAY, desc);
261 createSendInst(
262 nullptr,
263 G4_send,
264 g4::SIMD1,
265 createNullDst(Type_UD),
266 createSrcRegRegion(header, getRegionStride1()),
267 createImm(desc, Type_UD),
268 InstOpt_WriteEnable,
269 msgDesc,
270 true);
271 }
272
checkNamedBarrierSrc(G4_Operand * src,bool isBarrierId)273 static void checkNamedBarrierSrc(G4_Operand* src, bool isBarrierId)
274 {
275 if (src->isImm())
276 {
277 if (isBarrierId)
278 {
279 uint32_t val = (uint32_t)src->asImm()->getInt();
280 assert(val < 32 && "illegal named barrier id");
281 }
282 }
283 else if (src->isSrcRegRegion())
284 {
285 assert(src->asSrcRegRegion()->isScalar() && "barrier id should have scalar region");
286 assert(IS_BTYPE(src->getType()) && "illegal barrier opperand type");
287 }
288 else
289 {
290 assert(false && "illegal barrier id operand");
291 }
292 }
293
translateVISANamedBarrierWait(G4_Operand * barrierId)294 int IR_Builder::translateVISANamedBarrierWait(G4_Operand* barrierId)
295 {
296 TIME_SCOPE(VISA_BUILDER_IR_CONSTRUCTION);
297
298 checkNamedBarrierSrc(barrierId, true);
299
300 G4_Operand* barSrc = barrierId;
301 if (barrierId->isSrcRegRegion()) {
302 // sync can take only flag src
303 G4_Declare* flagDecl = createTempFlag(1);
304 createMov(g4::SIMD1, createDstRegRegion(flagDecl, 1), barrierId,
305 InstOpt_WriteEnable, true);
306 barSrc = createSrcRegRegion(flagDecl, getRegionScalar());
307 }
308 // wait barrierId
309 createInst(nullptr, G4_wait, nullptr, g4::NOSAT, g4::SIMD1, nullptr, barSrc, nullptr,
310 InstOpt_WriteEnable, true);
311
312 return VISA_SUCCESS;
313 }
314
translateVISANamedBarrierSignal(G4_Operand * barrierId,G4_Operand * threadCount)315 int IR_Builder::translateVISANamedBarrierSignal(G4_Operand* barrierId, G4_Operand* threadCount)
316 {
317 TIME_SCOPE(VISA_BUILDER_IR_CONSTRUCTION);
318
319 checkNamedBarrierSrc(barrierId, true);
320 checkNamedBarrierSrc(threadCount, false);
321
322 if (threadCount->isImm())
323 {
324 int numThreads = (int)threadCount->asImm()->getInt();
325 generateNamedBarrier(numThreads, numThreads, NamedBarrierType::BOTH, barrierId);
326 }
327 else
328 {
329 generateNamedBarrier(barrierId, threadCount->asSrcRegRegion());
330 }
331
332 return VISA_SUCCESS;
333 }
334
335
336 // create a fence instruction to the data cache
337 // flushParam --
338 // bit 0 -- commit enable
339 // bit 1-4 -- L3 flush parameters
340 // bit 5 -- global/SLM
341 // bit 6 -- L1 flush
342 // bit 7 -- SW fence only; a scheduling barrier but does not generate any code
343 // bit 7, if set, takes precedence over other bits
createFenceInstruction(uint8_t flushParam,bool commitEnable,bool globalMemFence,bool isSendc=false)344 G4_INST* IR_Builder::createFenceInstruction(
345 uint8_t flushParam, bool commitEnable, bool globalMemFence,
346 bool isSendc = false)
347 {
348 #define L1_FLUSH_MASK 0x40
349
350 int flushBits = (flushParam >> 1) & 0xF;
351 assert(!supportsLSC() && "LSC fence should be handled elsewhere");
352 if (noL3Flush())
353 {
354 // L3 flush is no longer required for image memory
355 flushBits = 0;
356 }
357
358 bool L1Flush = (flushParam & L1_FLUSH_MASK) != 0 &&
359 !(hasSLMFence() && !globalMemFence);
360
361 int desc = 0x7 << 14 | ((commitEnable ? 1 : 0) << 13);
362
363 desc |= flushBits << 9;
364
365 if (L1Flush)
366 {
367 #define L1_FLUSH_BIT_LOC 8
368 desc |= 1 << L1_FLUSH_BIT_LOC;
369 }
370
371 G4_Declare *srcDcl = getBuiltinR0();
372 G4_Declare *dstDcl = createTempVar(8, Type_UD, Any);
373 G4_DstRegRegion *sendDstOpnd = commitEnable ? createDstRegRegion(dstDcl, 1) : createNullDst(Type_UD);
374 G4_SrcRegRegion *sendSrcOpnd = createSrcRegRegion(srcDcl, getRegionStride1());
375 uint8_t BTI = 0x0;
376
377 if (hasSLMFence())
378 {
379 // we must choose either GLOBAL_MEM_FENCE or SLM_FENCE
380 BTI = globalMemFence ? 0 : 0xfe;
381 }
382
383 // commitEnable = true: msg length = 1, response length = 1, dst == src
384 // commitEnable = false: msg length = 1, response length = 0, dst == null
385 return createSendInst(nullptr, sendDstOpnd, sendSrcOpnd, 1, (commitEnable ? 1 : 0), g4::SIMD8,
386 desc, SFID::DP_DC0, true, SendAccess::READ_WRITE, createImm(BTI, Type_UD), nullptr, InstOpt_WriteEnable, isSendc);
387 }
388
389 // create a default SLM fence (no flush)
createSLMFence()390 G4_INST* IR_Builder::createSLMFence()
391 {
392 bool commitEnable = needsFenceCommitEnable();
393 if (supportsLSC())
394 {
395 return translateLscFence(SFID::SLM, LSC_FENCE_OP_NONE, LSC_SCOPE_GROUP);
396 }
397 return createFenceInstruction(0, commitEnable, false, false);
398 }
399
400
translateVISAWaitInst(G4_Operand * mask)401 int IR_Builder::translateVISAWaitInst(G4_Operand* mask)
402 {
403 TIME_SCOPE(VISA_BUILDER_IR_CONSTRUCTION);
404
405 // clear TDR if mask is not null and not zero
406 if (mask && !(mask->isImm() && mask->asImm()->getInt() == 0))
407 {
408 // mov (1) f0.0<1>:uw <TDR_bits>:ub {NoMask}
409 G4_Declare* tmpFlagDcl = createTempFlag(1);
410 G4_DstRegRegion* newPredDef = createDstRegRegion(tmpFlagDcl, 1);
411 createMov(g4::SIMD1, newPredDef, mask, InstOpt_WriteEnable, true);
412
413 // (f0.0) and (8) tdr0.0<1>:uw tdr0.0<8;8,1>:uw 0x7FFF:uw {NoMask}
414 G4_Predicate* predOpnd = createPredicate(PredState_Plus, tmpFlagDcl->getRegVar(), 0, PRED_DEFAULT);
415 G4_DstRegRegion* TDROpnd = createDst(phyregpool.getTDRReg(), 0, 0, 1, Type_UW);
416 G4_SrcRegRegion* TDRSrc = createSrc(phyregpool.getTDRReg(), 0, 0, getRegionStride1(), Type_UW);
417 createInst(predOpnd, G4_and, NULL, g4::NOSAT, g4::SIMD8,
418 TDROpnd, TDRSrc, createImm(0x7FFF, Type_UW), InstOpt_WriteEnable, true);
419 }
420
421 createIntrinsicInst(nullptr, Intrinsic::Wait, g4::SIMD1,
422 nullptr, nullptr, nullptr, nullptr, InstOpt_WriteEnable, true);
423
424 return VISA_SUCCESS;
425 }
426
427
generateBarrierSend()428 void IR_Builder::generateBarrierSend()
429 {
430 if (hasUnifiedBarrier())
431 {
432 generateSingleBarrier();
433 return;
434 }
435
436 // 1 message length, 0 response length, no header, no ack
437 int desc = (0x1 << 25) + 0x4;
438
439 //get barrier id
440 G4_Declare *dcl = createSendPayloadDcl(GENX_DATAPORT_IO_SZ, Type_UD);
441
442 G4_SrcRegRegion* r0_src_opnd = createSrc(
443 builtinR0->getRegVar(),
444 0,
445 2,
446 getRegionScalar(),
447 Type_UD);
448
449 G4_DstRegRegion *dst1_opnd = createDstRegRegion(dcl, 1);
450
451 bool enableBarrierInstCounterBits = kernel.getOption(VISA_EnableBarrierInstCounterBits);
452 int mask = getBarrierMask(enableBarrierInstCounterBits);
453
454 G4_Imm *g4Imm = createImm(mask, Type_UD);
455
456 createBinOp(
457 G4_and,
458 g4::SIMD8,
459 dst1_opnd,
460 r0_src_opnd,
461 g4Imm,
462 InstOpt_WriteEnable,
463 true);
464
465 // Generate the barrier send message
466 auto msgDesc = createSyncMsgDesc(SFID::GATEWAY, desc);
467 createSendInst(
468 NULL,
469 G4_send,
470 g4::SIMD1,
471 createNullDst(Type_UD),
472 createSrcRegRegion(dcl, getRegionStride1()),
473 createImm(desc, Type_UD),
474 InstOpt_WriteEnable,
475 msgDesc,
476 true);
477 }
478
generateBarrierWait()479 void IR_Builder::generateBarrierWait()
480 {
481 G4_Operand* waitSrc = nullptr;
482 if (!hasUnifiedBarrier()) {
483
484 if (getPlatform() < GENX_TGLLP) {
485 // before Xe: wait n0.0<0;1,0>:ud
486 waitSrc = createSrc(phyregpool.getN0Reg(),
487 0, 0, getRegionScalar(), Type_UD);
488 } else {
489 // Xe: sync.bar null
490 waitSrc = createNullSrc(Type_UD);
491 }
492 }
493 else {
494 if (getPlatform() >= GENX_PVC) {
495 // PVC: sync.bar 0
496 waitSrc = createImm(0, Type_UD);
497 } else {
498 // DG2: sync.bar null
499 waitSrc = createNullSrc(Type_UD);
500 }
501 }
502 createInst(nullptr, G4_wait, nullptr, g4::NOSAT, g4::SIMD1,
503 nullptr, waitSrc, nullptr, InstOpt_WriteEnable, true);
504 }
505
translateVISASyncInst(ISA_Opcode opcode,unsigned int mask)506 int IR_Builder::translateVISASyncInst(ISA_Opcode opcode, unsigned int mask)
507 {
508 TIME_SCOPE(VISA_BUILDER_IR_CONSTRUCTION);
509
510 switch (opcode)
511 {
512 case ISA_BARRIER:
513 {
514 generateBarrierSend();
515 generateBarrierWait();
516 }
517 break;
518 case ISA_SAMPLR_CACHE_FLUSH:
519 {
520 // msg length = 1, response length = 1, header_present = 1,
521 // Bit 16-12 = 11111 for Sampler Message Type
522 // Bit 18-17 = 11 for SIMD32 mode
523 int desc = (1 << 25) + (1 << 20) + (1 << 19) + (0x3 << 17) + (0x1F << 12);
524
525 G4_Declare *dcl = getBuiltinR0();
526 G4_Declare *dstDcl = createTempVar(8, Type_UD, Any);
527 G4_DstRegRegion* sendDstOpnd = createDstRegRegion(dstDcl, 1);
528 G4_SrcRegRegion* sendMsgOpnd = createSrcRegRegion(dcl, getRegionStride1());
529
530 auto msgDesc = createSyncMsgDesc(SFID::SAMPLER, desc);
531 createSendInst(nullptr, G4_send, g4::SIMD8, sendDstOpnd, sendMsgOpnd,
532 createImm(desc, Type_UD), 0, msgDesc, true);
533
534 G4_SrcRegRegion* moveSrcOpnd = createSrc(dstDcl->getRegVar(), 0, 0, getRegionStride1(), Type_UD);
535 createMovInst(dstDcl, 0, 0, g4::SIMD8, NULL, NULL, moveSrcOpnd);
536 }
537 break;
538 case ISA_WAIT:
539 {
540 //This should be handled by translateVISAWait() now
541 MUST_BE_TRUE(false, "Should not reach here");
542 }
543 break;
544 case ISA_YIELD:
545 {
546 G4_INST* lastInst = instList.empty() ? nullptr : instList.back();
547 if (lastInst && lastInst->opcode() != G4_label)
548 {
549 lastInst->setOptionOn(InstOpt_Switch);
550 }
551 else
552 {
553 // dummy move to apply the {switch}
554 G4_SrcRegRegion* srcOpnd = createSrc(getBuiltinR0()->getRegVar(), 0, 0, getRegionScalar(), Type_UD);
555 G4_DstRegRegion* dstOpnd = createDst(getBuiltinR0()->getRegVar(), 0, 0, 1, Type_UD);
556
557 G4_INST* nop = createMov(g4::SIMD1, dstOpnd, srcOpnd, InstOpt_NoOpt, true);
558 nop->setOptionOn(InstOpt_Switch);
559 }
560 }
561 break;
562 case ISA_FENCE:
563 {
564 #define GLOBAL_MASK 0x20
565 union fenceParam
566 {
567 VISAFenceMask mask;
568 uint8_t data;
569 };
570
571 fenceParam fenceMask;
572 fenceMask.data = mask & 0xFF;
573 bool globalFence = (mask & GLOBAL_MASK) == 0;
574
575 if (fenceMask.mask.SWFence)
576 {
577 createIntrinsicInst(
578 nullptr, Intrinsic::MemFence, g4::SIMD1,
579 nullptr, nullptr, nullptr, nullptr, InstOpt_NoOpt, true);
580 }
581 else if (VISA_WA_CHECK(m_pWaTable, WADisableWriteCommitForPageFault))
582 {
583 // write commit does not work under page fault
584 // so we generate a fence without commit, followed by a read surface info to BTI 0
585 createFenceInstruction((uint8_t) mask & 0xFF, false, globalFence);
586 G4_Imm* surface = createImm(0, Type_UD);
587 G4_Declare* zeroLOD = createTempVar(8, Type_UD, Any);
588 createMovInst(zeroLOD, 0, 0, g4::SIMD8, NULL, NULL, createImm(0, Type_UD));
589 G4_SrcRegRegion* sendSrc = createSrcRegRegion(zeroLOD, getRegionStride1());
590 G4_DstRegRegion* sendDst = createDstRegRegion(zeroLOD, 1);
591 ChannelMask maskR = ChannelMask::createFromAPI(CHANNEL_MASK_R);
592 translateVISAResInfoInst(EXEC_SIZE_8, vISA_EMASK_M1, maskR, surface, sendSrc, sendDst);
593 }
594 else if (supportsLSC())
595 {
596 // translate legacy fence into the LSC fence
597 // for local fence we translate into a SLM fence with TG scope
598 // for global fence we translate into a untyped and typed fence with GPU scope
599 // ToDo: may need a global flag to let user control the fence scope
600 if (globalFence)
601 {
602 auto fenceControl = supportsSampler() ? LSC_FENCE_OP_EVICT : LSC_FENCE_OP_NONE;
603 if (fenceMask.mask.flushRWCache)
604 {
605 fenceControl = LSC_FENCE_OP_FLUSHL3;
606 }
607 translateLscFence(SFID::UGM, fenceControl, LSC_SCOPE_GPU);
608 translateLscFence(SFID::TGM, fenceControl, LSC_SCOPE_GPU);
609 }
610 else
611 {
612 translateLscFence(SFID::SLM, LSC_FENCE_OP_NONE, LSC_SCOPE_GROUP);
613 }
614 }
615 else
616 {
617 createFenceInstruction((uint8_t) mask & 0xFF, (mask & 0x1) == 0x1, globalFence);
618 // The move to ensure the fence is actually complete will be added at the end of compilation,
619 // in Optimizer::HWWorkaround()
620 }
621 break;
622 }
623 default:
624 return VISA_FAILURE;
625 }
626
627 return VISA_SUCCESS;
628 }
629
translateVISASplitBarrierInst(bool isSignal)630 int IR_Builder::translateVISASplitBarrierInst(bool isSignal)
631 {
632 TIME_SCOPE(VISA_BUILDER_IR_CONSTRUCTION);
633
634 if (isSignal)
635 {
636 generateBarrierSend();
637 }
638 else
639 {
640 generateBarrierWait();
641 }
642
643 return VISA_SUCCESS;
644 }
645