1 /*========================== begin_copyright_notice ============================
2 
3 Copyright (C) 2020-2021 Intel Corporation
4 
5 SPDX-License-Identifier: MIT
6 
7 ============================= end_copyright_notice ===========================*/
8 
9 #include "BuildIR.h"
10 #include "../Timer.h"
11 
12 using namespace vISA;
13 
14 
translateLscFence(SFID sfid,LSC_FENCE_OP fenceOp,LSC_SCOPE scope,int & status)15 G4_INST* IR_Builder::translateLscFence(
16     SFID                    sfid,
17     LSC_FENCE_OP            fenceOp,
18     LSC_SCOPE               scope,
19     int                    &status)
20 {
21     TIME_SCOPE(VISA_BUILDER_IR_CONSTRUCTION);
22 
23     status = VISA_SUCCESS;
24     auto check =
25         [&] (bool z, const char *what) {
26         if (!z) {
27             MUST_BE_TRUE(false, what);
28             status = VISA_FAILURE;
29         }
30     };
31 
32     // NOTE: fence requires 1 register sent and 1 returned for some foolish
33     // reason (synchronization requires it), so we must create dummy registers.
34     // I'd prefer to use the same register, but vISA blows up
35     // if we dare use the same dst as src (old? hardware restriction?),
36     // so we'll splurge and use two.
37     const RegionDesc *rd = getRegionStride1();
38 
39     // G4_Declare *src0DummyRegDecl = createSendPayloadDcl(getGRFSize()/4, Type_UD);
40     G4_Declare *src0DummyRegDecl = getBuiltinR0();
41     G4_SrcRegRegion *src0Dummy = createSrc(
42         src0DummyRegDecl->getRegVar(),
43         0, 0, rd, Type_UD);
44     //
45     // I don't think vISA permits same dst as src0
46     // G4_Declare *dstDummyRegDecl = getBuiltinR0();
47     G4_DstRegRegion* dstDummy = nullptr;
48     if (!hasFenceControl())
49     {
50         G4_Declare* dstDummyRegDecl = createSendPayloadDcl(getGRFSize() / 4, Type_UD);
51         dstDummy = createDstRegRegion(dstDummyRegDecl, 1);
52     }
53     else
54     {
55         dstDummy = createNullDst(Type_UD);
56     }
57 
58     G4_SrcRegRegion *src1NullReg = createNullSrc(Type_UD);
59     //
60     const int src1Len = 0; // no data needed in src1
61 
62     const G4_ExecSize execSize = g4::SIMD1;
63     const G4_InstOpts instOpt = Get_Gen4_Emask(vISA_EMASK_M1_NM, execSize);
64 
65     ///////////////////////////////////////////////////////////////////////////
66     uint32_t desc = 0, exDesc = 0;
67     // fence requires 1 non-null register sent and 1 non-null received,
68     // but the contents are undefined
69     const uint32_t LSC_FENCE_OPCODE = 0x1F;
70     desc |= LSC_FENCE_OPCODE; // LSC_FENCE
71     desc |= 1 << 25;
72     desc |= (hasFenceControl() ? 0 : 1) << 20;
73     //
74     switch (fenceOp) {
75     case LSC_FENCE_OP_NONE:        desc |= 0 << 12; break;
76     case LSC_FENCE_OP_EVICT:       desc |= 1 << 12; break;
77     case LSC_FENCE_OP_INVALIDATE:  desc |= 2 << 12; break;
78     case LSC_FENCE_OP_DISCARD:     desc |= 3 << 12; break;
79     case LSC_FENCE_OP_CLEAN:       desc |= 4 << 12; break;
80     case LSC_FENCE_OP_FLUSHL3:     desc |= 5 << 12; break;
81     case LSC_FENCE_OP_TYPE6:       desc |= 6 << 12; break;
82     default: check(false, "invalid fence op");
83     }
84     switch (scope) {
85     case LSC_SCOPE_GROUP:   desc |= 0 << 9; break;
86     case LSC_SCOPE_LOCAL:   desc |= 1 << 9; break;
87     case LSC_SCOPE_TILE:    desc |= 2 << 9; break;
88     case LSC_SCOPE_GPU:     desc |= 3 << 9; break;
89     case LSC_SCOPE_GPUS:    desc |= 4 << 9; break;
90     case LSC_SCOPE_SYSREL:  desc |= 5 << 9; break;
91     case LSC_SCOPE_SYSACQ:  desc |= 6 << 9; break;
92     default: check(false, "invalid fence scope");
93     }
94 
95     if (sfid == SFID::UGM)
96     {
97         // special token telling EU to route the UGM fence to LSC even in
98         // backup mode.  Without bit 18 set, the default behavior is for
99         // the UGM fence to be rerouted to HDC when the backup mode chicken
100         // bit is set.
101         desc |= getOption(vISA_LSCBackupMode) << 18;
102     }
103 
104     (void) lscEncodeAddrSize(LSC_ADDR_SIZE_32b, desc, status);
105     G4_SendDescRaw *msgDesc = createSendMsgDesc(
106         sfid,
107         desc,
108         exDesc,
109         src1Len,
110         SendAccess::READ_WRITE,
111         nullptr);
112     G4_InstSend *fenceInst = createLscSendInst(
113         nullptr,
114         dstDummy,
115         src0Dummy,
116         src1NullReg,
117         execSize,
118         msgDesc,
119         instOpt,
120         LSC_ADDR_TYPE_FLAT,
121         true);
122     (void)fenceInst;
123 
124     return fenceInst;
125 }
126 
generateNamedBarrier(int numProducer,int numConsumer,NamedBarrierType type,G4_Operand * barrierId)127 void IR_Builder::generateNamedBarrier(
128     int numProducer, int numConsumer,
129     NamedBarrierType type, G4_Operand* barrierId)
130 {
131     struct NamedBarrierPayload
132     {
133         uint32_t id : 8;
134         uint32_t fence : 4;
135         uint32_t padding : 2;
136         uint32_t type : 2;
137         uint32_t consumer : 8;
138         uint32_t producer: 8;
139     };
140 
141     union
142     {
143         NamedBarrierPayload payload;
144         uint32_t data;
145     } payload;
146 
147     payload.data = 0;
148     payload.payload.consumer = numConsumer;
149     payload.payload.producer = numProducer;
150 
151     auto getVal = [](NamedBarrierType type)
152     {
153         switch (type)
154         {
155         case NamedBarrierType::BOTH:
156             return 0;
157         case NamedBarrierType::PRODUCER:
158             return 1;
159         case NamedBarrierType::CONSUMER:
160             return 2;
161         default:
162             assert(false && "unrecognized NM barreir type");
163             return -1;
164         }
165     };
166     payload.payload.type = getVal(type);
167 
168     G4_Declare* header = createTempVar(8, Type_UD, GRFALIGN);
169     if (barrierId->isImm())
170     {
171         payload.payload.id = (uint8_t)barrierId->asImm()->getInt();
172         auto dst = createDst(header->getRegVar(), 0, 2, 1, Type_UD);
173         auto src = createImm(payload.data, Type_UD);
174         createMov(g4::SIMD1, dst, src, InstOpt_WriteEnable, true);
175     }
176     else
177     {
178         // barrier id should be a srcRegion with int type
179         // and (1) Hdr.2:ud barrierId 0xFF
180         // or (1) Hdr.2:ud Hdr.2 payload.data
181         assert(barrierId->isSrcRegRegion() && IS_INT(barrierId->getType()) && "expect barrier id to be int");
182         auto dst = createDst(header->getRegVar(), 0, 2, 1, Type_UD);
183         auto src1 = createImm(0xFF, Type_UD);
184         createBinOp(G4_and, g4::SIMD1, dst, barrierId, src1, InstOpt_WriteEnable, true);
185         dst = createDst(header->getRegVar(), 0, 2, 1, Type_UD);
186         auto orSrc0 = createSrc(header->getRegVar(), 0, 2,
187             getRegionScalar(), Type_UD);
188         auto orSrc1 = createImm(payload.data, Type_UD);
189         createBinOp(G4_or, g4::SIMD1, dst, orSrc0, orSrc1, InstOpt_WriteEnable, true);
190     }
191 
192     // 1 message length, 0 response length, no header, no ack
193     int desc = (0x1 << 25) + 0x4;
194 
195     auto msgDesc = createSyncMsgDesc(SFID::GATEWAY, desc);
196     createSendInst(
197         nullptr,
198         G4_send,
199         g4::SIMD1,
200         createNullDst(Type_UD),
201         createSrcRegRegion(header, getRegionStride1()),
202         createImm(desc, Type_UD),
203         InstOpt_WriteEnable,
204         msgDesc,
205         true);
206 }
207 
generateNamedBarrier(G4_Operand * barrierId,G4_SrcRegRegion * threadCount)208 void IR_Builder::generateNamedBarrier(G4_Operand* barrierId, G4_SrcRegRegion* threadCount)
209 {
210     G4_Declare* header = createTempVar(8, Type_UD, GRFALIGN);
211 
212     // mov (1) Hdr.2<1>:ud 0x0
213     // mov (2) Hdr.10<1>:ub threadcount:ub
214     // mov (1) Hdr.8<1>:ub barrierId:ub
215     auto dst = createDst(header->getRegVar(), 0, 2, 1, Type_UD);
216     auto src = createImm(0, Type_UD);
217     createMov(g4::SIMD1, dst, src, InstOpt_WriteEnable, true);
218     dst = createDst(header->getRegVar(), 0, 10, 1, Type_UB);
219     createMov(g4::SIMD2, dst, threadCount, InstOpt_WriteEnable, true);
220     dst = createDst(header->getRegVar(), 0, 8, 1, Type_UB);
221     createMov(g4::SIMD1, dst, barrierId, InstOpt_WriteEnable, true);
222 
223     // 1 message length, 0 response length, no header, no ack
224     int desc = (0x1 << 25) + 0x4;
225 
226     auto msgDesc = createSyncMsgDesc(SFID::GATEWAY, desc);
227     createSendInst(
228         nullptr,
229         G4_send,
230         g4::SIMD1,
231         createNullDst(Type_UD),
232         createSrcRegRegion(header, getRegionStride1()),
233         createImm(desc, Type_UD),
234         InstOpt_WriteEnable,
235         msgDesc,
236         true);
237 }
238 
generateSingleBarrier()239 void IR_Builder::generateSingleBarrier()
240 {
241     // single barrier: # producer = # consumer = # threads, barrier id = 0
242     // For now produce no fence
243     // Number of threads per threadgroup is r0.2[31:24]
244     //   mov (1) Hdr.2<1>:ud 0x0
245     //   mov (2) Hdr.10<1>:ub R0.11<0;1,0>:ub
246     // This SIMD2 byte move is broadcasting the thread group size
247     // from the r0 header into both the producer and consumer slots.
248     //   Hdr.2:d[31:24,23:16]
249     G4_Declare* header = createTempVar(8, Type_UD, GRFALIGN);
250     auto dst = createDst(header->getRegVar(), 0, 2, 1, Type_UD);
251     auto src = createImm(0, Type_UD);
252     createMov(g4::SIMD1, dst, src, InstOpt_WriteEnable, true);
253     dst = createDst(header->getRegVar(), 0 , 10, 1, Type_UB);
254     auto src0 = createSrc(getBuiltinR0()->getRegVar(), 0, 11,
255         getRegionScalar(), Type_UB);
256     createMov(g4::SIMD2, dst, src0, InstOpt_WriteEnable, true);
257     // 1 message length, 0 response length, no header, no ack
258     int desc = (0x1 << 25) + 0x4;
259 
260     auto msgDesc = createSyncMsgDesc(SFID::GATEWAY, desc);
261     createSendInst(
262         nullptr,
263         G4_send,
264         g4::SIMD1,
265         createNullDst(Type_UD),
266         createSrcRegRegion(header, getRegionStride1()),
267         createImm(desc, Type_UD),
268         InstOpt_WriteEnable,
269         msgDesc,
270         true);
271 }
272 
checkNamedBarrierSrc(G4_Operand * src,bool isBarrierId)273 static void checkNamedBarrierSrc(G4_Operand* src, bool isBarrierId)
274 {
275     if (src->isImm())
276     {
277         if (isBarrierId)
278         {
279             uint32_t val = (uint32_t)src->asImm()->getInt();
280             assert(val < 32 && "illegal named barrier id");
281         }
282     }
283     else if (src->isSrcRegRegion())
284     {
285         assert(src->asSrcRegRegion()->isScalar() && "barrier id should have scalar region");
286         assert(IS_BTYPE(src->getType()) && "illegal barrier opperand type");
287     }
288     else
289     {
290         assert(false && "illegal barrier id operand");
291     }
292 }
293 
translateVISANamedBarrierWait(G4_Operand * barrierId)294 int IR_Builder::translateVISANamedBarrierWait(G4_Operand* barrierId)
295 {
296     TIME_SCOPE(VISA_BUILDER_IR_CONSTRUCTION);
297 
298     checkNamedBarrierSrc(barrierId, true);
299 
300     G4_Operand* barSrc = barrierId;
301     if (barrierId->isSrcRegRegion()) {
302         // sync can take only flag src
303         G4_Declare* flagDecl = createTempFlag(1);
304         createMov(g4::SIMD1, createDstRegRegion(flagDecl, 1), barrierId,
305             InstOpt_WriteEnable, true);
306         barSrc = createSrcRegRegion(flagDecl, getRegionScalar());
307     }
308     // wait barrierId
309     createInst(nullptr, G4_wait, nullptr, g4::NOSAT, g4::SIMD1, nullptr, barSrc, nullptr,
310         InstOpt_WriteEnable, true);
311 
312     return VISA_SUCCESS;
313 }
314 
translateVISANamedBarrierSignal(G4_Operand * barrierId,G4_Operand * threadCount)315 int IR_Builder::translateVISANamedBarrierSignal(G4_Operand* barrierId, G4_Operand* threadCount)
316 {
317     TIME_SCOPE(VISA_BUILDER_IR_CONSTRUCTION);
318 
319     checkNamedBarrierSrc(barrierId, true);
320     checkNamedBarrierSrc(threadCount, false);
321 
322     if (threadCount->isImm())
323     {
324         int numThreads = (int)threadCount->asImm()->getInt();
325         generateNamedBarrier(numThreads, numThreads, NamedBarrierType::BOTH, barrierId);
326     }
327     else
328     {
329         generateNamedBarrier(barrierId, threadCount->asSrcRegRegion());
330     }
331 
332     return VISA_SUCCESS;
333 }
334 
335 
336 // create a fence instruction to the data cache
337 // flushParam --
338 //              bit 0 -- commit enable
339 //              bit 1-4 -- L3 flush parameters
340 //              bit 5 -- global/SLM
341 //              bit 6 -- L1 flush
342 //              bit 7 -- SW fence only; a scheduling barrier but does not generate any code
343 // bit 7, if set, takes precedence over other bits
createFenceInstruction(uint8_t flushParam,bool commitEnable,bool globalMemFence,bool isSendc=false)344 G4_INST* IR_Builder::createFenceInstruction(
345     uint8_t flushParam, bool commitEnable, bool globalMemFence,
346     bool isSendc = false)
347 {
348 #define L1_FLUSH_MASK 0x40
349 
350     int flushBits = (flushParam >> 1) & 0xF;
351     assert(!supportsLSC() && "LSC fence should be handled elsewhere");
352     if (noL3Flush())
353     {
354         // L3 flush is no longer required for image memory
355         flushBits = 0;
356     }
357 
358     bool L1Flush = (flushParam & L1_FLUSH_MASK) != 0 &&
359         !(hasSLMFence() && !globalMemFence);
360 
361     int desc = 0x7 << 14 | ((commitEnable ? 1 : 0) << 13);
362 
363     desc |= flushBits << 9;
364 
365     if (L1Flush)
366     {
367 #define L1_FLUSH_BIT_LOC 8
368         desc |= 1 << L1_FLUSH_BIT_LOC;
369     }
370 
371     G4_Declare *srcDcl = getBuiltinR0();
372     G4_Declare *dstDcl = createTempVar(8, Type_UD, Any);
373     G4_DstRegRegion *sendDstOpnd = commitEnable ? createDstRegRegion(dstDcl, 1) : createNullDst(Type_UD);
374     G4_SrcRegRegion *sendSrcOpnd = createSrcRegRegion(srcDcl, getRegionStride1());
375     uint8_t BTI = 0x0;
376 
377     if (hasSLMFence())
378     {
379         // we must choose either GLOBAL_MEM_FENCE or SLM_FENCE
380         BTI = globalMemFence ? 0 : 0xfe;
381     }
382 
383     // commitEnable = true: msg length = 1, response length = 1, dst == src
384     // commitEnable = false: msg length = 1, response length = 0, dst == null
385     return createSendInst(nullptr, sendDstOpnd, sendSrcOpnd, 1, (commitEnable ? 1 : 0), g4::SIMD8,
386         desc, SFID::DP_DC0, true, SendAccess::READ_WRITE, createImm(BTI, Type_UD), nullptr, InstOpt_WriteEnable, isSendc);
387 }
388 
389 // create a default SLM fence (no flush)
createSLMFence()390 G4_INST* IR_Builder::createSLMFence()
391 {
392     bool commitEnable = needsFenceCommitEnable();
393     if (supportsLSC())
394     {
395         return translateLscFence(SFID::SLM, LSC_FENCE_OP_NONE, LSC_SCOPE_GROUP);
396     }
397     return createFenceInstruction(0, commitEnable, false, false);
398 }
399 
400 
translateVISAWaitInst(G4_Operand * mask)401 int IR_Builder::translateVISAWaitInst(G4_Operand* mask)
402 {
403     TIME_SCOPE(VISA_BUILDER_IR_CONSTRUCTION);
404 
405     // clear TDR if mask is not null and not zero
406     if (mask && !(mask->isImm() && mask->asImm()->getInt() == 0))
407     {
408         // mov (1) f0.0<1>:uw <TDR_bits>:ub {NoMask}
409         G4_Declare* tmpFlagDcl = createTempFlag(1);
410         G4_DstRegRegion* newPredDef = createDstRegRegion(tmpFlagDcl, 1);
411         createMov(g4::SIMD1, newPredDef, mask, InstOpt_WriteEnable, true);
412 
413         // (f0.0) and (8) tdr0.0<1>:uw tdr0.0<8;8,1>:uw 0x7FFF:uw {NoMask}
414         G4_Predicate* predOpnd = createPredicate(PredState_Plus, tmpFlagDcl->getRegVar(), 0, PRED_DEFAULT);
415         G4_DstRegRegion* TDROpnd = createDst(phyregpool.getTDRReg(), 0, 0, 1, Type_UW);
416         G4_SrcRegRegion* TDRSrc = createSrc(phyregpool.getTDRReg(), 0, 0, getRegionStride1(), Type_UW);
417         createInst(predOpnd, G4_and, NULL, g4::NOSAT, g4::SIMD8,
418             TDROpnd, TDRSrc, createImm(0x7FFF, Type_UW), InstOpt_WriteEnable, true);
419     }
420 
421     createIntrinsicInst(nullptr, Intrinsic::Wait, g4::SIMD1,
422         nullptr, nullptr, nullptr, nullptr, InstOpt_WriteEnable, true);
423 
424     return VISA_SUCCESS;
425 }
426 
427 
generateBarrierSend()428 void IR_Builder::generateBarrierSend()
429 {
430     if (hasUnifiedBarrier())
431     {
432         generateSingleBarrier();
433         return;
434     }
435 
436     // 1 message length, 0 response length, no header, no ack
437     int desc = (0x1 << 25) + 0x4;
438 
439     //get barrier id
440     G4_Declare *dcl = createSendPayloadDcl(GENX_DATAPORT_IO_SZ, Type_UD);
441 
442     G4_SrcRegRegion* r0_src_opnd = createSrc(
443         builtinR0->getRegVar(),
444         0,
445         2,
446         getRegionScalar(),
447         Type_UD);
448 
449     G4_DstRegRegion *dst1_opnd = createDstRegRegion(dcl, 1);
450 
451     bool enableBarrierInstCounterBits = kernel.getOption(VISA_EnableBarrierInstCounterBits);
452     int mask = getBarrierMask(enableBarrierInstCounterBits);
453 
454     G4_Imm *g4Imm = createImm(mask, Type_UD);
455 
456     createBinOp(
457         G4_and,
458         g4::SIMD8,
459         dst1_opnd,
460         r0_src_opnd,
461         g4Imm,
462         InstOpt_WriteEnable,
463         true);
464 
465     // Generate the barrier send message
466     auto msgDesc = createSyncMsgDesc(SFID::GATEWAY, desc);
467     createSendInst(
468         NULL,
469         G4_send,
470         g4::SIMD1,
471         createNullDst(Type_UD),
472         createSrcRegRegion(dcl, getRegionStride1()),
473         createImm(desc, Type_UD),
474         InstOpt_WriteEnable,
475         msgDesc,
476         true);
477 }
478 
generateBarrierWait()479 void IR_Builder::generateBarrierWait()
480 {
481     G4_Operand* waitSrc = nullptr;
482     if (!hasUnifiedBarrier()) {
483 
484         if (getPlatform() < GENX_TGLLP) {
485             // before Xe: wait n0.0<0;1,0>:ud
486             waitSrc = createSrc(phyregpool.getN0Reg(),
487                 0, 0, getRegionScalar(), Type_UD);
488         } else {
489             // Xe: sync.bar null
490             waitSrc = createNullSrc(Type_UD);
491         }
492     }
493     else {
494         if (getPlatform() >= GENX_PVC) {
495             // PVC: sync.bar 0
496             waitSrc = createImm(0, Type_UD);
497         } else {
498             // DG2: sync.bar null
499             waitSrc = createNullSrc(Type_UD);
500         }
501     }
502     createInst(nullptr, G4_wait, nullptr, g4::NOSAT, g4::SIMD1,
503         nullptr, waitSrc, nullptr, InstOpt_WriteEnable, true);
504 }
505 
translateVISASyncInst(ISA_Opcode opcode,unsigned int mask)506 int IR_Builder::translateVISASyncInst(ISA_Opcode opcode, unsigned int mask)
507 {
508     TIME_SCOPE(VISA_BUILDER_IR_CONSTRUCTION);
509 
510     switch (opcode)
511     {
512     case ISA_BARRIER:
513     {
514         generateBarrierSend();
515         generateBarrierWait();
516     }
517     break;
518     case ISA_SAMPLR_CACHE_FLUSH:
519     {
520         // msg length = 1, response length = 1, header_present = 1,
521         // Bit 16-12 = 11111 for Sampler Message Type
522         // Bit 18-17 = 11 for SIMD32 mode
523         int desc = (1 << 25) + (1 << 20) + (1 << 19) + (0x3 << 17) + (0x1F << 12);
524 
525         G4_Declare *dcl = getBuiltinR0();
526         G4_Declare *dstDcl = createTempVar(8, Type_UD, Any);
527         G4_DstRegRegion* sendDstOpnd = createDstRegRegion(dstDcl, 1);
528         G4_SrcRegRegion* sendMsgOpnd = createSrcRegRegion(dcl, getRegionStride1());
529 
530         auto msgDesc = createSyncMsgDesc(SFID::SAMPLER, desc);
531         createSendInst(nullptr, G4_send, g4::SIMD8, sendDstOpnd, sendMsgOpnd,
532             createImm(desc, Type_UD), 0, msgDesc, true);
533 
534         G4_SrcRegRegion* moveSrcOpnd = createSrc(dstDcl->getRegVar(), 0, 0, getRegionStride1(), Type_UD);
535         createMovInst(dstDcl, 0, 0, g4::SIMD8, NULL, NULL, moveSrcOpnd);
536     }
537     break;
538     case ISA_WAIT:
539     {
540         //This should be handled by translateVISAWait() now
541         MUST_BE_TRUE(false, "Should not reach here");
542     }
543     break;
544     case ISA_YIELD:
545     {
546         G4_INST* lastInst = instList.empty() ? nullptr : instList.back();
547         if (lastInst && lastInst->opcode() != G4_label)
548         {
549             lastInst->setOptionOn(InstOpt_Switch);
550         }
551         else
552         {
553             // dummy move to apply the {switch}
554             G4_SrcRegRegion* srcOpnd = createSrc(getBuiltinR0()->getRegVar(), 0, 0, getRegionScalar(), Type_UD);
555             G4_DstRegRegion* dstOpnd = createDst(getBuiltinR0()->getRegVar(), 0, 0, 1, Type_UD);
556 
557             G4_INST* nop = createMov(g4::SIMD1, dstOpnd, srcOpnd, InstOpt_NoOpt, true);
558             nop->setOptionOn(InstOpt_Switch);
559         }
560     }
561     break;
562     case ISA_FENCE:
563     {
564 #define GLOBAL_MASK 0x20
565         union fenceParam
566         {
567             VISAFenceMask mask;
568             uint8_t data;
569         };
570 
571         fenceParam fenceMask;
572         fenceMask.data = mask & 0xFF;
573         bool globalFence = (mask & GLOBAL_MASK) == 0;
574 
575         if (fenceMask.mask.SWFence)
576         {
577             createIntrinsicInst(
578                 nullptr, Intrinsic::MemFence, g4::SIMD1,
579                 nullptr, nullptr, nullptr, nullptr, InstOpt_NoOpt, true);
580         }
581         else if (VISA_WA_CHECK(m_pWaTable, WADisableWriteCommitForPageFault))
582         {
583             // write commit does not work under page fault
584             // so we generate a fence without commit, followed by a read surface info to BTI 0
585             createFenceInstruction((uint8_t) mask & 0xFF, false, globalFence);
586             G4_Imm* surface = createImm(0, Type_UD);
587             G4_Declare* zeroLOD = createTempVar(8, Type_UD, Any);
588             createMovInst(zeroLOD, 0, 0, g4::SIMD8, NULL, NULL, createImm(0, Type_UD));
589             G4_SrcRegRegion* sendSrc = createSrcRegRegion(zeroLOD, getRegionStride1());
590             G4_DstRegRegion* sendDst = createDstRegRegion(zeroLOD, 1);
591             ChannelMask maskR = ChannelMask::createFromAPI(CHANNEL_MASK_R);
592             translateVISAResInfoInst(EXEC_SIZE_8, vISA_EMASK_M1, maskR, surface, sendSrc, sendDst);
593         }
594         else if (supportsLSC())
595         {
596             // translate legacy fence into the LSC fence
597             // for local fence we translate into a SLM fence with TG scope
598             // for global fence we translate into a untyped and typed fence with GPU scope
599             // ToDo: may need a global flag to let user control the fence scope
600             if (globalFence)
601             {
602                 auto fenceControl = supportsSampler() ? LSC_FENCE_OP_EVICT : LSC_FENCE_OP_NONE;
603                 if (fenceMask.mask.flushRWCache)
604                 {
605                     fenceControl = LSC_FENCE_OP_FLUSHL3;
606                 }
607                 translateLscFence(SFID::UGM, fenceControl, LSC_SCOPE_GPU);
608                 translateLscFence(SFID::TGM, fenceControl, LSC_SCOPE_GPU);
609             }
610             else
611             {
612                 translateLscFence(SFID::SLM, LSC_FENCE_OP_NONE, LSC_SCOPE_GROUP);
613             }
614         }
615         else
616         {
617             createFenceInstruction((uint8_t) mask & 0xFF, (mask & 0x1) == 0x1, globalFence);
618             // The move to ensure the fence is actually complete will be added at the end of compilation,
619             // in Optimizer::HWWorkaround()
620         }
621         break;
622     }
623     default:
624         return VISA_FAILURE;
625     }
626 
627     return VISA_SUCCESS;
628 }
629 
translateVISASplitBarrierInst(bool isSignal)630 int IR_Builder::translateVISASplitBarrierInst(bool isSignal)
631 {
632     TIME_SCOPE(VISA_BUILDER_IR_CONSTRUCTION);
633 
634     if (isSignal)
635     {
636         generateBarrierSend();
637     }
638     else
639     {
640         generateBarrierWait();
641     }
642 
643     return VISA_SUCCESS;
644 }
645