1 /*========================== begin_copyright_notice ============================
2 
3 Copyright (C) 2020-2021 Intel Corporation
4 
5 SPDX-License-Identifier: MIT
6 
7 ============================= end_copyright_notice ===========================*/
8 
9 #include "BuildIR.h"
10 #include "../Timer.h"
11 
12 #include <cmath>
13 
14 using namespace vISA;
15 
// ORs a data-port message type into a message descriptor, starting at bit 14
// of `dest`. Both parameters are parenthesized so that arguments containing
// low-precedence operators (e.g. `a | b`) are shifted as a whole.
// NOTE: the expansion intentionally keeps its trailing ';' because existing
// call sites in this file invoke the macro without one.
#define SET_DATAPORT_MESSAGE_TYPE(dest, value)\
    (dest) |= ((value) << 14);

// Bit offset at which this file places the top/bottom field selector
// (0x6 / 0x7) of a media block message descriptor.
#define MESSAGE_SPECIFIC_CONTROL 8
20 
21 
getObjWidth(unsigned blockWidth,unsigned blockHeight,G4_Declare * dcl)22 static unsigned int getObjWidth(
23     unsigned blockWidth, unsigned blockHeight, G4_Declare * dcl)
24 {
25     // makes sure io_width is divisible by 4
26     unsigned ioWidth = (blockWidth + TypeSize(Type_D) - 1) & (~(TypeSize(Type_D) - 1));
27     // gets next power of 2 size
28     return Round_Up_Pow2(ioWidth / dcl->getElemSize()) * dcl->getElemSize();
29 }
30 
31 
32 /*
33 * Translates Media Block read CISA inst.
34 *
35 * read(I, X, Y, matrix<int,C,R> M)
* Assume C = R = 8 then code should look like
37 *
38 * .declare  VX Base=m ElementSize=4 Type=ud Total=8
39 * .declare  VY Base=r ElementSize=4 Type=ud Total=8
40 *
41 * mov  (8)     VX(0,0)<1>,  r0.0:ud
42 * mov  (1)     VX(0,2)<1>,  0x0007001f   // 8 rows, 32 bytes
43 * mov  (1)     VX(0,1)<1>,  Y
44 * mov  (1)     VX(0,0)<1>,  X
45 * send (8)     VY(0,0)<1>,  VX(0,0),    null,  0x04186000
46 * mov  (8)     M(0,0)<1>,   VY(0,0)
47 *
48 * 0x0007001f == (R-1)<<16 + C * sizeof(el_type) - 1;
49 *
50 * 0x04186000 ==
51 *  (((ObjectSize - 1) / numEltPerGRF<Type_UB>() + 1)) << 16 +
52 *          0x4100000 + 0x6000 + I;
53 *
54 * ObjectSize = RoundUpPow2(C) * R * sizeof(el_type);
55 */
translateVISAMediaLoadInst(MEDIA_LD_mod mod,G4_Operand * surface,unsigned planeID,unsigned blockWidth,unsigned blockHeight,G4_Operand * xOffOpnd,G4_Operand * yOffOpnd,G4_DstRegRegion * dstOpnd)56 int IR_Builder::translateVISAMediaLoadInst(
57     MEDIA_LD_mod mod,
58     G4_Operand* surface,
59     unsigned planeID,
60     unsigned blockWidth,
61     unsigned blockHeight,
62     G4_Operand* xOffOpnd,
63     G4_Operand* yOffOpnd,
64     G4_DstRegRegion* dstOpnd)
65 {
66     TIME_SCOPE(VISA_BUILDER_IR_CONSTRUCTION);
67 
68     unsigned temp;
69 
70     unsigned objWidth = 0;
71     if (blockWidth != 0)
72     {
73         objWidth = getObjWidth(blockWidth, blockHeight, dstOpnd->getBase()->asRegVar()->getDeclare());
74     }
75     unsigned obj_size = objWidth * blockHeight;
76 
77     /* mov (8)      VX(0,0)<1>,  r0:ud  */
78     // add dcl for VX
79     G4_Declare *dcl = createSendPayloadDcl(GENX_DATAPORT_IO_SZ, Type_UD);
80 
81     // create MOV inst
82     createMovR0Inst(dcl, 0, 0, true);
83     /* mov (1)      VX(0,2)<1>,    CONST[R,C]  */
84     temp = (blockHeight - 1) << 16 | (blockWidth - 1);
85     createMovInst(dcl, 0, 2, g4::SIMD1, NULL, NULL, createImm(temp, Type_UD), true);
86     /* mov (1)     VX(0,0)<1>,    X  */
87     createMovInst(dcl, 0, 0, g4::SIMD1, NULL, NULL, xOffOpnd, true);
88     /* mov (1)     VX(0,1)<1>,   Y  */
89     createMovInst(dcl, 0, 1, g4::SIMD1, NULL, NULL, yOffOpnd, true);
90 
91     // send's operands preparation
92     // create a currDst for VX
93     G4_SrcRegRegion* payload = createSrcRegRegion(dcl, getRegionStride1());
94 
95     // mediaread overwrites entire GRF
96     bool via_temp = false;
97     G4_Operand *original_dst = NULL;
98     G4_Declare *new_dcl = NULL;
99 
100     if (obj_size < numEltPerGRF<Type_UB>())
101     {
102         via_temp = true;
103     }
104     else
105     {
106         unsigned byte_subregoff =
107             dstOpnd->asDstRegRegion()->getSubRegOff() * dstOpnd->getTypeSize();
108         G4_VarBase *base = dstOpnd->asDstRegRegion()->getBase();
109         G4_Declare *dcl = base->asRegVar()->getDeclare();
110 
111         if (byte_subregoff  % numEltPerGRF<Type_UB>() != 0)
112         {
113             via_temp = true;
114         }
115         else
116         {
117             G4_Declare *aliasdcl = dcl;
118             bool false_alias_align = false;
119             while (aliasdcl->getAliasDeclare()) {
120                 if (aliasdcl->getAliasOffset() % numEltPerGRF<Type_UB>() != 0) {
121                     false_alias_align = true;
122                     break;
123                 }
124                 aliasdcl = aliasdcl->getAliasDeclare();
125             }
126             if (false_alias_align) {
127                 via_temp = true;
128             }
129         }
130     }
131 
132     if (via_temp == true)
133     {
134         original_dst = dstOpnd;
135         new_dcl = createTempVar(numEltPerGRF<Type_UB>()/TypeSize(Type_UD),
136             Type_UD, GRFALIGN);
137         G4_DstRegRegion* tmp_dst_opnd = createDst(
138             new_dcl->getRegVar(),
139             0,
140             0,
141             1,
142             Type_UD);
143 
144         dstOpnd = tmp_dst_opnd;
145     }
146 
147     G4_DstRegRegion* d = checkSendDst(dstOpnd->asDstRegRegion());
148 
149     temp = 0;
150     if ((mod == MEDIA_LD_top) || (mod == MEDIA_LD_top_mod)) {
151         temp += 0x6 << MESSAGE_SPECIFIC_CONTROL;    // Read top fields
152     } else if ((mod == MEDIA_LD_bottom) || (mod == MEDIA_LD_bottom_mod)) {
153         temp += 0x7 << MESSAGE_SPECIFIC_CONTROL;    // Read bottom fields
154     }
155 
156     SET_DATAPORT_MESSAGE_TYPE(temp, DC1_MEDIA_BLOCK_READ)
157 
158     temp += planeID;
159 
160     G4_ExecSize send_exec_size(GENX_DATAPORT_IO_SZ);
161     if (IS_WTYPE(d->getType()))
162     {
163         send_exec_size *= 2;
164     }
165 
166     createSendInst(
167         NULL,
168         d,
169         payload,
170         1,
171         (obj_size - 1) / numEltPerGRF<Type_UB>() + 1,
172         send_exec_size,
173         temp,
174         SFID::DP_DC1,
175         1,
176         SendAccess::READ_ONLY,
177         surface,
178         NULL,
179         InstOpt_WriteEnable,
180         false);
181 
182     if (via_temp)
183     {
184         G4_Declare *new_dcl2 = createTempVar(
185             numEltPerGRF<Type_UB>()/original_dst->getTypeSize(),
186             original_dst->getType(), GRFALIGN);
187 
188         new_dcl2->setAliasDeclare(new_dcl, 0);
189 
190         unsigned short remained_ele = obj_size / original_dst->getTypeSize();
191         // max execution size is 32
192         G4_ExecSize curr_exec_size = G4_ExecSize(getNativeExecSize() * 2);
193         unsigned char curr_offset = 0;
194 
195         G4_Type dstType = original_dst->getType();
196         while (remained_ele >= 1)
197         {
198             short dst_regoff = original_dst->asDstRegRegion()->getRegOff();
199             short dst_subregoff = original_dst->asDstRegRegion()->getSubRegOff();
200             if (remained_ele >= curr_exec_size)
201             {
202                 G4_SrcRegRegion *tmp_src_opnd = createSrc(
203                     new_dcl2->getRegVar(),
204                     0,
205                     curr_offset,
206                     curr_exec_size == g4::SIMD1 ? getRegionScalar() : getRegionStride1(),
207                     original_dst->getType());
208 
209                 dst_subregoff += curr_offset;
210                 short ele_per_grf = numEltPerGRF<Type_UB>()/TypeSize(dstType);
211                 if (dst_subregoff >= ele_per_grf)
212                 {
213                     dst_regoff += 1;
214                     dst_subregoff -= ele_per_grf;
215                 }
216                 G4_DstRegRegion* tmp_dst_opnd = createDst(
217                     original_dst->asDstRegRegion()->getBase(),
218                     dst_regoff,
219                     dst_subregoff,
220                     1,
221                     original_dst->getType());
222 
223                 createMov(curr_exec_size, tmp_dst_opnd, tmp_src_opnd, InstOpt_WriteEnable, true);
224                 curr_offset += curr_exec_size;
225                 remained_ele -= curr_exec_size;
226             }
227             curr_exec_size /= 2;
228         }
229     }
230 
231     return VISA_SUCCESS;
232 }
233 
234 /*
235 * Translates Media Block write CISA inst.
236 *
237 * write(I, X, Y, matrix<int,C,R> M)
* Assume C = R = 8 then code should look like
239 *
240 * .declare  VX Base=m ElementSize=4 Type=ud Total=72
241 * .declare  VY Base=m ElementSize=4 Type=ud Total=64 ALIAS(VX,32)
242 *
243 * mov  (8)     VX(0,0)<1>,  r0.0:ud
244 * mov  (64)    VY(0,0)<1>,  M
245 * mov  (1)     VX(0,2)<1>,  0x0007001f   // 8 rows, 32 bytes
246 * mov  (1)     VX(0,1)<1>,  Y
247 * mov  (1)     VX(0,0)<1>,  X
248 * send (8)     null<1>,  VX(0,0),  null,   0x05902000
249 *
250 * 72 = 8 + C * R
251 * 0x0007001f is (R-1)<<16 + C * sizeof(el_type) - 1
252 *
253 * 0x05902000 ==
254 *  ((((ObjectSize - 1) / numEltPerGRF<Type_UB>() + 1)) + 1)<<20 +
255 *          0x5000000 + 0x2000 + I
256 * ObjectSize = RoundUpPow2(C) * R * sizeof(el_type)
257 */
translateVISAMediaStoreInst(MEDIA_ST_mod mod,G4_Operand * surface,unsigned planeID,unsigned blockWidth,unsigned blockHeight,G4_Operand * xOffOpnd,G4_Operand * yOffOpnd,G4_SrcRegRegion * srcOpnd)258 int IR_Builder::translateVISAMediaStoreInst(
259     MEDIA_ST_mod mod,
260     G4_Operand* surface,
261     unsigned planeID,
262     unsigned blockWidth,
263     unsigned blockHeight,
264     G4_Operand* xOffOpnd,
265     G4_Operand* yOffOpnd,
266     G4_SrcRegRegion* srcOpnd)
267 {
268     TIME_SCOPE(VISA_BUILDER_IR_CONSTRUCTION);
269 
270     int objWidth = 0;
271     if (blockWidth != 0)
272     {
273         objWidth = getObjWidth(blockWidth, blockHeight, srcOpnd->getBase()->asRegVar()->getDeclare());
274     }
275     unsigned obj_size = objWidth * blockHeight;
276     unsigned int new_obj_size = obj_size;
277 
278     auto setTopBottomForDesc = [](uint32_t desc, MEDIA_ST_mod mod)
279     {
280         if (mod == MEDIA_ST_top)
281         {
282             return desc + (0x6 << MESSAGE_SPECIFIC_CONTROL);    // Write top fields
283         }
284         else if (mod == MEDIA_ST_bottom)
285         {
286             return desc + (0x7 << MESSAGE_SPECIFIC_CONTROL);    // Write bottom fields
287         }
288         return desc;
289     };
290 
291     bool forceSplitSend = shouldForceSplitSend(surface);
292     if (forceSplitSend || useSends())
293     {
294         // use split send
295         G4_Declare *headerDcl = createSendPayloadDcl(GENX_DATAPORT_IO_SZ, Type_UD);
296         createMovR0Inst(headerDcl, 0, 0, true);
297         /* mov (1)      VX(0,2)<1>,    CONST[R,C]  */
298         uint32_t temp = (blockHeight - 1) << 16 | (blockWidth - 1);
299         createMovInst(headerDcl, 0, 2, g4::SIMD1, NULL, NULL, createImm(temp, Type_UD), true);
300 
301         /* mov (1)     VX(0,0)<1>,    X  */
302         createMovInst(headerDcl, 0, 0, g4::SIMD1, NULL, NULL, xOffOpnd, true);
303 
304         /* mov (1)     VX(0,1)<1>,   Y  */
305         createMovInst(headerDcl, 0, 1, g4::SIMD1, NULL, NULL, yOffOpnd, true);
306 
307         G4_SrcRegRegion* headerOpnd = createSrcRegRegion(headerDcl, getRegionStride1());
308 
309         unsigned msgDesc = setTopBottomForDesc(0, mod);
310         SET_DATAPORT_MESSAGE_TYPE(msgDesc, DC1_MEDIA_BLOCK_WRITE)
311 
312             msgDesc += planeID;
313         // message length = 1, response length = 0, header present = 1
314         msgDesc += (1 << getSendMsgLengthBitOffset()) + (1 << getSendHeaderPresentBitOffset());
315         G4_DstRegRegion *dstOpnd = createNullDst(Type_UD);
316 
317         unsigned extMsgLength = (obj_size - 1) / numEltPerGRF<Type_UB>() + 1;
318         uint16_t extFuncCtrl = 0;
319 
320         G4_SendDescRaw * desc = createSendMsgDesc(msgDesc, 0, 1, SFID::DP_DC1,
321             extMsgLength, extFuncCtrl, SendAccess::WRITE_ONLY, surface);
322 
323         createSplitSendInst(
324             nullptr, dstOpnd, headerOpnd, srcOpnd, g4::SIMD8, desc, InstOpt_WriteEnable, false);
325     }
326     else
327     {
328         uint32_t temp =  new_obj_size/TypeSize(Type_UD) + GENX_DATAPORT_IO_SZ;
329 
330         G4_Declare *dcl = createSendPayloadDcl(temp, Type_UD);
331 
332         /* mov  (c*r)    VX(1,0)<1>,  M */
333         /* decl for data to write */
334         temp =  obj_size/TypeSize(Type_UD);
335 
336         createMovSendSrcInst(dcl, 1, 0, temp, srcOpnd, InstOpt_WriteEnable);
337 
338         createMovR0Inst(dcl, 0, 0, true);
339 
340         /* mov (1)      VX(0,2)<1>,    CONST[R,C]  */
341         temp = (blockHeight - 1) << 16 | (blockWidth - 1);
342         createMovInst(dcl, 0, 2, g4::SIMD1, NULL, NULL, createImm(temp, Type_UD), true);
343 
344         /* mov (1)     VX(0,0)<1>,    X  */
345         createMovInst(dcl, 0, 0, g4::SIMD1, NULL, NULL, xOffOpnd, true);
346 
347         /* mov (1)     VX(0,1)<1>,   Y  */
348         createMovInst(dcl, 0, 1, g4::SIMD1, NULL, NULL, yOffOpnd, true);
349 
350         // send's operands preparation
351         /* Size of whole operand in UINT elements */
352         G4_SrcRegRegion* payload = createSrcRegRegion(dcl, getRegionStride1());
353 
354         uint32_t funcCtrl = setTopBottomForDesc(0, mod);
355         SET_DATAPORT_MESSAGE_TYPE(funcCtrl, DC1_MEDIA_BLOCK_WRITE);
356 
357         funcCtrl += planeID;
358         G4_DstRegRegion *post_dst_opnd = createNullDst(Type_UD);
359 
360         createSendInst(
361             NULL,
362             post_dst_opnd,
363             payload,
364             ((obj_size - 1) / numEltPerGRF<Type_UB>() + 1) + 1,
365             0,
366             G4_ExecSize(GENX_DATAPORT_IO_SZ),
367             funcCtrl,
368             SFID::DP_DC1,
369             1,
370             SendAccess::WRITE_ONLY,
371             surface,
372             NULL,
373             InstOpt_WriteEnable,
374             false);
375     }
376 
377     return VISA_SUCCESS;
378 }
379 
380 
translateVISAVmeImeInst(uint8_t stream_mode,uint8_t search_ctrl,G4_Operand * surfaceOpnd,G4_Operand * uniInputOpnd,G4_Operand * imeInputOpnd,G4_Operand * ref0Opnd,G4_Operand * ref1Opnd,G4_Operand * costCenterOpnd,G4_DstRegRegion * outputOpnd)381 int IR_Builder::translateVISAVmeImeInst(
382     uint8_t stream_mode,
383     uint8_t search_ctrl,
384     G4_Operand* surfaceOpnd,
385     G4_Operand* uniInputOpnd,
386     G4_Operand* imeInputOpnd,
387     G4_Operand* ref0Opnd,
388     G4_Operand* ref1Opnd,
389     G4_Operand* costCenterOpnd,
390     G4_DstRegRegion* outputOpnd)
391 
392 {
393     TIME_SCOPE(VISA_BUILDER_IR_CONSTRUCTION);
394 
395     // add dcl for VX
396     unsigned input_size_dw;
397 
398     unsigned uni_input_size;
399 
400     uni_input_size = 4;
401 
402     if ((COMMON_ISA_VME_STREAM_MODE) stream_mode != VME_STREAM_IN &&
403         (COMMON_ISA_VME_STREAM_MODE) stream_mode != VME_STREAM_IN_OUT) {
404         input_size_dw = (uni_input_size + 2)*32/TypeSize(Type_UD);
405     } else if ((COMMON_ISA_VME_SEARCH_CTRL) search_ctrl == VME_SEARCH_DUAL_REF_DUAL_REC) {
406         input_size_dw = (uni_input_size + 6)*32/TypeSize(Type_UD);
407     } else {
408         input_size_dw = (uni_input_size + 4)*32/TypeSize(Type_UD);
409     }
410 
411     G4_Declare *dcl = createSendPayloadDcl(input_size_dw, Type_UD);
412 
413     // mov  (96)    VX(0,0)<1>,  UNIInput
414     createMovSendSrcInst(dcl, 0, 0,
415         uni_input_size*32/TypeSize(Type_UD),
416         uniInputOpnd, InstOpt_WriteEnable);
417 
418     // mov  (192)   VX(3,0)<1>,  IMEInput
419     createMovSendSrcInst(dcl, (short) uni_input_size, 0,
420         (input_size_dw - uni_input_size*32/TypeSize(Type_UD)),
421         imeInputOpnd, InstOpt_WriteEnable);
422 
423     // and  (1)     VX(0,13)<1>, VX(0,13):ub, 0xF8
424     G4_DstRegRegion *tmp_dst1_opnd = createDst(
425         dcl->getRegVar(),
426         0,
427         13,
428         1,
429         Type_UB);
430 
431     G4_SrcRegRegion *tmp_src1_opnd = createSrc(
432         dcl->getRegVar(),
433         0,
434         13,
435         getRegionScalar(),
436         Type_UB);
437 
438     createBinOp(G4_and, g4::SIMD1, tmp_dst1_opnd, tmp_src1_opnd,
439         createImm(0xF8, Type_UW), InstOpt_WriteEnable, true);
440 
441     // or   (1)     VX(0,13)<1>, VX(0,13):ub, searchCtrl
442     G4_DstRegRegion *tmp_dst2_opnd = createDst(
443         dcl->getRegVar(),
444         0,
445         13,
446         1,
447         Type_UB);
448 
449     G4_SrcRegRegion *tmp_src2_opnd = createSrc(
450         dcl->getRegVar(),
451         0,
452         13,
453         getRegionScalar(),
454         Type_UB);
455 
456     createBinOp(G4_or, g4::SIMD1, tmp_dst2_opnd, tmp_src2_opnd,
457         createImm(search_ctrl, Type_UW), InstOpt_WriteEnable, true);
458 
459     // mov  (2)     VA(0,0)<1>,  ref0
460     // since ref0 is converted from UW to UD, move it as 1 UD
461     createMovSendSrcInst(dcl, 0, 0, 1, ref0Opnd, InstOpt_WriteEnable);
462 
463     createMovSendSrcInst(dcl, 0, 1, 1, ref1Opnd, InstOpt_WriteEnable);
464 
465     createMovSendSrcInst(dcl, 3, 0, 8, costCenterOpnd, InstOpt_WriteEnable);
466 
467     // send's operands preparation
468     // create a currDst for VX
469     G4_SrcRegRegion* payload = createSrcRegRegion(dcl, getRegionStride1());
470 
471     G4_DstRegRegion* d = checkSendDst(outputOpnd->asDstRegRegion());
472 
473     unsigned temp = 0;            // Bit 7-0 of message descriptor
474     temp += 0x2 << 13;            // Bit 14-13 of message descriptor
475     temp += stream_mode << 15;     // Bit 16-15 of message descriptor
476 
477     unsigned regs2rcv;
478 
479     if ((COMMON_ISA_VME_STREAM_MODE) stream_mode != VME_STREAM_OUT &&
480         (COMMON_ISA_VME_STREAM_MODE) stream_mode != VME_STREAM_IN_OUT) {
481         regs2rcv = 224/numEltPerGRF<Type_UB>();
482     } else if ((COMMON_ISA_VME_SEARCH_CTRL) search_ctrl == VME_SEARCH_DUAL_REF_DUAL_REC) {
483         regs2rcv = 352/numEltPerGRF<Type_UB>();
484     } else {
485         regs2rcv = 288/numEltPerGRF<Type_UB>();
486     }
487 
488     createSendInst(
489         NULL,
490         d,
491         payload,
492         input_size_dw / GENX_DATAPORT_IO_SZ,
493         regs2rcv,
494         G4_ExecSize(GENX_DATAPORT_IO_SZ),
495         temp,
496         SFID::VME,
497         true,
498         SendAccess::READ_ONLY,
499         surfaceOpnd,
500         NULL,
501         InstOpt_WriteEnable,
502         false);
503 
504     return VISA_SUCCESS;
505 }
506 
translateVISAVmeSicInst(G4_Operand * surfaceOpnd,G4_Operand * uniInputOpnd,G4_Operand * sicInputOpnd,G4_DstRegRegion * outputOpnd)507 int IR_Builder::translateVISAVmeSicInst(
508     G4_Operand* surfaceOpnd,
509     G4_Operand* uniInputOpnd,
510     G4_Operand* sicInputOpnd,
511     G4_DstRegRegion* outputOpnd)
512 {
513     TIME_SCOPE(VISA_BUILDER_IR_CONSTRUCTION);
514 
515     unsigned uni_input_size;
516 
517     uni_input_size = 4;
518 
519     // add dcl for VX
520     unsigned input_size_dw = (uni_input_size + 4)*32/TypeSize(Type_UD);
521 
522     G4_Declare *dcl = NULL;
523     G4_Declare *topDcl = uniInputOpnd->getTopDcl();
524 
525     // check if uniInputOpnd and sicInputOpnd are alias to the
526     // same top level decl with consistent payload layout
527     if ((topDcl == sicInputOpnd->getTopDcl()) &&
528         (uniInputOpnd->getByteOffset() == 0) &&
529         (sicInputOpnd->getByteOffset() == uni_input_size*32) &&
530         (topDcl->getByteSize() >= uni_input_size*32 + 128))
531     {
532         dcl = topDcl;
533     }
534     else
535     {
536         dcl = createSendPayloadDcl(input_size_dw, Type_UD);
537         // mov  (96)    VX(0,0)<1>,  UNIInput
538         createMovSendSrcInst(dcl, 0, 0, uni_input_size*32/TypeSize(Type_UD), uniInputOpnd, InstOpt_WriteEnable);
539         // mov  (128)   VX(3,0)<1>,  SICInput
540         createMovSendSrcInst(dcl, (short) uni_input_size, 0, 128/TypeSize(Type_UD), sicInputOpnd, InstOpt_WriteEnable);
541     }
542 
543     // send's operands preparation
544     // create a currDst for VX
545     G4_SrcRegRegion* payload = createSrcRegRegion(dcl, getRegionStride1());
546 
547     G4_DstRegRegion* d = checkSendDst(outputOpnd->asDstRegRegion());
548 
549     unsigned temp = 0;            // Bit 7-0 of message descriptor
550     temp += 0x1 << 13;            // Bit 14-13 of message descriptor
551 
552     unsigned regs2rcv = 7;
553 
554     createSendInst(
555         NULL,
556         d,
557         payload,
558         input_size_dw / GENX_DATAPORT_IO_SZ,
559         regs2rcv,
560         G4_ExecSize(GENX_DATAPORT_IO_SZ),
561         temp,
562         SFID::CRE,
563         true,
564         SendAccess::READ_ONLY,
565         surfaceOpnd,
566         NULL,
567         InstOpt_WriteEnable,
568         false);
569 
570     return VISA_SUCCESS;
571 }
572 
translateVISAVmeFbrInst(G4_Operand * surfaceOpnd,G4_Operand * unitInputOpnd,G4_Operand * fbrInputOpnd,G4_Operand * fbrMbModOpnd,G4_Operand * fbrSubMbShapeOpnd,G4_Operand * fbrSubPredModeOpnd,G4_DstRegRegion * outputOpnd)573 int IR_Builder::translateVISAVmeFbrInst(
574     G4_Operand* surfaceOpnd,
575     G4_Operand* unitInputOpnd,
576     G4_Operand* fbrInputOpnd,
577     G4_Operand* fbrMbModOpnd,
578     G4_Operand* fbrSubMbShapeOpnd,
579     G4_Operand* fbrSubPredModeOpnd,
580     G4_DstRegRegion* outputOpnd)
581 {
582     TIME_SCOPE(VISA_BUILDER_IR_CONSTRUCTION);
583 
584     unsigned uni_input_size;
585 
586     uni_input_size = 4;
587 
588     // add dcl for VX
589     unsigned input_size_dw = (uni_input_size + 4)*32/TypeSize(Type_UD);
590 
591     G4_Declare *dcl = createSendPayloadDcl(input_size_dw, Type_UD);
592 
593     // mov  (96)    VX(0,0)<1>,  UNIInput
594     createMovSendSrcInst(dcl, 0, 0, uni_input_size*32/TypeSize(Type_UD), unitInputOpnd, InstOpt_WriteEnable);
595 
596     // mov  (128)   VX(3,0)<1>,  FBRInput
597     createMovSendSrcInst(dcl, (short) uni_input_size, 0, 128/TypeSize(Type_UD), fbrInputOpnd, InstOpt_WriteEnable);
598 
599     // mov  (1)     VX(2,20)<1>, FBRMbMode
600     G4_DstRegRegion* tmp_dst1_opnd = createDst(
601         dcl->getRegVar(),
602         2,
603         20,
604         1,
605         Type_UB);
606 
607     createMov(
608         g4::SIMD1,
609         tmp_dst1_opnd,
610         fbrMbModOpnd,
611         InstOpt_WriteEnable,
612         true);
613 
614     // mov  (1)     VX(2,21)<1>, FBRSubMbShape
615     G4_DstRegRegion* tmp_dst2_opnd = createDst(
616         dcl->getRegVar(),
617         2,
618         21,
619         1,
620         Type_UB);
621 
622     createMov(
623         g4::SIMD1,
624         tmp_dst2_opnd,
625         fbrSubMbShapeOpnd,
626         InstOpt_WriteEnable,
627         true);
628 
629     //  mov  (1)     VX(2,22)<1>, FBRSubPredMode
630     G4_DstRegRegion* tmp_dst3_opnd = createDst(
631         dcl->getRegVar(),
632         2,
633         22,
634         1,
635         Type_UB);
636 
637     createMov(
638         g4::SIMD1,
639         tmp_dst3_opnd,
640         fbrSubPredModeOpnd,
641         InstOpt_WriteEnable,
642         true);
643 
644     // send's operands preparation
645     // create a currDst for VX
646     G4_SrcRegRegion* payload = createSrcRegRegion(dcl, getRegionStride1());
647 
648     G4_DstRegRegion* d = checkSendDst(outputOpnd->asDstRegRegion());
649 
650     unsigned temp = 0;            // Bit 7-0 of message descriptor
651     temp += 0x3 << 13;            // Bit 14-13 of message descriptor
652 
653     unsigned regs2rcv = 7;
654 
655     createSendInst(
656         NULL,
657         d,
658         payload,
659         input_size_dw / GENX_DATAPORT_IO_SZ,
660         regs2rcv,
661         G4_ExecSize(GENX_DATAPORT_IO_SZ),
662         temp,
663         SFID::CRE,
664         true,  //head_present?
665         SendAccess::READ_ONLY,
666         surfaceOpnd,
667         NULL,
668         InstOpt_WriteEnable,
669         false);
670 
671     return VISA_SUCCESS;
672 }
673 
translateVISAVmeIdmInst(G4_Operand * surfaceOpnd,G4_Operand * unitInputOpnd,G4_Operand * idmInputOpnd,G4_DstRegRegion * outputOpnd)674 int IR_Builder::translateVISAVmeIdmInst(
675     G4_Operand* surfaceOpnd,
676     G4_Operand* unitInputOpnd,
677     G4_Operand* idmInputOpnd,
678     G4_DstRegRegion* outputOpnd)
679 {
680     TIME_SCOPE(VISA_BUILDER_IR_CONSTRUCTION);
681 
682     unsigned uni_input_size;
683 
684     uni_input_size = 4;
685 
686     // add dcl for VX
687     unsigned input_size_dw = (uni_input_size + 1)*32/TypeSize(Type_UD);
688 
689     G4_Declare *dcl = createSendPayloadDcl(input_size_dw, Type_UD);
690 
691     // mov  (128)    VX(0,0)<1>,  UNIInput
692     createMovSendSrcInst(dcl, 0, 0, uni_input_size*32/TypeSize(Type_UD), unitInputOpnd, InstOpt_WriteEnable);
693 
694     // mov  (32)   VX(3,0)<1>,  IDMInput
695     createMovSendSrcInst(dcl, (short) uni_input_size, 0, 32/TypeSize(Type_UD), idmInputOpnd, InstOpt_WriteEnable);
696 
697     // send's operands preparation
698     // create a currDst for VX
699     G4_SrcRegRegion* payload = createSrcRegRegion(dcl, getRegionStride1());
700 
701     G4_DstRegRegion* d = checkSendDst(outputOpnd->asDstRegRegion());
702 
703     unsigned temp = 0;            // Bit 7-0 of message descriptor
704                                   // temp += 0x0 << 13;            // Bit 14-13 of message descriptor
705 
706     unsigned regs2rcv = 16;
707 
708     // dst is already UW
709     createSendInst(
710         NULL,
711         d,
712         payload,
713         input_size_dw / GENX_DATAPORT_IO_SZ,
714         regs2rcv,
715         G4_ExecSize(GENX_DATAPORT_IO_SZ),
716         temp,
717         SFID::VME,
718         true,
719         SendAccess::READ_ONLY,
720         surfaceOpnd,
721         NULL,
722         InstOpt_WriteEnable,
723         false);
724 
725     return VISA_SUCCESS;
726 }
727 
728 
translateVISASamplerVAGenericInst(G4_Operand * surface,G4_Operand * sampler,G4_Operand * uOffOpnd,G4_Operand * vOffOpnd,G4_Operand * vSizeOpnd,G4_Operand * hSizeOpnd,G4_Operand * mmfMode,unsigned char cntrl,unsigned char msgSeq,VA_fopcode fopcode,G4_DstRegRegion * dstOpnd,G4_Type dstType,unsigned dstSize,bool isBigKernel)729 int IR_Builder::translateVISASamplerVAGenericInst(
730     G4_Operand*   surface,   G4_Operand*   sampler,
731     G4_Operand*   uOffOpnd , G4_Operand*   vOffOpnd,
732     G4_Operand*   vSizeOpnd, G4_Operand*   hSizeOpnd,
733     G4_Operand*   mmfMode,   unsigned char cntrl,
734     unsigned char msgSeq,    VA_fopcode    fopcode,
735     G4_DstRegRegion*dstOpnd, G4_Type       dstType,
736     unsigned      dstSize,
737     bool isBigKernel)
738 {
739     TIME_SCOPE(VISA_BUILDER_IR_CONSTRUCTION);
740 
741     G4_Declare* dcl  = createSendPayloadDcl(2 * GENX_SAMPLER_IO_SZ, Type_UD);
742     G4_Declare *dcl1 = createSendPayloadDcl(8,                      Type_UD);
743     G4_Declare *dclF = createSendPayloadDcl(8,                      Type_F);
744     dcl1->setAliasDeclare (dcl, numEltPerGRF<Type_UB>());
745     dclF->setAliasDeclare (dcl, numEltPerGRF<Type_UB>());
746 
747     /// Message Sequence Setup:
748     /// When Functionality is MINMAX/BoolCentroid/Centroid, value is binary 1x.
749     switch (fopcode)
750     {
751     case       MINMAX_FOPCODE:
752     case     Centroid_FOPCODE:
753     case BoolCentroid_FOPCODE:
754         msgSeq = 0x2;
755         break;
756     default:
757         break; // Prevent gcc warning
758     }
759 
760     /// Message Header Setup
761     /// 19:18 output control format | 15 Alpha Write Channel Mask ARGB = 1101 = 0xD for sampler8x8
762     unsigned msg_header = (cntrl << 18) + (0xD << 12);
763 
764     /// Media Payload Setup
765     /// M1.7: 31:28 (Functionality) | 27 (IEF) | 26:25 (MSG_SEQ) | 24:23 (MMF_MODE) | 22:0 (Group ID Number)
766     G4_Operand* mediaPayld_var = createImm(0, Type_UD);
767     G4_Operand* mediaPayld_imm = NULL;
768 
769     if (fopcode ==  Convolve_FOPCODE)
770     {
771         mediaPayld_imm = createImm(
772             (((unsigned)fopcode) << 28) |
773             (0 << 27) |
774             (msgSeq << 25) |
775             (isBigKernel << 23), Type_UD);
776 
777     }
778     else if (fopcode == MINMAX_FOPCODE || fopcode == MINMAXFILTER_FOPCODE)
779     {
780         mediaPayld_imm = createImm(
781             (((unsigned)fopcode) << 28) |
782             (0 << 27) |
783             (msgSeq << 25) |
784             (((mmfMode && mmfMode->isImm()) ? mmfMode->asImm()->getInt()    : 0) << 23),
785             Type_UD);
786 
787         /// Support non-constant MMF_ENABLE parameters.
788         /// Reuse for non-constant exec/control modes.
789         if (mmfMode && !mmfMode->isImm())
790         {
791             G4_DstRegRegion  media_payload_dst(Direct, dcl1->getRegVar(), 0, 7, 1, Type_UD);
792             mediaPayld_var = createSrc(dcl1->getRegVar(), 0, 7, getRegionScalar(), Type_UD);
793             createBinOp(G4_shl, g4::SIMD1,
794                 createDstRegRegion(media_payload_dst), mmfMode, createImm(23, Type_UD), InstOpt_WriteEnable, true);
795         }
796     }
797     else
798     {
799         mediaPayld_imm = createImm((((unsigned)fopcode) << 28)                   |
800             (    0 << 27)                               |
801             (msgSeq << 25)                               |
802             (0x3 << 23), Type_UD);
803     }
804 
805     /// Message Descriptor Setup
806     unsigned msg_descriptor = (0x3 << 17) + (0xB  << 12);
807 
808     createMovR0Inst(dcl, 0, 0, true);
809     createMovInst(dcl, 0, 2, g4::SIMD1, NULL, NULL, createImm(msg_header, Type_UD), true); /// mov msg_header
810     if (hasBindlessSampler())
811     {
812         // clear M0.3 bit 0 (sampler state base address select)
813         // and (1) M0.3<1>:ud M0.3<0;1,0>:ud 0xFFFFFFFE:ud
814         G4_SrcRegRegion* src0 = createSrc(dcl->getRegVar(), 0, 3,
815             getRegionScalar(), Type_UD);
816         G4_Imm* src1 = createImm(0xFFFFFFFE, Type_UD);
817         G4_DstRegRegion* dst = createDst(dcl->getRegVar(), 0, 3, 1, Type_UD);
818         (void) createBinOp(G4_and, g4::SIMD1, dst, src0, src1, InstOpt_WriteEnable, true);
819     }
820     createMovInst(dcl1, 0, 0, g4::SIMD8, NULL, NULL, createImm(0, Type_UD), true); /// zero out
821     createMovInst(dclF, 0, 2, g4::SIMD1, NULL, NULL, uOffOpnd, true); /// mov u opnd
822     createMovInst(dclF, 0, 3, g4::SIMD1, NULL, NULL, vOffOpnd, true); /// mov v opnd
823     createAddInst(dcl1, 0, 7, g4::SIMD1, NULL, NULL, mediaPayld_var, mediaPayld_imm, InstOpt_WriteEnable); /// store payload bits
824     G4_SrcRegRegion* src = createSrc(dcl1->getRegVar(), 0, 7,
825         getRegionScalar(), Type_UD);
826 
827     createAddInst(dcl1, 0, 7, g4::SIMD1, NULL, NULL, src,
828         createSrcRegRegion(builtinHWTID, getRegionScalar()), InstOpt_WriteEnable);
829     // later phases need FFTID
830     preDefVars.setHasPredefined(PreDefinedVarsInternal::HW_TID, true);
831     /// M1.0: [DevBDW+] Function = Centroid/BoolCentroid v/h direction size.
832     if (vSizeOpnd)
833     {
834         G4_Operand* h_sz_shl_opnd = NULL;
835 
836         if (!hSizeOpnd || hSizeOpnd->isImm())
837             h_sz_shl_opnd = createImm((hSizeOpnd ? (hSizeOpnd->asImm()->getInt() << 4) : 0), Type_UD);
838         else
839         {
840             h_sz_shl_opnd = createSrc(dcl1->getRegVar(), 0, 0, getRegionScalar(), Type_UD);
841             G4_DstRegRegion* temp_dst = createDst(dcl1->getRegVar(), 0, 0, 1, Type_UD);
842             createBinOp(G4_shl, g4::SIMD1, temp_dst, hSizeOpnd,
843                 createImm(4, Type_UD), InstOpt_WriteEnable, true);
844         }
845         createAddInst(dcl1, 0, 0, g4::SIMD1, NULL, NULL, vSizeOpnd, h_sz_shl_opnd, InstOpt_WriteEnable);
846     }
847 
848     G4_SrcRegRegion* payload = createSrcRegRegion(dcl, getRegionStride1());
849     G4_DstRegRegion* post_dst = checkSendDst(dstOpnd->asDstRegRegion());
850     int reg_receive = dstSize/numEltPerGRF<Type_UB>();
851     if (reg_receive < 1)
852         reg_receive = 1;
853     createSendInst(NULL, post_dst, payload, 2, reg_receive, g4::SIMD8,
854         msg_descriptor, SFID::SAMPLER, 1, SendAccess::READ_ONLY, surface, sampler, InstOpt_WriteEnable, false);
855 
856     return VISA_SUCCESS;
857 }
858 
859 /*
860 * Translates Sampler API intrinsic.
861 *output matrix, ChannelMask, SurfaceIndex, SamplerIndex, u, v, deltaU, deltaV
862 *u2d, OutputFormatControl=0, v2d=0.0, AVSExecMode=0, EIFbypass=false
863 * sample8x8AVS(matrix<unsigned short, N, 64> &M, samplerType,  channelMask, surfIndex, samplerIndex, u, v, deltaU, deltaV, u2d,
864 OutputFormatControl=0, v2d, AVSExecMode, EIFbypass=false);
865 *
866 * Assuming: N = 4, channelMask=ABGR_ENABLE, surfIndex = 0x21, samplerIndex = 0x4,
867 *           then the generated code should look like the following for GT:
868 *
869 * .declare  VX Base=m ElementSize=4 Type=ud Total=16
870 * .declare  VA Base=m ElementSize=4 Type=f Total=8  ALIAS(VX,8)
871 * .declare  VY Base=r ElementSize=2 Type=uw Total=256
872 *
873 * mov  (8)     VX(0,0)<1>,  r0:ud
* mov  (1)     VX(0,2)<1>,  0 channel mask [12,15], output format control [18,19] 0
875 * mov  (1)     VA(0,0)<1>,  v2d
876 * mov  (1)     VA(0,1)<1>,  vertical block number
877 * mov  (1)     VA(0,2)<1>,  u
878 * mov  (1)     VA(0,3)<1>,  v
879 * mov  (1)     VA(0,4)<1>,  deltaU
880 * mov  (1)     VA(0,5)<1>,  deltaV
881 * mov  (1)     VA(0,6)<1>,  u2d
882 * mov  (1)     VA(0,7)<1>,
883 [0:22]  GroupID
884 [23:24] Reserved
885 [25:26] 1x - 16x8
886 0x - 16x4
887 [27]    EIF Bypass
888 [28:31] 0000 - AVS Scaling
889 * send (16)    VY(0,0)<1>,  VX(0,0),    0x2,   0x048bc421
890 * mov  (256)   M(0,0)<1>,   VY(0,0)
891 *
892 * VX: message header
893 *
894 * VA: SIMD32 media payload
895 *
896 * ex_desc: 0x2 == 0010 (Target Function ID: Sampling Engine)
897 *
898 * desc: 0x050EB000 == Bit 31-29: 000 (Reserved)
899 *                     Bit 28-25: 0010 (Message Length = 2)
900 *                     Bit 24-20: 10000 (Response Message Length = 16)
901 *                     Bit 19:    1 (Header present)
902 *                     Bit 18-17: 11 (SIMD Mode = SIMD32/64)
903 *                     Bit 16-12: 01011 (Message Type = sample8x8 Media layout)
904 *                     Bit 11-8:  0000 + samplerIndex  (Sampler Index)
905 *                     Bit 7-0:   00000000 + surfIndex (Binding Table Index)
906 *
907 */
translateVISAAvsInst(G4_Operand * surface,G4_Operand * sampler,ChannelMask channel,unsigned numEnabledChannels,G4_Operand * deltaUOpnd,G4_Operand * uOffOpnd,G4_Operand * deltaVOpnd,G4_Operand * vOffOpnd,G4_Operand * u2dOpnd,G4_Operand * groupIDOpnd,G4_Operand * verticalBlockNumberOpnd,unsigned char cntrl,G4_Operand * v2dOpnd,unsigned char execMode,G4_Operand * eifbypass,G4_DstRegRegion * dstOpnd)908 int IR_Builder::translateVISAAvsInst(
909     G4_Operand* surface,
910     G4_Operand* sampler,
911     ChannelMask channel,
912     unsigned numEnabledChannels,
913     G4_Operand* deltaUOpnd,
914     G4_Operand* uOffOpnd,
915     G4_Operand* deltaVOpnd,
916     G4_Operand* vOffOpnd,
917     G4_Operand* u2dOpnd,
918     G4_Operand* groupIDOpnd,
919     G4_Operand* verticalBlockNumberOpnd,
920     unsigned char cntrl,
921     G4_Operand* v2dOpnd,
922     unsigned char execMode,
923     G4_Operand* eifbypass,
924     G4_DstRegRegion* dstOpnd)
925 {
926     TIME_SCOPE(VISA_BUILDER_IR_CONSTRUCTION);
927 
928 
929     {
930         /*
931         * mov  (8)     VX(0,0)<1>,  r0:ud
932         * mov  (1)     VX(0,2)<1>,  0 channel mask [12,15], output format control [16,17] 0
933         * mov  (1)     VA(0,0)<1>,  v2d
934         * mov  (1)     VA(0,1)<1>,  vertical block number
935         * mov  (1)     VA(0,2)<1>,  u
936         * mov  (1)     VA(0,3)<1>,  v
937         * mov  (1)     VA(0,4)<1>,  deltaU
938         * mov  (1)     VA(0,5)<1>,  deltaV
939         * mov  (1)     VA(0,6)<1>,  u2d
940         * mov  (1)     VA(0,7)<1>,
941         [0:22]  GroupID
942         [23:24] Reserved
943         [25:26] 1x - 16x8
944         0x - 16x4
945         [27]    EIF Bypass
946         [28:31] 0000 - AVS Scaling
947         */
948         unsigned int number_elements_returned = 64;
949         G4_Type output_type = Type_UW;
950 
951         if (cntrl > 1)
952             output_type = Type_UB;
953 
954 
955         if (execMode == AVS_16x8)
956         {
957             number_elements_returned = 128;
958             numEnabledChannels *= 2;
959         }
960 
961         if (execMode == AVS_8x4)
962         {
963             number_elements_returned = 32;
964         }
965 
966         if (execMode == AVS_4x4)
967         {
968             number_elements_returned = 16;
969         }
970 
971         unsigned obj_size = number_elements_returned*numEnabledChannels*TypeSize(output_type);
972         // mov (8)      VX(0,0)<1>,  r0:ud
973         // add dcl for VX
974         G4_Declare *dcl = createSendPayloadDcl(2 * GENX_SAMPLER_IO_SZ, Type_UD);
975 
976         // mov  VX(0,0)<1>, r0
977         createMovR0Inst(dcl, 0, 0, true);
978         /* mov (1)     VX(0,2)<1>,   0  */
979         unsigned cmask = channel.getHWEncoding() << 12;
980         cmask += cntrl << 18;
981         createMovInst(dcl, 0, 2, g4::SIMD1, NULL, NULL, createImm(cmask, Type_UD), true);
982 
983         G4_Declare *dcl1 = createSendPayloadDcl(GENX_DATAPORT_IO_SZ, Type_F);
984         dcl1->setAliasDeclare(dcl, numEltPerGRF<Type_UB>());
985 
986         /*
987         Keeping destination type as UD, otherwise w-->f conversion happens,
988         which affects the results.
989         */
990         G4_Declare *dcl1_ud = createSendPayloadDcl(GENX_DATAPORT_IO_SZ, Type_UD);
991         dcl1_ud->setAliasDeclare(dcl, numEltPerGRF<Type_UB>());
992 
993         // mov  (1)     VA(0,0)<1>,  v2d
994         createMovInst(dcl1, 0, 0, g4::SIMD1, NULL, NULL, v2dOpnd, true);
995 
996         // mov  (1)     VA(0,1)<1>,  vertical block number
997         createMovInst(dcl1_ud, 0, 1, g4::SIMD1, NULL, NULL, verticalBlockNumberOpnd, true);
998         // mov  (1)     VA(1,2)<1>,  u
999         createMovInst(dcl1, 0, 2, g4::SIMD1, NULL, NULL, uOffOpnd, true);
1000         // mov  (1)     VA(1,3)<1>,  v
1001         createMovInst(dcl1, 0, 3, g4::SIMD1, NULL, NULL, vOffOpnd, true);
1002         // mov  (1)     VA(1,4)<1>,  deltaU
1003         createMovInst(dcl1, 0, 4, g4::SIMD1, NULL, NULL, deltaUOpnd, true);
1004         // mov  (1)     VA(1,5)<1>,  deltaV
1005         createMovInst(dcl1, 0, 5, g4::SIMD1, NULL, NULL, deltaVOpnd, true);
1006         // mov  (1)     VA(0,6)<1>,  U2d
1007         createMovInst(dcl1, 0, 6, g4::SIMD1, NULL, NULL, u2dOpnd, true);
1008 
1009         {
1010             /*
1011             [23:24] Reserved
1012             [25:26] 1x - 16x8
1013             0x - 16x4
1014             [27]    EIF Bypass
1015             [28:31] 0000 - AVS Scaling
1016             */
1017             unsigned int upper_bits = 0;
1018             upper_bits += execMode << 25;
1019 
1020             if (eifbypass->isImm())
1021             {
1022                 upper_bits += (eifbypass->asImm()->getInt() & 1) << 27;
1023 
1024                 G4_DstRegRegion* dst2_opnd = createDst(dcl1_ud->getRegVar(), 0, 7, 1, Type_UD);
1025                 createBinOp(G4_add, g4::SIMD1, dst2_opnd, groupIDOpnd,
1026                     createImm(upper_bits, Type_UD), InstOpt_WriteEnable, true);
1027             }
1028             else
1029             {
1030                 // extract lsb of eifbypass
1031                 G4_DstRegRegion* dst2_opnd = createDst(dcl1_ud->getRegVar(), 0, 7, 1, Type_UD);
1032                 createBinOp(G4_and, g4::SIMD1, dst2_opnd, eifbypass,
1033                     createImm(1, Type_UD), InstOpt_WriteEnable, true);
1034 
1035                 // eifbypass << 27
1036                 G4_SrcRegRegion* src2_opnd = createSrc(dcl1_ud->getRegVar(), 0, 7, getRegionScalar(), dcl1_ud->getElemType());
1037                 G4_DstRegRegion* dst3_opnd = createDst(dcl1_ud->getRegVar(), 0, 7, 1, Type_UD);
1038                 createBinOp(G4_shl, g4::SIMD1, dst3_opnd, src2_opnd,
1039                     createImm(27, Type_UD), InstOpt_WriteEnable, true);
1040 
1041                 // upper_bits + (eifbypass << 27)
1042                 G4_SrcRegRegion* src3_opnd = createSrc(dcl1_ud->getRegVar(), 0, 7, getRegionScalar(), dcl1_ud->getElemType());
1043                 G4_DstRegRegion* dst4_opnd = createDst(dcl1_ud->getRegVar(), 0, 7, 1, Type_UD);
1044                 createBinOp(G4_add, g4::SIMD1, dst4_opnd, src3_opnd,
1045                     createImm(upper_bits, Type_UD), InstOpt_WriteEnable, true);
1046 
1047                 G4_DstRegRegion* dst5_opnd = createDst(dcl1_ud->getRegVar(), 0, 7, 1, Type_UD);
1048                 G4_SrcRegRegion* src_opnd = createSrc(dcl1_ud->getRegVar(), 0, 7, getRegionScalar(), dcl1_ud->getElemType());
1049                 createBinOp(G4_add, g4::SIMD1, dst5_opnd, groupIDOpnd, src_opnd, InstOpt_WriteEnable, true);
1050 
1051             }
1052         }
1053 
1054         /*
1055         * desc: 0x050EB000 == Bit 31-29: 000 (Reserved)
1056         *                     Bit 28-25: 0010 (Message Length = 2)
1057         *                     Bit 24-20: 10000 (Response Message Length = 16)
1058         *                     Bit 19:    1 (Header present)
1059         *                     Bit 18-17: 11 (SIMD Mode = SIMD32/64)
1060         *                     Bit 16-12: 01011 (Message Type = sample8x8 Media layout)
1061         *                     Bit 11-8:  0000 + samplerIndex  (Sampler Index)
1062         *                     Bit 7-0:   00000000 + surfIndex (Binding Table Index)
1063         */
1064 
1065         // Set bit 9-8 for the message descriptor
1066         unsigned temp = 0;
1067         temp += 0xB << 12;  // Bit 15-12 = 1100 for Sampler Message Type
1068         temp += 0x3 << 17;  // Bit 17-16 = 11 for SIMD32 mode
1069 
1070                             // send's operands preparation
1071                             // create a currDst for VX
1072         G4_SrcRegRegion* payload = createSrcRegRegion(dcl, getRegionStride1());
1073 
1074         G4_DstRegRegion* d = checkSendDst(dstOpnd->asDstRegRegion());
1075 
1076         createSendInst(
1077             NULL,
1078             d,
1079             payload,
1080             2,
1081             obj_size/numEltPerGRF<Type_UB>(),
1082             g4::SIMD16,
1083             temp,
1084             SFID::SAMPLER,
1085             1,
1086             SendAccess::READ_ONLY,
1087             surface,
1088             sampler,
1089             InstOpt_WriteEnable,
1090             false);
1091     }
1092 
1093     return VISA_SUCCESS;
1094 }
1095 
1096 
translateVISAVaSklPlusGeneralInst(ISA_VA_Sub_Opcode sub_opcode,G4_Operand * surface,G4_Operand * sampler,unsigned char mode,unsigned char functionality,G4_Operand * uOffOpnd,G4_Operand * vOffOpnd,G4_Operand * offsetsOpnd,G4_Operand * loopCountOpnd,G4_Operand * pixelHMaskOpnd,G4_Operand * pixelVMaskLeftOpnd,G4_Operand * pixelVMaskRightOpnd,G4_Operand * disparityOpnd,G4_Operand * verticalOriginOpnd,G4_Operand * horizontalOriginOpnd,G4_Operand * xDirectionSizeOpnd,G4_Operand * yDirectionSizeOpnd,G4_Operand * xDirectionSearchSizeOpnd,G4_Operand * yDirectionSearchSizeOpnd,G4_DstRegRegion * dstOpnd,G4_Type dstType,unsigned dstSize,unsigned char pixelSize,G4_Operand * dstSurfaceOpnd,G4_Operand * dstXOpnd,G4_Operand * dstYOpnd,bool hdcMode)1097 int IR_Builder::translateVISAVaSklPlusGeneralInst(
1098     ISA_VA_Sub_Opcode sub_opcode,
1099     G4_Operand* surface, G4_Operand* sampler,
1100     unsigned char mode, unsigned char functionality,
1101     G4_Operand* uOffOpnd, G4_Operand* vOffOpnd ,
1102 
1103     //1pixel convolve
1104     G4_Operand * offsetsOpnd,
1105 
1106     //FloodFill
1107     G4_Operand* loopCountOpnd,             G4_Operand* pixelHMaskOpnd,
1108     G4_Operand* pixelVMaskLeftOpnd,        G4_Operand* pixelVMaskRightOpnd,
1109 
1110     //LBP Correlation
1111     G4_Operand* disparityOpnd,
1112 
1113     //Correlation Search
1114     G4_Operand* verticalOriginOpnd,        G4_Operand* horizontalOriginOpnd,
1115     G4_Operand* xDirectionSizeOpnd,        G4_Operand* yDirectionSizeOpnd,
1116     G4_Operand* xDirectionSearchSizeOpnd , G4_Operand* yDirectionSearchSizeOpnd,
1117 
1118     G4_DstRegRegion* dstOpnd, G4_Type dstType, unsigned dstSize,
1119 
1120     //HDC
1121     unsigned char pixelSize, G4_Operand* dstSurfaceOpnd,
1122     G4_Operand *dstXOpnd,    G4_Operand* dstYOpnd,
1123     bool hdcMode)
1124 {
1125     TIME_SCOPE(VISA_BUILDER_IR_CONSTRUCTION);
1126 
1127     G4_Declare* dcl  = NULL;
1128     G4_Declare *dcl_offsets = NULL;
1129 
1130     unsigned int reg_to_send = 2;
1131     //for offsets
1132     if ((sub_opcode == VA_OP_CODE_1PIXEL_CONVOLVE && mode == VA_CONV_16x1) ||
1133         sub_opcode == ISA_HDC_1PIXELCONV)
1134     {
1135         dcl = createSendPayloadDcl(4 * GENX_SAMPLER_IO_SZ , Type_UD);
1136         //16 pairs of x,y coordinates
1137         dcl_offsets = createSendPayloadDcl(32                      , Type_W);
1138         dcl_offsets->setAliasDeclare(dcl, numEltPerGRF<Type_UB>() * 2);
1139         reg_to_send = 4;
1140     }
1141     else
1142         dcl = createSendPayloadDcl(2 * GENX_SAMPLER_IO_SZ , Type_UD);
1143 
1144     G4_Declare *dcl_payload_UD = createSendPayloadDcl(GENX_DATAPORT_IO_SZ, Type_UD);
1145     G4_Declare *dcl_payload_F = createSendPayloadDcl(GENX_DATAPORT_IO_SZ, Type_F);
1146     G4_Declare *dcl_payload_UW = createSendPayloadDcl(GENX_DATAPORT_IO_SZ * 2, Type_UW);
1147 
1148     dcl_payload_UD->setAliasDeclare (dcl,  numEltPerGRF<Type_UB>());
1149     dcl_payload_F->setAliasDeclare (dcl, numEltPerGRF<Type_UB>());
1150     dcl_payload_UW->setAliasDeclare (dcl, numEltPerGRF<Type_UB>());
1151 
1152     /// Message Header Setup
1153     /// 19:18 output control format | 15 Alpha Write Channel Mask ARGB = 1101 = 0xD for sampler8x8
1154     unsigned msg_header = (0xD << 12);
1155 
1156     //if MMF based on pixel size set output format control.
1157     if (sub_opcode == ISA_HDC_MMF && pixelSize)
1158     {
1159         msg_header = msg_header + (0x2 << 18);
1160     }
1161 
1162     //I guess this is still needed just to be sure payload is really initiazlied.
1163     //since full register initalization is conservative some registers
1164     //can still be not initialized and then used for payload
1165     if (m_options->getOption(vISA_InitPayload))
1166     {
1167         createMovInst(dcl_payload_UD, 0, 0, g4::SIMD8, NULL, NULL, createImm(0, Type_UD));
1168     }
1169     // mov  VX(0,0)<1>, r0
1170     createMovR0Inst(dcl, 0, 0);
1171     createMovInst(dcl, 0, 2, g4::SIMD1, NULL, NULL, createImm(msg_header, Type_UD));
1172 
1173     //set dst BTI, In M0.2 bits 24:31
1174     if (hdcMode)
1175     {
1176         G4_Declare *dcl_temp = createDeclareNoLookup(
1177             "tmp_shl_dst_bti",
1178             G4_GRF ,
1179             1,
1180             1,
1181             Type_UD);
1182 
1183         //Creating dst of the shift to be used in shift instruction
1184         //Creating src of src to use in the subsequent add instruction
1185         G4_Operand* shift_immed = createSrc(dcl_temp->getRegVar(), 0, 0, getRegionScalar(), Type_UD);
1186         G4_DstRegRegion* temp_dst = createDst(dcl_temp->getRegVar(), 0, 0,1, Type_UD);
1187 
1188         //creating a src and for m0.2
1189         G4_SrcRegRegion* m0_2_src = createSrc(dcl->getRegVar(), 0, 2, getRegionScalar(), Type_UD);
1190         G4_DstRegRegion* m0_2_dst = createDst(dcl->getRegVar(), 0, 2, 1, Type_UD);
1191 
1192         createBinOp(G4_shl, g4::SIMD1, temp_dst, dstSurfaceOpnd, createImm(24, Type_UD), InstOpt_WriteEnable, true);
1193         createBinOp(G4_add, g4::SIMD1, m0_2_dst, m0_2_src, shift_immed, InstOpt_WriteEnable, true);
1194     }
1195 
1196     // set x_offset In M0.4 0:15
1197     // set y_offset In M0.4 16:31
1198     if (hdcMode)
1199     {
1200         G4_Declare *dcl_temp = createDeclareNoLookup(
1201             "tmp_shl_y_offset",
1202             G4_GRF ,
1203             1,
1204             1,
1205             Type_UD);
1206 
1207         // Creating dst of the shift to be used in shift instruction
1208         // Creating src of src to use in the subsequent add instruction
1209         G4_Operand * shift_immed = createSrc(dcl_temp->getRegVar(), 0, 0, getRegionScalar(), Type_UD);
1210         G4_DstRegRegion* temp_dst = createDst(dcl_temp->getRegVar(), 0, 0,1, Type_UD);
1211 
1212         // creating a src and for m0.4
1213         G4_DstRegRegion* m0_4_dst = createDst(dcl->getRegVar(), 0, 4, 1, Type_UD);
1214 
1215         createBinOp(G4_shl, g4::SIMD1, temp_dst, dstYOpnd, createImm(16, Type_UD), InstOpt_WriteEnable, true);
1216         createBinOp(G4_add, g4::SIMD1, m0_4_dst, dstXOpnd, shift_immed, InstOpt_WriteEnable, true);
1217     }
1218 
1219     // set dst surface format based on pixel size M0.5 0:4
1220     if (hdcMode)
1221     {
1222         int surface_format = 0;
1223         if (pixelSize == 0) {
1224             surface_format = 6; // PLANAR_Y16_SNORM
1225         } else if (pixelSize == 1) {
1226             surface_format = 5; // PLANAR_Y8_UNORM
1227         } else {
1228             ASSERT_USER(false,
1229                 "Invalid surface format for SKL+ VA HDC");
1230         }
1231         createMovInst(dcl, 0, 5, g4::SIMD1, NULL, NULL, createImm(surface_format, Type_UD));
1232     }
1233 
1234     // setting M2.1 vertical  block offset to 0
1235     // for LBP correlation setting M2.0 to 0, since only upper 16 bits are set
1236     // later by adding to shl result
1237     createMovInst(dcl_payload_UD, 0, 1, g4::SIMD1, NULL, NULL, createImm(0, Type_UD));
1238 
1239     // setting up M1.7
1240     unsigned int m1_7 = sampler8x8_group_id++;
1241 
1242     ISA_VA_Sub_Opcode originalSubOpcode = sub_opcode;
1243 
1244     // HDC uses the same sub opcodes as regular VA,
1245     // but with return register set to 0.
1246     switch (sub_opcode)
1247     {
1248     case ISA_HDC_CONV:
1249         sub_opcode = Convolve_FOPCODE;
1250         break;
1251     case ISA_HDC_MMF:
1252         sub_opcode = MINMAXFILTER_FOPCODE;
1253         break;
1254     case ISA_HDC_ERODE:
1255         sub_opcode = ERODE_FOPCODE;
1256         break;
1257     case ISA_HDC_DILATE:
1258         sub_opcode = Dilate_FOPCODE;
1259         break;
1260     case ISA_HDC_LBPCORRELATION:
1261         sub_opcode = VA_OP_CODE_LBP_CORRELATION;
1262         break;
1263     case ISA_HDC_LBPCREATION:
1264         sub_opcode = VA_OP_CODE_LBP_CREATION;
1265         break;
1266     case ISA_HDC_1DCONV_H:
1267         sub_opcode = VA_OP_CODE_1D_CONVOLVE_HORIZONTAL;
1268         break;
1269     case ISA_HDC_1DCONV_V:
1270         sub_opcode = VA_OP_CODE_1D_CONVOLVE_VERTICAL;
1271         break;
1272     case ISA_HDC_1PIXELCONV:
1273         sub_opcode = VA_OP_CODE_1PIXEL_CONVOLVE;
1274         break;
1275     default:
1276         break; // Prevent gcc warning
1277     }
1278     //setting VA operation
1279     m1_7 |= (unsigned int)sub_opcode<<28;
1280 
1281     //setting IEF bypass to 1
1282     m1_7 |= 0x1<<27;
1283 
1284     //setting message sequence
1285     m1_7 |= (mode & 0x3) << 25;
1286 
1287     //setting functionality
1288     m1_7 |= (functionality & 0x3) << 23;
1289     createMovInst(dcl_payload_UD, 0, 7, g4::SIMD1, NULL, NULL, createImm(m1_7, Type_UD));
1290 
1291     /*
1292     case VA_OP_CODE_1D_CONVOLVE_HORIZONTAL:
1293     case VA_OP_CODE_1D_CONVOLVE_VERTICAL:
1294     case VA_OP_CODE_1PIXEL_CONVOLVE:
1295     case VA_OP_CODE_FLOOD_FILL:
1296     case VA_OP_CODE_LBP_CREATION:
1297     case VA_OP_CODE_LBP_CORRELATION:
1298     case VA_OP_CODE_CORRELATION_SEARCH:
1299     */
1300 
1301     //setting m1_5 and m1_4
1302     if (sub_opcode == VA_OP_CODE_CORRELATION_SEARCH)
1303     {
1304         createMovInst(dcl_payload_F, 0, 5, g4::SIMD1, NULL, NULL, verticalOriginOpnd);
1305         createMovInst(dcl_payload_F, 0, 4, g4::SIMD1, NULL, NULL, horizontalOriginOpnd);
1306     }
1307 
1308     //setting m1_3
1309     if (vOffOpnd != NULL)
1310     {
1311         createMovInst(dcl_payload_F, 0, 3, g4::SIMD1, NULL, NULL, vOffOpnd);
1312     }
1313 
1314     //setting m1_2
1315     if (uOffOpnd != NULL)
1316     {
1317         createMovInst(dcl_payload_F, 0, 2, g4::SIMD1, NULL, NULL, uOffOpnd);
1318     }
1319 
1320     if (sub_opcode == VA_OP_CODE_FLOOD_FILL)
1321     {
1322         createMovSendSrcInst(dcl_payload_UD, 0, 2, 5, pixelHMaskOpnd, 0);
1323     }
1324 
1325     if ((sub_opcode == VA_OP_CODE_1PIXEL_CONVOLVE  && mode == VA_CONV_16x1) ||
1326         originalSubOpcode == ISA_HDC_1PIXELCONV)
1327     {
1328         const RegionDesc *rd = getRegionStride1();
1329         G4_Operand *offsets_opnd_temp = createSrc(
1330             offsetsOpnd->asSrcRegRegion()->getBase(),
1331             0,
1332             0,
1333             rd,
1334             Type_W);
1335 
1336         createMovInst(dcl_offsets, 0, 0, g4::SIMD32, NULL, NULL, offsets_opnd_temp);
1337     }
1338 
1339     //creating temp for intermediate computations
1340     G4_Declare *dcl_temp = createDeclareNoLookup(
1341         "tmp_shl",
1342         G4_GRF ,
1343         1,
1344         1,
1345         Type_UD);
1346     G4_SrcRegRegion temp_src(Mod_src_undef,Direct,dcl_temp->getRegVar(), 0, 0, getRegionScalar(), Type_UD);
1347     G4_DstRegRegion temp_dst(Direct, dcl_temp->getRegVar(), 0, 0,1, Type_UD);
1348 
1349     //creating a src and for m1.0
1350     G4_SrcRegRegion m1_0_src(Mod_src_undef,Direct,dcl_payload_UD->getRegVar(), 0, 0, getRegionScalar(), Type_UD);
1351     G4_DstRegRegion m1_0_dst(Direct, dcl_payload_UD->getRegVar(), 0, 0, 1, Type_UD);
1352 
1353     G4_Operand * shift_immed = NULL;
1354 
1355     //setting m1_0
1356     switch (sub_opcode)
1357     {
1358     case VA_OP_CODE_FLOOD_FILL:
1359     {
1360         createMovInst(dcl_payload_UD, 0, 0, g4::SIMD1, NULL, NULL, pixelVMaskLeftOpnd);
1361 
1362         if (pixelVMaskRightOpnd->isImm())
1363         {
1364             shift_immed = createImm(pixelVMaskRightOpnd->asImm()->getInt() << 10,Type_UD);
1365             createMov(g4::SIMD1, createDstRegRegion(m1_0_dst), shift_immed, InstOpt_NoOpt, true);
1366         } else {
1367 
1368             createBinOp(G4_shl, g4::SIMD1,
1369                 createDstRegRegion(temp_dst), pixelVMaskRightOpnd, createImm(10, Type_UD), InstOpt_WriteEnable, true);
1370             shift_immed = createSrcRegRegion(temp_src);
1371             createBinOp(G4_add, g4::SIMD1,
1372                 createDstRegRegion(m1_0_dst), createSrcRegRegion(m1_0_src), shift_immed, InstOpt_WriteEnable, true);
1373         }
1374 
1375         if (loopCountOpnd->isImm())
1376         {
1377             shift_immed = createImm(loopCountOpnd->asImm()->getInt() << 24, Type_UD);
1378         } else {
1379             createBinOp(G4_shl, g4::SIMD1,
1380                 createDstRegRegion(temp_dst), loopCountOpnd, createImm(24, Type_UD), InstOpt_WriteEnable, true);
1381             shift_immed = createSrcRegRegion(temp_src);
1382         }
1383         createBinOp(G4_add, g4::SIMD1,
1384             createDstRegRegion(m1_0_dst), createSrcRegRegion(m1_0_src), shift_immed, InstOpt_WriteEnable, true);
1385         break;
1386     }
1387     case VA_OP_CODE_LBP_CORRELATION:
1388     {
1389         //setting disparity
1390         if (disparityOpnd->isImm())
1391         {
1392             shift_immed = createImm(disparityOpnd->asImm()->getInt() << 16, Type_UD);
1393             createMov(g4::SIMD1, createDstRegRegion(m1_0_dst), shift_immed, InstOpt_NoOpt, true);
1394         }
1395         else
1396         {
1397             createBinOp(G4_shl, g4::SIMD1,
1398                 createDstRegRegion(m1_0_dst), disparityOpnd, createImm(16, Type_UD), InstOpt_WriteEnable, true);
1399         }
1400 
1401         break;
1402     }
1403     case VA_OP_CODE_CORRELATION_SEARCH:
1404     {
1405         /*
1406         G4_Operand* verticalOriginOpnd    , G4_Operand* horizontalOriginOpnd  ,
1407         G4_Operand* xDirectionSizeOpnd   , G4_Operand* yDirectionSizeOpnd   ,
1408         G4_Operand* xDirectionSearchSizeOpnd , G4_Operand* yDirectionSearchSizeOpnd ,
1409         */
1410         createMovInst(dcl_payload_UD, 0, 0, g4::SIMD1, NULL, NULL, xDirectionSizeOpnd);
1411 
1412         //setting y-direction size of the source for correlation.
1413         if (yDirectionSizeOpnd->isImm())
1414         {
1415             shift_immed = createImm(yDirectionSizeOpnd->asImm()->getInt() << 4, Type_UD);
1416             createMov(g4::SIMD1, createDstRegRegion(m1_0_dst), shift_immed, InstOpt_NoOpt, true);
1417         }
1418         else
1419         {
1420             createBinOp(G4_shl, g4::SIMD1, createDstRegRegion(temp_dst), yDirectionSizeOpnd, createImm(4, Type_UD), InstOpt_WriteEnable, true);
1421             shift_immed = createSrcRegRegion(temp_src);
1422             createBinOp(G4_add, g4::SIMD1, createDstRegRegion(m1_0_dst), createSrcRegRegion(m1_0_src), shift_immed, InstOpt_WriteEnable, true);
1423         }
1424 
1425 
1426         //31:16 reserved
1427 
1428         //setting x-direction search size
1429         if (xDirectionSearchSizeOpnd->isImm())
1430         {
1431             shift_immed = createImm(xDirectionSearchSizeOpnd->asImm()->getInt() << 8, Type_UD);
1432         } else {
1433             createBinOp(G4_shl, g4::SIMD1, createDstRegRegion(temp_dst), xDirectionSearchSizeOpnd, createImm(8, Type_UD), InstOpt_WriteEnable, true);
1434             shift_immed = createSrcRegRegion(temp_src);
1435         }
1436         createBinOp(G4_add, g4::SIMD1, createDstRegRegion(m1_0_dst), createSrcRegRegion(m1_0_src), shift_immed, InstOpt_WriteEnable, true);
1437 
1438         //setting y-direction search size.
1439         if (yDirectionSearchSizeOpnd->isImm())
1440         {
1441             shift_immed = createImm(yDirectionSearchSizeOpnd->asImm()->getInt() << 16, Type_UD);
1442         } else {
1443             createBinOp(G4_shl, g4::SIMD1, createDstRegRegion(temp_dst), yDirectionSearchSizeOpnd, createImm(16, Type_UD), InstOpt_WriteEnable, true);
1444             shift_immed = createSrcRegRegion(temp_src);
1445         }
1446         createBinOp(G4_add, g4::SIMD1, createDstRegRegion(m1_0_dst), createSrcRegRegion(m1_0_src), shift_immed, InstOpt_WriteEnable, true);
1447 
1448         break;
1449     }
1450     default:
1451         break; // Prevent gcc warning
1452     }
1453 
1454     G4_SrcRegRegion* payload = createSrcRegRegion(dcl, getRegionStride1());
1455     G4_DstRegRegion* post_dst = NULL;
1456 
1457     unsigned int reg_to_receive = 0;
1458 
1459     if (!hdcMode)
1460     {
1461         post_dst = checkSendDst(dstOpnd);
1462         if ((dstSize %  numEltPerGRF<Type_UB>()) != 0)
1463         {
1464             reg_to_receive = (unsigned int) std::ceil((double)dstSize/numEltPerGRF<Type_UB>());
1465         }
1466         else
1467         {
1468             reg_to_receive = dstSize/numEltPerGRF<Type_UB>();
1469         }
1470     } else {
1471         post_dst = createNullDst(Type_UD);
1472     }
1473 
1474     /// Message Descriptor Setup
1475     /// 18:17 SIMD Mode (SIMD32/64 = 3)  |  16:12 Message Type (sampler8x8 = 01011 = 0xB)
1476     unsigned msg_descriptor = (0x3 << 17) + (0xB  << 12);
1477     createSendInst(NULL, post_dst, payload, reg_to_send, reg_to_receive, g4::SIMD8,
1478         msg_descriptor, SFID::SAMPLER, 1, SendAccess::READ_ONLY, surface, sampler, 0, false);
1479 
1480     return VISA_SUCCESS;
1481 }
1482