1 /*========================== begin_copyright_notice ============================
2
3 Copyright (C) 2020-2021 Intel Corporation
4
5 SPDX-License-Identifier: MIT
6
7 ============================= end_copyright_notice ===========================*/
8
9 #include "BuildIR.h"
10 #include "../Timer.h"
11
12 #include <cmath>
13
14 using namespace vISA;
15
// ORs `value << 14` into `dest`. Used in this file to place the data port
// message type into a message descriptor value.
// Note: the expansion already ends with ';', and neither argument is
// parenthesized, so pass simple expressions only.
#define SET_DATAPORT_MESSAGE_TYPE(dest, value)\
    dest |= value << 14;

// Left-shift amount applied in this file to the top/bottom field selection
// values (0x6 / 0x7) before they are added to a media block descriptor.
#define MESSAGE_SPECIFIC_CONTROL 8
20
21
getObjWidth(unsigned blockWidth,unsigned blockHeight,G4_Declare * dcl)22 static unsigned int getObjWidth(
23 unsigned blockWidth, unsigned blockHeight, G4_Declare * dcl)
24 {
25 // makes sure io_width is divisible by 4
26 unsigned ioWidth = (blockWidth + TypeSize(Type_D) - 1) & (~(TypeSize(Type_D) - 1));
27 // gets next power of 2 size
28 return Round_Up_Pow2(ioWidth / dcl->getElemSize()) * dcl->getElemSize();
29 }
30
31
32 /*
33 * Translates Media Block read CISA inst.
34 *
35 * read(I, X, Y, matrix<int,C,R> M)
 * Assume C = R = 8 then code should look like
37 *
38 * .declare VX Base=m ElementSize=4 Type=ud Total=8
39 * .declare VY Base=r ElementSize=4 Type=ud Total=8
40 *
41 * mov (8) VX(0,0)<1>, r0.0:ud
42 * mov (1) VX(0,2)<1>, 0x0007001f // 8 rows, 32 bytes
43 * mov (1) VX(0,1)<1>, Y
44 * mov (1) VX(0,0)<1>, X
45 * send (8) VY(0,0)<1>, VX(0,0), null, 0x04186000
46 * mov (8) M(0,0)<1>, VY(0,0)
47 *
48 * 0x0007001f == (R-1)<<16 + C * sizeof(el_type) - 1;
49 *
50 * 0x04186000 ==
51 * (((ObjectSize - 1) / numEltPerGRF<Type_UB>() + 1)) << 16 +
52 * 0x4100000 + 0x6000 + I;
53 *
54 * ObjectSize = RoundUpPow2(C) * R * sizeof(el_type);
55 */
translateVISAMediaLoadInst(MEDIA_LD_mod mod,G4_Operand * surface,unsigned planeID,unsigned blockWidth,unsigned blockHeight,G4_Operand * xOffOpnd,G4_Operand * yOffOpnd,G4_DstRegRegion * dstOpnd)56 int IR_Builder::translateVISAMediaLoadInst(
57 MEDIA_LD_mod mod,
58 G4_Operand* surface,
59 unsigned planeID,
60 unsigned blockWidth,
61 unsigned blockHeight,
62 G4_Operand* xOffOpnd,
63 G4_Operand* yOffOpnd,
64 G4_DstRegRegion* dstOpnd)
65 {
66 TIME_SCOPE(VISA_BUILDER_IR_CONSTRUCTION);
67
68 unsigned temp;
69
70 unsigned objWidth = 0;
71 if (blockWidth != 0)
72 {
73 objWidth = getObjWidth(blockWidth, blockHeight, dstOpnd->getBase()->asRegVar()->getDeclare());
74 }
75 unsigned obj_size = objWidth * blockHeight;
76
77 /* mov (8) VX(0,0)<1>, r0:ud */
78 // add dcl for VX
79 G4_Declare *dcl = createSendPayloadDcl(GENX_DATAPORT_IO_SZ, Type_UD);
80
81 // create MOV inst
82 createMovR0Inst(dcl, 0, 0, true);
83 /* mov (1) VX(0,2)<1>, CONST[R,C] */
84 temp = (blockHeight - 1) << 16 | (blockWidth - 1);
85 createMovInst(dcl, 0, 2, g4::SIMD1, NULL, NULL, createImm(temp, Type_UD), true);
86 /* mov (1) VX(0,0)<1>, X */
87 createMovInst(dcl, 0, 0, g4::SIMD1, NULL, NULL, xOffOpnd, true);
88 /* mov (1) VX(0,1)<1>, Y */
89 createMovInst(dcl, 0, 1, g4::SIMD1, NULL, NULL, yOffOpnd, true);
90
91 // send's operands preparation
92 // create a currDst for VX
93 G4_SrcRegRegion* payload = createSrcRegRegion(dcl, getRegionStride1());
94
95 // mediaread overwrites entire GRF
96 bool via_temp = false;
97 G4_Operand *original_dst = NULL;
98 G4_Declare *new_dcl = NULL;
99
100 if (obj_size < numEltPerGRF<Type_UB>())
101 {
102 via_temp = true;
103 }
104 else
105 {
106 unsigned byte_subregoff =
107 dstOpnd->asDstRegRegion()->getSubRegOff() * dstOpnd->getTypeSize();
108 G4_VarBase *base = dstOpnd->asDstRegRegion()->getBase();
109 G4_Declare *dcl = base->asRegVar()->getDeclare();
110
111 if (byte_subregoff % numEltPerGRF<Type_UB>() != 0)
112 {
113 via_temp = true;
114 }
115 else
116 {
117 G4_Declare *aliasdcl = dcl;
118 bool false_alias_align = false;
119 while (aliasdcl->getAliasDeclare()) {
120 if (aliasdcl->getAliasOffset() % numEltPerGRF<Type_UB>() != 0) {
121 false_alias_align = true;
122 break;
123 }
124 aliasdcl = aliasdcl->getAliasDeclare();
125 }
126 if (false_alias_align) {
127 via_temp = true;
128 }
129 }
130 }
131
132 if (via_temp == true)
133 {
134 original_dst = dstOpnd;
135 new_dcl = createTempVar(numEltPerGRF<Type_UB>()/TypeSize(Type_UD),
136 Type_UD, GRFALIGN);
137 G4_DstRegRegion* tmp_dst_opnd = createDst(
138 new_dcl->getRegVar(),
139 0,
140 0,
141 1,
142 Type_UD);
143
144 dstOpnd = tmp_dst_opnd;
145 }
146
147 G4_DstRegRegion* d = checkSendDst(dstOpnd->asDstRegRegion());
148
149 temp = 0;
150 if ((mod == MEDIA_LD_top) || (mod == MEDIA_LD_top_mod)) {
151 temp += 0x6 << MESSAGE_SPECIFIC_CONTROL; // Read top fields
152 } else if ((mod == MEDIA_LD_bottom) || (mod == MEDIA_LD_bottom_mod)) {
153 temp += 0x7 << MESSAGE_SPECIFIC_CONTROL; // Read bottom fields
154 }
155
156 SET_DATAPORT_MESSAGE_TYPE(temp, DC1_MEDIA_BLOCK_READ)
157
158 temp += planeID;
159
160 G4_ExecSize send_exec_size(GENX_DATAPORT_IO_SZ);
161 if (IS_WTYPE(d->getType()))
162 {
163 send_exec_size *= 2;
164 }
165
166 createSendInst(
167 NULL,
168 d,
169 payload,
170 1,
171 (obj_size - 1) / numEltPerGRF<Type_UB>() + 1,
172 send_exec_size,
173 temp,
174 SFID::DP_DC1,
175 1,
176 SendAccess::READ_ONLY,
177 surface,
178 NULL,
179 InstOpt_WriteEnable,
180 false);
181
182 if (via_temp)
183 {
184 G4_Declare *new_dcl2 = createTempVar(
185 numEltPerGRF<Type_UB>()/original_dst->getTypeSize(),
186 original_dst->getType(), GRFALIGN);
187
188 new_dcl2->setAliasDeclare(new_dcl, 0);
189
190 unsigned short remained_ele = obj_size / original_dst->getTypeSize();
191 // max execution size is 32
192 G4_ExecSize curr_exec_size = G4_ExecSize(getNativeExecSize() * 2);
193 unsigned char curr_offset = 0;
194
195 G4_Type dstType = original_dst->getType();
196 while (remained_ele >= 1)
197 {
198 short dst_regoff = original_dst->asDstRegRegion()->getRegOff();
199 short dst_subregoff = original_dst->asDstRegRegion()->getSubRegOff();
200 if (remained_ele >= curr_exec_size)
201 {
202 G4_SrcRegRegion *tmp_src_opnd = createSrc(
203 new_dcl2->getRegVar(),
204 0,
205 curr_offset,
206 curr_exec_size == g4::SIMD1 ? getRegionScalar() : getRegionStride1(),
207 original_dst->getType());
208
209 dst_subregoff += curr_offset;
210 short ele_per_grf = numEltPerGRF<Type_UB>()/TypeSize(dstType);
211 if (dst_subregoff >= ele_per_grf)
212 {
213 dst_regoff += 1;
214 dst_subregoff -= ele_per_grf;
215 }
216 G4_DstRegRegion* tmp_dst_opnd = createDst(
217 original_dst->asDstRegRegion()->getBase(),
218 dst_regoff,
219 dst_subregoff,
220 1,
221 original_dst->getType());
222
223 createMov(curr_exec_size, tmp_dst_opnd, tmp_src_opnd, InstOpt_WriteEnable, true);
224 curr_offset += curr_exec_size;
225 remained_ele -= curr_exec_size;
226 }
227 curr_exec_size /= 2;
228 }
229 }
230
231 return VISA_SUCCESS;
232 }
233
234 /*
235 * Translates Media Block write CISA inst.
236 *
237 * write(I, X, Y, matrix<int,C,R> M)
 * Assume C = R = 8 then code should look like
239 *
240 * .declare VX Base=m ElementSize=4 Type=ud Total=72
241 * .declare VY Base=m ElementSize=4 Type=ud Total=64 ALIAS(VX,32)
242 *
243 * mov (8) VX(0,0)<1>, r0.0:ud
244 * mov (64) VY(0,0)<1>, M
245 * mov (1) VX(0,2)<1>, 0x0007001f // 8 rows, 32 bytes
246 * mov (1) VX(0,1)<1>, Y
247 * mov (1) VX(0,0)<1>, X
248 * send (8) null<1>, VX(0,0), null, 0x05902000
249 *
250 * 72 = 8 + C * R
251 * 0x0007001f is (R-1)<<16 + C * sizeof(el_type) - 1
252 *
253 * 0x05902000 ==
254 * ((((ObjectSize - 1) / numEltPerGRF<Type_UB>() + 1)) + 1)<<20 +
255 * 0x5000000 + 0x2000 + I
256 * ObjectSize = RoundUpPow2(C) * R * sizeof(el_type)
257 */
translateVISAMediaStoreInst(MEDIA_ST_mod mod,G4_Operand * surface,unsigned planeID,unsigned blockWidth,unsigned blockHeight,G4_Operand * xOffOpnd,G4_Operand * yOffOpnd,G4_SrcRegRegion * srcOpnd)258 int IR_Builder::translateVISAMediaStoreInst(
259 MEDIA_ST_mod mod,
260 G4_Operand* surface,
261 unsigned planeID,
262 unsigned blockWidth,
263 unsigned blockHeight,
264 G4_Operand* xOffOpnd,
265 G4_Operand* yOffOpnd,
266 G4_SrcRegRegion* srcOpnd)
267 {
268 TIME_SCOPE(VISA_BUILDER_IR_CONSTRUCTION);
269
270 int objWidth = 0;
271 if (blockWidth != 0)
272 {
273 objWidth = getObjWidth(blockWidth, blockHeight, srcOpnd->getBase()->asRegVar()->getDeclare());
274 }
275 unsigned obj_size = objWidth * blockHeight;
276 unsigned int new_obj_size = obj_size;
277
278 auto setTopBottomForDesc = [](uint32_t desc, MEDIA_ST_mod mod)
279 {
280 if (mod == MEDIA_ST_top)
281 {
282 return desc + (0x6 << MESSAGE_SPECIFIC_CONTROL); // Write top fields
283 }
284 else if (mod == MEDIA_ST_bottom)
285 {
286 return desc + (0x7 << MESSAGE_SPECIFIC_CONTROL); // Write bottom fields
287 }
288 return desc;
289 };
290
291 bool forceSplitSend = shouldForceSplitSend(surface);
292 if (forceSplitSend || useSends())
293 {
294 // use split send
295 G4_Declare *headerDcl = createSendPayloadDcl(GENX_DATAPORT_IO_SZ, Type_UD);
296 createMovR0Inst(headerDcl, 0, 0, true);
297 /* mov (1) VX(0,2)<1>, CONST[R,C] */
298 uint32_t temp = (blockHeight - 1) << 16 | (blockWidth - 1);
299 createMovInst(headerDcl, 0, 2, g4::SIMD1, NULL, NULL, createImm(temp, Type_UD), true);
300
301 /* mov (1) VX(0,0)<1>, X */
302 createMovInst(headerDcl, 0, 0, g4::SIMD1, NULL, NULL, xOffOpnd, true);
303
304 /* mov (1) VX(0,1)<1>, Y */
305 createMovInst(headerDcl, 0, 1, g4::SIMD1, NULL, NULL, yOffOpnd, true);
306
307 G4_SrcRegRegion* headerOpnd = createSrcRegRegion(headerDcl, getRegionStride1());
308
309 unsigned msgDesc = setTopBottomForDesc(0, mod);
310 SET_DATAPORT_MESSAGE_TYPE(msgDesc, DC1_MEDIA_BLOCK_WRITE)
311
312 msgDesc += planeID;
313 // message length = 1, response length = 0, header present = 1
314 msgDesc += (1 << getSendMsgLengthBitOffset()) + (1 << getSendHeaderPresentBitOffset());
315 G4_DstRegRegion *dstOpnd = createNullDst(Type_UD);
316
317 unsigned extMsgLength = (obj_size - 1) / numEltPerGRF<Type_UB>() + 1;
318 uint16_t extFuncCtrl = 0;
319
320 G4_SendDescRaw * desc = createSendMsgDesc(msgDesc, 0, 1, SFID::DP_DC1,
321 extMsgLength, extFuncCtrl, SendAccess::WRITE_ONLY, surface);
322
323 createSplitSendInst(
324 nullptr, dstOpnd, headerOpnd, srcOpnd, g4::SIMD8, desc, InstOpt_WriteEnable, false);
325 }
326 else
327 {
328 uint32_t temp = new_obj_size/TypeSize(Type_UD) + GENX_DATAPORT_IO_SZ;
329
330 G4_Declare *dcl = createSendPayloadDcl(temp, Type_UD);
331
332 /* mov (c*r) VX(1,0)<1>, M */
333 /* decl for data to write */
334 temp = obj_size/TypeSize(Type_UD);
335
336 createMovSendSrcInst(dcl, 1, 0, temp, srcOpnd, InstOpt_WriteEnable);
337
338 createMovR0Inst(dcl, 0, 0, true);
339
340 /* mov (1) VX(0,2)<1>, CONST[R,C] */
341 temp = (blockHeight - 1) << 16 | (blockWidth - 1);
342 createMovInst(dcl, 0, 2, g4::SIMD1, NULL, NULL, createImm(temp, Type_UD), true);
343
344 /* mov (1) VX(0,0)<1>, X */
345 createMovInst(dcl, 0, 0, g4::SIMD1, NULL, NULL, xOffOpnd, true);
346
347 /* mov (1) VX(0,1)<1>, Y */
348 createMovInst(dcl, 0, 1, g4::SIMD1, NULL, NULL, yOffOpnd, true);
349
350 // send's operands preparation
351 /* Size of whole operand in UINT elements */
352 G4_SrcRegRegion* payload = createSrcRegRegion(dcl, getRegionStride1());
353
354 uint32_t funcCtrl = setTopBottomForDesc(0, mod);
355 SET_DATAPORT_MESSAGE_TYPE(funcCtrl, DC1_MEDIA_BLOCK_WRITE);
356
357 funcCtrl += planeID;
358 G4_DstRegRegion *post_dst_opnd = createNullDst(Type_UD);
359
360 createSendInst(
361 NULL,
362 post_dst_opnd,
363 payload,
364 ((obj_size - 1) / numEltPerGRF<Type_UB>() + 1) + 1,
365 0,
366 G4_ExecSize(GENX_DATAPORT_IO_SZ),
367 funcCtrl,
368 SFID::DP_DC1,
369 1,
370 SendAccess::WRITE_ONLY,
371 surface,
372 NULL,
373 InstOpt_WriteEnable,
374 false);
375 }
376
377 return VISA_SUCCESS;
378 }
379
380
translateVISAVmeImeInst(uint8_t stream_mode,uint8_t search_ctrl,G4_Operand * surfaceOpnd,G4_Operand * uniInputOpnd,G4_Operand * imeInputOpnd,G4_Operand * ref0Opnd,G4_Operand * ref1Opnd,G4_Operand * costCenterOpnd,G4_DstRegRegion * outputOpnd)381 int IR_Builder::translateVISAVmeImeInst(
382 uint8_t stream_mode,
383 uint8_t search_ctrl,
384 G4_Operand* surfaceOpnd,
385 G4_Operand* uniInputOpnd,
386 G4_Operand* imeInputOpnd,
387 G4_Operand* ref0Opnd,
388 G4_Operand* ref1Opnd,
389 G4_Operand* costCenterOpnd,
390 G4_DstRegRegion* outputOpnd)
391
392 {
393 TIME_SCOPE(VISA_BUILDER_IR_CONSTRUCTION);
394
395 // add dcl for VX
396 unsigned input_size_dw;
397
398 unsigned uni_input_size;
399
400 uni_input_size = 4;
401
402 if ((COMMON_ISA_VME_STREAM_MODE) stream_mode != VME_STREAM_IN &&
403 (COMMON_ISA_VME_STREAM_MODE) stream_mode != VME_STREAM_IN_OUT) {
404 input_size_dw = (uni_input_size + 2)*32/TypeSize(Type_UD);
405 } else if ((COMMON_ISA_VME_SEARCH_CTRL) search_ctrl == VME_SEARCH_DUAL_REF_DUAL_REC) {
406 input_size_dw = (uni_input_size + 6)*32/TypeSize(Type_UD);
407 } else {
408 input_size_dw = (uni_input_size + 4)*32/TypeSize(Type_UD);
409 }
410
411 G4_Declare *dcl = createSendPayloadDcl(input_size_dw, Type_UD);
412
413 // mov (96) VX(0,0)<1>, UNIInput
414 createMovSendSrcInst(dcl, 0, 0,
415 uni_input_size*32/TypeSize(Type_UD),
416 uniInputOpnd, InstOpt_WriteEnable);
417
418 // mov (192) VX(3,0)<1>, IMEInput
419 createMovSendSrcInst(dcl, (short) uni_input_size, 0,
420 (input_size_dw - uni_input_size*32/TypeSize(Type_UD)),
421 imeInputOpnd, InstOpt_WriteEnable);
422
423 // and (1) VX(0,13)<1>, VX(0,13):ub, 0xF8
424 G4_DstRegRegion *tmp_dst1_opnd = createDst(
425 dcl->getRegVar(),
426 0,
427 13,
428 1,
429 Type_UB);
430
431 G4_SrcRegRegion *tmp_src1_opnd = createSrc(
432 dcl->getRegVar(),
433 0,
434 13,
435 getRegionScalar(),
436 Type_UB);
437
438 createBinOp(G4_and, g4::SIMD1, tmp_dst1_opnd, tmp_src1_opnd,
439 createImm(0xF8, Type_UW), InstOpt_WriteEnable, true);
440
441 // or (1) VX(0,13)<1>, VX(0,13):ub, searchCtrl
442 G4_DstRegRegion *tmp_dst2_opnd = createDst(
443 dcl->getRegVar(),
444 0,
445 13,
446 1,
447 Type_UB);
448
449 G4_SrcRegRegion *tmp_src2_opnd = createSrc(
450 dcl->getRegVar(),
451 0,
452 13,
453 getRegionScalar(),
454 Type_UB);
455
456 createBinOp(G4_or, g4::SIMD1, tmp_dst2_opnd, tmp_src2_opnd,
457 createImm(search_ctrl, Type_UW), InstOpt_WriteEnable, true);
458
459 // mov (2) VA(0,0)<1>, ref0
460 // since ref0 is converted from UW to UD, move it as 1 UD
461 createMovSendSrcInst(dcl, 0, 0, 1, ref0Opnd, InstOpt_WriteEnable);
462
463 createMovSendSrcInst(dcl, 0, 1, 1, ref1Opnd, InstOpt_WriteEnable);
464
465 createMovSendSrcInst(dcl, 3, 0, 8, costCenterOpnd, InstOpt_WriteEnable);
466
467 // send's operands preparation
468 // create a currDst for VX
469 G4_SrcRegRegion* payload = createSrcRegRegion(dcl, getRegionStride1());
470
471 G4_DstRegRegion* d = checkSendDst(outputOpnd->asDstRegRegion());
472
473 unsigned temp = 0; // Bit 7-0 of message descriptor
474 temp += 0x2 << 13; // Bit 14-13 of message descriptor
475 temp += stream_mode << 15; // Bit 16-15 of message descriptor
476
477 unsigned regs2rcv;
478
479 if ((COMMON_ISA_VME_STREAM_MODE) stream_mode != VME_STREAM_OUT &&
480 (COMMON_ISA_VME_STREAM_MODE) stream_mode != VME_STREAM_IN_OUT) {
481 regs2rcv = 224/numEltPerGRF<Type_UB>();
482 } else if ((COMMON_ISA_VME_SEARCH_CTRL) search_ctrl == VME_SEARCH_DUAL_REF_DUAL_REC) {
483 regs2rcv = 352/numEltPerGRF<Type_UB>();
484 } else {
485 regs2rcv = 288/numEltPerGRF<Type_UB>();
486 }
487
488 createSendInst(
489 NULL,
490 d,
491 payload,
492 input_size_dw / GENX_DATAPORT_IO_SZ,
493 regs2rcv,
494 G4_ExecSize(GENX_DATAPORT_IO_SZ),
495 temp,
496 SFID::VME,
497 true,
498 SendAccess::READ_ONLY,
499 surfaceOpnd,
500 NULL,
501 InstOpt_WriteEnable,
502 false);
503
504 return VISA_SUCCESS;
505 }
506
translateVISAVmeSicInst(G4_Operand * surfaceOpnd,G4_Operand * uniInputOpnd,G4_Operand * sicInputOpnd,G4_DstRegRegion * outputOpnd)507 int IR_Builder::translateVISAVmeSicInst(
508 G4_Operand* surfaceOpnd,
509 G4_Operand* uniInputOpnd,
510 G4_Operand* sicInputOpnd,
511 G4_DstRegRegion* outputOpnd)
512 {
513 TIME_SCOPE(VISA_BUILDER_IR_CONSTRUCTION);
514
515 unsigned uni_input_size;
516
517 uni_input_size = 4;
518
519 // add dcl for VX
520 unsigned input_size_dw = (uni_input_size + 4)*32/TypeSize(Type_UD);
521
522 G4_Declare *dcl = NULL;
523 G4_Declare *topDcl = uniInputOpnd->getTopDcl();
524
525 // check if uniInputOpnd and sicInputOpnd are alias to the
526 // same top level decl with consistent payload layout
527 if ((topDcl == sicInputOpnd->getTopDcl()) &&
528 (uniInputOpnd->getByteOffset() == 0) &&
529 (sicInputOpnd->getByteOffset() == uni_input_size*32) &&
530 (topDcl->getByteSize() >= uni_input_size*32 + 128))
531 {
532 dcl = topDcl;
533 }
534 else
535 {
536 dcl = createSendPayloadDcl(input_size_dw, Type_UD);
537 // mov (96) VX(0,0)<1>, UNIInput
538 createMovSendSrcInst(dcl, 0, 0, uni_input_size*32/TypeSize(Type_UD), uniInputOpnd, InstOpt_WriteEnable);
539 // mov (128) VX(3,0)<1>, SICInput
540 createMovSendSrcInst(dcl, (short) uni_input_size, 0, 128/TypeSize(Type_UD), sicInputOpnd, InstOpt_WriteEnable);
541 }
542
543 // send's operands preparation
544 // create a currDst for VX
545 G4_SrcRegRegion* payload = createSrcRegRegion(dcl, getRegionStride1());
546
547 G4_DstRegRegion* d = checkSendDst(outputOpnd->asDstRegRegion());
548
549 unsigned temp = 0; // Bit 7-0 of message descriptor
550 temp += 0x1 << 13; // Bit 14-13 of message descriptor
551
552 unsigned regs2rcv = 7;
553
554 createSendInst(
555 NULL,
556 d,
557 payload,
558 input_size_dw / GENX_DATAPORT_IO_SZ,
559 regs2rcv,
560 G4_ExecSize(GENX_DATAPORT_IO_SZ),
561 temp,
562 SFID::CRE,
563 true,
564 SendAccess::READ_ONLY,
565 surfaceOpnd,
566 NULL,
567 InstOpt_WriteEnable,
568 false);
569
570 return VISA_SUCCESS;
571 }
572
translateVISAVmeFbrInst(G4_Operand * surfaceOpnd,G4_Operand * unitInputOpnd,G4_Operand * fbrInputOpnd,G4_Operand * fbrMbModOpnd,G4_Operand * fbrSubMbShapeOpnd,G4_Operand * fbrSubPredModeOpnd,G4_DstRegRegion * outputOpnd)573 int IR_Builder::translateVISAVmeFbrInst(
574 G4_Operand* surfaceOpnd,
575 G4_Operand* unitInputOpnd,
576 G4_Operand* fbrInputOpnd,
577 G4_Operand* fbrMbModOpnd,
578 G4_Operand* fbrSubMbShapeOpnd,
579 G4_Operand* fbrSubPredModeOpnd,
580 G4_DstRegRegion* outputOpnd)
581 {
582 TIME_SCOPE(VISA_BUILDER_IR_CONSTRUCTION);
583
584 unsigned uni_input_size;
585
586 uni_input_size = 4;
587
588 // add dcl for VX
589 unsigned input_size_dw = (uni_input_size + 4)*32/TypeSize(Type_UD);
590
591 G4_Declare *dcl = createSendPayloadDcl(input_size_dw, Type_UD);
592
593 // mov (96) VX(0,0)<1>, UNIInput
594 createMovSendSrcInst(dcl, 0, 0, uni_input_size*32/TypeSize(Type_UD), unitInputOpnd, InstOpt_WriteEnable);
595
596 // mov (128) VX(3,0)<1>, FBRInput
597 createMovSendSrcInst(dcl, (short) uni_input_size, 0, 128/TypeSize(Type_UD), fbrInputOpnd, InstOpt_WriteEnable);
598
599 // mov (1) VX(2,20)<1>, FBRMbMode
600 G4_DstRegRegion* tmp_dst1_opnd = createDst(
601 dcl->getRegVar(),
602 2,
603 20,
604 1,
605 Type_UB);
606
607 createMov(
608 g4::SIMD1,
609 tmp_dst1_opnd,
610 fbrMbModOpnd,
611 InstOpt_WriteEnable,
612 true);
613
614 // mov (1) VX(2,21)<1>, FBRSubMbShape
615 G4_DstRegRegion* tmp_dst2_opnd = createDst(
616 dcl->getRegVar(),
617 2,
618 21,
619 1,
620 Type_UB);
621
622 createMov(
623 g4::SIMD1,
624 tmp_dst2_opnd,
625 fbrSubMbShapeOpnd,
626 InstOpt_WriteEnable,
627 true);
628
629 // mov (1) VX(2,22)<1>, FBRSubPredMode
630 G4_DstRegRegion* tmp_dst3_opnd = createDst(
631 dcl->getRegVar(),
632 2,
633 22,
634 1,
635 Type_UB);
636
637 createMov(
638 g4::SIMD1,
639 tmp_dst3_opnd,
640 fbrSubPredModeOpnd,
641 InstOpt_WriteEnable,
642 true);
643
644 // send's operands preparation
645 // create a currDst for VX
646 G4_SrcRegRegion* payload = createSrcRegRegion(dcl, getRegionStride1());
647
648 G4_DstRegRegion* d = checkSendDst(outputOpnd->asDstRegRegion());
649
650 unsigned temp = 0; // Bit 7-0 of message descriptor
651 temp += 0x3 << 13; // Bit 14-13 of message descriptor
652
653 unsigned regs2rcv = 7;
654
655 createSendInst(
656 NULL,
657 d,
658 payload,
659 input_size_dw / GENX_DATAPORT_IO_SZ,
660 regs2rcv,
661 G4_ExecSize(GENX_DATAPORT_IO_SZ),
662 temp,
663 SFID::CRE,
664 true, //head_present?
665 SendAccess::READ_ONLY,
666 surfaceOpnd,
667 NULL,
668 InstOpt_WriteEnable,
669 false);
670
671 return VISA_SUCCESS;
672 }
673
translateVISAVmeIdmInst(G4_Operand * surfaceOpnd,G4_Operand * unitInputOpnd,G4_Operand * idmInputOpnd,G4_DstRegRegion * outputOpnd)674 int IR_Builder::translateVISAVmeIdmInst(
675 G4_Operand* surfaceOpnd,
676 G4_Operand* unitInputOpnd,
677 G4_Operand* idmInputOpnd,
678 G4_DstRegRegion* outputOpnd)
679 {
680 TIME_SCOPE(VISA_BUILDER_IR_CONSTRUCTION);
681
682 unsigned uni_input_size;
683
684 uni_input_size = 4;
685
686 // add dcl for VX
687 unsigned input_size_dw = (uni_input_size + 1)*32/TypeSize(Type_UD);
688
689 G4_Declare *dcl = createSendPayloadDcl(input_size_dw, Type_UD);
690
691 // mov (128) VX(0,0)<1>, UNIInput
692 createMovSendSrcInst(dcl, 0, 0, uni_input_size*32/TypeSize(Type_UD), unitInputOpnd, InstOpt_WriteEnable);
693
694 // mov (32) VX(3,0)<1>, IDMInput
695 createMovSendSrcInst(dcl, (short) uni_input_size, 0, 32/TypeSize(Type_UD), idmInputOpnd, InstOpt_WriteEnable);
696
697 // send's operands preparation
698 // create a currDst for VX
699 G4_SrcRegRegion* payload = createSrcRegRegion(dcl, getRegionStride1());
700
701 G4_DstRegRegion* d = checkSendDst(outputOpnd->asDstRegRegion());
702
703 unsigned temp = 0; // Bit 7-0 of message descriptor
704 // temp += 0x0 << 13; // Bit 14-13 of message descriptor
705
706 unsigned regs2rcv = 16;
707
708 // dst is already UW
709 createSendInst(
710 NULL,
711 d,
712 payload,
713 input_size_dw / GENX_DATAPORT_IO_SZ,
714 regs2rcv,
715 G4_ExecSize(GENX_DATAPORT_IO_SZ),
716 temp,
717 SFID::VME,
718 true,
719 SendAccess::READ_ONLY,
720 surfaceOpnd,
721 NULL,
722 InstOpt_WriteEnable,
723 false);
724
725 return VISA_SUCCESS;
726 }
727
728
translateVISASamplerVAGenericInst(G4_Operand * surface,G4_Operand * sampler,G4_Operand * uOffOpnd,G4_Operand * vOffOpnd,G4_Operand * vSizeOpnd,G4_Operand * hSizeOpnd,G4_Operand * mmfMode,unsigned char cntrl,unsigned char msgSeq,VA_fopcode fopcode,G4_DstRegRegion * dstOpnd,G4_Type dstType,unsigned dstSize,bool isBigKernel)729 int IR_Builder::translateVISASamplerVAGenericInst(
730 G4_Operand* surface, G4_Operand* sampler,
731 G4_Operand* uOffOpnd , G4_Operand* vOffOpnd,
732 G4_Operand* vSizeOpnd, G4_Operand* hSizeOpnd,
733 G4_Operand* mmfMode, unsigned char cntrl,
734 unsigned char msgSeq, VA_fopcode fopcode,
735 G4_DstRegRegion*dstOpnd, G4_Type dstType,
736 unsigned dstSize,
737 bool isBigKernel)
738 {
739 TIME_SCOPE(VISA_BUILDER_IR_CONSTRUCTION);
740
741 G4_Declare* dcl = createSendPayloadDcl(2 * GENX_SAMPLER_IO_SZ, Type_UD);
742 G4_Declare *dcl1 = createSendPayloadDcl(8, Type_UD);
743 G4_Declare *dclF = createSendPayloadDcl(8, Type_F);
744 dcl1->setAliasDeclare (dcl, numEltPerGRF<Type_UB>());
745 dclF->setAliasDeclare (dcl, numEltPerGRF<Type_UB>());
746
747 /// Message Sequence Setup:
748 /// When Functionality is MINMAX/BoolCentroid/Centroid, value is binary 1x.
749 switch (fopcode)
750 {
751 case MINMAX_FOPCODE:
752 case Centroid_FOPCODE:
753 case BoolCentroid_FOPCODE:
754 msgSeq = 0x2;
755 break;
756 default:
757 break; // Prevent gcc warning
758 }
759
760 /// Message Header Setup
761 /// 19:18 output control format | 15 Alpha Write Channel Mask ARGB = 1101 = 0xD for sampler8x8
762 unsigned msg_header = (cntrl << 18) + (0xD << 12);
763
764 /// Media Payload Setup
765 /// M1.7: 31:28 (Functionality) | 27 (IEF) | 26:25 (MSG_SEQ) | 24:23 (MMF_MODE) | 22:0 (Group ID Number)
766 G4_Operand* mediaPayld_var = createImm(0, Type_UD);
767 G4_Operand* mediaPayld_imm = NULL;
768
769 if (fopcode == Convolve_FOPCODE)
770 {
771 mediaPayld_imm = createImm(
772 (((unsigned)fopcode) << 28) |
773 (0 << 27) |
774 (msgSeq << 25) |
775 (isBigKernel << 23), Type_UD);
776
777 }
778 else if (fopcode == MINMAX_FOPCODE || fopcode == MINMAXFILTER_FOPCODE)
779 {
780 mediaPayld_imm = createImm(
781 (((unsigned)fopcode) << 28) |
782 (0 << 27) |
783 (msgSeq << 25) |
784 (((mmfMode && mmfMode->isImm()) ? mmfMode->asImm()->getInt() : 0) << 23),
785 Type_UD);
786
787 /// Support non-constant MMF_ENABLE parameters.
788 /// Reuse for non-constant exec/control modes.
789 if (mmfMode && !mmfMode->isImm())
790 {
791 G4_DstRegRegion media_payload_dst(Direct, dcl1->getRegVar(), 0, 7, 1, Type_UD);
792 mediaPayld_var = createSrc(dcl1->getRegVar(), 0, 7, getRegionScalar(), Type_UD);
793 createBinOp(G4_shl, g4::SIMD1,
794 createDstRegRegion(media_payload_dst), mmfMode, createImm(23, Type_UD), InstOpt_WriteEnable, true);
795 }
796 }
797 else
798 {
799 mediaPayld_imm = createImm((((unsigned)fopcode) << 28) |
800 ( 0 << 27) |
801 (msgSeq << 25) |
802 (0x3 << 23), Type_UD);
803 }
804
805 /// Message Descriptor Setup
806 unsigned msg_descriptor = (0x3 << 17) + (0xB << 12);
807
808 createMovR0Inst(dcl, 0, 0, true);
809 createMovInst(dcl, 0, 2, g4::SIMD1, NULL, NULL, createImm(msg_header, Type_UD), true); /// mov msg_header
810 if (hasBindlessSampler())
811 {
812 // clear M0.3 bit 0 (sampler state base address select)
813 // and (1) M0.3<1>:ud M0.3<0;1,0>:ud 0xFFFFFFFE:ud
814 G4_SrcRegRegion* src0 = createSrc(dcl->getRegVar(), 0, 3,
815 getRegionScalar(), Type_UD);
816 G4_Imm* src1 = createImm(0xFFFFFFFE, Type_UD);
817 G4_DstRegRegion* dst = createDst(dcl->getRegVar(), 0, 3, 1, Type_UD);
818 (void) createBinOp(G4_and, g4::SIMD1, dst, src0, src1, InstOpt_WriteEnable, true);
819 }
820 createMovInst(dcl1, 0, 0, g4::SIMD8, NULL, NULL, createImm(0, Type_UD), true); /// zero out
821 createMovInst(dclF, 0, 2, g4::SIMD1, NULL, NULL, uOffOpnd, true); /// mov u opnd
822 createMovInst(dclF, 0, 3, g4::SIMD1, NULL, NULL, vOffOpnd, true); /// mov v opnd
823 createAddInst(dcl1, 0, 7, g4::SIMD1, NULL, NULL, mediaPayld_var, mediaPayld_imm, InstOpt_WriteEnable); /// store payload bits
824 G4_SrcRegRegion* src = createSrc(dcl1->getRegVar(), 0, 7,
825 getRegionScalar(), Type_UD);
826
827 createAddInst(dcl1, 0, 7, g4::SIMD1, NULL, NULL, src,
828 createSrcRegRegion(builtinHWTID, getRegionScalar()), InstOpt_WriteEnable);
829 // later phases need FFTID
830 preDefVars.setHasPredefined(PreDefinedVarsInternal::HW_TID, true);
831 /// M1.0: [DevBDW+] Function = Centroid/BoolCentroid v/h direction size.
832 if (vSizeOpnd)
833 {
834 G4_Operand* h_sz_shl_opnd = NULL;
835
836 if (!hSizeOpnd || hSizeOpnd->isImm())
837 h_sz_shl_opnd = createImm((hSizeOpnd ? (hSizeOpnd->asImm()->getInt() << 4) : 0), Type_UD);
838 else
839 {
840 h_sz_shl_opnd = createSrc(dcl1->getRegVar(), 0, 0, getRegionScalar(), Type_UD);
841 G4_DstRegRegion* temp_dst = createDst(dcl1->getRegVar(), 0, 0, 1, Type_UD);
842 createBinOp(G4_shl, g4::SIMD1, temp_dst, hSizeOpnd,
843 createImm(4, Type_UD), InstOpt_WriteEnable, true);
844 }
845 createAddInst(dcl1, 0, 0, g4::SIMD1, NULL, NULL, vSizeOpnd, h_sz_shl_opnd, InstOpt_WriteEnable);
846 }
847
848 G4_SrcRegRegion* payload = createSrcRegRegion(dcl, getRegionStride1());
849 G4_DstRegRegion* post_dst = checkSendDst(dstOpnd->asDstRegRegion());
850 int reg_receive = dstSize/numEltPerGRF<Type_UB>();
851 if (reg_receive < 1)
852 reg_receive = 1;
853 createSendInst(NULL, post_dst, payload, 2, reg_receive, g4::SIMD8,
854 msg_descriptor, SFID::SAMPLER, 1, SendAccess::READ_ONLY, surface, sampler, InstOpt_WriteEnable, false);
855
856 return VISA_SUCCESS;
857 }
858
859 /*
860 * Translates Sampler API intrinsic.
861 *output matrix, ChannelMask, SurfaceIndex, SamplerIndex, u, v, deltaU, deltaV
862 *u2d, OutputFormatControl=0, v2d=0.0, AVSExecMode=0, EIFbypass=false
863 * sample8x8AVS(matrix<unsigned short, N, 64> &M, samplerType, channelMask, surfIndex, samplerIndex, u, v, deltaU, deltaV, u2d,
864 OutputFormatControl=0, v2d, AVSExecMode, EIFbypass=false);
865 *
866 * Assuming: N = 4, channelMask=ABGR_ENABLE, surfIndex = 0x21, samplerIndex = 0x4,
867 * then the generated code should look like the following for GT:
868 *
869 * .declare VX Base=m ElementSize=4 Type=ud Total=16
870 * .declare VA Base=m ElementSize=4 Type=f Total=8 ALIAS(VX,8)
871 * .declare VY Base=r ElementSize=2 Type=uw Total=256
872 *
873 * mov (8) VX(0,0)<1>, r0:ud
874 * mov (1) VX(0,2)<1>, 0 channel mask [12,15], output format control [16,17] 0
875 * mov (1) VA(0,0)<1>, v2d
876 * mov (1) VA(0,1)<1>, vertical block number
877 * mov (1) VA(0,2)<1>, u
878 * mov (1) VA(0,3)<1>, v
879 * mov (1) VA(0,4)<1>, deltaU
880 * mov (1) VA(0,5)<1>, deltaV
881 * mov (1) VA(0,6)<1>, u2d
882 * mov (1) VA(0,7)<1>,
883 [0:22] GroupID
884 [23:24] Reserved
885 [25:26] 1x - 16x8
886 0x - 16x4
887 [27] EIF Bypass
888 [28:31] 0000 - AVS Scaling
889 * send (16) VY(0,0)<1>, VX(0,0), 0x2, 0x048bc421
890 * mov (256) M(0,0)<1>, VY(0,0)
891 *
892 * VX: message header
893 *
894 * VA: SIMD32 media payload
895 *
896 * ex_desc: 0x2 == 0010 (Target Function ID: Sampling Engine)
897 *
898 * desc: 0x050EB000 == Bit 31-29: 000 (Reserved)
899 * Bit 28-25: 0010 (Message Length = 2)
900 * Bit 24-20: 10000 (Response Message Length = 16)
901 * Bit 19: 1 (Header present)
902 * Bit 18-17: 11 (SIMD Mode = SIMD32/64)
903 * Bit 16-12: 01011 (Message Type = sample8x8 Media layout)
904 * Bit 11-8: 0000 + samplerIndex (Sampler Index)
905 * Bit 7-0: 00000000 + surfIndex (Binding Table Index)
906 *
907 */
translateVISAAvsInst(G4_Operand * surface,G4_Operand * sampler,ChannelMask channel,unsigned numEnabledChannels,G4_Operand * deltaUOpnd,G4_Operand * uOffOpnd,G4_Operand * deltaVOpnd,G4_Operand * vOffOpnd,G4_Operand * u2dOpnd,G4_Operand * groupIDOpnd,G4_Operand * verticalBlockNumberOpnd,unsigned char cntrl,G4_Operand * v2dOpnd,unsigned char execMode,G4_Operand * eifbypass,G4_DstRegRegion * dstOpnd)908 int IR_Builder::translateVISAAvsInst(
909 G4_Operand* surface,
910 G4_Operand* sampler,
911 ChannelMask channel,
912 unsigned numEnabledChannels,
913 G4_Operand* deltaUOpnd,
914 G4_Operand* uOffOpnd,
915 G4_Operand* deltaVOpnd,
916 G4_Operand* vOffOpnd,
917 G4_Operand* u2dOpnd,
918 G4_Operand* groupIDOpnd,
919 G4_Operand* verticalBlockNumberOpnd,
920 unsigned char cntrl,
921 G4_Operand* v2dOpnd,
922 unsigned char execMode,
923 G4_Operand* eifbypass,
924 G4_DstRegRegion* dstOpnd)
925 {
926 TIME_SCOPE(VISA_BUILDER_IR_CONSTRUCTION);
927
928
929 {
930 /*
931 * mov (8) VX(0,0)<1>, r0:ud
932 * mov (1) VX(0,2)<1>, 0 channel mask [12,15], output format control [16,17] 0
933 * mov (1) VA(0,0)<1>, v2d
934 * mov (1) VA(0,1)<1>, vertical block number
935 * mov (1) VA(0,2)<1>, u
936 * mov (1) VA(0,3)<1>, v
937 * mov (1) VA(0,4)<1>, deltaU
938 * mov (1) VA(0,5)<1>, deltaV
939 * mov (1) VA(0,6)<1>, u2d
940 * mov (1) VA(0,7)<1>,
941 [0:22] GroupID
942 [23:24] Reserved
943 [25:26] 1x - 16x8
944 0x - 16x4
945 [27] EIF Bypass
946 [28:31] 0000 - AVS Scaling
947 */
948 unsigned int number_elements_returned = 64;
949 G4_Type output_type = Type_UW;
950
951 if (cntrl > 1)
952 output_type = Type_UB;
953
954
955 if (execMode == AVS_16x8)
956 {
957 number_elements_returned = 128;
958 numEnabledChannels *= 2;
959 }
960
961 if (execMode == AVS_8x4)
962 {
963 number_elements_returned = 32;
964 }
965
966 if (execMode == AVS_4x4)
967 {
968 number_elements_returned = 16;
969 }
970
971 unsigned obj_size = number_elements_returned*numEnabledChannels*TypeSize(output_type);
972 // mov (8) VX(0,0)<1>, r0:ud
973 // add dcl for VX
974 G4_Declare *dcl = createSendPayloadDcl(2 * GENX_SAMPLER_IO_SZ, Type_UD);
975
976 // mov VX(0,0)<1>, r0
977 createMovR0Inst(dcl, 0, 0, true);
978 /* mov (1) VX(0,2)<1>, 0 */
979 unsigned cmask = channel.getHWEncoding() << 12;
980 cmask += cntrl << 18;
981 createMovInst(dcl, 0, 2, g4::SIMD1, NULL, NULL, createImm(cmask, Type_UD), true);
982
983 G4_Declare *dcl1 = createSendPayloadDcl(GENX_DATAPORT_IO_SZ, Type_F);
984 dcl1->setAliasDeclare(dcl, numEltPerGRF<Type_UB>());
985
986 /*
987 Keeping destination type as UD, otherwise w-->f conversion happens,
988 which affects the results.
989 */
990 G4_Declare *dcl1_ud = createSendPayloadDcl(GENX_DATAPORT_IO_SZ, Type_UD);
991 dcl1_ud->setAliasDeclare(dcl, numEltPerGRF<Type_UB>());
992
993 // mov (1) VA(0,0)<1>, v2d
994 createMovInst(dcl1, 0, 0, g4::SIMD1, NULL, NULL, v2dOpnd, true);
995
996 // mov (1) VA(0,1)<1>, vertical block number
997 createMovInst(dcl1_ud, 0, 1, g4::SIMD1, NULL, NULL, verticalBlockNumberOpnd, true);
998 // mov (1) VA(1,2)<1>, u
999 createMovInst(dcl1, 0, 2, g4::SIMD1, NULL, NULL, uOffOpnd, true);
1000 // mov (1) VA(1,3)<1>, v
1001 createMovInst(dcl1, 0, 3, g4::SIMD1, NULL, NULL, vOffOpnd, true);
1002 // mov (1) VA(1,4)<1>, deltaU
1003 createMovInst(dcl1, 0, 4, g4::SIMD1, NULL, NULL, deltaUOpnd, true);
1004 // mov (1) VA(1,5)<1>, deltaV
1005 createMovInst(dcl1, 0, 5, g4::SIMD1, NULL, NULL, deltaVOpnd, true);
1006 // mov (1) VA(0,6)<1>, U2d
1007 createMovInst(dcl1, 0, 6, g4::SIMD1, NULL, NULL, u2dOpnd, true);
1008
1009 {
1010 /*
1011 [23:24] Reserved
1012 [25:26] 1x - 16x8
1013 0x - 16x4
1014 [27] EIF Bypass
1015 [28:31] 0000 - AVS Scaling
1016 */
1017 unsigned int upper_bits = 0;
1018 upper_bits += execMode << 25;
1019
1020 if (eifbypass->isImm())
1021 {
1022 upper_bits += (eifbypass->asImm()->getInt() & 1) << 27;
1023
1024 G4_DstRegRegion* dst2_opnd = createDst(dcl1_ud->getRegVar(), 0, 7, 1, Type_UD);
1025 createBinOp(G4_add, g4::SIMD1, dst2_opnd, groupIDOpnd,
1026 createImm(upper_bits, Type_UD), InstOpt_WriteEnable, true);
1027 }
1028 else
1029 {
1030 // extract lsb of eifbypass
1031 G4_DstRegRegion* dst2_opnd = createDst(dcl1_ud->getRegVar(), 0, 7, 1, Type_UD);
1032 createBinOp(G4_and, g4::SIMD1, dst2_opnd, eifbypass,
1033 createImm(1, Type_UD), InstOpt_WriteEnable, true);
1034
1035 // eifbypass << 27
1036 G4_SrcRegRegion* src2_opnd = createSrc(dcl1_ud->getRegVar(), 0, 7, getRegionScalar(), dcl1_ud->getElemType());
1037 G4_DstRegRegion* dst3_opnd = createDst(dcl1_ud->getRegVar(), 0, 7, 1, Type_UD);
1038 createBinOp(G4_shl, g4::SIMD1, dst3_opnd, src2_opnd,
1039 createImm(27, Type_UD), InstOpt_WriteEnable, true);
1040
1041 // upper_bits + (eifbypass << 27)
1042 G4_SrcRegRegion* src3_opnd = createSrc(dcl1_ud->getRegVar(), 0, 7, getRegionScalar(), dcl1_ud->getElemType());
1043 G4_DstRegRegion* dst4_opnd = createDst(dcl1_ud->getRegVar(), 0, 7, 1, Type_UD);
1044 createBinOp(G4_add, g4::SIMD1, dst4_opnd, src3_opnd,
1045 createImm(upper_bits, Type_UD), InstOpt_WriteEnable, true);
1046
1047 G4_DstRegRegion* dst5_opnd = createDst(dcl1_ud->getRegVar(), 0, 7, 1, Type_UD);
1048 G4_SrcRegRegion* src_opnd = createSrc(dcl1_ud->getRegVar(), 0, 7, getRegionScalar(), dcl1_ud->getElemType());
1049 createBinOp(G4_add, g4::SIMD1, dst5_opnd, groupIDOpnd, src_opnd, InstOpt_WriteEnable, true);
1050
1051 }
1052 }
1053
1054 /*
1055 * desc: 0x050EB000 == Bit 31-29: 000 (Reserved)
1056 * Bit 28-25: 0010 (Message Length = 2)
1057 * Bit 24-20: 10000 (Response Message Length = 16)
1058 * Bit 19: 1 (Header present)
1059 * Bit 18-17: 11 (SIMD Mode = SIMD32/64)
1060 * Bit 16-12: 01011 (Message Type = sample8x8 Media layout)
1061 * Bit 11-8: 0000 + samplerIndex (Sampler Index)
1062 * Bit 7-0: 00000000 + surfIndex (Binding Table Index)
1063 */
1064
1065 // Set bit 9-8 for the message descriptor
1066 unsigned temp = 0;
1067 temp += 0xB << 12; // Bit 15-12 = 1100 for Sampler Message Type
1068 temp += 0x3 << 17; // Bit 17-16 = 11 for SIMD32 mode
1069
1070 // send's operands preparation
1071 // create a currDst for VX
1072 G4_SrcRegRegion* payload = createSrcRegRegion(dcl, getRegionStride1());
1073
1074 G4_DstRegRegion* d = checkSendDst(dstOpnd->asDstRegRegion());
1075
1076 createSendInst(
1077 NULL,
1078 d,
1079 payload,
1080 2,
1081 obj_size/numEltPerGRF<Type_UB>(),
1082 g4::SIMD16,
1083 temp,
1084 SFID::SAMPLER,
1085 1,
1086 SendAccess::READ_ONLY,
1087 surface,
1088 sampler,
1089 InstOpt_WriteEnable,
1090 false);
1091 }
1092
1093 return VISA_SUCCESS;
1094 }
1095
1096
translateVISAVaSklPlusGeneralInst(ISA_VA_Sub_Opcode sub_opcode,G4_Operand * surface,G4_Operand * sampler,unsigned char mode,unsigned char functionality,G4_Operand * uOffOpnd,G4_Operand * vOffOpnd,G4_Operand * offsetsOpnd,G4_Operand * loopCountOpnd,G4_Operand * pixelHMaskOpnd,G4_Operand * pixelVMaskLeftOpnd,G4_Operand * pixelVMaskRightOpnd,G4_Operand * disparityOpnd,G4_Operand * verticalOriginOpnd,G4_Operand * horizontalOriginOpnd,G4_Operand * xDirectionSizeOpnd,G4_Operand * yDirectionSizeOpnd,G4_Operand * xDirectionSearchSizeOpnd,G4_Operand * yDirectionSearchSizeOpnd,G4_DstRegRegion * dstOpnd,G4_Type dstType,unsigned dstSize,unsigned char pixelSize,G4_Operand * dstSurfaceOpnd,G4_Operand * dstXOpnd,G4_Operand * dstYOpnd,bool hdcMode)1097 int IR_Builder::translateVISAVaSklPlusGeneralInst(
1098 ISA_VA_Sub_Opcode sub_opcode,
1099 G4_Operand* surface, G4_Operand* sampler,
1100 unsigned char mode, unsigned char functionality,
1101 G4_Operand* uOffOpnd, G4_Operand* vOffOpnd ,
1102
1103 //1pixel convolve
1104 G4_Operand * offsetsOpnd,
1105
1106 //FloodFill
1107 G4_Operand* loopCountOpnd, G4_Operand* pixelHMaskOpnd,
1108 G4_Operand* pixelVMaskLeftOpnd, G4_Operand* pixelVMaskRightOpnd,
1109
1110 //LBP Correlation
1111 G4_Operand* disparityOpnd,
1112
1113 //Correlation Search
1114 G4_Operand* verticalOriginOpnd, G4_Operand* horizontalOriginOpnd,
1115 G4_Operand* xDirectionSizeOpnd, G4_Operand* yDirectionSizeOpnd,
1116 G4_Operand* xDirectionSearchSizeOpnd , G4_Operand* yDirectionSearchSizeOpnd,
1117
1118 G4_DstRegRegion* dstOpnd, G4_Type dstType, unsigned dstSize,
1119
1120 //HDC
1121 unsigned char pixelSize, G4_Operand* dstSurfaceOpnd,
1122 G4_Operand *dstXOpnd, G4_Operand* dstYOpnd,
1123 bool hdcMode)
1124 {
1125 TIME_SCOPE(VISA_BUILDER_IR_CONSTRUCTION);
1126
1127 G4_Declare* dcl = NULL;
1128 G4_Declare *dcl_offsets = NULL;
1129
1130 unsigned int reg_to_send = 2;
1131 //for offsets
1132 if ((sub_opcode == VA_OP_CODE_1PIXEL_CONVOLVE && mode == VA_CONV_16x1) ||
1133 sub_opcode == ISA_HDC_1PIXELCONV)
1134 {
1135 dcl = createSendPayloadDcl(4 * GENX_SAMPLER_IO_SZ , Type_UD);
1136 //16 pairs of x,y coordinates
1137 dcl_offsets = createSendPayloadDcl(32 , Type_W);
1138 dcl_offsets->setAliasDeclare(dcl, numEltPerGRF<Type_UB>() * 2);
1139 reg_to_send = 4;
1140 }
1141 else
1142 dcl = createSendPayloadDcl(2 * GENX_SAMPLER_IO_SZ , Type_UD);
1143
1144 G4_Declare *dcl_payload_UD = createSendPayloadDcl(GENX_DATAPORT_IO_SZ, Type_UD);
1145 G4_Declare *dcl_payload_F = createSendPayloadDcl(GENX_DATAPORT_IO_SZ, Type_F);
1146 G4_Declare *dcl_payload_UW = createSendPayloadDcl(GENX_DATAPORT_IO_SZ * 2, Type_UW);
1147
1148 dcl_payload_UD->setAliasDeclare (dcl, numEltPerGRF<Type_UB>());
1149 dcl_payload_F->setAliasDeclare (dcl, numEltPerGRF<Type_UB>());
1150 dcl_payload_UW->setAliasDeclare (dcl, numEltPerGRF<Type_UB>());
1151
1152 /// Message Header Setup
1153 /// 19:18 output control format | 15 Alpha Write Channel Mask ARGB = 1101 = 0xD for sampler8x8
1154 unsigned msg_header = (0xD << 12);
1155
1156 //if MMF based on pixel size set output format control.
1157 if (sub_opcode == ISA_HDC_MMF && pixelSize)
1158 {
1159 msg_header = msg_header + (0x2 << 18);
1160 }
1161
1162 //I guess this is still needed just to be sure payload is really initiazlied.
1163 //since full register initalization is conservative some registers
1164 //can still be not initialized and then used for payload
1165 if (m_options->getOption(vISA_InitPayload))
1166 {
1167 createMovInst(dcl_payload_UD, 0, 0, g4::SIMD8, NULL, NULL, createImm(0, Type_UD));
1168 }
1169 // mov VX(0,0)<1>, r0
1170 createMovR0Inst(dcl, 0, 0);
1171 createMovInst(dcl, 0, 2, g4::SIMD1, NULL, NULL, createImm(msg_header, Type_UD));
1172
1173 //set dst BTI, In M0.2 bits 24:31
1174 if (hdcMode)
1175 {
1176 G4_Declare *dcl_temp = createDeclareNoLookup(
1177 "tmp_shl_dst_bti",
1178 G4_GRF ,
1179 1,
1180 1,
1181 Type_UD);
1182
1183 //Creating dst of the shift to be used in shift instruction
1184 //Creating src of src to use in the subsequent add instruction
1185 G4_Operand* shift_immed = createSrc(dcl_temp->getRegVar(), 0, 0, getRegionScalar(), Type_UD);
1186 G4_DstRegRegion* temp_dst = createDst(dcl_temp->getRegVar(), 0, 0,1, Type_UD);
1187
1188 //creating a src and for m0.2
1189 G4_SrcRegRegion* m0_2_src = createSrc(dcl->getRegVar(), 0, 2, getRegionScalar(), Type_UD);
1190 G4_DstRegRegion* m0_2_dst = createDst(dcl->getRegVar(), 0, 2, 1, Type_UD);
1191
1192 createBinOp(G4_shl, g4::SIMD1, temp_dst, dstSurfaceOpnd, createImm(24, Type_UD), InstOpt_WriteEnable, true);
1193 createBinOp(G4_add, g4::SIMD1, m0_2_dst, m0_2_src, shift_immed, InstOpt_WriteEnable, true);
1194 }
1195
1196 // set x_offset In M0.4 0:15
1197 // set y_offset In M0.4 16:31
1198 if (hdcMode)
1199 {
1200 G4_Declare *dcl_temp = createDeclareNoLookup(
1201 "tmp_shl_y_offset",
1202 G4_GRF ,
1203 1,
1204 1,
1205 Type_UD);
1206
1207 // Creating dst of the shift to be used in shift instruction
1208 // Creating src of src to use in the subsequent add instruction
1209 G4_Operand * shift_immed = createSrc(dcl_temp->getRegVar(), 0, 0, getRegionScalar(), Type_UD);
1210 G4_DstRegRegion* temp_dst = createDst(dcl_temp->getRegVar(), 0, 0,1, Type_UD);
1211
1212 // creating a src and for m0.4
1213 G4_DstRegRegion* m0_4_dst = createDst(dcl->getRegVar(), 0, 4, 1, Type_UD);
1214
1215 createBinOp(G4_shl, g4::SIMD1, temp_dst, dstYOpnd, createImm(16, Type_UD), InstOpt_WriteEnable, true);
1216 createBinOp(G4_add, g4::SIMD1, m0_4_dst, dstXOpnd, shift_immed, InstOpt_WriteEnable, true);
1217 }
1218
1219 // set dst surface format based on pixel size M0.5 0:4
1220 if (hdcMode)
1221 {
1222 int surface_format = 0;
1223 if (pixelSize == 0) {
1224 surface_format = 6; // PLANAR_Y16_SNORM
1225 } else if (pixelSize == 1) {
1226 surface_format = 5; // PLANAR_Y8_UNORM
1227 } else {
1228 ASSERT_USER(false,
1229 "Invalid surface format for SKL+ VA HDC");
1230 }
1231 createMovInst(dcl, 0, 5, g4::SIMD1, NULL, NULL, createImm(surface_format, Type_UD));
1232 }
1233
1234 // setting M2.1 vertical block offset to 0
1235 // for LBP correlation setting M2.0 to 0, since only upper 16 bits are set
1236 // later by adding to shl result
1237 createMovInst(dcl_payload_UD, 0, 1, g4::SIMD1, NULL, NULL, createImm(0, Type_UD));
1238
1239 // setting up M1.7
1240 unsigned int m1_7 = sampler8x8_group_id++;
1241
1242 ISA_VA_Sub_Opcode originalSubOpcode = sub_opcode;
1243
1244 // HDC uses the same sub opcodes as regular VA,
1245 // but with return register set to 0.
1246 switch (sub_opcode)
1247 {
1248 case ISA_HDC_CONV:
1249 sub_opcode = Convolve_FOPCODE;
1250 break;
1251 case ISA_HDC_MMF:
1252 sub_opcode = MINMAXFILTER_FOPCODE;
1253 break;
1254 case ISA_HDC_ERODE:
1255 sub_opcode = ERODE_FOPCODE;
1256 break;
1257 case ISA_HDC_DILATE:
1258 sub_opcode = Dilate_FOPCODE;
1259 break;
1260 case ISA_HDC_LBPCORRELATION:
1261 sub_opcode = VA_OP_CODE_LBP_CORRELATION;
1262 break;
1263 case ISA_HDC_LBPCREATION:
1264 sub_opcode = VA_OP_CODE_LBP_CREATION;
1265 break;
1266 case ISA_HDC_1DCONV_H:
1267 sub_opcode = VA_OP_CODE_1D_CONVOLVE_HORIZONTAL;
1268 break;
1269 case ISA_HDC_1DCONV_V:
1270 sub_opcode = VA_OP_CODE_1D_CONVOLVE_VERTICAL;
1271 break;
1272 case ISA_HDC_1PIXELCONV:
1273 sub_opcode = VA_OP_CODE_1PIXEL_CONVOLVE;
1274 break;
1275 default:
1276 break; // Prevent gcc warning
1277 }
1278 //setting VA operation
1279 m1_7 |= (unsigned int)sub_opcode<<28;
1280
1281 //setting IEF bypass to 1
1282 m1_7 |= 0x1<<27;
1283
1284 //setting message sequence
1285 m1_7 |= (mode & 0x3) << 25;
1286
1287 //setting functionality
1288 m1_7 |= (functionality & 0x3) << 23;
1289 createMovInst(dcl_payload_UD, 0, 7, g4::SIMD1, NULL, NULL, createImm(m1_7, Type_UD));
1290
1291 /*
1292 case VA_OP_CODE_1D_CONVOLVE_HORIZONTAL:
1293 case VA_OP_CODE_1D_CONVOLVE_VERTICAL:
1294 case VA_OP_CODE_1PIXEL_CONVOLVE:
1295 case VA_OP_CODE_FLOOD_FILL:
1296 case VA_OP_CODE_LBP_CREATION:
1297 case VA_OP_CODE_LBP_CORRELATION:
1298 case VA_OP_CODE_CORRELATION_SEARCH:
1299 */
1300
1301 //setting m1_5 and m1_4
1302 if (sub_opcode == VA_OP_CODE_CORRELATION_SEARCH)
1303 {
1304 createMovInst(dcl_payload_F, 0, 5, g4::SIMD1, NULL, NULL, verticalOriginOpnd);
1305 createMovInst(dcl_payload_F, 0, 4, g4::SIMD1, NULL, NULL, horizontalOriginOpnd);
1306 }
1307
1308 //setting m1_3
1309 if (vOffOpnd != NULL)
1310 {
1311 createMovInst(dcl_payload_F, 0, 3, g4::SIMD1, NULL, NULL, vOffOpnd);
1312 }
1313
1314 //setting m1_2
1315 if (uOffOpnd != NULL)
1316 {
1317 createMovInst(dcl_payload_F, 0, 2, g4::SIMD1, NULL, NULL, uOffOpnd);
1318 }
1319
1320 if (sub_opcode == VA_OP_CODE_FLOOD_FILL)
1321 {
1322 createMovSendSrcInst(dcl_payload_UD, 0, 2, 5, pixelHMaskOpnd, 0);
1323 }
1324
1325 if ((sub_opcode == VA_OP_CODE_1PIXEL_CONVOLVE && mode == VA_CONV_16x1) ||
1326 originalSubOpcode == ISA_HDC_1PIXELCONV)
1327 {
1328 const RegionDesc *rd = getRegionStride1();
1329 G4_Operand *offsets_opnd_temp = createSrc(
1330 offsetsOpnd->asSrcRegRegion()->getBase(),
1331 0,
1332 0,
1333 rd,
1334 Type_W);
1335
1336 createMovInst(dcl_offsets, 0, 0, g4::SIMD32, NULL, NULL, offsets_opnd_temp);
1337 }
1338
1339 //creating temp for intermediate computations
1340 G4_Declare *dcl_temp = createDeclareNoLookup(
1341 "tmp_shl",
1342 G4_GRF ,
1343 1,
1344 1,
1345 Type_UD);
1346 G4_SrcRegRegion temp_src(Mod_src_undef,Direct,dcl_temp->getRegVar(), 0, 0, getRegionScalar(), Type_UD);
1347 G4_DstRegRegion temp_dst(Direct, dcl_temp->getRegVar(), 0, 0,1, Type_UD);
1348
1349 //creating a src and for m1.0
1350 G4_SrcRegRegion m1_0_src(Mod_src_undef,Direct,dcl_payload_UD->getRegVar(), 0, 0, getRegionScalar(), Type_UD);
1351 G4_DstRegRegion m1_0_dst(Direct, dcl_payload_UD->getRegVar(), 0, 0, 1, Type_UD);
1352
1353 G4_Operand * shift_immed = NULL;
1354
1355 //setting m1_0
1356 switch (sub_opcode)
1357 {
1358 case VA_OP_CODE_FLOOD_FILL:
1359 {
1360 createMovInst(dcl_payload_UD, 0, 0, g4::SIMD1, NULL, NULL, pixelVMaskLeftOpnd);
1361
1362 if (pixelVMaskRightOpnd->isImm())
1363 {
1364 shift_immed = createImm(pixelVMaskRightOpnd->asImm()->getInt() << 10,Type_UD);
1365 createMov(g4::SIMD1, createDstRegRegion(m1_0_dst), shift_immed, InstOpt_NoOpt, true);
1366 } else {
1367
1368 createBinOp(G4_shl, g4::SIMD1,
1369 createDstRegRegion(temp_dst), pixelVMaskRightOpnd, createImm(10, Type_UD), InstOpt_WriteEnable, true);
1370 shift_immed = createSrcRegRegion(temp_src);
1371 createBinOp(G4_add, g4::SIMD1,
1372 createDstRegRegion(m1_0_dst), createSrcRegRegion(m1_0_src), shift_immed, InstOpt_WriteEnable, true);
1373 }
1374
1375 if (loopCountOpnd->isImm())
1376 {
1377 shift_immed = createImm(loopCountOpnd->asImm()->getInt() << 24, Type_UD);
1378 } else {
1379 createBinOp(G4_shl, g4::SIMD1,
1380 createDstRegRegion(temp_dst), loopCountOpnd, createImm(24, Type_UD), InstOpt_WriteEnable, true);
1381 shift_immed = createSrcRegRegion(temp_src);
1382 }
1383 createBinOp(G4_add, g4::SIMD1,
1384 createDstRegRegion(m1_0_dst), createSrcRegRegion(m1_0_src), shift_immed, InstOpt_WriteEnable, true);
1385 break;
1386 }
1387 case VA_OP_CODE_LBP_CORRELATION:
1388 {
1389 //setting disparity
1390 if (disparityOpnd->isImm())
1391 {
1392 shift_immed = createImm(disparityOpnd->asImm()->getInt() << 16, Type_UD);
1393 createMov(g4::SIMD1, createDstRegRegion(m1_0_dst), shift_immed, InstOpt_NoOpt, true);
1394 }
1395 else
1396 {
1397 createBinOp(G4_shl, g4::SIMD1,
1398 createDstRegRegion(m1_0_dst), disparityOpnd, createImm(16, Type_UD), InstOpt_WriteEnable, true);
1399 }
1400
1401 break;
1402 }
1403 case VA_OP_CODE_CORRELATION_SEARCH:
1404 {
1405 /*
1406 G4_Operand* verticalOriginOpnd , G4_Operand* horizontalOriginOpnd ,
1407 G4_Operand* xDirectionSizeOpnd , G4_Operand* yDirectionSizeOpnd ,
1408 G4_Operand* xDirectionSearchSizeOpnd , G4_Operand* yDirectionSearchSizeOpnd ,
1409 */
1410 createMovInst(dcl_payload_UD, 0, 0, g4::SIMD1, NULL, NULL, xDirectionSizeOpnd);
1411
1412 //setting y-direction size of the source for correlation.
1413 if (yDirectionSizeOpnd->isImm())
1414 {
1415 shift_immed = createImm(yDirectionSizeOpnd->asImm()->getInt() << 4, Type_UD);
1416 createMov(g4::SIMD1, createDstRegRegion(m1_0_dst), shift_immed, InstOpt_NoOpt, true);
1417 }
1418 else
1419 {
1420 createBinOp(G4_shl, g4::SIMD1, createDstRegRegion(temp_dst), yDirectionSizeOpnd, createImm(4, Type_UD), InstOpt_WriteEnable, true);
1421 shift_immed = createSrcRegRegion(temp_src);
1422 createBinOp(G4_add, g4::SIMD1, createDstRegRegion(m1_0_dst), createSrcRegRegion(m1_0_src), shift_immed, InstOpt_WriteEnable, true);
1423 }
1424
1425
1426 //31:16 reserved
1427
1428 //setting x-direction search size
1429 if (xDirectionSearchSizeOpnd->isImm())
1430 {
1431 shift_immed = createImm(xDirectionSearchSizeOpnd->asImm()->getInt() << 8, Type_UD);
1432 } else {
1433 createBinOp(G4_shl, g4::SIMD1, createDstRegRegion(temp_dst), xDirectionSearchSizeOpnd, createImm(8, Type_UD), InstOpt_WriteEnable, true);
1434 shift_immed = createSrcRegRegion(temp_src);
1435 }
1436 createBinOp(G4_add, g4::SIMD1, createDstRegRegion(m1_0_dst), createSrcRegRegion(m1_0_src), shift_immed, InstOpt_WriteEnable, true);
1437
1438 //setting y-direction search size.
1439 if (yDirectionSearchSizeOpnd->isImm())
1440 {
1441 shift_immed = createImm(yDirectionSearchSizeOpnd->asImm()->getInt() << 16, Type_UD);
1442 } else {
1443 createBinOp(G4_shl, g4::SIMD1, createDstRegRegion(temp_dst), yDirectionSearchSizeOpnd, createImm(16, Type_UD), InstOpt_WriteEnable, true);
1444 shift_immed = createSrcRegRegion(temp_src);
1445 }
1446 createBinOp(G4_add, g4::SIMD1, createDstRegRegion(m1_0_dst), createSrcRegRegion(m1_0_src), shift_immed, InstOpt_WriteEnable, true);
1447
1448 break;
1449 }
1450 default:
1451 break; // Prevent gcc warning
1452 }
1453
1454 G4_SrcRegRegion* payload = createSrcRegRegion(dcl, getRegionStride1());
1455 G4_DstRegRegion* post_dst = NULL;
1456
1457 unsigned int reg_to_receive = 0;
1458
1459 if (!hdcMode)
1460 {
1461 post_dst = checkSendDst(dstOpnd);
1462 if ((dstSize % numEltPerGRF<Type_UB>()) != 0)
1463 {
1464 reg_to_receive = (unsigned int) std::ceil((double)dstSize/numEltPerGRF<Type_UB>());
1465 }
1466 else
1467 {
1468 reg_to_receive = dstSize/numEltPerGRF<Type_UB>();
1469 }
1470 } else {
1471 post_dst = createNullDst(Type_UD);
1472 }
1473
1474 /// Message Descriptor Setup
1475 /// 18:17 SIMD Mode (SIMD32/64 = 3) | 16:12 Message Type (sampler8x8 = 01011 = 0xB)
1476 unsigned msg_descriptor = (0x3 << 17) + (0xB << 12);
1477 createSendInst(NULL, post_dst, payload, reg_to_send, reg_to_receive, g4::SIMD8,
1478 msg_descriptor, SFID::SAMPLER, 1, SendAccess::READ_ONLY, surface, sampler, 0, false);
1479
1480 return VISA_SUCCESS;
1481 }
1482