1 /*========================== begin_copyright_notice ============================
2
3 Copyright (C) 2020-2021 Intel Corporation
4
5 SPDX-License-Identifier: MIT
6
7 ============================= end_copyright_notice ===========================*/
8
9 #include "BuildIR.h"
10 #include "../Timer.h"
11
12 using namespace vISA;
13
14
translateVISAAddrInst(ISA_Opcode opcode,VISA_Exec_Size executionSize,VISA_EMask_Ctrl emask,G4_DstRegRegion * dstOpnd,G4_Operand * src0Opnd,G4_Operand * src1Opnd)15 int IR_Builder::translateVISAAddrInst(
16 ISA_Opcode opcode, VISA_Exec_Size executionSize, VISA_EMask_Ctrl emask,
17 G4_DstRegRegion *dstOpnd, G4_Operand *src0Opnd, G4_Operand *src1Opnd)
18 {
19 TIME_SCOPE(VISA_BUILDER_IR_CONSTRUCTION);
20
21 G4_ExecSize exsize = toExecSize(executionSize);
22 G4_InstOpts instOpt = Get_Gen4_Emask(emask, exsize);
23
24 if (src1Opnd && src0Opnd->isAddrExp() && src1Opnd->isImm())
25 {
26 src0Opnd->asAddrExp()->setOffset(src0Opnd->asAddrExp()->getOffset() + (int)src1Opnd->asImm()->getInt());
27 src1Opnd = NULL;
28 }
29
30 if (src0Opnd->isAddrExp() &&
31 src1Opnd == NULL)
32 {
33 createMov(
34 exsize,
35 dstOpnd,
36 src0Opnd,
37 instOpt,
38 true);
39 }
40 else
41 {
42 createInst(
43 NULL,
44 GetGenOpcodeFromVISAOpcode((ISA_Opcode)opcode),
45 NULL,
46 g4::NOSAT,
47 exsize,
48 dstOpnd,
49 src0Opnd,
50 src1Opnd,
51 instOpt,
52 true);
53 }
54
55 return VISA_SUCCESS;
56 }
57
translateVISAArithmeticInst(ISA_Opcode opcode,VISA_Exec_Size executionSize,VISA_EMask_Ctrl emask,G4_Predicate * predOpnd,G4_Sat saturate,G4_CondMod * condMod,G4_DstRegRegion * dstOpnd,G4_Operand * src0Opnd,G4_Operand * src1Opnd,G4_Operand * src2Opnd,G4_DstRegRegion * carryBorrow)58 int IR_Builder::translateVISAArithmeticInst(
59 ISA_Opcode opcode, VISA_Exec_Size executionSize, VISA_EMask_Ctrl emask,
60 G4_Predicate *predOpnd, G4_Sat saturate, G4_CondMod* condMod,
61 G4_DstRegRegion *dstOpnd, G4_Operand *src0Opnd, G4_Operand *src1Opnd,
62 G4_Operand *src2Opnd, G4_DstRegRegion *carryBorrow)
63 {
64 TIME_SCOPE(VISA_BUILDER_IR_CONSTRUCTION);
65
66 unsigned int instOpt = 0;
67 G4_ExecSize exsize = toExecSize(executionSize);
68 instOpt |= Get_Gen4_Emask(emask, exsize);
69
70 if (IsMathInst(opcode))
71 {
72 if (src1Opnd == NULL)
73 {
74 // create a null operand
75 src1Opnd = createNullSrc(src0Opnd->getType());
76 }
77
78 G4_MathOp mathOp = Get_MathFuncCtrl(opcode, dstOpnd->getType());
79
80 if (!hasFdivPow() && mathOp == MATH_FDIV)
81 {
82 expandFdiv(exsize, predOpnd, saturate, dstOpnd, src0Opnd, src1Opnd, instOpt);
83 }
84 else if (!hasFdivPow() && mathOp == MATH_POW)
85 {
86 expandPow(exsize, predOpnd, saturate, dstOpnd, src0Opnd, src1Opnd, instOpt);
87 }
88 else
89 {
90 createMathInst(
91 predOpnd,
92 saturate,
93 exsize,
94 dstOpnd,
95 src0Opnd,
96 src1Opnd,
97 mathOp,
98 instOpt,
99 true);
100 }
101 }
102 else if (ISA_Inst_Table[opcode].n_srcs == 3)
103 {
104 if (opcode == ISA_ADD3O)
105 {
106 assert(predOpnd != nullptr && "predicate operand couldn't be nullptr");
107 condMod = createCondMod(Mod_o, predOpnd->getBase(), 0);
108 predOpnd = nullptr;
109 }
110
111 // do not check type of sources, float and integer are supported
112 createInst(
113 predOpnd,
114 GetGenOpcodeFromVISAOpcode(opcode),
115 condMod,
116 saturate,
117 exsize,
118 dstOpnd,
119 src0Opnd,
120 src1Opnd,
121 src2Opnd,
122 instOpt,
123 true);
124 }
125 else
126 {
127 auto inst = createInst(
128 predOpnd,
129 GetGenOpcodeFromVISAOpcode(opcode),
130 condMod,
131 saturate,
132 exsize,
133 dstOpnd,
134 src0Opnd,
135 src1Opnd,
136 instOpt,
137 true);
138
139 if (opcode == ISA_ADDC || opcode == ISA_SUBB)
140 {
141 G4_DstRegRegion *accDstOpnd = createDst(
142 phyregpool.getAcc0Reg(),
143 0,
144 0,
145 1,
146 dstOpnd->getType());
147
148 inst->setImplAccDst(accDstOpnd);
149 inst->setOptionOn(InstOpt_AccWrCtrl);
150
151 //mov dst acc
152 G4_SrcRegRegion *accSrcOpnd = createSrc(
153 phyregpool.getAcc0Reg(),
154 0,
155 0,
156 getRegionStride1(),
157 dstOpnd->getType());
158
159 createMov(
160 exsize,
161 carryBorrow,
162 accSrcOpnd,
163 instOpt,
164 true);
165 }
166
167 }
168
169 return VISA_SUCCESS;
170 }
171
translateVISADpasInst(VISA_Exec_Size executionSize,VISA_EMask_Ctrl emask,G4_opcode opc,G4_DstRegRegion * dstOpnd,G4_SrcRegRegion * src0Opnd,G4_SrcRegRegion * src1Opnd,G4_SrcRegRegion * src2Opnd,G4_SrcRegRegion * src3Opnd,GenPrecision A,GenPrecision W,uint8_t D,uint8_t C)172 int IR_Builder::translateVISADpasInst(
173 VISA_Exec_Size executionSize, VISA_EMask_Ctrl emask, G4_opcode opc,
174 G4_DstRegRegion *dstOpnd, G4_SrcRegRegion *src0Opnd, G4_SrcRegRegion *src1Opnd, G4_SrcRegRegion *src2Opnd,
175 G4_SrcRegRegion* src3Opnd, GenPrecision A, GenPrecision W, uint8_t D, uint8_t C)
176 {
177 TIME_SCOPE(VISA_BUILDER_IR_CONSTRUCTION);
178
179 G4_ExecSize exsize = toExecSize(executionSize);
180 G4_InstOpts instOpt = Get_Gen4_Emask(emask, exsize);
181 if (hasBFDstforDPAS() && (A == GenPrecision::BF16 || A == GenPrecision::BF8))
182 {
183 // PVC allows BF dst and src0, and they are W/UW when coming into vISA,
184 // so we fix the type here
185 if (dstOpnd->getType() == Type_W || dstOpnd->getType() == Type_UW)
186 {
187 dstOpnd->setType(Type_BF);
188 }
189 if (src0Opnd->getType() == Type_W || src0Opnd->getType() == Type_UW)
190 {
191 src0Opnd->setType(Type_BF);
192 }
193 }
194
195 if (src0Opnd->isNullReg())
196 {
197 src0Opnd->setType(dstOpnd->getType());
198 }
199
200 createDpasInst(
201 opc,
202 exsize,
203 dstOpnd,
204 src0Opnd,
205 src1Opnd,
206 src2Opnd,
207 src3Opnd,
208 instOpt,
209 A, W, D, C,
210 true);
211
212 return VISA_SUCCESS;
213 }
214
translateVISABfnInst(uint8_t booleanFuncCtrl,VISA_Exec_Size executionSize,VISA_EMask_Ctrl emask,G4_Predicate * predOpnd,G4_Sat saturate,G4_CondMod * condMod,G4_DstRegRegion * dstOpnd,G4_Operand * src0Opnd,G4_Operand * src1Opnd,G4_Operand * src2Opnd)215 int IR_Builder::translateVISABfnInst(
216 uint8_t booleanFuncCtrl, VISA_Exec_Size executionSize, VISA_EMask_Ctrl emask,
217 G4_Predicate *predOpnd, G4_Sat saturate, G4_CondMod* condMod,
218 G4_DstRegRegion *dstOpnd, G4_Operand *src0Opnd, G4_Operand *src1Opnd, G4_Operand *src2Opnd)
219 {
220 TIME_SCOPE(VISA_BUILDER_IR_CONSTRUCTION);
221
222 unsigned int instOpt = 0;
223 G4_ExecSize exsize = toExecSize(executionSize);
224 instOpt |= Get_Gen4_Emask(emask, exsize);
225
226 createBfnInst(
227 booleanFuncCtrl,
228 predOpnd,
229 condMod,
230 saturate,
231 exsize,
232 dstOpnd,
233 src0Opnd,
234 src1Opnd,
235 src2Opnd,
236 instOpt,
237 true);
238
239 return VISA_SUCCESS;
240 }
241
needs32BitFlag(uint32_t opt)242 static bool needs32BitFlag(uint32_t opt)
243 {
244 switch (opt & InstOpt_QuarterMasks)
245 {
246 case InstOpt_M16:
247 case InstOpt_M20:
248 case InstOpt_M24:
249 case InstOpt_M28:
250 return true;
251 default:
252 return false;
253 }
254 }
255
256
translateVISACompareInst(ISA_Opcode opcode,VISA_Exec_Size execsize,VISA_EMask_Ctrl emask,VISA_Cond_Mod relOp,G4_DstRegRegion * dstOpnd,G4_Operand * src0Opnd,G4_Operand * src1Opnd)257 int IR_Builder::translateVISACompareInst(
258 ISA_Opcode opcode, VISA_Exec_Size execsize, VISA_EMask_Ctrl emask, VISA_Cond_Mod relOp,
259 G4_DstRegRegion *dstOpnd, G4_Operand *src0Opnd, G4_Operand *src1Opnd)
260 {
261 TIME_SCOPE(VISA_BUILDER_IR_CONSTRUCTION);
262
263 G4_CondMod* condMod = NULL;
264 G4_ExecSize exsize = toExecSize(execsize);
265 G4_InstOpts inst_opt = Get_Gen4_Emask(emask, exsize);
266 const char *varName = "PTemp";
267
268 uint8_t numWords = (exsize + 15)/16;
269 if (needs32BitFlag(inst_opt))
270 {
271 // for H2, Q3, etc. we must use 32-bit flag regardless of execution size
272 numWords = 2;
273 }
274 //TODO: Can eliminate the flag temp creation. Might need further changes
275 G4_Declare *dcl = createDeclareNoLookup(
276 createStringCopy(varName, mem),
277 G4_FLAG,
278 numWords,
279 1,
280 Type_UW);
281 dcl->setNumberFlagElements(exsize);
282
283 condMod = createCondMod(
284 Get_G4_CondModifier_From_Common_ISA_CondModifier(relOp),
285 dcl->getRegVar(),
286 0);
287
288 createInst(
289 NULL,
290 GetGenOpcodeFromVISAOpcode(opcode),
291 condMod,
292 g4::NOSAT,
293 exsize,
294 dstOpnd,
295 src0Opnd,
296 src1Opnd,
297 inst_opt,
298 true);
299
300 return VISA_SUCCESS;
301 }
302
translateVISACompareInst(ISA_Opcode opcode,VISA_Exec_Size execsize,VISA_EMask_Ctrl emask,VISA_Cond_Mod relOp,G4_Declare * predDst,G4_Operand * src0Opnd,G4_Operand * src1Opnd)303 int IR_Builder::translateVISACompareInst(
304 ISA_Opcode opcode, VISA_Exec_Size execsize, VISA_EMask_Ctrl emask, VISA_Cond_Mod relOp,
305 G4_Declare* predDst, G4_Operand *src0Opnd, G4_Operand *src1Opnd)
306 {
307 TIME_SCOPE(VISA_BUILDER_IR_CONSTRUCTION);
308
309 G4_ExecSize exsize = toExecSize(execsize);
310 G4_InstOpts inst_opt = Get_Gen4_Emask(emask, exsize);
311 // If it's mix mode HF,F, it will be split down the road anyway, so behavior doesn't change.
312 G4_Type src0Type = src0Opnd->getType();
313 G4_Type src1Type = src1Opnd->getType();
314 G4_Type dstType;
315 if (IS_TYPE_FLOAT_ALL(src0Type))
316 {
317 dstType = (TypeSize(src0Type) > TypeSize(src1Type)) ? src0Type : src1Type;
318 }
319 else
320 {
321 // FIXME: why does exec size matter here?
322 dstType = exsize == 16 ? Type_W :
323 (TypeSize(src0Type) > TypeSize(src1Type) ? src0Type : src1Type);
324 if (IS_VTYPE(dstType))
325 {
326 dstType = Type_UD;
327 }
328 }
329 auto nullDst = createNullDst(dstType);
330
331 G4_CondMod* condMod = createCondMod(
332 Get_G4_CondModifier_From_Common_ISA_CondModifier(relOp),
333 predDst->getRegVar(), 0);
334
335 createInst(
336 NULL,
337 GetGenOpcodeFromVISAOpcode(opcode),
338 condMod,
339 g4::NOSAT,
340 exsize,
341 nullDst,
342 src0Opnd,
343 src1Opnd,
344 inst_opt,
345 true);
346
347 return VISA_SUCCESS;
348 }
349
350
translateVISALogicInst(ISA_Opcode opcode,G4_Predicate * predOpnd,G4_Sat saturate,VISA_Exec_Size executionSize,VISA_EMask_Ctrl emask,G4_DstRegRegion * dst,G4_Operand * src0,G4_Operand * src1,G4_Operand * src2,G4_Operand * src3)351 int IR_Builder::translateVISALogicInst(
352 ISA_Opcode opcode, G4_Predicate *predOpnd,
353 G4_Sat saturate, VISA_Exec_Size executionSize, VISA_EMask_Ctrl emask,
354 G4_DstRegRegion* dst, G4_Operand* src0, G4_Operand* src1,
355 G4_Operand* src2, G4_Operand* src3)
356 {
357 TIME_SCOPE(VISA_BUILDER_IR_CONSTRUCTION);
358
359 G4_ExecSize exsize = toExecSize(executionSize);
360 G4_InstOpts inst_opt = Get_Gen4_Emask(emask, exsize);
361 G4_Operand *g4Srcs[COMMON_ISA_MAX_NUM_SRC] = {src0, src1, src2, src3};
362
363 G4_opcode g4_op = GetGenOpcodeFromVISAOpcode(opcode);
364 if (dst->getBase() && dst->getBase()->isFlag())
365 {
366 g4_op = Get_Pseudo_Opcode(opcode);
367 if (g4_op == G4_illegal)
368 {
369 return VISA_FAILURE;
370 }
371 }
372
373 for (int i = 0; i < ISA_Inst_Table[opcode].n_srcs; i++)
374 {
375 if (g4Srcs[i]->isSrcRegRegion() &&
376 !isShiftOp(opcode) &&
377 (g4Srcs[i]->asSrcRegRegion()->getModifier() == Mod_Minus ||
378 g4Srcs[i]->asSrcRegRegion()->getModifier() == Mod_Minus_Abs))
379 {
380 G4_Type tmpType = g4Srcs[i]->asSrcRegRegion()->getType();
381 G4_Declare *tempDcl = createTempVar(exsize, tmpType, Any);
382 G4_DstRegRegion* tmp_dst_opnd = createDst(tempDcl->getRegVar(), 0, 0, 1, tmpType);
383
384 uint16_t vs = exsize;
385 if ((unsigned)exsize * g4Srcs[i]->asSrcRegRegion()->getTypeSize() > numEltPerGRF<Type_UB>())
386 {
387 vs /= 2;
388 }
389
390 createMov(exsize, tmp_dst_opnd, g4Srcs[i], inst_opt, true);
391
392 g4Srcs[i] = createSrcRegRegion(tempDcl, getRegionStride1());
393 }
394 }
395
396 if (opcode == ISA_BFI || opcode == ISA_BFE || opcode == ISA_BFREV)
397 {
398 // convert all immediates to D or UD as required by HW
399 // ToDo: maybe we should move this to HW conformity?
400 for (int i = 0; i < 4; i++)
401 {
402 if (g4Srcs[i] != NULL && g4Srcs[i]->isImm())
403 {
404 G4_Imm* imm = g4Srcs[i]->asImm();
405 switch (imm->getType())
406 {
407 case Type_W:
408 g4Srcs[i] = createImm(imm->getInt(), Type_D);
409 break;
410 case Type_UW:
411 g4Srcs[i] = createImm(imm->getInt(), Type_UD);
412 break;
413 default:
414 // ignore other types to be consistent with old behavior
415 break;
416 }
417 }
418 }
419 }
420
421 if (opcode == ISA_BFI)
422 {
423 // split into
424 // bfi1 tmp src0 src1
425 // bfi2 dst tmp src2 src3
426 G4_Declare* tmpDcl = createTempVar(exsize, g4Srcs[0]->getType(), GRFALIGN);
427 G4_DstRegRegion* tmpDst = createDstRegRegion(tmpDcl, 1);
428 createInst(
429 predOpnd,
430 g4_op,
431 NULL,
432 saturate,
433 exsize, // it is number of bits for predicate logic op
434 tmpDst,
435 g4Srcs[0],
436 g4Srcs[1],
437 inst_opt,
438 true);
439
440 G4_SrcRegRegion* src0 = createSrcRegRegion(tmpDcl,
441 (exsize == 1) ? getRegionScalar() : getRegionStride1());
442 createInst(
443 predOpnd,
444 G4_bfi2,
445 NULL,
446 saturate,
447 exsize, // it is number of bits for predicate logic op
448 dst,
449 src0,
450 g4Srcs[2],
451 g4Srcs[3],
452 inst_opt,
453 true);
454 }
455 else
456 {
457 // create inst
458 createInst(
459 predOpnd,
460 g4_op,
461 NULL,
462 saturate,
463 exsize, // it is number of bits for predicate logic op
464 dst,
465 g4Srcs[0],
466 g4Srcs[1],
467 g4Srcs[2],
468 inst_opt,
469 true);
470 }
471
472 return VISA_SUCCESS;
473 }
474
475
translateVISADataMovementInst(ISA_Opcode opcode,CISA_MIN_MAX_SUB_OPCODE subOpcode,G4_Predicate * predOpnd,VISA_Exec_Size executionSize,VISA_EMask_Ctrl emask,G4_Sat saturate,G4_DstRegRegion * dstOpnd,G4_Operand * src0Opnd,G4_Operand * src1Opnd)476 int IR_Builder::translateVISADataMovementInst(
477 ISA_Opcode opcode,
478 CISA_MIN_MAX_SUB_OPCODE subOpcode,
479 G4_Predicate *predOpnd,
480 VISA_Exec_Size executionSize,
481 VISA_EMask_Ctrl emask,
482 G4_Sat saturate,
483 G4_DstRegRegion *dstOpnd,
484 G4_Operand *src0Opnd,
485 G4_Operand *src1Opnd)
486 {
487 TIME_SCOPE(VISA_BUILDER_IR_CONSTRUCTION);
488
489 G4_ExecSize exsize = toExecSize(executionSize);
490 G4_InstOpts inst_opt = Get_Gen4_Emask(emask, exsize);
491 G4_CondMod* condMod = NULL;
492
493 if (opcode == ISA_MOVS)
494 {
495 if (src0Opnd->isSrcRegRegion())
496 src0Opnd->asSrcRegRegion()->setType(Type_UD);
497 dstOpnd->setType(Type_UD);
498 MUST_BE_TRUE(saturate == g4::NOSAT,
499 "saturation forbidden on this instruction");
500 createInst(
501 predOpnd,
502 G4_mov,
503 NULL,
504 g4::NOSAT,
505 exsize,
506 dstOpnd,
507 src0Opnd,
508 NULL,
509 inst_opt,
510 true);
511 }
512 else if (opcode == ISA_SETP)
513 {
514 // Src0 must have integer type. If src0 is a general or indirect operand,
515 // the LSB in each src0 element determines the corresponding dst element's Bool value.
516 // If src0 is an immediate operand, each of its bits from the LSB to MSB is used
517 // to set the Bool value in the corresponding dst element.
518 // Predication is not supported for this instruction.
519
520
521 /*
522 * 1. Mask operand is const or scalar
523 * mov (1) f0.0 src {NoMask}
524 * 2. Mask operand is stream.
525 * and.nz.f0.0 (n) null src 0x1:uw
526 */
527
528 // vISA spec does not allow 1 as the execution size anymore.
529 // This is a hack to allow execution size 1
530 // and we make sure it is a scalar region in this case.
531 if (kernel.getKernelType() == VISA_CM)
532 {
533 if (exsize == 1 && src0Opnd->isSrcRegRegion())
534 {
535 G4_SrcRegRegion *region = src0Opnd->asSrcRegRegion();
536 if (!region->isScalar())
537 region->setRegion(getRegionScalar());
538 }
539 }
540
541 if (src0Opnd->isImm() || (src0Opnd->isSrcRegRegion() &&
542 (src0Opnd->asSrcRegRegion()->isScalar())))
543 {
544 dstOpnd->setType(exsize == 32 ? Type_UD: Type_UW);
545 if (emask == vISA_EMASK_M5_NM)
546 {
547 // write to f0.1/f1.1 instead
548 MUST_BE_TRUE(dstOpnd->getTopDcl()->getNumberFlagElements() == 32, "Dst must have 32 flag elements");
549 dstOpnd = createDstWithNewSubRegOff(dstOpnd, 1);
550 }
551 createInst(
552 predOpnd,
553 G4_mov,
554 NULL,
555 saturate,
556 g4::SIMD1,
557 dstOpnd,
558 src0Opnd,
559 NULL,
560 InstOpt_WriteEnable,
561 true);
562 }
563 else if (src0Opnd->isSrcRegRegion() && src0Opnd->asSrcRegRegion()->isScalar() == false)
564 {
565 G4_DstRegRegion *null_dst_opnd = createNullDst(Type_UD);
566 condMod = createCondMod(
567 Mod_ne,
568 dstOpnd->asDstRegRegion()->getBase()->asRegVar(),
569 0);
570
571 createInst(
572 predOpnd,
573 G4_and,
574 condMod,
575 saturate,
576 exsize,
577 null_dst_opnd,
578 src0Opnd,
579 createImm(1, Type_UW),
580 inst_opt,
581 true);
582 }
583 else
584 {
585 return VISA_FAILURE;
586 }
587 }
588 else if (opcode == ISA_BF_CVT)
589 {
590 // translate UW to BF
591 if (dstOpnd->getType() == Type_UW ||
592 dstOpnd->getType() == Type_HF) // Temp compatibility (toBeRemovedSoon)
593 {
594 dstOpnd->setType(Type_BF);
595 }
596 else
597 {
598 assert(src0Opnd->isSrcRegRegion() &&
599 (src0Opnd->getType() == Type_UW || src0Opnd->getType() == Type_HF) &&
600 "src0Opnd must be a src region with HF type");
601 src0Opnd->asSrcRegRegion()->setType(Type_BF);
602 }
603
604 createMov(exsize, dstOpnd, src0Opnd, inst_opt, true);
605 }
606 else if (opcode == ISA_FCVT)
607 {
608 (void)createInst(nullptr, G4_fcvt, nullptr, g4::NOSAT, exsize,
609 dstOpnd, src0Opnd, nullptr, inst_opt, true);
610 }
611 else
612 {
613 if (opcode == ISA_FMINMAX)
614 {
615 condMod = createCondMod(
616 subOpcode == CISA_DM_FMAX ? Mod_ge : Mod_l,
617 nullptr,
618 0);
619 }
620
621 if (opcode == ISA_MOV && src0Opnd->isSrcRegRegion() && src0Opnd->asSrcRegRegion()->isFlag())
622 {
623 // src0 is a flag
624 // mov (1) dst src0<0;1:0>:uw (ud if flag has 32 elements)
625 G4_Declare* flagDcl = src0Opnd->getTopDcl();
626 src0Opnd->asSrcRegRegion()->setType(flagDcl->getNumberFlagElements() > 16 ? Type_UD : Type_UW);
627 }
628
629 createInst(
630 predOpnd,
631 GetGenOpcodeFromVISAOpcode(opcode),
632 condMod,
633 saturate,
634 exsize,
635 dstOpnd,
636 src0Opnd,
637 src1Opnd,
638 inst_opt,
639 true);
640 }
641
642 return VISA_SUCCESS;
643 }
644
645