1 /*========================== begin_copyright_notice ============================
2
3 Copyright (C) 2017-2021 Intel Corporation
4
5 SPDX-License-Identifier: MIT
6
7 ============================= end_copyright_notice ===========================*/
8
9 #include "G4_Verifier.hpp"
10
11 #include <sstream>
12
13 using namespace vISA;
14
verifyG4Kernel(G4_Kernel & k,Optimizer::PassIndex index,bool alwaysOn,G4Verifier::VerifyControl ctrl)15 void verifyG4Kernel(
16 G4_Kernel &k, Optimizer::PassIndex index,
17 bool alwaysOn, G4Verifier::VerifyControl ctrl)
18 {
19 if (alwaysOn || k.fg.builder->getOption(VISA_FullIRVerify))
20 {
21 G4Verifier verifier(k, ctrl, index);
22 verifier.verify();
23 }
24 }
25
verifyG4Inst(G4_Kernel & kernel,G4_INST * inst,Optimizer::PassIndex index)26 void verifyG4Inst(G4_Kernel &kernel, G4_INST *inst, Optimizer::PassIndex index)
27 {
28 G4Verifier verifier(kernel, G4Verifier::VC_ASSERT, index);
29 verifier.verifyInst(inst);
30 }
31
// Monotonically increasing id used to tag each def-use dump entry
// (see printDefUse); atomic so concurrent verifier instances stay safe.
std::atomic<int> G4Verifier::index(0);
33
G4Verifier(G4_Kernel & k,VerifyControl ctrl,Optimizer::PassIndex index)34 G4Verifier::G4Verifier(G4_Kernel &k, VerifyControl ctrl, Optimizer::PassIndex index)
35 : kernel(k), verifyCtrl(ctrl), passIndex(index)
36 {
37 if (ctrl == VC_AppendDump || ctrl == VC_NewDump)
38 {
39 const char* buf = nullptr;
40 k.getOptions()->getOption(VISA_AsmFileName, buf);
41 std::string dumpName;
42 if (buf != nullptr)
43 {
44 dumpName = std::string(buf);
45 }
46 dumpName += ".g4verify.dump.txt";
47 if (ctrl == VC_AppendDump)
48 dumpText.open(dumpName, std::ofstream::app);
49 else
50 dumpText.open(dumpName, std::ofstream::trunc);
51 }
52 }
53
verify()54 void G4Verifier::verify()
55 {
56 // For each instruction do verification.
57 for (auto BBI = kernel.fg.cbegin(), BBE = kernel.fg.cend(); BBI != BBE; ++BBI)
58 {
59 auto bb = *BBI;
60 for (auto I = bb->begin(), E = bb->end(); I != E; ++I)
61 {
62 G4_INST *inst = *I;
63 verifyInst(inst);
64 }
65 }
66 }
67
verifyInst(G4_INST * inst)68 bool G4Verifier::verifyInst(G4_INST *inst)
69 {
70 ASSERT_USER(inst != NULL, "null instruction unexpected");
71 if (inst)
72 {
73 verifyOpcode(inst);
74 verifyOpnd(inst->getDst(), inst);
75 verifyOpnd(inst->getSrc(0), inst);
76 verifyOpnd(inst->getSrc(1), inst);
77 verifyOpnd(inst->getSrc(2), inst);
78 verifyOpnd(inst->getPredicate(), inst);
79 verifyOpnd(inst->getCondMod(), inst);
80 verifyOpnd(inst->getImplAccDst(), inst);
81 verifyOpnd(inst->getImplAccSrc(), inst);
82
83 if (inst->isSend())
84 {
85 verifySend(inst);
86 }
87 else if (inst->isDpas())
88 {
89 verifyDpas(inst);
90 }
91 verifyAccMov(inst);
92
93 verifyDstSrcOverlap(inst);
94
95 if (passIndex == Optimizer::PI_cleanMessageHeader ||
96 passIndex == Optimizer::PI_renameRegister ||
97 passIndex == Optimizer::PI_localDefHoisting ||
98 passIndex == Optimizer::PI_localCopyPropagation ||
99 passIndex == Optimizer::PI_localInstCombine ||
100 passIndex == Optimizer::PI_reassociateConst ||
101 passIndex == Optimizer::PI_cselPeepHoleOpt)
102 {
103 // def-use chain should be valid after these passes
104 return verifyDefUseChain(inst);
105 }
106
107 if (passIndex == Optimizer::PI_HWConformityChk
108 || passIndex == Optimizer::PI_addSWSBInfo)
109 {
110 // feature verification. Do it twice for now.
111 verifyBFMixedMode(inst);
112 }
113 }
114 return true;
115 }
116
117 // Returns true if this use is defined by the defInst (dst, condMod, or acc)
118 // Otherwise returns false.
checkDefUse(G4_INST * defInst,G4_Operand * use)119 static bool checkDefUse(G4_INST* defInst, G4_Operand *use)
120 {
121 if (!use)
122 return false;
123
124 G4_Operand *dst = defInst->getOperand(Opnd_dst);
125 G4_Operand *condMod = defInst->getOperand(Opnd_condMod);
126
127 if (use->isAccReg())
128 {
129 // use is acc
130 // ToDo: we should check if acc is re-defined in between as well
131 if (defInst->getImplAccDst() != NULL || dst->isAccReg())
132 {
133 return true;
134 }
135 }
136
137 if (dst && Rel_disjoint != use->compareOperand(dst))
138 return true;
139
140 if (condMod && Rel_disjoint != use->compareOperand(condMod))
141 return true;
142
143 return false;
144 }
145
verifyDefUseChain(G4_INST * inst)146 bool G4Verifier::verifyDefUseChain(G4_INST *inst)
147 {
148 bool isValid = true;
149
150 for (auto I = inst->use_begin(), E = inst->use_end(); I != E; ++I)
151 {
152 auto DU = *I;
153 // A valid def-use satisfies
154 //
155 // inst[dst/condMod] defines DU.first[DU.second]
156 //
157 G4_Operand *use = (DU.first)->getOperand(DU.second);
158 if (!checkDefUse(inst, use))
159 {
160 isValid = false;
161 printDefUse(inst, DU.first, DU.second);
162 assertIfEnable();
163 }
164 }
165
166 for (auto I = inst->def_begin(), E = inst->def_end(); I != E; ++I)
167 {
168 auto UD = *I;
169 // A valid use-def satisfies
170 //
171 // UD.first[dst/condMod] defines inst[UD.second]
172 //
173 G4_Operand *use = inst->getOperand(UD.second);
174 if (!checkDefUse(UD.first, use))
175 {
176 isValid = false;
177 printDefUse(UD.first, inst, UD.second);
178 assertIfEnable();
179 }
180 }
181
182 return isValid;
183 }
184
printDefUseImpl(std::ostream & os,G4_INST * def,G4_INST * use,Gen4_Operand_Number pos)185 void G4Verifier::printDefUseImpl(
186 std::ostream &os, G4_INST *def, G4_INST *use, Gen4_Operand_Number pos)
187 {
188 os << "\n def: ";
189 def->emit(os);
190 os << "\n user: ";
191 use->emit(os);
192 os << "\n opnd: ";
193 if (use->getOperand(pos))
194 {
195 use->getOperand(pos)->emit(os);
196 }
197 }
198
199 /// Dump or warn def-use.
printDefUse(G4_INST * def,G4_INST * use,Gen4_Operand_Number pos)200 void G4Verifier::printDefUse(G4_INST *def, G4_INST *use, Gen4_Operand_Number pos)
201 {
202 if (dumpText.is_open() && dumpText.good())
203 {
204 dumpText << "\n\nIndex: " << index++;
205 printDefUseImpl(dumpText, def, use, pos);
206 }
207 else if (verifyCtrl == VC_WARN)
208 {
209 std::cerr << "\n\nInvalid def-use pair detected!!\n";
210 printDefUseImpl(std::cerr, def, use, pos);
211 }
212 }
213
// Report a verification failure by firing MUST_BE_TRUE(false, ...).
// Called after an invalid def-use pair has been printed.
void G4Verifier::assertIfEnable() const
{
    MUST_BE_TRUE(false, "G4Verification failure");
}
218
dataHazardCheck(G4_Operand * dst,G4_Operand * src)219 bool G4Verifier::dataHazardCheck(G4_Operand *dst, G4_Operand *src)
220 {
221 G4_RegVar* dstVar = static_cast<G4_RegVar*>(dst->asDstRegRegion()->getBase());
222 G4_RegVar* srcVar = static_cast<G4_RegVar*>(src->asSrcRegRegion()->getBase());
223 if (!dstVar->isRegVar() || !dstVar->isGreg() || !srcVar->isRegVar() || !srcVar->isGreg())
224 {
225 return false;
226 }
227
228 int dstStart = dst->getLinearizedStart();
229 int dstEnd = dst->getLinearizedEnd();
230 int srcStart = src->getLinearizedStart();
231 int srcEnd = src->getLinearizedEnd();
232
233 if (dstEnd < srcStart ||
234 srcEnd < dstStart)
235 {
236 return false;
237 }
238
239 int dstReg = dstStart / numEltPerGRF<Type_UB>();
240 int dstRegNum = (dstEnd - dstStart + numEltPerGRF<Type_UB>()) / numEltPerGRF<Type_UB>();
241 int srcReg = srcStart / numEltPerGRF<Type_UB>();
242 int srcRegNum = (srcEnd - srcStart + numEltPerGRF<Type_UB>()) / numEltPerGRF<Type_UB>();
243 int srcReg2 = -1;
244
245 if (srcRegNum > 1)
246 {
247 srcReg2 = srcReg + 1;
248 }
249
250 if (dstRegNum >= 2 && srcRegNum == 1)
251 {
252 srcReg2 = srcReg;
253 }
254
255 if (dstReg == srcReg2)
256 {
257 return true;
258 }
259
260 return false;
261 }
262
verifyDstSrcOverlap(G4_INST * inst)263 void G4Verifier::verifyDstSrcOverlap(G4_INST* inst)
264 {
265 if (passIndex == Optimizer::PI_regAlloc && kernel.fg.builder->avoidDstSrcOverlap())
266 {
267 G4_DstRegRegion* dst = inst->getDst();
268
269 if (inst->isSend() || dst == NULL || dst->isNullReg() || inst->opcode() == G4_madm)
270 {
271 return;
272 }
273
274 if (!inst->isComprInst())
275 {
276 return;
277 }
278
279 int dstStart = dst->getLinearizedStart() / numEltPerGRF<Type_UB>();
280 int dstEnd = dst->getLinearizedEnd() / numEltPerGRF<Type_UB>();
281
282 for (int i = 0; i < inst->getNumSrc(); i++)
283 {
284 G4_Operand* src = inst->getSrc(i);
285 if (src != NULL && !src->isNullReg() && src->getTopDcl() &&
286 (src->getTopDcl()->getRegFile() == G4_GRF || src->getTopDcl()->getRegFile() == G4_INPUT))
287 {
288 bool overlap = dataHazardCheck(dst, src);
289
290 int srcStart = src->getLinearizedStart() / numEltPerGRF<Type_UB>();
291 int srcEnd = src->getLinearizedEnd() / numEltPerGRF<Type_UB>();
292 if (dstEnd != dstStart ||
293 srcStart != srcEnd) //Any operand is more than 2 GRF
294 {
295 MUST_BE_TRUE(!overlap, "dst and src0 overlap");
296 }
297 }
298 }
299 }
300 }
301
// Post-RA checks specific to send instructions:
//   - EOT sends must draw their payload from r112 and above (when the
//     platform binds EOT sources to the last GRFs and >=128 GRFs exist);
//   - split sends may not have overlapping src0/src1 GRF ranges;
//   - with the WaDisableSendSrcDstOverlap workaround, dst may not overlap
//     src0 or src1.
// Before RA (other pass indices) this function does nothing.
void G4Verifier::verifySend(G4_INST* inst)
{
    MUST_BE_TRUE(inst->isSend(), "expect send inst");
    if (passIndex == Optimizer::PI_regAlloc)
    {
        G4_DstRegRegion* dst = inst->getDst();
        G4_SrcRegRegion* src0 = inst->getSrc(0)->asSrcRegRegion();
        G4_SrcRegRegion* src1 = inst->isSplitSend() ? inst->getSrc(1)->asSrcRegRegion() : nullptr;

        if (inst->isEOT() && kernel.fg.builder->hasEOTGRFBinding())
        {
            // Payload must start at or after r112 (null sources are exempt).
            auto checkEOTSrc = [](G4_SrcRegRegion* src) {
                const unsigned int EOTStart = 112 * numEltPerGRF<Type_UB>();
                if (src->isNullReg())
                {
                    return true;
                }
                return src->getLinearizedStart() >= EOTStart;
            };

            if (kernel.getNumRegTotal() >= 128)
            {
                MUST_BE_TRUE(checkEOTSrc(src0), "src0 for EOT send is not in r112-r127");
                if (src1 != nullptr)
                {
                    MUST_BE_TRUE(checkEOTSrc(src1), "src1 for EOT sends is not in r112-r127");
                }
            }
        }

        if (inst->isSplitSend())
        {
            // Compare the GRF ranges actually read per the message descriptor
            // lengths, not just the declared region bounds.
            if (src0->getBase()->isGreg() && src1 && src1->getBase()->isGreg())
            {
                int src0Start = src0->getLinearizedStart() / numEltPerGRF<Type_UB>();
                int src0End = src0Start + inst->getMsgDesc()->getSrc0LenRegs() - 1;
                int src1Start = src1->getLinearizedStart() / numEltPerGRF<Type_UB>();
                int src1End = src1Start + inst->getMsgDesc()->getSrc1LenRegs() - 1;
                bool noOverlap = src0End < src1Start ||
                    src1End < src0Start;
                MUST_BE_TRUE(noOverlap, "split send src0 and src1 overlap");
            }
        }

        if (kernel.fg.builder->WaDisableSendSrcDstOverlap())
        {
            if (!dst->isNullReg())
            {
                if (src0->getBase()->isGreg())
                {
                    bool noOverlap = dst->getLinearizedEnd() < src0->getLinearizedStart() ||
                        src0->getLinearizedEnd() < dst->getLinearizedStart();
                    MUST_BE_TRUE(noOverlap, "send dst and src0 overlap");
                }
                if (src1 && !src1->isNullReg())
                {
                    bool noOverlap = dst->getLinearizedEnd() < src1->getLinearizedStart() ||
                        src1->getLinearizedEnd() < dst->getLinearizedStart();
                    MUST_BE_TRUE(noOverlap, "split send dst and src1 overlap");
                }
            }
        }
    }
}
366
367
// Verify a single operand |opnd| of |inst|:
//   1. ownership — each non-shareable operand (regions, predicate,
//      condMod) must point back to exactly this instruction;
//   2. bound consistency — the cached left/right bounds are recomputed
//      from scratch (on a copy of the region) and compared;
//   3. post-RA alignment rules (align16 3-src ops, acc alignment,
//      V/UV/VF-imm mov dst alignment, mme GRF alignment).
// Dpas instructions are skipped entirely for now.
void G4Verifier::verifyOpnd(G4_Operand* opnd, G4_INST* inst)
{
    if (inst->isDpas())
    {
        // Temporarily skip for now
        return;
    }

    uint8_t execSize = inst->getExecSize();

    if (opnd == NULL)
    {
        return;
    }

    if (inst->opcode() == G4_sel && opnd->isCondMod())
    {
        // conditional modifier for sel is a don't care, so we can skip verification
        return;
    }

    // FIXME: If isImm() condition is removed then some assertions are hit.
    // This means somewhere in Jitter operand sharing is happening for
    // immediate type operands. This should be fixed.
    // For Imm, AddrExp, AddrExpList, Labels, hashtable lookup is
    // performed at creation time unline SrcRegion, DstRegion,
    // Predicate, CondMod. This means former type of operands
    // can be shared across instructions.
    if (opnd->getInst() != inst &&
        opnd->isLabel() == false &&
        opnd->isImm() == false &&
        opnd->isNullReg() == false &&
        opnd->isAddrExp() == false)
    {
        DEBUG_VERBOSE("operand does not have exactly one owning instruction (shared or orphaned)");

        std::cerr << "operand: ";
        opnd->emit(std::cerr);
        std::cerr << " in instruction:\n  ";
        inst->emit(std::cerr);
        std::cerr << "\n";

        if (opnd->getInst() == NULL)
        {
            DEBUG_VERBOSE("operand has no owner instruction (orphaned)");
            MUST_BE_TRUE(false, "operand has no owner instruction (orphaned)");
        }
        else
        {
            DEBUG_VERBOSE("operand pointer is shared by another instruction");
            MUST_BE_TRUE(false, "operand pointer is shared by another instruction");
        }
        DEBUG_VERBOSE(std::endl);
    }

    if (inst->isSend())
    {
        // send dst/src may not be GRF-aligned before HW conformity,
        // so we only check their bound in RA
        if (passIndex != Optimizer::PI_regAlloc)
        {
            return;
        }

        if (opnd == inst->getDst())
        {
            if (opnd->isRightBoundSet() && !opnd->isNullReg())
            {
                // Expected right bound derives from the message's dst length
                // (in GRFs), clipped by small responses and small declares.
                unsigned int correctRB =
                    ((inst->getMsgDesc()->getDstLenRegs() + opnd->asDstRegRegion()->getRegOff()) * numEltPerGRF<Type_UB>()) - 1;
                uint32_t dstLenBytes = inst->getMsgDesc()->getDstLenBytes();
                if (dstLenBytes < getGRFSize()) {
                    correctRB = opnd->getLeftBound() + dstLenBytes - 1;
                } else if (opnd->getTopDcl()->getByteSize() < numEltPerGRF<Type_UB>()) {
                    correctRB = opnd->getLeftBound() + opnd->getTopDcl()->getByteSize() - 1;
                }

                // Fold in alias offsets up the declare chain.
                G4_Declare* parentDcl = opnd->getBase()->asRegVar()->getDeclare();
                while (parentDcl != NULL)
                {
                    correctRB += parentDcl->getAliasOffset();
                    parentDcl = parentDcl->getAliasDeclare();
                }

                correctRB = std::min(correctRB, opnd->getTopDcl()->getByteSize() - 1);

                if (opnd->getRightBound() != correctRB)
                {
                    DEBUG_VERBOSE("Right bound mismatch for send inst dst. Orig rb = " <<
                        opnd->getRightBound() << ", correct rb = " << correctRB << std::endl);

                    inst->emit(std::cerr);
                    DEBUG_VERBOSE(std::endl);
                    MUST_BE_TRUE(false, "Right bound mismatch!");
                }
            }
        }
        else if (opnd == inst->getSrc(0) || opnd == inst->getSrc(1))
        {
            if (opnd->isRightBoundSet())
            {
                // Expected right bound derives from the message src length,
                // clipped by declares smaller than a GRF.
                int msgLength = (opnd == inst->getSrc(0)) ? inst->getMsgDesc()->getSrc0LenRegs() : inst->getMsgDesc()->getSrc1LenRegs();
                unsigned int numBytes = opnd->getTopDcl()->getByteSize();
                unsigned int correctRB = 0;
                if (numBytes < numEltPerGRF<Type_UB>())
                {
                    correctRB = opnd->asSrcRegRegion()->getRegOff() * numEltPerGRF<Type_UB>() + numBytes - 1;
                }
                else
                {
                    correctRB = ((msgLength + opnd->asSrcRegRegion()->getRegOff()) * numEltPerGRF<Type_UB>()) - 1;
                }

                // Fold in alias offsets up the declare chain.
                G4_Declare* parentDcl = opnd->getBase()->asRegVar()->getDeclare();
                while (parentDcl != NULL)
                {
                    correctRB += parentDcl->getAliasOffset();
                    parentDcl = parentDcl->getAliasDeclare();
                }

                correctRB = std::min(correctRB, opnd->getTopDcl()->getByteSize() - 1);

                if (opnd->getRightBound() != correctRB)
                {
                    DEBUG_VERBOSE("Right bound mismatch for send inst src0. Orig rb = " <<
                        opnd->getRightBound() << ", correct rb = " << correctRB << std::endl);

                    inst->emit(std::cerr);
                    DEBUG_VERBOSE(std::endl);
                    MUST_BE_TRUE(false, "Right bound mismatch!");
                }
            }
        }
    }
    else
    {
        if (opnd->isSrcRegRegion() && opnd->isRightBoundSet())
        {
            // Recompute bounds on a copy of the region and compare with
            // the cached values on the operand.
            G4_SrcRegRegion newRgn(*(opnd->asSrcRegRegion()));

            newRgn.setInst(inst);
            newRgn.computeLeftBound();
            newRgn.computeRightBound(execSize);

            if (inst->isPseudoUse())
            {
                // Pseudo-use covers the entire root declare.
                G4_Declare* topdcl = newRgn.getBase()->asRegVar()->getDeclare();

                while (topdcl->getAliasDeclare() != NULL)
                {
                    topdcl = topdcl->getAliasDeclare();
                }

                newRgn.setLeftBound(0);
                newRgn.setRightBound(topdcl->getByteSize() - 1);
            }

            if ((opnd->getRightBound() - opnd->getLeftBound()) > (2u * numEltPerGRF<Type_UB>()) &&
                (inst->isPseudoUse() == false))
            {
                // pln src1 is the one non-send source allowed to span more.
                if (!(inst->opcode() == G4_pln && inst->getSrc(1) == opnd))
                {
                    DEBUG_VERBOSE("Difference between left/right bound is greater than 2 GRF for src region. Single non-send opnd cannot span 2 GRFs. lb = " <<
                        opnd->getLeftBound() << ", rb = " << opnd->getRightBound() << std::endl);
                    inst->emit(std::cerr);
                    DEBUG_VERBOSE(std::endl);
                    MUST_BE_TRUE(false, "Left/right bound span incorrect!");
                }
            }

            if (inst->opcode() == G4_pln &&
                inst->getSrc(1) == opnd)
            {
                // For pln, src1 uses 2 GRFs if exec size <= 8
                // and 4 GRFs if exec size == 16
                newRgn.computeRightBound(inst->getExecSize() > g4::SIMD8 ?
                    inst->getExecSize() : G4_ExecSize(inst->getExecSize() * 2));

                if (inst->getExecSize() > g4::SIMD8)
                {
                    newRgn.setRightBound(newRgn.getRightBound() * 2 - newRgn.getLeftBound() + 1);
                }
            }

            if (inst->getMaskOffset() > 0 &&
                opnd == inst->getImplAccSrc())
            {
                // Update left/right bound as per inst mask offset, eg Q2
                // has offset 8
                G4_Type extype;
                int extypesize;
                unsigned int multiplicationFactor = 1;
                if (opnd->isAccReg())
                {
                    // Right bound granularity is in terms of
                    // bytes for Acc registers
                    multiplicationFactor = 4;
                }

                extype = inst->getOpExecType(extypesize);
                if ((IS_WTYPE(extype) || IS_DTYPE(extype)))
                {
                    // This condition is a result of HW Conformity requirement
                    // that for exec type = D/DW, only acc0 is used even when
                    // qtr control is set to Q2/H2
                    newRgn.setLeftBound(0);
                    newRgn.setRightBound(31);
                }
                else
                {
                    newRgn.setLeftBound(newRgn.getLeftBound() + (inst->getMaskOffset() * multiplicationFactor));
                    newRgn.setRightBound(newRgn.getRightBound() + (inst->getMaskOffset() * multiplicationFactor));
                }
            }

            if (opnd->getLeftBound() != newRgn.getLeftBound())
            {
                DEBUG_VERBOSE("Left bound mismatch for src opnd for following inst. Orig lb = " <<
                    opnd->getLeftBound() << ", recomputed lb = " << newRgn.getLeftBound() << std::endl);
                inst->emit(std::cerr);
                DEBUG_VERBOSE(std::endl);
                MUST_BE_TRUE(false, "Left bound mismatch!");
            }

            if (opnd->getRightBound() != newRgn.getRightBound())
            {
                DEBUG_VERBOSE("Right bound mismatch for src opnd for following inst. Orig rb = " <<
                    opnd->getRightBound() << ", recomputed rb = " << newRgn.getRightBound() << std::endl);

                inst->emit(std::cerr);
                DEBUG_VERBOSE(std::endl);
                MUST_BE_TRUE(false, "Right bound mismatch!");
            }
        }
        else if (opnd->isDstRegRegion() && opnd->isRightBoundSet() && !opnd->isNullReg())
        {
            // Same recompute-and-compare scheme for destination regions.
            G4_DstRegRegion newRgn(*(opnd->asDstRegRegion()));
            newRgn.setInst(inst);
            newRgn.computeLeftBound();
            newRgn.computeRightBound(execSize);

            if (inst->isPseudoKill())
            {
                // Pseudo-kill covers the entire root declare.
                G4_Declare* topdcl = newRgn.getBase()->asRegVar()->getDeclare();

                while (topdcl->getAliasDeclare() != NULL)
                {
                    topdcl = topdcl->getAliasDeclare();
                }

                newRgn.setLeftBound(0);
                newRgn.setRightBound(topdcl->getByteSize() - 1);
            }

            if ((opnd->getRightBound() - opnd->getLeftBound()) > (2u * numEltPerGRF<Type_UB>()) &&
                (inst->isPseudoKill() == false) && (inst->opcode() != G4_madw))
            {
                DEBUG_VERBOSE("Difference between left/right bound is greater than 2 GRF for dst region. Single non-send opnd cannot span 2 GRFs. lb = " <<
                    opnd->getLeftBound() << ", rb = " << opnd->getRightBound() << std::endl);
                inst->emit(std::cerr);
                DEBUG_VERBOSE(std::endl);
                MUST_BE_TRUE(false, "Left/right bound span incorrect!");
            }

            if (inst->getMaskOffset() > 0 &&
                opnd == inst->getImplAccDst())
            {
                // Update left/right bound as per inst mask offset, eg Q2
                // has offset 8
                G4_Type extype;
                int extypesize;
                unsigned int multiplicationFactor = 1;
                if (opnd->isAccReg())
                {
                    // Right bound granularity is in terms of
                    // bytes for Acc registers
                    multiplicationFactor = 4;
                }

                extype = inst->getOpExecType(extypesize);

                if ((IS_WTYPE(extype) || IS_DTYPE(extype)))
                {
                    // This condition is a result of HW Conformity requirement
                    // that for exec type = D/DW, only acc0 is used even when
                    // qtr control is set to Q2/H2
                    newRgn.setLeftBound(0);
                    newRgn.setRightBound(31);
                }
                else
                {
                    newRgn.setLeftBound(newRgn.getLeftBound() + (inst->getMaskOffset() * multiplicationFactor));
                    newRgn.setRightBound(newRgn.getRightBound() + (inst->getMaskOffset() * multiplicationFactor));
                }
            }

            if (opnd->getLeftBound() != newRgn.getLeftBound())
            {
                DEBUG_VERBOSE("Left bound mismatch for dst opnd for following inst. Orig lb = " <<
                    opnd->getLeftBound() << ", recomputed lb = " << newRgn.getLeftBound() << std::endl);

                inst->emit(std::cerr);
                DEBUG_VERBOSE(std::endl);
                MUST_BE_TRUE(false, "Left bound mismatch");
            }

            if (opnd->getRightBound() != newRgn.getRightBound())
            {
                DEBUG_VERBOSE("Right bound mismatch for dst opnd for following inst. Orig rb = " <<
                    opnd->getRightBound() << ", recomputed rb = " << newRgn.getRightBound() << std::endl);

                inst->emit(std::cerr);
                DEBUG_VERBOSE(std::endl);
                MUST_BE_TRUE(false, "Right bound mismatch!");
            }
        }
        else if (opnd->isPredicate() && opnd->isRightBoundSet())
        {
            // Predicate bounds depend only on exec size and mask offset.
            G4_Predicate newRgn(*(opnd->asPredicate()));

            newRgn.setLeftBound(0);
            newRgn.computeRightBound(execSize);

            if (inst->getMaskOffset() > 0)
            {
                // Update left/right bound as per inst mask offset, eg Q2
                // has offset 8
                newRgn.setLeftBound(newRgn.getLeftBound() + inst->getMaskOffset());
                newRgn.setRightBound(newRgn.getRightBound() + inst->getMaskOffset());
            }

            if (opnd->getLeftBound() != newRgn.getLeftBound())
            {
                DEBUG_VERBOSE("Left bound mismatch for pred opnd for following inst. Orig lb = " <<
                    opnd->getLeftBound() << ", recomputed lb = " << newRgn.getLeftBound() << std::endl);

                inst->emit(std::cerr);
                DEBUG_VERBOSE(std::endl);
                MUST_BE_TRUE(false, "Left bound mismatch");
            }

            if (opnd->getRightBound() != newRgn.getRightBound())
            {
                DEBUG_VERBOSE("Right bound mismatch for pred opnd for following inst. Orig rb = " <<
                    opnd->getRightBound() << ", recomputed rb = " << newRgn.getRightBound() << std::endl);

                inst->emit(std::cerr);
                DEBUG_VERBOSE(std::endl);
                MUST_BE_TRUE(false, "Right bound mismatch!");
            }
        }
        else if (opnd->isCondMod() && opnd->isRightBoundSet())
        {
            // CondMod bounds follow the same rule as predicates.
            G4_CondMod newRgn(*(opnd->asCondMod()));

            newRgn.setLeftBound(0);
            newRgn.computeRightBound(execSize);

            if (inst->getMaskOffset() > 0)
            {
                // Update left/right bound as per inst mask offset, eg Q2
                // has offset 8
                newRgn.setLeftBound(newRgn.getLeftBound() + inst->getMaskOffset());
                newRgn.setRightBound(newRgn.getRightBound() + inst->getMaskOffset());
            }

            if (opnd->getLeftBound() != newRgn.getLeftBound())
            {
                DEBUG_VERBOSE("Left bound mismatch for cond mod opnd for following inst. Orig lb = " <<
                    opnd->getLeftBound() << ", recomputed lb = " << newRgn.getLeftBound() << std::endl);

                inst->emit(std::cerr);
                DEBUG_VERBOSE(std::endl);
                MUST_BE_TRUE(false, "Left bound mismatch");
            }

            if (opnd->getRightBound() != newRgn.getRightBound())
            {
                DEBUG_VERBOSE("Right bound mismatch for cond mod opnd for following inst. Orig rb = " <<
                    opnd->getRightBound() << ", recomputed rb = " << newRgn.getRightBound() << std::endl);

                inst->emit(std::cerr);
                DEBUG_VERBOSE(std::endl);
                MUST_BE_TRUE(false, "Right bound mismatch!");
            }
        }
        else
        {
            // Not implemented
        }

        if (passIndex == Optimizer::PI_regAlloc)
        {
            // alignment checks that can only be performed post RA
            bool threeSrcAlign16 = (inst->getNumSrc() == 3) && !inst->isSend() && !kernel.fg.builder->hasAlign1Ternary();
            bool nonScalar = (opnd->isSrcRegRegion() && !opnd->asSrcRegRegion()->isScalar()) ||
                (opnd->isDstRegRegion() && inst->getExecSize() > g4::SIMD2);
            bool isAssigned = opnd->isRegRegion() && opnd->getBase()->isRegVar() &&
                opnd->getBase()->asRegVar()->isPhyRegAssigned();
            // allow replicated DF source opnd with <2;2,0> region
            bool isReplicated = (opnd->getType() == Type_DF) &&
                opnd->isSrcRegRegion() &&
                (opnd->asSrcRegRegion()->getRegion()->width == 2) &&
                (opnd->asSrcRegRegion()->getRegion()->horzStride == 0) &&
                (opnd->asSrcRegRegion()->getRegion()->vertStride == 2);
            if (threeSrcAlign16 && nonScalar && isAssigned &&
                opnd->getLinearizedStart() % 16 != 0 &&
                !isReplicated)
            {
                MUST_BE_TRUE(false, "dp2/dp3/dp4/dph and non-scalar 3src op must be align16!");
            }

            // check acc source alignment
            // for explicit acc source, it and the inst's dst should both be oword-aligned
            // for implicit acc source, its subreg offset should be identical to that of the dst
            if (opnd->isAccReg())
            {
                uint32_t offset = opnd->getLinearizedStart() % 32;
                if (inst->getDst())
                {
                    uint32_t dstOffset = inst->getDst()->getLinearizedStart() % 32;
                    if (opnd == inst->getImplAccSrc())
                    {
                        assert(offset == dstOffset && "implicit acc source must have identical offset as dst");
                    }
                    else if (opnd->isSrcRegRegion())
                    {
                        assert((offset % 16 == 0 && dstOffset % 16 == 0) &&
                            "explicit acc source and its dst must be oword-aligned");
                    }
                }
            }

            // if src0 is V/UV/VF imm, dst must be 16 byte aligned.
            if (inst->opcode() == G4_mov && IS_VTYPE(inst->getSrc(0)->getType()))
            {
                auto dst = inst->getDst();
                // should we assert if dst is not phyReg assigned?
                if (dst)
                {
                    bool dstIsAssigned = dst->getBase()->isRegVar() && dst->getBase()->asRegVar()->isPhyRegAssigned();
                    if (dstIsAssigned && dst->getLinearizedStart() % 16 != 0)
                    {
                        assert(false && "destination of move instruction with V/VF imm is not 16-byte aligned");
                    }
                }
            }

            // check if the oprands with mme are GRF-aligned.
            if (opnd->getAccRegSel() != ACC_UNDEFINED)
            {
                assert(opnd->getLinearizedStart() % numEltPerGRF<Type_UB>() == 0 && "operand with mme must be GRF-aligned");
            }
        }
    }
}
824
// Check pseudo_kill / lifetime.end placement within |bb|:
// a pseudo_kill must precede any def of its variable in the BB, and a
// lifetime.end must follow the last use. Only runs meaningfully pre-RA
// (bails unless some unassigned register exists). Violations are printed
// via DEBUG_VERBOSE, not asserted.
void verifyLifetimeConsistency(G4_BB* bb)
{
    // Verify whether misplaced pseudo_kill/lifetime.end is seen in BB
    // Following code patterns are incorrect:
    // mov (1) A,
    // ...
    // pseudo_kill A
    // As per VISA spec, we allow a single instance of pseudo_kill per
    // variable. Later RA's liveness may insert multiple. This will
    // not be invoked after RA anyway. As a precaution, we return
    // if no unassigned register is found.
    //
    // Similarly for lifetime.end
    // lifetime.end A
    // ...
    // mov (1) A,
    // This is illegal because lifetime.end appears before last use
    // in BB
    bool unassignedFound = false;

    // Pass 0: detect whether any operand's declare is still virtual
    // (no physical register assigned) — the pre-RA signal.
    for (INST_LIST_ITER it = bb->begin(), end = bb->end();
        it != end;
        it++)
    {
        G4_INST* curInst = (*it);

        std::stack<G4_Operand*> opnds;
        opnds.push(curInst->getDst());
        opnds.push(curInst->getSrc(0));
        opnds.push(curInst->getSrc(1));
        opnds.push(curInst->getSrc(2));
        opnds.push(curInst->getPredicate());
        opnds.push(curInst->getCondMod());

        while (!opnds.empty())
        {
            G4_Operand* curOpnd = opnds.top();
            opnds.pop();

            if (curOpnd != NULL && curOpnd->getTopDcl() != NULL)
            {
                G4_Declare* topdcl = curOpnd->getTopDcl();

                if (topdcl->getRegVar() &&
                    !topdcl->getRegVar()->isPhyRegAssigned())
                {
                    unassignedFound = true;
                }
            }
        }
    }

    if (unassignedFound == true)
    {
        // Map each declare to the (inst, position-in-BB) of its
        // pseudo_kill / lifetime.end marker.
        typedef std::map<G4_Declare*, std::pair<G4_INST*, unsigned int>> dclInstMap;
        typedef dclInstMap::iterator dclInstMapIter;
        dclInstMap pseudoKills;
        dclInstMap lifetimeEnd;

        unsigned int instId = 0;

        // First populate all pseudo_kills and lifetime.end instructions
        // in BB's inst list. Later run second loop to check whether
        // lifetime rules are flouted.
        for (INST_LIST_ITER it = bb->begin(), end = bb->end();
            it != end;
            it++, instId++)
        {
            G4_INST* curInst = (*it);
            std::pair<G4_INST*, unsigned int> instPair;

            instPair.first = curInst;
            instPair.second = instId;

            if (curInst->isPseudoKill())
            {
                pseudoKills.insert(make_pair(GetTopDclFromRegRegion(curInst->getDst()), instPair));
            }

            if (curInst->isLifeTimeEnd())
            {
                lifetimeEnd.insert(make_pair(GetTopDclFromRegRegion(curInst->getSrc(0)), instPair));
            }
        }

        // Second pass: compare each real instruction's position against
        // the recorded marker positions for the declares it touches.
        instId = 0;
        for (INST_LIST_ITER it = bb->begin(), end = bb->end();
            it != end;
            it++, instId++)
        {
            G4_INST* curInst = (*it);

            if (curInst->isPseudoKill() ||
                curInst->isLifeTimeEnd())
            {
                continue;
            }

            std::stack<G4_Operand*> opnds;
            opnds.push(curInst->getDst());
            opnds.push(curInst->getSrc(0));
            opnds.push(curInst->getSrc(1));
            opnds.push(curInst->getSrc(2));
            opnds.push(curInst->getPredicate());
            opnds.push(curInst->getCondMod());

            while (!opnds.empty())
            {
                G4_Operand* curOpnd = opnds.top();
                opnds.pop();

                if (curOpnd != NULL && curOpnd->getTopDcl() != NULL)
                {
                    G4_Declare* topdcl = curOpnd->getTopDcl();

                    // Check whether topdcl has been written to map
                    dclInstMapIter killsIt = pseudoKills.find(topdcl);

                    if (killsIt != pseudoKills.end())
                    {
                        unsigned int foundAtId = (*killsIt).second.second;

                        if (foundAtId > instId)
                        {
                            DEBUG_VERBOSE("Found a definition before pseudo_kill.");
                            (*killsIt).second.first->emit(std::cerr);
                            DEBUG_VERBOSE(std::endl);
                            curInst->emit(std::cerr);
                            DEBUG_VERBOSE(std::endl);
                        }
                    }

                    dclInstMapIter lifetimeEndIter = lifetimeEnd.find(topdcl);

                    if (lifetimeEndIter != lifetimeEnd.end())
                    {
                        unsigned int foundAtId = (*lifetimeEndIter).second.second;

                        if (foundAtId < instId)
                        {
                            DEBUG_VERBOSE("Found a use after lifetime.end.");
                            (*lifetimeEndIter).second.first->emit(std::cerr);
                            DEBUG_VERBOSE(std::endl);
                            curInst->emit(std::cerr);
                            DEBUG_VERBOSE(std::endl);
                        }
                    }
                }
            }
        }
    }
}
977
verifyOpcode(G4_INST * inst)978 void G4Verifier::verifyOpcode(G4_INST* inst)
979 {
980 switch (inst->opcode())
981 {
982 case G4_dp2:
983 case G4_dp3:
984 case G4_dp4:
985 assert(kernel.fg.builder->hasDotProductInst() && "unsupported opcode");
986 break;
987 case G4_lrp:
988 assert(kernel.fg.builder->hasLRP() && "unsupported opcode");
989 break;
990 case G4_madm:
991 assert(kernel.fg.builder->hasMadm() && "unsupported opcode");
992 break;
993 default:
994 break;
995 }
996
997 if (passIndex == Optimizer::PI_regAlloc)
998 {
999 //ToDo: add more checks for psuedo inst after RA
1000 assert(!inst->isPseudoLogic() && "pseudo logic inst should be lowered before RA");
1001 }
1002
1003 if (inst->getSaturate())
1004 {
1005 assert(inst->canSupportSaturate() && "saturate is set to true but instruction does not support saturation");
1006 }
1007
1008 }
1009
verifyDpas(G4_INST * inst)1010 void G4Verifier::verifyDpas(G4_INST* inst)
1011 {
1012 // Verify region and size of each operands
1013 G4_InstDpas* dpasInst = inst->asDpasInst();
1014
1015 if (dpasInst->getPredicate() || dpasInst->getCondMod())
1016 {
1017 DEBUG_VERBOSE("dpas: should not have predicate nor condMod");
1018 inst->emit(std::cerr);
1019 DEBUG_VERBOSE(std::endl);
1020 MUST_BE_TRUE(false, "dpas: may not have predicate/condMod");
1021 }
1022
1023 G4_DstRegRegion* dst = dpasInst->getDst();
1024 G4_Type dTy = dst->getType();
1025 G4_SrcRegRegion* src0 = dpasInst->getSrc(0)->asSrcRegRegion();
1026 G4_Type s0Ty = src0->getType();
1027 G4_SrcRegRegion* src1 = dpasInst->getSrc(1)->asSrcRegRegion();
1028 G4_Type s1Ty = src1->getType();
1029 G4_SrcRegRegion* src2 = dpasInst->getSrc(2)->asSrcRegRegion();
1030 G4_Type s2Ty = src2->getType();
1031 G4_Operand* opnd3 = dpasInst->getSrc(3);
1032 G4_SrcRegRegion* src3 = opnd3 ? opnd3->asSrcRegRegion() : nullptr;
1033 G4_Type s3Ty = src3 ? src3->getType() : Type_UNDEF;
1034
1035 // No source modifier
1036 if (src0->hasModifier() || src1->hasModifier() || src2->hasModifier() ||
1037 (src3 && src3->hasModifier()))
1038 {
1039 DEBUG_VERBOSE("dpas: should not have source modifier");
1040 inst->emit(std::cerr);
1041 DEBUG_VERBOSE(std::endl);
1042 MUST_BE_TRUE(false, "dpas: may not have source modifier");
1043 }
1044
1045 // No indirect register access
1046 if (src0->isIndirect() || src1->isIndirect() || src2->isIndirect() || dst->isIndirect() ||
1047 (src3 && src3->isIndirect()))
1048 {
1049 DEBUG_VERBOSE("dpas: no indirect register access supported!");
1050 inst->emit(std::cerr);
1051 DEBUG_VERBOSE(std::endl);
1052 MUST_BE_TRUE(false, "dpas: no indirect register access supported!");
1053 }
1054
1055 if (!(s1Ty == Type_UD || s1Ty == Type_D) || !(s2Ty == Type_UD || s2Ty == Type_D))
1056 {
1057 DEBUG_VERBOSE("dpas: incorrect type for src1 or src2!");
1058 inst->emit(std::cerr);
1059 DEBUG_VERBOSE(std::endl);
1060 MUST_BE_TRUE(false, "dpas: wrong type for src1 or src2");
1061 }
1062
1063 if (dpasInst->isInt())
1064 {
1065 if (!(s0Ty == Type_UD || s0Ty == Type_D) || !(dTy == Type_UD || dTy == Type_D))
1066 {
1067 DEBUG_VERBOSE("dpas: incorrect int type for src0 or dst!");
1068 inst->emit(std::cerr);
1069 DEBUG_VERBOSE(std::endl);
1070 MUST_BE_TRUE(false, "dpas: wrong int type for src0 or dst");
1071 }
1072 }
1073 else if (dpasInst->isFP16() || dpasInst->isBF16())
1074 {
1075 G4_Type prec = Type_UNDEF;
1076 if (dpasInst->getPlatform() >= GENX_PVC)
1077 {
1078 prec = dpasInst->isBF16() ? Type_BF : Type_HF;
1079 }
1080 if (!(dTy == Type_F || dTy == prec) || !(s0Ty == Type_F || s0Ty == prec))
1081 {
1082 DEBUG_VERBOSE("dpas: incorrect float type for dst or src0!");
1083 inst->emit(std::cerr);
1084 DEBUG_VERBOSE(std::endl);
1085 MUST_BE_TRUE(false, "dpas: wrong float type for dst or src0");
1086 }
1087 }
1088 else if (dpasInst->isTF32())
1089 {
1090 if (dTy != Type_F || s0Ty != Type_F)
1091 {
1092 DEBUG_VERBOSE("dpas: incorrect TF32 type for dst or src0 (expected F)!");
1093 inst->emit(std::cerr);
1094 DEBUG_VERBOSE(std::endl);
1095 MUST_BE_TRUE(false, "dpas: should be float type for dst or src0");
1096 }
1097 }
1098 else if (dpasInst->isBF8())
1099 {
1100 if (!(dTy == Type_F || dTy == Type_BF || dTy == Type_HF) ||
1101 !(s0Ty == Type_F || s0Ty == Type_BF || s0Ty == Type_HF))
1102 {
1103 DEBUG_VERBOSE("dpas: incorrect type for dst or src0 (expected F, BF, HF)!");
1104 inst->emit(std::cerr);
1105 DEBUG_VERBOSE(std::endl);
1106 MUST_BE_TRUE(false, "dpas: should be type(F, BF, HF) for dst or src0");
1107 }
1108 }
1109
1110 else
1111 {
1112 DEBUG_VERBOSE("dpas: invalid!");
1113 inst->emit(std::cerr);
1114 DEBUG_VERBOSE(std::endl);
1115 MUST_BE_TRUE(false, "dpas: invalid");
1116 }
1117
1118 // region check, enforce <1;1,0> for source region, <1> for dst
1119 auto isSrcRegion110 = [](const RegionDesc* RD) -> bool {
1120 return RD->vertStride == 1 && RD->width == 1 && RD->horzStride == 0;
1121 };
1122
1123 if (dst->getHorzStride() != 1 ||
1124 (!src0->isNullReg() && !isSrcRegion110(src0->getRegion())) ||
1125 !isSrcRegion110(src1->getRegion()) ||
1126 !isSrcRegion110(src2->getRegion()) ||
1127 (src3 && !isSrcRegion110(src3->getRegion())))
1128 {
1129 DEBUG_VERBOSE("dpas: src region should be <1;1,0> and dst region <1>!");
1130 inst->emit(std::cerr);
1131 DEBUG_VERBOSE(std::endl);
1132 MUST_BE_TRUE(false, "dpas: src region should be <1;1,0> and dst region <1>!");
1133 }
1134
1135 // register alignment & size
1136 // dst & src0 : aligned on execsize
1137 // src1 : aligned on grf
1138 // src2 : aligned on systolic depth * OPS_PER_CHAN
1139 if (passIndex == Optimizer::PI_regAlloc)
1140 {
1141 uint32_t D = dpasInst->getSystolicDepth();
1142 uint32_t ES = dpasInst->getExecSize();
1143 uint32_t RC = dpasInst->getRepeatCount();
1144 uint32_t Src1_D = D;
1145
1146 uint32_t dAlignBytes = TypeSize(dTy) * ES;
1147 uint32_t s0AlignBytes = TypeSize(s0Ty) * ES;
1148 if ((dst->getLinearizedStart() % dAlignBytes) != 0 ||
1149 (src0->getLinearizedStart() % s0AlignBytes) != 0)
1150 {
1151 DEBUG_VERBOSE("dpas: dst/src0's subreg offset should be multiple of execsize!");
1152 inst->emit(std::cerr);
1153 DEBUG_VERBOSE(std::endl);
1154 MUST_BE_TRUE(false, "dpas: dst/src0's subreg offset should be multiple of execsize!");
1155 }
1156
1157 uint32_t dBytes = dst->getLinearizedEnd() - dst->getLinearizedStart() + 1;
1158 uint32_t s0Bytes = src0->getLinearizedEnd() - src0->getLinearizedStart() + 1;
1159 if (dBytes != (dAlignBytes * RC) || (!src0->isNullReg() && s0Bytes != s0AlignBytes * RC))
1160 {
1161 DEBUG_VERBOSE("dpas: dst/src0's size is wrong!");
1162 inst->emit(std::cerr);
1163 DEBUG_VERBOSE(std::endl);
1164 MUST_BE_TRUE(false, "dpas: dst/src0's size is wrong!");
1165 }
1166
1167 if ((src1->getLinearizedStart() % numEltPerGRF<Type_UB>()) != 0)
1168 {
1169 DEBUG_VERBOSE("dpas: src1's subreg offset should be 0!");
1170 inst->emit(std::cerr);
1171 DEBUG_VERBOSE(std::endl);
1172 MUST_BE_TRUE(false, "dpas: src1's subreg offset should be 0!");
1173 }
1174
1175
1176 // bytes per lane per depth
1177 uint32_t bytes1PerLD = dpasInst->getSrc1SizePerLaneInByte();
1178 uint32_t s1Bytes = src1->getLinearizedEnd() - src1->getLinearizedStart() + 1;
1179 if (s1Bytes != (bytes1PerLD * Src1_D * ES))
1180 {
1181 DEBUG_VERBOSE("dpas: src1's size is wrong!");
1182 inst->emit(std::cerr);
1183 DEBUG_VERBOSE(std::endl);
1184 MUST_BE_TRUE(false, "dpas: src1's size is wrong!");
1185 }
1186
1187 uint32_t s2AlignBytes = dpasInst->getSrc2SizePerLaneInByte() * D;
1188 if ((src2->getLinearizedStart() % s2AlignBytes) != 0)
1189 {
1190 DEBUG_VERBOSE("dpas: src2's subreg offset is incorrec!");
1191 inst->emit(std::cerr);
1192 DEBUG_VERBOSE(std::endl);
1193 MUST_BE_TRUE(false, "dpas: src2's subreg offset is incorrect!");
1194 }
1195
1196 uint32_t s2Bytes = src2->getLinearizedEnd() - src2->getLinearizedStart() + 1;
1197 uint32_t correctBytes = s2AlignBytes * RC;
1198 if (dpasInst->opcode() == G4_dpasw) {
1199 correctBytes = s2AlignBytes * ((RC + 1) / 2);
1200 }
1201 if (s2Bytes != correctBytes)
1202 {
1203 DEBUG_VERBOSE("dpas: src2's size is wrong!");
1204 inst->emit(std::cerr);
1205 DEBUG_VERBOSE(std::endl);
1206 MUST_BE_TRUE(false, "dpas: src2's size is wrong!");
1207 }
1208
1209 }
1210 }
1211
verifyAccMov(G4_INST * inst)1212 void G4Verifier::verifyAccMov(G4_INST* inst)
1213 {
1214 const G4_Operand* src = inst->getSrc(0);
1215 const G4_Operand* dst = inst->getDst();
1216 if (kernel.fg.builder->hasFormatConversionACCRestrictions() &&
1217 inst->opcode() == G4_mov &&
1218 (src->isAccReg() || dst->isAccReg()))
1219 {
1220 const bool allowedICombination = (IS_DTYPE(src->getType()) || src->getType() == Type_W || src->getType() == Type_UW) &&
1221 (IS_DTYPE(dst->getType()) || dst->getType() == Type_W || dst->getType() == Type_UW);
1222 const bool allowedFCombination = (src->getType() == Type_F || src->getType() == Type_HF) &&
1223 (dst->getType() == Type_F || dst->getType() == Type_HF);
1224 const bool allowedDFCombination = src->getType() == Type_DF &&
1225 dst->getType() == Type_DF;
1226 if (!allowedICombination && !allowedFCombination && !allowedDFCombination)
1227 {
1228 DEBUG_VERBOSE("Invalid type combination during mov format conversion when accumulator is used as src or dst!");
1229 inst->emit(std::cerr);
1230 DEBUG_VERBOSE(std::endl);
1231 MUST_BE_TRUE(false, "Invalid type combination during mov format conversion when accumulator is used as src or dst!");
1232 }
1233 }
1234 }
1235
1236 //
1237 // Mixed mode instruction allows bfloat16 operands in the following cases:
1238 // 1. dst, src0, and src1 for 2 source instructions format not involving multiplier(mov, add, cmp, sel).
1239 // 2. dst and src0 for 2 source instructions format involving multiplier(mul, mac etc).
1240 // 3. dst, src0, and src1 for 3 source instructions format(mad).
1241 // 4. Broadcast of bfloat16 scalar is not supported.
1242 // 5. Unpacked bfloat16 destination with stride 2 when register offset is 0 or 1.
1243 // 6. Packed bfloat16 source and destination when register offset is 0 or 8.
1244 // 7. Execution size must not be greater than 8.
1245 // 8. Instructions with pure bfloat16 operands are not supported.
1246 //
1247 // **More examples**
1248 // 1. BF imm is not allowed
1249 // mov (1|M0) r12.0<1>:f 0xffff:bf - ILLEGAL "Imm operand with BF type is not allowed"
1250 // 2. BF scalar operand can be used in SIMD1
1251 // mul (1|M0) r14.0<1>:f r11.0<0;1,0>:bf r12.3<0;1,0>:f - OK
1252 // 3. For SIMD1, scalar operands (both dst/src) of F or BF can have any subreg!
1253 // add (1|M0) r16.3<1>:bf r11.0<0;1,0>:f r12.3<0;1,0>:f - OK
1254 // 4. F Operand should have subreg = 0 if execSize > SIMD1
1255 // add (2|M0) r10.4<1>:f r11.0<1;1,0>:bf 0x12345:f
1256 // ILLEGAL "Src0 regioning must be aligned to destination or scalar for Float/64bit pipes"
1257 // 5. Others
1258 // add (8|M0) r16.0<2>:bf r11.0<1;1,0>:f r12.0<1;1,0>:f- OK
1259 // add (8|M0) r16.1<2>:bf r11.0<1;1,0>:f r12.8<1;1,0>:f- OK
1260 // add (8|M0) r16.0<1>:bf r11.0<1;1,0>:f r12.8<1;1,0>:f- OK
1261 // add (8|M0) r16.8<1>:bf r11.0<1;1,0>:f r12.0<1;1,0>:f- OK
1262 // Note that float source operands can be scalar region <0;1,0>
1263 //
1264 // For PVC, case 7 should be "Execution size must not be greater than 16."
verifyBFMixedMode(G4_INST * inst)1265 void G4Verifier::verifyBFMixedMode(G4_INST* inst)
1266 {
1267 auto useGivenType = [](G4_INST* I, G4_Type GivenTy) -> bool
1268 {
1269 G4_Operand* dst = I->getDst();
1270 if (I->isPseudoAddrMovIntrinsic())
1271 {
1272 return false;
1273 }
1274 // Skip compare's dst (?)
1275 if (dst && !dst->isNullReg() && !I->isCompare())
1276 {
1277 if (dst->getType() == GivenTy)
1278 return true;
1279 }
1280 for (int i = 0; i < I->getNumSrc(); ++i)
1281 {
1282 G4_Operand* src = I->getSrc(i);
1283 if (src && !src->isNullReg())
1284 {
1285 if (src->getType() == GivenTy)
1286 return true;
1287 }
1288 }
1289 return false;
1290 };
1291
1292 // Skip dpas/send as it has been verified separately
1293 if (inst->isDpas() || inst->isSend())
1294 return;
1295
1296 // Skip if no BF usage
1297 if (!useGivenType(inst, Type_BF))
1298 return;
1299
1300 if (!kernel.fg.builder->hasBFMixMode())
1301 {
1302 DEBUG_VERBOSE("BF type: BF mixed mode not supported!");
1303 inst->emit(std::cerr);
1304 DEBUG_VERBOSE(std::endl);
1305 MUST_BE_TRUE(false, "BF type: BF mixed mode not supported!!");
1306 }
1307
1308 // case 8, pure bf not supported
1309 if (!useGivenType(inst, Type_F))
1310 {
1311 DEBUG_VERBOSE("Pure BF operands are not supported!");
1312 inst->emit(std::cerr);
1313 DEBUG_VERBOSE(std::endl);
1314 MUST_BE_TRUE(false, "Pure BF operands are not supported!!");
1315 }
1316
1317 switch (inst->opcode())
1318 {
1319 case G4_mul:
1320 case G4_mac:
1321 {
1322 // case 2
1323 G4_Operand* src1 = inst->getSrc(1);
1324 if (src1->getType() != Type_F)
1325 {
1326 DEBUG_VERBOSE("Src1 in BF mixed mode must be F!");
1327 inst->emit(std::cerr);
1328 DEBUG_VERBOSE(std::endl);
1329 MUST_BE_TRUE(false, "Src1 in BF mixed mode must be F!");
1330 }
1331 break;
1332 }
1333 case G4_mad:
1334 case G4_pseudo_mad:
1335 {
1336 // case 3
1337 G4_Operand* src2 = inst->getSrc(2);
1338 if (src2->getType() != Type_F)
1339 {
1340 DEBUG_VERBOSE("Src2 in BF mixed mode must be F!");
1341 inst->emit(std::cerr);
1342 DEBUG_VERBOSE(std::endl);
1343 MUST_BE_TRUE(false, "Src2 in BF mixed mode must be F!");
1344 }
1345 break;
1346 }
1347 case G4_mov:
1348 {
1349 if (inst->getSrc(0)->getType() == Type_BF)
1350 {
1351 // bf->f is just a left shift, bf mix restriction does not apply.
1352 return;
1353 }
1354 // case 1
1355 break;
1356 }
1357 case G4_add:
1358 case G4_sel:
1359 case G4_cmp:
1360 { // case 1
1361 break;
1362 }
1363 default:
1364 DEBUG_VERBOSE("Instruction does not support BF type!");
1365 inst->emit(std::cerr);
1366 DEBUG_VERBOSE(std::endl);
1367 MUST_BE_TRUE(false, "Instruction does not support BF type!");
1368 break;
1369 }
1370
1371 uint32_t nativeES = kernel.fg.builder->getNativeExecSize();
1372 // verify dst
1373 G4_DstRegRegion* dreg = inst->getDst();
1374 if (dreg && !dreg->isNullReg() && !inst->isCompare())
1375 {
1376 uint32_t hs = dreg->getHorzStride();
1377 uint32_t so = dreg->getSubRegOff();
1378 bool isLegitPackedBF = (dreg->getType() == Type_BF
1379 && (hs == 1 && (so == 0 || so == nativeES)));
1380 bool isLegitUnpackedBF = (dreg->getType() == Type_BF
1381 && (hs == 2 && (so == 0 || so == 1)));
1382 bool isLegitF = (dreg->getType() == Type_F && (hs == 1 && so == 0));
1383 bool isLegitScalar = (inst->getExecSize() == g4::SIMD1 && hs == 1);
1384 if (!(isLegitPackedBF || isLegitUnpackedBF || isLegitF || isLegitScalar))
1385 {
1386 // case 5 & 6
1387 DEBUG_VERBOSE("BF/F Dst has illegal region and type combination!");
1388 inst->emit(std::cerr);
1389 DEBUG_VERBOSE(std::endl);
1390 MUST_BE_TRUE(false, "BF/F Dst has illegal region and type combination!");
1391 }
1392 }
1393
1394 // verify src
1395 for (int i = 0, sz = (int)inst->getNumSrc(); i < sz; ++i)
1396 {
1397 G4_Operand* src = inst->getSrc(i);
1398 if (!src || src->isNullReg() // sanity
1399 || (src->getType() == Type_F && src->isImm()))
1400 continue;
1401
1402 G4_Type srcTy = src->getType();
1403 if (srcTy == Type_BF &&
1404 (src->isImm() || (inst->getExecSize() != g4::SIMD1 && src->asSrcRegRegion()->getRegion()->isScalar())))
1405 {
1406 // case 4
1407 DEBUG_VERBOSE(" Src: Imm BF/broadcast scalar BF are not supported!");
1408 inst->emit(std::cerr);
1409 DEBUG_VERBOSE(std::endl);
1410 MUST_BE_TRUE(false, "Src: Imm BF/broadcast scalar BF are not supported!");
1411 }
1412
1413 G4_SrcRegRegion* sreg = src->asSrcRegRegion();
1414 uint32_t so = sreg->getSubRegOff();
1415 bool isLegitPackedBF = (srcTy == Type_BF
1416 && !sreg->getRegion()->isScalar()
1417 && sreg->getRegion()->isContiguous(inst->getExecSize()) && (so == 0 || so == nativeES));
1418 bool isLegitF = (srcTy == Type_F
1419 && !sreg->getRegion()->isScalar()
1420 && sreg->getRegion()->isContiguous(inst->getExecSize()) && so == 0);
1421 bool isLegitScalar = (sreg->getRegion()->isScalar()
1422 && (srcTy == Type_F || (srcTy == Type_BF && inst->getExecSize() == g4::SIMD1)));
1423 if (!(isLegitPackedBF || isLegitF || isLegitScalar))
1424 {
1425 // case 5 & 6
1426 DEBUG_VERBOSE("Src has illegal region and type combination!");
1427 inst->emit(std::cerr);
1428 DEBUG_VERBOSE(std::endl);
1429 MUST_BE_TRUE(false, "Src has illegal region and type combination!");
1430 }
1431 }
1432
1433 // case 7
1434 if (inst->getExecSize() > nativeES)
1435 {
1436 std::stringstream ss;
1437 ss << "Inst in BF mixed mode should have execsize <= " << nativeES << '\n';
1438 DEBUG_VERBOSE(ss.str().c_str());
1439 inst->emit(std::cerr);
1440 DEBUG_VERBOSE(std::endl);
1441 MUST_BE_TRUE(false, ss.str().c_str());
1442 }
1443 return;
1444 }
1445