1 /*========================== begin_copyright_notice ============================
2 
3 Copyright (C) 2017-2021 Intel Corporation
4 
5 SPDX-License-Identifier: MIT
6 
7 ============================= end_copyright_notice ===========================*/
8 
9 #include "G4_Verifier.hpp"
10 
11 #include <sstream>
12 
13 using namespace vISA;
14 
verifyG4Kernel(G4_Kernel & k,Optimizer::PassIndex index,bool alwaysOn,G4Verifier::VerifyControl ctrl)15 void verifyG4Kernel(
16     G4_Kernel &k, Optimizer::PassIndex index,
17     bool alwaysOn, G4Verifier::VerifyControl ctrl)
18 {
19     if (alwaysOn || k.fg.builder->getOption(VISA_FullIRVerify))
20     {
21         G4Verifier verifier(k, ctrl, index);
22         verifier.verify();
23     }
24 }
25 
verifyG4Inst(G4_Kernel & kernel,G4_INST * inst,Optimizer::PassIndex index)26 void verifyG4Inst(G4_Kernel &kernel, G4_INST *inst, Optimizer::PassIndex index)
27 {
28     G4Verifier verifier(kernel, G4Verifier::VC_ASSERT, index);
29     verifier.verifyInst(inst);
30 }
31 
32 std::atomic<int> G4Verifier::index(0);
33 
G4Verifier(G4_Kernel & k,VerifyControl ctrl,Optimizer::PassIndex index)34 G4Verifier::G4Verifier(G4_Kernel &k, VerifyControl ctrl, Optimizer::PassIndex index)
35     : kernel(k), verifyCtrl(ctrl), passIndex(index)
36 {
37     if (ctrl == VC_AppendDump || ctrl == VC_NewDump)
38     {
39         const char* buf = nullptr;
40         k.getOptions()->getOption(VISA_AsmFileName, buf);
41         std::string dumpName;
42         if (buf != nullptr)
43         {
44             dumpName = std::string(buf);
45         }
46         dumpName += ".g4verify.dump.txt";
47         if (ctrl == VC_AppendDump)
48             dumpText.open(dumpName, std::ofstream::app);
49         else
50             dumpText.open(dumpName, std::ofstream::trunc);
51     }
52 }
53 
verify()54 void G4Verifier::verify()
55 {
56     // For each instruction do verification.
57     for (auto BBI = kernel.fg.cbegin(), BBE = kernel.fg.cend(); BBI != BBE; ++BBI)
58     {
59        auto bb = *BBI;
60        for (auto I = bb->begin(), E = bb->end(); I != E; ++I)
61        {
62            G4_INST *inst = *I;
63            verifyInst(inst);
64        }
65     }
66 }
67 
verifyInst(G4_INST * inst)68 bool G4Verifier::verifyInst(G4_INST *inst)
69 {
70     ASSERT_USER(inst != NULL, "null instruction unexpected");
71     if (inst)
72     {
73         verifyOpcode(inst);
74         verifyOpnd(inst->getDst(), inst);
75         verifyOpnd(inst->getSrc(0), inst);
76         verifyOpnd(inst->getSrc(1), inst);
77         verifyOpnd(inst->getSrc(2), inst);
78         verifyOpnd(inst->getPredicate(), inst);
79         verifyOpnd(inst->getCondMod(), inst);
80         verifyOpnd(inst->getImplAccDst(), inst);
81         verifyOpnd(inst->getImplAccSrc(), inst);
82 
83         if (inst->isSend())
84         {
85             verifySend(inst);
86         }
87         else if (inst->isDpas())
88         {
89             verifyDpas(inst);
90         }
91         verifyAccMov(inst);
92 
93         verifyDstSrcOverlap(inst);
94 
95         if (passIndex == Optimizer::PI_cleanMessageHeader ||
96             passIndex == Optimizer::PI_renameRegister ||
97             passIndex == Optimizer::PI_localDefHoisting ||
98             passIndex == Optimizer::PI_localCopyPropagation ||
99             passIndex == Optimizer::PI_localInstCombine ||
100             passIndex == Optimizer::PI_reassociateConst ||
101             passIndex == Optimizer::PI_cselPeepHoleOpt)
102         {
103             // def-use chain should be valid after these passes
104             return verifyDefUseChain(inst);
105         }
106 
107         if (passIndex == Optimizer::PI_HWConformityChk
108             || passIndex == Optimizer::PI_addSWSBInfo)
109         {
110             // feature verification. Do it twice for now.
111             verifyBFMixedMode(inst);
112         }
113     }
114     return true;
115 }
116 
117 // Returns true if this use is defined by the defInst (dst, condMod, or acc)
118 // Otherwise returns false.
checkDefUse(G4_INST * defInst,G4_Operand * use)119 static bool checkDefUse(G4_INST* defInst, G4_Operand *use)
120 {
121     if (!use)
122         return false;
123 
124     G4_Operand *dst = defInst->getOperand(Opnd_dst);
125     G4_Operand *condMod = defInst->getOperand(Opnd_condMod);
126 
127     if (use->isAccReg())
128     {
129         // use is acc
130         // ToDo: we should check if acc is re-defined in between as well
131         if (defInst->getImplAccDst() != NULL || dst->isAccReg())
132         {
133             return true;
134         }
135     }
136 
137     if (dst && Rel_disjoint != use->compareOperand(dst))
138         return true;
139 
140     if (condMod && Rel_disjoint != use->compareOperand(condMod))
141         return true;
142 
143     return false;
144 }
145 
verifyDefUseChain(G4_INST * inst)146 bool G4Verifier::verifyDefUseChain(G4_INST *inst)
147 {
148     bool isValid = true;
149 
150     for (auto I = inst->use_begin(), E = inst->use_end(); I != E; ++I)
151     {
152         auto DU = *I;
153         // A valid def-use satisfies
154         //
155         // inst[dst/condMod] defines DU.first[DU.second]
156         //
157         G4_Operand *use = (DU.first)->getOperand(DU.second);
158         if (!checkDefUse(inst, use))
159         {
160             isValid = false;
161             printDefUse(inst, DU.first, DU.second);
162             assertIfEnable();
163         }
164     }
165 
166     for (auto I = inst->def_begin(), E = inst->def_end(); I != E; ++I)
167     {
168         auto UD = *I;
169         // A valid use-def satisfies
170         //
171         // UD.first[dst/condMod] defines inst[UD.second]
172         //
173         G4_Operand *use = inst->getOperand(UD.second);
174         if (!checkDefUse(UD.first, use))
175         {
176             isValid = false;
177             printDefUse(UD.first, inst, UD.second);
178             assertIfEnable();
179         }
180     }
181 
182     return isValid;
183 }
184 
printDefUseImpl(std::ostream & os,G4_INST * def,G4_INST * use,Gen4_Operand_Number pos)185 void G4Verifier::printDefUseImpl(
186     std::ostream &os, G4_INST *def, G4_INST *use, Gen4_Operand_Number pos)
187 {
188     os << "\n  def: ";
189     def->emit(os);
190     os << "\n user: ";
191     use->emit(os);
192     os << "\n opnd: ";
193     if (use->getOperand(pos))
194     {
195         use->getOperand(pos)->emit(os);
196     }
197 }
198 
199 /// Dump or warn def-use.
printDefUse(G4_INST * def,G4_INST * use,Gen4_Operand_Number pos)200 void G4Verifier::printDefUse(G4_INST *def, G4_INST *use, Gen4_Operand_Number pos)
201 {
202     if (dumpText.is_open() && dumpText.good())
203     {
204         dumpText << "\n\nIndex: " << index++;
205         printDefUseImpl(dumpText, def, use, pos);
206     }
207     else if (verifyCtrl == VC_WARN)
208     {
209         std::cerr << "\n\nInvalid def-use pair detected!!\n";
210         printDefUseImpl(std::cerr, def, use, pos);
211     }
212 }
213 
/// Signals a verification failure.
///
/// Always invokes MUST_BE_TRUE with a false condition and the message
/// "G4Verification failure". Despite the name, no verifier setting (such as
/// verifyCtrl) is consulted here; what happens next is decided entirely by
/// the MUST_BE_TRUE macro.
void G4Verifier::assertIfEnable() const
{
    MUST_BE_TRUE(false, "G4Verification failure");
}
218 
dataHazardCheck(G4_Operand * dst,G4_Operand * src)219 bool G4Verifier::dataHazardCheck(G4_Operand *dst, G4_Operand *src)
220 {
221     G4_RegVar* dstVar = static_cast<G4_RegVar*>(dst->asDstRegRegion()->getBase());
222     G4_RegVar* srcVar = static_cast<G4_RegVar*>(src->asSrcRegRegion()->getBase());
223     if (!dstVar->isRegVar() || !dstVar->isGreg() || !srcVar->isRegVar() || !srcVar->isGreg())
224     {
225         return false;
226     }
227 
228     int dstStart = dst->getLinearizedStart();
229     int dstEnd = dst->getLinearizedEnd();
230     int srcStart = src->getLinearizedStart();
231     int srcEnd = src->getLinearizedEnd();
232 
233     if (dstEnd < srcStart ||
234         srcEnd < dstStart)
235     {
236         return false;
237     }
238 
239     int dstReg = dstStart / numEltPerGRF<Type_UB>();
240     int dstRegNum = (dstEnd - dstStart + numEltPerGRF<Type_UB>()) / numEltPerGRF<Type_UB>();
241     int srcReg = srcStart / numEltPerGRF<Type_UB>();
242     int srcRegNum = (srcEnd - srcStart + numEltPerGRF<Type_UB>()) / numEltPerGRF<Type_UB>();
243     int srcReg2 = -1;
244 
245     if (srcRegNum > 1)
246     {
247         srcReg2 = srcReg + 1;
248     }
249 
250     if (dstRegNum >= 2 && srcRegNum == 1)
251     {
252         srcReg2 = srcReg;
253     }
254 
255     if (dstReg == srcReg2)
256     {
257         return true;
258     }
259 
260     return false;
261 }
262 
verifyDstSrcOverlap(G4_INST * inst)263 void G4Verifier::verifyDstSrcOverlap(G4_INST* inst)
264 {
265     if (passIndex == Optimizer::PI_regAlloc && kernel.fg.builder->avoidDstSrcOverlap())
266     {
267         G4_DstRegRegion* dst = inst->getDst();
268 
269         if (inst->isSend() || dst == NULL || dst->isNullReg() || inst->opcode() == G4_madm)
270         {
271             return;
272         }
273 
274         if (!inst->isComprInst())
275         {
276             return;
277         }
278 
279         int dstStart = dst->getLinearizedStart() / numEltPerGRF<Type_UB>();
280         int dstEnd = dst->getLinearizedEnd() / numEltPerGRF<Type_UB>();
281 
282         for (int i = 0; i < inst->getNumSrc(); i++)
283         {
284             G4_Operand* src = inst->getSrc(i);
285             if (src != NULL && !src->isNullReg() && src->getTopDcl() &&
286                 (src->getTopDcl()->getRegFile() == G4_GRF || src->getTopDcl()->getRegFile() == G4_INPUT))
287             {
288                 bool overlap = dataHazardCheck(dst, src);
289 
290                 int srcStart = src->getLinearizedStart() / numEltPerGRF<Type_UB>();
291                 int srcEnd = src->getLinearizedEnd() / numEltPerGRF<Type_UB>();
292                 if (dstEnd != dstStart ||
293                     srcStart != srcEnd)  //Any operand is more than 2 GRF
294                 {
295                     MUST_BE_TRUE(!overlap, "dst and src0 overlap");
296                 }
297             }
298         }
299     }
300 }
301 
verifySend(G4_INST * inst)302 void G4Verifier::verifySend(G4_INST* inst)
303 {
304     MUST_BE_TRUE(inst->isSend(), "expect send inst");
305     if (passIndex == Optimizer::PI_regAlloc)
306     {
307         G4_DstRegRegion* dst = inst->getDst();
308         G4_SrcRegRegion* src0 = inst->getSrc(0)->asSrcRegRegion();
309         G4_SrcRegRegion* src1 = inst->isSplitSend() ? inst->getSrc(1)->asSrcRegRegion() : nullptr;
310 
311         if (inst->isEOT() && kernel.fg.builder->hasEOTGRFBinding())
312         {
313             auto checkEOTSrc = [](G4_SrcRegRegion* src) {
314                 const unsigned int EOTStart = 112 * numEltPerGRF<Type_UB>();
315                 if (src->isNullReg())
316                 {
317                     return true;
318                 }
319                 return src->getLinearizedStart() >= EOTStart;
320             };
321 
322             if (kernel.getNumRegTotal() >= 128)
323             {
324                 MUST_BE_TRUE(checkEOTSrc(src0), "src0 for EOT send is not in r112-r127");
325                 if (src1 != nullptr)
326                 {
327                     MUST_BE_TRUE(checkEOTSrc(src1), "src1 for EOT sends is not in r112-r127");
328                 }
329             }
330         }
331 
332         if (inst->isSplitSend())
333         {
334             if (src0->getBase()->isGreg() && src1 && src1->getBase()->isGreg())
335             {
336                 int src0Start = src0->getLinearizedStart() / numEltPerGRF<Type_UB>();
337                 int src0End = src0Start + inst->getMsgDesc()->getSrc0LenRegs() - 1;
338                 int src1Start = src1->getLinearizedStart() / numEltPerGRF<Type_UB>();
339                 int src1End = src1Start + inst->getMsgDesc()->getSrc1LenRegs() - 1;
340                 bool noOverlap = src0End < src1Start ||
341                     src1End < src0Start;
342                 MUST_BE_TRUE(noOverlap, "split send src0 and src1 overlap");
343             }
344         }
345 
346         if (kernel.fg.builder->WaDisableSendSrcDstOverlap())
347         {
348             if (!dst->isNullReg())
349             {
350                 if (src0->getBase()->isGreg())
351                 {
352                     bool noOverlap = dst->getLinearizedEnd() < src0->getLinearizedStart() ||
353                         src0->getLinearizedEnd() < dst->getLinearizedStart();
354                     MUST_BE_TRUE(noOverlap, "send dst and src0 overlap");
355                 }
356                 if (src1 && !src1->isNullReg())
357                 {
358                     bool noOverlap = dst->getLinearizedEnd() < src1->getLinearizedStart() ||
359                         src1->getLinearizedEnd() < dst->getLinearizedStart();
360                     MUST_BE_TRUE(noOverlap, "split send dst and src1 overlap");
361                 }
362             }
363         }
364     }
365 }
366 
367 
verifyOpnd(G4_Operand * opnd,G4_INST * inst)368 void G4Verifier::verifyOpnd(G4_Operand* opnd, G4_INST* inst)
369 {
370     if (inst->isDpas())
371     {
372         // Temporarily skip for now
373         return;
374     }
375 
376     uint8_t execSize = inst->getExecSize();
377 
378     if (opnd == NULL)
379     {
380         return;
381     }
382 
383     if (inst->opcode() == G4_sel && opnd->isCondMod())
384     {
385         // conditional modifier for sel is a don't care, so we can skip verification
386         return;
387     }
388 
389     // FIXME: If isImm() condition is removed then some assertions are hit.
390     // This means somewhere in Jitter operand sharing is happening for
391     // immediate type operands. This should be fixed.
392     // For Imm, AddrExp, AddrExpList, Labels, hashtable lookup is
393     // performed at creation time unline SrcRegion, DstRegion,
394     // Predicate, CondMod. This means former type of operands
395     // can be shared across instructions.
396     if (opnd->getInst() != inst &&
397         opnd->isLabel() == false &&
398         opnd->isImm() == false &&
399         opnd->isNullReg() == false &&
400         opnd->isAddrExp() == false)
401     {
402         DEBUG_VERBOSE("operand does not have exactly one owning instruction (shared or orphaned)");
403 
404         std::cerr << "operand: ";
405         opnd->emit(std::cerr);
406         std::cerr << " in instruction:\n  ";
407         inst->emit(std::cerr);
408         std::cerr << "\n";
409 
410         if (opnd->getInst() == NULL)
411         {
412             DEBUG_VERBOSE("operand has no owner instruction (orphaned)");
413             MUST_BE_TRUE(false, "operand has no owner instruction (orphaned)");
414         }
415         else
416         {
417             DEBUG_VERBOSE("operand pointer is shared by another instruction");
418             MUST_BE_TRUE(false, "operand pointer is shared by another instruction");
419         }
420         DEBUG_VERBOSE(std::endl);
421     }
422 
423     if (inst->isSend())
424     {
425         // send dst/src may not be GRF-aligned before HW conformity,
426         // so we only check their bound in RA
427         if (passIndex != Optimizer::PI_regAlloc)
428         {
429             return;
430         }
431 
432         if (opnd == inst->getDst())
433         {
434             if (opnd->isRightBoundSet() && !opnd->isNullReg())
435             {
436                 unsigned int correctRB =
437                     ((inst->getMsgDesc()->getDstLenRegs() + opnd->asDstRegRegion()->getRegOff()) * numEltPerGRF<Type_UB>()) - 1;
438                 uint32_t dstLenBytes = inst->getMsgDesc()->getDstLenBytes();
439                 if (dstLenBytes < getGRFSize()) {
440                     correctRB = opnd->getLeftBound() + dstLenBytes - 1;
441                 } else if (opnd->getTopDcl()->getByteSize() < numEltPerGRF<Type_UB>()) {
442                     correctRB = opnd->getLeftBound() + opnd->getTopDcl()->getByteSize() - 1;
443                 }
444 
445                 G4_Declare* parentDcl = opnd->getBase()->asRegVar()->getDeclare();
446                 while (parentDcl != NULL)
447                 {
448                     correctRB += parentDcl->getAliasOffset();
449                     parentDcl = parentDcl->getAliasDeclare();
450                 }
451 
452                 correctRB = std::min(correctRB, opnd->getTopDcl()->getByteSize() - 1);
453 
454                 if (opnd->getRightBound() != correctRB)
455                 {
456                     DEBUG_VERBOSE("Right bound mismatch for send inst dst. Orig rb = " <<
457                         opnd->getRightBound() << ", correct rb = " << correctRB << std::endl);
458 
459                     inst->emit(std::cerr);
460                     DEBUG_VERBOSE(std::endl);
461                     MUST_BE_TRUE(false, "Right bound mismatch!");
462                 }
463             }
464         }
465         else if (opnd == inst->getSrc(0) || opnd == inst->getSrc(1))
466         {
467             if (opnd->isRightBoundSet())
468             {
469                 int msgLength = (opnd == inst->getSrc(0)) ? inst->getMsgDesc()->getSrc0LenRegs() : inst->getMsgDesc()->getSrc1LenRegs();
470                 unsigned int numBytes = opnd->getTopDcl()->getByteSize();
471                 unsigned int correctRB = 0;
472                 if (numBytes < numEltPerGRF<Type_UB>())
473                 {
474                     correctRB = opnd->asSrcRegRegion()->getRegOff() * numEltPerGRF<Type_UB>() + numBytes - 1;
475                 }
476                 else
477                 {
478                     correctRB = ((msgLength + opnd->asSrcRegRegion()->getRegOff()) * numEltPerGRF<Type_UB>()) - 1;
479                 }
480 
481                 G4_Declare* parentDcl = opnd->getBase()->asRegVar()->getDeclare();
482                 while (parentDcl != NULL)
483                 {
484                     correctRB += parentDcl->getAliasOffset();
485                     parentDcl = parentDcl->getAliasDeclare();
486                 }
487 
488                 correctRB = std::min(correctRB, opnd->getTopDcl()->getByteSize() - 1);
489 
490                 if (opnd->getRightBound() != correctRB)
491                 {
492                     DEBUG_VERBOSE("Right bound mismatch for send inst src0. Orig rb = " <<
493                         opnd->getRightBound() << ", correct rb = " << correctRB << std::endl);
494 
495                     inst->emit(std::cerr);
496                     DEBUG_VERBOSE(std::endl);
497                     MUST_BE_TRUE(false, "Right bound mismatch!");
498                 }
499             }
500         }
501     }
502     else
503     {
504         if (opnd->isSrcRegRegion() && opnd->isRightBoundSet())
505         {
506             G4_SrcRegRegion newRgn(*(opnd->asSrcRegRegion()));
507 
508             newRgn.setInst(inst);
509             newRgn.computeLeftBound();
510             newRgn.computeRightBound(execSize);
511 
512             if (inst->isPseudoUse())
513             {
514                 G4_Declare* topdcl = newRgn.getBase()->asRegVar()->getDeclare();
515 
516                 while (topdcl->getAliasDeclare() != NULL)
517                 {
518                     topdcl = topdcl->getAliasDeclare();
519                 }
520 
521                 newRgn.setLeftBound(0);
522                 newRgn.setRightBound(topdcl->getByteSize() - 1);
523             }
524 
525             if ((opnd->getRightBound() - opnd->getLeftBound()) > (2u * numEltPerGRF<Type_UB>()) &&
526                 (inst->isPseudoUse() == false))
527             {
528                 if (!(inst->opcode() == G4_pln && inst->getSrc(1) == opnd))
529                 {
530                     DEBUG_VERBOSE("Difference between left/right bound is greater than 2 GRF for src region. Single non-send opnd cannot span 2 GRFs. lb = " <<
531                         opnd->getLeftBound() << ", rb = " << opnd->getRightBound() << std::endl);
532                     inst->emit(std::cerr);
533                     DEBUG_VERBOSE(std::endl);
534                     MUST_BE_TRUE(false, "Left/right bound span incorrect!");
535                 }
536             }
537 
538             if (inst->opcode() == G4_pln &&
539                 inst->getSrc(1) == opnd)
540             {
541                 // For pln, src1 uses 2 GRFs if exec size <= 8
542                 // and 4 GRFs if exec size == 16
543                 newRgn.computeRightBound(inst->getExecSize() > g4::SIMD8 ?
544                     inst->getExecSize() : G4_ExecSize(inst->getExecSize() * 2));
545 
546                 if (inst->getExecSize() > g4::SIMD8)
547                 {
548                     newRgn.setRightBound(newRgn.getRightBound() * 2 - newRgn.getLeftBound() + 1);
549                 }
550             }
551 
552             if (inst->getMaskOffset() > 0 &&
553                 opnd == inst->getImplAccSrc())
554             {
555                 // Update left/right bound as per inst mask offset, eg Q2
556                 // has offset 8
557                 G4_Type extype;
558                 int extypesize;
559                 unsigned int multiplicationFactor = 1;
560                 if (opnd->isAccReg())
561                 {
562                     // Right bound granularity is in terms of
563                     // bytes for Acc registers
564                     multiplicationFactor = 4;
565                 }
566 
567                 extype = inst->getOpExecType(extypesize);
568                 if ((IS_WTYPE(extype) || IS_DTYPE(extype)))
569                 {
570                     // This condition is a result of HW Conformity requirement
571                     // that for exec type = D/DW, only acc0 is used even when
572                     // qtr control is set to Q2/H2
573                     newRgn.setLeftBound(0);
574                     newRgn.setRightBound(31);
575                 }
576                 else
577                 {
578                     newRgn.setLeftBound(newRgn.getLeftBound() + (inst->getMaskOffset() * multiplicationFactor));
579                     newRgn.setRightBound(newRgn.getRightBound() + (inst->getMaskOffset() * multiplicationFactor));
580                 }
581             }
582 
583             if (opnd->getLeftBound() != newRgn.getLeftBound())
584             {
585                 DEBUG_VERBOSE("Left bound mismatch for src opnd for following inst. Orig lb = " <<
586                     opnd->getLeftBound() << ", recomputed lb = " << newRgn.getLeftBound() << std::endl);
587                 inst->emit(std::cerr);
588                 DEBUG_VERBOSE(std::endl);
589                 MUST_BE_TRUE(false, "Left bound mismatch!");
590             }
591 
592             if (opnd->getRightBound() != newRgn.getRightBound())
593             {
594                 DEBUG_VERBOSE("Right bound mismatch for src opnd for following inst. Orig rb = " <<
595                     opnd->getRightBound() << ", recomputed rb = " << newRgn.getRightBound() << std::endl);
596 
597                 inst->emit(std::cerr);
598                 DEBUG_VERBOSE(std::endl);
599                 MUST_BE_TRUE(false, "Right bound mismatch!");
600             }
601         }
602         else if (opnd->isDstRegRegion() && opnd->isRightBoundSet() && !opnd->isNullReg())
603         {
604             G4_DstRegRegion newRgn(*(opnd->asDstRegRegion()));
605             newRgn.setInst(inst);
606             newRgn.computeLeftBound();
607             newRgn.computeRightBound(execSize);
608 
609             if (inst->isPseudoKill())
610             {
611                 G4_Declare* topdcl = newRgn.getBase()->asRegVar()->getDeclare();
612 
613                 while (topdcl->getAliasDeclare() != NULL)
614                 {
615                     topdcl = topdcl->getAliasDeclare();
616                 }
617 
618                 newRgn.setLeftBound(0);
619                 newRgn.setRightBound(topdcl->getByteSize() - 1);
620             }
621 
622             if ((opnd->getRightBound() - opnd->getLeftBound()) > (2u * numEltPerGRF<Type_UB>()) &&
623                 (inst->isPseudoKill() == false) && (inst->opcode() != G4_madw))
624             {
625                 DEBUG_VERBOSE("Difference between left/right bound is greater than 2 GRF for dst region. Single non-send opnd cannot span 2 GRFs. lb = " <<
626                     opnd->getLeftBound() << ", rb = " << opnd->getRightBound() << std::endl);
627                 inst->emit(std::cerr);
628                 DEBUG_VERBOSE(std::endl);
629                 MUST_BE_TRUE(false, "Left/right bound span incorrect!");
630             }
631 
632             if (inst->getMaskOffset() > 0 &&
633                 opnd == inst->getImplAccDst())
634             {
635                 // Update left/right bound as per inst mask offset, eg Q2
636                 // has offset 8
637                 G4_Type extype;
638                 int extypesize;
639                 unsigned int multiplicationFactor = 1;
640                 if (opnd->isAccReg())
641                 {
642                     // Right bound granularity is in terms of
643                     // bytes for Acc registers
644                     multiplicationFactor = 4;
645                 }
646 
647                 extype = inst->getOpExecType(extypesize);
648 
649                 if ((IS_WTYPE(extype) || IS_DTYPE(extype)))
650                 {
651                     // This condition is a result of HW Conformity requirement
652                     // that for exec type = D/DW, only acc0 is used even when
653                     // qtr control is set to Q2/H2
654                     newRgn.setLeftBound(0);
655                     newRgn.setRightBound(31);
656                 }
657                 else
658                 {
659                     newRgn.setLeftBound(newRgn.getLeftBound() + (inst->getMaskOffset() * multiplicationFactor));
660                     newRgn.setRightBound(newRgn.getRightBound() + (inst->getMaskOffset() * multiplicationFactor));
661                 }
662             }
663 
664             if (opnd->getLeftBound() != newRgn.getLeftBound())
665             {
666                 DEBUG_VERBOSE("Left bound mismatch for dst opnd for following inst. Orig lb = " <<
667                     opnd->getLeftBound() << ", recomputed lb = " << newRgn.getLeftBound() << std::endl);
668 
669                 inst->emit(std::cerr);
670                 DEBUG_VERBOSE(std::endl);
671                 MUST_BE_TRUE(false, "Left bound mismatch");
672             }
673 
674             if (opnd->getRightBound() != newRgn.getRightBound())
675             {
676                 DEBUG_VERBOSE("Right bound mismatch for dst opnd for following inst. Orig rb = " <<
677                     opnd->getRightBound() << ", recomputed rb = " << newRgn.getRightBound() << std::endl);
678 
679                 inst->emit(std::cerr);
680                 DEBUG_VERBOSE(std::endl);
681                 MUST_BE_TRUE(false, "Right bound mismatch!");
682             }
683         }
684         else if (opnd->isPredicate() && opnd->isRightBoundSet())
685         {
686             G4_Predicate newRgn(*(opnd->asPredicate()));
687 
688             newRgn.setLeftBound(0);
689             newRgn.computeRightBound(execSize);
690 
691             if (inst->getMaskOffset() > 0)
692             {
693                 // Update left/right bound as per inst mask offset, eg Q2
694                 // has offset 8
695                 newRgn.setLeftBound(newRgn.getLeftBound() + inst->getMaskOffset());
696                 newRgn.setRightBound(newRgn.getRightBound() + inst->getMaskOffset());
697             }
698 
699             if (opnd->getLeftBound() != newRgn.getLeftBound())
700             {
701                 DEBUG_VERBOSE("Left bound mismatch for pred opnd for following inst. Orig lb = " <<
702                     opnd->getLeftBound() << ", recomputed lb = " << newRgn.getLeftBound() << std::endl);
703 
704                 inst->emit(std::cerr);
705                 DEBUG_VERBOSE(std::endl);
706                 MUST_BE_TRUE(false, "Left bound mismatch");
707             }
708 
709             if (opnd->getRightBound() != newRgn.getRightBound())
710             {
711                 DEBUG_VERBOSE("Right bound mismatch for pred opnd for following inst. Orig rb = " <<
712                     opnd->getRightBound() << ", recomputed rb = " << newRgn.getRightBound() << std::endl);
713 
714                 inst->emit(std::cerr);
715                 DEBUG_VERBOSE(std::endl);
716                 MUST_BE_TRUE(false, "Right bound mismatch!");
717             }
718         }
719         else if (opnd->isCondMod() && opnd->isRightBoundSet())
720         {
721             G4_CondMod newRgn(*(opnd->asCondMod()));
722 
723             newRgn.setLeftBound(0);
724             newRgn.computeRightBound(execSize);
725 
726             if (inst->getMaskOffset() > 0)
727             {
728                 // Update left/right bound as per inst mask offset, eg Q2
729                 // has offset 8
730                 newRgn.setLeftBound(newRgn.getLeftBound() + inst->getMaskOffset());
731                 newRgn.setRightBound(newRgn.getRightBound() + inst->getMaskOffset());
732             }
733 
734             if (opnd->getLeftBound() != newRgn.getLeftBound())
735             {
736                 DEBUG_VERBOSE("Left bound mismatch for cond mod opnd for following inst. Orig lb = " <<
737                     opnd->getLeftBound() << ", recomputed lb = " << newRgn.getLeftBound() << std::endl);
738 
739                 inst->emit(std::cerr);
740                 DEBUG_VERBOSE(std::endl);
741                 MUST_BE_TRUE(false, "Left bound mismatch");
742             }
743 
744             if (opnd->getRightBound() != newRgn.getRightBound())
745             {
746                 DEBUG_VERBOSE("Right bound mismatch for cond mod opnd for following inst. Orig rb = " <<
747                     opnd->getRightBound() << ", recomputed rb = " << newRgn.getRightBound() << std::endl);
748 
749                 inst->emit(std::cerr);
750                 DEBUG_VERBOSE(std::endl);
751                 MUST_BE_TRUE(false, "Right bound mismatch!");
752             }
753         }
754         else
755         {
756             // Not implemented
757         }
758 
759         if (passIndex == Optimizer::PI_regAlloc)
760         {
761             // alignment checks that can only be performed post RA
762             bool threeSrcAlign16 = (inst->getNumSrc() == 3) && !inst->isSend() && !kernel.fg.builder->hasAlign1Ternary();
763             bool nonScalar = (opnd->isSrcRegRegion() && !opnd->asSrcRegRegion()->isScalar()) ||
764                 (opnd->isDstRegRegion() && inst->getExecSize() > g4::SIMD2);
765             bool isAssigned = opnd->isRegRegion() && opnd->getBase()->isRegVar() &&
766                 opnd->getBase()->asRegVar()->isPhyRegAssigned();
767             // allow replicated DF source opnd with <2;2,0> region
768             bool isReplicated = (opnd->getType() == Type_DF) &&
769                 opnd->isSrcRegRegion() &&
770                 (opnd->asSrcRegRegion()->getRegion()->width == 2) &&
771                 (opnd->asSrcRegRegion()->getRegion()->horzStride == 0) &&
772                 (opnd->asSrcRegRegion()->getRegion()->vertStride == 2);
773             if (threeSrcAlign16 && nonScalar && isAssigned &&
774                 opnd->getLinearizedStart() % 16 != 0 &&
775                 !isReplicated)
776             {
777                 MUST_BE_TRUE(false, "dp2/dp3/dp4/dph and non-scalar 3src op must be align16!");
778             }
779 
780             // check acc source alignment
781             // for explicit acc source, it and the inst's dst should both be oword-aligned
782             // for implicit acc source, its subreg offset should be identical to that of the dst
783             if (opnd->isAccReg())
784             {
785                 uint32_t offset = opnd->getLinearizedStart() % 32;
786                 if (inst->getDst())
787                 {
788                     uint32_t dstOffset = inst->getDst()->getLinearizedStart() % 32;
789                     if (opnd == inst->getImplAccSrc())
790                     {
791                         assert(offset == dstOffset && "implicit acc source must have identical offset as dst");
792                     }
793                     else if (opnd->isSrcRegRegion())
794                     {
795                         assert((offset % 16 == 0 && dstOffset % 16 == 0) &&
796                             "explicit acc source and its dst must be oword-aligned");
797                     }
798                 }
799             }
800 
801             // if src0 is V/UV/VF imm, dst must be 16 byte aligned.
802             if (inst->opcode() == G4_mov && IS_VTYPE(inst->getSrc(0)->getType()))
803             {
804                 auto dst = inst->getDst();
805                 // should we assert if dst is not phyReg assigned?
806                 if (dst)
807                 {
808                     bool dstIsAssigned = dst->getBase()->isRegVar() && dst->getBase()->asRegVar()->isPhyRegAssigned();
809                     if (dstIsAssigned && dst->getLinearizedStart() % 16 != 0)
810                     {
811                         assert(false && "destination of move instruction with V/VF imm is not 16-byte aligned");
812                     }
813                 }
814             }
815 
816             // check if the oprands with mme are GRF-aligned.
817             if (opnd->getAccRegSel() != ACC_UNDEFINED)
818             {
819                 assert(opnd->getLinearizedStart() % numEltPerGRF<Type_UB>() == 0 && "operand with mme must be GRF-aligned");
820             }
821         }
822     }
823 }
824 
verifyLifetimeConsistency(G4_BB * bb)825 void verifyLifetimeConsistency(G4_BB* bb)
826 {
827     // Verify whether misplaced pseudo_kill/lifetime.end is seen in BB
828     // Following code patterns are incorrect:
829     // mov (1) A,
830     // ...
831     // pseudo_kill A
832     // As per VISA spec, we allow a single instance of pseudo_kill per
833     // variable. Later RA's liveness may insert multiple. This will
834     // not be invoked after RA anyway. As a precaution, we return
835     // if no unassigned register is found.
836     //
837     // Similarly for lifetime.end
838     // lifetime.end A
839     // ...
840     // mov (1) A,
841     // This is illegal because lifetime.end appears before last use
842     // in BB
843     bool unassignedFound = false;
844 
845     for (INST_LIST_ITER it = bb->begin(), end = bb->end();
846         it != end;
847         it++)
848     {
849         G4_INST* curInst = (*it);
850 
851         std::stack<G4_Operand*> opnds;
852         opnds.push(curInst->getDst());
853         opnds.push(curInst->getSrc(0));
854         opnds.push(curInst->getSrc(1));
855         opnds.push(curInst->getSrc(2));
856         opnds.push(curInst->getPredicate());
857         opnds.push(curInst->getCondMod());
858 
859         while (!opnds.empty())
860         {
861             G4_Operand* curOpnd = opnds.top();
862             opnds.pop();
863 
864             if (curOpnd != NULL && curOpnd->getTopDcl() != NULL)
865             {
866                 G4_Declare* topdcl = curOpnd->getTopDcl();
867 
868                 if (topdcl->getRegVar() &&
869                     !topdcl->getRegVar()->isPhyRegAssigned())
870                 {
871                     unassignedFound = true;
872                 }
873             }
874         }
875     }
876 
877     if (unassignedFound == true)
878     {
879         typedef std::map<G4_Declare*, std::pair<G4_INST*, unsigned int>> dclInstMap;
880         typedef dclInstMap::iterator dclInstMapIter;
881         dclInstMap pseudoKills;
882         dclInstMap lifetimeEnd;
883 
884         unsigned int instId = 0;
885 
886         // First populate all pseudo_kills and lifetime.end instructions
887         // in BB's inst list. Later run second loop to check whether
888         // lifetime rules are flouted.
889         for (INST_LIST_ITER it = bb->begin(), end = bb->end();
890             it != end;
891             it++, instId++)
892         {
893             G4_INST* curInst = (*it);
894             std::pair<G4_INST*, unsigned int> instPair;
895 
896             instPair.first = curInst;
897             instPair.second = instId;
898 
899             if (curInst->isPseudoKill())
900             {
901                 pseudoKills.insert(make_pair(GetTopDclFromRegRegion(curInst->getDst()), instPair));
902             }
903 
904             if (curInst->isLifeTimeEnd())
905             {
906                 lifetimeEnd.insert(make_pair(GetTopDclFromRegRegion(curInst->getSrc(0)), instPair));
907             }
908         }
909 
910         instId = 0;
911         for (INST_LIST_ITER it = bb->begin(), end = bb->end();
912             it != end;
913             it++, instId++)
914         {
915             G4_INST* curInst = (*it);
916 
917             if (curInst->isPseudoKill() ||
918                 curInst->isLifeTimeEnd())
919             {
920                 continue;
921             }
922 
923             std::stack<G4_Operand*> opnds;
924             opnds.push(curInst->getDst());
925             opnds.push(curInst->getSrc(0));
926             opnds.push(curInst->getSrc(1));
927             opnds.push(curInst->getSrc(2));
928             opnds.push(curInst->getPredicate());
929             opnds.push(curInst->getCondMod());
930 
931             while (!opnds.empty())
932             {
933                 G4_Operand* curOpnd = opnds.top();
934                 opnds.pop();
935 
936                 if (curOpnd != NULL && curOpnd->getTopDcl() != NULL)
937                 {
938                     G4_Declare* topdcl = curOpnd->getTopDcl();
939 
940                     // Check whether topdcl has been written to map
941                     dclInstMapIter killsIt = pseudoKills.find(topdcl);
942 
943                     if (killsIt != pseudoKills.end())
944                     {
945                         unsigned int foundAtId = (*killsIt).second.second;
946 
947                         if (foundAtId > instId)
948                         {
949                             DEBUG_VERBOSE("Found a definition before pseudo_kill.");
950                             (*killsIt).second.first->emit(std::cerr);
951                             DEBUG_VERBOSE(std::endl);
952                             curInst->emit(std::cerr);
953                             DEBUG_VERBOSE(std::endl);
954                         }
955                     }
956 
957                     dclInstMapIter lifetimeEndIter = lifetimeEnd.find(topdcl);
958 
959                     if (lifetimeEndIter != lifetimeEnd.end())
960                     {
961                         unsigned int foundAtId = (*lifetimeEndIter).second.second;
962 
963                         if (foundAtId < instId)
964                         {
965                             DEBUG_VERBOSE("Found a use after lifetime.end.");
966                             (*lifetimeEndIter).second.first->emit(std::cerr);
967                             DEBUG_VERBOSE(std::endl);
968                             curInst->emit(std::cerr);
969                             DEBUG_VERBOSE(std::endl);
970                         }
971                     }
972                 }
973             }
974         }
975     }
976 }
977 
verifyOpcode(G4_INST * inst)978 void G4Verifier::verifyOpcode(G4_INST* inst)
979 {
980     switch (inst->opcode())
981     {
982     case G4_dp2:
983     case G4_dp3:
984     case G4_dp4:
985         assert(kernel.fg.builder->hasDotProductInst() && "unsupported opcode");
986         break;
987     case G4_lrp:
988         assert(kernel.fg.builder->hasLRP() && "unsupported opcode");
989         break;
990     case G4_madm:
991         assert(kernel.fg.builder->hasMadm() && "unsupported opcode");
992         break;
993     default:
994         break;
995     }
996 
997     if (passIndex == Optimizer::PI_regAlloc)
998     {
999         //ToDo: add more checks for psuedo inst after RA
1000         assert(!inst->isPseudoLogic() && "pseudo logic inst should be lowered before RA");
1001     }
1002 
1003     if (inst->getSaturate())
1004     {
1005         assert(inst->canSupportSaturate() && "saturate is set to true but instruction does not support saturation");
1006     }
1007 
1008 }
1009 
verifyDpas(G4_INST * inst)1010 void G4Verifier::verifyDpas(G4_INST* inst)
1011 {
1012     // Verify region and size of each operands
1013     G4_InstDpas* dpasInst = inst->asDpasInst();
1014 
1015     if (dpasInst->getPredicate() || dpasInst->getCondMod())
1016     {
1017         DEBUG_VERBOSE("dpas: should not have predicate nor condMod");
1018         inst->emit(std::cerr);
1019         DEBUG_VERBOSE(std::endl);
1020         MUST_BE_TRUE(false, "dpas: may not have predicate/condMod");
1021     }
1022 
1023     G4_DstRegRegion* dst = dpasInst->getDst();
1024     G4_Type dTy = dst->getType();
1025     G4_SrcRegRegion* src0 = dpasInst->getSrc(0)->asSrcRegRegion();
1026     G4_Type s0Ty = src0->getType();
1027     G4_SrcRegRegion* src1 = dpasInst->getSrc(1)->asSrcRegRegion();
1028     G4_Type s1Ty = src1->getType();
1029     G4_SrcRegRegion* src2 = dpasInst->getSrc(2)->asSrcRegRegion();
1030     G4_Type s2Ty = src2->getType();
1031     G4_Operand* opnd3 = dpasInst->getSrc(3);
1032     G4_SrcRegRegion* src3 = opnd3 ? opnd3->asSrcRegRegion() : nullptr;
1033     G4_Type s3Ty = src3 ? src3->getType() : Type_UNDEF;
1034 
1035     // No source modifier
1036     if (src0->hasModifier() || src1->hasModifier() || src2->hasModifier() ||
1037         (src3 && src3->hasModifier()))
1038     {
1039         DEBUG_VERBOSE("dpas: should not have source modifier");
1040         inst->emit(std::cerr);
1041         DEBUG_VERBOSE(std::endl);
1042         MUST_BE_TRUE(false, "dpas: may not have source modifier");
1043     }
1044 
1045     // No indirect register access
1046     if (src0->isIndirect() || src1->isIndirect() || src2->isIndirect() || dst->isIndirect() ||
1047         (src3 && src3->isIndirect()))
1048     {
1049         DEBUG_VERBOSE("dpas: no indirect register access supported!");
1050         inst->emit(std::cerr);
1051         DEBUG_VERBOSE(std::endl);
1052         MUST_BE_TRUE(false, "dpas: no indirect register access supported!");
1053     }
1054 
1055     if (!(s1Ty == Type_UD || s1Ty == Type_D) || !(s2Ty == Type_UD || s2Ty == Type_D))
1056     {
1057         DEBUG_VERBOSE("dpas: incorrect type for src1 or src2!");
1058         inst->emit(std::cerr);
1059         DEBUG_VERBOSE(std::endl);
1060         MUST_BE_TRUE(false, "dpas: wrong type for src1 or src2");
1061     }
1062 
1063     if (dpasInst->isInt())
1064     {
1065         if (!(s0Ty == Type_UD || s0Ty == Type_D) || !(dTy == Type_UD || dTy == Type_D))
1066         {
1067             DEBUG_VERBOSE("dpas: incorrect int type for src0 or dst!");
1068             inst->emit(std::cerr);
1069             DEBUG_VERBOSE(std::endl);
1070             MUST_BE_TRUE(false, "dpas: wrong int type for src0 or dst");
1071         }
1072     }
1073     else if (dpasInst->isFP16() || dpasInst->isBF16())
1074     {
1075         G4_Type prec = Type_UNDEF;
1076         if (dpasInst->getPlatform() >= GENX_PVC)
1077         {
1078             prec = dpasInst->isBF16() ? Type_BF : Type_HF;
1079         }
1080         if (!(dTy == Type_F || dTy == prec) || !(s0Ty == Type_F || s0Ty == prec))
1081         {
1082             DEBUG_VERBOSE("dpas: incorrect float type for dst or src0!");
1083             inst->emit(std::cerr);
1084             DEBUG_VERBOSE(std::endl);
1085             MUST_BE_TRUE(false, "dpas: wrong float type for dst or src0");
1086         }
1087     }
1088     else if (dpasInst->isTF32())
1089     {
1090         if (dTy != Type_F || s0Ty != Type_F)
1091         {
1092             DEBUG_VERBOSE("dpas: incorrect TF32 type for dst or src0 (expected F)!");
1093             inst->emit(std::cerr);
1094             DEBUG_VERBOSE(std::endl);
1095             MUST_BE_TRUE(false, "dpas: should be float type for dst or src0");
1096         }
1097     }
1098     else if (dpasInst->isBF8())
1099     {
1100         if (!(dTy == Type_F || dTy == Type_BF || dTy == Type_HF) ||
1101             !(s0Ty == Type_F || s0Ty == Type_BF || s0Ty == Type_HF))
1102         {
1103             DEBUG_VERBOSE("dpas: incorrect type for dst or src0 (expected F, BF, HF)!");
1104             inst->emit(std::cerr);
1105             DEBUG_VERBOSE(std::endl);
1106             MUST_BE_TRUE(false, "dpas: should be type(F, BF, HF) for dst or src0");
1107         }
1108     }
1109 
1110     else
1111     {
1112         DEBUG_VERBOSE("dpas: invalid!");
1113         inst->emit(std::cerr);
1114         DEBUG_VERBOSE(std::endl);
1115         MUST_BE_TRUE(false, "dpas: invalid");
1116     }
1117 
1118     // region check, enforce <1;1,0> for source region, <1> for dst
1119     auto isSrcRegion110 = [](const RegionDesc* RD) -> bool {
1120         return RD->vertStride == 1 && RD->width == 1 && RD->horzStride == 0;
1121     };
1122 
1123     if (dst->getHorzStride() != 1 ||
1124         (!src0->isNullReg() && !isSrcRegion110(src0->getRegion())) ||
1125         !isSrcRegion110(src1->getRegion()) ||
1126         !isSrcRegion110(src2->getRegion()) ||
1127         (src3 && !isSrcRegion110(src3->getRegion())))
1128     {
1129         DEBUG_VERBOSE("dpas: src region should be <1;1,0> and dst region <1>!");
1130         inst->emit(std::cerr);
1131         DEBUG_VERBOSE(std::endl);
1132         MUST_BE_TRUE(false, "dpas: src region should be <1;1,0> and dst region <1>!");
1133     }
1134 
1135     // register alignment & size
1136     //   dst & src0 : aligned on execsize
1137     //   src1 : aligned on grf
1138     //   src2 : aligned on systolic depth * OPS_PER_CHAN
1139     if (passIndex == Optimizer::PI_regAlloc)
1140     {
1141         uint32_t D = dpasInst->getSystolicDepth();
1142         uint32_t ES = dpasInst->getExecSize();
1143         uint32_t RC = dpasInst->getRepeatCount();
1144         uint32_t Src1_D = D;
1145 
1146         uint32_t dAlignBytes = TypeSize(dTy) * ES;
1147         uint32_t s0AlignBytes = TypeSize(s0Ty) * ES;
1148         if ((dst->getLinearizedStart() % dAlignBytes) != 0 ||
1149             (src0->getLinearizedStart() % s0AlignBytes) != 0)
1150         {
1151             DEBUG_VERBOSE("dpas: dst/src0's subreg offset should be multiple of execsize!");
1152             inst->emit(std::cerr);
1153             DEBUG_VERBOSE(std::endl);
1154             MUST_BE_TRUE(false, "dpas: dst/src0's subreg offset should be multiple of execsize!");
1155         }
1156 
1157         uint32_t dBytes = dst->getLinearizedEnd() - dst->getLinearizedStart() + 1;
1158         uint32_t s0Bytes = src0->getLinearizedEnd() - src0->getLinearizedStart() + 1;
1159         if (dBytes != (dAlignBytes * RC) || (!src0->isNullReg() && s0Bytes != s0AlignBytes * RC))
1160         {
1161             DEBUG_VERBOSE("dpas: dst/src0's size is wrong!");
1162             inst->emit(std::cerr);
1163             DEBUG_VERBOSE(std::endl);
1164             MUST_BE_TRUE(false, "dpas: dst/src0's size is wrong!");
1165         }
1166 
1167         if ((src1->getLinearizedStart() % numEltPerGRF<Type_UB>()) != 0)
1168         {
1169             DEBUG_VERBOSE("dpas: src1's subreg offset should be 0!");
1170             inst->emit(std::cerr);
1171             DEBUG_VERBOSE(std::endl);
1172             MUST_BE_TRUE(false, "dpas: src1's subreg offset should be 0!");
1173         }
1174 
1175 
1176         // bytes per lane per depth
1177         uint32_t bytes1PerLD = dpasInst->getSrc1SizePerLaneInByte();
1178         uint32_t s1Bytes = src1->getLinearizedEnd() - src1->getLinearizedStart() + 1;
1179         if (s1Bytes != (bytes1PerLD * Src1_D * ES))
1180         {
1181             DEBUG_VERBOSE("dpas: src1's size is wrong!");
1182             inst->emit(std::cerr);
1183             DEBUG_VERBOSE(std::endl);
1184             MUST_BE_TRUE(false, "dpas: src1's size is wrong!");
1185         }
1186 
1187         uint32_t s2AlignBytes = dpasInst->getSrc2SizePerLaneInByte() * D;
1188         if ((src2->getLinearizedStart() % s2AlignBytes) != 0)
1189         {
1190             DEBUG_VERBOSE("dpas: src2's subreg offset is incorrec!");
1191             inst->emit(std::cerr);
1192             DEBUG_VERBOSE(std::endl);
1193             MUST_BE_TRUE(false, "dpas: src2's subreg offset is incorrect!");
1194         }
1195 
1196         uint32_t s2Bytes = src2->getLinearizedEnd() - src2->getLinearizedStart() + 1;
1197         uint32_t correctBytes = s2AlignBytes * RC;
1198         if (dpasInst->opcode() == G4_dpasw) {
1199             correctBytes = s2AlignBytes * ((RC + 1) / 2);
1200         }
1201         if (s2Bytes != correctBytes)
1202         {
1203             DEBUG_VERBOSE("dpas: src2's size is wrong!");
1204             inst->emit(std::cerr);
1205             DEBUG_VERBOSE(std::endl);
1206             MUST_BE_TRUE(false, "dpas: src2's size is wrong!");
1207         }
1208 
1209     }
1210 }
1211 
verifyAccMov(G4_INST * inst)1212 void G4Verifier::verifyAccMov(G4_INST* inst)
1213 {
1214     const G4_Operand* src = inst->getSrc(0);
1215     const G4_Operand* dst = inst->getDst();
1216     if (kernel.fg.builder->hasFormatConversionACCRestrictions() &&
1217         inst->opcode() == G4_mov &&
1218         (src->isAccReg() || dst->isAccReg()))
1219     {
1220         const bool allowedICombination = (IS_DTYPE(src->getType()) || src->getType() == Type_W || src->getType() == Type_UW) &&
1221             (IS_DTYPE(dst->getType()) || dst->getType() == Type_W || dst->getType() == Type_UW);
1222         const bool allowedFCombination = (src->getType() == Type_F || src->getType() == Type_HF) &&
1223             (dst->getType() == Type_F || dst->getType() == Type_HF);
1224         const bool allowedDFCombination = src->getType() == Type_DF &&
1225             dst->getType() == Type_DF;
1226         if (!allowedICombination && !allowedFCombination && !allowedDFCombination)
1227         {
1228             DEBUG_VERBOSE("Invalid type combination during mov format conversion when accumulator is used as src or dst!");
1229             inst->emit(std::cerr);
1230             DEBUG_VERBOSE(std::endl);
1231             MUST_BE_TRUE(false, "Invalid type combination during mov format conversion when accumulator is used as src or dst!");
1232         }
1233     }
1234 }
1235 
1236 //
1237 // Mixed mode instruction allows bfloat16 operands in the following cases:
1238 //   1. dst, src0, and src1 for 2 source instructions format not involving multiplier(mov, add, cmp, sel).
1239 //   2. dst and src0 for 2 source instructions format involving multiplier(mul, mac etc).
1240 //   3. dst, src0, and src1 for 3 source instructions format(mad).
1241 //   4. Broadcast of bfloat16 scalar is not supported.
1242 //   5. Unpacked bfloat16 destination with stride 2 when register offset is 0 or 1.
1243 //   6. Packed bfloat16 source and destination when register offset is 0 or 8.
1244 //   7. Execution size must not be greater than 8.
1245 //   8. Instructions with pure bfloat16 operands are not supported.
1246 //
1247 // **More examples**
1248 //   1. BF imm is not allowed
1249 //      mov  (1|M0)  r12.0<1>:f  0xffff:bf - ILLEGAL "Imm operand with BF type is not allowed"
1250 //   2. BF scalar operand can be used in SIMD1
1251 //      mul  (1|M0)  r14.0<1>:f  r11.0<0;1,0>:bf  r12.3<0;1,0>:f - OK
1252 //   3. For SIMD1, scalar operands (both dst/src) of F or BF can have any subreg!
1253 //      add  (1|M0)  r16.3<1>:bf  r11.0<0;1,0>:f  r12.3<0;1,0>:f - OK
1254 //   4. F Operand should have subreg = 0 if execSize > SIMD1
1255 //      add  (2|M0)  r10.4<1>:f  r11.0<1;1,0>:bf   0x12345:f
1256 //       ILLEGAL "Src0 regioning must be aligned to destination or scalar for Float/64bit pipes"
1257 //   5. Others
1258 //     add  (8|M0)  r16.0<2>:bf  r11.0<1;1,0>:f  r12.0<1;1,0>:f- OK
1259 //     add  (8|M0)  r16.1<2>:bf  r11.0<1;1,0>:f  r12.8<1;1,0>:f- OK
1260 //     add  (8|M0)  r16.0<1>:bf  r11.0<1;1,0>:f  r12.8<1;1,0>:f- OK
1261 //     add  (8|M0)  r16.8<1>:bf  r11.0<1;1,0>:f  r12.0<1;1,0>:f- OK
1262 //         Note that float source operands  can be scalar region <0;1,0>
1263 //
//   For PVC, case 7 should be "Execution size must not be greater than 16."
verifyBFMixedMode(G4_INST * inst)1265 void G4Verifier::verifyBFMixedMode(G4_INST* inst)
1266 {
1267     auto useGivenType = [](G4_INST* I, G4_Type GivenTy) -> bool
1268     {
1269         G4_Operand* dst = I->getDst();
1270         if (I->isPseudoAddrMovIntrinsic())
1271         {
1272             return false;
1273         }
1274         // Skip compare's dst (?)
1275         if (dst && !dst->isNullReg() && !I->isCompare())
1276         {
1277             if (dst->getType() == GivenTy)
1278                 return true;
1279         }
1280         for (int i = 0; i < I->getNumSrc(); ++i)
1281         {
1282             G4_Operand* src = I->getSrc(i);
1283             if (src && !src->isNullReg())
1284             {
1285                 if (src->getType() == GivenTy)
1286                     return true;
1287             }
1288         }
1289         return false;
1290     };
1291 
1292     // Skip dpas/send as it has been verified separately
1293     if (inst->isDpas() || inst->isSend())
1294         return;
1295 
1296     // Skip if no BF usage
1297     if (!useGivenType(inst, Type_BF))
1298         return;
1299 
1300     if (!kernel.fg.builder->hasBFMixMode())
1301     {
1302         DEBUG_VERBOSE("BF type: BF mixed mode not supported!");
1303         inst->emit(std::cerr);
1304         DEBUG_VERBOSE(std::endl);
1305         MUST_BE_TRUE(false, "BF type: BF mixed mode not supported!!");
1306     }
1307 
1308     // case 8, pure bf not supported
1309     if (!useGivenType(inst, Type_F))
1310     {
1311         DEBUG_VERBOSE("Pure BF operands are not supported!");
1312         inst->emit(std::cerr);
1313         DEBUG_VERBOSE(std::endl);
1314         MUST_BE_TRUE(false, "Pure BF operands are not supported!!");
1315     }
1316 
1317     switch (inst->opcode())
1318     {
1319     case G4_mul:
1320     case G4_mac:
1321     {
1322         // case 2
1323         G4_Operand* src1 = inst->getSrc(1);
1324         if (src1->getType() != Type_F)
1325         {
1326             DEBUG_VERBOSE("Src1 in BF mixed mode must be F!");
1327             inst->emit(std::cerr);
1328             DEBUG_VERBOSE(std::endl);
1329             MUST_BE_TRUE(false, "Src1 in BF mixed mode must be F!");
1330         }
1331         break;
1332     }
1333     case G4_mad:
1334     case G4_pseudo_mad:
1335     {
1336         // case 3
1337         G4_Operand* src2 = inst->getSrc(2);
1338         if (src2->getType() != Type_F)
1339         {
1340             DEBUG_VERBOSE("Src2 in BF mixed mode must be F!");
1341             inst->emit(std::cerr);
1342             DEBUG_VERBOSE(std::endl);
1343             MUST_BE_TRUE(false, "Src2 in BF mixed mode must be F!");
1344         }
1345         break;
1346     }
1347     case G4_mov:
1348     {
1349         if (inst->getSrc(0)->getType() == Type_BF)
1350         {
1351             // bf->f is just a left shift, bf mix restriction does not apply.
1352             return;
1353         }
1354         // case 1
1355         break;
1356     }
1357     case G4_add:
1358     case G4_sel:
1359     case G4_cmp:
1360     {   // case 1
1361         break;
1362     }
1363     default:
1364         DEBUG_VERBOSE("Instruction does not support BF type!");
1365         inst->emit(std::cerr);
1366         DEBUG_VERBOSE(std::endl);
1367         MUST_BE_TRUE(false, "Instruction does not support BF type!");
1368         break;
1369     }
1370 
1371     uint32_t nativeES = kernel.fg.builder->getNativeExecSize();
1372     // verify dst
1373     G4_DstRegRegion* dreg = inst->getDst();
1374     if (dreg && !dreg->isNullReg() && !inst->isCompare())
1375     {
1376         uint32_t hs = dreg->getHorzStride();
1377         uint32_t so = dreg->getSubRegOff();
1378         bool isLegitPackedBF = (dreg->getType() == Type_BF
1379             && (hs == 1 && (so == 0 || so == nativeES)));
1380         bool isLegitUnpackedBF = (dreg->getType() == Type_BF
1381             && (hs == 2 && (so == 0 || so == 1)));
1382         bool isLegitF = (dreg->getType() == Type_F && (hs == 1 && so == 0));
1383         bool isLegitScalar = (inst->getExecSize() == g4::SIMD1 && hs == 1);
1384         if (!(isLegitPackedBF || isLegitUnpackedBF || isLegitF || isLegitScalar))
1385         {
1386             // case 5 & 6
1387             DEBUG_VERBOSE("BF/F Dst has illegal region and type combination!");
1388             inst->emit(std::cerr);
1389             DEBUG_VERBOSE(std::endl);
1390             MUST_BE_TRUE(false, "BF/F Dst has illegal region and type combination!");
1391         }
1392     }
1393 
1394     // verify src
1395     for (int i = 0, sz = (int)inst->getNumSrc(); i < sz; ++i)
1396     {
1397         G4_Operand* src = inst->getSrc(i);
1398         if (!src || src->isNullReg()    // sanity
1399             || (src->getType() == Type_F && src->isImm()))
1400             continue;
1401 
1402         G4_Type  srcTy = src->getType();
1403         if (srcTy == Type_BF &&
1404             (src->isImm() || (inst->getExecSize() != g4::SIMD1 && src->asSrcRegRegion()->getRegion()->isScalar())))
1405         {
1406             // case 4
1407             DEBUG_VERBOSE(" Src: Imm BF/broadcast scalar BF are not supported!");
1408             inst->emit(std::cerr);
1409             DEBUG_VERBOSE(std::endl);
1410             MUST_BE_TRUE(false, "Src: Imm BF/broadcast scalar BF are not supported!");
1411         }
1412 
1413         G4_SrcRegRegion* sreg = src->asSrcRegRegion();
1414         uint32_t so = sreg->getSubRegOff();
1415         bool isLegitPackedBF = (srcTy == Type_BF
1416             && !sreg->getRegion()->isScalar()
1417             && sreg->getRegion()->isContiguous(inst->getExecSize()) && (so == 0 || so == nativeES));
1418         bool isLegitF = (srcTy == Type_F
1419             && !sreg->getRegion()->isScalar()
1420             && sreg->getRegion()->isContiguous(inst->getExecSize()) && so == 0);
1421         bool isLegitScalar = (sreg->getRegion()->isScalar()
1422             && (srcTy == Type_F || (srcTy == Type_BF && inst->getExecSize() == g4::SIMD1)));
1423         if (!(isLegitPackedBF || isLegitF || isLegitScalar))
1424         {
1425             // case 5 & 6
1426             DEBUG_VERBOSE("Src has illegal region and type combination!");
1427             inst->emit(std::cerr);
1428             DEBUG_VERBOSE(std::endl);
1429             MUST_BE_TRUE(false, "Src has illegal region and type combination!");
1430         }
1431     }
1432 
1433     // case 7
1434     if (inst->getExecSize() > nativeES)
1435     {
1436         std::stringstream ss;
1437         ss << "Inst in BF mixed mode should have execsize <= " << nativeES << '\n';
1438         DEBUG_VERBOSE(ss.str().c_str());
1439         inst->emit(std::cerr);
1440         DEBUG_VERBOSE(std::endl);
1441         MUST_BE_TRUE(false, ss.str().c_str());
1442     }
1443     return;
1444 }
1445