1 /*========================== begin_copyright_notice ============================
2 
3 Copyright (C) 2020-2021 Intel Corporation
4 
5 SPDX-License-Identifier: MIT
6 
7 ============================= end_copyright_notice ===========================*/
8 
9 #include "InstSplit.h"
10 
11 using namespace vISA;
12 
13 
InstSplitPass(IR_Builder * builder)14 InstSplitPass::InstSplitPass(IR_Builder* builder) : m_builder(builder)
15 {
16 }
17 
18 // This pass verifies instructions sizes with respect to SIMD width and operands' data type.
19 // Instructions that touch more than 2 GRFs are split evenly until they are within 2 GRFs.
20 // Instructions not considered for splitting:
21 //      - SIMD1, SIMD2, SIMD4 and SIMD8
22 //      - Send messages
23 //      - Plane
24 //      - Control flow, labels and return
25 //      - Dpas
26 //      - Instructions with indirect addressing other than 1x1 indirect region
run()27 void InstSplitPass::run()
28 {
29     for (INST_LIST_ITER it = m_builder->instList.begin(), instlistEnd = m_builder->instList.end(); it != instlistEnd; ++it)
30     {
31         G4_INST* inst = *it;
32 
33         if (inst->getExecSize() == g4::SIMD1)
34         {
35             continue;
36         }
37 
38         if (inst->isSend() || inst->opcode() == G4_label ||
39             inst->opcode() == G4_pln || inst->opcode() == G4_return ||
40             inst->isFlowControl() || inst->isPseudoLogic() || inst->opcode() == G4_madw)
41         {
42             continue;
43         }
44         if (inst->isDpas())
45         {
46             continue;
47         }
48 
49         it = splitInstruction(it, m_builder->instList);
50     }
51 }
52 
runOnBB(G4_BB * bb)53 void InstSplitPass::runOnBB(G4_BB* bb)
54 {
55     for (INST_LIST_ITER it = bb->begin(), instlistEnd = bb->end(); it != instlistEnd; ++it)
56     {
57         G4_INST* inst = *it;
58 
59         if (inst->getExecSize() == g4::SIMD1)
60         {
61             continue;
62         }
63 
64         if (inst->isSend() || inst->opcode() == G4_label ||
65             inst->opcode() == G4_pln || inst->opcode() == G4_return ||
66             inst->isFlowControl() || inst->isPseudoLogic())
67         {
68             continue;
69         }
70         if (inst->isDpas())
71         {
72             continue;
73         }
74 
75         it = splitInstruction(it, bb->getInstList());
76     }
77 }
78 
79 // Recursive function to split instructions that touch more than 2 GRF
80 // For example, with 32-byte GRF:
81 //    1 SIMD32 inst with 64-bit operand(s)
82 //    split into:
83 //                  -> 2 SIMD16 insts with 64-bit operand(s)
84 //    split again into:
85 //                  -> 4 SIMD8 insts with 64-bit operand(s)
splitInstruction(INST_LIST_ITER it,INST_LIST & instList)86 INST_LIST_ITER InstSplitPass::splitInstruction(INST_LIST_ITER it, INST_LIST& instList)
87 {
88     G4_INST* inst = *it;
89     bool doSplit = false;
90     G4_ExecSize execSize = inst->getExecSize();
91 
92     auto cross2GRF = [this](G4_Operand* opnd)
93     {
94         G4_SrcRegRegion* src = opnd->asSrcRegRegion();
95         uint32_t leftBound = 0, rightBound = 0;
96         computeSrcBounds(src, leftBound, rightBound);
97         return (rightBound - leftBound) > (getGRFSize() * 2u);
98     };
99 
100     auto cross2GRFDst = [inst, this](G4_DstRegRegion* dst)
101     {
102         if (dst->isNullReg())
103         {
104             return ((unsigned)inst->getExecSize() * dst->getTypeSize() * dst->getHorzStride()) > (getGRFSize() * 2u);
105         }
106         uint32_t leftBound = 0, rightBound = 0;
107         computeDstBounds(dst, leftBound, rightBound);
108         return (rightBound - leftBound) > (getGRFSize() * 2u);
109     };
110 
111     auto useTmpForSrc = [&](G4_SrcRegRegion* src) -> G4_SrcRegRegion*
112     {
113         // insert mov before current instruction
114         G4_Declare* dcl = m_builder->createTempVar(execSize, src->getType(), Any);
115         G4_SrcModifier modifier = src->getModifier();
116         src->setModifier(Mod_src_undef);
117 
118         G4_INST* movInst = m_builder->createMov(execSize, m_builder->createDstRegRegion(dcl, 1),
119             src, inst->getOption(), false);
120         movInst->inheritDIFrom(inst);
121 
122         INST_LIST_ITER newMovIter = instList.insert(it, movInst);
123 
124         // split new mov if needed
125         splitInstruction(newMovIter, instList);
126 
127         G4_SrcRegRegion* tmpSrc = m_builder->createSrcRegRegion(modifier, Direct, dcl->getRegVar(),
128             0, 0, m_builder->getRegionStride1(), dcl->getElemType());
129         return tmpSrc;
130     };
131 
132     // Check sources
133     for (int i = 0, numSrc = inst->getNumSrc(); i < numSrc; ++i)
134     {
135         if (!inst->getSrc(i)->isSrcRegRegion())
136             continue;
137         if (cross2GRF(inst->getSrc(i)))
138         {
139             doSplit = true;
140             break;
141         }
142         if (m_builder->getPlatform() >= XeHP_SDV)
143         {
144             // Instructions whose operands are 64b and have 2D regioning need to be split
145             // up front to help fixUnalignedRegions(..) covering 2D cases.
146             G4_SrcRegRegion* src = inst->getSrc(i)->asSrcRegRegion();
147             if ((src->getType() == Type_DF || IS_QTYPE(src->getType())) &&
148                 !src->getRegion()->isSingleStride(execSize))
149             {
150                 // Try splitting the inst if it's a mov. Otherwise, legalize
151                 // the inst by inserting a mov for the src, and split the new
152                 // mov if needed.
153                 if (inst->opcode() == G4_mov)
154                 {
155                     doSplit = true;
156                     break;
157                 }
158 
159                 auto tmpSrc = useTmpForSrc(src);
160                 assert(tmpSrc->getRegion()->isSingleStride(execSize));
161                 inst->setSrc(tmpSrc, i);
162             }
163         }
164     }
165 
166 
167     // Check destination
168     if (inst->getDst() && cross2GRFDst(inst->getDst()))
169     {
170         doSplit = true;
171     }
172 
173     // Handle split exceptions
174     if (!doSplit)
175     {
176         if (inst->opcode() == G4_cmp)
177         {
178             // Due to a simulator quirk, we need to split cmp instruction even if the
179             // dst operand of the compare is null, if it "looks" too large,
180             // that is, if the execution size is 16 and the comparison type
181             // is QW.
182             if (needSplitByExecSize(execSize) && inst->getDst()->isNullReg() &&
183                 (inst->getSrc(0)->getTypeSize() > 4 ||
184                     inst->getSrc(1)->getTypeSize() > 4))
185             {
186                 doSplit = true;
187             }
188         }
189     }
190 
191     if (!doSplit)
192     {
193         return it;
194     }
195 
196     G4_opcode op = inst->opcode();
197     G4_ExecSize newExecSize {execSize / 2};
198 
199     G4_DstRegRegion* dst = inst->getDst();
200     bool nullDst = dst && inst->hasNULLDst();
201 
202     // Check src/dst dependency
203     if (dst && !nullDst)
204     {
205         for (int i = 0, numSrc = inst->getNumSrc(); i < numSrc; i++)
206         {
207             bool useTmp = false;
208             G4_Operand* src = inst->getSrc(i);
209             G4_CmpRelation rel = compareSrcDstRegRegion(dst, src);
210             if (rel != Rel_disjoint)
211             {
212                 useTmp = (rel != Rel_eq) ||
213                     src->asSrcRegRegion()->getRegion()->isRepeatRegion(inst->getExecSize());
214             }
215 
216             if (useTmp)
217             {
218                 MUST_BE_TRUE(src != nullptr && src->isSrcRegRegion(), "source must be a SrcRegRegion");
219                 auto tmpSrc = useTmpForSrc(src->asSrcRegRegion());
220                 inst->setSrc(tmpSrc, i);
221             }
222         }
223     }
224 
225     // Create new predicate
226     G4_Predicate* newPred = NULL;
227     if (inst->getPredicate())
228     {
229         newPred = inst->getPredicate();
230         newPred->splitPred();
231     }
232 
233     // Create new condition modifier
234     G4_CondMod* newCondMod = NULL;
235     if (inst->getCondMod())
236     {
237         newCondMod = inst->getCondMod();
238         newCondMod->splitCondMod();
239     }
240 
241     INST_LIST_ITER newInstIterator = it;
242     for (int i = 0; i < execSize; i += newExecSize)
243     {
244         G4_INST* newInst = nullptr;
245 
246         // Create new destination
247         G4_DstRegRegion* newDst;
248         if (!nullDst)
249         {
250             newDst = m_builder->createSubDstOperand(dst, (uint16_t)i, newExecSize);
251         }
252         else
253         {
254             newDst = dst;
255         }
256 
257         // Create new split instruction
258         newInst = m_builder->makeSplittingInst(inst, newExecSize);
259         newInst->setDest(newDst);
260         newInst->setPredicate(m_builder->duplicateOperand(newPred));
261         newInst->setCondMod(m_builder->duplicateOperand(newCondMod));
262         newInstIterator = instList.insert(it, newInst);
263 
264         // Set new sources
265         for (int j = 0, numSrc = inst->getNumSrc(); j < numSrc; j++)
266         {
267             G4_Operand* src = inst->getSrc(j);
268             if (!src)
269                 continue;
270 
271             // Src1 for single source math should be arc reg null.
272             if (src->isImm() ||
273                 (inst->opcode() == G4_math && j == 1 && src->isNullReg()))
274             {
275                 newInst->setSrc(src, j);
276             }
277             else if (src->asSrcRegRegion()->isScalar() || (j == 0 && op == G4_line))
278             {
279                 newInst->setSrc(m_builder->duplicateOperand(src), j);
280             }
281             else
282             {
283                 newInst->setSrc(m_builder->createSubSrcOperand(src->asSrcRegRegion(), (uint16_t)i,
284                     newExecSize, (uint8_t)(src->asSrcRegRegion()->getRegion()->vertStride),
285                     (uint8_t)(src->asSrcRegRegion()->getRegion()->width)), j);
286             }
287         }
288 
289         // Set new mask
290         // FIXME: To update the mask in a CM kernel, the inst's BB should be divergent.
291         //        However, at this stage BBs are not constructed yet.
292         bool isCMKernel = m_builder->kernel.getInt32KernelAttr(Attributes::ATTR_Target) == VISA_CM;
293         bool needsMaskOffset = newCondMod || newPred || (!isCMKernel && !inst->isWriteEnableInst());
294         if (needsMaskOffset)
295         {
296             int newMaskOffset = inst->getMaskOffset() + (i == 0 ? 0 : newExecSize);
297             bool nibOk = m_builder->hasNibCtrl() &&
298                 (inst->getDst()->getTypeSize() == 8 || TypeSize(inst->getExecType()) == 8);
299             G4_InstOption newMask = G4_INST::offsetToMask(newExecSize, newMaskOffset, nibOk);
300             newInst->setMaskOption(newMask);
301         }
302 
303         // Call recursive splitting function
304         newInstIterator = splitInstruction(newInstIterator, instList);
305     }
306 
307     // remove original instruction
308     instList.erase(it);
309     return newInstIterator;
310 }
311 
needSplitByExecSize(G4_ExecSize execSize) const312 bool InstSplitPass::needSplitByExecSize(G4_ExecSize execSize) const
313 {
314     if (getGRFSize() == 64)
315     {
316         return execSize == g4::SIMD32;
317     }
318     return execSize == g4::SIMD16;
319 }
320 
321 // Compare regRegion of source operand and destination.
322 // We put this in a separate function since compareOperand from G4_DstRegRegion
323 // and G4_SrcRegRegion don't handle regions that cross 2 GRFs.
compareSrcDstRegRegion(G4_DstRegRegion * dstRegion,G4_Operand * opnd)324 G4_CmpRelation InstSplitPass::compareSrcDstRegRegion(G4_DstRegRegion* dstRegion, G4_Operand* opnd)
325 {
326 
327     G4_VarBase* dstBase = dstRegion->getBase();
328     G4_VarBase* srcBase = opnd->getBase();
329     G4_RegAccess dstAcc = dstRegion->getRegAccess();
330     G4_RegAccess srcAcc = opnd->getRegAccess();
331     G4_Declare* dstDcl = dstRegion->getTopDcl();
332     G4_Declare* srcDcl = opnd->getTopDcl();
333 
334     if (!opnd->isSrcRegRegion() || dstBase == nullptr || srcBase == nullptr)
335     {
336         // a null base operand can never interfere with anything
337         return Rel_disjoint;
338     }
339 
340     if (dstDcl == srcDcl && srcDcl != nullptr)
341     {
342         // special checks for pseudo kills
343         G4_INST* dstInst = dstRegion->getInst();
344         G4_INST* srcInst = opnd->getInst();
345         if (dstInst && (dstInst->isPseudoKill() || dstInst->isLifeTimeEnd()))
346         {
347             return Rel_interfere;
348         }
349 
350         if (srcInst && (srcInst->isPseudoKill() || srcInst->isLifeTimeEnd()))
351         {
352             return Rel_interfere;
353         }
354     }
355 
356     if (srcAcc == dstAcc && dstAcc != Direct)
357     {
358         // two indirect are assumed to interfere in the absence of pointer analysis
359         return Rel_interfere;
360     }
361     else if (srcAcc != dstAcc)
362     {
363         // direct v. indirect
364         auto mayInterfereWithIndirect = [](G4_Operand* direct, G4_Operand* indirect)
365         {
366             assert((direct->getRegAccess() == Direct && indirect->getRegAccess() == IndirGRF) &&
367                 "first opereand should be direct and second indirect");
368             return (direct->getTopDcl() && direct->getTopDcl()->getAddressed()) ||
369                 (direct->isAddress() && direct->getTopDcl() == indirect->getTopDcl());
370         };
371 
372         if ((srcAcc != Direct && mayInterfereWithIndirect(dstRegion, opnd)) ||
373             (dstAcc != Direct && mayInterfereWithIndirect(opnd, dstRegion)))
374         {
375             return Rel_interfere;
376         }
377         return Rel_disjoint;
378     }
379 
380     // Check if both are physically assigned
381     G4_VarBase* dstPhyReg = dstBase->isRegVar() ? dstBase->asRegVar()->getPhyReg() : dstBase;
382     G4_VarBase* srcPhyReg = srcBase->isRegVar() ? srcBase->asRegVar()->getPhyReg() : srcBase;
383     if (dstPhyReg && srcPhyReg)
384     {
385         assert(dstPhyReg->isPhyReg() && srcPhyReg->isPhyReg());
386         if (dstPhyReg->getKind() != srcPhyReg->getKind())
387             return Rel_disjoint;
388 
389         if (dstPhyReg->isPhyAreg())
390         {
391             if (dstPhyReg->asAreg()->getArchRegType() == AREG_NULL)
392             {
393                 //like NaN, a null ARF is disjoint to everyone including itself
394                 return Rel_disjoint;
395             }
396             return (dstPhyReg->asAreg()->getArchRegType() ==
397                 srcPhyReg->asAreg()->getArchRegType()) ? Rel_eq : Rel_disjoint;
398         }
399     }
400 
401     if (dstBase->getKind() != srcBase->getKind())
402     {
403         return Rel_disjoint;
404     }
405 
406     if (dstDcl != srcDcl)
407     {
408         return Rel_disjoint;
409     }
410 
411     // Lastly, check byte footprint for exact relation
412     uint32_t srcLeftBound = 0, srcRightBound = 0;
413     int maskSize = 8 * getGRFSize();
414     BitSet srcBitSet(maskSize, false);
415     computeSrcBounds(opnd->asSrcRegRegion(), srcLeftBound, srcRightBound);
416     generateBitMask(opnd, srcBitSet);
417 
418     uint32_t dstLeftBound = 0, dstRightBound = 0;
419     BitSet dstBitSet(maskSize, false);
420     computeDstBounds(dstRegion, dstLeftBound, dstRightBound);
421     generateBitMask(dstRegion, dstBitSet);
422 
423     if (dstRightBound < srcLeftBound || srcRightBound < dstLeftBound)
424     {
425         return Rel_disjoint;
426     }
427     else if (dstLeftBound == srcLeftBound &&
428         dstRightBound == srcRightBound &&
429         dstBitSet == srcBitSet)
430     {
431         return Rel_eq;
432     }
433     else
434     {
435 
436         BitSet tmp = dstBitSet;
437         dstBitSet &= srcBitSet;
438         if (dstBitSet.isEmpty())
439         {
440             return Rel_disjoint;
441         }
442 
443         dstBitSet = tmp;
444         dstBitSet -= srcBitSet;
445         if (dstBitSet.isEmpty())
446         {
447             return Rel_lt;
448         }
449         srcBitSet -= tmp;
450         return srcBitSet.isEmpty() ? Rel_gt : Rel_interfere;
451     }
452 }
453 
454 // Simplified function to calculate left/right bounds.
455 // InstSplitPass calls this function since the operand's internal computeBound function
456 // carries several aditional calculations and asserts restricted to 2 GRFs.
computeDstBounds(G4_DstRegRegion * dstRegion,uint32_t & leftBound,uint32_t & rightBound)457 void InstSplitPass::computeDstBounds(G4_DstRegRegion* dstRegion, uint32_t& leftBound, uint32_t& rightBound)
458 {
459     unsigned short typeSize = dstRegion->getTypeSize();
460 
461     // Calculate left bound
462     {
463         G4_VarBase* base = dstRegion->getBase();
464         G4_Declare* topDcl = NULL;
465         uint32_t subRegOff = dstRegion->getSubRegOff();
466         uint32_t regOff = dstRegion->getRegOff();
467         uint32_t newregoff = regOff, offset = 0;
468         if (base && base->isRegVar())
469         {
470             topDcl = base->asRegVar()->getDeclare();
471             if (!topDcl && base->asRegVar()->isGreg())
472             {
473                 newregoff = base->asRegVar()->asGreg()->getRegNum();
474             }
475         }
476 
477         if (topDcl)
478         {
479             while (topDcl->getAliasDeclare())
480             {
481                 offset += topDcl->getAliasOffset();
482                 topDcl = topDcl->getAliasDeclare();
483             }
484         }
485 
486         if (base != NULL && base->isAccReg())
487         {
488             leftBound = subRegOff * typeSize;
489             if (base->asAreg()->getArchRegType() == AREG_ACC1 || regOff == 1)
490             {
491                 leftBound += getGRFSize();
492             }
493         }
494         else if (topDcl)
495         {
496             if (dstRegion->getRegAccess() == Direct)
497             {
498                 leftBound = offset + newregoff * numEltPerGRF<Type_UB>() + subRegOff * typeSize;
499             }
500             else
501             {
502                 leftBound = subRegOff * TypeSize(ADDR_REG_TYPE);
503             }
504         }
505     }
506 
507     // Calculate right bound
508     {
509         if (dstRegion->getRegAccess() == Direct)
510         {
511             unsigned short s_size = dstRegion->getHorzStride() * typeSize;
512             unsigned totalBytes = (dstRegion->getInst()->getExecSize() - 1) * s_size + typeSize;
513             rightBound = leftBound + totalBytes - 1;
514             dstRegion->getHorzStride();
515         }
516         else
517         {
518             rightBound = leftBound + TypeSize(ADDR_REG_TYPE) - 1;
519         }
520     }
521 }
522 
523 // Simplified function to calculate left/right bounds.
524 // InstSplitPass calls this function since the operand's internal computeBound function
525 // carries several aditional calculations and asserts restricted to 2 GRFs.
computeSrcBounds(G4_SrcRegRegion * srcRegion,uint32_t & leftBound,uint32_t & rightBound)526 void InstSplitPass::computeSrcBounds(G4_SrcRegRegion* srcRegion, uint32_t& leftBound, uint32_t& rightBound)
527 {
528     unsigned short typeSize = srcRegion->getTypeSize();
529 
530     // Calculate left bound
531     {
532         G4_VarBase* base = srcRegion->getBase();
533         G4_Declare* topDcl = NULL;
534         uint32_t subRegOff = srcRegion->getSubRegOff();
535         uint32_t regOff = srcRegion->getRegOff();
536         unsigned newregoff = regOff, offset = 0;
537 
538         if (base)
539         {
540             if (base->isRegVar())
541             {
542                 topDcl = base->asRegVar()->getDeclare();
543                 if (!topDcl && base->asRegVar()->isGreg())
544                 {
545                     newregoff = base->asRegVar()->asGreg()->getRegNum();
546                 }
547             }
548         }
549 
550         if (topDcl)
551         {
552             while (topDcl->getAliasDeclare())
553             {
554                 offset += topDcl->getAliasOffset();
555                 topDcl = topDcl->getAliasDeclare();
556             }
557         }
558 
559         if (base != NULL && base->isAccReg())
560         {
561             leftBound = subRegOff * typeSize;
562             if (base->asAreg()->getArchRegType() == AREG_ACC1)
563             {
564                 leftBound += getGRFSize();
565             }
566         }
567         else if (topDcl)
568         {
569             if (srcRegion->getRegAccess() == Direct)
570             {
571                 leftBound = offset + newregoff * numEltPerGRF<Type_UB>() + subRegOff * typeSize;
572             }
573             else
574             {
575                 leftBound = subRegOff * TypeSize(ADDR_REG_TYPE);
576             }
577         }
578     }
579 
580     // Calculate right bound
581     {
582         if (srcRegion->getRegAccess() == Direct)
583         {
584             unsigned short hs = srcRegion->getRegion()->isScalar() ? 1 : srcRegion->getRegion()->horzStride;
585             unsigned short vs = srcRegion->getRegion()->isScalar() ? 0 : srcRegion->getRegion()->vertStride;
586 
587             if (srcRegion->getRegion()->isScalar())
588             {
589                 rightBound = leftBound + typeSize - 1;
590             }
591             else
592             {
593                 int numRows = srcRegion->getInst()->getExecSize() / srcRegion->getRegion()->width;
594                 if (numRows > 0)
595                 {
596                     rightBound = leftBound +
597                         (numRows - 1) * vs * typeSize +
598                         hs * (srcRegion->getRegion()->width - 1) * typeSize +
599                         typeSize - 1;
600                 }
601                 else
602                 {
603                     rightBound = leftBound +
604                         hs * (srcRegion->getInst()->getExecSize() - 1) * typeSize +
605                         typeSize - 1;
606                 }
607             }
608         }
609         else
610         {
611             unsigned short numAddrSubReg = 1;
612             if (srcRegion->getRegion()->isRegionWH())
613             {
614                 numAddrSubReg = srcRegion->getInst()->getExecSize() / srcRegion->getRegion()->width;
615             }
616             rightBound = leftBound + TypeSize(ADDR_REG_TYPE) * numAddrSubReg - 1;
617         }
618     }
619 }
620 
621 // Generates the byte footprint of an instruction's operand
generateBitMask(G4_Operand * opnd,BitSet & footprint)622 void InstSplitPass::generateBitMask(G4_Operand* opnd, BitSet& footprint)
623 {
624     uint64_t bitSeq = TypeFootprint(opnd->getType());
625     unsigned short typeSize = opnd->getTypeSize();
626 
627     if (opnd->isDstRegRegion())
628     {
629         if (opnd->getRegAccess() == Direct)
630         {
631             G4_DstRegRegion* dst = opnd->asDstRegRegion();
632             unsigned short horzStride = dst->getHorzStride();
633             unsigned short s_size = horzStride * typeSize;
634             for (uint8_t i = 0; i < opnd->getInst()->getExecSize(); ++i)
635             {
636                 int eltOffset = i * s_size;
637                 for (uint8_t j = 0; j < typeSize; j++)
638                 {
639                     footprint.set(eltOffset + j, true);
640                 }
641             }
642         }
643         else
644         {
645             footprint.set(0, true);
646             footprint.set(1, true);
647         }
648     }
649     else if (opnd->isSrcRegRegion())
650     {
651         G4_SrcRegRegion* src = opnd->asSrcRegRegion();
652         const RegionDesc* srcReg = src->getRegion();
653         if (opnd->getRegAccess() == Direct)
654         {
655             if (srcReg->isScalar())
656             {
657                 uint64_t mask = bitSeq;
658                 for (unsigned i = 0; i < typeSize; ++i)
659                 {
660                     if (mask & (1ULL << i))
661                     {
662                         footprint.set(i, true);
663                     }
664                 }
665             }
666             else
667             {
668                 for (int i = 0, numRows = opnd->getInst()->getExecSize() / srcReg->width; i < numRows; ++i)
669                 {
670                     for (int j = 0; j < srcReg->width; ++j)
671                     {
672                         int eltOffset = i * srcReg->vertStride * typeSize + j * srcReg->horzStride * typeSize;
673                         for (uint8_t k = 0; k < typeSize; k++)
674                         {
675                             footprint.set(eltOffset + k, true);
676                         }
677                     }
678                 }
679             }
680         }
681         else
682         {
683             unsigned short numAddrSubReg = 1;
684             if (srcReg->isRegionWH())
685             {
686                 numAddrSubReg = opnd->getInst()->getExecSize() / srcReg->width;
687             }
688             uint64_t mask = 0;
689             for (unsigned i = 0; i < numAddrSubReg; i++)
690             {
691                 mask |= ((uint64_t)0x3) << (i * 2);
692             }
693             for (unsigned i = 0; i < 64; ++i)
694             {
695                 if (mask & (1ULL << i))
696                 {
697                     footprint.set(i, true);
698                 }
699             }
700         }
701     }
702 }
703