1 /*========================== begin_copyright_notice ============================
2
3 Copyright (C) 2020-2021 Intel Corporation
4
5 SPDX-License-Identifier: MIT
6
7 ============================= end_copyright_notice ===========================*/
8
9 #include "InstSplit.h"
10
11 using namespace vISA;
12
13
InstSplitPass(IR_Builder * builder)14 InstSplitPass::InstSplitPass(IR_Builder* builder) : m_builder(builder)
15 {
16 }
17
18 // This pass verifies instructions sizes with respect to SIMD width and operands' data type.
19 // Instructions that touch more than 2 GRFs are split evenly until they are within 2 GRFs.
20 // Instructions not considered for splitting:
21 // - SIMD1, SIMD2, SIMD4 and SIMD8
22 // - Send messages
23 // - Plane
24 // - Control flow, labels and return
25 // - Dpas
26 // - Instructions with indirect addressing other than 1x1 indirect region
run()27 void InstSplitPass::run()
28 {
29 for (INST_LIST_ITER it = m_builder->instList.begin(), instlistEnd = m_builder->instList.end(); it != instlistEnd; ++it)
30 {
31 G4_INST* inst = *it;
32
33 if (inst->getExecSize() == g4::SIMD1)
34 {
35 continue;
36 }
37
38 if (inst->isSend() || inst->opcode() == G4_label ||
39 inst->opcode() == G4_pln || inst->opcode() == G4_return ||
40 inst->isFlowControl() || inst->isPseudoLogic() || inst->opcode() == G4_madw)
41 {
42 continue;
43 }
44 if (inst->isDpas())
45 {
46 continue;
47 }
48
49 it = splitInstruction(it, m_builder->instList);
50 }
51 }
52
runOnBB(G4_BB * bb)53 void InstSplitPass::runOnBB(G4_BB* bb)
54 {
55 for (INST_LIST_ITER it = bb->begin(), instlistEnd = bb->end(); it != instlistEnd; ++it)
56 {
57 G4_INST* inst = *it;
58
59 if (inst->getExecSize() == g4::SIMD1)
60 {
61 continue;
62 }
63
64 if (inst->isSend() || inst->opcode() == G4_label ||
65 inst->opcode() == G4_pln || inst->opcode() == G4_return ||
66 inst->isFlowControl() || inst->isPseudoLogic())
67 {
68 continue;
69 }
70 if (inst->isDpas())
71 {
72 continue;
73 }
74
75 it = splitInstruction(it, bb->getInstList());
76 }
77 }
78
79 // Recursive function to split instructions that touch more than 2 GRF
80 // For example, with 32-byte GRF:
81 // 1 SIMD32 inst with 64-bit operand(s)
82 // split into:
83 // -> 2 SIMD16 insts with 64-bit operand(s)
84 // split again into:
85 // -> 4 SIMD8 insts with 64-bit operand(s)
splitInstruction(INST_LIST_ITER it,INST_LIST & instList)86 INST_LIST_ITER InstSplitPass::splitInstruction(INST_LIST_ITER it, INST_LIST& instList)
87 {
88 G4_INST* inst = *it;
89 bool doSplit = false;
90 G4_ExecSize execSize = inst->getExecSize();
91
92 auto cross2GRF = [this](G4_Operand* opnd)
93 {
94 G4_SrcRegRegion* src = opnd->asSrcRegRegion();
95 uint32_t leftBound = 0, rightBound = 0;
96 computeSrcBounds(src, leftBound, rightBound);
97 return (rightBound - leftBound) > (getGRFSize() * 2u);
98 };
99
100 auto cross2GRFDst = [inst, this](G4_DstRegRegion* dst)
101 {
102 if (dst->isNullReg())
103 {
104 return ((unsigned)inst->getExecSize() * dst->getTypeSize() * dst->getHorzStride()) > (getGRFSize() * 2u);
105 }
106 uint32_t leftBound = 0, rightBound = 0;
107 computeDstBounds(dst, leftBound, rightBound);
108 return (rightBound - leftBound) > (getGRFSize() * 2u);
109 };
110
111 auto useTmpForSrc = [&](G4_SrcRegRegion* src) -> G4_SrcRegRegion*
112 {
113 // insert mov before current instruction
114 G4_Declare* dcl = m_builder->createTempVar(execSize, src->getType(), Any);
115 G4_SrcModifier modifier = src->getModifier();
116 src->setModifier(Mod_src_undef);
117
118 G4_INST* movInst = m_builder->createMov(execSize, m_builder->createDstRegRegion(dcl, 1),
119 src, inst->getOption(), false);
120 movInst->inheritDIFrom(inst);
121
122 INST_LIST_ITER newMovIter = instList.insert(it, movInst);
123
124 // split new mov if needed
125 splitInstruction(newMovIter, instList);
126
127 G4_SrcRegRegion* tmpSrc = m_builder->createSrcRegRegion(modifier, Direct, dcl->getRegVar(),
128 0, 0, m_builder->getRegionStride1(), dcl->getElemType());
129 return tmpSrc;
130 };
131
132 // Check sources
133 for (int i = 0, numSrc = inst->getNumSrc(); i < numSrc; ++i)
134 {
135 if (!inst->getSrc(i)->isSrcRegRegion())
136 continue;
137 if (cross2GRF(inst->getSrc(i)))
138 {
139 doSplit = true;
140 break;
141 }
142 if (m_builder->getPlatform() >= XeHP_SDV)
143 {
144 // Instructions whose operands are 64b and have 2D regioning need to be split
145 // up front to help fixUnalignedRegions(..) covering 2D cases.
146 G4_SrcRegRegion* src = inst->getSrc(i)->asSrcRegRegion();
147 if ((src->getType() == Type_DF || IS_QTYPE(src->getType())) &&
148 !src->getRegion()->isSingleStride(execSize))
149 {
150 // Try splitting the inst if it's a mov. Otherwise, legalize
151 // the inst by inserting a mov for the src, and split the new
152 // mov if needed.
153 if (inst->opcode() == G4_mov)
154 {
155 doSplit = true;
156 break;
157 }
158
159 auto tmpSrc = useTmpForSrc(src);
160 assert(tmpSrc->getRegion()->isSingleStride(execSize));
161 inst->setSrc(tmpSrc, i);
162 }
163 }
164 }
165
166
167 // Check destination
168 if (inst->getDst() && cross2GRFDst(inst->getDst()))
169 {
170 doSplit = true;
171 }
172
173 // Handle split exceptions
174 if (!doSplit)
175 {
176 if (inst->opcode() == G4_cmp)
177 {
178 // Due to a simulator quirk, we need to split cmp instruction even if the
179 // dst operand of the compare is null, if it "looks" too large,
180 // that is, if the execution size is 16 and the comparison type
181 // is QW.
182 if (needSplitByExecSize(execSize) && inst->getDst()->isNullReg() &&
183 (inst->getSrc(0)->getTypeSize() > 4 ||
184 inst->getSrc(1)->getTypeSize() > 4))
185 {
186 doSplit = true;
187 }
188 }
189 }
190
191 if (!doSplit)
192 {
193 return it;
194 }
195
196 G4_opcode op = inst->opcode();
197 G4_ExecSize newExecSize {execSize / 2};
198
199 G4_DstRegRegion* dst = inst->getDst();
200 bool nullDst = dst && inst->hasNULLDst();
201
202 // Check src/dst dependency
203 if (dst && !nullDst)
204 {
205 for (int i = 0, numSrc = inst->getNumSrc(); i < numSrc; i++)
206 {
207 bool useTmp = false;
208 G4_Operand* src = inst->getSrc(i);
209 G4_CmpRelation rel = compareSrcDstRegRegion(dst, src);
210 if (rel != Rel_disjoint)
211 {
212 useTmp = (rel != Rel_eq) ||
213 src->asSrcRegRegion()->getRegion()->isRepeatRegion(inst->getExecSize());
214 }
215
216 if (useTmp)
217 {
218 MUST_BE_TRUE(src != nullptr && src->isSrcRegRegion(), "source must be a SrcRegRegion");
219 auto tmpSrc = useTmpForSrc(src->asSrcRegRegion());
220 inst->setSrc(tmpSrc, i);
221 }
222 }
223 }
224
225 // Create new predicate
226 G4_Predicate* newPred = NULL;
227 if (inst->getPredicate())
228 {
229 newPred = inst->getPredicate();
230 newPred->splitPred();
231 }
232
233 // Create new condition modifier
234 G4_CondMod* newCondMod = NULL;
235 if (inst->getCondMod())
236 {
237 newCondMod = inst->getCondMod();
238 newCondMod->splitCondMod();
239 }
240
241 INST_LIST_ITER newInstIterator = it;
242 for (int i = 0; i < execSize; i += newExecSize)
243 {
244 G4_INST* newInst = nullptr;
245
246 // Create new destination
247 G4_DstRegRegion* newDst;
248 if (!nullDst)
249 {
250 newDst = m_builder->createSubDstOperand(dst, (uint16_t)i, newExecSize);
251 }
252 else
253 {
254 newDst = dst;
255 }
256
257 // Create new split instruction
258 newInst = m_builder->makeSplittingInst(inst, newExecSize);
259 newInst->setDest(newDst);
260 newInst->setPredicate(m_builder->duplicateOperand(newPred));
261 newInst->setCondMod(m_builder->duplicateOperand(newCondMod));
262 newInstIterator = instList.insert(it, newInst);
263
264 // Set new sources
265 for (int j = 0, numSrc = inst->getNumSrc(); j < numSrc; j++)
266 {
267 G4_Operand* src = inst->getSrc(j);
268 if (!src)
269 continue;
270
271 // Src1 for single source math should be arc reg null.
272 if (src->isImm() ||
273 (inst->opcode() == G4_math && j == 1 && src->isNullReg()))
274 {
275 newInst->setSrc(src, j);
276 }
277 else if (src->asSrcRegRegion()->isScalar() || (j == 0 && op == G4_line))
278 {
279 newInst->setSrc(m_builder->duplicateOperand(src), j);
280 }
281 else
282 {
283 newInst->setSrc(m_builder->createSubSrcOperand(src->asSrcRegRegion(), (uint16_t)i,
284 newExecSize, (uint8_t)(src->asSrcRegRegion()->getRegion()->vertStride),
285 (uint8_t)(src->asSrcRegRegion()->getRegion()->width)), j);
286 }
287 }
288
289 // Set new mask
290 // FIXME: To update the mask in a CM kernel, the inst's BB should be divergent.
291 // However, at this stage BBs are not constructed yet.
292 bool isCMKernel = m_builder->kernel.getInt32KernelAttr(Attributes::ATTR_Target) == VISA_CM;
293 bool needsMaskOffset = newCondMod || newPred || (!isCMKernel && !inst->isWriteEnableInst());
294 if (needsMaskOffset)
295 {
296 int newMaskOffset = inst->getMaskOffset() + (i == 0 ? 0 : newExecSize);
297 bool nibOk = m_builder->hasNibCtrl() &&
298 (inst->getDst()->getTypeSize() == 8 || TypeSize(inst->getExecType()) == 8);
299 G4_InstOption newMask = G4_INST::offsetToMask(newExecSize, newMaskOffset, nibOk);
300 newInst->setMaskOption(newMask);
301 }
302
303 // Call recursive splitting function
304 newInstIterator = splitInstruction(newInstIterator, instList);
305 }
306
307 // remove original instruction
308 instList.erase(it);
309 return newInstIterator;
310 }
311
needSplitByExecSize(G4_ExecSize execSize) const312 bool InstSplitPass::needSplitByExecSize(G4_ExecSize execSize) const
313 {
314 if (getGRFSize() == 64)
315 {
316 return execSize == g4::SIMD32;
317 }
318 return execSize == g4::SIMD16;
319 }
320
321 // Compare regRegion of source operand and destination.
322 // We put this in a separate function since compareOperand from G4_DstRegRegion
323 // and G4_SrcRegRegion don't handle regions that cross 2 GRFs.
compareSrcDstRegRegion(G4_DstRegRegion * dstRegion,G4_Operand * opnd)324 G4_CmpRelation InstSplitPass::compareSrcDstRegRegion(G4_DstRegRegion* dstRegion, G4_Operand* opnd)
325 {
326
327 G4_VarBase* dstBase = dstRegion->getBase();
328 G4_VarBase* srcBase = opnd->getBase();
329 G4_RegAccess dstAcc = dstRegion->getRegAccess();
330 G4_RegAccess srcAcc = opnd->getRegAccess();
331 G4_Declare* dstDcl = dstRegion->getTopDcl();
332 G4_Declare* srcDcl = opnd->getTopDcl();
333
334 if (!opnd->isSrcRegRegion() || dstBase == nullptr || srcBase == nullptr)
335 {
336 // a null base operand can never interfere with anything
337 return Rel_disjoint;
338 }
339
340 if (dstDcl == srcDcl && srcDcl != nullptr)
341 {
342 // special checks for pseudo kills
343 G4_INST* dstInst = dstRegion->getInst();
344 G4_INST* srcInst = opnd->getInst();
345 if (dstInst && (dstInst->isPseudoKill() || dstInst->isLifeTimeEnd()))
346 {
347 return Rel_interfere;
348 }
349
350 if (srcInst && (srcInst->isPseudoKill() || srcInst->isLifeTimeEnd()))
351 {
352 return Rel_interfere;
353 }
354 }
355
356 if (srcAcc == dstAcc && dstAcc != Direct)
357 {
358 // two indirect are assumed to interfere in the absence of pointer analysis
359 return Rel_interfere;
360 }
361 else if (srcAcc != dstAcc)
362 {
363 // direct v. indirect
364 auto mayInterfereWithIndirect = [](G4_Operand* direct, G4_Operand* indirect)
365 {
366 assert((direct->getRegAccess() == Direct && indirect->getRegAccess() == IndirGRF) &&
367 "first opereand should be direct and second indirect");
368 return (direct->getTopDcl() && direct->getTopDcl()->getAddressed()) ||
369 (direct->isAddress() && direct->getTopDcl() == indirect->getTopDcl());
370 };
371
372 if ((srcAcc != Direct && mayInterfereWithIndirect(dstRegion, opnd)) ||
373 (dstAcc != Direct && mayInterfereWithIndirect(opnd, dstRegion)))
374 {
375 return Rel_interfere;
376 }
377 return Rel_disjoint;
378 }
379
380 // Check if both are physically assigned
381 G4_VarBase* dstPhyReg = dstBase->isRegVar() ? dstBase->asRegVar()->getPhyReg() : dstBase;
382 G4_VarBase* srcPhyReg = srcBase->isRegVar() ? srcBase->asRegVar()->getPhyReg() : srcBase;
383 if (dstPhyReg && srcPhyReg)
384 {
385 assert(dstPhyReg->isPhyReg() && srcPhyReg->isPhyReg());
386 if (dstPhyReg->getKind() != srcPhyReg->getKind())
387 return Rel_disjoint;
388
389 if (dstPhyReg->isPhyAreg())
390 {
391 if (dstPhyReg->asAreg()->getArchRegType() == AREG_NULL)
392 {
393 //like NaN, a null ARF is disjoint to everyone including itself
394 return Rel_disjoint;
395 }
396 return (dstPhyReg->asAreg()->getArchRegType() ==
397 srcPhyReg->asAreg()->getArchRegType()) ? Rel_eq : Rel_disjoint;
398 }
399 }
400
401 if (dstBase->getKind() != srcBase->getKind())
402 {
403 return Rel_disjoint;
404 }
405
406 if (dstDcl != srcDcl)
407 {
408 return Rel_disjoint;
409 }
410
411 // Lastly, check byte footprint for exact relation
412 uint32_t srcLeftBound = 0, srcRightBound = 0;
413 int maskSize = 8 * getGRFSize();
414 BitSet srcBitSet(maskSize, false);
415 computeSrcBounds(opnd->asSrcRegRegion(), srcLeftBound, srcRightBound);
416 generateBitMask(opnd, srcBitSet);
417
418 uint32_t dstLeftBound = 0, dstRightBound = 0;
419 BitSet dstBitSet(maskSize, false);
420 computeDstBounds(dstRegion, dstLeftBound, dstRightBound);
421 generateBitMask(dstRegion, dstBitSet);
422
423 if (dstRightBound < srcLeftBound || srcRightBound < dstLeftBound)
424 {
425 return Rel_disjoint;
426 }
427 else if (dstLeftBound == srcLeftBound &&
428 dstRightBound == srcRightBound &&
429 dstBitSet == srcBitSet)
430 {
431 return Rel_eq;
432 }
433 else
434 {
435
436 BitSet tmp = dstBitSet;
437 dstBitSet &= srcBitSet;
438 if (dstBitSet.isEmpty())
439 {
440 return Rel_disjoint;
441 }
442
443 dstBitSet = tmp;
444 dstBitSet -= srcBitSet;
445 if (dstBitSet.isEmpty())
446 {
447 return Rel_lt;
448 }
449 srcBitSet -= tmp;
450 return srcBitSet.isEmpty() ? Rel_gt : Rel_interfere;
451 }
452 }
453
454 // Simplified function to calculate left/right bounds.
455 // InstSplitPass calls this function since the operand's internal computeBound function
456 // carries several aditional calculations and asserts restricted to 2 GRFs.
computeDstBounds(G4_DstRegRegion * dstRegion,uint32_t & leftBound,uint32_t & rightBound)457 void InstSplitPass::computeDstBounds(G4_DstRegRegion* dstRegion, uint32_t& leftBound, uint32_t& rightBound)
458 {
459 unsigned short typeSize = dstRegion->getTypeSize();
460
461 // Calculate left bound
462 {
463 G4_VarBase* base = dstRegion->getBase();
464 G4_Declare* topDcl = NULL;
465 uint32_t subRegOff = dstRegion->getSubRegOff();
466 uint32_t regOff = dstRegion->getRegOff();
467 uint32_t newregoff = regOff, offset = 0;
468 if (base && base->isRegVar())
469 {
470 topDcl = base->asRegVar()->getDeclare();
471 if (!topDcl && base->asRegVar()->isGreg())
472 {
473 newregoff = base->asRegVar()->asGreg()->getRegNum();
474 }
475 }
476
477 if (topDcl)
478 {
479 while (topDcl->getAliasDeclare())
480 {
481 offset += topDcl->getAliasOffset();
482 topDcl = topDcl->getAliasDeclare();
483 }
484 }
485
486 if (base != NULL && base->isAccReg())
487 {
488 leftBound = subRegOff * typeSize;
489 if (base->asAreg()->getArchRegType() == AREG_ACC1 || regOff == 1)
490 {
491 leftBound += getGRFSize();
492 }
493 }
494 else if (topDcl)
495 {
496 if (dstRegion->getRegAccess() == Direct)
497 {
498 leftBound = offset + newregoff * numEltPerGRF<Type_UB>() + subRegOff * typeSize;
499 }
500 else
501 {
502 leftBound = subRegOff * TypeSize(ADDR_REG_TYPE);
503 }
504 }
505 }
506
507 // Calculate right bound
508 {
509 if (dstRegion->getRegAccess() == Direct)
510 {
511 unsigned short s_size = dstRegion->getHorzStride() * typeSize;
512 unsigned totalBytes = (dstRegion->getInst()->getExecSize() - 1) * s_size + typeSize;
513 rightBound = leftBound + totalBytes - 1;
514 dstRegion->getHorzStride();
515 }
516 else
517 {
518 rightBound = leftBound + TypeSize(ADDR_REG_TYPE) - 1;
519 }
520 }
521 }
522
523 // Simplified function to calculate left/right bounds.
524 // InstSplitPass calls this function since the operand's internal computeBound function
525 // carries several aditional calculations and asserts restricted to 2 GRFs.
computeSrcBounds(G4_SrcRegRegion * srcRegion,uint32_t & leftBound,uint32_t & rightBound)526 void InstSplitPass::computeSrcBounds(G4_SrcRegRegion* srcRegion, uint32_t& leftBound, uint32_t& rightBound)
527 {
528 unsigned short typeSize = srcRegion->getTypeSize();
529
530 // Calculate left bound
531 {
532 G4_VarBase* base = srcRegion->getBase();
533 G4_Declare* topDcl = NULL;
534 uint32_t subRegOff = srcRegion->getSubRegOff();
535 uint32_t regOff = srcRegion->getRegOff();
536 unsigned newregoff = regOff, offset = 0;
537
538 if (base)
539 {
540 if (base->isRegVar())
541 {
542 topDcl = base->asRegVar()->getDeclare();
543 if (!topDcl && base->asRegVar()->isGreg())
544 {
545 newregoff = base->asRegVar()->asGreg()->getRegNum();
546 }
547 }
548 }
549
550 if (topDcl)
551 {
552 while (topDcl->getAliasDeclare())
553 {
554 offset += topDcl->getAliasOffset();
555 topDcl = topDcl->getAliasDeclare();
556 }
557 }
558
559 if (base != NULL && base->isAccReg())
560 {
561 leftBound = subRegOff * typeSize;
562 if (base->asAreg()->getArchRegType() == AREG_ACC1)
563 {
564 leftBound += getGRFSize();
565 }
566 }
567 else if (topDcl)
568 {
569 if (srcRegion->getRegAccess() == Direct)
570 {
571 leftBound = offset + newregoff * numEltPerGRF<Type_UB>() + subRegOff * typeSize;
572 }
573 else
574 {
575 leftBound = subRegOff * TypeSize(ADDR_REG_TYPE);
576 }
577 }
578 }
579
580 // Calculate right bound
581 {
582 if (srcRegion->getRegAccess() == Direct)
583 {
584 unsigned short hs = srcRegion->getRegion()->isScalar() ? 1 : srcRegion->getRegion()->horzStride;
585 unsigned short vs = srcRegion->getRegion()->isScalar() ? 0 : srcRegion->getRegion()->vertStride;
586
587 if (srcRegion->getRegion()->isScalar())
588 {
589 rightBound = leftBound + typeSize - 1;
590 }
591 else
592 {
593 int numRows = srcRegion->getInst()->getExecSize() / srcRegion->getRegion()->width;
594 if (numRows > 0)
595 {
596 rightBound = leftBound +
597 (numRows - 1) * vs * typeSize +
598 hs * (srcRegion->getRegion()->width - 1) * typeSize +
599 typeSize - 1;
600 }
601 else
602 {
603 rightBound = leftBound +
604 hs * (srcRegion->getInst()->getExecSize() - 1) * typeSize +
605 typeSize - 1;
606 }
607 }
608 }
609 else
610 {
611 unsigned short numAddrSubReg = 1;
612 if (srcRegion->getRegion()->isRegionWH())
613 {
614 numAddrSubReg = srcRegion->getInst()->getExecSize() / srcRegion->getRegion()->width;
615 }
616 rightBound = leftBound + TypeSize(ADDR_REG_TYPE) * numAddrSubReg - 1;
617 }
618 }
619 }
620
621 // Generates the byte footprint of an instruction's operand
generateBitMask(G4_Operand * opnd,BitSet & footprint)622 void InstSplitPass::generateBitMask(G4_Operand* opnd, BitSet& footprint)
623 {
624 uint64_t bitSeq = TypeFootprint(opnd->getType());
625 unsigned short typeSize = opnd->getTypeSize();
626
627 if (opnd->isDstRegRegion())
628 {
629 if (opnd->getRegAccess() == Direct)
630 {
631 G4_DstRegRegion* dst = opnd->asDstRegRegion();
632 unsigned short horzStride = dst->getHorzStride();
633 unsigned short s_size = horzStride * typeSize;
634 for (uint8_t i = 0; i < opnd->getInst()->getExecSize(); ++i)
635 {
636 int eltOffset = i * s_size;
637 for (uint8_t j = 0; j < typeSize; j++)
638 {
639 footprint.set(eltOffset + j, true);
640 }
641 }
642 }
643 else
644 {
645 footprint.set(0, true);
646 footprint.set(1, true);
647 }
648 }
649 else if (opnd->isSrcRegRegion())
650 {
651 G4_SrcRegRegion* src = opnd->asSrcRegRegion();
652 const RegionDesc* srcReg = src->getRegion();
653 if (opnd->getRegAccess() == Direct)
654 {
655 if (srcReg->isScalar())
656 {
657 uint64_t mask = bitSeq;
658 for (unsigned i = 0; i < typeSize; ++i)
659 {
660 if (mask & (1ULL << i))
661 {
662 footprint.set(i, true);
663 }
664 }
665 }
666 else
667 {
668 for (int i = 0, numRows = opnd->getInst()->getExecSize() / srcReg->width; i < numRows; ++i)
669 {
670 for (int j = 0; j < srcReg->width; ++j)
671 {
672 int eltOffset = i * srcReg->vertStride * typeSize + j * srcReg->horzStride * typeSize;
673 for (uint8_t k = 0; k < typeSize; k++)
674 {
675 footprint.set(eltOffset + k, true);
676 }
677 }
678 }
679 }
680 }
681 else
682 {
683 unsigned short numAddrSubReg = 1;
684 if (srcReg->isRegionWH())
685 {
686 numAddrSubReg = opnd->getInst()->getExecSize() / srcReg->width;
687 }
688 uint64_t mask = 0;
689 for (unsigned i = 0; i < numAddrSubReg; i++)
690 {
691 mask |= ((uint64_t)0x3) << (i * 2);
692 }
693 for (unsigned i = 0; i < 64; ++i)
694 {
695 if (mask & (1ULL << i))
696 {
697 footprint.set(i, true);
698 }
699 }
700 }
701 }
702 }
703