/*========================== begin_copyright_notice ============================

Copyright (C) 2017-2021 Intel Corporation

SPDX-License-Identifier: MIT

============================= end_copyright_notice ===========================*/

#include "FlowGraph.h"
#include "BitSet.h"
#include "BuildIR.h"
#include "LocalDataflow.h"
#include <algorithm>
#include <array>
#include <cassert>
#include <unordered_map>
#include <vector>

using namespace vISA;

namespace {

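// Compute an operand's footprint: mark the bits/bytes it touches.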
void getOpndFootprint(G4_Operand* opnd, BitSet& footprint)
{
    opnd->updateFootPrint(footprint, true);
}

// Combine footprints.
//
// Use DefOpnd's footprint to kill the footprint being defined.
//
void combineFootprint(G4_Operand* DefOpnd, BitSet& footprint)
{
    DefOpnd->updateFootPrint(footprint, false);
}

struct LocalLivenessInfo;

// This is the basic unit used to track live operands (not yet fully defined).
struct LiveNode {
    LiveNode(G4_INST* Inst, Gen4_Operand_Number OpNum)
        : Inst(Inst)
        , OpNum(OpNum)
        , mask(getMaskSize(Inst, OpNum), 0)
    {
        auto opnd = getOperand();
        if (opnd)
            getOpndFootprint(opnd, mask);
    }

    // The instruction being tracked.
    G4_INST* Inst;

    // This indicates which operand is being tracked.
    Gen4_Operand_Number OpNum;

    // This tracks, at byte/bit granularity, which parts of the operand
    // are not yet covered by a definition.
    BitSet mask;

    // The list of definition nodes seen, sorted in reverse order.
    //
    // This data structure models the following du/ud chain, and it proves
    // <I3, Src1> is a locally fully-defined operand.
    //
    // I1 : mov (8, M1) V33(0,0)<1>:d V32(0,0)<1;1,0>:q
    // I2 : mov (8, M1) V33(1,0)<1>:d V32(2,0)<1;1,0>:q
    // I3 : add (16, M1) V35<1>:d V34<1;1,0>:d V33(0,0)<1;1,0>:d
    //
    // LiveNode <I3, V33> will be partially defined by I2 or I1, and
    // I1 together with I2 fully defines this live node.
    //
    std::vector<std::pair<G4_INST *, Gen4_Operand_Number>> DefNodes;

    G4_Operand *getOperand() const
    {
        return Inst->getOperand(OpNum);
    }

    static unsigned getMaskSize(G4_INST *Inst, Gen4_Operand_Number OpNum)
    {
        G4_Operand *Opnd = Inst->getOperand(OpNum);
        assert(Opnd && "null opnd");

        if (Opnd)
        {
            G4_Declare *Dcl = Opnd->getTopDcl();
            if (Dcl == nullptr) {
                // There is no top declaration for this operand, so this is an ARF.
                return 32;
            }
            return Dcl->getRegVar()->isFlag() ? Dcl->getNumberFlagElements()
                                              : Dcl->getByteSize();
        }

        return 0;
    }

    // Process a definition against this live node. If this definition,
    // together with the existing definitions, fully kills this use, return
    // true and add def-use links; return false otherwise.
    bool addDefinition(G4_INST* Inst, Gen4_Operand_Number opndNum,
                       LocalLivenessInfo &LLI);

    // Check if an operand depends on channel mask.
    static bool dependsOnChannelMask(bool IsInSimdFlow, G4_INST* Inst,
                                     Gen4_Operand_Number OpNum);

    // Check if def-use is 'aligned' with channel mask.
    static bool alignedWithChannelMask(G4_INST* DefInst, Gen4_Operand_Number DefOpNum,
                                       G4_INST *UseInst, Gen4_Operand_Number UseOpNum);

    friend void swap(LiveNode& a, LiveNode& b)
    {
        a.DefNodes.swap(b.DefNodes);
        std::swap(a.Inst, b.Inst);
        std::swap(a.OpNum, b.OpNum);
        a.mask.swap(b.mask);
    }
};

struct LocalLivenessInfo {
    // Keep live nodes while scanning the block.
    // Each declare is associated with a list of live nodes.
    std::unordered_map<const G4_Declare*, std::vector<LiveNode>> LiveNodes;

    // This indicates whether this block is in simd-cf or not.
    bool IsInSimdFlow;

    // Constructor.
    explicit LocalLivenessInfo(bool IsInSimdFlow)
        : IsInSimdFlow(IsInSimdFlow)
    {
    }

    // Populate global operands.
    void populateGlobals(GlobalOpndHashTable &globalOpndHT)
    {
        for (auto& Nodes : LiveNodes) {
            for (auto &LN : Nodes.second) {
                G4_Operand* Opnd = LN.Inst->getOperand(LN.OpNum);
                assert(Opnd && "null operand");

                // This is a temporary solution to allow optimizations
                // on partially defined local variables.
                //
                // TODO: use pseudo-kill to mark such variables.
                for (auto I = LN.DefNodes.rbegin(), E = LN.DefNodes.rend();
                     I != E; ++I)
                    I->first->addDefUse(LN.Inst, LN.OpNum);

                globalOpndHT.addGlobalOpnd(Opnd);
            }
            Nodes.second.clear();
        }
    }
};

} // namespace

static G4_CmpRelation compOpnd(G4_Operand* Opnd1, G4_Operand* Opnd2)
{
    if (Opnd1->isDstRegRegion())
        return Opnd1->asDstRegRegion()->compareOperand(Opnd2);
    else if (Opnd1->isCondMod())
        return Opnd1->asCondMod()->compareOperand(Opnd2);
    else if (Opnd1->isSrcRegRegion())
        return Opnd1->asSrcRegRegion()->compareOperand(Opnd2);
    else if (Opnd1->isPredicate())
        return Opnd1->asPredicate()->compareOperand(Opnd2);

    // all other cases, virtual call.
    return Opnd1->compareOperand(Opnd2);
}

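// Try to kill this live node with the definition <DefInst, DefOpNum>:
// (1) a definition that does not overlap the use is ignored;
// (2) a definition already fully covered by a previously recorded one is
//     skipped;
// (3) if the definition is guaranteed to write the channels the use reads
//     (see CombineBitV below), its footprint is cleared from 'mask';
// (4) once 'mask' is empty over the use's bounds, du/ud links are added
//     for all contributing definitions and the node is reported killed.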
bool LiveNode::addDefinition(G4_INST* DefInst, Gen4_Operand_Number DefOpNum,
                             LocalLivenessInfo &LLI)
{
    // Skip a definition that does not overlap with this live node.
    G4_Operand* DefOpnd = DefInst->getOperand(DefOpNum);
    G4_Operand* UseOpnd = getOperand();
    G4_CmpRelation Rel = compOpnd(DefOpnd, UseOpnd);
    if (Rel == G4_CmpRelation::Rel_disjoint)
        return false;

    // Check if this definition is fully covered by a previous definition.
    // This checks a single definition only. It is not optimal, but should
    // cover common cases.
    for (auto &Node : DefNodes) {
        if (Node.second != DefOpNum)
            continue;
        G4_INST *PrevDefInst = Node.first;
        if (PrevDefInst->getPredicate() || DefInst->getPredicate())
            continue;
        if (PrevDefInst->getMaskOffset() != DefInst->getMaskOffset())
            continue;
        if (!PrevDefInst->isWriteEnableInst() && DefInst->isWriteEnableInst())
            continue;
        G4_Operand* PrevDef = PrevDefInst->getOperand(Node.second);
        G4_Operand* CurrDef = DefInst->getOperand(DefOpNum);
        assert((PrevDef != nullptr) && (CurrDef != nullptr));
        G4_CmpRelation DefRel = compOpnd(PrevDef, CurrDef);
        if (DefRel == G4_CmpRelation::Rel_eq || DefRel == G4_CmpRelation::Rel_gt)
            return false;
    }

    // Decide whether to count this definition's footprint.
    auto CombineBitV = [=, &LLI]() {
        // If the definition is predicated and it is not a sel, then this
        // definition cannot count, unless the use is predicated by the
        // same predicate value.
        if (G4_Predicate *DefPred = DefInst->getPredicate()) {
            if (DefInst->opcode() != G4_opcode::G4_sel) {
                // The following case is a full definition, when predicates
                // have the same value.
                // (+P1) mov (8, M1) V33(0,0)<1>:d 1 // DefInst
                // (+P1) add (8, M1) V34(0,0)<1>:d V33(0,0)<1;1,0>:d V32(0,0)<1;1,0>:d
                G4_Predicate *UsePred = this->Inst->getPredicate();
                if (UsePred == nullptr || !DefPred->samePredicate(*UsePred))
                    return false;

                // If UsePred is alive and has no definition, then UsePred
                // should have the same value as DefPred.
                G4_Declare *Dcl = UsePred->getTopDcl();
                auto Iter = LLI.LiveNodes.find(Dcl);
                if (Iter == LLI.LiveNodes.end())
                    return false;

                // Find this live node.
                auto NI = std::find_if(Iter->second.begin(), Iter->second.end(),
                                       [=](const LiveNode &LN) {
                                           return LN.Inst == this->Inst &&
                                                  LN.OpNum == Opnd_pred;
                                       });

                // Not alive, or alive but partially defined.
                if (NI == Iter->second.end() || !NI->DefNodes.empty())
                    return false;
            }
        }

        // If the def does not depend on channel mask, then combine its
        // footprint.
        if (!dependsOnChannelMask(LLI.IsInSimdFlow, DefInst, DefOpNum))
            return true;

        // Otherwise, if this use does not depend on channel mask, then do
        // not combine.
        if (!dependsOnChannelMask(LLI.IsInSimdFlow, this->Inst, this->OpNum))
            return false;

        return alignedWithChannelMask(DefInst, DefOpNum, this->Inst, this->OpNum);
    };

    if (CombineBitV()) {
        combineFootprint(DefOpnd, mask);
    }

    if (UseOpnd && mask.isEmpty(UseOpnd->getLeftBound(), UseOpnd->getRightBound())) {
        // Use reverse_iterator as the analysis is bottom-up. This makes
        // early defs come first in the def-use lists.
        if (!DefInst->isPseudoKill())
            DefInst->addDefUse(this->Inst, this->OpNum);
        for (auto I = DefNodes.rbegin(), E = DefNodes.rend(); I != E; ++I)
            I->first->addDefUse(this->Inst, this->OpNum);
        return true;
    }

    // This live node is not yet fully killed.
    DefNodes.emplace_back(DefInst, DefOpNum);
    return false;
}

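// An operand depends on the channel mask only when the enclosing block is
// in divergent simd control flow and the instruction is not write-enabled
// (NoMask).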
bool LiveNode::dependsOnChannelMask(bool IsInSimdFlow, G4_INST *Inst,
                                    Gen4_Operand_Number OpNum)
{
    if (!IsInSimdFlow || Inst->isWriteEnableInst())
        return false;

    // Otherwise.
    return true;
}

// Check if def-use is 'aligned' with channel mask.
//
// CM = 11110000 (M8)
//      00001111 (M0)
//
// mov (8, M0) V33(0,0)<2>:d 1               // defines V33.{0,2,4,6}
// mov (8, M8) V33(0,1)<2>:d 2               // defines V33.{9,11,13,15}
// add (16, M0) V34.0<1>:d V33.0<1;1,0>:d 3  // uses V33.{0,1,2,3,12,13,14,15}
//
// This is not aligned, and it is not a full kill.
//
// mov (8, M0) V33(0,0)<1>:d 1               // defines V33.{0,1,2,3}
// mov (8, M8) V33(1,0)<1>:d 2               // defines V33.{12,13,14,15}
// add (16, M0) V34.0<1>:d V33.0<1;1,0>:d 3  // uses V33.{0,1,2,3,12,13,14,15}
//
// This is aligned, and it is a full kill.
//
bool LiveNode::alignedWithChannelMask(
    G4_INST* DefInst, Gen4_Operand_Number DefOpNum,
    G4_INST* UseInst, Gen4_Operand_Number UseOpNum)
{
    G4_Operand* DefOpnd = DefInst->getOperand(DefOpNum);
    G4_Operand* UseOpnd = UseInst->getOperand(UseOpNum);
    unsigned DefLB = DefOpnd->getLeftBound();
    unsigned UseLB = UseOpnd->getLeftBound();
    unsigned DefMaskOffset = DefInst->getMaskOffset();
    unsigned UseMaskOffset = UseInst->getMaskOffset();

    // Flags are tracked at bit level.
    G4_Declare *Dcl = DefOpnd->getTopDcl();
    if (Dcl && Dcl->getRegFile() == G4_RegFileKind::G4_FLAG) {
        int DefOffset = int(DefLB - DefMaskOffset);
        int UseOffset = int(UseLB - UseMaskOffset);
        return DefOffset == UseOffset;
    }

    // Do not analyze instructions that may exceed the two-GRF boundary.
    if (DefInst->mayExceedTwoGRF() || UseInst->mayExceedTwoGRF())
        return true;

    // Check that all uses are defined under the right emask, if defined.
    unsigned DefStride = 1;
    if (DefOpnd->isDstRegRegion()) {
        DefStride = DefOpnd->asDstRegRegion()->getHorzStride();
    }

    bool IsContiguous = false;
    if (UseOpnd->isSrcRegRegion()) {
        G4_SrcRegRegion* UseReg = UseOpnd->asSrcRegRegion();
        const RegionDesc* UseRegDesc = UseReg->getRegion();
        IsContiguous = UseRegDesc->isContiguous(UseInst->getExecSize());
    }

    unsigned DefTySz = DefOpnd->getTypeSize();
    unsigned UseTySz = UseOpnd->getTypeSize();

    // Common cases.
    if (DefStride == 1 && IsContiguous && DefTySz == UseTySz) {
        int DefOffset = int(DefLB - DefMaskOffset * DefTySz);
        int UseOffset = int(UseLB - UseMaskOffset * UseTySz);
        return DefOffset == UseOffset;
    }

    // Other cases.
    //
    // The initial value -1 means the byte is not defined, for [DefLB, DefRB].
    std::array<int, 128> DefByteMask;
    DefByteMask.fill(-1);

    // Set the channel value for each byte the definition writes.
    int Channel = DefInst->getMaskOffset();
    for (unsigned i = 0, n = DefInst->getExecSize(); i < n; ++i) {
        for (unsigned j = 0; j < DefTySz; ++j)
            DefByteMask[i * DefTySz * DefStride + j] = Channel;
        ++Channel;
    }
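    //
    // For example, a hypothetical strided word def
    //   mov (8, M0) V33(0,0)<2>:w ...
    // has DefTySz = 2 and DefStride = 2, so bytes {0,1} get channel 0,
    // bytes {4,5} get channel 1, and so on; the skipped bytes stay -1.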

    // In general, enumerate the elements of the use region and, for each
    // byte, check whether it is defined under the correct emask. Note that
    // it is OK for some bytes to be undefined.
    //
    Channel = UseInst->getMaskOffset();
    if (UseOpnd->isSrcRegRegion()) {
        unsigned DefRB = DefOpnd->getRightBound();
        auto Desc = UseOpnd->asSrcRegRegion()->getRegion();
        int hs = Desc->isScalar() ? 1 : Desc->horzStride;
        int vs = Desc->isScalar() ? 0 : Desc->vertStride;
        int numRows = UseInst->getExecSize() / Desc->width;

        for (int i = 0; i < numRows; ++i) {
            for (int j = 0; j < Desc->width; ++j) {
                // Check this element's emask at byte level when defined.
                int eltOffset = i * vs * UseTySz + j * hs * UseTySz;
                if (UseLB + eltOffset >= DefLB &&
                    UseLB + eltOffset + UseTySz - 1 <= DefRB) {
                    int Offset = UseLB + eltOffset - DefLB;
                    for (unsigned k = 0; k < UseTySz; ++k) {
                        int Mask = DefByteMask[Offset + k];
                        if (Mask != -1 && Mask != Channel)
                            return false;
                    }
                }
                ++Channel;
            }
        }
    }

    return true;
}

// Remove one element from a vector and return the iterator to the next
// element. This swaps the element with the tail and pops, so vector order
// is not preserved.
template <typename T, typename AllocatorTy>
typename std::vector<T, AllocatorTy>::iterator
kill(std::vector<T, AllocatorTy>& Elts,
     typename std::vector<T, AllocatorTy>::iterator Iter)
{
    assert(Iter != Elts.end());
    assert(!Elts.empty());
    if (&*Iter == &Elts.back()) {
        // This is the last element, so there is no next one.
        Elts.pop_back();
        return Elts.end();
    }

    // Not the last one; swap with the tail and keep the iterator unchanged.
    std::swap(*Iter, Elts.back());
    Elts.pop_back();
    return Iter;
}

// Process reads. This simply creates a new live read node.
//
// Note that for an indirect operand, its top dcl is its address variable.
// This models def-use for the underlying address variable, not the
// variable being addressed. An indirect definition introduces a use too.
//
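// For example (hypothetical syntax):
//   mov (8, M1) r[A0(0,0), 0]<1>:d V32(0,0)<1;1,0>:d
// writes through the address variable A0, so the indirect dst also adds
// a use of A0 here.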
static void processReadOpnds(G4_BB *BB, G4_INST *Inst, LocalLivenessInfo &LLI)
{
    if (Inst->isPseudoKill())
    {
        return;
    }

    // (1) Indirect dst operand reads address.
    G4_DstRegRegion* Dst = Inst->getDst();
    if (Dst && Dst->isIndirect()) {
        G4_Declare* Dcl = Dst->getTopDcl();
        assert(Dcl && "out of sync");
        LLI.LiveNodes[Dcl].emplace_back(
            Inst, Gen4_Operand_Number::Opnd_dst);
    }

    // (2) Direct and indirect source operands.
    for (auto OpNum :
         { Gen4_Operand_Number::Opnd_src0, Gen4_Operand_Number::Opnd_src1,
           Gen4_Operand_Number::Opnd_src2, Gen4_Operand_Number::Opnd_src3,
           Gen4_Operand_Number::Opnd_src4, Gen4_Operand_Number::Opnd_src5,
           Gen4_Operand_Number::Opnd_src6, Gen4_Operand_Number::Opnd_src7,
           Gen4_Operand_Number::Opnd_pred, Gen4_Operand_Number::Opnd_implAccSrc }) {
        G4_Operand* opnd = Inst->getOperand(OpNum);
        if (opnd == nullptr || opnd->isImm() || opnd->isNullReg() || opnd->isLabel())
            continue;

        if (Inst->isPseudoAddrMovIntrinsic())
        {
            G4_Declare* Dcl = opnd->asAddrExp()->getRegVar()->getDeclare();
            LLI.LiveNodes[Dcl].emplace_back(Inst, OpNum);
        }
        else
        {
            G4_Declare* Dcl = opnd->getTopDcl();
            LLI.LiveNodes[Dcl].emplace_back(Inst, OpNum);
        }
    }
}

// Process writes. If this is a partial definition, then record it. When
// all partial definitions together fully define a live read node, that
// node is killed and du/ud links are added.
//
static void processWriteOpnds(G4_BB *BB, G4_INST *Inst, LocalLivenessInfo &LLI)
{
    if (Inst->isPseudoKill())
    {
        return;
    }
    for (auto OpNum : { Gen4_Operand_Number::Opnd_dst,
                        Gen4_Operand_Number::Opnd_condMod,
                        Gen4_Operand_Number::Opnd_implAccDst }) {
        G4_Operand* opnd = Inst->getOperand(OpNum);
        if (opnd == nullptr || opnd->isNullReg())
            continue;

        // Do not try to kill uses with indirect definitions.
        if (opnd->isDstRegRegion() && opnd->asDstRegRegion()->isIndirect())
            continue;

        // Iterate over all live nodes associated with the same declaration.
        auto& Nodes = LLI.LiveNodes[opnd->getTopDcl()];
        for (auto Iter = Nodes.begin(); Iter != Nodes.end(); /*empty*/) {
            LiveNode &liveNode = *Iter;
            if (liveNode.addDefinition(Inst, OpNum, LLI)) {
                Iter = kill(Nodes, Iter);
                continue;
            }
            ++Iter;
        }
    }
}

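// Build def-use/use-def chains within each BB in a single bottom-up scan:
// reads create live nodes, writes try to kill overlapping live nodes, and
// whatever is still live at the top of the block is marked as global.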
void FlowGraph::localDataFlowAnalysis()
{
    for (auto BB : BBs) {
        LocalLivenessInfo LLI(!BB->isAllLaneActive());
        for (auto I = BB->rbegin(), E = BB->rend(); I != E; ++I) {
            G4_INST* Inst = *I;
            G4_opcode Op = Inst->opcode();
            if (Op == G4_opcode::G4_return || Op == G4_opcode::G4_label)
                continue;
            if (Inst->isOptBarrier()) {
                // Do not try to build def-use across an optimization
                // barrier; this effectively disables optimizations across
                // it.
                LLI.populateGlobals(globalOpndHT);

                // A barrier does not kill, but may introduce uses.
                processReadOpnds(BB, Inst, LLI);
                continue;
            }
            processWriteOpnds(BB, Inst, LLI);
            processReadOpnds(BB, Inst, LLI);
        }

        // All leftover live nodes are global.
        LLI.populateGlobals(globalOpndHT);

        // Sort use lists according to their local ids. This matches the
        // use-list order produced by a forward reaching-definition-based
        // analysis. It is better for optimizations not to rely on this
        // order, though.
        BB->resetLocalIds();
        for (auto Inst : *BB) {
            if (Inst->use_size() > 1) {
                using Ty = std::pair<vISA::G4_INST *, Gen4_Operand_Number>;
                auto Cmp = [](const Ty &lhs, const Ty &rhs) -> bool {
                    int lhsID = lhs.first->getLocalId();
                    int rhsID = rhs.first->getLocalId();
                    if (lhsID < rhsID)
                        return true;
                    else if (lhsID > rhsID)
                        return false;
                    return lhs.second < rhs.second;
                };
                Inst->sortUses(Cmp);
            }
        }
    }
}

// Reset existing def-use.
void FlowGraph::resetLocalDataFlowData()
{
    globalOpndHT.clearHashTable();
    for (auto bb : BBs)
    {
        for (auto inst : *bb)
        {
            inst->clearDef();
            inst->clearUse();
        }
    }
}

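// Collect the definitions in this BB that may escape it. A def is dropped
// from the escaped set only when a later def in the same BB fully covers
// it under a compatible emask; indirect definitions are conservatively
// treated as escaping.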
void DefEscapeBBAnalysis::analyzeBB(G4_BB* bb)
{
    // Active defines in this BB, organized by root declare.
    invalidateBB(bb);
    std::unordered_map<G4_Declare*, std::vector<G4_INST*> > activeDefines;
    std::vector<G4_INST*> escapedDefs;
    for (auto inst : *bb)
    {
        if (!inst->getDst())
        {
            continue;
        }

        // Analyze GRF/address dst.
        auto dst = inst->getDst();

        if (dst->isIndirect())
        {
            escapedDefs.push_back(inst);
        }
        else if (dst->getTopDcl())
        {
            auto dcl = dst->getTopDcl()->getRootDeclare();
            auto&& iter = activeDefines.find(dcl);
            if (iter == activeDefines.end())
            {
                // First define for dcl in this BB.
                activeDefines[dcl] = { inst };
            }
            else
            {
                auto&& vec = iter->second;
                // Note that the size may shrink!
                for (int i = 0; i < (int)vec.size(); ++i)
                {
                    auto prevInst = vec[i];
                    if (inst->getMaskOffset() != prevInst->getMaskOffset() ||
                        (prevInst->isWriteEnableInst() && !inst->isWriteEnableInst()) ||
                        dst->getExecTypeSize() != prevInst->getDst()->getExecTypeSize())
                    {
                        continue;
                    }
                    auto rel = dst->compareOperand(prevInst->getDst());
                    if (rel == Rel_eq || rel == Rel_gt)
                    {
                        std::swap(vec[i], vec[vec.size() - 1]);
                        vec.pop_back();
#ifdef _DEBUG
                        //std::cerr << "Inst:\t";
                        //prevInst->dump();
                        //std::cerr << "killed by Inst:\t";
                        //inst->dump();
                        auto killIter = killedDefs.find(bb);
                        if (killIter == killedDefs.end())
                        {
                            killedDefs[bb] = { prevInst };
                        }
                        else
                        {
                            auto&& killedVec = killIter->second;
                            killedVec.push_back(prevInst);
                        }
#endif
                    }
                }
                vec.push_back(inst);
            }
        }
    }

    for (auto&& iter : activeDefines)
    {
        auto&& vec = iter.second;
        for (auto I : vec)
        {
            escapedDefs.push_back(I);
        }
    }
    escapedInsts[bb] = escapedDefs;
}

void DefEscapeBBAnalysis::print(std::ostream& OS) const
{
    for (auto&& iter : escapedInsts)
    {
        G4_BB* bb = iter.first;
        OS << "BB" << bb->getId() << ":\n";
        OS << "Escaped inst:\n";
        for (auto&& inst : iter.second)
        {
            OS << "\t";
            inst->print(OS);
        }
#ifdef _DEBUG
        auto&& killIt = killedDefs.find(bb);
        if (killIt != killedDefs.end())
        {
            OS << "Killed inst:\n";
            for (auto&& inst : killIt->second)
            {
                OS << "\t";
                inst->print(OS);
            }
        }
#endif // _DEBUG
        OS << "\n";
    }
}