/*========================== begin_copyright_notice ============================

Copyright (C) 2017-2021 Intel Corporation

SPDX-License-Identifier: MIT

============================= end_copyright_notice ===========================*/

#include "FlowGraph.h"
#include "BitSet.h"
#include "BuildIR.h"
#include "LocalDataflow.h"
#include <algorithm>
#include <array>
#include <unordered_map>
#include <vector>

using namespace vISA;

namespace {

void getOpndFootprint(G4_Operand* opnd, BitSet& footprint)
{
    opnd->updateFootPrint(footprint, true);
}

// Combine footprint.
//
// Use DefOpnd's footprint to kill the footprint being defined.
//
void combineFootprint(G4_Operand* DefOpnd, BitSet& footprint)
{
    DefOpnd->updateFootPrint(footprint, false);
}
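
// Footprint protocol used below (descriptive note): getOpndFootprint sets a
// bit for every byte (or flag bit) a use reads, and combineFootprint clears
// the bytes written by an overlapping definition. Illustrative example,
// assuming a 32-byte GRF row:
//   I1 : mov (8, M1) V33(0,0)<1>:d ...   // clears bytes 0..31 of the mask
//   I2 : mov (8, M1) V33(1,0)<1>:d ...   // clears bytes 32..63 of the mask
// Once the mask is empty over the use's bounds, the use is fully defined
// locally (see LiveNode::addDefinition).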

struct LocalLivenessInfo;

// This is the basic unit used to track live operands (not fully defined yet).
struct LiveNode {
    LiveNode(G4_INST* Inst, Gen4_Operand_Number OpNum)
        : Inst(Inst)
        , OpNum(OpNum)
        , mask(getMaskSize(Inst, OpNum), 0)
    {
        auto opnd = getOperand();
        if (opnd)
            getOpndFootprint(opnd, mask);
    }

    // The instruction being tracked.
    G4_INST* Inst;

    // This indicates which operand is being tracked.
    Gen4_Operand_Number OpNum;

    // This tracks, at the byte/bit level, which parts of the operand are
    // still only partially defined.
    BitSet mask;

    // The list of definition nodes seen so far, in reverse order.
    //
    // This data structure models the following du/ud chain, and it proves
    // <I3, Src1> is a locally fully-defined operand.
    //
    // I1 : mov (8,  M1) V33(0,0)<1>:d V32(0,0)<1;1,0>:q
    // I2 : mov (8,  M1) V33(1,0)<1>:d V32(2,0)<1;1,0>:q
    // I3 : add (16, M1) V35<1>:d V34<1;1,0>:d V33(0,0)<1;1,0>:d
    //
    // LiveNode <I3, V33> will be partially defined by I2 or I1, and
    // I1 together with I2 fully defines this live node.
    //
    std::vector<std::pair<G4_INST *, Gen4_Operand_Number>> DefNodes;

    G4_Operand *getOperand() const
    {
        return Inst->getOperand(OpNum);
    }

    static unsigned getMaskSize(G4_INST *Inst, Gen4_Operand_Number OpNum)
    {
        G4_Operand *Opnd = Inst->getOperand(OpNum);
        assert(Opnd && "null opnd");

        if (Opnd)
        {
            G4_Declare *Dcl = Opnd->getTopDcl();
            if (Dcl == nullptr) {
                // There is no top declaration for this operand, so this is ARF.
                return 32;
            }
            return Dcl->getRegVar()->isFlag() ? Dcl->getNumberFlagElements()
                : Dcl->getByteSize();
        }

        return 0;
    }

    // Process a definition for this live node. If this definition, together
    // with the definitions seen so far, fully kills this use, return true
    // and add def-use links; return false otherwise.
    bool addDefinition(G4_INST* Inst, Gen4_Operand_Number opndNum,
                       LocalLivenessInfo &LLI);

    // Check if an operand depends on channel mask.
    static bool dependsOnChannelMask(bool IsInSimdFlow, G4_INST* Inst,
                                     Gen4_Operand_Number OpNum);

    // Check if def-use is 'aligned' with channel mask.
    static bool alignedWithChannelMask(G4_INST* DefInst, Gen4_Operand_Number DefOpNum,
                                       G4_INST *UseInst, Gen4_Operand_Number UseOpNum);

    friend void swap(LiveNode& a, LiveNode& b)
    {
        a.DefNodes.swap(b.DefNodes);
        std::swap(a.Inst, b.Inst);
        std::swap(a.OpNum, b.OpNum);
        a.mask.swap(b.mask);
    }
};

struct LocalLivenessInfo {
    // Keep live nodes while scanning the block.
    // Each declare is associated with a list of live nodes.
    std::unordered_map<const G4_Declare*, std::vector<LiveNode>> LiveNodes;

    // This indicates whether this block is in simd control flow.
    bool IsInSimdFlow;

    // Constructor.
    explicit LocalLivenessInfo(bool IsInSimdFlow)
        : IsInSimdFlow(IsInSimdFlow)
    {
    }

    // Populate global operands.
    void populateGlobals(GlobalOpndHashTable &globalOpndHT)
    {
        for (auto& Nodes : LiveNodes) {
            for (auto &LN : Nodes.second) {
                G4_Operand* Opnd = LN.Inst->getOperand(LN.OpNum);
                assert(Opnd && "null operand");

                // This is a temporary solution to allow optimizations
                // on partially defined local variables.
                //
                // TODO: use pseudo-kill to mark such variables.
                for (auto I = LN.DefNodes.rbegin(), E = LN.DefNodes.rend();
                     I != E; ++I)
                    I->first->addDefUse(LN.Inst, LN.OpNum);

                globalOpndHT.addGlobalOpnd(Opnd);
            }
            Nodes.second.clear();
        }
    }
};

} // namespace

static G4_CmpRelation compOpnd(G4_Operand* Opnd1, G4_Operand* Opnd2)
{
    if (Opnd1->isDstRegRegion())
        return Opnd1->asDstRegRegion()->compareOperand(Opnd2);
    else if (Opnd1->isCondMod())
        return Opnd1->asCondMod()->compareOperand(Opnd2);
    else if (Opnd1->isSrcRegRegion())
        return Opnd1->asSrcRegRegion()->compareOperand(Opnd2);
    else if (Opnd1->isPredicate())
        return Opnd1->asPredicate()->compareOperand(Opnd2);

    // All other cases: virtual call.
    return Opnd1->compareOperand(Opnd2);
}
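
// Reading note: in the checks below, the result of compareOperand is
// interpreted as "how the first operand relates to the second", e.g.
// Rel_eq/Rel_gt mean the first operand covers the second and Rel_disjoint
// means the two do not overlap (inferred from how the values are used here).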

bool LiveNode::addDefinition(G4_INST* DefInst, Gen4_Operand_Number DefOpNum,
                             LocalLivenessInfo &LLI)
{
    // If this definition does not overlap with this live node, ignore it.
    G4_Operand* DefOpnd = DefInst->getOperand(DefOpNum);
    G4_Operand* UseOpnd = getOperand();
    G4_CmpRelation Rel = compOpnd(DefOpnd, UseOpnd);
    if (Rel == G4_CmpRelation::Rel_disjoint)
        return false;

    // Check if this definition is fully covered by a previous definition.
    // This checks a single definition only. It is not optimal, but should cover
    // common cases.
    for (auto &Node : DefNodes) {
        if (Node.second != DefOpNum)
            continue;
        G4_INST *PrevDefInst = Node.first;
        if (PrevDefInst->getPredicate() || DefInst->getPredicate())
            continue;
        if (PrevDefInst->getMaskOffset() != DefInst->getMaskOffset())
            continue;
        if (!PrevDefInst->isWriteEnableInst() && DefInst->isWriteEnableInst())
            continue;
        G4_Operand* PrevDef = PrevDefInst->getOperand(Node.second);
        G4_Operand* CurrDef = DefInst->getOperand(DefOpNum);
        assert((PrevDef != nullptr) && (CurrDef != nullptr));
        G4_CmpRelation DefRel = compOpnd(PrevDef, CurrDef);
        if (DefRel == G4_CmpRelation::Rel_eq || DefRel == G4_CmpRelation::Rel_gt)
            return false;
    }

    // Determine whether to count this definition's footprint.
    auto CombineBitV = [=, &LLI]() {
        // If the definition is predicated and it is not a sel, then
        // this definition cannot count, unless the use is predicated
        // by the same predicate value.
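        // A predicated sel is the exception (assumption about sel semantics):
        // its predicate only selects between the sources, while the
        // destination is written for every enabled channel, so it still acts
        // as a full definition of the channels it covers.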
        if (G4_Predicate *DefPred = DefInst->getPredicate()) {
            if (DefInst->opcode() != G4_opcode::G4_sel) {
                // The following case is a full definition, when predicates
                // have the same value.
                // (+P1) mov (8, M1) V33(0,0)<1>:d 1  // DefInst
                // (+P1) add (8, M1) V34(0,0)<1>:d V33(0,0)<1;1,0>:d V32(0,0)<1;1,0>:d
                G4_Predicate *UsePred = this->Inst->getPredicate();
                if (UsePred == nullptr || !DefPred->samePredicate(*UsePred))
                    return false;

                // If UsePred is alive and has no definition, then UsePred should
                // have the same value as DefPred.
                G4_Declare *Dcl = UsePred->getTopDcl();
                auto Iter = LLI.LiveNodes.find(Dcl);
                if (Iter == LLI.LiveNodes.end())
                    return false;

                // Find this live node.
                auto NI = std::find_if(Iter->second.begin(), Iter->second.end(),
                                       [=](const LiveNode &LN) {
                                           return LN.Inst == this->Inst &&
                                                  LN.OpNum == Opnd_pred;
                                       });

                // Not alive, or alive but partially defined.
                if (NI == Iter->second.end() || !NI->DefNodes.empty())
                    return false;
            }
        }

        // If the def does not depend on the channel mask, then combine its footprint.
        if (!dependsOnChannelMask(LLI.IsInSimdFlow, DefInst, DefOpNum))
            return true;

        // Otherwise, if this use does not depend on the channel mask, then do not combine.
        if (!dependsOnChannelMask(LLI.IsInSimdFlow, this->Inst, this->OpNum))
            return false;

        return alignedWithChannelMask(DefInst, DefOpNum, this->Inst, this->OpNum);
    };

    if (CombineBitV()) {
        combineFootprint(DefOpnd, mask);
    }

    if (UseOpnd && mask.isEmpty(UseOpnd->getLeftBound(), UseOpnd->getRightBound())) {
        // Use reverse_iterator as the analysis is bottom-up. This makes
        // earlier defs come first in the def-use lists.
        if (!DefInst->isPseudoKill())
            DefInst->addDefUse(this->Inst, this->OpNum);
        for (auto I = DefNodes.rbegin(), E = DefNodes.rend(); I != E; ++I)
            I->first->addDefUse(this->Inst, this->OpNum);
        return true;
    }

    // This live node is not yet fully killed.
    DefNodes.emplace_back(DefInst, DefOpNum);
    return false;
}

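// A descriptive note: an operand's coverage depends on the channel mask only
// when the block is under SIMD control flow and the instruction is not
// WriteEnable (NoMask); WriteEnable instructions execute regardless of the
// current execution mask.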
bool LiveNode::dependsOnChannelMask(bool IsInSimdFlow, G4_INST *Inst,
                                    Gen4_Operand_Number OpNum)
{
    if (!IsInSimdFlow || Inst->isWriteEnableInst())
        return false;

    // Otherwise.
    return true;
}

// Check if def-use is 'aligned' with channel mask.
//
// CM = 11110000 (M8)
//      00001111 (M0)
//
// mov (8, M0) V33(0,0)<2>:d 1   // defines V33.{0,2,4,6}
// mov (8, M8) V33(0,1)<2>:d 2   // defines V33.{9,11,13,15}
// add (16, M0) V34.0<1>:d V33.0<1;1,0>:d 3 // uses V33.{0,1,2,3,12,13,14,15}
//
// This is not aligned, and it is not a full kill.
//
// mov (8, M0) V33(0,0)<1>:d 1   // defines V33.{0,1,2,3}
// mov (8, M8) V33(1,0)<1>:d 2   // defines V33.{12,13,14,15}
// add (16, M0) V34.0<1>:d V33.0<1;1,0>:d 3 // uses V33.{0,1,2,3,12,13,14,15}
//
// This is aligned, and it is a full kill.
//
bool LiveNode::alignedWithChannelMask(
    G4_INST* DefInst, Gen4_Operand_Number DefOpNum,
    G4_INST* UseInst, Gen4_Operand_Number UseOpNum)
{
    G4_Operand* DefOpnd = DefInst->getOperand(DefOpNum);
    G4_Operand* UseOpnd = UseInst->getOperand(UseOpNum);
    unsigned DefLB = DefOpnd->getLeftBound();
    unsigned UseLB = UseOpnd->getLeftBound();
    unsigned DefMaskOffset = DefInst->getMaskOffset();
    unsigned UseMaskOffset = UseInst->getMaskOffset();

    // Flags are tracked at the bit level.
    G4_Declare *Dcl = DefOpnd->getTopDcl();
    if (Dcl && Dcl->getRegFile() == G4_RegFileKind::G4_FLAG) {
        int DefOffset = int(DefLB - DefMaskOffset);
        int UseOffset = int(UseLB - UseMaskOffset);
        return DefOffset == UseOffset;
    }

    // Do not analyze instructions that may exceed the two-GRF boundary.
    if (DefInst->mayExceedTwoGRF() || UseInst->mayExceedTwoGRF())
        return true;

    // Check that all uses are defined under the right emask, if defined.
    unsigned DefStride = 1;
    if (DefOpnd->isDstRegRegion()) {
        DefStride = DefOpnd->asDstRegRegion()->getHorzStride();
    }

    bool IsContiguous = false;
    if (UseOpnd->isSrcRegRegion()) {
        G4_SrcRegRegion* UseReg = UseOpnd->asSrcRegRegion();
        const RegionDesc* UseRegDesc = UseReg->getRegion();
        IsContiguous = UseRegDesc->isContiguous(UseInst->getExecSize());
    }

    unsigned DefTySz = DefOpnd->getTypeSize();
    unsigned UseTySz = UseOpnd->getTypeSize();

    // Common cases.
    if (DefStride == 1 && IsContiguous && DefTySz == UseTySz) {
        int DefOffset = int(DefLB - DefMaskOffset * DefTySz);
        int UseOffset = int(UseLB - UseMaskOffset * UseTySz);
        return DefOffset == UseOffset;
    }

    // Other cases.
    //
    // The initial value -1 means the byte is not defined, over [DefLB, DefRB].
    std::array<int, 128> DefByteMask;
    DefByteMask.fill(-1);

    // Set the channel value for each defined byte.
    int Channel = DefInst->getMaskOffset();
    for (unsigned i = 0, n = DefInst->getExecSize(); i < n; ++i) {
        for (unsigned j = 0; j < DefTySz; ++j)
            DefByteMask[i * DefTySz * DefStride + j] = Channel;
        ++Channel;
    }
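    // Illustrative sketch (an assumed example, not taken from real IR): for
    //   mov (8, M0) V33(0,0)<2>:d ...
    // with a :d (4-byte) type and horizontal stride 2, the loop above fills
    // DefByteMask as
    //   [0,0,0,0, -1,-1,-1,-1, 1,1,1,1, -1,-1,-1,-1, ...]
    // i.e. each defined dword records its channel id and the strided gaps
    // stay -1. The 128-byte array is sized for at most a two-GRF region,
    // which the mayExceedTwoGRF check above is meant to ensure.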

    // In general, enumerate elements of the use region and for each byte
    // check if this byte is defined by a correct emask. Note that
    // it is ok that some bytes are not defined.
    //
    Channel = UseInst->getMaskOffset();
    if (UseOpnd->isSrcRegRegion()) {
        unsigned DefRB = DefOpnd->getRightBound();
        auto Desc = UseOpnd->asSrcRegRegion()->getRegion();
        int hs = Desc->isScalar() ? 1 : Desc->horzStride;
        int vs = Desc->isScalar() ? 0 : Desc->vertStride;
        int numRows = UseInst->getExecSize() / Desc->width;

        for (int i = 0; i < numRows; ++i) {
            for (int j = 0; j < Desc->width; ++j) {
                // Check this element's emask at the byte level when defined.
                int eltOffset = i * vs * UseTySz + j * hs * UseTySz;
                if (UseLB + eltOffset >= DefLB &&
                    UseLB + eltOffset + UseTySz - 1 <= DefRB) {
                    int Offset = UseLB + eltOffset - DefLB;
                    for (unsigned k = 0; k < UseTySz; ++k) {
                        int Mask = DefByteMask[Offset + k];
                        if (Mask != -1 && Mask != Channel)
                            return false;
                    }
                }
                ++Channel;
            }
        }
    }

    return true;
}

// Remove one element from the vector and return the iterator to the next element.
template <typename T, typename AllocatorTy>
typename std::vector<T, AllocatorTy>::iterator
kill(std::vector<T, AllocatorTy>& Elts,
     typename std::vector<T, AllocatorTy>::iterator Iter)
{
    assert(Iter != Elts.end());
    assert(!Elts.empty());
    if (&*Iter == &Elts.back()) {
        // This is the last element, so the next one is none.
        Elts.pop_back();
        return Elts.end();
    }

    // Not the last one: swap with the tail and keep the iterator unchanged.
    std::swap(*Iter, Elts.back());
    Elts.pop_back();
    return Iter;
}
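
// A usage note: processWriteOpnds below erases a fully killed LiveNode with
// this helper, trading element order for O(1) removal; the order of nodes in
// the per-declare vector does not matter to the analysis.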

// Process reads. This simply creates a new live read node.
//
// Note that for an indirect operand, its top dcl is its address variable.
// This models def-use for the underlying address variable, not the
// variable being addressed. An indirect definition introduces a use too.
//
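// For example (a descriptive sketch, not exact IR syntax): a store through an
// address register such as "mov (8) r[A0,0]<1>:d V33<1;1,0>:d" creates a live
// node for A0's declare via its indirect destination, while V33 is picked up
// by the source loop below; the bytes addressed through A0 are not tracked.
//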
static void processReadOpnds(G4_BB *BB, G4_INST *Inst, LocalLivenessInfo &LLI)
{
    if (Inst->isPseudoKill())
    {
        return;
    }

    // (1) Indirect dst operand reads address.
    G4_DstRegRegion* Dst = Inst->getDst();
    if (Dst && Dst->isIndirect()) {
        G4_Declare* Dcl = Dst->getTopDcl();
        assert(Dcl && "out of sync");
        LLI.LiveNodes[Dcl].emplace_back(
            Inst, Gen4_Operand_Number::Opnd_dst);
    }

    // (2) Direct and indirect source operands.
    for (auto OpNum :
        { Gen4_Operand_Number::Opnd_src0, Gen4_Operand_Number::Opnd_src1,
          Gen4_Operand_Number::Opnd_src2, Gen4_Operand_Number::Opnd_src3,
          Gen4_Operand_Number::Opnd_src4, Gen4_Operand_Number::Opnd_src5,
          Gen4_Operand_Number::Opnd_src6, Gen4_Operand_Number::Opnd_src7,
          Gen4_Operand_Number::Opnd_pred, Gen4_Operand_Number::Opnd_implAccSrc }) {
        G4_Operand* opnd = Inst->getOperand(OpNum);
        if (opnd == nullptr || opnd->isImm() || opnd->isNullReg() || opnd->isLabel())
            continue;

        if (Inst->isPseudoAddrMovIntrinsic())
        {
            G4_Declare* Dcl = opnd->asAddrExp()->getRegVar()->getDeclare();
            LLI.LiveNodes[Dcl].emplace_back(Inst, OpNum);
        }
        else
        {
            G4_Declare* Dcl = opnd->getTopDcl();
            LLI.LiveNodes[Dcl].emplace_back(Inst, OpNum);
        }
    }
}

// Process writes. If this is a partial definition, then record this partial
// definition. When all partial definitions together define this live read node,
// it is killed and du/ud links are added.
//
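// In the I1/I2/I3 example in LiveNode's comment above, the bottom-up scan
// processes I2 and then I1; each call to addDefinition clears part of the
// use's footprint, and once it is empty both defs gain def-use links to I3
// and the live node is removed.
//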
static void processWriteOpnds(G4_BB *BB, G4_INST *Inst, LocalLivenessInfo &LLI)
{
    if (Inst->isPseudoKill())
    {
        return;
    }
    for (auto OpNum : { Gen4_Operand_Number::Opnd_dst,
                        Gen4_Operand_Number::Opnd_condMod,
                        Gen4_Operand_Number::Opnd_implAccDst }) {
        G4_Operand* opnd = Inst->getOperand(OpNum);
        if (opnd == nullptr || opnd->isNullReg())
            continue;

        // Do not try to kill uses with indirect definitions.
        if (opnd->isDstRegRegion() && opnd->asDstRegRegion()->isIndirect())
            continue;

        // Iterate over all live nodes associated with the same declaration.
        auto& Nodes = LLI.LiveNodes[opnd->getTopDcl()];
        for (auto Iter = Nodes.begin(); Iter != Nodes.end(); /*empty*/) {
            LiveNode &liveNode = *Iter;
            if (liveNode.addDefinition(Inst, OpNum, LLI)) {
                Iter = kill(Nodes, Iter);
                continue;
            }
            ++Iter;
        }
    }
}

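// A descriptive summary: scan each basic block bottom-up. Reads create live
// nodes; writes try to kill the live nodes of the same declare via
// LiveNode::addDefinition. Whatever is still live when the top of the block
// is reached is treated as global (upward-exposed) and recorded in
// globalOpndHT by populateGlobals.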
void FlowGraph::localDataFlowAnalysis()
{
    for (auto BB : BBs) {
        LocalLivenessInfo LLI(!BB->isAllLaneActive());
        for (auto I = BB->rbegin(), E = BB->rend(); I != E; ++I) {
            G4_INST* Inst = *I;
            G4_opcode Op = Inst->opcode();
            if (Op == G4_opcode::G4_return || Op == G4_opcode::G4_label)
                continue;
            if (Inst->isOptBarrier()) {
                // Do not try to build def-use across an optimization barrier;
                // this effectively disables optimizations across it.
                LLI.populateGlobals(globalOpndHT);

                // A barrier does not kill, but may introduce uses.
                processReadOpnds(BB, Inst, LLI);
                continue;
            }
            processWriteOpnds(BB, Inst, LLI);
            processReadOpnds(BB, Inst, LLI);
        }

        // All leftover live nodes are global.
        LLI.populateGlobals(globalOpndHT);

        // Sort use lists according to their local ids.
        // This matches the use list order produced by forward
        // reaching-definition based analysis. It is better for
        // optimizations not to rely on this order.
        BB->resetLocalIds();
        for (auto Inst : *BB) {
            if (Inst->use_size() > 1) {
                using Ty = std::pair<vISA::G4_INST *, Gen4_Operand_Number>;
                auto Cmp = [](const Ty &lhs, const Ty &rhs) -> bool {
                    int lhsID = lhs.first->getLocalId();
                    int rhsID = rhs.first->getLocalId();
                    if (lhsID < rhsID)
                        return true;
                    else if (lhsID > rhsID)
                        return false;
                    return lhs.second < rhs.second;
                };
                Inst->sortUses(Cmp);
            }
        }
    }
}

// Reset existing def-use chains.
void FlowGraph::resetLocalDataFlowData()
{
    globalOpndHT.clearHashTable();
    for (auto bb : BBs)
    {
        for (auto inst : *bb)
        {
            inst->clearDef();
            inst->clearUse();
        }
    }
}

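// Record, for each basic block, which definitions "escape" it: a def escapes
// if it writes through an indirect destination or is not fully overwritten by
// a later def of the same root declare within the block. Illustrative example
// (an assumed snippet):
//   mov (8) V40(0,0)<1>:d 0
//   mov (8) V40(0,0)<1>:d 1
// The first mov is recorded as killed (debug builds only) and only the second
// one is added to escapedInsts for this BB.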
void DefEscapeBBAnalysis::analyzeBB(G4_BB* bb)
{
    // Active defines in this BB, organized by root declare.
    invalidateBB(bb);
    std::unordered_map<G4_Declare*, std::vector<G4_INST*> > activeDefines;
    std::vector<G4_INST*> escapedDefs;
    for (auto inst : *bb)
    {
        if (!inst->getDst())
        {
            continue;
        }

        // Analyze GRF/address dst.
        auto dst = inst->getDst();

        if (dst->isIndirect())
        {
            escapedDefs.push_back(inst);
        }
        else if (dst->getTopDcl())
        {
            auto dcl = dst->getTopDcl()->getRootDeclare();
            auto&& iter = activeDefines.find(dcl);
            if (iter == activeDefines.end())
            {
                // First define for dcl in this BB.
                activeDefines[dcl] = { inst };
            }
            else
            {
                auto&& vec = iter->second;
                // Note that the size may shrink!
                for (int i = 0; i < (int)vec.size(); ++i)
                {
                    auto prevInst = vec[i];
                    if (inst->getMaskOffset() != prevInst->getMaskOffset() ||
                        (prevInst->isWriteEnableInst() && !inst->isWriteEnableInst()) ||
                        dst->getExecTypeSize() != prevInst->getDst()->getExecTypeSize())
                    {
                        continue;
                    }
                    auto rel = dst->compareOperand(prevInst->getDst());
                    if (rel == Rel_eq || rel == Rel_gt)
                    {
                        std::swap(vec[i], vec[vec.size() - 1]);
                        vec.pop_back();
#ifdef _DEBUG
                        //std::cerr << "Inst:\t";
                        //prevInst->dump();
                        //std::cerr << "killed by Inst:\t";
                        //inst->dump();
                        auto killIter = killedDefs.find(bb);
                        if (killIter == killedDefs.end())
                        {
                            killedDefs[bb] = { prevInst };
                        }
                        else
                        {
                            auto&& killedVec = killIter->second;
                            killedVec.push_back(prevInst);
                        }
#endif
                    }
                }
                vec.push_back(inst);
            }
        }
    }

    for (auto&& iter : activeDefines)
    {
        auto&& vec = iter.second;
        for (auto I : vec)
        {
            escapedDefs.push_back(I);
        }
    }
    escapedInsts[bb] = escapedDefs;
}

void DefEscapeBBAnalysis::print(std::ostream& OS) const
{
    for (auto&& iter : escapedInsts)
    {
        G4_BB* bb = iter.first;
        OS << "BB" << bb->getId() << ":\n";
        OS << "Escaped inst:\n";
        for (auto&& inst : iter.second)
        {
            OS << "\t";
            inst->print(OS);
        }
#ifdef _DEBUG
        auto&& killIt = killedDefs.find(bb);
        if (killIt != killedDefs.end())
        {
            OS << "Killed inst:\n";
            for (auto&& inst : killIt->second)
            {
                OS << "\t";
                inst->print(OS);
            }
        }
#endif // _DEBUG
        OS << "\n";
    }
}