1 /*========================== begin_copyright_notice ============================
2 
3 Copyright (C) 2017-2021 Intel Corporation
4 
5 SPDX-License-Identifier: MIT
6 
7 ============================= end_copyright_notice ===========================*/
8 
9 /*========================== begin_copyright_notice ============================
10 
11 This file is distributed under the University of Illinois Open Source License.
12 See LICENSE.TXT for details.
13 
14 ============================= end_copyright_notice ===========================*/
15 
16 //===-------- DeSSA.cpp - divide phi variables into congruent class -------===//
17 //
18 //                     Intel LLVM Extention
19 //===----------------------------------------------------------------------===//
20 //
21 // This pass is originated from the StrongPHIElimination on the machine-ir.
22 // We have adopted it to work on llvm-ir. Also note that we have changed it
23 // from a transformation to an analysis, meaning which only divides phi-vars
24 // into congruent classes, and does NOT insert the copies. A separate code-gen
25 // pass can use this analysis to emit non-ssa target code.
26 //
27 // Algorithm and References:
28 //
29 // This pass consider how to eliminates PHI instructions by aggressively
30 // coalescing the copies that would otherwise be inserted by a naive algorithm
31 // and only inserting the copies that are necessary. The coalescing technique
32 // initially assumes that all registers appearing in a PHI instruction do not
33 // interfere. It then eliminates proven interferences, using dominators to only
34 // perform a linear number of interference tests instead of the quadratic number
35 // of interference tests that this would naively require.
36 // This is a technique derived from:
37 //
38 //    Budimlic, et al. Fast copy coalescing and live-range identification.
39 //    In Proceedings of the ACM SIGPLAN 2002 Conference on Programming Language
40 //    Design and Implementation (Berlin, Germany, June 17 - 19, 2002).
41 //    PLDI '02. ACM, New York, NY, 25-32.
42 //
43 // The original implementation constructs a data structure they call a dominance
44 // forest for this purpose. The dominance forest was shown to be unnecessary,
45 // as it is possible to emulate the creation and traversal of a dominance forest
46 // by directly using the dominator tree, rather than actually constructing the
47 // dominance forest.  This technique is explained in:
48 //
49 //   Boissinot, et al. Revisiting Out-of-SSA Translation for Correctness, Code
50 //     Quality and Efficiency,
51 //   In Proceedings of the 7th annual IEEE/ACM International Symposium on Code
52 //   Generation and Optimization (Seattle, Washington, March 22 - 25, 2009).
53 //   CGO '09. IEEE, Washington, DC, 114-125.
54 //
55 // Careful implementation allows for all of the dominator forest interference
56 // checks to be performed at once in a single depth-first traversal of the
57 // dominator tree, which is what is implemented here.
58 //===----------------------------------------------------------------------===//
59 
60 #include "Compiler/CISACodeGen/DeSSA.hpp"
61 #include "Compiler/CISACodeGen/ShaderCodeGen.hpp"
62 #include "Compiler/CISACodeGen/PatternMatchPass.hpp"
63 #include "Compiler/MetaDataApi/MetaDataApi.h"
64 #include "common/debug/Debug.hpp"
65 #include "common/debug/Dump.hpp"
66 #include "Compiler/IGCPassSupport.h"
67 #include "common/LLVMWarningsPush.hpp"
68 #include "llvmWrapper/IR/Instructions.h"
69 #include <llvm/IR/InstIterator.h>
70 #include <llvm/IR/InlineAsm.h>
71 #include <llvmWrapper/IR/DerivedTypes.h>
72 #include "common/LLVMWarningsPop.hpp"
73 #include <algorithm>
74 #include "Probe/Assertion.h"
75 
76 using namespace llvm;
77 using namespace IGC;
78 using namespace IGC::Debug;
79 using namespace IGC::IGCMD;
80 
81 #define PASS_FLAG "DeSSA"
82 #define PASS_DESCRIPTION "coalesce moves coming from phi nodes"
83 #define PASS_CFG_ONLY true
84 #define PASS_ANALYSIS true
85 IGC_INITIALIZE_PASS_BEGIN(DeSSA, PASS_FLAG, PASS_DESCRIPTION, PASS_CFG_ONLY, PASS_ANALYSIS)
86 IGC_INITIALIZE_PASS_DEPENDENCY(WIAnalysis)
87 IGC_INITIALIZE_PASS_DEPENDENCY(LiveVarsAnalysis)
88 IGC_INITIALIZE_PASS_DEPENDENCY(CodeGenPatternMatch)
89 IGC_INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
90 IGC_INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
91 IGC_INITIALIZE_PASS_DEPENDENCY(MetaDataUtilsWrapper)
92 IGC_INITIALIZE_PASS_END(DeSSA, PASS_FLAG, PASS_DESCRIPTION, PASS_CFG_ONLY, PASS_ANALYSIS)
93 
94 char DeSSA::ID = 0;
95 
DeSSA()96 DeSSA::DeSSA() : FunctionPass(ID)
97 {
98     initializeDeSSAPass(*PassRegistry::getPassRegistry());
99 }
100 
print(raw_ostream & OS,const Module *) const101 void DeSSA::print(raw_ostream& OS, const Module*) const
102 {
103     // Assign each inst/arg a unique integer so that the output
104     // would be in order. It is useful when doing comparison.
105     DenseMap<const Value*, int> Val2IntMap;
106     int id = 0;
107     if (m_F) {
108         // All arguments
109         for (auto AI = m_F->arg_begin(), AE = m_F->arg_end(); AI != AE; ++AI) {
110             Value* aVal = &*AI;
111             Val2IntMap[aVal] = (++id);
112         }
113         // All instructions
114         for (auto II = inst_begin(m_F), IE = inst_end(m_F); II != IE; ++II) {
115             Instruction* Inst = &*II;
116             Val2IntMap[(Value*)Inst] = (++id);
117         }
118     }
119 
120     bool doSort = (!Val2IntMap.empty());
121 
122     auto valCmp = [&](const Value* V0, const Value* V1) -> bool {
123         int n0 = Val2IntMap[V0];
124         int n1 = Val2IntMap[V1];
125         return n0 < n1;
126     };
127 
128     SmallVector<Value*, 64> ValKeyVec;
129     DenseMap<Value*, SmallVector<Value*, 8> > output;
130     if (IGC_IS_FLAG_ENABLED(EnableDeSSAAlias))
131     {
132         OS << "---- AliasMap ----\n\n";
133         for (auto& I : AliasMap) {
134             Value* aliaser = I.first;
135             Value* aliasee = I.second;
136             SmallVector<Value*, 8> & allAliasers = output[aliasee];
137             if (aliaser != aliasee) {
138                 allAliasers.push_back(aliaser);
139             }
140         }
141 
142         for (auto& I : output) {
143             Value* key = I.first;
144             ValKeyVec.push_back(key);
145         }
146         if (doSort) {
147             std::sort(ValKeyVec.begin(), ValKeyVec.end(), valCmp);
148         }
149         for (auto& I : ValKeyVec) {
150             Value* aliasee = I;
151             SmallVector<Value*, 8> & allAliasers = output[aliasee];
152 
153             if (doSort) {
154                 std::sort(allAliasers.begin(), allAliasers.end(), valCmp);
155             }
156 
157             OS << "  Aliasee: ";
158             aliasee->print(OS);
159             OS << "\n";
160             for (int i = 0, sz = (int)allAliasers.size(); i < sz; ++i)
161             {
162                 OS << "     ";
163                 allAliasers[i]->print(OS);
164                 OS << "\n";
165             }
166         }
167         OS << "\n\n";
168     }
169 
170     OS << "---- InsEltMap ----\n\n";
171     output.clear();
172     ValKeyVec.clear();
173     for (auto& I : InsEltMap) {
174         Value* val = I.first;
175         Value* rootV = I.second;
176         SmallVector<Value*, 8> & allVals = output[rootV];
177         if (rootV != val) {
178             allVals.push_back(val);
179         }
180     }
181 
182     for (auto& I : output) {
183         Value* key = I.first;
184         ValKeyVec.push_back(key);
185     }
186     if (doSort) {
187         std::sort(ValKeyVec.begin(), ValKeyVec.end(), valCmp);
188     }
189     for (auto& I : ValKeyVec) {
190         Value* rootV = I;
191         SmallVector<Value*, 8> & allVals = output[rootV];
192 
193         if (doSort) {
194             std::sort(allVals.begin(), allVals.end(), valCmp);
195         }
196 
197         OS << "  Root Value : ";
198         rootV->print(OS);
199         OS << "\n";
200         for (int i = 0, sz = (int)allVals.size(); i < sz; ++i)
201         {
202             OS << "       ";
203             allVals[i]->print(OS);
204             OS << "\n";
205         }
206     }
207     OS << "\n\n";
208 
209     if (IGC_IS_FLAG_ENABLED(EnableDeSSAAlias))
210     {
211         OS << "---- Multi-value Alias (value in both AliasMap & InsEltMap) ----\n";
212 
213         // All InsElt output has been sorted
214         for (auto& I : ValKeyVec) {
215             Value* rootV = I;
216             SmallVector<Value*, 8> & allVals = output[rootV];
217 
218             OS << "  Root Value: ";
219             rootV->printAsOperand(OS);
220             if (isAliasee(rootV)) {
221                 OS << " [aliasee]";
222             }
223 
224             int num = 0;
225             for (int i = 0, sz = (int)allVals.size(); i < sz; ++i)
226             {
227                 Value* val = allVals[i];
228                 if (!isAliasee(val))
229                     continue;
230                 if ((num % 8) == 0) {
231                     OS << "\n       ";
232                 }
233 
234                 allVals[i]->printAsOperand(OS);
235                 OS << " [aliasee]  ";
236                 ++num;
237             }
238             OS << "\n";
239         }
240     }
241     OS << "\n\n";
242 
243     OS << "---- Phi-Var Isolations ----\n";
244     SmallVector<Node*, 64> NodeKeyVec;
245     std::map<Node*, SmallVector<Node*, 8> > nodeOutput;
246     //std::map<Node*, int> LeaderVisited;
247     for (auto I = RegNodeMap.begin(),
248         E = RegNodeMap.end(); I != E; ++I) {
249         Node* N = I->second;
250         // We don't want to change behavior of DeSSA by invoking
251         // dumping/printing functions. Thus, don't use getLeader()
252         // as it has side-effect (doing path halving).
253         Node* Leader = N->parent;
254         while (Leader != Leader->parent) {
255             Leader = Leader->parent;
256         }
257 
258         SmallVector<Node*, 8> & allNodes = nodeOutput[Leader];
259         if (N != Leader) {
260             allNodes.push_back(N);
261         }
262     }
263 
264     auto nodeCmp = [&](const Node* N0, const Node* N1) -> bool {
265         const Value* V0 = N0->value;
266         const Value* V1 = N1->value;
267         return valCmp(V0, V1);
268     };
269 
270     for (auto& I : nodeOutput) {
271         Node* key = I.first;
272         NodeKeyVec.push_back(key);
273     }
274     if (doSort) {
275         std::sort(NodeKeyVec.begin(), NodeKeyVec.end(), nodeCmp);
276     }
277     for (auto& I : NodeKeyVec) {
278         Node* Leader = I;
279         SmallVector<Node*, 8> & allNodes = nodeOutput[Leader];
280         if (doSort) {
281             std::sort(allNodes.begin(), allNodes.end(), nodeCmp);
282         }
283 
284         Value* VL;
285         if (isIsolated(Leader)) {
286             IGC_ASSERT_MESSAGE(allNodes.size() == 0, "ICE: isolated node still in multi-value CC!");
287             VL = Leader->value;
288             OS << "\nVar isolated : ";
289             VL->print(OS);
290             OS << "\n";
291         }
292         else {
293             OS << "\nLeader : ";
294             Leader->value->print(OS);
295             OS << "\n";
296             for (auto& II : allNodes) {
297                 Node* N = II;
298                 VL = N->value;
299                 OS << "    ";
300                 VL->print(OS);
301                 OS << "\n";
302                 N = N->next;
303             }
304         }
305     }
306 }
307 
dump() const308 void DeSSA::dump() const {
309     print(dbgs());
310 }
311 
runOnFunction(Function & MF)312 bool DeSSA::runOnFunction(Function& MF)
313 {
314     m_F = &MF;
315     CurrColor = 0;
316     MetaDataUtils* pMdUtils = nullptr;
317     pMdUtils = getAnalysis<MetaDataUtilsWrapper>().getMetaDataUtils();
318     if (pMdUtils->findFunctionsInfoItem(&MF) == pMdUtils->end_FunctionsInfo())
319     {
320         return false;
321     }
322     CTX = getAnalysis<CodeGenContextWrapper>().getCodeGenContext();
323     DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
324     WIA = &getAnalysis<WIAnalysis>();
325     LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
326     CG = &getAnalysis<CodeGenPatternMatch>();
327     DL = &MF.getParent()->getDataLayout();
328     LV = &getAnalysis<LiveVarsAnalysis>().getLiveVars();
329 
330     // make sure we do not run WIAnalysis between CodeGen and DeSSA,
331     // therefore m_program's Uniform Helper is still valid, which is
332     // used indirectly in DeSSA::GetPhiTemp().
333     // If we cannot maintain this assertion, then we should do
334     //   m_program->SetUniformHelper(WIA);
335 
336     if (IGC_IS_FLAG_ENABLED(EnableDeSSAAlias))
337     {
338         //
339         // The DeSSA/Coalescing procedure:
340         //   1. Follow Dominance tree to set up alias map. While setting up alias map,
341         //      update liveness for aliasee so that alasee's liveness is the sum of
342         //      all its aliasers.
343         //      By aliaser/aliasee, it means the following:
344         //             aliaser = bitcast aliasee
345         //      (Most aliasing is from bitcast, some can be from other cast instructions
346         //       such as inttoptr/ptrtoint. It could be also from insElt/extElt.)
347         //
348         //      By traversing dominance tree depth-first (DF), it is guaranteed that
349         //      a def will be visited before its use except PHI node. Since PHI inst
350         //      is not a candidate for aliasing, this means that the def of aliasee has
351         //      been visited before the aliaser instruction. For example,
352         //            x = bitcast y
353         //         The def of y should be visited before visiting this bitcast inst.
354         //      Let alias(v0, v1) denote that v0 is an alias to v1. DF dominance-tree
355         //      traversal may not handle aliasing in the following order:
356         //              alias(v0, v1)
357         //              alias(v1, v2)
358         //      rather, it must be in the order
359         //              alias(v1, v2)
360         //              alias(v0, v1)
361         //      By doing DF dominance-tree traversal, this kind of aliasing chain will
362         //      be handled directly.
363         //
364         //   2. Set up InsEltMap, which coalesces vector values used in InsertElement
365         //      instructions. It is treated as "alias", meaning the root value's
366         //      liveness is the sum of all its non-root values. The difference b/w
367         //      AliasMap and InsEltMap is that AliasMap is pure alias in that all
368         //      aliasers have the same values as its aliasee (single-valued, like SSA);
369         //      while InsElt has multiple-valued values. This difference does not matter
370         //      in dessa, but it would matter when handling sub-vector aliasing later.
371         //
372         //      We could remove InsEltMap by adding each value into DeSSA node. To do
373         //      so, dessa traversal needs to be modified to have def of those values
374         //      in PhiSrcDefs. This will generally have a larger CC, which means more
375         //      compiling time.
376         //   3. Make sure DeSSA node only use the node value, that is, given value V,
377         //        its Node value:
378         //                V_aliasee = AliasMap[V] if V is in map, or V otherwise
379         //                node_value = InsEltMap[V_aliasee] if in InsEltMap; or V_aliasee
380         //                             otherwise.
381         //      Note that since the type of aliasess may be different from aliaser,
382         //      the values in the same CC will have different types.  Keep this in mind
383         //      when creating CVariable in GetSymbol().
384         //
385         //  Note that the algorithem forces coalescing of aliasing inst and InsertElement
386         //  inst before PHI-coalescing, which means it favors coaslescing of those aliasing
387         //  inst and InsertElement instructions. Thus, values in AliasMap/InsEltMap are
388         //  guananteed to be coalesced together at the end of DeSSA. PHI coalescing may
389         //  extend those maps by adding other values.
390         //
391         for (df_iterator<DomTreeNode*> DI = df_begin(DT->getRootNode()),
392             DE = df_end(DT->getRootNode()); DI != DE; ++DI) {
393             CoalesceAliasInstForBasicBlock(DI->getBlock());
394         }
395     }
396 
397     for (df_iterator<DomTreeNode*> DI = df_begin(DT->getRootNode()),
398         DE = df_end(DT->getRootNode()); DI != DE; ++DI) {
399         CoalesceInsertElementsForBasicBlock(DI->getBlock());
400     }
401 
402     // checkPHILoopInput
403     //  PreHeader:
404     //      x = ...
405     //  Header:
406     //      phi0 = [x, PreHeader], [t0, End]
407     //      phi1 = [x, PreHeader], [t1, End]
408     //      phi2 = [x, PreHeader], [t2, End]
409     //      ...
410     //  End:
411     //      ...
412     //      goto Header
413     //
414     //  The algorithme below will start with a largest congruent class possible,
415     //  which unions all phi's with the same source operands. This ends up with
416     //  a single congruent class of all phi's with x as their source operand.
417     //  Later, the algorithm isolates phi's as they interfere with each other,
418     //  causing mov instructions to be generated within the loop at BB End.
419     //
420     //  However, since all phi instructions are live at the same time, we will
421     //  not be able to coalesce them. In another word, there is no need to put
422     //  all phi's into the same congruent class in the first place. To achieve
423     //  this, we use a Value-to-int map to keep how many times a value is used
424     //  in the phi's,  and if the number of uses is over a threshold, we will
425     //  isolate the source operand and do not union it with its phi. In doing
426     //  so it is likely for the algorithm to coalesce the phi's dst and the
427     //  other src that is used in the loop, and therefore remove mov instrutions
428     //  in the loop.
429   //
430   //  Note that isolating a value introduce additional copy, thus a threshold
431   //  is used here as a heuristic to try to make sure that a benefit is more
432   //  than the cost.
433     enum { PHI_SRC_USE_THRESHOLD = 3 };  // arbitrary number
434     DenseMap<Value*, int> PHILoopPreHeaderSrcs;
435 
436     // build initial congruent class using union-find
437     for (Function::iterator I = MF.begin(), E = MF.end();
438         I != E; ++I)
439     {
440         // First, initialize PHILoopPreHeaderSrcs map
441         BasicBlock* MBB = &*I;
442         Loop* LP = LI ? LI->getLoopFor(MBB) : nullptr;
443         BasicBlock* PreHeader = LP ? LP->getLoopPredecessor() : nullptr;
444         bool checkPHILoopInput = LP && (LP->getHeader() == MBB) && PreHeader;
445         PHILoopPreHeaderSrcs.clear();
446         if (checkPHILoopInput)
447         {
448             for (BasicBlock::iterator BBI = I->begin(), BBE = I->end();
449                 BBI != BBE; ++BBI) {
450                 PHINode* PHI = dyn_cast<PHINode>(BBI);
451                 if (!PHI) {
452                     break;
453                 }
454 
455                 int srcIx = PHI->getBasicBlockIndex(PreHeader);
456                 if (srcIx < 0) {
457                     continue;
458                 }
459                 Value* SrcVal = PHI->getOperand(srcIx);
460                 if (isa<Constant>(SrcVal)) {
461                     continue;
462                 }
463                 if (PHILoopPreHeaderSrcs.count(SrcVal) == 0) {
464                     PHILoopPreHeaderSrcs[SrcVal] = 0;  // initialize to zero
465                 }
466                 PHILoopPreHeaderSrcs[SrcVal] += 1;
467             }
468         }
469 
470         for (BasicBlock::iterator BBI = I->begin(), BBE = I->end();
471             BBI != BBE; ++BBI) {
472             PHINode* PHI = dyn_cast<PHINode>(BBI);
473             if (!PHI) {
474                 break;
475             }
476 
477             e_alignment DefAlign = GetPreferredAlignment(PHI, WIA, CTX);
478             IGC_ASSERT(PHI == getNodeValue(PHI));
479 
480             addReg(PHI, DefAlign);
481             PHISrcDefs[&(*I)].push_back(PHI);
482 
483             for (unsigned i = 0; i < PHI->getNumOperands(); ++i) {
484                 Value* OrigSrcVal = PHI->getOperand(i);
485                 // skip constant
486                 if (isa<Constant>(OrigSrcVal))
487                     continue;
488 
489                 // condition for preheader-src-isolation
490                 bool PreheaderSrcIsolation = (checkPHILoopInput &&
491                     !isa<InsertElementInst>(OrigSrcVal) && !isa<PHINode>(OrigSrcVal) &&
492                     PHI->getIncomingBlock(i) == PreHeader &&
493                     PHILoopPreHeaderSrcs.count(OrigSrcVal) > 0 &&
494                     PHILoopPreHeaderSrcs[OrigSrcVal] >= PHI_SRC_USE_THRESHOLD);
495                 // add src to the union
496                 Value* SrcVal;
497                 SrcVal = getNodeValue(OrigSrcVal);
498                 e_alignment SrcAlign = GetPreferredAlignment(OrigSrcVal, WIA, CTX);
499                 Instruction* DefMI = dyn_cast<Instruction>(SrcVal);
500                 if (DefMI) {
501                     if (CG->SIMDConstExpr(DefMI)) {
502                         continue;  // special case, simdSize becomes a constant in vISA
503                     }
504                     addReg(SrcVal, SrcAlign);
505                     PHISrcDefs[DefMI->getParent()].push_back(DefMI);
506                     if (WIA->whichDepend(PHI) == WIA->whichDepend(SrcVal) && !PreheaderSrcIsolation) {
507                         unionRegs(PHI, SrcVal);
508                     }
509                 }
510                 else if (isa<Argument>(SrcVal)) {
511                     addReg(SrcVal, SrcAlign);
512                     PHISrcArgs.insert(SrcVal);
513                     if (WIA->whichDepend(PHI) == WIA->whichDepend(SrcVal) && !PreheaderSrcIsolation) {
514                         unionRegs(PHI, SrcVal);
515                     }
516                 }
517                 // cases that we need to isolate source
518                 if (CG->IsForceIsolated(SrcVal) || PreheaderSrcIsolation) {
519                     isolateReg(SrcVal);
520                 }
521             } // end of source-operand loop
522             // isolate complex type that IGC does not handle
523             if (PHI->getType()->isStructTy() ||
524                 PHI->getType()->isArrayTy()) {
525                 isolateReg(PHI);
526             }
527         }
528     }
529 
530     // \todo, the original paper talks aibout some before-hand quick
531     // isolation. The idea is to identify those essential splitting first
532     // in order to avoid unnecessary splitting in the next loop.
533 
534     // Perform a depth-first traversal of the dominator tree, splitting
535     // interferences amongst PHI-congruence classes.
536     if (!RegNodeMap.empty()) {
537         DenseMap<int, Value*> CurrentDominatingParent;
538         DenseMap<Value*, Value*> ImmediateDominatingParent;
539         // first, go through the function arguments
540         SplitInterferencesForArgument(CurrentDominatingParent, ImmediateDominatingParent);
541         // Then all the blocks
542         for (df_iterator<DomTreeNode*> DI = df_begin(DT->getRootNode()),
543             DE = df_end(DT->getRootNode()); DI != DE; ++DI) {
544             SplitInterferencesForBasicBlock(DI->getBlock(),
545                 CurrentDominatingParent,
546                 ImmediateDominatingParent);
547         }
548     }
549 
550     // Handle values that have specific alignment requirement.
551     SplitInterferencesForAlignment();
552 
553     if (IGC_IS_FLAG_ENABLED(DumpDeSSA))
554     {
555         const char* fname = MF.getName().data();
556         using namespace IGC::Debug;
557         auto name =
558             DumpName(GetShaderOutputName())
559             .Hash(CTX->hash)
560             .Type(CTX->type)
561             .Pass("dessa")
562             .PostFix(fname)
563             .Retry(CTX->m_retryManager.GetRetryId())
564             .Extension("txt");
565 
566         Dump dessaDump(name, DumpType::DBG_MSG_TEXT);
567 
568         DumpLock();
569         print(dessaDump.stream());
570         DumpUnlock();
571     }
572     m_F = nullptr;
573     return false;
574 }
575 
addReg(Value * Val,e_alignment Align)576 void DeSSA::addReg(Value* Val, e_alignment Align) {
577     if (RegNodeMap.count(Val))
578         return;
579     RegNodeMap[Val] = new (Allocator) Node(Val, ++CurrColor, Align);
580 }
581 // Using Path Halving in union-find
582 DeSSA::Node*
getLeader()583 DeSSA::Node::getLeader() {
584     Node* N = this;
585     Node* Parent = parent;
586     Node* Grandparent = Parent->parent;
587 
588     while (Parent != Grandparent) {
589         N->parent = Grandparent;
590         N = Grandparent;
591         Parent = N->parent;
592         Grandparent = Parent->parent;
593     }
594 
595     return Parent;
596 }
597 
getRegRoot(Value * Val,e_alignment * pAlign) const598 Value* DeSSA::getRegRoot(Value* Val, e_alignment* pAlign) const {
599     auto RI = RegNodeMap.find(Val);
600     if (RI == RegNodeMap.end())
601         return nullptr;
602     Node* TheNode = RI->second;
603     if (isIsolated(TheNode))
604         return nullptr;
605     Node* TheLeader = TheNode->getLeader();
606     if (pAlign)
607         * pAlign = TheLeader->alignment;
608     return TheLeader->value;
609 }
610 
getRootColor(Value * V)611 int DeSSA::getRootColor(Value* V)
612 {
613     auto RI = RegNodeMap.find(V);
614     if (RI == RegNodeMap.end())
615         return 0;
616     Node* TheNode = RI->second;
617     if (isIsolated(TheNode))
618         return 0;
619     Node* TheLeader = TheNode->getLeader();
620     return TheLeader->color;
621 }
622 
unionRegs(Node * Nd1,Node * Nd2)623 void DeSSA::unionRegs(Node* Nd1, Node* Nd2)
624 {
625     Node* N1 = Nd1->getLeader();
626     Node* N2 = Nd2->getLeader();
627     Node* NewLeader = nullptr;
628     Node* Leadee = nullptr;
629 
630     if (N1 == N2)
631         return;
632 
633     if (N1->rank > N2->rank) {
634         NewLeader = N1;
635         Leadee = N2;
636     }
637     else if (N1->rank < N2->rank) {
638         NewLeader = N2;
639         Leadee = N1;
640     }
641     else {
642         NewLeader = N1;
643         Leadee = N2;
644         NewLeader->rank++;
645     }
646 
647     IGC_ASSERT_MESSAGE(nullptr != NewLeader, "ICE: both leader and leadee shall not be null!");
648     IGC_ASSERT_MESSAGE(nullptr != Leadee, "ICE: both leader and leadee shall not be null!");
649     Leadee->parent = NewLeader;
650 
651     // Link the circular list of Leadee right before NewLeader
652     Node* Leadee_prev = Leadee->prev;
653     Node* NewLeader_prev = NewLeader->prev;
654     NewLeader_prev->next = Leadee;
655     Leadee->prev = NewLeader_prev;
656     Leadee_prev->next = NewLeader;
657     NewLeader->prev = Leadee_prev;
658 }
659 
isolateReg(Value * Val)660 void DeSSA::isolateReg(Value* Val) {
661     Node* ND = RegNodeMap[Val];
662     splitNode(ND);
663 }
664 
isIsolated(Value * V) const665 bool DeSSA::isIsolated(Value* V) const {
666     auto RI = RegNodeMap.find(V);
667     if (RI == RegNodeMap.end()) {
668         return true;
669     }
670     Node* DestNode = RI->second;
671     return isIsolated(DestNode);
672 }
673 
674 // Split node ND from its existing congurent class, and the
675 // node ND itself becomes a new single-value congruent class.
splitNode(Node * ND)676 void DeSSA::splitNode(Node* ND)
677 {
678     Node* N = ND->next;
679     if (N == ND) {
680         // ND is already in a single-value congruent class
681         return;
682     }
683 
684     Node* Leader = ND->getLeader();
685 
686     // Remove ND from the congruent class
687     Node* P = ND->prev;
688     N->prev = P;
689     P->next = N;
690 
691     // ND : a new single-value congruent class
692     ND->parent = ND;
693     ND->next = ND;
694     ND->prev = ND;
695     ND->rank = 0;
696 
697     // If leader is removed, need to have a new leader.
698     if (Leader == ND) {
699         // P will be the new leader. Also swap ND's color with P's
700         // so that the original congruent class still have the original
701         // color (this is important as Dom traversal assumes that the
702         // color of any congruent class remains unchanged).
703         int t = P->color;
704         P->color = ND->color;
705         ND->color = t;
706 
707         // New leader
708         Leader = P;
709     }
710 
711     // If ND has children, those children need to set their parent.
712     // Since we don't know if ND has children, we conservatively set
713     // parent for all remaining nodes using "a path compression", so
714     // that all nodes remains in the same rooted tree.
715     N = Leader->next;
716     Leader->parent = Leader;
717     Leader->rank = (Leader == N) ? 0 : 1;
718     while (N != Leader)
719     {
720         N->parent = Leader;
721         N->rank = 0;
722         N = N->next;
723     }
724 }
725 
726 /// SplitInterferencesForBasicBlock - traverses a basic block, splitting any
727 /// interferences found between registers in the same congruence class. It
728 /// takes two DenseMaps as arguments that it also updates:
729 ///
730 /// 1) CurrentDominatingParent, which maps a color to the register in that
731 ///    congruence class whose definition was most recently seen.
732 ///
733 /// 2) ImmediateDominatingParent, which maps a register to the register in the
734 ///    same congruence class that most immediately dominates it.
735 ///
736 /// This function assumes that it is being called in a depth-first traversal
737 /// of the dominator tree.
738 ///
739 /// The algorithm used here is a generalization of the dominance-based SSA test
740 /// for two variables. If there are variables a_1, ..., a_n such that
741 ///
742 ///   def(a_1) dom ... dom def(a_n),
743 ///
744 /// then we can test for an interference between any two a_i by only using O(n)
745 /// interference tests between pairs of variables. If i < j and a_i and a_j
746 /// interfere, then a_i is alive at def(a_j), so it is also alive at def(a_i+1).
747 /// Thus, in order to test for an interference involving a_i, we need only check
748 /// for a potential interference with a_i+1.
749 ///
750 /// This method can be generalized to arbitrary sets of variables by performing
751 /// a depth-first traversal of the dominator tree. As we traverse down a branch
752 /// of the dominator tree, we keep track of the current dominating variable and
753 /// only perform an interference test with that variable. However, when we go to
754 /// another branch of the dominator tree, the definition of the current dominating
755 /// variable may no longer dominate the current block. In order to correct this,
756 /// we need to use a stack of past choices of the current dominating variable
757 /// and pop from this stack until we find a variable whose definition actually
758 /// dominates the current block.
759 ///
760 /// There will be one push on this stack for each variable that has become the
761 /// current dominating variable, so instead of using an explicit stack we can
762 /// simply associate the previous choice for a current dominating variable with
763 /// the new choice. This works better in our implementation, where we test for
764 /// interference in multiple distinct sets at once.
765 void
SplitInterferencesForBasicBlock(BasicBlock * MBB,DenseMap<int,Value * > & CurrentDominatingParent,DenseMap<Value *,Value * > & ImmediateDominatingParent)766 DeSSA::SplitInterferencesForBasicBlock(
767     BasicBlock* MBB,
768     DenseMap<int, Value*>& CurrentDominatingParent,
769     DenseMap<Value*, Value*>& ImmediateDominatingParent) {
770     // Sort defs by their order in the original basic block, as the code below
771     // assumes that it is processing definitions in dominance order.
772     std::vector<Instruction*>& DefInstrs = PHISrcDefs[MBB];
773     std::sort(DefInstrs.begin(), DefInstrs.end(), MIIndexCompare(LV));
774 
775     for (std::vector<Instruction*>::const_iterator BBI = DefInstrs.begin(),
776         BBE = DefInstrs.end(); BBI != BBE; ++BBI) {
777 
778         Instruction* DefMI = *BBI;
779 
780         // If the virtual register being defined is not used in any PHI or has
781         // already been isolated, then there are no more interferences to check.
782         int RootC = getRootColor(DefMI);
783         if (!RootC)
784             continue;
785 
786         // The input to this pass sometimes is not in SSA form in every basic
787         // block, as some virtual registers have redefinitions. We could eliminate
788         // this by fixing the passes that generate the non-SSA code, or we could
789         // handle it here by tracking defining machine instructions rather than
790         // virtual registers. For now, we just handle the situation conservatively
791         // in a way that will possibly lead to false interferences.
792         Value* NewParent = CurrentDominatingParent[RootC];
793         if (NewParent == DefMI)
794             continue;
795 
796         // Pop registers from the stack represented by ImmediateDominatingParent
797         // until we find a parent that dominates the current instruction.
798         while (NewParent) {
799             if (getRootColor(NewParent)) {
800                 // we have added the another condition because the domination-test
801                 // does not work between two phi-node. See the following comments
802                 // from the DT::dominates:
803                 // " It is not possible to determine dominance between two PHI nodes
804                 //   based on their ordering
805                 //  if (isa<PHINode>(A) && isa<PHINode>(B))
806                 //    return false;"
807                 if (isa<Argument>(NewParent)) {
808                     break;
809                 }
810                 else if (DT->dominates(cast<Instruction>(NewParent), DefMI)) {
811                     break;
812                 }
813                 else if (cast<Instruction>(NewParent)->getParent() == MBB &&
814                     isa<PHINode>(DefMI) && isa<PHINode>(NewParent)) {
815                     break;
816                 }
817             }
818             NewParent = ImmediateDominatingParent[NewParent];
819         }
820         // If NewParent is nonzero, then its definition dominates the current
821         // instruction, so it is only necessary to check for the liveness of
822         // NewParent in order to check for an interference.
823         if (NewParent && LV->isLiveAt(NewParent, DefMI)) {
824             // If there is an interference, always isolate the new register. This
825             // could be improved by using a heuristic that decides which of the two
826             // registers to isolate.
827             isolateReg(DefMI);
828             CurrentDominatingParent[RootC] = NewParent;
829         }
830         else {
831             // If there is no interference, update ImmediateDominatingParent and set
832             // the CurrentDominatingParent for this color to the current register.
833             ImmediateDominatingParent[DefMI] = NewParent;
834             CurrentDominatingParent[RootC] = DefMI;
835         }
836     }
837 
838     // We now walk the PHIs in successor blocks and check for interferences. This
839     // is necessary because the use of a PHI's operands are logically contained in
840     // the predecessor block. The def of a PHI's destination register is processed
841     // along with the other defs in a basic block.
842 
843     CurrentPHIForColor.clear();
844 
845     for (succ_iterator SI = succ_begin(MBB), E = succ_end(MBB); SI != E; ++SI) {
846         for (BasicBlock::iterator BBI = (*SI)->begin(), BBE = (*SI)->end();
847             BBI != BBE; ++BBI) {
848             PHINode* PHI = dyn_cast<PHINode>(BBI);
849             if (!PHI) {
850                 break;
851             }
852 
853             int RootC = getRootColor(PHI);
854             // check live-out interference
855             if (IGC_IS_FLAG_ENABLED(EnableDeSSAWA) && !RootC)
856             {
857                 // [todo] delete this code
858                 if (CTX->type == ShaderType::COMPUTE_SHADER)
859                 {
860                     for (unsigned i = 0; !RootC && i < PHI->getNumOperands(); i++) {
861                         Value* SrcVal = PHI->getOperand(i);
862                         if (!isa<Constant>(SrcVal)) {
863                             SrcVal = getNodeValue(SrcVal);
864                             RootC = getRootColor(SrcVal);
865                         }
866                     }
867                 }
868             }
869             if (!RootC) {
870                 continue;
871             }
872             // Find the index of the PHI operand that corresponds to this basic block.
873             unsigned PredIndex;
874             for (PredIndex = 0; PredIndex < PHI->getNumOperands(); ++PredIndex) {
875                 if (PHI->getIncomingBlock(PredIndex) == MBB)
876                     break;
877             }
878             IGC_ASSERT(PredIndex < PHI->getNumOperands());
879             Value* PredValue = PHI->getOperand(PredIndex);
880             PredValue = getNodeValue(PredValue);
881             std::pair<Instruction*, Value*>& CurrentPHI = CurrentPHIForColor[RootC];
882             // If two PHIs have the same operand from every shared predecessor, then
883             // they don't actually interfere. Otherwise, isolate the current PHI. This
884             // could possibly be improved, e.g. we could isolate the PHI with the
885             // fewest operands.
886             if (CurrentPHI.first && CurrentPHI.second != PredValue) {
887                 isolateReg(PHI);
888                 continue;
889             }
890             else {
891                 CurrentPHI = std::make_pair(PHI, PredValue);
892             }
893 
894             // check live-out interference
895             // Pop registers from the stack represented by ImmediateDominatingParent
896             // until we find a parent that dominates the current instruction.
897             Value* NewParent = CurrentDominatingParent[RootC];
898             while (NewParent) {
899                 if (getRootColor(NewParent)) {
900                     if (isa<Argument>(NewParent)) {
901                         break;
902                     }
903                     else if (DT->dominates(cast<Instruction>(NewParent)->getParent(), MBB)) {
904                         break;
905                     }
906                 }
907                 NewParent = ImmediateDominatingParent[NewParent];
908             }
909             CurrentDominatingParent[RootC] = NewParent;
910 
911             // If there is an interference with a register, always isolate the
912             // register rather than the PHI. It is also possible to isolate the
913             // PHI, but that introduces copies for all of the registers involved
914             // in that PHI.
915             if (NewParent && NewParent != PredValue && LV->isLiveOut(NewParent, *MBB)) {
916                 isolateReg(NewParent);
917             }
918         }
919     }
920 }
921 
922 void
SplitInterferencesForArgument(DenseMap<int,Value * > & CurrentDominatingParent,DenseMap<Value *,Value * > & ImmediateDominatingParent)923 DeSSA::SplitInterferencesForArgument(
924     DenseMap<int, Value*>& CurrentDominatingParent,
925     DenseMap<Value*, Value*>& ImmediateDominatingParent) {
926     // No two arguments can be in the same congruent class
927     for (auto BBI = PHISrcArgs.begin(),
928         BBE = PHISrcArgs.end(); BBI != BBE; ++BBI) {
929         Value* AV = *BBI;
930         // If the virtual register being defined is not used in any PHI or has
931         // already been isolated, then there are no more interferences to check.
932         int RootC = getRootColor(AV);
933         if (!RootC)
934             continue;
935         Value* NewParent = CurrentDominatingParent[RootC];
936         if (NewParent) {
937             isolateReg(AV);
938         }
939         else {
940             CurrentDominatingParent[RootC] = AV;
941         }
942     }
943 }
944 
945 // [todo] get rid of alignment-based isolation in dessa.
946 // Using alignment in isolation seems over-kill. The right approach
947 // would be one that avoids adding values with conflicting alignment
948 // requirement in the same congruent, not adding them in the same
949 // congruent class first and trying to isolate them later.
SplitInterferencesForAlignment()950 void DeSSA::SplitInterferencesForAlignment()
951 {
952     for (auto I = RegNodeMap.begin(), E = RegNodeMap.end(); I != E; ++I)
953     {
954         // Find a root Node
955         Node* rootNode = I->second;
956         if (rootNode->parent != rootNode) {
957             continue;
958         }
959 
960         e_alignment Align = EALIGN_AUTO;
961         // Find the most restrictive alignment, i.e. GRF aligned ones.
962         Node* N = rootNode;
963         Node* Curr;
964         do {
965             Curr = N;
966             N = Curr->next;
967             if (Curr->alignment == EALIGN_GRF) {
968                 Align = EALIGN_GRF;
969                 break;
970             }
971         } while (N != rootNode);
972 
973         if (Align != EALIGN_GRF)
974             continue;
975 
976         // Isolate any mis-aligned value.
977         // Start with Curr node as it cannot be isolated
978         // (rootNode could be isolated), therefore, it remains
979         // in the linked list and can be used to test stop looping.
980         Node* Head = Curr;
981         N = Head;
982         do {
983             Curr = N;
984             N = N->next;
985             if (Curr->alignment != EALIGN_AUTO && Curr->alignment != EALIGN_GRF)
986             {
987                 IGC_ASSERT(nullptr != Curr);
988                 IGC_ASSERT_MESSAGE((Curr != Head), "Head Node cannot be isolated, something wrong!");
989                 isolateReg(Curr->value);
990             }
991         } while (N != Head);
992 
993         // Update root's alignment.
994         Head->getLeader()->alignment = Align;
995     }
996 }
997 
998 Value*
getInsEltRoot(Value * Val) const999 DeSSA::getInsEltRoot(Value* Val) const
1000 {
1001     auto RI = InsEltMap.find(Val);
1002     if (RI == InsEltMap.end())
1003         return Val;
1004     return RI->second;
1005 }
1006 
1007 /// <summary>
1008 /// Identify if an instruction has partial write semantics
1009 /// </summary>
1010 /// <param name="Inst"></param>
1011 /// <returns> the index of the source partial-write operand</returns>
1012 static
getPartialWriteSource(Value * Inst)1013 int getPartialWriteSource(Value *Inst)
1014 {
1015     if (isa<InsertElementInst>(Inst))
1016         return 0;  // source 0 is the original value
1017     if (auto CI = dyn_cast<CallInst>(Inst)) {
1018         // only handle inline-asm with simple destination
1019         if (CI->isInlineAsm() && !CI->getType()->isStructTy()) {
1020             InlineAsm* IA = cast<InlineAsm>(IGCLLVM::getCalledValue(CI));
1021             StringRef constraintStr(IA->getConstraintString());
1022             SmallVector<StringRef, 8> constraints;
1023             constraintStr.split(constraints, ',');
1024             for (int i = 0; i < (int)constraints.size(); i++) {
1025                 unsigned destID = 0;
1026                 if (constraints[i].getAsInteger(10, destID) == 0) {
1027                     // constraint-string indicates that source(i-1) and
1028                     // destination should be the same vISA variable
1029                     if (i > 0 && destID == 0)
1030                         return (i - 1);
1031                 }
1032             }
1033         }
1034     }
1035     return -1;
1036 }
1037 
1038 void
CoalesceInsertElementsForBasicBlock(BasicBlock * Blk)1039 DeSSA::CoalesceInsertElementsForBasicBlock(BasicBlock* Blk)
1040 {
1041     if (IGC_IS_FLAG_ENABLED(EnableDeSSAAlias))
1042     {
1043         for (BasicBlock::iterator BBI = Blk->begin(), BBE = Blk->end();
1044             BBI != BBE; ++BBI) {
1045             Instruction* Inst = &(*BBI);
1046 
1047             if (!CG->NeedInstruction(*Inst)) {
1048                 continue;
1049             }
1050             // Only Aliasee needs to be handled.
1051             if (getAliasee(Inst) != Inst) {
1052                 continue;
1053             }
1054 
1055             // For keeping the existing behavior of InsEltMap unchanged
1056             auto PWSrcIdx = getPartialWriteSource(Inst);
1057             if (PWSrcIdx >= 0)
1058             {
1059                 Value* origSrcV = Inst->getOperand(PWSrcIdx);
1060                 Value* SrcV = getAliasee(origSrcV);
1061                 if (SrcV != Inst && isArgOrNeededInst(origSrcV))
1062                 {
1063                     // union them
1064                     e_alignment InstAlign = GetPreferredAlignment(Inst, WIA, CTX);
1065                     e_alignment SrcVAlign = GetPreferredAlignment(SrcV, WIA, CTX);
1066                     if (!LV->isLiveAt(SrcV, Inst) &&
1067                         !alignInterfere(InstAlign, SrcVAlign) &&
1068                         (WIA->whichDepend(SrcV) == WIA->whichDepend(Inst)))
1069                     {
1070                         InsEltMapAddValue(SrcV);
1071                         InsEltMapAddValue(Inst);
1072 
1073                         Value* SrcVRoot = getInsEltRoot(SrcV);
1074                         Value* InstRoot = getInsEltRoot(Inst);
1075 
1076                         // union them and their liveness info
1077                         InsEltMapUnionValue(SrcV, Inst);
1078                         LV->mergeUseFrom(SrcVRoot, InstRoot);
1079                     }
1080                 }
1081             }
1082         }
1083         return;
1084     }
1085 
1086     for (BasicBlock::iterator BBI = Blk->begin(), BBE = Blk->end();
1087         BBI != BBE; ++BBI) {
1088         Instruction* Inst = &(*BBI);
1089         // skip phi, phi is handled in the 2nd loop
1090         if (isa<PHINode>(Inst))
1091         {
1092             continue;
1093         }
1094 
1095         // extend the liveness of InsertElement due to union
1096         for (unsigned i = 0; i < Inst->getNumOperands(); ++i) {
1097             Value* SrcV = Inst->getOperand(i);
1098             if (getPartialWriteSource(SrcV) >= 0) {
1099                 Value* RootV = getInsEltRoot(SrcV);
1100                 if (RootV != SrcV) {
1101                     LV->HandleVirtRegUse(RootV, Blk, Inst, true);
1102                 }
1103             }
1104         }
1105 
1106         auto PWSrcIdx = getPartialWriteSource(Inst);
1107         if (PWSrcIdx < 0) {
1108             continue;
1109         }
1110         // handle InsertElement
1111         InsEltMapAddValue(Inst);
1112 
1113         Value* SrcV = Inst->getOperand(PWSrcIdx);
1114         if (isa<Instruction>(SrcV) || isa<Argument>(SrcV)) {
1115             if (!LV->isLiveAt(SrcV, Inst)) {
1116                 Instruction* SrcDef = dyn_cast<Instruction>(SrcV);
1117                 if (SrcDef && WIA->whichDepend(SrcDef) == WIA->whichDepend(Inst)) {
1118                     // passed the liveness and alignment test
1119                     // may need to create a node for srcv, for example, when srcv is phi/arg
1120                     InsEltMapAddValue(SrcV);
1121                     InsEltMapUnionValue(SrcV, Inst);
1122                 }
1123             }
1124         }
1125     }
1126     // look at all the phis in the successor blocks
1127     // extend live-ranges due to the union of insert-element
1128     for (succ_iterator SI = succ_begin(Blk), E = succ_end(Blk); SI != E; ++SI)
1129     {
1130         for (BasicBlock::iterator BBI = (*SI)->begin(), BBE = (*SI)->end(); BBI != BBE; ++BBI)
1131         {
1132             PHINode* phi = dyn_cast<PHINode>(BBI);
1133             if (phi)
1134             {
1135                 // extend the liveness of InsertElement due to union
1136                 Value* SrcV = phi->getIncomingValueForBlock(Blk);
1137                 if (isa<InsertElementInst>(SrcV)) {
1138                     Value* RootV = getInsEltRoot(SrcV);
1139                     if (RootV != SrcV) {
1140                         BasicBlock* DefBlk = (isa<Instruction>(RootV)) ?
1141                             cast<Instruction>(RootV)->getParent() : NULL;
1142                         LV->MarkVirtRegAliveInBlock(LV->getLVInfo(RootV), DefBlk, Blk);
1143                     }
1144                 }
1145             }
1146             else
1147             {
1148                 break;
1149             }
1150         }
1151     }
1152 }
1153 
getRootValue(Value * Val,e_alignment * pAlign) const1154 Value* DeSSA::getRootValue(Value* Val, e_alignment* pAlign) const
1155 {
1156     if (IGC_IS_FLAG_ENABLED(EnableDeSSAAlias))
1157     {
1158         Value* mapVal = nullptr;
1159         auto AI = AliasMap.find(Val);
1160         if (AI != AliasMap.end()) {
1161             mapVal = AI->second;
1162         }
1163         auto IEI = InsEltMap.find(mapVal ? mapVal : Val);
1164         if (IEI != InsEltMap.end()) {
1165             mapVal = IEI->second;
1166         }
1167         Value* PhiRootVal = getRegRoot(mapVal ? mapVal : Val, pAlign);
1168         return (PhiRootVal ? PhiRootVal : mapVal);
1169     }
1170 
1171     auto RI = InsEltMap.find(Val);
1172     if (RI != InsEltMap.end()) {
1173         Value* InsEltRoot = RI->second;
1174         Value* PhiRootVal = getRegRoot(InsEltRoot, pAlign);
1175         return (PhiRootVal ? PhiRootVal : InsEltRoot);
1176     }
1177     return getRegRoot(Val, pAlign);
1178 }
1179 
getAllValuesInCongruentClass(Value * V,SmallVector<Value *,8> & ValsInCC)1180 void DeSSA::getAllValuesInCongruentClass(
1181     Value* V,
1182     SmallVector<Value*, 8> & ValsInCC)
1183 {
1184     // Handle InsertElement specially. Note that only rootValue from
1185     // a sequence of insertElement is in congruent class. The RootValue
1186     // has its liveness modified to cover all InsertElements that are
1187     // grouped together.
1188     Value* RootV = nullptr;
1189     RootV = getNodeValue(V);
1190 
1191     IGC_ASSERT_MESSAGE(nullptr != RootV, "ICE: Node value should not be nullptr!");
1192     ValsInCC.push_back(RootV);
1193     auto RI = RegNodeMap.find(RootV);
1194     if (RI != RegNodeMap.end()) {
1195         Node* First = RI->second;
1196         for (Node* N = First->next; N != First; N = N->next)
1197         {
1198             ValsInCC.push_back(N->value);
1199         }
1200     }
1201     return;
1202 }
1203 
CoalesceAliasInstForBasicBlock(BasicBlock * Blk)1204 void DeSSA::CoalesceAliasInstForBasicBlock(BasicBlock* Blk)
1205 {
1206     if (IGC_GET_FLAG_VALUE(EnableDeSSAAlias) < 2) {
1207         return;
1208     }
1209     for (BasicBlock::iterator BBI = Blk->begin(), BBE = Blk->end();
1210         BBI != BBE; ++BBI) {
1211         Instruction* I = &(*BBI);
1212 
1213         // Now, better to think of code as a sequence Codegen Patterns,
1214         // not a sequence of llvm instructions.
1215         if (!CG->NeedInstruction(*I)) {
1216             continue;
1217         }
1218 
1219         if (InsertElementInst * IEI = dyn_cast<InsertElementInst>(I))
1220         {
1221             if (isa<UndefValue>(I->getOperand(0)))
1222             {
1223                 SmallVector<Value*, 16> AllIEIs;
1224                 int nelts = checkInsertElementAlias(IEI, AllIEIs);
1225                 if (nelts > 1)
1226                 {
1227                     //  Consider the following as an alias if all
1228                     //  Vi, i=0, n-1 (except Vn) has a single use.
1229                     //     V0 = InsElt undef, S0, 0
1230                     //     V1 = InsElt V0, S1, 1
1231                     //     ...
1232                     //     Vn = InsElt Vn-1, Sn, n
1233                     //
1234                     //  AliasMap has the following:
1235                     //     alias(V0, V0)
1236                     //     alias(V1, V0)
1237                     //     alias(V2, V0)
1238                     //     ......
1239                     //     alias(V0, V0)  <-- V0 is the root!
1240                     //
1241                     //  Note that elements could be sparse like
1242                     //     V0 = InsElt Undef, S1, 1
1243                     //     V1 = InsElt V0,    s3, 2
1244                     //
1245                     Value* aliasee = AllIEIs[0];
1246                     AddAlias(aliasee);
1247                     for (int i = 1; i < nelts; ++i) {
1248                         Value* V = AllIEIs[i];
1249                         AliasMap[V] = aliasee;
1250 
1251                         // union liveness info
1252                         LV->mergeUseFrom(aliasee, V);
1253                     }
1254                 }
1255             }
1256         }
1257         else if (CastInst * CastI = dyn_cast<CastInst>(I))
1258         {
1259             if (IGC_GET_FLAG_VALUE(EnableDeSSAAlias) < 3) {
1260                 continue;
1261             }
1262 
1263             Value* D = CastI;
1264             Value* S = CastI->getOperand(0);
1265             if (isArgOrNeededInst(S) &&
1266                 WIA->whichDepend(D) == WIA->whichDepend(S) &&
1267                 isNoOpInst(CastI, CTX))
1268             {
1269                 if (AliasMap.count(D) == 0) {
1270                     AddAlias(S);
1271                     Value* aliasee = AliasMap[S];
1272                     AliasMap[D] = aliasee;
1273 
1274                     // D will be deleted due to aliasing
1275                     NoopAliasMap[D] = 1;
1276 
1277                     // union liveness info
1278                     LV->mergeUseFrom(aliasee, D);
1279                 }
1280                 else {
1281                     // Only src operands of a phi can be visited before
1282                     // operands' definition. For other instructions such
1283                     // as castInst, this shall never happen
1284                     IGC_ASSERT_MESSAGE(0, "ICE: Use visited before definition!");
1285                 }
1286             }
1287         }
1288     }
1289 }
1290 
checkInsertElementAlias(InsertElementInst * IEI,SmallVector<Value *,16> & AllIEIs)1291 int DeSSA::checkInsertElementAlias(
1292     InsertElementInst* IEI, SmallVector<Value*, 16> & AllIEIs)
1293 {
1294     IGC_ASSERT(nullptr != IEI);
1295     IGC_ASSERT_MESSAGE(isa<UndefValue>(IEI->getOperand(0)), "ICE: need to pass first IEI as the argument");
1296 
1297     // Find the the alias pattern:
1298     //     V0 = IEI UndefValue, S0, 0
1299     //     V1 = IEI V0,         S1, 1
1300     //     V2 = IEI V1,         S2, 2
1301     //     ......
1302     //     Vn = IEI Vn_1,       Sn_1, n
1303     // All Vi (i=0,n_1, except i=n) has a single-use.
1304     //
1305     // If found, return the actual vector size;
1306     // otherwise, return 0.
1307     IGCLLVM::FixedVectorType* VTy = cast<IGCLLVM::FixedVectorType>(IEI->getType());
1308     IGC_ASSERT(nullptr != VTy);
1309     int nelts = (int)VTy->getNumElements();
1310     AllIEIs.resize(nelts, nullptr);
1311     InsertElementInst* Inst = IEI;
1312     IGC_ASSERT(nullptr != WIA);
1313     WIAnalysis::WIDependancy Dep = WIA->whichDepend(Inst);
1314     while (Inst)
1315     {
1316         // Check if Inst has constant index, stop if not.
1317         // (This is for catching a common case, a variable index
1318         //  can be handled as well if needed.)
1319         ConstantInt* CI = dyn_cast<ConstantInt>(Inst->getOperand(2));
1320         if (!CI) {
1321             return 0;
1322         }
1323         int ix = (int)CI->getZExtValue();
1324         AllIEIs[ix] = Inst;
1325         if (!Inst->hasOneUse() || Dep != WIA->whichDepend(Inst)) {
1326             break;
1327         }
1328         Inst = dyn_cast<InsertElementInst>(Inst->user_back());
1329     }
1330 
1331     // Return the number of elements found
1332     int num = 0;
1333     for (int i = 0; i < nelts; ++i) {
1334         if (AllIEIs[i] == nullptr)
1335             continue;
1336         if (num < i) {
1337             // Pack them
1338             AllIEIs[num] = AllIEIs[i];
1339             AllIEIs[i] = nullptr;
1340         }
1341         ++num;
1342     }
1343     return num;
1344 }
1345 
getAliasee(Value * V) const1346 Value* DeSSA::getAliasee(Value* V) const
1347 {
1348     auto AI = AliasMap.find(V);
1349     if (AI == AliasMap.end())
1350         return V;
1351     return AI->second;
1352 }
1353 
isAliaser(Value * V) const1354 bool DeSSA::isAliaser(Value* V) const
1355 {
1356     auto AI = AliasMap.find(V);
1357     if (AI == AliasMap.end()) {
1358         return false;
1359     }
1360     return AI->first != AI->second;
1361 }
1362 
isNoopAliaser(Value * V) const1363 bool DeSSA::isNoopAliaser(Value* V) const
1364 {
1365     return NoopAliasMap.count(V) > 0;
1366 }
1367 
isAliasee(Value * V) const1368 bool DeSSA::isAliasee(Value* V) const
1369 {
1370     auto AI = AliasMap.find(V);
1371     if (AI == AliasMap.end()) {
1372         return false;
1373     }
1374     return AI->first == AI->second;
1375 }
1376 
1377 // If V is neither InsElt'ed, nor phi-coalesced, it is said to be
1378 // single valued. In another word, if it is at most aliased only,
1379 // it will have a single value during V's lifetime.
isSingleValued(llvm::Value * V) const1380 bool DeSSA::isSingleValued(llvm::Value* V) const
1381 {
1382     Value* aliasee = getAliasee(V);
1383     Value* insEltRootV = getInsEltRoot(aliasee);
1384     if (InsEltMap.count(aliasee) || !isIsolated(insEltRootV)) {
1385         return false;
1386     }
1387     return true;
1388 }
1389 
1390 // The following paper explains an approach to check if two
1391 // congruent classes interfere using a linear approach.
1392 //
1393 //    Boissinot, et al. Revisiting Out-of-SSA Translation for Correctness,
1394 //    Code Quality and Efficiency,
1395 //      In Proceedings of the 7th annual IEEE/ACM International Symposium
1396 //      on Code Generation and Optimization (Seattle, Washington,
1397 //      March 22 - 25, 2009). CGO '09. IEEE, Washington, DC, 114-125.
1398 //
1399 // Here, we simply use a naive pair-wise comparison.
1400 //
1401 // TODO: check if using linear approach described in the paper is
1402 //   necessary;  To do so, it needs to get PN (preorder number of BB)
1403 //   and sort congruent classes before doing interference checking.
interfere(llvm::Value * V0,llvm::Value * V1)1404 bool DeSSA::interfere(llvm::Value* V0, llvm::Value* V1)
1405 {
1406     SmallVector<Value*, 8> allCC0;
1407     SmallVector<Value*, 8> allCC1;
1408     getAllValuesInCongruentClass(V0, allCC0);
1409     getAllValuesInCongruentClass(V1, allCC1);
1410 
1411     for (int i0 = 0, sz0 = (int)allCC0.size(); i0 < sz0; ++i0)
1412     {
1413         Value* val0 = allCC0[i0];
1414         for (int i1 = 0, sz1 = (int)allCC1.size(); i1 < sz1; ++i1)
1415         {
1416             Value* val1 = allCC1[i1];
1417             if (LV->hasInterference(val0, val1)) {
1418                 return true;
1419             }
1420         }
1421     }
1422     return false;
1423 }
1424 
1425 // Alias interference checking.
1426 //    The caller is trying to check if V0 can alias to V1. For example,
1427 //      V0 = bitcast V1, or
1428 //      V0 = extractElement V1, ...
1429 //    As V0 and V1 hold the same value, the interference between these two
1430 //    does not matter. Thus, this function is a variant of interfere()
1431 //    with V0 and V1 interference ignored.
aliasInterfere(llvm::Value * V0,llvm::Value * V1)1432 bool DeSSA::aliasInterfere(llvm::Value* V0, llvm::Value* V1)
1433 {
1434     SmallVector<Value*, 8> allCC0;
1435     SmallVector<Value*, 8> allCC1;
1436     getAllValuesInCongruentClass(V0, allCC0);
1437     getAllValuesInCongruentClass(V1, allCC1);
1438 
1439     Value* V0_aliasee = getAliasee(V0);
1440     Value* V1_aliasee = getAliasee(V1);
1441 
1442     //
1443     // If aliasee is in InsEltMap, it is not single valued
1444     // and cannot be excluded from interfere checking.
1445     //
1446     // For example:
1447     //     x = bitcast y
1448     //     z = InsElt y, ...
1449     //       =  x
1450     //       =  y
1451     //
1452     //     {y, z} are coalesced via InsElt, interfere(x, y)
1453     //     must be checked.
1454     // However, if y (and x too) is not in InsEltMap, no need
1455     // to check interfere(x, y) as they have the same value
1456     // as the following:
1457     //     x = bitcast y
1458     //       = x
1459     //       = y
1460     //
1461     bool V0_oneValue = (InsEltMap.count(V0_aliasee) == 0);
1462     bool V1_oneValue = (InsEltMap.count(V1_aliasee) == 0);
1463     bool both_singleValue = (V0_oneValue && V1_oneValue);
1464 
1465     for (int i0 = 0, sz0 = (int)allCC0.size(); i0 < sz0; ++i0)
1466     {
1467         Value* val0 = allCC0[i0];
1468         for (int i1 = 0, sz1 = (int)allCC1.size(); i1 < sz1; ++i1)
1469         {
1470             Value* val1 = allCC1[i1];
1471             if (both_singleValue &&
1472                 val0 == V0_aliasee && val1 == V1_aliasee) {
1473                 continue;
1474             }
1475             if (LV->hasInterference(val0, val1)) {
1476                 return true;
1477             }
1478         }
1479     }
1480     return false;
1481 }
1482 
1483 // The existing code does align interference checking. Just
1484 // keep it for now. Likely to improve it later.
alignInterfere(e_alignment a1,e_alignment a2)1485 bool DeSSA::alignInterfere(e_alignment a1, e_alignment a2)
1486 {
1487     if (a1 == EALIGN_GRF && !(a2 == EALIGN_GRF || a2 == EALIGN_AUTO))
1488     {
1489         return true;
1490     }
1491     if (a2 == EALIGN_GRF && !(a1 == EALIGN_GRF || a1 == EALIGN_AUTO))
1492     {
1493         return true;
1494     }
1495     return false;
1496 }
1497