1 /*========================== begin_copyright_notice ============================
2
3 Copyright (C) 2017-2021 Intel Corporation
4
5 SPDX-License-Identifier: MIT
6
7 ============================= end_copyright_notice ===========================*/
8
9 /*========================== begin_copyright_notice ============================
10
11 This file is distributed under the University of Illinois Open Source License.
12 See LICENSE.TXT for details.
13
14 ============================= end_copyright_notice ===========================*/
15
16 //===-------- DeSSA.cpp - divide phi variables into congruent class -------===//
17 //
18 // Intel LLVM Extention
19 //===----------------------------------------------------------------------===//
20 //
21 // This pass is originated from the StrongPHIElimination on the machine-ir.
22 // We have adopted it to work on llvm-ir. Also note that we have changed it
23 // from a transformation to an analysis, meaning which only divides phi-vars
24 // into congruent classes, and does NOT insert the copies. A separate code-gen
25 // pass can use this analysis to emit non-ssa target code.
26 //
27 // Algorithm and References:
28 //
29 // This pass consider how to eliminates PHI instructions by aggressively
30 // coalescing the copies that would otherwise be inserted by a naive algorithm
31 // and only inserting the copies that are necessary. The coalescing technique
32 // initially assumes that all registers appearing in a PHI instruction do not
33 // interfere. It then eliminates proven interferences, using dominators to only
34 // perform a linear number of interference tests instead of the quadratic number
35 // of interference tests that this would naively require.
36 // This is a technique derived from:
37 //
38 // Budimlic, et al. Fast copy coalescing and live-range identification.
39 // In Proceedings of the ACM SIGPLAN 2002 Conference on Programming Language
40 // Design and Implementation (Berlin, Germany, June 17 - 19, 2002).
41 // PLDI '02. ACM, New York, NY, 25-32.
42 //
43 // The original implementation constructs a data structure they call a dominance
44 // forest for this purpose. The dominance forest was shown to be unnecessary,
45 // as it is possible to emulate the creation and traversal of a dominance forest
46 // by directly using the dominator tree, rather than actually constructing the
47 // dominance forest. This technique is explained in:
48 //
49 // Boissinot, et al. Revisiting Out-of-SSA Translation for Correctness, Code
50 // Quality and Efficiency,
51 // In Proceedings of the 7th annual IEEE/ACM International Symposium on Code
52 // Generation and Optimization (Seattle, Washington, March 22 - 25, 2009).
53 // CGO '09. IEEE, Washington, DC, 114-125.
54 //
55 // Careful implementation allows for all of the dominator forest interference
56 // checks to be performed at once in a single depth-first traversal of the
57 // dominator tree, which is what is implemented here.
58 //===----------------------------------------------------------------------===//
59
60 #include "Compiler/CISACodeGen/DeSSA.hpp"
61 #include "Compiler/CISACodeGen/ShaderCodeGen.hpp"
62 #include "Compiler/CISACodeGen/PatternMatchPass.hpp"
63 #include "Compiler/MetaDataApi/MetaDataApi.h"
64 #include "common/debug/Debug.hpp"
65 #include "common/debug/Dump.hpp"
66 #include "Compiler/IGCPassSupport.h"
67 #include "common/LLVMWarningsPush.hpp"
68 #include "llvmWrapper/IR/Instructions.h"
69 #include <llvm/IR/InstIterator.h>
70 #include <llvm/IR/InlineAsm.h>
71 #include <llvmWrapper/IR/DerivedTypes.h>
72 #include "common/LLVMWarningsPop.hpp"
73 #include <algorithm>
74 #include "Probe/Assertion.h"
75
76 using namespace llvm;
77 using namespace IGC;
78 using namespace IGC::Debug;
79 using namespace IGC::IGCMD;
80
81 #define PASS_FLAG "DeSSA"
82 #define PASS_DESCRIPTION "coalesce moves coming from phi nodes"
83 #define PASS_CFG_ONLY true
84 #define PASS_ANALYSIS true
85 IGC_INITIALIZE_PASS_BEGIN(DeSSA, PASS_FLAG, PASS_DESCRIPTION, PASS_CFG_ONLY, PASS_ANALYSIS)
86 IGC_INITIALIZE_PASS_DEPENDENCY(WIAnalysis)
87 IGC_INITIALIZE_PASS_DEPENDENCY(LiveVarsAnalysis)
88 IGC_INITIALIZE_PASS_DEPENDENCY(CodeGenPatternMatch)
89 IGC_INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
90 IGC_INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
91 IGC_INITIALIZE_PASS_DEPENDENCY(MetaDataUtilsWrapper)
92 IGC_INITIALIZE_PASS_END(DeSSA, PASS_FLAG, PASS_DESCRIPTION, PASS_CFG_ONLY, PASS_ANALYSIS)
93
94 char DeSSA::ID = 0;
95
DeSSA()96 DeSSA::DeSSA() : FunctionPass(ID)
97 {
98 initializeDeSSAPass(*PassRegistry::getPassRegistry());
99 }
100
print(raw_ostream & OS,const Module *) const101 void DeSSA::print(raw_ostream& OS, const Module*) const
102 {
103 // Assign each inst/arg a unique integer so that the output
104 // would be in order. It is useful when doing comparison.
105 DenseMap<const Value*, int> Val2IntMap;
106 int id = 0;
107 if (m_F) {
108 // All arguments
109 for (auto AI = m_F->arg_begin(), AE = m_F->arg_end(); AI != AE; ++AI) {
110 Value* aVal = &*AI;
111 Val2IntMap[aVal] = (++id);
112 }
113 // All instructions
114 for (auto II = inst_begin(m_F), IE = inst_end(m_F); II != IE; ++II) {
115 Instruction* Inst = &*II;
116 Val2IntMap[(Value*)Inst] = (++id);
117 }
118 }
119
120 bool doSort = (!Val2IntMap.empty());
121
122 auto valCmp = [&](const Value* V0, const Value* V1) -> bool {
123 int n0 = Val2IntMap[V0];
124 int n1 = Val2IntMap[V1];
125 return n0 < n1;
126 };
127
128 SmallVector<Value*, 64> ValKeyVec;
129 DenseMap<Value*, SmallVector<Value*, 8> > output;
130 if (IGC_IS_FLAG_ENABLED(EnableDeSSAAlias))
131 {
132 OS << "---- AliasMap ----\n\n";
133 for (auto& I : AliasMap) {
134 Value* aliaser = I.first;
135 Value* aliasee = I.second;
136 SmallVector<Value*, 8> & allAliasers = output[aliasee];
137 if (aliaser != aliasee) {
138 allAliasers.push_back(aliaser);
139 }
140 }
141
142 for (auto& I : output) {
143 Value* key = I.first;
144 ValKeyVec.push_back(key);
145 }
146 if (doSort) {
147 std::sort(ValKeyVec.begin(), ValKeyVec.end(), valCmp);
148 }
149 for (auto& I : ValKeyVec) {
150 Value* aliasee = I;
151 SmallVector<Value*, 8> & allAliasers = output[aliasee];
152
153 if (doSort) {
154 std::sort(allAliasers.begin(), allAliasers.end(), valCmp);
155 }
156
157 OS << " Aliasee: ";
158 aliasee->print(OS);
159 OS << "\n";
160 for (int i = 0, sz = (int)allAliasers.size(); i < sz; ++i)
161 {
162 OS << " ";
163 allAliasers[i]->print(OS);
164 OS << "\n";
165 }
166 }
167 OS << "\n\n";
168 }
169
170 OS << "---- InsEltMap ----\n\n";
171 output.clear();
172 ValKeyVec.clear();
173 for (auto& I : InsEltMap) {
174 Value* val = I.first;
175 Value* rootV = I.second;
176 SmallVector<Value*, 8> & allVals = output[rootV];
177 if (rootV != val) {
178 allVals.push_back(val);
179 }
180 }
181
182 for (auto& I : output) {
183 Value* key = I.first;
184 ValKeyVec.push_back(key);
185 }
186 if (doSort) {
187 std::sort(ValKeyVec.begin(), ValKeyVec.end(), valCmp);
188 }
189 for (auto& I : ValKeyVec) {
190 Value* rootV = I;
191 SmallVector<Value*, 8> & allVals = output[rootV];
192
193 if (doSort) {
194 std::sort(allVals.begin(), allVals.end(), valCmp);
195 }
196
197 OS << " Root Value : ";
198 rootV->print(OS);
199 OS << "\n";
200 for (int i = 0, sz = (int)allVals.size(); i < sz; ++i)
201 {
202 OS << " ";
203 allVals[i]->print(OS);
204 OS << "\n";
205 }
206 }
207 OS << "\n\n";
208
209 if (IGC_IS_FLAG_ENABLED(EnableDeSSAAlias))
210 {
211 OS << "---- Multi-value Alias (value in both AliasMap & InsEltMap) ----\n";
212
213 // All InsElt output has been sorted
214 for (auto& I : ValKeyVec) {
215 Value* rootV = I;
216 SmallVector<Value*, 8> & allVals = output[rootV];
217
218 OS << " Root Value: ";
219 rootV->printAsOperand(OS);
220 if (isAliasee(rootV)) {
221 OS << " [aliasee]";
222 }
223
224 int num = 0;
225 for (int i = 0, sz = (int)allVals.size(); i < sz; ++i)
226 {
227 Value* val = allVals[i];
228 if (!isAliasee(val))
229 continue;
230 if ((num % 8) == 0) {
231 OS << "\n ";
232 }
233
234 allVals[i]->printAsOperand(OS);
235 OS << " [aliasee] ";
236 ++num;
237 }
238 OS << "\n";
239 }
240 }
241 OS << "\n\n";
242
243 OS << "---- Phi-Var Isolations ----\n";
244 SmallVector<Node*, 64> NodeKeyVec;
245 std::map<Node*, SmallVector<Node*, 8> > nodeOutput;
246 //std::map<Node*, int> LeaderVisited;
247 for (auto I = RegNodeMap.begin(),
248 E = RegNodeMap.end(); I != E; ++I) {
249 Node* N = I->second;
250 // We don't want to change behavior of DeSSA by invoking
251 // dumping/printing functions. Thus, don't use getLeader()
252 // as it has side-effect (doing path halving).
253 Node* Leader = N->parent;
254 while (Leader != Leader->parent) {
255 Leader = Leader->parent;
256 }
257
258 SmallVector<Node*, 8> & allNodes = nodeOutput[Leader];
259 if (N != Leader) {
260 allNodes.push_back(N);
261 }
262 }
263
264 auto nodeCmp = [&](const Node* N0, const Node* N1) -> bool {
265 const Value* V0 = N0->value;
266 const Value* V1 = N1->value;
267 return valCmp(V0, V1);
268 };
269
270 for (auto& I : nodeOutput) {
271 Node* key = I.first;
272 NodeKeyVec.push_back(key);
273 }
274 if (doSort) {
275 std::sort(NodeKeyVec.begin(), NodeKeyVec.end(), nodeCmp);
276 }
277 for (auto& I : NodeKeyVec) {
278 Node* Leader = I;
279 SmallVector<Node*, 8> & allNodes = nodeOutput[Leader];
280 if (doSort) {
281 std::sort(allNodes.begin(), allNodes.end(), nodeCmp);
282 }
283
284 Value* VL;
285 if (isIsolated(Leader)) {
286 IGC_ASSERT_MESSAGE(allNodes.size() == 0, "ICE: isolated node still in multi-value CC!");
287 VL = Leader->value;
288 OS << "\nVar isolated : ";
289 VL->print(OS);
290 OS << "\n";
291 }
292 else {
293 OS << "\nLeader : ";
294 Leader->value->print(OS);
295 OS << "\n";
296 for (auto& II : allNodes) {
297 Node* N = II;
298 VL = N->value;
299 OS << " ";
300 VL->print(OS);
301 OS << "\n";
302 N = N->next;
303 }
304 }
305 }
306 }
307
dump() const308 void DeSSA::dump() const {
309 print(dbgs());
310 }
311
runOnFunction(Function & MF)312 bool DeSSA::runOnFunction(Function& MF)
313 {
314 m_F = &MF;
315 CurrColor = 0;
316 MetaDataUtils* pMdUtils = nullptr;
317 pMdUtils = getAnalysis<MetaDataUtilsWrapper>().getMetaDataUtils();
318 if (pMdUtils->findFunctionsInfoItem(&MF) == pMdUtils->end_FunctionsInfo())
319 {
320 return false;
321 }
322 CTX = getAnalysis<CodeGenContextWrapper>().getCodeGenContext();
323 DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
324 WIA = &getAnalysis<WIAnalysis>();
325 LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
326 CG = &getAnalysis<CodeGenPatternMatch>();
327 DL = &MF.getParent()->getDataLayout();
328 LV = &getAnalysis<LiveVarsAnalysis>().getLiveVars();
329
330 // make sure we do not run WIAnalysis between CodeGen and DeSSA,
331 // therefore m_program's Uniform Helper is still valid, which is
332 // used indirectly in DeSSA::GetPhiTemp().
333 // If we cannot maintain this assertion, then we should do
334 // m_program->SetUniformHelper(WIA);
335
336 if (IGC_IS_FLAG_ENABLED(EnableDeSSAAlias))
337 {
338 //
339 // The DeSSA/Coalescing procedure:
340 // 1. Follow Dominance tree to set up alias map. While setting up alias map,
341 // update liveness for aliasee so that alasee's liveness is the sum of
342 // all its aliasers.
343 // By aliaser/aliasee, it means the following:
344 // aliaser = bitcast aliasee
345 // (Most aliasing is from bitcast, some can be from other cast instructions
346 // such as inttoptr/ptrtoint. It could be also from insElt/extElt.)
347 //
348 // By traversing dominance tree depth-first (DF), it is guaranteed that
349 // a def will be visited before its use except PHI node. Since PHI inst
350 // is not a candidate for aliasing, this means that the def of aliasee has
351 // been visited before the aliaser instruction. For example,
352 // x = bitcast y
353 // The def of y should be visited before visiting this bitcast inst.
354 // Let alias(v0, v1) denote that v0 is an alias to v1. DF dominance-tree
355 // traversal may not handle aliasing in the following order:
356 // alias(v0, v1)
357 // alias(v1, v2)
358 // rather, it must be in the order
359 // alias(v1, v2)
360 // alias(v0, v1)
361 // By doing DF dominance-tree traversal, this kind of aliasing chain will
362 // be handled directly.
363 //
364 // 2. Set up InsEltMap, which coalesces vector values used in InsertElement
365 // instructions. It is treated as "alias", meaning the root value's
366 // liveness is the sum of all its non-root values. The difference b/w
367 // AliasMap and InsEltMap is that AliasMap is pure alias in that all
368 // aliasers have the same values as its aliasee (single-valued, like SSA);
369 // while InsElt has multiple-valued values. This difference does not matter
370 // in dessa, but it would matter when handling sub-vector aliasing later.
371 //
372 // We could remove InsEltMap by adding each value into DeSSA node. To do
373 // so, dessa traversal needs to be modified to have def of those values
374 // in PhiSrcDefs. This will generally have a larger CC, which means more
375 // compiling time.
376 // 3. Make sure DeSSA node only use the node value, that is, given value V,
377 // its Node value:
378 // V_aliasee = AliasMap[V] if V is in map, or V otherwise
379 // node_value = InsEltMap[V_aliasee] if in InsEltMap; or V_aliasee
380 // otherwise.
381 // Note that since the type of aliasess may be different from aliaser,
382 // the values in the same CC will have different types. Keep this in mind
383 // when creating CVariable in GetSymbol().
384 //
385 // Note that the algorithem forces coalescing of aliasing inst and InsertElement
386 // inst before PHI-coalescing, which means it favors coaslescing of those aliasing
387 // inst and InsertElement instructions. Thus, values in AliasMap/InsEltMap are
388 // guananteed to be coalesced together at the end of DeSSA. PHI coalescing may
389 // extend those maps by adding other values.
390 //
391 for (df_iterator<DomTreeNode*> DI = df_begin(DT->getRootNode()),
392 DE = df_end(DT->getRootNode()); DI != DE; ++DI) {
393 CoalesceAliasInstForBasicBlock(DI->getBlock());
394 }
395 }
396
397 for (df_iterator<DomTreeNode*> DI = df_begin(DT->getRootNode()),
398 DE = df_end(DT->getRootNode()); DI != DE; ++DI) {
399 CoalesceInsertElementsForBasicBlock(DI->getBlock());
400 }
401
402 // checkPHILoopInput
403 // PreHeader:
404 // x = ...
405 // Header:
406 // phi0 = [x, PreHeader], [t0, End]
407 // phi1 = [x, PreHeader], [t1, End]
408 // phi2 = [x, PreHeader], [t2, End]
409 // ...
410 // End:
411 // ...
412 // goto Header
413 //
414 // The algorithme below will start with a largest congruent class possible,
415 // which unions all phi's with the same source operands. This ends up with
416 // a single congruent class of all phi's with x as their source operand.
417 // Later, the algorithm isolates phi's as they interfere with each other,
418 // causing mov instructions to be generated within the loop at BB End.
419 //
420 // However, since all phi instructions are live at the same time, we will
421 // not be able to coalesce them. In another word, there is no need to put
422 // all phi's into the same congruent class in the first place. To achieve
423 // this, we use a Value-to-int map to keep how many times a value is used
424 // in the phi's, and if the number of uses is over a threshold, we will
425 // isolate the source operand and do not union it with its phi. In doing
426 // so it is likely for the algorithm to coalesce the phi's dst and the
427 // other src that is used in the loop, and therefore remove mov instrutions
428 // in the loop.
429 //
430 // Note that isolating a value introduce additional copy, thus a threshold
431 // is used here as a heuristic to try to make sure that a benefit is more
432 // than the cost.
433 enum { PHI_SRC_USE_THRESHOLD = 3 }; // arbitrary number
434 DenseMap<Value*, int> PHILoopPreHeaderSrcs;
435
436 // build initial congruent class using union-find
437 for (Function::iterator I = MF.begin(), E = MF.end();
438 I != E; ++I)
439 {
440 // First, initialize PHILoopPreHeaderSrcs map
441 BasicBlock* MBB = &*I;
442 Loop* LP = LI ? LI->getLoopFor(MBB) : nullptr;
443 BasicBlock* PreHeader = LP ? LP->getLoopPredecessor() : nullptr;
444 bool checkPHILoopInput = LP && (LP->getHeader() == MBB) && PreHeader;
445 PHILoopPreHeaderSrcs.clear();
446 if (checkPHILoopInput)
447 {
448 for (BasicBlock::iterator BBI = I->begin(), BBE = I->end();
449 BBI != BBE; ++BBI) {
450 PHINode* PHI = dyn_cast<PHINode>(BBI);
451 if (!PHI) {
452 break;
453 }
454
455 int srcIx = PHI->getBasicBlockIndex(PreHeader);
456 if (srcIx < 0) {
457 continue;
458 }
459 Value* SrcVal = PHI->getOperand(srcIx);
460 if (isa<Constant>(SrcVal)) {
461 continue;
462 }
463 if (PHILoopPreHeaderSrcs.count(SrcVal) == 0) {
464 PHILoopPreHeaderSrcs[SrcVal] = 0; // initialize to zero
465 }
466 PHILoopPreHeaderSrcs[SrcVal] += 1;
467 }
468 }
469
470 for (BasicBlock::iterator BBI = I->begin(), BBE = I->end();
471 BBI != BBE; ++BBI) {
472 PHINode* PHI = dyn_cast<PHINode>(BBI);
473 if (!PHI) {
474 break;
475 }
476
477 e_alignment DefAlign = GetPreferredAlignment(PHI, WIA, CTX);
478 IGC_ASSERT(PHI == getNodeValue(PHI));
479
480 addReg(PHI, DefAlign);
481 PHISrcDefs[&(*I)].push_back(PHI);
482
483 for (unsigned i = 0; i < PHI->getNumOperands(); ++i) {
484 Value* OrigSrcVal = PHI->getOperand(i);
485 // skip constant
486 if (isa<Constant>(OrigSrcVal))
487 continue;
488
489 // condition for preheader-src-isolation
490 bool PreheaderSrcIsolation = (checkPHILoopInput &&
491 !isa<InsertElementInst>(OrigSrcVal) && !isa<PHINode>(OrigSrcVal) &&
492 PHI->getIncomingBlock(i) == PreHeader &&
493 PHILoopPreHeaderSrcs.count(OrigSrcVal) > 0 &&
494 PHILoopPreHeaderSrcs[OrigSrcVal] >= PHI_SRC_USE_THRESHOLD);
495 // add src to the union
496 Value* SrcVal;
497 SrcVal = getNodeValue(OrigSrcVal);
498 e_alignment SrcAlign = GetPreferredAlignment(OrigSrcVal, WIA, CTX);
499 Instruction* DefMI = dyn_cast<Instruction>(SrcVal);
500 if (DefMI) {
501 if (CG->SIMDConstExpr(DefMI)) {
502 continue; // special case, simdSize becomes a constant in vISA
503 }
504 addReg(SrcVal, SrcAlign);
505 PHISrcDefs[DefMI->getParent()].push_back(DefMI);
506 if (WIA->whichDepend(PHI) == WIA->whichDepend(SrcVal) && !PreheaderSrcIsolation) {
507 unionRegs(PHI, SrcVal);
508 }
509 }
510 else if (isa<Argument>(SrcVal)) {
511 addReg(SrcVal, SrcAlign);
512 PHISrcArgs.insert(SrcVal);
513 if (WIA->whichDepend(PHI) == WIA->whichDepend(SrcVal) && !PreheaderSrcIsolation) {
514 unionRegs(PHI, SrcVal);
515 }
516 }
517 // cases that we need to isolate source
518 if (CG->IsForceIsolated(SrcVal) || PreheaderSrcIsolation) {
519 isolateReg(SrcVal);
520 }
521 } // end of source-operand loop
522 // isolate complex type that IGC does not handle
523 if (PHI->getType()->isStructTy() ||
524 PHI->getType()->isArrayTy()) {
525 isolateReg(PHI);
526 }
527 }
528 }
529
530 // \todo, the original paper talks aibout some before-hand quick
531 // isolation. The idea is to identify those essential splitting first
532 // in order to avoid unnecessary splitting in the next loop.
533
534 // Perform a depth-first traversal of the dominator tree, splitting
535 // interferences amongst PHI-congruence classes.
536 if (!RegNodeMap.empty()) {
537 DenseMap<int, Value*> CurrentDominatingParent;
538 DenseMap<Value*, Value*> ImmediateDominatingParent;
539 // first, go through the function arguments
540 SplitInterferencesForArgument(CurrentDominatingParent, ImmediateDominatingParent);
541 // Then all the blocks
542 for (df_iterator<DomTreeNode*> DI = df_begin(DT->getRootNode()),
543 DE = df_end(DT->getRootNode()); DI != DE; ++DI) {
544 SplitInterferencesForBasicBlock(DI->getBlock(),
545 CurrentDominatingParent,
546 ImmediateDominatingParent);
547 }
548 }
549
550 // Handle values that have specific alignment requirement.
551 SplitInterferencesForAlignment();
552
553 if (IGC_IS_FLAG_ENABLED(DumpDeSSA))
554 {
555 const char* fname = MF.getName().data();
556 using namespace IGC::Debug;
557 auto name =
558 DumpName(GetShaderOutputName())
559 .Hash(CTX->hash)
560 .Type(CTX->type)
561 .Pass("dessa")
562 .PostFix(fname)
563 .Retry(CTX->m_retryManager.GetRetryId())
564 .Extension("txt");
565
566 Dump dessaDump(name, DumpType::DBG_MSG_TEXT);
567
568 DumpLock();
569 print(dessaDump.stream());
570 DumpUnlock();
571 }
572 m_F = nullptr;
573 return false;
574 }
575
addReg(Value * Val,e_alignment Align)576 void DeSSA::addReg(Value* Val, e_alignment Align) {
577 if (RegNodeMap.count(Val))
578 return;
579 RegNodeMap[Val] = new (Allocator) Node(Val, ++CurrColor, Align);
580 }
581 // Using Path Halving in union-find
582 DeSSA::Node*
getLeader()583 DeSSA::Node::getLeader() {
584 Node* N = this;
585 Node* Parent = parent;
586 Node* Grandparent = Parent->parent;
587
588 while (Parent != Grandparent) {
589 N->parent = Grandparent;
590 N = Grandparent;
591 Parent = N->parent;
592 Grandparent = Parent->parent;
593 }
594
595 return Parent;
596 }
597
getRegRoot(Value * Val,e_alignment * pAlign) const598 Value* DeSSA::getRegRoot(Value* Val, e_alignment* pAlign) const {
599 auto RI = RegNodeMap.find(Val);
600 if (RI == RegNodeMap.end())
601 return nullptr;
602 Node* TheNode = RI->second;
603 if (isIsolated(TheNode))
604 return nullptr;
605 Node* TheLeader = TheNode->getLeader();
606 if (pAlign)
607 * pAlign = TheLeader->alignment;
608 return TheLeader->value;
609 }
610
getRootColor(Value * V)611 int DeSSA::getRootColor(Value* V)
612 {
613 auto RI = RegNodeMap.find(V);
614 if (RI == RegNodeMap.end())
615 return 0;
616 Node* TheNode = RI->second;
617 if (isIsolated(TheNode))
618 return 0;
619 Node* TheLeader = TheNode->getLeader();
620 return TheLeader->color;
621 }
622
unionRegs(Node * Nd1,Node * Nd2)623 void DeSSA::unionRegs(Node* Nd1, Node* Nd2)
624 {
625 Node* N1 = Nd1->getLeader();
626 Node* N2 = Nd2->getLeader();
627 Node* NewLeader = nullptr;
628 Node* Leadee = nullptr;
629
630 if (N1 == N2)
631 return;
632
633 if (N1->rank > N2->rank) {
634 NewLeader = N1;
635 Leadee = N2;
636 }
637 else if (N1->rank < N2->rank) {
638 NewLeader = N2;
639 Leadee = N1;
640 }
641 else {
642 NewLeader = N1;
643 Leadee = N2;
644 NewLeader->rank++;
645 }
646
647 IGC_ASSERT_MESSAGE(nullptr != NewLeader, "ICE: both leader and leadee shall not be null!");
648 IGC_ASSERT_MESSAGE(nullptr != Leadee, "ICE: both leader and leadee shall not be null!");
649 Leadee->parent = NewLeader;
650
651 // Link the circular list of Leadee right before NewLeader
652 Node* Leadee_prev = Leadee->prev;
653 Node* NewLeader_prev = NewLeader->prev;
654 NewLeader_prev->next = Leadee;
655 Leadee->prev = NewLeader_prev;
656 Leadee_prev->next = NewLeader;
657 NewLeader->prev = Leadee_prev;
658 }
659
isolateReg(Value * Val)660 void DeSSA::isolateReg(Value* Val) {
661 Node* ND = RegNodeMap[Val];
662 splitNode(ND);
663 }
664
isIsolated(Value * V) const665 bool DeSSA::isIsolated(Value* V) const {
666 auto RI = RegNodeMap.find(V);
667 if (RI == RegNodeMap.end()) {
668 return true;
669 }
670 Node* DestNode = RI->second;
671 return isIsolated(DestNode);
672 }
673
674 // Split node ND from its existing congurent class, and the
675 // node ND itself becomes a new single-value congruent class.
splitNode(Node * ND)676 void DeSSA::splitNode(Node* ND)
677 {
678 Node* N = ND->next;
679 if (N == ND) {
680 // ND is already in a single-value congruent class
681 return;
682 }
683
684 Node* Leader = ND->getLeader();
685
686 // Remove ND from the congruent class
687 Node* P = ND->prev;
688 N->prev = P;
689 P->next = N;
690
691 // ND : a new single-value congruent class
692 ND->parent = ND;
693 ND->next = ND;
694 ND->prev = ND;
695 ND->rank = 0;
696
697 // If leader is removed, need to have a new leader.
698 if (Leader == ND) {
699 // P will be the new leader. Also swap ND's color with P's
700 // so that the original congruent class still have the original
701 // color (this is important as Dom traversal assumes that the
702 // color of any congruent class remains unchanged).
703 int t = P->color;
704 P->color = ND->color;
705 ND->color = t;
706
707 // New leader
708 Leader = P;
709 }
710
711 // If ND has children, those children need to set their parent.
712 // Since we don't know if ND has children, we conservatively set
713 // parent for all remaining nodes using "a path compression", so
714 // that all nodes remains in the same rooted tree.
715 N = Leader->next;
716 Leader->parent = Leader;
717 Leader->rank = (Leader == N) ? 0 : 1;
718 while (N != Leader)
719 {
720 N->parent = Leader;
721 N->rank = 0;
722 N = N->next;
723 }
724 }
725
726 /// SplitInterferencesForBasicBlock - traverses a basic block, splitting any
727 /// interferences found between registers in the same congruence class. It
728 /// takes two DenseMaps as arguments that it also updates:
729 ///
730 /// 1) CurrentDominatingParent, which maps a color to the register in that
731 /// congruence class whose definition was most recently seen.
732 ///
733 /// 2) ImmediateDominatingParent, which maps a register to the register in the
734 /// same congruence class that most immediately dominates it.
735 ///
736 /// This function assumes that it is being called in a depth-first traversal
737 /// of the dominator tree.
738 ///
739 /// The algorithm used here is a generalization of the dominance-based SSA test
740 /// for two variables. If there are variables a_1, ..., a_n such that
741 ///
742 /// def(a_1) dom ... dom def(a_n),
743 ///
744 /// then we can test for an interference between any two a_i by only using O(n)
745 /// interference tests between pairs of variables. If i < j and a_i and a_j
746 /// interfere, then a_i is alive at def(a_j), so it is also alive at def(a_i+1).
747 /// Thus, in order to test for an interference involving a_i, we need only check
748 /// for a potential interference with a_i+1.
749 ///
750 /// This method can be generalized to arbitrary sets of variables by performing
751 /// a depth-first traversal of the dominator tree. As we traverse down a branch
752 /// of the dominator tree, we keep track of the current dominating variable and
753 /// only perform an interference test with that variable. However, when we go to
754 /// another branch of the dominator tree, the definition of the current dominating
755 /// variable may no longer dominate the current block. In order to correct this,
756 /// we need to use a stack of past choices of the current dominating variable
757 /// and pop from this stack until we find a variable whose definition actually
758 /// dominates the current block.
759 ///
760 /// There will be one push on this stack for each variable that has become the
761 /// current dominating variable, so instead of using an explicit stack we can
762 /// simply associate the previous choice for a current dominating variable with
763 /// the new choice. This works better in our implementation, where we test for
764 /// interference in multiple distinct sets at once.
765 void
SplitInterferencesForBasicBlock(BasicBlock * MBB,DenseMap<int,Value * > & CurrentDominatingParent,DenseMap<Value *,Value * > & ImmediateDominatingParent)766 DeSSA::SplitInterferencesForBasicBlock(
767 BasicBlock* MBB,
768 DenseMap<int, Value*>& CurrentDominatingParent,
769 DenseMap<Value*, Value*>& ImmediateDominatingParent) {
770 // Sort defs by their order in the original basic block, as the code below
771 // assumes that it is processing definitions in dominance order.
772 std::vector<Instruction*>& DefInstrs = PHISrcDefs[MBB];
773 std::sort(DefInstrs.begin(), DefInstrs.end(), MIIndexCompare(LV));
774
775 for (std::vector<Instruction*>::const_iterator BBI = DefInstrs.begin(),
776 BBE = DefInstrs.end(); BBI != BBE; ++BBI) {
777
778 Instruction* DefMI = *BBI;
779
780 // If the virtual register being defined is not used in any PHI or has
781 // already been isolated, then there are no more interferences to check.
782 int RootC = getRootColor(DefMI);
783 if (!RootC)
784 continue;
785
786 // The input to this pass sometimes is not in SSA form in every basic
787 // block, as some virtual registers have redefinitions. We could eliminate
788 // this by fixing the passes that generate the non-SSA code, or we could
789 // handle it here by tracking defining machine instructions rather than
790 // virtual registers. For now, we just handle the situation conservatively
791 // in a way that will possibly lead to false interferences.
792 Value* NewParent = CurrentDominatingParent[RootC];
793 if (NewParent == DefMI)
794 continue;
795
796 // Pop registers from the stack represented by ImmediateDominatingParent
797 // until we find a parent that dominates the current instruction.
798 while (NewParent) {
799 if (getRootColor(NewParent)) {
800 // we have added the another condition because the domination-test
801 // does not work between two phi-node. See the following comments
802 // from the DT::dominates:
803 // " It is not possible to determine dominance between two PHI nodes
804 // based on their ordering
805 // if (isa<PHINode>(A) && isa<PHINode>(B))
806 // return false;"
807 if (isa<Argument>(NewParent)) {
808 break;
809 }
810 else if (DT->dominates(cast<Instruction>(NewParent), DefMI)) {
811 break;
812 }
813 else if (cast<Instruction>(NewParent)->getParent() == MBB &&
814 isa<PHINode>(DefMI) && isa<PHINode>(NewParent)) {
815 break;
816 }
817 }
818 NewParent = ImmediateDominatingParent[NewParent];
819 }
820 // If NewParent is nonzero, then its definition dominates the current
821 // instruction, so it is only necessary to check for the liveness of
822 // NewParent in order to check for an interference.
823 if (NewParent && LV->isLiveAt(NewParent, DefMI)) {
824 // If there is an interference, always isolate the new register. This
825 // could be improved by using a heuristic that decides which of the two
826 // registers to isolate.
827 isolateReg(DefMI);
828 CurrentDominatingParent[RootC] = NewParent;
829 }
830 else {
831 // If there is no interference, update ImmediateDominatingParent and set
832 // the CurrentDominatingParent for this color to the current register.
833 ImmediateDominatingParent[DefMI] = NewParent;
834 CurrentDominatingParent[RootC] = DefMI;
835 }
836 }
837
838 // We now walk the PHIs in successor blocks and check for interferences. This
839 // is necessary because the use of a PHI's operands are logically contained in
840 // the predecessor block. The def of a PHI's destination register is processed
841 // along with the other defs in a basic block.
842
843 CurrentPHIForColor.clear();
844
845 for (succ_iterator SI = succ_begin(MBB), E = succ_end(MBB); SI != E; ++SI) {
846 for (BasicBlock::iterator BBI = (*SI)->begin(), BBE = (*SI)->end();
847 BBI != BBE; ++BBI) {
848 PHINode* PHI = dyn_cast<PHINode>(BBI);
849 if (!PHI) {
850 break;
851 }
852
853 int RootC = getRootColor(PHI);
854 // check live-out interference
855 if (IGC_IS_FLAG_ENABLED(EnableDeSSAWA) && !RootC)
856 {
857 // [todo] delete this code
858 if (CTX->type == ShaderType::COMPUTE_SHADER)
859 {
860 for (unsigned i = 0; !RootC && i < PHI->getNumOperands(); i++) {
861 Value* SrcVal = PHI->getOperand(i);
862 if (!isa<Constant>(SrcVal)) {
863 SrcVal = getNodeValue(SrcVal);
864 RootC = getRootColor(SrcVal);
865 }
866 }
867 }
868 }
869 if (!RootC) {
870 continue;
871 }
872 // Find the index of the PHI operand that corresponds to this basic block.
873 unsigned PredIndex;
874 for (PredIndex = 0; PredIndex < PHI->getNumOperands(); ++PredIndex) {
875 if (PHI->getIncomingBlock(PredIndex) == MBB)
876 break;
877 }
878 IGC_ASSERT(PredIndex < PHI->getNumOperands());
879 Value* PredValue = PHI->getOperand(PredIndex);
880 PredValue = getNodeValue(PredValue);
881 std::pair<Instruction*, Value*>& CurrentPHI = CurrentPHIForColor[RootC];
882 // If two PHIs have the same operand from every shared predecessor, then
883 // they don't actually interfere. Otherwise, isolate the current PHI. This
884 // could possibly be improved, e.g. we could isolate the PHI with the
885 // fewest operands.
886 if (CurrentPHI.first && CurrentPHI.second != PredValue) {
887 isolateReg(PHI);
888 continue;
889 }
890 else {
891 CurrentPHI = std::make_pair(PHI, PredValue);
892 }
893
894 // check live-out interference
895 // Pop registers from the stack represented by ImmediateDominatingParent
896 // until we find a parent that dominates the current instruction.
897 Value* NewParent = CurrentDominatingParent[RootC];
898 while (NewParent) {
899 if (getRootColor(NewParent)) {
900 if (isa<Argument>(NewParent)) {
901 break;
902 }
903 else if (DT->dominates(cast<Instruction>(NewParent)->getParent(), MBB)) {
904 break;
905 }
906 }
907 NewParent = ImmediateDominatingParent[NewParent];
908 }
909 CurrentDominatingParent[RootC] = NewParent;
910
911 // If there is an interference with a register, always isolate the
912 // register rather than the PHI. It is also possible to isolate the
913 // PHI, but that introduces copies for all of the registers involved
914 // in that PHI.
915 if (NewParent && NewParent != PredValue && LV->isLiveOut(NewParent, *MBB)) {
916 isolateReg(NewParent);
917 }
918 }
919 }
920 }
921
922 void
SplitInterferencesForArgument(DenseMap<int,Value * > & CurrentDominatingParent,DenseMap<Value *,Value * > & ImmediateDominatingParent)923 DeSSA::SplitInterferencesForArgument(
924 DenseMap<int, Value*>& CurrentDominatingParent,
925 DenseMap<Value*, Value*>& ImmediateDominatingParent) {
926 // No two arguments can be in the same congruent class
927 for (auto BBI = PHISrcArgs.begin(),
928 BBE = PHISrcArgs.end(); BBI != BBE; ++BBI) {
929 Value* AV = *BBI;
930 // If the virtual register being defined is not used in any PHI or has
931 // already been isolated, then there are no more interferences to check.
932 int RootC = getRootColor(AV);
933 if (!RootC)
934 continue;
935 Value* NewParent = CurrentDominatingParent[RootC];
936 if (NewParent) {
937 isolateReg(AV);
938 }
939 else {
940 CurrentDominatingParent[RootC] = AV;
941 }
942 }
943 }
944
945 // [todo] get rid of alignment-based isolation in dessa.
946 // Using alignment in isolation seems over-kill. The right approach
947 // would be one that avoids adding values with conflicting alignment
948 // requirement in the same congruent, not adding them in the same
949 // congruent class first and trying to isolate them later.
SplitInterferencesForAlignment()950 void DeSSA::SplitInterferencesForAlignment()
951 {
952 for (auto I = RegNodeMap.begin(), E = RegNodeMap.end(); I != E; ++I)
953 {
954 // Find a root Node
955 Node* rootNode = I->second;
956 if (rootNode->parent != rootNode) {
957 continue;
958 }
959
960 e_alignment Align = EALIGN_AUTO;
961 // Find the most restrictive alignment, i.e. GRF aligned ones.
962 Node* N = rootNode;
963 Node* Curr;
964 do {
965 Curr = N;
966 N = Curr->next;
967 if (Curr->alignment == EALIGN_GRF) {
968 Align = EALIGN_GRF;
969 break;
970 }
971 } while (N != rootNode);
972
973 if (Align != EALIGN_GRF)
974 continue;
975
976 // Isolate any mis-aligned value.
977 // Start with Curr node as it cannot be isolated
978 // (rootNode could be isolated), therefore, it remains
979 // in the linked list and can be used to test stop looping.
980 Node* Head = Curr;
981 N = Head;
982 do {
983 Curr = N;
984 N = N->next;
985 if (Curr->alignment != EALIGN_AUTO && Curr->alignment != EALIGN_GRF)
986 {
987 IGC_ASSERT(nullptr != Curr);
988 IGC_ASSERT_MESSAGE((Curr != Head), "Head Node cannot be isolated, something wrong!");
989 isolateReg(Curr->value);
990 }
991 } while (N != Head);
992
993 // Update root's alignment.
994 Head->getLeader()->alignment = Align;
995 }
996 }
997
998 Value*
getInsEltRoot(Value * Val) const999 DeSSA::getInsEltRoot(Value* Val) const
1000 {
1001 auto RI = InsEltMap.find(Val);
1002 if (RI == InsEltMap.end())
1003 return Val;
1004 return RI->second;
1005 }
1006
1007 /// <summary>
1008 /// Identify if an instruction has partial write semantics
1009 /// </summary>
1010 /// <param name="Inst"></param>
1011 /// <returns> the index of the source partial-write operand</returns>
1012 static
getPartialWriteSource(Value * Inst)1013 int getPartialWriteSource(Value *Inst)
1014 {
1015 if (isa<InsertElementInst>(Inst))
1016 return 0; // source 0 is the original value
1017 if (auto CI = dyn_cast<CallInst>(Inst)) {
1018 // only handle inline-asm with simple destination
1019 if (CI->isInlineAsm() && !CI->getType()->isStructTy()) {
1020 InlineAsm* IA = cast<InlineAsm>(IGCLLVM::getCalledValue(CI));
1021 StringRef constraintStr(IA->getConstraintString());
1022 SmallVector<StringRef, 8> constraints;
1023 constraintStr.split(constraints, ',');
1024 for (int i = 0; i < (int)constraints.size(); i++) {
1025 unsigned destID = 0;
1026 if (constraints[i].getAsInteger(10, destID) == 0) {
1027 // constraint-string indicates that source(i-1) and
1028 // destination should be the same vISA variable
1029 if (i > 0 && destID == 0)
1030 return (i - 1);
1031 }
1032 }
1033 }
1034 }
1035 return -1;
1036 }
1037
1038 void
CoalesceInsertElementsForBasicBlock(BasicBlock * Blk)1039 DeSSA::CoalesceInsertElementsForBasicBlock(BasicBlock* Blk)
1040 {
1041 if (IGC_IS_FLAG_ENABLED(EnableDeSSAAlias))
1042 {
1043 for (BasicBlock::iterator BBI = Blk->begin(), BBE = Blk->end();
1044 BBI != BBE; ++BBI) {
1045 Instruction* Inst = &(*BBI);
1046
1047 if (!CG->NeedInstruction(*Inst)) {
1048 continue;
1049 }
1050 // Only Aliasee needs to be handled.
1051 if (getAliasee(Inst) != Inst) {
1052 continue;
1053 }
1054
1055 // For keeping the existing behavior of InsEltMap unchanged
1056 auto PWSrcIdx = getPartialWriteSource(Inst);
1057 if (PWSrcIdx >= 0)
1058 {
1059 Value* origSrcV = Inst->getOperand(PWSrcIdx);
1060 Value* SrcV = getAliasee(origSrcV);
1061 if (SrcV != Inst && isArgOrNeededInst(origSrcV))
1062 {
1063 // union them
1064 e_alignment InstAlign = GetPreferredAlignment(Inst, WIA, CTX);
1065 e_alignment SrcVAlign = GetPreferredAlignment(SrcV, WIA, CTX);
1066 if (!LV->isLiveAt(SrcV, Inst) &&
1067 !alignInterfere(InstAlign, SrcVAlign) &&
1068 (WIA->whichDepend(SrcV) == WIA->whichDepend(Inst)))
1069 {
1070 InsEltMapAddValue(SrcV);
1071 InsEltMapAddValue(Inst);
1072
1073 Value* SrcVRoot = getInsEltRoot(SrcV);
1074 Value* InstRoot = getInsEltRoot(Inst);
1075
1076 // union them and their liveness info
1077 InsEltMapUnionValue(SrcV, Inst);
1078 LV->mergeUseFrom(SrcVRoot, InstRoot);
1079 }
1080 }
1081 }
1082 }
1083 return;
1084 }
1085
1086 for (BasicBlock::iterator BBI = Blk->begin(), BBE = Blk->end();
1087 BBI != BBE; ++BBI) {
1088 Instruction* Inst = &(*BBI);
1089 // skip phi, phi is handled in the 2nd loop
1090 if (isa<PHINode>(Inst))
1091 {
1092 continue;
1093 }
1094
1095 // extend the liveness of InsertElement due to union
1096 for (unsigned i = 0; i < Inst->getNumOperands(); ++i) {
1097 Value* SrcV = Inst->getOperand(i);
1098 if (getPartialWriteSource(SrcV) >= 0) {
1099 Value* RootV = getInsEltRoot(SrcV);
1100 if (RootV != SrcV) {
1101 LV->HandleVirtRegUse(RootV, Blk, Inst, true);
1102 }
1103 }
1104 }
1105
1106 auto PWSrcIdx = getPartialWriteSource(Inst);
1107 if (PWSrcIdx < 0) {
1108 continue;
1109 }
1110 // handle InsertElement
1111 InsEltMapAddValue(Inst);
1112
1113 Value* SrcV = Inst->getOperand(PWSrcIdx);
1114 if (isa<Instruction>(SrcV) || isa<Argument>(SrcV)) {
1115 if (!LV->isLiveAt(SrcV, Inst)) {
1116 Instruction* SrcDef = dyn_cast<Instruction>(SrcV);
1117 if (SrcDef && WIA->whichDepend(SrcDef) == WIA->whichDepend(Inst)) {
1118 // passed the liveness and alignment test
1119 // may need to create a node for srcv, for example, when srcv is phi/arg
1120 InsEltMapAddValue(SrcV);
1121 InsEltMapUnionValue(SrcV, Inst);
1122 }
1123 }
1124 }
1125 }
1126 // look at all the phis in the successor blocks
1127 // extend live-ranges due to the union of insert-element
1128 for (succ_iterator SI = succ_begin(Blk), E = succ_end(Blk); SI != E; ++SI)
1129 {
1130 for (BasicBlock::iterator BBI = (*SI)->begin(), BBE = (*SI)->end(); BBI != BBE; ++BBI)
1131 {
1132 PHINode* phi = dyn_cast<PHINode>(BBI);
1133 if (phi)
1134 {
1135 // extend the liveness of InsertElement due to union
1136 Value* SrcV = phi->getIncomingValueForBlock(Blk);
1137 if (isa<InsertElementInst>(SrcV)) {
1138 Value* RootV = getInsEltRoot(SrcV);
1139 if (RootV != SrcV) {
1140 BasicBlock* DefBlk = (isa<Instruction>(RootV)) ?
1141 cast<Instruction>(RootV)->getParent() : NULL;
1142 LV->MarkVirtRegAliveInBlock(LV->getLVInfo(RootV), DefBlk, Blk);
1143 }
1144 }
1145 }
1146 else
1147 {
1148 break;
1149 }
1150 }
1151 }
1152 }
1153
getRootValue(Value * Val,e_alignment * pAlign) const1154 Value* DeSSA::getRootValue(Value* Val, e_alignment* pAlign) const
1155 {
1156 if (IGC_IS_FLAG_ENABLED(EnableDeSSAAlias))
1157 {
1158 Value* mapVal = nullptr;
1159 auto AI = AliasMap.find(Val);
1160 if (AI != AliasMap.end()) {
1161 mapVal = AI->second;
1162 }
1163 auto IEI = InsEltMap.find(mapVal ? mapVal : Val);
1164 if (IEI != InsEltMap.end()) {
1165 mapVal = IEI->second;
1166 }
1167 Value* PhiRootVal = getRegRoot(mapVal ? mapVal : Val, pAlign);
1168 return (PhiRootVal ? PhiRootVal : mapVal);
1169 }
1170
1171 auto RI = InsEltMap.find(Val);
1172 if (RI != InsEltMap.end()) {
1173 Value* InsEltRoot = RI->second;
1174 Value* PhiRootVal = getRegRoot(InsEltRoot, pAlign);
1175 return (PhiRootVal ? PhiRootVal : InsEltRoot);
1176 }
1177 return getRegRoot(Val, pAlign);
1178 }
1179
getAllValuesInCongruentClass(Value * V,SmallVector<Value *,8> & ValsInCC)1180 void DeSSA::getAllValuesInCongruentClass(
1181 Value* V,
1182 SmallVector<Value*, 8> & ValsInCC)
1183 {
1184 // Handle InsertElement specially. Note that only rootValue from
1185 // a sequence of insertElement is in congruent class. The RootValue
1186 // has its liveness modified to cover all InsertElements that are
1187 // grouped together.
1188 Value* RootV = nullptr;
1189 RootV = getNodeValue(V);
1190
1191 IGC_ASSERT_MESSAGE(nullptr != RootV, "ICE: Node value should not be nullptr!");
1192 ValsInCC.push_back(RootV);
1193 auto RI = RegNodeMap.find(RootV);
1194 if (RI != RegNodeMap.end()) {
1195 Node* First = RI->second;
1196 for (Node* N = First->next; N != First; N = N->next)
1197 {
1198 ValsInCC.push_back(N->value);
1199 }
1200 }
1201 return;
1202 }
1203
CoalesceAliasInstForBasicBlock(BasicBlock * Blk)1204 void DeSSA::CoalesceAliasInstForBasicBlock(BasicBlock* Blk)
1205 {
1206 if (IGC_GET_FLAG_VALUE(EnableDeSSAAlias) < 2) {
1207 return;
1208 }
1209 for (BasicBlock::iterator BBI = Blk->begin(), BBE = Blk->end();
1210 BBI != BBE; ++BBI) {
1211 Instruction* I = &(*BBI);
1212
1213 // Now, better to think of code as a sequence Codegen Patterns,
1214 // not a sequence of llvm instructions.
1215 if (!CG->NeedInstruction(*I)) {
1216 continue;
1217 }
1218
1219 if (InsertElementInst * IEI = dyn_cast<InsertElementInst>(I))
1220 {
1221 if (isa<UndefValue>(I->getOperand(0)))
1222 {
1223 SmallVector<Value*, 16> AllIEIs;
1224 int nelts = checkInsertElementAlias(IEI, AllIEIs);
1225 if (nelts > 1)
1226 {
1227 // Consider the following as an alias if all
1228 // Vi, i=0, n-1 (except Vn) has a single use.
1229 // V0 = InsElt undef, S0, 0
1230 // V1 = InsElt V0, S1, 1
1231 // ...
1232 // Vn = InsElt Vn-1, Sn, n
1233 //
1234 // AliasMap has the following:
1235 // alias(V0, V0)
1236 // alias(V1, V0)
1237 // alias(V2, V0)
1238 // ......
1239 // alias(V0, V0) <-- V0 is the root!
1240 //
1241 // Note that elements could be sparse like
1242 // V0 = InsElt Undef, S1, 1
1243 // V1 = InsElt V0, s3, 2
1244 //
1245 Value* aliasee = AllIEIs[0];
1246 AddAlias(aliasee);
1247 for (int i = 1; i < nelts; ++i) {
1248 Value* V = AllIEIs[i];
1249 AliasMap[V] = aliasee;
1250
1251 // union liveness info
1252 LV->mergeUseFrom(aliasee, V);
1253 }
1254 }
1255 }
1256 }
1257 else if (CastInst * CastI = dyn_cast<CastInst>(I))
1258 {
1259 if (IGC_GET_FLAG_VALUE(EnableDeSSAAlias) < 3) {
1260 continue;
1261 }
1262
1263 Value* D = CastI;
1264 Value* S = CastI->getOperand(0);
1265 if (isArgOrNeededInst(S) &&
1266 WIA->whichDepend(D) == WIA->whichDepend(S) &&
1267 isNoOpInst(CastI, CTX))
1268 {
1269 if (AliasMap.count(D) == 0) {
1270 AddAlias(S);
1271 Value* aliasee = AliasMap[S];
1272 AliasMap[D] = aliasee;
1273
1274 // D will be deleted due to aliasing
1275 NoopAliasMap[D] = 1;
1276
1277 // union liveness info
1278 LV->mergeUseFrom(aliasee, D);
1279 }
1280 else {
1281 // Only src operands of a phi can be visited before
1282 // operands' definition. For other instructions such
1283 // as castInst, this shall never happen
1284 IGC_ASSERT_MESSAGE(0, "ICE: Use visited before definition!");
1285 }
1286 }
1287 }
1288 }
1289 }
1290
checkInsertElementAlias(InsertElementInst * IEI,SmallVector<Value *,16> & AllIEIs)1291 int DeSSA::checkInsertElementAlias(
1292 InsertElementInst* IEI, SmallVector<Value*, 16> & AllIEIs)
1293 {
1294 IGC_ASSERT(nullptr != IEI);
1295 IGC_ASSERT_MESSAGE(isa<UndefValue>(IEI->getOperand(0)), "ICE: need to pass first IEI as the argument");
1296
1297 // Find the the alias pattern:
1298 // V0 = IEI UndefValue, S0, 0
1299 // V1 = IEI V0, S1, 1
1300 // V2 = IEI V1, S2, 2
1301 // ......
1302 // Vn = IEI Vn_1, Sn_1, n
1303 // All Vi (i=0,n_1, except i=n) has a single-use.
1304 //
1305 // If found, return the actual vector size;
1306 // otherwise, return 0.
1307 IGCLLVM::FixedVectorType* VTy = cast<IGCLLVM::FixedVectorType>(IEI->getType());
1308 IGC_ASSERT(nullptr != VTy);
1309 int nelts = (int)VTy->getNumElements();
1310 AllIEIs.resize(nelts, nullptr);
1311 InsertElementInst* Inst = IEI;
1312 IGC_ASSERT(nullptr != WIA);
1313 WIAnalysis::WIDependancy Dep = WIA->whichDepend(Inst);
1314 while (Inst)
1315 {
1316 // Check if Inst has constant index, stop if not.
1317 // (This is for catching a common case, a variable index
1318 // can be handled as well if needed.)
1319 ConstantInt* CI = dyn_cast<ConstantInt>(Inst->getOperand(2));
1320 if (!CI) {
1321 return 0;
1322 }
1323 int ix = (int)CI->getZExtValue();
1324 AllIEIs[ix] = Inst;
1325 if (!Inst->hasOneUse() || Dep != WIA->whichDepend(Inst)) {
1326 break;
1327 }
1328 Inst = dyn_cast<InsertElementInst>(Inst->user_back());
1329 }
1330
1331 // Return the number of elements found
1332 int num = 0;
1333 for (int i = 0; i < nelts; ++i) {
1334 if (AllIEIs[i] == nullptr)
1335 continue;
1336 if (num < i) {
1337 // Pack them
1338 AllIEIs[num] = AllIEIs[i];
1339 AllIEIs[i] = nullptr;
1340 }
1341 ++num;
1342 }
1343 return num;
1344 }
1345
getAliasee(Value * V) const1346 Value* DeSSA::getAliasee(Value* V) const
1347 {
1348 auto AI = AliasMap.find(V);
1349 if (AI == AliasMap.end())
1350 return V;
1351 return AI->second;
1352 }
1353
isAliaser(Value * V) const1354 bool DeSSA::isAliaser(Value* V) const
1355 {
1356 auto AI = AliasMap.find(V);
1357 if (AI == AliasMap.end()) {
1358 return false;
1359 }
1360 return AI->first != AI->second;
1361 }
1362
isNoopAliaser(Value * V) const1363 bool DeSSA::isNoopAliaser(Value* V) const
1364 {
1365 return NoopAliasMap.count(V) > 0;
1366 }
1367
isAliasee(Value * V) const1368 bool DeSSA::isAliasee(Value* V) const
1369 {
1370 auto AI = AliasMap.find(V);
1371 if (AI == AliasMap.end()) {
1372 return false;
1373 }
1374 return AI->first == AI->second;
1375 }
1376
1377 // If V is neither InsElt'ed, nor phi-coalesced, it is said to be
1378 // single valued. In another word, if it is at most aliased only,
1379 // it will have a single value during V's lifetime.
isSingleValued(llvm::Value * V) const1380 bool DeSSA::isSingleValued(llvm::Value* V) const
1381 {
1382 Value* aliasee = getAliasee(V);
1383 Value* insEltRootV = getInsEltRoot(aliasee);
1384 if (InsEltMap.count(aliasee) || !isIsolated(insEltRootV)) {
1385 return false;
1386 }
1387 return true;
1388 }
1389
1390 // The following paper explains an approach to check if two
1391 // congruent classes interfere using a linear approach.
1392 //
1393 // Boissinot, et al. Revisiting Out-of-SSA Translation for Correctness,
1394 // Code Quality and Efficiency,
1395 // In Proceedings of the 7th annual IEEE/ACM International Symposium
1396 // on Code Generation and Optimization (Seattle, Washington,
1397 // March 22 - 25, 2009). CGO '09. IEEE, Washington, DC, 114-125.
1398 //
1399 // Here, we simply use a naive pair-wise comparison.
1400 //
1401 // TODO: check if using linear approach described in the paper is
1402 // necessary; To do so, it needs to get PN (preorder number of BB)
1403 // and sort congruent classes before doing interference checking.
interfere(llvm::Value * V0,llvm::Value * V1)1404 bool DeSSA::interfere(llvm::Value* V0, llvm::Value* V1)
1405 {
1406 SmallVector<Value*, 8> allCC0;
1407 SmallVector<Value*, 8> allCC1;
1408 getAllValuesInCongruentClass(V0, allCC0);
1409 getAllValuesInCongruentClass(V1, allCC1);
1410
1411 for (int i0 = 0, sz0 = (int)allCC0.size(); i0 < sz0; ++i0)
1412 {
1413 Value* val0 = allCC0[i0];
1414 for (int i1 = 0, sz1 = (int)allCC1.size(); i1 < sz1; ++i1)
1415 {
1416 Value* val1 = allCC1[i1];
1417 if (LV->hasInterference(val0, val1)) {
1418 return true;
1419 }
1420 }
1421 }
1422 return false;
1423 }
1424
1425 // Alias interference checking.
1426 // The caller is trying to check if V0 can alias to V1. For example,
1427 // V0 = bitcast V1, or
1428 // V0 = extractElement V1, ...
1429 // As V0 and V1 hold the same value, the interference between these two
1430 // does not matter. Thus, this function is a variant of interfere()
1431 // with V0 and V1 interference ignored.
aliasInterfere(llvm::Value * V0,llvm::Value * V1)1432 bool DeSSA::aliasInterfere(llvm::Value* V0, llvm::Value* V1)
1433 {
1434 SmallVector<Value*, 8> allCC0;
1435 SmallVector<Value*, 8> allCC1;
1436 getAllValuesInCongruentClass(V0, allCC0);
1437 getAllValuesInCongruentClass(V1, allCC1);
1438
1439 Value* V0_aliasee = getAliasee(V0);
1440 Value* V1_aliasee = getAliasee(V1);
1441
1442 //
1443 // If aliasee is in InsEltMap, it is not single valued
1444 // and cannot be excluded from interfere checking.
1445 //
1446 // For example:
1447 // x = bitcast y
1448 // z = InsElt y, ...
1449 // = x
1450 // = y
1451 //
1452 // {y, z} are coalesced via InsElt, interfere(x, y)
1453 // must be checked.
1454 // However, if y (and x too) is not in InsEltMap, no need
1455 // to check interfere(x, y) as they have the same value
1456 // as the following:
1457 // x = bitcast y
1458 // = x
1459 // = y
1460 //
1461 bool V0_oneValue = (InsEltMap.count(V0_aliasee) == 0);
1462 bool V1_oneValue = (InsEltMap.count(V1_aliasee) == 0);
1463 bool both_singleValue = (V0_oneValue && V1_oneValue);
1464
1465 for (int i0 = 0, sz0 = (int)allCC0.size(); i0 < sz0; ++i0)
1466 {
1467 Value* val0 = allCC0[i0];
1468 for (int i1 = 0, sz1 = (int)allCC1.size(); i1 < sz1; ++i1)
1469 {
1470 Value* val1 = allCC1[i1];
1471 if (both_singleValue &&
1472 val0 == V0_aliasee && val1 == V1_aliasee) {
1473 continue;
1474 }
1475 if (LV->hasInterference(val0, val1)) {
1476 return true;
1477 }
1478 }
1479 }
1480 return false;
1481 }
1482
1483 // The existing code does align interference checking. Just
1484 // keep it for now. Likely to improve it later.
alignInterfere(e_alignment a1,e_alignment a2)1485 bool DeSSA::alignInterfere(e_alignment a1, e_alignment a2)
1486 {
1487 if (a1 == EALIGN_GRF && !(a2 == EALIGN_GRF || a2 == EALIGN_AUTO))
1488 {
1489 return true;
1490 }
1491 if (a2 == EALIGN_GRF && !(a1 == EALIGN_GRF || a1 == EALIGN_AUTO))
1492 {
1493 return true;
1494 }
1495 return false;
1496 }
1497