1 /*========================== begin_copyright_notice ============================
2
3 Copyright (C) 2017-2021 Intel Corporation
4
5 SPDX-License-Identifier: MIT
6
7 ============================= end_copyright_notice ===========================*/
8
9 #include "VariableReuseAnalysis.hpp"
10 #include "Compiler/IGCPassSupport.h"
11 #include "Compiler/CISACodeGen/ShaderCodeGen.hpp"
12 #include "Compiler/CodeGenPublic.h"
13 #include "common/LLVMWarningsPush.hpp"
14 #include <llvm/Support/Debug.h>
15 #include "llvmWrapper/IR/DerivedTypes.h"
16 #include "common/LLVMWarningsPop.hpp"
17 #include <algorithm>
18 #include "Probe/Assertion.h"
19
20 using namespace llvm;
21 using namespace IGC;
22
23 namespace
24 {
25 // If V is scalar, return 1.
26 // if V is vector, return the number of elements.
getNumElts(Value * V)27 inline int getNumElts(Value* V) {
28 IGCLLVM::FixedVectorType* VTy = dyn_cast<IGCLLVM::FixedVectorType>(V->getType());
29 return VTy ? (int)VTy->getNumElements() : 1;
30 }
31
getTypeSizeInBits(Type * Ty)32 inline int getTypeSizeInBits(Type* Ty) {
33 int scalarBits = Ty->getScalarSizeInBits();
34 IGCLLVM::FixedVectorType* VTy = dyn_cast<IGCLLVM::FixedVectorType>(Ty);
35 return scalarBits * (VTy ? (int)VTy->getNumElements() : 1);
36 }
37 }
38
39 char VariableReuseAnalysis::ID = 0;
40
41 IGC_INITIALIZE_PASS_BEGIN(VariableReuseAnalysis, "VariableReuseAnalysis",
42 "VariableReuseAnalysis", false, true)
43 // IGC_INITIALIZE_PASS_DEPENDENCY(RegisterEstimator)
IGC_INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)44 IGC_INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
45 IGC_INITIALIZE_PASS_DEPENDENCY(WIAnalysis)
46 IGC_INITIALIZE_PASS_DEPENDENCY(LiveVarsAnalysis)
47 IGC_INITIALIZE_PASS_DEPENDENCY(CodeGenPatternMatch)
48 IGC_INITIALIZE_PASS_DEPENDENCY(DeSSA)
49 IGC_INITIALIZE_PASS_DEPENDENCY(CoalescingEngine)
50 IGC_INITIALIZE_PASS_DEPENDENCY(BlockCoalescing)
51 IGC_INITIALIZE_PASS_DEPENDENCY(CodeGenContextWrapper)
52 IGC_INITIALIZE_PASS_END(VariableReuseAnalysis, "VariableReuseAnalysis",
53 "VariableReuseAnalysis", false, true)
54
55 llvm::FunctionPass* IGC::createVariableReuseAnalysisPass() {
56 return new VariableReuseAnalysis;
57 }
58
VariableReuseAnalysis()59 VariableReuseAnalysis::VariableReuseAnalysis()
60 : FunctionPass(ID),
61 m_pCtx(nullptr), m_WIA(nullptr), m_LV(nullptr), m_DeSSA(nullptr),
62 m_PatternMatch(nullptr), m_coalescingEngine(nullptr),
63 m_RPE(nullptr), m_SimdSize(0), m_IsFunctionPressureLow(Status::Undef),
64 m_IsBlockPressureLow(Status::Undef) {
65 initializeVariableReuseAnalysisPass(*PassRegistry::getPassRegistry());
66 }
67
runOnFunction(Function & F)68 bool VariableReuseAnalysis::runOnFunction(Function& F)
69 {
70 m_F = &F;
71
72 m_WIA = &(getAnalysis<WIAnalysis>());
73 if (IGC_IS_FLAG_DISABLED(DisableDeSSA))
74 {
75 m_DeSSA = &getAnalysis<DeSSA>();
76 }
77 m_LV = &(getAnalysis<LiveVarsAnalysis>().getLiveVars());
78 m_PatternMatch = &getAnalysis<CodeGenPatternMatch>();
79 m_pCtx = getAnalysis<CodeGenContextWrapper>().getCodeGenContext();
80 m_coalescingEngine = &getAnalysis<CoalescingEngine>();
81 m_DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
82 m_DL = &F.getParent()->getDataLayout();
83
84 // FIXME: enable RPE.
85 // m_RPE = &getAnalysis<RegisterEstimator>();
86
87 // Nothing but cleanup data from previous runs.
88 reset();
89
90 if (IGC_IS_FLAG_ENABLED(EnableVariableAlias) &&
91 m_DeSSA &&
92 !m_pCtx->getModuleMetaData()->compOpt.OptDisable &&
93 m_pCtx->platform.GetPlatformFamily() >= IGFX_GEN9_CORE)
94 {
95 // Setup ArgDeSSARoot (for subroutine, it might be conservative,
96 // but it should work.).
97 m_ArgDeSSARoot.clear();
98 for (auto II = F.arg_begin(), IE = F.arg_end(); II != IE; ++II)
99 {
100 Value* A = II;
101 if (Value * R = m_DeSSA->getRootValue(A)) {
102 m_ArgDeSSARoot.push_back(R);
103 }
104 }
105
106 // 0. Merge Variables
107 // Merge two different variables into a single one.
108 // The two vars that will be merged should have the same
109 // size/type and normally are defined with different values.
110 // Once merged, they are put in the same DeSSA congruent class
111 mergeVariables(&F);
112
113 // 1. SubVector aliasing
114 // Two variables alias each other if they have the same values.
115 // Although they have different names, the two variables share
116 // the same values over their live ranges. The cases such as
117 // extractElement/insertElement, etc. Once aliasing is identified,
118 // the liveness of the alias root is updated to be the sum of both.
119 // This is the same as DeSSA alias.
120 InsertElementAliasing(&F);
121
122 // 2. Handle extractElement, etc that handles a single instruction or
123 // a few instruction, not invovled in a complicated patterns like
124 // InsertElement.
125 visitLiveInstructions(&F);
126
127 postProcessing();
128
129 if (IGC_IS_FLAG_ENABLED(DumpVariableAlias))
130 {
131 auto name =
132 Debug::DumpName(Debug::GetShaderOutputName())
133 .Hash(m_pCtx->hash)
134 .Type(m_pCtx->type)
135 .Pass("VariableAlias")
136 .PostFix(F.getName().str())
137 .Extension("txt");
138 printAlias(Debug::Dump(name, Debug::DumpType::DBG_MSG_TEXT).stream(), m_F);
139 }
140 }
141
142 m_F = nullptr;
143 return false;
144 }
145
getMaxReuseDistance(uint16_t size)146 static unsigned getMaxReuseDistance(uint16_t size) {
147 return (size == 8) ? 10 : 5;
148 }
149
checkUseInst(Instruction * UseInst,LiveVars * LV)150 bool VariableReuseAnalysis::checkUseInst(Instruction* UseInst, LiveVars* LV) {
151 BasicBlock* CurBB = UseInst->getParent();
152 if (UseInst->isUsedOutsideOfBlock(CurBB))
153 return false;
154
155 // This situation can occur:
156 //
157 // ,------.
158 // | |
159 // | v
160 // | t2 = phi ... t1 ...
161 // | |
162 // | v
163 // | t1 = ...
164 // | ... = ... t1 ...
165 // | |
166 // `------'
167 //
168 // Disallow reuse if t1 has a phi use.
169 // Disallow reuse if t1 has a far away use when the pressure is not low.
170 unsigned DefLoc = LV->getDistance(UseInst);
171 unsigned FarUseLoc = 0;
172 for (auto UI : UseInst->users()) {
173 if (isa<PHINode>(UI))
174 return false;
175
176 auto Inst = dyn_cast<Instruction>(UI);
177 if (!Inst)
178 return false;
179 unsigned UseLoc = LV->getDistance(Inst);
180 FarUseLoc = std::max(FarUseLoc, UseLoc);
181 }
182
183 // When the whole function or block pressure is low, skip the distance check.
184 if (isCurFunctionPressureLow() || isCurBlockPressureLow())
185 return true;
186
187 // Use distance to limit reuse.
188 const unsigned FarUseDistance = getMaxReuseDistance(m_SimdSize);
189 return FarUseLoc <= DefLoc + FarUseDistance;
190 }
191
checkDefInst(Instruction * DefInst,Instruction * UseInst,LiveVars * LV)192 bool VariableReuseAnalysis::checkDefInst(Instruction* DefInst,
193 Instruction* UseInst, LiveVars* LV) {
194 IGC_ASSERT(nullptr != DefInst);
195 IGC_ASSERT(nullptr != UseInst);
196 if (isa<PHINode>(DefInst))
197 return false;
198
199 if (auto CI = dyn_cast<CallInst>(DefInst)) {
200 Function* F = CI->getCalledFunction();
201 // Do not reuse the return symbol of subroutine/stack calls.
202 if (!F || !F->isDeclaration())
203 return false;
204
205 if (isa<GenIntrinsicInst>(DefInst)) {
206 // Just skip all gen intrinsic calls. Some intrinsic calls may have
207 // special meaning.
208 return false;
209 }
210 }
211
212 // This is a block level reuse.
213 BasicBlock* CurBB = UseInst->getParent();
214 if (DefInst->getParent() != CurBB || DefInst->isUsedOutsideOfBlock(CurBB))
215 return false;
216
217 // Check whether UseInst is the last use of DefInst. If not, this source
218 // variable cannot be reused.
219 Instruction* LastUse = LV->getLVInfo(DefInst).findKill(CurBB);
220 if (LastUse != UseInst)
221 return false;
222
223 // When the whole function or block pressure is low, skip the distance check.
224 if (isCurFunctionPressureLow() || isCurBlockPressureLow())
225 return true;
226
227 // Use distance to limit far reuses.
228 unsigned DefLoc = LV->getDistance(DefInst);
229 unsigned UseLoc = LV->getDistance(UseInst);
230 const unsigned FarDefDistance = getMaxReuseDistance(m_SimdSize);
231 return UseLoc <= DefLoc + FarDefDistance;
232 }
233
mergeVariables(Function * F)234 void VariableReuseAnalysis::mergeVariables(Function* F)
235 {
236 for (auto II = inst_begin(F), IE = inst_end(F); II != IE; ++II)
237 {
238 Instruction* I = &*II;
239 if (!m_PatternMatch->NeedInstruction(*I))
240 continue;
241 if (GenIntrinsicInst * CI = dyn_cast<GenIntrinsicInst>(I))
242 {
243 switch (CI->getIntrinsicID()) {
244 default:
245 break;
246 } // End of switch
247 }
248 }
249 }
250
visitLiveInstructions(Function * F)251 void VariableReuseAnalysis::visitLiveInstructions(Function* F)
252 {
253 for (auto BI = F->begin(), BE = F->end(); BI != BE; ++BI)
254 {
255 BasicBlock* BB = &*BI;
256 for (auto II = BB->begin(), IE = BB->end(); II != IE; ++II)
257 {
258 Instruction& I = *II;
259 if (!m_PatternMatch->NeedInstruction(I))
260 continue;
261 visit(I);
262 }
263 }
264 }
265
266 // Given a root Value RootVal, all its values that are coalesced
267 // with it are in AllVals. This function finds the place to insert
268 // the lifeTimeStart for RootVal, which is either at the end of a
269 // BB or right before the first definition. If any value is argument,
270 // no lifeTimeStart is needed.
271 // (For assisting visa for liveness analysis.)
setLifeTimeStartPos(Value * RootVal,ValueVectorTy & AllVals,BlockCoalescing * theBC)272 void VariableReuseAnalysis::setLifeTimeStartPos(
273 Value* RootVal,
274 ValueVectorTy& AllVals,
275 BlockCoalescing* theBC)
276 {
277 SmallSet<BasicBlock*, 8> defBBSet;
278 SmallSet<BasicBlock*, 8> phiSrcMovBBSet;
279 for (int i = 0, sz = (int)AllVals.size(); i < sz; ++i)
280 {
281 Value* V = AllVals[i];
282 Instruction* I = dyn_cast<Instruction>(V);
283 if (!I) {
284 // For arg, global etc., its start is on entry.
285 // Thus, no need to insert lifetime start.
286 defBBSet.clear();
287 phiSrcMovBBSet.clear();
288 break;
289 }
290
291 if (PHINode * PHI = dyn_cast<PHINode>(I)) {
292 Value* PHI_root = m_DeSSA->getRootValue(PHI);
293 int sz1 = (int)PHI->getNumIncomingValues();
294 for (int i1 = 0; i1 < sz1; ++i1)
295 {
296 Value* Src = PHI->getIncomingValue(i1);
297 Value* Src_root = m_DeSSA->getRootValue(Src);
298 if (!Src_root || PHI_root != Src_root) {
299 // Need Src-side phi mov
300 BasicBlock* BB = PHI->getIncomingBlock(i1);
301 phiSrcMovBBSet.insert(BB);
302 }
303 }
304 }
305 else {
306 BasicBlock* BB = I->getParent();
307 defBBSet.insert(BB);
308 }
309 }
310
311 if (defBBSet.size() == 0 && phiSrcMovBBSet.size() == 0) {
312 return;
313 }
314
315 auto BSI = defBBSet.begin();
316 auto BSE = defBBSet.end();
317 BasicBlock* NearestDomBB = *BSI;
318 for (++BSI; BSI != BSE; ++BSI)
319 {
320 BasicBlock* aB = *BSI;
321 NearestDomBB = m_DT->findNearestCommonDominator(NearestDomBB, aB);
322 }
323
324 // phiSrcMovBBSet
325 for (auto II = phiSrcMovBBSet.begin(), IE = phiSrcMovBBSet.end();
326 II != IE; ++II)
327 {
328 BasicBlock* aB = *II;
329 NearestDomBB = m_DT->findNearestCommonDominator(NearestDomBB, aB);
330 }
331
332 // Skip emptry BBs that are going to be skipped in codegen emit.
333 while (theBC->IsEmptyBlock(NearestDomBB))
334 {
335 auto Node = m_DT->getNode(NearestDomBB);
336 NearestDomBB = Node->getIDom()->getBlock();
337 }
338
339 if (defBBSet.count(NearestDomBB))
340 {
341 // lifeTimeStart insert pos is in a BB where a def exists
342 m_LifetimeAt1stDefOfBB[RootVal] = NearestDomBB;
343 }
344 else
345 {
346 // No def in the bb, it must be at the end of BB
347 // (must be before phiSrcMov too).
348 m_LifetimeAtEndOfBB[NearestDomBB].push_back(RootVal);
349 }
350 }
351
postProcessing()352 void VariableReuseAnalysis::postProcessing()
353 {
354 // BlockCoalescing : check if a BB is a to-be-skipped empty BB.
355 // It is used for selecting BB to add lifetime start
356 BlockCoalescing* theBC = &getAnalysis<BlockCoalescing>();
357 if (!m_DeSSA || m_pCtx->getVectorCoalescingControl() < 3)
358 return;
359
360 DenseMap<Value*, int> dessaRootVisited;
361 auto IS = m_aliasMap.begin();
362 auto IE = m_aliasMap.end();
363 for (auto II = IS; II != IE; ++II)
364 {
365 SSubVecDesc* SV = II->second;
366 Value* aliasee = SV->BaseVector;
367 if (aliasee != SV->Aliaser)
368 continue;
369
370 // An alias set of an aliasee (base) :
371 // The aliasee and all its aliasers; and for each of them, all values
372 // in its dessa CC.
373 //
374 // For each Aliasee, record its lifetime start, which is the
375 // nearest dominator that dominates all value defs in an alias set.
376 // This BB is either one that has no defintion of values in the set;
377 // or one that has a defintion to a value in the set. For the former,
378 // m_LifetimeAtEndOfBB is used to keep track of it; for the latter,
379 // m_LifetimeAt1stDefOfBB is used.
380 ValueVectorTy AllVals;
381 SmallVector<Value*, 8> valInCC;
382 m_DeSSA->getAllValuesInCongruentClass(aliasee, valInCC);
383 AllVals.insert(AllVals.end(), valInCC.begin(), valInCC.end());
384
385 // update visited for aliasee
386 Value* aliaseeRoot = m_DeSSA->getRootValue(aliasee);
387 aliaseeRoot = aliaseeRoot ? aliaseeRoot : aliasee;
388 dessaRootVisited[aliaseeRoot] = 1;
389 for (int i = 0, sz = (int)SV->Aliasers.size(); i < sz; ++i)
390 {
391 SSubVecDesc* aSV = SV->Aliasers[i];
392 Value* aliaser = aSV->Aliaser;
393 valInCC.clear();
394 m_DeSSA->getAllValuesInCongruentClass(aliaser, valInCC);
395 AllVals.insert(AllVals.end(), valInCC.begin(), valInCC.end());
396
397 // update visited for aliaser
398 Value* aRoot = m_DeSSA->getRootValue(aliaser);
399 aRoot = aRoot ? aRoot : aliaser;
400 dessaRootVisited[aRoot] = 1;
401 }
402
403 setLifeTimeStartPos(aliaseeRoot, AllVals, theBC);
404 }
405
406 // For other vector values
407 if (m_pCtx->getVectorCoalescingControl() < 4)
408 return;
409
410 for (auto II = inst_begin(*m_F), IE = inst_end(*m_F); II != IE; ++II)
411 {
412 Instruction* I = &*II;
413 if (!m_PatternMatch->NeedInstruction(*I))
414 continue;
415 if (!I->getType()->isVectorTy())
416 continue;
417 Value* rootV = m_DeSSA->getRootValue(I);
418 rootV = rootV ? rootV : I;
419 if (dessaRootVisited.find(rootV) != dessaRootVisited.end()) {
420 // Already handled by sub-vector aliasing, skip
421 continue;
422 }
423
424 ValueVectorTy AllVals;
425 SmallVector<Value*, 8> valInCC;
426 m_DeSSA->getAllValuesInCongruentClass(rootV, valInCC);
427 AllVals.insert(AllVals.end(), valInCC.begin(), valInCC.end());
428
429 setLifeTimeStartPos(rootV, AllVals, theBC);
430 }
431 }
432
getRootValue(Value * V)433 Value* VariableReuseAnalysis::getRootValue(Value* V)
434 {
435 Value* dessaRV = nullptr;
436 if (m_DeSSA) {
437 dessaRV = m_DeSSA->getRootValue(V);
438 }
439 return dessaRV ? dessaRV : V;
440 }
441
getAliasRootValue(Value * V)442 Value* VariableReuseAnalysis::getAliasRootValue(Value* V)
443 {
444 Value* V_nv = m_DeSSA ? m_DeSSA->getNodeValue(V) : V;
445 auto II = m_aliasMap.find(V_nv);
446 if (II == m_aliasMap.end()) {
447 return V_nv;
448 }
449 return II->second->BaseVector;
450 }
451
452 // Returns true for the following pattern:
453 // a = extractElement <vectorType> EEI_Vec, <constant EEI_ix>
454 // b = insertElement <vectorType> V1, a, <constant IEI_ix>
455 // where EEI_ix and IEI_ix are constants; Return false otherwise.
getVectorIndicesIfConstant(InsertElementInst * IEI,int & IEI_ix,Value * & EEI_Vec,int & EEI_ix)456 bool VariableReuseAnalysis::getVectorIndicesIfConstant(
457 InsertElementInst* IEI, int& IEI_ix, Value*& EEI_Vec, int& EEI_ix)
458 {
459 // Check if I has constant index, skip if not.
460 ConstantInt* CI = dyn_cast<ConstantInt>(IEI->getOperand(2));
461 if (!CI) {
462 return false;
463 }
464 IEI_ix = (int)CI->getZExtValue();
465
466 // Check that the elements inserted are from extractElement
467 // Also, special-handling of insertelement itself.
468 Value* elem = IEI->getOperand(1);
469 ExtractElementInst* EEI = dyn_cast<ExtractElementInst>(elem);
470 if (!EEI) {
471 // Just insertelement itself
472 EEI_ix = 0;
473 EEI_Vec = elem;
474 return true;
475 }
476 ConstantInt* CI1 = dyn_cast<ConstantInt>(EEI->getIndexOperand());
477 if (!CI1) {
478 return false;
479 }
480 EEI_ix = (int)CI1->getZExtValue();
481 EEI_Vec = EEI->getVectorOperand();
482 return true;
483 }
484
visitExtractElementInst(ExtractElementInst & I)485 void VariableReuseAnalysis::visitExtractElementInst(ExtractElementInst& I)
486 {
487 if (m_pCtx->getVectorCoalescingControl() == 0) {
488 return;
489 }
490
491 ExtractElementInst* EEI = &I;
492 Value* vecVal = EEI->getVectorOperand();
493
494 // Before doing extractMask explicitly, don't do aliasing
495 // for extractElement whose vector operand are the candidate
496 // of the existing extractMask optimization, as doing so will
497 // disable the existing extractMask optimization, which will
498 // cause perf regression.
499 if (Instruction * Inst = dyn_cast<Instruction>(vecVal))
500 {
501 if (IGC_IS_FLAG_DISABLED(EnableExtractMask) &&
502 (isSampleInstruction(Inst) || isLdInstruction(Inst)))
503 {
504 // OCL can have sample (image read), not ld. For 3d/mac,
505 // need to check more
506 return;
507 }
508 }
509
510 // If inst is dead, EEI is an argument, or EEI & vecVal have
511 // different uniformness, skip it. (Current igc & visa interface
512 // requires any argument value to be a root value, not alias.)
513 if (m_HasBecomeNoopInsts.count(EEI) ||
514 m_DeSSA->isNoopAliaser(EEI) ||
515 isOrCoalescedWithArg(EEI) ||
516 (m_WIA && m_WIA->whichDepend(EEI) != m_WIA->whichDepend(vecVal))) {
517 return;
518 }
519
520 Value* EEI_nv = m_DeSSA->getNodeValue(EEI);
521 Value* vec_nv = m_DeSSA->getNodeValue(vecVal);
522
523 // If EEI has been payload-coalesced or has been vec-aliased,
524 // skip it for now (implementation choice).
525 // Note that payload-coalescing does not use node value yet.
526 if (hasBeenPayloadCoalesced(EEI) ||
527 hasAnotherInDCCAsAliasee(vec_nv) ||
528 hasAnyOfDCCAsAliaser(EEI_nv)) {
529 return;
530 }
531
532 // Can only do alias if idx is a known constant.
533 Value* IdxVal = EEI->getIndexOperand();
534 ConstantInt* Idx = dyn_cast<ConstantInt>(IdxVal);
535 if (!Idx) {
536 return;
537 }
538
539 int iIdx = (int)Idx->getZExtValue();
540 if (aliasInterfere(EEI_nv, vec_nv, iIdx)) {
541 return;
542 }
543
544 // Valid vec alias and add it into alias map
545 addVecAlias(EEI_nv, vec_nv, iIdx);
546
547 // Mark this inst as noop inst
548 m_HasBecomeNoopInsts[EEI] = 1;
549 }
550
printAlias(raw_ostream & OS,const Function * F) const551 void VariableReuseAnalysis::printAlias(raw_ostream& OS, const Function* F) const
552 {
553 // Assign each inst/arg a unique integer so that the output
554 // would be in order. It is useful when doing comparison.
555 DenseMap<const Value*, int> Val2IntMap;
556 int id = 0;
557 if (F) {
558 // All arguments
559 for (auto AI = F->arg_begin(), AE = F->arg_end(); AI != AE; ++AI) {
560 const Value* aVal = AI;
561 Val2IntMap[aVal] = (++id);
562 }
563 // All instructions
564 for (auto II = inst_begin(F), IE = inst_end(F); II != IE; ++II) {
565 const Instruction* Inst = &*II;
566 Val2IntMap[(Value*)Inst] = (++id);
567 }
568 }
569
570 auto SubVecCmp = [&](const SSubVecDesc* SV0, const SSubVecDesc* SV1) -> bool {
571 int n0 = Val2IntMap[SV0->Aliaser];
572 int n1 = Val2IntMap[SV1->Aliaser];
573 return n0 < n1;
574 };
575
576 OS << "\nSummary of Variable Alias Info: "
577 << (F ? F->getName().str() : "Function")
578 << "\n";
579
580 SmallVector<SSubVecDesc*, 64> sortedAlias;
581 for (auto& MI : m_aliasMap) {
582 SSubVecDesc* SV = MI.second;
583 sortedAlias.push_back(SV);
584 }
585 std::sort(sortedAlias.begin(), sortedAlias.end(), SubVecCmp);
586
587 for (int i = 0, sz = (int)sortedAlias.size(); i < sz; ++i)
588 {
589 SSubVecDesc* SV = sortedAlias[i];
590 Value* aliasee = SV->BaseVector;
591 if (SV->Aliaser != aliasee) {
592 // Not alias root
593 continue;
594 }
595 OS << "Aliasee : " << *aliasee << "\n";
596 std::sort(SV->Aliasers.begin(), SV->Aliasers.end(), SubVecCmp);
597 for (auto VI : SV->Aliasers)
598 {
599 SSubVecDesc* aSV = VI;
600 Value* aliaser = aSV->Aliaser;
601 Value* dessaRoot = m_DeSSA ? m_DeSSA->getRootValue(aliaser) : nullptr;
602 const char* inCC = dessaRoot ? ".inDessaCC" : "";
603 OS << " " << *aliaser
604 << " [" << aSV->StartElementOffset << "]"
605 << inCC << "\n";
606 }
607 OS << "\n";
608 }
609 OS << "\n";
610 }
611
dumpAlias() const612 void VariableReuseAnalysis::dumpAlias() const
613 {
614 printAlias(dbgs(), m_F);
615 }
616
617 // Add alias Aliaser ->Aliasee[Idx]
addVecAlias(Value * Aliaser,Value * Aliasee,int Idx)618 void VariableReuseAnalysis::addVecAlias(
619 Value* Aliaser, Value* Aliasee, int Idx)
620 {
621 SSubVecDesc* aliaserSV = getOrCreateSubVecDesc(Aliaser);
622 SSubVecDesc* aliaseeSV = getOrCreateSubVecDesc(Aliasee);
623 Value* aliaseeRoot = aliaseeSV->BaseVector;
624 aliaserSV->BaseVector = aliaseeRoot;
625 aliaserSV->StartElementOffset = Idx + aliaseeSV->StartElementOffset;
626
627 // If Aliaser exists as a root (aliasee), re-alias all its
628 // aliasers to the new root 'aliaseeRoot'.
629 SSubVecDesc* rootSV = getOrCreateSubVecDesc(aliaseeRoot);
630 if (aliaserSV->Aliasers.size() > 0)
631 {
632 for (int i = 0, sz = (int)aliaserSV->Aliasers.size(); i < sz; ++i)
633 {
634 SSubVecDesc* SV = aliaserSV->Aliasers[i];
635 SV->BaseVector = aliaseeRoot;
636 SV->StartElementOffset += Idx;
637 rootSV->Aliasers.push_back(SV);
638 }
639
640 // Clear aliaser's Aliasers as it is no longer a root
641 aliaserSV->Aliasers.clear();
642 }
643
644 // Finally, add aliaserSV into root's Aliaser vector and
645 // update aliaser to its root map if aliaser's not isolated.
646 rootSV->Aliasers.push_back(aliaserSV);
647
648 // aliaser
649 Value* rv0 = m_DeSSA ? m_DeSSA->getRootValue(Aliaser) : nullptr;
650 if (rv0) {
651 m_root2AliasMap[rv0] = Aliaser;
652 }
653 // aliasee, note that re-defining it does not matter.
654 Value* rv1 = m_DeSSA ? m_DeSSA->getRootValue(Aliasee) : nullptr;
655 if (rv1) {
656 m_root2AliasMap[rv1] = Aliasee;
657 }
658 }
659
getOrCreateSubVecDesc(Value * V)660 SSubVecDesc* VariableReuseAnalysis::getOrCreateSubVecDesc(Value* V)
661 {
662 if (m_aliasMap.count(V) == 0) {
663 SSubVecDesc* SV = new(Allocator) SSubVecDesc(V);
664 m_aliasMap.insert(std::make_pair(V, SV));
665 }
666 return m_aliasMap[V];
667 }
668
669 // Return true if V itself is sub-vector aliased.
670 // Note that other values in V's DeSSA CC are not checked.
isAliased(Value * V) const671 bool VariableReuseAnalysis::isAliased(Value* V) const
672 {
673 Value* V_nv = m_DeSSA ? m_DeSSA->getNodeValue(V) : V;
674 return m_aliasMap.count(V_nv) > 0;
675 }
676
677 // DCC: DeSSA Congruent Class
678 // If any value in V's DCC is aliaser, return true.
hasAnyOfDCCAsAliaser(Value * V) const679 bool VariableReuseAnalysis::hasAnyOfDCCAsAliaser(Value* V) const
680 {
681 auto II = m_aliasMap.find(V);
682 if (II != m_aliasMap.end()) {
683 // If it is in the map, all of its DCC should
684 // be either aliaser or aliasee, never have the
685 // mix of aliaser and aliasee (implementation
686 // must guarantee this).
687 SSubVecDesc* SV = II->second;
688 return SV->Aliaser != SV->BaseVector;
689 }
690
691 // If V is not in the map, check others in its DCC
692 Value* rv = m_DeSSA ? m_DeSSA->getRootValue(V) : nullptr;
693 if (rv) {
694 auto II = m_root2AliasMap.find(rv);
695 if (II != m_root2AliasMap.end()) {
696 Value* aV = II->second;
697 auto MI = m_aliasMap.find(aV);
698 IGC_ASSERT(MI != m_aliasMap.end());
699 SSubVecDesc* SV = MI->second;
700 return (SV->Aliaser != SV->BaseVector);
701 }
702 }
703 return false;
704 }
705
706 // DCC: DeSSA Congruent Class
707 // If there is another value in V's DCC that is aliasee, return true.
hasAnotherInDCCAsAliasee(Value * V) const708 bool VariableReuseAnalysis::hasAnotherInDCCAsAliasee(Value* V) const
709 {
710 // Check if any value of its dessa CC has been aliased already.
711 Value* rv = m_DeSSA ? m_DeSSA->getRootValue(V) : nullptr;
712 if (rv) {
713 auto II = m_root2AliasMap.find(rv);
714 if (II != m_root2AliasMap.end()) {
715 Value* aV = II->second;
716 auto MI = m_aliasMap.find(aV);
717 IGC_ASSERT(MI != m_aliasMap.end());
718 SSubVecDesc* SV = MI->second;
719 const Value* tV = SV->Aliaser;
720 return (tV == SV->BaseVector && tV != V);
721 }
722 }
723 return false;
724 }
725
726 // A chain of IEIs is used to define a vector. If all elements of this vector
727 // are inserted via this chain IEI that has a constant index, populate AllIEIs.
728 // input: FirstIEI (first IEI, usually with index = 0)
729 // output: AllIEIs (collect all values used to initialize the vector)
730 // Return value:
731 // true : if all elements are inserted with IEI of constant index
732 // false: otherwise.
getAllInsEltsIfAvailable(InsertElementInst * FirstIEI,VecInsEltInfoTy & AllIEIs)733 bool VariableReuseAnalysis::getAllInsEltsIfAvailable(
734 InsertElementInst* FirstIEI, VecInsEltInfoTy& AllIEIs)
735 {
736 int nelts = getNumElts(FirstIEI);
737
738 // Sanity
739 if (nelts < 2)
740 return false;
741
742 // AllIEIs are fixed to the number of elements of the vector.
743 AllIEIs.resize(nelts);
744
745 InsertElementInst* LastIEI = FirstIEI;
746 InsertElementInst* I = FirstIEI;
747 Value* dessaRoot = m_DeSSA->getRootValue(FirstIEI);
748 while (I)
749 {
750 LastIEI = I;
751
752 // For insertElement, it should be in the same dessa CC
753 // already, as dessa special-handles it. Make sure they
754 // are indeed in the same CC, otherwise, skip.
755 if (hasBeenPayloadCoalesced(I) ||
756 m_DeSSA->getRootValue(I) != dessaRoot)
757 return false;
758
759 Value* V = nullptr;
760 Value* E = nullptr;
761 int IEI_ix = 0, V_ix = 0;
762 if (!getElementValue(I, IEI_ix, E, V, V_ix)) {
763 return false;
764 }
765
766 IGC_ASSERT_MESSAGE(IEI_ix < nelts, "ICE: IEI's index out of bound!");
767 SVecInsEltInfo& InsEltInfo = AllIEIs[IEI_ix];
768 if (InsEltInfo.IEI) {
769 // One element is inserted more than once, skip.
770 return false;
771 }
772 InsEltInfo.IEI = I;
773 InsEltInfo.Elt = E;
774 InsEltInfo.FromVec = V;
775 InsEltInfo.FromVec_eltIx = V_ix;
776 if (E) {
777 InsEltInfo.EEI = dyn_cast<ExtractElementInst>(E);
778 }
779
780 if (!I->hasOneUse()) {
781 break;
782 }
783
784 I = dyn_cast<InsertElementInst>(I->user_back());
785 }
786
787 // Special cases.
788 if (AllIEIs.empty() || LastIEI->use_empty()) {
789 return false;
790 }
791
792 // Make sure all elements are present
793 for (int i = 0; i < nelts; ++i) {
794 if (AllIEIs[i].IEI == nullptr)
795 return false;
796 }
797 return true;
798 }
799
traceAliasValue(Value * V)800 Value* VariableReuseAnalysis::traceAliasValue(Value* V)
801 {
802 if (CastInst * CastI = dyn_cast_or_null<CastInst>(V))
803 {
804 Value* Src = CastI->getOperand(0);
805 if (isa<Constant>(Src))
806 return CastI;
807
808 Value* NV0 = m_DeSSA->getNodeValue(CastI);
809 Value* NV1 = m_DeSSA->getNodeValue(Src);
810 if (NV0 == NV1)
811 {
812 // Meaning they are aliased already by dessa
813 return traceAliasValue(Src);
814 }
815 }
816 return V;
817 }
818
819 //
820 // Returns true if the following is true
821 // IEI = insertElement <vectorType> Vec, A, <constant IEI_ix>
822 // Return false, otherwise.
823 //
824 // When the above condition is true, S, V, V_ix are used for the
825 // following cases:
826 // 1. sub-vector (V, V_ix), S = A
827 // A = extractElement <vectorType> V, <constant V_ix>
828 // A is the element denoted by (V, V_ix)
829 // 2. non-sub-vector: V=nullptr, V_ix=0, S = A
830 // A is a candidate inserted and can be alias to Vec
831 //
832 // Input: IEI
833 // Output: IEI_ix, S, V, V_ix
getElementValue(InsertElementInst * IEI,int & IEI_ix,Value * & S,Value * & V,int & V_ix)834 bool VariableReuseAnalysis::getElementValue(
835 InsertElementInst* IEI, int& IEI_ix, Value*& S, Value*& V, int& V_ix)
836 {
837 // Return value: S or (V, V_ix)
838 S = nullptr;
839 V = nullptr;
840 V_ix = 0;
841 IEI_ix = 0;
842
843 // Check if I has constant index, skip if not.
844 ConstantInt* CI = dyn_cast<ConstantInt>(IEI->getOperand(2));
845 if (!CI) {
846 return false;
847 }
848
849 // From now on, this func must return true.
850 IEI_ix = (int)CI->getZExtValue();
851
852 // Check that the elements inserted are from extractElement.
853 // Also, if no ExtractELement, get IEI's element value as S.
854 Value* elem0 = IEI->getOperand(1);
855 if (hasBeenPayloadCoalesced(elem0) ||
856 isa<Constant>(elem0) ||
857 isOrCoalescedWithArg(elem0))
858 {
859 // If elem0 has been payload-coalesced, is constant,
860 // or it has been aliased to an argument, it cannot
861 // be aliased to IEI.
862 return false;
863 }
864
865 Value* elem = traceAliasValue(elem0);
866 ExtractElementInst* EEI = dyn_cast<ExtractElementInst>(elem);
867 S = elem;
868 if (!EEI) {
869 // case 2. No sub-vector alias, but it is okay
870 // to use non-sub-vector aliasing.
871 return true;
872 }
873 ConstantInt* CI1 = dyn_cast<ConstantInt>(EEI->getIndexOperand());
874 if (!CI1 ||
875 !m_DeSSA->isSingleValued(elem))
876 {
877 // case 2
878 // 1. EEI's index isn't constant, or
879 // 2. EEI is not single-valued (implementation)
880 // No sub-vector aliasing, but non-sub-vector aliasing
881 // is okay.
882 return true;
883 }
884
885 V = EEI->getVectorOperand();
886 if (isa<Constant>(V) ||
887 hasBeenPayloadCoalesced(V))
888 {
889 // case 2 again, just non-sub-vector aliasing
890 V = nullptr;
891 return true;
892 }
893
894 // case 1.
895 V_ix = (int)CI1->getZExtValue();
896 return true;
897 }
898
InsertElementAliasing(Function * F)899 void VariableReuseAnalysis::InsertElementAliasing(Function* F)
900 {
901 // Do it if VATemp >= 2 and for ocl only for now
902 if (m_pCtx->getVectorCoalescingControl() < 2) {
903 return;
904 }
905
906 for (auto II = inst_begin(F), IE = inst_end(F); II != IE; ++II)
907 {
908 Instruction* I = &*II;
909 if (!m_PatternMatch->NeedInstruction(*I))
910 continue;
911
912 InsertElementInst* IEI = dyn_cast<InsertElementInst>(I);
913 if (!IEI)
914 continue;
915
916 // Two cases for sub-vector aliasing:
917 // 1. extractFrom: sub-vector is created from a base vector.
918 // For example:
919 // given base: int8 b; a sub-vector s (int4) can be:
920 // s = (int4)(b.s4, b.s5, b.s6, b.s7)
921 // In this case, 's' becomes a part of 'b'. In LLVM IR,
922 // there are a chain of extElt and insElt instructions for
923 // doing so.
924 // 2. insertTo: sub-vector is used to create a base vector.
925 // For example:
926 // given sub-vector int4 s0, s1; int8 vector b is created like:
927 // b = (int8) (s0, s1)
928 // In this case, both s0 and s1 become part of b.
929
930 // Start insertElement pattern from the first InsertElement (one
931 // with UndefValue. Note that that this's also the dessa insElt root.
932 if (!isa<UndefValue>(IEI->getOperand(0)))
933 continue;
934
935 // First, collect all insertElementInst and extractElementInst.
936 VecInsEltInfoTy AllIEIs;
937 if (!getAllInsEltsIfAvailable(IEI, AllIEIs)) {
938 continue;
939 }
940
941 // Check if this is an extractFrom pattern.
942 // If so, add alias and return true.
943 if (processExtractFrom(AllIEIs)) {
944 continue;
945 }
946
947 // Check if this is an insertTo pattern.
948 // If so, add alias and return true.
949 if (processInsertTo(AllIEIs)) {
950 continue;
951 }
952 }
953 }
954
955 // Check if the vector value of InsertElement is
956 // a sub-vector of another one, return true if so.
processExtractFrom(VecInsEltInfoTy & AllIEIs)957 bool VariableReuseAnalysis::processExtractFrom(VecInsEltInfoTy& AllIEIs)
958 {
959 int nelts = (int)AllIEIs.size();
960 Value* BaseVec = AllIEIs[0].FromVec;
961 int BaseStartIx = AllIEIs[0].FromVec_eltIx;
962 if (!BaseVec) {
963 // Base is not a vector, so IEI cannot be
964 // a subvector of another vector!
965 return false;
966 }
967 int base_nelts = getNumElts(BaseVec);
968
969 // If Base's size is smaller than IEI's, IEI cannot be sub-vector
970 if (base_nelts < nelts) {
971 return false;
972 }
973
974 for (int i = 1; i < nelts; ++i)
975 {
976 if (AllIEIs[i].FromVec != BaseVec ||
977 AllIEIs[i].FromVec_eltIx != (BaseStartIx + i))
978 return false;
979 }
980
981 // Interference checking
982 Value* Sub = AllIEIs[0].IEI;
983 Value* Sub_nv = m_DeSSA->getNodeValue(Sub);
984 Value* Base_nv = m_DeSSA->getNodeValue(BaseVec);
985
986 // If Sub is an arg of function, skip (Base is okay to be an arg)
987 if (isOrCoalescedWithArg(Sub)) {
988 return false;
989 }
990
991 // Implementation restriction
992 if (hasAnyOfDCCAsAliaser(Sub_nv) ||
993 hasAnotherInDCCAsAliasee(Base_nv)) {
994 return false;
995 }
996
997 if (aliasInterfere(Sub_nv, Base_nv, BaseStartIx)) {
998 return false;
999 }
1000
1001 // add alias
1002 addVecAlias(Sub_nv, Base_nv, BaseStartIx);
1003
1004 // Make sure noop insts are in the map.
1005 for (int i = 0, sz = (int)AllIEIs.size(); i < sz; ++i)
1006 {
1007 InsertElementInst* IEI = AllIEIs[i].IEI;
1008 if (m_DeSSA->isNoopAliaser(IEI))
1009 continue;
1010 m_HasBecomeNoopInsts[IEI] = 1;
1011
1012 ExtractElementInst* EEI = AllIEIs[i].EEI;
1013 IGC_ASSERT(EEI);
1014 if (m_DeSSA->isNoopAliaser(EEI))
1015 continue;
1016 m_HasBecomeNoopInsts[EEI] = 1;
1017 }
1018 return true;
1019 }
1020
1021 // Check if IEI is a base vector created by other sub-vectors
1022 // or scalars. If it is, create alias and return true.
processInsertTo(VecInsEltInfoTy & AllIEIs)1023 bool VariableReuseAnalysis::processInsertTo(VecInsEltInfoTy& AllIEIs)
1024 {
1025 int nelts = (int)AllIEIs.size();
1026 Value* Sub = AllIEIs[0].FromVec;
1027 int SubStartIx = 0;
1028 SmallVector<std::pair<Value*, int>, 8> SubVecs;
1029
1030 auto IsInSubVecs = [&](Value* Val) -> bool {
1031 for (int j = 0, sz = (int)SubVecs.size(); j < sz; ++j) {
1032 if (SubVecs[j].first == Val)
1033 return true;
1034 }
1035 return false;
1036 };
1037
1038 // Check alias interference
1039 InsertElementInst* FirstIEI = AllIEIs[0].IEI;
1040 Value* Base_nv = m_DeSSA->getNodeValue(FirstIEI);
1041 // Early check to see if Base_nv could be used as Base.
1042 if (hasAnotherInDCCAsAliasee(Base_nv)) {
1043 return false;
1044 }
1045
1046 bool isSubCandidate = true;
1047 for (int i = 0; i < nelts; ++i)
1048 {
1049 // On entry to the iteration, AllIEIs[i].FromVec must be the
1050 // same as Sub. If the next Sub is different from the current
1051 // one, the current element (AllIEIs[i]) is the last one element
1052 // for the Sub.
1053 //
1054 // Note
1055 // case 1: if Elt == nullptr, no aliasing
1056 // case 2: if Elt != nullptr && Fromvec == nullptr, Elt aliasing
1057 // case 3: if Elt != nullptr && FromVec != nullptr,
1058 // (FromVec, FromVec_eltIx) sub-vector aliasing
1059 //
1060 Value* Elt = AllIEIs[i].Elt;
1061 if (!Elt ||
1062 (Sub && (i - SubStartIx) != AllIEIs[i].FromVec_eltIx)) {
1063 isSubCandidate = false;
1064 continue;
1065 }
1066
1067 // If Sub == nullptr or NextSub != Sub, this is the last element
1068 // of the current Sub (it is a scalar in case of sub == nullpr).
1069 Value* NextSub = (i < (nelts - 1)) ? AllIEIs[i + 1].FromVec : nullptr;
1070 if (!Sub || Sub != NextSub)
1071 {
1072 // End of the current Sub
1073 if (isSubCandidate)
1074 {
1075 Value* aliaser = Sub ? Sub : Elt;
1076 int sub_nelts = getNumElts(aliaser);
1077 // If Sub's size is not smaller than IEI's, or not all sub's
1078 // elements are used, skip.
1079 if (sub_nelts < nelts && (i - SubStartIx) == (sub_nelts - 1))
1080 {
1081 SubVecs.push_back(std::make_pair(aliaser, SubStartIx));
1082 }
1083 }
1084
1085 // NextSub should be the new sub-vector.
1086 // Make sure it is not used yet.
1087 // Note this works for special case in which NextSub = nullptr.
1088 isSubCandidate = true;
1089 Value* NextElt = (i < (nelts - 1)) ? AllIEIs[i + 1].Elt : nullptr;
1090 if (!NextElt ||
1091 (NextSub && IsInSubVecs(NextSub)) ||
1092 (!NextSub && IsInSubVecs(NextElt))) {
1093 isSubCandidate = false;
1094 }
1095 Sub = NextSub;
1096 SubStartIx = i + 1;
1097 }
1098 }
1099
1100 // Check alias interference
1101 bool hasAlias = false;
1102 for (int i = 0, sz = (int)SubVecs.size(); i < sz; ++i)
1103 {
1104 std::pair<Value*, int>& aPair = SubVecs[i];
1105 Value* V = aPair.first;
1106
1107 // If V is an arg, skip it
1108 if (isOrCoalescedWithArg(V)) {
1109 continue;
1110 }
1111
1112 int V_ix = aPair.second;
1113 Value* V_nv = m_DeSSA->getNodeValue(V);
1114 if (hasAnyOfDCCAsAliaser(V_nv)) {
1115 continue;
1116 }
1117 if (aliasInterfere(V_nv, Base_nv, V_ix)) {
1118 continue;
1119 }
1120 addVecAlias(V_nv, Base_nv, V_ix);
1121
1122 int V_sz = getNumElts(V);
1123 if (V_sz > 1)
1124 {
1125 // set up Noop inst
1126 // Make sure noop insts are in the map.
1127 for (int j = V_ix, sz = V_ix + V_sz; j < sz; ++j)
1128 {
1129 InsertElementInst* IEI = AllIEIs[j].IEI;
1130 if (m_DeSSA->isNoopAliaser(IEI))
1131 continue;
1132 m_HasBecomeNoopInsts[IEI] = 1;
1133
1134 ExtractElementInst* EEI = AllIEIs[j].EEI;
1135 IGC_ASSERT(EEI);
1136 // Sub-vector
1137 if (m_DeSSA->isNoopAliaser(EEI))
1138 continue;
1139 m_HasBecomeNoopInsts[EEI] = 1;
1140
1141 Value* EEI_nv = m_DeSSA->getNodeValue(EEI);
1142 addVecAlias(EEI_nv, Base_nv, j);
1143 }
1144 }
1145 hasAlias = true;
1146 }
1147 return hasAlias;
1148 }
1149
1150 // Return all aliased values of VecAliasee, given the alias:
1151 // Aliaser->(VecAliasee, Idx)
getAllAliasVals(ValueVectorTy & AliasVals,Value * Aliaser,Value * VecAliasee,int Idx)1152 void VariableReuseAnalysis::getAllAliasVals(
1153 ValueVectorTy& AliasVals,
1154 Value* Aliaser,
1155 Value* VecAliasee,
1156 int Idx)
1157 {
1158 AliasVals.clear();
1159 auto II = m_aliasMap.find(VecAliasee);
1160 AliasVals.push_back(VecAliasee);
1161 if (II != m_aliasMap.end())
1162 {
1163 SSubVecDesc* aliaseeSV = II->second;
1164 int nelts = getNumElts(Aliaser);
1165 int Idx_end = Idx + nelts - 1;
1166 for (int i = 0, sz = (int)(aliaseeSV->Aliasers.size()); i < sz; ++i)
1167 {
1168 SSubVecDesc* SV = aliaseeSV->Aliasers[i];
1169 int start = SV->StartElementOffset;
1170 int end = start + SV->NumElts - 1;
1171 if ((start > Idx_end) || (end < Idx))
1172 continue;
1173 AliasVals.push_back(SV->Aliaser);
1174 }
1175 }
1176 }
1177
1178
1179 // Check if two potentially-aliased values (must be dessa node
1180 // values) interfere each other.
aliasInterfere(Value * Sub,Value * Base,int BaseIdx)1181 bool VariableReuseAnalysis::aliasInterfere(Value* Sub, Value* Base, int BaseIdx)
1182 {
1183 ValueVectorTy Vec0, Vec1;
1184 Vec0.push_back(Sub);
1185 getAllAliasVals(Vec1, Sub, Base, BaseIdx);
1186 auto II0 = m_aliasMap.find(Sub);
1187 if (II0 != m_aliasMap.end()) {
1188 SSubVecDesc* SV0 = II0->second;
1189 for (int i = 0, sz = (int)SV0->Aliasers.size(); i < sz; ++i) {
1190 SSubVecDesc* tSV = SV0->Aliasers[i];
1191 Vec0.push_back(tSV->Aliaser);
1192 }
1193 }
1194
1195 for (int i0 = 0, sz0 = (int)Vec0.size(); i0 < sz0; ++i0)
1196 {
1197 Value* V0 = Vec0[i0];
1198 for (int i1 = 0, sz1 = (int)Vec1.size(); i1 < sz1; ++i1) {
1199 Value* V1 = Vec1[i1];
1200 if (m_DeSSA->aliasInterfere(V0, V1))
1201 return true;
1202 }
1203 }
1204 return false;
1205 }
1206