1 /*========================== begin_copyright_notice ============================
2 
3 Copyright (C) 2017-2021 Intel Corporation
4 
5 SPDX-License-Identifier: MIT
6 
7 ============================= end_copyright_notice ===========================*/
8 
9 #include "Compiler/Optimizer/Scalarizer.h"
10 #include "Compiler/IGCPassSupport.h"
11 #include "GenISAIntrinsics/GenIntrinsicInst.h"
12 #include "Compiler/CISACodeGen/helper.h"
13 #include "common/LLVMWarningsPush.hpp"
14 #include "llvmWrapper/IR/DerivedTypes.h"
15 #include "llvmWrapper/IR/Instructions.h"
16 #include "llvmWrapper/Support/Alignment.h"
17 #include "llvm/IR/Constants.h"
18 #include "llvm/IR/Operator.h"
19 #include "llvm/IR/Module.h"
20 #include "llvm/Support/CommandLine.h"
21 #include "llvm/Support/raw_os_ostream.h"
22 #include "common/LLVMWarningsPop.hpp"
23 #include "common/igc_regkeys.hpp"
24 #include "common/Types.hpp"
25 #include <iostream>
26 #include "Probe/Assertion.h"
27 
28 using namespace llvm;
29 using namespace IGC;
30 
// Debug-log helper: when the EnableScalarizerDebugLog flag is enabled, streams
// `b` to outs(). The first argument `a` is not referenced by the expansion; it
// only tags the call site. The expansion is a braced block, not an expression.
#define V_PRINT(a,b) \
    { \
        if (IGC_IS_FLAG_ENABLED(EnableScalarizerDebugLog)) \
        { \
            outs() << b; \
        } \
    }
38 
39 namespace VectorizerUtils {
SetDebugLocBy(Instruction * I,const Instruction * setBy)40     static void SetDebugLocBy(Instruction* I, const Instruction* setBy) {
41         if (!(I->getDebugLoc())) {
42             I->setDebugLoc(setBy->getDebugLoc());
43         }
44     }
45 }
46 
// Register pass to igc-opt.
// The four PASS_* macros below are the arguments handed to the
// IGC_INITIALIZE_PASS_BEGIN / IGC_INITIALIZE_PASS_END pair.
#define PASS_FLAG "igc-scalarize"
#define PASS_DESCRIPTION "Scalarize functions"
#define PASS_CFG_ONLY false
#define PASS_ANALYSIS false
IGC_INITIALIZE_PASS_BEGIN(ScalarizeFunction, PASS_FLAG, PASS_DESCRIPTION, PASS_CFG_ONLY, PASS_ANALYSIS)
IGC_INITIALIZE_PASS_END(ScalarizeFunction, PASS_FLAG, PASS_DESCRIPTION, PASS_CFG_ONLY, PASS_ANALYSIS)

// Pass identifier; its address is what FunctionPass(ID) receives in the constructor.
char ScalarizeFunction::ID = 0;
56 
ScalarizeFunction(bool selectiveScalarization)57 ScalarizeFunction::ScalarizeFunction(bool selectiveScalarization) : FunctionPass(ID)
58 {
59     initializeScalarizeFunctionPass(*PassRegistry::getPassRegistry());
60 
61     for (int i = 0; i < Instruction::OtherOpsEnd; i++) m_transposeCtr[i] = 0;
62     m_SelectiveScalarization = selectiveScalarization;
63 
64     // Initialize SCM buffers and allocation
65     m_SCMAllocationArray = new SCMEntry[ESTIMATED_INST_NUM];
66     m_SCMArrays.push_back(m_SCMAllocationArray);
67     m_SCMArrayLocation = 0;
68 
69     V_PRINT(scalarizer, "ScalarizeFunction constructor\n");
70 }
71 
/// Tears the pass down: releases the SCM entries, frees the SCM buffer that the
/// constructor allocated with new[], and destroys the dummy function.
ScalarizeFunction::~ScalarizeFunction()
{
    // NOTE(review): releaseAllSCMEntries() is defined outside this view; the
    // delete[] below presumes it leaves m_SCMAllocationArray allocated - confirm.
    releaseAllSCMEntries();
    delete[] m_SCMAllocationArray;
    destroyDummyFunc();
    V_PRINT(scalarizer, "ScalarizeFunction destructor\n");
}
79 
runOnFunction(Function & F)80 bool ScalarizeFunction::runOnFunction(Function& F)
81 {
82 
83     if (!IGC::ForceAlwaysInline())
84     {
85         if (F.isDeclaration()) return false;
86     }
87     else
88     {
89         // Scalarization is done only on functions which return void (kernels)
90         if (!F.getReturnType()->isVoidTy())
91         {
92             return false;
93         }
94     }
95 
96     m_currFunc = &F;
97     m_moduleContext = &(m_currFunc->getContext());
98 
99     V_PRINT(scalarizer, "\nStart scalarizing function: " << m_currFunc->getName() << "\n");
100 
101     // obtain TagetData of the module
102     m_pDL = &F.getParent()->getDataLayout();
103 
104     // Prepare data structures for scalarizing a new function
105     m_usedVectors.clear();
106     m_removedInsts.clear();
107     m_SCM.clear();
108     releaseAllSCMEntries();
109     m_DRL.clear();
110     m_Excludes.clear();
111 
112     // collecting instructions that we want to avoid scalarization
113     if (m_SelectiveScalarization)
114     {
115         buildExclusiveSet();
116     }
117 
118     // Scalarization. Iterate over all the instructions
119     // Always hold the iterator at the instruction following the one being scalarized (so the
120     // iterator will "skip" any instructions that are going to be added in the scalarization work)
121     inst_iterator sI = inst_begin(m_currFunc);
122     inst_iterator sE = inst_end(m_currFunc);
123     while (sI != sE)
124     {
125         Instruction* currInst = &*sI;
126         // Move iterator to next instruction BEFORE scalarizing current instruction
127         ++sI;
128         if (m_Excludes.count(currInst))
129         {
130             recoverNonScalarizableInst(currInst);
131         }
132         else
133         {
134             dispatchInstructionToScalarize(currInst);
135         }
136     }
137 
138     resolveVectorValues();
139 
140     // Resolved DRL entries
141     resolveDeferredInstructions();
142 
143     // Iterate over removed insts and delete them
144     SmallDenseSet<Instruction*, ESTIMATED_INST_NUM>::iterator ri = m_removedInsts.begin();
145     SmallDenseSet<Instruction*, ESTIMATED_INST_NUM>::iterator re = m_removedInsts.end();
146     SmallDenseSet<Instruction*, ESTIMATED_INST_NUM>::iterator index = ri;
147 
148     for (; index != re; ++index)
149     {
150         // get rid of old users
151         if (Value * val = dyn_cast<Value>(*index))
152         {
153             UndefValue* undefVal = UndefValue::get((*index)->getType());
154             (val)->replaceAllUsesWith(undefVal);
155         }
156         IGC_ASSERT_MESSAGE((*index)->use_empty(), "Unable to remove used instruction");
157         (*index)->eraseFromParent();
158     }
159 
160     V_PRINT(scalarizer, "\nCompleted scalarizing function: " << m_currFunc->getName() << "\n");
161     return true;
162 }
163 
164 /// <summary>
165 /// @brief We want to avoid scalarize vector-phi node if the vector is used
166 /// as a whole entity somewhere in the program. This function tries to find
167 /// this kind of definition web that involves phi-node, insert-element etc,
168 /// then add them into the exclusion-set (excluded from scalarization).
169 /// </summary>
buildExclusiveSet()170 void ScalarizeFunction::buildExclusiveSet()
171 {
172     inst_iterator sI = inst_begin(m_currFunc);
173     inst_iterator sE = inst_end(m_currFunc);
174     while (sI != sE)
175     {
176         Instruction* currInst = &*sI;
177         ++sI;
178         // find the seed for the workset
179         std::vector<llvm::Value*> workset;
180         if (GenIntrinsicInst * GII = dyn_cast<GenIntrinsicInst>(currInst))
181         {
182             unsigned numOperands = GII->getNumArgOperands();
183             for (unsigned i = 0; i < numOperands; i++)
184             {
185                 Value* operand = GII->getArgOperand(i);
186                 if (isa<VectorType>(operand->getType()))
187                 {
188                     workset.push_back(operand);
189                 }
190             }
191         }
192         else if (auto IEI = dyn_cast<InsertElementInst>(currInst))
193         {
194             Value* scalarIndexVal = IEI->getOperand(2);
195             // If the index is not a constant - we cannot statically remove this inst
196             if (!isa<ConstantInt>(scalarIndexVal)) {
197                 workset.push_back(IEI);
198             }
199         }
200         else if (auto EEI = dyn_cast<ExtractElementInst>(currInst))
201         {
202             Value* scalarIndexVal = EEI->getOperand(1);
203             // If the index is not a constant - we cannot statically remove this inst
204             if (!isa<ConstantInt>(scalarIndexVal)) {
205                 workset.push_back(EEI->getOperand(0));
206             }
207         }
208         // try to find a phi-web from the seed
209         bool HasPHI = false;
210         std::set<llvm::Value*> defweb;
211         while (!workset.empty())
212         {
213             auto Def = workset.back();
214             workset.pop_back();
215             if (m_Excludes.count(Def) || defweb.count(Def))
216             {
217                 continue;
218             }
219             if (auto IEI = dyn_cast<InsertElementInst>(Def))
220             {
221                 defweb.insert(IEI);
222                 if (!defweb.count(IEI->getOperand(0)) &&
223                     (isa<PHINode>(IEI->getOperand(0)) ||
224                         isa<ShuffleVectorInst>(IEI->getOperand(0)) ||
225                         isa<InsertElementInst>(IEI->getOperand(0))))
226                 {
227                     workset.push_back(IEI->getOperand(0));
228                 }
229             }
230             else if (auto SVI = dyn_cast<ShuffleVectorInst>(Def))
231             {
232                 defweb.insert(SVI);
233                 if (!defweb.count(SVI->getOperand(0)) &&
234                     (isa<PHINode>(SVI->getOperand(0)) ||
235                         isa<ShuffleVectorInst>(SVI->getOperand(0)) ||
236                         isa<InsertElementInst>(SVI->getOperand(0))))
237                 {
238                     workset.push_back(SVI->getOperand(0));
239                 }
240                 if (!defweb.count(SVI->getOperand(1)) &&
241                     (isa<PHINode>(SVI->getOperand(1)) ||
242                         isa<ShuffleVectorInst>(SVI->getOperand(1)) ||
243                         isa<InsertElementInst>(SVI->getOperand(1))))
244                 {
245                     workset.push_back(SVI->getOperand(1));
246                 }
247             }
248             else if (auto PHI = dyn_cast<PHINode>(Def))
249             {
250                 defweb.insert(PHI);
251                 HasPHI = true;  // !this def-web is qualified!
252                 for (int i = 0, n = PHI->getNumOperands(); i < n; ++i)
253                     if (!defweb.count(PHI->getOperand(i)) &&
254                         (isa<PHINode>(PHI->getOperand(i)) ||
255                             isa<ShuffleVectorInst>(PHI->getOperand(i)) ||
256                             isa<InsertElementInst>(PHI->getOperand(i))))
257                     {
258                         workset.push_back(PHI->getOperand(i));
259                     }
260             }
261             else
262             {
263                 continue;
264             }
265             // check use
266             for (auto U : Def->users())
267             {
268                 if (!defweb.count(U) &&
269                     (isa<PHINode>(U) ||
270                         isa<ShuffleVectorInst>(U) ||
271                         isa<InsertElementInst>(U)))
272                 {
273                     workset.push_back(U);
274                 }
275             }
276         }
277         // if we find a qualified web with PHINode, add those instructions
278         // into the exclusion set
279         if (HasPHI)
280         {
281             m_Excludes.merge(defweb);
282         }
283     }
284 }
285 
dispatchInstructionToScalarize(Instruction * I)286 void ScalarizeFunction::dispatchInstructionToScalarize(Instruction* I)
287 {
288     V_PRINT(scalarizer, "\tScalarizing Instruction: " << *I << "\n");
289 
290     if (m_removedInsts.count(I))
291     {
292         V_PRINT(scalarizer, "\tInstruction is already marked for removal. Being ignored..\n");
293         return;
294     }
295 
296     switch (I->getOpcode())
297     {
298     case Instruction::Add:
299     case Instruction::Sub:
300     case Instruction::Mul:
301     case Instruction::FAdd:
302     case Instruction::FSub:
303     case Instruction::FMul:
304     case Instruction::UDiv:
305     case Instruction::SDiv:
306     case Instruction::FDiv:
307     case Instruction::URem:
308     case Instruction::SRem:
309     case Instruction::FRem:
310     case Instruction::Shl:
311     case Instruction::LShr:
312     case Instruction::AShr:
313     case Instruction::And:
314     case Instruction::Or:
315     case Instruction::Xor:
316         scalarizeInstruction(dyn_cast<BinaryOperator>(I));
317         break;
318     case Instruction::ICmp:
319     case Instruction::FCmp:
320         scalarizeInstruction(dyn_cast<CmpInst>(I));
321         break;
322     case Instruction::Trunc:
323     case Instruction::ZExt:
324     case Instruction::SExt:
325     case Instruction::FPToUI:
326     case Instruction::FPToSI:
327     case Instruction::UIToFP:
328     case Instruction::SIToFP:
329     case Instruction::FPTrunc:
330     case Instruction::FPExt:
331     case Instruction::PtrToInt:
332     case Instruction::IntToPtr:
333     case Instruction::BitCast:
334         scalarizeInstruction(dyn_cast<CastInst>(I));
335         break;
336     case Instruction::PHI:
337         scalarizeInstruction(dyn_cast<PHINode>(I));
338         break;
339     case Instruction::Select:
340         scalarizeInstruction(dyn_cast<SelectInst>(I));
341         break;
342     case Instruction::ExtractElement:
343         scalarizeInstruction(dyn_cast<ExtractElementInst>(I));
344         break;
345     case Instruction::InsertElement:
346         scalarizeInstruction(dyn_cast<InsertElementInst>(I));
347         break;
348     case Instruction::ShuffleVector:
349         scalarizeInstruction(dyn_cast<ShuffleVectorInst>(I));
350         break;
351         //case Instruction::Call :
352         //  scalarizeInstruction(dyn_cast<CallInst>(I));
353         //  break;
354     case Instruction::Alloca:
355         scalarizeInstruction(dyn_cast<AllocaInst>(I));
356         break;
357     case Instruction::GetElementPtr:
358         scalarizeInstruction(dyn_cast<GetElementPtrInst>(I));
359         break;
360         // The remaining instructions are not supported for scalarization. Keep "as is"
361     default:
362         recoverNonScalarizableInst(I);
363         break;
364     }
365 }
366 
recoverNonScalarizableInst(Instruction * Inst)367 void ScalarizeFunction::recoverNonScalarizableInst(Instruction* Inst)
368 {
369     V_PRINT(scalarizer, "\t\tInstruction is not scalarizable.\n");
370 
371     // any vector value should have an SCM entry - even an empty one
372     if (isa<VectorType>(Inst->getType())) getSCMEntry(Inst);
373 
374     // Iterate over all arguments. Check that they all exist (or rebuilt)
375     if (CallInst * CI = dyn_cast<CallInst>(Inst))
376     {
377         unsigned numOperands = CI->getNumArgOperands();
378         for (unsigned i = 0; i < numOperands; i++)
379         {
380             Value* operand = CI->getArgOperand(i);
381             if (isa<VectorType>(operand->getType()))
382             {
383                 // Recover value if needed (only needed for vector values)
384                 obtainVectorValueWhichMightBeScalarized(operand);
385             }
386         }
387     }
388     else
389     {
390         unsigned numOperands = Inst->getNumOperands();
391         for (unsigned i = 0; i < numOperands; i++)
392         {
393             Value* operand = Inst->getOperand(i);
394             if (isa<VectorType>(operand->getType()))
395             {
396                 // Recover value if needed (only needed for vector values)
397                 obtainVectorValueWhichMightBeScalarized(operand);
398             }
399         }
400     }
401 }
402 
scalarizeInstruction(BinaryOperator * BI)403 void ScalarizeFunction::scalarizeInstruction(BinaryOperator* BI)
404 {
405     V_PRINT(scalarizer, "\t\tBinary instruction\n");
406     IGC_ASSERT_MESSAGE(BI, "instruction type dynamic cast failed");
407     IGCLLVM::FixedVectorType* instType = dyn_cast<IGCLLVM::FixedVectorType>(BI->getType());
408     // Only need handling for vector binary ops
409     if (!instType) return;
410 
411     // Prepare empty SCM entry for the instruction
412     SCMEntry* newEntry = getSCMEntry(BI);
413 
414     // Get additional info from instruction
415     unsigned numElements = int_cast<unsigned>(instType->getNumElements());
416 
417     // Obtain scalarized arguments
418     SmallVector<Value*, MAX_INPUT_VECTOR_WIDTH>operand0;
419     SmallVector<Value*, MAX_INPUT_VECTOR_WIDTH>operand1;
420     bool op0IsConst, op1IsConst;
421 
422     obtainScalarizedValues(operand0, &op0IsConst, BI->getOperand(0), BI);
423     obtainScalarizedValues(operand1, &op1IsConst, BI->getOperand(1), BI);
424 
425     // If both arguments are constants, don't bother Scalarizing inst
426     if (op0IsConst && op1IsConst) return;
427 
428     // Generate new (scalar) instructions
429     SmallVector<Value*, MAX_INPUT_VECTOR_WIDTH>newScalarizedInsts;
430     newScalarizedInsts.resize(numElements);
431     for (unsigned dup = 0; dup < numElements; dup++)
432     {
433         Value* Val = BinaryOperator::Create(
434             BI->getOpcode(),
435             operand0[dup],
436             operand1[dup],
437             BI->getName(),
438             BI
439         );
440         if (BinaryOperator * BO = dyn_cast<BinaryOperator>(Val)) {
441             // Copy overflow flags if any.
442             if (isa<OverflowingBinaryOperator>(BO)) {
443                 BO->setHasNoSignedWrap(BI->hasNoSignedWrap());
444                 BO->setHasNoUnsignedWrap(BI->hasNoUnsignedWrap());
445             }
446             // Copy exact flag if any.
447             if (isa<PossiblyExactOperator>(BO))
448                 BO->setIsExact(BI->isExact());
449             // Copy fast math flags if any.
450             if (isa<FPMathOperator>(BO))
451                 BO->setFastMathFlags(BI->getFastMathFlags());
452         }
453         newScalarizedInsts[dup] = Val;
454     }
455 
456     // Add new value/s to SCM
457     updateSCMEntryWithValues(newEntry, &(newScalarizedInsts[0]), BI, true);
458 
459     // Remove original instruction
460     m_removedInsts.insert(BI);
461 }
462 
scalarizeInstruction(CmpInst * CI)463 void ScalarizeFunction::scalarizeInstruction(CmpInst* CI)
464 {
465     V_PRINT(scalarizer, "\t\tCompare instruction\n");
466     IGC_ASSERT_MESSAGE(CI, "instruction type dynamic cast failed");
467     IGCLLVM::FixedVectorType* instType = dyn_cast<IGCLLVM::FixedVectorType>(CI->getType());
468     // Only need handling for vector compares
469     if (!instType) return;
470 
471     // Prepare empty SCM entry for the instruction
472     SCMEntry* newEntry = getSCMEntry(CI);
473 
474     // Get additional info from instruction
475     unsigned numElements = int_cast<unsigned>(instType->getNumElements());
476 
477     // Obtain scalarized arguments
478 
479     SmallVector<Value*, MAX_INPUT_VECTOR_WIDTH>operand0;
480     SmallVector<Value*, MAX_INPUT_VECTOR_WIDTH>operand1;
481     bool op0IsConst, op1IsConst;
482 
483     obtainScalarizedValues(operand0, &op0IsConst, CI->getOperand(0), CI);
484     obtainScalarizedValues(operand1, &op1IsConst, CI->getOperand(1), CI);
485 
486     // If both arguments are constants, don't bother Scalarizing inst
487     if (op0IsConst && op1IsConst) return;
488 
489     // Generate new (scalar) instructions
490     SmallVector<Value*, MAX_INPUT_VECTOR_WIDTH>newScalarizedInsts;
491     newScalarizedInsts.resize(numElements);
492     for (unsigned dup = 0; dup < numElements; dup++)
493     {
494         newScalarizedInsts[dup] = CmpInst::Create(
495             CI->getOpcode(),
496             CI->getPredicate(),
497             operand0[dup],
498             operand1[dup],
499             CI->getName(),
500             CI
501         );
502     }
503 
504     // Add new value/s to SCM
505     updateSCMEntryWithValues(newEntry, &(newScalarizedInsts[0]), CI, true);
506 
507     // Remove original instruction
508     m_removedInsts.insert(CI);
509 }
510 
scalarizeInstruction(CastInst * CI)511 void ScalarizeFunction::scalarizeInstruction(CastInst* CI)
512 {
513     V_PRINT(scalarizer, "\t\tCast instruction\n");
514     IGC_ASSERT_MESSAGE(CI, "instruction type dynamic cast failed");
515     IGCLLVM::FixedVectorType* instType = dyn_cast<IGCLLVM::FixedVectorType>(CI->getType());
516 
517     // For BitCast - we only scalarize if src and dst types have same vector length
518     if (isa<BitCastInst>(CI))
519     {
520         if (!instType) return recoverNonScalarizableInst(CI);
521         IGCLLVM::FixedVectorType* srcType = dyn_cast<IGCLLVM::FixedVectorType>(CI->getOperand(0)->getType());
522         if (!srcType || (instType->getNumElements() != srcType->getNumElements()))
523         {
524             return recoverNonScalarizableInst(CI);
525         }
526     }
527 
528     // Only need handling for vector cast
529     if (!instType) return;
530 
531     // Prepare empty SCM entry for the instruction
532     SCMEntry* newEntry = getSCMEntry(CI);
533 
534     // Get additional info from instruction
535     unsigned numElements = int_cast<unsigned>(instType->getNumElements());
536     IGC_ASSERT_MESSAGE(
537         isa<IGCLLVM::FixedVectorType>(CI->getOperand(0)->getType()),
538         "unexpected type!");
539     IGC_ASSERT_MESSAGE(
540         cast<IGCLLVM::FixedVectorType>(CI->getOperand(0)->getType())
541                 ->getNumElements() == numElements,
542         "unexpected vector width");
543 
544     // Obtain scalarized argument
545     SmallVector<Value*, MAX_INPUT_VECTOR_WIDTH>operand0;
546     bool op0IsConst;
547 
548     obtainScalarizedValues(operand0, &op0IsConst, CI->getOperand(0), CI);
549 
550     // If argument is a constant, don't bother Scalarizing inst
551     if (op0IsConst) return;
552 
553     // Obtain type, which ever scalar cast will cast-to
554     Type* scalarDestType = instType->getElementType();
555 
556     // Generate new (scalar) instructions
557     SmallVector<Value*, MAX_INPUT_VECTOR_WIDTH>newScalarizedInsts;
558     newScalarizedInsts.resize(numElements);
559     for (unsigned dup = 0; dup < numElements; dup++)
560     {
561         newScalarizedInsts[dup] = CastInst::Create(
562             CI->getOpcode(),
563             operand0[dup],
564             scalarDestType,
565             CI->getName(),
566             CI
567         );
568     }
569 
570     // Add new value/s to SCM
571     updateSCMEntryWithValues(newEntry, &(newScalarizedInsts[0]), CI, true);
572 
573     // Remove original instruction
574     m_removedInsts.insert(CI);
575 }
576 
scalarizeInstruction(PHINode * PI)577 void ScalarizeFunction::scalarizeInstruction(PHINode* PI)
578 {
579     V_PRINT(scalarizer, "\t\tPHI instruction\n");
580     IGC_ASSERT_MESSAGE(PI, "instruction type dynamic cast failed");
581     IGCLLVM::FixedVectorType* instType = dyn_cast<IGCLLVM::FixedVectorType>(PI->getType());
582     // Only need handling for vector PHI
583     if (!instType) return;
584 
585     // Obtain number of incoming nodes \ PHI values
586     unsigned numValues = PI->getNumIncomingValues();
587 
588     // Normally, a phi would be scalarized and a collection of
589     // extractelements would be emitted for each value.  Since
590     // VME payload CVariables don't necessarily match the size
591     // of the llvm type, keep these phis vectorized here so we
592     // can emit the appropriate movs in emitVectorCopy() when
593     // emitting movs for phis.
594     for (unsigned i = 0; i < numValues; i++)
595     {
596         auto* Op = PI->getIncomingValue(i);
597 
598         if (auto * GII = dyn_cast<GenIntrinsicInst>(Op))
599         {
600             switch (GII->getIntrinsicID())
601             {
602             case GenISAIntrinsic::GenISA_vmeSendIME2:
603             case GenISAIntrinsic::GenISA_vmeSendFBR2:
604             case GenISAIntrinsic::GenISA_vmeSendSIC2:
605                 recoverNonScalarizableInst(PI);
606                 return;
607 
608             default: break;
609             }
610         }
611     }
612 
613     {
614         // If PHI is used in insts that take vector as operands, keep this vector phi.
615         // With the vector phi, variable alias can do a better job. Otherwise, more mov
616         // insts could be generated.
617         DenseMap<PHINode*, int> visited;
618         SmallVector<PHINode*, 8> phis;
619         phis.push_back(PI);
620         while (!phis.empty())
621         {
622             PHINode* PN = phis.back();
623             phis.pop_back();
624             for (auto U : PN->users())
625             {
626                 if (GenIntrinsicInst * GII = dyn_cast<GenIntrinsicInst>(U))
627                 {
628                     switch (GII->getIntrinsicID())
629                     {
630                     default:
631                         break;
632                     case GenISAIntrinsic::GenISA_simdBlockWrite:
633                         recoverNonScalarizableInst(PI);
634                         return;
635                     }
636                 }
637                 else if (PHINode * N = dyn_cast<PHINode>(U))
638                 {
639                     if (visited.count(N) == 0) {
640                         visited[N] = 1;
641                         phis.push_back(N);
642                     }
643                 }
644             }
645         }
646         visited.clear();
647         phis.clear();
648     }
649 
650 
651     // Prepare empty SCM entry for the instruction
652     SCMEntry* newEntry = getSCMEntry(PI);
653 
654     // Get additional info from instruction
655     Type* scalarType = instType->getElementType();
656     unsigned numElements = int_cast<unsigned>(instType->getNumElements());
657 
658     // Create new (empty) PHI nodes, and place them.
659     SmallVector<Value*, MAX_INPUT_VECTOR_WIDTH>newScalarizedPHI;
660     newScalarizedPHI.resize(numElements);
661     for (unsigned i = 0; i < numElements; i++)
662     {
663         newScalarizedPHI[i] = PHINode::Create(scalarType, numValues, PI->getName(), PI);
664     }
665 
666     // Iterate over incoming values in vector PHI, and fill scalar PHI's accordingly
667     SmallVector<Value*, MAX_INPUT_VECTOR_WIDTH>operand;
668 
669     for (unsigned j = 0; j < numValues; j++)
670     {
671         // Obtain scalarized arguments
672         obtainScalarizedValues(operand, NULL, PI->getIncomingValue(j), PI);
673 
674         // Fill all scalarized PHI nodes with scalar arguments
675         for (unsigned i = 0; i < numElements; i++)
676         {
677             cast<PHINode>(newScalarizedPHI[i])->addIncoming(operand[i], PI->getIncomingBlock(j));
678         }
679     }
680 
681     // Add new value/s to SCM
682     updateSCMEntryWithValues(newEntry, &(newScalarizedPHI[0]), PI, true);
683 
684     // Remove original instruction
685     m_removedInsts.insert(PI);
686 }
687 
scalarizeInstruction(SelectInst * SI)688 void ScalarizeFunction::scalarizeInstruction(SelectInst* SI)
689 {
690     V_PRINT(scalarizer, "\t\tSelect instruction\n");
691     IGC_ASSERT_MESSAGE(SI, "instruction type dynamic cast failed");
692     IGCLLVM::FixedVectorType* instType = dyn_cast<IGCLLVM::FixedVectorType>(SI->getType());
693     // Only need handling for vector select
694     if (!instType) return;
695 
696     // Prepare empty SCM entry for the instruction
697     SCMEntry* newEntry = getSCMEntry(SI);
698 
699     // Get additional info from instruction
700     unsigned numElements = int_cast<unsigned>(instType->getNumElements());
701 
702     // Obtain scalarized arguments (select inst has 3 arguments: Cond, TrueVal, FalseVal)
703     SmallVector<Value*, MAX_INPUT_VECTOR_WIDTH>condOp;
704     SmallVector<Value*, MAX_INPUT_VECTOR_WIDTH>trueValOp;
705     SmallVector<Value*, MAX_INPUT_VECTOR_WIDTH>falseValOp;
706 
707     obtainScalarizedValues(trueValOp, NULL, SI->getTrueValue(), SI);
708     obtainScalarizedValues(falseValOp, NULL, SI->getFalseValue(), SI);
709 
710     // Check if condition is a vector.
711     Value* conditionVal = SI->getCondition();
712     if (isa<VectorType>(conditionVal->getType()))
713     {
714         // Obtain scalarized breakdowns of condition
715         obtainScalarizedValues(condOp, NULL, conditionVal, SI);
716     }
717     else
718     {
719         condOp.resize(numElements);
720         // Broadcast the (scalar) condition, to be used by all the insruction breakdowns
721         for (unsigned i = 0; i < numElements; i++) condOp[i] = conditionVal;
722     }
723 
724     // Generate new (scalar) instructions
725     SmallVector<Value*, MAX_INPUT_VECTOR_WIDTH>newScalarizedInsts;
726     newScalarizedInsts.resize(numElements);
727     for (unsigned dup = 0; dup < numElements; dup++)
728     {
729         // Small optimization: Some scalar selects may be redundant (trueVal == falseVal)
730         if (trueValOp[dup] != falseValOp[dup])
731         {
732             newScalarizedInsts[dup] = SelectInst::Create(
733                 condOp[dup],
734                 trueValOp[dup],
735                 falseValOp[dup],
736                 SI->getName(),
737                 SI
738             );
739         }
740         else
741         {
742             // just "connect" the destination value to the true value input
743             newScalarizedInsts[dup] = trueValOp[dup];
744         }
745     }
746 
747     // Add new value/s to SCM
748     updateSCMEntryWithValues(newEntry, &(newScalarizedInsts[0]), SI, true);
749 
750     // Remove original instruction
751     m_removedInsts.insert(SI);
752 }
753 
scalarizeInstruction(ExtractElementInst * EI)754 void ScalarizeFunction::scalarizeInstruction(ExtractElementInst* EI)
755 {
756     V_PRINT(scalarizer, "\t\tExtractElement instruction\n");
757     IGC_ASSERT_MESSAGE(EI, "instruction type dynamic cast failed");
758 
759     // Proper scalarization makes "extractElement" instructions redundant
760     // Only need to "follow" the scalar element (as the input vector was
761     // already scalarized)
762     Value* vectorValue = EI->getOperand(0);
763     Value* scalarIndexVal = EI->getOperand(1);
764 
765     // If the index is not a constant - we cannot statically remove this inst
766     if (!isa<ConstantInt>(scalarIndexVal)) return recoverNonScalarizableInst(EI);
767 
768     // Obtain the scalarized operands
769     SmallVector<Value*, MAX_INPUT_VECTOR_WIDTH>operand;
770     obtainScalarizedValues(operand, NULL, vectorValue, EI);
771 
772     // Connect the "extracted" value to all its consumers
773     uint64_t scalarIndex = cast<ConstantInt>(scalarIndexVal)->getZExtValue();
774     auto valueVType = cast<IGCLLVM::FixedVectorType>(vectorValue->getType());
775     if (static_cast<unsigned int>(scalarIndex) < (unsigned)valueVType->getNumElements())
776     {
777         IGC_ASSERT_MESSAGE(NULL != operand[static_cast<unsigned int>(scalarIndex)], "SCM error");
778 
779         if (IGC_IS_FLAG_ENABLED(UseOffsetInLocation))
780         {
781             // Metadata "implicitGlobalID" must be propagated to a new instruction as a WA
782             // for missing meta data preservation in this pass. When a general fix is applied
783             // then instructions below for this specific propagation must be removed.
784             Value* pNewVal = operand[static_cast<unsigned int>(scalarIndex)];
785 
786             if (MDNode* pEIMD = EI->getMetadata("implicitGlobalID"))
787             {
788                 // Compute thread and group identification instructions must have 'Output' attribute
789                 // added later during compilation. The implicitGlobalID metadata attached to this
790                 // instruction must be assigned to a new instruction, which replaces this instruction.
791                 // Unfortunatelly, replaceAllUsesWith() will not ensure such propagation.
792                 Instruction* pNewInst = dyn_cast_or_null<llvm::Instruction>(pNewVal);
793                 IGC_ASSERT_MESSAGE(pNewInst, "Missing implicit global ID instruction");
794 
795                 pNewInst->copyMetadata(*EI);
796             }
797         }
798 
799         // Replace all users of this inst, with the extracted scalar value
800         EI->replaceAllUsesWith(operand[static_cast<unsigned int>(scalarIndex)]);
801     }
802     else
803     {
804         IGC_ASSERT_MESSAGE(0, "The instruction extractElement is out of bounds.");
805         EI->replaceAllUsesWith(UndefValue::get(valueVType->getElementType()));
806     }
807 
808     // Remove original instruction
809     m_removedInsts.insert(EI);
810 }
811 
scalarizeInstruction(InsertElementInst * II)812 void ScalarizeFunction::scalarizeInstruction(InsertElementInst* II)
813 {
814     V_PRINT(scalarizer, "\t\tInsertElement instruction\n");
815     IGC_ASSERT_MESSAGE(II, "instruction type dynamic cast failed");
816 
817     // Proper scalarization makes "InsertElement" instructions redundant.
818     // Only need to "follow" the scalar elements and update in SCM
819     Value* sourceVectorValue = II->getOperand(0);
820     Value* sourceScalarValue = II->getOperand(1);
821     Value* scalarIndexVal = II->getOperand(2);
822 
823     // If the index is not a constant - we cannot statically remove this inst
824     if (!isa<ConstantInt>(scalarIndexVal)) return recoverNonScalarizableInst(II);
825 
826     // Prepare empty SCM entry for the instruction
827     SCMEntry* newEntry = getSCMEntry(II);
828 
829     IGC_ASSERT_MESSAGE(isa<ConstantInt>(scalarIndexVal), "inst arguments error");
830     uint64_t scalarIndex = cast<ConstantInt>(scalarIndexVal)->getZExtValue();
831     IGC_ASSERT_MESSAGE(
832         scalarIndex <
833             dyn_cast<IGCLLVM::FixedVectorType>(II->getType())->getNumElements(),
834         "index error");
835 
836     // Obtain breakdown of input vector
837     SmallVector<Value*, MAX_INPUT_VECTOR_WIDTH>scalarValues;
838     if (isa<UndefValue>(sourceVectorValue))
839     {
840         // Scalarize the undef value (generate a scalar undef)
841         IGCLLVM::FixedVectorType* inputVectorType = dyn_cast<IGCLLVM::FixedVectorType>(sourceVectorValue->getType());
842         IGC_ASSERT_MESSAGE(inputVectorType, "expected vector argument");
843 
844         UndefValue* undefVal = UndefValue::get(inputVectorType->getElementType());
845 
846         // fill new SCM entry with UNDEFs and the new value
847         scalarValues.resize(static_cast<unsigned int>(inputVectorType->getNumElements()));
848         for (unsigned j = 0; j < inputVectorType->getNumElements(); j++)
849         {
850             scalarValues[j] = undefVal;
851         }
852         scalarValues[static_cast<unsigned int>(scalarIndex)] = sourceScalarValue;
853     }
854     else
855     {
856         // Obtain the scalar values of the input vector
857         obtainScalarizedValues(scalarValues, NULL, sourceVectorValue, II);
858         // Add the new element
859         scalarValues[static_cast<unsigned int>(scalarIndex)] = sourceScalarValue;
860     }
861 
862     // Add new value/s to SCM
863     updateSCMEntryWithValues(newEntry, &(scalarValues[0]), II, true, false);
864 
865     // Remove original instruction
866     m_removedInsts.insert(II);
867 }
868 
scalarizeInstruction(ShuffleVectorInst * SI)869 void ScalarizeFunction::scalarizeInstruction(ShuffleVectorInst* SI)
870 {
871     V_PRINT(scalarizer, "\t\tShuffleVector instruction\n");
872     IGC_ASSERT_MESSAGE(nullptr != SI, "instruction type dynamic cast failed");
873 
874     // Proper scalarization makes "ShuffleVector" instructions redundant.
875     // Only need to "follow" the scalar elements and update in SCM
876 
877     // Grab input vectors types and width
878     Value* sourceVector0Value = SI->getOperand(0);
879     IGC_ASSERT(nullptr != sourceVector0Value);
880     Value* sourceVector1Value = SI->getOperand(1);
881     IGC_ASSERT(nullptr != sourceVector1Value);
882     IGCLLVM::FixedVectorType* const inputType = dyn_cast<IGCLLVM::FixedVectorType>(sourceVector0Value->getType());
883     IGC_ASSERT_MESSAGE(nullptr != inputType, "vector input error");
884     IGC_ASSERT_MESSAGE(inputType == sourceVector1Value->getType(), "vector input error");
885     unsigned sourceVectorWidth = int_cast<unsigned>(inputType->getNumElements());
886 
887     // generate an array of values (pre-shuffle), which concatenates both vectors
888     SmallVector<Value*, MAX_INPUT_VECTOR_WIDTH>allValues;
889     allValues.resize(2 * sourceVectorWidth);
890 
891     // Obtain scalarized input values (into concatenated array). if vector was Undef - keep NULL.
892     if (!isa<UndefValue>(sourceVector0Value))
893     {
894         obtainScalarizedValues(allValues, NULL, sourceVector0Value, SI, 0);
895     }
896     if (!isa<UndefValue>(sourceVector1Value))
897     {
898         // Place values, starting in the middle of concatenated array
899         obtainScalarizedValues(allValues, NULL, sourceVector1Value, SI, sourceVectorWidth);
900     }
901 
902     // Generate array for shuffled scalar values
903     SmallVector<Value*, MAX_INPUT_VECTOR_WIDTH>newVector;
904     unsigned width = int_cast<unsigned>(cast<IGCLLVM::FixedVectorType>(SI->getType())->getNumElements());
905 
906     // Generate undef value, which may be needed as some scalar elements
907     UndefValue* undef = UndefValue::get(inputType->getElementType());
908 
909     newVector.resize(width);
910     // Go over shuffle order, and place scalar values in array
911     for (unsigned i = 0; i < width; i++)
912     {
913         int maskValue = SI->getMaskValue(i);
914         if (maskValue >= 0 && NULL != allValues[maskValue])
915         {
916             newVector[i] = allValues[maskValue];
917         }
918         else
919         {
920             newVector[i] = undef;
921         }
922     }
923 
924     // Create the new SCM entry
925     SCMEntry* newEntry = getSCMEntry(SI);
926     updateSCMEntryWithValues(newEntry, &(newVector[0]), SI, true, false);
927 
928     // Remove original instruction
929     m_removedInsts.insert(SI);
930 }
931 
scalarizeInstruction(CallInst * CI)932 void ScalarizeFunction::scalarizeInstruction(CallInst* CI)
933 {
934     V_PRINT(scalarizer, "\t\tCall instruction\n");
935     IGC_ASSERT_MESSAGE(CI, "instruction type dynamic cast failed");
936 
937     recoverNonScalarizableInst(CI);
938 }
939 
scalarizeInstruction(AllocaInst * AI)940 void ScalarizeFunction::scalarizeInstruction(AllocaInst* AI)
941 {
942     V_PRINT(scalarizer, "\t\tAlloca instruction\n");
943     IGC_ASSERT_MESSAGE(AI, "instruction type dynamic cast failed");
944 
945     return recoverNonScalarizableInst(AI);
946 }
947 
scalarizeInstruction(GetElementPtrInst * GI)948 void ScalarizeFunction::scalarizeInstruction(GetElementPtrInst* GI)
949 {
950     V_PRINT(scalarizer, "\t\tGEP instruction\n");
951     IGC_ASSERT_MESSAGE(GI, "instruction type dynamic cast failed");
952 
953     // If it has more than one index, leave it as is.
954     if (GI->getNumIndices() != 1)
955     {
956         return recoverNonScalarizableInst(GI);
957     }
958     Value* baseValue = GI->getOperand(0);
959     Value* indexValue = GI->getOperand(1);
960 
961     // If it's not a vector instruction, leave it as is.
962     if (!baseValue->getType()->isVectorTy() && !indexValue->getType()->isVectorTy())
963     {
964         return recoverNonScalarizableInst(GI);
965     }
966     SmallVector<Value*, MAX_INPUT_VECTOR_WIDTH>operand1;
967     SmallVector<Value*, MAX_INPUT_VECTOR_WIDTH>operand2;
968     Type* ptrTy;
969     unsigned width = 1;
970 
971     if (baseValue->getType()->isVectorTy())
972     {
973         width = int_cast<unsigned>(dyn_cast<IGCLLVM::FixedVectorType>(baseValue->getType())->getNumElements());
974         // Obtain the scalarized operands
975         obtainScalarizedValues(operand1, NULL, baseValue, GI);
976         ptrTy = dyn_cast<VectorType>(baseValue->getType())->getElementType();
977     }
978     else
979     {
980         ptrTy = baseValue->getType();
981     }
982     if (indexValue->getType()->isVectorTy())
983     {
984         width = int_cast<unsigned>(dyn_cast<IGCLLVM::FixedVectorType>(indexValue->getType())->getNumElements());
985         // Obtain the scalarized operands
986         obtainScalarizedValues(operand2, NULL, indexValue, GI);
987     }
988     IGC_ASSERT_MESSAGE(width > 1, "expected vector instruction");
989     SmallVector<Value*, MAX_INPUT_VECTOR_WIDTH>scalarValues;
990     scalarValues.resize(width);
991 
992     Value* assembledVector = UndefValue::get(IGCLLVM::FixedVectorType::get(ptrTy, width));
993     for (unsigned i = 0; i < width; ++i)
994     {
995         auto op1 = baseValue->getType()->isVectorTy() ? operand1[i] : baseValue;
996         auto op2 = indexValue->getType()->isVectorTy() ? operand2[i] : indexValue;
997 
998         Value* newGEP = GetElementPtrInst::Create(nullptr, op1, op2, "", GI);
999         Value* constIndex = ConstantInt::get(Type::getInt32Ty(context()), i);
1000         Instruction* insert = InsertElementInst::Create(assembledVector,
1001             newGEP, constIndex, "assembled.vect", GI);
1002         assembledVector = insert;
1003         scalarValues[i] = newGEP;
1004 
1005         V_PRINT(scalarizer,
1006             "\t\t\tCreated vector assembly inst:" << *assembledVector << "\n");
1007     }
1008     // Prepare empty SCM entry for the new instruction
1009     SCMEntry* newEntry = getSCMEntry(assembledVector);
1010     // Add new value/s to SCM
1011     updateSCMEntryWithValues(newEntry, &(scalarValues[0]), assembledVector, true);
1012     GI->replaceAllUsesWith(assembledVector);
1013 
1014     // Remove original instruction
1015     m_removedInsts.insert(GI);
1016 }
1017 
obtainScalarizedValues(SmallVectorImpl<Value * > & retValues,bool * retIsConstant,Value * origValue,Instruction * origInst,int destIdx)1018 void ScalarizeFunction::obtainScalarizedValues(SmallVectorImpl<Value*>& retValues, bool* retIsConstant,
1019     Value* origValue, Instruction* origInst, int destIdx)
1020 {
1021     V_PRINT(scalarizer, "\t\t\tObtaining scalar value... " << *origValue << "\n");
1022 
1023     IGCLLVM::FixedVectorType* origType = dyn_cast<IGCLLVM::FixedVectorType>(origValue->getType());
1024     IGC_ASSERT_MESSAGE(origType, "Value must have a vector type!");
1025     unsigned width = int_cast<unsigned>(origType->getNumElements());
1026 
1027     if (destIdx == -1)
1028     {
1029         destIdx = 0;
1030         retValues.resize(width);
1031     }
1032 
1033     if (NULL != retIsConstant)
1034     {
1035         // Set retIsConstant (return value) to true, if the origValue is constant
1036         if (!isa<Constant>(origValue))
1037         {
1038             *retIsConstant = false;
1039         }
1040         else
1041         {
1042             *retIsConstant = true;
1043         }
1044     }
1045 
1046     // Lookup value in SCM
1047     SCMEntry* currEntry = getScalarizedValues(origValue);
1048     if (currEntry && (NULL != currEntry->scalarValues[0]))
1049     {
1050         // Value was found in SCM
1051         V_PRINT(scalarizer,
1052             "\t\t\tFound existing entry in lookup of " << origValue->getName() << "\n");
1053         for (unsigned i = 0; i < width; i++)
1054         {
1055             // Copy values to return array
1056             IGC_ASSERT_MESSAGE(NULL != currEntry->scalarValues[i], "SCM entry contains NULL value");
1057             retValues[i + destIdx] = currEntry->scalarValues[i];
1058         }
1059     }
1060     else if (isa<UndefValue>(origValue))
1061     {
1062         IGC_ASSERT_MESSAGE(origType, "original value must have a vector type!");
1063         // value is an undefVal. Break it to element-sized undefs
1064         V_PRINT(scalarizer, "\t\t\tUndefVal constant\n");
1065         Value* undefElement = UndefValue::get(origType->getElementType());
1066         for (unsigned i = 0; i < width; i++)
1067         {
1068             retValues[i + destIdx] = undefElement;
1069         }
1070     }
1071     else if (Constant * vectorConst = dyn_cast<Constant>(origValue))
1072     {
1073         V_PRINT(scalarizer, "\t\t\tProper constant: " << *vectorConst << "\n");
1074         // Value is a constant. Break it down to scalars by employing a constant expression
1075         for (unsigned i = 0; i < width; i++)
1076         {
1077             retValues[i + destIdx] = ConstantExpr::getExtractElement(vectorConst,
1078                 ConstantInt::get(Type::getInt32Ty(context()), i));
1079         }
1080     }
1081     else if (isa<Instruction>(origValue) && !currEntry)
1082     {
1083         // Instruction not found in SCM. Means it will be defined in a following basic block.
1084         // Generate a DRL: dummy values, which will be resolved after all scalarization is complete.
1085         V_PRINT(scalarizer, "\t\t\t*** Not found. Setting DRL. \n");
1086         Type* dummyType = origType->getElementType();
1087         Function* dummy_function = getOrCreateDummyFunc(dummyType, origInst->getModule());
1088         DRLEntry newDRLEntry;
1089         newDRLEntry.unresolvedInst = origValue;
1090         newDRLEntry.dummyVals.resize(width);
1091         for (unsigned i = 0; i < width; i++)
1092         {
1093             // Generate dummy "call" instruction (but don't really place in function)
1094             retValues[i + destIdx] = CallInst::Create(dummy_function);
1095             newDRLEntry.dummyVals[i] = retValues[i + destIdx];
1096         }
1097 
1098         // Copy the data into DRL structure
1099         m_DRL.push_back(newDRLEntry);
1100     }
1101     else
1102     {
1103         V_PRINT(scalarizer,
1104             "\t\t\tCreating scalar conversion for " << origValue->getName() << "\n");
1105         // Value is an Instruction/global/function argument, and was not converted to scalars yet.
1106         // Create scalar values (break down the vector) and place in SCM:
1107         //   %scalar0 = extractelement <4 x Type> %vector, i32 0
1108         //   %scalar1 = extractelement <4 x Type> %vector, i32 1
1109         //   %scalar2 = extractelement <4 x Type> %vector, i32 2
1110         //   %scalar3 = extractelement <4 x Type> %vector, i32 3
1111         // The breaking instructions will be placed the the head of the function, or right
1112         // after the instruction (if it is an instruction)
1113         Instruction* locationInst = &*(inst_begin(m_currFunc));
1114         Instruction* origInstruction = dyn_cast<Instruction>(origValue);
1115         if (origInstruction)
1116         {
1117             BasicBlock::iterator insertLocation(origInstruction);
1118             ++insertLocation;
1119             locationInst = &(*insertLocation);
1120             // If the insert location is PHI, move the insert location to after all PHIs is the block
1121             if (isa<PHINode>(locationInst))
1122             {
1123                 locationInst = locationInst->getParent()->getFirstNonPHI();
1124             }
1125         }
1126 
1127         // Generate extractElement instructions
1128         for (unsigned i = 0; i < width; ++i)
1129         {
1130             Value* constIndex = ConstantInt::get(Type::getInt32Ty(context()), i);
1131             retValues[i + destIdx] = ExtractElementInst::Create(origValue, constIndex, "scalar", locationInst);
1132         }
1133         SCMEntry* newEntry = getSCMEntry(origValue);
1134         updateSCMEntryWithValues(newEntry, &(retValues[destIdx]), origValue, false);
1135 
1136     }
1137 }
1138 
obtainVectorValueWhichMightBeScalarized(Value * vectorVal)1139 void ScalarizeFunction::obtainVectorValueWhichMightBeScalarized(Value* vectorVal)
1140 {
1141     m_usedVectors.insert(vectorVal);
1142 }
1143 
resolveVectorValues()1144 void ScalarizeFunction::resolveVectorValues()
1145 {
1146     SmallSetVector<Value*, ESTIMATED_INST_NUM>::iterator it = m_usedVectors.begin();
1147     SmallSetVector<Value*, ESTIMATED_INST_NUM>::iterator e = m_usedVectors.end();
1148     for (; it != e; ++it) {
1149         obtainVectorValueWhichMightBeScalarizedImpl(*it);
1150     }
1151 }
1152 
obtainVectorValueWhichMightBeScalarizedImpl(Value * vectorVal)1153 void ScalarizeFunction::obtainVectorValueWhichMightBeScalarizedImpl(Value* vectorVal)
1154 {
1155     IGC_ASSERT_MESSAGE(isa<VectorType>(vectorVal->getType()), "Must be a vector type");
1156     if (isa<UndefValue>(vectorVal)) return;
1157 
1158     // ONLY IF the value appears in the SCM - there is a chance it was removed.
1159     if (!m_SCM.count(vectorVal)) return;
1160     SCMEntry* valueEntry = m_SCM[vectorVal];
1161 
1162     // Check in SCM entry, if value was really removed
1163     if (false == valueEntry->isOriginalVectorRemoved) return;
1164 
1165     V_PRINT(scalarizer, "\t\t\tTrying to use a removed value. Reassembling it...\n");
1166     // The vector value was removed. Need to reassemble it...
1167     //   %assembled.vect.0 = insertelement <4 x type> undef             , type %scalar.0, i32 0
1168     //   %assembled.vect.1 = insertelement <4 x type> %indx.vect.0, type %scalar.1, i32 1
1169     //   %assembled.vect.2 = insertelement <4 x type> %indx.vect.1, type %scalar.2, i32 2
1170     //   %assembled.vect.3 = insertelement <4 x type> %indx.vect.2, type %scalar.3, i32 3
1171     // Place the re-assembly in the location where the original instruction was
1172     Instruction* vectorInst = dyn_cast<Instruction>(vectorVal);
1173     IGC_ASSERT_MESSAGE(vectorInst, "SCM reports a non-instruction was removed. Should not happen");
1174     Instruction* insertLocation = vectorInst;
1175     // If the original instruction was PHI, place the re-assembly only after all PHIs is the block
1176     if (isa<PHINode>(vectorInst))
1177     {
1178         insertLocation = insertLocation->getParent()->getFirstNonPHI();
1179     }
1180 
1181     Value* assembledVector = UndefValue::get(vectorVal->getType());
1182     unsigned width = int_cast<unsigned>(dyn_cast<IGCLLVM::FixedVectorType>(vectorVal->getType())->getNumElements());
1183     for (unsigned i = 0; i < width; i++)
1184     {
1185         IGC_ASSERT_MESSAGE(NULL != valueEntry->scalarValues[i], "SCM entry has NULL value");
1186         Value* constIndex = ConstantInt::get(Type::getInt32Ty(context()), i);
1187         Instruction* insert = InsertElementInst::Create(assembledVector,
1188             valueEntry->scalarValues[i], constIndex, "assembled.vect", insertLocation);
1189         VectorizerUtils::SetDebugLocBy(insert, vectorInst);
1190         assembledVector = insert;
1191         V_PRINT(scalarizer,
1192             "\t\t\tCreated vector assembly inst:" << *assembledVector << "\n");
1193     }
1194     // Replace the uses of "vectorVal" with the new vector
1195     vectorVal->replaceAllUsesWith(assembledVector);
1196 
1197     // create SCM entry to represent the new vector value..
1198     SCMEntry* newEntry = getSCMEntry(assembledVector);
1199     updateSCMEntryWithValues(newEntry, &(valueEntry->scalarValues[0]), assembledVector, false);
1200 }
1201 
getSCMEntry(Value * origValue)1202 ScalarizeFunction::SCMEntry* ScalarizeFunction::getSCMEntry(Value* origValue)
1203 {
1204     // origValue may be scalar or vector:
1205     // When the actual returned value of the CALL inst is different from the The "proper" retval
1206     // the original CALL inst value may be scalar (i.e. int2 is converted to double which is a scalar)
1207     IGC_ASSERT_MESSAGE(!isa<UndefValue>(origValue), "Trying to create SCM to undef value...");
1208     if (m_SCM.count(origValue)) return m_SCM[origValue];
1209 
1210     // If index of next free SCMEntry overflows the array size, create a new array
1211     if (m_SCMArrayLocation == ESTIMATED_INST_NUM)
1212     {
1213         // Create new SCMAllocationArray, push it to the vector of arrays, and set free index to 0
1214         m_SCMAllocationArray = new SCMEntry[ESTIMATED_INST_NUM];
1215         m_SCMArrays.push_back(m_SCMAllocationArray);
1216         m_SCMArrayLocation = 0;
1217     }
1218     // Allocate the new entry, and increment the free-element index
1219     SCMEntry* newEntry = &(m_SCMAllocationArray[m_SCMArrayLocation++]);
1220 
1221     // Set all primary data in entry
1222     if (newEntry->scalarValues.size())
1223         newEntry->scalarValues[0] = NULL;
1224     else
1225         newEntry->scalarValues.push_back(NULL);
1226 
1227     newEntry->isOriginalVectorRemoved = false;
1228 
1229     // Insert new entry to SCM map
1230     m_SCM.insert(std::pair<Value*, SCMEntry*>(origValue, newEntry));
1231 
1232     return newEntry;
1233 }
1234 
updateSCMEntryWithValues(ScalarizeFunction::SCMEntry * entry,Value * scalarValues[],const Value * origValue,bool isOrigValueRemoved,bool matchDbgLoc)1235 void ScalarizeFunction::updateSCMEntryWithValues(ScalarizeFunction::SCMEntry* entry,
1236     Value* scalarValues[],
1237     const Value* origValue,
1238     bool isOrigValueRemoved,
1239     bool matchDbgLoc)
1240 {
1241     IGC_ASSERT_MESSAGE((origValue->getType()->isArrayTy() || origValue->getType()->isVectorTy()), "only Vector values are supported");
1242     unsigned width = int_cast<unsigned>(dyn_cast<IGCLLVM::FixedVectorType>(origValue->getType())->getNumElements());
1243 
1244     entry->isOriginalVectorRemoved = isOrigValueRemoved;
1245 
1246     entry->scalarValues.resize(width);
1247 
1248     for (unsigned i = 0; i < width; ++i)
1249     {
1250         IGC_ASSERT_MESSAGE(NULL != scalarValues[i], "Trying to fill SCM with NULL value");
1251         entry->scalarValues[i] = scalarValues[i];
1252     }
1253 
1254     if (matchDbgLoc)
1255     {
1256         if (const Instruction * origInst = dyn_cast<Instruction>(origValue))
1257         {
1258             for (unsigned i = 0; i < width; ++i)
1259             {
1260                 Instruction* scalarInst = dyn_cast<Instruction>(scalarValues[i]);
1261                 if (scalarInst) VectorizerUtils::SetDebugLocBy(scalarInst, origInst);
1262             }
1263         }
1264     }
1265 }
1266 
getScalarizedValues(Value * origValue)1267 ScalarizeFunction::SCMEntry* ScalarizeFunction::getScalarizedValues(Value* origValue)
1268 {
1269     if (m_SCM.count(origValue)) return m_SCM[origValue];
1270     return NULL;
1271 }
1272 
releaseAllSCMEntries()1273 void ScalarizeFunction::releaseAllSCMEntries()
1274 {
1275     IGC_ASSERT_MESSAGE(m_SCMArrays.size() > 0, "At least one buffer is allocated at all times");
1276     while (m_SCMArrays.size() > 1)
1277     {
1278         // If there are additional allocated entry Arrays, release all of them (leave only the first)
1279         SCMEntry* popEntry = m_SCMArrays.pop_back_val();
1280         delete[] popEntry;
1281     }
1282     // set the "current" array pointer to the only remaining array
1283     m_SCMAllocationArray = m_SCMArrays[0];
1284     m_SCMArrayLocation = 0;
1285 }
1286 
resolveDeferredInstructions()1287 void ScalarizeFunction::resolveDeferredInstructions()
1288 {
1289     llvm::MapVector<Value*, Value*> dummyToScalarMap;
1290 
1291     // lambda to check if a value is a dummy instruction
1292     auto isDummyValue = [this](Value* val) -> bool
1293     {
1294         auto* call = dyn_cast<CallInst>(val);
1295         if (!call) return false;
1296         // If the Value is one of the dummy functions that we created.
1297         for (const auto& function : createdDummyFunctions) {
1298             if (call->getCalledFunction() == function.second)
1299                 return true;
1300         }
1301 
1302         return false;
1303     };
1304 
1305     for (auto deferredEntry = m_DRL.begin(); m_DRL.size() > 0;)
1306     {
1307         DRLEntry current = *deferredEntry;
1308         V_PRINT(scalarizer,
1309             "\tDRL Going to fix value of orig inst: " << *current.unresolvedInst << "\n");
1310         Instruction* vectorInst = dyn_cast<Instruction>(current.unresolvedInst);
1311         IGC_ASSERT_MESSAGE(vectorInst, "DRL only handles unresolved instructions");
1312 
1313         IGCLLVM::FixedVectorType* currType = dyn_cast<IGCLLVM::FixedVectorType>(vectorInst->getType());
1314         IGC_ASSERT_MESSAGE(currType, "Cannot have DRL of non-vector value");
1315         unsigned width = int_cast<unsigned>(currType->getNumElements());
1316 
1317         SCMEntry* currentInstEntry = getSCMEntry(vectorInst);
1318 
1319         if (currentInstEntry->scalarValues[0] == NULL)
1320         {
1321             V_PRINT(scalarizer, "\t\tInst was not scalarized yet, Scalarizing now...\n");
1322             SmallVector<Value*, MAX_INPUT_VECTOR_WIDTH>newInsts;
1323 
1324             // This instruction was not scalarized. Create scalar values and place in SCM.
1325             //   %scalar0 = extractelement <4 x Type> %vector, i32 0
1326             //   %scalar1 = extractelement <4 x Type> %vector, i32 1
1327             //   %scalar2 = extractelement <4 x Type> %vector, i32 2
1328             //   %scalar3 = extractelement <4 x Type> %vector, i32 3
1329             // Place the vector break-down instructions right after the actual vector
1330             BasicBlock::iterator insertLocation(vectorInst);
1331             ++insertLocation;
1332             // If the insert location is PHI, move the insert location to after all PHIs is the block
1333             if (isa<PHINode>(insertLocation))
1334             {
1335                 insertLocation = BasicBlock::iterator(insertLocation->getParent()->getFirstNonPHI());
1336             }
1337 
1338             newInsts.resize(width);
1339             for (unsigned i = 0; i < width; i++)
1340             {
1341                 Value *constIndex = ConstantInt::get(Type::getInt32Ty(context()), i);
1342                 Instruction *EE = ExtractElementInst::Create(vectorInst, constIndex, "scalar", &(*insertLocation));
1343                 newInsts[i] = EE;
1344             }
1345             updateSCMEntryWithValues(currentInstEntry, &(newInsts[0]), vectorInst, false);
1346         }
1347 
1348         bool totallyResolved = true;
1349 
1350         // Connect the resolved values to their consumers
1351         for (unsigned i = 0; i < width; ++i)
1352         {
1353             Instruction* dummyInst = dyn_cast<Instruction>(current.dummyVals[i]);
1354             IGC_ASSERT_MESSAGE(dummyInst, "Dummy values are all instructions!");
1355             Value* scalarVal = currentInstEntry->scalarValues[i];
1356 
1357             if (isDummyValue(scalarVal))
1358             {
1359                 // It's possible the scalar values are not resolved earlier and are themselves dummy instructions.
1360                 // In order to find the real value, we look in the map to see which value replaced it.
1361                 if (dummyToScalarMap.count(scalarVal))
1362                 scalarVal = dummyToScalarMap[scalarVal];
1363                 else
1364                     totallyResolved = false;
1365             }
1366 
1367             // Save every dummy instruction with the scalar value its replaced with
1368             dummyToScalarMap[dummyInst] = scalarVal;
1369         }
1370 
1371         if (totallyResolved)
1372         {
1373             m_DRL.erase(deferredEntry);
1374         }
1375         else
1376         {
1377             deferredEntry++;
1378         }
1379 
1380         if (deferredEntry == m_DRL.end())
1381         {
1382             deferredEntry = m_DRL.begin();
1383         }
1384     }
1385 
1386     for ( auto entry : dummyToScalarMap )
1387     {
1388         // Replace and erase all dummy instructions (don't use eraseFromParent as the dummy is not in the function)
1389         Instruction *dummyInst = cast<Instruction>(entry.first);
1390         dummyInst->replaceAllUsesWith(entry.second);
1391         dummyInst->deleteValue();
1392     }
1393 
1394     // clear DRL
1395     m_DRL.clear();
1396 }
1397 
createScalarizerPass(bool selectiveScalarization)1398 extern "C" FunctionPass* createScalarizerPass(bool selectiveScalarization)
1399 {
1400     return new ScalarizeFunction(selectiveScalarization);
1401 }
1402 
1403 
1404