1 /*========================== begin_copyright_notice ============================
2
3 Copyright (C) 2017-2021 Intel Corporation
4
5 SPDX-License-Identifier: MIT
6
7 ============================= end_copyright_notice ===========================*/
8
9 #include "Compiler/Optimizer/Scalarizer.h"
10 #include "Compiler/IGCPassSupport.h"
11 #include "GenISAIntrinsics/GenIntrinsicInst.h"
12 #include "Compiler/CISACodeGen/helper.h"
13 #include "common/LLVMWarningsPush.hpp"
14 #include "llvmWrapper/IR/DerivedTypes.h"
15 #include "llvmWrapper/IR/Instructions.h"
16 #include "llvmWrapper/Support/Alignment.h"
17 #include "llvm/IR/Constants.h"
18 #include "llvm/IR/Operator.h"
19 #include "llvm/IR/Module.h"
20 #include "llvm/Support/CommandLine.h"
21 #include "llvm/Support/raw_os_ostream.h"
22 #include "common/LLVMWarningsPop.hpp"
23 #include "common/igc_regkeys.hpp"
24 #include "common/Types.hpp"
25 #include <iostream>
26 #include "Probe/Assertion.h"
27
28 using namespace llvm;
29 using namespace IGC;
30
// Debug-log helper: streams `b` to outs() only when the
// EnableScalarizerDebugLog flag is enabled. The first argument (`a`) is not
// used by the expansion; it only labels the call site.
#define V_PRINT(a, b)                                       \
    {                                                       \
        if (IGC_IS_FLAG_ENABLED(EnableScalarizerDebugLog))  \
            outs() << b;                                    \
    }
38
39 namespace VectorizerUtils {
SetDebugLocBy(Instruction * I,const Instruction * setBy)40 static void SetDebugLocBy(Instruction* I, const Instruction* setBy) {
41 if (!(I->getDebugLoc())) {
42 I->setDebugLoc(setBy->getDebugLoc());
43 }
44 }
45 }
46
47 // Register pass to igc-opt
48 #define PASS_FLAG "igc-scalarize"
49 #define PASS_DESCRIPTION "Scalarize functions"
50 #define PASS_CFG_ONLY false
51 #define PASS_ANALYSIS false
52 IGC_INITIALIZE_PASS_BEGIN(ScalarizeFunction, PASS_FLAG, PASS_DESCRIPTION, PASS_CFG_ONLY, PASS_ANALYSIS)
53 IGC_INITIALIZE_PASS_END(ScalarizeFunction, PASS_FLAG, PASS_DESCRIPTION, PASS_CFG_ONLY, PASS_ANALYSIS)
54
55 char ScalarizeFunction::ID = 0;
56
ScalarizeFunction(bool selectiveScalarization)57 ScalarizeFunction::ScalarizeFunction(bool selectiveScalarization) : FunctionPass(ID)
58 {
59 initializeScalarizeFunctionPass(*PassRegistry::getPassRegistry());
60
61 for (int i = 0; i < Instruction::OtherOpsEnd; i++) m_transposeCtr[i] = 0;
62 m_SelectiveScalarization = selectiveScalarization;
63
64 // Initialize SCM buffers and allocation
65 m_SCMAllocationArray = new SCMEntry[ESTIMATED_INST_NUM];
66 m_SCMArrays.push_back(m_SCMAllocationArray);
67 m_SCMArrayLocation = 0;
68
69 V_PRINT(scalarizer, "ScalarizeFunction constructor\n");
70 }
71
~ScalarizeFunction()72 ScalarizeFunction::~ScalarizeFunction()
73 {
74 releaseAllSCMEntries();
75 delete[] m_SCMAllocationArray;
76 destroyDummyFunc();
77 V_PRINT(scalarizer, "ScalarizeFunction destructor\n");
78 }
79
runOnFunction(Function & F)80 bool ScalarizeFunction::runOnFunction(Function& F)
81 {
82
83 if (!IGC::ForceAlwaysInline())
84 {
85 if (F.isDeclaration()) return false;
86 }
87 else
88 {
89 // Scalarization is done only on functions which return void (kernels)
90 if (!F.getReturnType()->isVoidTy())
91 {
92 return false;
93 }
94 }
95
96 m_currFunc = &F;
97 m_moduleContext = &(m_currFunc->getContext());
98
99 V_PRINT(scalarizer, "\nStart scalarizing function: " << m_currFunc->getName() << "\n");
100
101 // obtain TagetData of the module
102 m_pDL = &F.getParent()->getDataLayout();
103
104 // Prepare data structures for scalarizing a new function
105 m_usedVectors.clear();
106 m_removedInsts.clear();
107 m_SCM.clear();
108 releaseAllSCMEntries();
109 m_DRL.clear();
110 m_Excludes.clear();
111
112 // collecting instructions that we want to avoid scalarization
113 if (m_SelectiveScalarization)
114 {
115 buildExclusiveSet();
116 }
117
118 // Scalarization. Iterate over all the instructions
119 // Always hold the iterator at the instruction following the one being scalarized (so the
120 // iterator will "skip" any instructions that are going to be added in the scalarization work)
121 inst_iterator sI = inst_begin(m_currFunc);
122 inst_iterator sE = inst_end(m_currFunc);
123 while (sI != sE)
124 {
125 Instruction* currInst = &*sI;
126 // Move iterator to next instruction BEFORE scalarizing current instruction
127 ++sI;
128 if (m_Excludes.count(currInst))
129 {
130 recoverNonScalarizableInst(currInst);
131 }
132 else
133 {
134 dispatchInstructionToScalarize(currInst);
135 }
136 }
137
138 resolveVectorValues();
139
140 // Resolved DRL entries
141 resolveDeferredInstructions();
142
143 // Iterate over removed insts and delete them
144 SmallDenseSet<Instruction*, ESTIMATED_INST_NUM>::iterator ri = m_removedInsts.begin();
145 SmallDenseSet<Instruction*, ESTIMATED_INST_NUM>::iterator re = m_removedInsts.end();
146 SmallDenseSet<Instruction*, ESTIMATED_INST_NUM>::iterator index = ri;
147
148 for (; index != re; ++index)
149 {
150 // get rid of old users
151 if (Value * val = dyn_cast<Value>(*index))
152 {
153 UndefValue* undefVal = UndefValue::get((*index)->getType());
154 (val)->replaceAllUsesWith(undefVal);
155 }
156 IGC_ASSERT_MESSAGE((*index)->use_empty(), "Unable to remove used instruction");
157 (*index)->eraseFromParent();
158 }
159
160 V_PRINT(scalarizer, "\nCompleted scalarizing function: " << m_currFunc->getName() << "\n");
161 return true;
162 }
163
164 /// <summary>
165 /// @brief We want to avoid scalarize vector-phi node if the vector is used
166 /// as a whole entity somewhere in the program. This function tries to find
167 /// this kind of definition web that involves phi-node, insert-element etc,
168 /// then add them into the exclusion-set (excluded from scalarization).
169 /// </summary>
buildExclusiveSet()170 void ScalarizeFunction::buildExclusiveSet()
171 {
172 inst_iterator sI = inst_begin(m_currFunc);
173 inst_iterator sE = inst_end(m_currFunc);
174 while (sI != sE)
175 {
176 Instruction* currInst = &*sI;
177 ++sI;
178 // find the seed for the workset
179 std::vector<llvm::Value*> workset;
180 if (GenIntrinsicInst * GII = dyn_cast<GenIntrinsicInst>(currInst))
181 {
182 unsigned numOperands = GII->getNumArgOperands();
183 for (unsigned i = 0; i < numOperands; i++)
184 {
185 Value* operand = GII->getArgOperand(i);
186 if (isa<VectorType>(operand->getType()))
187 {
188 workset.push_back(operand);
189 }
190 }
191 }
192 else if (auto IEI = dyn_cast<InsertElementInst>(currInst))
193 {
194 Value* scalarIndexVal = IEI->getOperand(2);
195 // If the index is not a constant - we cannot statically remove this inst
196 if (!isa<ConstantInt>(scalarIndexVal)) {
197 workset.push_back(IEI);
198 }
199 }
200 else if (auto EEI = dyn_cast<ExtractElementInst>(currInst))
201 {
202 Value* scalarIndexVal = EEI->getOperand(1);
203 // If the index is not a constant - we cannot statically remove this inst
204 if (!isa<ConstantInt>(scalarIndexVal)) {
205 workset.push_back(EEI->getOperand(0));
206 }
207 }
208 // try to find a phi-web from the seed
209 bool HasPHI = false;
210 std::set<llvm::Value*> defweb;
211 while (!workset.empty())
212 {
213 auto Def = workset.back();
214 workset.pop_back();
215 if (m_Excludes.count(Def) || defweb.count(Def))
216 {
217 continue;
218 }
219 if (auto IEI = dyn_cast<InsertElementInst>(Def))
220 {
221 defweb.insert(IEI);
222 if (!defweb.count(IEI->getOperand(0)) &&
223 (isa<PHINode>(IEI->getOperand(0)) ||
224 isa<ShuffleVectorInst>(IEI->getOperand(0)) ||
225 isa<InsertElementInst>(IEI->getOperand(0))))
226 {
227 workset.push_back(IEI->getOperand(0));
228 }
229 }
230 else if (auto SVI = dyn_cast<ShuffleVectorInst>(Def))
231 {
232 defweb.insert(SVI);
233 if (!defweb.count(SVI->getOperand(0)) &&
234 (isa<PHINode>(SVI->getOperand(0)) ||
235 isa<ShuffleVectorInst>(SVI->getOperand(0)) ||
236 isa<InsertElementInst>(SVI->getOperand(0))))
237 {
238 workset.push_back(SVI->getOperand(0));
239 }
240 if (!defweb.count(SVI->getOperand(1)) &&
241 (isa<PHINode>(SVI->getOperand(1)) ||
242 isa<ShuffleVectorInst>(SVI->getOperand(1)) ||
243 isa<InsertElementInst>(SVI->getOperand(1))))
244 {
245 workset.push_back(SVI->getOperand(1));
246 }
247 }
248 else if (auto PHI = dyn_cast<PHINode>(Def))
249 {
250 defweb.insert(PHI);
251 HasPHI = true; // !this def-web is qualified!
252 for (int i = 0, n = PHI->getNumOperands(); i < n; ++i)
253 if (!defweb.count(PHI->getOperand(i)) &&
254 (isa<PHINode>(PHI->getOperand(i)) ||
255 isa<ShuffleVectorInst>(PHI->getOperand(i)) ||
256 isa<InsertElementInst>(PHI->getOperand(i))))
257 {
258 workset.push_back(PHI->getOperand(i));
259 }
260 }
261 else
262 {
263 continue;
264 }
265 // check use
266 for (auto U : Def->users())
267 {
268 if (!defweb.count(U) &&
269 (isa<PHINode>(U) ||
270 isa<ShuffleVectorInst>(U) ||
271 isa<InsertElementInst>(U)))
272 {
273 workset.push_back(U);
274 }
275 }
276 }
277 // if we find a qualified web with PHINode, add those instructions
278 // into the exclusion set
279 if (HasPHI)
280 {
281 m_Excludes.merge(defweb);
282 }
283 }
284 }
285
dispatchInstructionToScalarize(Instruction * I)286 void ScalarizeFunction::dispatchInstructionToScalarize(Instruction* I)
287 {
288 V_PRINT(scalarizer, "\tScalarizing Instruction: " << *I << "\n");
289
290 if (m_removedInsts.count(I))
291 {
292 V_PRINT(scalarizer, "\tInstruction is already marked for removal. Being ignored..\n");
293 return;
294 }
295
296 switch (I->getOpcode())
297 {
298 case Instruction::Add:
299 case Instruction::Sub:
300 case Instruction::Mul:
301 case Instruction::FAdd:
302 case Instruction::FSub:
303 case Instruction::FMul:
304 case Instruction::UDiv:
305 case Instruction::SDiv:
306 case Instruction::FDiv:
307 case Instruction::URem:
308 case Instruction::SRem:
309 case Instruction::FRem:
310 case Instruction::Shl:
311 case Instruction::LShr:
312 case Instruction::AShr:
313 case Instruction::And:
314 case Instruction::Or:
315 case Instruction::Xor:
316 scalarizeInstruction(dyn_cast<BinaryOperator>(I));
317 break;
318 case Instruction::ICmp:
319 case Instruction::FCmp:
320 scalarizeInstruction(dyn_cast<CmpInst>(I));
321 break;
322 case Instruction::Trunc:
323 case Instruction::ZExt:
324 case Instruction::SExt:
325 case Instruction::FPToUI:
326 case Instruction::FPToSI:
327 case Instruction::UIToFP:
328 case Instruction::SIToFP:
329 case Instruction::FPTrunc:
330 case Instruction::FPExt:
331 case Instruction::PtrToInt:
332 case Instruction::IntToPtr:
333 case Instruction::BitCast:
334 scalarizeInstruction(dyn_cast<CastInst>(I));
335 break;
336 case Instruction::PHI:
337 scalarizeInstruction(dyn_cast<PHINode>(I));
338 break;
339 case Instruction::Select:
340 scalarizeInstruction(dyn_cast<SelectInst>(I));
341 break;
342 case Instruction::ExtractElement:
343 scalarizeInstruction(dyn_cast<ExtractElementInst>(I));
344 break;
345 case Instruction::InsertElement:
346 scalarizeInstruction(dyn_cast<InsertElementInst>(I));
347 break;
348 case Instruction::ShuffleVector:
349 scalarizeInstruction(dyn_cast<ShuffleVectorInst>(I));
350 break;
351 //case Instruction::Call :
352 // scalarizeInstruction(dyn_cast<CallInst>(I));
353 // break;
354 case Instruction::Alloca:
355 scalarizeInstruction(dyn_cast<AllocaInst>(I));
356 break;
357 case Instruction::GetElementPtr:
358 scalarizeInstruction(dyn_cast<GetElementPtrInst>(I));
359 break;
360 // The remaining instructions are not supported for scalarization. Keep "as is"
361 default:
362 recoverNonScalarizableInst(I);
363 break;
364 }
365 }
366
recoverNonScalarizableInst(Instruction * Inst)367 void ScalarizeFunction::recoverNonScalarizableInst(Instruction* Inst)
368 {
369 V_PRINT(scalarizer, "\t\tInstruction is not scalarizable.\n");
370
371 // any vector value should have an SCM entry - even an empty one
372 if (isa<VectorType>(Inst->getType())) getSCMEntry(Inst);
373
374 // Iterate over all arguments. Check that they all exist (or rebuilt)
375 if (CallInst * CI = dyn_cast<CallInst>(Inst))
376 {
377 unsigned numOperands = CI->getNumArgOperands();
378 for (unsigned i = 0; i < numOperands; i++)
379 {
380 Value* operand = CI->getArgOperand(i);
381 if (isa<VectorType>(operand->getType()))
382 {
383 // Recover value if needed (only needed for vector values)
384 obtainVectorValueWhichMightBeScalarized(operand);
385 }
386 }
387 }
388 else
389 {
390 unsigned numOperands = Inst->getNumOperands();
391 for (unsigned i = 0; i < numOperands; i++)
392 {
393 Value* operand = Inst->getOperand(i);
394 if (isa<VectorType>(operand->getType()))
395 {
396 // Recover value if needed (only needed for vector values)
397 obtainVectorValueWhichMightBeScalarized(operand);
398 }
399 }
400 }
401 }
402
scalarizeInstruction(BinaryOperator * BI)403 void ScalarizeFunction::scalarizeInstruction(BinaryOperator* BI)
404 {
405 V_PRINT(scalarizer, "\t\tBinary instruction\n");
406 IGC_ASSERT_MESSAGE(BI, "instruction type dynamic cast failed");
407 IGCLLVM::FixedVectorType* instType = dyn_cast<IGCLLVM::FixedVectorType>(BI->getType());
408 // Only need handling for vector binary ops
409 if (!instType) return;
410
411 // Prepare empty SCM entry for the instruction
412 SCMEntry* newEntry = getSCMEntry(BI);
413
414 // Get additional info from instruction
415 unsigned numElements = int_cast<unsigned>(instType->getNumElements());
416
417 // Obtain scalarized arguments
418 SmallVector<Value*, MAX_INPUT_VECTOR_WIDTH>operand0;
419 SmallVector<Value*, MAX_INPUT_VECTOR_WIDTH>operand1;
420 bool op0IsConst, op1IsConst;
421
422 obtainScalarizedValues(operand0, &op0IsConst, BI->getOperand(0), BI);
423 obtainScalarizedValues(operand1, &op1IsConst, BI->getOperand(1), BI);
424
425 // If both arguments are constants, don't bother Scalarizing inst
426 if (op0IsConst && op1IsConst) return;
427
428 // Generate new (scalar) instructions
429 SmallVector<Value*, MAX_INPUT_VECTOR_WIDTH>newScalarizedInsts;
430 newScalarizedInsts.resize(numElements);
431 for (unsigned dup = 0; dup < numElements; dup++)
432 {
433 Value* Val = BinaryOperator::Create(
434 BI->getOpcode(),
435 operand0[dup],
436 operand1[dup],
437 BI->getName(),
438 BI
439 );
440 if (BinaryOperator * BO = dyn_cast<BinaryOperator>(Val)) {
441 // Copy overflow flags if any.
442 if (isa<OverflowingBinaryOperator>(BO)) {
443 BO->setHasNoSignedWrap(BI->hasNoSignedWrap());
444 BO->setHasNoUnsignedWrap(BI->hasNoUnsignedWrap());
445 }
446 // Copy exact flag if any.
447 if (isa<PossiblyExactOperator>(BO))
448 BO->setIsExact(BI->isExact());
449 // Copy fast math flags if any.
450 if (isa<FPMathOperator>(BO))
451 BO->setFastMathFlags(BI->getFastMathFlags());
452 }
453 newScalarizedInsts[dup] = Val;
454 }
455
456 // Add new value/s to SCM
457 updateSCMEntryWithValues(newEntry, &(newScalarizedInsts[0]), BI, true);
458
459 // Remove original instruction
460 m_removedInsts.insert(BI);
461 }
462
scalarizeInstruction(CmpInst * CI)463 void ScalarizeFunction::scalarizeInstruction(CmpInst* CI)
464 {
465 V_PRINT(scalarizer, "\t\tCompare instruction\n");
466 IGC_ASSERT_MESSAGE(CI, "instruction type dynamic cast failed");
467 IGCLLVM::FixedVectorType* instType = dyn_cast<IGCLLVM::FixedVectorType>(CI->getType());
468 // Only need handling for vector compares
469 if (!instType) return;
470
471 // Prepare empty SCM entry for the instruction
472 SCMEntry* newEntry = getSCMEntry(CI);
473
474 // Get additional info from instruction
475 unsigned numElements = int_cast<unsigned>(instType->getNumElements());
476
477 // Obtain scalarized arguments
478
479 SmallVector<Value*, MAX_INPUT_VECTOR_WIDTH>operand0;
480 SmallVector<Value*, MAX_INPUT_VECTOR_WIDTH>operand1;
481 bool op0IsConst, op1IsConst;
482
483 obtainScalarizedValues(operand0, &op0IsConst, CI->getOperand(0), CI);
484 obtainScalarizedValues(operand1, &op1IsConst, CI->getOperand(1), CI);
485
486 // If both arguments are constants, don't bother Scalarizing inst
487 if (op0IsConst && op1IsConst) return;
488
489 // Generate new (scalar) instructions
490 SmallVector<Value*, MAX_INPUT_VECTOR_WIDTH>newScalarizedInsts;
491 newScalarizedInsts.resize(numElements);
492 for (unsigned dup = 0; dup < numElements; dup++)
493 {
494 newScalarizedInsts[dup] = CmpInst::Create(
495 CI->getOpcode(),
496 CI->getPredicate(),
497 operand0[dup],
498 operand1[dup],
499 CI->getName(),
500 CI
501 );
502 }
503
504 // Add new value/s to SCM
505 updateSCMEntryWithValues(newEntry, &(newScalarizedInsts[0]), CI, true);
506
507 // Remove original instruction
508 m_removedInsts.insert(CI);
509 }
510
scalarizeInstruction(CastInst * CI)511 void ScalarizeFunction::scalarizeInstruction(CastInst* CI)
512 {
513 V_PRINT(scalarizer, "\t\tCast instruction\n");
514 IGC_ASSERT_MESSAGE(CI, "instruction type dynamic cast failed");
515 IGCLLVM::FixedVectorType* instType = dyn_cast<IGCLLVM::FixedVectorType>(CI->getType());
516
517 // For BitCast - we only scalarize if src and dst types have same vector length
518 if (isa<BitCastInst>(CI))
519 {
520 if (!instType) return recoverNonScalarizableInst(CI);
521 IGCLLVM::FixedVectorType* srcType = dyn_cast<IGCLLVM::FixedVectorType>(CI->getOperand(0)->getType());
522 if (!srcType || (instType->getNumElements() != srcType->getNumElements()))
523 {
524 return recoverNonScalarizableInst(CI);
525 }
526 }
527
528 // Only need handling for vector cast
529 if (!instType) return;
530
531 // Prepare empty SCM entry for the instruction
532 SCMEntry* newEntry = getSCMEntry(CI);
533
534 // Get additional info from instruction
535 unsigned numElements = int_cast<unsigned>(instType->getNumElements());
536 IGC_ASSERT_MESSAGE(
537 isa<IGCLLVM::FixedVectorType>(CI->getOperand(0)->getType()),
538 "unexpected type!");
539 IGC_ASSERT_MESSAGE(
540 cast<IGCLLVM::FixedVectorType>(CI->getOperand(0)->getType())
541 ->getNumElements() == numElements,
542 "unexpected vector width");
543
544 // Obtain scalarized argument
545 SmallVector<Value*, MAX_INPUT_VECTOR_WIDTH>operand0;
546 bool op0IsConst;
547
548 obtainScalarizedValues(operand0, &op0IsConst, CI->getOperand(0), CI);
549
550 // If argument is a constant, don't bother Scalarizing inst
551 if (op0IsConst) return;
552
553 // Obtain type, which ever scalar cast will cast-to
554 Type* scalarDestType = instType->getElementType();
555
556 // Generate new (scalar) instructions
557 SmallVector<Value*, MAX_INPUT_VECTOR_WIDTH>newScalarizedInsts;
558 newScalarizedInsts.resize(numElements);
559 for (unsigned dup = 0; dup < numElements; dup++)
560 {
561 newScalarizedInsts[dup] = CastInst::Create(
562 CI->getOpcode(),
563 operand0[dup],
564 scalarDestType,
565 CI->getName(),
566 CI
567 );
568 }
569
570 // Add new value/s to SCM
571 updateSCMEntryWithValues(newEntry, &(newScalarizedInsts[0]), CI, true);
572
573 // Remove original instruction
574 m_removedInsts.insert(CI);
575 }
576
scalarizeInstruction(PHINode * PI)577 void ScalarizeFunction::scalarizeInstruction(PHINode* PI)
578 {
579 V_PRINT(scalarizer, "\t\tPHI instruction\n");
580 IGC_ASSERT_MESSAGE(PI, "instruction type dynamic cast failed");
581 IGCLLVM::FixedVectorType* instType = dyn_cast<IGCLLVM::FixedVectorType>(PI->getType());
582 // Only need handling for vector PHI
583 if (!instType) return;
584
585 // Obtain number of incoming nodes \ PHI values
586 unsigned numValues = PI->getNumIncomingValues();
587
588 // Normally, a phi would be scalarized and a collection of
589 // extractelements would be emitted for each value. Since
590 // VME payload CVariables don't necessarily match the size
591 // of the llvm type, keep these phis vectorized here so we
592 // can emit the appropriate movs in emitVectorCopy() when
593 // emitting movs for phis.
594 for (unsigned i = 0; i < numValues; i++)
595 {
596 auto* Op = PI->getIncomingValue(i);
597
598 if (auto * GII = dyn_cast<GenIntrinsicInst>(Op))
599 {
600 switch (GII->getIntrinsicID())
601 {
602 case GenISAIntrinsic::GenISA_vmeSendIME2:
603 case GenISAIntrinsic::GenISA_vmeSendFBR2:
604 case GenISAIntrinsic::GenISA_vmeSendSIC2:
605 recoverNonScalarizableInst(PI);
606 return;
607
608 default: break;
609 }
610 }
611 }
612
613 {
614 // If PHI is used in insts that take vector as operands, keep this vector phi.
615 // With the vector phi, variable alias can do a better job. Otherwise, more mov
616 // insts could be generated.
617 DenseMap<PHINode*, int> visited;
618 SmallVector<PHINode*, 8> phis;
619 phis.push_back(PI);
620 while (!phis.empty())
621 {
622 PHINode* PN = phis.back();
623 phis.pop_back();
624 for (auto U : PN->users())
625 {
626 if (GenIntrinsicInst * GII = dyn_cast<GenIntrinsicInst>(U))
627 {
628 switch (GII->getIntrinsicID())
629 {
630 default:
631 break;
632 case GenISAIntrinsic::GenISA_simdBlockWrite:
633 recoverNonScalarizableInst(PI);
634 return;
635 }
636 }
637 else if (PHINode * N = dyn_cast<PHINode>(U))
638 {
639 if (visited.count(N) == 0) {
640 visited[N] = 1;
641 phis.push_back(N);
642 }
643 }
644 }
645 }
646 visited.clear();
647 phis.clear();
648 }
649
650
651 // Prepare empty SCM entry for the instruction
652 SCMEntry* newEntry = getSCMEntry(PI);
653
654 // Get additional info from instruction
655 Type* scalarType = instType->getElementType();
656 unsigned numElements = int_cast<unsigned>(instType->getNumElements());
657
658 // Create new (empty) PHI nodes, and place them.
659 SmallVector<Value*, MAX_INPUT_VECTOR_WIDTH>newScalarizedPHI;
660 newScalarizedPHI.resize(numElements);
661 for (unsigned i = 0; i < numElements; i++)
662 {
663 newScalarizedPHI[i] = PHINode::Create(scalarType, numValues, PI->getName(), PI);
664 }
665
666 // Iterate over incoming values in vector PHI, and fill scalar PHI's accordingly
667 SmallVector<Value*, MAX_INPUT_VECTOR_WIDTH>operand;
668
669 for (unsigned j = 0; j < numValues; j++)
670 {
671 // Obtain scalarized arguments
672 obtainScalarizedValues(operand, NULL, PI->getIncomingValue(j), PI);
673
674 // Fill all scalarized PHI nodes with scalar arguments
675 for (unsigned i = 0; i < numElements; i++)
676 {
677 cast<PHINode>(newScalarizedPHI[i])->addIncoming(operand[i], PI->getIncomingBlock(j));
678 }
679 }
680
681 // Add new value/s to SCM
682 updateSCMEntryWithValues(newEntry, &(newScalarizedPHI[0]), PI, true);
683
684 // Remove original instruction
685 m_removedInsts.insert(PI);
686 }
687
scalarizeInstruction(SelectInst * SI)688 void ScalarizeFunction::scalarizeInstruction(SelectInst* SI)
689 {
690 V_PRINT(scalarizer, "\t\tSelect instruction\n");
691 IGC_ASSERT_MESSAGE(SI, "instruction type dynamic cast failed");
692 IGCLLVM::FixedVectorType* instType = dyn_cast<IGCLLVM::FixedVectorType>(SI->getType());
693 // Only need handling for vector select
694 if (!instType) return;
695
696 // Prepare empty SCM entry for the instruction
697 SCMEntry* newEntry = getSCMEntry(SI);
698
699 // Get additional info from instruction
700 unsigned numElements = int_cast<unsigned>(instType->getNumElements());
701
702 // Obtain scalarized arguments (select inst has 3 arguments: Cond, TrueVal, FalseVal)
703 SmallVector<Value*, MAX_INPUT_VECTOR_WIDTH>condOp;
704 SmallVector<Value*, MAX_INPUT_VECTOR_WIDTH>trueValOp;
705 SmallVector<Value*, MAX_INPUT_VECTOR_WIDTH>falseValOp;
706
707 obtainScalarizedValues(trueValOp, NULL, SI->getTrueValue(), SI);
708 obtainScalarizedValues(falseValOp, NULL, SI->getFalseValue(), SI);
709
710 // Check if condition is a vector.
711 Value* conditionVal = SI->getCondition();
712 if (isa<VectorType>(conditionVal->getType()))
713 {
714 // Obtain scalarized breakdowns of condition
715 obtainScalarizedValues(condOp, NULL, conditionVal, SI);
716 }
717 else
718 {
719 condOp.resize(numElements);
720 // Broadcast the (scalar) condition, to be used by all the insruction breakdowns
721 for (unsigned i = 0; i < numElements; i++) condOp[i] = conditionVal;
722 }
723
724 // Generate new (scalar) instructions
725 SmallVector<Value*, MAX_INPUT_VECTOR_WIDTH>newScalarizedInsts;
726 newScalarizedInsts.resize(numElements);
727 for (unsigned dup = 0; dup < numElements; dup++)
728 {
729 // Small optimization: Some scalar selects may be redundant (trueVal == falseVal)
730 if (trueValOp[dup] != falseValOp[dup])
731 {
732 newScalarizedInsts[dup] = SelectInst::Create(
733 condOp[dup],
734 trueValOp[dup],
735 falseValOp[dup],
736 SI->getName(),
737 SI
738 );
739 }
740 else
741 {
742 // just "connect" the destination value to the true value input
743 newScalarizedInsts[dup] = trueValOp[dup];
744 }
745 }
746
747 // Add new value/s to SCM
748 updateSCMEntryWithValues(newEntry, &(newScalarizedInsts[0]), SI, true);
749
750 // Remove original instruction
751 m_removedInsts.insert(SI);
752 }
753
scalarizeInstruction(ExtractElementInst * EI)754 void ScalarizeFunction::scalarizeInstruction(ExtractElementInst* EI)
755 {
756 V_PRINT(scalarizer, "\t\tExtractElement instruction\n");
757 IGC_ASSERT_MESSAGE(EI, "instruction type dynamic cast failed");
758
759 // Proper scalarization makes "extractElement" instructions redundant
760 // Only need to "follow" the scalar element (as the input vector was
761 // already scalarized)
762 Value* vectorValue = EI->getOperand(0);
763 Value* scalarIndexVal = EI->getOperand(1);
764
765 // If the index is not a constant - we cannot statically remove this inst
766 if (!isa<ConstantInt>(scalarIndexVal)) return recoverNonScalarizableInst(EI);
767
768 // Obtain the scalarized operands
769 SmallVector<Value*, MAX_INPUT_VECTOR_WIDTH>operand;
770 obtainScalarizedValues(operand, NULL, vectorValue, EI);
771
772 // Connect the "extracted" value to all its consumers
773 uint64_t scalarIndex = cast<ConstantInt>(scalarIndexVal)->getZExtValue();
774 auto valueVType = cast<IGCLLVM::FixedVectorType>(vectorValue->getType());
775 if (static_cast<unsigned int>(scalarIndex) < (unsigned)valueVType->getNumElements())
776 {
777 IGC_ASSERT_MESSAGE(NULL != operand[static_cast<unsigned int>(scalarIndex)], "SCM error");
778
779 if (IGC_IS_FLAG_ENABLED(UseOffsetInLocation))
780 {
781 // Metadata "implicitGlobalID" must be propagated to a new instruction as a WA
782 // for missing meta data preservation in this pass. When a general fix is applied
783 // then instructions below for this specific propagation must be removed.
784 Value* pNewVal = operand[static_cast<unsigned int>(scalarIndex)];
785
786 if (MDNode* pEIMD = EI->getMetadata("implicitGlobalID"))
787 {
788 // Compute thread and group identification instructions must have 'Output' attribute
789 // added later during compilation. The implicitGlobalID metadata attached to this
790 // instruction must be assigned to a new instruction, which replaces this instruction.
791 // Unfortunatelly, replaceAllUsesWith() will not ensure such propagation.
792 Instruction* pNewInst = dyn_cast_or_null<llvm::Instruction>(pNewVal);
793 IGC_ASSERT_MESSAGE(pNewInst, "Missing implicit global ID instruction");
794
795 pNewInst->copyMetadata(*EI);
796 }
797 }
798
799 // Replace all users of this inst, with the extracted scalar value
800 EI->replaceAllUsesWith(operand[static_cast<unsigned int>(scalarIndex)]);
801 }
802 else
803 {
804 IGC_ASSERT_MESSAGE(0, "The instruction extractElement is out of bounds.");
805 EI->replaceAllUsesWith(UndefValue::get(valueVType->getElementType()));
806 }
807
808 // Remove original instruction
809 m_removedInsts.insert(EI);
810 }
811
scalarizeInstruction(InsertElementInst * II)812 void ScalarizeFunction::scalarizeInstruction(InsertElementInst* II)
813 {
814 V_PRINT(scalarizer, "\t\tInsertElement instruction\n");
815 IGC_ASSERT_MESSAGE(II, "instruction type dynamic cast failed");
816
817 // Proper scalarization makes "InsertElement" instructions redundant.
818 // Only need to "follow" the scalar elements and update in SCM
819 Value* sourceVectorValue = II->getOperand(0);
820 Value* sourceScalarValue = II->getOperand(1);
821 Value* scalarIndexVal = II->getOperand(2);
822
823 // If the index is not a constant - we cannot statically remove this inst
824 if (!isa<ConstantInt>(scalarIndexVal)) return recoverNonScalarizableInst(II);
825
826 // Prepare empty SCM entry for the instruction
827 SCMEntry* newEntry = getSCMEntry(II);
828
829 IGC_ASSERT_MESSAGE(isa<ConstantInt>(scalarIndexVal), "inst arguments error");
830 uint64_t scalarIndex = cast<ConstantInt>(scalarIndexVal)->getZExtValue();
831 IGC_ASSERT_MESSAGE(
832 scalarIndex <
833 dyn_cast<IGCLLVM::FixedVectorType>(II->getType())->getNumElements(),
834 "index error");
835
836 // Obtain breakdown of input vector
837 SmallVector<Value*, MAX_INPUT_VECTOR_WIDTH>scalarValues;
838 if (isa<UndefValue>(sourceVectorValue))
839 {
840 // Scalarize the undef value (generate a scalar undef)
841 IGCLLVM::FixedVectorType* inputVectorType = dyn_cast<IGCLLVM::FixedVectorType>(sourceVectorValue->getType());
842 IGC_ASSERT_MESSAGE(inputVectorType, "expected vector argument");
843
844 UndefValue* undefVal = UndefValue::get(inputVectorType->getElementType());
845
846 // fill new SCM entry with UNDEFs and the new value
847 scalarValues.resize(static_cast<unsigned int>(inputVectorType->getNumElements()));
848 for (unsigned j = 0; j < inputVectorType->getNumElements(); j++)
849 {
850 scalarValues[j] = undefVal;
851 }
852 scalarValues[static_cast<unsigned int>(scalarIndex)] = sourceScalarValue;
853 }
854 else
855 {
856 // Obtain the scalar values of the input vector
857 obtainScalarizedValues(scalarValues, NULL, sourceVectorValue, II);
858 // Add the new element
859 scalarValues[static_cast<unsigned int>(scalarIndex)] = sourceScalarValue;
860 }
861
862 // Add new value/s to SCM
863 updateSCMEntryWithValues(newEntry, &(scalarValues[0]), II, true, false);
864
865 // Remove original instruction
866 m_removedInsts.insert(II);
867 }
868
scalarizeInstruction(ShuffleVectorInst * SI)869 void ScalarizeFunction::scalarizeInstruction(ShuffleVectorInst* SI)
870 {
871 V_PRINT(scalarizer, "\t\tShuffleVector instruction\n");
872 IGC_ASSERT_MESSAGE(nullptr != SI, "instruction type dynamic cast failed");
873
874 // Proper scalarization makes "ShuffleVector" instructions redundant.
875 // Only need to "follow" the scalar elements and update in SCM
876
877 // Grab input vectors types and width
878 Value* sourceVector0Value = SI->getOperand(0);
879 IGC_ASSERT(nullptr != sourceVector0Value);
880 Value* sourceVector1Value = SI->getOperand(1);
881 IGC_ASSERT(nullptr != sourceVector1Value);
882 IGCLLVM::FixedVectorType* const inputType = dyn_cast<IGCLLVM::FixedVectorType>(sourceVector0Value->getType());
883 IGC_ASSERT_MESSAGE(nullptr != inputType, "vector input error");
884 IGC_ASSERT_MESSAGE(inputType == sourceVector1Value->getType(), "vector input error");
885 unsigned sourceVectorWidth = int_cast<unsigned>(inputType->getNumElements());
886
887 // generate an array of values (pre-shuffle), which concatenates both vectors
888 SmallVector<Value*, MAX_INPUT_VECTOR_WIDTH>allValues;
889 allValues.resize(2 * sourceVectorWidth);
890
891 // Obtain scalarized input values (into concatenated array). if vector was Undef - keep NULL.
892 if (!isa<UndefValue>(sourceVector0Value))
893 {
894 obtainScalarizedValues(allValues, NULL, sourceVector0Value, SI, 0);
895 }
896 if (!isa<UndefValue>(sourceVector1Value))
897 {
898 // Place values, starting in the middle of concatenated array
899 obtainScalarizedValues(allValues, NULL, sourceVector1Value, SI, sourceVectorWidth);
900 }
901
902 // Generate array for shuffled scalar values
903 SmallVector<Value*, MAX_INPUT_VECTOR_WIDTH>newVector;
904 unsigned width = int_cast<unsigned>(cast<IGCLLVM::FixedVectorType>(SI->getType())->getNumElements());
905
906 // Generate undef value, which may be needed as some scalar elements
907 UndefValue* undef = UndefValue::get(inputType->getElementType());
908
909 newVector.resize(width);
910 // Go over shuffle order, and place scalar values in array
911 for (unsigned i = 0; i < width; i++)
912 {
913 int maskValue = SI->getMaskValue(i);
914 if (maskValue >= 0 && NULL != allValues[maskValue])
915 {
916 newVector[i] = allValues[maskValue];
917 }
918 else
919 {
920 newVector[i] = undef;
921 }
922 }
923
924 // Create the new SCM entry
925 SCMEntry* newEntry = getSCMEntry(SI);
926 updateSCMEntryWithValues(newEntry, &(newVector[0]), SI, true, false);
927
928 // Remove original instruction
929 m_removedInsts.insert(SI);
930 }
931
scalarizeInstruction(CallInst * CI)932 void ScalarizeFunction::scalarizeInstruction(CallInst* CI)
933 {
934 V_PRINT(scalarizer, "\t\tCall instruction\n");
935 IGC_ASSERT_MESSAGE(CI, "instruction type dynamic cast failed");
936
937 recoverNonScalarizableInst(CI);
938 }
939
scalarizeInstruction(AllocaInst * AI)940 void ScalarizeFunction::scalarizeInstruction(AllocaInst* AI)
941 {
942 V_PRINT(scalarizer, "\t\tAlloca instruction\n");
943 IGC_ASSERT_MESSAGE(AI, "instruction type dynamic cast failed");
944
945 return recoverNonScalarizableInst(AI);
946 }
947
scalarizeInstruction(GetElementPtrInst * GI)948 void ScalarizeFunction::scalarizeInstruction(GetElementPtrInst* GI)
949 {
950 V_PRINT(scalarizer, "\t\tGEP instruction\n");
951 IGC_ASSERT_MESSAGE(GI, "instruction type dynamic cast failed");
952
953 // If it has more than one index, leave it as is.
954 if (GI->getNumIndices() != 1)
955 {
956 return recoverNonScalarizableInst(GI);
957 }
958 Value* baseValue = GI->getOperand(0);
959 Value* indexValue = GI->getOperand(1);
960
961 // If it's not a vector instruction, leave it as is.
962 if (!baseValue->getType()->isVectorTy() && !indexValue->getType()->isVectorTy())
963 {
964 return recoverNonScalarizableInst(GI);
965 }
966 SmallVector<Value*, MAX_INPUT_VECTOR_WIDTH>operand1;
967 SmallVector<Value*, MAX_INPUT_VECTOR_WIDTH>operand2;
968 Type* ptrTy;
969 unsigned width = 1;
970
971 if (baseValue->getType()->isVectorTy())
972 {
973 width = int_cast<unsigned>(dyn_cast<IGCLLVM::FixedVectorType>(baseValue->getType())->getNumElements());
974 // Obtain the scalarized operands
975 obtainScalarizedValues(operand1, NULL, baseValue, GI);
976 ptrTy = dyn_cast<VectorType>(baseValue->getType())->getElementType();
977 }
978 else
979 {
980 ptrTy = baseValue->getType();
981 }
982 if (indexValue->getType()->isVectorTy())
983 {
984 width = int_cast<unsigned>(dyn_cast<IGCLLVM::FixedVectorType>(indexValue->getType())->getNumElements());
985 // Obtain the scalarized operands
986 obtainScalarizedValues(operand2, NULL, indexValue, GI);
987 }
988 IGC_ASSERT_MESSAGE(width > 1, "expected vector instruction");
989 SmallVector<Value*, MAX_INPUT_VECTOR_WIDTH>scalarValues;
990 scalarValues.resize(width);
991
992 Value* assembledVector = UndefValue::get(IGCLLVM::FixedVectorType::get(ptrTy, width));
993 for (unsigned i = 0; i < width; ++i)
994 {
995 auto op1 = baseValue->getType()->isVectorTy() ? operand1[i] : baseValue;
996 auto op2 = indexValue->getType()->isVectorTy() ? operand2[i] : indexValue;
997
998 Value* newGEP = GetElementPtrInst::Create(nullptr, op1, op2, "", GI);
999 Value* constIndex = ConstantInt::get(Type::getInt32Ty(context()), i);
1000 Instruction* insert = InsertElementInst::Create(assembledVector,
1001 newGEP, constIndex, "assembled.vect", GI);
1002 assembledVector = insert;
1003 scalarValues[i] = newGEP;
1004
1005 V_PRINT(scalarizer,
1006 "\t\t\tCreated vector assembly inst:" << *assembledVector << "\n");
1007 }
1008 // Prepare empty SCM entry for the new instruction
1009 SCMEntry* newEntry = getSCMEntry(assembledVector);
1010 // Add new value/s to SCM
1011 updateSCMEntryWithValues(newEntry, &(scalarValues[0]), assembledVector, true);
1012 GI->replaceAllUsesWith(assembledVector);
1013
1014 // Remove original instruction
1015 m_removedInsts.insert(GI);
1016 }
1017
obtainScalarizedValues(SmallVectorImpl<Value * > & retValues,bool * retIsConstant,Value * origValue,Instruction * origInst,int destIdx)1018 void ScalarizeFunction::obtainScalarizedValues(SmallVectorImpl<Value*>& retValues, bool* retIsConstant,
1019 Value* origValue, Instruction* origInst, int destIdx)
1020 {
1021 V_PRINT(scalarizer, "\t\t\tObtaining scalar value... " << *origValue << "\n");
1022
1023 IGCLLVM::FixedVectorType* origType = dyn_cast<IGCLLVM::FixedVectorType>(origValue->getType());
1024 IGC_ASSERT_MESSAGE(origType, "Value must have a vector type!");
1025 unsigned width = int_cast<unsigned>(origType->getNumElements());
1026
1027 if (destIdx == -1)
1028 {
1029 destIdx = 0;
1030 retValues.resize(width);
1031 }
1032
1033 if (NULL != retIsConstant)
1034 {
1035 // Set retIsConstant (return value) to true, if the origValue is constant
1036 if (!isa<Constant>(origValue))
1037 {
1038 *retIsConstant = false;
1039 }
1040 else
1041 {
1042 *retIsConstant = true;
1043 }
1044 }
1045
1046 // Lookup value in SCM
1047 SCMEntry* currEntry = getScalarizedValues(origValue);
1048 if (currEntry && (NULL != currEntry->scalarValues[0]))
1049 {
1050 // Value was found in SCM
1051 V_PRINT(scalarizer,
1052 "\t\t\tFound existing entry in lookup of " << origValue->getName() << "\n");
1053 for (unsigned i = 0; i < width; i++)
1054 {
1055 // Copy values to return array
1056 IGC_ASSERT_MESSAGE(NULL != currEntry->scalarValues[i], "SCM entry contains NULL value");
1057 retValues[i + destIdx] = currEntry->scalarValues[i];
1058 }
1059 }
1060 else if (isa<UndefValue>(origValue))
1061 {
1062 IGC_ASSERT_MESSAGE(origType, "original value must have a vector type!");
1063 // value is an undefVal. Break it to element-sized undefs
1064 V_PRINT(scalarizer, "\t\t\tUndefVal constant\n");
1065 Value* undefElement = UndefValue::get(origType->getElementType());
1066 for (unsigned i = 0; i < width; i++)
1067 {
1068 retValues[i + destIdx] = undefElement;
1069 }
1070 }
1071 else if (Constant * vectorConst = dyn_cast<Constant>(origValue))
1072 {
1073 V_PRINT(scalarizer, "\t\t\tProper constant: " << *vectorConst << "\n");
1074 // Value is a constant. Break it down to scalars by employing a constant expression
1075 for (unsigned i = 0; i < width; i++)
1076 {
1077 retValues[i + destIdx] = ConstantExpr::getExtractElement(vectorConst,
1078 ConstantInt::get(Type::getInt32Ty(context()), i));
1079 }
1080 }
1081 else if (isa<Instruction>(origValue) && !currEntry)
1082 {
1083 // Instruction not found in SCM. Means it will be defined in a following basic block.
1084 // Generate a DRL: dummy values, which will be resolved after all scalarization is complete.
1085 V_PRINT(scalarizer, "\t\t\t*** Not found. Setting DRL. \n");
1086 Type* dummyType = origType->getElementType();
1087 Function* dummy_function = getOrCreateDummyFunc(dummyType, origInst->getModule());
1088 DRLEntry newDRLEntry;
1089 newDRLEntry.unresolvedInst = origValue;
1090 newDRLEntry.dummyVals.resize(width);
1091 for (unsigned i = 0; i < width; i++)
1092 {
1093 // Generate dummy "call" instruction (but don't really place in function)
1094 retValues[i + destIdx] = CallInst::Create(dummy_function);
1095 newDRLEntry.dummyVals[i] = retValues[i + destIdx];
1096 }
1097
1098 // Copy the data into DRL structure
1099 m_DRL.push_back(newDRLEntry);
1100 }
1101 else
1102 {
1103 V_PRINT(scalarizer,
1104 "\t\t\tCreating scalar conversion for " << origValue->getName() << "\n");
1105 // Value is an Instruction/global/function argument, and was not converted to scalars yet.
1106 // Create scalar values (break down the vector) and place in SCM:
1107 // %scalar0 = extractelement <4 x Type> %vector, i32 0
1108 // %scalar1 = extractelement <4 x Type> %vector, i32 1
1109 // %scalar2 = extractelement <4 x Type> %vector, i32 2
1110 // %scalar3 = extractelement <4 x Type> %vector, i32 3
1111 // The breaking instructions will be placed the the head of the function, or right
1112 // after the instruction (if it is an instruction)
1113 Instruction* locationInst = &*(inst_begin(m_currFunc));
1114 Instruction* origInstruction = dyn_cast<Instruction>(origValue);
1115 if (origInstruction)
1116 {
1117 BasicBlock::iterator insertLocation(origInstruction);
1118 ++insertLocation;
1119 locationInst = &(*insertLocation);
1120 // If the insert location is PHI, move the insert location to after all PHIs is the block
1121 if (isa<PHINode>(locationInst))
1122 {
1123 locationInst = locationInst->getParent()->getFirstNonPHI();
1124 }
1125 }
1126
1127 // Generate extractElement instructions
1128 for (unsigned i = 0; i < width; ++i)
1129 {
1130 Value* constIndex = ConstantInt::get(Type::getInt32Ty(context()), i);
1131 retValues[i + destIdx] = ExtractElementInst::Create(origValue, constIndex, "scalar", locationInst);
1132 }
1133 SCMEntry* newEntry = getSCMEntry(origValue);
1134 updateSCMEntryWithValues(newEntry, &(retValues[destIdx]), origValue, false);
1135
1136 }
1137 }
1138
obtainVectorValueWhichMightBeScalarized(Value * vectorVal)1139 void ScalarizeFunction::obtainVectorValueWhichMightBeScalarized(Value* vectorVal)
1140 {
1141 m_usedVectors.insert(vectorVal);
1142 }
1143
resolveVectorValues()1144 void ScalarizeFunction::resolveVectorValues()
1145 {
1146 SmallSetVector<Value*, ESTIMATED_INST_NUM>::iterator it = m_usedVectors.begin();
1147 SmallSetVector<Value*, ESTIMATED_INST_NUM>::iterator e = m_usedVectors.end();
1148 for (; it != e; ++it) {
1149 obtainVectorValueWhichMightBeScalarizedImpl(*it);
1150 }
1151 }
1152
obtainVectorValueWhichMightBeScalarizedImpl(Value * vectorVal)1153 void ScalarizeFunction::obtainVectorValueWhichMightBeScalarizedImpl(Value* vectorVal)
1154 {
1155 IGC_ASSERT_MESSAGE(isa<VectorType>(vectorVal->getType()), "Must be a vector type");
1156 if (isa<UndefValue>(vectorVal)) return;
1157
1158 // ONLY IF the value appears in the SCM - there is a chance it was removed.
1159 if (!m_SCM.count(vectorVal)) return;
1160 SCMEntry* valueEntry = m_SCM[vectorVal];
1161
1162 // Check in SCM entry, if value was really removed
1163 if (false == valueEntry->isOriginalVectorRemoved) return;
1164
1165 V_PRINT(scalarizer, "\t\t\tTrying to use a removed value. Reassembling it...\n");
1166 // The vector value was removed. Need to reassemble it...
1167 // %assembled.vect.0 = insertelement <4 x type> undef , type %scalar.0, i32 0
1168 // %assembled.vect.1 = insertelement <4 x type> %indx.vect.0, type %scalar.1, i32 1
1169 // %assembled.vect.2 = insertelement <4 x type> %indx.vect.1, type %scalar.2, i32 2
1170 // %assembled.vect.3 = insertelement <4 x type> %indx.vect.2, type %scalar.3, i32 3
1171 // Place the re-assembly in the location where the original instruction was
1172 Instruction* vectorInst = dyn_cast<Instruction>(vectorVal);
1173 IGC_ASSERT_MESSAGE(vectorInst, "SCM reports a non-instruction was removed. Should not happen");
1174 Instruction* insertLocation = vectorInst;
1175 // If the original instruction was PHI, place the re-assembly only after all PHIs is the block
1176 if (isa<PHINode>(vectorInst))
1177 {
1178 insertLocation = insertLocation->getParent()->getFirstNonPHI();
1179 }
1180
1181 Value* assembledVector = UndefValue::get(vectorVal->getType());
1182 unsigned width = int_cast<unsigned>(dyn_cast<IGCLLVM::FixedVectorType>(vectorVal->getType())->getNumElements());
1183 for (unsigned i = 0; i < width; i++)
1184 {
1185 IGC_ASSERT_MESSAGE(NULL != valueEntry->scalarValues[i], "SCM entry has NULL value");
1186 Value* constIndex = ConstantInt::get(Type::getInt32Ty(context()), i);
1187 Instruction* insert = InsertElementInst::Create(assembledVector,
1188 valueEntry->scalarValues[i], constIndex, "assembled.vect", insertLocation);
1189 VectorizerUtils::SetDebugLocBy(insert, vectorInst);
1190 assembledVector = insert;
1191 V_PRINT(scalarizer,
1192 "\t\t\tCreated vector assembly inst:" << *assembledVector << "\n");
1193 }
1194 // Replace the uses of "vectorVal" with the new vector
1195 vectorVal->replaceAllUsesWith(assembledVector);
1196
1197 // create SCM entry to represent the new vector value..
1198 SCMEntry* newEntry = getSCMEntry(assembledVector);
1199 updateSCMEntryWithValues(newEntry, &(valueEntry->scalarValues[0]), assembledVector, false);
1200 }
1201
getSCMEntry(Value * origValue)1202 ScalarizeFunction::SCMEntry* ScalarizeFunction::getSCMEntry(Value* origValue)
1203 {
1204 // origValue may be scalar or vector:
1205 // When the actual returned value of the CALL inst is different from the The "proper" retval
1206 // the original CALL inst value may be scalar (i.e. int2 is converted to double which is a scalar)
1207 IGC_ASSERT_MESSAGE(!isa<UndefValue>(origValue), "Trying to create SCM to undef value...");
1208 if (m_SCM.count(origValue)) return m_SCM[origValue];
1209
1210 // If index of next free SCMEntry overflows the array size, create a new array
1211 if (m_SCMArrayLocation == ESTIMATED_INST_NUM)
1212 {
1213 // Create new SCMAllocationArray, push it to the vector of arrays, and set free index to 0
1214 m_SCMAllocationArray = new SCMEntry[ESTIMATED_INST_NUM];
1215 m_SCMArrays.push_back(m_SCMAllocationArray);
1216 m_SCMArrayLocation = 0;
1217 }
1218 // Allocate the new entry, and increment the free-element index
1219 SCMEntry* newEntry = &(m_SCMAllocationArray[m_SCMArrayLocation++]);
1220
1221 // Set all primary data in entry
1222 if (newEntry->scalarValues.size())
1223 newEntry->scalarValues[0] = NULL;
1224 else
1225 newEntry->scalarValues.push_back(NULL);
1226
1227 newEntry->isOriginalVectorRemoved = false;
1228
1229 // Insert new entry to SCM map
1230 m_SCM.insert(std::pair<Value*, SCMEntry*>(origValue, newEntry));
1231
1232 return newEntry;
1233 }
1234
updateSCMEntryWithValues(ScalarizeFunction::SCMEntry * entry,Value * scalarValues[],const Value * origValue,bool isOrigValueRemoved,bool matchDbgLoc)1235 void ScalarizeFunction::updateSCMEntryWithValues(ScalarizeFunction::SCMEntry* entry,
1236 Value* scalarValues[],
1237 const Value* origValue,
1238 bool isOrigValueRemoved,
1239 bool matchDbgLoc)
1240 {
1241 IGC_ASSERT_MESSAGE((origValue->getType()->isArrayTy() || origValue->getType()->isVectorTy()), "only Vector values are supported");
1242 unsigned width = int_cast<unsigned>(dyn_cast<IGCLLVM::FixedVectorType>(origValue->getType())->getNumElements());
1243
1244 entry->isOriginalVectorRemoved = isOrigValueRemoved;
1245
1246 entry->scalarValues.resize(width);
1247
1248 for (unsigned i = 0; i < width; ++i)
1249 {
1250 IGC_ASSERT_MESSAGE(NULL != scalarValues[i], "Trying to fill SCM with NULL value");
1251 entry->scalarValues[i] = scalarValues[i];
1252 }
1253
1254 if (matchDbgLoc)
1255 {
1256 if (const Instruction * origInst = dyn_cast<Instruction>(origValue))
1257 {
1258 for (unsigned i = 0; i < width; ++i)
1259 {
1260 Instruction* scalarInst = dyn_cast<Instruction>(scalarValues[i]);
1261 if (scalarInst) VectorizerUtils::SetDebugLocBy(scalarInst, origInst);
1262 }
1263 }
1264 }
1265 }
1266
getScalarizedValues(Value * origValue)1267 ScalarizeFunction::SCMEntry* ScalarizeFunction::getScalarizedValues(Value* origValue)
1268 {
1269 if (m_SCM.count(origValue)) return m_SCM[origValue];
1270 return NULL;
1271 }
1272
releaseAllSCMEntries()1273 void ScalarizeFunction::releaseAllSCMEntries()
1274 {
1275 IGC_ASSERT_MESSAGE(m_SCMArrays.size() > 0, "At least one buffer is allocated at all times");
1276 while (m_SCMArrays.size() > 1)
1277 {
1278 // If there are additional allocated entry Arrays, release all of them (leave only the first)
1279 SCMEntry* popEntry = m_SCMArrays.pop_back_val();
1280 delete[] popEntry;
1281 }
1282 // set the "current" array pointer to the only remaining array
1283 m_SCMAllocationArray = m_SCMArrays[0];
1284 m_SCMArrayLocation = 0;
1285 }
1286
resolveDeferredInstructions()1287 void ScalarizeFunction::resolveDeferredInstructions()
1288 {
1289 llvm::MapVector<Value*, Value*> dummyToScalarMap;
1290
1291 // lambda to check if a value is a dummy instruction
1292 auto isDummyValue = [this](Value* val) -> bool
1293 {
1294 auto* call = dyn_cast<CallInst>(val);
1295 if (!call) return false;
1296 // If the Value is one of the dummy functions that we created.
1297 for (const auto& function : createdDummyFunctions) {
1298 if (call->getCalledFunction() == function.second)
1299 return true;
1300 }
1301
1302 return false;
1303 };
1304
1305 for (auto deferredEntry = m_DRL.begin(); m_DRL.size() > 0;)
1306 {
1307 DRLEntry current = *deferredEntry;
1308 V_PRINT(scalarizer,
1309 "\tDRL Going to fix value of orig inst: " << *current.unresolvedInst << "\n");
1310 Instruction* vectorInst = dyn_cast<Instruction>(current.unresolvedInst);
1311 IGC_ASSERT_MESSAGE(vectorInst, "DRL only handles unresolved instructions");
1312
1313 IGCLLVM::FixedVectorType* currType = dyn_cast<IGCLLVM::FixedVectorType>(vectorInst->getType());
1314 IGC_ASSERT_MESSAGE(currType, "Cannot have DRL of non-vector value");
1315 unsigned width = int_cast<unsigned>(currType->getNumElements());
1316
1317 SCMEntry* currentInstEntry = getSCMEntry(vectorInst);
1318
1319 if (currentInstEntry->scalarValues[0] == NULL)
1320 {
1321 V_PRINT(scalarizer, "\t\tInst was not scalarized yet, Scalarizing now...\n");
1322 SmallVector<Value*, MAX_INPUT_VECTOR_WIDTH>newInsts;
1323
1324 // This instruction was not scalarized. Create scalar values and place in SCM.
1325 // %scalar0 = extractelement <4 x Type> %vector, i32 0
1326 // %scalar1 = extractelement <4 x Type> %vector, i32 1
1327 // %scalar2 = extractelement <4 x Type> %vector, i32 2
1328 // %scalar3 = extractelement <4 x Type> %vector, i32 3
1329 // Place the vector break-down instructions right after the actual vector
1330 BasicBlock::iterator insertLocation(vectorInst);
1331 ++insertLocation;
1332 // If the insert location is PHI, move the insert location to after all PHIs is the block
1333 if (isa<PHINode>(insertLocation))
1334 {
1335 insertLocation = BasicBlock::iterator(insertLocation->getParent()->getFirstNonPHI());
1336 }
1337
1338 newInsts.resize(width);
1339 for (unsigned i = 0; i < width; i++)
1340 {
1341 Value *constIndex = ConstantInt::get(Type::getInt32Ty(context()), i);
1342 Instruction *EE = ExtractElementInst::Create(vectorInst, constIndex, "scalar", &(*insertLocation));
1343 newInsts[i] = EE;
1344 }
1345 updateSCMEntryWithValues(currentInstEntry, &(newInsts[0]), vectorInst, false);
1346 }
1347
1348 bool totallyResolved = true;
1349
1350 // Connect the resolved values to their consumers
1351 for (unsigned i = 0; i < width; ++i)
1352 {
1353 Instruction* dummyInst = dyn_cast<Instruction>(current.dummyVals[i]);
1354 IGC_ASSERT_MESSAGE(dummyInst, "Dummy values are all instructions!");
1355 Value* scalarVal = currentInstEntry->scalarValues[i];
1356
1357 if (isDummyValue(scalarVal))
1358 {
1359 // It's possible the scalar values are not resolved earlier and are themselves dummy instructions.
1360 // In order to find the real value, we look in the map to see which value replaced it.
1361 if (dummyToScalarMap.count(scalarVal))
1362 scalarVal = dummyToScalarMap[scalarVal];
1363 else
1364 totallyResolved = false;
1365 }
1366
1367 // Save every dummy instruction with the scalar value its replaced with
1368 dummyToScalarMap[dummyInst] = scalarVal;
1369 }
1370
1371 if (totallyResolved)
1372 {
1373 m_DRL.erase(deferredEntry);
1374 }
1375 else
1376 {
1377 deferredEntry++;
1378 }
1379
1380 if (deferredEntry == m_DRL.end())
1381 {
1382 deferredEntry = m_DRL.begin();
1383 }
1384 }
1385
1386 for ( auto entry : dummyToScalarMap )
1387 {
1388 // Replace and erase all dummy instructions (don't use eraseFromParent as the dummy is not in the function)
1389 Instruction *dummyInst = cast<Instruction>(entry.first);
1390 dummyInst->replaceAllUsesWith(entry.second);
1391 dummyInst->deleteValue();
1392 }
1393
1394 // clear DRL
1395 m_DRL.clear();
1396 }
1397
createScalarizerPass(bool selectiveScalarization)1398 extern "C" FunctionPass* createScalarizerPass(bool selectiveScalarization)
1399 {
1400 return new ScalarizeFunction(selectiveScalarization);
1401 }
1402
1403
1404