1 /*========================== begin_copyright_notice ============================
2 
3 Copyright (C) 2017-2021 Intel Corporation
4 
5 SPDX-License-Identifier: MIT
6 
7 ============================= end_copyright_notice ===========================*/
8 
9 #include "Compiler/CISACodeGen/SimplifyConstant.h"
10 #include "Compiler/IGCPassSupport.h"
11 #include "Compiler/CodeGenPublicEnums.h"
12 #include "common/igc_regkeys.hpp"
13 #include "common/LLVMWarningsPush.hpp"
14 #include <llvm/Analysis/LoopInfo.h>
15 #include "llvm/IR/Constants.h"
16 #include "llvm/IR/Module.h"
17 #include "llvm/IR/Function.h"
18 #include "llvm/IR/Instructions.h"
19 #include "llvm/IR/IRBuilder.h"
20 #include "llvmWrapper/IR/DerivedTypes.h"
21 #include "llvmWrapper/IR/Value.h"
22 #include "common/LLVMWarningsPop.hpp"
23 #include "common/Types.hpp"
24 #include "Probe/Assertion.h"
25 
26 using namespace llvm;
27 using namespace IGC;
28 
29 namespace {
30 
31     /// \brief Perform by-value simplification of loading constant data.
32     ///
33     /// Currently this only applies to certain constant data array initializers.
34     ///
35     class SimplifyConstant : public ModulePass {
36     public:
37         static char ID; // Pass identification, replacement for typeid
SimplifyConstant()38         SimplifyConstant() : ModulePass(ID) {
39             initializeSimplifyConstantPass(*PassRegistry::getPassRegistry());
40         }
41         bool runOnModule(Module& M);
42 
43     private:
44         bool process(GlobalVariable* GV);
45     };
46 
47 } // namespace
48 
49 namespace IGC {
50 
51     IGC_INITIALIZE_PASS_BEGIN(SimplifyConstant, "SimplifyConstant", "SimplifyConstant", false, false)
52         IGC_INITIALIZE_PASS_END(SimplifyConstant, "SimplifyConstant", "SimplifyConstant", false, false)
53 
54 } // namespace IGC
55 
56 char SimplifyConstant::ID = 0;
createSimplifyConstantPass()57 ModulePass* IGC::createSimplifyConstantPass() { return new SimplifyConstant(); }
58 
runOnModule(Module & M)59 bool SimplifyConstant::runOnModule(Module& M) {
60     bool Changed = false;
61     for (auto I = M.global_begin(); I != M.global_end(); /*empty*/) {
62         GlobalVariable* GV = &(*I++);
63         if (GV->user_empty() || !GV->isConstant() || !GV->hasInitializer() ||
64             GV->getType()->getAddressSpace() != ADDRESS_SPACE_CONSTANT)
65             continue;
66         Changed |= process(GV);
67     }
68     return Changed;
69 }
70 
71 namespace {
72 
73     class ConstantLoader {
74     public:
75         enum CInitKind {
76             CK_Unknown,
77             // All elements are equal.
78             CK_Splat,
79             // f(i) = (i % 2) == 0 ? A : B;
80             CK_OddEven,
81             // f(i) = (i < Pivot) ? A : B
82             CK_Ladder2
83         };
84 
ConstantLoader(GlobalVariable * GV)85         explicit ConstantLoader(GlobalVariable* GV)
86             : GV(GV), Kind(CK_Unknown), Pivot(0) {
87             analyze();
88             simplify();
89         }
90 
simplified() const91         bool simplified() const { return Kind != CK_Unknown; }
92 
93     private:
94         void analyze();
95         void simplify();
96 
matchSplat(ConstantDataArray * CDA)97         bool matchSplat(ConstantDataArray* CDA) {
98             Constant* V0 = CDA->getElementAsConstant(0);
99             for (unsigned i = 1, n = CDA->getNumElements(); i < n; ++i)
100                 if (CDA->getElementAsConstant(i) != V0)
101                     return false;
102             Kind = CK_Splat;
103             return true;
104         }
matchOddEven(ConstantDataArray * CDA)105         bool matchOddEven(ConstantDataArray* CDA) {
106             Constant* V0 = CDA->getElementAsConstant(0);
107             Constant* V1 = CDA->getElementAsConstant(1);
108             for (unsigned i = 2, n = CDA->getNumElements(); i < n; ++i) {
109                 Constant* Val = CDA->getElementAsConstant(i);
110                 if ((i % 2 == 0 && Val != V0) || (i % 2 == 1 && Val != V1))
111                     return false;
112             }
113             Kind = CK_OddEven;
114             return true;
115         }
matchLadder2(ConstantDataArray * CDA)116         bool matchLadder2(ConstantDataArray* CDA) {
117             Constant* V = CDA->getElementAsConstant(0);
118             unsigned k = 0;
119             for (unsigned i = 1, n = CDA->getNumElements(); i < n; ++i) {
120                 Constant* Val = CDA->getElementAsConstant(i);
121                 if (Val != V) {
122                     if (k > 0)
123                         return false;
124 
125                     V = Val;
126                     k = i;
127                 }
128             }
129 
130             Kind = CK_Ladder2;
131             Pivot = k;
132             return true;
133         }
134 
135         GlobalVariable* GV;
136         CInitKind Kind;
137         // Index of the second value if exists.
138         unsigned Pivot;
139     };
140 
141 } // namespace
142 
analyze()143 void ConstantLoader::analyze() {
144     auto Init = GV->getInitializer();
145     auto CDA = dyn_cast<ConstantDataArray>(Init);
146     if (!CDA || CDA->isString() || CDA->getNumElements() <= 1)
147         return;
148 
149     matchSplat(CDA) || matchOddEven(CDA) || matchLadder2(CDA);
150 }
151 
simplify()152 void ConstantLoader::simplify() {
153     if (Kind == CK_Unknown)
154         return;
155 
156     Constant* Init = GV->getInitializer();
157     auto CDA = cast<ConstantDataArray>(Init);
158     Constant* V0 = CDA->getElementAsConstant(0);
159     Constant* V1 = nullptr;
160     if (Kind == CK_OddEven)
161         V1 = CDA->getElementAsConstant(1);
162     else if (Kind == CK_Ladder2)
163         V1 = CDA->getElementAsConstant(Pivot);
164 
165     // Replace all GEP + load uses with V0 or a select of V0 and V1.
166     for (auto UI = GV->user_begin(); UI != GV->user_end(); /* empty */) {
167         auto GEP = dyn_cast<GetElementPtrInst>(*UI++);
168         if (!GEP || !GEP->isInBounds())
169             continue;
170 
171         // Skip optimizations on function with optnone.
172         Function* F = GEP->getParent()->getParent();
173         if (F->hasFnAttribute(Attribute::OptimizeNone))
174             continue;
175 
176         for (auto I = GEP->user_begin(); I != GEP->user_end(); /* empty */) {
177             LoadInst* LI = dyn_cast<LoadInst>(*I++);
178             if (!LI)
179                 continue;
180             IGC_ASSERT(GEP->getNumIndices() == 2);
181             Value* Index = GEP->getOperand(2);
182             Value* Val = V0;
183             IRBuilder<> Builder(LI);
184             if (Kind == CK_Ladder2) {
185                 Value* Cmp = Builder.CreateICmpULT(
186                     Index, ConstantInt::get(Index->getType(), Pivot));
187                 Val = Builder.CreateSelect(Cmp, V0, V1);
188             }
189             else if (Kind == CK_OddEven) {
190                 Value* Cmp = Builder.CreateTrunc(Index, Builder.getInt1Ty());
191                 Val = Builder.CreateSelect(Cmp, V1, V0);
192             }
193             LI->replaceAllUsesWith(Val);
194             LI->eraseFromParent();
195         }
196 
197         if (GEP->use_empty())
198             GEP->eraseFromParent();
199     }
200 }
201 
process(GlobalVariable * GV)202 bool SimplifyConstant::process(GlobalVariable* GV) {
203     ConstantLoader Loader(GV);
204     if (Loader.simplified())
205         return true;
206 
207     return false;
208 }
209 
210 namespace {
211 
212     /// \brief Promote small constant data from global to register.
213     ///
214     class PromoteConstant : public FunctionPass {
215     public:
216         static char ID; // Pass identification, replacement for typeid
PromoteConstant()217         PromoteConstant() : FunctionPass(ID) {
218             initializePromoteConstantPass(*PassRegistry::getPassRegistry());
219         }
220 
getAnalysisUsage(AnalysisUsage & AU) const221         void getAnalysisUsage(AnalysisUsage& AU) const override {
222             AU.setPreservesCFG();
223             AU.addRequired<LoopInfoWrapperPass>();
224             AU.addPreserved<LoopInfoWrapperPass>();
225         }
226 
227         bool runOnFunction(Function& F) override;
228     };
229 
230 } // namespace
231 
232 namespace IGC {
233 
234     IGC_INITIALIZE_PASS_BEGIN(PromoteConstant, "PromoteConstant", "PromoteConstant", false, false)
235         IGC_INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
236         IGC_INITIALIZE_PASS_END(PromoteConstant, "PromoteConstant", "PromoteConstant", false, false)
237 
238 } // namespace IGC
239 
240 char PromoteConstant::ID = 0;
createPromoteConstantPass()241 FunctionPass* IGC::createPromoteConstantPass() { return new PromoteConstant(); }
242 
243 // Check uses. Within this function, this GV must be used and only used by
244 // in-bound GEPs each of which is also only used by loads.
245 //
246 // We only promote when there is load of GV inside loops.
247 //
checkUses(GlobalVariable * GV,const Function * F,LoopInfo & LI,const ConstantArray * llvm_used)248 static bool checkUses(GlobalVariable* GV, const Function* F, LoopInfo& LI, const ConstantArray* llvm_used) {
249     bool LoadInLoop = false;
250     for (auto U : GV->users()) {
251         // Because of ProgramScopeConstantAnalysis there might be user in llvm.used
252         // let's ignore this one, as it doesn't prevent opt.
253         // TODO: is there any other case of Constant to worry about. (?)
254         if (auto Const = dyn_cast<Constant>(U))
255         {
256             bool foundInLLVMUsed = false;
257             unsigned numOperands = llvm_used ? llvm_used->getNumOperands() : 0;
258             for (unsigned i = 0; i != numOperands; ++i)
259             {
260                 Value* gvi = llvm_used->getOperand(i)->stripPointerCastsNoFollowAliases();
261                 if (gvi == Const->stripPointerCastsNoFollowAliases())
262                 {
263                     foundInLLVMUsed = true;
264                     break;
265                 }
266             }
267             if (foundInLLVMUsed)
268                 continue;
269         }
270 
271         auto Inst = dyn_cast<Instruction>(U);
272         if (!Inst)
273             // TODO: this may be a const expr.
274             return false;
275 
276         if (F != Inst->getParent()->getParent())
277             continue;
278 
279         auto GEP = dyn_cast<GetElementPtrInst>(Inst);
280         if (!GEP || !GEP->isInBounds())
281             return false;
282 
283         for (auto V : GEP->users()) {
284             auto Inst = dyn_cast<LoadInst>(V);
285             if (!Inst)
286                 return false;
287             if (LI.getLoopFor(Inst->getParent()) != nullptr)
288                 LoadInLoop = true;
289         }
290     }
291 
292     if (IGC_IS_FLAG_ENABLED(AllowNonLoopConstantPromotion))
293         return true;
294 
295     return LoadInLoop;
296 }
297 
298 // IGC only allows the following vector sizes:
299 //
300 //------------------------------------
301 //                 name       size
302 //                         in elements
303 //------------------------------------
304 // IGC_IR_VECTOR_TYPE(x1,           1)
305 // IGC_IR_VECTOR_TYPE(x2,           2)
306 // IGC_IR_VECTOR_TYPE(x3,           3)
307 // IGC_IR_VECTOR_TYPE(x4,           4)
308 // IGC_IR_VECTOR_TYPE(x5,           5)
309 // IGC_IR_VECTOR_TYPE(x8,           8)
310 // IGC_IR_VECTOR_TYPE(x10,         10)
311 // IGC_IR_VECTOR_TYPE(x16,         16)
312 // IGC_IR_VECTOR_TYPE(x32,         32)
313 //
314 // check or return a legal vector size. 0 means illegal.
getLegalVectorSize(unsigned N)315 static unsigned getLegalVectorSize(unsigned N) {
316 #if 0
317     if (N > 32)
318         return 0;
319 
320     if (N == 6 || N == 7)
321         return 8;
322     if (N == 9)
323         return 10;
324     if (N >= 11 && N <= 16)
325         return 16;
326     if (N >= 17 && N <= 32)
327         return 32;
328     return N;
329 #else
330     // Only emit power-of-two vector sizes.
331     N = 1U << Log2_32_Ceil(N);
332     return (N > 32) ? 0 : N;
333 #endif
334 }
335 
336 // Check vector size. We may demote the data type if all values can fit into
337 // smaller data type.
338 //
checkSize(GlobalVariable * GV,IGCLLVM::FixedVectorType * & DataType,bool & IsSigned)339 static bool checkSize(GlobalVariable* GV, IGCLLVM::FixedVectorType*& DataType,
340     bool& IsSigned) {
341     Constant* Init = GV->getInitializer();
342     IGC_ASSERT(isa<ArrayType>(Init->getType()));
343     ArrayType* ArrayTy = cast<ArrayType>(Init->getType());
344     unsigned N = (unsigned)ArrayTy->getArrayNumElements();
345     Type* BaseTy = ArrayTy->getArrayElementType();
346     unsigned VectorSize = 1;
347     if (auto VT = dyn_cast<IGCLLVM::FixedVectorType>(BaseTy)) {
348         BaseTy = VT->getElementType();
349         VectorSize = int_cast<unsigned>(VT->getNumElements());
350         N *= VectorSize;
351     }
352     unsigned VS = getLegalVectorSize(N);
353 
354     // Allow experimental overriding for bigger than 32 elem. tables
355     if (IGC_IS_FLAG_ENABLED(AllowNonLoopConstantPromotion))
356         VS = 1U << Log2_32_Ceil(N);
357 
358     if (VS == 0)
359         return false;
360 
361     if (BaseTy->isIntegerTy()) {
362         // Cached integer types.
363         Type* Int8Ty = IntegerType::get(GV->getContext(), 8);
364         Type* Int16Ty = IntegerType::get(GV->getContext(), 16);
365 
366         // [i32 -1, i32 1] can be represented as {<i8 -1, i8 1>, Signed}
367         // and [i32 200, i32 1] can be represented as {<i8 200, i8 1>, !Signed}
368         int64_t Min = INT64_MAX, Max = INT64_MIN;
369         for (unsigned i = 0; i < ArrayTy->getArrayNumElements(); ++i) {
370             auto Elt = Init->getAggregateElement(i);
371             if (isa<UndefValue>(Elt))
372                 continue;
373             if (Elt->getType()->isVectorTy()) {
374                 for (unsigned j = 0; j < VectorSize; ++j) {
375                     auto VecElt = Elt->getAggregateElement(j);
376                     int64_t Val = cast<ConstantInt>(VecElt)->getSExtValue();
377                     Min = std::min(Min, Val);
378                     Max = std::max(Max, Val);
379                 }
380             }
381             else {
382                 int64_t Val = cast<ConstantInt>(Elt)->getSExtValue();
383                 Min = std::min(Min, Val);
384                 Max = std::max(Max, Val);
385             }
386         }
387 
388         unsigned BaseSzInBits = (unsigned int)BaseTy->getPrimitiveSizeInBits();
389         IGC_ASSERT(BaseSzInBits >= 8);
390         if (BaseSzInBits > 8 && Min >= 0 && Max <= UINT8_MAX) {
391             BaseTy = Int8Ty;
392             IsSigned = false;
393         }
394         else if (BaseSzInBits > 8 && Min >= INT8_MIN && Max <= INT8_MAX) {
395             BaseTy = Int8Ty;
396             IsSigned = true;
397         }
398         else if (BaseSzInBits > 16 && Min >= 0 && Max <= UINT16_MAX) {
399             BaseTy = Int16Ty;
400             IsSigned = false;
401         }
402         else if (BaseSzInBits > 16 && Min >= INT16_MIN && Max <= INT16_MAX) {
403             BaseTy = Int16Ty;
404             IsSigned = true;
405         }
406     }
407 
408     // Two GRFs by default.
409     unsigned ThresholdInBits = IGC_GET_FLAG_VALUE(ConstantPromotionSize) * 256;
410     unsigned TotalSzInBits = (unsigned int)BaseTy->getPrimitiveSizeInBits() * VS;
411     if (TotalSzInBits <= ThresholdInBits) {
412         DataType = IGCLLVM::FixedVectorType::get(BaseTy, VS);
413         return true;
414     }
415 
416     return false;
417 }
418 
419 // Check if a vector of data could be built out of initializer.
checkType(GlobalVariable * GV)420 static bool checkType(GlobalVariable* GV) {
421     Constant* Init = GV->getInitializer();
422     if (auto CDA = dyn_cast<ConstantDataArray>(Init))
423         return !CDA->isString();
424 
425     // Not simply data. Need to check if all elements have the same type in a
426     // constant array.
427     if (auto CA = dyn_cast<ConstantArray>(Init)) {
428         auto EltTy = CA->getType()->getElementType();
429         return EltTy->isFloatingPointTy() || EltTy->isIntegerTy() ||
430             EltTy->isVectorTy();
431     }
432 
433     return false;
434 }
435 
436 // Extract N elements from a vector, Idx, ... Idx + N - 1. Return a scalar
437 // or a vector value depending on N.
extractNElts(unsigned N,Value * VectorData,Value * Offset,IRBuilder<> & IRB)438 static Value* extractNElts(unsigned N, Value* VectorData, Value* Offset,
439     IRBuilder<>& IRB) {
440     if (N == 1)
441         return IRB.CreateExtractElement(VectorData, Offset);
442 
443     Type* Ty = cast<VectorType>(VectorData->getType())->getElementType();
444     Ty = IGCLLVM::FixedVectorType::get(Ty, N);
445     Value* Result = UndefValue::get(Ty);
446     for (unsigned i = 0; i < N; ++i) {
447         Value* VectorIdx = ConstantInt::get(Offset->getType(), i);
448         if (i == 0) {
449             auto Val = IRB.CreateExtractElement(VectorData, Offset);
450             Result = IRB.CreateInsertElement(Result, Val, VectorIdx);
451         }
452         else {
453             auto Idx = IRB.CreateAdd(Offset, VectorIdx);
454             auto Val = IRB.CreateExtractElement(VectorData, Idx);
455             Result = IRB.CreateInsertElement(Result, Val, VectorIdx);
456         }
457     }
458 
459     return Result;
460 }
461 
getConstantVal(Type * VEltTy,Constant * V,bool IsSigned)462 static Constant* getConstantVal(Type* VEltTy, Constant* V, bool IsSigned) {
463     if (V->getType() == VEltTy)
464         return V;
465     IGC_ASSERT(VEltTy->isIntegerTy());
466     int64_t IVal = cast<ConstantInt>(V)->getSExtValue();
467     return ConstantInt::get(VEltTy, IVal, IsSigned);
468 }
469 
promote(GlobalVariable * GV,IGCLLVM::FixedVectorType * AllocaType,bool IsSigned,Function * F)470 static void promote(GlobalVariable* GV, IGCLLVM::FixedVectorType* AllocaType, bool IsSigned,
471     Function* F) {
472     // Build the constant vector from constant array.
473     unsigned VS = int_cast<unsigned>(AllocaType->getNumElements());
474     SmallVector<Constant*, 16> Vals(VS, nullptr);
475     Type* VEltTy = AllocaType->getElementType();
476     auto Init = GV->getInitializer();
477 
478     if (auto CDA = dyn_cast<ConstantDataArray>(Init)) {
479         unsigned NElts = CDA->getNumElements();
480         for (unsigned i = 0; i < NElts; ++i) {
481             Constant* Elt = CDA->getAggregateElement(i);
482             IGC_ASSERT_MESSAGE(nullptr != Elt, "Null AggregateElement");
483             Vals[i] = getConstantVal(VEltTy, Elt, IsSigned);
484         }
485     }
486     else {
487         IGC_ASSERT_MESSAGE(isa<ConstantArray>(Init), "out of sync");
488         ConstantArray* CA = cast<ConstantArray>(Init);
489         unsigned NElts = CA->getNumOperands();
490         for (unsigned i = 0; i < NElts; ++i) {
491             Constant* const Elt = CA->getAggregateElement(i);
492             IGC_ASSERT_MESSAGE(nullptr != Elt, "Null AggregateElement");
493             if (auto EltTy = dyn_cast<VectorType>(Elt->getType())) {
494                 unsigned VectorSize = (unsigned)cast<IGCLLVM::FixedVectorType>(EltTy)->getNumElements();
495                 for (unsigned j = 0; j < VectorSize; ++j) {
496                     Constant* V = Elt->getAggregateElement(j);
497                     Vals[i * VectorSize + j] = getConstantVal(VEltTy, V, IsSigned);
498                 }
499             }
500             else
501                 Vals[i] = getConstantVal(VEltTy, Elt, IsSigned);
502         }
503     }
504     // Fill the missing values with undef, if any.
505     for (int i = VS - 1; i >= 0; --i) {
506         if (Vals[i] != nullptr)
507             break;
508         Vals[i] = UndefValue::get(VEltTy);
509     }
510     Constant* VectorData = ConstantVector::get(Vals);
511 
512     // Transform all uses
513     for (auto UI = GV->user_begin(); UI != GV->user_end(); /*empty*/) {
514         auto GEP = dyn_cast<GetElementPtrInst>(*UI++);
515         // might be Constant user in llvm.used
516         if (!GEP || GEP->getParent()->getParent() != F)
517             continue;
518         IGC_ASSERT(GEP->getNumIndices() == 2);
519         // This is the index to address the array, and the first index is to address
520         // the global variable itself.
521         Value* Index = GEP->getOperand(2);
522         // Demote the index type to int32 to avoid 64 multiplications during vISA
523         // emission, e.g. it is illegal to emit Q x W.
524         if (Index->getType()->getPrimitiveSizeInBits() > 32) {
525             IRBuilder<> Builder(GEP);
526             Index = Builder.CreateTrunc(Index, Builder.getInt32Ty());
527         }
528         for (auto I = GEP->user_begin(); I != GEP->user_end(); /*empty*/) {
529             auto LI = dyn_cast<LoadInst>(*I++);
530             IGC_ASSERT_MESSAGE(nullptr != LI, "nullptr");
531             IRBuilder<> Builder(LI);
532             Type* Ty = LI->getType();
533             unsigned N = 1;
534             Value* Offset = Index;
535             if (Ty->isVectorTy()) {
536                 N = (unsigned)cast<IGCLLVM::FixedVectorType>(Ty)->getNumElements();
537                 Offset = Builder.CreateMul(Offset, ConstantInt::get(Offset->getType(), N));
538             }
539             Value* Val = extractNElts(N, VectorData, Offset, Builder);
540             if (Val->getType() != LI->getType()) {
541                 IGC_ASSERT(Val->getType()->isIntOrIntVectorTy());
542                 Val = Builder.CreateIntCast(Val, LI->getType(), IsSigned);
543             }
544             LI->replaceAllUsesWith(Val);
545             LI->eraseFromParent();
546         }
547     }
548 }
549 
getElt(Constant * Init,int k)550 static Constant* getElt(Constant* Init, int k) {
551     int n = (int)Init->getType()->getArrayNumElements();
552     if (k >= n)
553         return UndefValue::get(Init->getType()->getArrayElementType());
554     return Init->getAggregateElement(k);
555 };
556 
557 // Recursively emit cmp+sel tree.
getVal(IRBuilder<> & Builder,Constant * Init,Value * Index,int Low,int Hi)558 static Value* getVal(IRBuilder<>& Builder, Constant* Init, Value* Index,
559     int Low, int Hi) {
560     IGC_ASSERT(Hi > Low);
561     Type* IdxTy = Index->getType();
562     // base case.
563     if (Hi == 1 + Low) {
564         Value* Cmp = Builder.CreateICmpEQ(Index, ConstantInt::get(IdxTy, Low));
565         return Builder.CreateSelect(Cmp, getElt(Init, Low), getElt(Init, Hi));
566     }
567 
568     // There are more than two elements to be compared.
569     int Mid = (Low + Hi + 1) / 2;
570     Value* LHS = getVal(Builder, Init, Index, Low, Mid - 1);
571     Value* RHS = getVal(Builder, Init, Index, Mid, Hi);
572     Value* Cmp = Builder.CreateICmpSLT(Index, ConstantInt::get(IdxTy, Mid));
573     return Builder.CreateSelect(Cmp, LHS, RHS);
574 }
575 
rewriteAsCmpSel(GlobalVariable * GV,Function & F)576 static bool rewriteAsCmpSel(GlobalVariable* GV, Function& F) {
577     bool Changed = false;
578 
579     Type* Ty = GV->getInitializer()->getType();
580     IGC_ASSERT(Ty->isArrayTy());
581     unsigned NElts = (unsigned)Ty->getArrayNumElements();
582 
583     for (auto UI = GV->user_begin(); UI != GV->user_end(); /*empty*/) {
584         auto GEP = dyn_cast<GetElementPtrInst>(*UI++);
585         // might be Constant user in llvm.used
586         if (!GEP || GEP->getParent()->getParent() != &F)
587             continue;
588 
589         // This is the index to address the array, and the first index is to
590         // address the global variable itself.
591         IGC_ASSERT(GEP->getNumIndices() == 2);
592         Value* Index = GEP->getOperand(2);
593         if (Index->getType()->getPrimitiveSizeInBits() > 32) {
594             IRBuilder<> Builder(GEP);
595             Index = Builder.CreateTrunc(Index, Builder.getInt32Ty());
596         }
597         for (auto I = GEP->user_begin(); I != GEP->user_end(); /*empty*/) {
598             auto LI = dyn_cast<LoadInst>(*I++);
599             IGC_ASSERT_MESSAGE(nullptr != LI, "nullptr");
600             IRBuilder<> Builder(LI);
601 
602             int n = (int)NextPowerOf2(NElts - 1);
603             Value* Val = nullptr;
604             if (n > 1)
605                 Val = getVal(Builder, GV->getInitializer(), Index, 0, n - 1);
606             else
607                 Val = getElt(GV->getInitializer(), 0);
608             LI->replaceAllUsesWith(Val);
609             LI->eraseFromParent();
610             Changed = true;
611         }
612     }
613 
614     return Changed;
615 }
616 
617 // Check if it is profitable to emit cmp-sel.
618 //
619 // For an array of size N = 2^k, (N - 1) cmp + (N - 1) sel is needed to extract
620 // a single element. We use the following threshold:
621 //
622 // (1) if N <= 4, return true, or
623 // (2) if N <= 8 and element type is vector, return true, otherwise
624 // (3) return false.
625 //
626 // [4 x float],          6 ops, presumably better than a send
627 // [4 x <2 x float> ],   6 ops, ditto
628 // [8 x <2 x float> ],  14 ops, ditto
629 // [16 x <2 x float> ], 30 ops, close to a send
630 //
isCmpSelProfitable(GlobalVariable * GV)631 static bool isCmpSelProfitable(GlobalVariable* GV) {
632     Constant* Init = GV->getInitializer();
633     unsigned NElts = (unsigned)Init->getType()->getArrayNumElements();
634     unsigned CmpSelSize = IGC_GET_FLAG_VALUE(ConstantPromotionCmpSelSize);
635 
636     if (NElts <= CmpSelSize)
637         return true;
638     Type* EltTy = Init->getType()->getArrayElementType();
639     return EltTy->isVectorTy() && NElts <= CmpSelSize * 2;
640 }
641 
runOnFunction(Function & F)642 bool PromoteConstant::runOnFunction(Function& F) {
643     if (skipFunction(F))
644         return false;
645 
646     LoopInfo& LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
647     Module* M = F.getParent();
648     bool Changed = false;
649 
650     auto gv = M->getGlobalVariable("llvm.used");
651     ConstantArray* llvm_used = gv ? dyn_cast_or_null<ConstantArray>(gv->getInitializer()) : nullptr;
652 
653     for (auto I = M->global_begin(); I != M->global_end(); /*empty*/) {
654         GlobalVariable* GV = &(*I++);
655         if (GV->user_empty() || !GV->isConstant() || !GV->hasInitializer() ||
656             GV->getType()->getAddressSpace() != ADDRESS_SPACE_CONSTANT)
657             continue;
658         if (!checkType(GV))
659             continue;
660         if (!checkUses(GV, &F, LI, llvm_used))
661             continue;
662 
663         // Rewrite as cmp+sel sequence if profitable.
664         if (isCmpSelProfitable(GV)) {
665             Changed |= rewriteAsCmpSel(GV, F);
666             continue;
667         }
668 
669         // If possible demote the data into smaller type. Uses of value will be
670         // promoted back with ZExt or SExt.
671         IGCLLVM::FixedVectorType* AllocaType = nullptr;
672         bool IsSigned = false;
673         if (!checkSize(GV, AllocaType, IsSigned))
674             continue;
675 
676         IGC_ASSERT(AllocaType);
677         promote(GV, AllocaType, IsSigned, &F);
678         Changed = true;
679     }
680 
681     return Changed;
682 }
683