1 /*========================== begin_copyright_notice ============================
2 
3 Copyright (C) 2017-2021 Intel Corporation
4 
5 SPDX-License-Identifier: MIT
6 
7 ============================= end_copyright_notice ===========================*/
8 
9 #pragma once
10 
11 #include "common/LLVMWarningsPush.hpp"
12 #include <llvm/Analysis/AssumptionCache.h>
13 #include <llvm/IR/Instruction.h>
14 #include <llvm/IR/Instructions.h>
15 #include <llvm/IR/Function.h>
16 #include <llvm/IR/Intrinsics.h>
17 #include <llvm/IR/IntrinsicInst.h>
18 #include <llvm/IR/Constants.h>
19 #include <llvm/IR/DataLayout.h>
20 #include <llvm/IR/Metadata.h>
21 #include <llvm/IR/Operator.h>
22 #include <llvm/IR/CFG.h>
23 #include <llvmWrapper/IR/IRBuilder.h>
24 #include <llvm/IR/PassManager.h>
25 #include <llvm/ADT/SmallSet.h>
26 #include <llvm/ADT/DenseSet.h>
27 #include <llvm/Analysis/PostDominators.h>
28 #include "common/LLVMWarningsPop.hpp"
29 #include "GenISAIntrinsics/GenIntrinsics.h"
30 #include "GenISAIntrinsics/GenIntrinsicInst.h"
31 #include "Compiler/CodeGenPublicEnums.h"
32 #include "Compiler/CISACodeGen/Platform.hpp"
33 #include "Compiler/MetaDataApi/MetaDataApi.h"
34 #include "common/MDFrameWork.h"
35 #include "common/Types.hpp"
36 #include "Probe/Assertion.h"
37 
38 typedef unsigned int uint;
39 
40 #define SIZE_WORD   2
41 #define SIZE_DWORD  4
42 #define SIZE_OWORD 16
43 
44 enum ADDRESS_SPACE : unsigned int;
45 
46 namespace IGC
47 {
48 
49     class CodeGenContext;
50     struct SProgramOutput;
51 
52     static const char * const INTEL_SYMBOL_TABLE_VOID_PROGRAM = "Intel_Symbol_Table_Void_Program";
53 
#ifdef _DEBUG
    // Debug builds: use std::vector so debugger visualizers work and element
    // storage is always on the heap (easier to inspect); N is ignored.
    template<typename T, size_t N>
    using smallvector = std::vector<T>;
#else
    // Release builds: use llvm::SmallVector to keep up to N elements inline
    // and avoid heap allocation in the common case.
    template<typename T, size_t N>
    using smallvector = llvm::SmallVector<T, N>;
#endif
61 
62     // This is used to return true/false/dunno results.
63     enum class Tristate
64     {
65         Unknown = -1, False = 0, True = 1
66     };
67 
    // Tag for the kind of LLVM construct an EOPCODE entry originates from.
    // The tag is packed into the high bits of EOPCODE (see OPCODE/LLVMTYPEBYTE).
    enum e_llvmType
    {
        e_Instruction = 0,
        e_Intrinsic = 1,
        // Deliberately shares the value 1 with e_Intrinsic: GenISA intrinsics
        // are encoded the same way as regular intrinsics.
        e_GenISAIntrinsic = 1,
    };
74 #define LLVMTYPEBYTE 24
75 
76 #define OPCODE(instName,llvmType) \
77     instName | llvmType<<LLVMTYPEBYTE
78 
79 #define DECLARE_OPCODE(instName, llvmType, name, modifiers, sat, pred, condMod, mathIntrinsic, atomicIntrinsic, regioning) \
80     name = OPCODE(llvm::llvmType::instName,e_##llvmType),
81     enum EOPCODE
82     {
83 #include "opCode.h"
84     };
85 #undef DECLARE_OPCODE
86 
87 #define DECLARE_OPCODE(instName, llvmType, name, modifiers, sat, pred, condMod, mathIntrinsic, atomicIntrinsic, regioning) \
88     static_assert((llvm::llvmType::instName < ( 1 << LLVMTYPEBYTE ) ), "Enum bitfield range check");
89 #include "opCode.h"
90 #undef DECLARE_OPCODE
91 
92     EOPCODE GetOpCode(const llvm::Instruction* inst);
93     bool SupportsModifier(llvm::Instruction* inst);
94     bool SupportsSaturate(llvm::Instruction* inst);
95     bool SupportsPredicate(llvm::Instruction* inst);
96     bool SupportsCondModifier(llvm::Instruction* inst);
97     bool SupportsRegioning(llvm::Instruction* inst);
98     bool IsMathIntrinsic(EOPCODE opcode);
99     bool IsAtomicIntrinsic(EOPCODE opcode);
100     bool IsGradientIntrinsic(EOPCODE opcode);
101     bool IsExtendedMathInstruction(llvm::Instruction* Inst);
102     bool IsSubGroupIntrinsicWithSimd32Implementation(EOPCODE opcode);
103     bool UsesTypedConstantBuffer(
104         const CodeGenContext* pContext,
105         const BufferType bufType);
106 
107     bool ComputesGradient(llvm::Instruction* inst);
108 
109     BufferType GetBufferType(uint addrSpace);
110 
111     uint getImmValueU32(const llvm::Value* value);
112     bool getImmValueBool(const llvm::Value* value);
113 
    // Reads an immediate constant value as a 32-bit unsigned integer (via
    // getImmValueU32) and casts it to the requested enum type.
    template <typename EnumT>
    static inline EnumT getImmValueEnum(const llvm::Value* val)
    {
        return static_cast<EnumT>(getImmValueU32(val));
    }
119 
120     void VectorToElement(
121         llvm::Value* inst,
122         llvm::Value* elem[],
123         llvm::Type* int32Ty,
124         llvm::Instruction* insert_before,
125         int vsize = 4);
126     llvm::Value* ElementToVector(
127         llvm::Value* elem[],
128         llvm::Type* int32Ty,
129         llvm::Instruction* insert_before,
130         int vsize = 4);
131 
132     llvm::Value* ConvertToFloat(llvm::IRBuilder<>& builder, llvm::Value* val);
133     void ConvertToFloat(llvm::IRBuilder<>& builder, llvm::SmallVectorImpl<llvm::Value*>& instList);
134     //scalarize aggregate into flattened members
135     void ScalarizeAggregateMembers(llvm::IRBuilder<>& builder, llvm::Value* val, llvm::SmallVectorImpl<llvm::Value*>& instList);
136     //scalarize aggregate into flattened member addresses
137     void ScalarizeAggregateMemberAddresses(IGCLLVM::IRBuilder<>& builder, llvm::Type* type, llvm::Value* val, llvm::SmallVectorImpl<llvm::Value*>& instList, llvm::SmallVector<llvm::Value*, 16> indices);
138 
139     /// return true if pLLVMInst is load from constant-buffer with immediate constant-buffer index
140     bool IsLoadFromDirectCB(llvm::Instruction* pLLVMInst, uint& cbId, llvm::Value*& eltPtrVal);
141     bool IsReadOnlyLoadDirectCB(llvm::Instruction* pLLVMInst, uint& cbId, llvm::Value*& eltPtrVal, BufferType& buftype);
142 
143     int findSampleInstructionTextureIdx(llvm::Instruction* inst);
144     llvm::Value* getTextureIndexArgBasedOnOpcode(llvm::Instruction* inst);
145     llvm::Value* GetBufferOperand(llvm::Instruction* inst);
146 
147     llvm::LoadInst* cloneLoad(llvm::LoadInst* Orig, llvm::Value* Ptr);
148     llvm::StoreInst* cloneStore(llvm::StoreInst* Orig, llvm::Value* Val, llvm::Value* Ptr);
149 
150     llvm::LdRawIntrinsic* CreateLoadRawIntrinsic(llvm::LoadInst* inst, llvm::Value* bufPtr, llvm::Value* offsetVal);
151     llvm::StoreRawIntrinsic* CreateStoreRawIntrinsic(llvm::StoreInst* inst, llvm::Value* bufPtr, llvm::Value* offsetVal);
152 
153     void getTextureAndSamplerOperands(llvm::GenIntrinsicInst* pIntr, llvm::Value*& pTextureValue, llvm::Value*& pSamplerValue);
154     void ChangePtrTypeInIntrinsic(llvm::GenIntrinsicInst*& pIntr, llvm::Value* oldPtr, llvm::Value* newPtr);
155 
156     llvm::Value* TracePointerSource(llvm::Value* resourcePtr);
157     llvm::Value* TracePointerSource(llvm::Value* resourcePtr, bool hasBranching, bool enablePhiLoops, bool fillList, std::vector<llvm::Value*>& instList);
158     llvm::Value* TracePointerSource(llvm::Value* resourcePtr, bool hasBranching, bool enablePhiLoops, bool fillList, std::vector<llvm::Value*>& instList, llvm::SmallSet<llvm::PHINode*, 8> & visitedPHIs);
159     bool GetResourcePointerInfo(llvm::Value* srcPtr, unsigned& resID, IGC::BufferType& resTy, IGC::BufferAccessType& accessTy, bool& needBufferOffset);
160     BufferAccessType getDefaultAccessType(BufferType bufTy);
161     bool GetGRFOffsetFromRTV(llvm::Value* pointerSrc, unsigned& GRFOffset);
162     bool GetStatelessBufferInfo(llvm::Value* pointer, unsigned& bufIdOrGRFOffset, IGC::BufferType& bufferTy, llvm::Value*& bufferSrcPtr, bool& isDirectBuf);
163     // try to evaluate the address if it is constant.
164     bool EvalConstantAddress(llvm::Value* address, unsigned int& offset, const llvm::DataLayout* pDL, llvm::Value* ptrSrc = nullptr);
165     bool getConstantAddress(llvm::Instruction& I, ConstantAddress& cl, CodeGenContext* pContext, bool& directBuf, bool& statelessBuf, bool& bindlessBuf);
166 
167 
168     bool isSampleLoadGather4InfoInstruction(llvm::Instruction* inst);
169     bool isSampleInstruction(llvm::Instruction* inst);
170     bool isInfoInstruction(llvm::Instruction* inst);
171     bool isLdInstruction(llvm::Instruction* inst);
172     bool isGather4Instruction(llvm::Instruction* inst);
173     bool isVectorInputInstruction(llvm::Instruction* inst);
174 
175     bool IsMediaIOIntrinsic(llvm::Instruction* inst);
176     bool IsSIMDBlockIntrinsic(llvm::Instruction* inst);
177     bool isSubGroupIntrinsic(const llvm::Instruction* I);
178 
179     bool IsStatelessMemLoadIntrinsic(llvm::GenISAIntrinsic::ID id);
180     bool IsStatelessMemStoreIntrinsic(llvm::GenISAIntrinsic::ID id);
181     bool IsStatelessMemAtomicIntrinsic(llvm::GenIntrinsicInst& inst, llvm::GenISAIntrinsic::ID id);
182 
183     bool isURBWriteIntrinsic(const llvm::Instruction* inst);
184 
185     llvm::Instruction* AdjustSystemValueCall(llvm::GenIntrinsicInst* inst);
186 
187     unsigned EncodeAS4GFXResource(
188         const llvm::Value& bufIdx,
189         BufferType bufType,
190         unsigned uniqueIndAS = IGC::DefaultIndirectIdx);
191 
192     unsigned SetBufferAsBindless(unsigned addressSpaceOfPtr, BufferType bufferType);
193     bool isStatefulAddrSpace(unsigned AS);
194 
195     BufferType DecodeAS4GFXResource(unsigned addrSpace, bool& directIdx, unsigned& bufId);
196     BufferType DecodeBufferType(unsigned addrSpace);
197     int getConstantBufferLoadOffset(llvm::LoadInst* ld);
198 
199     bool isDummyBasicBlock(llvm::BasicBlock* BB);
200 
201     bool IsDirectIdx(unsigned addrSpace);
202     bool isNaNCheck(llvm::FCmpInst& FC);
203 
IsBindless(BufferType t)204     inline bool IsBindless(BufferType t)
205     {
206         return t == BINDLESS || t == BINDLESS_CONSTANT_BUFFER || t == BINDLESS_TEXTURE;
207     }
IsSSHbindless(BufferType t)208     inline bool IsSSHbindless(BufferType t)
209     {
210         return t == SSH_BINDLESS || t == SSH_BINDLESS_CONSTANT_BUFFER || t == SSH_BINDLESS_TEXTURE;
211     }
212 
213     bool IsUnsignedCmp(const llvm::CmpInst::Predicate Pred);
214     bool IsSignedCmp(const llvm::CmpInst::Predicate Pred);
215 
216     bool IsBitCastForLifetimeMark(const llvm::Value* V);
217 
218     // isA64Ptr - Queries whether given pointer type requires 64-bit representation in vISA
219     bool isA64Ptr(llvm::PointerType* PT, CodeGenContext* pContext);
220 
    // Looks up the dummy "void program" function that carries the symbol
    // table; returns nullptr if the module does not contain one.
    inline llvm::Function* getIntelSymbolTableVoidProgram(llvm::Module* pM)
    {
        return pM->getFunction(INTEL_SYMBOL_TABLE_VOID_PROGRAM);
    }
    // True iff pF is the module's symbol-table "void program" function
    // (pointer comparison against the named lookup in pF's own module).
    inline bool isIntelSymbolTableVoidProgram(llvm::Function* pF)
    {
        return (pF == getIntelSymbolTableVoidProgram(pF->getParent()));
    }
229 
ForceAlwaysInline()230     inline bool ForceAlwaysInline()
231     {
232         // return true if FunctionControl is set to INLINE, and SelectiveFunctionControl does not force fcalls.
233         return IGC_GET_FLAG_VALUE(FunctionControl) == FLAG_FCALL_FORCE_INLINE &&
234             (IGC_GET_FLAG_VALUE(SelectiveFunctionControl) == FLAG_FCALL_DEFAULT ||
235                 IGC_GET_FLAG_VALUE(SelectiveFunctionControl) == FLAG_FCALL_FORCE_INLINE);
236     }
237 
238     /// Return true if F is an entry function of a kernel or a shader.
239     ///    A entry function must have an entry in FunctionInfoMetaData
240     ///       with type KernelFunction;
241     ///    A non-entry function may have an entry, if so, that entry in
242     ///       FunctionInfoMetaData must have type UserFunction.
    inline bool isEntryFunc(const IGCMD::MetaDataUtils* pM, const llvm::Function* CF)
    {
        // MetaDataUtils lookups take a non-const Function*; the function is
        // not modified here, hence the const_cast.
        llvm::Function* F = const_cast<llvm::Function*>(CF);
        // Declarations (empty bodies) and functions with no FunctionsInfo
        // entry can never be entry points.
        if (F == nullptr || F->empty() ||
            pM->findFunctionsInfoItem(F) == pM->end_FunctionsInfo())
            return false;

        IGCMD::FunctionInfoMetaDataHandle Info = pM->getFunctionsInfoItem(F);
        IGC_ASSERT_MESSAGE(Info->isTypeHasValue(), "FunctionInfoMetaData missing type!");
        // Entry functions are tagged KernelFunction; any other tag (e.g.
        // UserFunction) marks a non-entry function.
        return Info->getType() == FunctionTypeMD::KernelFunction;
    }
254 
isPixelShaderPhaseFunction(const llvm::Function * CF)255     inline bool isPixelShaderPhaseFunction(const llvm::Function *CF) {
256         const llvm::Module* M = CF->getParent();
257         static const char* const phases[] = { NAMED_METADATA_COARSE_PHASE,
258                                               NAMED_METADATA_PIXEL_PHASE };
259         for (auto phase : phases) {
260             if (auto MD = M->getNamedMetadata(phase)) {
261                 if (MD->getOperand(0) && MD->getOperand(0)->getOperand(0)) {
262                     auto Func = llvm::mdconst::dyn_extract<llvm::Function>(
263                         MD->getOperand(0)->getOperand(0));
264                     if (Func == CF)
265                         return true;
266                 }
267             }
268         }
269         return false;
270     }
271 
isCoarsePhaseFunction(const llvm::Function * CF)272     inline bool isCoarsePhaseFunction(const llvm::Function* CF) {
273         const llvm::Module * M = CF->getParent();
274         if (auto MD = M->getNamedMetadata(NAMED_METADATA_COARSE_PHASE)) {
275             if (MD->getOperand(0) && MD->getOperand(0)->getOperand(0)) {
276                 auto Func = llvm::mdconst::dyn_extract<llvm::Function>(
277                     MD->getOperand(0)->getOperand(0));
278                 return Func == CF;
279             }
280         }
281         return false;
282     }
283 
isPixelPhaseFunction(const llvm::Function * CF)284     inline bool isPixelPhaseFunction(const llvm::Function* CF) {
285         const llvm::Module* M = CF->getParent();
286         if (auto MD = M->getNamedMetadata(NAMED_METADATA_PIXEL_PHASE)) {
287             if (MD->getOperand(0) && MD->getOperand(0)->getOperand(0)) {
288                 auto Func = llvm::mdconst::dyn_extract<llvm::Function>(
289                     MD->getOperand(0)->getOperand(0));
290                 return Func == CF;
291             }
292         }
293         return false;
294     }
295 
isNonEntryMultirateShader(const llvm::Function * CF)296     inline bool isNonEntryMultirateShader(const llvm::Function* CF) {
297         if (isPixelPhaseFunction(CF))
298         {
299             const llvm::Module* CM = CF->getParent();
300             if (auto MD = CM->getNamedMetadata(NAMED_METADATA_COARSE_PHASE)) {
301                 if (MD->getOperand(0) && MD->getOperand(0)->getOperand(0)) {
302                     auto Func = llvm::mdconst::dyn_extract<llvm::Function>(
303                         MD->getOperand(0)->getOperand(0));
304                     return Func != nullptr;
305                 }
306             }
307         }
308         return  false;
309     }
310 
311     // Return a unique entry function.
312     // If more than one entry exists, return the first and and set it as unique.
313     // All subsequent calls to this function will get the entry set by the first call.
314     llvm::Function* getUniqueEntryFunc(const IGCMD::MetaDataUtils* pM, IGC::ModuleMetaData* pModMD);
315 
316     // \brief Get next instruction, returning null if it's the last of the BB.
317     // This is the replacement of Instruction::getNextNode(), since getNextNode()
318     // on last inst of BB will return sentinel node as instruction, which will
319     // cause memory corruption.  A better solution is to switch to iterator and
320     // avoid using getNextNode().
GetNextInstruction(llvm::Instruction * inst)321     inline llvm::Instruction* GetNextInstruction(llvm::Instruction* inst)
322     {
323         llvm::BasicBlock::iterator I = llvm::BasicBlock::iterator(inst);
324         if (++I == inst->getParent()->end())
325         {
326             return nullptr;
327         }
328         return &(*I);
329     }
330 
331     template <typename T>
RTWriteHasSource0Alpha(const T * rtWrite,ModuleMetaData * md)332     inline bool RTWriteHasSource0Alpha(
333         const T* rtWrite,
334         ModuleMetaData* md)
335     {
336         return (nullptr != rtWrite->getSource0Alpha()) && !llvm::isa<llvm::UndefValue>(rtWrite->getSource0Alpha());
337     }
338 
339     template <typename T>
DoesRTWriteSrc0AlphaBelongToHomogeneousPart(const T * rtWrite,ModuleMetaData * md)340     inline bool DoesRTWriteSrc0AlphaBelongToHomogeneousPart(
341         const T* rtWrite,
342         ModuleMetaData* md)
343     {
344         return !rtWrite->hasMask() && RTWriteHasSource0Alpha(rtWrite, md);
345     }
346 
VectorUsedByConstExtractOnly(llvm::Value * val,llvm::SmallVector<llvm::SmallVector<llvm::ExtractElementInst *,1>,4> & extracts)347     inline bool VectorUsedByConstExtractOnly(
348         llvm::Value* val,
349         llvm::SmallVector< llvm::SmallVector<llvm::ExtractElementInst*, 1>, 4> & extracts)
350     {
351         for (auto UI = val->user_begin(), UE = val->user_end(); UI != UE; ++UI)
352         {
353             llvm::ExtractElementInst* ei =
354                 llvm::dyn_cast<llvm::ExtractElementInst>(*UI);
355             if (!ei)
356             {
357                 return false;
358             }
359             else
360             {
361                 llvm::ConstantInt* idxv =
362                     llvm::dyn_cast<llvm::ConstantInt>(ei->getIndexOperand());
363                 if (!idxv)
364                 {
365                     return false;
366                 }
367                 uint idx = (uint)idxv->getZExtValue();
368                 extracts[idx].push_back(ei);
369             }
370         }
371         return true;
372     }
373 
    // Convenience wrapper of VectorUsedByConstExtractOnly for vector loads:
    // true iff the load's value is consumed only by constant-index extracts.
    inline bool LoadUsedByConstExtractOnly(
        llvm::LoadInst* ld,
        llvm::SmallVector< llvm::SmallVector<llvm::ExtractElementInst*, 1>, 4> & extracts)
    {
        return VectorUsedByConstExtractOnly(ld, extracts);
    }
380 
381 
382     llvm::Value* mutatePtrType(llvm::Value* ptrv, llvm::PointerType* newType,
383         llvm::IRBuilder<>& builder, const llvm::Twine& name = "");
384 
385     unsigned int AppendConservativeRastWAHeader(IGC::SProgramOutput* program, SIMDMode simdmode);
386 
387     bool DSDualPatchEnabled(class CodeGenContext* ctx);
388 
389 
390     /// \brief Check whether inst precedes given position in one basic block
isInstPrecede(const llvm::Instruction * inst,const llvm::Instruction * pos)391     inline bool isInstPrecede(
392         const llvm::Instruction* inst,
393         const llvm::Instruction* pos)
394     {
395         // must within same basic block
396         IGC_ASSERT(inst->getParent() == pos->getParent());
397         if (inst == pos)
398         {
399             return true;
400         }
401 
402         auto II = inst->getParent()->begin();
403         for (; &*II != inst && &*II != pos; ++II)
404             ;
405         return &*II == inst;
406     }
407 
408     // If true, the codegen will not emit any code for this instruction
409     // (So dst and src are aliased to each other.)
410     bool isNoOpInst(llvm::Instruction* I, CodeGenContext* Ctx);
411 
412     // CxtI is the instruction at which V is checked whether
413     // it is positive or not.
414     bool valueIsPositive(
415         llvm::Value* V,
416         const llvm::DataLayout* DL,
417         llvm::AssumptionCache* AC = nullptr,
418         llvm::Instruction* CxtI = nullptr);
419 
    // Computes the fraction (0..1) of a subslice's HW threads occupied by
    // thread groups of the given size and SIMD mode, accounting for dispatch
    // limits imposed by SLM consumption per group.
    inline float GetThreadOccupancyPerSubslice(SIMDMode simdMode, unsigned threadGroupSize, unsigned hwThreadPerSubslice, unsigned slmSize, unsigned slmSizePerSubSlice)
    {
        unsigned simdWidth = 8;

        switch (simdMode)
        {
        case SIMDMode::SIMD8:   simdWidth = 8;  break;
        case SIMDMode::SIMD16:  simdWidth = 16; break;
        case SIMDMode::SIMD32:  simdWidth = 32; break;
        default:
            IGC_ASSERT_MESSAGE(0, "Invalid SIMD mode");
            break;
        }

        IGC_ASSERT(simdWidth);
        // HW threads needed for one thread group (ceiling division).
        const unsigned nThreadsPerTG = (threadGroupSize + simdWidth - 1) / simdWidth;
        IGC_ASSERT(nThreadsPerTG);
        // Groups per subslice when limited only by HW thread count.
        const unsigned TGPerSubsliceNoSLM = hwThreadPerSubslice / nThreadsPerTG;
        // SLM usage may further cap how many groups fit on a subslice.
        const unsigned nTGDispatch = (slmSize == 0) ? TGPerSubsliceNoSLM : std::min(TGPerSubsliceNoSLM, slmSizePerSubSlice / slmSize);
        IGC_ASSERT(float(hwThreadPerSubslice));
        const float occupancy = float(nTGDispatch * nThreadsPerTG) / float(hwThreadPerSubslice);
        return occupancy;
    }
443 
444     // Duplicate of the LLVM function in llvm/Transforms/Utils/ModuleUtils.h
445     // Global can now be any pointer type that uses addrspace
446     void appendToUsed(llvm::Module& M, llvm::ArrayRef<llvm::GlobalValue*> Values);
447 
448     bool safeScheduleUp(llvm::BasicBlock* BB, llvm::Value* V, llvm::Instruction*& InsertPos, llvm::DenseSet<llvm::Instruction*> Scheduled);
449 
GetHwThreadsPerWG(const IGC::CPlatform & platform)450     inline unsigned GetHwThreadsPerWG(const IGC::CPlatform& platform)
451     {
452         unsigned hwThreadPerWorkgroup = platform.getMaxNumberHWThreadForEachWG();
453         if (platform.supportPooledEU())
454         {
455             hwThreadPerWorkgroup = std::min(platform.getMaxNumberThreadPerWorkgroupPooledMax(), (unsigned)64);
456         }
457         return hwThreadPerWorkgroup;
458     }
459 
getLeastSIMDAllowed(unsigned int threadGroupSize,unsigned int hwThreadPerWorkgroup)460     inline SIMDMode getLeastSIMDAllowed(unsigned int threadGroupSize, unsigned int hwThreadPerWorkgroup)
461     {
462         if (hwThreadPerWorkgroup == 0)
463         {
464             hwThreadPerWorkgroup = 42; //On GT1 HW, there are 7 threads/EU and 6 EU/subslice, 42 is the minimum threads/workgroup any HW can support
465         }
466         if (threadGroupSize <= hwThreadPerWorkgroup * 8)
467         {
468             return SIMDMode::SIMD8;
469         }
470         else if (threadGroupSize <= hwThreadPerWorkgroup * 16)
471         {
472             return SIMDMode::SIMD16;
473         }
474         else
475         {
476             return SIMDMode::SIMD32;
477         }
478     }
479 
480     // Debug line info helper function
    // Copies the debug location from pOrigin onto pNew so a replacement
    // instruction keeps the original source-line attribution.
    inline void updateDebugLoc(llvm::Instruction* pOrigin, llvm::Instruction* pNew)
    {
        IGC_ASSERT_MESSAGE(nullptr != pOrigin, "Expect valid instructions");
        IGC_ASSERT_MESSAGE(nullptr != pNew, "Expect valid instructions");
        pNew->setDebugLoc(pOrigin->getDebugLoc());
    }
487 
488     llvm::ConstantInt* getConstantSInt(llvm::IRBuilder<>& Builder, const int bitSize, int64_t val);
489     llvm::ConstantInt* getConstantUInt(llvm::IRBuilder<>& Builder, const int bitSize, uint64_t val);
490     llvm::Value* CreateMulhS64(llvm::IRBuilder<>& B, llvm::Value* const u, llvm::Value* const v);
491     llvm::Value* CreateMulhU64(llvm::IRBuilder<>& B, llvm::Value* const u, llvm::Value* const v);
492     llvm::Value* CreateMulh(llvm::Function& F, llvm::IRBuilder<>& B, const bool isSigned, llvm::Value* const u, llvm::Value* const v);
493 
494     // Ported from PostDominators.cpp of llvm10 or later
495     // replace this with PDT.dominates(I1, I2) once we upgrade
496     bool PDT_dominates(llvm::PostDominatorTree& PTD,
497         const llvm::Instruction* I1,
498         const llvm::Instruction* I2);
499 
500     // Returns true if a function has an inline asm call instruction
501     bool hasInlineAsmInFunc(llvm::Function& F);
502 
503     std::tuple<std::string, std::string, unsigned> ParseVectorVariantFunctionString(llvm::StringRef varStr);
504 
505     // Return base type of complex type or nullptr if it cannot be processed
506     llvm::Type* GetBaseType(llvm::Type* ProcessedType);
507 
508     // Function modifies address space in selected uses of given input value
509     void FixAddressSpaceInAllUses(llvm::Value* ptr, uint newAS, uint oldAS);
510 
511 
512     // Returns the dynamic URB base offset and an immediate const offset
513     // from the dynamic base. The function calculates the result by walking
514     // the use-def chain of pUrbOffset.
515     // If pUrbOffset is an immediate constant (==offset) then
516     // <nullptr, offset> is returned.
517     // In all other cases <pUrbOffset, 0> is returned.
518     std::pair<llvm::Value*, unsigned int> GetURBBaseAndOffset(llvm::Value* pUrbOffset);
519 
520     std::vector<std::pair<unsigned int, std::string>> GetPrintfStrings(llvm::Module &M);
521 } // namespace IGC
522