1 /*========================== begin_copyright_notice ============================
2 
3 Copyright (C) 2017-2021 Intel Corporation
4 
5 SPDX-License-Identifier: MIT
6 
7 ============================= end_copyright_notice ===========================*/
8 
9 #pragma once
10 
11 #include "Compiler/CISACodeGen/DebugInfoData.hpp"
12 #include "Compiler/CISACodeGen/CVariable.hpp"
13 #include "Compiler/CISACodeGen/PushAnalysis.hpp"
14 #include "Compiler/CISACodeGen/helper.h"
15 #include "Compiler/CISACodeGen/CISACodeGen.h"
16 #include "Compiler/CISACodeGen/CISABuilder.hpp"
17 #include "Compiler/CISACodeGen/LiveVars.hpp"
18 #include "Compiler/CISACodeGen/WIAnalysis.hpp"
19 #include "Compiler/CISACodeGen/CoalescingEngine.hpp"
20 #include "Compiler/CodeGenPublic.h"
21 #include "Compiler/MetaDataApi/MetaDataApi.h"
22 // Needed for SConstantGatherEntry
23 #include "usc_gen7.h"
24 #include "common/Types.hpp"
25 #include "common/LLVMWarningsPush.hpp"
26 #include <llvm/ADT/DenseMap.h>
27 #include <llvm/ADT/MapVector.h>
28 #include "common/LLVMWarningsPop.hpp"
29 #include "common/debug/Dump.hpp"
30 #include <map>
31 #include <string>
32 #include <vector>
33 #include "Probe/Assertion.h"
34 
35 namespace llvm
36 {
37     class Value;
38     class PHINode;
39     class Function;
40     class BasicBlock;
41 }
42 
43 namespace IGC
44 {
45 class DeSSA;
46 class CoalescingEngine;
47 class GenXFunctionGroupAnalysis;
48 class VariableReuseAnalysis;
49 
50 struct PushInfo;
51 
52 // Helper Function
// Presumably maps an LLVM type to the matching VISA_Type — confirm against the definition.
// NOTE(review): the second parameter is named pDataLayout but its type is CodeGenContext*.
VISA_Type GetType(llvm::Type* pType, CodeGenContext* pDataLayout);
// Presumably returns the numeric value of a constant as a 64-bit integer — confirm against the definition.
uint64_t GetImmediateVal(llvm::Value* Const);
// Presumably selects an alignment for Val, consulting the WIAnalysis result — confirm against the definition.
e_alignment GetPreferredAlignment(llvm::Value* Val, WIAnalysis* WIA, CodeGenContext* pContext);
56 
57 class CShaderProgram;
58 
59 ///--------------------------------------------------------------------------------------------------------
60 class CShader
61 {
62 public:
63     friend class CShaderProgram;
64 
65     class ExtractMaskWrapper
66     {
67         // To enable ExtractMask of any vector size. Currently, only vector
68         // whose size is no larger than 32 has its extractMask calculated.
69     private:
70         uint32_t m_EM;     // 32 bit extractMask;
71         bool     m_hasEM;  // If true, m_EM is valid; otherwise, not valid.
72     public:
73         ExtractMaskWrapper(CShader* pS, llvm::Value* VecVal);
74 
75         ExtractMaskWrapper() = delete;
76         ExtractMaskWrapper(const ExtractMaskWrapper&) = delete;
77         ExtractMaskWrapper& operator=(const ExtractMaskWrapper&) = delete;
78 
79         // b: bit position, from 0 to 31.
isSet(uint32_t b) const80         bool isSet(uint32_t b) const
81         {
82             if (m_hasEM) {
83                 IGC_ASSERT(b < 32);
84                 return (1 << (b)) & m_EM;
85             }
86             return true;
87         }
88 
getEM() const89         uint32_t getEM() const { return m_EM; }
hasEM() const90         uint16_t hasEM() const { return m_hasEM; }
91     };
92 
    CShader(llvm::Function*, CShaderProgram* pProgram);
    virtual ~CShader();
    void        Destroy();
    virtual void InitEncoder(SIMDMode simdMode, bool canAbortOnSpill, ShaderDispatchMode shaderMode = ShaderDispatchMode::NOT_APPLICABLE);
    // The virtual hooks below that have an empty body do nothing in this
    // base class; derived classes may override them.
    virtual void PreCompile() {}
    virtual void PreCompileFunction(llvm::Function& F) {}
    virtual void ParseShaderSpecificOpcode(llvm::Instruction* inst) {}
    virtual void AllocatePayload() {}
    virtual void AddPrologue() {}
    virtual void PreAnalysisPass();
    virtual void ExtractGlobalVariables() {}
    void         EOTURBWrite();
    void         EOTRenderTarget(CVariable* r1, bool isPerCoarse);
    virtual void AddEpilogue(llvm::ReturnInst* ret);
107 
    // Base implementation is not meant to be called: it asserts and
    // returns nullptr.
    virtual CVariable* GetURBOutputHandle()
    {
        IGC_ASSERT_MESSAGE(0, "Should be overridden in a derived class!");
        return nullptr;
    }
    // Base implementation is not meant to be called: it asserts and
    // returns nullptr. pVertexIndex is unused here.
    virtual CVariable* GetURBInputHandle(CVariable* pVertexIndex)
    {
        IGC_ASSERT_MESSAGE(0, "Should be overridden in a derived class!");
        return nullptr;
    }
    // Defaults for shader-specific queries; each returns false / 0 in this
    // base class.
    virtual bool passNOSInlineData() { return false; }
    virtual bool loadThreadPayload() { return false; }
    virtual unsigned getAnnotatedNumThreads() { return 0; }
    virtual bool hasReadWriteImage(llvm::Function& F) { return false; }
    // Base implementation ignores EP and F and forwards to
    // CompileSIMDSizeInCommon(simdMode).
    virtual bool CompileSIMDSize(SIMDMode simdMode, EmitPass& EP, llvm::Function& F)
    {
        return CompileSIMDSizeInCommon(simdMode);
    }
    // Presumably creates, on first use, the variable backing a congruence
    // class tuple — confirm against the definition.
    CVariable* LazyCreateCCTupleBackingVariable(
        CoalescingEngine::CCTuple* ccTuple,
        VISA_Type baseType = ISA_TYPE_UD);
    CVariable* GetSymbol(llvm::Value* value, bool fromConstantPool = false);
    // Payload / setup registration helpers (defined out of line).
    void        AddSetup(uint index, CVariable* var);
    bool        AppendPayloadSetup(CVariable* var);
    void        AddPatchTempSetup(CVariable* var);
    void        AddPatchConstantSetup(uint index, CVariable* var);
134 
    // TODO: simplify calls to GetNewVariable to these shorter and more
    // expressive cases where possible.
    //
    // CVariable* GetNewVector(VISA_Type type, const CName &name) {
    //     return GetNewVariable(numLanes(m_SIMDSize), type, EALIGN_GRF, false, name);
    // }
    // CVariable* GetNewUniform(VISA_Type type, const CName &name) {
    //    grep a GetNewVariable(1, .. true) and see what B and W use
    //     return GetNewVariable(1, type, alignOf_TODO(type), true, name);
    // }

    // Convenience overload: forwards to the full overload with
    // uniform = false and numberInstance = 1.
    CVariable* GetNewVariable(
        uint16_t nbElement,
        VISA_Type type,
        e_alignment align,
        const CName &name)
    {
        return GetNewVariable(nbElement, type, align, false, 1, name);
    }
    // Convenience overload: forwards to the full overload with
    // numberInstance = 1.
    CVariable* GetNewVariable(
        uint16_t nbElement,
        VISA_Type type,
        e_alignment align,
        UniformArgWrap uniform,
        const CName &name)
    {
        return GetNewVariable(nbElement, type, align, uniform, 1, name);
    }
    // Full overload that the two convenience overloads above delegate to
    // (defined out of line).
    CVariable* GetNewVariable(
        uint16_t nbElement,
        VISA_Type type,
        e_alignment align,
        UniformArgWrap uniform,
        uint16_t numberInstance,
        const CName &name);
    // Presumably creates a new variable with the same properties as `from` —
    // confirm against the definition.
    CVariable* GetNewVariable(const CVariable* from);
    CVariable* GetNewAddressVariable(
        uint16_t nbElement,
        VISA_Type type,
        UniformArgWrap uniform,
        bool vectorUniform,
        const CName &name);
    CVariable* GetNewVector(llvm::Value* val, e_alignment preferredAlign = EALIGN_AUTO);
    CVariable* GetNewAlias(CVariable* var, VISA_Type type, uint16_t offset, uint16_t numElements);
    CVariable* GetNewAlias(CVariable* var, VISA_Type type, uint16_t offset, uint16_t numElements, bool uniform);
180 
    // If BaseVar's type matches V's, return BaseVar; otherwise, create a new
    // alias CVariable to BaseVar. The newly-created alias CVariable's size
    // should be the same as BaseVar's size (used for creating alias for values
    // in the same DeSSA's congruent class).
    CVariable* createAliasIfNeeded(llvm::Value* V, CVariable* BaseVar);
    // Allow to create an alias of a variable handpicking a slice to be able to do cross lane in SIMD32
    CVariable* GetVarHalf(CVariable* var, unsigned int half);

    void        CopyVariable(CVariable* dst, CVariable* src, uint dstSubVar = 0, uint srcSubVar = 0);
    void        PackAndCopyVariable(CVariable* dst, CVariable* src, uint subVar = 0);
    bool        IsValueUsed(llvm::Value* value);
    CVariable*  GetGlobalCVar(llvm::Value* value);
    uint        GetNbElementAndMask(llvm::Value* value, uint32_t& mask);
    void        CreatePayload(uint regCount, uint idxOffset, CVariable*& payload, llvm::Instruction* inst, uint paramOffset, uint8_t hfFactor);
    uint        GetNbVectorElementAndMask(llvm::Value* value, uint32_t& mask);
    uint16_t    AdjustExtractIndex(llvm::Value* value, uint16_t elemIndex);
    WIBaseClass::WIDependancy GetDependency(llvm::Value* v) const;
    void        SetDependency(llvm::Value* v, WIBaseClass::WIDependancy dep);
    bool        GetIsUniform(llvm::Value* v) const;
    bool        InsideDivergentCF(const llvm::Instruction* inst) const;
    bool        InsideWorkgroupDivergentCF(const llvm::Instruction* inst) const;
    CEncoder& GetEncoder();
    // Accessors for special-purpose variables (defined out of line).
    // NOTE(review): whether these create the variable lazily is not visible
    // from this header.
    CVariable* GetR0();
    CVariable* GetNULL();
    CVariable* GetTSC();
    CVariable* GetSR0();
    CVariable* GetCR0();
    CVariable* GetCE0();
    CVariable* GetDBG();
    CVariable* GetHWTID();
    CVariable* GetSP();
    CVariable* GetFP();
    CVariable* GetPrevFP();
    CVariable* GetARGV();
    CVariable* GetRETV();
    CVariable* GetPrivateBase();
    CVariable* GetImplArgBufPtr();
    CVariable* GetLocalIdBufPtr();
219 
    void SaveSRet(CVariable* sretPtr);
    CVariable* GetAndResetSRet();

    // True when the stack pointer / frame pointer variable is non-null.
    bool hasSP() const { return m_SP != nullptr; }
    bool hasFP() const { return m_FP != nullptr; }

    void InitializeStackVariables();
    void SaveStackState();
    void RestoreStackState();

    void        AllocateInput(CVariable* var, uint offset, uint instance = 0, bool forceLiveOut = false);
    void        AllocateOutput(CVariable* var, uint offset, uint instance = 0);
    CVariable* ImmToVariable(uint64_t immediate, VISA_Type type, bool isCodePatchCandidate = false);
    CVariable* GetConstant(llvm::Constant* C, CVariable* dstVar = nullptr);
    CVariable* GetScalarConstant(llvm::Value* c);
    CVariable* GetUndef(VISA_Type type);
    llvm::Constant* findCommonConstant(llvm::Constant* C, uint elts, uint currentEmitElts, bool& allSame);
    virtual unsigned int GetGlobalMappingValue(llvm::Value* c);
    virtual CVariable* GetGlobalMapping(llvm::Value* c);
    CVariable* BitCast(CVariable* var, VISA_Type newType);
    void        ResolveAlias(CVariable* var);
    void        CacheArgumentsList();
    virtual void MapPushedInputs();
    void        CreateGatherMap();
    void        CreateConstantBufferOutput(SKernelProgram* pKernelProgram);
    void        CreateFunctionSymbol(llvm::Function* pFunc);
    void        CreateGlobalSymbol(llvm::GlobalVariable* pGlobal);

    CVariable*  GetStructVariable(llvm::Value* v, bool forceVectorInit = false);

    void        CreateImplicitArgs();
    void        CreateAliasVars();
    uint        GetBlockId(llvm::BasicBlock* block);
    // Returns the stored block count (m_numBlocks).
    uint        GetNumSBlocks() { return m_numBlocks; }
254 
    // Setters for analysis results and helpers consumed by this shader.
    // All of them store the given pointer without taking ownership here,
    // except SetPushInfoHelper, which copies the pointed-to PushInfo.
    void        SetUniformHelper(WIAnalysis* WI) { m_WI = WI; }
    void        SetDeSSAHelper(DeSSA* deSSA) { m_deSSA = deSSA; }
    void        SetCoalescingEngineHelper(CoalescingEngine* ce) { m_coalescingEngine = ce; }
    void        SetCodeGenHelper(CodeGenPatternMatch* CG) { m_CG = CG; }
    // Copies *PI into pushInfo; PI must not be null.
    void        SetPushInfoHelper(PushInfo* PI) { pushInfo = *PI; }
    void        SetDominatorTreeHelper(llvm::DominatorTree* DT) { m_DT = DT; }
    void        SetDataLayout(const llvm::DataLayout* DL) { m_DL = DL; }
    void        SetFunctionGroupAnalysis(GenXFunctionGroupAnalysis* FGA) { m_FGA = FGA; }
    void        SetVariableReuseAnalysis(VariableReuseAnalysis* VRA) { m_VRA = VRA; }
    void        SetMetaDataUtils(IGC::IGCMD::MetaDataUtils* pMdUtils) { m_pMdUtils = pMdUtils; }
    void        SetScratchSpaceSize(uint size) { m_ScratchSpaceSize = size; }
    IGCMD::MetaDataUtils* GetMetaDataUtils() { return m_pMdUtils; }

    // Hook with an empty default body; derived classes may override it.
    virtual  void SetShaderSpecificHelper(EmitPass* emitPass) {}
269 
    void        AllocateConstants(uint& offset);
    void        AllocateStatelessConstants(uint& offset);
    void        AllocateSimplePushConstants(uint& offset);
    void        AllocateNOSConstants(uint& offset);
    void        AllocateConstants3DShader(uint& offset);
    // Shader type as recorded on the owning CodeGenContext.
    ShaderType  GetShaderType() const { return GetContext()->type; }
    bool        IsPatchablePS();
    bool        IsValueCoalesced(llvm::Value* v);

    // Barrier flag: set once via SetHasBarrier(); there is no way to clear it here.
    bool        GetHasBarrier() const { return m_HasBarrier; }
    void        SetHasBarrier() { m_HasBarrier = true; }

    void        GetSimdOffsetBase(CVariable*& pVar);
    /// Returns a simd8 register filled with values [24, 20, 16, 12, 8, 4, 0]
    /// that are used to index subregisters of a GRF when counting offsets in bytes.
    /// Used e.g. for indirect addressing via a0 register.
    /// NOTE(review): the list above has seven entries for a simd8 register; a
    /// leading 28 may be missing — confirm against the definition.
    CVariable* GetPerLaneOffsetsReg(uint typeSizeInBytes);

    void        GetPayloadElementSymbols(llvm::Value* inst, CVariable* payload[], int vecWidth);

    // Returns the CodeGenContext pointer held in m_ctx.
    CodeGenContext* GetContext() const { return m_ctx; }

    SProgramOutput* ProgramOutput();

    bool CanTreatAsAlias(llvm::ExtractElementInst* inst);
    bool CanTreatScalarSourceAsAlias(llvm::InsertElementInst*);

    bool HasBecomeNoop(llvm::Instruction* inst);

    // If V is not in any congruent class, not aliased to any other
    // variables, not payload-coalesced, then this function returns
    // true.
    // NOTE(review): given the function name, "false" may have been intended
    // above — confirm against the definition.
    bool IsCoalesced(llvm::Value* V);

    bool VMECoalescePattern(llvm::GenIntrinsicInst*);

    bool isUnpacked(llvm::Value* value);

    /// Return true if we are sure that all lanes are active at the beginning of the thread.
    /// The base implementation always returns false.
    virtual bool HasFullDispatchMask() { return false; }
310 
    llvm::Function* entry;
    const CBTILayout* m_pBtiLayout;
    const CPlatform* m_Platform;
    const CDriverInfo* m_DriverInfo;

    ModuleMetaData* m_ModuleMetadata;

    /// Dispatch size is the number of logical threads running in one hardware thread
    SIMDMode m_dispatchSize;
    /// Dispatch mode of this shader.
    /// NOTE(review): the comment previously placed here described the SIMD size,
    /// which is documented on m_SIMDSize below.
    ShaderDispatchMode m_ShaderDispatchMode;
    /// the default emit size for this shader. This is the default size for variables as well
    /// as the default execution size for each instruction. encoder may override it explicitly
    /// via CEncoder::SetSIMDSize
    SIMDMode m_SIMDSize;
    uint8_t m_numberInstance;
    PushInfo pushInfo;
    bool isInputsPulled; //true if any input is pulled, false otherwise
    bool isMessageTargetDataCacheDataPort;
    uint m_sendStallCycle;
    uint m_staticCycle;
    unsigned m_spillSize = 0;
    float m_spillCost = 0;          // num weighted spill inst / total inst

    std::vector<llvm::Value*> m_argListCache;

    /// The size in bytes used by igc (non-spill space). This
    /// is the value passed to VISA so that VISA's spill, if any,
    /// will go after this space.
    uint m_ScratchSpaceSize;

    ShaderStats* m_shaderStats;

    // Number of binding table entries per cache line.
    static constexpr DWORD cBTEntriesPerCacheLine = 32;
    // Max BTI value that can increase binding table count.
    // SampleEngine:    Binding Table Index is set to 252 specifies the bindless surface offset.
    // DataPort:        The special entry 255 is used to reference Stateless A32 or A64 address model,
    //                  and the special entry 254 is used to reference the SLM address model.
    //                  The special entry 252 is used to reference bindless resource operation.
    static constexpr DWORD MAX_BINDING_TABLE_INDEX = 251;
    static constexpr uint cMessageExtendedDescriptorEOTBit = BIT(5);
353 
    // Returns the variable mapped to ccTuple. Uses operator[], so a tuple
    // that is not yet mapped gets a default (null) entry inserted as a side
    // effect.
    CVariable* GetCCTupleToVariableMapping(CoalescingEngine::CCTuple* ccTuple)
    {
        return ccTupleMapping[ccTuple];
    }

    // Records (or overwrites) the variable associated with constant C.
    void addConstantInPool(llvm::Constant* C, CVariable* Var) {
        ConstantPool[C] = Var;
    }

    // Returns the variable recorded for C, or the map's default value
    // (a null pointer) when C is not in the pool. Does not insert.
    CVariable* lookupConstantInPool(llvm::Constant* C) {
        return ConstantPool.lookup(C);
    }
366 
    unsigned int EvaluateSIMDConstExpr(llvm::Value* C);

    /// Initialize per function status.
    void BeginFunction(llvm::Function* F);
    // This method splits payload interpolations from the shader into another compilation unit
    void SplitPayloadFromShader(llvm::Function* F);
    /// This method is used to create the vISA variable for function F's formal return value
    CVariable* getOrCreateReturnSymbol(llvm::Function* F);
    /// This method is used to create the vISA variable for function F's formal argument
    CVariable* getOrCreateArgumentSymbol(
        llvm::Argument* Arg,
        bool ArgInCallee, // true if Arg isn't in current func
        bool useStackCall = false);
    void UpdateSymbolMap(llvm::Value* v, CVariable* CVar);
    // Member overload; a free function IGC::GetType with an extra context
    // parameter is declared before this class.
    VISA_Type GetType(llvm::Type* type);
    uint32_t GetNumElts(llvm::Type* type, bool isUniform = false);

    /// Evaluate constant expression and return the result immediate value.
    uint64_t GetConstantExpr(llvm::ConstantExpr* C);
386 
387 
GetMaxUsedBindingTableEntryCount(void) const388     uint32_t GetMaxUsedBindingTableEntryCount(void) const
389     {
390         if (m_BindingTableUsedEntriesBitmap != 0)
391         {
392             // m_BindingTableEntryCount is index; '+ 1' due to calculate total used count.
393             return (m_BindingTableEntryCount + 1);
394         }
395         return 0;
396     }
397 
    // Returns the bitmap of used binding table entries; each bit covers
    // cBTEntriesPerCacheLine consecutive entries (see
    // SetBindingTableEntryCountAndBitmap).
    uint32_t GetBindingTableEntryBitmap(void) const
    {
        return m_BindingTableUsedEntriesBitmap;
    }
402 
SetBindingTableEntryCountAndBitmap(bool directIdx,BufferType bufType,uint32_t typeBti,uint32_t bti)403     void SetBindingTableEntryCountAndBitmap(bool directIdx, BufferType bufType, uint32_t typeBti, uint32_t bti)
404     {
405         if (bti <= MAX_BINDING_TABLE_INDEX)
406         {
407             if (directIdx)
408             {
409                 m_BindingTableEntryCount = (bti <= m_pBtiLayout->GetBindingTableEntryCount()) ? (std::max(bti, m_BindingTableEntryCount)) : m_BindingTableEntryCount;
410                 m_BindingTableUsedEntriesBitmap |= BIT(bti / cBTEntriesPerCacheLine);
411 
412                 if (bufType == RESOURCE)
413                 {
414                     m_shaderResourceLoaded[typeBti / 32] |= BIT(typeBti % 32);
415                 }
416                 else if (bufType == CONSTANT_BUFFER)
417                 {
418                     m_constantBufferLoaded |= BIT(typeBti);
419                 }
420                 else if (bufType == UAV)
421                 {
422                     m_uavLoaded |= QWBIT(typeBti);
423                 }
424                 else if (bufType == RENDER_TARGET)
425                 {
426                     m_renderTargetLoaded |= BIT(typeBti);
427                 }
428             }
429             else
430             {
431                 // Indirect addressing, set the maximum BTI.
432                 m_BindingTableEntryCount = m_pBtiLayout->GetBindingTableEntryCount();
433                 m_BindingTableUsedEntriesBitmap |= BITMASK_RANGE(0, (m_BindingTableEntryCount / cBTEntriesPerCacheLine));
434 
435                 if (bufType == RESOURCE)
436                 {
437                     unsigned int MaxArray = m_pBtiLayout->GetTextureIndexSize() / 32;
438                     for (unsigned int i = 0; i < MaxArray; i++)
439                     {
440                         m_shaderResourceLoaded[i] = 0xffffffff;
441                     }
442 
443                     for (unsigned int i = MaxArray * 32; i < m_pBtiLayout->GetTextureIndexSize(); i++)
444                     {
445                         m_shaderResourceLoaded[MaxArray] = BIT(i % 32);
446                     }
447                 }
448                 else if (bufType == CONSTANT_BUFFER)
449                 {
450                     m_constantBufferLoaded |= BITMASK_RANGE(0, m_pBtiLayout->GetConstantBufferIndexSize());
451                 }
452                 else if (bufType == UAV)
453                 {
454                     m_uavLoaded |= QWBITMASK_RANGE(0, m_pBtiLayout->GetUavIndexSize());
455                 }
456                 else if (bufType == RENDER_TARGET)
457                 {
458                     m_renderTargetLoaded |= BITMASK_RANGE(0, m_pBtiLayout->GetRenderTargetIndexSize());
459                 }
460             }
461         }
462     }
463 
    /// Evaluate the Sampler Count field value.
    unsigned int GetSamplerCount(unsigned int samplerCount);

    // Presumably the return payload size for an IME intrinsic — confirm
    // units and semantics against the definition.
    static unsigned GetIMEReturnPayloadSize(llvm::GenIntrinsicInst* I);
468 
addCVarsForVectorBC(llvm::BitCastInst * BCI,llvm::SmallVector<CVariable *,8> CVars)469     void addCVarsForVectorBC(llvm::BitCastInst* BCI, llvm::SmallVector<CVariable*, 8> CVars)
470     {
471         IGC_ASSERT_MESSAGE(m_VectorBCItoCVars.find(BCI) == std::end(m_VectorBCItoCVars), "a variable already exists for this vector bitcast");
472         m_VectorBCItoCVars.try_emplace(BCI, CVars);
473     }
474 
getCVarForVectorBCI(llvm::BitCastInst * BCI,int index)475     CVariable* getCVarForVectorBCI(llvm::BitCastInst* BCI, int index)
476     {
477         auto iter = m_VectorBCItoCVars.find(BCI);
478         if (iter == m_VectorBCItoCVars.end())
479         {
480             return nullptr;
481         }
482         return (*iter).second[index];
483     }
484 
    // One-way flags: each Set* marks the property as present; none of them
    // can be cleared through this interface.
    void SetHasGlobalStatelessAccess() { m_HasGlobalStatelessMemoryAccess = true; }
    bool GetHasGlobalStatelessAccess() const { return m_HasGlobalStatelessMemoryAccess; }
    void SetHasConstantStatelessAccess() { m_HasConstantStatelessMemoryAccess = true; }
    bool GetHasConstantStatelessAccess() const { return m_HasConstantStatelessMemoryAccess; }
    void SetHasGlobalAtomics() { m_HasGlobalAtomics = true; }
    bool GetHasGlobalAtomics() const { return m_HasGlobalAtomics; }
    bool GetHasDPAS() const { return m_HasDPAS; }
    void SetHasDPAS() { m_HasDPAS = true; }
    // Counters, incremented by one per call.
    void IncStatelessWritesCount() { ++m_StatelessWritesCount; }
    void IncIndirectStatelessCount() { ++m_IndirectStatelessCount; }
    uint32_t GetStatelessWritesCount() const { return m_StatelessWritesCount; }
    uint32_t GetIndirectStatelessCount() const { return m_IndirectStatelessCount; }

    // In bytes
    uint32_t getGRFSize() const { return m_Platform->getGRFSize(); }
    // In bytes: the platform's minimum push constant buffer alignment
    // multiplied by sizeof(DWORD).
    uint32_t getMinPushConstantBufferAlignmentInBytes() const { return m_Platform->getMinPushConstantBufferAlignment() * sizeof(DWORD); }


    // Alignment value that CVariable::getAlignment derives from the GRF size.
    e_alignment getGRFAlignment() const { return CVariable::getAlignment(getGRFSize()); }
505 
    // Mutable access to the symbol map (symbolMapping).
    llvm::DenseMap<llvm::Value*, CVariable*>& GetSymbolMapping()
    {
        return symbolMapping;
    }

    // Mutable access to the per-kernel map (globalSymbolMapping). This
    // overload takes no argument, unlike the virtual
    // GetGlobalMapping(llvm::Value*).
    llvm::DenseMap<llvm::Value*, CVariable*>& GetGlobalMapping()
    {
        return globalSymbolMapping;
    }
515 
GetKernelArgOffset(CVariable * argV)516     int64_t GetKernelArgOffset(CVariable* argV)
517     {
518         auto it = kernelArgToPayloadOffsetMap.find(argV);
519         return it != kernelArgToPayloadOffsetMap.end() ? (int64_t) it->second : -1;
520     }
521 
    DebugInfoData& GetDebugInfoData();

    // Size queries for LLVM types; the *InBits variants presumably return
    // bits and the others bytes — confirm against the definitions.
    unsigned int GetPrimitiveTypeSizeInRegisterInBits(const llvm::Type* Ty) const;
    unsigned int GetPrimitiveTypeSizeInRegister(const llvm::Type* Ty) const;
    unsigned int GetScalarTypeSizeInRegisterInBits(const llvm::Type* Ty) const;
    unsigned int GetScalarTypeSizeInRegister(const llvm::Type* Ty) const;

    // One-way flags: can be set but not cleared through this interface.
    bool HasStackCalls() const { return m_HasStackCalls; }
    void SetHasStackCalls() { m_HasStackCalls = true; }
    bool IsIntelSymbolTableVoidProgram() const { return m_isIntelSymbolTableVoidProgram; }
    void SetIsIntelSymbolTableVoidProgram() { m_isIntelSymbolTableVoidProgram = true; }
533 
protected:
    // Shared implementation that the base CompileSIMDSize() forwards to.
    bool CompileSIMDSizeInCommon(SIMDMode simdMode);
    uint32_t GetShaderThreadUsageRate();
private:
    // Return DefInst's CVariable if it could be reused for UseInst, and return
    // nullptr otherwise.
    CVariable* reuseSourceVar(llvm::Instruction* UseInst,
        llvm::Instruction* DefInst,
        e_alignment preferredAlign);

    // Return nullptr if no source variable is reused. Otherwise return a
    // CVariable from its source operand.
    CVariable* GetSymbolFromSource(llvm::Instruction* UseInst,
        e_alignment preferredAlign);
548 
protected:
    CShaderProgram* m_parent;
    CodeGenContext* m_ctx;
    // Analysis results and helpers installed through the Set*Helper /
    // Set*Analysis setters.
    WIAnalysis* m_WI;
    DeSSA* m_deSSA;
    CoalescingEngine* m_coalescingEngine;
    CodeGenPatternMatch* m_CG;
    llvm::DominatorTree* m_DT;
    const llvm::DataLayout* m_DL;
    GenXFunctionGroupAnalysis* m_FGA;
    VariableReuseAnalysis* m_VRA;

    uint m_numBlocks;
    IGC::IGCMD::MetaDataUtils* m_pMdUtils;

    // The allocator type differs between debug/internal builds and other builds.
#if defined(_DEBUG) || defined(_INTERNAL)
    llvm::SpecificBumpPtrAllocator<CVariable> Allocator;
#else
    llvm::BumpPtrAllocator Allocator;
#endif

    // Mapping from formal argument to its variable or from function to its
    // return variable. Per kernel mapping. Used when llvm functions are
    // compiled into vISA subroutine
    llvm::DenseMap<llvm::Value*, CVariable*> globalSymbolMapping;

    llvm::DenseMap<llvm::Value*, CVariable*> symbolMapping;
    // Yet another map: a mapping from ccTuple to its corresponding root variable.
    // Variables that participate in congruence class tuples will be defined as
    // aliases (with respective offset) to the root variable.
    llvm::DenseMap<CoalescingEngine::CCTuple*, CVariable*> ccTupleMapping;
    // Constant pool.
    llvm::DenseMap<llvm::Constant*, CVariable*> ConstantPool;

    // keep a map when we generate accurate mask for vector value
    // in order to reduce register usage
    llvm::DenseMap<llvm::Value*, uint32_t> extractMasks;

    // keep a map for each kernel argument to its allocated payload offset
    llvm::DenseMap<CVariable*, uint32_t> kernelArgToPayloadOffsetMap;

    CEncoder encoder;
    std::vector<CVariable*> setup;
    std::vector<CVariable*> payloadLiveOutSetup;
    std::vector<CVariable*> payloadTempSetup;
    std::vector<CVariable*> patchConstantSetup;

    uint m_maxBlockId;

    // Special-purpose variables, exposed through the corresponding Get*()
    // accessors. m_SP and m_FP are null-checked by hasSP() / hasFP().
    CVariable* m_R0;
    CVariable* m_NULL;
    CVariable* m_TSC;
    CVariable* m_SR0;
    CVariable* m_CR0;
    CVariable* m_CE0;
    CVariable* m_DBG;
    CVariable* m_HW_TID;
    CVariable* m_SP;
    CVariable* m_FP;
    CVariable* m_SavedFP;
    CVariable* m_ARGV;
    CVariable* m_RETV;
    CVariable* m_SavedSRetPtr;
    CVariable* m_ImplArgBufPtr;
    CVariable* m_LocalIdBufPtr;
614 
    std::vector<USC::SConstantGatherEntry> gatherMap;
    uint     m_ConstantBufferLength;
    uint     m_constantBufferMask;
    // Per-buffer-type usage masks updated by SetBindingTableEntryCountAndBitmap.
    uint     m_constantBufferLoaded;
    uint64_t m_uavLoaded;
    // One bit per resource, 32 resources per element.
    uint     m_shaderResourceLoaded[4];
    uint     m_renderTargetLoaded;

    int  m_cbSlot;
    uint m_statelessCBPushedSize;
    uint m_NOSBufferSize = 0;

    /// holds max number of inputs that can be pushed for this shader unit
    static const uint32_t m_pMaxNumOfPushedInputs;

    bool m_HasBarrier;
    SProgramOutput m_simdProgram;

    // Holds max used binding table entry index.
    uint32_t m_BindingTableEntryCount;

    // Holds binding table entries bitmap.
    uint32_t m_BindingTableUsedEntriesBitmap;

    // for each vector BCI whose uses are all extractElt with imm offset,
    // we store the CVariables for each index
    llvm::DenseMap<llvm::Instruction*, llvm::SmallVector<CVariable*, 8>> m_VectorBCItoCVars;

    // These two are for stateful token setup. It is a quick
    // special case check. Once a generic approach is added,
    // these two fields shall be retired.
    bool m_HasGlobalStatelessMemoryAccess;
    bool m_HasConstantStatelessMemoryAccess;

    bool m_HasGlobalAtomics = false;

    bool m_HasDPAS = false;

    uint32_t m_StatelessWritesCount = 0;
    uint32_t m_IndirectStatelessCount = 0;

    DebugInfoData diData;

    bool m_HasStackCalls = false;
    bool m_isIntelSymbolTableVoidProgram = false;
660 };
661 
662 /// This class contains the information for the different SIMD version
663 /// of a kernel. Each kernel in the module is associated to one CShaderProgram
664 class CShaderProgram
665 {
666 public:
667     typedef llvm::MapVector<llvm::Function*, CShaderProgram*> KernelShaderMap;
668     CShaderProgram(CodeGenContext* ctx, llvm::Function* kernel);
669     ~CShaderProgram();
670     CShader* GetOrCreateShader(SIMDMode simd, ShaderDispatchMode mode = ShaderDispatchMode::NOT_APPLICABLE);
671     CShader* GetShader(SIMDMode simd, ShaderDispatchMode mode = ShaderDispatchMode::NOT_APPLICABLE);
672     void DeleteShader(SIMDMode simd, ShaderDispatchMode mode = ShaderDispatchMode::NOT_APPLICABLE);
GetContext()673     CodeGenContext* GetContext() { return m_context; }
674     void FillProgram(SVertexShaderKernelProgram* pKernelProgram);
675     void FillProgram(SHullShaderKernelProgram* pKernelProgram);
676     void FillProgram(SDomainShaderKernelProgram* pKernelProgram);
677     void FillProgram(SGeometryShaderKernelProgram* pKernelProgram);
678     void FillProgram(SPixelShaderKernelProgram* pKernelProgram);
679     void FillProgram(SComputeShaderKernelProgram* pKernelProgram);
680     void FillProgram(SOpenCLProgramInfo* pKernelProgram);
681     ShaderStats* m_shaderStats;
682 
683 protected:
684     CShader*& GetShaderPtr(SIMDMode simd, ShaderDispatchMode mode);
685     CShader* CreateNewShader(SIMDMode simd);
686     void ClearShaderPtr(SIMDMode simd);
687 
hasShaderOutput(CShader * shader)688     inline bool hasShaderOutput(CShader* shader)
689     {
690         return (shader && shader->ProgramOutput()->m_programSize > 0);
691     }
692 
freeShaderOutput(CShader * shader)693     inline void freeShaderOutput(CShader* shader)
694     {
695         if (hasShaderOutput(shader))
696         {
697             IGC::aligned_free(shader->ProgramOutput()->m_programBin);
698             shader->ProgramOutput()->m_programSize = 0;
699         }
700     }
701 
702     CodeGenContext* m_context;
703     llvm::Function* m_kernel;
704     std::array<CShader*, 8> m_SIMDshaders;
705 };
706 
707 struct SInstContext
708 {
709     CVariable* flag;
710     e_modifier dst_mod;
711     bool invertFlag;
initIGC::SInstContext712     void init()
713     {
714         flag = NULL;
715         dst_mod = EMOD_NONE;
716         invertFlag = false;
717     }
718 };
719 
720 static const SInstContext g_InitContext =
721 {
722     NULL,
723     EMOD_NONE,
724     false,
725 };
726 
void unify_opt_PreProcess(CodeGenContext* pContext);
// Forward declaration
struct PSSignature;
// Code generation entry points, one overload per context type. Both take
// the kernel-to-program map by non-const reference; pSignature is optional
// for the pixel shader overload.
void CodeGen(PixelShaderContext* ctx, CShaderProgram::KernelShaderMap& shaders, PSSignature* pSignature = nullptr);
void CodeGen(OpenCLProgramContext* ctx, CShaderProgram::KernelShaderMap& shaders);
732 }
733