1 /*========================== begin_copyright_notice ============================ 2 3 Copyright (C) 2017-2021 Intel Corporation 4 5 SPDX-License-Identifier: MIT 6 7 ============================= end_copyright_notice ===========================*/ 8 9 #pragma once 10 11 #include "Compiler/CISACodeGen/DebugInfoData.hpp" 12 #include "Compiler/CISACodeGen/CVariable.hpp" 13 #include "Compiler/CISACodeGen/PushAnalysis.hpp" 14 #include "Compiler/CISACodeGen/helper.h" 15 #include "Compiler/CISACodeGen/CISACodeGen.h" 16 #include "Compiler/CISACodeGen/CISABuilder.hpp" 17 #include "Compiler/CISACodeGen/LiveVars.hpp" 18 #include "Compiler/CISACodeGen/WIAnalysis.hpp" 19 #include "Compiler/CISACodeGen/CoalescingEngine.hpp" 20 #include "Compiler/CodeGenPublic.h" 21 #include "Compiler/MetaDataApi/MetaDataApi.h" 22 // Needed for SConstantGatherEntry 23 #include "usc_gen7.h" 24 #include "common/Types.hpp" 25 #include "common/LLVMWarningsPush.hpp" 26 #include <llvm/ADT/DenseMap.h> 27 #include <llvm/ADT/MapVector.h> 28 #include "common/LLVMWarningsPop.hpp" 29 #include "common/debug/Dump.hpp" 30 #include <map> 31 #include <string> 32 #include <vector> 33 #include "Probe/Assertion.h" 34 35 namespace llvm 36 { 37 class Value; 38 class PHINode; 39 class Function; 40 class BasicBlock; 41 } 42 43 namespace IGC 44 { 45 class DeSSA; 46 class CoalescingEngine; 47 class GenXFunctionGroupAnalysis; 48 class VariableReuseAnalysis; 49 50 struct PushInfo; 51 52 // Helper Function 53 VISA_Type GetType(llvm::Type* pType, CodeGenContext* pDataLayout); 54 uint64_t GetImmediateVal(llvm::Value* Const); 55 e_alignment GetPreferredAlignment(llvm::Value* Val, WIAnalysis* WIA, CodeGenContext* pContext); 56 57 class CShaderProgram; 58 59 ///-------------------------------------------------------------------------------------------------------- 60 class CShader 61 { 62 public: 63 friend class CShaderProgram; 64 65 class ExtractMaskWrapper 66 { 67 // To enable ExtractMask of any vector size. Currently, only vector 68 // whose size is no larger than 32 has its extractMask calculated. 69 private: 70 uint32_t m_EM; // 32 bit extractMask; 71 bool m_hasEM; // If true, m_EM is valid; otherwise, not valid. 72 public: 73 ExtractMaskWrapper(CShader* pS, llvm::Value* VecVal); 74 75 ExtractMaskWrapper() = delete; 76 ExtractMaskWrapper(const ExtractMaskWrapper&) = delete; 77 ExtractMaskWrapper& operator=(const ExtractMaskWrapper&) = delete; 78 79 // b: bit position, from 0 to 31. isSet(uint32_t b) const80 bool isSet(uint32_t b) const 81 { 82 if (m_hasEM) { 83 IGC_ASSERT(b < 32); 84 return (1 << (b)) & m_EM; 85 } 86 return true; 87 } 88 getEM() const89 uint32_t getEM() const { return m_EM; } hasEM() const90 uint16_t hasEM() const { return m_hasEM; } 91 }; 92 93 CShader(llvm::Function*, CShaderProgram* pProgram); 94 virtual ~CShader(); 95 void Destroy(); 96 virtual void InitEncoder(SIMDMode simdMode, bool canAbortOnSpill, ShaderDispatchMode shaderMode = ShaderDispatchMode::NOT_APPLICABLE); PreCompile()97 virtual void PreCompile() {} PreCompileFunction(llvm::Function & F)98 virtual void PreCompileFunction(llvm::Function& F) {} ParseShaderSpecificOpcode(llvm::Instruction * inst)99 virtual void ParseShaderSpecificOpcode(llvm::Instruction* inst) {} AllocatePayload()100 virtual void AllocatePayload() {} AddPrologue()101 virtual void AddPrologue() {} 102 virtual void PreAnalysisPass(); ExtractGlobalVariables()103 virtual void ExtractGlobalVariables() {} 104 void EOTURBWrite(); 105 void EOTRenderTarget(CVariable* r1, bool isPerCoarse); 106 virtual void AddEpilogue(llvm::ReturnInst* ret); 107 GetURBOutputHandle()108 virtual CVariable* GetURBOutputHandle() 109 { 110 IGC_ASSERT_MESSAGE(0, "Should be overridden in a derived class!"); 111 return nullptr; 112 } GetURBInputHandle(CVariable * pVertexIndex)113 virtual CVariable* GetURBInputHandle(CVariable* pVertexIndex) 114 { 115 IGC_ASSERT_MESSAGE(0, "Should be overridden in a derived class!"); 116 return nullptr; 117 } passNOSInlineData()118 virtual bool passNOSInlineData() { return false; } loadThreadPayload()119 virtual bool loadThreadPayload() { return false; } getAnnotatedNumThreads()120 virtual unsigned getAnnotatedNumThreads() { return 0; } hasReadWriteImage(llvm::Function & F)121 virtual bool hasReadWriteImage(llvm::Function& F) { return false; } CompileSIMDSize(SIMDMode simdMode,EmitPass & EP,llvm::Function & F)122 virtual bool CompileSIMDSize(SIMDMode simdMode, EmitPass& EP, llvm::Function& F) 123 { 124 return CompileSIMDSizeInCommon(simdMode); 125 } 126 CVariable* LazyCreateCCTupleBackingVariable( 127 CoalescingEngine::CCTuple* ccTuple, 128 VISA_Type baseType = ISA_TYPE_UD); 129 CVariable* GetSymbol(llvm::Value* value, bool fromConstantPool = false); 130 void AddSetup(uint index, CVariable* var); 131 bool AppendPayloadSetup(CVariable* var); 132 void AddPatchTempSetup(CVariable* var); 133 void AddPatchConstantSetup(uint index, CVariable* var); 134 135 // TODO: simplify calls to GetNewVariable to these shorter and more 136 // expressive cases where possible. 137 // 138 // CVariable* GetNewVector(VISA_Type type, const CName &name) { 139 // return GetNewVariable(numLanes(m_SIMDSize), type, EALIGN_GRF, false, name); 140 // } 141 // CVariable* GetNewUniform(VISA_Type type, const CName &name) { 142 // grep a GetNewVariable(1, .. true) and see what B and W use 143 // return GetNewVariable(1, type, alignOf_TODO(type), true, name); 144 // } 145 GetNewVariable(uint16_t nbElement,VISA_Type type,e_alignment align,const CName & name)146 CVariable* GetNewVariable( 147 uint16_t nbElement, 148 VISA_Type type, 149 e_alignment align, 150 const CName &name) 151 { 152 return GetNewVariable(nbElement, type, align, false, 1, name); 153 } GetNewVariable(uint16_t nbElement,VISA_Type type,e_alignment align,UniformArgWrap uniform,const CName & name)154 CVariable* GetNewVariable( 155 uint16_t nbElement, 156 VISA_Type type, 157 e_alignment align, 158 UniformArgWrap uniform, 159 const CName &name) 160 { 161 return GetNewVariable(nbElement, type, align, uniform, 1, name); 162 } 163 CVariable* GetNewVariable( 164 uint16_t nbElement, 165 VISA_Type type, 166 e_alignment align, 167 UniformArgWrap uniform, 168 uint16_t numberInstance, 169 const CName &name); 170 CVariable* GetNewVariable(const CVariable* from); 171 CVariable* GetNewAddressVariable( 172 uint16_t nbElement, 173 VISA_Type type, 174 UniformArgWrap uniform, 175 bool vectorUniform, 176 const CName &name); 177 CVariable* GetNewVector(llvm::Value* val, e_alignment preferredAlign = EALIGN_AUTO); 178 CVariable* GetNewAlias(CVariable* var, VISA_Type type, uint16_t offset, uint16_t numElements); 179 CVariable* GetNewAlias(CVariable* var, VISA_Type type, uint16_t offset, uint16_t numElements, bool uniform); 180 181 // If BaseVar's type matches V's, return BaseVar; otherwise, create an new 182 // alias CVariable to BaseVar. The newly-created alias CVariable's size 183 // should be the same as BaseVar's size (used for creating alias for values 184 // in the same DeSSA's congruent class). 185 CVariable* createAliasIfNeeded(llvm::Value* V, CVariable* BaseVar); 186 // Allow to create an alias of a variable handpicking a slice to be able to do cross lane in SIMD32 187 CVariable* GetVarHalf(CVariable* var, unsigned int half); 188 189 void CopyVariable(CVariable* dst, CVariable* src, uint dstSubVar = 0, uint srcSubVar = 0); 190 void PackAndCopyVariable(CVariable* dst, CVariable* src, uint subVar = 0); 191 bool IsValueUsed(llvm::Value* value); 192 CVariable* GetGlobalCVar(llvm::Value* value); 193 uint GetNbElementAndMask(llvm::Value* value, uint32_t& mask); 194 void CreatePayload(uint regCount, uint idxOffset, CVariable*& payload, llvm::Instruction* inst, uint paramOffset, uint8_t hfFactor); 195 uint GetNbVectorElementAndMask(llvm::Value* value, uint32_t& mask); 196 uint16_t AdjustExtractIndex(llvm::Value* value, uint16_t elemIndex); 197 WIBaseClass::WIDependancy GetDependency(llvm::Value* v) const; 198 void SetDependency(llvm::Value* v, WIBaseClass::WIDependancy dep); 199 bool GetIsUniform(llvm::Value* v) const; 200 bool InsideDivergentCF(const llvm::Instruction* inst) const; 201 bool InsideWorkgroupDivergentCF(const llvm::Instruction* inst) const; 202 CEncoder& GetEncoder(); 203 CVariable* GetR0(); 204 CVariable* GetNULL(); 205 CVariable* GetTSC(); 206 CVariable* GetSR0(); 207 CVariable* GetCR0(); 208 CVariable* GetCE0(); 209 CVariable* GetDBG(); 210 CVariable* GetHWTID(); 211 CVariable* GetSP(); 212 CVariable* GetFP(); 213 CVariable* GetPrevFP(); 214 CVariable* GetARGV(); 215 CVariable* GetRETV(); 216 CVariable* GetPrivateBase(); 217 CVariable* GetImplArgBufPtr(); 218 CVariable* GetLocalIdBufPtr(); 219 220 void SaveSRet(CVariable* sretPtr); 221 CVariable* GetAndResetSRet(); 222 hasSP() const223 bool hasSP() const { return m_SP != nullptr; } hasFP() const224 bool hasFP() const { return m_FP != nullptr; } 225 226 void InitializeStackVariables(); 227 void SaveStackState(); 228 void RestoreStackState(); 229 230 void AllocateInput(CVariable* var, uint offset, uint instance = 0, bool forceLiveOut = false); 231 void AllocateOutput(CVariable* var, uint offset, uint instance = 0); 232 CVariable* ImmToVariable(uint64_t immediate, VISA_Type type, bool isCodePatchCandidate = false); 233 CVariable* GetConstant(llvm::Constant* C, CVariable* dstVar = nullptr); 234 CVariable* GetScalarConstant(llvm::Value* c); 235 CVariable* GetUndef(VISA_Type type); 236 llvm::Constant* findCommonConstant(llvm::Constant* C, uint elts, uint currentEmitElts, bool& allSame); 237 virtual unsigned int GetGlobalMappingValue(llvm::Value* c); 238 virtual CVariable* GetGlobalMapping(llvm::Value* c); 239 CVariable* BitCast(CVariable* var, VISA_Type newType); 240 void ResolveAlias(CVariable* var); 241 void CacheArgumentsList(); 242 virtual void MapPushedInputs(); 243 void CreateGatherMap(); 244 void CreateConstantBufferOutput(SKernelProgram* pKernelProgram); 245 void CreateFunctionSymbol(llvm::Function* pFunc); 246 void CreateGlobalSymbol(llvm::GlobalVariable* pGlobal); 247 248 CVariable* GetStructVariable(llvm::Value* v, bool forceVectorInit = false); 249 250 void CreateImplicitArgs(); 251 void CreateAliasVars(); 252 uint GetBlockId(llvm::BasicBlock* block); GetNumSBlocks()253 uint GetNumSBlocks() { return m_numBlocks; } 254 SetUniformHelper(WIAnalysis * WI)255 void SetUniformHelper(WIAnalysis* WI) { m_WI = WI; } SetDeSSAHelper(DeSSA * deSSA)256 void SetDeSSAHelper(DeSSA* deSSA) { m_deSSA = deSSA; } SetCoalescingEngineHelper(CoalescingEngine * ce)257 void SetCoalescingEngineHelper(CoalescingEngine* ce) { m_coalescingEngine = ce; } SetCodeGenHelper(CodeGenPatternMatch * CG)258 void SetCodeGenHelper(CodeGenPatternMatch* CG) { m_CG = CG; } SetPushInfoHelper(PushInfo * PI)259 void SetPushInfoHelper(PushInfo* PI) { pushInfo = *PI; } SetDominatorTreeHelper(llvm::DominatorTree * DT)260 void SetDominatorTreeHelper(llvm::DominatorTree* DT) { m_DT = DT; } SetDataLayout(const llvm::DataLayout * DL)261 void SetDataLayout(const llvm::DataLayout* DL) { m_DL = DL; } SetFunctionGroupAnalysis(GenXFunctionGroupAnalysis * FGA)262 void SetFunctionGroupAnalysis(GenXFunctionGroupAnalysis* FGA) { m_FGA = FGA; } SetVariableReuseAnalysis(VariableReuseAnalysis * VRA)263 void SetVariableReuseAnalysis(VariableReuseAnalysis* VRA) { m_VRA = VRA; } SetMetaDataUtils(IGC::IGCMD::MetaDataUtils * pMdUtils)264 void SetMetaDataUtils(IGC::IGCMD::MetaDataUtils* pMdUtils) { m_pMdUtils = pMdUtils; } SetScratchSpaceSize(uint size)265 void SetScratchSpaceSize(uint size) { m_ScratchSpaceSize = size; } GetMetaDataUtils()266 IGCMD::MetaDataUtils* GetMetaDataUtils() { return m_pMdUtils; } 267 SetShaderSpecificHelper(EmitPass * emitPass)268 virtual void SetShaderSpecificHelper(EmitPass* emitPass) {} 269 270 void AllocateConstants(uint& offset); 271 void AllocateStatelessConstants(uint& offset); 272 void AllocateSimplePushConstants(uint& offset); 273 void AllocateNOSConstants(uint& offset); 274 void AllocateConstants3DShader(uint& offset); GetShaderType() const275 ShaderType GetShaderType() const { return GetContext()->type; } 276 bool IsPatchablePS(); 277 bool IsValueCoalesced(llvm::Value* v); 278 GetHasBarrier() const279 bool GetHasBarrier() const { return m_HasBarrier; } SetHasBarrier()280 void SetHasBarrier() { m_HasBarrier = true; } 281 282 void GetSimdOffsetBase(CVariable*& pVar); 283 /// Returns a simd8 register filled with values [24, 20, 16, 12, 8, 4, 0] 284 /// that are used to index subregisters of a GRF when counting offsets in bytes. 285 /// Used e.g. for indirect addressing via a0 register. 286 CVariable* GetPerLaneOffsetsReg(uint typeSizeInBytes); 287 288 void GetPayloadElementSymbols(llvm::Value* inst, CVariable* payload[], int vecWidth); 289 GetContext() const290 CodeGenContext* GetContext() const { return m_ctx; } 291 292 SProgramOutput* ProgramOutput(); 293 294 bool CanTreatAsAlias(llvm::ExtractElementInst* inst); 295 bool CanTreatScalarSourceAsAlias(llvm::InsertElementInst*); 296 297 bool HasBecomeNoop(llvm::Instruction* inst); 298 299 // If V is not in any congruent class, not aliased to any other 300 // variables, not payload-coalesced, then this function returns 301 // true. 302 bool IsCoalesced(llvm::Value* V); 303 304 bool VMECoalescePattern(llvm::GenIntrinsicInst*); 305 306 bool isUnpacked(llvm::Value* value); 307 308 /// Return true if we are sure that all lanes are active at the begging of the thread HasFullDispatchMask()309 virtual bool HasFullDispatchMask() { return false; } 310 311 llvm::Function* entry; 312 const CBTILayout* m_pBtiLayout; 313 const CPlatform* m_Platform; 314 const CDriverInfo* m_DriverInfo; 315 316 ModuleMetaData* m_ModuleMetadata; 317 318 /// Dispatch size is the number of logical threads running in one hardware thread 319 SIMDMode m_dispatchSize; 320 /// SIMD Size is the default size of instructions 321 ShaderDispatchMode m_ShaderDispatchMode; 322 /// the default emit size for this shader. This is the default size for variables as well 323 /// as the default execution size for each instruction. encoder may override it explicitly 324 /// via CEncoder::SetSIMDSize 325 SIMDMode m_SIMDSize; 326 uint8_t m_numberInstance; 327 PushInfo pushInfo; 328 bool isInputsPulled; //true if any input is pulled, false otherwise 329 bool isMessageTargetDataCacheDataPort; 330 uint m_sendStallCycle; 331 uint m_staticCycle; 332 unsigned m_spillSize = 0; 333 float m_spillCost = 0; // num weighted spill inst / total inst 334 335 std::vector<llvm::Value*> m_argListCache; 336 337 /// The size in byte used by igc (non-spill space). And this 338 /// is the value passed to VISA so that VISA's spill, if any, 339 /// will go after this space. 340 uint m_ScratchSpaceSize; 341 342 ShaderStats* m_shaderStats; 343 344 // Number of binding table entries per cache line. 345 static constexpr DWORD cBTEntriesPerCacheLine = 32; 346 // Max BTI value that can increase binding table count. 347 // SampleEngine: Binding Table Index is set to 252 specifies the bindless surface offset. 348 // DataPort: The special entry 255 is used to reference Stateless A32 or A64 address model, 349 // and the special entry 254 is used to reference the SLM address model. 350 // The special entry 252 is used to reference bindless resource operation. 351 static constexpr DWORD MAX_BINDING_TABLE_INDEX = 251; 352 static constexpr uint cMessageExtendedDescriptorEOTBit = BIT(5); 353 GetCCTupleToVariableMapping(CoalescingEngine::CCTuple * ccTuple)354 CVariable* GetCCTupleToVariableMapping(CoalescingEngine::CCTuple* ccTuple) 355 { 356 return ccTupleMapping[ccTuple]; 357 } 358 addConstantInPool(llvm::Constant * C,CVariable * Var)359 void addConstantInPool(llvm::Constant* C, CVariable* Var) { 360 ConstantPool[C] = Var; 361 } 362 lookupConstantInPool(llvm::Constant * C)363 CVariable* lookupConstantInPool(llvm::Constant* C) { 364 return ConstantPool.lookup(C); 365 } 366 367 unsigned int EvaluateSIMDConstExpr(llvm::Value* C); 368 369 /// Initialize per function status. 370 void BeginFunction(llvm::Function* F); 371 // This method split payload interpolations from the shader into another compilation unit 372 void SplitPayloadFromShader(llvm::Function* F); 373 /// This method is used to create the vISA variable for function F's formal return value 374 CVariable* getOrCreateReturnSymbol(llvm::Function* F); 375 /// This method is used to create the vISA variable for function F's formal argument 376 CVariable* getOrCreateArgumentSymbol( 377 llvm::Argument* Arg, 378 bool ArgInCallee, // true if Arg isn't in current func 379 bool useStackCall = false); 380 void UpdateSymbolMap(llvm::Value* v, CVariable* CVar); 381 VISA_Type GetType(llvm::Type* type); 382 uint32_t GetNumElts(llvm::Type* type, bool isUniform = false); 383 384 /// Evaluate constant expression and return the result immediate value. 385 uint64_t GetConstantExpr(llvm::ConstantExpr* C); 386 387 GetMaxUsedBindingTableEntryCount(void) const388 uint32_t GetMaxUsedBindingTableEntryCount(void) const 389 { 390 if (m_BindingTableUsedEntriesBitmap != 0) 391 { 392 // m_BindingTableEntryCount is index; '+ 1' due to calculate total used count. 393 return (m_BindingTableEntryCount + 1); 394 } 395 return 0; 396 } 397 GetBindingTableEntryBitmap(void) const398 uint32_t GetBindingTableEntryBitmap(void) const 399 { 400 return m_BindingTableUsedEntriesBitmap; 401 } 402 SetBindingTableEntryCountAndBitmap(bool directIdx,BufferType bufType,uint32_t typeBti,uint32_t bti)403 void SetBindingTableEntryCountAndBitmap(bool directIdx, BufferType bufType, uint32_t typeBti, uint32_t bti) 404 { 405 if (bti <= MAX_BINDING_TABLE_INDEX) 406 { 407 if (directIdx) 408 { 409 m_BindingTableEntryCount = (bti <= m_pBtiLayout->GetBindingTableEntryCount()) ? (std::max(bti, m_BindingTableEntryCount)) : m_BindingTableEntryCount; 410 m_BindingTableUsedEntriesBitmap |= BIT(bti / cBTEntriesPerCacheLine); 411 412 if (bufType == RESOURCE) 413 { 414 m_shaderResourceLoaded[typeBti / 32] |= BIT(typeBti % 32); 415 } 416 else if (bufType == CONSTANT_BUFFER) 417 { 418 m_constantBufferLoaded |= BIT(typeBti); 419 } 420 else if (bufType == UAV) 421 { 422 m_uavLoaded |= QWBIT(typeBti); 423 } 424 else if (bufType == RENDER_TARGET) 425 { 426 m_renderTargetLoaded |= BIT(typeBti); 427 } 428 } 429 else 430 { 431 // Indirect addressing, set the maximum BTI. 432 m_BindingTableEntryCount = m_pBtiLayout->GetBindingTableEntryCount(); 433 m_BindingTableUsedEntriesBitmap |= BITMASK_RANGE(0, (m_BindingTableEntryCount / cBTEntriesPerCacheLine)); 434 435 if (bufType == RESOURCE) 436 { 437 unsigned int MaxArray = m_pBtiLayout->GetTextureIndexSize() / 32; 438 for (unsigned int i = 0; i < MaxArray; i++) 439 { 440 m_shaderResourceLoaded[i] = 0xffffffff; 441 } 442 443 for (unsigned int i = MaxArray * 32; i < m_pBtiLayout->GetTextureIndexSize(); i++) 444 { 445 m_shaderResourceLoaded[MaxArray] = BIT(i % 32); 446 } 447 } 448 else if (bufType == CONSTANT_BUFFER) 449 { 450 m_constantBufferLoaded |= BITMASK_RANGE(0, m_pBtiLayout->GetConstantBufferIndexSize()); 451 } 452 else if (bufType == UAV) 453 { 454 m_uavLoaded |= QWBITMASK_RANGE(0, m_pBtiLayout->GetUavIndexSize()); 455 } 456 else if (bufType == RENDER_TARGET) 457 { 458 m_renderTargetLoaded |= BITMASK_RANGE(0, m_pBtiLayout->GetRenderTargetIndexSize()); 459 } 460 } 461 } 462 } 463 464 /// Evaluate the Sampler Count field value. 465 unsigned int GetSamplerCount(unsigned int samplerCount); 466 467 static unsigned GetIMEReturnPayloadSize(llvm::GenIntrinsicInst* I); 468 addCVarsForVectorBC(llvm::BitCastInst * BCI,llvm::SmallVector<CVariable *,8> CVars)469 void addCVarsForVectorBC(llvm::BitCastInst* BCI, llvm::SmallVector<CVariable*, 8> CVars) 470 { 471 IGC_ASSERT_MESSAGE(m_VectorBCItoCVars.find(BCI) == std::end(m_VectorBCItoCVars), "a variable already exists for this vector bitcast"); 472 m_VectorBCItoCVars.try_emplace(BCI, CVars); 473 } 474 getCVarForVectorBCI(llvm::BitCastInst * BCI,int index)475 CVariable* getCVarForVectorBCI(llvm::BitCastInst* BCI, int index) 476 { 477 auto iter = m_VectorBCItoCVars.find(BCI); 478 if (iter == m_VectorBCItoCVars.end()) 479 { 480 return nullptr; 481 } 482 return (*iter).second[index]; 483 } 484 SetHasGlobalStatelessAccess()485 void SetHasGlobalStatelessAccess() { m_HasGlobalStatelessMemoryAccess = true; } GetHasGlobalStatelessAccess() const486 bool GetHasGlobalStatelessAccess() const { return m_HasGlobalStatelessMemoryAccess; } SetHasConstantStatelessAccess()487 void SetHasConstantStatelessAccess() { m_HasConstantStatelessMemoryAccess = true; } GetHasConstantStatelessAccess() const488 bool GetHasConstantStatelessAccess() const { return m_HasConstantStatelessMemoryAccess; } SetHasGlobalAtomics()489 void SetHasGlobalAtomics() { m_HasGlobalAtomics = true; } GetHasGlobalAtomics() const490 bool GetHasGlobalAtomics() const { return m_HasGlobalAtomics; } GetHasDPAS() const491 bool GetHasDPAS() const { return m_HasDPAS; } SetHasDPAS()492 void SetHasDPAS() { m_HasDPAS = true; } IncStatelessWritesCount()493 void IncStatelessWritesCount() { ++m_StatelessWritesCount; } IncIndirectStatelessCount()494 void IncIndirectStatelessCount() { ++m_IndirectStatelessCount; } GetStatelessWritesCount() const495 uint32_t GetStatelessWritesCount() const { return m_StatelessWritesCount; } GetIndirectStatelessCount() const496 uint32_t GetIndirectStatelessCount() const { return m_IndirectStatelessCount; } 497 498 // In bytes getGRFSize() const499 uint32_t getGRFSize() const { return m_Platform->getGRFSize(); } 500 // in DWORDs getMinPushConstantBufferAlignmentInBytes() const501 uint32_t getMinPushConstantBufferAlignmentInBytes() const { return m_Platform->getMinPushConstantBufferAlignment() * sizeof(DWORD); } 502 503 getGRFAlignment() const504 e_alignment getGRFAlignment() const { return CVariable::getAlignment(getGRFSize()); } 505 GetSymbolMapping()506 llvm::DenseMap<llvm::Value*, CVariable*>& GetSymbolMapping() 507 { 508 return symbolMapping; 509 } 510 GetGlobalMapping()511 llvm::DenseMap<llvm::Value*, CVariable*>& GetGlobalMapping() 512 { 513 return globalSymbolMapping; 514 } 515 GetKernelArgOffset(CVariable * argV)516 int64_t GetKernelArgOffset(CVariable* argV) 517 { 518 auto it = kernelArgToPayloadOffsetMap.find(argV); 519 return it != kernelArgToPayloadOffsetMap.end() ? (int64_t) it->second : -1; 520 } 521 522 DebugInfoData& GetDebugInfoData(); 523 524 unsigned int GetPrimitiveTypeSizeInRegisterInBits(const llvm::Type* Ty) const; 525 unsigned int GetPrimitiveTypeSizeInRegister(const llvm::Type* Ty) const; 526 unsigned int GetScalarTypeSizeInRegisterInBits(const llvm::Type* Ty) const; 527 unsigned int GetScalarTypeSizeInRegister(const llvm::Type* Ty) const; 528 HasStackCalls() const529 bool HasStackCalls() const { return m_HasStackCalls; } SetHasStackCalls()530 void SetHasStackCalls() { m_HasStackCalls = true; } IsIntelSymbolTableVoidProgram() const531 bool IsIntelSymbolTableVoidProgram() const { return m_isIntelSymbolTableVoidProgram; } SetIsIntelSymbolTableVoidProgram()532 void SetIsIntelSymbolTableVoidProgram() { m_isIntelSymbolTableVoidProgram = true; } 533 534 protected: 535 bool CompileSIMDSizeInCommon(SIMDMode simdMode); 536 uint32_t GetShaderThreadUsageRate(); 537 private: 538 // Return DefInst's CVariable if it could be reused for UseInst, and return 539 // nullptr otherwise. 540 CVariable* reuseSourceVar(llvm::Instruction* UseInst, 541 llvm::Instruction* DefInst, 542 e_alignment preferredAlign); 543 544 // Return nullptr if no source variable is reused. Otherwise return a 545 // CVariable from its source operand. 546 CVariable* GetSymbolFromSource(llvm::Instruction* UseInst, 547 e_alignment preferredAlign); 548 549 protected: 550 CShaderProgram* m_parent; 551 CodeGenContext* m_ctx; 552 WIAnalysis* m_WI; 553 DeSSA* m_deSSA; 554 CoalescingEngine* m_coalescingEngine; 555 CodeGenPatternMatch* m_CG; 556 llvm::DominatorTree* m_DT; 557 const llvm::DataLayout* m_DL; 558 GenXFunctionGroupAnalysis* m_FGA; 559 VariableReuseAnalysis* m_VRA; 560 561 uint m_numBlocks; 562 IGC::IGCMD::MetaDataUtils* m_pMdUtils; 563 564 #if defined(_DEBUG) || defined(_INTERNAL) 565 llvm::SpecificBumpPtrAllocator<CVariable> Allocator; 566 #else 567 llvm::BumpPtrAllocator Allocator; 568 #endif 569 570 // Mapping from formal argument to its variable or from function to its 571 // return variable. Per kernel mapping. Used when llvm functions are 572 // compiled into vISA subroutine 573 llvm::DenseMap<llvm::Value*, CVariable*> globalSymbolMapping; 574 575 llvm::DenseMap<llvm::Value*, CVariable*> symbolMapping; 576 // Yet another map: a mapping from ccTuple to its corresponding root variable. 577 // Variables that participate in congruence class tuples will be defined as 578 // aliases (with respective offset) to the root variable. 579 llvm::DenseMap<CoalescingEngine::CCTuple*, CVariable*> ccTupleMapping; 580 // Constant pool. 581 llvm::DenseMap<llvm::Constant*, CVariable*> ConstantPool; 582 583 // keep a map when we generate accurate mask for vector value 584 // in order to reduce register usage 585 llvm::DenseMap<llvm::Value*, uint32_t> extractMasks; 586 587 // keep a map for each kernel argument to its allocated payload offset 588 llvm::DenseMap<CVariable*, uint32_t> kernelArgToPayloadOffsetMap; 589 590 CEncoder encoder; 591 std::vector<CVariable*> setup; 592 std::vector<CVariable*> payloadLiveOutSetup; 593 std::vector<CVariable*> payloadTempSetup; 594 std::vector<CVariable*> patchConstantSetup; 595 596 uint m_maxBlockId; 597 598 CVariable* m_R0; 599 CVariable* m_NULL; 600 CVariable* m_TSC; 601 CVariable* m_SR0; 602 CVariable* m_CR0; 603 CVariable* m_CE0; 604 CVariable* m_DBG; 605 CVariable* m_HW_TID; 606 CVariable* m_SP; 607 CVariable* m_FP; 608 CVariable* m_SavedFP; 609 CVariable* m_ARGV; 610 CVariable* m_RETV; 611 CVariable* m_SavedSRetPtr; 612 CVariable* m_ImplArgBufPtr; 613 CVariable* m_LocalIdBufPtr; 614 615 std::vector<USC::SConstantGatherEntry> gatherMap; 616 uint m_ConstantBufferLength; 617 uint m_constantBufferMask; 618 uint m_constantBufferLoaded; 619 uint64_t m_uavLoaded; 620 uint m_shaderResourceLoaded[4]; 621 uint m_renderTargetLoaded; 622 623 int m_cbSlot; 624 uint m_statelessCBPushedSize; 625 uint m_NOSBufferSize = 0; 626 627 /// holds max number of inputs that can be pushed for this shader unit 628 static const uint32_t m_pMaxNumOfPushedInputs; 629 630 bool m_HasBarrier; 631 SProgramOutput m_simdProgram; 632 633 // Holds max used binding table entry index. 634 uint32_t m_BindingTableEntryCount; 635 636 // Holds binding table entries bitmap. 637 uint32_t m_BindingTableUsedEntriesBitmap; 638 639 // for each vector BCI whose uses are all extractElt with imm offset, 640 // we store the CVariables for each index 641 llvm::DenseMap<llvm::Instruction*, llvm::SmallVector<CVariable*, 8>> m_VectorBCItoCVars; 642 643 // Those two are for stateful token setup. It is a quick 644 // special case checking. Once a generic approach is added, 645 // this two fields shall be retired. 646 bool m_HasGlobalStatelessMemoryAccess; 647 bool m_HasConstantStatelessMemoryAccess; 648 649 bool m_HasGlobalAtomics = false; 650 651 bool m_HasDPAS = false; 652 653 uint32_t m_StatelessWritesCount = 0; 654 uint32_t m_IndirectStatelessCount = 0; 655 656 DebugInfoData diData; 657 658 bool m_HasStackCalls = false; 659 bool m_isIntelSymbolTableVoidProgram = false; 660 }; 661 662 /// This class contains the information for the different SIMD version 663 /// of a kernel. Each kernel in the module is associated to one CShaderProgram 664 class CShaderProgram 665 { 666 public: 667 typedef llvm::MapVector<llvm::Function*, CShaderProgram*> KernelShaderMap; 668 CShaderProgram(CodeGenContext* ctx, llvm::Function* kernel); 669 ~CShaderProgram(); 670 CShader* GetOrCreateShader(SIMDMode simd, ShaderDispatchMode mode = ShaderDispatchMode::NOT_APPLICABLE); 671 CShader* GetShader(SIMDMode simd, ShaderDispatchMode mode = ShaderDispatchMode::NOT_APPLICABLE); 672 void DeleteShader(SIMDMode simd, ShaderDispatchMode mode = ShaderDispatchMode::NOT_APPLICABLE); GetContext()673 CodeGenContext* GetContext() { return m_context; } 674 void FillProgram(SVertexShaderKernelProgram* pKernelProgram); 675 void FillProgram(SHullShaderKernelProgram* pKernelProgram); 676 void FillProgram(SDomainShaderKernelProgram* pKernelProgram); 677 void FillProgram(SGeometryShaderKernelProgram* pKernelProgram); 678 void FillProgram(SPixelShaderKernelProgram* pKernelProgram); 679 void FillProgram(SComputeShaderKernelProgram* pKernelProgram); 680 void FillProgram(SOpenCLProgramInfo* pKernelProgram); 681 ShaderStats* m_shaderStats; 682 683 protected: 684 CShader*& GetShaderPtr(SIMDMode simd, ShaderDispatchMode mode); 685 CShader* CreateNewShader(SIMDMode simd); 686 void ClearShaderPtr(SIMDMode simd); 687 hasShaderOutput(CShader * shader)688 inline bool hasShaderOutput(CShader* shader) 689 { 690 return (shader && shader->ProgramOutput()->m_programSize > 0); 691 } 692 freeShaderOutput(CShader * shader)693 inline void freeShaderOutput(CShader* shader) 694 { 695 if (hasShaderOutput(shader)) 696 { 697 IGC::aligned_free(shader->ProgramOutput()->m_programBin); 698 shader->ProgramOutput()->m_programSize = 0; 699 } 700 } 701 702 CodeGenContext* m_context; 703 llvm::Function* m_kernel; 704 std::array<CShader*, 8> m_SIMDshaders; 705 }; 706 707 struct SInstContext 708 { 709 CVariable* flag; 710 e_modifier dst_mod; 711 bool invertFlag; initIGC::SInstContext712 void init() 713 { 714 flag = NULL; 715 dst_mod = EMOD_NONE; 716 invertFlag = false; 717 } 718 }; 719 720 static const SInstContext g_InitContext = 721 { 722 NULL, 723 EMOD_NONE, 724 false, 725 }; 726 727 void unify_opt_PreProcess(CodeGenContext* pContext); 728 // Forward declaration 729 struct PSSignature; 730 void CodeGen(PixelShaderContext* ctx, CShaderProgram::KernelShaderMap& shaders, PSSignature* pSignature = nullptr); 731 void CodeGen(OpenCLProgramContext* ctx, CShaderProgram::KernelShaderMap& shaders); 732 } 733