1 /*========================== begin_copyright_notice ============================
2 
3 Copyright (C) 2017-2021 Intel Corporation
4 
5 SPDX-License-Identifier: MIT
6 
7 ============================= end_copyright_notice ===========================*/
8 
9 #include "common/LLVMWarningsPush.hpp"
10 #include <llvm/IR/Function.h>
11 #include <llvmWrapper/IR/DerivedTypes.h>
12 #include "common/LLVMWarningsPop.hpp"
13 #include "AdaptorCommon/ImplicitArgs.hpp"
14 #include "Compiler/CISACodeGen/ShaderCodeGen.hpp"
15 #include "Compiler/CISACodeGen/DeSSA.hpp"
16 #include "Compiler/CISACodeGen/GenCodeGenModule.h"
17 #include "Compiler/CISACodeGen/messageEncoding.hpp"
18 #include "Compiler/CISACodeGen/VariableReuseAnalysis.hpp"
19 #include "Compiler/CISACodeGen/PixelShaderCodeGen.hpp"
20 #include "Compiler/CISACodeGen/VertexShaderCodeGen.hpp"
21 #include "Compiler/CISACodeGen/GeometryShaderCodeGen.hpp"
22 #include "Compiler/CISACodeGen/ComputeShaderCodeGen.hpp"
23 #include "Compiler/CISACodeGen/HullShaderCodeGen.hpp"
24 #include "Compiler/CISACodeGen/DomainShaderCodeGen.hpp"
25 #include "Compiler/CISACodeGen/OpenCLKernelCodeGen.hpp"
26 #include "Compiler/MetaDataApi/MetaDataApi.h"
27 #include "common/secure_mem.h"
28 #include "Probe/Assertion.h"
29 
30 using namespace llvm;
31 using namespace IGC;
32 using namespace IGC::IGCMD;
33 
// Construct the per-function shader object owned by |pProgram|.
// All analysis pointers and accounting state start out cleared; they are
// populated later by the codegen/analysis passes before emission.
CShader::CShader(Function* pFunc, CShaderProgram* pProgram)
    : entry(pFunc)
    , m_parent(pProgram)
    , encoder()
    , m_HasBarrier(false)
{
    m_ctx = m_parent->GetContext();
    // Analysis results (WIAnalysis, DeSSA, coalescing, etc.) — set by later passes.
    m_WI = nullptr;
    m_deSSA = nullptr;
    m_coalescingEngine = nullptr;
    m_DL = nullptr;
    m_FGA = nullptr;
    m_VRA = nullptr;
    m_shaderStats = nullptr;
    // Bookkeeping of constant buffers / surfaces touched by this shader.
    m_constantBufferMask = 0;
    m_constantBufferLoaded = 0;
    m_uavLoaded = 0;
    for (int i = 0; i < 4; i++)
    {
        m_shaderResourceLoaded[i] = 0;
    }
    m_renderTargetLoaded = 0;
    isInputsPulled = false;
    m_cbSlot = -1;
    m_statelessCBPushedSize = 0;
    isMessageTargetDataCacheDataPort = false;
    m_BindingTableEntryCount = 0;
    m_BindingTableUsedEntriesBitmap = 0;
    // [OCL] preAnalysis()/ParseShaderSpecificOpcode() must
    // set this to true if there is any stateless access.
    m_HasGlobalStatelessMemoryAccess = false;
    m_HasConstantStatelessMemoryAccess = false;
    m_HasDPAS = false;

    m_simdProgram.init(!m_ctx->platform.hasScratchSurface(), m_ctx->platform.maxPerThreadScratchSpace(), GetContext()->getModuleMetaData()->compOpt.UseScratchSpacePrivateMemory);
}
70 
// Reset all per-compilation encoder state before (re)building vISA for this
// shader at the given dispatch size/mode. Clears predefined-variable caches,
// symbol maps and payload setup so the shader can be re-emitted cleanly.
// NOTE(review): canAbortOnSpill is unused in this visible body — presumably
// consumed elsewhere or vestigial; confirm before removing.
void CShader::InitEncoder(SIMDMode simdSize, bool canAbortOnSpill, ShaderDispatchMode shaderMode)
{
    m_sendStallCycle = 0;
    m_staticCycle = 0;
    m_maxBlockId = 0;
    m_ScratchSpaceSize = 0;
    // Lazily-created predefined registers — invalidated on re-init.
    m_R0 = nullptr;
    m_NULL = nullptr;
    m_TSC = nullptr;
    m_SR0 = nullptr;
    m_CR0 = nullptr;
    m_CE0 = nullptr;
    m_DBG = nullptr;
    m_HW_TID = nullptr;
    m_SP = nullptr;
    m_FP = nullptr;
    m_SavedFP = nullptr;
    m_ARGV = nullptr;
    m_RETV = nullptr;
    m_SavedSRetPtr = nullptr;
    m_ImplArgBufPtr = nullptr;
    m_LocalIdBufPtr = nullptr;

    // SIMD32 is a SIMD16 shader with 2 instance of each instruction
    m_SIMDSize = (simdSize == SIMDMode::SIMD8 ? SIMDMode::SIMD8 : SIMDMode::SIMD16);
    m_ShaderDispatchMode = shaderMode;
    m_numberInstance = simdSize == SIMDMode::SIMD32 ? 2 : 1;
    m_dispatchSize = simdSize;
    globalSymbolMapping.clear();
    symbolMapping.clear();
    ccTupleMapping.clear();
    ConstantPool.clear();
    setup.clear();
    patchConstantSetup.clear();
    kernelArgToPayloadOffsetMap.clear();
    encoder.SetProgram(this);
}
108 
109 // Pre-analysis pass to be executed before call to visa builder so we can pass scratch space offset
PreAnalysisPass()110 void CShader::PreAnalysisPass()
111 {
112     ExtractGlobalVariables();
113 
114     auto funcMDItr = m_ModuleMetadata->FuncMD.find(entry);
115     if (funcMDItr != m_ModuleMetadata->FuncMD.end())
116     {
117         if (funcMDItr->second.privateMemoryPerWI != 0)
118         {
119             if (GetContext()->getModuleMetaData()->compOpt.UseScratchSpacePrivateMemory
120                 || GetContext()->getModuleMetaData()->compOpt.UseStatelessforPrivateMemory
121                 )
122             {
123                 const uint32_t GRFSize = getGRFSize();
124                 IGC_ASSERT(0 < GRFSize);
125 
126                 m_ScratchSpaceSize = funcMDItr->second.privateMemoryPerWI * numLanes(m_dispatchSize);
127 
128                 // Round up to GRF-byte aligned.
129                 m_ScratchSpaceSize = ((GRFSize + m_ScratchSpaceSize - 1) / GRFSize) * GRFSize;
130 
131             }
132         }
133     }
134 
135     for (auto BB = entry->begin(), BE = entry->end(); BB != BE; ++BB) {
136         llvm::BasicBlock* pLLVMBB = &(*BB);
137         llvm::BasicBlock::InstListType& instructionList = pLLVMBB->getInstList();
138         for (auto I = instructionList.begin(), E = instructionList.end(); I != E; ++I) {
139             llvm::Instruction* inst = &(*I);
140             ParseShaderSpecificOpcode(inst);
141         }
142     }
143 }
144 
// Accessor for the compiled program output container for this shader.
SProgramOutput* CShader::ProgramOutput()
{
    return &m_simdProgram;
}
149 
// Emit an end-of-thread URB write message. The message carries no real data:
// EOT messages cannot have message length 0, so a minimal 3-GRF payload
// (header + zero channel mask + undef data) is sent instead.
void CShader::EOTURBWrite()
{

    CEncoder& encoder = GetEncoder();
    uint messageLength = 3;

    // Creating a payload of size 3 = header + channelmask + undef data
    // As EOT message cant have message length == 0, setting channel mask = 0 and data = undef.
    CVariable* pEOTPayload =
        GetNewVariable(
            messageLength * numLanes(SIMDMode::SIMD8),
            ISA_TYPE_D, EALIGN_GRF, false, 1, "EOTPayload");

    CVariable* zero = ImmToVariable(0x0, ISA_TYPE_D);
    // write at handle 0
    CopyVariable(pEOTPayload, zero, 0);
    // use 0 as write mask
    CopyVariable(pEOTPayload, zero, 1);

    // Extended descriptor: URB target with the EOT bit set.
    constexpr uint exDesc = EU_MESSAGE_TARGET_URB | cMessageExtendedDescriptorEOTBit;

    const uint desc = UrbMessage(
        messageLength,
        0,
        true,
        false,
        true,
        0,
        EU_URB_OPCODE_SIMD8_WRITE);

    CVariable* pMessDesc = ImmToVariable(desc, ISA_TYPE_D);

    encoder.Send(nullptr, pEOTPayload, exDesc, pMessDesc);
    encoder.Push();
}
185 
// Emit an end-of-thread render-target write to the null surface (no color
// sources, all channels undefined), used to terminate pixel shader threads.
// |isPerCoarse| selects coarse-pixel mode for the write.
void CShader::EOTRenderTarget(CVariable* r1, bool isPerCoarse)
{
    CVariable* src[4] = { nullptr, nullptr, nullptr, nullptr };
    bool isUndefined[4] = { true, true, true, true };
    CVariable* const nullSurfaceBti = ImmToVariable(m_pBtiLayout->GetNullSurfaceIdx(), ISA_TYPE_D);
    CVariable* const blendStateIndex = ImmToVariable(0, ISA_TYPE_D);
    // Account for the null surface in the binding table bookkeeping.
    SetBindingTableEntryCountAndBitmap(true, BUFFER_TYPE_UNKNOWN, 0, m_pBtiLayout->GetNullSurfaceIdx());
    encoder.RenderTargetWrite(
        src,
        isUndefined,
        true,  // lastRenderTarget,
        true,  // Null RT
        false, // perSample,
        isPerCoarse, // coarseMode,
        false, // isHeaderMaskFromCe0,
        nullSurfaceBti,
        blendStateIndex,
        nullptr, // source0Alpha,
        nullptr, // oMaskOpnd,
        nullptr, // outputDepthOpnd,
        nullptr, // stencilOpnd,
        nullptr, // cpscounter,
        nullptr, // sampleIndex,
        r1);
    encoder.Push();
}
212 
213 
// Default function epilogue: emit an end-of-thread. Shader-type subclasses
// override this for richer epilogues; |ret| is unused in the base version.
void CShader::AddEpilogue(llvm::ReturnInst* ret)
{
    encoder.EOT();
    encoder.Push();
}
219 
// Create the vISA predefined registers used by the stack-call ABI:
// ARGV/RETV for argument/return passing, SP/FP for the software stack, and
// (when supported) pointers to the implicit-arg and local-ID buffers.
void CShader::InitializeStackVariables()
{
    // create argument-value register, limited to 12 GRF
    m_ARGV = GetNewVariable(getGRFSize() * 3, ISA_TYPE_D, getGRFAlignment(), false, 1, "ARGV");
    encoder.GetVISAPredefinedVar(m_ARGV, PREDEFINED_ARG);
    // create return-value register, limited to 4 GRF
    m_RETV = GetNewVariable(getGRFSize(), ISA_TYPE_D, getGRFAlignment(), false, 1, "ReturnValue");
    encoder.GetVISAPredefinedVar(m_RETV, PREDEFINED_RET);
    // create stack-pointer register
    m_SP = GetNewVariable(1, ISA_TYPE_UQ, EALIGN_QWORD, true, 1, "SP");
    encoder.GetVISAPredefinedVar(m_SP, PREDEFINED_FE_SP);
    // create frame-pointer register
    m_FP = GetNewVariable(1, ISA_TYPE_UQ, EALIGN_QWORD, true, 1, "FP");
    encoder.GetVISAPredefinedVar(m_FP, PREDEFINED_FE_FP);
    // create pointers locations to buffers
    if (!m_ctx->platform.isXeHPSDVPlus() &&
        IGC_IS_FLAG_DISABLED(ForceInlineStackCallWithImplArg))
    {
        m_ImplArgBufPtr = GetNewVariable(1, ISA_TYPE_UQ, EALIGN_QWORD, true, 1, "ImplArgPtr");
        encoder.GetVISAPredefinedVar(m_ImplArgBufPtr, PREDEFINED_IMPL_ARG_BUF_PTR);
        m_LocalIdBufPtr = GetNewVariable(1, ISA_TYPE_UQ, EALIGN_QWORD, true, 1, "LocalIdPtr");
        encoder.GetVISAPredefinedVar(m_LocalIdBufPtr, PREDEFINED_LOCAL_ID_BUF_PTR);
    }
}
244 
245 /// save FP of previous frame when entering a stack-call function
// Save the caller's frame pointer on entry to a stack-call function.
// Must be paired with RestoreStackState(); asserts no save is outstanding.
void CShader::SaveStackState()
{
    IGC_ASSERT(!m_SavedFP);
    IGC_ASSERT(m_FP);
    IGC_ASSERT(m_SP);
    m_SavedFP = GetNewVariable(m_FP);
    encoder.Copy(m_SavedFP, m_FP);
    encoder.Push();
}
255 
256 /// restore SP and FP when exiting a stack-call function
// Restore SP and FP when exiting a stack-call function. The order matters:
// SP is rewound to the current FP first, then FP is restored from the
// value saved by SaveStackState().
void CShader::RestoreStackState()
{
    IGC_ASSERT(m_SavedFP);
    IGC_ASSERT(m_FP);
    IGC_ASSERT(m_SP);
    // Restore SP to current FP
    encoder.Copy(m_SP, m_FP);
    encoder.Push();
    // Restore FP to previous frame's FP
    encoder.Copy(m_FP, m_SavedFP);
    encoder.Push();
    // Clear the save slot so a subsequent SaveStackState() is legal.
    m_SavedFP = nullptr;
}
270 
// Create CVariable symbols for R0 and for every function argument:
// explicit args, implicit args, then push-analysis args (entry funcs only).
// Implicit-arg symbols are also recorded in globalSymbolMapping so they stay
// visible to subroutines for the whole kernel's CodeGen.
void CShader::CreateImplicitArgs()
{
    m_numBlocks = entry->size();
    m_R0 = GetNewVariable(getGRFSize() / SIZE_DWORD, ISA_TYPE_D, EALIGN_GRF, false, 1, "R0");
    encoder.GetVISAPredefinedVar(m_R0, PREDEFINED_R0);

    // create variables for implicit args
    ImplicitArgs implicitArgs(*entry, m_pMdUtils);
    unsigned numImplicitArgs = implicitArgs.size();

    // Push Args are only for entry function
    const unsigned numPushArgsEntry = m_ModuleMetadata->pushInfo.pushAnalysisWIInfos.size();
    const unsigned numPushArgs = (isEntryFunc(m_pMdUtils, entry) && !isNonEntryMultirateShader(entry) ? numPushArgsEntry : 0);
    const int numFuncArgs = entry->arg_size() - numImplicitArgs - numPushArgs;
    IGC_ASSERT_MESSAGE(0 <= numFuncArgs, "Function arg size does not match meta data and push args.");

    // Create symbol for every arguments [5/2019]
    //   (Previously, symbols are created only for implicit args.)
    //   Since vISA requires input var (argument) to be root symbol (CVariable)
    //   and GetSymbol() does not guarantee this due to coalescing of argument
    //   values and others. Here, we handle arguments specially by creating
    //   a CVariable symbol for each argument, and use this newly-created symbol
    //   as the root symbol for its congruent class if any. This should always
    //   work as it does not matter which value in a coalesced set is going to
    //   be a root symbol.
    //
    //   Once a root symbol is created, the root value of its conguent class
    //   needs to have as its symbol an alias to this root symbol.

    // Update SymbolMapping for argument value.
    auto updateArgSymbolMapping = [&](Value* Arg, CVariable* CVarArg) {
        symbolMapping.insert(std::make_pair(Arg, CVarArg));
        Value* Node = m_deSSA ? m_deSSA->getRootValue(Arg) : nullptr;
        if (Node)
        {
            // If Arg isn't root, must setup symbolMapping for root.
            if (Node != Arg) {
                // 'Node' should not have a symbol entry at this moment.
                IGC_ASSERT_MESSAGE(symbolMapping.count(Node) == 0, "Root symbol of arg should not be set at this point!");
                CVariable* aV = CVarArg;
                if (IGC_GET_FLAG_VALUE(EnableDeSSAAlias) >= 2)
                {
                    aV = createAliasIfNeeded(Node, CVarArg);
                }
                symbolMapping[Node] = aV;
            }
        }
    };

    // Explicit (source-level) arguments first.
    llvm::Function::arg_iterator arg = entry->arg_begin();
    for (int i = 0; i < numFuncArgs; ++i, ++arg)
    {
        Value* ArgVal = arg;
        if (ArgVal->use_empty())
            continue;
        e_alignment algn = GetPreferredAlignment(ArgVal, m_WI, m_ctx);
        CVariable* ArgCVar = GetNewVector(ArgVal, algn);
        updateArgSymbolMapping(ArgVal, ArgCVar);
    }

    // Implicit arguments follow the explicit ones.
    for (unsigned i = 0; i < numImplicitArgs; ++i, ++arg) {
        ImplicitArg implictArg = implicitArgs[i];
        IGC_ASSERT_MESSAGE((implictArg.getNumberElements() < (UINT16_MAX)), "getNumberElements > higher than 64k");

        bool isUniform = WIAnalysis::isDepUniform(implictArg.getDependency());
        uint16_t nbElements = (uint16_t)implictArg.getNumberElements();

        CVariable* var = GetNewVariable(
            nbElements,
            implictArg.getVISAType(*m_DL),
            implictArg.getAlignType(*m_DL),
            isUniform,
            isUniform ? 1 : m_numberInstance,
            CName(implictArg.getName()));

        if (implictArg.getArgType() == ImplicitArg::R0) {
            encoder.GetVISAPredefinedVar(var, PREDEFINED_R0);
        }

        // This is a per function symbol mapping, that is, only available for a
        // llvm function which will be cleared for each run of EmitVISAPass.
        updateArgSymbolMapping(arg, var);

        // Kernel's implicit arguments's symbols will be available for the
        // whole kernel CodeGen. With this, there is no need to pass implicit
        // arguments and this should help to reduce the register pressure with
        // presence of subroutines.
        IGC_ASSERT_MESSAGE(!globalSymbolMapping.count(&(*arg)), "should not exist already");
        globalSymbolMapping.insert(std::make_pair(&(*arg), var));
    }

    // Finally, push-analysis arguments (entry functions only; see above).
    for (unsigned i = 0; i < numPushArgs; ++i, ++arg)
    {
        Value* ArgVal = arg;
        if (ArgVal->use_empty())
            continue;
        e_alignment algn = GetPreferredAlignment(ArgVal, m_WI, m_ctx);
        CVariable* ArgCVar = GetNewVector(ArgVal, algn);
        updateArgSymbolMapping(ArgVal, ArgCVar);
    }

    CreateAliasVars();
}
374 
// Accessor for the debug-info data collected for this shader.
DebugInfoData& IGC::CShader::GetDebugInfoData()
{
    return diData;
}
379 
380 // For sub-vector aliasing, pre-allocating cvariables for those
381 // valeus that have sub-vector aliasing before emit instructions.
382 // (The sub-vector aliasing is done in VariableReuseAnalysis.)
CreateAliasVars()383 void CShader::CreateAliasVars()
384 {
385     // Create CVariables for vector aliasing (This is more
386     // efficient than doing it on-fly inside getSymbol()).
387     if (GetContext()->getVectorCoalescingControl() > 0 &&
388         !m_VRA->m_aliasMap.empty())
389     {
390         // For each vector alias root, generate cvariable
391         // for it and all its component sub-vector
392         for (auto& II : m_VRA->m_aliasMap)
393         {
394             SSubVecDesc* SV = II.second;
395             Value* rootVal = SV->BaseVector;
396             if (SV->Aliaser != rootVal)
397                 continue;
398             CVariable* rootCVar = GetSymbol(rootVal);
399 
400             // Generate all vector aliasers and their
401             // dessa root if any.
402             for (int i = 0, sz = (int)SV->Aliasers.size(); i < sz; ++i)
403             {
404                 SSubVecDesc* aSV = SV->Aliasers[i];
405                 Value* V = aSV->Aliaser;
406                 // Create alias cvariable for Aliaser and its dessa root if any
407                 Value* Vals[2] = { V, nullptr };
408                 if (m_deSSA) {
409                     Value* dessaRootVal = m_deSSA->getRootValue(V);
410                     if (dessaRootVal && dessaRootVal != V)
411                         Vals[1] = dessaRootVal;
412                 }
413                 int startIx = aSV->StartElementOffset;
414 
415                 for (int i = 0; i < 2; ++i)
416                 {
417                     V = Vals[i];
418                     if (!V)
419                         continue;
420 
421                     Type* Ty = V->getType();
422                     IGCLLVM::FixedVectorType* VTy = dyn_cast<IGCLLVM::FixedVectorType>(Ty);
423                     Type* BTy = VTy ? VTy->getElementType() : Ty;
424                     int nelts = (VTy ? (int)VTy->getNumElements() : 1);
425 
426                     VISA_Type visaTy = GetType(BTy);
427                     int typeBytes = (int)CEncoder::GetCISADataTypeSize(visaTy);
428                     int offsetInBytes = typeBytes * startIx;
429                     int nbelts = nelts;
430                     if (!rootCVar->IsUniform())
431                     {
432                         int width = (int)numLanes(m_SIMDSize);
433                         offsetInBytes *= width;
434                         nbelts *= width;
435                     }
436                     CVariable* Var = GetNewAlias(rootCVar, visaTy, offsetInBytes, nbelts);
437                     symbolMapping.insert(std::pair<llvm::Value*, CVariable*>(V, Var));
438                 }
439             }
440         }
441     }
442 }
443 
// Record a temporary variable that belongs to the payload patch section.
void CShader::AddPatchTempSetup(CVariable* var)
{
    payloadTempSetup.push_back(var);
}
448 
AppendPayloadSetup(CVariable * var)449 bool CShader::AppendPayloadSetup(CVariable* var)
450 {
451     auto v = var->GetAlias() ? var->GetAlias() : var;
452     if (find(payloadLiveOutSetup.begin(), payloadLiveOutSetup.end(), v) != payloadLiveOutSetup.end())
453     {
454         return true;
455     }
456     payloadLiveOutSetup.push_back(v);
457     return false;
458 }
459 
AddSetup(uint index,CVariable * var)460 void CShader::AddSetup(uint index, CVariable* var)
461 {
462     if (setup.size() < index + 1) {
463         setup.resize(index + 1, nullptr);
464     }
465     if (setup[index] == nullptr) {
466         setup[index] = var;
467     }
468 }
469 
AddPatchConstantSetup(uint index,CVariable * var)470 void CShader::AddPatchConstantSetup(uint index, CVariable* var)
471 {
472     if (patchConstantSetup.size() < index + 1) {
473         patchConstantSetup.resize(index + 1, nullptr);
474     }
475     if (patchConstantSetup[index] == nullptr) {
476         patchConstantSetup[index] = var;
477     }
478 }
479 
// Bind |var| to payload input |offset| (for the given SIMD instance) and
// remember the offset for kernel-arg bookkeeping. |forceLiveOut| keeps the
// input alive across the whole payload section (used for code patching).
void CShader::AllocateInput(CVariable* var, uint offset, uint instance, bool forceLiveOut)
{
    // the input offset must respect the variable alignment
    IGC_ASSERT(nullptr != var);
    IGC_ASSERT(offset % (1u << var->GetAlign()) == 0);
    encoder.DeclareInput(var, offset, instance);
    kernelArgToPayloadOffsetMap[var] = offset;
    // For the payload section, we need to mark inputs to be outputs
    // so that inputs will be alive across the entire payload section
    if (forceLiveOut)
    {
        encoder.MarkAsPayloadLiveOut(var);
    }
}
494 
// Bind |var| to payload |offset| and mark it as a shader output.
void CShader::AllocateOutput(CVariable* var, uint offset, uint instance)
{
    IGC_ASSERT(nullptr != var);
    // The offset must respect the variable alignment.
    IGC_ASSERT(offset % (1u << var->GetAlign()) == 0);
    encoder.DeclareInput(var, offset, instance);
    encoder.MarkAsOutput(var);
}
502 
// Lay out all pushed constants for a 3D shader starting at |offset| (updated
// in place and GRF-aligned at the end). The relative order of driver (NOS)
// constants vs. constant-buffer constants depends on a HW workaround.
void CShader::AllocateConstants3DShader(uint& offset)
{
    if (m_Platform->WaForceCB0ToBeZeroWhenSendingPC() && m_DriverInfo->implementPushConstantWA()) {
        // Allocate space for constant pushed from the constant buffer
        AllocateConstants(offset);
        AllocateSimplePushConstants(offset);
        // Allocate space for constant set by driver
        AllocateNOSConstants(offset);
    }
    else {
        // Allocate space for constant set by driver
        AllocateNOSConstants(offset);
        // Allocate space for constant pushed from the constant buffer
        AllocateConstants(offset);
        AllocateSimplePushConstants(offset);
    }
    offset = iSTD::Align(offset, getGRFSize());
}
521 
AllocateConstants(uint & offset)522 void CShader::AllocateConstants(uint& offset)
523 {
524     m_ConstantBufferLength = 0;
525     for (auto I = pushInfo.constants.begin(), E = pushInfo.constants.end(); I != E; I++) {
526         CVariable* var = GetSymbol(m_argListCache[I->second]);
527         AllocateInput(var, offset + m_ConstantBufferLength, 0, encoder.IsCodePatchCandidate());
528         m_ConstantBufferLength += var->GetSize();
529     }
530 
531     m_ConstantBufferLength = iSTD::Align(m_ConstantBufferLength, getGRFSize());
532     offset += m_ConstantBufferLength;
533 }
534 
AllocateSimplePushConstants(uint & offset)535 void CShader::AllocateSimplePushConstants(uint& offset)
536 {
537     for (unsigned int i = 0; i < pushInfo.simplePushBufferUsed; i++)
538     {
539         for (auto I : pushInfo.simplePushInfoArr[i].simplePushLoads)
540         {
541             uint subOffset = I.first;
542             CVariable* var = GetSymbol(m_argListCache[I.second]);
543             AllocateInput(var, subOffset - pushInfo.simplePushInfoArr[i].offset + offset, 0, encoder.IsCodePatchCandidate());
544         }
545         offset += pushInfo.simplePushInfoArr[i].size;
546     }
547 }
548 
AllocateNOSConstants(uint & offset)549 void CShader::AllocateNOSConstants(uint& offset)
550 {
551     uint maxConstantPushed = 0;
552 
553     for (auto I = pushInfo.constantReg.begin(), E = pushInfo.constantReg.end(); I != E; I++) {
554         CVariable* var = GetSymbol(m_argListCache[I->second]);
555         AllocateInput(var, offset + I->first * SIZE_DWORD, 0, encoder.IsCodePatchCandidate());
556         maxConstantPushed = std::max(maxConstantPushed, I->first + 1);
557     }
558     maxConstantPushed = iSTD::Max(maxConstantPushed, static_cast<uint>(m_ModuleMetadata->MinNOSPushConstantSize));
559     m_NOSBufferSize = iSTD::Align(maxConstantPushed * SIZE_DWORD, getGRFSize());
560     offset += m_NOSBufferSize;
561 }
562 
563 
CreateGatherMap()564 void CShader::CreateGatherMap()
565 {
566     int index = -1;
567     gatherMap.reserve(pushInfo.constants.size());
568     for (auto I = pushInfo.constants.begin(), E = pushInfo.constants.end(); I != E; I++)
569     {
570         unsigned int address = (I->first.bufId * 256 * 4) + (I->first.eltId);
571         unsigned int cstOffset = address / 4;
572         unsigned int cstChannel = address % 4;
573         if (cstOffset != index)
574         {
575             USC::SConstantGatherEntry entry;
576             entry.GatherEntry.Fields.constantBufferOffset = cstOffset % 256;
577             entry.GatherEntry.Fields.channelMask = BIT(cstChannel);
578             // with 3DSTATE_DX9_CONSTANT if buffer is more than 4Kb,
579             //  the constant after 255 can be accessed in constant buffer 1
580             int CBIndex = cstOffset / 256;
581             entry.GatherEntry.Fields.constantBufferIndex = CBIndex;
582             m_constantBufferMask |= BIT(CBIndex);
583             gatherMap.push_back(entry);
584             index = cstOffset;
585         }
586         else
587         {
588             gatherMap[gatherMap.size() - 1].GatherEntry.Fields.channelMask |= BIT(cstChannel);
589         }
590     }
591 
592     // The size of the gather map must be even
593     if (gatherMap.size() % 2 != 0)
594     {
595         USC::SConstantGatherEntry entry;
596         entry.GatherEntry.Value = 0;
597         gatherMap.push_back(entry);
598     }
599 }
600 
// Publish constant-buffer related results (gather map, push-constant slots,
// simple-push regions, replacement patterns) into the kernel program output.
// NOTE: pKernelProgram->gatherMap is raw heap memory handed to the consumer;
// ownership transfers with the SKernelProgram — presumably freed by its
// owner; confirm against the consumer before changing allocation here.
void  CShader::CreateConstantBufferOutput(SKernelProgram* pKernelProgram)
{
    pKernelProgram->ConstantBufferMask = m_constantBufferMask;
    pKernelProgram->gatherMapSize = gatherMap.size();
    if (pKernelProgram->gatherMapSize > 0)
    {
        pKernelProgram->gatherMap = new char[pKernelProgram->gatherMapSize * sizeof(USC::SConstantGatherEntry)];
        memcpy_s(pKernelProgram->gatherMap, pKernelProgram->gatherMapSize *
            sizeof(USC::SConstantGatherEntry),
            &gatherMap[0],
            gatherMap.size() * sizeof(USC::SConstantGatherEntry));
        pKernelProgram->ConstantBufferLength = m_ConstantBufferLength / getMinPushConstantBufferAlignmentInBytes();
    }

    // m_cbSlot == -1 means no stateless constant buffer was pushed.
    if (m_cbSlot != -1)
    {
        pKernelProgram->bufferSlot = m_cbSlot;
        pKernelProgram->statelessCBPushedSize = m_statelessCBPushedSize;
    }

    // for simple push
    for (unsigned int i = 0; i < pushInfo.simplePushBufferUsed; i++)
    {
        pKernelProgram->simplePushInfoArr[i].m_cbIdx = pushInfo.simplePushInfoArr[i].cbIdx;
        pKernelProgram->simplePushInfoArr[i].m_pushableAddressGrfOffset= pushInfo.simplePushInfoArr[i].pushableAddressGrfOffset;
        pKernelProgram->simplePushInfoArr[i].m_pushableOffsetGrfOffset = pushInfo.simplePushInfoArr[i].pushableOffsetGrfOffset;
        pKernelProgram->simplePushInfoArr[i].m_offset = pushInfo.simplePushInfoArr[i].offset;
        pKernelProgram->simplePushInfoArr[i].m_size = pushInfo.simplePushInfoArr[i].size;
        pKernelProgram->simplePushInfoArr[i].isStateless = pushInfo.simplePushInfoArr[i].isStateless;
        pKernelProgram->simplePushInfoArr[i].isBindless = pushInfo.simplePushInfoArr[i].isBindless;
    }

    if (GetContext()->m_ConstantBufferReplaceShaderPatterns)
    {
        pKernelProgram->m_ConstantBufferReplaceShaderPatterns = GetContext()->m_ConstantBufferReplaceShaderPatterns;
        pKernelProgram->m_ConstantBufferReplaceShaderPatternsSize = GetContext()->m_ConstantBufferReplaceShaderPatternsSize;
        pKernelProgram->m_ConstantBufferUsageMask = GetContext()->m_ConstantBufferUsageMask;
        pKernelProgram->m_ConstantBufferReplaceSize = GetContext()->m_ConstantBufferReplaceSize;
    }
}
641 
CreateFunctionSymbol(llvm::Function * pFunc)642 void CShader::CreateFunctionSymbol(llvm::Function* pFunc)
643 {
644     // Functions with uses in this module requires relocation
645     CVariable* funcAddr = GetSymbol(pFunc);
646     std::string funcName = pFunc->getName().str();
647     encoder.AddVISASymbol(funcName, funcAddr);
648     encoder.Push();
649 }
650 
CreateGlobalSymbol(llvm::GlobalVariable * pGlobal)651 void CShader::CreateGlobalSymbol(llvm::GlobalVariable* pGlobal)
652 {
653     CVariable* globalAddr = GetSymbol(pGlobal);
654     std::string globalName = pGlobal->getName().str();
655     encoder.AddVISASymbol(globalName, globalAddr);
656     encoder.Push();
657 }
658 
CacheArgumentsList()659 void CShader::CacheArgumentsList()
660 {
661     m_argListCache.clear();
662     for (auto arg = entry->arg_begin(); arg != entry->arg_end(); ++arg)
663         m_argListCache.push_back(&(*arg));
664 }
665 
666 // Pixel shader has dedicated implementation of this function
MapPushedInputs()667 void CShader::MapPushedInputs()
668 {
669     for (auto I = pushInfo.inputs.begin(), E = pushInfo.inputs.end(); I != E; I++)
670     {
671         // We need to map the value associated with the value pushed to a physical register
672         CVariable* var = GetSymbol(m_argListCache[I->second.argIndex]);
673         AddSetup(I->second.index, var);
674     }
675 }
676 
IsPatchablePS()677 bool CShader::IsPatchablePS()
678 {
679     return (GetShaderType() == ShaderType::PIXEL_SHADER &&
680         static_cast<CPixelShader*>(this)->GetPhase() != PSPHASE_PIXEL);
681 }
682 
// Accessor for the R0 header register (created in CreateImplicitArgs()).
CVariable* CShader::GetR0()
{
    return m_R0;
}
687 
// Lazily create and return the vISA predefined null register.
CVariable* CShader::GetNULL()
{
    if (!m_NULL)
    {
        m_NULL = new (Allocator)CVariable(2, true, ISA_TYPE_D, EVARTYPE_GENERAL, EALIGN_DWORD, false, 1, CName::NONE);
        encoder.GetVISAPredefinedVar(m_NULL, PREDEFINED_NULL);
    }
    return m_NULL;
}
697 
// Lazily create and return the timestamp counter (TSC) register.
CVariable* CShader::GetTSC()
{
    if (!m_TSC)
    {
        m_TSC = new (Allocator) CVariable(2, true, ISA_TYPE_UD, EVARTYPE_GENERAL, EALIGN_DWORD, false, 1, CName::NONE);
        encoder.GetVISAPredefinedVar(m_TSC, PREDEFINED_TSC);
    }
    return m_TSC;
}
707 
// Lazily create and return the state register SR0 (4 dwords, uniform).
CVariable* CShader::GetSR0()
{
    if (!m_SR0)
    {
        m_SR0 = GetNewVariable(4, ISA_TYPE_UD, EALIGN_DWORD, true, CName::NONE);

        encoder.GetVISAPredefinedVar(m_SR0, PREDEFINED_SR0);
    }
    return m_SR0;
}
718 
// Lazily create and return the control register CR0 (3 dwords, uniform).
CVariable* CShader::GetCR0()
{
    if (!m_CR0)
    {
        m_CR0 = GetNewVariable(3, ISA_TYPE_UD, EALIGN_DWORD, true, CName::NONE);
        encoder.GetVISAPredefinedVar(m_CR0, PREDEFINED_CR0);
    }
    return m_CR0;
}
728 
// Lazily create and return the channel-enable register CE0.
CVariable* CShader::GetCE0()
{
    if (!m_CE0)
    {
        m_CE0 = GetNewVariable(1, ISA_TYPE_UD, EALIGN_DWORD, true, CName::NONE);
        encoder.GetVISAPredefinedVar(m_CE0, PREDEFINED_CE0);
    }
    return m_CE0;
}
738 
// Lazily create and return the debug register DBG (2 dwords, uniform).
CVariable* CShader::GetDBG()
{
    if (!m_DBG)
    {
        m_DBG = GetNewVariable(2, ISA_TYPE_D, EALIGN_DWORD, true, CName::NONE);
        encoder.GetVISAPredefinedVar(m_DBG, PREDEFINED_DBG);
    }
    return m_DBG;
}
748 
// Lazily compute and return the hardware thread ID. On platforms where the
// HWTID must be derived from SR0, the relevant ID fields are extracted and
// the reserved bits squeezed out; otherwise the predefined HW_TID is used.
CVariable* CShader::GetHWTID()
{
    if (!m_HW_TID)
    {
        if (m_Platform->getHWTIDFromSR0())
        {
            // Deletes 'range' bits starting at 'removebit' from src, shifting
            // the higher bits down to close the gap.
            auto RemoveBitRange = [this](CVariable* &src, unsigned removebit, unsigned range)->void
            {
                CVariable* leftHalf = GetNewVariable(src);
                CVariable* rightHalf = GetNewVariable(src);
                uint32_t mask = BITMASK(removebit);
                // src = (src & mask) | ((src >> range) & ~mask)
                encoder.And(rightHalf, src, ImmToVariable(mask, ISA_TYPE_D));
                encoder.Push();
                encoder.IShr(leftHalf, src, ImmToVariable(range, ISA_TYPE_D));
                encoder.Push();
                encoder.And(leftHalf, leftHalf, ImmToVariable(~mask, ISA_TYPE_D));
                encoder.Push();
                encoder.Or(src, rightHalf, leftHalf);
                encoder.Push();
            };

            // XeHP_SDV
            // [13:11] Slice ID.
            // [10:9] Dual - SubSlice ID
            // [8] SubSlice ID.
            // [7] : EUID[2]
            // [6] : Reserved
            // [5:4] EUID[1:0]
            // [3] : Reserved MBZ
            // [2:0] : TID
            //
            // HWTID is calculated using a concatenation of TID:EUID:SubSliceID:SliceID

            uint32_t bitmask = BITMASK(14);
            m_HW_TID = GetNewVariable(1, ISA_TYPE_UD, EALIGN_DWORD, true, 1, "HWTID");
            encoder.SetNoMask();
            encoder.SetSrcSubReg(0, 0);
            encoder.And(m_HW_TID, GetSR0(), ImmToVariable(bitmask, ISA_TYPE_D));
            encoder.Push();

            // Remove bit [6]
            RemoveBitRange(m_HW_TID, 6, 1);
            // Remove bit [3]
            RemoveBitRange(m_HW_TID, 3, 1);
        }
        else
        {
            m_HW_TID = GetNewVariable(1, ISA_TYPE_UD, EALIGN_DWORD, true, 1, "HWTID");
            encoder.GetVISAPredefinedVar(m_HW_TID, PREDEFINED_HW_TID);
        }
    }
    return m_HW_TID;
}
803 
GetPrivateBase()804 CVariable* CShader::GetPrivateBase()
805 {
806     ImplicitArgs implicitArgs(*entry, m_pMdUtils);
807     unsigned numPushArgs = m_ModuleMetadata->pushInfo.pushAnalysisWIInfos.size();
808     unsigned numImplicitArgs = implicitArgs.size();
809     IGC_ASSERT_MESSAGE(entry->arg_size() >= (numImplicitArgs + numPushArgs), "Function arg size does not match meta data and push args.");
810     unsigned numFuncArgs = entry->arg_size() - numImplicitArgs - numPushArgs;
811 
812     Argument* kerArg = nullptr;
813     llvm::Function::arg_iterator arg = entry->arg_begin();
814     for (unsigned i = 0; i < numFuncArgs; ++i, ++arg);
815     for (unsigned i = 0; i < numImplicitArgs; ++i, ++arg) {
816         ImplicitArg implicitArg = implicitArgs[i];
817         if (implicitArg.getArgType() == ImplicitArg::ArgType::PRIVATE_BASE)
818         {
819             kerArg = (&*arg);
820             break;
821         }
822     }
823     IGC_ASSERT(kerArg);
824     return GetSymbol(kerArg);
825 }
826 
// Pointer to the implicit-argument buffer; must have been initialized earlier.
CVariable* CShader::GetImplArgBufPtr()
{
    IGC_ASSERT(m_ImplArgBufPtr);
    return m_ImplArgBufPtr;
}
832 
// Pointer to the local-ID buffer; must have been initialized earlier.
CVariable* CShader::GetLocalIdBufPtr()
{
    IGC_ASSERT(m_LocalIdBufPtr);
    return m_LocalIdBufPtr;
}
838 
// Current frame pointer; must have been initialized earlier.
CVariable* CShader::GetFP()
{
    IGC_ASSERT(m_FP);
    return m_FP;
}
// Caller's saved frame pointer; may be null if none was saved.
CVariable* CShader::GetPrevFP()
{
    return m_SavedFP;
}
// Current stack pointer; must have been initialized earlier.
CVariable* CShader::GetSP()
{
    IGC_ASSERT(m_SP);
    return m_SP;
}
853 
// Argument-passing variable for stack calls; must have been initialized earlier.
CVariable* CShader::GetARGV()
{
    IGC_ASSERT(m_ARGV);
    return m_ARGV;
}
859 
// Return-value variable for stack calls; must have been initialized earlier.
CVariable* CShader::GetRETV()
{
    IGC_ASSERT(m_RETV);
    return m_RETV;
}
865 
// Exposes the vISA encoder owned by this shader.
CEncoder& CShader::GetEncoder()
{
    return encoder;
}
870 
SaveSRet(CVariable * sretPtr)871 void CShader::SaveSRet(CVariable* sretPtr)
872 {
873     IGC_ASSERT(m_SavedSRetPtr == nullptr);
874     m_SavedSRetPtr = sretPtr;
875 }
876 
GetAndResetSRet()877 CVariable* CShader::GetAndResetSRet()
878 {
879     CVariable* temp = m_SavedSRetPtr;
880     m_SavedSRetPtr = nullptr;
881     return temp;
882 }
883 
CShader::~CShader()
{
    // free all the memory allocated
    Destroy();
}
889 
IsValueUsed(llvm::Value * value)890 bool CShader::IsValueUsed(llvm::Value* value)
891 {
892     auto it = symbolMapping.find(value);
893     if (it != symbolMapping.end())
894     {
895         return true;
896     }
897     return false;
898 }
899 
GetGlobalCVar(llvm::Value * value)900 CVariable* CShader::GetGlobalCVar(llvm::Value* value)
901 {
902     auto it = globalSymbolMapping.find(value);
903     if (it != globalSymbolMapping.end())
904         return it->second;
905     return nullptr;
906 }
907 
BitCast(CVariable * var,VISA_Type newType)908 CVariable* CShader::BitCast(CVariable* var, VISA_Type newType)
909 {
910     CVariable* bitCast = nullptr;
911     uint32_t newEltSz = CEncoder::GetCISADataTypeSize(newType);
912     uint32_t eltSz = var->GetElemSize();
913     // Bitcase requires both src and dst have the same size, which means
914     // one element size is the same as or multiple of the other (if they
915     // are vectors with different number of elements).
916     IGC_ASSERT(   (newEltSz >= eltSz && (newEltSz % eltSz) == 0)
917                || (newEltSz < eltSz && (eltSz% newEltSz) == 0));
918     if (var->IsImmediate())
919     {
920         if (newEltSz == eltSz)
921             bitCast = ImmToVariable(var->GetImmediateValue(), newType);
922         else
923         {
924             // Need a temp. For example,  bitcast i64 0 -> 2xi32
925             CVariable* tmp = GetNewVariable(
926                 1,
927                 var->GetType(),
928                 CEncoder::GetCISADataTypeAlignment(var->GetType()),
929                 true,
930                 1,
931                 "vecImmBitCast");
932             encoder.Copy(tmp, var);
933             encoder.Push();
934 
935             bitCast = GetNewAlias(tmp, newType, 0, 0);
936         }
937     }
938     else
939     {
940         // TODO: we need to store this bitCasted var to avoid creating many times
941         bitCast = GetNewAlias(var, newType, 0, 0);
942     }
943     return bitCast;
944 }
945 
// Wraps an immediate value in a CVariable of the given type.  Booleans get
// special handling: vISA cannot take a boolean-source immediate, so a
// predicate variable is materialized via a SetP from an all-ones/zero UD.
CVariable* CShader::ImmToVariable(uint64_t immediate, VISA_Type type, bool isCodePatchCandidate)
{
    VISA_Type immType = type;

    if (type == ISA_TYPE_BOOL)
    {
        // bool immediates cannot be inlined
        uint immediateValue = immediate ? 0xFFFFFFFF : 0;
        CVariable* immVar = new (Allocator)  CVariable(immediateValue, ISA_TYPE_UD);
        // src-variable is no longer a boolean, V-ISA cannot take boolean-src immed.

        CVariable* dst = GetNewVariable(
            numLanes(m_dispatchSize), ISA_TYPE_BOOL, EALIGN_BYTE, CName::NONE);
        // FIXME: We need to pop/push the encoder context
        //encoder.save();
        // For code-patch candidates the SetP must land in the primary
        // payload section; restore the secondary section afterwards.
        if (isCodePatchCandidate)
        {
            encoder.SetPayloadSectionAsPrimary();
        }
        encoder.SetP(dst, immVar);
        encoder.Push();
        if (isCodePatchCandidate)
        {
            encoder.SetPayloadSectionAsSecondary();
        }
        return dst;
    }

    CVariable* var = new (Allocator) CVariable(immediate, immType);
    return var;
}
977 
GetNewVariable(uint16_t nbElement,VISA_Type type,e_alignment align,UniformArgWrap isUniform,uint16_t numberInstance,const CName & name)978 CVariable* CShader::GetNewVariable(
979     uint16_t nbElement, VISA_Type type, e_alignment align,
980     UniformArgWrap isUniform, uint16_t numberInstance, const CName &name)
981 {
982     e_varType varType;
983     if (type == ISA_TYPE_BOOL)
984     {
985         varType = EVARTYPE_PREDICATE;
986     }
987     else
988     {
989         IGC_ASSERT(align >= CEncoder::GetCISADataTypeAlignment(type));
990         varType = EVARTYPE_GENERAL;
991     }
992     CVariable* var = new (Allocator) CVariable(
993         nbElement, isUniform, type, varType, align, false, numberInstance, name);
994     encoder.CreateVISAVar(var);
995     return var;
996 }
997 
// Clones `from` (same shape, type, and attributes) and registers the
// clone with the vISA encoder.
CVariable* CShader::GetNewVariable(const CVariable* from)
{
    CVariable* clone = new (Allocator) CVariable(*from);
    encoder.CreateVISAVar(clone);
    return clone;
}
1004 
GetNewAddressVariable(uint16_t nbElement,VISA_Type type,UniformArgWrap isUniform,bool isVectorUniform,const CName & name)1005 CVariable* CShader::GetNewAddressVariable(
1006     uint16_t nbElement, VISA_Type type,
1007     UniformArgWrap isUniform, bool isVectorUniform,
1008     const CName &name)
1009 {
1010     CVariable* var = new (Allocator) CVariable(
1011         nbElement, isUniform, type,
1012         EVARTYPE_ADDRESS, EALIGN_DWORD,
1013         isVectorUniform, 1, name);
1014     encoder.CreateVISAVar(var);
1015     return var;
1016 }
1017 
GetDependency(Value * v) const1018 WIBaseClass::WIDependancy CShader::GetDependency(Value* v) const
1019 {
1020     return m_WI ? (m_WI->whichDepend(v)) : WIBaseClass::RANDOM;
1021 }
1022 
SetDependency(llvm::Value * v,WIBaseClass::WIDependancy dep)1023 void CShader::SetDependency(llvm::Value* v, WIBaseClass::WIDependancy dep)
1024 {
1025     if (m_WI) m_WI->incUpdateDepend(v, dep);
1026 }
1027 
GetIsUniform(llvm::Value * v) const1028 bool CShader::GetIsUniform(llvm::Value* v) const
1029 {
1030     return m_WI ? (m_WI->isUniform(v)) : false;
1031 }
1032 
InsideDivergentCF(const llvm::Instruction * inst) const1033 bool CShader::InsideDivergentCF(const llvm::Instruction* inst) const
1034 {
1035     return m_WI ? m_WI->insideDivergentCF(inst) : true;
1036 }
1037 
InsideWorkgroupDivergentCF(const llvm::Instruction * inst) const1038 bool CShader::InsideWorkgroupDivergentCF(const llvm::Instruction* inst) const
1039 {
1040     return m_WI ? m_WI->insideWorkgroupDivergentCF(inst) : true;
1041 }
1042 
// Computes how many elements of vector value `val` actually need a register,
// pruning unused channels where its users allow it.  Returns the (possibly
// reduced) element count and writes the channel enable mask to `mask`
// (0 means "no pruning applied").
uint CShader::GetNbVectorElementAndMask(llvm::Value* val, uint32_t& mask)
{
    llvm::Type* type = val->getType();
    uint nbElement = int_cast<uint>(cast<IGCLLVM::FixedVectorType>(type)->getNumElements());
    mask = 0;
    // we don't process vector bigger than 31 elements as the mask has only 32bits
    // If we want to support longer vectors we need to extend the mask size
    //
    // If val has been coalesced, don't prune it.
    if (IsCoalesced(val) || nbElement > 31)
    {
        return nbElement;
    }
    // WA: SIMD8 GPGPU kernels with mid-thread preemption need sampler
    // response length > 1, which constrains pruning below.
    bool gpgpuPreemptionWANeeded =
        ((GetShaderType() == ShaderType::OPENCL_SHADER) || (GetShaderType() == ShaderType::COMPUTE_SHADER)) &&
        (m_SIMDSize == SIMDMode::SIMD8) &&
        m_Platform->WaSamplerResponseLengthMustBeGreaterThan1() &&
        m_Platform->supportGPGPUMidThreadPreemption();

    if (llvm::GenIntrinsicInst * inst = llvm::dyn_cast<GenIntrinsicInst>(val))
    {
        // try to prune the destination size
        GenISAIntrinsic::ID IID = inst->getIntrinsicID();
        if (IID == GenISAIntrinsic::GenISA_ldstructured ||
            IID == GenISAIntrinsic::GenISA_typedread)
        {
            // prune with write-mask if possible
            uint elemCnt = 0;
            for (auto I = inst->user_begin(), E = inst->user_end(); I != E; ++I)
            {
                if (llvm::ExtractElementInst * extract = llvm::dyn_cast<llvm::ExtractElementInst>(*I))
                {
                    if (llvm::ConstantInt * index = llvm::dyn_cast<ConstantInt>(extract->getIndexOperand()))
                    {
                        elemCnt++;
                        // NOTE(review): the bound of 5 permits channel index 4,
                        // one past RGBA — confirm whether this should be < 4.
                        IGC_ASSERT(index->getZExtValue() < 5);
                        mask |= (1 << index->getZExtValue());
                        continue;
                    }
                }
                // if the vector is accessed by anything else than direct Extract we cannot prune it
                elemCnt = nbElement;
                mask = 0;
                break;
            }

            if (mask)
            {
                nbElement = elemCnt;
            }
        }
        else if (isSampleInstruction(inst) || isLdInstruction(inst) || isInfoInstruction(inst))
        {
            // sampler can return selected channels only with an extra header;
            // when returning only 1~2 channels it is supposed to have better
            // performance.
            uint nbExtract = 0, maxIndex = 0;
            uint8_t maskExtract = 0;
            bool allExtract = true;

            // Collect which channels are extracted and the highest index used.
            for (auto I = inst->user_begin(), E = inst->user_end(); I != E; ++I)
            {
                ExtractElementInst* extract = llvm::dyn_cast<ExtractElementInst>(*I);
                if (extract != nullptr)
                {
                    llvm::ConstantInt* indexVal;
                    indexVal = llvm::dyn_cast<ConstantInt>(extract->getIndexOperand());
                    if (indexVal != nullptr)
                    {
                        uint index = static_cast<uint>(indexVal->getZExtValue());
                        maxIndex = std::max(maxIndex, index + 1);

                        maskExtract |= (1 << index);
                        nbExtract++;
                    }
                    else
                    {
                        // if extractelement with dynamic index
                        maxIndex = nbElement;
                        allExtract = false;
                        break;
                    }
                }
                else
                {
                    // if the vector is accessed by anything else than direct Extract we cannot prune it
                    maxIndex = nbElement;
                    allExtract = false;
                    break;
                }
            }

            // TODO: there are some issues in EmitVISAPass prevents enabling
            // selected channel return for info intrinsics.
            if (!allExtract ||
                gpgpuPreemptionWANeeded ||
                IGC_IS_FLAG_DISABLED(EnableSamplerChannelReturn) ||
                isInfoInstruction(inst) ||
                maskExtract > 0xf)
            {
                if (gpgpuPreemptionWANeeded)
                {
                    maxIndex = std::max((uint)2, maxIndex);
                }

                // Keep a contiguous prefix of channels [0, maxIndex).
                mask = BIT(maxIndex) - 1;
                nbElement = maxIndex;
            }
            else
            {
                // based on return channels, decide whether to do partial
                // return with additional header
                static const bool selectReturnChannels[] = {
                    false,      // 0 0000 - should not happen
                    false,      // 1 0001 - r
                    false,      // 2 0010 -  g
                    false,      // 3 0011 - rg
                    true,       // 4 0100 -   b
                    false,      // 5 0101 - r b
                    false,      // 6 0110 -  gb
                    false,      // 7 0111 - rgb
                    true,       // 8 1000 -    a
                    true,       // 9 1001 - r  a
                    true,       // a 1010 -  g a
                    false,      // b 1011 - rg a
                    true,       // c 1100 -   ba
                    false,      // d 1101 - r ba
                    false,      // e 1110 -  gba
                    false       // f 1111 - rgba
                };
                IGC_ASSERT(maskExtract != 0);
                IGC_ASSERT(maskExtract <= 0xf);

                if (selectReturnChannels[maskExtract])
                {
                    // Worth paying for the header: return only the used channels.
                    mask = maskExtract;
                    nbElement = nbExtract;
                }
                else
                {
                    mask = BIT(maxIndex) - 1;
                    nbElement = maxIndex;
                }
            }
        }
        else
        {
            GenISAIntrinsic::ID IID = inst->getIntrinsicID();
            if (isLdInstruction(inst) ||
                IID == GenISAIntrinsic::GenISA_URBRead ||
                IID == GenISAIntrinsic::GenISA_URBReadOutput ||
                IID == GenISAIntrinsic::GenISA_DCL_ShaderInputVec ||
                IID == GenISAIntrinsic::GenISA_DCL_HSinputVec)
            {
                // prune without write-mask
                uint maxIndex = 0;
                for (auto I = inst->user_begin(), E = inst->user_end(); I != E; ++I)
                {
                    if (llvm::ExtractElementInst * extract = llvm::dyn_cast<llvm::ExtractElementInst>(*I))
                    {
                        if (llvm::ConstantInt * index = llvm::dyn_cast<ConstantInt>(extract->getIndexOperand()))
                        {
                            maxIndex = std::max(maxIndex, static_cast<uint>(index->getZExtValue()) + 1);
                            continue;
                        }
                    }
                    // if the vector is accessed by anything else than direct Extract we cannot prune it
                    maxIndex = nbElement;
                    break;
                }

                mask = BIT(maxIndex) - 1;
                nbElement = maxIndex;
            }
        }
    }
    else if (llvm::BitCastInst * inst = dyn_cast<BitCastInst>(val))
    {
        // For bitcasts, keep only the channels that are actually extracted
        // (possibly non-contiguous).
        for (auto I = inst->user_begin(), E = inst->user_end(); I != E; ++I)
        {
            if (llvm::ExtractElementInst * extract = llvm::dyn_cast<llvm::ExtractElementInst>(*I))
            {
                if (llvm::ConstantInt * index = llvm::dyn_cast<ConstantInt>(extract->getIndexOperand()))
                {
                    uint indexBit = BIT(static_cast<uint>(index->getZExtValue()));
                    mask |= indexBit;
                    continue;
                }
            }
            mask = BIT(nbElement) - 1;
            break;
        }
        if (mask)
        {
            nbElement = iSTD::BitCount(mask);
        }
    }
    return nbElement;
}
1241 
ExtractMaskWrapper(CShader * pS,Value * VecVal)1242 CShader::ExtractMaskWrapper::ExtractMaskWrapper(CShader* pS, Value* VecVal)
1243 {
1244     auto it = pS->extractMasks.find(VecVal);
1245     if (it != pS->extractMasks.end())
1246     {
1247         m_hasEM = true;
1248         m_EM = it->second;
1249         return;
1250     }
1251     IGCLLVM::FixedVectorType* VTy = dyn_cast<IGCLLVM::FixedVectorType>(VecVal->getType());
1252     const unsigned int numChannels = VTy ? (unsigned)VTy->getNumElements() : 1;
1253     if (numChannels <= 32)
1254     {
1255         m_hasEM = true;
1256         m_EM = (uint32_t)((1ULL << numChannels) - 1);
1257     }
1258     else
1259     {
1260         m_hasEM = false;
1261         m_EM = 0;
1262     }
1263 }
1264 
AdjustExtractIndex(llvm::Value * vecVal,uint16_t index)1265 uint16_t CShader::AdjustExtractIndex(llvm::Value* vecVal, uint16_t index)
1266 {
1267     const ExtractMaskWrapper EMW(this, vecVal);
1268 
1269     uint16_t result = index;
1270     if (EMW.hasEM())
1271     {
1272         IGC_ASSERT(index < 32);
1273         uint32_t mask = EMW.getEM();
1274         for (uint i = 0; i < index; ++i)
1275         {
1276             if ((mask & (1 << i)) == 0)
1277             {
1278                 result--;
1279             }
1280         }
1281         return result;
1282     }
1283     else
1284     {
1285         return index;
1286     }
1287 }
1288 
// Fills pVar with the per-lane indices 0..N-1 for the current dispatch:
// the first 8 lanes come from the packed vector immediate 0x76543210, and
// further groups of lanes are produced by adding 8/16 with NoMask moves.
void CShader::GetSimdOffsetBase(CVariable*& pVar)
{
    encoder.SetSimdSize(SIMDMode::SIMD8);
    encoder.SetNoMask();
    encoder.Cast(pVar, ImmToVariable(0x76543210, ISA_TYPE_V));
    encoder.Push();

    if (m_dispatchSize >= SIMDMode::SIMD16)
    {
        // Lanes 8..15 = lanes 0..7 + 8.
        encoder.SetSimdSize(SIMDMode::SIMD8);
        encoder.SetDstSubReg(8);
        encoder.SetNoMask();
        encoder.Add(pVar, pVar, ImmToVariable(8, ISA_TYPE_W));
        encoder.Push();
    }

    if (encoder.IsSecondHalf())
    {
        // Second half of a split SIMD32 dispatch: bias all lanes by 16.
        encoder.SetNoMask();
        encoder.Add(pVar, pVar, ImmToVariable(16, ISA_TYPE_W));
        encoder.Push();
    }
    else if (m_SIMDSize == SIMDMode::SIMD32)
    {
        // (W) add (16) V1(16) V1(0) 16:w
        encoder.SetSimdSize(SIMDMode::SIMD16);
        encoder.SetNoMask();
        encoder.SetDstSubReg(16);
        encoder.Add(pVar, pVar, ImmToVariable(16, ISA_TYPE_W));
        encoder.Push();
    }
}
1321 
GetPerLaneOffsetsReg(uint typeSizeInBytes)1322 CVariable* CShader::GetPerLaneOffsetsReg(uint typeSizeInBytes)
1323 {
1324     CVariable* pPerLaneOffsetsRaw =
1325         GetNewVariable(numLanes(m_SIMDSize), ISA_TYPE_UW, EALIGN_GRF, "PerLaneOffsetsRaw");
1326     GetSimdOffsetBase(pPerLaneOffsetsRaw);
1327 
1328     // per-lane offsets need to be added to address register
1329     CVariable* pConst2 = ImmToVariable(typeSizeInBytes, ISA_TYPE_UW);
1330 
1331     CVariable* pPerLaneOffsetsReg =
1332         GetNewVariable(numLanes(m_SIMDSize), ISA_TYPE_UW, EALIGN_GRF, false, "PerLaneOffsetsRawReg");
1333 
1334     // perLaneOffsets = 4 * perLaneOffsetsRaw
1335     encoder.SetNoMask();
1336     encoder.Mul(pPerLaneOffsetsReg, pPerLaneOffsetsRaw, pConst2);
1337     encoder.Push();
1338 
1339     return pPerLaneOffsetsReg;
1340 }
1341 
// Copies `regCount` operands of `inst` (starting at operand `paramOffset`)
// into consecutive sub-variables of `payload`, beginning at sub-variable
// `idxOffset`.
void
CShader::CreatePayload(uint regCount, uint idxOffset, CVariable*& payload,
    llvm::Instruction* inst, uint paramOffset,
    uint8_t hfFactor)
{
    for (uint i = 0; i < regCount; ++i)
    {
        // Stride between operands in sub-variable units: GRFs per SIMD-wide
        // DWORD operand, halved `hfFactor` times — presumably to account for
        // half-float (16-bit) packing; TODO confirm against callers.
        uint subVarIdx = ((numLanes(m_SIMDSize) / (getGRFSize() >> 2)) >> hfFactor) * i + idxOffset;
        CopyVariable(payload, GetSymbol(inst->getOperand(i + paramOffset)), subVarIdx);
    }
}
1353 
GetIMEReturnPayloadSize(GenIntrinsicInst * I)1354 unsigned CShader::GetIMEReturnPayloadSize(GenIntrinsicInst* I)
1355 {
1356     IGC_ASSERT(I->getIntrinsicID() == GenISAIntrinsic::GenISA_vmeSendIME2);
1357 
1358     const auto streamMode =
1359         (COMMON_ISA_VME_STREAM_MODE)(
1360             cast<ConstantInt>(I->getArgOperand(4))->getZExtValue());
1361     auto* refImgBTI = I->getArgOperand(2);
1362     auto* bwdRefImgBTI = I->getArgOperand(3);
1363     const bool isDualRef = (refImgBTI != bwdRefImgBTI);
1364 
1365     uint32_t regs2rcv = 7;
1366     if ((streamMode == VME_STREAM_OUT) || (streamMode == VME_STREAM_IN_OUT))
1367     {
1368         regs2rcv += 2;
1369         if (isDualRef)
1370         {
1371             regs2rcv += 2;
1372         }
1373     }
1374     return regs2rcv;
1375 }
1376 
// Returns the number of scalar elements needed to hold `value` for the
// current SIMD width (1 element per lane unless uniform), and the channel
// mask for prunable vectors via `mask`.  VME createMessagePhases intrinsics
// (including through a phi) are sized directly from their GRF-count operand.
uint CShader::GetNbElementAndMask(llvm::Value* value, uint32_t& mask)
{
    mask = 0;
    // Special case for VME's GenISA_createMessagePhases intrinsic
    if (GenIntrinsicInst * inst = dyn_cast<GenIntrinsicInst>(value)) {
        GenISAIntrinsic::ID IID = inst->getIntrinsicID();
        switch (IID)
        {
        case GenISAIntrinsic::GenISA_createMessagePhases:
        case GenISAIntrinsic::GenISA_createMessagePhasesNoInit:
        case GenISAIntrinsic::GenISA_createMessagePhasesV:
        case GenISAIntrinsic::GenISA_createMessagePhasesNoInitV:
        {
            Value* numGRFs = inst->getArgOperand(0);
            IGC_ASSERT_MESSAGE(isa<ConstantInt>(numGRFs), "Number GRFs operand is expected to be constant int!");
            // Number elements = {num GRFs} * {num DWords in GRF} = {num GRFs} * 8;
            return int_cast<unsigned int>(cast<ConstantInt>(numGRFs)->getZExtValue() * 8);
        }
        default:
            break;
        }
    }
    else if (auto * PN = dyn_cast<PHINode>(value))
    {
        // We could have case like below that payload is undef on some path.
        //
        // BB1:
        //   %147 = call i32 @llvm.genx.GenISA.createMessagePhasesNoInit(i32 11)
        //   call void @llvm.genx.GenISA.vmeSendIME2(i32 % 147, ...)
        //   br label %BB2
        // BB2:
        //   ... = phi i32[%147, %BB1], [0, %BB]
        //
        for (uint i = 0, e = PN->getNumOperands(); i != e; ++i)
        {
            if (GenIntrinsicInst * inst = dyn_cast<GenIntrinsicInst>(PN->getOperand(i)))
            {
                GenISAIntrinsic::ID IID = inst->getIntrinsicID();
                switch (IID)
                {
                case GenISAIntrinsic::GenISA_createMessagePhases:
                case GenISAIntrinsic::GenISA_createMessagePhasesNoInit:
                case GenISAIntrinsic::GenISA_createMessagePhasesV:
                case GenISAIntrinsic::GenISA_createMessagePhasesNoInitV:
                    return GetNbElementAndMask(inst, mask);
                default:
                    break;
                }
            }
        }
    }

    uint nbElement = 0;
    uint bSize = 0;
    llvm::Type* const type = value->getType();
    IGC_ASSERT(nullptr != type);
    switch (type->getTypeID())
    {
    case llvm::Type::FloatTyID:
    case llvm::Type::HalfTyID:
        nbElement = GetIsUniform(value) ? 1 : numLanes(m_SIMDSize);
        break;
    case llvm::Type::IntegerTyID:
        bSize = llvm::cast<llvm::IntegerType>(type)->getBitWidth();
        nbElement = GetIsUniform(value) ? 1 : numLanes(m_SIMDSize);
        // i1 that cannot be emitted as a uniform bool still needs a full
        // SIMD-wide predicate.
        if (bSize == 1 && !m_CG->canEmitAsUniformBool(value))
        {
            nbElement = numLanes(m_SIMDSize);
        }
        break;
    case IGCLLVM::VectorTyID:
    {
        // Vectors may be pruned down to the channels actually used.
        uint nElem = GetNbVectorElementAndMask(value, mask);
        nbElement = GetIsUniform(value) ? nElem : (nElem * numLanes(m_SIMDSize));
    }
    break;
    case llvm::Type::PointerTyID:
        // Assumes 32-bit pointers
        nbElement = GetIsUniform(value) ? 1 : numLanes(m_SIMDSize);
        break;
    case llvm::Type::DoubleTyID:
        nbElement = GetIsUniform(value) ? 1 : numLanes(m_SIMDSize);
        break;
    default:
        IGC_ASSERT(0);
        break;
    }
    return nbElement;
}
1466 
GetUndef(VISA_Type type)1467 CVariable* CShader::GetUndef(VISA_Type type)
1468 {
1469     CVariable* var = nullptr;
1470     if (type == ISA_TYPE_BOOL)
1471     {
1472         var = GetNewVariable(numLanes(m_SIMDSize), ISA_TYPE_BOOL, EALIGN_BYTE, "undef");
1473     }
1474     else
1475     {
1476         var = new (Allocator) CVariable(type);
1477     }
1478     return var;
1479 }
1480 
1481 // TODO: Obviously, lots of works are needed to support constant expression
1482 // better.
// Folds a constant expression to an immediate 64-bit value.  Supports
// IntToPtr/PtrToInt/Trunc/LShr over immediates, nested constant
// expressions, and global-variable addresses (via GetGlobalMappingValue);
// anything else aborts with an assertion.
uint64_t CShader::GetConstantExpr(ConstantExpr* CE) {
    IGC_ASSERT(nullptr != CE);
    switch (CE->getOpcode()) {
    default:
        // Unsupported opcode: fall through to the assert below.
        break;
    case Instruction::IntToPtr: {
        Constant* C = CE->getOperand(0);
        if (isa<ConstantInt>(C) || isa<ConstantFP>(C) || isa<ConstantPointerNull>(C))
            return GetImmediateVal(C);
        if (ConstantExpr * CE1 = dyn_cast<ConstantExpr>(C))
            return GetConstantExpr(CE1);
        break;
    }
    case Instruction::PtrToInt: {
        Constant* C = CE->getOperand(0);
        if (ConstantExpr * CE1 = dyn_cast<ConstantExpr>(C))
            return GetConstantExpr(CE1);
        if (GlobalVariable * GV = dyn_cast<GlobalVariable>(C))
            return GetGlobalMappingValue(GV);
        break;
    }
    case Instruction::Trunc: {
        Constant* C = CE->getOperand(0);
        if (ConstantExpr * CE1 = dyn_cast<ConstantExpr>(C)) {
            if (IntegerType * ITy = dyn_cast<IntegerType>(CE1->getType())) {
                // Truncate by masking to the source integer's bit width.
                return GetConstantExpr(CE1) & ITy->getBitMask();
            }
        }
        break;
    }
    case Instruction::LShr: {
        Constant* C = CE->getOperand(0);
        if (ConstantExpr * CE1 = dyn_cast<ConstantExpr>(C)) {
            if (dyn_cast<IntegerType>(CE1->getType())) {
                uint64_t ShAmt = GetImmediateVal(CE->getOperand(1));
                return GetConstantExpr(CE1) >> ShAmt;
            }
        }
        break;
    }
    }

    IGC_ASSERT_EXIT_MESSAGE(0, "Unsupported constant expression!");
    return 0;
}
1528 
// Base implementation: global variables are not supported here; shader
// subclasses that handle globals override this.
unsigned int CShader::GetGlobalMappingValue(llvm::Value* c)
{
    IGC_ASSERT_MESSAGE(0, "The global variables are not handled");

    return 0;
}
1535 
// Base implementation: global variables are not supported here; returns a
// zero immediate of the matching type after asserting.  Shader subclasses
// that handle globals override this.
CVariable* CShader::GetGlobalMapping(llvm::Value* c)
{
    IGC_ASSERT_MESSAGE(0, "The global variables are not handled");

    VISA_Type type = GetType(c->getType());
    return ImmToVariable(0, type);
}
1543 
GetScalarConstant(llvm::Value * const c)1544 CVariable* CShader::GetScalarConstant(llvm::Value* const c)
1545 {
1546     IGC_ASSERT(nullptr != c);
1547     const VISA_Type type = GetType(c->getType());
1548 
1549     // Constants
1550     if (isa<ConstantInt>(c) || isa<ConstantFP>(c) || isa<ConstantPointerNull>(c))
1551     {
1552         return ImmToVariable(GetImmediateVal(c), type);
1553     }
1554 
1555     // Undefined values
1556     if (isa<UndefValue>(c))
1557     {
1558         return GetUndef(type);
1559     }
1560 
1561     // GlobalVariables
1562     if (isa<GlobalVariable>(c))
1563     {
1564         return GetGlobalMapping(c);
1565     }
1566 
1567     // Constant Expression
1568     if (ConstantExpr * CE = dyn_cast<ConstantExpr>(c))
1569         return ImmToVariable(GetConstantExpr(CE), type);
1570 
1571     IGC_ASSERT_MESSAGE(0, "Unhandled flavor of constant!");
1572     return 0;
1573 }
1574 
// Return true if fp can be encoded as a mini float (the 8-bit "restricted
// float" / VF format: 1 sign bit, 3 exponent bits, 4 fraction bits) and
// return the encoding in value.
static bool getByteFloatEncoding(ConstantFP* fp, uint8_t& value)
{
    value = 0;
    if (fp->getType()->isFloatTy())
    {
        if (fp->isZero())
        {
            // +/-0 encode directly as sign bit only.
            value = fp->isNegative() ? 0x80 : 0;
            return true;
        }
        APInt api = fp->getValueAPF().bitcastToAPInt();
        FLOAT32 bitFloat;
        bitFloat.value.u = int_cast<unsigned int>(api.getZExtValue());
        // check that the fraction doesn't have any bits set below bit 23 - 4;
        // byte float can only encode the higher 4 bits of the fraction
        if ((bitFloat.fraction & (~(0xF << (23 - 4)))) == 0 &&
            ((bitFloat.exponent > 124 && bitFloat.exponent <= 131) ||
            (bitFloat.exponent == 124 && bitFloat.fraction != 0)))
        {
            // convert to float 8bits format
            value |= bitFloat.sign << 7;
            value |= (bitFloat.fraction >> (23 - 4));
            value |= (bitFloat.exponent & 0x3) << 4;
            value |= (bitFloat.exponent & BIT(7)) >> 1;
            return true;
        }
    }
    return false;
}
1605 
1606 // Return the most commonly used constant. Return null if all constant are different.
findCommonConstant(llvm::Constant * C,uint elts,uint currentEmitElts,bool & allSame)1607 llvm::Constant* CShader::findCommonConstant(llvm::Constant* C, uint elts, uint currentEmitElts, bool& allSame)
1608 {
1609     if (elts == 1)
1610     {
1611         return nullptr;
1612     }
1613 
1614     llvm::MapVector<llvm::Constant*, int> constMap;
1615     constMap.clear();
1616     Constant* constC = nullptr;
1617     bool cannotPackVF = !m_ctx->platform.hasPackedRestrictedFloatVector();
1618     for (uint32_t i = currentEmitElts; i < currentEmitElts + elts; i++)
1619     {
1620         constC = C->getAggregateElement(i);
1621         if (!constC)
1622         {
1623             return nullptr;
1624         }
1625         constMap[constC]++;
1626 
1627         // check if the constant can be packed in vf.
1628         if (!isa<UndefValue>(constC) && elts >= 4)
1629         {
1630             llvm::VectorType* VTy = llvm::dyn_cast<llvm::VectorType>(C->getType());
1631             uint8_t encoding = 0;
1632             if (VTy->getScalarType()->isFloatTy() &&
1633                 !getByteFloatEncoding(cast<ConstantFP>(constC), encoding))
1634             {
1635                 cannotPackVF = true;
1636             }
1637         }
1638     }
1639     int mostUsedCount = 1;
1640     Constant* mostUsedValue = nullptr;
1641     for (auto iter = constMap.begin(); iter != constMap.end(); iter++)
1642     {
1643         if (iter->second > mostUsedCount)
1644         {
1645             mostUsedValue = iter->first;
1646             mostUsedCount = iter->second;
1647         }
1648     }
1649 
1650     constMap.clear();
1651     allSame = (mostUsedCount == elts);
1652 
1653     if (allSame)
1654     {
1655         return mostUsedValue;
1656     }
1657     else if (mostUsedCount > 1 && cannotPackVF)
1658     {
1659         return mostUsedValue;
1660     }
1661     else
1662     {
1663         return nullptr;
1664     }
1665 }
1666 
1667 auto sizeToSIMDMode = [](uint32_t size)
__anon543366880302(uint32_t size) 1668 {
1669     switch (size)
1670     {
1671     case 1:
1672         return SIMDMode::SIMD1;
1673     case 2:
1674         return SIMDMode::SIMD2;
1675     case 4:
1676         return SIMDMode::SIMD4;
1677     case 8:
1678         return SIMDMode::SIMD8;
1679     case 16:
1680         return SIMDMode::SIMD16;
1681     default:
1682         IGC_ASSERT_MESSAGE(0, "unexpected simd size");
1683         return SIMDMode::SIMD1;
1684     }
1685 };
1686 
// Create (or fetch the cached) CVariable backing a struct-typed value.
// Structs are modeled as flat byte vectors: the struct's layout size in bytes
// per lane, one lane if the value is uniform. 'forceVectorInit' forces the
// non-uniform (per-lane) layout regardless of WIAnalysis.
CVariable* CShader::GetStructVariable(llvm::Value* v, bool forceVectorInit)
{
    IGC_ASSERT(v->getType()->isStructTy());

    // Constant/undef bases have no symbol mapping; they are re-materialized
    // on every query (see the tail of this function).
    auto isConstBase = [](Value* v)->bool
    {
        return isa<Constant>(v) || v->getValueID() == Value::UndefValueVal;
    };

    IGC_ASSERT_MESSAGE(isConstBase(v) ||
        isa<InsertValueInst>(v) ||
        isa<CallInst>(v) ||
        isa<Argument>(v),
        "Invalid struct symbol usage! Struct symbol should only come from const, insertvalue, call, or function arg");

    if (isa<InsertValueInst>(v))
    {
        // Walk up all the `insertvalue` instructions until we get to the constant base struct.
        // All `insertvalue` instructions that operate on the same struct should be mapped to the same CVar,
        // so just use the first instruction to do all the mapping.
        Value* baseV = v;
        InsertValueInst* FirstInsertValueInst = nullptr;
        while (InsertValueInst* II = dyn_cast<InsertValueInst>(baseV))
        {
            baseV = II->getOperand(0);
            FirstInsertValueInst = II;
        }
        if (FirstInsertValueInst)
        {
            // Check if it's already created
            auto it = symbolMapping.find(FirstInsertValueInst);
            if (it != symbolMapping.end())
            {
                return it->second;
            }
            v = FirstInsertValueInst;
        }
    }
    else if (isa<CallInst>(v) || isa<Argument>(v))
    {
        // Check for function argument symbols, and return value from calls
        auto it = symbolMapping.find(v);
        if (it != symbolMapping.end())
        {
            return it->second;
        }
    }
    else
    {
        // Const cannot be mapped
        IGC_ASSERT(isConstBase(v) && symbolMapping.find(v) == symbolMapping.end());
    }

    bool isUniform = forceVectorInit ? false : m_WI->isUniform(v);
    StructType* sTy = cast<StructType>(v->getType());
    auto& DL = entry->getParent()->getDataLayout();
    const StructLayout* SL = DL.getStructLayout(sTy);

    // Represent the struct as a vector of BYTES
    unsigned structSizeInBytes = (unsigned)SL->getSizeInBytes();
    unsigned lanes = isUniform ? 1 : numLanes(m_dispatchSize);
    CVariable* cVar = GetNewVariable(structSizeInBytes * lanes, ISA_TYPE_B, EALIGN_GRF, isUniform, "StructV");

    // Initialize the struct default value if it has one
    if (Constant* C = dyn_cast<Constant>(v))
    {
        for (unsigned i = 0; i < sTy->getNumElements(); i++)
        {
            CVariable* elementSrc = GetSymbol(C->getAggregateElement(i));
            if (!elementSrc->IsUndef())
            {
                // NOTE(review): offset and element count are scaled by the
                // lane count, which suggests members are laid out per-lane
                // (SOA-style) — confirm against GetNewAlias' offset semantics.
                unsigned elementOffset = (unsigned)SL->getElementOffset(i);
                CVariable* elementDst = GetNewAlias(cVar, elementSrc->GetType(), elementOffset * lanes, elementSrc->GetNumberElement() * lanes);
                GetEncoder().Copy(elementDst, elementSrc);
                GetEncoder().Push();
            }
        }
    }

    // Map the original llvm value to this new CVar.
    // The original value cannot be const, since we cannot map them. They will need to be initialized each time.
    if (!isConstBase(v))
        symbolMapping[v] = cVar;

    return cVar;
}
1773 
GetConstant(llvm::Constant * C,CVariable * dstVar)1774 CVariable* CShader::GetConstant(llvm::Constant* C, CVariable* dstVar)
1775 {
1776     IGCLLVM::FixedVectorType* VTy = llvm::dyn_cast<IGCLLVM::FixedVectorType>(C->getType());
1777     if (C && VTy)
1778     {   // Vector constant
1779         llvm::Type* eTy = VTy->getElementType();
1780         IGC_ASSERT_MESSAGE((VTy->getNumElements() < (UINT16_MAX)), "getNumElements more than 64k elements");
1781         uint16_t elts = (uint16_t)VTy->getNumElements();
1782 
1783         if (elts == 1)
1784         {
1785             llvm::Constant* const EC = C->getAggregateElement((uint)0);
1786             IGC_ASSERT_MESSAGE(nullptr != EC, "Vector Constant has no valid constant element!");
1787             return GetScalarConstant(EC);
1788         }
1789 
1790         // Emit a scalar move to load the element of index k.
1791         auto copyScalar = [=](int k, CVariable* Var)
1792         {
1793             Constant* const EC = C->getAggregateElement(k);
1794             IGC_ASSERT_MESSAGE(nullptr != EC, "Constant Vector: Invalid non-constant element!");
1795             if (isa<UndefValue>(EC))
1796                 return;
1797 
1798             CVariable* eVal = GetScalarConstant(EC);
1799             if (Var->IsUniform())
1800             {
1801                 GetEncoder().SetDstSubReg(k);
1802             }
1803             else
1804             {
1805                 auto input_size = GetScalarTypeSizeInRegister(eTy);
1806                 Var = GetNewAlias(Var, Var->GetType(), k * input_size * numLanes(m_SIMDSize), 0);
1807             }
1808             GetEncoder().Copy(Var, eVal);
1809             GetEncoder().Push();
1810         };
1811 
1812         // Emit a simd4 move to load 4 byte float.
1813         auto copyV4 = [=](int k, uint32_t vfimm, CVariable* Var)
1814         {
1815             CVariable* Imm = ImmToVariable(vfimm, ISA_TYPE_VF);
1816             GetEncoder().SetUniformSIMDSize(SIMDMode::SIMD4);
1817             GetEncoder().SetDstSubReg(k);
1818             GetEncoder().Copy(Var, Imm);
1819             GetEncoder().Push();
1820         };
1821 
1822 
1823         if (dstVar != nullptr && !(dstVar->IsUniform()))
1824         {
1825             for (uint i = 0; i < elts; i++)
1826             {
1827                 copyScalar(i, dstVar);
1828             }
1829             return dstVar;
1830         }
1831 
1832         CVariable* CVar = (dstVar == nullptr) ?
1833             GetNewVariable(elts, GetType(eTy), EALIGN_GRF, true, C->getName()) : dstVar;
1834         uint remainElts = elts;
1835         uint currentEltsOffset = 0;
1836         uint size = 8;
1837         while (remainElts != 0)
1838         {
1839             bool allSame = 0;
1840 
1841             while (size > remainElts && size != 1)
1842             {
1843                 size /= 2;
1844             }
1845 
1846             Constant* commonConstant = findCommonConstant(C, size, currentEltsOffset, allSame);
1847             // case 2: all constants the same
1848             if (commonConstant && allSame)
1849             {
1850                 GetEncoder().SetUniformSIMDSize(sizeToSIMDMode(size));
1851                 GetEncoder().SetDstSubReg(currentEltsOffset);
1852                 GetEncoder().Copy(CVar, GetScalarConstant(commonConstant));
1853                 GetEncoder().Push();
1854             }
1855 
1856             // case 3: some constants the same
1857             else if (commonConstant)
1858             {
1859                 GetEncoder().SetUniformSIMDSize(sizeToSIMDMode(size));
1860                 GetEncoder().SetDstSubReg(currentEltsOffset);
1861                 GetEncoder().Copy(CVar, GetScalarConstant(commonConstant));
1862                 GetEncoder().Push();
1863 
1864                 Constant* constC = nullptr;
1865                 for (uint i = currentEltsOffset; i < currentEltsOffset + size; i++)
1866                 {
1867                     constC = C->getAggregateElement(i);
1868                     if (constC != commonConstant && !isa<UndefValue>(constC))
1869                     {
1870                         GetEncoder().SetDstSubReg(i);
1871                         GetEncoder().Copy(CVar, GetScalarConstant(constC));
1872                         GetEncoder().Push();
1873                     }
1874                 }
1875             }
1876             // case 4: VFPack
1877             else if (VTy->getScalarType()->isFloatTy() && size >= 4)
1878             {
1879                 unsigned Step = 4;
1880                 for (uint i = currentEltsOffset; i < currentEltsOffset + size; i += Step)
1881                 {
1882                     // pack into vf if possible.
1883                     uint32_t vfimm = 0;
1884                     bool canUseVF = m_ctx->platform.hasPackedRestrictedFloatVector();
1885                     for (unsigned j = 0; j < Step && canUseVF; ++j)
1886                     {
1887                         Constant* EC = C->getAggregateElement(i + j);
1888                         // Treat undef as 0.0f.
1889                         if (isa<UndefValue>(EC))
1890                             continue;
1891                         uint8_t encoding = 0;
1892                         canUseVF = getByteFloatEncoding(cast<ConstantFP>(EC), encoding);
1893                         if (canUseVF)
1894                         {
1895                             uint32_t v = encoding;
1896                             v <<= j * 8;
1897                             vfimm |= v;
1898                         }
1899                         else
1900                         {
1901                             break;
1902                         }
1903                     }
1904 
1905                     if (canUseVF)
1906                     {
1907                         copyV4(i, vfimm, CVar);
1908                     }
1909                     else
1910                     {
1911                         for (unsigned j = i; j < i + Step; ++j)
1912                             copyScalar(j, CVar);
1913                     }
1914                 }
1915             }
1916             // case 5: single copy
1917             else
1918             {
1919                 // Element-wise copy or trailing elements copy if partially packed.
1920                 for (uint i = currentEltsOffset; i < currentEltsOffset + size; i++)
1921                 {
1922                     copyScalar(i, CVar);
1923                 }
1924             }
1925             remainElts -= size;
1926             currentEltsOffset += size;
1927         }
1928         return CVar;
1929     }
1930 
1931     return GetScalarConstant(C);
1932 }
1933 
GetType(llvm::Type * type,CodeGenContext * pContext)1934 VISA_Type IGC::GetType(llvm::Type* type, CodeGenContext* pContext)
1935 {
1936     IGC_ASSERT(nullptr != pContext);
1937     IGC_ASSERT(nullptr != type);
1938 
1939     switch (type->getTypeID())
1940     {
1941     case llvm::Type::FloatTyID:
1942         return ISA_TYPE_F;
1943     case llvm::Type::IntegerTyID:
1944         switch (type->getIntegerBitWidth())
1945         {
1946         case 1:
1947             return ISA_TYPE_BOOL;
1948         case 8:
1949             return ISA_TYPE_B;
1950         case 16:
1951             return ISA_TYPE_W;
1952         case 32:
1953             return ISA_TYPE_D;
1954         case 64:
1955             return ISA_TYPE_Q;
1956         default:
1957             IGC_ASSERT_MESSAGE(0, "illegal type");
1958             break;
1959         }
1960         break;
1961     case IGCLLVM::VectorTyID:
1962         return GetType(type->getContainedType(0), pContext);
1963     case llvm::Type::PointerTyID:
1964     {
1965         unsigned int AS = type->getPointerAddressSpace();
1966         uint numBits = pContext->getRegisterPointerSizeInBits(AS);
1967         if (numBits == 32)
1968         {
1969             return ISA_TYPE_UD;
1970         }
1971         else
1972         {
1973             return ISA_TYPE_UQ;
1974         }
1975     }
1976     case llvm::Type::DoubleTyID:
1977         return ISA_TYPE_DF;
1978     case llvm::Type::HalfTyID:
1979         return ISA_TYPE_HF;
1980     case llvm::Type::StructTyID:
1981         // Structs are always internally represented as BYTES
1982         return ISA_TYPE_B;
1983     default:
1984         IGC_ASSERT(0);
1985         break;
1986     }
1987     IGC_ASSERT(0);
1988     return ISA_TYPE_F;
1989 }
1990 
GetType(llvm::Type * type)1991 VISA_Type CShader::GetType(llvm::Type* type)
1992 {
1993     return IGC::GetType(type, GetContext());
1994 }
1995 
GetNumElts(llvm::Type * type,bool isUniform)1996 uint32_t CShader::GetNumElts(llvm::Type* type, bool isUniform)
1997 {
1998     uint32_t numElts = isUniform ? 1 : numLanes(m_SIMDSize);
1999 
2000     if (type->isVectorTy())
2001     {
2002         IGC_ASSERT(type->getContainedType(0)->isIntegerTy() || type->getContainedType(0)->isFloatingPointTy());
2003 
2004         auto VT = cast<IGCLLVM::FixedVectorType>(type);
2005         numElts *= (uint16_t)VT->getNumElements();
2006     }
2007     else if (type->isStructTy())
2008     {
2009         auto& DL = entry->getParent()->getDataLayout();
2010         const StructLayout* SL = DL.getStructLayout(cast<StructType>(type));
2011         numElts *= (uint16_t)SL->getSizeInBytes();
2012     }
2013     return numElts;
2014 }
2015 
GetImmediateVal(llvm::Value * Const)2016 uint64_t IGC::GetImmediateVal(llvm::Value* Const)
2017 {
2018     // Constant integer
2019     if (llvm::ConstantInt * CInt = llvm::dyn_cast<llvm::ConstantInt>(Const))
2020     {
2021         return CInt->getZExtValue();
2022     }
2023 
2024     // Constant float/double
2025     if (llvm::ConstantFP * CFP = llvm::dyn_cast<llvm::ConstantFP>(Const))
2026     {
2027         APInt api = CFP->getValueAPF().bitcastToAPInt();
2028         return api.getZExtValue();
2029     }
2030 
2031     // Null pointer
2032     if (llvm::isa<ConstantPointerNull>(Const))
2033     {
2034         return 0;
2035     }
2036 
2037     IGC_ASSERT_MESSAGE(0, "Unhandled constant value!");
2038     return 0;
2039 }
2040 
2041 /// IsRawAtomicIntrinsic - Check wether it's RAW atomic, which is optimized
2042 /// potentially by scalarized atomic operation.
IsRawAtomicIntrinsic(llvm::Value * V)2043 static bool IsRawAtomicIntrinsic(llvm::Value* V) {
2044     GenIntrinsicInst* GII = dyn_cast<GenIntrinsicInst>(V);
2045     if (!GII)
2046         return false;
2047 
2048     switch (GII->getIntrinsicID()) {
2049     default:
2050         break;
2051     case GenISAIntrinsic::GenISA_intatomicraw:
2052     case GenISAIntrinsic::GenISA_floatatomicraw:
2053     case GenISAIntrinsic::GenISA_intatomicrawA64:
2054     case GenISAIntrinsic::GenISA_floatatomicrawA64:
2055     case GenISAIntrinsic::GenISA_icmpxchgatomicraw:
2056     case GenISAIntrinsic::GenISA_fcmpxchgatomicraw:
2057     case GenISAIntrinsic::GenISA_icmpxchgatomicrawA64:
2058     case GenISAIntrinsic::GenISA_fcmpxchgatomicrawA64:
2059         return true;
2060     }
2061 
2062     return false;
2063 }
2064 
/// GetPreferredAlignmentOnUse - Return preferred alignment based on how the
/// specified value is being used.
static e_alignment GetPreferredAlignmentOnUse(llvm::Value* V, WIAnalysis* WIA,
    CodeGenContext* pContext)
{
    // Scan the users of 'aV': a uniform load/store/raw-atomic through it
    // forces GRF alignment (wider on 64-byte-GRF platforms, wider still for
    // A64 pointers). Returns EALIGN_AUTO if no such use exists.
    auto getAlign = [](Value* aV, WIAnalysis* aWIA, CodeGenContext* pCtx) -> e_alignment
    {
        // If uniform variables are once used by uniform loads, stores, or atomic
        // ops, they need being GRF aligned.
        for (auto UI = aV->user_begin(), UE = aV->user_end(); UI != UE; ++UI) {
            // NOTE(review): this branch handles loads, although the variable
            // is (misleadingly) named 'ST'.
            if (LoadInst* ST = dyn_cast<LoadInst>(*UI)) {
                Value* Ptr = ST->getPointerOperand();
                if (aWIA->isUniform(Ptr)) {
                    if (IGC::isA64Ptr(cast<PointerType>(Ptr->getType()), pCtx))
                        return (pCtx->platform.getGRFSize() == 64) ? EALIGN_64WORD : EALIGN_32WORD;
                    return (pCtx->platform.getGRFSize() == 64) ? EALIGN_32WORD : EALIGN_HWORD;
                }
            }
            if (StoreInst* ST = dyn_cast<StoreInst>(*UI)) {
                Value* Ptr = ST->getPointerOperand();
                if (aWIA->isUniform(Ptr)) {
                    if (IGC::isA64Ptr(cast<PointerType>(Ptr->getType()), pCtx))
                        return (pCtx->platform.getGRFSize() == 64) ? EALIGN_64WORD : EALIGN_32WORD;
                    return (pCtx->platform.getGRFSize() == 64) ? EALIGN_32WORD : EALIGN_HWORD;
                }
            }

            // Last, check Gen intrinsic.
            GenIntrinsicInst* GII = dyn_cast<GenIntrinsicInst>(*UI);
            if (!GII) {
                continue;
            }

            if (IsRawAtomicIntrinsic(GII)) {
                // Argument 1 of the raw atomic intrinsics is the pointer.
                Value* Ptr = GII->getArgOperand(1);
                if (aWIA->isUniform(Ptr)) {
                    if (PointerType* PtrTy = dyn_cast<PointerType>(Ptr->getType())) {
                        if (IGC::isA64Ptr(PtrTy, pCtx))
                            return (pCtx->platform.getGRFSize() == 64) ? EALIGN_64WORD : EALIGN_32WORD;
                    }
                    return (pCtx->platform.getGRFSize() == 64) ? EALIGN_32WORD : EALIGN_HWORD;
                }
            }
        }
        // No alignment-forcing use found.
        return EALIGN_AUTO;
    };

    e_alignment algn = getAlign(V, WIA, pContext);
    if (algn != EALIGN_AUTO) {
        return algn;
    }

    if (IGC_IS_FLAG_ENABLED(EnableDeSSAAlias))
    {
        // Check if this V is used as load/store's address via
        // inttoptr that is actually noop (aliased by dessa already).
        //    x = ...
        //    y = inttoptr x
        //    load/store y
        // To make sure not to increase register pressure, only do it if y
        // is the sole use of x!
        if (V->hasOneUse())
        {
            // todo: use deSSA->isNoopAliaser() to check if it has become an alias
            User* U = V->user_back();
            IntToPtrInst* IPtr = dyn_cast<IntToPtrInst>(U);
            if (IPtr && isNoOpInst(IPtr, pContext))
            {
                algn = getAlign(IPtr, WIA, pContext);
                if (algn != EALIGN_AUTO) {
                    return algn;
                }
            }
        }
    }

    // Otherwise, naturally aligned is always assumed.
    return EALIGN_AUTO;
}
2144 
2145 /// GetPreferredAlignment - Return preferred alignment based on how the
2146 /// specified value is being defined/used.
GetPreferredAlignment(llvm::Value * V,WIAnalysis * WIA,CodeGenContext * pContext)2147 e_alignment IGC::GetPreferredAlignment(llvm::Value* V, WIAnalysis* WIA,
2148     CodeGenContext* pContext)
2149 {
2150     // So far, non-uniform variables are always naturally aligned.
2151     if (!WIA->isUniform(V))
2152         return EALIGN_AUTO;
2153 
2154     // As the layout of argument is fixed, only naturally aligned could be
2155     // assumed.
2156     if (isa<Argument>(V))
2157         return CEncoder::GetCISADataTypeAlignment(GetType(V->getType(), pContext));
2158 
2159     // For values not being mapped to variables directly, always assume
2160     // natually aligned.
2161     if (!isa<Instruction>(V))
2162         return EALIGN_AUTO;
2163 
2164     // If uniform variables are results from uniform loads, they need being GRF
2165     // aligned.
2166     if (LoadInst * LD = dyn_cast<LoadInst>(V)) {
2167         Value* Ptr = LD->getPointerOperand();
2168         // For 64-bit load, we have to check how the loaded value being used.
2169         e_alignment Align = (pContext->platform.getGRFSize() == 64) ? EALIGN_32WORD : EALIGN_HWORD;
2170         if (IGC::isA64Ptr(cast<PointerType>(Ptr->getType()), pContext))
2171             Align = GetPreferredAlignmentOnUse(V, WIA, pContext);
2172         return (Align == EALIGN_AUTO) ? (pContext->platform.getGRFSize() == 64) ? EALIGN_32WORD : EALIGN_HWORD : Align;
2173     }
2174 
2175     // If uniform variables are results from uniform atomic ops, they need
2176     // being GRF aligned.
2177     if (IsRawAtomicIntrinsic(V)) {
2178         GenIntrinsicInst* GII = cast<GenIntrinsicInst>(V);
2179         Value* Ptr = GII->getArgOperand(1);
2180         // For 64-bit atomic ops, we have to check how the return value being
2181         // used.
2182         e_alignment Align = (pContext->platform.getGRFSize() == 64) ? EALIGN_32WORD : EALIGN_HWORD;
2183         if (PointerType * PtrTy = dyn_cast<PointerType>(Ptr->getType())) {
2184             if (IGC::isA64Ptr(PtrTy, pContext))
2185                 Align = GetPreferredAlignmentOnUse(V, WIA, pContext);
2186         }
2187         return (Align == EALIGN_AUTO) ? (pContext->platform.getGRFSize() == 64) ? EALIGN_32WORD : EALIGN_HWORD : Align;
2188     }
2189 
2190 
2191     // Check how that value is used.
2192     return GetPreferredAlignmentOnUse(V, WIA, pContext);
2193 }
2194 
LazyCreateCCTupleBackingVariable(CoalescingEngine::CCTuple * ccTuple,VISA_Type baseVisaType)2195 CVariable* CShader::LazyCreateCCTupleBackingVariable(
2196     CoalescingEngine::CCTuple* ccTuple,
2197     VISA_Type baseVisaType)
2198 {
2199     CVariable* var = NULL;
2200     auto it = ccTupleMapping.find(ccTuple);
2201     if (it != ccTupleMapping.end()) {
2202         var = ccTupleMapping[ccTuple];
2203     }
2204     else {
2205         auto mult = (m_SIMDSize == m_Platform->getMinDispatchMode()) ? 1 : 2;
2206         mult = CEncoder::GetCISADataTypeSize(baseVisaType) == 2 ? 1 : mult;
2207         unsigned int numRows = ccTuple->GetNumElements() * mult;
2208         const unsigned int denominator = CEncoder::GetCISADataTypeSize(ISA_TYPE_F);
2209         IGC_ASSERT(denominator);
2210         unsigned int numElts = numRows * getGRFSize() / denominator;
2211 
2212         //int size = numLanes(m_SIMDSize) * ccTuple->GetNumElements();
2213         if (ccTuple->HasNonHomogeneousElements())
2214         {
2215             numElts += m_coalescingEngine->GetLeftReservedOffset(ccTuple->GetRoot(), m_SIMDSize) / denominator;
2216             numElts += m_coalescingEngine->GetRightReservedOffset(ccTuple->GetRoot(), m_SIMDSize) / denominator;
2217         }
2218 
2219         IGC_ASSERT_MESSAGE((numElts < (UINT16_MAX)), "tuple byte size higher than 64k");
2220 
2221         // create one
2222         var = GetNewVariable(
2223             (uint16_t)numElts,
2224             ISA_TYPE_F,
2225             (GetContext()->platform.getGRFSize() == 64) ? EALIGN_32WORD : EALIGN_HWORD,
2226             false,
2227             m_numberInstance,
2228             "CCTuple");
2229         ccTupleMapping.insert(std::pair<CoalescingEngine::CCTuple*, CVariable*>(ccTuple, var));
2230     }
2231 
2232     return var;
2233 }
2234 
2235 /// F should be a non-kernel function.
2236 ///
2237 /// For a subroutine call, symbols (CVariables) are created as follows:
2238 ///
2239 /// (1) If subroutine returns non-void value, then a unified return CVarable
2240 /// is created to communicate between callee and caller. Function
2241 /// 'getOrCreateReturnSymbol' creates such a unique symbol (CVariable)
2242 /// on-demand. This return symbol is cached inside 'globalSymbolMapping'
2243 /// object and it is *NOT* part of local symbol table 'symbolMapping'.
2244 /// Currently return symbols are non-uniform.
2245 ///
2246 /// (2) Subroutine formal arguments are also created on-demand, which may be
2247 /// created from their first call sites or ahead of any call site. Symbols for
2248 /// subroutine formal arguments are also stored inside 'globalSymbolMapping'
2249 /// during entire module codegen. During each subroutine vISA emission,
/// value-to-symbol mappings are also copied into 'symbolMapping' to allow
2251 /// EmitVISAPass to emit code in a uniform way.
2252 ///
2253 /// In some sense, all formal arguments are pre-allocated. Those symbols must be
2254 /// non-alias cvariable (ie root cvariable) as required by visa.
2255 ///
2256 /// Currently, all explicit arguments are non-uniform and most implicit
2257 /// arguments are uniform. Some implicit arguments may share the same symbol
2258 /// with their caller's implicit argument of the same kind. This is a subroutine
2259 /// optimization implemented in 'getOrCreateArgumentSymbol'.
2260 ///
/// Prepare per-function codegen state: reset the local symbol tables, open
/// either a stack-call function or a subroutine in the encoder, and pre-bind
/// symbols for every used formal argument (and its deSSA root value).
void CShader::BeginFunction(llvm::Function* F)
{
    // TODO: merge InitEncoder with this function.

    // Per-function tables; globalSymbolMapping survives except for stack calls.
    symbolMapping.clear();
    ccTupleMapping.clear();
    ConstantPool.clear();

    bool useStackCall = m_FGA && m_FGA->useStackCall(F);
    if (useStackCall)
    {
        globalSymbolMapping.clear();
        encoder.BeginStackFunction(F);
        // create pre-defined r0
        m_R0 = GetNewVariable(getGRFSize() / SIZE_DWORD, ISA_TYPE_D, EALIGN_GRF, false, 1, "R0");
        encoder.GetVISAPredefinedVar(m_R0, PREDEFINED_R0);
    }
    else
    {
        encoder.BeginSubroutine(F);
    }
    // Set already created symbols for formal arguments.
    for (auto& Arg : F->args())
    {
        if (!Arg.use_empty())
        {
            // the treatment of argument is more complex for subroutine and simpler for stack-call function
            CVariable* Var = getOrCreateArgumentSymbol(&Arg, false, useStackCall);
            symbolMapping[&Arg] = Var;

            // Also bind the argument's deSSA congruence-class root (when it
            // differs and is not yet mapped) so lookups through the root
            // resolve to the same CVariable (or an alias of it).
            if (Value * Node = m_deSSA->getRootValue(&Arg))
            {
                if (Node != (Value*)& Arg &&
                    symbolMapping.count(Node) == 0)
                {
                    CVariable* aV = Var;
                    if (IGC_GET_FLAG_VALUE(EnableDeSSAAlias) >= 2)
                    {
                        aV = createAliasIfNeeded(Node, Var);
                    }
                    symbolMapping[Node] = aV;
                }
            }
        }
    }

    CreateAliasVars();
    PreCompileFunction(*F);
}
2310 
// This method splits payload interpolations from the shader into another compilation unit
void CShader::SplitPayloadFromShader(llvm::Function* F)
{
    // Only switches the encoder into its payload section; 'F' is unused here.
    encoder.BeginPayloadSection();
}
2316 
2317 /// This method is used to create the vISA variable for function F's formal return value
getOrCreateReturnSymbol(llvm::Function * F)2318 CVariable* CShader::getOrCreateReturnSymbol(llvm::Function* F)
2319 {
2320     IGC_ASSERT_MESSAGE(nullptr != F, "null function");
2321     auto it = globalSymbolMapping.find(F);
2322     if (it != globalSymbolMapping.end())
2323     {
2324         return it->second;
2325     }
2326 
2327     auto retType = F->getReturnType();
2328     IGC_ASSERT(nullptr != retType);
2329     if (F->isDeclaration() || retType->isVoidTy())
2330         return nullptr;
2331 
2332     IGC_ASSERT(retType->isSingleValueType());
2333     VISA_Type type = GetType(retType);
2334     uint16_t nElts = (uint16_t)GetNumElts(retType, false);
2335     e_alignment align = getGRFAlignment();
2336     CVariable* var = GetNewVariable(
2337         nElts, type, align, false, m_numberInstance,
2338         CName(F->getName(), "_RETVAL"));
2339     globalSymbolMapping.insert(std::make_pair(F, var));
2340     return var;
2341 }
2342 
2343 /// This method is used to create the vISA variable for function F's formal argument
getOrCreateArgumentSymbol(Argument * Arg,bool ArgInCallee,bool useStackCall)2344 CVariable* CShader::getOrCreateArgumentSymbol(
2345     Argument* Arg,
2346     bool ArgInCallee,
2347     bool useStackCall)
2348 {
2349     llvm::DenseMap<llvm::Value*, CVariable*>* pSymMap = &globalSymbolMapping;
2350     IGC_ASSERT(nullptr != pSymMap);
2351     auto it = pSymMap->find(Arg);
2352     if (it != pSymMap->end())
2353     {
2354         return it->second;
2355     }
2356 
2357     CVariable* var = nullptr;
2358 
2359     // Stack call does not use implicit args
2360     if (!useStackCall)
2361     {
2362         // An explicit argument is not uniform, and for an implicit argument, it
2363         // is predefined. Note that it is not necessarily uniform.
2364         Function* F = Arg->getParent();
2365         ImplicitArgs implicitArgs(*F, m_pMdUtils);
2366         unsigned numImplicitArgs = implicitArgs.size();
2367         unsigned numPushArgsEntry = m_ModuleMetadata->pushInfo.pushAnalysisWIInfos.size();
2368         unsigned numPushArgs = (isEntryFunc(m_pMdUtils, F) && !isNonEntryMultirateShader(F) ? numPushArgsEntry : 0);
2369         IGC_ASSERT_MESSAGE(F->arg_size() >= (numImplicitArgs + numPushArgs), "Function arg size does not match meta data and push args.");
2370         unsigned numFuncArgs = F->arg_size() - numImplicitArgs - numPushArgs;
2371 
2372         llvm::Function::arg_iterator arg = F->arg_begin();
2373         std::advance(arg, numFuncArgs);
2374         for (unsigned i = 0; i < numImplicitArgs; ++i, ++arg)
2375         {
2376             Argument* argVal = &(*arg);
2377             if (argVal == Arg)
2378             {
2379                 ImplicitArg implictArg = implicitArgs[i];
2380                 auto ArgType = implictArg.getArgType();
2381 
2382                 // Just reuse the kernel arguments for the following.
2383                 // Note that for read only general arguments, we may do similar
2384                 // optimization, with some advanced analysis.
2385                 if (ArgType == ImplicitArg::ArgType::R0 ||
2386                     ArgType == ImplicitArg::ArgType::PAYLOAD_HEADER ||
2387                     ArgType == ImplicitArg::ArgType::WORK_DIM ||
2388                     ArgType == ImplicitArg::ArgType::NUM_GROUPS ||
2389                     ArgType == ImplicitArg::ArgType::GLOBAL_SIZE ||
2390                     ArgType == ImplicitArg::ArgType::LOCAL_SIZE ||
2391                     ArgType == ImplicitArg::ArgType::ENQUEUED_LOCAL_WORK_SIZE ||
2392                     ArgType == ImplicitArg::ArgType::CONSTANT_BASE ||
2393                     ArgType == ImplicitArg::ArgType::GLOBAL_BASE ||
2394                     ArgType == ImplicitArg::ArgType::PRIVATE_BASE ||
2395                     ArgType == ImplicitArg::ArgType::PRINTF_BUFFER)
2396                 {
2397                     Function& K = *m_FGA->getSubGroupMap(F);
2398                     ImplicitArgs IAs(K, m_pMdUtils);
2399                     uint32_t nIAs = (uint32_t)IAs.size();
2400                     uint32_t iArgIx = IAs.getArgIndex(ArgType);
2401                     uint32_t argIx = (uint32_t)K.arg_size() - nIAs + iArgIx;
2402                     if (isEntryFunc(m_pMdUtils, &K) && !isNonEntryMultirateShader(&K)) {
2403                         argIx = argIx - numPushArgsEntry;
2404                     }
2405                     Function::arg_iterator arg = K.arg_begin();
2406                     for (uint32_t j = 0; j < argIx; ++j, ++arg);
2407                     Argument* kerArg = &(*arg);
2408 
2409                     // Pre-condition: all kernel arguments have been created already.
2410                     IGC_ASSERT(pSymMap->count(kerArg));
2411                     return (*pSymMap)[kerArg];
2412                 }
2413                 else
2414                 {
2415                     bool isUniform = WIAnalysis::isDepUniform(implictArg.getDependency());
2416                     uint16_t nbElements = (uint16_t)implictArg.getNumberElements();
2417 
2418 
2419                     var = GetNewVariable(nbElements,
2420                         implictArg.getVISAType(*m_DL),
2421                         implictArg.getAlignType(*m_DL), isUniform,
2422                         isUniform ? 1 : m_numberInstance,
2423                         argVal->getName());
2424                 }
2425                 break;
2426             }
2427         }
2428     }
2429 
2430     // This is not implicit.
2431     if (var == nullptr)
2432     {
2433         // GetPreferredAlignment treats all arguments as kernel ones, which have
2434         // predefined alignments; but this is not true for subroutines.
2435         // Conservatively use GRF aligned.
2436         e_alignment align = getGRFAlignment();
2437 
2438         bool isUniform = false;
2439         if (!ArgInCallee) {
2440             // Arg is for the current function and m_WI is available
2441             isUniform = m_WI->isUniform(&*Arg);
2442         }
2443 
2444         VISA_Type type = GetType(Arg->getType());
2445         uint16_t nElts = (uint16_t)GetNumElts(Arg->getType(), isUniform);
2446         var = GetNewVariable(nElts, type, align, isUniform, m_numberInstance, Arg->getName());
2447     }
2448     pSymMap->insert(std::make_pair(Arg, var));
2449     return var;
2450 }
2451 
// Bind (or rebind) llvm::Value `v` to CVariable `CVar` in this shader's
// symbol table, overwriting any previous mapping for `v`.
void CShader::UpdateSymbolMap(llvm::Value* v, CVariable* CVar)
{
    symbolMapping[v] = CVar;
}
2456 
2457 // Reuse a varable in the following case
2458 // %x = op1...
2459 // %y = op2 (%x, ...)
2460 // with some constraints:
2461 // - %x and %y belong to the same block
2462 // - %x and %y do not live out of this block
2463 // - %x does not interfere with %y
2464 // - %x is not phi
2465 // - %y has no phi use
2466 // - %x and %y have the same uniformity, and the same size
2467 // - %x is not an alias
2468 // - alignment is OK
2469 //
reuseSourceVar(Instruction * UseInst,Instruction * DefInst,e_alignment preferredAlign)2470 CVariable* CShader::reuseSourceVar(Instruction* UseInst, Instruction* DefInst,
2471     e_alignment preferredAlign)
2472 {
2473     // Only when DefInst has been assigned a CVar.
2474     IGC_ASSERT(nullptr != DefInst);
2475     IGC_ASSERT(nullptr != UseInst);
2476     auto It = symbolMapping.find(DefInst);
2477     if (It == symbolMapping.end())
2478         return nullptr;
2479 
2480     // If the def is an alias/immediate, then do not reuse.
2481     // TODO: allow alias.
2482     CVariable* DefVar = It->second;
2483     if (DefVar->GetAlias() || DefVar->IsImmediate())
2484         return nullptr;
2485 
2486     // LLVM IR level checks and RPE based heuristics.
2487     if (!m_VRA->checkDefInst(DefInst, UseInst, m_deSSA->getLiveVars()))
2488         return nullptr;
2489 
2490     // Do not reuse when variable size exceeds the threshold.
2491     //
2492     // TODO: If vISA global RA can better deal with fragmentation, this will
2493     // become unnecessary.
2494     //
2495     // TODO: Remove this check if register pressure is low, or very high.
2496     //
2497     unsigned Threshold = IGC_GET_FLAG_VALUE(VariableReuseByteSize);
2498     if (DefVar->GetSize() > Threshold)
2499         return nullptr;
2500 
2501     // Only reuse when they have the same uniformness.
2502     if (GetIsUniform(UseInst) != GetIsUniform(DefInst))
2503         return nullptr;
2504 
2505     // Check alignments. If UseInst has a stricter alignment then do not reuse.
2506     e_alignment DefAlign = DefVar->GetAlign();
2507     e_alignment UseAlign = preferredAlign;
2508     if (DefAlign == EALIGN_AUTO)
2509     {
2510         VISA_Type Ty = GetType(DefInst->getType());
2511         DefAlign = CEncoder::GetCISADataTypeAlignment(Ty);
2512     }
2513     if (UseAlign == EALIGN_AUTO)
2514     {
2515         VISA_Type Ty = GetType(UseInst->getType());
2516         UseAlign = CEncoder::GetCISADataTypeAlignment(Ty);
2517     }
2518     if (UseAlign > DefAlign)
2519         return nullptr;
2520 
2521     // Reuse this source when types match.
2522     if (DefInst->getType() == UseInst->getType())
2523     {
2524         return DefVar;
2525     }
2526 
2527     // Check cast instructions and create an alias if necessary.
2528     if (CastInst * CI = dyn_cast<CastInst>(UseInst))
2529     {
2530         VISA_Type UseTy = GetType(UseInst->getType());
2531         if (UseTy == DefVar->GetType())
2532         {
2533             return DefVar;
2534         }
2535 
2536         if (encoder.GetCISADataTypeSize(UseTy) != encoder.GetCISADataTypeSize(DefVar->GetType()))
2537         {
2538             // trunc/zext is needed, reuse not possible
2539             // this extra check is needed because in code gen we implicitly convert all private pointers
2540             // to 32-bit when LLVM assumes it's 64-bit based on DL
2541             return nullptr;
2542         }
2543 
2544         // TODO: allow %y = trunc i32 %x to i8
2545         IGC_ASSERT(CI->isNoopCast(*m_DL));
2546         return GetNewAlias(DefVar, UseTy, 0, 0);
2547     }
2548 
2549     // No reuse yet.
2550     return nullptr;;
2551 }
2552 
// Look through UseInst's source operands for an existing CVariable that
// UseInst may reuse as its own destination (delegating the per-candidate
// legality checks to reuseSourceVar). Only binary ops, selects, and
// no-op casts are considered. Returns nullptr when nothing is reusable.
CVariable* CShader::GetSymbolFromSource(Instruction* UseInst,
    e_alignment preferredAlign)
{
    if (UseInst->isBinaryOp() || isa<SelectInst>(UseInst))
    {
        // Bail out early if the use site itself fails the reuse checks.
        if (!m_VRA->checkUseInst(UseInst, m_deSSA->getLiveVars()))
            return nullptr;

        // Try each operand in order; first legal candidate wins.
        for (unsigned i = 0; i < UseInst->getNumOperands(); ++i)
        {
            Value* Opnd = UseInst->getOperand(i);
            auto DefInst = dyn_cast<Instruction>(Opnd);
            // Only for non-uniform binary instructions.
            if (!DefInst || GetIsUniform(DefInst))
                continue;

            // Skip defs whose storage is already bound by a coalescing
            // analysis (deSSA / payload coalescing / variable aliasing).
            if (IsCoalesced(DefInst))
            {
                continue;
            }

            CVariable* Var = reuseSourceVar(UseInst, DefInst, preferredAlign);
            if (Var)
                return Var;
        }
        return nullptr;
    }
    else if (auto CI = dyn_cast<CastInst>(UseInst))
    {
        if (!m_VRA->checkUseInst(UseInst, m_deSSA->getLiveVars()))
            return nullptr;

        Value* Opnd = UseInst->getOperand(0);
        auto DefInst = dyn_cast<Instruction>(Opnd);
        if (!DefInst)
            return nullptr;

        // NOTE(review): unlike the binary-op path above (which *skips*
        // coalesced defs), this path bails out when the def is NOT
        // coalesced — confirm the inverted polarity is intentional.
        if (!IsCoalesced(DefInst))
        {
            return nullptr;
        }

        // TODO: allow %y = trunc i32 %x to i16
        if (!CI->isNoopCast(*m_DL))
            return nullptr;

        // WA: vISA does not optimize the following reuse well yet.
        // %398 = bitcast i16 %vCastload to <2 x i8>
        // produces
        // mov (16) r7.0<1>:w r18.0<2;1,0>:w
        // mov (16) r7.0<1>:b r7.0<2;1,0>:b
        // mov (16) r20.0<1>:f r7.0<8;8,1>:ub
        // not
        // mov (16) r7.0<1>:w r18.0<2;1,0>:w
        // mov (16) r20.0<1>:f r7.0<2;1,0>:ub
        //
        if (CI->getOpcode() == Instruction::BitCast)
        {
            if (GetScalarTypeSizeInRegisterInBits(CI->getSrcTy()) !=
                GetScalarTypeSizeInRegisterInBits(CI->getDestTy()))
                return nullptr;
        }

        return reuseSourceVar(UseInst, DefInst, preferredAlign);
    }

    // TODO, allow insert element/value, gep, intrinsic calls etc..
    //
    // No source for reuse.
    return nullptr;
}
2624 
EvaluateSIMDConstExpr(Value * C)2625 unsigned int CShader::EvaluateSIMDConstExpr(Value* C)
2626 {
2627     if (BinaryOperator * op = dyn_cast<BinaryOperator>(C))
2628     {
2629         switch (op->getOpcode())
2630         {
2631         case Instruction::Add:
2632             return EvaluateSIMDConstExpr(op->getOperand(0)) + EvaluateSIMDConstExpr(op->getOperand(1));
2633         case Instruction::Mul:
2634             return EvaluateSIMDConstExpr(op->getOperand(0)) * EvaluateSIMDConstExpr(op->getOperand(1));
2635         case Instruction::Shl:
2636             return EvaluateSIMDConstExpr(op->getOperand(0)) << EvaluateSIMDConstExpr(op->getOperand(1));
2637         default:
2638             break;
2639         }
2640     }
2641     if (llvm::GenIntrinsicInst * genInst = dyn_cast<GenIntrinsicInst>(C))
2642     {
2643         if (genInst->getIntrinsicID() == GenISAIntrinsic::GenISA_simdSize)
2644         {
2645             return numLanes(m_dispatchSize);
2646 
2647         }
2648     }
2649     if (ConstantInt * constValue = dyn_cast<ConstantInt>(C))
2650     {
2651         return (unsigned int)constValue->getZExtValue();
2652     }
2653     IGC_ASSERT_MESSAGE(0, "unknow SIMD constant expression");
2654     return 0;
2655 }
2656 
// Return the CVariable holding llvm::Value `value`, creating and caching
// it on first use. Cases are tried in order:
//   - struct-typed values delegate to GetStructVariable()
//   - constants: function pointers and program-scope globals become
//     64-bit (UQ) symbol variables; other constants come from the
//     constant pool (when fromConstantPool is set) or GetConstant()
//   - SIMD-size constant expressions fold to an immediate
//   - deSSA aliasers, single-use scalars feeding insert-element,
//     aliasable extract-element, VME setMessagePhase patterns, and
//     payload-coalesced values all return aliases into existing storage
//   - otherwise a fresh variable is created (optionally reusing a
//     source operand's variable when EnableVariableReuse is on)
CVariable* CShader::GetSymbol(llvm::Value* value, bool fromConstantPool)
{
    CVariable* var = nullptr;

    // Symbol mappings for struct types
    if (value->getType()->isStructTy())
    {
        return GetStructVariable(value);
    }

    if (Constant * C = llvm::dyn_cast<llvm::Constant>(value))
    {
        // Check for function and global symbols
        {
            // Function Pointer
            auto isFunctionType = [this](Value* value)->bool
            {
                return isa<GlobalValue>(value) &&
                    value->getType()->isPointerTy() &&
                    value->getType()->getPointerElementType()->isFunctionTy();
            };
            // Global Variable/Constant
            auto isGlobalVarType = [this](Value* value)->bool
            {
                return isa<GlobalVariable>(value) &&
                    m_ModuleMetadata->inlineProgramScopeOffsets.count(cast<GlobalVariable>(value)) > 0;
            };

            bool isVecType = value->getType()->isVectorTy();
            bool isFunction = false;
            bool isGlobalVar = false;

            // A vector of symbols is classified by its first element.
            if (isVecType)
            {
                Value* element = C->getAggregateElement((unsigned)0);
                if (isFunctionType(element))
                    isFunction = true;
                else if (isGlobalVarType(element))
                    isGlobalVar = true;
            }
            else if (isFunctionType(value))
            {
                isFunction = true;
            }
            else if (isGlobalVarType(value))
            {
                isGlobalVar = true;
            }

            if (isFunction || isGlobalVar)
            {
                // Return the cached symbol variable if one exists.
                auto it = symbolMapping.find(value);
                if (it != symbolMapping.end())
                {
                    return it->second;
                }
                const auto &valName = value->getName();
                if (isVecType)
                {
                    // Map the entire vector value to the CVar
                    unsigned numElements = (unsigned)cast<IGCLLVM::FixedVectorType>(value->getType())->getNumElements();
                    var = GetNewVariable(numElements, ISA_TYPE_UQ,
                        (GetContext()->platform.getGRFSize() == 64) ? EALIGN_32WORD : EALIGN_HWORD,
                        WIBaseClass::UNIFORM_GLOBAL, 1, valName);
                    symbolMapping.insert(std::pair<llvm::Value*, CVariable*>(value, var));

                    // Copy over each element
                    for (unsigned i = 0; i < numElements; i++)
                    {
                        Value* element = C->getAggregateElement(i);
                        CVariable* elementV = GetSymbol(element);
                        CVariable* offsetV = GetNewAlias(var, ISA_TYPE_UQ, i * var->GetElemSize(), 1);
                        encoder.Copy(offsetV, elementV);
                        encoder.Push();
                    }
                    return var;
                }
                else
                {
                    // Scalar symbol: a single 64-bit (UQ) address.
                    var = GetNewVariable(1, ISA_TYPE_UQ, EALIGN_QWORD, WIBaseClass::UNIFORM_GLOBAL, 1, valName);
                    symbolMapping.insert(std::pair<llvm::Value*, CVariable*>(value, var));
                    return var;
                }
            }
        }

        if (fromConstantPool) {
            // Use (and populate) the constant pool so repeated uses of
            // the same constant share one initialized variable.
            CVariable* cvar = ConstantPool.lookup(C);
            if (cvar)
                return cvar;
            // Generate constant initialization.
            SEncoderState S = encoder.CopyEncoderState();
            encoder.Push();
            cvar = GetConstant(C);
            if (!C->getType()->isVectorTy()) {
                CVariable* dst = GetNewVector(C);
                encoder.Copy(dst, cvar);
                encoder.Push();
                cvar = dst;
            }
            encoder.SetEncoderState(S);
            addConstantInPool(C, cvar);
            return cvar;
        }
        var = GetConstant(C);
        return var;
    }

    else if (Instruction * inst = dyn_cast<Instruction>(value))
    {
        // SIMD-size-dependent constant expressions fold to an immediate.
        if (m_CG->SIMDConstExpr(inst))
        {
            return ImmToVariable(EvaluateSIMDConstExpr(inst), ISA_TYPE_D);
        }
    }

    auto it = symbolMapping.find(value);

    // mapping exists, return
    if (it != symbolMapping.end())
    {
        return it->second;
    }

    if (IGC_IS_FLAG_ENABLED(EnableDeSSAAlias) &&
        m_deSSA && value != m_deSSA->getNodeValue(value))
    {
        // Generate CVariable alias.
        // Value and its aliasee must be of the same size.
        Value* nodeVal = m_deSSA->getNodeValue(value);
        IGC_ASSERT_MESSAGE(nodeVal != value, "ICE: value must be aliaser!");

        // For non node value, get symbol for node value first.
        // Then, get an alias to that node value.
        CVariable* Base = GetSymbol(nodeVal);
        CVariable* AliasVar = createAliasIfNeeded(value, Base);
        symbolMapping.insert(std::pair<llvm::Value*, CVariable*>(value, AliasVar));
        return AliasVar;
    }

    // A single-use scalar whose only user is an insert-element may be
    // emitted directly into the destination vector's element slot.
    if (!isa<InsertElementInst>(value) && value->hasOneUse()) {
        auto IEI = dyn_cast<InsertElementInst>(value->user_back());
        if (IEI && CanTreatScalarSourceAsAlias(IEI)) {
            CVariable* Var = GetSymbol(IEI);
            llvm::ConstantInt* Idx = llvm::cast<llvm::ConstantInt>(IEI->getOperand(2));
            unsigned short NumElts = 1;
            unsigned EltSz = CEncoder::GetCISADataTypeSize(GetType(IEI->getType()->getScalarType()));
            unsigned Offset = unsigned(Idx->getZExtValue() * EltSz);
            if (!Var->IsUniform()) {
                NumElts = numLanes(m_SIMDSize);
                Offset *= Var->getOffsetMultiplier() * numLanes(m_SIMDSize);
            }
            CVariable* Alias = GetNewAlias(Var, Var->GetType(), (uint16_t)Offset, NumElts);
            // FIXME: It makes no sense to map it as this `value` is
            // single-used implied from CanTreatScalarSourceAsAlias().
            symbolMapping.insert(std::pair<llvm::Value*, CVariable*>(value, Alias));
            return Alias;
        }
    }

    // Extract-element with a constant index may become an alias into the
    // source vector (legality conditions in CanTreatAsAlias()).
    if (llvm::ExtractElementInst * EEI = llvm::dyn_cast<ExtractElementInst>(value))
    {
        if (CanTreatAsAlias(EEI))
        {
            llvm::ConstantInt* const pConstElem = llvm::dyn_cast<llvm::ConstantInt>(EEI->getIndexOperand());
            IGC_ASSERT(nullptr != pConstElem);
            Value* vecOperand = EEI->getVectorOperand();
            // need to call GetSymbol() before AdjustExtractIndex(), since
            // GetSymbol may update mask of the vector operand.
            CVariable* vec = GetSymbol(vecOperand);

            uint element = AdjustExtractIndex(vecOperand, (uint16_t)pConstElem->getZExtValue());
            IGC_ASSERT_MESSAGE((element < (UINT16_MAX)), "ExtractElementInst element index > higher than 64k");

            // see if distinct CVariables were created during vector bitcast copy
            if (auto vectorBCI = dyn_cast<BitCastInst>(vecOperand))
            {
                CVariable* EEIVar = getCVarForVectorBCI(vectorBCI, element);
                if (EEIVar)
                {
                    return EEIVar;
                }
            }

            uint offset = 0;
            unsigned EltSz = CEncoder::GetCISADataTypeSize(GetType(EEI->getType()));
            if (GetIsUniform(EEI->getOperand(0)))
            {
                offset = int_cast<unsigned int>(element * EltSz);
            }
            else
            {
                offset = int_cast<unsigned int>(vec->getOffsetMultiplier() * element * numLanes(m_SIMDSize) * EltSz);
            }
            IGC_ASSERT_MESSAGE((offset < (UINT16_MAX)), "computed alias offset higher than 64k");

            // You'd expect the number of elements of the extracted variable to be
            // vec->GetNumberElement() / vecOperand->getType()->getVectorNumElements().
            // However, vec->GetNumberElement() is not always what you'd expect it to be because of
            // the pruning code in GetNbVectorElement().
            // So, recompute the number of elements from scratch.
            uint16_t numElements = 1;
            if (!vec->IsUniform())
            {
                numElements = numLanes(m_SIMDSize);
            }
            var = GetNewAlias(vec, vec->GetType(), (uint16_t)offset, numElements);
            symbolMapping.insert(std::pair<llvm::Value*, CVariable*>(value, var));
            return var;
        }
    }

    if (GenIntrinsicInst * genInst = dyn_cast<GenIntrinsicInst>(value))
    {
        // VME setMessagePhase chains alias their source payload.
        if (VMECoalescePattern(genInst))
        {
            auto* Sym = GetSymbol(genInst->getOperand(0));
            auto* Alias = GetNewAlias(Sym, Sym->GetType(), 0, Sym->GetNumberElement());
            symbolMapping.insert(std::pair<Value*, CVariable*>(value, Alias));
            return Alias;
        }
        if (genInst->getIntrinsicID() == GenISAIntrinsic::GenISA_UpdateDiscardMask)
        {
            IGC_ASSERT(GetShaderType() == ShaderType::PIXEL_SHADER);
            return (static_cast<CPixelShader*>(this))->GetDiscardPixelMask();
        }
    }

    // Payload coalescing: the value lives at an offset inside a shared
    // CCTuple backing variable.
    if (m_coalescingEngine) {
        CoalescingEngine::CCTuple* ccTuple = m_coalescingEngine->GetValueCCTupleMapping(value);
        if (ccTuple) {
            VISA_Type type = GetType(value->getType());
            CVariable* var = LazyCreateCCTupleBackingVariable(ccTuple, type);

            int mult = 1;
            if (CEncoder::GetCISADataTypeSize(type) == 2 && m_SIMDSize == SIMDMode::SIMD8)
            {
                mult = 2;
            }

            //FIXME: Could improve by copying types from value

            unsigned EltSz = CEncoder::GetCISADataTypeSize(type);
            int offset = int_cast<int>(mult * (m_coalescingEngine->GetValueOffsetInCCTuple(value) - ccTuple->GetLeftBound()) *
                numLanes(m_SIMDSize) * EltSz);

            if (ccTuple->HasNonHomogeneousElements())
            {
                offset += m_coalescingEngine->GetLeftReservedOffset(ccTuple->GetRoot(), m_SIMDSize);
            }

            TODO("NumElements in this alias is 0 to preserve previous behavior. I have no idea what it should be.");
            IGC_ASSERT_MESSAGE((offset < (UINT16_MAX)), "alias offset > higher than 64k");
            CVariable* newVar = GetNewAlias(var, type, (uint16_t)offset, 0);
            symbolMapping.insert(std::pair<llvm::Value*, CVariable*>(value, newVar));
            return newVar;
        }
    }

    // If we use a value which is not marked as needed by the pattern matching, then something went wrong
    IGC_ASSERT(!isa<Instruction>(value) || isa<PHINode>(value) || m_CG->NeedInstruction(cast<Instruction>(*value)));

    e_alignment preferredAlign = GetPreferredAlignment(value, m_WI, GetContext());

    // simple de-ssa, always creates a new svar, and return
    if (!m_deSSA)
    {
        var = GetNewVector(value, preferredAlign);
        symbolMapping.insert(std::pair<llvm::Value*, CVariable*>(value, var));
        return var;
    }

    llvm::Value* rootValue = m_deSSA->getRootValue(value, &preferredAlign);
    // belong to a congruent class
    if (rootValue)
    {
        it = symbolMapping.find(rootValue);
        if (it != symbolMapping.end())
        {
            var = it->second;
            CVariable* aV = var;
            if (IGC_GET_FLAG_VALUE(EnableDeSSAAlias) >= 2)
            {
                aV = createAliasIfNeeded(value, var);
            }
            symbolMapping.insert(std::pair<llvm::Value*, CVariable*>(value, aV));
            /*
            *  When we don't scalarize vectors, vector may come from phi/insert-element
            *  We cannot adjust extract-mask
            */
            if (value->getType()->isVectorTy())
            {
                extractMasks.erase(value);
            }
            return aV;
        }
    }

    if (IGC_IS_FLAG_ENABLED(EnableVariableReuse))
    {
        // Only for instructions and do not reuse flag variables.
        if (!value->getType()->getScalarType()->isIntegerTy(1))
        {
            if (auto Inst = dyn_cast<Instruction>(value))
            {
                var = GetSymbolFromSource(Inst, preferredAlign);
            }
        }
    }

    // need to create a new mapping
    if (!var)
    {
        var = GetNewVector(value, preferredAlign);
    }

    // Cache the mapping; also map the deSSA root (if any) to the same
    // storage so congruent values share one variable.
    symbolMapping.insert(std::pair<llvm::Value*, CVariable*>(value, var));
    if (rootValue)
    {
        CVariable* aV = var;
        if (IGC_GET_FLAG_VALUE(EnableDeSSAAlias) >= 2)
        {
            aV = createAliasIfNeeded(rootValue, var);
        }
        symbolMapping.insert(std::pair<llvm::Value*, CVariable*>(rootValue, aV));
    }
    return var;
}
2985 
2986 /// WHEN implement vector-coalescing, want to be more conservative in
2987 /// treating extract-element as alias in order to reduce the complexity of
2988 /// the problem
CanTreatAsAlias(llvm::ExtractElementInst * inst)2989 bool CShader::CanTreatAsAlias(llvm::ExtractElementInst* inst)
2990 {
2991     llvm::Value* idxSrc = inst->getIndexOperand();
2992     if (!isa<llvm::ConstantInt>(idxSrc))
2993     {
2994         return false;
2995     }
2996 
2997     llvm::Value* vecSrc = inst->getVectorOperand();
2998     if (isa<llvm::InsertElementInst>(vecSrc))
2999     {
3000         return false;
3001     }
3002 
3003     if (IsCoalesced(inst) || IsCoalesced(vecSrc))
3004     {
3005         return false;
3006     }
3007 
3008     for (auto I = vecSrc->user_begin(), E = vecSrc->user_end(); I != E; ++I)
3009     {
3010         llvm::ExtractElementInst* extract = llvm::dyn_cast<llvm::ExtractElementInst>(*I);
3011         if (!extract)
3012         {
3013             return false;
3014         }
3015         if (!isa<ConstantInt>(extract->getIndexOperand()))
3016         {
3017             return false;
3018         }
3019     }
3020 
3021     return true;
3022 }
3023 
isUsedInPHINode(llvm::Instruction * I)3024 static bool isUsedInPHINode(llvm::Instruction* I) {
3025     for (auto U : I->users()) {
3026         if (isa<PHINode>(U))
3027             return true;
3028         if (auto BC = dyn_cast<BitCastInst>(U)) {
3029             if (isUsedInPHINode(BC))
3030                 return true;
3031         }
3032         if (auto IEI = dyn_cast<InsertElementInst>(U)) {
3033             if (isUsedInPHINode(IEI))
3034                 return true;
3035         }
3036     }
3037     return false;
3038 }
3039 
// Decide whether the scalar source of insert-element IEI can be emitted
// directly into the destination vector's element slot (i.e. treated as
// an alias), which effectively hoists the element definition up to the
// scalar's def point. Returns false whenever that hoisting could be
// observed (PHI uses, multiple uses, cross-block defs, or a conflicting
// write to the same lane in the insert-element chain).
bool CShader::CanTreatScalarSourceAsAlias(llvm::InsertElementInst* IEI) {
    // Skip if it's not enabled.
    if (!IGC_IS_FLAG_ENABLED(EnableInsertElementScalarCoalescing))
        return false;
    // Skip if IEI is used in PHI.
    // FIXME: Should skip PHI if this IEI is from its backedge.
    if (isUsedInPHINode(IEI))
        return false;
    // Skip if the index is not constant.
    llvm::ConstantInt* IdxOp = dyn_cast<llvm::ConstantInt>(IEI->getOperand(2));
    if (!IdxOp)
        return false;
    // Skip if the scalar operand is not single-used.
    Value* ScalarOp = IEI->getOperand(1);
    if (!ScalarOp->hasOneUse())
        return false;
    // Skip if the scalar operand is not an instruction.
    if (!isa<llvm::Instruction>(ScalarOp))
        return false;
    // Skip the scalar operand may be treated as alias.
    if (llvm::dyn_cast<llvm::PHINode>(ScalarOp))
        return false;
    if (auto EEI = llvm::dyn_cast<llvm::ExtractElementInst>(ScalarOp)) {
        if (CanTreatAsAlias(EEI))
            return false;
    }
    auto Def = cast<llvm::Instruction>(ScalarOp);
    auto BB = Def->getParent();
    // Skip that scalar value is not defined locally.
    if (BB != IEI->getParent())
        return false;
    if (!m_deSSA)
        return isa<llvm::UndefValue>(IEI->getOperand(0));
    // Since we will define that vector element ahead from the previous
    // position, check whether such hoisting is safe.
    // Walk backwards from just above IEI toward (but not including) the
    // scalar's def, following the chain of insert-elements that build
    // the same deSSA-congruent vector.
    auto BI = std::prev(llvm::BasicBlock::reverse_iterator(IEI->getIterator()));
    auto BE = std::prev(llvm::BasicBlock::reverse_iterator(Def->getIterator()));
    auto Idx = IdxOp->getZExtValue();
    for (; BI != BE && BI != BB->rend(); ++BI) {
        // Only the insert-element chain itself is inspected; unrelated
        // instructions in between are skipped.
        if (&*BI != IEI)
            continue;
        Value* VecOp = IEI->getOperand(0);
        // If the source operand is `undef`, `insertelement` could be always
        // treated as alias (of the destination of the scalar operand).
        if (isa<UndefValue>(VecOp))
            return true;
        Value* SrcRoot = m_deSSA->getRootValue(VecOp);
        Value* DstRoot = m_deSSA->getRootValue(IEI);
        // `dst` vector will be copied from `src` vector if they won't coalese.
        // Hoisting this insertion is unsafe.
        if (SrcRoot != DstRoot)
            return false;
        IEI = dyn_cast<llvm::InsertElementInst>(VecOp);
        // However, if `src` is not defined through `insertelement`, it's still
        // unsafe to hoist this insertion.
        if (!IEI)
            return false;
        // If that's dynamically indexed insertion or insertion on the same
        // index, it's unsafe to hoist this insertion.
        llvm::ConstantInt* IdxOp = dyn_cast<llvm::ConstantInt>(IEI->getOperand(2));
        if (!IdxOp)
            return false;
        if (IdxOp->getZExtValue() == Idx)
            return false;
    }
    return true;
}
3107 
HasBecomeNoop(Instruction * inst)3108 bool CShader::HasBecomeNoop(Instruction* inst) {
3109     return m_VRA->m_HasBecomeNoopInsts.count(inst);
3110 }
3111 
IsCoalesced(Value * V)3112 bool CShader::IsCoalesced(Value* V) {
3113     if ((m_VRA && m_VRA->isAliasedValue(V)) ||
3114         (m_deSSA && m_deSSA->getRootValue(V)) ||
3115         (m_coalescingEngine && m_coalescingEngine->GetValueCCTupleMapping(V)))
3116     {
3117         return true;
3118     }
3119     return false;
3120 }
3121 
3122 #define SET_INTRINSICS()                              \
3123          GenISAIntrinsic::GenISA_setMessagePhaseX:    \
3124     case GenISAIntrinsic::GenISA_setMessagePhaseXV:   \
3125     case GenISAIntrinsic::GenISA_setMessagePhase:     \
3126     case GenISAIntrinsic::GenISA_setMessagePhaseV:    \
3127     case GenISAIntrinsic::GenISA_simdSetMessagePhase: \
3128     case GenISAIntrinsic::GenISA_simdSetMessagePhaseV
3129 
// Returns true when I is one of the setMessagePhase* intrinsics;
// the SET_INTRINSICS() macro above expands to their case labels.
static bool IsSetMessageIntrinsic(GenIntrinsicInst* I)
{
    switch (I->getIntrinsicID())
    {
    case SET_INTRINSICS():
        return true;
    default:
        return false;
    }
}
3140 
// Decide whether a VME set-message intrinsic `genInst` and the message-phase
// value it writes into (operand 0) can be coalesced into one variable.
// Neither value may already be coalesced by another scheme, the producer of
// operand 0 must be a createMessagePhases*/setMessagePhase* intrinsic, and
// all other users of that producer must come before `genInst`'s write within
// the same basic block (so the in-place update cannot clobber a live read).
bool CShader::VMECoalescePattern(GenIntrinsicInst* genInst)
{
    if (!IsSetMessageIntrinsic(genInst))
        return false;

    if (IsCoalesced(genInst))
    {
        return false;
    }

    if (GenIntrinsicInst * argInst = dyn_cast<GenIntrinsicInst>(genInst->getOperand(0)))
    {
        if (IsCoalesced(argInst))
        {
            return false;
        }

        switch (argInst->getIntrinsicID())
        {
        case GenISAIntrinsic::GenISA_createMessagePhases:
        case GenISAIntrinsic::GenISA_createMessagePhasesV:
        case GenISAIntrinsic::GenISA_createMessagePhasesNoInit:
        case GenISAIntrinsic::GenISA_createMessagePhasesNoInitV:
        case SET_INTRINSICS():
        {
            bool OneUse = argInst->hasOneUse();

            // Quick check: sole user, same block -- trivially safe.
            if (OneUse)
            {
                return (argInst->getParent() == genInst->getParent());
            }

            // If we don't succeed in the quick check above, also match if there
            // is a single set intrinsic and all of the other users dominate the
            // set intrinsic in the block.

            SmallPtrSet<Value*, 4> Users(argInst->user_begin(), argInst->user_end());

            // Every user must be a GenISA intrinsic in the same block;
            // count how many of them are set-message intrinsics.
            uint32_t SetMessageCnt = 0U;
            for (auto U : Users)
            {
                if (!isa<GenIntrinsicInst>(U))
                    return false;

                auto* GII = cast<GenIntrinsicInst>(U);
                if (GII->getParent() != argInst->getParent())
                    return false;

                if (IsSetMessageIntrinsic(GII))
                    SetMessageCnt++;
            }

            if (SetMessageCnt > 1)
                return false;

            uint32_t NonSetInsts = Users.size() - SetMessageCnt;

            // Walk forward from argInst: succeed only if every non-set user
            // appears before the (single) set intrinsic in program order.
            auto E = argInst->getParent()->end();
            for (auto I = argInst->getIterator(); I != E; I++)
            {
                if (Users.count(&*I) != 0)
                {
                    if (IsSetMessageIntrinsic(cast<GenIntrinsicInst>(&*I)))
                    {
                        // A set intrinsic was reached while non-set users
                        // remain after it -- not safe to coalesce.
                        return false;
                    }
                    else
                    {
                        if (--NonSetInsts == 0)
                            break;
                    }
                }
            }

            return true;
        }
        default:
            return false;
        }
    }

    return false;

}
3225 
3226 #undef SET_INTRINSICS
3227 
isUnpacked(llvm::Value * value)3228 bool CShader::isUnpacked(llvm::Value* value)
3229 {
3230     bool isUnpacked = false;
3231     if (m_SIMDSize == m_Platform->getMinDispatchMode())
3232     {
3233         if (isa<SampleIntrinsic>(value) || isa<LdmcsInstrinsic>(value))
3234         {
3235             if (cast<VectorType>(value->getType())->getElementType()->isHalfTy() ||
3236                 cast<VectorType>(value->getType())->getElementType()->isIntegerTy(16))
3237             {
3238                 isUnpacked = true;
3239                 auto uses = value->user_begin();
3240                 auto endUses = value->user_end();
3241                 while (uses != endUses)
3242                 {
3243                     if (llvm::ExtractElementInst * extrElement = dyn_cast<llvm::ExtractElementInst>(*uses))
3244                     {
3245                         if (CanTreatAsAlias(extrElement))
3246                         {
3247                             ++uses;
3248                             continue;
3249                         }
3250                     }
3251                     isUnpacked = false;
3252                     break;
3253                 }
3254             }
3255         }
3256     }
3257     return isUnpacked;
3258 }
3259 /// GetNewVector
3260 ///
/// Create a fresh CVariable sized and aligned to hold llvm `value`.
/// `preferredAlign` is currently honored only for uniform values
/// (EALIGN_AUTO falls back to the vISA type's natural alignment).
CVariable* CShader::GetNewVector(llvm::Value* value, e_alignment preferredAlign)
{
    VISA_Type type = GetType(value->getType());
    WIBaseClass::WIDependancy dep = GetDependency(value);
    bool uniform = WIAnalysis::isDepUniform(dep);
    uint32_t mask = 0;
    // Unpacked 16-bit sample/ldmcs results take double the elements
    // (see isUnpacked()).
    bool isUnpackedBool = isUnpacked(value);
    uint8_t multiplier = (isUnpackedBool) ? 2 : 1;
    uint nElem = GetNbElementAndMask(value, mask) * multiplier;
    IGC_ASSERT_MESSAGE((nElem < (UINT16_MAX)), "getNumElements more than 64k elements");
    const uint16_t nbElement = (uint16_t)nElem;
    // TODO: Non-uniform variable should be naturally aligned instead of GRF
    // aligned. E.g., <8 x i16> should be aligned to 16B instead of 32B or GRF.
    e_alignment align = EALIGN_GRF;
    if (uniform) {
        // So far, preferredAlign is only applied to uniform variable.
        // TODO: Add preferred alignment for non-uniform variables.
        align = preferredAlign;
        if (align == EALIGN_AUTO)
            align = CEncoder::GetCISADataTypeAlignment(type);
    }
    uint16_t numberOfInstance = m_numberInstance;
    if (uniform)
    {
        // Uniform values collapse to a single instance, except booleans
        // that cannot be emitted as a uniform bool (see canEmitAsUniformBool).
        if (type != ISA_TYPE_BOOL || m_CG->canEmitAsUniformBool(value))
        {
            numberOfInstance = 1;
        }
    }
    if (mask)
    {
        // Remember which vector elements are actually live for this value.
        extractMasks[value] = mask;
    }
    const auto &valueName = value->getName();
    CVariable* var =
        GetNewVariable(
            nbElement,
            type,
            align,
            dep,
            numberOfInstance,
            valueName);
    if (isUnpackedBool)
        var->setisUnpacked();
    return var;
}
3307 
3308 /// GetNewAlias
GetNewAlias(CVariable * var,VISA_Type type,uint16_t offset,uint16_t numElements)3309 CVariable* CShader::GetNewAlias(
3310     CVariable* var, VISA_Type type, uint16_t offset, uint16_t numElements)
3311 {
3312     IGC_ASSERT_MESSAGE(false == var->IsImmediate(), "Trying to create an alias of an immediate");
3313     CVariable* alias = new (Allocator)CVariable(var, type, offset, numElements, var->IsUniform());
3314     encoder.CreateVISAVar(alias);
3315     return alias;
3316 }
3317 
3318 // createAliasIfNeeded() returns the Var that is either BaseVar or
3319 // its alias of the same size.
3320 //
// If BaseVar's type matches V's, return BaseVar; otherwise, create a
// new alias CVariable to BaseVar. The new CVariable has V's size, which
3323 // should not be larger than BaseVar's.
3324 //
3325 // Note that V's type is either vector or scalar.
createAliasIfNeeded(Value * V,CVariable * BaseVar)3326 CVariable* CShader::createAliasIfNeeded(Value* V, CVariable* BaseVar)
3327 {
3328     Type* Ty = V->getType();
3329     VectorType* VTy = dyn_cast<VectorType>(Ty);
3330     Type* BTy = VTy ? VTy->getElementType() : Ty;
3331     VISA_Type visaTy = GetType(BTy);
3332     if (visaTy == BaseVar->GetType())
3333     {
3334         return BaseVar;
3335     }
3336 
3337     uint16_t visaTy_sz = CEncoder::GetCISADataTypeSize(visaTy);
3338     IGC_ASSERT(visaTy_sz);
3339     uint16_t nbe = BaseVar->GetSize() / visaTy_sz;
3340     IGC_ASSERT_MESSAGE((BaseVar->GetSize() % visaTy_sz) == 0, "V's Var should be the same size as BaseVar!");
3341     CVariable* NewAliasVar = GetNewAlias(BaseVar, visaTy, 0, nbe);
3342     return NewAliasVar;
3343 }
3344 
3345 /// GetNewAlias
GetNewAlias(CVariable * var,VISA_Type type,uint16_t offset,uint16_t numElements,bool uniform)3346 CVariable* CShader::GetNewAlias(
3347     CVariable* var, VISA_Type type, uint16_t offset, uint16_t numElements, bool uniform)
3348 {
3349     IGC_ASSERT(nullptr != var);
3350     IGC_ASSERT_MESSAGE(false == var->IsImmediate(), "Trying to create an alias of an immediate");
3351     CVariable* alias = new (Allocator) CVariable(var, type, offset, numElements, uniform);
3352     encoder.CreateVISAVar(alias);
3353     return alias;
3354 }
3355 
GetVarHalf(CVariable * var,unsigned int half)3356 CVariable* CShader::GetVarHalf(CVariable* var, unsigned int half)
3357 {
3358     const char *lowOrHi = half == 0 ? "Lo" : "Hi";
3359     IGC_ASSERT(nullptr != var);
3360     IGC_ASSERT_MESSAGE(false == var->IsImmediate(), "Trying to create an alias of an immediate");
3361     CVariable* alias = new (Allocator) CVariable(
3362         var->GetNumberElement(),
3363         var->IsUniform(),
3364         var->GetType(),
3365         var->GetVarType(),
3366         var->GetAlign(),
3367         var->IsVectorUniform(),
3368         1,
3369         CName(var->getName(), lowOrHi));
3370     alias->visaGenVariable[0] = var->visaGenVariable[half];
3371     return alias;
3372 }
3373 
// Collect per-lane CVariable symbols for the payload vector `inst` of width
// `vecWidth` into `payload[]`.  Two producer shapes are handled: a
// ConstantDataVector, or a chain of insertelement instructions with constant
// lane indices that together define every lane.
void CShader::GetPayloadElementSymbols(llvm::Value* inst, CVariable* payload[], int vecWidth)
{
    llvm::ConstantDataVector* cv = llvm::dyn_cast<llvm::ConstantDataVector>(inst);
    if (cv) {
        IGC_ASSERT(vecWidth == cv->getNumElements());
        for (int i = 0; i < vecWidth; ++i) {
            payload[i] = GetSymbol(cv->getElementAsConstant(i));
        }
        return;
    }

    llvm::InsertElementInst* ie = llvm::dyn_cast<llvm::InsertElementInst>(inst);
    IGC_ASSERT(nullptr != ie);

    for (int i = 0; i < vecWidth; ++i) {
        payload[i] = NULL;
    }

    int count = 0;
    //Gather elements of vector
    while (ie != NULL) {
        // NOTE(review): operand 2 (the lane index) is assumed to be a
        // ConstantInt; the dyn_cast result is dereferenced unchecked --
        // confirm callers only pass chains with constant indices.
        int64_t iOffset = llvm::dyn_cast<llvm::ConstantInt>(ie->getOperand(2))->getSExtValue();
        IGC_ASSERT(iOffset >= 0);
        IGC_ASSERT(iOffset < vecWidth);

        // Get the scalar value from this insert
        // The chain is walked from the last insert upward, so the first
        // (i.e. latest in program order) write to a lane wins.
        if (payload[iOffset] == NULL) {
            payload[iOffset] = GetSymbol(ie->getOperand(1));
            count++;
        }

        // Do we have another insert?
        llvm::Value* insertBase = ie->getOperand(0);
        ie = llvm::dyn_cast<llvm::InsertElementInst>(insertBase);
        if (ie != NULL) {
            continue;
        }

        if (llvm::isa<llvm::UndefValue>(insertBase)) {
            break;
        }
    }
    // Every lane must have been defined exactly once by the chain.
    IGC_ASSERT(count == vecWidth);
}
3418 
void CShader::Destroy()
{
    // Intentionally empty: nothing to tear down at this level.
}
3422 
3423 // Helper function to copy raw register
CopyVariable(CVariable * dst,CVariable * src,uint dstSubVar,uint srcSubVar)3424 void CShader::CopyVariable(
3425     CVariable* dst,
3426     CVariable* src,
3427     uint dstSubVar,
3428     uint srcSubVar)
3429 {
3430     CVariable* rawDst = dst;
3431     // The source have to match for a raw copy
3432     if (src->GetType() != dst->GetType())
3433     {
3434         rawDst = BitCast(dst, src->GetType());
3435     }
3436     encoder.SetSrcSubVar(0, srcSubVar);
3437     encoder.SetDstSubVar(dstSubVar);
3438     encoder.Copy(rawDst, src);
3439     encoder.Push();
3440 }
3441 
3442 // Helper function to copy and pack raw register
PackAndCopyVariable(CVariable * dst,CVariable * src,uint subVar)3443 void CShader::PackAndCopyVariable(
3444     CVariable* dst,
3445     CVariable* src,
3446     uint subVar)
3447 {
3448     CVariable* rawDst = dst;
3449     // The source have to match for a raw copy
3450     if (src->GetType() != dst->GetType())
3451     {
3452         rawDst = BitCast(dst, src->GetType());
3453     }
3454     encoder.SetDstSubVar(subVar);
3455     if (!src->IsUniform())
3456     {
3457         encoder.SetSrcRegion(0, 16, 8, 2);
3458     }
3459     encoder.Copy(rawDst, src);
3460     encoder.Push();
3461 }
3462 
CompileSIMDSizeInCommon(SIMDMode simdMode)3463 bool CShader::CompileSIMDSizeInCommon(SIMDMode simdMode)
3464 {
3465     bool ret = (m_ScratchSpaceSize <= m_ctx->platform.maxPerThreadScratchSpace());
3466     m_simdProgram.setScratchSpaceUsedByShader(m_ScratchSpaceSize);
3467     if (m_ctx->platform.hasScratchSurface() && m_ctx->m_DriverInfo.supportsSeparatingSpillAndPrivateScratchMemorySpace()) {
3468         ret = (m_simdProgram.getScratchSpaceUsageInSlot0() <= m_ctx->platform.maxPerThreadScratchSpace());
3469     }
3470 
3471 
3472     return ret;
3473 }
3474 
// Default thread usage rate reported for a shader; shader-type-specific
// implementations elsewhere may report a different rate.
uint32_t CShader::GetShaderThreadUsageRate()
{
    return 1;
}
3479 
// Return the shader compiled for the given SIMD width and dispatch mode,
// or nullptr if none has been created yet (slots start value-initialized).
CShader* CShaderProgram::GetShader(SIMDMode simd, ShaderDispatchMode mode)
{
    return GetShaderPtr(simd, mode);
}
3484 
GetShaderPtr(SIMDMode simd,ShaderDispatchMode mode)3485 CShader*& CShaderProgram::GetShaderPtr(SIMDMode simd, ShaderDispatchMode mode)
3486 {
3487     switch (mode)
3488     {
3489     case ShaderDispatchMode::DUAL_PATCH:
3490         return m_SIMDshaders[3];
3491     default:
3492         break;
3493     }
3494 
3495     switch (simd)
3496     {
3497     case SIMDMode::SIMD8:
3498         return m_SIMDshaders[0];
3499     case SIMDMode::SIMD16:
3500         return m_SIMDshaders[1];
3501     case SIMDMode::SIMD32:
3502         return m_SIMDshaders[2];
3503     default:
3504         IGC_ASSERT_MESSAGE(0, "wrong SIMD size");
3505         break;
3506     }
3507     return m_SIMDshaders[0];
3508 }
3509 
ClearShaderPtr(SIMDMode simd)3510 void CShaderProgram::ClearShaderPtr(SIMDMode simd)
3511 {
3512     switch (simd)
3513     {
3514     case SIMDMode::SIMD8:   m_SIMDshaders[0] = nullptr; break;
3515     case SIMDMode::SIMD16:  m_SIMDshaders[1] = nullptr; break;
3516     case SIMDMode::SIMD32:  m_SIMDshaders[2] = nullptr; break;
3517     default:
3518         IGC_ASSERT_MESSAGE(0, "wrong SIMD size");
3519         break;
3520     }
3521 }
3522 
GetOrCreateShader(SIMDMode simd,ShaderDispatchMode mode)3523 CShader* CShaderProgram::GetOrCreateShader(SIMDMode simd, ShaderDispatchMode mode)
3524 {
3525     CShader*& pShader = GetShaderPtr(simd, mode);
3526     if (pShader == nullptr)
3527     {
3528         pShader = CreateNewShader(simd);
3529     }
3530     return pShader;
3531 }
3532 
CreateNewShader(SIMDMode simd)3533 CShader* CShaderProgram::CreateNewShader(SIMDMode simd)
3534 {
3535     CShader* pShader = nullptr;
3536     {
3537         switch (m_context->type)
3538         {
3539         case ShaderType::OPENCL_SHADER:
3540             pShader = new COpenCLKernel((OpenCLProgramContext*)m_context, m_kernel, this);
3541             break;
3542         case ShaderType::PIXEL_SHADER:
3543             pShader = new CPixelShader(m_kernel, this);
3544             break;
3545         case ShaderType::VERTEX_SHADER:
3546             pShader = new CVertexShader(m_kernel, this);
3547             break;
3548         case ShaderType::GEOMETRY_SHADER:
3549             pShader = new CGeometryShader(m_kernel, this);
3550             break;
3551         case ShaderType::HULL_SHADER:
3552             pShader = new CHullShader(m_kernel, this);
3553             break;
3554         case ShaderType::DOMAIN_SHADER:
3555             pShader = new CDomainShader(m_kernel, this);
3556             break;
3557         case ShaderType::COMPUTE_SHADER:
3558             pShader = new CComputeShader(m_kernel, this);
3559             break;
3560         default:
3561             IGC_ASSERT_MESSAGE(0, "wrong shader type");
3562             break;
3563         }
3564     }
3565 
3566     IGC_ASSERT(nullptr != pShader);
3567 
3568     pShader->m_shaderStats = m_shaderStats;
3569     pShader->m_DriverInfo = &m_context->m_DriverInfo;
3570     pShader->m_Platform = &m_context->platform;
3571     pShader->m_pBtiLayout = &m_context->btiLayout;
3572     pShader->m_ModuleMetadata = m_context->getModuleMetaData();
3573 
3574     return pShader;
3575 }
3576 
DeleteShader(SIMDMode simd,ShaderDispatchMode mode)3577 void CShaderProgram::DeleteShader(SIMDMode simd, ShaderDispatchMode mode)
3578 {
3579     CShader*& pShader = GetShaderPtr(simd, mode);
3580     delete pShader;
3581     pShader = nullptr;
3582 }
3583 
GetSamplerCount(unsigned int samplerCount)3584 unsigned int CShader::GetSamplerCount(unsigned int samplerCount)
3585 {
3586     if (samplerCount > 0)
3587     {
3588         if (samplerCount <= 4)
3589             return 1; // between 1 and 4 samplers used
3590         else if (samplerCount >= 5 && samplerCount <= 8)
3591             return 2; // between 5 and 8 samplers used
3592         else if (samplerCount >= 9 && samplerCount <= 12)
3593             return 3; // between 9 and 12 samplers used
3594         else if (samplerCount >= 13 && samplerCount <= 16)
3595             return 4; // between 13 and 16 samplers used
3596         else
3597             // Samplers count out of range. Force value 0 to avoid undefined behavior.
3598             return 0;
3599     }
3600     return 0;
3601 }
3602 
// Build a shader-program wrapper for `kernel`; the per-SIMD shader variants
// are created lazily via GetOrCreateShader().
CShaderProgram::CShaderProgram(CodeGenContext* ctx, llvm::Function* kernel)
    : m_shaderStats(nullptr)
    , m_context(ctx)
    , m_kernel(kernel)
    , m_SIMDshaders()  // value-initializes every slot to nullptr
{
}
3610 
~CShaderProgram()3611 CShaderProgram::~CShaderProgram()
3612 {
3613     for (auto& shader : m_SIMDshaders)
3614     {
3615         delete shader;
3616     }
3617     m_context = nullptr;
3618 }
3619 
GetPrimitiveTypeSizeInRegisterInBits(const Type * Ty) const3620 unsigned int CShader::GetPrimitiveTypeSizeInRegisterInBits(const Type* Ty) const
3621 {
3622     unsigned int sizeInBits = (unsigned int)Ty->getPrimitiveSizeInBits();
3623     if (Ty->isPtrOrPtrVectorTy())
3624     {
3625         sizeInBits =
3626             GetContext()->getRegisterPointerSizeInBits(Ty->getPointerAddressSpace());
3627         if (auto* VTy = dyn_cast<IGCLLVM::FixedVectorType>(Ty))
3628         {
3629             sizeInBits *= (unsigned)VTy->getNumElements();
3630         }
3631     }
3632     return sizeInBits;
3633 }
3634 
// Byte-size counterpart of GetPrimitiveTypeSizeInRegisterInBits().
unsigned int CShader::GetPrimitiveTypeSizeInRegister(const Type* Ty) const
{
    return GetPrimitiveTypeSizeInRegisterInBits(Ty) / 8;
}
3639 
GetScalarTypeSizeInRegisterInBits(const Type * Ty) const3640 unsigned int CShader::GetScalarTypeSizeInRegisterInBits(const Type* Ty) const
3641 {
3642     unsigned int sizeInBits = Ty->getScalarSizeInBits();
3643     if (Ty->isPtrOrPtrVectorTy())
3644     {
3645         sizeInBits =
3646             GetContext()->getRegisterPointerSizeInBits(Ty->getPointerAddressSpace());
3647     }
3648     return sizeInBits;
3649 }
3650 
// Byte-size counterpart of GetScalarTypeSizeInRegisterInBits().
unsigned int CShader::GetScalarTypeSizeInRegister(const Type* Ty) const
{
    return GetScalarTypeSizeInRegisterInBits(Ty) / 8;
}
3655 
3656