/*========================== begin_copyright_notice ============================

Copyright (C) 2017-2021 Intel Corporation

SPDX-License-Identifier: MIT

============================= end_copyright_notice ===========================*/

#include "common/LLVMWarningsPush.hpp"
#include <llvm/IR/Function.h>
#include <llvmWrapper/IR/DerivedTypes.h>
#include "common/LLVMWarningsPop.hpp"
#include "AdaptorCommon/ImplicitArgs.hpp"
#include "Compiler/CISACodeGen/ShaderCodeGen.hpp"
#include "Compiler/CISACodeGen/DeSSA.hpp"
#include "Compiler/CISACodeGen/GenCodeGenModule.h"
#include "Compiler/CISACodeGen/messageEncoding.hpp"
#include "Compiler/CISACodeGen/VariableReuseAnalysis.hpp"
#include "Compiler/CISACodeGen/PixelShaderCodeGen.hpp"
#include "Compiler/CISACodeGen/VertexShaderCodeGen.hpp"
#include "Compiler/CISACodeGen/GeometryShaderCodeGen.hpp"
#include "Compiler/CISACodeGen/ComputeShaderCodeGen.hpp"
#include "Compiler/CISACodeGen/HullShaderCodeGen.hpp"
#include "Compiler/CISACodeGen/DomainShaderCodeGen.hpp"
#include "Compiler/CISACodeGen/OpenCLKernelCodeGen.hpp"
#include "Compiler/MetaDataApi/MetaDataApi.h"
#include "common/secure_mem.h"
#include "Probe/Assertion.h"

using namespace llvm;
using namespace IGC;
using namespace IGC::IGCMD;
CShader::CShader(Function* pFunc, CShaderProgram* pProgram)
    : entry(pFunc)
    , m_parent(pProgram)
    , encoder()
    , m_HasBarrier(false)
{
    m_ctx = m_parent->GetContext();
    m_WI = nullptr;
    m_deSSA = nullptr;
    m_coalescingEngine = nullptr;
    m_DL = nullptr;
    m_FGA = nullptr;
    m_VRA = nullptr;
    m_shaderStats = nullptr;
    m_constantBufferMask = 0;
    m_constantBufferLoaded = 0;
    m_uavLoaded = 0;
    for (int i = 0; i < 4; i++)
    {
        m_shaderResourceLoaded[i] = 0;
    }
    m_renderTargetLoaded = 0;
    isInputsPulled = false;
    m_cbSlot = -1;
    m_statelessCBPushedSize = 0;
    isMessageTargetDataCacheDataPort = false;
    m_BindingTableEntryCount = 0;
    m_BindingTableUsedEntriesBitmap = 0;
    // [OCL] preAnalysis()/ParseShaderSpecificOpcode() must
    // set this to true if there is any stateless access.
    m_HasGlobalStatelessMemoryAccess = false;
    m_HasConstantStatelessMemoryAccess = false;
    m_HasDPAS = false;

    m_simdProgram.init(!m_ctx->platform.hasScratchSurface(), m_ctx->platform.maxPerThreadScratchSpace(), GetContext()->getModuleMetaData()->compOpt.UseScratchSpacePrivateMemory);
}

void CShader::InitEncoder(SIMDMode simdSize, bool canAbortOnSpill, ShaderDispatchMode shaderMode)
{
    m_sendStallCycle = 0;
    m_staticCycle = 0;
    m_maxBlockId = 0;
    m_ScratchSpaceSize = 0;
    m_R0 = nullptr;
    m_NULL = nullptr;
    m_TSC = nullptr;
    m_SR0 = nullptr;
    m_CR0 = nullptr;
    m_CE0 = nullptr;
    m_DBG = nullptr;
    m_HW_TID = nullptr;
    m_SP = nullptr;
    m_FP = nullptr;
    m_SavedFP = nullptr;
    m_ARGV = nullptr;
    m_RETV = nullptr;
    m_SavedSRetPtr = nullptr;
    m_ImplArgBufPtr = nullptr;
    m_LocalIdBufPtr = nullptr;

    // SIMD32 is a SIMD16 shader with two instances of each instruction.
    m_SIMDSize = (simdSize == SIMDMode::SIMD8 ? SIMDMode::SIMD8 : SIMDMode::SIMD16);
    m_ShaderDispatchMode = shaderMode;
    m_numberInstance = simdSize == SIMDMode::SIMD32 ? 2 : 1;
    m_dispatchSize = simdSize;
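    // For example (a sketch of the mapping above): a SIMD32 dispatch yields
    // m_SIMDSize = SIMD16, m_numberInstance = 2, m_dispatchSize = SIMD32,
    // while a SIMD8 dispatch yields m_SIMDSize = SIMD8 with a single instance.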
    globalSymbolMapping.clear();
    symbolMapping.clear();
    ccTupleMapping.clear();
    ConstantPool.clear();
    setup.clear();
    patchConstantSetup.clear();
    kernelArgToPayloadOffsetMap.clear();
    encoder.SetProgram(this);
}

// Pre-analysis pass to be executed before the call to the vISA builder, so we can pass the scratch space offset.
void CShader::PreAnalysisPass()
{
    ExtractGlobalVariables();

    auto funcMDItr = m_ModuleMetadata->FuncMD.find(entry);
    if (funcMDItr != m_ModuleMetadata->FuncMD.end())
    {
        if (funcMDItr->second.privateMemoryPerWI != 0)
        {
            if (GetContext()->getModuleMetaData()->compOpt.UseScratchSpacePrivateMemory
                || GetContext()->getModuleMetaData()->compOpt.UseStatelessforPrivateMemory
                )
            {
                const uint32_t GRFSize = getGRFSize();
                IGC_ASSERT(0 < GRFSize);

                m_ScratchSpaceSize = funcMDItr->second.privateMemoryPerWI * numLanes(m_dispatchSize);

                // Round up to a GRF-aligned size in bytes.
                m_ScratchSpaceSize = ((GRFSize + m_ScratchSpaceSize - 1) / GRFSize) * GRFSize;
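                // Worked example (assumed values): privateMemoryPerWI = 10 at SIMD8
                // gives 80 bytes; with a 32-byte GRF this rounds up to 96 bytes (3 GRFs).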

            }
        }
    }

    for (auto BB = entry->begin(), BE = entry->end(); BB != BE; ++BB) {
        llvm::BasicBlock* pLLVMBB = &(*BB);
        llvm::BasicBlock::InstListType& instructionList = pLLVMBB->getInstList();
        for (auto I = instructionList.begin(), E = instructionList.end(); I != E; ++I) {
            llvm::Instruction* inst = &(*I);
            ParseShaderSpecificOpcode(inst);
        }
    }
}

SProgramOutput* CShader::ProgramOutput()
{
    return &m_simdProgram;
}

void CShader::EOTURBWrite()
{

    CEncoder& encoder = GetEncoder();
    uint messageLength = 3;

    // Create a payload of size 3 = header + channel mask + undef data.
    // As an EOT message can't have message length == 0, set channel mask = 0 and data = undef.
    CVariable* pEOTPayload =
        GetNewVariable(
            messageLength * numLanes(SIMDMode::SIMD8),
            ISA_TYPE_D, EALIGN_GRF, false, 1, "EOTPayload");

    CVariable* zero = ImmToVariable(0x0, ISA_TYPE_D);
    // write at handle 0
    CopyVariable(pEOTPayload, zero, 0);
    // use 0 as write mask
    CopyVariable(pEOTPayload, zero, 1);

    constexpr uint exDesc = EU_MESSAGE_TARGET_URB | cMessageExtendedDescriptorEOTBit;

    const uint desc = UrbMessage(
        messageLength,
        0,
        true,
        false,
        true,
        0,
        EU_URB_OPCODE_SIMD8_WRITE);

    CVariable* pMessDesc = ImmToVariable(desc, ISA_TYPE_D);

    encoder.Send(nullptr, pEOTPayload, exDesc, pMessDesc);
    encoder.Push();
}

void CShader::EOTRenderTarget(CVariable* r1, bool isPerCoarse)
{
    CVariable* src[4] = { nullptr, nullptr, nullptr, nullptr };
    bool isUndefined[4] = { true, true, true, true };
    CVariable* const nullSurfaceBti = ImmToVariable(m_pBtiLayout->GetNullSurfaceIdx(), ISA_TYPE_D);
    CVariable* const blendStateIndex = ImmToVariable(0, ISA_TYPE_D);
    SetBindingTableEntryCountAndBitmap(true, BUFFER_TYPE_UNKNOWN, 0, m_pBtiLayout->GetNullSurfaceIdx());
    encoder.RenderTargetWrite(
        src,
        isUndefined,
        true,  // lastRenderTarget,
        true,  // Null RT
        false, // perSample,
        isPerCoarse, // coarseMode,
        false, // isHeaderMaskFromCe0,
        nullSurfaceBti,
        blendStateIndex,
        nullptr, // source0Alpha,
        nullptr, // oMaskOpnd,
        nullptr, // outputDepthOpnd,
        nullptr, // stencilOpnd,
        nullptr, // cpscounter,
        nullptr, // sampleIndex,
        r1);
    encoder.Push();
}


void CShader::AddEpilogue(llvm::ReturnInst* ret)
{
    encoder.EOT();
    encoder.Push();
}

void CShader::InitializeStackVariables()
{
    // create the argument-value register, limited to 12 GRFs
    m_ARGV = GetNewVariable(getGRFSize() * 3, ISA_TYPE_D, getGRFAlignment(), false, 1, "ARGV");
    encoder.GetVISAPredefinedVar(m_ARGV, PREDEFINED_ARG);
    // create the return-value register, limited to 4 GRFs
    m_RETV = GetNewVariable(getGRFSize(), ISA_TYPE_D, getGRFAlignment(), false, 1, "ReturnValue");
    encoder.GetVISAPredefinedVar(m_RETV, PREDEFINED_RET);
    // create the stack-pointer register
    m_SP = GetNewVariable(1, ISA_TYPE_UQ, EALIGN_QWORD, true, 1, "SP");
    encoder.GetVISAPredefinedVar(m_SP, PREDEFINED_FE_SP);
    // create the frame-pointer register
    m_FP = GetNewVariable(1, ISA_TYPE_UQ, EALIGN_QWORD, true, 1, "FP");
    encoder.GetVISAPredefinedVar(m_FP, PREDEFINED_FE_FP);
    // create pointers to the implicit-argument and local-ID buffers
    if (!m_ctx->platform.isXeHPSDVPlus() &&
        IGC_IS_FLAG_DISABLED(ForceInlineStackCallWithImplArg))
    {
        m_ImplArgBufPtr = GetNewVariable(1, ISA_TYPE_UQ, EALIGN_QWORD, true, 1, "ImplArgPtr");
        encoder.GetVISAPredefinedVar(m_ImplArgBufPtr, PREDEFINED_IMPL_ARG_BUF_PTR);
        m_LocalIdBufPtr = GetNewVariable(1, ISA_TYPE_UQ, EALIGN_QWORD, true, 1, "LocalIdPtr");
        encoder.GetVISAPredefinedVar(m_LocalIdBufPtr, PREDEFINED_LOCAL_ID_BUF_PTR);
    }
}

/// save the FP of the previous frame when entering a stack-call function
void CShader::SaveStackState()
{
    IGC_ASSERT(!m_SavedFP);
    IGC_ASSERT(m_FP);
    IGC_ASSERT(m_SP);
    m_SavedFP = GetNewVariable(m_FP);
    encoder.Copy(m_SavedFP, m_FP);
    encoder.Push();
}

/// restore SP and FP when exiting a stack-call function
void CShader::RestoreStackState()
{
    IGC_ASSERT(m_SavedFP);
    IGC_ASSERT(m_FP);
    IGC_ASSERT(m_SP);
    // Restore SP to the current FP
    encoder.Copy(m_SP, m_FP);
    encoder.Push();
    // Restore FP to the previous frame's FP
    encoder.Copy(m_FP, m_SavedFP);
    encoder.Push();
    m_SavedFP = nullptr;
}

void CShader::CreateImplicitArgs()
{
    m_numBlocks = entry->size();
    m_R0 = GetNewVariable(getGRFSize() / SIZE_DWORD, ISA_TYPE_D, EALIGN_GRF, false, 1, "R0");
    encoder.GetVISAPredefinedVar(m_R0, PREDEFINED_R0);

    // create variables for implicit args
    ImplicitArgs implicitArgs(*entry, m_pMdUtils);
    unsigned numImplicitArgs = implicitArgs.size();

    // Push args are only for the entry function
    const unsigned numPushArgsEntry = m_ModuleMetadata->pushInfo.pushAnalysisWIInfos.size();
    const unsigned numPushArgs = (isEntryFunc(m_pMdUtils, entry) && !isNonEntryMultirateShader(entry) ? numPushArgsEntry : 0);
    const int numFuncArgs = entry->arg_size() - numImplicitArgs - numPushArgs;
    IGC_ASSERT_MESSAGE(0 <= numFuncArgs, "Function arg size does not match meta data and push args.");

    // Create a symbol for every argument [5/2019].
    // (Previously, symbols were created only for implicit args.)
    // vISA requires an input var (argument) to be a root symbol (CVariable),
    // and GetSymbol() does not guarantee this due to coalescing of argument
    // values and others. Here, we handle arguments specially by creating
    // a CVariable symbol for each argument, and use this newly-created symbol
    // as the root symbol for its congruent class, if any. This should always
    // work, as it does not matter which value in a coalesced set is going to
    // be a root symbol.
    //
    // Once a root symbol is created, the root value of its congruent class
    // needs to have as its symbol an alias to this root symbol.

    // Update SymbolMapping for the argument value.
    auto updateArgSymbolMapping = [&](Value* Arg, CVariable* CVarArg) {
        symbolMapping.insert(std::make_pair(Arg, CVarArg));
        Value* Node = m_deSSA ? m_deSSA->getRootValue(Arg) : nullptr;
        if (Node)
        {
            // If Arg isn't a root, we must set up symbolMapping for the root.
            if (Node != Arg) {
                // 'Node' should not have a symbol entry at this moment.
                IGC_ASSERT_MESSAGE(symbolMapping.count(Node) == 0, "Root symbol of arg should not be set at this point!");
                CVariable* aV = CVarArg;
                if (IGC_GET_FLAG_VALUE(EnableDeSSAAlias) >= 2)
                {
                    aV = createAliasIfNeeded(Node, CVarArg);
                }
                symbolMapping[Node] = aV;
            }
        }
    };

    llvm::Function::arg_iterator arg = entry->arg_begin();
    for (int i = 0; i < numFuncArgs; ++i, ++arg)
    {
        Value* ArgVal = arg;
        if (ArgVal->use_empty())
            continue;
        e_alignment algn = GetPreferredAlignment(ArgVal, m_WI, m_ctx);
        CVariable* ArgCVar = GetNewVector(ArgVal, algn);
        updateArgSymbolMapping(ArgVal, ArgCVar);
    }

    for (unsigned i = 0; i < numImplicitArgs; ++i, ++arg) {
        ImplicitArg implictArg = implicitArgs[i];
        IGC_ASSERT_MESSAGE((implictArg.getNumberElements() < (UINT16_MAX)), "getNumberElements higher than 64k");

        bool isUniform = WIAnalysis::isDepUniform(implictArg.getDependency());
        uint16_t nbElements = (uint16_t)implictArg.getNumberElements();

        CVariable* var = GetNewVariable(
            nbElements,
            implictArg.getVISAType(*m_DL),
            implictArg.getAlignType(*m_DL),
            isUniform,
            isUniform ? 1 : m_numberInstance,
            CName(implictArg.getName()));

        if (implictArg.getArgType() == ImplicitArg::R0) {
            encoder.GetVISAPredefinedVar(var, PREDEFINED_R0);
        }

        // This is a per-function symbol mapping; that is, it is only available for an
        // LLVM function and is cleared for each run of EmitVISAPass.
        updateArgSymbolMapping(arg, var);

        // The kernel's implicit argument symbols will be available for the
        // whole kernel CodeGen. With this, there is no need to pass implicit
        // arguments, and this should help reduce register pressure in the
        // presence of subroutines.
        IGC_ASSERT_MESSAGE(!globalSymbolMapping.count(&(*arg)), "should not exist already");
        globalSymbolMapping.insert(std::make_pair(&(*arg), var));
    }

    for (unsigned i = 0; i < numPushArgs; ++i, ++arg)
    {
        Value* ArgVal = arg;
        if (ArgVal->use_empty())
            continue;
        e_alignment algn = GetPreferredAlignment(ArgVal, m_WI, m_ctx);
        CVariable* ArgCVar = GetNewVector(ArgVal, algn);
        updateArgSymbolMapping(ArgVal, ArgCVar);
    }

    CreateAliasVars();
}

DebugInfoData& IGC::CShader::GetDebugInfoData()
{
    return diData;
}

// For sub-vector aliasing, pre-allocate CVariables for those
// values that have sub-vector aliases before emitting instructions.
// (The sub-vector aliasing is done in VariableReuseAnalysis.)
void CShader::CreateAliasVars()
{
    // Create CVariables for vector aliasing (this is more
    // efficient than doing it on the fly inside getSymbol()).
    if (GetContext()->getVectorCoalescingControl() > 0 &&
        !m_VRA->m_aliasMap.empty())
    {
        // For each vector alias root, generate a CVariable
        // for it and all its component sub-vectors.
        for (auto& II : m_VRA->m_aliasMap)
        {
            SSubVecDesc* SV = II.second;
            Value* rootVal = SV->BaseVector;
            if (SV->Aliaser != rootVal)
                continue;
            CVariable* rootCVar = GetSymbol(rootVal);

            // Generate all vector aliasers and their
            // deSSA roots, if any.
            for (int i = 0, sz = (int)SV->Aliasers.size(); i < sz; ++i)
            {
                SSubVecDesc* aSV = SV->Aliasers[i];
                Value* V = aSV->Aliaser;
                // Create an alias CVariable for the Aliaser and its deSSA root, if any.
                Value* Vals[2] = { V, nullptr };
                if (m_deSSA) {
                    Value* dessaRootVal = m_deSSA->getRootValue(V);
                    if (dessaRootVal && dessaRootVal != V)
                        Vals[1] = dessaRootVal;
                }
                int startIx = aSV->StartElementOffset;

                for (int i = 0; i < 2; ++i)
                {
                    V = Vals[i];
                    if (!V)
                        continue;

                    Type* Ty = V->getType();
                    IGCLLVM::FixedVectorType* VTy = dyn_cast<IGCLLVM::FixedVectorType>(Ty);
                    Type* BTy = VTy ? VTy->getElementType() : Ty;
                    int nelts = (VTy ? (int)VTy->getNumElements() : 1);

                    VISA_Type visaTy = GetType(BTy);
                    int typeBytes = (int)CEncoder::GetCISADataTypeSize(visaTy);
                    int offsetInBytes = typeBytes * startIx;
                    int nbelts = nelts;
                    if (!rootCVar->IsUniform())
                    {
                        int width = (int)numLanes(m_SIMDSize);
                        offsetInBytes *= width;
                        nbelts *= width;
                    }
                    CVariable* Var = GetNewAlias(rootCVar, visaTy, offsetInBytes, nbelts);
                    symbolMapping.insert(std::pair<llvm::Value*, CVariable*>(V, Var));
                }
            }
        }
    }
}

void CShader::AddPatchTempSetup(CVariable* var)
{
    payloadTempSetup.push_back(var);
}

bool CShader::AppendPayloadSetup(CVariable* var)
{
    auto v = var->GetAlias() ? var->GetAlias() : var;
    if (find(payloadLiveOutSetup.begin(), payloadLiveOutSetup.end(), v) != payloadLiveOutSetup.end())
    {
        return true;
    }
    payloadLiveOutSetup.push_back(v);
    return false;
}

void CShader::AddSetup(uint index, CVariable* var)
{
    if (setup.size() < index + 1) {
        setup.resize(index + 1, nullptr);
    }
    if (setup[index] == nullptr) {
        setup[index] = var;
    }
}

void CShader::AddPatchConstantSetup(uint index, CVariable* var)
{
    if (patchConstantSetup.size() < index + 1) {
        patchConstantSetup.resize(index + 1, nullptr);
    }
    if (patchConstantSetup[index] == nullptr) {
        patchConstantSetup[index] = var;
    }
}

void CShader::AllocateInput(CVariable* var, uint offset, uint instance, bool forceLiveOut)
{
    // the input offset must respect the variable alignment
    IGC_ASSERT(nullptr != var);
    IGC_ASSERT(offset % (1u << var->GetAlign()) == 0);
    encoder.DeclareInput(var, offset, instance);
    kernelArgToPayloadOffsetMap[var] = offset;
    // For the payload section, we need to mark inputs as outputs
    // so that inputs stay alive across the entire payload section.
    if (forceLiveOut)
    {
        encoder.MarkAsPayloadLiveOut(var);
    }
}

void CShader::AllocateOutput(CVariable* var, uint offset, uint instance)
{
    IGC_ASSERT(nullptr != var);
    IGC_ASSERT(offset % (1u << var->GetAlign()) == 0);
    encoder.DeclareInput(var, offset, instance);
    encoder.MarkAsOutput(var);
}

void CShader::AllocateConstants3DShader(uint& offset)
{
    if (m_Platform->WaForceCB0ToBeZeroWhenSendingPC() && m_DriverInfo->implementPushConstantWA()) {
        // Allocate space for constants pushed from the constant buffer
        AllocateConstants(offset);
        AllocateSimplePushConstants(offset);
        // Allocate space for constants set by the driver
        AllocateNOSConstants(offset);
    }
    else {
        // Allocate space for constants set by the driver
        AllocateNOSConstants(offset);
        // Allocate space for constants pushed from the constant buffer
        AllocateConstants(offset);
        AllocateSimplePushConstants(offset);
    }
    offset = iSTD::Align(offset, getGRFSize());
}

void CShader::AllocateConstants(uint& offset)
{
    m_ConstantBufferLength = 0;
    for (auto I = pushInfo.constants.begin(), E = pushInfo.constants.end(); I != E; I++) {
        CVariable* var = GetSymbol(m_argListCache[I->second]);
        AllocateInput(var, offset + m_ConstantBufferLength, 0, encoder.IsCodePatchCandidate());
        m_ConstantBufferLength += var->GetSize();
    }

    m_ConstantBufferLength = iSTD::Align(m_ConstantBufferLength, getGRFSize());
    offset += m_ConstantBufferLength;
}

void CShader::AllocateSimplePushConstants(uint& offset)
{
    for (unsigned int i = 0; i < pushInfo.simplePushBufferUsed; i++)
    {
        for (auto I : pushInfo.simplePushInfoArr[i].simplePushLoads)
        {
            uint subOffset = I.first;
            CVariable* var = GetSymbol(m_argListCache[I.second]);
            AllocateInput(var, subOffset - pushInfo.simplePushInfoArr[i].offset + offset, 0, encoder.IsCodePatchCandidate());
        }
        offset += pushInfo.simplePushInfoArr[i].size;
    }
}

void CShader::AllocateNOSConstants(uint& offset)
{
    uint maxConstantPushed = 0;

    for (auto I = pushInfo.constantReg.begin(), E = pushInfo.constantReg.end(); I != E; I++) {
        CVariable* var = GetSymbol(m_argListCache[I->second]);
        AllocateInput(var, offset + I->first * SIZE_DWORD, 0, encoder.IsCodePatchCandidate());
        maxConstantPushed = std::max(maxConstantPushed, I->first + 1);
    }
    maxConstantPushed = iSTD::Max(maxConstantPushed, static_cast<uint>(m_ModuleMetadata->MinNOSPushConstantSize));
    m_NOSBufferSize = iSTD::Align(maxConstantPushed * SIZE_DWORD, getGRFSize());
    offset += m_NOSBufferSize;
}


void CShader::CreateGatherMap()
{
    int index = -1;
    gatherMap.reserve(pushInfo.constants.size());
    for (auto I = pushInfo.constants.begin(), E = pushInfo.constants.end(); I != E; I++)
    {
        unsigned int address = (I->first.bufId * 256 * 4) + (I->first.eltId);
        unsigned int cstOffset = address / 4;
        unsigned int cstChannel = address % 4;
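        // Worked example (assumed values): bufId = 0, eltId = 18 gives
        // address = 18, so cstOffset = 4 and cstChannel = 2, i.e. the .z
        // channel of DWord offset 4 in constant buffer 0.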
        if (cstOffset != index)
        {
            USC::SConstantGatherEntry entry;
            entry.GatherEntry.Fields.constantBufferOffset = cstOffset % 256;
            entry.GatherEntry.Fields.channelMask = BIT(cstChannel);
            // With 3DSTATE_DX9_CONSTANT, if the buffer is larger than 4KB,
            // constants past offset 255 are accessed through constant buffer 1.
            int CBIndex = cstOffset / 256;
            entry.GatherEntry.Fields.constantBufferIndex = CBIndex;
            m_constantBufferMask |= BIT(CBIndex);
            gatherMap.push_back(entry);
            index = cstOffset;
        }
        else
        {
            gatherMap[gatherMap.size() - 1].GatherEntry.Fields.channelMask |= BIT(cstChannel);
        }
    }

    // The size of the gather map must be even
    if (gatherMap.size() % 2 != 0)
    {
        USC::SConstantGatherEntry entry;
        entry.GatherEntry.Value = 0;
        gatherMap.push_back(entry);
    }
}

void CShader::CreateConstantBufferOutput(SKernelProgram* pKernelProgram)
{
    pKernelProgram->ConstantBufferMask = m_constantBufferMask;
    pKernelProgram->gatherMapSize = gatherMap.size();
    if (pKernelProgram->gatherMapSize > 0)
    {
        pKernelProgram->gatherMap = new char[pKernelProgram->gatherMapSize * sizeof(USC::SConstantGatherEntry)];
        memcpy_s(pKernelProgram->gatherMap, pKernelProgram->gatherMapSize *
            sizeof(USC::SConstantGatherEntry),
            &gatherMap[0],
            gatherMap.size() * sizeof(USC::SConstantGatherEntry));
        pKernelProgram->ConstantBufferLength = m_ConstantBufferLength / getMinPushConstantBufferAlignmentInBytes();
    }

    if (m_cbSlot != -1)
    {
        pKernelProgram->bufferSlot = m_cbSlot;
        pKernelProgram->statelessCBPushedSize = m_statelessCBPushedSize;
    }

    // for simple push
    for (unsigned int i = 0; i < pushInfo.simplePushBufferUsed; i++)
    {
        pKernelProgram->simplePushInfoArr[i].m_cbIdx = pushInfo.simplePushInfoArr[i].cbIdx;
        pKernelProgram->simplePushInfoArr[i].m_pushableAddressGrfOffset = pushInfo.simplePushInfoArr[i].pushableAddressGrfOffset;
        pKernelProgram->simplePushInfoArr[i].m_pushableOffsetGrfOffset = pushInfo.simplePushInfoArr[i].pushableOffsetGrfOffset;
        pKernelProgram->simplePushInfoArr[i].m_offset = pushInfo.simplePushInfoArr[i].offset;
        pKernelProgram->simplePushInfoArr[i].m_size = pushInfo.simplePushInfoArr[i].size;
        pKernelProgram->simplePushInfoArr[i].isStateless = pushInfo.simplePushInfoArr[i].isStateless;
        pKernelProgram->simplePushInfoArr[i].isBindless = pushInfo.simplePushInfoArr[i].isBindless;
    }

    if (GetContext()->m_ConstantBufferReplaceShaderPatterns)
    {
        pKernelProgram->m_ConstantBufferReplaceShaderPatterns = GetContext()->m_ConstantBufferReplaceShaderPatterns;
        pKernelProgram->m_ConstantBufferReplaceShaderPatternsSize = GetContext()->m_ConstantBufferReplaceShaderPatternsSize;
        pKernelProgram->m_ConstantBufferUsageMask = GetContext()->m_ConstantBufferUsageMask;
        pKernelProgram->m_ConstantBufferReplaceSize = GetContext()->m_ConstantBufferReplaceSize;
    }
}

void CShader::CreateFunctionSymbol(llvm::Function* pFunc)
{
    // Functions with uses in this module require relocation.
    CVariable* funcAddr = GetSymbol(pFunc);
    std::string funcName = pFunc->getName().str();
    encoder.AddVISASymbol(funcName, funcAddr);
    encoder.Push();
}

void CShader::CreateGlobalSymbol(llvm::GlobalVariable* pGlobal)
{
    CVariable* globalAddr = GetSymbol(pGlobal);
    std::string globalName = pGlobal->getName().str();
    encoder.AddVISASymbol(globalName, globalAddr);
    encoder.Push();
}

void CShader::CacheArgumentsList()
{
    m_argListCache.clear();
    for (auto arg = entry->arg_begin(); arg != entry->arg_end(); ++arg)
        m_argListCache.push_back(&(*arg));
}

// The pixel shader has a dedicated implementation of this function.
void CShader::MapPushedInputs()
{
    for (auto I = pushInfo.inputs.begin(), E = pushInfo.inputs.end(); I != E; I++)
    {
        // Map the value associated with the pushed input to a physical register.
        CVariable* var = GetSymbol(m_argListCache[I->second.argIndex]);
        AddSetup(I->second.index, var);
    }
}

bool CShader::IsPatchablePS()
{
    return (GetShaderType() == ShaderType::PIXEL_SHADER &&
        static_cast<CPixelShader*>(this)->GetPhase() != PSPHASE_PIXEL);
}

CVariable* CShader::GetR0()
{
    return m_R0;
}

CVariable* CShader::GetNULL()
{
    if (!m_NULL)
    {
        m_NULL = new (Allocator)CVariable(2, true, ISA_TYPE_D, EVARTYPE_GENERAL, EALIGN_DWORD, false, 1, CName::NONE);
        encoder.GetVISAPredefinedVar(m_NULL, PREDEFINED_NULL);
    }
    return m_NULL;
}

CVariable* CShader::GetTSC()
{
    if (!m_TSC)
    {
        m_TSC = new (Allocator) CVariable(2, true, ISA_TYPE_UD, EVARTYPE_GENERAL, EALIGN_DWORD, false, 1, CName::NONE);
        encoder.GetVISAPredefinedVar(m_TSC, PREDEFINED_TSC);
    }
    return m_TSC;
}

CVariable* CShader::GetSR0()
{
    if (!m_SR0)
    {
        m_SR0 = GetNewVariable(4, ISA_TYPE_UD, EALIGN_DWORD, true, CName::NONE);

        encoder.GetVISAPredefinedVar(m_SR0, PREDEFINED_SR0);
    }
    return m_SR0;
}

CVariable* CShader::GetCR0()
{
    if (!m_CR0)
    {
        m_CR0 = GetNewVariable(3, ISA_TYPE_UD, EALIGN_DWORD, true, CName::NONE);
        encoder.GetVISAPredefinedVar(m_CR0, PREDEFINED_CR0);
    }
    return m_CR0;
}

CVariable* CShader::GetCE0()
{
    if (!m_CE0)
    {
        m_CE0 = GetNewVariable(1, ISA_TYPE_UD, EALIGN_DWORD, true, CName::NONE);
        encoder.GetVISAPredefinedVar(m_CE0, PREDEFINED_CE0);
    }
    return m_CE0;
}

CVariable* CShader::GetDBG()
{
    if (!m_DBG)
    {
        m_DBG = GetNewVariable(2, ISA_TYPE_D, EALIGN_DWORD, true, CName::NONE);
        encoder.GetVISAPredefinedVar(m_DBG, PREDEFINED_DBG);
    }
    return m_DBG;
}

CVariable* CShader::GetHWTID()
{
    if (!m_HW_TID)
    {
        if (m_Platform->getHWTIDFromSR0())
        {
            auto RemoveBitRange = [this](CVariable*& src, unsigned removebit, unsigned range)->void
            {
                CVariable* leftHalf = GetNewVariable(src);
                CVariable* rightHalf = GetNewVariable(src);
                uint32_t mask = BITMASK(removebit);
                // src = (src & mask) | ((src >> range) & ~mask)
                encoder.And(rightHalf, src, ImmToVariable(mask, ISA_TYPE_D));
                encoder.Push();
                encoder.IShr(leftHalf, src, ImmToVariable(range, ISA_TYPE_D));
                encoder.Push();
                encoder.And(leftHalf, leftHalf, ImmToVariable(~mask, ISA_TYPE_D));
                encoder.Push();
                encoder.Or(src, rightHalf, leftHalf);
                encoder.Push();
            };
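            // Worked example (assumed value): RemoveBitRange(src = 0b1011010, removebit = 3, range = 1)
            // drops bit 3 and shifts bits [6:4] down by one, yielding 0b101010.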

            // XeHP_SDV
            // [13:11] Slice ID.
            // [10:9] Dual - SubSlice ID
            // [8] SubSlice ID.
            // [7] : EUID[2]
            // [6] : Reserved
            // [5:4] EUID[1:0]
            // [3] : Reserved MBZ
            // [2:0] : TID
            //
            // HWTID is calculated using a concatenation of TID:EUID:SubSliceID:SliceID

            uint32_t bitmask = BITMASK(14);
            m_HW_TID = GetNewVariable(1, ISA_TYPE_UD, EALIGN_DWORD, true, 1, "HWTID");
            encoder.SetNoMask();
            encoder.SetSrcSubReg(0, 0);
            encoder.And(m_HW_TID, GetSR0(), ImmToVariable(bitmask, ISA_TYPE_D));
            encoder.Push();

            // Remove bit [6]
            RemoveBitRange(m_HW_TID, 6, 1);
            // Remove bit [3]
            RemoveBitRange(m_HW_TID, 3, 1);
        }
        else
        {
            m_HW_TID = GetNewVariable(1, ISA_TYPE_UD, EALIGN_DWORD, true, 1, "HWTID");
            encoder.GetVISAPredefinedVar(m_HW_TID, PREDEFINED_HW_TID);
        }
    }
    return m_HW_TID;
}

CVariable* CShader::GetPrivateBase()
{
    ImplicitArgs implicitArgs(*entry, m_pMdUtils);
    unsigned numPushArgs = m_ModuleMetadata->pushInfo.pushAnalysisWIInfos.size();
    unsigned numImplicitArgs = implicitArgs.size();
    IGC_ASSERT_MESSAGE(entry->arg_size() >= (numImplicitArgs + numPushArgs), "Function arg size does not match meta data and push args.");
    unsigned numFuncArgs = entry->arg_size() - numImplicitArgs - numPushArgs;

    Argument* kerArg = nullptr;
    llvm::Function::arg_iterator arg = entry->arg_begin();
    for (unsigned i = 0; i < numFuncArgs; ++i, ++arg);
    for (unsigned i = 0; i < numImplicitArgs; ++i, ++arg) {
        ImplicitArg implicitArg = implicitArgs[i];
        if (implicitArg.getArgType() == ImplicitArg::ArgType::PRIVATE_BASE)
        {
            kerArg = (&*arg);
            break;
        }
    }
    IGC_ASSERT(kerArg);
    return GetSymbol(kerArg);
}

CVariable* CShader::GetImplArgBufPtr()
{
    IGC_ASSERT(m_ImplArgBufPtr);
    return m_ImplArgBufPtr;
}

CVariable* CShader::GetLocalIdBufPtr()
{
    IGC_ASSERT(m_LocalIdBufPtr);
    return m_LocalIdBufPtr;
}

CVariable* CShader::GetFP()
{
    IGC_ASSERT(m_FP);
    return m_FP;
}

CVariable* CShader::GetPrevFP()
{
    return m_SavedFP;
}

CVariable* CShader::GetSP()
{
    IGC_ASSERT(m_SP);
    return m_SP;
}

CVariable* CShader::GetARGV()
{
    IGC_ASSERT(m_ARGV);
    return m_ARGV;
}

CVariable* CShader::GetRETV()
{
    IGC_ASSERT(m_RETV);
    return m_RETV;
}

CEncoder& CShader::GetEncoder()
{
    return encoder;
}

void CShader::SaveSRet(CVariable* sretPtr)
{
    IGC_ASSERT(m_SavedSRetPtr == nullptr);
    m_SavedSRetPtr = sretPtr;
}

CVariable* CShader::GetAndResetSRet()
{
    CVariable* temp = m_SavedSRetPtr;
    m_SavedSRetPtr = nullptr;
    return temp;
}

CShader::~CShader()
{
    // free all the memory allocated
    Destroy();
}

bool CShader::IsValueUsed(llvm::Value* value)
{
    auto it = symbolMapping.find(value);
    if (it != symbolMapping.end())
    {
        return true;
    }
    return false;
}

CVariable* CShader::GetGlobalCVar(llvm::Value* value)
{
    auto it = globalSymbolMapping.find(value);
    if (it != globalSymbolMapping.end())
        return it->second;
    return nullptr;
}

CVariable* CShader::BitCast(CVariable* var, VISA_Type newType)
{
    CVariable* bitCast = nullptr;
    uint32_t newEltSz = CEncoder::GetCISADataTypeSize(newType);
    uint32_t eltSz = var->GetElemSize();
    // Bitcast requires that src and dst have the same total size, which means
    // one element size is the same as, or a multiple of, the other (if they
    // are vectors with a different number of elements).
    IGC_ASSERT((newEltSz >= eltSz && (newEltSz % eltSz) == 0)
        || (newEltSz < eltSz && (eltSz % newEltSz) == 0));
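    // E.g., aliasing a UQ (8-byte) variable as UD (4-byte) is legal in either
    // direction: 1 x i64 views as 2 x i32 and vice versa.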
    if (var->IsImmediate())
    {
        if (newEltSz == eltSz)
            bitCast = ImmToVariable(var->GetImmediateValue(), newType);
        else
        {
            // Need a temp. For example, bitcast i64 0 -> 2xi32
            CVariable* tmp = GetNewVariable(
                1,
                var->GetType(),
                CEncoder::GetCISADataTypeAlignment(var->GetType()),
                true,
                1,
                "vecImmBitCast");
            encoder.Copy(tmp, var);
            encoder.Push();

            bitCast = GetNewAlias(tmp, newType, 0, 0);
        }
    }
    else
    {
        // TODO: store this bitCasted var to avoid creating it many times.
        bitCast = GetNewAlias(var, newType, 0, 0);
    }
    return bitCast;
}

CVariable* CShader::ImmToVariable(uint64_t immediate, VISA_Type type, bool isCodePatchCandidate)
{
    VISA_Type immType = type;

    if (type == ISA_TYPE_BOOL)
    {
        // bool immediates cannot be inlined
        uint immediateValue = immediate ? 0xFFFFFFFF : 0;
        CVariable* immVar = new (Allocator) CVariable(immediateValue, ISA_TYPE_UD);
        // The source variable is no longer a boolean; vISA cannot take a boolean source immediate.

        CVariable* dst = GetNewVariable(
            numLanes(m_dispatchSize), ISA_TYPE_BOOL, EALIGN_BYTE, CName::NONE);
        // FIXME: We need to pop/push the encoder context
        //encoder.save();
        if (isCodePatchCandidate)
        {
            encoder.SetPayloadSectionAsPrimary();
        }
        encoder.SetP(dst, immVar);
        encoder.Push();
        if (isCodePatchCandidate)
        {
            encoder.SetPayloadSectionAsSecondary();
        }
        return dst;
    }

    CVariable* var = new (Allocator) CVariable(immediate, immType);
    return var;
}
CVariable* CShader::GetNewVariable(
    uint16_t nbElement, VISA_Type type, e_alignment align,
    UniformArgWrap isUniform, uint16_t numberInstance, const CName& name)
{
    e_varType varType;
    if (type == ISA_TYPE_BOOL)
    {
        varType = EVARTYPE_PREDICATE;
    }
    else
    {
        IGC_ASSERT(align >= CEncoder::GetCISADataTypeAlignment(type));
        varType = EVARTYPE_GENERAL;
    }
    CVariable* var = new (Allocator) CVariable(
        nbElement, isUniform, type, varType, align, false, numberInstance, name);
    encoder.CreateVISAVar(var);
    return var;
}

CVariable* CShader::GetNewVariable(const CVariable* from)
{
    CVariable* var = new (Allocator) CVariable(*from);
    encoder.CreateVISAVar(var);
    return var;
}

CVariable* CShader::GetNewAddressVariable(
    uint16_t nbElement, VISA_Type type,
    UniformArgWrap isUniform, bool isVectorUniform,
    const CName& name)
{
    CVariable* var = new (Allocator) CVariable(
        nbElement, isUniform, type,
        EVARTYPE_ADDRESS, EALIGN_DWORD,
        isVectorUniform, 1, name);
    encoder.CreateVISAVar(var);
    return var;
}

WIBaseClass::WIDependancy CShader::GetDependency(Value* v) const
{
    return m_WI ? (m_WI->whichDepend(v)) : WIBaseClass::RANDOM;
}

void CShader::SetDependency(llvm::Value* v, WIBaseClass::WIDependancy dep)
{
    if (m_WI) m_WI->incUpdateDepend(v, dep);
}

bool CShader::GetIsUniform(llvm::Value* v) const
{
    return m_WI ? (m_WI->isUniform(v)) : false;
}

bool CShader::InsideDivergentCF(const llvm::Instruction* inst) const
{
    return m_WI ? m_WI->insideDivergentCF(inst) : true;
}

bool CShader::InsideWorkgroupDivergentCF(const llvm::Instruction* inst) const
{
    return m_WI ? m_WI->insideWorkgroupDivergentCF(inst) : true;
}

uint CShader::GetNbVectorElementAndMask(llvm::Value* val, uint32_t& mask)
{
    llvm::Type* type = val->getType();
    uint nbElement = int_cast<uint>(cast<IGCLLVM::FixedVectorType>(type)->getNumElements());
    mask = 0;
    // We don't process vectors bigger than 31 elements, as the mask has only 32 bits.
    // If we want to support longer vectors, we need to extend the mask size.
    //
    // If val has been coalesced, don't prune it.
    if (IsCoalesced(val) || nbElement > 31)
    {
        return nbElement;
    }
    bool gpgpuPreemptionWANeeded =
        ((GetShaderType() == ShaderType::OPENCL_SHADER) || (GetShaderType() == ShaderType::COMPUTE_SHADER)) &&
        (m_SIMDSize == SIMDMode::SIMD8) &&
        m_Platform->WaSamplerResponseLengthMustBeGreaterThan1() &&
        m_Platform->supportGPGPUMidThreadPreemption();

    if (llvm::GenIntrinsicInst* inst = llvm::dyn_cast<GenIntrinsicInst>(val))
    {
        // try to prune the destination size
        GenISAIntrinsic::ID IID = inst->getIntrinsicID();
        if (IID == GenISAIntrinsic::GenISA_ldstructured ||
            IID == GenISAIntrinsic::GenISA_typedread)
        {
            // prune with write-mask if possible
            uint elemCnt = 0;
            for (auto I = inst->user_begin(), E = inst->user_end(); I != E; ++I)
            {
                if (llvm::ExtractElementInst* extract = llvm::dyn_cast<llvm::ExtractElementInst>(*I))
                {
                    if (llvm::ConstantInt* index = llvm::dyn_cast<ConstantInt>(extract->getIndexOperand()))
                    {
                        elemCnt++;
                        IGC_ASSERT(index->getZExtValue() < 5);
                        mask |= (1 << index->getZExtValue());
                        continue;
                    }
                }
                // if the vector is accessed by anything other than a direct Extract, we cannot prune it
                elemCnt = nbElement;
                mask = 0;
                break;
            }

            if (mask)
            {
                nbElement = elemCnt;
            }
        }
        else if (isSampleInstruction(inst) || isLdInstruction(inst) || isInfoInstruction(inst))
        {
            // The sampler can return selected channels only with an extra header;
            // when returning only 1-2 channels, this should give better performance.
            uint nbExtract = 0, maxIndex = 0;
            uint8_t maskExtract = 0;
            bool allExtract = true;

            for (auto I = inst->user_begin(), E = inst->user_end(); I != E; ++I)
            {
                ExtractElementInst* extract = llvm::dyn_cast<ExtractElementInst>(*I);
                if (extract != nullptr)
                {
                    llvm::ConstantInt* indexVal;
                    indexVal = llvm::dyn_cast<ConstantInt>(extract->getIndexOperand());
                    if (indexVal != nullptr)
                    {
                        uint index = static_cast<uint>(indexVal->getZExtValue());
                        maxIndex = std::max(maxIndex, index + 1);

                        maskExtract |= (1 << index);
                        nbExtract++;
                    }
                    else
                    {
                        // extractelement with a dynamic index
                        maxIndex = nbElement;
                        allExtract = false;
                        break;
                    }
                }
                else
                {
                    // if the vector is accessed by anything other than a direct Extract, we cannot prune it
                    maxIndex = nbElement;
                    allExtract = false;
                    break;
                }
            }

            // TODO: there are some issues in EmitVISAPass that prevent enabling
            // selected-channel return for info intrinsics.
            if (!allExtract ||
                gpgpuPreemptionWANeeded ||
                IGC_IS_FLAG_DISABLED(EnableSamplerChannelReturn) ||
                isInfoInstruction(inst) ||
                maskExtract > 0xf)
            {
                if (gpgpuPreemptionWANeeded)
                {
                    maxIndex = std::max((uint)2, maxIndex);
                }

                mask = BIT(maxIndex) - 1;
                nbElement = maxIndex;
            }
            else
            {
                // Based on the returned channels, decide whether to do a partial
                // return with an additional header.
                static const bool selectReturnChannels[] = {
                    false, // 0 0000 - should not happen
                    false, // 1 0001 - r
                    false, // 2 0010 - g
                    false, // 3 0011 - rg
                    true,  // 4 0100 - b
                    false, // 5 0101 - r b
                    false, // 6 0110 - gb
                    false, // 7 0111 - rgb
                    true,  // 8 1000 - a
                    true,  // 9 1001 - r a
                    true,  // a 1010 - g a
                    false, // b 1011 - rg a
                    true,  // c 1100 - ba
                    false, // d 1101 - r ba
                    false, // e 1110 - gba
                    false  // f 1111 - rgba
                };
                IGC_ASSERT(maskExtract != 0);
                IGC_ASSERT(maskExtract <= 0xf);

                if (selectReturnChannels[maskExtract])
                {
                    mask = maskExtract;
                    nbElement = nbExtract;
                }
                else
                {
                    mask = BIT(maxIndex) - 1;
                    nbElement = maxIndex;
                }
            }
        }
        else
        {
            GenISAIntrinsic::ID IID = inst->getIntrinsicID();
            if (isLdInstruction(inst) ||
                IID == GenISAIntrinsic::GenISA_URBRead ||
                IID == GenISAIntrinsic::GenISA_URBReadOutput ||
                IID == GenISAIntrinsic::GenISA_DCL_ShaderInputVec ||
                IID == GenISAIntrinsic::GenISA_DCL_HSinputVec)
            {
                // prune without write-mask
                uint maxIndex = 0;
                for (auto I = inst->user_begin(), E = inst->user_end(); I != E; ++I)
                {
                    if (llvm::ExtractElementInst* extract = llvm::dyn_cast<llvm::ExtractElementInst>(*I))
                    {
                        if (llvm::ConstantInt* index = llvm::dyn_cast<ConstantInt>(extract->getIndexOperand()))
                        {
                            maxIndex = std::max(maxIndex, static_cast<uint>(index->getZExtValue()) + 1);
                            continue;
                        }
                    }
                    // if the vector is accessed by anything other than a direct Extract, we cannot prune it
                    maxIndex = nbElement;
                    break;
                }

                mask = BIT(maxIndex) - 1;
                nbElement = maxIndex;
            }
        }
    }
    else if (llvm::BitCastInst* inst = dyn_cast<BitCastInst>(val))
    {
        for (auto I = inst->user_begin(), E = inst->user_end(); I != E; ++I)
        {
            if (llvm::ExtractElementInst* extract = llvm::dyn_cast<llvm::ExtractElementInst>(*I))
            {
                if (llvm::ConstantInt* index = llvm::dyn_cast<ConstantInt>(extract->getIndexOperand()))
                {
                    uint indexBit = BIT(static_cast<uint>(index->getZExtValue()));
                    mask |= indexBit;
                    continue;
                }
            }
            mask = BIT(nbElement) - 1;
            break;
        }
        if (mask)
        {
            nbElement = iSTD::BitCount(mask);
        }
    }
    return nbElement;
}

CShader::ExtractMaskWrapper::ExtractMaskWrapper(CShader* pS, Value* VecVal)
{
    auto it = pS->extractMasks.find(VecVal);
    if (it != pS->extractMasks.end())
    {
        m_hasEM = true;
        m_EM = it->second;
        return;
    }
    IGCLLVM::FixedVectorType* VTy = dyn_cast<IGCLLVM::FixedVectorType>(VecVal->getType());
    const unsigned int numChannels = VTy ? (unsigned)VTy->getNumElements() : 1;
    if (numChannels <= 32)
    {
        m_hasEM = true;
        m_EM = (uint32_t)((1ULL << numChannels) - 1);
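        // e.g., a 4-element vector with no recorded extract mask gets
        // m_EM = 0b1111 (all channels live).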
    }
    else
    {
        m_hasEM = false;
        m_EM = 0;
    }
}

uint16_t CShader::AdjustExtractIndex(llvm::Value* vecVal, uint16_t index)
{
    const ExtractMaskWrapper EMW(this, vecVal);

    uint16_t result = index;
    if (EMW.hasEM())
    {
        IGC_ASSERT(index < 32);
        uint32_t mask = EMW.getEM();
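        // Worked example (assumed values): with a mask of 0b1101, index 2 maps
        // to 1, since element 1 was pruned and the elements above it shift down.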
        for (uint i = 0; i < index; ++i)
        {
            if ((mask & (1 << i)) == 0)
            {
                result--;
            }
        }
        return result;
    }
    else
    {
        return index;
    }
}

void CShader::GetSimdOffsetBase(CVariable*& pVar)
{
    encoder.SetSimdSize(SIMDMode::SIMD8);
    encoder.SetNoMask();
    encoder.Cast(pVar, ImmToVariable(0x76543210, ISA_TYPE_V));
    encoder.Push();
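    // At this point the first 8 lanes of pVar hold {0,1,...,7}: the packed
    // V immediate 0x76543210 expands to one 4-bit value per lane.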

    if (m_dispatchSize >= SIMDMode::SIMD16)
    {
        encoder.SetSimdSize(SIMDMode::SIMD8);
        encoder.SetDstSubReg(8);
        encoder.SetNoMask();
        encoder.Add(pVar, pVar, ImmToVariable(8, ISA_TYPE_W));
        encoder.Push();
    }

    if (encoder.IsSecondHalf())
    {
        encoder.SetNoMask();
        encoder.Add(pVar, pVar, ImmToVariable(16, ISA_TYPE_W));
        encoder.Push();
    }
    else if (m_SIMDSize == SIMDMode::SIMD32)
    {
        // (W) add (16) V1(16) V1(0) 16:w
        encoder.SetSimdSize(SIMDMode::SIMD16);
        encoder.SetNoMask();
        encoder.SetDstSubReg(16);
        encoder.Add(pVar, pVar, ImmToVariable(16, ISA_TYPE_W));
        encoder.Push();
    }
}

CVariable* CShader::GetPerLaneOffsetsReg(uint typeSizeInBytes)
{
    CVariable* pPerLaneOffsetsRaw =
        GetNewVariable(numLanes(m_SIMDSize), ISA_TYPE_UW, EALIGN_GRF, "PerLaneOffsetsRaw");
    GetSimdOffsetBase(pPerLaneOffsetsRaw);

    // per-lane offsets need to be added to the address register
    CVariable* pConst2 = ImmToVariable(typeSizeInBytes, ISA_TYPE_UW);

    CVariable* pPerLaneOffsetsReg =
        GetNewVariable(numLanes(m_SIMDSize), ISA_TYPE_UW, EALIGN_GRF, false, "PerLaneOffsetsRawReg");

    // perLaneOffsets = typeSizeInBytes * perLaneOffsetsRaw
    encoder.SetNoMask();
    encoder.Mul(pPerLaneOffsetsReg, pPerLaneOffsetsRaw, pConst2);
    encoder.Push();

    return pPerLaneOffsetsReg;
}

void
CShader::CreatePayload(uint regCount, uint idxOffset, CVariable*& payload,
    llvm::Instruction* inst, uint paramOffset,
    uint8_t hfFactor)
{
    for (uint i = 0; i < regCount; ++i)
    {
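        // A worked example (assumed 32-byte GRF, SIMD16, hfFactor = 0):
        // numLanes = 16 and a GRF holds getGRFSize() >> 2 = 8 DWords, so each
        // payload element spans 16 / 8 = 2 sub-variables and subVarIdx
        // advances by 2 per operand.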
        uint subVarIdx = ((numLanes(m_SIMDSize) / (getGRFSize() >> 2)) >> hfFactor) * i + idxOffset;
        CopyVariable(payload, GetSymbol(inst->getOperand(i + paramOffset)), subVarIdx);
    }
}

unsigned CShader::GetIMEReturnPayloadSize(GenIntrinsicInst* I)
{
    IGC_ASSERT(I->getIntrinsicID() == GenISAIntrinsic::GenISA_vmeSendIME2);

    const auto streamMode =
        (COMMON_ISA_VME_STREAM_MODE)(
            cast<ConstantInt>(I->getArgOperand(4))->getZExtValue());
    auto* refImgBTI = I->getArgOperand(2);
    auto* bwdRefImgBTI = I->getArgOperand(3);
    const bool isDualRef = (refImgBTI != bwdRefImgBTI);

    uint32_t regs2rcv = 7;
    if ((streamMode == VME_STREAM_OUT) || (streamMode == VME_STREAM_IN_OUT))
    {
        regs2rcv += 2;
        if (isDualRef)
        {
            regs2rcv += 2;
        }
    }
    return regs2rcv;
}

uint CShader::GetNbElementAndMask(llvm::Value* value, uint32_t& mask)
{
    mask = 0;
    // Special case for VME's GenISA_createMessagePhases intrinsic
    if (GenIntrinsicInst* inst = dyn_cast<GenIntrinsicInst>(value)) {
        GenISAIntrinsic::ID IID = inst->getIntrinsicID();
        switch (IID)
        {
        case GenISAIntrinsic::GenISA_createMessagePhases:
        case GenISAIntrinsic::GenISA_createMessagePhasesNoInit:
        case GenISAIntrinsic::GenISA_createMessagePhasesV:
        case GenISAIntrinsic::GenISA_createMessagePhasesNoInitV:
        {
            Value* numGRFs = inst->getArgOperand(0);
            IGC_ASSERT_MESSAGE(isa<ConstantInt>(numGRFs), "Number GRFs operand is expected to be constant int!");
            // Number of elements = {num GRFs} * {num DWords in GRF} = {num GRFs} * 8
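            // e.g., numGRFs = 11 gives 11 * 8 = 88 DWord elements (assuming a 32-byte GRF).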
            return int_cast<unsigned int>(cast<ConstantInt>(numGRFs)->getZExtValue() * 8);
        }
        default:
            break;
        }
    }
    else if (auto* PN = dyn_cast<PHINode>(value))
    {
        // We could have a case like the one below, where the payload is undef on some path.
        //
        // BB1:
        //   %147 = call i32 @llvm.genx.GenISA.createMessagePhasesNoInit(i32 11)
        //   call void @llvm.genx.GenISA.vmeSendIME2(i32 % 147, ...)
        //   br label %BB2
        // BB2:
        //   ... = phi i32[%147, %BB1], [0, %BB]
        //
        for (uint i = 0, e = PN->getNumOperands(); i != e; ++i)
        {
            if (GenIntrinsicInst* inst = dyn_cast<GenIntrinsicInst>(PN->getOperand(i)))
            {
                GenISAIntrinsic::ID IID = inst->getIntrinsicID();
                switch (IID)
                {
                case GenISAIntrinsic::GenISA_createMessagePhases:
                case GenISAIntrinsic::GenISA_createMessagePhasesNoInit:
                case GenISAIntrinsic::GenISA_createMessagePhasesV:
                case GenISAIntrinsic::GenISA_createMessagePhasesNoInitV:
                    return GetNbElementAndMask(inst, mask);
                default:
                    break;
                }
            }
        }
    }

    uint nbElement = 0;
    uint bSize = 0;
    llvm::Type* const type = value->getType();
    IGC_ASSERT(nullptr != type);
    switch (type->getTypeID())
    {
    case llvm::Type::FloatTyID:
    case llvm::Type::HalfTyID:
        nbElement = GetIsUniform(value) ? 1 : numLanes(m_SIMDSize);
        break;
    case llvm::Type::IntegerTyID:
        bSize = llvm::cast<llvm::IntegerType>(type)->getBitWidth();
        nbElement = GetIsUniform(value) ? 1 : numLanes(m_SIMDSize);
        if (bSize == 1 && !m_CG->canEmitAsUniformBool(value))
        {
            nbElement = numLanes(m_SIMDSize);
        }
        break;
    case IGCLLVM::VectorTyID:
    {
        uint nElem = GetNbVectorElementAndMask(value, mask);
        nbElement = GetIsUniform(value) ? nElem : (nElem * numLanes(m_SIMDSize));
    }
    break;
    case llvm::Type::PointerTyID:
        // Assumes 32-bit pointers
        nbElement = GetIsUniform(value) ? 1 : numLanes(m_SIMDSize);
        break;
    case llvm::Type::DoubleTyID:
        nbElement = GetIsUniform(value) ? 1 : numLanes(m_SIMDSize);
        break;
    default:
        IGC_ASSERT(0);
        break;
    }
    return nbElement;
}

CVariable* CShader::GetUndef(VISA_Type type)
{
    CVariable* var = nullptr;
    if (type == ISA_TYPE_BOOL)
    {
        var = GetNewVariable(numLanes(m_SIMDSize), ISA_TYPE_BOOL, EALIGN_BYTE, "undef");
    }
    else
    {
        var = new (Allocator) CVariable(type);
    }
    return var;
}

// TODO: Obviously, a lot of work is needed to support constant expressions better.
uint64_t CShader::GetConstantExpr(ConstantExpr* CE) {
    IGC_ASSERT(nullptr != CE);
    switch (CE->getOpcode()) {
    default:
        break;
    case Instruction::IntToPtr: {
        Constant* C = CE->getOperand(0);
        if (isa<ConstantInt>(C) || isa<ConstantFP>(C) || isa<ConstantPointerNull>(C))
            return GetImmediateVal(C);
        if (ConstantExpr* CE1 = dyn_cast<ConstantExpr>(C))
            return GetConstantExpr(CE1);
        break;
    }
    case Instruction::PtrToInt: {
        Constant* C = CE->getOperand(0);
        if (ConstantExpr* CE1 = dyn_cast<ConstantExpr>(C))
            return GetConstantExpr(CE1);
        if (GlobalVariable* GV = dyn_cast<GlobalVariable>(C))
            return GetGlobalMappingValue(GV);
        break;
    }
    case Instruction::Trunc: {
        Constant* C = CE->getOperand(0);
        if (ConstantExpr* CE1 = dyn_cast<ConstantExpr>(C)) {
            if (IntegerType* ITy = dyn_cast<IntegerType>(CE1->getType())) {
                return GetConstantExpr(CE1) & ITy->getBitMask();
            }
        }
        break;
    }
    case Instruction::LShr: {
        Constant* C = CE->getOperand(0);
        if (ConstantExpr* CE1 = dyn_cast<ConstantExpr>(C)) {
            if (dyn_cast<IntegerType>(CE1->getType())) {
                uint64_t ShAmt = GetImmediateVal(CE->getOperand(1));
                return GetConstantExpr(CE1) >> ShAmt;
            }
        }
        break;
    }
    }

    IGC_ASSERT_EXIT_MESSAGE(0, "Unsupported constant expression!");
    return 0;
}

unsigned int CShader::GetGlobalMappingValue(llvm::Value* c)
{
    IGC_ASSERT_MESSAGE(0, "The global variables are not handled");

    return 0;
}

CVariable* CShader::GetGlobalMapping(llvm::Value* c)
{
    IGC_ASSERT_MESSAGE(0, "The global variables are not handled");

    VISA_Type type = GetType(c->getType());
    return ImmToVariable(0, type);
}

CVariable* CShader::GetScalarConstant(llvm::Value* const c)
{
    IGC_ASSERT(nullptr != c);
    const VISA_Type type = GetType(c->getType());

    // Constants
    if (isa<ConstantInt>(c) || isa<ConstantFP>(c) || isa<ConstantPointerNull>(c))
    {
        return ImmToVariable(GetImmediateVal(c), type);
    }

    // Undefined values
    if (isa<UndefValue>(c))
    {
        return GetUndef(type);
    }

    // GlobalVariables
    if (isa<GlobalVariable>(c))
    {
        return GetGlobalMapping(c);
    }

    // Constant Expression
    if (ConstantExpr* CE = dyn_cast<ConstantExpr>(c))
        return ImmToVariable(GetConstantExpr(CE), type);

    IGC_ASSERT_MESSAGE(0, "Unhandled flavor of constant!");
    return 0;
}

// Return true if the value can be encoded as a mini (8-bit restricted) float, and return the encoding in 'value'.
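// Worked example (assumed value): 1.0f = 0x3F800000 has sign = 0,
// exponent = 127 and fraction = 0, which encodes below as
// (127 & 0x3) << 4 = 0x30 in the VF format (1 sign, 3 exponent, 4 mantissa bits).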
static bool getByteFloatEncoding(ConstantFP* fp, uint8_t& value)
{
    value = 0;
    if (fp->getType()->isFloatTy())
    {
        if (fp->isZero())
        {
            value = fp->isNegative() ? 0x80 : 0;
            return true;
        }
        APInt api = fp->getValueAPF().bitcastToAPInt();
        FLOAT32 bitFloat;
        bitFloat.value.u = int_cast<unsigned int>(api.getZExtValue());
        // Check that the fraction doesn't have any bits set below bit 23 - 4;
        // a byte float can only encode the higher 4 bits of the fraction.
        if ((bitFloat.fraction & (~(0xF << (23 - 4)))) == 0 &&
            ((bitFloat.exponent > 124 && bitFloat.exponent <= 131) ||
            (bitFloat.exponent == 124 && bitFloat.fraction != 0)))
        {
            // convert to the 8-bit float format
            value |= bitFloat.sign << 7;
            value |= (bitFloat.fraction >> (23 - 4));
            value |= (bitFloat.exponent & 0x3) << 4;
            value |= (bitFloat.exponent & BIT(7)) >> 1;
            return true;
        }
    }
    return false;
}

// Return the most commonly used constant. Return null if all constants are different.
llvm::Constant* CShader::findCommonConstant(llvm::Constant* C, uint elts, uint currentEmitElts, bool& allSame)
{
    if (elts == 1)
    {
        return nullptr;
    }

    llvm::MapVector<llvm::Constant*, int> constMap;
    constMap.clear();
    Constant* constC = nullptr;
    bool cannotPackVF = !m_ctx->platform.hasPackedRestrictedFloatVector();
    for (uint32_t i = currentEmitElts; i < currentEmitElts + elts; i++)
    {
        constC = C->getAggregateElement(i);
        if (!constC)
        {
            return nullptr;
        }
        constMap[constC]++;

        // check if the constant can be packed in a VF immediate
        if (!isa<UndefValue>(constC) && elts >= 4)
        {
            llvm::VectorType* VTy = llvm::dyn_cast<llvm::VectorType>(C->getType());
            uint8_t encoding = 0;
            if (VTy->getScalarType()->isFloatTy() &&
                !getByteFloatEncoding(cast<ConstantFP>(constC), encoding))
            {
                cannotPackVF = true;
            }
        }
    }
    int mostUsedCount = 1;
    Constant* mostUsedValue = nullptr;
    for (auto iter = constMap.begin(); iter != constMap.end(); iter++)
    {
        if (iter->second > mostUsedCount)
        {
            mostUsedValue = iter->first;
            mostUsedCount = iter->second;
        }
    }

    constMap.clear();
    allSame = (mostUsedCount == elts);

    if (allSame)
    {
        return mostUsedValue;
    }
    else if (mostUsedCount > 1 && cannotPackVF)
    {
        return mostUsedValue;
    }
    else
    {
        return nullptr;
    }
}

auto sizeToSIMDMode = [](uint32_t size)
{
    switch (size)
    {
    case 1:
        return SIMDMode::SIMD1;
    case 2:
        return SIMDMode::SIMD2;
    case 4:
        return SIMDMode::SIMD4;
    case 8:
        return SIMDMode::SIMD8;
    case 16:
        return SIMDMode::SIMD16;
    default:
        IGC_ASSERT_MESSAGE(0, "unexpected simd size");
        return SIMDMode::SIMD1;
    }
};

CVariable* CShader::GetStructVariable(llvm::Value* v, bool forceVectorInit)
{
    IGC_ASSERT(v->getType()->isStructTy());

    auto isConstBase = [](Value* v)->bool
    {
        return isa<Constant>(v) || v->getValueID() == Value::UndefValueVal;
    };

    IGC_ASSERT_MESSAGE(isConstBase(v) ||
        isa<InsertValueInst>(v) ||
        isa<CallInst>(v) ||
        isa<Argument>(v),
        "Invalid struct symbol usage! Struct symbol should only come from const, insertvalue, call, or function arg");

    if (isa<InsertValueInst>(v))
    {
        // Walk up all the `insertvalue` instructions until we get to the constant base struct.
        // All `insertvalue` instructions that operate on the same struct should be mapped to the same CVar,
        // so just use the first instruction to do all the mapping.
        Value* baseV = v;
        InsertValueInst* FirstInsertValueInst = nullptr;
        while (InsertValueInst* II = dyn_cast<InsertValueInst>(baseV))
        {
            baseV = II->getOperand(0);
            FirstInsertValueInst = II;
        }
        if (FirstInsertValueInst)
        {
            // Check if it's already created
            auto it = symbolMapping.find(FirstInsertValueInst);
            if (it != symbolMapping.end())
            {
                return it->second;
            }
            v = FirstInsertValueInst;
        }
    }
    else if (isa<CallInst>(v) || isa<Argument>(v))
    {
        // Check for function argument symbols, and return values from calls
        auto it = symbolMapping.find(v);
        if (it != symbolMapping.end())
        {
            return it->second;
        }
    }
    else
    {
        // Const cannot be mapped
        IGC_ASSERT(isConstBase(v) && symbolMapping.find(v) == symbolMapping.end());
    }

    bool isUniform = forceVectorInit ? false : m_WI->isUniform(v);
    StructType* sTy = cast<StructType>(v->getType());
    auto& DL = entry->getParent()->getDataLayout();
    const StructLayout* SL = DL.getStructLayout(sTy);

    // Represent the struct as a vector of BYTES
    unsigned structSizeInBytes = (unsigned)SL->getSizeInBytes();
    unsigned lanes = isUniform ? 1 : numLanes(m_dispatchSize);
    CVariable* cVar = GetNewVariable(structSizeInBytes * lanes, ISA_TYPE_B, EALIGN_GRF, isUniform, "StructV");
1749
1750 // Initialize the struct default value if it has one
1751 if (Constant* C = dyn_cast<Constant>(v))
1752 {
1753 for (unsigned i = 0; i < sTy->getNumElements(); i++)
1754 {
1755 CVariable* elementSrc = GetSymbol(C->getAggregateElement(i));
1756 if (!elementSrc->IsUndef())
1757 {
1758 unsigned elementOffset = (unsigned)SL->getElementOffset(i);
1759 CVariable* elementDst = GetNewAlias(cVar, elementSrc->GetType(), elementOffset * lanes, elementSrc->GetNumberElement() * lanes);
1760 GetEncoder().Copy(elementDst, elementSrc);
1761 GetEncoder().Push();
1762 }
1763 }
1764 }
1765
1766 // Map the original llvm value to this new CVar.
1767 // Constants cannot be mapped; they must be re-initialized each time they are used.
1768 if (!isConstBase(v))
1769 symbolMapping[v] = cVar;
1770
1771 return cVar;
1772 }
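// Layout sketch (illustrative, assuming SIMD8, non-uniform, and a
// struct { float, i32 }): the struct is 8 bytes, so the backing CVar is a
// byte vector of 8 * 8 = 64 elements. Member 0 is copied at byte offset
// 0 * lanes and member 1 at byte offset 4 * lanes, i.e. each member
// occupies a contiguous per-lane block inside the byte vector.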
1773
1774 CVariable* CShader::GetConstant(llvm::Constant* C, CVariable* dstVar)
1775 {
1776 IGCLLVM::FixedVectorType* VTy = llvm::dyn_cast<IGCLLVM::FixedVectorType>(C->getType());
1777 if (VTy) // C was already dereferenced above; only VTy needs checking
1778 { // Vector constant
1779 llvm::Type* eTy = VTy->getElementType();
1780 IGC_ASSERT_MESSAGE((VTy->getNumElements() < (UINT16_MAX)), "getNumElements more than 64k elements");
1781 uint16_t elts = (uint16_t)VTy->getNumElements();
1782
1783 if (elts == 1)
1784 {
1785 llvm::Constant* const EC = C->getAggregateElement((uint)0);
1786 IGC_ASSERT_MESSAGE(nullptr != EC, "Vector Constant has no valid constant element!");
1787 return GetScalarConstant(EC);
1788 }
1789
1790 // Emit a scalar move to load the element of index k.
1791 auto copyScalar = [=](int k, CVariable* Var)
1792 {
1793 Constant* const EC = C->getAggregateElement(k);
1794 IGC_ASSERT_MESSAGE(nullptr != EC, "Constant Vector: Invalid non-constant element!");
1795 if (isa<UndefValue>(EC))
1796 return;
1797
1798 CVariable* eVal = GetScalarConstant(EC);
1799 if (Var->IsUniform())
1800 {
1801 GetEncoder().SetDstSubReg(k);
1802 }
1803 else
1804 {
1805 auto input_size = GetScalarTypeSizeInRegister(eTy);
1806 Var = GetNewAlias(Var, Var->GetType(), k * input_size * numLanes(m_SIMDSize), 0);
1807 }
1808 GetEncoder().Copy(Var, eVal);
1809 GetEncoder().Push();
1810 };
1811
1812 // Emit a SIMD4 move to load 4 packed byte-floats (VF).
1813 auto copyV4 = [=](int k, uint32_t vfimm, CVariable* Var)
1814 {
1815 CVariable* Imm = ImmToVariable(vfimm, ISA_TYPE_VF);
1816 GetEncoder().SetUniformSIMDSize(SIMDMode::SIMD4);
1817 GetEncoder().SetDstSubReg(k);
1818 GetEncoder().Copy(Var, Imm);
1819 GetEncoder().Push();
1820 };
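// VF packing sketch (illustrative): a :vf immediate packs four 8-bit
// restricted floats, one per byte. So when elements i..i+3 all have a
// valid byte encoding (see getByteFloatEncoding), a run like
// <1.0f, 0.5f, 1.0f, 2.0f> becomes a single SIMD4 mov of one packed
// 32-bit immediate instead of four scalar movs.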
1821
1822
1823 if (dstVar != nullptr && !(dstVar->IsUniform()))
1824 {
1825 for (uint i = 0; i < elts; i++)
1826 {
1827 copyScalar(i, dstVar);
1828 }
1829 return dstVar;
1830 }
1831
1832 CVariable* CVar = (dstVar == nullptr) ?
1833 GetNewVariable(elts, GetType(eTy), EALIGN_GRF, true, C->getName()) : dstVar;
1834 uint remainElts = elts;
1835 uint currentEltsOffset = 0;
1836 uint size = 8;
1837 while (remainElts != 0)
1838 {
1839 bool allSame = false;
1840
1841 while (size > remainElts && size != 1)
1842 {
1843 size /= 2;
1844 }
1845
1846 Constant* commonConstant = findCommonConstant(C, size, currentEltsOffset, allSame);
1847 // case 2: all constants the same
1848 if (commonConstant && allSame)
1849 {
1850 GetEncoder().SetUniformSIMDSize(sizeToSIMDMode(size));
1851 GetEncoder().SetDstSubReg(currentEltsOffset);
1852 GetEncoder().Copy(CVar, GetScalarConstant(commonConstant));
1853 GetEncoder().Push();
1854 }
1855
1856 // case 3: some constants the same
1857 else if (commonConstant)
1858 {
1859 GetEncoder().SetUniformSIMDSize(sizeToSIMDMode(size));
1860 GetEncoder().SetDstSubReg(currentEltsOffset);
1861 GetEncoder().Copy(CVar, GetScalarConstant(commonConstant));
1862 GetEncoder().Push();
1863
1864 Constant* constC = nullptr;
1865 for (uint i = currentEltsOffset; i < currentEltsOffset + size; i++)
1866 {
1867 constC = C->getAggregateElement(i);
1868 if (constC != commonConstant && !isa<UndefValue>(constC))
1869 {
1870 GetEncoder().SetDstSubReg(i);
1871 GetEncoder().Copy(CVar, GetScalarConstant(constC));
1872 GetEncoder().Push();
1873 }
1874 }
1875 }
1876 // case 4: VFPack
1877 else if (VTy->getScalarType()->isFloatTy() && size >= 4)
1878 {
1879 unsigned Step = 4;
1880 for (uint i = currentEltsOffset; i < currentEltsOffset + size; i += Step)
1881 {
1882 // pack into vf if possible.
1883 uint32_t vfimm = 0;
1884 bool canUseVF = m_ctx->platform.hasPackedRestrictedFloatVector();
1885 for (unsigned j = 0; j < Step && canUseVF; ++j)
1886 {
1887 Constant* EC = C->getAggregateElement(i + j);
1888 // Treat undef as 0.0f.
1889 if (isa<UndefValue>(EC))
1890 continue;
1891 uint8_t encoding = 0;
1892 canUseVF = getByteFloatEncoding(cast<ConstantFP>(EC), encoding);
1893 if (canUseVF)
1894 {
1895 uint32_t v = encoding;
1896 v <<= j * 8;
1897 vfimm |= v;
1898 }
1899 else
1900 {
1901 break;
1902 }
1903 }
1904
1905 if (canUseVF)
1906 {
1907 copyV4(i, vfimm, CVar);
1908 }
1909 else
1910 {
1911 for (unsigned j = i; j < i + Step; ++j)
1912 copyScalar(j, CVar);
1913 }
1914 }
1915 }
1916 // case 5: single copy
1917 else
1918 {
1919 // Element-wise copy or trailing elements copy if partially packed.
1920 for (uint i = currentEltsOffset; i < currentEltsOffset + size; i++)
1921 {
1922 copyScalar(i, CVar);
1923 }
1924 }
1925 remainElts -= size;
1926 currentEltsOffset += size;
1927 }
1928 return CVar;
1929 }
1930
1931 return GetScalarConstant(C);
1932 }
1933
1934 VISA_Type IGC::GetType(llvm::Type* type, CodeGenContext* pContext)
1935 {
1936 IGC_ASSERT(nullptr != pContext);
1937 IGC_ASSERT(nullptr != type);
1938
1939 switch (type->getTypeID())
1940 {
1941 case llvm::Type::FloatTyID:
1942 return ISA_TYPE_F;
1943 case llvm::Type::IntegerTyID:
1944 switch (type->getIntegerBitWidth())
1945 {
1946 case 1:
1947 return ISA_TYPE_BOOL;
1948 case 8:
1949 return ISA_TYPE_B;
1950 case 16:
1951 return ISA_TYPE_W;
1952 case 32:
1953 return ISA_TYPE_D;
1954 case 64:
1955 return ISA_TYPE_Q;
1956 default:
1957 IGC_ASSERT_MESSAGE(0, "illegal type");
1958 break;
1959 }
1960 break;
1961 case IGCLLVM::VectorTyID:
1962 return GetType(type->getContainedType(0), pContext);
1963 case llvm::Type::PointerTyID:
1964 {
1965 unsigned int AS = type->getPointerAddressSpace();
1966 uint numBits = pContext->getRegisterPointerSizeInBits(AS);
1967 if (numBits == 32)
1968 {
1969 return ISA_TYPE_UD;
1970 }
1971 else
1972 {
1973 return ISA_TYPE_UQ;
1974 }
1975 }
1976 case llvm::Type::DoubleTyID:
1977 return ISA_TYPE_DF;
1978 case llvm::Type::HalfTyID:
1979 return ISA_TYPE_HF;
1980 case llvm::Type::StructTyID:
1981 // Structs are always internally represented as BYTES
1982 return ISA_TYPE_B;
1983 default:
1984 IGC_ASSERT(0);
1985 break;
1986 }
1987 IGC_ASSERT(0);
1988 return ISA_TYPE_F;
1989 }
1990
1991 VISA_Type CShader::GetType(llvm::Type* type)
1992 {
1993 return IGC::GetType(type, GetContext());
1994 }
1995
1996 uint32_t CShader::GetNumElts(llvm::Type* type, bool isUniform)
1997 {
1998 uint32_t numElts = isUniform ? 1 : numLanes(m_SIMDSize);
1999
2000 if (type->isVectorTy())
2001 {
2002 IGC_ASSERT(type->getContainedType(0)->isIntegerTy() || type->getContainedType(0)->isFloatingPointTy());
2003
2004 auto VT = cast<IGCLLVM::FixedVectorType>(type);
2005 numElts *= (uint16_t)VT->getNumElements();
2006 }
2007 else if (type->isStructTy())
2008 {
2009 auto& DL = entry->getParent()->getDataLayout();
2010 const StructLayout* SL = DL.getStructLayout(cast<StructType>(type));
2011 numElts *= (uint16_t)SL->getSizeInBytes();
2012 }
2013 return numElts;
2014 }
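// Examples (illustrative): at SIMD16, a non-uniform <4 x float> yields
// 16 * 4 = 64 elements, while a uniform one yields 4. A non-uniform
// 12-byte struct yields 16 * 12 = 192 elements, since structs are
// represented as byte vectors.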
2015
2016 uint64_t IGC::GetImmediateVal(llvm::Value* Const)
2017 {
2018 // Constant integer
2019 if (llvm::ConstantInt * CInt = llvm::dyn_cast<llvm::ConstantInt>(Const))
2020 {
2021 return CInt->getZExtValue();
2022 }
2023
2024 // Constant float/double
2025 if (llvm::ConstantFP * CFP = llvm::dyn_cast<llvm::ConstantFP>(Const))
2026 {
2027 APInt api = CFP->getValueAPF().bitcastToAPInt();
2028 return api.getZExtValue();
2029 }
2030
2031 // Null pointer
2032 if (llvm::isa<ConstantPointerNull>(Const))
2033 {
2034 return 0;
2035 }
2036
2037 IGC_ASSERT_MESSAGE(0, "Unhandled constant value!");
2038 return 0;
2039 }
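// Examples (illustrative): GetImmediateVal(ConstantInt 42) == 42, and
// GetImmediateVal(ConstantFP 1.0f) == 0x3F800000, i.e. the raw IEEE-754
// bit pattern of the float, not a numeric conversion.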
2040
2041 /// IsRawAtomicIntrinsic - Check whether it's a RAW atomic, which is
2042 /// potentially optimized by the scalarized atomic operation.
2043 static bool IsRawAtomicIntrinsic(llvm::Value* V) {
2044 GenIntrinsicInst* GII = dyn_cast<GenIntrinsicInst>(V);
2045 if (!GII)
2046 return false;
2047
2048 switch (GII->getIntrinsicID()) {
2049 default:
2050 break;
2051 case GenISAIntrinsic::GenISA_intatomicraw:
2052 case GenISAIntrinsic::GenISA_floatatomicraw:
2053 case GenISAIntrinsic::GenISA_intatomicrawA64:
2054 case GenISAIntrinsic::GenISA_floatatomicrawA64:
2055 case GenISAIntrinsic::GenISA_icmpxchgatomicraw:
2056 case GenISAIntrinsic::GenISA_fcmpxchgatomicraw:
2057 case GenISAIntrinsic::GenISA_icmpxchgatomicrawA64:
2058 case GenISAIntrinsic::GenISA_fcmpxchgatomicrawA64:
2059 return true;
2060 }
2061
2062 return false;
2063 }
2064
2065 /// GetPreferredAlignmentOnUse - Return preferred alignment based on how the
2066 /// specified value is being used.
2067 static e_alignment GetPreferredAlignmentOnUse(llvm::Value* V, WIAnalysis* WIA,
2068 CodeGenContext* pContext)
2069 {
2070 auto getAlign = [](Value* aV, WIAnalysis* aWIA, CodeGenContext* pCtx) -> e_alignment
2071 {
2072 // If uniform variables are ever used by uniform loads, stores, or atomic
2073 // ops, they need to be GRF aligned.
2074 for (auto UI = aV->user_begin(), UE = aV->user_end(); UI != UE; ++UI) {
2075 if (LoadInst* LD = dyn_cast<LoadInst>(*UI)) {
2076 Value* Ptr = LD->getPointerOperand();
2077 if (aWIA->isUniform(Ptr)) {
2078 if (IGC::isA64Ptr(cast<PointerType>(Ptr->getType()), pCtx))
2079 return (pCtx->platform.getGRFSize() == 64) ? EALIGN_64WORD : EALIGN_32WORD;
2080 return (pCtx->platform.getGRFSize() == 64) ? EALIGN_32WORD : EALIGN_HWORD;
2081 }
2082 }
2083 if (StoreInst* ST = dyn_cast<StoreInst>(*UI)) {
2084 Value* Ptr = ST->getPointerOperand();
2085 if (aWIA->isUniform(Ptr)) {
2086 if (IGC::isA64Ptr(cast<PointerType>(Ptr->getType()), pCtx))
2087 return (pCtx->platform.getGRFSize() == 64) ? EALIGN_64WORD : EALIGN_32WORD;
2088 return (pCtx->platform.getGRFSize() == 64) ? EALIGN_32WORD : EALIGN_HWORD;
2089 }
2090 }
2091
2092 // Last, check Gen intrinsic.
2093 GenIntrinsicInst* GII = dyn_cast<GenIntrinsicInst>(*UI);
2094 if (!GII) {
2095 continue;
2096 }
2097
2098 if (IsRawAtomicIntrinsic(GII)) {
2099 Value* Ptr = GII->getArgOperand(1);
2100 if (aWIA->isUniform(Ptr)) {
2101 if (PointerType* PtrTy = dyn_cast<PointerType>(Ptr->getType())) {
2102 if (IGC::isA64Ptr(PtrTy, pCtx))
2103 return (pCtx->platform.getGRFSize() == 64) ? EALIGN_64WORD : EALIGN_32WORD;
2104 }
2105 return (pCtx->platform.getGRFSize() == 64) ? EALIGN_32WORD : EALIGN_HWORD;
2106 }
2107 }
2108 }
2109 return EALIGN_AUTO;
2110 };
2111
2112 e_alignment algn = getAlign(V, WIA, pContext);
2113 if (algn != EALIGN_AUTO) {
2114 return algn;
2115 }
2116
2117 if (IGC_IS_FLAG_ENABLED(EnableDeSSAAlias))
2118 {
2119 // Check if this V is used as load/store's address via
2120 // inttoptr that is actually noop (aliased by dessa already).
2121 // x = ...
2122 // y = inttoptr x
2123 // load/store y
2124 // To make sure not to increase register pressure, only do it if y
2125 // is the sole use of x!
2126 if (V->hasOneUse())
2127 {
2128 // todo: use deSSA->isNoopAliaser() to check if it has become an alias
2129 User* U = V->user_back();
2130 IntToPtrInst* IPtr = dyn_cast<IntToPtrInst>(U);
2131 if (IPtr && isNoOpInst(IPtr, pContext))
2132 {
2133 algn = getAlign(IPtr, WIA, pContext);
2134 if (algn != EALIGN_AUTO) {
2135 return algn;
2136 }
2137 }
2138 }
2139 }
2140
2141 // Otherwise, naturally aligned is always assumed.
2142 return EALIGN_AUTO;
2143 }
2144
2145 /// GetPreferredAlignment - Return preferred alignment based on how the
2146 /// specified value is being defined/used.
2147 e_alignment IGC::GetPreferredAlignment(llvm::Value* V, WIAnalysis* WIA,
2148 CodeGenContext* pContext)
2149 {
2150 // So far, non-uniform variables are always naturally aligned.
2151 if (!WIA->isUniform(V))
2152 return EALIGN_AUTO;
2153
2154 // As the layout of arguments is fixed, only natural alignment can be
2155 // assumed.
2156 if (isa<Argument>(V))
2157 return CEncoder::GetCISADataTypeAlignment(GetType(V->getType(), pContext));
2158
2159 // For values not being mapped to variables directly, always assume
2160 // natural alignment.
2161 if (!isa<Instruction>(V))
2162 return EALIGN_AUTO;
2163
2164 // If uniform variables are results of uniform loads, they need to be
2165 // GRF aligned.
2166 if (LoadInst * LD = dyn_cast<LoadInst>(V)) {
2167 Value* Ptr = LD->getPointerOperand();
2168 // For a 64-bit load, we have to check how the loaded value is being used.
2169 e_alignment Align = (pContext->platform.getGRFSize() == 64) ? EALIGN_32WORD : EALIGN_HWORD;
2170 if (IGC::isA64Ptr(cast<PointerType>(Ptr->getType()), pContext))
2171 Align = GetPreferredAlignmentOnUse(V, WIA, pContext);
2172 return (Align == EALIGN_AUTO) ? (pContext->platform.getGRFSize() == 64) ? EALIGN_32WORD : EALIGN_HWORD : Align;
2173 }
2174
2175 // If uniform variables are results of uniform atomic ops, they need
2176 // to be GRF aligned.
2177 if (IsRawAtomicIntrinsic(V)) {
2178 GenIntrinsicInst* GII = cast<GenIntrinsicInst>(V);
2179 Value* Ptr = GII->getArgOperand(1);
2180 // For 64-bit atomic ops, we have to check how the return value is
2181 // being used.
2182 e_alignment Align = (pContext->platform.getGRFSize() == 64) ? EALIGN_32WORD : EALIGN_HWORD;
2183 if (PointerType * PtrTy = dyn_cast<PointerType>(Ptr->getType())) {
2184 if (IGC::isA64Ptr(PtrTy, pContext))
2185 Align = GetPreferredAlignmentOnUse(V, WIA, pContext);
2186 }
2187 return (Align == EALIGN_AUTO) ? (pContext->platform.getGRFSize() == 64) ? EALIGN_32WORD : EALIGN_HWORD : Align;
2188 }
2189
2190
2191 // Check how that value is used.
2192 return GetPreferredAlignmentOnUse(V, WIA, pContext);
2193 }
2194
2195 CVariable* CShader::LazyCreateCCTupleBackingVariable(
2196 CoalescingEngine::CCTuple* ccTuple,
2197 VISA_Type baseVisaType)
2198 {
2199 CVariable* var = NULL;
2200 auto it = ccTupleMapping.find(ccTuple);
2201 if (it != ccTupleMapping.end()) {
2202 var = ccTupleMapping[ccTuple];
2203 }
2204 else {
2205 auto mult = (m_SIMDSize == m_Platform->getMinDispatchMode()) ? 1 : 2;
2206 mult = CEncoder::GetCISADataTypeSize(baseVisaType) == 2 ? 1 : mult;
2207 unsigned int numRows = ccTuple->GetNumElements() * mult;
2208 const unsigned int denominator = CEncoder::GetCISADataTypeSize(ISA_TYPE_F);
2209 IGC_ASSERT(denominator);
2210 unsigned int numElts = numRows * getGRFSize() / denominator;
2211
2212 //int size = numLanes(m_SIMDSize) * ccTuple->GetNumElements();
2213 if (ccTuple->HasNonHomogeneousElements())
2214 {
2215 numElts += m_coalescingEngine->GetLeftReservedOffset(ccTuple->GetRoot(), m_SIMDSize) / denominator;
2216 numElts += m_coalescingEngine->GetRightReservedOffset(ccTuple->GetRoot(), m_SIMDSize) / denominator;
2217 }
2218
2219 IGC_ASSERT_MESSAGE((numElts < (UINT16_MAX)), "tuple byte size higher than 64k");
2220
2221 // create one
2222 var = GetNewVariable(
2223 (uint16_t)numElts,
2224 ISA_TYPE_F,
2225 (GetContext()->platform.getGRFSize() == 64) ? EALIGN_32WORD : EALIGN_HWORD,
2226 false,
2227 m_numberInstance,
2228 "CCTuple");
2229 ccTupleMapping.insert(std::pair<CoalescingEngine::CCTuple*, CVariable*>(ccTuple, var));
2230 }
2231
2232 return var;
2233 }
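// Sizing sketch (illustrative, assuming a 32-byte GRF, a base type wider
// than 2 bytes, and a non-minimal dispatch size, so mult == 2): a tuple of
// 4 homogeneous elements gives numRows = 4 * 2 = 8 and
// numElts = 8 * 32 / 4 = 64 float elements, before any left/right reserved
// offsets for non-homogeneous tuples are added.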
2234
2235 /// F should be a non-kernel function.
2236 ///
2237 /// For a subroutine call, symbols (CVariables) are created as follows:
2238 ///
2239 /// (1) If the subroutine returns a non-void value, then a unified return CVariable
2240 /// is created to communicate between callee and caller. The function
2241 /// 'getOrCreateReturnSymbol' creates such a unique symbol (CVariable)
2242 /// on demand. This return symbol is cached inside the 'globalSymbolMapping'
2243 /// object and is *NOT* part of the local symbol table 'symbolMapping'.
2244 /// Currently, return symbols are non-uniform.
2245 ///
2246 /// (2) Subroutine formal arguments are also created on-demand, which may be
2247 /// created from their first call sites or ahead of any call site. Symbols for
2248 /// subroutine formal arguments are also stored inside 'globalSymbolMapping'
2249 /// during the entire module codegen. During each subroutine's vISA emission,
2250 /// value-to-symbol mappings are also copied into 'symbolMapping' to allow
2251 /// EmitVISAPass to emit code in a uniform way.
2252 ///
2253 /// In some sense, all formal arguments are pre-allocated. Those symbols must be
2254 /// non-alias CVariables (i.e., root CVariables), as required by vISA.
2255 ///
2256 /// Currently, all explicit arguments are non-uniform and most implicit
2257 /// arguments are uniform. Some implicit arguments may share the same symbol
2258 /// with their caller's implicit argument of the same kind. This is a subroutine
2259 /// optimization implemented in 'getOrCreateArgumentSymbol'.
2260 ///
2261 void CShader::BeginFunction(llvm::Function* F)
2262 {
2263 // TODO: merge InitEncoder with this function.
2264
2265 symbolMapping.clear();
2266 ccTupleMapping.clear();
2267 ConstantPool.clear();
2268
2269 bool useStackCall = m_FGA && m_FGA->useStackCall(F);
2270 if (useStackCall)
2271 {
2272 globalSymbolMapping.clear();
2273 encoder.BeginStackFunction(F);
2274 // create pre-defined r0
2275 m_R0 = GetNewVariable(getGRFSize() / SIZE_DWORD, ISA_TYPE_D, EALIGN_GRF, false, 1, "R0");
2276 encoder.GetVISAPredefinedVar(m_R0, PREDEFINED_R0);
2277 }
2278 else
2279 {
2280 encoder.BeginSubroutine(F);
2281 }
2282 // Set already created symbols for formal arguments.
2283 for (auto& Arg : F->args())
2284 {
2285 if (!Arg.use_empty())
2286 {
2287 // The treatment of arguments is more complex for subroutines and simpler for stack-call functions.
2288 CVariable* Var = getOrCreateArgumentSymbol(&Arg, false, useStackCall);
2289 symbolMapping[&Arg] = Var;
2290
2291 if (Value * Node = m_deSSA->getRootValue(&Arg))
2292 {
2293 if (Node != (Value*)& Arg &&
2294 symbolMapping.count(Node) == 0)
2295 {
2296 CVariable* aV = Var;
2297 if (IGC_GET_FLAG_VALUE(EnableDeSSAAlias) >= 2)
2298 {
2299 aV = createAliasIfNeeded(Node, Var);
2300 }
2301 symbolMapping[Node] = aV;
2302 }
2303 }
2304 }
2305 }
2306
2307 CreateAliasVars();
2308 PreCompileFunction(*F);
2309 }
2310
2311 // This method splits payload interpolations from the shader into another compilation unit.
2312 void CShader::SplitPayloadFromShader(llvm::Function* F)
2313 {
2314 encoder.BeginPayloadSection();
2315 }
2316
2317 /// This method is used to create the vISA variable for function F's formal return value
2318 CVariable* CShader::getOrCreateReturnSymbol(llvm::Function* F)
2319 {
2320 IGC_ASSERT_MESSAGE(nullptr != F, "null function");
2321 auto it = globalSymbolMapping.find(F);
2322 if (it != globalSymbolMapping.end())
2323 {
2324 return it->second;
2325 }
2326
2327 auto retType = F->getReturnType();
2328 IGC_ASSERT(nullptr != retType);
2329 if (F->isDeclaration() || retType->isVoidTy())
2330 return nullptr;
2331
2332 IGC_ASSERT(retType->isSingleValueType());
2333 VISA_Type type = GetType(retType);
2334 uint16_t nElts = (uint16_t)GetNumElts(retType, false);
2335 e_alignment align = getGRFAlignment();
2336 CVariable* var = GetNewVariable(
2337 nElts, type, align, false, m_numberInstance,
2338 CName(F->getName(), "_RETVAL"));
2339 globalSymbolMapping.insert(std::make_pair(F, var));
2340 return var;
2341 }
2342
2343 /// This method is used to create the vISA variable for function F's formal argument
2344 CVariable* CShader::getOrCreateArgumentSymbol(
2345 Argument* Arg,
2346 bool ArgInCallee,
2347 bool useStackCall)
2348 {
2349 llvm::DenseMap<llvm::Value*, CVariable*>* pSymMap = &globalSymbolMapping;
2350 IGC_ASSERT(nullptr != pSymMap);
2351 auto it = pSymMap->find(Arg);
2352 if (it != pSymMap->end())
2353 {
2354 return it->second;
2355 }
2356
2357 CVariable* var = nullptr;
2358
2359 // Stack call does not use implicit args
2360 if (!useStackCall)
2361 {
2362 // An explicit argument is not uniform; an implicit argument is
2363 // predefined, though not necessarily uniform.
2364 Function* F = Arg->getParent();
2365 ImplicitArgs implicitArgs(*F, m_pMdUtils);
2366 unsigned numImplicitArgs = implicitArgs.size();
2367 unsigned numPushArgsEntry = m_ModuleMetadata->pushInfo.pushAnalysisWIInfos.size();
2368 unsigned numPushArgs = (isEntryFunc(m_pMdUtils, F) && !isNonEntryMultirateShader(F) ? numPushArgsEntry : 0);
2369 IGC_ASSERT_MESSAGE(F->arg_size() >= (numImplicitArgs + numPushArgs), "Function arg size does not match meta data and push args.");
2370 unsigned numFuncArgs = F->arg_size() - numImplicitArgs - numPushArgs;
2371
2372 llvm::Function::arg_iterator arg = F->arg_begin();
2373 std::advance(arg, numFuncArgs);
2374 for (unsigned i = 0; i < numImplicitArgs; ++i, ++arg)
2375 {
2376 Argument* argVal = &(*arg);
2377 if (argVal == Arg)
2378 {
2379 ImplicitArg implictArg = implicitArgs[i];
2380 auto ArgType = implictArg.getArgType();
2381
2382 // Just reuse the kernel arguments for the following.
2383 // Note that for read-only general arguments, we may do a similar
2384 // optimization, with some more advanced analysis.
2385 if (ArgType == ImplicitArg::ArgType::R0 ||
2386 ArgType == ImplicitArg::ArgType::PAYLOAD_HEADER ||
2387 ArgType == ImplicitArg::ArgType::WORK_DIM ||
2388 ArgType == ImplicitArg::ArgType::NUM_GROUPS ||
2389 ArgType == ImplicitArg::ArgType::GLOBAL_SIZE ||
2390 ArgType == ImplicitArg::ArgType::LOCAL_SIZE ||
2391 ArgType == ImplicitArg::ArgType::ENQUEUED_LOCAL_WORK_SIZE ||
2392 ArgType == ImplicitArg::ArgType::CONSTANT_BASE ||
2393 ArgType == ImplicitArg::ArgType::GLOBAL_BASE ||
2394 ArgType == ImplicitArg::ArgType::PRIVATE_BASE ||
2395 ArgType == ImplicitArg::ArgType::PRINTF_BUFFER)
2396 {
2397 Function& K = *m_FGA->getSubGroupMap(F);
2398 ImplicitArgs IAs(K, m_pMdUtils);
2399 uint32_t nIAs = (uint32_t)IAs.size();
2400 uint32_t iArgIx = IAs.getArgIndex(ArgType);
2401 uint32_t argIx = (uint32_t)K.arg_size() - nIAs + iArgIx;
2402 if (isEntryFunc(m_pMdUtils, &K) && !isNonEntryMultirateShader(&K)) {
2403 argIx = argIx - numPushArgsEntry;
2404 }
2405 Function::arg_iterator arg = K.arg_begin();
2406 for (uint32_t j = 0; j < argIx; ++j, ++arg);
2407 Argument* kerArg = &(*arg);
2408
2409 // Pre-condition: all kernel arguments have been created already.
2410 IGC_ASSERT(pSymMap->count(kerArg));
2411 return (*pSymMap)[kerArg];
2412 }
2413 else
2414 {
2415 bool isUniform = WIAnalysis::isDepUniform(implictArg.getDependency());
2416 uint16_t nbElements = (uint16_t)implictArg.getNumberElements();
2417
2418
2419 var = GetNewVariable(nbElements,
2420 implictArg.getVISAType(*m_DL),
2421 implictArg.getAlignType(*m_DL), isUniform,
2422 isUniform ? 1 : m_numberInstance,
2423 argVal->getName());
2424 }
2425 break;
2426 }
2427 }
2428 }
2429
2430 // This is not implicit.
2431 if (var == nullptr)
2432 {
2433 // GetPreferredAlignment treats all arguments as kernel ones, which have
2434 // predefined alignments; but this is not true for subroutines.
2435 // Conservatively use GRF alignment.
2436 e_alignment align = getGRFAlignment();
2437
2438 bool isUniform = false;
2439 if (!ArgInCallee) {
2440 // Arg is for the current function and m_WI is available
2441 isUniform = m_WI->isUniform(&*Arg);
2442 }
2443
2444 VISA_Type type = GetType(Arg->getType());
2445 uint16_t nElts = (uint16_t)GetNumElts(Arg->getType(), isUniform);
2446 var = GetNewVariable(nElts, type, align, isUniform, m_numberInstance, Arg->getName());
2447 }
2448 pSymMap->insert(std::make_pair(Arg, var));
2449 return var;
2450 }
2451
2452 void CShader::UpdateSymbolMap(llvm::Value* v, CVariable* CVar)
2453 {
2454 symbolMapping[v] = CVar;
2455 }
2456
2457 // Reuse a variable in the following case:
2458 // %x = op1...
2459 // %y = op2 (%x, ...)
2460 // with some constraints:
2461 // - %x and %y belong to the same block
2462 // - %x and %y do not live out of this block
2463 // - %x does not interfere with %y
2464 // - %x is not phi
2465 // - %y has no phi use
2466 // - %x and %y have the same uniformity, and the same size
2467 // - %x is not an alias
2468 // - alignment is OK
2469 //
2470 CVariable* CShader::reuseSourceVar(Instruction* UseInst, Instruction* DefInst,
2471 e_alignment preferredAlign)
2472 {
2473 // Only when DefInst has been assigned a CVar.
2474 IGC_ASSERT(nullptr != DefInst);
2475 IGC_ASSERT(nullptr != UseInst);
2476 auto It = symbolMapping.find(DefInst);
2477 if (It == symbolMapping.end())
2478 return nullptr;
2479
2480 // If the def is an alias/immediate, then do not reuse.
2481 // TODO: allow alias.
2482 CVariable* DefVar = It->second;
2483 if (DefVar->GetAlias() || DefVar->IsImmediate())
2484 return nullptr;
2485
2486 // LLVM IR level checks and RPE based heuristics.
2487 if (!m_VRA->checkDefInst(DefInst, UseInst, m_deSSA->getLiveVars()))
2488 return nullptr;
2489
2490 // Do not reuse when variable size exceeds the threshold.
2491 //
2492 // TODO: If vISA global RA can better deal with fragmentation, this will
2493 // become unnecessary.
2494 //
2495 // TODO: Remove this check if register pressure is low, or very high.
2496 //
2497 unsigned Threshold = IGC_GET_FLAG_VALUE(VariableReuseByteSize);
2498 if (DefVar->GetSize() > Threshold)
2499 return nullptr;
2500
2501 // Only reuse when they have the same uniformity.
2502 if (GetIsUniform(UseInst) != GetIsUniform(DefInst))
2503 return nullptr;
2504
2505 // Check alignments. If UseInst has a stricter alignment, do not reuse.
2506 e_alignment DefAlign = DefVar->GetAlign();
2507 e_alignment UseAlign = preferredAlign;
2508 if (DefAlign == EALIGN_AUTO)
2509 {
2510 VISA_Type Ty = GetType(DefInst->getType());
2511 DefAlign = CEncoder::GetCISADataTypeAlignment(Ty);
2512 }
2513 if (UseAlign == EALIGN_AUTO)
2514 {
2515 VISA_Type Ty = GetType(UseInst->getType());
2516 UseAlign = CEncoder::GetCISADataTypeAlignment(Ty);
2517 }
2518 if (UseAlign > DefAlign)
2519 return nullptr;
2520
2521 // Reuse this source when types match.
2522 if (DefInst->getType() == UseInst->getType())
2523 {
2524 return DefVar;
2525 }
2526
2527 // Check cast instructions and create an alias if necessary.
2528 if (CastInst * CI = dyn_cast<CastInst>(UseInst))
2529 {
2530 VISA_Type UseTy = GetType(UseInst->getType());
2531 if (UseTy == DefVar->GetType())
2532 {
2533 return DefVar;
2534 }
2535
2536 if (encoder.GetCISADataTypeSize(UseTy) != encoder.GetCISADataTypeSize(DefVar->GetType()))
2537 {
2538 // trunc/zext is needed, reuse not possible
2539 // this extra check is needed because in code gen we implicitly convert all private pointers
2540 // to 32-bit when LLVM assumes it's 64-bit based on DL
2541 return nullptr;
2542 }
2543
2544 // TODO: allow %y = trunc i32 %x to i8
2545 IGC_ASSERT(CI->isNoopCast(*m_DL));
2546 return GetNewAlias(DefVar, UseTy, 0, 0);
2547 }
2548
2549 // No reuse yet.
2550 return nullptr;
2551 }
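// Reuse sketch (illustrative):
//   %x = fadd <16 x float> %a, %b
//   %y = fmul <16 x float> %x, %c   ; %x has no other use, same block
// If the liveness, size, uniformity, and alignment checks above all pass,
// %y simply reuses %x's CVariable instead of allocating a new one.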
2552
2553 CVariable* CShader::GetSymbolFromSource(Instruction* UseInst,
2554 e_alignment preferredAlign)
2555 {
2556 if (UseInst->isBinaryOp() || isa<SelectInst>(UseInst))
2557 {
2558 if (!m_VRA->checkUseInst(UseInst, m_deSSA->getLiveVars()))
2559 return nullptr;
2560
2561 for (unsigned i = 0; i < UseInst->getNumOperands(); ++i)
2562 {
2563 Value* Opnd = UseInst->getOperand(i);
2564 auto DefInst = dyn_cast<Instruction>(Opnd);
2565 // Only for non-uniform binary instructions.
2566 if (!DefInst || GetIsUniform(DefInst))
2567 continue;
2568
2569 if (IsCoalesced(DefInst))
2570 {
2571 continue;
2572 }
2573
2574 CVariable* Var = reuseSourceVar(UseInst, DefInst, preferredAlign);
2575 if (Var)
2576 return Var;
2577 }
2578 return nullptr;
2579 }
2580 else if (auto CI = dyn_cast<CastInst>(UseInst))
2581 {
2582 if (!m_VRA->checkUseInst(UseInst, m_deSSA->getLiveVars()))
2583 return nullptr;
2584
2585 Value* Opnd = UseInst->getOperand(0);
2586 auto DefInst = dyn_cast<Instruction>(Opnd);
2587 if (!DefInst)
2588 return nullptr;
2589
2590 if (!IsCoalesced(DefInst))
2591 {
2592 return nullptr;
2593 }
2594
2595 // TODO: allow %y = trunc i32 %x to i16
2596 if (!CI->isNoopCast(*m_DL))
2597 return nullptr;
2598
2599 // WA: vISA does not optimize the following reuse well yet.
2600 // %398 = bitcast i16 %vCastload to <2 x i8>
2601 // produces
2602 // mov (16) r7.0<1>:w r18.0<2;1,0>:w
2603 // mov (16) r7.0<1>:b r7.0<2;1,0>:b
2604 // mov (16) r20.0<1>:f r7.0<8;8,1>:ub
2605 // not
2606 // mov (16) r7.0<1>:w r18.0<2;1,0>:w
2607 // mov (16) r20.0<1>:f r7.0<2;1,0>:ub
2608 //
2609 if (CI->getOpcode() == Instruction::BitCast)
2610 {
2611 if (GetScalarTypeSizeInRegisterInBits(CI->getSrcTy()) !=
2612 GetScalarTypeSizeInRegisterInBits(CI->getDestTy()))
2613 return nullptr;
2614 }
2615
2616 return reuseSourceVar(UseInst, DefInst, preferredAlign);
2617 }
2618
2619 // TODO: allow insertelement/insertvalue, GEP, intrinsic calls, etc.
2620 //
2621 // No source for reuse.
2622 return nullptr;
2623 }
2624
2625 unsigned int CShader::EvaluateSIMDConstExpr(Value* C)
2626 {
2627 if (BinaryOperator * op = dyn_cast<BinaryOperator>(C))
2628 {
2629 switch (op->getOpcode())
2630 {
2631 case Instruction::Add:
2632 return EvaluateSIMDConstExpr(op->getOperand(0)) + EvaluateSIMDConstExpr(op->getOperand(1));
2633 case Instruction::Mul:
2634 return EvaluateSIMDConstExpr(op->getOperand(0)) * EvaluateSIMDConstExpr(op->getOperand(1));
2635 case Instruction::Shl:
2636 return EvaluateSIMDConstExpr(op->getOperand(0)) << EvaluateSIMDConstExpr(op->getOperand(1));
2637 default:
2638 break;
2639 }
2640 }
2641 if (llvm::GenIntrinsicInst * genInst = dyn_cast<GenIntrinsicInst>(C))
2642 {
2643 if (genInst->getIntrinsicID() == GenISAIntrinsic::GenISA_simdSize)
2644 {
2645 return numLanes(m_dispatchSize);
2646
2647 }
2648 }
2649 if (ConstantInt * constValue = dyn_cast<ConstantInt>(C))
2650 {
2651 return (unsigned int)constValue->getZExtValue();
2652 }
2653 IGC_ASSERT_MESSAGE(0, "unknow SIMD constant expression");
2654 return 0;
2655 }
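// Example (illustrative, intrinsic spelled schematically): at SIMD16,
//   %n = GenISA_simdSize()
//   %x = shl i32 %n, 1
//   %y = add i32 %x, 8
// EvaluateSIMDConstExpr(%y) == (16 << 1) + 8 == 40.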
2656
2657 CVariable* CShader::GetSymbol(llvm::Value* value, bool fromConstantPool)
2658 {
2659 CVariable* var = nullptr;
2660
2661 // Symbol mappings for struct types
2662 if (value->getType()->isStructTy())
2663 {
2664 return GetStructVariable(value);
2665 }
2666
2667 if (Constant * C = llvm::dyn_cast<llvm::Constant>(value))
2668 {
2669 // Check for function and global symbols
2670 {
2671 // Function Pointer
2672 auto isFunctionType = [this](Value* value)->bool
2673 {
2674 return isa<GlobalValue>(value) &&
2675 value->getType()->isPointerTy() &&
2676 value->getType()->getPointerElementType()->isFunctionTy();
2677 };
2678 // Global Variable/Constant
2679 auto isGlobalVarType = [this](Value* value)->bool
2680 {
2681 return isa<GlobalVariable>(value) &&
2682 m_ModuleMetadata->inlineProgramScopeOffsets.count(cast<GlobalVariable>(value)) > 0;
2683 };
2684
2685 bool isVecType = value->getType()->isVectorTy();
2686 bool isFunction = false;
2687 bool isGlobalVar = false;
2688
2689 if (isVecType)
2690 {
2691 Value* element = C->getAggregateElement((unsigned)0);
2692 if (isFunctionType(element))
2693 isFunction = true;
2694 else if (isGlobalVarType(element))
2695 isGlobalVar = true;
2696 }
2697 else if (isFunctionType(value))
2698 {
2699 isFunction = true;
2700 }
2701 else if (isGlobalVarType(value))
2702 {
2703 isGlobalVar = true;
2704 }
2705
2706 if (isFunction || isGlobalVar)
2707 {
2708 auto it = symbolMapping.find(value);
2709 if (it != symbolMapping.end())
2710 {
2711 return it->second;
2712 }
2713 const auto &valName = value->getName();
2714 if (isVecType)
2715 {
2716 // Map the entire vector value to the CVar
2717 unsigned numElements = (unsigned)cast<IGCLLVM::FixedVectorType>(value->getType())->getNumElements();
2718 var = GetNewVariable(numElements, ISA_TYPE_UQ,
2719 (GetContext()->platform.getGRFSize() == 64) ? EALIGN_32WORD : EALIGN_HWORD,
2720 WIBaseClass::UNIFORM_GLOBAL, 1, valName);
2721 symbolMapping.insert(std::pair<llvm::Value*, CVariable*>(value, var));
2722
2723 // Copy over each element
2724 for (unsigned i = 0; i < numElements; i++)
2725 {
2726 Value* element = C->getAggregateElement(i);
2727 CVariable* elementV = GetSymbol(element);
2728 CVariable* offsetV = GetNewAlias(var, ISA_TYPE_UQ, i * var->GetElemSize(), 1);
2729 encoder.Copy(offsetV, elementV);
2730 encoder.Push();
2731 }
2732 return var;
2733 }
2734 else
2735 {
2736 var = GetNewVariable(1, ISA_TYPE_UQ, EALIGN_QWORD, WIBaseClass::UNIFORM_GLOBAL, 1, valName);
2737 symbolMapping.insert(std::pair<llvm::Value*, CVariable*>(value, var));
2738 return var;
2739 }
2740 }
2741 }
2742
2743 if (fromConstantPool) {
2744 CVariable* cvar = ConstantPool.lookup(C);
2745 if (cvar)
2746 return cvar;
2747 // Generate constant initialization.
2748 SEncoderState S = encoder.CopyEncoderState();
2749 encoder.Push();
2750 cvar = GetConstant(C);
2751 if (!C->getType()->isVectorTy()) {
2752 CVariable* dst = GetNewVector(C);
2753 encoder.Copy(dst, cvar);
2754 encoder.Push();
2755 cvar = dst;
2756 }
2757 encoder.SetEncoderState(S);
2758 addConstantInPool(C, cvar);
2759 return cvar;
2760 }
2761 var = GetConstant(C);
2762 return var;
2763 }
2764
2765 else if (Instruction * inst = dyn_cast<Instruction>(value))
2766 {
2767 if (m_CG->SIMDConstExpr(inst))
2768 {
2769 return ImmToVariable(EvaluateSIMDConstExpr(inst), ISA_TYPE_D);
2770 }
2771 }
2772
2773 auto it = symbolMapping.find(value);
2774
2775 // mapping exists, return
2776 if (it != symbolMapping.end())
2777 {
2778 return it->second;
2779 }
2780
2781 if (IGC_IS_FLAG_ENABLED(EnableDeSSAAlias) &&
2782 m_deSSA && value != m_deSSA->getNodeValue(value))
2783 {
2784 // Generate CVariable alias.
2785 // Value and its aliasee must be of the same size.
2786 Value* nodeVal = m_deSSA->getNodeValue(value);
2787 IGC_ASSERT_MESSAGE(nodeVal != value, "ICE: value must be aliaser!");
2788
2789 // For a non-node value, get the symbol for its node value first.
2790 // Then, get an alias to that node value.
2791 CVariable* Base = GetSymbol(nodeVal);
2792 CVariable* AliasVar = createAliasIfNeeded(value, Base);
2793 symbolMapping.insert(std::pair<llvm::Value*, CVariable*>(value, AliasVar));
2794 return AliasVar;
2795 }
2796
2797 if (!isa<InsertElementInst>(value) && value->hasOneUse()) {
2798 auto IEI = dyn_cast<InsertElementInst>(value->user_back());
2799 if (IEI && CanTreatScalarSourceAsAlias(IEI)) {
2800 CVariable* Var = GetSymbol(IEI);
2801 llvm::ConstantInt* Idx = llvm::cast<llvm::ConstantInt>(IEI->getOperand(2));
2802 unsigned short NumElts = 1;
2803 unsigned EltSz = CEncoder::GetCISADataTypeSize(GetType(IEI->getType()->getScalarType()));
2804 unsigned Offset = unsigned(Idx->getZExtValue() * EltSz);
2805 if (!Var->IsUniform()) {
2806 NumElts = numLanes(m_SIMDSize);
2807 Offset *= Var->getOffsetMultiplier() * numLanes(m_SIMDSize);
2808 }
2809 CVariable* Alias = GetNewAlias(Var, Var->GetType(), (uint16_t)Offset, NumElts);
2810 // FIXME: It makes no sense to map it, as this `value` is
2811 // single-use, implied by CanTreatScalarSourceAsAlias().
2812 symbolMapping.insert(std::pair<llvm::Value*, CVariable*>(value, Alias));
2813 return Alias;
2814 }
2815 }
2816
2817 if (llvm::ExtractElementInst * EEI = llvm::dyn_cast<ExtractElementInst>(value))
2818 {
2819 if (CanTreatAsAlias(EEI))
2820 {
2821 llvm::ConstantInt* const pConstElem = llvm::dyn_cast<llvm::ConstantInt>(EEI->getIndexOperand());
2822 IGC_ASSERT(nullptr != pConstElem);
2823 Value* vecOperand = EEI->getVectorOperand();
2824 // need to call GetSymbol() before AdjustExtractIndex(), since
2825 // GetSymbol may update mask of the vector operand.
2826 CVariable* vec = GetSymbol(vecOperand);
2827
2828 uint element = AdjustExtractIndex(vecOperand, (uint16_t)pConstElem->getZExtValue());
2829 IGC_ASSERT_MESSAGE((element < (UINT16_MAX)), "ExtractElementInst element index higher than 64k");
2830
2831 // see if distinct CVariables were created during vector bitcast copy
2832 if (auto vectorBCI = dyn_cast<BitCastInst>(vecOperand))
2833 {
2834 CVariable* EEIVar = getCVarForVectorBCI(vectorBCI, element);
2835 if (EEIVar)
2836 {
2837 return EEIVar;
2838 }
2839 }
2840
2841 uint offset = 0;
2842 unsigned EltSz = CEncoder::GetCISADataTypeSize(GetType(EEI->getType()));
2843 if (GetIsUniform(EEI->getOperand(0)))
2844 {
2845 offset = int_cast<unsigned int>(element * EltSz);
2846 }
2847 else
2848 {
2849 offset = int_cast<unsigned int>(vec->getOffsetMultiplier() * element * numLanes(m_SIMDSize) * EltSz);
2850 }
2851 IGC_ASSERT_MESSAGE((offset < (UINT16_MAX)), "computed alias offset higher than 64k");
2852
2853 // You'd expect the number of elements of the extracted variable to be
2854 // vec->GetNumberElement() / vecOperand->getType()->getVectorNumElements().
2855 // However, vec->GetNumberElement() is not always what you'd expect it to be because of
2856 // the pruning code in GetNbVectorElement().
2857 // So, recompute the number of elements from scratch.
2858 uint16_t numElements = 1;
2859 if (!vec->IsUniform())
2860 {
2861 numElements = numLanes(m_SIMDSize);
2862 }
2863 var = GetNewAlias(vec, vec->GetType(), (uint16_t)offset, numElements);
2864 symbolMapping.insert(std::pair<llvm::Value*, CVariable*>(value, var));
2865 return var;
2866 }
2867 }
2868
2869 if (GenIntrinsicInst * genInst = dyn_cast<GenIntrinsicInst>(value))
2870 {
2871 if (VMECoalescePattern(genInst))
2872 {
2873 auto* Sym = GetSymbol(genInst->getOperand(0));
2874 auto* Alias = GetNewAlias(Sym, Sym->GetType(), 0, Sym->GetNumberElement());
2875 symbolMapping.insert(std::pair<Value*, CVariable*>(value, Alias));
2876 return Alias;
2877 }
2878 if (genInst->getIntrinsicID() == GenISAIntrinsic::GenISA_UpdateDiscardMask)
2879 {
2880 IGC_ASSERT(GetShaderType() == ShaderType::PIXEL_SHADER);
2881 return (static_cast<CPixelShader*>(this))->GetDiscardPixelMask();
2882 }
2883 }
2884
2885 if (m_coalescingEngine) {
2886 CoalescingEngine::CCTuple* ccTuple = m_coalescingEngine->GetValueCCTupleMapping(value);
2887 if (ccTuple) {
2888 VISA_Type type = GetType(value->getType());
2889 CVariable* var = LazyCreateCCTupleBackingVariable(ccTuple, type);
2890
2891 int mult = 1;
2892 if (CEncoder::GetCISADataTypeSize(type) == 2 && m_SIMDSize == SIMDMode::SIMD8)
2893 {
2894 mult = 2;
2895 }
2896
2897 //FIXME: Could improve by copying types from value
2898
2899 unsigned EltSz = CEncoder::GetCISADataTypeSize(type);
2900 int offset = int_cast<int>(mult * (m_coalescingEngine->GetValueOffsetInCCTuple(value) - ccTuple->GetLeftBound()) *
2901 numLanes(m_SIMDSize) * EltSz);
2902
2903 if (ccTuple->HasNonHomogeneousElements())
2904 {
2905 offset += m_coalescingEngine->GetLeftReservedOffset(ccTuple->GetRoot(), m_SIMDSize);
2906 }
2907
2908 TODO("NumElements in this alias is 0 to preserve previous behavior. I have no idea what it should be.");
2909 IGC_ASSERT_MESSAGE((offset < (UINT16_MAX)), "alias offset higher than 64k");
2910 CVariable* newVar = GetNewAlias(var, type, (uint16_t)offset, 0);
2911 symbolMapping.insert(std::pair<llvm::Value*, CVariable*>(value, newVar));
2912 return newVar;
2913 }
2914 }
2915
2916 // If we use a value which is not marked as needed by the pattern matching, then something went wrong
2917 IGC_ASSERT(!isa<Instruction>(value) || isa<PHINode>(value) || m_CG->NeedInstruction(cast<Instruction>(*value)));
2918
2919 e_alignment preferredAlign = GetPreferredAlignment(value, m_WI, GetContext());
2920
2921 // no de-SSA: always create a new variable and return it
2922 if (!m_deSSA)
2923 {
2924 var = GetNewVector(value, preferredAlign);
2925 symbolMapping.insert(std::pair<llvm::Value*, CVariable*>(value, var));
2926 return var;
2927 }
2928
2929 llvm::Value* rootValue = m_deSSA->getRootValue(value, &preferredAlign);
2930 // belongs to a congruence class
2931 if (rootValue)
2932 {
2933 it = symbolMapping.find(rootValue);
2934 if (it != symbolMapping.end())
2935 {
2936 var = it->second;
2937 CVariable* aV = var;
2938 if (IGC_GET_FLAG_VALUE(EnableDeSSAAlias) >= 2)
2939 {
2940 aV = createAliasIfNeeded(value, var);
2941 }
2942 symbolMapping.insert(std::pair<llvm::Value*, CVariable*>(value, aV));
2943 /*
2944 * When we don't scalarize vectors, a vector may come from a phi or
2945 * insert-element, and we cannot adjust its extract-mask.
2946 */
2947 if (value->getType()->isVectorTy())
2948 {
2949 extractMasks.erase(value);
2950 }
2951 return aV;
2952 }
2953 }
2954
2955 if (IGC_IS_FLAG_ENABLED(EnableVariableReuse))
2956 {
2957 // Only for instructions and do not reuse flag variables.
2958 if (!value->getType()->getScalarType()->isIntegerTy(1))
2959 {
2960 if (auto Inst = dyn_cast<Instruction>(value))
2961 {
2962 var = GetSymbolFromSource(Inst, preferredAlign);
2963 }
2964 }
2965 }
2966
2967 // need to create a new mapping
2968 if (!var)
2969 {
2970 var = GetNewVector(value, preferredAlign);
2971 }
2972
2973 symbolMapping.insert(std::pair<llvm::Value*, CVariable*>(value, var));
2974 if (rootValue)
2975 {
2976 CVariable* aV = var;
2977 if (IGC_GET_FLAG_VALUE(EnableDeSSAAlias) >= 2)
2978 {
2979 aV = createAliasIfNeeded(rootValue, var);
2980 }
2981 symbolMapping.insert(std::pair<llvm::Value*, CVariable*>(rootValue, aV));
2982 }
2983 return var;
2984 }
2985
2986 /// When implementing vector coalescing, we want to be more conservative in
2987 /// treating extract-element as an alias, in order to reduce the complexity
2988 /// of the problem.
2989 bool CShader::CanTreatAsAlias(llvm::ExtractElementInst* inst)
2990 {
2991 llvm::Value* idxSrc = inst->getIndexOperand();
2992 if (!isa<llvm::ConstantInt>(idxSrc))
2993 {
2994 return false;
2995 }
2996
2997 llvm::Value* vecSrc = inst->getVectorOperand();
2998 if (isa<llvm::InsertElementInst>(vecSrc))
2999 {
3000 return false;
3001 }
3002
3003 if (IsCoalesced(inst) || IsCoalesced(vecSrc))
3004 {
3005 return false;
3006 }
3007
3008 for (auto I = vecSrc->user_begin(), E = vecSrc->user_end(); I != E; ++I)
3009 {
3010 llvm::ExtractElementInst* extract = llvm::dyn_cast<llvm::ExtractElementInst>(*I);
3011 if (!extract)
3012 {
3013 return false;
3014 }
3015 if (!isa<ConstantInt>(extract->getIndexOperand()))
3016 {
3017 return false;
3018 }
3019 }
3020
3021 return true;
3022 }
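// Example (illustrative): given
//   %v  = load <4 x float>, <4 x float>* %p
//   %e0 = extractelement <4 x float> %v, i32 0
//   %e2 = extractelement <4 x float> %v, i32 2
// both extracts can be treated as aliases into %v's CVariable; a single
// extract with a dynamic index, or any non-extract user of %v,
// disqualifies all of them.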
3023
3024 static bool isUsedInPHINode(llvm::Instruction* I) {
3025 for (auto U : I->users()) {
3026 if (isa<PHINode>(U))
3027 return true;
3028 if (auto BC = dyn_cast<BitCastInst>(U)) {
3029 if (isUsedInPHINode(BC))
3030 return true;
3031 }
3032 if (auto IEI = dyn_cast<InsertElementInst>(U)) {
3033 if (isUsedInPHINode(IEI))
3034 return true;
3035 }
3036 }
3037 return false;
3038 }
3039
3040 bool CShader::CanTreatScalarSourceAsAlias(llvm::InsertElementInst* IEI) {
3041 // Skip if it's not enabled.
3042 if (!IGC_IS_FLAG_ENABLED(EnableInsertElementScalarCoalescing))
3043 return false;
3044 // Skip if IEI is used in PHI.
3045 // FIXME: Should skip PHI if this IEI is from its backedge.
3046 if (isUsedInPHINode(IEI))
3047 return false;
3048 // Skip if the index is not constant.
3049 llvm::ConstantInt* IdxOp = dyn_cast<llvm::ConstantInt>(IEI->getOperand(2));
3050 if (!IdxOp)
3051 return false;
3052 // Skip if the scalar operand is not single-used.
3053 Value* ScalarOp = IEI->getOperand(1);
3054 if (!ScalarOp->hasOneUse())
3055 return false;
3056 // Skip if the scalar operand is not an instruction.
3057 if (!isa<llvm::Instruction>(ScalarOp))
3058 return false;
3059 // Skip if the scalar operand may be treated as an alias.
3060 if (llvm::dyn_cast<llvm::PHINode>(ScalarOp))
3061 return false;
3062 if (auto EEI = llvm::dyn_cast<llvm::ExtractElementInst>(ScalarOp)) {
3063 if (CanTreatAsAlias(EEI))
3064 return false;
3065 }
3066 auto Def = cast<llvm::Instruction>(ScalarOp);
3067 auto BB = Def->getParent();
3068 // Skip if the scalar value is not defined locally.
3069 if (BB != IEI->getParent())
3070 return false;
3071 if (!m_deSSA)
3072 return isa<llvm::UndefValue>(IEI->getOperand(0));
3073 // Since we will define that vector element ahead of its original
3074 // position, check whether such hoisting is safe.
3075 auto BI = std::prev(llvm::BasicBlock::reverse_iterator(IEI->getIterator()));
3076 auto BE = std::prev(llvm::BasicBlock::reverse_iterator(Def->getIterator()));
3077 auto Idx = IdxOp->getZExtValue();
3078 for (; BI != BE && BI != BB->rend(); ++BI) {
3079 if (&*BI != IEI)
3080 continue;
3081 Value* VecOp = IEI->getOperand(0);
3082 // If the source operand is `undef`, `insertelement` can always be
3083 // treated as an alias (of the destination of the scalar operand).
3084 if (isa<UndefValue>(VecOp))
3085 return true;
3086 Value* SrcRoot = m_deSSA->getRootValue(VecOp);
3087 Value* DstRoot = m_deSSA->getRootValue(IEI);
3088 // The `dst` vector will be copied from the `src` vector if they don't coalesce.
3089 // Hoisting this insertion is unsafe.
3090 if (SrcRoot != DstRoot)
3091 return false;
3092 IEI = dyn_cast<llvm::InsertElementInst>(VecOp);
3093 // However, if `src` is not defined through `insertelement`, it's still
3094 // unsafe to hoist this insertion.
3095 if (!IEI)
3096 return false;
3097 // If that's a dynamically indexed insertion or an insertion on the
3098 // same index, it's unsafe to hoist this insertion.
3099 llvm::ConstantInt* IdxOp = dyn_cast<llvm::ConstantInt>(IEI->getOperand(2));
3100 if (!IdxOp)
3101 return false;
3102 if (IdxOp->getZExtValue() == Idx)
3103 return false;
3104 }
3105 return true;
3106 }
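// Hoisting sketch (illustrative):
//   %s = fadd float %a, %b              ; single use, defined in same BB
//   %v = insertelement <4 x float> undef, float %s, i32 0
// Here the fadd may write directly into element 0 of %v's CVariable. The
// backward scan above rejects cases where another insertelement into the
// same (or a dynamic) index sits between the def and this use.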
3107
3108 bool CShader::HasBecomeNoop(Instruction* inst) {
3109 return m_VRA->m_HasBecomeNoopInsts.count(inst);
3110 }
3111
3112 bool CShader::IsCoalesced(Value* V) {
3113 if ((m_VRA && m_VRA->isAliasedValue(V)) ||
3114 (m_deSSA && m_deSSA->getRootValue(V)) ||
3115 (m_coalescingEngine && m_coalescingEngine->GetValueCCTupleMapping(V)))
3116 {
3117 return true;
3118 }
3119 return false;
3120 }
3121
3122 #define SET_INTRINSICS() \
3123 GenISAIntrinsic::GenISA_setMessagePhaseX: \
3124 case GenISAIntrinsic::GenISA_setMessagePhaseXV: \
3125 case GenISAIntrinsic::GenISA_setMessagePhase: \
3126 case GenISAIntrinsic::GenISA_setMessagePhaseV: \
3127 case GenISAIntrinsic::GenISA_simdSetMessagePhase: \
3128 case GenISAIntrinsic::GenISA_simdSetMessagePhaseV
3129
3130 static bool IsSetMessageIntrinsic(GenIntrinsicInst* I)
3131 {
3132 switch (I->getIntrinsicID())
3133 {
3134 case SET_INTRINSICS():
3135 return true;
3136 default:
3137 return false;
3138 }
3139 }
3140
3141 bool CShader::VMECoalescePattern(GenIntrinsicInst* genInst)
3142 {
3143 if (!IsSetMessageIntrinsic(genInst))
3144 return false;
3145
3146 if (IsCoalesced(genInst))
3147 {
3148 return false;
3149 }
3150
3151 if (GenIntrinsicInst * argInst = dyn_cast<GenIntrinsicInst>(genInst->getOperand(0)))
3152 {
3153 if (IsCoalesced(argInst))
3154 {
3155 return false;
3156 }
3157
3158 switch (argInst->getIntrinsicID())
3159 {
3160 case GenISAIntrinsic::GenISA_createMessagePhases:
3161 case GenISAIntrinsic::GenISA_createMessagePhasesV:
3162 case GenISAIntrinsic::GenISA_createMessagePhasesNoInit:
3163 case GenISAIntrinsic::GenISA_createMessagePhasesNoInitV:
3164 case SET_INTRINSICS():
3165 {
3166 bool OneUse = argInst->hasOneUse();
3167
3168 if (OneUse)
3169 {
3170 return (argInst->getParent() == genInst->getParent());
3171 }
3172
3173 // If we don't succeed in the quick check above, also match if there
3174 // is a single set intrinsic and all of the other users dominate the
3175 // set intrinsic in the block.
3176
3177 SmallPtrSet<Value*, 4> Users(argInst->user_begin(), argInst->user_end());
3178
3179 uint32_t SetMessageCnt = 0U;
3180 for (auto U : Users)
3181 {
3182 if (!isa<GenIntrinsicInst>(U))
3183 return false;
3184
3185 auto* GII = cast<GenIntrinsicInst>(U);
3186 if (GII->getParent() != argInst->getParent())
3187 return false;
3188
3189 if (IsSetMessageIntrinsic(GII))
3190 SetMessageCnt++;
3191 }
3192
3193 if (SetMessageCnt > 1)
3194 return false;
3195
3196 uint32_t NonSetInsts = Users.size() - SetMessageCnt;
3197
3198 auto E = argInst->getParent()->end();
3199 for (auto I = argInst->getIterator(); I != E; I++)
3200 {
3201 if (Users.count(&*I) != 0)
3202 {
3203 if (IsSetMessageIntrinsic(cast<GenIntrinsicInst>(&*I)))
3204 {
3205 return false;
3206 }
3207 else
3208 {
3209 if (--NonSetInsts == 0)
3210 break;
3211 }
3212 }
3213 }
3214
3215 return true;
3216 }
3217 default:
3218 return false;
3219 }
3220 }
3221
3222 return false;
3223
3224 }
3225
3226 #undef SET_INTRINSICS
3227
3228 bool CShader::isUnpacked(llvm::Value* value)
3229 {
3230 bool isUnpacked = false;
3231 if (m_SIMDSize == m_Platform->getMinDispatchMode())
3232 {
3233 if (isa<SampleIntrinsic>(value) || isa<LdmcsInstrinsic>(value))
3234 {
3235 if (cast<VectorType>(value->getType())->getElementType()->isHalfTy() ||
3236 cast<VectorType>(value->getType())->getElementType()->isIntegerTy(16))
3237 {
3238 isUnpacked = true;
3239 auto uses = value->user_begin();
3240 auto endUses = value->user_end();
3241 while (uses != endUses)
3242 {
3243 if (llvm::ExtractElementInst * extrElement = dyn_cast<llvm::ExtractElementInst>(*uses))
3244 {
3245 if (CanTreatAsAlias(extrElement))
3246 {
3247 ++uses;
3248 continue;
3249 }
3250 }
3251 isUnpacked = false;
3252 break;
3253 }
3254 }
3255 }
3256 }
3257 return isUnpacked;
3258 }
3259 /// GetNewVector
3260 ///
3261 CVariable* CShader::GetNewVector(llvm::Value* value, e_alignment preferredAlign)
3262 {
3263 VISA_Type type = GetType(value->getType());
3264 WIBaseClass::WIDependancy dep = GetDependency(value);
3265 bool uniform = WIAnalysis::isDepUniform(dep);
3266 uint32_t mask = 0;
3267 bool isUnpackedBool = isUnpacked(value);
3268 uint8_t multiplier = (isUnpackedBool) ? 2 : 1;
3269 uint nElem = GetNbElementAndMask(value, mask) * multiplier;
3270 IGC_ASSERT_MESSAGE((nElem < (UINT16_MAX)), "getNumElements more than 64k elements");
3271 const uint16_t nbElement = (uint16_t)nElem;
3272 // TODO: Non-uniform variable should be naturally aligned instead of GRF
3273 // aligned. E.g., <8 x i16> should be aligned to 16B instead of 32B or GRF.
3274 e_alignment align = EALIGN_GRF;
3275 if (uniform) {
3276 // So far, preferredAlign is only applied to uniform variable.
3277 // TODO: Add preferred alignment for non-uniform variables.
3278 align = preferredAlign;
3279 if (align == EALIGN_AUTO)
3280 align = CEncoder::GetCISADataTypeAlignment(type);
3281 }
3282 uint16_t numberOfInstance = m_numberInstance;
3283 if (uniform)
3284 {
3285 if (type != ISA_TYPE_BOOL || m_CG->canEmitAsUniformBool(value))
3286 {
3287 numberOfInstance = 1;
3288 }
3289 }
3290 if (mask)
3291 {
3292 extractMasks[value] = mask;
3293 }
3294 const auto &valueName = value->getName();
3295 CVariable* var =
3296 GetNewVariable(
3297 nbElement,
3298 type,
3299 align,
3300 dep,
3301 numberOfInstance,
3302 valueName);
3303 if (isUnpackedBool)
3304 var->setisUnpacked();
3305 return var;
3306 }
3307
3308 /// GetNewAlias
3309 CVariable* CShader::GetNewAlias(
3310 CVariable* var, VISA_Type type, uint16_t offset, uint16_t numElements)
3311 {
3312 IGC_ASSERT_MESSAGE(false == var->IsImmediate(), "Trying to create an alias of an immediate");
3313 CVariable* alias = new (Allocator)CVariable(var, type, offset, numElements, var->IsUniform());
3314 encoder.CreateVISAVar(alias);
3315 return alias;
3316 }
3317
3318 // createAliasIfNeeded() returns the Var that is either BaseVar or
3319 // its alias of the same size.
3320 //
3321 // If BaseVar's type matches V's, return BaseVar; otherwise, create a
3322 // new alias CVariable to BaseVar. The new CVariable has V's size, which
3323 // should not be larger than BaseVar's.
3324 //
3325 // Note that V's type is either vector or scalar.
3326 CVariable* CShader::createAliasIfNeeded(Value* V, CVariable* BaseVar)
3327 {
3328 Type* Ty = V->getType();
3329 VectorType* VTy = dyn_cast<VectorType>(Ty);
3330 Type* BTy = VTy ? VTy->getElementType() : Ty;
3331 VISA_Type visaTy = GetType(BTy);
3332 if (visaTy == BaseVar->GetType())
3333 {
3334 return BaseVar;
3335 }
3336
3337 uint16_t visaTy_sz = CEncoder::GetCISADataTypeSize(visaTy);
3338 IGC_ASSERT(visaTy_sz);
3339 uint16_t nbe = BaseVar->GetSize() / visaTy_sz;
3340 IGC_ASSERT_MESSAGE((BaseVar->GetSize() % visaTy_sz) == 0, "V's Var should be the same size as BaseVar!");
3341 CVariable* NewAliasVar = GetNewAlias(BaseVar, visaTy, 0, nbe);
3342 return NewAliasVar;
3343 }
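// Examples (illustrative, for a 32-byte BaseVar of type F): if V is float
// typed, BaseVar is returned as-is; if V is <8 x i32>, an 8-element D
// alias over the same storage is created; if V is <16 x i16>, a
// 16-element W alias is created instead.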
3344
3345 /// GetNewAlias
3346 CVariable* CShader::GetNewAlias(
3347 CVariable* var, VISA_Type type, uint16_t offset, uint16_t numElements, bool uniform)
3348 {
3349 IGC_ASSERT(nullptr != var);
3350 IGC_ASSERT_MESSAGE(false == var->IsImmediate(), "Trying to create an alias of an immediate");
3351 CVariable* alias = new (Allocator) CVariable(var, type, offset, numElements, uniform);
3352 encoder.CreateVISAVar(alias);
3353 return alias;
3354 }
3355
3356 CVariable* CShader::GetVarHalf(CVariable* var, unsigned int half)
3357 {
3358 const char *lowOrHi = half == 0 ? "Lo" : "Hi";
3359 IGC_ASSERT(nullptr != var);
3360 IGC_ASSERT_MESSAGE(false == var->IsImmediate(), "Trying to create an alias of an immediate");
3361 CVariable* alias = new (Allocator) CVariable(
3362 var->GetNumberElement(),
3363 var->IsUniform(),
3364 var->GetType(),
3365 var->GetVarType(),
3366 var->GetAlign(),
3367 var->IsVectorUniform(),
3368 1,
3369 CName(var->getName(), lowOrHi));
3370 alias->visaGenVariable[0] = var->visaGenVariable[half];
3371 return alias;
3372 }
3373
3374 void CShader::GetPayloadElementSymbols(llvm::Value* inst, CVariable* payload[], int vecWidth)
3375 {
3376 llvm::ConstantDataVector* cv = llvm::dyn_cast<llvm::ConstantDataVector>(inst);
3377 if (cv) {
3378 IGC_ASSERT(vecWidth == cv->getNumElements());
3379 for (int i = 0; i < vecWidth; ++i) {
3380 payload[i] = GetSymbol(cv->getElementAsConstant(i));
3381 }
3382 return;
3383 }
3384
3385 llvm::InsertElementInst* ie = llvm::dyn_cast<llvm::InsertElementInst>(inst);
3386 IGC_ASSERT(nullptr != ie);
3387
3388 for (int i = 0; i < vecWidth; ++i) {
3389 payload[i] = NULL;
3390 }
3391
3392 int count = 0;
3393 //Gather elements of vector
3394 while (ie != NULL) {
3395 int64_t iOffset = llvm::dyn_cast<llvm::ConstantInt>(ie->getOperand(2))->getSExtValue();
3396 IGC_ASSERT(iOffset >= 0);
3397 IGC_ASSERT(iOffset < vecWidth);
3398
3399 // Get the scalar value from this insert
3400 if (payload[iOffset] == NULL) {
3401 payload[iOffset] = GetSymbol(ie->getOperand(1));
3402 count++;
3403 }
3404
3405 // Do we have another insert?
3406 llvm::Value* insertBase = ie->getOperand(0);
3407 ie = llvm::dyn_cast<llvm::InsertElementInst>(insertBase);
3408 if (ie != NULL) {
3409 continue;
3410 }
3411
3412 if (llvm::isa<llvm::UndefValue>(insertBase)) {
3413 break;
3414 }
3415 }
3416 IGC_ASSERT(count == vecWidth);
3417 }
3418
3419 void CShader::Destroy()
3420 {
3421 }
3422
3423 // Helper function to copy raw register
3424 void CShader::CopyVariable(
3425 CVariable* dst,
3426 CVariable* src,
3427 uint dstSubVar,
3428 uint srcSubVar)
3429 {
3430 CVariable* rawDst = dst;
3431 // The source have to match for a raw copy
3432 if (src->GetType() != dst->GetType())
3433 {
3434 rawDst = BitCast(dst, src->GetType());
3435 }
3436 encoder.SetSrcSubVar(0, srcSubVar);
3437 encoder.SetDstSubVar(dstSubVar);
3438 encoder.Copy(rawDst, src);
3439 encoder.Push();
3440 }

// Helper function to copy and pack a raw register.
void CShader::PackAndCopyVariable(
    CVariable* dst,
    CVariable* src,
    uint subVar)
{
    CVariable* rawDst = dst;
    // The source and destination types have to match for a raw copy.
    if (src->GetType() != dst->GetType())
    {
        rawDst = BitCast(dst, src->GetType());
    }
    encoder.SetDstSubVar(subVar);
    if (!src->IsUniform())
    {
        encoder.SetSrcRegion(0, 16, 8, 2);
    }
    encoder.Copy(rawDst, src);
    encoder.Push();
}
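
// Region note (hedged): SetSrcRegion(0, 16, 8, 2) encodes the <16;8,2>
// source region, which reads every other element (lanes 0, 2, 4, ...) and
// lets the copy write them contiguously, packing sparse data into the
// destination.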

bool CShader::CompileSIMDSizeInCommon(SIMDMode simdMode)
{
    bool ret = (m_ScratchSpaceSize <= m_ctx->platform.maxPerThreadScratchSpace());
    m_simdProgram.setScratchSpaceUsedByShader(m_ScratchSpaceSize);
    if (m_ctx->platform.hasScratchSurface() && m_ctx->m_DriverInfo.supportsSeparatingSpillAndPrivateScratchMemorySpace()) {
        ret = (m_simdProgram.getScratchSpaceUsageInSlot0() <= m_ctx->platform.maxPerThreadScratchSpace());
    }

    return ret;
}
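
// Interpretation (hedged): by default a SIMD variant is viable only if its
// total scratch use fits the per-thread limit. On platforms with a scratch
// surface whose driver separates spill scratch from private-memory scratch,
// only the slot 0 portion of the usage is checked against that limit.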

uint32_t CShader::GetShaderThreadUsageRate()
{
    return 1;
}

CShader* CShaderProgram::GetShader(SIMDMode simd, ShaderDispatchMode mode)
{
    return GetShaderPtr(simd, mode);
}

CShader*& CShaderProgram::GetShaderPtr(SIMDMode simd, ShaderDispatchMode mode)
{
    switch (mode)
    {
    case ShaderDispatchMode::DUAL_PATCH:
        return m_SIMDshaders[3];
    default:
        break;
    }

    switch (simd)
    {
    case SIMDMode::SIMD8:
        return m_SIMDshaders[0];
    case SIMDMode::SIMD16:
        return m_SIMDshaders[1];
    case SIMDMode::SIMD32:
        return m_SIMDshaders[2];
    default:
        IGC_ASSERT_MESSAGE(0, "wrong SIMD size");
        break;
    }
    return m_SIMDshaders[0];
}
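
// Slot layout implied by GetShaderPtr/ClearShaderPtr:
//   m_SIMDshaders[0] : SIMD8 (also the fallback for an invalid SIMD size)
//   m_SIMDshaders[1] : SIMD16
//   m_SIMDshaders[2] : SIMD32
//   m_SIMDshaders[3] : DUAL_PATCH dispatch, regardless of SIMD width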

void CShaderProgram::ClearShaderPtr(SIMDMode simd)
{
    switch (simd)
    {
    case SIMDMode::SIMD8:  m_SIMDshaders[0] = nullptr; break;
    case SIMDMode::SIMD16: m_SIMDshaders[1] = nullptr; break;
    case SIMDMode::SIMD32: m_SIMDshaders[2] = nullptr; break;
    default:
        IGC_ASSERT_MESSAGE(0, "wrong SIMD size");
        break;
    }
}

CShader* CShaderProgram::GetOrCreateShader(SIMDMode simd, ShaderDispatchMode mode)
{
    CShader*& pShader = GetShaderPtr(simd, mode);
    if (pShader == nullptr)
    {
        pShader = CreateNewShader(simd);
    }
    return pShader;
}
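
// Typical call site (illustrative sketch; assumes the dispatch mode for
// non-dual-patch stages is ShaderDispatchMode::NOT_APPLICABLE):
//
//   CShader* simd16 = program->GetOrCreateShader(SIMDMode::SIMD16,
//                                                ShaderDispatchMode::NOT_APPLICABLE);
//
// The first request for a (simd, mode) slot allocates via CreateNewShader;
// subsequent requests return the cached instance.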

CShader* CShaderProgram::CreateNewShader(SIMDMode simd)
{
    CShader* pShader = nullptr;
    {
        switch (m_context->type)
        {
        case ShaderType::OPENCL_SHADER:
            pShader = new COpenCLKernel((OpenCLProgramContext*)m_context, m_kernel, this);
            break;
        case ShaderType::PIXEL_SHADER:
            pShader = new CPixelShader(m_kernel, this);
            break;
        case ShaderType::VERTEX_SHADER:
            pShader = new CVertexShader(m_kernel, this);
            break;
        case ShaderType::GEOMETRY_SHADER:
            pShader = new CGeometryShader(m_kernel, this);
            break;
        case ShaderType::HULL_SHADER:
            pShader = new CHullShader(m_kernel, this);
            break;
        case ShaderType::DOMAIN_SHADER:
            pShader = new CDomainShader(m_kernel, this);
            break;
        case ShaderType::COMPUTE_SHADER:
            pShader = new CComputeShader(m_kernel, this);
            break;
        default:
            IGC_ASSERT_MESSAGE(0, "wrong shader type");
            break;
        }
    }

    IGC_ASSERT(nullptr != pShader);

    pShader->m_shaderStats = m_shaderStats;
    pShader->m_DriverInfo = &m_context->m_DriverInfo;
    pShader->m_Platform = &m_context->platform;
    pShader->m_pBtiLayout = &m_context->btiLayout;
    pShader->m_ModuleMetadata = m_context->getModuleMetaData();

    return pShader;
}

void CShaderProgram::DeleteShader(SIMDMode simd, ShaderDispatchMode mode)
{
    CShader*& pShader = GetShaderPtr(simd, mode);
    delete pShader;
    pShader = nullptr;
}

unsigned int CShader::GetSamplerCount(unsigned int samplerCount)
{
    if (samplerCount > 0)
    {
        if (samplerCount <= 4)
            return 1; // between 1 and 4 samplers used
        else if (samplerCount >= 5 && samplerCount <= 8)
            return 2; // between 5 and 8 samplers used
        else if (samplerCount >= 9 && samplerCount <= 12)
            return 3; // between 9 and 12 samplers used
        else if (samplerCount >= 13 && samplerCount <= 16)
            return 4; // between 13 and 16 samplers used
        else
            // Sampler count out of range. Force value 0 to avoid undefined behavior.
            return 0;
    }
    return 0;
}
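
// Equivalent closed form (sketch): for 1 <= samplerCount <= 16 the ladder
// above computes ceil(samplerCount / 4); anything else maps to 0:
//
//   return (samplerCount >= 1 && samplerCount <= 16) ? (samplerCount + 3) / 4 : 0;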

CShaderProgram::CShaderProgram(CodeGenContext* ctx, llvm::Function* kernel)
    : m_shaderStats(nullptr)
    , m_context(ctx)
    , m_kernel(kernel)
    , m_SIMDshaders()
{
}

CShaderProgram::~CShaderProgram()
{
    for (auto& shader : m_SIMDshaders)
    {
        delete shader;
    }
    m_context = nullptr;
}

unsigned int CShader::GetPrimitiveTypeSizeInRegisterInBits(const Type* Ty) const
{
    unsigned int sizeInBits = (unsigned int)Ty->getPrimitiveSizeInBits();
    if (Ty->isPtrOrPtrVectorTy())
    {
        sizeInBits =
            GetContext()->getRegisterPointerSizeInBits(Ty->getPointerAddressSpace());
        if (auto* VTy = dyn_cast<IGCLLVM::FixedVectorType>(Ty))
        {
            sizeInBits *= (unsigned)VTy->getNumElements();
        }
    }
    return sizeInBits;
}
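
// Worked example (hedged): for <4 x i8 addrspace(N)*> where address space N
// uses 64-bit pointers in registers, the result is 64 * 4 = 256 bits; for a
// plain i32 the pointer path is skipped and the result is 32 bits.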

unsigned int CShader::GetPrimitiveTypeSizeInRegister(const Type* Ty) const
{
    return GetPrimitiveTypeSizeInRegisterInBits(Ty) / 8;
}

unsigned int CShader::GetScalarTypeSizeInRegisterInBits(const Type* Ty) const
{
    unsigned int sizeInBits = Ty->getScalarSizeInBits();
    if (Ty->isPtrOrPtrVectorTy())
    {
        sizeInBits =
            GetContext()->getRegisterPointerSizeInBits(Ty->getPointerAddressSpace());
    }
    return sizeInBits;
}

unsigned int CShader::GetScalarTypeSizeInRegister(const Type* Ty) const
{
    return GetScalarTypeSizeInRegisterInBits(Ty) / 8;
}