/*========================== begin_copyright_notice ============================

Copyright (C) 2018-2021 Intel Corporation

SPDX-License-Identifier: MIT

============================= end_copyright_notice ===========================*/

#include <sstream>
#include "common/LLVMWarningsPush.hpp"
#include <llvm/Support/ScaledNumber.h>
#include <llvm/Demangle/Demangle.h>
#include <llvm/IR/DebugInfo.h>
#include "common/LLVMWarningsPop.hpp"
#include "Compiler/CISACodeGen/ComputeShaderCodeGen.hpp"
#include "Compiler/CISACodeGen/ShaderCodeGen.hpp"
#include "Compiler/CodeGenPublic.h"
#include "Probe/Assertion.h"

namespace IGC
{

    typedef struct RetryState {
        bool allowLICM;
        bool allowCodeSinking;
        bool allowSimd32Slicing;
        bool allowPromotePrivateMemory;
        bool allowPreRAScheduler;
        bool allowVISAPreRAScheduler;
        bool allowLargeURBWrite;
        unsigned nextState;
    } RetryState;

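    // Each row below is one retry state: the bool columns map onto the fields
    // of RetryState in declaration order, and nextState is the index of the
    // state to advance to. A nextState past the end of the table (e.g. 500)
    // marks the final state, making IsLastTry() return true.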
    static const RetryState RetryTable[] = {
        { true, true, false, true, true, true, true, 1 },
        { false, true, true, false, false, false, false, 500 }
    };

    RetryManager::RetryManager() : enabled(false)
    {
        memset(m_simdEntries, 0, sizeof(m_simdEntries));
        firstStateId = IGC_GET_FLAG_VALUE(RetryManagerFirstStateId);
        stateId = firstStateId;
        IGC_ASSERT(stateId < getStateCnt());
    }

    bool RetryManager::AdvanceState() {
        if (!enabled || IGC_IS_FLAG_ENABLED(DisableRecompilation))
        {
            return false;
        }
        IGC_ASSERT(stateId < getStateCnt());
        stateId = RetryTable[stateId].nextState;
        return (stateId < getStateCnt());
    }
    bool RetryManager::AllowLICM() {
        IGC_ASSERT(stateId < getStateCnt());
        return RetryTable[stateId].allowLICM;
    }
    bool RetryManager::AllowPromotePrivateMemory() {
        IGC_ASSERT(stateId < getStateCnt());
        return RetryTable[stateId].allowPromotePrivateMemory;
    }
    bool RetryManager::AllowPreRAScheduler() {
        IGC_ASSERT(stateId < getStateCnt());
        return RetryTable[stateId].allowPreRAScheduler;
    }
    bool RetryManager::AllowVISAPreRAScheduler() {
        IGC_ASSERT(stateId < getStateCnt());
        return RetryTable[stateId].allowVISAPreRAScheduler;
    }
    bool RetryManager::AllowCodeSinking() {
        IGC_ASSERT(stateId < getStateCnt());
        return RetryTable[stateId].allowCodeSinking;
    }
    bool RetryManager::AllowSimd32Slicing() {
        IGC_ASSERT(stateId < getStateCnt());
        return RetryTable[stateId].allowSimd32Slicing;
    }
    bool RetryManager::AllowLargeURBWrite() {
        IGC_ASSERT(stateId < getStateCnt());
        return RetryTable[stateId].allowLargeURBWrite;
    }
    void RetryManager::SetFirstStateId(int id) {
        firstStateId = id;
    }
    bool RetryManager::IsFirstTry() {
        return (stateId == firstStateId);
    }
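    // The last try is reached when retries are disabled, the recorded spill
    // size is already below the allowed threshold, or the current state has
    // no successor in RetryTable.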
    bool RetryManager::IsLastTry() {
        return (!enabled ||
            IGC_IS_FLAG_ENABLED(DisableRecompilation) ||
            lastSpillSize < IGC_GET_FLAG_VALUE(AllowedSpillRegCount) ||
            (stateId < getStateCnt() && RetryTable[stateId].nextState >= getStateCnt()));
    }
    unsigned RetryManager::GetRetryId() const { return stateId; }

    void RetryManager::Enable() { enabled = true; }
    void RetryManager::Disable() { enabled = false; }

    void RetryManager::SetSpillSize(unsigned int spillSize) { lastSpillSize = spillSize; }
    unsigned int RetryManager::GetLastSpillSize() { return lastSpillSize; }

    void RetryManager::ClearSpillParams() {
        lastSpillSize = 0;
        numInstructions = 0;
    }

    // Save the entry for the given SIMD mode, to avoid recompiling it on the next retry.
    void RetryManager::SaveSIMDEntry(SIMDMode simdMode, CShader* shader)
    {
        switch (simdMode)
        {
        case SIMDMode::SIMD8:   m_simdEntries[0] = shader;  break;
        case SIMDMode::SIMD16:  m_simdEntries[1] = shader;  break;
        case SIMDMode::SIMD32:  m_simdEntries[2] = shader;  break;
        default:
            IGC_ASSERT(0);
            break;
        }
    }

    CShader* RetryManager::GetSIMDEntry(SIMDMode simdMode)
    {
        switch (simdMode)
        {
        case SIMDMode::SIMD8:   return m_simdEntries[0];
        case SIMDMode::SIMD16:  return m_simdEntries[1];
        case SIMDMode::SIMD32:  return m_simdEntries[2];
        default:
            IGC_ASSERT(0);
            return nullptr;
        }
    }

    RetryManager::~RetryManager()
    {
        for (unsigned i = 0; i < 3; i++)
        {
            if (m_simdEntries[i])
            {
                delete m_simdEntries[i];
            }
        }
    }

    bool RetryManager::AnyKernelSpills()
    {
        for (unsigned i = 0; i < 3; i++)
        {
            if (m_simdEntries[i] && m_simdEntries[i]->m_spillCost > 0.0)
            {
                return true;
            }
        }
        return false;
    }

    bool RetryManager::PickupKernels(CodeGenContext* cgCtx)
    {
        if (cgCtx->type == ShaderType::COMPUTE_SHADER)
        {
            return PickupCS(static_cast<ComputeShaderContext*>(cgCtx));
        }
        else
        {
            IGC_ASSERT_MESSAGE(0, "TODO for other shader types");
            return true;
        }
    }

    unsigned RetryManager::getStateCnt()
    {
        return sizeof(RetryTable) / sizeof(RetryState);
    }

    CShader* RetryManager::PickCSEntryForcedFromDriver(SIMDMode& simdMode, unsigned char forcedSIMDModeFromDriver)
    {
        if (forcedSIMDModeFromDriver == 8)
        {
            if ((m_simdEntries[0] && m_simdEntries[0]->m_spillSize == 0) || IsLastTry())
            {
                simdMode = SIMDMode::SIMD8;
                return m_simdEntries[0];
            }
        }
        else if (forcedSIMDModeFromDriver == 16)
        {
            if ((m_simdEntries[1] && m_simdEntries[1]->m_spillSize == 0) || IsLastTry())
            {
                simdMode = SIMDMode::SIMD16;
                return m_simdEntries[1];
            }
        }
        else if (forcedSIMDModeFromDriver == 32)
        {
            if ((m_simdEntries[2] && m_simdEntries[2]->m_spillSize == 0) || IsLastTry())
            {
                simdMode = SIMDMode::SIMD32;
                return m_simdEntries[2];
            }
        }
        return nullptr;
    }

    CShader* RetryManager::PickCSEntryByRegKey(SIMDMode& simdMode, ComputeShaderContext* cgCtx)
    {
        if (IGC_IS_FLAG_ENABLED(ForceCSSIMD32))
        {
            simdMode = SIMDMode::SIMD32;
            return m_simdEntries[2];
        }
        else if (IGC_IS_FLAG_ENABLED(ForceCSSIMD16) && m_simdEntries[1])
        {
            simdMode = SIMDMode::SIMD16;
            return m_simdEntries[1];
        }
        else if (IGC_IS_FLAG_ENABLED(ForceCSLeastSIMD))
        {
            if (m_simdEntries[0])
            {
                simdMode = SIMDMode::SIMD8;
                return m_simdEntries[0];
            }
            else if (m_simdEntries[1])
            {
                simdMode = SIMDMode::SIMD16;
                return m_simdEntries[1];
            }
            else
            {
                simdMode = SIMDMode::SIMD32;
                return m_simdEntries[2];
            }
        }

        return nullptr;
    }

    CShader* RetryManager::PickCSEntryEarly(SIMDMode& simdMode,
        ComputeShaderContext* cgCtx)
    {
        float spillThreshold = cgCtx->GetSpillThreshold();
        float occu8 = cgCtx->GetThreadOccupancy(SIMDMode::SIMD8);
        float occu16 = cgCtx->GetThreadOccupancy(SIMDMode::SIMD16);
        float occu32 = cgCtx->GetThreadOccupancy(SIMDMode::SIMD32);

        bool simd32NoSpill = m_simdEntries[2] && m_simdEntries[2]->m_spillCost <= spillThreshold;
        bool simd16NoSpill = m_simdEntries[1] && m_simdEntries[1]->m_spillCost <= spillThreshold;
        bool simd8NoSpill = m_simdEntries[0] && m_simdEntries[0]->m_spillCost <= spillThreshold;

        // If SIMD32/16/8 are all allowed, choose the one with the highest thread occupancy.

        if (IGC_IS_FLAG_ENABLED(EnableHighestSIMDForNoSpill))
        {
            if (simd32NoSpill)
            {
                simdMode = SIMDMode::SIMD32;
                return m_simdEntries[2];
            }

            if (simd16NoSpill)
            {
                simdMode = SIMDMode::SIMD16;
                return m_simdEntries[1];
            }
        }
        else
        {
            if (simd32NoSpill)
            {
                if (occu32 >= occu16 && occu32 >= occu8)
                {
                    simdMode = SIMDMode::SIMD32;
                    return m_simdEntries[2];
                }
                // If SIMD32 doesn't spill, SIMD16 and SIMD8 shouldn't either, if they exist
                IGC_ASSERT((m_simdEntries[0] == NULL) || simd8NoSpill == true);
                IGC_ASSERT((m_simdEntries[1] == NULL) || simd16NoSpill == true);
            }

            if (simd16NoSpill)
            {
                if (occu16 >= occu8 && occu16 >= occu32)
                {
                    simdMode = SIMDMode::SIMD16;
                    return m_simdEntries[1];
                }
                IGC_ASSERT_MESSAGE((m_simdEntries[0] == NULL) || simd8NoSpill == true, "If SIMD16 doesn't spill, SIMD8 shouldn't, if it exists");
            }
        }

        bool needToRetry = false;
        if (cgCtx->m_slmSize)
        {
            if (occu16 > occu8 || occu32 > occu16)
            {
                needToRetry = true;
            }
        }

        SIMDMode maxSimdMode = cgCtx->GetMaxSIMDMode();
        if (maxSimdMode == SIMDMode::SIMD8 || !needToRetry)
        {
            if (m_simdEntries[0] && m_simdEntries[0]->m_spillSize == 0)
            {
                simdMode = SIMDMode::SIMD8;
                return m_simdEntries[0];
            }
        }
        return nullptr;
    }

    CShader* RetryManager::PickCSEntryFinally(SIMDMode& simdMode)
    {
        if (m_simdEntries[0])
        {
            simdMode = SIMDMode::SIMD8;
            return m_simdEntries[0];
        }
        else if (m_simdEntries[1])
        {
            simdMode = SIMDMode::SIMD16;
            return m_simdEntries[1];
        }
        else
        {
            simdMode = SIMDMode::SIMD32;
            return m_simdEntries[2];
        }
    }

    void RetryManager::FreeAllocatedMemForNotPickedCS(SIMDMode simdMode)
    {
        if (simdMode != SIMDMode::SIMD8 && m_simdEntries[0] != nullptr)
        {
            if (m_simdEntries[0]->ProgramOutput()->m_programBin != nullptr)
                aligned_free(m_simdEntries[0]->ProgramOutput()->m_programBin);
        }
        if (simdMode != SIMDMode::SIMD16 && m_simdEntries[1] != nullptr)
        {
            if (m_simdEntries[1]->ProgramOutput()->m_programBin != nullptr)
                aligned_free(m_simdEntries[1]->ProgramOutput()->m_programBin);
        }
        if (simdMode != SIMDMode::SIMD32 && m_simdEntries[2] != nullptr)
        {
            if (m_simdEntries[2]->ProgramOutput()->m_programBin != nullptr)
                aligned_free(m_simdEntries[2]->ProgramOutput()->m_programBin);
        }
    }

    bool RetryManager::PickupCS(ComputeShaderContext* cgCtx)
    {
        SIMDMode simdMode = SIMDMode::UNKNOWN;
        CComputeShader* shader = nullptr;
        SComputeShaderKernelProgram* pKernelProgram = &cgCtx->programOutput;

        if (cgCtx->getModuleMetaData()->csInfo.forcedSIMDSize != 0)
        {
            shader = static_cast<CComputeShader*>(
                PickCSEntryForcedFromDriver(simdMode, cgCtx->getModuleMetaData()->csInfo.forcedSIMDSize));
        }
        if (!shader)
        {
            shader = static_cast<CComputeShader*>(
                PickCSEntryByRegKey(simdMode, cgCtx));
        }
        if (!shader)
        {
            shader = static_cast<CComputeShader*>(
                PickCSEntryEarly(simdMode, cgCtx));
        }
        if (!shader && IsLastTry())
        {
            shader = static_cast<CComputeShader*>(
                PickCSEntryFinally(simdMode));
            IGC_ASSERT(shader != nullptr);
        }

        if (shader)
        {
            switch (simdMode)
            {
            case SIMDMode::SIMD8:
                pKernelProgram->simd8 = *shader->ProgramOutput();
                pKernelProgram->SimdWidth = USC::GFXMEDIA_GPUWALKER_SIMD8;
                cgCtx->SetSIMDInfo(SIMD_SELECTED, simdMode,
                    ShaderDispatchMode::NOT_APPLICABLE);
                break;

            case SIMDMode::SIMD16:
                pKernelProgram->simd16 = *shader->ProgramOutput();
                pKernelProgram->SimdWidth = USC::GFXMEDIA_GPUWALKER_SIMD16;
                cgCtx->SetSIMDInfo(SIMD_SELECTED, simdMode,
                    ShaderDispatchMode::NOT_APPLICABLE);
                break;

            case SIMDMode::SIMD32:
                pKernelProgram->simd32 = *shader->ProgramOutput();
                pKernelProgram->SimdWidth = USC::GFXMEDIA_GPUWALKER_SIMD32;
                cgCtx->SetSIMDInfo(SIMD_SELECTED, simdMode,
                    ShaderDispatchMode::NOT_APPLICABLE);
                break;

            default:
                IGC_ASSERT_MESSAGE(0, "Invalid SIMDMode");
                break;
            }
            shader->FillProgram(pKernelProgram);
            pKernelProgram->SIMDInfo = cgCtx->GetSIMDInfo();

            // free allocated memory for the remaining kernels
            FreeAllocatedMemForNotPickedCS(simdMode);

            return true;
        }
        return false;
    }

    LLVMContextWrapper::LLVMContextWrapper(bool createResourceDimTypes)
    {
        if (createResourceDimTypes)
        {
            CreateResourceDimensionTypes(*this);
        }
    }

    void LLVMContextWrapper::AddRef()
    {
        refCount++;
    }

    void LLVMContextWrapper::Release()
    {
        refCount--;
        if (refCount == 0)
        {
            delete this;
        }
    }

    /** get shader's thread group size */
    unsigned ComputeShaderContext::GetThreadGroupSize()
    {
        llvm::GlobalVariable* pGlobal = getModule()->getGlobalVariable("ThreadGroupSize_X");
        m_threadGroupSize_X = int_cast<unsigned>(llvm::cast<llvm::ConstantInt>(pGlobal->getInitializer())->getZExtValue());

        pGlobal = getModule()->getGlobalVariable("ThreadGroupSize_Y");
        m_threadGroupSize_Y = int_cast<unsigned>(llvm::cast<llvm::ConstantInt>(pGlobal->getInitializer())->getZExtValue());

        pGlobal = getModule()->getGlobalVariable("ThreadGroupSize_Z");
        m_threadGroupSize_Z = int_cast<unsigned>(llvm::cast<llvm::ConstantInt>(pGlobal->getInitializer())->getZExtValue());

        return m_threadGroupSize_X * m_threadGroupSize_Y * m_threadGroupSize_Z;
    }

    unsigned ComputeShaderContext::GetSlmSizePerSubslice()
    {
        return platform.getSlmSizePerSsOrDss();
    }

    unsigned ComputeShaderContext::GetSlmSize() const
    {
        return m_slmSize;
    }

    float ComputeShaderContext::GetThreadOccupancy(SIMDMode simdMode)
    {
        return GetThreadOccupancyPerSubslice(simdMode, GetThreadGroupSize(), GetHwThreadsPerWG(platform), m_slmSize, GetSlmSizePerSubslice());
    }

    /** get smallest SIMD mode allowed based on thread group size */
    SIMDMode ComputeShaderContext::GetLeastSIMDModeAllowed()
    {
        SIMDMode mode = getLeastSIMDAllowed(
            GetThreadGroupSize(),
            GetHwThreadsPerWG(platform));
        return mode;
    }

    /** get largest SIMD mode for performance based on thread group size */
    SIMDMode ComputeShaderContext::GetMaxSIMDMode()
    {
        unsigned threadGroupSize = GetThreadGroupSize();
        SIMDMode mode;
        if (threadGroupSize <= 8)
        {
            mode = SIMDMode::SIMD8;
        }
        else if (threadGroupSize <= 16)
        {
            mode = SIMDMode::SIMD16;
        }
        else
        {
            mode = SIMDMode::SIMD32;
        }
        return mode;
    }

    float ComputeShaderContext::GetSpillThreshold() const
    {
        float spillThresholdSLM = platform.adjustedSpillThreshold() / 100.0f;
        // Enable CSSpillThresholdSLM with a desired value to override the default value.
        if (IGC_IS_FLAG_ENABLED(CSSpillThresholdSLM))
            spillThresholdSLM = float(IGC_GET_FLAG_VALUE(CSSpillThresholdSLM)) / 100.0f;
        float spillThresholdNoSLM =
            float(IGC_GET_FLAG_VALUE(CSSpillThresholdNoSLM)) / 100.0f;
        return m_slmSize ? spillThresholdSLM : spillThresholdNoSLM;
    }

    bool OpenCLProgramContext::isSPIRV() const
    {
        return isSpirV;
    }

    void OpenCLProgramContext::setAsSPIRV()
    {
        isSpirV = true;
    }
    float OpenCLProgramContext::getProfilingTimerResolution()
    {
        return m_ProfilingTimerResolution;
    }

    uint32_t OpenCLProgramContext::getNumThreadsPerEU() const
    {
        if (m_Options.IntelRequiredEUThreadCount)
        {
            return m_Options.requiredEUThreadCount;
        }
        if (m_InternalOptions.IntelNumThreadPerEU || m_InternalOptions.Intel256GRFPerThread)
        {
            return m_InternalOptions.numThreadsPerEU;
        }

        return 0;
    }

    uint32_t OpenCLProgramContext::getNumGRFPerThread() const
    {
        if (platform.supportsStaticRegSharing())
        {
            if (m_InternalOptions.Intel128GRFPerThread)
            {
                return 128;
            }
            else if (m_InternalOptions.Intel256GRFPerThread)
            {
                return 256;
            }
        }
        return CodeGenContext::getNumGRFPerThread();
    }

    bool OpenCLProgramContext::forceGlobalMemoryAllocation() const
    {
        return m_InternalOptions.ForceGlobalMemoryAllocation;
    }

    bool OpenCLProgramContext::allocatePrivateAsGlobalBuffer() const
    {
        return forceGlobalMemoryAllocation() || (m_instrTypes.hasDynamicGenericLoadStore && platform.canForcePrivateToGlobal());
    }

    bool OpenCLProgramContext::hasNoLocalToGenericCast() const
    {
        return m_InternalOptions.HasNoLocalToGeneric || getModuleMetaData()->hasNoLocalToGenericCast;
    }

    bool OpenCLProgramContext::hasNoPrivateToGenericCast() const
    {
        return getModuleMetaData()->hasNoPrivateToGenericCast;
    }

    int16_t OpenCLProgramContext::getVectorCoalescingControl() const
    {
        // cmdline option > registry key
        int val = m_InternalOptions.VectorCoalescingControl;
        if (val < 0)
        {
            // no cmdline option
            val = IGC_GET_FLAG_VALUE(VATemp);
        }
        return val;
    }

    void OpenCLProgramContext::InternalOptions::parseOptions(const char* IntOptStr)
    {
        // Assume the flags are in the form: <f0>[=<v0>] <f1>[=<v1>] ...
        // A flag name and its value are separated by either ' ' or '=';
        // the flag separator is always ' '.
        const char* NAMESEP = " =";  // separator between a name and its value
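        // Illustrative example (hypothetical string; flags taken from the
        // handlers below):
        //   "-cl-intel-no-spill -ze-opt-num-thread-per-eu 4"
        // parses as two flags, the second carrying the integer value 4.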

        llvm::StringRef opts(IntOptStr);
        size_t Pos = 0;
        while (Pos != llvm::StringRef::npos)
        {
            // Get a flag name
            Pos = opts.find_first_not_of(' ', Pos);
            if (Pos == llvm::StringRef::npos)
                continue;

            size_t ePos = opts.find_first_of(NAMESEP, Pos);
            llvm::StringRef flagName = opts.substr(Pos, ePos - Pos);

            // Build options:  -cl-intel-xxxx, -ze-intel-xxxx, -ze-opt-xxxx
            //                 -cl-xxxx, -ze-xxxx
            // The cl and ze versions mean the same thing.
            // Here, strip off the common prefix.
            size_t prefix_len;
            if (flagName.startswith("-cl-intel") || flagName.startswith("-ze-intel"))
            {
                prefix_len = 9;
            }
            else if (flagName.startswith("-ze-opt"))
            {
                prefix_len = 7;
            }
            else if (flagName.startswith("-cl") || flagName.startswith("-ze"))
            {
                prefix_len = 3;
            }
            else
            {
                // not a valid flag, skip
                Pos = opts.find_first_of(' ', Pos);
                continue;
            }

            llvm::StringRef suffix = flagName.drop_front(prefix_len);
            if (suffix.equals("-replace-global-offsets-by-zero"))
            {
                replaceGlobalOffsetsByZero = true;
            }
            else if (suffix.equals("-kernel-debug-enable"))
            {
                KernelDebugEnable = true;
            }
            else if (suffix.equals("-include-sip-csr"))
            {
                IncludeSIPCSR = true;
            }
            else if (suffix.equals("-include-sip-kernel-debug"))
            {
                IncludeSIPKernelDebug = true;
            }
            else if (suffix.equals("-include-sip-kernel-local-debug"))
            {
                IncludeSIPKernelDebugWithLocalMemory = true;
            }
            else if (suffix.equals("-use-32bit-ptr-arith"))
            {
                Use32BitPtrArith = true;
            }

            // -cl-intel-greater-than-4GB-buffer-required, -ze-opt-greater-than-4GB-buffer-required
            else if (suffix.equals("-greater-than-4GB-buffer-required"))
            {
                IntelGreaterThan4GBBufferRequired = true;
            }

            // -cl-intel-has-buffer-offset-arg, -ze-opt-has-buffer-offset-arg
            else if (suffix.equals("-has-buffer-offset-arg"))
            {
                IntelHasBufferOffsetArg = true;
            }

            // -cl-intel-buffer-offset-arg-required, -ze-opt-buffer-offset-arg-required
            else if (suffix.equals("-buffer-offset-arg-required"))
            {
                IntelBufferOffsetArgOptional = false;
            }

            // -cl-intel-has-positive-pointer-offset, -ze-opt-has-positive-pointer-offset
            else if (suffix.equals("-has-positive-pointer-offset"))
            {
                IntelHasPositivePointerOffset = true;
            }

            // -cl-intel-has-subDW-aligned-ptr-arg, -ze-opt-has-subDW-aligned-ptr-arg
            else if (suffix.equals("-has-subDW-aligned-ptr-arg"))
            {
                IntelHasSubDWAlignedPtrArg = true;
            }

            // -cl-intel-disable-a64WA
            else if (suffix.equals("-disable-a64WA"))
            {
                IntelDisableA64WA = true;
            }

            // -cl-intel-force-enable-a64WA
            else if (suffix.equals("-force-enable-a64WA"))
            {
                IntelForceEnableA64WA = true;
            }

            // GTPin flags used by L0 driver runtime
            // -cl-intel-gtpin-rera
            else if (suffix.equals("-gtpin-rera"))
            {
                GTPinReRA = true;
            }
            else if (suffix.equals("-gtpin-grf-info"))
            {
                GTPinGRFInfo = true;
            }
            else if (suffix.equals("-gtpin-scratch-area-size"))
            {
                GTPinScratchAreaSize = true;
                size_t valStart = opts.find_first_not_of(' ', ePos + 1);
                size_t valEnd = opts.find_first_of(' ', valStart);
                llvm::StringRef valStr = opts.substr(valStart, valEnd - valStart);
                if (valStr.getAsInteger(10, GTPinScratchAreaSizeValue))
                {
                    IGC_ASSERT(0);
                }
                Pos = valEnd;
                continue;
            }

            // -cl-intel-no-prera-scheduling
            else if (suffix.equals("-no-prera-scheduling"))
            {
                IntelEnablePreRAScheduling = false;
            }
            // -cl-intel-no-local-to-generic
            else if (suffix.equals("-no-local-to-generic"))
            {
                HasNoLocalToGeneric = true;
            }
            // -cl-intel-force-global-mem-allocation
            else if (suffix.equals("-force-global-mem-allocation"))
            {
                ForceGlobalMemoryAllocation = true;
            }

            //
            // Options to set the number of GRF and threads
            // (All start with -cl-intel or -ze-opt)
            else if (suffix.equals("-128-GRF-per-thread"))
            {
                Intel128GRFPerThread = true;
                numThreadsPerEU = 8;
            }
            else if (suffix.equals("-256-GRF-per-thread") ||
                suffix.equals("-large-register-file"))
            {
                Intel256GRFPerThread = true;
                numThreadsPerEU = 4;
            }
            else if (suffix.equals("-num-thread-per-eu"))
            {
                IntelNumThreadPerEU = true;

                // Take an integer value after this option:
                //   <flag> <number>
                size_t valStart = opts.find_first_not_of(' ', ePos + 1);
                size_t valEnd = opts.find_first_of(' ', valStart);
                llvm::StringRef valStr = opts.substr(valStart, valEnd - valStart);
                if (valStr.getAsInteger(10, numThreadsPerEU))
                {
                    IGC_ASSERT(0);
                }
                Pos = valEnd;
                continue;
            }

            // -cl-intel-use-bindless-buffers
            else if (suffix.equals("-use-bindless-buffers"))
            {
                PromoteStatelessToBindless = true;
            }
            // -cl-intel-use-bindless-images
            else if (suffix.equals("-use-bindless-images"))
            {
                PreferBindlessImages = true;
            }
            // -cl-intel-use-bindless-mode
            else if (suffix.equals("-use-bindless-mode"))
            {
                // This is a new option that combines bindless generation for buffers
                // and images. Keep the old internal options to have compatibility
                // for existing tests. Those (old) options could be removed in future.
                UseBindlessMode = true;
                PreferBindlessImages = true;
                PromoteStatelessToBindless = true;
            }
            // -cl-intel-use-bindless-printf
            else if (suffix.equals("-use-bindless-printf"))
            {
                UseBindlessPrintf = true;
            }
            // -cl-intel-use-bindless-legacy-mode
            else if (suffix.equals("-use-bindless-legacy-mode"))
            {
                UseBindlessLegacyMode = true;
            }
            // -cl-intel-use-bindless-advanced-mode
            else if (suffix.equals("-use-bindless-advanced-mode"))
            {
                UseBindlessLegacyMode = false;
            }
            // -cl-intel-vector-coalescing
            else if (suffix.equals("-vector-coalescing"))
            {
                // -cl-intel-vector-coalescing=<0-5>.
                size_t valStart = opts.find_first_not_of(' ', ePos + 1);
                size_t valEnd = opts.find_first_of(' ', valStart);
                llvm::StringRef valStr = opts.substr(valStart, valEnd - valStart);

                int16_t val;
                if (valStr.getAsInteger(10, val))
                {
                    IGC_ASSERT_MESSAGE(false, "-cl-intel-vector-coalescing: invalid value, ignored!");
                }
                else if (val >= 0 && val <= 5)
                {
                    VectorCoalescingControl = val;
                }
                Pos = valEnd;
                continue;
            }
            // -cl-intel-allow-zebin
            else if (suffix.equals("-allow-zebin"))
            {
                EnableZEBinary = true;
            }
            // -cl-intel-no-spill
            else if (suffix.equals("-no-spill"))
            {
                // This option avoids spill/fill instructions in the scheduler kernel.
                // The OpenCL runtime triggers offline compilation of the scheduler kernel
                // while the driver is being built; since scratch space is not supported in
                // that specific case, the kernel must not spill. If this option is set,
                // IGC recompiles the kernel with some optimizations disabled to avoid
                // spill/fill instructions.
                NoSpill = true;
            }

            // advance to the next flag
            Pos = opts.find_first_of(' ', Pos);
        }
    }

    void CodeGenContext::initLLVMContextWrapper(bool createResourceDimTypes)
    {
        llvmCtxWrapper = new LLVMContextWrapper(createResourceDimTypes);
        llvmCtxWrapper->AddRef();
    }

    llvm::LLVMContext* CodeGenContext::getLLVMContext() const {
        return llvmCtxWrapper;
    }

    IGC::IGCMD::MetaDataUtils* CodeGenContext::getMetaDataUtils() const
    {
        IGC_ASSERT_MESSAGE(nullptr != m_pMdUtils, "Metadata Utils is not initialized");
        return m_pMdUtils;
    }

    IGCLLVM::Module* CodeGenContext::getModule() const { return module; }

    static void initCompOptionFromRegkey(CodeGenContext* ctx)
    {
        CompOptions& opt = ctx->getModuleMetaData()->compOpt;

        opt.pixelShaderDoNotAbortOnSpill =
            IGC_IS_FLAG_ENABLED(PixelShaderDoNotAbortOnSpill);
        opt.forcePixelShaderSIMDMode =
            IGC_GET_FLAG_VALUE(ForcePixelShaderSIMDMode);
    }

    void CodeGenContext::setModule(llvm::Module* m)
    {
        module = (IGCLLVM::Module*)m;
        m_pMdUtils = new IGC::IGCMD::MetaDataUtils(m);
        modMD = new IGC::ModuleMetaData();
        initCompOptionFromRegkey(this);
    }

    // Several clients explicitly delete the module without resetting it to null.
    // This causes issues later when the dtor is invoked (it tries to delete a
    // dangling pointer again). This function replaces any explicit delete in
    // order to prevent double deletion of dangling pointers.
    void CodeGenContext::deleteModule()
    {
        delete m_pMdUtils;
        delete modMD;
        delete module;
        m_pMdUtils = nullptr;
        modMD = nullptr;
        module = nullptr;
        delete annotater;
        annotater = nullptr;
    }

    IGC::ModuleMetaData* CodeGenContext::getModuleMetaData() const
    {
        IGC_ASSERT_MESSAGE(nullptr != modMD, "Module Metadata is not initialized");
        return modMD;
    }

    unsigned int CodeGenContext::getRegisterPointerSizeInBits(unsigned int AS) const
    {
        unsigned int pointerSizeInRegister = 32;
        switch (AS)
        {
        case ADDRESS_SPACE_GLOBAL:
        case ADDRESS_SPACE_CONSTANT:
        case ADDRESS_SPACE_GENERIC:
        case ADDRESS_SPACE_GLOBAL_OR_PRIVATE:
            pointerSizeInRegister =
                getModule()->getDataLayout().getPointerSizeInBits(AS);
            break;
        case ADDRESS_SPACE_LOCAL:
        case ADDRESS_SPACE_A32:
            pointerSizeInRegister = 32;
            break;
        case ADDRESS_SPACE_PRIVATE:
            if (getModuleMetaData()->compOpt.UseScratchSpacePrivateMemory)
            {
                pointerSizeInRegister = 32;
            }
            else
            {
                pointerSizeInRegister = ((type == ShaderType::OPENCL_SHADER) ?
                    getModule()->getDataLayout().getPointerSizeInBits(AS) : 64);
            }
            break;
        default:
            pointerSizeInRegister = 32;
            break;
        }
        return pointerSizeInRegister;
    }

    bool CodeGenContext::enableFunctionCall() const
    {
        return (m_enableSubroutine || m_enableFunctionPointer);
    }

    /// Check for user functions in the module and set m_enableSubroutine if any exist
    void CodeGenContext::CheckEnableSubroutine(llvm::Module& M)
    {
        bool EnableSubroutine = false;
        for (auto& F : M)
        {
            if (F.isDeclaration() ||
                F.use_empty() ||
                isEntryFunc(getMetaDataUtils(), &F))
            {
                continue;
            }

            if (F.hasFnAttribute("KMPLOCK") ||
                F.hasFnAttribute(llvm::Attribute::NoInline) ||
                !F.hasFnAttribute(llvm::Attribute::AlwaysInline))
            {
                EnableSubroutine = true;
                break;
            }
        }
        m_enableSubroutine = EnableSubroutine;
    }

    void CodeGenContext::InitVarMetaData() {}

    CodeGenContext::~CodeGenContext()
    {
        clear();
    }

    void CodeGenContext::clear()
    {
        m_enableSubroutine = false;
        m_enableFunctionPointer = false;

        delete modMD;
        delete m_pMdUtils;
        modMD = nullptr;
        m_pMdUtils = nullptr;

        delete module;
        llvmCtxWrapper->Release();
        module = nullptr;
        llvmCtxWrapper = nullptr;
    }

    static const llvm::Function *getRelatedFunction(const llvm::Value *value)
    {
        if (value == nullptr)
            return nullptr;

        if (const llvm::Function *F = llvm::dyn_cast<llvm::Function>(value)) {
            return F;
        }
        if (const llvm::Argument *A = llvm::dyn_cast<llvm::Argument>(value)) {
            return A->getParent();
        }
        if (const llvm::BasicBlock *BB = llvm::dyn_cast<llvm::BasicBlock>(value)) {
            return BB->getParent();
        }
        if (const llvm::Instruction *I = llvm::dyn_cast<llvm::Instruction>(value)) {
            return I->getParent()->getParent();
        }

        return nullptr;
    }

    static bool isEntryPoint(const CodeGenContext *ctx, const llvm::Function *F)
    {
        if (F == nullptr) {
            return false;
        }

        auto& FuncMD = ctx->getModuleMetaData()->FuncMD;
        auto FuncInfo = FuncMD.find(const_cast<llvm::Function *>(F));
        if (FuncInfo == FuncMD.end()) {
            return false;
        }

        const FunctionMetaData* MD = &FuncInfo->second;
        return MD->functionType == KernelFunction;
    }

    static void findCallingKernels
        (const CodeGenContext *ctx, const llvm::Function *F, llvm::SmallPtrSetImpl<const llvm::Function *> &kernels)
    {
        if (F == nullptr || kernels.count(F))
            return;

        for (const llvm::User *U : F->users()) {
            auto *CI = llvm::dyn_cast<llvm::CallInst>(U);
            if (CI == nullptr)
                continue;

            if (CI->getCalledFunction() != F)
                continue;

            const llvm::Function *caller = getRelatedFunction(CI);
            if (isEntryPoint(ctx, caller)) {
                kernels.insert(caller);
                continue;
            }
            // The caller is not a kernel; check which kernels might
            // be calling it:
            findCallingKernels(ctx, caller, kernels);
        }
    }

    static bool handleOpenMPDemangling(const std::string &name, std::string *strippedName) {
        // OpenMP mangled names have the following structure:
        //
        // __omp_offloading_DD_FFFF_PP_lBB
        //
        // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the
        // mangled name of the function that encloses the target region, and BB is the
        // line number of the target region.
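        //
        // For example, a hypothetical name "__omp_offloading_10_2c5a1b_main_l42"
        // strips to "main".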
        if (name.rfind("__omp_offloading_", 0) != 0) {
            return false;
        }
        size_t offset = sizeof "__omp_offloading_";
        offset = name.find('_', offset + 1); // Find end of DD.
        if (offset == std::string::npos)
            return false;
        offset = name.find('_', offset + 1); // Find end of FFFF.
        if (offset == std::string::npos)
            return false;

        const size_t start = offset + 1;
        const size_t end = name.rfind('_'); // Find beginning of lBB.
        if (end == std::string::npos)
            return false;

        *strippedName = name.substr(start, end - start);
        return true;
    }

    static std::string demangleFuncName(const std::string &rawName) {
        // OpenMP adds an additional prefix and suffix to the mangling scheme;
        // remove them if present.
        std::string name;
        if (!handleOpenMPDemangling(rawName, &name)) {
            // If OpenMP demangling didn't succeed, just proceed with the
            // received symbol name.
            name = rawName;
        }
#if LLVM_VERSION_MAJOR >= 10
        return llvm::demangle(name);
#else
        char *demangled = nullptr;

        demangled = llvm::itaniumDemangle(name.c_str(), nullptr, nullptr, nullptr);
        if (demangled == nullptr) {
            demangled = llvm::microsoftDemangle(name.c_str(), nullptr, nullptr, nullptr);
        }

        if (demangled == nullptr) {
            return name;
        }

        std::string result = demangled;
        std::free(demangled);
        return result;
#endif
    }

    void CodeGenContext::EmitError(std::ostream &OS, const char* errorstr, const llvm::Value* context) const
    {
        OS << "\nerror: ";
        OS << errorstr;
        // Try to get debug location to print out the relevant info.
        if (const llvm::Instruction *I = llvm::dyn_cast_or_null<llvm::Instruction>(context)) {
            if (const llvm::DILocation *DL = I->getDebugLoc()) {
                OS << "\nin file: " << DL->getFilename().str() << ":" << DL->getLine() << "\n";
            }
        }
        // Try to find function related to given context
        // to print more informative error message.
        if (const llvm::Function *F = getRelatedFunction(context)) {
            // If the function is a kernel just print the kernel name.
            if (isEntryPoint(this, F)) {
                OS << "\nin kernel: '" << demangleFuncName(std::string(F->getName())) << "'";
            // If the function is not a kernel try to print all kernels that
            // might be using this function.
            } else {
                llvm::SmallPtrSet<const llvm::Function *, 16> kernels;
                findCallingKernels(this, F, kernels);

                const size_t kernelsCount = kernels.size();
                OS << "\nin function: '" << demangleFuncName(std::string(F->getName())) << "' ";
                if (kernelsCount == 0) {
                    OS << "called indirectly by at least one of the kernels.\n";
                } else if (kernelsCount == 1) {
                    const llvm::Function *kernel = *kernels.begin();
                    OS << "called by kernel: '" << demangleFuncName(std::string(kernel->getName())) << "'\n";
                } else {
                    OS << "called by kernels:\n";
                    for (const llvm::Function *kernel : kernels) {
                        OS << "  - '" << demangleFuncName(std::string(kernel->getName())) << "'\n";
                    }
                }
            }
        }
        OS << "\nerror: backend compiler failed build.\n";
    }

    void CodeGenContext::EmitError(const char* errorstr, const llvm::Value *context)
    {
        EmitError(this->oclErrorMessage, errorstr, context);
    }

    void CodeGenContext::EmitWarning(const char* warningstr)
    {
        this->oclWarningMessage << "\nwarning: ";
        this->oclWarningMessage << warningstr;
        this->oclWarningMessage << "\n";
    }

    CompOptions& CodeGenContext::getCompilerOption()
    {
        return getModuleMetaData()->compOpt;
    }

    void CodeGenContext::resetOnRetry()
    {
        m_tempCount = 0;
    }

    uint32_t CodeGenContext::getNumThreadsPerEU() const
    {
        return 0;
    }

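    // GRF count precedence: the TotalGRFNum regkey, then the module's
    // forceTotalGRFNum, then the CS-specific TotalGRFNum4CS regkey,
    // falling back to the default of 128.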
    uint32_t CodeGenContext::getNumGRFPerThread() const
    {
        if (IGC_GET_FLAG_VALUE(TotalGRFNum) != 0)
        {
            return IGC_GET_FLAG_VALUE(TotalGRFNum);
        }
        if (getModuleMetaData()->csInfo.forceTotalGRFNum != 0)
        {
            return getModuleMetaData()->csInfo.forceTotalGRFNum;
        }
        if (this->type == ShaderType::COMPUTE_SHADER && IGC_GET_FLAG_VALUE(TotalGRFNum4CS) != 0)
        {
            return IGC_GET_FLAG_VALUE(TotalGRFNum4CS);
        }
        return 128;
    }

    bool CodeGenContext::forceGlobalMemoryAllocation() const
    {
        return false;
    }

    bool CodeGenContext::allocatePrivateAsGlobalBuffer() const
    {
        return false;
    }

    bool CodeGenContext::hasNoLocalToGenericCast() const
    {
        return false;
    }

    bool CodeGenContext::hasNoPrivateToGenericCast() const
    {
        return false;
    }

    int16_t CodeGenContext::getVectorCoalescingControl() const
    {
        return 0;
    }

    bool CodeGenContext::isPOSH() const
    {
        return this->getModule()->getModuleFlag(
            "IGC::PositionOnlyVertexShader") != nullptr;
    }

    void CodeGenContext::setFlagsPerCtx()
    {
        if (m_DriverInfo.DessaAliasLevel() != -1) {
            if ((int)IGC_GET_FLAG_VALUE(EnableDeSSAAlias) > m_DriverInfo.DessaAliasLevel())
            {
                IGC_SET_FLAG_VALUE(EnableDeSSAAlias, m_DriverInfo.DessaAliasLevel());
            }
        }
    }

}