/*========================== begin_copyright_notice ============================

Copyright (C) 2018-2021 Intel Corporation

SPDX-License-Identifier: MIT

============================= end_copyright_notice ===========================*/

#include <sstream>
#include "common/LLVMWarningsPush.hpp"
#include <llvm/Support/ScaledNumber.h>
#include <llvm/Demangle/Demangle.h>
#include <llvm/IR/DebugInfo.h>
#include "common/LLVMWarningsPop.hpp"
#include "Compiler/CISACodeGen/ComputeShaderCodeGen.hpp"
#include "Compiler/CISACodeGen/ShaderCodeGen.hpp"
#include "Compiler/CodeGenPublic.h"
#include "Probe/Assertion.h"

namespace IGC
{

// One row of the retry state machine. Each bool tells the compiler whether
// the named optimization is permitted during the compilation attempt that
// runs in this state; nextState is the RetryTable index to move to when
// AdvanceState() is called (an out-of-range value ends the retry chain).
typedef struct RetryState {
    bool allowLICM;
    bool allowCodeSinking;
    bool allowSimd32Slicing;
    bool allowPromotePrivateMemory;
    bool allowPreRAScheduler;
    bool allowVISAPreRAScheduler;
    bool allowLargeURBWrite;
    unsigned nextState;
} RetryState;

// State 0: first try with (almost) everything enabled.
// State 1: retry with register-pressure-increasing optimizations disabled
//          (LICM, private-memory promotion, pre-RA scheduling, large URB
//          writes) and SIMD32 slicing enabled instead.
// nextState == 500 is a deliberate out-of-range sentinel: there is no
// state after the retry, so AdvanceState() will report failure.
static const RetryState RetryTable[] = {
    { true, true, false, true, true, true, true, 1 },
    { false, true, true, false, false, false, false, 500 }
};

// Starts disabled; Enable() must be called before retries are honored.
// The initial state may be overridden via the RetryManagerFirstStateId
// regkey (useful for forcing the "conservative" state from the start).
RetryManager::RetryManager() : enabled(false)
{
    memset(m_simdEntries, 0, sizeof(m_simdEntries));
    firstStateId = IGC_GET_FLAG_VALUE(RetryManagerFirstStateId);
    stateId = firstStateId;
    IGC_ASSERT(stateId < getStateCnt());
}

// Moves to the next retry state. Returns true if a further (valid) retry
// state exists; false when retries are disabled, recompilation is globally
// disabled, or the state chain has ended (nextState out of range).
bool RetryManager::AdvanceState() {
    if (!enabled || IGC_IS_FLAG_ENABLED(DisableRecompilation))
    {
        return false;
    }
    IGC_ASSERT(stateId < getStateCnt());
    stateId = RetryTable[stateId].nextState;
    return (stateId < getStateCnt());
}

// Whether loop-invariant code motion is allowed in the current state.
bool RetryManager::AllowLICM() {
    IGC_ASSERT(stateId < getStateCnt());
    return RetryTable[stateId].allowLICM;
}

// Whether private-memory-to-register promotion is allowed in the current state.
bool RetryManager::AllowPromotePrivateMemory() {
    IGC_ASSERT(stateId < getStateCnt());
    return RetryTable[stateId].allowPromotePrivateMemory;
}

// Whether the IGC pre-RA scheduler is allowed in the current state.
bool RetryManager::AllowPreRAScheduler() {
    IGC_ASSERT(stateId < getStateCnt());
    return RetryTable[stateId].allowPreRAScheduler;
}

// Whether the vISA pre-RA scheduler is allowed in the current state.
bool RetryManager::AllowVISAPreRAScheduler() {
    IGC_ASSERT(stateId < getStateCnt());
    return RetryTable[stateId].allowVISAPreRAScheduler;
}

// Whether code sinking is allowed in the current state.
bool RetryManager::AllowCodeSinking() {
    IGC_ASSERT(stateId < getStateCnt());
    return RetryTable[stateId].allowCodeSinking;
}

// Whether SIMD32 slicing is allowed in the current state
// (only enabled on the retry state to reduce register pressure).
bool RetryManager::AllowSimd32Slicing() {
    IGC_ASSERT(stateId < getStateCnt());
    return RetryTable[stateId].allowSimd32Slicing;
}

// Whether merging URB writes into large writes is allowed in the current state.
bool RetryManager::AllowLargeURBWrite() {
    IGC_ASSERT(stateId < getStateCnt());
    return RetryTable[stateId].allowLargeURBWrite;
}

// Overrides the state considered "first" for IsFirstTry().
// NOTE(review): this does not reset stateId itself — presumably callers set
// it before compilation begins; verify against callers.
void RetryManager::SetFirstStateId(int id) {
    firstStateId = id;
}

// True while still on the initial (pre-retry) state.
bool RetryManager::IsFirstTry() {
    return (stateId == firstStateId);
}

// True when no further recompilation will happen: retries disabled
// (explicitly or via regkey), the last spill amount is already below the
// acceptable AllowedSpillRegCount threshold, or the current state has no
// valid successor in RetryTable.
bool RetryManager::IsLastTry() {
    return (!enabled ||
        IGC_IS_FLAG_ENABLED(DisableRecompilation) ||
        lastSpillSize < IGC_GET_FLAG_VALUE(AllowedSpillRegCount) ||
        (stateId < getStateCnt() && RetryTable[stateId].nextState >= getStateCnt()));
}

// Current retry state index (0 = first compilation attempt).
unsigned RetryManager::GetRetryId() const { return stateId; }

void RetryManager::Enable() { enabled = true; }
void RetryManager::Disable() { enabled = false; }

// Records the spill size of the last compilation; consulted by IsLastTry().
void RetryManager::SetSpillSize(unsigned int spillSize) { lastSpillSize = spillSize; }
unsigned int RetryManager::GetLastSpillSize() { return lastSpillSize; }

// Resets the per-compilation spill statistics before a new attempt.
void RetryManager::ClearSpillParams() {
    lastSpillSize = 0;
    numInstructions = 0;
}

// save entry for given SIMD mode, to avoid recompile for next retry.
SaveSIMDEntry(SIMDMode simdMode,CShader * shader)110 void RetryManager::SaveSIMDEntry(SIMDMode simdMode, CShader* shader) 111 { 112 switch (simdMode) 113 { 114 case SIMDMode::SIMD8: m_simdEntries[0] = shader; break; 115 case SIMDMode::SIMD16: m_simdEntries[1] = shader; break; 116 case SIMDMode::SIMD32: m_simdEntries[2] = shader; break; 117 default: 118 IGC_ASSERT(0); 119 break; 120 } 121 } 122 GetSIMDEntry(SIMDMode simdMode)123 CShader* RetryManager::GetSIMDEntry(SIMDMode simdMode) 124 { 125 switch (simdMode) 126 { 127 case SIMDMode::SIMD8: return m_simdEntries[0]; 128 case SIMDMode::SIMD16: return m_simdEntries[1]; 129 case SIMDMode::SIMD32: return m_simdEntries[2]; 130 default: 131 IGC_ASSERT(0); 132 return nullptr; 133 } 134 } 135 ~RetryManager()136 RetryManager::~RetryManager() 137 { 138 for (unsigned i = 0; i < 3; i++) 139 { 140 if (m_simdEntries[i]) 141 { 142 delete m_simdEntries[i]; 143 } 144 } 145 } 146 AnyKernelSpills()147 bool RetryManager::AnyKernelSpills() 148 { 149 for (unsigned i = 0; i < 3; i++) 150 { 151 if (m_simdEntries[i] && m_simdEntries[i]->m_spillCost > 0.0) 152 { 153 return true; 154 } 155 } 156 return false; 157 } 158 PickupKernels(CodeGenContext * cgCtx)159 bool RetryManager::PickupKernels(CodeGenContext* cgCtx) 160 { 161 if (cgCtx->type == ShaderType::COMPUTE_SHADER) 162 { 163 return PickupCS(static_cast<ComputeShaderContext*>(cgCtx)); 164 } 165 else 166 { 167 IGC_ASSERT_MESSAGE(0, "TODO for other shader types"); 168 return true; 169 } 170 } 171 getStateCnt()172 unsigned RetryManager::getStateCnt() 173 { 174 return sizeof(RetryTable) / sizeof(RetryState); 175 }; 176 PickCSEntryForcedFromDriver(SIMDMode & simdMode,unsigned char forcedSIMDModeFromDriver)177 CShader* RetryManager::PickCSEntryForcedFromDriver(SIMDMode& simdMode, unsigned char forcedSIMDModeFromDriver) 178 { 179 if (forcedSIMDModeFromDriver == 8) 180 { 181 if ((m_simdEntries[0] && m_simdEntries[0]->m_spillSize == 0) || IsLastTry()) 182 { 183 simdMode = SIMDMode::SIMD8; 184 
return m_simdEntries[0]; 185 } 186 } 187 else if (forcedSIMDModeFromDriver == 16) 188 { 189 if ((m_simdEntries[1] && m_simdEntries[1]->m_spillSize == 0) || IsLastTry()) 190 { 191 simdMode = SIMDMode::SIMD16; 192 return m_simdEntries[1]; 193 } 194 } 195 else if (forcedSIMDModeFromDriver == 32) 196 { 197 if ((m_simdEntries[2] && m_simdEntries[2]->m_spillSize == 0) || IsLastTry()) 198 { 199 simdMode = SIMDMode::SIMD32; 200 return m_simdEntries[2]; 201 } 202 } 203 return nullptr; 204 } 205 PickCSEntryByRegKey(SIMDMode & simdMode,ComputeShaderContext * cgCtx)206 CShader* RetryManager::PickCSEntryByRegKey(SIMDMode& simdMode, ComputeShaderContext* cgCtx) 207 { 208 if (IGC_IS_FLAG_ENABLED(ForceCSSIMD32)) 209 { 210 simdMode = SIMDMode::SIMD32; 211 return m_simdEntries[2]; 212 } 213 else 214 if (IGC_IS_FLAG_ENABLED(ForceCSSIMD16) && m_simdEntries[1]) 215 { 216 simdMode = SIMDMode::SIMD16; 217 return m_simdEntries[1]; 218 } 219 else 220 if (IGC_IS_FLAG_ENABLED(ForceCSLeastSIMD) 221 ) 222 { 223 if (m_simdEntries[0]) 224 { 225 simdMode = SIMDMode::SIMD8; 226 return m_simdEntries[0]; 227 } 228 else 229 if (m_simdEntries[1]) 230 { 231 simdMode = SIMDMode::SIMD16; 232 return m_simdEntries[1]; 233 } 234 else 235 { 236 simdMode = SIMDMode::SIMD32; 237 return m_simdEntries[2]; 238 } 239 } 240 241 return nullptr; 242 } 243 PickCSEntryEarly(SIMDMode & simdMode,ComputeShaderContext * cgCtx)244 CShader* RetryManager::PickCSEntryEarly(SIMDMode& simdMode, 245 ComputeShaderContext* cgCtx) 246 { 247 float spillThreshold = cgCtx->GetSpillThreshold(); 248 float occu8 = cgCtx->GetThreadOccupancy(SIMDMode::SIMD8); 249 float occu16 = cgCtx->GetThreadOccupancy(SIMDMode::SIMD16); 250 float occu32 = cgCtx->GetThreadOccupancy(SIMDMode::SIMD32); 251 252 bool simd32NoSpill = m_simdEntries[2] && m_simdEntries[2]->m_spillCost <= spillThreshold; 253 bool simd16NoSpill = m_simdEntries[1] && m_simdEntries[1]->m_spillCost <= spillThreshold; 254 bool simd8NoSpill = m_simdEntries[0] && 
m_simdEntries[0]->m_spillCost <= spillThreshold; 255 256 // If SIMD32/16/8 are all allowed, then choose one which has highest thread occupancy 257 258 if (IGC_IS_FLAG_ENABLED(EnableHighestSIMDForNoSpill)) 259 { 260 if (simd32NoSpill) 261 { 262 simdMode = SIMDMode::SIMD32; 263 return m_simdEntries[2]; 264 } 265 266 if (simd16NoSpill) 267 { 268 simdMode = SIMDMode::SIMD16; 269 return m_simdEntries[1]; 270 } 271 } 272 else 273 { 274 if (simd32NoSpill) 275 { 276 if (occu32 >= occu16 && occu32 >= occu8) 277 { 278 simdMode = SIMDMode::SIMD32; 279 return m_simdEntries[2]; 280 } 281 // If SIMD32 doesn't spill, SIMD16 and SIMD8 shouldn't, if they exist 282 IGC_ASSERT((m_simdEntries[0] == NULL) || simd8NoSpill == true); 283 IGC_ASSERT((m_simdEntries[1] == NULL) || simd16NoSpill == true); 284 } 285 286 if (simd16NoSpill) 287 { 288 if (occu16 >= occu8 && occu16 >= occu32) 289 { 290 simdMode = SIMDMode::SIMD16; 291 return m_simdEntries[1]; 292 } 293 IGC_ASSERT_MESSAGE((m_simdEntries[0] == NULL) || simd8NoSpill == true, "If SIMD16 doesn't spill, SIMD8 shouldn't, if it exists"); 294 } 295 } 296 297 bool needToRetry = false; 298 if (cgCtx->m_slmSize) 299 { 300 if (occu16 > occu8 || occu32 > occu16) 301 { 302 needToRetry = true; 303 } 304 } 305 306 SIMDMode maxSimdMode = cgCtx->GetMaxSIMDMode(); 307 if (maxSimdMode == SIMDMode::SIMD8 || !needToRetry) 308 { 309 if (m_simdEntries[0] && m_simdEntries[0]->m_spillSize == 0) 310 { 311 simdMode = SIMDMode::SIMD8; 312 return m_simdEntries[0]; 313 } 314 } 315 return nullptr; 316 } 317 PickCSEntryFinally(SIMDMode & simdMode)318 CShader* RetryManager::PickCSEntryFinally(SIMDMode& simdMode) 319 { 320 if (m_simdEntries[0]) 321 { 322 simdMode = SIMDMode::SIMD8; 323 return m_simdEntries[0]; 324 } 325 else 326 if (m_simdEntries[1]) 327 { 328 simdMode = SIMDMode::SIMD16; 329 return m_simdEntries[1]; 330 } 331 else 332 { 333 simdMode = SIMDMode::SIMD32; 334 return m_simdEntries[2]; 335 } 336 } 337 FreeAllocatedMemForNotPickedCS(SIMDMode simdMode)338 
void RetryManager::FreeAllocatedMemForNotPickedCS(SIMDMode simdMode) 339 { 340 if (simdMode != SIMDMode::SIMD8 && m_simdEntries[0] != nullptr) 341 { 342 if (m_simdEntries[0]->ProgramOutput()->m_programBin != nullptr) 343 aligned_free(m_simdEntries[0]->ProgramOutput()->m_programBin); 344 } 345 if (simdMode != SIMDMode::SIMD16 && m_simdEntries[1] != nullptr) 346 { 347 if (m_simdEntries[1]->ProgramOutput()->m_programBin != nullptr) 348 aligned_free(m_simdEntries[1]->ProgramOutput()->m_programBin); 349 } 350 if (simdMode != SIMDMode::SIMD32 && m_simdEntries[2] != nullptr) 351 { 352 if (m_simdEntries[2]->ProgramOutput()->m_programBin != nullptr) 353 aligned_free(m_simdEntries[2]->ProgramOutput()->m_programBin); 354 } 355 } 356 PickupCS(ComputeShaderContext * cgCtx)357 bool RetryManager::PickupCS(ComputeShaderContext* cgCtx) 358 { 359 SIMDMode simdMode = SIMDMode::UNKNOWN; 360 CComputeShader* shader = nullptr; 361 SComputeShaderKernelProgram* pKernelProgram = &cgCtx->programOutput; 362 363 if (cgCtx->getModuleMetaData()->csInfo.forcedSIMDSize != 0) 364 { 365 shader = static_cast<CComputeShader*>( 366 PickCSEntryForcedFromDriver(simdMode, cgCtx->getModuleMetaData()->csInfo.forcedSIMDSize)); 367 } 368 if (!shader) 369 { 370 shader = static_cast<CComputeShader*>( 371 PickCSEntryByRegKey(simdMode, cgCtx)); 372 } 373 if (!shader) 374 { 375 shader = static_cast<CComputeShader*>( 376 PickCSEntryEarly(simdMode, cgCtx)); 377 } 378 if (!shader && IsLastTry()) 379 { 380 shader = static_cast<CComputeShader*>( 381 PickCSEntryFinally(simdMode)); 382 IGC_ASSERT(shader != nullptr); 383 } 384 385 if (shader) 386 { 387 switch (simdMode) 388 { 389 case SIMDMode::SIMD8: 390 pKernelProgram->simd8 = *shader->ProgramOutput(); 391 pKernelProgram->SimdWidth = USC::GFXMEDIA_GPUWALKER_SIMD8; 392 cgCtx->SetSIMDInfo(SIMD_SELECTED, simdMode, 393 ShaderDispatchMode::NOT_APPLICABLE); 394 break; 395 396 case SIMDMode::SIMD16: 397 pKernelProgram->simd16 = *shader->ProgramOutput(); 398 
pKernelProgram->SimdWidth = USC::GFXMEDIA_GPUWALKER_SIMD16; 399 cgCtx->SetSIMDInfo(SIMD_SELECTED, simdMode, 400 ShaderDispatchMode::NOT_APPLICABLE); 401 break; 402 403 case SIMDMode::SIMD32: 404 pKernelProgram->simd32 = *shader->ProgramOutput(); 405 pKernelProgram->SimdWidth = USC::GFXMEDIA_GPUWALKER_SIMD32; 406 cgCtx->SetSIMDInfo(SIMD_SELECTED, simdMode, 407 ShaderDispatchMode::NOT_APPLICABLE); 408 break; 409 410 default: 411 IGC_ASSERT_MESSAGE(0, "Invalie SIMDMode"); 412 break; 413 } 414 shader->FillProgram(pKernelProgram); 415 pKernelProgram->SIMDInfo = cgCtx->GetSIMDInfo(); 416 417 418 // free allocated memory for the remaining kernels 419 FreeAllocatedMemForNotPickedCS(simdMode); 420 421 return true; 422 } 423 return false; 424 } 425 LLVMContextWrapper(bool createResourceDimTypes)426 LLVMContextWrapper::LLVMContextWrapper(bool createResourceDimTypes) 427 { 428 if (createResourceDimTypes) 429 { 430 CreateResourceDimensionTypes(*this); 431 } 432 } 433 AddRef()434 void LLVMContextWrapper::AddRef() 435 { 436 refCount++; 437 } 438 Release()439 void LLVMContextWrapper::Release() 440 { 441 refCount--; 442 if (refCount == 0) 443 { 444 delete this; 445 } 446 } 447 448 /** get shader's thread group size */ GetThreadGroupSize()449 unsigned ComputeShaderContext::GetThreadGroupSize() 450 { 451 llvm::GlobalVariable* pGlobal = getModule()->getGlobalVariable("ThreadGroupSize_X"); 452 m_threadGroupSize_X = int_cast<unsigned>(llvm::cast<llvm::ConstantInt>(pGlobal->getInitializer())->getZExtValue()); 453 454 pGlobal = getModule()->getGlobalVariable("ThreadGroupSize_Y"); 455 m_threadGroupSize_Y = int_cast<unsigned>(llvm::cast<llvm::ConstantInt>(pGlobal->getInitializer())->getZExtValue()); 456 457 pGlobal = getModule()->getGlobalVariable("ThreadGroupSize_Z"); 458 m_threadGroupSize_Z = int_cast<unsigned>(llvm::cast<llvm::ConstantInt>(pGlobal->getInitializer())->getZExtValue()); 459 460 return m_threadGroupSize_X * m_threadGroupSize_Y * m_threadGroupSize_Z; 461 } 462 
// SLM available per subslice (or dual subslice) on this platform.
unsigned ComputeShaderContext::GetSlmSizePerSubslice()
{
    return platform.getSlmSizePerSsOrDss();
}

// SLM bytes used by this shader.
unsigned ComputeShaderContext::GetSlmSize() const
{
    return m_slmSize;
}

// Estimated thread occupancy per subslice for the given SIMD width, given
// the current thread-group size and SLM usage.
float ComputeShaderContext::GetThreadOccupancy(SIMDMode simdMode)
{
    return GetThreadOccupancyPerSubslice(simdMode, GetThreadGroupSize(), GetHwThreadsPerWG(platform), m_slmSize, GetSlmSizePerSubslice());
}

/** get smallest SIMD mode allowed based on thread group size */
SIMDMode ComputeShaderContext::GetLeastSIMDModeAllowed()
{
    SIMDMode mode = getLeastSIMDAllowed(
        GetThreadGroupSize(),
        GetHwThreadsPerWG(platform));
    return mode;
}

/** get largest SIMD mode for performance based on thread group size */
SIMDMode ComputeShaderContext::GetMaxSIMDMode()
{
    unsigned threadGroupSize = GetThreadGroupSize();
    SIMDMode mode;
    // The narrowest SIMD width that still covers the whole thread group in
    // one hardware thread is the best candidate for performance.
    if (threadGroupSize <= 8)
    {
        mode = SIMDMode::SIMD8;
    }
    else if (threadGroupSize <= 16)
    {
        mode = SIMDMode::SIMD16;
    }
    else
    {
        mode = SIMDMode::SIMD32;
    }
    return mode;
}

// Spill-cost threshold (as a fraction, e.g. 0.08 == 8%) below which a
// variant is treated as "not spilling". Shaders using SLM get the
// platform-adjusted (regkey-overridable) threshold; others use the
// CSSpillThresholdNoSLM regkey value.
float ComputeShaderContext::GetSpillThreshold() const
{
    float spillThresholdSLM = platform.adjustedSpillThreshold() / 100.0f;
    // enable CSSpillThresholdSLM with desired value to override the default value.
    if(IGC_IS_FLAG_ENABLED(CSSpillThresholdSLM))
        spillThresholdSLM = float(IGC_GET_FLAG_VALUE(CSSpillThresholdSLM)) / 100.0f;
    float spillThresholdNoSLM =
        float(IGC_GET_FLAG_VALUE(CSSpillThresholdNoSLM)) / 100.0f;
    return m_slmSize ? spillThresholdSLM : spillThresholdNoSLM;
}

// Whether the input program came in as SPIR-V (set by setAsSPIRV()).
bool OpenCLProgramContext::isSPIRV() const
{
    return isSpirV;
}

void OpenCLProgramContext::setAsSPIRV()
{
    isSpirV = true;
}

float OpenCLProgramContext::getProfilingTimerResolution()
{
    return m_ProfilingTimerResolution;
}

// Requested threads-per-EU: build options take precedence over internal
// options; 0 means "no request" (use the hardware default).
uint32_t OpenCLProgramContext::getNumThreadsPerEU() const
{
    if (m_Options.IntelRequiredEUThreadCount)
    {
        return m_Options.requiredEUThreadCount;
    }
    if (m_InternalOptions.IntelNumThreadPerEU || m_InternalOptions.Intel256GRFPerThread)
    {
        return m_InternalOptions.numThreadsPerEU;
    }

    return 0;
}

// GRF-per-thread: honors -cl-intel-128/256-GRF-per-thread on platforms with
// static register sharing, otherwise defers to the base-class default.
uint32_t OpenCLProgramContext::getNumGRFPerThread() const
{
    if (platform.supportsStaticRegSharing())
    {
        if (m_InternalOptions.Intel128GRFPerThread)
        {
            return 128;
        }
        else if (m_InternalOptions.Intel256GRFPerThread)
        {
            return 256;
        }
    }
    return CodeGenContext::getNumGRFPerThread();
}

bool OpenCLProgramContext::forceGlobalMemoryAllocation() const
{
    return m_InternalOptions.ForceGlobalMemoryAllocation;
}

// Private memory goes to a global buffer either when forced by option or
// when generic load/stores make scratch-space private memory unusable.
bool OpenCLProgramContext::allocatePrivateAsGlobalBuffer() const
{
    return forceGlobalMemoryAllocation() || (m_instrTypes.hasDynamicGenericLoadStore && platform.canForcePrivateToGlobal());
}

// True when no local->generic address-space cast exists (from option or
// from module analysis), enabling cheaper generic-pointer handling.
bool OpenCLProgramContext::hasNoLocalToGenericCast() const
{
    return m_InternalOptions.HasNoLocalToGeneric || getModuleMetaData()->hasNoLocalToGenericCast;
}

bool OpenCLProgramContext::hasNoPrivateToGenericCast() const
{
    return getModuleMetaData()->hasNoPrivateToGenericCast;
}

// Vector-coalescing level: a non-negative cmdline value wins; otherwise
// the VATemp regkey supplies the value.
int16_t OpenCLProgramContext::getVectorCoalescingControl() const
{
    // cmdline option > registry key
    int val = m_InternalOptions.VectorCoalescingControl;
    if (val < 0)
    {
        // no cmdline option
        val = IGC_GET_FLAG_VALUE(VATemp);
    }
    return val;
}

// Parses the internal-options string handed down by the driver and sets
// the corresponding InternalOptions fields. Unknown flags are skipped.
// NOTE: the scan is position-driven: value-taking flags consume their
// value and 'continue' with Pos already on the value's end.
void OpenCLProgramContext::InternalOptions::parseOptions(const char* IntOptStr)
{
    // Assume flags is in the form: <f0>[=<v0>] <f1>[=<v1>] ...
    // flag name and its value are either separated by ' ' or '=';
    // flag separator is always ' '.
    const char* NAMESEP = " ="; // separator b/w name and its value

    llvm::StringRef opts(IntOptStr);
    size_t Pos = 0;
    while (Pos != llvm::StringRef::npos)
    {
        // Get a flag name
        Pos = opts.find_first_not_of(' ', Pos);
        if (Pos == llvm::StringRef::npos)
            continue;

        size_t ePos = opts.find_first_of(NAMESEP, Pos);
        llvm::StringRef flagName = opts.substr(Pos, ePos - Pos);

        // Build options: -cl-intel-xxxx, -ze-intel-xxxx, -ze-opt-xxxx
        //                -cl-xxxx, -ze-xxxx
        // Both cl version and ze version means the same thing.
        // Here, strip off common prefix.
        size_t prefix_len;
        if (flagName.startswith("-cl-intel") || flagName.startswith("-ze-intel"))
        {
            prefix_len = 9;
        }
        else if (flagName.startswith("-ze-opt"))
        {
            prefix_len = 7;
        }
        else if (flagName.startswith("-cl") || flagName.startswith("-ze"))
        {
            prefix_len = 3;
        }
        else
        {
            // not a valid flag, skip
            Pos = opts.find_first_of(' ', Pos);
            continue;
        }

        llvm::StringRef suffix = flagName.drop_front(prefix_len);
        if (suffix.equals("-replace-global-offsets-by-zero"))
        {
            replaceGlobalOffsetsByZero = true;
        }
        else if (suffix.equals("-kernel-debug-enable"))
        {
            KernelDebugEnable = true;
        }
        else if (suffix.equals("-include-sip-csr"))
        {
            IncludeSIPCSR = true;
        }
        else if (suffix.equals("-include-sip-kernel-debug"))
        {
            IncludeSIPKernelDebug = true;
        }
        else if (suffix.equals("-include-sip-kernel-local-debug"))
        {
            IncludeSIPKernelDebugWithLocalMemory = true;
        }
        else if (suffix.equals("-use-32bit-ptr-arith"))
        {
            Use32BitPtrArith = true;
        }

        // -cl-intel-greater-than-4GB-buffer-required, -ze-opt-greater-than-4GB-buffer-required
        else if (suffix.equals("-greater-than-4GB-buffer-required"))
        {
            IntelGreaterThan4GBBufferRequired = true;
        }

        // -cl-intel-has-buffer-offset-arg, -ze-opt-has-buffer-offset-arg
        else if (suffix.equals("-has-buffer-offset-arg"))
        {
            IntelHasBufferOffsetArg = true;
        }

        // -cl-intel-buffer-offset-arg-required, -ze-opt-buffer-offset-arg-required
        else if (suffix.equals("-buffer-offset-arg-required"))
        {
            IntelBufferOffsetArgOptional = false;
        }

        // -cl-intel-has-positive-pointer-offset, -ze-opt-has-positive-pointer-offset
        else if (suffix.equals("-has-positive-pointer-offset"))
        {
            IntelHasPositivePointerOffset = true;
        }

        // -cl-intel-has-subDW-aligned-ptr-arg, -ze-opt-has-subDW-aligned-ptr-arg
        else if (suffix.equals("-has-subDW-aligned-ptr-arg"))
        {
            IntelHasSubDWAlignedPtrArg = true;
        }

        // -cl-intel-disable-a64WA
        else if (suffix.equals("-disable-a64WA"))
        {
            IntelDisableA64WA = true;
        }

        // -cl-intel-force-enable-a64WA
        else if (suffix.equals("-force-enable-a64WA"))
        {
            IntelForceEnableA64WA = true;
        }

        // GTPin flags used by L0 driver runtime
        // -cl-intel-gtpin-rera
        else if (suffix.equals("-gtpin-rera"))
        {
            GTPinReRA = true;
        }
        else if (suffix.equals("-gtpin-grf-info"))
        {
            GTPinGRFInfo = true;
        }
        else if (suffix.equals("-gtpin-scratch-area-size"))
        {
            GTPinScratchAreaSize = true;
            // This flag carries a value: <flag> <number>
            size_t valStart = opts.find_first_not_of(' ', ePos + 1);
            size_t valEnd = opts.find_first_of(' ', valStart);
            llvm::StringRef valStr = opts.substr(valStart, valEnd - valStart);
            if (valStr.getAsInteger(10, GTPinScratchAreaSizeValue))
            {
                IGC_ASSERT(0);
            }
            Pos = valEnd;
            continue;
        }

        // -cl-intel-no-prera-scheduling
        else if (suffix.equals("-no-prera-scheduling"))
        {
            IntelEnablePreRAScheduling = false;
        }
        // -cl-intel-no-local-to-generic
        else if (suffix.equals("-no-local-to-generic"))
        {
            HasNoLocalToGeneric = true;
        }
        // -cl-intel-force-global-mem-allocation
        else if (suffix.equals("-force-global-mem-allocation"))
        {
            ForceGlobalMemoryAllocation = true;
        }

        //
        // Options to set the number of GRF and threads
        // (All start with -cl-intel or -ze-opt)
        else if (suffix.equals("-128-GRF-per-thread"))
        {
            Intel128GRFPerThread = true;
            numThreadsPerEU = 8;
        }
        else if (suffix.equals("-256-GRF-per-thread") ||
                 suffix.equals("-large-register-file"))
        {
            Intel256GRFPerThread = true;
            numThreadsPerEU = 4;
        }
        else if (suffix.equals("-num-thread-per-eu"))
        {
            IntelNumThreadPerEU = true;

            // Take an integer value after this option:
            //   <flag> <number>
            size_t valStart = opts.find_first_not_of(' ', ePos + 1);
            size_t valEnd = opts.find_first_of(' ', valStart);
            llvm::StringRef valStr = opts.substr(valStart, valEnd - valStart);
            if (valStr.getAsInteger(10, numThreadsPerEU))
            {
                IGC_ASSERT(0);
            }
            Pos = valEnd;
            continue;
        }

        // -cl-intel-use-bindless-buffers
        else if (suffix.equals("-use-bindless-buffers"))
        {
            PromoteStatelessToBindless = true;
        }
        // -cl-intel-use-bindless-images
        else if (suffix.equals("-use-bindless-images"))
        {
            PreferBindlessImages = true;
        }
        // -cl-intel-use-bindless-mode
        else if (suffix.equals("-use-bindless-mode"))
        {
            // This is a new option that combines bindless generation for buffers
            // and images. Keep the old internal options to have compatibility
            // for existing tests. Those (old) options could be removed in future.
            UseBindlessMode = true;
            PreferBindlessImages = true;
            PromoteStatelessToBindless = true;
        }
        // -cl-intel-use-bindless-printf
        else if (suffix.equals("-use-bindless-printf"))
        {
            UseBindlessPrintf = true;
        }
        // -cl-intel-use-bindless-legacy-mode
        else if (suffix.equals("-use-bindless-legacy-mode"))
        {
            UseBindlessLegacyMode = true;
        }
        // -cl-intel-use-bindless-advanced-mode
        else if (suffix.equals("-use-bindless-advanced-mode"))
        {
            UseBindlessLegacyMode = false;
        }
        // -cl-intel-vector-coalesing
        else if (suffix.equals("-vector-coalescing"))
        {
            // -cl-intel-vector-coalescing=<0-5>.
            size_t valStart = opts.find_first_not_of(' ', ePos + 1);
            size_t valEnd = opts.find_first_of(' ', valStart);
            llvm::StringRef valStr = opts.substr(valStart, valEnd - valStart);

            int16_t val;
            if (valStr.getAsInteger(10, val))
            {
                IGC_ASSERT_MESSAGE(false, "-cl-intel-vector-coalescing: invalid value, ignored!");
            }
            else if (val >= 0 && val <= 5)
            {
                VectorCoalescingControl = val;
            }
            Pos = valEnd;
            continue;
        }
        // -cl-intel-allow-zebin
        else if (suffix.equals("-allow-zebin"))
        {
            EnableZEBinary = true;
        }
        // -cl-intel-no-spill
        else if (suffix.equals("-no-spill"))
        {
            // This is an option to avoid spill/fill instructions in scheduler kernel.
            // OpenCL Runtime triggers scheduler kernel offline compilation while driver building,
            // since scratch space is not supported in this specific case, we cannot end up with
            // spilling kernel. If this option is set, then IGC will recompile the kernel with
            // some optimizations disabled to avoid spill/fill instructions.
            NoSpill = true;
        }

        // advance to the next flag
        Pos = opts.find_first_of(' ', Pos);
    }
}

// Creates and takes a reference on the shared LLVMContext wrapper.
void CodeGenContext::initLLVMContextWrapper(bool createResourceDimTypes)
{
    llvmCtxWrapper = new LLVMContextWrapper(createResourceDimTypes);
    llvmCtxWrapper->AddRef();
}

llvm::LLVMContext* CodeGenContext::getLLVMContext() const {
    return llvmCtxWrapper;
}

IGC::IGCMD::MetaDataUtils* CodeGenContext::getMetaDataUtils() const
{
    IGC_ASSERT_MESSAGE(nullptr != m_pMdUtils, "Metadata Utils is not initialized");
    return m_pMdUtils;
}

IGCLLVM::Module* CodeGenContext::getModule() const { return module; }

// Seeds module compile options from regkeys right after a module is attached.
static void initCompOptionFromRegkey(CodeGenContext* ctx)
{
    CompOptions& opt = ctx->getModuleMetaData()->compOpt;

    opt.pixelShaderDoNotAbortOnSpill =
        IGC_IS_FLAG_ENABLED(PixelShaderDoNotAbortOnSpill);
    opt.forcePixelShaderSIMDMode =
        IGC_GET_FLAG_VALUE(ForcePixelShaderSIMDMode);
}

// Attaches a module to the context and (re)creates its metadata helpers.
// The context takes ownership of the module (released in clear()/deleteModule()).
void CodeGenContext::setModule(llvm::Module* m)
{
    module = (IGCLLVM::Module*)m;
    m_pMdUtils = new IGC::IGCMD::MetaDataUtils(m);
    modMD = new IGC::ModuleMetaData();
    initCompOptionFromRegkey(this);
}

// Several clients explicitly delete module without resetting module to null.
// This causes the issue later when the dtor is invoked (trying to delete a
// dangling pointer again). This function is used to replace any explicit
// delete in order to prevent deleting dangling pointers happening.
deleteModule()892 void CodeGenContext::deleteModule() 893 { 894 delete m_pMdUtils; 895 delete modMD; 896 delete module; 897 m_pMdUtils = nullptr; 898 modMD = nullptr; 899 module = nullptr; 900 delete annotater; 901 annotater = nullptr; 902 } 903 getModuleMetaData() const904 IGC::ModuleMetaData* CodeGenContext::getModuleMetaData() const 905 { 906 IGC_ASSERT_MESSAGE(nullptr != modMD, "Module Metadata is not initialized"); 907 return modMD; 908 } 909 getRegisterPointerSizeInBits(unsigned int AS) const910 unsigned int CodeGenContext::getRegisterPointerSizeInBits(unsigned int AS) const 911 { 912 unsigned int pointerSizeInRegister = 32; 913 switch (AS) 914 { 915 case ADDRESS_SPACE_GLOBAL: 916 case ADDRESS_SPACE_CONSTANT: 917 case ADDRESS_SPACE_GENERIC: 918 case ADDRESS_SPACE_GLOBAL_OR_PRIVATE: 919 pointerSizeInRegister = 920 getModule()->getDataLayout().getPointerSizeInBits(AS); 921 break; 922 case ADDRESS_SPACE_LOCAL: 923 case ADDRESS_SPACE_A32: 924 pointerSizeInRegister = 32; 925 break; 926 case ADDRESS_SPACE_PRIVATE: 927 if (getModuleMetaData()->compOpt.UseScratchSpacePrivateMemory) 928 { 929 pointerSizeInRegister = 32; 930 } 931 else 932 { 933 pointerSizeInRegister = ((type == ShaderType::OPENCL_SHADER) ? 
934 getModule()->getDataLayout().getPointerSizeInBits(AS) : 64); 935 } 936 break; 937 default: 938 pointerSizeInRegister = 32; 939 break; 940 } 941 return pointerSizeInRegister; 942 } 943 enableFunctionCall() const944 bool CodeGenContext::enableFunctionCall() const 945 { 946 return (m_enableSubroutine || m_enableFunctionPointer); 947 } 948 949 /// Check for user functions in the module and enable the m_enableSubroutine flag if exists CheckEnableSubroutine(llvm::Module & M)950 void CodeGenContext::CheckEnableSubroutine(llvm::Module& M) 951 { 952 bool EnableSubroutine = false; 953 for (auto& F : M) 954 { 955 if (F.isDeclaration() || 956 F.use_empty() || 957 isEntryFunc(getMetaDataUtils(), &F)) 958 { 959 continue; 960 } 961 962 if (F.hasFnAttribute("KMPLOCK") || 963 F.hasFnAttribute(llvm::Attribute::NoInline) || 964 !F.hasFnAttribute(llvm::Attribute::AlwaysInline)) 965 { 966 EnableSubroutine = true; 967 break; 968 } 969 } 970 m_enableSubroutine = EnableSubroutine; 971 } 972 InitVarMetaData()973 void CodeGenContext::InitVarMetaData() {} 974 ~CodeGenContext()975 CodeGenContext::~CodeGenContext() 976 { 977 clear(); 978 } 979 980 clear()981 void CodeGenContext::clear() 982 { 983 m_enableSubroutine = false; 984 m_enableFunctionPointer = false; 985 986 delete modMD; 987 delete m_pMdUtils; 988 modMD = nullptr; 989 m_pMdUtils = nullptr; 990 991 delete module; 992 llvmCtxWrapper->Release(); 993 module = nullptr; 994 llvmCtxWrapper = nullptr; 995 } 996 getRelatedFunction(const llvm::Value * value)997 static const llvm::Function *getRelatedFunction(const llvm::Value *value) 998 { 999 if (value == nullptr) 1000 return nullptr; 1001 1002 if (const llvm::Function *F = llvm::dyn_cast<llvm::Function>(value)) { 1003 return F; 1004 } 1005 if (const llvm::Argument *A = llvm::dyn_cast<llvm::Argument>(value)) { 1006 return A->getParent(); 1007 } 1008 if (const llvm::BasicBlock *BB = llvm::dyn_cast<llvm::BasicBlock>(value)) { 1009 return BB->getParent(); 1010 } 1011 if (const 
llvm::Instruction *I = llvm::dyn_cast<llvm::Instruction>(value)) { 1012 return I->getParent()->getParent(); 1013 } 1014 1015 return nullptr; 1016 } 1017 isEntryPoint(const CodeGenContext * ctx,const llvm::Function * F)1018 static bool isEntryPoint(const CodeGenContext *ctx, const llvm::Function *F) 1019 { 1020 if (F == nullptr) { 1021 return false; 1022 } 1023 1024 auto& FuncMD = ctx->getModuleMetaData()->FuncMD; 1025 auto FuncInfo = FuncMD.find(const_cast<llvm::Function *>(F)); 1026 if (FuncInfo == FuncMD.end()) { 1027 return false; 1028 } 1029 1030 const FunctionMetaData* MD = &FuncInfo->second; 1031 return MD->functionType == KernelFunction; 1032 } 1033 findCallingKernels(const CodeGenContext * ctx,const llvm::Function * F,llvm::SmallPtrSetImpl<const llvm::Function * > & kernels)1034 static void findCallingKernels 1035 (const CodeGenContext *ctx, const llvm::Function *F, llvm::SmallPtrSetImpl<const llvm::Function *> &kernels) 1036 { 1037 if (F == nullptr || kernels.count(F)) 1038 return; 1039 1040 for (const llvm::User *U : F->users()) { 1041 auto *CI = llvm::dyn_cast<llvm::CallInst>(U); 1042 if (CI == nullptr) 1043 continue; 1044 1045 if (CI->getCalledFunction() != F) 1046 continue; 1047 1048 const llvm::Function *caller = getRelatedFunction(CI); 1049 if (isEntryPoint(ctx, caller)) { 1050 kernels.insert(caller); 1051 continue; 1052 } 1053 // Caller is not a kernel, try to check which kerneles might 1054 // be calling it: 1055 findCallingKernels(ctx, caller, kernels); 1056 } 1057 } 1058 handleOpenMPDemangling(const std::string & name,std::string * strippedName)1059 static bool handleOpenMPDemangling(const std::string &name, std::string *strippedName) { 1060 // OpenMP mangled names have following structure: 1061 // 1062 // __omp_offloading_DD_FFFF_PP_lBB 1063 // 1064 // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the 1065 // mangled name of the function that encloses the target region and BB is the 1066 // line number of the target 
region. 1067 if (name.rfind("__omp_offloading_", 0) != 0) { 1068 return false; 1069 } 1070 size_t offset = sizeof "__omp_offloading_"; 1071 offset = name.find('_', offset + 1); // Find end of DD. 1072 if (offset == std::string::npos) 1073 return false; 1074 offset = name.find('_', offset + 1); // Find end of FFFF. 1075 if (offset == std::string::npos) 1076 return false; 1077 1078 const size_t start = offset + 1; 1079 const size_t end = name.rfind('_'); // Find beginning of lBB. 1080 if (end == std::string::npos) 1081 return false; 1082 1083 *strippedName = name.substr(start, end - start); 1084 return true; 1085 } 1086 1087 demangleFuncName(const std::string & rawName)1088 static std::string demangleFuncName(const std::string &rawName) { 1089 // OpenMP adds additional prefix and suffix to the mangling scheme, 1090 // remove it if present. 1091 std::string name; 1092 if (!handleOpenMPDemangling(rawName, &name)) { 1093 // If OpenMP demangling didn't succeed just proceed with received 1094 // symbol name 1095 name = rawName; 1096 } 1097 #if LLVM_VERSION_MAJOR >= 10 1098 return llvm::demangle(name); 1099 #else 1100 char *demangled = nullptr; 1101 1102 demangled = llvm::itaniumDemangle(name.c_str(), nullptr, nullptr, nullptr); 1103 if (demangled == nullptr) { 1104 demangled = llvm::microsoftDemangle(name.c_str(), nullptr, nullptr, nullptr); 1105 } 1106 1107 if (demangled == nullptr) { 1108 return name; 1109 } 1110 1111 std::string result = demangled; 1112 std::free(demangled); 1113 return result; 1114 #endif 1115 } 1116 EmitError(std::ostream & OS,const char * errorstr,const llvm::Value * context) const1117 void CodeGenContext::EmitError(std::ostream &OS, const char* errorstr, const llvm::Value* context) const 1118 { 1119 OS << "\nerror: "; 1120 OS << errorstr; 1121 // Try to get debug location to print out the relevant info. 
1122 if (const llvm::Instruction *I = llvm::dyn_cast_or_null<llvm::Instruction>(context)) { 1123 if (const llvm::DILocation *DL = I->getDebugLoc()) { 1124 OS << "\nin file: " << DL->getFilename().str() << ":" << DL->getLine() << "\n"; 1125 } 1126 } 1127 // Try to find function related to given context 1128 // to print more informative error message. 1129 if (const llvm::Function *F = getRelatedFunction(context)) { 1130 // If the function is a kernel just print the kernel name. 1131 if (isEntryPoint(this, F)) { 1132 OS << "\nin kernel: '" << demangleFuncName(std::string(F->getName())) << "'"; 1133 // If the function is not a kernel try to print all kernels that 1134 // might be using this function. 1135 } else { 1136 llvm::SmallPtrSet<const llvm::Function *, 16> kernels; 1137 findCallingKernels(this, F, kernels); 1138 1139 const size_t kernelsCount = kernels.size(); 1140 OS << "\nin function: '" << demangleFuncName(std::string(F->getName())) << "' "; 1141 if (kernelsCount == 0) { 1142 OS << "called indirectly by at least one of the kernels.\n"; 1143 } else if (kernelsCount == 1) { 1144 const llvm::Function *kernel = *kernels.begin(); 1145 OS << "called by kernel: '" << demangleFuncName(std::string(kernel->getName())) << "'\n"; 1146 } else { 1147 OS << "called by kernels:\n"; 1148 for (const llvm::Function *kernel : kernels) { 1149 OS << " - '" << demangleFuncName(std::string(kernel->getName())) << "'\n"; 1150 } 1151 } 1152 } 1153 } 1154 OS << "\nerror: backend compiler failed build.\n"; 1155 } 1156 EmitError(const char * errorstr,const llvm::Value * context)1157 void CodeGenContext::EmitError(const char* errorstr, const llvm::Value *context) 1158 { 1159 EmitError(this->oclErrorMessage, errorstr, context); 1160 } 1161 EmitWarning(const char * warningstr)1162 void CodeGenContext::EmitWarning(const char* warningstr) 1163 { 1164 this->oclWarningMessage << "\nwarning: "; 1165 this->oclWarningMessage << warningstr; 1166 this->oclWarningMessage << "\n"; 1167 } 1168 
getCompilerOption()1169 CompOptions& CodeGenContext::getCompilerOption() 1170 { 1171 return getModuleMetaData()->compOpt; 1172 } 1173 resetOnRetry()1174 void CodeGenContext::resetOnRetry() 1175 { 1176 m_tempCount = 0; 1177 } 1178 getNumThreadsPerEU() const1179 uint32_t CodeGenContext::getNumThreadsPerEU() const 1180 { 1181 return 0; 1182 } 1183 getNumGRFPerThread() const1184 uint32_t CodeGenContext::getNumGRFPerThread() const 1185 { 1186 if (IGC_GET_FLAG_VALUE(TotalGRFNum) != 0) 1187 { 1188 return IGC_GET_FLAG_VALUE(TotalGRFNum); 1189 } 1190 if (getModuleMetaData()->csInfo.forceTotalGRFNum != 0) 1191 { 1192 return getModuleMetaData()->csInfo.forceTotalGRFNum; 1193 } 1194 if (this->type == ShaderType::COMPUTE_SHADER && IGC_GET_FLAG_VALUE(TotalGRFNum4CS) != 0) 1195 { 1196 return IGC_GET_FLAG_VALUE(TotalGRFNum4CS); 1197 } 1198 return 128; 1199 } 1200 forceGlobalMemoryAllocation() const1201 bool CodeGenContext::forceGlobalMemoryAllocation() const 1202 { 1203 return false; 1204 } 1205 allocatePrivateAsGlobalBuffer() const1206 bool CodeGenContext::allocatePrivateAsGlobalBuffer() const 1207 { 1208 return false; 1209 } 1210 hasNoLocalToGenericCast() const1211 bool CodeGenContext::hasNoLocalToGenericCast() const 1212 { 1213 return false; 1214 } 1215 hasNoPrivateToGenericCast() const1216 bool CodeGenContext::hasNoPrivateToGenericCast() const 1217 { 1218 return false; 1219 } 1220 getVectorCoalescingControl() const1221 int16_t CodeGenContext::getVectorCoalescingControl() const 1222 { 1223 return 0; 1224 } 1225 isPOSH() const1226 bool CodeGenContext::isPOSH() const 1227 { 1228 return this->getModule()->getModuleFlag( 1229 "IGC::PositionOnlyVertexShader") != nullptr; 1230 } 1231 setFlagsPerCtx()1232 void CodeGenContext::setFlagsPerCtx() 1233 { 1234 if (m_DriverInfo.DessaAliasLevel() != -1) { 1235 if ((int)IGC_GET_FLAG_VALUE(EnableDeSSAAlias) > m_DriverInfo.DessaAliasLevel()) 1236 { 1237 IGC_SET_FLAG_VALUE(EnableDeSSAAlias, m_DriverInfo.DessaAliasLevel()); 1238 } 1239 } 1240 } 
1241 1242 1243 } 1244