1 /*========================== begin_copyright_notice ============================ 2 3 Copyright (C) 2017-2021 Intel Corporation 4 5 SPDX-License-Identifier: MIT 6 7 ============================= end_copyright_notice ===========================*/ 8 9 #include "HullShaderLowering.hpp" 10 #include "Compiler/CISACodeGen/helper.h" 11 #include "Compiler/CISACodeGen/HullShaderCodeGen.hpp" 12 #include "Compiler/MetaDataUtilsWrapper.h" 13 #include "Compiler/IGCPassSupport.h" 14 #include "Probe/Assertion.h" 15 16 namespace IGC 17 { 18 using namespace llvm; 19 using namespace IGCMD; 20 21 class HullShaderLowering : public llvm::FunctionPass 22 { 23 public: 24 HullShaderLowering(); 25 static char ID; 26 virtual bool runOnFunction(llvm::Function& F) override; 27 getAnalysisUsage(llvm::AnalysisUsage & AU) const28 virtual void getAnalysisUsage(llvm::AnalysisUsage& AU) const override 29 { 30 AU.setPreservesCFG(); 31 AU.addRequired<MetaDataUtilsWrapper>(); 32 AU.addRequired<CollectHullShaderProperties>(); 33 AU.addRequired<CodeGenContextWrapper>(); 34 } 35 36 private: 37 void LowerIntrinsicInputOutput(llvm::Function& F); 38 39 unsigned int GetDomainType(); 40 bool IsTEFactorsPaddingAllowed(llvm::BasicBlock* bb, unsigned int tessShaderDomain); 41 42 llvm::GenIntrinsicInst* AddURBWriteControlPointOutputs( 43 Value* mask, 44 Value* data[8], 45 Instruction* prev); 46 47 llvm::GenIntrinsicInst* AddURBWrite( 48 llvm::Value* offset, 49 llvm::Value* mask, 50 llvm::Value* data[8], 51 llvm::Instruction* prev); 52 53 54 void AddURBRead(Value* index, Value* offset, Instruction* prev); 55 56 void AddURBReadOutput(llvm::Value* offset, llvm::Instruction* prev); 57 58 llvm::Module* m_module; 59 60 std::map<Value*, std::vector<GenIntrinsicInst*>> m_pControlPointOutputs; 61 QuadEltUnit m_headerSize; 62 CollectHullShaderProperties* m_hullShaderInfo; 63 64 }; 65 66 #define PASS_FLAG "igc-collect-hull-shader-properties" 67 #define PASS_DESCRIPTION "Collect information related to hull shader" 68 #define PASS_CFG_ONLY false 69 #define PASS_ANALYSIS true 70 IGC_INITIALIZE_PASS_BEGIN(CollectHullShaderProperties, PASS_FLAG, PASS_DESCRIPTION, PASS_CFG_ONLY, PASS_ANALYSIS) 71 IGC_INITIALIZE_PASS_END(CollectHullShaderProperties, PASS_FLAG, PASS_DESCRIPTION, PASS_CFG_ONLY, PASS_ANALYSIS) 72 73 // undef macros to avoid redefinition compiler warnings 74 #undef PASS_FLAG 75 #undef PASS_DESCRIPTION 76 #undef PASS_ANALYSIS 77 78 #define PASS_FLAG "igc-hull-shader-lowering" 79 #define PASS_DESCRIPTION "Lower inputs outputs for hull shader" 80 #define PASS_CFG_ONLY false 81 #define PASS_ANALYSIS false 82 IGC_INITIALIZE_PASS_BEGIN(HullShaderLowering, PASS_FLAG, PASS_DESCRIPTION, PASS_CFG_ONLY, PASS_ANALYSIS) 83 IGC_INITIALIZE_PASS_DEPENDENCY(CollectHullShaderProperties) 84 IGC_INITIALIZE_PASS_DEPENDENCY(MetaDataUtilsWrapper) 85 IGC_INITIALIZE_PASS_END(HullShaderLowering, PASS_FLAG, PASS_DESCRIPTION, PASS_CFG_ONLY, PASS_ANALYSIS) 86 87 char HullShaderLowering::ID = 0; 88 char CollectHullShaderProperties::ID = 0; 89 HullShaderLowering()90 HullShaderLowering::HullShaderLowering() : FunctionPass(ID) 91 { 92 initializeHullShaderLoweringPass(*PassRegistry::getPassRegistry()); 93 } 94 runOnFunction(llvm::Function & F)95 bool HullShaderLowering::runOnFunction(llvm::Function& F) 96 { 97 MetaDataUtils* pMdUtils = getAnalysis<MetaDataUtilsWrapper>().getMetaDataUtils(); 98 if (!isEntryFunc(pMdUtils, &F)) 99 { 100 return false; 101 } 102 m_headerSize = QuadEltUnit(2); 103 m_hullShaderInfo = &getAnalysis<CollectHullShaderProperties>(); 104 // Collect Hull shader information 105 m_hullShaderInfo->gatherInformation(&F); 106 107 m_module = F.getParent(); 108 109 LowerIntrinsicInputOutput(F); 110 return false; 111 } 112 LowerIntrinsicInputOutput(Function & F)113 void HullShaderLowering::LowerIntrinsicInputOutput(Function& F) 114 { 115 SmallVector<Instruction*, 10> instructionToRemove; 116 117 IRBuilder<> builder(F.getContext()); 118 119 IGC::CodeGenContext* ctx = getAnalysis<CodeGenContextWrapper>().getCodeGenContext(); 120 unsigned int tessShaderDomain = GetDomainType(); 121 122 for (auto BI = F.begin(), BE = F.end(); BI != BE; BI++) 123 { 124 m_pControlPointOutputs.clear(); 125 126 bool checkedForTEFactorsPadding = false; 127 128 for (auto II = BI->begin(), IE = BI->end(); II != IE; II++) 129 { 130 if (GenIntrinsicInst * inst = dyn_cast<GenIntrinsicInst>(II)) 131 { 132 const GenISAIntrinsic::ID IID = inst->getIntrinsicID(); 133 // In oword units 134 const unsigned int vertexHeaderSize = ctx->getModuleMetaData()->URBInfo.hasVertexHeader ? 135 (m_hullShaderInfo->GetProperties().m_HasClipCullAsInput ? 4 : 2) : 0; 136 if (IID == GenISAIntrinsic::GenISA_DCL_HSinputVec) 137 { 138 Value* index = nullptr; 139 if (llvm::isa<ConstantInt>(inst->getOperand(0))) 140 { 141 // In case of direct access of HSInputVec we need to be sure to not use vertex index 142 // bigger than number of declared ICP. 143 // This might happen in OGL, when number of Input Control Points might not be known 144 // during first compilation. 145 uint32_t usedIndex = int_cast<unsigned int>(llvm::cast<ConstantInt>(inst->getOperand(0))->getZExtValue()); 146 uint32_t validIndex = 147 iSTD::Min(usedIndex, m_hullShaderInfo->GetProperties().m_pInputControlPointCount - 1); 148 149 index = builder.getInt32(validIndex); 150 } 151 else 152 { 153 index = inst->getOperand(0); 154 } 155 156 builder.SetInsertPoint(inst); 157 158 AddURBRead( 159 index, 160 builder.CreateAdd(inst->getOperand(1), builder.getInt32(vertexHeaderSize)), 161 inst); 162 instructionToRemove.push_back(inst); 163 } 164 165 if (IID == GenISAIntrinsic::GenISA_PatchConstantOutput) 166 { 167 // handle GenISA_OUTPUT intrinsic instructions 168 const uint patchConstantOutputIndex = 4; 169 Value* offsetVal = nullptr; 170 llvm::Value* pPatchConstantOffset = inst->getOperand(patchConstantOutputIndex); 171 172 // lower patch constant outputs to URBWrite 173 if (auto pPCOffsetIdx = llvm::dyn_cast<llvm::ConstantInt>(pPatchConstantOffset)) 174 { 175 // patch constant output index is a constant. 176 const uint offsetIndex = int_cast<unsigned int>(pPCOffsetIdx->getZExtValue()); 177 const QuadEltUnit staticOffset = QuadEltUnit(offsetIndex) + OctEltUnit(1); // Add 1 for vertex header 178 Value* staticOffsetVal = builder.getInt32(staticOffset.Count()); 179 offsetVal = staticOffsetVal; 180 } 181 else 182 { 183 // patch constant output is indirect output 184 const QuadEltUnit staticOffset = OctEltUnit(1); // Add 1 for vertex header 185 Value* staticOffsetVal = builder.getInt32(staticOffset.Count()); 186 187 Instruction* sum = BinaryOperator::CreateAdd(pPatchConstantOffset, staticOffsetVal); 188 sum->insertBefore(inst); 189 offsetVal = sum; 190 } 191 192 Value* undef = llvm::UndefValue::get(Type::getFloatTy(F.getContext())); 193 Value* data[8] = 194 { 195 inst->getOperand(0), 196 inst->getOperand(1), 197 inst->getOperand(2), 198 inst->getOperand(3), 199 undef, 200 undef, 201 undef, 202 undef, 203 }; 204 AddURBWrite( 205 offsetVal, 206 inst->getOperand(5), 207 data, 208 inst); 209 instructionToRemove.push_back(inst); 210 } 211 212 if (IID == GenISAIntrinsic::GenISA_OutputTessControlPoint) 213 { 214 // for each BB handle OutputHSControlPoint intrinsic instructions 215 Value* undef = llvm::UndefValue::get(Type::getFloatTy(F.getContext())); 216 Value* data[8] = 217 { 218 inst->getOperand(0), 219 inst->getOperand(1), 220 inst->getOperand(2), 221 inst->getOperand(3), 222 undef, 223 undef, 224 undef, 225 undef, 226 }; 227 AddURBWriteControlPointOutputs(inst->getOperand(6), data, inst); 228 instructionToRemove.push_back(inst); 229 } 230 231 if ((IID == GenISAIntrinsic::GenISA_OuterScalarTessFactors) || 232 (IID == GenISAIntrinsic::GenISA_InnerScalarTessFactors)) 233 { 234 // Apply URB padding for TE factors. 235 if (IGC_IS_FLAG_ENABLED(EnableTEFactorsPadding)) 236 { 237 if (!checkedForTEFactorsPadding) 238 { 239 checkedForTEFactorsPadding = true; 240 241 BasicBlock* bb = dyn_cast<BasicBlock>(BI); 242 if (IsTEFactorsPaddingAllowed(bb, tessShaderDomain)) 243 { 244 Value* undef = llvm::UndefValue::get(Type::getFloatTy(F.getContext())); 245 Value* data[8] = { undef,undef,undef,undef,undef,undef,undef,undef }; 246 // Add padding at offset 0 247 AddURBWrite(builder.getInt32(0), builder.getInt32(0xF), data, inst); 248 // Add padding at offset 1 249 AddURBWrite(builder.getInt32(1), builder.getInt32(0xF), data, inst); 250 } 251 } 252 } 253 254 // The URB Location for tessellation factors spans the first two offsets 255 // offset 0 and 1. The tessellation factors occupy the two offsets as mentioned below 256 // Quad domain has 4 outer and 2 inner tessellation factors 257 // Triangle domain has 3 outer and 1 inner tessellation factor 258 // Isolines have 2 outer tessellation factors 259 // 260 //---------------------------------------------------------------------------------- 261 //| URB Offset 1.3 | URB Offset 1.2 | URB Offset 1.1 | URB Offset 1.0 | 262 //---------------------------------------------------------------------------------- 263 //| OUTER_QUAD_U_EQ_0 | OUTER_QUAD_V_EQ_0 | OUTER_QUAD_U_EQ_1 | OUTER_QUAD_V_EQ_1 | 264 //---------------------------------------------------------------------------------- 265 //| OUTER_TRI_U_EQ_0 | OUTER_TRI_V_EQ_0 | OUTER_TRI_W_EQ_0 | INNER_TRI_INSIDE | 266 //---------------------------------------------------------------------------------- 267 //| OUTER_LINE_DETAIL | OUTER_LINE_DENSITY | | | 268 //---------------------------------------------------------------------------------- 269 //------------------------------------------------------------------------------------ 270 //| URB Offset 0.3 | URB Offset 0.2 | URB Offset 0.1 | URB Offset 0.0 | 271 //------------------------------------------------------------------------------------ 272 //| INNER_QUAD_U_INSIDE | INNER_OUTER_QUAD_V_INSIDE | | | 273 //------------------------------------------------------------------------------------ 274 //| | | | | 275 //------------------------------------------------------------------------------------ 276 //| | | | | 277 //------------------------------------------------------------------------------------ 278 279 // offset into URB is 1 for outerScalarTessFactors and 280 // 1 if its triangle domain and inner scalar tessellation factor 281 // 0 if its the quad domain inner tessellation factor 282 int offset = (IID == GenISAIntrinsic::GenISA_OuterScalarTessFactors) ? 1 : 283 (tessShaderDomain == USC::TESSELLATOR_DOMAIN_TRI) ? 1 : 0; 284 Value* pOffsetVal = builder.getInt32(offset); 285 286 Value* data[8] = 287 { 288 inst->getArgOperand(1), 289 inst->getArgOperand(1), 290 inst->getArgOperand(1), 291 inst->getArgOperand(1), 292 inst->getArgOperand(1), 293 inst->getArgOperand(1), 294 inst->getArgOperand(1), 295 inst->getArgOperand(1) 296 }; 297 298 if (llvm::isa<ConstantInt>(inst->getOperand(0))) 299 { 300 unsigned int tessFactor = int_cast<unsigned int>(llvm::cast<ConstantInt>(inst->getOperand(0))->getZExtValue()); 301 302 if (tessShaderDomain == USC::TESSELLATOR_DOMAIN_ISOLINE) 303 { 304 // For isolines first tessellation factor(0) is line-density. The second one(1) 305 // is line-detail tessellation factor. To store them properly in patch header 306 // we need to set correct bits in URB write mask i.e. 0x4 for line-density 307 // and 0x8 for line-detail. Swap the indexes. 308 tessFactor ^= 1; 309 } 310 else 311 { 312 tessFactor = ((IID == GenISAIntrinsic::GenISA_InnerScalarTessFactors) && 313 (tessShaderDomain == USC::TESSELLATOR_DOMAIN_TRI)) ? 3 : tessFactor; 314 } 315 316 AddURBWrite(pOffsetVal, 317 builder.getInt32(1 << (3 - tessFactor)), 318 data, 319 inst); 320 } 321 else 322 { 323 builder.SetInsertPoint(inst); 324 Value* pSubRes = nullptr; 325 Value* pSubResRHS = nullptr; 326 if ((IID == GenISAIntrinsic::GenISA_InnerScalarTessFactors) 327 && (tessShaderDomain == USC::TESSELLATOR_DOMAIN_TRI)) 328 { 329 pSubRes = inst->getOperand(0); 330 } 331 else 332 { 333 if (tessShaderDomain == USC::TESSELLATOR_DOMAIN_ISOLINE) 334 { 335 pSubResRHS = builder.CreateXor(inst->getOperand(0), builder.getInt32(1)); 336 } 337 else 338 { 339 pSubResRHS = inst->getOperand(0); 340 } 341 342 pSubRes = builder.CreateSub( 343 builder.getInt32(3), 344 pSubResRHS); 345 } 346 347 Value* pShiftVal = builder.CreateShl( 348 builder.getInt32(1), 349 pSubRes); 350 351 AddURBWrite(pOffsetVal, pShiftVal, data, inst); 352 } 353 instructionToRemove.push_back(inst); 354 } 355 356 if (IID == GenISAIntrinsic::GenISA_HSURBPatchHeaderRead || 357 IID == GenISAIntrinsic::GenISA_DCL_HSPatchConstInputVec) 358 { 359 builder.SetInsertPoint(inst); 360 361 const bool readHeader = IID == GenISAIntrinsic::GenISA_HSURBPatchHeaderRead; 362 363 llvm::Value* urbOffset = nullptr; 364 if (!readHeader) 365 { 366 // Patch constant output read 367 llvm::Value* attributeIndex = inst->getOperand(0); 368 369 if (llvm::ConstantInt * constAttributeIndex = llvm::dyn_cast<llvm::ConstantInt>(attributeIndex)) 370 { 371 // Constant, so global offset is sufficient in urb read message 372 urbOffset = builder.getInt32( 373 int_cast<unsigned int>(constAttributeIndex->getZExtValue()) + vertexHeaderSize); 374 } 375 else 376 { 377 // Runtime value, so per-slot offset is required in urb read message 378 urbOffset = builder.CreateAdd(attributeIndex, builder.getInt32(vertexHeaderSize)); 379 } 380 } 381 else 382 { 383 // Patch header read 384 urbOffset = builder.getInt32(0); 385 } 386 387 AddURBReadOutput(urbOffset, inst); 388 instructionToRemove.push_back(inst); 389 } 390 391 if (IID == GenISAIntrinsic::GenISA_DCL_HSOutputCntrlPtInputVec) 392 { 393 /// Returns the size of the output patch constant block in owords 394 /// Note: The PatchConstantOutput size must be 32B-aligned when rendering is enabled 395 /// Therefore, the PatchConstantOutput size is also rounded up to a multiple of 2. 396 auto GetPatchConstantOutputSize = [&]()->QuadEltUnit 397 { 398 constexpr unsigned int paychConstantHeaderSize = 2; // in owords 399 const unsigned int numPatchConstantsPadded = iSTD::Align( 400 m_hullShaderInfo->GetProperties().m_pMaxPatchConstantSignatureDeclarations, 2); 401 return QuadEltUnit(paychConstantHeaderSize + numPatchConstantsPadded); 402 }; 403 404 builder.SetInsertPoint(inst); 405 406 const unsigned int maxOutputSignatureCount = m_hullShaderInfo->GetProperties().m_pMaxOutputSignatureCount; 407 408 llvm::Value* const vertexIndex = inst->getOperand(0); 409 llvm::Value* const attributeIndex = inst->getOperand(1); 410 411 const unsigned int patchConstantOutputSize = GetPatchConstantOutputSize().Count(); 412 llvm::Value* urbOffset = nullptr; 413 414 // Compute offset from vertex index 415 if (llvm::ConstantInt * constVertexIndex = llvm::dyn_cast<llvm::ConstantInt>(vertexIndex)) 416 { 417 // Constant, so global offset is sufficient in urb read message 418 urbOffset = builder.getInt32(patchConstantOutputSize + 419 int_cast<unsigned int>(constVertexIndex->getZExtValue() * maxOutputSignatureCount)); 420 } 421 else 422 { 423 // Runtime value, so per-slot offset is required in urb read message 424 if (QuadEltUnit(maxOutputSignatureCount).Count() != 1) 425 { 426 urbOffset = builder.CreateAdd(builder.getInt32(patchConstantOutputSize), 427 builder.CreateMul(builder.getInt32(maxOutputSignatureCount), vertexIndex)); 428 } 429 else 430 { 431 urbOffset = builder.CreateAdd(builder.getInt32(patchConstantOutputSize), vertexIndex); 432 } 433 } 434 435 // Compute additional offset coming from attribute index 436 IGC_ASSERT(urbOffset); 437 if (llvm::isa<llvm::ConstantInt>(urbOffset) && llvm::isa<llvm::ConstantInt>(attributeIndex)) 438 { 439 urbOffset = builder.getInt32(int_cast<unsigned int>( 440 llvm::cast<llvm::ConstantInt>(urbOffset)->getZExtValue() + 441 llvm::cast<llvm::ConstantInt>(attributeIndex)->getZExtValue())); 442 } 443 else 444 { 445 urbOffset = builder.CreateAdd(urbOffset, attributeIndex); 446 } 447 448 AddURBReadOutput(urbOffset, inst); 449 instructionToRemove.push_back(inst); 450 } 451 } 452 } 453 } 454 455 for (unsigned int i = 0; i < instructionToRemove.size(); i++) 456 { 457 instructionToRemove[i]->eraseFromParent(); 458 } 459 } 460 GetDomainType()461 unsigned int HullShaderLowering::GetDomainType() 462 { 463 unsigned int tessShaderDomain = USC::TESSELLATOR_DOMAIN_ISOLINE; 464 llvm::NamedMDNode* pMetaData = m_module->getOrInsertNamedMetadata("TessellationShaderDomain"); 465 if (pMetaData && (pMetaData->getNumOperands() == 1)) 466 { 467 llvm::MDNode* pTessShaderDomain = pMetaData->getOperand(0); 468 if (pTessShaderDomain) 469 { 470 tessShaderDomain = int_cast<uint32_t>( 471 mdconst::dyn_extract<ConstantInt>(pTessShaderDomain->getOperand(0))->getZExtValue()); 472 } 473 } 474 return tessShaderDomain; 475 } 476 IsTEFactorsPaddingAllowed(llvm::BasicBlock * bb,unsigned int tessShaderDomain)477 bool HullShaderLowering::IsTEFactorsPaddingAllowed(llvm::BasicBlock* bb, unsigned int tessShaderDomain) 478 { 479 unsigned int outerTessellationFactorsMask = 0; 480 unsigned int innerTessellationFactorsMask = 0; 481 for (auto II = bb->begin(), IE = bb->end(); II != IE; II++) 482 { 483 if (GenIntrinsicInst * inst = dyn_cast<GenIntrinsicInst>(II)) 484 { 485 GenISAIntrinsic::ID IID = inst->getIntrinsicID(); 486 if ((IID == GenISAIntrinsic::GenISA_OuterScalarTessFactors) || 487 (IID == GenISAIntrinsic::GenISA_InnerScalarTessFactors)) 488 { 489 if (llvm::isa<ConstantInt>(inst->getOperand(0))) 490 { 491 unsigned int factor = int_cast<unsigned int>(llvm::cast<ConstantInt>(inst->getOperand(0))->getZExtValue()); 492 if (IID == GenISAIntrinsic::GenISA_OuterScalarTessFactors) 493 { 494 outerTessellationFactorsMask |= (1 << factor); 495 } 496 else 497 { 498 innerTessellationFactorsMask |= (1 << factor); 499 } 500 } 501 } 502 } 503 } 504 505 bool paddingAllowed = false; 506 // Allow padding only in case current basic block writes complete set of tessellation factors 507 // defined for given domain. 508 if (tessShaderDomain == USC::TESSELLATOR_DOMAIN_TRI) 509 { 510 // For triangle domain there are three outer tessellation factors and one inner tessellation factor. 511 if ((outerTessellationFactorsMask == 0x7) && (innerTessellationFactorsMask == 0x1)) paddingAllowed = true; 512 } 513 else if (tessShaderDomain == USC::TESSELLATOR_DOMAIN_QUAD) 514 { 515 // For quad domain there are four outer tessellation factors and two inner tessellation factors. 516 if ((outerTessellationFactorsMask == 0xF) && (innerTessellationFactorsMask == 0x3)) paddingAllowed = true; 517 } 518 else if (tessShaderDomain == USC::TESSELLATOR_DOMAIN_ISOLINE) 519 { 520 // For isoline domain there are two outer tessellation factors and no inner tessellation factors. 521 if ((outerTessellationFactorsMask == 0x3) && (innerTessellationFactorsMask == 0x0)) paddingAllowed = true; 522 } 523 return paddingAllowed; 524 } 525 AddURBWriteControlPointOutputs(Value * mask,Value * data[8],Instruction * prev)526 llvm::GenIntrinsicInst* HullShaderLowering::AddURBWriteControlPointOutputs(Value* mask, Value* data[8], Instruction* prev) 527 { 528 llvm::IRBuilder<> builder(m_module->getContext()); 529 builder.SetInsertPoint(prev); 530 531 // Now calculate the correct offset. This would be 532 // CPID * maxAttrIndex + maxPatchConstantOutputs + patchHeaderSize + attributeOffset 533 // Step1: mulRes = CPID * maxAttrIndex 534 llvm::GlobalVariable* pGlobal = m_module->getGlobalVariable("MaxNumOfOutputSignatureEntries"); 535 uint32_t maxOutputSignatureCount = int_cast<uint32_t>(llvm::cast<llvm::ConstantInt>(pGlobal->getInitializer())->getZExtValue()); 536 llvm::Value* controlPtId = prev->getOperand(5); 537 llvm::Value* m_pMulRes = nullptr; 538 llvm::Value* m_pFinalOffset = nullptr; 539 bool isOutputControlPointIdImmed = llvm::isa<llvm::ConstantInt>(controlPtId); 540 uint32_t outputControlPointid = 0; 541 542 if (isOutputControlPointIdImmed) 543 { 544 outputControlPointid = int_cast<uint32_t>(llvm::cast<llvm::ConstantInt>(controlPtId)->getZExtValue()); 545 } 546 547 if (QuadEltUnit(maxOutputSignatureCount).Count() != 1) 548 { 549 if (isOutputControlPointIdImmed) 550 { 551 m_pMulRes = builder.getInt32(outputControlPointid * QuadEltUnit(maxOutputSignatureCount).Count()); 552 } 553 else 554 { 555 m_pMulRes = builder.CreateMul(controlPtId, builder.getInt32(QuadEltUnit(maxOutputSignatureCount).Count())); 556 } 557 } 558 559 // Step2: m_pAddedPatchConstantOutput = maxPatchConstantOutputs + patchHeaderSize + attributeOffset 560 pGlobal = m_module->getGlobalVariable("MaxNumOfPatchConstantSignatureEntries"); 561 const uint32_t m_pMaxPatchConstantSignatureDeclarations = int_cast<uint32_t>(llvm::cast<llvm::ConstantInt>(pGlobal->getInitializer())->getZExtValue()); 562 const uint numPatchConstantsPadded = iSTD::Align(m_pMaxPatchConstantSignatureDeclarations, 2); 563 llvm::Value* attributeOffset = prev->getOperand(4); 564 bool isAttributeOffsetImmed = llvm::isa<llvm::ConstantInt>(attributeOffset); 565 uint32_t immedAttributeOffset = 0; 566 567 if (isAttributeOffsetImmed) 568 { 569 immedAttributeOffset = int_cast<uint32_t>(llvm::cast<llvm::ConstantInt>(prev->getOperand(4))->getZExtValue()); 570 } 571 // patch constant header is 2 QuadEltUnits 572 llvm::Value* m_pAddedPatchConstantOutput = builder.getInt32((QuadEltUnit(numPatchConstantsPadded + 2 + immedAttributeOffset)).Count()); 573 if (!isAttributeOffsetImmed) 574 { 575 m_pAddedPatchConstantOutput = builder.CreateAdd(m_pAddedPatchConstantOutput, attributeOffset); 576 } 577 578 // Step3: 579 // finalOffset = ( mulRes + m_addedPatchConstantOutput ) 580 if (m_pMulRes != nullptr) 581 { 582 if (isOutputControlPointIdImmed && isAttributeOffsetImmed) 583 { 584 uint32_t mulRes = int_cast<uint32_t>(llvm::cast<llvm::ConstantInt>(m_pMulRes)->getZExtValue()); 585 uint32_t addRes = int_cast<uint32_t>(llvm::cast<llvm::ConstantInt>(m_pAddedPatchConstantOutput)->getZExtValue()); 586 m_pFinalOffset = builder.getInt32(mulRes + addRes); 587 } 588 else 589 { 590 m_pFinalOffset = builder.CreateAdd(m_pMulRes, m_pAddedPatchConstantOutput); 591 } 592 } 593 else if (isOutputControlPointIdImmed && isAttributeOffsetImmed) 594 { 595 uint32_t addRes = int_cast<uint32_t>(llvm::cast<llvm::ConstantInt>(m_pAddedPatchConstantOutput)->getZExtValue()); 596 m_pFinalOffset = builder.getInt32(outputControlPointid + addRes); 597 } 598 else 599 { 600 m_pFinalOffset = builder.CreateAdd(controlPtId, m_pAddedPatchConstantOutput); 601 } 602 603 llvm::CallInst* write = AddURBWrite( 604 m_pFinalOffset, 605 mask, 606 data, 607 prev); 608 return (llvm::GenIntrinsicInst*)write; 609 } 610 611 /// Inserts new URBWrite instruction with given mask and arguments before 612 /// instuction 'prev'. 613 /// TODO: This should be a common function for all Lowering passes. AddURBWrite(llvm::Value * offset,llvm::Value * mask,llvm::Value * data[8],llvm::Instruction * prev)614 llvm::GenIntrinsicInst* HullShaderLowering::AddURBWrite( 615 llvm::Value* offset, 616 llvm::Value* mask, 617 llvm::Value* data[8], 618 llvm::Instruction* prev) 619 { 620 Value* arguments[] = 621 { 622 offset, 623 mask, 624 data[0], data[1], data[2], data[3], data[4], data[5], data[6], data[7] 625 }; 626 627 CallInst* write = GenIntrinsicInst::Create( 628 GenISAIntrinsic::getDeclaration(m_module, GenISAIntrinsic::GenISA_URBWrite), 629 arguments, 630 "", 631 prev); 632 633 write->setDebugLoc(prev->getDebugLoc()); 634 return (llvm::GenIntrinsicInst*)write; 635 } 636 AddURBRead(Value * index,Value * offset,Instruction * prev)637 void HullShaderLowering::AddURBRead(Value* index, Value* offset, Instruction* prev) 638 { 639 Value* arguments[] = 640 { 641 index, 642 offset 643 }; 644 645 Instruction* urbRead = GenIntrinsicInst::Create( 646 GenISAIntrinsic::getDeclaration(m_module, GenISAIntrinsic::GenISA_URBRead), 647 arguments, 648 "", 649 prev); 650 651 urbRead->setDebugLoc(prev->getDebugLoc()); 652 653 Value* vec4 = nullptr; 654 while (!prev->use_empty()) 655 { 656 auto I = prev->user_begin(); 657 if (ExtractElementInst * elem = dyn_cast<ExtractElementInst>(*I)) 658 { 659 Instruction* newExt = ExtractElementInst::Create(urbRead, elem->getIndexOperand(), "", elem); 660 newExt->setDebugLoc(prev->getDebugLoc()); 661 662 elem->replaceAllUsesWith(newExt); 663 elem->eraseFromParent(); 664 } 665 else 666 { 667 // the vector is used directly, extract the first 4 elements and recreate a vec4 668 if (vec4 == nullptr) 669 { 670 Value* data[4] = { nullptr, nullptr, nullptr, nullptr }; 671 Type* int32Ty = Type::getInt32Ty(m_module->getContext()); 672 673 VectorToElement(urbRead, data, int32Ty, prev, 4); 674 vec4 = ElementToVector(data, int32Ty, prev, 4); 675 } 676 677 (*I)->replaceUsesOfWith(prev, vec4); 678 } 679 } 680 } 681 AddURBReadOutput(llvm::Value * offset,llvm::Instruction * prev)682 void HullShaderLowering::AddURBReadOutput(llvm::Value* offset, llvm::Instruction* prev) 683 { 684 llvm::Value* arguments[] = 685 { 686 offset 687 }; 688 689 llvm::Instruction* urbReadOutput = llvm::GenIntrinsicInst::Create( 690 llvm::GenISAIntrinsic::getDeclaration(m_module, llvm::GenISAIntrinsic::GenISA_URBReadOutput), 691 arguments, 692 "", 693 prev); 694 urbReadOutput->setDebugLoc(prev->getDebugLoc()); 695 696 llvm::Value* vec4 = nullptr; 697 while (!prev->use_empty()) 698 { 699 auto I = prev->user_begin(); 700 if (ExtractElementInst * elem = dyn_cast<ExtractElementInst>(*I)) 701 { 702 Instruction* newExt = ExtractElementInst::Create(urbReadOutput, elem->getIndexOperand(), "", elem); 703 newExt->setDebugLoc(prev->getDebugLoc()); 704 705 elem->replaceAllUsesWith(newExt); 706 elem->eraseFromParent(); 707 } 708 else 709 { 710 // the vector is used directly, extract the first 4 elements and recreate a vec4 711 if (vec4 == nullptr) 712 { 713 Value* data[4] = { nullptr, nullptr, nullptr, nullptr }; 714 Type* int32Ty = Type::getInt32Ty(m_module->getContext()); 715 716 VectorToElement(urbReadOutput, data, int32Ty, prev, 4); 717 vec4 = ElementToVector(data, int32Ty, prev, 4); 718 } 719 720 (*I)->replaceUsesOfWith(prev, vec4); 721 } 722 } 723 } 724 CollectHullShaderProperties()725 CollectHullShaderProperties::CollectHullShaderProperties() : llvm::ImmutablePass(ID) 726 { 727 initializeCollectHullShaderPropertiesPass(*PassRegistry::getPassRegistry()); 728 } 729 gatherInformation(llvm::Function * kernel)730 void CollectHullShaderProperties::gatherInformation(llvm::Function* kernel) 731 { 732 llvm::Module* module = kernel->getParent(); 733 734 llvm::GlobalVariable* pGlobal = module->getGlobalVariable("HSOutputControlPointCount"); 735 m_hsProps.m_pOutputControlPointCount = int_cast<unsigned int>(llvm::cast<llvm::ConstantInt>(pGlobal->getInitializer())->getZExtValue()); 736 737 pGlobal = module->getGlobalVariable("TessInputControlPointCount"); 738 m_hsProps.m_pInputControlPointCount = int_cast<unsigned int>(llvm::cast<llvm::ConstantInt>(pGlobal->getInitializer())->getZExtValue()); 739 740 pGlobal = module->getGlobalVariable("MaxNumOfInputSignatureEntries"); 741 m_hsProps.m_pMaxInputSignatureCount = int_cast<unsigned int>(llvm::cast<llvm::ConstantInt>(pGlobal->getInitializer())->getZExtValue()); 742 743 pGlobal = module->getGlobalVariable("MaxNumOfOutputSignatureEntries"); 744 m_hsProps.m_pMaxOutputSignatureCount = int_cast<unsigned int>(llvm::cast<llvm::ConstantInt>(pGlobal->getInitializer())->getZExtValue()); 745 746 pGlobal = module->getGlobalVariable("MaxNumOfPatchConstantSignatureEntries"); 747 m_hsProps.m_pMaxPatchConstantSignatureDeclarations = int_cast<unsigned int>(llvm::cast<llvm::ConstantInt>(pGlobal->getInitializer())->getZExtValue()); 748 749 // Dispatch mode might be also determined based on MetaData (which might be treated as Global Variable). 750 m_hsProps.m_pShaderDispatchMode = DetermineDispatchMode(kernel); 751 752 m_hsProps.m_ForcedDispatchMask = GetForcedDispatchMask(kernel); 753 754 pGlobal = module->getGlobalVariable("ShaderHasClipCullInput"); 755 auto clipCullAsInput = (pGlobal == nullptr) ? false : true; 756 IGC::CodeGenContext* ctx = getAnalysis<CodeGenContextWrapper>().getCodeGenContext(); 757 if (ctx->getModuleMetaData()->URBInfo.has64BVertexHeaderInput) { 758 // In case we have no linking information we need the URB header to have a fixed size 759 clipCullAsInput = true; 760 } 761 762 m_hsProps.m_HasClipCullAsInput = clipCullAsInput; 763 } 764 DetermineDispatchMode(Function * kernel) const765 HullShaderDispatchModes CollectHullShaderProperties::DetermineDispatchMode(Function* kernel) const 766 { 767 HullShaderDispatchModes shaderDispatchMode = SINGLE_PATCH_DISPATCH_MODE; 768 llvm::NamedMDNode* pMetaData = kernel->getParent()->getNamedMetadata("HullShaderDispatchMode"); 769 if (pMetaData) 770 { 771 llvm::MDNode* pMdNode = pMetaData->getOperand(0); 772 if (pMdNode) 773 { 774 llvm::Metadata* pShaderDispatchMode = pMdNode->getOperand(0); 775 shaderDispatchMode = (HullShaderDispatchModes) 776 (llvm::mdconst::dyn_extract<ConstantInt>(pShaderDispatchMode))->getZExtValue(); 777 } 778 } 779 return shaderDispatchMode; 780 } 781 GetForcedDispatchMask(Function * kernel) const782 unsigned CollectHullShaderProperties::GetForcedDispatchMask(Function* kernel) const 783 { 784 unsigned dispatchMask = 0; 785 llvm::NamedMDNode* pMetaData = kernel->getParent()->getNamedMetadata("HullShaderForcedDispatchMask"); 786 if (pMetaData) 787 { 788 llvm::MDNode* pMdNode = pMetaData->getOperand(0); 789 if (pMdNode) 790 { 791 llvm::Metadata* pShaderForcedMask = pMdNode->getOperand(0); 792 dispatchMask = static_cast<unsigned>((llvm::mdconst::dyn_extract<ConstantInt>(pShaderForcedMask))->getZExtValue()); 793 } 794 } 795 return dispatchMask; 796 } 797 798 HullShaderProperties()799 HullShaderProperties::HullShaderProperties() : 800 m_pOutputControlPointCount(0), 801 m_pInputControlPointCount(0), 802 m_pMaxInputSignatureCount(0), 803 m_pMaxOutputSignatureCount(0), 804 m_pMaxPatchConstantSignatureDeclarations(0), 805 m_pShaderDispatchMode(SINGLE_PATCH_DISPATCH_MODE), 806 m_HasClipCullAsInput(false), 807 m_ForcedDispatchMask(0) 808 {} 809 GetMaxInputPushed() const810 unsigned int HullShaderProperties::GetMaxInputPushed() const 811 { 812 const unsigned int maxNumOfHSPushedInputs = 96; 813 uint numberOfPatches = (m_pShaderDispatchMode == EIGHT_PATCH_DISPATCH_MODE) ? 8 : 1; 814 815 // Determine how many of input attributes per InputControlPoint (Vertex) can be POTENTIALLY pushed 816 // in current dispatch mode for current topology ( InputPatch size ). 817 uint32_t maxNumOfPushedInputAttributesPerICP = 818 (m_pInputControlPointCount * numberOfPatches > 0) 819 ? maxNumOfHSPushedInputs / (m_pInputControlPointCount * numberOfPatches) 820 : maxNumOfHSPushedInputs; 821 822 // Input attributes can be pushed only in pairs, so we need to round down the limit. 823 maxNumOfPushedInputAttributesPerICP = iSTD::Align(maxNumOfPushedInputAttributesPerICP - 1, 2); 824 825 // Determine required number of input attributes. 826 // They can be pushed only in pairs. 827 uint32_t reqNumOfInputAttributesPerICP = iSTD::Align(m_pMaxInputSignatureCount, 2); 828 829 // TODO: reqNumOfInputAttributesPerICP will have to be incremented by size of Vertex Header 830 // in case of SGV inputs have to be taken into consideration (will be done in next step). 831 // reqNumOfInputAttributes += HeaderSize().Count(); 832 833 // Determine ACTUAL number of attributes that can be pushed. 834 // If the required number of input attributes is less that maximum potential number, 835 // than all of the will be pushed. 836 uint32_t actualNumOfPushedInputAttributesPerICP = 837 iSTD::Min(reqNumOfInputAttributesPerICP, maxNumOfPushedInputAttributesPerICP); 838 839 return actualNumOfPushedInputAttributesPerICP; 840 } 841 842 createHullShaderLoweringPass()843 llvm::FunctionPass* createHullShaderLoweringPass() 844 { 845 return new HullShaderLowering(); 846 } 847 848 } // namespace IGC 849