1 /*========================== begin_copyright_notice ============================
2 
3 Copyright (C) 2017-2021 Intel Corporation
4 
5 SPDX-License-Identifier: MIT
6 
7 ============================= end_copyright_notice ===========================*/
8 
9 #include "HullShaderLowering.hpp"
10 #include "Compiler/CISACodeGen/helper.h"
11 #include "Compiler/CISACodeGen/HullShaderCodeGen.hpp"
12 #include "Compiler/MetaDataUtilsWrapper.h"
13 #include "Compiler/IGCPassSupport.h"
14 #include "Probe/Assertion.h"
15 
16 namespace IGC
17 {
18     using namespace llvm;
19     using namespace IGCMD;
20 
21     class HullShaderLowering : public llvm::FunctionPass
22     {
23     public:
24         HullShaderLowering();
25         static char         ID;
26         virtual bool runOnFunction(llvm::Function& F) override;
27 
getAnalysisUsage(llvm::AnalysisUsage & AU) const28         virtual void getAnalysisUsage(llvm::AnalysisUsage& AU) const override
29         {
30             AU.setPreservesCFG();
31             AU.addRequired<MetaDataUtilsWrapper>();
32             AU.addRequired<CollectHullShaderProperties>();
33             AU.addRequired<CodeGenContextWrapper>();
34         }
35 
36     private:
37         void LowerIntrinsicInputOutput(llvm::Function& F);
38 
39         unsigned int GetDomainType();
40         bool IsTEFactorsPaddingAllowed(llvm::BasicBlock* bb, unsigned int tessShaderDomain);
41 
42         llvm::GenIntrinsicInst* AddURBWriteControlPointOutputs(
43             Value* mask,
44             Value* data[8],
45             Instruction* prev);
46 
47         llvm::GenIntrinsicInst* AddURBWrite(
48             llvm::Value* offset,
49             llvm::Value* mask,
50             llvm::Value* data[8],
51             llvm::Instruction* prev);
52 
53 
54         void AddURBRead(Value* index, Value* offset, Instruction* prev);
55 
56         void AddURBReadOutput(llvm::Value* offset, llvm::Instruction* prev);
57 
58         llvm::Module* m_module;
59 
60         std::map<Value*, std::vector<GenIntrinsicInst*>>  m_pControlPointOutputs;
61         QuadEltUnit m_headerSize;
62         CollectHullShaderProperties* m_hullShaderInfo;
63 
64     };
65 
66 #define PASS_FLAG "igc-collect-hull-shader-properties"
67 #define PASS_DESCRIPTION "Collect information related to hull shader"
68 #define PASS_CFG_ONLY false
69 #define PASS_ANALYSIS true
70     IGC_INITIALIZE_PASS_BEGIN(CollectHullShaderProperties, PASS_FLAG, PASS_DESCRIPTION, PASS_CFG_ONLY, PASS_ANALYSIS)
71         IGC_INITIALIZE_PASS_END(CollectHullShaderProperties, PASS_FLAG, PASS_DESCRIPTION, PASS_CFG_ONLY, PASS_ANALYSIS)
72 
73         // undef macros to avoid redefinition compiler warnings
74 #undef PASS_FLAG
75 #undef PASS_DESCRIPTION
76 #undef PASS_ANALYSIS
77 
78 #define PASS_FLAG "igc-hull-shader-lowering"
79 #define PASS_DESCRIPTION "Lower inputs outputs for hull shader"
80 #define PASS_CFG_ONLY false
81 #define PASS_ANALYSIS false
82         IGC_INITIALIZE_PASS_BEGIN(HullShaderLowering, PASS_FLAG, PASS_DESCRIPTION, PASS_CFG_ONLY, PASS_ANALYSIS)
83         IGC_INITIALIZE_PASS_DEPENDENCY(CollectHullShaderProperties)
84         IGC_INITIALIZE_PASS_DEPENDENCY(MetaDataUtilsWrapper)
85         IGC_INITIALIZE_PASS_END(HullShaderLowering, PASS_FLAG, PASS_DESCRIPTION, PASS_CFG_ONLY, PASS_ANALYSIS)
86 
87         char HullShaderLowering::ID = 0;
88     char CollectHullShaderProperties::ID = 0;
89 
HullShaderLowering()90     HullShaderLowering::HullShaderLowering() : FunctionPass(ID)
91     {
92         initializeHullShaderLoweringPass(*PassRegistry::getPassRegistry());
93     }
94 
runOnFunction(llvm::Function & F)95     bool HullShaderLowering::runOnFunction(llvm::Function& F)
96     {
97         MetaDataUtils* pMdUtils = getAnalysis<MetaDataUtilsWrapper>().getMetaDataUtils();
98         if (!isEntryFunc(pMdUtils, &F))
99         {
100             return false;
101         }
102         m_headerSize = QuadEltUnit(2);
103         m_hullShaderInfo = &getAnalysis<CollectHullShaderProperties>();
104         // Collect Hull shader information
105         m_hullShaderInfo->gatherInformation(&F);
106 
107         m_module = F.getParent();
108 
109         LowerIntrinsicInputOutput(F);
110         return false;
111     }
112 
    /// Walks every instruction of F and replaces the hull shader input/output
    /// GenISA intrinsics with URB read/write intrinsics created through the
    /// AddURBRead / AddURBReadOutput / AddURBWrite / AddURBWriteControlPointOutputs
    /// helpers.
    ///
    /// The original intrinsic calls are only collected during the traversal and
    /// are erased in a separate loop at the end, so no instruction is removed
    /// while the block is still being iterated.
    void HullShaderLowering::LowerIntrinsicInputOutput(Function& F)
    {
        SmallVector<Instruction*, 10> instructionToRemove;

        IRBuilder<> builder(F.getContext());

        IGC::CodeGenContext* ctx = getAnalysis<CodeGenContextWrapper>().getCodeGenContext();
        unsigned int tessShaderDomain = GetDomainType();

        for (auto BI = F.begin(), BE = F.end(); BI != BE; BI++)
        {
            m_pControlPointOutputs.clear();

            // The padding check scans the whole basic block, so it is done at most once per block.
            bool checkedForTEFactorsPadding = false;

            for (auto II = BI->begin(), IE = BI->end(); II != IE; II++)
            {
                if (GenIntrinsicInst * inst = dyn_cast<GenIntrinsicInst>(II))
                {
                    const GenISAIntrinsic::ID IID = inst->getIntrinsicID();
                    // In oword units
                    // 0 when there is no vertex header; otherwise 4 when clip/cull is used as input, else 2.
                    const unsigned int vertexHeaderSize = ctx->getModuleMetaData()->URBInfo.hasVertexHeader ?
                        (m_hullShaderInfo->GetProperties().m_HasClipCullAsInput ? 4 : 2) : 0;
                    if (IID == GenISAIntrinsic::GenISA_DCL_HSinputVec)
                    {
                        Value* index = nullptr;
                        if (llvm::isa<ConstantInt>(inst->getOperand(0)))
                        {
                            // In case of direct access of HSInputVec we need to be sure to not use vertex index
                            // bigger than number of declared ICP.
                            // This might happen in OGL, when number of Input Control Points might not be known
                            // during first compilation.
                            uint32_t usedIndex = int_cast<unsigned int>(llvm::cast<ConstantInt>(inst->getOperand(0))->getZExtValue());
                            uint32_t validIndex =
                                iSTD::Min(usedIndex, m_hullShaderInfo->GetProperties().m_pInputControlPointCount - 1);

                            index = builder.getInt32(validIndex);
                        }
                        else
                        {
                            index = inst->getOperand(0);
                        }

                        builder.SetInsertPoint(inst);

                        // The attribute offset (operand 1) is shifted past the vertex header.
                        AddURBRead(
                            index,
                            builder.CreateAdd(inst->getOperand(1), builder.getInt32(vertexHeaderSize)),
                            inst);
                        instructionToRemove.push_back(inst);
                    }

                    if (IID == GenISAIntrinsic::GenISA_PatchConstantOutput)
                    {
                        // handle GenISA_PatchConstantOutput intrinsic instructions:
                        // operands 0-3 are the data, operand 4 the output index, operand 5 the write mask
                        const uint patchConstantOutputIndex = 4;
                        Value* offsetVal = nullptr;
                        llvm::Value* pPatchConstantOffset = inst->getOperand(patchConstantOutputIndex);

                        // lower patch constant outputs to URBWrite
                        if (auto pPCOffsetIdx = llvm::dyn_cast<llvm::ConstantInt>(pPatchConstantOffset))
                        {
                            // patch constant output index is a constant.
                            const uint offsetIndex = int_cast<unsigned int>(pPCOffsetIdx->getZExtValue());
                            const QuadEltUnit staticOffset = QuadEltUnit(offsetIndex) + OctEltUnit(1); // Add 1 for vertex header
                            Value* staticOffsetVal = builder.getInt32(staticOffset.Count());
                            offsetVal = staticOffsetVal;
                        }
                        else
                        {
                            // patch constant output is indirect output
                            const QuadEltUnit staticOffset = OctEltUnit(1); // Add 1 for vertex header
                            Value* staticOffsetVal = builder.getInt32(staticOffset.Count());

                            Instruction* sum = BinaryOperator::CreateAdd(pPatchConstantOffset, staticOffsetVal);
                            sum->insertBefore(inst);
                            offsetVal = sum;
                        }

                        // Only the first four data slots carry values; the rest are undef.
                        Value* undef = llvm::UndefValue::get(Type::getFloatTy(F.getContext()));
                        Value* data[8] =
                        {
                            inst->getOperand(0),
                            inst->getOperand(1),
                            inst->getOperand(2),
                            inst->getOperand(3),
                            undef,
                            undef,
                            undef,
                            undef,
                        };
                        AddURBWrite(
                            offsetVal,
                            inst->getOperand(5),
                            data,
                            inst);
                        instructionToRemove.push_back(inst);
                    }

                    if (IID == GenISAIntrinsic::GenISA_OutputTessControlPoint)
                    {
                        // for each BB handle OutputHSControlPoint intrinsic instructions
                        // (operands 0-3 are the data, operand 6 is passed as the write mask;
                        // the helper reads operands 4 and 5 of 'inst' itself)
                        Value* undef = llvm::UndefValue::get(Type::getFloatTy(F.getContext()));
                        Value* data[8] =
                        {
                            inst->getOperand(0),
                            inst->getOperand(1),
                            inst->getOperand(2),
                            inst->getOperand(3),
                            undef,
                            undef,
                            undef,
                            undef,
                        };
                        AddURBWriteControlPointOutputs(inst->getOperand(6), data, inst);
                        instructionToRemove.push_back(inst);
                    }

                    if ((IID == GenISAIntrinsic::GenISA_OuterScalarTessFactors) ||
                        (IID == GenISAIntrinsic::GenISA_InnerScalarTessFactors))
                    {
                        // Apply URB padding for TE factors.
                        if (IGC_IS_FLAG_ENABLED(EnableTEFactorsPadding))
                        {
                            if (!checkedForTEFactorsPadding)
                            {
                                checkedForTEFactorsPadding = true;

                                BasicBlock* bb = dyn_cast<BasicBlock>(BI);
                                if (IsTEFactorsPaddingAllowed(bb, tessShaderDomain))
                                {
                                    Value* undef = llvm::UndefValue::get(Type::getFloatTy(F.getContext()));
                                    Value* data[8] = { undef,undef,undef,undef,undef,undef,undef,undef };
                                    // Add padding at offset 0
                                    AddURBWrite(builder.getInt32(0), builder.getInt32(0xF), data, inst);
                                    // Add padding at offset 1
                                    AddURBWrite(builder.getInt32(1), builder.getInt32(0xF), data, inst);
                                }
                            }
                        }

                        // The URB Location for tessellation factors spans the first two offsets
                        // offset 0 and 1. The tessellation factors occupy the two offsets as mentioned below
                        // Quad domain has 4 outer and 2 inner tessellation factors
                        // Triangle domain has 3 outer and 1 inner tessellation factor
                        // Isolines have 2 outer tessellation factors
                        //
                        //----------------------------------------------------------------------------------
                        //| URB Offset 1.3    | URB Offset 1.2     | URB Offset 1.1    | URB Offset 1.0     |
                        //----------------------------------------------------------------------------------
                        //| OUTER_QUAD_U_EQ_0 | OUTER_QUAD_V_EQ_0  | OUTER_QUAD_U_EQ_1 | OUTER_QUAD_V_EQ_1  |
                        //----------------------------------------------------------------------------------
                        //| OUTER_TRI_U_EQ_0  | OUTER_TRI_V_EQ_0   | OUTER_TRI_W_EQ_0  | INNER_TRI_INSIDE   |
                        //----------------------------------------------------------------------------------
                        //| OUTER_LINE_DETAIL | OUTER_LINE_DENSITY |                    |                    |
                        //----------------------------------------------------------------------------------
                        //------------------------------------------------------------------------------------
                        //| URB Offset 0.3      | URB Offset 0.2            | URB Offset 0.1 | URB Offset 0.0 |
                        //------------------------------------------------------------------------------------
                        //| INNER_QUAD_U_INSIDE | INNER_OUTER_QUAD_V_INSIDE |                |                |
                        //------------------------------------------------------------------------------------
                        //|                      |                            |                |                |
                        //------------------------------------------------------------------------------------
                        //|                     |                              |                  |                |
                        //------------------------------------------------------------------------------------

                        // offset into URB is 1 for outerScalarTessFactors and
                        // 1 if its triangle domain and inner scalar tessellation factor
                        // 0 if its the quad domain inner tessellation factor
                        int offset = (IID == GenISAIntrinsic::GenISA_OuterScalarTessFactors) ? 1 :
                            (tessShaderDomain == USC::TESSELLATOR_DOMAIN_TRI) ? 1 : 0;
                        Value* pOffsetVal = builder.getInt32(offset);

                        // The factor value (operand 1) is replicated into every data slot;
                        // the write mask computed below selects the channel that is stored.
                        Value* data[8] =
                        {
                            inst->getArgOperand(1),
                            inst->getArgOperand(1),
                            inst->getArgOperand(1),
                            inst->getArgOperand(1),
                            inst->getArgOperand(1),
                            inst->getArgOperand(1),
                            inst->getArgOperand(1),
                            inst->getArgOperand(1)
                        };

                        if (llvm::isa<ConstantInt>(inst->getOperand(0)))
                        {
                            unsigned int tessFactor = int_cast<unsigned int>(llvm::cast<ConstantInt>(inst->getOperand(0))->getZExtValue());

                            if (tessShaderDomain == USC::TESSELLATOR_DOMAIN_ISOLINE)
                            {
                                // For isolines first tessellation factor(0) is line-density. The second one(1)
                                // is line-detail tessellation factor. To store them properly in patch header
                                // we need to set correct bits in URB write mask i.e. 0x4 for line-density
                                // and 0x8 for line-detail. Swap the indexes.
                                tessFactor ^= 1;
                            }
                            else
                            {
                                // The triangle inner factor always uses index 3, i.e. mask bit 0.
                                tessFactor = ((IID == GenISAIntrinsic::GenISA_InnerScalarTessFactors) &&
                                    (tessShaderDomain == USC::TESSELLATOR_DOMAIN_TRI)) ? 3 : tessFactor;
                            }

                            // Mask bit is (3 - index): index 0 -> 0x8, index 3 -> 0x1.
                            AddURBWrite(pOffsetVal,
                                builder.getInt32(1 << (3 - tessFactor)),
                                data,
                                inst);
                        }
                        else
                        {
                            // Runtime factor index: the write mask is computed with IR instructions.
                            builder.SetInsertPoint(inst);
                            Value* pSubRes = nullptr;
                            Value* pSubResRHS = nullptr;
                            if ((IID == GenISAIntrinsic::GenISA_InnerScalarTessFactors)
                                && (tessShaderDomain == USC::TESSELLATOR_DOMAIN_TRI))
                            {
                                // NOTE(review): the shift amount here is the raw index, whereas the
                                // constant-index path above uses a fixed mask bit 0 for this case - confirm.
                                pSubRes = inst->getOperand(0);
                            }
                            else
                            {
                                if (tessShaderDomain == USC::TESSELLATOR_DOMAIN_ISOLINE)
                                {
                                    // Same index swap as in the constant-index isoline case.
                                    pSubResRHS = builder.CreateXor(inst->getOperand(0), builder.getInt32(1));
                                }
                                else
                                {
                                    pSubResRHS = inst->getOperand(0);
                                }

                                pSubRes = builder.CreateSub(
                                    builder.getInt32(3),
                                    pSubResRHS);
                            }

                            Value* pShiftVal = builder.CreateShl(
                                builder.getInt32(1),
                                pSubRes);

                            AddURBWrite(pOffsetVal, pShiftVal, data, inst);
                        }
                        instructionToRemove.push_back(inst);
                    }

                    if (IID == GenISAIntrinsic::GenISA_HSURBPatchHeaderRead ||
                        IID == GenISAIntrinsic::GenISA_DCL_HSPatchConstInputVec)
                    {
                        builder.SetInsertPoint(inst);

                        const bool readHeader = IID == GenISAIntrinsic::GenISA_HSURBPatchHeaderRead;

                        llvm::Value* urbOffset = nullptr;
                        if (!readHeader)
                        {
                            // Patch constant output read
                            llvm::Value* attributeIndex = inst->getOperand(0);

                            if (llvm::ConstantInt * constAttributeIndex = llvm::dyn_cast<llvm::ConstantInt>(attributeIndex))
                            {
                                // Constant, so global offset is sufficient in urb read message
                                urbOffset = builder.getInt32(
                                    int_cast<unsigned int>(constAttributeIndex->getZExtValue()) + vertexHeaderSize);
                            }
                            else
                            {
                                // Runtime value, so per-slot offset is required in urb read message
                                urbOffset = builder.CreateAdd(attributeIndex, builder.getInt32(vertexHeaderSize));
                            }
                        }
                        else
                        {
                            // Patch header read
                            urbOffset = builder.getInt32(0);
                        }

                        AddURBReadOutput(urbOffset, inst);
                        instructionToRemove.push_back(inst);
                    }

                    if (IID == GenISAIntrinsic::GenISA_DCL_HSOutputCntrlPtInputVec)
                    {
                        /// Returns the size of the output patch constant block in owords
                        /// Note: The PatchConstantOutput size must be 32B-aligned when rendering is enabled
                        /// Therefore, the PatchConstantOutput size is also rounded up to a multiple of 2.
                        auto GetPatchConstantOutputSize = [&]()->QuadEltUnit
                        {
                            constexpr unsigned int paychConstantHeaderSize = 2; // in owords
                            const unsigned int numPatchConstantsPadded = iSTD::Align(
                                m_hullShaderInfo->GetProperties().m_pMaxPatchConstantSignatureDeclarations, 2);
                            return QuadEltUnit(paychConstantHeaderSize + numPatchConstantsPadded);
                        };

                        builder.SetInsertPoint(inst);

                        const unsigned int maxOutputSignatureCount = m_hullShaderInfo->GetProperties().m_pMaxOutputSignatureCount;

                        llvm::Value* const vertexIndex = inst->getOperand(0);
                        llvm::Value* const attributeIndex = inst->getOperand(1);

                        const unsigned int patchConstantOutputSize = GetPatchConstantOutputSize().Count();
                        llvm::Value* urbOffset = nullptr;

                        // Compute offset from vertex index
                        if (llvm::ConstantInt * constVertexIndex = llvm::dyn_cast<llvm::ConstantInt>(vertexIndex))
                        {
                            // Constant, so global offset is sufficient in urb read message
                            urbOffset = builder.getInt32(patchConstantOutputSize +
                                int_cast<unsigned int>(constVertexIndex->getZExtValue() * maxOutputSignatureCount));
                        }
                        else
                        {
                            // Runtime value, so per-slot offset is required in urb read message
                            if (QuadEltUnit(maxOutputSignatureCount).Count() != 1)
                            {
                                urbOffset = builder.CreateAdd(builder.getInt32(patchConstantOutputSize),
                                    builder.CreateMul(builder.getInt32(maxOutputSignatureCount), vertexIndex));
                            }
                            else
                            {
                                // A stride of one needs no multiplication.
                                urbOffset = builder.CreateAdd(builder.getInt32(patchConstantOutputSize), vertexIndex);
                            }
                        }

                        // Compute additional offset coming from attribute index
                        IGC_ASSERT(urbOffset);
                        if (llvm::isa<llvm::ConstantInt>(urbOffset) && llvm::isa<llvm::ConstantInt>(attributeIndex))
                        {
                            // Both parts are constants, so the sum is computed here instead of in IR.
                            urbOffset = builder.getInt32(int_cast<unsigned int>(
                                llvm::cast<llvm::ConstantInt>(urbOffset)->getZExtValue() +
                                llvm::cast<llvm::ConstantInt>(attributeIndex)->getZExtValue()));
                        }
                        else
                        {
                            urbOffset = builder.CreateAdd(urbOffset, attributeIndex);
                        }

                        AddURBReadOutput(urbOffset, inst);
                        instructionToRemove.push_back(inst);
                    }
                }
            }
        }

        // Erase the lowered intrinsic calls now that the traversal is finished.
        for (unsigned int i = 0; i < instructionToRemove.size(); i++)
        {
            instructionToRemove[i]->eraseFromParent();
        }
    }
460 
GetDomainType()461     unsigned int HullShaderLowering::GetDomainType()
462     {
463         unsigned int tessShaderDomain = USC::TESSELLATOR_DOMAIN_ISOLINE;
464         llvm::NamedMDNode* pMetaData = m_module->getOrInsertNamedMetadata("TessellationShaderDomain");
465         if (pMetaData && (pMetaData->getNumOperands() == 1))
466         {
467             llvm::MDNode* pTessShaderDomain = pMetaData->getOperand(0);
468             if (pTessShaderDomain)
469             {
470                 tessShaderDomain = int_cast<uint32_t>(
471                     mdconst::dyn_extract<ConstantInt>(pTessShaderDomain->getOperand(0))->getZExtValue());
472             }
473         }
474         return tessShaderDomain;
475     }
476 
IsTEFactorsPaddingAllowed(llvm::BasicBlock * bb,unsigned int tessShaderDomain)477     bool HullShaderLowering::IsTEFactorsPaddingAllowed(llvm::BasicBlock* bb, unsigned int tessShaderDomain)
478     {
479         unsigned int outerTessellationFactorsMask = 0;
480         unsigned int innerTessellationFactorsMask = 0;
481         for (auto II = bb->begin(), IE = bb->end(); II != IE; II++)
482         {
483             if (GenIntrinsicInst * inst = dyn_cast<GenIntrinsicInst>(II))
484             {
485                 GenISAIntrinsic::ID IID = inst->getIntrinsicID();
486                 if ((IID == GenISAIntrinsic::GenISA_OuterScalarTessFactors) ||
487                     (IID == GenISAIntrinsic::GenISA_InnerScalarTessFactors))
488                 {
489                     if (llvm::isa<ConstantInt>(inst->getOperand(0)))
490                     {
491                         unsigned int factor = int_cast<unsigned int>(llvm::cast<ConstantInt>(inst->getOperand(0))->getZExtValue());
492                         if (IID == GenISAIntrinsic::GenISA_OuterScalarTessFactors)
493                         {
494                             outerTessellationFactorsMask |= (1 << factor);
495                         }
496                         else
497                         {
498                             innerTessellationFactorsMask |= (1 << factor);
499                         }
500                     }
501                 }
502             }
503         }
504 
505         bool paddingAllowed = false;
506         // Allow padding only in case current basic block writes complete set of tessellation factors
507         // defined for given domain.
508         if (tessShaderDomain == USC::TESSELLATOR_DOMAIN_TRI)
509         {
510             // For triangle domain there are three outer tessellation factors and one inner tessellation factor.
511             if ((outerTessellationFactorsMask == 0x7) && (innerTessellationFactorsMask == 0x1)) paddingAllowed = true;
512         }
513         else if (tessShaderDomain == USC::TESSELLATOR_DOMAIN_QUAD)
514         {
515             // For quad domain there are four outer tessellation factors and two inner tessellation factors.
516             if ((outerTessellationFactorsMask == 0xF) && (innerTessellationFactorsMask == 0x3)) paddingAllowed = true;
517         }
518         else if (tessShaderDomain == USC::TESSELLATOR_DOMAIN_ISOLINE)
519         {
520             // For isoline domain there are two outer tessellation factors and no inner tessellation factors.
521             if ((outerTessellationFactorsMask == 0x3) && (innerTessellationFactorsMask == 0x0)) paddingAllowed = true;
522         }
523         return paddingAllowed;
524     }
525 
AddURBWriteControlPointOutputs(Value * mask,Value * data[8],Instruction * prev)526     llvm::GenIntrinsicInst* HullShaderLowering::AddURBWriteControlPointOutputs(Value* mask, Value* data[8], Instruction* prev)
527     {
528         llvm::IRBuilder<> builder(m_module->getContext());
529         builder.SetInsertPoint(prev);
530 
531         // Now calculate the correct offset. This would be
532         // CPID * maxAttrIndex + maxPatchConstantOutputs + patchHeaderSize + attributeOffset
533         // Step1: mulRes = CPID * maxAttrIndex
534         llvm::GlobalVariable* pGlobal = m_module->getGlobalVariable("MaxNumOfOutputSignatureEntries");
535         uint32_t maxOutputSignatureCount = int_cast<uint32_t>(llvm::cast<llvm::ConstantInt>(pGlobal->getInitializer())->getZExtValue());
536         llvm::Value* controlPtId = prev->getOperand(5);
537         llvm::Value* m_pMulRes = nullptr;
538         llvm::Value* m_pFinalOffset = nullptr;
539         bool isOutputControlPointIdImmed = llvm::isa<llvm::ConstantInt>(controlPtId);
540         uint32_t outputControlPointid = 0;
541 
542         if (isOutputControlPointIdImmed)
543         {
544             outputControlPointid = int_cast<uint32_t>(llvm::cast<llvm::ConstantInt>(controlPtId)->getZExtValue());
545         }
546 
547         if (QuadEltUnit(maxOutputSignatureCount).Count() != 1)
548         {
549             if (isOutputControlPointIdImmed)
550             {
551                 m_pMulRes = builder.getInt32(outputControlPointid * QuadEltUnit(maxOutputSignatureCount).Count());
552             }
553             else
554             {
555                 m_pMulRes = builder.CreateMul(controlPtId, builder.getInt32(QuadEltUnit(maxOutputSignatureCount).Count()));
556             }
557         }
558 
559         // Step2: m_pAddedPatchConstantOutput = maxPatchConstantOutputs + patchHeaderSize + attributeOffset
560         pGlobal = m_module->getGlobalVariable("MaxNumOfPatchConstantSignatureEntries");
561         const uint32_t m_pMaxPatchConstantSignatureDeclarations = int_cast<uint32_t>(llvm::cast<llvm::ConstantInt>(pGlobal->getInitializer())->getZExtValue());
562         const uint numPatchConstantsPadded = iSTD::Align(m_pMaxPatchConstantSignatureDeclarations, 2);
563         llvm::Value* attributeOffset = prev->getOperand(4);
564         bool isAttributeOffsetImmed = llvm::isa<llvm::ConstantInt>(attributeOffset);
565         uint32_t immedAttributeOffset = 0;
566 
567         if (isAttributeOffsetImmed)
568         {
569             immedAttributeOffset = int_cast<uint32_t>(llvm::cast<llvm::ConstantInt>(prev->getOperand(4))->getZExtValue());
570         }
571         // patch constant header is 2 QuadEltUnits
572         llvm::Value* m_pAddedPatchConstantOutput = builder.getInt32((QuadEltUnit(numPatchConstantsPadded + 2 + immedAttributeOffset)).Count());
573         if (!isAttributeOffsetImmed)
574         {
575             m_pAddedPatchConstantOutput = builder.CreateAdd(m_pAddedPatchConstantOutput, attributeOffset);
576         }
577 
578         // Step3:
579         // finalOffset = ( mulRes + m_addedPatchConstantOutput )
580         if (m_pMulRes != nullptr)
581         {
582             if (isOutputControlPointIdImmed && isAttributeOffsetImmed)
583             {
584                 uint32_t mulRes = int_cast<uint32_t>(llvm::cast<llvm::ConstantInt>(m_pMulRes)->getZExtValue());
585                 uint32_t addRes = int_cast<uint32_t>(llvm::cast<llvm::ConstantInt>(m_pAddedPatchConstantOutput)->getZExtValue());
586                 m_pFinalOffset = builder.getInt32(mulRes + addRes);
587             }
588             else
589             {
590                 m_pFinalOffset = builder.CreateAdd(m_pMulRes, m_pAddedPatchConstantOutput);
591             }
592         }
593         else if (isOutputControlPointIdImmed && isAttributeOffsetImmed)
594         {
595             uint32_t addRes = int_cast<uint32_t>(llvm::cast<llvm::ConstantInt>(m_pAddedPatchConstantOutput)->getZExtValue());
596             m_pFinalOffset = builder.getInt32(outputControlPointid + addRes);
597         }
598         else
599         {
600             m_pFinalOffset = builder.CreateAdd(controlPtId, m_pAddedPatchConstantOutput);
601         }
602 
603         llvm::CallInst* write = AddURBWrite(
604             m_pFinalOffset,
605             mask,
606             data,
607             prev);
608         return (llvm::GenIntrinsicInst*)write;
609     }
610 
611     /// Inserts new URBWrite instruction with given mask and arguments before
612     /// instuction 'prev'.
613     /// TODO: This should be a common function for all Lowering passes.
AddURBWrite(llvm::Value * offset,llvm::Value * mask,llvm::Value * data[8],llvm::Instruction * prev)614     llvm::GenIntrinsicInst* HullShaderLowering::AddURBWrite(
615         llvm::Value* offset,
616         llvm::Value* mask,
617         llvm::Value* data[8],
618         llvm::Instruction* prev)
619     {
620         Value* arguments[] =
621         {
622             offset,
623             mask,
624             data[0], data[1], data[2], data[3], data[4], data[5], data[6], data[7]
625         };
626 
627         CallInst* write = GenIntrinsicInst::Create(
628             GenISAIntrinsic::getDeclaration(m_module, GenISAIntrinsic::GenISA_URBWrite),
629             arguments,
630             "",
631             prev);
632 
633         write->setDebugLoc(prev->getDebugLoc());
634         return (llvm::GenIntrinsicInst*)write;
635     }
636 
AddURBRead(Value * index,Value * offset,Instruction * prev)637     void HullShaderLowering::AddURBRead(Value* index, Value* offset, Instruction* prev)
638     {
639         Value* arguments[] =
640         {
641             index,
642             offset
643         };
644 
645         Instruction* urbRead = GenIntrinsicInst::Create(
646             GenISAIntrinsic::getDeclaration(m_module, GenISAIntrinsic::GenISA_URBRead),
647             arguments,
648             "",
649             prev);
650 
651         urbRead->setDebugLoc(prev->getDebugLoc());
652 
653         Value* vec4 = nullptr;
654         while (!prev->use_empty())
655         {
656             auto I = prev->user_begin();
657             if (ExtractElementInst * elem = dyn_cast<ExtractElementInst>(*I))
658             {
659                 Instruction* newExt = ExtractElementInst::Create(urbRead, elem->getIndexOperand(), "", elem);
660                 newExt->setDebugLoc(prev->getDebugLoc());
661 
662                 elem->replaceAllUsesWith(newExt);
663                 elem->eraseFromParent();
664             }
665             else
666             {
667                 // the vector is used directly, extract the first 4 elements and recreate a vec4
668                 if (vec4 == nullptr)
669                 {
670                     Value* data[4] = { nullptr, nullptr, nullptr, nullptr };
671                     Type* int32Ty = Type::getInt32Ty(m_module->getContext());
672 
673                     VectorToElement(urbRead, data, int32Ty, prev, 4);
674                     vec4 = ElementToVector(data, int32Ty, prev, 4);
675                 }
676 
677                 (*I)->replaceUsesOfWith(prev, vec4);
678             }
679         }
680     }
681 
AddURBReadOutput(llvm::Value * offset,llvm::Instruction * prev)682     void HullShaderLowering::AddURBReadOutput(llvm::Value* offset, llvm::Instruction* prev)
683     {
684         llvm::Value* arguments[] =
685         {
686             offset
687         };
688 
689         llvm::Instruction* urbReadOutput = llvm::GenIntrinsicInst::Create(
690             llvm::GenISAIntrinsic::getDeclaration(m_module, llvm::GenISAIntrinsic::GenISA_URBReadOutput),
691             arguments,
692             "",
693             prev);
694         urbReadOutput->setDebugLoc(prev->getDebugLoc());
695 
696         llvm::Value* vec4 = nullptr;
697         while (!prev->use_empty())
698         {
699             auto I = prev->user_begin();
700             if (ExtractElementInst * elem = dyn_cast<ExtractElementInst>(*I))
701             {
702                 Instruction* newExt = ExtractElementInst::Create(urbReadOutput, elem->getIndexOperand(), "", elem);
703                 newExt->setDebugLoc(prev->getDebugLoc());
704 
705                 elem->replaceAllUsesWith(newExt);
706                 elem->eraseFromParent();
707             }
708             else
709             {
710                 // the vector is used directly, extract the first 4 elements and recreate a vec4
711                 if (vec4 == nullptr)
712                 {
713                     Value* data[4] = { nullptr, nullptr, nullptr, nullptr };
714                     Type* int32Ty = Type::getInt32Ty(m_module->getContext());
715 
716                     VectorToElement(urbReadOutput, data, int32Ty, prev, 4);
717                     vec4 = ElementToVector(data, int32Ty, prev, 4);
718                 }
719 
720                 (*I)->replaceUsesOfWith(prev, vec4);
721             }
722         }
723     }
724 
CollectHullShaderProperties()725     CollectHullShaderProperties::CollectHullShaderProperties() : llvm::ImmutablePass(ID)
726     {
727         initializeCollectHullShaderPropertiesPass(*PassRegistry::getPassRegistry());
728     }
729 
gatherInformation(llvm::Function * kernel)730     void CollectHullShaderProperties::gatherInformation(llvm::Function* kernel)
731     {
732         llvm::Module* module = kernel->getParent();
733 
734         llvm::GlobalVariable* pGlobal = module->getGlobalVariable("HSOutputControlPointCount");
735         m_hsProps.m_pOutputControlPointCount = int_cast<unsigned int>(llvm::cast<llvm::ConstantInt>(pGlobal->getInitializer())->getZExtValue());
736 
737         pGlobal = module->getGlobalVariable("TessInputControlPointCount");
738         m_hsProps.m_pInputControlPointCount = int_cast<unsigned int>(llvm::cast<llvm::ConstantInt>(pGlobal->getInitializer())->getZExtValue());
739 
740         pGlobal = module->getGlobalVariable("MaxNumOfInputSignatureEntries");
741         m_hsProps.m_pMaxInputSignatureCount = int_cast<unsigned int>(llvm::cast<llvm::ConstantInt>(pGlobal->getInitializer())->getZExtValue());
742 
743         pGlobal = module->getGlobalVariable("MaxNumOfOutputSignatureEntries");
744         m_hsProps.m_pMaxOutputSignatureCount = int_cast<unsigned int>(llvm::cast<llvm::ConstantInt>(pGlobal->getInitializer())->getZExtValue());
745 
746         pGlobal = module->getGlobalVariable("MaxNumOfPatchConstantSignatureEntries");
747         m_hsProps.m_pMaxPatchConstantSignatureDeclarations = int_cast<unsigned int>(llvm::cast<llvm::ConstantInt>(pGlobal->getInitializer())->getZExtValue());
748 
749         // Dispatch mode might be also determined based on MetaData (which might be treated as Global Variable).
750         m_hsProps.m_pShaderDispatchMode = DetermineDispatchMode(kernel);
751 
752         m_hsProps.m_ForcedDispatchMask = GetForcedDispatchMask(kernel);
753 
754         pGlobal = module->getGlobalVariable("ShaderHasClipCullInput");
755         auto clipCullAsInput = (pGlobal == nullptr) ? false : true;
756         IGC::CodeGenContext* ctx = getAnalysis<CodeGenContextWrapper>().getCodeGenContext();
757         if (ctx->getModuleMetaData()->URBInfo.has64BVertexHeaderInput) {
758             // In case we have no linking information we need the URB header to have a fixed size
759             clipCullAsInput = true;
760         }
761 
762         m_hsProps.m_HasClipCullAsInput = clipCullAsInput;
763     }
764 
DetermineDispatchMode(Function * kernel) const765     HullShaderDispatchModes CollectHullShaderProperties::DetermineDispatchMode(Function* kernel) const
766     {
767         HullShaderDispatchModes shaderDispatchMode = SINGLE_PATCH_DISPATCH_MODE;
768         llvm::NamedMDNode* pMetaData = kernel->getParent()->getNamedMetadata("HullShaderDispatchMode");
769         if (pMetaData)
770         {
771             llvm::MDNode* pMdNode = pMetaData->getOperand(0);
772             if (pMdNode)
773             {
774                 llvm::Metadata* pShaderDispatchMode = pMdNode->getOperand(0);
775                 shaderDispatchMode = (HullShaderDispatchModes)
776                     (llvm::mdconst::dyn_extract<ConstantInt>(pShaderDispatchMode))->getZExtValue();
777             }
778         }
779         return shaderDispatchMode;
780     }
781 
GetForcedDispatchMask(Function * kernel) const782     unsigned CollectHullShaderProperties::GetForcedDispatchMask(Function* kernel) const
783     {
784         unsigned dispatchMask = 0;
785         llvm::NamedMDNode* pMetaData = kernel->getParent()->getNamedMetadata("HullShaderForcedDispatchMask");
786         if (pMetaData)
787         {
788             llvm::MDNode* pMdNode = pMetaData->getOperand(0);
789             if (pMdNode)
790             {
791                 llvm::Metadata* pShaderForcedMask = pMdNode->getOperand(0);
792                 dispatchMask = static_cast<unsigned>((llvm::mdconst::dyn_extract<ConstantInt>(pShaderForcedMask))->getZExtValue());
793             }
794         }
795         return dispatchMask;
796     }
797 
798 
HullShaderProperties()799     HullShaderProperties::HullShaderProperties() :
800         m_pOutputControlPointCount(0),
801         m_pInputControlPointCount(0),
802         m_pMaxInputSignatureCount(0),
803         m_pMaxOutputSignatureCount(0),
804         m_pMaxPatchConstantSignatureDeclarations(0),
805         m_pShaderDispatchMode(SINGLE_PATCH_DISPATCH_MODE),
806         m_HasClipCullAsInput(false),
807         m_ForcedDispatchMask(0)
808     {}
809 
GetMaxInputPushed() const810     unsigned int HullShaderProperties::GetMaxInputPushed() const
811     {
812         const unsigned int maxNumOfHSPushedInputs = 96;
813         uint numberOfPatches = (m_pShaderDispatchMode == EIGHT_PATCH_DISPATCH_MODE) ? 8 : 1;
814 
815         // Determine how many of input attributes per InputControlPoint (Vertex) can be POTENTIALLY pushed
816         // in current dispatch mode for current topology ( InputPatch size ).
817         uint32_t maxNumOfPushedInputAttributesPerICP =
818             (m_pInputControlPointCount * numberOfPatches > 0)
819             ? maxNumOfHSPushedInputs / (m_pInputControlPointCount * numberOfPatches)
820             : maxNumOfHSPushedInputs;
821 
822         // Input attributes can be pushed only in pairs, so we need to round down the limit.
823         maxNumOfPushedInputAttributesPerICP = iSTD::Align(maxNumOfPushedInputAttributesPerICP - 1, 2);
824 
825         // Determine required number of input attributes.
826         // They can be pushed only in pairs.
827         uint32_t reqNumOfInputAttributesPerICP = iSTD::Align(m_pMaxInputSignatureCount, 2);
828 
829         // TODO: reqNumOfInputAttributesPerICP will have to be incremented by size of Vertex Header
830         // in case of SGV inputs have to be taken into consideration (will be done in next step).
831         // reqNumOfInputAttributes += HeaderSize().Count();
832 
833         // Determine ACTUAL number of attributes that can be pushed.
834         // If the required number of input attributes is less that maximum potential number,
835         // than all of the will be pushed.
836         uint32_t actualNumOfPushedInputAttributesPerICP =
837             iSTD::Min(reqNumOfInputAttributesPerICP, maxNumOfPushedInputAttributesPerICP);
838 
839         return actualNumOfPushedInputAttributesPerICP;
840     }
841 
842 
createHullShaderLoweringPass()843     llvm::FunctionPass* createHullShaderLoweringPass()
844     {
845         return new HullShaderLowering();
846     }
847 
848 } // namespace IGC
849