1 /*========================== begin_copyright_notice ============================
2 
3 Copyright (C) 2017-2021 Intel Corporation
4 
5 SPDX-License-Identifier: MIT
6 
7 ============================= end_copyright_notice ===========================*/
8 
9 #include "common/LLVMWarningsPush.hpp"
10 #include <llvm/Support/ScaledNumber.h>
11 #include "llvm/ADT/StringSwitch.h"
12 #include "common/LLVMWarningsPop.hpp"
13 #include "Compiler/CISACodeGen/PixelShaderCodeGen.hpp"
14 #include "Compiler/CISACodeGen/messageEncoding.hpp"
15 #include "common/allocator.h"
16 #include <iStdLib/utility.h>
17 #include "common/secure_mem.h"
18 #include "Simd32Profitability.hpp"
19 #include "EmitVISAPass.hpp"
20 #include "AdaptorCommon/API/igc.h"
21 #include "Probe/Assertion.h"
22 
/***********************************************************************************
This file contains the code specific to the pixel shader.
************************************************************************************/
26 using namespace llvm;
27 using namespace IGC::IGCMD;
28 
29 namespace IGC
30 {
GetR1()31 CVariable* CPixelShader::GetR1()
32 {
33     return m_R1;
34 }
35 
GetR1Lo()36 std::vector<CVariable*>& CPixelShader::GetR1Lo()
37 {
38     return m_R1Lo;
39 }
40 
AppendR1Lo(CVariable * var)41 void CPixelShader::AppendR1Lo(CVariable* var)
42 {
43     m_R1Lo.push_back(var);
44 }
45 
GetCoarseR1()46 CVariable* CPixelShader::GetCoarseR1()
47 {
48     IGC_ASSERT(m_phase == PSPHASE_PIXEL);
49     return m_CoarseR1;
50 }
51 
AllocatePayload()52 void CPixelShader::AllocatePayload()
53 {
54     if (m_phase == PSPHASE_COARSE)
55     {
56         CreatePassThroughVar();
57     }
58     switch (m_phase)
59     {
60     case PSPHASE_LEGACY:
61     case PSPHASE_COARSE:
62         AllocatePSPayload();
63         break;
64     case PSPHASE_PIXEL:
65         AllocatePixelPhasePayload();
66         break;
67     default:
68         IGC_ASSERT_MESSAGE(0, "unknown phase");
69         break;
70     }
71 }
72 
AllocatePixelPhasePayload()73 void CPixelShader::AllocatePixelPhasePayload()
74 {
75     unsigned int r1Offset = GetDispatchSignature().r1;
76     AllocateInput(m_CoarseR1, r1Offset);
77     for (uint i = 0; i < setup.size(); i++)
78     {
79         if (setup[i])
80         {
81             uint subRegOffset = 0;
82             // PS uniform inputs are stored in the 3rd subreg
83             if (setup[i]->GetSize() == SIZE_DWORD)
84             {
85                 subRegOffset = 3 * SIZE_DWORD;
86             }
87             IGC_ASSERT(m_Signature != nullptr);
88             uint offset = GetDispatchSignature().inputOffset[i];
89             AllocateInput(setup[i], offset + subRegOffset);
90         }
91     }
92     if (m_ZWDelta)
93     {
94         unsigned int offset = GetDispatchSignature().ZWDelta;
95         AllocateInput(m_ZWDelta, offset);
96     }
97     if (m_SampleOffsetX || m_SampleOffsetY)
98     {
99         unsigned int offset = GetDispatchSignature().pixelOffset;
100         if (m_SampleOffsetX)
101         {
102             AllocateInput(m_SampleOffsetX, offset);
103         }
104         if (m_SampleOffsetY)
105         {
106             AllocateInput(m_SampleOffsetY, offset + SIZE_OWORD);
107         }
108     }
109     for (auto it = m_CoarseInput.begin(), ie = m_CoarseInput.end(); it != ie; ++it)
110     {
111         uint offset = GetDispatchSignature().PSOutputOffset.find(it->first)->second;
112         AllocateInput(it->second, offset);
113     }
114     if (m_CoarseMaskInput)
115     {
116         uint offset = GetDispatchSignature().oMaskOffset;
117         AllocateInput(m_CoarseMaskInput, offset);
118     }
119 }
120 
// Lays out the payload for the legacy and coarse phases (AllocatePayload calls
// this function for both). A running byte offset is advanced through, in order:
//   R0, R1, the per-instance inputs (barycentrics, position Z/W, XY offset,
//   coverage mask, CPS requested size, ZW delta, sample offsets), the constants,
//   the setup attributes, the code-patch preallocations and the coarse outputs.
// When m_Signature is set, selected offsets are also stored in the dispatch
// signature. ProgramOutput()->m_scratchSpaceUsedBySpills is set from a comparison
// of the final offset against the total register space.
void CPixelShader::AllocatePSPayload()
{
    bool forceLiveOut = false;
    // In bytes
    uint offset = 0;

    // R0 is always allocated as a predefined variable. Increase offset for R0
    IGC_ASSERT(m_R0);
    if (encoder.IsCodePatchCandidate())
    {
        // For the payload section, we need to mark inputs to be outputs
        // so that inputs will be alive across the entire payload section
        forceLiveOut = true;
        encoder.MarkAsPayloadLiveOut(m_R0);
    }
    offset += getGRFSize();

    // Record where R1 starts so that it can be read back from the signature.
    if (m_Signature)
    {
        GetDispatchSignature().r1 = offset;
    }

    {
        // One GRF per R1 instance.
        IGC_ASSERT(GetR1());
        for (uint i = 0; i < GetR1()->GetNumberInstance(); i++)
        {
            AllocateInput(GetR1(), offset, i, forceLiveOut);
            offset += getGRFSize();
        }
    }

    uint numInstances = m_numberInstance;

    // Per-instance inputs: each variable that exists is placed at the current
    // offset for instance i, and the offset advances by that variable's size.
    for (uint i = 0; i < numInstances; i++)
    {
        // allocate size for bary
        if (m_PerspectivePixel)
        {
            AllocateInput(m_PerspectivePixel, offset, i, forceLiveOut);
            offset += m_PerspectivePixel->GetSize();
        }
        if (m_PerspectiveCentroid)
        {
            AllocateInput(m_PerspectiveCentroid, offset, i, forceLiveOut);
            offset += m_PerspectiveCentroid->GetSize();
        }
        if (m_PerspectiveSample)
        {
            AllocateInput(m_PerspectiveSample, offset, i, forceLiveOut);
            offset += m_PerspectiveSample->GetSize();
        }
        if (m_NoPerspectivePixel)
        {
            AllocateInput(m_NoPerspectivePixel, offset, i, forceLiveOut);
            offset += m_NoPerspectivePixel->GetSize();
        }
        if (m_NoPerspectiveCentroid)
        {
            AllocateInput(m_NoPerspectiveCentroid, offset, i, forceLiveOut);
            offset += m_NoPerspectiveCentroid->GetSize();
        }
        if (m_NoPerspectiveSample)
        {
            AllocateInput(m_NoPerspectiveSample, offset, i, forceLiveOut);
            offset += m_NoPerspectiveSample->GetSize();
        }

        // Add support for POSITION_Z
        if (m_pPositionZPixel)
        {
            AllocateInput(m_pPositionZPixel, offset, i, forceLiveOut);
            offset += m_pPositionZPixel->GetSize();
        }

        // Add support for POSITION_W
        if (m_pPositionWPixel)
        {
            AllocateInput(m_pPositionWPixel, offset, i, forceLiveOut);
            offset += m_pPositionWPixel->GetSize();
        }

        // Add support for POSITION_XY_OFFSET
        if (m_pPositionXYOffset)
        {
            {
                AllocateInput(m_pPositionXYOffset, offset, i, forceLiveOut);
                offset += m_pPositionXYOffset->GetSize();
            }
        }

        // Add support for input coverage mask
        if (m_pInputCoverageMask)
        {
            AllocateInput(m_pInputCoverageMask, offset, i, forceLiveOut);
            offset += m_pInputCoverageMask->GetSize();
        }

        {
            // Requested CPS sizes share one GRF: X at its start, Y one OWORD later.
            if (m_pCPSRequestedSizeX || m_pCPSRequestedSizeY)
            {
                if (m_pCPSRequestedSizeX)
                {
                    AllocateInput(m_pCPSRequestedSizeX, offset, i, forceLiveOut);
                }
                if (m_pCPSRequestedSizeY)
                {
                    AllocateInput(m_pCPSRequestedSizeY, offset + SIZE_OWORD, i, forceLiveOut);
                }
                offset += getGRFSize();
            }
            // ZWDelta is allocated only once (as instance 0), during the last
            // iteration of the instance loop.
            if (m_ZWDelta && i == numInstances - 1)
            {
                AllocateInput(m_ZWDelta, offset, 0, forceLiveOut);
                if (m_Signature)
                {
                    GetDispatchSignature().ZWDelta = offset;
                }
                offset += getGRFSize();
            }
            // Sample offsets share one GRF: X at its start, Y one OWORD later.
            if (m_SampleOffsetX || m_SampleOffsetY)
            {
                if (m_SampleOffsetX)
                {
                    AllocateInput(m_SampleOffsetX, offset, i, forceLiveOut);
                }
                if (m_SampleOffsetY)
                {
                    AllocateInput(m_SampleOffsetY, offset + SIZE_OWORD, i, forceLiveOut);
                }
                if (m_Signature)
                {
                    GetDispatchSignature().pixelOffset = offset;
                }
                offset += getGRFSize();
            }
        }
    }

    IGC_ASSERT(offset % getGRFSize() == 0);

    // need to return the starting grf for constant to client
    ProgramOutput()->m_startReg = offset / getGRFSize();

    // allocate space for NOS constants and pushed constants
    AllocateConstants3DShader(offset);


    // Allocate size for attributes coming from VS
    IGC_ASSERT(offset % getGRFSize() == 0);
    // payloadEnd follows the end of the last allocated (non-alias) attribute,
    // while offset advances by four DWORDs for every setup slot, used or not.
    unsigned int payloadEnd = offset;
    for (uint i = 0; i < setup.size(); i++)
    {
        if (setup[i] && setup[i]->GetAlias() == NULL)
        {
            uint subRegOffset = 0;
            // PS uniform (constant interpolation) inputs
            if (setup[i]->GetSize() == SIZE_DWORD)
            {
                {
                    subRegOffset = 3 * SIZE_DWORD;
                }
            }

            AllocateInput(setup[i], offset + subRegOffset, 0, forceLiveOut);

            // The signature stores the slot start, without the sub-register offset.
            if (m_Signature)
            {
                GetDispatchSignature().inputOffset[i] = offset;
            }
            payloadEnd = offset + setup[i]->GetSize();
        }

        {
            offset += 4 * SIZE_DWORD;
        }
    }

    offset = payloadEnd;

    // For code patching, we preallocate live-out of payload into physical registers.
    // The preallocation must be aligned across contexts to ensure it is patchable.
    if (encoder.IsCodePatchCandidate())
    {
        if (encoder.HasPrevKernel())
        {
            // Get previous context's PayloadEnd to ensure it is patchable
            offset = encoder.GetPayloadEnd();
        }
        else
        {
            encoder.SetPayloadEnd(payloadEnd);
        }
    }
    // Round the offset up to the next GRF boundary.
    if (offset % getGRFSize() != 0)
    {
        offset += (getGRFSize() - (offset % getGRFSize()));
    }

    // This is the preallocation for payload live-outs.
    for (auto& var : payloadLiveOutSetup)
    {
        IGC_ASSERT(offset% getGRFSize() == 0);
        AllocateInput(var, offset, 0, true);
        offset += var->GetSize();
    }

    // This is the preallocation for temp variables in payload sections
    for (auto& var : payloadTempSetup)
    {
        AllocateInput(var, offset);
        offset += var->GetSize();
    }

    // When preallocation fails (the offset reaches the total number of physical registers),
    // exit early and give up this compilation.
    ProgramOutput()->m_scratchSpaceUsedBySpills = offset >= encoder.GetVISAKernel()->getNumRegTotal() * getGRFSize();
    if (ProgramOutput()->m_scratchSpaceUsedBySpills)
    {
        return;
    }

    // The coarse outputs are placed starting from payloadEnd, not from the end
    // of the preallocations above.
    offset = payloadEnd;

    // create output registers for coarse phase
    for (const auto& it : m_CoarseOutput)
    {
        CVariable* output = it.second;
        // Align the offset to the output variable's own alignment requirement.
        offset = iSTD::Align(offset, (size_t) 1 << output->GetAlign());
        AllocateOutput(output, offset);
        if (m_Signature)
        {
            GetDispatchSignature().PSOutputOffset[it.first] = offset;
        }
        offset += output->GetSize();
    }
    if (m_CoarseoMask)
    {
        offset = iSTD::Align(offset, (size_t) 1 << m_CoarseoMask->GetAlign());
        AllocateOutput(m_CoarseoMask, offset);
        if (m_Signature)
        {
            GetDispatchSignature().oMaskOffset = offset;
            GetDispatchSignature().CoarseMask = true;
        }
        offset += m_CoarseoMask->GetSize();
    }
    // Same register-space check as above, now including the coarse outputs.
    ProgramOutput()->m_scratchSpaceUsedBySpills = (offset >= encoder.GetVISAKernel()->getNumRegTotal() * getGRFSize());
}
368 
GetDispatchSignature()369 PSSignature::DispatchSignature& CPixelShader::GetDispatchSignature()
370 {
371     switch (m_dispatchSize)
372     {
373     case SIMDMode::SIMD8:
374         return m_Signature->dispatchSign[0];
375     case SIMDMode::SIMD16:
376         return m_Signature->dispatchSign[1];
377     case SIMDMode::SIMD32:
378         return m_Signature->dispatchSign[2];
379     default:
380         IGC_ASSERT_MESSAGE(0, "bad dispatch size");
381         break;
382     }
383     return m_Signature->dispatchSign[0];
384 }
385 
GetBaryReg(e_interpolation mode)386 CVariable* CPixelShader::GetBaryReg(e_interpolation mode)
387 {
388     uint numInstances = m_numberInstance;
389     uint numElements = 2 * numLanes(m_SIMDSize);
390 
391 
392     CVariable* baryReg = 0;
393     switch (mode)
394     {
395     case EINTERPOLATION_LINEAR:
396         if (!m_PerspectivePixel) {
397             m_PerspectivePixel =
398                 GetNewVariable(numElements, ISA_TYPE_F, EALIGN_GRF, false, numInstances, "PerspectivePixel");
399         }
400         baryReg = m_PerspectivePixel;
401         break;
402     case EINTERPOLATION_LINEARCENTROID:
403         if (!m_PerspectiveCentroid) {
404             m_PerspectiveCentroid =
405                 GetNewVariable(
406                     numElements, ISA_TYPE_F, EALIGN_GRF,
407                     false, numInstances, "LinearCentroid");
408         }
409         baryReg = m_PerspectiveCentroid;
410         break;
411     case EINTERPOLATION_LINEARSAMPLE:
412         if (!m_PerspectiveSample) {
413             m_PerspectiveSample =
414                 GetNewVariable(
415                     numElements, ISA_TYPE_F, EALIGN_GRF,
416                     false, numInstances, "LinearSample");
417         }
418         baryReg = m_PerspectiveSample;
419         break;
420     case EINTERPOLATION_LINEARNOPERSPECTIVE:
421         if (!m_NoPerspectivePixel) {
422             m_NoPerspectivePixel =
423                 GetNewVariable(
424                     numElements, ISA_TYPE_F, EALIGN_GRF,
425                     false, numInstances, "LinearNoPerspective");
426         }
427         baryReg = m_NoPerspectivePixel;
428         break;
429     case EINTERPOLATION_LINEARNOPERSPECTIVECENTROID:
430         if (!m_NoPerspectiveCentroid) {
431             m_NoPerspectiveCentroid =
432                 GetNewVariable(
433                     numElements, ISA_TYPE_F, EALIGN_GRF,
434                     false, numInstances, "NoPerspectiveCentroid");
435         }
436         baryReg = m_NoPerspectiveCentroid;
437         break;
438     case EINTERPOLATION_LINEARNOPERSPECTIVESAMPLE:
439         if (!m_NoPerspectiveSample) {
440             m_NoPerspectiveSample =
441                 GetNewVariable(
442                     numElements, ISA_TYPE_F, EALIGN_GRF,
443                     false, numInstances, "NoPerspectiveSample");
444         }
445         baryReg = m_NoPerspectiveSample;
446         break;
447     default:
448         IGC_ASSERT(0);
449     }
450     return baryReg;
451 }
452 
GetBaryRegLoweredHalf(e_interpolation mode)453 CVariable* CPixelShader::GetBaryRegLoweredHalf(e_interpolation mode)
454 {
455     IGC_ASSERT(IsInterpolationLinear(mode));
456 
457     const char* const name =
458         mode == EINTERPOLATION_LINEAR ? "PerspectivePixelLoweredHalf" :
459         mode == EINTERPOLATION_LINEARCENTROID ? "PerspectiveCentroidLoweredHalf" :
460         mode == EINTERPOLATION_LINEARSAMPLE ? "PerspectiveSampleLoweredHalf" :
461         mode == EINTERPOLATION_LINEARNOPERSPECTIVE ? "NoPerspectivePixelLoweredHalf" :
462         mode == EINTERPOLATION_LINEARNOPERSPECTIVECENTROID ? "NoPerspectiveCentroidLoweredHalf" :
463         mode == EINTERPOLATION_LINEARNOPERSPECTIVESAMPLE ? "NoPerspectiveSampleLoweredHalf" : "";
464 
465     if (!m_BaryRegLoweredHalf[mode])
466     {
467         m_BaryRegLoweredHalf[mode] =
468             GetNewVariable(
469                 2 * numLanes(m_SIMDSize), ISA_TYPE_HF, EALIGN_GRF,
470                 false, m_numberInstance, name);
471         if (encoder.IsCodePatchCandidate())
472         {
473             AddPatchTempSetup(m_BaryRegLoweredHalf[mode]);
474         }
475     }
476     return m_BaryRegLoweredHalf[mode];
477 }
478 
GetBaryRegLoweredFloat(e_interpolation mode)479 CVariable* CPixelShader::GetBaryRegLoweredFloat(e_interpolation mode)
480 {
481     IGC_ASSERT(IsInterpolationLinear(mode));
482 
483     const char* const name =
484         mode == EINTERPOLATION_LINEAR ? "PerspectivePixelLoweredFloat" :
485         mode == EINTERPOLATION_LINEARCENTROID ? "PerspectiveCentroidLoweredFloat" :
486         mode == EINTERPOLATION_LINEARSAMPLE ? "PerspectiveSampleLoweredFloat" :
487         mode == EINTERPOLATION_LINEARNOPERSPECTIVE ? "NoPerspectivePixelLoweredFloat" :
488         mode == EINTERPOLATION_LINEARNOPERSPECTIVECENTROID ? "NoPerspectiveCentroidLoweredFloat" :
489         mode == EINTERPOLATION_LINEARNOPERSPECTIVESAMPLE ? "NoPerspectiveSampleLoweredFloat" : "";
490 
491     if (!m_BaryRegLoweredFloat[mode])
492     {
493         m_BaryRegLoweredFloat[mode] =
494             GetNewVariable(
495                 2 * numLanes(m_SIMDSize), ISA_TYPE_F, EALIGN_GRF,
496                 false, m_numberInstance, name);
497         if (encoder.IsCodePatchCandidate())
498         {
499             AddPatchTempSetup(m_BaryRegLoweredFloat[mode]);
500         }
501     }
502     return m_BaryRegLoweredFloat[mode];
503 }
504 
505 
506 
GetInputDelta(uint index,bool loweredInput)507 CVariable* CPixelShader::GetInputDelta(uint index, bool loweredInput)
508 {
509     CVariable* inputVar = setup[index];
510     if (inputVar == nullptr)
511     {
512         if (loweredInput)
513         {
514             if (index % 2 == 0)
515             {
516                 inputVar = GetNewVariable(8, ISA_TYPE_F, EALIGN_GRF, true, CName::NONE);
517                 setup[index + 1] = GetNewAlias(inputVar, ISA_TYPE_F, 16, 4);
518             }
519             else
520             {
521                 inputVar = GetNewAlias(GetInputDelta(index - 1), ISA_TYPE_F, 16, 4);
522             }
523         }
524         else
525         {
526             inputVar = GetNewVariable(4, ISA_TYPE_F, EALIGN_OWORD, true, CName::NONE);
527         }
528         setup[index] = inputVar;
529     }
530     return inputVar;
531 }
532 
GetInputDeltaLowered(uint index)533 CVariable* CPixelShader::GetInputDeltaLowered(uint index)
534 {
535     CVariable* inputVar = setupLowered[index];
536     if (inputVar == nullptr)
537     {
538         IGC_ASSERT(LowerPSInput());
539         if (index % 2 == 0)
540         {
541             inputVar = GetNewVariable(8, ISA_TYPE_HF, EALIGN_OWORD, true, CName::NONE);
542         }
543         else
544         {
545             if (setupLowered[index - 1])
546             {
547                 inputVar = GetNewAlias(setupLowered[index - 1], ISA_TYPE_HF, 8, 4);
548             }
549             else
550             {
551                 inputVar = GetNewVariable(4, ISA_TYPE_HF, EALIGN_OWORD, true, CName::NONE);
552             }
553         }
554 
555         setupLowered[index] = inputVar;
556     }
557     return inputVar;
558 }
559 
GetZWDelta()560 CVariable* CPixelShader::GetZWDelta()
561 {
562     if (!m_ZWDelta)
563     {
564         uint numLanes = getGRFSize() / SIZE_DWORD ; // single GRF
565 
566         m_ZWDelta =
567             GetNewVariable(numLanes, ISA_TYPE_F, EALIGN_GRF, false, 1, "ZWDelta");
568     }
569     return m_ZWDelta;
570 }
571 
572 
GetPositionZ()573 CVariable* CPixelShader::GetPositionZ()
574 {
575     uint16_t numberInstance, numberLanes;
576     {
577         numberLanes = numLanes(m_SIMDSize);
578         numberInstance = m_numberInstance;
579     }
580     if (!m_pPositionZPixel)
581     {
582         m_pPositionZPixel =
583             GetNewVariable(numberLanes, ISA_TYPE_F, EALIGN_GRF, false, numberInstance, "PosZPixel");
584     }
585     return m_pPositionZPixel;
586 }
587 
GetPositionW()588 CVariable* CPixelShader::GetPositionW()
589 {
590     uint16_t numberInstance, numberLanes;
591     {
592         numberLanes = numLanes(m_SIMDSize);
593         numberInstance = m_numberInstance;
594     }
595     if (!m_pPositionWPixel)
596     {
597         m_pPositionWPixel =
598             GetNewVariable(numberLanes, ISA_TYPE_F, EALIGN_GRF, false, numberInstance, "PosWPixel");
599     }
600     return m_pPositionWPixel;
601 }
602 
GetPositionXYOffset()603 CVariable* CPixelShader::GetPositionXYOffset()
604 {
605     if (!m_pPositionXYOffset)
606     {
607         m_pPositionXYOffset =
608             GetNewVariable(32, ISA_TYPE_B, EALIGN_GRF, false, m_numberInstance, "PosXYOff");
609     }
610     return m_pPositionXYOffset;
611 }
612 
GetInputCoverageMask()613 CVariable* CPixelShader::GetInputCoverageMask()
614 {
615     uint16_t numberInstance, numberLanes;
616     {
617         numberLanes = numLanes(m_SIMDSize);
618         numberInstance = m_numberInstance;
619     }
620     if (!m_pInputCoverageMask)
621     {
622         m_pInputCoverageMask = GetNewVariable(
623                 numberLanes, ISA_TYPE_F, EALIGN_GRF, false, numberInstance, "InputCoverageMask");
624     }
625     return m_pInputCoverageMask;
626 }
627 
GetSampleOffsetX()628 CVariable* CPixelShader::GetSampleOffsetX()
629 {
630     if (!m_SampleOffsetX)
631     {
632         m_SampleOffsetX = GetNewVariable(16, ISA_TYPE_UB, EALIGN_OWORD, true, "SmplOffX");
633     }
634     return m_SampleOffsetX;
635 }
636 
GetSampleOffsetY()637 CVariable* CPixelShader::GetSampleOffsetY()
638 {
639     if (!m_SampleOffsetY)
640     {
641         m_SampleOffsetY = GetNewVariable(16, ISA_TYPE_UB, EALIGN_OWORD, true, "SmplOffY");
642     }
643     return m_SampleOffsetY;
644 }
645 
GetCPSRequestedSizeX()646 CVariable* CPixelShader::GetCPSRequestedSizeX()
647 {
648     if (!m_pCPSRequestedSizeX)
649     {
650         m_pCPSRequestedSizeX =
651             GetNewVariable(
652                 numLanes(m_SIMDSize) / 4, ISA_TYPE_F,
653                 EALIGN_OWORD, false, m_numberInstance, "CPSReqSizeX");
654     }
655     return m_pCPSRequestedSizeX;
656 }
657 
GetCPSRequestedSizeY()658 CVariable* CPixelShader::GetCPSRequestedSizeY()
659 {
660     if (!m_pCPSRequestedSizeY)
661     {
662         m_pCPSRequestedSizeY =
663             GetNewVariable(
664                 numLanes(m_SIMDSize) / 4, ISA_TYPE_F,
665                 EALIGN_OWORD, false, m_numberInstance, "CPSReqSizeY");
666     }
667     return m_pCPSRequestedSizeY;
668 }
669 
CPixelShader(llvm::Function * pFunc,CShaderProgram * pProgram)670 CPixelShader::CPixelShader(llvm::Function* pFunc, CShaderProgram* pProgram)
671     : CShader(pFunc, pProgram)
672 {
673     m_RenderTargetMask = 0;
674     m_HasoDepth = false;
675     m_HasoStencil = false;
676     m_HasoMask = false;
677     m_isPerSample = false;
678     m_HasInputCoverageMask = false;
679     m_HasPullBary = false;
680     m_HasCoarseSize = false;
681     m_HasDouble = false;
682     m_hasDualBlendSource = false;
683     m_HasDiscard = false;
684     m_IsLastPhase = false;
685     m_phase = PSPHASE_LEGACY;
686     m_Signature = nullptr;
687     m_samplerCount = 0;
688     m_ModeUsedHalf.reset();
689     m_ModeUsedFloat.reset();
690     setupLowered.clear();
691     loweredSetupIndexes.clear();
692 
693     m_BaryRegLoweredHalf.fill(nullptr);
694     m_BaryRegLoweredFloat.fill(nullptr);
695 
696     Function* coarsePhase = nullptr;
697     Function* pixelPhase = nullptr;
698     NamedMDNode* coarseNode = pFunc->getParent()->getNamedMetadata(NAMED_METADATA_COARSE_PHASE);
699     NamedMDNode* pixelNode = pFunc->getParent()->getNamedMetadata(NAMED_METADATA_PIXEL_PHASE);
700     if (coarseNode)
701     {
702         coarsePhase = llvm::mdconst::dyn_extract<Function>(coarseNode->getOperand(0)->getOperand(0));
703     }
704     if (pixelNode)
705     {
706         pixelPhase = llvm::mdconst::dyn_extract<Function>(pixelNode->getOperand(0)->getOperand(0));
707     }
708     if (pFunc == coarsePhase)
709     {
710         m_phase = PSPHASE_COARSE;
711     }
712     if (coarsePhase && pixelPhase)
713     {
714         if (pFunc == pixelPhase)
715         {
716             m_phase = PSPHASE_PIXEL;
717             m_IsLastPhase = true;
718         }
719     }
720     else
721     {
722         m_IsLastPhase = true;
723     }
724 }
725 
~CPixelShader()726 CPixelShader::~CPixelShader()
727 {
728 }
729 
InitEncoder(SIMDMode simdMode,bool canAbortOnSpill,ShaderDispatchMode shaderMode)730 void CPixelShader::InitEncoder(SIMDMode simdMode, bool canAbortOnSpill, ShaderDispatchMode shaderMode)
731 {
732     m_R1 = NULL;
733     m_PerspectiveBaryPlanes = nullptr;
734     m_NonPerspectiveBaryPlanes = nullptr;
735     m_PerspectivePixel = NULL;
736     m_PerspectiveCentroid = NULL;
737     m_PerspectiveSample = NULL;
738     m_NoPerspectivePixel = NULL;
739     m_NoPerspectiveCentroid = NULL;
740     m_NoPerspectiveSample = NULL;
741     m_BaryRegLoweredHalf.fill(nullptr);
742     m_BaryRegLoweredFloat.fill(nullptr);
743     m_KillPixelMask = NULL;
744     m_HasDiscard = false;
745     m_pPositionZPixel = NULL;
746     m_pPositionWPixel = NULL;
747     m_pPositionXYOffset = NULL;
748     m_pInputCoverageMask = NULL;
749     m_pCPSRequestedSizeX = NULL;
750     m_pCPSRequestedSizeY = NULL;
751     m_PixelPhasePayload = nullptr;
752     m_PixelPhaseCounter = nullptr;
753     m_SampleOffsetX = nullptr;
754     m_SampleOffsetY = nullptr;
755     m_ZWDelta = nullptr;
756     m_hasEOT = false;
757     m_NeedPSSync = false;
758     m_CoarseoMask = nullptr;
759     m_CoarseMaskInput = nullptr;
760     m_CoarseR1 = nullptr;
761 
762     m_CoarseOutput.clear();
763     m_CoarseInput.clear();
764     rtWriteList.clear();
765     setupLowered.clear();
766     loweredSetupIndexes.clear();
767     m_ModeUsedHalf.reset();
768     m_ModeUsedFloat.reset();
769     CShader::InitEncoder(simdMode, canAbortOnSpill, shaderMode);
770 }
771 
772 
FillProgram(SPixelShaderKernelProgram * pKernelProgram)773 void CShaderProgram::FillProgram(SPixelShaderKernelProgram* pKernelProgram)
774 {
775 
776     const unsigned int InstCacheSize = 0xC000;
777     CPixelShader* simd8Shader = static_cast<CPixelShader*>(GetShader(SIMDMode::SIMD8));
778     CPixelShader* simd16Shader = static_cast<CPixelShader*>(GetShader(SIMDMode::SIMD16));
779     CPixelShader* simd32Shader = static_cast<CPixelShader*>(GetShader(SIMDMode::SIMD32));
780     CPixelShader* pShader = nullptr;
781     if (simd32Shader)
782     {
783         const unsigned kernelSize = simd32Shader->m_simdProgram.m_programSize;
784         const bool forceSIMD32 =
785             (this->GetContext()->getCompilerOption().forcePixelShaderSIMDMode &
786                 FLAG_PS_SIMD_MODE_FORCE_SIMD32) != 0;
787 
788         if ((!simd8Shader && !simd16Shader) ||
789             (kernelSize > 0 && (kernelSize < InstCacheSize || forceSIMD32)))
790         {
791             pKernelProgram->simd32 = *simd32Shader->ProgramOutput();
792             pShader = simd32Shader;
793             GetContext()->SetSIMDInfo(SIMD_SELECTED, SIMDMode::SIMD32, ShaderDispatchMode::NOT_APPLICABLE);
794         }
795         else if (kernelSize > 0 && (kernelSize < InstCacheSize))
796         {
797             GetContext()->SetSIMDInfo(SIMD_SKIP_PERF, SIMDMode::SIMD32, ShaderDispatchMode::NOT_APPLICABLE);
798         }
799     }
800     if (simd16Shader)
801     {
802         if (!simd8Shader ||
803             (simd16Shader->m_simdProgram.m_programSize > 0))
804         {
805             pKernelProgram->simd16 = *simd16Shader->ProgramOutput();
806             pShader = simd16Shader;
807             GetContext()->SetSIMDInfo(SIMD_SELECTED, SIMDMode::SIMD16, ShaderDispatchMode::NOT_APPLICABLE);
808         }
809     }
810     {
811         if (simd8Shader && simd8Shader->m_simdProgram.m_programSize > 0)
812         {
813             pKernelProgram->simd8 = *simd8Shader->ProgramOutput();
814             pShader = simd8Shader;
815             GetContext()->SetSIMDInfo(SIMD_SELECTED, SIMDMode::SIMD8, ShaderDispatchMode::NOT_APPLICABLE);
816         }
817     }
818 
819     if (pShader)
820     {
821         pShader->FillProgram(pKernelProgram);
822     }
823     pKernelProgram->SIMDInfo = GetContext()->GetSIMDInfo();
824 }
825 
FillProgram(SPixelShaderKernelProgram * pKernelProgram)826 void CPixelShader::FillProgram(SPixelShaderKernelProgram* pKernelProgram)
827 {
828     const PixelShaderInfo& psInfo = GetContext()->getModuleMetaData()->psInfo;
829 
830     pKernelProgram->blendToFillEnabled = psInfo.blendToFillEnabled;
831     pKernelProgram->forceEarlyZ = psInfo.forceEarlyZ;
832 
833     pKernelProgram->isCoarsePS = m_phase == PSPHASE_COARSE;
834     pKernelProgram->hasCoarsePixelSize = m_HasCoarseSize;
835     pKernelProgram->hasSampleOffset = m_SampleOffsetX || m_SampleOffsetY;
836     pKernelProgram->hasZWDelta = m_ZWDelta;
837     pKernelProgram->needPerspectiveBaryPlane = m_PerspectiveBaryPlanes ? true : false;;
838     pKernelProgram->needNonPerspectiveBaryPlane = m_NonPerspectiveBaryPlanes ? true : false;;
839     pKernelProgram->ConstantBufferLoaded = m_constantBufferLoaded;
840     pKernelProgram->UavLoaded = m_uavLoaded;
841     for (int i = 0; i < 4; i++)
842     {
843         pKernelProgram->ShaderResourceLoaded[i] = m_shaderResourceLoaded[i];
844     }
845     pKernelProgram->RenderTargetLoaded = m_renderTargetLoaded;
846 
847     pKernelProgram->hasControlFlow = m_numBlocks > 1 ? true : false;
848     pKernelProgram->MaxNumberOfThreads = m_Platform->getMaxPixelShaderThreads() / GetShaderThreadUsageRate();
849     pKernelProgram->needPerspectiveBary = m_PerspectivePixel ? true : false;
850     pKernelProgram->needPerspectiveCentroidBary = m_PerspectiveCentroid ? true : false;
851     pKernelProgram->needPerspectiveSampleBary = m_PerspectiveSample ? true : false;
852     pKernelProgram->needNonPerspectiveBary = m_NoPerspectivePixel ? true : false;
853     pKernelProgram->needNonPerspectiveCentroidBary = m_NoPerspectiveCentroid ? true : false;
854     pKernelProgram->needNonPerspectiveSampleBary = m_NoPerspectiveSample ? true : false;
855     pKernelProgram->killPixel = m_HasDiscard;
856     pKernelProgram->needSourceDepth = m_pPositionZPixel != nullptr;
857     pKernelProgram->needSourceW = m_pPositionWPixel != nullptr;
858     pKernelProgram->outputDepth = m_HasoDepth;
859     pKernelProgram->oMask = m_HasoMask;
860     pKernelProgram->outputStencil = m_HasoStencil;
861     pKernelProgram->sampleCmpToDiscardOptimizationPossible = GetContext()->m_instrTypes.sampleCmpToDiscardOptimizationPossible;
862     pKernelProgram->sampleCmpToDiscardOptimizationSlot = GetContext()->m_instrTypes.sampleCmpToDiscardOptimizationSlot;
863     pKernelProgram->needPSSync = m_NeedPSSync;
864     pKernelProgram->hasInputCoverageMask = m_HasInputCoverageMask;
865     pKernelProgram->hasPullBary = m_HasPullBary;
866     pKernelProgram->isPerSample = IsPerSample();
867     if (NamedMDNode * primIdNod = entry->getParent()->getNamedMetadata("PrimIdLocation"))
868     {
869         pKernelProgram->primIdLocation = int_cast<uint>(
870             mdconst::dyn_extract<ConstantInt>(primIdNod->getOperand(0)->getOperand(0))->getZExtValue());
871         pKernelProgram->hasPrimID = true;
872     }
873     if (NamedMDNode* PointCoordNod = entry->getParent()->getNamedMetadata("PointCoordLocation"))
874     {
875         pKernelProgram->pointCoordLocation = int_cast<uint>(
876             mdconst::dyn_extract<ConstantInt>(PointCoordNod->getOperand(0)->getOperand(0))->getZExtValue());
877         pKernelProgram->hasPointCoord = true;
878     }
879 
880     pKernelProgram->posXYOffsetEnable = m_pPositionXYOffset ? true : false;
881     pKernelProgram->VectorMask = m_VectorMask;
882     pKernelProgram->samplerCount = GetSamplerCount(m_samplerCount);
883     pKernelProgram->renderTargetMask = m_RenderTargetMask;
884     pKernelProgram->constantInterpolationEnableMask = m_ConstantInterpolationMask;
885     pKernelProgram->NOSBufferSize = m_NOSBufferSize / getMinPushConstantBufferAlignmentInBytes();
886     pKernelProgram->isMessageTargetDataCacheDataPort = isMessageTargetDataCacheDataPort;
887 
888 
889     CreateGatherMap();
890     CreateConstantBufferOutput(pKernelProgram);
891 
892     pKernelProgram->bindingTableEntryCount = this->GetMaxUsedBindingTableEntryCount();
893     pKernelProgram->BindingTableEntryBitmap = this->GetBindingTableEntryBitmap();
894 
895     // PS packed attributes
896     for (uint attribute = 0; attribute <= m_MaxSetupIndex / 4; ++attribute)
897     {
898         pKernelProgram->attributeActiveComponent[attribute] = GetActiveComponents(attribute);
899 
900         const bool useComponent = pKernelProgram->attributeActiveComponent[attribute] !=
901             USC::GFX3DSTATE_SF_ATTRIBUTE_ACTIVE_COMPONENT_DISABLED;
902         if (useComponent)
903         {
904             pKernelProgram->nbOfSFOutput = attribute + 1;
905         }
906     }
907 
908     for (unsigned i = 0; i < USC::NUM_PSHADER_OUTPUT_REGISTERS; i++)
909     {
910         pKernelProgram->OutputUseMask[i] = (unsigned char)psInfo.colorOutputMask[i];
911     }
912 }
913 
PreCompile()914 void CPixelShader::PreCompile()
915 {
916     CreateImplicitArgs();
917     CodeGenContext* ctx = GetContext();
918 
919     const uint8_t numberInstance = m_numberInstance;
920     const bool isR1Available = true;
921 
922     if (isR1Available)
923     {
924         m_R1 = GetNewVariable(getGRFSize() / SIZE_DWORD, ISA_TYPE_D, EALIGN_GRF, false, numberInstance, "R1");
925     }
926 
927     // make sure the return block is properly set
928     if (ctx->getModule()->getNamedMetadata("KillPixel"))
929     {
930         m_HasDiscard = true;
931     }
932 
933     setup.resize(4 * g_c_Max_PS_attributes, nullptr);
934     if (LowerPSInput())
935     {
936         setupLowered.resize(4 * g_c_Max_PS_attributes, nullptr);
937     }
938 }
939 
ParseShaderSpecificOpcode(llvm::Instruction * inst)940 void CPixelShader::ParseShaderSpecificOpcode(llvm::Instruction* inst)
941 {
942     // temporary workaround to disable SIMD16 when double are present
943     if (inst->getType()->isDoubleTy())
944     {
945         m_HasDouble = true;
946     }
947     if (GenIntrinsicInst * genIntr = dyn_cast<GenIntrinsicInst>(inst))
948     {
949         uint setupIndex;
950         switch (genIntr->getIntrinsicID())
951         {
952         case GenISAIntrinsic::GenISA_RenderTargetRead:
953             m_NeedPSSync = true;
954             if (GetPhase() == PSPHASE_LEGACY)
955             {
956                 m_isPerSample = true;
957             }
958             break;
959         case GenISAIntrinsic::GenISA_uavSerializeAll:
960         case GenISAIntrinsic::GenISA_uavSerializeOnResID:
961             m_NeedPSSync = true;
962             break;
963         case GenISAIntrinsic::GenISA_RTDualBlendSource:
964             m_hasDualBlendSource = true;
965             AddRenderTarget(
966                 cast<RTDualBlendSourceIntrinsic>(genIntr)->getRTIndexImm());
967             break;
968         case GenISAIntrinsic::GenISA_RTWrite:
969         {
970             RTWritIntrinsic* rt = cast<RTWritIntrinsic>(genIntr);
971             if (rt->getRTIndexImm() != -1)
972             {
973                 AddRenderTarget(rt->getRTIndexImm());
974             }
975             if (rt->hasStencil())
976             {
977                 OutputStencil();
978             }
979             break;
980         }
981         case GenISAIntrinsic::GenISA_DCL_ShaderInputVec:
982         case GenISAIntrinsic::GenISA_DCL_inputVec:
983         {
984             IGC_ASSERT(llvm::isa<llvm::ConstantInt>(inst->getOperand(0)));
985             IGC_ASSERT(llvm::isa<llvm::ConstantInt>(inst->getOperand(1)));
986             setupIndex = int_cast<uint>(llvm::cast<llvm::ConstantInt>(inst->getOperand(0))->getZExtValue());
987             m_MaxSetupIndex = std::max(setupIndex, m_MaxSetupIndex);
988             // attribute packing
989             m_SetupIndicesUsed.insert(setupIndex);
990 
991             e_interpolation mode = static_cast<e_interpolation>(llvm::cast<llvm::ConstantInt>(inst->getOperand(1))->getZExtValue());
992             if (mode != EINTERPOLATION_CONSTANT)
993             {
994                 {
995                     if (inst->getType()->isHalfTy())
996                     {
997                         loweredSetupIndexes.insert(setupIndex);
998                         m_ModeUsedHalf.set(mode);
999                     }
1000                     else
1001                     {
1002                         m_ModeUsedFloat.set(mode);
1003                     }
1004                 }
1005             }
1006             break;
1007         }
1008         case GenISAIntrinsic::GenISA_PullSampleIndexBarys:
1009         case GenISAIntrinsic::GenISA_PullSnappedBarys:
1010         case GenISAIntrinsic::GenISA_PullCentroidBarys:
1011             m_HasPullBary = true;
1012             break;
1013         case GenISAIntrinsic::GenISA_Interpolate:
1014             IGC_ASSERT(llvm::isa<llvm::ConstantInt>(inst->getOperand(0)));
1015             setupIndex = int_cast<uint>(llvm::cast<llvm::ConstantInt>(inst->getOperand(0))->getZExtValue());
1016             m_MaxSetupIndex = std::max(setupIndex, m_MaxSetupIndex);
1017             // attribute packing
1018             m_SetupIndicesUsed.insert(setupIndex);
1019             break;
1020         default:
1021             break;
1022         }
1023     }
1024 }
1025 
AddRenderTarget(uint RTIndex)1026 void CPixelShader::AddRenderTarget(uint RTIndex)
1027 {
1028     m_RenderTargetMask |= 1 << RTIndex;
1029 }
1030 
DeclareSGV(uint usage)1031 void CPixelShader::DeclareSGV(uint usage)
1032 {
1033     switch (usage)
1034     {
1035     case POSITION_X:
1036     case POSITION_Y:
1037         break;
1038     case POSITION_Z:
1039         break;
1040     case POSITION_W:
1041         break;
1042     case VFACE:
1043         break;
1044     case INPUT_COVERAGE_MASK:
1045         m_HasInputCoverageMask = true;
1046         break;
1047     case SAMPLEINDEX:
1048         m_isPerSample = true;
1049         break;
1050     case REQUESTED_COARSE_SIZE_X:
1051     case REQUESTED_COARSE_SIZE_Y:
1052         m_HasCoarseSize = true;
1053         break;
1054     default:
1055         break;
1056         // nothing to do
1057     }
1058 }
1059 
PullPixelPhasePayload()1060 void CPixelShader::PullPixelPhasePayload()
1061 {
1062     CVariable* payload = nullptr;
1063     bool oMask = false;
1064     if (GetDispatchSignature().CoarseMask)
1065     {
1066         payload = GetCoarseMask();
1067         oMask = true;
1068     }
1069     else
1070     {
1071         payload = GetNewVariable(8, ISA_TYPE_D, EALIGN_GRF, CName::NONE);
1072     }
1073     uint messageDescriptor = PIPullPixelPayload(
1074         m_SIMDSize == SIMDMode::SIMD8 ? EU_PI_MESSAGE_SIMD8 : EU_PI_MESSAGE_SIMD16,
1075         m_PixelPhasePayload->GetSize() / getGRFSize(),
1076         payload->GetSize() / getGRFSize(),
1077         false,
1078         false,
1079         false,
1080         false,
1081         false,
1082         oMask);
1083 
1084     CVariable* desc = ImmToVariable(messageDescriptor, ISA_TYPE_UD);
1085     // save the current phase counter as it is needed by the RT write
1086     m_CurrentPhaseCounter = GetNewVariable(1, ISA_TYPE_UD, EALIGN_DWORD, true, "CurrPhaseCounter");
1087     encoder.SetSrcRegion(0, 0, 1, 0);
1088     encoder.Shl(m_CurrentPhaseCounter, m_PixelPhaseCounter, ImmToVariable(0x10, ISA_TYPE_UW));
1089     encoder.Push();
1090     CVariable* nextPhase = GetNewVariable(1, ISA_TYPE_UW, EALIGN_DWORD, true, "NextPhase");
1091     encoder.SetSrcRegion(0, 0, 1, 0);
1092     encoder.Shl(nextPhase, m_PixelPhaseCounter, ImmToVariable(8, ISA_TYPE_D));
1093     encoder.Push();
1094     CVariable* a0 = GetNewVariable(1, ISA_TYPE_UD, EALIGN_DWORD, true, CName::NONE);
1095     encoder.Or(a0, nextPhase, desc);
1096     encoder.Push();
1097     encoder.SetNoMask();
1098     encoder.Send(m_PixelPhasePayload, payload, EU_GEN7_MESSAGE_TARGET_PIXEL_INTERPOLATOR, a0);
1099     encoder.Push();
1100     CVariable* mask = BitCast(m_PixelPhasePayload, ISA_TYPE_UW);
1101     CVariable* f0 = GetNewVariable(numLanes(m_SIMDSize), ISA_TYPE_BOOL, EALIGN_DWORD, true, CName::NONE);
1102     encoder.SetSrcSubReg(0, 14);
1103     encoder.SetSrcRegion(0, 0, 1, 0);
1104     encoder.Cmp(EPREDICATE_EQ, f0, mask, ImmToVariable(0, ISA_TYPE_UW));
1105     encoder.Push();
1106     m_epilogueLabel = encoder.GetNewLabelID("epilogue");
1107     encoder.Jump(f0, m_epilogueLabel);
1108     encoder.Push();
1109     // override the execution mask in sr0.2
1110     encoder.SetSrcSubReg(0, 14);
1111     encoder.SetDstSubReg(2);
1112     encoder.SetSrcRegion(0, 0, 1, 0);
1113     encoder.Cast(GetSR0(), mask);
1114     encoder.Push();
1115 }
1116 
AddPrologue()1117 void CPixelShader::AddPrologue()
1118 {
1119     if (m_phase == PSPHASE_PIXEL)
1120     {
1121         uint responseLength = 2;
1122         m_CoarseR1 = GetR1();
1123         m_PixelPhasePayload =
1124             GetNewVariable(responseLength * (getGRFSize() >> 2),
1125                 ISA_TYPE_D, EALIGN_GRF, "PixelPhasePayload");
1126         m_PixelPhaseCounter = GetNewAlias(m_PixelPhasePayload, ISA_TYPE_UW, 0, 1);
1127         m_CoarseParentIndex = GetNewAlias(m_PixelPhasePayload, ISA_TYPE_UW, getGRFSize(), numLanes(m_SIMDSize));
1128         m_R1 = GetNewAlias(m_PixelPhasePayload, ISA_TYPE_D, 0, getGRFSize() / SIZE_DWORD);
1129         encoder.SetNoMask();
1130         encoder.SetSimdSize(SIMDMode::SIMD1);
1131         encoder.Copy(m_PixelPhaseCounter, ImmToVariable(0, ISA_TYPE_UW));
1132         encoder.Push();
1133         m_pixelPhaseLabel = encoder.GetNewLabelID("pixel_phase");
1134         encoder.Label(m_pixelPhaseLabel);
1135         encoder.Push();
1136         PullPixelPhasePayload();
1137     }
1138     {
1139         emitPSInputLowering();
1140     }
1141 }
1142 
PreAnalysisPass()1143 void CPixelShader::PreAnalysisPass()
1144 {
1145     m_VectorMask = m_CG->NeedVMask();
1146     CShader::PreAnalysisPass();
1147 }
1148 
AddEpilogue(llvm::ReturnInst * ret)1149 void CPixelShader::AddEpilogue(llvm::ReturnInst* ret)
1150 {
1151     if (!IsLastPhase() && m_KillPixelMask)
1152     {
1153         if (!m_CoarseoMask)
1154         {
1155             m_CoarseoMask = GetNewVariable(
1156                 numLanes(m_SIMDSize), ISA_TYPE_UD, EALIGN_GRF, "CoarseOMask");
1157             encoder.Copy(m_CoarseoMask, ImmToVariable(0xFFFFFFFF, ISA_TYPE_UD));
1158             encoder.Push();
1159         }
1160         encoder.SetPredicate(m_KillPixelMask);
1161         encoder.Copy(m_CoarseoMask, ImmToVariable(0x0, ISA_TYPE_UD));
1162         encoder.Push();
1163     }
1164     if (m_phase == PSPHASE_PIXEL)
1165     {
1166         encoder.Label(m_epilogueLabel);
1167         encoder.Push();
1168         // next phase index is in the first dword of the payload
1169         CVariable* flag = GetNewVariable(
1170             numLanes(m_SIMDSize), ISA_TYPE_BOOL, EALIGN_BYTE, true, CName::NONE);
1171         encoder.SetSrcRegion(0, 0, 1, 0);
1172         encoder.Cmp(EPREDICATE_NE, flag, m_PixelPhaseCounter, ImmToVariable(0, ISA_TYPE_UW));
1173         encoder.Push();
1174         encoder.Jump(flag, m_pixelPhaseLabel);
1175         encoder.Push();
1176         const bool isPerCoarse = true;
1177         EOTRenderTarget(GetR1(), isPerCoarse);
1178         m_hasEOT = true;
1179     }
1180     if (IsLastPhase())
1181     {
1182         CShader::AddEpilogue(ret);
1183     }
1184 }
1185 
AddCoarseOutput(CVariable * output,unsigned int index)1186 void CPixelShader::AddCoarseOutput(CVariable* output, unsigned int index)
1187 {
1188     IGC_ASSERT(m_CoarseOutput.find(index) == m_CoarseOutput.end());
1189     m_CoarseOutput[index] = output;
1190 }
1191 
GetCoarseInput(unsigned int index,uint16_t vectorSize,VISA_Type type)1192 CVariable* CPixelShader::GetCoarseInput(unsigned int index, uint16_t vectorSize, VISA_Type type)
1193 {
1194     auto it = m_CoarseInput.find(index);
1195     CVariable* coarseInput = nullptr;
1196     if (it == m_CoarseInput.end())
1197     {
1198         coarseInput = GetNewVariable(
1199             numLanes(m_SIMDSize) * vectorSize, type, EALIGN_GRF, "CoarseInput");
1200         m_CoarseInput[index] = coarseInput;
1201     }
1202     else
1203     {
1204         coarseInput = it->second;
1205     }
1206     return coarseInput;
1207 }
1208 
SetCoarseoMask(CVariable * oMask)1209 void CPixelShader::SetCoarseoMask(CVariable* oMask)
1210 {
1211     m_CoarseoMask = oMask;
1212 }
1213 
GetCoarseMask()1214 CVariable* CPixelShader::GetCoarseMask()
1215 {
1216     if (m_CoarseMaskInput == nullptr)
1217     {
1218         m_CoarseMaskInput = GetNewVariable(
1219             numLanes(m_SIMDSize), ISA_TYPE_F, EALIGN_GRF, "CoarseMaskInput");
1220     }
1221     return m_CoarseMaskInput;
1222 }
1223 
GetCoarseParentIndex()1224 CVariable* CPixelShader::GetCoarseParentIndex()
1225 {
1226     return m_CoarseParentIndex;
1227 }
1228 
GetCurrentPhaseCounter()1229 CVariable* CPixelShader::GetCurrentPhaseCounter()
1230 {
1231     return m_CurrentPhaseCounter;
1232 }
1233 
1234 
CompileSIMDSize(SIMDMode simdMode,EmitPass & EP,llvm::Function & F)1235 bool CPixelShader::CompileSIMDSize(SIMDMode simdMode, EmitPass& EP, llvm::Function& F)
1236 {
1237     if (!CompileSIMDSizeInCommon(simdMode))
1238         return false;
1239 
1240 
1241     CodeGenContext* ctx = GetContext();
1242     if (!ctx->m_retryManager.IsFirstTry())
1243     {
1244         ctx->ClearSIMDInfo(simdMode, EP.m_ShaderDispatchMode);
1245         ctx->SetSIMDInfo(SIMD_RETRY, simdMode, EP.m_ShaderDispatchMode);
1246     }
1247 
1248     bool forceSIMD32 =
1249         (ctx->getCompilerOption().forcePixelShaderSIMDMode &
1250             FLAG_PS_SIMD_MODE_FORCE_SIMD32) != 0;
1251     bool forceSIMD16 =
1252         (ctx->getCompilerOption().forcePixelShaderSIMDMode &
1253             FLAG_PS_SIMD_MODE_FORCE_SIMD16) != 0;
1254 
1255     // For staged compilation, we try to avoid duplicated compilation for the same SIMD mode
1256     if ((simdMode == SIMDMode::SIMD8  && AvoidDupStage2(8 , ctx->m_CgFlag, ctx->m_StagingCtx)) ||
1257         (simdMode == SIMDMode::SIMD16 && AvoidDupStage2(16, ctx->m_CgFlag, ctx->m_StagingCtx)))
1258     {
1259         return false;
1260     }
1261 
1262     if (ctx->PsHighSimdDisable)
1263     {
1264         if (simdMode == SIMDMode::SIMD32)
1265             return false;
1266     }
1267 
1268     if (m_HasoStencil && !ctx->platform.supportsStencil(simdMode))
1269     {
1270         ctx->SetSIMDInfo(SIMD_SKIP_HW, simdMode, EP.m_ShaderDispatchMode);
1271         return false;
1272     }
1273     if (m_HasDouble && simdMode != SIMDMode::SIMD8)
1274     {
1275         ctx->SetSIMDInfo(SIMD_SKIP_HW, simdMode, EP.m_ShaderDispatchMode);
1276         return false;
1277     }
1278     if (m_hasDualBlendSource && simdMode != SIMDMode::SIMD8 &&
1279         (m_phase == PSPHASE_PIXEL || ((m_phase != PSPHASE_LEGACY) && (ctx->platform.getWATable().Wa_1409392000 || ctx->platform.getPlatformInfo().eProductFamily == IGFX_ICELAKE))))
1280     {
1281         // Spec restriction CPS multi-phase cannot use SIMD16 with dual source blending
1282         ctx->SetSIMDInfo(SIMD_SKIP_HW, simdMode, EP.m_ShaderDispatchMode);
1283         return false;
1284     }
1285     if (m_phase != PSPHASE_LEGACY &&
1286         simdMode == SIMDMode::SIMD32)
1287     {
1288         ctx->SetSIMDInfo(SIMD_SKIP_HW, simdMode, EP.m_ShaderDispatchMode);
1289         return false;
1290     }
1291 
1292     if (GetContext()->platform.hasFusedEU() &&
1293         simdMode == SIMDMode::SIMD32 &&
1294         IsPerSample() && !IsStage1(ctx))
1295     {
1296         //Fused SIMD32 not enabled when dispatch rate is per sample
1297         ctx->SetSIMDInfo(SIMD_SKIP_HW, simdMode, EP.m_ShaderDispatchMode);
1298         return false;
1299     }
1300 
1301     if (simdMode == SIMDMode::SIMD16 && EP.m_ShaderDispatchMode == ShaderDispatchMode::NOT_APPLICABLE)
1302     {
1303         if (IsStage1BestPerf(ctx->m_CgFlag, ctx->m_StagingCtx))
1304         {
1305             return true;
1306         }
1307         if (DoSimd16Stage2(ctx->m_StagingCtx))
1308         {
1309             return true;
1310         }
1311 
1312         if (IGC_IS_FLAG_ENABLED(ForceBestSIMD))
1313         {
1314             return true;
1315         }
1316 
1317         if (forceSIMD16)
1318         {
1319             return true;
1320         }
1321         CShader* simd8Program = m_parent->GetShader(SIMDMode::SIMD8);
1322         if (simd8Program != nullptr && simd8Program->ProgramOutput()->m_scratchSpaceUsedBySpills > 0)
1323         {
1324             ctx->SetSIMDInfo(SIMD_SKIP_REGPRES, simdMode, EP.m_ShaderDispatchMode);
1325             return false;
1326         }
1327     }
1328     if (simdMode == SIMDMode::SIMD32)
1329     {
1330         if (DoSimd32Stage2(ctx->m_StagingCtx))
1331         {
1332             return true;
1333         }
1334 
1335         if (forceSIMD32)
1336         {
1337             return true;
1338         }
1339 
1340         CShader* simd16Program = m_parent->GetShader(SIMDMode::SIMD16);
1341         if ((simd16Program == nullptr ||
1342             simd16Program->ProgramOutput()->m_programBin == 0 ||
1343             simd16Program->ProgramOutput()->m_scratchSpaceUsedBySpills > 0))
1344         {
1345             ctx->SetSIMDInfo(SIMD_SKIP_REGPRES, simdMode, EP.m_ShaderDispatchMode);
1346             return false;
1347         }
1348 
1349         const PixelShaderInfo& psInfo = ctx->getModuleMetaData()->psInfo;
1350 
1351         // Disable simd32 compilation on platforms that do not support per-pixel
1352         // dispatch with num samples == 16.
1353         if (psInfo.NumSamples == 16 &&
1354             !ctx->platform.supportSimd32PerPixelPSWithNumSamples16() &&
1355             !IsPerSample())
1356         {
1357             return false;
1358         }
1359 
1360         if (psInfo.ForceEnableSimd32) // UMD forced compilation of simd32.
1361         {
1362             return true;
1363         }
1364 
1365         if (!ctx->platform.enablePSsimd32())
1366         {
1367             ctx->SetSIMDInfo(SIMD_SKIP_HW, simdMode, EP.m_ShaderDispatchMode);
1368             return false;
1369         }
1370 
1371         if (iSTD::BitCount(m_RenderTargetMask) > 1)
1372         {
1373             // don't compile SIMD32 for MRT as we may trash the render cache
1374             ctx->SetSIMDInfo(SIMD_SKIP_PERF, simdMode, EP.m_ShaderDispatchMode);
1375             return false;
1376         }
1377 
1378         Simd32ProfitabilityAnalysis& PA = EP.getAnalysis<Simd32ProfitabilityAnalysis>();
1379         if (PA.isSimd32Profitable())
1380         {
1381             return true;
1382         }
1383         else
1384         {
1385             ctx->SetSIMDInfo(SIMD_SKIP_PERF, simdMode, EP.m_ShaderDispatchMode);
1386         }
1387 
1388         if (simd16Program && static_cast<CPixelShader*>(simd16Program)->m_sendStallCycle == 0)
1389         {
1390             // simd16 doesn't have any latency issue, no need to try simd32
1391             ctx->SetSIMDInfo(SIMD_SKIP_STALL, simdMode, EP.m_ShaderDispatchMode);
1392             return false;
1393         }
1394 
1395         if (ctx->platform.psSimd32SkipStallHeuristic() && ctx->m_DriverInfo.AlwaysEnableSimd32())
1396         {
1397             return true;
1398         }
1399 
1400         if (simd16Program)
1401         {
1402             uint sendStallCycle = static_cast<CPixelShader*>(simd16Program)->m_sendStallCycle;
1403             uint staticCycle = static_cast<CPixelShader*>(simd16Program)->m_staticCycle;
1404             if (sendStallCycle / (float)staticCycle > 0.4)
1405             {
1406                 return true;
1407             }
1408             else
1409             {
1410                 ctx->SetSIMDInfo(SIMD_SKIP_STALL, simdMode, EP.m_ShaderDispatchMode);
1411             }
1412         }
1413         return false;
1414     }
1415     return true;
1416 }
1417 
linkProgram(const SProgramOutput & cps,const SProgramOutput & ps,SProgramOutput & linked)1418 void linkProgram(const SProgramOutput& cps, const SProgramOutput& ps, SProgramOutput& linked)
1419 {
1420     linked.m_unpaddedProgramSize =
1421         cps.m_unpaddedProgramSize + ps.m_unpaddedProgramSize;
1422     linked.m_scratchSpaceUsedByShader =
1423         cps.m_scratchSpaceUsedByShader + ps.m_scratchSpaceUsedByShader;
1424     linked.m_scratchSpaceUsedBySpills =
1425         cps.m_scratchSpaceUsedBySpills + ps.m_scratchSpaceUsedBySpills;
1426     linked.m_scratchSpaceUsedByGtpin =
1427         cps.m_scratchSpaceUsedByGtpin + ps.m_scratchSpaceUsedByGtpin;
1428     linked.m_programSize = iSTD::Align(linked.m_unpaddedProgramSize, 64);
1429     linked.m_programBin = IGC::aligned_malloc(linked.m_programSize, 16);
1430     // Copy coarse phase
1431     memcpy_s(linked.m_programBin,
1432         cps.m_unpaddedProgramSize,
1433         cps.m_programBin,
1434         cps.m_unpaddedProgramSize);
1435     // Copy pixel phase
1436     memcpy_s((char*)linked.m_programBin + cps.m_unpaddedProgramSize,
1437         ps.m_unpaddedProgramSize,
1438         ps.m_programBin,
1439         ps.m_unpaddedProgramSize);
1440     memset((char*)linked.m_programBin + linked.m_unpaddedProgramSize,
1441         0,
1442         linked.m_programSize - linked.m_unpaddedProgramSize);
1443 }
1444 
linkCPS(SPixelShaderKernelProgram * output,SPixelShaderKernelProgram & linked,unsigned int numberPhases)1445 void linkCPS(SPixelShaderKernelProgram* output, SPixelShaderKernelProgram& linked, unsigned int numberPhases)
1446 {
1447     SPixelShaderKernelProgram CoarsePhaseOutput = output[0];
1448     SPixelShaderKernelProgram PixelPhaseOutput = output[1];
1449     linked = output[0];
1450 
1451     if (CoarsePhaseOutput.simd16.m_scratchSpaceUsedBySpills == 0 &&
1452         CoarsePhaseOutput.simd16.m_programBin != nullptr &&
1453         PixelPhaseOutput.simd16.m_scratchSpaceUsedBySpills == 0 &&
1454         PixelPhaseOutput.simd16.m_programBin != nullptr)
1455     {
1456         linkProgram(CoarsePhaseOutput.simd16, PixelPhaseOutput.simd16, linked.simd16);
1457     }
1458     else
1459     {
1460         linked.simd16.m_programBin = nullptr;
1461         linked.simd16.m_programSize = 0;
1462     }
1463     linkProgram(CoarsePhaseOutput.simd8, PixelPhaseOutput.simd8, linked.simd8);
1464     linked.hasPullBary = true;
1465     linked.renderTargetMask = (CoarsePhaseOutput.renderTargetMask || PixelPhaseOutput.renderTargetMask);
1466     IGC_ASSERT_MESSAGE(numberPhases == 2, "maximum number of phases is 2");
1467 }
1468 
CodeGen(PixelShaderContext * ctx)1469 void CodeGen(PixelShaderContext* ctx)
1470 {
1471     Function* coarsePhase = nullptr;
1472     Function* pixelPhase = nullptr;
1473     NamedMDNode* coarseNode = nullptr;
1474     NamedMDNode* pixelNode = nullptr;
1475     MetaDataUtils* pMdUtils = nullptr;
1476     if (!HasSavedIR(ctx))
1477     {
1478         coarseNode = ctx->getModule()->getNamedMetadata(NAMED_METADATA_COARSE_PHASE);
1479         pixelNode = ctx->getModule()->getNamedMetadata(NAMED_METADATA_PIXEL_PHASE);
1480         if (coarseNode)
1481         {
1482             coarsePhase = mdconst::dyn_extract<Function>(coarseNode->getOperand(0)->getOperand(0));
1483         }
1484         if (pixelNode)
1485         {
1486             pixelPhase = mdconst::dyn_extract<Function>(pixelNode->getOperand(0)->getOperand(0));
1487         }
1488         pMdUtils = ctx->getMetaDataUtils();
1489     }
1490 
1491     bool codegenDone = false;
1492 
1493     CShaderProgram::KernelShaderMap coarseShaders;
1494     CShaderProgram::KernelShaderMap pixelShaders;
1495 
1496     if (coarsePhase && pixelPhase)
1497     {
1498         // Cancelling staged compilation for multi stage PS.
1499         ctx->m_CgFlag = FLAG_CG_ALL_SIMDS;
1500 
1501         //Multi stage PS, need to do separate compiler and link them
1502         unsigned int numStage = 2;
1503         PSSignature signature;
1504         FunctionInfoMetaDataHandle coarseFI, pixelFI;
1505         coarseFI = pMdUtils->getFunctionsInfoItem(coarsePhase);
1506         pixelFI = pMdUtils->getFunctionsInfoItem(pixelPhase);
1507 
1508         for (unsigned int i = 0; i < numStage; i++)
1509         {
1510             Function* phaseFunc = (i == 0) ? coarsePhase : pixelPhase;
1511             FunctionInfoMetaDataHandle phaseFI = (i == 0) ? coarseFI : pixelFI;
1512             CShaderProgram::KernelShaderMap& shaders = (i == 0) ? coarseShaders : pixelShaders;
1513 
1514             pMdUtils->clearFunctionsInfo();
1515             pMdUtils->setFunctionsInfoItem(phaseFunc, phaseFI);
1516             pMdUtils->save(phaseFunc->getContext());
1517             CodeGen(ctx, shaders, &signature);
1518 
1519             // Read the phase function from metadata again as it could be changed in the PushAnalysis pass
1520             if (i == 0)
1521             {
1522                 coarseNode = ctx->getModule()->getNamedMetadata(NAMED_METADATA_COARSE_PHASE);
1523             }
1524             else
1525             {
1526                 pixelNode = ctx->getModule()->getNamedMetadata(NAMED_METADATA_PIXEL_PHASE);
1527             }
1528         }
1529 
1530         codegenDone = true;
1531 
1532 
1533         for (unsigned int i = 0; i < numStage; i++)
1534         {
1535             Function* phaseFunc = (i == 0) ?
1536                 mdconst::dyn_extract<Function>(coarseNode->getOperand(0)->getOperand(0)) :
1537                 mdconst::dyn_extract<Function>(pixelNode->getOperand(0)->getOperand(0));
1538 
1539             CShaderProgram::KernelShaderMap& shaders = (i == 0) ? coarseShaders : pixelShaders;
1540             CPixelShader* simd8Shader = static_cast<CPixelShader*>(shaders[phaseFunc]->GetShader(SIMDMode::SIMD8));
1541             CPixelShader* simd16Shader = static_cast<CPixelShader*>(shaders[phaseFunc]->GetShader(SIMDMode::SIMD16));
1542             CPixelShader* simd32Shader = static_cast<CPixelShader*>(shaders[phaseFunc]->GetShader(SIMDMode::SIMD32));
1543             if (!((simd8Shader && simd8Shader->ProgramOutput()->m_programBin) ||
1544                 (simd16Shader && simd16Shader->ProgramOutput()->m_programBin) ||
1545                 (simd32Shader && simd32Shader->ProgramOutput()->m_programBin)
1546                 ))
1547             {
1548                 shaders[phaseFunc]->DeleteShader(SIMDMode::SIMD8);
1549                 shaders[phaseFunc]->DeleteShader(SIMDMode::SIMD16);
1550                 shaders[phaseFunc]->DeleteShader(SIMDMode::SIMD32);
1551                 if (i == 0)
1552                 {
1553                     delete shaders[coarsePhase];
1554                     coarsePhase = nullptr;
1555                 }
1556                 else
1557                 {
1558                     delete shaders[pixelPhase];
1559                     pixelPhase = nullptr;
1560                 }
1561             }
1562         }
1563 
1564         if (coarsePhase && pixelPhase)
1565         {
1566             SPixelShaderKernelProgram outputs[2];
1567             memset(&outputs, 0, 2 * sizeof(SPixelShaderKernelProgram));
1568 
1569             for (unsigned int i = 0; i < numStage; i++)
1570             {
1571                 Function* phaseFunc = (i == 0) ?
1572                     mdconst::dyn_extract<Function>(coarseNode->getOperand(0)->getOperand(0)) :
1573                     mdconst::dyn_extract<Function>(pixelNode->getOperand(0)->getOperand(0));
1574 
1575                 CShaderProgram::KernelShaderMap& shaders = (i == 0) ? coarseShaders : pixelShaders;
1576 
1577                 shaders[phaseFunc]->FillProgram(&outputs[i]);
1578                 COMPILER_SHADER_STATS_PRINT(shaders[phaseFunc]->m_shaderStats, ShaderType::PIXEL_SHADER, ctx->hash, "");
1579                 COMPILER_SHADER_STATS_SUM(ctx->m_sumShaderStats, shaders[phaseFunc]->m_shaderStats, ShaderType::PIXEL_SHADER);
1580                 COMPILER_SHADER_STATS_DEL(shaders[phaseFunc]->m_shaderStats);
1581                 delete shaders[phaseFunc];
1582             }
1583 
1584             linkCPS(outputs, ctx->programOutput, numStage);
1585             // Kernels allocated in CISABuilder.cpp (Compile())
1586             // are freed in CompilerOutputOGL.hpp (DeleteShaderCompilerOutputOGL())
1587             // in case of CPS multistage PS they are separated.
1588             // Need to free original kernels here as DeleteShaderCompilerOutputOGL()
1589             // will clear new allocations for separated phases in this case.
1590             for (unsigned int i = 0; i < numStage; i++)
1591             {
1592                 outputs[i].simd8.Destroy();
1593                 outputs[i].simd16.Destroy();
1594                 outputs[i].simd32.Destroy();
1595             }
1596         }
1597     }
1598 
1599     if (!(coarsePhase && pixelPhase))
1600     {
1601         CShaderProgram::KernelShaderMap shaders;
1602         Function* pFunc = nullptr;
1603 
1604         if (!codegenDone)
1605         {
1606             // Single PS
1607             CodeGen(ctx, shaders);
1608             pFunc = getUniqueEntryFunc(ctx->getMetaDataUtils(), ctx->getModuleMetaData());
1609         }
1610         else
1611         {
1612             shaders = coarsePhase ? coarseShaders : pixelShaders;
1613             pFunc = coarsePhase ? coarsePhase : pixelPhase;
1614         }
1615 
1616         // gather data to send back to the driver
1617         shaders[pFunc]->FillProgram(&ctx->programOutput);
1618         COMPILER_SHADER_STATS_PRINT(shaders[pFunc]->m_shaderStats, ShaderType::PIXEL_SHADER, ctx->hash, "");
1619         COMPILER_SHADER_STATS_SUM(ctx->m_sumShaderStats, shaders[pFunc]->m_shaderStats, ShaderType::PIXEL_SHADER);
1620         COMPILER_SHADER_STATS_DEL(shaders[pFunc]->m_shaderStats);
1621         delete shaders[pFunc];
1622     }
1623 
1624 }
1625 
CreatePassThroughVar()1626 void CPixelShader::CreatePassThroughVar()
1627 {
1628     CodeGenContext* ctx = GetContext();
1629     NamedMDNode* pixelNode = ctx->getModule()->getNamedMetadata("pixel_phase");
1630     if (!pixelNode)
1631     {
1632         // if there is no pixel phase we have nothing to do
1633         return;
1634     }
1635     IGC_ASSERT(nullptr != GetR1());
1636     encoder.MarkAsOutput(GetR1());
1637     Function* pixelPhase = mdconst::dyn_extract<Function>(pixelNode->getOperand(0)->getOperand(0));
1638     for (auto BB = pixelPhase->begin(), BE = pixelPhase->end(); BB != BE; ++BB)
1639     {
1640         llvm::BasicBlock* pLLVMBB = &(*BB);
1641         llvm::BasicBlock::InstListType& instructionList = pLLVMBB->getInstList();
1642         for (auto I = instructionList.begin(), E = instructionList.end(); I != E; ++I)
1643         {
1644             if (GenIntrinsicInst * intr = dyn_cast<GenIntrinsicInst>(I))
1645             {
1646                 GenISAIntrinsic::ID IID = intr->getIntrinsicID();
1647                 if (IID == GenISAIntrinsic::GenISA_DCL_inputVec)
1648                 {
1649                     unsigned int setupIndex =
1650                         (uint)llvm::cast<llvm::ConstantInt>(intr->getOperand(0))->getZExtValue();
1651                     CVariable* input = GetInputDelta(setupIndex);
1652                     encoder.MarkAsOutput(input);
1653                 }
1654                 else if (IID == GenISAIntrinsic::GenISA_SampleOffsetX)
1655                 {
1656                     CVariable* offset = GetSampleOffsetX();
1657                     encoder.MarkAsOutput(offset);
1658                 }
1659                 else if (IID == GenISAIntrinsic::GenISA_SampleOffsetY)
1660                 {
1661                     CVariable* offset = GetSampleOffsetY();
1662                     encoder.MarkAsOutput(offset);
1663                 }
1664                 else if (IID == GenISAIntrinsic::GenISA_DCL_SystemValue)
1665                 {
1666                     SGVUsage usage = (SGVUsage)llvm::cast<llvm::ConstantInt>(intr->getOperand(0))->getZExtValue();
1667                     if (usage == POSITION_Z || usage == POSITION_W)
1668                     {
1669                         CVariable* deltas = GetZWDelta();
1670                         encoder.MarkAsOutput(deltas);
1671                     }
1672                 }
1673             }
1674         }
1675     }
1676     GetDispatchSignature().inputOffset.resize(setup.size());
1677 }
1678 
ExtractGlobalVariables()1679 void CPixelShader::ExtractGlobalVariables()
1680 {
1681     llvm::Module* module = GetContext()->getModule();
1682     llvm::GlobalVariable* pGlobal = module->getGlobalVariable("SamplerCount");
1683     if (pGlobal)
1684     {
1685         auto samplerCount = int_cast<unsigned int>(llvm::cast<llvm::ConstantInt>(pGlobal->getInitializer())->getZExtValue());
1686         m_samplerCount = samplerCount;
1687     }
1688 }
1689 
IsReturnBlock(llvm::BasicBlock * bb)1690 bool CPixelShader::IsReturnBlock(llvm::BasicBlock* bb)
1691 {
1692     return llvm::isa<llvm::ReturnInst>(bb->getTerminator());
1693 }
1694 
IsLastRTWrite(llvm::GenIntrinsicInst * inst)1695 bool CPixelShader::IsLastRTWrite(llvm::GenIntrinsicInst* inst)
1696 {
1697     bool isLastRT;
1698     isLastRT = llvm::isa<llvm::ReturnInst>(inst->getNextNode());
1699 
1700     return isLastRT && IsLastPhase() && GetPhase() != PSPHASE_PIXEL;
1701 }
1702 
LowerPSInput()1703 bool CPixelShader::LowerPSInput()
1704 {
1705     return (m_SIMDSize == SIMDMode::SIMD16 || !m_Platform->supportMixMode());
1706 }
1707 
IsInterpolationLinear(e_interpolation mode)1708 bool CPixelShader::IsInterpolationLinear(e_interpolation mode)
1709 {
1710     return mode == EINTERPOLATION_LINEAR ||
1711         mode == EINTERPOLATION_LINEARCENTROID ||
1712         mode == EINTERPOLATION_LINEARSAMPLE ||
1713         mode == EINTERPOLATION_LINEARNOPERSPECTIVE ||
1714         mode == EINTERPOLATION_LINEARNOPERSPECTIVECENTROID ||
1715         mode == EINTERPOLATION_LINEARNOPERSPECTIVESAMPLE;
1716 }
1717 
emitPSInputLowering()1718 void CPixelShader::emitPSInputLowering()
1719 {
1720     auto iterSetupIndex = loweredSetupIndexes.begin();
1721     auto iterSetupIndexEnd = loweredSetupIndexes.end();
1722 
1723 
1724     if (LowerPSInput())
1725     {
1726         for (; iterSetupIndex != iterSetupIndexEnd; ++iterSetupIndex)
1727         {
1728             bool combineTwoDelta = false;
1729             auto nextElemt = iterSetupIndex;
1730             nextElemt++;
1731             if (nextElemt != iterSetupIndexEnd && *iterSetupIndex % 2 == 0 && *iterSetupIndex + 1 == *nextElemt)
1732             {
1733                 combineTwoDelta = true;
1734             }
1735             unsigned int index = *iterSetupIndex;
1736             CVariable* inputVar = GetInputDelta(index, combineTwoDelta);
1737             CVariable* inputVarLowered = GetInputDeltaLowered(index);
1738             if (encoder.IsCodePatchCandidate())
1739             {
1740                 encoder.SetPayloadSectionAsPrimary();
1741                 AddPatchTempSetup(inputVarLowered);
1742             }
1743 
1744             encoder.SetSrcRegion(0, 1, 1, 0);
1745             encoder.SetUniformSIMDSize(combineTwoDelta ? SIMDMode::SIMD8 : SIMDMode::SIMD4);
1746             encoder.SetNoMask();
1747             encoder.Cast(inputVarLowered, inputVar);
1748             encoder.Push();
1749             if (encoder.IsCodePatchCandidate())
1750             {
1751                 encoder.SetPayloadSectionAsSecondary();
1752             }
1753             if (combineTwoDelta)
1754             {
1755                 ++iterSetupIndex;
1756             }
1757         }
1758 
1759         for (uint i = EINTERPOLATION_LINEAR; i < NUMBER_EINTERPOLATION; ++i)
1760         {
1761             if (m_ModeUsedHalf.test(i))
1762             {
1763                 CVariable* baryVar = GetBaryReg((e_interpolation)i);
1764                 CVariable* baryVarLowered = GetBaryRegLoweredHalf((e_interpolation)i);
1765 
1766                 if (encoder.IsCodePatchCandidate())
1767                 {
1768                     encoder.SetPayloadSectionAsPrimary();
1769                 }
1770                 for (uint8_t i = 0; i < m_numberInstance; ++i)
1771                 {
1772                     encoder.SetSecondHalf(i == 1);
1773 
1774                     // mov SIMD8 U1/barry(0, 0) in to tmpU(0, 0)
1775                     // mov (8) r1.0<1>:hf r2.0<8;8,1>:f {Align1, Q1, NoMask} // #??:$27:%30
1776                     encoder.SetSimdSize(SIMDMode::SIMD8);
1777                     encoder.SetNoMask();
1778                     encoder.Cast(baryVarLowered, baryVar);
1779                     encoder.Push();
1780 
1781                     if (m_SIMDSize == SIMDMode::SIMD16)
1782                     {
1783                         // mov SIMD8 U2/barry(2, 0) in to tmpU(0, 8)
1784                         // mov (8) r1.8<1>:hf r4.0<8;8,1>:f {Align1, Q1, NoMask} // #??:$28:%31
1785                         encoder.SetSrcSubVar(0, 2);
1786                         encoder.SetDstSubReg(8);
1787                         encoder.SetSimdSize(SIMDMode::SIMD8);
1788                         encoder.SetNoMask();
1789                         encoder.Cast(baryVarLowered, baryVar);
1790                         encoder.Push();
1791                     }
1792 
1793                     // mov SIMD8 V1/barry(1, 0) in to tmpV(0, 0)
1794                     // mov (8) r12.0<1>:hf r3.0<8;8,1>:f {Align1, Q1, NoMask} // #??:$29:%32
1795                     encoder.SetSrcSubVar(0, 1);
1796                     encoder.SetSimdSize(SIMDMode::SIMD8);
1797                     encoder.SetNoMask();
1798                     encoder.SetDstSubReg(numLanes(m_SIMDSize));
1799                     encoder.Cast(baryVarLowered, baryVar);
1800                     encoder.Push();
1801 
1802                     if (m_SIMDSize == SIMDMode::SIMD16)
1803                     {
1804                         // mov SIMD8 V1/barry(3, 0) in to tmpV(0, 0)
1805                         // mov (8) r12.8<1>:hf r5.0<8;8,1>:f {Align1, Q1, NoMask} // #??:$30:%33
1806                         encoder.SetSrcSubVar(0, 3);
1807                         encoder.SetDstSubReg(8);
1808                         encoder.SetSimdSize(SIMDMode::SIMD8);
1809                         encoder.SetNoMask();
1810                         encoder.SetDstSubVar(1);
1811                         encoder.Cast(baryVarLowered, baryVar);
1812                         encoder.Push();
1813                     }
1814                     encoder.SetSecondHalf(false);
1815                 }
1816                 if (encoder.IsCodePatchCandidate())
1817                 {
1818                     encoder.SetPayloadSectionAsSecondary();
1819                 }
1820             }
1821         }
1822     }
1823 }
1824 
MarkConstantInterpolation(unsigned int index)1825 void CPixelShader::MarkConstantInterpolation(unsigned int index)
1826 {
1827     m_ConstantInterpolationMask |= BIT(index / 4);
1828 }
1829 
1830 // Take PS attribute and return active components within, encoded as HW expects.
GetActiveComponents(uint attribute) const1831 USC::GFX3DSTATE_SF_ATTRIBUTE_ACTIVE_COMPONENT CPixelShader::GetActiveComponents(uint attribute) const
1832 {
1833     USC::GFX3DSTATE_SF_ATTRIBUTE_ACTIVE_COMPONENT result =
1834         USC::GFX3DSTATE_SF_ATTRIBUTE_ACTIVE_COMPONENT_DISABLED;
1835     for (auto it = m_SetupIndicesUsed.lower_bound(attribute * 4);
1836         it != m_SetupIndicesUsed.end(); ++it)
1837     {
1838         if (attribute != (*it / 4)) break;
1839         switch (*it % 4)
1840         {
1841         case 0:
1842         case 1:
1843             result = USC::GFX3DSTATE_SF_ATTRIBUTE_ACTIVE_COMPONENT_XY;
1844             break;
1845         case 2:
1846             result = USC::GFX3DSTATE_SF_ATTRIBUTE_ACTIVE_COMPONENT_XYZ;
1847             break;
1848         case 3:
1849             result = USC::GFX3DSTATE_SF_ATTRIBUTE_ACTIVE_COMPONENT_XYZW;
1850             break;
1851         }
1852     }
1853     return result;
1854 }
1855 
1856 // this method must be run only after CShader::PreAnalysisPass() was run
MapPushedInputs()1857 void CPixelShader::MapPushedInputs()
1858 {
1859     // first gather setup index info
1860     for (auto I = pushInfo.inputs.begin(), E = pushInfo.inputs.end(); I != E; I++)
1861     {
1862         m_SetupIndicesUsed.insert(I->second.index);
1863         m_MaxSetupIndex = std::max(I->second.index, m_MaxSetupIndex);
1864     }
1865     // then map using proper indexing
1866     for (auto I = pushInfo.inputs.begin(), E = pushInfo.inputs.end(); I != E; I++)
1867     {
1868         // We need to map the value associated with the value pushed to a physical register
1869         if (I->second.interpolationMode == EINTERPOLATION_CONSTANT)
1870         {
1871             this->MarkConstantInterpolation(I->second.index);
1872         }
1873         CVariable* var = GetSymbol(m_argListCache[I->second.argIndex]);
1874         AddSetup(getSetupIndex(I->second.index), var);
1875     }
1876 }
1877 
getSetupIndex(uint inputIndex)1878 int CPixelShader::getSetupIndex(uint inputIndex)
1879 {
1880     {
1881         return inputIndex;
1882     }
1883 }
1884 
1885 } // namespace IGC
1886