1 /*========================== begin_copyright_notice ============================
2
3 Copyright (C) 2017-2021 Intel Corporation
4
5 SPDX-License-Identifier: MIT
6
7 ============================= end_copyright_notice ===========================*/
8
9 #include "common/LLVMWarningsPush.hpp"
10 #include <llvm/Support/ScaledNumber.h>
11 #include "llvm/ADT/StringSwitch.h"
12 #include "common/LLVMWarningsPop.hpp"
13 #include "Compiler/CISACodeGen/PixelShaderCodeGen.hpp"
14 #include "Compiler/CISACodeGen/messageEncoding.hpp"
15 #include "common/allocator.h"
16 #include <iStdLib/utility.h>
17 #include "common/secure_mem.h"
18 #include "Simd32Profitability.hpp"
19 #include "EmitVISAPass.hpp"
20 #include "AdaptorCommon/API/igc.h"
21 #include "Probe/Assertion.h"
22
23 /***********************************************************************************
24 This file contains the code specific to pixel shader
25 ************************************************************************************/
26 using namespace llvm;
27 using namespace IGC::IGCMD;
28
29 namespace IGC
30 {
// Returns the variable bound to the R1 payload header register.
CVariable* CPixelShader::GetR1()
{
    return m_R1;
}
35
// Returns the (mutable) list of variables aliasing the low half of R1.
std::vector<CVariable*>& CPixelShader::GetR1Lo()
{
    return m_R1Lo;
}
40
// Records another variable aliasing the low half of R1.
void CPixelShader::AppendR1Lo(CVariable* var)
{
    m_R1Lo.push_back(var);
}
45
// Returns the R1 header forwarded from the coarse phase.
// Only meaningful while compiling the per-pixel phase of a
// multi-rate (coarse pixel shading) shader.
CVariable* CPixelShader::GetCoarseR1()
{
    return m_CoarseR1;
}
51
AllocatePayload()52 void CPixelShader::AllocatePayload()
53 {
54 if (m_phase == PSPHASE_COARSE)
55 {
56 CreatePassThroughVar();
57 }
58 switch (m_phase)
59 {
60 case PSPHASE_LEGACY:
61 case PSPHASE_COARSE:
62 AllocatePSPayload();
63 break;
64 case PSPHASE_PIXEL:
65 AllocatePixelPhasePayload();
66 break;
67 default:
68 IGC_ASSERT_MESSAGE(0, "unknown phase");
69 break;
70 }
71 }
72
// Lays out the payload for the per-pixel phase of a multi-rate shader.
// Offsets are not computed here; they are read back from the dispatch
// signature that was recorded when the coarse phase was compiled, so the
// two phases agree on where each input lives.
void CPixelShader::AllocatePixelPhasePayload()
{
    unsigned int r1Offset = GetDispatchSignature().r1;
    AllocateInput(m_CoarseR1, r1Offset);
    // Bind each active setup (attribute) input at its signature offset.
    for (uint i = 0; i < setup.size(); i++)
    {
        if (setup[i])
        {
            uint subRegOffset = 0;
            // PS uniform inputs are stored in the 3rd subreg
            if (setup[i]->GetSize() == SIZE_DWORD)
            {
                subRegOffset = 3 * SIZE_DWORD;
            }
            IGC_ASSERT(m_Signature != nullptr);
            uint offset = GetDispatchSignature().inputOffset[i];
            AllocateInput(setup[i], offset + subRegOffset);
        }
    }
    if (m_ZWDelta)
    {
        unsigned int offset = GetDispatchSignature().ZWDelta;
        AllocateInput(m_ZWDelta, offset);
    }
    // Sample offsets X and Y share one slot: Y lives one OWORD after X.
    if (m_SampleOffsetX || m_SampleOffsetY)
    {
        unsigned int offset = GetDispatchSignature().pixelOffset;
        if (m_SampleOffsetX)
        {
            AllocateInput(m_SampleOffsetX, offset);
        }
        if (m_SampleOffsetY)
        {
            AllocateInput(m_SampleOffsetY, offset + SIZE_OWORD);
        }
    }
    // Values produced by the coarse phase and consumed here; offsets were
    // recorded per-output in the signature by the coarse compilation.
    for (auto it = m_CoarseInput.begin(), ie = m_CoarseInput.end(); it != ie; ++it)
    {
        uint offset = GetDispatchSignature().PSOutputOffset.find(it->first)->second;
        AllocateInput(it->second, offset);
    }
    if (m_CoarseMaskInput)
    {
        uint offset = GetDispatchSignature().oMaskOffset;
        AllocateInput(m_CoarseMaskInput, offset);
    }
}
120
// Lays out the pixel shader payload for the legacy and coarse phases:
// R0/R1 headers, barycentric parameters, SGVs (position Z/W, XY offset,
// coverage mask, CPS requested size, sample offsets, ZW delta), pushed
// constants, and finally the vertex attributes. When compiling against a
// multi-rate signature (m_Signature set), every offset that the pixel
// phase will need is recorded in the dispatch signature. All offsets are
// in bytes.
void CPixelShader::AllocatePSPayload()
{
    bool forceLiveOut = false;
    // In bytes
    uint offset = 0;

    // R0 is always allocated as a predefined variable. Increase offset for R0
    IGC_ASSERT(m_R0);
    if (encoder.IsCodePatchCandidate())
    {
        // For the payload section, we need to mark inputs to be outputs
        // so that inputs will be alive across the entire payload section
        forceLiveOut = true;
        encoder.MarkAsPayloadLiveOut(m_R0);
    }
    offset += getGRFSize();

    if (m_Signature)
    {
        GetDispatchSignature().r1 = offset;
    }

    {
        IGC_ASSERT(GetR1());
        // R1 occupies one GRF per SIMD instance.
        for (uint i = 0; i < GetR1()->GetNumberInstance(); i++)
        {
            AllocateInput(GetR1(), offset, i, forceLiveOut);
            offset += getGRFSize();
        }
    }

    uint numInstances = m_numberInstance;

    // Per-instance payload: barycentrics first, then the SGV inputs.
    for (uint i = 0; i < numInstances; i++)
    {
        // allocate size for bary
        if (m_PerspectivePixel)
        {
            AllocateInput(m_PerspectivePixel, offset, i, forceLiveOut);
            offset += m_PerspectivePixel->GetSize();
        }
        if (m_PerspectiveCentroid)
        {
            AllocateInput(m_PerspectiveCentroid, offset, i, forceLiveOut);
            offset += m_PerspectiveCentroid->GetSize();
        }
        if (m_PerspectiveSample)
        {
            AllocateInput(m_PerspectiveSample, offset, i, forceLiveOut);
            offset += m_PerspectiveSample->GetSize();
        }
        if (m_NoPerspectivePixel)
        {
            AllocateInput(m_NoPerspectivePixel, offset, i, forceLiveOut);
            offset += m_NoPerspectivePixel->GetSize();
        }
        if (m_NoPerspectiveCentroid)
        {
            AllocateInput(m_NoPerspectiveCentroid, offset, i, forceLiveOut);
            offset += m_NoPerspectiveCentroid->GetSize();
        }
        if (m_NoPerspectiveSample)
        {
            AllocateInput(m_NoPerspectiveSample, offset, i, forceLiveOut);
            offset += m_NoPerspectiveSample->GetSize();
        }

        // Add support for POSITION_Z
        if (m_pPositionZPixel)
        {
            AllocateInput(m_pPositionZPixel, offset, i, forceLiveOut);
            offset += m_pPositionZPixel->GetSize();
        }

        // Add support for POSITION_W
        if (m_pPositionWPixel)
        {
            AllocateInput(m_pPositionWPixel, offset, i, forceLiveOut);
            offset += m_pPositionWPixel->GetSize();
        }

        // Add support for POSITION_XY_OFFSET
        if (m_pPositionXYOffset)
        {
            {
                AllocateInput(m_pPositionXYOffset, offset, i, forceLiveOut);
                offset += m_pPositionXYOffset->GetSize();
            }
        }

        // Add support for input coverage mask
        if (m_pInputCoverageMask)
        {
            AllocateInput(m_pInputCoverageMask, offset, i, forceLiveOut);
            offset += m_pInputCoverageMask->GetSize();
        }

        {
            // CPS requested size X/Y share one GRF: Y is one OWORD after X.
            if (m_pCPSRequestedSizeX || m_pCPSRequestedSizeY)
            {
                if (m_pCPSRequestedSizeX)
                {
                    AllocateInput(m_pCPSRequestedSizeX, offset, i, forceLiveOut);
                }
                if (m_pCPSRequestedSizeY)
                {
                    AllocateInput(m_pCPSRequestedSizeY, offset + SIZE_OWORD, i, forceLiveOut);
                }
                offset += getGRFSize();
            }
            // ZW delta is single-instance; it is allocated once (as
            // instance 0) at the end of the last instance's section.
            if (m_ZWDelta && i == numInstances - 1)
            {
                AllocateInput(m_ZWDelta, offset, 0, forceLiveOut);
                if (m_Signature)
                {
                    GetDispatchSignature().ZWDelta = offset;
                }
                offset += getGRFSize();
            }
            // Sample offsets X/Y share one GRF: Y is one OWORD after X.
            if (m_SampleOffsetX || m_SampleOffsetY)
            {
                if (m_SampleOffsetX)
                {
                    AllocateInput(m_SampleOffsetX, offset, i, forceLiveOut);
                }
                if (m_SampleOffsetY)
                {
                    AllocateInput(m_SampleOffsetY, offset + SIZE_OWORD, i, forceLiveOut);
                }
                if (m_Signature)
                {
                    GetDispatchSignature().pixelOffset = offset;
                }
                offset += getGRFSize();
            }
        }
    }

    IGC_ASSERT(offset % getGRFSize() == 0);

    // need to return the starting grf for constant to client
    ProgramOutput()->m_startReg = offset / getGRFSize();

    // allocate space for NOS constants and pushed constants
    AllocateConstants3DShader(offset);


    // Allocate size for attributes coming from VS
    IGC_ASSERT(offset % getGRFSize() == 0);
    unsigned int payloadEnd = offset;
    // Each attribute slot advances offset by 4 DWORDs whether or not it is
    // used; payloadEnd only advances past slots that are actually bound.
    for (uint i = 0; i < setup.size(); i++)
    {
        if (setup[i] && setup[i]->GetAlias() == NULL)
        {
            uint subRegOffset = 0;
            // PS uniform (constant interpolation) inputs
            if (setup[i]->GetSize() == SIZE_DWORD)
            {
                {
                    subRegOffset = 3 * SIZE_DWORD;
                }
            }

            AllocateInput(setup[i], offset + subRegOffset, 0, forceLiveOut);

            if (m_Signature)
            {
                GetDispatchSignature().inputOffset[i] = offset;
            }
            payloadEnd = offset + setup[i]->GetSize();
        }

        {
            offset += 4 * SIZE_DWORD;
        }
    }

    offset = payloadEnd;

    // For code patching, we preallocate live-out of payload into physical registers.
    // The preallocation must be aligned across contexts to ensure it is patchable.
    if (encoder.IsCodePatchCandidate())
    {
        if (encoder.HasPrevKernel())
        {
            // Get previous context's PayloadEnd to ensure it is patchable
            offset = encoder.GetPayloadEnd();
        }
        else
        {
            encoder.SetPayloadEnd(payloadEnd);
        }
    }
    // Round up to the next GRF boundary before the preallocations below.
    if (offset % getGRFSize() != 0)
    {
        offset += (getGRFSize() - (offset % getGRFSize()));
    }

    // This is the preallocation for payload live-outs.
    for (auto& var : payloadLiveOutSetup)
    {
        IGC_ASSERT(offset% getGRFSize() == 0);
        AllocateInput(var, offset, 0, true);
        offset += var->GetSize();
    }

    // This is the preallocation for temp variables in payload sections
    for (auto& var : payloadTempSetup)
    {
        AllocateInput(var, offset);
        offset += var->GetSize();
    }

    // When preallocation failed (exceeding the total number of physical registers), early exit and give up this compilation.
    ProgramOutput()->m_scratchSpaceUsedBySpills = offset >= encoder.GetVISAKernel()->getNumRegTotal() * getGRFSize();
    if (ProgramOutput()->m_scratchSpaceUsedBySpills)
    {
        return;
    }

    offset = payloadEnd;

    // create output registers for coarse phase
    for (const auto& it : m_CoarseOutput)
    {
        CVariable* output = it.second;
        offset = iSTD::Align(offset, (size_t) 1 << output->GetAlign());
        AllocateOutput(output, offset);
        if (m_Signature)
        {
            GetDispatchSignature().PSOutputOffset[it.first] = offset;
        }
        offset += output->GetSize();
    }
    if (m_CoarseoMask)
    {
        offset = iSTD::Align(offset, (size_t) 1 << m_CoarseoMask->GetAlign());
        AllocateOutput(m_CoarseoMask, offset);
        if (m_Signature)
        {
            GetDispatchSignature().oMaskOffset = offset;
            GetDispatchSignature().CoarseMask = true;
        }
        offset += m_CoarseoMask->GetSize();
    }
    // Final check: coarse outputs may also have pushed us past the
    // register file; flag the failure the same way as above.
    ProgramOutput()->m_scratchSpaceUsedBySpills = (offset >= encoder.GetVISAKernel()->getNumRegTotal() * getGRFSize());
}
368
GetDispatchSignature()369 PSSignature::DispatchSignature& CPixelShader::GetDispatchSignature()
370 {
371 switch (m_dispatchSize)
372 {
373 case SIMDMode::SIMD8:
374 return m_Signature->dispatchSign[0];
375 case SIMDMode::SIMD16:
376 return m_Signature->dispatchSign[1];
377 case SIMDMode::SIMD32:
378 return m_Signature->dispatchSign[2];
379 default:
380 IGC_ASSERT_MESSAGE(0, "bad dispatch size");
381 break;
382 }
383 return m_Signature->dispatchSign[0];
384 }
385
GetBaryReg(e_interpolation mode)386 CVariable* CPixelShader::GetBaryReg(e_interpolation mode)
387 {
388 uint numInstances = m_numberInstance;
389 uint numElements = 2 * numLanes(m_SIMDSize);
390
391
392 CVariable* baryReg = 0;
393 switch (mode)
394 {
395 case EINTERPOLATION_LINEAR:
396 if (!m_PerspectivePixel) {
397 m_PerspectivePixel =
398 GetNewVariable(numElements, ISA_TYPE_F, EALIGN_GRF, false, numInstances, "PerspectivePixel");
399 }
400 baryReg = m_PerspectivePixel;
401 break;
402 case EINTERPOLATION_LINEARCENTROID:
403 if (!m_PerspectiveCentroid) {
404 m_PerspectiveCentroid =
405 GetNewVariable(
406 numElements, ISA_TYPE_F, EALIGN_GRF,
407 false, numInstances, "LinearCentroid");
408 }
409 baryReg = m_PerspectiveCentroid;
410 break;
411 case EINTERPOLATION_LINEARSAMPLE:
412 if (!m_PerspectiveSample) {
413 m_PerspectiveSample =
414 GetNewVariable(
415 numElements, ISA_TYPE_F, EALIGN_GRF,
416 false, numInstances, "LinearSample");
417 }
418 baryReg = m_PerspectiveSample;
419 break;
420 case EINTERPOLATION_LINEARNOPERSPECTIVE:
421 if (!m_NoPerspectivePixel) {
422 m_NoPerspectivePixel =
423 GetNewVariable(
424 numElements, ISA_TYPE_F, EALIGN_GRF,
425 false, numInstances, "LinearNoPerspective");
426 }
427 baryReg = m_NoPerspectivePixel;
428 break;
429 case EINTERPOLATION_LINEARNOPERSPECTIVECENTROID:
430 if (!m_NoPerspectiveCentroid) {
431 m_NoPerspectiveCentroid =
432 GetNewVariable(
433 numElements, ISA_TYPE_F, EALIGN_GRF,
434 false, numInstances, "NoPerspectiveCentroid");
435 }
436 baryReg = m_NoPerspectiveCentroid;
437 break;
438 case EINTERPOLATION_LINEARNOPERSPECTIVESAMPLE:
439 if (!m_NoPerspectiveSample) {
440 m_NoPerspectiveSample =
441 GetNewVariable(
442 numElements, ISA_TYPE_F, EALIGN_GRF,
443 false, numInstances, "NoPerspectiveSample");
444 }
445 baryReg = m_NoPerspectiveSample;
446 break;
447 default:
448 IGC_ASSERT(0);
449 }
450 return baryReg;
451 }
452
GetBaryRegLoweredHalf(e_interpolation mode)453 CVariable* CPixelShader::GetBaryRegLoweredHalf(e_interpolation mode)
454 {
455 IGC_ASSERT(IsInterpolationLinear(mode));
456
457 const char* const name =
458 mode == EINTERPOLATION_LINEAR ? "PerspectivePixelLoweredHalf" :
459 mode == EINTERPOLATION_LINEARCENTROID ? "PerspectiveCentroidLoweredHalf" :
460 mode == EINTERPOLATION_LINEARSAMPLE ? "PerspectiveSampleLoweredHalf" :
461 mode == EINTERPOLATION_LINEARNOPERSPECTIVE ? "NoPerspectivePixelLoweredHalf" :
462 mode == EINTERPOLATION_LINEARNOPERSPECTIVECENTROID ? "NoPerspectiveCentroidLoweredHalf" :
463 mode == EINTERPOLATION_LINEARNOPERSPECTIVESAMPLE ? "NoPerspectiveSampleLoweredHalf" : "";
464
465 if (!m_BaryRegLoweredHalf[mode])
466 {
467 m_BaryRegLoweredHalf[mode] =
468 GetNewVariable(
469 2 * numLanes(m_SIMDSize), ISA_TYPE_HF, EALIGN_GRF,
470 false, m_numberInstance, name);
471 if (encoder.IsCodePatchCandidate())
472 {
473 AddPatchTempSetup(m_BaryRegLoweredHalf[mode]);
474 }
475 }
476 return m_BaryRegLoweredHalf[mode];
477 }
478
GetBaryRegLoweredFloat(e_interpolation mode)479 CVariable* CPixelShader::GetBaryRegLoweredFloat(e_interpolation mode)
480 {
481 IGC_ASSERT(IsInterpolationLinear(mode));
482
483 const char* const name =
484 mode == EINTERPOLATION_LINEAR ? "PerspectivePixelLoweredFloat" :
485 mode == EINTERPOLATION_LINEARCENTROID ? "PerspectiveCentroidLoweredFloat" :
486 mode == EINTERPOLATION_LINEARSAMPLE ? "PerspectiveSampleLoweredFloat" :
487 mode == EINTERPOLATION_LINEARNOPERSPECTIVE ? "NoPerspectivePixelLoweredFloat" :
488 mode == EINTERPOLATION_LINEARNOPERSPECTIVECENTROID ? "NoPerspectiveCentroidLoweredFloat" :
489 mode == EINTERPOLATION_LINEARNOPERSPECTIVESAMPLE ? "NoPerspectiveSampleLoweredFloat" : "";
490
491 if (!m_BaryRegLoweredFloat[mode])
492 {
493 m_BaryRegLoweredFloat[mode] =
494 GetNewVariable(
495 2 * numLanes(m_SIMDSize), ISA_TYPE_F, EALIGN_GRF,
496 false, m_numberInstance, name);
497 if (encoder.IsCodePatchCandidate())
498 {
499 AddPatchTempSetup(m_BaryRegLoweredFloat[mode]);
500 }
501 }
502 return m_BaryRegLoweredFloat[mode];
503 }
504
505
506
// Returns (creating on first use) the attribute-delta variable for setup
// slot `index`. In the lowered-input layout, even/odd slot pairs share
// one 8-element GRF variable: the even slot owns the storage and the odd
// slot is an alias into its upper half.
CVariable* CPixelShader::GetInputDelta(uint index, bool loweredInput)
{
    CVariable* inputVar = setup[index];
    if (inputVar == nullptr)
    {
        if (loweredInput)
        {
            if (index % 2 == 0)
            {
                // Even slot: allocate the shared storage and immediately
                // create the odd sibling as an alias into it.
                inputVar = GetNewVariable(8, ISA_TYPE_F, EALIGN_GRF, true, CName::NONE);
                setup[index + 1] = GetNewAlias(inputVar, ISA_TYPE_F, 16, 4);
            }
            else
            {
                // Odd slot: alias into the even sibling (created on
                // demand via the recursive call).
                inputVar = GetNewAlias(GetInputDelta(index - 1), ISA_TYPE_F, 16, 4);
            }
        }
        else
        {
            // Non-lowered layout: each slot gets its own 4-float variable.
            inputVar = GetNewVariable(4, ISA_TYPE_F, EALIGN_OWORD, true, CName::NONE);
        }
        setup[index] = inputVar;
    }
    return inputVar;
}
532
GetInputDeltaLowered(uint index)533 CVariable* CPixelShader::GetInputDeltaLowered(uint index)
534 {
535 CVariable* inputVar = setupLowered[index];
536 if (inputVar == nullptr)
537 {
538 IGC_ASSERT(LowerPSInput());
539 if (index % 2 == 0)
540 {
541 inputVar = GetNewVariable(8, ISA_TYPE_HF, EALIGN_OWORD, true, CName::NONE);
542 }
543 else
544 {
545 if (setupLowered[index - 1])
546 {
547 inputVar = GetNewAlias(setupLowered[index - 1], ISA_TYPE_HF, 8, 4);
548 }
549 else
550 {
551 inputVar = GetNewVariable(4, ISA_TYPE_HF, EALIGN_OWORD, true, CName::NONE);
552 }
553 }
554
555 setupLowered[index] = inputVar;
556 }
557 return inputVar;
558 }
559
GetZWDelta()560 CVariable* CPixelShader::GetZWDelta()
561 {
562 if (!m_ZWDelta)
563 {
564 uint numLanes = getGRFSize() / SIZE_DWORD ; // single GRF
565
566 m_ZWDelta =
567 GetNewVariable(numLanes, ISA_TYPE_F, EALIGN_GRF, false, 1, "ZWDelta");
568 }
569 return m_ZWDelta;
570 }
571
572
GetPositionZ()573 CVariable* CPixelShader::GetPositionZ()
574 {
575 uint16_t numberInstance, numberLanes;
576 {
577 numberLanes = numLanes(m_SIMDSize);
578 numberInstance = m_numberInstance;
579 }
580 if (!m_pPositionZPixel)
581 {
582 m_pPositionZPixel =
583 GetNewVariable(numberLanes, ISA_TYPE_F, EALIGN_GRF, false, numberInstance, "PosZPixel");
584 }
585 return m_pPositionZPixel;
586 }
587
GetPositionW()588 CVariable* CPixelShader::GetPositionW()
589 {
590 uint16_t numberInstance, numberLanes;
591 {
592 numberLanes = numLanes(m_SIMDSize);
593 numberInstance = m_numberInstance;
594 }
595 if (!m_pPositionWPixel)
596 {
597 m_pPositionWPixel =
598 GetNewVariable(numberLanes, ISA_TYPE_F, EALIGN_GRF, false, numberInstance, "PosWPixel");
599 }
600 return m_pPositionWPixel;
601 }
602
GetPositionXYOffset()603 CVariable* CPixelShader::GetPositionXYOffset()
604 {
605 if (!m_pPositionXYOffset)
606 {
607 m_pPositionXYOffset =
608 GetNewVariable(32, ISA_TYPE_B, EALIGN_GRF, false, m_numberInstance, "PosXYOff");
609 }
610 return m_pPositionXYOffset;
611 }
612
GetInputCoverageMask()613 CVariable* CPixelShader::GetInputCoverageMask()
614 {
615 uint16_t numberInstance, numberLanes;
616 {
617 numberLanes = numLanes(m_SIMDSize);
618 numberInstance = m_numberInstance;
619 }
620 if (!m_pInputCoverageMask)
621 {
622 m_pInputCoverageMask = GetNewVariable(
623 numberLanes, ISA_TYPE_F, EALIGN_GRF, false, numberInstance, "InputCoverageMask");
624 }
625 return m_pInputCoverageMask;
626 }
627
GetSampleOffsetX()628 CVariable* CPixelShader::GetSampleOffsetX()
629 {
630 if (!m_SampleOffsetX)
631 {
632 m_SampleOffsetX = GetNewVariable(16, ISA_TYPE_UB, EALIGN_OWORD, true, "SmplOffX");
633 }
634 return m_SampleOffsetX;
635 }
636
GetSampleOffsetY()637 CVariable* CPixelShader::GetSampleOffsetY()
638 {
639 if (!m_SampleOffsetY)
640 {
641 m_SampleOffsetY = GetNewVariable(16, ISA_TYPE_UB, EALIGN_OWORD, true, "SmplOffY");
642 }
643 return m_SampleOffsetY;
644 }
645
GetCPSRequestedSizeX()646 CVariable* CPixelShader::GetCPSRequestedSizeX()
647 {
648 if (!m_pCPSRequestedSizeX)
649 {
650 m_pCPSRequestedSizeX =
651 GetNewVariable(
652 numLanes(m_SIMDSize) / 4, ISA_TYPE_F,
653 EALIGN_OWORD, false, m_numberInstance, "CPSReqSizeX");
654 }
655 return m_pCPSRequestedSizeX;
656 }
657
GetCPSRequestedSizeY()658 CVariable* CPixelShader::GetCPSRequestedSizeY()
659 {
660 if (!m_pCPSRequestedSizeY)
661 {
662 m_pCPSRequestedSizeY =
663 GetNewVariable(
664 numLanes(m_SIMDSize) / 4, ISA_TYPE_F,
665 EALIGN_OWORD, false, m_numberInstance, "CPSReqSizeY");
666 }
667 return m_pCPSRequestedSizeY;
668 }
669
CPixelShader(llvm::Function * pFunc,CShaderProgram * pProgram)670 CPixelShader::CPixelShader(llvm::Function* pFunc, CShaderProgram* pProgram)
671 : CShader(pFunc, pProgram)
672 {
673 m_RenderTargetMask = 0;
674 m_HasoDepth = false;
675 m_HasoStencil = false;
676 m_HasoMask = false;
677 m_isPerSample = false;
678 m_HasInputCoverageMask = false;
679 m_HasPullBary = false;
680 m_HasCoarseSize = false;
681 m_HasDouble = false;
682 m_hasDualBlendSource = false;
683 m_HasDiscard = false;
684 m_IsLastPhase = false;
685 m_phase = PSPHASE_LEGACY;
686 m_Signature = nullptr;
687 m_samplerCount = 0;
688 m_ModeUsedHalf.reset();
689 m_ModeUsedFloat.reset();
690 setupLowered.clear();
691 loweredSetupIndexes.clear();
692
693 m_BaryRegLoweredHalf.fill(nullptr);
694 m_BaryRegLoweredFloat.fill(nullptr);
695
696 Function* coarsePhase = nullptr;
697 Function* pixelPhase = nullptr;
698 NamedMDNode* coarseNode = pFunc->getParent()->getNamedMetadata(NAMED_METADATA_COARSE_PHASE);
699 NamedMDNode* pixelNode = pFunc->getParent()->getNamedMetadata(NAMED_METADATA_PIXEL_PHASE);
700 if (coarseNode)
701 {
702 coarsePhase = llvm::mdconst::dyn_extract<Function>(coarseNode->getOperand(0)->getOperand(0));
703 }
704 if (pixelNode)
705 {
706 pixelPhase = llvm::mdconst::dyn_extract<Function>(pixelNode->getOperand(0)->getOperand(0));
707 }
708 if (pFunc == coarsePhase)
709 {
710 m_phase = PSPHASE_COARSE;
711 }
712 if (coarsePhase && pixelPhase)
713 {
714 if (pFunc == pixelPhase)
715 {
716 m_phase = PSPHASE_PIXEL;
717 m_IsLastPhase = true;
718 }
719 }
720 else
721 {
722 m_IsLastPhase = true;
723 }
724 }
725
// Nothing to release beyond what the CShader base class handles.
CPixelShader::~CPixelShader()
{
}
729
// Resets all per-compilation pixel shader state (cached payload
// variables, phase outputs, lowering bookkeeping) before delegating to
// the base-class encoder initialization. Called once per SIMD-mode
// compilation attempt so state from a previous attempt never leaks in.
void CPixelShader::InitEncoder(SIMDMode simdMode, bool canAbortOnSpill, ShaderDispatchMode shaderMode)
{
    m_R1 = NULL;
    m_PerspectiveBaryPlanes = nullptr;
    m_NonPerspectiveBaryPlanes = nullptr;
    m_PerspectivePixel = NULL;
    m_PerspectiveCentroid = NULL;
    m_PerspectiveSample = NULL;
    m_NoPerspectivePixel = NULL;
    m_NoPerspectiveCentroid = NULL;
    m_NoPerspectiveSample = NULL;
    m_BaryRegLoweredHalf.fill(nullptr);
    m_BaryRegLoweredFloat.fill(nullptr);
    m_KillPixelMask = NULL;
    m_HasDiscard = false;
    m_pPositionZPixel = NULL;
    m_pPositionWPixel = NULL;
    m_pPositionXYOffset = NULL;
    m_pInputCoverageMask = NULL;
    m_pCPSRequestedSizeX = NULL;
    m_pCPSRequestedSizeY = NULL;
    m_PixelPhasePayload = nullptr;
    m_PixelPhaseCounter = nullptr;
    m_SampleOffsetX = nullptr;
    m_SampleOffsetY = nullptr;
    m_ZWDelta = nullptr;
    m_hasEOT = false;
    m_NeedPSSync = false;
    m_CoarseoMask = nullptr;
    m_CoarseMaskInput = nullptr;
    m_CoarseR1 = nullptr;

    // Containers and mode-usage masks accumulated during codegen.
    m_CoarseOutput.clear();
    m_CoarseInput.clear();
    rtWriteList.clear();
    setupLowered.clear();
    loweredSetupIndexes.clear();
    m_ModeUsedHalf.reset();
    m_ModeUsedFloat.reset();
    CShader::InitEncoder(simdMode, canAbortOnSpill, shaderMode);
}
771
772
// Selects which compiled SIMD variants (8/16/32) are reported to the
// runtime and copies each selected variant's program output into
// pKernelProgram. The last variant copied ("pShader") also fills the
// SIMD-independent fields via CPixelShader::FillProgram.
void CShaderProgram::FillProgram(SPixelShaderKernelProgram* pKernelProgram)
{

    const unsigned int InstCacheSize = 0xC000;
    CPixelShader* simd8Shader = static_cast<CPixelShader*>(GetShader(SIMDMode::SIMD8));
    CPixelShader* simd16Shader = static_cast<CPixelShader*>(GetShader(SIMDMode::SIMD16));
    CPixelShader* simd32Shader = static_cast<CPixelShader*>(GetShader(SIMDMode::SIMD32));
    CPixelShader* pShader = nullptr;
    if (simd32Shader)
    {
        // Keep SIMD32 when it is the only variant, when its kernel fits
        // in the instruction cache budget, or when SIMD32 is forced by
        // compiler options; otherwise record that it was skipped for
        // performance reasons.
        const unsigned kernelSize = simd32Shader->m_simdProgram.m_programSize;
        const bool forceSIMD32 =
            (this->GetContext()->getCompilerOption().forcePixelShaderSIMDMode &
             FLAG_PS_SIMD_MODE_FORCE_SIMD32) != 0;

        if ((!simd8Shader && !simd16Shader) ||
            (kernelSize > 0 && (kernelSize < InstCacheSize || forceSIMD32)))
        {
            pKernelProgram->simd32 = *simd32Shader->ProgramOutput();
            pShader = simd32Shader;
            GetContext()->SetSIMDInfo(SIMD_SELECTED, SIMDMode::SIMD32, ShaderDispatchMode::NOT_APPLICABLE);
        }
        else if (kernelSize > 0 && (kernelSize < InstCacheSize))
        {
            GetContext()->SetSIMDInfo(SIMD_SKIP_PERF, SIMDMode::SIMD32, ShaderDispatchMode::NOT_APPLICABLE);
        }
    }
    if (simd16Shader)
    {
        // NOTE(review): this keeps SIMD16 whenever SIMD8 is absent OR the
        // SIMD16 program is non-empty — the two conditions are OR'ed, so a
        // non-empty SIMD16 is always reported; confirm this is intended.
        if (!simd8Shader ||
            (simd16Shader->m_simdProgram.m_programSize > 0))
        {
            pKernelProgram->simd16 = *simd16Shader->ProgramOutput();
            pShader = simd16Shader;
            GetContext()->SetSIMDInfo(SIMD_SELECTED, SIMDMode::SIMD16, ShaderDispatchMode::NOT_APPLICABLE);
        }
    }
    {
        if (simd8Shader && simd8Shader->m_simdProgram.m_programSize > 0)
        {
            pKernelProgram->simd8 = *simd8Shader->ProgramOutput();
            pShader = simd8Shader;
            GetContext()->SetSIMDInfo(SIMD_SELECTED, SIMDMode::SIMD8, ShaderDispatchMode::NOT_APPLICABLE);
        }
    }

    if (pShader)
    {
        pShader->FillProgram(pKernelProgram);
    }
    pKernelProgram->SIMDInfo = GetContext()->GetSIMDInfo();
}
825
FillProgram(SPixelShaderKernelProgram * pKernelProgram)826 void CPixelShader::FillProgram(SPixelShaderKernelProgram* pKernelProgram)
827 {
828 const PixelShaderInfo& psInfo = GetContext()->getModuleMetaData()->psInfo;
829
830 pKernelProgram->blendToFillEnabled = psInfo.blendToFillEnabled;
831 pKernelProgram->forceEarlyZ = psInfo.forceEarlyZ;
832
833 pKernelProgram->isCoarsePS = m_phase == PSPHASE_COARSE;
834 pKernelProgram->hasCoarsePixelSize = m_HasCoarseSize;
835 pKernelProgram->hasSampleOffset = m_SampleOffsetX || m_SampleOffsetY;
836 pKernelProgram->hasZWDelta = m_ZWDelta;
837 pKernelProgram->needPerspectiveBaryPlane = m_PerspectiveBaryPlanes ? true : false;;
838 pKernelProgram->needNonPerspectiveBaryPlane = m_NonPerspectiveBaryPlanes ? true : false;;
839 pKernelProgram->ConstantBufferLoaded = m_constantBufferLoaded;
840 pKernelProgram->UavLoaded = m_uavLoaded;
841 for (int i = 0; i < 4; i++)
842 {
843 pKernelProgram->ShaderResourceLoaded[i] = m_shaderResourceLoaded[i];
844 }
845 pKernelProgram->RenderTargetLoaded = m_renderTargetLoaded;
846
847 pKernelProgram->hasControlFlow = m_numBlocks > 1 ? true : false;
848 pKernelProgram->MaxNumberOfThreads = m_Platform->getMaxPixelShaderThreads() / GetShaderThreadUsageRate();
849 pKernelProgram->needPerspectiveBary = m_PerspectivePixel ? true : false;
850 pKernelProgram->needPerspectiveCentroidBary = m_PerspectiveCentroid ? true : false;
851 pKernelProgram->needPerspectiveSampleBary = m_PerspectiveSample ? true : false;
852 pKernelProgram->needNonPerspectiveBary = m_NoPerspectivePixel ? true : false;
853 pKernelProgram->needNonPerspectiveCentroidBary = m_NoPerspectiveCentroid ? true : false;
854 pKernelProgram->needNonPerspectiveSampleBary = m_NoPerspectiveSample ? true : false;
855 pKernelProgram->killPixel = m_HasDiscard;
856 pKernelProgram->needSourceDepth = m_pPositionZPixel != nullptr;
857 pKernelProgram->needSourceW = m_pPositionWPixel != nullptr;
858 pKernelProgram->outputDepth = m_HasoDepth;
859 pKernelProgram->oMask = m_HasoMask;
860 pKernelProgram->outputStencil = m_HasoStencil;
861 pKernelProgram->sampleCmpToDiscardOptimizationPossible = GetContext()->m_instrTypes.sampleCmpToDiscardOptimizationPossible;
862 pKernelProgram->sampleCmpToDiscardOptimizationSlot = GetContext()->m_instrTypes.sampleCmpToDiscardOptimizationSlot;
863 pKernelProgram->needPSSync = m_NeedPSSync;
864 pKernelProgram->hasInputCoverageMask = m_HasInputCoverageMask;
865 pKernelProgram->hasPullBary = m_HasPullBary;
866 pKernelProgram->isPerSample = IsPerSample();
867 if (NamedMDNode * primIdNod = entry->getParent()->getNamedMetadata("PrimIdLocation"))
868 {
869 pKernelProgram->primIdLocation = int_cast<uint>(
870 mdconst::dyn_extract<ConstantInt>(primIdNod->getOperand(0)->getOperand(0))->getZExtValue());
871 pKernelProgram->hasPrimID = true;
872 }
873 if (NamedMDNode* PointCoordNod = entry->getParent()->getNamedMetadata("PointCoordLocation"))
874 {
875 pKernelProgram->pointCoordLocation = int_cast<uint>(
876 mdconst::dyn_extract<ConstantInt>(PointCoordNod->getOperand(0)->getOperand(0))->getZExtValue());
877 pKernelProgram->hasPointCoord = true;
878 }
879
880 pKernelProgram->posXYOffsetEnable = m_pPositionXYOffset ? true : false;
881 pKernelProgram->VectorMask = m_VectorMask;
882 pKernelProgram->samplerCount = GetSamplerCount(m_samplerCount);
883 pKernelProgram->renderTargetMask = m_RenderTargetMask;
884 pKernelProgram->constantInterpolationEnableMask = m_ConstantInterpolationMask;
885 pKernelProgram->NOSBufferSize = m_NOSBufferSize / getMinPushConstantBufferAlignmentInBytes();
886 pKernelProgram->isMessageTargetDataCacheDataPort = isMessageTargetDataCacheDataPort;
887
888
889 CreateGatherMap();
890 CreateConstantBufferOutput(pKernelProgram);
891
892 pKernelProgram->bindingTableEntryCount = this->GetMaxUsedBindingTableEntryCount();
893 pKernelProgram->BindingTableEntryBitmap = this->GetBindingTableEntryBitmap();
894
895 // PS packed attributes
896 for (uint attribute = 0; attribute <= m_MaxSetupIndex / 4; ++attribute)
897 {
898 pKernelProgram->attributeActiveComponent[attribute] = GetActiveComponents(attribute);
899
900 const bool useComponent = pKernelProgram->attributeActiveComponent[attribute] !=
901 USC::GFX3DSTATE_SF_ATTRIBUTE_ACTIVE_COMPONENT_DISABLED;
902 if (useComponent)
903 {
904 pKernelProgram->nbOfSFOutput = attribute + 1;
905 }
906 }
907
908 for (unsigned i = 0; i < USC::NUM_PSHADER_OUTPUT_REGISTERS; i++)
909 {
910 pKernelProgram->OutputUseMask[i] = (unsigned char)psInfo.colorOutputMask[i];
911 }
912 }
913
PreCompile()914 void CPixelShader::PreCompile()
915 {
916 CreateImplicitArgs();
917 CodeGenContext* ctx = GetContext();
918
919 const uint8_t numberInstance = m_numberInstance;
920 const bool isR1Available = true;
921
922 if (isR1Available)
923 {
924 m_R1 = GetNewVariable(getGRFSize() / SIZE_DWORD, ISA_TYPE_D, EALIGN_GRF, false, numberInstance, "R1");
925 }
926
927 // make sure the return block is properly set
928 if (ctx->getModule()->getNamedMetadata("KillPixel"))
929 {
930 m_HasDiscard = true;
931 }
932
933 setup.resize(4 * g_c_Max_PS_attributes, nullptr);
934 if (LowerPSInput())
935 {
936 setupLowered.resize(4 * g_c_Max_PS_attributes, nullptr);
937 }
938 }
939
// Scans one instruction for pixel-shader-specific features and records
// them in member state (render targets written, interpolation modes
// used, setup indices referenced, sync requirements, etc.). Called over
// the whole function before codegen.
void CPixelShader::ParseShaderSpecificOpcode(llvm::Instruction* inst)
{
    // temporary workaround to disable SIMD16 when double are present
    if (inst->getType()->isDoubleTy())
    {
        m_HasDouble = true;
    }
    if (GenIntrinsicInst * genIntr = dyn_cast<GenIntrinsicInst>(inst))
    {
        uint setupIndex;
        switch (genIntr->getIntrinsicID())
        {
        case GenISAIntrinsic::GenISA_RenderTargetRead:
            m_NeedPSSync = true;
            if (GetPhase() == PSPHASE_LEGACY)
            {
                m_isPerSample = true;
            }
            break;
        case GenISAIntrinsic::GenISA_uavSerializeAll:
        case GenISAIntrinsic::GenISA_uavSerializeOnResID:
            m_NeedPSSync = true;
            break;
        case GenISAIntrinsic::GenISA_RTDualBlendSource:
            m_hasDualBlendSource = true;
            AddRenderTarget(
                cast<RTDualBlendSourceIntrinsic>(genIntr)->getRTIndexImm());
            break;
        case GenISAIntrinsic::GenISA_RTWrite:
        {
            RTWritIntrinsic* rt = cast<RTWritIntrinsic>(genIntr);
            // A -1 RT index denotes a write whose target is not an
            // immediate; only immediate indices go into the mask.
            if (rt->getRTIndexImm() != -1)
            {
                AddRenderTarget(rt->getRTIndexImm());
            }
            if (rt->hasStencil())
            {
                OutputStencil();
            }
            break;
        }
        case GenISAIntrinsic::GenISA_DCL_ShaderInputVec:
        case GenISAIntrinsic::GenISA_DCL_inputVec:
        {
            // Operand 0: setup (attribute) index; operand 1: interpolation mode.
            IGC_ASSERT(llvm::isa<llvm::ConstantInt>(inst->getOperand(0)));
            IGC_ASSERT(llvm::isa<llvm::ConstantInt>(inst->getOperand(1)));
            setupIndex = int_cast<uint>(llvm::cast<llvm::ConstantInt>(inst->getOperand(0))->getZExtValue());
            m_MaxSetupIndex = std::max(setupIndex, m_MaxSetupIndex);
            // attribute packing
            m_SetupIndicesUsed.insert(setupIndex);

            e_interpolation mode = static_cast<e_interpolation>(llvm::cast<llvm::ConstantInt>(inst->getOperand(1))->getZExtValue());
            if (mode != EINTERPOLATION_CONSTANT)
            {
                {
                    // Half-typed inputs take the lowered (half-float)
                    // interpolation path; track mode usage per precision.
                    if (inst->getType()->isHalfTy())
                    {
                        loweredSetupIndexes.insert(setupIndex);
                        m_ModeUsedHalf.set(mode);
                    }
                    else
                    {
                        m_ModeUsedFloat.set(mode);
                    }
                }
            }
            break;
        }
        case GenISAIntrinsic::GenISA_PullSampleIndexBarys:
        case GenISAIntrinsic::GenISA_PullSnappedBarys:
        case GenISAIntrinsic::GenISA_PullCentroidBarys:
            m_HasPullBary = true;
            break;
        case GenISAIntrinsic::GenISA_Interpolate:
            IGC_ASSERT(llvm::isa<llvm::ConstantInt>(inst->getOperand(0)));
            setupIndex = int_cast<uint>(llvm::cast<llvm::ConstantInt>(inst->getOperand(0))->getZExtValue());
            m_MaxSetupIndex = std::max(setupIndex, m_MaxSetupIndex);
            // attribute packing
            m_SetupIndicesUsed.insert(setupIndex);
            break;
        default:
            break;
        }
    }
}
1025
AddRenderTarget(uint RTIndex)1026 void CPixelShader::AddRenderTarget(uint RTIndex)
1027 {
1028 m_RenderTargetMask |= 1 << RTIndex;
1029 }
1030
DeclareSGV(uint usage)1031 void CPixelShader::DeclareSGV(uint usage)
1032 {
1033 switch (usage)
1034 {
1035 case POSITION_X:
1036 case POSITION_Y:
1037 break;
1038 case POSITION_Z:
1039 break;
1040 case POSITION_W:
1041 break;
1042 case VFACE:
1043 break;
1044 case INPUT_COVERAGE_MASK:
1045 m_HasInputCoverageMask = true;
1046 break;
1047 case SAMPLEINDEX:
1048 m_isPerSample = true;
1049 break;
1050 case REQUESTED_COARSE_SIZE_X:
1051 case REQUESTED_COARSE_SIZE_Y:
1052 m_HasCoarseSize = true;
1053 break;
1054 default:
1055 break;
1056 // nothing to do
1057 }
1058 }
1059
// Emit the pixel-interpolator "pull pixel payload" message for the pixel
// phase of a multi-phase (CPS) shader, plus the control flow around it:
//   - builds the message descriptor, passing the coarse oMask as message
//     payload when the coarse phase produced one,
//   - saves the shifted phase counter for the later RT write,
//   - sends the message, jumps to the epilogue when the returned pixel
//     mask is zero, otherwise copies that mask into sr0.2 to override
//     the execution mask.
void CPixelShader::PullPixelPhasePayload()
{
    CVariable* payload = nullptr;
    bool oMask = false;
    if (GetDispatchSignature().CoarseMask)
    {
        // The coarse phase produced an oMask; send it with the message.
        payload = GetCoarseMask();
        oMask = true;
    }
    else
    {
        // 8-dword payload when no oMask is present.
        // NOTE(review): presumably a header-only message payload -
        // confirm against the pixel-interpolator message spec.
        payload = GetNewVariable(8, ISA_TYPE_D, EALIGN_GRF, CName::NONE);
    }
    uint messageDescriptor = PIPullPixelPayload(
        m_SIMDSize == SIMDMode::SIMD8 ? EU_PI_MESSAGE_SIMD8 : EU_PI_MESSAGE_SIMD16,
        m_PixelPhasePayload->GetSize() / getGRFSize(),
        payload->GetSize() / getGRFSize(),
        false,
        false,
        false,
        false,
        false,
        oMask);

    CVariable* desc = ImmToVariable(messageDescriptor, ISA_TYPE_UD);
    // save the current phase counter as it is needed by the RT write
    // (shifted left by 16, i.e. into the high word).
    m_CurrentPhaseCounter = GetNewVariable(1, ISA_TYPE_UD, EALIGN_DWORD, true, "CurrPhaseCounter");
    encoder.SetSrcRegion(0, 0, 1, 0);
    encoder.Shl(m_CurrentPhaseCounter, m_PixelPhaseCounter, ImmToVariable(0x10, ISA_TYPE_UW));
    encoder.Push();
    // The phase counter (shifted left by 8) is OR-ed into the descriptor
    // before the send.
    CVariable* nextPhase = GetNewVariable(1, ISA_TYPE_UW, EALIGN_DWORD, true, "NextPhase");
    encoder.SetSrcRegion(0, 0, 1, 0);
    encoder.Shl(nextPhase, m_PixelPhaseCounter, ImmToVariable(8, ISA_TYPE_D));
    encoder.Push();
    CVariable* a0 = GetNewVariable(1, ISA_TYPE_UD, EALIGN_DWORD, true, CName::NONE);
    encoder.Or(a0, nextPhase, desc);
    encoder.Push();
    encoder.SetNoMask();
    // Send to the pixel interpolator; the response lands in
    // m_PixelPhasePayload (phase counter + per-pixel data).
    encoder.Send(m_PixelPhasePayload, payload, EU_GEN7_MESSAGE_TARGET_PIXEL_INTERPOLATOR, a0);
    encoder.Push();
    // Returned pixel mask is read as the word at subregister 14 of the
    // response. NOTE(review): offset taken from the message layout -
    // confirm against the HW spec.
    CVariable* mask = BitCast(m_PixelPhasePayload, ISA_TYPE_UW);
    CVariable* f0 = GetNewVariable(numLanes(m_SIMDSize), ISA_TYPE_BOOL, EALIGN_DWORD, true, CName::NONE);
    encoder.SetSrcSubReg(0, 14);
    encoder.SetSrcRegion(0, 0, 1, 0);
    encoder.Cmp(EPREDICATE_EQ, f0, mask, ImmToVariable(0, ISA_TYPE_UW));
    encoder.Push();
    // No pixels to process: skip straight to the epilogue.
    m_epilogueLabel = encoder.GetNewLabelID("epilogue");
    encoder.Jump(f0, m_epilogueLabel);
    encoder.Push();
    // override the execution mask in sr0.2
    encoder.SetSrcSubReg(0, 14);
    encoder.SetDstSubReg(2);
    encoder.SetSrcRegion(0, 0, 1, 0);
    encoder.Cast(GetSR0(), mask);
    encoder.Push();
}
1116
// Emit the shader prologue. For the pixel phase of a multi-phase (CPS)
// shader this sets up the pixel-phase payload and loop:
//   - allocates a 2-GRF payload; the phase counter is aliased to its first
//     word, the coarse parent indices to its second GRF, and m_R1 to its
//     first GRF,
//   - initializes the phase counter to 0, places the "pixel_phase" label
//     that AddEpilogue jumps back to, and pulls the first payload.
// All phases then run PS input lowering.
void CPixelShader::AddPrologue()
{
    if (m_phase == PSPHASE_PIXEL)
    {
        uint responseLength = 2;
        // Keep the coarse-phase R1; m_R1 is re-aliased below into the
        // pixel-phase payload.
        m_CoarseR1 = GetR1();
        m_PixelPhasePayload =
            GetNewVariable(responseLength * (getGRFSize() >> 2),
                ISA_TYPE_D, EALIGN_GRF, "PixelPhasePayload");
        m_PixelPhaseCounter = GetNewAlias(m_PixelPhasePayload, ISA_TYPE_UW, 0, 1);
        m_CoarseParentIndex = GetNewAlias(m_PixelPhasePayload, ISA_TYPE_UW, getGRFSize(), numLanes(m_SIMDSize));
        m_R1 = GetNewAlias(m_PixelPhasePayload, ISA_TYPE_D, 0, getGRFSize() / SIZE_DWORD);
        // Scalar init of the phase counter to 0.
        encoder.SetNoMask();
        encoder.SetSimdSize(SIMDMode::SIMD1);
        encoder.Copy(m_PixelPhaseCounter, ImmToVariable(0, ISA_TYPE_UW));
        encoder.Push();
        // Loop head: AddEpilogue jumps back here while the phase counter
        // returned by the pull message is non-zero.
        m_pixelPhaseLabel = encoder.GetNewLabelID("pixel_phase");
        encoder.Label(m_pixelPhaseLabel);
        encoder.Push();
        PullPixelPhasePayload();
    }
    {
        emitPSInputLowering();
    }
}
1142
// Pixel-shader specific pre-analysis: record whether a vector mask
// (VMask) is needed (queried from the codegen pattern match), then run
// the common CShader pre-analysis.
void CPixelShader::PreAnalysisPass()
{
    m_VectorMask = m_CG->NeedVMask();
    CShader::PreAnalysisPass();
}
1148
// Emit the per-phase epilogue.
// - Non-last phase with pixel kills: accumulate the kill results into the
//   coarse oMask so later phases can discard those pixels.
// - Pixel phase: place the epilogue label targeted by
//   PullPixelPhasePayload, loop back to the pixel-phase label while the
//   phase counter is non-zero, then emit the EOT render-target message.
// The common CShader epilogue is added only for the last phase.
void CPixelShader::AddEpilogue(llvm::ReturnInst* ret)
{
    if (!IsLastPhase() && m_KillPixelMask)
    {
        if (!m_CoarseoMask)
        {
            // Lazily create the coarse oMask, initialized to all-ones
            // (no pixels killed yet).
            m_CoarseoMask = GetNewVariable(
                numLanes(m_SIMDSize), ISA_TYPE_UD, EALIGN_GRF, "CoarseOMask");
            encoder.Copy(m_CoarseoMask, ImmToVariable(0xFFFFFFFF, ISA_TYPE_UD));
            encoder.Push();
        }
        // Zero the mask for lanes whose pixel was killed.
        encoder.SetPredicate(m_KillPixelMask);
        encoder.Copy(m_CoarseoMask, ImmToVariable(0x0, ISA_TYPE_UD));
        encoder.Push();
    }
    if (m_phase == PSPHASE_PIXEL)
    {
        encoder.Label(m_epilogueLabel);
        encoder.Push();
        // next phase index is in the first dword of the payload
        CVariable* flag = GetNewVariable(
            numLanes(m_SIMDSize), ISA_TYPE_BOOL, EALIGN_BYTE, true, CName::NONE);
        encoder.SetSrcRegion(0, 0, 1, 0);
        encoder.Cmp(EPREDICATE_NE, flag, m_PixelPhaseCounter, ImmToVariable(0, ISA_TYPE_UW));
        encoder.Push();
        // More pixel groups pending: loop back and pull the next payload.
        encoder.Jump(flag, m_pixelPhaseLabel);
        encoder.Push();
        const bool isPerCoarse = true;
        EOTRenderTarget(GetR1(), isPerCoarse);
        m_hasEOT = true;
    }
    if (IsLastPhase())
    {
        CShader::AddEpilogue(ret);
    }
}
1185
AddCoarseOutput(CVariable * output,unsigned int index)1186 void CPixelShader::AddCoarseOutput(CVariable* output, unsigned int index)
1187 {
1188 IGC_ASSERT(m_CoarseOutput.find(index) == m_CoarseOutput.end());
1189 m_CoarseOutput[index] = output;
1190 }
1191
GetCoarseInput(unsigned int index,uint16_t vectorSize,VISA_Type type)1192 CVariable* CPixelShader::GetCoarseInput(unsigned int index, uint16_t vectorSize, VISA_Type type)
1193 {
1194 auto it = m_CoarseInput.find(index);
1195 CVariable* coarseInput = nullptr;
1196 if (it == m_CoarseInput.end())
1197 {
1198 coarseInput = GetNewVariable(
1199 numLanes(m_SIMDSize) * vectorSize, type, EALIGN_GRF, "CoarseInput");
1200 m_CoarseInput[index] = coarseInput;
1201 }
1202 else
1203 {
1204 coarseInput = it->second;
1205 }
1206 return coarseInput;
1207 }
1208
// Install the coarse oMask variable (e.g. produced by a coarse-phase
// output); read back via GetCoarseMask()/AddEpilogue.
void CPixelShader::SetCoarseoMask(CVariable* oMask)
{
    m_CoarseoMask = oMask;
}
1213
GetCoarseMask()1214 CVariable* CPixelShader::GetCoarseMask()
1215 {
1216 if (m_CoarseMaskInput == nullptr)
1217 {
1218 m_CoarseMaskInput = GetNewVariable(
1219 numLanes(m_SIMDSize), ISA_TYPE_F, EALIGN_GRF, "CoarseMaskInput");
1220 }
1221 return m_CoarseMaskInput;
1222 }
1223
// Accessor for the coarse-parent-index alias set up by AddPrologue
// (second GRF of the pixel-phase payload); null outside the pixel phase.
CVariable* CPixelShader::GetCoarseParentIndex()
{
    return m_CoarseParentIndex;
}
1228
// Accessor for the saved (shifted) phase counter produced by
// PullPixelPhasePayload; needed by the RT write.
CVariable* CPixelShader::GetCurrentPhaseCounter()
{
    return m_CurrentPhaseCounter;
}
1233
1234
// Decide whether this pixel shader should be compiled at 'simdMode'.
// Returns true to proceed, false to skip. The decision layers:
//   - common checks (CompileSIMDSizeInCommon) and retry-manager state,
//   - UMD/compiler-option forces (FLAG_PS_SIMD_MODE_FORCE_*),
//   - hard HW restrictions (stencil support, doubles, dual-source blend
//     with CPS phases, non-legacy phases at SIMD32, fused-EU per-sample),
//   - perf heuristics comparing against the already-compiled narrower
//     SIMD variant (spills, send-stall cycles, SIMD32 profitability).
// Skips are recorded via ctx->SetSIMDInfo with the reason.
bool CPixelShader::CompileSIMDSize(SIMDMode simdMode, EmitPass& EP, llvm::Function& F)
{
    if (!CompileSIMDSizeInCommon(simdMode))
        return false;


    CodeGenContext* ctx = GetContext();
    if (!ctx->m_retryManager.IsFirstTry())
    {
        // On a recompile retry, reset the recorded info and tag the retry.
        ctx->ClearSIMDInfo(simdMode, EP.m_ShaderDispatchMode);
        ctx->SetSIMDInfo(SIMD_RETRY, simdMode, EP.m_ShaderDispatchMode);
    }

    bool forceSIMD32 =
        (ctx->getCompilerOption().forcePixelShaderSIMDMode &
            FLAG_PS_SIMD_MODE_FORCE_SIMD32) != 0;
    bool forceSIMD16 =
        (ctx->getCompilerOption().forcePixelShaderSIMDMode &
            FLAG_PS_SIMD_MODE_FORCE_SIMD16) != 0;

    // For staged compilation, we try to avoid duplicated compilation for the same SIMD mode
    if ((simdMode == SIMDMode::SIMD8 && AvoidDupStage2(8 , ctx->m_CgFlag, ctx->m_StagingCtx)) ||
        (simdMode == SIMDMode::SIMD16 && AvoidDupStage2(16, ctx->m_CgFlag, ctx->m_StagingCtx)))
    {
        return false;
    }

    if (ctx->PsHighSimdDisable)
    {
        if (simdMode == SIMDMode::SIMD32)
            return false;
    }

    // Hard restriction: stencil output not supported at this SIMD width.
    if (m_HasoStencil && !ctx->platform.supportsStencil(simdMode))
    {
        ctx->SetSIMDInfo(SIMD_SKIP_HW, simdMode, EP.m_ShaderDispatchMode);
        return false;
    }
    // Doubles are only compiled at SIMD8.
    if (m_HasDouble && simdMode != SIMDMode::SIMD8)
    {
        ctx->SetSIMDInfo(SIMD_SKIP_HW, simdMode, EP.m_ShaderDispatchMode);
        return false;
    }
    if (m_hasDualBlendSource && simdMode != SIMDMode::SIMD8 &&
        (m_phase == PSPHASE_PIXEL || ((m_phase != PSPHASE_LEGACY) && (ctx->platform.getWATable().Wa_1409392000 || ctx->platform.getPlatformInfo().eProductFamily == IGFX_ICELAKE))))
    {
        // Spec restriction CPS multi-phase cannot use SIMD16 with dual source blending
        ctx->SetSIMDInfo(SIMD_SKIP_HW, simdMode, EP.m_ShaderDispatchMode);
        return false;
    }
    // CPS phases (non-legacy) never compile at SIMD32.
    if (m_phase != PSPHASE_LEGACY &&
        simdMode == SIMDMode::SIMD32)
    {
        ctx->SetSIMDInfo(SIMD_SKIP_HW, simdMode, EP.m_ShaderDispatchMode);
        return false;
    }

    if (GetContext()->platform.hasFusedEU() &&
        simdMode == SIMDMode::SIMD32 &&
        IsPerSample() && !IsStage1(ctx))
    {
        //Fused SIMD32 not enabled when dispatch rate is per sample
        ctx->SetSIMDInfo(SIMD_SKIP_HW, simdMode, EP.m_ShaderDispatchMode);
        return false;
    }

    if (simdMode == SIMDMode::SIMD16 && EP.m_ShaderDispatchMode == ShaderDispatchMode::NOT_APPLICABLE)
    {
        if (IsStage1BestPerf(ctx->m_CgFlag, ctx->m_StagingCtx))
        {
            return true;
        }
        if (DoSimd16Stage2(ctx->m_StagingCtx))
        {
            return true;
        }

        if (IGC_IS_FLAG_ENABLED(ForceBestSIMD))
        {
            return true;
        }

        if (forceSIMD16)
        {
            return true;
        }
        // Heuristic: if SIMD8 already spills, SIMD16 will be worse.
        CShader* simd8Program = m_parent->GetShader(SIMDMode::SIMD8);
        if (simd8Program != nullptr && simd8Program->ProgramOutput()->m_scratchSpaceUsedBySpills > 0)
        {
            ctx->SetSIMDInfo(SIMD_SKIP_REGPRES, simdMode, EP.m_ShaderDispatchMode);
            return false;
        }
    }
    if (simdMode == SIMDMode::SIMD32)
    {
        if (DoSimd32Stage2(ctx->m_StagingCtx))
        {
            return true;
        }

        if (forceSIMD32)
        {
            return true;
        }

        // SIMD32 requires a successful, spill-free SIMD16 compile first.
        CShader* simd16Program = m_parent->GetShader(SIMDMode::SIMD16);
        if ((simd16Program == nullptr ||
            simd16Program->ProgramOutput()->m_programBin == 0 ||
            simd16Program->ProgramOutput()->m_scratchSpaceUsedBySpills > 0))
        {
            ctx->SetSIMDInfo(SIMD_SKIP_REGPRES, simdMode, EP.m_ShaderDispatchMode);
            return false;
        }

        const PixelShaderInfo& psInfo = ctx->getModuleMetaData()->psInfo;

        // Disable simd32 compilation on platforms that do not support per-pixel
        // dispatch with num samples == 16.
        if (psInfo.NumSamples == 16 &&
            !ctx->platform.supportSimd32PerPixelPSWithNumSamples16() &&
            !IsPerSample())
        {
            return false;
        }

        if (psInfo.ForceEnableSimd32) // UMD forced compilation of simd32.
        {
            return true;
        }

        if (!ctx->platform.enablePSsimd32())
        {
            ctx->SetSIMDInfo(SIMD_SKIP_HW, simdMode, EP.m_ShaderDispatchMode);
            return false;
        }

        if (iSTD::BitCount(m_RenderTargetMask) > 1)
        {
            // don't compile SIMD32 for MRT as we may trash the render cache
            ctx->SetSIMDInfo(SIMD_SKIP_PERF, simdMode, EP.m_ShaderDispatchMode);
            return false;
        }

        Simd32ProfitabilityAnalysis& PA = EP.getAnalysis<Simd32ProfitabilityAnalysis>();
        if (PA.isSimd32Profitable())
        {
            return true;
        }
        else
        {
            // Not profitable: record it, but deliberately fall through to
            // the send-stall heuristics below, which may still enable
            // SIMD32.
            ctx->SetSIMDInfo(SIMD_SKIP_PERF, simdMode, EP.m_ShaderDispatchMode);
        }

        if (simd16Program && static_cast<CPixelShader*>(simd16Program)->m_sendStallCycle == 0)
        {
            // simd16 doesn't have any latency issue, no need to try simd32
            ctx->SetSIMDInfo(SIMD_SKIP_STALL, simdMode, EP.m_ShaderDispatchMode);
            return false;
        }

        if (ctx->platform.psSimd32SkipStallHeuristic() && ctx->m_DriverInfo.AlwaysEnableSimd32())
        {
            return true;
        }

        if (simd16Program)
        {
            // Compile SIMD32 when the SIMD16 kernel spends a large
            // fraction of its time stalled on sends.
            // NOTE(review): assumes staticCycle != 0 for a compiled
            // simd16 program - confirm, otherwise this divides by zero.
            uint sendStallCycle = static_cast<CPixelShader*>(simd16Program)->m_sendStallCycle;
            uint staticCycle = static_cast<CPixelShader*>(simd16Program)->m_staticCycle;
            if (sendStallCycle / (float)staticCycle > 0.4)
            {
                return true;
            }
            else
            {
                ctx->SetSIMDInfo(SIMD_SKIP_STALL, simdMode, EP.m_ShaderDispatchMode);
            }
        }
        return false;
    }
    return true;
}
1417
// Concatenate the coarse-phase (cps) and pixel-phase (ps) binaries into a
// single linked program: coarse bytes first, pixel bytes immediately
// after, zero-padded up to a 64-byte-aligned total size. Scratch-space
// requirements are summed across both phases.
// NOTE(review): the aligned_malloc result is not checked for nullptr;
// allocation failure would fault in memcpy_s - confirm whether callers
// guarantee allocation success.
void linkProgram(const SProgramOutput& cps, const SProgramOutput& ps, SProgramOutput& linked)
{
    linked.m_unpaddedProgramSize =
        cps.m_unpaddedProgramSize + ps.m_unpaddedProgramSize;
    linked.m_scratchSpaceUsedByShader =
        cps.m_scratchSpaceUsedByShader + ps.m_scratchSpaceUsedByShader;
    linked.m_scratchSpaceUsedBySpills =
        cps.m_scratchSpaceUsedBySpills + ps.m_scratchSpaceUsedBySpills;
    linked.m_scratchSpaceUsedByGtpin =
        cps.m_scratchSpaceUsedByGtpin + ps.m_scratchSpaceUsedByGtpin;
    linked.m_programSize = iSTD::Align(linked.m_unpaddedProgramSize, 64);
    linked.m_programBin = IGC::aligned_malloc(linked.m_programSize, 16);
    // Copy coarse phase
    memcpy_s(linked.m_programBin,
        cps.m_unpaddedProgramSize,
        cps.m_programBin,
        cps.m_unpaddedProgramSize);
    // Copy pixel phase
    memcpy_s((char*)linked.m_programBin + cps.m_unpaddedProgramSize,
        ps.m_unpaddedProgramSize,
        ps.m_programBin,
        ps.m_unpaddedProgramSize);
    // Zero the alignment padding at the tail.
    memset((char*)linked.m_programBin + linked.m_unpaddedProgramSize,
        0,
        linked.m_programSize - linked.m_unpaddedProgramSize);
}
1444
// Link the two phases of a CPS compilation: output[0] is the coarse
// phase, output[1] the pixel phase. 'linked' starts as a copy of the
// coarse output; SIMD16 is linked only when both phases produced a
// spill-free SIMD16 binary, SIMD8 is always linked.
void linkCPS(SPixelShaderKernelProgram* output, SPixelShaderKernelProgram& linked, unsigned int numberPhases)
{
    SPixelShaderKernelProgram CoarsePhaseOutput = output[0];
    SPixelShaderKernelProgram PixelPhaseOutput = output[1];
    linked = output[0];

    if (CoarsePhaseOutput.simd16.m_scratchSpaceUsedBySpills == 0 &&
        CoarsePhaseOutput.simd16.m_programBin != nullptr &&
        PixelPhaseOutput.simd16.m_scratchSpaceUsedBySpills == 0 &&
        PixelPhaseOutput.simd16.m_programBin != nullptr)
    {
        linkProgram(CoarsePhaseOutput.simd16, PixelPhaseOutput.simd16, linked.simd16);
    }
    else
    {
        // No linkable SIMD16 pair; drop the SIMD16 binary inherited from
        // the coarse copy above.
        linked.simd16.m_programBin = nullptr;
        linked.simd16.m_programSize = 0;
    }
    linkProgram(CoarsePhaseOutput.simd8, PixelPhaseOutput.simd8, linked.simd8);
    linked.hasPullBary = true;
    // NOTE(review): logical '||' (not bitwise '|') - this yields a
    // boolean, not a merged per-RT bit mask; verify consumers treat
    // renderTargetMask as a flag here.
    linked.renderTargetMask = (CoarsePhaseOutput.renderTargetMask || PixelPhaseOutput.renderTargetMask);
    IGC_ASSERT_MESSAGE(numberPhases == 2, "maximum number of phases is 2");
}
1468
// Top-level pixel shader codegen entry point.
// Single-phase shaders go through one CodeGen(ctx, shaders) run. For CPS
// (coarse + pixel phase) modules, each phase is compiled separately with
// a shared PSSignature, failed phases are discarded, and the surviving
// pair is linked via linkCPS. The phase functions are identified through
// the named metadata nodes NAMED_METADATA_COARSE_PHASE /
// NAMED_METADATA_PIXEL_PHASE.
void CodeGen(PixelShaderContext* ctx)
{
    Function* coarsePhase = nullptr;
    Function* pixelPhase = nullptr;
    NamedMDNode* coarseNode = nullptr;
    NamedMDNode* pixelNode = nullptr;
    MetaDataUtils* pMdUtils = nullptr;
    if (!HasSavedIR(ctx))
    {
        coarseNode = ctx->getModule()->getNamedMetadata(NAMED_METADATA_COARSE_PHASE);
        pixelNode = ctx->getModule()->getNamedMetadata(NAMED_METADATA_PIXEL_PHASE);
        if (coarseNode)
        {
            coarsePhase = mdconst::dyn_extract<Function>(coarseNode->getOperand(0)->getOperand(0));
        }
        if (pixelNode)
        {
            pixelPhase = mdconst::dyn_extract<Function>(pixelNode->getOperand(0)->getOperand(0));
        }
        pMdUtils = ctx->getMetaDataUtils();
    }

    bool codegenDone = false;

    CShaderProgram::KernelShaderMap coarseShaders;
    CShaderProgram::KernelShaderMap pixelShaders;

    if (coarsePhase && pixelPhase)
    {
        // Cancelling staged compilation for multi stage PS.
        ctx->m_CgFlag = FLAG_CG_ALL_SIMDS;

        //Multi stage PS, need to do separate compiler and link them
        unsigned int numStage = 2;
        PSSignature signature;
        FunctionInfoMetaDataHandle coarseFI, pixelFI;
        coarseFI = pMdUtils->getFunctionsInfoItem(coarsePhase);
        pixelFI = pMdUtils->getFunctionsInfoItem(pixelPhase);

        // Compile each phase in isolation: restrict the functions-info
        // metadata to the current phase before running codegen.
        for (unsigned int i = 0; i < numStage; i++)
        {
            Function* phaseFunc = (i == 0) ? coarsePhase : pixelPhase;
            FunctionInfoMetaDataHandle phaseFI = (i == 0) ? coarseFI : pixelFI;
            CShaderProgram::KernelShaderMap& shaders = (i == 0) ? coarseShaders : pixelShaders;

            pMdUtils->clearFunctionsInfo();
            pMdUtils->setFunctionsInfoItem(phaseFunc, phaseFI);
            pMdUtils->save(phaseFunc->getContext());
            CodeGen(ctx, shaders, &signature);

            // Read the phase function from metadata again as it could be changed in the PushAnalysis pass
            if (i == 0)
            {
                coarseNode = ctx->getModule()->getNamedMetadata(NAMED_METADATA_COARSE_PHASE);
            }
            else
            {
                pixelNode = ctx->getModule()->getNamedMetadata(NAMED_METADATA_PIXEL_PHASE);
            }
        }

        codegenDone = true;


        // Discard any phase for which no SIMD variant produced a binary;
        // the corresponding phase pointer is nulled so the single-phase
        // fallback below can pick up the survivor.
        for (unsigned int i = 0; i < numStage; i++)
        {
            Function* phaseFunc = (i == 0) ?
                mdconst::dyn_extract<Function>(coarseNode->getOperand(0)->getOperand(0)) :
                mdconst::dyn_extract<Function>(pixelNode->getOperand(0)->getOperand(0));

            CShaderProgram::KernelShaderMap& shaders = (i == 0) ? coarseShaders : pixelShaders;
            CPixelShader* simd8Shader = static_cast<CPixelShader*>(shaders[phaseFunc]->GetShader(SIMDMode::SIMD8));
            CPixelShader* simd16Shader = static_cast<CPixelShader*>(shaders[phaseFunc]->GetShader(SIMDMode::SIMD16));
            CPixelShader* simd32Shader = static_cast<CPixelShader*>(shaders[phaseFunc]->GetShader(SIMDMode::SIMD32));
            if (!((simd8Shader && simd8Shader->ProgramOutput()->m_programBin) ||
                (simd16Shader && simd16Shader->ProgramOutput()->m_programBin) ||
                (simd32Shader && simd32Shader->ProgramOutput()->m_programBin)
                ))
            {
                shaders[phaseFunc]->DeleteShader(SIMDMode::SIMD8);
                shaders[phaseFunc]->DeleteShader(SIMDMode::SIMD16);
                shaders[phaseFunc]->DeleteShader(SIMDMode::SIMD32);
                if (i == 0)
                {
                    delete shaders[coarsePhase];
                    coarsePhase = nullptr;
                }
                else
                {
                    delete shaders[pixelPhase];
                    pixelPhase = nullptr;
                }
            }
        }

        // Both phases compiled: fill per-phase outputs and link them.
        if (coarsePhase && pixelPhase)
        {
            SPixelShaderKernelProgram outputs[2];
            memset(&outputs, 0, 2 * sizeof(SPixelShaderKernelProgram));

            for (unsigned int i = 0; i < numStage; i++)
            {
                Function* phaseFunc = (i == 0) ?
                    mdconst::dyn_extract<Function>(coarseNode->getOperand(0)->getOperand(0)) :
                    mdconst::dyn_extract<Function>(pixelNode->getOperand(0)->getOperand(0));

                CShaderProgram::KernelShaderMap& shaders = (i == 0) ? coarseShaders : pixelShaders;

                shaders[phaseFunc]->FillProgram(&outputs[i]);
                COMPILER_SHADER_STATS_PRINT(shaders[phaseFunc]->m_shaderStats, ShaderType::PIXEL_SHADER, ctx->hash, "");
                COMPILER_SHADER_STATS_SUM(ctx->m_sumShaderStats, shaders[phaseFunc]->m_shaderStats, ShaderType::PIXEL_SHADER);
                COMPILER_SHADER_STATS_DEL(shaders[phaseFunc]->m_shaderStats);
                delete shaders[phaseFunc];
            }

            linkCPS(outputs, ctx->programOutput, numStage);
            // Kernels allocated in CISABuilder.cpp (Compile())
            // are freed in CompilerOutputOGL.hpp (DeleteShaderCompilerOutputOGL())
            // in case of CPS multistage PS they are separated.
            // Need to free original kernels here as DeleteShaderCompilerOutputOGL()
            // will clear new allocations for separated phases in this case.
            for (unsigned int i = 0; i < numStage; i++)
            {
                outputs[i].simd8.Destroy();
                outputs[i].simd16.Destroy();
                outputs[i].simd32.Destroy();
            }
        }
    }

    // Single-phase path: either a plain PS, or exactly one CPS phase
    // survived the discard loop above.
    if (!(coarsePhase && pixelPhase))
    {
        CShaderProgram::KernelShaderMap shaders;
        Function* pFunc = nullptr;

        if (!codegenDone)
        {
            // Single PS
            CodeGen(ctx, shaders);
            pFunc = getUniqueEntryFunc(ctx->getMetaDataUtils(), ctx->getModuleMetaData());
        }
        else
        {
            shaders = coarsePhase ? coarseShaders : pixelShaders;
            pFunc = coarsePhase ? coarsePhase : pixelPhase;
        }

        // gather data to send back to the driver
        shaders[pFunc]->FillProgram(&ctx->programOutput);
        COMPILER_SHADER_STATS_PRINT(shaders[pFunc]->m_shaderStats, ShaderType::PIXEL_SHADER, ctx->hash, "");
        COMPILER_SHADER_STATS_SUM(ctx->m_sumShaderStats, shaders[pFunc]->m_shaderStats, ShaderType::PIXEL_SHADER);
        COMPILER_SHADER_STATS_DEL(shaders[pFunc]->m_shaderStats);
        delete shaders[pFunc];
    }

}
1625
// For the coarse phase of a CPS shader, mark as outputs every variable
// the pixel phase will read as pass-through inputs: R1, the input deltas
// of attributes the pixel phase interpolates, the sample offsets, and
// the Z/W deltas when the pixel phase reads POSITION_Z/POSITION_W.
// Does nothing when the module has no pixel phase.
void CPixelShader::CreatePassThroughVar()
{
    CodeGenContext* ctx = GetContext();
    NamedMDNode* pixelNode = ctx->getModule()->getNamedMetadata("pixel_phase");
    if (!pixelNode)
    {
        // if there is no pixel phase we have nothing to do
        return;
    }
    IGC_ASSERT(nullptr != GetR1());
    encoder.MarkAsOutput(GetR1());
    Function* pixelPhase = mdconst::dyn_extract<Function>(pixelNode->getOperand(0)->getOperand(0));
    // Scan the pixel-phase IR for the intrinsics whose inputs must be
    // kept alive across the phase boundary.
    for (auto BB = pixelPhase->begin(), BE = pixelPhase->end(); BB != BE; ++BB)
    {
        llvm::BasicBlock* pLLVMBB = &(*BB);
        llvm::BasicBlock::InstListType& instructionList = pLLVMBB->getInstList();
        for (auto I = instructionList.begin(), E = instructionList.end(); I != E; ++I)
        {
            if (GenIntrinsicInst * intr = dyn_cast<GenIntrinsicInst>(I))
            {
                GenISAIntrinsic::ID IID = intr->getIntrinsicID();
                if (IID == GenISAIntrinsic::GenISA_DCL_inputVec)
                {
                    // Pixel phase interpolates this attribute: its deltas
                    // must survive the coarse phase.
                    unsigned int setupIndex =
                        (uint)llvm::cast<llvm::ConstantInt>(intr->getOperand(0))->getZExtValue();
                    CVariable* input = GetInputDelta(setupIndex);
                    encoder.MarkAsOutput(input);
                }
                else if (IID == GenISAIntrinsic::GenISA_SampleOffsetX)
                {
                    CVariable* offset = GetSampleOffsetX();
                    encoder.MarkAsOutput(offset);
                }
                else if (IID == GenISAIntrinsic::GenISA_SampleOffsetY)
                {
                    CVariable* offset = GetSampleOffsetY();
                    encoder.MarkAsOutput(offset);
                }
                else if (IID == GenISAIntrinsic::GenISA_DCL_SystemValue)
                {
                    // Z/W position reads in the pixel phase need the
                    // coarse-phase Z/W deltas.
                    SGVUsage usage = (SGVUsage)llvm::cast<llvm::ConstantInt>(intr->getOperand(0))->getZExtValue();
                    if (usage == POSITION_Z || usage == POSITION_W)
                    {
                        CVariable* deltas = GetZWDelta();
                        encoder.MarkAsOutput(deltas);
                    }
                }
            }
        }
    }
    GetDispatchSignature().inputOffset.resize(setup.size());
}
1678
ExtractGlobalVariables()1679 void CPixelShader::ExtractGlobalVariables()
1680 {
1681 llvm::Module* module = GetContext()->getModule();
1682 llvm::GlobalVariable* pGlobal = module->getGlobalVariable("SamplerCount");
1683 if (pGlobal)
1684 {
1685 auto samplerCount = int_cast<unsigned int>(llvm::cast<llvm::ConstantInt>(pGlobal->getInitializer())->getZExtValue());
1686 m_samplerCount = samplerCount;
1687 }
1688 }
1689
IsReturnBlock(llvm::BasicBlock * bb)1690 bool CPixelShader::IsReturnBlock(llvm::BasicBlock* bb)
1691 {
1692 return llvm::isa<llvm::ReturnInst>(bb->getTerminator());
1693 }
1694
IsLastRTWrite(llvm::GenIntrinsicInst * inst)1695 bool CPixelShader::IsLastRTWrite(llvm::GenIntrinsicInst* inst)
1696 {
1697 bool isLastRT;
1698 isLastRT = llvm::isa<llvm::ReturnInst>(inst->getNextNode());
1699
1700 return isLastRT && IsLastPhase() && GetPhase() != PSPHASE_PIXEL;
1701 }
1702
LowerPSInput()1703 bool CPixelShader::LowerPSInput()
1704 {
1705 return (m_SIMDSize == SIMDMode::SIMD16 || !m_Platform->supportMixMode());
1706 }
1707
IsInterpolationLinear(e_interpolation mode)1708 bool CPixelShader::IsInterpolationLinear(e_interpolation mode)
1709 {
1710 return mode == EINTERPOLATION_LINEAR ||
1711 mode == EINTERPOLATION_LINEARCENTROID ||
1712 mode == EINTERPOLATION_LINEARSAMPLE ||
1713 mode == EINTERPOLATION_LINEARNOPERSPECTIVE ||
1714 mode == EINTERPOLATION_LINEARNOPERSPECTIVECENTROID ||
1715 mode == EINTERPOLATION_LINEARNOPERSPECTIVESAMPLE;
1716 }
1717
emitPSInputLowering()1718 void CPixelShader::emitPSInputLowering()
1719 {
1720 auto iterSetupIndex = loweredSetupIndexes.begin();
1721 auto iterSetupIndexEnd = loweredSetupIndexes.end();
1722
1723
1724 if (LowerPSInput())
1725 {
1726 for (; iterSetupIndex != iterSetupIndexEnd; ++iterSetupIndex)
1727 {
1728 bool combineTwoDelta = false;
1729 auto nextElemt = iterSetupIndex;
1730 nextElemt++;
1731 if (nextElemt != iterSetupIndexEnd && *iterSetupIndex % 2 == 0 && *iterSetupIndex + 1 == *nextElemt)
1732 {
1733 combineTwoDelta = true;
1734 }
1735 unsigned int index = *iterSetupIndex;
1736 CVariable* inputVar = GetInputDelta(index, combineTwoDelta);
1737 CVariable* inputVarLowered = GetInputDeltaLowered(index);
1738 if (encoder.IsCodePatchCandidate())
1739 {
1740 encoder.SetPayloadSectionAsPrimary();
1741 AddPatchTempSetup(inputVarLowered);
1742 }
1743
1744 encoder.SetSrcRegion(0, 1, 1, 0);
1745 encoder.SetUniformSIMDSize(combineTwoDelta ? SIMDMode::SIMD8 : SIMDMode::SIMD4);
1746 encoder.SetNoMask();
1747 encoder.Cast(inputVarLowered, inputVar);
1748 encoder.Push();
1749 if (encoder.IsCodePatchCandidate())
1750 {
1751 encoder.SetPayloadSectionAsSecondary();
1752 }
1753 if (combineTwoDelta)
1754 {
1755 ++iterSetupIndex;
1756 }
1757 }
1758
1759 for (uint i = EINTERPOLATION_LINEAR; i < NUMBER_EINTERPOLATION; ++i)
1760 {
1761 if (m_ModeUsedHalf.test(i))
1762 {
1763 CVariable* baryVar = GetBaryReg((e_interpolation)i);
1764 CVariable* baryVarLowered = GetBaryRegLoweredHalf((e_interpolation)i);
1765
1766 if (encoder.IsCodePatchCandidate())
1767 {
1768 encoder.SetPayloadSectionAsPrimary();
1769 }
1770 for (uint8_t i = 0; i < m_numberInstance; ++i)
1771 {
1772 encoder.SetSecondHalf(i == 1);
1773
1774 // mov SIMD8 U1/barry(0, 0) in to tmpU(0, 0)
1775 // mov (8) r1.0<1>:hf r2.0<8;8,1>:f {Align1, Q1, NoMask} // #??:$27:%30
1776 encoder.SetSimdSize(SIMDMode::SIMD8);
1777 encoder.SetNoMask();
1778 encoder.Cast(baryVarLowered, baryVar);
1779 encoder.Push();
1780
1781 if (m_SIMDSize == SIMDMode::SIMD16)
1782 {
1783 // mov SIMD8 U2/barry(2, 0) in to tmpU(0, 8)
1784 // mov (8) r1.8<1>:hf r4.0<8;8,1>:f {Align1, Q1, NoMask} // #??:$28:%31
1785 encoder.SetSrcSubVar(0, 2);
1786 encoder.SetDstSubReg(8);
1787 encoder.SetSimdSize(SIMDMode::SIMD8);
1788 encoder.SetNoMask();
1789 encoder.Cast(baryVarLowered, baryVar);
1790 encoder.Push();
1791 }
1792
1793 // mov SIMD8 V1/barry(1, 0) in to tmpV(0, 0)
1794 // mov (8) r12.0<1>:hf r3.0<8;8,1>:f {Align1, Q1, NoMask} // #??:$29:%32
1795 encoder.SetSrcSubVar(0, 1);
1796 encoder.SetSimdSize(SIMDMode::SIMD8);
1797 encoder.SetNoMask();
1798 encoder.SetDstSubReg(numLanes(m_SIMDSize));
1799 encoder.Cast(baryVarLowered, baryVar);
1800 encoder.Push();
1801
1802 if (m_SIMDSize == SIMDMode::SIMD16)
1803 {
1804 // mov SIMD8 V1/barry(3, 0) in to tmpV(0, 0)
1805 // mov (8) r12.8<1>:hf r5.0<8;8,1>:f {Align1, Q1, NoMask} // #??:$30:%33
1806 encoder.SetSrcSubVar(0, 3);
1807 encoder.SetDstSubReg(8);
1808 encoder.SetSimdSize(SIMDMode::SIMD8);
1809 encoder.SetNoMask();
1810 encoder.SetDstSubVar(1);
1811 encoder.Cast(baryVarLowered, baryVar);
1812 encoder.Push();
1813 }
1814 encoder.SetSecondHalf(false);
1815 }
1816 if (encoder.IsCodePatchCandidate())
1817 {
1818 encoder.SetPayloadSectionAsSecondary();
1819 }
1820 }
1821 }
1822 }
1823 }
1824
// Mark the 4-channel attribute containing setup channel 'index' as
// constant-interpolated (one bit per attribute in the mask).
void CPixelShader::MarkConstantInterpolation(unsigned int index)
{
    // 4 setup channels per attribute, hence index / 4.
    m_ConstantInterpolationMask |= BIT(index / 4);
}
1829
1830 // Take PS attribute and return active components within, encoded as HW expects.
GetActiveComponents(uint attribute) const1831 USC::GFX3DSTATE_SF_ATTRIBUTE_ACTIVE_COMPONENT CPixelShader::GetActiveComponents(uint attribute) const
1832 {
1833 USC::GFX3DSTATE_SF_ATTRIBUTE_ACTIVE_COMPONENT result =
1834 USC::GFX3DSTATE_SF_ATTRIBUTE_ACTIVE_COMPONENT_DISABLED;
1835 for (auto it = m_SetupIndicesUsed.lower_bound(attribute * 4);
1836 it != m_SetupIndicesUsed.end(); ++it)
1837 {
1838 if (attribute != (*it / 4)) break;
1839 switch (*it % 4)
1840 {
1841 case 0:
1842 case 1:
1843 result = USC::GFX3DSTATE_SF_ATTRIBUTE_ACTIVE_COMPONENT_XY;
1844 break;
1845 case 2:
1846 result = USC::GFX3DSTATE_SF_ATTRIBUTE_ACTIVE_COMPONENT_XYZ;
1847 break;
1848 case 3:
1849 result = USC::GFX3DSTATE_SF_ATTRIBUTE_ACTIVE_COMPONENT_XYZW;
1850 break;
1851 }
1852 }
1853 return result;
1854 }
1855
1856 // this method must be run only after CShader::PreAnalysisPass() was run
MapPushedInputs()1857 void CPixelShader::MapPushedInputs()
1858 {
1859 // first gather setup index info
1860 for (auto I = pushInfo.inputs.begin(), E = pushInfo.inputs.end(); I != E; I++)
1861 {
1862 m_SetupIndicesUsed.insert(I->second.index);
1863 m_MaxSetupIndex = std::max(I->second.index, m_MaxSetupIndex);
1864 }
1865 // then map using proper indexing
1866 for (auto I = pushInfo.inputs.begin(), E = pushInfo.inputs.end(); I != E; I++)
1867 {
1868 // We need to map the value associated with the value pushed to a physical register
1869 if (I->second.interpolationMode == EINTERPOLATION_CONSTANT)
1870 {
1871 this->MarkConstantInterpolation(I->second.index);
1872 }
1873 CVariable* var = GetSymbol(m_argListCache[I->second.argIndex]);
1874 AddSetup(getSetupIndex(I->second.index), var);
1875 }
1876 }
1877
getSetupIndex(uint inputIndex)1878 int CPixelShader::getSetupIndex(uint inputIndex)
1879 {
1880 {
1881 return inputIndex;
1882 }
1883 }
1884
1885 } // namespace IGC
1886