1 /*========================== begin_copyright_notice ============================
2 
3 Copyright (C) 2017-2021 Intel Corporation
4 
5 SPDX-License-Identifier: MIT
6 
7 ============================= end_copyright_notice ===========================*/
8 
9 //===----------------------------------------------------------------------===//
10 //
11 // A helper (static) class that returns a mapping from message generating
12 // intrinsic (e.g. sample, load, urb_write) arguments to their respective
13 // positions in the payload message.
14 //
15 //===----------------------------------------------------------------------===//
16 
17 #include "Compiler/CISACodeGen/PayloadMapping.hpp"
18 #include "Compiler/CodeGenPublic.h"
19 #include "Compiler/CISACodeGen/Platform.hpp"
20 #include "Compiler/CISACodeGen/helper.h"
21 #include "common/LLVMWarningsPush.hpp"
22 #include <llvm/IR/Function.h>
23 #include <llvm/IR/Instruction.h>
24 #include <llvm/IR/Instructions.h>
25 #include <llvm/IR/IntrinsicInst.h>
26 #include <llvm/IR/BasicBlock.h>
27 #include "common/LLVMWarningsPop.hpp"
28 #include "Probe/Assertion.h"
29 
30 using namespace llvm;
31 using namespace IGC;
32 
33 ///
GetSupremumOfNonHomogeneousPart(const Instruction * inst1,const Instruction * inst2)34 const Instruction* PayloadMapping::GetSupremumOfNonHomogeneousPart(
35     const Instruction* inst1,
36     const Instruction* inst2)
37 {
38     const GenIntrinsicInst* intrinsicInst1 = dyn_cast<GenIntrinsicInst>(inst1);
39     const GenIntrinsicInst* intrinsicInst2 = dyn_cast<GenIntrinsicInst>(inst2);
40 
41     IGC_ASSERT(intrinsicInst1);
42     IGC_ASSERT(intrinsicInst2);
43     IGC_ASSERT(intrinsicInst1->getIntrinsicID() == GenISAIntrinsic::GenISA_RTWrite);
44     IGC_ASSERT(intrinsicInst2->getIntrinsicID() == GenISAIntrinsic::GenISA_RTWrite);
45 
46     switch (intrinsicInst1->getIntrinsicID())
47     {
48     case GenISAIntrinsic::GenISA_RTWrite:
49         return GetSupremumOfNonHomogeneousPart_RTWrite(inst1, inst2);
50     default:
51         IGC_ASSERT_MESSAGE(0, "should not be called at all for other intrinsics");
52         return nullptr;
53     }
54 }
55 
56 
57 /// Helper method.
58 /// compare rtwi1 and rtwi2 w.r.t. oMask
59 /// biased towards rtwi1:
60 /// X X -> return rtwi1
61 /// X 2 -> return rtwi2
62 /// 1 X -> return rtwi1
63 /// 1 2 -> return rtwi1
CompareOnMask(const RTWritIntrinsic * rtwi1,const RTWritIntrinsic * rtwi2)64 static const Instruction* CompareOnMask(
65     const RTWritIntrinsic* rtwi1,
66     const RTWritIntrinsic* rtwi2)
67 {
68     if (rtwi1->hasMask() && rtwi2->hasMask())
69     {
70         return rtwi1;
71     }
72     /// rtwi1 S0 < rtwi2 S0
73     else if (!rtwi1->hasMask() && rtwi2->hasMask())
74     {
75         return rtwi2;
76     }
77     /// rtwi1 S0 > rtwi2 S0
78     else if (rtwi1->hasMask() && !rtwi2->hasMask())
79     {
80         return rtwi1;
81     }
82 
83     return rtwi1;
84 }
85 
86 ///
GetSupremumOfNonHomogeneousPart_RTWrite(const Instruction * inst1,const Instruction * inst2)87 const Instruction* PayloadMapping::GetSupremumOfNonHomogeneousPart_RTWrite(
88     const Instruction* inst1,
89     const Instruction* inst2)
90 {
91     IGC_ASSERT(llvm::isa<llvm::RTWritIntrinsic>(inst1));
92     IGC_ASSERT(llvm::isa<llvm::RTWritIntrinsic>(inst2));
93     ModuleMetaData* modMD = m_CodeGenContext->getModuleMetaData();
94     const RTWritIntrinsic* rtwi1 = cast<RTWritIntrinsic>(inst1);
95     const RTWritIntrinsic* rtwi2 = cast<RTWritIntrinsic>(inst2);
96 
97     /// rtwi1 S0 == rtwi2 S0
98     if (RTWriteHasSource0Alpha(rtwi1, modMD) && RTWriteHasSource0Alpha(rtwi2, modMD))
99     {
100         return CompareOnMask(rtwi1, rtwi2);
101     }
102     /// rtwi1 X < rtwi2 S0
103     else if (!RTWriteHasSource0Alpha(rtwi1, modMD) && RTWriteHasSource0Alpha(rtwi2, modMD))
104     {
105         return CompareOnMask(rtwi2, rtwi1) == rtwi2 ? rtwi2 : nullptr;
106     }
107     /// rtwi1 S0 > rtwi2 S0
108     else if (RTWriteHasSource0Alpha(rtwi1, m_CodeGenContext->getModuleMetaData()) && !RTWriteHasSource0Alpha(rtwi2, modMD))
109     {
110         return CompareOnMask(rtwi1, rtwi2) == rtwi1 ? rtwi1 : nullptr;
111     }
112 
113     return nullptr;
114 }
115 
116 
117 /// Return the 'reserved' left offset of modeled payload (in bytes).
118 /// By default, homogeneous payload messages returns 0, but if one
119 /// wants to model the 'reserved' part of the payload (which cannot
120 /// be coalesced due to non-homogeneous) this method should handle
121 /// that intrinsic and return the offset (in units of bytes).
GetLeftReservedOffset(const Instruction * inst,SIMDMode simdMode)122 int PayloadMapping::GetLeftReservedOffset(const Instruction* inst, SIMDMode simdMode)
123 {
124     const GenIntrinsicInst* intrinsicInst = dyn_cast<GenIntrinsicInst>(inst);
125     IGC_ASSERT(intrinsicInst);
126 
127     switch (intrinsicInst->getIntrinsicID())
128     {
129     case GenISAIntrinsic::GenISA_RTWrite:
130         IGC_ASSERT(llvm::isa<llvm::RTWritIntrinsic>(inst));
131         return GetLeftReservedOffset_RTWrite(cast<RTWritIntrinsic>(inst), simdMode);
132     case GenISAIntrinsic::GenISA_RTDualBlendSource:
133         IGC_ASSERT(llvm::isa<llvm::RTDualBlendSourceIntrinsic>(inst));
134         return GetLeftReservedOffset_RTWrite(cast<RTDualBlendSourceIntrinsic>(inst), simdMode);
135     default:
136         return 0;
137     }
138 }
139 
140 ///
141 template <typename T>
GetLeftReservedOffset_RTWrite(const T * inst,SIMDMode simdMode)142 int PayloadMapping::GetLeftReservedOffset_RTWrite(const T* inst, SIMDMode simdMode)
143 {
144     int offset = 0;
145 
146     if (inst->hasMask())
147     {
148         //Output mask is always fixed size regardless of SIMD mode.
149         offset += m_CodeGenContext->platform.getGRFSize();
150     }
151 
152     if (RTWriteHasSource0Alpha(inst, m_CodeGenContext->getModuleMetaData()))
153     {
154         IGC_ASSERT(simdMode == SIMDMode::SIMD8 || simdMode == SIMDMode::SIMD16);
155         int multiplier = inst->getSource0Alpha()->getType()->isHalfTy() ? 1 : 2;
156         if (simdMode == SIMDMode::SIMD8)
157         {
158             offset += m_CodeGenContext->platform.getGRFSize(); //always 1GRF, regardless of HF
159         }
160         else if (simdMode == SIMDMode::SIMD16)
161         {
162             offset += m_CodeGenContext->platform.getGRFSize() * multiplier; // 2GRFs normal precision, 1GRF HF precision
163         }
164     }
165 
166     return offset;
167 }
168 
169 ///
GetRightReservedOffset(const Instruction * inst,SIMDMode simdMode)170 int PayloadMapping::GetRightReservedOffset(const Instruction* inst, SIMDMode simdMode)
171 {
172     const GenIntrinsicInst* intrinsicInst = dyn_cast<GenIntrinsicInst>(inst);
173     IGC_ASSERT(intrinsicInst);
174 
175     switch (intrinsicInst->getIntrinsicID())
176     {
177     case GenISAIntrinsic::GenISA_RTWrite:
178         IGC_ASSERT(llvm::isa<llvm::RTWritIntrinsic>(inst));
179         return GetRightReservedOffset_RTWrite(cast<RTWritIntrinsic>(inst), simdMode);
180     case GenISAIntrinsic::GenISA_RTDualBlendSource:
181         IGC_ASSERT(llvm::isa<llvm::RTDualBlendSourceIntrinsic>(inst));
182         return GetRightReservedOffset_RTWrite(cast<RTDualBlendSourceIntrinsic>(inst), simdMode);
183     default:
184         return 0;
185     }
186 }
187 
188 template <typename T>
GetRightReservedOffset_RTWrite(const T * inst,SIMDMode simdMode)189 int PayloadMapping::GetRightReservedOffset_RTWrite(const T* inst, SIMDMode simdMode)
190 {
191     int offset = 0;
192 
193     if (inst->hasStencil())
194     {
195         IGC_ASSERT(m_CodeGenContext->platform.supportsStencil(simdMode));
196         offset += m_CodeGenContext->platform.getGRFSize();
197     }
198 
199     if (inst->hasDepth())
200     {
201 
202         IGC_ASSERT(simdMode == SIMDMode::SIMD8 || simdMode == SIMDMode::SIMD16);
203 
204         //Depth always has normal precision, regardless of the rest of the fields.
205         if (simdMode == SIMDMode::SIMD8)
206         {
207             offset += m_CodeGenContext->platform.getGRFSize(); //always 1GRF, 32 bytes
208         }
209         else if (simdMode == SIMDMode::SIMD16)
210         {
211             offset += m_CodeGenContext->platform.getGRFSize() * 2; // 2GRFs normal precision, 64 bytes
212         }
213     }
214 
215     return offset;
216 }
217 
218 /// Returns true if a given intrinsic (and its specific configuration) will produce
219 /// non-homogeneous payload (the one in which some elements are not 'expanded' with simd mode)
220 /// e.g. RT writes with oMask
HasNonHomogeneousPayloadElements(const Instruction * inst)221 bool PayloadMapping::HasNonHomogeneousPayloadElements(const Instruction* inst)
222 {
223     const GenIntrinsicInst* intrinsicInst = dyn_cast<GenIntrinsicInst>(inst);
224     IGC_ASSERT(intrinsicInst);
225 
226     switch (intrinsicInst->getIntrinsicID())
227     {
228     case GenISAIntrinsic::GenISA_RTWrite:
229         IGC_ASSERT(llvm::isa<llvm::RTWritIntrinsic>(inst));
230         return HasNonHomogeneousPayloadElements_RTWrite(cast<RTWritIntrinsic>(inst));
231     case GenISAIntrinsic::GenISA_RTDualBlendSource:
232         IGC_ASSERT(llvm::isa<llvm::RTDualBlendSourceIntrinsic>(inst));
233         return HasNonHomogeneousPayloadElements_RTWrite(cast<RTDualBlendSourceIntrinsic>(inst));
234     default:
235         return false;
236     }
237 
238 }
239 
240 ///
241 template <typename T>
HasNonHomogeneousPayloadElements_RTWrite(const T * inst)242 bool PayloadMapping::HasNonHomogeneousPayloadElements_RTWrite(const T* inst)
243 {
244     if (inst->hasMask())
245     {
246         return true;
247     }
248     if (RTWriteHasSource0Alpha(inst, m_CodeGenContext->getModuleMetaData()))
249     {
250         return true;
251     }
252     if (inst->hasDepth())
253     {
254         return true;
255     }
256     if (inst->hasStencil())
257     {
258         return true;
259     }
260 
261     return false;
262 }
263 
264 /// \brief
IsUndefOrZeroImmediate(const Value * value)265 bool PayloadMapping::IsUndefOrZeroImmediate(const Value* value)
266 {
267     if (llvm::isa<llvm::UndefValue>(value))
268     {
269         return true;
270     }
271 
272     if (const llvm::ConstantInt * CInt = llvm::dyn_cast<llvm::ConstantInt>(value)) {
273         if (CInt->getZExtValue() == 0) {
274             return true;
275         }
276     }
277 
278     if (const llvm::ConstantFP * CFP = llvm::dyn_cast<llvm::ConstantFP>(value))
279     {
280         APInt api = CFP->getValueAPF().bitcastToAPInt();
281         if (api.getZExtValue() == 0)
282         {
283             return true;
284         }
285     }
286     return false;
287 }
288 
289 /// Return the number of payload elements that this instruction will
290 /// generate.
GetNumPayloadElements(const Instruction * inst)291 uint PayloadMapping::GetNumPayloadElements(const Instruction* inst)
292 {
293     const GenIntrinsicInst* intrinsicInst = dyn_cast<GenIntrinsicInst>(inst);
294     IGC_ASSERT(intrinsicInst);
295     if (const SampleIntrinsic * sampleInst = dyn_cast<SampleIntrinsic>(inst))
296     {
297         return GetNumPayloadElements_Sample(sampleInst);
298     }
299     else if (const SamplerLoadIntrinsic * sampleInst = dyn_cast<SamplerLoadIntrinsic>(inst))
300     {
301         return GetNumPayloadElements_LDMS(sampleInst);
302     }
303 
304     switch (intrinsicInst->getIntrinsicID())
305     {
306     case GenISAIntrinsic::GenISA_URBWrite:
307         return GetNumPayloadElements_URBWrite(intrinsicInst);
308     case GenISAIntrinsic::GenISA_RTWrite:
309         return GetNumPayloadElements_RTWrite(intrinsicInst);
310     case GenISAIntrinsic::GenISA_RTDualBlendSource:
311         return GetNumPayloadElements_DSRTWrite(intrinsicInst);
312     default:
313         break;
314     }
315 
316     IGC_ASSERT(0);
317     return 0;
318 }
319 
320 /// \brief
GetNumPayloadElements_LDMS(const GenIntrinsicInst * inst)321 uint PayloadMapping::GetNumPayloadElements_LDMS(const GenIntrinsicInst* inst)
322 {
323     const uint numOperands = inst->getNumOperands();
324 
325     //Subtract the offsets, and texture resource, lod to get
326     //the number of texture coordinates and index to texture source
327     uint numSources = numOperands - 5;
328 
329     for (uint i = numSources - 1; i > 0; i--)
330     {
331         if (IsUndefOrZeroImmediate(inst->getOperand(i)))
332         {
333             numSources--;
334         }
335         else
336         {
337             break;
338         }
339     }
340 
341     return numSources;
342 }
343 
344 /// \brief Adjusts the number of sources for a sampler, based on a sampler type.
ValidateNumberofSources(EOPCODE opCode,bool isCube,uint & numberofSrcs)345 void PayloadMapping::ValidateNumberofSources(EOPCODE opCode, bool isCube, uint& numberofSrcs)
346 {
347     switch (opCode)
348     {
349     case llvm_sampleptr:
350     case llvm_sample_killpix:
351     case llvm_lodptr:
352     {
353     }
354     break;
355 
356     case llvm_sample_bptr:
357     case llvm_sample_cptr:
358     case llvm_sample_lptr:
359     {
360 
361         switch (numberofSrcs)
362         {
363         case 1:
364             numberofSrcs++;
365             break;
366         }
367     }
368     break;
369 
370     case llvm_sample_dptr:
371     {
372 
373         switch (numberofSrcs)
374         {
375         case 1:
376             numberofSrcs++;
377         case 2:
378             numberofSrcs++;
379             break;
380         case 5:
381             numberofSrcs++;
382             break;
383         case 8:
384             numberofSrcs++;
385             break;
386         }
387     }
388     break;
389     case llvm_sample_dcptr:
390     {
391 
392         switch (numberofSrcs)
393         {
394         case 2:
395             numberofSrcs++;
396         case 3:
397             numberofSrcs++;
398             break;
399         case 5:
400             numberofSrcs++;
401         case 6:
402             numberofSrcs++;
403             break;
404         case 8:
405             numberofSrcs++;
406         case 9:
407             numberofSrcs++;
408             break;
409         }
410     }
411     break;
412 
413     case llvm_sample_lcptr:
414     case llvm_sample_bcptr:
415     {
416 
417         switch (numberofSrcs)
418         {
419         case 1:
420             numberofSrcs++;
421         case 2:
422             numberofSrcs++;
423             break;
424         }
425     }
426     break;
427     default:
428         break;
429     }
430 
431 }
432 
433 /// \brief Provides the total number of payload elements for a sample.
434 ///
435 /// Does not consider 'peeling' of the first element for split-send.
436 /// The peeling itself is supposed to be done by a wrapper method.
GetNonAdjustedNumPayloadElements_Sample(const SampleIntrinsic * inst)437 uint PayloadMapping::GetNonAdjustedNumPayloadElements_Sample(const SampleIntrinsic* inst)
438 {
439     const unsigned int  numOperands = inst->getNumOperands();
440     unsigned int numSources = numOperands - 6;
441 
442     if (inst->IsLODInst())
443     {
444         //Subtract resource and sampler sources to get
445         //the number of texture coordinates and index to texture source
446         numSources = numOperands - 3;
447     }
448 
449     //Check for valid number of sources from the end of the list
450     for (uint i = (numSources - 1); i >= 1; i--)
451     {
452         if (IsUndefOrZeroImmediate(inst->getOperand(i)))
453         {
454             numSources--;
455         }
456         else
457         {
458             break;
459         }
460     }
461 
462     //temp solution to send valid sources but having 0 as their values.
463     EOPCODE opCode = GetOpCode(inst);
464     llvm::Type* cubeTextureType = GetResourceDimensionType(*inst->getModule(), RESOURCE_DIMENSION_TYPE::DIM_CUBE_TYPE);
465     llvm::Type* cubeArrayTextureType = GetResourceDimensionType(*inst->getModule(), RESOURCE_DIMENSION_TYPE::DIM_CUBE_ARRAY_TYPE);
466     llvm::Type* textureType = inst->getTextureValue()->getType()->getPointerElementType();
467     bool isCube = (textureType == cubeTextureType || textureType == cubeArrayTextureType);
468     ValidateNumberofSources(opCode, isCube, numSources);
469 
470     if (IsZeroLOD(inst))
471     {
472         numSources--;
473     }
474 
475     return numSources;
476 }
477 
478 /// \brief Gets the adjusted number of payload elements for sampler.
479 ///
480 /// Whether to peel is determined by the target GEN architecture.
GetNumPayloadElements_Sample(const SampleIntrinsic * inst)481 uint PayloadMapping::GetNumPayloadElements_Sample(const SampleIntrinsic* inst)
482 {
483     unsigned int numSources = GetNonAdjustedNumPayloadElements_Sample(inst);
484     return numSources;
485 }
486 
487 /// \brief Determines whether sample instruction has LOD of zero
IsZeroLOD(const SampleIntrinsic * inst)488 bool PayloadMapping::IsZeroLOD(const SampleIntrinsic* inst)
489 {
490     const CPlatform& platform = m_CodeGenContext->platform;
491 
492     if (platform.supportSampleAndLd_lz() && !platform.WaDisableSampleLz())
493     {
494         if (const SampleIntrinsic * sampleInst = dyn_cast<SampleIntrinsic>(inst))
495         {
496             return sampleInst->ZeroLOD();
497         }
498     }
499     return false;
500 }
501 
502 ///
GetNumPayloadElements_URBWrite(const GenIntrinsicInst * inst)503 uint PayloadMapping::GetNumPayloadElements_URBWrite(const GenIntrinsicInst* inst)
504 {
505 
506     //FIXME: this was copied from EmitPass::emitURBWrite.
507     //find a way to unify this, so not to cause sudden troubles if it is
508     //modified there
509 
510     uint size = 0;
511     if (llvm::isa<llvm::ConstantInt>(inst->getOperand(1)))
512     {
513         uint mask = (uint)llvm::cast<llvm::ConstantInt>(inst->getOperand(1))->getZExtValue();
514         size = iSTD::bsr(mask) + 1;
515     }
516     else
517     {
518         // All 4 elements will be send - we don't know which are masked out.
519         size = 4;
520     }
521 
522     return size;
523 }
524 
525 /// \brief Returns the number of homogeneous slots in payload.
526 ///
527 /// RT write complete format: s0Alpha oM [R G B A] sZ oS
528 /// Only [R G B A] part forms the homogeneous sequence. (Though one can
529 /// also include s0Alpha in the homogeneous part if oM is not present, but that
530 /// seemed not to give good results in terms of coalescing efficiency).
GetNumPayloadElements_RTWrite(const GenIntrinsicInst * inst)531 uint PayloadMapping::GetNumPayloadElements_RTWrite(const GenIntrinsicInst* inst)
532 {
533     const int numElements = 4; //4 colors, always form homogeneous 'part'.
534     return numElements;
535 }
536 
537 ///
GetPayloadElementToValueMapping_URBWrite(const GenIntrinsicInst * inst,uint index)538 Value* PayloadMapping::GetPayloadElementToValueMapping_URBWrite(const GenIntrinsicInst* inst, uint index)
539 {
540     //First 3 operands are immediates with specific meaning (not part of payload)
541     return inst->getOperand(index + 2);
542 }
543 
544 /// \brief Gets non-adjusted mapping from the payload element index to intrinsic value(argument) index.
545 ///
546 /// Peeling is not applied here.
GetNonAdjustedPayloadElementIndexToValueIndexMapping_sample(const SampleIntrinsic * inst,uint index)547 uint PayloadMapping::GetNonAdjustedPayloadElementIndexToValueIndexMapping_sample(
548     const SampleIntrinsic* inst, uint index)
549 {
550     const bool zeroLOD = IsZeroLOD(inst);
551     uint startIndex = zeroLOD ? 1 : 0;
552 
553     GenISAIntrinsic::ID IID = inst->getIntrinsicID();
554     //Here we want to get 'C', but to skip 'L'.
555     // C L ...
556     if (!(zeroLOD &&
557         index == 0 &&
558         IID == GenISAIntrinsic::GenISA_sampleLCptr))
559     {
560         index = index + startIndex;
561     }
562 
563     return index;
564 }
565 
566 /// \brief Gets payload element index to value mapping, adjusted with splitting decision(peeling).
GetPayloadElementToValueMapping_sample(const SampleIntrinsic * inst,const uint index)567 Value* PayloadMapping::GetPayloadElementToValueMapping_sample(const SampleIntrinsic* inst, const uint index)
568 {
569     uint valueIndex = GetNonAdjustedPayloadElementIndexToValueIndexMapping_sample(inst, index);
570     return inst->getOperand(valueIndex);
571 }
572 
573 /// \brief Gets payload element index to value mapping for RT writes.
GetPayloadElementToValueMapping_RTWrite(const GenIntrinsicInst * inst,const uint index)574 Value* PayloadMapping::GetPayloadElementToValueMapping_RTWrite(const GenIntrinsicInst* inst, const uint index)
575 {
576     //s0Alpha oM[R G B A] sZ oS
577     IGC_ASSERT(index < GetNumPayloadElements(inst));
578     IGC_ASSERT(llvm::isa<llvm::RTWritIntrinsic>(inst));
579     const RTWritIntrinsic* rtwi = cast<RTWritIntrinsic>(inst);
580 
581     if (index < 4)
582     {
583         switch (index)
584         {
585         case 0: return rtwi->getRed();
586         case 1: return rtwi->getGreen();
587         case 2: return rtwi->getBlue();
588         case 3: return rtwi->getAlpha();
589         }
590     }
591 
592     return nullptr;
593 }
594 
GetPayloadElementToValueMapping_LDMS(const SamplerLoadIntrinsic * inst,const uint index)595 Value* PayloadMapping::GetPayloadElementToValueMapping_LDMS(const SamplerLoadIntrinsic* inst, const uint index)
596 {
597     uint valueIndex = index;
598 
599     return inst->getOperand(valueIndex);
600 }
601 
602 
GetPayloadElementToValueMapping(const Instruction * inst,uint index)603 Value* PayloadMapping::GetPayloadElementToValueMapping(const Instruction* inst, uint index)
604 {
605     IGC_ASSERT(index < GetNumPayloadElements(inst));
606 
607     const llvm::GenIntrinsicInst* intrinsicInst = dyn_cast<GenIntrinsicInst>(inst);
608     IGC_ASSERT(intrinsicInst);
609 
610     std::pair<const llvm::Instruction*, uint> instIndexPair(inst, index);
611 
612     PayloadMappingCache::iterator cachedValue = m_PayloadMappingCache.find(instIndexPair);
613     if (cachedValue != m_PayloadMappingCache.end())
614     {
615         return cachedValue->second;
616     }
617 
618     Value* payloadValue = nullptr;
619 
620     if (const SampleIntrinsic * sampleInst = dyn_cast<SampleIntrinsic>(inst))
621     {
622         payloadValue = GetPayloadElementToValueMapping_sample(sampleInst, index);
623         IGC_ASSERT(payloadValue != nullptr);
624         m_PayloadMappingCache.insert(std::pair<std::pair<const llvm::Instruction*, uint>, Value*>(instIndexPair, payloadValue));
625         return payloadValue;
626     }
627     else if (const SamplerLoadIntrinsic * sampleInst = dyn_cast<SamplerLoadIntrinsic>(inst))
628     {
629         payloadValue = GetPayloadElementToValueMapping_LDMS(sampleInst, index);
630         IGC_ASSERT(payloadValue != nullptr);
631         m_PayloadMappingCache.insert(std::pair<std::pair<const llvm::Instruction*, uint>, Value*>(instIndexPair, payloadValue));
632         return payloadValue;
633     }
634 
635 
636     switch (intrinsicInst->getIntrinsicID())
637     {
638     case GenISAIntrinsic::GenISA_URBWrite:
639         payloadValue = GetPayloadElementToValueMapping_URBWrite(intrinsicInst, index);
640         IGC_ASSERT(payloadValue != nullptr);
641         m_PayloadMappingCache.insert(std::pair<std::pair<const llvm::Instruction*, uint>, Value*>(instIndexPair, payloadValue));
642         return payloadValue;
643     case GenISAIntrinsic::GenISA_RTWrite:
644         payloadValue = GetPayloadElementToValueMapping_RTWrite(intrinsicInst, index);
645         IGC_ASSERT(payloadValue != nullptr);
646         m_PayloadMappingCache.insert(std::pair<std::pair<const llvm::Instruction*, uint>, Value*>(instIndexPair, payloadValue));
647         return payloadValue;
648     case GenISAIntrinsic::GenISA_RTDualBlendSource:
649         payloadValue = GetPayloadElementToValueMapping_DSRTWrite(intrinsicInst, index);
650         IGC_ASSERT(payloadValue != nullptr);
651         m_PayloadMappingCache.insert(std::pair<std::pair<const llvm::Instruction*, uint>, Value*>(instIndexPair, payloadValue));
652         return payloadValue;
653     default:
654         break;
655     }
656 
657     IGC_ASSERT(0);
658     return NULL;
659 }
660 
661 /// \brief Determines whether the payload is being split by peeling the first element.
DoPeelFirstElement(const Instruction * inst)662 bool PayloadMapping::DoPeelFirstElement(const Instruction* inst)
663 {
664     if (dyn_cast<SamplerLoadIntrinsic>(inst))
665     {
666         return true;
667     }
668 
669     return false;
670 }
671 
672 /// \brief Returns the number of homogeneous slots in dual-source payload.
673 ///
674 /// dual-source RT write complete format: oM [R0 G0 B0 A0 R1 G1 B1 A1] sZ oS
675 /// Only [R0 G0 B0 A0 R1 G1 B1 A1] part forms the homogeneous sequence.
GetNumPayloadElements_DSRTWrite(const GenIntrinsicInst * inst)676 uint PayloadMapping::GetNumPayloadElements_DSRTWrite(const GenIntrinsicInst* inst)
677 {
678     IGC_ASSERT(llvm::isa<llvm::RTDualBlendSourceIntrinsic>(inst));
679     return 8; //8 colors, always form homogeneous 'part'.
680 }
681 
682 /// \brief Gets payload element index to value mapping for dual-source RT writes.
GetPayloadElementToValueMapping_DSRTWrite(const GenIntrinsicInst * inst,const uint index)683 Value* PayloadMapping::GetPayloadElementToValueMapping_DSRTWrite(const GenIntrinsicInst* inst, const uint index)
684 {
685     //oM [R0 G0 B0 A0 R1 G1 B1 A1] sZ oS
686     IGC_ASSERT(index < GetNumPayloadElements(inst));
687     IGC_ASSERT(llvm::isa<llvm::RTDualBlendSourceIntrinsic>(inst));
688     const RTDualBlendSourceIntrinsic* dsrtwi = cast<RTDualBlendSourceIntrinsic>(inst);
689 
690     if (index < 8)
691     {
692         switch (index)
693         {
694         case 0: return dsrtwi->getRed0();
695         case 1: return dsrtwi->getGreen0();
696         case 2: return dsrtwi->getBlue0();
697         case 3: return dsrtwi->getAlpha0();
698         case 4: return dsrtwi->getRed1();
699         case 5: return dsrtwi->getGreen1();
700         case 6: return dsrtwi->getBlue1();
701         case 7: return dsrtwi->getAlpha1();
702         }
703     }
704 
705     return nullptr;
706 }
707