1 /*========================== begin_copyright_notice ============================
2 
3 Copyright (C) 2017-2021 Intel Corporation
4 
5 SPDX-License-Identifier: MIT
6 
7 ============================= end_copyright_notice ===========================*/
8 
9 #ifndef BUILTINS_FRONTEND_DEFINITIONS_HPP
10 #define BUILTINS_FRONTEND_DEFINITIONS_HPP
11 
12 #include "common/debug/DebugMacros.hpp" // VALUE_NAME() definition.
13 #include "common/LLVMWarningsPush.hpp"
14 #include "llvm/Config/llvm-config.h"
15 #include "llvm/AsmParser/Parser.h"
16 #include "llvmWrapper/IR/DerivedTypes.h"
17 #include "llvm/Support/Casting.h"
18 #include "common/LLVMWarningsPop.hpp"
19 #include "Probe/Assertion.h"
20 
// 32-bit address-space encoding for a graphics resource. The same storage can
// be accessed field by field through `bits`, or as one raw dword through
// `u32Val`.
typedef union _gfxResourceAddressSpace
{
    struct _bits
    {
        unsigned bufId    : 16;  // buffer index, or the unique id supplied for an indirect buffer
        unsigned bufType  : 5;   // EncodeASForGFXResource stores IGC::BufferType + 1 here
        unsigned indirect : 1;   // used as a bool; EncodeASForGFXResource sets it for non-constant indices
        unsigned reserved : 10;  // remaining bits; 16 + 5 + 1 + 10 == 32
    } bits;
    unsigned u32Val;             // all fields viewed as a single 32-bit value
} GFXResourceAddressSpace;
32 
// Fixed address-space numbers. EncodeASForGFXResource returns
// ADDRESS_SPACE_LOCAL for SLM buffers.
enum class ADDRESS_SPACE_TYPE : unsigned int
{
    ADDRESS_SPACE_PRIVATE  = 0,
    ADDRESS_SPACE_GLOBAL   = 1,
    ADDRESS_SPACE_CONSTANT = 2,
    ADDRESS_SPACE_LOCAL    = 3,
    ADDRESS_SPACE_GENERIC  = 4,
    // Note the gap: values 5..12 are not defined by this enum.
    ADDRESS_SPACE_LOCAL_32 = 13,
};
42 
43 template<bool preserveNames, typename T, typename Inserter>
EncodeASForGFXResource(const llvm::Value & bufIdx,IGC::BufferType bufType,unsigned uniqueIndAS)44 unsigned LLVM3DBuilder<preserveNames, T, Inserter>::EncodeASForGFXResource(
45     const llvm::Value& bufIdx,
46     IGC::BufferType bufType,
47     unsigned uniqueIndAS)
48 {
49     GFXResourceAddressSpace temp = {};
50 
51     static_assert(sizeof(temp) == 4, "Code below may need and update.");
52 
53     temp.u32Val = 0;
54     IGC_ASSERT((bufType + 1) < IGC::BUFFER_TYPE_UNKNOWN + 1);
55     temp.bits.bufType = bufType + 1;
56     if (bufType == IGC::BufferType::SLM)
57     {
58         return static_cast<unsigned int>(ADDRESS_SPACE_TYPE::ADDRESS_SPACE_LOCAL); // OCL uses addrspace 3 for SLM. We should use the same thing.
59     }
60     else if (llvm::isa<llvm::ConstantInt>(&bufIdx))
61     {
62         const unsigned bufId = (unsigned)(llvm::cast<llvm::ConstantInt>(&bufIdx)->getZExtValue());
63         IGC_ASSERT(bufId < (1 << 16));
64         temp.bits.bufId = bufId;
65         return temp.u32Val;
66     }
67 
68     // if it is indirect-buf, it is front-end's job to give a proper(unique) address-space per access
69     temp.bits.bufId = uniqueIndAS;
70     temp.bits.indirect = 1;
71     return temp.u32Val;
72 }
73 
74 
75 template<bool preserveNames, typename T, typename Inserter>
llvm_GenISA_ubfe() const76 inline llvm::Function* LLVM3DBuilder<preserveNames, T, Inserter>::llvm_GenISA_ubfe() const
77 {
78     llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
79 
80     llvm::Function* func_llvm_GenISA_ubfe = llvm::GenISAIntrinsic::getDeclaration(module, llvm::GenISAIntrinsic::GenISA_ubfe);
81     return func_llvm_GenISA_ubfe;
82 }
83 
84 template<bool preserveNames, typename T, typename Inserter>
llvm_GenISA_ibfe() const85 inline llvm::Function* LLVM3DBuilder<preserveNames, T, Inserter>::llvm_GenISA_ibfe() const
86 {
87     llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
88 
89     llvm::Function* func_llvm_GenISA_ibfe = llvm::GenISAIntrinsic::getDeclaration(module, llvm::GenISAIntrinsic::GenISA_ibfe);
90     return func_llvm_GenISA_ibfe;
91 }
92 
93 template<bool preserveNames, typename T, typename Inserter>
llvm_GenISA_bfi() const94 inline llvm::Function* LLVM3DBuilder<preserveNames, T, Inserter>::llvm_GenISA_bfi() const
95 {
96     llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
97 
98     llvm::Function* func_llvm_GenISA_bfi = llvm::GenISAIntrinsic::getDeclaration(module, llvm::GenISAIntrinsic::GenISA_bfi);
99     return func_llvm_GenISA_bfi;
100 }
101 
102 template<bool preserveNames, typename T, typename Inserter>
llvm_GenISA_bfrev() const103 inline llvm::Function* LLVM3DBuilder<preserveNames, T, Inserter>::llvm_GenISA_bfrev() const
104 {
105     llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
106 
107     llvm::Function* func_llvm_GenISA_bfrev = llvm::GenISAIntrinsic::getDeclaration(module, llvm::GenISAIntrinsic::GenISA_bfrev);
108     return func_llvm_GenISA_bfrev;
109 }
110 
111 template<bool preserveNames, typename T, typename Inserter>
llvm_GenISA_firstbitHi() const112 inline llvm::Function* LLVM3DBuilder<preserveNames, T, Inserter>::llvm_GenISA_firstbitHi() const
113 {
114     llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
115 
116     llvm::Function* func_llvm_GenISA_firstbitHi = llvm::GenISAIntrinsic::getDeclaration(module, llvm::GenISAIntrinsic::GenISA_firstbitHi);
117     return func_llvm_GenISA_firstbitHi;
118 }
119 
120 template<bool preserveNames, typename T, typename Inserter>
llvm_GenISA_firstbitLo() const121 inline llvm::Function* LLVM3DBuilder<preserveNames, T, Inserter>::llvm_GenISA_firstbitLo() const
122 {
123     llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
124 
125     llvm::Function* func_llvm_GenISA_firstbitLo = llvm::GenISAIntrinsic::getDeclaration(module, llvm::GenISAIntrinsic::GenISA_firstbitLo);
126     return func_llvm_GenISA_firstbitLo;
127 }
128 
129 template<bool preserveNames, typename T, typename Inserter>
llvm_GenISA_firstbitShi() const130 inline llvm::Function* LLVM3DBuilder<preserveNames, T, Inserter>::llvm_GenISA_firstbitShi() const
131 {
132     llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
133 
134     llvm::Function* func_llvm_GenISA_firstbitShi = llvm::GenISAIntrinsic::getDeclaration(module, llvm::GenISAIntrinsic::GenISA_firstbitShi);
135     return func_llvm_GenISA_firstbitShi;
136 }
137 
138 template<bool preserveNames, typename T, typename Inserter>
Init()139 void LLVM3DBuilder<preserveNames, T, Inserter>::Init()
140 {
141     // Cached constants
142     m_int0 = this->getInt32( 0 );
143     m_int1 = this->getInt32( 1 );
144     m_int2 = this->getInt32( 2 );
145     m_int3 = this->getInt32( 3 );
146     m_float0 = llvm::cast<llvm::ConstantFP>(llvm::ConstantFP::get(this->getFloatTy(), 0.0));
147     m_float1 = llvm::cast<llvm::ConstantFP>(llvm::ConstantFP::get(this->getFloatTy(), 1.0));
148 }
149 
150 template<bool preserveNames, typename T, typename Inserter>
Create_resinfo(llvm::Value * int32_src_s_mip,llvm::Value * int32_textureIdx)151 inline llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::Create_resinfo(
152     llvm::Value* int32_src_s_mip,
153     llvm::Value* int32_textureIdx)
154 {
155     llvm::Value * packed_params[] = {
156         int32_textureIdx,
157         int32_src_s_mip,
158     };
159 
160     llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
161 
162     llvm::Function* func_llvm_GenISA_resinfoptr = llvm::GenISAIntrinsic::getDeclaration
163         (module, llvm::GenISAIntrinsic::GenISA_resinfoptr, int32_textureIdx->getType());
164 
165     llvm::CallInst* packed_resinfo_call = this->CreateCall(func_llvm_GenISA_resinfoptr, packed_params);
166     return packed_resinfo_call;
167 }
168 
169 template<bool preserveNames, typename T, typename Inserter>
Create_resinfoptr_msaa(llvm::Value * srcBuffer,llvm::Value * float_src_s_mip)170 inline llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::Create_resinfoptr_msaa(
171     llvm::Value* srcBuffer,
172     llvm::Value* float_src_s_mip)
173 {
174     llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
175 
176     llvm::Function* func_resinfoptr = llvm::GenISAIntrinsic::getDeclaration
177         (module, llvm::GenISAIntrinsic::GenISA_resinfoptr, srcBuffer->getType());
178 
179     //%mip_s = bitcast float %float_src_s_mip to i32
180     llvm::Value* int32_mip = this->CreateBitCast(float_src_s_mip, this->getInt32Ty(), VALUE_NAME("mip_s"));
181 
182     llvm::Value * packed_params[] = {
183         srcBuffer,
184         int32_mip
185     };
186 
187     llvm::CallInst* packed_resinfo_call = llvm::cast<llvm::CallInst>(this->CreateCall(func_resinfoptr, packed_params));
188 
189     // %tex_s.chan0 = extractelement <4 x i32> %packed_resinfo_call, i32 2
190     llvm::Value* int32_info_s_ch2 = this->CreateExtractElement(packed_resinfo_call, this->m_int2);
191 
192     llvm::Function* func_sampleinfoptr = llvm::GenISAIntrinsic::getDeclaration
193         (module, llvm::GenISAIntrinsic::GenISA_sampleinfoptr, srcBuffer->getType());
194 
195     llvm::Value * packed_sampleinfo_params[] = {
196         srcBuffer
197     };
198 
199     // Call sampleinfoptr intrinsic to get the number of samples.
200     llvm::CallInst* packed_sampleinfo_call = llvm::cast<llvm::CallInst>(this->CreateCall(func_sampleinfoptr, packed_sampleinfo_params));
201 
202     // We can not use channel 0 of sampleinfo which should contain the correct
203     // number of samples retrieved from surface state because this value in surface
204     // state must be set to 1 in case of MSAA UAV emulation due to fact that
205     // IGC does not support native MSAA UAV messages at the moment.
206     // Instead of channel 0 we can use channel 3 of sampleinfo which contains
207     // sample position palette index field retrieved from surface state.
208     // The sample position palette index field is set to log2(number of samples).
209 
210     // Get sample position palette index from sampleinfo. Note that this value
211     // is incremented by one from its value in the surface state.
212     llvm::Value* int32_sampleinfo_s_chan3 = this->CreateExtractElement(packed_sampleinfo_call, this->m_int3);
213     llvm::Value* int32_paletteIndex = this->CreateSub(int32_sampleinfo_s_chan3, this->m_int1);
214 
215     // Number of samples = 2 ^ "sample position palette index".
216     llvm::Value* int32_numberOfSamples = this->CreateShl(this->m_int1, int32_paletteIndex);
217 
218     // Divide depth by number of samples.
219     // %depth_s = udiv i32 %src_s.chan2, %src1_s_ch0
220     llvm::Value* int32_depth = this->CreateUDiv(int32_info_s_ch2, int32_numberOfSamples, VALUE_NAME("depth_s"));
221 
222     llvm::Value *resinfo = llvm::UndefValue::get(IGCLLVM::FixedVectorType::get(llvm::Type::getInt32Ty(module->getContext()), 4));
223 
224     resinfo = this->CreateInsertElement(
225         resinfo,
226         this->CreateExtractElement(packed_resinfo_call, this->m_int0),
227         this->getInt32(0),
228         "call_inst");
229 
230     resinfo = this->CreateInsertElement(
231         resinfo,
232         this->CreateExtractElement(packed_resinfo_call, this->m_int1),
233         this->getInt32(1),
234         "call_inst");
235 
236     resinfo = this->CreateInsertElement(
237         resinfo,
238         this->CreateExtractElement(packed_resinfo_call, this->m_int3),
239         this->getInt32(3),
240         "call_inst");
241 
242     resinfo = this->CreateInsertElement(
243         resinfo,
244         int32_depth,
245         this->getInt32(2),
246         "call_inst");
247 
248     return resinfo;
249 }
250 
251 template<bool preserveNames, typename T, typename Inserter>
Create_typedwrite(llvm::Value * dstBuffer,llvm::Value * srcAddressU,llvm::Value * srcAddressV,llvm::Value * srcAddressW,llvm::Value * lod,llvm::Value * float_X,llvm::Value * float_Y,llvm::Value * float_Z,llvm::Value * float_W)252 inline llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::Create_typedwrite(
253     llvm::Value* dstBuffer,
254     llvm::Value* srcAddressU,
255     llvm::Value* srcAddressV,
256     llvm::Value* srcAddressW,
257     llvm::Value* lod,
258     llvm::Value* float_X,
259     llvm::Value* float_Y,
260     llvm::Value* float_Z,
261     llvm::Value* float_W)
262 {
263     llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
264 
265     llvm::Function *pFuncTypedWrite = llvm::GenISAIntrinsic::getDeclaration(
266         module, llvm::GenISAIntrinsic::GenISA_typedwrite, dstBuffer->getType());
267 
268     //R = SampleIndex
269     llvm::Value * args[] = {
270         dstBuffer,
271         srcAddressU,
272         srcAddressV,
273         srcAddressW,
274         lod,
275         float_X,
276         float_Y,
277         float_Z,
278         float_W,
279     };
280 
281     llvm::Value* typedwrite = this->CreateCall(pFuncTypedWrite, args);
282     return typedwrite;
283 }
284 
285 template<bool preserveNames, typename T, typename Inserter>
Create_typedread(llvm::Value * srcBuffer,llvm::Value * srcAddressU,llvm::Value * srcAddressV,llvm::Value * srcAddressW,llvm::Value * lod)286 inline llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::Create_typedread(
287     llvm::Value* srcBuffer,
288     llvm::Value* srcAddressU,
289     llvm::Value* srcAddressV,
290     llvm::Value* srcAddressW,
291     llvm::Value* lod)
292 {
293     llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
294 
295     llvm::Function *pFuncTypedRead = llvm::GenISAIntrinsic::getDeclaration(
296         module, llvm::GenISAIntrinsic::GenISA_typedread, srcBuffer->getType());
297 
298     llvm::Value * args[] = {
299         srcBuffer,
300         srcAddressU,
301         srcAddressV,
302         srcAddressW,
303         lod
304     };
305 
306     llvm::Value* typedread = this->CreateCall(pFuncTypedRead, args);
307     return typedread;
308 }
309 
310 template<bool preserveNames, typename T, typename Inserter>
Create_typedread_msaa2D(llvm::Value * srcBuffer,llvm::Value * sampleIdx,llvm::Value * srcAddressU,llvm::Value * srcAddressV,llvm::Value * lod)311 inline llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::Create_typedread_msaa2D(
312     llvm::Value* srcBuffer,
313     llvm::Value* sampleIdx,
314     llvm::Value* srcAddressU,
315     llvm::Value* srcAddressV,
316     llvm::Value* lod)
317 {
318     llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
319 
320     llvm::Function *pFuncTypedRead = llvm::GenISAIntrinsic::getDeclaration(
321         module, llvm::GenISAIntrinsic::GenISA_typedread, srcBuffer->getType());
322 
323     //R = SampleIndex
324     llvm::Value * args[] = {
325         srcBuffer,
326         srcAddressU,
327         srcAddressV,
328         sampleIdx,
329         lod
330     };
331 
332     llvm::Value* typedread = this->CreateCall(pFuncTypedRead, args);
333     return typedread;
334 }
335 
336 template<bool preserveNames, typename T, typename Inserter>
Create_typedread_msaa2DArray(llvm::Value * srcBuffer,llvm::Value * sampleIdx,llvm::Value * srcAddressU,llvm::Value * srcAddressV,llvm::Value * srcAddressR,llvm::Value * lod)337 inline llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::Create_typedread_msaa2DArray(
338     llvm::Value* srcBuffer,
339     llvm::Value* sampleIdx,
340     llvm::Value* srcAddressU,
341     llvm::Value* srcAddressV,
342     llvm::Value* srcAddressR,
343     llvm::Value* lod)
344 {
345     llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
346 
347     // Call sampleinfoptr intrinsic to get the number of samples.
348     // %tex = call <4 x i32> @llvm.GenISA.sampleinfoptr(4x(float)addrspace())
349     llvm::Function* pfuncsampleinfoptr = llvm::GenISAIntrinsic::getDeclaration(
350         module, llvm::GenISAIntrinsic::GenISA_sampleinfoptr, srcBuffer->getType());
351     llvm::Value* packed_sampleinfo_call = this->CreateCall(pfuncsampleinfoptr, srcBuffer);
352 
353     // We can not use channel 0 of sampleinfo which should contain the correct
354     // number of samples retrieved from surface state because this value in surface
355     // state must be set to 1 in case of MSAA UAV emulation due to fact that
356     // IGC does not support native MSAA UAV messages at the moment.
357     // Instead of channel 0 we can use channel 3 of sampleinfo which contains
358     // sample position palette index field retrieved from surface state.
359     // The sample position palette index field is set to log2(number of samples).
360 
361     // Get sample position palette index from surface state. Note that this value
362     // is incremented by one from its value in the surface state.
363     llvm::Value* int32_sampleinfo_s_chan3 = this->CreateExtractElement(packed_sampleinfo_call, this->m_int3);
364     llvm::Value* int32_paletteIndex = this->CreateSub(int32_sampleinfo_s_chan3, this->m_int1);
365 
366     // Number of samples = 2 ^ "sample position palette index".
367     llvm::Value* int32_numberOfSamples = this->CreateShl(this->m_int1, int32_paletteIndex);
368 
369     //R = R' * num of Samples + SampleIndex
370     llvm::Value* int32_mulwithSamples = this->CreateMul(srcAddressR, int32_numberOfSamples, VALUE_NAME("mul_s"));
371     llvm::Value* int32_SrcAddrR = this->CreateAdd(int32_mulwithSamples, sampleIdx, VALUE_NAME("source_R"));
372 
373     llvm::Function *pFuncTypedRead = llvm::GenISAIntrinsic::getDeclaration(
374         module, llvm::GenISAIntrinsic::GenISA_typedread, srcBuffer->getType());
375 
376     llvm::Value * args[] = {
377         srcBuffer,
378         srcAddressU,
379         srcAddressV,
380         int32_SrcAddrR,
381         lod
382     };
383 
384     llvm::Value* typedread = this->CreateCall(pFuncTypedRead, args);
385     return typedread;
386 }
387 
388 template<bool preserveNames, typename T, typename Inserter>
Create_typedwrite_msaa2D(llvm::Value * dstBuffer,llvm::Value * sampleIdx,llvm::Value * srcAddressU,llvm::Value * srcAddressV,llvm::Value * float_X,llvm::Value * float_Y,llvm::Value * float_Z,llvm::Value * float_W)389 inline llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::Create_typedwrite_msaa2D(
390     llvm::Value* dstBuffer,
391     llvm::Value* sampleIdx,
392     llvm::Value* srcAddressU,
393     llvm::Value* srcAddressV,
394     llvm::Value* float_X,
395     llvm::Value* float_Y,
396     llvm::Value* float_Z,
397     llvm::Value* float_W)
398 {
399     llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
400 
401     llvm::Function *pFuncTypedWrite = llvm::GenISAIntrinsic::getDeclaration(
402         module, llvm::GenISAIntrinsic::GenISA_typedwrite, dstBuffer->getType());
403 
404     //R = SampleIndex
405     llvm::Value * args[] = {
406         dstBuffer,
407         srcAddressU,
408         srcAddressV,
409         sampleIdx,
410         m_int0,
411         float_X,
412         float_Y,
413         float_Z,
414         float_W,
415     };
416 
417     llvm::Value* typedwrite = this->CreateCall(pFuncTypedWrite, args);
418     return typedwrite;
419 }
420 
421 template<bool preserveNames, typename T, typename Inserter>
Create_typedwrite_msaa2DArray(llvm::Value * dstBuffer,llvm::Value * sampleIdx,llvm::Value * srcAddressU,llvm::Value * srcAddressV,llvm::Value * srcAddressR,llvm::Value * float_X,llvm::Value * float_Y,llvm::Value * float_Z,llvm::Value * float_W)422 inline llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::Create_typedwrite_msaa2DArray(
423     llvm::Value* dstBuffer,
424     llvm::Value* sampleIdx,
425     llvm::Value* srcAddressU,
426     llvm::Value* srcAddressV,
427     llvm::Value* srcAddressR,
428     llvm::Value* float_X,
429     llvm::Value* float_Y,
430     llvm::Value* float_Z,
431     llvm::Value* float_W)
432 {
433     llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
434 
435     // Call sampleinfoptr intrinsic to get the number of samples.
436     // %tex = call <4 x i32> @llvm.GenISA.sampleinfoptr(4x(float)addrspace())
437     llvm::Function* pfuncsampleinfoptr = llvm::GenISAIntrinsic::getDeclaration(
438         module, llvm::GenISAIntrinsic::GenISA_sampleinfoptr, dstBuffer->getType());
439     llvm::Value* packed_sampleinfo_call = this->CreateCall(pfuncsampleinfoptr, dstBuffer);
440 
441     // We can not use channel 0 of sampleinfo which should contain the correct
442     // number of samples retrieved from surface state because this value in surface
443     // state must be set to 1 in case of MSAA UAV emulation due to fact that
444     // IGC does not support native MSAA UAV messages at the moment.
445     // Instead of channel 0 we can use channel 3 of sampleinfo which contains
446     // sample position palette index field retrieved from surface state.
447     // The sample position palette index field is set to log2(number of samples).
448 
449     // Get sample position palette index from surface state. Note that this value
450     // is incremented by one from its value in the surface state.
451     llvm::Value* int32_sampleinfo_s_chan3 = this->CreateExtractElement(packed_sampleinfo_call, this->m_int3);
452     llvm::Value* int32_paletteIndex = this->CreateSub(int32_sampleinfo_s_chan3, this->m_int1);
453 
454     // Number of samples = 2 ^ "sample position palette index".
455     llvm::Value* int32_numberOfSamples = this->CreateShl(this->m_int1, int32_paletteIndex);
456 
457     //R = R' * num of Samples + SampleIndex
458     llvm::Value* int32_mulwithSamples = this->CreateMul(srcAddressR, int32_numberOfSamples, VALUE_NAME("mul_s"));
459     llvm::Value* int32_SrcAddrR = this->CreateAdd(int32_mulwithSamples, sampleIdx, VALUE_NAME("source_R"));
460 
461     llvm::Function *pFuncTypedWrite = llvm::GenISAIntrinsic::getDeclaration(
462         module, llvm::GenISAIntrinsic::GenISA_typedwrite, dstBuffer->getType());
463 
464     //R = SampleIndex
465     llvm::Value * args[] = {
466         dstBuffer,
467         srcAddressU,
468         srcAddressV,
469         int32_SrcAddrR,
470         m_int0,
471         float_X,
472         float_Y,
473         float_Z,
474         float_W,
475     };
476 
477     llvm::Value* typedwrite = this->CreateCall(pFuncTypedWrite, args);
478     return typedwrite;
479 }
480 
481 template<bool preserveNames, typename T, typename Inserter>
Create_dwordatomictypedMsaa2D(llvm::Value * dstBuffer,llvm::Value * sampleIdx,llvm::Value * srcAddressU,llvm::Value * srcAddressV,llvm::Value * src,llvm::Value * instType)482 inline llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::Create_dwordatomictypedMsaa2D(
483     llvm::Value* dstBuffer,
484     llvm::Value* sampleIdx,
485     llvm::Value* srcAddressU,
486     llvm::Value* srcAddressV,
487     llvm::Value* src,
488     llvm::Value* instType)
489 {
490     llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
491 
492     llvm::Type *types[] = { src->getType(), dstBuffer->getType() };
493 
494     llvm::Function *pFuncDwordAtomicTyped = llvm::GenISAIntrinsic::getDeclaration(
495         module, llvm::GenISAIntrinsic::GenISA_intatomictyped, types);
496 
497     //R = SampleIndex
498     llvm::Value * args[] = {
499         dstBuffer,
500         srcAddressU,
501         srcAddressV,
502         sampleIdx,
503         src,
504         instType
505     };
506 
507     llvm::Value* dwordAtomicTyped = this->CreateCall(pFuncDwordAtomicTyped, args);
508     return dwordAtomicTyped;
509 }
510 
511 template<bool preserveNames, typename T, typename Inserter>
Create_dwordatomictypedMsaa2DArray(llvm::Value * dstBuffer,llvm::Value * sampleIdx,llvm::Value * srcAddressU,llvm::Value * srcAddressV,llvm::Value * srcAddressR,llvm::Value * src,llvm::Value * instType)512 inline llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::Create_dwordatomictypedMsaa2DArray(
513     llvm::Value* dstBuffer,
514     llvm::Value* sampleIdx,
515     llvm::Value* srcAddressU,
516     llvm::Value* srcAddressV,
517     llvm::Value* srcAddressR,
518     llvm::Value* src,
519     llvm::Value* instType)
520 {
521     llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
522 
523     // Call sampleinfoptr intrinsic to get the number of samples.
524     // %tex = call <4 x i32> @llvm.GenISA.sampleinfoptr(4x(float)addrspace())
525     llvm::Function* pfuncsampleinfoptr = llvm::GenISAIntrinsic::getDeclaration(
526         module, llvm::GenISAIntrinsic::GenISA_sampleinfoptr, dstBuffer->getType());
527     llvm::Value* packed_sampleinfo_call = this->CreateCall(pfuncsampleinfoptr, dstBuffer);
528 
529     // We can not use channel 0 of sampleinfo which should contain the correct
530     // number of samples retrieved from surface state because this value in surface
531     // state must be set to 1 in case of MSAA UAV emulation due to fact that
532     // IGC does not support native MSAA UAV messages at the moment.
533     // Instead of channel 0 we can use channel 3 of sampleinfo which contains
534     // sample position palette index field retrieved from surface state.
535     // The sample position palette index field is set to log2(number of samples).
536 
537     // Get sample position palette index from surface state. Note that this value
538     // is incremented by one from its value in the surface state.
539     llvm::Value* int32_sampleinfo_s_chan3 = this->CreateExtractElement(packed_sampleinfo_call, this->m_int3);
540     llvm::Value* int32_paletteIndex = this->CreateSub(int32_sampleinfo_s_chan3, this->m_int1);
541 
542     // Number of samples = 2 ^ "sample position palette index".
543     llvm::Value* int32_numberOfSamples = this->CreateShl(this->m_int1, int32_paletteIndex);
544 
545     //R = R' * num of Samples + SampleIndex
546     llvm::Value* int32_mulwithSamples = this->CreateMul(srcAddressR, int32_numberOfSamples, VALUE_NAME("mul_s"));
547     llvm::Value* int32_SrcAddrR = this->CreateAdd(int32_mulwithSamples, sampleIdx, VALUE_NAME("source_R"));
548 
549     llvm::Type *types[] = { src->getType(), dstBuffer->getType() };
550 
551     llvm::Function *pFuncDwordAtomicTyped = llvm::GenISAIntrinsic::getDeclaration(
552         module, llvm::GenISAIntrinsic::GenISA_intatomictyped, types);
553 
554     llvm::Value * args[] = {
555         dstBuffer,
556         srcAddressU,
557         srcAddressV,
558         int32_SrcAddrR,
559         src,
560         instType
561     };
562 
563     llvm::Value* dwordAtomicTyped = this->CreateCall(pFuncDwordAtomicTyped, args);
564     return dwordAtomicTyped;
565 }
566 
567 template<bool preserveNames, typename T, typename Inserter>
Create_StatelessAtomic(llvm::Value * ptr,llvm::Value * data,IGC::AtomicOp opcode)568 inline llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::Create_StatelessAtomic(
569     llvm::Value* ptr,
570     llvm::Value* data,
571     IGC::AtomicOp opcode)
572 {
573     llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
574     llvm::Type* types[] = { data->getType(), ptr->getType(), ptr->getType() };
575     llvm::Function* pFunc = llvm::GenISAIntrinsic::getDeclaration(
576         module, llvm::GenISAIntrinsic::GenISA_intatomicrawA64, types);
577 
578     llvm::Value* args[] =
579     {
580         ptr,
581         ptr,
582         data,
583         this->getInt32(opcode)
584     };
585     return this->CreateCall(pFunc, args);
586 }
587 
588 template<bool preserveNames, typename T, typename Inserter>
Create_InidrectAtomic(llvm::Value * resource,llvm::Value * offset,llvm::Value * data,IGC::AtomicOp opcode)589 inline llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::Create_InidrectAtomic(
590     llvm::Value* resource,
591     llvm::Value* offset,
592     llvm::Value* data,
593     IGC::AtomicOp opcode)
594 {
595     llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
596 
597     llvm::Type *types[] = { data->getType(), resource->getType() };
598 
599     llvm::Function* pFunc = llvm::GenISAIntrinsic::getDeclaration(
600         module, llvm::GenISAIntrinsic::GenISA_intatomicraw, types);
601 
602     llvm::Value* args[] =
603     {
604         resource,
605         offset,
606         data,
607         this->getInt32(opcode)
608     };
609     return this->CreateCall(pFunc, args);
610 }
611 
612 template<bool preserveNames, typename T, typename Inserter>
Create_StatelessAtomicCmpXChg(llvm::Value * ptr,llvm::Value * data0,llvm::Value * data1)613 inline llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::Create_StatelessAtomicCmpXChg(
614     llvm::Value* ptr,
615     llvm::Value* data0,
616     llvm::Value* data1)
617 {
618     llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
619     llvm::Type* types[] = { data0->getType(), ptr->getType(), ptr->getType() };
620     llvm::Function* pFunc = llvm::GenISAIntrinsic::getDeclaration(
621         module, llvm::GenISAIntrinsic::GenISA_icmpxchgatomicrawA64, types);
622 
623     llvm::Value* args[] =
624     {
625         ptr,
626         ptr,
627         data0,
628         data1,
629     };
630     return this->CreateCall(pFunc, args);
631 }
632 
633 template<bool preserveNames, typename T, typename Inserter>
Create_InidrectAtomicCmpXChg(llvm::Value * resource,llvm::Value * offset,llvm::Value * data0,llvm::Value * data1)634 inline llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::Create_InidrectAtomicCmpXChg(
635     llvm::Value* resource,
636     llvm::Value* offset,
637     llvm::Value* data0,
638     llvm::Value* data1)
639 {
640     llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
641 
642     llvm::Type *types[] = { data0->getType(), resource->getType() };
643 
644     llvm::Function* pFunc = llvm::GenISAIntrinsic::getDeclaration(
645         module, llvm::GenISAIntrinsic::GenISA_icmpxchgatomicraw, types);
646 
647     llvm::Value* args[] =
648     {
649         resource,
650         offset,
651         data0,
652         data1,
653     };
654     return this->CreateCall(pFunc, args);
655 }
656 
657 template<bool preserveNames, typename T, typename Inserter>
Create_cmpxchgatomictypedMsaa2D(llvm::Value * dstBuffer,llvm::Value * sampleIdx,llvm::Value * srcAddressU,llvm::Value * srcAddressV,llvm::Value * src0,llvm::Value * src1)658 inline llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::Create_cmpxchgatomictypedMsaa2D(
659     llvm::Value* dstBuffer,
660     llvm::Value* sampleIdx,
661     llvm::Value* srcAddressU,
662     llvm::Value* srcAddressV,
663     llvm::Value* src0,
664     llvm::Value* src1)
665 {
666     llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
667 
668     llvm::Type *types[] = { src0->getType(), dstBuffer->getType() };
669 
670     llvm::Function *pFuncCmpxchgatomictyped = llvm::GenISAIntrinsic::getDeclaration(
671         module, llvm::GenISAIntrinsic::GenISA_icmpxchgatomictyped, types);
672 
673     //R = SampleIndex
674     llvm::Value * args[] = {
675         dstBuffer,
676         srcAddressU,
677         srcAddressV,
678         sampleIdx,
679         src0,
680         src1
681     };
682 
683     llvm::Value* dwordCmpxchgatomictyped = this->CreateCall(pFuncCmpxchgatomictyped, args);
684     return dwordCmpxchgatomictyped;
685 }
686 
687 template<bool preserveNames, typename T, typename Inserter>
Create_cmpxchgatomictypedMsaa2DArray(llvm::Value * dstBuffer,llvm::Value * sampleIdx,llvm::Value * srcAddressU,llvm::Value * srcAddressV,llvm::Value * srcAddressR,llvm::Value * src0,llvm::Value * src1)688 inline llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::Create_cmpxchgatomictypedMsaa2DArray(
689     llvm::Value* dstBuffer,
690     llvm::Value* sampleIdx,
691     llvm::Value* srcAddressU,
692     llvm::Value* srcAddressV,
693     llvm::Value* srcAddressR,
694     llvm::Value* src0,
695     llvm::Value* src1)
696 {
697     llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
698 
699     // Call sampleinfoptr intrinsic to get the number of samples.
700     // %tex = call <4 x i32> @llvm.GenISA.sampleinfoptr(4x(float)addrspace())
701     llvm::Function* pfuncsampleinfoptr = llvm::GenISAIntrinsic::getDeclaration(
702         module, llvm::GenISAIntrinsic::GenISA_sampleinfoptr, dstBuffer->getType());
703     llvm::Value* packed_sampleinfo_call = this->CreateCall(pfuncsampleinfoptr, dstBuffer);
704 
705     // We can not use channel 0 of sampleinfo which should contain the correct
706     // number of samples retrieved from surface state because this value in surface
707     // state must be set to 1 in case of MSAA UAV emulation due to fact that
708     // IGC does not support native MSAA UAV messages at the moment.
709     // Instead of channel 0 we can use channel 3 of sampleinfo which contains
710     // sample position palette index field retrieved from surface state.
711     // The sample position palette index field is set to log2(number of samples).
712 
713     // Get sample position palette index from surface state. Note that this value
714     // is incremented by one from its value in the surface state.
715     llvm::Value* int32_sampleinfo_s_chan3 = this->CreateExtractElement(packed_sampleinfo_call, this->m_int3);
716     llvm::Value* int32_paletteIndex = this->CreateSub(int32_sampleinfo_s_chan3, this->m_int1);
717 
718     // Number of samples = 2 ^ "sample position palette index".
719     llvm::Value* int32_numberOfSamples = this->CreateShl(this->m_int1, int32_paletteIndex);
720 
721     //R = R' * num of Samples + SampleIndex
722     llvm::Value* int32_mulwithSamples = this->CreateMul(srcAddressR, int32_numberOfSamples, VALUE_NAME("mul_s"));
723     llvm::Value* int32_SrcAddrR = this->CreateAdd(int32_mulwithSamples, sampleIdx, VALUE_NAME("source_R"));
724 
725     llvm::Type *types[] = { src0->getType(), dstBuffer->getType() };
726 
727     llvm::Function *pFuncCmpxchgatomictyped = llvm::GenISAIntrinsic::getDeclaration(
728         module, llvm::GenISAIntrinsic::GenISA_icmpxchgatomictyped, types);
729 
730     llvm::Value * args[] = {
731         dstBuffer,
732         srcAddressU,
733         srcAddressV,
734         int32_SrcAddrR,
735         src0,
736         src1
737     };
738 
739     llvm::Value* dwordCmpxchgatomictyped = this->CreateCall(pFuncCmpxchgatomictyped, args);
740     return dwordCmpxchgatomictyped;
741 }
742 
743 template<bool preserveNames, typename T, typename Inserter>
Create_TypedAtomic(llvm::Value * resource,llvm::Value * addressU,llvm::Value * addressV,llvm::Value * addressR,llvm::Value * data,IGC::AtomicOp opcode)744 inline llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::Create_TypedAtomic(
745     llvm::Value* resource,
746     llvm::Value* addressU,
747     llvm::Value* addressV,
748     llvm::Value* addressR,
749     llvm::Value* data,
750     IGC::AtomicOp opcode)
751 {
752     llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
753 
754     llvm::Type *types[] = { data->getType(), resource->getType() };
755 
756     llvm::Function* pFunc = llvm::GenISAIntrinsic::getDeclaration(
757         module, llvm::GenISAIntrinsic::GenISA_intatomictyped, types);
758 
759     llvm::Value* args[] =
760     {
761         resource,
762         addressU,
763         addressV,
764         addressR,
765         data,
766         this->getInt32(opcode)
767     };
768     return this->CreateCall(pFunc, args);
769 }
770 
771 template<bool preserveNames, typename T, typename Inserter>
Create_TypedAtomicCmpXChg(llvm::Value * resource,llvm::Value * addressU,llvm::Value * addressV,llvm::Value * addressR,llvm::Value * data0,llvm::Value * data1)772 inline llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::Create_TypedAtomicCmpXChg(
773     llvm::Value* resource,
774     llvm::Value* addressU,
775     llvm::Value* addressV,
776     llvm::Value* addressR,
777     llvm::Value* data0,
778     llvm::Value* data1)
779 {
780     llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
781 
782     llvm::Type *types[] = { data0->getType(), resource->getType() };
783 
784     llvm::Function* pFunc = llvm::GenISAIntrinsic::getDeclaration(
785         module, llvm::GenISAIntrinsic::GenISA_icmpxchgatomictyped, types);
786 
787     llvm::Value* args[] =
788     {
789         resource,
790         addressU,
791         addressV,
792         addressR,
793         data0,
794         data1,
795     };
796     return this->CreateCall(pFunc, args);
797 }
798 
799 template<bool preserveNames, typename T, typename Inserter>
Create_SampleInfo(llvm::Value * resourcePtr)800 inline llvm::CallInst* LLVM3DBuilder<preserveNames, T, Inserter>::Create_SampleInfo(
801     llvm::Value* resourcePtr)
802 {
803     llvm::Value * packed_tex_params[] = {
804         resourcePtr,
805     };
806 
807     llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
808 
809     llvm::CallInst* packed_tex_call = llvm::cast<llvm::CallInst>(this->CreateCall(
810         llvm::GenISAIntrinsic::getDeclaration(module, llvm::GenISAIntrinsic::GenISA_sampleinfoptr, resourcePtr->getType()),
811         packed_tex_params));
812 
813     return packed_tex_call;
814 }
815 
816 template<bool preserveNames, typename T, typename Inserter>
CreateReadSurfaceInfo(llvm::Value * resourcePtr,llvm::Value * mipmap)817 inline llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::CreateReadSurfaceInfo(
818     llvm::Value* resourcePtr,
819     llvm::Value* mipmap)
820 {
821     llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
822     llvm::Function* fn = llvm::GenISAIntrinsic::getDeclaration(
823         module, llvm::GenISAIntrinsic::GenISA_readsurfaceinfoptr, resourcePtr->getType());
824     llvm::Value* packed_tex_call = this->CreateCall2(fn, resourcePtr, mipmap);
825     return packed_tex_call;
826 }
827 
828 template<bool preserveNames, typename T, typename Inserter>
Create_SyncThreadGroup()829 llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::Create_SyncThreadGroup()
830 {
831     llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
832     return this->CreateCall(llvm::GenISAIntrinsic::getDeclaration(module, llvm::GenISAIntrinsic::GenISA_threadgroupbarrier));
833 }
834 
835 template<bool preserveNames, typename T, typename Inserter>
Create_FlushSampler()836 llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::Create_FlushSampler()
837 {
838     llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
839     return this->CreateCall(llvm::GenISAIntrinsic::getDeclaration(module, llvm::GenISAIntrinsic::GenISA_flushsampler));
840 }
841 
842 template<bool preserveNames, typename T, typename Inserter>
Create_MemoryFence(bool commit,bool flushRWDataCache,bool flushConstantCache,bool flushTextureCache,bool flushInstructionCache,bool globalFence)843 llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::Create_MemoryFence(
844     bool commit,
845     bool flushRWDataCache,
846     bool flushConstantCache,
847     bool flushTextureCache,
848     bool flushInstructionCache,
849     bool globalFence)
850 {
851     llvm::Value* parameters[] =
852     {
853         this->getInt1(commit),
854         this->getInt1(flushRWDataCache),
855         this->getInt1(flushConstantCache),
856         this->getInt1(flushTextureCache),
857         this->getInt1(flushInstructionCache),
858         this->getInt1(globalFence),
859         this->getInt1(false),
860     };
861     llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
862     return this->CreateCall(
863         llvm::GenISAIntrinsic::getDeclaration(module, llvm::GenISAIntrinsic::GenISA_memoryfence),
864         parameters);
865 }
866 
867 template<bool preserveNames, typename T, typename Inserter>
Create_GlobalSync()868 llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::Create_GlobalSync()
869 {
870     llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
871     return this->CreateCall(llvm::GenISAIntrinsic::getDeclaration(module, llvm::GenISAIntrinsic::GenISA_globalSync));
872 }
873 
874 template<bool preserveNames, typename T, typename Inserter>
Create_SamplePos(llvm::Value * int32_resourceIdx,llvm::Value * int32_samplerIdx)875 inline llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::Create_SamplePos(
876     llvm::Value* int32_resourceIdx,
877     llvm::Value* int32_samplerIdx)
878 {
879     llvm::Value* sampleInfo = this->Create_SampleInfo(int32_resourceIdx);
880 
881 
882     llvm::Value* int32_texX = this->CreateExtractElement(sampleInfo, m_int0);
883     llvm::Value* int32_texW = this->CreateExtractElement(sampleInfo, m_int3);
884 
885     llvm::Value* int32_tempIndex = this->CreateAdd(int32_texX, int32_samplerIdx);
886     llvm::Value* int1_ole = this->CreateICmp(llvm::ICmpInst::ICMP_UGT, int32_texX, int32_samplerIdx);
887     llvm::Value* int32_sel = this->CreateSelect(int1_ole, int32_tempIndex, m_int0);
888     llvm::Value* int1_one = this->CreateICmp(llvm::ICmpInst::ICMP_EQ, int32_texW, m_int1);
889     llvm::Value* int32_selIndex = this->CreateSelect(int1_one, m_int0, int32_sel);
890 
891     /*
892         %tempY = extractelement <32 x f32> <f32 0.0, f32 0.0, f32 4.0 / 16.0, f32 -4.0 / 16.0, f32 -6.0 / 16.0,
893                                             f32 -2.0 / 16.0, f32 2.0 / 16.0, f32 6.0 / 16.0, f32 -3.0 / 16.0,
894                                             f32 3.0 / 16.0, f32 1.0 / 16.0, f32 -5.0 / 16.0, f32 5.0 / 16.0,
895                                             f32 -1.0 / 16.0, f32 7.0 / 16.0, f32 -7.0 / 16.0, f32 1.0 / 16.0,
896                                             f32 -3.0 / 16.0, f32 2.0 / 16.0, f32 -1.0 / 16.0, f32 -2.0 / 16.0,
897                                             f32 5.0 / 16.0, f32 3.0 / 16.0, f32 -5.0 / 16.0, f32 6.0 / 16.0,
898                                             f32 -7.0 / 16.0, f32 -6.0 / 16.0, f32 4.0 / 16.0, f32 0.0,
899                                             f32 -4.0 / 16.0, f32 7.0 / 16.0, f32 -8.0 / 16.0>, i32 %selIndex
900     */
901     llvm::Value* float_y = nullptr;
902     {
903         llvm::Value* temp = llvm::UndefValue::get(IGCLLVM::FixedVectorType::get(this->getFloatTy(), 32));
904         temp = this->CreateInsertElement(temp, this->getFloat( 0.0f), this->getInt32(0));
905         temp = this->CreateInsertElement(temp, this->getFloat( 0.0f), this->getInt32(1));
906         temp = this->CreateInsertElement(temp, this->getFloat( 4.0f / 16.0f), this->getInt32(2));
907         temp = this->CreateInsertElement(temp, this->getFloat(-4.0f / 16.0f), this->getInt32(3));
908         temp = this->CreateInsertElement(temp, this->getFloat(-6.0f / 16.0f), this->getInt32(4));
909         temp = this->CreateInsertElement(temp, this->getFloat(-2.0f / 16.0f), this->getInt32(5));
910         temp = this->CreateInsertElement(temp, this->getFloat( 2.0f / 16.0f), this->getInt32(6));
911         temp = this->CreateInsertElement(temp, this->getFloat( 6.0f / 16.0f), this->getInt32(7));
912         temp = this->CreateInsertElement(temp, this->getFloat(-3.0f / 16.0f), this->getInt32(8));
913         temp = this->CreateInsertElement(temp, this->getFloat( 3.0f / 16.0f), this->getInt32(9));
914         temp = this->CreateInsertElement(temp, this->getFloat( 1.0f / 16.0f), this->getInt32(10));
915         temp = this->CreateInsertElement(temp, this->getFloat(-5.0f / 16.0f), this->getInt32(11));
916         temp = this->CreateInsertElement(temp, this->getFloat( 5.0f / 16.0f), this->getInt32(12));
917         temp = this->CreateInsertElement(temp, this->getFloat(-1.0f / 16.0f), this->getInt32(13));
918         temp = this->CreateInsertElement(temp, this->getFloat( 7.0f / 16.0f), this->getInt32(14));
919         temp = this->CreateInsertElement(temp, this->getFloat(-7.0f / 16.0f), this->getInt32(15));
920         temp = this->CreateInsertElement(temp, this->getFloat( 1.0f / 16.0f), this->getInt32(16));
921         temp = this->CreateInsertElement(temp, this->getFloat(-3.0f / 16.0f), this->getInt32(17));
922         temp = this->CreateInsertElement(temp, this->getFloat( 2.0f / 16.0f), this->getInt32(18));
923         temp = this->CreateInsertElement(temp, this->getFloat(-1.0f / 16.0f), this->getInt32(19));
924         temp = this->CreateInsertElement(temp, this->getFloat(-2.0f / 16.0f), this->getInt32(20));
925         temp = this->CreateInsertElement(temp, this->getFloat( 5.0f / 16.0f), this->getInt32(21));
926         temp = this->CreateInsertElement(temp, this->getFloat( 3.0f / 16.0f), this->getInt32(22));
927         temp = this->CreateInsertElement(temp, this->getFloat(-5.0f / 16.0f), this->getInt32(23));
928         temp = this->CreateInsertElement(temp, this->getFloat( 6.0f / 16.0f), this->getInt32(24));
929         temp = this->CreateInsertElement(temp, this->getFloat(-7.0f / 16.0f), this->getInt32(25));
930         temp = this->CreateInsertElement(temp, this->getFloat(-6.0f / 16.0f), this->getInt32(26));
931         temp = this->CreateInsertElement(temp, this->getFloat( 4.0f / 16.0f), this->getInt32(27));
932         temp = this->CreateInsertElement(temp, this->getFloat( 0.0f), this->getInt32(28));
933         temp = this->CreateInsertElement(temp, this->getFloat(-4.0f / 16.0f), this->getInt32(29));
934         temp = this->CreateInsertElement(temp, this->getFloat( 7.0f / 16.0f), this->getInt32(30));
935         temp = this->CreateInsertElement(temp, this->getFloat(-8.0f / 16.0f), this->getInt32(31));
936         float_y = this->CreateExtractElement(temp, int32_selIndex);
937     }
938 
939     /*
940         %tempX = extractelement <32 x f32> <f32 0.0, f32 0.0, f32 4.0 / 16.0, f32 -4.0 / 16.0, f32 -2.0 / 16.0,
941                                             f32 6.0 / 16.0, f32 -6.0 / 16.0, f32 2.0 / 16.0, f32 1.0 / 16.0,
942                                             f32 -1.0 / 16.0, f32 5.0 / 16.0, f32 -3.0 / 16.0, f32 -5.0 / 16.0,
943                                             f32 -7.0 / 16.0, f32 3.0 / 16.0, f32 7.0 / 16.0, f32 1.0 / 16.0,
944                                             f32 -1.0 / 16.0, f32 -3.0 / 16.0, f32 4.0 / 16.0, f32 -5.0 / 16.0,
945                                             f32 2.0 / 16.0, f32 5.0 / 16.0, f32 3.0 / 16.0, f32 -2.0 / 16.0,
946                                             f32 0.0 / 16.0, f32 -4.0 / 16.0, f32 -6.0 / 16.0, f32 -8.0 / 16.0,
947                                             f32 7.0 / 16.0, f32 6.0 / 16.0, f32 -7.0 / 16.0>, i32 %selIndex
948     */
949     llvm::Value* float_x = nullptr;
950     {
951         llvm::Value* temp = llvm::UndefValue::get(IGCLLVM::FixedVectorType::get(this->getFloatTy(), 32));
952         temp = this->CreateInsertElement(temp, this->getFloat( 0.0f), this->getInt32(0));
953         temp = this->CreateInsertElement(temp, this->getFloat( 0.0f), this->getInt32(1));
954         temp = this->CreateInsertElement(temp, this->getFloat( 4.0f / 16.0f), this->getInt32(2));
955         temp = this->CreateInsertElement(temp, this->getFloat(-4.0f / 16.0f), this->getInt32(3));
956         temp = this->CreateInsertElement(temp, this->getFloat(-2.0f / 16.0f), this->getInt32(4));
957         temp = this->CreateInsertElement(temp, this->getFloat( 6.0f / 16.0f), this->getInt32(5));
958         temp = this->CreateInsertElement(temp, this->getFloat(-6.0f / 16.0f), this->getInt32(6));
959         temp = this->CreateInsertElement(temp, this->getFloat( 2.0f / 16.0f), this->getInt32(7));
960         temp = this->CreateInsertElement(temp, this->getFloat( 1.0f / 16.0f), this->getInt32(8));
961         temp = this->CreateInsertElement(temp, this->getFloat(-1.0f / 16.0f), this->getInt32(9));
962         temp = this->CreateInsertElement(temp, this->getFloat( 5.0f / 16.0f), this->getInt32(10));
963         temp = this->CreateInsertElement(temp, this->getFloat(-3.0f / 16.0f), this->getInt32(11));
964         temp = this->CreateInsertElement(temp, this->getFloat(-5.0f / 16.0f), this->getInt32(12));
965         temp = this->CreateInsertElement(temp, this->getFloat(-7.0f / 16.0f), this->getInt32(13));
966         temp = this->CreateInsertElement(temp, this->getFloat( 3.0f / 16.0f), this->getInt32(14));
967         temp = this->CreateInsertElement(temp, this->getFloat( 7.0f / 16.0f), this->getInt32(15));
968         temp = this->CreateInsertElement(temp, this->getFloat( 1.0f / 16.0f), this->getInt32(16));
969         temp = this->CreateInsertElement(temp, this->getFloat(-1.0f / 16.0f), this->getInt32(17));
970         temp = this->CreateInsertElement(temp, this->getFloat(-3.0f / 16.0f), this->getInt32(18));
971         temp = this->CreateInsertElement(temp, this->getFloat( 4.0f / 16.0f), this->getInt32(19));
972         temp = this->CreateInsertElement(temp, this->getFloat(-5.0f / 16.0f), this->getInt32(20));
973         temp = this->CreateInsertElement(temp, this->getFloat( 2.0f / 16.0f), this->getInt32(21));
974         temp = this->CreateInsertElement(temp, this->getFloat( 5.0f / 16.0f), this->getInt32(22));
975         temp = this->CreateInsertElement(temp, this->getFloat( 3.0f / 16.0f), this->getInt32(23));
976         temp = this->CreateInsertElement(temp, this->getFloat(-2.0f / 16.0f), this->getInt32(24));
977         temp = this->CreateInsertElement(temp, this->getFloat( 0.0f), this->getInt32(25));
978         temp = this->CreateInsertElement(temp, this->getFloat(-4.0f / 16.0f), this->getInt32(26));
979         temp = this->CreateInsertElement(temp, this->getFloat(-6.0f / 16.0f), this->getInt32(27));
980         temp = this->CreateInsertElement(temp, this->getFloat(-8.0f / 16.0f), this->getInt32(28));
981         temp = this->CreateInsertElement(temp, this->getFloat( 7.0f / 16.0f), this->getInt32(29));
982         temp = this->CreateInsertElement(temp, this->getFloat( 6.0f / 16.0f), this->getInt32(30));
983         temp = this->CreateInsertElement(temp, this->getFloat(-7.0f / 16.0f), this->getInt32(31));
984         float_x = this->CreateExtractElement(temp, int32_selIndex);
985     }
986 
987     llvm::Value* packed_ret_value = llvm::UndefValue::get(IGCLLVM::FixedVectorType::get(this->getFloatTy(), 4));
988     packed_ret_value = this->CreateInsertElement(packed_ret_value, float_x, this->getInt32(0));
989     packed_ret_value = this->CreateInsertElement(packed_ret_value, float_y, this->getInt32(1));
990     packed_ret_value = this->CreateInsertElement(packed_ret_value, this->getFloat(0.0f), this->getInt32(2));
991     packed_ret_value = this->CreateInsertElement(packed_ret_value, this->getFloat(0.0f), this->getInt32(3));
992 
993     return packed_ret_value;
994 }
995 
996 template<bool preserveNames, typename T, typename Inserter>
Create_SAMPLE(llvm::Value * coordinate_u,llvm::Value * coordinate_v,llvm::Value * coordinate_r,llvm::Value * coordinate_ai,llvm::Value * ptr_textureIdx,llvm::Value * ptr_sampler,llvm::Value * offsetU,llvm::Value * offsetV,llvm::Value * offsetW,llvm::Value * minlod,bool feedback_enabled,llvm::Type * returnType)997 inline llvm::CallInst* LLVM3DBuilder<preserveNames, T, Inserter>::Create_SAMPLE(
998     llvm::Value* coordinate_u,
999     llvm::Value* coordinate_v,
1000     llvm::Value* coordinate_r,
1001     llvm::Value* coordinate_ai,
1002     llvm::Value* ptr_textureIdx,
1003     llvm::Value* ptr_sampler,
1004     llvm::Value* offsetU,
1005     llvm::Value* offsetV,
1006     llvm::Value* offsetW,
1007     llvm::Value* minlod,
1008     bool feedback_enabled,
1009     llvm::Type* returnType)
1010 {
1011     if (minlod == nullptr)
1012     {
1013         minlod = llvm::ConstantFP::get(coordinate_u->getType(), 0.0);
1014     }
1015 
1016     llvm::Value * packed_tex_params[] = {
1017         coordinate_u,
1018         coordinate_v,
1019         coordinate_r,
1020         coordinate_ai,
1021         minlod,
1022         ptr_textureIdx,
1023         ptr_sampler,
1024         offsetU,
1025         offsetV,
1026         offsetW
1027     };
1028 
1029     llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
1030 
1031     llvm::Type* dstType = ( returnType != nullptr ) ? returnType : this->getFloatTy();
1032     llvm::Type* types[] = {
1033         IGCLLVM::FixedVectorType::get(dstType, 4),
1034         coordinate_u->getType(),
1035         ptr_textureIdx->getType(),
1036         ptr_sampler->getType()
1037     };
1038     if (feedback_enabled)
1039     {
1040         types[0] = IGCLLVM::FixedVectorType::get(dstType, 5);
1041     }
1042     llvm::Function* func_llvm_GenISA_sampleptr_v4f32_f32 = llvm::GenISAIntrinsic::getDeclaration
1043         (module, llvm::GenISAIntrinsic::GenISA_sampleptr, types);
1044     llvm::CallInst* packed_tex_call = this->CreateCall(func_llvm_GenISA_sampleptr_v4f32_f32, packed_tex_params);
1045     return packed_tex_call;
1046 }
1047 
1048 template<bool preserveNames, typename T, typename Inserter>
Create_SAMPLEC(llvm::Value * float_reference_0,llvm::Value * float_address_0,llvm::Value * float_address_1,llvm::Value * float_address_2,llvm::Value * float_address_3,llvm::Value * int32_textureIdx,llvm::Value * int32_sampler,llvm::Value * int32_offsetU,llvm::Value * int32_offsetV,llvm::Value * int32_offsetR,llvm::Value * minlod,bool feedback_enabled,llvm::Type * returnType)1049 inline llvm::CallInst* LLVM3DBuilder<preserveNames, T, Inserter>::Create_SAMPLEC(
1050     llvm::Value* float_reference_0,
1051     llvm::Value* float_address_0,
1052     llvm::Value* float_address_1,
1053     llvm::Value* float_address_2,
1054     llvm::Value* float_address_3,
1055     llvm::Value* int32_textureIdx,
1056     llvm::Value* int32_sampler,
1057     llvm::Value* int32_offsetU,
1058     llvm::Value* int32_offsetV,
1059     llvm::Value* int32_offsetR,
1060     llvm::Value* minlod,
1061     bool feedback_enabled,
1062     llvm::Type* returnType)
1063 {
1064     if (minlod == nullptr)
1065     {
1066         minlod = llvm::ConstantFP::get(float_address_0->getType(), 0.0);
1067     }
1068 
1069     llvm::Value * packed_tex_params[] = {
1070         float_reference_0,
1071         float_address_0,
1072         float_address_1,
1073         float_address_2,
1074         float_address_3,
1075         minlod,
1076         int32_textureIdx,
1077         int32_sampler,
1078         int32_offsetU,
1079         int32_offsetV,
1080         int32_offsetR
1081     };
1082 
1083     llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
1084 
1085     llvm::Type* dstType = ( returnType != nullptr ) ? returnType : this->getFloatTy();
1086     llvm::Type* types[] = {
1087         IGCLLVM::FixedVectorType::get(dstType, 4),
1088         float_reference_0->getType(),
1089         int32_textureIdx->getType(),
1090         int32_sampler->getType()
1091     };
1092     if (feedback_enabled)
1093     {
1094         types[0] = IGCLLVM::FixedVectorType::get(dstType, 5);
1095     }
1096     llvm::Function* func_llvm_GenISA_sampleCptr_v4f32_f32 = llvm::GenISAIntrinsic::getDeclaration
1097         (module, llvm::GenISAIntrinsic::GenISA_sampleCptr, types);
1098     llvm::CallInst* packed_tex_call = this->CreateCall(func_llvm_GenISA_sampleCptr_v4f32_f32, packed_tex_params);
1099     return packed_tex_call;
1100 }
1101 
1102 template<bool preserveNames, typename T, typename Inserter>
Create_SAMPLELC(llvm::Value * float_reference_0,llvm::Value * float_address_0,llvm::Value * float_address_1,llvm::Value * float_address_2,llvm::Value * float_address_3,llvm::Value * float_lod,llvm::Value * int32_textureIdx,llvm::Value * int32_sampler,llvm::Value * int32_offsetU,llvm::Value * int32_offsetV,llvm::Value * int32_offsetW,llvm::Type * returnType)1103 inline llvm::CallInst* LLVM3DBuilder<preserveNames, T, Inserter>::Create_SAMPLELC(
1104     llvm::Value* float_reference_0,
1105     llvm::Value* float_address_0,
1106     llvm::Value* float_address_1,
1107     llvm::Value* float_address_2,
1108     llvm::Value* float_address_3,
1109     llvm::Value* float_lod,
1110     llvm::Value* int32_textureIdx,
1111     llvm::Value* int32_sampler,
1112     llvm::Value* int32_offsetU,
1113     llvm::Value* int32_offsetV,
1114     llvm::Value* int32_offsetW,
1115     llvm::Type* returnType)
1116 {
1117     llvm::Value * packed_tex_params[] = {
1118         float_reference_0,
1119         float_lod,
1120         float_address_0,
1121         float_address_1,
1122         float_address_2,
1123         float_address_3,
1124         int32_textureIdx,
1125         int32_sampler,
1126         int32_offsetU,
1127         int32_offsetV,
1128         int32_offsetW
1129     };
1130 
1131     llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
1132 
1133     llvm::Type* dstType = ( returnType != nullptr ) ? returnType : this->getFloatTy();
1134     llvm::Type* types[] = {
1135         IGCLLVM::FixedVectorType::get(dstType, 4),
1136         float_reference_0->getType(),
1137         int32_textureIdx->getType(),
1138         int32_sampler->getType()
1139     };
1140     llvm::Function* func_llvm_GenISA_sampleLCptr_v4f32_f32 = llvm::GenISAIntrinsic::getDeclaration
1141         (module, llvm::GenISAIntrinsic::GenISA_sampleLCptr, types);
1142     llvm::CallInst* packed_tex_call = this->CreateCall(func_llvm_GenISA_sampleLCptr_v4f32_f32, packed_tex_params);
1143     return packed_tex_call;
1144 }
1145 
1146 template<bool preserveNames, typename T, typename Inserter>
Create_SAMPLEC_LZ(llvm::Value * float_reference_0,llvm::Value * float_address_0,llvm::Value * float_address_1,llvm::Value * float_address_2,llvm::Value * float_address_3,llvm::Value * int32_textureIdx,llvm::Value * int32_sampler,llvm::Value * int32_offsetU,llvm::Value * int32_offsetV,llvm::Value * int32_offsetW,bool feedback_enabled,llvm::Type * returnType)1147 inline llvm::CallInst* LLVM3DBuilder<preserveNames, T, Inserter>::Create_SAMPLEC_LZ(
1148     llvm::Value* float_reference_0,
1149     llvm::Value* float_address_0,
1150     llvm::Value* float_address_1,
1151     llvm::Value* float_address_2,
1152     llvm::Value* float_address_3,
1153     llvm::Value* int32_textureIdx,
1154     llvm::Value* int32_sampler,
1155     llvm::Value* int32_offsetU,
1156     llvm::Value* int32_offsetV,
1157     llvm::Value* int32_offsetW,
1158     bool feedback_enabled,
1159     llvm::Type* returnType)
1160 {
1161     llvm::Value * packed_tex_params[] = {
1162         float_reference_0,
1163         llvm::ConstantFP::get(float_address_0->getType(), 0.0),
1164         float_address_0,
1165         float_address_1,
1166         float_address_2,
1167         float_address_3,
1168         int32_textureIdx,
1169         int32_sampler,
1170         int32_offsetU,
1171         int32_offsetV,
1172         int32_offsetW
1173     };
1174 
1175     llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
1176 
1177     llvm::Type* dstType = ( returnType != nullptr ) ? returnType : this->getFloatTy();
1178     llvm::Type* types[] = {
1179         IGCLLVM::FixedVectorType::get(dstType, 4),
1180         float_reference_0->getType(),
1181         int32_textureIdx->getType(),
1182         int32_sampler->getType()
1183     };
1184     if (feedback_enabled)
1185     {
1186         types[0] = IGCLLVM::FixedVectorType::get(dstType, 5);
1187     }
1188     llvm::Function* func_llvm_GenISA_sampleLCptr_v4f32_f32 = llvm::GenISAIntrinsic::getDeclaration
1189         (module, llvm::GenISAIntrinsic::GenISA_sampleLCptr, types);
1190     llvm::CallInst* packed_tex_call = this->CreateCall(func_llvm_GenISA_sampleLCptr_v4f32_f32, packed_tex_params);
1191     return packed_tex_call;
1192 }
1193 
1194 template<bool preserveNames, typename T, typename Inserter>
Create_gather4C(llvm::Value * float_reference_0,llvm::Value * float_address_0,llvm::Value * float_address_1,llvm::Value * float_address_2,llvm::Value * float_address_3,llvm::Value * int32_textureIdx,llvm::Value * int32_sampler,llvm::Value * int32_offsetU,llvm::Value * int32_offsetV,llvm::Value * int32_srcChannel,bool feedback_enabled,llvm::Type * returnType)1195 inline llvm::CallInst* LLVM3DBuilder<preserveNames, T, Inserter>::Create_gather4C(
1196     llvm::Value* float_reference_0,
1197     llvm::Value* float_address_0,
1198     llvm::Value* float_address_1,
1199     llvm::Value* float_address_2,
1200     llvm::Value* float_address_3,
1201     llvm::Value* int32_textureIdx,
1202     llvm::Value* int32_sampler,
1203     llvm::Value* int32_offsetU,
1204     llvm::Value* int32_offsetV,
1205     llvm::Value* int32_srcChannel,
1206     bool feedback_enabled,
1207     llvm::Type* returnType)
1208 {
1209     llvm::Value * packed_tex_params[] = {
1210         float_reference_0,
1211         float_address_0,
1212         float_address_1,
1213         float_address_2,
1214         float_address_3,
1215         int32_textureIdx,
1216         int32_sampler,
1217         int32_offsetU,
1218         int32_offsetV,
1219         m_int0,
1220         int32_srcChannel
1221     };
1222 
1223     llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
1224 
1225     llvm::Type* dstType = ( returnType != nullptr ) ? returnType : this->getFloatTy();
1226     llvm::Type* types[] = {
1227         IGCLLVM::FixedVectorType::get(dstType, 4),
1228         float_reference_0->getType(),
1229         int32_textureIdx->getType(),
1230         int32_sampler->getType()
1231     };
1232     if (feedback_enabled)
1233     {
1234         types[0] = IGCLLVM::FixedVectorType::get(dstType, 5);
1235     }
1236     llvm::Function* func_llvm_GenISA_gather4Cptr_v4f32_f32 = llvm::GenISAIntrinsic::getDeclaration
1237         (module, llvm::GenISAIntrinsic::GenISA_gather4Cptr, types);
1238     llvm::CallInst* packed_tex_call = this->CreateCall(func_llvm_GenISA_gather4Cptr_v4f32_f32, packed_tex_params);
1239     return packed_tex_call;
1240 }
1241 
1242 template<bool preserveNames, typename T, typename Inserter>
Create_gather4POC(llvm::Value * float_address_0,llvm::Value * float_address_1,llvm::Value * float_address_2,llvm::Value * int_src_offset_0,llvm::Value * int_src_offset_1,llvm::Value * float_src_reference_0,llvm::Value * int32_textureIdx,llvm::Value * int32_sampler,llvm::Value * int32_offsetU,llvm::Value * int32_offsetV,llvm::Value * int32_srcChannel,bool feedback_enabled,llvm::Type * returnType)1243 inline llvm::CallInst* LLVM3DBuilder<preserveNames, T, Inserter>::Create_gather4POC(
1244     llvm::Value* float_address_0,
1245     llvm::Value* float_address_1,
1246     llvm::Value* float_address_2,
1247     llvm::Value* int_src_offset_0,
1248     llvm::Value* int_src_offset_1,
1249     llvm::Value* float_src_reference_0,
1250     llvm::Value* int32_textureIdx,
1251     llvm::Value* int32_sampler,
1252     llvm::Value* int32_offsetU,
1253     llvm::Value* int32_offsetV,
1254     llvm::Value* int32_srcChannel,
1255     bool feedback_enabled,
1256     llvm::Type* returnType)
1257 {
1258     llvm::Value * packed_tex_params[] = {
1259         float_src_reference_0,
1260         float_address_0,
1261         float_address_1,
1262         int_src_offset_0,
1263         int_src_offset_1,
1264         float_address_2,
1265         int32_textureIdx,
1266         int32_sampler,
1267         int32_offsetU,
1268         int32_offsetV,
1269         m_int0,
1270         int32_srcChannel
1271     };
1272 
1273     llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
1274 
1275     llvm::Type* dstType = ( returnType != nullptr ) ? returnType : this->getFloatTy();
1276     llvm::Type* types[] = {
1277         IGCLLVM::FixedVectorType::get(dstType, 4),
1278         float_src_reference_0->getType(),
1279         int32_textureIdx->getType(),
1280         int32_sampler->getType()
1281     };
1282     if (feedback_enabled)
1283     {
1284         types[0] = IGCLLVM::FixedVectorType::get(dstType, 5);
1285     }
1286     llvm::Function* func_llvm_GenISA_gather4POCptr_v4f32_f32 = llvm::GenISAIntrinsic::getDeclaration
1287         (module, llvm::GenISAIntrinsic::GenISA_gather4POCptr, types);
1288     llvm::CallInst* packed_tex_call = this->CreateCall(func_llvm_GenISA_gather4POCptr_v4f32_f32, packed_tex_params);
1289     return packed_tex_call;
1290 }
1291 
1292 template<bool preserveNames, typename T, typename Inserter>
Create_gather4PO(llvm::Value * float_address_0,llvm::Value * float_address_1,llvm::Value * float_address_2,llvm::Value * int_src_offset_0,llvm::Value * int_src_offset_1,llvm::Value * int32_textureIdx,llvm::Value * int32_sampler,llvm::Value * int32_offsetU,llvm::Value * int32_offsetV,llvm::Value * int32_srcChannel,bool feedback_enabled,llvm::Type * returnType)1293 inline llvm::CallInst* LLVM3DBuilder<preserveNames, T, Inserter>::Create_gather4PO(
1294     llvm::Value* float_address_0,
1295     llvm::Value* float_address_1,
1296     llvm::Value* float_address_2,
1297     llvm::Value* int_src_offset_0,
1298     llvm::Value* int_src_offset_1,
1299     llvm::Value* int32_textureIdx,
1300     llvm::Value* int32_sampler,
1301     llvm::Value* int32_offsetU,
1302     llvm::Value* int32_offsetV,
1303     llvm::Value* int32_srcChannel,
1304     bool feedback_enabled,
1305     llvm::Type* returnType)
1306 {
1307     llvm::Value * packed_tex_params[] = {
1308         float_address_0,
1309         float_address_1,
1310         int_src_offset_0,
1311         int_src_offset_1,
1312         float_address_2,
1313         int32_textureIdx,
1314         int32_sampler,
1315         int32_offsetU,
1316         int32_offsetV,
1317         m_int0,
1318         int32_srcChannel
1319     };
1320 
1321     llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
1322 
1323     llvm::Type* dstType = ( returnType != nullptr ) ? returnType : this->getFloatTy();
1324     llvm::Type* types[] = {
1325         IGCLLVM::FixedVectorType::get(dstType, 4),
1326         float_address_0->getType(),
1327         int32_textureIdx->getType(),
1328         int32_sampler->getType()
1329     };
1330     if (feedback_enabled)
1331     {
1332         types[0] = IGCLLVM::FixedVectorType::get(dstType, 5);
1333     }
1334     llvm::Function* func_llvm_GenISA_gather4POptr_v4f32_f32 = llvm::GenISAIntrinsic::getDeclaration
1335         (module, llvm::GenISAIntrinsic::GenISA_gather4POptr, types);
1336     llvm::CallInst* packed_tex_call = this->CreateCall(func_llvm_GenISA_gather4POptr_v4f32_f32, packed_tex_params);
1337 
1338     return packed_tex_call;
1339 }
1340 
1341 template<bool preserveNames, typename T, typename Inserter>
Create_gather4PositionOffsets(llvm::Value * float_address_0,llvm::Value * float_address_1,llvm::Value * float_address_2,llvm::ArrayRef<llvm::Value * > int_src_offsets,llvm::Value * int32_textureIdx,llvm::Value * int32_sampler,llvm::Value * int32_offsetU,llvm::Value * int32_offsetV,llvm::Value * int32_srcChannel)1342 inline llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::Create_gather4PositionOffsets(
1343     llvm::Value* float_address_0,
1344     llvm::Value* float_address_1,
1345     llvm::Value* float_address_2,
1346     llvm::ArrayRef<llvm::Value *> int_src_offsets,
1347     llvm::Value* int32_textureIdx,
1348     llvm::Value* int32_sampler,
1349     llvm::Value* int32_offsetU,
1350     llvm::Value* int32_offsetV,
1351     llvm::Value* int32_srcChannel)
1352 {
1353     llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
1354 
1355     llvm::Value *gatherReturn = llvm::UndefValue::get(IGCLLVM::FixedVectorType::get(llvm::Type::getFloatTy(module->getContext()), 4));
1356     for (int i = 0, j = 0; i < 7; i = i + 2, j++)
1357     {
1358         llvm::Value* packed_tex_call = Create_gather4PO(
1359             float_address_0,
1360             float_address_1,
1361             float_address_2,
1362             int_src_offsets[i],
1363             int_src_offsets[i + 1],
1364             int32_textureIdx,
1365             int32_sampler,
1366             int32_offsetU,
1367             int32_offsetV,
1368             int32_srcChannel,
1369             false,
1370             llvm::Type::getFloatTy(module->getContext()));
1371 
1372 
1373         gatherReturn = this->CreateInsertElement(
1374             gatherReturn,
1375             this->CreateExtractElement(packed_tex_call, this->getInt32(3)),
1376             this->getInt32(j),
1377             "call_inst");
1378     }
1379 
1380     return gatherReturn;
1381 }
1382 
1383 template<bool preserveNames, typename T, typename Inserter>
Create_gather4PositionOffsetsC(llvm::Value * float_reference_0,llvm::Value * float_address_0,llvm::Value * float_address_1,llvm::Value * float_address_2,llvm::ArrayRef<llvm::Value * > int_src_offsets,llvm::Value * int32_textureIdx_356,llvm::Value * int32_sampler_357,llvm::Value * int32_offsetU,llvm::Value * int32_offsetV,llvm::Value * int32_srcChannel)1384 inline llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::Create_gather4PositionOffsetsC(
1385     llvm::Value* float_reference_0,
1386     llvm::Value* float_address_0,
1387     llvm::Value* float_address_1,
1388     llvm::Value* float_address_2,
1389     llvm::ArrayRef<llvm::Value *> int_src_offsets,
1390     llvm::Value* int32_textureIdx_356,
1391     llvm::Value* int32_sampler_357,
1392     llvm::Value* int32_offsetU,
1393     llvm::Value* int32_offsetV,
1394     llvm::Value* int32_srcChannel)
1395 {
1396     llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
1397 
1398     llvm::Value *gatherReturn = llvm::UndefValue::get(IGCLLVM::FixedVectorType::get(llvm::Type::getFloatTy(module->getContext()), 4));
1399     for (int i = 0, j = 0; i < 7; i = i + 2, j++)
1400     {
1401         llvm::Value* packed_tex_1527_call = Create_gather4POC(
1402             float_address_0,
1403             float_address_1,
1404             float_address_2,
1405             int_src_offsets[i],
1406             int_src_offsets[i + 1],
1407             float_reference_0,
1408             int32_textureIdx_356,
1409             int32_sampler_357,
1410             int32_offsetU,
1411             int32_offsetV,
1412             int32_srcChannel,
1413             false,
1414             llvm::Type::getFloatTy(module->getContext()));
1415 
1416         gatherReturn = this->CreateInsertElement(
1417             gatherReturn,
1418             this->CreateExtractElement(packed_tex_1527_call, this->getInt32(3)),
1419             this->getInt32(j),
1420             "call_inst");
1421     }
1422 
1423     return gatherReturn;
1424 }
1425 
1426 template<bool preserveNames, typename T, typename Inserter>
Create_SAMPLEB(llvm::Value * float_bias_0,llvm::Value * float_address_0,llvm::Value * float_address_1,llvm::Value * float_address_2,llvm::Value * float_address_3,llvm::Value * int32_textureIdx,llvm::Value * int32_sampler,llvm::Value * int32_offsetU,llvm::Value * int32_offsetV,llvm::Value * int32_offsetW,llvm::Value * minlod,bool feedback_enabled,llvm::Type * returnType)1427 inline llvm::CallInst* LLVM3DBuilder<preserveNames, T, Inserter>::Create_SAMPLEB(
1428     llvm::Value* float_bias_0,
1429     llvm::Value* float_address_0,
1430     llvm::Value* float_address_1,
1431     llvm::Value* float_address_2,
1432     llvm::Value* float_address_3,
1433     llvm::Value* int32_textureIdx,
1434     llvm::Value* int32_sampler,
1435     llvm::Value* int32_offsetU,
1436     llvm::Value* int32_offsetV,
1437     llvm::Value* int32_offsetW,
1438     llvm::Value* minlod,
1439     bool feedback_enabled,
1440     llvm::Type* returnType)
1441 {
1442     if (minlod == nullptr)
1443     {
1444         minlod = llvm::ConstantFP::get(float_address_0->getType(), 0.0);
1445     }
1446 
1447     //   %tex = call <4 x float> @llvm.GenISA.sample.v4f32.f32(float %src_s.chan0, float %src_s.chan1, float %src_s.chan2, float 0.000000e+00, i32 %textureIdx, i32 %sampler, i32 %offsetU, i32 %offsetV, i32 %offsetW)
1448     llvm::Value * packed_tex_params[] = {
1449         float_bias_0,
1450         float_address_0,
1451         float_address_1,
1452         float_address_2,
1453         float_address_3,
1454         minlod,
1455         int32_textureIdx,
1456         int32_sampler,
1457         int32_offsetU,
1458         int32_offsetV,
1459         int32_offsetW
1460     };
1461 
1462     llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
1463 
1464     llvm::Type* dstType = ( returnType != nullptr ) ? returnType : this->getFloatTy();
1465     llvm::Type* types[] = {
1466         IGCLLVM::FixedVectorType::get(dstType, 4),
1467         float_bias_0->getType(),
1468         int32_textureIdx->getType(),
1469         int32_sampler->getType()
1470     };
1471     if (feedback_enabled)
1472     {
1473         types[0] = IGCLLVM::FixedVectorType::get(dstType, 5);
1474     }
1475     llvm::Function* func_llvm_GenISA_sampleB_v4f32_f32 = llvm::GenISAIntrinsic::getDeclaration
1476         (module, llvm::GenISAIntrinsic::GenISA_sampleBptr, types);
1477 
1478     llvm::CallInst* packed_tex_call = this->CreateCall(func_llvm_GenISA_sampleB_v4f32_f32, packed_tex_params);
1479     return packed_tex_call;
1480 }
1481 
1482 template<bool preserveNames, typename T, typename Inserter>
Create_SAMPLEL(llvm::Value * float_lod_0,llvm::Value * float_address_0,llvm::Value * float_address_1,llvm::Value * float_address_2,llvm::Value * float_address_3,llvm::Value * ptr_textureIdx,llvm::Value * ptr_sampler,llvm::Value * int32_offsetU,llvm::Value * int32_offsetV,llvm::Value * int32_offsetW,bool feedback_enabled,llvm::Type * returnType)1483 inline llvm::CallInst* LLVM3DBuilder<preserveNames, T, Inserter>::Create_SAMPLEL(
1484     llvm::Value* float_lod_0,
1485     llvm::Value* float_address_0,
1486     llvm::Value* float_address_1,
1487     llvm::Value* float_address_2,
1488     llvm::Value* float_address_3,
1489     llvm::Value* ptr_textureIdx,
1490     llvm::Value* ptr_sampler,
1491     llvm::Value* int32_offsetU,
1492     llvm::Value* int32_offsetV,
1493     llvm::Value* int32_offsetW,
1494     bool feedback_enabled,
1495     llvm::Type* returnType)
1496 {
1497     llvm::Value * packed_tex_params[] = {
1498         float_lod_0,
1499         float_address_0,
1500         float_address_1,
1501         float_address_2,
1502         float_address_3,
1503         ptr_textureIdx,
1504         ptr_sampler,
1505         int32_offsetU,
1506         int32_offsetV,
1507         int32_offsetW
1508     };
1509 
1510     llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
1511 
1512     llvm::Type* dstType = ( returnType != nullptr ) ? returnType : this->getFloatTy();
1513     llvm::Type* types[] = {
1514         IGCLLVM::FixedVectorType::get(dstType, 4),
1515         float_lod_0->getType(),
1516         ptr_textureIdx->getType(),
1517         ptr_sampler->getType()
1518     };
1519     if (feedback_enabled)
1520     {
1521         types[0] = IGCLLVM::FixedVectorType::get(dstType, 5);
1522     }
1523     llvm::Function* func_llvm_GenISA_sampleL_v4f32_f32 = llvm::GenISAIntrinsic::getDeclaration
1524         (module, llvm::GenISAIntrinsic::GenISA_sampleLptr, types);
1525 
1526     llvm::CallInst* packed_tex_call = this->CreateCall(func_llvm_GenISA_sampleL_v4f32_f32, packed_tex_params);
1527     return packed_tex_call;
1528 }
1529 
1530 template<bool preserveNames, typename T, typename Inserter>
Create_SAMPLED(SampleD_DC_FromCubeParams & sampleParams,llvm::Value * minlod,bool feedback_enabled,llvm::Type * returnType)1531 inline llvm::CallInst* LLVM3DBuilder<preserveNames, T, Inserter>::Create_SAMPLED(
1532     SampleD_DC_FromCubeParams& sampleParams,
1533     llvm::Value* minlod,
1534     bool feedback_enabled,
1535     llvm::Type* returnType)
1536 {
1537     return Create_SAMPLED(
1538         sampleParams.get_float_src_u(),
1539         sampleParams.get_float_src_v(),
1540         sampleParams.get_float_src_r(),
1541         sampleParams.get_dxu(),
1542         sampleParams.get_dxv(),
1543         sampleParams.get_dxr(),
1544         sampleParams.get_dyu(),
1545         sampleParams.get_dyv(),
1546         sampleParams.get_dyr(),
1547         sampleParams.get_float_src_ai(),
1548         sampleParams.get_int32_textureIdx(),
1549         sampleParams.get_int32_sampler(),
1550         sampleParams.get_int32_offsetU(),
1551         sampleParams.get_int32_offsetV(),
1552         sampleParams.get_int32_offsetW(),
1553         minlod,
1554         feedback_enabled,
1555         returnType
1556     );
1557 }
1558 
1559 template<bool preserveNames, typename T, typename Inserter>
Create_SAMPLED(llvm::Value * float_src1_s_chan0,llvm::Value * float_src1_s_chan1,llvm::Value * float_src1_s_chan2,llvm::Value * float_src2_s_chan0,llvm::Value * float_src2_s_chan1,llvm::Value * float_src2_s_chan2,llvm::Value * float_src3_s_chan0,llvm::Value * float_src3_s_chan1,llvm::Value * float_src3_s_chan2,llvm::Value * float_src1_s_chan3,llvm::Value * ptr_textureIdx,llvm::Value * ptr_sampler,llvm::Value * int32_offsetU_358,llvm::Value * int32_offsetV_359,llvm::Value * int32_offsetW_359,llvm::Value * minlod,bool feedback_enabled,llvm::Type * returnType)1560 inline llvm::CallInst* LLVM3DBuilder<preserveNames, T, Inserter>::Create_SAMPLED(
1561     llvm::Value* float_src1_s_chan0,
1562     llvm::Value* float_src1_s_chan1,
1563     llvm::Value* float_src1_s_chan2,
1564     llvm::Value* float_src2_s_chan0,
1565     llvm::Value* float_src2_s_chan1,
1566     llvm::Value* float_src2_s_chan2,
1567     llvm::Value* float_src3_s_chan0,
1568     llvm::Value* float_src3_s_chan1,
1569     llvm::Value* float_src3_s_chan2,
1570     llvm::Value* float_src1_s_chan3,
1571     llvm::Value* ptr_textureIdx,
1572     llvm::Value* ptr_sampler,
1573     llvm::Value* int32_offsetU_358,
1574     llvm::Value* int32_offsetV_359,
1575     llvm::Value* int32_offsetW_359,
1576     llvm::Value* minlod,
1577     bool feedback_enabled,
1578     llvm::Type* returnType)
1579 {
1580     if (minlod == nullptr)
1581     {
1582         minlod = llvm::ConstantFP::get(float_src1_s_chan0->getType(), 0.0);
1583     }
1584 
1585     //   %tex = call <4 x float> @llvm.GenISA.sample.v4f32.f32D(float %src_s.chan0, float %src2_s.chan0, float %src3_s.chan0, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, i32 %textureIdx, i32 %sampler, i32 %offsetU, i32 0, i32 0)
1586     llvm::Value * packed_tex_params[] = {
1587         float_src1_s_chan0,
1588         float_src2_s_chan0,
1589         float_src3_s_chan0,
1590         float_src1_s_chan1,
1591         float_src2_s_chan1,
1592         float_src3_s_chan1,
1593         float_src1_s_chan2,
1594         float_src2_s_chan2,
1595         float_src3_s_chan2,
1596         float_src1_s_chan3,
1597         minlod,
1598         ptr_textureIdx,
1599         ptr_sampler,
1600         int32_offsetU_358,
1601         int32_offsetV_359,
1602         int32_offsetW_359
1603     };
1604 
1605     llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
1606 
1607     llvm::Type* dstType = ( returnType != nullptr ) ? returnType : this->getFloatTy();
1608     llvm::Type* types[] = {
1609         IGCLLVM::FixedVectorType::get(dstType, 4),
1610         float_src1_s_chan0->getType(),
1611         ptr_textureIdx->getType(),
1612         ptr_sampler->getType()
1613     };
1614     if(feedback_enabled)
1615     {
1616         types[0] = IGCLLVM::FixedVectorType::get(dstType, 5);
1617     }
1618 
1619     llvm::Function* func_llvm_GenISA_sampleDptr_v4f32_f32 = llvm::GenISAIntrinsic::getDeclaration
1620         (module, llvm::GenISAIntrinsic::GenISA_sampleDptr, types);
1621 
1622     llvm::CallInst* packed_tex_call = this->CreateCall(func_llvm_GenISA_sampleDptr_v4f32_f32, packed_tex_params);
1623 
1624     return packed_tex_call;
1625 }
1626 
1627 template<bool preserveNames, typename T, typename Inserter>
Create_SAMPLEDC(llvm::Value * float_ref,llvm::Value * float_src_u,llvm::Value * dxu,llvm::Value * dyu,llvm::Value * float_src_v,llvm::Value * dxv,llvm::Value * dyv,llvm::Value * float_src_r,llvm::Value * dxr,llvm::Value * dyr,llvm::Value * float_src_ai,llvm::Value * int32_textureIdx,llvm::Value * int32_sampler,llvm::Value * int32_offsetU,llvm::Value * int32_offsetV,llvm::Value * int32_offsetW,llvm::Type * returnType)1628 inline llvm::CallInst* LLVM3DBuilder<preserveNames, T, Inserter>::Create_SAMPLEDC(
1629     llvm::Value* float_ref,
1630     llvm::Value* float_src_u,
1631     llvm::Value* dxu,
1632     llvm::Value* dyu,
1633     llvm::Value* float_src_v,
1634     llvm::Value* dxv,
1635     llvm::Value* dyv,
1636     llvm::Value* float_src_r,
1637     llvm::Value* dxr,
1638     llvm::Value* dyr,
1639     llvm::Value* float_src_ai,
1640     llvm::Value* int32_textureIdx,
1641     llvm::Value* int32_sampler,
1642     llvm::Value* int32_offsetU,
1643     llvm::Value* int32_offsetV,
1644     llvm::Value* int32_offsetW,
1645     llvm::Type* returnType)
1646 {
1647     //   %tex = call <4 x float> @llvm.GenISA.sample.v4f32.f32D(float %float_ref, float %float_src_u, float %dxu, float %dxu, float %dyu, float float_src_v,
1648     //                           float %dxv, float %dyv, float %float_src_r, float %dxr, float %dyr, float 0.000000e+00,
1649     //                           i32 %textureIdx, i32 %sampler, i32 %offsetU, i32 %offsetV, i32 %offsetW)
1650     llvm::Value * packed_tex_params[] = {
1651         float_ref,
1652         float_src_u,
1653         dxu,
1654         dyu,
1655         float_src_v,
1656         dxv,
1657         dyv,
1658         float_src_r,
1659         dxr,
1660         dyr,
1661         float_src_ai,
1662         int32_textureIdx,
1663         int32_sampler,
1664         int32_offsetU,
1665         int32_offsetV,
1666         int32_offsetW
1667     };
1668 
1669     llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
1670 
1671     llvm::Type* dstType = ( returnType != nullptr ) ? returnType : this->getFloatTy();
1672     llvm::Type* types[] = {
1673         IGCLLVM::FixedVectorType::get(dstType, 4),
1674         float_ref->getType(),
1675         int32_textureIdx->getType(),
1676         int32_sampler->getType()
1677     };
1678 
1679     llvm::Function* func_llvm_GenISA_sampleDCptr_v4f32_f32 = llvm::GenISAIntrinsic::getDeclaration
1680         (module, llvm::GenISAIntrinsic::GenISA_sampleDCptr, types);
1681 
1682     llvm::CallInst* packed_tex_call = this->CreateCall(func_llvm_GenISA_sampleDCptr_v4f32_f32, packed_tex_params);
1683 
1684     return packed_tex_call;
1685 }
1686 
1687 template<bool preserveNames, typename T, typename Inserter>
Create_lod(llvm::Value * float_address_0,llvm::Value * float_address_1,llvm::Value * float_address_2,llvm::Value * float_address_3,llvm::Value * int32_textureIdx_356,llvm::Value * int32_sampler_357,llvm::Type * returnType)1688 inline llvm::CallInst* LLVM3DBuilder<preserveNames, T, Inserter>::Create_lod(
1689     llvm::Value* float_address_0,
1690     llvm::Value* float_address_1,
1691     llvm::Value* float_address_2,
1692     llvm::Value* float_address_3,
1693     llvm::Value* int32_textureIdx_356,
1694     llvm::Value* int32_sampler_357,
1695     llvm::Type* returnType)
1696 {
1697     llvm::Value * packed_tex_params[] = {
1698         float_address_0,
1699         float_address_1,
1700         float_address_2,
1701         float_address_3,
1702         int32_textureIdx_356,
1703         int32_sampler_357,
1704     };
1705 
1706     llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
1707 
1708     llvm::Type* dstType = ( returnType != nullptr ) ? returnType : this->getFloatTy();
1709     llvm::Type* types[] = {
1710         IGCLLVM::FixedVectorType::get(dstType, 4),
1711         float_address_0->getType(),
1712         int32_textureIdx_356->getType(),
1713         int32_sampler_357->getType()
1714     };
1715 
1716     llvm::Function* func_llvm_GenISA_lodptr_v4f32_f32 = llvm::GenISAIntrinsic::getDeclaration
1717         (module, llvm::GenISAIntrinsic::GenISA_lodptr, types);
1718 
1719     llvm::CallInst* packed_tex_call = this->CreateCall(func_llvm_GenISA_lodptr_v4f32_f32, packed_tex_params);
1720 
1721     return packed_tex_call;
1722 }
1723 
1724 template<bool preserveNames, typename T, typename Inserter>
Create_gather4(llvm::Value * float_address_0,llvm::Value * float_address_1,llvm::Value * float_address_2,llvm::Value * float_address_3,llvm::Value * int32_textureIdx_356,llvm::Value * int32_sampler_357,llvm::Value * int32_offsetU,llvm::Value * int32_offsetV,llvm::Value * int32_offsetW,llvm::Value * int32_srcChannel,bool feedback_enabled,llvm::Type * returnType)1725 inline llvm::CallInst* LLVM3DBuilder<preserveNames, T, Inserter>::Create_gather4(
1726     llvm::Value* float_address_0,
1727     llvm::Value* float_address_1,
1728     llvm::Value* float_address_2,
1729     llvm::Value* float_address_3,
1730     llvm::Value* int32_textureIdx_356,
1731     llvm::Value* int32_sampler_357,
1732     llvm::Value* int32_offsetU,
1733     llvm::Value* int32_offsetV,
1734     llvm::Value* int32_offsetW,
1735     llvm::Value* int32_srcChannel,
1736     bool feedback_enabled,
1737     llvm::Type* returnType)
1738 {
1739     llvm::Value * packed_tex_params[] = {
1740         float_address_0,
1741         float_address_1,
1742         float_address_2,
1743         float_address_3,
1744         int32_textureIdx_356,
1745         int32_sampler_357,
1746         int32_offsetU,
1747         int32_offsetV,
1748         int32_offsetW,
1749         int32_srcChannel
1750     };
1751 
1752     llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
1753 
1754     llvm::Type* dstType = ( returnType != nullptr ) ? returnType : this->getFloatTy();
1755     llvm::Type* types[] = {
1756         IGCLLVM::FixedVectorType::get(dstType, 4),
1757         float_address_0->getType(),
1758         int32_textureIdx_356->getType(),
1759         int32_sampler_357->getType()
1760     };
1761     if (feedback_enabled)
1762     {
1763         types[0] = IGCLLVM::FixedVectorType::get(dstType, 5);
1764     }
1765     llvm::Function* func_llvm_GenISA_gather4ptr_v4f32_f32 = llvm::GenISAIntrinsic::getDeclaration
1766         (module, llvm::GenISAIntrinsic::GenISA_gather4ptr, types);
1767 
1768     llvm::CallInst* packed_tex_call = this->CreateCall(func_llvm_GenISA_gather4ptr_v4f32_f32, packed_tex_params);
1769 
1770     return packed_tex_call;
1771 }
1772 
1773 template<bool preserveNames, typename T, typename Inserter>
Create_load(llvm::Value * int32_sampleIdxU,llvm::Value * int32_sampleIdxV,llvm::Value * int32_sampleIdxR,llvm::Value * int32_lod,llvm::Value * ptr_textureIdx,llvm::Value * int32_offsetU,llvm::Value * int32_offsetV,llvm::Value * int32_offsetR,bool feedback_enabled,llvm::Type * returnType)1774 inline llvm::CallInst* LLVM3DBuilder<preserveNames, T, Inserter>::Create_load(
1775     llvm::Value* int32_sampleIdxU,
1776     llvm::Value* int32_sampleIdxV,
1777     llvm::Value* int32_sampleIdxR,
1778     llvm::Value* int32_lod,
1779     llvm::Value* ptr_textureIdx,
1780     llvm::Value* int32_offsetU,
1781     llvm::Value* int32_offsetV,
1782     llvm::Value* int32_offsetR,
1783     bool feedback_enabled,
1784     llvm::Type* returnType)
1785 {
1786     llvm::Value * packed_tex_params[] = {
1787         int32_sampleIdxU,
1788         int32_sampleIdxV,
1789         int32_lod,
1790         int32_sampleIdxR,
1791         ptr_textureIdx,
1792         int32_offsetU,
1793         int32_offsetV,
1794         int32_offsetR
1795     };
1796 
1797     llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
1798 
1799     llvm::Type* dstType = ( returnType != nullptr ) ? returnType : this->getFloatTy();
1800     llvm::Type* types[] = {
1801         IGCLLVM::FixedVectorType::get(dstType, feedback_enabled ? 5 : 4),
1802         ptr_textureIdx->getType()
1803     };
1804 
1805     llvm::Function* func_llvm_GenISA_ldptr_v4f32_f32 = llvm::GenISAIntrinsic::getDeclaration
1806         (module, llvm::GenISAIntrinsic::GenISA_ldptr, types);
1807 
1808     llvm::CallInst* packed_tex_call = this->CreateCall(func_llvm_GenISA_ldptr_v4f32_f32, packed_tex_params);
1809 
1810     return packed_tex_call;
1811 }
1812 
1813 template<bool preserveNames, typename T, typename Inserter>
Create_ldms(llvm::Value * int32_srcIdxU,llvm::Value * int32_srcIdxV,llvm::Value * int32_srcIdxR,llvm::Value * int32_sampleIdx,llvm::Value * int32_textureIdx,llvm::Value * int32_offsetU,llvm::Value * int32_offsetV,llvm::Value * int32_offsetR,bool feedback_enabled,llvm::Type * returnType)1814 inline llvm::CallInst* LLVM3DBuilder<preserveNames, T, Inserter>::Create_ldms(
1815     llvm::Value* int32_srcIdxU,
1816     llvm::Value* int32_srcIdxV,
1817     llvm::Value* int32_srcIdxR,
1818     llvm::Value* int32_sampleIdx,
1819     llvm::Value* int32_textureIdx,
1820     llvm::Value* int32_offsetU,
1821     llvm::Value* int32_offsetV,
1822     llvm::Value* int32_offsetR,
1823     bool feedback_enabled,
1824     llvm::Type* returnType)
1825 {
1826     llvm::Value * packed_mcs_params[] = {
1827         int32_srcIdxU,
1828         int32_srcIdxV,
1829         int32_srcIdxR,
1830         m_int0,
1831         int32_textureIdx,
1832         int32_offsetU,
1833         int32_offsetV,
1834         int32_offsetR
1835     };
1836 
1837     llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
1838 
1839     llvm::Type* types[] = { IGCLLVM::FixedVectorType::get(this->getInt32Ty(), 2), this->getInt32Ty(), int32_textureIdx->getType() };
1840     llvm::Function* func_llvm_GenISA_ldmcsptr_v4f32_f32 = llvm::GenISAIntrinsic::getDeclaration
1841         (module, llvm::GenISAIntrinsic::GenISA_ldmcsptr, types);
1842     llvm::CallInst* packed_mcs_call = this->CreateCall(func_llvm_GenISA_ldmcsptr_v4f32_f32, packed_mcs_params);
1843 
1844 
1845     llvm::Value* mcs_ch0 = this->CreateExtractElement(packed_mcs_call, m_int0);
1846     llvm::Value* mcs_ch1 = this->CreateExtractElement(packed_mcs_call, m_int1);
1847 
1848     llvm::Value * packed_tex_params[] = {
1849         int32_sampleIdx,
1850         mcs_ch0,
1851         mcs_ch1,
1852         int32_srcIdxU,
1853         int32_srcIdxV,
1854         int32_srcIdxR,
1855         m_int0,
1856         int32_textureIdx,
1857         int32_offsetU,
1858         int32_offsetV,
1859         int32_offsetR
1860     };
1861 
1862     llvm::Type* dstType = ( returnType != nullptr ) ? returnType : this->getFloatTy();
1863     llvm::Type* types_ldms[] = {
1864         IGCLLVM::FixedVectorType::get(dstType, 4),
1865         int32_textureIdx->getType()
1866     };
1867     if (feedback_enabled)
1868     {
1869         types_ldms[0] = IGCLLVM::FixedVectorType::get(dstType, 5);
1870     }
1871 
1872     llvm::Function* func_llvm_GenISA_ldmsptr_v4f32_f32 = llvm::GenISAIntrinsic::getDeclaration
1873         (module, llvm::GenISAIntrinsic::GenISA_ldmsptr, types_ldms);
1874 
1875     llvm::CallInst* packed_tex_call = this->CreateCall(func_llvm_GenISA_ldmsptr_v4f32_f32, packed_tex_params);
1876     return packed_tex_call;
1877 }
1878 
1879 template<bool preserveNames, typename T, typename Inserter>
Prepare_SAMPLE_Cube_ParamsFromUnormalizedCoords(llvm::Value * int32_lod,llvm::Value * int32_textureIdx,llvm::Value * int32_u,llvm::Value * int32_v,llvm::Value * int32_faceid,llvm::Value * int32_cube_array_index,llvm::Value * float_array_6_3,llvm::Value * int32_sampler)1880 inline SampleParamsFromCube LLVM3DBuilder<preserveNames, T, Inserter>::Prepare_SAMPLE_Cube_ParamsFromUnormalizedCoords(
1881     llvm::Value* int32_lod,
1882     llvm::Value* int32_textureIdx,
1883     llvm::Value* int32_u,
1884     llvm::Value* int32_v,
1885     llvm::Value* int32_faceid,
1886     llvm::Value* int32_cube_array_index,
1887     llvm::Value *float_array_6_3,
1888     llvm::Value* int32_sampler
1889     )
1890 {
1891     //Samplers point of reference is always center of the face, which is (0,0)
1892     //That means the four vertices of the normalized cube are depiced as below
1893     //(-1,-1)        (1,-1)
1894     //  -------|---------
1895     //  |      |        |
1896     //  |      |        |
1897     //  |---------------|
1898     //  |      |(0,0)   |
1899     //  |      |        |
1900     //  -------|---------
1901     //(-1,1)          (1,1)
1902     //Thus each un-normalized coordiate (x,y) needs to be normalized between <-1,1>
1903     //Below is the Math to normalize between <-1,1>
1904     //u = (u * 2 + 1)/width - 1
1905     //v = (v * 2 + 1)/height - 1
1906 
1907     //Using resinfo extract width and height of the buffer
1908     //Using resinfo extract width and height of the buffer
1909     llvm::Value *resinfo = this->Create_resinfo(int32_lod, int32_textureIdx);
1910     llvm::Value *width = this->CreateExtractElement(resinfo, m_int0);
1911     llvm::Value *height = this->CreateExtractElement(resinfo, m_int1);
1912 
1913     //convert u, v, width and height to float
1914     llvm::Value *float_u = this->CreateUIToFP(int32_u, this->getFloatTy());
1915     llvm::Value *float_v = this->CreateUIToFP(int32_v, this->getFloatTy());
1916     width = this->CreateUIToFP(width, this->getFloatTy());
1917     height = this->CreateUIToFP(height, this->getFloatTy());
1918     //define some constants
1919     llvm::Value* float_minus1 = this->getFloat(-1.0);
1920     llvm::Value* float_2 = this->getFloat(2.0);
1921 
1922     //u and v represent the coordinates of a texel for a given face
1923     //Now normalize u in the range [-1,1] using following equation
1924     //u = (2*u + 1)/width -1
1925     float_u = this->CreateFAdd(this->CreateFMul(float_u, float_2), m_float1);
1926     float_u = this->CreateFSub(this->CreateFDiv(float_u, width), m_float1);
1927     //Now normalize v in the range [-1,1] using following equation
1928     //v = (v * 2 + 1)/height - 1
1929     float_v = this->CreateFAdd(this->CreateFMul(float_v, float_2), m_float1);
1930     float_v = this->CreateFSub(this->CreateFDiv(float_v, height), m_float1);
1931 
1932     llvm::Value *minus_floatu = this->CreateFMul(float_u, float_minus1); //-u
1933     llvm::Value *minus_floatv = this->CreateFMul(float_v, float_minus1); //-v
1934     llvm::Value *float_arrayIndex = this->CreateUIToFP(int32_cube_array_index, this->getFloatTy());
1935     //This array represents how the u and v value needs to be picked, for a face
1936     unsigned num_cube_faces = 6;
1937     unsigned num_dimensions = 3;
1938 
1939     //The mapping of face-id to texture surface is as follows
1940     //+x->face 0, -x->face 1, +y -> face 2, -y -> face 3, +z -> face 4, -z -> face 5
1941     //Now for each face we need to transform the normalized coordinates as follows
1942     //face 0(+X) = (-v, -u), face 1(-X) = (-v, u), face 2(+Y) = (u, v)
1943     //face 3(-Y) = (u, -v) , face 4(+Z) = (u, -v), face 5(+Z) = (-u, -v)
1944     //Refer to https://en.wikipedia.org/wiki/Cube_mapping for details
1945     llvm::Value *cubeCoordMap[6][3] = {
1946         { m_float1     ,    minus_floatv,   minus_floatu    }, //+x = face0
1947         { float_minus1 ,    minus_floatv,   float_u         }, //-x = face1
1948         { float_u ,         m_float1    ,   float_v         }, //+y = face2
1949         { float_u ,         float_minus1,   minus_floatv    }, //-y = face3
1950         { float_u ,         minus_floatv,   m_float1        }, //+z = face4
1951         { minus_floatu ,    minus_floatv,   float_minus1    }  //-z = face5
1952     };
1953     //Now populate the 6x3 array with values of cubeCoordMap
1954     llvm::Value *indexList[2];
1955     llvm::Value *row, *elt;
1956     indexList[0] = m_int0;
1957     for (unsigned faceid = 0; faceid < num_cube_faces; faceid++) {
1958         indexList[1] = this->getInt32(faceid);
1959         row = this->CreateGEP(float_array_6_3, llvm::ArrayRef<llvm::Value*>(indexList, 2));
1960         for (unsigned j = 0; j < num_dimensions; j++) {
1961             indexList[1] = this->getInt32(j);
1962             elt = this->CreateGEP(row, llvm::ArrayRef<llvm::Value*>(indexList, 2));
1963             this->CreateStore(cubeCoordMap[faceid][j], elt);
1964         }
1965     }
1966 
1967     //Now pick the one the row indexed by int32_faceid
1968     llvm::Value *finalCoords[3];
1969     indexList[1] = int32_faceid;
1970     row = this->CreateGEP(float_array_6_3, llvm::ArrayRef<llvm::Value*>(indexList, 2));
1971     for (unsigned i = 0; i < 3; i++) {
1972         indexList[1] = this->getInt32(i);
1973         elt = this->CreateGEP(row, llvm::ArrayRef<llvm::Value*>(indexList, 2));
1974         finalCoords[i] = this->CreateLoad(elt);
1975     }
1976 
1977     SampleParamsFromCube CubeRetParams;
1978     CubeRetParams.float_xcube = finalCoords[0];
1979     CubeRetParams.float_ycube = finalCoords[1];
1980     CubeRetParams.float_address_3 = finalCoords[2];
1981     CubeRetParams.float_aicube = float_arrayIndex;
1982     CubeRetParams.int32_textureIdx = int32_textureIdx;
1983     CubeRetParams.int32_sampler = int32_sampler;
1984     CubeRetParams.offsetU = int32_u;
1985     CubeRetParams.offsetV = int32_v;
1986     CubeRetParams.offsetR = m_int0; //Not used
1987     return CubeRetParams;
1988 }
1989 
1990 template<bool preserveNames, typename T, typename Inserter>
Prepare_SAMPLE_Cube_Params(llvm::Value * float_address_0,llvm::Value * float_address_1,llvm::Value * float_address_2,llvm::Value * float_address_3,llvm::Value * int32_textureIdx,llvm::Value * int32_sampler)1991 inline SampleParamsFromCube LLVM3DBuilder<preserveNames, T, Inserter>::Prepare_SAMPLE_Cube_Params(
1992     llvm::Value* float_address_0,
1993     llvm::Value* float_address_1,
1994     llvm::Value* float_address_2,
1995     llvm::Value* float_address_3,
1996     llvm::Value* int32_textureIdx,
1997     llvm::Value* int32_sampler)
1998 {
1999     IGC_ASSERT(nullptr != float_address_0);
2000     llvm::Type* const coordType = float_address_0->getType();
2001     IGC_ASSERT(nullptr != coordType);
2002     IGC_ASSERT(coordType->isFloatTy() || coordType->isHalfTy());
2003 
2004     llvm::Value* zero = llvm::ConstantFP::get(coordType, 0.0);
2005 
2006     //   %xneg_s = fsub float 0.000000e+00, %src_s.chan0
2007     llvm::Value* float_xneg_s_1389 = this->CreateFSub(zero, float_address_0, VALUE_NAME("xneg_s"));
2008 
2009     //   %cmpx_s = fcmp oge float %src_s.chan0, 0.000000e+00
2010     llvm::Value* int1_cmpx_s_1390 = this->CreateFCmp(llvm::FCmpInst::FCMP_OGE, float_address_0, zero, VALUE_NAME("cmpx_s"));
2011 
2012     //   %xabs_s = select i1 %cmpx_s, float %src_s.chan0, float %xneg_s
2013     llvm::Value* float_xabs_s_1391 = this->CreateSelect(int1_cmpx_s_1390, float_address_0, float_xneg_s_1389, VALUE_NAME("xabs_s"));
2014 
2015     //   %yneg_s = fsub float 0.000000e+00, %src_s.chan1
2016     llvm::Value* float_yneg_s_1392 = this->CreateFSub(zero, float_address_1, VALUE_NAME("yneg_s"));
2017 
2018     //   %cmpy_s = fcmp oge float %src_s.chan1, 0.000000e+00
2019     llvm::Value* int1_cmpy_s_1393 = this->CreateFCmp(llvm::FCmpInst::FCMP_OGE, float_address_1, zero, VALUE_NAME("cmpy_s"));
2020 
2021     //   %yabs_s = select i1 %cmpy_s, float %src_s.chan1, float %yneg_s
2022     llvm::Value* float_yabs_s_1394 = this->CreateSelect(int1_cmpy_s_1393, float_address_1, float_yneg_s_1392, VALUE_NAME("yabs_s"));
2023 
2024     //   %aineg_s = fsub float 0.000000e+00, %src_s.chan2
2025     llvm::Value* float_aineg_s_1395 = this->CreateFSub(zero, float_address_2, VALUE_NAME("aineg_s"));
2026 
2027     //   %cmpai_s = fcmp oge float %src_s.chan2, 0.000000e+00
2028     llvm::Value* int1_cmpai_s_1396 = this->CreateFCmp(llvm::FCmpInst::FCMP_OGE, float_address_2, zero, VALUE_NAME("cmpai_s"));
2029 
2030     //   %aiabs_s = select i1 %cmpai_s, float %src_s.chan2, float %aineg_s
2031     llvm::Value* float_aiabs_s_1397 = this->CreateSelect(int1_cmpai_s_1396, float_address_2, float_aineg_s_1395, VALUE_NAME("aiabs_s"));
2032 
2033     //   %oge0_s = fcmp oge float %xabs_s, %yabs_s
2034     llvm::Value* int1_oge0_s_1398 = this->CreateFCmp(llvm::FCmpInst::FCMP_OGE, float_xabs_s_1391, float_yabs_s_1394, VALUE_NAME("oge0_s"));
2035 
2036     //   %max1_s = select i1 %oge0_s, float %xabs_s, float %yabs_s
2037     llvm::Value* float_max1_s_1399 = this->CreateSelect(int1_oge0_s_1398, float_xabs_s_1391, float_yabs_s_1394, VALUE_NAME("max1_s"));
2038 
2039     //   %oge1_s = fcmp oge float %max1_s, %aiabs_s
2040     llvm::Value* int1_oge1_s_1400 = this->CreateFCmp(llvm::FCmpInst::FCMP_OGE, float_max1_s_1399, float_aiabs_s_1397, VALUE_NAME("oge1_s"));
2041 
2042     //   %max2_s = select i1 %oge1_s, float %max1_s, float %aiabs_s
2043     llvm::Value* float_max2_s_1401 = this->CreateSelect(int1_oge1_s_1400, float_max1_s_1399, float_aiabs_s_1397, VALUE_NAME("max2_s"));
2044 
2045     //   %xcube_s = fdiv float %src_s.chan0, %max2_s
2046     llvm::Value* float_xcube_s_1402 = this->CreateFDiv(float_address_0, float_max2_s_1401, VALUE_NAME("xcube_s"));
2047 
2048     //   %ycube_s = fdiv float %src_s.chan1, %max2_s
2049     llvm::Value* float_ycube_s_1403 = this->CreateFDiv(float_address_1, float_max2_s_1401, VALUE_NAME("ycube_s"));
2050 
2051     //   %aicube_s = fdiv float %src_s.chan2, %max2_s
2052     llvm::Value* float_aicube_s_1404 = this->CreateFDiv(float_address_2, float_max2_s_1401, VALUE_NAME("aicube_s"));
2053 
2054     SampleParamsFromCube CubeRetParams;
2055 
2056     CubeRetParams.float_xcube = float_xcube_s_1402;
2057     CubeRetParams.float_ycube = float_ycube_s_1403;
2058     CubeRetParams.float_aicube = float_aicube_s_1404;
2059     CubeRetParams.float_address_3 = float_address_3;
2060     CubeRetParams.int32_textureIdx = int32_textureIdx;
2061     CubeRetParams.int32_sampler = int32_sampler;
2062     CubeRetParams.offsetU = m_int0;
2063     CubeRetParams.offsetV = m_int0;
2064     CubeRetParams.offsetR = m_int0;
2065 
2066     return CubeRetParams;
2067 
2068 }
2069 
2070 template<bool preserveNames, typename T, typename Inserter>
Prepare_SAMPLE_D_DC_Cube_Params(SampleD_DC_FromCubeParams & params)2071 inline SampleD_DC_FromCubeParams LLVM3DBuilder<preserveNames, T, Inserter>::Prepare_SAMPLE_D_DC_Cube_Params(
2072     SampleD_DC_FromCubeParams& params)
2073 {
2074     return Prepare_SAMPLE_D_DC_Cube_Params(
2075         params.float_src_u,
2076         params.float_src_v,
2077         params.float_src_r,
2078         params.float_src_ai,
2079         params.dxu,
2080         params.dxv,
2081         params.dxr,
2082         params.dyu,
2083         params.dyv,
2084         params.dyr,
2085         params.int32_textureIdx,
2086         params.int32_sampler,
2087         params.int32_offsetU,
2088         params.int32_offsetV,
2089         params.int32_offsetW
2090     );
2091 }
2092 
2093 template<bool preserveNames, typename T, typename Inserter>
Prepare_SAMPLE_D_DC_Cube_Params(llvm::Value * float_src_r,llvm::Value * float_src_s,llvm::Value * float_src_t,llvm::Value * float_src_ai,llvm::Value * float_drdx,llvm::Value * float_dsdx,llvm::Value * float_dtdx,llvm::Value * float_drdy,llvm::Value * float_dsdy,llvm::Value * float_dtdy,llvm::Value * int32_textureIdx,llvm::Value * int32_sampler,llvm::Value * int32_offsetU,llvm::Value * int32_offsetV,llvm::Value * int32_offsetW)2094 inline SampleD_DC_FromCubeParams LLVM3DBuilder<preserveNames, T, Inserter>::Prepare_SAMPLE_D_DC_Cube_Params(
2095     llvm::Value* float_src_r,
2096     llvm::Value* float_src_s,
2097     llvm::Value* float_src_t,
2098     llvm::Value* float_src_ai,
2099     llvm::Value* float_drdx,
2100     llvm::Value* float_dsdx,
2101     llvm::Value* float_dtdx,
2102     llvm::Value* float_drdy,
2103     llvm::Value* float_dsdy,
2104     llvm::Value* float_dtdy,
2105     llvm::Value* int32_textureIdx,
2106     llvm::Value* int32_sampler,
2107     llvm::Value* int32_offsetU,
2108     llvm::Value* int32_offsetV,
2109     llvm::Value* int32_offsetW)
2110 {
2111     //  For cube texture sampling, sampling instruction must receive proper cube face ID
2112     //  together with coordinates projected onto that face. Gradients also have to be transformed
2113     //  into the same (cube face) address space.
2114     //  To achieve this we first have to find a major coordinate, then normalize coordinates
2115     //  and select remaining ones as u/v coordinates for the face. Because of the cube texture layout
2116     //  in memory (as 6 2D faces) this sometimes involves changing the coordinate direction (sign).
2117     //  Gradients are transformed using quotient rule for derivatives:
2118     //        (fA/fB)' = (fA'*fB - fB'*fA)/fB^2
2119     //  where fA and fB are base functions, i.e. base cube coordinates in this case.
2120     //  Note that we first normalize coordinates and all derivatives, so calculations
2121     //  here use the form:
2122     //        (fA/fB)' = [fA'/fB] - [fB'/fB]*[fA/fB]
2123 
2124     IGC_ASSERT(nullptr != this->GetInsertBlock());
2125     llvm::Function* const parentFunc = this->GetInsertBlock()->getParent();
2126     IGC_ASSERT(nullptr != float_src_r);
2127     llvm::Type* const coordType = float_src_r->getType();
2128     IGC_ASSERT(nullptr != coordType);
2129     IGC_ASSERT(coordType->isFloatTy() || coordType->isHalfTy());
2130 
2131     llvm::Value* zero = llvm::ConstantFP::get(coordType, 0.0);
2132 
2133     // Create coordinate absolute values to look for major.
2134     llvm::Value* float_abs_r = this->CreateFAbs(float_src_r);
2135     llvm::Value* float_abs_s = this->CreateFAbs(float_src_s);
2136     llvm::Value* float_abs_t = this->CreateFAbs(float_src_t);
2137 
2138     {
2139         llvm::BasicBlock* currentBlock = this->GetInsertBlock();
2140         bool shouldSplitBB = this->GetInsertPoint() != currentBlock->end();
2141 
2142         // Create basic blocks.
2143         llvm::BasicBlock* block_final = llvm::BasicBlock::Create(this->getContext(), VALUE_NAME("cubefinal_block"));
2144 
2145         llvm::BasicBlock* block_major_t = llvm::BasicBlock::Create(this->getContext(), VALUE_NAME("cubemajor_t_block"));
2146         llvm::BasicBlock* block_not_t = llvm::BasicBlock::Create(this->getContext(), VALUE_NAME("cubenott_block"));
2147         llvm::BasicBlock* block_zp = llvm::BasicBlock::Create(this->getContext(), VALUE_NAME("cube_face_zp_block"));
2148         llvm::BasicBlock* block_zm = llvm::BasicBlock::Create(this->getContext(), VALUE_NAME("cube_face_zm_block"));
2149 
2150         llvm::BasicBlock* block_major_s = llvm::BasicBlock::Create(this->getContext(), VALUE_NAME("cubemajor_s_block"));
2151         llvm::BasicBlock* block_yp = llvm::BasicBlock::Create(this->getContext(), VALUE_NAME("cube_face_yp_block"));
2152         llvm::BasicBlock* block_ym = llvm::BasicBlock::Create(this->getContext(), VALUE_NAME("cube_face_ym_block"));
2153 
2154         llvm::BasicBlock* block_major_r = llvm::BasicBlock::Create(this->getContext(), VALUE_NAME("cubemajor_r_block"));
2155         llvm::BasicBlock* block_xp = llvm::BasicBlock::Create(this->getContext(), VALUE_NAME("cube_face_xp_block"));
2156         llvm::BasicBlock* block_xm = llvm::BasicBlock::Create(this->getContext(), VALUE_NAME("cube_face_xm_block"));
2157 
2158         // Find the major coordinate (and thus cube face), precedence is Z,Y,X.
2159         llvm::Value* int1_cmp_tges = this->CreateFCmp(llvm::FCmpInst::FCMP_OGE, float_abs_t, float_abs_s, VALUE_NAME("cmp_tges"));
2160 
2161         llvm::Value* int1_cmp_tger = this->CreateFCmp(llvm::FCmpInst::FCMP_OGE, float_abs_t, float_abs_r, VALUE_NAME("cmp_tger"));
2162 
2163         llvm::Value* int1_tgesr = this->CreateAnd(int1_cmp_tger, int1_cmp_tges);
2164 
2165         // Major coordinate is T, faces could be +Z or -Z
2166         llvm::BasicBlock* splitBlock = nullptr;
2167         if (shouldSplitBB)
2168         {
2169             IGC_ASSERT(nullptr != currentBlock);
2170             IGC_ASSERT(currentBlock->getTerminator());
2171             splitBlock = currentBlock->splitBasicBlock(this->GetInsertPoint()->getNextNode());
2172             currentBlock->getTerminator()->eraseFromParent();
2173             this->SetInsertPoint(currentBlock);
2174         }
2175         this->CreateCondBr(int1_tgesr, block_major_t, block_not_t);
2176         this->SetInsertPoint(block_major_t);
2177         parentFunc->getBasicBlockList().push_back(block_major_t);
2178 
2179         // Normalize coordinates and gradients.
2180         llvm::Value* float_tnorm_r = this->CreateFDiv(float_src_r, float_abs_t, VALUE_NAME("tnorm_r"));
2181         llvm::Value* float_tnorm_s = this->CreateFDiv(float_src_s, float_abs_t, VALUE_NAME("tnorm_s"));
2182         llvm::Value* float_tnorm_drdx = this->CreateFDiv(float_drdx, float_abs_t, VALUE_NAME("tnorm_drdx"));
2183         llvm::Value* float_tnorm_drdy = this->CreateFDiv(float_drdy, float_abs_t, VALUE_NAME("tnorm_drdy"));
2184         llvm::Value* float_tnorm_dsdx = this->CreateFDiv(float_dsdx, float_abs_t, VALUE_NAME("tnorm_dsdx"));
2185         llvm::Value* float_tnorm_dsdy = this->CreateFDiv(float_dsdy, float_abs_t, VALUE_NAME("tnorm_dsdy"));
2186         llvm::Value* float_tnorm_dtdx = this->CreateFDiv(float_dtdx, float_abs_t, VALUE_NAME("tnorm_dtdx"));
2187         llvm::Value* float_tnorm_dtdy = this->CreateFDiv(float_dtdy, float_abs_t, VALUE_NAME("tnorm_dtdy"));
2188 
2189         // Select positive or negative face.
2190         llvm::Value* int1_cmpx_t = this->CreateFCmp(llvm::FCmpInst::FCMP_OGE, float_src_t, zero, VALUE_NAME("cmpx_t"));
2191         this->CreateCondBr(int1_cmpx_t, block_zp, block_zm);
2192         this->SetInsertPoint(block_zp);
2193         parentFunc->getBasicBlockList().push_back(block_zp);
2194 
2195         // Face +Z,
2196         // major = neg T
2197         // u     = R
2198         // v     = neg S
2199 
2200         llvm::Value* float_face_zp_id = llvm::cast<llvm::ConstantFP>(llvm::ConstantFP::get(coordType, 4.0));
2201 
2202         // Select u from s/r/t
2203         llvm::Value* float_face_zp_u = float_tnorm_r;
2204 
2205         // Select v from s/r/t
2206         llvm::Value* float_face_zp_v = this->CreateFNeg(float_tnorm_s, VALUE_NAME("face_zp_v"));
2207 
2208         // du/dx = dm * u + d{s/r/t}/dx
2209         llvm::Value* float_neg_dmx4 = this->CreateFNeg(float_tnorm_dtdx, VALUE_NAME("neg_dmx"));
2210         llvm::Value* float_dmxu4 = this->CreateFMul(float_neg_dmx4, float_tnorm_r, VALUE_NAME("dmxu"));
2211         llvm::Value* float_face_zp_dudx = this->CreateFAdd(float_dmxu4, float_tnorm_drdx, VALUE_NAME("face_zp_dudx"));
2212 
2213         // du/dy = dm * u + d{s/r/t}/dy
2214         llvm::Value* float_neg_dmy4 = this->CreateFNeg(float_tnorm_dtdy, VALUE_NAME("neg_dmy"));
2215         llvm::Value* float_dmyu4 = this->CreateFMul(float_neg_dmy4, float_tnorm_r, VALUE_NAME("dmyu"));
2216         llvm::Value* float_face_zp_dudy = this->CreateFAdd(float_dmyu4, float_tnorm_drdy, VALUE_NAME("face_zp_dvdx"));
2217 
2218         // dv/dx = dm * v + d{s/r/t}/dx
2219         llvm::Value* float_dmxv4 = this->CreateFMul(float_tnorm_dtdx, float_tnorm_s, VALUE_NAME("dmxv"));
2220         llvm::Value* float_face_zp_dvdx = this->CreateFSub(float_dmxv4, float_tnorm_dsdx, VALUE_NAME("face_zp_dvdx"));
2221 
2222         // dv/dy = dm * v + d{s/r/t}/dy
2223         llvm::Value* float_dmyv4 = this->CreateFMul(float_tnorm_dtdy, float_tnorm_s, VALUE_NAME("dmyv"));
2224         llvm::Value* float_face_zp_dvdy = this->CreateFSub(float_dmyv4, float_tnorm_dsdy, VALUE_NAME("face_zp_dvdy"));
2225 
2226         this->CreateBr(block_final);
2227         this->SetInsertPoint(block_zm);
2228         parentFunc->getBasicBlockList().push_back(block_zm);
2229 
2230         // Face -Z,
2231         // major = T
2232         // u     = neg R
2233         // v     = neg S
2234 
2235         llvm::Value* float_face_zm_id = llvm::cast<llvm::ConstantFP>(llvm::ConstantFP::get(coordType, 5.0));
2236 
2237         // Select u from s/r/t
2238         llvm::Value* float_face_zm_u = this->CreateFNeg(float_tnorm_r, VALUE_NAME("face_zm_u"));
2239 
2240         // Select v from s/r/t
2241         llvm::Value* float_face_zm_v = this->CreateFNeg(float_tnorm_s, VALUE_NAME("face_zm_v"));
2242 
2243         // du/dx = dm * u + d{s/r/t}/dx
2244         llvm::Value* float_dmxu5 = this->CreateFMul(float_tnorm_dtdx, float_face_zm_u, VALUE_NAME("dmxu"));
2245         llvm::Value* float_face_zm_dudx = this->CreateFSub(float_dmxu5, float_tnorm_drdx, VALUE_NAME("face_zm_dudx"));
2246 
2247         // du/dy = dm * u + d{s/r/t}/dy
2248         llvm::Value* float_dmyu5 = this->CreateFMul(float_tnorm_dtdy, float_face_zm_u, VALUE_NAME("dmyu"));
2249         llvm::Value* float_face_zm_dudy = this->CreateFSub(float_dmyu5, float_tnorm_drdy, VALUE_NAME("face_zm_dvdx"));
2250 
2251         // dv/dx = dm * v + d{s/r/t}/dx
2252         llvm::Value* float_dmxv5 = this->CreateFMul(float_tnorm_dtdx, float_face_zm_v, VALUE_NAME("dmxv"));
2253         llvm::Value* float_face_zm_dvdx = this->CreateFSub(float_dmxv5, float_tnorm_dsdx, VALUE_NAME("face_zm_dvdx"));
2254 
2255         // dv/dy = dm * v + d{s/r/t}/dy
2256         llvm::Value* float_dmyv5 = this->CreateFMul(float_tnorm_dtdy, float_face_zm_v, VALUE_NAME("dmyv"));
2257         llvm::Value* float_face_zm_dvdy = this->CreateFSub(float_dmyv5, float_tnorm_dsdy, VALUE_NAME("face_zm_dvdy"));
2258 
2259         this->CreateBr(block_final);
2260         this->SetInsertPoint(block_not_t);
2261         parentFunc->getBasicBlockList().push_back(block_not_t);
2262 
2263         // Choose major S or R.
2264         llvm::Value* int1_cmp_sger = this->CreateFCmp(llvm::FCmpInst::FCMP_OGE, float_abs_s, float_abs_r, VALUE_NAME("cmp_sger"));
2265 
2266         // Major coordinate is S, faces could be +Y or -Y
2267         this->CreateCondBr(int1_cmp_sger, block_major_s, block_major_r);
2268         this->SetInsertPoint(block_major_s);
2269         parentFunc->getBasicBlockList().push_back(block_major_s);
2270 
2271         // Normalize coordinates and gradients.
2272         llvm::Value* float_snorm_r = this->CreateFDiv(float_src_r, float_abs_s, VALUE_NAME("snorm_r"));
2273         llvm::Value* float_snorm_t = this->CreateFDiv(float_src_t, float_abs_s, VALUE_NAME("snorm_t"));
2274         llvm::Value* float_snorm_drdx = this->CreateFDiv(float_drdx, float_abs_s, VALUE_NAME("snorm_drdx"));
2275         llvm::Value* float_snorm_drdy = this->CreateFDiv(float_drdy, float_abs_s, VALUE_NAME("snorm_drdy"));
2276         llvm::Value* float_snorm_dsdx = this->CreateFDiv(float_dsdx, float_abs_s, VALUE_NAME("snorm_dsdx"));
2277         llvm::Value* float_snorm_dsdy = this->CreateFDiv(float_dsdy, float_abs_s, VALUE_NAME("snorm_dsdy"));
2278         llvm::Value* float_snorm_dtdx = this->CreateFDiv(float_dtdx, float_abs_s, VALUE_NAME("snorm_dtdx"));
2279         llvm::Value* float_snorm_dtdy = this->CreateFDiv(float_dtdy, float_abs_s, VALUE_NAME("snorm_dtdy"));
2280 
2281         // Select positive or negative face.
2282         llvm::Value* int1_cmpx_s = this->CreateFCmp(llvm::FCmpInst::FCMP_OGE, float_src_s, zero, VALUE_NAME("cmpx_s"));
2283         this->CreateCondBr(int1_cmpx_s, block_yp, block_ym);
2284         this->SetInsertPoint(block_yp);
2285         parentFunc->getBasicBlockList().push_back(block_yp);
2286 
2287         // Face +Y,
2288         // major = neg S
2289         // u     = R
2290         // v     = T
2291 
2292         llvm::Value* float_face_yp_id = llvm::cast<llvm::ConstantFP>(llvm::ConstantFP::get(coordType, 2.0));
2293 
2294         // Select u from s/r/t
2295         llvm::Value* float_face_yp_u = float_snorm_r;
2296 
2297         // Select v from s/r/t
2298         llvm::Value* float_face_yp_v = float_snorm_t;
2299 
2300         // du/dx = dm * u + d{s/r/t}/dx
2301         llvm::Value* float_neg_dmx2 = this->CreateFNeg(float_snorm_dsdx, VALUE_NAME("neg_dmx"));
2302         llvm::Value* float_dmxu2 = this->CreateFMul(float_neg_dmx2, float_snorm_r, VALUE_NAME("dmxu"));
2303         llvm::Value* float_face_yp_dudx = this->CreateFAdd(float_dmxu2, float_snorm_drdx, VALUE_NAME("face_yp_dudx"));
2304 
2305         // du/dy = dm * u + d{s/r/t}/dy
2306         llvm::Value* float_neg_dmy2 = this->CreateFNeg(float_snorm_dsdy, VALUE_NAME("neg_dmy"));
2307         llvm::Value* float_dmyu2 = this->CreateFMul(float_neg_dmy2, float_snorm_r, VALUE_NAME("dmyu"));
2308         llvm::Value* float_face_yp_dudy = this->CreateFAdd(float_dmyu2, float_snorm_drdy, VALUE_NAME("face_yp_dvdx"));
2309 
2310         // dv/dx = dm * v + d{s/r/t}/dx
2311         llvm::Value* float_dmxv2 = this->CreateFMul(float_neg_dmx2, float_snorm_t, VALUE_NAME("dmxv"));
2312         llvm::Value* float_face_yp_dvdx = this->CreateFAdd(float_dmxv2, float_snorm_dtdx, VALUE_NAME("face_yp_dvdx"));
2313 
2314         // dv/dy = dm * v + d{s/r/t}/dy
2315         llvm::Value* float_dmyv2 = this->CreateFMul(float_neg_dmy2, float_snorm_t, VALUE_NAME("dmyv"));
2316         llvm::Value* float_face_yp_dvdy = this->CreateFAdd(float_dmyv2, float_snorm_dtdy, VALUE_NAME("face_yp_dvdy"));
2317 
2318         this->CreateBr(block_final);
2319         this->SetInsertPoint(block_ym);
2320         parentFunc->getBasicBlockList().push_back(block_ym);
2321 
2322         // Face -Y,
2323         // major = S
2324         // u     = R
2325         // v     = neg T
2326 
2327         llvm::Value* float_face_ym_id = llvm::cast<llvm::ConstantFP>(llvm::ConstantFP::get(coordType, 3.0));
2328 
2329         // Select u from s/r/t
2330         llvm::Value* float_face_ym_u = float_snorm_r;
2331 
2332         // Select v from s/r/t
2333         llvm::Value* float_face_ym_v = this->CreateFNeg(float_snorm_t, VALUE_NAME("face_ym_v"));
2334 
2335         // du/dx = dm * u + d{s/r/t}/dx
2336         llvm::Value* float_dmxu3 = this->CreateFMul(float_snorm_dsdx, float_snorm_r, VALUE_NAME("dmxu"));
2337         llvm::Value* float_face_ym_dudx = this->CreateFAdd(float_dmxu3, float_snorm_drdx, VALUE_NAME("face_ym_dudx"));
2338 
2339         // du/dy = dm * u + d{s/r/t}/dy
2340         llvm::Value* float_dmyu3 = this->CreateFMul(float_snorm_dsdy, float_snorm_r, VALUE_NAME("dmyu"));
2341         llvm::Value* float_face_ym_dudy = this->CreateFAdd(float_dmyu3, float_snorm_drdy, VALUE_NAME("face_ym_dvdx"));
2342 
2343         // dv/dx = dm * v + d{s/r/t}/dx
2344         llvm::Value* float_dmxv3 = this->CreateFMul(float_snorm_dsdx, float_face_ym_v, VALUE_NAME("dmxv"));
2345         llvm::Value* float_face_ym_dvdx = this->CreateFSub(float_dmxv3, float_snorm_dtdx, VALUE_NAME("face_ym_dvdx"));
2346 
2347         // dv/dy = dm * v + d{s/r/t}/dy
2348         llvm::Value* float_dmyv3 = this->CreateFMul(float_snorm_dsdx, float_face_ym_v, VALUE_NAME("dmyv"));
2349         llvm::Value* float_face_ym_dvdy = this->CreateFSub(float_dmyv3, float_snorm_dtdy, VALUE_NAME("face_ym_dvdy"));
2350 
2351         this->CreateBr(block_final);
2352         this->SetInsertPoint(block_major_r);
2353         parentFunc->getBasicBlockList().push_back(block_major_r);
2354 
2355         // Major coordinate is R, faces could be +X or -X
2356 
2357         // Normalize coordinates and gradients.
2358         llvm::Value* float_rnorm_s = this->CreateFDiv(float_src_s, float_abs_r, VALUE_NAME("rnorm_r"));
2359         llvm::Value* float_rnorm_t = this->CreateFDiv(float_src_t, float_abs_r, VALUE_NAME("rnorm_t"));
2360         llvm::Value* float_rnorm_drdx = this->CreateFDiv(float_drdx, float_abs_r, VALUE_NAME("rnorm_drdx"));
2361         llvm::Value* float_rnorm_drdy = this->CreateFDiv(float_drdy, float_abs_r, VALUE_NAME("rnorm_drdy"));
2362         llvm::Value* float_rnorm_dsdx = this->CreateFDiv(float_dsdx, float_abs_r, VALUE_NAME("rnorm_dsdx"));
2363         llvm::Value* float_rnorm_dsdy = this->CreateFDiv(float_dsdy, float_abs_r, VALUE_NAME("rnorm_dsdy"));
2364         llvm::Value* float_rnorm_dtdx = this->CreateFDiv(float_dtdx, float_abs_r, VALUE_NAME("rnorm_dtdx"));
2365         llvm::Value* float_rnorm_dtdy = this->CreateFDiv(float_dtdy, float_abs_r, VALUE_NAME("rnorm_dtdy"));
2366 
2367         // Select positive or negative face.
2368         llvm::Value* int1_cmpx_r = this->CreateFCmp(llvm::FCmpInst::FCMP_OGE, float_src_r, zero, VALUE_NAME("cmpx_r"));
2369         this->CreateCondBr(int1_cmpx_r, block_xp, block_xm);
2370         this->SetInsertPoint(block_xp);
2371         parentFunc->getBasicBlockList().push_back(block_xp);
2372 
2373         // Face +X,
2374         // major = neg R
2375         // u     = neg T
2376         // v     = neg S
2377 
2378         llvm::Value* float_face_xp_id = llvm::cast<llvm::ConstantFP>(llvm::ConstantFP::get(coordType, 0.0));
2379 
2380         // Select u from s/r/t
2381         llvm::Value* float_face_xp_u = this->CreateFNeg(float_rnorm_t, VALUE_NAME("face_xp_u"));
2382 
2383         // Select v from s/r/t
2384         llvm::Value* float_face_xp_v = this->CreateFNeg(float_rnorm_s, VALUE_NAME("face_xp_v"));
2385 
2386         // du/dx = dm * u + d{s/r/t}/dx
2387         llvm::Value* float_dmxu0 = this->CreateFMul(float_rnorm_drdx, float_rnorm_t, VALUE_NAME("dmxu"));
2388         llvm::Value* float_face_xp_dudx = this->CreateFSub(float_dmxu0, float_rnorm_dtdx, VALUE_NAME("face_xp_dudx"));
2389 
2390         // du/dy = dm * u + d{s/r/t}/dy
2391         llvm::Value* float_dmyu0 = this->CreateFMul(float_rnorm_drdy, float_rnorm_t, VALUE_NAME("dmyu"));
2392         llvm::Value* float_face_xp_dudy = this->CreateFSub(float_dmyu0, float_rnorm_dtdy, VALUE_NAME("face_xp_dvdx"));
2393 
2394         // dv/dx = dm * v + d{s/r/t}/dx
2395         llvm::Value* float_dmxv0 = this->CreateFMul(float_rnorm_drdx, float_rnorm_s, VALUE_NAME("dmxv"));
2396         llvm::Value* float_face_xp_dvdx = this->CreateFSub(float_dmxv0, float_rnorm_dsdx, VALUE_NAME("face_xp_dvdx"));
2397 
2398         // dv/dy = dm * v + d{s/r/t}/dy
2399         llvm::Value* float_dmyv0 = this->CreateFMul(float_rnorm_drdy, float_rnorm_s, VALUE_NAME("dmyv"));
2400         llvm::Value* float_face_xp_dvdy = this->CreateFSub(float_dmyv0, float_rnorm_dsdy, VALUE_NAME("face_xp_dvdy"));
2401 
2402         this->CreateBr(block_final);
2403         this->SetInsertPoint(block_xm);
2404         parentFunc->getBasicBlockList().push_back(block_xm);
2405 
2406         // Face -X,
2407         // major = R
2408         // u     = T
2409         // v     = neg S
2410 
2411         llvm::Value* float_face_xm_id = llvm::cast<llvm::ConstantFP>(llvm::ConstantFP::get(coordType, 1.0));
2412 
2413         // Select u from s/r/t
2414         llvm::Value* float_face_xm_u = float_rnorm_t;
2415 
2416         // Select v from s/r/t
2417         llvm::Value* float_face_xm_v = this->CreateFNeg(float_rnorm_s, VALUE_NAME("face_xm_v"));
2418 
2419         // du/dx = dm * u + d{s/r/t}/dx
2420         llvm::Value* float_dmxu1 = this->CreateFMul(float_rnorm_drdx, float_rnorm_t, VALUE_NAME("dmxu"));
2421         llvm::Value* float_face_xm_dudx = this->CreateFAdd(float_dmxu1, float_rnorm_dtdx, VALUE_NAME("face_xm_dudx"));
2422 
2423         // du/dy = dm * u + d{s/r/t}/dy
2424         llvm::Value* float_dmyu1 = this->CreateFMul(float_rnorm_drdy, float_rnorm_t, VALUE_NAME("dmyu"));
2425         llvm::Value* float_face_xm_dudy = this->CreateFAdd(float_dmyu1, float_rnorm_dtdx, VALUE_NAME("face_xm_dvdx"));
2426 
2427         // dv/dx = dm * v + d{s/r/t}/dx
2428         llvm::Value* float_dmxv1 = this->CreateFMul(float_rnorm_drdx, float_face_xm_v, VALUE_NAME("dmxv"));
2429         llvm::Value* float_face_xm_dvdx = this->CreateFSub(float_dmxv1, float_rnorm_dsdx, VALUE_NAME("face_xm_dvdx"));
2430 
2431         // dv/dy = dm * v + d{s/r/t}/dy
2432         llvm::Value* float_dmyv1 = this->CreateFMul(float_rnorm_drdy, float_face_xm_v, VALUE_NAME("dmyv"));
2433         llvm::Value* float_face_xm_dvdy = this->CreateFSub(float_dmyv1, float_rnorm_dsdy, VALUE_NAME("face_xm_dvdy"));
2434 
2435         this->CreateBr(block_final);
2436         this->SetInsertPoint(block_final);
2437         parentFunc->getBasicBlockList().push_back(block_final);
2438 
2439         llvm::PHINode* phi_u = this->CreatePHI(coordType, 6, VALUE_NAME("phi_u"));
2440         phi_u->addIncoming(float_face_xp_u, block_xp);
2441         phi_u->addIncoming(float_face_xm_u, block_xm);
2442         phi_u->addIncoming(float_face_yp_u, block_yp);
2443         phi_u->addIncoming(float_face_ym_u, block_ym);
2444         phi_u->addIncoming(float_face_zp_u, block_zp);
2445         phi_u->addIncoming(float_face_zm_u, block_zm);
2446 
2447         llvm::PHINode* phi_v = this->CreatePHI(coordType, 6, VALUE_NAME("phi_v"));
2448         phi_v->addIncoming(float_face_xp_v, block_xp);
2449         phi_v->addIncoming(float_face_xm_v, block_xm);
2450         phi_v->addIncoming(float_face_yp_v, block_yp);
2451         phi_v->addIncoming(float_face_ym_v, block_ym);
2452         phi_v->addIncoming(float_face_zp_v, block_zp);
2453         phi_v->addIncoming(float_face_zm_v, block_zm);
2454 
2455         llvm::PHINode* phi_dudx = this->CreatePHI(coordType, 6, VALUE_NAME("phi_dudx"));
2456         phi_dudx->addIncoming(float_face_xp_dudx, block_xp);
2457         phi_dudx->addIncoming(float_face_xm_dudx, block_xm);
2458         phi_dudx->addIncoming(float_face_yp_dudx, block_yp);
2459         phi_dudx->addIncoming(float_face_ym_dudx, block_ym);
2460         phi_dudx->addIncoming(float_face_zp_dudx, block_zp);
2461         phi_dudx->addIncoming(float_face_zm_dudx, block_zm);
2462 
2463         llvm::PHINode* phi_dudy = this->CreatePHI(coordType, 6, VALUE_NAME("phi_dudy"));
2464         phi_dudy->addIncoming(float_face_xp_dudy, block_xp);
2465         phi_dudy->addIncoming(float_face_xm_dudy, block_xm);
2466         phi_dudy->addIncoming(float_face_yp_dudy, block_yp);
2467         phi_dudy->addIncoming(float_face_ym_dudy, block_ym);
2468         phi_dudy->addIncoming(float_face_zp_dudy, block_zp);
2469         phi_dudy->addIncoming(float_face_zm_dudy, block_zm);
2470 
2471         llvm::PHINode* phi_dvdx = this->CreatePHI(coordType, 6, VALUE_NAME("phi_dvdx"));
2472         phi_dvdx->addIncoming(float_face_xp_dvdx, block_xp);
2473         phi_dvdx->addIncoming(float_face_xm_dvdx, block_xm);
2474         phi_dvdx->addIncoming(float_face_yp_dvdx, block_yp);
2475         phi_dvdx->addIncoming(float_face_ym_dvdx, block_ym);
2476         phi_dvdx->addIncoming(float_face_zp_dvdx, block_zp);
2477         phi_dvdx->addIncoming(float_face_zm_dvdx, block_zm);
2478 
2479         llvm::PHINode* phi_dvdy = this->CreatePHI(coordType, 6, VALUE_NAME("phi_dvdy"));
2480         phi_dvdy->addIncoming(float_face_xp_dvdy, block_xp);
2481         phi_dvdy->addIncoming(float_face_xm_dvdy, block_xm);
2482         phi_dvdy->addIncoming(float_face_yp_dvdy, block_yp);
2483         phi_dvdy->addIncoming(float_face_ym_dvdy, block_ym);
2484         phi_dvdy->addIncoming(float_face_zp_dvdy, block_zp);
2485         phi_dvdy->addIncoming(float_face_zm_dvdy, block_zm);
2486 
2487         llvm::PHINode* phi_face_id = this->CreatePHI(coordType, 6, VALUE_NAME("phi_face_id"));
2488         phi_face_id->addIncoming(float_face_xp_id, block_xp);
2489         phi_face_id->addIncoming(float_face_xm_id, block_xm);
2490         phi_face_id->addIncoming(float_face_yp_id, block_yp);
2491         phi_face_id->addIncoming(float_face_ym_id, block_ym);
2492         phi_face_id->addIncoming(float_face_zp_id, block_zp);
2493         phi_face_id->addIncoming(float_face_zm_id, block_zm);
2494 
2495         if (shouldSplitBB)
2496         {
2497             llvm::BranchInst* brInst = this->CreateBr(splitBlock);
2498             this->SetInsertPoint(brInst);
2499         }
2500 
2501         SampleD_DC_FromCubeParams D_DC_CUBE_params;
2502 
2503         D_DC_CUBE_params.float_src_u = phi_u;
2504         D_DC_CUBE_params.dxu = phi_dudx;
2505         D_DC_CUBE_params.dyu = phi_dudy;
2506         D_DC_CUBE_params.float_src_v = phi_v;
2507         D_DC_CUBE_params.dxv = phi_dvdx;
2508         D_DC_CUBE_params.dyv = phi_dvdy;
2509         D_DC_CUBE_params.float_src_r = phi_face_id;
2510         D_DC_CUBE_params.dxr = zero;
2511         D_DC_CUBE_params.dyr = zero;
2512         D_DC_CUBE_params.float_src_ai = float_src_ai;
2513         D_DC_CUBE_params.int32_textureIdx = int32_textureIdx;
2514         D_DC_CUBE_params.int32_sampler = int32_sampler;
2515         D_DC_CUBE_params.int32_offsetU = m_int0;
2516         D_DC_CUBE_params.int32_offsetV = m_int0;
2517         D_DC_CUBE_params.int32_offsetW = m_int0;
2518 
2519         return D_DC_CUBE_params;
2520     }
2521 }
2522 
2523 template<bool preserveNames, typename T, typename Inserter>
CreateFAbs(llvm::Value * V)2524 llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::CreateFAbs(llvm::Value* V)
2525 {
2526     llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
2527 
2528     llvm::Function* fabs = llvm::Intrinsic::getDeclaration(module, llvm::Intrinsic::fabs, V->getType());
2529     return this->CreateCall(fabs, V);
2530 }
2531 
2532 template<bool preserveNames, typename T, typename Inserter>
CreateFSat(llvm::Value * V)2533 llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::CreateFSat(llvm::Value* V)
2534 {
2535     llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
2536 
2537     llvm::Function* fsat =
2538         llvm::GenISAIntrinsic::getDeclaration(module, llvm::GenISAIntrinsic::GenISA_fsat, V->getType());
2539     return this->CreateCall(fsat, V);
2540 }
2541 
2542 template<bool preserveNames, typename T, typename Inserter>
CreateF16TOF32(llvm::Value * f16_src)2543 llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::CreateF16TOF32(
2544     llvm::Value* f16_src)
2545 {
2546     llvm::Value* f32_dst = this->CreateFPExt(f16_src, this->getFloatTy(), VALUE_NAME("src0_s"));
2547     return f32_dst;
2548 }
2549 
2550 /*****************************************************************************\
2551 Description:
2552 Returns true if additional conversion is required if given format is
2553 128bit.
2554 
2555 Input:
2556 SURFACE_FORMAT format           - conversion format
2557 
2558 Output:
2559 bool - return value.
2560 
2561 \*****************************************************************************/
2562 template<bool preserveNames, typename T, typename Inserter>
NeedConversionFor128FormatRead(IGC::SURFACE_FORMAT format) const2563 bool LLVM3DBuilder<preserveNames, T, Inserter>::NeedConversionFor128FormatRead(
2564     IGC::SURFACE_FORMAT format) const
2565 {
2566     bool needsConversion = true;
2567 
2568     if ((format == IGC::SURFACE_FORMAT::SURFACE_FORMAT_R32G32B32A32_FLOAT) ||
2569         (format == IGC::SURFACE_FORMAT::SURFACE_FORMAT_R32G32B32A32_UINT) ||
2570         (format == IGC::SURFACE_FORMAT::SURFACE_FORMAT_R32G32B32A32_SINT))
2571     {
2572         needsConversion = false;
2573     }
2574 
2575     return needsConversion;
2576 }
2577 
2578 template<bool preserveNames, typename T, typename Inserter>
Create_UBFE(llvm::Value * int32_width,llvm::Value * int32_offset,llvm::Value * int32_source)2579 inline llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::Create_UBFE(
2580     llvm::Value* int32_width,
2581     llvm::Value* int32_offset,
2582     llvm::Value* int32_source)
2583 {
2584     //   %res = call i32 @llvm.GenISA.ubfe(i32 %src0_s, i32 %src1_s, i32 %src2_s)
2585     llvm::Value * packed_params[] = {
2586         int32_width,
2587         int32_offset,
2588         int32_source
2589     };
2590     llvm::CallInst* int32_res = llvm::cast<llvm::CallInst>(this->CreateCall(llvm_GenISA_ubfe(), packed_params));
2591     return int32_res;
2592 }
2593 
2594 template<bool preserveNames, typename T, typename Inserter>
Create_IBFE(llvm::Value * int32_width,llvm::Value * int32_offset,llvm::Value * int32_source)2595 inline llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::Create_IBFE(
2596     llvm::Value* int32_width,
2597     llvm::Value* int32_offset,
2598     llvm::Value* int32_source)
2599 {
2600     //   %res = call i32 @llvm.GenISA.ibfe(i32 %int32_width, i32 %int32_offset, i32 %int32_source)
2601     llvm::Value * packed_params[] = {
2602         int32_width,
2603         int32_offset,
2604         int32_source
2605     };
2606     llvm::CallInst* int32_res = llvm::cast<llvm::CallInst>(this->CreateCall(llvm_GenISA_ibfe(), packed_params));
2607     return int32_res;
2608 }
2609 
2610 template<bool preserveNames, typename T, typename Inserter>
Create_BFI(llvm::Value * int32_width,llvm::Value * int32_offset,llvm::Value * int32_source,llvm::Value * int32_replace)2611 inline llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::Create_BFI(
2612     llvm::Value* int32_width,
2613     llvm::Value* int32_offset,
2614     llvm::Value* int32_source,
2615     llvm::Value* int32_replace)
2616 {
2617     llvm::Value * packed_params[] = {
2618         int32_width,
2619         int32_offset,
2620         int32_source,
2621         int32_replace
2622     };
2623     llvm::CallInst* int32_res = llvm::cast<llvm::CallInst>(this->CreateCall(llvm_GenISA_bfi(), packed_params));
2624     return int32_res;
2625 }
2626 
2627 template<bool preserveNames, typename T, typename Inserter>
Create_BFREV(llvm::Value * int32_source)2628 inline llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::Create_BFREV(
2629     llvm::Value* int32_source)
2630 {
2631     llvm::Value * packed_params[] = {
2632         int32_source
2633     };
2634     llvm::CallInst* int32_res = llvm::cast<llvm::CallInst>(this->CreateCall(llvm_GenISA_bfrev(), packed_params));
2635     return int32_res;
2636 }
2637 
2638 template<bool preserveNames, typename T, typename Inserter>
Create_FirstBitHi(llvm::Value * int32_source)2639 inline llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::Create_FirstBitHi(
2640     llvm::Value* int32_source)
2641 {
2642     llvm::Value * packed_params[] = {
2643         int32_source
2644     };
2645     llvm::CallInst* int32_res = llvm::cast<llvm::CallInst>(this->CreateCall(llvm_GenISA_firstbitHi(), packed_params));
2646     return int32_res;
2647 }
2648 
2649 template<bool preserveNames, typename T, typename Inserter>
Create_FirstBitLo(llvm::Value * int32_source)2650 inline llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::Create_FirstBitLo(
2651     llvm::Value* int32_source)
2652 {
2653     llvm::Value * packed_params[] = {
2654         int32_source
2655     };
2656     llvm::CallInst* int32_res = llvm::cast<llvm::CallInst>(this->CreateCall(llvm_GenISA_firstbitLo(), packed_params));
2657     return int32_res;
2658 }
2659 
2660 template<bool preserveNames, typename T, typename Inserter>
Create_FirstBitShi(llvm::Value * int32_source)2661 inline llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::Create_FirstBitShi(
2662     llvm::Value* int32_source)
2663 {
2664     llvm::Value * packed_params[] = {
2665         int32_source
2666     };
2667     llvm::CallInst* int32_res = llvm::cast<llvm::CallInst>(this->CreateCall(llvm_GenISA_firstbitShi(), packed_params));
2668     return int32_res;
2669 }
2670 
2671 template<bool preserveNames, typename T, typename Inserter>
create_indirectLoad(llvm::Value * srcBuffer,llvm::Value * offset,llvm::Value * alignment,llvm::Type * returnType,bool isVolatile)2672 inline llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::create_indirectLoad(
2673     llvm::Value* srcBuffer,
2674     llvm::Value* offset,
2675     llvm::Value* alignment,
2676     llvm::Type* returnType,
2677     bool isVolatile /* false */)
2678 {
2679     llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
2680     llvm::Type* types[] = {
2681         returnType,
2682         srcBuffer->getType()
2683     };
2684     llvm::Function* pfuncLdPtr = llvm::GenISAIntrinsic::getDeclaration(
2685         module,
2686         llvm::GenISAIntrinsic::GenISA_ldrawvector_indexed,
2687         types);
2688     return this->CreateCall4(pfuncLdPtr, srcBuffer, offset, alignment, this->getInt1(isVolatile));
2689 }
2690 
2691 template<bool preserveNames, typename T, typename Inserter>
create_indirectStore(llvm::Value * srcBuffer,llvm::Value * offset,llvm::Value * data,bool isVolatile)2692 inline llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::create_indirectStore(
2693     llvm::Value* srcBuffer,
2694     llvm::Value* offset,
2695     llvm::Value* data,
2696     bool isVolatile /* false */ )
2697 {
2698     llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
2699     llvm::Type* types[] = {
2700         srcBuffer->getType(),
2701         data->getType(),
2702     };
2703     llvm::Function* pFunc = llvm::GenISAIntrinsic::getDeclaration(
2704         module,
2705         llvm::GenISAIntrinsic::GenISA_storerawvector_indexed,
2706         types);
2707     llvm::Value* alignment = this->getInt32(data->getType()->getScalarSizeInBits() / 8);
2708     return this->CreateCall5(pFunc, srcBuffer, offset, data, alignment, this->getInt1(isVolatile));
2709 }
2710 
2711 template<bool preserveNames, typename T, typename Inserter>
create_atomicCounterIncrement(llvm::Value * srcBuffer)2712 inline llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::create_atomicCounterIncrement(
2713     llvm::Value* srcBuffer)
2714 {
2715     llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
2716     llvm::Function* pFunc = llvm::GenISAIntrinsic::getDeclaration(
2717         module,
2718         llvm::GenISAIntrinsic::GenISA_atomiccounterinc,
2719         srcBuffer->getType());
2720     return this->CreateCall(pFunc, srcBuffer);
2721 }
2722 
2723 template<bool preserveNames, typename T, typename Inserter>
create_atomicCounterDecrement(llvm::Value * srcBuffer)2724 inline llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::create_atomicCounterDecrement(
2725     llvm::Value* srcBuffer)
2726 {
2727     llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
2728     llvm::Function* pFunc = llvm::GenISAIntrinsic::getDeclaration(
2729         module,
2730         llvm::GenISAIntrinsic::GenISA_atomiccounterpredec,
2731         srcBuffer->getType());
2732     return this->CreateCall(pFunc, srcBuffer);
2733 }
2734 
2735 template<bool preserveNames, typename T, typename Inserter>
createThreadLocalId(unsigned int dim)2736 inline llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::createThreadLocalId(unsigned int dim)
2737 {
2738     llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
2739     llvm::Function* pFunc = llvm::GenISAIntrinsic::getDeclaration(
2740         module,
2741         llvm::GenISAIntrinsic::GenISA_DCL_SystemValue,
2742         this->getInt32Ty());
2743     return this->CreateCall(pFunc, this->getInt32(IGC::THREAD_ID_IN_GROUP_X + dim));
2744 }
2745 
2746 template<bool preserveNames, typename T, typename Inserter>
createGroupId(unsigned int dim)2747 inline llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::createGroupId(unsigned int dim)
2748 {
2749     llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
2750     llvm::Function* pFunc = llvm::GenISAIntrinsic::getDeclaration(
2751         module,
2752         llvm::GenISAIntrinsic::GenISA_DCL_SystemValue,
2753         this->getFloatTy());
2754     return this->CreateBitCast(
2755         this->CreateCall(pFunc, this->getInt32(IGC::THREAD_GROUP_ID_X + dim)), this->getInt32Ty());
2756 }
2757 
2758 template<bool preserveNames, typename T, typename Inserter>
CreateFrc(llvm::Value * V)2759 llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::CreateFrc(llvm::Value* V)
2760 {
2761     llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
2762 
2763     llvm::Function* frc =
2764         llvm::GenISAIntrinsic::getDeclaration(module, llvm::GenISAIntrinsic::GenISA_frc);
2765     return this->CreateCall(frc, V);
2766 }
2767 
2768 template<bool preserveNames, typename T, typename Inserter>
CreateSin(llvm::Value * V)2769 llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::CreateSin(llvm::Value* V)
2770 {
2771     llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
2772 
2773     llvm::Function* sin =
2774         llvm::Intrinsic::getDeclaration(module, llvm::Intrinsic::sin, V->getType());
2775     return this->CreateCall(sin, V);
2776 }
2777 
2778 template<bool preserveNames, typename T, typename Inserter>
CreateCos(llvm::Value * V)2779 llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::CreateCos(llvm::Value* V)
2780 {
2781     llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
2782 
2783     llvm::Function* cos =
2784         llvm::Intrinsic::getDeclaration(module, llvm::Intrinsic::cos, V->getType());
2785     return this->CreateCall(cos, V);
2786 }
2787 
2788 template<bool preserveNames, typename T, typename Inserter>
CreateSqrt(llvm::Value * V)2789 llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::CreateSqrt(llvm::Value* V)
2790 {
2791     llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
2792 
2793     llvm::Function* sqrt =
2794         llvm::Intrinsic::getDeclaration(module, llvm::Intrinsic::sqrt, V->getType());
2795     return this->CreateCall(sqrt, V);
2796 }
2797 
2798 template<bool preserveNames, typename T, typename Inserter>
CreateFPow(llvm::Value * LHS,llvm::Value * RHS)2799 llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::CreateFPow(llvm::Value *LHS, llvm::Value *RHS)
2800 {
2801     llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
2802 
2803     llvm::Function* fpow =
2804         llvm::Intrinsic::getDeclaration(module, llvm::Intrinsic::pow, LHS->getType());
2805     return this->CreateCall2(fpow, LHS, RHS);
2806 }
2807 
2808 template<bool preserveNames, typename T, typename Inserter>
CreateFMax(llvm::Value * LHS,llvm::Value * RHS)2809 llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::CreateFMax(llvm::Value *LHS, llvm::Value *RHS)
2810 {
2811     llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
2812 
2813     llvm::Function* fmax =
2814         llvm::Intrinsic::getDeclaration(module, llvm::Intrinsic::maxnum, LHS->getType());
2815     return this->CreateCall2(fmax, LHS, RHS);
2816 }
2817 
2818 template<bool preserveNames, typename T, typename Inserter>
CreateFMin(llvm::Value * LHS,llvm::Value * RHS)2819 llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::CreateFMin(llvm::Value *LHS, llvm::Value *RHS)
2820 {
2821     llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
2822 
2823     llvm::Function* fmin =
2824         llvm::Intrinsic::getDeclaration(module, llvm::Intrinsic::minnum, LHS->getType());
2825     return this->CreateCall2(fmin, LHS, RHS);
2826 }
2827 
2828 template<bool preserveNames, typename T, typename Inserter>
CreateIMulH(llvm::Value * LHS,llvm::Value * RHS)2829 llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::CreateIMulH(llvm::Value *LHS, llvm::Value *RHS)
2830 {
2831     llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
2832 
2833     llvm::Function* imulh =
2834         llvm::GenISAIntrinsic::getDeclaration(module, llvm::GenISAIntrinsic::GenISA_imulH, LHS->getType());
2835     return this->CreateCall2(imulh, LHS, RHS);
2836 }
2837 
2838 template<bool preserveNames, typename T, typename Inserter>
CreateUMulH(llvm::Value * LHS,llvm::Value * RHS)2839 llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::CreateUMulH(llvm::Value *LHS, llvm::Value *RHS)
2840 {
2841     llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
2842 
2843     llvm::Function* umulh =
2844         llvm::GenISAIntrinsic::getDeclaration(module, llvm::GenISAIntrinsic::GenISA_umulH, LHS->getType());
2845     return this->CreateCall2(umulh, LHS, RHS);
2846 }
2847 
2848 template<bool preserveNames, typename T, typename Inserter>
CreateDiscard(llvm::Value * V)2849 llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::CreateDiscard(llvm::Value* V)
2850 {
2851     llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
2852 
2853     llvm::Function* discard =
2854         llvm::GenISAIntrinsic::getDeclaration(module, llvm::GenISAIntrinsic::GenISA_discard);
2855     return this->CreateCall(discard, V);
2856 }
2857 
2858 template<bool preserveNames, typename T, typename Inserter>
CreateFLog(llvm::Value * V)2859 llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::CreateFLog(llvm::Value *V)
2860 {
2861     llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
2862 
2863     llvm::Function* flog =
2864         llvm::Intrinsic::getDeclaration(module, llvm::Intrinsic::log2, V->getType());
2865     return this->CreateCall(flog, V);
2866 }
2867 
2868 template<bool preserveNames, typename T, typename Inserter>
CreateFExp(llvm::Value * V)2869 llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::CreateFExp(llvm::Value *V)
2870 {
2871     llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
2872 
2873     llvm::Function* fexp =
2874         llvm::Intrinsic::getDeclaration(module, llvm::Intrinsic::exp2, V->getType());
2875     return this->CreateCall(fexp, V);
2876 }
2877 
2878 template<bool preserveNames, typename T, typename Inserter>
CreateDFloor(llvm::Value * src)2879 llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::CreateDFloor(llvm::Value* src)
2880 {
2881     llvm::Module* const mod = this->GetInsertBlock()->getParent()->getParent();
2882     IGC_ASSERT(nullptr != mod);
2883     llvm::Function* func = mod->getFunction("__builtin_floor_f64");
2884     if (func != nullptr)
2885     {
2886         return this->CreateCall(func, src);
2887     }
2888 
2889     // from OCL builtin: double @__builtin_spirv_floor_f64(double %x)
2890     static const char* const code =
2891         "define double @__builtin_floor_f64(double %x)                              \n"
2892         "    alwaysinline nounwind readnone {                                       \n"
2893         "  %1 = bitcast double %x to i64                                            \n"
2894         "  %2 = lshr i64 %1, 32                                                     \n"
2895         "  %3 = trunc i64 %2 to i32                                                 \n"
2896         "  %4 = lshr i64 %1, 52                                                     \n"
2897         "  %5 = trunc i64 %4 to i32                                                 \n"
2898         "  %6 = and i32 %5, 2047                                                    \n"
2899         "  %7 = sub nsw i32 1023, %6                                                \n"
2900         "  %8 = add nsw i32 %7, 52                                                  \n"
2901         "  %9 = add nsw i32 %7, 20                                                  \n"
2902         "  %10 = icmp sgt i32 %8, 32                                                \n"
2903         "  %11 = select i1 %10, i32 32, i32 %8                                      \n"
2904         "  %12 = icmp sgt i32 %9, 20                                                \n"
2905         "  %13 = select i1 %12, i32 20, i32 %9                                      \n"
2906         "  %14 = icmp sgt i32 %11, 0                                                \n"
2907         "  %15 = select i1 %14, i32 %11, i32 0                                      \n"
2908         "  %16 = icmp sgt i32 %13, 0                                                \n"
2909         "  %17 = select i1 %16, i32 %13, i32 0                                      \n"
2910         "  %18 = and i32 %15, 31                                                    \n"
2911         "  %19 = shl i32 -1, %18                                                    \n"
2912         "  %20 = and i32 %17, 31                                                    \n"
2913         "  %21 = shl i32 -1, %20                                                    \n"
2914         "  %22 = icmp ne i32 %15, 32                                                \n"
2915         "  %23 = select i1 %22, i32 %19, i32 0                                      \n"
2916         "  %24 = icmp eq i32 %17, 32                                                \n"
2917         "  %25 = icmp ult i32 %6, 1023                                              \n"
2918         "  %or.cond.i = or i1 %25, %24                                              \n"
2919         "  %maskValHigh32bit.0.i = select i1 %or.cond.i, i32 -2147483648, i32 %21   \n"
2920         "  %maskValLow32bit.0.i = select i1 %or.cond.i, i32 0, i32 %23              \n"
2921         "  %26 = trunc i64 %1 to i32                                                \n"
2922         "  %27 = and i32 %maskValLow32bit.0.i, %26                                  \n"
2923         "  %28 = and i32 %maskValHigh32bit.0.i, %3                                  \n"
2924         "  %29 = zext i32 %28 to i64                                                \n"
2925         "  %30 = shl nuw i64 %29, 32                                                \n"
2926         "  %31 = zext i32 %27 to i64                                                \n"
2927         "  %32 = or i64 %30, %31                                                    \n"
2928         "  %33 = bitcast i64 %32 to double                                          \n"
2929         "  %34 = sub i64 %1, %32                                                    \n"
2930         "  %35 = lshr i64 %34, 32                                                   \n"
2931         "  %36 = or i64 %35, %34                                                    \n"
2932         "  %37 = trunc i64 %36 to i32                                               \n"
2933         "  %38 = icmp eq i32 %37, 0                                                 \n"
2934         "  %39 = ashr i64 %1, 31                                                    \n"
2935         "  %.op = and i64 %39, -4616189618054758400                                 \n"
2936         "  %40 = bitcast i64 %.op to double                                         \n"
2937         "  %41 = select i1 %38, double 0.000000e+00, double %40                     \n"
2938         "  %42 = fadd double %33, %41                                               \n"
2939         "  ret double %42                                                           \n"
2940         "}";
2941 
2942     llvm::MemoryBufferRef codeBuf(code, "<string>");
2943     llvm::SMDiagnostic diagnostic;
2944     const bool failed = llvm::parseAssemblyInto(codeBuf, mod, nullptr, diagnostic);
2945     (void) failed;
2946     IGC_ASSERT_MESSAGE(false == failed, "Error parse llvm assembly");
2947 
2948     func = mod->getFunction("__builtin_floor_f64");
2949     return this->CreateCall(func, src);
2950 }
2951 
2952 template<bool preserveNames, typename T, typename Inserter>
CreateFloor(llvm::Value * V)2953 llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::CreateFloor(llvm::Value *V)
2954 {
2955     llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
2956 
2957     if( V->getType() == this->getDoubleTy() )
2958     {
2959         return CreateDFloor(V);
2960     }
2961     else
2962     {
2963         llvm::Function* floor =
2964             llvm::Intrinsic::getDeclaration( module, llvm::Intrinsic::floor, V->getType() );
2965         return this->CreateCall( floor, V );
2966     }
2967 }
2968 
2969 template<bool preserveNames, typename T, typename Inserter>
CreateDCeil(llvm::Value * src)2970 llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::CreateDCeil(llvm::Value *src)
2971 {
2972     llvm::Module* const mod = this->GetInsertBlock()->getParent()->getParent();
2973     IGC_ASSERT(nullptr != mod);
2974     llvm::Function* func = mod->getFunction("__builtin_ceil_f64");
2975     if (func != nullptr)
2976     {
2977         return this->CreateCall(func, src);
2978     }
2979 
2980     // from OCL builtin: double @__builtin_spirv_ceil_f64(double %x)
2981     static const char* const code =
2982         "define double @__builtin_ceil_f64(double %x)                               \n"
2983         "    alwaysinline nounwind readnone {                                       \n"
2984         "  %1 = bitcast double %x to i64                                            \n"
2985         "  %2 = lshr i64 %1, 32                                                     \n"
2986         "  %3 = trunc i64 %2 to i32                                                 \n"
2987         "  %4 = lshr i64 %1, 52                                                     \n"
2988         "  %5 = trunc i64 %4 to i32                                                 \n"
2989         "  %6 = and i32 %5, 2047                                                    \n"
2990         "  %7 = sub nsw i32 1023, %6                                                \n"
2991         "  %8 = add nsw i32 %7, 52                                                  \n"
2992         "  %9 = add nsw i32 %7, 20                                                  \n"
2993         "  %10 = icmp sgt i32 %8, 32                                                \n"
2994         "  %11 = select i1 %10, i32 32, i32 %8                                      \n"
2995         "  %12 = icmp sgt i32 %9, 20                                                \n"
2996         "  %13 = select i1 %12, i32 20, i32 %9                                      \n"
2997         "  %14 = icmp sgt i32 %11, 0                                                \n"
2998         "  %15 = select i1 %14, i32 %11, i32 0                                      \n"
2999         "  %16 = icmp sgt i32 %13, 0                                                \n"
3000         "  %17 = select i1 %16, i32 %13, i32 0                                      \n"
3001         "  %18 = and i32 %15, 31                                                    \n"
3002         "  %19 = shl i32 -1, %18                                                    \n"
3003         "  %20 = and i32 %17, 31                                                    \n"
3004         "  %21 = shl i32 -1, %20                                                    \n"
3005         "  %22 = icmp ne i32 %15, 32                                                \n"
3006         "  %23 = select i1 %22, i32 %19, i32 0                                      \n"
3007         "  %24 = icmp eq i32 %17, 32                                                \n"
3008         "  %25 = icmp ult i32 %6, 1023                                              \n"
3009         "  %or.cond.i = or i1 %25, %24                                              \n"
3010         "  %maskValHigh32bit.0.i = select i1 %or.cond.i, i32 -2147483648, i32 %21   \n"
3011         "  %maskValLow32bit.0.i = select i1 %or.cond.i, i32 0, i32 %23              \n"
3012         "  %26 = trunc i64 %1 to i32                                                \n"
3013         "  %27 = and i32 %maskValLow32bit.0.i, %26                                  \n"
3014         "  %28 = and i32 %maskValHigh32bit.0.i, %3                                  \n"
3015         "  %29 = zext i32 %28 to i64                                                \n"
3016         "  %30 = shl nuw i64 %29, 32                                                \n"
3017         "  %31 = zext i32 %27 to i64                                                \n"
3018         "  %32 = or i64 %30, %31                                                    \n"
3019         "  %33 = bitcast i64 %32 to double                                          \n"
3020         "  %34 = sub i64 %1, %32                                                    \n"
3021         "  %35 = lshr i64 %34, 32                                                   \n"
3022         "  %36 = or i64 %35, %34                                                    \n"
3023         "  %37 = trunc i64 %36 to i32                                               \n"
3024         "  %38 = icmp eq i32 %37, 0                                                 \n"
3025         "  %39 = ashr i64 %1, 31                                                    \n"
3026         "  %40 = and i64 %39, -4607182418800017408                                  \n"
3027         "  %.op = add nsw i64 %40, 4607182418800017408                              \n"
3028         "  %41 = bitcast i64 %.op to double                                         \n"
3029         "  %42 = select i1 %38, double 0.000000e+00, double %41                     \n"
3030         "  %43 = fadd double %33, %42                                               \n"
3031         "  ret double %43                                                           \n"
3032         "}";
3033 
3034     llvm::MemoryBufferRef codeBuf(code, "<string>");
3035     llvm::SMDiagnostic diagnostic;
3036     const bool failed = llvm::parseAssemblyInto(codeBuf, mod, nullptr, diagnostic);
3037     (void) failed;
3038     IGC_ASSERT_MESSAGE(false == failed, "Error parse llvm assembly");
3039 
3040     func = mod->getFunction("__builtin_ceil_f64");
3041 
3042     return this->CreateCall(func, src);
3043 }
3044 
3045 template<bool preserveNames, typename T, typename Inserter>
CreateCeil(llvm::Value * V)3046 llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::CreateCeil(llvm::Value *V)
3047 {
3048     llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
3049 
3050     if( V->getType() == this->getDoubleTy() )
3051     {
3052         return CreateDCeil(V);
3053     }
3054     else
3055     {
3056         llvm::Function* ceil =
3057             llvm::Intrinsic::getDeclaration( module, llvm::Intrinsic::ceil, V->getType() );
3058         return this->CreateCall( ceil, V );
3059     }
3060 }
3061 
3062 template<bool preserveNames, typename T, typename Inserter>
CreateDTrunc(llvm::Value * src)3063 llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::CreateDTrunc(llvm::Value *src)
3064 {
3065     llvm::Module* const mod = this->GetInsertBlock()->getParent()->getParent();
3066     IGC_ASSERT(nullptr != mod);
3067     llvm::Function* func = mod->getFunction("__builtin_trunc_f64");
3068     if (func != nullptr)
3069     {
3070         return this->CreateCall(func, src);
3071     }
3072 
3073     // from OCL builtin: double @__builtin_spirv_trunc_f64(double %x)
3074     static const char* const code =
3075         "define double @__builtin_trunc_f64(double %x)                        \n"
3076         "    alwaysinline nounwind readnone {                                       \n"
3077         "  %1 = bitcast double %x to i64                                            \n"
3078         "  %2 = lshr i64 %1, 32                                                     \n"
3079         "  %3 = trunc i64 %2 to i32                                                 \n"
3080         "  %4 = lshr i64 %1, 52                                                     \n"
3081         "  %5 = trunc i64 %4 to i32                                                 \n"
3082         "  %6 = and i32 %5, 2047                                                    \n"
3083         "  %7 = sub nsw i32 1023, %6                                                \n"
3084         "  %8 = add nsw i32 %7, 52                                                  \n"
3085         "  %9 = add nsw i32 %7, 20                                                  \n"
3086         "  %10 = icmp sgt i32 %8, 32                                                \n"
3087         "  %11 = select i1 %10, i32 32, i32 %8                                      \n"
3088         "  %12 = icmp sgt i32 %9, 20                                                \n"
3089         "  %13 = select i1 %12, i32 20, i32 %9                                      \n"
3090         "  %14 = icmp sgt i32 %11, 0                                                \n"
3091         "  %15 = select i1 %14, i32 %11, i32 0                                      \n"
3092         "  %16 = icmp sgt i32 %13, 0                                                \n"
3093         "  %17 = select i1 %16, i32 %13, i32 0                                      \n"
3094         "  %18 = and i32 %15, 31                                                    \n"
3095         "  %19 = shl i32 -1, %18                                                    \n"
3096         "  %20 = and i32 %17, 31                                                    \n"
3097         "  %21 = shl i32 -1, %20                                                    \n"
3098         "  %22 = icmp ne i32 %15, 32                                                \n"
3099         "  %23 = select i1 %22, i32 %19, i32 0                                      \n"
3100         "  %24 = icmp eq i32 %17, 32                                                \n"
3101         "  %25 = icmp ult i32 %6, 1023                                              \n"
3102         "  %or.cond = or i1 %25, %24                                                \n"
3103         "  %maskValHigh32bit.0 = select i1 %or.cond, i32 -2147483648, i32 %21       \n"
3104         "  %maskValLow32bit.0 = select i1 %or.cond, i32 0, i32 %23                  \n"
3105         "  %26 = trunc i64 %1 to i32                                                \n"
3106         "  %27 = and i32 %maskValLow32bit.0, %26                                    \n"
3107         "  %28 = and i32 %maskValHigh32bit.0, %3                                    \n"
3108         "  %29 = zext i32 %28 to i64                                                \n"
3109         "  %30 = shl nuw i64 %29, 32                                                \n"
3110         "  %31 = zext i32 %27 to i64                                                \n"
3111         "  %32 = or i64 %30, %31                                                    \n"
3112         "  %33 = bitcast i64 %32 to double                                          \n"
3113         "  ret double %33                                                           \n"
3114         "}";
3115 
3116     llvm::MemoryBufferRef codeBuf(code, "<string>");
3117     llvm::SMDiagnostic diagnostic;
3118     const bool failed = llvm::parseAssemblyInto(codeBuf, mod, nullptr, diagnostic);
3119     (void) failed;
3120     IGC_ASSERT_MESSAGE(false == failed, "Error parse llvm assembly");
3121 
3122     func = mod->getFunction("__builtin_trunc_f64");
3123 
3124     return this->CreateCall(func, src);
3125 }
3126 
3127 template<bool preserveNames, typename T, typename Inserter>
CreateRoundZ(llvm::Value * V)3128 llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::CreateRoundZ(llvm::Value *V)
3129 {
3130     llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
3131 
3132     if (V->getType() == this->getDoubleTy())
3133     {
3134         return CreateDTrunc(V);
3135     }
3136     else
3137     {
3138         llvm::Function* trunc =
3139             llvm::Intrinsic::getDeclaration(module, llvm::Intrinsic::trunc, V->getType());
3140         return this->CreateCall(trunc, V);
3141     }
3142 }
3143 
// Emits a call to the module-local helper @__builtin_roundne_f64, which rounds
// a double to the nearest integral value with ties going to the even neighbour.
//
// If the helper is already present in the module it is reused. Otherwise its
// definition is parsed from the embedded LLVM-IR text below into the module
// that owns the current insertion block, and then looked up again by name.
//
// src     - the double operand passed to the helper.
// returns - the call instruction created at the current insertion point.
//
// NOTE(review): if parsing fails in a build where IGC_ASSERT_MESSAGE expands
// to nothing, 'func' is presumably null when it reaches CreateCall - confirm.
template<bool preserveNames, typename T, typename Inserter>
llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::CreateDRoundNE(llvm::Value *src)
{
    llvm::Module* const mod = this->GetInsertBlock()->getParent()->getParent();
    IGC_ASSERT(nullptr != mod);
    // Fast path: the helper was already injected into this module.
    llvm::Function* func = mod->getFunction("__builtin_roundne_f64");
    if (func != nullptr)
    {
        return this->CreateCall(func, src);
    }

    // From OCL builtin: double @__builtin_spirv_rint_f64(double %x)
    //
    // Stages of the IR text (register numbers refer to the text itself):
    //   %1-%3    raw bits of x and |x| (sign bit cleared).
    //   %4-%10   add 0.5 to |x| when its biased exponent is below 1075
    //            (1023 + 52), i.e. |x| < 2^52; otherwise add 0.0.
    //   %11-%43  truncate that sum toward zero by masking away the mantissa
    //            bits that lie below the binary point (the masks are built
    //            separately for the low and high 32-bit halves).
    //   %44-%50  tie handling: if the truncated value is exactly 0.5 above
    //            |x| and its integer value is odd, subtract 1 so the result
    //            is even.
    //   %51-%54  OR the sign bit of the original x back in and return.
    static const char* const code =
        "define double @__builtin_roundne_f64(double %x)                            \n"
        "    alwaysinline nounwind readnone {                                       \n"
        "  %1 = bitcast double %x to i64                                            \n"
        "  %2 = and i64 %1, 9223372036854775807                                     \n"
        "  %3 = bitcast i64 %2 to double                                            \n"
        "  %4 = lshr i64 %2, 52                                                     \n"
        "  %5 = trunc i64 %4 to i32                                                 \n"
        "  %6 = icmp ult i32 %5, 1075                                               \n"
        "  %7 = zext i1 %6 to i32                                                   \n"
        "  %8 = sitofp i32 %7 to double                                             \n"
        "  %9 = fmul double %8, 5.000000e-01                                        \n"
        "  %10 = fadd double %3, %9                                                 \n"
        "  %11 = bitcast double %10 to i64                                          \n"
        "  %12 = lshr i64 %11, 32                                                   \n"
        "  %13 = trunc i64 %12 to i32                                               \n"
        "  %14 = lshr i64 %11, 52                                                   \n"
        "  %15 = trunc i64 %14 to i32                                               \n"
        "  %16 = and i32 %15, 2047                                                  \n"
        "  %17 = sub nsw i32 1023, %16                                              \n"
        "  %18 = add nsw i32 %17, 52                                                \n"
        "  %19 = add nsw i32 %17, 20                                                \n"
        "  %20 = icmp sgt i32 %18, 32                                               \n"
        "  %21 = select i1 %20, i32 32, i32 %18                                     \n"
        "  %22 = icmp sgt i32 %19, 20                                               \n"
        "  %23 = select i1 %22, i32 20, i32 %19                                     \n"
        "  %24 = icmp sgt i32 %21, 0                                                \n"
        "  %25 = select i1 %24, i32 %21, i32 0                                      \n"
        "  %26 = icmp sgt i32 %23, 0                                                \n"
        "  %27 = select i1 %26, i32 %23, i32 0                                      \n"
        "  %28 = and i32 %25, 31                                                    \n"
        "  %29 = shl i32 -1, %28                                                    \n"
        "  %30 = and i32 %27, 31                                                    \n"
        "  %31 = shl i32 -1, %30                                                    \n"
        "  %32 = icmp ne i32 %25, 32                                                \n"
        "  %33 = select i1 %32, i32 %29, i32 0                                      \n"
        "  %34 = icmp eq i32 %27, 32                                                \n"
        "  %35 = icmp ult i32 %16, 1023                                             \n"
        "  %or.cond.i = or i1 %35, %34                                              \n"
        "  %maskValHigh32bit.0.i = select i1 %or.cond.i, i32 -2147483648, i32 %31   \n"
        "  %maskValLow32bit.0.i = select i1 %or.cond.i, i32 0, i32 %33              \n"
        "  %36 = trunc i64 %11 to i32                                               \n"
        "  %37 = and i32 %maskValLow32bit.0.i, %36                                  \n"
        "  %38 = and i32 %maskValHigh32bit.0.i, %13                                 \n"
        "  %39 = zext i32 %38 to i64                                                \n"
        "  %40 = shl nuw i64 %39, 32                                                \n"
        "  %41 = zext i32 %37 to i64                                                \n"
        "  %42 = or i64 %40, %41                                                    \n"
        "  %43 = bitcast i64 %42 to double                                          \n"
        "  %44 = fptoui double %43 to i64                                           \n"
        "  %.tr = trunc i64 %44 to i32                                              \n"
        "  %45 = fsub double %43, %3                                                \n"
        "  %46 = fcmp oeq double %45, 5.000000e-01                                  \n"
        "  %47 = zext i1 %46 to i32                                                 \n"
        "  %48 = and i32 %.tr, %47                                                  \n"
        "  %49 = uitofp i32 %48 to double                                           \n"
        "  %50 = fsub double %43, %49                                               \n"
        "  %51 = and i64 %1, -9223372036854775808                                   \n"
        "  %52 = bitcast double %50 to i64                                          \n"
        "  %53 = or i64 %52, %51                                                    \n"
        "  %54 = bitcast i64 %53 to double                                          \n"
        "  ret double %54                                                           \n"
        "}";

    // Parse the helper's definition directly into the current module.
    llvm::MemoryBufferRef codeBuf(code, "<string>");
    llvm::SMDiagnostic diagnostic;
    const bool failed = llvm::parseAssemblyInto(codeBuf, mod, nullptr, diagnostic);
    // Keeps 'failed' referenced even if the assert below compiles to nothing
    // (presumably to silence an unused-variable warning).
    (void) failed;
    IGC_ASSERT_MESSAGE(false == failed, "Error parse llvm assembly");

    // The definition now lives in the module; fetch it by name.
    func = mod->getFunction("__builtin_roundne_f64");

    return this->CreateCall(func, src);
}
3230 
3231 template<bool preserveNames, typename T, typename Inserter>
CreateRoundNE(llvm::Value * V)3232 llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::CreateRoundNE(llvm::Value *V)
3233 {
3234     llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
3235 
3236     if( V->getType() == this->getDoubleTy() )
3237     {
3238         return CreateDRoundNE(V);
3239     }
3240     else if( V->getType() == this->getHalfTy() )
3241     {
3242         V = this->CreateFPExt(V, this->getFloatTy());
3243         llvm::Function* roundne =
3244             llvm::GenISAIntrinsic::getDeclaration(module, llvm::GenISAIntrinsic::GenISA_ROUNDNE);
3245         V = this->CreateCall(roundne, V);
3246         return this->CreateFPTrunc(V, this->getHalfTy());
3247     }
3248     else
3249     {
3250         llvm::Function* roundne =
3251             llvm::GenISAIntrinsic::getDeclaration( module, llvm::GenISAIntrinsic::GenISA_ROUNDNE );
3252         return this->CreateCall( roundne, V );
3253     }
3254 }
3255 
3256 template<bool preserveNames, typename T, typename Inserter>
CreateIsNan(llvm::Value * V)3257 inline llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::CreateIsNan(llvm::Value* V)
3258 {
3259     //fcmp_uno yields true if either operand is a QNAN. Since we compare the same numer with itself.
3260     //If V is not NAN it will return false
3261     return this->CreateFCmp(llvm::FCmpInst::FCMP_UNO, V, V);
3262 }
3263 
3264 
3265 template<bool preserveNames, typename T, typename Inserter>
CreateCtpop(llvm::Value * V)3266 llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::CreateCtpop(llvm::Value *V)
3267 {
3268     llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
3269 
3270     llvm::Function* ctpop =
3271         llvm::Intrinsic::getDeclaration(module, llvm::Intrinsic::ctpop, V->getType());
3272     return this->CreateCall(ctpop, V);
3273 }
3274 
3275 template<bool preserveNames, typename T, typename Inserter>
getHalf(float f)3276 llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::getHalf(float f)
3277 {
3278     return llvm::ConstantFP::get(this->getHalfTy(), f);
3279 }
3280 
3281 template<bool preserveNames, typename T, typename Inserter>
getFloat(float f)3282 llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::getFloat(float f)
3283 {
3284     return llvm::ConstantFP::get(this->getFloatTy(), f);
3285 }
3286 
3287 template<bool preserveNames, typename T, typename Inserter>
getDouble(double d)3288 llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::getDouble(double d)
3289 {
3290     return llvm::ConstantFP::get(this->getDoubleTy(), d);
3291 }
3292 
3293 template<bool preserveNames, typename T, typename Inserter>
CreateDeriveRTX(llvm::Value * V)3294 llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::CreateDeriveRTX(llvm::Value *V)
3295 {
3296     llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
3297 
3298     llvm::Function* floor =
3299         llvm::GenISAIntrinsic::getDeclaration(module, llvm::GenISAIntrinsic::GenISA_GradientX,
3300         V->getType());
3301     return this->CreateCall(floor, V);
3302 }
3303 
3304 template<bool preserveNames, typename T, typename Inserter>
CreateDeriveRTX_Fine(llvm::Value * V)3305 llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::CreateDeriveRTX_Fine(llvm::Value *V)
3306 {
3307     llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
3308 
3309     llvm::Function* floor =
3310         llvm::GenISAIntrinsic::getDeclaration(module, llvm::GenISAIntrinsic::GenISA_GradientXfine, V->getType());
3311     return this->CreateCall(floor, V);
3312 }
3313 
3314 template<bool preserveNames, typename T, typename Inserter>
CreateDeriveRTY(llvm::Value * V)3315 llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::CreateDeriveRTY(llvm::Value *V)
3316 {
3317     llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
3318 
3319     llvm::Function* floor =
3320         llvm::GenISAIntrinsic::getDeclaration(module, llvm::GenISAIntrinsic::GenISA_GradientY,
3321     V->getType());
3322     return this->CreateCall(floor, V);
3323 }
3324 
3325 template<bool preserveNames, typename T, typename Inserter>
CreateDeriveRTY_Fine(llvm::Value * V)3326 llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::CreateDeriveRTY_Fine(llvm::Value *V)
3327 {
3328     llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
3329 
3330     llvm::Function* floor =
3331         llvm::GenISAIntrinsic::getDeclaration(module, llvm::GenISAIntrinsic::GenISA_GradientYfine, V->getType());
3332     return this->CreateCall(floor, V);
3333 }
3334 
3335 template<bool preserveNames, typename T, typename Inserter>
Create_MAD_Scalar(llvm::Value * float_src0,llvm::Value * float_src1,llvm::Value * float_src2)3336 llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::Create_MAD_Scalar(llvm::Value* float_src0, llvm::Value* float_src1, llvm::Value* float_src2)
3337 {
3338     llvm::Module* const module = this->GetInsertBlock()->getParent()->getParent();
3339     IGC_ASSERT(nullptr != module);
3340     IGC_ASSERT(nullptr != float_src0);
3341 
3342     // Builtin Signature: float (float, float, float)
3343     IGC_ASSERT_MESSAGE((float_src0->getType() == llvm::Type::getHalfTy(module->getContext()) || float_src0->getType() == this->getFloatTy() || float_src0->getType() == this->getDoubleTy()), "Type check @MAD.scalar arg: 0");
3344     IGC_ASSERT_MESSAGE((float_src1->getType() == llvm::Type::getHalfTy(module->getContext()) || float_src1->getType() == this->getFloatTy() || float_src1->getType() == this->getDoubleTy()), "Type check @MAD.scalar arg: 1");
3345     IGC_ASSERT_MESSAGE((float_src2->getType() == llvm::Type::getHalfTy(module->getContext()) || float_src2->getType() == this->getFloatTy() || float_src2->getType() == this->getDoubleTy()), "Type check @MAD.scalar arg: 2");
3346 
3347     llvm::Function* madFunc = llvm::Intrinsic::getDeclaration(module, llvm::Intrinsic::fma, float_src0->getType());
3348     llvm::Value* args[] = { float_src0, float_src1, float_src2 };
3349     llvm::Value* float_madres_s = this->CreateCall(madFunc, args);
3350 
3351     return float_madres_s;
3352 }
3353 
3354 template<bool preserveNames, typename T, typename Inserter>
CreatePow(llvm::Value * src0,llvm::Value * src1)3355 llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::CreatePow(llvm::Value* src0, llvm::Value* src1)
3356 {
3357     llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
3358     llvm::Function* powFunc = llvm::Intrinsic::getDeclaration(module, llvm::Intrinsic::pow, src0->getType());
3359     llvm::Value* args[] = { src0, src1 };
3360     llvm::Value* powres_s = this->CreateCall(powFunc, args);
3361 
3362     return powres_s;
3363 }
3364 
3365 template<bool preserveNames, typename T, typename Inserter>
Create_SAMPLEBC(llvm::Value * float_ref_value,llvm::Value * bias_value,llvm::Value * address_u,llvm::Value * address_v,llvm::Value * address_r,llvm::Value * address_ai,llvm::Value * int32_textureIdx,llvm::Value * int32_sampler,llvm::Value * int32_offsetU,llvm::Value * int32_offsetV,llvm::Value * int32_offsetW,llvm::Type * returnType)3366 inline llvm::CallInst* LLVM3DBuilder<preserveNames, T, Inserter>::Create_SAMPLEBC(
3367     llvm::Value* float_ref_value,
3368     llvm::Value* bias_value,
3369     llvm::Value* address_u,
3370     llvm::Value* address_v,
3371     llvm::Value* address_r,
3372     llvm::Value* address_ai,
3373     llvm::Value* int32_textureIdx,
3374     llvm::Value* int32_sampler,
3375     llvm::Value* int32_offsetU,
3376     llvm::Value* int32_offsetV,
3377     llvm::Value* int32_offsetW,
3378     llvm::Type* returnType)
3379 {
3380     llvm::Value * packed_tex_params[] = {
3381         float_ref_value,
3382         bias_value,
3383         address_u,
3384         address_v,
3385         address_r,
3386         address_ai,
3387         int32_textureIdx,
3388         int32_sampler,
3389         int32_offsetU,
3390         int32_offsetV,
3391         int32_offsetW
3392     };
3393 
3394     llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
3395 
3396     llvm::Type* dstType = ( returnType != nullptr ) ? returnType : this->getFloatTy();
3397     llvm::Type* types[] = {
3398         IGCLLVM::FixedVectorType::get(dstType, 4),
3399         float_ref_value->getType(),
3400         int32_textureIdx->getType(),
3401         int32_sampler->getType()
3402     };
3403     llvm::Function* func_llvm_GenISA_sampleBCptr_v4f32_f32 = llvm::GenISAIntrinsic::getDeclaration
3404         (module, llvm::GenISAIntrinsic::GenISA_sampleBCptr, types);
3405 
3406     llvm::CallInst* packed_tex_call = this->CreateCall(func_llvm_GenISA_sampleBCptr_v4f32_f32, packed_tex_params);
3407     return packed_tex_call;
3408 }
3409 
3410 template<bool preserveNames, typename T, typename Inserter>
CreateEvalSampleIndex(llvm::Value * inputIndex,llvm::Value * sampleIndex,llvm::Value * perspective)3411 inline llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::CreateEvalSampleIndex(
3412     llvm::Value* inputIndex,
3413     llvm::Value* sampleIndex,
3414     llvm::Value* perspective)
3415 {
3416     llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
3417 
3418     llvm::Function* pullBarys =
3419         llvm::GenISAIntrinsic::getDeclaration(module, llvm::GenISAIntrinsic::GenISA_PullSampleIndexBarys);
3420     llvm::Value* bary = this->CreateCall2(pullBarys, sampleIndex, perspective);
3421     llvm::Function* interpolate =
3422         llvm::GenISAIntrinsic::getDeclaration(module, llvm::GenISAIntrinsic::GenISA_Interpolate);
3423     return this->CreateCall2(interpolate, inputIndex, bary);
3424 }
3425 
3426 
3427 template<bool preserveNames, typename T, typename Inserter>
CreateEvalSnapped(llvm::Value * inputIndex,llvm::Value * xOffset,llvm::Value * yOffset,llvm::Value * perspective)3428 inline llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::CreateEvalSnapped(
3429     llvm::Value* inputIndex,
3430     llvm::Value* xOffset,
3431     llvm::Value* yOffset,
3432     llvm::Value* perspective)
3433 {
3434     llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
3435 
3436     llvm::Function* pullBarys =
3437         llvm::GenISAIntrinsic::getDeclaration(module, llvm::GenISAIntrinsic::GenISA_PullSnappedBarys);
3438     llvm::Value* bary = this->CreateCall3(pullBarys, xOffset, yOffset, perspective);
3439     llvm::Function* interpolate =
3440         llvm::GenISAIntrinsic::getDeclaration(module, llvm::GenISAIntrinsic::GenISA_Interpolate);
3441     return this->CreateCall2(interpolate, inputIndex, bary);
3442 }
3443 
3444 template<bool preserveNames, typename T, typename Inserter>
CreateSetStream(llvm::Value * StreamId,llvm::Value * emitCount)3445 inline llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::CreateSetStream(
3446     llvm::Value* StreamId, llvm::Value* emitCount)
3447 {
3448     llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
3449     llvm::Function* fn =
3450         llvm::GenISAIntrinsic::getDeclaration(module, llvm::GenISAIntrinsic::GenISA_SetStream);
3451     return this->CreateCall2(fn, StreamId, emitCount);
3452 }
3453 
3454 template<bool preserveNames, typename T, typename Inserter>
CreateEndPrimitive(llvm::Value * emitCount)3455 inline llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::CreateEndPrimitive(
3456     llvm::Value* emitCount)
3457 {
3458     llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
3459     llvm::Function* fn =
3460         llvm::GenISAIntrinsic::getDeclaration(module, llvm::GenISAIntrinsic::GenISA_EndPrimitive);
3461     return this->CreateCall(fn, emitCount);
3462 }
3463 
3464 template<bool preserveNames, typename T, typename Inserter>
CreateControlPointId()3465 inline llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::CreateControlPointId()
3466 {
3467     llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
3468     llvm::Function* fn =
3469         llvm::GenISAIntrinsic::getDeclaration(module, llvm::GenISAIntrinsic::GenISA_DCL_HSControlPointID);
3470     return this->CreateCall(fn);
3471 }
3472 
3473 template<bool preserveNames, typename T, typename Inserter>
CreatePrimitiveID()3474 inline llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::CreatePrimitiveID()
3475 {
3476     llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
3477     llvm::Function* pFunc = llvm::GenISAIntrinsic::getDeclaration(
3478         module,
3479         llvm::GenISAIntrinsic::GenISA_DCL_SystemValue,
3480         this->getFloatTy());
3481     return this->CreateBitCast(
3482         this->CreateCall(pFunc, this->getInt32(IGC::PRIMITIVEID)), this->getInt32Ty());
3483 }
3484 
3485 template<bool preserveNames, typename T, typename Inserter>
CreateInstanceID()3486 inline llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::CreateInstanceID()
3487 {
3488     llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
3489     llvm::Function* pFunc = llvm::GenISAIntrinsic::getDeclaration(
3490         module,
3491         llvm::GenISAIntrinsic::GenISA_DCL_SystemValue,
3492         this->getFloatTy());
3493     return this->CreateBitCast(
3494         this->CreateCall(pFunc, this->getInt32(IGC::GS_INSTANCEID)), this->getInt32Ty());
3495 }
3496 
3497 template<bool preserveNames, typename T, typename Inserter>
CreateSampleIndex()3498 inline llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::CreateSampleIndex()
3499 {
3500     llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
3501     llvm::Function* pFunc = llvm::GenISAIntrinsic::getDeclaration(
3502         module,
3503         llvm::GenISAIntrinsic::GenISA_DCL_SystemValue,
3504         this->getFloatTy());
3505     return this->CreateBitCast(
3506         this->CreateCall(pFunc, this->getInt32(IGC::SAMPLEINDEX)), this->getInt32Ty());
3507 }
3508 
3509 template<bool preserveNames, typename T, typename Inserter>
CreateCoverage()3510 inline llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::CreateCoverage()
3511 {
3512     llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
3513     llvm::Function* pFunc = llvm::GenISAIntrinsic::getDeclaration(
3514         module,
3515         llvm::GenISAIntrinsic::GenISA_DCL_SystemValue,
3516         this->getFloatTy());
3517     return this->CreateBitCast(
3518         this->CreateCall(pFunc, this->getInt32(IGC::INPUT_COVERAGE_MASK)), this->getInt32Ty());
3519 }
3520 
3521 
3522 template<bool preserveNames, typename T, typename Inserter>
CreateDomainPointInput(unsigned int dim)3523 inline llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::CreateDomainPointInput(unsigned int dim)
3524 {
3525     llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
3526     llvm::Function* pFunc = llvm::GenISAIntrinsic::getDeclaration(
3527         module,
3528         llvm::GenISAIntrinsic::GenISA_DCL_SystemValue,
3529         this->getFloatTy());
3530     return this->CreateCall(pFunc, this->getInt32(IGC::DOMAIN_POINT_ID_X + dim));
3531 }
3532 
3533 template<bool preserveNames, typename T, typename Inserter>
create_inputVecF32(llvm::Value * inputIndex,llvm::Value * interpolationMode)3534 inline llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::create_inputVecF32(llvm::Value* inputIndex, llvm::Value* interpolationMode)
3535 {
3536     llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
3537     llvm::Function* pFunc = llvm::GenISAIntrinsic::getDeclaration(
3538      module,
3539      llvm::GenISAIntrinsic::GenISA_DCL_inputVec,
3540      this->getFloatTy());
3541     return this->CreateCall2(pFunc, inputIndex, interpolationMode);
3542 }
3543 
3544 template<bool preserveNames, typename T, typename Inserter>
create_discard(llvm::Value * condition)3545 inline llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::create_discard(llvm::Value* condition)
3546 {
3547     llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
3548     llvm::Function* pFunc = llvm::GenISAIntrinsic::getDeclaration(module, llvm::GenISAIntrinsic::GenISA_discard);
3549     return this->CreateCall(pFunc, condition);
3550 }
3551 
3552 template<bool preserveNames, typename T, typename Inserter>
create_runtime(llvm::Value * offset)3553 inline llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::create_runtime(llvm::Value* offset)
3554 {
3555     llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
3556     llvm::Function* pFunc = llvm::GenISAIntrinsic::getDeclaration(module, llvm::GenISAIntrinsic::GenISA_RuntimeValue);
3557     return this->CreateCall(pFunc, offset);
3558 }
3559 
3560 template<bool preserveNames, typename T, typename Inserter>
create_uavSerializeAll()3561 inline llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::create_uavSerializeAll()
3562 {
3563     llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
3564     llvm::Function* pFunc = llvm::GenISAIntrinsic::getDeclaration(module, llvm::GenISAIntrinsic::GenISA_uavSerializeAll);
3565     return this->CreateCall(pFunc);
3566 }
3567 
3568 template<bool preserveNames, typename T, typename Inserter>
create_countbits(llvm::Value * src)3569 inline llvm::CallInst* LLVM3DBuilder<preserveNames, T, Inserter>::create_countbits(llvm::Value* src)
3570 {
3571     llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
3572     llvm::Function* pFunc = llvm::Intrinsic::getDeclaration(
3573         module,
3574         llvm::Intrinsic::ctpop,
3575         this->getInt32Ty());
3576     return this->CreateCall(pFunc, src);
3577 }
3578 
3579 template<bool preserveNames, typename T, typename Inserter>
3580 inline llvm::Value*
create_waveInverseBallot(llvm::Value * src)3581 LLVM3DBuilder<preserveNames, T, Inserter>::create_waveInverseBallot(
3582     llvm::Value* src)
3583 {
3584     llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
3585     llvm::Function* pFunc = llvm::GenISAIntrinsic::getDeclaration(
3586         module,
3587         llvm::GenISAIntrinsic::GenISA_WaveInverseBallot);
3588     return this->CreateCall(pFunc, src);
3589 }
3590 
3591 template<bool preserveNames, typename T, typename Inserter>
create_waveBallot(llvm::Value * src)3592 inline llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::create_waveBallot(llvm::Value* src)
3593 {
3594     llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
3595     llvm::Function* pFunc = llvm::GenISAIntrinsic::getDeclaration(
3596         module,
3597         llvm::GenISAIntrinsic::GenISA_WaveBallot);
3598     return this->CreateCall(pFunc, src);
3599 }
3600 
3601 template<bool preserveNames, typename T, typename Inserter>
create_waveshuffleIndex(llvm::Value * src,llvm::Value * index,llvm::Value * helperLaneMode)3602 inline llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::create_waveshuffleIndex(llvm::Value* src, llvm::Value* index, llvm::Value* helperLaneMode)
3603 {
3604     llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
3605     llvm::Type* srcType = src->getType();
3606     if (srcType == this->getInt1Ty())
3607     {
3608         src = this->CreateZExt(src, this->getInt32Ty());
3609     }
3610     llvm::Function* pFunc = llvm::GenISAIntrinsic::getDeclaration(
3611         module,
3612         llvm::GenISAIntrinsic::GenISA_WaveShuffleIndex,
3613         src->getType());
3614     llvm::Value* retVal = this->CreateCall3(pFunc, src, index, (helperLaneMode ? helperLaneMode : this->getInt32(0)));
3615     if (srcType == this->getInt1Ty())
3616     {
3617         retVal = this->CreateTrunc(retVal, srcType);
3618     }
3619     return retVal;
3620 }
3621 
3622 template<bool preserveNames, typename T, typename Inserter>
create_waveAll(llvm::Value * src,llvm::Value * type)3623 inline llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::create_waveAll(llvm::Value* src, llvm::Value* type)
3624 {
3625     llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
3626     llvm::Function* pFunc = llvm::GenISAIntrinsic::getDeclaration(
3627         module,
3628         llvm::GenISAIntrinsic::GenISA_WaveAll,
3629         src->getType());
3630     return this->CreateCall2(pFunc, src, type);
3631 }
3632 
3633 template<bool preserveNames, typename T, typename Inserter>
create_wavePrefix(llvm::Value * src,llvm::Value * type,bool inclusive,llvm::Value * Mask)3634 inline llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::create_wavePrefix(
3635     llvm::Value* src, llvm::Value* type, bool inclusive, llvm::Value *Mask)
3636 {
3637     // If a nullptr is passed in for 'Mask' (as is the default), just include
3638     // all lanes.
3639     Mask = Mask ? Mask : this->getInt1(true);
3640 
3641     llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
3642     llvm::Function* pFunc = llvm::GenISAIntrinsic::getDeclaration(
3643         module,
3644         llvm::GenISAIntrinsic::GenISA_WavePrefix,
3645         src->getType());
3646     return this->CreateCall4(pFunc, src, type, this->getInt1(inclusive), Mask);
3647 }
3648 
    // We currently use the combination of 'convergent' and
    // 'inaccessiblememonly' to prevent code motion of
    // wave intrinsics.  Removing 'readnone' from a callsite
    // is not sufficient to stop LICM from looking back up to the
    // function definition for the attribute.  We can short circuit that
    // by creating an operand bundle.  The name "nohoist" is not
    // significant; anything will do.
    //
    // CI      - the call to protect. Its attributes are modified in place and
    //           all of its uses are redirected to the returned call, but it is
    //           NOT erased here; the caller has to remove it.
    // returns - a clone of CI, inserted immediately before CI, that carries
    //           CI's existing operand bundles plus the extra "nohoist" bundle.
inline llvm::CallInst* setUnsafeToHoistAttr(llvm::CallInst *CI)
    {
        // Attributes are set on CI first so that the clone created below
        // is built from the already-updated call.
        CI->setConvergent();
#if LLVM_VERSION_MAJOR >= 7
        CI->setOnlyAccessesInaccessibleMemory();
        CI->removeAttribute(llvm::AttributeList::FunctionIndex, llvm::Attribute::ReadNone);
#else
        // Older LLVM: same two attribute changes via the AttributeSet API.
        CI->addAttribute(
            llvm::AttributeSet::FunctionIndex, llvm::Attribute::InaccessibleMemOnly);
        CI->removeAttribute(llvm::AttributeSet::FunctionIndex, llvm::Attribute::ReadNone);
#endif
        // Empty bundle: only its presence matters, it carries no inputs.
        llvm::OperandBundleDef OpDef("nohoist", llvm::None);

        // An operand bundle cannot be appended onto a call after creation.
        // clone the instruction but add our operandbundle on as well.
        llvm::SmallVector<llvm::OperandBundleDef, 1> OpBundles;
        CI->getOperandBundlesAsDefs(OpBundles);
        OpBundles.push_back(OpDef);
        // Third argument is the insertion point: the clone goes right before CI.
        llvm::CallInst *NewCall = llvm::CallInst::Create(CI, OpBundles, CI);
        CI->replaceAllUsesWith(NewCall);
        return NewCall;
    }
3678 
3679 template<bool preserveNames, typename T, typename Inserter>
3680 inline llvm::Value*
create_wavePrefixBitCount(llvm::Value * src,llvm::Value * Mask)3681 LLVM3DBuilder<preserveNames, T, Inserter>::create_wavePrefixBitCount(
3682     llvm::Value* src, llvm::Value *Mask)
3683 {
3684     //bits = ballot(bBit);
3685     //laneMaskLT = (1 << WaveGetLaneIndex()) - 1;
3686     //prefixBitCount = countbits(bits & laneMaskLT);
3687     llvm::Value* ballot = this->create_waveBallot(src);
3688     if (Mask)
3689         ballot = this->CreateAnd(ballot, Mask);
3690     llvm::Value* shlLaneId = this->CreateShl(
3691         this->getInt32(1), this->get32BitLaneID());
3692     llvm::Value* laneMask = this->CreateSub(shlLaneId, this->getInt32(1));
3693     llvm::Value *mask = this->CreateAnd(ballot, laneMask);
3694 
3695     // update llvm.ctpop so it won't be hoisted/sunk out of the loop.
3696     auto *PopCnt = this->create_countbits(mask);
3697     auto *NoHoistPopCnt = setUnsafeToHoistAttr(PopCnt);
3698     PopCnt->eraseFromParent();
3699     return NoHoistPopCnt;
3700 }
3701 
3702 template<bool preserveNames, typename T, typename Inserter>
create_waveMatch(llvm::Instruction * inst,llvm::Value * src)3703 inline llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::create_waveMatch(
3704     llvm::Instruction *inst,
3705     llvm::Value       *src)
3706 {
3707 
3708     // Note that we will stay in the loop above as long as there is at least
3709     // one active lane remaining.
3710 
3711     // We will split the basic blocks twice.  The first will create a
3712     // pre-header for the loop code.  The second will separate the WaveMatch
3713     // from code after it so it can be broken down into a sequence of
3714     // instructions and then branch to the remaining code when done.
3715 
3716     auto *PreHeader = inst->getParent();
3717     auto *BodyBlock = PreHeader->splitBasicBlock(inst, "wavematch-body");
3718     auto *EndBlock = BodyBlock->splitBasicBlock(
3719         inst->getNextNode(), "wavematch-end");
3720 
3721     // Make sure that we set the insert point again as we've just invalidated
3722     // it with the splitBasicBlock() calls above.
3723     this->SetInsertPoint(inst);
3724 
3725     // Now generate the code for a single iteration of the code
3726     auto *FirstValue = this->readFirstLane(src);
3727     llvm::Value *CmpRes = nullptr;
3728     if (src->getType()->isFloatingPointTy())
3729         CmpRes = this->CreateFCmpOEQ(FirstValue, src);
3730     else
3731         CmpRes = this->CreateICmpEQ(FirstValue, src);
3732 
3733     auto *Mask = this->create_waveBallot(CmpRes);
3734 
3735     // Replace the current terminator to either exit the loop
3736     // or branch back for another iteration.
3737     auto *Br = BodyBlock->getTerminator();
3738     this->SetInsertPoint(Br);
3739     this->CreateCondBr(CmpRes, EndBlock, BodyBlock);
3740     Br->eraseFromParent();
3741 
3742     // Now, gather up the output struct outside of the loop
3743     this->SetInsertPoint(&*EndBlock->getFirstInsertionPt());
3744 
3745     return Mask;
3746 }
3747 
3748 template<bool preserveNames, typename T, typename Inserter>
3749 inline llvm::Value*
create_waveMultiPrefix(llvm::Instruction * I,llvm::Value * Val,llvm::Value * Mask,IGC::WaveOps OpKind)3750 LLVM3DBuilder<preserveNames, T, Inserter>::create_waveMultiPrefix(
3751     llvm::Instruction *I,
3752     llvm::Value *Val,
3753     llvm::Value *Mask,
3754     IGC::WaveOps OpKind)
3755 {
3756     // This implementation is similar create_waveMatch() in that we loop
3757     // until all subsets of lanes are processed.
3758     auto *PreHeader = I->getParent();
3759     auto *BodyBlock = PreHeader->splitBasicBlock(I, "multiprefix-body");
3760     auto *EndBlock = BodyBlock->splitBasicBlock(
3761         I->getNextNode(), "multiprefix-end");
3762 
3763     // Make sure that we set the insert point again as we've just invalidated
3764     // it with the splitBasicBlock() calls above.
3765     this->SetInsertPoint(I);
3766 
3767     // Now generate the code for a single iteration of the code
3768     auto *FirstValue = this->readFirstLane(Mask);
3769     auto *ParticipatingLanes = this->create_waveInverseBallot(FirstValue);
3770 
3771     auto *WavePrefix = this->create_wavePrefix(
3772         Val, this->getInt8((uint8_t)OpKind), false, ParticipatingLanes);
3773 
3774     // Replace the current terminator to either exit the loop
3775     // or branch back for another iteration.
3776     auto *Br = BodyBlock->getTerminator();
3777     this->SetInsertPoint(Br);
3778     this->CreateCondBr(ParticipatingLanes, EndBlock, BodyBlock);
3779     Br->eraseFromParent();
3780 
3781     this->SetInsertPoint(&*EndBlock->getFirstInsertionPt());
3782 
3783     return WavePrefix;
3784 }
3785 
3786 template<bool preserveNames, typename T, typename Inserter>
3787 inline llvm::Value*
create_waveMultiPrefixBitCount(llvm::Instruction * I,llvm::Value * Val,llvm::Value * Mask)3788 LLVM3DBuilder<preserveNames, T, Inserter>::create_waveMultiPrefixBitCount(
3789     llvm::Instruction *I,
3790     llvm::Value *Val,
3791     llvm::Value *Mask)
3792 {
3793     // Similar structure to waveMatch and waveMultiPrefix
3794     auto *PreHeader = I->getParent();
3795     auto *BodyBlock = PreHeader->splitBasicBlock(I, "multiprefixbitcount-body");
3796     auto *EndBlock = BodyBlock->splitBasicBlock(
3797         I->getNextNode(), "multiprefixbitcount-end");
3798 
3799     // Make sure that we set the insert point again as we've just invalidated
3800     // it with the splitBasicBlock() calls above.
3801     this->SetInsertPoint(I);
3802 
3803     // Now generate the code for a single iteration of the code
3804     auto *FirstValue = this->readFirstLane(Mask);
3805 
3806     auto *Count = this->create_wavePrefixBitCount(Val, FirstValue);
3807 
3808     // Replace the current terminator to either exit the loop
3809     // or branch back for another iteration.
3810     auto *Br = BodyBlock->getTerminator();
3811     this->SetInsertPoint(Br);
3812     auto *ParticipatingLanes = this->create_waveInverseBallot(FirstValue);
3813     this->CreateCondBr(ParticipatingLanes, EndBlock, BodyBlock);
3814     Br->eraseFromParent();
3815 
3816     this->SetInsertPoint(&*EndBlock->getFirstInsertionPt());
3817 
3818     return Count;
3819 }
3820 
3821 template<bool preserveNames, typename T, typename Inserter>
create_quadPrefix(llvm::Value * src,llvm::Value * type,bool inclusive)3822 inline llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::create_quadPrefix(llvm::Value* src, llvm::Value* type, bool inclusive)
3823 {
3824     llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
3825     llvm::Function* pFunc = llvm::GenISAIntrinsic::getDeclaration(
3826         module,
3827         llvm::GenISAIntrinsic::GenISA_QuadPrefix,
3828         src->getType());
3829     return this->CreateCall3(pFunc, src, type, this->getInt1(inclusive));
3830 }
3831 
3832 template<bool preserveNames, typename T, typename Inserter>
get32BitLaneID()3833 inline llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::get32BitLaneID()
3834 {
3835     llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
3836     llvm::Function* pFunc = llvm::GenISAIntrinsic::getDeclaration(
3837         module,
3838         llvm::GenISAIntrinsic::GenISA_simdLaneId);
3839     llvm::Value* int16LaneId =  this->CreateCall(pFunc);
3840     return this->CreateZExt(int16LaneId, this->getInt32Ty());
3841 }
3842 
3843 template<bool preserveNames, typename T, typename Inserter>
getSimdSize()3844 inline llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::getSimdSize()
3845 {
3846     llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
3847     llvm::Function* pFunc = llvm::GenISAIntrinsic::getDeclaration(module, llvm::GenISAIntrinsic::GenISA_simdSize);
3848     return this->CreateCall(pFunc);
3849 }
3850 
3851 template<bool preserveNames, typename T, typename Inserter>
getFirstLaneID()3852 inline llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::getFirstLaneID()
3853 {
3854     //fbl(WaveBallot(true))
3855     llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
3856     llvm::Value* ballot = this->create_waveBallot(this->getInt1(1));
3857     llvm::Function* pFunc = llvm::GenISAIntrinsic::getDeclaration(
3858         module,
3859         llvm::GenISAIntrinsic::GenISA_firstbitLo);
3860     return this->CreateCall(pFunc, ballot);
3861 }
3862 
3863 template<bool preserveNames, typename T, typename Inserter>
readFirstLane(llvm::Value * src)3864 inline llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::readFirstLane(llvm::Value* src)
3865 {
3866     llvm::Value* firstLaneID = this->getFirstLaneID();
3867     return this->create_waveshuffleIndex(src, firstLaneID);
3868 }
3869 
3870 ///////////////////////////////////////////////////////////////////////////////
3871 /// @brief Creates data conversion for typed image reads.
3872 ///     Gen HW supports only a limited number of surface formats through data
3873 /// port data cache typed read messages. The complete list of formats supported
3874 /// for reads is available in the Programmer's Reference Manual.
3875 /// Some of the unsupported formats are mandatory in Vulkan and OGL.
3876 /// In order to support these formats the driver and the compiler implement the
3877 /// following emulation:
3878 /// Since Gen9, HW typed read messages return raw data when reading from an
3879 /// unsupported format, so it is enough to call the conversion method
3880 /// CreateImageDataConversion() on the data returned from typed read messages.
3881 ///
3882 /// @param format Surface format of the typed image (original i.e. from shader)
3883 /// @param data Data returned by typed read message
3884 /// @returns llvm::Value* Vector of data converted to the input surface format.
3885 ///
3886 template<bool preserveNames, typename T, typename Inserter>
3887 inline
CreateImageDataConversion(IGC::SURFACE_FORMAT format,llvm::Value * data)3888 llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::CreateImageDataConversion(
3889     IGC::SURFACE_FORMAT format,
3890     llvm::Value* data)
3891 {
3892     IGC_ASSERT(nullptr != m_Platform);
3893 
3894     switch (format)
3895     {
3896     case IGC::SURFACE_FORMAT::SURFACE_FORMAT_R16G16B16A16_UNORM:
3897     case IGC::SURFACE_FORMAT::SURFACE_FORMAT_R16G16B16A16_SNORM:
3898     case IGC::SURFACE_FORMAT::SURFACE_FORMAT_R8G8B8A8_UNORM:
3899     case IGC::SURFACE_FORMAT::SURFACE_FORMAT_R8G8B8A8_SNORM:
3900     case IGC::SURFACE_FORMAT::SURFACE_FORMAT_R16G16_UNORM:
3901     case IGC::SURFACE_FORMAT::SURFACE_FORMAT_R16G16_SNORM:
3902     case IGC::SURFACE_FORMAT::SURFACE_FORMAT_R8G8_UNORM:
3903     case IGC::SURFACE_FORMAT::SURFACE_FORMAT_R8G8_SNORM:
3904     case IGC::SURFACE_FORMAT::SURFACE_FORMAT_R16_UNORM:
3905     case IGC::SURFACE_FORMAT::SURFACE_FORMAT_R16_SNORM:
3906     case IGC::SURFACE_FORMAT::SURFACE_FORMAT_R8_UNORM:
3907     case IGC::SURFACE_FORMAT::SURFACE_FORMAT_R8_SNORM:
3908         if (m_Platform->hasHDCSupportForTypedReadsUnormSnormToFloatConversion())
3909         {
3910             return data;
3911         }
3912         break;
3913     default:
3914         break;
3915     }
3916 
3917 
3918     llvm::Value* pFormatConvertedLLVMLdUAVTypedResult = data;
3919     switch (format)
3920     {
3921     case IGC::SURFACE_FORMAT::SURFACE_FORMAT_R16G16B16A16_UNORM:
3922     {
3923         llvm::Value* pTempVec4 = llvm::UndefValue::get(IGCLLVM::FixedVectorType::get(this->getFloatTy(), 4));
3924         llvm::Value* pConstFloat = llvm::cast<llvm::ConstantFP>(llvm::ConstantFP::get(this->getFloatTy(), (1.0f / 65535.0f)));
3925         llvm::Value* pTempInt32 = llvm::UndefValue::get(this->getInt32Ty());
3926         llvm::Value* pTempInt16 = llvm::UndefValue::get(this->getInt32Ty());
3927         llvm::Value* pTempFloat = llvm::UndefValue::get(this->getFloatTy());
3928         llvm::Value* pMaskLow = this->getInt32(0x0000FFFF);
3929         llvm::Value* pShift16 = this->getInt32(0x00000010);
3930 
3931         // pTempFloat = pLdUAVTypedResult[0];
3932         pTempFloat = this->CreateExtractElement(data, this->getInt32(0));
3933 
3934         // Retrieve unsigned short value (component 0).
3935         pTempInt32 = this->CreateBitCast(pTempFloat, this->getInt32Ty());
3936         pTempInt16 = this->CreateAnd(pTempInt32, pMaskLow);
3937 
3938         // Convert unsigned short to float (component 0).
3939         pTempFloat = this->CreateUIToFP(pTempInt16, this->getFloatTy());
3940         pTempFloat = this->CreateFMul(pTempFloat, pConstFloat);
3941 
3942         // Store component 0 in output vector (pTempVec4[0]).
3943         pTempVec4 = this->CreateInsertElement(pTempVec4, pTempFloat, this->getInt32(0));
3944 
3945         // Retrieve unsigned short value (component 1).
3946         pTempInt16 = this->CreateLShr(pTempInt32, pShift16);
3947 
3948         // Convert unsigned short to float (component 1).
3949         pTempFloat = this->CreateUIToFP(pTempInt16, this->getFloatTy());
3950         pTempFloat = this->CreateFMul(pTempFloat, pConstFloat);
3951 
3952         // Store component 1 in output vector (pTempVec4[1]).
3953         pTempVec4 = this->CreateInsertElement(pTempVec4, pTempFloat, this->getInt32(1));
3954 
3955         // pTempFloat = pLdUAVTypedResult[1];
3956         pTempFloat = this->CreateExtractElement(data, this->getInt32(1));
3957 
3958         // Retrieve unsigned short value (component 2).
3959         pTempInt32 = this->CreateBitCast(pTempFloat, this->getInt32Ty());
3960         pTempInt16 = this->CreateAnd(pTempInt32, pMaskLow);
3961 
3962         // Convert unsigned short to float (component 2).
3963         pTempFloat = this->CreateUIToFP(pTempInt16, this->getFloatTy());
3964         pTempFloat = this->CreateFMul(pTempFloat, pConstFloat);
3965 
3966         // Store component 2 in output vector (pTempVec4[2]).
3967         pTempVec4 = this->CreateInsertElement(pTempVec4, pTempFloat, this->getInt32(2));
3968 
3969         // Retrieve unsigned short value (component 3).
3970         pTempInt16 = this->CreateLShr(pTempInt32, pShift16);
3971 
3972         // Convert unsigned short to float (component 3).
3973         pTempFloat = this->CreateUIToFP(pTempInt16, this->getFloatTy());
3974         pTempFloat = this->CreateFMul(pTempFloat, pConstFloat);
3975 
3976         // Store component 3 in output vector (pTempVec4[3]).
3977         pTempVec4 = this->CreateInsertElement(pTempVec4, pTempFloat, this->getInt32(3));
3978 
3979         pFormatConvertedLLVMLdUAVTypedResult = pTempVec4;
3980         break;
3981     }
3982     case IGC::SURFACE_FORMAT::SURFACE_FORMAT_R16G16B16A16_SNORM:
3983     {
3984         llvm::Value* pTempVec4 = llvm::UndefValue::get(IGCLLVM::FixedVectorType::get(this->getFloatTy(), 4));
3985         llvm::Value* pScalingFactor = this->getFloat(1.0f / 32767.0f);
3986         llvm::Value* pTempInt32;
3987         llvm::Value* pTempInt16;
3988         llvm::Value* pTempFloat;
3989         llvm::Value* pNegativeOne = this->getFloat(-1.0f);
3990         llvm::Value* pCmp_result;
3991         llvm::Value* fieldWidth = this->getInt32(16);
3992 
3993         // pTempFloat = pLdUAVTypedResult[0];
3994         pTempFloat = this->CreateExtractElement(data, this->getInt32(0));
3995 
3996         // Retrieve unsigned short value (component 0).
3997         pTempInt32 = this->CreateBitCast(pTempFloat, this->getInt32Ty());
3998         pTempInt16 = this->Create_IBFE(fieldWidth, this->getInt32(0), pTempInt32);
3999 
4000         // Convert signed short to float (component 0).
4001         pTempFloat = this->CreateSIToFP(pTempInt16, this->getFloatTy());
4002         pTempFloat = this->CreateFMul(pTempFloat, pScalingFactor);
4003 
4004         // Compare with -1.0f
4005         pCmp_result = this->CreateFCmp(llvm::FCmpInst::FCMP_OGE, pTempFloat, pNegativeOne);
4006         pTempFloat = this->CreateSelect(pCmp_result, pTempFloat, pNegativeOne);
4007 
4008         // Store component 0 in output vector (pTempVec4[0]).
4009         pTempVec4 = this->CreateInsertElement(pTempVec4, pTempFloat, this->getInt32(0));
4010 
4011         // Retrieve unsigned short value (component 1).
4012         pTempInt16 = this->CreateAShr(pTempInt32, 16);
4013 
4014         // Convert signed short to float (component 1).
4015         pTempFloat = this->CreateSIToFP(pTempInt16, this->getFloatTy());
4016         pTempFloat = this->CreateFMul(pTempFloat, pScalingFactor);
4017 
4018         // Compare with -1.0f
4019         pCmp_result = this->CreateFCmp(llvm::FCmpInst::FCMP_OGE, pTempFloat, pNegativeOne);
4020         pTempFloat = this->CreateSelect(pCmp_result, pTempFloat, pNegativeOne);
4021 
4022         // Store component 1 in output vector (pTempVec4[1]).
4023         pTempVec4 = this->CreateInsertElement(pTempVec4, pTempFloat, this->getInt32(1));
4024 
4025         // pTempFloat = pLdUAVTypedResult[1];
4026         pTempFloat = this->CreateExtractElement(data, this->getInt32(1));
4027 
4028         // Retrieve unsigned short value (component 2).
4029         pTempInt32 = this->CreateBitCast(pTempFloat, this->getInt32Ty());
4030         pTempInt16 = this->Create_IBFE(fieldWidth, this->getInt32(0), pTempInt32);
4031 
4032         // Convert unsigned short to float (component 2).
4033         pTempFloat = this->CreateSIToFP(pTempInt16, this->getFloatTy());
4034         pTempFloat = this->CreateFMul(pTempFloat, pScalingFactor);
4035 
4036         // Compare with -1.0f
4037         pCmp_result = this->CreateFCmp(llvm::FCmpInst::FCMP_OGE, pTempFloat, pNegativeOne);
4038         pTempFloat = this->CreateSelect(pCmp_result, pTempFloat, pNegativeOne);
4039 
4040         // Store component 2 in output vector (pTempVec4[2]).
4041         pTempVec4 = this->CreateInsertElement(pTempVec4, pTempFloat, this->getInt32(2));
4042 
4043         // Retrieve unsigned short value (component 3).
4044         pTempInt16 = this->CreateAShr(pTempInt32, 16);
4045 
4046         // Convert unsigned short to float (component 3).
4047         pTempFloat = this->CreateSIToFP(pTempInt16, this->getFloatTy());
4048         pTempFloat = this->CreateFMul(pTempFloat, pScalingFactor);
4049 
4050         // Compare with -1.0f
4051         pCmp_result = this->CreateFCmp(llvm::FCmpInst::FCMP_OGE, pTempFloat, pNegativeOne);
4052         pTempFloat = this->CreateSelect(pCmp_result, pTempFloat, pNegativeOne);
4053 
4054         // Store component 3 in output vector (pTempVec4[3]).
4055         pTempVec4 = this->CreateInsertElement(pTempVec4, pTempFloat, this->getInt32(3));
4056 
4057         pFormatConvertedLLVMLdUAVTypedResult = pTempVec4;
4058         break;
4059     }
4060     case IGC::SURFACE_FORMAT::SURFACE_FORMAT_R10G10B10A2_UNORM:
4061     {
4062         llvm::Value* pImmediateXYZ = llvm::cast<llvm::ConstantFP>(llvm::ConstantFP::get(this->getFloatTy(), (1.0f / 1023.0f)));
4063         llvm::Value* pImmediateW = llvm::cast<llvm::ConstantFP>(llvm::ConstantFP::get(this->getFloatTy(), (1.0f / 3.0f)));
4064         llvm::Value* pMaskXYZ = this->getInt32(0x000003ff);
4065         llvm::Value* pMaskW = this->getInt32(0x00000003);
4066         llvm::Value* pShiftData = this->getInt32(10);
4067 
4068         llvm::Value* pTempVec4 = llvm::UndefValue::get(IGCLLVM::FixedVectorType::get(this->getFloatTy(), 4));
4069         llvm::Value* pTempInt32 = llvm::UndefValue::get(this->getInt32Ty());
4070         llvm::Value* pTempIntWithMask = llvm::UndefValue::get(this->getInt32Ty());
4071         llvm::Value* pTempFloat = llvm::UndefValue::get(this->getFloatTy());
4072         llvm::Value* pTempShiftRightData = llvm::UndefValue::get(this->getInt32Ty());
4073 
4074         // pTempFloat = pLdUAVTypedResult[0];
4075         pTempFloat = this->CreateExtractElement(data, this->getInt32(0));
4076 
4077         // Retrieve unsigned short value (component 0).
4078         pTempInt32 = this->CreateBitCast(pTempFloat, this->getInt32Ty());
4079         pTempIntWithMask = this->CreateAnd(pTempInt32, pMaskXYZ);
4080 
4081         // Convert unsigned short to float (component 0).
4082         pTempFloat = this->CreateUIToFP(pTempIntWithMask, this->getFloatTy());
4083         pTempFloat = this->CreateFMul(pTempFloat, pImmediateXYZ);
4084 
4085         // Store component 0 in output vector (pTempVec4[0]).
4086         pTempVec4 = this->CreateInsertElement(pTempVec4, pTempFloat, this->getInt32(0));
4087 
4088         // Retrieve unsigned short value (component 1).
4089         pTempShiftRightData = this->CreateLShr(pTempInt32, pShiftData);
4090 
4091         pTempIntWithMask = this->CreateAnd(pTempShiftRightData, pMaskXYZ);
4092 
4093         // Convert unsigned short to float.
4094         pTempFloat = this->CreateUIToFP(pTempIntWithMask, this->getFloatTy());
4095         pTempFloat = this->CreateFMul(pTempFloat, pImmediateXYZ);
4096 
4097         // Store component 1 in output vector (pTempVec4[1]).
4098         pTempVec4 = this->CreateInsertElement(pTempVec4, pTempFloat, this->getInt32(1));
4099 
4100         // Retrieve unsigned short value.
4101         pTempShiftRightData = this->CreateLShr(pTempShiftRightData, pShiftData);
4102 
4103         pTempIntWithMask = this->CreateAnd(pTempShiftRightData, pMaskXYZ);
4104 
4105         // Convert unsigned short to float.
4106         pTempFloat = this->CreateUIToFP(pTempIntWithMask, this->getFloatTy());
4107         pTempFloat = this->CreateFMul(pTempFloat, pImmediateXYZ);
4108 
4109         // Store component 2 in output vector (pTempVec4[2]).
4110         pTempVec4 = this->CreateInsertElement(pTempVec4, pTempFloat, this->getInt32(2));
4111 
4112         // Retrieve unsigned short value.
4113         pTempShiftRightData = this->CreateLShr(pTempShiftRightData, pShiftData);
4114 
4115         pTempIntWithMask = this->CreateAnd(pTempShiftRightData, pMaskW);
4116 
4117         // Convert unsigned short to float.
4118         pTempFloat = this->CreateUIToFP(pTempIntWithMask, this->getFloatTy());
4119         pTempFloat = this->CreateFMul(pTempFloat, pImmediateW);
4120 
4121         // Store component 3 in output vector (pTempVec4[3]).
4122         pTempVec4 = this->CreateInsertElement(pTempVec4, pTempFloat, this->getInt32(3));
4123         pFormatConvertedLLVMLdUAVTypedResult = pTempVec4;
4124         break;
4125     }
4126     case IGC::SURFACE_FORMAT::SURFACE_FORMAT_R11G11B10_FLOAT:
4127     {
4128         // This surface format packs 3 half-float values into 32-bit string.
4129         // Half-floats are always non-negative, so to save space sign bit
4130         // is not stored and assumed to be zero.
4131         // Only 11 or 10 most significant bits (not counting sign bit)
4132         // of the 16 bits of IEEE 754 float16 are stored.
4133         // The least significant bits of the mantissa are assumed to be zero.
4134         // First value is stored in bits 0--10.     (r)
4135         // Second value is stored in bits 11 - 21   (g)
4136         // Third value is stored in bits 22 - 31    (b)
4137         // Fourth value is set to 1.0f.
4138 
4139         llvm::Value* pMaskX = this->getInt32(0x000007ff);
4140         llvm::Value* pMaskY = this->getInt32(0x00007ff0);
4141         llvm::Value* pMaskZ = this->getInt32(0x00007fe0);
4142         llvm::Value* pShiftDataX = this->getInt32(4);
4143         llvm::Value* pShiftDataY = this->getInt32(7);
4144         llvm::Value* pShiftDataZ = this->getInt32(10);
4145         llvm::Value* pTempFloat;
4146         llvm::Value* pTempFloat0;
4147         llvm::Value* pTempInt;
4148         llvm::Value* pTempInt0;
4149 
4150         llvm::Value* pTempVec4 = llvm::UndefValue::get(IGCLLVM::FixedVectorType::get(this->getFloatTy(), 4));
4151 
4152         // pTempFloat0 = pLdUAVTypedResult[0];
4153         pTempFloat0 = this->CreateExtractElement(data, this->getInt32(0));
4154         pTempInt0 = this->CreateBitCast(pTempFloat0, this->getInt32Ty());
4155 
4156         pTempInt = this->CreateAnd(pTempInt0, pMaskX);
4157         pTempInt = this->CreateShl(pTempInt, pShiftDataX);
4158         pTempInt = this->CreateTrunc(pTempInt, this->getInt16Ty());
4159         pTempFloat = this->CreateBitCast(pTempInt, llvm::Type::getHalfTy(this->getContext()));
4160         pTempFloat = this->CreateF16TOF32(pTempFloat);
4161 
4162         // Store component 0 in output vector (pTempVec4[0]).
4163         pTempVec4 = this->CreateInsertElement(pTempVec4, pTempFloat, this->getInt32(0));
4164 
4165         pTempInt0 = this->CreateLShr(pTempInt0, pShiftDataY);
4166         pTempInt = this->CreateAnd(pTempInt0, pMaskY);
4167         pTempInt = this->CreateTrunc(pTempInt, this->getInt16Ty());
4168         pTempFloat = this->CreateBitCast(pTempInt, llvm::Type::getHalfTy(this->getContext()));
4169         pTempFloat = this->CreateF16TOF32(pTempFloat);
4170 
4171         // Store component 1 in output vector (pTempVec4[1]).
4172         pTempVec4 = this->CreateInsertElement(pTempVec4, pTempFloat, this->getInt32(1));
4173 
4174         pTempInt0 = this->CreateLShr(pTempInt0, pShiftDataZ);
4175         pTempInt = this->CreateAnd(pTempInt0, pMaskZ);
4176         pTempInt = this->CreateTrunc(pTempInt, this->getInt16Ty());
4177         pTempFloat = this->CreateBitCast(pTempInt, llvm::Type::getHalfTy(this->getContext()));
4178         pTempFloat = this->CreateF16TOF32(pTempFloat);
4179 
4180         // Store component 2 in output vector (pTempVec4[2]).
4181         pTempVec4 = this->CreateInsertElement(pTempVec4, pTempFloat, this->getInt32(2));
4182 
4183         // store 1.0 into component 3
4184         pTempVec4 = this->CreateInsertElement(pTempVec4, getFloat(1.0f), this->getInt32(3));
4185         pFormatConvertedLLVMLdUAVTypedResult = pTempVec4;
4186         break;
4187     }
4188     case IGC::SURFACE_FORMAT::SURFACE_FORMAT_R10G10B10A2_UINT:
4189     {
4190         // AND          ro.x, ri.x, { 0x000003ff };
4191         // SHR          ri.x, ri.x, { 10 };
4192         // AND          ro.y, ri.x, { 0x000003ff };
4193         // SHR          ri.x, ri.x, { 10 };
4194         // AND          ro.z, ri.x, { 0x000003ff };
4195         // SHR          ri.x, ri.x, { 10 };
4196         // AND          ro.w, ri.x, { 0x00000003 };
4197         // copy results
4198         llvm::Value* pMaskXYZ = this->getInt32(0x000003ff);
4199         llvm::Value* pMaskW = this->getInt32(0x00000003);
4200         llvm::Value* pShiftDataXYZ = this->getInt32(10);
4201 
4202         llvm::Value* pTempFloat = llvm::UndefValue::get(this->getFloatTy());
4203         llvm::Value* pTempInt32 = llvm::UndefValue::get(this->getInt32Ty());
4204         llvm::Value* pTempIntRes = llvm::UndefValue::get(this->getInt32Ty());
4205 
4206         llvm::Value* pTempVec4 = llvm::UndefValue::get(IGCLLVM::FixedVectorType::get(this->getFloatTy(), 4));
4207 
4208         // pTempFloat = pLdUAVTypedResult[0];
4209         pTempFloat = this->CreateExtractElement(data, this->getInt32(0));
4210         pTempInt32 = this->CreateBitCast(pTempFloat, this->getInt32Ty());
4211 
4212         // AND          ro.x, ri.x, { 0x000003ff };
4213         pTempIntRes = this->CreateAnd(pTempInt32, pMaskXYZ);
4214         pTempFloat = this->CreateBitCast(pTempIntRes, this->getFloatTy());
4215 
4216         // Store component 0 in output vector (pTempVec4[0]).
4217         pTempVec4 = this->CreateInsertElement(pTempVec4, pTempFloat, this->getInt32(0));
4218 
4219         // SHR          ri.x, ri.x, { 10 };
4220         // AND          ro.y, ri.x, { 0x000003ff };
4221         pTempInt32 = this->CreateLShr(pTempInt32, pShiftDataXYZ);
4222         pTempIntRes = this->CreateAnd(pTempInt32, pMaskXYZ);
4223         pTempFloat = this->CreateBitCast(pTempIntRes, this->getFloatTy());
4224 
4225         // Store component 1 in output vector (pTempVec4[1]).
4226         pTempVec4 = this->CreateInsertElement(pTempVec4, pTempFloat, this->getInt32(1));
4227 
4228         // SHR          ri.x, ri.x, { 10 };
4229         // AND          ro.z, ri.x, { 0x000003ff };
4230         pTempInt32 = this->CreateLShr(pTempInt32, pShiftDataXYZ);
4231         pTempIntRes = this->CreateAnd(pTempInt32, pMaskXYZ);
4232         pTempFloat = this->CreateBitCast(pTempIntRes, this->getFloatTy());
4233 
4234         // Store component 2 in output vector (pTempVec4[2]).
4235         pTempVec4 = this->CreateInsertElement(pTempVec4, pTempFloat, this->getInt32(2));
4236 
4237         // SHR          ri.x, ri.x, { 10 };
4238         // AND          ro.w, ri.x, { 3 };
4239         pTempInt32 = this->CreateLShr(pTempInt32, pShiftDataXYZ);
4240         pTempIntRes = this->CreateAnd(pTempInt32, pMaskW);
4241         pTempFloat = this->CreateBitCast(pTempIntRes, this->getFloatTy());
4242 
4243         // Store component 3 in output vector (pTempVec4[3]).
4244         pTempVec4 = this->CreateInsertElement(pTempVec4, pTempFloat, this->getInt32(3));
4245         pFormatConvertedLLVMLdUAVTypedResult = pTempVec4;
4246         break;
4247     }
4248     case IGC::SURFACE_FORMAT::SURFACE_FORMAT_R8G8B8A8_UNORM:
4249     {
4250         // immX = 0x8, immY = 0x10, immZ = 0x18
4251         // immMaskLow = 0x000000FF
4252         // AND rTemp.x, ri.x, immMaskLow
4253         // ubfe rTemp.y, immX, immX, ri.x
4254         // ubfe rTemp.z, immX, immY, ri.x
4255         // ubfe rTemp.w, immX, immZ, ri.x
4256         // ubtof rTemp, rTemp
4257         // Fmul  rOutput, rTemp, 1.0f/255.0f
4258         llvm::Value* pMaskLow8 = this->getInt32(0x000000FF);
4259         llvm::Value* pImmX = this->getInt32(0x8);
4260         llvm::Value* pImmY = this->getInt32(0x10);
4261         llvm::Value* pImmZ = this->getInt32(0x18);
4262         llvm::Value* pConstFloat = this->getFloat(1.0f / 255.0f);
4263         llvm::Value* pTempFloat = llvm::UndefValue::get(this->getFloatTy());
4264         llvm::Value* pTempInt32 = llvm::UndefValue::get(this->getInt32Ty());
4265         llvm::Value* pTempInt32Res = llvm::UndefValue::get(this->getInt32Ty());
4266         llvm::Value* pTempVec4 = llvm::UndefValue::get(IGCLLVM::FixedVectorType::get(this->getFloatTy(), 4));
4267 
4268         // pTempFloat = pLdUAVTypedResult[0];
4269         pTempFloat = this->CreateExtractElement(data, this->getInt32(0));
4270         pTempInt32 = this->CreateBitCast(pTempFloat, this->getInt32Ty());
4271 
4272         // AND rTemp.x, ri.x, immMaskLow
4273         pTempInt32Res = this->CreateAnd(pTempInt32, pMaskLow8);
4274 
4275         // ubtof rTemp.x, rTemp.x
4276         pTempFloat = this->CreateUIToFP(pTempInt32Res, this->getFloatTy());
4277 
4278         // Fmul  rOutput.x, rTemp.x, 1.0f/255.0f
4279         pTempFloat = this->CreateFMul(pTempFloat, pConstFloat);
4280 
4281         // Store component 0 in output vector (pTempVec4[0]).
4282         pTempVec4 = this->CreateInsertElement(pTempVec4, pTempFloat, this->getInt32(0));
4283 
4284         // ubfe rTemp.y, immX, immX,  ri.x
4285         pTempInt32Res = this->Create_UBFE(pImmX, pImmX, pTempInt32);
4286 
4287         // ubtof rTemp.y, rTemp.y
4288         pTempFloat = this->CreateUIToFP(pTempInt32Res, this->getFloatTy());
4289 
4290         // Fmul  rOutput.y, rTemp.y, 1.0f/255.0f
4291         pTempFloat = this->CreateFMul(pTempFloat, pConstFloat);
4292 
4293         // Store component 1 in output vector (pTempVec4[1]).
4294         pTempVec4 = this->CreateInsertElement(pTempVec4, pTempFloat, this->getInt32(1));
4295 
4296         // ubfe rTemp.z, immX, immY,  ri.x
4297         pTempInt32Res = this->Create_UBFE(pImmX, pImmY, pTempInt32);
4298 
4299         // ubtof rTemp.z, rTemp.z
4300         pTempFloat = this->CreateUIToFP(pTempInt32Res, this->getFloatTy());
4301 
4302         // Fmul  rOutput.z, rTemp.z, 1.0f/255.0f
4303         pTempFloat = this->CreateFMul(pTempFloat, pConstFloat);
4304 
4305         // Store component 2 in output vector (pTempVec4[2]).
4306         pTempVec4 = this->CreateInsertElement(pTempVec4, pTempFloat, this->getInt32(2));
4307 
4308         // ubfe rTemp.w, immX, immZ,  ri.x
4309         pTempInt32Res = this->Create_UBFE(pImmX, pImmZ, pTempInt32);
4310 
4311         // ubtof rTemp.w, rTemp.w
4312         pTempFloat = this->CreateUIToFP(pTempInt32Res, this->getFloatTy());
4313 
4314         // Fmul  rOutput.w, rTemp.w, 1.0f/255.0f
4315         pTempFloat = this->CreateFMul(pTempFloat, pConstFloat);
4316 
4317         // Store component 3 in output vector (pTempVec4[0]).
4318         pTempVec4 = this->CreateInsertElement(pTempVec4, pTempFloat, this->getInt32(3));
4319 
4320         pFormatConvertedLLVMLdUAVTypedResult = pTempVec4;
4321         break;
4322     }
4323     case IGC::SURFACE_FORMAT::SURFACE_FORMAT_R8G8B8A8_SNORM:
4324     {
4325         llvm::Value* pScalingFactor = this->getFloat(1.0f / 127.0f);
4326         llvm::Value* fieldWidth = this->getInt32(8);
4327         llvm::Value* fpNegOne = this->getFloat(-1.0f);
4328 
4329         // pTempFloat = pLdUAVTypedResult[0];
4330         llvm::Value* pTempFloat = this->CreateExtractElement(data, this->getInt32(0));
4331         // cast to int32 since result is seen as float
4332         llvm::Value* pInputAsInt32 = this->CreateBitCast(pTempFloat, this->getInt32Ty());
4333 
4334         // create 4-component output vector
4335         llvm::Value* pOutputVec4 = llvm::UndefValue::get(IGCLLVM::FixedVectorType::get(this->getFloatTy(), 4));
4336 
4337         // for each of the four channels
4338         for (unsigned int ch = 0; ch < 4; ++ch)
4339         {
4340             // extract 8 bits with sign extend from position 8*ch..8*ch+7
4341             // for bits 24..31 we can use arithmetic shift right instead of bit extract
4342             llvm::Value* pTempInt32Res = (ch < 3) ?
4343                 this->Create_IBFE(fieldWidth, this->getInt32(8 * ch), pInputAsInt32) :
4344                 this->CreateAShr(pInputAsInt32, 8 * ch);
4345 
4346             // convert to float
4347             pTempFloat = this->CreateSIToFP(pTempInt32Res, this->getFloatTy());
4348 
            // multiply by the scaling factor 1.0f/127.0f
4350             pTempFloat = this->CreateFMul(pTempFloat, pScalingFactor);
4351 
4352             // Fcmp_ge rFlag, rTemp.x, -1.0f
4353             // Sel.rFlag rOutput.x, rTemp.x, -1.0f
4354             llvm::Value* pFlag = this->CreateFCmp(llvm::FCmpInst::FCMP_OGE, pTempFloat, fpNegOne);
4355             pTempFloat = this->CreateSelect(pFlag, pTempFloat, fpNegOne);
4356 
4357             // Store component ch in output vector (pTempVec4[0]).
4358             pOutputVec4 = this->CreateInsertElement(pOutputVec4, pTempFloat, this->getInt32(ch));
4359         }
4360 
4361         pFormatConvertedLLVMLdUAVTypedResult = pOutputVec4;
4362         break;
4363     }
4364     case IGC::SURFACE_FORMAT::SURFACE_FORMAT_R16G16_UNORM:
4365     {
4366         // immMaskHigh = 0x0000FFFF
4367         // rImm.zw = {0.0f, 1.0f}
4368         // AND rTemp.x, ri.x, immMaskHigh
4369         // SHR rTemp.y, ri.x, 0x10,
4370         // USTOF rTemp.xy, rTemp.xy
4371         // FMUL rOutput.xy, rTemp.xy, 1.0f/65535.0f
4372         // MOV rOutput.zw, rImm.zw
4373         llvm::Value* pMaskHigh = this->getInt32(0x0000FFFF);
4374         llvm::Value* pShiftVal = this->getInt32(0x10);
4375         llvm::Value* pImmZ = this->getFloat(0.0f);
4376         llvm::Value* pImmW = this->getFloat(1.0f);
4377         llvm::Value* pConstFloat = this->getFloat(1.0f / 65535.0f);
4378         llvm::Value* pTempFloat = llvm::UndefValue::get(this->getFloatTy());
4379         llvm::Value* pTempInt32 = llvm::UndefValue::get(this->getInt32Ty());
4380         llvm::Value* pTempInt32Res = llvm::UndefValue::get(this->getInt32Ty());
4381         llvm::Value* pTempVec4 = llvm::UndefValue::get(IGCLLVM::FixedVectorType::get(this->getFloatTy(), 4));
4382 
4383         // pTempFloat = pLdUAVTypedResult[0];
4384         pTempFloat = this->CreateExtractElement(data, this->getInt32(0));
4385         pTempInt32 = this->CreateBitCast(pTempFloat, this->getInt32Ty());
4386 
4387         // AND rTemp.x, ri.x, immMaskHigh
4388         pTempInt32Res = this->CreateAnd(pTempInt32, pMaskHigh);
4389 
4390         pTempFloat = this->CreateUIToFP(pTempInt32Res, this->getFloatTy());
4391         pTempFloat = this->CreateFMul(pTempFloat, pConstFloat);
4392 
4393         // Store component 0 in output vector (pTempVec4[0]).
4394         pTempVec4 = this->CreateInsertElement(pTempVec4, pTempFloat, this->getInt32(0));
4395 
4396         pTempInt32Res = this->CreateLShr(pTempInt32, pShiftVal);
4397         pTempFloat = this->CreateUIToFP(pTempInt32Res, this->getFloatTy());
4398         pTempFloat = this->CreateFMul(pTempFloat, pConstFloat);
4399 
4400         // Store component 1 in output vector (pTempVec4[0]).
4401         pTempVec4 = this->CreateInsertElement(pTempVec4, pTempFloat, this->getInt32(1));
4402 
4403         // Store component 2 to value 0.0f in output vector (pTempVec4[0]).
4404         pTempVec4 = this->CreateInsertElement(pTempVec4, pImmZ, this->getInt32(2));
4405 
4406         // Store component 3 to Value 1.0f in output vector (pTempVec4[0]).
4407         pTempVec4 = this->CreateInsertElement(pTempVec4, pImmW, this->getInt32(3));
4408         pFormatConvertedLLVMLdUAVTypedResult = pTempVec4;
4409         break;
4410     }
4411     case IGC::SURFACE_FORMAT::SURFACE_FORMAT_R16G16_SNORM:
4412     {
4413         // immMaskLow16 = 0x0000FFFF
4414         // rImm.zw = {0.0f, 1.0f}
4415         // AND rTemp.x, ri.x, immMaskLow16
4416         // SHR rTemp.y, ri.x, 0x10,
4417         // STOF rTemp.xy, rTemp.xy
4418         // FMUL rTemp.xy, rTemp.xy, 1.0f / 32767.0f
4419         // FCMP_GE rFlag.xy, rTemp.xy, -1.0f
4420         // SEL_rFlag.xy rOutput.xy, rTemp.xy, -1.0f
4421         // MOV rOutput.zw, rImm.zw
4422         llvm::Value* pScalingFactor = getFloat(1.0f / 32767.0f);
4423         llvm::Value* pOutVec4 = llvm::UndefValue::get(IGCLLVM::FixedVectorType::get(this->getFloatTy(), 4));
4424 
4425         // pTempFloat = pLdUAVTypedResult[0];
4426         llvm::Value* pTempFloat = this->CreateExtractElement(data, this->getInt32(0));
4427         llvm::Value* pTempInt32 = this->CreateBitCast(pTempFloat, this->getInt32Ty());
4428 
4429         // extract bits 0..15 and sign extend the result
4430         llvm::Value* pTempInt32Res = Create_IBFE(this->getInt32(16), this->getInt32(0), pTempInt32);
4431 
4432         // convert to float and apply scaling factor
4433         pTempFloat = this->CreateSIToFP(pTempInt32Res, this->getFloatTy());
4434         pTempFloat = this->CreateFMul(pTempFloat, pScalingFactor);
4435 
4436         // clamp to range [-1.0f, 1.0f] since the value can be little less than -1.0f
4437         // Fcmp_ge rFlag, rTemp.x, -1.0f
4438         // Sel.rFlag rOutput.x, rTemp.x, -1.0f
4439         llvm::Value* pFlag = this->CreateFCmp(llvm::FCmpInst::FCMP_OGE, pTempFloat, this->getFloat(-1.0f));
4440         pTempFloat = this->CreateSelect(pFlag, pTempFloat, this->getFloat(-1.0f));
4441 
4442         // Store component 0 in output vector (pTempVec4[0]).
4443         pOutVec4 = this->CreateInsertElement(pOutVec4, pTempFloat, this->getInt32(0));
4444 
4445         // extract bits 16..31 with sign extension
4446         pTempInt32Res = this->CreateAShr(pTempInt32, 16);
4447         pTempFloat = this->CreateSIToFP(pTempInt32Res, this->getFloatTy());
4448         pTempFloat = this->CreateFMul(pTempFloat, pScalingFactor);
4449 
4450         // Fcmp_ge rFlag, rTemp.y, -1.0f
4451         // Sel.rFlag rOutput.y, rTemp.y, -1.0f
4452         pFlag = this->CreateFCmp(llvm::FCmpInst::FCMP_OGE, pTempFloat, this->getFloat(-1.0f));
4453 
4454         pTempFloat = this->CreateSelect(pFlag, pTempFloat, this->getFloat(-1.0f));
4455 
4456         // Store component 1 in output vector (pTempVec4[0]).
4457         pOutVec4 = this->CreateInsertElement(pOutVec4, pTempFloat, this->getInt32(1));
4458 
4459         // Store 0.0f, 1.0f in the remaining components of the output vector
4460         pOutVec4 = this->CreateInsertElement(pOutVec4, getFloat(0.0f), this->getInt32(2));
4461         pOutVec4 = this->CreateInsertElement(pOutVec4, getFloat(1.0f), this->getInt32(3));
4462         pFormatConvertedLLVMLdUAVTypedResult = pOutVec4;
4463         break;
4464     }
4465     case IGC::SURFACE_FORMAT::SURFACE_FORMAT_R8G8_UNORM:
4466     {
4467         // immMaskLow8 = 0x000000FF
4468         // rImm.zw = {0.0f, 1.0f}
4469         // AND rTemp.x, ri.x, immMaskLow8
4470         // SHR rTemp.y, ri.x, 0x8,
4471         // USTOF rTemp.xy, rTemp.xy
4472         // FMUL rOutput.xy, rTemp.xy, 1.0f / 255.0f
4473         // MOV rOutput.zw, rImm.zw
4474         llvm::Value* pMaskLow8 = this->getInt32(0x000000FF);
4475         llvm::Value* pShiftVal = this->getInt32(0x8);
4476         llvm::Value* pImmZ = this->getFloat(0.0f);
4477         llvm::Value* pImmW = this->getFloat(1.0f);
4478         llvm::Value* pConstFloat = this->getFloat(1.0f / 255.0f);
4479         llvm::Value* pTempFloat = llvm::UndefValue::get(this->getFloatTy());
4480         llvm::Value* pTempInt32 = llvm::UndefValue::get(this->getInt32Ty());
4481         llvm::Value* pTempInt32Res = llvm::UndefValue::get(this->getInt32Ty());
4482         llvm::Value* pTempVec4 = llvm::UndefValue::get(IGCLLVM::FixedVectorType::get(this->getFloatTy(), 4));
4483 
4484         // pTempFloat = pLdUAVTypedResult[0];
4485         pTempFloat = this->CreateExtractElement(data, this->getInt32(0));
4486         pTempInt32 = this->CreateBitCast(pTempFloat, this->getInt32Ty());
4487 
4488         // AND rTemp.x, ri.x, immMaskHigh
4489         pTempInt32Res = this->CreateAnd(pTempInt32, pMaskLow8);
4490 
4491         pTempFloat = this->CreateUIToFP(pTempInt32Res, this->getFloatTy());
4492         pTempFloat = this->CreateFMul(pTempFloat, pConstFloat);
4493 
4494         // Store component 0 in output vector (pTempVec4[0]).
4495         pTempVec4 = this->CreateInsertElement(pTempVec4, pTempFloat, this->getInt32(0));
4496 
4497         pTempInt32Res = this->CreateLShr(pTempInt32, pShiftVal);
4498         pTempFloat = this->CreateUIToFP(pTempInt32Res, this->getFloatTy());
4499         pTempFloat = this->CreateFMul(pTempFloat, pConstFloat);
4500 
4501         // Store component 1 in output vector (pTempVec4[0]).
4502         pTempVec4 = this->CreateInsertElement(pTempVec4, pTempFloat, this->getInt32(1));
4503 
4504         // Store component 2 to value 0.0f in output vector (pTempVec4[0]).
4505         pTempVec4 = this->CreateInsertElement(pTempVec4, pImmZ, this->getInt32(2));
4506 
4507         // Store component 3 to Value 1.0f in output vector (pTempVec4[0]).
4508         pTempVec4 = this->CreateInsertElement(pTempVec4, pImmW, this->getInt32(3));
4509         pFormatConvertedLLVMLdUAVTypedResult = pTempVec4;
4510         break;
4511     }
4512     case IGC::SURFACE_FORMAT::SURFACE_FORMAT_R8G8_SNORM:
4513     {
4514         // immMaskLow8 = 0x000000FF
4515         // rImm.zw = {0.0f, 1.0f}
4516         // AND rTemp.x, ri.x, immMaskLow8
4517         // SHR rTemp.y, ri.x, 0x8,
4518         // STOF rTemp.xy, rTemp.xy
4519         // FMUL rTemp.xy, rTemp.xy, 1.0f / 127.0f
4520         // FCMP_GE rFlag.xy, rTemp.xy, -1.0f
4521         // SEL_rFlag.xy rOutput.xy, rTemp.xy, -1.0f
4522         // MOV rOutput.zw, rImm.zw
4523         llvm::Value* pScalingFactor = getFloat(1.0f / 127.0f);
4524 
4525         llvm::Value* pOutVec4 = llvm::UndefValue::get(IGCLLVM::FixedVectorType::get(this->getFloatTy(), 4));
4526 
4527         llvm::Value* fieldWidth = this->getInt32(8);
4528 
4529         // pTempFloat = pLdUAVTypedResult[0];
4530         llvm::Value* pTempFloat = this->CreateExtractElement(data, this->getInt32(0));
4531         llvm::Value* pInputInt32 = this->CreateBitCast(pTempFloat, this->getInt32Ty());
4532 
4533         llvm::Value* pTempInt32Res = Create_IBFE(fieldWidth, this->getInt32(0), pInputInt32);
4534         pTempFloat = this->CreateSIToFP(pTempInt32Res, this->getFloatTy());
4535         pTempFloat = this->CreateFMul(pTempFloat, pScalingFactor);
4536 
4537         // Fcmp_ge rFlag, rTemp.x, -1.0f
4538         // Sel.rFlag rOutput.x, rTemp.x, -1.0f
4539         llvm::Value* pFlag = this->CreateFCmp(llvm::FCmpInst::FCMP_OGE, pTempFloat, getFloat(-1.0f));
4540 
4541         pTempFloat = this->CreateSelect(pFlag, pTempFloat, getFloat(-1.0f));
4542         // Store component 0 in output vector (pTempVec4[0]).
4543         pOutVec4 = this->CreateInsertElement(pOutVec4, pTempFloat, this->getInt32(0));
4544 
4545         // extract bits 8..15 and sign extend the result
4546         pTempInt32Res = this->Create_IBFE(fieldWidth, this->getInt32(8), pInputInt32);
4547 
4548         pTempFloat = this->CreateSIToFP(pTempInt32Res, this->getFloatTy());
4549         pTempFloat = this->CreateFMul(pTempFloat, pScalingFactor);
4550 
4551         // Fcmp_ge rFlag, rTemp.y, -1.0f
4552         // Sel.rFlag rOutput.y, rTemp.y, -1.0f
4553         pFlag = this->CreateFCmp(llvm::FCmpInst::FCMP_OGE, pTempFloat, getFloat(-1.0f));
4554         pTempFloat = this->CreateSelect(pFlag, pTempFloat, getFloat(-1.0f));
4555 
4556         // store the value in component 1 of the output vector
4557         pOutVec4 = this->CreateInsertElement(pOutVec4, pTempFloat, this->getInt32(1));
4558 
4559         // store 0.0f, 1.0f in the remaining components of the output vector
4560         pOutVec4 = this->CreateInsertElement(pOutVec4, getFloat(0.0f), this->getInt32(2));
4561         pOutVec4 = this->CreateInsertElement(pOutVec4, getFloat(1.0f), this->getInt32(3));
4562         pFormatConvertedLLVMLdUAVTypedResult = pOutVec4;
4563         break;
4564     }
4565     case IGC::SURFACE_FORMAT::SURFACE_FORMAT_R16_UNORM:
4566     {
4567         // rImm.yzw = {0.0f, 0.0f, 1.0f}
4568         // USTOF rTemp.x, ri.x
4569         // FMUL rOutput.x, rTemp.x, 1.0f / 65535.0f
4570         // MOV rOutput.yzw, rImm.yzw
4571         llvm::Value* pScalingFactor = getFloat(1.0f / 65535.0f);
4572         llvm::Value* pTempFloat = llvm::UndefValue::get(this->getFloatTy());
4573         llvm::Value* pTempInt32 = llvm::UndefValue::get(this->getInt32Ty());
4574         llvm::Value* pTempVec4 = llvm::UndefValue::get(IGCLLVM::FixedVectorType::get(this->getFloatTy(), 4));
4575 
4576         // pTempFloat = pLdUAVTypedResult[0];
4577         pTempFloat = this->CreateExtractElement(data, this->getInt32(0));
4578         pTempInt32 = this->CreateBitCast(pTempFloat, this->getInt32Ty());
4579 
4580         pTempFloat = this->CreateUIToFP(pTempInt32, this->getFloatTy());
4581         pTempFloat = this->CreateFMul(pTempFloat, pScalingFactor);
4582 
4583         // Store component 0 in output vector (pTempVec4[0]).
4584         pTempVec4 = this->CreateInsertElement(pTempVec4, pTempFloat, this->getInt32(0));
4585 
4586         // Store 0.0f, 0.0f, 1.0f, in remaining components of the output
4587         llvm::Value* pFPZero = getFloat(0.0f);
4588         pTempVec4 = this->CreateInsertElement(pTempVec4, pFPZero, this->getInt32(1));
4589         pTempVec4 = this->CreateInsertElement(pTempVec4, pFPZero, this->getInt32(2));
4590         pTempVec4 = this->CreateInsertElement(pTempVec4, getFloat(1.0f), this->getInt32(3));
4591         pFormatConvertedLLVMLdUAVTypedResult = pTempVec4;
4592         break;
4593     }
4594     case IGC::SURFACE_FORMAT::SURFACE_FORMAT_R16_SNORM:
4595     {
4596         // rImm.yzw = {0.0f, 0.0f, 1.0f}
4597         // STOF rTemp.x, ri.x
4598         // FMUL rTemp.x, rTemp.x, 1.0f / 32767.0f
4599         // FCMP_GE rFlag.x, rTemp.x, -1.0f
4600         // SEL_rFlag.x rOutput.x, rTemp.x, -1.0f
4601         // MOV rOutput.yzw, rImm.yzw
4602         llvm::Value* pFPZero = getFloat(0.0f);
4603         llvm::Value* pScalingFactor = getFloat(1.0f / 32767.0f);
4604         llvm::Value* pTempVec4 = llvm::UndefValue::get(IGCLLVM::FixedVectorType::get(this->getFloatTy(), 4));
4605 
4606         // pTempFloat = pLdUAVTypedResult[0];
4607         llvm::Value* pTempFloat = this->CreateExtractElement(data, this->getInt32(0));
4608         llvm::Value* pTempInt32 = this->CreateBitCast(pTempFloat, this->getInt32Ty());
4609 
4610         pTempInt32 = this->Create_IBFE(this->getInt32(16), this->getInt32(0), pTempInt32);
4611 
4612         pTempFloat = this->CreateSIToFP(pTempInt32, this->getFloatTy());
4613         pTempFloat = this->CreateFMul(pTempFloat, pScalingFactor);
4614 
4615         // compare with -1.0f and clamp to -1.0 if less than -1.0
4616         // Fcmp_ge rFlag, rTemp.x, -1.0f
4617         // Sel.rFlag rOutput.x, rTemp.x, -1.0f
4618         llvm::Value* pFlag = this->CreateFCmp(llvm::FCmpInst::FCMP_OGE, pTempFloat, getFloat(-1.0f));
4619         pTempFloat = this->CreateSelect(pFlag, pTempFloat, getFloat(-1.0f));
4620 
4621         // Store the result in component 0 of the output vector (pTempVec4[0]).
4622         pTempVec4 = this->CreateInsertElement(pTempVec4, pTempFloat, this->getInt32(0));
4623         // Store 0.0f, 0.0f, 1.0f in remaining components
4624         pTempVec4 = this->CreateInsertElement(pTempVec4, pFPZero, this->getInt32(1));
4625         pTempVec4 = this->CreateInsertElement(pTempVec4, pFPZero, this->getInt32(2));
4626         pTempVec4 = this->CreateInsertElement(pTempVec4, getFloat(1.0f), this->getInt32(3));
4627         pFormatConvertedLLVMLdUAVTypedResult = pTempVec4;
4628         break;
4629     }
4630     case IGC::SURFACE_FORMAT::SURFACE_FORMAT_R8_UNORM:
4631     {
4632         // rImm.yzw = {0.0f, 0.0f, 1.0f}
4633         // USTOF rTemp.x, ri.x
4634         // FMUL rOutput.x, rTemp.x, 1.0f / 255.0f
4635         // MOV rOutput.yzw, rImm.yzw
4636         // UBTOF        ro.x, ri.x;
4637         llvm::Value* fpZero = this->getFloat(0.0f);
4638         llvm::Value* pScalingFactor = getFloat(1.0f / 255.0f);
4639         llvm::Value* pTempVec4 = llvm::UndefValue::get(IGCLLVM::FixedVectorType::get(this->getFloatTy(), 4));
4640 
4641         // pTempFloat = pLdUAVTypedResult[0];
4642         llvm::Value* pTempFloat = this->CreateExtractElement(data, this->getInt32(0));
4643         llvm::Value* pTempInt32 = this->CreateBitCast(pTempFloat, this->getInt32Ty());
4644 
4645         pTempFloat = this->CreateUIToFP(pTempInt32, this->getFloatTy());
4646         pTempFloat = this->CreateFMul(pTempFloat, pScalingFactor);
4647 
4648         // Store component 0 in output vector (pTempVec4[0]).
4649         pTempVec4 = this->CreateInsertElement(pTempVec4, pTempFloat, this->getInt32(0));
4650         // fill the rest with 0.0f, 0.0f, 1.0f
4651         pTempVec4 = this->CreateInsertElement(pTempVec4, fpZero, this->getInt32(1));
4652         pTempVec4 = this->CreateInsertElement(pTempVec4, fpZero, this->getInt32(2));
4653         pTempVec4 = this->CreateInsertElement(pTempVec4, getFloat(1.0f), this->getInt32(3));
4654         pFormatConvertedLLVMLdUAVTypedResult = pTempVec4;
4655         break;
4656     }
4657     case IGC::SURFACE_FORMAT::SURFACE_FORMAT_R8_SNORM:
4658     {
4659         // rImm.yzw = {0.0f, 0.0f, 1.0f}
4660         // STOF rTemp.x, ri.x
4661         // FMUL rTemp.x, rTemp.x, 1.0f / 127.0f
4662         // FCMP_GE rFlag.x, rTemp.x, -1.0f
4663         // SEL_rFlag.x rOutput.x, rTemp.x, -1.0f
4664         // MOV rOutput.yzw, rImm.yzw
4665         llvm::Value* pFpZero = getFloat(0.0f);
4666         llvm::Value* pFpNegOne = getFloat(-1.0f);
4667         llvm::Value* pScalingFactor = getFloat(1.0f / 127.0f);
4668         llvm::Value* pOutVec4 = llvm::UndefValue::get(IGCLLVM::FixedVectorType::get(this->getFloatTy(), 4));
4669 
4670         // pTempFloat = pLdUAVTypedResult[0];
4671         llvm::Value* pTempFloat = this->CreateExtractElement(data, this->getInt32(0));
4672         llvm::Value* pTempInt32 = this->CreateBitCast(pTempFloat, this->getInt32Ty());
4673 
4674         // extract bits 0..7 and sign extend the result
4675         pTempInt32 = this->Create_IBFE(this->getInt32(8), this->getInt32(0), pTempInt32);
4676 
4677         // convert to float and apply scaling factor
4678         pTempFloat = this->CreateSIToFP(pTempInt32, this->getFloatTy());
4679         pTempFloat = this->CreateFMul(pTempFloat, pScalingFactor);
4680 
4681         // Fcmp_ge rFlag, rTemp.x, -1.0f
4682         // Sel.rFlag rOutput.x, rTemp.x, -1.0f
4683         llvm::Value* pFlag = this->CreateFCmp(llvm::FCmpInst::FCMP_OGE, pTempFloat, pFpNegOne);
4684         pTempFloat = this->CreateSelect(pFlag, pTempFloat, pFpNegOne);
4685 
4686         // Store component 0 in output vector (pTempVec4[0]).
4687         pOutVec4 = this->CreateInsertElement(pOutVec4, pTempFloat, this->getInt32(0));
4688 
4689         // Store 0.0f, 0.0f, 1.0f in the remaining components of the output vector
4690         pOutVec4 = this->CreateInsertElement(pOutVec4, pFpZero, this->getInt32(1));
4691         pOutVec4 = this->CreateInsertElement(pOutVec4, pFpZero, this->getInt32(2));
4692         pOutVec4 = this->CreateInsertElement(pOutVec4, getFloat(1.0f), this->getInt32(3));
4693 
4694         pFormatConvertedLLVMLdUAVTypedResult = pOutVec4;
4695         break;
4696     }
4697     default:
4698         break;
4699     }
4700 
4701     return pFormatConvertedLLVMLdUAVTypedResult;
4702 }
4703 
4704 
4705 ///////////////////////////////////////////////////////////////////////////////
4706 /// @brief Extract all scalars from a vector
4707 /// @param  vector Llvm value of a vector
4708 /// @param  outScalars pointer to the output array of scalars
4709 /// @param  maxSize Size of the output array
4710 /// @param  initializer optional parameter to set to unused elements
4711 ///
4712 template<bool preserveNames, typename T, typename Inserter>
4713 inline
VectorToScalars(llvm::Value * vector,llvm::Value ** outScalars,unsigned maxSize,llvm::Value * initializer)4714 void LLVM3DBuilder<preserveNames, T, Inserter>::VectorToScalars(
4715     llvm::Value* vector,
4716     llvm::Value** outScalars,
4717     unsigned maxSize,
4718     llvm::Value* initializer)
4719 {
4720     IGC_ASSERT(nullptr != vector);
4721     IGC_ASSERT(nullptr != vector->getType());
4722     IGC_ASSERT(vector->getType()->isVectorTy());
4723 
4724     const unsigned count = (unsigned)llvm::cast<IGCLLVM::FixedVectorType>(vector->getType())->getNumElements();
4725     IGC_ASSERT(1 < count);
4726     IGC_ASSERT(count <= 4);
4727     IGC_ASSERT(count <= maxSize);
4728 
4729     for (unsigned vecElem = 0; vecElem < maxSize; vecElem++)
4730     {
4731         if (vecElem >= count)
4732         {
4733             outScalars[vecElem] = initializer;
4734             continue;
4735         }
4736         outScalars[vecElem] = this->CreateExtractElement(
4737             vector,
4738             this->getInt32(vecElem));
4739     }
4740 }
4741 
4742 
4743 ///////////////////////////////////////////////////////////////////////////////
4744 /// @brief Aggregates scalar values to a vector
4745 /// @param  scalars Array of scalars
4746 /// @param  vectorElementCnt The number of elements in the vector to create.
4747 /// @return Vector of type resultType
4748 ///
4749 template<bool preserveNames, typename T, typename Inserter>
4750 inline
ScalarsToVector(llvm::Value * (& scalars)[4],unsigned vectorElementCnt)4751 llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::ScalarsToVector(
4752     llvm::Value* (&scalars)[4],
4753     unsigned vectorElementCnt)
4754 {
4755     llvm::Type* const resultType = IGCLLVM::FixedVectorType::get(scalars[0]->getType(), vectorElementCnt);
4756     IGC_ASSERT(nullptr != resultType);
4757     llvm::Value* result = llvm::UndefValue::get(resultType);
4758 
4759     for (unsigned i = 0; i < llvm::cast<IGCLLVM::FixedVectorType>(resultType)->getNumElements(); i++)
4760     {
4761         IGC_ASSERT(nullptr != scalars[i]);
4762         IGC_ASSERT(llvm::cast<llvm::VectorType>(resultType)->getElementType() == scalars[i]->getType());
4763 
4764         result = this->CreateInsertElement(
4765             result,
4766             scalars[i],
4767             this->getInt32(i));
4768     }
4769     return result;
4770 }
4771 
4772 
4773 ///////////////////////////////////////////////////////////////////////////////
4774 /// @brief Returns the normalization factor for UNORM formats
4775 /// @param  bits Number of bits in the UNORM value
4776 /// @return llvm::Constant* unorm factor
4777 template<bool preserveNames, typename T, typename Inserter>
4778 inline
GetUnormFactor(unsigned bits)4779 llvm::Constant* LLVM3DBuilder<preserveNames, T, Inserter>::GetUnormFactor(unsigned bits)
4780 {
4781     float maxUint = (float)((1 << bits) - 1);
4782     return llvm::ConstantFP::get(this->getFloatTy(), (1.0f / maxUint));
4783 };
4784 
4785 
4786 ///////////////////////////////////////////////////////////////////////////////
4787 /// @brief Returns the normalization factor for SNORM formats
4788 /// @param  bits Number of bits in the SNORM value
4789 /// @return llvm::Constant* snorm factor
4790 template<bool preserveNames, typename T, typename Inserter>
4791 inline
GetSnormFactor(unsigned bits)4792 llvm::Constant* LLVM3DBuilder<preserveNames, T, Inserter>::GetSnormFactor(unsigned bits)
4793 {
4794     float maxSint = (float)(((1 << bits) - 1) / 2);
4795     return llvm::ConstantFP::get(this->getFloatTy(), (1.0f / maxSint));
4796 };
4797 
4798 template<bool preserveNames, typename T, typename Inserter>
CreateCPSRqstCoarseSize(llvm::Value * pSrcVal)4799 inline llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::CreateCPSRqstCoarseSize(
4800     llvm::Value* pSrcVal)
4801 {
4802     llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
4803     llvm::Function*  pFunc = llvm::GenISAIntrinsic::getDeclaration(
4804                                 module,
4805                                 llvm::GenISAIntrinsic::GenISA_DCL_SystemValue,
4806                                 this->getFloatTy());
4807 
4808     llvm::Value* sizeX = this->CreateCall(pFunc, this->getInt32(IGC::REQUESTED_COARSE_SIZE_X));
4809     llvm::Value* sizeY = this->CreateCall(pFunc, this->getInt32(IGC::REQUESTED_COARSE_SIZE_Y));
4810     llvm::Value* vec = this->CreateInsertElement(
4811                     llvm::UndefValue::get(pSrcVal->getType()),
4812                     sizeX,
4813                     this->getInt32(0));
4814     return this->CreateInsertElement(
4815             vec,
4816             sizeY,
4817             this->getInt32(1));
4818 
4819 }
4820 
4821 template<bool preserveNames, typename T, typename Inserter>
CreateCPSActualCoarseSize(llvm::Value * pSrcVal)4822 inline llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::CreateCPSActualCoarseSize(
4823     llvm::Value* pSrcVal)
4824 {
4825     llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
4826     llvm::Function*  pFunc = llvm::GenISAIntrinsic::getDeclaration(
4827                                 module,
4828                                 llvm::GenISAIntrinsic::GenISA_DCL_SystemValue,
4829                                 this->getFloatTy());
4830     llvm::Value* sizeX = this->CreateCall(pFunc, this->getInt32(IGC::ACTUAL_COARSE_SIZE_X));
4831     llvm::Value* sizeY = this->CreateCall(pFunc, this->getInt32(IGC::ACTUAL_COARSE_SIZE_Y));
4832     llvm::Value* vec = this->CreateInsertElement(
4833                     llvm::UndefValue::get(pSrcVal->getType()),
4834                     sizeX,
4835                     this->getInt32(0));
4836     return this->CreateInsertElement(
4837             vec,
4838             sizeY,
4839             this->getInt32(1));
4840 
4841 }
4842 
4843 
4844 
4845 #endif // BUILTINS_FRONTEND_DEFINITIONS_HPP
4846