1 /*========================== begin_copyright_notice ============================
2
3 Copyright (C) 2017-2021 Intel Corporation
4
5 SPDX-License-Identifier: MIT
6
7 ============================= end_copyright_notice ===========================*/
8
9 #ifndef BUILTINS_FRONTEND_DEFINITIONS_HPP
10 #define BUILTINS_FRONTEND_DEFINITIONS_HPP
11
12 #include "common/debug/DebugMacros.hpp" // VALUE_NAME() definition.
13 #include "common/LLVMWarningsPush.hpp"
14 #include "llvm/Config/llvm-config.h"
15 #include "llvm/AsmParser/Parser.h"
16 #include "llvmWrapper/IR/DerivedTypes.h"
17 #include "llvm/Support/Casting.h"
18 #include "common/LLVMWarningsPop.hpp"
19 #include "Probe/Assertion.h"
20
// Packed 32-bit description of a graphics-resource address space.
// `bits` exposes the individual fields; `u32Val` is the same storage viewed
// as a single unsigned integer.
union _gfxResourceAddressSpace
{
    struct _bits
    {
        unsigned bufId    : 16; // buffer index, or the caller-supplied id for indirect buffers
        unsigned bufType  : 5;  // buffer type; EncodeASForGFXResource stores (BufferType + 1) here
        unsigned indirect : 1;  // bool
        unsigned reserved : 10; // not written by the code in this file
    } bits;
    unsigned u32Val;
};
using GFXResourceAddressSpace = _gfxResourceAddressSpace;
32
// Fixed address-space numbers used by this front end.
// NOTE(review): the values look like the OpenCL address-space numbering
// (EncodeASForGFXResource relies on 3 being the OCL SLM address space) -
// confirm before changing any of them.
enum class ADDRESS_SPACE_TYPE : unsigned
{
    ADDRESS_SPACE_PRIVATE  = 0,
    ADDRESS_SPACE_GLOBAL   = 1,
    ADDRESS_SPACE_CONSTANT = 2,
    ADDRESS_SPACE_LOCAL    = 3,  // returned for SLM buffers
    ADDRESS_SPACE_GENERIC  = 4,
    ADDRESS_SPACE_LOCAL_32 = 13,
};
42
43 template<bool preserveNames, typename T, typename Inserter>
EncodeASForGFXResource(const llvm::Value & bufIdx,IGC::BufferType bufType,unsigned uniqueIndAS)44 unsigned LLVM3DBuilder<preserveNames, T, Inserter>::EncodeASForGFXResource(
45 const llvm::Value& bufIdx,
46 IGC::BufferType bufType,
47 unsigned uniqueIndAS)
48 {
49 GFXResourceAddressSpace temp = {};
50
51 static_assert(sizeof(temp) == 4, "Code below may need and update.");
52
53 temp.u32Val = 0;
54 IGC_ASSERT((bufType + 1) < IGC::BUFFER_TYPE_UNKNOWN + 1);
55 temp.bits.bufType = bufType + 1;
56 if (bufType == IGC::BufferType::SLM)
57 {
58 return static_cast<unsigned int>(ADDRESS_SPACE_TYPE::ADDRESS_SPACE_LOCAL); // OCL uses addrspace 3 for SLM. We should use the same thing.
59 }
60 else if (llvm::isa<llvm::ConstantInt>(&bufIdx))
61 {
62 const unsigned bufId = (unsigned)(llvm::cast<llvm::ConstantInt>(&bufIdx)->getZExtValue());
63 IGC_ASSERT(bufId < (1 << 16));
64 temp.bits.bufId = bufId;
65 return temp.u32Val;
66 }
67
68 // if it is indirect-buf, it is front-end's job to give a proper(unique) address-space per access
69 temp.bits.bufId = uniqueIndAS;
70 temp.bits.indirect = 1;
71 return temp.u32Val;
72 }
73
74
75 template<bool preserveNames, typename T, typename Inserter>
llvm_GenISA_ubfe() const76 inline llvm::Function* LLVM3DBuilder<preserveNames, T, Inserter>::llvm_GenISA_ubfe() const
77 {
78 llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
79
80 llvm::Function* func_llvm_GenISA_ubfe = llvm::GenISAIntrinsic::getDeclaration(module, llvm::GenISAIntrinsic::GenISA_ubfe);
81 return func_llvm_GenISA_ubfe;
82 }
83
84 template<bool preserveNames, typename T, typename Inserter>
llvm_GenISA_ibfe() const85 inline llvm::Function* LLVM3DBuilder<preserveNames, T, Inserter>::llvm_GenISA_ibfe() const
86 {
87 llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
88
89 llvm::Function* func_llvm_GenISA_ibfe = llvm::GenISAIntrinsic::getDeclaration(module, llvm::GenISAIntrinsic::GenISA_ibfe);
90 return func_llvm_GenISA_ibfe;
91 }
92
93 template<bool preserveNames, typename T, typename Inserter>
llvm_GenISA_bfi() const94 inline llvm::Function* LLVM3DBuilder<preserveNames, T, Inserter>::llvm_GenISA_bfi() const
95 {
96 llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
97
98 llvm::Function* func_llvm_GenISA_bfi = llvm::GenISAIntrinsic::getDeclaration(module, llvm::GenISAIntrinsic::GenISA_bfi);
99 return func_llvm_GenISA_bfi;
100 }
101
102 template<bool preserveNames, typename T, typename Inserter>
llvm_GenISA_bfrev() const103 inline llvm::Function* LLVM3DBuilder<preserveNames, T, Inserter>::llvm_GenISA_bfrev() const
104 {
105 llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
106
107 llvm::Function* func_llvm_GenISA_bfrev = llvm::GenISAIntrinsic::getDeclaration(module, llvm::GenISAIntrinsic::GenISA_bfrev);
108 return func_llvm_GenISA_bfrev;
109 }
110
111 template<bool preserveNames, typename T, typename Inserter>
llvm_GenISA_firstbitHi() const112 inline llvm::Function* LLVM3DBuilder<preserveNames, T, Inserter>::llvm_GenISA_firstbitHi() const
113 {
114 llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
115
116 llvm::Function* func_llvm_GenISA_firstbitHi = llvm::GenISAIntrinsic::getDeclaration(module, llvm::GenISAIntrinsic::GenISA_firstbitHi);
117 return func_llvm_GenISA_firstbitHi;
118 }
119
120 template<bool preserveNames, typename T, typename Inserter>
llvm_GenISA_firstbitLo() const121 inline llvm::Function* LLVM3DBuilder<preserveNames, T, Inserter>::llvm_GenISA_firstbitLo() const
122 {
123 llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
124
125 llvm::Function* func_llvm_GenISA_firstbitLo = llvm::GenISAIntrinsic::getDeclaration(module, llvm::GenISAIntrinsic::GenISA_firstbitLo);
126 return func_llvm_GenISA_firstbitLo;
127 }
128
129 template<bool preserveNames, typename T, typename Inserter>
llvm_GenISA_firstbitShi() const130 inline llvm::Function* LLVM3DBuilder<preserveNames, T, Inserter>::llvm_GenISA_firstbitShi() const
131 {
132 llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
133
134 llvm::Function* func_llvm_GenISA_firstbitShi = llvm::GenISAIntrinsic::getDeclaration(module, llvm::GenISAIntrinsic::GenISA_firstbitShi);
135 return func_llvm_GenISA_firstbitShi;
136 }
137
138 template<bool preserveNames, typename T, typename Inserter>
Init()139 void LLVM3DBuilder<preserveNames, T, Inserter>::Init()
140 {
141 // Cached constants
142 m_int0 = this->getInt32( 0 );
143 m_int1 = this->getInt32( 1 );
144 m_int2 = this->getInt32( 2 );
145 m_int3 = this->getInt32( 3 );
146 m_float0 = llvm::cast<llvm::ConstantFP>(llvm::ConstantFP::get(this->getFloatTy(), 0.0));
147 m_float1 = llvm::cast<llvm::ConstantFP>(llvm::ConstantFP::get(this->getFloatTy(), 1.0));
148 }
149
150 template<bool preserveNames, typename T, typename Inserter>
Create_resinfo(llvm::Value * int32_src_s_mip,llvm::Value * int32_textureIdx)151 inline llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::Create_resinfo(
152 llvm::Value* int32_src_s_mip,
153 llvm::Value* int32_textureIdx)
154 {
155 llvm::Value * packed_params[] = {
156 int32_textureIdx,
157 int32_src_s_mip,
158 };
159
160 llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
161
162 llvm::Function* func_llvm_GenISA_resinfoptr = llvm::GenISAIntrinsic::getDeclaration
163 (module, llvm::GenISAIntrinsic::GenISA_resinfoptr, int32_textureIdx->getType());
164
165 llvm::CallInst* packed_resinfo_call = this->CreateCall(func_llvm_GenISA_resinfoptr, packed_params);
166 return packed_resinfo_call;
167 }
168
169 template<bool preserveNames, typename T, typename Inserter>
Create_resinfoptr_msaa(llvm::Value * srcBuffer,llvm::Value * float_src_s_mip)170 inline llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::Create_resinfoptr_msaa(
171 llvm::Value* srcBuffer,
172 llvm::Value* float_src_s_mip)
173 {
174 llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
175
176 llvm::Function* func_resinfoptr = llvm::GenISAIntrinsic::getDeclaration
177 (module, llvm::GenISAIntrinsic::GenISA_resinfoptr, srcBuffer->getType());
178
179 //%mip_s = bitcast float %float_src_s_mip to i32
180 llvm::Value* int32_mip = this->CreateBitCast(float_src_s_mip, this->getInt32Ty(), VALUE_NAME("mip_s"));
181
182 llvm::Value * packed_params[] = {
183 srcBuffer,
184 int32_mip
185 };
186
187 llvm::CallInst* packed_resinfo_call = llvm::cast<llvm::CallInst>(this->CreateCall(func_resinfoptr, packed_params));
188
189 // %tex_s.chan0 = extractelement <4 x i32> %packed_resinfo_call, i32 2
190 llvm::Value* int32_info_s_ch2 = this->CreateExtractElement(packed_resinfo_call, this->m_int2);
191
192 llvm::Function* func_sampleinfoptr = llvm::GenISAIntrinsic::getDeclaration
193 (module, llvm::GenISAIntrinsic::GenISA_sampleinfoptr, srcBuffer->getType());
194
195 llvm::Value * packed_sampleinfo_params[] = {
196 srcBuffer
197 };
198
199 // Call sampleinfoptr intrinsic to get the number of samples.
200 llvm::CallInst* packed_sampleinfo_call = llvm::cast<llvm::CallInst>(this->CreateCall(func_sampleinfoptr, packed_sampleinfo_params));
201
202 // We can not use channel 0 of sampleinfo which should contain the correct
203 // number of samples retrieved from surface state because this value in surface
204 // state must be set to 1 in case of MSAA UAV emulation due to fact that
205 // IGC does not support native MSAA UAV messages at the moment.
206 // Instead of channel 0 we can use channel 3 of sampleinfo which contains
207 // sample position palette index field retrieved from surface state.
208 // The sample position palette index field is set to log2(number of samples).
209
210 // Get sample position palette index from sampleinfo. Note that this value
211 // is incremented by one from its value in the surface state.
212 llvm::Value* int32_sampleinfo_s_chan3 = this->CreateExtractElement(packed_sampleinfo_call, this->m_int3);
213 llvm::Value* int32_paletteIndex = this->CreateSub(int32_sampleinfo_s_chan3, this->m_int1);
214
215 // Number of samples = 2 ^ "sample position palette index".
216 llvm::Value* int32_numberOfSamples = this->CreateShl(this->m_int1, int32_paletteIndex);
217
218 // Divide depth by number of samples.
219 // %depth_s = udiv i32 %src_s.chan2, %src1_s_ch0
220 llvm::Value* int32_depth = this->CreateUDiv(int32_info_s_ch2, int32_numberOfSamples, VALUE_NAME("depth_s"));
221
222 llvm::Value *resinfo = llvm::UndefValue::get(IGCLLVM::FixedVectorType::get(llvm::Type::getInt32Ty(module->getContext()), 4));
223
224 resinfo = this->CreateInsertElement(
225 resinfo,
226 this->CreateExtractElement(packed_resinfo_call, this->m_int0),
227 this->getInt32(0),
228 "call_inst");
229
230 resinfo = this->CreateInsertElement(
231 resinfo,
232 this->CreateExtractElement(packed_resinfo_call, this->m_int1),
233 this->getInt32(1),
234 "call_inst");
235
236 resinfo = this->CreateInsertElement(
237 resinfo,
238 this->CreateExtractElement(packed_resinfo_call, this->m_int3),
239 this->getInt32(3),
240 "call_inst");
241
242 resinfo = this->CreateInsertElement(
243 resinfo,
244 int32_depth,
245 this->getInt32(2),
246 "call_inst");
247
248 return resinfo;
249 }
250
251 template<bool preserveNames, typename T, typename Inserter>
Create_typedwrite(llvm::Value * dstBuffer,llvm::Value * srcAddressU,llvm::Value * srcAddressV,llvm::Value * srcAddressW,llvm::Value * lod,llvm::Value * float_X,llvm::Value * float_Y,llvm::Value * float_Z,llvm::Value * float_W)252 inline llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::Create_typedwrite(
253 llvm::Value* dstBuffer,
254 llvm::Value* srcAddressU,
255 llvm::Value* srcAddressV,
256 llvm::Value* srcAddressW,
257 llvm::Value* lod,
258 llvm::Value* float_X,
259 llvm::Value* float_Y,
260 llvm::Value* float_Z,
261 llvm::Value* float_W)
262 {
263 llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
264
265 llvm::Function *pFuncTypedWrite = llvm::GenISAIntrinsic::getDeclaration(
266 module, llvm::GenISAIntrinsic::GenISA_typedwrite, dstBuffer->getType());
267
268 //R = SampleIndex
269 llvm::Value * args[] = {
270 dstBuffer,
271 srcAddressU,
272 srcAddressV,
273 srcAddressW,
274 lod,
275 float_X,
276 float_Y,
277 float_Z,
278 float_W,
279 };
280
281 llvm::Value* typedwrite = this->CreateCall(pFuncTypedWrite, args);
282 return typedwrite;
283 }
284
285 template<bool preserveNames, typename T, typename Inserter>
Create_typedread(llvm::Value * srcBuffer,llvm::Value * srcAddressU,llvm::Value * srcAddressV,llvm::Value * srcAddressW,llvm::Value * lod)286 inline llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::Create_typedread(
287 llvm::Value* srcBuffer,
288 llvm::Value* srcAddressU,
289 llvm::Value* srcAddressV,
290 llvm::Value* srcAddressW,
291 llvm::Value* lod)
292 {
293 llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
294
295 llvm::Function *pFuncTypedRead = llvm::GenISAIntrinsic::getDeclaration(
296 module, llvm::GenISAIntrinsic::GenISA_typedread, srcBuffer->getType());
297
298 llvm::Value * args[] = {
299 srcBuffer,
300 srcAddressU,
301 srcAddressV,
302 srcAddressW,
303 lod
304 };
305
306 llvm::Value* typedread = this->CreateCall(pFuncTypedRead, args);
307 return typedread;
308 }
309
310 template<bool preserveNames, typename T, typename Inserter>
Create_typedread_msaa2D(llvm::Value * srcBuffer,llvm::Value * sampleIdx,llvm::Value * srcAddressU,llvm::Value * srcAddressV,llvm::Value * lod)311 inline llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::Create_typedread_msaa2D(
312 llvm::Value* srcBuffer,
313 llvm::Value* sampleIdx,
314 llvm::Value* srcAddressU,
315 llvm::Value* srcAddressV,
316 llvm::Value* lod)
317 {
318 llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
319
320 llvm::Function *pFuncTypedRead = llvm::GenISAIntrinsic::getDeclaration(
321 module, llvm::GenISAIntrinsic::GenISA_typedread, srcBuffer->getType());
322
323 //R = SampleIndex
324 llvm::Value * args[] = {
325 srcBuffer,
326 srcAddressU,
327 srcAddressV,
328 sampleIdx,
329 lod
330 };
331
332 llvm::Value* typedread = this->CreateCall(pFuncTypedRead, args);
333 return typedread;
334 }
335
336 template<bool preserveNames, typename T, typename Inserter>
Create_typedread_msaa2DArray(llvm::Value * srcBuffer,llvm::Value * sampleIdx,llvm::Value * srcAddressU,llvm::Value * srcAddressV,llvm::Value * srcAddressR,llvm::Value * lod)337 inline llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::Create_typedread_msaa2DArray(
338 llvm::Value* srcBuffer,
339 llvm::Value* sampleIdx,
340 llvm::Value* srcAddressU,
341 llvm::Value* srcAddressV,
342 llvm::Value* srcAddressR,
343 llvm::Value* lod)
344 {
345 llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
346
347 // Call sampleinfoptr intrinsic to get the number of samples.
348 // %tex = call <4 x i32> @llvm.GenISA.sampleinfoptr(4x(float)addrspace())
349 llvm::Function* pfuncsampleinfoptr = llvm::GenISAIntrinsic::getDeclaration(
350 module, llvm::GenISAIntrinsic::GenISA_sampleinfoptr, srcBuffer->getType());
351 llvm::Value* packed_sampleinfo_call = this->CreateCall(pfuncsampleinfoptr, srcBuffer);
352
353 // We can not use channel 0 of sampleinfo which should contain the correct
354 // number of samples retrieved from surface state because this value in surface
355 // state must be set to 1 in case of MSAA UAV emulation due to fact that
356 // IGC does not support native MSAA UAV messages at the moment.
357 // Instead of channel 0 we can use channel 3 of sampleinfo which contains
358 // sample position palette index field retrieved from surface state.
359 // The sample position palette index field is set to log2(number of samples).
360
361 // Get sample position palette index from surface state. Note that this value
362 // is incremented by one from its value in the surface state.
363 llvm::Value* int32_sampleinfo_s_chan3 = this->CreateExtractElement(packed_sampleinfo_call, this->m_int3);
364 llvm::Value* int32_paletteIndex = this->CreateSub(int32_sampleinfo_s_chan3, this->m_int1);
365
366 // Number of samples = 2 ^ "sample position palette index".
367 llvm::Value* int32_numberOfSamples = this->CreateShl(this->m_int1, int32_paletteIndex);
368
369 //R = R' * num of Samples + SampleIndex
370 llvm::Value* int32_mulwithSamples = this->CreateMul(srcAddressR, int32_numberOfSamples, VALUE_NAME("mul_s"));
371 llvm::Value* int32_SrcAddrR = this->CreateAdd(int32_mulwithSamples, sampleIdx, VALUE_NAME("source_R"));
372
373 llvm::Function *pFuncTypedRead = llvm::GenISAIntrinsic::getDeclaration(
374 module, llvm::GenISAIntrinsic::GenISA_typedread, srcBuffer->getType());
375
376 llvm::Value * args[] = {
377 srcBuffer,
378 srcAddressU,
379 srcAddressV,
380 int32_SrcAddrR,
381 lod
382 };
383
384 llvm::Value* typedread = this->CreateCall(pFuncTypedRead, args);
385 return typedread;
386 }
387
388 template<bool preserveNames, typename T, typename Inserter>
Create_typedwrite_msaa2D(llvm::Value * dstBuffer,llvm::Value * sampleIdx,llvm::Value * srcAddressU,llvm::Value * srcAddressV,llvm::Value * float_X,llvm::Value * float_Y,llvm::Value * float_Z,llvm::Value * float_W)389 inline llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::Create_typedwrite_msaa2D(
390 llvm::Value* dstBuffer,
391 llvm::Value* sampleIdx,
392 llvm::Value* srcAddressU,
393 llvm::Value* srcAddressV,
394 llvm::Value* float_X,
395 llvm::Value* float_Y,
396 llvm::Value* float_Z,
397 llvm::Value* float_W)
398 {
399 llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
400
401 llvm::Function *pFuncTypedWrite = llvm::GenISAIntrinsic::getDeclaration(
402 module, llvm::GenISAIntrinsic::GenISA_typedwrite, dstBuffer->getType());
403
404 //R = SampleIndex
405 llvm::Value * args[] = {
406 dstBuffer,
407 srcAddressU,
408 srcAddressV,
409 sampleIdx,
410 m_int0,
411 float_X,
412 float_Y,
413 float_Z,
414 float_W,
415 };
416
417 llvm::Value* typedwrite = this->CreateCall(pFuncTypedWrite, args);
418 return typedwrite;
419 }
420
421 template<bool preserveNames, typename T, typename Inserter>
Create_typedwrite_msaa2DArray(llvm::Value * dstBuffer,llvm::Value * sampleIdx,llvm::Value * srcAddressU,llvm::Value * srcAddressV,llvm::Value * srcAddressR,llvm::Value * float_X,llvm::Value * float_Y,llvm::Value * float_Z,llvm::Value * float_W)422 inline llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::Create_typedwrite_msaa2DArray(
423 llvm::Value* dstBuffer,
424 llvm::Value* sampleIdx,
425 llvm::Value* srcAddressU,
426 llvm::Value* srcAddressV,
427 llvm::Value* srcAddressR,
428 llvm::Value* float_X,
429 llvm::Value* float_Y,
430 llvm::Value* float_Z,
431 llvm::Value* float_W)
432 {
433 llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
434
435 // Call sampleinfoptr intrinsic to get the number of samples.
436 // %tex = call <4 x i32> @llvm.GenISA.sampleinfoptr(4x(float)addrspace())
437 llvm::Function* pfuncsampleinfoptr = llvm::GenISAIntrinsic::getDeclaration(
438 module, llvm::GenISAIntrinsic::GenISA_sampleinfoptr, dstBuffer->getType());
439 llvm::Value* packed_sampleinfo_call = this->CreateCall(pfuncsampleinfoptr, dstBuffer);
440
441 // We can not use channel 0 of sampleinfo which should contain the correct
442 // number of samples retrieved from surface state because this value in surface
443 // state must be set to 1 in case of MSAA UAV emulation due to fact that
444 // IGC does not support native MSAA UAV messages at the moment.
445 // Instead of channel 0 we can use channel 3 of sampleinfo which contains
446 // sample position palette index field retrieved from surface state.
447 // The sample position palette index field is set to log2(number of samples).
448
449 // Get sample position palette index from surface state. Note that this value
450 // is incremented by one from its value in the surface state.
451 llvm::Value* int32_sampleinfo_s_chan3 = this->CreateExtractElement(packed_sampleinfo_call, this->m_int3);
452 llvm::Value* int32_paletteIndex = this->CreateSub(int32_sampleinfo_s_chan3, this->m_int1);
453
454 // Number of samples = 2 ^ "sample position palette index".
455 llvm::Value* int32_numberOfSamples = this->CreateShl(this->m_int1, int32_paletteIndex);
456
457 //R = R' * num of Samples + SampleIndex
458 llvm::Value* int32_mulwithSamples = this->CreateMul(srcAddressR, int32_numberOfSamples, VALUE_NAME("mul_s"));
459 llvm::Value* int32_SrcAddrR = this->CreateAdd(int32_mulwithSamples, sampleIdx, VALUE_NAME("source_R"));
460
461 llvm::Function *pFuncTypedWrite = llvm::GenISAIntrinsic::getDeclaration(
462 module, llvm::GenISAIntrinsic::GenISA_typedwrite, dstBuffer->getType());
463
464 //R = SampleIndex
465 llvm::Value * args[] = {
466 dstBuffer,
467 srcAddressU,
468 srcAddressV,
469 int32_SrcAddrR,
470 m_int0,
471 float_X,
472 float_Y,
473 float_Z,
474 float_W,
475 };
476
477 llvm::Value* typedwrite = this->CreateCall(pFuncTypedWrite, args);
478 return typedwrite;
479 }
480
481 template<bool preserveNames, typename T, typename Inserter>
Create_dwordatomictypedMsaa2D(llvm::Value * dstBuffer,llvm::Value * sampleIdx,llvm::Value * srcAddressU,llvm::Value * srcAddressV,llvm::Value * src,llvm::Value * instType)482 inline llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::Create_dwordatomictypedMsaa2D(
483 llvm::Value* dstBuffer,
484 llvm::Value* sampleIdx,
485 llvm::Value* srcAddressU,
486 llvm::Value* srcAddressV,
487 llvm::Value* src,
488 llvm::Value* instType)
489 {
490 llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
491
492 llvm::Type *types[] = { src->getType(), dstBuffer->getType() };
493
494 llvm::Function *pFuncDwordAtomicTyped = llvm::GenISAIntrinsic::getDeclaration(
495 module, llvm::GenISAIntrinsic::GenISA_intatomictyped, types);
496
497 //R = SampleIndex
498 llvm::Value * args[] = {
499 dstBuffer,
500 srcAddressU,
501 srcAddressV,
502 sampleIdx,
503 src,
504 instType
505 };
506
507 llvm::Value* dwordAtomicTyped = this->CreateCall(pFuncDwordAtomicTyped, args);
508 return dwordAtomicTyped;
509 }
510
511 template<bool preserveNames, typename T, typename Inserter>
Create_dwordatomictypedMsaa2DArray(llvm::Value * dstBuffer,llvm::Value * sampleIdx,llvm::Value * srcAddressU,llvm::Value * srcAddressV,llvm::Value * srcAddressR,llvm::Value * src,llvm::Value * instType)512 inline llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::Create_dwordatomictypedMsaa2DArray(
513 llvm::Value* dstBuffer,
514 llvm::Value* sampleIdx,
515 llvm::Value* srcAddressU,
516 llvm::Value* srcAddressV,
517 llvm::Value* srcAddressR,
518 llvm::Value* src,
519 llvm::Value* instType)
520 {
521 llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
522
523 // Call sampleinfoptr intrinsic to get the number of samples.
524 // %tex = call <4 x i32> @llvm.GenISA.sampleinfoptr(4x(float)addrspace())
525 llvm::Function* pfuncsampleinfoptr = llvm::GenISAIntrinsic::getDeclaration(
526 module, llvm::GenISAIntrinsic::GenISA_sampleinfoptr, dstBuffer->getType());
527 llvm::Value* packed_sampleinfo_call = this->CreateCall(pfuncsampleinfoptr, dstBuffer);
528
529 // We can not use channel 0 of sampleinfo which should contain the correct
530 // number of samples retrieved from surface state because this value in surface
531 // state must be set to 1 in case of MSAA UAV emulation due to fact that
532 // IGC does not support native MSAA UAV messages at the moment.
533 // Instead of channel 0 we can use channel 3 of sampleinfo which contains
534 // sample position palette index field retrieved from surface state.
535 // The sample position palette index field is set to log2(number of samples).
536
537 // Get sample position palette index from surface state. Note that this value
538 // is incremented by one from its value in the surface state.
539 llvm::Value* int32_sampleinfo_s_chan3 = this->CreateExtractElement(packed_sampleinfo_call, this->m_int3);
540 llvm::Value* int32_paletteIndex = this->CreateSub(int32_sampleinfo_s_chan3, this->m_int1);
541
542 // Number of samples = 2 ^ "sample position palette index".
543 llvm::Value* int32_numberOfSamples = this->CreateShl(this->m_int1, int32_paletteIndex);
544
545 //R = R' * num of Samples + SampleIndex
546 llvm::Value* int32_mulwithSamples = this->CreateMul(srcAddressR, int32_numberOfSamples, VALUE_NAME("mul_s"));
547 llvm::Value* int32_SrcAddrR = this->CreateAdd(int32_mulwithSamples, sampleIdx, VALUE_NAME("source_R"));
548
549 llvm::Type *types[] = { src->getType(), dstBuffer->getType() };
550
551 llvm::Function *pFuncDwordAtomicTyped = llvm::GenISAIntrinsic::getDeclaration(
552 module, llvm::GenISAIntrinsic::GenISA_intatomictyped, types);
553
554 llvm::Value * args[] = {
555 dstBuffer,
556 srcAddressU,
557 srcAddressV,
558 int32_SrcAddrR,
559 src,
560 instType
561 };
562
563 llvm::Value* dwordAtomicTyped = this->CreateCall(pFuncDwordAtomicTyped, args);
564 return dwordAtomicTyped;
565 }
566
567 template<bool preserveNames, typename T, typename Inserter>
Create_StatelessAtomic(llvm::Value * ptr,llvm::Value * data,IGC::AtomicOp opcode)568 inline llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::Create_StatelessAtomic(
569 llvm::Value* ptr,
570 llvm::Value* data,
571 IGC::AtomicOp opcode)
572 {
573 llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
574 llvm::Type* types[] = { data->getType(), ptr->getType(), ptr->getType() };
575 llvm::Function* pFunc = llvm::GenISAIntrinsic::getDeclaration(
576 module, llvm::GenISAIntrinsic::GenISA_intatomicrawA64, types);
577
578 llvm::Value* args[] =
579 {
580 ptr,
581 ptr,
582 data,
583 this->getInt32(opcode)
584 };
585 return this->CreateCall(pFunc, args);
586 }
587
588 template<bool preserveNames, typename T, typename Inserter>
Create_InidrectAtomic(llvm::Value * resource,llvm::Value * offset,llvm::Value * data,IGC::AtomicOp opcode)589 inline llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::Create_InidrectAtomic(
590 llvm::Value* resource,
591 llvm::Value* offset,
592 llvm::Value* data,
593 IGC::AtomicOp opcode)
594 {
595 llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
596
597 llvm::Type *types[] = { data->getType(), resource->getType() };
598
599 llvm::Function* pFunc = llvm::GenISAIntrinsic::getDeclaration(
600 module, llvm::GenISAIntrinsic::GenISA_intatomicraw, types);
601
602 llvm::Value* args[] =
603 {
604 resource,
605 offset,
606 data,
607 this->getInt32(opcode)
608 };
609 return this->CreateCall(pFunc, args);
610 }
611
612 template<bool preserveNames, typename T, typename Inserter>
Create_StatelessAtomicCmpXChg(llvm::Value * ptr,llvm::Value * data0,llvm::Value * data1)613 inline llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::Create_StatelessAtomicCmpXChg(
614 llvm::Value* ptr,
615 llvm::Value* data0,
616 llvm::Value* data1)
617 {
618 llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
619 llvm::Type* types[] = { data0->getType(), ptr->getType(), ptr->getType() };
620 llvm::Function* pFunc = llvm::GenISAIntrinsic::getDeclaration(
621 module, llvm::GenISAIntrinsic::GenISA_icmpxchgatomicrawA64, types);
622
623 llvm::Value* args[] =
624 {
625 ptr,
626 ptr,
627 data0,
628 data1,
629 };
630 return this->CreateCall(pFunc, args);
631 }
632
633 template<bool preserveNames, typename T, typename Inserter>
Create_InidrectAtomicCmpXChg(llvm::Value * resource,llvm::Value * offset,llvm::Value * data0,llvm::Value * data1)634 inline llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::Create_InidrectAtomicCmpXChg(
635 llvm::Value* resource,
636 llvm::Value* offset,
637 llvm::Value* data0,
638 llvm::Value* data1)
639 {
640 llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
641
642 llvm::Type *types[] = { data0->getType(), resource->getType() };
643
644 llvm::Function* pFunc = llvm::GenISAIntrinsic::getDeclaration(
645 module, llvm::GenISAIntrinsic::GenISA_icmpxchgatomicraw, types);
646
647 llvm::Value* args[] =
648 {
649 resource,
650 offset,
651 data0,
652 data1,
653 };
654 return this->CreateCall(pFunc, args);
655 }
656
657 template<bool preserveNames, typename T, typename Inserter>
Create_cmpxchgatomictypedMsaa2D(llvm::Value * dstBuffer,llvm::Value * sampleIdx,llvm::Value * srcAddressU,llvm::Value * srcAddressV,llvm::Value * src0,llvm::Value * src1)658 inline llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::Create_cmpxchgatomictypedMsaa2D(
659 llvm::Value* dstBuffer,
660 llvm::Value* sampleIdx,
661 llvm::Value* srcAddressU,
662 llvm::Value* srcAddressV,
663 llvm::Value* src0,
664 llvm::Value* src1)
665 {
666 llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
667
668 llvm::Type *types[] = { src0->getType(), dstBuffer->getType() };
669
670 llvm::Function *pFuncCmpxchgatomictyped = llvm::GenISAIntrinsic::getDeclaration(
671 module, llvm::GenISAIntrinsic::GenISA_icmpxchgatomictyped, types);
672
673 //R = SampleIndex
674 llvm::Value * args[] = {
675 dstBuffer,
676 srcAddressU,
677 srcAddressV,
678 sampleIdx,
679 src0,
680 src1
681 };
682
683 llvm::Value* dwordCmpxchgatomictyped = this->CreateCall(pFuncCmpxchgatomictyped, args);
684 return dwordCmpxchgatomictyped;
685 }
686
687 template<bool preserveNames, typename T, typename Inserter>
Create_cmpxchgatomictypedMsaa2DArray(llvm::Value * dstBuffer,llvm::Value * sampleIdx,llvm::Value * srcAddressU,llvm::Value * srcAddressV,llvm::Value * srcAddressR,llvm::Value * src0,llvm::Value * src1)688 inline llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::Create_cmpxchgatomictypedMsaa2DArray(
689 llvm::Value* dstBuffer,
690 llvm::Value* sampleIdx,
691 llvm::Value* srcAddressU,
692 llvm::Value* srcAddressV,
693 llvm::Value* srcAddressR,
694 llvm::Value* src0,
695 llvm::Value* src1)
696 {
697 llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
698
699 // Call sampleinfoptr intrinsic to get the number of samples.
700 // %tex = call <4 x i32> @llvm.GenISA.sampleinfoptr(4x(float)addrspace())
701 llvm::Function* pfuncsampleinfoptr = llvm::GenISAIntrinsic::getDeclaration(
702 module, llvm::GenISAIntrinsic::GenISA_sampleinfoptr, dstBuffer->getType());
703 llvm::Value* packed_sampleinfo_call = this->CreateCall(pfuncsampleinfoptr, dstBuffer);
704
705 // We can not use channel 0 of sampleinfo which should contain the correct
706 // number of samples retrieved from surface state because this value in surface
707 // state must be set to 1 in case of MSAA UAV emulation due to fact that
708 // IGC does not support native MSAA UAV messages at the moment.
709 // Instead of channel 0 we can use channel 3 of sampleinfo which contains
710 // sample position palette index field retrieved from surface state.
711 // The sample position palette index field is set to log2(number of samples).
712
713 // Get sample position palette index from surface state. Note that this value
714 // is incremented by one from its value in the surface state.
715 llvm::Value* int32_sampleinfo_s_chan3 = this->CreateExtractElement(packed_sampleinfo_call, this->m_int3);
716 llvm::Value* int32_paletteIndex = this->CreateSub(int32_sampleinfo_s_chan3, this->m_int1);
717
718 // Number of samples = 2 ^ "sample position palette index".
719 llvm::Value* int32_numberOfSamples = this->CreateShl(this->m_int1, int32_paletteIndex);
720
721 //R = R' * num of Samples + SampleIndex
722 llvm::Value* int32_mulwithSamples = this->CreateMul(srcAddressR, int32_numberOfSamples, VALUE_NAME("mul_s"));
723 llvm::Value* int32_SrcAddrR = this->CreateAdd(int32_mulwithSamples, sampleIdx, VALUE_NAME("source_R"));
724
725 llvm::Type *types[] = { src0->getType(), dstBuffer->getType() };
726
727 llvm::Function *pFuncCmpxchgatomictyped = llvm::GenISAIntrinsic::getDeclaration(
728 module, llvm::GenISAIntrinsic::GenISA_icmpxchgatomictyped, types);
729
730 llvm::Value * args[] = {
731 dstBuffer,
732 srcAddressU,
733 srcAddressV,
734 int32_SrcAddrR,
735 src0,
736 src1
737 };
738
739 llvm::Value* dwordCmpxchgatomictyped = this->CreateCall(pFuncCmpxchgatomictyped, args);
740 return dwordCmpxchgatomictyped;
741 }
742
743 template<bool preserveNames, typename T, typename Inserter>
Create_TypedAtomic(llvm::Value * resource,llvm::Value * addressU,llvm::Value * addressV,llvm::Value * addressR,llvm::Value * data,IGC::AtomicOp opcode)744 inline llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::Create_TypedAtomic(
745 llvm::Value* resource,
746 llvm::Value* addressU,
747 llvm::Value* addressV,
748 llvm::Value* addressR,
749 llvm::Value* data,
750 IGC::AtomicOp opcode)
751 {
752 llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
753
754 llvm::Type *types[] = { data->getType(), resource->getType() };
755
756 llvm::Function* pFunc = llvm::GenISAIntrinsic::getDeclaration(
757 module, llvm::GenISAIntrinsic::GenISA_intatomictyped, types);
758
759 llvm::Value* args[] =
760 {
761 resource,
762 addressU,
763 addressV,
764 addressR,
765 data,
766 this->getInt32(opcode)
767 };
768 return this->CreateCall(pFunc, args);
769 }
770
771 template<bool preserveNames, typename T, typename Inserter>
Create_TypedAtomicCmpXChg(llvm::Value * resource,llvm::Value * addressU,llvm::Value * addressV,llvm::Value * addressR,llvm::Value * data0,llvm::Value * data1)772 inline llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::Create_TypedAtomicCmpXChg(
773 llvm::Value* resource,
774 llvm::Value* addressU,
775 llvm::Value* addressV,
776 llvm::Value* addressR,
777 llvm::Value* data0,
778 llvm::Value* data1)
779 {
780 llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
781
782 llvm::Type *types[] = { data0->getType(), resource->getType() };
783
784 llvm::Function* pFunc = llvm::GenISAIntrinsic::getDeclaration(
785 module, llvm::GenISAIntrinsic::GenISA_icmpxchgatomictyped, types);
786
787 llvm::Value* args[] =
788 {
789 resource,
790 addressU,
791 addressV,
792 addressR,
793 data0,
794 data1,
795 };
796 return this->CreateCall(pFunc, args);
797 }
798
799 template<bool preserveNames, typename T, typename Inserter>
Create_SampleInfo(llvm::Value * resourcePtr)800 inline llvm::CallInst* LLVM3DBuilder<preserveNames, T, Inserter>::Create_SampleInfo(
801 llvm::Value* resourcePtr)
802 {
803 llvm::Value * packed_tex_params[] = {
804 resourcePtr,
805 };
806
807 llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
808
809 llvm::CallInst* packed_tex_call = llvm::cast<llvm::CallInst>(this->CreateCall(
810 llvm::GenISAIntrinsic::getDeclaration(module, llvm::GenISAIntrinsic::GenISA_sampleinfoptr, resourcePtr->getType()),
811 packed_tex_params));
812
813 return packed_tex_call;
814 }
815
816 template<bool preserveNames, typename T, typename Inserter>
CreateReadSurfaceInfo(llvm::Value * resourcePtr,llvm::Value * mipmap)817 inline llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::CreateReadSurfaceInfo(
818 llvm::Value* resourcePtr,
819 llvm::Value* mipmap)
820 {
821 llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
822 llvm::Function* fn = llvm::GenISAIntrinsic::getDeclaration(
823 module, llvm::GenISAIntrinsic::GenISA_readsurfaceinfoptr, resourcePtr->getType());
824 llvm::Value* packed_tex_call = this->CreateCall2(fn, resourcePtr, mipmap);
825 return packed_tex_call;
826 }
827
828 template<bool preserveNames, typename T, typename Inserter>
Create_SyncThreadGroup()829 llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::Create_SyncThreadGroup()
830 {
831 llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
832 return this->CreateCall(llvm::GenISAIntrinsic::getDeclaration(module, llvm::GenISAIntrinsic::GenISA_threadgroupbarrier));
833 }
834
835 template<bool preserveNames, typename T, typename Inserter>
Create_FlushSampler()836 llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::Create_FlushSampler()
837 {
838 llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
839 return this->CreateCall(llvm::GenISAIntrinsic::getDeclaration(module, llvm::GenISAIntrinsic::GenISA_flushsampler));
840 }
841
842 template<bool preserveNames, typename T, typename Inserter>
Create_MemoryFence(bool commit,bool flushRWDataCache,bool flushConstantCache,bool flushTextureCache,bool flushInstructionCache,bool globalFence)843 llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::Create_MemoryFence(
844 bool commit,
845 bool flushRWDataCache,
846 bool flushConstantCache,
847 bool flushTextureCache,
848 bool flushInstructionCache,
849 bool globalFence)
850 {
851 llvm::Value* parameters[] =
852 {
853 this->getInt1(commit),
854 this->getInt1(flushRWDataCache),
855 this->getInt1(flushConstantCache),
856 this->getInt1(flushTextureCache),
857 this->getInt1(flushInstructionCache),
858 this->getInt1(globalFence),
859 this->getInt1(false),
860 };
861 llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
862 return this->CreateCall(
863 llvm::GenISAIntrinsic::getDeclaration(module, llvm::GenISAIntrinsic::GenISA_memoryfence),
864 parameters);
865 }
866
867 template<bool preserveNames, typename T, typename Inserter>
Create_GlobalSync()868 llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::Create_GlobalSync()
869 {
870 llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
871 return this->CreateCall(llvm::GenISAIntrinsic::getDeclaration(module, llvm::GenISAIntrinsic::GenISA_globalSync));
872 }
873
874 template<bool preserveNames, typename T, typename Inserter>
Create_SamplePos(llvm::Value * int32_resourceIdx,llvm::Value * int32_samplerIdx)875 inline llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::Create_SamplePos(
876 llvm::Value* int32_resourceIdx,
877 llvm::Value* int32_samplerIdx)
878 {
879 llvm::Value* sampleInfo = this->Create_SampleInfo(int32_resourceIdx);
880
881
882 llvm::Value* int32_texX = this->CreateExtractElement(sampleInfo, m_int0);
883 llvm::Value* int32_texW = this->CreateExtractElement(sampleInfo, m_int3);
884
885 llvm::Value* int32_tempIndex = this->CreateAdd(int32_texX, int32_samplerIdx);
886 llvm::Value* int1_ole = this->CreateICmp(llvm::ICmpInst::ICMP_UGT, int32_texX, int32_samplerIdx);
887 llvm::Value* int32_sel = this->CreateSelect(int1_ole, int32_tempIndex, m_int0);
888 llvm::Value* int1_one = this->CreateICmp(llvm::ICmpInst::ICMP_EQ, int32_texW, m_int1);
889 llvm::Value* int32_selIndex = this->CreateSelect(int1_one, m_int0, int32_sel);
890
891 /*
892 %tempY = extractelement <32 x f32> <f32 0.0, f32 0.0, f32 4.0 / 16.0, f32 -4.0 / 16.0, f32 -6.0 / 16.0,
893 f32 -2.0 / 16.0, f32 2.0 / 16.0, f32 6.0 / 16.0, f32 -3.0 / 16.0,
894 f32 3.0 / 16.0, f32 1.0 / 16.0, f32 -5.0 / 16.0, f32 5.0 / 16.0,
895 f32 -1.0 / 16.0, f32 7.0 / 16.0, f32 -7.0 / 16.0, f32 1.0 / 16.0,
896 f32 -3.0 / 16.0, f32 2.0 / 16.0, f32 -1.0 / 16.0, f32 -2.0 / 16.0,
897 f32 5.0 / 16.0, f32 3.0 / 16.0, f32 -5.0 / 16.0, f32 6.0 / 16.0,
898 f32 -7.0 / 16.0, f32 -6.0 / 16.0, f32 4.0 / 16.0, f32 0.0,
899 f32 -4.0 / 16.0, f32 7.0 / 16.0, f32 -8.0 / 16.0>, i32 %selIndex
900 */
901 llvm::Value* float_y = nullptr;
902 {
903 llvm::Value* temp = llvm::UndefValue::get(IGCLLVM::FixedVectorType::get(this->getFloatTy(), 32));
904 temp = this->CreateInsertElement(temp, this->getFloat( 0.0f), this->getInt32(0));
905 temp = this->CreateInsertElement(temp, this->getFloat( 0.0f), this->getInt32(1));
906 temp = this->CreateInsertElement(temp, this->getFloat( 4.0f / 16.0f), this->getInt32(2));
907 temp = this->CreateInsertElement(temp, this->getFloat(-4.0f / 16.0f), this->getInt32(3));
908 temp = this->CreateInsertElement(temp, this->getFloat(-6.0f / 16.0f), this->getInt32(4));
909 temp = this->CreateInsertElement(temp, this->getFloat(-2.0f / 16.0f), this->getInt32(5));
910 temp = this->CreateInsertElement(temp, this->getFloat( 2.0f / 16.0f), this->getInt32(6));
911 temp = this->CreateInsertElement(temp, this->getFloat( 6.0f / 16.0f), this->getInt32(7));
912 temp = this->CreateInsertElement(temp, this->getFloat(-3.0f / 16.0f), this->getInt32(8));
913 temp = this->CreateInsertElement(temp, this->getFloat( 3.0f / 16.0f), this->getInt32(9));
914 temp = this->CreateInsertElement(temp, this->getFloat( 1.0f / 16.0f), this->getInt32(10));
915 temp = this->CreateInsertElement(temp, this->getFloat(-5.0f / 16.0f), this->getInt32(11));
916 temp = this->CreateInsertElement(temp, this->getFloat( 5.0f / 16.0f), this->getInt32(12));
917 temp = this->CreateInsertElement(temp, this->getFloat(-1.0f / 16.0f), this->getInt32(13));
918 temp = this->CreateInsertElement(temp, this->getFloat( 7.0f / 16.0f), this->getInt32(14));
919 temp = this->CreateInsertElement(temp, this->getFloat(-7.0f / 16.0f), this->getInt32(15));
920 temp = this->CreateInsertElement(temp, this->getFloat( 1.0f / 16.0f), this->getInt32(16));
921 temp = this->CreateInsertElement(temp, this->getFloat(-3.0f / 16.0f), this->getInt32(17));
922 temp = this->CreateInsertElement(temp, this->getFloat( 2.0f / 16.0f), this->getInt32(18));
923 temp = this->CreateInsertElement(temp, this->getFloat(-1.0f / 16.0f), this->getInt32(19));
924 temp = this->CreateInsertElement(temp, this->getFloat(-2.0f / 16.0f), this->getInt32(20));
925 temp = this->CreateInsertElement(temp, this->getFloat( 5.0f / 16.0f), this->getInt32(21));
926 temp = this->CreateInsertElement(temp, this->getFloat( 3.0f / 16.0f), this->getInt32(22));
927 temp = this->CreateInsertElement(temp, this->getFloat(-5.0f / 16.0f), this->getInt32(23));
928 temp = this->CreateInsertElement(temp, this->getFloat( 6.0f / 16.0f), this->getInt32(24));
929 temp = this->CreateInsertElement(temp, this->getFloat(-7.0f / 16.0f), this->getInt32(25));
930 temp = this->CreateInsertElement(temp, this->getFloat(-6.0f / 16.0f), this->getInt32(26));
931 temp = this->CreateInsertElement(temp, this->getFloat( 4.0f / 16.0f), this->getInt32(27));
932 temp = this->CreateInsertElement(temp, this->getFloat( 0.0f), this->getInt32(28));
933 temp = this->CreateInsertElement(temp, this->getFloat(-4.0f / 16.0f), this->getInt32(29));
934 temp = this->CreateInsertElement(temp, this->getFloat( 7.0f / 16.0f), this->getInt32(30));
935 temp = this->CreateInsertElement(temp, this->getFloat(-8.0f / 16.0f), this->getInt32(31));
936 float_y = this->CreateExtractElement(temp, int32_selIndex);
937 }
938
939 /*
940 %tempX = extractelement <32 x f32> <f32 0.0, f32 0.0, f32 4.0 / 16.0, f32 -4.0 / 16.0, f32 -2.0 / 16.0,
941 f32 6.0 / 16.0, f32 -6.0 / 16.0, f32 2.0 / 16.0, f32 1.0 / 16.0,
942 f32 -1.0 / 16.0, f32 5.0 / 16.0, f32 -3.0 / 16.0, f32 -5.0 / 16.0,
943 f32 -7.0 / 16.0, f32 3.0 / 16.0, f32 7.0 / 16.0, f32 1.0 / 16.0,
944 f32 -1.0 / 16.0, f32 -3.0 / 16.0, f32 4.0 / 16.0, f32 -5.0 / 16.0,
945 f32 2.0 / 16.0, f32 5.0 / 16.0, f32 3.0 / 16.0, f32 -2.0 / 16.0,
946 f32 0.0 / 16.0, f32 -4.0 / 16.0, f32 -6.0 / 16.0, f32 -8.0 / 16.0,
947 f32 7.0 / 16.0, f32 6.0 / 16.0, f32 -7.0 / 16.0>, i32 %selIndex
948 */
949 llvm::Value* float_x = nullptr;
950 {
951 llvm::Value* temp = llvm::UndefValue::get(IGCLLVM::FixedVectorType::get(this->getFloatTy(), 32));
952 temp = this->CreateInsertElement(temp, this->getFloat( 0.0f), this->getInt32(0));
953 temp = this->CreateInsertElement(temp, this->getFloat( 0.0f), this->getInt32(1));
954 temp = this->CreateInsertElement(temp, this->getFloat( 4.0f / 16.0f), this->getInt32(2));
955 temp = this->CreateInsertElement(temp, this->getFloat(-4.0f / 16.0f), this->getInt32(3));
956 temp = this->CreateInsertElement(temp, this->getFloat(-2.0f / 16.0f), this->getInt32(4));
957 temp = this->CreateInsertElement(temp, this->getFloat( 6.0f / 16.0f), this->getInt32(5));
958 temp = this->CreateInsertElement(temp, this->getFloat(-6.0f / 16.0f), this->getInt32(6));
959 temp = this->CreateInsertElement(temp, this->getFloat( 2.0f / 16.0f), this->getInt32(7));
960 temp = this->CreateInsertElement(temp, this->getFloat( 1.0f / 16.0f), this->getInt32(8));
961 temp = this->CreateInsertElement(temp, this->getFloat(-1.0f / 16.0f), this->getInt32(9));
962 temp = this->CreateInsertElement(temp, this->getFloat( 5.0f / 16.0f), this->getInt32(10));
963 temp = this->CreateInsertElement(temp, this->getFloat(-3.0f / 16.0f), this->getInt32(11));
964 temp = this->CreateInsertElement(temp, this->getFloat(-5.0f / 16.0f), this->getInt32(12));
965 temp = this->CreateInsertElement(temp, this->getFloat(-7.0f / 16.0f), this->getInt32(13));
966 temp = this->CreateInsertElement(temp, this->getFloat( 3.0f / 16.0f), this->getInt32(14));
967 temp = this->CreateInsertElement(temp, this->getFloat( 7.0f / 16.0f), this->getInt32(15));
968 temp = this->CreateInsertElement(temp, this->getFloat( 1.0f / 16.0f), this->getInt32(16));
969 temp = this->CreateInsertElement(temp, this->getFloat(-1.0f / 16.0f), this->getInt32(17));
970 temp = this->CreateInsertElement(temp, this->getFloat(-3.0f / 16.0f), this->getInt32(18));
971 temp = this->CreateInsertElement(temp, this->getFloat( 4.0f / 16.0f), this->getInt32(19));
972 temp = this->CreateInsertElement(temp, this->getFloat(-5.0f / 16.0f), this->getInt32(20));
973 temp = this->CreateInsertElement(temp, this->getFloat( 2.0f / 16.0f), this->getInt32(21));
974 temp = this->CreateInsertElement(temp, this->getFloat( 5.0f / 16.0f), this->getInt32(22));
975 temp = this->CreateInsertElement(temp, this->getFloat( 3.0f / 16.0f), this->getInt32(23));
976 temp = this->CreateInsertElement(temp, this->getFloat(-2.0f / 16.0f), this->getInt32(24));
977 temp = this->CreateInsertElement(temp, this->getFloat( 0.0f), this->getInt32(25));
978 temp = this->CreateInsertElement(temp, this->getFloat(-4.0f / 16.0f), this->getInt32(26));
979 temp = this->CreateInsertElement(temp, this->getFloat(-6.0f / 16.0f), this->getInt32(27));
980 temp = this->CreateInsertElement(temp, this->getFloat(-8.0f / 16.0f), this->getInt32(28));
981 temp = this->CreateInsertElement(temp, this->getFloat( 7.0f / 16.0f), this->getInt32(29));
982 temp = this->CreateInsertElement(temp, this->getFloat( 6.0f / 16.0f), this->getInt32(30));
983 temp = this->CreateInsertElement(temp, this->getFloat(-7.0f / 16.0f), this->getInt32(31));
984 float_x = this->CreateExtractElement(temp, int32_selIndex);
985 }
986
987 llvm::Value* packed_ret_value = llvm::UndefValue::get(IGCLLVM::FixedVectorType::get(this->getFloatTy(), 4));
988 packed_ret_value = this->CreateInsertElement(packed_ret_value, float_x, this->getInt32(0));
989 packed_ret_value = this->CreateInsertElement(packed_ret_value, float_y, this->getInt32(1));
990 packed_ret_value = this->CreateInsertElement(packed_ret_value, this->getFloat(0.0f), this->getInt32(2));
991 packed_ret_value = this->CreateInsertElement(packed_ret_value, this->getFloat(0.0f), this->getInt32(3));
992
993 return packed_ret_value;
994 }
995
996 template<bool preserveNames, typename T, typename Inserter>
Create_SAMPLE(llvm::Value * coordinate_u,llvm::Value * coordinate_v,llvm::Value * coordinate_r,llvm::Value * coordinate_ai,llvm::Value * ptr_textureIdx,llvm::Value * ptr_sampler,llvm::Value * offsetU,llvm::Value * offsetV,llvm::Value * offsetW,llvm::Value * minlod,bool feedback_enabled,llvm::Type * returnType)997 inline llvm::CallInst* LLVM3DBuilder<preserveNames, T, Inserter>::Create_SAMPLE(
998 llvm::Value* coordinate_u,
999 llvm::Value* coordinate_v,
1000 llvm::Value* coordinate_r,
1001 llvm::Value* coordinate_ai,
1002 llvm::Value* ptr_textureIdx,
1003 llvm::Value* ptr_sampler,
1004 llvm::Value* offsetU,
1005 llvm::Value* offsetV,
1006 llvm::Value* offsetW,
1007 llvm::Value* minlod,
1008 bool feedback_enabled,
1009 llvm::Type* returnType)
1010 {
1011 if (minlod == nullptr)
1012 {
1013 minlod = llvm::ConstantFP::get(coordinate_u->getType(), 0.0);
1014 }
1015
1016 llvm::Value * packed_tex_params[] = {
1017 coordinate_u,
1018 coordinate_v,
1019 coordinate_r,
1020 coordinate_ai,
1021 minlod,
1022 ptr_textureIdx,
1023 ptr_sampler,
1024 offsetU,
1025 offsetV,
1026 offsetW
1027 };
1028
1029 llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
1030
1031 llvm::Type* dstType = ( returnType != nullptr ) ? returnType : this->getFloatTy();
1032 llvm::Type* types[] = {
1033 IGCLLVM::FixedVectorType::get(dstType, 4),
1034 coordinate_u->getType(),
1035 ptr_textureIdx->getType(),
1036 ptr_sampler->getType()
1037 };
1038 if (feedback_enabled)
1039 {
1040 types[0] = IGCLLVM::FixedVectorType::get(dstType, 5);
1041 }
1042 llvm::Function* func_llvm_GenISA_sampleptr_v4f32_f32 = llvm::GenISAIntrinsic::getDeclaration
1043 (module, llvm::GenISAIntrinsic::GenISA_sampleptr, types);
1044 llvm::CallInst* packed_tex_call = this->CreateCall(func_llvm_GenISA_sampleptr_v4f32_f32, packed_tex_params);
1045 return packed_tex_call;
1046 }
1047
1048 template<bool preserveNames, typename T, typename Inserter>
Create_SAMPLEC(llvm::Value * float_reference_0,llvm::Value * float_address_0,llvm::Value * float_address_1,llvm::Value * float_address_2,llvm::Value * float_address_3,llvm::Value * int32_textureIdx,llvm::Value * int32_sampler,llvm::Value * int32_offsetU,llvm::Value * int32_offsetV,llvm::Value * int32_offsetR,llvm::Value * minlod,bool feedback_enabled,llvm::Type * returnType)1049 inline llvm::CallInst* LLVM3DBuilder<preserveNames, T, Inserter>::Create_SAMPLEC(
1050 llvm::Value* float_reference_0,
1051 llvm::Value* float_address_0,
1052 llvm::Value* float_address_1,
1053 llvm::Value* float_address_2,
1054 llvm::Value* float_address_3,
1055 llvm::Value* int32_textureIdx,
1056 llvm::Value* int32_sampler,
1057 llvm::Value* int32_offsetU,
1058 llvm::Value* int32_offsetV,
1059 llvm::Value* int32_offsetR,
1060 llvm::Value* minlod,
1061 bool feedback_enabled,
1062 llvm::Type* returnType)
1063 {
1064 if (minlod == nullptr)
1065 {
1066 minlod = llvm::ConstantFP::get(float_address_0->getType(), 0.0);
1067 }
1068
1069 llvm::Value * packed_tex_params[] = {
1070 float_reference_0,
1071 float_address_0,
1072 float_address_1,
1073 float_address_2,
1074 float_address_3,
1075 minlod,
1076 int32_textureIdx,
1077 int32_sampler,
1078 int32_offsetU,
1079 int32_offsetV,
1080 int32_offsetR
1081 };
1082
1083 llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
1084
1085 llvm::Type* dstType = ( returnType != nullptr ) ? returnType : this->getFloatTy();
1086 llvm::Type* types[] = {
1087 IGCLLVM::FixedVectorType::get(dstType, 4),
1088 float_reference_0->getType(),
1089 int32_textureIdx->getType(),
1090 int32_sampler->getType()
1091 };
1092 if (feedback_enabled)
1093 {
1094 types[0] = IGCLLVM::FixedVectorType::get(dstType, 5);
1095 }
1096 llvm::Function* func_llvm_GenISA_sampleCptr_v4f32_f32 = llvm::GenISAIntrinsic::getDeclaration
1097 (module, llvm::GenISAIntrinsic::GenISA_sampleCptr, types);
1098 llvm::CallInst* packed_tex_call = this->CreateCall(func_llvm_GenISA_sampleCptr_v4f32_f32, packed_tex_params);
1099 return packed_tex_call;
1100 }
1101
1102 template<bool preserveNames, typename T, typename Inserter>
Create_SAMPLELC(llvm::Value * float_reference_0,llvm::Value * float_address_0,llvm::Value * float_address_1,llvm::Value * float_address_2,llvm::Value * float_address_3,llvm::Value * float_lod,llvm::Value * int32_textureIdx,llvm::Value * int32_sampler,llvm::Value * int32_offsetU,llvm::Value * int32_offsetV,llvm::Value * int32_offsetW,llvm::Type * returnType)1103 inline llvm::CallInst* LLVM3DBuilder<preserveNames, T, Inserter>::Create_SAMPLELC(
1104 llvm::Value* float_reference_0,
1105 llvm::Value* float_address_0,
1106 llvm::Value* float_address_1,
1107 llvm::Value* float_address_2,
1108 llvm::Value* float_address_3,
1109 llvm::Value* float_lod,
1110 llvm::Value* int32_textureIdx,
1111 llvm::Value* int32_sampler,
1112 llvm::Value* int32_offsetU,
1113 llvm::Value* int32_offsetV,
1114 llvm::Value* int32_offsetW,
1115 llvm::Type* returnType)
1116 {
1117 llvm::Value * packed_tex_params[] = {
1118 float_reference_0,
1119 float_lod,
1120 float_address_0,
1121 float_address_1,
1122 float_address_2,
1123 float_address_3,
1124 int32_textureIdx,
1125 int32_sampler,
1126 int32_offsetU,
1127 int32_offsetV,
1128 int32_offsetW
1129 };
1130
1131 llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
1132
1133 llvm::Type* dstType = ( returnType != nullptr ) ? returnType : this->getFloatTy();
1134 llvm::Type* types[] = {
1135 IGCLLVM::FixedVectorType::get(dstType, 4),
1136 float_reference_0->getType(),
1137 int32_textureIdx->getType(),
1138 int32_sampler->getType()
1139 };
1140 llvm::Function* func_llvm_GenISA_sampleLCptr_v4f32_f32 = llvm::GenISAIntrinsic::getDeclaration
1141 (module, llvm::GenISAIntrinsic::GenISA_sampleLCptr, types);
1142 llvm::CallInst* packed_tex_call = this->CreateCall(func_llvm_GenISA_sampleLCptr_v4f32_f32, packed_tex_params);
1143 return packed_tex_call;
1144 }
1145
1146 template<bool preserveNames, typename T, typename Inserter>
Create_SAMPLEC_LZ(llvm::Value * float_reference_0,llvm::Value * float_address_0,llvm::Value * float_address_1,llvm::Value * float_address_2,llvm::Value * float_address_3,llvm::Value * int32_textureIdx,llvm::Value * int32_sampler,llvm::Value * int32_offsetU,llvm::Value * int32_offsetV,llvm::Value * int32_offsetW,bool feedback_enabled,llvm::Type * returnType)1147 inline llvm::CallInst* LLVM3DBuilder<preserveNames, T, Inserter>::Create_SAMPLEC_LZ(
1148 llvm::Value* float_reference_0,
1149 llvm::Value* float_address_0,
1150 llvm::Value* float_address_1,
1151 llvm::Value* float_address_2,
1152 llvm::Value* float_address_3,
1153 llvm::Value* int32_textureIdx,
1154 llvm::Value* int32_sampler,
1155 llvm::Value* int32_offsetU,
1156 llvm::Value* int32_offsetV,
1157 llvm::Value* int32_offsetW,
1158 bool feedback_enabled,
1159 llvm::Type* returnType)
1160 {
1161 llvm::Value * packed_tex_params[] = {
1162 float_reference_0,
1163 llvm::ConstantFP::get(float_address_0->getType(), 0.0),
1164 float_address_0,
1165 float_address_1,
1166 float_address_2,
1167 float_address_3,
1168 int32_textureIdx,
1169 int32_sampler,
1170 int32_offsetU,
1171 int32_offsetV,
1172 int32_offsetW
1173 };
1174
1175 llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
1176
1177 llvm::Type* dstType = ( returnType != nullptr ) ? returnType : this->getFloatTy();
1178 llvm::Type* types[] = {
1179 IGCLLVM::FixedVectorType::get(dstType, 4),
1180 float_reference_0->getType(),
1181 int32_textureIdx->getType(),
1182 int32_sampler->getType()
1183 };
1184 if (feedback_enabled)
1185 {
1186 types[0] = IGCLLVM::FixedVectorType::get(dstType, 5);
1187 }
1188 llvm::Function* func_llvm_GenISA_sampleLCptr_v4f32_f32 = llvm::GenISAIntrinsic::getDeclaration
1189 (module, llvm::GenISAIntrinsic::GenISA_sampleLCptr, types);
1190 llvm::CallInst* packed_tex_call = this->CreateCall(func_llvm_GenISA_sampleLCptr_v4f32_f32, packed_tex_params);
1191 return packed_tex_call;
1192 }
1193
1194 template<bool preserveNames, typename T, typename Inserter>
Create_gather4C(llvm::Value * float_reference_0,llvm::Value * float_address_0,llvm::Value * float_address_1,llvm::Value * float_address_2,llvm::Value * float_address_3,llvm::Value * int32_textureIdx,llvm::Value * int32_sampler,llvm::Value * int32_offsetU,llvm::Value * int32_offsetV,llvm::Value * int32_srcChannel,bool feedback_enabled,llvm::Type * returnType)1195 inline llvm::CallInst* LLVM3DBuilder<preserveNames, T, Inserter>::Create_gather4C(
1196 llvm::Value* float_reference_0,
1197 llvm::Value* float_address_0,
1198 llvm::Value* float_address_1,
1199 llvm::Value* float_address_2,
1200 llvm::Value* float_address_3,
1201 llvm::Value* int32_textureIdx,
1202 llvm::Value* int32_sampler,
1203 llvm::Value* int32_offsetU,
1204 llvm::Value* int32_offsetV,
1205 llvm::Value* int32_srcChannel,
1206 bool feedback_enabled,
1207 llvm::Type* returnType)
1208 {
1209 llvm::Value * packed_tex_params[] = {
1210 float_reference_0,
1211 float_address_0,
1212 float_address_1,
1213 float_address_2,
1214 float_address_3,
1215 int32_textureIdx,
1216 int32_sampler,
1217 int32_offsetU,
1218 int32_offsetV,
1219 m_int0,
1220 int32_srcChannel
1221 };
1222
1223 llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
1224
1225 llvm::Type* dstType = ( returnType != nullptr ) ? returnType : this->getFloatTy();
1226 llvm::Type* types[] = {
1227 IGCLLVM::FixedVectorType::get(dstType, 4),
1228 float_reference_0->getType(),
1229 int32_textureIdx->getType(),
1230 int32_sampler->getType()
1231 };
1232 if (feedback_enabled)
1233 {
1234 types[0] = IGCLLVM::FixedVectorType::get(dstType, 5);
1235 }
1236 llvm::Function* func_llvm_GenISA_gather4Cptr_v4f32_f32 = llvm::GenISAIntrinsic::getDeclaration
1237 (module, llvm::GenISAIntrinsic::GenISA_gather4Cptr, types);
1238 llvm::CallInst* packed_tex_call = this->CreateCall(func_llvm_GenISA_gather4Cptr_v4f32_f32, packed_tex_params);
1239 return packed_tex_call;
1240 }
1241
1242 template<bool preserveNames, typename T, typename Inserter>
Create_gather4POC(llvm::Value * float_address_0,llvm::Value * float_address_1,llvm::Value * float_address_2,llvm::Value * int_src_offset_0,llvm::Value * int_src_offset_1,llvm::Value * float_src_reference_0,llvm::Value * int32_textureIdx,llvm::Value * int32_sampler,llvm::Value * int32_offsetU,llvm::Value * int32_offsetV,llvm::Value * int32_srcChannel,bool feedback_enabled,llvm::Type * returnType)1243 inline llvm::CallInst* LLVM3DBuilder<preserveNames, T, Inserter>::Create_gather4POC(
1244 llvm::Value* float_address_0,
1245 llvm::Value* float_address_1,
1246 llvm::Value* float_address_2,
1247 llvm::Value* int_src_offset_0,
1248 llvm::Value* int_src_offset_1,
1249 llvm::Value* float_src_reference_0,
1250 llvm::Value* int32_textureIdx,
1251 llvm::Value* int32_sampler,
1252 llvm::Value* int32_offsetU,
1253 llvm::Value* int32_offsetV,
1254 llvm::Value* int32_srcChannel,
1255 bool feedback_enabled,
1256 llvm::Type* returnType)
1257 {
1258 llvm::Value * packed_tex_params[] = {
1259 float_src_reference_0,
1260 float_address_0,
1261 float_address_1,
1262 int_src_offset_0,
1263 int_src_offset_1,
1264 float_address_2,
1265 int32_textureIdx,
1266 int32_sampler,
1267 int32_offsetU,
1268 int32_offsetV,
1269 m_int0,
1270 int32_srcChannel
1271 };
1272
1273 llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
1274
1275 llvm::Type* dstType = ( returnType != nullptr ) ? returnType : this->getFloatTy();
1276 llvm::Type* types[] = {
1277 IGCLLVM::FixedVectorType::get(dstType, 4),
1278 float_src_reference_0->getType(),
1279 int32_textureIdx->getType(),
1280 int32_sampler->getType()
1281 };
1282 if (feedback_enabled)
1283 {
1284 types[0] = IGCLLVM::FixedVectorType::get(dstType, 5);
1285 }
1286 llvm::Function* func_llvm_GenISA_gather4POCptr_v4f32_f32 = llvm::GenISAIntrinsic::getDeclaration
1287 (module, llvm::GenISAIntrinsic::GenISA_gather4POCptr, types);
1288 llvm::CallInst* packed_tex_call = this->CreateCall(func_llvm_GenISA_gather4POCptr_v4f32_f32, packed_tex_params);
1289 return packed_tex_call;
1290 }
1291
1292 template<bool preserveNames, typename T, typename Inserter>
Create_gather4PO(llvm::Value * float_address_0,llvm::Value * float_address_1,llvm::Value * float_address_2,llvm::Value * int_src_offset_0,llvm::Value * int_src_offset_1,llvm::Value * int32_textureIdx,llvm::Value * int32_sampler,llvm::Value * int32_offsetU,llvm::Value * int32_offsetV,llvm::Value * int32_srcChannel,bool feedback_enabled,llvm::Type * returnType)1293 inline llvm::CallInst* LLVM3DBuilder<preserveNames, T, Inserter>::Create_gather4PO(
1294 llvm::Value* float_address_0,
1295 llvm::Value* float_address_1,
1296 llvm::Value* float_address_2,
1297 llvm::Value* int_src_offset_0,
1298 llvm::Value* int_src_offset_1,
1299 llvm::Value* int32_textureIdx,
1300 llvm::Value* int32_sampler,
1301 llvm::Value* int32_offsetU,
1302 llvm::Value* int32_offsetV,
1303 llvm::Value* int32_srcChannel,
1304 bool feedback_enabled,
1305 llvm::Type* returnType)
1306 {
1307 llvm::Value * packed_tex_params[] = {
1308 float_address_0,
1309 float_address_1,
1310 int_src_offset_0,
1311 int_src_offset_1,
1312 float_address_2,
1313 int32_textureIdx,
1314 int32_sampler,
1315 int32_offsetU,
1316 int32_offsetV,
1317 m_int0,
1318 int32_srcChannel
1319 };
1320
1321 llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
1322
1323 llvm::Type* dstType = ( returnType != nullptr ) ? returnType : this->getFloatTy();
1324 llvm::Type* types[] = {
1325 IGCLLVM::FixedVectorType::get(dstType, 4),
1326 float_address_0->getType(),
1327 int32_textureIdx->getType(),
1328 int32_sampler->getType()
1329 };
1330 if (feedback_enabled)
1331 {
1332 types[0] = IGCLLVM::FixedVectorType::get(dstType, 5);
1333 }
1334 llvm::Function* func_llvm_GenISA_gather4POptr_v4f32_f32 = llvm::GenISAIntrinsic::getDeclaration
1335 (module, llvm::GenISAIntrinsic::GenISA_gather4POptr, types);
1336 llvm::CallInst* packed_tex_call = this->CreateCall(func_llvm_GenISA_gather4POptr_v4f32_f32, packed_tex_params);
1337
1338 return packed_tex_call;
1339 }
1340
1341 template<bool preserveNames, typename T, typename Inserter>
Create_gather4PositionOffsets(llvm::Value * float_address_0,llvm::Value * float_address_1,llvm::Value * float_address_2,llvm::ArrayRef<llvm::Value * > int_src_offsets,llvm::Value * int32_textureIdx,llvm::Value * int32_sampler,llvm::Value * int32_offsetU,llvm::Value * int32_offsetV,llvm::Value * int32_srcChannel)1342 inline llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::Create_gather4PositionOffsets(
1343 llvm::Value* float_address_0,
1344 llvm::Value* float_address_1,
1345 llvm::Value* float_address_2,
1346 llvm::ArrayRef<llvm::Value *> int_src_offsets,
1347 llvm::Value* int32_textureIdx,
1348 llvm::Value* int32_sampler,
1349 llvm::Value* int32_offsetU,
1350 llvm::Value* int32_offsetV,
1351 llvm::Value* int32_srcChannel)
1352 {
1353 llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
1354
1355 llvm::Value *gatherReturn = llvm::UndefValue::get(IGCLLVM::FixedVectorType::get(llvm::Type::getFloatTy(module->getContext()), 4));
1356 for (int i = 0, j = 0; i < 7; i = i + 2, j++)
1357 {
1358 llvm::Value* packed_tex_call = Create_gather4PO(
1359 float_address_0,
1360 float_address_1,
1361 float_address_2,
1362 int_src_offsets[i],
1363 int_src_offsets[i + 1],
1364 int32_textureIdx,
1365 int32_sampler,
1366 int32_offsetU,
1367 int32_offsetV,
1368 int32_srcChannel,
1369 false,
1370 llvm::Type::getFloatTy(module->getContext()));
1371
1372
1373 gatherReturn = this->CreateInsertElement(
1374 gatherReturn,
1375 this->CreateExtractElement(packed_tex_call, this->getInt32(3)),
1376 this->getInt32(j),
1377 "call_inst");
1378 }
1379
1380 return gatherReturn;
1381 }
1382
1383 template<bool preserveNames, typename T, typename Inserter>
Create_gather4PositionOffsetsC(llvm::Value * float_reference_0,llvm::Value * float_address_0,llvm::Value * float_address_1,llvm::Value * float_address_2,llvm::ArrayRef<llvm::Value * > int_src_offsets,llvm::Value * int32_textureIdx_356,llvm::Value * int32_sampler_357,llvm::Value * int32_offsetU,llvm::Value * int32_offsetV,llvm::Value * int32_srcChannel)1384 inline llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::Create_gather4PositionOffsetsC(
1385 llvm::Value* float_reference_0,
1386 llvm::Value* float_address_0,
1387 llvm::Value* float_address_1,
1388 llvm::Value* float_address_2,
1389 llvm::ArrayRef<llvm::Value *> int_src_offsets,
1390 llvm::Value* int32_textureIdx_356,
1391 llvm::Value* int32_sampler_357,
1392 llvm::Value* int32_offsetU,
1393 llvm::Value* int32_offsetV,
1394 llvm::Value* int32_srcChannel)
1395 {
1396 llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
1397
1398 llvm::Value *gatherReturn = llvm::UndefValue::get(IGCLLVM::FixedVectorType::get(llvm::Type::getFloatTy(module->getContext()), 4));
1399 for (int i = 0, j = 0; i < 7; i = i + 2, j++)
1400 {
1401 llvm::Value* packed_tex_1527_call = Create_gather4POC(
1402 float_address_0,
1403 float_address_1,
1404 float_address_2,
1405 int_src_offsets[i],
1406 int_src_offsets[i + 1],
1407 float_reference_0,
1408 int32_textureIdx_356,
1409 int32_sampler_357,
1410 int32_offsetU,
1411 int32_offsetV,
1412 int32_srcChannel,
1413 false,
1414 llvm::Type::getFloatTy(module->getContext()));
1415
1416 gatherReturn = this->CreateInsertElement(
1417 gatherReturn,
1418 this->CreateExtractElement(packed_tex_1527_call, this->getInt32(3)),
1419 this->getInt32(j),
1420 "call_inst");
1421 }
1422
1423 return gatherReturn;
1424 }
1425
1426 template<bool preserveNames, typename T, typename Inserter>
Create_SAMPLEB(llvm::Value * float_bias_0,llvm::Value * float_address_0,llvm::Value * float_address_1,llvm::Value * float_address_2,llvm::Value * float_address_3,llvm::Value * int32_textureIdx,llvm::Value * int32_sampler,llvm::Value * int32_offsetU,llvm::Value * int32_offsetV,llvm::Value * int32_offsetW,llvm::Value * minlod,bool feedback_enabled,llvm::Type * returnType)1427 inline llvm::CallInst* LLVM3DBuilder<preserveNames, T, Inserter>::Create_SAMPLEB(
1428 llvm::Value* float_bias_0,
1429 llvm::Value* float_address_0,
1430 llvm::Value* float_address_1,
1431 llvm::Value* float_address_2,
1432 llvm::Value* float_address_3,
1433 llvm::Value* int32_textureIdx,
1434 llvm::Value* int32_sampler,
1435 llvm::Value* int32_offsetU,
1436 llvm::Value* int32_offsetV,
1437 llvm::Value* int32_offsetW,
1438 llvm::Value* minlod,
1439 bool feedback_enabled,
1440 llvm::Type* returnType)
1441 {
1442 if (minlod == nullptr)
1443 {
1444 minlod = llvm::ConstantFP::get(float_address_0->getType(), 0.0);
1445 }
1446
1447 // %tex = call <4 x float> @llvm.GenISA.sample.v4f32.f32(float %src_s.chan0, float %src_s.chan1, float %src_s.chan2, float 0.000000e+00, i32 %textureIdx, i32 %sampler, i32 %offsetU, i32 %offsetV, i32 %offsetW)
1448 llvm::Value * packed_tex_params[] = {
1449 float_bias_0,
1450 float_address_0,
1451 float_address_1,
1452 float_address_2,
1453 float_address_3,
1454 minlod,
1455 int32_textureIdx,
1456 int32_sampler,
1457 int32_offsetU,
1458 int32_offsetV,
1459 int32_offsetW
1460 };
1461
1462 llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
1463
1464 llvm::Type* dstType = ( returnType != nullptr ) ? returnType : this->getFloatTy();
1465 llvm::Type* types[] = {
1466 IGCLLVM::FixedVectorType::get(dstType, 4),
1467 float_bias_0->getType(),
1468 int32_textureIdx->getType(),
1469 int32_sampler->getType()
1470 };
1471 if (feedback_enabled)
1472 {
1473 types[0] = IGCLLVM::FixedVectorType::get(dstType, 5);
1474 }
1475 llvm::Function* func_llvm_GenISA_sampleB_v4f32_f32 = llvm::GenISAIntrinsic::getDeclaration
1476 (module, llvm::GenISAIntrinsic::GenISA_sampleBptr, types);
1477
1478 llvm::CallInst* packed_tex_call = this->CreateCall(func_llvm_GenISA_sampleB_v4f32_f32, packed_tex_params);
1479 return packed_tex_call;
1480 }
1481
1482 template<bool preserveNames, typename T, typename Inserter>
Create_SAMPLEL(llvm::Value * float_lod_0,llvm::Value * float_address_0,llvm::Value * float_address_1,llvm::Value * float_address_2,llvm::Value * float_address_3,llvm::Value * ptr_textureIdx,llvm::Value * ptr_sampler,llvm::Value * int32_offsetU,llvm::Value * int32_offsetV,llvm::Value * int32_offsetW,bool feedback_enabled,llvm::Type * returnType)1483 inline llvm::CallInst* LLVM3DBuilder<preserveNames, T, Inserter>::Create_SAMPLEL(
1484 llvm::Value* float_lod_0,
1485 llvm::Value* float_address_0,
1486 llvm::Value* float_address_1,
1487 llvm::Value* float_address_2,
1488 llvm::Value* float_address_3,
1489 llvm::Value* ptr_textureIdx,
1490 llvm::Value* ptr_sampler,
1491 llvm::Value* int32_offsetU,
1492 llvm::Value* int32_offsetV,
1493 llvm::Value* int32_offsetW,
1494 bool feedback_enabled,
1495 llvm::Type* returnType)
1496 {
1497 llvm::Value * packed_tex_params[] = {
1498 float_lod_0,
1499 float_address_0,
1500 float_address_1,
1501 float_address_2,
1502 float_address_3,
1503 ptr_textureIdx,
1504 ptr_sampler,
1505 int32_offsetU,
1506 int32_offsetV,
1507 int32_offsetW
1508 };
1509
1510 llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
1511
1512 llvm::Type* dstType = ( returnType != nullptr ) ? returnType : this->getFloatTy();
1513 llvm::Type* types[] = {
1514 IGCLLVM::FixedVectorType::get(dstType, 4),
1515 float_lod_0->getType(),
1516 ptr_textureIdx->getType(),
1517 ptr_sampler->getType()
1518 };
1519 if (feedback_enabled)
1520 {
1521 types[0] = IGCLLVM::FixedVectorType::get(dstType, 5);
1522 }
1523 llvm::Function* func_llvm_GenISA_sampleL_v4f32_f32 = llvm::GenISAIntrinsic::getDeclaration
1524 (module, llvm::GenISAIntrinsic::GenISA_sampleLptr, types);
1525
1526 llvm::CallInst* packed_tex_call = this->CreateCall(func_llvm_GenISA_sampleL_v4f32_f32, packed_tex_params);
1527 return packed_tex_call;
1528 }
1529
1530 template<bool preserveNames, typename T, typename Inserter>
Create_SAMPLED(SampleD_DC_FromCubeParams & sampleParams,llvm::Value * minlod,bool feedback_enabled,llvm::Type * returnType)1531 inline llvm::CallInst* LLVM3DBuilder<preserveNames, T, Inserter>::Create_SAMPLED(
1532 SampleD_DC_FromCubeParams& sampleParams,
1533 llvm::Value* minlod,
1534 bool feedback_enabled,
1535 llvm::Type* returnType)
1536 {
1537 return Create_SAMPLED(
1538 sampleParams.get_float_src_u(),
1539 sampleParams.get_float_src_v(),
1540 sampleParams.get_float_src_r(),
1541 sampleParams.get_dxu(),
1542 sampleParams.get_dxv(),
1543 sampleParams.get_dxr(),
1544 sampleParams.get_dyu(),
1545 sampleParams.get_dyv(),
1546 sampleParams.get_dyr(),
1547 sampleParams.get_float_src_ai(),
1548 sampleParams.get_int32_textureIdx(),
1549 sampleParams.get_int32_sampler(),
1550 sampleParams.get_int32_offsetU(),
1551 sampleParams.get_int32_offsetV(),
1552 sampleParams.get_int32_offsetW(),
1553 minlod,
1554 feedback_enabled,
1555 returnType
1556 );
1557 }
1558
1559 template<bool preserveNames, typename T, typename Inserter>
Create_SAMPLED(llvm::Value * float_src1_s_chan0,llvm::Value * float_src1_s_chan1,llvm::Value * float_src1_s_chan2,llvm::Value * float_src2_s_chan0,llvm::Value * float_src2_s_chan1,llvm::Value * float_src2_s_chan2,llvm::Value * float_src3_s_chan0,llvm::Value * float_src3_s_chan1,llvm::Value * float_src3_s_chan2,llvm::Value * float_src1_s_chan3,llvm::Value * ptr_textureIdx,llvm::Value * ptr_sampler,llvm::Value * int32_offsetU_358,llvm::Value * int32_offsetV_359,llvm::Value * int32_offsetW_359,llvm::Value * minlod,bool feedback_enabled,llvm::Type * returnType)1560 inline llvm::CallInst* LLVM3DBuilder<preserveNames, T, Inserter>::Create_SAMPLED(
1561 llvm::Value* float_src1_s_chan0,
1562 llvm::Value* float_src1_s_chan1,
1563 llvm::Value* float_src1_s_chan2,
1564 llvm::Value* float_src2_s_chan0,
1565 llvm::Value* float_src2_s_chan1,
1566 llvm::Value* float_src2_s_chan2,
1567 llvm::Value* float_src3_s_chan0,
1568 llvm::Value* float_src3_s_chan1,
1569 llvm::Value* float_src3_s_chan2,
1570 llvm::Value* float_src1_s_chan3,
1571 llvm::Value* ptr_textureIdx,
1572 llvm::Value* ptr_sampler,
1573 llvm::Value* int32_offsetU_358,
1574 llvm::Value* int32_offsetV_359,
1575 llvm::Value* int32_offsetW_359,
1576 llvm::Value* minlod,
1577 bool feedback_enabled,
1578 llvm::Type* returnType)
1579 {
1580 if (minlod == nullptr)
1581 {
1582 minlod = llvm::ConstantFP::get(float_src1_s_chan0->getType(), 0.0);
1583 }
1584
1585 // %tex = call <4 x float> @llvm.GenISA.sample.v4f32.f32D(float %src_s.chan0, float %src2_s.chan0, float %src3_s.chan0, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, i32 %textureIdx, i32 %sampler, i32 %offsetU, i32 0, i32 0)
1586 llvm::Value * packed_tex_params[] = {
1587 float_src1_s_chan0,
1588 float_src2_s_chan0,
1589 float_src3_s_chan0,
1590 float_src1_s_chan1,
1591 float_src2_s_chan1,
1592 float_src3_s_chan1,
1593 float_src1_s_chan2,
1594 float_src2_s_chan2,
1595 float_src3_s_chan2,
1596 float_src1_s_chan3,
1597 minlod,
1598 ptr_textureIdx,
1599 ptr_sampler,
1600 int32_offsetU_358,
1601 int32_offsetV_359,
1602 int32_offsetW_359
1603 };
1604
1605 llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
1606
1607 llvm::Type* dstType = ( returnType != nullptr ) ? returnType : this->getFloatTy();
1608 llvm::Type* types[] = {
1609 IGCLLVM::FixedVectorType::get(dstType, 4),
1610 float_src1_s_chan0->getType(),
1611 ptr_textureIdx->getType(),
1612 ptr_sampler->getType()
1613 };
1614 if(feedback_enabled)
1615 {
1616 types[0] = IGCLLVM::FixedVectorType::get(dstType, 5);
1617 }
1618
1619 llvm::Function* func_llvm_GenISA_sampleDptr_v4f32_f32 = llvm::GenISAIntrinsic::getDeclaration
1620 (module, llvm::GenISAIntrinsic::GenISA_sampleDptr, types);
1621
1622 llvm::CallInst* packed_tex_call = this->CreateCall(func_llvm_GenISA_sampleDptr_v4f32_f32, packed_tex_params);
1623
1624 return packed_tex_call;
1625 }
1626
1627 template<bool preserveNames, typename T, typename Inserter>
Create_SAMPLEDC(llvm::Value * float_ref,llvm::Value * float_src_u,llvm::Value * dxu,llvm::Value * dyu,llvm::Value * float_src_v,llvm::Value * dxv,llvm::Value * dyv,llvm::Value * float_src_r,llvm::Value * dxr,llvm::Value * dyr,llvm::Value * float_src_ai,llvm::Value * int32_textureIdx,llvm::Value * int32_sampler,llvm::Value * int32_offsetU,llvm::Value * int32_offsetV,llvm::Value * int32_offsetW,llvm::Type * returnType)1628 inline llvm::CallInst* LLVM3DBuilder<preserveNames, T, Inserter>::Create_SAMPLEDC(
1629 llvm::Value* float_ref,
1630 llvm::Value* float_src_u,
1631 llvm::Value* dxu,
1632 llvm::Value* dyu,
1633 llvm::Value* float_src_v,
1634 llvm::Value* dxv,
1635 llvm::Value* dyv,
1636 llvm::Value* float_src_r,
1637 llvm::Value* dxr,
1638 llvm::Value* dyr,
1639 llvm::Value* float_src_ai,
1640 llvm::Value* int32_textureIdx,
1641 llvm::Value* int32_sampler,
1642 llvm::Value* int32_offsetU,
1643 llvm::Value* int32_offsetV,
1644 llvm::Value* int32_offsetW,
1645 llvm::Type* returnType)
1646 {
1647 // %tex = call <4 x float> @llvm.GenISA.sample.v4f32.f32D(float %float_ref, float %float_src_u, float %dxu, float %dxu, float %dyu, float float_src_v,
1648 // float %dxv, float %dyv, float %float_src_r, float %dxr, float %dyr, float 0.000000e+00,
1649 // i32 %textureIdx, i32 %sampler, i32 %offsetU, i32 %offsetV, i32 %offsetW)
1650 llvm::Value * packed_tex_params[] = {
1651 float_ref,
1652 float_src_u,
1653 dxu,
1654 dyu,
1655 float_src_v,
1656 dxv,
1657 dyv,
1658 float_src_r,
1659 dxr,
1660 dyr,
1661 float_src_ai,
1662 int32_textureIdx,
1663 int32_sampler,
1664 int32_offsetU,
1665 int32_offsetV,
1666 int32_offsetW
1667 };
1668
1669 llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
1670
1671 llvm::Type* dstType = ( returnType != nullptr ) ? returnType : this->getFloatTy();
1672 llvm::Type* types[] = {
1673 IGCLLVM::FixedVectorType::get(dstType, 4),
1674 float_ref->getType(),
1675 int32_textureIdx->getType(),
1676 int32_sampler->getType()
1677 };
1678
1679 llvm::Function* func_llvm_GenISA_sampleDCptr_v4f32_f32 = llvm::GenISAIntrinsic::getDeclaration
1680 (module, llvm::GenISAIntrinsic::GenISA_sampleDCptr, types);
1681
1682 llvm::CallInst* packed_tex_call = this->CreateCall(func_llvm_GenISA_sampleDCptr_v4f32_f32, packed_tex_params);
1683
1684 return packed_tex_call;
1685 }
1686
1687 template<bool preserveNames, typename T, typename Inserter>
Create_lod(llvm::Value * float_address_0,llvm::Value * float_address_1,llvm::Value * float_address_2,llvm::Value * float_address_3,llvm::Value * int32_textureIdx_356,llvm::Value * int32_sampler_357,llvm::Type * returnType)1688 inline llvm::CallInst* LLVM3DBuilder<preserveNames, T, Inserter>::Create_lod(
1689 llvm::Value* float_address_0,
1690 llvm::Value* float_address_1,
1691 llvm::Value* float_address_2,
1692 llvm::Value* float_address_3,
1693 llvm::Value* int32_textureIdx_356,
1694 llvm::Value* int32_sampler_357,
1695 llvm::Type* returnType)
1696 {
1697 llvm::Value * packed_tex_params[] = {
1698 float_address_0,
1699 float_address_1,
1700 float_address_2,
1701 float_address_3,
1702 int32_textureIdx_356,
1703 int32_sampler_357,
1704 };
1705
1706 llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
1707
1708 llvm::Type* dstType = ( returnType != nullptr ) ? returnType : this->getFloatTy();
1709 llvm::Type* types[] = {
1710 IGCLLVM::FixedVectorType::get(dstType, 4),
1711 float_address_0->getType(),
1712 int32_textureIdx_356->getType(),
1713 int32_sampler_357->getType()
1714 };
1715
1716 llvm::Function* func_llvm_GenISA_lodptr_v4f32_f32 = llvm::GenISAIntrinsic::getDeclaration
1717 (module, llvm::GenISAIntrinsic::GenISA_lodptr, types);
1718
1719 llvm::CallInst* packed_tex_call = this->CreateCall(func_llvm_GenISA_lodptr_v4f32_f32, packed_tex_params);
1720
1721 return packed_tex_call;
1722 }
1723
1724 template<bool preserveNames, typename T, typename Inserter>
Create_gather4(llvm::Value * float_address_0,llvm::Value * float_address_1,llvm::Value * float_address_2,llvm::Value * float_address_3,llvm::Value * int32_textureIdx_356,llvm::Value * int32_sampler_357,llvm::Value * int32_offsetU,llvm::Value * int32_offsetV,llvm::Value * int32_offsetW,llvm::Value * int32_srcChannel,bool feedback_enabled,llvm::Type * returnType)1725 inline llvm::CallInst* LLVM3DBuilder<preserveNames, T, Inserter>::Create_gather4(
1726 llvm::Value* float_address_0,
1727 llvm::Value* float_address_1,
1728 llvm::Value* float_address_2,
1729 llvm::Value* float_address_3,
1730 llvm::Value* int32_textureIdx_356,
1731 llvm::Value* int32_sampler_357,
1732 llvm::Value* int32_offsetU,
1733 llvm::Value* int32_offsetV,
1734 llvm::Value* int32_offsetW,
1735 llvm::Value* int32_srcChannel,
1736 bool feedback_enabled,
1737 llvm::Type* returnType)
1738 {
1739 llvm::Value * packed_tex_params[] = {
1740 float_address_0,
1741 float_address_1,
1742 float_address_2,
1743 float_address_3,
1744 int32_textureIdx_356,
1745 int32_sampler_357,
1746 int32_offsetU,
1747 int32_offsetV,
1748 int32_offsetW,
1749 int32_srcChannel
1750 };
1751
1752 llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
1753
1754 llvm::Type* dstType = ( returnType != nullptr ) ? returnType : this->getFloatTy();
1755 llvm::Type* types[] = {
1756 IGCLLVM::FixedVectorType::get(dstType, 4),
1757 float_address_0->getType(),
1758 int32_textureIdx_356->getType(),
1759 int32_sampler_357->getType()
1760 };
1761 if (feedback_enabled)
1762 {
1763 types[0] = IGCLLVM::FixedVectorType::get(dstType, 5);
1764 }
1765 llvm::Function* func_llvm_GenISA_gather4ptr_v4f32_f32 = llvm::GenISAIntrinsic::getDeclaration
1766 (module, llvm::GenISAIntrinsic::GenISA_gather4ptr, types);
1767
1768 llvm::CallInst* packed_tex_call = this->CreateCall(func_llvm_GenISA_gather4ptr_v4f32_f32, packed_tex_params);
1769
1770 return packed_tex_call;
1771 }
1772
1773 template<bool preserveNames, typename T, typename Inserter>
Create_load(llvm::Value * int32_sampleIdxU,llvm::Value * int32_sampleIdxV,llvm::Value * int32_sampleIdxR,llvm::Value * int32_lod,llvm::Value * ptr_textureIdx,llvm::Value * int32_offsetU,llvm::Value * int32_offsetV,llvm::Value * int32_offsetR,bool feedback_enabled,llvm::Type * returnType)1774 inline llvm::CallInst* LLVM3DBuilder<preserveNames, T, Inserter>::Create_load(
1775 llvm::Value* int32_sampleIdxU,
1776 llvm::Value* int32_sampleIdxV,
1777 llvm::Value* int32_sampleIdxR,
1778 llvm::Value* int32_lod,
1779 llvm::Value* ptr_textureIdx,
1780 llvm::Value* int32_offsetU,
1781 llvm::Value* int32_offsetV,
1782 llvm::Value* int32_offsetR,
1783 bool feedback_enabled,
1784 llvm::Type* returnType)
1785 {
1786 llvm::Value * packed_tex_params[] = {
1787 int32_sampleIdxU,
1788 int32_sampleIdxV,
1789 int32_lod,
1790 int32_sampleIdxR,
1791 ptr_textureIdx,
1792 int32_offsetU,
1793 int32_offsetV,
1794 int32_offsetR
1795 };
1796
1797 llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
1798
1799 llvm::Type* dstType = ( returnType != nullptr ) ? returnType : this->getFloatTy();
1800 llvm::Type* types[] = {
1801 IGCLLVM::FixedVectorType::get(dstType, feedback_enabled ? 5 : 4),
1802 ptr_textureIdx->getType()
1803 };
1804
1805 llvm::Function* func_llvm_GenISA_ldptr_v4f32_f32 = llvm::GenISAIntrinsic::getDeclaration
1806 (module, llvm::GenISAIntrinsic::GenISA_ldptr, types);
1807
1808 llvm::CallInst* packed_tex_call = this->CreateCall(func_llvm_GenISA_ldptr_v4f32_f32, packed_tex_params);
1809
1810 return packed_tex_call;
1811 }
1812
1813 template<bool preserveNames, typename T, typename Inserter>
Create_ldms(llvm::Value * int32_srcIdxU,llvm::Value * int32_srcIdxV,llvm::Value * int32_srcIdxR,llvm::Value * int32_sampleIdx,llvm::Value * int32_textureIdx,llvm::Value * int32_offsetU,llvm::Value * int32_offsetV,llvm::Value * int32_offsetR,bool feedback_enabled,llvm::Type * returnType)1814 inline llvm::CallInst* LLVM3DBuilder<preserveNames, T, Inserter>::Create_ldms(
1815 llvm::Value* int32_srcIdxU,
1816 llvm::Value* int32_srcIdxV,
1817 llvm::Value* int32_srcIdxR,
1818 llvm::Value* int32_sampleIdx,
1819 llvm::Value* int32_textureIdx,
1820 llvm::Value* int32_offsetU,
1821 llvm::Value* int32_offsetV,
1822 llvm::Value* int32_offsetR,
1823 bool feedback_enabled,
1824 llvm::Type* returnType)
1825 {
1826 llvm::Value * packed_mcs_params[] = {
1827 int32_srcIdxU,
1828 int32_srcIdxV,
1829 int32_srcIdxR,
1830 m_int0,
1831 int32_textureIdx,
1832 int32_offsetU,
1833 int32_offsetV,
1834 int32_offsetR
1835 };
1836
1837 llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
1838
1839 llvm::Type* types[] = { IGCLLVM::FixedVectorType::get(this->getInt32Ty(), 2), this->getInt32Ty(), int32_textureIdx->getType() };
1840 llvm::Function* func_llvm_GenISA_ldmcsptr_v4f32_f32 = llvm::GenISAIntrinsic::getDeclaration
1841 (module, llvm::GenISAIntrinsic::GenISA_ldmcsptr, types);
1842 llvm::CallInst* packed_mcs_call = this->CreateCall(func_llvm_GenISA_ldmcsptr_v4f32_f32, packed_mcs_params);
1843
1844
1845 llvm::Value* mcs_ch0 = this->CreateExtractElement(packed_mcs_call, m_int0);
1846 llvm::Value* mcs_ch1 = this->CreateExtractElement(packed_mcs_call, m_int1);
1847
1848 llvm::Value * packed_tex_params[] = {
1849 int32_sampleIdx,
1850 mcs_ch0,
1851 mcs_ch1,
1852 int32_srcIdxU,
1853 int32_srcIdxV,
1854 int32_srcIdxR,
1855 m_int0,
1856 int32_textureIdx,
1857 int32_offsetU,
1858 int32_offsetV,
1859 int32_offsetR
1860 };
1861
1862 llvm::Type* dstType = ( returnType != nullptr ) ? returnType : this->getFloatTy();
1863 llvm::Type* types_ldms[] = {
1864 IGCLLVM::FixedVectorType::get(dstType, 4),
1865 int32_textureIdx->getType()
1866 };
1867 if (feedback_enabled)
1868 {
1869 types_ldms[0] = IGCLLVM::FixedVectorType::get(dstType, 5);
1870 }
1871
1872 llvm::Function* func_llvm_GenISA_ldmsptr_v4f32_f32 = llvm::GenISAIntrinsic::getDeclaration
1873 (module, llvm::GenISAIntrinsic::GenISA_ldmsptr, types_ldms);
1874
1875 llvm::CallInst* packed_tex_call = this->CreateCall(func_llvm_GenISA_ldmsptr_v4f32_f32, packed_tex_params);
1876 return packed_tex_call;
1877 }
1878
1879 template<bool preserveNames, typename T, typename Inserter>
Prepare_SAMPLE_Cube_ParamsFromUnormalizedCoords(llvm::Value * int32_lod,llvm::Value * int32_textureIdx,llvm::Value * int32_u,llvm::Value * int32_v,llvm::Value * int32_faceid,llvm::Value * int32_cube_array_index,llvm::Value * float_array_6_3,llvm::Value * int32_sampler)1880 inline SampleParamsFromCube LLVM3DBuilder<preserveNames, T, Inserter>::Prepare_SAMPLE_Cube_ParamsFromUnormalizedCoords(
1881 llvm::Value* int32_lod,
1882 llvm::Value* int32_textureIdx,
1883 llvm::Value* int32_u,
1884 llvm::Value* int32_v,
1885 llvm::Value* int32_faceid,
1886 llvm::Value* int32_cube_array_index,
1887 llvm::Value *float_array_6_3,
1888 llvm::Value* int32_sampler
1889 )
1890 {
1891 //Samplers point of reference is always center of the face, which is (0,0)
1892 //That means the four vertices of the normalized cube are depiced as below
1893 //(-1,-1) (1,-1)
1894 // -------|---------
1895 // | | |
1896 // | | |
1897 // |---------------|
1898 // | |(0,0) |
1899 // | | |
1900 // -------|---------
1901 //(-1,1) (1,1)
1902 //Thus each un-normalized coordiate (x,y) needs to be normalized between <-1,1>
1903 //Below is the Math to normalize between <-1,1>
1904 //u = (u * 2 + 1)/width - 1
1905 //v = (v * 2 + 1)/height - 1
1906
1907 //Using resinfo extract width and height of the buffer
1908 //Using resinfo extract width and height of the buffer
1909 llvm::Value *resinfo = this->Create_resinfo(int32_lod, int32_textureIdx);
1910 llvm::Value *width = this->CreateExtractElement(resinfo, m_int0);
1911 llvm::Value *height = this->CreateExtractElement(resinfo, m_int1);
1912
1913 //convert u, v, width and height to float
1914 llvm::Value *float_u = this->CreateUIToFP(int32_u, this->getFloatTy());
1915 llvm::Value *float_v = this->CreateUIToFP(int32_v, this->getFloatTy());
1916 width = this->CreateUIToFP(width, this->getFloatTy());
1917 height = this->CreateUIToFP(height, this->getFloatTy());
1918 //define some constants
1919 llvm::Value* float_minus1 = this->getFloat(-1.0);
1920 llvm::Value* float_2 = this->getFloat(2.0);
1921
1922 //u and v represent the coordinates of a texel for a given face
1923 //Now normalize u in the range [-1,1] using following equation
1924 //u = (2*u + 1)/width -1
1925 float_u = this->CreateFAdd(this->CreateFMul(float_u, float_2), m_float1);
1926 float_u = this->CreateFSub(this->CreateFDiv(float_u, width), m_float1);
1927 //Now normalize v in the range [-1,1] using following equation
1928 //v = (v * 2 + 1)/height - 1
1929 float_v = this->CreateFAdd(this->CreateFMul(float_v, float_2), m_float1);
1930 float_v = this->CreateFSub(this->CreateFDiv(float_v, height), m_float1);
1931
1932 llvm::Value *minus_floatu = this->CreateFMul(float_u, float_minus1); //-u
1933 llvm::Value *minus_floatv = this->CreateFMul(float_v, float_minus1); //-v
1934 llvm::Value *float_arrayIndex = this->CreateUIToFP(int32_cube_array_index, this->getFloatTy());
1935 //This array represents how the u and v value needs to be picked, for a face
1936 unsigned num_cube_faces = 6;
1937 unsigned num_dimensions = 3;
1938
1939 //The mapping of face-id to texture surface is as follows
1940 //+x->face 0, -x->face 1, +y -> face 2, -y -> face 3, +z -> face 4, -z -> face 5
1941 //Now for each face we need to transform the normalized coordinates as follows
1942 //face 0(+X) = (-v, -u), face 1(-X) = (-v, u), face 2(+Y) = (u, v)
1943 //face 3(-Y) = (u, -v) , face 4(+Z) = (u, -v), face 5(+Z) = (-u, -v)
1944 //Refer to https://en.wikipedia.org/wiki/Cube_mapping for details
1945 llvm::Value *cubeCoordMap[6][3] = {
1946 { m_float1 , minus_floatv, minus_floatu }, //+x = face0
1947 { float_minus1 , minus_floatv, float_u }, //-x = face1
1948 { float_u , m_float1 , float_v }, //+y = face2
1949 { float_u , float_minus1, minus_floatv }, //-y = face3
1950 { float_u , minus_floatv, m_float1 }, //+z = face4
1951 { minus_floatu , minus_floatv, float_minus1 } //-z = face5
1952 };
1953 //Now populate the 6x3 array with values of cubeCoordMap
1954 llvm::Value *indexList[2];
1955 llvm::Value *row, *elt;
1956 indexList[0] = m_int0;
1957 for (unsigned faceid = 0; faceid < num_cube_faces; faceid++) {
1958 indexList[1] = this->getInt32(faceid);
1959 row = this->CreateGEP(float_array_6_3, llvm::ArrayRef<llvm::Value*>(indexList, 2));
1960 for (unsigned j = 0; j < num_dimensions; j++) {
1961 indexList[1] = this->getInt32(j);
1962 elt = this->CreateGEP(row, llvm::ArrayRef<llvm::Value*>(indexList, 2));
1963 this->CreateStore(cubeCoordMap[faceid][j], elt);
1964 }
1965 }
1966
1967 //Now pick the one the row indexed by int32_faceid
1968 llvm::Value *finalCoords[3];
1969 indexList[1] = int32_faceid;
1970 row = this->CreateGEP(float_array_6_3, llvm::ArrayRef<llvm::Value*>(indexList, 2));
1971 for (unsigned i = 0; i < 3; i++) {
1972 indexList[1] = this->getInt32(i);
1973 elt = this->CreateGEP(row, llvm::ArrayRef<llvm::Value*>(indexList, 2));
1974 finalCoords[i] = this->CreateLoad(elt);
1975 }
1976
1977 SampleParamsFromCube CubeRetParams;
1978 CubeRetParams.float_xcube = finalCoords[0];
1979 CubeRetParams.float_ycube = finalCoords[1];
1980 CubeRetParams.float_address_3 = finalCoords[2];
1981 CubeRetParams.float_aicube = float_arrayIndex;
1982 CubeRetParams.int32_textureIdx = int32_textureIdx;
1983 CubeRetParams.int32_sampler = int32_sampler;
1984 CubeRetParams.offsetU = int32_u;
1985 CubeRetParams.offsetV = int32_v;
1986 CubeRetParams.offsetR = m_int0; //Not used
1987 return CubeRetParams;
1988 }
1989
1990 template<bool preserveNames, typename T, typename Inserter>
Prepare_SAMPLE_Cube_Params(llvm::Value * float_address_0,llvm::Value * float_address_1,llvm::Value * float_address_2,llvm::Value * float_address_3,llvm::Value * int32_textureIdx,llvm::Value * int32_sampler)1991 inline SampleParamsFromCube LLVM3DBuilder<preserveNames, T, Inserter>::Prepare_SAMPLE_Cube_Params(
1992 llvm::Value* float_address_0,
1993 llvm::Value* float_address_1,
1994 llvm::Value* float_address_2,
1995 llvm::Value* float_address_3,
1996 llvm::Value* int32_textureIdx,
1997 llvm::Value* int32_sampler)
1998 {
1999 IGC_ASSERT(nullptr != float_address_0);
2000 llvm::Type* const coordType = float_address_0->getType();
2001 IGC_ASSERT(nullptr != coordType);
2002 IGC_ASSERT(coordType->isFloatTy() || coordType->isHalfTy());
2003
2004 llvm::Value* zero = llvm::ConstantFP::get(coordType, 0.0);
2005
2006 // %xneg_s = fsub float 0.000000e+00, %src_s.chan0
2007 llvm::Value* float_xneg_s_1389 = this->CreateFSub(zero, float_address_0, VALUE_NAME("xneg_s"));
2008
2009 // %cmpx_s = fcmp oge float %src_s.chan0, 0.000000e+00
2010 llvm::Value* int1_cmpx_s_1390 = this->CreateFCmp(llvm::FCmpInst::FCMP_OGE, float_address_0, zero, VALUE_NAME("cmpx_s"));
2011
2012 // %xabs_s = select i1 %cmpx_s, float %src_s.chan0, float %xneg_s
2013 llvm::Value* float_xabs_s_1391 = this->CreateSelect(int1_cmpx_s_1390, float_address_0, float_xneg_s_1389, VALUE_NAME("xabs_s"));
2014
2015 // %yneg_s = fsub float 0.000000e+00, %src_s.chan1
2016 llvm::Value* float_yneg_s_1392 = this->CreateFSub(zero, float_address_1, VALUE_NAME("yneg_s"));
2017
2018 // %cmpy_s = fcmp oge float %src_s.chan1, 0.000000e+00
2019 llvm::Value* int1_cmpy_s_1393 = this->CreateFCmp(llvm::FCmpInst::FCMP_OGE, float_address_1, zero, VALUE_NAME("cmpy_s"));
2020
2021 // %yabs_s = select i1 %cmpy_s, float %src_s.chan1, float %yneg_s
2022 llvm::Value* float_yabs_s_1394 = this->CreateSelect(int1_cmpy_s_1393, float_address_1, float_yneg_s_1392, VALUE_NAME("yabs_s"));
2023
2024 // %aineg_s = fsub float 0.000000e+00, %src_s.chan2
2025 llvm::Value* float_aineg_s_1395 = this->CreateFSub(zero, float_address_2, VALUE_NAME("aineg_s"));
2026
2027 // %cmpai_s = fcmp oge float %src_s.chan2, 0.000000e+00
2028 llvm::Value* int1_cmpai_s_1396 = this->CreateFCmp(llvm::FCmpInst::FCMP_OGE, float_address_2, zero, VALUE_NAME("cmpai_s"));
2029
2030 // %aiabs_s = select i1 %cmpai_s, float %src_s.chan2, float %aineg_s
2031 llvm::Value* float_aiabs_s_1397 = this->CreateSelect(int1_cmpai_s_1396, float_address_2, float_aineg_s_1395, VALUE_NAME("aiabs_s"));
2032
2033 // %oge0_s = fcmp oge float %xabs_s, %yabs_s
2034 llvm::Value* int1_oge0_s_1398 = this->CreateFCmp(llvm::FCmpInst::FCMP_OGE, float_xabs_s_1391, float_yabs_s_1394, VALUE_NAME("oge0_s"));
2035
2036 // %max1_s = select i1 %oge0_s, float %xabs_s, float %yabs_s
2037 llvm::Value* float_max1_s_1399 = this->CreateSelect(int1_oge0_s_1398, float_xabs_s_1391, float_yabs_s_1394, VALUE_NAME("max1_s"));
2038
2039 // %oge1_s = fcmp oge float %max1_s, %aiabs_s
2040 llvm::Value* int1_oge1_s_1400 = this->CreateFCmp(llvm::FCmpInst::FCMP_OGE, float_max1_s_1399, float_aiabs_s_1397, VALUE_NAME("oge1_s"));
2041
2042 // %max2_s = select i1 %oge1_s, float %max1_s, float %aiabs_s
2043 llvm::Value* float_max2_s_1401 = this->CreateSelect(int1_oge1_s_1400, float_max1_s_1399, float_aiabs_s_1397, VALUE_NAME("max2_s"));
2044
2045 // %xcube_s = fdiv float %src_s.chan0, %max2_s
2046 llvm::Value* float_xcube_s_1402 = this->CreateFDiv(float_address_0, float_max2_s_1401, VALUE_NAME("xcube_s"));
2047
2048 // %ycube_s = fdiv float %src_s.chan1, %max2_s
2049 llvm::Value* float_ycube_s_1403 = this->CreateFDiv(float_address_1, float_max2_s_1401, VALUE_NAME("ycube_s"));
2050
2051 // %aicube_s = fdiv float %src_s.chan2, %max2_s
2052 llvm::Value* float_aicube_s_1404 = this->CreateFDiv(float_address_2, float_max2_s_1401, VALUE_NAME("aicube_s"));
2053
2054 SampleParamsFromCube CubeRetParams;
2055
2056 CubeRetParams.float_xcube = float_xcube_s_1402;
2057 CubeRetParams.float_ycube = float_ycube_s_1403;
2058 CubeRetParams.float_aicube = float_aicube_s_1404;
2059 CubeRetParams.float_address_3 = float_address_3;
2060 CubeRetParams.int32_textureIdx = int32_textureIdx;
2061 CubeRetParams.int32_sampler = int32_sampler;
2062 CubeRetParams.offsetU = m_int0;
2063 CubeRetParams.offsetV = m_int0;
2064 CubeRetParams.offsetR = m_int0;
2065
2066 return CubeRetParams;
2067
2068 }
2069
2070 template<bool preserveNames, typename T, typename Inserter>
Prepare_SAMPLE_D_DC_Cube_Params(SampleD_DC_FromCubeParams & params)2071 inline SampleD_DC_FromCubeParams LLVM3DBuilder<preserveNames, T, Inserter>::Prepare_SAMPLE_D_DC_Cube_Params(
2072 SampleD_DC_FromCubeParams& params)
2073 {
2074 return Prepare_SAMPLE_D_DC_Cube_Params(
2075 params.float_src_u,
2076 params.float_src_v,
2077 params.float_src_r,
2078 params.float_src_ai,
2079 params.dxu,
2080 params.dxv,
2081 params.dxr,
2082 params.dyu,
2083 params.dyv,
2084 params.dyr,
2085 params.int32_textureIdx,
2086 params.int32_sampler,
2087 params.int32_offsetU,
2088 params.int32_offsetV,
2089 params.int32_offsetW
2090 );
2091 }
2092
2093 template<bool preserveNames, typename T, typename Inserter>
Prepare_SAMPLE_D_DC_Cube_Params(llvm::Value * float_src_r,llvm::Value * float_src_s,llvm::Value * float_src_t,llvm::Value * float_src_ai,llvm::Value * float_drdx,llvm::Value * float_dsdx,llvm::Value * float_dtdx,llvm::Value * float_drdy,llvm::Value * float_dsdy,llvm::Value * float_dtdy,llvm::Value * int32_textureIdx,llvm::Value * int32_sampler,llvm::Value * int32_offsetU,llvm::Value * int32_offsetV,llvm::Value * int32_offsetW)2094 inline SampleD_DC_FromCubeParams LLVM3DBuilder<preserveNames, T, Inserter>::Prepare_SAMPLE_D_DC_Cube_Params(
2095 llvm::Value* float_src_r,
2096 llvm::Value* float_src_s,
2097 llvm::Value* float_src_t,
2098 llvm::Value* float_src_ai,
2099 llvm::Value* float_drdx,
2100 llvm::Value* float_dsdx,
2101 llvm::Value* float_dtdx,
2102 llvm::Value* float_drdy,
2103 llvm::Value* float_dsdy,
2104 llvm::Value* float_dtdy,
2105 llvm::Value* int32_textureIdx,
2106 llvm::Value* int32_sampler,
2107 llvm::Value* int32_offsetU,
2108 llvm::Value* int32_offsetV,
2109 llvm::Value* int32_offsetW)
2110 {
2111 // For cube texture sampling, sampling instruction must receive proper cube face ID
2112 // together with coordinates projected onto that face. Gradients also have to be transformed
2113 // into the same (cube face) address space.
2114 // To achieve this we first have to find a major coordinate, then normalize coordinates
2115 // and select remaining ones as u/v coordinates for the face. Because of the cube texture layout
2116 // in memory (as 6 2D faces) this sometimes involves changing the coordinate direction (sign).
2117 // Gradients are transformed using quotient rule for derivatives:
2118 // (fA/fB)' = (fA'*fB - fB'*fA)/fB^2
2119 // where fA and fB are base functions, i.e. base cube coordinates in this case.
2120 // Note that we first normalize coordinates and all derivatives, so calculations
2121 // here use the form:
2122 // (fA/fB)' = [fA'/fB] - [fB'/fB]*[fA/fB]
2123
2124 IGC_ASSERT(nullptr != this->GetInsertBlock());
2125 llvm::Function* const parentFunc = this->GetInsertBlock()->getParent();
2126 IGC_ASSERT(nullptr != float_src_r);
2127 llvm::Type* const coordType = float_src_r->getType();
2128 IGC_ASSERT(nullptr != coordType);
2129 IGC_ASSERT(coordType->isFloatTy() || coordType->isHalfTy());
2130
2131 llvm::Value* zero = llvm::ConstantFP::get(coordType, 0.0);
2132
2133 // Create coordinate absolute values to look for major.
2134 llvm::Value* float_abs_r = this->CreateFAbs(float_src_r);
2135 llvm::Value* float_abs_s = this->CreateFAbs(float_src_s);
2136 llvm::Value* float_abs_t = this->CreateFAbs(float_src_t);
2137
2138 {
2139 llvm::BasicBlock* currentBlock = this->GetInsertBlock();
2140 bool shouldSplitBB = this->GetInsertPoint() != currentBlock->end();
2141
2142 // Create basic blocks.
2143 llvm::BasicBlock* block_final = llvm::BasicBlock::Create(this->getContext(), VALUE_NAME("cubefinal_block"));
2144
2145 llvm::BasicBlock* block_major_t = llvm::BasicBlock::Create(this->getContext(), VALUE_NAME("cubemajor_t_block"));
2146 llvm::BasicBlock* block_not_t = llvm::BasicBlock::Create(this->getContext(), VALUE_NAME("cubenott_block"));
2147 llvm::BasicBlock* block_zp = llvm::BasicBlock::Create(this->getContext(), VALUE_NAME("cube_face_zp_block"));
2148 llvm::BasicBlock* block_zm = llvm::BasicBlock::Create(this->getContext(), VALUE_NAME("cube_face_zm_block"));
2149
2150 llvm::BasicBlock* block_major_s = llvm::BasicBlock::Create(this->getContext(), VALUE_NAME("cubemajor_s_block"));
2151 llvm::BasicBlock* block_yp = llvm::BasicBlock::Create(this->getContext(), VALUE_NAME("cube_face_yp_block"));
2152 llvm::BasicBlock* block_ym = llvm::BasicBlock::Create(this->getContext(), VALUE_NAME("cube_face_ym_block"));
2153
2154 llvm::BasicBlock* block_major_r = llvm::BasicBlock::Create(this->getContext(), VALUE_NAME("cubemajor_r_block"));
2155 llvm::BasicBlock* block_xp = llvm::BasicBlock::Create(this->getContext(), VALUE_NAME("cube_face_xp_block"));
2156 llvm::BasicBlock* block_xm = llvm::BasicBlock::Create(this->getContext(), VALUE_NAME("cube_face_xm_block"));
2157
2158 // Find the major coordinate (and thus cube face), precedence is Z,Y,X.
2159 llvm::Value* int1_cmp_tges = this->CreateFCmp(llvm::FCmpInst::FCMP_OGE, float_abs_t, float_abs_s, VALUE_NAME("cmp_tges"));
2160
2161 llvm::Value* int1_cmp_tger = this->CreateFCmp(llvm::FCmpInst::FCMP_OGE, float_abs_t, float_abs_r, VALUE_NAME("cmp_tger"));
2162
2163 llvm::Value* int1_tgesr = this->CreateAnd(int1_cmp_tger, int1_cmp_tges);
2164
2165 // Major coordinate is T, faces could be +Z or -Z
2166 llvm::BasicBlock* splitBlock = nullptr;
2167 if (shouldSplitBB)
2168 {
2169 IGC_ASSERT(nullptr != currentBlock);
2170 IGC_ASSERT(currentBlock->getTerminator());
2171 splitBlock = currentBlock->splitBasicBlock(this->GetInsertPoint()->getNextNode());
2172 currentBlock->getTerminator()->eraseFromParent();
2173 this->SetInsertPoint(currentBlock);
2174 }
2175 this->CreateCondBr(int1_tgesr, block_major_t, block_not_t);
2176 this->SetInsertPoint(block_major_t);
2177 parentFunc->getBasicBlockList().push_back(block_major_t);
2178
2179 // Normalize coordinates and gradients.
2180 llvm::Value* float_tnorm_r = this->CreateFDiv(float_src_r, float_abs_t, VALUE_NAME("tnorm_r"));
2181 llvm::Value* float_tnorm_s = this->CreateFDiv(float_src_s, float_abs_t, VALUE_NAME("tnorm_s"));
2182 llvm::Value* float_tnorm_drdx = this->CreateFDiv(float_drdx, float_abs_t, VALUE_NAME("tnorm_drdx"));
2183 llvm::Value* float_tnorm_drdy = this->CreateFDiv(float_drdy, float_abs_t, VALUE_NAME("tnorm_drdy"));
2184 llvm::Value* float_tnorm_dsdx = this->CreateFDiv(float_dsdx, float_abs_t, VALUE_NAME("tnorm_dsdx"));
2185 llvm::Value* float_tnorm_dsdy = this->CreateFDiv(float_dsdy, float_abs_t, VALUE_NAME("tnorm_dsdy"));
2186 llvm::Value* float_tnorm_dtdx = this->CreateFDiv(float_dtdx, float_abs_t, VALUE_NAME("tnorm_dtdx"));
2187 llvm::Value* float_tnorm_dtdy = this->CreateFDiv(float_dtdy, float_abs_t, VALUE_NAME("tnorm_dtdy"));
2188
2189 // Select positive or negative face.
2190 llvm::Value* int1_cmpx_t = this->CreateFCmp(llvm::FCmpInst::FCMP_OGE, float_src_t, zero, VALUE_NAME("cmpx_t"));
2191 this->CreateCondBr(int1_cmpx_t, block_zp, block_zm);
2192 this->SetInsertPoint(block_zp);
2193 parentFunc->getBasicBlockList().push_back(block_zp);
2194
2195 // Face +Z,
2196 // major = neg T
2197 // u = R
2198 // v = neg S
2199
2200 llvm::Value* float_face_zp_id = llvm::cast<llvm::ConstantFP>(llvm::ConstantFP::get(coordType, 4.0));
2201
2202 // Select u from s/r/t
2203 llvm::Value* float_face_zp_u = float_tnorm_r;
2204
2205 // Select v from s/r/t
2206 llvm::Value* float_face_zp_v = this->CreateFNeg(float_tnorm_s, VALUE_NAME("face_zp_v"));
2207
2208 // du/dx = dm * u + d{s/r/t}/dx
2209 llvm::Value* float_neg_dmx4 = this->CreateFNeg(float_tnorm_dtdx, VALUE_NAME("neg_dmx"));
2210 llvm::Value* float_dmxu4 = this->CreateFMul(float_neg_dmx4, float_tnorm_r, VALUE_NAME("dmxu"));
2211 llvm::Value* float_face_zp_dudx = this->CreateFAdd(float_dmxu4, float_tnorm_drdx, VALUE_NAME("face_zp_dudx"));
2212
2213 // du/dy = dm * u + d{s/r/t}/dy
2214 llvm::Value* float_neg_dmy4 = this->CreateFNeg(float_tnorm_dtdy, VALUE_NAME("neg_dmy"));
2215 llvm::Value* float_dmyu4 = this->CreateFMul(float_neg_dmy4, float_tnorm_r, VALUE_NAME("dmyu"));
2216 llvm::Value* float_face_zp_dudy = this->CreateFAdd(float_dmyu4, float_tnorm_drdy, VALUE_NAME("face_zp_dvdx"));
2217
2218 // dv/dx = dm * v + d{s/r/t}/dx
2219 llvm::Value* float_dmxv4 = this->CreateFMul(float_tnorm_dtdx, float_tnorm_s, VALUE_NAME("dmxv"));
2220 llvm::Value* float_face_zp_dvdx = this->CreateFSub(float_dmxv4, float_tnorm_dsdx, VALUE_NAME("face_zp_dvdx"));
2221
2222 // dv/dy = dm * v + d{s/r/t}/dy
2223 llvm::Value* float_dmyv4 = this->CreateFMul(float_tnorm_dtdy, float_tnorm_s, VALUE_NAME("dmyv"));
2224 llvm::Value* float_face_zp_dvdy = this->CreateFSub(float_dmyv4, float_tnorm_dsdy, VALUE_NAME("face_zp_dvdy"));
2225
2226 this->CreateBr(block_final);
2227 this->SetInsertPoint(block_zm);
2228 parentFunc->getBasicBlockList().push_back(block_zm);
2229
2230 // Face -Z,
2231 // major = T
2232 // u = neg R
2233 // v = neg S
2234
2235 llvm::Value* float_face_zm_id = llvm::cast<llvm::ConstantFP>(llvm::ConstantFP::get(coordType, 5.0));
2236
2237 // Select u from s/r/t
2238 llvm::Value* float_face_zm_u = this->CreateFNeg(float_tnorm_r, VALUE_NAME("face_zm_u"));
2239
2240 // Select v from s/r/t
2241 llvm::Value* float_face_zm_v = this->CreateFNeg(float_tnorm_s, VALUE_NAME("face_zm_v"));
2242
2243 // du/dx = dm * u + d{s/r/t}/dx
2244 llvm::Value* float_dmxu5 = this->CreateFMul(float_tnorm_dtdx, float_face_zm_u, VALUE_NAME("dmxu"));
2245 llvm::Value* float_face_zm_dudx = this->CreateFSub(float_dmxu5, float_tnorm_drdx, VALUE_NAME("face_zm_dudx"));
2246
2247 // du/dy = dm * u + d{s/r/t}/dy
2248 llvm::Value* float_dmyu5 = this->CreateFMul(float_tnorm_dtdy, float_face_zm_u, VALUE_NAME("dmyu"));
2249 llvm::Value* float_face_zm_dudy = this->CreateFSub(float_dmyu5, float_tnorm_drdy, VALUE_NAME("face_zm_dvdx"));
2250
2251 // dv/dx = dm * v + d{s/r/t}/dx
2252 llvm::Value* float_dmxv5 = this->CreateFMul(float_tnorm_dtdx, float_face_zm_v, VALUE_NAME("dmxv"));
2253 llvm::Value* float_face_zm_dvdx = this->CreateFSub(float_dmxv5, float_tnorm_dsdx, VALUE_NAME("face_zm_dvdx"));
2254
2255 // dv/dy = dm * v + d{s/r/t}/dy
2256 llvm::Value* float_dmyv5 = this->CreateFMul(float_tnorm_dtdy, float_face_zm_v, VALUE_NAME("dmyv"));
2257 llvm::Value* float_face_zm_dvdy = this->CreateFSub(float_dmyv5, float_tnorm_dsdy, VALUE_NAME("face_zm_dvdy"));
2258
2259 this->CreateBr(block_final);
2260 this->SetInsertPoint(block_not_t);
2261 parentFunc->getBasicBlockList().push_back(block_not_t);
2262
2263 // Choose major S or R.
2264 llvm::Value* int1_cmp_sger = this->CreateFCmp(llvm::FCmpInst::FCMP_OGE, float_abs_s, float_abs_r, VALUE_NAME("cmp_sger"));
2265
2266 // Major coordinate is S, faces could be +Y or -Y
2267 this->CreateCondBr(int1_cmp_sger, block_major_s, block_major_r);
2268 this->SetInsertPoint(block_major_s);
2269 parentFunc->getBasicBlockList().push_back(block_major_s);
2270
2271 // Normalize coordinates and gradients.
2272 llvm::Value* float_snorm_r = this->CreateFDiv(float_src_r, float_abs_s, VALUE_NAME("snorm_r"));
2273 llvm::Value* float_snorm_t = this->CreateFDiv(float_src_t, float_abs_s, VALUE_NAME("snorm_t"));
2274 llvm::Value* float_snorm_drdx = this->CreateFDiv(float_drdx, float_abs_s, VALUE_NAME("snorm_drdx"));
2275 llvm::Value* float_snorm_drdy = this->CreateFDiv(float_drdy, float_abs_s, VALUE_NAME("snorm_drdy"));
2276 llvm::Value* float_snorm_dsdx = this->CreateFDiv(float_dsdx, float_abs_s, VALUE_NAME("snorm_dsdx"));
2277 llvm::Value* float_snorm_dsdy = this->CreateFDiv(float_dsdy, float_abs_s, VALUE_NAME("snorm_dsdy"));
2278 llvm::Value* float_snorm_dtdx = this->CreateFDiv(float_dtdx, float_abs_s, VALUE_NAME("snorm_dtdx"));
2279 llvm::Value* float_snorm_dtdy = this->CreateFDiv(float_dtdy, float_abs_s, VALUE_NAME("snorm_dtdy"));
2280
2281 // Select positive or negative face.
2282 llvm::Value* int1_cmpx_s = this->CreateFCmp(llvm::FCmpInst::FCMP_OGE, float_src_s, zero, VALUE_NAME("cmpx_s"));
2283 this->CreateCondBr(int1_cmpx_s, block_yp, block_ym);
2284 this->SetInsertPoint(block_yp);
2285 parentFunc->getBasicBlockList().push_back(block_yp);
2286
2287 // Face +Y,
2288 // major = neg S
2289 // u = R
2290 // v = T
2291
2292 llvm::Value* float_face_yp_id = llvm::cast<llvm::ConstantFP>(llvm::ConstantFP::get(coordType, 2.0));
2293
2294 // Select u from s/r/t
2295 llvm::Value* float_face_yp_u = float_snorm_r;
2296
2297 // Select v from s/r/t
2298 llvm::Value* float_face_yp_v = float_snorm_t;
2299
2300 // du/dx = dm * u + d{s/r/t}/dx
2301 llvm::Value* float_neg_dmx2 = this->CreateFNeg(float_snorm_dsdx, VALUE_NAME("neg_dmx"));
2302 llvm::Value* float_dmxu2 = this->CreateFMul(float_neg_dmx2, float_snorm_r, VALUE_NAME("dmxu"));
2303 llvm::Value* float_face_yp_dudx = this->CreateFAdd(float_dmxu2, float_snorm_drdx, VALUE_NAME("face_yp_dudx"));
2304
2305 // du/dy = dm * u + d{s/r/t}/dy
2306 llvm::Value* float_neg_dmy2 = this->CreateFNeg(float_snorm_dsdy, VALUE_NAME("neg_dmy"));
2307 llvm::Value* float_dmyu2 = this->CreateFMul(float_neg_dmy2, float_snorm_r, VALUE_NAME("dmyu"));
2308 llvm::Value* float_face_yp_dudy = this->CreateFAdd(float_dmyu2, float_snorm_drdy, VALUE_NAME("face_yp_dvdx"));
2309
2310 // dv/dx = dm * v + d{s/r/t}/dx
2311 llvm::Value* float_dmxv2 = this->CreateFMul(float_neg_dmx2, float_snorm_t, VALUE_NAME("dmxv"));
2312 llvm::Value* float_face_yp_dvdx = this->CreateFAdd(float_dmxv2, float_snorm_dtdx, VALUE_NAME("face_yp_dvdx"));
2313
2314 // dv/dy = dm * v + d{s/r/t}/dy
2315 llvm::Value* float_dmyv2 = this->CreateFMul(float_neg_dmy2, float_snorm_t, VALUE_NAME("dmyv"));
2316 llvm::Value* float_face_yp_dvdy = this->CreateFAdd(float_dmyv2, float_snorm_dtdy, VALUE_NAME("face_yp_dvdy"));
2317
2318 this->CreateBr(block_final);
2319 this->SetInsertPoint(block_ym);
2320 parentFunc->getBasicBlockList().push_back(block_ym);
2321
2322 // Face -Y,
2323 // major = S
2324 // u = R
2325 // v = neg T
2326
2327 llvm::Value* float_face_ym_id = llvm::cast<llvm::ConstantFP>(llvm::ConstantFP::get(coordType, 3.0));
2328
2329 // Select u from s/r/t
2330 llvm::Value* float_face_ym_u = float_snorm_r;
2331
2332 // Select v from s/r/t
2333 llvm::Value* float_face_ym_v = this->CreateFNeg(float_snorm_t, VALUE_NAME("face_ym_v"));
2334
2335 // du/dx = dm * u + d{s/r/t}/dx
2336 llvm::Value* float_dmxu3 = this->CreateFMul(float_snorm_dsdx, float_snorm_r, VALUE_NAME("dmxu"));
2337 llvm::Value* float_face_ym_dudx = this->CreateFAdd(float_dmxu3, float_snorm_drdx, VALUE_NAME("face_ym_dudx"));
2338
2339 // du/dy = dm * u + d{s/r/t}/dy
2340 llvm::Value* float_dmyu3 = this->CreateFMul(float_snorm_dsdy, float_snorm_r, VALUE_NAME("dmyu"));
2341 llvm::Value* float_face_ym_dudy = this->CreateFAdd(float_dmyu3, float_snorm_drdy, VALUE_NAME("face_ym_dvdx"));
2342
2343 // dv/dx = dm * v + d{s/r/t}/dx
2344 llvm::Value* float_dmxv3 = this->CreateFMul(float_snorm_dsdx, float_face_ym_v, VALUE_NAME("dmxv"));
2345 llvm::Value* float_face_ym_dvdx = this->CreateFSub(float_dmxv3, float_snorm_dtdx, VALUE_NAME("face_ym_dvdx"));
2346
2347 // dv/dy = dm * v + d{s/r/t}/dy
2348 llvm::Value* float_dmyv3 = this->CreateFMul(float_snorm_dsdx, float_face_ym_v, VALUE_NAME("dmyv"));
2349 llvm::Value* float_face_ym_dvdy = this->CreateFSub(float_dmyv3, float_snorm_dtdy, VALUE_NAME("face_ym_dvdy"));
2350
2351 this->CreateBr(block_final);
2352 this->SetInsertPoint(block_major_r);
2353 parentFunc->getBasicBlockList().push_back(block_major_r);
2354
2355 // Major coordinate is R, faces could be +X or -X
2356
2357 // Normalize coordinates and gradients.
2358 llvm::Value* float_rnorm_s = this->CreateFDiv(float_src_s, float_abs_r, VALUE_NAME("rnorm_r"));
2359 llvm::Value* float_rnorm_t = this->CreateFDiv(float_src_t, float_abs_r, VALUE_NAME("rnorm_t"));
2360 llvm::Value* float_rnorm_drdx = this->CreateFDiv(float_drdx, float_abs_r, VALUE_NAME("rnorm_drdx"));
2361 llvm::Value* float_rnorm_drdy = this->CreateFDiv(float_drdy, float_abs_r, VALUE_NAME("rnorm_drdy"));
2362 llvm::Value* float_rnorm_dsdx = this->CreateFDiv(float_dsdx, float_abs_r, VALUE_NAME("rnorm_dsdx"));
2363 llvm::Value* float_rnorm_dsdy = this->CreateFDiv(float_dsdy, float_abs_r, VALUE_NAME("rnorm_dsdy"));
2364 llvm::Value* float_rnorm_dtdx = this->CreateFDiv(float_dtdx, float_abs_r, VALUE_NAME("rnorm_dtdx"));
2365 llvm::Value* float_rnorm_dtdy = this->CreateFDiv(float_dtdy, float_abs_r, VALUE_NAME("rnorm_dtdy"));
2366
2367 // Select positive or negative face.
2368 llvm::Value* int1_cmpx_r = this->CreateFCmp(llvm::FCmpInst::FCMP_OGE, float_src_r, zero, VALUE_NAME("cmpx_r"));
2369 this->CreateCondBr(int1_cmpx_r, block_xp, block_xm);
2370 this->SetInsertPoint(block_xp);
2371 parentFunc->getBasicBlockList().push_back(block_xp);
2372
2373 // Face +X,
2374 // major = neg R
2375 // u = neg T
2376 // v = neg S
2377
2378 llvm::Value* float_face_xp_id = llvm::cast<llvm::ConstantFP>(llvm::ConstantFP::get(coordType, 0.0));
2379
2380 // Select u from s/r/t
2381 llvm::Value* float_face_xp_u = this->CreateFNeg(float_rnorm_t, VALUE_NAME("face_xp_u"));
2382
2383 // Select v from s/r/t
2384 llvm::Value* float_face_xp_v = this->CreateFNeg(float_rnorm_s, VALUE_NAME("face_xp_v"));
2385
2386 // du/dx = dm * u + d{s/r/t}/dx
2387 llvm::Value* float_dmxu0 = this->CreateFMul(float_rnorm_drdx, float_rnorm_t, VALUE_NAME("dmxu"));
2388 llvm::Value* float_face_xp_dudx = this->CreateFSub(float_dmxu0, float_rnorm_dtdx, VALUE_NAME("face_xp_dudx"));
2389
2390 // du/dy = dm * u + d{s/r/t}/dy
2391 llvm::Value* float_dmyu0 = this->CreateFMul(float_rnorm_drdy, float_rnorm_t, VALUE_NAME("dmyu"));
2392 llvm::Value* float_face_xp_dudy = this->CreateFSub(float_dmyu0, float_rnorm_dtdy, VALUE_NAME("face_xp_dvdx"));
2393
2394 // dv/dx = dm * v + d{s/r/t}/dx
2395 llvm::Value* float_dmxv0 = this->CreateFMul(float_rnorm_drdx, float_rnorm_s, VALUE_NAME("dmxv"));
2396 llvm::Value* float_face_xp_dvdx = this->CreateFSub(float_dmxv0, float_rnorm_dsdx, VALUE_NAME("face_xp_dvdx"));
2397
2398 // dv/dy = dm * v + d{s/r/t}/dy
2399 llvm::Value* float_dmyv0 = this->CreateFMul(float_rnorm_drdy, float_rnorm_s, VALUE_NAME("dmyv"));
2400 llvm::Value* float_face_xp_dvdy = this->CreateFSub(float_dmyv0, float_rnorm_dsdy, VALUE_NAME("face_xp_dvdy"));
2401
2402 this->CreateBr(block_final);
2403 this->SetInsertPoint(block_xm);
2404 parentFunc->getBasicBlockList().push_back(block_xm);
2405
2406 // Face -X,
2407 // major = R
2408 // u = T
2409 // v = neg S
2410
2411 llvm::Value* float_face_xm_id = llvm::cast<llvm::ConstantFP>(llvm::ConstantFP::get(coordType, 1.0));
2412
2413 // Select u from s/r/t
2414 llvm::Value* float_face_xm_u = float_rnorm_t;
2415
2416 // Select v from s/r/t
2417 llvm::Value* float_face_xm_v = this->CreateFNeg(float_rnorm_s, VALUE_NAME("face_xm_v"));
2418
2419 // du/dx = dm * u + d{s/r/t}/dx
2420 llvm::Value* float_dmxu1 = this->CreateFMul(float_rnorm_drdx, float_rnorm_t, VALUE_NAME("dmxu"));
2421 llvm::Value* float_face_xm_dudx = this->CreateFAdd(float_dmxu1, float_rnorm_dtdx, VALUE_NAME("face_xm_dudx"));
2422
2423 // du/dy = dm * u + d{s/r/t}/dy
2424 llvm::Value* float_dmyu1 = this->CreateFMul(float_rnorm_drdy, float_rnorm_t, VALUE_NAME("dmyu"));
2425 llvm::Value* float_face_xm_dudy = this->CreateFAdd(float_dmyu1, float_rnorm_dtdx, VALUE_NAME("face_xm_dvdx"));
2426
2427 // dv/dx = dm * v + d{s/r/t}/dx
2428 llvm::Value* float_dmxv1 = this->CreateFMul(float_rnorm_drdx, float_face_xm_v, VALUE_NAME("dmxv"));
2429 llvm::Value* float_face_xm_dvdx = this->CreateFSub(float_dmxv1, float_rnorm_dsdx, VALUE_NAME("face_xm_dvdx"));
2430
2431 // dv/dy = dm * v + d{s/r/t}/dy
2432 llvm::Value* float_dmyv1 = this->CreateFMul(float_rnorm_drdy, float_face_xm_v, VALUE_NAME("dmyv"));
2433 llvm::Value* float_face_xm_dvdy = this->CreateFSub(float_dmyv1, float_rnorm_dsdy, VALUE_NAME("face_xm_dvdy"));
2434
2435 this->CreateBr(block_final);
2436 this->SetInsertPoint(block_final);
2437 parentFunc->getBasicBlockList().push_back(block_final);
2438
2439 llvm::PHINode* phi_u = this->CreatePHI(coordType, 6, VALUE_NAME("phi_u"));
2440 phi_u->addIncoming(float_face_xp_u, block_xp);
2441 phi_u->addIncoming(float_face_xm_u, block_xm);
2442 phi_u->addIncoming(float_face_yp_u, block_yp);
2443 phi_u->addIncoming(float_face_ym_u, block_ym);
2444 phi_u->addIncoming(float_face_zp_u, block_zp);
2445 phi_u->addIncoming(float_face_zm_u, block_zm);
2446
2447 llvm::PHINode* phi_v = this->CreatePHI(coordType, 6, VALUE_NAME("phi_v"));
2448 phi_v->addIncoming(float_face_xp_v, block_xp);
2449 phi_v->addIncoming(float_face_xm_v, block_xm);
2450 phi_v->addIncoming(float_face_yp_v, block_yp);
2451 phi_v->addIncoming(float_face_ym_v, block_ym);
2452 phi_v->addIncoming(float_face_zp_v, block_zp);
2453 phi_v->addIncoming(float_face_zm_v, block_zm);
2454
2455 llvm::PHINode* phi_dudx = this->CreatePHI(coordType, 6, VALUE_NAME("phi_dudx"));
2456 phi_dudx->addIncoming(float_face_xp_dudx, block_xp);
2457 phi_dudx->addIncoming(float_face_xm_dudx, block_xm);
2458 phi_dudx->addIncoming(float_face_yp_dudx, block_yp);
2459 phi_dudx->addIncoming(float_face_ym_dudx, block_ym);
2460 phi_dudx->addIncoming(float_face_zp_dudx, block_zp);
2461 phi_dudx->addIncoming(float_face_zm_dudx, block_zm);
2462
2463 llvm::PHINode* phi_dudy = this->CreatePHI(coordType, 6, VALUE_NAME("phi_dudy"));
2464 phi_dudy->addIncoming(float_face_xp_dudy, block_xp);
2465 phi_dudy->addIncoming(float_face_xm_dudy, block_xm);
2466 phi_dudy->addIncoming(float_face_yp_dudy, block_yp);
2467 phi_dudy->addIncoming(float_face_ym_dudy, block_ym);
2468 phi_dudy->addIncoming(float_face_zp_dudy, block_zp);
2469 phi_dudy->addIncoming(float_face_zm_dudy, block_zm);
2470
2471 llvm::PHINode* phi_dvdx = this->CreatePHI(coordType, 6, VALUE_NAME("phi_dvdx"));
2472 phi_dvdx->addIncoming(float_face_xp_dvdx, block_xp);
2473 phi_dvdx->addIncoming(float_face_xm_dvdx, block_xm);
2474 phi_dvdx->addIncoming(float_face_yp_dvdx, block_yp);
2475 phi_dvdx->addIncoming(float_face_ym_dvdx, block_ym);
2476 phi_dvdx->addIncoming(float_face_zp_dvdx, block_zp);
2477 phi_dvdx->addIncoming(float_face_zm_dvdx, block_zm);
2478
2479 llvm::PHINode* phi_dvdy = this->CreatePHI(coordType, 6, VALUE_NAME("phi_dvdy"));
2480 phi_dvdy->addIncoming(float_face_xp_dvdy, block_xp);
2481 phi_dvdy->addIncoming(float_face_xm_dvdy, block_xm);
2482 phi_dvdy->addIncoming(float_face_yp_dvdy, block_yp);
2483 phi_dvdy->addIncoming(float_face_ym_dvdy, block_ym);
2484 phi_dvdy->addIncoming(float_face_zp_dvdy, block_zp);
2485 phi_dvdy->addIncoming(float_face_zm_dvdy, block_zm);
2486
2487 llvm::PHINode* phi_face_id = this->CreatePHI(coordType, 6, VALUE_NAME("phi_face_id"));
2488 phi_face_id->addIncoming(float_face_xp_id, block_xp);
2489 phi_face_id->addIncoming(float_face_xm_id, block_xm);
2490 phi_face_id->addIncoming(float_face_yp_id, block_yp);
2491 phi_face_id->addIncoming(float_face_ym_id, block_ym);
2492 phi_face_id->addIncoming(float_face_zp_id, block_zp);
2493 phi_face_id->addIncoming(float_face_zm_id, block_zm);
2494
2495 if (shouldSplitBB)
2496 {
2497 llvm::BranchInst* brInst = this->CreateBr(splitBlock);
2498 this->SetInsertPoint(brInst);
2499 }
2500
2501 SampleD_DC_FromCubeParams D_DC_CUBE_params;
2502
2503 D_DC_CUBE_params.float_src_u = phi_u;
2504 D_DC_CUBE_params.dxu = phi_dudx;
2505 D_DC_CUBE_params.dyu = phi_dudy;
2506 D_DC_CUBE_params.float_src_v = phi_v;
2507 D_DC_CUBE_params.dxv = phi_dvdx;
2508 D_DC_CUBE_params.dyv = phi_dvdy;
2509 D_DC_CUBE_params.float_src_r = phi_face_id;
2510 D_DC_CUBE_params.dxr = zero;
2511 D_DC_CUBE_params.dyr = zero;
2512 D_DC_CUBE_params.float_src_ai = float_src_ai;
2513 D_DC_CUBE_params.int32_textureIdx = int32_textureIdx;
2514 D_DC_CUBE_params.int32_sampler = int32_sampler;
2515 D_DC_CUBE_params.int32_offsetU = m_int0;
2516 D_DC_CUBE_params.int32_offsetV = m_int0;
2517 D_DC_CUBE_params.int32_offsetW = m_int0;
2518
2519 return D_DC_CUBE_params;
2520 }
2521 }
2522
2523 template<bool preserveNames, typename T, typename Inserter>
CreateFAbs(llvm::Value * V)2524 llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::CreateFAbs(llvm::Value* V)
2525 {
2526 llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
2527
2528 llvm::Function* fabs = llvm::Intrinsic::getDeclaration(module, llvm::Intrinsic::fabs, V->getType());
2529 return this->CreateCall(fabs, V);
2530 }
2531
2532 template<bool preserveNames, typename T, typename Inserter>
CreateFSat(llvm::Value * V)2533 llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::CreateFSat(llvm::Value* V)
2534 {
2535 llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
2536
2537 llvm::Function* fsat =
2538 llvm::GenISAIntrinsic::getDeclaration(module, llvm::GenISAIntrinsic::GenISA_fsat, V->getType());
2539 return this->CreateCall(fsat, V);
2540 }
2541
2542 template<bool preserveNames, typename T, typename Inserter>
CreateF16TOF32(llvm::Value * f16_src)2543 llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::CreateF16TOF32(
2544 llvm::Value* f16_src)
2545 {
2546 llvm::Value* f32_dst = this->CreateFPExt(f16_src, this->getFloatTy(), VALUE_NAME("src0_s"));
2547 return f32_dst;
2548 }
2549
2550 /*****************************************************************************\
2551 Description:
2552 Returns true if additional conversion is required if given format is
2553 128bit.
2554
2555 Input:
2556 SURFACE_FORMAT format - conversion format
2557
2558 Output:
2559 bool - return value.
2560
2561 \*****************************************************************************/
2562 template<bool preserveNames, typename T, typename Inserter>
NeedConversionFor128FormatRead(IGC::SURFACE_FORMAT format) const2563 bool LLVM3DBuilder<preserveNames, T, Inserter>::NeedConversionFor128FormatRead(
2564 IGC::SURFACE_FORMAT format) const
2565 {
2566 bool needsConversion = true;
2567
2568 if ((format == IGC::SURFACE_FORMAT::SURFACE_FORMAT_R32G32B32A32_FLOAT) ||
2569 (format == IGC::SURFACE_FORMAT::SURFACE_FORMAT_R32G32B32A32_UINT) ||
2570 (format == IGC::SURFACE_FORMAT::SURFACE_FORMAT_R32G32B32A32_SINT))
2571 {
2572 needsConversion = false;
2573 }
2574
2575 return needsConversion;
2576 }
2577
2578 template<bool preserveNames, typename T, typename Inserter>
Create_UBFE(llvm::Value * int32_width,llvm::Value * int32_offset,llvm::Value * int32_source)2579 inline llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::Create_UBFE(
2580 llvm::Value* int32_width,
2581 llvm::Value* int32_offset,
2582 llvm::Value* int32_source)
2583 {
2584 // %res = call i32 @llvm.GenISA.ubfe(i32 %src0_s, i32 %src1_s, i32 %src2_s)
2585 llvm::Value * packed_params[] = {
2586 int32_width,
2587 int32_offset,
2588 int32_source
2589 };
2590 llvm::CallInst* int32_res = llvm::cast<llvm::CallInst>(this->CreateCall(llvm_GenISA_ubfe(), packed_params));
2591 return int32_res;
2592 }
2593
2594 template<bool preserveNames, typename T, typename Inserter>
Create_IBFE(llvm::Value * int32_width,llvm::Value * int32_offset,llvm::Value * int32_source)2595 inline llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::Create_IBFE(
2596 llvm::Value* int32_width,
2597 llvm::Value* int32_offset,
2598 llvm::Value* int32_source)
2599 {
2600 // %res = call i32 @llvm.GenISA.ibfe(i32 %int32_width, i32 %int32_offset, i32 %int32_source)
2601 llvm::Value * packed_params[] = {
2602 int32_width,
2603 int32_offset,
2604 int32_source
2605 };
2606 llvm::CallInst* int32_res = llvm::cast<llvm::CallInst>(this->CreateCall(llvm_GenISA_ibfe(), packed_params));
2607 return int32_res;
2608 }
2609
2610 template<bool preserveNames, typename T, typename Inserter>
Create_BFI(llvm::Value * int32_width,llvm::Value * int32_offset,llvm::Value * int32_source,llvm::Value * int32_replace)2611 inline llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::Create_BFI(
2612 llvm::Value* int32_width,
2613 llvm::Value* int32_offset,
2614 llvm::Value* int32_source,
2615 llvm::Value* int32_replace)
2616 {
2617 llvm::Value * packed_params[] = {
2618 int32_width,
2619 int32_offset,
2620 int32_source,
2621 int32_replace
2622 };
2623 llvm::CallInst* int32_res = llvm::cast<llvm::CallInst>(this->CreateCall(llvm_GenISA_bfi(), packed_params));
2624 return int32_res;
2625 }
2626
2627 template<bool preserveNames, typename T, typename Inserter>
Create_BFREV(llvm::Value * int32_source)2628 inline llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::Create_BFREV(
2629 llvm::Value* int32_source)
2630 {
2631 llvm::Value * packed_params[] = {
2632 int32_source
2633 };
2634 llvm::CallInst* int32_res = llvm::cast<llvm::CallInst>(this->CreateCall(llvm_GenISA_bfrev(), packed_params));
2635 return int32_res;
2636 }
2637
2638 template<bool preserveNames, typename T, typename Inserter>
Create_FirstBitHi(llvm::Value * int32_source)2639 inline llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::Create_FirstBitHi(
2640 llvm::Value* int32_source)
2641 {
2642 llvm::Value * packed_params[] = {
2643 int32_source
2644 };
2645 llvm::CallInst* int32_res = llvm::cast<llvm::CallInst>(this->CreateCall(llvm_GenISA_firstbitHi(), packed_params));
2646 return int32_res;
2647 }
2648
2649 template<bool preserveNames, typename T, typename Inserter>
Create_FirstBitLo(llvm::Value * int32_source)2650 inline llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::Create_FirstBitLo(
2651 llvm::Value* int32_source)
2652 {
2653 llvm::Value * packed_params[] = {
2654 int32_source
2655 };
2656 llvm::CallInst* int32_res = llvm::cast<llvm::CallInst>(this->CreateCall(llvm_GenISA_firstbitLo(), packed_params));
2657 return int32_res;
2658 }
2659
2660 template<bool preserveNames, typename T, typename Inserter>
Create_FirstBitShi(llvm::Value * int32_source)2661 inline llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::Create_FirstBitShi(
2662 llvm::Value* int32_source)
2663 {
2664 llvm::Value * packed_params[] = {
2665 int32_source
2666 };
2667 llvm::CallInst* int32_res = llvm::cast<llvm::CallInst>(this->CreateCall(llvm_GenISA_firstbitShi(), packed_params));
2668 return int32_res;
2669 }
2670
2671 template<bool preserveNames, typename T, typename Inserter>
create_indirectLoad(llvm::Value * srcBuffer,llvm::Value * offset,llvm::Value * alignment,llvm::Type * returnType,bool isVolatile)2672 inline llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::create_indirectLoad(
2673 llvm::Value* srcBuffer,
2674 llvm::Value* offset,
2675 llvm::Value* alignment,
2676 llvm::Type* returnType,
2677 bool isVolatile /* false */)
2678 {
2679 llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
2680 llvm::Type* types[] = {
2681 returnType,
2682 srcBuffer->getType()
2683 };
2684 llvm::Function* pfuncLdPtr = llvm::GenISAIntrinsic::getDeclaration(
2685 module,
2686 llvm::GenISAIntrinsic::GenISA_ldrawvector_indexed,
2687 types);
2688 return this->CreateCall4(pfuncLdPtr, srcBuffer, offset, alignment, this->getInt1(isVolatile));
2689 }
2690
2691 template<bool preserveNames, typename T, typename Inserter>
create_indirectStore(llvm::Value * srcBuffer,llvm::Value * offset,llvm::Value * data,bool isVolatile)2692 inline llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::create_indirectStore(
2693 llvm::Value* srcBuffer,
2694 llvm::Value* offset,
2695 llvm::Value* data,
2696 bool isVolatile /* false */ )
2697 {
2698 llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
2699 llvm::Type* types[] = {
2700 srcBuffer->getType(),
2701 data->getType(),
2702 };
2703 llvm::Function* pFunc = llvm::GenISAIntrinsic::getDeclaration(
2704 module,
2705 llvm::GenISAIntrinsic::GenISA_storerawvector_indexed,
2706 types);
2707 llvm::Value* alignment = this->getInt32(data->getType()->getScalarSizeInBits() / 8);
2708 return this->CreateCall5(pFunc, srcBuffer, offset, data, alignment, this->getInt1(isVolatile));
2709 }
2710
2711 template<bool preserveNames, typename T, typename Inserter>
create_atomicCounterIncrement(llvm::Value * srcBuffer)2712 inline llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::create_atomicCounterIncrement(
2713 llvm::Value* srcBuffer)
2714 {
2715 llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
2716 llvm::Function* pFunc = llvm::GenISAIntrinsic::getDeclaration(
2717 module,
2718 llvm::GenISAIntrinsic::GenISA_atomiccounterinc,
2719 srcBuffer->getType());
2720 return this->CreateCall(pFunc, srcBuffer);
2721 }
2722
2723 template<bool preserveNames, typename T, typename Inserter>
create_atomicCounterDecrement(llvm::Value * srcBuffer)2724 inline llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::create_atomicCounterDecrement(
2725 llvm::Value* srcBuffer)
2726 {
2727 llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
2728 llvm::Function* pFunc = llvm::GenISAIntrinsic::getDeclaration(
2729 module,
2730 llvm::GenISAIntrinsic::GenISA_atomiccounterpredec,
2731 srcBuffer->getType());
2732 return this->CreateCall(pFunc, srcBuffer);
2733 }
2734
2735 template<bool preserveNames, typename T, typename Inserter>
createThreadLocalId(unsigned int dim)2736 inline llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::createThreadLocalId(unsigned int dim)
2737 {
2738 llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
2739 llvm::Function* pFunc = llvm::GenISAIntrinsic::getDeclaration(
2740 module,
2741 llvm::GenISAIntrinsic::GenISA_DCL_SystemValue,
2742 this->getInt32Ty());
2743 return this->CreateCall(pFunc, this->getInt32(IGC::THREAD_ID_IN_GROUP_X + dim));
2744 }
2745
2746 template<bool preserveNames, typename T, typename Inserter>
createGroupId(unsigned int dim)2747 inline llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::createGroupId(unsigned int dim)
2748 {
2749 llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
2750 llvm::Function* pFunc = llvm::GenISAIntrinsic::getDeclaration(
2751 module,
2752 llvm::GenISAIntrinsic::GenISA_DCL_SystemValue,
2753 this->getFloatTy());
2754 return this->CreateBitCast(
2755 this->CreateCall(pFunc, this->getInt32(IGC::THREAD_GROUP_ID_X + dim)), this->getInt32Ty());
2756 }
2757
2758 template<bool preserveNames, typename T, typename Inserter>
CreateFrc(llvm::Value * V)2759 llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::CreateFrc(llvm::Value* V)
2760 {
2761 llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
2762
2763 llvm::Function* frc =
2764 llvm::GenISAIntrinsic::getDeclaration(module, llvm::GenISAIntrinsic::GenISA_frc);
2765 return this->CreateCall(frc, V);
2766 }
2767
2768 template<bool preserveNames, typename T, typename Inserter>
CreateSin(llvm::Value * V)2769 llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::CreateSin(llvm::Value* V)
2770 {
2771 llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
2772
2773 llvm::Function* sin =
2774 llvm::Intrinsic::getDeclaration(module, llvm::Intrinsic::sin, V->getType());
2775 return this->CreateCall(sin, V);
2776 }
2777
2778 template<bool preserveNames, typename T, typename Inserter>
CreateCos(llvm::Value * V)2779 llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::CreateCos(llvm::Value* V)
2780 {
2781 llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
2782
2783 llvm::Function* cos =
2784 llvm::Intrinsic::getDeclaration(module, llvm::Intrinsic::cos, V->getType());
2785 return this->CreateCall(cos, V);
2786 }
2787
2788 template<bool preserveNames, typename T, typename Inserter>
CreateSqrt(llvm::Value * V)2789 llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::CreateSqrt(llvm::Value* V)
2790 {
2791 llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
2792
2793 llvm::Function* sqrt =
2794 llvm::Intrinsic::getDeclaration(module, llvm::Intrinsic::sqrt, V->getType());
2795 return this->CreateCall(sqrt, V);
2796 }
2797
2798 template<bool preserveNames, typename T, typename Inserter>
CreateFPow(llvm::Value * LHS,llvm::Value * RHS)2799 llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::CreateFPow(llvm::Value *LHS, llvm::Value *RHS)
2800 {
2801 llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
2802
2803 llvm::Function* fpow =
2804 llvm::Intrinsic::getDeclaration(module, llvm::Intrinsic::pow, LHS->getType());
2805 return this->CreateCall2(fpow, LHS, RHS);
2806 }
2807
2808 template<bool preserveNames, typename T, typename Inserter>
CreateFMax(llvm::Value * LHS,llvm::Value * RHS)2809 llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::CreateFMax(llvm::Value *LHS, llvm::Value *RHS)
2810 {
2811 llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
2812
2813 llvm::Function* fmax =
2814 llvm::Intrinsic::getDeclaration(module, llvm::Intrinsic::maxnum, LHS->getType());
2815 return this->CreateCall2(fmax, LHS, RHS);
2816 }
2817
2818 template<bool preserveNames, typename T, typename Inserter>
CreateFMin(llvm::Value * LHS,llvm::Value * RHS)2819 llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::CreateFMin(llvm::Value *LHS, llvm::Value *RHS)
2820 {
2821 llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
2822
2823 llvm::Function* fmin =
2824 llvm::Intrinsic::getDeclaration(module, llvm::Intrinsic::minnum, LHS->getType());
2825 return this->CreateCall2(fmin, LHS, RHS);
2826 }
2827
2828 template<bool preserveNames, typename T, typename Inserter>
CreateIMulH(llvm::Value * LHS,llvm::Value * RHS)2829 llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::CreateIMulH(llvm::Value *LHS, llvm::Value *RHS)
2830 {
2831 llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
2832
2833 llvm::Function* imulh =
2834 llvm::GenISAIntrinsic::getDeclaration(module, llvm::GenISAIntrinsic::GenISA_imulH, LHS->getType());
2835 return this->CreateCall2(imulh, LHS, RHS);
2836 }
2837
2838 template<bool preserveNames, typename T, typename Inserter>
CreateUMulH(llvm::Value * LHS,llvm::Value * RHS)2839 llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::CreateUMulH(llvm::Value *LHS, llvm::Value *RHS)
2840 {
2841 llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
2842
2843 llvm::Function* umulh =
2844 llvm::GenISAIntrinsic::getDeclaration(module, llvm::GenISAIntrinsic::GenISA_umulH, LHS->getType());
2845 return this->CreateCall2(umulh, LHS, RHS);
2846 }
2847
2848 template<bool preserveNames, typename T, typename Inserter>
CreateDiscard(llvm::Value * V)2849 llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::CreateDiscard(llvm::Value* V)
2850 {
2851 llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
2852
2853 llvm::Function* discard =
2854 llvm::GenISAIntrinsic::getDeclaration(module, llvm::GenISAIntrinsic::GenISA_discard);
2855 return this->CreateCall(discard, V);
2856 }
2857
2858 template<bool preserveNames, typename T, typename Inserter>
CreateFLog(llvm::Value * V)2859 llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::CreateFLog(llvm::Value *V)
2860 {
2861 llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
2862
2863 llvm::Function* flog =
2864 llvm::Intrinsic::getDeclaration(module, llvm::Intrinsic::log2, V->getType());
2865 return this->CreateCall(flog, V);
2866 }
2867
2868 template<bool preserveNames, typename T, typename Inserter>
CreateFExp(llvm::Value * V)2869 llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::CreateFExp(llvm::Value *V)
2870 {
2871 llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
2872
2873 llvm::Function* fexp =
2874 llvm::Intrinsic::getDeclaration(module, llvm::Intrinsic::exp2, V->getType());
2875 return this->CreateCall(fexp, V);
2876 }
2877
2878 template<bool preserveNames, typename T, typename Inserter>
CreateDFloor(llvm::Value * src)2879 llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::CreateDFloor(llvm::Value* src)
2880 {
2881 llvm::Module* const mod = this->GetInsertBlock()->getParent()->getParent();
2882 IGC_ASSERT(nullptr != mod);
2883 llvm::Function* func = mod->getFunction("__builtin_floor_f64");
2884 if (func != nullptr)
2885 {
2886 return this->CreateCall(func, src);
2887 }
2888
2889 // from OCL builtin: double @__builtin_spirv_floor_f64(double %x)
2890 static const char* const code =
2891 "define double @__builtin_floor_f64(double %x) \n"
2892 " alwaysinline nounwind readnone { \n"
2893 " %1 = bitcast double %x to i64 \n"
2894 " %2 = lshr i64 %1, 32 \n"
2895 " %3 = trunc i64 %2 to i32 \n"
2896 " %4 = lshr i64 %1, 52 \n"
2897 " %5 = trunc i64 %4 to i32 \n"
2898 " %6 = and i32 %5, 2047 \n"
2899 " %7 = sub nsw i32 1023, %6 \n"
2900 " %8 = add nsw i32 %7, 52 \n"
2901 " %9 = add nsw i32 %7, 20 \n"
2902 " %10 = icmp sgt i32 %8, 32 \n"
2903 " %11 = select i1 %10, i32 32, i32 %8 \n"
2904 " %12 = icmp sgt i32 %9, 20 \n"
2905 " %13 = select i1 %12, i32 20, i32 %9 \n"
2906 " %14 = icmp sgt i32 %11, 0 \n"
2907 " %15 = select i1 %14, i32 %11, i32 0 \n"
2908 " %16 = icmp sgt i32 %13, 0 \n"
2909 " %17 = select i1 %16, i32 %13, i32 0 \n"
2910 " %18 = and i32 %15, 31 \n"
2911 " %19 = shl i32 -1, %18 \n"
2912 " %20 = and i32 %17, 31 \n"
2913 " %21 = shl i32 -1, %20 \n"
2914 " %22 = icmp ne i32 %15, 32 \n"
2915 " %23 = select i1 %22, i32 %19, i32 0 \n"
2916 " %24 = icmp eq i32 %17, 32 \n"
2917 " %25 = icmp ult i32 %6, 1023 \n"
2918 " %or.cond.i = or i1 %25, %24 \n"
2919 " %maskValHigh32bit.0.i = select i1 %or.cond.i, i32 -2147483648, i32 %21 \n"
2920 " %maskValLow32bit.0.i = select i1 %or.cond.i, i32 0, i32 %23 \n"
2921 " %26 = trunc i64 %1 to i32 \n"
2922 " %27 = and i32 %maskValLow32bit.0.i, %26 \n"
2923 " %28 = and i32 %maskValHigh32bit.0.i, %3 \n"
2924 " %29 = zext i32 %28 to i64 \n"
2925 " %30 = shl nuw i64 %29, 32 \n"
2926 " %31 = zext i32 %27 to i64 \n"
2927 " %32 = or i64 %30, %31 \n"
2928 " %33 = bitcast i64 %32 to double \n"
2929 " %34 = sub i64 %1, %32 \n"
2930 " %35 = lshr i64 %34, 32 \n"
2931 " %36 = or i64 %35, %34 \n"
2932 " %37 = trunc i64 %36 to i32 \n"
2933 " %38 = icmp eq i32 %37, 0 \n"
2934 " %39 = ashr i64 %1, 31 \n"
2935 " %.op = and i64 %39, -4616189618054758400 \n"
2936 " %40 = bitcast i64 %.op to double \n"
2937 " %41 = select i1 %38, double 0.000000e+00, double %40 \n"
2938 " %42 = fadd double %33, %41 \n"
2939 " ret double %42 \n"
2940 "}";
2941
2942 llvm::MemoryBufferRef codeBuf(code, "<string>");
2943 llvm::SMDiagnostic diagnostic;
2944 const bool failed = llvm::parseAssemblyInto(codeBuf, mod, nullptr, diagnostic);
2945 (void) failed;
2946 IGC_ASSERT_MESSAGE(false == failed, "Error parse llvm assembly");
2947
2948 func = mod->getFunction("__builtin_floor_f64");
2949 return this->CreateCall(func, src);
2950 }
2951
2952 template<bool preserveNames, typename T, typename Inserter>
CreateFloor(llvm::Value * V)2953 llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::CreateFloor(llvm::Value *V)
2954 {
2955 llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
2956
2957 if( V->getType() == this->getDoubleTy() )
2958 {
2959 return CreateDFloor(V);
2960 }
2961 else
2962 {
2963 llvm::Function* floor =
2964 llvm::Intrinsic::getDeclaration( module, llvm::Intrinsic::floor, V->getType() );
2965 return this->CreateCall( floor, V );
2966 }
2967 }
2968
2969 template<bool preserveNames, typename T, typename Inserter>
CreateDCeil(llvm::Value * src)2970 llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::CreateDCeil(llvm::Value *src)
2971 {
2972 llvm::Module* const mod = this->GetInsertBlock()->getParent()->getParent();
2973 IGC_ASSERT(nullptr != mod);
2974 llvm::Function* func = mod->getFunction("__builtin_ceil_f64");
2975 if (func != nullptr)
2976 {
2977 return this->CreateCall(func, src);
2978 }
2979
2980 // from OCL builtin: double @__builtin_spirv_ceil_f64(double %x)
2981 static const char* const code =
2982 "define double @__builtin_ceil_f64(double %x) \n"
2983 " alwaysinline nounwind readnone { \n"
2984 " %1 = bitcast double %x to i64 \n"
2985 " %2 = lshr i64 %1, 32 \n"
2986 " %3 = trunc i64 %2 to i32 \n"
2987 " %4 = lshr i64 %1, 52 \n"
2988 " %5 = trunc i64 %4 to i32 \n"
2989 " %6 = and i32 %5, 2047 \n"
2990 " %7 = sub nsw i32 1023, %6 \n"
2991 " %8 = add nsw i32 %7, 52 \n"
2992 " %9 = add nsw i32 %7, 20 \n"
2993 " %10 = icmp sgt i32 %8, 32 \n"
2994 " %11 = select i1 %10, i32 32, i32 %8 \n"
2995 " %12 = icmp sgt i32 %9, 20 \n"
2996 " %13 = select i1 %12, i32 20, i32 %9 \n"
2997 " %14 = icmp sgt i32 %11, 0 \n"
2998 " %15 = select i1 %14, i32 %11, i32 0 \n"
2999 " %16 = icmp sgt i32 %13, 0 \n"
3000 " %17 = select i1 %16, i32 %13, i32 0 \n"
3001 " %18 = and i32 %15, 31 \n"
3002 " %19 = shl i32 -1, %18 \n"
3003 " %20 = and i32 %17, 31 \n"
3004 " %21 = shl i32 -1, %20 \n"
3005 " %22 = icmp ne i32 %15, 32 \n"
3006 " %23 = select i1 %22, i32 %19, i32 0 \n"
3007 " %24 = icmp eq i32 %17, 32 \n"
3008 " %25 = icmp ult i32 %6, 1023 \n"
3009 " %or.cond.i = or i1 %25, %24 \n"
3010 " %maskValHigh32bit.0.i = select i1 %or.cond.i, i32 -2147483648, i32 %21 \n"
3011 " %maskValLow32bit.0.i = select i1 %or.cond.i, i32 0, i32 %23 \n"
3012 " %26 = trunc i64 %1 to i32 \n"
3013 " %27 = and i32 %maskValLow32bit.0.i, %26 \n"
3014 " %28 = and i32 %maskValHigh32bit.0.i, %3 \n"
3015 " %29 = zext i32 %28 to i64 \n"
3016 " %30 = shl nuw i64 %29, 32 \n"
3017 " %31 = zext i32 %27 to i64 \n"
3018 " %32 = or i64 %30, %31 \n"
3019 " %33 = bitcast i64 %32 to double \n"
3020 " %34 = sub i64 %1, %32 \n"
3021 " %35 = lshr i64 %34, 32 \n"
3022 " %36 = or i64 %35, %34 \n"
3023 " %37 = trunc i64 %36 to i32 \n"
3024 " %38 = icmp eq i32 %37, 0 \n"
3025 " %39 = ashr i64 %1, 31 \n"
3026 " %40 = and i64 %39, -4607182418800017408 \n"
3027 " %.op = add nsw i64 %40, 4607182418800017408 \n"
3028 " %41 = bitcast i64 %.op to double \n"
3029 " %42 = select i1 %38, double 0.000000e+00, double %41 \n"
3030 " %43 = fadd double %33, %42 \n"
3031 " ret double %43 \n"
3032 "}";
3033
3034 llvm::MemoryBufferRef codeBuf(code, "<string>");
3035 llvm::SMDiagnostic diagnostic;
3036 const bool failed = llvm::parseAssemblyInto(codeBuf, mod, nullptr, diagnostic);
3037 (void) failed;
3038 IGC_ASSERT_MESSAGE(false == failed, "Error parse llvm assembly");
3039
3040 func = mod->getFunction("__builtin_ceil_f64");
3041
3042 return this->CreateCall(func, src);
3043 }
3044
3045 template<bool preserveNames, typename T, typename Inserter>
CreateCeil(llvm::Value * V)3046 llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::CreateCeil(llvm::Value *V)
3047 {
3048 llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
3049
3050 if( V->getType() == this->getDoubleTy() )
3051 {
3052 return CreateDCeil(V);
3053 }
3054 else
3055 {
3056 llvm::Function* ceil =
3057 llvm::Intrinsic::getDeclaration( module, llvm::Intrinsic::ceil, V->getType() );
3058 return this->CreateCall( ceil, V );
3059 }
3060 }
3061
3062 template<bool preserveNames, typename T, typename Inserter>
CreateDTrunc(llvm::Value * src)3063 llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::CreateDTrunc(llvm::Value *src)
3064 {
3065 llvm::Module* const mod = this->GetInsertBlock()->getParent()->getParent();
3066 IGC_ASSERT(nullptr != mod);
3067 llvm::Function* func = mod->getFunction("__builtin_trunc_f64");
3068 if (func != nullptr)
3069 {
3070 return this->CreateCall(func, src);
3071 }
3072
3073 // from OCL builtin: double @__builtin_spirv_trunc_f64(double %x)
3074 static const char* const code =
3075 "define double @__builtin_trunc_f64(double %x) \n"
3076 " alwaysinline nounwind readnone { \n"
3077 " %1 = bitcast double %x to i64 \n"
3078 " %2 = lshr i64 %1, 32 \n"
3079 " %3 = trunc i64 %2 to i32 \n"
3080 " %4 = lshr i64 %1, 52 \n"
3081 " %5 = trunc i64 %4 to i32 \n"
3082 " %6 = and i32 %5, 2047 \n"
3083 " %7 = sub nsw i32 1023, %6 \n"
3084 " %8 = add nsw i32 %7, 52 \n"
3085 " %9 = add nsw i32 %7, 20 \n"
3086 " %10 = icmp sgt i32 %8, 32 \n"
3087 " %11 = select i1 %10, i32 32, i32 %8 \n"
3088 " %12 = icmp sgt i32 %9, 20 \n"
3089 " %13 = select i1 %12, i32 20, i32 %9 \n"
3090 " %14 = icmp sgt i32 %11, 0 \n"
3091 " %15 = select i1 %14, i32 %11, i32 0 \n"
3092 " %16 = icmp sgt i32 %13, 0 \n"
3093 " %17 = select i1 %16, i32 %13, i32 0 \n"
3094 " %18 = and i32 %15, 31 \n"
3095 " %19 = shl i32 -1, %18 \n"
3096 " %20 = and i32 %17, 31 \n"
3097 " %21 = shl i32 -1, %20 \n"
3098 " %22 = icmp ne i32 %15, 32 \n"
3099 " %23 = select i1 %22, i32 %19, i32 0 \n"
3100 " %24 = icmp eq i32 %17, 32 \n"
3101 " %25 = icmp ult i32 %6, 1023 \n"
3102 " %or.cond = or i1 %25, %24 \n"
3103 " %maskValHigh32bit.0 = select i1 %or.cond, i32 -2147483648, i32 %21 \n"
3104 " %maskValLow32bit.0 = select i1 %or.cond, i32 0, i32 %23 \n"
3105 " %26 = trunc i64 %1 to i32 \n"
3106 " %27 = and i32 %maskValLow32bit.0, %26 \n"
3107 " %28 = and i32 %maskValHigh32bit.0, %3 \n"
3108 " %29 = zext i32 %28 to i64 \n"
3109 " %30 = shl nuw i64 %29, 32 \n"
3110 " %31 = zext i32 %27 to i64 \n"
3111 " %32 = or i64 %30, %31 \n"
3112 " %33 = bitcast i64 %32 to double \n"
3113 " ret double %33 \n"
3114 "}";
3115
3116 llvm::MemoryBufferRef codeBuf(code, "<string>");
3117 llvm::SMDiagnostic diagnostic;
3118 const bool failed = llvm::parseAssemblyInto(codeBuf, mod, nullptr, diagnostic);
3119 (void) failed;
3120 IGC_ASSERT_MESSAGE(false == failed, "Error parse llvm assembly");
3121
3122 func = mod->getFunction("__builtin_trunc_f64");
3123
3124 return this->CreateCall(func, src);
3125 }
3126
3127 template<bool preserveNames, typename T, typename Inserter>
CreateRoundZ(llvm::Value * V)3128 llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::CreateRoundZ(llvm::Value *V)
3129 {
3130 llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
3131
3132 if (V->getType() == this->getDoubleTy())
3133 {
3134 return CreateDTrunc(V);
3135 }
3136 else
3137 {
3138 llvm::Function* trunc =
3139 llvm::Intrinsic::getDeclaration(module, llvm::Intrinsic::trunc, V->getType());
3140 return this->CreateCall(trunc, V);
3141 }
3142 }
3143
3144 template<bool preserveNames, typename T, typename Inserter>
CreateDRoundNE(llvm::Value * src)3145 llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::CreateDRoundNE(llvm::Value *src)
3146 {
3147 llvm::Module* const mod = this->GetInsertBlock()->getParent()->getParent();
3148 IGC_ASSERT(nullptr != mod);
3149 llvm::Function* func = mod->getFunction("__builtin_roundne_f64");
3150 if (func != nullptr)
3151 {
3152 return this->CreateCall(func, src);
3153 }
3154
3155 // From OCL builtin: double @__builtin_spirv_rint_f64(double %x)
3156 static const char* const code =
3157 "define double @__builtin_roundne_f64(double %x) \n"
3158 " alwaysinline nounwind readnone { \n"
3159 " %1 = bitcast double %x to i64 \n"
3160 " %2 = and i64 %1, 9223372036854775807 \n"
3161 " %3 = bitcast i64 %2 to double \n"
3162 " %4 = lshr i64 %2, 52 \n"
3163 " %5 = trunc i64 %4 to i32 \n"
3164 " %6 = icmp ult i32 %5, 1075 \n"
3165 " %7 = zext i1 %6 to i32 \n"
3166 " %8 = sitofp i32 %7 to double \n"
3167 " %9 = fmul double %8, 5.000000e-01 \n"
3168 " %10 = fadd double %3, %9 \n"
3169 " %11 = bitcast double %10 to i64 \n"
3170 " %12 = lshr i64 %11, 32 \n"
3171 " %13 = trunc i64 %12 to i32 \n"
3172 " %14 = lshr i64 %11, 52 \n"
3173 " %15 = trunc i64 %14 to i32 \n"
3174 " %16 = and i32 %15, 2047 \n"
3175 " %17 = sub nsw i32 1023, %16 \n"
3176 " %18 = add nsw i32 %17, 52 \n"
3177 " %19 = add nsw i32 %17, 20 \n"
3178 " %20 = icmp sgt i32 %18, 32 \n"
3179 " %21 = select i1 %20, i32 32, i32 %18 \n"
3180 " %22 = icmp sgt i32 %19, 20 \n"
3181 " %23 = select i1 %22, i32 20, i32 %19 \n"
3182 " %24 = icmp sgt i32 %21, 0 \n"
3183 " %25 = select i1 %24, i32 %21, i32 0 \n"
3184 " %26 = icmp sgt i32 %23, 0 \n"
3185 " %27 = select i1 %26, i32 %23, i32 0 \n"
3186 " %28 = and i32 %25, 31 \n"
3187 " %29 = shl i32 -1, %28 \n"
3188 " %30 = and i32 %27, 31 \n"
3189 " %31 = shl i32 -1, %30 \n"
3190 " %32 = icmp ne i32 %25, 32 \n"
3191 " %33 = select i1 %32, i32 %29, i32 0 \n"
3192 " %34 = icmp eq i32 %27, 32 \n"
3193 " %35 = icmp ult i32 %16, 1023 \n"
3194 " %or.cond.i = or i1 %35, %34 \n"
3195 " %maskValHigh32bit.0.i = select i1 %or.cond.i, i32 -2147483648, i32 %31 \n"
3196 " %maskValLow32bit.0.i = select i1 %or.cond.i, i32 0, i32 %33 \n"
3197 " %36 = trunc i64 %11 to i32 \n"
3198 " %37 = and i32 %maskValLow32bit.0.i, %36 \n"
3199 " %38 = and i32 %maskValHigh32bit.0.i, %13 \n"
3200 " %39 = zext i32 %38 to i64 \n"
3201 " %40 = shl nuw i64 %39, 32 \n"
3202 " %41 = zext i32 %37 to i64 \n"
3203 " %42 = or i64 %40, %41 \n"
3204 " %43 = bitcast i64 %42 to double \n"
3205 " %44 = fptoui double %43 to i64 \n"
3206 " %.tr = trunc i64 %44 to i32 \n"
3207 " %45 = fsub double %43, %3 \n"
3208 " %46 = fcmp oeq double %45, 5.000000e-01 \n"
3209 " %47 = zext i1 %46 to i32 \n"
3210 " %48 = and i32 %.tr, %47 \n"
3211 " %49 = uitofp i32 %48 to double \n"
3212 " %50 = fsub double %43, %49 \n"
3213 " %51 = and i64 %1, -9223372036854775808 \n"
3214 " %52 = bitcast double %50 to i64 \n"
3215 " %53 = or i64 %52, %51 \n"
3216 " %54 = bitcast i64 %53 to double \n"
3217 " ret double %54 \n"
3218 "}";
3219
3220 llvm::MemoryBufferRef codeBuf(code, "<string>");
3221 llvm::SMDiagnostic diagnostic;
3222 const bool failed = llvm::parseAssemblyInto(codeBuf, mod, nullptr, diagnostic);
3223 (void) failed;
3224 IGC_ASSERT_MESSAGE(false == failed, "Error parse llvm assembly");
3225
3226 func = mod->getFunction("__builtin_roundne_f64");
3227
3228 return this->CreateCall(func, src);
3229 }
3230
3231 template<bool preserveNames, typename T, typename Inserter>
CreateRoundNE(llvm::Value * V)3232 llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::CreateRoundNE(llvm::Value *V)
3233 {
3234 llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
3235
3236 if( V->getType() == this->getDoubleTy() )
3237 {
3238 return CreateDRoundNE(V);
3239 }
3240 else if( V->getType() == this->getHalfTy() )
3241 {
3242 V = this->CreateFPExt(V, this->getFloatTy());
3243 llvm::Function* roundne =
3244 llvm::GenISAIntrinsic::getDeclaration(module, llvm::GenISAIntrinsic::GenISA_ROUNDNE);
3245 V = this->CreateCall(roundne, V);
3246 return this->CreateFPTrunc(V, this->getHalfTy());
3247 }
3248 else
3249 {
3250 llvm::Function* roundne =
3251 llvm::GenISAIntrinsic::getDeclaration( module, llvm::GenISAIntrinsic::GenISA_ROUNDNE );
3252 return this->CreateCall( roundne, V );
3253 }
3254 }
3255
3256 template<bool preserveNames, typename T, typename Inserter>
CreateIsNan(llvm::Value * V)3257 inline llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::CreateIsNan(llvm::Value* V)
3258 {
3259 //fcmp_uno yields true if either operand is a QNAN. Since we compare the same numer with itself.
3260 //If V is not NAN it will return false
3261 return this->CreateFCmp(llvm::FCmpInst::FCMP_UNO, V, V);
3262 }
3263
3264
3265 template<bool preserveNames, typename T, typename Inserter>
CreateCtpop(llvm::Value * V)3266 llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::CreateCtpop(llvm::Value *V)
3267 {
3268 llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
3269
3270 llvm::Function* ctpop =
3271 llvm::Intrinsic::getDeclaration(module, llvm::Intrinsic::ctpop, V->getType());
3272 return this->CreateCall(ctpop, V);
3273 }
3274
3275 template<bool preserveNames, typename T, typename Inserter>
getHalf(float f)3276 llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::getHalf(float f)
3277 {
3278 return llvm::ConstantFP::get(this->getHalfTy(), f);
3279 }
3280
3281 template<bool preserveNames, typename T, typename Inserter>
getFloat(float f)3282 llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::getFloat(float f)
3283 {
3284 return llvm::ConstantFP::get(this->getFloatTy(), f);
3285 }
3286
3287 template<bool preserveNames, typename T, typename Inserter>
getDouble(double d)3288 llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::getDouble(double d)
3289 {
3290 return llvm::ConstantFP::get(this->getDoubleTy(), d);
3291 }
3292
3293 template<bool preserveNames, typename T, typename Inserter>
CreateDeriveRTX(llvm::Value * V)3294 llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::CreateDeriveRTX(llvm::Value *V)
3295 {
3296 llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
3297
3298 llvm::Function* floor =
3299 llvm::GenISAIntrinsic::getDeclaration(module, llvm::GenISAIntrinsic::GenISA_GradientX,
3300 V->getType());
3301 return this->CreateCall(floor, V);
3302 }
3303
3304 template<bool preserveNames, typename T, typename Inserter>
CreateDeriveRTX_Fine(llvm::Value * V)3305 llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::CreateDeriveRTX_Fine(llvm::Value *V)
3306 {
3307 llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
3308
3309 llvm::Function* floor =
3310 llvm::GenISAIntrinsic::getDeclaration(module, llvm::GenISAIntrinsic::GenISA_GradientXfine, V->getType());
3311 return this->CreateCall(floor, V);
3312 }
3313
3314 template<bool preserveNames, typename T, typename Inserter>
CreateDeriveRTY(llvm::Value * V)3315 llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::CreateDeriveRTY(llvm::Value *V)
3316 {
3317 llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
3318
3319 llvm::Function* floor =
3320 llvm::GenISAIntrinsic::getDeclaration(module, llvm::GenISAIntrinsic::GenISA_GradientY,
3321 V->getType());
3322 return this->CreateCall(floor, V);
3323 }
3324
3325 template<bool preserveNames, typename T, typename Inserter>
CreateDeriveRTY_Fine(llvm::Value * V)3326 llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::CreateDeriveRTY_Fine(llvm::Value *V)
3327 {
3328 llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
3329
3330 llvm::Function* floor =
3331 llvm::GenISAIntrinsic::getDeclaration(module, llvm::GenISAIntrinsic::GenISA_GradientYfine, V->getType());
3332 return this->CreateCall(floor, V);
3333 }
3334
3335 template<bool preserveNames, typename T, typename Inserter>
Create_MAD_Scalar(llvm::Value * float_src0,llvm::Value * float_src1,llvm::Value * float_src2)3336 llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::Create_MAD_Scalar(llvm::Value* float_src0, llvm::Value* float_src1, llvm::Value* float_src2)
3337 {
3338 llvm::Module* const module = this->GetInsertBlock()->getParent()->getParent();
3339 IGC_ASSERT(nullptr != module);
3340 IGC_ASSERT(nullptr != float_src0);
3341
3342 // Builtin Signature: float (float, float, float)
3343 IGC_ASSERT_MESSAGE((float_src0->getType() == llvm::Type::getHalfTy(module->getContext()) || float_src0->getType() == this->getFloatTy() || float_src0->getType() == this->getDoubleTy()), "Type check @MAD.scalar arg: 0");
3344 IGC_ASSERT_MESSAGE((float_src1->getType() == llvm::Type::getHalfTy(module->getContext()) || float_src1->getType() == this->getFloatTy() || float_src1->getType() == this->getDoubleTy()), "Type check @MAD.scalar arg: 1");
3345 IGC_ASSERT_MESSAGE((float_src2->getType() == llvm::Type::getHalfTy(module->getContext()) || float_src2->getType() == this->getFloatTy() || float_src2->getType() == this->getDoubleTy()), "Type check @MAD.scalar arg: 2");
3346
3347 llvm::Function* madFunc = llvm::Intrinsic::getDeclaration(module, llvm::Intrinsic::fma, float_src0->getType());
3348 llvm::Value* args[] = { float_src0, float_src1, float_src2 };
3349 llvm::Value* float_madres_s = this->CreateCall(madFunc, args);
3350
3351 return float_madres_s;
3352 }
3353
3354 template<bool preserveNames, typename T, typename Inserter>
CreatePow(llvm::Value * src0,llvm::Value * src1)3355 llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::CreatePow(llvm::Value* src0, llvm::Value* src1)
3356 {
3357 llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
3358 llvm::Function* powFunc = llvm::Intrinsic::getDeclaration(module, llvm::Intrinsic::pow, src0->getType());
3359 llvm::Value* args[] = { src0, src1 };
3360 llvm::Value* powres_s = this->CreateCall(powFunc, args);
3361
3362 return powres_s;
3363 }
3364
3365 template<bool preserveNames, typename T, typename Inserter>
Create_SAMPLEBC(llvm::Value * float_ref_value,llvm::Value * bias_value,llvm::Value * address_u,llvm::Value * address_v,llvm::Value * address_r,llvm::Value * address_ai,llvm::Value * int32_textureIdx,llvm::Value * int32_sampler,llvm::Value * int32_offsetU,llvm::Value * int32_offsetV,llvm::Value * int32_offsetW,llvm::Type * returnType)3366 inline llvm::CallInst* LLVM3DBuilder<preserveNames, T, Inserter>::Create_SAMPLEBC(
3367 llvm::Value* float_ref_value,
3368 llvm::Value* bias_value,
3369 llvm::Value* address_u,
3370 llvm::Value* address_v,
3371 llvm::Value* address_r,
3372 llvm::Value* address_ai,
3373 llvm::Value* int32_textureIdx,
3374 llvm::Value* int32_sampler,
3375 llvm::Value* int32_offsetU,
3376 llvm::Value* int32_offsetV,
3377 llvm::Value* int32_offsetW,
3378 llvm::Type* returnType)
3379 {
3380 llvm::Value * packed_tex_params[] = {
3381 float_ref_value,
3382 bias_value,
3383 address_u,
3384 address_v,
3385 address_r,
3386 address_ai,
3387 int32_textureIdx,
3388 int32_sampler,
3389 int32_offsetU,
3390 int32_offsetV,
3391 int32_offsetW
3392 };
3393
3394 llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
3395
3396 llvm::Type* dstType = ( returnType != nullptr ) ? returnType : this->getFloatTy();
3397 llvm::Type* types[] = {
3398 IGCLLVM::FixedVectorType::get(dstType, 4),
3399 float_ref_value->getType(),
3400 int32_textureIdx->getType(),
3401 int32_sampler->getType()
3402 };
3403 llvm::Function* func_llvm_GenISA_sampleBCptr_v4f32_f32 = llvm::GenISAIntrinsic::getDeclaration
3404 (module, llvm::GenISAIntrinsic::GenISA_sampleBCptr, types);
3405
3406 llvm::CallInst* packed_tex_call = this->CreateCall(func_llvm_GenISA_sampleBCptr_v4f32_f32, packed_tex_params);
3407 return packed_tex_call;
3408 }
3409
3410 template<bool preserveNames, typename T, typename Inserter>
CreateEvalSampleIndex(llvm::Value * inputIndex,llvm::Value * sampleIndex,llvm::Value * perspective)3411 inline llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::CreateEvalSampleIndex(
3412 llvm::Value* inputIndex,
3413 llvm::Value* sampleIndex,
3414 llvm::Value* perspective)
3415 {
3416 llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
3417
3418 llvm::Function* pullBarys =
3419 llvm::GenISAIntrinsic::getDeclaration(module, llvm::GenISAIntrinsic::GenISA_PullSampleIndexBarys);
3420 llvm::Value* bary = this->CreateCall2(pullBarys, sampleIndex, perspective);
3421 llvm::Function* interpolate =
3422 llvm::GenISAIntrinsic::getDeclaration(module, llvm::GenISAIntrinsic::GenISA_Interpolate);
3423 return this->CreateCall2(interpolate, inputIndex, bary);
3424 }
3425
3426
3427 template<bool preserveNames, typename T, typename Inserter>
CreateEvalSnapped(llvm::Value * inputIndex,llvm::Value * xOffset,llvm::Value * yOffset,llvm::Value * perspective)3428 inline llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::CreateEvalSnapped(
3429 llvm::Value* inputIndex,
3430 llvm::Value* xOffset,
3431 llvm::Value* yOffset,
3432 llvm::Value* perspective)
3433 {
3434 llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
3435
3436 llvm::Function* pullBarys =
3437 llvm::GenISAIntrinsic::getDeclaration(module, llvm::GenISAIntrinsic::GenISA_PullSnappedBarys);
3438 llvm::Value* bary = this->CreateCall3(pullBarys, xOffset, yOffset, perspective);
3439 llvm::Function* interpolate =
3440 llvm::GenISAIntrinsic::getDeclaration(module, llvm::GenISAIntrinsic::GenISA_Interpolate);
3441 return this->CreateCall2(interpolate, inputIndex, bary);
3442 }
3443
3444 template<bool preserveNames, typename T, typename Inserter>
CreateSetStream(llvm::Value * StreamId,llvm::Value * emitCount)3445 inline llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::CreateSetStream(
3446 llvm::Value* StreamId, llvm::Value* emitCount)
3447 {
3448 llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
3449 llvm::Function* fn =
3450 llvm::GenISAIntrinsic::getDeclaration(module, llvm::GenISAIntrinsic::GenISA_SetStream);
3451 return this->CreateCall2(fn, StreamId, emitCount);
3452 }
3453
3454 template<bool preserveNames, typename T, typename Inserter>
CreateEndPrimitive(llvm::Value * emitCount)3455 inline llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::CreateEndPrimitive(
3456 llvm::Value* emitCount)
3457 {
3458 llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
3459 llvm::Function* fn =
3460 llvm::GenISAIntrinsic::getDeclaration(module, llvm::GenISAIntrinsic::GenISA_EndPrimitive);
3461 return this->CreateCall(fn, emitCount);
3462 }
3463
3464 template<bool preserveNames, typename T, typename Inserter>
CreateControlPointId()3465 inline llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::CreateControlPointId()
3466 {
3467 llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
3468 llvm::Function* fn =
3469 llvm::GenISAIntrinsic::getDeclaration(module, llvm::GenISAIntrinsic::GenISA_DCL_HSControlPointID);
3470 return this->CreateCall(fn);
3471 }
3472
3473 template<bool preserveNames, typename T, typename Inserter>
CreatePrimitiveID()3474 inline llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::CreatePrimitiveID()
3475 {
3476 llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
3477 llvm::Function* pFunc = llvm::GenISAIntrinsic::getDeclaration(
3478 module,
3479 llvm::GenISAIntrinsic::GenISA_DCL_SystemValue,
3480 this->getFloatTy());
3481 return this->CreateBitCast(
3482 this->CreateCall(pFunc, this->getInt32(IGC::PRIMITIVEID)), this->getInt32Ty());
3483 }
3484
3485 template<bool preserveNames, typename T, typename Inserter>
CreateInstanceID()3486 inline llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::CreateInstanceID()
3487 {
3488 llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
3489 llvm::Function* pFunc = llvm::GenISAIntrinsic::getDeclaration(
3490 module,
3491 llvm::GenISAIntrinsic::GenISA_DCL_SystemValue,
3492 this->getFloatTy());
3493 return this->CreateBitCast(
3494 this->CreateCall(pFunc, this->getInt32(IGC::GS_INSTANCEID)), this->getInt32Ty());
3495 }
3496
3497 template<bool preserveNames, typename T, typename Inserter>
CreateSampleIndex()3498 inline llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::CreateSampleIndex()
3499 {
3500 llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
3501 llvm::Function* pFunc = llvm::GenISAIntrinsic::getDeclaration(
3502 module,
3503 llvm::GenISAIntrinsic::GenISA_DCL_SystemValue,
3504 this->getFloatTy());
3505 return this->CreateBitCast(
3506 this->CreateCall(pFunc, this->getInt32(IGC::SAMPLEINDEX)), this->getInt32Ty());
3507 }
3508
3509 template<bool preserveNames, typename T, typename Inserter>
CreateCoverage()3510 inline llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::CreateCoverage()
3511 {
3512 llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
3513 llvm::Function* pFunc = llvm::GenISAIntrinsic::getDeclaration(
3514 module,
3515 llvm::GenISAIntrinsic::GenISA_DCL_SystemValue,
3516 this->getFloatTy());
3517 return this->CreateBitCast(
3518 this->CreateCall(pFunc, this->getInt32(IGC::INPUT_COVERAGE_MASK)), this->getInt32Ty());
3519 }
3520
3521
3522 template<bool preserveNames, typename T, typename Inserter>
CreateDomainPointInput(unsigned int dim)3523 inline llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::CreateDomainPointInput(unsigned int dim)
3524 {
3525 llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
3526 llvm::Function* pFunc = llvm::GenISAIntrinsic::getDeclaration(
3527 module,
3528 llvm::GenISAIntrinsic::GenISA_DCL_SystemValue,
3529 this->getFloatTy());
3530 return this->CreateCall(pFunc, this->getInt32(IGC::DOMAIN_POINT_ID_X + dim));
3531 }
3532
3533 template<bool preserveNames, typename T, typename Inserter>
create_inputVecF32(llvm::Value * inputIndex,llvm::Value * interpolationMode)3534 inline llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::create_inputVecF32(llvm::Value* inputIndex, llvm::Value* interpolationMode)
3535 {
3536 llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
3537 llvm::Function* pFunc = llvm::GenISAIntrinsic::getDeclaration(
3538 module,
3539 llvm::GenISAIntrinsic::GenISA_DCL_inputVec,
3540 this->getFloatTy());
3541 return this->CreateCall2(pFunc, inputIndex, interpolationMode);
3542 }
3543
3544 template<bool preserveNames, typename T, typename Inserter>
create_discard(llvm::Value * condition)3545 inline llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::create_discard(llvm::Value* condition)
3546 {
3547 llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
3548 llvm::Function* pFunc = llvm::GenISAIntrinsic::getDeclaration(module, llvm::GenISAIntrinsic::GenISA_discard);
3549 return this->CreateCall(pFunc, condition);
3550 }
3551
3552 template<bool preserveNames, typename T, typename Inserter>
create_runtime(llvm::Value * offset)3553 inline llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::create_runtime(llvm::Value* offset)
3554 {
3555 llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
3556 llvm::Function* pFunc = llvm::GenISAIntrinsic::getDeclaration(module, llvm::GenISAIntrinsic::GenISA_RuntimeValue);
3557 return this->CreateCall(pFunc, offset);
3558 }
3559
3560 template<bool preserveNames, typename T, typename Inserter>
create_uavSerializeAll()3561 inline llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::create_uavSerializeAll()
3562 {
3563 llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
3564 llvm::Function* pFunc = llvm::GenISAIntrinsic::getDeclaration(module, llvm::GenISAIntrinsic::GenISA_uavSerializeAll);
3565 return this->CreateCall(pFunc);
3566 }
3567
3568 template<bool preserveNames, typename T, typename Inserter>
create_countbits(llvm::Value * src)3569 inline llvm::CallInst* LLVM3DBuilder<preserveNames, T, Inserter>::create_countbits(llvm::Value* src)
3570 {
3571 llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
3572 llvm::Function* pFunc = llvm::Intrinsic::getDeclaration(
3573 module,
3574 llvm::Intrinsic::ctpop,
3575 this->getInt32Ty());
3576 return this->CreateCall(pFunc, src);
3577 }
3578
3579 template<bool preserveNames, typename T, typename Inserter>
3580 inline llvm::Value*
create_waveInverseBallot(llvm::Value * src)3581 LLVM3DBuilder<preserveNames, T, Inserter>::create_waveInverseBallot(
3582 llvm::Value* src)
3583 {
3584 llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
3585 llvm::Function* pFunc = llvm::GenISAIntrinsic::getDeclaration(
3586 module,
3587 llvm::GenISAIntrinsic::GenISA_WaveInverseBallot);
3588 return this->CreateCall(pFunc, src);
3589 }
3590
3591 template<bool preserveNames, typename T, typename Inserter>
create_waveBallot(llvm::Value * src)3592 inline llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::create_waveBallot(llvm::Value* src)
3593 {
3594 llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
3595 llvm::Function* pFunc = llvm::GenISAIntrinsic::getDeclaration(
3596 module,
3597 llvm::GenISAIntrinsic::GenISA_WaveBallot);
3598 return this->CreateCall(pFunc, src);
3599 }
3600
3601 template<bool preserveNames, typename T, typename Inserter>
create_waveshuffleIndex(llvm::Value * src,llvm::Value * index,llvm::Value * helperLaneMode)3602 inline llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::create_waveshuffleIndex(llvm::Value* src, llvm::Value* index, llvm::Value* helperLaneMode)
3603 {
3604 llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
3605 llvm::Type* srcType = src->getType();
3606 if (srcType == this->getInt1Ty())
3607 {
3608 src = this->CreateZExt(src, this->getInt32Ty());
3609 }
3610 llvm::Function* pFunc = llvm::GenISAIntrinsic::getDeclaration(
3611 module,
3612 llvm::GenISAIntrinsic::GenISA_WaveShuffleIndex,
3613 src->getType());
3614 llvm::Value* retVal = this->CreateCall3(pFunc, src, index, (helperLaneMode ? helperLaneMode : this->getInt32(0)));
3615 if (srcType == this->getInt1Ty())
3616 {
3617 retVal = this->CreateTrunc(retVal, srcType);
3618 }
3619 return retVal;
3620 }
3621
3622 template<bool preserveNames, typename T, typename Inserter>
create_waveAll(llvm::Value * src,llvm::Value * type)3623 inline llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::create_waveAll(llvm::Value* src, llvm::Value* type)
3624 {
3625 llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
3626 llvm::Function* pFunc = llvm::GenISAIntrinsic::getDeclaration(
3627 module,
3628 llvm::GenISAIntrinsic::GenISA_WaveAll,
3629 src->getType());
3630 return this->CreateCall2(pFunc, src, type);
3631 }
3632
3633 template<bool preserveNames, typename T, typename Inserter>
create_wavePrefix(llvm::Value * src,llvm::Value * type,bool inclusive,llvm::Value * Mask)3634 inline llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::create_wavePrefix(
3635 llvm::Value* src, llvm::Value* type, bool inclusive, llvm::Value *Mask)
3636 {
3637 // If a nullptr is passed in for 'Mask' (as is the default), just include
3638 // all lanes.
3639 Mask = Mask ? Mask : this->getInt1(true);
3640
3641 llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
3642 llvm::Function* pFunc = llvm::GenISAIntrinsic::getDeclaration(
3643 module,
3644 llvm::GenISAIntrinsic::GenISA_WavePrefix,
3645 src->getType());
3646 return this->CreateCall4(pFunc, src, type, this->getInt1(inclusive), Mask);
3647 }
3648
3649 // We currently use the combination of 'convergent' and
3650 // 'inaccessiblememonly' to prevent code motion of
3651 // wave intrinsics. Removing 'readnone' from a callsite
3652 // is not sufficient to stop LICM from looking back up to the
3653 // function definition for the attribute. We can short circuit that
3654 // by creating an operand bundle. The name "nohoist" is not
3655 // significant; anything will do.
setUnsafeToHoistAttr(llvm::CallInst * CI)3656 inline llvm::CallInst* setUnsafeToHoistAttr(llvm::CallInst *CI)
3657 {
3658 CI->setConvergent();
3659 #if LLVM_VERSION_MAJOR >= 7
3660 CI->setOnlyAccessesInaccessibleMemory();
3661 CI->removeAttribute(llvm::AttributeList::FunctionIndex, llvm::Attribute::ReadNone);
3662 #else
3663 CI->addAttribute(
3664 llvm::AttributeSet::FunctionIndex, llvm::Attribute::InaccessibleMemOnly);
3665 CI->removeAttribute(llvm::AttributeSet::FunctionIndex, llvm::Attribute::ReadNone);
3666 #endif
3667 llvm::OperandBundleDef OpDef("nohoist", llvm::None);
3668
3669 // An operand bundle cannot be appended onto a call after creation.
3670 // clone the instruction but add our operandbundle on as well.
3671 llvm::SmallVector<llvm::OperandBundleDef, 1> OpBundles;
3672 CI->getOperandBundlesAsDefs(OpBundles);
3673 OpBundles.push_back(OpDef);
3674 llvm::CallInst *NewCall = llvm::CallInst::Create(CI, OpBundles, CI);
3675 CI->replaceAllUsesWith(NewCall);
3676 return NewCall;
3677 }
3678
3679 template<bool preserveNames, typename T, typename Inserter>
3680 inline llvm::Value*
create_wavePrefixBitCount(llvm::Value * src,llvm::Value * Mask)3681 LLVM3DBuilder<preserveNames, T, Inserter>::create_wavePrefixBitCount(
3682 llvm::Value* src, llvm::Value *Mask)
3683 {
3684 //bits = ballot(bBit);
3685 //laneMaskLT = (1 << WaveGetLaneIndex()) - 1;
3686 //prefixBitCount = countbits(bits & laneMaskLT);
3687 llvm::Value* ballot = this->create_waveBallot(src);
3688 if (Mask)
3689 ballot = this->CreateAnd(ballot, Mask);
3690 llvm::Value* shlLaneId = this->CreateShl(
3691 this->getInt32(1), this->get32BitLaneID());
3692 llvm::Value* laneMask = this->CreateSub(shlLaneId, this->getInt32(1));
3693 llvm::Value *mask = this->CreateAnd(ballot, laneMask);
3694
3695 // update llvm.ctpop so it won't be hoisted/sunk out of the loop.
3696 auto *PopCnt = this->create_countbits(mask);
3697 auto *NoHoistPopCnt = setUnsafeToHoistAttr(PopCnt);
3698 PopCnt->eraseFromParent();
3699 return NoHoistPopCnt;
3700 }
3701
3702 template<bool preserveNames, typename T, typename Inserter>
create_waveMatch(llvm::Instruction * inst,llvm::Value * src)3703 inline llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::create_waveMatch(
3704 llvm::Instruction *inst,
3705 llvm::Value *src)
3706 {
3707
3708 // Note that we will stay in the loop above as long as there is at least
3709 // one active lane remaining.
3710
3711 // We will split the basic blocks twice. The first will create a
3712 // pre-header for the loop code. The second will separate the WaveMatch
3713 // from code after it so it can be broken down into a sequence of
3714 // instructions and then branch to the remaining code when done.
3715
3716 auto *PreHeader = inst->getParent();
3717 auto *BodyBlock = PreHeader->splitBasicBlock(inst, "wavematch-body");
3718 auto *EndBlock = BodyBlock->splitBasicBlock(
3719 inst->getNextNode(), "wavematch-end");
3720
3721 // Make sure that we set the insert point again as we've just invalidated
3722 // it with the splitBasicBlock() calls above.
3723 this->SetInsertPoint(inst);
3724
3725 // Now generate the code for a single iteration of the code
3726 auto *FirstValue = this->readFirstLane(src);
3727 llvm::Value *CmpRes = nullptr;
3728 if (src->getType()->isFloatingPointTy())
3729 CmpRes = this->CreateFCmpOEQ(FirstValue, src);
3730 else
3731 CmpRes = this->CreateICmpEQ(FirstValue, src);
3732
3733 auto *Mask = this->create_waveBallot(CmpRes);
3734
3735 // Replace the current terminator to either exit the loop
3736 // or branch back for another iteration.
3737 auto *Br = BodyBlock->getTerminator();
3738 this->SetInsertPoint(Br);
3739 this->CreateCondBr(CmpRes, EndBlock, BodyBlock);
3740 Br->eraseFromParent();
3741
3742 // Now, gather up the output struct outside of the loop
3743 this->SetInsertPoint(&*EndBlock->getFirstInsertionPt());
3744
3745 return Mask;
3746 }
3747
3748 template<bool preserveNames, typename T, typename Inserter>
3749 inline llvm::Value*
create_waveMultiPrefix(llvm::Instruction * I,llvm::Value * Val,llvm::Value * Mask,IGC::WaveOps OpKind)3750 LLVM3DBuilder<preserveNames, T, Inserter>::create_waveMultiPrefix(
3751 llvm::Instruction *I,
3752 llvm::Value *Val,
3753 llvm::Value *Mask,
3754 IGC::WaveOps OpKind)
3755 {
3756 // This implementation is similar create_waveMatch() in that we loop
3757 // until all subsets of lanes are processed.
3758 auto *PreHeader = I->getParent();
3759 auto *BodyBlock = PreHeader->splitBasicBlock(I, "multiprefix-body");
3760 auto *EndBlock = BodyBlock->splitBasicBlock(
3761 I->getNextNode(), "multiprefix-end");
3762
3763 // Make sure that we set the insert point again as we've just invalidated
3764 // it with the splitBasicBlock() calls above.
3765 this->SetInsertPoint(I);
3766
3767 // Now generate the code for a single iteration of the code
3768 auto *FirstValue = this->readFirstLane(Mask);
3769 auto *ParticipatingLanes = this->create_waveInverseBallot(FirstValue);
3770
3771 auto *WavePrefix = this->create_wavePrefix(
3772 Val, this->getInt8((uint8_t)OpKind), false, ParticipatingLanes);
3773
3774 // Replace the current terminator to either exit the loop
3775 // or branch back for another iteration.
3776 auto *Br = BodyBlock->getTerminator();
3777 this->SetInsertPoint(Br);
3778 this->CreateCondBr(ParticipatingLanes, EndBlock, BodyBlock);
3779 Br->eraseFromParent();
3780
3781 this->SetInsertPoint(&*EndBlock->getFirstInsertionPt());
3782
3783 return WavePrefix;
3784 }
3785
3786 template<bool preserveNames, typename T, typename Inserter>
3787 inline llvm::Value*
create_waveMultiPrefixBitCount(llvm::Instruction * I,llvm::Value * Val,llvm::Value * Mask)3788 LLVM3DBuilder<preserveNames, T, Inserter>::create_waveMultiPrefixBitCount(
3789 llvm::Instruction *I,
3790 llvm::Value *Val,
3791 llvm::Value *Mask)
3792 {
3793 // Similar structure to waveMatch and waveMultiPrefix
3794 auto *PreHeader = I->getParent();
3795 auto *BodyBlock = PreHeader->splitBasicBlock(I, "multiprefixbitcount-body");
3796 auto *EndBlock = BodyBlock->splitBasicBlock(
3797 I->getNextNode(), "multiprefixbitcount-end");
3798
3799 // Make sure that we set the insert point again as we've just invalidated
3800 // it with the splitBasicBlock() calls above.
3801 this->SetInsertPoint(I);
3802
3803 // Now generate the code for a single iteration of the code
3804 auto *FirstValue = this->readFirstLane(Mask);
3805
3806 auto *Count = this->create_wavePrefixBitCount(Val, FirstValue);
3807
3808 // Replace the current terminator to either exit the loop
3809 // or branch back for another iteration.
3810 auto *Br = BodyBlock->getTerminator();
3811 this->SetInsertPoint(Br);
3812 auto *ParticipatingLanes = this->create_waveInverseBallot(FirstValue);
3813 this->CreateCondBr(ParticipatingLanes, EndBlock, BodyBlock);
3814 Br->eraseFromParent();
3815
3816 this->SetInsertPoint(&*EndBlock->getFirstInsertionPt());
3817
3818 return Count;
3819 }
3820
3821 template<bool preserveNames, typename T, typename Inserter>
create_quadPrefix(llvm::Value * src,llvm::Value * type,bool inclusive)3822 inline llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::create_quadPrefix(llvm::Value* src, llvm::Value* type, bool inclusive)
3823 {
3824 llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
3825 llvm::Function* pFunc = llvm::GenISAIntrinsic::getDeclaration(
3826 module,
3827 llvm::GenISAIntrinsic::GenISA_QuadPrefix,
3828 src->getType());
3829 return this->CreateCall3(pFunc, src, type, this->getInt1(inclusive));
3830 }
3831
3832 template<bool preserveNames, typename T, typename Inserter>
get32BitLaneID()3833 inline llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::get32BitLaneID()
3834 {
3835 llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
3836 llvm::Function* pFunc = llvm::GenISAIntrinsic::getDeclaration(
3837 module,
3838 llvm::GenISAIntrinsic::GenISA_simdLaneId);
3839 llvm::Value* int16LaneId = this->CreateCall(pFunc);
3840 return this->CreateZExt(int16LaneId, this->getInt32Ty());
3841 }
3842
3843 template<bool preserveNames, typename T, typename Inserter>
getSimdSize()3844 inline llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::getSimdSize()
3845 {
3846 llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
3847 llvm::Function* pFunc = llvm::GenISAIntrinsic::getDeclaration(module, llvm::GenISAIntrinsic::GenISA_simdSize);
3848 return this->CreateCall(pFunc);
3849 }
3850
3851 template<bool preserveNames, typename T, typename Inserter>
getFirstLaneID()3852 inline llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::getFirstLaneID()
3853 {
3854 //fbl(WaveBallot(true))
3855 llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
3856 llvm::Value* ballot = this->create_waveBallot(this->getInt1(1));
3857 llvm::Function* pFunc = llvm::GenISAIntrinsic::getDeclaration(
3858 module,
3859 llvm::GenISAIntrinsic::GenISA_firstbitLo);
3860 return this->CreateCall(pFunc, ballot);
3861 }
3862
3863 template<bool preserveNames, typename T, typename Inserter>
readFirstLane(llvm::Value * src)3864 inline llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::readFirstLane(llvm::Value* src)
3865 {
3866 llvm::Value* firstLaneID = this->getFirstLaneID();
3867 return this->create_waveshuffleIndex(src, firstLaneID);
3868 }
3869
3870 ///////////////////////////////////////////////////////////////////////////////
/// @brief Creates data conversion for typed image reads.
/// Gen HW supports only a limited number of surface formats through data
/// port data cache typed read messages. The complete list of formats supported
/// for reads is available in the Programmer's Reference Manual.
/// Some of the unsupported formats are mandatory in Vulkan and OGL.
/// In order to support these formats the driver and the compiler implement the
/// following emulation:
/// Since Gen9, HW typed read messages return raw data when reading from an
/// unsupported format, so it is enough to call the conversion method
/// CreateImageDataConversion() using data returned from typed read messages.
3881 ///
3882 /// @param format Surface format of the typed image (original i.e. from shader)
3883 /// @param data Data returned by typed read message
3884 /// @returns llvm::Value* Vector of data converted to the input surface format.
3885 ///
3886 template<bool preserveNames, typename T, typename Inserter>
3887 inline
CreateImageDataConversion(IGC::SURFACE_FORMAT format,llvm::Value * data)3888 llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::CreateImageDataConversion(
3889 IGC::SURFACE_FORMAT format,
3890 llvm::Value* data)
3891 {
3892 IGC_ASSERT(nullptr != m_Platform);
3893
3894 switch (format)
3895 {
3896 case IGC::SURFACE_FORMAT::SURFACE_FORMAT_R16G16B16A16_UNORM:
3897 case IGC::SURFACE_FORMAT::SURFACE_FORMAT_R16G16B16A16_SNORM:
3898 case IGC::SURFACE_FORMAT::SURFACE_FORMAT_R8G8B8A8_UNORM:
3899 case IGC::SURFACE_FORMAT::SURFACE_FORMAT_R8G8B8A8_SNORM:
3900 case IGC::SURFACE_FORMAT::SURFACE_FORMAT_R16G16_UNORM:
3901 case IGC::SURFACE_FORMAT::SURFACE_FORMAT_R16G16_SNORM:
3902 case IGC::SURFACE_FORMAT::SURFACE_FORMAT_R8G8_UNORM:
3903 case IGC::SURFACE_FORMAT::SURFACE_FORMAT_R8G8_SNORM:
3904 case IGC::SURFACE_FORMAT::SURFACE_FORMAT_R16_UNORM:
3905 case IGC::SURFACE_FORMAT::SURFACE_FORMAT_R16_SNORM:
3906 case IGC::SURFACE_FORMAT::SURFACE_FORMAT_R8_UNORM:
3907 case IGC::SURFACE_FORMAT::SURFACE_FORMAT_R8_SNORM:
3908 if (m_Platform->hasHDCSupportForTypedReadsUnormSnormToFloatConversion())
3909 {
3910 return data;
3911 }
3912 break;
3913 default:
3914 break;
3915 }
3916
3917
3918 llvm::Value* pFormatConvertedLLVMLdUAVTypedResult = data;
3919 switch (format)
3920 {
3921 case IGC::SURFACE_FORMAT::SURFACE_FORMAT_R16G16B16A16_UNORM:
3922 {
3923 llvm::Value* pTempVec4 = llvm::UndefValue::get(IGCLLVM::FixedVectorType::get(this->getFloatTy(), 4));
3924 llvm::Value* pConstFloat = llvm::cast<llvm::ConstantFP>(llvm::ConstantFP::get(this->getFloatTy(), (1.0f / 65535.0f)));
3925 llvm::Value* pTempInt32 = llvm::UndefValue::get(this->getInt32Ty());
3926 llvm::Value* pTempInt16 = llvm::UndefValue::get(this->getInt32Ty());
3927 llvm::Value* pTempFloat = llvm::UndefValue::get(this->getFloatTy());
3928 llvm::Value* pMaskLow = this->getInt32(0x0000FFFF);
3929 llvm::Value* pShift16 = this->getInt32(0x00000010);
3930
3931 // pTempFloat = pLdUAVTypedResult[0];
3932 pTempFloat = this->CreateExtractElement(data, this->getInt32(0));
3933
3934 // Retrieve unsigned short value (component 0).
3935 pTempInt32 = this->CreateBitCast(pTempFloat, this->getInt32Ty());
3936 pTempInt16 = this->CreateAnd(pTempInt32, pMaskLow);
3937
3938 // Convert unsigned short to float (component 0).
3939 pTempFloat = this->CreateUIToFP(pTempInt16, this->getFloatTy());
3940 pTempFloat = this->CreateFMul(pTempFloat, pConstFloat);
3941
3942 // Store component 0 in output vector (pTempVec4[0]).
3943 pTempVec4 = this->CreateInsertElement(pTempVec4, pTempFloat, this->getInt32(0));
3944
3945 // Retrieve unsigned short value (component 1).
3946 pTempInt16 = this->CreateLShr(pTempInt32, pShift16);
3947
3948 // Convert unsigned short to float (component 1).
3949 pTempFloat = this->CreateUIToFP(pTempInt16, this->getFloatTy());
3950 pTempFloat = this->CreateFMul(pTempFloat, pConstFloat);
3951
3952 // Store component 1 in output vector (pTempVec4[1]).
3953 pTempVec4 = this->CreateInsertElement(pTempVec4, pTempFloat, this->getInt32(1));
3954
3955 // pTempFloat = pLdUAVTypedResult[1];
3956 pTempFloat = this->CreateExtractElement(data, this->getInt32(1));
3957
3958 // Retrieve unsigned short value (component 2).
3959 pTempInt32 = this->CreateBitCast(pTempFloat, this->getInt32Ty());
3960 pTempInt16 = this->CreateAnd(pTempInt32, pMaskLow);
3961
3962 // Convert unsigned short to float (component 2).
3963 pTempFloat = this->CreateUIToFP(pTempInt16, this->getFloatTy());
3964 pTempFloat = this->CreateFMul(pTempFloat, pConstFloat);
3965
3966 // Store component 2 in output vector (pTempVec4[2]).
3967 pTempVec4 = this->CreateInsertElement(pTempVec4, pTempFloat, this->getInt32(2));
3968
3969 // Retrieve unsigned short value (component 3).
3970 pTempInt16 = this->CreateLShr(pTempInt32, pShift16);
3971
3972 // Convert unsigned short to float (component 3).
3973 pTempFloat = this->CreateUIToFP(pTempInt16, this->getFloatTy());
3974 pTempFloat = this->CreateFMul(pTempFloat, pConstFloat);
3975
3976 // Store component 3 in output vector (pTempVec4[3]).
3977 pTempVec4 = this->CreateInsertElement(pTempVec4, pTempFloat, this->getInt32(3));
3978
3979 pFormatConvertedLLVMLdUAVTypedResult = pTempVec4;
3980 break;
3981 }
3982 case IGC::SURFACE_FORMAT::SURFACE_FORMAT_R16G16B16A16_SNORM:
3983 {
3984 llvm::Value* pTempVec4 = llvm::UndefValue::get(IGCLLVM::FixedVectorType::get(this->getFloatTy(), 4));
3985 llvm::Value* pScalingFactor = this->getFloat(1.0f / 32767.0f);
3986 llvm::Value* pTempInt32;
3987 llvm::Value* pTempInt16;
3988 llvm::Value* pTempFloat;
3989 llvm::Value* pNegativeOne = this->getFloat(-1.0f);
3990 llvm::Value* pCmp_result;
3991 llvm::Value* fieldWidth = this->getInt32(16);
3992
3993 // pTempFloat = pLdUAVTypedResult[0];
3994 pTempFloat = this->CreateExtractElement(data, this->getInt32(0));
3995
3996 // Retrieve unsigned short value (component 0).
3997 pTempInt32 = this->CreateBitCast(pTempFloat, this->getInt32Ty());
3998 pTempInt16 = this->Create_IBFE(fieldWidth, this->getInt32(0), pTempInt32);
3999
4000 // Convert signed short to float (component 0).
4001 pTempFloat = this->CreateSIToFP(pTempInt16, this->getFloatTy());
4002 pTempFloat = this->CreateFMul(pTempFloat, pScalingFactor);
4003
4004 // Compare with -1.0f
4005 pCmp_result = this->CreateFCmp(llvm::FCmpInst::FCMP_OGE, pTempFloat, pNegativeOne);
4006 pTempFloat = this->CreateSelect(pCmp_result, pTempFloat, pNegativeOne);
4007
4008 // Store component 0 in output vector (pTempVec4[0]).
4009 pTempVec4 = this->CreateInsertElement(pTempVec4, pTempFloat, this->getInt32(0));
4010
4011 // Retrieve unsigned short value (component 1).
4012 pTempInt16 = this->CreateAShr(pTempInt32, 16);
4013
4014 // Convert signed short to float (component 1).
4015 pTempFloat = this->CreateSIToFP(pTempInt16, this->getFloatTy());
4016 pTempFloat = this->CreateFMul(pTempFloat, pScalingFactor);
4017
4018 // Compare with -1.0f
4019 pCmp_result = this->CreateFCmp(llvm::FCmpInst::FCMP_OGE, pTempFloat, pNegativeOne);
4020 pTempFloat = this->CreateSelect(pCmp_result, pTempFloat, pNegativeOne);
4021
4022 // Store component 1 in output vector (pTempVec4[1]).
4023 pTempVec4 = this->CreateInsertElement(pTempVec4, pTempFloat, this->getInt32(1));
4024
4025 // pTempFloat = pLdUAVTypedResult[1];
4026 pTempFloat = this->CreateExtractElement(data, this->getInt32(1));
4027
4028 // Retrieve unsigned short value (component 2).
4029 pTempInt32 = this->CreateBitCast(pTempFloat, this->getInt32Ty());
4030 pTempInt16 = this->Create_IBFE(fieldWidth, this->getInt32(0), pTempInt32);
4031
4032 // Convert unsigned short to float (component 2).
4033 pTempFloat = this->CreateSIToFP(pTempInt16, this->getFloatTy());
4034 pTempFloat = this->CreateFMul(pTempFloat, pScalingFactor);
4035
4036 // Compare with -1.0f
4037 pCmp_result = this->CreateFCmp(llvm::FCmpInst::FCMP_OGE, pTempFloat, pNegativeOne);
4038 pTempFloat = this->CreateSelect(pCmp_result, pTempFloat, pNegativeOne);
4039
4040 // Store component 2 in output vector (pTempVec4[2]).
4041 pTempVec4 = this->CreateInsertElement(pTempVec4, pTempFloat, this->getInt32(2));
4042
4043 // Retrieve unsigned short value (component 3).
4044 pTempInt16 = this->CreateAShr(pTempInt32, 16);
4045
4046 // Convert unsigned short to float (component 3).
4047 pTempFloat = this->CreateSIToFP(pTempInt16, this->getFloatTy());
4048 pTempFloat = this->CreateFMul(pTempFloat, pScalingFactor);
4049
4050 // Compare with -1.0f
4051 pCmp_result = this->CreateFCmp(llvm::FCmpInst::FCMP_OGE, pTempFloat, pNegativeOne);
4052 pTempFloat = this->CreateSelect(pCmp_result, pTempFloat, pNegativeOne);
4053
4054 // Store component 3 in output vector (pTempVec4[3]).
4055 pTempVec4 = this->CreateInsertElement(pTempVec4, pTempFloat, this->getInt32(3));
4056
4057 pFormatConvertedLLVMLdUAVTypedResult = pTempVec4;
4058 break;
4059 }
4060 case IGC::SURFACE_FORMAT::SURFACE_FORMAT_R10G10B10A2_UNORM:
4061 {
4062 llvm::Value* pImmediateXYZ = llvm::cast<llvm::ConstantFP>(llvm::ConstantFP::get(this->getFloatTy(), (1.0f / 1023.0f)));
4063 llvm::Value* pImmediateW = llvm::cast<llvm::ConstantFP>(llvm::ConstantFP::get(this->getFloatTy(), (1.0f / 3.0f)));
4064 llvm::Value* pMaskXYZ = this->getInt32(0x000003ff);
4065 llvm::Value* pMaskW = this->getInt32(0x00000003);
4066 llvm::Value* pShiftData = this->getInt32(10);
4067
4068 llvm::Value* pTempVec4 = llvm::UndefValue::get(IGCLLVM::FixedVectorType::get(this->getFloatTy(), 4));
4069 llvm::Value* pTempInt32 = llvm::UndefValue::get(this->getInt32Ty());
4070 llvm::Value* pTempIntWithMask = llvm::UndefValue::get(this->getInt32Ty());
4071 llvm::Value* pTempFloat = llvm::UndefValue::get(this->getFloatTy());
4072 llvm::Value* pTempShiftRightData = llvm::UndefValue::get(this->getInt32Ty());
4073
4074 // pTempFloat = pLdUAVTypedResult[0];
4075 pTempFloat = this->CreateExtractElement(data, this->getInt32(0));
4076
4077 // Retrieve unsigned short value (component 0).
4078 pTempInt32 = this->CreateBitCast(pTempFloat, this->getInt32Ty());
4079 pTempIntWithMask = this->CreateAnd(pTempInt32, pMaskXYZ);
4080
4081 // Convert unsigned short to float (component 0).
4082 pTempFloat = this->CreateUIToFP(pTempIntWithMask, this->getFloatTy());
4083 pTempFloat = this->CreateFMul(pTempFloat, pImmediateXYZ);
4084
4085 // Store component 0 in output vector (pTempVec4[0]).
4086 pTempVec4 = this->CreateInsertElement(pTempVec4, pTempFloat, this->getInt32(0));
4087
4088 // Retrieve unsigned short value (component 0).
4089 pTempShiftRightData = this->CreateLShr(pTempInt32, pShiftData);
4090
4091 pTempIntWithMask = this->CreateAnd(pTempShiftRightData, pMaskXYZ);
4092
4093 // Convert unsigned short to float.
4094 pTempFloat = this->CreateUIToFP(pTempIntWithMask, this->getFloatTy());
4095 pTempFloat = this->CreateFMul(pTempFloat, pImmediateXYZ);
4096
4097 // Store component 1 in output vector (pTempVec4[1]).
4098 pTempVec4 = this->CreateInsertElement(pTempVec4, pTempFloat, this->getInt32(1));
4099
4100 // Retrieve unsigned short value.
4101 pTempShiftRightData = this->CreateLShr(pTempShiftRightData, pShiftData);
4102
4103 pTempIntWithMask = this->CreateAnd(pTempShiftRightData, pMaskXYZ);
4104
4105 // Convert unsigned short to float.
4106 pTempFloat = this->CreateUIToFP(pTempIntWithMask, this->getFloatTy());
4107 pTempFloat = this->CreateFMul(pTempFloat, pImmediateXYZ);
4108
4109 // Store component 2 in output vector (pTempVec4[1]).
4110 pTempVec4 = this->CreateInsertElement(pTempVec4, pTempFloat, this->getInt32(2));
4111
4112 // Retrieve unsigned short value.
4113 pTempShiftRightData = this->CreateLShr(pTempShiftRightData, pShiftData);
4114
4115 pTempIntWithMask = this->CreateAnd(pTempShiftRightData, pMaskW);
4116
4117 // Convert unsigned short to float.
4118 pTempFloat = this->CreateUIToFP(pTempIntWithMask, this->getFloatTy());
4119 pTempFloat = this->CreateFMul(pTempFloat, pImmediateW);
4120
4121 // Store component 3 in output vector (pTempVec4[1]).
4122 pTempVec4 = this->CreateInsertElement(pTempVec4, pTempFloat, this->getInt32(3));
4123 pFormatConvertedLLVMLdUAVTypedResult = pTempVec4;
4124 break;
4125 }
4126 case IGC::SURFACE_FORMAT::SURFACE_FORMAT_R11G11B10_FLOAT:
4127 {
4128 // This surface format packs 3 half-float values into 32-bit string.
4129 // Half-floats are always non-negative, so to save space sign bit
4130 // is not stored and assumed to be zero.
4131 // Only 11 or 10 most significant bits (not counting sign bit)
4132 // of the 16 bits of IEEE 754 float16 are stored.
4133 // The least significant bits of the mantissa are assumed to be zero.
4134 // First value is stored in bits 0--10. (r)
4135 // Second value is stored in bits 11 - 22 (g)
4136 // Third value is stored in bits 22 - 31 (b)
4137 // Fourth value is set to 1.0f.
4138
4139 llvm::Value* pMaskX = this->getInt32(0x000007ff);
4140 llvm::Value* pMaskY = this->getInt32(0x00007ff0);
4141 llvm::Value* pMaskZ = this->getInt32(0x00007fe0);
4142 llvm::Value* pShiftDataX = this->getInt32(4);
4143 llvm::Value* pShiftDataY = this->getInt32(7);
4144 llvm::Value* pShiftDataZ = this->getInt32(10);
4145 llvm::Value* pTempFloat;
4146 llvm::Value* pTempFloat0;
4147 llvm::Value* pTempInt;
4148 llvm::Value* pTempInt0;
4149
4150 llvm::Value* pTempVec4 = llvm::UndefValue::get(IGCLLVM::FixedVectorType::get(this->getFloatTy(), 4));
4151
4152 // pTempFloat0 = pLdUAVTypedResult[0];
4153 pTempFloat0 = this->CreateExtractElement(data, this->getInt32(0));
4154 pTempInt0 = this->CreateBitCast(pTempFloat0, this->getInt32Ty());
4155
4156 pTempInt = this->CreateAnd(pTempInt0, pMaskX);
4157 pTempInt = this->CreateShl(pTempInt, pShiftDataX);
4158 pTempInt = this->CreateTrunc(pTempInt, this->getInt16Ty());
4159 pTempFloat = this->CreateBitCast(pTempInt, llvm::Type::getHalfTy(this->getContext()));
4160 pTempFloat = this->CreateF16TOF32(pTempFloat);
4161
4162 // Store component 0 in output vector (pTempVec4[0]).
4163 pTempVec4 = this->CreateInsertElement(pTempVec4, pTempFloat, this->getInt32(0));
4164
4165 pTempInt0 = this->CreateLShr(pTempInt0, pShiftDataY);
4166 pTempInt = this->CreateAnd(pTempInt0, pMaskY);
4167 pTempInt = this->CreateTrunc(pTempInt, this->getInt16Ty());
4168 pTempFloat = this->CreateBitCast(pTempInt, llvm::Type::getHalfTy(this->getContext()));
4169 pTempFloat = this->CreateF16TOF32(pTempFloat);
4170
4171 // Store component 1 in output vector (pTempVec4[1]).
4172 pTempVec4 = this->CreateInsertElement(pTempVec4, pTempFloat, this->getInt32(1));
4173
4174 pTempInt0 = this->CreateLShr(pTempInt0, pShiftDataZ);
4175 pTempInt = this->CreateAnd(pTempInt0, pMaskZ);
4176 pTempInt = this->CreateTrunc(pTempInt, this->getInt16Ty());
4177 pTempFloat = this->CreateBitCast(pTempInt, llvm::Type::getHalfTy(this->getContext()));
4178 pTempFloat = this->CreateF16TOF32(pTempFloat);
4179
4180 // Store component 2 in output vector (pTempVec4[2]).
4181 pTempVec4 = this->CreateInsertElement(pTempVec4, pTempFloat, this->getInt32(2));
4182
4183 // store 1.0 into component 3
4184 pTempVec4 = this->CreateInsertElement(pTempVec4, getFloat(1.0f), this->getInt32(3));
4185 pFormatConvertedLLVMLdUAVTypedResult = pTempVec4;
4186 break;
4187 }
4188 case IGC::SURFACE_FORMAT::SURFACE_FORMAT_R10G10B10A2_UINT:
4189 {
4190 // AND ro.x, ri.x, { 0x000003ff };
4191 // SHR ri.x, ri.x, { 10 };
4192 // AND ro.y, ri.x, { 0x000003ff };
4193 // SHR ri.x, ri.x, { 10 };
4194 // AND ro.z, ri.x, { 0x000003ff };
4195 // SHR ri.x, ri.x, { 10 };
4196 // AND ro.w, ri.x, { 0x00000003 };
4197 // copy results
4198 llvm::Value* pMaskXYZ = this->getInt32(0x000003ff);
4199 llvm::Value* pMaskW = this->getInt32(0x00000003);
4200 llvm::Value* pShiftDataXYZ = this->getInt32(10);
4201
4202 llvm::Value* pTempFloat = llvm::UndefValue::get(this->getFloatTy());
4203 llvm::Value* pTempInt32 = llvm::UndefValue::get(this->getInt32Ty());
4204 llvm::Value* pTempIntRes = llvm::UndefValue::get(this->getInt32Ty());
4205
4206 llvm::Value* pTempVec4 = llvm::UndefValue::get(IGCLLVM::FixedVectorType::get(this->getFloatTy(), 4));
4207
4208 // pTempFloat = pLdUAVTypedResult[0];
4209 pTempFloat = this->CreateExtractElement(data, this->getInt32(0));
4210 pTempInt32 = this->CreateBitCast(pTempFloat, this->getInt32Ty());
4211
4212 // AND ro.x, ri.x, { 0x000003ff };
4213 pTempIntRes = this->CreateAnd(pTempInt32, pMaskXYZ);
4214 pTempFloat = this->CreateBitCast(pTempIntRes, this->getFloatTy());
4215
4216 // Store component 0 in output vector (pTempVec4[0]).
4217 pTempVec4 = this->CreateInsertElement(pTempVec4, pTempFloat, this->getInt32(0));
4218
4219 // SHR ri.x, ri.x, { 10 };
4220 // AND ro.y, ri.x, { 0x000003ff };
4221 pTempInt32 = this->CreateLShr(pTempInt32, pShiftDataXYZ);
4222 pTempIntRes = this->CreateAnd(pTempInt32, pMaskXYZ);
4223 pTempFloat = this->CreateBitCast(pTempIntRes, this->getFloatTy());
4224
4225 // Store component 1 in output vector (pTempVec4[0]).
4226 pTempVec4 = this->CreateInsertElement(pTempVec4, pTempFloat, this->getInt32(1));
4227
4228 // SHR ri.x, ri.x, { 10 };
4229 // AND ro.z, ri.x, { 0x000003ff };
4230 pTempInt32 = this->CreateLShr(pTempInt32, pShiftDataXYZ);
4231 pTempIntRes = this->CreateAnd(pTempInt32, pMaskXYZ);
4232 pTempFloat = this->CreateBitCast(pTempIntRes, this->getFloatTy());
4233
4234 // Store component 2 in output vector (pTempVec4[0]).
4235 pTempVec4 = this->CreateInsertElement(pTempVec4, pTempFloat, this->getInt32(2));
4236
4237 // SHR ri.x, ri.x, { 10 };
4238 // AND ro.w, ri.x, { 3 };
4239 pTempInt32 = this->CreateLShr(pTempInt32, pShiftDataXYZ);
4240 pTempIntRes = this->CreateAnd(pTempInt32, pMaskW);
4241 pTempFloat = this->CreateBitCast(pTempIntRes, this->getFloatTy());
4242
4243 // Store component 3 in output vector (pTempVec4[0]).
4244 pTempVec4 = this->CreateInsertElement(pTempVec4, pTempFloat, this->getInt32(3));
4245 pFormatConvertedLLVMLdUAVTypedResult = pTempVec4;
4246 break;
4247 }
4248 case IGC::SURFACE_FORMAT::SURFACE_FORMAT_R8G8B8A8_UNORM:
4249 {
4250 // immX = 0x8, immY = 0x10, immZ = 0x18
4251 // immMaskLow = 0x000000FF
4252 // AND rTemp.x, ri.x, immMaskLow
4253 // ubfe rTemp.y, immX, immX, ri.x
4254 // ubfe rTemp.z, immX, immY, ri.x
4255 // ubfe rTemp.w, immX, immZ, ri.x
4256 // ubtof rTemp, rTemp
4257 // Fmul rOutput, rTemp, 1.0f/255.0f
4258 llvm::Value* pMaskLow8 = this->getInt32(0x000000FF);
4259 llvm::Value* pImmX = this->getInt32(0x8);
4260 llvm::Value* pImmY = this->getInt32(0x10);
4261 llvm::Value* pImmZ = this->getInt32(0x18);
4262 llvm::Value* pConstFloat = this->getFloat(1.0f / 255.0f);
4263 llvm::Value* pTempFloat = llvm::UndefValue::get(this->getFloatTy());
4264 llvm::Value* pTempInt32 = llvm::UndefValue::get(this->getInt32Ty());
4265 llvm::Value* pTempInt32Res = llvm::UndefValue::get(this->getInt32Ty());
4266 llvm::Value* pTempVec4 = llvm::UndefValue::get(IGCLLVM::FixedVectorType::get(this->getFloatTy(), 4));
4267
4268 // pTempFloat = pLdUAVTypedResult[0];
4269 pTempFloat = this->CreateExtractElement(data, this->getInt32(0));
4270 pTempInt32 = this->CreateBitCast(pTempFloat, this->getInt32Ty());
4271
4272 // AND rTemp.x, ri.x, immMaskLow
4273 pTempInt32Res = this->CreateAnd(pTempInt32, pMaskLow8);
4274
4275 // ubtof rTemp.x, rTemp.x
4276 pTempFloat = this->CreateUIToFP(pTempInt32Res, this->getFloatTy());
4277
4278 // Fmul rOutput.x, rTemp.x, 1.0f/255.0f
4279 pTempFloat = this->CreateFMul(pTempFloat, pConstFloat);
4280
4281 // Store component 0 in output vector (pTempVec4[0]).
4282 pTempVec4 = this->CreateInsertElement(pTempVec4, pTempFloat, this->getInt32(0));
4283
4284 // ubfe rTemp.y, immX, immX, ri.x
4285 pTempInt32Res = this->Create_UBFE(pImmX, pImmX, pTempInt32);
4286
4287 // ubtof rTemp.y, rTemp.y
4288 pTempFloat = this->CreateUIToFP(pTempInt32Res, this->getFloatTy());
4289
4290 // Fmul rOutput.y, rTemp.y, 1.0f/255.0f
4291 pTempFloat = this->CreateFMul(pTempFloat, pConstFloat);
4292
4293 // Store component 1 in output vector (pTempVec4[0]).
4294 pTempVec4 = this->CreateInsertElement(pTempVec4, pTempFloat, this->getInt32(1));
4295
4296 // ubfe rTemp.z, immX, immY, ri.x
4297 pTempInt32Res = this->Create_UBFE(pImmX, pImmY, pTempInt32);
4298
4299 // ubtof rTemp.z, rTemp.z
4300 pTempFloat = this->CreateUIToFP(pTempInt32Res, this->getFloatTy());
4301
4302 // Fmul rOutput.z, rTemp.z, 1.0f/255.0f
4303 pTempFloat = this->CreateFMul(pTempFloat, pConstFloat);
4304
4305 // Store component 2 in output vector (pTempVec4[0]).
4306 pTempVec4 = this->CreateInsertElement(pTempVec4, pTempFloat, this->getInt32(2));
4307
4308 // ubfe rTemp.w, immX, immZ, ri.x
4309 pTempInt32Res = this->Create_UBFE(pImmX, pImmZ, pTempInt32);
4310
4311 // ubtof rTemp.w, rTemp.w
4312 pTempFloat = this->CreateUIToFP(pTempInt32Res, this->getFloatTy());
4313
4314 // Fmul rOutput.w, rTemp.w, 1.0f/255.0f
4315 pTempFloat = this->CreateFMul(pTempFloat, pConstFloat);
4316
4317 // Store component 3 in output vector (pTempVec4[0]).
4318 pTempVec4 = this->CreateInsertElement(pTempVec4, pTempFloat, this->getInt32(3));
4319
4320 pFormatConvertedLLVMLdUAVTypedResult = pTempVec4;
4321 break;
4322 }
4323 case IGC::SURFACE_FORMAT::SURFACE_FORMAT_R8G8B8A8_SNORM:
4324 {
4325 llvm::Value* pScalingFactor = this->getFloat(1.0f / 127.0f);
4326 llvm::Value* fieldWidth = this->getInt32(8);
4327 llvm::Value* fpNegOne = this->getFloat(-1.0f);
4328
4329 // pTempFloat = pLdUAVTypedResult[0];
4330 llvm::Value* pTempFloat = this->CreateExtractElement(data, this->getInt32(0));
4331 // cast to int32 since result is seen as float
4332 llvm::Value* pInputAsInt32 = this->CreateBitCast(pTempFloat, this->getInt32Ty());
4333
4334 // create 4-component output vector
4335 llvm::Value* pOutputVec4 = llvm::UndefValue::get(IGCLLVM::FixedVectorType::get(this->getFloatTy(), 4));
4336
4337 // for each of the four channels
4338 for (unsigned int ch = 0; ch < 4; ++ch)
4339 {
4340 // extract 8 bits with sign extend from position 8*ch..8*ch+7
4341 // for bits 24..31 we can use arithmetic shift right instead of bit extract
4342 llvm::Value* pTempInt32Res = (ch < 3) ?
4343 this->Create_IBFE(fieldWidth, this->getInt32(8 * ch), pInputAsInt32) :
4344 this->CreateAShr(pInputAsInt32, 8 * ch);
4345
4346 // convert to float
4347 pTempFloat = this->CreateSIToFP(pTempInt32Res, this->getFloatTy());
4348
4349 // multiply bthis->y the scaling factor 1.0f/127.0f
4350 pTempFloat = this->CreateFMul(pTempFloat, pScalingFactor);
4351
4352 // Fcmp_ge rFlag, rTemp.x, -1.0f
4353 // Sel.rFlag rOutput.x, rTemp.x, -1.0f
4354 llvm::Value* pFlag = this->CreateFCmp(llvm::FCmpInst::FCMP_OGE, pTempFloat, fpNegOne);
4355 pTempFloat = this->CreateSelect(pFlag, pTempFloat, fpNegOne);
4356
4357 // Store component ch in output vector (pTempVec4[0]).
4358 pOutputVec4 = this->CreateInsertElement(pOutputVec4, pTempFloat, this->getInt32(ch));
4359 }
4360
4361 pFormatConvertedLLVMLdUAVTypedResult = pOutputVec4;
4362 break;
4363 }
4364 case IGC::SURFACE_FORMAT::SURFACE_FORMAT_R16G16_UNORM:
4365 {
4366 // immMaskHigh = 0x0000FFFF
4367 // rImm.zw = {0.0f, 1.0f}
4368 // AND rTemp.x, ri.x, immMaskHigh
4369 // SHR rTemp.y, ri.x, 0x10,
4370 // USTOF rTemp.xy, rTemp.xy
4371 // FMUL rOutput.xy, rTemp.xy, 1.0f/65535.0f
4372 // MOV rOutput.zw, rImm.zw
4373 llvm::Value* pMaskHigh = this->getInt32(0x0000FFFF);
4374 llvm::Value* pShiftVal = this->getInt32(0x10);
4375 llvm::Value* pImmZ = this->getFloat(0.0f);
4376 llvm::Value* pImmW = this->getFloat(1.0f);
4377 llvm::Value* pConstFloat = this->getFloat(1.0f / 65535.0f);
4378 llvm::Value* pTempFloat = llvm::UndefValue::get(this->getFloatTy());
4379 llvm::Value* pTempInt32 = llvm::UndefValue::get(this->getInt32Ty());
4380 llvm::Value* pTempInt32Res = llvm::UndefValue::get(this->getInt32Ty());
4381 llvm::Value* pTempVec4 = llvm::UndefValue::get(IGCLLVM::FixedVectorType::get(this->getFloatTy(), 4));
4382
4383 // pTempFloat = pLdUAVTypedResult[0];
4384 pTempFloat = this->CreateExtractElement(data, this->getInt32(0));
4385 pTempInt32 = this->CreateBitCast(pTempFloat, this->getInt32Ty());
4386
4387 // AND rTemp.x, ri.x, immMaskHigh
4388 pTempInt32Res = this->CreateAnd(pTempInt32, pMaskHigh);
4389
4390 pTempFloat = this->CreateUIToFP(pTempInt32Res, this->getFloatTy());
4391 pTempFloat = this->CreateFMul(pTempFloat, pConstFloat);
4392
4393 // Store component 0 in output vector (pTempVec4[0]).
4394 pTempVec4 = this->CreateInsertElement(pTempVec4, pTempFloat, this->getInt32(0));
4395
4396 pTempInt32Res = this->CreateLShr(pTempInt32, pShiftVal);
4397 pTempFloat = this->CreateUIToFP(pTempInt32Res, this->getFloatTy());
4398 pTempFloat = this->CreateFMul(pTempFloat, pConstFloat);
4399
4400 // Store component 1 in output vector (pTempVec4[0]).
4401 pTempVec4 = this->CreateInsertElement(pTempVec4, pTempFloat, this->getInt32(1));
4402
4403 // Store component 2 to value 0.0f in output vector (pTempVec4[0]).
4404 pTempVec4 = this->CreateInsertElement(pTempVec4, pImmZ, this->getInt32(2));
4405
4406 // Store component 3 to Value 1.0f in output vector (pTempVec4[0]).
4407 pTempVec4 = this->CreateInsertElement(pTempVec4, pImmW, this->getInt32(3));
4408 pFormatConvertedLLVMLdUAVTypedResult = pTempVec4;
4409 break;
4410 }
4411 case IGC::SURFACE_FORMAT::SURFACE_FORMAT_R16G16_SNORM:
4412 {
4413 // immMaskLow16 = 0x0000FFFF
4414 // rImm.zw = {0.0f, 1.0f}
4415 // AND rTemp.x, ri.x, immMaskLow16
4416 // SHR rTemp.y, ri.x, 0x10,
4417 // STOF rTemp.xy, rTemp.xy
4418 // FMUL rTemp.xy, rTemp.xy, 1.0f / 32767.0f
4419 // FCMP_GE rFlag.xy, rTemp.xy, -1.0f
4420 // SEL_rFlag.xy rOutput.xy, rTemp.xy, -1.0f
4421 // MOV rOutput.zw, rImm.zw
4422 llvm::Value* pScalingFactor = getFloat(1.0f / 32767.0f);
4423 llvm::Value* pOutVec4 = llvm::UndefValue::get(IGCLLVM::FixedVectorType::get(this->getFloatTy(), 4));
4424
4425 // pTempFloat = pLdUAVTypedResult[0];
4426 llvm::Value* pTempFloat = this->CreateExtractElement(data, this->getInt32(0));
4427 llvm::Value* pTempInt32 = this->CreateBitCast(pTempFloat, this->getInt32Ty());
4428
4429 // extract bits 0..15 and sign extend the result
4430 llvm::Value* pTempInt32Res = Create_IBFE(this->getInt32(16), this->getInt32(0), pTempInt32);
4431
4432 // convert to float and apply scaling factor
4433 pTempFloat = this->CreateSIToFP(pTempInt32Res, this->getFloatTy());
4434 pTempFloat = this->CreateFMul(pTempFloat, pScalingFactor);
4435
4436 // clamp to range [-1.0f, 1.0f] since the value can be little less than -1.0f
4437 // Fcmp_ge rFlag, rTemp.x, -1.0f
4438 // Sel.rFlag rOutput.x, rTemp.x, -1.0f
4439 llvm::Value* pFlag = this->CreateFCmp(llvm::FCmpInst::FCMP_OGE, pTempFloat, this->getFloat(-1.0f));
4440 pTempFloat = this->CreateSelect(pFlag, pTempFloat, this->getFloat(-1.0f));
4441
4442 // Store component 0 in output vector (pTempVec4[0]).
4443 pOutVec4 = this->CreateInsertElement(pOutVec4, pTempFloat, this->getInt32(0));
4444
4445 // extract bits 16..31 with sign extension
4446 pTempInt32Res = this->CreateAShr(pTempInt32, 16);
4447 pTempFloat = this->CreateSIToFP(pTempInt32Res, this->getFloatTy());
4448 pTempFloat = this->CreateFMul(pTempFloat, pScalingFactor);
4449
4450 // Fcmp_ge rFlag, rTemp.y, -1.0f
4451 // Sel.rFlag rOutput.y, rTemp.y, -1.0f
4452 pFlag = this->CreateFCmp(llvm::FCmpInst::FCMP_OGE, pTempFloat, this->getFloat(-1.0f));
4453
4454 pTempFloat = this->CreateSelect(pFlag, pTempFloat, this->getFloat(-1.0f));
4455
4456 // Store component 1 in output vector (pTempVec4[0]).
4457 pOutVec4 = this->CreateInsertElement(pOutVec4, pTempFloat, this->getInt32(1));
4458
4459 // Store 0.0f, 1.0f in the remaining components of the output vector
4460 pOutVec4 = this->CreateInsertElement(pOutVec4, getFloat(0.0f), this->getInt32(2));
4461 pOutVec4 = this->CreateInsertElement(pOutVec4, getFloat(1.0f), this->getInt32(3));
4462 pFormatConvertedLLVMLdUAVTypedResult = pOutVec4;
4463 break;
4464 }
4465 case IGC::SURFACE_FORMAT::SURFACE_FORMAT_R8G8_UNORM:
4466 {
4467 // immMaskLow8 = 0x000000FF
4468 // rImm.zw = {0.0f, 1.0f}
4469 // AND rTemp.x, ri.x, immMaskLow8
4470 // SHR rTemp.y, ri.x, 0x8,
4471 // USTOF rTemp.xy, rTemp.xy
4472 // FMUL rOutput.xy, rTemp.xy, 1.0f / 255.0f
4473 // MOV rOutput.zw, rImm.zw
4474 llvm::Value* pMaskLow8 = this->getInt32(0x000000FF);
4475 llvm::Value* pShiftVal = this->getInt32(0x8);
4476 llvm::Value* pImmZ = this->getFloat(0.0f);
4477 llvm::Value* pImmW = this->getFloat(1.0f);
4478 llvm::Value* pConstFloat = this->getFloat(1.0f / 255.0f);
4479 llvm::Value* pTempFloat = llvm::UndefValue::get(this->getFloatTy());
4480 llvm::Value* pTempInt32 = llvm::UndefValue::get(this->getInt32Ty());
4481 llvm::Value* pTempInt32Res = llvm::UndefValue::get(this->getInt32Ty());
4482 llvm::Value* pTempVec4 = llvm::UndefValue::get(IGCLLVM::FixedVectorType::get(this->getFloatTy(), 4));
4483
4484 // pTempFloat = pLdUAVTypedResult[0];
4485 pTempFloat = this->CreateExtractElement(data, this->getInt32(0));
4486 pTempInt32 = this->CreateBitCast(pTempFloat, this->getInt32Ty());
4487
4488 // AND rTemp.x, ri.x, immMaskHigh
4489 pTempInt32Res = this->CreateAnd(pTempInt32, pMaskLow8);
4490
4491 pTempFloat = this->CreateUIToFP(pTempInt32Res, this->getFloatTy());
4492 pTempFloat = this->CreateFMul(pTempFloat, pConstFloat);
4493
4494 // Store component 0 in output vector (pTempVec4[0]).
4495 pTempVec4 = this->CreateInsertElement(pTempVec4, pTempFloat, this->getInt32(0));
4496
4497 pTempInt32Res = this->CreateLShr(pTempInt32, pShiftVal);
4498 pTempFloat = this->CreateUIToFP(pTempInt32Res, this->getFloatTy());
4499 pTempFloat = this->CreateFMul(pTempFloat, pConstFloat);
4500
4501 // Store component 1 in output vector (pTempVec4[0]).
4502 pTempVec4 = this->CreateInsertElement(pTempVec4, pTempFloat, this->getInt32(1));
4503
4504 // Store component 2 to value 0.0f in output vector (pTempVec4[0]).
4505 pTempVec4 = this->CreateInsertElement(pTempVec4, pImmZ, this->getInt32(2));
4506
4507 // Store component 3 to Value 1.0f in output vector (pTempVec4[0]).
4508 pTempVec4 = this->CreateInsertElement(pTempVec4, pImmW, this->getInt32(3));
4509 pFormatConvertedLLVMLdUAVTypedResult = pTempVec4;
4510 break;
4511 }
4512 case IGC::SURFACE_FORMAT::SURFACE_FORMAT_R8G8_SNORM:
4513 {
4514 // immMaskLow8 = 0x000000FF
4515 // rImm.zw = {0.0f, 1.0f}
4516 // AND rTemp.x, ri.x, immMaskLow8
4517 // SHR rTemp.y, ri.x, 0x8,
4518 // STOF rTemp.xy, rTemp.xy
4519 // FMUL rTemp.xy, rTemp.xy, 1.0f / 127.0f
4520 // FCMP_GE rFlag.xy, rTemp.xy, -1.0f
4521 // SEL_rFlag.xy rOutput.xy, rTemp.xy, -1.0f
4522 // MOV rOutput.zw, rImm.zw
4523 llvm::Value* pScalingFactor = getFloat(1.0f / 127.0f);
4524
4525 llvm::Value* pOutVec4 = llvm::UndefValue::get(IGCLLVM::FixedVectorType::get(this->getFloatTy(), 4));
4526
4527 llvm::Value* fieldWidth = this->getInt32(8);
4528
4529 // pTempFloat = pLdUAVTypedResult[0];
4530 llvm::Value* pTempFloat = this->CreateExtractElement(data, this->getInt32(0));
4531 llvm::Value* pInputInt32 = this->CreateBitCast(pTempFloat, this->getInt32Ty());
4532
4533 llvm::Value* pTempInt32Res = Create_IBFE(fieldWidth, this->getInt32(0), pInputInt32);
4534 pTempFloat = this->CreateSIToFP(pTempInt32Res, this->getFloatTy());
4535 pTempFloat = this->CreateFMul(pTempFloat, pScalingFactor);
4536
4537 // Fcmp_ge rFlag, rTemp.x, -1.0f
4538 // Sel.rFlag rOutput.x, rTemp.x, -1.0f
4539 llvm::Value* pFlag = this->CreateFCmp(llvm::FCmpInst::FCMP_OGE, pTempFloat, getFloat(-1.0f));
4540
4541 pTempFloat = this->CreateSelect(pFlag, pTempFloat, getFloat(-1.0f));
4542 // Store component 0 in output vector (pTempVec4[0]).
4543 pOutVec4 = this->CreateInsertElement(pOutVec4, pTempFloat, this->getInt32(0));
4544
4545 // extract bits 8..15 and sign extend the result
4546 pTempInt32Res = this->Create_IBFE(fieldWidth, this->getInt32(8), pInputInt32);
4547
4548 pTempFloat = this->CreateSIToFP(pTempInt32Res, this->getFloatTy());
4549 pTempFloat = this->CreateFMul(pTempFloat, pScalingFactor);
4550
4551 // Fcmp_ge rFlag, rTemp.y, -1.0f
4552 // Sel.rFlag rOutput.y, rTemp.y, -1.0f
4553 pFlag = this->CreateFCmp(llvm::FCmpInst::FCMP_OGE, pTempFloat, getFloat(-1.0f));
4554 pTempFloat = this->CreateSelect(pFlag, pTempFloat, getFloat(-1.0f));
4555
4556 // store the value in component 1 of the output vector
4557 pOutVec4 = this->CreateInsertElement(pOutVec4, pTempFloat, this->getInt32(1));
4558
4559 // store 0.0f, 1.0f in the remaining components of the output vector
4560 pOutVec4 = this->CreateInsertElement(pOutVec4, getFloat(0.0f), this->getInt32(2));
4561 pOutVec4 = this->CreateInsertElement(pOutVec4, getFloat(1.0f), this->getInt32(3));
4562 pFormatConvertedLLVMLdUAVTypedResult = pOutVec4;
4563 break;
4564 }
4565 case IGC::SURFACE_FORMAT::SURFACE_FORMAT_R16_UNORM:
4566 {
4567 // rImm.yzw = {0.0f, 0.0f, 1.0f}
4568 // USTOF rTemp.x, ri.x
4569 // FMUL rOutput.x, rTemp.x, 1.0f / 65535.0f
4570 // MOV rOutput.yzw, rImm.yzw
4571 llvm::Value* pScalingFactor = getFloat(1.0f / 65535.0f);
4572 llvm::Value* pTempFloat = llvm::UndefValue::get(this->getFloatTy());
4573 llvm::Value* pTempInt32 = llvm::UndefValue::get(this->getInt32Ty());
4574 llvm::Value* pTempVec4 = llvm::UndefValue::get(IGCLLVM::FixedVectorType::get(this->getFloatTy(), 4));
4575
4576 // pTempFloat = pLdUAVTypedResult[0];
4577 pTempFloat = this->CreateExtractElement(data, this->getInt32(0));
4578 pTempInt32 = this->CreateBitCast(pTempFloat, this->getInt32Ty());
4579
4580 pTempFloat = this->CreateUIToFP(pTempInt32, this->getFloatTy());
4581 pTempFloat = this->CreateFMul(pTempFloat, pScalingFactor);
4582
4583 // Store component 0 in output vector (pTempVec4[0]).
4584 pTempVec4 = this->CreateInsertElement(pTempVec4, pTempFloat, this->getInt32(0));
4585
4586 // Store 0.0f, 0.0f, 1.0f, in remaining components of the output
4587 llvm::Value* pFPZero = getFloat(0.0f);
4588 pTempVec4 = this->CreateInsertElement(pTempVec4, pFPZero, this->getInt32(1));
4589 pTempVec4 = this->CreateInsertElement(pTempVec4, pFPZero, this->getInt32(2));
4590 pTempVec4 = this->CreateInsertElement(pTempVec4, getFloat(1.0f), this->getInt32(3));
4591 pFormatConvertedLLVMLdUAVTypedResult = pTempVec4;
4592 break;
4593 }
4594 case IGC::SURFACE_FORMAT::SURFACE_FORMAT_R16_SNORM:
4595 {
4596 // rImm.yzw = {0.0f, 0.0f, 1.0f}
4597 // STOF rTemp.x, ri.x
4598 // FMUL rTemp.x, rTemp.x, 1.0f / 32767.0f
4599 // FCMP_GE rFlag.x, rTemp.x, -1.0f
4600 // SEL_rFlag.x rOutput.x, rTemp.x, -1.0f
4601 // MOV rOutput.yzw, rImm.yzw
4602 llvm::Value* pFPZero = getFloat(0.0f);
4603 llvm::Value* pScalingFactor = getFloat(1.0f / 32767.0f);
4604 llvm::Value* pTempVec4 = llvm::UndefValue::get(IGCLLVM::FixedVectorType::get(this->getFloatTy(), 4));
4605
4606 // pTempFloat = pLdUAVTypedResult[0];
4607 llvm::Value* pTempFloat = this->CreateExtractElement(data, this->getInt32(0));
4608 llvm::Value* pTempInt32 = this->CreateBitCast(pTempFloat, this->getInt32Ty());
4609
4610 pTempInt32 = this->Create_IBFE(this->getInt32(16), this->getInt32(0), pTempInt32);
4611
4612 pTempFloat = this->CreateSIToFP(pTempInt32, this->getFloatTy());
4613 pTempFloat = this->CreateFMul(pTempFloat, pScalingFactor);
4614
4615 // compare with -1.0f and clamp to -1.0 if less than -1.0
4616 // Fcmp_ge rFlag, rTemp.x, -1.0f
4617 // Sel.rFlag rOutput.x, rTemp.x, -1.0f
4618 llvm::Value* pFlag = this->CreateFCmp(llvm::FCmpInst::FCMP_OGE, pTempFloat, getFloat(-1.0f));
4619 pTempFloat = this->CreateSelect(pFlag, pTempFloat, getFloat(-1.0f));
4620
4621 // Store the result in component 0 of the output vector (pTempVec4[0]).
4622 pTempVec4 = this->CreateInsertElement(pTempVec4, pTempFloat, this->getInt32(0));
4623 // Store 0.0f, 0.0f, 1.0f in remaining components
4624 pTempVec4 = this->CreateInsertElement(pTempVec4, pFPZero, this->getInt32(1));
4625 pTempVec4 = this->CreateInsertElement(pTempVec4, pFPZero, this->getInt32(2));
4626 pTempVec4 = this->CreateInsertElement(pTempVec4, getFloat(1.0f), this->getInt32(3));
4627 pFormatConvertedLLVMLdUAVTypedResult = pTempVec4;
4628 break;
4629 }
4630 case IGC::SURFACE_FORMAT::SURFACE_FORMAT_R8_UNORM:
4631 {
4632 // rImm.yzw = {0.0f, 0.0f, 1.0f}
4633 // USTOF rTemp.x, ri.x
4634 // FMUL rOutput.x, rTemp.x, 1.0f / 255.0f
4635 // MOV rOutput.yzw, rImm.yzw
4636 // UBTOF ro.x, ri.x;
4637 llvm::Value* fpZero = this->getFloat(0.0f);
4638 llvm::Value* pScalingFactor = getFloat(1.0f / 255.0f);
4639 llvm::Value* pTempVec4 = llvm::UndefValue::get(IGCLLVM::FixedVectorType::get(this->getFloatTy(), 4));
4640
4641 // pTempFloat = pLdUAVTypedResult[0];
4642 llvm::Value* pTempFloat = this->CreateExtractElement(data, this->getInt32(0));
4643 llvm::Value* pTempInt32 = this->CreateBitCast(pTempFloat, this->getInt32Ty());
4644
4645 pTempFloat = this->CreateUIToFP(pTempInt32, this->getFloatTy());
4646 pTempFloat = this->CreateFMul(pTempFloat, pScalingFactor);
4647
4648 // Store component 0 in output vector (pTempVec4[0]).
4649 pTempVec4 = this->CreateInsertElement(pTempVec4, pTempFloat, this->getInt32(0));
4650 // fill the rest with 0.0f, 0.0f, 1.0f
4651 pTempVec4 = this->CreateInsertElement(pTempVec4, fpZero, this->getInt32(1));
4652 pTempVec4 = this->CreateInsertElement(pTempVec4, fpZero, this->getInt32(2));
4653 pTempVec4 = this->CreateInsertElement(pTempVec4, getFloat(1.0f), this->getInt32(3));
4654 pFormatConvertedLLVMLdUAVTypedResult = pTempVec4;
4655 break;
4656 }
4657 case IGC::SURFACE_FORMAT::SURFACE_FORMAT_R8_SNORM:
4658 {
4659 // rImm.yzw = {0.0f, 0.0f, 1.0f}
4660 // STOF rTemp.x, ri.x
4661 // FMUL rTemp.x, rTemp.x, 1.0f / 127.0f
4662 // FCMP_GE rFlag.x, rTemp.x, -1.0f
4663 // SEL_rFlag.x rOutput.x, rTemp.x, -1.0f
4664 // MOV rOutput.yzw, rImm.yzw
4665 llvm::Value* pFpZero = getFloat(0.0f);
4666 llvm::Value* pFpNegOne = getFloat(-1.0f);
4667 llvm::Value* pScalingFactor = getFloat(1.0f / 127.0f);
4668 llvm::Value* pOutVec4 = llvm::UndefValue::get(IGCLLVM::FixedVectorType::get(this->getFloatTy(), 4));
4669
4670 // pTempFloat = pLdUAVTypedResult[0];
4671 llvm::Value* pTempFloat = this->CreateExtractElement(data, this->getInt32(0));
4672 llvm::Value* pTempInt32 = this->CreateBitCast(pTempFloat, this->getInt32Ty());
4673
4674 // extract bits 0..7 and sign extend the result
4675 pTempInt32 = this->Create_IBFE(this->getInt32(8), this->getInt32(0), pTempInt32);
4676
4677 // convert to float and apply scaling factor
4678 pTempFloat = this->CreateSIToFP(pTempInt32, this->getFloatTy());
4679 pTempFloat = this->CreateFMul(pTempFloat, pScalingFactor);
4680
4681 // Fcmp_ge rFlag, rTemp.x, -1.0f
4682 // Sel.rFlag rOutput.x, rTemp.x, -1.0f
4683 llvm::Value* pFlag = this->CreateFCmp(llvm::FCmpInst::FCMP_OGE, pTempFloat, pFpNegOne);
4684 pTempFloat = this->CreateSelect(pFlag, pTempFloat, pFpNegOne);
4685
4686 // Store component 0 in output vector (pTempVec4[0]).
4687 pOutVec4 = this->CreateInsertElement(pOutVec4, pTempFloat, this->getInt32(0));
4688
4689 // Store 0.0f, 0.0f, 1.0f in the remaining components of the output vector
4690 pOutVec4 = this->CreateInsertElement(pOutVec4, pFpZero, this->getInt32(1));
4691 pOutVec4 = this->CreateInsertElement(pOutVec4, pFpZero, this->getInt32(2));
4692 pOutVec4 = this->CreateInsertElement(pOutVec4, getFloat(1.0f), this->getInt32(3));
4693
4694 pFormatConvertedLLVMLdUAVTypedResult = pOutVec4;
4695 break;
4696 }
4697 default:
4698 break;
4699 }
4700
4701 return pFormatConvertedLLVMLdUAVTypedResult;
4702 }
4703
4704
4705 ///////////////////////////////////////////////////////////////////////////////
4706 /// @brief Extract all scalars from a vector
4707 /// @param vector Llvm value of a vector
4708 /// @param outScalars pointer to the output array of scalars
4709 /// @param maxSize Size of the output array
4710 /// @param initializer optional parameter to set to unused elements
4711 ///
4712 template<bool preserveNames, typename T, typename Inserter>
4713 inline
VectorToScalars(llvm::Value * vector,llvm::Value ** outScalars,unsigned maxSize,llvm::Value * initializer)4714 void LLVM3DBuilder<preserveNames, T, Inserter>::VectorToScalars(
4715 llvm::Value* vector,
4716 llvm::Value** outScalars,
4717 unsigned maxSize,
4718 llvm::Value* initializer)
4719 {
4720 IGC_ASSERT(nullptr != vector);
4721 IGC_ASSERT(nullptr != vector->getType());
4722 IGC_ASSERT(vector->getType()->isVectorTy());
4723
4724 const unsigned count = (unsigned)llvm::cast<IGCLLVM::FixedVectorType>(vector->getType())->getNumElements();
4725 IGC_ASSERT(1 < count);
4726 IGC_ASSERT(count <= 4);
4727 IGC_ASSERT(count <= maxSize);
4728
4729 for (unsigned vecElem = 0; vecElem < maxSize; vecElem++)
4730 {
4731 if (vecElem >= count)
4732 {
4733 outScalars[vecElem] = initializer;
4734 continue;
4735 }
4736 outScalars[vecElem] = this->CreateExtractElement(
4737 vector,
4738 this->getInt32(vecElem));
4739 }
4740 }
4741
4742
4743 ///////////////////////////////////////////////////////////////////////////////
4744 /// @brief Aggregates scalar values to a vector
4745 /// @param scalars Array of scalars
4746 /// @param vectorElementCnt The number of elements in the vector to create.
4747 /// @return Vector of type resultType
4748 ///
4749 template<bool preserveNames, typename T, typename Inserter>
4750 inline
ScalarsToVector(llvm::Value * (& scalars)[4],unsigned vectorElementCnt)4751 llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::ScalarsToVector(
4752 llvm::Value* (&scalars)[4],
4753 unsigned vectorElementCnt)
4754 {
4755 llvm::Type* const resultType = IGCLLVM::FixedVectorType::get(scalars[0]->getType(), vectorElementCnt);
4756 IGC_ASSERT(nullptr != resultType);
4757 llvm::Value* result = llvm::UndefValue::get(resultType);
4758
4759 for (unsigned i = 0; i < llvm::cast<IGCLLVM::FixedVectorType>(resultType)->getNumElements(); i++)
4760 {
4761 IGC_ASSERT(nullptr != scalars[i]);
4762 IGC_ASSERT(llvm::cast<llvm::VectorType>(resultType)->getElementType() == scalars[i]->getType());
4763
4764 result = this->CreateInsertElement(
4765 result,
4766 scalars[i],
4767 this->getInt32(i));
4768 }
4769 return result;
4770 }
4771
4772
4773 ///////////////////////////////////////////////////////////////////////////////
4774 /// @brief Returns the normalization factor for UNORM formats
4775 /// @param bits Number of bits in the UNORM value
4776 /// @return llvm::Constant* unorm factor
4777 template<bool preserveNames, typename T, typename Inserter>
4778 inline
GetUnormFactor(unsigned bits)4779 llvm::Constant* LLVM3DBuilder<preserveNames, T, Inserter>::GetUnormFactor(unsigned bits)
4780 {
4781 float maxUint = (float)((1 << bits) - 1);
4782 return llvm::ConstantFP::get(this->getFloatTy(), (1.0f / maxUint));
4783 };
4784
4785
4786 ///////////////////////////////////////////////////////////////////////////////
4787 /// @brief Returns the normalization factor for SNORM formats
4788 /// @param bits Number of bits in the SNORM value
4789 /// @return llvm::Constant* snorm factor
4790 template<bool preserveNames, typename T, typename Inserter>
4791 inline
GetSnormFactor(unsigned bits)4792 llvm::Constant* LLVM3DBuilder<preserveNames, T, Inserter>::GetSnormFactor(unsigned bits)
4793 {
4794 float maxSint = (float)(((1 << bits) - 1) / 2);
4795 return llvm::ConstantFP::get(this->getFloatTy(), (1.0f / maxSint));
4796 };
4797
4798 template<bool preserveNames, typename T, typename Inserter>
CreateCPSRqstCoarseSize(llvm::Value * pSrcVal)4799 inline llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::CreateCPSRqstCoarseSize(
4800 llvm::Value* pSrcVal)
4801 {
4802 llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
4803 llvm::Function* pFunc = llvm::GenISAIntrinsic::getDeclaration(
4804 module,
4805 llvm::GenISAIntrinsic::GenISA_DCL_SystemValue,
4806 this->getFloatTy());
4807
4808 llvm::Value* sizeX = this->CreateCall(pFunc, this->getInt32(IGC::REQUESTED_COARSE_SIZE_X));
4809 llvm::Value* sizeY = this->CreateCall(pFunc, this->getInt32(IGC::REQUESTED_COARSE_SIZE_Y));
4810 llvm::Value* vec = this->CreateInsertElement(
4811 llvm::UndefValue::get(pSrcVal->getType()),
4812 sizeX,
4813 this->getInt32(0));
4814 return this->CreateInsertElement(
4815 vec,
4816 sizeY,
4817 this->getInt32(1));
4818
4819 }
4820
4821 template<bool preserveNames, typename T, typename Inserter>
CreateCPSActualCoarseSize(llvm::Value * pSrcVal)4822 inline llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::CreateCPSActualCoarseSize(
4823 llvm::Value* pSrcVal)
4824 {
4825 llvm::Module* module = this->GetInsertBlock()->getParent()->getParent();
4826 llvm::Function* pFunc = llvm::GenISAIntrinsic::getDeclaration(
4827 module,
4828 llvm::GenISAIntrinsic::GenISA_DCL_SystemValue,
4829 this->getFloatTy());
4830 llvm::Value* sizeX = this->CreateCall(pFunc, this->getInt32(IGC::ACTUAL_COARSE_SIZE_X));
4831 llvm::Value* sizeY = this->CreateCall(pFunc, this->getInt32(IGC::ACTUAL_COARSE_SIZE_Y));
4832 llvm::Value* vec = this->CreateInsertElement(
4833 llvm::UndefValue::get(pSrcVal->getType()),
4834 sizeX,
4835 this->getInt32(0));
4836 return this->CreateInsertElement(
4837 vec,
4838 sizeY,
4839 this->getInt32(1));
4840
4841 }
4842
4843
4844
4845 #endif // BUILTINS_FRONTEND_DEFINITIONS_HPP
4846