1 /*========================== begin_copyright_notice ============================
2
3 Copyright (C) 2017-2021 Intel Corporation
4
5 SPDX-License-Identifier: MIT
6
7 ============================= end_copyright_notice ===========================*/
8
9 #include "Compiler/CISACodeGen/helper.h"
10 #include "Compiler/CISACodeGen/CISACodeGen.h"
11 #include "Compiler/Optimizer/OpenCLPasses/KernelArgs.hpp"
12 #include "Compiler/MetaDataUtilsWrapper.h"
13 #include "common/LLVMWarningsPush.hpp"
14 #include "llvm/Config/llvm-config.h"
15 #include "llvmWrapper/IR/DerivedTypes.h"
16 #include "llvmWrapper/Support/KnownBits.h"
17 #include "llvmWrapper/IR/Instructions.h"
18 #include "llvmWrapper/Support/Alignment.h"
19 #include "llvm/IR/GetElementPtrTypeIterator.h"
20 #include <llvm/IR/InstIterator.h>
21 #include "llvm/Analysis/ValueTracking.h"
22 #include "common/LLVMWarningsPop.hpp"
23 #include "GenISAIntrinsics/GenIntrinsicInst.h"
24 #include "Compiler/CISACodeGen/ShaderCodeGen.hpp"
25 #include "common/secure_mem.h"
26 #include <stack>
27 #include "Probe/Assertion.h"
28
29 using namespace llvm;
30 using namespace GenISAIntrinsic;
31
/************************************************************************
This file contains helper functions for the code generator.
Many functions use X-MACROs, which allow us to separate data about the
encoding from the logic of the helper functions.

************************************************************************/
38
39 namespace IGC
40 {
// Packed descriptor overlaid on the 32-bit LLVM address-space value when it
// encodes a stateful surface (see EncodeAS4GFXResource / DecodeAS4GFXResource).
typedef union _gfxResourceAddrSpace
{
    struct _bits
    {
        unsigned int bufId : 16;    // surface index, or a unique id for indirect accesses
        unsigned int bufType : 5;   // BufferType + 1; 0 means "not encoded"
        unsigned int indirect : 1;  // bool; set when bufId is not a compile-time constant
        unsigned int reserved : 10;
    } bits;
    uint32_t u32Val;                // raw value used as the LLVM address space
} GFXResourceAddrSpace;
52
53 // If 'bufIdx' is a ConstantInt, 'uniqueIndAS' is irrelevant.
54 // Otherwise, you should set 'uniqueIndAS' if you want to identify
55 // this address space later on. If not, the default can be used.
EncodeAS4GFXResource(const llvm::Value & bufIdx,BufferType bufType,unsigned uniqueIndAS)56 unsigned EncodeAS4GFXResource(
57 const llvm::Value& bufIdx,
58 BufferType bufType,
59 unsigned uniqueIndAS)
60 {
61 GFXResourceAddrSpace temp;
62 static_assert(sizeof(temp) == 4, "Code below may need and update.");
63 temp.u32Val = 0;
64 IGC_ASSERT((bufType + 1) < BUFFER_TYPE_UNKNOWN + 1);
65 temp.bits.bufType = bufType + 1;
66
67 if (bufType == SLM)
68 {
69 return ADDRESS_SPACE_LOCAL;
70 }
71 else if (bufType == STATELESS_READONLY)
72 {
73 return ADDRESS_SPACE_CONSTANT;
74 }
75 else if (bufType == STATELESS)
76 {
77 return ADDRESS_SPACE_GLOBAL;
78 }
79 else if (bufType == STATELESS_A32)
80 {
81 return ADDRESS_SPACE_A32;
82 }
83 else if (auto *CI = dyn_cast<ConstantInt>(&bufIdx))
84 {
85 unsigned int bufId = static_cast<unsigned>(CI->getZExtValue());
86 IGC_ASSERT((bufType == BINDLESS_SAMPLER) || (bufId < (1 << 16)));
87 temp.bits.bufId = bufId;
88 return temp.u32Val;
89 }
90
91 // if it is indirect-buf, it is front-end's job to give a proper(unique) address-space per access
92 temp.bits.bufId = uniqueIndAS;
93 temp.bits.indirect = 1;
94 return temp.u32Val;
95 }
96
97 ///
98 /// if you want resource-dimension, use GetBufferDimension()
99 ///
DecodeAS4GFXResource(unsigned addrSpace,bool & directIndexing,unsigned & bufId)100 BufferType DecodeAS4GFXResource(unsigned addrSpace, bool& directIndexing, unsigned& bufId)
101 {
102 GFXResourceAddrSpace temp;
103 temp.u32Val = addrSpace;
104
105 directIndexing = (temp.bits.indirect == 0);
106 bufId = temp.bits.bufId;
107
108 if (addrSpace == ADDRESS_SPACE_LOCAL)
109 {
110 return SLM;
111 }
112 else if (addrSpace == ADDRESS_SPACE_A32)
113 {
114 return STATELESS_A32;
115 }
116 unsigned bufType = temp.bits.bufType - 1;
117 if (bufType < BUFFER_TYPE_UNKNOWN)
118 {
119 return (BufferType)bufType;
120 }
121 return BUFFER_TYPE_UNKNOWN;
122 }
123
124 // Return true if AS is for a stateful surface.
125 // Stateful surface should have an encoded AS that is bigger than
126 // ADDRESS_SPACE_NUM_ADDRESSES.
isStatefulAddrSpace(unsigned AS)127 bool isStatefulAddrSpace(unsigned AS)
128 {
129 return AS > ADDRESS_SPACE_NUM_ADDRESSES;
130 }
131
isDummyBasicBlock(llvm::BasicBlock * BB)132 bool isDummyBasicBlock(llvm::BasicBlock* BB)
133 {
134 if (BB->size() != 1)
135 return false;
136 if ((++pred_begin(BB)) != pred_end(BB))
137 return false;
138 if ((++succ_begin(BB)) != succ_end(BB))
139 return false;
140 return true;
141 }
142
SetBufferAsBindless(unsigned addressSpaceOfPtr,BufferType bufferType)143 unsigned SetBufferAsBindless(unsigned addressSpaceOfPtr, BufferType bufferType)
144 {
145 GFXResourceAddrSpace temp = {};
146 temp.u32Val = addressSpaceOfPtr;
147
148 // Mark buffer as it is bindless for further processing
149 switch (bufferType)
150 {
151 case BufferType::RESOURCE:
152 temp.bits.bufType = IGC::BINDLESS_TEXTURE + 1;
153 break;
154 case BufferType::CONSTANT_BUFFER:
155 temp.bits.bufType = IGC::BINDLESS_CONSTANT_BUFFER + 1;
156 break;
157 case BufferType::UAV:
158 temp.bits.bufType = IGC::BINDLESS + 1;
159 break;
160 case BufferType::SAMPLER:
161 temp.bits.bufType = IGC::BINDLESS_SAMPLER + 1;
162 break;
163 default:
164 IGC_ASSERT_MESSAGE(0, "other types of buffers shouldn't reach this part");
165 break;
166 }
167 return temp.u32Val;
168 }
169
UsesTypedConstantBuffer(const CodeGenContext * pContext,const BufferType bufType)170 bool UsesTypedConstantBuffer(
171 const CodeGenContext* pContext,
172 const BufferType bufType)
173 {
174 IGC_ASSERT(bufType == CONSTANT_BUFFER ||
175 bufType == BINDLESS_CONSTANT_BUFFER);
176
177 if (pContext->m_DriverInfo.ForceUntypedBindlessConstantBuffers() &&
178 bufType == BINDLESS_CONSTANT_BUFFER)
179 {
180 return false;
181 }
182
183 if (pContext->m_DriverInfo.UsesTypedConstantBuffers3D() &&
184 pContext->type != ShaderType::COMPUTE_SHADER)
185 {
186 return true;
187 }
188 if (pContext->m_DriverInfo.UsesTypedConstantBuffersGPGPU() &&
189 pContext->type == ShaderType::COMPUTE_SHADER)
190 {
191 return true;
192 }
193 return false;
194 }
195
196 ///
197 /// returns buffer type from addressspace
198 ///
DecodeBufferType(unsigned addrSpace)199 BufferType DecodeBufferType(unsigned addrSpace)
200 {
201 switch (addrSpace)
202 {
203 case ADDRESS_SPACE_CONSTANT:
204 return STATELESS_READONLY;
205 case ADDRESS_SPACE_LOCAL:
206 return SLM;
207 case ADDRESS_SPACE_GLOBAL:
208 return STATELESS;
209 case ADDRESS_SPACE_A32:
210 return STATELESS_A32;
211 default:
212 break;
213 }
214 GFXResourceAddrSpace temp;
215 temp.u32Val = addrSpace;
216 BufferType type = BUFFER_TYPE_UNKNOWN;
217 if (addrSpace > ADDRESS_SPACE_NUM_ADDRESSES &&
218 (temp.bits.bufType - 1) < BUFFER_TYPE_UNKNOWN)
219 {
220 type = static_cast<BufferType>(temp.bits.bufType - 1);
221 }
222 return type;
223 }
224
225 ///
226 /// returns constant buffer load offset
227 ///
getConstantBufferLoadOffset(llvm::LoadInst * ld)228 int getConstantBufferLoadOffset(llvm::LoadInst* ld)
229 {
230 int offset = 0;
231 Value* ptr = ld->getPointerOperand();
232 if (isa<ConstantPointerNull>(ptr))
233 {
234 offset = 0;
235 }
236 else if (IntToPtrInst * itop = dyn_cast<IntToPtrInst>(ptr))
237 {
238 ConstantInt* ci = dyn_cast<ConstantInt>(
239 itop->getOperand(0));
240 if (ci)
241 {
242 offset = int_cast<unsigned>(ci->getZExtValue());
243 }
244 }
245 else if (ConstantExpr * itop = dyn_cast<ConstantExpr>(ptr))
246 {
247 if (itop->getOpcode() == Instruction::IntToPtr)
248 {
249 offset = int_cast<unsigned>(
250 cast<ConstantInt>(itop->getOperand(0))->getZExtValue());
251 }
252 }
253 return offset;
254 }
255 ///
256 /// returns info if direct addressing is used
257 ///
IsDirectIdx(unsigned addrSpace)258 bool IsDirectIdx(unsigned addrSpace)
259 {
260 GFXResourceAddrSpace temp;
261 temp.u32Val = addrSpace;
262 return (temp.bits.indirect == 0);
263 }
264
isNaNCheck(llvm::FCmpInst & FC)265 bool isNaNCheck(llvm::FCmpInst& FC)
266 {
267 Value* Op1 = FC.getOperand(1);
268 if (FC.getPredicate() == CmpInst::FCMP_UNO)
269 {
270 auto CFP = dyn_cast<ConstantFP>(Op1);
271 return CFP && CFP->isZero();
272 }
273 else if (FC.getPredicate() == CmpInst::FCMP_UNE)
274 {
275 Value* Op0 = FC.getOperand(0);
276 return Op0 == Op1;
277 }
278 return false;
279 }
280
cloneLoad(llvm::LoadInst * Orig,llvm::Value * Ptr)281 llvm::LoadInst* cloneLoad(llvm::LoadInst* Orig, llvm::Value* Ptr)
282 {
283 llvm::LoadInst* LI = new llvm::LoadInst(
284 cast<PointerType>(Ptr->getType())->getElementType(),
285 Ptr, "", false, Orig);
286 LI->setVolatile(Orig->isVolatile());
287 LI->setAlignment(IGCLLVM::getCorrectAlign(Orig->getAlignment()));
288 if (LI->isAtomic())
289 {
290 LI->setAtomic(Orig->getOrdering(), Orig->getSyncScopeID());
291 }
292 // Clone metadata
293 llvm::SmallVector<std::pair<unsigned, llvm::MDNode*>, 4> MDs;
294 Orig->getAllMetadata(MDs);
295 for (llvm::SmallVectorImpl<std::pair<unsigned, llvm::MDNode*> >::iterator
296 MI = MDs.begin(), ME = MDs.end(); MI != ME; ++MI)
297 {
298 LI->setMetadata(MI->first, MI->second);
299 }
300 return LI;
301 }
302
cloneStore(llvm::StoreInst * Orig,llvm::Value * Val,llvm::Value * Ptr)303 llvm::StoreInst* cloneStore(llvm::StoreInst* Orig, llvm::Value* Val, llvm::Value* Ptr)
304 {
305 llvm::StoreInst* SI = new llvm::StoreInst(Val, Ptr, Orig);
306 SI->setVolatile(Orig->isVolatile());
307 SI->setAlignment(IGCLLVM::getCorrectAlign(Orig->getAlignment()));
308 if (SI->isAtomic())
309 {
310 SI->setAtomic(Orig->getOrdering(), Orig->getSyncScopeID());
311 }
312 // Clone metadata
313 llvm::SmallVector<std::pair<unsigned, llvm::MDNode*>, 4> MDs;
314 Orig->getAllMetadata(MDs);
315 for (llvm::SmallVectorImpl<std::pair<unsigned, llvm::MDNode*> >::iterator
316 MI = MDs.begin(), ME = MDs.end(); MI != ME; ++MI)
317 {
318 SI->setMetadata(MI->first, MI->second);
319 }
320 return SI;
321 }
322
323 // Create a ldraw from a load instruction
CreateLoadRawIntrinsic(LoadInst * inst,Value * bufPtr,Value * offsetVal)324 LdRawIntrinsic* CreateLoadRawIntrinsic(LoadInst* inst, Value* bufPtr, Value* offsetVal)
325 {
326 Type* tys[] = {
327 inst->getType(),
328 bufPtr->getType()
329 };
330
331 auto* M = inst->getModule();
332 auto& DL = M->getDataLayout();
333 Function *func = GenISAIntrinsic::getDeclaration(
334 M,
335 inst->getType()->isVectorTy() ?
336 GenISAIntrinsic::GenISA_ldrawvector_indexed :
337 GenISAIntrinsic::GenISA_ldraw_indexed,
338 tys);
339
340 unsigned alignment = inst->getAlignment();
341 if (alignment == 0)
342 alignment = DL.getABITypeAlignment(inst->getType());
343
344 IRBuilder<> builder(inst);
345
346 Value* attr[] =
347 {
348 bufPtr,
349 offsetVal,
350 builder.getInt32(alignment),
351 builder.getInt1(inst->isVolatile()) // volatile
352 };
353 auto* ld = builder.CreateCall(func, attr);
354 IGC_ASSERT(ld->getType() == inst->getType());
355 return cast<LdRawIntrinsic>(ld);
356 }
357
358 // Creates a storeraw from a store instruction
CreateStoreRawIntrinsic(StoreInst * inst,Value * bufPtr,Value * offsetVal)359 StoreRawIntrinsic* CreateStoreRawIntrinsic(StoreInst* inst, Value* bufPtr, Value* offsetVal)
360 {
361 Module* module = inst->getModule();
362 Function* func = nullptr;
363 Value* storeVal = inst->getValueOperand();
364 auto& DL = module->getDataLayout();
365 if (storeVal->getType()->isVectorTy())
366 {
367 Type* tys[] = {
368 bufPtr->getType(),
369 storeVal->getType()
370 };
371 func = GenISAIntrinsic::getDeclaration(module, llvm::GenISAIntrinsic::GenISA_storerawvector_indexed, tys);
372 }
373 else
374 {
375 Type* dataType = storeVal->getType();
376 const uint64_t typeSize = DL.getTypeSizeInBits(dataType);
377 IGC_ASSERT(typeSize == 8 || typeSize == 16 || typeSize == 32 || typeSize == 64);
378
379 Type* types[] = {
380 bufPtr->getType(),
381 storeVal->getType()
382 };
383
384 func = GenISAIntrinsic::getDeclaration(module, llvm::GenISAIntrinsic::GenISA_storeraw_indexed, types);
385 }
386 IRBuilder<> builder(inst);
387 unsigned alignment = inst->getAlignment();
388 if (alignment == 0)
389 alignment = DL.getABITypeAlignment(storeVal->getType());
390 Value* attr[] =
391 {
392 bufPtr,
393 offsetVal,
394 storeVal,
395 builder.getInt32(alignment),
396 builder.getInt1(inst->isVolatile()) // volatile
397 };
398 auto* st = builder.CreateCall(func, attr);
399 return cast<StoreRawIntrinsic>(st);
400 }
401
402 ///
403 /// Tries to trace a resource pointer (texture/sampler/buffer) back to
404 /// the pointer source. Also returns a vector of all instructions in the search path
405 ///
// Walks backwards from 'resourcePtr' through casts, GEPs, add/sub pointer
// arithmetic, phis, selects and address-space-0 loads until a recognized
// source is found:
//   - a GenISA_RuntimeValue / GenISA_GetBufferPtr intrinsic,
//   - a kernel argument,
//   - a global variable (inline sampler / constant buffer),
//   - an alloca tagged with "igc.read_only_array" metadata.
// Returns the source value, or nullptr when the chain cannot be traced
// unambiguously. When 'fillList' is set, every visited value is appended to
// 'instList'. 'hasBranching' limits tracing to a single select on the path;
// 'enablePhiLoops' allows phi cycles as long as all other incoming paths
// agree. 'visitedPHIs' guards against infinite recursion across calls.
Value* TracePointerSource(Value* resourcePtr, bool hasBranching, bool enablePhiLoops, bool fillList,
    std::vector<Value*>& instList, llvm::SmallSet<PHINode*, 8> & visitedPHIs)
{
    Value* srcPtr = nullptr;
    Value* baseValue = resourcePtr;

    // Returns true if resource pointers describe the same resource.
    auto ResourcePointersEq = [](Value* a, Value* b)->bool
    {
        if (a == b)
        {
            return true;
        }
        if (a->getType()->isPointerTy() && b->getType()->isPointerTy())
        {
            unsigned idxA = 0, idxB = 0;
            BufferType bufA, bufB;
            BufferAccessType accessA, accessB;
            bool needBufferOffsetA = false, needBufferOffsetB = false;

            // NOTE(review): the decoded buffer types (bufA/bufB) are not
            // compared below — only index, access type and the
            // buffer-offset flag. Confirm this is intentional.
            if (GetResourcePointerInfo(a, idxA, bufA, accessA, needBufferOffsetA) &&
                GetResourcePointerInfo(b, idxB, bufB, accessB, needBufferOffsetB) &&
                idxA == idxB &&
                accessA == accessB &&
                needBufferOffsetA == needBufferOffsetB)
            {
                return true;
            }
        }
        return false;
    };

    while (true)
    {
        if (fillList)
        {
            instList.push_back(baseValue);
        }

        if (GenIntrinsicInst * inst = dyn_cast<GenIntrinsicInst>(baseValue))
        {
            // For bindless pointers
            if ((inst->getIntrinsicID() == GenISAIntrinsic::GenISA_RuntimeValue) ||
                (inst->getIntrinsicID() == GenISAIntrinsic::GenISA_GetBufferPtr))
            {
                srcPtr = baseValue;
            }
            // Any other GenISA intrinsic terminates the search (srcPtr stays null).
            break;
        }
        else if (isa<Argument>(baseValue))
        {
            // For compute, resource comes from the kernel args
            srcPtr = baseValue;
            break;
        }
        else if (isa<GlobalVariable>(baseValue))
        {
            // Can be an inline sampler/constant buffer
            srcPtr = baseValue;
            break;
        }
        else if (auto allocaInst = dyn_cast<AllocaInst>(baseValue))
        {
            if (allocaInst->getMetadata("igc.read_only_array"))
            {
                // Found alloca marked as read_only array.
                srcPtr = baseValue;
            }
            break;
        }
        else if (CastInst * inst = dyn_cast<CastInst>(baseValue))
        {
            // Casts are transparent: keep walking through the operand.
            baseValue = inst->getOperand(0);
        }
        else if (GetElementPtrInst * inst = dyn_cast<GetElementPtrInst>(baseValue))
        {
            // Follow the GEP's base pointer; offsets don't matter here.
            baseValue = inst->getOperand(0);
        }
        else if (BinaryOperator* inst = dyn_cast<BinaryOperator>(baseValue))
        {
            // Assume this is pointer arithmetic, which allows add/sub only.
            // Follow the first operand assuming it's pointer base.
            // Do not check the operand is pointer type now, leave the check
            // until the leaf instruction is found.
            Instruction::BinaryOps Opcode = inst->getOpcode();
            if (Opcode == Instruction::Add || Opcode == Instruction::Sub)
            {
                baseValue = inst->getOperand(0);
            }
            else
                break;
        }
        else if (PHINode * inst = dyn_cast<PHINode>(baseValue))
        {
            if (visitedPHIs.count(inst) != 0)
            {
                // stop if we've seen this phi node before
                return baseValue;
            }
            visitedPHIs.insert(inst);

            for (unsigned int i = 0; i < inst->getNumIncomingValues(); ++i)
            {
                // All phi paths must be trace-able and trace back to the same source
                Value* phiVal = inst->getIncomingValue(i);
                std::vector<Value*> splitList;
                Value* phiSrcPtr = TracePointerSource(phiVal, true, enablePhiLoops, fillList, splitList, visitedPHIs);
                if (phiSrcPtr == nullptr)
                {
                    // Incoming value not trace-able, bail out.
                    return nullptr;
                }
                else if (isa<PHINode>(phiSrcPtr) && phiSrcPtr == baseValue)
                {
                    // Found a loop in one of the phi paths. We can still trace as long as all the other paths match
                    if (enablePhiLoops)
                        continue;
                    else
                        return nullptr;
                }
                else if (srcPtr == nullptr)
                {
                    // Found a path to the source pointer. We only save the instructions used in this path
                    srcPtr = phiSrcPtr;
                    instList.insert(instList.end(), splitList.begin(), splitList.end());
                }
                else if (!ResourcePointersEq(srcPtr , phiSrcPtr))
                {
                    // The source pointers have diverged. Bail out.
                    return nullptr;
                }
            }
            break;
        }
        else if (SelectInst * inst = dyn_cast<SelectInst>(baseValue))
        {
            if (hasBranching)
            {
                // only allow a single branching instruction to be supported for now
                // if both select and PHI are present, or there are multiples of each, we bail
                break;
            }
            // Trace both operands of the select instruction. Both have to be traced back to the same
            // source pointer, otherwise we can't determine which one to use.
            // Note the second call passes fillList=false so the shared path
            // is only recorded once.
            Value* selectSrc0 = TracePointerSource(inst->getOperand(1), true, enablePhiLoops, fillList, instList, visitedPHIs);
            Value* selectSrc1 = TracePointerSource(inst->getOperand(2), true, enablePhiLoops, false, instList, visitedPHIs);
            if (selectSrc0 && selectSrc1 &&
                ResourcePointersEq(selectSrc0, selectSrc1))
            {
                srcPtr = selectSrc0;
                break;
            }
            return nullptr;
        }
        else if (LoadInst* inst = dyn_cast<LoadInst>(baseValue))
        {
            if (inst->getPointerAddressSpace() == 0)
            {
                // May be local array of resources:
                baseValue = inst->getPointerOperand();
            }
            else
            {
                break;
            }
        }
        else
        {
            // Unsupported instruction in search chain. Don't continue.
            break;
        }
    }
    return srcPtr;
}
580
581 ///
582 /// Only trace the GetBufferPtr instruction (ignore GetElementPtr)
583 ///
TracePointerSource(Value * resourcePtr)584 Value* TracePointerSource(Value* resourcePtr)
585 {
586 std::vector<Value*> tempList; //unused
587 llvm::SmallSet<PHINode*, 8> visitedPHIs;
588 return TracePointerSource(resourcePtr, false, true, false, tempList, visitedPHIs);
589 }
590
TracePointerSource(Value * resourcePtr,bool hasBranching,bool enablePhiLoops,bool fillList,std::vector<Value * > & instList)591 Value* TracePointerSource(Value* resourcePtr, bool hasBranching, bool enablePhiLoops, bool fillList, std::vector<Value*>& instList)
592 {
593 llvm::SmallSet<PHINode*, 8> visitedPHIs;
594 return TracePointerSource(resourcePtr, hasBranching, enablePhiLoops, fillList, instList, visitedPHIs);
595 }
596
getDefaultAccessType(BufferType bufTy)597 BufferAccessType getDefaultAccessType(BufferType bufTy)
598 {
599 switch (bufTy)
600 {
601 case BufferType::CONSTANT_BUFFER:
602 case BufferType::RESOURCE:
603 case BufferType::BINDLESS_TEXTURE:
604 case BufferType::BINDLESS_CONSTANT_BUFFER:
605 case BufferType::STATELESS_READONLY:
606 case BufferType::SAMPLER:
607 return BufferAccessType::ACCESS_READ;
608
609 case BufferType::UAV:
610 case BufferType::SLM:
611 case BufferType::POINTER:
612 case BufferType::BINDLESS:
613 case BufferType::STATELESS:
614 return BufferAccessType::ACCESS_READWRITE;
615
616 case BufferType::RENDER_TARGET:
617 return BufferAccessType::ACCESS_WRITE;
618
619 default:
620 IGC_ASSERT_MESSAGE(0, "Invalid buffer type");
621 return BufferAccessType::ACCESS_READWRITE;
622 }
623 }
624
// Extracts resource id / type / access information from a traced source
// pointer. Two producers are recognized:
//   - GenISA_RuntimeValue calls carrying "resID"/"resTy" operand bundles
//     (with optional "accessTy" and "needBufferOffset" bundles),
//   - GenISA_GetBufferPtr calls with constant id/type operands.
// Returns true and fills the out-parameters on success; returns false
// otherwise (accessTy/needBufferOffset are still reset to their defaults).
bool GetResourcePointerInfo(Value* srcPtr, unsigned& resID, IGC::BufferType& resTy, BufferAccessType& accessTy, bool& needBufferOffset)
{
    accessTy = BufferAccessType::ACCESS_READWRITE;
    needBufferOffset = false;
    if (GenIntrinsicInst * inst = dyn_cast<GenIntrinsicInst>(srcPtr))
    {
        // For bindless pointers with encoded metadata
        if (inst->getIntrinsicID() == GenISAIntrinsic::GenISA_RuntimeValue)
        {
            if (inst->hasOperandBundles())
            {
                auto resIDBundle = inst->getOperandBundle("resID");
                auto resTyBundle = inst->getOperandBundle("resTy");
                auto accessTyBundle = inst->getOperandBundle("accessTy");
                auto needBufferOffsetBundle = inst->getOperandBundle("needBufferOffset");
                // Both id and type bundles are mandatory; the others fall
                // back to defaults when absent.
                if (resIDBundle && resTyBundle)
                {
                    resID = (unsigned)(cast<ConstantInt>(resIDBundle->Inputs.front()))->getZExtValue();
                    resTy = (BufferType)(cast<ConstantInt>(resTyBundle->Inputs.front()))->getZExtValue();

                    if (accessTyBundle)
                        accessTy = (BufferAccessType)(cast<ConstantInt>(accessTyBundle->Inputs.front()))->getZExtValue();
                    else
                        accessTy = getDefaultAccessType(resTy);

                    if(needBufferOffsetBundle)
                        needBufferOffset = (bool)(cast<ConstantInt>(needBufferOffsetBundle->Inputs.front()))->getZExtValue();

                    return true;
                }
            }
        }
        // For GetBufferPtr instructions with buffer info in the operands
        else if (inst->getIntrinsicID() == GenISAIntrinsic::GenISA_GetBufferPtr)
        {
            Value* bufIdV = inst->getOperand(0);
            Value* bufTyV = inst->getOperand(1);
            // Only constant operands can be decoded here.
            if (isa<ConstantInt>(bufIdV) && isa<ConstantInt>(bufTyV))
            {
                resID = (unsigned)(cast<ConstantInt>(bufIdV)->getZExtValue());
                resTy = (IGC::BufferType)(cast<ConstantInt>(bufTyV)->getZExtValue());
                accessTy = getDefaultAccessType(resTy);
                return true;
            }
        }
    }
    return false;
}
673
674 // Get GRF offset from GenISA_RuntimeValue intrinsic call
GetGRFOffsetFromRTV(Value * pointerSrc,unsigned & GRFOffset)675 bool GetGRFOffsetFromRTV(Value* pointerSrc, unsigned& GRFOffset)
676 {
677 if (GenIntrinsicInst * inst = dyn_cast<GenIntrinsicInst>(pointerSrc))
678 {
679 // For bindless pointers with encoded metadata
680 if (inst->getIntrinsicID() == GenISAIntrinsic::GenISA_RuntimeValue)
681 {
682 GRFOffset = (unsigned)llvm::cast<llvm::ConstantInt>(inst->getOperand(0))->getZExtValue();
683 return true;
684 }
685 }
686 return false;
687 }
688
GetStatelessBufferInfo(Value * pointer,unsigned & bufIdOrGRFOffset,BufferType & bufferTy,Value * & bufferSrcPtr,bool & isDirectBuf)689 bool GetStatelessBufferInfo(Value* pointer, unsigned& bufIdOrGRFOffset,
690 BufferType & bufferTy, Value*& bufferSrcPtr, bool& isDirectBuf)
691 {
692 isDirectBuf = false;
693 // If the buffer info is not encoded in the address space, we can still find it by
694 // tracing the pointer to where it's created.
695 Value * src = IGC::TracePointerSource(pointer);
696 BufferAccessType accType;
697 bool needBufferOffset; // Unused
698 if (!src) return false;
699 if (IGC::GetResourcePointerInfo(src, bufIdOrGRFOffset, bufferTy, accType, needBufferOffset))
700 {
701 bufferSrcPtr = src;
702 isDirectBuf = true;
703 return true;
704 }
705 else if (GetGRFOffsetFromRTV(src, bufIdOrGRFOffset))
706 {
707 bufferSrcPtr = src;
708 bufferTy = BUFFER_TYPE_UNKNOWN;
709 return true;
710 }
711 return false;
712 }
713
// Tries to fold 'address' to a compile-time-constant byte offset, relative
// to 'ptrSrc' when one is given (null pointer / ptrSrc itself fold to 0).
// Looks through bitcast/addrspacecast/ptrtoint chains, accepts constant
// inttoptr forms, and accumulates GEPs with all-constant indices using the
// data layout. Returns true and sets 'offset' on success, false otherwise.
bool EvalConstantAddress(Value* address, unsigned int& offset, const llvm::DataLayout* pDL, Value* ptrSrc)
{

    if ((ptrSrc == nullptr && isa<ConstantPointerNull>(address)) ||
        (ptrSrc == address))
    {
        // The base pointer itself: offset zero by definition.
        offset = 0;
        return true;
    }
    else if(ConstantInt* eltIdx = dyn_cast<ConstantInt>(address))
    {
        offset = int_cast<int>(eltIdx->getZExtValue());
        return true;
    }
    else if (ConstantExpr * ptrExpr = dyn_cast<ConstantExpr>(address))
    {
        // Constant inttoptr: the integer operand is the absolute offset.
        if (ptrExpr->getOpcode() == Instruction::IntToPtr)
        {
            Value* eltIdxVal = ptrExpr->getOperand(0);
            ConstantInt* eltIdx = dyn_cast<ConstantInt>(eltIdxVal);
            if (!eltIdx)
                return false;
            offset = int_cast<int>(eltIdx->getZExtValue());
            return true;
        }
    }
    else if (Instruction* ptrExpr = dyn_cast<Instruction>(address))
    {
        if (ptrExpr->getOpcode() == Instruction::BitCast ||
            ptrExpr->getOpcode() == Instruction::AddrSpaceCast)
        {
            // Casts do not change the address; look through them.
            return EvalConstantAddress(ptrExpr->getOperand(0), offset, pDL, ptrSrc);
        }
        if (ptrExpr->getOpcode() == Instruction::IntToPtr)
        {
            // Same as the constant-expression case, but as an instruction.
            Value * eltIdxVal = ptrExpr->getOperand(0);
            ConstantInt * eltIdx = dyn_cast<ConstantInt>(eltIdxVal);
            if (!eltIdx)
                return false;
            offset = int_cast<int>(eltIdx->getZExtValue());
            return true;
        }
        if (ptrExpr->getOpcode() == Instruction::PtrToInt)
        {
            offset = 0;
            if (!EvalConstantAddress(ptrExpr->getOperand(0), offset, pDL, ptrSrc))
            {
                return false;
            }
            return true;
        }
        else if (ptrExpr->getOpcode() == Instruction::GetElementPtr)
        {
            // Fold the base address first, then add the byte offset
            // contributed by each constant GEP index.
            offset = 0;
            if (!EvalConstantAddress(ptrExpr->getOperand(0), offset, pDL, ptrSrc))
            {
                return false;
            }
            Type * Ty = ptrExpr->getType();
            gep_type_iterator GTI = gep_type_begin(ptrExpr);
            for (auto OI = ptrExpr->op_begin() + 1, E = ptrExpr->op_end(); OI != E; ++OI, ++GTI) {
                Value * Idx = *OI;
                if (StructType * StTy = GTI.getStructTypeOrNull()) {
                    // Struct member: offset comes from the struct layout.
                    unsigned Field = int_cast<unsigned>(cast<ConstantInt>(Idx)->getZExtValue());
                    if (Field) {
                        offset += int_cast<int>(pDL->getStructLayout(StTy)->getElementOffset(Field));
                    }
                    Ty = StTy->getElementType(Field);
                }
                else {
                    // Sequential step: element alloc size times the signed
                    // constant index. Non-constant indices cannot be folded.
                    Ty = GTI.getIndexedType();
                    if (const ConstantInt * CI = dyn_cast<ConstantInt>(Idx)) {
                        offset += int_cast<int>(
                            pDL->getTypeAllocSize(Ty) * CI->getSExtValue());

                    }
                    else
                    {
                        return false;
                    }
                }
            }
            return true;
        }
    }
    return false;
}
801
802 // Get constant address from load/ldraw instruction
// Determines whether 'I' (a load or ldraw) reads from a constant buffer at
// a compile-time-constant address. On success fills 'cl' (bufId, eltId,
// size in bytes) and returns true. The three out-flags classify the access:
//   directBuf    - buffer id decoded directly (address space or trace),
//   statelessBuf - stateless load from ADDRESS_SPACE_CONSTANT,
//   bindlessBuf  - ldraw from an SSH bindless constant buffer.
bool getConstantAddress(llvm::Instruction& I, ConstantAddress& cl, CodeGenContext* pContext, bool& directBuf, bool& statelessBuf, bool& bindlessBuf)
{
    // Check if the load instruction is with constant buffer address
    unsigned as;
    Value* ptrVal;
    Value* offsetVal;
    directBuf = false;
    statelessBuf = false;
    bindlessBuf = false;
    bool isPushableAddr = false;
    // Aliases into the caller's ConstantAddress record.
    unsigned int& bufIdOrGRFOffset = cl.bufId;
    unsigned int& eltId = cl.eltId;
    unsigned int& size_in_bytes = cl.size;
    const llvm::DataLayout DL = pContext->getModule()->getDataLayout();

    // Only load and ldRaw instructions handled, rest should return
    if (LoadInst* load = llvm::dyn_cast<LoadInst> (&I))
    {
        as = load->getPointerAddressSpace();
        ptrVal = load->getPointerOperand();
        offsetVal = ptrVal;
        statelessBuf = (as == ADDRESS_SPACE_CONSTANT);
    }
    else if (LdRawIntrinsic* ldRaw = dyn_cast<LdRawIntrinsic>(&I))
    {
        as = ldRaw->getResourceValue()->getType()->getPointerAddressSpace();
        ptrVal = ldRaw->getResourceValue();
        offsetVal = ldRaw->getOffsetValue();
        bindlessBuf = (DecodeBufferType(as) == SSH_BINDLESS_CONSTANT_BUFFER);
    }
    else
        return false;

    size_in_bytes = 0;
    BufferType bufType;
    Value* pointerSrc = nullptr;

    if (statelessBuf || bindlessBuf)
    {
        // If the buffer info is not encoded in the address space, we can still find it by
        // tracing the pointer to where it's created.
        if (!GetStatelessBufferInfo(ptrVal, bufIdOrGRFOffset, bufType, pointerSrc, directBuf))
        {
            return false;
        }
        if (!directBuf)
        {
            // Make sure constant folding is safe by looking up in pushableAddresses
            PushInfo& pushInfo = pContext->getModuleMetaData()->pushInfo;

            // bufIdOrGRFOffset here is a GRF offset in dwords; the metadata
            // records offsets in bytes, hence the *4.
            for (auto it : pushInfo.pushableAddresses)
            {
                if ((bufIdOrGRFOffset * 4 == it.addressOffset) && (IGC_IS_FLAG_ENABLED(DisableStaticCheckForConstantFolding) || it.isStatic))
                {
                    isPushableAddr = true;
                    break;
                }
            }
        }
    }
    else
    {
        // Buffer info is encoded directly in the address space.
        bufType = IGC::DecodeAS4GFXResource(as, directBuf, bufIdOrGRFOffset);
    }
    // If it is statelessBuf, we made sure it is a constant buffer by finding it in pushableAddresses
    if ((directBuf && (bufType == CONSTANT_BUFFER)) ||
        (isPushableAddr && (statelessBuf || bindlessBuf)))
    {
        eltId = 0;
        if (!EvalConstantAddress(offsetVal, eltId, &DL, pointerSrc))
        {
            return false;
        }
    }
    else
    {
        return false;
    }
    size_in_bytes = (unsigned int)I.getType()->getPrimitiveSizeInBits() / 8;
    return true;
}
884
885
886 ///
887 /// Replaces oldPtr with newPtr in a sample/ld intrinsic's argument list. The new instrinsic will
888 /// replace the old one in the module
889 ///
ChangePtrTypeInIntrinsic(llvm::GenIntrinsicInst * & pIntr,llvm::Value * oldPtr,llvm::Value * newPtr)890 void ChangePtrTypeInIntrinsic(llvm::GenIntrinsicInst*& pIntr, llvm::Value* oldPtr, llvm::Value* newPtr)
891 {
892 llvm::Module* pModule = pIntr->getParent()->getParent()->getParent();
893 llvm::Function* pCalledFunc = pIntr->getCalledFunction();
894
895 // Look at the intrinsic and figure out which pointer to change
896 int num_ops = pIntr->getNumArgOperands();
897 llvm::SmallVector<llvm::Value*, 5> args;
898
899 for (int i = 0; i < num_ops; ++i)
900 {
901 if (pIntr->getArgOperand(i) == oldPtr)
902 args.push_back(newPtr);
903 else
904 args.push_back(pIntr->getArgOperand(i));
905 }
906
907 llvm::Function* pNewIntr = nullptr;
908 llvm::SmallVector<llvm::Type*, 4> overloadedTys;
909 GenISAIntrinsic::ID id = pIntr->getIntrinsicID();
910 switch (id)
911 {
912 case llvm::GenISAIntrinsic::GenISA_ldmcsptr:
913 {
914 llvm::Value* pTextureValue = cast<SamplerLoadIntrinsic>(pIntr)->getTextureValue();
915 overloadedTys.push_back(pCalledFunc->getReturnType());
916 overloadedTys.push_back(args[0]->getType());
917 overloadedTys.push_back(pTextureValue == oldPtr ? newPtr->getType() : pTextureValue->getType());
918 break;
919 }
920 case llvm::GenISAIntrinsic::GenISA_ldptr:
921 case llvm::GenISAIntrinsic::GenISA_ldmsptr:
922 {
923 llvm::Value* pTextureValue = cast<SamplerLoadIntrinsic>(pIntr)->getTextureValue();
924 overloadedTys.push_back(pCalledFunc->getReturnType());
925 overloadedTys.push_back(pTextureValue == oldPtr ? newPtr->getType() : pTextureValue->getType());
926 break;
927 }
928 case llvm::GenISAIntrinsic::GenISA_resinfoptr:
929 case llvm::GenISAIntrinsic::GenISA_readsurfaceinfoptr:
930 case llvm::GenISAIntrinsic::GenISA_sampleinfoptr:
931 overloadedTys.push_back(newPtr->getType());
932 break;
933 case llvm::GenISAIntrinsic::GenISA_sampleptr:
934 case llvm::GenISAIntrinsic::GenISA_sampleBptr:
935 case llvm::GenISAIntrinsic::GenISA_sampleCptr:
936 case llvm::GenISAIntrinsic::GenISA_sampleDptr:
937 case llvm::GenISAIntrinsic::GenISA_sampleLptr:
938 case llvm::GenISAIntrinsic::GenISA_sampleBCptr:
939 case llvm::GenISAIntrinsic::GenISA_sampleDCptr:
940 case llvm::GenISAIntrinsic::GenISA_sampleLCptr:
941 case llvm::GenISAIntrinsic::GenISA_gather4ptr:
942 case llvm::GenISAIntrinsic::GenISA_gather4POptr:
943 case llvm::GenISAIntrinsic::GenISA_gather4Cptr:
944 case llvm::GenISAIntrinsic::GenISA_gather4POCptr:
945 case llvm::GenISAIntrinsic::GenISA_lodptr:
946 {
947 // Figure out the intrinsic operands for texture & sampler
948 llvm::Value* pTextureValue = nullptr;
949 llvm::Value* pSamplerValue = nullptr;
950 IGC::getTextureAndSamplerOperands(
951 pIntr,
952 pTextureValue,
953 pSamplerValue);
954
955 overloadedTys.push_back(pCalledFunc->getReturnType());
956 overloadedTys.push_back(pIntr->getOperand(0)->getType());
957 overloadedTys.push_back(pTextureValue == oldPtr ? newPtr->getType() : pTextureValue->getType());
958 if (pSamplerValue != nullptr)
959 {
960 // Samplerless messages will not have sampler in signature.
961 overloadedTys.push_back(pSamplerValue == oldPtr ? newPtr->getType() : pSamplerValue->getType());
962 }
963 break;
964 }
965 case llvm::GenISAIntrinsic::GenISA_typedread:
966 case llvm::GenISAIntrinsic::GenISA_typedwrite:
967 case llvm::GenISAIntrinsic::GenISA_ldstructured:
968 case llvm::GenISAIntrinsic::GenISA_storestructured1:
969 case llvm::GenISAIntrinsic::GenISA_storestructured2:
970 case llvm::GenISAIntrinsic::GenISA_storestructured3:
971 case llvm::GenISAIntrinsic::GenISA_storestructured4:
972 overloadedTys.push_back(newPtr->getType());
973 break;
974 case llvm::GenISAIntrinsic::GenISA_intatomicraw:
975 case llvm::GenISAIntrinsic::GenISA_icmpxchgatomicraw:
976 case llvm::GenISAIntrinsic::GenISA_intatomicrawA64:
977 case llvm::GenISAIntrinsic::GenISA_icmpxchgatomicrawA64:
978 case llvm::GenISAIntrinsic::GenISA_floatatomicraw:
979 case llvm::GenISAIntrinsic::GenISA_floatatomicrawA64:
980 case llvm::GenISAIntrinsic::GenISA_fcmpxchgatomicraw:
981 case llvm::GenISAIntrinsic::GenISA_fcmpxchgatomicrawA64:
982 overloadedTys.push_back(pIntr->getType());
983 overloadedTys.push_back(newPtr->getType());
984 if (id == GenISAIntrinsic::GenISA_intatomicrawA64)
985 {
986 args[0] = args[1];
987 args[1] = CastInst::CreatePointerCast(args[1], Type::getInt32Ty(pModule->getContext()), "", pIntr);
988 id = GenISAIntrinsic::GenISA_intatomicraw;
989 }
990 else if (id == GenISAIntrinsic::GenISA_icmpxchgatomicrawA64)
991 {
992 args[0] = args[1];
993 args[1] = CastInst::CreatePointerCast(args[1], Type::getInt32Ty(pModule->getContext()), "", pIntr);
994 id = GenISAIntrinsic::GenISA_icmpxchgatomicraw;
995 }
996 else if (id == GenISAIntrinsic::GenISA_floatatomicrawA64)
997 {
998 args[0] = args[1];
999 args[1] = CastInst::CreatePointerCast(args[1], Type::getFloatTy(pModule->getContext()), "", pIntr);
1000 id = GenISAIntrinsic::GenISA_floatatomicraw;
1001 }
1002 else if (id == GenISAIntrinsic::GenISA_fcmpxchgatomicrawA64)
1003 {
1004 args[0] = args[1];
1005 args[1] = CastInst::CreatePointerCast(args[1], Type::getFloatTy(pModule->getContext()), "", pIntr);
1006 id = GenISAIntrinsic::GenISA_fcmpxchgatomicraw;
1007 }
1008 break;
1009 case llvm::GenISAIntrinsic::GenISA_dwordatomicstructured:
1010 case llvm::GenISAIntrinsic::GenISA_floatatomicstructured:
1011 case llvm::GenISAIntrinsic::GenISA_cmpxchgatomicstructured:
1012 case llvm::GenISAIntrinsic::GenISA_fcmpxchgatomicstructured:
1013 overloadedTys.push_back(pIntr->getType());
1014 overloadedTys.push_back(args[0]->getType());
1015 break;
1016 case GenISAIntrinsic::GenISA_intatomictyped:
1017 case GenISAIntrinsic::GenISA_icmpxchgatomictyped:
1018 overloadedTys.push_back(pIntr->getType());
1019 overloadedTys.push_back(newPtr->getType());
1020 break;
1021 case GenISAIntrinsic::GenISA_atomiccounterinc:
1022 case GenISAIntrinsic::GenISA_atomiccounterpredec:
1023 overloadedTys.push_back(args[0]->getType());
1024 break;
1025 case llvm::GenISAIntrinsic::GenISA_ldrawvector_indexed:
1026 case llvm::GenISAIntrinsic::GenISA_ldraw_indexed:
1027 overloadedTys.push_back(pCalledFunc->getReturnType());
1028 overloadedTys.push_back(newPtr->getType());
1029 break;
1030 case llvm::GenISAIntrinsic::GenISA_storerawvector_indexed:
1031 case llvm::GenISAIntrinsic::GenISA_storeraw_indexed:
1032 overloadedTys.push_back(newPtr->getType());
1033 overloadedTys.push_back(args[2]->getType());
1034 break;
1035 default:
1036 IGC_ASSERT_MESSAGE(0, "Unknown intrinsic encountered while changing pointer types");
1037 break;
1038 }
1039
1040 pNewIntr = llvm::GenISAIntrinsic::getDeclaration(
1041 pModule,
1042 id,
1043 overloadedTys);
1044
1045 llvm::CallInst* pNewCall = llvm::CallInst::Create(pNewIntr, args, "", pIntr);
1046 pNewCall->setDebugLoc(pIntr->getDebugLoc());
1047
1048 pIntr->replaceAllUsesWith(pNewCall);
1049 pIntr->eraseFromParent();
1050
1051 pIntr = llvm::cast<llvm::GenIntrinsicInst>(pNewCall);
1052 }
1053
1054 ///
1055 /// Returns the sampler/texture pointers for resource access intrinsics
1056 ///
getTextureAndSamplerOperands(llvm::GenIntrinsicInst * pIntr,llvm::Value * & pTextureValue,llvm::Value * & pSamplerValue)1057 void getTextureAndSamplerOperands(
1058 llvm::GenIntrinsicInst* pIntr,
1059 llvm::Value*& pTextureValue,
1060 llvm::Value*& pSamplerValue)
1061 {
1062 if (llvm::SamplerLoadIntrinsic * pSamplerLoadInst = llvm::dyn_cast<llvm::SamplerLoadIntrinsic>(pIntr))
1063 {
1064 pTextureValue = pSamplerLoadInst->getTextureValue();
1065 pSamplerValue = nullptr;
1066 }
1067 else if (llvm::SampleIntrinsic * pSampleInst = llvm::dyn_cast<llvm::SampleIntrinsic>(pIntr))
1068 {
1069 pTextureValue = pSampleInst->getTextureValue();
1070 pSamplerValue = pSampleInst->getSamplerValue();
1071 }
1072 else if (llvm::SamplerGatherIntrinsic * pGatherInst = llvm::dyn_cast<llvm::SamplerGatherIntrinsic>(pIntr))
1073 {
1074 pTextureValue = pGatherInst->getTextureValue();
1075 pSamplerValue = pGatherInst->getSamplerValue();
1076 }
1077 else
1078 {
1079 pTextureValue = nullptr;
1080 pSamplerValue = nullptr;
1081 switch (pIntr->getIntrinsicID())
1082 {
1083 case llvm::GenISAIntrinsic::GenISA_resinfoptr:
1084 case llvm::GenISAIntrinsic::GenISA_readsurfaceinfoptr:
1085 case llvm::GenISAIntrinsic::GenISA_sampleinfoptr:
1086 case llvm::GenISAIntrinsic::GenISA_typedwrite:
1087 case llvm::GenISAIntrinsic::GenISA_typedread:
1088 pTextureValue = pIntr->getOperand(0);
1089 break;
1090 default:
1091 break;
1092 }
1093 }
1094 }
1095
// Get the buffer pointer operand for supported buffer access instructions.
// Returns nullptr when 'inst' is not a recognized buffer access.
Value* GetBufferOperand(Instruction* inst)
{
    Value* pBuffer = nullptr;
    if (LoadInst * load = dyn_cast<LoadInst>(inst))
    {
        pBuffer = load->getPointerOperand();
    }
    else if (StoreInst * store = dyn_cast<StoreInst>(inst))
    {
        pBuffer = store->getPointerOperand();
    }
    else if (GenIntrinsicInst * intr = dyn_cast<GenIntrinsicInst>(inst))
    {
        switch (intr->getIntrinsicID())
        {
        // These intrinsics carry the buffer pointer as operand 0.
        case GenISAIntrinsic::GenISA_storerawvector_indexed:
        case GenISAIntrinsic::GenISA_ldrawvector_indexed:
        case GenISAIntrinsic::GenISA_storeraw_indexed:
        case GenISAIntrinsic::GenISA_ldraw_indexed:
        case GenISAIntrinsic::GenISA_intatomicraw:
        case GenISAIntrinsic::GenISA_intatomictyped:
        case GenISAIntrinsic::GenISA_icmpxchgatomictyped:
        case GenISAIntrinsic::GenISA_floatatomicraw:
        case GenISAIntrinsic::GenISA_icmpxchgatomicraw:
        case GenISAIntrinsic::GenISA_fcmpxchgatomicraw:
        case GenISAIntrinsic::GenISA_simdBlockRead:
        case GenISAIntrinsic::GenISA_simdBlockWrite:
            pBuffer = intr->getOperand(0);
            break;
        // The A64 (stateless) atomics carry the address as operand 1.
        case GenISAIntrinsic::GenISA_intatomicrawA64:
        case GenISAIntrinsic::GenISA_floatatomicrawA64:
        case GenISAIntrinsic::GenISA_icmpxchgatomicrawA64:
        case GenISAIntrinsic::GenISA_fcmpxchgatomicrawA64:
            pBuffer = intr->getOperand(1);
            break;
        default:
            break;
        }
    }
    return pBuffer;
}
1138
GetOpCode(const llvm::Instruction * inst)1139 EOPCODE GetOpCode(const llvm::Instruction* inst)
1140 {
1141 if (const GenIntrinsicInst * CI = dyn_cast<GenIntrinsicInst>(inst))
1142 {
1143 unsigned ID = CI->getIntrinsicID();
1144 return (EOPCODE)(OPCODE(ID, e_Intrinsic));
1145 }
1146 else if (const IntrinsicInst * CI = llvm::dyn_cast<llvm::IntrinsicInst>(inst))
1147 {
1148 unsigned ID = CI->getIntrinsicID();
1149 return (EOPCODE)(OPCODE(ID, e_Intrinsic));
1150 }
1151 return (EOPCODE)(OPCODE(inst->getOpcode(), e_Instruction));
1152 }
1153
GetBufferType(uint addrSpace)1154 BufferType GetBufferType(uint addrSpace)
1155 {
1156 bool directIndexing = false;
1157 unsigned int bufId = 0;
1158 return DecodeAS4GFXResource(addrSpace, directIndexing, bufId);
1159 }
1160
IsReadOnlyLoadDirectCB(llvm::Instruction * pLLVMInst,uint & cbId,llvm::Value * & eltPtrVal,BufferType & bufType)1161 bool IsReadOnlyLoadDirectCB(llvm::Instruction* pLLVMInst,
1162 uint& cbId, llvm::Value*& eltPtrVal, BufferType& bufType)
1163 {
1164 LoadInst* inst = dyn_cast<LoadInst>(pLLVMInst);
1165 if (!inst)
1166 {
1167 return false;
1168 }
1169 bool isInvLoad = inst->getMetadata(LLVMContext::MD_invariant_load) != nullptr;
1170 unsigned as = inst->getPointerAddressSpace();
1171 bool directBuf;
1172 // cbId gets filled in the following call;
1173 bufType = IGC::DecodeAS4GFXResource(as, directBuf, cbId);
1174 if ((bufType == CONSTANT_BUFFER || bufType == RESOURCE || isInvLoad) && directBuf)
1175 {
1176 Value* ptrVal = inst->getPointerOperand();
1177 // skip bitcast and find the real address computation
1178 while (isa<BitCastInst>(ptrVal))
1179 {
1180 ptrVal = cast<BitCastInst>(ptrVal)->getOperand(0);
1181 }
1182 if (isa<ConstantPointerNull>(ptrVal) ||
1183 isa<IntToPtrInst>(ptrVal) ||
1184 isa<GetElementPtrInst>(ptrVal) ||
1185 isa<ConstantExpr>(ptrVal) ||
1186 isa<LoadInst>(ptrVal) ||
1187 isa<Argument>(ptrVal))
1188 {
1189 eltPtrVal = ptrVal;
1190 return true;
1191 }
1192 }
1193 return false;
1194 }
1195
IsLoadFromDirectCB(llvm::Instruction * pLLVMInst,uint & cbId,llvm::Value * & eltPtrVal)1196 bool IsLoadFromDirectCB(llvm::Instruction* pLLVMInst, uint& cbId, llvm::Value*& eltPtrVal)
1197 {
1198 BufferType bufType = BUFFER_TYPE_UNKNOWN;
1199 bool isReadOnly = IsReadOnlyLoadDirectCB(pLLVMInst, cbId, eltPtrVal, bufType);
1200 return isReadOnly && bufType == CONSTANT_BUFFER;
1201 }
1202
1203 /// this is texture-load not buffer-load
isLdInstruction(llvm::Instruction * inst)1204 bool isLdInstruction(llvm::Instruction* inst)
1205 {
1206 return isa<SamplerLoadIntrinsic>(inst);
1207 }
1208
1209 // function returns the position of the texture operand for sample/ld instructions
getTextureIndexArgBasedOnOpcode(llvm::Instruction * inst)1210 llvm::Value* getTextureIndexArgBasedOnOpcode(llvm::Instruction* inst)
1211 {
1212 if (isLdInstruction(inst))
1213 {
1214 return cast<SamplerLoadIntrinsic>(inst)->getTextureValue();
1215 }
1216 else if (isSampleInstruction(inst))
1217 {
1218 return cast<SampleIntrinsic>(inst)->getTextureValue();
1219 }
1220 else if (isGather4Instruction(inst))
1221 {
1222 return cast<SamplerGatherIntrinsic>(inst)->getTextureValue();
1223 }
1224
1225 return nullptr;
1226 }
1227
// Returns the texture index used by a sample/ld/gather instruction, or -1
// when no usable index can be determined (no texture operand, a
// GetBufferPtr-based access, or a UAV resource).
int findSampleInstructionTextureIdx(llvm::Instruction* inst)
{
    // fetch the textureArgIdx.
    Value* ptr = getTextureIndexArgBasedOnOpcode(inst);
    // Kept as 'unsigned' because DecodeAS4GFXResource writes through an
    // unsigned out-parameter; the -1 sentinel wraps to UINT_MAX here and
    // converts back to -1 at the int return below.
    unsigned textureIdx = -1;

    if (ptr && ptr->getType()->isPointerTy())
    {
        BufferType bufType = BUFFER_TYPE_UNKNOWN;
        // GetBufferPtr-produced pointers have no decodable address space.
        if (!(isa<GenIntrinsicInst>(ptr) &&
            cast<GenIntrinsicInst>(ptr)->getIntrinsicID() == GenISAIntrinsic::GenISA_GetBufferPtr))
        {
            uint as = ptr->getType()->getPointerAddressSpace();
            bool directIndexing;
            // textureIdx is filled in by the decode call.
            bufType = DecodeAS4GFXResource(as, directIndexing, textureIdx);
            if (bufType == UAV)
            {
                // dont do any clustering on read/write images
                textureIdx = -1;
            }
        }
    }
    else if (ptr)
    {
        // Non-pointer operand: accept only an immediate constant index.
        if (llvm::dyn_cast<llvm::ConstantInt>(ptr))
        {
            textureIdx = int_cast<unsigned>(GetImmediateVal(ptr));
        }
    }

    return textureIdx;
}
1260
isSampleLoadGather4InfoInstruction(llvm::Instruction * inst)1261 bool isSampleLoadGather4InfoInstruction(llvm::Instruction* inst)
1262 {
1263 if (isa<GenIntrinsicInst>(inst))
1264 {
1265 switch ((cast<GenIntrinsicInst>(inst))->getIntrinsicID())
1266 {
1267 case GenISAIntrinsic::GenISA_sampleptr:
1268 case GenISAIntrinsic::GenISA_sampleBptr:
1269 case GenISAIntrinsic::GenISA_sampleCptr:
1270 case GenISAIntrinsic::GenISA_sampleDptr:
1271 case GenISAIntrinsic::GenISA_sampleDCptr:
1272 case GenISAIntrinsic::GenISA_sampleLptr:
1273 case GenISAIntrinsic::GenISA_sampleLCptr:
1274 case GenISAIntrinsic::GenISA_sampleBCptr:
1275 case GenISAIntrinsic::GenISA_lodptr:
1276 case GenISAIntrinsic::GenISA_ldptr:
1277 case GenISAIntrinsic::GenISA_ldmsptr:
1278 case GenISAIntrinsic::GenISA_ldmsptr16bit:
1279 case GenISAIntrinsic::GenISA_ldmcsptr:
1280 case GenISAIntrinsic::GenISA_sampleinfoptr:
1281 case GenISAIntrinsic::GenISA_resinfoptr:
1282 case GenISAIntrinsic::GenISA_gather4ptr:
1283 case GenISAIntrinsic::GenISA_gather4Cptr:
1284 case GenISAIntrinsic::GenISA_gather4POptr:
1285 case GenISAIntrinsic::GenISA_gather4POCptr:
1286 return true;
1287 default:
1288 return false;
1289 }
1290 }
1291
1292 return false;
1293 }
1294
isSampleInstruction(llvm::Instruction * inst)1295 bool isSampleInstruction(llvm::Instruction* inst)
1296 {
1297 return isa<SampleIntrinsic>(inst);
1298 }
1299
isInfoInstruction(llvm::Instruction * inst)1300 bool isInfoInstruction(llvm::Instruction* inst)
1301 {
1302 return isa<InfoIntrinsic>(inst);
1303 }
1304
isGather4Instruction(llvm::Instruction * inst)1305 bool isGather4Instruction(llvm::Instruction* inst)
1306 {
1307 return isa<SamplerGatherIntrinsic>(inst);
1308 }
1309
IsMediaIOIntrinsic(llvm::Instruction * inst)1310 bool IsMediaIOIntrinsic(llvm::Instruction* inst)
1311 {
1312 if (auto * pGI = dyn_cast<llvm::GenIntrinsicInst>(inst))
1313 {
1314 GenISAIntrinsic::ID id = pGI->getIntrinsicID();
1315
1316 return id == GenISAIntrinsic::GenISA_MediaBlockRead ||
1317 id == GenISAIntrinsic::GenISA_MediaBlockWrite;
1318 }
1319
1320 return false;
1321 }
1322
IsSIMDBlockIntrinsic(llvm::Instruction * inst)1323 bool IsSIMDBlockIntrinsic(llvm::Instruction* inst)
1324 {
1325 if (auto * pGI = dyn_cast<llvm::GenIntrinsicInst>(inst))
1326 {
1327 GenISAIntrinsic::ID id = pGI->getIntrinsicID();
1328
1329 return id == GenISAIntrinsic::GenISA_simdBlockRead ||
1330 id == GenISAIntrinsic::GenISA_simdBlockWrite;
1331 }
1332
1333 return false;
1334 }
1335
isSubGroupIntrinsic(const llvm::Instruction * I)1336 bool isSubGroupIntrinsic(const llvm::Instruction* I)
1337 {
1338 const GenIntrinsicInst* GII = dyn_cast<GenIntrinsicInst>(I);
1339 if (!GII)
1340 return false;
1341
1342 switch (GII->getIntrinsicID())
1343 {
1344 case GenISAIntrinsic::GenISA_WaveShuffleIndex:
1345 case GenISAIntrinsic::GenISA_simdShuffleDown:
1346 case GenISAIntrinsic::GenISA_simdBlockRead:
1347 case GenISAIntrinsic::GenISA_simdBlockWrite:
1348 case GenISAIntrinsic::GenISA_simdMediaBlockRead:
1349 case GenISAIntrinsic::GenISA_simdMediaBlockWrite:
1350 case GenISAIntrinsic::GenISA_MediaBlockWrite:
1351 case GenISAIntrinsic::GenISA_MediaBlockRead:
1352 return true;
1353 default:
1354 return false;
1355 }
1356 }
1357
isURBWriteIntrinsic(const llvm::Instruction * I)1358 bool isURBWriteIntrinsic(const llvm::Instruction* I)
1359 {
1360 const GenIntrinsicInst* GII = dyn_cast<GenIntrinsicInst>(I);
1361 if (!GII)
1362 return false;
1363
1364 return GII->getIntrinsicID() == GenISA_URBWrite;
1365
1366 }
1367
// Rewrites a GenISA_DCL_SystemValue call whose result type does not match
// the type the HW delivers for that SGV. For the thread-id-in-group usages,
// a new 16-bit intrinsic call is emitted, widened/truncated and bit-cast to
// the original call's type, and the original call is replaced and erased.
// Returns the replacement instruction (or the original call if untouched).
// NOTE: on the conversion path 'inst' is erased - callers must not use the
// old pointer afterwards.
llvm::Instruction* AdjustSystemValueCall(llvm::GenIntrinsicInst* inst)
{
    IGC_ASSERT(inst->getIntrinsicID() == GenISAIntrinsic::GenISA_DCL_SystemValue);
    llvm::Module* pModule = inst->getParent()->getParent()->getParent();
    auto CommonConvertFunc = [pModule](llvm::GenIntrinsicInst* inst, llvm::Type* outputType)
    {
        // Only scalar-to-scalar conversions are handled.
        IGC_ASSERT(outputType->isVectorTy() == false);
        IGC_ASSERT(inst->getType()->isVectorTy() == false);
        llvm::Instruction* result = inst;
        if (inst->getType() != outputType)
        {
            llvm::IRBuilder<> builder(inst);
            // Re-declare the intrinsic overloaded on the HW-native type.
            llvm::Function* systemValueFunc = llvm::GenISAIntrinsic::getDeclaration(pModule, GenISAIntrinsic::GenISA_DCL_SystemValue, outputType);
            llvm::Instruction* sgv = builder.CreateCall(systemValueFunc, inst->getOperand(0));
            // a default system value intrinsic function returns a float value. The returned value is bit casted to an appropriate integer or floating point value
            // in reference to HW specification. Casting from floating point to integer and in the opposite direction is not expected.
            sgv = llvm::cast<llvm::Instruction>(builder.CreateZExtOrTrunc(sgv, builder.getIntNTy((unsigned int)inst->getType()->getPrimitiveSizeInBits())));
            sgv = llvm::cast<llvm::Instruction>(builder.CreateBitCast(sgv, inst->getType()));
            // Replace the old call everywhere, then remove it.
            inst->replaceAllUsesWith(sgv);
            inst->eraseFromParent();
            result = sgv;
        }
        return result;
    };

    SGVUsage usage = static_cast<SGVUsage>(llvm::cast<llvm::ConstantInt>(inst->getOperand(0))->getZExtValue());
    llvm::Instruction* result = inst;

    switch (usage)
    {
    // Thread ids within a group are 16-bit quantities in HW.
    case THREAD_ID_IN_GROUP_X:
    case THREAD_ID_IN_GROUP_Y:
    case THREAD_ID_IN_GROUP_Z:
        result = CommonConvertFunc(inst, llvm::IntegerType::get(pModule->getContext(), 16));
        break;
    default:
        break;
    }
    return result;
}
1408
1409 bool isReadInput(llvm::Instruction* pLLVMInstr);
1410
// X-macro over opCode.h: each opcode entry expands to a case returning its
// 'modifiers' column (whether the instruction supports source modifiers).
#define DECLARE_OPCODE(instName, llvmType, name, modifiers, sat, pred, condMod, mathIntrinsic, atomicIntrinsic, regioning) \
    case name:\
    return modifiers;
// Returns true if 'inst' supports source modifiers (e.g. neg/abs) in vISA.
bool SupportsModifier(llvm::Instruction* inst)
{
    // Special cases
    switch (inst->getOpcode())
    {
    case Instruction::ICmp:
    {
        // icmp supports modifier unless it is unsigned
        CmpInst* cmp = cast<ICmpInst>(inst);
        return !cmp->isUnsigned();
    }
    case Instruction::Mul:
        // integer mul supports modifier if not int64.
        return !inst->getType()->isIntegerTy(64);
    case Instruction::URem:
        // neg mod is negative. Disable it as URem must have positive operands,
        return false;
    default:
        break;
    }

    // Fall back to the table in opCode.h.
    switch (GetOpCode(inst))
    {
#include "opCode.h"
    default:
        return false;
    }
}
#undef DECLARE_OPCODE
1443
// X-macro over opCode.h: each entry expands to a case returning its 'sat'
// column (whether the instruction supports destination saturation).
#define DECLARE_OPCODE(instName, llvmType, name, modifiers, sat, pred, condMod, mathIntrinsic, atomicIntrinsic, regioning) \
    case name:\
    return sat;
// Returns true if 'inst' supports the saturate destination modifier.
bool SupportsSaturate(llvm::Instruction* inst)
{
    switch (GetOpCode(inst))
    {
#include "opCode.h"
    default:
        break;
    }
    return false;
}
#undef DECLARE_OPCODE
1458
// X-macro over opCode.h: each entry expands to a case returning its 'pred'
// column (whether the instruction supports predication).
#define DECLARE_OPCODE(instName, llvmType, name, modifiers, sat, pred, condMod, mathIntrinsic, atomicIntrinsic, regioning) \
    case name:\
    return pred;
// Returns true if 'inst' can be predicated in vISA.
bool SupportsPredicate(llvm::Instruction* inst)
{
    switch (GetOpCode(inst))
    {
#include "opCode.h"
    default:
        return false;
    }
}
#undef DECLARE_OPCODE
1472
// X-macro over opCode.h: each entry expands to a case returning its 'condMod'
// column (whether the instruction supports a conditional modifier).
#define DECLARE_OPCODE(instName, llvmType, name, modifiers, sat, pred, condMod, mathIntrinsic, atomicIntrinsic, regioning) \
    case name:\
    return condMod;
// Returns true if 'inst' supports a conditional modifier in vISA.
bool SupportsCondModifier(llvm::Instruction* inst)
{
    switch (GetOpCode(inst))
    {
#include "opCode.h"
    default:
        return false;
    }
}
#undef DECLARE_OPCODE
1486
// X-macro over opCode.h: each entry expands to a case returning its
// 'regioning' column (whether the instruction supports register regioning).
#define DECLARE_OPCODE(instName, llvmType, name, modifiers, sat, pred, condMod, mathIntrinsic, atomicIntrinsic, regioning) \
    case name:\
    return regioning;
// Returns true if 'inst' supports register regioning in vISA.
bool SupportsRegioning(llvm::Instruction* inst)
{
    switch (GetOpCode(inst))
    {
#include "opCode.h"
    default:
        break;
    }
    return false;
}
#undef DECLARE_OPCODE
1501
// X-macro over opCode.h: each entry expands to a case returning its
// 'mathIntrinsic' column.
#define DECLARE_OPCODE(instName, llvmType, name, modifiers, sat, pred, condMod, mathIntrinsic, atomicIntrinsic, regioning) \
    case name:\
    return mathIntrinsic;
// Returns true if the opcode is classified as a math intrinsic in opCode.h.
bool IsMathIntrinsic(EOPCODE opcode)
{
    switch (opcode)
    {
#include "opCode.h"
    default:
        return false;
    }
}
#undef DECLARE_OPCODE
1515
// X-macro over opCode.h: each entry expands to a case returning its
// 'atomicIntrinsic' column.
#define DECLARE_OPCODE(instName, llvmType, name, modifiers, sat, pred, condMod, mathIntrinsic, atomicIntrinsic, regioning) \
    case name:\
    return atomicIntrinsic;
// Returns true if the opcode is classified as an atomic intrinsic in opCode.h.
bool IsAtomicIntrinsic(EOPCODE opcode)
{
    switch (opcode)
    {
#include "opCode.h"
    default:
        return false;
    }
}
#undef DECLARE_OPCODE
1529
IsExtendedMathInstruction(llvm::Instruction * Inst)1530 bool IsExtendedMathInstruction(llvm::Instruction* Inst)
1531 {
1532 EOPCODE opcode = GetOpCode(Inst);
1533 switch (opcode)
1534 {
1535 case llvm_fdiv:
1536 case llvm_sdiv:
1537 case llvm_udiv:
1538 case llvm_log:
1539 case llvm_exp:
1540 case llvm_sqrt:
1541 case llvm_sin:
1542 case llvm_cos:
1543 case llvm_pow:
1544 return true;
1545 default:
1546 return false;
1547 }
1548 return false;
1549 }
1550 // for now just include shuffle, reduce and scan,
1551 // which have simd32 implementations and should not be split into two instances
IsSubGroupIntrinsicWithSimd32Implementation(EOPCODE opcode)1552 bool IsSubGroupIntrinsicWithSimd32Implementation(EOPCODE opcode)
1553 {
1554 return (opcode == llvm_waveAll ||
1555 opcode == llvm_waveClustered ||
1556 opcode == llvm_wavePrefix ||
1557 opcode == llvm_waveShuffleIndex ||
1558 opcode == llvm_simdShuffleDown ||
1559 opcode == llvm_simdBlockRead||
1560 opcode == llvm_simdBlockReadBindless);
1561 }
1562
1563
IsGradientIntrinsic(EOPCODE opcode)1564 bool IsGradientIntrinsic(EOPCODE opcode)
1565 {
1566 return(opcode == llvm_gradientX ||
1567 opcode == llvm_gradientY ||
1568 opcode == llvm_gradientXfine ||
1569 opcode == llvm_gradientYfine);
1570 }
1571
IsStatelessMemLoadIntrinsic(llvm::GenISAIntrinsic::ID id)1572 bool IsStatelessMemLoadIntrinsic(llvm::GenISAIntrinsic::ID id)
1573 {
1574 switch(id)
1575 {
1576 case GenISAIntrinsic::GenISA_simdBlockRead:
1577 return true;
1578 default:
1579 break;
1580 }
1581 return false;
1582 }
1583
IsStatelessMemStoreIntrinsic(llvm::GenISAIntrinsic::ID id)1584 bool IsStatelessMemStoreIntrinsic(llvm::GenISAIntrinsic::ID id)
1585 {
1586 switch (id) {
1587 case GenISAIntrinsic::GenISA_simdBlockWrite:
1588 return true;
1589 default:
1590 break;
1591 }
1592 return false;
1593 }
1594
IsStatelessMemAtomicIntrinsic(GenIntrinsicInst & inst,GenISAIntrinsic::ID id)1595 bool IsStatelessMemAtomicIntrinsic(GenIntrinsicInst& inst, GenISAIntrinsic::ID id)
1596 {
1597 // This includes:
1598 // GenISA_intatomicraw
1599 // GenISA_floatatomicraw
1600 // GenISA_intatomicrawA64
1601 // GenISA_floatatomicrawA64
1602 // GenISA_icmpxchgatomicraw
1603 // GenISA_fcmpxchgatomicraw
1604 // GenISA_icmpxchgatomicrawA64
1605 // GenISA_fcmpxchgatomicrawA64
1606 if (IsAtomicIntrinsic(GetOpCode(&inst)))
1607 return true;
1608
1609 return false;
1610 }
1611
ComputesGradient(llvm::Instruction * inst)1612 bool ComputesGradient(llvm::Instruction* inst)
1613 {
1614 llvm::SampleIntrinsic* sampleInst = dyn_cast<llvm::SampleIntrinsic>(inst);
1615 if (sampleInst && sampleInst->IsDerivative())
1616 {
1617 return true;
1618 }
1619 if (IsGradientIntrinsic(GetOpCode(inst)))
1620 {
1621 return true;
1622 }
1623 return false;
1624 }
1625
getImmValueU32(const llvm::Value * value)1626 uint getImmValueU32(const llvm::Value* value)
1627 {
1628 const llvm::ConstantInt* cval = llvm::cast<llvm::ConstantInt>(value);
1629 IGC_ASSERT(nullptr != cval);
1630 IGC_ASSERT(cval->getBitWidth() == 32);
1631
1632 uint ival = int_cast<uint>(cval->getZExtValue());
1633 return ival;
1634 }
1635
getImmValueBool(const llvm::Value * value)1636 bool getImmValueBool(const llvm::Value* value)
1637 {
1638 const llvm::ConstantInt* cval = llvm::cast<llvm::ConstantInt>(value);
1639 IGC_ASSERT(nullptr != cval);
1640 IGC_ASSERT(cval->getBitWidth() == 1);
1641
1642 return cval->getValue().getBoolValue();
1643 }
1644
ExtractElementFromInsertChain(llvm::Value * inst,int pos)1645 llvm::Value* ExtractElementFromInsertChain(llvm::Value* inst, int pos)
1646 {
1647
1648 llvm::ConstantDataVector* cstV = llvm::dyn_cast<llvm::ConstantDataVector>(inst);
1649 if (cstV != NULL) {
1650 return cstV->getElementAsConstant(pos);
1651 }
1652
1653 llvm::InsertElementInst* ie = llvm::dyn_cast<llvm::InsertElementInst>(inst);
1654 while (ie != NULL) {
1655 int64_t iOffset = llvm::dyn_cast<llvm::ConstantInt>(ie->getOperand(2))->getSExtValue();
1656 IGC_ASSERT(iOffset >= 0);
1657 if (iOffset == pos) {
1658 return ie->getOperand(1);
1659 }
1660 llvm::Value* insertBase = ie->getOperand(0);
1661 ie = llvm::dyn_cast<llvm::InsertElementInst>(insertBase);
1662 }
1663 return NULL;
1664 }
1665
ExtractVec4FromInsertChain(llvm::Value * inst,llvm::Value * elem[4],llvm::SmallVector<llvm::Instruction *,10> & instructionToRemove)1666 bool ExtractVec4FromInsertChain(llvm::Value* inst, llvm::Value* elem[4], llvm::SmallVector<llvm::Instruction*, 10> & instructionToRemove)
1667 {
1668 llvm::ConstantDataVector* cstV = llvm::dyn_cast<llvm::ConstantDataVector>(inst);
1669 if (cstV != NULL) {
1670 IGC_ASSERT(cstV->getNumElements() == 4);
1671 for (int i = 0; i < 4; i++) {
1672 elem[i] = cstV->getElementAsConstant(i);
1673 }
1674 return true;
1675 }
1676
1677 for (int i = 0; i < 4; i++) {
1678 elem[i] = NULL;
1679 }
1680
1681 int count = 0;
1682 llvm::InsertElementInst* ie = llvm::dyn_cast<llvm::InsertElementInst>(inst);
1683 while (ie != NULL) {
1684 int64_t iOffset = llvm::dyn_cast<llvm::ConstantInt>(ie->getOperand(2))->getSExtValue();
1685 IGC_ASSERT(iOffset >= 0);
1686 if (elem[iOffset] == NULL) {
1687 elem[iOffset] = ie->getOperand(1);
1688 count++;
1689 if (ie->hasOneUse()) {
1690 instructionToRemove.push_back(ie);
1691 }
1692 }
1693 llvm::Value* insertBase = ie->getOperand(0);
1694 ie = llvm::dyn_cast<llvm::InsertElementInst>(insertBase);
1695 }
1696 return (count == 4);
1697 }
1698
VectorToElement(llvm::Value * inst,llvm::Value * elem[],llvm::Type * int32Ty,llvm::Instruction * insert_before,int vsize)1699 void VectorToElement(llvm::Value* inst, llvm::Value* elem[], llvm::Type* int32Ty, llvm::Instruction* insert_before, int vsize)
1700 {
1701 for (int i = 0; i < vsize; i++) {
1702 if (elem[i] == nullptr) {
1703 // Create an ExtractElementInst
1704 elem[i] = llvm::ExtractElementInst::Create(inst, llvm::ConstantInt::get(int32Ty, i), "", insert_before);
1705 }
1706 }
1707 }
1708
ElementToVector(llvm::Value * elem[],llvm::Type * int32Ty,llvm::Instruction * insert_before,int vsize)1709 llvm::Value* ElementToVector(llvm::Value* elem[], llvm::Type* int32Ty, llvm::Instruction* insert_before, int vsize)
1710 {
1711 llvm::VectorType* vt = IGCLLVM::FixedVectorType::get(elem[0]->getType(), vsize);
1712 llvm::Value* vecValue = llvm::UndefValue::get(vt);
1713
1714 for (int i = 0; i < vsize; ++i)
1715 {
1716
1717 vecValue = llvm::InsertElementInst::Create(vecValue, elem[i], llvm::ConstantInt::get(int32Ty, i), "", insert_before);
1718 ((Instruction*)vecValue)->setDebugLoc(insert_before->getDebugLoc());
1719 }
1720 return vecValue;
1721 }
1722
ConvertToFloat(llvm::IRBuilder<> & builder,llvm::Value * val)1723 llvm::Value* ConvertToFloat(llvm::IRBuilder<>& builder, llvm::Value* val)
1724 {
1725 llvm::Value* ret = val;
1726 llvm::Type* type = val->getType();
1727 IGC_ASSERT(nullptr != type);
1728 IGC_ASSERT_MESSAGE(type->isSingleValueType(), "Only scalar data is supported here");
1729 IGC_ASSERT_MESSAGE(!type->isVectorTy(), "Only scalar data is supported here");
1730 IGC_ASSERT((type->getTypeID() == Type::FloatTyID) || (type->getTypeID() == Type::HalfTyID) || (type->getTypeID() == Type::IntegerTyID) || (type->getTypeID() == Type::DoubleTyID));
1731
1732 unsigned dataSize = type->getScalarSizeInBits();
1733 if (16 == dataSize){
1734 ret = builder.CreateFPExt(builder.CreateBitCast(val, builder.getHalfTy()), builder.getFloatTy());
1735 }else if (32 == dataSize){
1736 ret = builder.CreateBitCast(val, builder.getFloatTy());
1737 }else if (64 == dataSize){
1738 llvm::Type* vecType = IGCLLVM::FixedVectorType::get(builder.getFloatTy(), 2);
1739 ret = builder.CreateBitCast(val, vecType);
1740 }else{
1741 IGC_ASSERT_EXIT_MESSAGE(0, "Unsupported type in ConvertToFloat of helper.");
1742 }
1743
1744 return ret;
1745 }
1746
// Converts every entry of instList to float via the scalar ConvertToFloat
// above. A 64-bit entry converts to a <2 x float>; its two lanes are
// expanded in place into consecutive scalar entries, so instList may grow.
void ConvertToFloat(llvm::IRBuilder<>& builder, llvm::SmallVectorImpl<llvm::Value*>& instList)
{
    for (size_t i=0; i<instList.size(); i++)
    {
        llvm::Value* val = ConvertToFloat(builder, instList[i]);
        if (val->getType()->isVectorTy())
        {
            // Replace the original entry with lane 0 ...
            instList[i] = builder.CreateExtractElement(val, static_cast<uint64_t>(0));
            size_t iOld = i;
            // ... then insert the remaining lanes right after it, advancing
            // 'i' past them so the new entries are not converted again.
            for (unsigned j = 1; j < cast<IGCLLVM::FixedVectorType>(val->getType())->getNumElements(); j++)
            {
                instList.insert(instList.begin()+ iOld +j, builder.CreateExtractElement(val, j));
                i++;
            }
        }
        else
        {
            instList[i] = val;
        }
    }
}
1768
ScalarizeAggregateMembers(llvm::IRBuilder<> & builder,llvm::Value * val,llvm::SmallVectorImpl<llvm::Value * > & instList)1769 void ScalarizeAggregateMembers(llvm::IRBuilder<>& builder, llvm::Value* val, llvm::SmallVectorImpl<llvm::Value*> & instList)
1770 {
1771 llvm::Type* type = val->getType();
1772 unsigned num = 0;
1773 switch (type->getTypeID())
1774 {
1775 case llvm::Type::FloatTyID:
1776 case llvm::Type::HalfTyID:
1777 case llvm::Type::IntegerTyID:
1778 case llvm::Type::DoubleTyID:
1779 instList.push_back(val);
1780 break;
1781 case llvm::Type::StructTyID:
1782 num = type->getStructNumElements();
1783 for (unsigned i = 0; i < num; i++)
1784 {
1785 ScalarizeAggregateMembers(builder, builder.CreateExtractValue(val, i), instList);
1786 }
1787 break;
1788 case IGCLLVM::VectorTyID:
1789 num = (unsigned)cast<IGCLLVM::FixedVectorType>(type)->getNumElements();
1790 for (unsigned i = 0; i < num; i++)
1791 {
1792 ScalarizeAggregateMembers(builder, builder.CreateExtractElement(val, i), instList);
1793 }
1794 break;
1795 case llvm::Type::ArrayTyID:
1796 num = static_cast<uint32_t>(type->getArrayNumElements());
1797 for (unsigned i = 0; i < num; i++)
1798 {
1799 ScalarizeAggregateMembers(builder, builder.CreateExtractValue(val, i), instList);
1800 }
1801 break;
1802 default:
1803 IGC_ASSERT_EXIT_MESSAGE(0, "Unsupported type in ScalarizeAggregateMembers of helper! Please enhance this function first.");
1804 break;
1805 }
1806 }
1807
ScalarizeAggregateMemberAddresses(IGCLLVM::IRBuilder<> & builder,llvm::Type * type,llvm::Value * val,llvm::SmallVectorImpl<llvm::Value * > & instList,llvm::SmallVector<llvm::Value *,16> indices)1808 void ScalarizeAggregateMemberAddresses(IGCLLVM::IRBuilder<>& builder, llvm::Type* type, llvm::Value* val, llvm::SmallVectorImpl<llvm::Value*> & instList, llvm::SmallVector<llvm::Value*, 16> indices)
1809 {
1810 unsigned num = 0;
1811 switch (type->getTypeID())
1812 {
1813 case llvm::Type::FloatTyID:
1814 case llvm::Type::HalfTyID:
1815 case llvm::Type::IntegerTyID:
1816 case llvm::Type::DoubleTyID:
1817 instList.push_back(builder.CreateInBoundsGEP(val, makeArrayRef(indices)));
1818 break;
1819 case llvm::Type::StructTyID:
1820 num = type->getStructNumElements();
1821 for (unsigned i = 0; i < num; i++)
1822 {
1823 indices.push_back(builder.getInt32(i));
1824 ScalarizeAggregateMemberAddresses(builder, type->getStructElementType(i), val, instList, indices);
1825 indices.pop_back();
1826 }
1827 break;
1828 case IGCLLVM::VectorTyID:
1829 num = (unsigned)cast<IGCLLVM::FixedVectorType>(type)->getNumElements();
1830 for (unsigned i = 0; i < num; i++)
1831 {
1832 indices.push_back(builder.getInt32(i));
1833 ScalarizeAggregateMemberAddresses(builder, cast<VectorType>(type)->getElementType(), val, instList, indices);
1834 indices.pop_back();
1835 }
1836 break;
1837 case llvm::Type::ArrayTyID:
1838 //fix this if one API could support an array with length > 2^32
1839 num = static_cast<uint32_t>(type->getArrayNumElements());
1840 for (unsigned i = 0; i < num; i++)
1841 {
1842 indices.push_back(builder.getInt32(i));
1843 ScalarizeAggregateMemberAddresses(builder, type->getArrayElementType(), val, instList, indices);
1844 indices.pop_back();
1845 }
1846 break;
1847 default:
1848 IGC_ASSERT_EXIT_MESSAGE(0, "Unsupported type in ScalarizeAggregateMemberAddresses of helper! Please enhance this function first.");
1849 break;
1850 }
1851 }
1852
IsUnsignedCmp(const llvm::CmpInst::Predicate Pred)1853 bool IsUnsignedCmp(const llvm::CmpInst::Predicate Pred)
1854 {
1855 switch (Pred) {
1856 case llvm::CmpInst::ICMP_UGT:
1857 case llvm::CmpInst::ICMP_UGE:
1858 case llvm::CmpInst::ICMP_ULT:
1859 case llvm::CmpInst::ICMP_ULE:
1860 return true;
1861 default:
1862 break;
1863 }
1864 return false;
1865 }
1866
IsSignedCmp(const llvm::CmpInst::Predicate Pred)1867 bool IsSignedCmp(const llvm::CmpInst::Predicate Pred)
1868 {
1869 switch (Pred)
1870 {
1871 case llvm::CmpInst::ICMP_SGT:
1872 case llvm::CmpInst::ICMP_SGE:
1873 case llvm::CmpInst::ICMP_SLT:
1874 case llvm::CmpInst::ICMP_SLE:
1875 return true;
1876 default:
1877 break;
1878 }
1879 return false;
1880 }
1881
1882 // isA64Ptr - Queries whether given pointer type requires 64-bit representation in vISA
isA64Ptr(llvm::PointerType * PT,CodeGenContext * pContext)1883 bool isA64Ptr(llvm::PointerType* PT, CodeGenContext* pContext)
1884 {
1885 return pContext->getRegisterPointerSizeInBits(PT->getAddressSpace()) == 64;
1886 }
1887
IsBitCastForLifetimeMark(const llvm::Value * V)1888 bool IsBitCastForLifetimeMark(const llvm::Value* V)
1889 {
1890 if (!V || !llvm::isa<llvm::BitCastInst>(V))
1891 {
1892 return false;
1893 }
1894 for (llvm::Value::const_user_iterator it = V->user_begin(), e = V->user_end(); it != e; ++it)
1895 {
1896 const llvm::IntrinsicInst* inst = llvm::dyn_cast<const llvm::IntrinsicInst>(*it);
1897 if (!inst)
1898 {
1899 return false;
1900 }
1901 llvm::Intrinsic::ID IID = inst->getIntrinsicID();
1902 if (IID != llvm::Intrinsic::lifetime_start &&
1903 IID != llvm::Intrinsic::lifetime_end)
1904 {
1905 return false;
1906 }
1907 }
1908 return true;
1909 }
1910
mutatePtrType(Value * ptrv,PointerType * newType,IRBuilder<> & builder,const Twine &)1911 Value* mutatePtrType(Value* ptrv, PointerType* newType,
1912 IRBuilder<>& builder, const Twine&)
1913 {
1914 if (isa<ConstantPointerNull>(ptrv))
1915 {
1916 return ConstantPointerNull::get(newType);
1917 }
1918 else
1919 {
1920 if (ConstantExpr * cexpr = dyn_cast<ConstantExpr>(ptrv))
1921 {
1922 IGC_ASSERT(cexpr->getOpcode() == Instruction::IntToPtr);
1923 Value* offset = cexpr->getOperand(0);
1924 ptrv = builder.CreateIntToPtr(offset, newType);
1925 }
1926 else
1927 {
1928 ptrv->mutateType(newType);
1929 }
1930 }
1931 return ptrv;
1932 }
1933
1934 /*
1935 cmp.l.f0.0 (8) null:d r0.0<0;1,0>:w 0x0000:w { Align1, N1, NoMask, NoCompact }
1936 (-f0.0) jmpi Test
1937 (-f0.0) sendc (8) null:ud r120.0<0;1,0>:f 0x00000025 0x08031400:ud { Align1, N1, EOT, NoCompact }
1938 nop
1939 Test :
1940 nop
1941
1942 */
1943
    // Pre-assembled SIMD8 prologue for the conservative rasterization WA;
    // AppendConservativeRastWAHeader() prepends these raw instruction words
    // to the shader binary. The assembly listing is in the comment above.
    static const unsigned int CRastHeader_SIMD8[] =
    {
        0x05600010,0x20001a24,0x1e000000,0x00000000,
        0x00110020,0x34000004,0x0e001400,0x00000020,
        0x05710032,0x20003a00,0x06000f00,0x88031400,
        0x00000000,0x00000000,0x00000000,0x00000000,
    };
1951
1952 /*
1953 cmp.l.f0.0 (16) null:d r0.0 < 0; 1, 0 > : w 0x0000 : w{ Align1, N1, NoMask, NoCompact }
1954 (-f0.0) jmpi(1) Test { Align1, N1, NoMask, NoCompact }
1955 (-f0.0) sendc(16) null : ud r120.0 < 0; 1, 0 > : f 0x00000025 0x90031000 : ud{ Align1, N1, EOT, NoCompact }
1956 nop
1957 Test :
1958 nop
1959
1960 */
    // Pre-assembled SIMD16 prologue for the conservative rasterization WA;
    // see the assembly listing in the comment above.
    static const unsigned int CRastHeader_SIMD16[] =
    {
        0x05800010, 0x20001A24, 0x1E000000, 0x00000000,
        0x00110020, 0x34000004, 0x0E001400, 0x00000020,
        0x05910032, 0x20003A00, 0x06000F00, 0x90031000,
        0x00000000, 0x00000000, 0x00000000, 0x00000000,
    };
1968
1969 /*
1970 cmp.l.f0.0 (16) null:d r0.0 < 0; 1, 0 > : w 0x0000 : w{ Align1, N1, NoMask, NoCompact }
1971 (-f0.0) jmpi Test
1972 (-f0.0) sendc(16) null : w r120.0 < 0; 1, 0 > : ud 0x00000005 0x10031000 : ud{ Align1, N1, NoCompact }
1973 (-f0.0) sendc(16) null : w r120.0 < 0; 1, 0 > : f 0x00000025 0x10031800 : ud{ Align1, N5, EOT, NoCompact }
1974 nop
1975 Test :
1976 nop
1977
1978 */
1979
    // Pre-assembled SIMD32 prologue for the conservative rasterization WA
    // (two sendc messages, one per SIMD16 half); see the listing above.
    static const unsigned int CRastHeader_SIMD32[] =
    {
        0x05800010,0x20001a24,0x1e000000,0x00000000,
        0x00110020,0x34000004,0x0e001400,0x00000020,
        0x05910032,0x20000260,0x06000f00,0x10031000,
        0x05912032,0x20003a60,0x06000f00,0x90031800,
    };
1987
1988
AppendConservativeRastWAHeader(IGC::SProgramOutput * program,SIMDMode simdmode)1989 unsigned int AppendConservativeRastWAHeader(IGC::SProgramOutput* program, SIMDMode simdmode)
1990 {
1991 unsigned int headerSize = 0;
1992 const unsigned int* pHeader = nullptr;
1993
1994 if (program && (program->m_programSize > 0))
1995 {
1996 switch (simdmode)
1997 {
1998 case SIMDMode::SIMD8:
1999 headerSize = sizeof(CRastHeader_SIMD8);
2000 pHeader = CRastHeader_SIMD8;
2001 break;
2002
2003 case SIMDMode::SIMD16:
2004 headerSize = sizeof(CRastHeader_SIMD16);
2005 pHeader = CRastHeader_SIMD16;
2006 break;
2007
2008 case SIMDMode::SIMD32:
2009 headerSize = sizeof(CRastHeader_SIMD32);
2010 pHeader = CRastHeader_SIMD32;
2011 break;
2012
2013 default:
2014 IGC_ASSERT_MESSAGE(0, "Invalid SIMD Mode for Conservative Raster WA");
2015 break;
2016 }
2017
2018 unsigned int newSize = program->m_programSize + headerSize;
2019 void* newBinary = IGC::aligned_malloc(newSize, 16);
2020 memcpy_s(newBinary, newSize, pHeader, headerSize);
2021 memcpy_s((char*)newBinary + headerSize, newSize, program->m_programBin, program->m_programSize);
2022 IGC::aligned_free(program->m_programBin);
2023 program->m_programBin = newBinary;
2024 program->m_programSize = newSize;
2025 }
2026 return headerSize;
2027 }
2028
DSDualPatchEnabled(class CodeGenContext * ctx)2029 bool DSDualPatchEnabled(class CodeGenContext* ctx)
2030 {
2031 return ctx->platform.supportDSDualPatchDispatch() &&
2032 ctx->platform.WaDisableDSDualPatchMode() &&
2033 !(ctx->m_DriverInfo.APIDisableDSDualPatchDispatch()) &&
2034 IGC_IS_FLAG_DISABLED(DisableDSDualPatch);
2035 }
2036
2037
getUniqueEntryFunc(const IGCMD::MetaDataUtils * pM,IGC::ModuleMetaData * pModMD)2038 Function* getUniqueEntryFunc(const IGCMD::MetaDataUtils* pM, IGC::ModuleMetaData* pModMD)
2039 {
2040 Function* entryFunc = nullptr;
2041 auto& FuncMD = pModMD->FuncMD;
2042 for (auto i = pM->begin_FunctionsInfo(), e = pM->end_FunctionsInfo(); i != e; ++i)
2043 {
2044 IGCMD::FunctionInfoMetaDataHandle Info = i->second;
2045 if (Info->getType() != FunctionTypeMD::KernelFunction)
2046 {
2047 continue;
2048 }
2049
2050 Function* F = i->first;
2051 if (!entryFunc)
2052 {
2053 entryFunc = F;
2054 }
2055
2056 auto fi = FuncMD.find(F);
2057 if (fi != FuncMD.end() && fi->second.isUniqueEntry)
2058 {
2059 return F;
2060 }
2061 }
2062 IGC_ASSERT_MESSAGE(nullptr != entryFunc, "No entry func!");
2063 auto ei = FuncMD.find(entryFunc);
2064 IGC_ASSERT(ei != FuncMD.end());
2065 ei->second.isUniqueEntry = true;
2066 return entryFunc;
2067 }
2068
2069 // If true, the codegen will likely not emit instruction for this instruction.
isNoOpInst(Instruction * I,CodeGenContext * Ctx)2070 bool isNoOpInst(Instruction* I, CodeGenContext* Ctx)
2071 {
2072 if (isa<BitCastInst>(I) ||
2073 isa<IntToPtrInst>(I) ||
2074 isa<PtrToIntInst>(I))
2075 {
2076 // Don't bother with constant operands
2077 if (isa<Constant>(I->getOperand(0))) {
2078 return false;
2079 }
2080
2081 Type* dTy = I->getType();
2082 Type* sTy = I->getOperand(0)->getType();
2083 PointerType* dPTy = dyn_cast<PointerType>(dTy);
2084 PointerType* sPTy = dyn_cast<PointerType>(sTy);
2085 uint32_t dBits = dPTy ? Ctx->getRegisterPointerSizeInBits(dPTy->getAddressSpace())
2086 : (unsigned int)dTy->getPrimitiveSizeInBits();
2087 uint32_t sBits = sPTy ? Ctx->getRegisterPointerSizeInBits(sPTy->getAddressSpace())
2088 : (unsigned int)sTy->getPrimitiveSizeInBits();
2089 if (dBits == 0 || sBits == 0 || dBits != sBits) {
2090 // Not primitive type or not equal in size (inttoptr, etc)
2091 return false;
2092 }
2093
2094 IGCLLVM::FixedVectorType* dVTy = dyn_cast<IGCLLVM::FixedVectorType>(dTy);
2095 IGCLLVM::FixedVectorType* sVTy = dyn_cast<IGCLLVM::FixedVectorType>(sTy);
2096 int d_nelts = dVTy ? (int)dVTy->getNumElements() : 1;
2097 int s_nelts = sVTy ? (int)sVTy->getNumElements() : 1;
2098 if (d_nelts != s_nelts) {
2099 // Vector relayout bitcast.
2100 return false;
2101 }
2102 return true;
2103 }
2104 return false;
2105 }
2106
2107 //
2108 // Given a value, check if it is likely a positive number.
2109 //
2110 // This function works best if llvm.assume() is used in the bif libraries to
2111 // give ValueTracking hints. ex:
2112 //
2113 // size_t get_local_id(uint dim)
2114 // {
2115 // size_t ret = __builtin_IB_get_local_id()
2116 // __builtin_assume(ret >= 0);
2117 // __builtin_assume(ret <= 0x0000ffff)
2118 // return ret;
2119 // }
2120 //
2121 // This implementation relies completly on native llvm functions
2122 //
2123 //
2124 //
valueIsPositive(Value * V,const DataLayout * DL,llvm::AssumptionCache * AC,llvm::Instruction * CxtI)2125 bool valueIsPositive(
2126 Value* V,
2127 const DataLayout* DL,
2128 llvm::AssumptionCache* AC,
2129 llvm::Instruction* CxtI)
2130 {
2131 #if LLVM_VERSION_MAJOR == 4
2132 bool isKnownNegative = false;
2133 bool isKnownPositive = false;
2134 llvm::ComputeSignBit(
2135 V,
2136 isKnownPositive,
2137 isKnownNegative,
2138 *DL,
2139 0,
2140 AC,
2141 CxtI);
2142 return isKnownPositive;
2143 #elif LLVM_VERSION_MAJOR >= 7
2144 return computeKnownBits(
2145 V,
2146 *DL,
2147 0,
2148 AC,
2149 CxtI).isNonNegative();
2150 #endif
2151 }
2152
appendToUsed(llvm::Module & M,ArrayRef<GlobalValue * > Values)2153 void appendToUsed(llvm::Module& M, ArrayRef<GlobalValue*> Values)
2154 {
2155 std::string Name = "llvm.used";
2156 GlobalVariable* GV = M.getGlobalVariable(Name);
2157 SmallPtrSet<Constant*, 16> InitAsSet;
2158 SmallVector<Constant*, 16> Init;
2159 if (GV) {
2160 ConstantArray* CA = dyn_cast<ConstantArray>(GV->getInitializer());
2161 for (auto& Op : CA->operands()) {
2162 Constant* C = cast_or_null<Constant>(Op);
2163 if (InitAsSet.insert(C).second)
2164 Init.push_back(C);
2165 }
2166 GV->eraseFromParent();
2167 }
2168
2169 Type* Int8PtrTy = llvm::Type::getInt8PtrTy(M.getContext());
2170 for (auto* V : Values) {
2171 Constant* C = V;
2172 //llvm will complain if members of llvm.uses doesn't have a name
2173 if(C->getName().empty())
2174 C->setName("gVar");
2175
2176 if (V->getType()->getAddressSpace() != 0)
2177 C = ConstantExpr::getAddrSpaceCast(V, Int8PtrTy);
2178 else
2179 C = ConstantExpr::getBitCast(V, Int8PtrTy);
2180 if (InitAsSet.insert(C).second)
2181 Init.push_back(C);
2182 }
2183
2184 if (Init.empty())
2185 return;
2186
2187 ArrayType* ATy = ArrayType::get(Int8PtrTy, Init.size());
2188 GV = new llvm::GlobalVariable(M, ATy, false, GlobalValue::AppendingLinkage,
2189 ConstantArray::get(ATy, Init), Name);
2190 GV->setSection("llvm.metadata");
2191 }
2192
    // Hoists the computation of 'V' (and, recursively, its operands) within
    // basic block 'BB' so that 'V' is defined no later than right after
    // 'InsertPos'. On return, 'InsertPos' is advanced to the last scheduled
    // instruction. Returns true if any instruction was actually moved.
    //
    // NOTE(review): 'Scheduled' is taken BY VALUE, so insertions made inside
    // recursive calls do not propagate back to this frame's set — only the
    // caller-provided contents are seen. Presumably benign (re-visits are
    // bounded by the precede-check below), but a reference would be the usual
    // choice; confirm against the declaration in the header before changing.
    bool safeScheduleUp(llvm::BasicBlock* BB, llvm::Value* V, llvm::Instruction*& InsertPos, llvm::DenseSet<llvm::Instruction*> Scheduled)
    {
        llvm::Instruction* I = llvm::dyn_cast<llvm::Instruction>(V);
        if (!I)
            return false;

        // Skip value defined in other BBs.
        if (I->getParent() != BB)
            return false;

        // Skip phi-node as they are eventually defined in other BBs.
        if (llvm::isa<llvm::PHINode>(I))
            return false;

        // Skip for side effect instructions
        if (I->mayHaveSideEffects())
            return false;

        // Don't re-schedule instruction again.
        if (Scheduled.count(I)) {
            // Still move the insertion point forward past it if needed.
            if (InsertPos && !isInstPrecede(I, InsertPos))
                InsertPos = I;
            return false;
        }

        bool Changed = false;

        // Try to schedule all its operands first.
        for (auto OI = I->op_begin(), OE = I->op_end(); OI != OE; ++OI)
            Changed |= safeScheduleUp(BB, OI->get(), InsertPos, Scheduled);

        // Mark this instruction `visited`.
        Scheduled.insert(I);

        // Skip if the instruction is already defined before insertion position.
        if (InsertPos && isInstPrecede(I, InsertPos))
            return Changed;

        // Schedule itself.
        if (InsertPos) {
            I->removeFromParent();
            I->insertAfter(InsertPos);
        }

        InsertPos = I;
        return true;
    }
2240
getConstantSInt(IRBuilder<> & Builder,const int bitSize,int64_t val)2241 ConstantInt* getConstantSInt(
2242 IRBuilder<>& Builder, const int bitSize, int64_t val)
2243 {
2244 ConstantInt* res = nullptr;
2245 switch (bitSize) {
2246 case 8: res = Builder.getInt8((uint8_t)val); break;
2247 case 16: res = Builder.getInt16((uint16_t)val); break;
2248 case 32: res = Builder.getInt32((uint32_t)val); break;
2249 case 64: res = Builder.getInt64((uint64_t)val); break;
2250 default:
2251 IGC_ASSERT_MESSAGE(0, "invalid bitsize");
2252 }
2253 return res;
2254 }
2255
getConstantUInt(IRBuilder<> & Builder,const int bitSize,uint64_t val)2256 ConstantInt* getConstantUInt(
2257 IRBuilder<>& Builder, const int bitSize, uint64_t val)
2258 {
2259 ConstantInt* res = nullptr;
2260 switch (bitSize) {
2261 case 8: res = Builder.getInt8((uint8_t)val); break;
2262 case 16: res = Builder.getInt16((uint16_t)val); break;
2263 case 32: res = Builder.getInt32((uint32_t)val); break;
2264 case 64: res = Builder.getInt64(val); break;
2265 default:
2266 IGC_ASSERT_MESSAGE(0, "invalid bitsize");
2267 }
2268 return res;
2269 }
2270
2271 // MulH implementation for 64-bit signed integers
    Value* CreateMulhS64(IRBuilder<>& B, Value* const u, Value* const v) {
        // This comes from Hacker's Delight 8-2.
        // Think of this as elementary school multiplication, but in base 2^32.
        ConstantInt* const loMask = getConstantSInt(B, 64, 0xFFFFFFFFll);
        ConstantInt* const hiShift = getConstantSInt(B, 64, 32);
        //
        // u64 u0 = u & 0xFFFFFFFF; s64 u1 = u >> 32;
        // u64 v0 = v & 0xFFFFFFFF; s64 v1 = v >> 32;
        Value* const u0 = B.CreateAnd(u, loMask, "u.lo32");
        Value* const u1 = B.CreateAShr(u, hiShift, "u.hi32");
        Value* const v0 = B.CreateAnd(v, loMask, "v.lo32");
        Value* const v1 = B.CreateAShr(v, hiShift, "v.hi32");
        //
        // w0 = u0*v0
        Value* const w0 = B.CreateMul(u0, v0, "w0");
        //
        // t = u1*v0 + (w0 >> 32)
        Value* const tLHS = B.CreateMul(u1, v0);
        Value* const tRHS = B.CreateLShr(w0, hiShift, "w0.lo32");
        Value* const t = B.CreateAdd(tLHS, tRHS, "t");
        //
        // w1 = u0*v1 + (t & 0xFFFFFFFF)
        Value* const u0v1 = B.CreateMul(u0, v1);
        Value* const tLO32 = B.CreateAnd(t, loMask, "t.lo32");
        Value* const w1 = B.CreateAdd(u0v1, tLO32, "w1");
        //
        // return u1*v1 + (t >> 32) + (w1 >> 32)
        Value* const u1v1 = B.CreateMul(u1, v1);
        Value* const tHI32 = B.CreateAShr(t, hiShift, "t.hi32");
        Value* const rLHS = B.CreateAdd(u1v1, tHI32);
        Value* const rRHS = B.CreateAShr(w1, hiShift, "w1.lo32");
        Value* const r = B.CreateAdd(rLHS, rRHS, "uv");
        //
        return r;
    }
2307
2308 // MulH implementation for 64-bit unsigned integers
    Value* CreateMulhU64(IRBuilder<>& B, Value* const u, Value* const v)
    {
        // This is the same as CreateMulhS64, but with all logical shifts.
        ConstantInt* const loMask = getConstantUInt(B, 64, 0xFFFFFFFFull);
        ConstantInt* const hiShift = getConstantUInt(B, 64, 32);
        //
        // u64 u0 = u & 0xFFFFFFFF, u1 = u >> 32;
        // u64 v0 = v & 0xFFFFFFFF, v1 = v >> 32;
        Value* const u0 = B.CreateAnd(u, loMask, "u.lo32");
        Value* const u1 = B.CreateLShr(u, hiShift, "u.hi32");
        Value* const v0 = B.CreateAnd(v, loMask, "v.lo32");
        Value* const v1 = B.CreateLShr(v, hiShift, "v.hi32");
        //
        // w0 = u0*v0
        Value* const w0 = B.CreateMul(u0, v0, "w0");
        //
        // t = u1*v0 + (w0 >> 32)
        Value* const tLHS = B.CreateMul(u1, v0);
        Value* const tRHS = B.CreateLShr(w0, hiShift, "w0.lo32");
        Value* const t = B.CreateAdd(tLHS, tRHS, "t");
        //
        // w1 = u0*v1 + (t & 0xFFFFFFFF)
        Value* const u0v1 = B.CreateMul(u0, v1);
        Value* const tLO32 = B.CreateAnd(t, loMask, "t.lo32");
        Value* const w1 = B.CreateAdd(u0v1, tLO32, "w1");
        //
        // return u1*v1 + (t >> 32) + (w1 >> 32)
        Value* const u1v1 = B.CreateMul(u1, v1);
        Value* const tHI32 = B.CreateLShr(t, hiShift, "t.hi32");
        Value* const rLHS = B.CreateAdd(u1v1, tHI32);
        Value* const rRHS = B.CreateLShr(w1, hiShift, "w1.lo32");
        Value* const r = B.CreateAdd(rLHS, rRHS, "uv");
        //
        return r;
    }
2344
2345 // MulH implementation for 32/64 bit integers
CreateMulh(Function & F,IRBuilder<> & B,const bool isSigned,Value * const u,Value * const v)2346 Value* CreateMulh(
2347 Function& F,
2348 IRBuilder<>& B,
2349 const bool isSigned,
2350 Value* const u,
2351 Value* const v)
2352 {
2353 Value* res = nullptr;
2354 IGC_ASSERT(nullptr != u);
2355 IGC_ASSERT(nullptr != u->getType());
2356 int bitWidth = u->getType()->getIntegerBitWidth();
2357 switch(bitWidth)
2358 {
2359 case 32:
2360 {
2361 // we have a dedicated machine instruction for 32b
2362 SmallVector<Value*, 2> imulhArgs;
2363 imulhArgs.push_back(u);
2364 imulhArgs.push_back(v);
2365 auto intrinsic = isSigned ?
2366 GenISAIntrinsic::GenISA_imulH :
2367 GenISAIntrinsic::GenISA_umulH;
2368 IGC_ASSERT(nullptr != v);
2369 Function* const iMulhDecl = llvm::GenISAIntrinsic::getDeclaration(
2370 F.getParent(),
2371 intrinsic,
2372 v->getType());
2373 res = B.CreateCall(iMulhDecl, imulhArgs, "q_appx");
2374 break;
2375 }
2376 case 64:
2377 // emulate via 64b arithmetic
2378 if (isSigned) {
2379 res = CreateMulhS64(B, u, v);
2380 }
2381 else {
2382 res = CreateMulhU64(B, u, v);
2383 }
2384 break;
2385 default:
2386 IGC_ASSERT_MESSAGE(0, "CreateMulH must be 32 or 64");
2387 }
2388 return res;
2389 }
2390
hasInlineAsmInFunc(llvm::Function & F)2391 bool hasInlineAsmInFunc(llvm::Function& F)
2392 {
2393 for (auto ii = inst_begin(&F), ie = inst_end(&F); ii != ie; ii++)
2394 {
2395 if (llvm::CallInst* call = llvm::dyn_cast<llvm::CallInst>(&*ii))
2396 {
2397 if (call->isInlineAsm())
2398 {
2399 return true;
2400 }
2401 }
2402 }
2403 return false;
2404 }
2405
2406 // Parses the "vector-variant" attribute string to get a valid function
2407 // variant symbol string supported by current implementation of IGC.
2408 //
2409 // Returns a tuple of values:
2410 // R(0) is the reformatted variant symbol string.
2411 // R(1) is the called function's name.
2412 // R(2) is the required SIMD size.
2413 // See the spec for Intel Vector Function ABI for parsed symbol meanings.
ParseVectorVariantFunctionString(llvm::StringRef varStr)2414 std::tuple<std::string, std::string, unsigned> ParseVectorVariantFunctionString(llvm::StringRef varStr)
2415 {
2416 unsigned vlen = 0;
2417 std::stringstream outStr;
2418
2419 auto pos = varStr.begin();
2420 auto strEnd = varStr.end();
2421
2422 // Starts with _ZGV
2423 IGC_ASSERT(varStr.startswith("_ZGV"));
2424 outStr << "_ZGV";
2425 pos += 4;
2426 // ISA class target processor type
2427 IGC_ASSERT(*pos == 'x' || *pos == 'y' || *pos == 'Y' || *pos == 'z' || *pos == 'Z');
2428 outStr << *pos;
2429 pos++;
2430 // Mask or NoMask, only support NoMask for now
2431 IGC_ASSERT(*pos == 'M' || *pos == 'N');
2432 outStr << 'N';
2433 pos++;
2434 // Parse vector length (input can be 1/2/4/8/16/32, output restricted to 8/16/32)
2435 auto idStart = pos;
2436 while (*pos >= '0' && *pos <= '9') pos++;
2437 IGC_ASSERT(StringRef(idStart, pos - idStart).getAsInteger(10, vlen) == 0);
2438 IGC_ASSERT(vlen == 1 || vlen == 2 || vlen == 4 || vlen == 8 || vlen == 16 || vlen == 32);
2439 // Set min SIMD width to 8
2440 vlen = (vlen < 8) ? 8 : vlen;
2441 outStr << std::to_string(vlen);
2442
2443 while (pos != strEnd)
2444 {
2445 // End of vector properties symbols terminated with '_'
2446 if (*pos == '_') {
2447 outStr << *pos++;
2448 break;
2449 }
2450 // Parameter variant type, only support default vector type
2451 IGC_ASSERT(*pos == 'l' || *pos == 'u' || *pos == 'v' || *pos == 'R' || *pos == 'U' || *pos == 'L');
2452 outStr << 'v';
2453 pos++;
2454 // Ignore alignment properties
2455 if (*pos == 'a') {
2456 pos++;
2457 while (*pos >= '0' && *pos <= '9') pos++;
2458 }
2459 }
2460
2461 // Remaining characters form the function name
2462 std::string fName = StringRef(pos, strEnd - pos).str();
2463
2464 return std::make_tuple(outStr.str(), fName, vlen);
2465 }
2466
2467 ///
2468 /// Return base type from more complex type
2469 ///
2470 /// Return nullptr if complex type cannot be defined with only one type
2471 ///
GetBaseType(llvm::Type * ProcessedType)2472 llvm::Type* GetBaseType(llvm::Type* ProcessedType)
2473 {
2474 while (ProcessedType->isArrayTy() || ProcessedType->isStructTy())
2475 {
2476 if (ProcessedType->isArrayTy())
2477 ProcessedType = ProcessedType->getArrayElementType();
2478 else
2479 {
2480 if (ProcessedType->getStructNumElements() != 1)
2481 return nullptr;
2482
2483 ProcessedType = ProcessedType->getStructElementType(0);
2484 }
2485 }
2486
2487 return ProcessedType;
2488 }
2489
2490 // Function modifies address space in selected uses of given input value
FixAddressSpaceInAllUses(llvm::Value * ptr,uint newAS,uint oldAS)2491 void FixAddressSpaceInAllUses(llvm::Value* ptr, uint newAS, uint oldAS)
2492 {
2493 IGC_ASSERT(newAS != oldAS);
2494
2495 for (auto UI = ptr->user_begin(), E = ptr->user_end(); UI != E; ++UI)
2496 {
2497 Instruction* inst = dyn_cast<Instruction>(*UI);
2498 PointerType* instType = nullptr;
2499 if (isa<BitCastInst>(inst) || isa<GetElementPtrInst>(inst) ||
2500 isa<AddrSpaceCastInst>(inst) || isa<PHINode>(inst))
2501 {
2502 instType = dyn_cast<PointerType>(inst->getType());
2503 }
2504
2505 if (instType && instType->getAddressSpace() == oldAS)
2506 {
2507 Type* eltType = instType->getElementType();
2508 PointerType* ptrType = PointerType::get(eltType, newAS);
2509 inst->mutateType(ptrType);
2510 FixAddressSpaceInAllUses(inst, newAS, oldAS);
2511 }
2512 }
2513 }
2514
2515
    // Decomposes a URB offset value into a (base, constant-offset) pair.
    // Returns {nullptr, C} when the whole offset is the constant C, and
    // {base, C} when pUrbOffset can be proven equal to base + C: either via
    // an explicit 'add' with a constant operand, or via an 'or' whose constant
    // operand only touches bits known to be zero in the other operand (so the
    // 'or' acts as an add). Otherwise returns {pUrbOffset, 0}.
    std::pair<Value*, unsigned int> GetURBBaseAndOffset(Value* pUrbOffset)
    {
        Value* pBase = pUrbOffset;
        unsigned int offset = 0;

        // Extracts the value of a ConstantInt (asserts on anything else).
        auto GetConstant = [](Value* pVal)->unsigned int
        {
            IGC_ASSERT(isa<ConstantInt>(pVal));
            ConstantInt* pConst = cast<ConstantInt>(pVal);
            return int_cast<unsigned int>(pConst->getZExtValue());
        };

        if (isa<ConstantInt>(pUrbOffset))
        {
            Value* pNullBase = nullptr;
            return std::make_pair(
                pNullBase,
                GetConstant(pUrbOffset));
        }
        else if (isa<Instruction>(pUrbOffset))
        {
            Instruction* pInstr = cast<Instruction>(pUrbOffset);
            if (pInstr->getOpcode() == Instruction::Add)
            {
                // add with a constant operand: recurse into the other operand
                // and accumulate the constants.
                Value* src0 = pInstr->getOperand(0);
                Value* src1 = pInstr->getOperand(1);
                if (isa<ConstantInt>(src1))
                {
                    auto baseAndOffset = GetURBBaseAndOffset(src0);
                    pBase = baseAndOffset.first;
                    offset = GetConstant(src1) + baseAndOffset.second;
                }
                else if (isa<ConstantInt>(src0))
                {
                    auto baseAndOffset = GetURBBaseAndOffset(src1);
                    pBase = baseAndOffset.first;
                    offset = GetConstant(src0) + baseAndOffset.second;
                }
            }
            else if (pInstr->getOpcode() == Instruction::Or)
            {
                // Examples of patterns matched below:
                // 1. shl + or
                //      urbOffset = urbOffset << 1;
                //      urbOffset = urbOffset | 1;
                // 2. mul + or
                //      urbOffset = urbOffset * 2;
                //      urbOffset = urbOffset | 1;
                // 3. two oword urb writes in loop
                //      urbOffset = urbOffset * 2;
                //      for(...) {
                //        {...}
                //        urbOffset = urbOffset | 1;
                //        urbOffset = urbOffset + 2;
                //        {...}
                //      }
                //
                //

                // Conservatively derives a power-of-two alignment of a value
                // by walking shl/mul-by-power-of-two chains; constants get
                // the alignment of their lowest set bit.
                std::function<unsigned int(Value*)> GetAlignment =
                    [&GetAlignment, &GetConstant](Value* pUrbOffset)->unsigned int
                {
                    unsigned int alignment = 1;
                    Instruction* pInstr = dyn_cast<Instruction>(pUrbOffset);
                    if (pInstr &&
                        pInstr->getOpcode() == Instruction::Shl &&
                        isa<ConstantInt>(pInstr->getOperand(1)))
                    {
                        alignment = GetAlignment(pInstr->getOperand(0)) *
                            (1u << GetConstant(pInstr->getOperand(1)));
                    }
                    else if (pInstr &&
                        pInstr->getOpcode() == Instruction::Mul &&
                        isa<ConstantInt>(pInstr->getOperand(1)) &&
                        iSTD::IsPowerOfTwo(GetConstant(pInstr->getOperand(1))))
                    {
                        alignment = GetAlignment(pInstr->getOperand(0)) *
                            GetConstant(pInstr->getOperand(1));
                    }
                    else if (pInstr &&
                        pInstr->getOpcode() == Instruction::Mul &&
                        isa<ConstantInt>(pInstr->getOperand(0)) &&
                        iSTD::IsPowerOfTwo(GetConstant(pInstr->getOperand(0))))
                    {
                        alignment = GetAlignment(pInstr->getOperand(1)) *
                            GetConstant(pInstr->getOperand(0));
                    }
                    else if (isa<ConstantInt>(pUrbOffset))
                    {
                        alignment = 1 << iSTD::bsf(GetConstant(pUrbOffset));
                    }
                    return alignment;
                };

                Value* src0 = pInstr->getOperand(0);
                Value* src1 = pInstr->getOperand(1);
                unsigned int alignment = 1;
                if (isa<PHINode>(src0) && isa<ConstantInt>(src1))
                {
                    // pattern 3: take the minimum alignment over all
                    // incoming values of the phi (self-increments by a
                    // power of two keep the increment's alignment).
                    PHINode* pPhi = cast<PHINode>(src0);
                    alignment = std::numeric_limits<unsigned int>::max();
                    for (unsigned int i = 0; i < pPhi->getNumIncomingValues(); i++)
                    {
                        Instruction* pIncoming = dyn_cast<Instruction>(pPhi->getIncomingValue(i));
                        if (pIncoming &&
                            pIncoming->getOpcode() == Instruction::Add &&
                            pPhi == pIncoming->getOperand(0) &&
                            isa<ConstantInt>(pIncoming->getOperand(1)) &&
                            iSTD::IsPowerOfTwo(GetConstant(pIncoming->getOperand(1))))
                        {
                            alignment = std::min(alignment, GetConstant(pIncoming->getOperand(1)));
                        }
                        else
                        {
                            alignment = std::min(alignment, GetAlignment(pPhi->getIncomingValue(i)));
                        }
                    }
                }
                else
                {
                    // patterns 1 and 2
                    alignment = GetAlignment(src0);
                }
                // The 'or' behaves as an add only when the constant is
                // strictly below the proven alignment of the base.
                if (alignment > GetConstant(src1))
                {
                    IGC_ASSERT(iSTD::IsPowerOfTwo(alignment));
                    pBase = src0;
                    offset = GetConstant(src1);
                }
            }
        }

        return std::make_pair(pBase, offset);
    }
2651
GetPrintfStrings(Module & M)2652 std::vector<std::pair<unsigned int, std::string>> GetPrintfStrings(Module &M)
2653 {
2654 std::vector<std::pair<unsigned int, std::string>> printfStrings;
2655 std::string MDNodeName = "printf.strings";
2656 NamedMDNode* printfMDNode = M.getOrInsertNamedMetadata(MDNodeName);
2657
2658 for (uint i = 0, NumStrings = printfMDNode->getNumOperands();
2659 i < NumStrings;
2660 i++)
2661 {
2662 MDNode* argMDNode = printfMDNode->getOperand(i);
2663 ConstantInt* indexOpndVal =
2664 mdconst::dyn_extract<ConstantInt>(argMDNode->getOperand(0));
2665 MDString* stringOpndVal =
2666 dyn_cast<MDString>(argMDNode->getOperand(1));
2667
2668 printfStrings.push_back({
2669 int_cast<unsigned int>(indexOpndVal->getZExtValue()),
2670 stringOpndVal->getString().data()
2671 });
2672 }
2673
2674 return printfStrings;
2675 }
2676
PDT_dominates(llvm::PostDominatorTree & PTD,const Instruction * I1,const Instruction * I2)2677 bool PDT_dominates(llvm::PostDominatorTree& PTD,
2678 const Instruction* I1,
2679 const Instruction* I2)
2680 {
2681 IGC_ASSERT_MESSAGE(I1, "Expecting valid I1 and I2");
2682 IGC_ASSERT_MESSAGE(I2, "Expecting valid I1 and I2");
2683
2684 const BasicBlock* BB1 = I1->getParent();
2685 const BasicBlock* BB2 = I2->getParent();
2686
2687 if (BB1 != BB2)
2688 return PTD.dominates(BB1, BB2);
2689
2690 // PHINodes in a block are unordered.
2691 if (isa<PHINode>(I1) && isa<PHINode>(I2))
2692 return false;
2693
2694 // Loop through the basic block until we find I1 or I2.
2695 BasicBlock::const_iterator I = BB1->begin();
2696 for (; &*I != I1 && &*I != I2; ++I)
2697 /*empty*/;
2698
2699 return &*I == I2;
2700 }
2701
2702 } // namespace IGC
2703