1 /*========================== begin_copyright_notice ============================
2 
3 Copyright (C) 2017-2021 Intel Corporation
4 
5 SPDX-License-Identifier: MIT
6 
7 ============================= end_copyright_notice ===========================*/
8 
9 #include "Compiler/CISACodeGen/helper.h"
10 #include "Compiler/CISACodeGen/CISACodeGen.h"
11 #include "Compiler/Optimizer/OpenCLPasses/KernelArgs.hpp"
12 #include "Compiler/MetaDataUtilsWrapper.h"
13 #include "common/LLVMWarningsPush.hpp"
14 #include "llvm/Config/llvm-config.h"
15 #include "llvmWrapper/IR/DerivedTypes.h"
16 #include "llvmWrapper/Support/KnownBits.h"
17 #include "llvmWrapper/IR/Instructions.h"
18 #include "llvmWrapper/Support/Alignment.h"
19 #include "llvm/IR/GetElementPtrTypeIterator.h"
20 #include <llvm/IR/InstIterator.h>
21 #include "llvm/Analysis/ValueTracking.h"
22 #include "common/LLVMWarningsPop.hpp"
23 #include "GenISAIntrinsics/GenIntrinsicInst.h"
24 #include "Compiler/CISACodeGen/ShaderCodeGen.hpp"
25 #include "common/secure_mem.h"
26 #include <stack>
27 #include "Probe/Assertion.h"
28 
29 using namespace llvm;
30 using namespace GenISAIntrinsic;
31 
32 /************************************************************************
33 This file contains helper functions for the code generator
34 Many functions use X-MACRO, that allow us to separate data about encoding
35 to the logic of the helper functions
36 
37 ************************************************************************/
38 
39 namespace IGC
40 {
41     typedef union _gfxResourceAddrSpace
42     {
43         struct _bits
44         {
45             unsigned int       bufId : 16;
46             unsigned int       bufType : 5;
47             unsigned int       indirect : 1;     // bool
48             unsigned int       reserved : 10;
49         } bits;
50         uint32_t u32Val;
51     } GFXResourceAddrSpace;
52 
53     // If 'bufIdx' is a ConstantInt, 'uniqueIndAS' is irrelevant.
54     // Otherwise, you should set 'uniqueIndAS' if you want to identify
55     // this address space later on.  If not, the default can be used.
EncodeAS4GFXResource(const llvm::Value & bufIdx,BufferType bufType,unsigned uniqueIndAS)56     unsigned EncodeAS4GFXResource(
57         const llvm::Value& bufIdx,
58         BufferType bufType,
59         unsigned uniqueIndAS)
60     {
61         GFXResourceAddrSpace temp;
62         static_assert(sizeof(temp) == 4, "Code below may need and update.");
63         temp.u32Val = 0;
64         IGC_ASSERT((bufType + 1) < BUFFER_TYPE_UNKNOWN + 1);
65         temp.bits.bufType = bufType + 1;
66 
67         if (bufType == SLM)
68         {
69             return ADDRESS_SPACE_LOCAL;
70         }
71         else if (bufType == STATELESS_READONLY)
72         {
73             return ADDRESS_SPACE_CONSTANT;
74         }
75         else if (bufType == STATELESS)
76         {
77             return ADDRESS_SPACE_GLOBAL;
78         }
79         else if (bufType == STATELESS_A32)
80         {
81             return ADDRESS_SPACE_A32;
82         }
83         else if (auto *CI = dyn_cast<ConstantInt>(&bufIdx))
84         {
85             unsigned int bufId = static_cast<unsigned>(CI->getZExtValue());
86             IGC_ASSERT((bufType == BINDLESS_SAMPLER) || (bufId < (1 << 16)));
87             temp.bits.bufId = bufId;
88             return temp.u32Val;
89         }
90 
91         // if it is indirect-buf, it is front-end's job to give a proper(unique) address-space per access
92         temp.bits.bufId = uniqueIndAS;
93         temp.bits.indirect = 1;
94         return temp.u32Val;
95     }
96 
97     ///
98     /// if you want resource-dimension, use GetBufferDimension()
99     ///
DecodeAS4GFXResource(unsigned addrSpace,bool & directIndexing,unsigned & bufId)100     BufferType DecodeAS4GFXResource(unsigned addrSpace, bool& directIndexing, unsigned& bufId)
101     {
102         GFXResourceAddrSpace temp;
103         temp.u32Val = addrSpace;
104 
105         directIndexing = (temp.bits.indirect == 0);
106         bufId = temp.bits.bufId;
107 
108         if (addrSpace == ADDRESS_SPACE_LOCAL)
109         {
110             return SLM;
111         }
112         else if (addrSpace == ADDRESS_SPACE_A32)
113         {
114             return STATELESS_A32;
115         }
116         unsigned bufType = temp.bits.bufType - 1;
117         if (bufType < BUFFER_TYPE_UNKNOWN)
118         {
119             return (BufferType)bufType;
120         }
121         return BUFFER_TYPE_UNKNOWN;
122     }
123 
124     // Return true if AS is for a stateful surface.
125     //    Stateful surface should have an encoded AS that is bigger than
126     //    ADDRESS_SPACE_NUM_ADDRESSES.
isStatefulAddrSpace(unsigned AS)127     bool isStatefulAddrSpace(unsigned AS)
128     {
129         return AS > ADDRESS_SPACE_NUM_ADDRESSES;
130     }
131 
isDummyBasicBlock(llvm::BasicBlock * BB)132     bool isDummyBasicBlock(llvm::BasicBlock* BB)
133     {
134         if (BB->size() != 1)
135             return false;
136         if ((++pred_begin(BB)) != pred_end(BB))
137             return false;
138         if ((++succ_begin(BB)) != succ_end(BB))
139             return false;
140         return true;
141     }
142 
SetBufferAsBindless(unsigned addressSpaceOfPtr,BufferType bufferType)143     unsigned SetBufferAsBindless(unsigned addressSpaceOfPtr, BufferType bufferType)
144     {
145         GFXResourceAddrSpace temp = {};
146         temp.u32Val = addressSpaceOfPtr;
147 
148         // Mark buffer as it is bindless for further processing
149         switch (bufferType)
150         {
151         case BufferType::RESOURCE:
152             temp.bits.bufType = IGC::BINDLESS_TEXTURE + 1;
153             break;
154         case BufferType::CONSTANT_BUFFER:
155             temp.bits.bufType = IGC::BINDLESS_CONSTANT_BUFFER + 1;
156             break;
157         case BufferType::UAV:
158             temp.bits.bufType = IGC::BINDLESS + 1;
159             break;
160         case BufferType::SAMPLER:
161             temp.bits.bufType = IGC::BINDLESS_SAMPLER + 1;
162             break;
163         default:
164             IGC_ASSERT_MESSAGE(0, "other types of buffers shouldn't reach this part");
165             break;
166         }
167         return temp.u32Val;
168     }
169 
UsesTypedConstantBuffer(const CodeGenContext * pContext,const BufferType bufType)170     bool UsesTypedConstantBuffer(
171         const CodeGenContext* pContext,
172         const BufferType bufType)
173     {
174         IGC_ASSERT(bufType == CONSTANT_BUFFER ||
175             bufType == BINDLESS_CONSTANT_BUFFER);
176 
177         if (pContext->m_DriverInfo.ForceUntypedBindlessConstantBuffers() &&
178             bufType == BINDLESS_CONSTANT_BUFFER)
179         {
180             return false;
181         }
182 
183         if (pContext->m_DriverInfo.UsesTypedConstantBuffers3D() &&
184             pContext->type != ShaderType::COMPUTE_SHADER)
185         {
186             return true;
187         }
188         if (pContext->m_DriverInfo.UsesTypedConstantBuffersGPGPU() &&
189             pContext->type == ShaderType::COMPUTE_SHADER)
190         {
191             return true;
192         }
193         return false;
194     }
195 
196     ///
197     /// returns buffer type from addressspace
198     ///
DecodeBufferType(unsigned addrSpace)199     BufferType DecodeBufferType(unsigned addrSpace)
200     {
201         switch (addrSpace)
202         {
203         case ADDRESS_SPACE_CONSTANT:
204             return STATELESS_READONLY;
205         case ADDRESS_SPACE_LOCAL:
206             return SLM;
207         case ADDRESS_SPACE_GLOBAL:
208             return STATELESS;
209         case ADDRESS_SPACE_A32:
210             return STATELESS_A32;
211         default:
212             break;
213         }
214         GFXResourceAddrSpace temp;
215         temp.u32Val = addrSpace;
216         BufferType type = BUFFER_TYPE_UNKNOWN;
217         if (addrSpace > ADDRESS_SPACE_NUM_ADDRESSES &&
218             (temp.bits.bufType - 1) < BUFFER_TYPE_UNKNOWN)
219         {
220             type = static_cast<BufferType>(temp.bits.bufType - 1);
221         }
222         return type;
223     }
224 
225     ///
226     /// returns constant buffer load offset
227     ///
getConstantBufferLoadOffset(llvm::LoadInst * ld)228     int getConstantBufferLoadOffset(llvm::LoadInst* ld)
229     {
230         int offset = 0;
231         Value* ptr = ld->getPointerOperand();
232         if (isa<ConstantPointerNull>(ptr))
233         {
234             offset = 0;
235         }
236         else if (IntToPtrInst * itop = dyn_cast<IntToPtrInst>(ptr))
237         {
238             ConstantInt* ci = dyn_cast<ConstantInt>(
239                 itop->getOperand(0));
240             if (ci)
241             {
242                 offset = int_cast<unsigned>(ci->getZExtValue());
243             }
244         }
245         else if (ConstantExpr * itop = dyn_cast<ConstantExpr>(ptr))
246         {
247             if (itop->getOpcode() == Instruction::IntToPtr)
248             {
249                 offset = int_cast<unsigned>(
250                     cast<ConstantInt>(itop->getOperand(0))->getZExtValue());
251             }
252         }
253         return offset;
254     }
255     ///
256     /// returns info if direct addressing is used
257     ///
IsDirectIdx(unsigned addrSpace)258     bool IsDirectIdx(unsigned addrSpace)
259     {
260         GFXResourceAddrSpace temp;
261         temp.u32Val = addrSpace;
262         return (temp.bits.indirect == 0);
263     }
264 
isNaNCheck(llvm::FCmpInst & FC)265     bool isNaNCheck(llvm::FCmpInst& FC)
266     {
267         Value* Op1 = FC.getOperand(1);
268         if (FC.getPredicate() == CmpInst::FCMP_UNO)
269         {
270             auto CFP = dyn_cast<ConstantFP>(Op1);
271             return CFP && CFP->isZero();
272         }
273         else if (FC.getPredicate() == CmpInst::FCMP_UNE)
274         {
275             Value* Op0 = FC.getOperand(0);
276             return Op0 == Op1;
277         }
278         return false;
279     }
280 
cloneLoad(llvm::LoadInst * Orig,llvm::Value * Ptr)281     llvm::LoadInst* cloneLoad(llvm::LoadInst* Orig, llvm::Value* Ptr)
282     {
283         llvm::LoadInst* LI = new llvm::LoadInst(
284             cast<PointerType>(Ptr->getType())->getElementType(),
285             Ptr, "", false, Orig);
286         LI->setVolatile(Orig->isVolatile());
287         LI->setAlignment(IGCLLVM::getCorrectAlign(Orig->getAlignment()));
288         if (LI->isAtomic())
289         {
290             LI->setAtomic(Orig->getOrdering(), Orig->getSyncScopeID());
291         }
292         // Clone metadata
293         llvm::SmallVector<std::pair<unsigned, llvm::MDNode*>, 4> MDs;
294         Orig->getAllMetadata(MDs);
295         for (llvm::SmallVectorImpl<std::pair<unsigned, llvm::MDNode*> >::iterator
296             MI = MDs.begin(), ME = MDs.end(); MI != ME; ++MI)
297         {
298             LI->setMetadata(MI->first, MI->second);
299         }
300         return LI;
301     }
302 
cloneStore(llvm::StoreInst * Orig,llvm::Value * Val,llvm::Value * Ptr)303     llvm::StoreInst* cloneStore(llvm::StoreInst* Orig, llvm::Value* Val, llvm::Value* Ptr)
304     {
305         llvm::StoreInst* SI = new llvm::StoreInst(Val, Ptr, Orig);
306         SI->setVolatile(Orig->isVolatile());
307         SI->setAlignment(IGCLLVM::getCorrectAlign(Orig->getAlignment()));
308         if (SI->isAtomic())
309         {
310             SI->setAtomic(Orig->getOrdering(), Orig->getSyncScopeID());
311         }
312         // Clone metadata
313         llvm::SmallVector<std::pair<unsigned, llvm::MDNode*>, 4> MDs;
314         Orig->getAllMetadata(MDs);
315         for (llvm::SmallVectorImpl<std::pair<unsigned, llvm::MDNode*> >::iterator
316             MI = MDs.begin(), ME = MDs.end(); MI != ME; ++MI)
317         {
318             SI->setMetadata(MI->first, MI->second);
319         }
320         return SI;
321     }
322 
323     // Create a ldraw from a load instruction
CreateLoadRawIntrinsic(LoadInst * inst,Value * bufPtr,Value * offsetVal)324     LdRawIntrinsic* CreateLoadRawIntrinsic(LoadInst* inst, Value* bufPtr, Value* offsetVal)
325     {
326         Type* tys[] = {
327             inst->getType(),
328             bufPtr->getType()
329         };
330 
331         auto* M = inst->getModule();
332         auto& DL = M->getDataLayout();
333         Function *func = GenISAIntrinsic::getDeclaration(
334             M,
335             inst->getType()->isVectorTy() ?
336                 GenISAIntrinsic::GenISA_ldrawvector_indexed :
337                 GenISAIntrinsic::GenISA_ldraw_indexed,
338             tys);
339 
340         unsigned alignment = inst->getAlignment();
341         if (alignment == 0)
342             alignment = DL.getABITypeAlignment(inst->getType());
343 
344         IRBuilder<> builder(inst);
345 
346         Value* attr[] =
347         {
348             bufPtr,
349             offsetVal,
350             builder.getInt32(alignment),
351             builder.getInt1(inst->isVolatile()) // volatile
352         };
353         auto* ld = builder.CreateCall(func, attr);
354         IGC_ASSERT(ld->getType() == inst->getType());
355         return cast<LdRawIntrinsic>(ld);
356     }
357 
358     // Creates a storeraw from a store instruction
CreateStoreRawIntrinsic(StoreInst * inst,Value * bufPtr,Value * offsetVal)359     StoreRawIntrinsic* CreateStoreRawIntrinsic(StoreInst* inst, Value* bufPtr, Value* offsetVal)
360     {
361         Module* module = inst->getModule();
362         Function* func = nullptr;
363         Value* storeVal = inst->getValueOperand();
364         auto& DL = module->getDataLayout();
365         if (storeVal->getType()->isVectorTy())
366         {
367             Type* tys[] = {
368                 bufPtr->getType(),
369                 storeVal->getType()
370             };
371             func = GenISAIntrinsic::getDeclaration(module, llvm::GenISAIntrinsic::GenISA_storerawvector_indexed, tys);
372         }
373         else
374         {
375             Type* dataType = storeVal->getType();
376             const uint64_t typeSize = DL.getTypeSizeInBits(dataType);
377             IGC_ASSERT(typeSize == 8 || typeSize == 16 || typeSize == 32 || typeSize == 64);
378 
379             Type* types[] = {
380                 bufPtr->getType(),
381                 storeVal->getType()
382             };
383 
384             func = GenISAIntrinsic::getDeclaration(module, llvm::GenISAIntrinsic::GenISA_storeraw_indexed, types);
385         }
386         IRBuilder<> builder(inst);
387         unsigned alignment = inst->getAlignment();
388         if (alignment == 0)
389             alignment = DL.getABITypeAlignment(storeVal->getType());
390         Value* attr[] =
391         {
392             bufPtr,
393             offsetVal,
394             storeVal,
395             builder.getInt32(alignment),
396             builder.getInt1(inst->isVolatile()) // volatile
397         };
398         auto* st = builder.CreateCall(func, attr);
399         return cast<StoreRawIntrinsic>(st);
400     }
401 
402     ///
403     /// Tries to trace a resource pointer (texture/sampler/buffer) back to
404     /// the pointer source. Also returns a vector of all instructions in the search path
405     ///
TracePointerSource(Value * resourcePtr,bool hasBranching,bool enablePhiLoops,bool fillList,std::vector<Value * > & instList,llvm::SmallSet<PHINode *,8> & visitedPHIs)406     Value* TracePointerSource(Value* resourcePtr, bool hasBranching, bool enablePhiLoops, bool fillList,
407         std::vector<Value*>& instList, llvm::SmallSet<PHINode*, 8> & visitedPHIs)
408     {
409         Value* srcPtr = nullptr;
410         Value* baseValue = resourcePtr;
411 
412         // Returns true if resource pointers describe the same resource.
413         auto ResourcePointersEq = [](Value* a, Value* b)->bool
414         {
415             if (a == b)
416             {
417                 return true;
418             }
419             if (a->getType()->isPointerTy() && b->getType()->isPointerTy())
420             {
421                 unsigned idxA = 0, idxB = 0;
422                 BufferType bufA, bufB;
423                 BufferAccessType accessA, accessB;
424                 bool needBufferOffsetA = false, needBufferOffsetB = false;
425 
426                 if (GetResourcePointerInfo(a, idxA, bufA, accessA, needBufferOffsetA) &&
427                     GetResourcePointerInfo(b, idxB, bufB, accessB, needBufferOffsetB) &&
428                     idxA == idxB &&
429                     accessA == accessB &&
430                     needBufferOffsetA == needBufferOffsetB)
431                 {
432                     return true;
433                 }
434             }
435             return false;
436         };
437 
438         while (true)
439         {
440             if (fillList)
441             {
442                 instList.push_back(baseValue);
443             }
444 
445             if (GenIntrinsicInst * inst = dyn_cast<GenIntrinsicInst>(baseValue))
446             {
447                 // For bindless pointers
448                 if ((inst->getIntrinsicID() == GenISAIntrinsic::GenISA_RuntimeValue) ||
449                     (inst->getIntrinsicID() == GenISAIntrinsic::GenISA_GetBufferPtr))
450                 {
451                     srcPtr = baseValue;
452                 }
453                 break;
454             }
455             else if (isa<Argument>(baseValue))
456             {
457                 // For compute, resource comes from the kernel args
458                 srcPtr = baseValue;
459                 break;
460             }
461             else if (isa<GlobalVariable>(baseValue))
462             {
463                 // Can be an inline sampler/constant buffer
464                 srcPtr = baseValue;
465                 break;
466             }
467             else if (auto allocaInst = dyn_cast<AllocaInst>(baseValue))
468             {
469                 if (allocaInst->getMetadata("igc.read_only_array"))
470                 {
471                     // Found alloca marked as read_only array.
472                     srcPtr = baseValue;
473                 }
474                 break;
475             }
476             else if (CastInst * inst = dyn_cast<CastInst>(baseValue))
477             {
478                 baseValue = inst->getOperand(0);
479             }
480             else if (GetElementPtrInst * inst = dyn_cast<GetElementPtrInst>(baseValue))
481             {
482                 baseValue = inst->getOperand(0);
483             }
484             else if (BinaryOperator* inst = dyn_cast<BinaryOperator>(baseValue))
485             {
486                 // Assume this is pointer arithmetic, which allows add/sub only.
487                 // Follow the first operand assuming it's pointer base.
488                 // Do not check the operand is pointer type now, leave the check
489                 // until the leaf instruction is found.
490                 Instruction::BinaryOps Opcode = inst->getOpcode();
491                 if (Opcode == Instruction::Add || Opcode == Instruction::Sub)
492                 {
493                     baseValue = inst->getOperand(0);
494                 }
495                 else
496                     break;
497             }
498             else if (PHINode * inst = dyn_cast<PHINode>(baseValue))
499             {
500                 if (visitedPHIs.count(inst) != 0)
501                 {
502                     // stop if we've seen this phi node before
503                     return baseValue;
504                 }
505                 visitedPHIs.insert(inst);
506 
507                 for (unsigned int i = 0; i < inst->getNumIncomingValues(); ++i)
508                 {
509                     // All phi paths must be trace-able and trace back to the same source
510                     Value* phiVal = inst->getIncomingValue(i);
511                     std::vector<Value*> splitList;
512                     Value* phiSrcPtr = TracePointerSource(phiVal, true, enablePhiLoops, fillList, splitList, visitedPHIs);
513                     if (phiSrcPtr == nullptr)
514                     {
515                         // Incoming value not trace-able, bail out.
516                         return nullptr;
517                     }
518                     else if (isa<PHINode>(phiSrcPtr) && phiSrcPtr == baseValue)
519                     {
520                         // Found a loop in one of the phi paths. We can still trace as long as all the other paths match
521                         if (enablePhiLoops)
522                             continue;
523                         else
524                             return nullptr;
525                     }
526                     else if (srcPtr == nullptr)
527                     {
528                         // Found a path to the source pointer. We only save the instructions used in this path
529                         srcPtr = phiSrcPtr;
530                         instList.insert(instList.end(), splitList.begin(), splitList.end());
531                     }
532                     else if (!ResourcePointersEq(srcPtr , phiSrcPtr))
533                     {
534                         // The source pointers have diverged. Bail out.
535                         return nullptr;
536                     }
537                 }
538                 break;
539             }
540             else if (SelectInst * inst = dyn_cast<SelectInst>(baseValue))
541             {
542                 if (hasBranching)
543                 {
544                     // only allow a single branching instruction to be supported for now
545                     // if both select and PHI are present, or there are multiples of each, we bail
546                     break;
547                 }
548                 // Trace both operands of the select instruction. Both have to be traced back to the same
549                 // source pointer, otherwise we can't determine which one to use.
550                 Value* selectSrc0 = TracePointerSource(inst->getOperand(1), true, enablePhiLoops, fillList, instList, visitedPHIs);
551                 Value* selectSrc1 = TracePointerSource(inst->getOperand(2), true, enablePhiLoops, false, instList, visitedPHIs);
552                 if (selectSrc0 && selectSrc1 &&
553                     ResourcePointersEq(selectSrc0, selectSrc1))
554                 {
555                     srcPtr = selectSrc0;
556                     break;
557                 }
558                 return nullptr;
559             }
560             else if (LoadInst* inst = dyn_cast<LoadInst>(baseValue))
561             {
562                 if (inst->getPointerAddressSpace() == 0)
563                 {
564                     // May be local array of resources:
565                     baseValue = inst->getPointerOperand();
566                 }
567                 else
568                 {
569                     break;
570                 }
571             }
572             else
573             {
574                 // Unsupported instruction in search chain. Don't continue.
575                 break;
576             }
577         }
578         return srcPtr;
579     }
580 
581     ///
582     /// Only trace the GetBufferPtr instruction (ignore GetElementPtr)
583     ///
TracePointerSource(Value * resourcePtr)584     Value* TracePointerSource(Value* resourcePtr)
585     {
586         std::vector<Value*> tempList; //unused
587         llvm::SmallSet<PHINode*, 8> visitedPHIs;
588         return TracePointerSource(resourcePtr, false, true, false, tempList, visitedPHIs);
589     }
590 
TracePointerSource(Value * resourcePtr,bool hasBranching,bool enablePhiLoops,bool fillList,std::vector<Value * > & instList)591     Value* TracePointerSource(Value* resourcePtr, bool hasBranching, bool enablePhiLoops, bool fillList, std::vector<Value*>& instList)
592     {
593         llvm::SmallSet<PHINode*, 8> visitedPHIs;
594         return TracePointerSource(resourcePtr, hasBranching, enablePhiLoops, fillList, instList, visitedPHIs);
595     }
596 
getDefaultAccessType(BufferType bufTy)597     BufferAccessType getDefaultAccessType(BufferType bufTy)
598     {
599         switch (bufTy)
600         {
601         case BufferType::CONSTANT_BUFFER:
602         case BufferType::RESOURCE:
603         case BufferType::BINDLESS_TEXTURE:
604         case BufferType::BINDLESS_CONSTANT_BUFFER:
605         case BufferType::STATELESS_READONLY:
606         case BufferType::SAMPLER:
607             return BufferAccessType::ACCESS_READ;
608 
609         case BufferType::UAV:
610         case BufferType::SLM:
611         case BufferType::POINTER:
612         case BufferType::BINDLESS:
613         case BufferType::STATELESS:
614             return BufferAccessType::ACCESS_READWRITE;
615 
616         case BufferType::RENDER_TARGET:
617             return BufferAccessType::ACCESS_WRITE;
618 
619         default:
620             IGC_ASSERT_MESSAGE(0, "Invalid buffer type");
621             return BufferAccessType::ACCESS_READWRITE;
622         }
623     }
624 
GetResourcePointerInfo(Value * srcPtr,unsigned & resID,IGC::BufferType & resTy,BufferAccessType & accessTy,bool & needBufferOffset)625     bool GetResourcePointerInfo(Value* srcPtr, unsigned& resID, IGC::BufferType& resTy, BufferAccessType& accessTy, bool& needBufferOffset)
626     {
627         accessTy = BufferAccessType::ACCESS_READWRITE;
628         needBufferOffset = false;
629         if (GenIntrinsicInst * inst = dyn_cast<GenIntrinsicInst>(srcPtr))
630         {
631             // For bindless pointers with encoded metadata
632             if (inst->getIntrinsicID() == GenISAIntrinsic::GenISA_RuntimeValue)
633             {
634                 if (inst->hasOperandBundles())
635                 {
636                     auto resIDBundle = inst->getOperandBundle("resID");
637                     auto resTyBundle = inst->getOperandBundle("resTy");
638                     auto accessTyBundle = inst->getOperandBundle("accessTy");
639                     auto needBufferOffsetBundle = inst->getOperandBundle("needBufferOffset");
640                     if (resIDBundle && resTyBundle)
641                     {
642                         resID = (unsigned)(cast<ConstantInt>(resIDBundle->Inputs.front()))->getZExtValue();
643                         resTy = (BufferType)(cast<ConstantInt>(resTyBundle->Inputs.front()))->getZExtValue();
644 
645                         if (accessTyBundle)
646                             accessTy = (BufferAccessType)(cast<ConstantInt>(accessTyBundle->Inputs.front()))->getZExtValue();
647                         else
648                             accessTy = getDefaultAccessType(resTy);
649 
650                         if(needBufferOffsetBundle)
651                             needBufferOffset = (bool)(cast<ConstantInt>(needBufferOffsetBundle->Inputs.front()))->getZExtValue();
652 
653                         return true;
654                     }
655                 }
656             }
657             // For GetBufferPtr instructions with buffer info in the operands
658             else if (inst->getIntrinsicID() == GenISAIntrinsic::GenISA_GetBufferPtr)
659             {
660                 Value* bufIdV = inst->getOperand(0);
661                 Value* bufTyV = inst->getOperand(1);
662                 if (isa<ConstantInt>(bufIdV) && isa<ConstantInt>(bufTyV))
663                 {
664                     resID = (unsigned)(cast<ConstantInt>(bufIdV)->getZExtValue());
665                     resTy = (IGC::BufferType)(cast<ConstantInt>(bufTyV)->getZExtValue());
666                     accessTy = getDefaultAccessType(resTy);
667                     return true;
668                 }
669             }
670         }
671         return false;
672     }
673 
674     // Get GRF offset from GenISA_RuntimeValue intrinsic call
GetGRFOffsetFromRTV(Value * pointerSrc,unsigned & GRFOffset)675     bool GetGRFOffsetFromRTV(Value* pointerSrc, unsigned& GRFOffset)
676     {
677         if (GenIntrinsicInst * inst = dyn_cast<GenIntrinsicInst>(pointerSrc))
678         {
679             // For bindless pointers with encoded metadata
680             if (inst->getIntrinsicID() == GenISAIntrinsic::GenISA_RuntimeValue)
681             {
682                 GRFOffset = (unsigned)llvm::cast<llvm::ConstantInt>(inst->getOperand(0))->getZExtValue();
683                 return true;
684             }
685         }
686         return false;
687     }
688 
GetStatelessBufferInfo(Value * pointer,unsigned & bufIdOrGRFOffset,BufferType & bufferTy,Value * & bufferSrcPtr,bool & isDirectBuf)689     bool GetStatelessBufferInfo(Value* pointer, unsigned& bufIdOrGRFOffset,
690             BufferType & bufferTy, Value*& bufferSrcPtr, bool& isDirectBuf)
691     {
692         isDirectBuf = false;
693         // If the buffer info is not encoded in the address space, we can still find it by
694         // tracing the pointer to where it's created.
695         Value * src = IGC::TracePointerSource(pointer);
696         BufferAccessType accType;
697         bool needBufferOffset;  // Unused
698         if (!src)   return false;
699         if (IGC::GetResourcePointerInfo(src, bufIdOrGRFOffset, bufferTy, accType, needBufferOffset))
700         {
701             bufferSrcPtr = src;
702             isDirectBuf = true;
703             return true;
704         }
705         else if (GetGRFOffsetFromRTV(src, bufIdOrGRFOffset))
706         {
707             bufferSrcPtr = src;
708             bufferTy = BUFFER_TYPE_UNKNOWN;
709             return true;
710         }
711         return false;
712     }
713 
EvalConstantAddress(Value * address,unsigned int & offset,const llvm::DataLayout * pDL,Value * ptrSrc)714     bool EvalConstantAddress(Value* address, unsigned int& offset, const llvm::DataLayout* pDL, Value* ptrSrc)
715     {
716 
717         if ((ptrSrc == nullptr && isa<ConstantPointerNull>(address)) ||
718             (ptrSrc == address))
719         {
720             offset = 0;
721             return true;
722         }
723         else if(ConstantInt* eltIdx = dyn_cast<ConstantInt>(address))
724         {
725             offset = int_cast<int>(eltIdx->getZExtValue());
726             return true;
727         }
728         else if (ConstantExpr * ptrExpr = dyn_cast<ConstantExpr>(address))
729         {
730             if (ptrExpr->getOpcode() == Instruction::IntToPtr)
731             {
732                 Value* eltIdxVal = ptrExpr->getOperand(0);
733                 ConstantInt* eltIdx = dyn_cast<ConstantInt>(eltIdxVal);
734                 if (!eltIdx)
735                     return false;
736                 offset = int_cast<int>(eltIdx->getZExtValue());
737                 return true;
738             }
739         }
740         else if (Instruction* ptrExpr = dyn_cast<Instruction>(address))
741         {
742             if (ptrExpr->getOpcode() == Instruction::BitCast ||
743                 ptrExpr->getOpcode() == Instruction::AddrSpaceCast)
744             {
745                 return EvalConstantAddress(ptrExpr->getOperand(0), offset, pDL, ptrSrc);
746             }
747             if (ptrExpr->getOpcode() == Instruction::IntToPtr)
748             {
749                 Value * eltIdxVal = ptrExpr->getOperand(0);
750                 ConstantInt * eltIdx = dyn_cast<ConstantInt>(eltIdxVal);
751                 if (!eltIdx)
752                     return false;
753                 offset = int_cast<int>(eltIdx->getZExtValue());
754                 return true;
755             }
756             if (ptrExpr->getOpcode() == Instruction::PtrToInt)
757             {
758                 offset = 0;
759                 if (!EvalConstantAddress(ptrExpr->getOperand(0), offset, pDL, ptrSrc))
760                 {
761                     return false;
762                 }
763                 return true;
764             }
765             else if (ptrExpr->getOpcode() == Instruction::GetElementPtr)
766             {
767                 offset = 0;
768                 if (!EvalConstantAddress(ptrExpr->getOperand(0), offset, pDL, ptrSrc))
769                 {
770                     return false;
771                 }
772                 Type * Ty = ptrExpr->getType();
773                 gep_type_iterator GTI = gep_type_begin(ptrExpr);
774                 for (auto OI = ptrExpr->op_begin() + 1, E = ptrExpr->op_end(); OI != E; ++OI, ++GTI) {
775                     Value * Idx = *OI;
776                     if (StructType * StTy = GTI.getStructTypeOrNull()) {
777                         unsigned Field = int_cast<unsigned>(cast<ConstantInt>(Idx)->getZExtValue());
778                         if (Field) {
779                             offset += int_cast<int>(pDL->getStructLayout(StTy)->getElementOffset(Field));
780                         }
781                         Ty = StTy->getElementType(Field);
782                     }
783                     else {
784                         Ty = GTI.getIndexedType();
785                         if (const ConstantInt * CI = dyn_cast<ConstantInt>(Idx)) {
786                             offset += int_cast<int>(
787                             pDL->getTypeAllocSize(Ty) * CI->getSExtValue());
788 
789                         }
790                         else
791                         {
792                             return false;
793                         }
794                     }
795                 }
796                 return true;
797             }
798         }
799         return false;
800     }
801 
802     // Get constant address from load/ldraw instruction
getConstantAddress(llvm::Instruction & I,ConstantAddress & cl,CodeGenContext * pContext,bool & directBuf,bool & statelessBuf,bool & bindlessBuf)803     bool getConstantAddress(llvm::Instruction& I, ConstantAddress& cl, CodeGenContext* pContext, bool& directBuf, bool& statelessBuf, bool& bindlessBuf)
804     {
805         // Check if the load instruction is with constant buffer address
806         unsigned as;
807         Value* ptrVal;
808         Value* offsetVal;
809         directBuf = false;
810         statelessBuf = false;
811         bindlessBuf = false;
812         bool isPushableAddr = false;
813         unsigned int& bufIdOrGRFOffset = cl.bufId;
814         unsigned int& eltId = cl.eltId;
815         unsigned int& size_in_bytes = cl.size;
816         const llvm::DataLayout DL = pContext->getModule()->getDataLayout();
817 
818         // Only load and ldRaw instructions handled, rest should return
819         if (LoadInst* load = llvm::dyn_cast<LoadInst> (&I))
820         {
821             as = load->getPointerAddressSpace();
822             ptrVal = load->getPointerOperand();
823             offsetVal = ptrVal;
824             statelessBuf = (as == ADDRESS_SPACE_CONSTANT);
825         }
826         else if (LdRawIntrinsic* ldRaw = dyn_cast<LdRawIntrinsic>(&I))
827         {
828             as = ldRaw->getResourceValue()->getType()->getPointerAddressSpace();
829             ptrVal = ldRaw->getResourceValue();
830             offsetVal = ldRaw->getOffsetValue();
831             bindlessBuf = (DecodeBufferType(as) == SSH_BINDLESS_CONSTANT_BUFFER);
832         }
833         else
834             return false;
835 
836         size_in_bytes = 0;
837         BufferType bufType;
838         Value* pointerSrc = nullptr;
839 
840         if (statelessBuf || bindlessBuf)
841         {
842             // If the buffer info is not encoded in the address space, we can still find it by
843             // tracing the pointer to where it's created.
844             if (!GetStatelessBufferInfo(ptrVal, bufIdOrGRFOffset, bufType, pointerSrc, directBuf))
845             {
846                 return false;
847             }
848             if (!directBuf)
849             {
850                 // Make sure constant folding is safe by looking up in pushableAddresses
851                 PushInfo& pushInfo = pContext->getModuleMetaData()->pushInfo;
852 
853                 for (auto it : pushInfo.pushableAddresses)
854                 {
855                     if ((bufIdOrGRFOffset * 4 == it.addressOffset) && (IGC_IS_FLAG_ENABLED(DisableStaticCheckForConstantFolding) || it.isStatic))
856                     {
857                         isPushableAddr = true;
858                         break;
859                     }
860                 }
861             }
862         }
863         else
864         {
865             bufType = IGC::DecodeAS4GFXResource(as, directBuf, bufIdOrGRFOffset);
866         }
867         // If it is statelessBuf, we made sure it is a constant buffer by finding it in pushableAddresses
868         if ((directBuf && (bufType == CONSTANT_BUFFER)) ||
869             (isPushableAddr && (statelessBuf || bindlessBuf)))
870         {
871             eltId = 0;
872             if (!EvalConstantAddress(offsetVal, eltId, &DL, pointerSrc))
873             {
874                 return false;
875             }
876         }
877         else
878         {
879             return false;
880         }
881         size_in_bytes = (unsigned int)I.getType()->getPrimitiveSizeInBits() / 8;
882         return true;
883     }
884 
885 
886     ///
887     /// Replaces oldPtr with newPtr in a sample/ld intrinsic's argument list. The new instrinsic will
888     /// replace the old one in the module
889     ///
ChangePtrTypeInIntrinsic(llvm::GenIntrinsicInst * & pIntr,llvm::Value * oldPtr,llvm::Value * newPtr)890     void ChangePtrTypeInIntrinsic(llvm::GenIntrinsicInst*& pIntr, llvm::Value* oldPtr, llvm::Value* newPtr)
891     {
892         llvm::Module* pModule = pIntr->getParent()->getParent()->getParent();
893         llvm::Function* pCalledFunc = pIntr->getCalledFunction();
894 
895         // Look at the intrinsic and figure out which pointer to change
896         int num_ops = pIntr->getNumArgOperands();
897         llvm::SmallVector<llvm::Value*, 5> args;
898 
899         for (int i = 0; i < num_ops; ++i)
900         {
901             if (pIntr->getArgOperand(i) == oldPtr)
902                 args.push_back(newPtr);
903             else
904                 args.push_back(pIntr->getArgOperand(i));
905         }
906 
907         llvm::Function* pNewIntr = nullptr;
908         llvm::SmallVector<llvm::Type*, 4> overloadedTys;
909         GenISAIntrinsic::ID id = pIntr->getIntrinsicID();
910         switch (id)
911         {
912         case llvm::GenISAIntrinsic::GenISA_ldmcsptr:
913         {
914             llvm::Value* pTextureValue = cast<SamplerLoadIntrinsic>(pIntr)->getTextureValue();
915             overloadedTys.push_back(pCalledFunc->getReturnType());
916             overloadedTys.push_back(args[0]->getType());
917             overloadedTys.push_back(pTextureValue == oldPtr ? newPtr->getType() : pTextureValue->getType());
918             break;
919         }
920         case llvm::GenISAIntrinsic::GenISA_ldptr:
921         case llvm::GenISAIntrinsic::GenISA_ldmsptr:
922         {
923             llvm::Value* pTextureValue = cast<SamplerLoadIntrinsic>(pIntr)->getTextureValue();
924             overloadedTys.push_back(pCalledFunc->getReturnType());
925             overloadedTys.push_back(pTextureValue == oldPtr ? newPtr->getType() : pTextureValue->getType());
926             break;
927         }
928         case llvm::GenISAIntrinsic::GenISA_resinfoptr:
929         case llvm::GenISAIntrinsic::GenISA_readsurfaceinfoptr:
930         case llvm::GenISAIntrinsic::GenISA_sampleinfoptr:
931             overloadedTys.push_back(newPtr->getType());
932             break;
933         case llvm::GenISAIntrinsic::GenISA_sampleptr:
934         case llvm::GenISAIntrinsic::GenISA_sampleBptr:
935         case llvm::GenISAIntrinsic::GenISA_sampleCptr:
936         case llvm::GenISAIntrinsic::GenISA_sampleDptr:
937         case llvm::GenISAIntrinsic::GenISA_sampleLptr:
938         case llvm::GenISAIntrinsic::GenISA_sampleBCptr:
939         case llvm::GenISAIntrinsic::GenISA_sampleDCptr:
940         case llvm::GenISAIntrinsic::GenISA_sampleLCptr:
941         case llvm::GenISAIntrinsic::GenISA_gather4ptr:
942         case llvm::GenISAIntrinsic::GenISA_gather4POptr:
943         case llvm::GenISAIntrinsic::GenISA_gather4Cptr:
944         case llvm::GenISAIntrinsic::GenISA_gather4POCptr:
945         case llvm::GenISAIntrinsic::GenISA_lodptr:
946         {
947             // Figure out the intrinsic operands for texture & sampler
948             llvm::Value* pTextureValue = nullptr;
949             llvm::Value* pSamplerValue = nullptr;
950             IGC::getTextureAndSamplerOperands(
951                 pIntr,
952                 pTextureValue,
953                 pSamplerValue);
954 
955             overloadedTys.push_back(pCalledFunc->getReturnType());
956             overloadedTys.push_back(pIntr->getOperand(0)->getType());
957             overloadedTys.push_back(pTextureValue == oldPtr ? newPtr->getType() : pTextureValue->getType());
958             if (pSamplerValue != nullptr)
959             {
960                 // Samplerless messages will not have sampler in signature.
961                 overloadedTys.push_back(pSamplerValue == oldPtr ? newPtr->getType() : pSamplerValue->getType());
962             }
963             break;
964         }
965         case llvm::GenISAIntrinsic::GenISA_typedread:
966         case llvm::GenISAIntrinsic::GenISA_typedwrite:
967         case llvm::GenISAIntrinsic::GenISA_ldstructured:
968         case llvm::GenISAIntrinsic::GenISA_storestructured1:
969         case llvm::GenISAIntrinsic::GenISA_storestructured2:
970         case llvm::GenISAIntrinsic::GenISA_storestructured3:
971         case llvm::GenISAIntrinsic::GenISA_storestructured4:
972             overloadedTys.push_back(newPtr->getType());
973             break;
974         case llvm::GenISAIntrinsic::GenISA_intatomicraw:
975         case llvm::GenISAIntrinsic::GenISA_icmpxchgatomicraw:
976         case llvm::GenISAIntrinsic::GenISA_intatomicrawA64:
977         case llvm::GenISAIntrinsic::GenISA_icmpxchgatomicrawA64:
978         case llvm::GenISAIntrinsic::GenISA_floatatomicraw:
979         case llvm::GenISAIntrinsic::GenISA_floatatomicrawA64:
980         case llvm::GenISAIntrinsic::GenISA_fcmpxchgatomicraw:
981         case llvm::GenISAIntrinsic::GenISA_fcmpxchgatomicrawA64:
982             overloadedTys.push_back(pIntr->getType());
983             overloadedTys.push_back(newPtr->getType());
984             if (id == GenISAIntrinsic::GenISA_intatomicrawA64)
985             {
986                 args[0] = args[1];
987                 args[1] = CastInst::CreatePointerCast(args[1], Type::getInt32Ty(pModule->getContext()), "", pIntr);
988                 id = GenISAIntrinsic::GenISA_intatomicraw;
989             }
990             else if (id == GenISAIntrinsic::GenISA_icmpxchgatomicrawA64)
991             {
992                 args[0] = args[1];
993                 args[1] = CastInst::CreatePointerCast(args[1], Type::getInt32Ty(pModule->getContext()), "", pIntr);
994                 id = GenISAIntrinsic::GenISA_icmpxchgatomicraw;
995             }
996             else if (id == GenISAIntrinsic::GenISA_floatatomicrawA64)
997             {
998                 args[0] = args[1];
999                 args[1] = CastInst::CreatePointerCast(args[1], Type::getFloatTy(pModule->getContext()), "", pIntr);
1000                 id = GenISAIntrinsic::GenISA_floatatomicraw;
1001             }
1002             else if (id == GenISAIntrinsic::GenISA_fcmpxchgatomicrawA64)
1003             {
1004                 args[0] = args[1];
1005                 args[1] = CastInst::CreatePointerCast(args[1], Type::getFloatTy(pModule->getContext()), "", pIntr);
1006                 id = GenISAIntrinsic::GenISA_fcmpxchgatomicraw;
1007             }
1008             break;
1009         case llvm::GenISAIntrinsic::GenISA_dwordatomicstructured:
1010         case llvm::GenISAIntrinsic::GenISA_floatatomicstructured:
1011         case llvm::GenISAIntrinsic::GenISA_cmpxchgatomicstructured:
1012         case llvm::GenISAIntrinsic::GenISA_fcmpxchgatomicstructured:
1013             overloadedTys.push_back(pIntr->getType());
1014             overloadedTys.push_back(args[0]->getType());
1015             break;
1016         case GenISAIntrinsic::GenISA_intatomictyped:
1017         case GenISAIntrinsic::GenISA_icmpxchgatomictyped:
1018             overloadedTys.push_back(pIntr->getType());
1019             overloadedTys.push_back(newPtr->getType());
1020             break;
1021         case GenISAIntrinsic::GenISA_atomiccounterinc:
1022         case GenISAIntrinsic::GenISA_atomiccounterpredec:
1023             overloadedTys.push_back(args[0]->getType());
1024             break;
1025         case llvm::GenISAIntrinsic::GenISA_ldrawvector_indexed:
1026         case llvm::GenISAIntrinsic::GenISA_ldraw_indexed:
1027             overloadedTys.push_back(pCalledFunc->getReturnType());
1028             overloadedTys.push_back(newPtr->getType());
1029             break;
1030         case llvm::GenISAIntrinsic::GenISA_storerawvector_indexed:
1031         case llvm::GenISAIntrinsic::GenISA_storeraw_indexed:
1032             overloadedTys.push_back(newPtr->getType());
1033             overloadedTys.push_back(args[2]->getType());
1034             break;
1035         default:
1036             IGC_ASSERT_MESSAGE(0, "Unknown intrinsic encountered while changing pointer types");
1037             break;
1038         }
1039 
1040         pNewIntr = llvm::GenISAIntrinsic::getDeclaration(
1041             pModule,
1042             id,
1043             overloadedTys);
1044 
1045         llvm::CallInst* pNewCall = llvm::CallInst::Create(pNewIntr, args, "", pIntr);
1046         pNewCall->setDebugLoc(pIntr->getDebugLoc());
1047 
1048         pIntr->replaceAllUsesWith(pNewCall);
1049         pIntr->eraseFromParent();
1050 
1051         pIntr = llvm::cast<llvm::GenIntrinsicInst>(pNewCall);
1052     }
1053 
1054     ///
1055     /// Returns the sampler/texture pointers for resource access intrinsics
1056     ///
getTextureAndSamplerOperands(llvm::GenIntrinsicInst * pIntr,llvm::Value * & pTextureValue,llvm::Value * & pSamplerValue)1057     void getTextureAndSamplerOperands(
1058         llvm::GenIntrinsicInst* pIntr,
1059         llvm::Value*& pTextureValue,
1060         llvm::Value*& pSamplerValue)
1061     {
1062         if (llvm::SamplerLoadIntrinsic * pSamplerLoadInst = llvm::dyn_cast<llvm::SamplerLoadIntrinsic>(pIntr))
1063         {
1064             pTextureValue = pSamplerLoadInst->getTextureValue();
1065             pSamplerValue = nullptr;
1066         }
1067         else if (llvm::SampleIntrinsic * pSampleInst = llvm::dyn_cast<llvm::SampleIntrinsic>(pIntr))
1068         {
1069             pTextureValue = pSampleInst->getTextureValue();
1070             pSamplerValue = pSampleInst->getSamplerValue();
1071         }
1072         else if (llvm::SamplerGatherIntrinsic * pGatherInst = llvm::dyn_cast<llvm::SamplerGatherIntrinsic>(pIntr))
1073         {
1074             pTextureValue = pGatherInst->getTextureValue();
1075             pSamplerValue = pGatherInst->getSamplerValue();
1076         }
1077         else
1078         {
1079             pTextureValue = nullptr;
1080             pSamplerValue = nullptr;
1081             switch (pIntr->getIntrinsicID())
1082             {
1083             case llvm::GenISAIntrinsic::GenISA_resinfoptr:
1084             case llvm::GenISAIntrinsic::GenISA_readsurfaceinfoptr:
1085             case llvm::GenISAIntrinsic::GenISA_sampleinfoptr:
1086             case llvm::GenISAIntrinsic::GenISA_typedwrite:
1087             case llvm::GenISAIntrinsic::GenISA_typedread:
1088                 pTextureValue = pIntr->getOperand(0);
1089                 break;
1090             default:
1091                 break;
1092             }
1093         }
1094     }
1095 
1096     // Get the buffer pointer operand for supported buffer access instructions
GetBufferOperand(Instruction * inst)1097     Value* GetBufferOperand(Instruction* inst)
1098     {
1099         Value* pBuffer = nullptr;
1100         if (LoadInst * load = dyn_cast<LoadInst>(inst))
1101         {
1102             pBuffer = load->getPointerOperand();
1103         }
1104         else if (StoreInst * store = dyn_cast<StoreInst>(inst))
1105         {
1106             pBuffer = store->getPointerOperand();
1107         }
1108         else if (GenIntrinsicInst * intr = dyn_cast<GenIntrinsicInst>(inst))
1109         {
1110             switch (intr->getIntrinsicID())
1111             {
1112             case GenISAIntrinsic::GenISA_storerawvector_indexed:
1113             case GenISAIntrinsic::GenISA_ldrawvector_indexed:
1114             case GenISAIntrinsic::GenISA_storeraw_indexed:
1115             case GenISAIntrinsic::GenISA_ldraw_indexed:
1116             case GenISAIntrinsic::GenISA_intatomicraw:
1117             case GenISAIntrinsic::GenISA_intatomictyped:
1118             case GenISAIntrinsic::GenISA_icmpxchgatomictyped:
1119             case GenISAIntrinsic::GenISA_floatatomicraw:
1120             case GenISAIntrinsic::GenISA_icmpxchgatomicraw:
1121             case GenISAIntrinsic::GenISA_fcmpxchgatomicraw:
1122             case GenISAIntrinsic::GenISA_simdBlockRead:
1123             case GenISAIntrinsic::GenISA_simdBlockWrite:
1124                 pBuffer = intr->getOperand(0);
1125                 break;
1126             case GenISAIntrinsic::GenISA_intatomicrawA64:
1127             case GenISAIntrinsic::GenISA_floatatomicrawA64:
1128             case GenISAIntrinsic::GenISA_icmpxchgatomicrawA64:
1129             case GenISAIntrinsic::GenISA_fcmpxchgatomicrawA64:
1130                 pBuffer = intr->getOperand(1);
1131                 break;
1132             default:
1133                 break;
1134             }
1135         }
1136         return pBuffer;
1137     }
1138 
GetOpCode(const llvm::Instruction * inst)1139     EOPCODE GetOpCode(const llvm::Instruction* inst)
1140     {
1141         if (const GenIntrinsicInst * CI = dyn_cast<GenIntrinsicInst>(inst))
1142         {
1143             unsigned ID = CI->getIntrinsicID();
1144             return (EOPCODE)(OPCODE(ID, e_Intrinsic));
1145         }
1146         else if (const IntrinsicInst * CI = llvm::dyn_cast<llvm::IntrinsicInst>(inst))
1147         {
1148             unsigned ID = CI->getIntrinsicID();
1149             return (EOPCODE)(OPCODE(ID, e_Intrinsic));
1150         }
1151         return (EOPCODE)(OPCODE(inst->getOpcode(), e_Instruction));
1152     }
1153 
GetBufferType(uint addrSpace)1154     BufferType GetBufferType(uint addrSpace)
1155     {
1156         bool directIndexing = false;
1157         unsigned int bufId = 0;
1158         return DecodeAS4GFXResource(addrSpace, directIndexing, bufId);
1159     }
1160 
IsReadOnlyLoadDirectCB(llvm::Instruction * pLLVMInst,uint & cbId,llvm::Value * & eltPtrVal,BufferType & bufType)1161     bool IsReadOnlyLoadDirectCB(llvm::Instruction* pLLVMInst,
1162         uint& cbId, llvm::Value*& eltPtrVal, BufferType& bufType)
1163     {
1164         LoadInst* inst = dyn_cast<LoadInst>(pLLVMInst);
1165         if (!inst)
1166         {
1167             return false;
1168         }
1169         bool isInvLoad = inst->getMetadata(LLVMContext::MD_invariant_load) != nullptr;
1170         unsigned as = inst->getPointerAddressSpace();
1171         bool directBuf;
1172         // cbId gets filled in the following call;
1173         bufType = IGC::DecodeAS4GFXResource(as, directBuf, cbId);
1174         if ((bufType == CONSTANT_BUFFER || bufType == RESOURCE || isInvLoad) && directBuf)
1175         {
1176             Value* ptrVal = inst->getPointerOperand();
1177             // skip bitcast and find the real address computation
1178             while (isa<BitCastInst>(ptrVal))
1179             {
1180                 ptrVal = cast<BitCastInst>(ptrVal)->getOperand(0);
1181             }
1182             if (isa<ConstantPointerNull>(ptrVal) ||
1183                 isa<IntToPtrInst>(ptrVal) ||
1184                 isa<GetElementPtrInst>(ptrVal) ||
1185                 isa<ConstantExpr>(ptrVal) ||
1186                 isa<LoadInst>(ptrVal) ||
1187                 isa<Argument>(ptrVal))
1188             {
1189                 eltPtrVal = ptrVal;
1190                 return true;
1191             }
1192         }
1193         return false;
1194     }
1195 
IsLoadFromDirectCB(llvm::Instruction * pLLVMInst,uint & cbId,llvm::Value * & eltPtrVal)1196     bool IsLoadFromDirectCB(llvm::Instruction* pLLVMInst, uint& cbId, llvm::Value*& eltPtrVal)
1197     {
1198         BufferType bufType = BUFFER_TYPE_UNKNOWN;
1199         bool isReadOnly = IsReadOnlyLoadDirectCB(pLLVMInst, cbId, eltPtrVal, bufType);
1200         return isReadOnly && bufType == CONSTANT_BUFFER;
1201     }
1202 
1203     /// this is texture-load not buffer-load
isLdInstruction(llvm::Instruction * inst)1204     bool isLdInstruction(llvm::Instruction* inst)
1205     {
1206         return isa<SamplerLoadIntrinsic>(inst);
1207     }
1208 
1209     // function returns the position of the texture operand for sample/ld instructions
getTextureIndexArgBasedOnOpcode(llvm::Instruction * inst)1210     llvm::Value* getTextureIndexArgBasedOnOpcode(llvm::Instruction* inst)
1211     {
1212         if (isLdInstruction(inst))
1213         {
1214             return cast<SamplerLoadIntrinsic>(inst)->getTextureValue();
1215         }
1216         else if (isSampleInstruction(inst))
1217         {
1218             return cast<SampleIntrinsic>(inst)->getTextureValue();
1219         }
1220         else if (isGather4Instruction(inst))
1221         {
1222             return cast<SamplerGatherIntrinsic>(inst)->getTextureValue();
1223         }
1224 
1225         return nullptr;
1226     }
1227 
findSampleInstructionTextureIdx(llvm::Instruction * inst)1228     int findSampleInstructionTextureIdx(llvm::Instruction* inst)
1229     {
1230         // fetch the textureArgIdx.
1231         Value* ptr = getTextureIndexArgBasedOnOpcode(inst);
1232         unsigned textureIdx = -1;
1233 
1234         if (ptr && ptr->getType()->isPointerTy())
1235         {
1236             BufferType bufType = BUFFER_TYPE_UNKNOWN;
1237             if (!(isa<GenIntrinsicInst>(ptr) &&
1238                 cast<GenIntrinsicInst>(ptr)->getIntrinsicID() == GenISAIntrinsic::GenISA_GetBufferPtr))
1239             {
1240                 uint as = ptr->getType()->getPointerAddressSpace();
1241                 bool directIndexing;
1242                 bufType = DecodeAS4GFXResource(as, directIndexing, textureIdx);
1243                 if (bufType == UAV)
1244                 {
1245                     // dont do any clustering on read/write images
1246                     textureIdx = -1;
1247                 }
1248             }
1249         }
1250         else if (ptr)
1251         {
1252             if (llvm::dyn_cast<llvm::ConstantInt>(ptr))
1253             {
1254                 textureIdx = int_cast<unsigned>(GetImmediateVal(ptr));
1255             }
1256         }
1257 
1258         return textureIdx;
1259     }
1260 
isSampleLoadGather4InfoInstruction(llvm::Instruction * inst)1261     bool isSampleLoadGather4InfoInstruction(llvm::Instruction* inst)
1262     {
1263         if (isa<GenIntrinsicInst>(inst))
1264         {
1265             switch ((cast<GenIntrinsicInst>(inst))->getIntrinsicID())
1266             {
1267             case GenISAIntrinsic::GenISA_sampleptr:
1268             case GenISAIntrinsic::GenISA_sampleBptr:
1269             case GenISAIntrinsic::GenISA_sampleCptr:
1270             case GenISAIntrinsic::GenISA_sampleDptr:
1271             case GenISAIntrinsic::GenISA_sampleDCptr:
1272             case GenISAIntrinsic::GenISA_sampleLptr:
1273             case GenISAIntrinsic::GenISA_sampleLCptr:
1274             case GenISAIntrinsic::GenISA_sampleBCptr:
1275             case GenISAIntrinsic::GenISA_lodptr:
1276             case GenISAIntrinsic::GenISA_ldptr:
1277             case GenISAIntrinsic::GenISA_ldmsptr:
1278             case GenISAIntrinsic::GenISA_ldmsptr16bit:
1279             case GenISAIntrinsic::GenISA_ldmcsptr:
1280             case GenISAIntrinsic::GenISA_sampleinfoptr:
1281             case GenISAIntrinsic::GenISA_resinfoptr:
1282             case GenISAIntrinsic::GenISA_gather4ptr:
1283             case GenISAIntrinsic::GenISA_gather4Cptr:
1284             case GenISAIntrinsic::GenISA_gather4POptr:
1285             case GenISAIntrinsic::GenISA_gather4POCptr:
1286                 return true;
1287             default:
1288                 return false;
1289             }
1290         }
1291 
1292         return false;
1293     }
1294 
isSampleInstruction(llvm::Instruction * inst)1295     bool isSampleInstruction(llvm::Instruction* inst)
1296     {
1297         return isa<SampleIntrinsic>(inst);
1298     }
1299 
isInfoInstruction(llvm::Instruction * inst)1300     bool isInfoInstruction(llvm::Instruction* inst)
1301     {
1302         return isa<InfoIntrinsic>(inst);
1303     }
1304 
isGather4Instruction(llvm::Instruction * inst)1305     bool isGather4Instruction(llvm::Instruction* inst)
1306     {
1307         return isa<SamplerGatherIntrinsic>(inst);
1308     }
1309 
IsMediaIOIntrinsic(llvm::Instruction * inst)1310     bool IsMediaIOIntrinsic(llvm::Instruction* inst)
1311     {
1312         if (auto * pGI = dyn_cast<llvm::GenIntrinsicInst>(inst))
1313         {
1314             GenISAIntrinsic::ID id = pGI->getIntrinsicID();
1315 
1316             return id == GenISAIntrinsic::GenISA_MediaBlockRead ||
1317                 id == GenISAIntrinsic::GenISA_MediaBlockWrite;
1318         }
1319 
1320         return false;
1321     }
1322 
IsSIMDBlockIntrinsic(llvm::Instruction * inst)1323     bool IsSIMDBlockIntrinsic(llvm::Instruction* inst)
1324     {
1325         if (auto * pGI = dyn_cast<llvm::GenIntrinsicInst>(inst))
1326         {
1327             GenISAIntrinsic::ID id = pGI->getIntrinsicID();
1328 
1329             return id == GenISAIntrinsic::GenISA_simdBlockRead ||
1330                 id == GenISAIntrinsic::GenISA_simdBlockWrite;
1331         }
1332 
1333         return false;
1334     }
1335 
isSubGroupIntrinsic(const llvm::Instruction * I)1336     bool isSubGroupIntrinsic(const llvm::Instruction* I)
1337     {
1338         const GenIntrinsicInst* GII = dyn_cast<GenIntrinsicInst>(I);
1339         if (!GII)
1340             return false;
1341 
1342         switch (GII->getIntrinsicID())
1343         {
1344         case GenISAIntrinsic::GenISA_WaveShuffleIndex:
1345         case GenISAIntrinsic::GenISA_simdShuffleDown:
1346         case GenISAIntrinsic::GenISA_simdBlockRead:
1347         case GenISAIntrinsic::GenISA_simdBlockWrite:
1348         case GenISAIntrinsic::GenISA_simdMediaBlockRead:
1349         case GenISAIntrinsic::GenISA_simdMediaBlockWrite:
1350         case GenISAIntrinsic::GenISA_MediaBlockWrite:
1351         case GenISAIntrinsic::GenISA_MediaBlockRead:
1352             return true;
1353         default:
1354             return false;
1355         }
1356     }
1357 
isURBWriteIntrinsic(const llvm::Instruction * I)1358     bool isURBWriteIntrinsic(const llvm::Instruction* I)
1359     {
1360         const GenIntrinsicInst* GII = dyn_cast<GenIntrinsicInst>(I);
1361         if (!GII)
1362             return false;
1363 
1364         return GII->getIntrinsicID() == GenISA_URBWrite;
1365 
1366     }
1367 
AdjustSystemValueCall(llvm::GenIntrinsicInst * inst)1368     llvm::Instruction* AdjustSystemValueCall(llvm::GenIntrinsicInst* inst)
1369     {
1370         IGC_ASSERT(inst->getIntrinsicID() == GenISAIntrinsic::GenISA_DCL_SystemValue);
1371         llvm::Module* pModule = inst->getParent()->getParent()->getParent();
1372         auto CommonConvertFunc = [pModule](llvm::GenIntrinsicInst* inst, llvm::Type* outputType)
1373         {
1374             IGC_ASSERT(outputType->isVectorTy() == false);
1375             IGC_ASSERT(inst->getType()->isVectorTy() == false);
1376             llvm::Instruction* result = inst;
1377             if (inst->getType() != outputType)
1378             {
1379                 llvm::IRBuilder<> builder(inst);
1380                 llvm::Function* systemValueFunc = llvm::GenISAIntrinsic::getDeclaration(pModule, GenISAIntrinsic::GenISA_DCL_SystemValue, outputType);
1381                 llvm::Instruction* sgv = builder.CreateCall(systemValueFunc, inst->getOperand(0));
1382                 // a default system value intrinsic function returns a float value. The returned value is bit casted to an appropriate integer or floating point value
1383                 // in reference to HW specification. Casting from floating point to integer and in the opposite direction is not expected.
1384                 sgv = llvm::cast<llvm::Instruction>(builder.CreateZExtOrTrunc(sgv, builder.getIntNTy((unsigned int)inst->getType()->getPrimitiveSizeInBits())));
1385                 sgv = llvm::cast<llvm::Instruction>(builder.CreateBitCast(sgv, inst->getType()));
1386                 inst->replaceAllUsesWith(sgv);
1387                 inst->eraseFromParent();
1388                 result = sgv;
1389             }
1390             return result;
1391         };
1392 
1393         SGVUsage usage = static_cast<SGVUsage>(llvm::cast<llvm::ConstantInt>(inst->getOperand(0))->getZExtValue());
1394         llvm::Instruction* result = inst;
1395 
1396         switch (usage)
1397         {
1398         case THREAD_ID_IN_GROUP_X:
1399         case THREAD_ID_IN_GROUP_Y:
1400         case THREAD_ID_IN_GROUP_Z:
1401             result = CommonConvertFunc(inst, llvm::IntegerType::get(pModule->getContext(), 16));
1402             break;
1403         default:
1404             break;
1405         }
1406         return result;
1407     }
1408 
1409     bool isReadInput(llvm::Instruction* pLLVMInstr);
1410 
1411 #define DECLARE_OPCODE(instName, llvmType, name, modifiers, sat, pred, condMod, mathIntrinsic, atomicIntrinsic, regioning) \
1412     case name:\
1413     return modifiers;
SupportsModifier(llvm::Instruction * inst)1414     bool SupportsModifier(llvm::Instruction* inst)
1415     {
1416         // Special cases
1417         switch (inst->getOpcode())
1418         {
1419         case Instruction::ICmp:
1420         {
1421             // icmp supports modifier unless it is unsigned
1422             CmpInst* cmp = cast<ICmpInst>(inst);
1423             return !cmp->isUnsigned();
1424         }
1425         case Instruction::Mul:
1426             // integer mul supports modifier if not int64.
1427             return !inst->getType()->isIntegerTy(64);
1428         case Instruction::URem:
1429             // neg mod is negative. Disable it as URem must have positive operands,
1430             return false;
1431         default:
1432             break;
1433         }
1434 
1435         switch (GetOpCode(inst))
1436         {
1437 #include "opCode.h"
1438         default:
1439             return false;
1440         }
1441     }
1442 #undef DECLARE_OPCODE
1443 
1444 #define DECLARE_OPCODE(instName, llvmType, name, modifiers, sat, pred, condMod, mathIntrinsic, atomicIntrinsic, regioning) \
1445     case name:\
1446     return sat;
SupportsSaturate(llvm::Instruction * inst)1447     bool SupportsSaturate(llvm::Instruction* inst)
1448     {
1449         switch (GetOpCode(inst))
1450         {
1451 #include "opCode.h"
1452         default:
1453             break;
1454         }
1455         return false;
1456     }
1457 #undef DECLARE_OPCODE
1458 
1459 #define DECLARE_OPCODE(instName, llvmType, name, modifiers, sat, pred, condMod, mathIntrinsic, atomicIntrinsic, regioning) \
1460     case name:\
1461     return pred;
SupportsPredicate(llvm::Instruction * inst)1462     bool SupportsPredicate(llvm::Instruction* inst)
1463     {
1464         switch (GetOpCode(inst))
1465         {
1466 #include "opCode.h"
1467         default:
1468             return false;
1469         }
1470     }
1471 #undef DECLARE_OPCODE
1472 
1473 #define DECLARE_OPCODE(instName, llvmType, name, modifiers, sat, pred, condMod, mathIntrinsic, atomicIntrinsic, regioning) \
1474     case name:\
1475     return condMod;
SupportsCondModifier(llvm::Instruction * inst)1476     bool SupportsCondModifier(llvm::Instruction* inst)
1477     {
1478         switch (GetOpCode(inst))
1479         {
1480 #include "opCode.h"
1481         default:
1482             return false;
1483         }
1484     }
1485 #undef DECLARE_OPCODE
1486 
1487 #define DECLARE_OPCODE(instName, llvmType, name, modifiers, sat, pred, condMod, mathIntrinsic, atomicIntrinsic, regioning) \
1488     case name:\
1489     return regioning;
SupportsRegioning(llvm::Instruction * inst)1490     bool SupportsRegioning(llvm::Instruction* inst)
1491     {
1492         switch (GetOpCode(inst))
1493         {
1494 #include "opCode.h"
1495         default:
1496             break;
1497         }
1498         return false;
1499     }
1500 #undef DECLARE_OPCODE
1501 
1502 #define DECLARE_OPCODE(instName, llvmType, name, modifiers, sat, pred, condMod, mathIntrinsic, atomicIntrinsic, regioning) \
1503     case name:\
1504     return mathIntrinsic;
IsMathIntrinsic(EOPCODE opcode)1505     bool IsMathIntrinsic(EOPCODE opcode)
1506     {
1507         switch (opcode)
1508         {
1509 #include "opCode.h"
1510         default:
1511             return false;
1512         }
1513     }
1514 #undef DECLARE_OPCODE
1515 
1516 #define DECLARE_OPCODE(instName, llvmType, name, modifiers, sat, pred, condMod, mathIntrinsic, atomicIntrinsic, regioning) \
1517     case name:\
1518     return atomicIntrinsic;
IsAtomicIntrinsic(EOPCODE opcode)1519     bool IsAtomicIntrinsic(EOPCODE opcode)
1520     {
1521         switch (opcode)
1522         {
1523 #include "opCode.h"
1524         default:
1525             return false;
1526         }
1527     }
1528 #undef DECLARE_OPCODE
1529 
IsExtendedMathInstruction(llvm::Instruction * Inst)1530     bool IsExtendedMathInstruction(llvm::Instruction* Inst)
1531     {
1532         EOPCODE opcode = GetOpCode(Inst);
1533         switch (opcode)
1534         {
1535         case llvm_fdiv:
1536         case llvm_sdiv:
1537         case llvm_udiv:
1538         case llvm_log:
1539         case llvm_exp:
1540         case llvm_sqrt:
1541         case llvm_sin:
1542         case llvm_cos:
1543         case llvm_pow:
1544             return true;
1545         default:
1546             return false;
1547         }
1548         return false;
1549     }
1550     // for now just include shuffle, reduce and scan,
1551     // which have simd32 implementations and should not be split into two instances
IsSubGroupIntrinsicWithSimd32Implementation(EOPCODE opcode)1552     bool IsSubGroupIntrinsicWithSimd32Implementation(EOPCODE opcode)
1553     {
1554         return (opcode == llvm_waveAll ||
1555             opcode == llvm_waveClustered ||
1556             opcode == llvm_wavePrefix ||
1557             opcode == llvm_waveShuffleIndex ||
1558             opcode == llvm_simdShuffleDown ||
1559             opcode == llvm_simdBlockRead||
1560             opcode == llvm_simdBlockReadBindless);
1561     }
1562 
1563 
IsGradientIntrinsic(EOPCODE opcode)1564     bool IsGradientIntrinsic(EOPCODE opcode)
1565     {
1566         return(opcode == llvm_gradientX ||
1567             opcode == llvm_gradientY ||
1568             opcode == llvm_gradientXfine ||
1569             opcode == llvm_gradientYfine);
1570     }
1571 
IsStatelessMemLoadIntrinsic(llvm::GenISAIntrinsic::ID id)1572     bool IsStatelessMemLoadIntrinsic(llvm::GenISAIntrinsic::ID id)
1573     {
1574         switch(id)
1575         {
1576         case GenISAIntrinsic::GenISA_simdBlockRead:
1577                 return true;
1578             default:
1579                 break;
1580         }
1581         return false;
1582     }
1583 
IsStatelessMemStoreIntrinsic(llvm::GenISAIntrinsic::ID id)1584     bool IsStatelessMemStoreIntrinsic(llvm::GenISAIntrinsic::ID id)
1585     {
1586         switch (id) {
1587         case GenISAIntrinsic::GenISA_simdBlockWrite:
1588             return true;
1589         default:
1590             break;
1591         }
1592         return false;
1593     }
1594 
IsStatelessMemAtomicIntrinsic(GenIntrinsicInst & inst,GenISAIntrinsic::ID id)1595     bool IsStatelessMemAtomicIntrinsic(GenIntrinsicInst& inst, GenISAIntrinsic::ID id)
1596     {
1597         // This includes:
1598         // GenISA_intatomicraw
1599         // GenISA_floatatomicraw
1600         // GenISA_intatomicrawA64
1601         // GenISA_floatatomicrawA64
1602         // GenISA_icmpxchgatomicraw
1603         // GenISA_fcmpxchgatomicraw
1604         // GenISA_icmpxchgatomicrawA64
1605         // GenISA_fcmpxchgatomicrawA64
1606         if (IsAtomicIntrinsic(GetOpCode(&inst)))
1607             return true;
1608 
1609         return false;
1610     }
1611 
ComputesGradient(llvm::Instruction * inst)1612     bool ComputesGradient(llvm::Instruction* inst)
1613     {
1614         llvm::SampleIntrinsic* sampleInst = dyn_cast<llvm::SampleIntrinsic>(inst);
1615         if (sampleInst && sampleInst->IsDerivative())
1616         {
1617             return true;
1618         }
1619         if (IsGradientIntrinsic(GetOpCode(inst)))
1620         {
1621             return true;
1622         }
1623         return false;
1624     }
1625 
getImmValueU32(const llvm::Value * value)1626     uint getImmValueU32(const llvm::Value* value)
1627     {
1628         const llvm::ConstantInt* cval = llvm::cast<llvm::ConstantInt>(value);
1629         IGC_ASSERT(nullptr != cval);
1630         IGC_ASSERT(cval->getBitWidth() == 32);
1631 
1632         uint ival = int_cast<uint>(cval->getZExtValue());
1633         return ival;
1634     }
1635 
getImmValueBool(const llvm::Value * value)1636     bool getImmValueBool(const llvm::Value* value)
1637     {
1638         const llvm::ConstantInt* cval = llvm::cast<llvm::ConstantInt>(value);
1639         IGC_ASSERT(nullptr != cval);
1640         IGC_ASSERT(cval->getBitWidth() == 1);
1641 
1642         return cval->getValue().getBoolValue();
1643     }
1644 
ExtractElementFromInsertChain(llvm::Value * inst,int pos)1645     llvm::Value* ExtractElementFromInsertChain(llvm::Value* inst, int pos)
1646     {
1647 
1648         llvm::ConstantDataVector* cstV = llvm::dyn_cast<llvm::ConstantDataVector>(inst);
1649         if (cstV != NULL) {
1650             return cstV->getElementAsConstant(pos);
1651         }
1652 
1653         llvm::InsertElementInst* ie = llvm::dyn_cast<llvm::InsertElementInst>(inst);
1654         while (ie != NULL) {
1655             int64_t iOffset = llvm::dyn_cast<llvm::ConstantInt>(ie->getOperand(2))->getSExtValue();
1656             IGC_ASSERT(iOffset >= 0);
1657             if (iOffset == pos) {
1658                 return ie->getOperand(1);
1659             }
1660             llvm::Value* insertBase = ie->getOperand(0);
1661             ie = llvm::dyn_cast<llvm::InsertElementInst>(insertBase);
1662         }
1663         return NULL;
1664     }
1665 
ExtractVec4FromInsertChain(llvm::Value * inst,llvm::Value * elem[4],llvm::SmallVector<llvm::Instruction *,10> & instructionToRemove)1666     bool ExtractVec4FromInsertChain(llvm::Value* inst, llvm::Value* elem[4], llvm::SmallVector<llvm::Instruction*, 10> & instructionToRemove)
1667     {
1668         llvm::ConstantDataVector* cstV = llvm::dyn_cast<llvm::ConstantDataVector>(inst);
1669         if (cstV != NULL) {
1670             IGC_ASSERT(cstV->getNumElements() == 4);
1671             for (int i = 0; i < 4; i++) {
1672                 elem[i] = cstV->getElementAsConstant(i);
1673             }
1674             return true;
1675         }
1676 
1677         for (int i = 0; i < 4; i++) {
1678             elem[i] = NULL;
1679         }
1680 
1681         int count = 0;
1682         llvm::InsertElementInst* ie = llvm::dyn_cast<llvm::InsertElementInst>(inst);
1683         while (ie != NULL) {
1684             int64_t iOffset = llvm::dyn_cast<llvm::ConstantInt>(ie->getOperand(2))->getSExtValue();
1685             IGC_ASSERT(iOffset >= 0);
1686             if (elem[iOffset] == NULL) {
1687                 elem[iOffset] = ie->getOperand(1);
1688                 count++;
1689                 if (ie->hasOneUse()) {
1690                     instructionToRemove.push_back(ie);
1691                 }
1692             }
1693             llvm::Value* insertBase = ie->getOperand(0);
1694             ie = llvm::dyn_cast<llvm::InsertElementInst>(insertBase);
1695         }
1696         return (count == 4);
1697     }
1698 
VectorToElement(llvm::Value * inst,llvm::Value * elem[],llvm::Type * int32Ty,llvm::Instruction * insert_before,int vsize)1699     void VectorToElement(llvm::Value* inst, llvm::Value* elem[], llvm::Type* int32Ty, llvm::Instruction* insert_before, int vsize)
1700     {
1701         for (int i = 0; i < vsize; i++) {
1702             if (elem[i] == nullptr) {
1703                 // Create an ExtractElementInst
1704                 elem[i] = llvm::ExtractElementInst::Create(inst, llvm::ConstantInt::get(int32Ty, i), "", insert_before);
1705             }
1706         }
1707     }
1708 
ElementToVector(llvm::Value * elem[],llvm::Type * int32Ty,llvm::Instruction * insert_before,int vsize)1709     llvm::Value* ElementToVector(llvm::Value* elem[], llvm::Type* int32Ty, llvm::Instruction* insert_before, int vsize)
1710     {
1711         llvm::VectorType* vt = IGCLLVM::FixedVectorType::get(elem[0]->getType(), vsize);
1712         llvm::Value* vecValue = llvm::UndefValue::get(vt);
1713 
1714         for (int i = 0; i < vsize; ++i)
1715         {
1716 
1717             vecValue = llvm::InsertElementInst::Create(vecValue, elem[i], llvm::ConstantInt::get(int32Ty, i), "", insert_before);
1718             ((Instruction*)vecValue)->setDebugLoc(insert_before->getDebugLoc());
1719         }
1720         return vecValue;
1721     }
1722 
ConvertToFloat(llvm::IRBuilder<> & builder,llvm::Value * val)1723     llvm::Value* ConvertToFloat(llvm::IRBuilder<>& builder, llvm::Value* val)
1724     {
1725         llvm::Value* ret = val;
1726         llvm::Type* type = val->getType();
1727         IGC_ASSERT(nullptr != type);
1728         IGC_ASSERT_MESSAGE(type->isSingleValueType(), "Only scalar data is supported here");
1729         IGC_ASSERT_MESSAGE(!type->isVectorTy(), "Only scalar data is supported here");
1730         IGC_ASSERT((type->getTypeID() == Type::FloatTyID) || (type->getTypeID() == Type::HalfTyID) || (type->getTypeID() == Type::IntegerTyID) || (type->getTypeID() == Type::DoubleTyID));
1731 
1732         unsigned dataSize = type->getScalarSizeInBits();
1733         if (16 == dataSize){
1734             ret = builder.CreateFPExt(builder.CreateBitCast(val, builder.getHalfTy()), builder.getFloatTy());
1735         }else if (32 == dataSize){
1736             ret = builder.CreateBitCast(val, builder.getFloatTy());
1737         }else if (64 == dataSize){
1738             llvm::Type* vecType = IGCLLVM::FixedVectorType::get(builder.getFloatTy(), 2);
1739             ret = builder.CreateBitCast(val, vecType);
1740         }else{
1741             IGC_ASSERT_EXIT_MESSAGE(0, "Unsupported type in ConvertToFloat of helper.");
1742         }
1743 
1744         return ret;
1745     }
1746 
ConvertToFloat(llvm::IRBuilder<> & builder,llvm::SmallVectorImpl<llvm::Value * > & instList)1747     void ConvertToFloat(llvm::IRBuilder<>& builder, llvm::SmallVectorImpl<llvm::Value*>& instList)
1748     {
1749         for (size_t i=0; i<instList.size(); i++)
1750         {
1751             llvm::Value* val = ConvertToFloat(builder, instList[i]);
1752             if (val->getType()->isVectorTy())
1753             {
1754                 instList[i] = builder.CreateExtractElement(val, static_cast<uint64_t>(0));
1755                 size_t iOld = i;
1756                 for (unsigned j = 1; j < cast<IGCLLVM::FixedVectorType>(val->getType())->getNumElements(); j++)
1757                 {
1758                     instList.insert(instList.begin()+ iOld +j, builder.CreateExtractElement(val, j));
1759                     i++;
1760                 }
1761             }
1762             else
1763             {
1764                 instList[i] = val;
1765             }
1766         }
1767     }
1768 
ScalarizeAggregateMembers(llvm::IRBuilder<> & builder,llvm::Value * val,llvm::SmallVectorImpl<llvm::Value * > & instList)1769     void ScalarizeAggregateMembers(llvm::IRBuilder<>& builder, llvm::Value* val, llvm::SmallVectorImpl<llvm::Value*> & instList)
1770     {
1771         llvm::Type* type = val->getType();
1772         unsigned num = 0;
1773         switch (type->getTypeID())
1774         {
1775         case llvm::Type::FloatTyID:
1776         case llvm::Type::HalfTyID:
1777         case llvm::Type::IntegerTyID:
1778         case llvm::Type::DoubleTyID:
1779             instList.push_back(val);
1780             break;
1781         case llvm::Type::StructTyID:
1782             num = type->getStructNumElements();
1783             for (unsigned i = 0; i < num; i++)
1784             {
1785                 ScalarizeAggregateMembers(builder, builder.CreateExtractValue(val, i), instList);
1786             }
1787             break;
1788         case IGCLLVM::VectorTyID:
1789             num = (unsigned)cast<IGCLLVM::FixedVectorType>(type)->getNumElements();
1790             for (unsigned i = 0; i < num; i++)
1791             {
1792                 ScalarizeAggregateMembers(builder, builder.CreateExtractElement(val, i), instList);
1793             }
1794             break;
1795         case llvm::Type::ArrayTyID:
1796             num = static_cast<uint32_t>(type->getArrayNumElements());
1797             for (unsigned i = 0; i < num; i++)
1798             {
1799                 ScalarizeAggregateMembers(builder, builder.CreateExtractValue(val, i), instList);
1800             }
1801             break;
1802         default:
1803             IGC_ASSERT_EXIT_MESSAGE(0, "Unsupported type in ScalarizeAggregateMembers of helper! Please enhance this function first.");
1804             break;
1805         }
1806     }
1807 
ScalarizeAggregateMemberAddresses(IGCLLVM::IRBuilder<> & builder,llvm::Type * type,llvm::Value * val,llvm::SmallVectorImpl<llvm::Value * > & instList,llvm::SmallVector<llvm::Value *,16> indices)1808     void ScalarizeAggregateMemberAddresses(IGCLLVM::IRBuilder<>& builder, llvm::Type* type, llvm::Value* val, llvm::SmallVectorImpl<llvm::Value*> & instList, llvm::SmallVector<llvm::Value*, 16> indices)
1809     {
1810         unsigned num = 0;
1811         switch (type->getTypeID())
1812         {
1813         case llvm::Type::FloatTyID:
1814         case llvm::Type::HalfTyID:
1815         case llvm::Type::IntegerTyID:
1816         case llvm::Type::DoubleTyID:
1817             instList.push_back(builder.CreateInBoundsGEP(val, makeArrayRef(indices)));
1818             break;
1819         case llvm::Type::StructTyID:
1820             num = type->getStructNumElements();
1821             for (unsigned i = 0; i < num; i++)
1822             {
1823                 indices.push_back(builder.getInt32(i));
1824                 ScalarizeAggregateMemberAddresses(builder, type->getStructElementType(i), val, instList, indices);
1825                 indices.pop_back();
1826             }
1827             break;
1828         case IGCLLVM::VectorTyID:
1829             num = (unsigned)cast<IGCLLVM::FixedVectorType>(type)->getNumElements();
1830             for (unsigned i = 0; i < num; i++)
1831             {
1832                 indices.push_back(builder.getInt32(i));
1833                 ScalarizeAggregateMemberAddresses(builder, cast<VectorType>(type)->getElementType(), val, instList, indices);
1834                 indices.pop_back();
1835             }
1836             break;
1837         case llvm::Type::ArrayTyID:
1838             //fix this if one API could support an array with length > 2^32
1839             num = static_cast<uint32_t>(type->getArrayNumElements());
1840             for (unsigned i = 0; i < num; i++)
1841             {
1842                 indices.push_back(builder.getInt32(i));
1843                 ScalarizeAggregateMemberAddresses(builder, type->getArrayElementType(), val, instList, indices);
1844                 indices.pop_back();
1845             }
1846             break;
1847         default:
1848             IGC_ASSERT_EXIT_MESSAGE(0, "Unsupported type in ScalarizeAggregateMemberAddresses of helper! Please enhance this function first.");
1849             break;
1850         }
1851     }
1852 
IsUnsignedCmp(const llvm::CmpInst::Predicate Pred)1853     bool IsUnsignedCmp(const llvm::CmpInst::Predicate Pred)
1854     {
1855         switch (Pred) {
1856         case llvm::CmpInst::ICMP_UGT:
1857         case llvm::CmpInst::ICMP_UGE:
1858         case llvm::CmpInst::ICMP_ULT:
1859         case llvm::CmpInst::ICMP_ULE:
1860             return true;
1861         default:
1862             break;
1863         }
1864         return false;
1865     }
1866 
IsSignedCmp(const llvm::CmpInst::Predicate Pred)1867     bool IsSignedCmp(const llvm::CmpInst::Predicate Pred)
1868     {
1869         switch (Pred)
1870         {
1871         case llvm::CmpInst::ICMP_SGT:
1872         case llvm::CmpInst::ICMP_SGE:
1873         case llvm::CmpInst::ICMP_SLT:
1874         case llvm::CmpInst::ICMP_SLE:
1875             return true;
1876         default:
1877             break;
1878         }
1879         return false;
1880     }
1881 
1882     // isA64Ptr - Queries whether given pointer type requires 64-bit representation in vISA
isA64Ptr(llvm::PointerType * PT,CodeGenContext * pContext)1883     bool isA64Ptr(llvm::PointerType* PT, CodeGenContext* pContext)
1884     {
1885         return pContext->getRegisterPointerSizeInBits(PT->getAddressSpace()) == 64;
1886     }
1887 
IsBitCastForLifetimeMark(const llvm::Value * V)1888     bool IsBitCastForLifetimeMark(const llvm::Value* V)
1889     {
1890         if (!V || !llvm::isa<llvm::BitCastInst>(V))
1891         {
1892             return false;
1893         }
1894         for (llvm::Value::const_user_iterator it = V->user_begin(), e = V->user_end(); it != e; ++it)
1895         {
1896             const llvm::IntrinsicInst* inst = llvm::dyn_cast<const llvm::IntrinsicInst>(*it);
1897             if (!inst)
1898             {
1899                 return false;
1900             }
1901             llvm::Intrinsic::ID  IID = inst->getIntrinsicID();
1902             if (IID != llvm::Intrinsic::lifetime_start &&
1903                 IID != llvm::Intrinsic::lifetime_end)
1904             {
1905                 return false;
1906             }
1907         }
1908         return true;
1909     }
1910 
mutatePtrType(Value * ptrv,PointerType * newType,IRBuilder<> & builder,const Twine &)1911     Value* mutatePtrType(Value* ptrv, PointerType* newType,
1912         IRBuilder<>& builder, const Twine&)
1913     {
1914         if (isa<ConstantPointerNull>(ptrv))
1915         {
1916             return ConstantPointerNull::get(newType);
1917         }
1918         else
1919         {
1920             if (ConstantExpr * cexpr = dyn_cast<ConstantExpr>(ptrv))
1921             {
1922                 IGC_ASSERT(cexpr->getOpcode() == Instruction::IntToPtr);
1923                 Value* offset = cexpr->getOperand(0);
1924                 ptrv = builder.CreateIntToPtr(offset, newType);
1925             }
1926             else
1927             {
1928                 ptrv->mutateType(newType);
1929             }
1930         }
1931         return ptrv;
1932     }
1933 
1934     /*
1935     cmp.l.f0.0 (8) null:d       r0.0<0;1,0>:w    0x0000:w         { Align1, N1, NoMask, NoCompact }
1936     (-f0.0) jmpi Test
1937     (-f0.0) sendc (8) null:ud      r120.0<0;1,0>:f  0x00000025  0x08031400:ud    { Align1, N1, EOT, NoCompact }
1938     nop
1939     Test :
1940     nop
1941 
1942     */
1943 
1944     static const unsigned int CRastHeader_SIMD8[] =
1945     {
1946         0x05600010,0x20001a24,0x1e000000,0x00000000,
1947         0x00110020,0x34000004,0x0e001400,0x00000020,
1948         0x05710032,0x20003a00,0x06000f00,0x88031400,
1949         0x00000000,0x00000000,0x00000000,0x00000000,
1950     };
1951 
1952     /*
1953     cmp.l.f0.0 (16) null:d       r0.0 < 0; 1, 0 > : w    0x0000 : w{ Align1, N1, NoMask, NoCompact }
1954     (-f0.0) jmpi(1) Test { Align1, N1, NoMask, NoCompact }
1955     (-f0.0) sendc(16) null : ud      r120.0 < 0; 1, 0 > : f  0x00000025 0x90031000 : ud{ Align1, N1, EOT, NoCompact }
1956     nop
1957     Test :
1958     nop
1959 
1960     */
1961     static const unsigned int CRastHeader_SIMD16[] =
1962     {
1963         0x05800010, 0x20001A24, 0x1E000000, 0x00000000,
1964         0x00110020, 0x34000004, 0x0E001400, 0x00000020,
1965         0x05910032, 0x20003A00, 0x06000F00, 0x90031000,
1966         0x00000000, 0x00000000, 0x00000000, 0x00000000,
1967     };
1968 
1969     /*
1970     cmp.l.f0.0 (16) null:d       r0.0 < 0; 1, 0 > : w    0x0000 : w{ Align1, N1, NoMask, NoCompact }
1971     (-f0.0) jmpi Test
1972     (-f0.0) sendc(16) null : w r120.0 < 0; 1, 0 > : ud  0x00000005 0x10031000 : ud{ Align1, N1, NoCompact }
1973     (-f0.0) sendc(16) null : w r120.0 < 0; 1, 0 > : f  0x00000025  0x10031800 : ud{ Align1, N5, EOT, NoCompact }
1974     nop
1975     Test :
1976     nop
1977 
1978     */
1979 
1980     static const unsigned int CRastHeader_SIMD32[] =
1981     {
1982         0x05800010,0x20001a24,0x1e000000,0x00000000,
1983         0x00110020,0x34000004,0x0e001400,0x00000020,
1984         0x05910032,0x20000260,0x06000f00,0x10031000,
1985         0x05912032,0x20003a60,0x06000f00,0x90031800,
1986     };
1987 
1988 
AppendConservativeRastWAHeader(IGC::SProgramOutput * program,SIMDMode simdmode)1989     unsigned int AppendConservativeRastWAHeader(IGC::SProgramOutput* program, SIMDMode simdmode)
1990     {
1991         unsigned int headerSize = 0;
1992         const unsigned int* pHeader = nullptr;
1993 
1994         if (program && (program->m_programSize > 0))
1995         {
1996             switch (simdmode)
1997             {
1998             case SIMDMode::SIMD8:
1999                 headerSize = sizeof(CRastHeader_SIMD8);
2000                 pHeader = CRastHeader_SIMD8;
2001                 break;
2002 
2003             case SIMDMode::SIMD16:
2004                 headerSize = sizeof(CRastHeader_SIMD16);
2005                 pHeader = CRastHeader_SIMD16;
2006                 break;
2007 
2008             case SIMDMode::SIMD32:
2009                 headerSize = sizeof(CRastHeader_SIMD32);
2010                 pHeader = CRastHeader_SIMD32;
2011                 break;
2012 
2013             default:
2014                 IGC_ASSERT_MESSAGE(0, "Invalid SIMD Mode for Conservative Raster WA");
2015                 break;
2016             }
2017 
2018             unsigned int newSize = program->m_programSize + headerSize;
2019             void* newBinary = IGC::aligned_malloc(newSize, 16);
2020             memcpy_s(newBinary, newSize, pHeader, headerSize);
2021             memcpy_s((char*)newBinary + headerSize, newSize, program->m_programBin, program->m_programSize);
2022             IGC::aligned_free(program->m_programBin);
2023             program->m_programBin = newBinary;
2024             program->m_programSize = newSize;
2025         }
2026         return headerSize;
2027     }
2028 
DSDualPatchEnabled(class CodeGenContext * ctx)2029     bool DSDualPatchEnabled(class CodeGenContext* ctx)
2030     {
2031         return ctx->platform.supportDSDualPatchDispatch() &&
2032             ctx->platform.WaDisableDSDualPatchMode() &&
2033             !(ctx->m_DriverInfo.APIDisableDSDualPatchDispatch()) &&
2034             IGC_IS_FLAG_DISABLED(DisableDSDualPatch);
2035     }
2036 
2037 
getUniqueEntryFunc(const IGCMD::MetaDataUtils * pM,IGC::ModuleMetaData * pModMD)2038     Function* getUniqueEntryFunc(const IGCMD::MetaDataUtils* pM, IGC::ModuleMetaData* pModMD)
2039     {
2040         Function* entryFunc = nullptr;
2041         auto& FuncMD = pModMD->FuncMD;
2042         for (auto i = pM->begin_FunctionsInfo(), e = pM->end_FunctionsInfo(); i != e; ++i)
2043         {
2044             IGCMD::FunctionInfoMetaDataHandle Info = i->second;
2045             if (Info->getType() != FunctionTypeMD::KernelFunction)
2046             {
2047                 continue;
2048             }
2049 
2050             Function* F = i->first;
2051             if (!entryFunc)
2052             {
2053                 entryFunc = F;
2054             }
2055 
2056             auto fi = FuncMD.find(F);
2057             if (fi != FuncMD.end() && fi->second.isUniqueEntry)
2058             {
2059                 return F;
2060             }
2061         }
2062         IGC_ASSERT_MESSAGE(nullptr != entryFunc, "No entry func!");
2063         auto ei = FuncMD.find(entryFunc);
2064         IGC_ASSERT(ei != FuncMD.end());
2065         ei->second.isUniqueEntry = true;
2066         return entryFunc;
2067     }
2068 
2069     // If true, the codegen will likely not emit instruction for this instruction.
isNoOpInst(Instruction * I,CodeGenContext * Ctx)2070     bool isNoOpInst(Instruction* I, CodeGenContext* Ctx)
2071     {
2072         if (isa<BitCastInst>(I) ||
2073             isa<IntToPtrInst>(I) ||
2074             isa<PtrToIntInst>(I))
2075         {
2076             // Don't bother with constant operands
2077             if (isa<Constant>(I->getOperand(0))) {
2078                 return false;
2079             }
2080 
2081             Type* dTy = I->getType();
2082             Type* sTy = I->getOperand(0)->getType();
2083             PointerType* dPTy = dyn_cast<PointerType>(dTy);
2084             PointerType* sPTy = dyn_cast<PointerType>(sTy);
2085             uint32_t dBits = dPTy ? Ctx->getRegisterPointerSizeInBits(dPTy->getAddressSpace())
2086                 : (unsigned int)dTy->getPrimitiveSizeInBits();
2087             uint32_t sBits = sPTy ? Ctx->getRegisterPointerSizeInBits(sPTy->getAddressSpace())
2088                 : (unsigned int)sTy->getPrimitiveSizeInBits();
2089             if (dBits == 0 || sBits == 0 || dBits != sBits) {
2090                 // Not primitive type or not equal in size (inttoptr, etc)
2091                 return false;
2092             }
2093 
2094             IGCLLVM::FixedVectorType* dVTy = dyn_cast<IGCLLVM::FixedVectorType>(dTy);
2095             IGCLLVM::FixedVectorType* sVTy = dyn_cast<IGCLLVM::FixedVectorType>(sTy);
2096             int d_nelts = dVTy ? (int)dVTy->getNumElements() : 1;
2097             int s_nelts = sVTy ? (int)sVTy->getNumElements() : 1;
2098             if (d_nelts != s_nelts) {
2099                 // Vector relayout bitcast.
2100                 return false;
2101             }
2102             return true;
2103         }
2104         return false;
2105     }
2106 
2107     //
2108     // Given a value, check if it is likely a positive number.
2109     //
2110     // This function works best if llvm.assume() is used in the bif libraries to
2111     // give ValueTracking hints.  ex:
2112     //
2113     // size_t get_local_id(uint dim)
2114     // {
2115     //    size_t ret = __builtin_IB_get_local_id()
2116     //    __builtin_assume(ret >= 0);
2117     //    __builtin_assume(ret <= 0x0000ffff)
2118     //    return ret;
2119     // }
2120     //
2121     // This implementation relies completly on native llvm functions
2122     //
2123     //
2124     //
valueIsPositive(Value * V,const DataLayout * DL,llvm::AssumptionCache * AC,llvm::Instruction * CxtI)2125     bool valueIsPositive(
2126         Value* V,
2127         const DataLayout* DL,
2128         llvm::AssumptionCache* AC,
2129         llvm::Instruction* CxtI)
2130     {
2131 #if LLVM_VERSION_MAJOR == 4
2132         bool isKnownNegative = false;
2133         bool isKnownPositive = false;
2134         llvm::ComputeSignBit(
2135             V,
2136             isKnownPositive,
2137             isKnownNegative,
2138             *DL,
2139             0,
2140             AC,
2141             CxtI);
2142         return isKnownPositive;
2143 #elif LLVM_VERSION_MAJOR >= 7
2144         return computeKnownBits(
2145             V,
2146             *DL,
2147             0,
2148             AC,
2149             CxtI).isNonNegative();
2150 #endif
2151     }
2152 
appendToUsed(llvm::Module & M,ArrayRef<GlobalValue * > Values)2153     void appendToUsed(llvm::Module& M, ArrayRef<GlobalValue*> Values)
2154     {
2155         std::string Name = "llvm.used";
2156         GlobalVariable* GV = M.getGlobalVariable(Name);
2157         SmallPtrSet<Constant*, 16> InitAsSet;
2158         SmallVector<Constant*, 16> Init;
2159         if (GV) {
2160             ConstantArray* CA = dyn_cast<ConstantArray>(GV->getInitializer());
2161             for (auto& Op : CA->operands()) {
2162                 Constant* C = cast_or_null<Constant>(Op);
2163                 if (InitAsSet.insert(C).second)
2164                     Init.push_back(C);
2165             }
2166             GV->eraseFromParent();
2167         }
2168 
2169         Type* Int8PtrTy = llvm::Type::getInt8PtrTy(M.getContext());
2170         for (auto* V : Values) {
2171             Constant* C = V;
2172             //llvm will complain if members of llvm.uses doesn't have a name
2173             if(C->getName().empty())
2174                 C->setName("gVar");
2175 
2176             if (V->getType()->getAddressSpace() != 0)
2177                 C = ConstantExpr::getAddrSpaceCast(V, Int8PtrTy);
2178             else
2179                 C = ConstantExpr::getBitCast(V, Int8PtrTy);
2180             if (InitAsSet.insert(C).second)
2181                 Init.push_back(C);
2182         }
2183 
2184         if (Init.empty())
2185             return;
2186 
2187         ArrayType* ATy = ArrayType::get(Int8PtrTy, Init.size());
2188         GV = new llvm::GlobalVariable(M, ATy, false, GlobalValue::AppendingLinkage,
2189             ConstantArray::get(ATy, Init), Name);
2190         GV->setSection("llvm.metadata");
2191     }
2192 
safeScheduleUp(llvm::BasicBlock * BB,llvm::Value * V,llvm::Instruction * & InsertPos,llvm::DenseSet<llvm::Instruction * > Scheduled)2193     bool safeScheduleUp(llvm::BasicBlock* BB, llvm::Value* V, llvm::Instruction*& InsertPos, llvm::DenseSet<llvm::Instruction*> Scheduled)
2194     {
2195         llvm::Instruction* I = llvm::dyn_cast<llvm::Instruction>(V);
2196         if (!I)
2197             return false;
2198 
2199         // Skip value defined in other BBs.
2200         if (I->getParent() != BB)
2201             return false;
2202 
2203         // Skip phi-node as they are eventually defined in other BBs.
2204         if (llvm::isa<llvm::PHINode>(I))
2205             return false;
2206 
2207         // Skip for side effect instructions
2208         if (I->mayHaveSideEffects())
2209             return false;
2210 
2211         // Don't re-schedule instruction again.
2212         if (Scheduled.count(I)) {
2213             if (InsertPos && !isInstPrecede(I, InsertPos))
2214                 InsertPos = I;
2215             return false;
2216         }
2217 
2218         bool Changed = false;
2219 
2220         // Try to schedule all its operands first.
2221         for (auto OI = I->op_begin(), OE = I->op_end(); OI != OE; ++OI)
2222             Changed |= safeScheduleUp(BB, OI->get(), InsertPos, Scheduled);
2223 
2224         // Mark this instruction `visited`.
2225         Scheduled.insert(I);
2226 
2227         // Skip if the instruction is already defined before insertion position.
2228         if (InsertPos && isInstPrecede(I, InsertPos))
2229             return Changed;
2230 
2231         // Schedule itself.
2232         if (InsertPos) {
2233             I->removeFromParent();
2234             I->insertAfter(InsertPos);
2235         }
2236 
2237         InsertPos = I;
2238         return true;
2239     }
2240 
getConstantSInt(IRBuilder<> & Builder,const int bitSize,int64_t val)2241     ConstantInt* getConstantSInt(
2242         IRBuilder<>& Builder, const int bitSize, int64_t val)
2243     {
2244         ConstantInt* res = nullptr;
2245         switch (bitSize) {
2246         case 8: res = Builder.getInt8((uint8_t)val); break;
2247         case 16: res = Builder.getInt16((uint16_t)val); break;
2248         case 32: res = Builder.getInt32((uint32_t)val); break;
2249         case 64: res = Builder.getInt64((uint64_t)val); break;
2250         default:
2251             IGC_ASSERT_MESSAGE(0, "invalid bitsize");
2252         }
2253         return res;
2254     }
2255 
getConstantUInt(IRBuilder<> & Builder,const int bitSize,uint64_t val)2256     ConstantInt* getConstantUInt(
2257         IRBuilder<>& Builder, const int bitSize, uint64_t val)
2258     {
2259         ConstantInt* res = nullptr;
2260         switch (bitSize) {
2261         case 8: res = Builder.getInt8((uint8_t)val); break;
2262         case 16: res = Builder.getInt16((uint16_t)val); break;
2263         case 32: res = Builder.getInt32((uint32_t)val); break;
2264         case 64: res = Builder.getInt64(val); break;
2265         default:
2266             IGC_ASSERT_MESSAGE(0, "invalid bitsize");
2267         }
2268         return res;
2269     }
2270 
2271     // MulH implementation for 64-bit signed integers
CreateMulhS64(IRBuilder<> & B,Value * const u,Value * const v)2272     Value* CreateMulhS64(IRBuilder<>& B, Value* const u, Value* const v) {
2273         // This comes from Hacker's Delight 8-2.
2274         // Think of this as elementry schoole multiplication, but base 2^32.
2275         ConstantInt* const loMask = getConstantSInt(B, 64, 0xFFFFFFFFll);
2276         ConstantInt* const hiShift = getConstantSInt(B, 64, 32);
2277         //
2278         // u64 u0 = u & 0xFFFFFFFF; s64 u1 = u >> 32;
2279         // u64 v0 = v & 0xFFFFFFFF; s64 v1 = v >> 32;
2280         Value* const u0 = B.CreateAnd(u, loMask, "u.lo32");
2281         Value* const u1 = B.CreateAShr(u, hiShift, "u.hi32");
2282         Value* const v0 = B.CreateAnd(v, loMask, "v.lo32");
2283         Value* const v1 = B.CreateAShr(v, hiShift, "v.hi32");
2284         //
2285         // w = u0*v0
2286         Value* const w0 = B.CreateMul(u0, v0, "w0");
2287         //
2288         // t = u1*v0 + (w0 >> 32)
2289         Value* const tLHS = B.CreateMul(u1, v0);
2290         Value* const tRHS = B.CreateLShr(w0, hiShift, "w0.lo32");
2291         Value* const t = B.CreateAdd(tLHS, tRHS, "t");
2292         //
2293         // w1 = u0*v0 + (t >> 32)
2294         Value* const u0v1 = B.CreateMul(u0, v1);
2295         Value* const tLO32 = B.CreateAnd(t, loMask, "t.lo32");
2296         Value* const w1 = B.CreateAdd(u0v1, tLO32, "w1");
2297         //
2298         // return u0*v1 + (t >> 32) + (w1 >> 32)
2299         Value* const u1v1 = B.CreateMul(u1, v1);
2300         Value* const tHI32 = B.CreateAShr(t, hiShift, "t.hi32");
2301         Value* const rLHS = B.CreateAdd(u1v1, tHI32);
2302         Value* const rRHS = B.CreateAShr(w1, hiShift, "w1.lo32");
2303         Value* const r = B.CreateAdd(rLHS, rRHS, "uv");
2304         //
2305         return r;
2306     }
2307 
2308     // MulH implementation for 64-bit unsigned integers
CreateMulhU64(IRBuilder<> & B,Value * const u,Value * const v)2309     Value* CreateMulhU64(IRBuilder<>& B, Value* const u, Value* const v)
2310     {
2311         // This is the same as CreateMulhS64, but with all logical shifts.
2312         ConstantInt* const loMask = getConstantUInt(B, 64, 0xFFFFFFFFull);
2313         ConstantInt* const hiShift = getConstantUInt(B, 64, 32);
2314         //
2315         // u64 u0 = u & 0xFFFFFFFF, u1 = u >> 32;
2316         // u64 v0 = v & 0xFFFFFFFF, v1 = v >> 32;
2317         Value* const u0 = B.CreateAnd(u, loMask, "u.lo32");
2318         Value* const u1 = B.CreateLShr(u, hiShift, "u.hi32");
2319         Value* const v0 = B.CreateAnd(v, loMask, "v.lo32");
2320         Value* const v1 = B.CreateLShr(v, hiShift, "v.hi32");
2321         //
2322         // w0 = u0*v0
2323         Value* const w0 = B.CreateMul(u0, v0, "w0");
2324         //
2325         // t = u1*v0 + (w0 >> 32)
2326         Value* const tLHS = B.CreateMul(u1, v0);
2327         Value* const tRHS = B.CreateLShr(w0, hiShift, "w0.lo32");
2328         Value* const t = B.CreateAdd(tLHS, tRHS, "t");
2329         //
2330         // w1 = u0*v0 + (t >> 32)
2331         Value* const u0v1 = B.CreateMul(u0, v1);
2332         Value* const tLO32 = B.CreateAnd(t, loMask, "t.lo32");
2333         Value* const w1 = B.CreateAdd(u0v1, tLO32, "w1");
2334         //
2335         // w1 = u0*v1 + (t >> 32) + (w1 >> 32)
2336         Value* const u1v1 = B.CreateMul(u1, v1);
2337         Value* const tHI32 = B.CreateLShr(t, hiShift, "t.hi32");
2338         Value* const rLHS = B.CreateAdd(u1v1, tHI32);
2339         Value* const rRHS = B.CreateLShr(w1, hiShift, "w1.lo32");
2340         Value* const r = B.CreateAdd(rLHS, rRHS, "uv");
2341         //
2342         return r;
2343     }
2344 
2345     // MulH implementation for 32/64 bit integers
CreateMulh(Function & F,IRBuilder<> & B,const bool isSigned,Value * const u,Value * const v)2346     Value* CreateMulh(
2347         Function& F,
2348         IRBuilder<>&  B,
2349         const bool isSigned,
2350         Value* const u,
2351         Value* const v)
2352     {
2353         Value* res = nullptr;
2354         IGC_ASSERT(nullptr != u);
2355         IGC_ASSERT(nullptr != u->getType());
2356         int bitWidth = u->getType()->getIntegerBitWidth();
2357         switch(bitWidth)
2358         {
2359         case 32:
2360         {
2361             // we have a dedicated machine instruction for 32b
2362             SmallVector<Value*, 2> imulhArgs;
2363             imulhArgs.push_back(u);
2364             imulhArgs.push_back(v);
2365             auto intrinsic = isSigned ?
2366                 GenISAIntrinsic::GenISA_imulH :
2367                 GenISAIntrinsic::GenISA_umulH;
2368             IGC_ASSERT(nullptr != v);
2369             Function* const iMulhDecl = llvm::GenISAIntrinsic::getDeclaration(
2370                 F.getParent(),
2371                 intrinsic,
2372                 v->getType());
2373             res = B.CreateCall(iMulhDecl, imulhArgs, "q_appx");
2374             break;
2375         }
2376         case 64:
2377             // emulate via 64b arithmetic
2378             if (isSigned) {
2379                 res = CreateMulhS64(B, u, v);
2380             }
2381             else {
2382                 res = CreateMulhU64(B, u, v);
2383             }
2384             break;
2385         default:
2386             IGC_ASSERT_MESSAGE(0, "CreateMulH must be 32 or 64");
2387         }
2388         return res;
2389     }
2390 
hasInlineAsmInFunc(llvm::Function & F)2391     bool hasInlineAsmInFunc(llvm::Function& F)
2392     {
2393         for (auto ii = inst_begin(&F), ie = inst_end(&F); ii != ie; ii++)
2394         {
2395             if (llvm::CallInst* call = llvm::dyn_cast<llvm::CallInst>(&*ii))
2396             {
2397                 if (call->isInlineAsm())
2398                 {
2399                     return true;
2400                 }
2401             }
2402         }
2403         return false;
2404     }
2405 
2406     // Parses the "vector-variant" attribute string to get a valid function
2407     // variant symbol string supported by current implementation of IGC.
2408     //
2409     // Returns a tuple of values:
2410     // R(0) is the reformatted variant symbol string.
2411     // R(1) is the called function's name.
2412     // R(2) is the required SIMD size.
2413     // See the spec for Intel Vector Function ABI for parsed symbol meanings.
ParseVectorVariantFunctionString(llvm::StringRef varStr)2414     std::tuple<std::string, std::string, unsigned> ParseVectorVariantFunctionString(llvm::StringRef varStr)
2415     {
2416         unsigned vlen = 0;
2417         std::stringstream outStr;
2418 
2419         auto pos = varStr.begin();
2420         auto strEnd = varStr.end();
2421 
2422         // Starts with _ZGV
2423         IGC_ASSERT(varStr.startswith("_ZGV"));
2424         outStr << "_ZGV";
2425         pos += 4;
2426         // ISA class target processor type
2427         IGC_ASSERT(*pos == 'x' || *pos == 'y' || *pos == 'Y' || *pos == 'z' || *pos == 'Z');
2428         outStr << *pos;
2429         pos++;
2430         // Mask or NoMask, only support NoMask for now
2431         IGC_ASSERT(*pos == 'M' || *pos == 'N');
2432         outStr << 'N';
2433         pos++;
2434         // Parse vector length (input can be 1/2/4/8/16/32, output restricted to 8/16/32)
2435         auto idStart = pos;
2436         while (*pos >= '0' && *pos <= '9') pos++;
2437         IGC_ASSERT(StringRef(idStart, pos - idStart).getAsInteger(10, vlen) == 0);
2438         IGC_ASSERT(vlen == 1 || vlen == 2 || vlen == 4 || vlen == 8 || vlen == 16 || vlen == 32);
2439         // Set min SIMD width to 8
2440         vlen = (vlen < 8) ? 8 : vlen;
2441         outStr << std::to_string(vlen);
2442 
2443         while (pos != strEnd)
2444         {
2445             // End of vector properties symbols terminated with '_'
2446             if (*pos == '_') {
2447                 outStr << *pos++;
2448                 break;
2449             }
2450             // Parameter variant type, only support default vector type
2451             IGC_ASSERT(*pos == 'l' || *pos == 'u' || *pos == 'v' || *pos == 'R' || *pos == 'U' || *pos == 'L');
2452             outStr << 'v';
2453             pos++;
2454             // Ignore alignment properties
2455             if (*pos == 'a') {
2456                 pos++;
2457                 while (*pos >= '0' && *pos <= '9') pos++;
2458             }
2459         }
2460 
2461         // Remaining characters form the function name
2462         std::string fName = StringRef(pos, strEnd - pos).str();
2463 
2464         return std::make_tuple(outStr.str(), fName, vlen);
2465     }
2466 
2467     ///
2468     /// Return base type from more complex type
2469     ///
2470     /// Return nullptr if complex type cannot be defined with only one type
2471     ///
GetBaseType(llvm::Type * ProcessedType)2472     llvm::Type* GetBaseType(llvm::Type* ProcessedType)
2473     {
2474         while (ProcessedType->isArrayTy() || ProcessedType->isStructTy())
2475         {
2476             if (ProcessedType->isArrayTy())
2477                 ProcessedType = ProcessedType->getArrayElementType();
2478             else
2479             {
2480                 if (ProcessedType->getStructNumElements() != 1)
2481                     return nullptr;
2482 
2483                 ProcessedType = ProcessedType->getStructElementType(0);
2484             }
2485         }
2486 
2487         return ProcessedType;
2488     }
2489 
2490     // Function modifies address space in selected uses of given input value
FixAddressSpaceInAllUses(llvm::Value * ptr,uint newAS,uint oldAS)2491     void FixAddressSpaceInAllUses(llvm::Value* ptr, uint newAS, uint oldAS)
2492     {
2493         IGC_ASSERT(newAS != oldAS);
2494 
2495         for (auto UI = ptr->user_begin(), E = ptr->user_end(); UI != E; ++UI)
2496         {
2497             Instruction* inst = dyn_cast<Instruction>(*UI);
2498             PointerType* instType = nullptr;
2499             if (isa<BitCastInst>(inst) || isa<GetElementPtrInst>(inst) ||
2500                 isa<AddrSpaceCastInst>(inst) || isa<PHINode>(inst))
2501             {
2502                 instType = dyn_cast<PointerType>(inst->getType());
2503             }
2504 
2505             if (instType && instType->getAddressSpace() == oldAS)
2506             {
2507                 Type* eltType = instType->getElementType();
2508                 PointerType* ptrType = PointerType::get(eltType, newAS);
2509                 inst->mutateType(ptrType);
2510                 FixAddressSpaceInAllUses(inst, newAS, oldAS);
2511             }
2512         }
2513     }
2514 
2515 
GetURBBaseAndOffset(Value * pUrbOffset)2516 std::pair<Value*, unsigned int> GetURBBaseAndOffset(Value* pUrbOffset)
2517 {
2518     Value* pBase = pUrbOffset;
2519     unsigned int offset = 0;
2520 
2521     auto GetConstant = [](Value* pVal)->unsigned int
2522     {
2523         IGC_ASSERT(isa<ConstantInt>(pVal));
2524         ConstantInt* pConst = cast<ConstantInt>(pVal);
2525         return int_cast<unsigned int>(pConst->getZExtValue());
2526     };
2527 
2528     if (isa<ConstantInt>(pUrbOffset))
2529     {
2530         Value* pNullBase = nullptr;
2531         return std::make_pair(
2532             pNullBase,
2533             GetConstant(pUrbOffset));
2534     }
2535     else if (isa<Instruction>(pUrbOffset))
2536     {
2537         Instruction* pInstr = cast<Instruction>(pUrbOffset);
2538         if (pInstr->getOpcode() == Instruction::Add)
2539         {
2540             Value* src0 = pInstr->getOperand(0);
2541             Value* src1 = pInstr->getOperand(1);
2542             if (isa<ConstantInt>(src1))
2543             {
2544                 auto baseAndOffset = GetURBBaseAndOffset(src0);
2545                 pBase = baseAndOffset.first;
2546                 offset = GetConstant(src1) + baseAndOffset.second;
2547             }
2548             else if (isa<ConstantInt>(src0))
2549             {
2550                 auto baseAndOffset = GetURBBaseAndOffset(src1);
2551                 pBase = baseAndOffset.first;
2552                 offset = GetConstant(src0) + baseAndOffset.second;
2553             }
2554         }
2555         else if (pInstr->getOpcode() == Instruction::Or)
2556         {
2557             // Examples of patterns matched below:
2558             // 1. shl + or
2559             //    urbOffset = urbOffset << 1;
2560             //    urbOffset = urbOffset | 1;
2561             // 2. mul + or
2562             //    urbOffset = urbOffset * 2;
2563             //    urbOffset = urbOffset | 1;
2564             // 3. two oword urb writes in loop
2565             //    urbOffset = urbOffset * 2;
2566             //    for(...) {
2567             //      {...}
2568             //      urbOffset = urbOffset | 1;
2569             //      urbOffset = urbOffset + 2;
2570             //      {...}
2571             //    }
2572             //
2573             //
2574 
2575             std::function<unsigned int(Value*)> GetAlignment =
2576                 [&GetAlignment, &GetConstant](Value* pUrbOffset)->unsigned int
2577             {
2578                 unsigned int alignment = 1;
2579                 Instruction* pInstr = dyn_cast<Instruction>(pUrbOffset);
2580                 if (pInstr &&
2581                     pInstr->getOpcode() == Instruction::Shl &&
2582                     isa<ConstantInt>(pInstr->getOperand(1)))
2583                 {
2584                     alignment = GetAlignment(pInstr->getOperand(0)) *
2585                         (1u << GetConstant(pInstr->getOperand(1)));
2586                 }
2587                 else if (pInstr &&
2588                     pInstr->getOpcode() == Instruction::Mul &&
2589                     isa<ConstantInt>(pInstr->getOperand(1)) &&
2590                     iSTD::IsPowerOfTwo(GetConstant(pInstr->getOperand(1))))
2591                 {
2592                     alignment = GetAlignment(pInstr->getOperand(0)) *
2593                         GetConstant(pInstr->getOperand(1));
2594                 }
2595                 else if (pInstr &&
2596                     pInstr->getOpcode() == Instruction::Mul &&
2597                     isa<ConstantInt>(pInstr->getOperand(0)) &&
2598                     iSTD::IsPowerOfTwo(GetConstant(pInstr->getOperand(0))))
2599                 {
2600                     alignment = GetAlignment(pInstr->getOperand(1)) *
2601                         GetConstant(pInstr->getOperand(0));
2602                 }
2603                 else if (isa<ConstantInt>(pUrbOffset))
2604                 {
2605                     alignment = 1 << iSTD::bsf(GetConstant(pUrbOffset));
2606                 }
2607                 return alignment;
2608             };
2609 
2610             Value* src0 = pInstr->getOperand(0);
2611             Value* src1 = pInstr->getOperand(1);
2612             unsigned int alignment = 1;
2613             if (isa<PHINode>(src0) && isa<ConstantInt>(src1))
2614             {
2615                 // pattern 3
2616                 PHINode* pPhi = cast<PHINode>(src0);
2617                 alignment = std::numeric_limits<unsigned int>::max();
2618                 for (unsigned int i = 0; i < pPhi->getNumIncomingValues(); i++)
2619                 {
2620                     Instruction* pIncoming = dyn_cast<Instruction>(pPhi->getIncomingValue(i));
2621                     if (pIncoming &&
2622                         pIncoming->getOpcode() == Instruction::Add &&
2623                         pPhi == pIncoming->getOperand(0) &&
2624                         isa<ConstantInt>(pIncoming->getOperand(1)) &&
2625                         iSTD::IsPowerOfTwo(GetConstant(pIncoming->getOperand(1))))
2626                     {
2627                         alignment = std::min(alignment, GetConstant(pIncoming->getOperand(1)));
2628                     }
2629                     else
2630                     {
2631                         alignment = std::min(alignment, GetAlignment(pPhi->getIncomingValue(i)));
2632                     }
2633                 }
2634             }
2635             else
2636             {
2637                 // patterns 1 and 2
2638                 alignment = GetAlignment(src0);
2639             }
2640             if (alignment > GetConstant(src1))
2641             {
2642                 IGC_ASSERT(iSTD::IsPowerOfTwo(alignment));
2643                 pBase = src0;
2644                 offset = GetConstant(src1);
2645             }
2646         }
2647     }
2648 
2649     return std::make_pair(pBase, offset);
2650 }
2651 
GetPrintfStrings(Module & M)2652 std::vector<std::pair<unsigned int, std::string>> GetPrintfStrings(Module &M)
2653 {
2654     std::vector<std::pair<unsigned int, std::string>> printfStrings;
2655     std::string MDNodeName = "printf.strings";
2656     NamedMDNode* printfMDNode = M.getOrInsertNamedMetadata(MDNodeName);
2657 
2658     for (uint i = 0, NumStrings = printfMDNode->getNumOperands();
2659          i < NumStrings;
2660          i++)
2661     {
2662         MDNode* argMDNode = printfMDNode->getOperand(i);
2663         ConstantInt* indexOpndVal =
2664             mdconst::dyn_extract<ConstantInt>(argMDNode->getOperand(0));
2665         MDString* stringOpndVal =
2666             dyn_cast<MDString>(argMDNode->getOperand(1));
2667 
2668         printfStrings.push_back({
2669             int_cast<unsigned int>(indexOpndVal->getZExtValue()),
2670             stringOpndVal->getString().data()
2671         });
2672     }
2673 
2674     return printfStrings;
2675 }
2676 
PDT_dominates(llvm::PostDominatorTree & PTD,const Instruction * I1,const Instruction * I2)2677 bool PDT_dominates(llvm::PostDominatorTree& PTD,
2678     const Instruction* I1,
2679     const Instruction* I2)
2680 {
2681     IGC_ASSERT_MESSAGE(I1, "Expecting valid I1 and I2");
2682     IGC_ASSERT_MESSAGE(I2, "Expecting valid I1 and I2");
2683 
2684     const BasicBlock* BB1 = I1->getParent();
2685     const BasicBlock* BB2 = I2->getParent();
2686 
2687     if (BB1 != BB2)
2688         return PTD.dominates(BB1, BB2);
2689 
2690     // PHINodes in a block are unordered.
2691     if (isa<PHINode>(I1) && isa<PHINode>(I2))
2692         return false;
2693 
2694     // Loop through the basic block until we find I1 or I2.
2695     BasicBlock::const_iterator I = BB1->begin();
2696     for (; &*I != I1 && &*I != I2; ++I)
2697         /*empty*/;
2698 
2699     return &*I == I2;
2700 }
2701 
2702 } // namespace IGC
2703