1 /*========================== begin_copyright_notice ============================
2 
3 Copyright (C) 2017-2021 Intel Corporation
4 
5 SPDX-License-Identifier: MIT
6 
7 ============================= end_copyright_notice ===========================*/
8 
9 #include "Compiler/Optimizer/OpenCLPasses/KernelArgs.hpp"
10 #include "AdaptorCommon/ImplicitArgs.hpp"
11 #include "llvmWrapper/IR/DerivedTypes.h"
12 #include "common/LLVMWarningsPush.hpp"
13 #include <llvm/IR/Argument.h>
14 #include <llvm/IR/DataLayout.h>
15 #include <llvm/IR/DerivedTypes.h>
16 #include <llvm/IR/Function.h>
17 #include <llvm/IR/Metadata.h>
18 #include <llvm/IR/Module.h>
19 #include "common/LLVMWarningsPop.hpp"
20 #include "Probe/Assertion.h"
21 
22 using namespace IGC;
23 using namespace IGC::IGCMD;
24 using namespace llvm;
25 
KernelArg(KernelArg::ArgType argType,KernelArg::AccessQual accessQual,unsigned int allocateSize,unsigned int elemAllocateSize,size_t align,bool isConstantBuf,const llvm::Argument * arg,unsigned int associatedArgNo)26 KernelArg::KernelArg(KernelArg::ArgType argType, KernelArg::AccessQual accessQual, unsigned int allocateSize, unsigned int elemAllocateSize, size_t align, bool isConstantBuf, const llvm::Argument* arg, unsigned int associatedArgNo) :
27     m_implicitArgument(false),
28     m_argType(argType),
29     m_accessQual(accessQual),
30     m_allocateSize(allocateSize), // in BYTES
31     m_elemAllocateSize(elemAllocateSize),
32     m_align(align),
33     m_isConstantBuf(isConstantBuf),
34     m_arg(arg),
35     m_associatedArgNo(associatedArgNo),
36     m_structArgOffset(-1),
37     m_locationIndex(-1),
38     m_locationCount(-1),
39     m_needsAllocation(typeAlwaysNeedsAllocation()),
40     m_isEmulationArgument(false),
41     m_imageInfo({ false, false })
42 {
43 }
44 
KernelArg(const Argument * arg,const DataLayout * DL,const StringRef typeStr,const StringRef qualStr,int location_index,int location_count,bool needBindlessHandle,bool isEmulationArgument)45 KernelArg::KernelArg(const Argument* arg, const DataLayout* DL, const StringRef typeStr, const StringRef qualStr, int location_index, int location_count, bool needBindlessHandle, bool isEmulationArgument) :
46     m_implicitArgument(false),
47     m_argType(calcArgType(arg, typeStr)),
48     m_accessQual(calcAccessQual(arg, qualStr)),
49     // Only explicit arguments that need allocation are part of the constant buffer
50     m_isConstantBuf(needBindlessHandle || typeAlwaysNeedsAllocation()),
51     m_arg(arg),
52     m_associatedArgNo(arg->getArgNo()),
53     m_structArgOffset(-1),
54     m_locationIndex(location_index),
55     m_locationCount(location_count),
56     m_needsAllocation(needBindlessHandle || typeAlwaysNeedsAllocation()),
57     m_isEmulationArgument(isEmulationArgument),
58     m_imageInfo({ false, false })
59 {
60     m_allocateSize = calcAllocateSize(arg, DL);
61     m_elemAllocateSize = calcElemAllocateSize(arg, DL);
62     m_align = calcAlignment(arg, DL);
63 }
64 
KernelArg(const ImplicitArg & implicitArg,const DataLayout * DL,const Argument * arg,unsigned int ExplicitArgNo,unsigned int structArgOffset,unsigned int GRFSize)65 KernelArg::KernelArg(const ImplicitArg& implicitArg, const DataLayout* DL, const Argument* arg, unsigned int ExplicitArgNo, unsigned int structArgOffset, unsigned int GRFSize) :
66     m_implicitArgument(true),
67     m_argType(calcArgType(implicitArg)),
68     m_accessQual(AccessQual::NONE),
69     m_allocateSize(implicitArg.getAllocateSize(*DL)),
70     m_align(implicitArg.getAlignment(*DL)),
71     m_isConstantBuf(implicitArg.isConstantBuf()),
72     m_arg(arg),
73     m_associatedArgNo(calcAssociatedArgNo(implicitArg, arg, ExplicitArgNo)),
74     m_structArgOffset(structArgOffset),
75     m_locationIndex(-1),
76     m_locationCount(-1),
77     m_needsAllocation(typeAlwaysNeedsAllocation()),
78     m_isEmulationArgument(false),
79     m_imageInfo({ false, false })
80 {
81     IGC_ASSERT(implicitArg.getNumberElements());
82 
83     m_elemAllocateSize = m_allocateSize / implicitArg.getNumberElements();
84     if (implicitArg.isLocalIDs() && GRFSize == 64)
85     {
86         m_elemAllocateSize = m_allocateSize / (GRFSize / 2);
87     }
88 }
89 
calcAllocateSize(const Argument * arg,const DataLayout * DL) const90 unsigned int KernelArg::calcAllocateSize(const Argument* arg, const DataLayout* DL) const
91 {
92     if (!needsAllocation()) return 0;
93 
94     return int_cast<unsigned int>(DL->getTypeAllocSize(arg->getType()));
95 }
96 
calcAlignment(const Argument * arg,const DataLayout * DL) const97 unsigned int KernelArg::calcAlignment(const Argument* arg, const DataLayout* DL) const
98 {
99     // If we don't need to allocate, we certainly don't need alignment
100     if (!needsAllocation()) return 0;
101 
102     Type* typeToAlign = arg->getType();
103     // Usually, we return the alignment of the parameter type.
104     // For local pointers, we need the alignment of the *contained* type.
105     if (m_argType == ArgType::PTR_LOCAL)
106     {
107         typeToAlign = cast<PointerType>(typeToAlign)->getElementType();
108     }
109 
110     return DL->getABITypeAlignment(typeToAlign);
111 }
112 
calcElemAllocateSize(const Argument * arg,const DataLayout * DL) const113 unsigned int KernelArg::calcElemAllocateSize(const Argument* arg, const DataLayout* DL) const
114 {
115     if (!needsAllocation()) return 0;
116 
117     return int_cast<unsigned int>(DL->getTypeAllocSize(arg->getType()->getScalarType()));
118 }
119 
120 // First member of pair is ArgType of buffer.
121 // When ArgType is SAMPLER, second member should be true.
122 // When ArgType is NOT_TO_ALLOCATE, second member should be false.
123 // ArgType enum uses same values for SAMPLER and NOT_TO_ALLOCATE.
124 // This function helps disambiguate between the two values.
getBufferType(const Argument * arg,const StringRef typeStr)125 KernelArg::BufferArgType KernelArg::getBufferType(const Argument* arg, const StringRef typeStr)
126 {
127     if (arg->getType()->getTypeID() != Type::PointerTyID)
128         return { KernelArg::ArgType::SAMPLER, true };
129 
130     PointerType* ptrType = cast<PointerType>(arg->getType());
131 
132     int address_space = ptrType->getPointerAddressSpace();
133     bool directIdx = false;
134     unsigned int bufId = 0;
135     BufferType bufType = DecodeAS4GFXResource(address_space, directIdx, bufId);
136 
137     // Check if this arg is an image
138     if (bufType == BufferType::UAV)
139     {
140         ArgType imgArgType;
141         // Check if argument is image
142         if (isImage(arg, typeStr, imgArgType)) return { imgArgType, false };
143     }
144     else if (bufType == BufferType::SAMPLER)
145         return { KernelArg::ArgType::SAMPLER, true };
146 
147     return { KernelArg::ArgType::NOT_TO_ALLOCATE, false };
148 }
149 
calcArgType(const Argument * arg,const StringRef typeStr)150 KernelArg::ArgType KernelArg::calcArgType(const Argument* arg, const StringRef typeStr)
151 {
152     switch (arg->getType()->getTypeID())
153     {
154 
155     case Type::PointerTyID:
156     {
157         PointerType* ptrType = cast<PointerType>(arg->getType());
158 
159         // Check for pointer address space
160         switch (ptrType->getAddressSpace())
161         {
162         case ADDRESS_SPACE_PRIVATE:
163         {
164 
165             Type* type = arg->getType();
166             if (typeStr.equals("queue_t") || typeStr.equals("spirv.Queue"))
167             {
168                 return KernelArg::ArgType::PTR_DEVICE_QUEUE;
169             }
170             else if (arg->hasByValAttr() &&
171                 type->isPointerTy() &&
172                 type->getPointerElementType()->isStructTy())
173             {
174                 // Pass by value structs will show up as private pointer
175                 // arguments in the function signiture.
176                 return KernelArg::ArgType::STRUCT;
177             }
178             else
179             {
180                 return KernelArg::ArgType::IMPLICIT_PRIVATE_BASE;
181             }
182         }
183 
184         case ADDRESS_SPACE_GLOBAL:
185         {
186             ArgType imgArgType;
187             // Check if argument is image
188             if (isImage(arg, typeStr, imgArgType)) return imgArgType;
189         }
190         return KernelArg::ArgType::PTR_GLOBAL;
191 
192         case ADDRESS_SPACE_CONSTANT:
193             // Bindless samplers are stored in addrspace(2)
194             if (isSampler(arg, typeStr))
195                 return KernelArg::ArgType::SAMPLER;
196             else if (isBindlessSampler(arg, typeStr))
197                 return KernelArg::ArgType::BINDLESS_SAMPLER;
198 
199             return KernelArg::ArgType::PTR_CONSTANT;
200         case ADDRESS_SPACE_LOCAL:
201             return KernelArg::ArgType::PTR_LOCAL;
202 
203         default:
204 #if 0
205             // Need to disable this assertion for two-phase-inlining, i.e.
206             // kernel arguments will be used for subroutines, which may
207             // have arguments from other address spaces. It is unfortunate
208             // that we cannot run ResourceAllocator only on kernels since
209             // BuiltinsConverter checks caller's resource allocation info.
210             //
211             // For the final codegen, this allocation info is only queried
212             // for kernels. This should not affect correctness, but a waste
213             // on subroutines.
214             //
215             // FIXME: There is a chain of dependency.
216             IGC_ASSERT_MESSAGE(0, "Unrecognized address space");
217 #endif
218             // This is a buffer. Try to decode this
219             return getBufferType(arg, typeStr).type;
220         }
221     }
222     case  Type::IntegerTyID:
223         // Check if argument is sampler
224         if (isSampler(arg, typeStr)) return KernelArg::ArgType::SAMPLER;
225         // Fall through to default
226 
227     default:
228         // May reach here from Type::IntegerTyID
229         return KernelArg::ArgType::CONSTANT_REG;
230     }
231 }
232 
calcArgType(const ImplicitArg & arg) const233 KernelArg::ArgType KernelArg::calcArgType(const ImplicitArg& arg) const
234 {
235     switch (arg.getArgType())
236     {
237     case ImplicitArg::R0:
238         return KernelArg::ArgType::IMPLICIT_R0;
239     case ImplicitArg::PAYLOAD_HEADER:
240         return KernelArg::ArgType::IMPLICIT_PAYLOAD_HEADER;
241     case ImplicitArg::PRIVATE_BASE:
242         return KernelArg::ArgType::IMPLICIT_PRIVATE_BASE;
243     case ImplicitArg::CONSTANT_BASE:
244         return KernelArg::ArgType::IMPLICIT_CONSTANT_BASE;
245     case ImplicitArg::PRINTF_BUFFER:
246         return KernelArg::ArgType::IMPLICIT_PRINTF_BUFFER;
247     case ImplicitArg::SYNC_BUFFER:
248         return KernelArg::ArgType::IMPLICIT_SYNC_BUFFER;
249     case ImplicitArg::BUFFER_OFFSET:
250         return KernelArg::ArgType::IMPLICIT_BUFFER_OFFSET;
251     case ImplicitArg::GLOBAL_BASE:
252         return KernelArg::ArgType::IMPLICIT_GLOBAL_BASE;
253     case ImplicitArg::WORK_DIM:
254         return KernelArg::ArgType::IMPLICIT_WORK_DIM;
255     case ImplicitArg::NUM_GROUPS:
256         return KernelArg::ArgType::IMPLICIT_NUM_GROUPS;
257     case ImplicitArg::GLOBAL_SIZE:
258         return KernelArg::ArgType::IMPLICIT_GLOBAL_SIZE;
259     case ImplicitArg::LOCAL_SIZE:
260         return KernelArg::ArgType::IMPLICIT_LOCAL_SIZE;
261     case ImplicitArg::ENQUEUED_LOCAL_WORK_SIZE:
262         return KernelArg::ArgType::IMPLICIT_ENQUEUED_LOCAL_WORK_SIZE;
263     case ImplicitArg::LOCAL_ID_X:
264         // fall through until LOCAL_ID_Z
265     case ImplicitArg::LOCAL_ID_Y:
266         // fall through until LOCAL_ID_Z
267     case ImplicitArg::LOCAL_ID_Z:
268         return KernelArg::ArgType::IMPLICIT_LOCAL_IDS;
269     case ImplicitArg::STAGE_IN_GRID_ORIGIN:
270         return KernelArg::ArgType::IMPLICIT_STAGE_IN_GRID_ORIGIN;
271     case ImplicitArg::STAGE_IN_GRID_SIZE:
272         return KernelArg::ArgType::IMPLICIT_STAGE_IN_GRID_SIZE;
273     case ImplicitArg::CONSTANT_REG_FP32:
274         return KernelArg::ArgType::CONSTANT_REG;
275     case ImplicitArg::CONSTANT_REG_QWORD:
276         return KernelArg::ArgType::CONSTANT_REG;
277     case ImplicitArg::CONSTANT_REG_DWORD:
278         return KernelArg::ArgType::CONSTANT_REG;
279     case ImplicitArg::CONSTANT_REG_WORD:
280         return KernelArg::ArgType::CONSTANT_REG;
281     case ImplicitArg::CONSTANT_REG_BYTE:
282         return KernelArg::ArgType::CONSTANT_REG;
283 
284     case ImplicitArg::IMAGE_HEIGHT:
285         return KernelArg::ArgType::IMPLICIT_IMAGE_HEIGHT;
286     case ImplicitArg::IMAGE_WIDTH:
287         return KernelArg::ArgType::IMPLICIT_IMAGE_WIDTH;
288     case ImplicitArg::IMAGE_DEPTH:
289         return KernelArg::ArgType::IMPLICIT_IMAGE_DEPTH;
290     case ImplicitArg::IMAGE_NUM_MIP_LEVELS:
291         return KernelArg::ArgType::IMPLICIT_IMAGE_NUM_MIP_LEVELS;
292     case ImplicitArg::IMAGE_CHANNEL_DATA_TYPE:
293         return KernelArg::ArgType::IMPLICIT_IMAGE_CHANNEL_DATA_TYPE;
294     case ImplicitArg::IMAGE_CHANNEL_ORDER:
295         return KernelArg::ArgType::IMPLICIT_IMAGE_CHANNEL_ORDER;
296     case ImplicitArg::IMAGE_SRGB_CHANNEL_ORDER:
297         return KernelArg::ArgType::IMPLICIT_IMAGE_SRGB_CHANNEL_ORDER;
298     case ImplicitArg::IMAGE_ARRAY_SIZE:
299         return KernelArg::ArgType::IMPLICIT_IMAGE_ARRAY_SIZE;
300     case ImplicitArg::IMAGE_NUM_SAMPLES:
301         return KernelArg::ArgType::IMPLICIT_IMAGE_NUM_SAMPLES;
302     case ImplicitArg::SAMPLER_ADDRESS:
303         return KernelArg::ArgType::IMPLICIT_SAMPLER_ADDRESS;
304     case ImplicitArg::SAMPLER_NORMALIZED:
305         return KernelArg::ArgType::IMPLICIT_SAMPLER_NORMALIZED;
306     case ImplicitArg::SAMPLER_SNAP_WA:
307         return KernelArg::ArgType::IMPLICIT_SAMPLER_SNAP_WA;
308     case ImplicitArg::FLAT_IMAGE_BASEOFFSET:
309         return KernelArg::ArgType::IMPLICIT_FLAT_IMAGE_BASEOFFSET;
310     case ImplicitArg::FLAT_IMAGE_HEIGHT:
311         return KernelArg::ArgType::IMPLICIT_FLAT_IMAGE_HEIGHT;
312     case ImplicitArg::FLAT_IMAGE_WIDTH:
313         return KernelArg::ArgType::IMPLICIT_FLAT_IMAGE_WIDTH;
314     case ImplicitArg::FLAT_IMAGE_PITCH:
315         return KernelArg::ArgType::IMPLICIT_FLAT_IMAGE_PITCH;
316 
317     case ImplicitArg::VME_MB_BLOCK_TYPE:
318         return KernelArg::ArgType::IMPLICIT_VME_MB_BLOCK_TYPE;
319     case ImplicitArg::VME_SUBPIXEL_MODE:
320         return KernelArg::ArgType::IMPLICIT_VME_SUBPIXEL_MODE;
321     case ImplicitArg::VME_SAD_ADJUST_MODE:
322         return KernelArg::ArgType::IMPLICIT_VME_SAD_ADJUST_MODE;
323     case ImplicitArg::VME_SEARCH_PATH_TYPE:
324         return KernelArg::ArgType::IMPLICIT_VME_SEARCH_PATH_TYPE;
325 
326     case ImplicitArg::DEVICE_ENQUEUE_DEFAULT_DEVICE_QUEUE:
327         return KernelArg::ArgType::IMPLICIT_DEVICE_ENQUEUE_DEFAULT_DEVICE_QUEUE;
328     case ImplicitArg::DEVICE_ENQUEUE_EVENT_POOL:
329         return KernelArg::ArgType::IMPLICIT_DEVICE_ENQUEUE_EVENT_POOL;
330     case ImplicitArg::DEVICE_ENQUEUE_MAX_WORKGROUP_SIZE:
331         return KernelArg::ArgType::IMPLICIT_DEVICE_ENQUEUE_MAX_WORKGROUP_SIZE;
332     case ImplicitArg::DEVICE_ENQUEUE_PARENT_EVENT:
333         return KernelArg::ArgType::IMPLICIT_DEVICE_ENQUEUE_PARENT_EVENT;
334     case ImplicitArg::DEVICE_ENQUEUE_PREFERED_WORKGROUP_MULTIPLE:
335         return KernelArg::ArgType::IMPLICIT_DEVICE_ENQUEUE_PREFERED_WORKGROUP_MULTIPLE;
336     case ImplicitArg::GET_OBJECT_ID:
337         return KernelArg::ArgType::IMPLICIT_DEVICE_ENQUEUE_DATA_PARAMETER_OBJECT_ID;
338     case ImplicitArg::GET_BLOCK_SIMD_SIZE:
339         return KernelArg::ArgType::IMPLICIT_DEVICE_ENQUEUE_DISPATCHER_SIMD_SIZE;
340 
341 
342     case ImplicitArg::LOCAL_MEMORY_STATELESS_WINDOW_START_ADDRESS:
343         return KernelArg::ArgType::IMPLICIT_LOCAL_MEMORY_STATELESS_WINDOW_START_ADDRESS;
344     case ImplicitArg::LOCAL_MEMORY_STATELESS_WINDOW_SIZE:
345         return KernelArg::ArgType::IMPLICIT_LOCAL_MEMORY_STATELESS_WINDOW_SIZE;
346     case ImplicitArg::PRIVATE_MEMORY_STATELESS_SIZE:
347         return KernelArg::ArgType::IMPLICIT_PRIVATE_MEMORY_STATELESS_SIZE;
348     case ImplicitArg::BINDLESS_OFFSET:
349         return KernelArg::ArgType::IMPLICIT_BINDLESS_OFFSET;
350 
351     case ImplicitArg::IMPLICIT_ARG_BUFFER_PTR:
352         return KernelArg::ArgType::IMPLICIT_ARG_BUFFER;
353     case ImplicitArg::IMPLICIT_ARG_LOCALID:
354         return KernelArg::ArgType::IMPLICIT_LOCAL_ID_BUFFER;
355     default:
356         return KernelArg::ArgType::NOT_TO_ALLOCATE;
357     }
358 }
359 
calcAccessQual(const Argument * arg,const StringRef qualStr) const360 KernelArg::AccessQual KernelArg::calcAccessQual(const Argument* arg, const StringRef qualStr) const
361 {
362     if (qualStr.equals("read_write"))
363         return READ_WRITE;
364 
365     if (qualStr.startswith("read"))
366         return READ_ONLY;
367 
368     if (qualStr.startswith("write"))
369         return WRITE_ONLY;
370 
371     return NONE;
372 }
373 
calcAssociatedArgNo(const ImplicitArg & implicitArg,const Argument * arg,unsigned int ExplicitArgNo) const374 unsigned int KernelArg::calcAssociatedArgNo(const ImplicitArg& implicitArg, const Argument* arg, unsigned int ExplicitArgNo) const
375 {
376     ImplicitArg::ArgType argType = implicitArg.getArgType();
377     if ((ImplicitArgs::isImplicitImage(argType)) ||
378         (ImplicitArgs::isImplicitStruct(argType)) ||
379         (argType == ImplicitArg::GET_OBJECT_ID) ||
380         (argType == ImplicitArg::GET_BLOCK_SIMD_SIZE) ||
381         (argType == ImplicitArg::BUFFER_OFFSET) ||
382         (argType == ImplicitArg::BINDLESS_OFFSET)
383         )
384     {
385         // For implicit image and sampler and struct arguments and buffer offset,
386         // the implicit arg's value represents the index of the associated
387         // image/sampler/pointer argument
388         return ExplicitArgNo;
389     }
390     return arg->getArgNo();
391 }
392 
getNumComponents() const393 unsigned int KernelArg::getNumComponents() const
394 {
395     if (IGCLLVM::FixedVectorType * vecType = dyn_cast<IGCLLVM::FixedVectorType>(m_arg->getType()))
396     {
397         // Vector
398         return int_cast<unsigned int>(vecType->getNumElements());
399     }
400 
401     // Scalar
402     return 1;
403 }
404 
getAlignment() const405 size_t KernelArg::getAlignment() const
406 {
407     return m_align;
408 }
409 
getAllocateSize() const410 unsigned int KernelArg::getAllocateSize() const
411 {
412     return int_cast<unsigned int>(llvm::alignTo(m_allocateSize, iOpenCL::DATA_PARAMETER_DATA_SIZE));
413 }
414 
getElemAllocateSize() const415 unsigned int KernelArg::getElemAllocateSize() const
416 {
417     return m_elemAllocateSize;
418 }
419 
isConstantBuf() const420 bool KernelArg::isConstantBuf() const
421 {
422     return m_isConstantBuf;
423 }
424 
typeAlwaysNeedsAllocation() const425 bool KernelArg::typeAlwaysNeedsAllocation() const
426 {
427     return m_argType < KernelArg::ArgType::NOT_TO_ALLOCATE;
428 }
429 
needsAllocation() const430 bool KernelArg::needsAllocation() const
431 {
432     return m_needsAllocation;
433 }
434 
getArgType() const435 KernelArg::ArgType KernelArg::getArgType() const {
436     return m_argType;
437 }
438 
getAccessQual() const439 KernelArg::AccessQual KernelArg::getAccessQual() const
440 {
441     return m_accessQual;
442 }
443 
getArg() const444 const Argument* KernelArg::getArg() const
445 {
446     return m_arg;
447 }
448 
getAssociatedArgNo() const449 unsigned int KernelArg::getAssociatedArgNo() const
450 {
451     return m_associatedArgNo;
452 }
453 
getStructArgOffset() const454 unsigned int KernelArg::getStructArgOffset() const
455 {
456     return m_structArgOffset;
457 }
458 
getLocationCount() const459 unsigned int KernelArg::getLocationCount() const
460 {
461     return m_locationCount;
462 }
463 
getLocationIndex() const464 unsigned int KernelArg::getLocationIndex() const
465 {
466     return m_locationIndex;
467 }
468 
isImage(const Argument * arg,const StringRef typeStr,ArgType & imageArgType)469 bool KernelArg::isImage(const Argument* arg, const StringRef typeStr, ArgType& imageArgType)
470 {
471     if (!typeStr.startswith("image") && !typeStr.startswith("bindless"))
472         return false;
473 
474     // Get the original OpenCL type from the metadata and check if it's an image
475     // clang 3.8 introduced a new type mangling that includes the image access qualifier.
476     // Accept those too.
477     std::vector<std::string> accessQual{ "_t", "_ro_t", "_wo_t", "_rw_t" };
478     for (auto& postfix : accessQual)
479     {
480         if (typeStr.equals("image1d" + postfix))
481         {
482             imageArgType = ArgType::IMAGE_1D;
483             return true;
484         }
485 
486         if (typeStr.equals("image1d_buffer" + postfix))
487         {
488             imageArgType = ArgType::IMAGE_1D_BUFFER;
489             return true;
490         }
491 
492         if (typeStr.equals("image2d" + postfix))
493         {
494             imageArgType = ArgType::IMAGE_2D;
495             return true;
496         }
497 
498         if (typeStr.equals("image2d_depth" + postfix))
499         {
500             imageArgType = ArgType::IMAGE_2D_DEPTH;
501             return true;
502         }
503 
504         if (typeStr.equals("image2d_msaa" + postfix))
505         {
506             imageArgType = ArgType::IMAGE_2D_MSAA;
507             return true;
508         }
509 
510         if (typeStr.equals("image2d_msaa_depth" + postfix))
511         {
512             imageArgType = ArgType::IMAGE_2D_MSAA_DEPTH;
513             return true;
514         }
515 
516         if (typeStr.equals("image3d" + postfix))
517         {
518             imageArgType = ArgType::IMAGE_3D;
519             return true;
520         }
521 
522         if (typeStr.equals("image1d_array" + postfix))
523         {
524             imageArgType = ArgType::IMAGE_1D_ARRAY;
525             return true;
526         }
527 
528         if (typeStr.equals("image2d_array" + postfix))
529         {
530             imageArgType = ArgType::IMAGE_2D_ARRAY;
531             return true;
532         }
533 
534         if (typeStr.equals("image2d_array_depth" + postfix))
535         {
536             imageArgType = ArgType::IMAGE_2D_DEPTH_ARRAY;
537             return true;
538         }
539 
540         if (typeStr.equals("image2d_array_msaa" + postfix))
541         {
542             imageArgType = ArgType::IMAGE_2D_MSAA_ARRAY;
543             return true;
544         }
545 
546         if (typeStr.equals("image2d_array_msaa_depth" + postfix))
547         {
548             imageArgType = ArgType::IMAGE_2D_MSAA_DEPTH_ARRAY;
549             return true;
550         }
551     }
552 
553     // See if these are address space decoded args.
554     // Get the original OpenCL type from the metadata and check if it's an image
555     if (typeStr.equals("bindless_image1d_t"))
556     {
557         imageArgType = ArgType::BINDLESS_IMAGE_1D;
558         return true;
559     }
560 
561     if (typeStr.equals("bindless_image1d_buffer_t"))
562     {
563         imageArgType = ArgType::BINDLESS_IMAGE_1D_BUFFER;
564         return true;
565     }
566 
567     if (typeStr.equals("bindless_image2d_t"))
568     {
569         imageArgType = ArgType::BINDLESS_IMAGE_2D;
570         return true;
571     }
572 
573     if (typeStr.equals("bindless_image2d_depth_t"))
574     {
575         imageArgType = ArgType::BINDLESS_IMAGE_2D_DEPTH;
576         return true;
577     }
578 
579     if (typeStr.equals("bindless_image2d_msaa_t"))
580     {
581         imageArgType = ArgType::BINDLESS_IMAGE_2D_MSAA;
582         return true;
583     }
584 
585     if (typeStr.equals("bindless_image2d_msaa_depth_t"))
586     {
587         imageArgType = ArgType::BINDLESS_IMAGE_2D_MSAA_DEPTH;
588         return true;
589     }
590 
591     if (typeStr.equals("bindless_image3d_t"))
592     {
593         imageArgType = ArgType::BINDLESS_IMAGE_3D;
594         return true;
595     }
596 
597     if (typeStr.equals("bindless_image_cube_array_t"))
598     {
599         imageArgType = ArgType::BINDLESS_IMAGE_CUBE_ARRAY;
600         return true;
601     }
602 
603     if (typeStr.equals("bindless_image_cube_t"))
604     {
605         imageArgType = ArgType::BINDLESS_IMAGE_CUBE;
606         return true;
607     }
608 
609     if (typeStr.equals("bindless_image1d_array_t"))
610     {
611         imageArgType = ArgType::BINDLESS_IMAGE_1D_ARRAY;
612         return true;
613     }
614 
615     if (typeStr.equals("bindless_image2d_array_t"))
616     {
617         imageArgType = ArgType::BINDLESS_IMAGE_2D_ARRAY;
618         return true;
619     }
620 
621     if (typeStr.equals("bindless_image2d_array_depth_t"))
622     {
623         imageArgType = ArgType::BINDLESS_IMAGE_2D_DEPTH_ARRAY;
624         return true;
625     }
626 
627     if (typeStr.equals("bindless_image2d_array_msaa_t"))
628     {
629         imageArgType = ArgType::BINDLESS_IMAGE_2D_MSAA_ARRAY;
630         return true;
631     }
632 
633     if (typeStr.equals("bindless_image2d_array_msaa_depth_t"))
634     {
635         imageArgType = ArgType::BINDLESS_IMAGE_2D_MSAA_DEPTH_ARRAY;
636         return true;
637     }
638 
639     if (typeStr.equals("bindless_image_cube_array_depth_t"))
640     {
641         imageArgType = ArgType::BINDLESS_IMAGE_CUBE_DEPTH_ARRAY;
642         return true;
643     }
644 
645     if (typeStr.equals("bindless_image_cube_depth_t"))
646     {
647         imageArgType = ArgType::BINDLESS_IMAGE_CUBE_DEPTH;
648         return true;
649     }
650 
651     return false;
652 }
653 
isSampler(const Argument * arg,const StringRef typeStr)654 bool KernelArg::isSampler(const Argument* arg, const StringRef typeStr)
655 {
656     // Get the original OpenCL type from the metadata and check if it's a sampler
657     return (typeStr.equals("sampler_t"));
658 }
659 
isBindlessSampler(const Argument * arg,const StringRef typeStr)660 bool KernelArg::isBindlessSampler(const Argument* arg, const StringRef typeStr)
661 {
662     return (typeStr.equals("bindless_sampler_t"));
663 }
664 
getDataParamToken() const665 iOpenCL::DATA_PARAMETER_TOKEN KernelArg::getDataParamToken() const
666 {
667     auto iter = argTypeTokenMap.find(m_argType);
668     if (iter == argTypeTokenMap.end())
669     {
670         return iOpenCL::DATA_PARAMETER_TOKEN_UNKNOWN;
671     }
672 
673     return iter->second;
674 }
675 
676 
initArgTypeTokenMap()677 std::map<KernelArg::ArgType, iOpenCL::DATA_PARAMETER_TOKEN> initArgTypeTokenMap()
678 {
679     std::map<KernelArg::ArgType, iOpenCL::DATA_PARAMETER_TOKEN> map
680     {
681        { KernelArg::ArgType::IMPLICIT_LOCAL_IDS, iOpenCL::DATA_PARAMETER_LOCAL_ID },
682        { KernelArg::ArgType::IMPLICIT_WORK_DIM, iOpenCL::DATA_PARAMETER_WORK_DIMENSIONS },
683        { KernelArg::ArgType::IMPLICIT_NUM_GROUPS, iOpenCL::DATA_PARAMETER_NUM_WORK_GROUPS },
684        { KernelArg::ArgType::IMPLICIT_GLOBAL_SIZE, iOpenCL::DATA_PARAMETER_GLOBAL_WORK_SIZE },
685        { KernelArg::ArgType::IMPLICIT_LOCAL_SIZE, iOpenCL::DATA_PARAMETER_LOCAL_WORK_SIZE },
686        { KernelArg::ArgType::IMPLICIT_STAGE_IN_GRID_ORIGIN, iOpenCL::DATA_PARAMETER_STAGE_IN_GRID_ORIGIN },
687        { KernelArg::ArgType::IMPLICIT_STAGE_IN_GRID_SIZE, iOpenCL::DATA_PARAMETER_STAGE_IN_GRID_SIZE },
688        { KernelArg::ArgType::IMPLICIT_ENQUEUED_LOCAL_WORK_SIZE, iOpenCL::DATA_PARAMETER_ENQUEUED_LOCAL_WORK_SIZE },
689 
690        { KernelArg::ArgType::IMPLICIT_IMAGE_HEIGHT, iOpenCL::DATA_PARAMETER_IMAGE_HEIGHT },
691        { KernelArg::ArgType::IMPLICIT_IMAGE_WIDTH, iOpenCL::DATA_PARAMETER_IMAGE_WIDTH },
692        { KernelArg::ArgType::IMPLICIT_IMAGE_DEPTH, iOpenCL::DATA_PARAMETER_IMAGE_DEPTH },
693        { KernelArg::ArgType::IMPLICIT_IMAGE_NUM_MIP_LEVELS, iOpenCL::DATA_PARAMETER_IMAGE_NUM_MIP_LEVELS },
694        { KernelArg::ArgType::IMPLICIT_IMAGE_CHANNEL_DATA_TYPE, iOpenCL::DATA_PARAMETER_IMAGE_CHANNEL_DATA_TYPE },
695        { KernelArg::ArgType::IMPLICIT_IMAGE_CHANNEL_ORDER, iOpenCL::DATA_PARAMETER_IMAGE_CHANNEL_ORDER },
696        { KernelArg::ArgType::IMPLICIT_IMAGE_SRGB_CHANNEL_ORDER, iOpenCL::DATA_PARAMETER_IMAGE_SRGB_CHANNEL_ORDER },
697        { KernelArg::ArgType::IMPLICIT_IMAGE_ARRAY_SIZE, iOpenCL::DATA_PARAMETER_IMAGE_ARRAY_SIZE },
698        { KernelArg::ArgType::IMPLICIT_IMAGE_NUM_SAMPLES, iOpenCL::DATA_PARAMETER_IMAGE_NUM_SAMPLES },
699        { KernelArg::ArgType::IMPLICIT_SAMPLER_ADDRESS, iOpenCL::DATA_PARAMETER_SAMPLER_ADDRESS_MODE },
700        { KernelArg::ArgType::IMPLICIT_SAMPLER_NORMALIZED, iOpenCL::DATA_PARAMETER_SAMPLER_NORMALIZED_COORDS },
701        { KernelArg::ArgType::IMPLICIT_SAMPLER_SNAP_WA, iOpenCL::DATA_PARAMETER_SAMPLER_COORDINATE_SNAP_WA_REQUIRED },
702        { KernelArg::ArgType::IMPLICIT_FLAT_IMAGE_BASEOFFSET, iOpenCL::DATA_PARAMETER_FLAT_IMAGE_BASEOFFSET },
703        { KernelArg::ArgType::IMPLICIT_FLAT_IMAGE_HEIGHT, iOpenCL::DATA_PARAMETER_FLAT_IMAGE_HEIGHT },
704        { KernelArg::ArgType::IMPLICIT_FLAT_IMAGE_WIDTH, iOpenCL::DATA_PARAMETER_FLAT_IMAGE_WIDTH },
705        { KernelArg::ArgType::IMPLICIT_FLAT_IMAGE_PITCH, iOpenCL::DATA_PARAMETER_FLAT_IMAGE_PITCH },
706 
707        { KernelArg::ArgType::IMPLICIT_VME_MB_BLOCK_TYPE, iOpenCL::DATA_PARAMETER_VME_MB_BLOCK_TYPE },
708        { KernelArg::ArgType::IMPLICIT_VME_SUBPIXEL_MODE, iOpenCL::DATA_PARAMETER_VME_SUBPIXEL_MODE },
709        { KernelArg::ArgType::IMPLICIT_VME_SAD_ADJUST_MODE, iOpenCL::DATA_PARAMETER_VME_SAD_ADJUST_MODE },
710        { KernelArg::ArgType::IMPLICIT_VME_SEARCH_PATH_TYPE, iOpenCL::DATA_PARAMETER_VME_SEARCH_PATH_TYPE },
711 
712        { KernelArg::ArgType::IMPLICIT_DEVICE_ENQUEUE_DEFAULT_DEVICE_QUEUE, iOpenCL::DATA_PARAMETER_VME_SEARCH_PATH_TYPE },
713        { KernelArg::ArgType::IMPLICIT_DEVICE_ENQUEUE_EVENT_POOL, iOpenCL::DATA_PARAMETER_PARENT_EVENT },
714 
715        { KernelArg::ArgType::IMPLICIT_DEVICE_ENQUEUE_MAX_WORKGROUP_SIZE, iOpenCL::DATA_PARAMETER_MAX_WORKGROUP_SIZE },
716        { KernelArg::ArgType::IMPLICIT_DEVICE_ENQUEUE_PARENT_EVENT, iOpenCL::DATA_PARAMETER_PARENT_EVENT },
717        { KernelArg::ArgType::IMPLICIT_DEVICE_ENQUEUE_PREFERED_WORKGROUP_MULTIPLE, iOpenCL::DATA_PARAMETER_PREFERRED_WORKGROUP_MULTIPLE },
718        { KernelArg::ArgType::IMPLICIT_DEVICE_ENQUEUE_DATA_PARAMETER_OBJECT_ID, iOpenCL::DATA_PARAMETER_OBJECT_ID },
719        { KernelArg::ArgType::IMPLICIT_DEVICE_ENQUEUE_DISPATCHER_SIMD_SIZE, iOpenCL::DATA_PARAMETER_CHILD_BLOCK_SIMD_SIZE },
720 
721        { KernelArg::ArgType::IMPLICIT_LOCAL_MEMORY_STATELESS_WINDOW_START_ADDRESS, iOpenCL::DATA_PARAMETER_LOCAL_MEMORY_STATELESS_WINDOW_START_ADDRESS },
722        { KernelArg::ArgType::IMPLICIT_LOCAL_MEMORY_STATELESS_WINDOW_SIZE, iOpenCL::DATA_PARAMETER_LOCAL_MEMORY_STATELESS_WINDOW_SIZE },
723        { KernelArg::ArgType::IMPLICIT_PRIVATE_MEMORY_STATELESS_SIZE, iOpenCL::DATA_PARAMETER_PRIVATE_MEMORY_STATELESS_SIZE },
724        { KernelArg::ArgType::IMPLICIT_BUFFER_OFFSET, iOpenCL::DATA_PARAMETER_BUFFER_OFFSET },
725        { KernelArg::ArgType::IMPLICIT_ARG_BUFFER, iOpenCL::DATA_PARAMETER_IMPL_ARG_BUFFER },
726        { KernelArg::ArgType::IMPLICIT_LOCAL_ID_BUFFER, iOpenCL::DATA_PARAMETER_LOCAL_ID_BUFFER }
727     };
728     return map;
729 }
730 const std::map<KernelArg::ArgType, iOpenCL::DATA_PARAMETER_TOKEN> KernelArg::argTypeTokenMap = initArgTypeTokenMap();
731 
VerifyOrder(std::array<KernelArg::ArgType,static_cast<int32_t> (KernelArg::ArgType::End)> & order,KernelArg::ArgType sent)732 bool KernelArgsOrder::VerifyOrder(std::array<KernelArg::ArgType, static_cast<int32_t>(KernelArg::ArgType::End)>& order, KernelArg::ArgType sent)
733 {
734     bool validOrder = false;
735     // It's not safe to iterate over a random generated sentinel
736     if (order[static_cast<uint>(KernelArg::ArgType::End) - 1] == sent)
737     {
738         order[static_cast<uint>(KernelArg::ArgType::End) - 1] = KernelArg::ArgType::Default;
739         validOrder = true;
740     }
741     else
742     {
743         IGC_ASSERT(0);
744     }
745 
746     return validOrder;
747 }
748 
TransposeGenerateOrder(std::array<KernelArg::ArgType,static_cast<int32_t> (KernelArg::ArgType::End)> & order)749 void KernelArgsOrder::TransposeGenerateOrder(std::array<KernelArg::ArgType, static_cast<int32_t>(KernelArg::ArgType::End)>& order)
750 {
751     int i = 0;
752 
753     for (const auto& j : order)
754     {
755         m_position[static_cast<uint32_t>(j)] = i++;
756     }
757 }
758 
KernelArgsOrder(InputType layout)759 KernelArgsOrder::KernelArgsOrder(InputType layout)
760 {
761     const KernelArg::ArgType SENTINEL = KernelArg::ArgType::End;
762 
763     switch (layout)
764     {
765     case InputType::INDEPENDENT:
766     case InputType::CURBE:
767     {
768         std::array<KernelArg::ArgType, static_cast<int32_t>(KernelArg::ArgType::End)> CURBE =
769         {
770             KernelArg::ArgType::IMPLICIT_R0,
771 
772             KernelArg::ArgType::RUNTIME_VALUE,
773 
774             KernelArg::ArgType::IMPLICIT_PAYLOAD_HEADER,
775 
776             KernelArg::ArgType::PTR_LOCAL,
777             KernelArg::ArgType::PTR_GLOBAL,
778             KernelArg::ArgType::PTR_CONSTANT,
779             KernelArg::ArgType::PTR_DEVICE_QUEUE,
780 
781             KernelArg::ArgType::CONSTANT_REG,
782 
783             KernelArg::ArgType::IMPLICIT_CONSTANT_BASE,
784             KernelArg::ArgType::IMPLICIT_GLOBAL_BASE,
785             KernelArg::ArgType::IMPLICIT_PRIVATE_BASE,
786             KernelArg::ArgType::IMPLICIT_PRINTF_BUFFER,
787             KernelArg::ArgType::IMPLICIT_SYNC_BUFFER,
788             KernelArg::ArgType::IMPLICIT_BUFFER_OFFSET,
789             KernelArg::ArgType::IMPLICIT_WORK_DIM,
790             KernelArg::ArgType::IMPLICIT_NUM_GROUPS,
791             KernelArg::ArgType::IMPLICIT_GLOBAL_SIZE,
792             KernelArg::ArgType::IMPLICIT_LOCAL_SIZE,
793             KernelArg::ArgType::IMPLICIT_STAGE_IN_GRID_ORIGIN,
794             KernelArg::ArgType::IMPLICIT_STAGE_IN_GRID_SIZE,
795             KernelArg::ArgType::IMPLICIT_ENQUEUED_LOCAL_WORK_SIZE,
796 
797             KernelArg::ArgType::IMPLICIT_BINDLESS_OFFSET,
798 
799             KernelArg::ArgType::IMPLICIT_IMAGE_HEIGHT,
800             KernelArg::ArgType::IMPLICIT_IMAGE_WIDTH,
801             KernelArg::ArgType::IMPLICIT_IMAGE_DEPTH,
802             KernelArg::ArgType::IMPLICIT_IMAGE_NUM_MIP_LEVELS,
803             KernelArg::ArgType::IMPLICIT_IMAGE_CHANNEL_DATA_TYPE,
804             KernelArg::ArgType::IMPLICIT_IMAGE_CHANNEL_ORDER,
805             KernelArg::ArgType::IMPLICIT_IMAGE_SRGB_CHANNEL_ORDER,
806             KernelArg::ArgType::IMPLICIT_IMAGE_ARRAY_SIZE,
807             KernelArg::ArgType::IMPLICIT_IMAGE_NUM_SAMPLES,
808             KernelArg::ArgType::IMPLICIT_SAMPLER_ADDRESS,
809             KernelArg::ArgType::IMPLICIT_SAMPLER_NORMALIZED,
810             KernelArg::ArgType::IMPLICIT_SAMPLER_SNAP_WA,
811             KernelArg::ArgType::IMPLICIT_FLAT_IMAGE_BASEOFFSET,
812             KernelArg::ArgType::IMPLICIT_FLAT_IMAGE_HEIGHT,
813             KernelArg::ArgType::IMPLICIT_FLAT_IMAGE_WIDTH,
814             KernelArg::ArgType::IMPLICIT_FLAT_IMAGE_PITCH,
815 
816             KernelArg::ArgType::IMPLICIT_VME_MB_BLOCK_TYPE,
817             KernelArg::ArgType::IMPLICIT_VME_SUBPIXEL_MODE,
818             KernelArg::ArgType::IMPLICIT_VME_SAD_ADJUST_MODE,
819             KernelArg::ArgType::IMPLICIT_VME_SEARCH_PATH_TYPE,
820 
821             KernelArg::ArgType::IMPLICIT_DEVICE_ENQUEUE_DEFAULT_DEVICE_QUEUE,
822             KernelArg::ArgType::IMPLICIT_DEVICE_ENQUEUE_EVENT_POOL,
823             KernelArg::ArgType::IMPLICIT_DEVICE_ENQUEUE_MAX_WORKGROUP_SIZE,
824             KernelArg::ArgType::IMPLICIT_DEVICE_ENQUEUE_PARENT_EVENT,
825             KernelArg::ArgType::IMPLICIT_DEVICE_ENQUEUE_PREFERED_WORKGROUP_MULTIPLE,
826             KernelArg::ArgType::IMPLICIT_DEVICE_ENQUEUE_DATA_PARAMETER_OBJECT_ID,
827             KernelArg::ArgType::IMPLICIT_DEVICE_ENQUEUE_DISPATCHER_SIMD_SIZE,
828 
829             KernelArg::ArgType::IMPLICIT_LOCAL_MEMORY_STATELESS_WINDOW_START_ADDRESS,
830             KernelArg::ArgType::IMPLICIT_LOCAL_MEMORY_STATELESS_WINDOW_SIZE,
831             KernelArg::ArgType::IMPLICIT_PRIVATE_MEMORY_STATELESS_SIZE,
832 
833             KernelArg::ArgType::R1,
834             KernelArg::ArgType::IMPLICIT_LOCAL_IDS,
835 
836             KernelArg::ArgType::IMPLICIT_ARG_BUFFER,
837             KernelArg::ArgType::IMPLICIT_LOCAL_ID_BUFFER,
838 
839             KernelArg::ArgType::STRUCT,
840             KernelArg::ArgType::SAMPLER,
841             KernelArg::ArgType::IMAGE_1D,
842             KernelArg::ArgType::IMAGE_1D_BUFFER,
843             KernelArg::ArgType::IMAGE_2D,
844             KernelArg::ArgType::IMAGE_2D_DEPTH,
845             KernelArg::ArgType::IMAGE_2D_MSAA,
846             KernelArg::ArgType::IMAGE_2D_MSAA_DEPTH,
847             KernelArg::ArgType::IMAGE_3D,
848             KernelArg::ArgType::IMAGE_CUBE,
849             KernelArg::ArgType::IMAGE_CUBE_DEPTH,
850             KernelArg::ArgType::IMAGE_1D_ARRAY,
851             KernelArg::ArgType::IMAGE_2D_ARRAY,
852             KernelArg::ArgType::IMAGE_2D_DEPTH_ARRAY,
853             KernelArg::ArgType::IMAGE_2D_MSAA_ARRAY,
854             KernelArg::ArgType::IMAGE_2D_MSAA_DEPTH_ARRAY,
855             KernelArg::ArgType::IMAGE_CUBE_ARRAY,
856             KernelArg::ArgType::IMAGE_CUBE_DEPTH_ARRAY,
857 
858             KernelArg::ArgType::BINDLESS_SAMPLER,
859             KernelArg::ArgType::BINDLESS_IMAGE_1D,
860             KernelArg::ArgType::BINDLESS_IMAGE_1D_BUFFER,
861             KernelArg::ArgType::BINDLESS_IMAGE_2D,
862             KernelArg::ArgType::BINDLESS_IMAGE_2D_DEPTH,
863             KernelArg::ArgType::BINDLESS_IMAGE_2D_MSAA,
864             KernelArg::ArgType::BINDLESS_IMAGE_2D_MSAA_DEPTH,
865             KernelArg::ArgType::BINDLESS_IMAGE_3D,
866             KernelArg::ArgType::BINDLESS_IMAGE_CUBE,
867             KernelArg::ArgType::BINDLESS_IMAGE_CUBE_DEPTH,
868             KernelArg::ArgType::BINDLESS_IMAGE_1D_ARRAY,
869             KernelArg::ArgType::BINDLESS_IMAGE_2D_ARRAY,
870             KernelArg::ArgType::BINDLESS_IMAGE_2D_DEPTH_ARRAY,
871             KernelArg::ArgType::BINDLESS_IMAGE_2D_MSAA_ARRAY,
872             KernelArg::ArgType::BINDLESS_IMAGE_2D_MSAA_DEPTH_ARRAY,
873             KernelArg::ArgType::BINDLESS_IMAGE_CUBE_ARRAY,
874             KernelArg::ArgType::BINDLESS_IMAGE_CUBE_DEPTH_ARRAY,
875             SENTINEL,
876         };
877 
878         if (VerifyOrder(CURBE, SENTINEL))
879         {
880             TransposeGenerateOrder(CURBE);
881         }
882 
883     }
884     break;
885     case InputType::INDIRECT:
886     {
887         std::array<KernelArg::ArgType, static_cast<int32_t>(KernelArg::ArgType::End)> INDIRECT =
888         {
889             KernelArg::ArgType::IMPLICIT_R0,
890 
891             KernelArg::ArgType::R1,
892             KernelArg::ArgType::IMPLICIT_LOCAL_IDS,
893 
894             KernelArg::ArgType::RUNTIME_VALUE,
895 
896             KernelArg::ArgType::IMPLICIT_PAYLOAD_HEADER,
897             KernelArg::ArgType::PTR_LOCAL,
898             KernelArg::ArgType::PTR_GLOBAL,
899             KernelArg::ArgType::PTR_CONSTANT,
900             KernelArg::ArgType::PTR_DEVICE_QUEUE,
901             KernelArg::ArgType::CONSTANT_REG,
902 
903             KernelArg::ArgType::IMPLICIT_CONSTANT_BASE,
904             KernelArg::ArgType::IMPLICIT_GLOBAL_BASE,
905             KernelArg::ArgType::IMPLICIT_PRIVATE_BASE,
906             KernelArg::ArgType::IMPLICIT_PRINTF_BUFFER,
907             KernelArg::ArgType::IMPLICIT_SYNC_BUFFER,
908             KernelArg::ArgType::IMPLICIT_BUFFER_OFFSET,
909             KernelArg::ArgType::IMPLICIT_WORK_DIM,
910             KernelArg::ArgType::IMPLICIT_NUM_GROUPS,
911             KernelArg::ArgType::IMPLICIT_GLOBAL_SIZE,
912             KernelArg::ArgType::IMPLICIT_LOCAL_SIZE,
913             KernelArg::ArgType::IMPLICIT_STAGE_IN_GRID_ORIGIN,
914             KernelArg::ArgType::IMPLICIT_STAGE_IN_GRID_SIZE,
915             KernelArg::ArgType::IMPLICIT_ENQUEUED_LOCAL_WORK_SIZE,
916 
917             KernelArg::ArgType::IMPLICIT_BINDLESS_OFFSET,
918 
919             KernelArg::ArgType::IMPLICIT_ARG_BUFFER,
920             KernelArg::ArgType::IMPLICIT_LOCAL_ID_BUFFER,
921 
922             KernelArg::ArgType::IMPLICIT_IMAGE_HEIGHT,
923             KernelArg::ArgType::IMPLICIT_IMAGE_WIDTH,
924             KernelArg::ArgType::IMPLICIT_IMAGE_DEPTH,
925             KernelArg::ArgType::IMPLICIT_IMAGE_NUM_MIP_LEVELS,
926             KernelArg::ArgType::IMPLICIT_IMAGE_CHANNEL_DATA_TYPE,
927             KernelArg::ArgType::IMPLICIT_IMAGE_CHANNEL_ORDER,
928             KernelArg::ArgType::IMPLICIT_IMAGE_SRGB_CHANNEL_ORDER,
929             KernelArg::ArgType::IMPLICIT_IMAGE_ARRAY_SIZE,
930             KernelArg::ArgType::IMPLICIT_IMAGE_NUM_SAMPLES,
931             KernelArg::ArgType::IMPLICIT_SAMPLER_ADDRESS,
932             KernelArg::ArgType::IMPLICIT_SAMPLER_NORMALIZED,
933             KernelArg::ArgType::IMPLICIT_SAMPLER_SNAP_WA,
934             KernelArg::ArgType::IMPLICIT_FLAT_IMAGE_BASEOFFSET,
935             KernelArg::ArgType::IMPLICIT_FLAT_IMAGE_HEIGHT,
936             KernelArg::ArgType::IMPLICIT_FLAT_IMAGE_WIDTH,
937             KernelArg::ArgType::IMPLICIT_FLAT_IMAGE_PITCH,
938 
939             KernelArg::ArgType::IMPLICIT_VME_MB_BLOCK_TYPE,
940             KernelArg::ArgType::IMPLICIT_VME_SUBPIXEL_MODE,
941             KernelArg::ArgType::IMPLICIT_VME_SAD_ADJUST_MODE,
942             KernelArg::ArgType::IMPLICIT_VME_SEARCH_PATH_TYPE,
943 
944             KernelArg::ArgType::IMPLICIT_DEVICE_ENQUEUE_DEFAULT_DEVICE_QUEUE,
945             KernelArg::ArgType::IMPLICIT_DEVICE_ENQUEUE_EVENT_POOL,
946             KernelArg::ArgType::IMPLICIT_DEVICE_ENQUEUE_MAX_WORKGROUP_SIZE,
947             KernelArg::ArgType::IMPLICIT_DEVICE_ENQUEUE_PARENT_EVENT,
948             KernelArg::ArgType::IMPLICIT_DEVICE_ENQUEUE_PREFERED_WORKGROUP_MULTIPLE,
949             KernelArg::ArgType::IMPLICIT_DEVICE_ENQUEUE_DATA_PARAMETER_OBJECT_ID,
950             KernelArg::ArgType::IMPLICIT_DEVICE_ENQUEUE_DISPATCHER_SIMD_SIZE,
951 
952             KernelArg::ArgType::IMPLICIT_LOCAL_MEMORY_STATELESS_WINDOW_START_ADDRESS,
953             KernelArg::ArgType::IMPLICIT_LOCAL_MEMORY_STATELESS_WINDOW_SIZE,
954             KernelArg::ArgType::IMPLICIT_PRIVATE_MEMORY_STATELESS_SIZE,
955 
956             KernelArg::ArgType::STRUCT,
957             KernelArg::ArgType::SAMPLER,
958             KernelArg::ArgType::IMAGE_1D,
959             KernelArg::ArgType::IMAGE_1D_BUFFER,
960             KernelArg::ArgType::IMAGE_2D,
961             KernelArg::ArgType::IMAGE_2D_DEPTH,
962             KernelArg::ArgType::IMAGE_2D_MSAA,
963             KernelArg::ArgType::IMAGE_2D_MSAA_DEPTH,
964             KernelArg::ArgType::IMAGE_3D,
965             KernelArg::ArgType::IMAGE_CUBE,
966             KernelArg::ArgType::IMAGE_CUBE_DEPTH,
967             KernelArg::ArgType::IMAGE_1D_ARRAY,
968             KernelArg::ArgType::IMAGE_2D_ARRAY,
969             KernelArg::ArgType::IMAGE_2D_DEPTH_ARRAY,
970             KernelArg::ArgType::IMAGE_2D_MSAA_ARRAY,
971             KernelArg::ArgType::IMAGE_2D_MSAA_DEPTH_ARRAY,
972             KernelArg::ArgType::IMAGE_CUBE_ARRAY,
973             KernelArg::ArgType::IMAGE_CUBE_DEPTH_ARRAY,
974 
975             KernelArg::ArgType::BINDLESS_SAMPLER,
976             KernelArg::ArgType::BINDLESS_IMAGE_1D,
977             KernelArg::ArgType::BINDLESS_IMAGE_1D_BUFFER,
978             KernelArg::ArgType::BINDLESS_IMAGE_2D,
979             KernelArg::ArgType::BINDLESS_IMAGE_2D_DEPTH,
980             KernelArg::ArgType::BINDLESS_IMAGE_2D_MSAA,
981             KernelArg::ArgType::BINDLESS_IMAGE_2D_MSAA_DEPTH,
982             KernelArg::ArgType::BINDLESS_IMAGE_3D,
983             KernelArg::ArgType::BINDLESS_IMAGE_CUBE,
984             KernelArg::ArgType::BINDLESS_IMAGE_CUBE_DEPTH,
985             KernelArg::ArgType::BINDLESS_IMAGE_1D_ARRAY,
986             KernelArg::ArgType::BINDLESS_IMAGE_2D_ARRAY,
987             KernelArg::ArgType::BINDLESS_IMAGE_2D_DEPTH_ARRAY,
988             KernelArg::ArgType::BINDLESS_IMAGE_2D_MSAA_ARRAY,
989             KernelArg::ArgType::BINDLESS_IMAGE_2D_MSAA_DEPTH_ARRAY,
990             KernelArg::ArgType::BINDLESS_IMAGE_CUBE_ARRAY,
991             KernelArg::ArgType::BINDLESS_IMAGE_CUBE_DEPTH_ARRAY,
992             SENTINEL,
993         };
994 
995         if (VerifyOrder(INDIRECT, SENTINEL))
996         {
997             TransposeGenerateOrder(INDIRECT);
998         }
999     }
1000     break;
1001     default:
1002         IGC_ASSERT(0);
1003         break;
1004     }
1005 }
1006 
operator ()(const KernelArg::ArgType & lhs,const KernelArg::ArgType & rhs) const1007 bool KernelArgsOrder::operator()(const KernelArg::ArgType& lhs, const KernelArg::ArgType& rhs) const
1008 {
1009     return m_position[static_cast<int32_t>(lhs)] < m_position[static_cast<int32_t>(rhs)];
1010 }
1011 
const_iterator(AllocationArgs & args,IterPos pos)1012 KernelArgs::const_iterator::const_iterator(AllocationArgs& args, IterPos pos)
1013 {
1014     m_empty = args.empty();
1015     if (pos == IterPos::BEGIN)
1016     {
1017         m_major = args.begin();
1018         m_majorEnd = args.end();
1019         if (!m_empty)
1020             m_minor = (*args.begin()).second.begin();
1021     }
1022     else if (pos == IterPos::END)
1023     {
1024         m_major = args.end();
1025         m_majorEnd = args.end();
1026         if (!m_empty)
1027             m_minor = (*(--args.end())).second.end();
1028     }
1029 }
1030 
operator ++()1031 KernelArgs::const_iterator& KernelArgs::const_iterator::operator++()
1032 {
1033     IGC_ASSERT(!m_empty);
1034     ++m_minor;
1035 
1036     if (m_minor == (*m_major).second.end())
1037     {
1038         ++m_major;
1039         if (m_major != m_majorEnd)
1040         {
1041             m_minor = (*m_major).second.begin();
1042         }
1043     }
1044 
1045     return *this;
1046 }
1047 
operator *()1048 const KernelArg& KernelArgs::const_iterator::operator*()
1049 {
1050     IGC_ASSERT(!m_empty);
1051     return *m_minor;
1052 }
1053 
operator !=(const const_iterator & iterator)1054 bool KernelArgs::const_iterator::operator!=(const const_iterator& iterator)
1055 {
1056     if (m_empty)
1057         return (m_major != iterator.m_major);
1058     else
1059         return (m_major != iterator.m_major) || (m_minor != iterator.m_minor);
1060 }
1061 
operator ==(const const_iterator & iterator)1062 bool KernelArgs::const_iterator::operator==(const const_iterator& iterator)
1063 {
1064     if (m_empty)
1065         return (m_major == iterator.m_major);
1066     else
1067         return (m_major == iterator.m_major) && (m_minor == iterator.m_minor);
1068 }
1069 
KernelArgs(const Function & F,const DataLayout * DL,MetaDataUtils * pMdUtils,ModuleMetaData * moduleMD,unsigned int GRFSize,KernelArgsOrder::InputType layout)1070 KernelArgs::KernelArgs(const Function& F, const DataLayout* DL, MetaDataUtils* pMdUtils, ModuleMetaData* moduleMD, unsigned int GRFSize, KernelArgsOrder::InputType layout)
1071     : m_KernelArgsOrder(layout),
1072     m_args(m_KernelArgsOrder)
1073 {
1074     ImplicitArgs implicitArgs(F, pMdUtils);
1075     const unsigned int numImplicitArgs = implicitArgs.size();
1076     const unsigned int numRuntimeValue = moduleMD ? moduleMD->pushInfo.constantReg.size() : 0;
1077     IGC_ASSERT_MESSAGE(F.arg_size() >= (numImplicitArgs + numRuntimeValue), "Function arg size does not match meta data args.");
1078     const unsigned int numExplicitArgs = F.arg_size() - numImplicitArgs - numRuntimeValue;
1079     llvm::Function::const_arg_iterator funcArg = F.arg_begin();
1080 
1081     FunctionInfoMetaDataHandle funcInfoMD = pMdUtils->getFunctionsInfoItem(const_cast<llvm::Function*>(&F));
1082     // Explicit function args
1083     for (unsigned int i = 0, e = numExplicitArgs; i < e; ++i, ++funcArg)
1084     {
1085         bool needAllocation = false;
1086         if (moduleMD && moduleMD->UseBindlessImage)
1087         {
1088             // Check for bindless images which require allocation
1089             FunctionMetaData* funcMD = &moduleMD->FuncMD[const_cast<llvm::Function*>(&F)];
1090             ResourceAllocMD* resAllocMD = &funcMD->resAllocMD;
1091             if (resAllocMD->argAllocMDList.size() > funcArg->getArgNo())
1092             {
1093                 ArgAllocMD* argAlloc = &resAllocMD->argAllocMDList[funcArg->getArgNo()];
1094                 if (argAlloc->type == ResourceTypeEnum::BindlessUAVResourceType ||
1095                     argAlloc->type == ResourceTypeEnum::BindlessSamplerResourceType)
1096                 {
1097                     needAllocation = !funcArg->use_empty();
1098                 }
1099             }
1100         }
1101 
1102         int location_index = -1;
1103         int location_count = -1;
1104         bool is_emulation_argument = false;
1105 
1106         auto it = moduleMD->FuncMD.find(const_cast<Function*>(&F));
1107         if (it != moduleMD->FuncMD.end())
1108         {
1109             if (it->second.funcArgs.size() > (unsigned)i)
1110             {
1111                 location_index = it->second.funcArgs[i].bufferLocationIndex;
1112                 location_count = it->second.funcArgs[i].bufferLocationCount;
1113                 is_emulation_argument = it->second.funcArgs[i].isEmulationArg;
1114             }
1115         }
1116 
1117         std::string argBaseType = "";
1118         std::string argAccessQualItem = "";
1119 
1120         if (it != moduleMD->FuncMD.end())
1121         {
1122             if (it->second.m_OpenCLArgBaseTypes.size() > (unsigned)i)
1123                 argBaseType = it->second.m_OpenCLArgBaseTypes[i];
1124             if (it->second.m_OpenCLArgAccessQualifiers.size() > (unsigned)i)
1125                 argAccessQualItem = it->second.m_OpenCLArgAccessQualifiers[i];
1126         }
1127 
1128         KernelArg kernelArg = KernelArg(
1129             &(*funcArg),
1130             DL,
1131             argBaseType,
1132             argAccessQualItem,
1133             location_index,
1134             location_count,
1135             needAllocation,
1136             is_emulation_argument);
1137 
1138         if ((kernelArg.getArgType() == KernelArg::ArgType::IMAGE_3D ||
1139             kernelArg.getArgType() == KernelArg::ArgType::BINDLESS_IMAGE_3D) &&
1140             funcInfoMD->isArgInfoListHasValue()) {
1141             for (auto AI = funcInfoMD->begin_ArgInfoList(), AE = funcInfoMD->end_ArgInfoList(); AI != AE; ++AI) {
1142                 ArgInfoMetaDataHandle argInfo = *AI;
1143                 if (argInfo->getExplicitArgNum() == i) {
1144                     if (argInfo->isImgAccessFloatCoordsHasValue() && argInfo->isImgAccessIntCoordsHasValue()) {
1145                         kernelArg.setImgAccessedFloatCoords(argInfo->getImgAccessFloatCoords());
1146                         kernelArg.setImgAccessedIntCoords(argInfo->getImgAccessIntCoords());
1147                         break;
1148                     }
1149                 }
1150             }
1151         }
1152 
1153         addAllocationArg(kernelArg);
1154     }
1155 
1156     // Implicit function args
1157     for (unsigned int i = 0; i < numImplicitArgs; ++i, ++funcArg)
1158     {
1159         KernelArg kernelArg = KernelArg(implicitArgs[i], DL, &(*funcArg), implicitArgs.getExplicitArgNum(i), implicitArgs.getStructArgOffset(i), GRFSize);
1160         addAllocationArg(kernelArg);
1161     }
1162 
1163     // Need to add Runtime Values, so they can trigger NOSBuffer allocation in correct
1164     // order (especially needed when InputType::INDEPENDENT or InputType::CURBE is used).
1165     for (unsigned int i = 0; i < numRuntimeValue; ++i, ++funcArg)
1166     {
1167         KernelArg kernelArg = KernelArg(
1168             KernelArg::ArgType::RUNTIME_VALUE,      // argType
1169             KernelArg::AccessQual::NONE,            // accessQual
1170             4,                                      // allocateSize
1171             4,                                      // elemAllocateSize
1172             4,                                      // align
1173             true,                                   // isConstantBuf
1174             &(*funcArg),                            // arg
1175             numExplicitArgs + numImplicitArgs + 1); // associatedArgNo
1176         addAllocationArg(kernelArg);
1177     }
1178 }
1179 
addAllocationArg(KernelArg & kernelArg)1180 void KernelArgs::addAllocationArg(KernelArg& kernelArg)
1181 {
1182     KernelArg::ArgType argType = kernelArg.getArgType();
1183 
1184     // Add to the allocation arguments of this type
1185     m_args[argType].push_back(kernelArg);
1186 }
1187 
begin()1188 KernelArgs::const_iterator KernelArgs::begin()
1189 {
1190     return const_iterator(m_args, KernelArgs::const_iterator::IterPos::BEGIN);
1191 }
1192 
end()1193 KernelArgs::const_iterator KernelArgs::end()
1194 {
1195     return const_iterator(m_args, KernelArgs::const_iterator::IterPos::END);
1196 }
1197 
checkForZeroPerThreadData()1198 void KernelArgs::checkForZeroPerThreadData()
1199 {
1200 
1201     // On SKL, when we use Indirect thread payload, Spec says:
1202     // if Cross-Thread Constant Data Read Length for Indirect is greater than 0,
1203     // then Per thread data field must also be greater than 0.
1204     // In that case we allocate one blank payload grf for Per thread constant.
1205 
1206         // if PTD == 0 && CTCD > 0 then we would need to allocate a dummy argument to occupy a single GRF in a PTD
1207         // PTD 1 && CTCD > 0 is perfectly OK
1208     int PerThreadData = 0;
1209     bool HWWAForZeroLengthPTDRequired = true;
1210     for (AllocationArgs::const_iterator i = m_args.begin(), e = m_args.end(); i != e; ++i)
1211     {
1212         const KernelArg* arg = i->second.data();
1213         if (arg->needsAllocation() && !arg->isConstantBuf())
1214         {
1215             if (++PerThreadData > 0 + 1 /* IMPLICIT_R0 */)
1216             {
1217                 HWWAForZeroLengthPTDRequired = false;
1218                 break;
1219             }
1220         }
1221     }
1222     if (HWWAForZeroLengthPTDRequired)
1223     {
1224         KernelArg kernelArg = KernelArg(KernelArg::ArgType::R1, KernelArg::AccessQual::NONE, 32, 4, 32, false, nullptr, 0);
1225         addAllocationArg(kernelArg);
1226     }
1227 }
1228 
empty()1229 bool KernelArgs::empty()
1230 {
1231     return m_args.empty() ? true : begin() == end();
1232 }
1233 
1234