1 /*========================== begin_copyright_notice ============================
2
3 Copyright (C) 2017-2021 Intel Corporation
4
5 SPDX-License-Identifier: MIT
6
7 ============================= end_copyright_notice ===========================*/
8
9 #include "Compiler/Optimizer/OpenCLPasses/KernelArgs.hpp"
10 #include "AdaptorCommon/ImplicitArgs.hpp"
11 #include "llvmWrapper/IR/DerivedTypes.h"
12 #include "common/LLVMWarningsPush.hpp"
13 #include <llvm/IR/Argument.h>
14 #include <llvm/IR/DataLayout.h>
15 #include <llvm/IR/DerivedTypes.h>
16 #include <llvm/IR/Function.h>
17 #include <llvm/IR/Metadata.h>
18 #include <llvm/IR/Module.h>
19 #include "common/LLVMWarningsPop.hpp"
20 #include "Probe/Assertion.h"
21
22 using namespace IGC;
23 using namespace IGC::IGCMD;
24 using namespace llvm;
25
KernelArg(KernelArg::ArgType argType,KernelArg::AccessQual accessQual,unsigned int allocateSize,unsigned int elemAllocateSize,size_t align,bool isConstantBuf,const llvm::Argument * arg,unsigned int associatedArgNo)26 KernelArg::KernelArg(KernelArg::ArgType argType, KernelArg::AccessQual accessQual, unsigned int allocateSize, unsigned int elemAllocateSize, size_t align, bool isConstantBuf, const llvm::Argument* arg, unsigned int associatedArgNo) :
27 m_implicitArgument(false),
28 m_argType(argType),
29 m_accessQual(accessQual),
30 m_allocateSize(allocateSize), // in BYTES
31 m_elemAllocateSize(elemAllocateSize),
32 m_align(align),
33 m_isConstantBuf(isConstantBuf),
34 m_arg(arg),
35 m_associatedArgNo(associatedArgNo),
36 m_structArgOffset(-1),
37 m_locationIndex(-1),
38 m_locationCount(-1),
39 m_needsAllocation(typeAlwaysNeedsAllocation()),
40 m_isEmulationArgument(false),
41 m_imageInfo({ false, false })
42 {
43 }
44
KernelArg(const Argument * arg,const DataLayout * DL,const StringRef typeStr,const StringRef qualStr,int location_index,int location_count,bool needBindlessHandle,bool isEmulationArgument)45 KernelArg::KernelArg(const Argument* arg, const DataLayout* DL, const StringRef typeStr, const StringRef qualStr, int location_index, int location_count, bool needBindlessHandle, bool isEmulationArgument) :
46 m_implicitArgument(false),
47 m_argType(calcArgType(arg, typeStr)),
48 m_accessQual(calcAccessQual(arg, qualStr)),
49 // Only explicit arguments that need allocation are part of the constant buffer
50 m_isConstantBuf(needBindlessHandle || typeAlwaysNeedsAllocation()),
51 m_arg(arg),
52 m_associatedArgNo(arg->getArgNo()),
53 m_structArgOffset(-1),
54 m_locationIndex(location_index),
55 m_locationCount(location_count),
56 m_needsAllocation(needBindlessHandle || typeAlwaysNeedsAllocation()),
57 m_isEmulationArgument(isEmulationArgument),
58 m_imageInfo({ false, false })
59 {
60 m_allocateSize = calcAllocateSize(arg, DL);
61 m_elemAllocateSize = calcElemAllocateSize(arg, DL);
62 m_align = calcAlignment(arg, DL);
63 }
64
KernelArg(const ImplicitArg & implicitArg,const DataLayout * DL,const Argument * arg,unsigned int ExplicitArgNo,unsigned int structArgOffset,unsigned int GRFSize)65 KernelArg::KernelArg(const ImplicitArg& implicitArg, const DataLayout* DL, const Argument* arg, unsigned int ExplicitArgNo, unsigned int structArgOffset, unsigned int GRFSize) :
66 m_implicitArgument(true),
67 m_argType(calcArgType(implicitArg)),
68 m_accessQual(AccessQual::NONE),
69 m_allocateSize(implicitArg.getAllocateSize(*DL)),
70 m_align(implicitArg.getAlignment(*DL)),
71 m_isConstantBuf(implicitArg.isConstantBuf()),
72 m_arg(arg),
73 m_associatedArgNo(calcAssociatedArgNo(implicitArg, arg, ExplicitArgNo)),
74 m_structArgOffset(structArgOffset),
75 m_locationIndex(-1),
76 m_locationCount(-1),
77 m_needsAllocation(typeAlwaysNeedsAllocation()),
78 m_isEmulationArgument(false),
79 m_imageInfo({ false, false })
80 {
81 IGC_ASSERT(implicitArg.getNumberElements());
82
83 m_elemAllocateSize = m_allocateSize / implicitArg.getNumberElements();
84 if (implicitArg.isLocalIDs() && GRFSize == 64)
85 {
86 m_elemAllocateSize = m_allocateSize / (GRFSize / 2);
87 }
88 }
89
calcAllocateSize(const Argument * arg,const DataLayout * DL) const90 unsigned int KernelArg::calcAllocateSize(const Argument* arg, const DataLayout* DL) const
91 {
92 if (!needsAllocation()) return 0;
93
94 return int_cast<unsigned int>(DL->getTypeAllocSize(arg->getType()));
95 }
96
calcAlignment(const Argument * arg,const DataLayout * DL) const97 unsigned int KernelArg::calcAlignment(const Argument* arg, const DataLayout* DL) const
98 {
99 // If we don't need to allocate, we certainly don't need alignment
100 if (!needsAllocation()) return 0;
101
102 Type* typeToAlign = arg->getType();
103 // Usually, we return the alignment of the parameter type.
104 // For local pointers, we need the alignment of the *contained* type.
105 if (m_argType == ArgType::PTR_LOCAL)
106 {
107 typeToAlign = cast<PointerType>(typeToAlign)->getElementType();
108 }
109
110 return DL->getABITypeAlignment(typeToAlign);
111 }
112
calcElemAllocateSize(const Argument * arg,const DataLayout * DL) const113 unsigned int KernelArg::calcElemAllocateSize(const Argument* arg, const DataLayout* DL) const
114 {
115 if (!needsAllocation()) return 0;
116
117 return int_cast<unsigned int>(DL->getTypeAllocSize(arg->getType()->getScalarType()));
118 }
119
120 // First member of pair is ArgType of buffer.
121 // When ArgType is SAMPLER, second member should be true.
122 // When ArgType is NOT_TO_ALLOCATE, second member should be false.
123 // ArgType enum uses same values for SAMPLER and NOT_TO_ALLOCATE.
124 // This function helps disambiguate between the two values.
getBufferType(const Argument * arg,const StringRef typeStr)125 KernelArg::BufferArgType KernelArg::getBufferType(const Argument* arg, const StringRef typeStr)
126 {
127 if (arg->getType()->getTypeID() != Type::PointerTyID)
128 return { KernelArg::ArgType::SAMPLER, true };
129
130 PointerType* ptrType = cast<PointerType>(arg->getType());
131
132 int address_space = ptrType->getPointerAddressSpace();
133 bool directIdx = false;
134 unsigned int bufId = 0;
135 BufferType bufType = DecodeAS4GFXResource(address_space, directIdx, bufId);
136
137 // Check if this arg is an image
138 if (bufType == BufferType::UAV)
139 {
140 ArgType imgArgType;
141 // Check if argument is image
142 if (isImage(arg, typeStr, imgArgType)) return { imgArgType, false };
143 }
144 else if (bufType == BufferType::SAMPLER)
145 return { KernelArg::ArgType::SAMPLER, true };
146
147 return { KernelArg::ArgType::NOT_TO_ALLOCATE, false };
148 }
149
calcArgType(const Argument * arg,const StringRef typeStr)150 KernelArg::ArgType KernelArg::calcArgType(const Argument* arg, const StringRef typeStr)
151 {
152 switch (arg->getType()->getTypeID())
153 {
154
155 case Type::PointerTyID:
156 {
157 PointerType* ptrType = cast<PointerType>(arg->getType());
158
159 // Check for pointer address space
160 switch (ptrType->getAddressSpace())
161 {
162 case ADDRESS_SPACE_PRIVATE:
163 {
164
165 Type* type = arg->getType();
166 if (typeStr.equals("queue_t") || typeStr.equals("spirv.Queue"))
167 {
168 return KernelArg::ArgType::PTR_DEVICE_QUEUE;
169 }
170 else if (arg->hasByValAttr() &&
171 type->isPointerTy() &&
172 type->getPointerElementType()->isStructTy())
173 {
174 // Pass by value structs will show up as private pointer
175 // arguments in the function signiture.
176 return KernelArg::ArgType::STRUCT;
177 }
178 else
179 {
180 return KernelArg::ArgType::IMPLICIT_PRIVATE_BASE;
181 }
182 }
183
184 case ADDRESS_SPACE_GLOBAL:
185 {
186 ArgType imgArgType;
187 // Check if argument is image
188 if (isImage(arg, typeStr, imgArgType)) return imgArgType;
189 }
190 return KernelArg::ArgType::PTR_GLOBAL;
191
192 case ADDRESS_SPACE_CONSTANT:
193 // Bindless samplers are stored in addrspace(2)
194 if (isSampler(arg, typeStr))
195 return KernelArg::ArgType::SAMPLER;
196 else if (isBindlessSampler(arg, typeStr))
197 return KernelArg::ArgType::BINDLESS_SAMPLER;
198
199 return KernelArg::ArgType::PTR_CONSTANT;
200 case ADDRESS_SPACE_LOCAL:
201 return KernelArg::ArgType::PTR_LOCAL;
202
203 default:
204 #if 0
205 // Need to disable this assertion for two-phase-inlining, i.e.
206 // kernel arguments will be used for subroutines, which may
207 // have arguments from other address spaces. It is unfortunate
208 // that we cannot run ResourceAllocator only on kernels since
209 // BuiltinsConverter checks caller's resource allocation info.
210 //
211 // For the final codegen, this allocation info is only queried
212 // for kernels. This should not affect correctness, but a waste
213 // on subroutines.
214 //
215 // FIXME: There is a chain of dependency.
216 IGC_ASSERT_MESSAGE(0, "Unrecognized address space");
217 #endif
218 // This is a buffer. Try to decode this
219 return getBufferType(arg, typeStr).type;
220 }
221 }
222 case Type::IntegerTyID:
223 // Check if argument is sampler
224 if (isSampler(arg, typeStr)) return KernelArg::ArgType::SAMPLER;
225 // Fall through to default
226
227 default:
228 // May reach here from Type::IntegerTyID
229 return KernelArg::ArgType::CONSTANT_REG;
230 }
231 }
232
calcArgType(const ImplicitArg & arg) const233 KernelArg::ArgType KernelArg::calcArgType(const ImplicitArg& arg) const
234 {
235 switch (arg.getArgType())
236 {
237 case ImplicitArg::R0:
238 return KernelArg::ArgType::IMPLICIT_R0;
239 case ImplicitArg::PAYLOAD_HEADER:
240 return KernelArg::ArgType::IMPLICIT_PAYLOAD_HEADER;
241 case ImplicitArg::PRIVATE_BASE:
242 return KernelArg::ArgType::IMPLICIT_PRIVATE_BASE;
243 case ImplicitArg::CONSTANT_BASE:
244 return KernelArg::ArgType::IMPLICIT_CONSTANT_BASE;
245 case ImplicitArg::PRINTF_BUFFER:
246 return KernelArg::ArgType::IMPLICIT_PRINTF_BUFFER;
247 case ImplicitArg::SYNC_BUFFER:
248 return KernelArg::ArgType::IMPLICIT_SYNC_BUFFER;
249 case ImplicitArg::BUFFER_OFFSET:
250 return KernelArg::ArgType::IMPLICIT_BUFFER_OFFSET;
251 case ImplicitArg::GLOBAL_BASE:
252 return KernelArg::ArgType::IMPLICIT_GLOBAL_BASE;
253 case ImplicitArg::WORK_DIM:
254 return KernelArg::ArgType::IMPLICIT_WORK_DIM;
255 case ImplicitArg::NUM_GROUPS:
256 return KernelArg::ArgType::IMPLICIT_NUM_GROUPS;
257 case ImplicitArg::GLOBAL_SIZE:
258 return KernelArg::ArgType::IMPLICIT_GLOBAL_SIZE;
259 case ImplicitArg::LOCAL_SIZE:
260 return KernelArg::ArgType::IMPLICIT_LOCAL_SIZE;
261 case ImplicitArg::ENQUEUED_LOCAL_WORK_SIZE:
262 return KernelArg::ArgType::IMPLICIT_ENQUEUED_LOCAL_WORK_SIZE;
263 case ImplicitArg::LOCAL_ID_X:
264 // fall through until LOCAL_ID_Z
265 case ImplicitArg::LOCAL_ID_Y:
266 // fall through until LOCAL_ID_Z
267 case ImplicitArg::LOCAL_ID_Z:
268 return KernelArg::ArgType::IMPLICIT_LOCAL_IDS;
269 case ImplicitArg::STAGE_IN_GRID_ORIGIN:
270 return KernelArg::ArgType::IMPLICIT_STAGE_IN_GRID_ORIGIN;
271 case ImplicitArg::STAGE_IN_GRID_SIZE:
272 return KernelArg::ArgType::IMPLICIT_STAGE_IN_GRID_SIZE;
273 case ImplicitArg::CONSTANT_REG_FP32:
274 return KernelArg::ArgType::CONSTANT_REG;
275 case ImplicitArg::CONSTANT_REG_QWORD:
276 return KernelArg::ArgType::CONSTANT_REG;
277 case ImplicitArg::CONSTANT_REG_DWORD:
278 return KernelArg::ArgType::CONSTANT_REG;
279 case ImplicitArg::CONSTANT_REG_WORD:
280 return KernelArg::ArgType::CONSTANT_REG;
281 case ImplicitArg::CONSTANT_REG_BYTE:
282 return KernelArg::ArgType::CONSTANT_REG;
283
284 case ImplicitArg::IMAGE_HEIGHT:
285 return KernelArg::ArgType::IMPLICIT_IMAGE_HEIGHT;
286 case ImplicitArg::IMAGE_WIDTH:
287 return KernelArg::ArgType::IMPLICIT_IMAGE_WIDTH;
288 case ImplicitArg::IMAGE_DEPTH:
289 return KernelArg::ArgType::IMPLICIT_IMAGE_DEPTH;
290 case ImplicitArg::IMAGE_NUM_MIP_LEVELS:
291 return KernelArg::ArgType::IMPLICIT_IMAGE_NUM_MIP_LEVELS;
292 case ImplicitArg::IMAGE_CHANNEL_DATA_TYPE:
293 return KernelArg::ArgType::IMPLICIT_IMAGE_CHANNEL_DATA_TYPE;
294 case ImplicitArg::IMAGE_CHANNEL_ORDER:
295 return KernelArg::ArgType::IMPLICIT_IMAGE_CHANNEL_ORDER;
296 case ImplicitArg::IMAGE_SRGB_CHANNEL_ORDER:
297 return KernelArg::ArgType::IMPLICIT_IMAGE_SRGB_CHANNEL_ORDER;
298 case ImplicitArg::IMAGE_ARRAY_SIZE:
299 return KernelArg::ArgType::IMPLICIT_IMAGE_ARRAY_SIZE;
300 case ImplicitArg::IMAGE_NUM_SAMPLES:
301 return KernelArg::ArgType::IMPLICIT_IMAGE_NUM_SAMPLES;
302 case ImplicitArg::SAMPLER_ADDRESS:
303 return KernelArg::ArgType::IMPLICIT_SAMPLER_ADDRESS;
304 case ImplicitArg::SAMPLER_NORMALIZED:
305 return KernelArg::ArgType::IMPLICIT_SAMPLER_NORMALIZED;
306 case ImplicitArg::SAMPLER_SNAP_WA:
307 return KernelArg::ArgType::IMPLICIT_SAMPLER_SNAP_WA;
308 case ImplicitArg::FLAT_IMAGE_BASEOFFSET:
309 return KernelArg::ArgType::IMPLICIT_FLAT_IMAGE_BASEOFFSET;
310 case ImplicitArg::FLAT_IMAGE_HEIGHT:
311 return KernelArg::ArgType::IMPLICIT_FLAT_IMAGE_HEIGHT;
312 case ImplicitArg::FLAT_IMAGE_WIDTH:
313 return KernelArg::ArgType::IMPLICIT_FLAT_IMAGE_WIDTH;
314 case ImplicitArg::FLAT_IMAGE_PITCH:
315 return KernelArg::ArgType::IMPLICIT_FLAT_IMAGE_PITCH;
316
317 case ImplicitArg::VME_MB_BLOCK_TYPE:
318 return KernelArg::ArgType::IMPLICIT_VME_MB_BLOCK_TYPE;
319 case ImplicitArg::VME_SUBPIXEL_MODE:
320 return KernelArg::ArgType::IMPLICIT_VME_SUBPIXEL_MODE;
321 case ImplicitArg::VME_SAD_ADJUST_MODE:
322 return KernelArg::ArgType::IMPLICIT_VME_SAD_ADJUST_MODE;
323 case ImplicitArg::VME_SEARCH_PATH_TYPE:
324 return KernelArg::ArgType::IMPLICIT_VME_SEARCH_PATH_TYPE;
325
326 case ImplicitArg::DEVICE_ENQUEUE_DEFAULT_DEVICE_QUEUE:
327 return KernelArg::ArgType::IMPLICIT_DEVICE_ENQUEUE_DEFAULT_DEVICE_QUEUE;
328 case ImplicitArg::DEVICE_ENQUEUE_EVENT_POOL:
329 return KernelArg::ArgType::IMPLICIT_DEVICE_ENQUEUE_EVENT_POOL;
330 case ImplicitArg::DEVICE_ENQUEUE_MAX_WORKGROUP_SIZE:
331 return KernelArg::ArgType::IMPLICIT_DEVICE_ENQUEUE_MAX_WORKGROUP_SIZE;
332 case ImplicitArg::DEVICE_ENQUEUE_PARENT_EVENT:
333 return KernelArg::ArgType::IMPLICIT_DEVICE_ENQUEUE_PARENT_EVENT;
334 case ImplicitArg::DEVICE_ENQUEUE_PREFERED_WORKGROUP_MULTIPLE:
335 return KernelArg::ArgType::IMPLICIT_DEVICE_ENQUEUE_PREFERED_WORKGROUP_MULTIPLE;
336 case ImplicitArg::GET_OBJECT_ID:
337 return KernelArg::ArgType::IMPLICIT_DEVICE_ENQUEUE_DATA_PARAMETER_OBJECT_ID;
338 case ImplicitArg::GET_BLOCK_SIMD_SIZE:
339 return KernelArg::ArgType::IMPLICIT_DEVICE_ENQUEUE_DISPATCHER_SIMD_SIZE;
340
341
342 case ImplicitArg::LOCAL_MEMORY_STATELESS_WINDOW_START_ADDRESS:
343 return KernelArg::ArgType::IMPLICIT_LOCAL_MEMORY_STATELESS_WINDOW_START_ADDRESS;
344 case ImplicitArg::LOCAL_MEMORY_STATELESS_WINDOW_SIZE:
345 return KernelArg::ArgType::IMPLICIT_LOCAL_MEMORY_STATELESS_WINDOW_SIZE;
346 case ImplicitArg::PRIVATE_MEMORY_STATELESS_SIZE:
347 return KernelArg::ArgType::IMPLICIT_PRIVATE_MEMORY_STATELESS_SIZE;
348 case ImplicitArg::BINDLESS_OFFSET:
349 return KernelArg::ArgType::IMPLICIT_BINDLESS_OFFSET;
350
351 case ImplicitArg::IMPLICIT_ARG_BUFFER_PTR:
352 return KernelArg::ArgType::IMPLICIT_ARG_BUFFER;
353 case ImplicitArg::IMPLICIT_ARG_LOCALID:
354 return KernelArg::ArgType::IMPLICIT_LOCAL_ID_BUFFER;
355 default:
356 return KernelArg::ArgType::NOT_TO_ALLOCATE;
357 }
358 }
359
calcAccessQual(const Argument * arg,const StringRef qualStr) const360 KernelArg::AccessQual KernelArg::calcAccessQual(const Argument* arg, const StringRef qualStr) const
361 {
362 if (qualStr.equals("read_write"))
363 return READ_WRITE;
364
365 if (qualStr.startswith("read"))
366 return READ_ONLY;
367
368 if (qualStr.startswith("write"))
369 return WRITE_ONLY;
370
371 return NONE;
372 }
373
calcAssociatedArgNo(const ImplicitArg & implicitArg,const Argument * arg,unsigned int ExplicitArgNo) const374 unsigned int KernelArg::calcAssociatedArgNo(const ImplicitArg& implicitArg, const Argument* arg, unsigned int ExplicitArgNo) const
375 {
376 ImplicitArg::ArgType argType = implicitArg.getArgType();
377 if ((ImplicitArgs::isImplicitImage(argType)) ||
378 (ImplicitArgs::isImplicitStruct(argType)) ||
379 (argType == ImplicitArg::GET_OBJECT_ID) ||
380 (argType == ImplicitArg::GET_BLOCK_SIMD_SIZE) ||
381 (argType == ImplicitArg::BUFFER_OFFSET) ||
382 (argType == ImplicitArg::BINDLESS_OFFSET)
383 )
384 {
385 // For implicit image and sampler and struct arguments and buffer offset,
386 // the implicit arg's value represents the index of the associated
387 // image/sampler/pointer argument
388 return ExplicitArgNo;
389 }
390 return arg->getArgNo();
391 }
392
getNumComponents() const393 unsigned int KernelArg::getNumComponents() const
394 {
395 if (IGCLLVM::FixedVectorType * vecType = dyn_cast<IGCLLVM::FixedVectorType>(m_arg->getType()))
396 {
397 // Vector
398 return int_cast<unsigned int>(vecType->getNumElements());
399 }
400
401 // Scalar
402 return 1;
403 }
404
getAlignment() const405 size_t KernelArg::getAlignment() const
406 {
407 return m_align;
408 }
409
getAllocateSize() const410 unsigned int KernelArg::getAllocateSize() const
411 {
412 return int_cast<unsigned int>(llvm::alignTo(m_allocateSize, iOpenCL::DATA_PARAMETER_DATA_SIZE));
413 }
414
getElemAllocateSize() const415 unsigned int KernelArg::getElemAllocateSize() const
416 {
417 return m_elemAllocateSize;
418 }
419
isConstantBuf() const420 bool KernelArg::isConstantBuf() const
421 {
422 return m_isConstantBuf;
423 }
424
typeAlwaysNeedsAllocation() const425 bool KernelArg::typeAlwaysNeedsAllocation() const
426 {
427 return m_argType < KernelArg::ArgType::NOT_TO_ALLOCATE;
428 }
429
needsAllocation() const430 bool KernelArg::needsAllocation() const
431 {
432 return m_needsAllocation;
433 }
434
getArgType() const435 KernelArg::ArgType KernelArg::getArgType() const {
436 return m_argType;
437 }
438
getAccessQual() const439 KernelArg::AccessQual KernelArg::getAccessQual() const
440 {
441 return m_accessQual;
442 }
443
getArg() const444 const Argument* KernelArg::getArg() const
445 {
446 return m_arg;
447 }
448
getAssociatedArgNo() const449 unsigned int KernelArg::getAssociatedArgNo() const
450 {
451 return m_associatedArgNo;
452 }
453
getStructArgOffset() const454 unsigned int KernelArg::getStructArgOffset() const
455 {
456 return m_structArgOffset;
457 }
458
getLocationCount() const459 unsigned int KernelArg::getLocationCount() const
460 {
461 return m_locationCount;
462 }
463
getLocationIndex() const464 unsigned int KernelArg::getLocationIndex() const
465 {
466 return m_locationIndex;
467 }
468
isImage(const Argument * arg,const StringRef typeStr,ArgType & imageArgType)469 bool KernelArg::isImage(const Argument* arg, const StringRef typeStr, ArgType& imageArgType)
470 {
471 if (!typeStr.startswith("image") && !typeStr.startswith("bindless"))
472 return false;
473
474 // Get the original OpenCL type from the metadata and check if it's an image
475 // clang 3.8 introduced a new type mangling that includes the image access qualifier.
476 // Accept those too.
477 std::vector<std::string> accessQual{ "_t", "_ro_t", "_wo_t", "_rw_t" };
478 for (auto& postfix : accessQual)
479 {
480 if (typeStr.equals("image1d" + postfix))
481 {
482 imageArgType = ArgType::IMAGE_1D;
483 return true;
484 }
485
486 if (typeStr.equals("image1d_buffer" + postfix))
487 {
488 imageArgType = ArgType::IMAGE_1D_BUFFER;
489 return true;
490 }
491
492 if (typeStr.equals("image2d" + postfix))
493 {
494 imageArgType = ArgType::IMAGE_2D;
495 return true;
496 }
497
498 if (typeStr.equals("image2d_depth" + postfix))
499 {
500 imageArgType = ArgType::IMAGE_2D_DEPTH;
501 return true;
502 }
503
504 if (typeStr.equals("image2d_msaa" + postfix))
505 {
506 imageArgType = ArgType::IMAGE_2D_MSAA;
507 return true;
508 }
509
510 if (typeStr.equals("image2d_msaa_depth" + postfix))
511 {
512 imageArgType = ArgType::IMAGE_2D_MSAA_DEPTH;
513 return true;
514 }
515
516 if (typeStr.equals("image3d" + postfix))
517 {
518 imageArgType = ArgType::IMAGE_3D;
519 return true;
520 }
521
522 if (typeStr.equals("image1d_array" + postfix))
523 {
524 imageArgType = ArgType::IMAGE_1D_ARRAY;
525 return true;
526 }
527
528 if (typeStr.equals("image2d_array" + postfix))
529 {
530 imageArgType = ArgType::IMAGE_2D_ARRAY;
531 return true;
532 }
533
534 if (typeStr.equals("image2d_array_depth" + postfix))
535 {
536 imageArgType = ArgType::IMAGE_2D_DEPTH_ARRAY;
537 return true;
538 }
539
540 if (typeStr.equals("image2d_array_msaa" + postfix))
541 {
542 imageArgType = ArgType::IMAGE_2D_MSAA_ARRAY;
543 return true;
544 }
545
546 if (typeStr.equals("image2d_array_msaa_depth" + postfix))
547 {
548 imageArgType = ArgType::IMAGE_2D_MSAA_DEPTH_ARRAY;
549 return true;
550 }
551 }
552
553 // See if these are address space decoded args.
554 // Get the original OpenCL type from the metadata and check if it's an image
555 if (typeStr.equals("bindless_image1d_t"))
556 {
557 imageArgType = ArgType::BINDLESS_IMAGE_1D;
558 return true;
559 }
560
561 if (typeStr.equals("bindless_image1d_buffer_t"))
562 {
563 imageArgType = ArgType::BINDLESS_IMAGE_1D_BUFFER;
564 return true;
565 }
566
567 if (typeStr.equals("bindless_image2d_t"))
568 {
569 imageArgType = ArgType::BINDLESS_IMAGE_2D;
570 return true;
571 }
572
573 if (typeStr.equals("bindless_image2d_depth_t"))
574 {
575 imageArgType = ArgType::BINDLESS_IMAGE_2D_DEPTH;
576 return true;
577 }
578
579 if (typeStr.equals("bindless_image2d_msaa_t"))
580 {
581 imageArgType = ArgType::BINDLESS_IMAGE_2D_MSAA;
582 return true;
583 }
584
585 if (typeStr.equals("bindless_image2d_msaa_depth_t"))
586 {
587 imageArgType = ArgType::BINDLESS_IMAGE_2D_MSAA_DEPTH;
588 return true;
589 }
590
591 if (typeStr.equals("bindless_image3d_t"))
592 {
593 imageArgType = ArgType::BINDLESS_IMAGE_3D;
594 return true;
595 }
596
597 if (typeStr.equals("bindless_image_cube_array_t"))
598 {
599 imageArgType = ArgType::BINDLESS_IMAGE_CUBE_ARRAY;
600 return true;
601 }
602
603 if (typeStr.equals("bindless_image_cube_t"))
604 {
605 imageArgType = ArgType::BINDLESS_IMAGE_CUBE;
606 return true;
607 }
608
609 if (typeStr.equals("bindless_image1d_array_t"))
610 {
611 imageArgType = ArgType::BINDLESS_IMAGE_1D_ARRAY;
612 return true;
613 }
614
615 if (typeStr.equals("bindless_image2d_array_t"))
616 {
617 imageArgType = ArgType::BINDLESS_IMAGE_2D_ARRAY;
618 return true;
619 }
620
621 if (typeStr.equals("bindless_image2d_array_depth_t"))
622 {
623 imageArgType = ArgType::BINDLESS_IMAGE_2D_DEPTH_ARRAY;
624 return true;
625 }
626
627 if (typeStr.equals("bindless_image2d_array_msaa_t"))
628 {
629 imageArgType = ArgType::BINDLESS_IMAGE_2D_MSAA_ARRAY;
630 return true;
631 }
632
633 if (typeStr.equals("bindless_image2d_array_msaa_depth_t"))
634 {
635 imageArgType = ArgType::BINDLESS_IMAGE_2D_MSAA_DEPTH_ARRAY;
636 return true;
637 }
638
639 if (typeStr.equals("bindless_image_cube_array_depth_t"))
640 {
641 imageArgType = ArgType::BINDLESS_IMAGE_CUBE_DEPTH_ARRAY;
642 return true;
643 }
644
645 if (typeStr.equals("bindless_image_cube_depth_t"))
646 {
647 imageArgType = ArgType::BINDLESS_IMAGE_CUBE_DEPTH;
648 return true;
649 }
650
651 return false;
652 }
653
isSampler(const Argument * arg,const StringRef typeStr)654 bool KernelArg::isSampler(const Argument* arg, const StringRef typeStr)
655 {
656 // Get the original OpenCL type from the metadata and check if it's a sampler
657 return (typeStr.equals("sampler_t"));
658 }
659
isBindlessSampler(const Argument * arg,const StringRef typeStr)660 bool KernelArg::isBindlessSampler(const Argument* arg, const StringRef typeStr)
661 {
662 return (typeStr.equals("bindless_sampler_t"));
663 }
664
getDataParamToken() const665 iOpenCL::DATA_PARAMETER_TOKEN KernelArg::getDataParamToken() const
666 {
667 auto iter = argTypeTokenMap.find(m_argType);
668 if (iter == argTypeTokenMap.end())
669 {
670 return iOpenCL::DATA_PARAMETER_TOKEN_UNKNOWN;
671 }
672
673 return iter->second;
674 }
675
676
initArgTypeTokenMap()677 std::map<KernelArg::ArgType, iOpenCL::DATA_PARAMETER_TOKEN> initArgTypeTokenMap()
678 {
679 std::map<KernelArg::ArgType, iOpenCL::DATA_PARAMETER_TOKEN> map
680 {
681 { KernelArg::ArgType::IMPLICIT_LOCAL_IDS, iOpenCL::DATA_PARAMETER_LOCAL_ID },
682 { KernelArg::ArgType::IMPLICIT_WORK_DIM, iOpenCL::DATA_PARAMETER_WORK_DIMENSIONS },
683 { KernelArg::ArgType::IMPLICIT_NUM_GROUPS, iOpenCL::DATA_PARAMETER_NUM_WORK_GROUPS },
684 { KernelArg::ArgType::IMPLICIT_GLOBAL_SIZE, iOpenCL::DATA_PARAMETER_GLOBAL_WORK_SIZE },
685 { KernelArg::ArgType::IMPLICIT_LOCAL_SIZE, iOpenCL::DATA_PARAMETER_LOCAL_WORK_SIZE },
686 { KernelArg::ArgType::IMPLICIT_STAGE_IN_GRID_ORIGIN, iOpenCL::DATA_PARAMETER_STAGE_IN_GRID_ORIGIN },
687 { KernelArg::ArgType::IMPLICIT_STAGE_IN_GRID_SIZE, iOpenCL::DATA_PARAMETER_STAGE_IN_GRID_SIZE },
688 { KernelArg::ArgType::IMPLICIT_ENQUEUED_LOCAL_WORK_SIZE, iOpenCL::DATA_PARAMETER_ENQUEUED_LOCAL_WORK_SIZE },
689
690 { KernelArg::ArgType::IMPLICIT_IMAGE_HEIGHT, iOpenCL::DATA_PARAMETER_IMAGE_HEIGHT },
691 { KernelArg::ArgType::IMPLICIT_IMAGE_WIDTH, iOpenCL::DATA_PARAMETER_IMAGE_WIDTH },
692 { KernelArg::ArgType::IMPLICIT_IMAGE_DEPTH, iOpenCL::DATA_PARAMETER_IMAGE_DEPTH },
693 { KernelArg::ArgType::IMPLICIT_IMAGE_NUM_MIP_LEVELS, iOpenCL::DATA_PARAMETER_IMAGE_NUM_MIP_LEVELS },
694 { KernelArg::ArgType::IMPLICIT_IMAGE_CHANNEL_DATA_TYPE, iOpenCL::DATA_PARAMETER_IMAGE_CHANNEL_DATA_TYPE },
695 { KernelArg::ArgType::IMPLICIT_IMAGE_CHANNEL_ORDER, iOpenCL::DATA_PARAMETER_IMAGE_CHANNEL_ORDER },
696 { KernelArg::ArgType::IMPLICIT_IMAGE_SRGB_CHANNEL_ORDER, iOpenCL::DATA_PARAMETER_IMAGE_SRGB_CHANNEL_ORDER },
697 { KernelArg::ArgType::IMPLICIT_IMAGE_ARRAY_SIZE, iOpenCL::DATA_PARAMETER_IMAGE_ARRAY_SIZE },
698 { KernelArg::ArgType::IMPLICIT_IMAGE_NUM_SAMPLES, iOpenCL::DATA_PARAMETER_IMAGE_NUM_SAMPLES },
699 { KernelArg::ArgType::IMPLICIT_SAMPLER_ADDRESS, iOpenCL::DATA_PARAMETER_SAMPLER_ADDRESS_MODE },
700 { KernelArg::ArgType::IMPLICIT_SAMPLER_NORMALIZED, iOpenCL::DATA_PARAMETER_SAMPLER_NORMALIZED_COORDS },
701 { KernelArg::ArgType::IMPLICIT_SAMPLER_SNAP_WA, iOpenCL::DATA_PARAMETER_SAMPLER_COORDINATE_SNAP_WA_REQUIRED },
702 { KernelArg::ArgType::IMPLICIT_FLAT_IMAGE_BASEOFFSET, iOpenCL::DATA_PARAMETER_FLAT_IMAGE_BASEOFFSET },
703 { KernelArg::ArgType::IMPLICIT_FLAT_IMAGE_HEIGHT, iOpenCL::DATA_PARAMETER_FLAT_IMAGE_HEIGHT },
704 { KernelArg::ArgType::IMPLICIT_FLAT_IMAGE_WIDTH, iOpenCL::DATA_PARAMETER_FLAT_IMAGE_WIDTH },
705 { KernelArg::ArgType::IMPLICIT_FLAT_IMAGE_PITCH, iOpenCL::DATA_PARAMETER_FLAT_IMAGE_PITCH },
706
707 { KernelArg::ArgType::IMPLICIT_VME_MB_BLOCK_TYPE, iOpenCL::DATA_PARAMETER_VME_MB_BLOCK_TYPE },
708 { KernelArg::ArgType::IMPLICIT_VME_SUBPIXEL_MODE, iOpenCL::DATA_PARAMETER_VME_SUBPIXEL_MODE },
709 { KernelArg::ArgType::IMPLICIT_VME_SAD_ADJUST_MODE, iOpenCL::DATA_PARAMETER_VME_SAD_ADJUST_MODE },
710 { KernelArg::ArgType::IMPLICIT_VME_SEARCH_PATH_TYPE, iOpenCL::DATA_PARAMETER_VME_SEARCH_PATH_TYPE },
711
712 { KernelArg::ArgType::IMPLICIT_DEVICE_ENQUEUE_DEFAULT_DEVICE_QUEUE, iOpenCL::DATA_PARAMETER_VME_SEARCH_PATH_TYPE },
713 { KernelArg::ArgType::IMPLICIT_DEVICE_ENQUEUE_EVENT_POOL, iOpenCL::DATA_PARAMETER_PARENT_EVENT },
714
715 { KernelArg::ArgType::IMPLICIT_DEVICE_ENQUEUE_MAX_WORKGROUP_SIZE, iOpenCL::DATA_PARAMETER_MAX_WORKGROUP_SIZE },
716 { KernelArg::ArgType::IMPLICIT_DEVICE_ENQUEUE_PARENT_EVENT, iOpenCL::DATA_PARAMETER_PARENT_EVENT },
717 { KernelArg::ArgType::IMPLICIT_DEVICE_ENQUEUE_PREFERED_WORKGROUP_MULTIPLE, iOpenCL::DATA_PARAMETER_PREFERRED_WORKGROUP_MULTIPLE },
718 { KernelArg::ArgType::IMPLICIT_DEVICE_ENQUEUE_DATA_PARAMETER_OBJECT_ID, iOpenCL::DATA_PARAMETER_OBJECT_ID },
719 { KernelArg::ArgType::IMPLICIT_DEVICE_ENQUEUE_DISPATCHER_SIMD_SIZE, iOpenCL::DATA_PARAMETER_CHILD_BLOCK_SIMD_SIZE },
720
721 { KernelArg::ArgType::IMPLICIT_LOCAL_MEMORY_STATELESS_WINDOW_START_ADDRESS, iOpenCL::DATA_PARAMETER_LOCAL_MEMORY_STATELESS_WINDOW_START_ADDRESS },
722 { KernelArg::ArgType::IMPLICIT_LOCAL_MEMORY_STATELESS_WINDOW_SIZE, iOpenCL::DATA_PARAMETER_LOCAL_MEMORY_STATELESS_WINDOW_SIZE },
723 { KernelArg::ArgType::IMPLICIT_PRIVATE_MEMORY_STATELESS_SIZE, iOpenCL::DATA_PARAMETER_PRIVATE_MEMORY_STATELESS_SIZE },
724 { KernelArg::ArgType::IMPLICIT_BUFFER_OFFSET, iOpenCL::DATA_PARAMETER_BUFFER_OFFSET },
725 { KernelArg::ArgType::IMPLICIT_ARG_BUFFER, iOpenCL::DATA_PARAMETER_IMPL_ARG_BUFFER },
726 { KernelArg::ArgType::IMPLICIT_LOCAL_ID_BUFFER, iOpenCL::DATA_PARAMETER_LOCAL_ID_BUFFER }
727 };
728 return map;
729 }
730 const std::map<KernelArg::ArgType, iOpenCL::DATA_PARAMETER_TOKEN> KernelArg::argTypeTokenMap = initArgTypeTokenMap();
731
VerifyOrder(std::array<KernelArg::ArgType,static_cast<int32_t> (KernelArg::ArgType::End)> & order,KernelArg::ArgType sent)732 bool KernelArgsOrder::VerifyOrder(std::array<KernelArg::ArgType, static_cast<int32_t>(KernelArg::ArgType::End)>& order, KernelArg::ArgType sent)
733 {
734 bool validOrder = false;
735 // It's not safe to iterate over a random generated sentinel
736 if (order[static_cast<uint>(KernelArg::ArgType::End) - 1] == sent)
737 {
738 order[static_cast<uint>(KernelArg::ArgType::End) - 1] = KernelArg::ArgType::Default;
739 validOrder = true;
740 }
741 else
742 {
743 IGC_ASSERT(0);
744 }
745
746 return validOrder;
747 }
748
TransposeGenerateOrder(std::array<KernelArg::ArgType,static_cast<int32_t> (KernelArg::ArgType::End)> & order)749 void KernelArgsOrder::TransposeGenerateOrder(std::array<KernelArg::ArgType, static_cast<int32_t>(KernelArg::ArgType::End)>& order)
750 {
751 int i = 0;
752
753 for (const auto& j : order)
754 {
755 m_position[static_cast<uint32_t>(j)] = i++;
756 }
757 }
758
KernelArgsOrder(InputType layout)759 KernelArgsOrder::KernelArgsOrder(InputType layout)
760 {
761 const KernelArg::ArgType SENTINEL = KernelArg::ArgType::End;
762
763 switch (layout)
764 {
765 case InputType::INDEPENDENT:
766 case InputType::CURBE:
767 {
768 std::array<KernelArg::ArgType, static_cast<int32_t>(KernelArg::ArgType::End)> CURBE =
769 {
770 KernelArg::ArgType::IMPLICIT_R0,
771
772 KernelArg::ArgType::RUNTIME_VALUE,
773
774 KernelArg::ArgType::IMPLICIT_PAYLOAD_HEADER,
775
776 KernelArg::ArgType::PTR_LOCAL,
777 KernelArg::ArgType::PTR_GLOBAL,
778 KernelArg::ArgType::PTR_CONSTANT,
779 KernelArg::ArgType::PTR_DEVICE_QUEUE,
780
781 KernelArg::ArgType::CONSTANT_REG,
782
783 KernelArg::ArgType::IMPLICIT_CONSTANT_BASE,
784 KernelArg::ArgType::IMPLICIT_GLOBAL_BASE,
785 KernelArg::ArgType::IMPLICIT_PRIVATE_BASE,
786 KernelArg::ArgType::IMPLICIT_PRINTF_BUFFER,
787 KernelArg::ArgType::IMPLICIT_SYNC_BUFFER,
788 KernelArg::ArgType::IMPLICIT_BUFFER_OFFSET,
789 KernelArg::ArgType::IMPLICIT_WORK_DIM,
790 KernelArg::ArgType::IMPLICIT_NUM_GROUPS,
791 KernelArg::ArgType::IMPLICIT_GLOBAL_SIZE,
792 KernelArg::ArgType::IMPLICIT_LOCAL_SIZE,
793 KernelArg::ArgType::IMPLICIT_STAGE_IN_GRID_ORIGIN,
794 KernelArg::ArgType::IMPLICIT_STAGE_IN_GRID_SIZE,
795 KernelArg::ArgType::IMPLICIT_ENQUEUED_LOCAL_WORK_SIZE,
796
797 KernelArg::ArgType::IMPLICIT_BINDLESS_OFFSET,
798
799 KernelArg::ArgType::IMPLICIT_IMAGE_HEIGHT,
800 KernelArg::ArgType::IMPLICIT_IMAGE_WIDTH,
801 KernelArg::ArgType::IMPLICIT_IMAGE_DEPTH,
802 KernelArg::ArgType::IMPLICIT_IMAGE_NUM_MIP_LEVELS,
803 KernelArg::ArgType::IMPLICIT_IMAGE_CHANNEL_DATA_TYPE,
804 KernelArg::ArgType::IMPLICIT_IMAGE_CHANNEL_ORDER,
805 KernelArg::ArgType::IMPLICIT_IMAGE_SRGB_CHANNEL_ORDER,
806 KernelArg::ArgType::IMPLICIT_IMAGE_ARRAY_SIZE,
807 KernelArg::ArgType::IMPLICIT_IMAGE_NUM_SAMPLES,
808 KernelArg::ArgType::IMPLICIT_SAMPLER_ADDRESS,
809 KernelArg::ArgType::IMPLICIT_SAMPLER_NORMALIZED,
810 KernelArg::ArgType::IMPLICIT_SAMPLER_SNAP_WA,
811 KernelArg::ArgType::IMPLICIT_FLAT_IMAGE_BASEOFFSET,
812 KernelArg::ArgType::IMPLICIT_FLAT_IMAGE_HEIGHT,
813 KernelArg::ArgType::IMPLICIT_FLAT_IMAGE_WIDTH,
814 KernelArg::ArgType::IMPLICIT_FLAT_IMAGE_PITCH,
815
816 KernelArg::ArgType::IMPLICIT_VME_MB_BLOCK_TYPE,
817 KernelArg::ArgType::IMPLICIT_VME_SUBPIXEL_MODE,
818 KernelArg::ArgType::IMPLICIT_VME_SAD_ADJUST_MODE,
819 KernelArg::ArgType::IMPLICIT_VME_SEARCH_PATH_TYPE,
820
821 KernelArg::ArgType::IMPLICIT_DEVICE_ENQUEUE_DEFAULT_DEVICE_QUEUE,
822 KernelArg::ArgType::IMPLICIT_DEVICE_ENQUEUE_EVENT_POOL,
823 KernelArg::ArgType::IMPLICIT_DEVICE_ENQUEUE_MAX_WORKGROUP_SIZE,
824 KernelArg::ArgType::IMPLICIT_DEVICE_ENQUEUE_PARENT_EVENT,
825 KernelArg::ArgType::IMPLICIT_DEVICE_ENQUEUE_PREFERED_WORKGROUP_MULTIPLE,
826 KernelArg::ArgType::IMPLICIT_DEVICE_ENQUEUE_DATA_PARAMETER_OBJECT_ID,
827 KernelArg::ArgType::IMPLICIT_DEVICE_ENQUEUE_DISPATCHER_SIMD_SIZE,
828
829 KernelArg::ArgType::IMPLICIT_LOCAL_MEMORY_STATELESS_WINDOW_START_ADDRESS,
830 KernelArg::ArgType::IMPLICIT_LOCAL_MEMORY_STATELESS_WINDOW_SIZE,
831 KernelArg::ArgType::IMPLICIT_PRIVATE_MEMORY_STATELESS_SIZE,
832
833 KernelArg::ArgType::R1,
834 KernelArg::ArgType::IMPLICIT_LOCAL_IDS,
835
836 KernelArg::ArgType::IMPLICIT_ARG_BUFFER,
837 KernelArg::ArgType::IMPLICIT_LOCAL_ID_BUFFER,
838
839 KernelArg::ArgType::STRUCT,
840 KernelArg::ArgType::SAMPLER,
841 KernelArg::ArgType::IMAGE_1D,
842 KernelArg::ArgType::IMAGE_1D_BUFFER,
843 KernelArg::ArgType::IMAGE_2D,
844 KernelArg::ArgType::IMAGE_2D_DEPTH,
845 KernelArg::ArgType::IMAGE_2D_MSAA,
846 KernelArg::ArgType::IMAGE_2D_MSAA_DEPTH,
847 KernelArg::ArgType::IMAGE_3D,
848 KernelArg::ArgType::IMAGE_CUBE,
849 KernelArg::ArgType::IMAGE_CUBE_DEPTH,
850 KernelArg::ArgType::IMAGE_1D_ARRAY,
851 KernelArg::ArgType::IMAGE_2D_ARRAY,
852 KernelArg::ArgType::IMAGE_2D_DEPTH_ARRAY,
853 KernelArg::ArgType::IMAGE_2D_MSAA_ARRAY,
854 KernelArg::ArgType::IMAGE_2D_MSAA_DEPTH_ARRAY,
855 KernelArg::ArgType::IMAGE_CUBE_ARRAY,
856 KernelArg::ArgType::IMAGE_CUBE_DEPTH_ARRAY,
857
858 KernelArg::ArgType::BINDLESS_SAMPLER,
859 KernelArg::ArgType::BINDLESS_IMAGE_1D,
860 KernelArg::ArgType::BINDLESS_IMAGE_1D_BUFFER,
861 KernelArg::ArgType::BINDLESS_IMAGE_2D,
862 KernelArg::ArgType::BINDLESS_IMAGE_2D_DEPTH,
863 KernelArg::ArgType::BINDLESS_IMAGE_2D_MSAA,
864 KernelArg::ArgType::BINDLESS_IMAGE_2D_MSAA_DEPTH,
865 KernelArg::ArgType::BINDLESS_IMAGE_3D,
866 KernelArg::ArgType::BINDLESS_IMAGE_CUBE,
867 KernelArg::ArgType::BINDLESS_IMAGE_CUBE_DEPTH,
868 KernelArg::ArgType::BINDLESS_IMAGE_1D_ARRAY,
869 KernelArg::ArgType::BINDLESS_IMAGE_2D_ARRAY,
870 KernelArg::ArgType::BINDLESS_IMAGE_2D_DEPTH_ARRAY,
871 KernelArg::ArgType::BINDLESS_IMAGE_2D_MSAA_ARRAY,
872 KernelArg::ArgType::BINDLESS_IMAGE_2D_MSAA_DEPTH_ARRAY,
873 KernelArg::ArgType::BINDLESS_IMAGE_CUBE_ARRAY,
874 KernelArg::ArgType::BINDLESS_IMAGE_CUBE_DEPTH_ARRAY,
875 SENTINEL,
876 };
877
878 if (VerifyOrder(CURBE, SENTINEL))
879 {
880 TransposeGenerateOrder(CURBE);
881 }
882
883 }
884 break;
885 case InputType::INDIRECT:
886 {
887 std::array<KernelArg::ArgType, static_cast<int32_t>(KernelArg::ArgType::End)> INDIRECT =
888 {
889 KernelArg::ArgType::IMPLICIT_R0,
890
891 KernelArg::ArgType::R1,
892 KernelArg::ArgType::IMPLICIT_LOCAL_IDS,
893
894 KernelArg::ArgType::RUNTIME_VALUE,
895
896 KernelArg::ArgType::IMPLICIT_PAYLOAD_HEADER,
897 KernelArg::ArgType::PTR_LOCAL,
898 KernelArg::ArgType::PTR_GLOBAL,
899 KernelArg::ArgType::PTR_CONSTANT,
900 KernelArg::ArgType::PTR_DEVICE_QUEUE,
901 KernelArg::ArgType::CONSTANT_REG,
902
903 KernelArg::ArgType::IMPLICIT_CONSTANT_BASE,
904 KernelArg::ArgType::IMPLICIT_GLOBAL_BASE,
905 KernelArg::ArgType::IMPLICIT_PRIVATE_BASE,
906 KernelArg::ArgType::IMPLICIT_PRINTF_BUFFER,
907 KernelArg::ArgType::IMPLICIT_SYNC_BUFFER,
908 KernelArg::ArgType::IMPLICIT_BUFFER_OFFSET,
909 KernelArg::ArgType::IMPLICIT_WORK_DIM,
910 KernelArg::ArgType::IMPLICIT_NUM_GROUPS,
911 KernelArg::ArgType::IMPLICIT_GLOBAL_SIZE,
912 KernelArg::ArgType::IMPLICIT_LOCAL_SIZE,
913 KernelArg::ArgType::IMPLICIT_STAGE_IN_GRID_ORIGIN,
914 KernelArg::ArgType::IMPLICIT_STAGE_IN_GRID_SIZE,
915 KernelArg::ArgType::IMPLICIT_ENQUEUED_LOCAL_WORK_SIZE,
916
917 KernelArg::ArgType::IMPLICIT_BINDLESS_OFFSET,
918
919 KernelArg::ArgType::IMPLICIT_ARG_BUFFER,
920 KernelArg::ArgType::IMPLICIT_LOCAL_ID_BUFFER,
921
922 KernelArg::ArgType::IMPLICIT_IMAGE_HEIGHT,
923 KernelArg::ArgType::IMPLICIT_IMAGE_WIDTH,
924 KernelArg::ArgType::IMPLICIT_IMAGE_DEPTH,
925 KernelArg::ArgType::IMPLICIT_IMAGE_NUM_MIP_LEVELS,
926 KernelArg::ArgType::IMPLICIT_IMAGE_CHANNEL_DATA_TYPE,
927 KernelArg::ArgType::IMPLICIT_IMAGE_CHANNEL_ORDER,
928 KernelArg::ArgType::IMPLICIT_IMAGE_SRGB_CHANNEL_ORDER,
929 KernelArg::ArgType::IMPLICIT_IMAGE_ARRAY_SIZE,
930 KernelArg::ArgType::IMPLICIT_IMAGE_NUM_SAMPLES,
931 KernelArg::ArgType::IMPLICIT_SAMPLER_ADDRESS,
932 KernelArg::ArgType::IMPLICIT_SAMPLER_NORMALIZED,
933 KernelArg::ArgType::IMPLICIT_SAMPLER_SNAP_WA,
934 KernelArg::ArgType::IMPLICIT_FLAT_IMAGE_BASEOFFSET,
935 KernelArg::ArgType::IMPLICIT_FLAT_IMAGE_HEIGHT,
936 KernelArg::ArgType::IMPLICIT_FLAT_IMAGE_WIDTH,
937 KernelArg::ArgType::IMPLICIT_FLAT_IMAGE_PITCH,
938
939 KernelArg::ArgType::IMPLICIT_VME_MB_BLOCK_TYPE,
940 KernelArg::ArgType::IMPLICIT_VME_SUBPIXEL_MODE,
941 KernelArg::ArgType::IMPLICIT_VME_SAD_ADJUST_MODE,
942 KernelArg::ArgType::IMPLICIT_VME_SEARCH_PATH_TYPE,
943
944 KernelArg::ArgType::IMPLICIT_DEVICE_ENQUEUE_DEFAULT_DEVICE_QUEUE,
945 KernelArg::ArgType::IMPLICIT_DEVICE_ENQUEUE_EVENT_POOL,
946 KernelArg::ArgType::IMPLICIT_DEVICE_ENQUEUE_MAX_WORKGROUP_SIZE,
947 KernelArg::ArgType::IMPLICIT_DEVICE_ENQUEUE_PARENT_EVENT,
948 KernelArg::ArgType::IMPLICIT_DEVICE_ENQUEUE_PREFERED_WORKGROUP_MULTIPLE,
949 KernelArg::ArgType::IMPLICIT_DEVICE_ENQUEUE_DATA_PARAMETER_OBJECT_ID,
950 KernelArg::ArgType::IMPLICIT_DEVICE_ENQUEUE_DISPATCHER_SIMD_SIZE,
951
952 KernelArg::ArgType::IMPLICIT_LOCAL_MEMORY_STATELESS_WINDOW_START_ADDRESS,
953 KernelArg::ArgType::IMPLICIT_LOCAL_MEMORY_STATELESS_WINDOW_SIZE,
954 KernelArg::ArgType::IMPLICIT_PRIVATE_MEMORY_STATELESS_SIZE,
955
956 KernelArg::ArgType::STRUCT,
957 KernelArg::ArgType::SAMPLER,
958 KernelArg::ArgType::IMAGE_1D,
959 KernelArg::ArgType::IMAGE_1D_BUFFER,
960 KernelArg::ArgType::IMAGE_2D,
961 KernelArg::ArgType::IMAGE_2D_DEPTH,
962 KernelArg::ArgType::IMAGE_2D_MSAA,
963 KernelArg::ArgType::IMAGE_2D_MSAA_DEPTH,
964 KernelArg::ArgType::IMAGE_3D,
965 KernelArg::ArgType::IMAGE_CUBE,
966 KernelArg::ArgType::IMAGE_CUBE_DEPTH,
967 KernelArg::ArgType::IMAGE_1D_ARRAY,
968 KernelArg::ArgType::IMAGE_2D_ARRAY,
969 KernelArg::ArgType::IMAGE_2D_DEPTH_ARRAY,
970 KernelArg::ArgType::IMAGE_2D_MSAA_ARRAY,
971 KernelArg::ArgType::IMAGE_2D_MSAA_DEPTH_ARRAY,
972 KernelArg::ArgType::IMAGE_CUBE_ARRAY,
973 KernelArg::ArgType::IMAGE_CUBE_DEPTH_ARRAY,
974
975 KernelArg::ArgType::BINDLESS_SAMPLER,
976 KernelArg::ArgType::BINDLESS_IMAGE_1D,
977 KernelArg::ArgType::BINDLESS_IMAGE_1D_BUFFER,
978 KernelArg::ArgType::BINDLESS_IMAGE_2D,
979 KernelArg::ArgType::BINDLESS_IMAGE_2D_DEPTH,
980 KernelArg::ArgType::BINDLESS_IMAGE_2D_MSAA,
981 KernelArg::ArgType::BINDLESS_IMAGE_2D_MSAA_DEPTH,
982 KernelArg::ArgType::BINDLESS_IMAGE_3D,
983 KernelArg::ArgType::BINDLESS_IMAGE_CUBE,
984 KernelArg::ArgType::BINDLESS_IMAGE_CUBE_DEPTH,
985 KernelArg::ArgType::BINDLESS_IMAGE_1D_ARRAY,
986 KernelArg::ArgType::BINDLESS_IMAGE_2D_ARRAY,
987 KernelArg::ArgType::BINDLESS_IMAGE_2D_DEPTH_ARRAY,
988 KernelArg::ArgType::BINDLESS_IMAGE_2D_MSAA_ARRAY,
989 KernelArg::ArgType::BINDLESS_IMAGE_2D_MSAA_DEPTH_ARRAY,
990 KernelArg::ArgType::BINDLESS_IMAGE_CUBE_ARRAY,
991 KernelArg::ArgType::BINDLESS_IMAGE_CUBE_DEPTH_ARRAY,
992 SENTINEL,
993 };
994
995 if (VerifyOrder(INDIRECT, SENTINEL))
996 {
997 TransposeGenerateOrder(INDIRECT);
998 }
999 }
1000 break;
1001 default:
1002 IGC_ASSERT(0);
1003 break;
1004 }
1005 }
1006
operator ()(const KernelArg::ArgType & lhs,const KernelArg::ArgType & rhs) const1007 bool KernelArgsOrder::operator()(const KernelArg::ArgType& lhs, const KernelArg::ArgType& rhs) const
1008 {
1009 return m_position[static_cast<int32_t>(lhs)] < m_position[static_cast<int32_t>(rhs)];
1010 }
1011
const_iterator(AllocationArgs & args,IterPos pos)1012 KernelArgs::const_iterator::const_iterator(AllocationArgs& args, IterPos pos)
1013 {
1014 m_empty = args.empty();
1015 if (pos == IterPos::BEGIN)
1016 {
1017 m_major = args.begin();
1018 m_majorEnd = args.end();
1019 if (!m_empty)
1020 m_minor = (*args.begin()).second.begin();
1021 }
1022 else if (pos == IterPos::END)
1023 {
1024 m_major = args.end();
1025 m_majorEnd = args.end();
1026 if (!m_empty)
1027 m_minor = (*(--args.end())).second.end();
1028 }
1029 }
1030
operator ++()1031 KernelArgs::const_iterator& KernelArgs::const_iterator::operator++()
1032 {
1033 IGC_ASSERT(!m_empty);
1034 ++m_minor;
1035
1036 if (m_minor == (*m_major).second.end())
1037 {
1038 ++m_major;
1039 if (m_major != m_majorEnd)
1040 {
1041 m_minor = (*m_major).second.begin();
1042 }
1043 }
1044
1045 return *this;
1046 }
1047
operator *()1048 const KernelArg& KernelArgs::const_iterator::operator*()
1049 {
1050 IGC_ASSERT(!m_empty);
1051 return *m_minor;
1052 }
1053
operator !=(const const_iterator & iterator)1054 bool KernelArgs::const_iterator::operator!=(const const_iterator& iterator)
1055 {
1056 if (m_empty)
1057 return (m_major != iterator.m_major);
1058 else
1059 return (m_major != iterator.m_major) || (m_minor != iterator.m_minor);
1060 }
1061
operator ==(const const_iterator & iterator)1062 bool KernelArgs::const_iterator::operator==(const const_iterator& iterator)
1063 {
1064 if (m_empty)
1065 return (m_major == iterator.m_major);
1066 else
1067 return (m_major == iterator.m_major) && (m_minor == iterator.m_minor);
1068 }
1069
KernelArgs(const Function & F,const DataLayout * DL,MetaDataUtils * pMdUtils,ModuleMetaData * moduleMD,unsigned int GRFSize,KernelArgsOrder::InputType layout)1070 KernelArgs::KernelArgs(const Function& F, const DataLayout* DL, MetaDataUtils* pMdUtils, ModuleMetaData* moduleMD, unsigned int GRFSize, KernelArgsOrder::InputType layout)
1071 : m_KernelArgsOrder(layout),
1072 m_args(m_KernelArgsOrder)
1073 {
1074 ImplicitArgs implicitArgs(F, pMdUtils);
1075 const unsigned int numImplicitArgs = implicitArgs.size();
1076 const unsigned int numRuntimeValue = moduleMD ? moduleMD->pushInfo.constantReg.size() : 0;
1077 IGC_ASSERT_MESSAGE(F.arg_size() >= (numImplicitArgs + numRuntimeValue), "Function arg size does not match meta data args.");
1078 const unsigned int numExplicitArgs = F.arg_size() - numImplicitArgs - numRuntimeValue;
1079 llvm::Function::const_arg_iterator funcArg = F.arg_begin();
1080
1081 FunctionInfoMetaDataHandle funcInfoMD = pMdUtils->getFunctionsInfoItem(const_cast<llvm::Function*>(&F));
1082 // Explicit function args
1083 for (unsigned int i = 0, e = numExplicitArgs; i < e; ++i, ++funcArg)
1084 {
1085 bool needAllocation = false;
1086 if (moduleMD && moduleMD->UseBindlessImage)
1087 {
1088 // Check for bindless images which require allocation
1089 FunctionMetaData* funcMD = &moduleMD->FuncMD[const_cast<llvm::Function*>(&F)];
1090 ResourceAllocMD* resAllocMD = &funcMD->resAllocMD;
1091 if (resAllocMD->argAllocMDList.size() > funcArg->getArgNo())
1092 {
1093 ArgAllocMD* argAlloc = &resAllocMD->argAllocMDList[funcArg->getArgNo()];
1094 if (argAlloc->type == ResourceTypeEnum::BindlessUAVResourceType ||
1095 argAlloc->type == ResourceTypeEnum::BindlessSamplerResourceType)
1096 {
1097 needAllocation = !funcArg->use_empty();
1098 }
1099 }
1100 }
1101
1102 int location_index = -1;
1103 int location_count = -1;
1104 bool is_emulation_argument = false;
1105
1106 auto it = moduleMD->FuncMD.find(const_cast<Function*>(&F));
1107 if (it != moduleMD->FuncMD.end())
1108 {
1109 if (it->second.funcArgs.size() > (unsigned)i)
1110 {
1111 location_index = it->second.funcArgs[i].bufferLocationIndex;
1112 location_count = it->second.funcArgs[i].bufferLocationCount;
1113 is_emulation_argument = it->second.funcArgs[i].isEmulationArg;
1114 }
1115 }
1116
1117 std::string argBaseType = "";
1118 std::string argAccessQualItem = "";
1119
1120 if (it != moduleMD->FuncMD.end())
1121 {
1122 if (it->second.m_OpenCLArgBaseTypes.size() > (unsigned)i)
1123 argBaseType = it->second.m_OpenCLArgBaseTypes[i];
1124 if (it->second.m_OpenCLArgAccessQualifiers.size() > (unsigned)i)
1125 argAccessQualItem = it->second.m_OpenCLArgAccessQualifiers[i];
1126 }
1127
1128 KernelArg kernelArg = KernelArg(
1129 &(*funcArg),
1130 DL,
1131 argBaseType,
1132 argAccessQualItem,
1133 location_index,
1134 location_count,
1135 needAllocation,
1136 is_emulation_argument);
1137
1138 if ((kernelArg.getArgType() == KernelArg::ArgType::IMAGE_3D ||
1139 kernelArg.getArgType() == KernelArg::ArgType::BINDLESS_IMAGE_3D) &&
1140 funcInfoMD->isArgInfoListHasValue()) {
1141 for (auto AI = funcInfoMD->begin_ArgInfoList(), AE = funcInfoMD->end_ArgInfoList(); AI != AE; ++AI) {
1142 ArgInfoMetaDataHandle argInfo = *AI;
1143 if (argInfo->getExplicitArgNum() == i) {
1144 if (argInfo->isImgAccessFloatCoordsHasValue() && argInfo->isImgAccessIntCoordsHasValue()) {
1145 kernelArg.setImgAccessedFloatCoords(argInfo->getImgAccessFloatCoords());
1146 kernelArg.setImgAccessedIntCoords(argInfo->getImgAccessIntCoords());
1147 break;
1148 }
1149 }
1150 }
1151 }
1152
1153 addAllocationArg(kernelArg);
1154 }
1155
1156 // Implicit function args
1157 for (unsigned int i = 0; i < numImplicitArgs; ++i, ++funcArg)
1158 {
1159 KernelArg kernelArg = KernelArg(implicitArgs[i], DL, &(*funcArg), implicitArgs.getExplicitArgNum(i), implicitArgs.getStructArgOffset(i), GRFSize);
1160 addAllocationArg(kernelArg);
1161 }
1162
1163 // Need to add Runtime Values, so they can trigger NOSBuffer allocation in correct
1164 // order (especially needed when InputType::INDEPENDENT or InputType::CURBE is used).
1165 for (unsigned int i = 0; i < numRuntimeValue; ++i, ++funcArg)
1166 {
1167 KernelArg kernelArg = KernelArg(
1168 KernelArg::ArgType::RUNTIME_VALUE, // argType
1169 KernelArg::AccessQual::NONE, // accessQual
1170 4, // allocateSize
1171 4, // elemAllocateSize
1172 4, // align
1173 true, // isConstantBuf
1174 &(*funcArg), // arg
1175 numExplicitArgs + numImplicitArgs + 1); // associatedArgNo
1176 addAllocationArg(kernelArg);
1177 }
1178 }
1179
addAllocationArg(KernelArg & kernelArg)1180 void KernelArgs::addAllocationArg(KernelArg& kernelArg)
1181 {
1182 KernelArg::ArgType argType = kernelArg.getArgType();
1183
1184 // Add to the allocation arguments of this type
1185 m_args[argType].push_back(kernelArg);
1186 }
1187
begin()1188 KernelArgs::const_iterator KernelArgs::begin()
1189 {
1190 return const_iterator(m_args, KernelArgs::const_iterator::IterPos::BEGIN);
1191 }
1192
end()1193 KernelArgs::const_iterator KernelArgs::end()
1194 {
1195 return const_iterator(m_args, KernelArgs::const_iterator::IterPos::END);
1196 }
1197
checkForZeroPerThreadData()1198 void KernelArgs::checkForZeroPerThreadData()
1199 {
1200
1201 // On SKL, when we use Indirect thread payload, Spec says:
1202 // if Cross-Thread Constant Data Read Length for Indirect is greater than 0,
1203 // then Per thread data field must also be greater than 0.
1204 // In that case we allocate one blank payload grf for Per thread constant.
1205
1206 // if PTD == 0 && CTCD > 0 then we would need to allocate a dummy argument to occupy a single GRF in a PTD
1207 // PTD 1 && CTCD > 0 is perfectly OK
1208 int PerThreadData = 0;
1209 bool HWWAForZeroLengthPTDRequired = true;
1210 for (AllocationArgs::const_iterator i = m_args.begin(), e = m_args.end(); i != e; ++i)
1211 {
1212 const KernelArg* arg = i->second.data();
1213 if (arg->needsAllocation() && !arg->isConstantBuf())
1214 {
1215 if (++PerThreadData > 0 + 1 /* IMPLICIT_R0 */)
1216 {
1217 HWWAForZeroLengthPTDRequired = false;
1218 break;
1219 }
1220 }
1221 }
1222 if (HWWAForZeroLengthPTDRequired)
1223 {
1224 KernelArg kernelArg = KernelArg(KernelArg::ArgType::R1, KernelArg::AccessQual::NONE, 32, 4, 32, false, nullptr, 0);
1225 addAllocationArg(kernelArg);
1226 }
1227 }
1228
empty()1229 bool KernelArgs::empty()
1230 {
1231 return m_args.empty() ? true : begin() == end();
1232 }
1233
1234