/*========================== begin_copyright_notice ============================

Copyright (C) 2017-2021 Intel Corporation

SPDX-License-Identifier: MIT

============================= end_copyright_notice ===========================*/
8 
9 #pragma once
10 
#include "Compiler/CodeGenPublic.h"
#include "Compiler/MetaDataApi/MetaDataApi.h"

#include <array>
#include <cstddef>
#include <cstdint>
#include <map>
#include <vector>
18 
// Forward declarations of LLVM types, so that declarations in this header can
// name them (by pointer, reference, or in function signatures).
namespace llvm
{
    class Argument;
    class DataLayout;
    class Function;
    class MDNode;
    class Value;
    class StringRef;
}
28 
29 namespace IGC
30 {
31     class ImplicitArg;
32     /// @brief  KernelArg is used for representing the different OpenCL kernel arguments
33     ///         This class is used for arguments allocation
34 
35     class KernelArg {
36     public:
37 
38         /// @brief  Type of kernel arguments
39         enum class ArgType : int32_t {
40             // Argument types that should be allocated
41             Begin = 0,
42             Default = Begin,
43             IMPLICIT_R0,
44             R1,
45 
46             IMPLICIT_PAYLOAD_HEADER, // known as INPUT_HEADER in USC
47 
48             PTR_LOCAL,
49             PTR_GLOBAL,
50             PTR_CONSTANT,
51             PTR_DEVICE_QUEUE,
52 
53             CONSTANT_REG,
54             RUNTIME_VALUE,
55 
56             IMPLICIT_CONSTANT_BASE,
57             IMPLICIT_GLOBAL_BASE,
58             IMPLICIT_PRIVATE_BASE,
59 
60             IMPLICIT_PRINTF_BUFFER,
61             IMPLICIT_SYNC_BUFFER,
62 
63             IMPLICIT_BUFFER_OFFSET,
64 
65             IMPLICIT_WORK_DIM,
66             IMPLICIT_NUM_GROUPS,
67             IMPLICIT_GLOBAL_SIZE,
68             IMPLICIT_LOCAL_SIZE,
69             IMPLICIT_ENQUEUED_LOCAL_WORK_SIZE,
70 
71             IMPLICIT_IMAGE_HEIGHT,
72             IMPLICIT_IMAGE_WIDTH,
73             IMPLICIT_IMAGE_DEPTH,
74             IMPLICIT_IMAGE_NUM_MIP_LEVELS,
75             IMPLICIT_IMAGE_CHANNEL_DATA_TYPE,
76             IMPLICIT_IMAGE_CHANNEL_ORDER,
77             IMPLICIT_IMAGE_SRGB_CHANNEL_ORDER,
78             IMPLICIT_IMAGE_ARRAY_SIZE,
79             IMPLICIT_IMAGE_NUM_SAMPLES,
80             IMPLICIT_SAMPLER_ADDRESS,
81             IMPLICIT_SAMPLER_NORMALIZED,
82             IMPLICIT_SAMPLER_SNAP_WA,
83             IMPLICIT_FLAT_IMAGE_BASEOFFSET,
84             IMPLICIT_FLAT_IMAGE_HEIGHT,
85             IMPLICIT_FLAT_IMAGE_WIDTH,
86             IMPLICIT_FLAT_IMAGE_PITCH,
87 
88             // VME
89             IMPLICIT_VME_MB_BLOCK_TYPE,
90             IMPLICIT_VME_SUBPIXEL_MODE,
91             IMPLICIT_VME_SAD_ADJUST_MODE,
92             IMPLICIT_VME_SEARCH_PATH_TYPE,
93 
94             // Device Enqueue
95             IMPLICIT_DEVICE_ENQUEUE_DEFAULT_DEVICE_QUEUE,
96             IMPLICIT_DEVICE_ENQUEUE_EVENT_POOL,
97             IMPLICIT_DEVICE_ENQUEUE_MAX_WORKGROUP_SIZE,
98             IMPLICIT_DEVICE_ENQUEUE_PARENT_EVENT,
99             IMPLICIT_DEVICE_ENQUEUE_PREFERED_WORKGROUP_MULTIPLE,
100             IMPLICIT_DEVICE_ENQUEUE_DATA_PARAMETER_OBJECT_ID,
101             IMPLICIT_DEVICE_ENQUEUE_DISPATCHER_SIMD_SIZE,
102 
103             // Generic address space
104             IMPLICIT_LOCAL_MEMORY_STATELESS_WINDOW_START_ADDRESS,
105             IMPLICIT_LOCAL_MEMORY_STATELESS_WINDOW_SIZE,
106             IMPLICIT_PRIVATE_MEMORY_STATELESS_SIZE,
107 
108             IMPLICIT_LOCAL_IDS,
109 
110             // STAGE_IN_GRID runtime values
111             IMPLICIT_STAGE_IN_GRID_ORIGIN,
112             IMPLICIT_STAGE_IN_GRID_SIZE,
113 
114             IMPLICIT_BINDLESS_OFFSET,
115 
116             IMPLICIT_ARG_BUFFER,
117             IMPLICIT_LOCAL_ID_BUFFER,
118 
119             // Argument types that shouldn't be allocated
120             NOT_TO_ALLOCATE,
121             SAMPLER = NOT_TO_ALLOCATE,
122             IMAGE_1D,
123             IMAGE_1D_BUFFER,
124             IMAGE_2D,
125             IMAGE_2D_DEPTH,
126             IMAGE_2D_MSAA,
127             IMAGE_2D_MSAA_DEPTH,
128             IMAGE_3D,
129             IMAGE_CUBE,
130             IMAGE_CUBE_DEPTH,
131             IMAGE_1D_ARRAY,
132             IMAGE_2D_ARRAY,
133             IMAGE_2D_DEPTH_ARRAY,
134             IMAGE_2D_MSAA_ARRAY,
135             IMAGE_2D_MSAA_DEPTH_ARRAY,
136             IMAGE_CUBE_ARRAY,
137             IMAGE_CUBE_DEPTH_ARRAY,
138 
139             // Address space decoded Args
140             BINDLESS_SAMPLER,
141             BINDLESS_IMAGE_1D,
142             BINDLESS_IMAGE_1D_BUFFER,
143             BINDLESS_IMAGE_2D,
144             BINDLESS_IMAGE_2D_DEPTH,
145             BINDLESS_IMAGE_2D_MSAA,
146             BINDLESS_IMAGE_2D_MSAA_DEPTH,
147             BINDLESS_IMAGE_3D,
148             BINDLESS_IMAGE_CUBE,
149             BINDLESS_IMAGE_CUBE_DEPTH,
150             BINDLESS_IMAGE_1D_ARRAY,
151             BINDLESS_IMAGE_2D_ARRAY,
152             BINDLESS_IMAGE_2D_DEPTH_ARRAY,
153             BINDLESS_IMAGE_2D_MSAA_ARRAY,
154             BINDLESS_IMAGE_2D_MSAA_DEPTH_ARRAY,
155             BINDLESS_IMAGE_CUBE_ARRAY,
156             BINDLESS_IMAGE_CUBE_DEPTH_ARRAY,
157 
158             STRUCT,
159             End,
160         };
161 
162         enum AccessQual {
163             NONE,
164             READ_ONLY,
165             WRITE_ONLY,
166             READ_WRITE
167         };
168 
169         KernelArg(
170             ArgType argType,
171             AccessQual accessQual,
172             unsigned int allocateSize,
173             unsigned int elemAllocateSize,
174             size_t align,
175             bool isConstantBuf,
176             const llvm::Argument* arg,
177             unsigned int associatedArgNo);
178 
179         /// @brief  Constructor.
180         ///         Constructs a kernel argument information for explicit arguments
181         /// @param  arg         The LLVM explicit kernel argument
182         /// @param  DL          The DataLayout, used to determine allocation size
183         /// @param  typeStr     The OpenCL type information for the kernel this argument belongs to
184         /// @param  qualStr     The OpenCL access qualifier information for the kernel this argument belongs to
185         /// @param  location_index The location_index for buffer
186         /// @param  location_count The location_count for buffer
187         /// @param  needBindlessHandle   The presence of bindless resources in the shader
188         /// @param  isEmulationArgument  The information, whether this is an emulation argument (IAB)
189         KernelArg(const llvm::Argument* arg, const llvm::DataLayout* DL, const llvm::StringRef typeStr, const llvm::StringRef qualstr, int location_index, int location_count, bool needBindlessHandle, bool isEmulationArgument);
190 
191         /// @brief  Constructor.
192         ///         Constructs a kernel argument information for implicit arguments
193         /// @param  implicitArg The implicit kernel argument
194         /// @param  DL          The DataLayout, used to determine allocation size and alignment
195         /// @param  arg         The LLVM kernel argument associated with this implicit argument
196         /// @param  imageArgNo  The argument number of the associated image argument
197         ///                     This param has meaning only for image dimension implicit arguments
198         /// @param  structArgOffset  The argument offset in the associated struct argument
199         ///                     This param has meaning only for implicit arguments associated
200         ///                     with aggregation explicit argument
201         KernelArg(const ImplicitArg& implicitArg, const llvm::DataLayout* DL, const llvm::Argument* arg, unsigned int ExplicitArgNo, unsigned int structArgOffset, unsigned int GRFSize);
202 
203         /// @brief  Getter functions
204         ArgType                         getArgType()            const;
205         AccessQual                      getAccessQual()         const;
206         unsigned int                    getNumComponents()      const;
207         unsigned int                    getAllocateSize()       const;
208         unsigned int                    getElemAllocateSize()   const;
209         size_t                          getAlignment()          const;
210         bool                            isConstantBuf()         const;
211         bool                            needsAllocation()       const;
212         const llvm::Argument* getArg()                const;
213         unsigned int                    getAssociatedArgNo()    const;
214         unsigned int                    getStructArgOffset()    const;
215         unsigned int                    getLocationIndex()      const;
216         unsigned int                    getLocationCount()      const;
217         iOpenCL::DATA_PARAMETER_TOKEN   getDataParamToken()     const;
218         bool                            typeAlwaysNeedsAllocation() const;
getImgAccessedFloatCoords() const219         bool                            getImgAccessedFloatCoords() const { return m_imageInfo.accessedByFloatCoord; }
getImgAccessedIntCoords() const220         bool                            getImgAccessedIntCoords()   const { return m_imageInfo.accessedByIntCoord; }
isImplicitArg() const221         bool                            isImplicitArg() const { return m_implicitArgument; }
isEmulationArgument() const222         bool                            isEmulationArgument() const { return m_isEmulationArgument; }
223 
224         /// @brief  Setter functions
setImgAccessedFloatCoords(bool val)225         void     setImgAccessedFloatCoords(bool val) { m_imageInfo.accessedByFloatCoord = val; }
setImgAccessedIntCoords(bool val)226         void     setImgAccessedIntCoords(bool val) { m_imageInfo.accessedByIntCoord = val; }
227 
228         /// @brief  Calculates the kernel arg type for the given explicit argument
229         /// @param  arg         The explicit kernel argument
230         /// @param  typeStr    The OpenCL type information for the kernel this argument belongs to
231         /// @return The kernel argument type of the given explicit argument
232         static ArgType calcArgType(const llvm::Argument* arg, const llvm::StringRef typeStr);
233 
234         struct BufferArgType
235         {
236             KernelArg::ArgType type = KernelArg::ArgType::End;
237             bool isSampler = false;
238         };
239         /// @brief  Calculates the kernel arg type for buffer
240         /// @param  arg         The explicit kernel argument
241         /// @param  typeStr     The OpenCL type information for the kernel this argument belongs to
242         /// @return Pair of the kernel argument type of the given explicit argument and whether the type is really SAMPLER
243         static BufferArgType getBufferType(const llvm::Argument* arg, const llvm::StringRef typeStr);
244 
245         /// @brief  Checks whether the given argument is an image
246         /// @param  arg           The kernel argument
247         /// @param  typeStr       The OpenCL type information for the kernel this argument belongs to
248         /// @param  imageArgType  If this is an image, the argtype of this image
249         /// @return true is the given argument is an image, false otherwise
250         static bool isImage(const llvm::Argument* arg, const llvm::StringRef typeStr, ArgType& imageArgType);
251         static bool isSampler(const llvm::Argument* arg, const llvm::StringRef typeStr);
252 
253     private:
254         /// @brief  Calculates the allocation size needed for the given explicit argument
255         /// @param  arg         The kernel argument
256         /// @param  DL          The DataLayout, used to determine allocation size
257         /// @return The allocation size needed for the given explicit argument
258         unsigned int calcAllocateSize(const llvm::Argument* arg, const llvm::DataLayout* DL) const;
259 
260         /// @brief  Calculates the alignment needed for the given explicit argument
261         /// @param  arg         The kernel argument
262         /// @param  DL          The DataLayout, used to determine alignment size
263         /// @return The alignment needed for the given explicit argument
264         unsigned int calcAlignment(const llvm::Argument* arg, const llvm::DataLayout* DL) const;
265 
266         /// @brief  Calculates the allocation size needed for one vector element of the given
267         ///         explicit argument. IF the argument is scalar, it will return the allocation
268         ///         size of the whole element.
269         /// @param  arg         The kernel argument
270         /// @param  DL          The DataLayout, used to determine allocation size
271         /// @return The allocation size needed for one vector element of the given explicit argument
272         unsigned int calcElemAllocateSize(const llvm::Argument* arg, const llvm::DataLayout* DL) const;
273 
274         /// @brief  Calculates the kernel arg type for the given implicit argument
275         /// @param  arg         The implicit kernel argument
276         /// @return The kernel argument type of the given implicit argument
277         ArgType calcArgType(const ImplicitArg& arg) const;
278 
279         /// @brief  Calculates the access qualifier for the given explicit argument
280         /// @param  arg         The explicit kernel argument
281         /// @param  qualStr  The OpenCL access qualifier information for the kernel this argument belongs to
282         /// @return The kernel argument type of the given explicit argument
283         AccessQual calcAccessQual(const llvm::Argument* arg, const llvm::StringRef qualStr) const;
284 
285         /// @brief  Calculates the argument number of the argument associated with the given implicit argument.
286         ///         For implicit image arguments it will return the arg number of the image associated with the implicit arg.
287         ///         For implicit non-image arguments it will return the arg number of the implicit argument itself.
288         /// @param  implicitArg The implicit kernel argument
289         /// @param  arg         The kernel argument
290         /// @param  imageArgNo  If this implicit kernel argument is associated with an image or a sampler,
291         ///                     the argument number of that image/sampler.
292         /// @return The kernel argument type of the given implicit argument
293         unsigned int calcAssociatedArgNo(const ImplicitArg& implicitArg, const llvm::Argument* arg, unsigned int ExplicitArgNo) const;
294 
295         /// @brief  Checks whether the given argument is a sampler
296         /// @param  arg         The kernel argument
297         /// @param  typeStr    The OpenCL type information for the kernel this argument belongs to
298         /// @return true is the given argument is a sampler, false otherwise
299         static bool isBindlessSampler(const llvm::Argument* arg, const llvm::StringRef typeStr);
300 
301     private:
302         /// @brief  Is this an explicit or implicit argument
303         bool                            m_implicitArgument;
304         /// @brief  The argument type
305         ArgType                         m_argType;
306         /// @brief  The argument access qualifier
307         AccessQual                      m_accessQual;
308         /// @brief  The number of bytes needed for allocating the argument
309         unsigned int                    m_allocateSize;
310         /// @brief  The number of bytes needed for allocating one vector element of the argument
311         unsigned int                    m_elemAllocateSize;
312         /// @brief  The argument's alignment
313         ///         Must be declared after m_argType and m_allocateSize!
314         ///         (Order of initialization)
315         size_t                          m_align;
316         /// @brief  Indicates whether the argument is used in calculating the constant buffer length
317         bool                            m_isConstantBuf;
318         /// @brief  The LLVM argument that represents this kernel argument
319         const llvm::Argument* m_arg;
320         /// @brief  The argument number of the associated argument
321         ///         For image dimension/BUFFER_OFFSET arguments this will return the argument number
322         ///         of the assocaited image.
323         ///         For other arguments this will return the argument number of the LLVM argument
324         unsigned int                    m_associatedArgNo;
325         /// @brief  The argument struct offset in the associated struct explicit argument
326         ///         For struct byvalue arguments this will return the struct offset
327         ///         For other arguments this will return -1
328         unsigned int                    m_structArgOffset;
329 
330         /// @brief  The Location Index is the value passed from the frontend for buffers only.
331         int                             m_locationIndex;
332 
333         /// @brief  The Location Index is the value passed from the frontend for buffers only.
334         int                             m_locationCount;
335 
336         /// @brief Indicates if resource if of needs an allocation
337         bool                            m_needsAllocation;
338 
339         /// @brief Indicates if resource is an emulation argument (IAB)
340         bool                            m_isEmulationArgument;
341 
342         /// @brief
343         struct {
344             bool  accessedByFloatCoord;
345             bool  accessedByIntCoord;
346         }                               m_imageInfo;
347 
348     public:
349         /// @brief  If this argument has multiple data fields (aka, structs) then m_next points to
350         ///         the subsequent field in the struct.
351         std::vector<KernelArg>          m_subArguments;
352 
353         /// @brief Maps kernel argument types to their associated data param tokens
354         static const std::map<KernelArg::ArgType, iOpenCL::DATA_PARAMETER_TOKEN> argTypeTokenMap;
355     };
356 
357     /// @brief  KernelArgsOrder class is used to define an order in which CISA variables are mapped to
358     ///         a physical grf "payload" locations
359 
360     class KernelArgsOrder {
361     public:
362         /// @brief  Predefined input layouts
363         enum class InputType : uint32_t
364         {
365             CURBE,
366             INDIRECT,
367             INDEPENDENT,
368         };
369 
370     private:
371         /// @brief  Order of a payload arguments in a physical grf locations
372         ///         Index is an explicitly int32_t casted KernelArg::ArgType
373         ///         Value is a requested position
374         std::array<uint32_t, static_cast<int32_t>(KernelArg::ArgType::End)> m_position;
375 
376         /// @brief  Verifies that passed array defines order for all Argument Types
377         /// @param  order
378         /// @param  sent    Sentinel
379         bool VerifyOrder(std::array<KernelArg::ArgType, static_cast<int32_t>(KernelArg::ArgType::End)>& order, KernelArg::ArgType sent);
380 
381         /// @brief  Suppose that you have a 3 arguments: a, b, c and you want them shuffled to
382         ///         b c a
383         ///         1 2 0 value
384         ///         0 1 2 index
385         ///         That way c < a has to return true
386         ///
387         ///         Procedure would fill in a m_position with a
388         ///         2 0 1 value
389         ///         0 1 2 index
390         ///         That way when you would like to obtain a position of an argument you need to call
391         ///         PI(a) = 2;
392         /// @param  order
393         void TransposeGenerateOrder(std::array<KernelArg::ArgType, static_cast<int32_t>(KernelArg::ArgType::End)>& order);
394 
395     public:
396         /// @brief  Constructor
397         ///         Fills in an m_position array
398         /// @param  type    One of the predefined grf layouts
399         explicit KernelArgsOrder(InputType type);
400 
401         /// @brief  Returns true if the first argument is considered to go before the second in the
402         //          strict weak ordering it defines, and false otherwise
403         /// @param  lhs
404         /// @param  rhs
405         bool operator()(const KernelArg::ArgType& lhs, const KernelArg::ArgType& rhs) const;
406     };
407 
408     /// @brief  KernelArgs represent all the explicit and implicit kernel arguments and used for payload allocation
409 
410     class KernelArgs {
411         // Types
412     public:
413         /// @brief  AllocationArgs maps between each kernel argument type and all the arguments of that type
414         typedef std::map<KernelArg::ArgType, std::vector<KernelArg>, KernelArgsOrder> AllocationArgs;
415 
416     public:
417         /// @brief  KernelArgs::const_iterator enables constant iteration over the kernel arguments
418         ///         This enables iteration over a container of containers
419         class const_iterator {
420         public:
421             enum IterPos
422             {
423                 BEGIN,
424                 END,
425             };
426 
427             /// Constructor
428             /// @param  args       The allocation args structure
429             /// @param  pos        ENUM of initial iterator position: BEGIN or END
430             const_iterator(AllocationArgs& args, IterPos pos);
431 
432             /// @brief  Advances the iterator to the next element
433             /// @return The iterator, pointing to the next element
434             const_iterator& operator++();
435 
436             /// @brief  Returns the element the iterator points to
437             /// @return The element the iterator points to
438             const KernelArg& operator*();
439 
440             /// @brief  Checks whether this iterator and the given iterator are different
441             ///         by checking if they point to the same element
442             /// @param  iterator    An iterator to compare this iterator
443             /// @return true if the iterators pare different, false otherwise
444             bool operator!=(const const_iterator& iterator);
445 
446             /// @brief  Checks whether this iterator and the given iterator are same
447             ///         by checking if they point to the same element
448             /// @param  iterator    An iterator to compare this iterator
449             /// @return true if the iterators are same, false otherwise
450             bool operator==(const const_iterator& iterator);
451 
452         private:
453             AllocationArgs::const_iterator          m_major;
454             AllocationArgs::const_iterator          m_majorEnd;
455             std::vector<KernelArg>::const_iterator  m_minor;
456             bool                                    m_empty;
457         };
458 
459         // Member functions
460 
461     public:
462         /// @brief  Constructor.
463         ///         Constructs the function's explicit and implicit kernel arguments information
464         /// @param  F           The function for which to construct the kernel arguments
465         /// @param  DL          The DataLayout
466         /// @param  pMdUtils    The Metadata Utils instance for accessing metadata information
467         /// @param  layout      One of the predefined payload layout types
468         KernelArgs(const llvm::Function& F, const llvm::DataLayout* DL, IGCMD::MetaDataUtils* pMdUtils, ModuleMetaData* moduleMD, unsigned int GRFSize, KernelArgsOrder::InputType layout = KernelArgsOrder::InputType::INDEPENDENT);
469 
470         /// @brief  Returns a constant iterator to the beginning of the kernel arguments
471         /// @return A constant iterator to the beginning of the kernel arguments
472         const_iterator begin();
473 
474         /// @brief  Returns a constant iterator to the end of the kernel arguments
475         /// @return A constant iterator to the end of the kernel arguments
476         const_iterator end();
477 
478         /// #brief Check if we need to insert dummy per-thread data for OpenCL
479         ///
480         void checkForZeroPerThreadData();
481 
482         /// @brief  Checks if there are any kernel arguments
483         /// @return true if there are no arguments, false otherwise
484         bool empty();
485 
486     private:
487         /// @brief  Check if the given argument needs to be allocated and add it to the allocation args container.
488         /// @param  kernelArg   The kernel argument that might need to be allocated
489         void addAllocationArg(KernelArg& kernelArg);
490 
491         /// @brief  Returns OpenCL type info metadata for the given kernel
492         /// @param  F           The kernel for which to return the type info metadata
493         /// @return The type info metadata of the given kernel
494         llvm::MDNode* getTypeInfoMD(const llvm::Function& F);
495 
496         /// @brief  Returns OpenCL access qualifiers info metadata for the given kernel
497         /// @param  F           The kernel for which to return the access info metadata
498         /// @return The access qualifiers info metadata of the given kernel
499         llvm::MDNode* getAccessInfoMD(const llvm::Function& F);
500 
501         /// @brief  Returns the opencl kernel metadata associated with F
502         /// @param  F           The kernel for which to return the access info metadata
503         /// @param  index       The index of the metadata type we require
504         /// @return The metadata node associated with F and index
505         llvm::MDNode* getKernelMD(const llvm::Function& F, int index);
506 
507         // Members
508     private:
509         /// @brief  Order function which defines a payload layout being used
510         KernelArgsOrder m_KernelArgsOrder;
511         /// @brief  Contains all the kernel arguments that need to be allocated or annotated, sorted by their type
512         AllocationArgs m_args;
513     };
514 
515 } // namespace IGC
516