1 /*
2 * Copyright (c) 2017, Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 */
22 #ifndef CMRTLIB_AGNOSTIC_SHARE_CM_QUEUE_BASE_H_
23 #define CMRTLIB_AGNOSTIC_SHARE_CM_QUEUE_BASE_H_
24 
25 #include "cm_include.h"
26 #include <cstdint>
27 #include <cstddef>
28 
29 class CmTask;
30 class CmEvent;
31 class CmThreadSpace;
32 class CmThreadGroupSpace;
33 class CmBuffer;
34 class CmSurface2D;
35 class CmKernel;
36 class CmVebox;
37 
//!
//! \brief Kind of queue requested through CM_QUEUE_CREATE_OPTION::QueueType.
//! \details The enumerators are stored in a 3-bit field of CM_QUEUE_CREATE_OPTION,
//!          so their numeric values are spelled out explicitly.
//!
enum CM_QUEUE_TYPE
{
    CM_QUEUE_TYPE_NONE = 0,
    CM_QUEUE_TYPE_RENDER = 1,   // value used by CM_DEFAULT_QUEUE_CREATE_OPTION
    CM_QUEUE_TYPE_COMPUTE = 2
};
44 
//!
//! \brief SSEU usage hint carried in CM_QUEUE_CREATE_OPTION::SseuUsageHint.
//! \details The enumerators are stored in a 3-bit field of CM_QUEUE_CREATE_OPTION,
//!          so their numeric values are spelled out explicitly.
//!
enum CM_QUEUE_SSEU_USAGE_HINT_TYPE
{
    CM_QUEUE_SSEU_USAGE_HINT_DEFAULT = 0,   // value used by CM_DEFAULT_QUEUE_CREATE_OPTION
    CM_QUEUE_SSEU_USAGE_HINT_VME = 1
};
50 
51 struct CM_QUEUE_CREATE_OPTION
52 {
53     CM_QUEUE_TYPE                 QueueType               : 3;
54     bool                          RAMode                  : 1;
55     unsigned int                  Reserved0               : 3;
56     bool                          UserGPUContext          : 1; // Is the user-provided GPU Context already created externally
57     unsigned int                  GPUContext              : 8; // user-provided GPU Context ordinal
58     CM_QUEUE_SSEU_USAGE_HINT_TYPE SseuUsageHint           : 3;
59     unsigned int                  Reserved1               : 1;
60     unsigned int                  Reserved2               : 12;
61 };
62 
63 const CM_QUEUE_CREATE_OPTION CM_DEFAULT_QUEUE_CREATE_OPTION = { CM_QUEUE_TYPE_RENDER, false, 0, false, 0, CM_QUEUE_SSEU_USAGE_HINT_DEFAULT, 0, 0 };
64 
65 //!
66 //! \brief CM task queue management.
67 //!
68 class CmQueue
69 {
70 public:
71     //!
72     //! \brief   Enqueue a task for execution with per-task thread space.
73     //! \details This function enqueues a task represented by the CmTask object.
74     //!          The kernels in the CmTask object may be run concurrently.
75     //!          Tasks get executed according to the order they get enqueued.
76     //!          This is a non-blocking call. It returns immediately without waiting
77     //!          for GPU to start or finish execution. A CmEvent is generated each time
78     //!          a task is enqueued. The CmEvent can be used to check the status of task.
79     //!          The generated event needs to be managed and released by user.
80     //!          Since event is not useful in some cases, runtime provides the capability
81     //!          to avoid generating event.
82     //!          If thread space is valid, the dependency defined by thread space will be honored.
83     //! \param   [in] task
84     //!          pointer to task to submit
85     //! \param   [in,out] event
86     //!          reference to pointer of event generated. If it is set as CM_NO_EVENT,
87     //!          its value returned by runtime is NULL.
88     //! \param   [in] threadSpace
89     //!          pointer to thread space which can define the thread dependency within the task.
90     //!          This is a per task thread space. If this task has multiple kernels, each kernel
91     //!          will have the thread space of same dimension, same dependency etc. If it is nullptr,
92     //!          there is no thread dependency and the maximum thread space width will be asssumed
93     //!          to calculate the coordinates for each thread. For each kernel , the per kernel thread space
94     //!          that is defined by calling CmKernel::AssociateThreadSpace() overwrites the per task thread space.
95     //! \retval  CM_SUCCESS if the task is successfully enqueued.
96     //! \retval  CM_OUT_OF_HOST_MEMORY if out of host memory
97     //! \retval  CM_FAILURE otherwise
98     //!
99     CM_RT_API virtual int32_t Enqueue(CmTask *task,
100                                   CmEvent *&event,
101                                   const CmThreadSpace *threadSpace = nullptr) = 0;
102     //!
103     //! \brief    Destroy the CmEvent generated by Enqueue.
104     //! \details  Destroy the event object previously generated by Enqueue.
105     //!           The CmEvent object can be destroyed even before the corresponding task flushed or finished.
106     //!           If this happens, there is no way the app can get the task status.
107     //! \param    [in] event
108     //!           reference to pointer to event
109     //! \retval   CM_SUCCESS if event destroyed successfully
110     //! \retval   CM_FAILURE otherwise
111     //!
112     CM_RT_API virtual int32_t DestroyEvent(CmEvent *&event) = 0;
113 
114     //!
115     //! \brief    Enqueue the task with thread group space.
116     //! \details
117     //! \param    [in]task
118     //!           pointer to task to submit
119     //! \param    [in,out] event
120     //!           reference to pointer of event generated. If it is set as CM_NO_EVENT,
121     //!           its value returned by runtime is NULL.
122     //! \param    [in] threadGroupSpace
123     //!           pointer to thread group space which defines the dimensions of the task.
124     //!           pThreadGroupSpace  can not be NULL.
125     //! \retval   CM_SUCCESS if the task is successfully enqueued.
126     //! \retval   CM_INVALID_ARG_VALUE if input task is not valid
127     //! \retval   CM_EXCEED_MAX_KERNEL_PER_ENQUEUE if the task's kernel number exceeds limitation.
128     //! \retval   CM_INVALID_THREAD_GROUP_SPACE if the thread group space specification is invalid.
129     //! \retval   CM_THREAD_ARG_NOT_ALLOWED if user has per thread arguments
130     //!
131     CM_RT_API virtual int32_t
132     EnqueueWithGroup(CmTask *task,
133                      CmEvent *&event,
134                      const CmThreadGroupSpace *threadGroupSpace = nullptr) = 0;
135 
136     //!
137     //! \brief    Enqueues the kernel to copy from system(CPU) memory to video(GPU) memory.
138     //! \details  This function enqueues a task, which contains a pre-defined kernel to copy from host
139     //!           system memory to video surface.
140     //!           This is a non-blocking call. A CmEvent is generated each time a task is enqueued.
141     //!           The CmEvent can be used to check the status.
142     //!           The host memory sysMem must be 16-Byte aligned and surface's width in bytes must be 16-Byte aligned as well.
143     //! \param    [in] surface
144     //!           surface as copy destination, surface's width in bytes must be 16-Byte aligned
145     //! \param    [in] sysMem
146     //!           host memory as copy source, must be 16-Byte aligned
147     //! \param    [in,out] event
148     //!           reference to pointer of event generated. If it is set as CM_NO_EVENT,
149     //!           its value returned by runtime is NULL.
150     //! \retval   CM_SUCCESS if the task is successfully enqueued
151     //! \retval   CM_GPUCOPY_INVALID_WIDTH if surface's width in bytes is not 16-Byte aligned
152     //!           or more than CM_MAX_GPUCOPY_SURFACE_WIDTH_IN_BYTE.
153     //! \retval   CM_GPUCOPY_INVALID_SYSMEM if sysMem is not 16-Byte aligned.
154     //! \retval   CM_GPUCOPY_OUT_OF_RESOURCE if runtime runs out of resources
155     //! \retval   CM_GPUCOPY_INVALID_SIZE if surface's height is more than CM_MAX_GPUCOPY_SURFACE_HEIGHT
156     //! \retval   CM_FAILURE otherwise
157     //!
158     CM_RT_API virtual int32_t EnqueueCopyCPUToGPU(CmSurface2D *surface,
159                                               const unsigned char *sysMem,
160                                               CmEvent *&event) = 0;
161 
162     //!
163     //! \brief    Enqueues the kernel to copy from video(GPU) memory to system(CPU) memory.
164     //! \details  This function enqueues a task, which contains a pre-defined kernel to copy from surface to system memory.
165     //!           This is a non-blocking call. A CmEvent is generated each time a task is enqueued.
166     //!           The CmEvent can be used to check the status or other data regarding the task execution.
167     //!           The host memory sysMem must be 16-Byte aligned and surface's width in bytes must be 16-Byte aligned as well.
168     //! \param    [in] surface
169     //!           surface as copy source, surface's width in bytes must be 16-Byte aligned
170     //! \param    [in] sysMem
171     //!           host memory as copy destination, must be 16-Byte aligned
172     //! \param    [in,out] event
173     //!           reference to pointer of event generated. If it is set as CM_NO_EVENT,
174     //!           its value returned by runtime is NULL.
175     //! \retval   CM_SUCCESS if the task is successfully enqueued
176     //! \retval   CM_GPUCOPY_INVALID_WIDTH if surface's width in bytes is not 16-Byte aligned
177     //!           or more than CM_MAX_GPUCOPY_SURFACE_WIDTH_IN_BYTE.
178     //! \retval   CM_GPUCOPY_INVALID_SYSMEM if sysMem is not 16-Byte aligned.
179     //! \retval   CM_GPUCOPY_OUT_OF_RESOURCE if runtime runs out of resources
180     //! \retval   CM_GPUCOPY_INVALID_SIZE if surface's height is more than CM_MAX_GPUCOPY_SURFACE_HEIGHT
181     //! \retval   CM_FAILURE otherwise
182     //!
183     CM_RT_API virtual int32_t EnqueueCopyGPUToCPU(CmSurface2D *surface,
184                                               unsigned char *sysMem,
185                                               CmEvent *&event) = 0;
186 
187     //!
188     //! \brief    Enqueues the kernel to initialize a 2D surface.
189     //! \details  This function enqueues a task, which contains a pre-defined kernel to initialize a surface 2d
190     //!           This is a non-blocking call. A CmEvent is generated each time a task is enqueued.
191     //!           The CmEvent can be used to check the status or other data regarding the task execution.
192     //! \param    [in] surface
193     //!           surface to initialize
194     //! \param    [in] initValue
195     //!           value to fill the surface
196     //! \param    [in,out] event
197     //!           reference to pointer of event generated. If it is set as CM_NO_EVENT,
198     //!           its value returned by runtime is NULL.
199     //! \retval   CM_SUCCESS if the task is successfully enqueued
200     //! \retval   CM_FAILURE otherwise
201     //!
202     CM_RT_API virtual int32_t EnqueueInitSurface2D(CmSurface2D *surface,
203                                                const uint32_t initValue,
204                                                CmEvent *&event) = 0;
205 
206     //!
207     //! \brief    Enqueue the kernel to copy memory between surfaces.
208     //! \details  This function enqueues a task, which contains a pre-defined kernel to copy memory between surfaces.
209     //!           This is a non-blocking call. A CmEvent is generated each time a task is enqueued.
210     //!           The CmEvent can be used to check the status or other data regarding the task execution.
211     //!           The input and output surfaces should have the same width, height and format.
212     //! \param    [in] inputSurface
213     //!           surface as copy source
214     //! \param    [in] outputSurface
215     //!           surface as copy destination
216     //! \param    [in] option
217     //!           If it is "CM_FASTCOPY_OPTION_NONBLOCKING", it returns immediately without waiting for GPU to start or finish.\n
218     //!           If it is "CM_FASTCOPY_OPTION_BLOCKING", this function will return until copy is finished indeed.\n
219     //!           If it is "CM_FASTCOPY_OPTION_DISABLE_TURBO_BOOST", mdf turbo boost is disabled.
220     //! \param    [in,out] event
221     //!           reference to pointer of event generated. If it is set as CM_NO_EVENT,
222     //!           its value returned by runtime is NULL.
223     //! \retval   CM_SUCCESS if the task is successfully enqueued
224     //! \retval   CM_GPUCOPY_INVALID_SURFACES if the input and output surfaces have different
225     //!           width, height and format.
226     //! \retval   CM_GPUCOPY_INVALID_SIZE if surface's height is more than CM_MAX_GPUCOPY_SURFACE_HEIGHT
227     //! \retval   CM_FAILURE otherwise
228     //!
229     CM_RT_API virtual int32_t EnqueueCopyGPUToGPU(CmSurface2D *outputSurface,
230                                               CmSurface2D *inputSurface,
231                                               uint32_t option,
232                                               CmEvent *&event) = 0;
233 
234     //!
235     //! \brief    Enqueues the kernel to copy memory between host memories.
236     //! \details  This function enqueues a task, which contains a pre-defined kernel to copy memory from src to dest memory.
237     //!           Both pDstSysMem and pSrcSysMem need to be 16-Byte aligned.  The maximum size is determined by sytem's memory
238     //!           and it should be less than CM_MAX_1D_SURF_WIDTH bytes which is 1G bytes now. If the copy size is less than
239     //!           1K bytes, the event will not be generated and it is a blocking call.
240     //!           For the size larger than 1K bytes, this is a non-blocking call.
241     //!           A CmEvent is generated to check the status or other data regarding the task execution.
242     //!           To avoid generating event, user can set the event as CM_NO_EVENT and pass it to this function
243     //! \param    [in] dstSysMem
244     //!           destination memory, must be 16-Byte aligned
245     //! \param    [in] srcSysMem
246     //!           source memory, must be 16-Byte aligned
247     //! \param    [in] size
248     //!           size of memory to copy in bytes
249     //! \param    [in] option
250     //!           If it is "CM_FASTCOPY_OPTION_NONBLOCKING", it returns immediately without waiting for GPU to start or finish.\n
251     //!           If it is "CM_FASTCOPY_OPTION_BLOCKING", this function will return until copy is finished indeed.\n
252     //!           If it is "CM_FASTCOPY_OPTION_DISABLE_TURBO_BOOST", mdf turbo boost is disabled.
253     //! \param    [in,out] event
254     //!           reference to pointer of event generated. If it is set as CM_NO_EVENT,
255     //!           its value returned by runtime is NULL.
256     //! \retval   CM_SUCCESS if the task is successfully enqueued
257     //! \retval   CM_GPUCOPY_INVALID_SYSMEM if pDstSysMem or pSrcSysMem is not 16-Byte aligned.
258     //! \retval   CM_GPUCOPY_OUT_OF_RESOURCE if runtime runs out of resources
259     //! \retval   CM_FAILURE otherwise
260     //!
261     CM_RT_API virtual int32_t EnqueueCopyCPUToCPU(unsigned char *dstSysMem,
262                                              unsigned char *srcSysMem,
263                                               uint32_t size,
264                                               uint32_t option,
265                                               CmEvent *&event) = 0;
266 
267     //!
268     //! \brief    Enqueue the kernel to copy memory from system memory to video memory with width and height stride.
269     //! \details  This function enqueues a task, which contains a pre-defined kernel to copy from system memory to a surface.
270     //!           Depending on user "opiton", this is a non-blocking or blocking call.
271     //!           A CmEvent is generated each time a task is enqueued. The CmEvent can be used to check the status or other data
272     //!           regarding the task execution. To avoid generating event, user can set the event as CM_NO_EVENT and pass it to
273     //!           this function. The host memory sysMem's width stride must be 16-Byte aligned, and height stride has no any
274     //!           alignment restriction.
275     //! \param    [in] surface
276     //!           surface as copy destination
277     //! \param    [in] sysMem
278     //!           system memory as copy source must be 16-Byte aligned
279     //! \param    [in] widthStride
280     //!           width stride of memory stored in host memory, in bytes, must be 16-Byte aligned
281     //! \param    [in] heightStride
282     //!           height stride of memory stored in host memory, in bytes.
283     //! \param    [in] option
284     //!           If it is "CM_FASTCOPY_OPTION_NONBLOCKING", it returns immediately without waiting for GPU to start or finish.\n
285     //!           If it is "CM_FASTCOPY_OPTION_BLOCKING", this function will return until copy is finished indeed.\n
286     //!           If it is "CM_FASTCOPY_OPTION_DISABLE_TURBO_BOOST", mdf turbo boost is disabled.
287     //! \param    [in,out] event
288     //!           reference to pointer of event generated. If it is set as CM_NO_EVENT,
289     //!           its value returned by runtime is NULL.
290     //! \retval   CM_SUCCESS if the task is successfully enqueued
291     //! \retval   CM_GPUCOPY_INVALID_WIDTH if surface's width in bytes is not 16-Byte aligned
292     //!           or more than CM_MAX_GPUCOPY_SURFACE_WIDTH_IN_BYTE.
293     //! \retval   CM_GPUCOPY_INVALID_SYSMEM if sysMem is not 16-Byte aligned.
294     //! \retval   CM_GPUCOPY_OUT_OF_RESOURCE if runtime runs out of resources
295     //! \retval   CM_GPUCOPY_INVALID_SIZE if surface's height is more than CM_MAX_GPUCOPY_SURFACE_HEIGHT
296     //! \retval   CM_FAILURE otherwise
297     //!
298     CM_RT_API virtual int32_t EnqueueCopyCPUToGPUFullStride(CmSurface2D *surface,
299                                                         const unsigned char *sysMem,
300                                                         const uint32_t widthStride,
301                                                         const uint32_t heightStride,
302                                                         const uint32_t option,
303                                                         CmEvent *& event) = 0;
304 
305     //!
306     //! \brief    Enqueue the kernel to copy memory from video memory to system memory with width and height stride.
307     //! \details  This function enqueues a task, which contains a pre-defined kernel to copy from surface to system memory.
308     //!           Depending on user "opiton", this is a non-blocking or blocking call.
309     //!           A CmEvent is generated each time a task is enqueued. The CmEvent can be used to check the status or other data
310     //!           regarding the task execution. To avoid generating event, user can set the event as CM_NO_EVENT and pass it to
311     //!           this function. The host memory sysMem's width stride must be 16-Byte aligned, and height stride has no any
312     //!           alignment restriction.
313     //! \param    [in] surface
314     //!           surface as copy source
315     //! \param    [in] sysMem
316     //!           system memory as copy destination, must be 16-Byte aligned
317     //! \param    [in] widthStride
318     //!           width stride of memory stored in host memory, in bytes, must be 16-Byte aligned
319     //! \param    [in] heightStride
320     //!           height stride of memory stored in host memory, in bytes,
321     //! \param    [in] option
322     //!           If it is "CM_FASTCOPY_OPTION_NONBLOCKING", it returns immediately without waiting for GPU to start or finish.\n
323     //!           If it is "CM_FASTCOPY_OPTION_BLOCKING", this function will return until copy is finished indeed.\n
324     //!           If it is "CM_FASTCOPY_OPTION_DISABLE_TURBO_BOOST", mdf turbo boost is disabled.
325     //! \param    [in,out] event
326     //!           reference to pointer of event generated. If it is set as CM_NO_EVENT,
327     //!           its value returned by runtime is NULL.
328     //! \retval   CM_SUCCESS if the task is successfully enqueued
329     //! \retval   CM_GPUCOPY_INVALID_STRIDE if stride is not 16-Byte aligned or less than surface’s width in bytes.
330     //! \retval   CM_GPUCOPY_INVALID_SYSMEM if sysMem is not 16-Byte aligned.
331     //! \retval   CM_GPUCOPY_INVALID_SIZE if surface's height is more than CM_MAX_GPUCOPY_SURFACE_HEIGHT
332     //! \retval   CM_GPUCOPY_OUT_OF_RESOURCE if runtime runs out of resources
333     //! \retval   CM_FAILURE otherwise
334     //!
335     CM_RT_API virtual int32_t EnqueueCopyGPUToCPUFullStride(CmSurface2D *surface,
336                                                         unsigned char *sysMem,
337                                                         const uint32_t widthStride,
338                                                         const uint32_t heightStride,
339                                                         const uint32_t option,
340                                                         CmEvent *& event) = 0;
341 
342     //!
343     //! \brief    Enqueue the kernel to copy memory from system memory to video memory with width and height stride.
344     //! \details  This function enqueues a task, which contains a pre-defined kernel to copy from system memory to a surface.
345     //!           Depending on user "opiton", this is a non-blocking or blocking call.
346     //!           A CmEvent is generated each time a task is enqueued. The CmEvent can be used to check the status or other data
347     //!           regarding the task execution. To avoid generating event, user can set the event as CM_NO_EVENT and pass it to
348     //!           this function. The host memory sysMem's width stride must be 16-Byte aligned, and height stride has no any
349     //!           alignment restriction.
350     //! \param    [in] surface
351     //!           surface as copy destination
352     //! \param    [in] sysMem
353     //!           system memory as copy source must be 16-Byte aligned
354     //! \param    [in] widthStride
355     //!           width stride of memory stored in host memory, in bytes, must be 16-Byte aligned
356     //! \param    [in] heightStride
357     //!           height stride of memory stored in host memory, in bytes.
358     //! \param    [in] option
359     //!           If it is "CM_FASTCOPY_OPTION_NONBLOCKING", it returns immediately without waiting for GPU to start or finish.\n
360     //!           If it is "CM_FASTCOPY_OPTION_BLOCKING", this function will return until copy is finished indeed.\n
361     //!           If it is "CM_FASTCOPY_OPTION_DISABLE_TURBO_BOOST", mdf turbo boost is disabled.
362     //! \param    [in,out] event
363     //!           reference to pointer of event generated. If it is set as CM_NO_EVENT,
364     //!           its value returned by runtime is NULL.
365     //! \retval   CM_SUCCESS if the task is successfully enqueued
366     //! \retval   CM_GPUCOPY_INVALID_WIDTH if surface's width in bytes is not 16-Byte aligned
367     //!           or more than CM_MAX_GPUCOPY_SURFACE_WIDTH_IN_BYTE.
368     //! \retval   CM_GPUCOPY_INVALID_SYSMEM if sysMem is not 16-Byte aligned.
369     //! \retval   CM_GPUCOPY_OUT_OF_RESOURCE if runtime runs out of resources
370     //! \retval   CM_GPUCOPY_INVALID_SIZE if surface's height is more than CM_MAX_GPUCOPY_SURFACE_HEIGHT
371     //! \retval   CM_FAILURE otherwise
372     //!
373     CM_RT_API virtual int32_t EnqueueCopyCPUToGPUFullStrideDup(CmSurface2D *surface,
374                                                         const unsigned char *sysMem,
375                                                         const uint32_t widthStride,
376                                                         const uint32_t heightStride,
377                                                         const uint32_t option,
378                                                         CmEvent *& event) = 0;
379 
380     //!
381     //! \brief    Enqueue the kernel to copy memory from video memory to system memory with width and height stride.
382     //! \details  This function enqueues a task, which contains a pre-defined kernel to copy from surface to system memory.
383     //!           Depending on user "opiton", this is a non-blocking or blocking call.
384     //!           A CmEvent is generated each time a task is enqueued. The CmEvent can be used to check the status or other data
385     //!           regarding the task execution. To avoid generating event, user can set the event as CM_NO_EVENT and pass it to
386     //!           this function. The host memory sysMem's width stride must be 16-Byte aligned, and height stride has no any
387     //!           alignment restriction.
388     //! \param    [in] surface
389     //!           surface as copy source
390     //! \param    [in] sysMem
391     //!           system memory as copy destination, must be 16-Byte aligned
392     //! \param    [in] widthStride
393     //!           width stride of memory stored in host memory, in bytes, must be 16-Byte aligned
394     //! \param    [in] heightStride
395     //!           height stride of memory stored in host memory, in bytes,
396     //! \param    [in] option
397     //!           If it is "CM_FASTCOPY_OPTION_NONBLOCKING", it returns immediately without waiting for GPU to start or finish.\n
398     //!           If it is "CM_FASTCOPY_OPTION_BLOCKING", this function will return until copy is finished indeed.\n
399     //!           If it is "CM_FASTCOPY_OPTION_DISABLE_TURBO_BOOST", mdf turbo boost is disabled.
400     //! \param    [in,out] event
401     //!           reference to pointer of event generated. If it is set as CM_NO_EVENT,
402     //!           its value returned by runtime is NULL.
403     //! \retval   CM_SUCCESS if the task is successfully enqueued
404     //! \retval   CM_GPUCOPY_INVALID_STRIDE if stride is not 16-Byte aligned or less than surface’s width in bytes.
405     //! \retval   CM_GPUCOPY_INVALID_SYSMEM if sysMem is not 16-Byte aligned.
406     //! \retval   CM_GPUCOPY_INVALID_SIZE if surface's height is more than CM_MAX_GPUCOPY_SURFACE_HEIGHT
407     //! \retval   CM_GPUCOPY_OUT_OF_RESOURCE if runtime runs out of resources
408     //! \retval   CM_FAILURE otherwise
409     //!
410     CM_RT_API virtual int32_t EnqueueCopyGPUToCPUFullStrideDup(CmSurface2D *surface,
411                                                         unsigned char *sysMem,
412                                                         const uint32_t widthStride,
413                                                         const uint32_t heightStride,
414                                                         const uint32_t option,
415                                                         CmEvent *& event) = 0;
416 
417     //!
418     //! \brief   Enqueue a task for execution with hints.
419     //! \details This API is designed to saturate the EUs when running a large dependency kernel.
420     //!          At least two kernels must exist in the task. The ideal case is at least one large dependency kernel
421     //!          running with smaller kernels. The idea is to get the smaller kernels for free during the time it already
422     //!          takes to execute the large dependency kernel. Each task can have up to CAP_KERNEL_COUNT_PER_TASK kernels.
423     //!          The 0th bit of the hints indicates to use media object or media walker. Currently, only media object is valid.
424     //!          The next bits indicate whether the next kernel is in the same or different kernel group.
425     //!          For example, if the 1th bit is set then the second kernel is in a different kernel group from the first kernel,
426     //!          if it is not set it is in the same kernel group. The kernels are interleaved between different kernel groups
427     //!          and run concurrently. Within a kernel group, the kernels are dispatched in order. The kernel groups are dispatched
428     //!          to separate sub-slices. The assumption is made that the kernel groups are comparable in kernel execution time.
429     //!          There can be no dependency between different kernels; all kernels in the task should be independent of one another.
430     //!          Additionally, pKernel->AssociateThreadSpace(CmThreadSpace*& pTS) must be called for each kernel.
431     //!          A CmEvent is generated  to check the status or other data regarding the task execution.
432     //!          To avoid generating event, user can set the event as CM_NO_EVENT and pass it to this function.
433     //! \param   [in] task
434     //!          pointer to task to submit
435     //! \param   [in,out] event
436     //!          reference to pointer of event generated. If it is set as CM_NO_EVENT,
437     //!          its value returned by runtime is NULL.
438     //! \param   [in] hints
439     //!          Hints about work load from host to driver.
440     //! \retval  CM_SUCCESS if the task is successfully enqueued.
441     //! \retval  CM_OUT_OF_HOST_MEMORY if out of host memory
442     //! \retval  CM_FAILURE otherwise
443     //!
444     CM_RT_API virtual int32_t EnqueueWithHints(CmTask *task,
445                                            CmEvent *&event,
446                                            uint32_t hints = 0) = 0;
447 
448     //!
449     //! \brief   Enqueue a vebox task to vebox engine.
450     //! \details This call submits a VEBOX task to VEBOX engine for execution.
451     //!          Before this function is called, user need call CmDevice::CreateVebox() to create a CmVebox object,
452     //!          and call the APIs in CmVebox class to set up VEBOX state and surfaces.
453     //! \param   [in] vebox
454     //!          Pointer to a CmVebox object.
455     //! \param   [in,out] event
456     //!          reference to pointer of event generated. If it is set as CM_NO_EVENT,
457     //!          its value returned by runtime is NULL.
458     //! \retval  CM_SUCCESS if the task is successfully enqueued.
459     //! \retval  CM_OUT_OF_HOST_MEMORY if out of host memory
460     //! \retval  CM_INVALID_ARG_VALUE if input pVebox is not valid
461     //! \retval  CM_FAILURE otherwise
462     //!
463     CM_RT_API virtual int32_t EnqueueVebox(CmVebox *vebox, CmEvent *&event) = 0;
464 
465     //!
466     //! \brief   Enqueue a task for execution with a per-task thread space in a fast path.
467     //! \details This function enqueues a task represented by the CmTask object.
468     //!          The kernels in the CmTask object may be run concurrently.
469     //!          Tasks get executed in the order in which they are enqueued.
470     //!          This is a non-blocking call. It returns immediately without waiting
471     //!          for the GPU to start or finish execution. A CmEvent is generated each time
472     //!          a task is enqueued. The CmEvent can be used to check the status of the task.
473     //!          The generated event needs to be managed and released by the user.
474     //!          Since the event is not useful in some cases, the runtime provides the capability
475     //!          to avoid generating the event.
476     //!          If the thread space is valid, the dependency defined by the thread space will be honored.
477     //! \param   [in] task
478     //!          Pointer to the task to submit.
479     //! \param   [in,out] event
480     //!          Reference to the pointer of the generated event. If it is set to CM_NO_EVENT,
481     //!          the value returned by the runtime is NULL.
482     //! \param   [in] threadSpace
483     //!          Pointer to the thread space which can define the thread dependency within the task.
484     //!          This is a per-task thread space. If this task has multiple kernels, each kernel
485     //!          will have a thread space of the same dimension, same dependency etc. If it is nullptr,
486     //!          there is no thread dependency and the maximum thread space width will be assumed
487     //!          to calculate the coordinates for each thread. For each kernel, the per-kernel thread space
488     //!          that is defined by calling CmKernel::AssociateThreadSpace() overrides the per-task thread space.
489     //! \retval  CM_SUCCESS if the task is successfully enqueued.
490     //! \retval  CM_OUT_OF_HOST_MEMORY if out of host memory.
491     //! \retval  CM_FAILURE otherwise.
492     //!
493     CM_RT_API virtual int32_t EnqueueFast(CmTask *task,
494                               CmEvent *&event,
495                               const CmThreadSpace *threadSpace = nullptr) = 0;
496 
497     //!
498     //! \brief    Destroy the CmEvent generated by EnqueueFast.
499     //! \details  Destroy the event object previously generated by EnqueueFast.
500     //!           The CmEvent object can be destroyed even before the corresponding task is flushed or finished.
501     //!           If this happens, the application has no way to get the task status.
502     //! \param    [in] event
503     //!           Reference to the pointer to the event.
504     //! \retval   CM_SUCCESS if the event is destroyed successfully.
505     //! \retval   CM_FAILURE otherwise.
506     //!
507     CM_RT_API virtual int32_t DestroyEventFast(CmEvent *&event) = 0;
508 
509     //!
510     //! \brief    Enqueue the task with a thread group space in a fast path.
511     //! \details
512     //! \param    [in] task
513     //!           Pointer to the task to submit.
514     //! \param    [in,out] event
515     //!           Reference to the pointer of the generated event. If it is set to CM_NO_EVENT,
516     //!           the value returned by the runtime is NULL.
517     //! \param    [in] threadGroupSpace
518     //!           Pointer to the thread group space which defines the dimensions of the task.
519     //!           threadGroupSpace must not be NULL. NOTE(review): the declaration defaults it to nullptr - confirm.
520     //! \retval   CM_SUCCESS if the task is successfully enqueued.
521     //! \retval   CM_INVALID_ARG_VALUE if the input task is not valid.
522     //! \retval   CM_EXCEED_MAX_KERNEL_PER_ENQUEUE if the task's kernel number exceeds the limitation.
523     //! \retval   CM_INVALID_THREAD_GROUP_SPACE if the thread group space specification is invalid.
524     //! \retval   CM_THREAD_ARG_NOT_ALLOWED if the user has per-thread arguments.
525     //!
526     CM_RT_API virtual int32_t EnqueueWithGroupFast(CmTask *task,
527                                   CmEvent *&event,
528                                   const CmThreadGroupSpace *threadGroupSpace = nullptr) = 0;
529 
530     //!
531     //! \brief    Enqueue the kernel to copy memory from a video memory buffer/1D surface to system memory.
532     //! \details  This function enqueues a task that contains a pre-defined kernel to copy from a
533     //!           video memory buffer/1D surface to system memory. This is a non-blocking call.
534     //!           The buffer read copy task needs to wait on a CM wait_event to check that the dependent
535     //!           condition is ready before the actual copy starts.
536     //!           Also, a CM notification event is generated each time a task is enqueued.
537     //!           The CmEvent can be used to check the status or other data regarding the task execution.
538     //! \param    [in] buffer
539     //!           CM buffer (1D surface) that is the copy source.
540     //! \param    [in] offset
541     //!           Starting address offset of the data copy within the CM buffer.
542     //! \param    [in] sysMem
543     //!           System memory as copy destination, preferably 16-byte aligned. NOTE(review): declared const although it is the destination - confirm.
544     //! \param    [in] sysMemSize
545     //!           Number of bytes to copy into system memory.
546     //! \param    [in] wait_event
547     //!           Conditional event to wait on before the read copy starts.
548     //! \param    [in,out] event
549     //!           Reference to the pointer of the CM event generated to notify buffer read copy task status changes.
550     //! \param    [in] option
551     //!           If it is non-zero, a CPU worker thread will be used for the buffer read copy.
552     //! \retval   CM_SUCCESS if the task is successfully enqueued.
553     //! \retval   CM_GPUCOPY_OUT_OF_RESOURCE if the runtime runs out of resources.
554     //! \retval   CM_FAILURE otherwise.
555     //!
556     CM_RT_API virtual int32_t EnqueueReadBuffer(CmBuffer* buffer,
557                                                 size_t offset,
558                                                 const unsigned char* sysMem,
559                                                 uint64_t sysMemSize,
560                                                 CmEvent* wait_event,
561                                                 CmEvent*& event,
562                                                 unsigned option = 0) = 0;
563 
564     //!
565     //! \brief    Enqueue the kernel to copy memory from system memory to a video memory buffer/1D surface.
566     //! \details  This function enqueues a task that contains a pre-defined kernel to copy from system
567     //!           memory to a 1D surface. This is a non-blocking call.
568     //!           The buffer write copy task needs to wait on a CM wait_event to check that the condition
569     //!           is ready before the actual copy starts.
570     //!           Also, a CM notification event is generated each time a task is enqueued.
571     //!           The CmEvent can be used to check the status or other data regarding the task execution.
572     //! \param    [in] buffer
573     //!           CM buffer (1D surface) that is the copy destination.
574     //! \param    [in] offset
575     //!           Starting address offset of the data copy within the CM buffer.
576     //! \param    [in] sysMem
577     //!           System memory as copy source, preferably 16-byte aligned.
578     //! \param    [in] sysMemSize
579     //!           Number of bytes to copy from system memory.
580     //! \param    [in] wait_event
581     //!           Conditional event to wait on before the write copy starts.
582     //! \param    [in,out] event
583     //!           Reference to the pointer of the CM event generated to notify buffer write copy task status changes.
584     //! \param    [in] option
585     //!           If it is non-zero, a CPU worker thread will be used for the buffer copy.
586     //! \retval   CM_SUCCESS if the task is successfully enqueued.
587     //! \retval   CM_GPUCOPY_OUT_OF_RESOURCE if the runtime runs out of resources.
588     //! \retval   CM_FAILURE otherwise.
589     //!
590     CM_RT_API virtual int32_t EnqueueWriteBuffer(CmBuffer* buffer,
591                                                  size_t offset,
592                                                  const unsigned char* sysMem,
593                                                  uint64_t sysMemSize,
594                                                  CmEvent* wait_event,
595                                                  CmEvent*& event,
596                                                  unsigned option = 0) = 0;
597 
598 
599     //!
600     //! \brief    [Only In Emu Mode] Set the resident group number and parallel thread number.
601     //! \details
602     //! \param    [in] residentGroupNum
603     //!           Number of resident groups running on the device.
604     //! \param    [in] parallelThreadNum
605     //!           Number of threads run in parallel.
606     //! \retval   CM_SUCCESS if the parameter is successfully set.
607     //! \retval   CM_NOT_IMPLEMENTED if in sim or emu mode. NOTE(review): conflicts with the "[Only In Emu Mode]" brief - confirm which modes are unsupported.
608     //!
609     CM_RT_API virtual int32_t SetResidentGroupAndParallelThreadNum(uint32_t residentGroupNum, uint32_t parallelThreadNum) = 0;
610 
611 protected:
612     virtual ~CmQueue() = default;  //!< Protected, so code outside the class hierarchy cannot delete a queue through a CmQueue pointer.
613 };
614 
615 #endif  // #ifndef CMRTLIB_AGNOSTIC_SHARE_CM_QUEUE_BASE_H_
616