1 /*
2 * Copyright (c) 2017, Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 */
22 #include "cm_queue.h"
23 #include "cm_debug.h"
24 #include "cm_device.h"
25 #include "cm_include.h"
26 #include "cm_mem.h"
27 #include "cm_timer.h"
28 
29 struct CM_CREATEQUEUE_PARAM
30 {
31     CM_QUEUE_CREATE_OPTION createOption; // [in/out]
32     void *cmQueueHandle;                 // [out]
33     int32_t returnValue;                 // [out]
34 };
35 
//! Argument block for the CM_FN_CMQUEUE_ENQUEUE and CM_FN_CMQUEUE_ENQUEUEFAST
//! calls. The field order and types form the call's binary layout.
struct CM_ENQUEUE_PARAM
{
    void     *cmQueueHandle;        // [in]     queue the task is submitted to
    void     *cmTaskHandle;         // [in]     task to run
    void     *cmThreadSpaceHandle;  // [in]     thread space (may be null)
    void     *cmEventHandle;        // [in/out] caller's event pointer on entry, resulting event on exit
    uint32_t  eventIndex;           // [out]    index of Event in m_EventArray
    int32_t   returnValue;          // [out]    status code written by the callee
};
45 
//! Argument block for the CM_FN_CMQUEUE_ENQUEUEWITHGROUP call.
//! The field order and types form the call's binary layout.
struct CM_ENQUEUEGROUP_PARAM
{
    void     *cmQueueHandle;      // [in]     queue the task is submitted to
    void     *cmTaskHandle;       // [in]     task to run
    void     *cmTGrpSpaceHandle;  // [in]     thread group space (may be null)
    void     *cmEventHandle;      // [in/out] caller's event pointer on entry, resulting event on exit
    uint32_t  eventIndex;         // [out]    index of Event in m_EventArray
    int32_t   returnValue;        // [out]    status code written by the callee
};
55 
//! Argument block for the CM_FN_CMQUEUE_ENQUEUEWITHHINTS call.
//! The field order and types form the call's binary layout.
struct CM_ENQUEUEHINTS_PARAM
{
    void     *cmQueueHandle;  // [in]     queue the task is submitted to
    void     *cmTaskHandle;   // [in]     task to run
    void     *cmEventHandle;  // [in/out] caller's event pointer on entry, resulting event on exit
    uint32_t  hints;          // [in]     hint flags forwarded unchanged
    uint32_t  eventIndex;     // [out]    index of Event in m_EventArray
    int32_t   returnValue;    // [out]    status code written by the callee
};
65 
//! Argument block for the CM_FN_CMQUEUE_DESTROYEVENT call.
//! The field order and types form the call's binary layout.
struct CM_DESTROYEVENT_PARAM
{
    void    *cmQueueHandle;  // [in]  queue that owns the event
    void    *cmEventHandle;  // [in]  event to destroy
    int32_t  returnValue;    // [out] status code written by the callee
};
72 
//! Argument block for the CM_FN_CMQUEUE_ENQUEUECOPY_V2V call
//! (surface-to-surface copy). The field order and types form the call's
//! binary layout.
struct CM_ENQUEUE_GPUCOPY_V2V_PARAM
{
    void     *cmQueueHandle;   // [in]     queue the copy is submitted to
    void     *cmSrcSurface2d;  // [in]     source surface
    void     *cmDstSurface2d;  // [in]     destination surface
    uint32_t  option;          // [in]     copy option forwarded unchanged
    void     *cmEventHandle;   // [in/out] caller's event pointer on entry, resulting event on exit
    uint32_t  eventIndex;      // [out]    index of Event in m_EventArray
    int32_t   returnValue;     // [out]    status code written by the callee
};
83 
//! Argument block for the CM_FN_CMQUEUE_ENQUEUECOPY_L2L call
//! (system-memory to system-memory copy). The field order and types form the
//! call's binary layout.
struct CM_ENQUEUE_GPUCOPY_L2L_PARAM
{
    void     *cmQueueHandle;  // [in]     queue the copy is submitted to
    void     *srcSysMem;      // [in]     source system memory
    void     *dstSysMem;      // [in]     destination system memory
    uint32_t  copySize;       // [in]     size forwarded from the caller
    uint32_t  option;         // [in]     copy option forwarded unchanged
    void     *cmEventHandle;  // [in/out] caller's event pointer on entry, resulting event on exit
    uint32_t  eventIndex;     // [out]    index of Event in m_EventArray
    int32_t   returnValue;    // [out]    status code written by the callee
};
95 
96 
//! Argument block for a buffer copy request.
//! NOTE(review): no user of this struct is visible in this part of the file;
//! the per-field notes below only restate the original direction markers.
//! The field order and types form the binary layout; do not reorder.
struct CM_ENQUEUE_COPY_BUFFER_PARAM
{
    void     *cmQueueHandle;  // [in]
    void     *buffer;         // [in]
    void     *sysMem;         // [in]
    uint32_t  offset;         // [in]
    uint64_t  copySize;       // [in]
    uint32_t  copyDir;        // [in]
    void     *wait_event;     // [in]
    void     *cmEventHandle;  // [out]
    uint32_t  option;         // [in]
    uint32_t  eventIndex;     // [out] index of Event in m_EventArray
    int32_t   returnValue;    // [out]
};
111 
//! Argument block for the CM_FN_CMQUEUE_ENQUEUESURF2DINIT call.
//! The field order and types form the call's binary layout.
struct CM_ENQUEUE_2DInit_PARAM
{
    void     *cmQueueHandle;  // [in]     queue the request is submitted to
    void     *cmSurface2d;    // [in]     surface to initialise
    uint32_t  initValue;      // [in]     initial value forwarded unchanged
    void     *cmEventHandle;  // [in/out] caller's event pointer on entry, resulting event on exit
    uint32_t  eventIndex;     // [out]    index of Event in m_EventArray
    int32_t   returnValue;    // [out]    status code written by the callee
};
121 
//! Argument block for the CM_FN_CMQUEUE_ENQUEUEVEBOX call.
//! The field order and types form the call's binary layout.
struct CM_ENQUEUE_VEBOX_PARAM
{
    void     *cmQueueHandle;  // [in]     queue the vebox work is submitted to
    void     *cmVeboxHandle;  // [in]     CmVeboxG75's handle
    void     *cmEventHandle;  // [in/out] caller's event pointer on entry, resulting event's handle on exit
    uint32_t  eventIndex;     // [out]    event's index in m_EventArray CMRT@UMD
    int32_t   returnValue;    // [out]    return value
};
130 
Create(CmDevice_RT * device,CmQueue_RT * & queue,CM_QUEUE_CREATE_OPTION queueCreateOption)131 int32_t CmQueue_RT::Create(CmDevice_RT *device, CmQueue_RT *&queue, CM_QUEUE_CREATE_OPTION queueCreateOption)
132 {
133     int32_t result = CM_SUCCESS;
134     queue = new(std::nothrow) CmQueue_RT(device, queueCreateOption);
135     if (queue)
136     {
137         result = queue->Initialize(queueCreateOption);
138         if (result != CM_SUCCESS)
139         {
140             CmQueue_RT::Destroy(queue);
141         }
142     }
143     else
144     {
145         CmAssert(0);
146         result = CM_OUT_OF_HOST_MEMORY;
147     }
148     return result;
149 }
150 
Destroy(CmQueue_RT * & queue)151 int32_t CmQueue_RT::Destroy(CmQueue_RT *&queue)
152 {
153     CmSafeRelease(queue);
154     return CM_SUCCESS;
155 }
156 
CmQueue_RT(CmDevice_RT * device,CM_QUEUE_CREATE_OPTION queueCreateOption)157 CmQueue_RT::CmQueue_RT(CmDevice_RT *device, CM_QUEUE_CREATE_OPTION queueCreateOption):
158     m_cmDev(device),
159     m_cmQueueHandle(nullptr),
160     m_queueOption(queueCreateOption) {}
161 
~CmQueue_RT()162 CmQueue_RT::~CmQueue_RT() {}
163 
Initialize()164 int32_t CmQueue_RT::Initialize()
165 {
166     CM_CREATEQUEUE_PARAM inParam;
167     CmSafeMemSet(&inParam, 0, sizeof(inParam));
168 
169     int32_t hr = m_cmDev->OSALExtensionExecute(CM_FN_CMDEVICE_CREATEQUEUE,
170                                                 &inParam, sizeof(inParam));
171     CHK_FAILURE_RETURN(hr);
172     CHK_FAILURE_RETURN(inParam.returnValue);
173     m_cmQueueHandle = inParam.cmQueueHandle;
174     m_queueOption   = inParam.createOption;
175     return CM_SUCCESS;
176 }
177 
Initialize(CM_QUEUE_CREATE_OPTION queueCreateOption)178 int32_t CmQueue_RT::Initialize(CM_QUEUE_CREATE_OPTION queueCreateOption)
179 {
180     CM_CREATEQUEUE_PARAM inParam;
181     CmSafeMemSet(&inParam, 0, sizeof(inParam));
182     inParam.createOption = queueCreateOption;
183 
184     int32_t hr = m_cmDev->OSALExtensionExecute(CM_FN_CMDEVICE_CREATEQUEUEEX,
185                                                 &inParam, sizeof(inParam));
186     CHK_FAILURE_RETURN(hr);
187     CHK_FAILURE_RETURN(inParam.returnValue);
188     m_cmQueueHandle = inParam.cmQueueHandle;
189     return CM_SUCCESS;
190 }
191 
192 //!
193 //! Enqueue an task. Each task have one or more kernels running concurrently.
194 //! Each kernel can run in multiple threads concurrently.
195 //! Tasks get executed according to the order they get enqueued. The next task
196 //! doesn't start execute until the current task finishs.
197 //! When the last argument, pThreadSpace, is not nullptr, there are dependency among all threads within a task
198 //! Enqueue will make sure each x/y pair in the CmThreadSpace object is associated with
199 //! a unique thread in the task to enqueue.Otherwise enqueue will fail.
200 //! This is a non-blocking call. i.e. it returs immediately without waiting for
201 //! GPU to finish the execution of the task.
202 //! A CmEvent is generated each time a task is enqueued. The CmEvent can
203 //! be used to check if the task finishs.
204 //! INPUT:
205 //!     1) Array of CmKernel_RT pointers. These kernels are to run concurrently. The
206 //!        first nullptr pointer in the array indicates the end of kernels
207 //!     2) Reference to the pointer to CMEvent
208 //!     3) A boolean value to indicate if or not to flush the queue after enqueue the task
209 //!        by default the boolean value is TRUE.
210 //! OUTPUT:
211 //!     CM_SUCCESS if the task is successfully enqueued and the CmEvent is generated;
212 //!     CM_OUT_OF_HOST_MEMORY if out of host memery;
213 //!     CM_FAILURE otherwise.
214 //!     More error code is coming.
215 //!
Enqueue(CmTask * task,CmEvent * & event,const CmThreadSpace * threadSpace)216 CM_RT_API int32_t CmQueue_RT::Enqueue(CmTask *task,
217                                   CmEvent *&event,
218                                   const CmThreadSpace *threadSpace)
219 {
220     INSERT_PROFILER_RECORD();
221     if (task == nullptr)
222     {
223         CmAssert(0);
224         CmDebugMessage(("Kernel array is NULL."));
225         return CM_INVALID_ARG_VALUE;
226     }
227     m_criticalSection.Acquire();
228 
229     CM_ENQUEUE_PARAM inParam;
230     CmSafeMemSet(&inParam, 0, sizeof(inParam));
231     inParam.cmTaskHandle = task;
232     inParam.cmQueueHandle = m_cmQueueHandle;
233     inParam.cmThreadSpaceHandle = (void *)threadSpace;
234     inParam.cmEventHandle = event;  // to support invisiable event, this field is used for input/output.
235 
236     int32_t hr = m_cmDev->OSALExtensionExecute(CM_FN_CMQUEUE_ENQUEUE,
237                                                 &inParam, sizeof(inParam));
238     if (FAILED(hr))
239     {
240         CmAssert(0);
241         m_criticalSection.Release();
242         return hr;
243     }
244     if (inParam.returnValue != CM_SUCCESS)
245     {
246         m_criticalSection.Release();
247         return inParam.returnValue;
248     }
249 
250     event = static_cast<CmEvent *>(inParam.cmEventHandle);
251     m_criticalSection.Release();
252     return CM_SUCCESS;
253 }
254 
EnqueueWithHints(CmTask * task,CmEvent * & event,uint32_t hints)255 CM_RT_API int32_t CmQueue_RT::EnqueueWithHints(CmTask *task,
256                                            CmEvent *&event,
257                                            uint32_t hints)
258 {
259     INSERT_PROFILER_RECORD();
260     if (task == nullptr)
261     {
262         CmAssert(0);
263         CmDebugMessage(("Kernel array is NULL."));
264         return CM_INVALID_ARG_VALUE;
265     }
266     m_criticalSection.Acquire();
267 
268     CM_ENQUEUEHINTS_PARAM inParam;
269     CmSafeMemSet(&inParam, 0, sizeof(inParam));
270     inParam.cmTaskHandle = task;
271     inParam.cmQueueHandle = m_cmQueueHandle;
272     inParam.hints = hints;
273     inParam.cmEventHandle = event;  // to support invisable event, this field is used for input/output
274     int32_t hr =
275         m_cmDev->OSALExtensionExecute(CM_FN_CMQUEUE_ENQUEUEWITHHINTS,
276                                        &inParam, sizeof(inParam));
277     if (FAILED(hr))
278     {
279         CmAssert(0);
280         m_criticalSection.Release();
281         return hr;
282     }
283     if (inParam.returnValue != CM_SUCCESS)
284     {
285         m_criticalSection.Release();
286         return inParam.returnValue;
287     }
288 
289     event = static_cast<CmEvent *>(inParam.cmEventHandle);
290     m_criticalSection.Release();
291     return CM_SUCCESS;
292 }
293 
294 //!
295 //! Enqueue an task, which contains one pre-defined kernel to
296 //! copy from host memory to surface
297 //! This is a non-blocking call. i.e. it returs immediately without waiting for
298 //! GPU to finish the execution of the task.
299 //! A CmEvent is generated each time a task is enqueued. The CmEvent can
300 //! be used to check if the task finishs.
301 //! INPUT:
302 //!     1) Pointer to the CmSurface2D_RT as copy destination
303 //!     2) Pointer to the host memory as copy source
304 //!     3) Reference to the pointer to CMEvent
305 //!     4) A boolean value to indicate if or not to flush the queue after enqueue the task
306 //!        by default the boolean value is TRUE.
307 //! OUTPUT:
308 //!     CM_SUCCESS if the task is successfully enqueued and the CmEvent is generated;
309 //!     CM_OUT_OF_HOST_MEMORY if out of host memery;
310 //!     CM_FAILURE otherwise.
311 //!     More error code is coming.
312 //!
EnqueueCopyCPUToGPU(CmSurface2D * surface,const unsigned char * sysMem,CmEvent * & event)313 int32_t CmQueue_RT::EnqueueCopyCPUToGPU(CmSurface2D *surface,
314                                     const unsigned char *sysMem,
315                                     CmEvent *&event)
316 {
317     INSERT_PROFILER_RECORD();
318     return EnqueueCopy(surface,
319                        sysMem,
320                        0,
321                        0,
322                        CM_FASTCOPY_CPU2GPU,
323                        CM_FASTCOPY_OPTION_NONBLOCKING,
324                        event);
325 }
326 
327 //!
328 //! Enqueue an task, which contains one pre-defined kernel to
329 //! copy from surface to host memory
330 //! This is a non-blocking call. i.e. it returs immediately without waiting for
331 //! GPU to finish the execution of the task.
332 //! A CmEvent is generated each time a task is enqueued. The CmEvent can
333 //! be used to check if the task finishs.
334 //! INPUT:
335 //!     1) Pointer to the CmSurface2D_RT as copy source
336 //!     2) Pointer to the host memory as copy destination
337 //!     3) Reference to the pointer to CMEvent
338 //!     4) A boolean value to indicate if or not to flush the queue after enqueue the task
339 //!        by default the boolean value is TRUE.
340 //! OUTPUT:
341 //!     CM_SUCCESS if the task is successfully enqueued and the CmEvent is generated;
342 //!     CM_OUT_OF_HOST_MEMORY if out of host memery;
343 //!     CM_FAILURE otherwise.
344 //!     More error code is coming.
345 //!
EnqueueCopyGPUToCPU(CmSurface2D * surface,unsigned char * sysMem,CmEvent * & event)346 CM_RT_API int32_t CmQueue_RT::EnqueueCopyGPUToCPU(CmSurface2D *surface,
347                                               unsigned char *sysMem,
348                                               CmEvent *&event)
349 {
350     INSERT_PROFILER_RECORD();
351     return EnqueueCopy(surface,
352                        sysMem,
353                        0,
354                        0,
355                        CM_FASTCOPY_GPU2CPU,
356                        CM_FASTCOPY_OPTION_NONBLOCKING,
357                        event);
358 }
359 
360 //!
361 //! Enqueue an task, which contains one pre-defined kernel to
362 //! copy from linear system memory to tiled video memory
363 //! This API supports both blocking/non-blocking copy, if user pass CM_GPUCOPY_OPTION_BLOCKING as option,
364 //! this API only return till copy operation is done. otherwise, this API will return immediately no waiting for copy in GPU.
365 //! A CmEvent is generated each time a task is enqueued. The CmEvent can
366 //! be used to check if the task finishs.
367 //! INPUT:
368 //!     1) Pointer to the CmSurface2D as copy destination
369 //!     2) Pointer to the host memory as copy resource
370 //!     3) width stride in bytes for system memory
371 //!     4) height stride in rows for system memory
372 //!     5) option: CM_FASTCOPY_OPTION_NONBLOCKING,CM_FASTCOPY_OPTION_BLOCKING or CM_FASTCOPY_OPTION_DISABLE_TURBO_BOOST
373 //!     6) Reference to the pointer to CMEvent
374 //!
375 //! RETURNS:
376 //!     CM_SUCCESS if the task is successfully enqueued and the CmEvent is generated;
377 //!     CM_OUT_OF_HOST_MEMORY if out of host memery;
378 //!     CM_FAILURE otherwise.
379 //!
380 CM_RT_API int32_t
EnqueueCopyCPUToGPUFullStride(CmSurface2D * surface,const unsigned char * sysMem,const uint32_t widthStride,const uint32_t heightStride,const uint32_t option,CmEvent * & event)381 CmQueue_RT::EnqueueCopyCPUToGPUFullStride(CmSurface2D *surface,
382                                           const unsigned char *sysMem,
383                                           const uint32_t widthStride,
384                                           const uint32_t heightStride,
385                                           const uint32_t option,
386                                           CmEvent *&event)
387 {
388     INSERT_PROFILER_RECORD();
389     return EnqueueCopy(surface,
390                        sysMem,
391                        widthStride,
392                        heightStride,
393                        CM_FASTCOPY_CPU2GPU,
394                        option,
395                        event);
396 }
397 
398 //!
399 //! Enqueue an task, which contains one pre-defined kernel to
400 //! copy from tiled video memory to linear system memory
401 //! This API supports both blocking/non-blocking copy, if user pass CM_FASTCOPY_OPTION_BLOCKING as option,
402 //! this API only return till copy operation is done. otherwise, this API will return immediately no waiting for copy in GPU.
403 //! A CmEvent is generated each time a task is enqueued. The CmEvent can
404 //! be used to check if the task finishs.
405 //! INPUT:
406 //!     1) Pointer to the CmSurface2D as copy resource
407 //!     2) Pointer to the host memory as copy destination
408 //!     3) width stride in bytes for system memory
409 //!     4) height stride in rows for system memory
410 //!     5) option: CM_FASTCOPY_OPTION_NONBLOCKING or CM_FASTCOPY_OPTION_BLOCKING
411 //!     6) Reference to the pointer to CMEvent
412 //!
413 //! RETURNS:
414 //!     CM_SUCCESS if the task is successfully enqueued and the CmEvent is generated;
415 //!     CM_OUT_OF_HOST_MEMORY if out of host memery;
416 //!     CM_FAILURE otherwise.
417 //!
EnqueueCopyGPUToCPUFullStride(CmSurface2D * surface,unsigned char * sysMem,const uint32_t widthStride,const uint32_t heightStride,const uint32_t option,CmEvent * & event)418 CM_RT_API int32_t CmQueue_RT::EnqueueCopyGPUToCPUFullStride(CmSurface2D *surface,
419                                                         unsigned char *sysMem,
420                                                         const uint32_t widthStride,
421                                                         const uint32_t heightStride,
422                                                         const uint32_t option,
423                                                         CmEvent *&event)
424 {
425     INSERT_PROFILER_RECORD();
426     return EnqueueCopy(surface,
427                        sysMem,
428                        widthStride,
429                        heightStride,
430                        CM_FASTCOPY_GPU2CPU,
431                        option,
432                        event);
433 }
434 
435 //!
436 //! Enqueue an task, which contains one pre-defined kernel to
437 //! copy from linear system memory to tiled video memory
438 //! This API supports both blocking/non-blocking copy, if user pass CM_GPUCOPY_OPTION_BLOCKING as option,
439 //! this API only return till copy operation is done. otherwise, this API will return immediately no waiting for copy in GPU.
440 //! A CmEvent is generated each time a task is enqueued. The CmEvent can
441 //! be used to check if the task finishs.
442 //! INPUT:
443 //!     1) Pointer to the CmSurface2D as copy destination
444 //!     2) Pointer to the host memory as copy resource
445 //!     3) width stride in bytes for system memory
446 //!     4) height stride in rows for system memory
447 //!     5) option: CM_FASTCOPY_OPTION_NONBLOCKING,CM_FASTCOPY_OPTION_BLOCKING or CM_FASTCOPY_OPTION_DISABLE_TURBO_BOOST
448 //!     6) Reference to the pointer to CMEvent
449 //!
450 //! RETURNS:
451 //!     CM_SUCCESS if the task is successfully enqueued and the CmEvent is generated;
452 //!     CM_OUT_OF_HOST_MEMORY if out of host memery;
453 //!     CM_FAILURE otherwise.
454 //!
455 CM_RT_API int32_t
EnqueueCopyCPUToGPUFullStrideDup(CmSurface2D * surface,const unsigned char * sysMem,const uint32_t widthStride,const uint32_t heightStride,const uint32_t option,CmEvent * & event)456 CmQueue_RT::EnqueueCopyCPUToGPUFullStrideDup(CmSurface2D *surface,
457                                           const unsigned char *sysMem,
458                                           const uint32_t widthStride,
459                                           const uint32_t heightStride,
460                                           const uint32_t option,
461                                           CmEvent *&event)
462 {
463     INSERT_PROFILER_RECORD();
464     return EnqueueCopy(surface,
465                        sysMem,
466                        widthStride,
467                        heightStride,
468                        CM_FASTCOPY_CPU2GPU,
469                        option,
470                        event);
471 }
472 
473 //!
474 //! Enqueue an task, which contains one pre-defined kernel to
475 //! copy from tiled video memory to linear system memory
476 //! This API supports both blocking/non-blocking copy, if user pass CM_FASTCOPY_OPTION_BLOCKING as option,
477 //! this API only return till copy operation is done. otherwise, this API will return immediately no waiting for copy in GPU.
478 //! A CmEvent is generated each time a task is enqueued. The CmEvent can
479 //! be used to check if the task finishs.
480 //! INPUT:
481 //!     1) Pointer to the CmSurface2D as copy resource
482 //!     2) Pointer to the host memory as copy destination
483 //!     3) width stride in bytes for system memory
484 //!     4) height stride in rows for system memory
485 //!     5) option: CM_FASTCOPY_OPTION_NONBLOCKING or CM_FASTCOPY_OPTION_BLOCKING
486 //!     6) Reference to the pointer to CMEvent
487 //!
488 //! RETURNS:
489 //!     CM_SUCCESS if the task is successfully enqueued and the CmEvent is generated;
490 //!     CM_OUT_OF_HOST_MEMORY if out of host memery;
491 //!     CM_FAILURE otherwise.
492 //!
EnqueueCopyGPUToCPUFullStrideDup(CmSurface2D * surface,unsigned char * sysMem,const uint32_t widthStride,const uint32_t heightStride,const uint32_t option,CmEvent * & event)493 CM_RT_API int32_t CmQueue_RT::EnqueueCopyGPUToCPUFullStrideDup(CmSurface2D *surface,
494                                                         unsigned char *sysMem,
495                                                         const uint32_t widthStride,
496                                                         const uint32_t heightStride,
497                                                         const uint32_t option,
498                                                         CmEvent *&event)
499 {
500     INSERT_PROFILER_RECORD();
501     return EnqueueCopy(surface,
502                        sysMem,
503                        widthStride,
504                        heightStride,
505                        CM_FASTCOPY_GPU2CPU,
506                        option,
507                        event);
508 }
509 
DestroyEvent(CmEvent * & event)510 CM_RT_API int32_t CmQueue_RT::DestroyEvent(CmEvent *&event)
511 {
512     INSERT_PROFILER_RECORD();
513     if (event == nullptr)
514     {
515         return CM_FAILURE;
516     }
517 
518     CM_DESTROYEVENT_PARAM inParam;
519     CmSafeMemSet(&inParam, 0, sizeof(inParam));
520     inParam.cmQueueHandle = m_cmQueueHandle;
521     inParam.cmEventHandle = event;
522 
523     int32_t hr = m_cmDev->OSALExtensionExecute(CM_FN_CMQUEUE_DESTROYEVENT,
524                                                 &inParam, sizeof(inParam));
525     CHK_FAILURE_RETURN(hr);
526     CHK_FAILURE_RETURN(inParam.returnValue);
527     event = nullptr;
528     return CM_SUCCESS;
529 }
530 
531 //!
532 //! Function to enqueue task with thread group space pointer
533 //! Arguments:
534 //!     1. Pointer to CmTask, which can only contain one kernel.
535 //!     2. Reference to the pointer to CmEvent that is to be returned
536 //!     3. Pointer to a CmThreadGroupSpace.
537 //! Return Value:
538 //!     CM_SUCCESS if the task is successfully enqueued and the CmEvent is generated
539 //!     CM_OUT_OF_HOST_MEMORY if out of host memory
540 //!     CM_FAILURE otherwise
541 //! Notes:
542 //!     If the kernel has per thread arg, GPGPU object is to be used.
543 //!     If the kernel has no per thread  arg. GPGPU walker is used.
544 CM_RT_API int32_t
EnqueueWithGroup(CmTask * task,CmEvent * & event,const CmThreadGroupSpace * threadGroupSpace)545 CmQueue_RT::EnqueueWithGroup(CmTask *task,
546                              CmEvent *&event,
547                              const CmThreadGroupSpace *threadGroupSpace)
548 {
549     INSERT_PROFILER_RECORD();
550     if (task == nullptr)
551     {
552         CmAssert(0);
553         CmDebugMessage(("Kernel array is NULL."));
554         return CM_INVALID_ARG_VALUE;
555     }
556     m_criticalSection.Acquire();
557 
558     CM_ENQUEUEGROUP_PARAM inParam;
559     CmSafeMemSet(&inParam, 0, sizeof(inParam));
560     inParam.cmTaskHandle = task;
561     inParam.cmQueueHandle = m_cmQueueHandle;
562     inParam.cmTGrpSpaceHandle = (void *)threadGroupSpace;
563     inParam.cmEventHandle = event;  // to support invisiable event, this field is used for input/output.
564 
565     int32_t hr =
566         m_cmDev->OSALExtensionExecute(CM_FN_CMQUEUE_ENQUEUEWITHGROUP,
567                                        &inParam, sizeof(inParam));
568     if (FAILED(hr))
569     {
570         CmAssert(0);
571         m_criticalSection.Release();
572         return hr;
573     }
574     if (inParam.returnValue != CM_SUCCESS)
575     {
576         m_criticalSection.Release();
577         return inParam.returnValue;
578     }
579 
580     event = static_cast<CmEvent *>(inParam.cmEventHandle);
581     m_criticalSection.Release();
582     return CM_SUCCESS;
583 }
584 
EnqueueCopy(CmSurface2D * surface,const unsigned char * sysMem,const uint32_t widthStride,const uint32_t heightStride,CM_FASTCOPY_DIRECTION direction,const uint32_t option,CmEvent * & event)585 int32_t CmQueue_RT::EnqueueCopy(CmSurface2D *surface,
586                             const unsigned char *sysMem,
587                             const uint32_t widthStride,
588                             const uint32_t heightStride,
589                             CM_FASTCOPY_DIRECTION direction,
590                             const uint32_t option,
591                             CmEvent *&event)
592 {
593     CM_ENQUEUE_GPUCOPY_PARAM inParam;
594     CmSafeMemSet(&inParam, 0, sizeof(inParam));
595     inParam.cmQueueHandle = m_cmQueueHandle;
596 
597     inParam.cmSurface2d = surface;
598     inParam.sysMem = (void *)sysMem;
599     inParam.copyDir = direction;
600     inParam.widthStride = widthStride;
601     inParam.heightStride = heightStride;
602     inParam.option = option;
603     inParam.cmEventHandle = event;
604 
605     m_criticalSection.Acquire();
606 
607     int32_t hr = m_cmDev->OSALExtensionExecute(CM_FN_CMQUEUE_ENQUEUECOPY,
608                                                 &inParam, sizeof(inParam),
609                                                 nullptr, 0);
610     if (FAILED(hr))
611     {
612         CmAssert(0);
613         m_criticalSection.Release();
614         return hr;
615     }
616     if (inParam.returnValue != CM_SUCCESS)
617     {
618         m_criticalSection.Release();
619         return inParam.returnValue;
620     }
621 
622     event = static_cast<CmEvent *>(inParam.cmEventHandle);
623     m_criticalSection.Release();
624     return hr;
625 }
626 
EnqueueInitSurface2D(CmSurface2D * surface,const uint32_t initValue,CmEvent * & event)627 CM_RT_API int32_t CmQueue_RT::EnqueueInitSurface2D(CmSurface2D *surface,
628                                                const uint32_t initValue,
629                                                CmEvent *&event)
630 {
631     INSERT_PROFILER_RECORD();
632 
633     CM_ENQUEUE_2DInit_PARAM inParam;
634     CmSafeMemSet(&inParam, 0, sizeof(inParam));
635     inParam.cmQueueHandle = m_cmQueueHandle;
636     inParam.cmEventHandle = event;
637     inParam.cmSurface2d = surface;
638     inParam.initValue  = initValue;
639     m_criticalSection.Acquire();
640 
641     int32_t hr = m_cmDev->OSALExtensionExecute(CM_FN_CMQUEUE_ENQUEUESURF2DINIT,
642                                                 &inParam, sizeof(inParam));
643     if (FAILED(hr))
644     {
645         CmAssert(0);
646         m_criticalSection.Release();
647         return hr;
648     }
649     if (inParam.returnValue != CM_SUCCESS)
650     {
651         m_criticalSection.Release();
652         return inParam.returnValue;
653     }
654 
655     event = static_cast<CmEvent *>(inParam.cmEventHandle);
656     m_criticalSection.Release();
657     return hr;
658 }
659 
EnqueueCopyGPUToGPU(CmSurface2D * outputSurface,CmSurface2D * inputSurface,uint32_t option,CmEvent * & event)660 CM_RT_API int32_t CmQueue_RT::EnqueueCopyGPUToGPU(CmSurface2D *outputSurface,
661                                               CmSurface2D *inputSurface,
662                                               uint32_t option,
663                                               CmEvent *&event)
664 {
665     INSERT_PROFILER_RECORD();
666 
667     CM_ENQUEUE_GPUCOPY_V2V_PARAM inParam;
668     CmSafeMemSet(&inParam, 0, sizeof(inParam));
669     inParam.cmQueueHandle = m_cmQueueHandle;
670     inParam.option        = option;
671     inParam.cmEventHandle = event;
672     inParam.cmDstSurface2d = outputSurface;
673     inParam.cmSrcSurface2d = inputSurface;
674 
675     m_criticalSection.Acquire();
676 
677     int32_t hr = m_cmDev->OSALExtensionExecute(CM_FN_CMQUEUE_ENQUEUECOPY_V2V,
678                                                 &inParam, sizeof(inParam));
679     if (FAILED(hr))
680     {
681         CmAssert(0);
682         m_criticalSection.Release();
683         return hr;
684     }
685     if (inParam.returnValue != CM_SUCCESS)
686     {
687         m_criticalSection.Release();
688         return inParam.returnValue;
689     }
690 
691     event = static_cast<CmEvent *>(inParam.cmEventHandle);
692     m_criticalSection.Release();
693     return hr;
694 }
695 
EnqueueCopyCPUToCPU(unsigned char * dstSysMem,unsigned char * srcSysMem,uint32_t size,uint32_t option,CmEvent * & event)696 CM_RT_API int32_t CmQueue_RT::EnqueueCopyCPUToCPU(unsigned char *dstSysMem,
697                                               unsigned char *srcSysMem,
698                                               uint32_t size,
699                                               uint32_t option,
700                                               CmEvent *&event)
701 {
702     INSERT_PROFILER_RECORD();
703 
704     CM_ENQUEUE_GPUCOPY_L2L_PARAM inParam;
705     CmSafeMemSet(&inParam, 0, sizeof(inParam));
706     inParam.cmQueueHandle = m_cmQueueHandle;
707     inParam.srcSysMem     = srcSysMem;
708     inParam.dstSysMem     = dstSysMem;
709     inParam.copySize       = size;
710     inParam.option        = option;
711     inParam.cmEventHandle = event;
712 
713     m_criticalSection.Acquire();
714 
715     int32_t hr = m_cmDev->OSALExtensionExecute(CM_FN_CMQUEUE_ENQUEUECOPY_L2L,
716                                                 &inParam, sizeof(inParam));
717 
718     if (FAILED(hr))
719     {
720         CmAssert(0);
721         m_criticalSection.Release();
722         return hr;
723     }
724     if (inParam.returnValue != CM_SUCCESS)
725     {
726         m_criticalSection.Release();
727         return inParam.returnValue;
728     }
729 
730     event = static_cast<CmEvent *>(inParam.cmEventHandle);
731     m_criticalSection.Release();
732     return hr;
733 }
734 
EnqueueVebox(CmVebox * vebox,CmEvent * & event)735 CM_RT_API int32_t CmQueue_RT::EnqueueVebox(CmVebox *vebox, CmEvent *&event)
736 {
737     INSERT_PROFILER_RECORD();
738 
739     CM_ENQUEUE_VEBOX_PARAM inParam;
740     CmSafeMemSet(&inParam, 0, sizeof(inParam));
741     inParam.cmQueueHandle = m_cmQueueHandle;
742     inParam.cmVeboxHandle = vebox;
743     inParam.cmEventHandle = event;
744 
745     m_criticalSection.Acquire();
746 
747     int32_t hr = m_cmDev->OSALExtensionExecute(CM_FN_CMQUEUE_ENQUEUEVEBOX,
748                                                 &inParam, sizeof(inParam));
749 
750     if (FAILED(hr))
751     {
752         CmAssert(0);
753         m_criticalSection.Release();
754         return hr;
755     }
756     if (inParam.returnValue != CM_SUCCESS)
757     {
758         m_criticalSection.Release();
759         return inParam.returnValue;
760     }
761 
762     event = static_cast<CmEvent *>(inParam.cmEventHandle);
763     m_criticalSection.Release();
764     return hr;
765 }
766 
GetQueueOption()767 CM_QUEUE_CREATE_OPTION CmQueue_RT::GetQueueOption()
768 {
769     return m_queueOption;
770 }
771 
EnqueueFast(CmTask * task,CmEvent * & event,const CmThreadSpace * threadSpace)772 CM_RT_API int32_t CmQueue_RT::EnqueueFast(CmTask *task,
773                               CmEvent *&event,
774                               const CmThreadSpace *threadSpace)
775 {
776     INSERT_PROFILER_RECORD();
777     if (task == nullptr)
778     {
779         CmAssert(0);
780         CmDebugMessage(("Kernel array is NULL."));
781         return CM_INVALID_ARG_VALUE;
782     }
783     m_criticalSection.Acquire();
784 
785     CM_ENQUEUE_PARAM inParam;
786     CmSafeMemSet(&inParam, 0, sizeof(inParam));
787     inParam.cmTaskHandle = task;
788     inParam.cmQueueHandle = m_cmQueueHandle;
789     inParam.cmThreadSpaceHandle = (void *)threadSpace;
790     inParam.cmEventHandle = event;  // to support invisiable event, this field is used for input/output.
791 
792     int32_t hr = m_cmDev->OSALExtensionExecute(CM_FN_CMQUEUE_ENQUEUEFAST,
793                                                 &inParam, sizeof(inParam));
794     if (FAILED(hr))
795     {
796         CmAssert(0);
797         m_criticalSection.Release();
798         return hr;
799     }
800     if (inParam.returnValue != CM_SUCCESS)
801     {
802         m_criticalSection.Release();
803         return inParam.returnValue;
804     }
805 
806     event = static_cast<CmEvent *>(inParam.cmEventHandle);
807     m_criticalSection.Release();
808     return CM_SUCCESS;
809 }
810 
EnqueueWithGroupFast(CmTask * task,CmEvent * & event,const CmThreadGroupSpace * threadGroupSpace)811 CM_RT_API int32_t CmQueue_RT::EnqueueWithGroupFast(CmTask *task,
812                               CmEvent *&event,
813                               const CmThreadGroupSpace *threadGroupSpace)
814 {
815     INSERT_PROFILER_RECORD();
816     if (task == nullptr)
817     {
818         CmAssert(0);
819         CmDebugMessage(("Kernel array is NULL."));
820         return CM_INVALID_ARG_VALUE;
821     }
822     m_criticalSection.Acquire();
823 
824     CM_ENQUEUEGROUP_PARAM inParam;
825     CmSafeMemSet(&inParam, 0, sizeof(inParam));
826     inParam.cmTaskHandle = task;
827     inParam.cmQueueHandle = m_cmQueueHandle;
828     inParam.cmTGrpSpaceHandle = (void *)threadGroupSpace;
829     inParam.cmEventHandle = event;  // to support invisiable event, this field is used for input/output.
830 
831     int32_t hr =
832         m_cmDev->OSALExtensionExecute(CM_FN_CMQUEUE_ENQUEUEWITHGROUPFAST,
833                                        &inParam, sizeof(inParam));
834     if (FAILED(hr))
835     {
836         CmAssert(0);
837         m_criticalSection.Release();
838         return hr;
839     }
840     if (inParam.returnValue != CM_SUCCESS)
841     {
842         m_criticalSection.Release();
843         return inParam.returnValue;
844     }
845 
846     event = static_cast<CmEvent *>(inParam.cmEventHandle);
847     m_criticalSection.Release();
848     return CM_SUCCESS;
849 
850 }
851 
852 
DestroyEventFast(CmEvent * & event)853 CM_RT_API int32_t CmQueue_RT::DestroyEventFast(CmEvent *&event)
854 {
855     INSERT_PROFILER_RECORD();
856     if (event == nullptr)
857     {
858         return CM_INVALID_ARG_VALUE;
859     }
860 
861     CM_DESTROYEVENT_PARAM inParam;
862     CmSafeMemSet(&inParam, 0, sizeof(inParam));
863     inParam.cmQueueHandle = m_cmQueueHandle;
864     inParam.cmEventHandle = event;
865 
866     int32_t hr = m_cmDev->OSALExtensionExecute(CM_FN_CMQUEUE_DESTROYEVENTFAST,
867                                                 &inParam, sizeof(inParam));
868     CHK_FAILURE_RETURN(hr);
869     CHK_FAILURE_RETURN(inParam.returnValue);
870     event = nullptr;
871     return CM_SUCCESS;
872 }
873 
SetResidentGroupAndParallelThreadNum(uint32_t residentGroupNum,uint32_t parallelThreadNum)874 CM_RT_API int32_t CmQueue_RT::SetResidentGroupAndParallelThreadNum(uint32_t residentGroupNum, uint32_t parallelThreadNum)
875 {
876     return CM_NOT_IMPLEMENTED;
877 }
878 
879 
EnqueueReadBuffer(CmBuffer * buffer,size_t offset,const unsigned char * sysMem,uint64_t sysMemSize,CmEvent * wait_event,CmEvent * & event,unsigned option)880 CM_RT_API int32_t CmQueue_RT::EnqueueReadBuffer(CmBuffer* buffer,
881                                                 size_t offset,
882                                                 const unsigned char* sysMem,
883                                                 uint64_t sysMemSize,
884                                                 CmEvent* wait_event,
885                                                 CmEvent*& event,
886                                                 unsigned option)
887 {
888     INSERT_PROFILER_RECORD();
889     CM_ENQUEUE_COPY_BUFFER_PARAM inParam;
890     CmSafeMemSet(&inParam, 0, sizeof(inParam));
891     inParam.cmQueueHandle = m_cmQueueHandle;
892     inParam.buffer = buffer;
893     inParam.sysMem = (void*)sysMem;
894     inParam.copySize = sysMemSize;
895     inParam.offset = offset;
896     inParam.copyDir = 0;
897     inParam.wait_event = wait_event;
898     inParam.option = option;
899     inParam.copyDir = CM_FASTCOPY_GPU2CPU;
900     inParam.cmEventHandle = event;
901 
902     m_criticalSection.Acquire();
903 
904     int32_t hr = m_cmDev->OSALExtensionExecute(CM_FN_CMQUEUE_ENQUEUECOPY_BUFFER,
905         &inParam,
906         sizeof(inParam));
907     if (FAILED(hr))
908     {
909         CmAssert(0);
910         m_criticalSection.Release();
911         return hr;
912     }
913     if (inParam.returnValue != CM_SUCCESS)
914     {
915         m_criticalSection.Release();
916         return inParam.returnValue;
917     }
918 
919     event = static_cast<CmEvent*>(inParam.cmEventHandle);
920     m_criticalSection.Release();
921     return CM_SUCCESS;
922 }
923 
EnqueueWriteBuffer(CmBuffer * buffer,size_t offset,const unsigned char * sysMem,uint64_t sysMemSize,CmEvent * wait_event,CmEvent * & event,unsigned option)924 CM_RT_API int32_t CmQueue_RT::EnqueueWriteBuffer(CmBuffer* buffer,
925                                                  size_t offset,
926                                                  const unsigned char* sysMem,
927                                                  uint64_t sysMemSize,
928                                                  CmEvent* wait_event,
929                                                  CmEvent*& event,
930                                                  unsigned option)
931 {
932     INSERT_PROFILER_RECORD();
933     CM_ENQUEUE_COPY_BUFFER_PARAM inParam;
934     CmSafeMemSet(&inParam, 0, sizeof(inParam));
935     inParam.cmQueueHandle = m_cmQueueHandle;
936     inParam.buffer = buffer;
937     inParam.sysMem = (void*)sysMem;
938     inParam.copySize = sysMemSize;
939     inParam.offset = offset;
940     inParam.copyDir = 1;
941     inParam.wait_event = wait_event;
942     inParam.option = option;
943     inParam.copyDir = CM_FASTCOPY_CPU2GPU;
944     inParam.cmEventHandle = event;
945 
946     m_criticalSection.Acquire();
947 
948     int32_t hr = m_cmDev->OSALExtensionExecute(CM_FN_CMQUEUE_ENQUEUECOPY_BUFFER,
949         &inParam,
950         sizeof(inParam));
951     if (FAILED(hr))
952     {
953         CmAssert(0);
954         m_criticalSection.Release();
955         return hr;
956     }
957     if (inParam.returnValue != CM_SUCCESS)
958     {
959         m_criticalSection.Release();
960         return inParam.returnValue;
961     }
962 
963     event = static_cast<CmEvent*>(inParam.cmEventHandle);
964     m_criticalSection.Release();
965     return CM_SUCCESS;
966 }
967