1 /*
2 * Copyright (c) 2017, Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 */
22 #include "cm_queue.h"
23 #include "cm_debug.h"
24 #include "cm_device.h"
25 #include "cm_include.h"
26 #include "cm_mem.h"
27 #include "cm_timer.h"
28
// I/O block marshalled to CMRT@UMD for CM_FN_CMDEVICE_CREATEQUEUE and
// CM_FN_CMDEVICE_CREATEQUEUEEX: carries the requested queue options down and
// returns the UMD queue handle plus the UMD-side return code.
// Layout must stay in sync with the UMD definition -- do not reorder fields.
struct CM_CREATEQUEUE_PARAM
{
    CM_QUEUE_CREATE_OPTION createOption; // [in/out] requested options; UMD may adjust them
    void *cmQueueHandle;                 // [out] opaque UMD-side queue handle
    int32_t returnValue;                 // [out] UMD-side return code
};
35
// I/O block for CM_FN_CMQUEUE_ENQUEUE / CM_FN_CMQUEUE_ENQUEUEFAST.
// Layout must stay in sync with the UMD definition -- do not reorder fields.
struct CM_ENQUEUE_PARAM
{
    void *cmQueueHandle;        // [in]
    void *cmTaskHandle;         // [in]
    void *cmThreadSpaceHandle;  // [in]
    void *cmEventHandle;        // [out] (also read as input to support invisible events)
    uint32_t eventIndex;        // [out] index of Event in m_EventArray
    int32_t returnValue;        // [out]
};
45
// I/O block for CM_FN_CMQUEUE_ENQUEUEWITHGROUP / CM_FN_CMQUEUE_ENQUEUEWITHGROUPFAST.
// Layout must stay in sync with the UMD definition -- do not reorder fields.
struct CM_ENQUEUEGROUP_PARAM
{
    void *cmQueueHandle;     // [in]
    void *cmTaskHandle;      // [in]
    void *cmTGrpSpaceHandle; // [in] thread-group space handle
    void *cmEventHandle;     // [out] (also read as input to support invisible events)
    uint32_t eventIndex;     // [out] index of Event in m_EventArray
    int32_t returnValue;     // [out]
};
55
// I/O block for CM_FN_CMQUEUE_ENQUEUEWITHHINTS.
// Layout must stay in sync with the UMD definition -- do not reorder fields.
struct CM_ENQUEUEHINTS_PARAM
{
    void *cmQueueHandle; // [in]
    void *cmTaskHandle;  // [in]
    void *cmEventHandle; // [in] (updated by UMD; used for input/output)
    uint32_t hints;      // [in]
    uint32_t eventIndex; // [out] index of Event in m_EventArray
    int32_t returnValue; // [out]
};
65
// I/O block for CM_FN_CMQUEUE_DESTROYEVENT / CM_FN_CMQUEUE_DESTROYEVENTFAST.
// Layout must stay in sync with the UMD definition -- do not reorder fields.
struct CM_DESTROYEVENT_PARAM
{
    void *cmQueueHandle; // [in]
    void *cmEventHandle; // [in] event to destroy
    int32_t returnValue; // [out]
};
72
// I/O block for CM_FN_CMQUEUE_ENQUEUECOPY_V2V (video-to-video 2D surface copy).
// Layout must stay in sync with the UMD definition -- do not reorder fields.
struct CM_ENQUEUE_GPUCOPY_V2V_PARAM
{
    void *cmQueueHandle; // [in]
    void *cmSrcSurface2d; // [in]
    void *cmDstSurface2d; // [in]
    uint32_t option;      // [in]
    void *cmEventHandle;  // [out]
    uint32_t eventIndex;  // [out] index of Event in m_EventArray
    int32_t returnValue;  // [out]
};
83
// I/O block for CM_FN_CMQUEUE_ENQUEUECOPY_L2L (linear-to-linear system-memory copy).
// Layout must stay in sync with the UMD definition -- do not reorder fields.
struct CM_ENQUEUE_GPUCOPY_L2L_PARAM
{
    void *cmQueueHandle; // [in]
    void *srcSysMem;     // [in]
    void *dstSysMem;     // [in]
    uint32_t copySize;   // [in] bytes to copy
    uint32_t option;     // [in]
    void *cmEventHandle; // [out]
    uint32_t eventIndex; // [out] index of Event in m_EventArray
    int32_t returnValue; // [out]
};
95
96
// I/O block for CM_FN_CMQUEUE_ENQUEUECOPY_BUFFER (buffer <-> system-memory copy,
// used by EnqueueReadBuffer / EnqueueWriteBuffer).
// Layout must stay in sync with the UMD definition -- do not reorder fields.
// NOTE(review): offset is only 32 bits while copySize is 64 bits -- callers
// passing a size_t offset get silently truncated above 4GB; confirm intended.
struct CM_ENQUEUE_COPY_BUFFER_PARAM
{
    void* cmQueueHandle; // [in]
    void* buffer;        // [in]
    void* sysMem;        // [in]
    uint32_t offset;     // [in] byte offset into the buffer
    uint64_t copySize;   // [in] bytes to copy
    uint32_t copyDir;    // [in] CM_FASTCOPY_GPU2CPU (read) or CM_FASTCOPY_CPU2GPU (write)
    void* wait_event;    // [in] optional event this copy waits on
    void* cmEventHandle; // [out]
    uint32_t option;     // [in]
    uint32_t eventIndex; // [out] index of Event in m_EventArray
    int32_t returnValue; // [out]
};
111
// I/O block for CM_FN_CMQUEUE_ENQUEUESURF2DINIT (fill a 2D surface with a value).
// Layout must stay in sync with the UMD definition -- do not reorder fields.
struct CM_ENQUEUE_2DInit_PARAM
{
    void *cmQueueHandle; // [in]
    void *cmSurface2d;   // [in] surface to initialize
    uint32_t initValue;  // [in] fill value
    void *cmEventHandle; // [out]
    uint32_t eventIndex; // [out] index of Event in m_EventArray
    int32_t returnValue; // [out]
};
121
// I/O block for CM_FN_CMQUEUE_ENQUEUEVEBOX.
// Layout must stay in sync with the UMD definition -- do not reorder fields.
struct CM_ENQUEUE_VEBOX_PARAM
{
    void *cmQueueHandle; // [IN]
    void *cmVeboxHandle; // [IN] CmVeboxG75's handle
    void *cmEventHandle; // [out] event's handle
    uint32_t eventIndex; // [out] event's index in m_EventArray CMRT@UMD
    int32_t returnValue; // [out] return value
};
130
Create(CmDevice_RT * device,CmQueue_RT * & queue,CM_QUEUE_CREATE_OPTION queueCreateOption)131 int32_t CmQueue_RT::Create(CmDevice_RT *device, CmQueue_RT *&queue, CM_QUEUE_CREATE_OPTION queueCreateOption)
132 {
133 int32_t result = CM_SUCCESS;
134 queue = new(std::nothrow) CmQueue_RT(device, queueCreateOption);
135 if (queue)
136 {
137 result = queue->Initialize(queueCreateOption);
138 if (result != CM_SUCCESS)
139 {
140 CmQueue_RT::Destroy(queue);
141 }
142 }
143 else
144 {
145 CmAssert(0);
146 result = CM_OUT_OF_HOST_MEMORY;
147 }
148 return result;
149 }
150
//! Destroys a queue created by CmQueue_RT::Create.
//! CmSafeRelease presumably deletes the object and nulls the caller's
//! pointer (see cm_mem.h) -- TODO confirm.
int32_t CmQueue_RT::Destroy(CmQueue_RT *&queue)
{
    CmSafeRelease(queue);
    return CM_SUCCESS;
}
156
// Constructor: records the owning device and the requested creation options.
// The UMD queue handle stays null until Initialize() succeeds.
CmQueue_RT::CmQueue_RT(CmDevice_RT *device, CM_QUEUE_CREATE_OPTION queueCreateOption):
    m_cmDev(device),
    m_cmQueueHandle(nullptr),
    m_queueOption(queueCreateOption) {}
161
// Destructor: the UMD-side queue handle is not released here; teardown is
// presumably handled by the device/UMD -- TODO confirm ownership.
CmQueue_RT::~CmQueue_RT() {}
163
Initialize()164 int32_t CmQueue_RT::Initialize()
165 {
166 CM_CREATEQUEUE_PARAM inParam;
167 CmSafeMemSet(&inParam, 0, sizeof(inParam));
168
169 int32_t hr = m_cmDev->OSALExtensionExecute(CM_FN_CMDEVICE_CREATEQUEUE,
170 &inParam, sizeof(inParam));
171 CHK_FAILURE_RETURN(hr);
172 CHK_FAILURE_RETURN(inParam.returnValue);
173 m_cmQueueHandle = inParam.cmQueueHandle;
174 m_queueOption = inParam.createOption;
175 return CM_SUCCESS;
176 }
177
Initialize(CM_QUEUE_CREATE_OPTION queueCreateOption)178 int32_t CmQueue_RT::Initialize(CM_QUEUE_CREATE_OPTION queueCreateOption)
179 {
180 CM_CREATEQUEUE_PARAM inParam;
181 CmSafeMemSet(&inParam, 0, sizeof(inParam));
182 inParam.createOption = queueCreateOption;
183
184 int32_t hr = m_cmDev->OSALExtensionExecute(CM_FN_CMDEVICE_CREATEQUEUEEX,
185 &inParam, sizeof(inParam));
186 CHK_FAILURE_RETURN(hr);
187 CHK_FAILURE_RETURN(inParam.returnValue);
188 m_cmQueueHandle = inParam.cmQueueHandle;
189 return CM_SUCCESS;
190 }
191
192 //!
193 //! Enqueue an task. Each task have one or more kernels running concurrently.
194 //! Each kernel can run in multiple threads concurrently.
195 //! Tasks get executed according to the order they get enqueued. The next task
196 //! doesn't start execute until the current task finishs.
197 //! When the last argument, pThreadSpace, is not nullptr, there are dependency among all threads within a task
198 //! Enqueue will make sure each x/y pair in the CmThreadSpace object is associated with
199 //! a unique thread in the task to enqueue.Otherwise enqueue will fail.
200 //! This is a non-blocking call. i.e. it returs immediately without waiting for
201 //! GPU to finish the execution of the task.
202 //! A CmEvent is generated each time a task is enqueued. The CmEvent can
203 //! be used to check if the task finishs.
204 //! INPUT:
205 //! 1) Array of CmKernel_RT pointers. These kernels are to run concurrently. The
206 //! first nullptr pointer in the array indicates the end of kernels
207 //! 2) Reference to the pointer to CMEvent
208 //! 3) A boolean value to indicate if or not to flush the queue after enqueue the task
209 //! by default the boolean value is TRUE.
210 //! OUTPUT:
211 //! CM_SUCCESS if the task is successfully enqueued and the CmEvent is generated;
212 //! CM_OUT_OF_HOST_MEMORY if out of host memery;
213 //! CM_FAILURE otherwise.
214 //! More error code is coming.
215 //!
Enqueue(CmTask * task,CmEvent * & event,const CmThreadSpace * threadSpace)216 CM_RT_API int32_t CmQueue_RT::Enqueue(CmTask *task,
217 CmEvent *&event,
218 const CmThreadSpace *threadSpace)
219 {
220 INSERT_PROFILER_RECORD();
221 if (task == nullptr)
222 {
223 CmAssert(0);
224 CmDebugMessage(("Kernel array is NULL."));
225 return CM_INVALID_ARG_VALUE;
226 }
227 m_criticalSection.Acquire();
228
229 CM_ENQUEUE_PARAM inParam;
230 CmSafeMemSet(&inParam, 0, sizeof(inParam));
231 inParam.cmTaskHandle = task;
232 inParam.cmQueueHandle = m_cmQueueHandle;
233 inParam.cmThreadSpaceHandle = (void *)threadSpace;
234 inParam.cmEventHandle = event; // to support invisiable event, this field is used for input/output.
235
236 int32_t hr = m_cmDev->OSALExtensionExecute(CM_FN_CMQUEUE_ENQUEUE,
237 &inParam, sizeof(inParam));
238 if (FAILED(hr))
239 {
240 CmAssert(0);
241 m_criticalSection.Release();
242 return hr;
243 }
244 if (inParam.returnValue != CM_SUCCESS)
245 {
246 m_criticalSection.Release();
247 return inParam.returnValue;
248 }
249
250 event = static_cast<CmEvent *>(inParam.cmEventHandle);
251 m_criticalSection.Release();
252 return CM_SUCCESS;
253 }
254
EnqueueWithHints(CmTask * task,CmEvent * & event,uint32_t hints)255 CM_RT_API int32_t CmQueue_RT::EnqueueWithHints(CmTask *task,
256 CmEvent *&event,
257 uint32_t hints)
258 {
259 INSERT_PROFILER_RECORD();
260 if (task == nullptr)
261 {
262 CmAssert(0);
263 CmDebugMessage(("Kernel array is NULL."));
264 return CM_INVALID_ARG_VALUE;
265 }
266 m_criticalSection.Acquire();
267
268 CM_ENQUEUEHINTS_PARAM inParam;
269 CmSafeMemSet(&inParam, 0, sizeof(inParam));
270 inParam.cmTaskHandle = task;
271 inParam.cmQueueHandle = m_cmQueueHandle;
272 inParam.hints = hints;
273 inParam.cmEventHandle = event; // to support invisable event, this field is used for input/output
274 int32_t hr =
275 m_cmDev->OSALExtensionExecute(CM_FN_CMQUEUE_ENQUEUEWITHHINTS,
276 &inParam, sizeof(inParam));
277 if (FAILED(hr))
278 {
279 CmAssert(0);
280 m_criticalSection.Release();
281 return hr;
282 }
283 if (inParam.returnValue != CM_SUCCESS)
284 {
285 m_criticalSection.Release();
286 return inParam.returnValue;
287 }
288
289 event = static_cast<CmEvent *>(inParam.cmEventHandle);
290 m_criticalSection.Release();
291 return CM_SUCCESS;
292 }
293
294 //!
295 //! Enqueue an task, which contains one pre-defined kernel to
296 //! copy from host memory to surface
297 //! This is a non-blocking call. i.e. it returs immediately without waiting for
298 //! GPU to finish the execution of the task.
299 //! A CmEvent is generated each time a task is enqueued. The CmEvent can
300 //! be used to check if the task finishs.
301 //! INPUT:
302 //! 1) Pointer to the CmSurface2D_RT as copy destination
303 //! 2) Pointer to the host memory as copy source
304 //! 3) Reference to the pointer to CMEvent
305 //! 4) A boolean value to indicate if or not to flush the queue after enqueue the task
306 //! by default the boolean value is TRUE.
307 //! OUTPUT:
308 //! CM_SUCCESS if the task is successfully enqueued and the CmEvent is generated;
309 //! CM_OUT_OF_HOST_MEMORY if out of host memery;
310 //! CM_FAILURE otherwise.
311 //! More error code is coming.
312 //!
EnqueueCopyCPUToGPU(CmSurface2D * surface,const unsigned char * sysMem,CmEvent * & event)313 int32_t CmQueue_RT::EnqueueCopyCPUToGPU(CmSurface2D *surface,
314 const unsigned char *sysMem,
315 CmEvent *&event)
316 {
317 INSERT_PROFILER_RECORD();
318 return EnqueueCopy(surface,
319 sysMem,
320 0,
321 0,
322 CM_FASTCOPY_CPU2GPU,
323 CM_FASTCOPY_OPTION_NONBLOCKING,
324 event);
325 }
326
327 //!
328 //! Enqueue an task, which contains one pre-defined kernel to
329 //! copy from surface to host memory
330 //! This is a non-blocking call. i.e. it returs immediately without waiting for
331 //! GPU to finish the execution of the task.
332 //! A CmEvent is generated each time a task is enqueued. The CmEvent can
333 //! be used to check if the task finishs.
334 //! INPUT:
335 //! 1) Pointer to the CmSurface2D_RT as copy source
336 //! 2) Pointer to the host memory as copy destination
337 //! 3) Reference to the pointer to CMEvent
338 //! 4) A boolean value to indicate if or not to flush the queue after enqueue the task
339 //! by default the boolean value is TRUE.
340 //! OUTPUT:
341 //! CM_SUCCESS if the task is successfully enqueued and the CmEvent is generated;
342 //! CM_OUT_OF_HOST_MEMORY if out of host memery;
343 //! CM_FAILURE otherwise.
344 //! More error code is coming.
345 //!
EnqueueCopyGPUToCPU(CmSurface2D * surface,unsigned char * sysMem,CmEvent * & event)346 CM_RT_API int32_t CmQueue_RT::EnqueueCopyGPUToCPU(CmSurface2D *surface,
347 unsigned char *sysMem,
348 CmEvent *&event)
349 {
350 INSERT_PROFILER_RECORD();
351 return EnqueueCopy(surface,
352 sysMem,
353 0,
354 0,
355 CM_FASTCOPY_GPU2CPU,
356 CM_FASTCOPY_OPTION_NONBLOCKING,
357 event);
358 }
359
360 //!
361 //! Enqueue an task, which contains one pre-defined kernel to
362 //! copy from linear system memory to tiled video memory
363 //! This API supports both blocking/non-blocking copy, if user pass CM_GPUCOPY_OPTION_BLOCKING as option,
364 //! this API only return till copy operation is done. otherwise, this API will return immediately no waiting for copy in GPU.
365 //! A CmEvent is generated each time a task is enqueued. The CmEvent can
366 //! be used to check if the task finishs.
367 //! INPUT:
368 //! 1) Pointer to the CmSurface2D as copy destination
369 //! 2) Pointer to the host memory as copy resource
370 //! 3) width stride in bytes for system memory
371 //! 4) height stride in rows for system memory
372 //! 5) option: CM_FASTCOPY_OPTION_NONBLOCKING,CM_FASTCOPY_OPTION_BLOCKING or CM_FASTCOPY_OPTION_DISABLE_TURBO_BOOST
373 //! 6) Reference to the pointer to CMEvent
374 //!
375 //! RETURNS:
376 //! CM_SUCCESS if the task is successfully enqueued and the CmEvent is generated;
377 //! CM_OUT_OF_HOST_MEMORY if out of host memery;
378 //! CM_FAILURE otherwise.
379 //!
380 CM_RT_API int32_t
EnqueueCopyCPUToGPUFullStride(CmSurface2D * surface,const unsigned char * sysMem,const uint32_t widthStride,const uint32_t heightStride,const uint32_t option,CmEvent * & event)381 CmQueue_RT::EnqueueCopyCPUToGPUFullStride(CmSurface2D *surface,
382 const unsigned char *sysMem,
383 const uint32_t widthStride,
384 const uint32_t heightStride,
385 const uint32_t option,
386 CmEvent *&event)
387 {
388 INSERT_PROFILER_RECORD();
389 return EnqueueCopy(surface,
390 sysMem,
391 widthStride,
392 heightStride,
393 CM_FASTCOPY_CPU2GPU,
394 option,
395 event);
396 }
397
398 //!
399 //! Enqueue an task, which contains one pre-defined kernel to
400 //! copy from tiled video memory to linear system memory
401 //! This API supports both blocking/non-blocking copy, if user pass CM_FASTCOPY_OPTION_BLOCKING as option,
402 //! this API only return till copy operation is done. otherwise, this API will return immediately no waiting for copy in GPU.
403 //! A CmEvent is generated each time a task is enqueued. The CmEvent can
404 //! be used to check if the task finishs.
405 //! INPUT:
406 //! 1) Pointer to the CmSurface2D as copy resource
407 //! 2) Pointer to the host memory as copy destination
408 //! 3) width stride in bytes for system memory
409 //! 4) height stride in rows for system memory
410 //! 5) option: CM_FASTCOPY_OPTION_NONBLOCKING or CM_FASTCOPY_OPTION_BLOCKING
411 //! 6) Reference to the pointer to CMEvent
412 //!
413 //! RETURNS:
414 //! CM_SUCCESS if the task is successfully enqueued and the CmEvent is generated;
415 //! CM_OUT_OF_HOST_MEMORY if out of host memery;
416 //! CM_FAILURE otherwise.
417 //!
EnqueueCopyGPUToCPUFullStride(CmSurface2D * surface,unsigned char * sysMem,const uint32_t widthStride,const uint32_t heightStride,const uint32_t option,CmEvent * & event)418 CM_RT_API int32_t CmQueue_RT::EnqueueCopyGPUToCPUFullStride(CmSurface2D *surface,
419 unsigned char *sysMem,
420 const uint32_t widthStride,
421 const uint32_t heightStride,
422 const uint32_t option,
423 CmEvent *&event)
424 {
425 INSERT_PROFILER_RECORD();
426 return EnqueueCopy(surface,
427 sysMem,
428 widthStride,
429 heightStride,
430 CM_FASTCOPY_GPU2CPU,
431 option,
432 event);
433 }
434
435 //!
436 //! Enqueue an task, which contains one pre-defined kernel to
437 //! copy from linear system memory to tiled video memory
438 //! This API supports both blocking/non-blocking copy, if user pass CM_GPUCOPY_OPTION_BLOCKING as option,
439 //! this API only return till copy operation is done. otherwise, this API will return immediately no waiting for copy in GPU.
440 //! A CmEvent is generated each time a task is enqueued. The CmEvent can
441 //! be used to check if the task finishs.
442 //! INPUT:
443 //! 1) Pointer to the CmSurface2D as copy destination
444 //! 2) Pointer to the host memory as copy resource
445 //! 3) width stride in bytes for system memory
446 //! 4) height stride in rows for system memory
447 //! 5) option: CM_FASTCOPY_OPTION_NONBLOCKING,CM_FASTCOPY_OPTION_BLOCKING or CM_FASTCOPY_OPTION_DISABLE_TURBO_BOOST
448 //! 6) Reference to the pointer to CMEvent
449 //!
450 //! RETURNS:
451 //! CM_SUCCESS if the task is successfully enqueued and the CmEvent is generated;
452 //! CM_OUT_OF_HOST_MEMORY if out of host memery;
453 //! CM_FAILURE otherwise.
454 //!
455 CM_RT_API int32_t
EnqueueCopyCPUToGPUFullStrideDup(CmSurface2D * surface,const unsigned char * sysMem,const uint32_t widthStride,const uint32_t heightStride,const uint32_t option,CmEvent * & event)456 CmQueue_RT::EnqueueCopyCPUToGPUFullStrideDup(CmSurface2D *surface,
457 const unsigned char *sysMem,
458 const uint32_t widthStride,
459 const uint32_t heightStride,
460 const uint32_t option,
461 CmEvent *&event)
462 {
463 INSERT_PROFILER_RECORD();
464 return EnqueueCopy(surface,
465 sysMem,
466 widthStride,
467 heightStride,
468 CM_FASTCOPY_CPU2GPU,
469 option,
470 event);
471 }
472
473 //!
474 //! Enqueue an task, which contains one pre-defined kernel to
475 //! copy from tiled video memory to linear system memory
476 //! This API supports both blocking/non-blocking copy, if user pass CM_FASTCOPY_OPTION_BLOCKING as option,
477 //! this API only return till copy operation is done. otherwise, this API will return immediately no waiting for copy in GPU.
478 //! A CmEvent is generated each time a task is enqueued. The CmEvent can
479 //! be used to check if the task finishs.
480 //! INPUT:
481 //! 1) Pointer to the CmSurface2D as copy resource
482 //! 2) Pointer to the host memory as copy destination
483 //! 3) width stride in bytes for system memory
484 //! 4) height stride in rows for system memory
485 //! 5) option: CM_FASTCOPY_OPTION_NONBLOCKING or CM_FASTCOPY_OPTION_BLOCKING
486 //! 6) Reference to the pointer to CMEvent
487 //!
488 //! RETURNS:
489 //! CM_SUCCESS if the task is successfully enqueued and the CmEvent is generated;
490 //! CM_OUT_OF_HOST_MEMORY if out of host memery;
491 //! CM_FAILURE otherwise.
492 //!
EnqueueCopyGPUToCPUFullStrideDup(CmSurface2D * surface,unsigned char * sysMem,const uint32_t widthStride,const uint32_t heightStride,const uint32_t option,CmEvent * & event)493 CM_RT_API int32_t CmQueue_RT::EnqueueCopyGPUToCPUFullStrideDup(CmSurface2D *surface,
494 unsigned char *sysMem,
495 const uint32_t widthStride,
496 const uint32_t heightStride,
497 const uint32_t option,
498 CmEvent *&event)
499 {
500 INSERT_PROFILER_RECORD();
501 return EnqueueCopy(surface,
502 sysMem,
503 widthStride,
504 heightStride,
505 CM_FASTCOPY_GPU2CPU,
506 option,
507 event);
508 }
509
DestroyEvent(CmEvent * & event)510 CM_RT_API int32_t CmQueue_RT::DestroyEvent(CmEvent *&event)
511 {
512 INSERT_PROFILER_RECORD();
513 if (event == nullptr)
514 {
515 return CM_FAILURE;
516 }
517
518 CM_DESTROYEVENT_PARAM inParam;
519 CmSafeMemSet(&inParam, 0, sizeof(inParam));
520 inParam.cmQueueHandle = m_cmQueueHandle;
521 inParam.cmEventHandle = event;
522
523 int32_t hr = m_cmDev->OSALExtensionExecute(CM_FN_CMQUEUE_DESTROYEVENT,
524 &inParam, sizeof(inParam));
525 CHK_FAILURE_RETURN(hr);
526 CHK_FAILURE_RETURN(inParam.returnValue);
527 event = nullptr;
528 return CM_SUCCESS;
529 }
530
531 //!
532 //! Function to enqueue task with thread group space pointer
533 //! Arguments:
534 //! 1. Pointer to CmTask, which can only contain one kernel.
535 //! 2. Reference to the pointer to CmEvent that is to be returned
536 //! 3. Pointer to a CmThreadGroupSpace.
537 //! Return Value:
538 //! CM_SUCCESS if the task is successfully enqueued and the CmEvent is generated
539 //! CM_OUT_OF_HOST_MEMORY if out of host memory
540 //! CM_FAILURE otherwise
541 //! Notes:
542 //! If the kernel has per thread arg, GPGPU object is to be used.
543 //! If the kernel has no per thread arg. GPGPU walker is used.
544 CM_RT_API int32_t
EnqueueWithGroup(CmTask * task,CmEvent * & event,const CmThreadGroupSpace * threadGroupSpace)545 CmQueue_RT::EnqueueWithGroup(CmTask *task,
546 CmEvent *&event,
547 const CmThreadGroupSpace *threadGroupSpace)
548 {
549 INSERT_PROFILER_RECORD();
550 if (task == nullptr)
551 {
552 CmAssert(0);
553 CmDebugMessage(("Kernel array is NULL."));
554 return CM_INVALID_ARG_VALUE;
555 }
556 m_criticalSection.Acquire();
557
558 CM_ENQUEUEGROUP_PARAM inParam;
559 CmSafeMemSet(&inParam, 0, sizeof(inParam));
560 inParam.cmTaskHandle = task;
561 inParam.cmQueueHandle = m_cmQueueHandle;
562 inParam.cmTGrpSpaceHandle = (void *)threadGroupSpace;
563 inParam.cmEventHandle = event; // to support invisiable event, this field is used for input/output.
564
565 int32_t hr =
566 m_cmDev->OSALExtensionExecute(CM_FN_CMQUEUE_ENQUEUEWITHGROUP,
567 &inParam, sizeof(inParam));
568 if (FAILED(hr))
569 {
570 CmAssert(0);
571 m_criticalSection.Release();
572 return hr;
573 }
574 if (inParam.returnValue != CM_SUCCESS)
575 {
576 m_criticalSection.Release();
577 return inParam.returnValue;
578 }
579
580 event = static_cast<CmEvent *>(inParam.cmEventHandle);
581 m_criticalSection.Release();
582 return CM_SUCCESS;
583 }
584
EnqueueCopy(CmSurface2D * surface,const unsigned char * sysMem,const uint32_t widthStride,const uint32_t heightStride,CM_FASTCOPY_DIRECTION direction,const uint32_t option,CmEvent * & event)585 int32_t CmQueue_RT::EnqueueCopy(CmSurface2D *surface,
586 const unsigned char *sysMem,
587 const uint32_t widthStride,
588 const uint32_t heightStride,
589 CM_FASTCOPY_DIRECTION direction,
590 const uint32_t option,
591 CmEvent *&event)
592 {
593 CM_ENQUEUE_GPUCOPY_PARAM inParam;
594 CmSafeMemSet(&inParam, 0, sizeof(inParam));
595 inParam.cmQueueHandle = m_cmQueueHandle;
596
597 inParam.cmSurface2d = surface;
598 inParam.sysMem = (void *)sysMem;
599 inParam.copyDir = direction;
600 inParam.widthStride = widthStride;
601 inParam.heightStride = heightStride;
602 inParam.option = option;
603 inParam.cmEventHandle = event;
604
605 m_criticalSection.Acquire();
606
607 int32_t hr = m_cmDev->OSALExtensionExecute(CM_FN_CMQUEUE_ENQUEUECOPY,
608 &inParam, sizeof(inParam),
609 nullptr, 0);
610 if (FAILED(hr))
611 {
612 CmAssert(0);
613 m_criticalSection.Release();
614 return hr;
615 }
616 if (inParam.returnValue != CM_SUCCESS)
617 {
618 m_criticalSection.Release();
619 return inParam.returnValue;
620 }
621
622 event = static_cast<CmEvent *>(inParam.cmEventHandle);
623 m_criticalSection.Release();
624 return hr;
625 }
626
EnqueueInitSurface2D(CmSurface2D * surface,const uint32_t initValue,CmEvent * & event)627 CM_RT_API int32_t CmQueue_RT::EnqueueInitSurface2D(CmSurface2D *surface,
628 const uint32_t initValue,
629 CmEvent *&event)
630 {
631 INSERT_PROFILER_RECORD();
632
633 CM_ENQUEUE_2DInit_PARAM inParam;
634 CmSafeMemSet(&inParam, 0, sizeof(inParam));
635 inParam.cmQueueHandle = m_cmQueueHandle;
636 inParam.cmEventHandle = event;
637 inParam.cmSurface2d = surface;
638 inParam.initValue = initValue;
639 m_criticalSection.Acquire();
640
641 int32_t hr = m_cmDev->OSALExtensionExecute(CM_FN_CMQUEUE_ENQUEUESURF2DINIT,
642 &inParam, sizeof(inParam));
643 if (FAILED(hr))
644 {
645 CmAssert(0);
646 m_criticalSection.Release();
647 return hr;
648 }
649 if (inParam.returnValue != CM_SUCCESS)
650 {
651 m_criticalSection.Release();
652 return inParam.returnValue;
653 }
654
655 event = static_cast<CmEvent *>(inParam.cmEventHandle);
656 m_criticalSection.Release();
657 return hr;
658 }
659
EnqueueCopyGPUToGPU(CmSurface2D * outputSurface,CmSurface2D * inputSurface,uint32_t option,CmEvent * & event)660 CM_RT_API int32_t CmQueue_RT::EnqueueCopyGPUToGPU(CmSurface2D *outputSurface,
661 CmSurface2D *inputSurface,
662 uint32_t option,
663 CmEvent *&event)
664 {
665 INSERT_PROFILER_RECORD();
666
667 CM_ENQUEUE_GPUCOPY_V2V_PARAM inParam;
668 CmSafeMemSet(&inParam, 0, sizeof(inParam));
669 inParam.cmQueueHandle = m_cmQueueHandle;
670 inParam.option = option;
671 inParam.cmEventHandle = event;
672 inParam.cmDstSurface2d = outputSurface;
673 inParam.cmSrcSurface2d = inputSurface;
674
675 m_criticalSection.Acquire();
676
677 int32_t hr = m_cmDev->OSALExtensionExecute(CM_FN_CMQUEUE_ENQUEUECOPY_V2V,
678 &inParam, sizeof(inParam));
679 if (FAILED(hr))
680 {
681 CmAssert(0);
682 m_criticalSection.Release();
683 return hr;
684 }
685 if (inParam.returnValue != CM_SUCCESS)
686 {
687 m_criticalSection.Release();
688 return inParam.returnValue;
689 }
690
691 event = static_cast<CmEvent *>(inParam.cmEventHandle);
692 m_criticalSection.Release();
693 return hr;
694 }
695
EnqueueCopyCPUToCPU(unsigned char * dstSysMem,unsigned char * srcSysMem,uint32_t size,uint32_t option,CmEvent * & event)696 CM_RT_API int32_t CmQueue_RT::EnqueueCopyCPUToCPU(unsigned char *dstSysMem,
697 unsigned char *srcSysMem,
698 uint32_t size,
699 uint32_t option,
700 CmEvent *&event)
701 {
702 INSERT_PROFILER_RECORD();
703
704 CM_ENQUEUE_GPUCOPY_L2L_PARAM inParam;
705 CmSafeMemSet(&inParam, 0, sizeof(inParam));
706 inParam.cmQueueHandle = m_cmQueueHandle;
707 inParam.srcSysMem = srcSysMem;
708 inParam.dstSysMem = dstSysMem;
709 inParam.copySize = size;
710 inParam.option = option;
711 inParam.cmEventHandle = event;
712
713 m_criticalSection.Acquire();
714
715 int32_t hr = m_cmDev->OSALExtensionExecute(CM_FN_CMQUEUE_ENQUEUECOPY_L2L,
716 &inParam, sizeof(inParam));
717
718 if (FAILED(hr))
719 {
720 CmAssert(0);
721 m_criticalSection.Release();
722 return hr;
723 }
724 if (inParam.returnValue != CM_SUCCESS)
725 {
726 m_criticalSection.Release();
727 return inParam.returnValue;
728 }
729
730 event = static_cast<CmEvent *>(inParam.cmEventHandle);
731 m_criticalSection.Release();
732 return hr;
733 }
734
EnqueueVebox(CmVebox * vebox,CmEvent * & event)735 CM_RT_API int32_t CmQueue_RT::EnqueueVebox(CmVebox *vebox, CmEvent *&event)
736 {
737 INSERT_PROFILER_RECORD();
738
739 CM_ENQUEUE_VEBOX_PARAM inParam;
740 CmSafeMemSet(&inParam, 0, sizeof(inParam));
741 inParam.cmQueueHandle = m_cmQueueHandle;
742 inParam.cmVeboxHandle = vebox;
743 inParam.cmEventHandle = event;
744
745 m_criticalSection.Acquire();
746
747 int32_t hr = m_cmDev->OSALExtensionExecute(CM_FN_CMQUEUE_ENQUEUEVEBOX,
748 &inParam, sizeof(inParam));
749
750 if (FAILED(hr))
751 {
752 CmAssert(0);
753 m_criticalSection.Release();
754 return hr;
755 }
756 if (inParam.returnValue != CM_SUCCESS)
757 {
758 m_criticalSection.Release();
759 return inParam.returnValue;
760 }
761
762 event = static_cast<CmEvent *>(inParam.cmEventHandle);
763 m_criticalSection.Release();
764 return hr;
765 }
766
// Returns the queue-creation options this queue holds; note Initialize()
// (the default-option path) may have replaced the requested options with
// what the UMD actually granted.
CM_QUEUE_CREATE_OPTION CmQueue_RT::GetQueueOption()
{
    return m_queueOption;
}
771
EnqueueFast(CmTask * task,CmEvent * & event,const CmThreadSpace * threadSpace)772 CM_RT_API int32_t CmQueue_RT::EnqueueFast(CmTask *task,
773 CmEvent *&event,
774 const CmThreadSpace *threadSpace)
775 {
776 INSERT_PROFILER_RECORD();
777 if (task == nullptr)
778 {
779 CmAssert(0);
780 CmDebugMessage(("Kernel array is NULL."));
781 return CM_INVALID_ARG_VALUE;
782 }
783 m_criticalSection.Acquire();
784
785 CM_ENQUEUE_PARAM inParam;
786 CmSafeMemSet(&inParam, 0, sizeof(inParam));
787 inParam.cmTaskHandle = task;
788 inParam.cmQueueHandle = m_cmQueueHandle;
789 inParam.cmThreadSpaceHandle = (void *)threadSpace;
790 inParam.cmEventHandle = event; // to support invisiable event, this field is used for input/output.
791
792 int32_t hr = m_cmDev->OSALExtensionExecute(CM_FN_CMQUEUE_ENQUEUEFAST,
793 &inParam, sizeof(inParam));
794 if (FAILED(hr))
795 {
796 CmAssert(0);
797 m_criticalSection.Release();
798 return hr;
799 }
800 if (inParam.returnValue != CM_SUCCESS)
801 {
802 m_criticalSection.Release();
803 return inParam.returnValue;
804 }
805
806 event = static_cast<CmEvent *>(inParam.cmEventHandle);
807 m_criticalSection.Release();
808 return CM_SUCCESS;
809 }
810
EnqueueWithGroupFast(CmTask * task,CmEvent * & event,const CmThreadGroupSpace * threadGroupSpace)811 CM_RT_API int32_t CmQueue_RT::EnqueueWithGroupFast(CmTask *task,
812 CmEvent *&event,
813 const CmThreadGroupSpace *threadGroupSpace)
814 {
815 INSERT_PROFILER_RECORD();
816 if (task == nullptr)
817 {
818 CmAssert(0);
819 CmDebugMessage(("Kernel array is NULL."));
820 return CM_INVALID_ARG_VALUE;
821 }
822 m_criticalSection.Acquire();
823
824 CM_ENQUEUEGROUP_PARAM inParam;
825 CmSafeMemSet(&inParam, 0, sizeof(inParam));
826 inParam.cmTaskHandle = task;
827 inParam.cmQueueHandle = m_cmQueueHandle;
828 inParam.cmTGrpSpaceHandle = (void *)threadGroupSpace;
829 inParam.cmEventHandle = event; // to support invisiable event, this field is used for input/output.
830
831 int32_t hr =
832 m_cmDev->OSALExtensionExecute(CM_FN_CMQUEUE_ENQUEUEWITHGROUPFAST,
833 &inParam, sizeof(inParam));
834 if (FAILED(hr))
835 {
836 CmAssert(0);
837 m_criticalSection.Release();
838 return hr;
839 }
840 if (inParam.returnValue != CM_SUCCESS)
841 {
842 m_criticalSection.Release();
843 return inParam.returnValue;
844 }
845
846 event = static_cast<CmEvent *>(inParam.cmEventHandle);
847 m_criticalSection.Release();
848 return CM_SUCCESS;
849
850 }
851
852
DestroyEventFast(CmEvent * & event)853 CM_RT_API int32_t CmQueue_RT::DestroyEventFast(CmEvent *&event)
854 {
855 INSERT_PROFILER_RECORD();
856 if (event == nullptr)
857 {
858 return CM_INVALID_ARG_VALUE;
859 }
860
861 CM_DESTROYEVENT_PARAM inParam;
862 CmSafeMemSet(&inParam, 0, sizeof(inParam));
863 inParam.cmQueueHandle = m_cmQueueHandle;
864 inParam.cmEventHandle = event;
865
866 int32_t hr = m_cmDev->OSALExtensionExecute(CM_FN_CMQUEUE_DESTROYEVENTFAST,
867 &inParam, sizeof(inParam));
868 CHK_FAILURE_RETURN(hr);
869 CHK_FAILURE_RETURN(inParam.returnValue);
870 event = nullptr;
871 return CM_SUCCESS;
872 }
873
// Stub: resident-group / parallel-thread configuration is not supported by
// this runtime path; both arguments are ignored and CM_NOT_IMPLEMENTED is
// always returned.
CM_RT_API int32_t CmQueue_RT::SetResidentGroupAndParallelThreadNum(uint32_t residentGroupNum, uint32_t parallelThreadNum)
{
    return CM_NOT_IMPLEMENTED;
}
878
879
EnqueueReadBuffer(CmBuffer * buffer,size_t offset,const unsigned char * sysMem,uint64_t sysMemSize,CmEvent * wait_event,CmEvent * & event,unsigned option)880 CM_RT_API int32_t CmQueue_RT::EnqueueReadBuffer(CmBuffer* buffer,
881 size_t offset,
882 const unsigned char* sysMem,
883 uint64_t sysMemSize,
884 CmEvent* wait_event,
885 CmEvent*& event,
886 unsigned option)
887 {
888 INSERT_PROFILER_RECORD();
889 CM_ENQUEUE_COPY_BUFFER_PARAM inParam;
890 CmSafeMemSet(&inParam, 0, sizeof(inParam));
891 inParam.cmQueueHandle = m_cmQueueHandle;
892 inParam.buffer = buffer;
893 inParam.sysMem = (void*)sysMem;
894 inParam.copySize = sysMemSize;
895 inParam.offset = offset;
896 inParam.copyDir = 0;
897 inParam.wait_event = wait_event;
898 inParam.option = option;
899 inParam.copyDir = CM_FASTCOPY_GPU2CPU;
900 inParam.cmEventHandle = event;
901
902 m_criticalSection.Acquire();
903
904 int32_t hr = m_cmDev->OSALExtensionExecute(CM_FN_CMQUEUE_ENQUEUECOPY_BUFFER,
905 &inParam,
906 sizeof(inParam));
907 if (FAILED(hr))
908 {
909 CmAssert(0);
910 m_criticalSection.Release();
911 return hr;
912 }
913 if (inParam.returnValue != CM_SUCCESS)
914 {
915 m_criticalSection.Release();
916 return inParam.returnValue;
917 }
918
919 event = static_cast<CmEvent*>(inParam.cmEventHandle);
920 m_criticalSection.Release();
921 return CM_SUCCESS;
922 }
923
EnqueueWriteBuffer(CmBuffer * buffer,size_t offset,const unsigned char * sysMem,uint64_t sysMemSize,CmEvent * wait_event,CmEvent * & event,unsigned option)924 CM_RT_API int32_t CmQueue_RT::EnqueueWriteBuffer(CmBuffer* buffer,
925 size_t offset,
926 const unsigned char* sysMem,
927 uint64_t sysMemSize,
928 CmEvent* wait_event,
929 CmEvent*& event,
930 unsigned option)
931 {
932 INSERT_PROFILER_RECORD();
933 CM_ENQUEUE_COPY_BUFFER_PARAM inParam;
934 CmSafeMemSet(&inParam, 0, sizeof(inParam));
935 inParam.cmQueueHandle = m_cmQueueHandle;
936 inParam.buffer = buffer;
937 inParam.sysMem = (void*)sysMem;
938 inParam.copySize = sysMemSize;
939 inParam.offset = offset;
940 inParam.copyDir = 1;
941 inParam.wait_event = wait_event;
942 inParam.option = option;
943 inParam.copyDir = CM_FASTCOPY_CPU2GPU;
944 inParam.cmEventHandle = event;
945
946 m_criticalSection.Acquire();
947
948 int32_t hr = m_cmDev->OSALExtensionExecute(CM_FN_CMQUEUE_ENQUEUECOPY_BUFFER,
949 &inParam,
950 sizeof(inParam));
951 if (FAILED(hr))
952 {
953 CmAssert(0);
954 m_criticalSection.Release();
955 return hr;
956 }
957 if (inParam.returnValue != CM_SUCCESS)
958 {
959 m_criticalSection.Release();
960 return inParam.returnValue;
961 }
962
963 event = static_cast<CmEvent*>(inParam.cmEventHandle);
964 m_criticalSection.Release();
965 return CM_SUCCESS;
966 }
967