1 /*
2 * Copyright © 2014 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the
6 * "Software"), to deal in the Software without restriction, including
7 * without limitation the rights to use, copy, modify, merge, publish,
8 * distribute, sub license, and/or sell copies of the Software, and to
9 * permit persons to whom the Software is furnished to do so, subject to
10 * the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the
13 * next paragraph) shall be included in all copies or substantial portions
14 * of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19 * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23 *
24 * Authors:
25 * Wei Lin<wei.w.lin@intel.com>
26 * Yuting Yang<yuting.yang@intel.com>
27 * Lina Sun<lina.sun@intel.com>
28 */
29
30 #include "cm_queue.h"
31 #include "cm_device.h"
32 #include "cm_event.h"
33 #include "cm_task.h"
34 #include "cm_task_internal.h"
35 #include "cm_thread_space.h"
36 #include "cm_kernel.h"
37 #include "cm_kernel_data.h"
38 #include "cm_buffer.h"
39 #include "cm_group_space.h"
40 #include "cm_def.h"
41 #include "hal_cm.h"
42 #include "cm_surface_manager.h"
43 #include <sys/time.h>
44
Create(CmDevice_RT * pDevice,CmQueue_RT * & pQueue)45 INT CmQueue_RT::Create(CmDevice_RT * pDevice, CmQueue_RT * &pQueue)
46 {
47 INT result = CM_SUCCESS;
48 pQueue = new(std::nothrow) CmQueue_RT(pDevice);
49 if (pQueue) {
50 result = pQueue->Initialize();
51 if (result != CM_SUCCESS) {
52 CmQueue_RT::Destroy(pQueue);
53 }
54 } else {
55 CM_ASSERT(0);
56 result = CM_OUT_OF_HOST_MEMORY;
57 }
58 return result;
59 }
60
Destroy(CmQueue_RT * & pQueue)61 INT CmQueue_RT::Destroy(CmQueue_RT * &pQueue)
62 {
63 if (pQueue == NULL) {
64 return CM_FAILURE;
65 }
66
67 UINT result = pQueue->CleanQueue();
68 CmSafeDelete(pQueue);
69
70 return result;
71 }
72
CmQueue_RT(CmDevice_RT * pDevice)73 CmQueue_RT::CmQueue_RT(CmDevice_RT * pDevice):
74 m_pDevice(pDevice),
75 m_EventArray(CM_INIT_EVENT_COUNT), m_EventCount(0), m_pHalMaxValues(NULL)
76 {
77
78 }
79
// Destructor: frees the flushed-task pool and destroys every event
// still registered in the event array.
CmQueue_RT::~CmQueue_RT(void)
{
	UINT EventReleaseTimes = 0;

	// Release any tasks still sitting in the flushed-task free pool.
	m_FlushedTasks.DeleteFreePool();

	// CmEvent_RT::Destroy() appears to drop one reference per call and
	// NULL the pointer once the count reaches zero (see DestroyEvent,
	// which tests pEvent_RT == NULL after Destroy). Each slot is
	// therefore destroyed repeatedly until the pointer goes NULL; more
	// than 3 iterations indicates a refcount problem, so assert and
	// abandon the slot rather than loop forever.
	UINT EventArrayUsedSize = m_EventArray.GetMaxSize();
	for (UINT i = 0; i < EventArrayUsedSize; i++) {
		CmEvent_RT *pEvent = (CmEvent_RT *) m_EventArray.GetElement(i);
		EventReleaseTimes = 0;
		while (pEvent) {
			if (EventReleaseTimes > 2) {
				CM_ASSERT(0);
				break;
			}
			CmEvent_RT::Destroy(pEvent);
			EventReleaseTimes++;
		}
	}
	m_EventArray.Delete();

}
102
Initialize(void)103 INT CmQueue_RT::Initialize(void)
104 {
105 CM_HAL_MAX_VALUES_EX *pHalMaxValuesEx = NULL;
106 m_pDevice->GetHalMaxValues(m_pHalMaxValues, pHalMaxValuesEx);
107 return CM_SUCCESS;
108 }
109
GetTaskHasThreadArg(CmKernel * pKernelArray[],UINT numKernels,BOOLEAN & threadArgExists)110 INT CmQueue_RT::GetTaskHasThreadArg(CmKernel * pKernelArray[], UINT numKernels,
111 BOOLEAN & threadArgExists)
112 {
113 threadArgExists = FALSE;
114
115 for (UINT iKrn = 0; iKrn < numKernels; iKrn++) {
116 if (!pKernelArray[iKrn]) {
117 CM_ASSERT(0);
118 return CM_FAILURE;
119 }
120
121 if ((static_cast<CmKernel_RT *>(pKernelArray[iKrn]))->IsThreadArgExisted()) {
122 threadArgExists = TRUE;
123 break;
124 }
125 }
126
127 return CM_SUCCESS;
128 }
129
// Enqueues a task for execution through the general (thread-space)
// path. The task's kernels are flattened into a NULL-terminated array
// and handed to the internal Enqueue_RT().
// @param pKernelArray  task holding the kernels to run (required).
// @param pEvent        [in] CM_NO_EVENT suppresses the user event;
//                      [out] completion event for the task.
// @param pTS           optional thread space (dependency description).
// @return CM_SUCCESS or a CM_* error code.
CM_RT_API INT
CmQueue_RT::Enqueue(CmTask * pKernelArray,
		    CmEvent * &pEvent, const CmThreadSpace * pTS)
{
	INT result;

	if (pKernelArray == NULL) {
		CM_ASSERT(0);
		return CM_INVALID_ARG_VALUE;
	}

	UINT KernelCount = 0;
	CmTask_RT* pKernelArray_RT = static_cast<CmTask_RT*>(pKernelArray);
	KernelCount = pKernelArray_RT->GetKernelCount();
	if (KernelCount == 0) {
		CM_ASSERT(0);
		return CM_FAILURE;
	}

	// Enforce the HAL limit on kernels per task.
	if (KernelCount > m_pHalMaxValues->iMaxKernelsPerTask) {
		CM_ASSERT(0);
		return CM_EXCEED_MAX_KERNEL_PER_ENQUEUE;
	}

	// A thread space that needs a kernel association but has none yet
	// is bound to the task's first kernel.
	if (pTS && pTS->IsThreadAssociated()) {
		if (pTS->GetNeedSetKernelPointer()
		    && pTS->KernelPointerIsNULL()) {
			CmKernel *pTmp = NULL;
			pTmp = pKernelArray_RT->GetKernelPointer(0);
			pTS->SetKernelPointer(pTmp);
		}
	}

	// Build a NULL-terminated kernel pointer array and sum the thread
	// counts across all kernels.
	typedef CmKernel_RT *pCmKernel;
	CmKernel_RT **pTmp = new(std::nothrow) pCmKernel[KernelCount + 1];
	if (pTmp == NULL) {
		CM_ASSERT(0);
		return CM_OUT_OF_HOST_MEMORY;
	}

	UINT totalThreadNumber = 0;
	for (UINT i = 0; i < KernelCount; i++) {
		pTmp[i] = (static_cast<CmKernel_RT*>(pKernelArray_RT->GetKernelPointer(i)));

		UINT singleThreadNumber = 0;
		pTmp[i]->GetThreadCount(singleThreadNumber);
		totalThreadNumber += singleThreadNumber;
	}
	pTmp[KernelCount] = NULL;

	result =
	    Enqueue_RT(pTmp, KernelCount, totalThreadNumber, pEvent, pTS,
		       pKernelArray_RT->GetSyncBitmap(),
		       pKernelArray_RT->GetPowerOption());

	// Record the kernel names on the event for later query/debug.
	if (pEvent) {
		CmEvent_RT *pEvent_RT = (static_cast<CmEvent_RT*>(pEvent));
		pEvent_RT->SetKernelNames(pKernelArray,
					  const_cast < CmThreadSpace * >(pTS),
					  NULL);
	}

	CmSafeDeleteArray(pTmp);

	return result;
}
195
// Internal enqueue for thread-space tasks: wraps the kernels into a
// CmTaskInternal, pushes it onto the enqueued list, creates its
// completion event and flushes to the HAL. Push, event creation and
// flush all happen under m_CriticalSection_Queue.
// @param pEvent  [in] CM_NO_EVENT suppresses the user-visible event;
//                [out] the created event (NULL when suppressed).
INT CmQueue_RT::Enqueue_RT(CmKernel_RT * pKernelArray_RT[],
			   const UINT uiKernelCount,
			   const UINT uiTotalThreadCount,
			   CmEvent * &pEvent,
			   const CmThreadSpace * pTS,
			   UINT64 uiSyncBitmap,
			   PCM_HAL_POWER_OPTION_PARAM pPowerOption)
{

	if (pKernelArray_RT == NULL) {
		CM_ASSERTMESSAGE("Kernel array is NULL.");
		return CM_INVALID_ARG_VALUE;
	}

	if (uiKernelCount == 0) {
		CM_ASSERTMESSAGE("There are no valid kernels.");
		return CM_INVALID_ARG_VALUE;
	}

	// CM_NO_EVENT means the caller opted out of a user-visible event.
	BOOL bIsEventVisible = (pEvent == CM_NO_EVENT) ? FALSE : TRUE;

	CmTaskInternal *pTask = NULL;
	INT result = CmTaskInternal::Create(uiKernelCount, uiTotalThreadCount,
					reinterpret_cast<CmKernel **>(pKernelArray_RT),
					pTS, m_pDevice, uiSyncBitmap,
					pTask);
	if (result != CM_SUCCESS) {
		CM_ASSERT(0);
		return result;
	}

	m_CriticalSection_Queue.Acquire();

	if (!m_EnqueuedTasks.Push(pTask)) {
		m_CriticalSection_Queue.Release();
		CM_ASSERT(0);
		return CM_FAILURE;
	}

	INT taskDriverId = -1;

	// The event is created before flushing; the flush path later
	// records the HAL driver task id on it (see FlushGeneralTask).
	result = CreateEvent(pTask, bIsEventVisible, taskDriverId, pEvent);
	if (result != CM_SUCCESS) {
		m_CriticalSection_Queue.Release();
		CM_ASSERT(0);
		return result;
	}

	pTask->SetPowerOption(pPowerOption);
	// Bump the use count of every surface the task references.
	UpdateSurfaceStateOnPush(pTask);
	result = FlushTaskWithoutSync();

	m_CriticalSection_Queue.Release();

	return result;
}
252
// Internal enqueue for thread-group-space tasks; mirrors the
// thread-space Enqueue_RT() but carries a preemption mode instead of a
// power option. Push, event creation and flush run under the queue
// lock.
INT CmQueue_RT::Enqueue_RT(CmKernel_RT * pKernelArray[],
			   const UINT uiKernelCount,
			   const UINT uiTotalThreadCount,
			   CmEvent * &pEvent,
			   const CmThreadGroupSpace * pTGS,
			   UINT64 uiSyncBitmap,
			   CM_HAL_PREEMPTION_MODE preemptionMode)
{
	if (pKernelArray == NULL) {
		CM_ASSERTMESSAGE("Kernel array is NULL.");
		return CM_INVALID_ARG_VALUE;
	}

	if (uiKernelCount == 0) {
		CM_ASSERTMESSAGE("There are no valid kernels.");
		return CM_INVALID_ARG_VALUE;
	}

	CmTaskInternal *pTask = NULL;
	INT result = CmTaskInternal::Create(uiKernelCount, uiTotalThreadCount,
					reinterpret_cast<CmKernel **>(pKernelArray),
					pTGS, m_pDevice, uiSyncBitmap,
					pTask);
	if (result != CM_SUCCESS) {
		CM_ASSERT(0);
		return result;
	}

	m_CriticalSection_Queue.Acquire();

	pTask->SetPreemptionMode(preemptionMode);

	if (!m_EnqueuedTasks.Push(pTask)) {
		m_CriticalSection_Queue.Release();
		CM_ASSERT(0);
		return CM_FAILURE;
	}

	INT taskDriverId = -1;

	// Event is user-visible unless the caller passed CM_NO_EVENT.
	result =
	    CreateEvent(pTask, !(pEvent == CM_NO_EVENT), taskDriverId, pEvent);
	if (result != CM_SUCCESS) {
		m_CriticalSection_Queue.Release();
		CM_ASSERT(0);
		return result;
	}

	// Bump surface use counts before submission.
	UpdateSurfaceStateOnPush(pTask);

	result = FlushTaskWithoutSync();
	m_CriticalSection_Queue.Release();

	return result;
}
308
// Internal enqueue used by EnqueueWithHints(): takes a NULL-terminated
// kernel array (at least CM_MINIMUM_NUM_KERNELS_ENQWHINTS entries),
// validates the total thread count against the limit that applies
// (different limits when no kernel uses per-thread arguments), then
// creates the hints task, queues it, creates its event and flushes.
// @param numTasksGenerated  index of this pass in a split sequence.
// @param isLastTask         TRUE on the final pass of the sequence.
INT CmQueue_RT::Enqueue_RT(CmKernel_RT * pKernelArray[],
			   CmEvent * &pEvent,
			   UINT numTasksGenerated,
			   BOOLEAN isLastTask,
			   UINT hints, PCM_HAL_POWER_OPTION_PARAM pPowerOption)
{
	INT result = CM_FAILURE;
	UINT kernelCount = 0;
	CmTaskInternal *pTask = NULL;
	INT taskDriverId = -1;
	BOOL bIsEventVisible = (pEvent == CM_NO_EVENT) ? FALSE : TRUE;
	BOOLEAN threadArgExists = FALSE;

	if (pKernelArray == NULL) {
		CM_ASSERTMESSAGE("Kernel array is NULL.");
		return CM_INVALID_ARG_VALUE;
	}
	// The kernel array is NULL-terminated; count its entries.
	while (pKernelArray[kernelCount]) {
		kernelCount++;
	}

	if (kernelCount < CM_MINIMUM_NUM_KERNELS_ENQWHINTS) {
		CM_ASSERTMESSAGE
		    ("EnqueueWithHints requires at least 2 kernels.");
		return CM_FAILURE;
	}

	UINT totalThreadCount = 0;
	for (UINT i = 0; i < kernelCount; i++) {
		UINT threadCount = 0;
		pKernelArray[i]->GetThreadCount(threadCount);
		totalThreadCount += threadCount;
	}

	if (GetTaskHasThreadArg(reinterpret_cast<CmKernel **>(pKernelArray), kernelCount, threadArgExists) !=
	    CM_SUCCESS) {
		CM_ASSERTMESSAGE
		    ("Error checking if Task has any thread arguments.");
		return CM_FAILURE;
	}

	// The HAL exposes two per-task thread limits; which one applies
	// depends on whether any kernel uses per-thread arguments.
	if (!threadArgExists) {
		if (totalThreadCount >
		    m_pHalMaxValues->iMaxUserThreadsPerTaskNoThreadArg) {
			CM_ASSERTMESSAGE
			    ("Maximum number of threads per task exceeded.");
			return CM_EXCEED_MAX_THREAD_AMOUNT_PER_ENQUEUE;
		}
	} else {
		if (totalThreadCount > m_pHalMaxValues->iMaxUserThreadsPerTask) {
			CM_ASSERTMESSAGE
			    ("Maximum number of threads per task exceeded.");
			return CM_EXCEED_MAX_THREAD_AMOUNT_PER_ENQUEUE;
		}
	}

	result =
	    CmTaskInternal::Create(kernelCount, totalThreadCount, reinterpret_cast<CmKernel **>(pKernelArray),
				   pTask, numTasksGenerated, isLastTask, hints,
				   m_pDevice);
	if (result != CM_SUCCESS) {
		CM_ASSERT(0);
		return result;
	}

	m_CriticalSection_Queue.Acquire();
	if (!m_EnqueuedTasks.Push(pTask)) {
		m_CriticalSection_Queue.Release();
		CM_ASSERT(0);
		return CM_FAILURE;
	}

	result = CreateEvent(pTask, bIsEventVisible, taskDriverId, pEvent);
	if (result != CM_SUCCESS) {
		m_CriticalSection_Queue.Release();
		CM_ASSERT(0);
		return result;
	}

	// Clear any Y-coordinate adjustment left on the kernels by a
	// previous submission.
	for (UINT i = 0; i < kernelCount; ++i) {
		CmKernel_RT *pKernel = NULL;
		pTask->GetKernel(i, (CmKernel * &)pKernel);
		if (pKernel != NULL) {
			pKernel->SetAdjustedYCoord(0);
		}
	}

	pTask->SetPowerOption(pPowerOption);
	UpdateSurfaceStateOnPush(pTask);

	result = FlushTaskWithoutSync();
	m_CriticalSection_Queue.Release();

	return result;
}
404
// Enqueues a task for execution on a thread group space. Kernels with
// per-thread arguments are rejected; group-space execution only takes
// per-kernel arguments.
// @param pTask   task holding the kernels to run (required).
// @param pEvent  [in] CM_NO_EVENT to suppress; [out] completion event.
// @param pTGS    thread group space describing the dispatch geometry.
CM_RT_API INT
CmQueue_RT::EnqueueWithGroup(CmTask * pTask, CmEvent * &pEvent,
			     const CmThreadGroupSpace * pTGS)
{
	INT result;

	if (pTask == NULL) {
		CM_ASSERTMESSAGE("Kernel array is NULL.");
		return CM_INVALID_ARG_VALUE;
	}

	UINT count = 0;

	CmTask_RT* pTask_RT = static_cast<CmTask_RT*>(pTask);
	count = pTask_RT->GetKernelCount();

	if (count == 0) {
		CM_ASSERTMESSAGE("There are no valid kernels.");
		return CM_FAILURE;
	}

	// Build a NULL-terminated kernel array and sum thread counts, as
	// in Enqueue(), rejecting any kernel with per-thread arguments.
	typedef CmKernel_RT *pCmKernel;
	CmKernel_RT **pTmp = new(std::nothrow) pCmKernel[count + 1];
	if (pTmp == NULL) {
		CM_ASSERT(0);
		return CM_OUT_OF_HOST_MEMORY;
	}

	UINT totalThreadNumber = 0;
	for (UINT i = 0; i < count; i++) {
		UINT singleThreadNumber = 0;
		pTmp[i] = static_cast<CmKernel_RT *>(pTask_RT->GetKernelPointer(i));

		if (pTmp[i]->IsThreadArgExisted()) {
			CM_ASSERTMESSAGE
			    ("No thread Args allowed when using group space");
			CmSafeDeleteArray(pTmp);
			return CM_THREAD_ARG_NOT_ALLOWED;
		}

		pTmp[i]->GetThreadCount(singleThreadNumber);
		totalThreadNumber += singleThreadNumber;
	}
	pTmp[count] = NULL;

	result =
	    Enqueue_RT(pTmp, count, totalThreadNumber, pEvent, pTGS,
		       pTask_RT->GetSyncBitmap(), pTask_RT->GetPreemptionMode());

	// Record the kernel names on the event for later query/debug.
	if (pEvent) {
		(static_cast<CmEvent_RT*>(pEvent))->SetKernelNames(pTask_RT, NULL,
								   const_cast <
								   CmThreadGroupSpace * >(pTGS));
	}

	CmSafeDeleteArray(pTmp);

	return result;
}
464
// Enqueues a multi-kernel task driven by scheduling hints. The hints
// word may encode a number of generated tasks (CM_HINTS_MASK_NUM_TASKS);
// when greater than one, the same kernel set is submitted that many
// times, with the final pass flagged as the last task.
// @param pKernelArray  task holding the kernels (required).
// @param pEvent        [in/out] completion event (CM_NO_EVENT allowed).
// @param hints         packed hint bits interpreted by the HAL.
CM_RT_API INT
CmQueue_RT::EnqueueWithHints(CmTask * pKernelArray,
			     CmEvent * &pEvent, UINT hints)
{
	INT hr = CM_FAILURE;
	UINT count = 0;
	UINT index = 0;
	CmKernel **pKernels = NULL;
	UINT numTasks = 0;
	BOOLEAN splitTask = FALSE;
	BOOLEAN lastTask = FALSE;
	UINT numTasksGenerated = 0;
	CmTask_RT* pKernelArray_RT = static_cast<CmTask_RT*>(pKernelArray);

	CMCHK_NULL_RETURN(pKernelArray, CM_INVALID_ARG_VALUE);

	count = pKernelArray_RT->GetKernelCount();
	if (count == 0) {
		CM_ASSERT(0);
		hr = CM_FAILURE;
		goto finish;
	}

	if (count > m_pHalMaxValues->iMaxKernelsPerTask) {
		CM_ASSERT(0);
		hr = CM_EXCEED_MAX_KERNEL_PER_ENQUEUE;
		goto finish;
	}

	// Every kernel must have a thread space; bind the kernel pointer
	// on any thread space that still lacks one.
	for (UINT i = 0; i < count; ++i) {
		CmKernel_RT *pKernelTmp = NULL;
		CmThreadSpace *pTSTmp = NULL;
		pKernelTmp = static_cast<CmKernel_RT*>(pKernelArray_RT->GetKernelPointer(i));
		CMCHK_NULL(pKernelTmp);
		pKernelTmp->GetThreadSpace(pTSTmp);
		CMCHK_NULL(pTSTmp);
		if (pTSTmp->GetNeedSetKernelPointer()
		    && pTSTmp->KernelPointerIsNULL()) {
			pTSTmp->SetKernelPointer(pKernelTmp);
		}
	}

	// Number of generated tasks is packed into the hints word.
	numTasks =
	    (hints & CM_HINTS_MASK_NUM_TASKS) >> CM_HINTS_NUM_BITS_TASK_POS;
	if (numTasks > 1) {
		splitTask = TRUE;
	}

	pKernels = new(std::nothrow) CmKernel *[count + 1];
	CMCHK_NULL(pKernels);

	// Submit the kernel set once per generated task; the final pass
	// is flagged so the HAL knows the sequence is complete.
	do {
		for (index = 0; index < count; ++index) {
			pKernels[index] = pKernelArray_RT->GetKernelPointer(index);
		}

		pKernels[count] = NULL;

		if (splitTask) {
			if (numTasksGenerated == (numTasks - 1)) {
				lastTask = TRUE;
			}
		} else {
			lastTask = TRUE;
		}

		CMCHK_HR(Enqueue_RT
			 (reinterpret_cast<CmKernel_RT **>(pKernels), pEvent, numTasksGenerated, lastTask, hints,pKernelArray_RT->GetPowerOption()));

		numTasksGenerated++;

	}
	while (numTasksGenerated < numTasks);

 finish:
	CmSafeDeleteArray(pKernels);

	return hr;
}
544
UpdateSurfaceStateOnPop(CmTaskInternal * pTask)545 INT CmQueue_RT::UpdateSurfaceStateOnPop(CmTaskInternal * pTask)
546 {
547 CmSurfaceManager *pSurfaceMgr = NULL;
548 INT *pSurfState = NULL;
549 BOOL *surfArray = NULL;
550
551 m_pDevice->GetSurfaceManager(pSurfaceMgr);
552 if (!pSurfaceMgr) {
553 CM_ASSERT(0);
554 return CM_FAILURE;
555 }
556
557 UINT poolSize = pSurfaceMgr->GetSurfacePoolSize();
558 pSurfaceMgr->GetSurfaceState(pSurfState);
559
560 pTask->GetTaskSurfaces(surfArray);
561 for (UINT i = 0; i < poolSize; i++) {
562 if (surfArray[i]) {
563 pSurfState[i]--;
564 }
565 }
566
567 return CM_SUCCESS;
568 }
569
UpdateSurfaceStateOnPush(CmTaskInternal * pTask)570 INT CmQueue_RT::UpdateSurfaceStateOnPush(CmTaskInternal * pTask)
571 {
572 INT *pSurfState = NULL;
573 BOOL *surfArray = NULL;
574 CmSurfaceManager *pSurfaceMgr = NULL;
575 UINT freeSurfNum = 0;
576
577 m_pDevice->GetSurfaceManager(pSurfaceMgr);
578 if (!pSurfaceMgr) {
579 CM_ASSERT(0);
580 return CM_FAILURE;
581 }
582
583 CSync *pSurfaceLock = m_pDevice->GetSurfaceCreationLock();
584 pSurfaceLock->Acquire();
585
586 UINT poolSize = pSurfaceMgr->GetSurfacePoolSize();
587 pSurfaceMgr->GetSurfaceState(pSurfState);
588
589 pTask->GetTaskSurfaces(surfArray);
590 for (UINT i = 0; i < poolSize; i++) {
591 if (surfArray[i]) {
592 pSurfState[i]++;
593 }
594 }
595
596 pSurfaceMgr->DestroySurfaceInPool(freeSurfNum, DELAYED_DESTROY);
597
598 pSurfaceLock->Release();
599
600 return CM_SUCCESS;
601 }
602
PopTaskFromFlushedQueue()603 void CmQueue_RT::PopTaskFromFlushedQueue()
604 {
605 CmTaskInternal *pTopTask = (CmTaskInternal *) m_FlushedTasks.Pop();
606
607 UpdateSurfaceStateOnPop(pTopTask);
608
609 CmTaskInternal::Destroy(pTopTask);
610
611 return;
612 }
613
// Makes forward progress on the queue: if nothing has been flushed yet
// but tasks are waiting, flush them first, then poll the flushed list
// so finished tasks get retired.
// @return CM_SUCCESS on progress, CM_FAILURE when there is no work at
//         all, or the flush error code.
INT CmQueue_RT::TouchFlushedTasks(void)
{
	INT hr = CM_SUCCESS;

	m_CriticalSection_Queue.Acquire();
	if (m_FlushedTasks.IsEmpty()) {
		if (!m_EnqueuedTasks.IsEmpty()) {
			// Hand pending tasks to the HAL so there is
			// something to query below.
			hr = FlushTaskWithoutSync();
			if (FAILED(hr)) {
				m_CriticalSection_Queue.Release();
				return hr;
			}
		} else {
			// Nothing enqueued and nothing in flight.
			m_CriticalSection_Queue.Release();
			return CM_FAILURE;
		}
	}

	hr = QueryFlushedTasks();
	m_CriticalSection_Queue.Release();
	return hr;
}
636
// Retires finished tasks from the head of the flushed list. Retirement
// is strictly in submission order, so the scan stops at the first task
// that has not finished. When the HW interface reports media-reset
// support, a head task stuck in STARTED while a *later* task has
// already started or finished is treated as hung: its slot in the HAL
// task status table is invalidated and it is popped anyway.
INT CmQueue_RT::QueryFlushedTasks(void)
{
	INT hr = CM_SUCCESS;

	while (!m_FlushedTasks.IsEmpty()) {
		CmTaskInternal *pTask = (CmTaskInternal *) m_FlushedTasks.Top();
		CMCHK_NULL(pTask);

		CM_STATUS status = CM_STATUS_FLUSHED;
		pTask->GetTaskStatus(status);
		if (status == CM_STATUS_FINISHED) {
			PopTaskFromFlushedQueue();
		} else {
			if (status == CM_STATUS_STARTED) {
				PCM_CONTEXT pCmData =
				    (PCM_CONTEXT) m_pDevice->GetAccelData();
				if (pCmData->pCmHalState->
				    pHwInterface->bMediaReset) {
					// Hang detection: compare the head
					// task against its successor.
					CmTaskInternal *pNextTask =
					    (CmTaskInternal *)
					    m_FlushedTasks.GetNext(pTask);
					if (pNextTask == NULL) {
						// No successor to compare
						// against yet; re-poll the
						// same head task.
						continue;
					}

					CM_STATUS nextTaskStatus =
					    CM_STATUS_FLUSHED;
					pNextTask->GetTaskStatus
					    (nextTaskStatus);

					// The successor has overtaken the
					// head; if the head is still only
					// STARTED it is presumed hung.
					if (nextTaskStatus == CM_STATUS_STARTED
					    || nextTaskStatus ==
					    CM_STATUS_FINISHED) {
						pTask->GetTaskStatus(status);
						if (status == CM_STATUS_STARTED) {
							INT iTaskId;
							CmEvent_RT *pTopTaskEvent;
							pTask->GetTaskEvent(pTopTaskEvent);
							CMCHK_NULL(pTopTaskEvent);

							pTopTaskEvent->GetTaskDriverId
							    (iTaskId);
							// Invalidate the hung
							// task's status slot so
							// the HAL drops it.
							pCmData->
							    pCmHalState->pTaskStatusTable
							    [iTaskId] =
							    CM_INVALID_INDEX;

							PopTaskFromFlushedQueue
							    ();
						}
					}
				}
			}
			// Head not finished (and not retired above): stop.
			break;
		}
	}

 finish:

	return hr;
}
698
DestroyEvent(CmEvent * & pEvent)699 CM_RT_API INT CmQueue_RT::DestroyEvent(CmEvent * &pEvent)
700 {
701 if (pEvent == NULL) {
702 return CM_FAILURE;
703 }
704
705 UINT index = 0;
706 CmEvent_RT *pEvent_RT = (static_cast<CmEvent_RT*>(pEvent));
707
708 pEvent_RT->GetIndex(index);
709 CM_ASSERT(m_EventArray.GetElement(index) == pEvent);
710
711 INT status = CmEvent_RT::Destroy(pEvent_RT);
712 if (status == CM_SUCCESS && pEvent_RT == NULL) {
713 m_EventArray.SetElement(index, NULL);
714 pEvent = NULL;
715 }
716 return status;
717 }
718
// Drains the queue: flushes anything still enqueued (blocking until the
// HAL accepts every task), then polls the flushed list until all tasks
// retire or a deadline proportional to the number of outstanding tasks
// expires.
// @return CM_SUCCESS, or CM_EXCEED_MAX_TIMEOUT if tasks never retired.
INT CmQueue_RT::CleanQueue(void)
{

	INT status = CM_SUCCESS;
	m_CriticalSection_Queue.Acquire();

	if (!m_EnqueuedTasks.IsEmpty()) {
		// TRUE = block until every enqueued task reaches the HAL.
		FlushTaskWithoutSync(TRUE);
	}
	CM_ASSERT(m_EnqueuedTasks.IsEmpty());
	m_EnqueuedTasks.DeleteFreePool();

	// Overall deadline: CM_MAX_TIMEOUT (presumably seconds, given the
	// 1e6 usec scale — TODO confirm) per outstanding flushed task.
	struct timeval start;
	gettimeofday(&start, NULL);
	UINT64 timeout_usec;
	timeout_usec = CM_MAX_TIMEOUT * m_FlushedTasks.GetCount() * 1000000;

	while (!m_FlushedTasks.IsEmpty() && status != CM_EXCEED_MAX_TIMEOUT) {
		QueryFlushedTasks();

		struct timeval current;
		gettimeofday(&current, NULL);
		UINT64 timeuse_usec;
		timeuse_usec =
		    1000000 * (current.tv_sec - start.tv_sec) +
		    current.tv_usec - start.tv_usec;
		if (timeuse_usec > timeout_usec)
			status = CM_EXCEED_MAX_TIMEOUT;
	}

	m_FlushedTasks.DeleteFreePool();
	m_CriticalSection_Queue.Release();

	return status;
}
754
GetTaskCount(UINT & numTasks)755 INT CmQueue_RT::GetTaskCount(UINT & numTasks)
756 {
757 numTasks = m_EnqueuedTasks.GetCount() + m_FlushedTasks.GetCount();
758 return CM_SUCCESS;
759 }
760
FlushGeneralTask(CmTaskInternal * pTask)761 INT CmQueue_RT::FlushGeneralTask(CmTaskInternal * pTask)
762 {
763 CM_RETURN_CODE hr = CM_SUCCESS;
764 CM_HAL_EXEC_TASK_PARAM param;
765 CmKernelData *pKernelData = NULL;
766 UINT kernelDataSize = 0;
767 PCM_CONTEXT pCmData = NULL;
768 CmEvent_RT *pEvent = NULL;
769 UINT totalThreadCount = 0;
770 UINT count = 0;
771 PCM_HAL_KERNEL_PARAM pTempData = NULL;
772
773 CmSafeMemSet(¶m, 0, sizeof(CM_HAL_EXEC_TASK_PARAM));
774
775 pTask->GetKernelCount(count);
776 param.iNumKernels = count;
777
778 param.pKernels = new(std::nothrow) PCM_HAL_KERNEL_PARAM[count];
779 param.piKernelSizes = new(std::nothrow) UINT[count];
780 param.piKernelCurbeOffset = new(std::nothrow) UINT[count];
781
782 CMCHK_NULL_RETURN(param.pKernels, CM_OUT_OF_HOST_MEMORY);
783 CMCHK_NULL_RETURN(param.piKernelSizes, CM_OUT_OF_HOST_MEMORY);
784 CMCHK_NULL_RETURN(param.piKernelCurbeOffset, CM_OUT_OF_HOST_MEMORY);
785
786 for (UINT i = 0; i < count; i++) {
787 pTask->GetKernelData(i, pKernelData);
788 CMCHK_NULL(pKernelData);
789
790 pTask->GetKernelDataSize(i, kernelDataSize);
791 if (kernelDataSize == 0) {
792 CM_ASSERT(0);
793 hr = CM_FAILURE;
794 goto finish;
795 }
796
797 pTempData = pKernelData->GetHalCmKernelData();
798
799 param.pKernels[i] = pTempData;
800 param.piKernelSizes[i] = kernelDataSize;
801 param.piKernelCurbeOffset[i] = pTask->GetKernelCurbeOffset(i);
802 param.bGlobalSurfaceUsed |= pTempData->bGlobalSurfaceUsed;
803 param.bKernelDebugEnabled |= pTempData->bKernelDebugEnabled;
804 }
805
806 pTask->GetTotalThreadCount(totalThreadCount);
807
808 param.threadSpaceWidth =
809 (totalThreadCount >
810 CM_MAX_THREADSPACE_WIDTH) ? CM_MAX_THREADSPACE_WIDTH :
811 totalThreadCount;
812 if (totalThreadCount % CM_MAX_THREADSPACE_WIDTH) {
813 param.threadSpaceHeight =
814 totalThreadCount / CM_MAX_THREADSPACE_WIDTH + 1;
815 } else {
816 param.threadSpaceHeight =
817 totalThreadCount / CM_MAX_THREADSPACE_WIDTH;
818 }
819 param.DependencyPattern = CM_DEPENDENCY_NONE;
820
821 if (pTask->IsThreadSpaceCreated()) {
822 if (pTask->IsThreadCoordinatesExisted()) {
823 param.ppThreadCoordinates =
824 new(std::nothrow) PCM_HAL_SCOREBOARD_XY[count];
825 param.ppDependencyMasks =
826 new(std::nothrow) PCM_HAL_MASK_AND_RESET[count];
827 CMCHK_NULL_RETURN(param.ppThreadCoordinates,
828 CM_OUT_OF_HOST_MEMORY);
829 CMCHK_NULL_RETURN(param.ppDependencyMasks,
830 CM_OUT_OF_HOST_MEMORY);
831 for (UINT i = 0; i < count; i++) {
832 void *pKernelCoordinates = NULL;
833 void *pDependencyMasks = NULL;
834 pTask->GetKernelCoordinates(i,
835 pKernelCoordinates);
836 pTask->GetKernelDependencyMasks(i,
837 pDependencyMasks);
838 param.ppThreadCoordinates[i] =
839 (PCM_HAL_SCOREBOARD_XY) pKernelCoordinates;
840 param.ppDependencyMasks[i] =
841 (PCM_HAL_MASK_AND_RESET) pDependencyMasks;
842 }
843 } else {
844 param.ppThreadCoordinates = NULL;
845 }
846
847 pTask->GetDependencyPattern(param.DependencyPattern);
848
849 pTask->GetThreadSpaceSize(param.threadSpaceWidth,
850 param.threadSpaceHeight);
851
852 pTask->GetWalkingPattern(param.WalkingPattern);
853
854 if (pTask->CheckWalkingParametersSet()) {
855 param.walkingParamsValid = 1;
856 CMCHK_HR(pTask->GetWalkingParameters
857 (param.walkingParams));
858 } else {
859 param.walkingParamsValid = 0;
860 }
861
862 if (pTask->CheckDependencyVectorsSet()) {
863 param.dependencyVectorsValid = 1;
864 CMCHK_HR(pTask->GetDependencyVectors
865 (param.dependencyVectors));
866 } else {
867 param.dependencyVectorsValid = 0;
868 }
869 }
870
871 pTask->GetColorCountMinusOne(param.ColorCountMinusOne);
872
873 param.uiSyncBitmap = pTask->GetSyncBitmap();
874
875 pCmData = (PCM_CONTEXT) m_pDevice->GetAccelData();
876
877 CHK_GENOSSTATUS_RETURN_CMERROR(pCmData->pCmHalState->
878 pfnSetPowerOption(pCmData->pCmHalState,
879 pTask->GetPowerOption
880 ()));
881
882 CHK_GENOSSTATUS_RETURN_CMERROR(pCmData->pCmHalState->
883 pfnExecuteTask(pCmData->pCmHalState,
884 ¶m));
885
886 if (param.iTaskIdOut < 0) {
887 CM_ASSERT(0);
888 hr = CM_FAILURE;
889 goto finish;
890 }
891
892 pTask->GetTaskEvent(pEvent);
893 CMCHK_NULL(pEvent);
894 CMCHK_HR(pEvent->SetTaskDriverId(param.iTaskIdOut));
895 CMCHK_HR(pEvent->SetTaskOsData(param.OsData));
896 CMCHK_HR(pTask->ReleaseKernel());
897
898 finish:
899 CmSafeDeleteArray(param.pKernels);
900 CmSafeDeleteArray(param.piKernelSizes);
901 CmSafeDeleteArray(param.ppThreadCoordinates);
902 CmSafeDeleteArray(param.ppDependencyMasks);
903 CmSafeDeleteArray(param.piKernelCurbeOffset);
904
905 return hr;
906 }
907
// Submits one thread-group task to the CM HAL. Packages per-kernel
// data, preemption mode, SLM size and thread/group space dimensions
// into CM_HAL_EXEC_TASK_GROUP_PARAM, executes the task, and records the
// returned driver task id and OS data on the task's event.
// @return CM_SUCCESS or a CM_* error code.
INT CmQueue_RT::FlushGroupTask(CmTaskInternal * pTask)
{
	CM_RETURN_CODE hr = CM_SUCCESS;

	CM_HAL_EXEC_TASK_GROUP_PARAM param;
	CmKernelData *pKernelData = NULL;
	UINT kernelDataSize = 0;
	UINT count = 0;
	PCM_CONTEXT pCmData = NULL;
	CmEvent_RT *pEvent = NULL;
	PCM_HAL_KERNEL_PARAM pTempData = NULL;

	CmSafeMemSet(&param, 0, sizeof(CM_HAL_EXEC_TASK_GROUP_PARAM));

	pTask->GetKernelCount(count);
	param.iNumKernels = count;

	param.pKernels = new(std::nothrow) PCM_HAL_KERNEL_PARAM[count];
	param.piKernelSizes = new(std::nothrow) UINT[count];
	param.piKernelCurbeOffset = new(std::nothrow) UINT[count];
	param.iPreemptionMode = pTask->GetPreemptionMode();

	CMCHK_NULL(param.pKernels);
	CMCHK_NULL(param.piKernelSizes);
	CMCHK_NULL(param.piKernelCurbeOffset);

	// Collect the HAL kernel blobs and aggregate per-task flags.
	for (UINT i = 0; i < count; i++) {
		pTask->GetKernelData(i, pKernelData);
		CMCHK_NULL(pKernelData);

		pTask->GetKernelDataSize(i, kernelDataSize);
		if (kernelDataSize == 0) {
			CM_ASSERT(0);
			hr = CM_FAILURE;
			goto finish;
		}

		pTempData = pKernelData->GetHalCmKernelData();

		param.pKernels[i] = pTempData;
		param.piKernelSizes[i] = kernelDataSize;
		param.piKernelCurbeOffset[i] = pTask->GetKernelCurbeOffset(i);
		param.bGlobalSurfaceUsed |= pTempData->bGlobalSurfaceUsed;
		param.bKernelDebugEnabled |= pTempData->bKernelDebugEnabled;
	}

	// Shared local memory is limited per thread group.
	pTask->GetSLMSize(param.iSLMSize);
	if (param.iSLMSize > MAX_SLM_SIZE_PER_GROUP_IN_1K) {
		CM_ASSERT(0);
		hr = CM_EXCEED_MAX_SLM_SIZE;
		goto finish;
	}

	if (pTask->IsThreadGroupSpaceCreated()) {
		pTask->GetThreadGroupSpaceSize(param.threadSpaceWidth,
					       param.threadSpaceHeight,
					       param.groupSpaceWidth,
					       param.groupSpaceHeight);
	}

	param.uiSyncBitmap = pTask->GetSyncBitmap();

	pCmData = (PCM_CONTEXT) m_pDevice->GetAccelData();
	CHK_GENOSSTATUS_RETURN_CMERROR(pCmData->pCmHalState->pfnExecuteGroupTask
				       (pCmData->pCmHalState, &param));

	if (param.iTaskIdOut < 0) {
		CM_ASSERT(0);
		hr = CM_FAILURE;
		goto finish;
	}

	// Record the driver task id and OS payload on the task's event,
	// then let the task release its kernel data.
	pTask->GetTaskEvent(pEvent);
	CMCHK_NULL(pEvent);
	CMCHK_HR(pEvent->SetTaskDriverId(param.iTaskIdOut));
	CMCHK_HR(pEvent->SetTaskOsData(param.OsData));
	CMCHK_HR(pTask->ReleaseKernel());

 finish:
	CmSafeDeleteArray(param.pKernels);
	CmSafeDeleteArray(param.piKernelSizes);
	CmSafeDeleteArray(param.piKernelCurbeOffset);

	return hr;
}
993
// Submits one EnqueueWithHints task to the CM HAL. In addition to the
// per-kernel data it passes the hints word, the generation index and
// the last-task flag so the HAL can schedule a split task sequence.
// @return CM_SUCCESS or a CM_* error code.
INT CmQueue_RT::FlushEnqueueWithHintsTask(CmTaskInternal * pTask)
{
	CM_RETURN_CODE hr = CM_SUCCESS;
	CM_HAL_EXEC_HINTS_TASK_PARAM param;
	PCM_CONTEXT pCmData = NULL;
	CmKernelData *pKernelData = NULL;
	UINT kernelDataSize = 0;
	UINT count = 0;
	CmEvent_RT *pEvent = NULL;
	PCM_HAL_KERNEL_PARAM pTempData = NULL;

	CmSafeMemSet(&param, 0, sizeof(CM_HAL_EXEC_HINTS_TASK_PARAM));

	pTask->GetKernelCount(count);
	param.iNumKernels = count;

	param.pKernels = new(std::nothrow) PCM_HAL_KERNEL_PARAM[count];
	param.piKernelSizes = new(std::nothrow) UINT[count];
	param.piKernelCurbeOffset = new(std::nothrow) UINT[count];

	CMCHK_NULL(param.pKernels);
	CMCHK_NULL(param.piKernelSizes);
	CMCHK_NULL(param.piKernelCurbeOffset);

	// Hints, generation index and last-task flag come from the task
	// (set at enqueue time by Enqueue_RT).
	pTask->GetHints(param.iHints);
	pTask->GetNumTasksGenerated(param.iNumTasksGenerated);
	pTask->GetLastTask(param.isLastTask);

	// Collect the HAL kernel blobs.
	for (UINT i = 0; i < count; i++) {
		pTask->GetKernelData(i, pKernelData);
		CMCHK_NULL(pKernelData);

		pTask->GetKernelDataSize(i, kernelDataSize);
		if (kernelDataSize == 0) {
			CM_ASSERT(0);
			hr = CM_FAILURE;
			goto finish;
		}

		pTempData = pKernelData->GetHalCmKernelData();

		param.pKernels[i] = pTempData;
		param.piKernelSizes[i] = kernelDataSize;
		param.piKernelCurbeOffset[i] = pTask->GetKernelCurbeOffset(i);
	}

	pCmData = (PCM_CONTEXT) m_pDevice->GetAccelData();
	CMCHK_NULL(pCmData);

	// Apply the task's power option, then execute through the HAL.
	CHK_GENOSSTATUS_RETURN_CMERROR(pCmData->pCmHalState->
				       pfnSetPowerOption(pCmData->pCmHalState,
							 pTask->GetPowerOption
							 ()));

	CHK_GENOSSTATUS_RETURN_CMERROR(pCmData->pCmHalState->pfnExecuteHintsTask
				       (pCmData->pCmHalState, &param));

	if (param.iTaskIdOut < 0) {
		CM_ASSERT(0);
		hr = CM_FAILURE;
		goto finish;
	}

	// Record the driver task id and OS payload on the task's event,
	// then let the task release its kernel data.
	pTask->GetTaskEvent(pEvent);
	CMCHK_NULL(pEvent);
	CMCHK_HR(pEvent->SetTaskDriverId(param.iTaskIdOut));
	CMCHK_HR(pEvent->SetTaskOsData(param.OsData));
	CMCHK_HR(pTask->ReleaseKernel());

 finish:

	CmSafeDeleteArray(param.pKernels);
	CmSafeDeleteArray(param.piKernelSizes);
	CmSafeDeleteArray(param.piKernelCurbeOffset);

	return hr;
}
1071
// Moves tasks from the enqueued list to the HAL and onto the flushed
// list. Non-blocking by default: when the flushed list has reached the
// HAL's iMaxTasks limit it makes one retire attempt and gives up,
// leaving the remainder enqueued. With bIfFlushBlock=TRUE it busy-waits
// (polling QueryFlushedTasks) until a slot frees up.
// @return CM_SUCCESS or the last Flush*Task() error code.
INT CmQueue_RT::FlushTaskWithoutSync(bool bIfFlushBlock)
{
	INT hr = CM_SUCCESS;
	CmTaskInternal *pTask = NULL;
	UINT uiTaskType = CM_TASK_TYPE_DEFAULT;

	while (!m_EnqueuedTasks.IsEmpty()) {
		UINT flushedTaskCount = m_FlushedTasks.GetCount();
		if (bIfFlushBlock) {
			// Spin until the HAL has room for another task.
			while (flushedTaskCount >= m_pHalMaxValues->iMaxTasks) {
				QueryFlushedTasks();
				flushedTaskCount = m_FlushedTasks.GetCount();
			}
		} else {
			if (flushedTaskCount >= m_pHalMaxValues->iMaxTasks) {
				QueryFlushedTasks();
				flushedTaskCount = m_FlushedTasks.GetCount();
				if (flushedTaskCount >=
				    m_pHalMaxValues->iMaxTasks) {
					// Still full; leave the rest
					// enqueued for a later flush.
					break;
				}
			}
		}

		pTask = (CmTaskInternal *) m_EnqueuedTasks.Pop();
		CMCHK_NULL(pTask);

		pTask->GetTaskType(uiTaskType);

		// Dispatch to the HAL under the execute lock.
		m_CriticalSection_HalExecute.Acquire();
		switch (uiTaskType) {
		case CM_INTERNAL_TASK_WITH_THREADSPACE:
			hr = FlushGeneralTask(pTask);
			break;

		case CM_INTERNAL_TASK_WITH_THREADGROUPSPACE:
			hr = FlushGroupTask(pTask);
			break;

		case CM_INTERNAL_TASK_ENQUEUEWITHHINTS:
			hr = FlushEnqueueWithHintsTask(pTask);
			break;

		default:
			hr = FlushGeneralTask(pTask);
			break;
		}
		m_CriticalSection_HalExecute.Release();

		// Task is handed to the flushed list regardless of hr, so
		// CleanQueue() can still reclaim it.
		m_FlushedTasks.Push(pTask);

	}

 finish:
	return hr;
}
1128
// Creates the CmEvent_RT for a newly enqueued task and registers it in
// the event array. When bIsVisible is FALSE the event stays internal
// and the caller's pEvent is set to NULL.
// NOTE(review): the free slot is looked up and later written under two
// separate acquisitions of m_CriticalSection_Event, so two concurrent
// callers could obtain the same index. Current call sites all hold
// m_CriticalSection_Queue, which appears to serialize them — confirm
// before adding new callers.
INT CmQueue_RT::CreateEvent(CmTaskInternal * pTask, BOOL bIsVisible,
			    INT & taskDriverId, CmEvent * &pEvent)
{
	INT hr = CM_SUCCESS;

	m_CriticalSection_Event.Acquire();
	UINT freeSlotInEventArray = m_EventArray.GetFirstFreeIndex();
	m_CriticalSection_Event.Release();

	CmEvent_RT *ptmp = NULL;
	hr = CmEvent_RT::Create(freeSlotInEventArray, pTask, taskDriverId,
				m_pDevice, bIsVisible, ptmp);

	if (hr == CM_SUCCESS) {
		m_CriticalSection_Event.Acquire();
		m_EventArray.SetElement(freeSlotInEventArray, ptmp);
		m_EventCount++;

		m_CriticalSection_Event.Release();

		pTask->SetTaskEvent(ptmp);

		// Invisible events stay owned by the queue; the caller
		// receives NULL.
		if (bIsVisible == FALSE) {
			ptmp = NULL;
		}
		pEvent =(static_cast<CmEvent*>(ptmp));

	}

	return hr;
}
1160
// Acquires the queue-level critical section; pair with
// ReleaseQueueLock().
void CmQueue_RT::AcquireQueueLock(void)
{
	m_CriticalSection_Queue.Acquire();
}
1165
// Releases the queue-level critical section taken by
// AcquireQueueLock().
void CmQueue_RT::ReleaseQueueLock(void)
{
	m_CriticalSection_Queue.Release();
}
1170