1 /*
2 * Copyright © 2014 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the
6 * "Software"), to deal in the Software without restriction, including
7 * without limitation the rights to use, copy, modify, merge, publish,
8 * distribute, sub license, and/or sell copies of the Software, and to
9 * permit persons to whom the Software is furnished to do so, subject to
10 * the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the
13 * next paragraph) shall be included in all copies or substantial portions
14 * of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19 * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23 *
24 * Authors:
25 * Wei Lin<wei.w.lin@intel.com>
26 * Yuting Yang<yuting.yang@intel.com>
27 */
28
29 #include "cm_task_internal.h"
30 #include "cm_kernel.h"
31 #include "cm_event.h"
32 #include "cm_device.h"
33 #include "cm_kernel_data.h"
34 #include "cm_thread_space.h"
35 #include "cm_group_space.h"
36 #include "cm_queue.h"
37 #include "cm_surface_manager.h"
38
Create(const UINT kernelCount,const UINT totalThreadCount,CmKernel * pKernelArray[],const CmThreadSpace * pTS,CmDevice_RT * pCmDevice,const UINT64 uiSyncBitmap,CmTaskInternal * & pTask)39 INT CmTaskInternal::Create(const UINT kernelCount, const UINT totalThreadCount,
40 CmKernel * pKernelArray[], const CmThreadSpace * pTS,
41 CmDevice_RT * pCmDevice, const UINT64 uiSyncBitmap,
42 CmTaskInternal * &pTask)
43 {
44 INT result = CM_SUCCESS;
45 pTask =
46 new(std::nothrow) CmTaskInternal(kernelCount, totalThreadCount,
47 pKernelArray, pCmDevice,
48 uiSyncBitmap);
49 if (pTask) {
50 result = pTask->Initialize(pTS, FALSE);
51 if (result != CM_SUCCESS) {
52 CmTaskInternal::Destroy(pTask);
53 }
54 } else {
55 CM_ASSERT(0);
56 result = CM_OUT_OF_HOST_MEMORY;
57 }
58 return result;
59 }
60
Create(const UINT kernelCount,const UINT totalThreadCount,CmKernel * pKernelArray[],const CmThreadGroupSpace * pTGS,CmDevice_RT * pCmDevice,const UINT64 uiSyncBitmap,CmTaskInternal * & pTask)61 INT CmTaskInternal::Create(const UINT kernelCount, const UINT totalThreadCount,
62 CmKernel * pKernelArray[],
63 const CmThreadGroupSpace * pTGS,
64 CmDevice_RT * pCmDevice, const UINT64 uiSyncBitmap,
65 CmTaskInternal * &pTask)
66 {
67 INT result = CM_SUCCESS;
68 pTask =
69 new(std::nothrow) CmTaskInternal(kernelCount, totalThreadCount,
70 pKernelArray, pCmDevice,
71 uiSyncBitmap);
72
73 if (pTask) {
74 result = pTask->Initialize(pTGS);
75 if (result != CM_SUCCESS) {
76 CmTaskInternal::Destroy(pTask);
77 }
78 } else {
79 CM_ASSERT(0);
80 result = CM_OUT_OF_HOST_MEMORY;
81 }
82 return result;
83 }
84
Create(const UINT kernelCount,const UINT totalThreadCount,CmKernel * pKernelArray[],CmTaskInternal * & pTask,UINT numTasksGenerated,BOOLEAN isLastTask,UINT hints,CmDevice_RT * pCmDevice)85 INT CmTaskInternal::Create(const UINT kernelCount, const UINT totalThreadCount,
86 CmKernel * pKernelArray[], CmTaskInternal * &pTask,
87 UINT numTasksGenerated, BOOLEAN isLastTask,
88 UINT hints, CmDevice_RT * pCmDevice)
89 {
90 INT result = CM_SUCCESS;
91 pTask =
92 new(std::nothrow) CmTaskInternal(kernelCount, totalThreadCount,
93 pKernelArray, pCmDevice,
94 CM_NO_KERNEL_SYNC);
95 if (pTask) {
96 result =
97 pTask->Initialize(hints, numTasksGenerated, isLastTask);
98 if (result != CM_SUCCESS) {
99 CmTaskInternal::Destroy(pTask);
100 }
101 } else {
102 CM_ASSERT(0);
103 result = CM_OUT_OF_HOST_MEMORY;
104 }
105 return result;
106 }
107
Destroy(CmTaskInternal * & pTask)108 INT CmTaskInternal::Destroy(CmTaskInternal * &pTask)
109 {
110 CmSafeDelete(pTask);
111 return CM_SUCCESS;
112 }
113
CmTaskInternal(const UINT kernelCount,const UINT totalThreadCount,CmKernel * pKernelArray[],CmDevice_RT * pCmDevice,const UINT64 uiSyncBitmap)114 CmTaskInternal::CmTaskInternal(const UINT kernelCount, const UINT totalThreadCount, CmKernel * pKernelArray[], CmDevice_RT * pCmDevice, const UINT64 uiSyncBitmap):
115 m_Kernels(kernelCount),
116 m_KernelData(kernelCount),
117 m_KernelCount(kernelCount),
118 m_TotalThreadCount(totalThreadCount),
119 m_pTaskEvent(NULL),
120 m_IsThreadSpaceCreated(FALSE),
121 m_IsThreadCoordinatesExisted(FALSE),
122 m_ThreadSpaceWidth(0),
123 m_ThreadSpaceHeight(0),
124 m_pThreadCoordinates(NULL),
125 m_DependencyPattern(CM_DEPENDENCY_NONE),
126 m_WalkingPattern(CM_WALK_DEFAULT),
127 m_MediaWalkerParamsSet(FALSE),
128 m_DependencyVectorsSet(FALSE),
129 m_pDependencyMasks(NULL),
130 m_IsThreadGroupSpaceCreated(FALSE),
131 m_GroupSpaceWidth(0),
132 m_GroupSpaceHeight(0),
133 m_SLMSize(0),
134 m_ColorCountMinusOne(0),
135 m_Hints(0),
136 m_NumTasksGenerated(0),
137 m_IsLastTask(FALSE),
138 m_ui64SyncBitmap(uiSyncBitmap),
139 m_pCmDevice(pCmDevice), m_SurfaceArray(NULL), m_TaskType(CM_TASK_TYPE_DEFAULT)
140 {
141 m_pKernelCurbeOffsetArray = new(std::nothrow) UINT[kernelCount];
142 CM_ASSERT(m_pKernelCurbeOffsetArray != NULL);
143
144 for (UINT i = 0; i < kernelCount; i++) {
145 m_Kernels.SetElement(i, pKernelArray[i]);
146 m_KernelData.SetElement(i, NULL);
147 }
148
149 CmSafeMemSet(&m_WalkingParameters, 0, sizeof(m_WalkingParameters));
150 CmSafeMemSet(&m_DependencyVectors, 0, sizeof(m_DependencyVectors));
151 if (m_pKernelCurbeOffsetArray != NULL) {
152 CmSafeMemSet(m_pKernelCurbeOffsetArray, 0,
153 kernelCount * sizeof(UINT));
154 }
155 }
156
~CmTaskInternal(void)157 CmTaskInternal::~CmTaskInternal(void)
158 {
159
160 m_Kernels.Delete();
161 for (UINT i = 0; i < m_KernelCount; i++) {
162 CmKernelData *p = (CmKernelData *) m_KernelData.GetElement(i);
163 CmKernelData::Destroy(p);
164 }
165 m_KernelData.Delete();
166
167 CmSafeDeleteArray(m_pKernelCurbeOffsetArray);
168
169 if (m_pTaskEvent) {
170 CmQueue_RT *pCmQueue;
171 m_pCmDevice->GetQueue(pCmQueue);
172 pCmQueue->DestroyEvent(m_pTaskEvent);
173 }
174
175 if (m_pThreadCoordinates) {
176 for (UINT i = 0; i < m_KernelCount; i++) {
177 if (m_pThreadCoordinates[i]) {
178 CmSafeDeleteArray(m_pThreadCoordinates[i]);
179 }
180 }
181 }
182
183 CmSafeDeleteArray(m_pThreadCoordinates);
184
185 if (m_pDependencyMasks) {
186 for (UINT i = 0; i < m_KernelCount; ++i) {
187 CmSafeDeleteArray(m_pDependencyMasks[i]);
188 }
189 }
190
191 CmSafeDeleteArray(m_pDependencyMasks);
192 CmSafeDeleteArray(m_SurfaceArray);
193 }
194
Initialize(const CmThreadSpace * pTS,BOOL isWithHints)195 INT CmTaskInternal::Initialize(const CmThreadSpace * pTS, BOOL isWithHints)
196 {
197 UINT totalCurbeSize = 0;
198 UINT surfacePoolSize = 0;
199 UINT totalKernelBinarySize = 0;
200 UINT kernelCurbeSize = 0;
201 UINT kernelPayloadSize = 0;
202 CmSurfaceManager *pSurfaceMgr = NULL;
203
204 CM_HAL_MAX_VALUES *pHalMaxValues = NULL;
205 CM_HAL_MAX_VALUES_EX *pHalMaxValuesEx = NULL;
206 m_pCmDevice->GetHalMaxValues(pHalMaxValues, pHalMaxValuesEx);
207
208 m_pCmDevice->GetSurfaceManager(pSurfaceMgr);
209 surfacePoolSize = pSurfaceMgr->GetSurfacePoolSize();
210
211 m_SurfaceArray = new(std::nothrow) BOOL[surfacePoolSize];
212 if (!m_SurfaceArray) {
213 CM_ASSERT(0);
214 return CM_FAILURE;
215 }
216 CmSafeMemSet(m_SurfaceArray, 0, surfacePoolSize * sizeof(BOOL));
217
218 for (UINT i = 0; i < m_KernelCount; i++) {
219
220 CmKernel_RT *pKernel = (CmKernel_RT *) m_Kernels.GetElement(i);
221 if (pKernel == NULL) {
222 CM_ASSERT(0);
223 return CM_FAILURE;
224 }
225
226 pKernel->GetSizeInPayload(kernelPayloadSize);
227 pKernel->GetSizeInCurbe(kernelCurbeSize);
228
229 if ((kernelCurbeSize + kernelPayloadSize) >
230 pHalMaxValues->iMaxArgByteSizePerKernel) {
231 CM_ASSERT(0);
232 return CM_EXCEED_KERNEL_ARG_SIZE_IN_BYTE;
233 } else {
234 kernelCurbeSize =
235 pKernel->GetAlignedCurbeSize(kernelCurbeSize);
236 totalCurbeSize += kernelCurbeSize;
237 }
238 m_pKernelCurbeOffsetArray[i] = totalCurbeSize - kernelCurbeSize;
239
240 UINT totalSize = 0;
241 CmKernelData *pKernelData = NULL;
242
243 if (isWithHints) {
244 CmThreadSpace *pKTS = NULL;
245 pKernel->GetThreadSpace(pKTS);
246 if (pKTS) {
247 for (UINT j = i; j > 0; --j) {
248 UINT width, height, myAdjY;
249 CmKernel_RT *pTmpKern =
250 (CmKernel_RT *) m_Kernels.GetElement(j
251 -
252 1);
253 if (!pTmpKern) {
254 CM_ASSERT(0);
255 return CM_FAILURE;
256 }
257 pTmpKern->GetThreadSpace(pKTS);
258 pKTS->GetThreadSpaceSize(width, height);
259 myAdjY = pKernel->GetAdjustedYCoord();
260 pKernel->SetAdjustedYCoord(myAdjY +
261 height);
262 }
263 }
264 }
265
266 pKernel->CollectKernelSurface();
267 INT result =
268 pKernel->CreateKernelData(pKernelData, totalSize, pTS);
269 if ((pKernelData == NULL) || (result != CM_SUCCESS)) {
270 CM_ASSERT(0);
271 CmKernelData::Destroy(pKernelData);
272 return result;
273 }
274
275 m_KernelData.SetElement(i, pKernelData);
276
277 totalKernelBinarySize += pKernel->GetKernelGenxBinarySize();
278
279 BOOL *surfArray = NULL;
280 pKernel->GetKernelSurfaces(surfArray);
281 for (UINT j = 0; j < surfacePoolSize; j++) {
282 m_SurfaceArray[j] |= surfArray[j];
283 }
284 pKernel->ResetKernelSurfaces();
285 }
286
287 if (pTS) {
288 if (FAILED(this->CreateThreadSpaceData(pTS))) {
289 CM_ASSERT(0);
290 return CM_FAILURE;
291 }
292 m_IsThreadSpaceCreated = TRUE;
293 }
294
295 m_TaskType = CM_INTERNAL_TASK_WITH_THREADSPACE;
296
297 if (totalKernelBinarySize >
298 pHalMaxValues->iMaxKernelBinarySize *
299 pHalMaxValues->iMaxKernelsPerTask) {
300 CM_ASSERT(0);
301 return CM_EXCEED_MAX_KERNEL_SIZE_IN_BYTE;
302 }
303
304 return CM_SUCCESS;
305 }
306
Initialize(const CmThreadGroupSpace * pTGS)307 INT CmTaskInternal::Initialize(const CmThreadGroupSpace * pTGS)
308 {
309 UINT totalCurbeSize = 0;
310 UINT surfacePoolSize = 0;
311 UINT totalKernelBinarySize = 0;
312 UINT kernelCurbeSize = 0;
313 UINT kernelPayloadSize = 0;
314
315 CmSurfaceManager *pSurfaceMgr = NULL;
316 CM_HAL_MAX_VALUES *pHalMaxValues = NULL;
317 CM_HAL_MAX_VALUES_EX *pHalMaxValuesEx = NULL;
318 m_pCmDevice->GetHalMaxValues(pHalMaxValues, pHalMaxValuesEx);
319
320 m_pCmDevice->GetSurfaceManager(pSurfaceMgr);
321 CM_ASSERT(pSurfaceMgr);
322 surfacePoolSize = pSurfaceMgr->GetSurfacePoolSize();
323 m_SurfaceArray = new(std::nothrow) BOOL[surfacePoolSize];
324 if (!m_SurfaceArray) {
325 CM_ASSERT(0);
326 return CM_OUT_OF_HOST_MEMORY;
327 }
328 CmSafeMemSet(m_SurfaceArray, 0, surfacePoolSize * sizeof(BOOL));
329
330 for (UINT i = 0; i < m_KernelCount; i++) {
331 CmKernel_RT *pKernel = (CmKernel_RT *) m_Kernels.GetElement(i);
332 if (pKernel == NULL) {
333 CM_ASSERT(0);
334 return CM_FAILURE;
335 }
336
337 pKernel->CollectKernelSurface();
338
339 UINT totalSize = 0;
340 CmKernelData *pKernelData = NULL;
341
342 INT result =
343 pKernel->CreateKernelData(pKernelData, totalSize, pTGS);
344 if (result != CM_SUCCESS) {
345 CM_ASSERT(0);
346 CmKernelData::Destroy(pKernelData);
347 return result;
348 }
349
350 pKernelData->SetKernelDataSize(totalSize);
351
352 pKernel->GetSizeInPayload(kernelPayloadSize);
353 pKernel->GetSizeInCurbe(kernelCurbeSize);
354
355 if (kernelCurbeSize + kernelPayloadSize >
356 pHalMaxValues->iMaxArgByteSizePerKernel) {
357 CM_ASSERT(0);
358 return CM_EXCEED_KERNEL_ARG_SIZE_IN_BYTE;
359 } else {
360 kernelCurbeSize =
361 pKernel->GetAlignedCurbeSize(kernelCurbeSize);
362 totalCurbeSize += kernelCurbeSize;
363 }
364
365 m_pKernelCurbeOffsetArray[i] = totalCurbeSize - kernelCurbeSize;
366
367 m_KernelData.SetElement(i, pKernelData);
368
369 m_SLMSize = pKernel->GetSLMSize();
370
371 totalKernelBinarySize += pKernel->GetKernelGenxBinarySize();
372
373 BOOL *surfArray = NULL;
374 pKernel->GetKernelSurfaces(surfArray);
375 for (UINT j = 0; j < surfacePoolSize; j++) {
376 m_SurfaceArray[j] |= surfArray[j];
377 }
378 pKernel->ResetKernelSurfaces();
379 }
380
381 if (totalKernelBinarySize >
382 pHalMaxValues->iMaxKernelBinarySize *
383 pHalMaxValues->iMaxKernelsPerTask) {
384 CM_ASSERT(0);
385 return CM_EXCEED_MAX_KERNEL_SIZE_IN_BYTE;
386 }
387
388 m_TaskType = CM_INTERNAL_TASK_WITH_THREADGROUPSPACE;
389
390 if (pTGS) {
391 pTGS->GetThreadGroupSpaceSize(m_ThreadSpaceWidth,
392 m_ThreadSpaceHeight,
393 m_GroupSpaceWidth,
394 m_GroupSpaceHeight);
395 m_IsThreadGroupSpaceCreated = TRUE;
396 }
397
398 return CM_SUCCESS;
399 }
400
Initialize(UINT hints,UINT numTasksGenerated,BOOLEAN isLastTask)401 INT CmTaskInternal::Initialize(UINT hints, UINT numTasksGenerated,
402 BOOLEAN isLastTask)
403 {
404 CmThreadSpace *pTS = NULL;
405 INT result = CM_SUCCESS;
406
407 result = this->Initialize(pTS, TRUE);
408
409 m_Hints = hints;
410
411 m_NumTasksGenerated = numTasksGenerated;
412 m_IsLastTask = isLastTask;
413
414 m_TaskType = CM_INTERNAL_TASK_ENQUEUEWITHHINTS;
415
416 return result;
417 }
418
GetKernelCount(UINT & count)419 INT CmTaskInternal::GetKernelCount(UINT & count)
420 {
421 count = m_KernelCount;
422 return CM_SUCCESS;
423 }
424
GetTaskSurfaces(BOOL * & surfArray)425 INT CmTaskInternal::GetTaskSurfaces(BOOL * &surfArray)
426 {
427 surfArray = m_SurfaceArray;
428 return CM_SUCCESS;
429 }
430
GetKernel(const UINT index,CmKernel * & pKernel)431 INT CmTaskInternal::GetKernel(const UINT index, CmKernel * &pKernel)
432 {
433 pKernel = NULL;
434 if (index < m_Kernels.GetSize()) {
435 pKernel = (CmKernel *) m_Kernels.GetElement(index);
436 return CM_SUCCESS;
437 } else {
438 return CM_FAILURE;
439 }
440 }
441
GetKernelData(const UINT index,CmKernelData * & pKernelData)442 INT CmTaskInternal::GetKernelData(const UINT index, CmKernelData * &pKernelData)
443 {
444 pKernelData = NULL;
445 if (index < m_KernelData.GetSize()) {
446 pKernelData = (CmKernelData *) m_KernelData.GetElement(index);
447 return CM_SUCCESS;
448 } else {
449 return CM_FAILURE;
450 }
451 }
452
GetKernelDataSize(const UINT index,UINT & size)453 INT CmTaskInternal::GetKernelDataSize(const UINT index, UINT & size)
454 {
455 size = 0;
456 CmKernelData *pKernelData = NULL;
457 if (index < m_KernelData.GetSize()) {
458 pKernelData = (CmKernelData *) m_KernelData.GetElement(index);
459 if (pKernelData == NULL) {
460 CM_ASSERT(0);
461 return CM_FAILURE;
462 }
463 size = pKernelData->GetKernelDataSize();
464 return CM_SUCCESS;
465 } else {
466 return CM_FAILURE;
467 }
468 }
469
/*
 * Returns the curbe offset recorded for the kernel at `index`.
 *
 * The offset table has m_KernelCount entries and may be NULL when its
 * allocation in the constructor failed. For an out-of-range index or a
 * missing table this asserts and returns 0 instead of reading out of bounds.
 */
UINT CmTaskInternal::GetKernelCurbeOffset(const UINT index)
{
    if ((m_pKernelCurbeOffsetArray == NULL) || (index >= m_KernelCount)) {
        CM_ASSERT(0);
        return 0;
    }

    return (UINT) m_pKernelCurbeOffsetArray[index];
}
474
SetTaskEvent(CmEvent_RT * pEvent)475 INT CmTaskInternal::SetTaskEvent(CmEvent_RT * pEvent)
476 {
477 m_pTaskEvent = (static_cast<CmEvent*>(pEvent));
478 pEvent->Acquire();
479 return CM_SUCCESS;
480 }
481
GetTaskEvent(CmEvent_RT * & pEvent)482 INT CmTaskInternal::GetTaskEvent(CmEvent_RT * &pEvent)
483 {
484 pEvent=(static_cast<CmEvent_RT*>(m_pTaskEvent));
485 return CM_SUCCESS;
486 }
487
GetTaskStatus(CM_STATUS & TaskStatus)488 INT CmTaskInternal::GetTaskStatus(CM_STATUS & TaskStatus)
489 {
490 if (m_pTaskEvent == NULL) {
491 return CM_FAILURE;
492 }
493
494 return m_pTaskEvent->GetStatus(TaskStatus);
495 }
496
ReleaseKernel()497 INT CmTaskInternal::ReleaseKernel()
498 {
499
500 INT hr = CM_SUCCESS;
501
502 for (UINT KrnDataIndex = 0; KrnDataIndex < m_KernelCount;
503 KrnDataIndex++) {
504 CmKernelData *pKernelData;
505 CMCHK_HR(GetKernelData(KrnDataIndex, pKernelData));
506 CMCHK_NULL(pKernelData);
507 CMCHK_HR(pKernelData->ReleaseKernel());
508 }
509
510 finish:
511 return hr;
512 }
513
CreateThreadSpaceData(const CmThreadSpace * pTS)514 INT CmTaskInternal::CreateThreadSpaceData(const CmThreadSpace * pTS)
515 {
516 UINT i;
517 UINT width, height;
518 UINT *pKernelCoordinateIndex = NULL;
519 int hr = CM_SUCCESS;
520 CmThreadSpace *pTS_RT = const_cast < CmThreadSpace * >(pTS);
521
522 CmKernel_RT *pKernel_inTS = NULL;
523 CmKernel_RT *pKernel_inTask = NULL;
524
525 if (pTS_RT->IsThreadAssociated()) {
526 m_pThreadCoordinates =
527 new(std::nothrow) PCM_COORDINATE[m_KernelCount];
528 CMCHK_NULL_RETURN(m_pThreadCoordinates, CM_FAILURE);
529 CmSafeMemSet(m_pThreadCoordinates, 0,
530 m_KernelCount * sizeof(PCM_COORDINATE));
531
532 m_pDependencyMasks =
533 new(std::nothrow) PCM_HAL_MASK_AND_RESET[m_KernelCount];
534 CMCHK_NULL_RETURN(m_pDependencyMasks, CM_FAILURE);
535 CmSafeMemSet(m_pDependencyMasks, 0,
536 m_KernelCount * sizeof(PCM_HAL_MASK_AND_RESET));
537
538 pKernelCoordinateIndex = new(std::nothrow) UINT[m_KernelCount];
539 if (m_pThreadCoordinates && pKernelCoordinateIndex
540 && m_pDependencyMasks) {
541 CmSafeMemSet(pKernelCoordinateIndex, 0,
542 m_KernelCount * sizeof(UINT));
543 for (i = 0; i < m_KernelCount; i++) {
544 pKernelCoordinateIndex[i] = 0;
545 UINT threadCount;
546 this->GetKernel(i, (CmKernel * &)pKernel_inTask);
547
548 if (pKernel_inTask == NULL) {
549 CM_ASSERT(0);
550 hr = CM_FAILURE;
551 goto finish;
552 }
553
554 pKernel_inTask->GetThreadCount(threadCount);
555 m_pThreadCoordinates[i] = new(std::nothrow)
556 CM_COORDINATE[threadCount];
557 if (m_pThreadCoordinates[i]) {
558 CmSafeMemSet(m_pThreadCoordinates[i], 0,
559 sizeof(CM_COORDINATE) *
560 threadCount);
561 } else {
562 CM_ASSERT(0);
563 hr = CM_FAILURE;
564 goto finish;
565 }
566
567 m_pDependencyMasks[i] = new(std::nothrow)
568 CM_HAL_MASK_AND_RESET[threadCount];
569 if (m_pDependencyMasks[i]) {
570 CmSafeMemSet(m_pDependencyMasks[i], 0,
571 sizeof
572 (CM_HAL_MASK_AND_RESET) *
573 threadCount);
574 } else {
575 CM_ASSERT(0);
576 hr = CM_FAILURE;
577 goto finish;
578 }
579 }
580
581 CM_THREAD_SPACE_UNIT *pThreadSpaceUnit = NULL;
582 pTS_RT->GetThreadSpaceSize(width, height);
583 pTS_RT->GetThreadSpaceUnit(pThreadSpaceUnit);
584
585 UINT *pBoardOrder = NULL;
586 pTS_RT->GetBoardOrder(pBoardOrder);
587 for (UINT tIndex = 0; tIndex < height * width; tIndex++) {
588 pKernel_inTS =
589 static_cast <
590 CmKernel_RT *
591 >(pThreadSpaceUnit
592 [pBoardOrder[tIndex]].pKernel);
593 if (pKernel_inTS == NULL) {
594 if (pTS_RT->GetNeedSetKernelPointer()) {
595 pKernel_inTS =(static_cast<CmKernel_RT*> (pTS_RT->GetKernelPointer()));
596 }
597 if (pKernel_inTS == NULL) {
598 CM_ASSERT(0);
599 hr = CM_FAILURE;
600 goto finish;
601 }
602 }
603 UINT kIndex = pKernel_inTS->GetIndexInTask();
604
605 m_pThreadCoordinates[kIndex]
606 [pKernelCoordinateIndex[kIndex]].x =
607 pThreadSpaceUnit[pBoardOrder
608 [tIndex]].scoreboardCoordinates.
609 x;
610 m_pThreadCoordinates[kIndex]
611 [pKernelCoordinateIndex[kIndex]].y =
612 pThreadSpaceUnit[pBoardOrder
613 [tIndex]].scoreboardCoordinates.
614 y;
615 m_pDependencyMasks[kIndex]
616 [pKernelCoordinateIndex[kIndex]].mask =
617 pThreadSpaceUnit[pBoardOrder
618 [tIndex]].dependencyMask;
619 m_pDependencyMasks[kIndex]
620 [pKernelCoordinateIndex[kIndex]].resetMask =
621 pThreadSpaceUnit[pBoardOrder[tIndex]].reset;
622 pKernelCoordinateIndex[kIndex]++;
623 }
624
625 CmSafeDeleteArray(pKernelCoordinateIndex);
626 } else {
627 CM_ASSERT(0);
628 hr = CM_FAILURE;
629 goto finish;
630 }
631
632 m_IsThreadCoordinatesExisted = TRUE;
633 } else {
634 m_pThreadCoordinates = NULL;
635 m_pDependencyMasks = NULL;
636 m_IsThreadCoordinatesExisted = FALSE;
637 }
638
639 if (pTS_RT->IsDependencySet()) {
640 pTS_RT->GetDependencyPatternType(m_DependencyPattern);
641 }
642
643 pTS_RT->GetThreadSpaceSize(m_ThreadSpaceWidth, m_ThreadSpaceHeight);
644
645 pTS_RT->GetColorCountMinusOne(m_ColorCountMinusOne);
646
647 pTS_RT->GetWalkingPattern(m_WalkingPattern);
648
649 m_MediaWalkerParamsSet = pTS_RT->CheckWalkingParametersSet();
650 if (m_MediaWalkerParamsSet) {
651 CM_HAL_WALKING_PARAMETERS tmpMWParams;
652 CMCHK_HR(pTS_RT->GetWalkingParameters(tmpMWParams));
653 CmSafeMemCopy(&m_WalkingParameters, &tmpMWParams,
654 sizeof(tmpMWParams));
655 }
656
657 m_DependencyVectorsSet = pTS_RT->CheckDependencyVectorsSet();
658 if (m_DependencyVectorsSet) {
659 CM_HAL_DEPENDENCY tmpDepVectors;
660 CMCHK_HR(pTS_RT->GetDependencyVectors(tmpDepVectors));
661 CmSafeMemCopy(&m_DependencyVectors, &tmpDepVectors,
662 sizeof(tmpDepVectors));
663 }
664
665 finish:
666 if (hr != CM_SUCCESS) {
667 if (m_pThreadCoordinates) {
668 for (i = 0; i < m_KernelCount; i++) {
669 CmSafeDeleteArray(m_pThreadCoordinates[i]);
670 }
671 }
672
673 if (m_pDependencyMasks) {
674 for (i = 0; i < m_KernelCount; i++) {
675 CmSafeDeleteArray(m_pDependencyMasks[i]);
676 }
677 }
678 CmSafeDeleteArray(m_pThreadCoordinates);
679 CmSafeDeleteArray(m_pDependencyMasks);
680 CmSafeDeleteArray(pKernelCoordinateIndex);
681 }
682 return hr;
683 }
684
GetKernelCoordinates(const UINT index,VOID * & pKernelCoordinates)685 INT CmTaskInternal::GetKernelCoordinates(const UINT index,
686 VOID * &pKernelCoordinates)
687 {
688 if (m_pThreadCoordinates != NULL) {
689 pKernelCoordinates = (PVOID) m_pThreadCoordinates[index];
690 } else {
691 pKernelCoordinates = NULL;
692 }
693
694 return CM_SUCCESS;
695 }
696
GetKernelDependencyMasks(const UINT index,VOID * & pKernelDependencyMasks)697 INT CmTaskInternal::GetKernelDependencyMasks(const UINT index,
698 VOID * &pKernelDependencyMasks)
699 {
700 if (m_pDependencyMasks != NULL) {
701 pKernelDependencyMasks = (PVOID) m_pDependencyMasks[index];
702 } else {
703 pKernelDependencyMasks = NULL;
704 }
705
706 return CM_SUCCESS;
707 }
708
GetDependencyPattern(CM_HAL_DEPENDENCY_PATTERN & DependencyPattern)709 INT CmTaskInternal::GetDependencyPattern(CM_HAL_DEPENDENCY_PATTERN &
710 DependencyPattern)
711 {
712 DependencyPattern = m_DependencyPattern;
713 return CM_SUCCESS;
714 }
715
GetWalkingPattern(CM_HAL_WALKING_PATTERN & WalkingPattern)716 INT CmTaskInternal::GetWalkingPattern(CM_HAL_WALKING_PATTERN & WalkingPattern)
717 {
718 WalkingPattern = m_WalkingPattern;
719 return CM_SUCCESS;
720 }
721
GetWalkingParameters(CM_HAL_WALKING_PARAMETERS & pWalkingParameters)722 INT CmTaskInternal::GetWalkingParameters(CM_HAL_WALKING_PARAMETERS &
723 pWalkingParameters)
724 {
725 CmSafeMemCopy(&pWalkingParameters, &m_WalkingParameters,
726 sizeof(m_WalkingParameters));
727 return CM_SUCCESS;
728 }
729
CheckWalkingParametersSet()730 BOOLEAN CmTaskInternal::CheckWalkingParametersSet()
731 {
732 return m_MediaWalkerParamsSet;
733 }
734
GetDependencyVectors(CM_HAL_DEPENDENCY & pDependencyVectors)735 INT CmTaskInternal::GetDependencyVectors(CM_HAL_DEPENDENCY & pDependencyVectors)
736 {
737 CmSafeMemCopy(&pDependencyVectors, &m_DependencyVectors,
738 sizeof(m_DependencyVectors));
739 return CM_SUCCESS;
740 }
741
CheckDependencyVectorsSet()742 BOOLEAN CmTaskInternal::CheckDependencyVectorsSet()
743 {
744 return m_DependencyVectorsSet;
745 }
746
GetTotalThreadCount(UINT & totalThreadCount)747 INT CmTaskInternal::GetTotalThreadCount(UINT & totalThreadCount)
748 {
749 totalThreadCount = m_TotalThreadCount;
750
751 return CM_SUCCESS;
752 }
753
GetThreadSpaceSize(UINT & width,UINT & height)754 INT CmTaskInternal::GetThreadSpaceSize(UINT & width, UINT & height)
755 {
756 width = m_ThreadSpaceWidth;
757 height = m_ThreadSpaceHeight;
758
759 return CM_SUCCESS;
760 }
761
GetColorCountMinusOne(UINT & colorCount)762 INT CmTaskInternal::GetColorCountMinusOne(UINT & colorCount)
763 {
764 colorCount = m_ColorCountMinusOne;
765
766 return CM_SUCCESS;
767 }
768
IsThreadSpaceCreated(void)769 BOOLEAN CmTaskInternal::IsThreadSpaceCreated(void)
770 {
771 return m_IsThreadSpaceCreated;
772 }
773
IsThreadCoordinatesExisted(void)774 BOOLEAN CmTaskInternal::IsThreadCoordinatesExisted(void)
775 {
776 return m_IsThreadCoordinatesExisted;
777 }
778
GetThreadGroupSpaceSize(UINT & trdSpaceWidth,UINT & trdSpaceHeight,UINT & grpSpaceWidth,UINT & grpSpaceHeight)779 INT CmTaskInternal::GetThreadGroupSpaceSize(UINT & trdSpaceWidth,
780 UINT & trdSpaceHeight,
781 UINT & grpSpaceWidth,
782 UINT & grpSpaceHeight)
783 {
784 trdSpaceWidth = m_ThreadSpaceWidth;
785 trdSpaceHeight = m_ThreadSpaceHeight;
786 grpSpaceWidth = m_GroupSpaceWidth;
787 grpSpaceHeight = m_GroupSpaceHeight;
788
789 return CM_SUCCESS;
790 }
791
GetSLMSize(UINT & iSLMSize)792 INT CmTaskInternal::GetSLMSize(UINT & iSLMSize)
793 {
794 iSLMSize = m_SLMSize;
795 return CM_SUCCESS;
796 }
797
GetHints(UINT & hints)798 INT CmTaskInternal::GetHints(UINT & hints)
799 {
800 hints = m_Hints;
801 return CM_SUCCESS;
802 }
803
GetNumTasksGenerated(UINT & numTasksGenerated)804 INT CmTaskInternal::GetNumTasksGenerated(UINT & numTasksGenerated)
805 {
806 numTasksGenerated = m_NumTasksGenerated;
807 return CM_SUCCESS;
808 }
809
GetLastTask(BOOLEAN & isLastTask)810 INT CmTaskInternal::GetLastTask(BOOLEAN & isLastTask)
811 {
812 isLastTask = m_IsLastTask;
813 return CM_SUCCESS;
814 }
815
IsThreadGroupSpaceCreated(void)816 BOOLEAN CmTaskInternal::IsThreadGroupSpaceCreated(void)
817 {
818 return m_IsThreadGroupSpaceCreated;
819 }
820
GetTaskType(UINT & taskType)821 INT CmTaskInternal::GetTaskType(UINT & taskType)
822 {
823 taskType = m_TaskType;
824
825 return CM_SUCCESS;
826 }
827
GetSyncBitmap()828 UINT64 CmTaskInternal::GetSyncBitmap()
829 {
830 return m_ui64SyncBitmap;
831 }
832
SetPowerOption(PCM_HAL_POWER_OPTION_PARAM pPowerOption)833 INT CmTaskInternal::SetPowerOption(PCM_HAL_POWER_OPTION_PARAM pPowerOption)
834 {
835 CmFastMemCopy(&m_PowerOption, pPowerOption, sizeof(m_PowerOption));
836 return CM_SUCCESS;
837 }
838
GetPowerOption()839 PCM_HAL_POWER_OPTION_PARAM CmTaskInternal::GetPowerOption()
840 {
841 return &m_PowerOption;
842 }
843
SetPreemptionMode(CM_HAL_PREEMPTION_MODE mode)844 INT CmTaskInternal::SetPreemptionMode(CM_HAL_PREEMPTION_MODE mode)
845 {
846 m_PreemptionMode = mode;
847
848 return CM_SUCCESS;
849 }
850
GetPreemptionMode()851 CM_HAL_PREEMPTION_MODE CmTaskInternal::GetPreemptionMode()
852 {
853 return m_PreemptionMode;
854 }
855