1 /*
2  * Copyright © 2014 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the
13  * next paragraph) shall be included in all copies or substantial portions
14  * of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *     Wei Lin<wei.w.lin@intel.com>
26  *     Yuting Yang<yuting.yang@intel.com>
27  */
28 
29 #include "cm_task_internal.h"
30 #include "cm_kernel.h"
31 #include "cm_event.h"
32 #include "cm_device.h"
33 #include "cm_kernel_data.h"
34 #include "cm_thread_space.h"
35 #include "cm_group_space.h"
36 #include "cm_queue.h"
37 #include "cm_surface_manager.h"
38 
Create(const UINT kernelCount,const UINT totalThreadCount,CmKernel * pKernelArray[],const CmThreadSpace * pTS,CmDevice_RT * pCmDevice,const UINT64 uiSyncBitmap,CmTaskInternal * & pTask)39 INT CmTaskInternal::Create(const UINT kernelCount, const UINT totalThreadCount,
40 			   CmKernel * pKernelArray[], const CmThreadSpace * pTS,
41 			   CmDevice_RT * pCmDevice, const UINT64 uiSyncBitmap,
42 			   CmTaskInternal * &pTask)
43 {
44 	INT result = CM_SUCCESS;
45 	pTask =
46 	    new(std::nothrow) CmTaskInternal(kernelCount, totalThreadCount,
47 					     pKernelArray, pCmDevice,
48 					     uiSyncBitmap);
49 	if (pTask) {
50 		result = pTask->Initialize(pTS, FALSE);
51 		if (result != CM_SUCCESS) {
52 			CmTaskInternal::Destroy(pTask);
53 		}
54 	} else {
55 		CM_ASSERT(0);
56 		result = CM_OUT_OF_HOST_MEMORY;
57 	}
58 	return result;
59 }
60 
Create(const UINT kernelCount,const UINT totalThreadCount,CmKernel * pKernelArray[],const CmThreadGroupSpace * pTGS,CmDevice_RT * pCmDevice,const UINT64 uiSyncBitmap,CmTaskInternal * & pTask)61 INT CmTaskInternal::Create(const UINT kernelCount, const UINT totalThreadCount,
62 			   CmKernel * pKernelArray[],
63 			   const CmThreadGroupSpace * pTGS,
64 			   CmDevice_RT * pCmDevice, const UINT64 uiSyncBitmap,
65 			   CmTaskInternal * &pTask)
66 {
67 	INT result = CM_SUCCESS;
68 	pTask =
69 	    new(std::nothrow) CmTaskInternal(kernelCount, totalThreadCount,
70 					     pKernelArray, pCmDevice,
71 					     uiSyncBitmap);
72 
73 	if (pTask) {
74 		result = pTask->Initialize(pTGS);
75 		if (result != CM_SUCCESS) {
76 			CmTaskInternal::Destroy(pTask);
77 		}
78 	} else {
79 		CM_ASSERT(0);
80 		result = CM_OUT_OF_HOST_MEMORY;
81 	}
82 	return result;
83 }
84 
Create(const UINT kernelCount,const UINT totalThreadCount,CmKernel * pKernelArray[],CmTaskInternal * & pTask,UINT numTasksGenerated,BOOLEAN isLastTask,UINT hints,CmDevice_RT * pCmDevice)85 INT CmTaskInternal::Create(const UINT kernelCount, const UINT totalThreadCount,
86 			   CmKernel * pKernelArray[], CmTaskInternal * &pTask,
87 			   UINT numTasksGenerated, BOOLEAN isLastTask,
88 			   UINT hints, CmDevice_RT * pCmDevice)
89 {
90 	INT result = CM_SUCCESS;
91 	pTask =
92 	    new(std::nothrow) CmTaskInternal(kernelCount, totalThreadCount,
93 					     pKernelArray, pCmDevice,
94 					     CM_NO_KERNEL_SYNC);
95 	if (pTask) {
96 		result =
97 		    pTask->Initialize(hints, numTasksGenerated, isLastTask);
98 		if (result != CM_SUCCESS) {
99 			CmTaskInternal::Destroy(pTask);
100 		}
101 	} else {
102 		CM_ASSERT(0);
103 		result = CM_OUT_OF_HOST_MEMORY;
104 	}
105 	return result;
106 }
107 
Destroy(CmTaskInternal * & pTask)108 INT CmTaskInternal::Destroy(CmTaskInternal * &pTask)
109 {
110 	CmSafeDelete(pTask);
111 	return CM_SUCCESS;
112 }
113 
CmTaskInternal(const UINT kernelCount,const UINT totalThreadCount,CmKernel * pKernelArray[],CmDevice_RT * pCmDevice,const UINT64 uiSyncBitmap)114  CmTaskInternal::CmTaskInternal(const UINT kernelCount, const UINT totalThreadCount, CmKernel * pKernelArray[], CmDevice_RT * pCmDevice, const UINT64 uiSyncBitmap):
115 m_Kernels(kernelCount),
116 m_KernelData(kernelCount),
117 m_KernelCount(kernelCount),
118 m_TotalThreadCount(totalThreadCount),
119 m_pTaskEvent(NULL),
120 m_IsThreadSpaceCreated(FALSE),
121 m_IsThreadCoordinatesExisted(FALSE),
122 m_ThreadSpaceWidth(0),
123 m_ThreadSpaceHeight(0),
124 m_pThreadCoordinates(NULL),
125 m_DependencyPattern(CM_DEPENDENCY_NONE),
126 m_WalkingPattern(CM_WALK_DEFAULT),
127 m_MediaWalkerParamsSet(FALSE),
128 m_DependencyVectorsSet(FALSE),
129 m_pDependencyMasks(NULL),
130 m_IsThreadGroupSpaceCreated(FALSE),
131 m_GroupSpaceWidth(0),
132 m_GroupSpaceHeight(0),
133 m_SLMSize(0),
134 m_ColorCountMinusOne(0),
135 m_Hints(0),
136 m_NumTasksGenerated(0),
137 m_IsLastTask(FALSE),
138 m_ui64SyncBitmap(uiSyncBitmap),
139 m_pCmDevice(pCmDevice), m_SurfaceArray(NULL), m_TaskType(CM_TASK_TYPE_DEFAULT)
140 {
141 	m_pKernelCurbeOffsetArray = new(std::nothrow) UINT[kernelCount];
142 	CM_ASSERT(m_pKernelCurbeOffsetArray != NULL);
143 
144 	for (UINT i = 0; i < kernelCount; i++) {
145 		m_Kernels.SetElement(i, pKernelArray[i]);
146 		m_KernelData.SetElement(i, NULL);
147 	}
148 
149 	CmSafeMemSet(&m_WalkingParameters, 0, sizeof(m_WalkingParameters));
150 	CmSafeMemSet(&m_DependencyVectors, 0, sizeof(m_DependencyVectors));
151 	if (m_pKernelCurbeOffsetArray != NULL) {
152 		CmSafeMemSet(m_pKernelCurbeOffsetArray, 0,
153 			     kernelCount * sizeof(UINT));
154 	}
155 }
156 
~CmTaskInternal(void)157 CmTaskInternal::~CmTaskInternal(void)
158 {
159 
160 	m_Kernels.Delete();
161 	for (UINT i = 0; i < m_KernelCount; i++) {
162 		CmKernelData *p = (CmKernelData *) m_KernelData.GetElement(i);
163 		CmKernelData::Destroy(p);
164 	}
165 	m_KernelData.Delete();
166 
167 	CmSafeDeleteArray(m_pKernelCurbeOffsetArray);
168 
169 	if (m_pTaskEvent) {
170 		CmQueue_RT *pCmQueue;
171 		m_pCmDevice->GetQueue(pCmQueue);
172 		pCmQueue->DestroyEvent(m_pTaskEvent);
173 	}
174 
175 	if (m_pThreadCoordinates) {
176 		for (UINT i = 0; i < m_KernelCount; i++) {
177 			if (m_pThreadCoordinates[i]) {
178 				CmSafeDeleteArray(m_pThreadCoordinates[i]);
179 			}
180 		}
181 	}
182 
183 	CmSafeDeleteArray(m_pThreadCoordinates);
184 
185 	if (m_pDependencyMasks) {
186 		for (UINT i = 0; i < m_KernelCount; ++i) {
187 			CmSafeDeleteArray(m_pDependencyMasks[i]);
188 		}
189 	}
190 
191 	CmSafeDeleteArray(m_pDependencyMasks);
192 	CmSafeDeleteArray(m_SurfaceArray);
193 }
194 
Initialize(const CmThreadSpace * pTS,BOOL isWithHints)195 INT CmTaskInternal::Initialize(const CmThreadSpace * pTS, BOOL isWithHints)
196 {
197 	UINT totalCurbeSize = 0;
198 	UINT surfacePoolSize = 0;
199 	UINT totalKernelBinarySize = 0;
200 	UINT kernelCurbeSize = 0;
201 	UINT kernelPayloadSize = 0;
202 	CmSurfaceManager *pSurfaceMgr = NULL;
203 
204 	CM_HAL_MAX_VALUES *pHalMaxValues = NULL;
205 	CM_HAL_MAX_VALUES_EX *pHalMaxValuesEx = NULL;
206 	m_pCmDevice->GetHalMaxValues(pHalMaxValues, pHalMaxValuesEx);
207 
208 	m_pCmDevice->GetSurfaceManager(pSurfaceMgr);
209 	surfacePoolSize = pSurfaceMgr->GetSurfacePoolSize();
210 
211 	m_SurfaceArray = new(std::nothrow) BOOL[surfacePoolSize];
212 	if (!m_SurfaceArray) {
213 		CM_ASSERT(0);
214 		return CM_FAILURE;
215 	}
216 	CmSafeMemSet(m_SurfaceArray, 0, surfacePoolSize * sizeof(BOOL));
217 
218 	for (UINT i = 0; i < m_KernelCount; i++) {
219 
220 		CmKernel_RT *pKernel = (CmKernel_RT *) m_Kernels.GetElement(i);
221 		if (pKernel == NULL) {
222 			CM_ASSERT(0);
223 			return CM_FAILURE;
224 		}
225 
226 		pKernel->GetSizeInPayload(kernelPayloadSize);
227 		pKernel->GetSizeInCurbe(kernelCurbeSize);
228 
229 		if ((kernelCurbeSize + kernelPayloadSize) >
230 		    pHalMaxValues->iMaxArgByteSizePerKernel) {
231 			CM_ASSERT(0);
232 			return CM_EXCEED_KERNEL_ARG_SIZE_IN_BYTE;
233 		} else {
234 			kernelCurbeSize =
235 			    pKernel->GetAlignedCurbeSize(kernelCurbeSize);
236 			totalCurbeSize += kernelCurbeSize;
237 		}
238 		m_pKernelCurbeOffsetArray[i] = totalCurbeSize - kernelCurbeSize;
239 
240 		UINT totalSize = 0;
241 		CmKernelData *pKernelData = NULL;
242 
243 		if (isWithHints) {
244 			CmThreadSpace *pKTS = NULL;
245 			pKernel->GetThreadSpace(pKTS);
246 			if (pKTS) {
247 				for (UINT j = i; j > 0; --j) {
248 					UINT width, height, myAdjY;
249 					CmKernel_RT *pTmpKern =
250 					    (CmKernel_RT *) m_Kernels.GetElement(j
251 									      -
252 									      1);
253 					if (!pTmpKern) {
254 						CM_ASSERT(0);
255 						return CM_FAILURE;
256 					}
257 					pTmpKern->GetThreadSpace(pKTS);
258 					pKTS->GetThreadSpaceSize(width, height);
259 					myAdjY = pKernel->GetAdjustedYCoord();
260 					pKernel->SetAdjustedYCoord(myAdjY +
261 								   height);
262 				}
263 			}
264 		}
265 
266 		pKernel->CollectKernelSurface();
267 		INT result =
268 		    pKernel->CreateKernelData(pKernelData, totalSize, pTS);
269 		if ((pKernelData == NULL) || (result != CM_SUCCESS)) {
270 			CM_ASSERT(0);
271 			CmKernelData::Destroy(pKernelData);
272 			return result;
273 		}
274 
275 		m_KernelData.SetElement(i, pKernelData);
276 
277 		totalKernelBinarySize += pKernel->GetKernelGenxBinarySize();
278 
279 		BOOL *surfArray = NULL;
280 		pKernel->GetKernelSurfaces(surfArray);
281 		for (UINT j = 0; j < surfacePoolSize; j++) {
282 			m_SurfaceArray[j] |= surfArray[j];
283 		}
284 		pKernel->ResetKernelSurfaces();
285 	}
286 
287 	if (pTS) {
288 		if (FAILED(this->CreateThreadSpaceData(pTS))) {
289 			CM_ASSERT(0);
290 			return CM_FAILURE;
291 		}
292 		m_IsThreadSpaceCreated = TRUE;
293 	}
294 
295 	m_TaskType = CM_INTERNAL_TASK_WITH_THREADSPACE;
296 
297 	if (totalKernelBinarySize >
298 	    pHalMaxValues->iMaxKernelBinarySize *
299 	    pHalMaxValues->iMaxKernelsPerTask) {
300 		CM_ASSERT(0);
301 		return CM_EXCEED_MAX_KERNEL_SIZE_IN_BYTE;
302 	}
303 
304 	return CM_SUCCESS;
305 }
306 
Initialize(const CmThreadGroupSpace * pTGS)307 INT CmTaskInternal::Initialize(const CmThreadGroupSpace * pTGS)
308 {
309 	UINT totalCurbeSize = 0;
310 	UINT surfacePoolSize = 0;
311 	UINT totalKernelBinarySize = 0;
312 	UINT kernelCurbeSize = 0;
313 	UINT kernelPayloadSize = 0;
314 
315 	CmSurfaceManager *pSurfaceMgr = NULL;
316 	CM_HAL_MAX_VALUES *pHalMaxValues = NULL;
317 	CM_HAL_MAX_VALUES_EX *pHalMaxValuesEx = NULL;
318 	m_pCmDevice->GetHalMaxValues(pHalMaxValues, pHalMaxValuesEx);
319 
320 	m_pCmDevice->GetSurfaceManager(pSurfaceMgr);
321 	CM_ASSERT(pSurfaceMgr);
322 	surfacePoolSize = pSurfaceMgr->GetSurfacePoolSize();
323 	m_SurfaceArray = new(std::nothrow) BOOL[surfacePoolSize];
324 	if (!m_SurfaceArray) {
325 		CM_ASSERT(0);
326 		return CM_OUT_OF_HOST_MEMORY;
327 	}
328 	CmSafeMemSet(m_SurfaceArray, 0, surfacePoolSize * sizeof(BOOL));
329 
330 	for (UINT i = 0; i < m_KernelCount; i++) {
331 	    CmKernel_RT *pKernel = (CmKernel_RT *) m_Kernels.GetElement(i);
332 		if (pKernel == NULL) {
333 			CM_ASSERT(0);
334 			return CM_FAILURE;
335 		}
336 
337 		pKernel->CollectKernelSurface();
338 
339 		UINT totalSize = 0;
340 		CmKernelData *pKernelData = NULL;
341 
342 		INT result =
343 		    pKernel->CreateKernelData(pKernelData, totalSize, pTGS);
344 		if (result != CM_SUCCESS) {
345 			CM_ASSERT(0);
346 			CmKernelData::Destroy(pKernelData);
347 			return result;
348 		}
349 
350 		pKernelData->SetKernelDataSize(totalSize);
351 
352 		pKernel->GetSizeInPayload(kernelPayloadSize);
353 		pKernel->GetSizeInCurbe(kernelCurbeSize);
354 
355 		if (kernelCurbeSize + kernelPayloadSize >
356 		    pHalMaxValues->iMaxArgByteSizePerKernel) {
357 			CM_ASSERT(0);
358 			return CM_EXCEED_KERNEL_ARG_SIZE_IN_BYTE;
359 		} else {
360 			kernelCurbeSize =
361 			    pKernel->GetAlignedCurbeSize(kernelCurbeSize);
362 			totalCurbeSize += kernelCurbeSize;
363 		}
364 
365 		m_pKernelCurbeOffsetArray[i] = totalCurbeSize - kernelCurbeSize;
366 
367 		m_KernelData.SetElement(i, pKernelData);
368 
369 		m_SLMSize = pKernel->GetSLMSize();
370 
371 		totalKernelBinarySize += pKernel->GetKernelGenxBinarySize();
372 
373 		BOOL *surfArray = NULL;
374 		pKernel->GetKernelSurfaces(surfArray);
375 		for (UINT j = 0; j < surfacePoolSize; j++) {
376 			m_SurfaceArray[j] |= surfArray[j];
377 		}
378 		pKernel->ResetKernelSurfaces();
379 	}
380 
381 	if (totalKernelBinarySize >
382 	    pHalMaxValues->iMaxKernelBinarySize *
383 	    pHalMaxValues->iMaxKernelsPerTask) {
384 		CM_ASSERT(0);
385 		return CM_EXCEED_MAX_KERNEL_SIZE_IN_BYTE;
386 	}
387 
388 	m_TaskType = CM_INTERNAL_TASK_WITH_THREADGROUPSPACE;
389 
390 	if (pTGS) {
391 		pTGS->GetThreadGroupSpaceSize(m_ThreadSpaceWidth,
392 					      m_ThreadSpaceHeight,
393 					      m_GroupSpaceWidth,
394 					      m_GroupSpaceHeight);
395 		m_IsThreadGroupSpaceCreated = TRUE;
396 	}
397 
398 	return CM_SUCCESS;
399 }
400 
Initialize(UINT hints,UINT numTasksGenerated,BOOLEAN isLastTask)401 INT CmTaskInternal::Initialize(UINT hints, UINT numTasksGenerated,
402 			       BOOLEAN isLastTask)
403 {
404 	CmThreadSpace *pTS = NULL;
405 	INT result = CM_SUCCESS;
406 
407 	result = this->Initialize(pTS, TRUE);
408 
409 	m_Hints = hints;
410 
411 	m_NumTasksGenerated = numTasksGenerated;
412 	m_IsLastTask = isLastTask;
413 
414 	m_TaskType = CM_INTERNAL_TASK_ENQUEUEWITHHINTS;
415 
416 	return result;
417 }
418 
GetKernelCount(UINT & count)419 INT CmTaskInternal::GetKernelCount(UINT & count)
420 {
421 	count = m_KernelCount;
422 	return CM_SUCCESS;
423 }
424 
GetTaskSurfaces(BOOL * & surfArray)425 INT CmTaskInternal::GetTaskSurfaces(BOOL * &surfArray)
426 {
427 	surfArray = m_SurfaceArray;
428 	return CM_SUCCESS;
429 }
430 
GetKernel(const UINT index,CmKernel * & pKernel)431 INT CmTaskInternal::GetKernel(const UINT index, CmKernel * &pKernel)
432 {
433 	pKernel = NULL;
434 	if (index < m_Kernels.GetSize()) {
435 		pKernel = (CmKernel *) m_Kernels.GetElement(index);
436 		return CM_SUCCESS;
437 	} else {
438 		return CM_FAILURE;
439 	}
440 }
441 
GetKernelData(const UINT index,CmKernelData * & pKernelData)442 INT CmTaskInternal::GetKernelData(const UINT index, CmKernelData * &pKernelData)
443 {
444 	pKernelData = NULL;
445 	if (index < m_KernelData.GetSize()) {
446 		pKernelData = (CmKernelData *) m_KernelData.GetElement(index);
447 		return CM_SUCCESS;
448 	} else {
449 		return CM_FAILURE;
450 	}
451 }
452 
GetKernelDataSize(const UINT index,UINT & size)453 INT CmTaskInternal::GetKernelDataSize(const UINT index, UINT & size)
454 {
455 	size = 0;
456 	CmKernelData *pKernelData = NULL;
457 	if (index < m_KernelData.GetSize()) {
458 		pKernelData = (CmKernelData *) m_KernelData.GetElement(index);
459 		if (pKernelData == NULL) {
460 			CM_ASSERT(0);
461 			return CM_FAILURE;
462 		}
463 		size = pKernelData->GetKernelDataSize();
464 		return CM_SUCCESS;
465 	} else {
466 		return CM_FAILURE;
467 	}
468 }
469 
// Returns the curbe offset recorded for the kernel at position index.
//
// The offset array holds m_KernelCount entries and comes from a nothrow
// allocation in the constructor, so it can be NULL. An out-of-range index or
// a missing array asserts and yields 0 instead of reading invalid memory.
UINT CmTaskInternal::GetKernelCurbeOffset(const UINT index)
{
	if ((m_pKernelCurbeOffsetArray == NULL) || (index >= m_KernelCount)) {
		CM_ASSERT(0);
		return 0;
	}
	return (UINT) m_pKernelCurbeOffsetArray[index];
}
474 
SetTaskEvent(CmEvent_RT * pEvent)475 INT CmTaskInternal::SetTaskEvent(CmEvent_RT * pEvent)
476 {
477 	m_pTaskEvent = (static_cast<CmEvent*>(pEvent));
478 	pEvent->Acquire();
479 	return CM_SUCCESS;
480 }
481 
GetTaskEvent(CmEvent_RT * & pEvent)482 INT CmTaskInternal::GetTaskEvent(CmEvent_RT * &pEvent)
483 {
484         pEvent=(static_cast<CmEvent_RT*>(m_pTaskEvent));
485 	return CM_SUCCESS;
486 }
487 
GetTaskStatus(CM_STATUS & TaskStatus)488 INT CmTaskInternal::GetTaskStatus(CM_STATUS & TaskStatus)
489 {
490 	if (m_pTaskEvent == NULL) {
491 		return CM_FAILURE;
492 	}
493 
494 	return m_pTaskEvent->GetStatus(TaskStatus);
495 }
496 
ReleaseKernel()497 INT CmTaskInternal::ReleaseKernel()
498 {
499 
500 	INT hr = CM_SUCCESS;
501 
502 	for (UINT KrnDataIndex = 0; KrnDataIndex < m_KernelCount;
503 	     KrnDataIndex++) {
504 		CmKernelData *pKernelData;
505 		CMCHK_HR(GetKernelData(KrnDataIndex, pKernelData));
506 		CMCHK_NULL(pKernelData);
507 		CMCHK_HR(pKernelData->ReleaseKernel());
508 	}
509 
510  finish:
511 	return hr;
512 }
513 
CreateThreadSpaceData(const CmThreadSpace * pTS)514 INT CmTaskInternal::CreateThreadSpaceData(const CmThreadSpace * pTS)
515 {
516 	UINT i;
517 	UINT width, height;
518 	UINT *pKernelCoordinateIndex = NULL;
519 	int hr = CM_SUCCESS;
520 	CmThreadSpace *pTS_RT = const_cast < CmThreadSpace * >(pTS);
521 
522 	CmKernel_RT *pKernel_inTS = NULL;
523 	CmKernel_RT *pKernel_inTask = NULL;
524 
525 	if (pTS_RT->IsThreadAssociated()) {
526 		m_pThreadCoordinates =
527 		    new(std::nothrow) PCM_COORDINATE[m_KernelCount];
528 		CMCHK_NULL_RETURN(m_pThreadCoordinates, CM_FAILURE);
529 		CmSafeMemSet(m_pThreadCoordinates, 0,
530 			     m_KernelCount * sizeof(PCM_COORDINATE));
531 
532 		m_pDependencyMasks =
533 		    new(std::nothrow) PCM_HAL_MASK_AND_RESET[m_KernelCount];
534 		CMCHK_NULL_RETURN(m_pDependencyMasks, CM_FAILURE);
535 		CmSafeMemSet(m_pDependencyMasks, 0,
536 			     m_KernelCount * sizeof(PCM_HAL_MASK_AND_RESET));
537 
538 		pKernelCoordinateIndex = new(std::nothrow) UINT[m_KernelCount];
539 		if (m_pThreadCoordinates && pKernelCoordinateIndex
540 		    && m_pDependencyMasks) {
541 			CmSafeMemSet(pKernelCoordinateIndex, 0,
542 				     m_KernelCount * sizeof(UINT));
543 			for (i = 0; i < m_KernelCount; i++) {
544 				pKernelCoordinateIndex[i] = 0;
545 				UINT threadCount;
546 				this->GetKernel(i, (CmKernel * &)pKernel_inTask);
547 
548 				if (pKernel_inTask == NULL) {
549 					CM_ASSERT(0);
550 					hr = CM_FAILURE;
551 					goto finish;
552 				}
553 
554 				pKernel_inTask->GetThreadCount(threadCount);
555 				m_pThreadCoordinates[i] = new(std::nothrow)
556 				    CM_COORDINATE[threadCount];
557 				if (m_pThreadCoordinates[i]) {
558 					CmSafeMemSet(m_pThreadCoordinates[i], 0,
559 						     sizeof(CM_COORDINATE) *
560 						     threadCount);
561 				} else {
562 					CM_ASSERT(0);
563 					hr = CM_FAILURE;
564 					goto finish;
565 				}
566 
567 				m_pDependencyMasks[i] = new(std::nothrow)
568 				    CM_HAL_MASK_AND_RESET[threadCount];
569 				if (m_pDependencyMasks[i]) {
570 					CmSafeMemSet(m_pDependencyMasks[i], 0,
571 						     sizeof
572 						     (CM_HAL_MASK_AND_RESET) *
573 						     threadCount);
574 				} else {
575 					CM_ASSERT(0);
576 					hr = CM_FAILURE;
577 					goto finish;
578 				}
579 			}
580 
581 			CM_THREAD_SPACE_UNIT *pThreadSpaceUnit = NULL;
582 			pTS_RT->GetThreadSpaceSize(width, height);
583 			pTS_RT->GetThreadSpaceUnit(pThreadSpaceUnit);
584 
585 			UINT *pBoardOrder = NULL;
586 			pTS_RT->GetBoardOrder(pBoardOrder);
587 			for (UINT tIndex = 0; tIndex < height * width; tIndex++) {
588 				pKernel_inTS =
589 				    static_cast <
590 				    CmKernel_RT *
591 				    >(pThreadSpaceUnit
592 				      [pBoardOrder[tIndex]].pKernel);
593 				if (pKernel_inTS == NULL) {
594 					if (pTS_RT->GetNeedSetKernelPointer()) {
595 						pKernel_inTS =(static_cast<CmKernel_RT*> (pTS_RT->GetKernelPointer()));
596 					}
597 					if (pKernel_inTS == NULL) {
598 						CM_ASSERT(0);
599 						hr = CM_FAILURE;
600 						goto finish;
601 					}
602 				}
603 				UINT kIndex = pKernel_inTS->GetIndexInTask();
604 
605 				m_pThreadCoordinates[kIndex]
606 				    [pKernelCoordinateIndex[kIndex]].x =
607 				    pThreadSpaceUnit[pBoardOrder
608 						     [tIndex]].scoreboardCoordinates.
609 				    x;
610 				m_pThreadCoordinates[kIndex]
611 				    [pKernelCoordinateIndex[kIndex]].y =
612 				    pThreadSpaceUnit[pBoardOrder
613 						     [tIndex]].scoreboardCoordinates.
614 				    y;
615 				m_pDependencyMasks[kIndex]
616 				    [pKernelCoordinateIndex[kIndex]].mask =
617 				    pThreadSpaceUnit[pBoardOrder
618 						     [tIndex]].dependencyMask;
619 				m_pDependencyMasks[kIndex]
620 				    [pKernelCoordinateIndex[kIndex]].resetMask =
621 				    pThreadSpaceUnit[pBoardOrder[tIndex]].reset;
622 				pKernelCoordinateIndex[kIndex]++;
623 			}
624 
625 			CmSafeDeleteArray(pKernelCoordinateIndex);
626 		} else {
627 			CM_ASSERT(0);
628 			hr = CM_FAILURE;
629 			goto finish;
630 		}
631 
632 		m_IsThreadCoordinatesExisted = TRUE;
633 	} else {
634 		m_pThreadCoordinates = NULL;
635 		m_pDependencyMasks = NULL;
636 		m_IsThreadCoordinatesExisted = FALSE;
637 	}
638 
639 	if (pTS_RT->IsDependencySet()) {
640 		pTS_RT->GetDependencyPatternType(m_DependencyPattern);
641 	}
642 
643 	pTS_RT->GetThreadSpaceSize(m_ThreadSpaceWidth, m_ThreadSpaceHeight);
644 
645 	pTS_RT->GetColorCountMinusOne(m_ColorCountMinusOne);
646 
647 	pTS_RT->GetWalkingPattern(m_WalkingPattern);
648 
649 	m_MediaWalkerParamsSet = pTS_RT->CheckWalkingParametersSet();
650 	if (m_MediaWalkerParamsSet) {
651 		CM_HAL_WALKING_PARAMETERS tmpMWParams;
652 		CMCHK_HR(pTS_RT->GetWalkingParameters(tmpMWParams));
653 		CmSafeMemCopy(&m_WalkingParameters, &tmpMWParams,
654 			      sizeof(tmpMWParams));
655 	}
656 
657 	m_DependencyVectorsSet = pTS_RT->CheckDependencyVectorsSet();
658 	if (m_DependencyVectorsSet) {
659 		CM_HAL_DEPENDENCY tmpDepVectors;
660 		CMCHK_HR(pTS_RT->GetDependencyVectors(tmpDepVectors));
661 		CmSafeMemCopy(&m_DependencyVectors, &tmpDepVectors,
662 			      sizeof(tmpDepVectors));
663 	}
664 
665  finish:
666 	if (hr != CM_SUCCESS) {
667 		if (m_pThreadCoordinates) {
668 			for (i = 0; i < m_KernelCount; i++) {
669 				CmSafeDeleteArray(m_pThreadCoordinates[i]);
670 			}
671 		}
672 
673 		if (m_pDependencyMasks) {
674 			for (i = 0; i < m_KernelCount; i++) {
675 				CmSafeDeleteArray(m_pDependencyMasks[i]);
676 			}
677 		}
678 		CmSafeDeleteArray(m_pThreadCoordinates);
679 		CmSafeDeleteArray(m_pDependencyMasks);
680 		CmSafeDeleteArray(pKernelCoordinateIndex);
681 	}
682 	return hr;
683 }
684 
GetKernelCoordinates(const UINT index,VOID * & pKernelCoordinates)685 INT CmTaskInternal::GetKernelCoordinates(const UINT index,
686 					 VOID * &pKernelCoordinates)
687 {
688 	if (m_pThreadCoordinates != NULL) {
689 		pKernelCoordinates = (PVOID) m_pThreadCoordinates[index];
690 	} else {
691 		pKernelCoordinates = NULL;
692 	}
693 
694 	return CM_SUCCESS;
695 }
696 
GetKernelDependencyMasks(const UINT index,VOID * & pKernelDependencyMasks)697 INT CmTaskInternal::GetKernelDependencyMasks(const UINT index,
698 					     VOID * &pKernelDependencyMasks)
699 {
700 	if (m_pDependencyMasks != NULL) {
701 		pKernelDependencyMasks = (PVOID) m_pDependencyMasks[index];
702 	} else {
703 		pKernelDependencyMasks = NULL;
704 	}
705 
706 	return CM_SUCCESS;
707 }
708 
GetDependencyPattern(CM_HAL_DEPENDENCY_PATTERN & DependencyPattern)709 INT CmTaskInternal::GetDependencyPattern(CM_HAL_DEPENDENCY_PATTERN &
710 					 DependencyPattern)
711 {
712 	DependencyPattern = m_DependencyPattern;
713 	return CM_SUCCESS;
714 }
715 
GetWalkingPattern(CM_HAL_WALKING_PATTERN & WalkingPattern)716 INT CmTaskInternal::GetWalkingPattern(CM_HAL_WALKING_PATTERN & WalkingPattern)
717 {
718 	WalkingPattern = m_WalkingPattern;
719 	return CM_SUCCESS;
720 }
721 
GetWalkingParameters(CM_HAL_WALKING_PARAMETERS & pWalkingParameters)722 INT CmTaskInternal::GetWalkingParameters(CM_HAL_WALKING_PARAMETERS &
723 					 pWalkingParameters)
724 {
725 	CmSafeMemCopy(&pWalkingParameters, &m_WalkingParameters,
726 		      sizeof(m_WalkingParameters));
727 	return CM_SUCCESS;
728 }
729 
CheckWalkingParametersSet()730 BOOLEAN CmTaskInternal::CheckWalkingParametersSet()
731 {
732 	return m_MediaWalkerParamsSet;
733 }
734 
GetDependencyVectors(CM_HAL_DEPENDENCY & pDependencyVectors)735 INT CmTaskInternal::GetDependencyVectors(CM_HAL_DEPENDENCY & pDependencyVectors)
736 {
737 	CmSafeMemCopy(&pDependencyVectors, &m_DependencyVectors,
738 		      sizeof(m_DependencyVectors));
739 	return CM_SUCCESS;
740 }
741 
CheckDependencyVectorsSet()742 BOOLEAN CmTaskInternal::CheckDependencyVectorsSet()
743 {
744 	return m_DependencyVectorsSet;
745 }
746 
GetTotalThreadCount(UINT & totalThreadCount)747 INT CmTaskInternal::GetTotalThreadCount(UINT & totalThreadCount)
748 {
749 	totalThreadCount = m_TotalThreadCount;
750 
751 	return CM_SUCCESS;
752 }
753 
GetThreadSpaceSize(UINT & width,UINT & height)754 INT CmTaskInternal::GetThreadSpaceSize(UINT & width, UINT & height)
755 {
756 	width = m_ThreadSpaceWidth;
757 	height = m_ThreadSpaceHeight;
758 
759 	return CM_SUCCESS;
760 }
761 
GetColorCountMinusOne(UINT & colorCount)762 INT CmTaskInternal::GetColorCountMinusOne(UINT & colorCount)
763 {
764 	colorCount = m_ColorCountMinusOne;
765 
766 	return CM_SUCCESS;
767 }
768 
IsThreadSpaceCreated(void)769 BOOLEAN CmTaskInternal::IsThreadSpaceCreated(void)
770 {
771 	return m_IsThreadSpaceCreated;
772 }
773 
IsThreadCoordinatesExisted(void)774 BOOLEAN CmTaskInternal::IsThreadCoordinatesExisted(void)
775 {
776 	return m_IsThreadCoordinatesExisted;
777 }
778 
GetThreadGroupSpaceSize(UINT & trdSpaceWidth,UINT & trdSpaceHeight,UINT & grpSpaceWidth,UINT & grpSpaceHeight)779 INT CmTaskInternal::GetThreadGroupSpaceSize(UINT & trdSpaceWidth,
780 					    UINT & trdSpaceHeight,
781 					    UINT & grpSpaceWidth,
782 					    UINT & grpSpaceHeight)
783 {
784 	trdSpaceWidth = m_ThreadSpaceWidth;
785 	trdSpaceHeight = m_ThreadSpaceHeight;
786 	grpSpaceWidth = m_GroupSpaceWidth;
787 	grpSpaceHeight = m_GroupSpaceHeight;
788 
789 	return CM_SUCCESS;
790 }
791 
GetSLMSize(UINT & iSLMSize)792 INT CmTaskInternal::GetSLMSize(UINT & iSLMSize)
793 {
794 	iSLMSize = m_SLMSize;
795 	return CM_SUCCESS;
796 }
797 
GetHints(UINT & hints)798 INT CmTaskInternal::GetHints(UINT & hints)
799 {
800 	hints = m_Hints;
801 	return CM_SUCCESS;
802 }
803 
GetNumTasksGenerated(UINT & numTasksGenerated)804 INT CmTaskInternal::GetNumTasksGenerated(UINT & numTasksGenerated)
805 {
806 	numTasksGenerated = m_NumTasksGenerated;
807 	return CM_SUCCESS;
808 }
809 
GetLastTask(BOOLEAN & isLastTask)810 INT CmTaskInternal::GetLastTask(BOOLEAN & isLastTask)
811 {
812 	isLastTask = m_IsLastTask;
813 	return CM_SUCCESS;
814 }
815 
IsThreadGroupSpaceCreated(void)816 BOOLEAN CmTaskInternal::IsThreadGroupSpaceCreated(void)
817 {
818 	return m_IsThreadGroupSpaceCreated;
819 }
820 
GetTaskType(UINT & taskType)821 INT CmTaskInternal::GetTaskType(UINT & taskType)
822 {
823 	taskType = m_TaskType;
824 
825 	return CM_SUCCESS;
826 }
827 
GetSyncBitmap()828 UINT64 CmTaskInternal::GetSyncBitmap()
829 {
830 	return m_ui64SyncBitmap;
831 }
832 
SetPowerOption(PCM_HAL_POWER_OPTION_PARAM pPowerOption)833 INT CmTaskInternal::SetPowerOption(PCM_HAL_POWER_OPTION_PARAM pPowerOption)
834 {
835 	CmFastMemCopy(&m_PowerOption, pPowerOption, sizeof(m_PowerOption));
836 	return CM_SUCCESS;
837 }
838 
GetPowerOption()839 PCM_HAL_POWER_OPTION_PARAM CmTaskInternal::GetPowerOption()
840 {
841 	return &m_PowerOption;
842 }
843 
SetPreemptionMode(CM_HAL_PREEMPTION_MODE mode)844 INT CmTaskInternal::SetPreemptionMode(CM_HAL_PREEMPTION_MODE mode)
845 {
846 	m_PreemptionMode = mode;
847 
848 	return CM_SUCCESS;
849 }
850 
GetPreemptionMode()851 CM_HAL_PREEMPTION_MODE CmTaskInternal::GetPreemptionMode()
852 {
853 	return m_PreemptionMode;
854 }
855