1 /*
2 * Copyright (c) 2018-2019, Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 */
22 //!
23 //! \file cm_kernel_ex.cpp
24 //! \brief Contains Class CmKernelEx definitions
25 //!
26
27 #include "cm_kernel_ex.h"
28 #include "cm_surface.h"
29 #include "cm_surface_manager.h"
30 #include "cm_surface_sampler8x8.h"
31 #include "cm_surface_sampler.h"
32 #include "cm_mem.h"
33 #include "cm_surface_2d_rt.h"
34 #include "cm_surface_2d_up_rt.h"
35 #include "cm_surface_3d_rt.h"
36 #include "cm_buffer_rt.h"
37 #include "cm_device_rt.h"
38 #include "cm_hal.h"
39 #include "cm_surface_state.h"
40 #include "cm_surface_state_manager.h"
41 #include "cm_surface_vme.h"
42 #include "cm_ssh.h"
43 #include "cm_thread_space_rt.h"
44 #include "cm_surface_sampler.h"
45 #include "cm_media_state.h"
46
47 #include "mhw_state_heap.h"
48
49 using namespace CMRT_UMD;
50
CmKernelEx::~CmKernelEx()
{
    // The dummy spaces are created lazily by GetThreadSpaceEx()/
    // GetThreadGroupSpaceEx(); they must be released through the device
    // that created them.
    if (m_dummyThreadSpace)
    {
        m_device->DestroyThreadSpace(m_dummyThreadSpace);
    }
    if (m_dummyThreadGroupSpace)
    {
        m_device->DestroyThreadGroupSpace(m_dummyThreadGroupSpace);
    }
    // Flattened-argument bookkeeping allocated in Initialize().
    MOS_DeleteArray(m_indexMap);
    MOS_DeleteArray(m_flatArgs);
    MOS_DeleteArray(m_propertyIndexes);
    MOS_DeleteArray(m_cmSurfIndexes);
    MOS_DeleteArray(m_data);
    MOS_DeleteArray(m_surfaceInArg);
    // Curbe buffer allocated in AllocateCurbe()/AllocateCurbeAndFillImplicitArgs().
    MOS_DeleteArray(m_curbe);
}
69
//! \brief    Initializes the kernel and builds the flattened argument table
//!           used by the fast path.
//! \details  After the legacy CmKernelRT initialization succeeds, every ISA
//!           argument is expanded into one or more "flat" arguments:
//!           surface-array arguments contribute one flat entry per surface
//!           (each binding-table index occupies 4 bytes in the payload),
//!           all other arguments contribute exactly one entry.
//! \param    kernelName   Kernel name inside the ISA program.
//! \param    options      Jitter/compile options string.
//! \return   CM_SUCCESS on success, a CM error code otherwise.
int32_t CmKernelEx::Initialize(const char *kernelName, const char *options)
{
    int ret = CmKernelRT::Initialize(kernelName, options);
    if (ret != CM_SUCCESS)
    {
        return ret;
    }

    // m_indexMap[i] maps the original arg index to the first flat index of
    // that arg; the extra slot at m_argCount acts as an end sentinel so that
    // (m_indexMap[i+1] - m_indexMap[i]) gives the flat-entry count of arg i.
    m_indexMap = MOS_NewArray(uint32_t, (m_argCount+1));
    CM_CHK_NULL_RETURN_CMERROR(m_indexMap);
    MOS_ZeroMemory(m_indexMap, (m_argCount+1)*sizeof(uint32_t));
    m_flatArgCount= 0;
    bool isGpgpuKernel = false;
    uint32_t minPayload = 0;
    // First pass: count flat entries, detect gpgpu (thread-group) kernels by
    // their implicit arguments, and find the smallest payload offset.
    for (uint32_t i = 0; i < m_argCount; i++)
    {
        if (ArgArraySupported(m_args[i].unitKind))
        {
            // Surface arrays store one 4-byte index per surface.
            int numSurfaces = m_args[i].unitSize/sizeof(int);
            m_flatArgCount += numSurfaces;
        }
        else
        {
            ++m_flatArgCount;
        }

        if (!isGpgpuKernel &&
            (  m_args[i].unitKind == CM_ARGUMENT_IMPLICT_LOCALSIZE
             ||m_args[i].unitKind == CM_ARGUMENT_IMPLICT_GROUPSIZE
             ||m_args[i].unitKind == CM_ARGUMENT_IMPLICIT_LOCALID))
        {
            isGpgpuKernel = true;
        }
        // Track the minimum payload offset over all args except the local-id
        // block (which is relocated after the explicit curbe later).
        if (i == 0 || (m_args[i].unitKind != CM_ARGUMENT_IMPLICIT_LOCALID && minPayload > m_args[i].unitOffsetInPayload))
        {
            minPayload = m_args[i].unitOffsetInPayload;
        }
    }

    // Media (non-gpgpu) kernels keep the fixed legacy payload base.
    if (!isGpgpuKernel)
    {
        minPayload = CM_PAYLOAD_OFFSET;
    }

    if (m_flatArgCount == 0)
    {
        return CM_SUCCESS;
    }

    m_flatArgs = MOS_NewArray(_CmArg, m_flatArgCount);
    CM_CHK_NULL_RETURN_CMERROR(m_flatArgs);
    MOS_ZeroMemory(m_flatArgs, m_flatArgCount * sizeof(_CmArg));
    // Per-flat-arg surface property snapshot (detects stale surface states).
    m_propertyIndexes = MOS_NewArray(uint8_t, m_flatArgCount);
    CM_CHK_NULL_RETURN_CMERROR(m_propertyIndexes);
    MOS_ZeroMemory(m_propertyIndexes, m_flatArgCount);
    // Per-flat-arg CM surface index (needed to re-query surface states).
    m_cmSurfIndexes = MOS_NewArray(uint32_t, m_flatArgCount);
    CM_CHK_NULL_RETURN_CMERROR(m_cmSurfIndexes);
    MOS_ZeroMemory(m_cmSurfIndexes, m_flatArgCount * sizeof(uint32_t));

    // Second pass: fill in the flat table and compute each entry's offset in
    // the local value buffer (m_data) and in the curbe payload.
    int j = 0;
    uint32_t offset = 0; //offset in the local buffer
    int localIDIndex = -1;
    for (uint32_t i = 0; i < m_argCount; i++)
    {
        if (ArgArraySupported(m_args[i].unitKind))
        {
            m_indexMap[i] = j;
            int numSurfaces = m_args[i].unitSize/sizeof(int);
            for (int k = 0; k < numSurfaces; k ++)
            {
                m_flatArgs[j].isaKind = m_args[i].unitKind;
                m_flatArgs[j].kind = m_args[i].unitKind;
                m_flatArgs[j].unitSize = sizeof(void *); // we can either store the pointer to CmSurfaceState or pointer to mos_resource here
                m_flatArgs[j].payloadOffset = m_args[i].unitOffsetInPayload + k*4 - minPayload; //each bte index has 4 bytes
                m_flatArgs[j].offset = offset;
                m_flatArgs[j].sizeInCurbe = 4;
                offset += m_flatArgs[j].unitSize;

                // update curbe size
                if (m_explicitCurbeSize < (uint32_t)(m_flatArgs[j].payloadOffset + m_flatArgs[j].sizeInCurbe))
                {
                    m_explicitCurbeSize = m_flatArgs[j].payloadOffset + m_flatArgs[j].sizeInCurbe;
                }
                ++ j;
            }
        }
        else
        {
            m_indexMap[i] = j;
            m_flatArgs[j].isaKind = m_args[i].unitKind;
            m_flatArgs[j].kind = m_args[i].unitKind;
            m_flatArgs[j].unitSize = m_args[i].unitSize;
            m_flatArgs[j].payloadOffset = m_args[i].unitOffsetInPayload - minPayload;
            m_flatArgs[j].offset = offset;
            m_flatArgs[j].sizeInCurbe = m_flatArgs[j].unitSize;
            offset += m_flatArgs[j].unitSize;

            // update curbe size
            if (m_args[i].unitKind == CM_ARGUMENT_IMPLICIT_LOCALID)
            {
                // Local-id data doesn't count toward the explicit curbe; its
                // payload offset is patched after the loop.
                localIDIndex = j;
            }
            else
            {
                if (m_explicitCurbeSize < (uint32_t)(m_flatArgs[j].payloadOffset + m_flatArgs[j].sizeInCurbe))
                {
                    m_explicitCurbeSize = m_flatArgs[j].payloadOffset + m_flatArgs[j].sizeInCurbe;
                }
            }
            ++ j;
        }
        // Sentinel entry; re-written every iteration, final value equals the
        // total flat count.
        m_indexMap[m_argCount] = j;
    }

    // adjust the payload of local id
    if (localIDIndex >= 0)
    {
        m_flatArgs[localIDIndex].payloadOffset = MOS_ALIGN_CEIL(m_explicitCurbeSize, 32);
    }

    // m_data holds the flattened argument values (or CmSurfaceState pointers
    // for surfaces); m_surfaceInArg mirrors it with CmSurface pointers.
    m_data = MOS_NewArray(uint8_t, offset);
    CM_CHK_NULL_RETURN_CMERROR(m_data);
    m_surfaceInArg = MOS_NewArray(uint8_t, offset);
    CM_CHK_NULL_RETURN_CMERROR(m_surfaceInArg);
    MOS_ZeroMemory(m_data, sizeof(uint8_t)*offset);
    MOS_ZeroMemory(m_surfaceInArg, sizeof(uint8_t)*offset);

    m_hashValue = m_kernelInfo->hashValue;

    return CM_SUCCESS;
}
201
AllocateCurbe()202 MOS_STATUS CmKernelEx::AllocateCurbe()
203 {
204 MOS_DeleteArray(m_curbe);
205 if (m_explicitCurbeSize > 0)
206 {
207 m_curbeSize = MOS_ALIGN_CEIL(m_explicitCurbeSize, 64);
208 m_curbeSizePerThread = m_curbeSize;
209 m_curbeSizeCrossThread = 0;
210 m_curbe = MOS_NewArray(uint8_t, m_curbeSize);
211 CM_CHK_NULL_RETURN_MOSERROR(m_curbe);
212 MOS_ZeroMemory(m_curbe, m_curbeSize);
213 }
214 return MOS_STATUS_SUCCESS;
215 }
216
//! \brief    Allocates the curbe for the gpgpu (thread-group) path and fills
//!           the implicit arguments (group size, local size, local ids).
//! \details  The curbe consists of a cross-thread section (explicit args,
//!           32-byte aligned) followed by one per-thread section for every
//!           thread in a group, carrying that thread's XYZ local id.
//! \param    globalGroupSpace  Overriding group space; when nullptr the
//!           kernel's own m_threadGroupSpace is used.
MOS_STATUS CmKernelEx::AllocateCurbeAndFillImplicitArgs(CmThreadGroupSpace *globalGroupSpace)
{
    CmThreadGroupSpace *tgs = (globalGroupSpace == nullptr)?m_threadGroupSpace:globalGroupSpace;

    uint32_t thrdSpaceWidth = 0;
    uint32_t thrdSpaceHeight = 0;
    uint32_t thrdSpaceDepth = 0;
    uint32_t grpSpaceWidth = 0;
    uint32_t grpSpaceHeight = 0;
    uint32_t grpSpaceDepth = 0;

    if (tgs)
    {
        tgs->GetThreadGroupSpaceSize(thrdSpaceWidth, thrdSpaceHeight, thrdSpaceDepth, grpSpaceWidth, grpSpaceHeight, grpSpaceDepth);
    }

    MOS_DeleteArray(m_curbe);
    // Per-thread section size: 64 bytes when the explicit curbe ends on a
    // 32n+4 boundary, otherwise 32 bytes. NOTE(review): presumably this keeps
    // the local-id triple from straddling a GRF boundary — confirm against the
    // hardware payload layout.
    m_curbeSizePerThread = (m_explicitCurbeSize%32 == 4)? 64:32;
    m_curbeSizeCrossThread = MOS_ALIGN_CEIL(m_explicitCurbeSize, 32);
    m_curbeSize = m_curbeSizeCrossThread + m_curbeSizePerThread * thrdSpaceWidth * thrdSpaceHeight * thrdSpaceDepth;
    m_curbeSize = MOS_ALIGN_CEIL(m_curbeSize, 64);
    m_curbe = MOS_NewArray(uint8_t, m_curbeSize);
    CM_CHK_NULL_RETURN_MOSERROR(m_curbe);
    MOS_ZeroMemory(m_curbe, m_curbeSize);

    // Locate the payload offsets of the three implicit args (if present).
    int localIdPayload = -1;
    int groupSizePayload = -1;
    int localSizePayload = -1;

    for (uint32_t i = 0; i < m_flatArgCount; i++)
    {
        if (m_flatArgs[i].kind == ARG_KIND_IMPLICT_LOCALSIZE)
            localSizePayload = m_flatArgs[i].payloadOffset;
        if (m_flatArgs[i].kind == ARG_KIND_IMPLICT_GROUPSIZE)
            groupSizePayload = m_flatArgs[i].payloadOffset;
        if (m_flatArgs[i].kind == ARG_KIND_IMPLICIT_LOCALID)
            localIdPayload = m_flatArgs[i].payloadOffset;
    }

    // set group size implicit args (three dwords: width, height, depth)
    if (groupSizePayload >= 0)
    {
        *(uint32_t *)(m_curbe + groupSizePayload) = grpSpaceWidth;
        *(uint32_t *)(m_curbe + groupSizePayload + 4) = grpSpaceHeight;
        *(uint32_t *)(m_curbe + groupSizePayload + 8) = grpSpaceDepth;
    }

    // set local size implicit args (three dwords: width, height, depth)
    if (localSizePayload >= 0)
    {
        *(uint32_t *)(m_curbe + localSizePayload) = thrdSpaceWidth;
        *(uint32_t *)(m_curbe + localSizePayload + 4) = thrdSpaceHeight;
        *(uint32_t *)(m_curbe + localSizePayload + 8) = thrdSpaceDepth;
    }

    // set local id data per thread: one (idX, idY, idZ) triple at the start
    // of each thread's per-thread section, X varying fastest.
    if (localIdPayload >= 0)
    {
        int offset = localIdPayload;
        for (uint32_t idZ = 0; idZ < thrdSpaceDepth; idZ++)
        {
            for (uint32_t idY = 0; idY < thrdSpaceHeight; idY++)
            {
                for (uint32_t idX = 0; idX < thrdSpaceWidth; idX++)
                {
                    *(uint32_t *)(m_curbe + offset) = idX;
                    *(uint32_t *)(m_curbe + offset + 4) = idY;
                    *(uint32_t *)(m_curbe + offset + 8) = idZ;
                    offset += m_curbeSizePerThread;
                }
            }
        }
    }

    return MOS_STATUS_SUCCESS;
}
293
IsSurface(uint16_t kind)294 bool CmKernelEx::IsSurface(uint16_t kind)
295 {
296 switch (kind)
297 {
298 case ARG_KIND_SURFACE:
299 case ARG_KIND_SURFACE_1D:
300 case ARG_KIND_SURFACE_2D:
301 case ARG_KIND_SURFACE_2D_UP:
302 case ARG_KIND_SURFACE_SAMPLER:
303 case ARG_KIND_SURFACE2DUP_SAMPLER:
304 case ARG_KIND_SURFACE_3D:
305 case ARG_KIND_SURFACE_SAMPLER8X8_AVS:
306 case ARG_KIND_SURFACE_SAMPLER8X8_VA:
307 case ARG_KIND_SURFACE_2D_SCOREBOARD:
308 case ARG_KIND_STATE_BUFFER:
309 case ARG_KIND_SURFACE_VME:
310 return true;
311 default:
312 return false;
313 }
314 return false;
315 }
316
//! \brief    Sets one kernel argument in the flattened fast-path tables
//!           (and, unless creating a GPU-copy kernel, in the legacy tables).
//! \param    index  Original (unflattened) argument index.
//! \param    size   Byte size of the caller-supplied value.
//! \param    value  Argument value; for surface/sampler args this is an array
//!           of SurfaceIndex/SamplerIndex.
int32_t CmKernelEx::SetKernelArg(uint32_t index, size_t size, const void * value)
{
    if (!m_blCreatingGPUCopyKernel) // gpucopy kernels only executed by fastpath, no need to set legacy kernels
    {
        CmKernelRT::SetKernelArg(index, size, value);
    }
    if( index >= m_argCount )
    {
        CM_ASSERTMESSAGE("Error: Invalid kernel arg count.");
        return CM_INVALID_ARG_INDEX;

    }

    if( !value)
    {
        CM_ASSERTMESSAGE("Error: Invalid kernel arg value.");
        return CM_INVALID_ARG_VALUE;
    }

    if( size == 0)
    {
        CM_ASSERTMESSAGE("Error: Invalid kernel arg size.");
        return CM_INVALID_ARG_SIZE;
    }

    // Flat range [start, start+len) covered by this argument (len > 1 only
    // for surface arrays).
    uint32_t start = m_indexMap[index];
    uint32_t len = m_indexMap[index + 1] - start;

    if (IsSurface(m_flatArgs[start].isaKind))
    {
        CMRT_UMD::SurfaceIndex *surfIndexes = (CMRT_UMD::SurfaceIndex *)value;
        if (surfIndexes == (CMRT_UMD::SurfaceIndex *)CM_NULL_SURFACE)
        {
            // Whole argument is a null surface: clear state and surface slots.
            for (uint32_t i = 0; i < len; i++)
            {
                *(void **)(m_data + m_flatArgs[start + i].offset) = nullptr;
                *(void **)(m_surfaceInArg + m_flatArgs[start + i].offset) = nullptr;
                m_flatArgs[start + i].isSet = true;
            }
            return CM_SUCCESS;
        }
        // sanity check
        if (len * sizeof(CMRT_UMD::SurfaceIndex) != size)
        {
            CM_ASSERTMESSAGE("Error: Invalid kernel arg size.");
            return CM_INVALID_ARG_SIZE;
        }

        for (uint32_t i = 0; i < len; i++)
        {
            // NOTE: shadows the function parameter 'index'; this is the CM
            // surface index of element i.
            uint32_t index = surfIndexes[i].get_data();

            m_flatArgs[start + i].isSet = true;
            if (index == CM_NULL_SURFACE)
            {
                *(void **)(m_data + m_flatArgs[start+i].offset) = nullptr;
                *(void **)(m_surfaceInArg + m_flatArgs[start+i].offset) = nullptr;
            }
            else
            {
                CmSurface* surface = nullptr;
                m_surfaceMgr->GetSurface(index, surface);
                if (nullptr == surface)
                {
                    // Surface was destroyed or never created: treat as null.
                    *(void **)(m_data + m_flatArgs[start+i].offset) = nullptr;
                    *(void **)(m_surfaceInArg + m_flatArgs[start+i].offset) = nullptr;
                }
                else
                {
                    // Refine the generic ISA kind into the concrete surface kind.
                    m_flatArgs[start + i].kind = ToArgKind(surface);

                    // get the CmSurfaceState from the surface index, this will be changed if surfmgr optimized
                    // most likely, this will be moved to CmSurface
                    CmSurfaceState *temp = GetSurfaceState(surface, index);
                    *(CmSurfaceState **)(m_data + m_flatArgs[start + i].offset) = temp;
                    *(CmSurface **)(m_surfaceInArg + m_flatArgs[start+i].offset) = surface;
                    // Snapshot the property index so UpdateCurbe() can detect
                    // later surface changes and refresh the state.
                    m_propertyIndexes[start + i] = surface->GetPropertyIndex();
                    m_cmSurfIndexes[start + i] = index;
                }
            }
        }
    }
    else if (m_flatArgs[start].isaKind == ARG_KIND_SAMPLER) // only support 3D sampler and AVS sampler in fastpath
    {
        CMRT_UMD::SamplerIndex *samplerIndexes = (CMRT_UMD::SamplerIndex *)value;
        // sanity check
        // NOTE(review): compares against sizeof(SurfaceIndex) rather than
        // sizeof(SamplerIndex) — presumably identical in size; confirm.
        if (len * sizeof(CMRT_UMD::SurfaceIndex) != size)
        {
            CM_ASSERTMESSAGE("Error: Invalid kernel arg size.");
            return CM_INVALID_ARG_SIZE;
        }

        for (uint32_t i = 0; i < len; i++)
        {
            uint32_t index = samplerIndexes[i].get_data();
            MHW_SAMPLER_STATE_PARAM *temp = (MHW_SAMPLER_STATE_PARAM *)GetSamplerParam(index);
            *(MHW_SAMPLER_STATE_PARAM **)(m_data + m_flatArgs[start + i].offset) = temp;
        }
    }
    else
    {
        // Plain (by-value) argument: copy the raw bytes into the local buffer.
        if (size != m_flatArgs[start].unitSize)
        {
            CM_ASSERTMESSAGE("Error: Invalid kernel arg size.");
            return CM_INVALID_ARG_SIZE;
        }
        CmSafeMemCopy((void *)(m_data + m_flatArgs[start].offset), value, size);
    }
    return CM_SUCCESS;
}
427
ToArgKind(CmSurface * surface)428 CM_ARG_KIND CmKernelEx::ToArgKind(CmSurface *surface)
429 {
430 switch(surface->Type())
431 {
432 case CM_ENUM_CLASS_TYPE_CMBUFFER_RT:
433 return ARG_KIND_SURFACE_1D;
434 case CM_ENUM_CLASS_TYPE_CMSURFACE2D:
435 return ARG_KIND_SURFACE_2D;
436 case CM_ENUM_CLASS_TYPE_CMSURFACE2DUP:
437 return ARG_KIND_SURFACE_2D_UP;
438 case CM_ENUM_CLASS_TYPE_CMSURFACE3D:
439 return ARG_KIND_SURFACE_3D;
440 case CM_ENUM_CLASS_TYPE_CMSURFACESAMPLER:
441 {
442 CmSurfaceSampler* surfSampler = static_cast <CmSurfaceSampler *> (surface);
443 SAMPLER_SURFACE_TYPE type;
444 surfSampler->GetSurfaceType(type);
445 if (type == SAMPLER_SURFACE_TYPE_2D)
446 {
447 return ARG_KIND_SURFACE_SAMPLER;
448 }
449 else if (type == SAMPLER_SURFACE_TYPE_2DUP)
450 {
451 return ARG_KIND_SURFACE2DUP_SAMPLER;
452 }
453 else
454 {
455 return ARG_KIND_SURFACE_3D;
456 }
457 }
458 case CM_ENUM_CLASS_TYPE_CMSURFACESAMPLER8X8:
459 {
460 CmSurfaceSampler8x8* surfSampler8x8 = static_cast <CmSurfaceSampler8x8 *> (surface);
461 if (surfSampler8x8->GetSampler8x8SurfaceType() == CM_VA_SURFACE)
462 {
463 return ARG_KIND_SURFACE_SAMPLER8X8_VA;
464 }
465 else
466 {
467 return ARG_KIND_SURFACE_SAMPLER8X8_AVS;
468 }
469 }
470 case CM_ENUM_CLASS_TYPE_CMSURFACEVME:
471 return ARG_KIND_SURFACE_VME;
472 case CM_ENUM_CLASS_TYPE_CM_STATE_BUFFER:
473 return ARG_KIND_STATE_BUFFER;
474 default:
475 return ARG_KIND_GENERAL;
476 }
477 }
478
GetSurfaceState(CmSurface * surface,uint32_t index)479 CmSurfaceState* CmKernelEx::GetSurfaceState(CmSurface *surface, uint32_t index)
480 {
481 CM_HAL_STATE *cmHalState = ((PCM_CONTEXT_DATA)m_device->GetAccelData())->cmHalState;
482 uint32_t surfaceArraySize = 0;
483 m_surfaceMgr->GetSurfaceArraySize(surfaceArraySize);
484 CM_CHK_COND_RETURN((surfaceArraySize == 0), nullptr, "Surface Array is empty.");
485 uint32_t aliasIndex = index/surfaceArraySize;
486
487 switch (surface->Type())
488 {
489 case CM_ENUM_CLASS_TYPE_CMSURFACE2D:
490 {
491 CmSurface2DRT* surf2D = static_cast<CmSurface2DRT*>(surface);
492 uint32_t halIndex = 0;
493 surf2D->GetIndexFor2D(halIndex);
494 PCM_HAL_SURFACE2D_SURFACE_STATE_PARAM surfStateParam = nullptr;
495 if (aliasIndex > 0 || cmHalState->umdSurf2DTable[halIndex].surfStateSet)
496 {
497 surfStateParam = &(cmHalState->umdSurf2DTable[halIndex].surfaceStateParam[aliasIndex]);
498 }
499 return cmHalState->umdSurf2DTable[halIndex].surfStateMgr->GetSurfaceState(0, 0, surfStateParam);
500 }
501 case CM_ENUM_CLASS_TYPE_CMSURFACE2DUP:
502 {
503 CmSurface2DUPRT* surf2DUP = static_cast<CmSurface2DUPRT*>(surface);
504 uint32_t halIndex = 0;
505 surf2DUP->GetHandle(halIndex);
506 return cmHalState->surf2DUPTable[halIndex].surfStateMgr->GetSurfaceState();
507 }
508 case CM_ENUM_CLASS_TYPE_CMBUFFER_RT:
509 {
510 CmBuffer_RT* surf1D = static_cast<CmBuffer_RT*>(surface);
511 uint32_t halIndex = 0;
512 surf1D->GetHandle(halIndex);
513 CM_HAL_BUFFER_SURFACE_STATE_ENTRY *surfStateParam = nullptr;
514 if (aliasIndex > 0 || cmHalState->bufferTable[halIndex].surfStateSet)
515 {
516 surfStateParam = &(cmHalState->bufferTable[halIndex].surfaceStateEntry[aliasIndex]);
517 }
518 return cmHalState->bufferTable[halIndex].surfStateMgr->GetSurfaceState(surfStateParam);
519 }
520 case CM_ENUM_CLASS_TYPE_CMSURFACE3D:
521 {
522 CmSurface3DRT *surf3D = static_cast<CmSurface3DRT *>(surface);
523 uint32_t halIndex = 0;
524 surf3D->GetHandle(halIndex);
525 return cmHalState->surf3DTable[halIndex].surfStateMgr->GetSurfaceState(0, 1);
526 }
527 case CM_ENUM_CLASS_TYPE_CMSURFACEVME:
528 {
529 CmSurfaceVme *surfVme = static_cast<CmSurfaceVme*>(surface);
530 CmSurfaceStateVME *surfState = surfVme->GetSurfaceState();
531 if (surfState == nullptr)
532 {
533 int argSize = surfVme->GetVmeCmArgSize();
534 int surfCount = surfVme->GetTotalSurfacesCount();
535
536 uint8_t *vmeValue = MOS_NewArray(uint8_t, argSize);
537 if (vmeValue == nullptr)
538 {
539 return nullptr;
540 }
541 uint16_t surfIndexes[17];
542 SetArgsSingleVme(surfVme, vmeValue, surfIndexes);
543 surfState = MOS_New(CmSurfaceStateVME, cmHalState);
544 if (surfState == nullptr)
545 {
546 return nullptr;
547 }
548 surfState->Initialize((CM_HAL_VME_ARG_VALUE *)vmeValue);
549
550 surfVme->SetSurfState(cmHalState->advExecutor, vmeValue, surfState); // set for destroy later
551 }
552 return surfState;
553 }
554 case CM_ENUM_CLASS_TYPE_CMSURFACESAMPLER:
555 {
556 uint32_t halIndex = 0;
557 uint16_t cmIndex = 0;
558 CmSurfaceSampler* surfSampler = static_cast <CmSurfaceSampler *> (surface);
559 surfSampler->GetHandle(halIndex);
560 surfSampler->GetCmIndexCurrent(cmIndex);
561 SAMPLER_SURFACE_TYPE type;
562 surfSampler->GetSurfaceType(type);
563 switch (type)
564 {
565 case SAMPLER_SURFACE_TYPE_2D:
566 {
567 // re-calculate the aliasIndex
568 aliasIndex = cmIndex/surfaceArraySize;
569
570 PCM_HAL_SURFACE2D_SURFACE_STATE_PARAM surfStateParam = nullptr;
571 if (aliasIndex > 0 || cmHalState->umdSurf2DTable[halIndex].surfStateSet)
572 {
573 surfStateParam = &(cmHalState->umdSurf2DTable[halIndex].surfaceStateParam[aliasIndex]);
574 }
575 return cmHalState->umdSurf2DTable[halIndex].surfStateMgr->GetSurfaceState(0, 1, surfStateParam);
576 }
577 case SAMPLER_SURFACE_TYPE_2DUP:
578 {
579 return cmHalState->surf2DUPTable[halIndex].surfStateMgr->GetSurfaceState(0, 1);
580 }
581 case SAMPLER_SURFACE_TYPE_3D:
582 {
583 return cmHalState->surf3DTable[halIndex].surfStateMgr->GetSurfaceState(0, 1);
584 }
585 default:
586 {
587 }
588 }
589 }
590 case CM_ENUM_CLASS_TYPE_CMSURFACESAMPLER8X8:
591 {
592 CmSurfaceSampler8x8* surfSampler8x8 = static_cast <CmSurfaceSampler8x8 *> (surface);
593 uint32_t halIndex = 0;
594 uint16_t cmIndex = 0;
595
596 surfSampler8x8->GetIndexCurrent(halIndex);
597 surfSampler8x8->GetCmIndex(cmIndex);
598 // re-calculate the aliasIndex
599 aliasIndex = cmIndex/surfaceArraySize;
600
601 PCM_HAL_SURFACE2D_SURFACE_STATE_PARAM surfStateParam = nullptr;
602 if (aliasIndex > 0 || cmHalState->umdSurf2DTable[halIndex].surfStateSet)
603 {
604 surfStateParam = &(cmHalState->umdSurf2DTable[halIndex].surfaceStateParam[aliasIndex]);
605 }
606 return cmHalState->umdSurf2DTable[halIndex].surfStateMgr->GetSurfaceState(1, 1, surfStateParam);
607 }
608 default: //not implemented yet
609 return nullptr;
610
611 }
612 return nullptr;
613 }
614
GetMaxBteNum()615 uint32_t CmKernelEx::GetMaxBteNum()
616 {
617 uint32_t bteCount = 0;
618 for (uint32_t i = 0; i < m_flatArgCount; i++)
619 {
620 if (IsSurface(m_flatArgs[i].kind))
621 {
622 CmSurfaceState *surfState = *(CmSurfaceState **)(m_data + m_flatArgs[i].offset);
623 if (surfState == nullptr) //CM_NULL_SURFACE
624 {
625 continue;
626 }
627 bteCount += surfState->GetNumBte();
628 }
629 }
630 return bteCount;
631 }
632
UpdateCurbe(CmSSH * ssh,CmMediaState * mediaState,uint32_t kernelIdx)633 MOS_STATUS CmKernelEx::UpdateCurbe(CmSSH *ssh, CmMediaState *mediaState, uint32_t kernelIdx)
634 {
635 for (uint32_t i = 0; i < m_flatArgCount; i++)
636 {
637 if (IsSurface(m_flatArgs[i].kind))
638 {
639 CmSurface *surface = *(CmSurface **)(m_surfaceInArg + m_flatArgs[i].offset);
640 if (surface != nullptr && m_propertyIndexes[i] != surface->GetPropertyIndex())
641 {
642 // need to update the surface state
643 CmSurfaceState *temp = GetSurfaceState(surface, m_cmSurfIndexes[i]);
644 m_propertyIndexes[i] = surface->GetPropertyIndex();
645 *(CmSurfaceState **)(m_data + m_flatArgs[i].offset) = temp;
646 }
647 CmSurfaceState *surfState = *(CmSurfaceState **)(m_data + m_flatArgs[i].offset);
648 if (surfState == nullptr)
649 {
650 continue;
651 }
652 uint32_t bteIdx = ssh->AddSurfaceState(surfState);
653 *(uint32_t *)(m_curbe + m_flatArgs[i].payloadOffset) = bteIdx;
654 }
655 else if (m_flatArgs[i].kind == ARG_KIND_SAMPLER)
656 {
657 MHW_SAMPLER_STATE_PARAM *param = *(MHW_SAMPLER_STATE_PARAM **)(m_data + m_flatArgs[i].offset);
658 uint32_t bteIdx = mediaState->AddSampler(param, kernelIdx);
659 *(uint32_t *)(m_curbe + m_flatArgs[i].payloadOffset) = bteIdx;
660 }
661 else if (m_flatArgs[i].kind != ARG_KIND_IMPLICT_LOCALSIZE
662 && m_flatArgs[i].kind != ARG_KIND_IMPLICT_GROUPSIZE
663 && m_flatArgs[i].kind != ARG_KIND_IMPLICIT_LOCALID)
664 {
665 MOS_SecureMemcpy(m_curbe + m_flatArgs[i].payloadOffset, m_flatArgs[i].sizeInCurbe,
666 m_data + m_flatArgs[i].offset, m_flatArgs[i].unitSize);
667 }
668 }
669
670 // dump
671 /*
672 for (int i = 0; i < m_curbeSize/4; i++)
673 {
674 printf("0x%x, ", *((uint32_t *)m_curbe + i));
675 }
676 printf("\n");
677 */
678 return MOS_STATUS_SUCCESS;
679 }
680
UpdateFastTracker(uint32_t trackerIndex,uint32_t tracker)681 MOS_STATUS CmKernelEx::UpdateFastTracker(uint32_t trackerIndex, uint32_t tracker)
682 {
683 for (uint32_t i = 0; i < m_flatArgCount; i++)
684 {
685 if (IsSurface(m_flatArgs[i].kind))
686 {
687 CmSurface *surface = *(CmSurface **)(m_surfaceInArg + m_flatArgs[i].offset);
688 if (surface == nullptr)
689 {
690 continue;
691 }
692 surface->SetFastTracker(trackerIndex, tracker);
693 }
694 }
695 return MOS_STATUS_SUCCESS;
696 }
697
698
UpdateSWSBArgs(CmThreadSpaceRT * threadSpace)699 MOS_STATUS CmKernelEx::UpdateSWSBArgs(CmThreadSpaceRT *threadSpace)
700 {
701 CmThreadSpaceRT *ts = (threadSpace == nullptr)?m_threadSpace:threadSpace;
702 if (ts == nullptr)
703 {
704 return MOS_STATUS_SUCCESS;
705 }
706 int ret = ts->SetDependencyArgToKernel(this);
707 return (ret == 0)? MOS_STATUS_SUCCESS : MOS_STATUS_UNKNOWN;
708 }
709
SetStaticBuffer(uint32_t index,const void * value)710 int32_t CmKernelEx::SetStaticBuffer(uint32_t index, const void *value)
711 {
712 CM_CHK_CMSTATUS_RETURN(CmKernelRT::SetStaticBuffer(index, value));
713
714 if(index >= CM_GLOBAL_SURFACE_NUMBER)
715 {
716 CM_ASSERTMESSAGE("Error: Surface Index exceeds max global surface number.");
717 return CM_INVALID_GLOBAL_BUFFER_INDEX;
718 }
719
720 if(!value)
721 {
722 CM_ASSERTMESSAGE("Error: Invalid StaticBuffer arg value.");
723 return CM_INVALID_BUFFER_HANDLER;
724 }
725
726 SurfaceIndex* surfIndex = (SurfaceIndex* )value;
727 uint32_t indexData = surfIndex->get_data();
728
729 CmSurface* surface = nullptr;
730 m_surfaceMgr->GetSurface(indexData, surface);
731 if (surface != nullptr)
732 {
733 // for gen9+ platforms, index + 1 is the BTI
734 m_reservedSurfaceBteIndexes[index + CM_GLOBAL_SURFACE_INDEX_START_GEN9_PLUS]
735 = GetSurfaceState(surface, indexData);
736 }
737 return CM_SUCCESS;
738 }
739
SetSurfaceBTI(SurfaceIndex * surfIndex,uint32_t bti)740 int32_t CmKernelEx::SetSurfaceBTI(SurfaceIndex *surfIndex, uint32_t bti)
741 {
742 CM_CHK_CMSTATUS_RETURN(CmKernelRT::SetSurfaceBTI(surfIndex, bti));
743
744 CM_CHK_NULL_RETURN_CMERROR(surfIndex);
745 uint32_t index = surfIndex->get_data();
746
747 CmSurface* surface = nullptr;
748 m_surfaceMgr->GetSurface(index, surface);
749 if (surface != nullptr)
750 {
751 m_reservedSurfaceBteIndexes[bti] = GetSurfaceState(surface, index);
752 }
753 return CM_SUCCESS;
754 }
755
SetSamplerBTI(SamplerIndex * sampler,uint32_t nIndex)756 int32_t CmKernelEx::SetSamplerBTI(SamplerIndex* sampler, uint32_t nIndex)
757 {
758 CM_CHK_CMSTATUS_RETURN(CmKernelRT::SetSamplerBTI(sampler, nIndex));
759
760 uint32_t index = sampler->get_data();
761 m_reservedSamplerBteIndexes[nIndex] = (MHW_SAMPLER_STATE_PARAM *)GetSamplerParam(index);
762 return MOS_STATUS_SUCCESS;
763 }
764
LoadReservedSurfaces(CmSSH * ssh)765 MOS_STATUS CmKernelEx::LoadReservedSurfaces(CmSSH *ssh)
766 {
767 for (auto it = m_reservedSurfaceBteIndexes.begin(); it != m_reservedSurfaceBteIndexes.end(); ++ it)
768 {
769 ssh->AddSurfaceState(it->second, it->first);
770 }
771
772 // reset the table in legacy kernel for bti reuse
773 if (m_usKernelPayloadSurfaceCount)
774 {
775 CmSafeMemSet(m_IndirectSurfaceInfoArray, 0, m_usKernelPayloadSurfaceCount * sizeof(CM_INDIRECT_SURFACE_INFO));
776 m_usKernelPayloadSurfaceCount = 0;
777 }
778 return MOS_STATUS_SUCCESS;
779 }
780
LoadReservedSamplers(CmMediaState * mediaState,uint32_t kernelIdx)781 MOS_STATUS CmKernelEx::LoadReservedSamplers(CmMediaState *mediaState, uint32_t kernelIdx)
782 {
783 for (auto it = m_reservedSamplerBteIndexes.begin(); it != m_reservedSamplerBteIndexes.end(); ++ it)
784 {
785 mediaState->AddSampler((MHW_SAMPLER_STATE_PARAM *)it->second, kernelIdx, it->first);
786 }
787 return MOS_STATUS_SUCCESS;
788 }
789
GetSamplerParam(uint32_t index)790 void* CmKernelEx::GetSamplerParam(uint32_t index)
791 {
792 CM_HAL_STATE *cmHalState = ((PCM_CONTEXT_DATA)m_device->GetAccelData())->cmHalState;
793 return (void *)&cmHalState->samplerTable[index];
794 }
795
GetSamplerCount(uint32_t * count3D,uint32_t * countAVS)796 MOS_STATUS CmKernelEx::GetSamplerCount(uint32_t *count3D, uint32_t *countAVS)
797 {
798 *count3D = 0;
799 *countAVS = 0;
800 for (uint32_t i = 0; i < m_flatArgCount; i++)
801 {
802 if (m_flatArgs[i].kind == ARG_KIND_SAMPLER)
803 {
804 MHW_SAMPLER_STATE_PARAM *temp = *(MHW_SAMPLER_STATE_PARAM **)(m_data + m_flatArgs[i].offset);
805 if (temp->SamplerType == MHW_SAMPLER_TYPE_3D)
806 {
807 ++ (*count3D);
808 }
809 else if (temp->SamplerType == MHW_SAMPLER_TYPE_AVS)
810 {
811 ++ (*countAVS);
812 }
813 else
814 {
815 // only support 3D and AVS samplers by now in fast path
816 return MOS_STATUS_INVALID_PARAMETER;
817 }
818 }
819 }
820 return MOS_STATUS_SUCCESS;
821 }
822
GetThreadSpaceEx()823 CmThreadSpaceRT* CmKernelEx::GetThreadSpaceEx()
824 {
825 if (m_threadSpace)
826 {
827 return m_threadSpace;
828 }
829 if (m_dummyThreadSpace)
830 {
831 m_device->DestroyThreadSpace(m_dummyThreadSpace);
832 }
833 if (m_threadCount)
834 {
835 m_device->CreateThreadSpace(m_threadCount, 1, m_dummyThreadSpace);
836 }
837 return static_cast<CmThreadSpaceRT *>(m_dummyThreadSpace);
838 }
839
GetThreadGroupSpaceEx()840 CmThreadGroupSpace* CmKernelEx::GetThreadGroupSpaceEx()
841 {
842 if (m_threadGroupSpace)
843 {
844 return m_threadGroupSpace;
845 }
846 if (m_dummyThreadGroupSpace)
847 {
848 m_device->DestroyThreadGroupSpace(m_dummyThreadGroupSpace);
849 }
850
851 if (m_threadCount)
852 {
853 m_device->CreateThreadGroupSpace(1, 1, m_threadCount, 1, m_dummyThreadGroupSpace);
854 }
855 return m_dummyThreadGroupSpace;
856 }
857
SurfaceDumpEx(uint32_t kernelNumber,int32_t taskId)858 void CmKernelEx::SurfaceDumpEx(uint32_t kernelNumber, int32_t taskId)
859 {
860 for(uint32_t argIdx = 0; argIdx < m_argCount; argIdx++)
861 {
862 uint32_t start = m_indexMap[argIdx];
863 uint32_t len = m_indexMap[argIdx + 1] - start;
864
865 for (uint32_t v = 0; v < len; v ++)
866 {
867 uint32_t i = start + v;
868 if (IsSurface(m_flatArgs[i].kind))
869 {
870 CmSurface *surface = *(CmSurface **)(m_surfaceInArg + m_flatArgs[i].offset);
871 if (surface == nullptr)
872 {
873 continue;
874 }
875 surface->DumpContent(kernelNumber, m_kernelInfo->kernelName, taskId, argIdx, v);
876 }
877 }
878 }
879 }
880
IsFastPathSupported()881 bool CmKernelEx::IsFastPathSupported()
882 {
883 // current fast path doesn't support media object
884 bool specialDependency = false;
885 if (m_threadSpace)
886 {
887 CM_DEPENDENCY_PATTERN dependencyPatternType = CM_NONE_DEPENDENCY;
888 m_threadSpace->GetDependencyPatternType(dependencyPatternType);
889 specialDependency = (dependencyPatternType == CM_WAVEFRONT26Z || dependencyPatternType == CM_WAVEFRONT26ZI);
890 }
891
892 return !(m_perThreadArgExists || specialDependency);
893 }
894
895