1 /*
2 * Copyright (c) 2018, Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 */
22 //!
23 //! \file cm_execution_adv.cpp
24 //! \brief Contains Class CmExecutionAdv definitions
25 //!
26 #include "cm_execution_adv.h"
27 #include "cm_debug.h"
28 #include "cm_extension_creator.h"
29 #include "cm_surface_state_manager.h"
30 #include "cm_kernel_ex.h"
31 #include "cm_ish.h"
32 #include "cm_media_state.h"
33 #include "cm_command_buffer.h"
34 #include "cm_kernel_ex.h"
35 #include "cm_ssh.h"
36 #include "cm_event_ex.h"
37 #include "cm_tracker.h"
38 #include "cm_dsh.h"
39 #include "cm_task_rt.h"
40 #include "cm_thread_space_rt.h"
41 #include "cm_surface_manager.h"
42 #include "cm_queue_rt.h"
43 #include "cm_scratch_space.h"
44
45 #include "cm_hal_g9.h"
46 #include "cm_hal_g11.h"
47 #include "cm_hal_g12.h"
48
49 static bool gGTPinInitialized = false;
50
51 static bool advRegistered = CmExtensionCreator<CmExecutionAdv>::RegisterClass<CmExecutionAdv>();
52
53 using namespace CMRT_UMD;
54
CmExecutionAdv()55 CmExecutionAdv::CmExecutionAdv():
56 m_cmhal(nullptr),
57 m_tracker (nullptr),
58 m_ish (nullptr),
59 m_dsh (nullptr)
60 {
61 MOS_ZeroMemory(&m_l3Values, sizeof(m_l3Values));
62 }
63
Initialize(CM_HAL_STATE * state)64 MOS_STATUS CmExecutionAdv::Initialize(CM_HAL_STATE *state)
65 {
66 m_cmhal = state;
67 CM_CHK_NULL_RETURN_MOSERROR(m_cmhal);
68
69 m_tracker = MOS_New(CmTracker, m_cmhal->osInterface);
70 CM_CHK_NULL_RETURN_MOSERROR(m_tracker);
71 CM_CHK_MOSSTATUS_RETURN(m_tracker->Initialize());
72 FrameTrackerProducer *trackerProducer = m_tracker->GetTrackerProducer();
73
74 m_ish = MOS_New(CmISH);
75 CM_CHK_NULL_RETURN_MOSERROR(m_ish);
76 CM_CHK_MOSSTATUS_RETURN(m_ish->Initialize(m_cmhal, trackerProducer));
77
78 m_dsh = MOS_New(CmDSH, m_cmhal);
79 CM_CHK_NULL_RETURN_MOSERROR(m_dsh);
80 CM_CHK_MOSSTATUS_RETURN(m_dsh->Initialize(trackerProducer));
81
82 MOS_ZeroMemory(&m_l3Values, sizeof(m_l3Values));
83
84 return MOS_STATUS_SUCCESS;
85 }
86
~CmExecutionAdv()87 CmExecutionAdv::~CmExecutionAdv()
88 {
89 MOS_Delete(m_ish);
90 MOS_Delete(m_dsh);
91 MOS_Delete(m_tracker);
92 }
93
Create2DStateMgr(MOS_RESOURCE * resource)94 CmSurfaceState2Dor3DMgr* CmExecutionAdv::Create2DStateMgr(MOS_RESOURCE *resource)
95 {
96 return MOS_New(CmSurfaceState2Dor3DMgr, m_cmhal, resource);
97 }
98
Create3DStateMgr(MOS_RESOURCE * resource)99 CmSurfaceState2Dor3DMgr* CmExecutionAdv::Create3DStateMgr(MOS_RESOURCE *resource)
100 {
101 return MOS_New(CmSurfaceState3DMgr, m_cmhal, resource);
102 }
103
Delete2Dor3DStateMgr(CmSurfaceState2Dor3DMgr * stateMgr)104 void CmExecutionAdv::Delete2Dor3DStateMgr(CmSurfaceState2Dor3DMgr *stateMgr)
105 {
106 MOS_Delete(stateMgr);
107 }
108
CreateBufferStateMgr(MOS_RESOURCE * resource)109 CmSurfaceStateBufferMgr* CmExecutionAdv::CreateBufferStateMgr(MOS_RESOURCE *resource)
110 {
111 return MOS_New(CmSurfaceStateBufferMgr, m_cmhal, resource);
112 }
113
DeleteBufferStateMgr(CmSurfaceStateBufferMgr * stateMgr)114 void CmExecutionAdv::DeleteBufferStateMgr(CmSurfaceStateBufferMgr *stateMgr)
115 {
116 MOS_Delete(stateMgr);
117 }
118
DeleteSurfStateVme(CmSurfaceStateVME * state)119 void CmExecutionAdv::DeleteSurfStateVme(CmSurfaceStateVME *state)
120 {
121 MOS_Delete(state);
122 }
123
SetBufferOrigSize(CmSurfaceStateBufferMgr * stateMgr,uint32_t size)124 void CmExecutionAdv::SetBufferOrigSize(CmSurfaceStateBufferMgr *stateMgr, uint32_t size)
125 {
126 if (stateMgr)
127 {
128 stateMgr->SetOrigSize(size);
129 }
130 }
131
SetBufferMemoryObjectControl(CmSurfaceStateBufferMgr * stateMgr,uint16_t mocs)132 void CmExecutionAdv::SetBufferMemoryObjectControl(CmSurfaceStateBufferMgr *stateMgr, uint16_t mocs)
133 {
134 if (stateMgr)
135 {
136 stateMgr->SetMemoryObjectControl(mocs);
137 }
138 }
139
Set2Dor3DOrigFormat(CmSurfaceState2Dor3DMgr * stateMgr,MOS_FORMAT format)140 void CmExecutionAdv::Set2Dor3DOrigFormat(CmSurfaceState2Dor3DMgr *stateMgr, MOS_FORMAT format)
141 {
142 if (stateMgr)
143 {
144 stateMgr->SetOrigFormat(format);
145 }
146 }
147
Set2Dor3DOrigDimension(CmSurfaceState2Dor3DMgr * stateMgr,uint32_t width,uint32_t height,uint32_t depth)148 void CmExecutionAdv::Set2Dor3DOrigDimension(CmSurfaceState2Dor3DMgr *stateMgr, uint32_t width, uint32_t height, uint32_t depth)
149 {
150 if (stateMgr)
151 {
152 stateMgr->SetOrigDimension(width, height, depth);
153 }
154 }
155
Set2DRenderTarget(CmSurfaceState2Dor3DMgr * stateMgr,bool renderTarget)156 void CmExecutionAdv::Set2DRenderTarget(CmSurfaceState2Dor3DMgr *stateMgr, bool renderTarget)
157 {
158 if (stateMgr)
159 {
160 stateMgr->SetRenderTarget(renderTarget);
161 }
162 }
163
Set2Dor3DMemoryObjectControl(CmSurfaceState2Dor3DMgr * stateMgr,uint16_t mocs)164 void CmExecutionAdv::Set2Dor3DMemoryObjectControl(CmSurfaceState2Dor3DMgr *stateMgr, uint16_t mocs)
165 {
166 if (stateMgr)
167 {
168 stateMgr->SetMemoryObjectControl(mocs);
169 }
170 }
171
Set2DFrameType(CmSurfaceState2Dor3DMgr * stateMgr,CM_FRAME_TYPE frameType)172 void CmExecutionAdv::Set2DFrameType(CmSurfaceState2Dor3DMgr *stateMgr, CM_FRAME_TYPE frameType)
173 {
174 if (stateMgr)
175 {
176 stateMgr->SetFrameType(frameType);
177 }
178 }
179
SetRotationFlag(CmSurfaceState2Dor3DMgr * stateMgr,uint32_t rotation)180 void CmExecutionAdv::SetRotationFlag(CmSurfaceState2Dor3DMgr *stateMgr, uint32_t rotation)
181 {
182 if (stateMgr)
183 {
184 stateMgr->SetRotationFlag(rotation);
185 }
186 }
187
SetChromaSitting(CmSurfaceState2Dor3DMgr * stateMgr,uint8_t chromaSitting)188 void CmExecutionAdv::SetChromaSitting(CmSurfaceState2Dor3DMgr *stateMgr, uint8_t chromaSitting)
189 {
190 if (stateMgr)
191 {
192 stateMgr->SetChromaSitting(chromaSitting);
193 }
194 }
195
GetFastTrackerProducer()196 FrameTrackerProducer *CmExecutionAdv::GetFastTrackerProducer()
197 {
198 return m_tracker->GetTrackerProducer();
199 }
200
CreateKernelRT(CmDeviceRT * device,CmProgramRT * program,uint32_t kernelIndex,uint32_t kernelSeqNum)201 CmKernelRT *CmExecutionAdv::CreateKernelRT(CmDeviceRT *device,
202 CmProgramRT *program,
203 uint32_t kernelIndex,
204 uint32_t kernelSeqNum)
205 {
206 return new (std::nothrow) CmKernelEx(device, program, kernelIndex, kernelSeqNum);
207 }
208
RefreshSurfaces(CmDeviceRT * device)209 int CmExecutionAdv::RefreshSurfaces(CmDeviceRT *device)
210 {
211 CM_CHK_NULL_RETURN_CMERROR(device);
212
213 CmSurfaceManager *surfaceMgr = nullptr;
214 CSync * surfaceLock = nullptr;
215
216 device->GetSurfaceManager(surfaceMgr);
217 CM_CHK_NULL_RETURN_CMERROR(surfaceMgr);
218
219 surfaceLock = device->GetSurfaceCreationLock();
220 CM_CHK_NULL_RETURN_CMERROR(surfaceLock);
221
222 uint32_t freeSurfNum = 0;
223 surfaceLock->Acquire();
224 surfaceMgr->RefreshDelayDestroySurfaces(freeSurfNum);
225 surfaceLock->Release();
226
227 return CM_SUCCESS;
228 }
229
SubmitTask(CMRT_UMD::CmQueueRT * queue,CMRT_UMD::CmTask * task,CMRT_UMD::CmEvent * & event,const CMRT_UMD::CmThreadSpace * threadSpace,MOS_GPU_CONTEXT gpuContext)230 int CmExecutionAdv::SubmitTask(CMRT_UMD::CmQueueRT *queue,
231 CMRT_UMD::CmTask *task,
232 CMRT_UMD::CmEvent *&event,
233 const CMRT_UMD::CmThreadSpace *threadSpace,
234 MOS_GPU_CONTEXT gpuContext)
235 {
236 CM_NORMALMESSAGE("================ in fast path, media walker===================");
237
238 CM_HAL_STATE * state = m_cmhal;
239 CM_CHK_NULL_RETURN_CMERROR(state->advExecutor);
240 CmTracker *cmTracker = state->advExecutor->GetTracker();
241 CmISH *cmish = state->advExecutor->GetISH();
242 CmDSH *cmdsh = state->advExecutor->GetDSH();
243 CM_CHK_NULL_RETURN_CMERROR(cmTracker);
244 CM_CHK_NULL_RETURN_CMERROR(cmish);
245 CM_CHK_NULL_RETURN_CMERROR(cmdsh);
246
247 CLock Locker(m_criticalSection);
248
249 bool isDummyEventCreated = false;
250 #if MDF_SURFACE_CONTENT_DUMP
251 if (state->dumpSurfaceContent && event == CM_NO_EVENT)
252 {
253 // if surface content dump is needed, the enqueueFast should be a blocking operation
254 // we need a dummy event here
255 isDummyEventCreated = true;
256 event = nullptr;
257 }
258 #endif
259
260 state->osInterface->pfnResetOsStates(state->osInterface);
261 state->osInterface->pfnSetIndirectStateSize(state->osInterface, state->renderHal->dwIndirectHeapSize);
262
263 CM_HAL_OSSYNC_PARAM syncParam;
264 syncParam.osSyncEvent = nullptr;
265
266 // Call HAL layer to wait for Task finished with event-driven mechanism
267 CM_CHK_MOSSTATUS_RETURN(m_cmhal->pfnRegisterUMDNotifyEventHandle(m_cmhal, &syncParam));
268
269 HANDLE osSyncEvent = syncParam.osSyncEvent;
270
271 CmTaskRT *kernelArrayRT = static_cast<CmTaskRT *>(task);
272 uint32_t kernelCount = kernelArrayRT->GetKernelCount();
273 if (kernelCount == 0 || kernelCount > CM_MAX_KERNELS_PER_TASK)
274 {
275 return CM_FAILURE;
276 }
277
278 // get an array of CmKernelEx
279 CmKernelEx *kernels[CM_MAX_KERNELS_PER_TASK];
280 MOS_ZeroMemory(kernels, sizeof(kernels));
281 for (uint32_t i = 0; i < kernelCount; i++)
282 {
283 kernels[i] = static_cast<CmKernelEx *>(kernelArrayRT->GetKernelPointer(i));
284 CM_CHK_NULL_RETURN_CMERROR(kernels[i]);
285 kernels[i]->AllocateCurbe();
286 }
287
288 // get CmDeviceRT
289 CmDeviceRT *device = nullptr;
290 kernels[0]->GetCmDevice(device);
291 CM_CHK_NULL_RETURN_CMERROR(device);
292
293 // set printf buffer if needed
294 if (device->IsPrintEnable())
295 {
296 SurfaceIndex *printBufferIndex = nullptr;
297 device->CreatePrintBuffer();
298 device->GetPrintBufferIndex(printBufferIndex);
299 CM_ASSERT(printBufferIndex);
300 for (uint32_t i = 0; i < kernelCount; i++)
301 {
302 kernels[i]->SetStaticBuffer(CM_PRINTF_STATIC_BUFFER_ID, printBufferIndex);
303 }
304 }
305
306 const CmThreadSpaceRT *threadSpaceRTConst = static_cast<const CmThreadSpaceRT *>(threadSpace);
307 CmThreadSpaceRT *threadSpaceRT = const_cast<CmThreadSpaceRT *>(threadSpaceRTConst);
308 CmThreadSpaceRT *threadSpaces[CM_MAX_KERNELS_PER_TASK];
309 MOS_ZeroMemory(threadSpaces, sizeof(threadSpaces));
310 if (threadSpaceRT == nullptr)
311 {
312 for (uint32_t i = 0; i < kernelCount; i++)
313 {
314 threadSpaces[i] = kernels[i]->GetThreadSpaceEx();
315 }
316 }
317
318 // if SWSB is used, update the SWSB arguments in kenrel
319 if (!state->cmHalInterface->IsScoreboardParamNeeded())
320 {
321 for (uint32_t i = 0; i < kernelCount; i++)
322 {
323 kernels[i]->UpdateSWSBArgs(threadSpaceRT);
324 }
325 }
326
327 CmCommandBuffer cmdBufData(state);
328 CmCommandBuffer *cmdBuf = &cmdBufData;
329 CM_CHK_NULL_RETURN_CMERROR(cmdBuf);
330
331 uint32_t tracker;
332 uint32_t taskId;
333 MOS_STATUS mret = cmTracker->AssignFrameTracker(queue->GetFastTrackerIndex(), &taskId, &tracker, event != CM_NO_EVENT);
334 bool taskAssigned = (mret == MOS_STATUS_SUCCESS);
335
336 cmdBuf->Initialize();
337
338 CmSSH *ssh = cmdBuf->GetSSH();
339 CM_CHK_NULL_RETURN_CMERROR(ssh);
340
341 // Add kernels to ISH directly
342 cmish->LoadKernels(kernels, kernelCount);
343
344 // initialize SSH
345 ssh->Initialize(kernels, kernelCount);
346
347 // create new media state
348 CmMediaState *cmMediaState = cmdsh->CreateMediaState();
349 CM_CHK_NULL_RETURN_CMERROR(cmMediaState);
350 cmMediaState->Allocate(kernels, kernelCount, queue->GetFastTrackerIndex(), tracker);
351
352 // generate curbe and load media id
353 for (uint32_t i = 0; i < kernelCount; i++)
354 {
355 ssh->AssignBindingTable();
356 kernels[i]->LoadReservedSamplers(cmMediaState, i);
357 kernels[i]->LoadReservedSurfaces(ssh);
358 kernels[i]->UpdateCurbe(ssh, cmMediaState, i);
359 kernels[i]->UpdateFastTracker(queue->GetFastTrackerIndex(), tracker);
360 cmMediaState->LoadCurbe(kernels[i], i);
361 cmMediaState->LoadMediaID(kernels[i], i, ssh->GetBindingTableOffset());
362 }
363
364 // prepare cp resources
365 ssh->PrepareResourcesForCp();
366
367 // get the position to write tracker
368 MOS_RESOURCE *trackerResource = nullptr;
369 uint32_t trackerOffset = 0;
370 cmTracker->GetLatestTrackerResource(queue->GetFastTrackerIndex(), &trackerResource, &trackerOffset);
371
372 // call gtpin callback if needed
373 CmNotifierGroup *ng = nullptr;
374 if (gGTPinInitialized && taskAssigned)
375 {
376 ng = device->GetNotifiers();
377 ng->NotifyTaskFlushed(device, task, ssh, taskId);
378 }
379
380 if (m_cmhal->platform.eRenderCoreFamily >= IGFX_GEN12_CORE)
381 {
382 cmdBuf->AddMMCProlog();
383 }
384 cmdBuf->AddFlushCacheAndSyncTask(false, false, nullptr);
385 cmdBuf->AddFlushCacheAndSyncTask(true, false, nullptr);
386
387 cmdBuf->AddPowerOption(kernelArrayRT->GetPowerOption());
388
389 cmdBuf->AddProtectedProlog();
390
391 cmdBuf->AddReadTimeStamp(cmTracker->GetResource(), cmTracker->GetStartOffset(taskId));
392 cmdBuf->AddUmdProfilerStart();
393
394 cmdBuf->AddL3CacheConfig(&m_l3Values);
395
396 cmdBuf->AddPreemptionConfig(false);
397
398 cmdBuf->AddPipelineSelect(false);
399
400 cmdBuf->AddStateBaseAddress(cmish, cmMediaState);
401
402 CM_TASK_CONFIG taskConfig;
403 kernelArrayRT->GetProperty(taskConfig);
404 if (threadSpaceRT)
405 {
406 cmdBuf->AddMediaVFE(cmMediaState, taskConfig.fusedEuDispatchFlag == CM_FUSED_EU_ENABLE, &threadSpaceRT); // global thread space
407 }
408 else
409 {
410 cmdBuf->AddMediaVFE(cmMediaState, taskConfig.fusedEuDispatchFlag == CM_FUSED_EU_ENABLE, threadSpaces, kernelCount);
411 }
412
413 cmdBuf->AddCurbeLoad(cmMediaState);
414
415 cmdBuf->AddMediaIDLoad(cmMediaState);
416
417 CM_HAL_CONDITIONAL_BB_END_INFO *cbbInfos = kernelArrayRT->GetConditionalEndInfo();
418 uint64_t conditionalBitMap = kernelArrayRT->GetConditionalEndBitmap();
419 for (uint32_t i = 0; i < kernelCount; i ++)
420 {
421 CmThreadSpaceRT *ts = (threadSpaceRT != nullptr) ? threadSpaceRT: threadSpaces[i];
422
423 // check whether need to insert a CBB
424 bool needCBB = conditionalBitMap & ((uint64_t)1 << i);
425 if (needCBB)
426 {
427 cmdBuf->AddFlushCacheAndSyncTask(false, true, nullptr);
428
429 cmdBuf->AddReadTimeStamp(cmTracker->GetResource(), cmTracker->GetEndOffset(taskId), true);
430
431 cmdBuf->AddConditionalFrameTracker(trackerResource, trackerOffset, tracker, &cbbInfos[i]);
432
433 cmdBuf->AddConditionalBatchBufferEnd(&cbbInfos[i]);
434 }
435
436 if (i > 0)
437 {
438 // check whether the next kernel has a dependency pattern
439 uint32_t dcount = 0;
440 if (ts != nullptr)
441 {
442 CM_HAL_DEPENDENCY *dependency;
443 ts->GetDependency(dependency);
444 dcount = dependency->count;
445 }
446
447 bool syncFlag = false;
448 uint64_t syncBitMap = kernelArrayRT->GetSyncBitmap();
449 syncFlag = syncBitMap & ((uint64_t)1 << (i-1));
450 // add sync if necessary
451 if ((dcount != 0) || syncFlag)
452 {
453 cmdBuf->AddSyncBetweenKernels();
454 }
455 }
456
457 cmdBuf->AddMediaObjectWalker(ts, i);
458 }
459
460 cmdBuf->AddFlushCacheAndSyncTask(false, true, nullptr);
461
462 cmdBuf->AddUmdProfilerEnd();
463 cmdBuf->AddReadTimeStamp(cmTracker->GetResource(), cmTracker->GetEndOffset(taskId), true);
464
465 cmdBuf->AddFrameTracker(trackerResource, trackerOffset, tracker);
466
467 cmdBuf->AddDummyVFE();
468
469 cmdBuf->AddBatchBufferEnd();
470
471 cmdBuf->ReturnUnusedBuffer();
472
473 #if MDF_SURFACE_STATE_DUMP
474 if (m_cmhal->dumpSurfaceState)
475 {
476 ssh->DumpSSH();
477 }
478 #endif
479
480 #if MDF_COMMAND_BUFFER_DUMP
481 if (m_cmhal->dumpCommandBuffer)
482 {
483 cmdBuf->Dump();
484 }
485 #endif
486
487 #if MDF_CURBE_DATA_DUMP
488 if (m_cmhal->dumpCurbeData)
489 {
490 cmMediaState->Dump();
491 }
492 #endif
493
494 cmdBuf->Submit();
495
496 cmish->Submit(queue->GetFastTrackerIndex(), tracker);
497
498 cmMediaState->Submit();
499 cmdsh->DestroyMediaState(cmMediaState);
500
501 if (event != CM_NO_EVENT && taskAssigned)
502 {
503 CmEventEx *eventEx = MOS_New(CmEventEx, state, taskId, cmTracker);
504 CM_CHK_NULL_RETURN_CMERROR(eventEx);
505 cmTracker->AssociateEvent(eventEx);
506 eventEx->SetTaskOsData(cmdBuf->GetResource(), osSyncEvent);
507 event = static_cast<CmEventEx *>(eventEx);
508
509 if (gGTPinInitialized)
510 {
511 eventEx->SetNotifier(ng);
512 }
513 }
514 else
515 {
516 event = nullptr;
517 }
518 cmTracker->Refresh();
519
520 // refresh surfaces in surface manager
521 CM_CHK_CMSTATUS_RETURN(RefreshSurfaces(device));
522
523 #if MDF_SURFACE_CONTENT_DUMP
524 if (state->dumpSurfaceContent && event != nullptr)
525 {
526 event->WaitForTaskFinished();
527 if (isDummyEventCreated)
528 {
529 DestoryEvent(queue, event);
530 }
531 for (uint32_t i = 0; i < kernelCount; i++)
532 {
533 kernels[i]->SurfaceDumpEx(i, taskId);
534 }
535 }
536 #endif
537
538 return CM_SUCCESS;
539 }
540
DestoryEvent(CMRT_UMD::CmQueueRT * queue,CMRT_UMD::CmEvent * & event)541 int CmExecutionAdv::DestoryEvent(CMRT_UMD::CmQueueRT *queue, CMRT_UMD::CmEvent *&event)
542 {
543 CmEventEx *eventEx = static_cast<CmEventEx *>(event);
544 MOS_Delete(eventEx);
545 event = nullptr;
546 return CM_SUCCESS;
547 }
548
SubmitComputeTask(CMRT_UMD::CmQueueRT * queue,CMRT_UMD::CmTask * task,CMRT_UMD::CmEvent * & event,const CMRT_UMD::CmThreadGroupSpace * threadGroupSpace,MOS_GPU_CONTEXT gpuContext)549 int CmExecutionAdv::SubmitComputeTask(CMRT_UMD::CmQueueRT *queue,
550 CMRT_UMD::CmTask *task,
551 CMRT_UMD::CmEvent* &event,
552 const CMRT_UMD::CmThreadGroupSpace* threadGroupSpace,
553 MOS_GPU_CONTEXT gpuContext)
554 {
555 CM_ASSERTMESSAGE("Compute Tasks not support on this platform\n");
556 return CM_FAILURE;
557 }
558
WaitForAllTasksFinished()559 int CmExecutionAdv::WaitForAllTasksFinished()
560 {
561 return m_tracker->WaitForAllTasksFinished();
562 }
563
SetL3Config(const L3ConfigRegisterValues * l3Config)564 void CmExecutionAdv::SetL3Config(const L3ConfigRegisterValues *l3Config)
565 {
566 m_l3Values.config_register0 = l3Config->config_register0;
567 m_l3Values.config_register1 = l3Config->config_register1;
568 m_l3Values.config_register2 = l3Config->config_register2;
569 m_l3Values.config_register3 = l3Config->config_register3;
570 }
571
SetSuggestedL3Config(L3_SUGGEST_CONFIG l3SuggestConfig)572 int CmExecutionAdv::SetSuggestedL3Config(L3_SUGGEST_CONFIG l3SuggestConfig)
573 {
574 const L3ConfigRegisterValues *table = nullptr;
575 uint32_t count = 0;
576
577 switch(m_cmhal->platform.eRenderCoreFamily)
578 {
579 case IGFX_GEN11_CORE:
580 count = sizeof(ICL_L3_PLANE)/sizeof(L3ConfigRegisterValues);
581 table = (L3ConfigRegisterValues *)ICL_L3_PLANE;
582 break;
583 case IGFX_GEN12_CORE:
584 table = m_cmhal->cmHalInterface->m_l3Plane;
585 count = m_cmhal->cmHalInterface->m_l3ConfigCount;
586 break;
587 default: // gen9
588 count = sizeof(SKL_L3_PLANE) / sizeof(L3ConfigRegisterValues);
589 table = (L3ConfigRegisterValues*)SKL_L3_PLANE;
590 break;
591 }
592 if (static_cast<size_t>(l3SuggestConfig) >= count)
593 {
594 return CM_INVALID_ARG_VALUE;
595 }
596 m_l3Values.config_register0 = table[l3SuggestConfig].config_register0;
597 m_l3Values.config_register1 = table[l3SuggestConfig].config_register1;
598 m_l3Values.config_register2 = table[l3SuggestConfig].config_register2;
599 m_l3Values.config_register3 = table[l3SuggestConfig].config_register3;
600
601 return CM_SUCCESS;
602 }
603
AssignNewTracker()604 int CmExecutionAdv::AssignNewTracker()
605 {
606 FrameTrackerProducer *trackerProducer = m_tracker->GetTrackerProducer();
607 return trackerProducer->AssignNewTracker();
608 }
609
SubmitGpgpuTask(CMRT_UMD::CmQueueRT * queue,CMRT_UMD::CmTask * task,CMRT_UMD::CmEvent * & event,const CMRT_UMD::CmThreadGroupSpace * threadGroupSpace,MOS_GPU_CONTEXT gpuContext)610 int CmExecutionAdv::SubmitGpgpuTask(CMRT_UMD::CmQueueRT *queue,
611 CMRT_UMD::CmTask *task,
612 CMRT_UMD::CmEvent* &event,
613 const CMRT_UMD::CmThreadGroupSpace* threadGroupSpace,
614 MOS_GPU_CONTEXT gpuContext)
615 {
616 CM_NORMALMESSAGE("================ in fast path, gpgpu walker===================");
617
618 CM_HAL_STATE * state = m_cmhal;
619 CM_CHK_NULL_RETURN_CMERROR(state->advExecutor);
620 CmTracker *cmTracker = state->advExecutor->GetTracker();
621 CmISH *cmish = state->advExecutor->GetISH();
622 CmDSH *cmdsh = state->advExecutor->GetDSH();
623 CM_CHK_NULL_RETURN_CMERROR(cmTracker);
624 CM_CHK_NULL_RETURN_CMERROR(cmish);
625 CM_CHK_NULL_RETURN_CMERROR(cmdsh);
626
627 CLock Locker(m_criticalSection);
628
629 bool isDummyEventCreated = false;
630 #if MDF_SURFACE_CONTENT_DUMP
631 if (state->dumpSurfaceContent && event == CM_NO_EVENT)
632 {
633 // if surface content dump is needed, the enqueueFast should be a blocking operation
634 // we need a dummy event here
635 isDummyEventCreated = true;
636 event = nullptr;
637 }
638 #endif
639
640 state->osInterface->pfnSetGpuContext(state->osInterface, gpuContext);
641 state->osInterface->pfnResetOsStates(state->osInterface);
642 state->osInterface->pfnSetIndirectStateSize(state->osInterface, state->renderHal->dwIndirectHeapSize);
643
644 CM_HAL_OSSYNC_PARAM syncParam;
645 syncParam.osSyncEvent = nullptr;
646
647 // Call HAL layer to wait for Task finished with event-driven mechanism
648 CM_CHK_MOSSTATUS_RETURN(m_cmhal->pfnRegisterUMDNotifyEventHandle(m_cmhal, &syncParam));
649
650 HANDLE osSyncEvent = syncParam.osSyncEvent;
651
652 CmTaskRT *kernelArrayRT = static_cast<CmTaskRT *>(task);
653 uint32_t kernelCount = kernelArrayRT->GetKernelCount();
654 if (kernelCount == 0 || kernelCount > CM_MAX_KERNELS_PER_TASK)
655 {
656 return CM_FAILURE;
657 }
658
659 // get an array of CmKernelEx
660 CmKernelEx *kernels[CM_MAX_KERNELS_PER_TASK];
661 MOS_ZeroMemory(kernels, sizeof(kernels));
662 for (uint32_t i = 0; i < kernelCount; i++)
663 {
664 kernels[i] = static_cast<CmKernelEx *>(kernelArrayRT->GetKernelPointer(i));
665 CM_CHK_NULL_RETURN_CMERROR(kernels[i]);
666 kernels[i]->AllocateCurbeAndFillImplicitArgs(const_cast<CmThreadGroupSpace *>(threadGroupSpace));
667 }
668
669 // get CmDeviceRT
670 CmDeviceRT *device = nullptr;
671 kernels[0]->GetCmDevice(device);
672 CM_CHK_NULL_RETURN_CMERROR(device);
673
674 // set printf buffer if needed
675 if (device->IsPrintEnable())
676 {
677 SurfaceIndex *printBufferIndex = nullptr;
678 device->CreatePrintBuffer();
679 device->GetPrintBufferIndex(printBufferIndex);
680 CM_ASSERT(printBufferIndex);
681 for (uint32_t i = 0; i < kernelCount; i++)
682 {
683 kernels[i]->SetStaticBuffer(CM_PRINTF_STATIC_BUFFER_ID, printBufferIndex);
684 }
685 }
686
687 CmThreadGroupSpace *threadGroupSpaces[CM_MAX_KERNELS_PER_TASK];
688 MOS_ZeroMemory(threadGroupSpaces, sizeof(threadGroupSpaces));
689 if (threadGroupSpace == nullptr)
690 {
691 for (uint32_t i = 0; i < kernelCount; i++)
692 {
693 threadGroupSpaces[i] = kernels[i]->GetThreadGroupSpaceEx();
694 }
695 }
696
697 CmCommandBuffer cmdBufData(state);
698 CmCommandBuffer *cmdBuf = &cmdBufData;
699 CM_CHK_NULL_RETURN_CMERROR(cmdBuf);
700
701 uint32_t tracker;
702 uint32_t taskId;
703 MOS_STATUS mret = cmTracker->AssignFrameTracker(queue->GetFastTrackerIndex(), &taskId, &tracker, event != CM_NO_EVENT);
704 bool taskAssigned = (mret == MOS_STATUS_SUCCESS);
705
706 cmdBuf->Initialize();
707
708 CmSSH *ssh = cmdBuf->GetSSH();
709 CM_CHK_NULL_RETURN_CMERROR(ssh);
710
711 // Add kernels to ISH directly
712 cmish->LoadKernels(kernels, kernelCount);
713
714 // initialize SSH
715 ssh->Initialize(kernels, kernelCount);
716
717 // create new media state
718 CmMediaState *cmMediaState = cmdsh->CreateMediaState();
719 CM_CHK_NULL_RETURN_CMERROR(cmMediaState);
720 cmMediaState->Allocate(kernels, kernelCount, 0, tracker);
721
722 // generate curbe and load media id
723 for (uint32_t i = 0; i < kernelCount; i++)
724 {
725 ssh->AssignBindingTable();
726 kernels[i]->LoadReservedSamplers(cmMediaState, i);
727 kernels[i]->LoadReservedSurfaces(ssh);
728 kernels[i]->UpdateCurbe(ssh, cmMediaState, i);
729 kernels[i]->UpdateFastTracker(queue->GetFastTrackerIndex(), tracker);
730 cmMediaState->LoadCurbe(kernels[i], i);
731 CmThreadGroupSpace *tgs = (threadGroupSpace != nullptr) ?
732 const_cast<CmThreadGroupSpace *>(threadGroupSpace)
733 : threadGroupSpaces[i];
734 cmMediaState->LoadMediaID(kernels[i], i, ssh->GetBindingTableOffset(), tgs);
735 }
736
737 // prepare cp resources
738 ssh->PrepareResourcesForCp();
739
740 // get the position to write tracker
741 MOS_RESOURCE *trackerResource = nullptr;
742 uint32_t trackerOffset = 0;
743 cmTracker->GetLatestTrackerResource(queue->GetFastTrackerIndex(), &trackerResource, &trackerOffset);
744
745 if (m_cmhal->platform.eRenderCoreFamily >= IGFX_GEN12_CORE)
746 {
747 cmdBuf->AddMMCProlog();
748 }
749 cmdBuf->AddFlushCacheAndSyncTask(false, false, nullptr);
750 cmdBuf->AddFlushCacheAndSyncTask(true, false, nullptr);
751
752 cmdBuf->AddPowerOption(kernelArrayRT->GetPowerOption());
753
754 cmdBuf->AddProtectedProlog();
755
756 cmdBuf->AddReadTimeStamp(cmTracker->GetResource(), cmTracker->GetStartOffset(taskId));
757 cmdBuf->AddUmdProfilerStart();
758
759 cmdBuf->AddL3CacheConfig(&m_l3Values);
760
761 cmdBuf->AddPreemptionConfig(true);
762
763 cmdBuf->AddPipelineSelect(true);
764
765 cmdBuf->AddStateBaseAddress(cmish, cmMediaState);
766
767 cmdBuf->AddSipState(cmish->GetSipKernelOffset());
768
769 CM_CHK_MOSSTATUS_RETURN(m_cmhal->osInterface->pfnRegisterResource(
770 m_cmhal->osInterface,
771 &m_cmhal->csrResource,
772 true,
773 true));
774
775 cmdBuf->AddCsrBaseAddress(&m_cmhal->csrResource);
776
777 CM_TASK_CONFIG taskConfig;
778 kernelArrayRT->GetProperty(taskConfig);
779 cmdBuf->AddMediaVFE(cmMediaState, taskConfig.fusedEuDispatchFlag == CM_FUSED_EU_ENABLE);
780
781 cmdBuf->AddCurbeLoad(cmMediaState);
782
783 cmdBuf->AddMediaIDLoad(cmMediaState);
784
785 const CM_EXECUTION_CONFIG *exeConfig = kernelArrayRT->GetKernelExecuteConfig();
786 CM_HAL_CONDITIONAL_BB_END_INFO *cbbInfos = kernelArrayRT->GetConditionalEndInfo();
787 uint64_t conditionalBitMap = kernelArrayRT->GetConditionalEndBitmap();
788 for (uint32_t i = 0; i < kernelCount; i ++)
789 {
790 CmThreadGroupSpace *tgs = (threadGroupSpace != nullptr) ? const_cast<CmThreadGroupSpace *>(threadGroupSpace) : threadGroupSpaces[i];
791
792 // check whether need to insert a CBB
793 bool needCBB = conditionalBitMap & ((uint64_t)1 << i);
794 if (needCBB)
795 {
796 cmdBuf->AddFlushCacheAndSyncTask(false, true, nullptr);
797
798 cmdBuf->AddReadTimeStamp(cmTracker->GetResource(), cmTracker->GetEndOffset(taskId), true);
799
800 cmdBuf->AddConditionalFrameTracker(trackerResource, trackerOffset, tracker, &cbbInfos[i]);
801
802 cmdBuf->AddConditionalBatchBufferEnd(&cbbInfos[i]);
803 }
804
805 if (i > 0)
806 {
807 bool syncFlag = false;
808 uint64_t syncBitMap = kernelArrayRT->GetSyncBitmap();
809 syncFlag = syncBitMap & ((uint64_t)1 << (i-1));
810 // add sync if necessary
811 if (syncFlag)
812 {
813 cmdBuf->AddSyncBetweenKernels();
814 }
815 }
816
817 cmdBuf->AddGpgpuWalker(tgs, kernels[i], i);
818 }
819
820 cmdBuf->AddFlushCacheAndSyncTask(false, true, nullptr);
821
822 cmdBuf->AddUmdProfilerEnd();
823 cmdBuf->AddReadTimeStamp(cmTracker->GetResource(), cmTracker->GetEndOffset(taskId), true);
824
825 cmdBuf->AddFrameTracker(trackerResource, trackerOffset, tracker);
826
827 cmdBuf->AddDummyVFE();
828
829 cmdBuf->AddBatchBufferEnd();
830
831 cmdBuf->ReturnUnusedBuffer();
832
833 cmdBuf->Submit();
834
835 cmish->Submit(queue->GetFastTrackerIndex(), tracker);
836
837 #if MDF_SURFACE_STATE_DUMP
838 if (m_cmhal->dumpSurfaceState)
839 {
840 ssh->DumpSSH();
841 }
842 #endif
843
844 #if MDF_COMMAND_BUFFER_DUMP
845 if (m_cmhal->dumpCommandBuffer)
846 {
847 cmdBuf->Dump();
848 }
849 #endif
850
851 #if MDF_CURBE_DATA_DUMP
852 if (m_cmhal->dumpCurbeData)
853 {
854 cmMediaState->Dump();
855 }
856 #endif
857
858 cmMediaState->Submit();
859 cmdsh->DestroyMediaState(cmMediaState);
860
861 if (event != CM_NO_EVENT && taskAssigned)
862 {
863 CmEventEx *eventEx = MOS_New(CmEventEx, state, taskId, cmTracker);
864 eventEx->SetTaskOsData(cmdBuf->GetResource(), osSyncEvent);
865 event = static_cast<CmEventEx *>(eventEx);
866 }
867 else
868 {
869 event = nullptr;
870 }
871 cmTracker->Refresh();
872
873 // refresh surfaces in surface manager
874 CM_CHK_CMSTATUS_RETURN(RefreshSurfaces(device));
875
876 #if MDF_SURFACE_CONTENT_DUMP
877 if (state->dumpSurfaceContent && event != nullptr)
878 {
879 event->WaitForTaskFinished();
880 if (isDummyEventCreated)
881 {
882 DestoryEvent(queue, event);
883 }
884 for (uint32_t i = 0; i < kernelCount; i++)
885 {
886 kernels[i]->SurfaceDumpEx(i, taskId);
887 }
888 }
889 #endif
890
891 return CM_SUCCESS;
892 }
893
SwitchToFastPath(CmTask * task)894 bool CmExecutionAdv::SwitchToFastPath(CmTask *task)
895 {
896 CmTaskRT *kernelArrayRT = static_cast<CmTaskRT *>(task);
897 uint32_t kernelCount = kernelArrayRT->GetKernelCount();
898 for (uint32_t i = 0; i < kernelCount; i++)
899 {
900 CmKernelEx *kernel = static_cast<CmKernelEx *>(kernelArrayRT->GetKernelPointer(i));
901 if (kernel == nullptr)
902 {
903 return false;
904 }
905 if (kernel->IsFastPathSupported() == false)
906 {
907 return false;
908 }
909 }
910 return true;
911 }
912
913