1 /*
2 * Copyright (c) 2018, Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 */
22 //!
23 //! \file      cm_execution_adv.cpp
24 //! \brief     Contains Class CmExecutionAdv  definitions
25 //!
26 #include "cm_execution_adv.h"
27 #include "cm_debug.h"
28 #include "cm_extension_creator.h"
29 #include "cm_surface_state_manager.h"
30 #include "cm_kernel_ex.h"
31 #include "cm_ish.h"
32 #include "cm_media_state.h"
33 #include "cm_command_buffer.h"
34 #include "cm_kernel_ex.h"
35 #include "cm_ssh.h"
36 #include "cm_event_ex.h"
37 #include "cm_tracker.h"
38 #include "cm_dsh.h"
39 #include "cm_task_rt.h"
40 #include "cm_thread_space_rt.h"
41 #include "cm_surface_manager.h"
42 #include "cm_queue_rt.h"
43 #include "cm_scratch_space.h"
44 
45 #include "cm_hal_g9.h"
46 #include "cm_hal_g11.h"
47 #include "cm_hal_g12.h"
48 
49 static bool gGTPinInitialized = false;
50 
51 static bool advRegistered = CmExtensionCreator<CmExecutionAdv>::RegisterClass<CmExecutionAdv>();
52 
53 using namespace CMRT_UMD;
54 
CmExecutionAdv()55 CmExecutionAdv::CmExecutionAdv():
56     m_cmhal(nullptr),
57     m_tracker (nullptr),
58     m_ish (nullptr),
59     m_dsh (nullptr)
60 {
61     MOS_ZeroMemory(&m_l3Values, sizeof(m_l3Values));
62 }
63 
Initialize(CM_HAL_STATE * state)64 MOS_STATUS CmExecutionAdv::Initialize(CM_HAL_STATE *state)
65 {
66     m_cmhal = state;
67     CM_CHK_NULL_RETURN_MOSERROR(m_cmhal);
68 
69     m_tracker = MOS_New(CmTracker, m_cmhal->osInterface);
70     CM_CHK_NULL_RETURN_MOSERROR(m_tracker);
71     CM_CHK_MOSSTATUS_RETURN(m_tracker->Initialize());
72     FrameTrackerProducer *trackerProducer = m_tracker->GetTrackerProducer();
73 
74     m_ish = MOS_New(CmISH);
75     CM_CHK_NULL_RETURN_MOSERROR(m_ish);
76     CM_CHK_MOSSTATUS_RETURN(m_ish->Initialize(m_cmhal, trackerProducer));
77 
78     m_dsh = MOS_New(CmDSH, m_cmhal);
79     CM_CHK_NULL_RETURN_MOSERROR(m_dsh);
80     CM_CHK_MOSSTATUS_RETURN(m_dsh->Initialize(trackerProducer));
81 
82     MOS_ZeroMemory(&m_l3Values, sizeof(m_l3Values));
83 
84     return MOS_STATUS_SUCCESS;
85 }
86 
~CmExecutionAdv()87 CmExecutionAdv::~CmExecutionAdv()
88 {
89     MOS_Delete(m_ish);
90     MOS_Delete(m_dsh);
91     MOS_Delete(m_tracker);
92 }
93 
Create2DStateMgr(MOS_RESOURCE * resource)94 CmSurfaceState2Dor3DMgr* CmExecutionAdv::Create2DStateMgr(MOS_RESOURCE *resource)
95 {
96     return MOS_New(CmSurfaceState2Dor3DMgr, m_cmhal, resource);
97 }
98 
Create3DStateMgr(MOS_RESOURCE * resource)99 CmSurfaceState2Dor3DMgr* CmExecutionAdv::Create3DStateMgr(MOS_RESOURCE *resource)
100 {
101     return MOS_New(CmSurfaceState3DMgr, m_cmhal, resource);
102 }
103 
Delete2Dor3DStateMgr(CmSurfaceState2Dor3DMgr * stateMgr)104 void CmExecutionAdv::Delete2Dor3DStateMgr(CmSurfaceState2Dor3DMgr *stateMgr)
105 {
106     MOS_Delete(stateMgr);
107 }
108 
CreateBufferStateMgr(MOS_RESOURCE * resource)109 CmSurfaceStateBufferMgr* CmExecutionAdv::CreateBufferStateMgr(MOS_RESOURCE *resource)
110 {
111     return MOS_New(CmSurfaceStateBufferMgr, m_cmhal, resource);
112 }
113 
DeleteBufferStateMgr(CmSurfaceStateBufferMgr * stateMgr)114 void CmExecutionAdv::DeleteBufferStateMgr(CmSurfaceStateBufferMgr *stateMgr)
115 {
116     MOS_Delete(stateMgr);
117 }
118 
DeleteSurfStateVme(CmSurfaceStateVME * state)119 void CmExecutionAdv::DeleteSurfStateVme(CmSurfaceStateVME *state)
120 {
121     MOS_Delete(state);
122 }
123 
SetBufferOrigSize(CmSurfaceStateBufferMgr * stateMgr,uint32_t size)124 void CmExecutionAdv::SetBufferOrigSize(CmSurfaceStateBufferMgr *stateMgr, uint32_t size)
125 {
126     if (stateMgr)
127     {
128         stateMgr->SetOrigSize(size);
129     }
130 }
131 
SetBufferMemoryObjectControl(CmSurfaceStateBufferMgr * stateMgr,uint16_t mocs)132 void CmExecutionAdv::SetBufferMemoryObjectControl(CmSurfaceStateBufferMgr *stateMgr, uint16_t mocs)
133 {
134     if (stateMgr)
135     {
136         stateMgr->SetMemoryObjectControl(mocs);
137     }
138 }
139 
Set2Dor3DOrigFormat(CmSurfaceState2Dor3DMgr * stateMgr,MOS_FORMAT format)140 void CmExecutionAdv::Set2Dor3DOrigFormat(CmSurfaceState2Dor3DMgr *stateMgr, MOS_FORMAT format)
141 {
142     if (stateMgr)
143     {
144         stateMgr->SetOrigFormat(format);
145     }
146 }
147 
Set2Dor3DOrigDimension(CmSurfaceState2Dor3DMgr * stateMgr,uint32_t width,uint32_t height,uint32_t depth)148 void CmExecutionAdv::Set2Dor3DOrigDimension(CmSurfaceState2Dor3DMgr *stateMgr, uint32_t width, uint32_t height, uint32_t depth)
149 {
150     if (stateMgr)
151     {
152         stateMgr->SetOrigDimension(width, height, depth);
153     }
154 }
155 
Set2DRenderTarget(CmSurfaceState2Dor3DMgr * stateMgr,bool renderTarget)156 void CmExecutionAdv::Set2DRenderTarget(CmSurfaceState2Dor3DMgr *stateMgr, bool renderTarget)
157 {
158     if (stateMgr)
159     {
160         stateMgr->SetRenderTarget(renderTarget);
161     }
162 }
163 
Set2Dor3DMemoryObjectControl(CmSurfaceState2Dor3DMgr * stateMgr,uint16_t mocs)164 void CmExecutionAdv::Set2Dor3DMemoryObjectControl(CmSurfaceState2Dor3DMgr *stateMgr, uint16_t mocs)
165 {
166     if (stateMgr)
167     {
168         stateMgr->SetMemoryObjectControl(mocs);
169     }
170 }
171 
Set2DFrameType(CmSurfaceState2Dor3DMgr * stateMgr,CM_FRAME_TYPE frameType)172 void CmExecutionAdv::Set2DFrameType(CmSurfaceState2Dor3DMgr *stateMgr, CM_FRAME_TYPE frameType)
173 {
174     if (stateMgr)
175     {
176         stateMgr->SetFrameType(frameType);
177     }
178 }
179 
SetRotationFlag(CmSurfaceState2Dor3DMgr * stateMgr,uint32_t rotation)180 void CmExecutionAdv::SetRotationFlag(CmSurfaceState2Dor3DMgr *stateMgr, uint32_t rotation)
181 {
182     if (stateMgr)
183     {
184         stateMgr->SetRotationFlag(rotation);
185     }
186 }
187 
SetChromaSitting(CmSurfaceState2Dor3DMgr * stateMgr,uint8_t chromaSitting)188 void CmExecutionAdv::SetChromaSitting(CmSurfaceState2Dor3DMgr *stateMgr, uint8_t chromaSitting)
189 {
190     if (stateMgr)
191     {
192         stateMgr->SetChromaSitting(chromaSitting);
193     }
194 }
195 
GetFastTrackerProducer()196 FrameTrackerProducer *CmExecutionAdv::GetFastTrackerProducer()
197 {
198     return m_tracker->GetTrackerProducer();
199 }
200 
CreateKernelRT(CmDeviceRT * device,CmProgramRT * program,uint32_t kernelIndex,uint32_t kernelSeqNum)201 CmKernelRT *CmExecutionAdv::CreateKernelRT(CmDeviceRT *device,
202                CmProgramRT *program,
203                uint32_t kernelIndex,
204                uint32_t kernelSeqNum)
205 {
206     return new (std::nothrow) CmKernelEx(device, program, kernelIndex, kernelSeqNum);
207 }
208 
RefreshSurfaces(CmDeviceRT * device)209 int CmExecutionAdv::RefreshSurfaces(CmDeviceRT *device)
210 {
211     CM_CHK_NULL_RETURN_CMERROR(device);
212 
213     CmSurfaceManager *surfaceMgr  = nullptr;
214     CSync *           surfaceLock = nullptr;
215 
216     device->GetSurfaceManager(surfaceMgr);
217     CM_CHK_NULL_RETURN_CMERROR(surfaceMgr);
218 
219     surfaceLock = device->GetSurfaceCreationLock();
220     CM_CHK_NULL_RETURN_CMERROR(surfaceLock);
221 
222     uint32_t freeSurfNum = 0;
223     surfaceLock->Acquire();
224     surfaceMgr->RefreshDelayDestroySurfaces(freeSurfNum);
225     surfaceLock->Release();
226 
227     return CM_SUCCESS;
228 }
229 
SubmitTask(CMRT_UMD::CmQueueRT * queue,CMRT_UMD::CmTask * task,CMRT_UMD::CmEvent * & event,const CMRT_UMD::CmThreadSpace * threadSpace,MOS_GPU_CONTEXT gpuContext)230 int CmExecutionAdv::SubmitTask(CMRT_UMD::CmQueueRT *queue,
231                 CMRT_UMD::CmTask *task,
232                 CMRT_UMD::CmEvent *&event,
233                 const CMRT_UMD::CmThreadSpace *threadSpace,
234                 MOS_GPU_CONTEXT gpuContext)
235 {
236     CM_NORMALMESSAGE("================ in fast path, media walker===================");
237 
238     CM_HAL_STATE * state = m_cmhal;
239     CM_CHK_NULL_RETURN_CMERROR(state->advExecutor);
240     CmTracker *cmTracker = state->advExecutor->GetTracker();
241     CmISH *cmish = state->advExecutor->GetISH();
242     CmDSH *cmdsh = state->advExecutor->GetDSH();
243     CM_CHK_NULL_RETURN_CMERROR(cmTracker);
244     CM_CHK_NULL_RETURN_CMERROR(cmish);
245     CM_CHK_NULL_RETURN_CMERROR(cmdsh);
246 
247     CLock Locker(m_criticalSection);
248 
249     bool isDummyEventCreated = false;
250 #if MDF_SURFACE_CONTENT_DUMP
251     if (state->dumpSurfaceContent && event == CM_NO_EVENT)
252     {
253         // if surface content dump is needed, the enqueueFast should be a blocking operation
254         // we need a dummy event here
255         isDummyEventCreated = true;
256         event = nullptr;
257     }
258 #endif
259 
260     state->osInterface->pfnResetOsStates(state->osInterface);
261     state->osInterface->pfnSetIndirectStateSize(state->osInterface, state->renderHal->dwIndirectHeapSize);
262 
263     CM_HAL_OSSYNC_PARAM syncParam;
264     syncParam.osSyncEvent = nullptr;
265 
266     // Call HAL layer to wait for Task finished with event-driven mechanism
267     CM_CHK_MOSSTATUS_RETURN(m_cmhal->pfnRegisterUMDNotifyEventHandle(m_cmhal, &syncParam));
268 
269     HANDLE osSyncEvent = syncParam.osSyncEvent;
270 
271     CmTaskRT *kernelArrayRT = static_cast<CmTaskRT *>(task);
272     uint32_t kernelCount = kernelArrayRT->GetKernelCount();
273     if (kernelCount == 0 || kernelCount > CM_MAX_KERNELS_PER_TASK)
274     {
275         return CM_FAILURE;
276     }
277 
278     // get an array of CmKernelEx
279     CmKernelEx *kernels[CM_MAX_KERNELS_PER_TASK];
280     MOS_ZeroMemory(kernels, sizeof(kernels));
281     for (uint32_t i = 0; i < kernelCount; i++)
282     {
283         kernels[i] = static_cast<CmKernelEx *>(kernelArrayRT->GetKernelPointer(i));
284         CM_CHK_NULL_RETURN_CMERROR(kernels[i]);
285         kernels[i]->AllocateCurbe();
286     }
287 
288     // get CmDeviceRT
289     CmDeviceRT *device = nullptr;
290     kernels[0]->GetCmDevice(device);
291     CM_CHK_NULL_RETURN_CMERROR(device);
292 
293     // set printf buffer if needed
294     if (device->IsPrintEnable())
295     {
296         SurfaceIndex *printBufferIndex = nullptr;
297         device->CreatePrintBuffer();
298         device->GetPrintBufferIndex(printBufferIndex);
299         CM_ASSERT(printBufferIndex);
300         for (uint32_t i = 0; i < kernelCount; i++)
301         {
302             kernels[i]->SetStaticBuffer(CM_PRINTF_STATIC_BUFFER_ID, printBufferIndex);
303         }
304     }
305 
306     const CmThreadSpaceRT *threadSpaceRTConst = static_cast<const CmThreadSpaceRT *>(threadSpace);
307     CmThreadSpaceRT *threadSpaceRT = const_cast<CmThreadSpaceRT *>(threadSpaceRTConst);
308     CmThreadSpaceRT *threadSpaces[CM_MAX_KERNELS_PER_TASK];
309     MOS_ZeroMemory(threadSpaces, sizeof(threadSpaces));
310     if (threadSpaceRT == nullptr)
311     {
312         for (uint32_t i = 0; i < kernelCount; i++)
313         {
314             threadSpaces[i] = kernels[i]->GetThreadSpaceEx();
315         }
316     }
317 
318     // if SWSB is used, update the SWSB arguments in kenrel
319     if (!state->cmHalInterface->IsScoreboardParamNeeded())
320     {
321         for (uint32_t i = 0; i < kernelCount; i++)
322         {
323             kernels[i]->UpdateSWSBArgs(threadSpaceRT);
324         }
325     }
326 
327     CmCommandBuffer cmdBufData(state);
328     CmCommandBuffer *cmdBuf = &cmdBufData;
329     CM_CHK_NULL_RETURN_CMERROR(cmdBuf);
330 
331     uint32_t tracker;
332     uint32_t taskId;
333     MOS_STATUS mret = cmTracker->AssignFrameTracker(queue->GetFastTrackerIndex(), &taskId, &tracker, event != CM_NO_EVENT);
334     bool taskAssigned = (mret == MOS_STATUS_SUCCESS);
335 
336     cmdBuf->Initialize();
337 
338     CmSSH *ssh = cmdBuf->GetSSH();
339     CM_CHK_NULL_RETURN_CMERROR(ssh);
340 
341     // Add kernels to ISH directly
342     cmish->LoadKernels(kernels, kernelCount);
343 
344     // initialize SSH
345     ssh->Initialize(kernels, kernelCount);
346 
347     // create new media state
348     CmMediaState *cmMediaState = cmdsh->CreateMediaState();
349     CM_CHK_NULL_RETURN_CMERROR(cmMediaState);
350     cmMediaState->Allocate(kernels, kernelCount, queue->GetFastTrackerIndex(), tracker);
351 
352     // generate curbe and load media id
353     for (uint32_t i = 0; i < kernelCount; i++)
354     {
355         ssh->AssignBindingTable();
356         kernels[i]->LoadReservedSamplers(cmMediaState, i);
357         kernels[i]->LoadReservedSurfaces(ssh);
358         kernels[i]->UpdateCurbe(ssh, cmMediaState, i);
359         kernels[i]->UpdateFastTracker(queue->GetFastTrackerIndex(), tracker);
360         cmMediaState->LoadCurbe(kernels[i], i);
361         cmMediaState->LoadMediaID(kernels[i], i, ssh->GetBindingTableOffset());
362     }
363 
364     // prepare cp resources
365     ssh->PrepareResourcesForCp();
366 
367     // get the position to write tracker
368     MOS_RESOURCE *trackerResource = nullptr;
369     uint32_t trackerOffset = 0;
370     cmTracker->GetLatestTrackerResource(queue->GetFastTrackerIndex(), &trackerResource, &trackerOffset);
371 
372     // call gtpin callback if needed
373     CmNotifierGroup *ng = nullptr;
374     if (gGTPinInitialized && taskAssigned)
375     {
376         ng = device->GetNotifiers();
377         ng->NotifyTaskFlushed(device, task, ssh, taskId);
378     }
379 
380     if (m_cmhal->platform.eRenderCoreFamily >= IGFX_GEN12_CORE)
381     {
382         cmdBuf->AddMMCProlog();
383     }
384     cmdBuf->AddFlushCacheAndSyncTask(false, false, nullptr);
385     cmdBuf->AddFlushCacheAndSyncTask(true, false, nullptr);
386 
387     cmdBuf->AddPowerOption(kernelArrayRT->GetPowerOption());
388 
389     cmdBuf->AddProtectedProlog();
390 
391     cmdBuf->AddReadTimeStamp(cmTracker->GetResource(), cmTracker->GetStartOffset(taskId));
392     cmdBuf->AddUmdProfilerStart();
393 
394     cmdBuf->AddL3CacheConfig(&m_l3Values);
395 
396     cmdBuf->AddPreemptionConfig(false);
397 
398     cmdBuf->AddPipelineSelect(false);
399 
400     cmdBuf->AddStateBaseAddress(cmish, cmMediaState);
401 
402     CM_TASK_CONFIG taskConfig;
403     kernelArrayRT->GetProperty(taskConfig);
404     if (threadSpaceRT)
405     {
406         cmdBuf->AddMediaVFE(cmMediaState, taskConfig.fusedEuDispatchFlag == CM_FUSED_EU_ENABLE, &threadSpaceRT);  // global thread space
407     }
408     else
409     {
410         cmdBuf->AddMediaVFE(cmMediaState, taskConfig.fusedEuDispatchFlag == CM_FUSED_EU_ENABLE, threadSpaces, kernelCount);
411     }
412 
413     cmdBuf->AddCurbeLoad(cmMediaState);
414 
415     cmdBuf->AddMediaIDLoad(cmMediaState);
416 
417     CM_HAL_CONDITIONAL_BB_END_INFO *cbbInfos = kernelArrayRT->GetConditionalEndInfo();
418     uint64_t conditionalBitMap = kernelArrayRT->GetConditionalEndBitmap();
419     for (uint32_t i = 0; i < kernelCount; i ++)
420     {
421         CmThreadSpaceRT *ts = (threadSpaceRT != nullptr) ? threadSpaceRT: threadSpaces[i];
422 
423         // check whether need to insert a CBB
424         bool needCBB = conditionalBitMap & ((uint64_t)1 << i);
425         if (needCBB)
426         {
427             cmdBuf->AddFlushCacheAndSyncTask(false, true, nullptr);
428 
429             cmdBuf->AddReadTimeStamp(cmTracker->GetResource(), cmTracker->GetEndOffset(taskId), true);
430 
431             cmdBuf->AddConditionalFrameTracker(trackerResource, trackerOffset, tracker, &cbbInfos[i]);
432 
433             cmdBuf->AddConditionalBatchBufferEnd(&cbbInfos[i]);
434         }
435 
436         if (i > 0)
437         {
438             // check whether the next kernel has a dependency pattern
439             uint32_t dcount = 0;
440             if (ts != nullptr)
441             {
442                 CM_HAL_DEPENDENCY *dependency;
443                 ts->GetDependency(dependency);
444                 dcount = dependency->count;
445             }
446 
447             bool syncFlag = false;
448             uint64_t syncBitMap = kernelArrayRT->GetSyncBitmap();
449             syncFlag = syncBitMap & ((uint64_t)1 << (i-1));
450             // add sync if necessary
451             if ((dcount != 0) || syncFlag)
452             {
453                 cmdBuf->AddSyncBetweenKernels();
454             }
455         }
456 
457         cmdBuf->AddMediaObjectWalker(ts, i);
458     }
459 
460     cmdBuf->AddFlushCacheAndSyncTask(false, true, nullptr);
461 
462     cmdBuf->AddUmdProfilerEnd();
463     cmdBuf->AddReadTimeStamp(cmTracker->GetResource(), cmTracker->GetEndOffset(taskId), true);
464 
465     cmdBuf->AddFrameTracker(trackerResource, trackerOffset, tracker);
466 
467     cmdBuf->AddDummyVFE();
468 
469     cmdBuf->AddBatchBufferEnd();
470 
471     cmdBuf->ReturnUnusedBuffer();
472 
473 #if MDF_SURFACE_STATE_DUMP
474     if (m_cmhal->dumpSurfaceState)
475     {
476         ssh->DumpSSH();
477     }
478 #endif
479 
480 #if MDF_COMMAND_BUFFER_DUMP
481     if (m_cmhal->dumpCommandBuffer)
482     {
483         cmdBuf->Dump();
484     }
485 #endif
486 
487 #if MDF_CURBE_DATA_DUMP
488     if (m_cmhal->dumpCurbeData)
489     {
490         cmMediaState->Dump();
491     }
492 #endif
493 
494     cmdBuf->Submit();
495 
496     cmish->Submit(queue->GetFastTrackerIndex(), tracker);
497 
498     cmMediaState->Submit();
499     cmdsh->DestroyMediaState(cmMediaState);
500 
501     if (event != CM_NO_EVENT && taskAssigned)
502     {
503         CmEventEx *eventEx = MOS_New(CmEventEx, state, taskId, cmTracker);
504         CM_CHK_NULL_RETURN_CMERROR(eventEx);
505         cmTracker->AssociateEvent(eventEx);
506         eventEx->SetTaskOsData(cmdBuf->GetResource(), osSyncEvent);
507         event = static_cast<CmEventEx *>(eventEx);
508 
509         if (gGTPinInitialized)
510         {
511             eventEx->SetNotifier(ng);
512         }
513     }
514     else
515     {
516         event = nullptr;
517     }
518     cmTracker->Refresh();
519 
520     // refresh surfaces in surface manager
521     CM_CHK_CMSTATUS_RETURN(RefreshSurfaces(device));
522 
523 #if MDF_SURFACE_CONTENT_DUMP
524     if (state->dumpSurfaceContent && event != nullptr)
525     {
526         event->WaitForTaskFinished();
527         if (isDummyEventCreated)
528         {
529             DestoryEvent(queue, event);
530         }
531         for (uint32_t i = 0; i < kernelCount; i++)
532         {
533             kernels[i]->SurfaceDumpEx(i, taskId);
534         }
535     }
536 #endif
537 
538     return CM_SUCCESS;
539 }
540 
DestoryEvent(CMRT_UMD::CmQueueRT * queue,CMRT_UMD::CmEvent * & event)541 int CmExecutionAdv::DestoryEvent(CMRT_UMD::CmQueueRT *queue, CMRT_UMD::CmEvent *&event)
542 {
543     CmEventEx *eventEx = static_cast<CmEventEx *>(event);
544     MOS_Delete(eventEx);
545     event = nullptr;
546     return CM_SUCCESS;
547 }
548 
SubmitComputeTask(CMRT_UMD::CmQueueRT * queue,CMRT_UMD::CmTask * task,CMRT_UMD::CmEvent * & event,const CMRT_UMD::CmThreadGroupSpace * threadGroupSpace,MOS_GPU_CONTEXT gpuContext)549 int CmExecutionAdv::SubmitComputeTask(CMRT_UMD::CmQueueRT *queue,
550                 CMRT_UMD::CmTask *task,
551                 CMRT_UMD::CmEvent* &event,
552                 const CMRT_UMD::CmThreadGroupSpace* threadGroupSpace,
553                 MOS_GPU_CONTEXT gpuContext)
554 {
555     CM_ASSERTMESSAGE("Compute Tasks not support on this platform\n");
556     return CM_FAILURE;
557 }
558 
WaitForAllTasksFinished()559 int CmExecutionAdv::WaitForAllTasksFinished()
560 {
561     return m_tracker->WaitForAllTasksFinished();
562 }
563 
SetL3Config(const L3ConfigRegisterValues * l3Config)564 void CmExecutionAdv::SetL3Config(const L3ConfigRegisterValues *l3Config)
565 {
566     m_l3Values.config_register0 = l3Config->config_register0;
567     m_l3Values.config_register1 = l3Config->config_register1;
568     m_l3Values.config_register2 = l3Config->config_register2;
569     m_l3Values.config_register3 = l3Config->config_register3;
570 }
571 
SetSuggestedL3Config(L3_SUGGEST_CONFIG l3SuggestConfig)572 int CmExecutionAdv::SetSuggestedL3Config(L3_SUGGEST_CONFIG l3SuggestConfig)
573 {
574     const L3ConfigRegisterValues *table = nullptr;
575     uint32_t count = 0;
576 
577     switch(m_cmhal->platform.eRenderCoreFamily)
578     {
579         case IGFX_GEN11_CORE:
580             count = sizeof(ICL_L3_PLANE)/sizeof(L3ConfigRegisterValues);
581             table = (L3ConfigRegisterValues *)ICL_L3_PLANE;
582             break;
583         case IGFX_GEN12_CORE:
584             table = m_cmhal->cmHalInterface->m_l3Plane;
585             count = m_cmhal->cmHalInterface->m_l3ConfigCount;
586             break;
587         default: // gen9
588             count = sizeof(SKL_L3_PLANE) / sizeof(L3ConfigRegisterValues);
589             table = (L3ConfigRegisterValues*)SKL_L3_PLANE;
590             break;
591     }
592     if (static_cast<size_t>(l3SuggestConfig) >= count)
593     {
594         return CM_INVALID_ARG_VALUE;
595     }
596     m_l3Values.config_register0 = table[l3SuggestConfig].config_register0;
597     m_l3Values.config_register1 = table[l3SuggestConfig].config_register1;
598     m_l3Values.config_register2 = table[l3SuggestConfig].config_register2;
599     m_l3Values.config_register3 = table[l3SuggestConfig].config_register3;
600 
601     return CM_SUCCESS;
602 }
603 
AssignNewTracker()604 int CmExecutionAdv::AssignNewTracker()
605 {
606     FrameTrackerProducer *trackerProducer = m_tracker->GetTrackerProducer();
607     return trackerProducer->AssignNewTracker();
608 }
609 
SubmitGpgpuTask(CMRT_UMD::CmQueueRT * queue,CMRT_UMD::CmTask * task,CMRT_UMD::CmEvent * & event,const CMRT_UMD::CmThreadGroupSpace * threadGroupSpace,MOS_GPU_CONTEXT gpuContext)610 int CmExecutionAdv::SubmitGpgpuTask(CMRT_UMD::CmQueueRT *queue,
611                 CMRT_UMD::CmTask *task,
612                 CMRT_UMD::CmEvent* &event,
613                 const CMRT_UMD::CmThreadGroupSpace* threadGroupSpace,
614                 MOS_GPU_CONTEXT gpuContext)
615 {
616     CM_NORMALMESSAGE("================ in fast path, gpgpu walker===================");
617 
618     CM_HAL_STATE * state = m_cmhal;
619     CM_CHK_NULL_RETURN_CMERROR(state->advExecutor);
620     CmTracker *cmTracker = state->advExecutor->GetTracker();
621     CmISH *cmish = state->advExecutor->GetISH();
622     CmDSH *cmdsh = state->advExecutor->GetDSH();
623     CM_CHK_NULL_RETURN_CMERROR(cmTracker);
624     CM_CHK_NULL_RETURN_CMERROR(cmish);
625     CM_CHK_NULL_RETURN_CMERROR(cmdsh);
626 
627     CLock Locker(m_criticalSection);
628 
629     bool isDummyEventCreated = false;
630 #if MDF_SURFACE_CONTENT_DUMP
631     if (state->dumpSurfaceContent && event == CM_NO_EVENT)
632     {
633         // if surface content dump is needed, the enqueueFast should be a blocking operation
634         // we need a dummy event here
635         isDummyEventCreated = true;
636         event = nullptr;
637     }
638 #endif
639 
640     state->osInterface->pfnSetGpuContext(state->osInterface, gpuContext);
641     state->osInterface->pfnResetOsStates(state->osInterface);
642     state->osInterface->pfnSetIndirectStateSize(state->osInterface, state->renderHal->dwIndirectHeapSize);
643 
644     CM_HAL_OSSYNC_PARAM syncParam;
645     syncParam.osSyncEvent = nullptr;
646 
647     // Call HAL layer to wait for Task finished with event-driven mechanism
648     CM_CHK_MOSSTATUS_RETURN(m_cmhal->pfnRegisterUMDNotifyEventHandle(m_cmhal, &syncParam));
649 
650     HANDLE osSyncEvent = syncParam.osSyncEvent;
651 
652     CmTaskRT *kernelArrayRT = static_cast<CmTaskRT *>(task);
653     uint32_t kernelCount = kernelArrayRT->GetKernelCount();
654     if (kernelCount == 0 || kernelCount > CM_MAX_KERNELS_PER_TASK)
655     {
656         return CM_FAILURE;
657     }
658 
659     // get an array of CmKernelEx
660     CmKernelEx *kernels[CM_MAX_KERNELS_PER_TASK];
661     MOS_ZeroMemory(kernels, sizeof(kernels));
662     for (uint32_t i = 0; i < kernelCount; i++)
663     {
664         kernels[i] = static_cast<CmKernelEx *>(kernelArrayRT->GetKernelPointer(i));
665         CM_CHK_NULL_RETURN_CMERROR(kernels[i]);
666         kernels[i]->AllocateCurbeAndFillImplicitArgs(const_cast<CmThreadGroupSpace *>(threadGroupSpace));
667     }
668 
669     // get CmDeviceRT
670     CmDeviceRT *device = nullptr;
671     kernels[0]->GetCmDevice(device);
672     CM_CHK_NULL_RETURN_CMERROR(device);
673 
674     // set printf buffer if needed
675     if (device->IsPrintEnable())
676     {
677         SurfaceIndex *printBufferIndex = nullptr;
678         device->CreatePrintBuffer();
679         device->GetPrintBufferIndex(printBufferIndex);
680         CM_ASSERT(printBufferIndex);
681         for (uint32_t i = 0; i < kernelCount; i++)
682         {
683             kernels[i]->SetStaticBuffer(CM_PRINTF_STATIC_BUFFER_ID, printBufferIndex);
684         }
685     }
686 
687     CmThreadGroupSpace *threadGroupSpaces[CM_MAX_KERNELS_PER_TASK];
688     MOS_ZeroMemory(threadGroupSpaces, sizeof(threadGroupSpaces));
689     if (threadGroupSpace == nullptr)
690     {
691         for (uint32_t i = 0; i < kernelCount; i++)
692         {
693             threadGroupSpaces[i] = kernels[i]->GetThreadGroupSpaceEx();
694         }
695     }
696 
697     CmCommandBuffer cmdBufData(state);
698     CmCommandBuffer *cmdBuf = &cmdBufData;
699     CM_CHK_NULL_RETURN_CMERROR(cmdBuf);
700 
701     uint32_t tracker;
702     uint32_t taskId;
703     MOS_STATUS mret = cmTracker->AssignFrameTracker(queue->GetFastTrackerIndex(), &taskId, &tracker, event != CM_NO_EVENT);
704     bool taskAssigned = (mret == MOS_STATUS_SUCCESS);
705 
706     cmdBuf->Initialize();
707 
708     CmSSH *ssh = cmdBuf->GetSSH();
709     CM_CHK_NULL_RETURN_CMERROR(ssh);
710 
711     // Add kernels to ISH directly
712     cmish->LoadKernels(kernels, kernelCount);
713 
714     // initialize SSH
715     ssh->Initialize(kernels, kernelCount);
716 
717     // create new media state
718     CmMediaState *cmMediaState = cmdsh->CreateMediaState();
719     CM_CHK_NULL_RETURN_CMERROR(cmMediaState);
720     cmMediaState->Allocate(kernels, kernelCount, 0, tracker);
721 
722     // generate curbe and load media id
723     for (uint32_t i = 0; i < kernelCount; i++)
724     {
725         ssh->AssignBindingTable();
726         kernels[i]->LoadReservedSamplers(cmMediaState, i);
727         kernels[i]->LoadReservedSurfaces(ssh);
728         kernels[i]->UpdateCurbe(ssh, cmMediaState, i);
729         kernels[i]->UpdateFastTracker(queue->GetFastTrackerIndex(), tracker);
730         cmMediaState->LoadCurbe(kernels[i], i);
731         CmThreadGroupSpace *tgs = (threadGroupSpace != nullptr) ?
732                                   const_cast<CmThreadGroupSpace *>(threadGroupSpace)
733                                   : threadGroupSpaces[i];
734         cmMediaState->LoadMediaID(kernels[i], i, ssh->GetBindingTableOffset(), tgs);
735     }
736 
737     // prepare cp resources
738     ssh->PrepareResourcesForCp();
739 
740     // get the position to write tracker
741     MOS_RESOURCE *trackerResource = nullptr;
742     uint32_t trackerOffset = 0;
743     cmTracker->GetLatestTrackerResource(queue->GetFastTrackerIndex(), &trackerResource, &trackerOffset);
744 
745     if (m_cmhal->platform.eRenderCoreFamily >= IGFX_GEN12_CORE)
746     {
747         cmdBuf->AddMMCProlog();
748     }
749     cmdBuf->AddFlushCacheAndSyncTask(false, false, nullptr);
750     cmdBuf->AddFlushCacheAndSyncTask(true, false, nullptr);
751 
752     cmdBuf->AddPowerOption(kernelArrayRT->GetPowerOption());
753 
754     cmdBuf->AddProtectedProlog();
755 
756     cmdBuf->AddReadTimeStamp(cmTracker->GetResource(), cmTracker->GetStartOffset(taskId));
757     cmdBuf->AddUmdProfilerStart();
758 
759     cmdBuf->AddL3CacheConfig(&m_l3Values);
760 
761     cmdBuf->AddPreemptionConfig(true);
762 
763     cmdBuf->AddPipelineSelect(true);
764 
765     cmdBuf->AddStateBaseAddress(cmish, cmMediaState);
766 
767     cmdBuf->AddSipState(cmish->GetSipKernelOffset());
768 
769     CM_CHK_MOSSTATUS_RETURN(m_cmhal->osInterface->pfnRegisterResource(
770         m_cmhal->osInterface,
771         &m_cmhal->csrResource,
772         true,
773         true));
774 
775     cmdBuf->AddCsrBaseAddress(&m_cmhal->csrResource);
776 
777     CM_TASK_CONFIG taskConfig;
778     kernelArrayRT->GetProperty(taskConfig);
779     cmdBuf->AddMediaVFE(cmMediaState, taskConfig.fusedEuDispatchFlag == CM_FUSED_EU_ENABLE);
780 
781     cmdBuf->AddCurbeLoad(cmMediaState);
782 
783     cmdBuf->AddMediaIDLoad(cmMediaState);
784 
785     const CM_EXECUTION_CONFIG *exeConfig = kernelArrayRT->GetKernelExecuteConfig();
786     CM_HAL_CONDITIONAL_BB_END_INFO *cbbInfos = kernelArrayRT->GetConditionalEndInfo();
787     uint64_t conditionalBitMap = kernelArrayRT->GetConditionalEndBitmap();
788     for (uint32_t i = 0; i < kernelCount; i ++)
789     {
790         CmThreadGroupSpace *tgs = (threadGroupSpace != nullptr) ? const_cast<CmThreadGroupSpace *>(threadGroupSpace) : threadGroupSpaces[i];
791 
792         // check whether need to insert a CBB
793         bool needCBB = conditionalBitMap & ((uint64_t)1 << i);
794         if (needCBB)
795         {
796             cmdBuf->AddFlushCacheAndSyncTask(false, true, nullptr);
797 
798             cmdBuf->AddReadTimeStamp(cmTracker->GetResource(), cmTracker->GetEndOffset(taskId), true);
799 
800             cmdBuf->AddConditionalFrameTracker(trackerResource, trackerOffset, tracker, &cbbInfos[i]);
801 
802             cmdBuf->AddConditionalBatchBufferEnd(&cbbInfos[i]);
803         }
804 
805         if (i > 0)
806         {
807             bool syncFlag = false;
808             uint64_t syncBitMap = kernelArrayRT->GetSyncBitmap();
809             syncFlag = syncBitMap & ((uint64_t)1 << (i-1));
810             // add sync if necessary
811             if (syncFlag)
812             {
813                 cmdBuf->AddSyncBetweenKernels();
814             }
815         }
816 
817         cmdBuf->AddGpgpuWalker(tgs, kernels[i], i);
818     }
819 
820     cmdBuf->AddFlushCacheAndSyncTask(false, true, nullptr);
821 
822     cmdBuf->AddUmdProfilerEnd();
823     cmdBuf->AddReadTimeStamp(cmTracker->GetResource(), cmTracker->GetEndOffset(taskId), true);
824 
825     cmdBuf->AddFrameTracker(trackerResource, trackerOffset, tracker);
826 
827     cmdBuf->AddDummyVFE();
828 
829     cmdBuf->AddBatchBufferEnd();
830 
831     cmdBuf->ReturnUnusedBuffer();
832 
833     cmdBuf->Submit();
834 
835     cmish->Submit(queue->GetFastTrackerIndex(), tracker);
836 
837 #if MDF_SURFACE_STATE_DUMP
838     if (m_cmhal->dumpSurfaceState)
839     {
840         ssh->DumpSSH();
841     }
842 #endif
843 
844 #if MDF_COMMAND_BUFFER_DUMP
845     if (m_cmhal->dumpCommandBuffer)
846     {
847         cmdBuf->Dump();
848     }
849 #endif
850 
851 #if MDF_CURBE_DATA_DUMP
852     if (m_cmhal->dumpCurbeData)
853     {
854         cmMediaState->Dump();
855     }
856 #endif
857 
858     cmMediaState->Submit();
859     cmdsh->DestroyMediaState(cmMediaState);
860 
861     if (event != CM_NO_EVENT && taskAssigned)
862     {
863         CmEventEx *eventEx = MOS_New(CmEventEx, state, taskId, cmTracker);
864         eventEx->SetTaskOsData(cmdBuf->GetResource(), osSyncEvent);
865         event = static_cast<CmEventEx *>(eventEx);
866     }
867     else
868     {
869         event = nullptr;
870     }
871     cmTracker->Refresh();
872 
873     // refresh surfaces in surface manager
874     CM_CHK_CMSTATUS_RETURN(RefreshSurfaces(device));
875 
876 #if MDF_SURFACE_CONTENT_DUMP
877     if (state->dumpSurfaceContent && event != nullptr)
878     {
879         event->WaitForTaskFinished();
880         if (isDummyEventCreated)
881         {
882             DestoryEvent(queue, event);
883         }
884         for (uint32_t i = 0; i < kernelCount; i++)
885         {
886             kernels[i]->SurfaceDumpEx(i, taskId);
887         }
888     }
889 #endif
890 
891     return CM_SUCCESS;
892 }
893 
SwitchToFastPath(CmTask * task)894 bool CmExecutionAdv::SwitchToFastPath(CmTask *task)
895 {
896     CmTaskRT *kernelArrayRT = static_cast<CmTaskRT *>(task);
897     uint32_t kernelCount = kernelArrayRT->GetKernelCount();
898     for (uint32_t i = 0; i < kernelCount; i++)
899     {
900         CmKernelEx *kernel = static_cast<CmKernelEx *>(kernelArrayRT->GetKernelPointer(i));
901         if (kernel == nullptr)
902         {
903             return false;
904         }
905         if (kernel->IsFastPathSupported() == false)
906         {
907             return false;
908         }
909     }
910     return true;
911 }
912 
913