1 /*
2 * Copyright (c) 2018-2021, Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 */
22 //!
23 //! \file mos_gpucontext_specific.cpp
24 //! \brief Container class for the Linux specific gpu context
25 //!
26
27 #include "mos_context_specific.h"
28 #include "mos_gpucontext_specific.h"
29 #include "mos_graphicsresource_specific.h"
30 #include "mos_commandbuffer_specific.h"
31 #include "mos_util_devult_specific.h"
32 #include "mos_cmdbufmgr.h"
33 #include "mos_os_virtualengine.h"
34 #include <unistd.h>
35
// Raw 32-bit value named after the MI "batch buffer end" command.
// NOTE(review): no use of this macro is visible in this part of the file - confirm where it is consumed.
#define MI_BATCHBUFFER_END 0x05000000
// File-local pthread mutex, statically initialized so no explicit init/destroy call is required.
// NOTE(review): the name suggests it serializes command buffer dumping; its users are not visible here - confirm.
static pthread_mutex_t command_dump_mutex = PTHREAD_MUTEX_INITIALIZER;
38
GpuContextSpecific(const MOS_GPU_NODE gpuNode,MOS_GPU_CONTEXT mosGpuCtx,CmdBufMgr * cmdBufMgr,GpuContext * reusedContext)39 GpuContextSpecific::GpuContextSpecific(
40 const MOS_GPU_NODE gpuNode,
41 MOS_GPU_CONTEXT mosGpuCtx,
42 CmdBufMgr *cmdBufMgr,
43 GpuContext *reusedContext)
44 {
45 MOS_OS_FUNCTION_ENTER;
46
47 m_nodeOrdinal = gpuNode;
48 m_cmdBufMgr = cmdBufMgr;
49 m_gpuContext = mosGpuCtx;
50 m_statusBufferResource = nullptr;
51 m_maxPatchLocationsize = PATCHLOCATIONLIST_SIZE;
52
53 if (reusedContext)
54 {
55 MOS_OS_NORMALMESSAGE("gpucontex reusing not enabled on Linux.");
56 }
57
58 #if (_DEBUG || _RELEASE_INTERNAL)
59 // get user engine instance setting from environment variable
60 char *engineInstances = getenv("INTEL_ENGINE_INSTANCE");
61 if (engineInstances != nullptr)
62 {
63 errno = 0;
64 long int instance = strtol(engineInstances, nullptr, 16);
65 /* Check for various possible errors. */
66 if ((errno == ERANGE && instance == LONG_MAX) || (instance < 0))
67 {
68 MOS_OS_NORMALMESSAGE("Invalid INTEL_ENGINE_INSTANCE setting.(%s)\n", engineInstances);
69 m_engineInstanceSelect = 0x0;
70 }
71 else
72 {
73 m_engineInstanceSelect = (uint32_t)instance;
74 }
75 }
76 #endif
77 }
78
~GpuContextSpecific()79 GpuContextSpecific::~GpuContextSpecific()
80 {
81 MOS_OS_FUNCTION_ENTER;
82
83 Clear();
84 }
85
Init(OsContext * osContext,PMOS_INTERFACE osInterface,MOS_GPU_NODE GpuNode,PMOS_GPUCTX_CREATOPTIONS createOption)86 MOS_STATUS GpuContextSpecific::Init(OsContext *osContext,
87 PMOS_INTERFACE osInterface,
88 MOS_GPU_NODE GpuNode,
89 PMOS_GPUCTX_CREATOPTIONS createOption)
90 {
91 MOS_OS_FUNCTION_ENTER;
92
93 MOS_OS_CHK_NULL_RETURN(osContext);
94
95 if (m_cmdBufPoolMutex == nullptr)
96 {
97 m_cmdBufPoolMutex = MosUtilities::MosCreateMutex();
98 }
99
100 MOS_OS_CHK_NULL_RETURN(m_cmdBufPoolMutex);
101
102 MosUtilities::MosLockMutex(m_cmdBufPoolMutex);
103
104 m_cmdBufPool.clear();
105
106 MosUtilities::MosUnlockMutex(m_cmdBufPoolMutex);
107
108 m_commandBufferSize = COMMAND_BUFFER_SIZE;
109
110 m_nextFetchIndex = 0;
111
112 m_cmdBufFlushed = true;
113
114 m_osContext = osContext;
115
116 MOS_OS_CHK_STATUS_RETURN(AllocateGPUStatusBuf());
117
118 m_commandBuffer = (PMOS_COMMAND_BUFFER)MOS_AllocAndZeroMemory(sizeof(MOS_COMMAND_BUFFER));
119
120 MOS_OS_CHK_NULL_RETURN(m_commandBuffer);
121
122 m_IndirectHeapSize = 0;
123
124 // each thread has its own GPU context, so do not need any lock as guarder here
125 m_allocationList = (ALLOCATION_LIST *)MOS_AllocAndZeroMemory(sizeof(ALLOCATION_LIST) * ALLOCATIONLIST_SIZE);
126 MOS_OS_CHK_NULL_RETURN(m_allocationList);
127 m_maxNumAllocations = ALLOCATIONLIST_SIZE;
128
129 m_patchLocationList = (PATCHLOCATIONLIST *)MOS_AllocAndZeroMemory(sizeof(PATCHLOCATIONLIST) * PATCHLOCATIONLIST_SIZE);
130 MOS_OS_CHK_NULL_RETURN(m_patchLocationList);
131 m_maxPatchLocationsize = PATCHLOCATIONLIST_SIZE;
132
133 m_attachedResources = (PMOS_RESOURCE)MOS_AllocAndZeroMemory(sizeof(MOS_RESOURCE) * ALLOCATIONLIST_SIZE);
134 MOS_OS_CHK_NULL_RETURN(m_attachedResources);
135
136 m_writeModeList = (bool *)MOS_AllocAndZeroMemory(sizeof(bool) * ALLOCATIONLIST_SIZE);
137 MOS_OS_CHK_NULL_RETURN(m_writeModeList);
138
139 m_GPUStatusTag = 1;
140
141 m_createOptionEnhanced = (MOS_GPUCTX_CREATOPTIONS_ENHANCED*)MOS_AllocAndZeroMemory(sizeof(MOS_GPUCTX_CREATOPTIONS_ENHANCED));
142 MOS_OS_CHK_NULL_RETURN(m_createOptionEnhanced);
143 m_createOptionEnhanced->SSEUValue = createOption->SSEUValue;
144
145 if (typeid(*createOption) == typeid(MOS_GPUCTX_CREATOPTIONS_ENHANCED))
146 {
147 PMOS_GPUCTX_CREATOPTIONS_ENHANCED createOptionEnhanced = static_cast<PMOS_GPUCTX_CREATOPTIONS_ENHANCED>(createOption);
148 m_createOptionEnhanced->UsingSFC = createOptionEnhanced->UsingSFC;
149 }
150
151 for (int i=0; i<MAX_ENGINE_INSTANCE_NUM+1; i++)
152 {
153 m_i915Context[i] = nullptr;
154 }
155
156 if (osInterface->ctxBasedScheduling)
157 {
158 unsigned int nengine = 0;
159 struct i915_engine_class_instance *engine_map = nullptr;
160
161 MOS_TraceEventExt(EVENT_GPU_CONTEXT_CREATE, EVENT_TYPE_START,
162 &GpuNode, sizeof(GpuNode), nullptr, 0);
163
164 m_i915Context[0] = mos_gem_context_create_shared(osInterface->pOsContext->bufmgr,
165 osInterface->pOsContext->intel_context,
166 I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE);
167 if (m_i915Context[0] == nullptr)
168 {
169 MOS_OS_ASSERTMESSAGE("Failed to create context.\n");
170 return MOS_STATUS_UNKNOWN;
171 }
172 m_i915Context[0]->pOsContext = osInterface->pOsContext;
173
174 m_i915ExecFlag = I915_EXEC_DEFAULT;
175
176 if (mos_query_engines_count(osInterface->pOsContext->bufmgr, &nengine))
177 {
178 MOS_OS_ASSERTMESSAGE("Failed to query engines count.\n");
179 return MOS_STATUS_UNKNOWN;
180 }
181 engine_map = (struct i915_engine_class_instance *)MOS_AllocAndZeroMemory(nengine * sizeof(struct i915_engine_class_instance));
182 MOS_OS_CHK_NULL_RETURN(engine_map);
183
184 if (GpuNode == MOS_GPU_NODE_3D)
185 {
186 __u16 engine_class = I915_ENGINE_CLASS_RENDER;
187 __u64 caps = 0;
188
189 if (mos_query_engines(osInterface->pOsContext->bufmgr, engine_class, caps, &nengine, engine_map))
190 {
191 MOS_OS_ASSERTMESSAGE("Failed to query engines.\n");
192 MOS_SafeFreeMemory(engine_map);
193 return MOS_STATUS_UNKNOWN;
194 }
195
196 if (mos_set_context_param_load_balance(m_i915Context[0], engine_map, nengine))
197 {
198 MOS_OS_ASSERTMESSAGE("Failed to set balancer extension.\n");
199 MOS_SafeFreeMemory(engine_map);
200 return MOS_STATUS_UNKNOWN;
201 }
202
203 if (createOption->SSEUValue != 0)
204 {
205 struct drm_i915_gem_context_param_sseu sseu;
206 MOS_ZeroMemory(&sseu, sizeof(sseu));
207 sseu.flags = I915_CONTEXT_SSEU_FLAG_ENGINE_INDEX;
208 sseu.engine.engine_instance = m_i915ExecFlag;
209
210 if (mos_get_context_param_sseu(m_i915Context[0], &sseu))
211 {
212 MOS_OS_ASSERTMESSAGE("Failed to get sseu configuration.");
213 MOS_SafeFreeMemory(engine_map);
214 return MOS_STATUS_UNKNOWN;
215 }
216
217 if (mos_hweight8(sseu.subslice_mask) > createOption->packed.SubSliceCount)
218 {
219 sseu.subslice_mask = mos_switch_off_n_bits(sseu.subslice_mask,
220 mos_hweight8(sseu.subslice_mask)-createOption->packed.SubSliceCount);
221 }
222
223 if (mos_set_context_param_sseu(m_i915Context[0], sseu))
224 {
225 MOS_OS_ASSERTMESSAGE("Failed to set sseu configuration.");
226 MOS_SafeFreeMemory(engine_map);
227 return MOS_STATUS_UNKNOWN;
228 }
229 }
230 }
231 else if (GpuNode == MOS_GPU_NODE_COMPUTE)
232 {
233 __u16 engine_class = 4; //To change later when linux define the name
234 __u64 caps = 0;
235
236 if (mos_query_engines(osInterface->pOsContext->bufmgr, engine_class, caps, &nengine, engine_map))
237 {
238 MOS_OS_ASSERTMESSAGE("Failed to query engines.\n");
239 MOS_SafeFreeMemory(engine_map);
240 return MOS_STATUS_UNKNOWN;
241 }
242
243 #if (_DEBUG || _RELEASE_INTERNAL)
244 SelectEngineInstanceByUser(engine_map, &nengine, m_engineInstanceSelect, GpuNode);
245 #endif
246 if (mos_set_context_param_load_balance(m_i915Context[0], engine_map, nengine))
247 {
248 MOS_OS_ASSERTMESSAGE("Failed to set balancer extension.\n");
249 MOS_SafeFreeMemory(engine_map);
250 return MOS_STATUS_UNKNOWN;
251 }
252 }
253 else if (GpuNode == MOS_GPU_NODE_VIDEO || GpuNode == MOS_GPU_NODE_VIDEO2
254 || GpuNode == MOS_GPU_NODE_VE)
255 {
256 __u16 engine_class = (GpuNode == MOS_GPU_NODE_VE)? I915_ENGINE_CLASS_VIDEO_ENHANCE : I915_ENGINE_CLASS_VIDEO;
257 __u64 caps = 0;
258
259 SetEngineQueryFlags(createOption, caps);
260
261 if (mos_query_engines(osInterface->pOsContext->bufmgr, engine_class, caps, &nengine, engine_map))
262 {
263 MOS_OS_ASSERTMESSAGE("Failed to query engines.\n");
264 MOS_SafeFreeMemory(engine_map);
265 return MOS_STATUS_UNKNOWN;
266 }
267
268 #if (_DEBUG || _RELEASE_INTERNAL)
269 SelectEngineInstanceByUser(engine_map, &nengine, m_engineInstanceSelect, GpuNode);
270 #endif
271 if (mos_set_context_param_load_balance(m_i915Context[0], engine_map, nengine))
272 {
273 MOS_OS_ASSERTMESSAGE("Failed to set balancer extension.\n");
274 MOS_SafeFreeMemory(engine_map);
275 return MOS_STATUS_UNKNOWN;
276 }
277
278 if (nengine >= 2)
279 {
280 if(!osInterface->bGucSubmission)
281 {
282 //master queue
283 m_i915Context[1] = mos_gem_context_create_shared(osInterface->pOsContext->bufmgr,
284 osInterface->pOsContext->intel_context,
285 I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE);
286 if (m_i915Context[1] == nullptr)
287 {
288 MOS_OS_ASSERTMESSAGE("Failed to create master context.\n");
289 MOS_SafeFreeMemory(engine_map);
290 return MOS_STATUS_UNKNOWN;
291 }
292 m_i915Context[1]->pOsContext = osInterface->pOsContext;
293
294 if (mos_set_context_param_load_balance(m_i915Context[1], engine_map, 1))
295 {
296 MOS_OS_ASSERTMESSAGE("Failed to set master context bond extension.\n");
297 MOS_SafeFreeMemory(engine_map);
298 return MOS_STATUS_UNKNOWN;
299 }
300
301 //slave queue
302 for (int i=1; i<nengine; i++)
303 {
304 m_i915Context[i+1] = mos_gem_context_create_shared(osInterface->pOsContext->bufmgr,
305 osInterface->pOsContext->intel_context,
306 I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE);
307 if (m_i915Context[i+1] == nullptr)
308 {
309 MOS_OS_ASSERTMESSAGE("Failed to create slave context.\n");
310 MOS_SafeFreeMemory(engine_map);
311 return MOS_STATUS_UNKNOWN;
312 }
313 m_i915Context[i+1]->pOsContext = osInterface->pOsContext;
314
315 if (mos_set_context_param_bond(m_i915Context[i+1], engine_map[0], &engine_map[i], 1) != S_SUCCESS)
316 {
317 int err = errno;
318 if (err == ENODEV)
319 {
320 mos_gem_context_destroy(m_i915Context[i+1]);
321 m_i915Context[i+1] = nullptr;
322 break;
323 }
324 else
325 {
326 MOS_OS_ASSERTMESSAGE("Failed to set slave context bond extension. errno=%d\n",err);
327 MOS_SafeFreeMemory(engine_map);
328 return MOS_STATUS_UNKNOWN;
329 }
330 }
331 }
332 }
333 else
334 {
335 //create context with different width
336 for(int i = 1; i < nengine; i++)
337 {
338 unsigned int ctxWidth = i + 1;
339 m_i915Context[i] = mos_gem_context_create_shared(osInterface->pOsContext->bufmgr,
340 osInterface->pOsContext->intel_context,
341 0); // I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE not allowed for parallel submission
342 if (mos_set_context_param_parallel(m_i915Context[i], engine_map, ctxWidth) != S_SUCCESS)
343 {
344 MOS_OS_ASSERTMESSAGE("Failed to set parallel extension since discontinuous logical engine.\n");
345 mos_gem_context_destroy(m_i915Context[i]);
346 m_i915Context[i] = nullptr;
347 break;
348 }
349 }
350 }
351 }
352 }
353 else if (GpuNode == MOS_GPU_NODE_BLT)
354 {
355 __u16 engine_class = I915_ENGINE_CLASS_COPY;
356 __u64 caps = 0;
357
358 if (mos_query_engines(osInterface->pOsContext->bufmgr, engine_class, caps, &nengine, engine_map))
359 {
360 MOS_OS_ASSERTMESSAGE("Failed to query engines.\n");
361 MOS_SafeFreeMemory(engine_map);
362 return MOS_STATUS_UNKNOWN;
363 }
364
365 if (mos_set_context_param_load_balance(m_i915Context[0], engine_map, nengine))
366 {
367 MOS_OS_ASSERTMESSAGE("Failed to set balancer extension.\n");
368 MOS_SafeFreeMemory(engine_map);
369 return MOS_STATUS_UNKNOWN;
370 }
371 }
372 else
373 {
374 MOS_OS_ASSERTMESSAGE("Unknown engine class.\n");
375 MOS_SafeFreeMemory(engine_map);
376 return MOS_STATUS_UNKNOWN;
377 }
378 MOS_SafeFreeMemory(engine_map);
379 MOS_TraceEventExt(EVENT_GPU_CONTEXT_CREATE, EVENT_TYPE_END,
380 m_i915Context, sizeof(void *),
381 &nengine, sizeof(nengine));
382 }
383 return MOS_STATUS_SUCCESS;
384 }
385
Clear()386 void GpuContextSpecific::Clear()
387 {
388 MOS_OS_FUNCTION_ENTER;
389
390 MOS_TraceEventExt(EVENT_GPU_CONTEXT_DESTROY, EVENT_TYPE_START,
391 m_i915Context, sizeof(void *), nullptr, 0);
392 // hanlde the status buf bundled w/ the specified gpucontext
393 if (m_statusBufferResource)
394 {
395 if (m_statusBufferResource->Unlock(m_osContext) != MOS_STATUS_SUCCESS)
396 {
397 MOS_OS_ASSERTMESSAGE("failed to unlock the status buf bundled w/ the specified gpucontext");
398 }
399 m_statusBufferResource->Free(m_osContext, 0);
400 MOS_Delete(m_statusBufferResource);
401 }
402 MOS_FreeMemAndSetNull(m_statusBufferMosResource);
403
404 MosUtilities::MosLockMutex(m_cmdBufPoolMutex);
405
406 if (m_cmdBufMgr)
407 {
408 for (auto& curCommandBuffer : m_cmdBufPool)
409 {
410 auto curCommandBufferSpecific = static_cast<CommandBufferSpecific *>(curCommandBuffer);
411 if (curCommandBufferSpecific == nullptr)
412 continue;
413 curCommandBufferSpecific->waitReady(); // wait ready and return to comamnd buffer manager.
414 m_cmdBufMgr->ReleaseCmdBuf(curCommandBuffer);
415 }
416 }
417
418 m_cmdBufPool.clear();
419
420 MosUtilities::MosUnlockMutex(m_cmdBufPoolMutex);
421 MosUtilities::MosDestroyMutex(m_cmdBufPoolMutex);
422 m_cmdBufPoolMutex = nullptr;
423 MOS_SafeFreeMemory(m_commandBuffer);
424 MOS_SafeFreeMemory(m_allocationList);
425 MOS_SafeFreeMemory(m_patchLocationList);
426 MOS_SafeFreeMemory(m_attachedResources);
427 MOS_SafeFreeMemory(m_writeModeList);
428 MOS_SafeFreeMemory(m_createOptionEnhanced);
429
430 for (int i=0; i<MAX_ENGINE_INSTANCE_NUM; i++)
431 {
432 if (m_i915Context[i])
433 {
434 mos_gem_context_destroy(m_i915Context[i]);
435 m_i915Context[i] = nullptr;
436 }
437 }
438 MOS_TraceEventExt(EVENT_GPU_CONTEXT_DESTROY, EVENT_TYPE_END,
439 nullptr, 0, nullptr, 0);
440 }
441
RegisterResource(PMOS_RESOURCE osResource,bool writeFlag)442 MOS_STATUS GpuContextSpecific::RegisterResource(
443 PMOS_RESOURCE osResource,
444 bool writeFlag)
445 {
446 MOS_OS_FUNCTION_ENTER;
447
448 MOS_OS_CHK_NULL_RETURN(osResource);
449
450 MOS_OS_CHK_NULL_RETURN(m_attachedResources);
451
452 PMOS_RESOURCE registeredResources = m_attachedResources;
453 uint32_t allocationIndex = 0;
454
455 for ( allocationIndex = 0; allocationIndex < m_resCount; allocationIndex++, registeredResources++)
456 {
457 if (osResource->bo == registeredResources->bo)
458 {
459 break;
460 }
461 }
462
463 // Allocation list to be updated
464 if (allocationIndex < m_maxNumAllocations)
465 {
466 // New buffer
467 if (allocationIndex == m_resCount)
468 {
469 m_resCount++;
470 }
471
472 // Set allocation
473 if (m_gpuContext >= MOS_GPU_CONTEXT_MAX)
474 {
475 MOS_OS_ASSERTMESSAGE("Gpu context exceeds max.");
476 return MOS_STATUS_UNKNOWN;
477 }
478
479 osResource->iAllocationIndex[m_gpuContext] = (allocationIndex);
480 m_attachedResources[allocationIndex] = *osResource;
481 m_writeModeList[allocationIndex] |= writeFlag;
482 m_allocationList[allocationIndex].hAllocation = &m_attachedResources[allocationIndex];
483 m_allocationList[allocationIndex].WriteOperation |= writeFlag;
484 m_numAllocations = m_resCount;
485 }
486 else
487 {
488 MOS_OS_ASSERTMESSAGE("Reached max # registrations.");
489 return MOS_STATUS_UNKNOWN;
490 }
491
492 return MOS_STATUS_SUCCESS;
493 }
494
SetPatchEntry(PMOS_INTERFACE osInterface,PMOS_PATCH_ENTRY_PARAMS params)495 MOS_STATUS GpuContextSpecific::SetPatchEntry(
496 PMOS_INTERFACE osInterface,
497 PMOS_PATCH_ENTRY_PARAMS params)
498 {
499 MOS_OS_FUNCTION_ENTER;
500
501 MOS_OS_CHK_NULL_RETURN(m_patchLocationList);
502 MOS_OS_CHK_NULL_RETURN(osInterface);
503 MOS_OS_CHK_NULL_RETURN(params);
504
505 m_patchLocationList[m_currentNumPatchLocations].AllocationIndex = params->uiAllocationIndex;
506 m_patchLocationList[m_currentNumPatchLocations].AllocationOffset = params->uiResourceOffset;
507 m_patchLocationList[m_currentNumPatchLocations].PatchOffset = params->uiPatchOffset;
508 m_patchLocationList[m_currentNumPatchLocations].uiWriteOperation = params->bWrite ? true: false;
509 m_patchLocationList[m_currentNumPatchLocations].cmdBo =
510 params->cmdBuffer != nullptr ? params->cmdBuffer->OsResource.bo : nullptr;
511
512 if (osInterface->osCpInterface &&
513 osInterface->osCpInterface->IsHMEnabled())
514 {
515 if (MOS_STATUS_SUCCESS != osInterface->osCpInterface->RegisterPatchForHM(
516 (uint32_t *)(params->cmdBufBase + params->uiPatchOffset),
517 params->bWrite,
518 params->HwCommandType,
519 params->forceDwordOffset,
520 params->presResource,
521 &m_patchLocationList[m_currentNumPatchLocations]))
522 {
523 MOS_OS_ASSERTMESSAGE("Failed to RegisterPatchForHM.");
524 }
525 }
526
527 m_currentNumPatchLocations++;
528
529 return MOS_STATUS_SUCCESS;
530 }
531
GetCommandBuffer(PMOS_COMMAND_BUFFER comamndBuffer,uint32_t flags)532 MOS_STATUS GpuContextSpecific::GetCommandBuffer(
533 PMOS_COMMAND_BUFFER comamndBuffer,
534 uint32_t flags)
535 {
536 MOS_OS_FUNCTION_ENTER;
537
538 MOS_OS_CHK_NULL_RETURN(comamndBuffer);
539 MOS_OS_CHK_NULL_RETURN(m_cmdBufMgr);
540 MOS_OS_CHK_NULL_RETURN(m_commandBuffer);
541
542 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
543 CommandBuffer* cmdBuf = nullptr;
544
545 uint32_t secondaryIdx = flags;
546 bool isPrimaryCmdBuffer = (secondaryIdx == 0);
547 bool hasSecondaryCmdBuffer = (!isPrimaryCmdBuffer &&
548 (m_secondaryCmdBufs.count(secondaryIdx) != 0));
549
550 bool needToAlloc = ((isPrimaryCmdBuffer && m_cmdBufFlushed) ||
551 (!isPrimaryCmdBuffer && !hasSecondaryCmdBuffer));
552
553 if (needToAlloc)
554 {
555 MosUtilities::MosLockMutex(m_cmdBufPoolMutex);
556 if (m_cmdBufPool.size() < MAX_CMD_BUF_NUM)
557 {
558 cmdBuf = m_cmdBufMgr->PickupOneCmdBuf(m_commandBufferSize);
559 if (cmdBuf == nullptr)
560 {
561 MOS_OS_ASSERTMESSAGE("Invalid (nullptr) Pointer.");
562 MosUtilities::MosUnlockMutex(m_cmdBufPoolMutex);
563 return MOS_STATUS_NULL_POINTER;
564 }
565 if ((eStatus = cmdBuf->BindToGpuContext(this)) != MOS_STATUS_SUCCESS)
566 {
567 MOS_OS_ASSERTMESSAGE("Invalid status of BindToGpuContext.");
568 MosUtilities::MosUnlockMutex(m_cmdBufPoolMutex);
569 return eStatus;
570 }
571 m_cmdBufPool.push_back(cmdBuf);
572 }
573 else if (m_cmdBufPool.size() == MAX_CMD_BUF_NUM && m_nextFetchIndex < m_cmdBufPool.size())
574 {
575 auto cmdBufOld = m_cmdBufPool[m_nextFetchIndex];
576 auto cmdBufSpecificOld = static_cast<CommandBufferSpecific *>(cmdBufOld);
577 if (cmdBufSpecificOld == nullptr)
578 {
579 MOS_OS_ASSERTMESSAGE("Invalid (nullptr) Pointer.");
580 MosUtilities::MosUnlockMutex(m_cmdBufPoolMutex);
581 return MOS_STATUS_NULL_POINTER;
582 }
583 cmdBufSpecificOld->waitReady();
584 cmdBufSpecificOld->UnBindToGpuContext();
585 m_cmdBufMgr->ReleaseCmdBuf(cmdBufOld); // here just return old command buffer to available pool
586
587 //pick up new comamnd buffer
588 cmdBuf = m_cmdBufMgr->PickupOneCmdBuf(m_commandBufferSize);
589 if (cmdBuf == nullptr)
590 {
591 MOS_OS_ASSERTMESSAGE("Invalid (nullptr) Pointer.");
592 MosUtilities::MosUnlockMutex(m_cmdBufPoolMutex);
593 return MOS_STATUS_NULL_POINTER;
594 }
595 if ((eStatus = cmdBuf->BindToGpuContext(this)) != MOS_STATUS_SUCCESS)
596 {
597 MOS_OS_ASSERTMESSAGE("Invalid status of BindToGpuContext.");
598 MosUtilities::MosUnlockMutex(m_cmdBufPoolMutex);
599 return eStatus;
600 }
601 m_cmdBufPool[m_nextFetchIndex] = cmdBuf;
602 }
603 else
604 {
605 MOS_OS_ASSERTMESSAGE("Command buffer bool size exceed max.");
606 MosUtilities::MosUnlockMutex(m_cmdBufPoolMutex);
607 return MOS_STATUS_UNKNOWN;
608 }
609 MosUtilities::MosUnlockMutex(m_cmdBufPoolMutex);
610
611 // util now, we got new command buffer from CmdBufMgr, next step to fill in the input command buffer
612 MOS_OS_CHK_STATUS_RETURN(cmdBuf->GetResource()->ConvertToMosResource(&comamndBuffer->OsResource));
613 comamndBuffer->pCmdBase = (uint32_t *)cmdBuf->GetLockAddr();
614 comamndBuffer->pCmdPtr = (uint32_t *)cmdBuf->GetLockAddr();
615 comamndBuffer->iOffset = 0;
616 comamndBuffer->iRemaining = cmdBuf->GetCmdBufSize();
617 comamndBuffer->iCmdIndex = m_nextFetchIndex;
618 comamndBuffer->iVdboxNodeIndex = MOS_VDBOX_NODE_INVALID;
619 comamndBuffer->iVeboxNodeIndex = MOS_VEBOX_NODE_INVALID;
620 comamndBuffer->is1stLvlBB = true;
621 comamndBuffer->Attributes.pAttriVe = nullptr;
622
623 // zero comamnd buffer
624 MOS_ZeroMemory(comamndBuffer->pCmdBase, comamndBuffer->iRemaining);
625 comamndBuffer->iSubmissionType = SUBMISSION_TYPE_SINGLE_PIPE;
626 MOS_ZeroMemory(&comamndBuffer->Attributes,sizeof(comamndBuffer->Attributes));
627
628 if (isPrimaryCmdBuffer)
629 {
630 // update command buffer relared filed in GPU context
631 m_cmdBufFlushed = false;
632
633 // keep a copy in GPU context
634 MOS_SecureMemcpy(m_commandBuffer, sizeof(MOS_COMMAND_BUFFER), comamndBuffer, sizeof(MOS_COMMAND_BUFFER));
635 }
636 else
637 {
638 PMOS_COMMAND_BUFFER tempCmdBuf = (PMOS_COMMAND_BUFFER)MOS_AllocAndZeroMemory(sizeof(MOS_COMMAND_BUFFER));
639 MOS_OS_CHK_NULL_RETURN(tempCmdBuf);
640 m_secondaryCmdBufs[secondaryIdx] = tempCmdBuf;
641 MOS_SecureMemcpy(tempCmdBuf, sizeof(MOS_COMMAND_BUFFER), comamndBuffer, sizeof(MOS_COMMAND_BUFFER));
642 }
643
644 // Command buffers are treated as cyclical buffers, the CB after the just submitted one
645 // has the minimal fence value that we should wait
646 m_nextFetchIndex++;
647 if (m_nextFetchIndex >= MAX_CMD_BUF_NUM)
648 {
649 m_nextFetchIndex = 0;
650 }
651 }
652 else
653 {
654 // current command buffer still active, directly copy to comamndBuffer
655 if (isPrimaryCmdBuffer)
656 {
657 MOS_SecureMemcpy(comamndBuffer, sizeof(MOS_COMMAND_BUFFER), m_commandBuffer, sizeof(MOS_COMMAND_BUFFER));
658 }
659 else
660 {
661 MOS_SecureMemcpy(comamndBuffer, sizeof(MOS_COMMAND_BUFFER), m_secondaryCmdBufs[secondaryIdx], sizeof(MOS_COMMAND_BUFFER));
662 }
663 }
664
665 if (isPrimaryCmdBuffer)
666 {
667 MOS_OS_CHK_STATUS_RETURN(RegisterResource(&m_commandBuffer->OsResource, false));
668 }
669 else
670 {
671 MOS_OS_CHK_STATUS_RETURN(RegisterResource(&m_secondaryCmdBufs[secondaryIdx]->OsResource, false));
672 }
673
674 return MOS_STATUS_SUCCESS;
675 }
676
ReturnCommandBuffer(PMOS_COMMAND_BUFFER cmdBuffer,uint32_t flags)677 void GpuContextSpecific::ReturnCommandBuffer(
678 PMOS_COMMAND_BUFFER cmdBuffer,
679 uint32_t flags)
680 {
681 MOS_OS_FUNCTION_ENTER;
682
683 MOS_OS_ASSERT(cmdBuffer);
684 MOS_OS_ASSERT(m_commandBuffer);
685
686 bool isPrimaryCmdBuf = (flags == 0);
687
688 if (isPrimaryCmdBuf)
689 {
690 m_commandBuffer->iOffset = cmdBuffer->iOffset;
691 m_commandBuffer->iRemaining = cmdBuffer->iRemaining;
692 m_commandBuffer->pCmdPtr = cmdBuffer->pCmdPtr;
693 m_commandBuffer->iVdboxNodeIndex = cmdBuffer->iVdboxNodeIndex;
694 m_commandBuffer->iVeboxNodeIndex = cmdBuffer->iVeboxNodeIndex;
695 }
696 else
697 {
698 uint32_t secondaryIdx = flags;
699 MOS_OS_ASSERT(m_secondaryCmdBufs.count(secondaryIdx));
700
701 MOS_SecureMemcpy(m_secondaryCmdBufs[secondaryIdx], sizeof(MOS_COMMAND_BUFFER), cmdBuffer, sizeof(MOS_COMMAND_BUFFER));
702 }
703 }
704
ResetCommandBuffer()705 MOS_STATUS GpuContextSpecific::ResetCommandBuffer()
706 {
707 m_cmdBufFlushed = true;
708 auto it = m_secondaryCmdBufs.begin();
709 while(it != m_secondaryCmdBufs.end())
710 {
711 MOS_FreeMemory(it->second);
712 it++;
713 }
714 m_secondaryCmdBufs.clear();
715 return MOS_STATUS_SUCCESS;
716 }
717
SetIndirectStateSize(const uint32_t size)718 MOS_STATUS GpuContextSpecific::SetIndirectStateSize(const uint32_t size)
719 {
720 if(size < m_commandBufferSize)
721 {
722 m_IndirectHeapSize = size;
723 return MOS_STATUS_SUCCESS;
724 }
725 else
726 {
727 MOS_OS_ASSERTMESSAGE("Indirect State Size if out of boundry!");
728 return MOS_STATUS_UNKNOWN;
729 }
730 }
731
GetIndirectState(uint32_t * offset,uint32_t * size)732 MOS_STATUS GpuContextSpecific::GetIndirectState(
733 uint32_t *offset,
734 uint32_t *size)
735 {
736 MOS_OS_FUNCTION_ENTER;
737
738 if (offset)
739 {
740 *offset = m_commandBufferSize - m_IndirectHeapSize;
741 }
742
743 if (size)
744 {
745 *size = m_IndirectHeapSize;
746 }
747
748 return MOS_STATUS_SUCCESS;
749 }
750
GetIndirectStatePointer(uint8_t ** indirectState)751 MOS_STATUS GpuContextSpecific::GetIndirectStatePointer(
752 uint8_t **indirectState)
753 {
754 MOS_OS_FUNCTION_ENTER;
755
756 MOS_OS_CHK_NULL_RETURN(indirectState);
757
758 *indirectState = (uint8_t *)m_commandBuffer->pCmdBase + m_commandBufferSize - m_IndirectHeapSize;
759
760 return MOS_STATUS_SUCCESS;
761 }
762
ResizeCommandBufferAndPatchList(uint32_t requestedCommandBufferSize,uint32_t requestedPatchListSize,uint32_t flags)763 MOS_STATUS GpuContextSpecific::ResizeCommandBufferAndPatchList(
764 uint32_t requestedCommandBufferSize,
765 uint32_t requestedPatchListSize,
766 uint32_t flags)
767 {
768 MOS_OS_FUNCTION_ENTER;
769
770 // m_commandBufferSize is used for allocate command buffer and submit command buffer, in this moment, command buffer has not allocated yet.
771 // Linux KMD requires command buffer size align to 8 bytes, or it will not execute the commands.
772 m_commandBufferSize = MOS_ALIGN_CEIL(requestedCommandBufferSize, 8);
773
774 if (requestedPatchListSize > m_maxPatchLocationsize)
775 {
776 PPATCHLOCATIONLIST newPatchList = (PPATCHLOCATIONLIST)realloc(m_patchLocationList, sizeof(PATCHLOCATIONLIST) * requestedPatchListSize);
777 MOS_OS_CHK_NULL_RETURN(newPatchList);
778
779 m_patchLocationList = newPatchList;
780
781 // now zero the extended portion
782 MOS_ZeroMemory((m_patchLocationList + m_maxPatchLocationsize), sizeof(PATCHLOCATIONLIST) * (requestedPatchListSize - m_maxPatchLocationsize));
783 m_maxPatchLocationsize = requestedPatchListSize;
784 }
785
786 return MOS_STATUS_SUCCESS;
787 }
788
ResizeCommandBuffer(uint32_t requestedSize)789 MOS_STATUS GpuContextSpecific::ResizeCommandBuffer(uint32_t requestedSize)
790 {
791 MOS_OS_FUNCTION_ENTER;
792
793 m_commandBufferSize = requestedSize;
794
795 return MOS_STATUS_SUCCESS;
796 }
797
GetVcsExecFlag(PMOS_INTERFACE osInterface,PMOS_COMMAND_BUFFER cmdBuffer,MOS_GPU_NODE gpuNode)798 uint32_t GetVcsExecFlag(PMOS_INTERFACE osInterface,
799 PMOS_COMMAND_BUFFER cmdBuffer,
800 MOS_GPU_NODE gpuNode)
801 {
802 if (osInterface == 0 ||
803 cmdBuffer == 0)
804 {
805 MOS_OS_ASSERTMESSAGE("Input invalid(null) parameter.");
806 return I915_EXEC_DEFAULT;
807 }
808
809 uint32_t vcsExecFlag = I915_EXEC_BSD | I915_EXEC_BSD_RING1;
810
811 if (MOS_VDBOX_NODE_INVALID == cmdBuffer->iVdboxNodeIndex)
812 {
813 // That's those case when BB did not have any VDBOX# specific commands.
814 // Thus, we need to select VDBOX# here. Alternatively we can rely on KMD
815 // to make balancing for us, i.e. rely on Virtual Engine support.
816 cmdBuffer->iVdboxNodeIndex = osInterface->pfnGetVdboxNodeId(osInterface, cmdBuffer);
817 if (MOS_VDBOX_NODE_INVALID == cmdBuffer->iVdboxNodeIndex)
818 {
819 cmdBuffer->iVdboxNodeIndex = (gpuNode == MOS_GPU_NODE_VIDEO)?
820 MOS_VDBOX_NODE_1: MOS_VDBOX_NODE_2;
821 }
822 }
823
824 if (MOS_VDBOX_NODE_1 == cmdBuffer->iVdboxNodeIndex)
825 {
826 vcsExecFlag = I915_EXEC_BSD | I915_EXEC_BSD_RING1;
827 }
828 else if (MOS_VDBOX_NODE_2 == cmdBuffer->iVdboxNodeIndex)
829 {
830 vcsExecFlag = I915_EXEC_BSD | I915_EXEC_BSD_RING2;
831 }
832
833 return vcsExecFlag;
834 }
835
MapResourcesToAuxTable(mos_linux_bo * cmd_bo)836 MOS_STATUS GpuContextSpecific::MapResourcesToAuxTable(mos_linux_bo *cmd_bo)
837 {
838 MOS_OS_CHK_NULL_RETURN(cmd_bo);
839
840 OsContextSpecific *osCtx = static_cast<OsContextSpecific*>(m_osContext);
841 MOS_OS_CHK_NULL_RETURN(osCtx);
842
843 AuxTableMgr *auxTableMgr = osCtx->GetAuxTableMgr();
844 if (auxTableMgr)
845 {
846 // Map compress allocations to aux table if it is not mapped.
847 for (uint32_t i = 0; i < m_numAllocations; i++)
848 {
849 auto res = (PMOS_RESOURCE)m_allocationList[i].hAllocation;
850 MOS_OS_CHK_NULL_RETURN(res);
851 MOS_OS_CHK_STATUS_RETURN(auxTableMgr->MapResource(res->pGmmResInfo, res->bo));
852 }
853 MOS_OS_CHK_STATUS_RETURN(auxTableMgr->EmitAuxTableBOList(cmd_bo));
854 }
855 return MOS_STATUS_SUCCESS;
856 }
857
SubmitCommandBuffer(PMOS_INTERFACE osInterface,PMOS_COMMAND_BUFFER cmdBuffer,bool nullRendering)858 MOS_STATUS GpuContextSpecific::SubmitCommandBuffer(
859 PMOS_INTERFACE osInterface,
860 PMOS_COMMAND_BUFFER cmdBuffer,
861 bool nullRendering)
862 {
863 MOS_OS_FUNCTION_ENTER;
864
865 MOS_TraceEventExt(EVENT_MOS_BATCH_SUBMIT, EVENT_TYPE_START, nullptr, 0, nullptr, 0);
866
867 MOS_OS_CHK_NULL_RETURN(osInterface);
868 PMOS_CONTEXT osContext = osInterface->pOsContext;
869 MOS_OS_CHK_NULL_RETURN(osContext);
870 MOS_OS_CHK_NULL_RETURN(cmdBuffer);
871 MOS_OS_CHK_NULL_RETURN(m_patchLocationList);
872
873 MOS_GPU_NODE gpuNode = OSKMGetGpuNode(m_gpuContext);
874 uint32_t execFlag = gpuNode;
875 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
876 int32_t ret = 0;
877 bool scalaEnabled = false;
878 auto it = m_secondaryCmdBufs.begin();
879
880 // Command buffer object DRM pointer
881 m_cmdBufFlushed = true;
882 auto cmd_bo = cmdBuffer->OsResource.bo;
883
884 // Map Resource to Aux if needed
885 MapResourcesToAuxTable(cmd_bo);
886 for(auto it : m_secondaryCmdBufs)
887 {
888 MapResourcesToAuxTable(it.second->OsResource.bo);
889 }
890
891 if (m_secondaryCmdBufs.size() >= 2)
892 {
893 scalaEnabled = true;
894 cmdBuffer->iSubmissionType = SUBMISSION_TYPE_MULTI_PIPE_MASTER;
895 }
896
897 std::vector<PMOS_RESOURCE> mappedResList;
898 std::vector<MOS_LINUX_BO *> skipSyncBoList;
899
900 // Now, the patching will be done, based on the patch list.
901 for (uint32_t patchIndex = 0; patchIndex < m_currentNumPatchLocations; patchIndex++)
902 {
903 auto currentPatch = &m_patchLocationList[patchIndex];
904 MOS_OS_CHK_NULL_RETURN(currentPatch);
905
906 auto tempCmdBo = currentPatch->cmdBo == nullptr ? cmd_bo : currentPatch->cmdBo;
907
908 // Following are for Nested BB buffer, if it's nested BB, we need to ensure it's locked.
909 if (tempCmdBo != cmd_bo)
910 {
911 bool isSecondaryCmdBuf = false;
912 it = m_secondaryCmdBufs.begin();
913 while(it != m_secondaryCmdBufs.end())
914 {
915 if (it->second->OsResource.bo == tempCmdBo)
916 {
917 isSecondaryCmdBuf = true;
918 break;
919 }
920 it++;
921 }
922
923 for(auto allocIdx = 0; allocIdx < m_numAllocations && (!isSecondaryCmdBuf); allocIdx++)
924 {
925 auto tempRes = (PMOS_RESOURCE)m_allocationList[allocIdx].hAllocation;
926 if (tempCmdBo == tempRes->bo)
927 {
928 GraphicsResource::LockParams param;
929 param.m_writeRequest = true;
930 tempRes->pGfxResource->Lock(m_osContext, param);
931 mappedResList.push_back(tempRes);
932 break;
933 }
934 }
935 }
936
937 // This is the resource for which patching will be done
938 auto resource = (PMOS_RESOURCE)m_allocationList[currentPatch->AllocationIndex].hAllocation;
939 MOS_OS_CHK_NULL_RETURN(resource);
940
941 // For now, we'll assume the system memory's DRM bo pointer
942 // is NULL. If nullptr is detected, then the resource has been
943 // placed inside the command buffer's indirect state area.
944 // We'll simply set alloc_bo to the command buffer's bo pointer.
945 MOS_OS_ASSERT(resource->bo);
946
947 auto alloc_bo = (resource->bo) ? resource->bo : tempCmdBo;
948
949 MOS_OS_CHK_STATUS_RETURN(osInterface->osCpInterface->PermeatePatchForHM(
950 tempCmdBo->virt,
951 currentPatch,
952 resource));
953
954 uint64_t boOffset = alloc_bo->offset64;
955 if (alloc_bo != tempCmdBo)
956 {
957 auto item_ctx = osContext->contextOffsetList.begin();
958 for (; item_ctx != osContext->contextOffsetList.end(); item_ctx++)
959 {
960 if (item_ctx->intel_context == osContext->intel_context && item_ctx->target_bo == alloc_bo)
961 {
962 boOffset = item_ctx->offset64;
963 break;
964 }
965 }
966 }
967
968 if (osContext->bUse64BitRelocs)
969 {
970 *((uint64_t *)((uint8_t *)tempCmdBo->virt + currentPatch->PatchOffset)) =
971 boOffset + currentPatch->AllocationOffset;
972 }
973 else
974 {
975 *((uint32_t *)((uint8_t *)tempCmdBo->virt + currentPatch->PatchOffset)) =
976 boOffset + currentPatch->AllocationOffset;
977 }
978
979 if (scalaEnabled)
980 {
981 it = m_secondaryCmdBufs.begin();
982 while(it != m_secondaryCmdBufs.end())
983 {
984 if (it->second->OsResource.bo == tempCmdBo &&
985 it->second->iSubmissionType & SUBMISSION_TYPE_MULTI_PIPE_SLAVE &&
986 !mos_gem_bo_is_exec_object_async(alloc_bo))
987 {
988 skipSyncBoList.push_back(alloc_bo);
989 break;
990 }
991 it++;
992 }
993 }
994 else if (cmdBuffer->iSubmissionType & SUBMISSION_TYPE_MULTI_PIPE_SLAVE &&
995 !mos_gem_bo_is_exec_object_async(alloc_bo))
996 {
997 skipSyncBoList.push_back(alloc_bo);
998 }
999
1000 #if (_DEBUG || _RELEASE_INTERNAL)
1001 {
1002 uint32_t evtData[] = {alloc_bo->handle, currentPatch->uiWriteOperation, currentPatch->AllocationOffset};
1003 MOS_TraceEventExt(EVENT_MOS_BATCH_SUBMIT, EVENT_TYPE_INFO,
1004 evtData, sizeof(evtData),
1005 &boOffset, sizeof(boOffset));
1006 }
1007 #endif
1008 if(mos_gem_bo_is_softpin(alloc_bo))
1009 {
1010 if (alloc_bo != tempCmdBo)
1011 {
1012 ret = mos_bo_add_softpin_target(tempCmdBo, alloc_bo, currentPatch->uiWriteOperation);
1013 }
1014 }
1015 else
1016 {
1017 // This call will patch the command buffer with the offsets of the indirect state region of the command buffer
1018 ret = mos_bo_emit_reloc2(
1019 tempCmdBo, // Command buffer
1020 currentPatch->PatchOffset, // Offset in the command buffer
1021 alloc_bo, // Allocation object for which the patch will be made.
1022 currentPatch->AllocationOffset, // Offset to the indirect state
1023 I915_GEM_DOMAIN_RENDER, // Read domain
1024 (currentPatch->uiWriteOperation) ? I915_GEM_DOMAIN_RENDER : 0x0, // Write domain
1025 boOffset);
1026 }
1027
1028 if (ret != 0)
1029 {
1030 MOS_OS_ASSERTMESSAGE("Error patching alloc_bo = 0x%x, cmd_bo = 0x%x.",
1031 (uintptr_t)alloc_bo,
1032 (uintptr_t)tempCmdBo);
1033 return MOS_STATUS_UNKNOWN;
1034 }
1035 }
1036
1037 for(auto res: mappedResList)
1038 {
1039 res->pGfxResource->Unlock(m_osContext);
1040 }
1041 mappedResList.clear();
1042
1043 if (scalaEnabled)
1044 {
1045 it = m_secondaryCmdBufs.begin();
1046 while(it != m_secondaryCmdBufs.end())
1047 {
1048 //Add Batch buffer End Command
1049 uint32_t batchBufferEndCmd = MI_BATCHBUFFER_END;
1050 if (MOS_FAILED(Mos_AddCommand(
1051 it->second,
1052 &batchBufferEndCmd,
1053 sizeof(uint32_t))))
1054 {
1055 MOS_OS_ASSERTMESSAGE("Inserting BB_END failed!");
1056 return MOS_STATUS_UNKNOWN;
1057 }
1058 it++;
1059 }
1060 }
1061 else
1062 {
1063 //Add Batch buffer End Command
1064 uint32_t batchBufferEndCmd = MI_BATCHBUFFER_END;
1065 if (MOS_FAILED(Mos_AddCommand(
1066 cmdBuffer,
1067 &batchBufferEndCmd,
1068 sizeof(uint32_t))))
1069 {
1070 MOS_OS_ASSERTMESSAGE("Inserting BB_END failed!");
1071 return MOS_STATUS_UNKNOWN;
1072 }
1073 }
1074
1075 // Now, we can unmap the video command buffer, since we don't need CPU access anymore.
1076 MOS_OS_CHK_NULL_RETURN(cmdBuffer->OsResource.pGfxResource);
1077 cmdBuffer->OsResource.pGfxResource->Unlock(m_osContext);
1078
1079 it = m_secondaryCmdBufs.begin();
1080 while(it != m_secondaryCmdBufs.end())
1081 {
1082 MOS_OS_CHK_NULL_RETURN(it->second->OsResource.pGfxResource);
1083 it->second->OsResource.pGfxResource->Unlock(m_osContext);
1084
1085 it++;
1086 }
1087
1088 int32_t perfData;
1089 if (osContext->pPerfData != nullptr)
1090 {
1091 perfData = *(int32_t *)(osContext->pPerfData);
1092 }
1093 else
1094 {
1095 perfData = 0;
1096 }
1097
1098 drm_clip_rect_t *cliprects = nullptr;
1099 int32_t num_cliprects = 0;
1100 int32_t DR4 = osContext->uEnablePerfTag ? perfData : 0;
1101
1102 //Since CB2 command is not supported, remove it and set cliprects to nullprt as default.
1103 if ((gpuNode == MOS_GPU_NODE_VIDEO || gpuNode == MOS_GPU_NODE_VIDEO2) &&
1104 (cmdBuffer->iSubmissionType & SUBMISSION_TYPE_SINGLE_PIPE_MASK))
1105 {
1106 if (osContext->bKMDHasVCS2)
1107 {
1108 if (osContext->bPerCmdBufferBalancing && osInterface->pfnGetVdboxNodeId)
1109 {
1110 execFlag = GetVcsExecFlag(osInterface, cmdBuffer, gpuNode);
1111 }
1112 else if (gpuNode == MOS_GPU_NODE_VIDEO)
1113 {
1114 execFlag = I915_EXEC_BSD | I915_EXEC_BSD_RING1;
1115 }
1116 else if (gpuNode == MOS_GPU_NODE_VIDEO2)
1117 {
1118 execFlag = I915_EXEC_BSD | I915_EXEC_BSD_RING2;
1119 }
1120 else if ((gpuNode == MOS_GPU_NODE_BLT))
1121 {
1122 execFlag = I915_EXEC_BLT;
1123 }
1124 else
1125 {
1126 MOS_OS_ASSERTMESSAGE("Invalid gpuNode.");
1127 }
1128 }
1129 else
1130 {
1131 execFlag = I915_EXEC_BSD | I915_EXEC_BSD_RING1;
1132 }
1133 }
1134
1135 #if (_DEBUG || _RELEASE_INTERNAL)
1136
1137 MOS_LINUX_BO *bad_cmd_bo = nullptr;
1138 MOS_LINUX_BO *nop_cmd_bo = nullptr;
1139 uint32_t dwComponentTag = 0;
1140 uint32_t dwCallType = 0;
1141
1142 //dwComponentTag 3: decode,5: vpp,6: encode
1143 //dwCallType 8: PAK(CODECHAL_ENCODE_PERFTAG_CALL_PAK_ENGINE)
1144 // 34: PREENC
1145 // 5: VPP
1146 dwComponentTag = (perfData & 0xF000) >> 12;
1147 dwCallType = (perfData & 0xFC) >> 2;
1148
1149 if (osInterface->bTriggerCodecHang &&
1150 (dwComponentTag == 3 || (dwComponentTag == 6 && dwCallType == 8) ||
1151 (dwComponentTag == 6 && dwCallType == 34) ||
1152 (dwComponentTag == 5 && dwCallType == 5)))
1153 {
1154 bad_cmd_bo = Mos_GetBadCommandBuffer_Linux(osInterface);
1155 if (bad_cmd_bo)
1156 {
1157 ret = mos_bo_mrb_exec(bad_cmd_bo,
1158 4096,
1159 nullptr,
1160 0,
1161 0,
1162 execFlag);
1163 }
1164 else
1165 {
1166 MOS_OS_ASSERTMESSAGE("Mos_GetBadCommandBuffer_Linux failed!");
1167 }
1168 }
1169 else if (osInterface->bTriggerVPHang == true)
1170 {
1171 bad_cmd_bo = Mos_GetBadCommandBuffer_Linux(osInterface);
1172
1173 if (bad_cmd_bo)
1174 {
1175 ret = mos_bo_mrb_exec(bad_cmd_bo,
1176 4096,
1177 nullptr,
1178 0,
1179 0,
1180 execFlag);
1181 }
1182 else
1183 {
1184 MOS_OS_ASSERTMESSAGE("Mos_GetBadCommandBuffer_Linux failed!");
1185 }
1186
1187 osInterface->bTriggerVPHang = false;
1188 }
1189
1190 nop_cmd_bo = nullptr;
1191 if (nullRendering == true)
1192 {
1193 nop_cmd_bo = Mos_GetNopCommandBuffer_Linux(osInterface);
1194
1195 if (nop_cmd_bo)
1196 {
1197 ret = mos_bo_mrb_exec(nop_cmd_bo,
1198 4096,
1199 nullptr,
1200 0,
1201 0,
1202 execFlag);
1203 }
1204 else
1205 {
1206 MOS_OS_ASSERTMESSAGE("Mos_GetNopCommandBuffer_Linux failed!");
1207 }
1208 }
1209
1210 #endif //(_DEBUG || _RELEASE_INTERNAL)
1211
1212 if (gpuNode != I915_EXEC_RENDER &&
1213 osInterface->osCpInterface->IsTearDownHappen())
1214 {
1215 // skip PAK command when CP tear down happen to avoid of GPU hang
1216 // conditonal batch buffer start PoC is in progress
1217 }
1218 else if (nullRendering == false)
1219 {
1220 if (osInterface->ctxBasedScheduling && m_i915Context[0] != nullptr)
1221 {
1222 if (cmdBuffer->iSubmissionType & SUBMISSION_TYPE_MULTI_PIPE_MASK)
1223 {
1224 if (scalaEnabled && !osInterface->bGucSubmission)
1225 {
1226 uint32_t secondaryIndex = 0;
1227 it = m_secondaryCmdBufs.begin();
1228 while(it != m_secondaryCmdBufs.end())
1229 {
1230 if (it->second->iSubmissionType & SUBMISSION_TYPE_MULTI_PIPE_SLAVE)
1231 {
1232 if(execFlag == MOS_GPU_NODE_VE)
1233 {
1234 // decode excluded since init in other place
1235 it->second->iSubmissionType |= (secondaryIndex << SUBMISSION_TYPE_MULTI_PIPE_SLAVE_INDEX_SHIFT);
1236 secondaryIndex++;
1237 }
1238 }
1239
1240 ret = SubmitPipeCommands(it->second,
1241 it->second->OsResource.bo,
1242 osContext,
1243 skipSyncBoList,
1244 execFlag,
1245 DR4);
1246 it++;
1247 }
1248 }
1249 else if(scalaEnabled && osInterface->bGucSubmission)
1250 {
1251 ret = ParallelSubmitCommands(m_secondaryCmdBufs,
1252 osContext,
1253 execFlag,
1254 DR4);
1255 }
1256 else
1257 {
1258 ret = SubmitPipeCommands(cmdBuffer,
1259 cmd_bo,
1260 osContext,
1261 skipSyncBoList,
1262 execFlag,
1263 DR4);
1264 }
1265 }
1266 else
1267 {
1268 ret = mos_gem_bo_context_exec2(cmd_bo,
1269 m_commandBufferSize,
1270 m_i915Context[0],
1271 cliprects,
1272 num_cliprects,
1273 DR4,
1274 m_i915ExecFlag,
1275 nullptr);
1276 }
1277 }
1278 else
1279 {
1280 ret = mos_gem_bo_context_exec2(cmd_bo,
1281 m_commandBufferSize,
1282 osContext->intel_context,
1283 cliprects,
1284 num_cliprects,
1285 DR4,
1286 execFlag,
1287 nullptr);
1288 }
1289 if (ret != 0)
1290 {
1291 eStatus = MOS_STATUS_UNKNOWN;
1292 }
1293 }
1294
1295 if (eStatus != MOS_STATUS_SUCCESS)
1296 {
1297 MOS_OS_ASSERTMESSAGE("Command buffer submission failed!");
1298 }
1299
1300 MOS_DEVULT_FuncCall(pfnUltGetCmdBuf, cmdBuffer);
1301
1302 #if MOS_COMMAND_BUFFER_DUMP_SUPPORTED
1303 pthread_mutex_lock(&command_dump_mutex);
1304 if (osInterface->bDumpCommandBuffer)
1305 {
1306 if (scalaEnabled)
1307 {
1308 it = m_secondaryCmdBufs.begin();
1309 while(it != m_secondaryCmdBufs.end())
1310 {
1311 mos_bo_map(it->second->OsResource.bo, 0);
1312 osInterface->pfnDumpCommandBuffer(osInterface, it->second);
1313 mos_bo_unmap(it->second->OsResource.bo);
1314 it++;
1315 }
1316 }
1317 else
1318 {
1319 mos_bo_map(cmd_bo, 0);
1320 osInterface->pfnDumpCommandBuffer(osInterface, cmdBuffer);
1321 mos_bo_unmap(cmd_bo);
1322 }
1323 }
1324 pthread_mutex_unlock(&command_dump_mutex);
1325 #endif // MOS_COMMAND_BUFFER_DUMP_SUPPORTED
1326
1327 #if (_DEBUG || _RELEASE_INTERNAL)
1328 if (bad_cmd_bo)
1329 {
1330 mos_bo_wait_rendering(bad_cmd_bo);
1331 mos_bo_unreference(bad_cmd_bo);
1332 }
1333 if (nop_cmd_bo)
1334 {
1335 mos_bo_unreference(nop_cmd_bo);
1336 }
1337 #endif //(_DEBUG || _RELEASE_INTERNAL)
1338
1339 //clear command buffer relocations to fix memory leak issue
1340 mos_gem_bo_clear_relocs(cmd_bo, 0);
1341 it = m_secondaryCmdBufs.begin();
1342 while(it != m_secondaryCmdBufs.end())
1343 {
1344 mos_gem_bo_clear_relocs(it->second->OsResource.bo, 0);
1345 MOS_FreeMemory(it->second);
1346 it++;
1347 }
1348 m_secondaryCmdBufs.clear();
1349 skipSyncBoList.clear();
1350
1351 // Reset resource allocation
1352 m_numAllocations = 0;
1353 MOS_ZeroMemory(m_allocationList, sizeof(ALLOCATION_LIST) * m_maxNumAllocations);
1354 m_currentNumPatchLocations = 0;
1355 MOS_ZeroMemory(m_patchLocationList, sizeof(PATCHLOCATIONLIST) * m_maxNumAllocations);
1356 m_resCount = 0;
1357
1358 MOS_ZeroMemory(m_writeModeList, sizeof(bool) * m_maxNumAllocations);
1359 finish:
1360 MOS_TraceEventExt(EVENT_MOS_BATCH_SUBMIT, EVENT_TYPE_END, &eStatus, sizeof(eStatus), nullptr, 0);
1361 return eStatus;
1362 }
1363
SubmitPipeCommands(MOS_COMMAND_BUFFER * cmdBuffer,MOS_LINUX_BO * cmdBo,PMOS_CONTEXT osContext,const std::vector<MOS_LINUX_BO * > & skipSyncBoList,uint32_t execFlag,int32_t dr4)1364 int32_t GpuContextSpecific::SubmitPipeCommands(
1365 MOS_COMMAND_BUFFER *cmdBuffer,
1366 MOS_LINUX_BO *cmdBo,
1367 PMOS_CONTEXT osContext,
1368 const std::vector<MOS_LINUX_BO *> &skipSyncBoList,
1369 uint32_t execFlag,
1370 int32_t dr4)
1371 {
1372 int32_t ret = 0;
1373 int fence = -1;
1374 unsigned int fenceFlag = 0;
1375
1376 MOS_LINUX_CONTEXT *queue = m_i915Context[0];
1377 bool isVeboxSubmission = false;
1378
1379 if (execFlag == MOS_GPU_NODE_VIDEO || execFlag == MOS_GPU_NODE_VIDEO2)
1380 {
1381 execFlag = I915_EXEC_DEFAULT;
1382 }
1383 if (execFlag == MOS_GPU_NODE_VE)
1384 {
1385 execFlag = I915_EXEC_DEFAULT;
1386 isVeboxSubmission = true;
1387 }
1388
1389 if(cmdBuffer->iSubmissionType & SUBMISSION_TYPE_MULTI_PIPE_SLAVE)
1390 {
1391 fence = osContext->submit_fence;
1392 fenceFlag = I915_EXEC_FENCE_SUBMIT;
1393 int slaveIndex = (cmdBuffer->iSubmissionType & SUBMISSION_TYPE_MULTI_PIPE_SLAVE_INDEX_MASK) >> SUBMISSION_TYPE_MULTI_PIPE_SLAVE_INDEX_SHIFT;
1394 if(slaveIndex < 7)
1395 {
1396 queue = m_i915Context[2 + slaveIndex]; //0 is for single pipe, 1 is for master, slave starts from 2
1397 }
1398 else
1399 {
1400 MOS_OS_ASSERTMESSAGE("slaveIndex value: %s is invalid!", slaveIndex);
1401 return -1;
1402 }
1403
1404 if (isVeboxSubmission)
1405 {
1406 queue = m_i915Context[cmdBuffer->iVeboxNodeIndex + 1];
1407 }
1408
1409 for(auto bo: skipSyncBoList)
1410 {
1411 mos_bo_set_exec_object_async(cmdBo, bo);
1412 }
1413 }
1414
1415 //Keep FE and BE0 running on same engine for VT decode
1416 if((cmdBuffer->iSubmissionType & SUBMISSION_TYPE_MULTI_PIPE_ALONE)
1417 || (cmdBuffer->iSubmissionType & SUBMISSION_TYPE_MULTI_PIPE_MASTER))
1418 {
1419 if(cmdBuffer->iSubmissionType & SUBMISSION_TYPE_MULTI_PIPE_MASTER)
1420 {
1421 //Only master pipe needs fence out flag
1422 fenceFlag = I915_EXEC_FENCE_OUT;
1423 }
1424 queue = m_i915Context[1];
1425 }
1426
1427 ret = mos_gem_bo_context_exec2(cmdBo,
1428 cmdBo->size,
1429 queue,
1430 nullptr,
1431 0,
1432 dr4,
1433 execFlag | fenceFlag,
1434 &fence);
1435
1436 if(cmdBuffer->iSubmissionType & SUBMISSION_TYPE_MULTI_PIPE_MASTER)
1437 {
1438 osContext->submit_fence = fence;
1439 }
1440
1441 if(cmdBuffer->iSubmissionType & SUBMISSION_TYPE_MULTI_PIPE_FLAGS_LAST_PIPE)
1442 {
1443 close(fence);
1444 }
1445
1446 return ret;
1447 }
1448
ParallelSubmitCommands(std::map<uint32_t,PMOS_COMMAND_BUFFER> secondaryCmdBufs,PMOS_CONTEXT osContext,uint32_t execFlag,int32_t dr4)1449 int32_t GpuContextSpecific::ParallelSubmitCommands(
1450 std::map<uint32_t, PMOS_COMMAND_BUFFER> secondaryCmdBufs,
1451 PMOS_CONTEXT osContext,
1452 uint32_t execFlag,
1453 int32_t dr4)
1454 {
1455 int32_t ret = 0;
1456 int fence = -1;
1457 unsigned int fenceFlag = 0;
1458 auto it = m_secondaryCmdBufs.begin();
1459 MOS_LINUX_BO *cmdBos[MAX_PARALLEN_CMD_BO_NUM];
1460 int numBos = 0; // exclude FE bo
1461
1462 MOS_LINUX_CONTEXT *queue = m_i915Context[0];
1463 bool isVeboxSubmission = false;
1464
1465 if (execFlag == MOS_GPU_NODE_VIDEO || execFlag == MOS_GPU_NODE_VIDEO2)
1466 {
1467 execFlag = I915_EXEC_DEFAULT;
1468 }
1469 if (execFlag == MOS_GPU_NODE_VE)
1470 {
1471 execFlag = I915_EXEC_DEFAULT;
1472 isVeboxSubmission = true;
1473 }
1474
1475 while(it != m_secondaryCmdBufs.end())
1476 {
1477 if(it->second->iSubmissionType & SUBMISSION_TYPE_MULTI_PIPE_ALONE)
1478 {
1479 fenceFlag = I915_EXEC_FENCE_OUT;
1480 queue = m_i915Context[0];
1481
1482 ret = mos_gem_bo_context_exec2(it->second->OsResource.bo,
1483 it->second->OsResource.bo->size,
1484 queue,
1485 nullptr,
1486 0,
1487 dr4,
1488 execFlag | fenceFlag,
1489 &fence);
1490
1491 osContext->submit_fence = fence;
1492 }
1493
1494 if((it->second->iSubmissionType & SUBMISSION_TYPE_MULTI_PIPE_MASTER)
1495 || (it->second->iSubmissionType & SUBMISSION_TYPE_MULTI_PIPE_SLAVE))
1496 {
1497 cmdBos[numBos++] = it->second->OsResource.bo;
1498
1499 if(it->second->iSubmissionType & SUBMISSION_TYPE_MULTI_PIPE_FLAGS_LAST_PIPE)
1500 {
1501 queue = m_i915Context[numBos - 1];
1502 MOS_OS_CHK_NULL_RETURN(queue);
1503 if(-1 != fence)
1504 {
1505 fenceFlag = I915_EXEC_FENCE_IN;
1506 }
1507
1508 ret = mos_gem_bo_context_exec3(cmdBos,
1509 numBos,
1510 queue,
1511 nullptr,
1512 0,
1513 dr4,
1514 execFlag | fenceFlag,
1515 &fence);
1516
1517 for(int i = 0; i < numBos; i++)
1518 {
1519 cmdBos[i] = nullptr;
1520 }
1521 numBos = 0;
1522
1523 if(-1 != fence)
1524 {
1525 close(fence);
1526 }
1527 }
1528 }
1529
1530 it++;
1531 }
1532
1533 return ret;
1534 }
1535
IncrementGpuStatusTag()1536 void GpuContextSpecific::IncrementGpuStatusTag()
1537 {
1538 m_GPUStatusTag = m_GPUStatusTag % UINT_MAX + 1;
1539 if (m_GPUStatusTag == 0)
1540 {
1541 m_GPUStatusTag = 1;
1542 }
1543 }
1544
ResetGpuContextStatus()1545 void GpuContextSpecific::ResetGpuContextStatus()
1546 {
1547 MOS_ZeroMemory(m_allocationList, sizeof(ALLOCATION_LIST) * ALLOCATIONLIST_SIZE);
1548 m_numAllocations = 0;
1549 MOS_ZeroMemory(m_patchLocationList, sizeof(PATCHLOCATIONLIST) * PATCHLOCATIONLIST_SIZE);
1550 m_currentNumPatchLocations = 0;
1551
1552 MOS_ZeroMemory(m_attachedResources, sizeof(MOS_RESOURCE) * ALLOCATIONLIST_SIZE);
1553 m_resCount = 0;
1554
1555 MOS_ZeroMemory(m_writeModeList, sizeof(bool) * ALLOCATIONLIST_SIZE);
1556
1557 if ((m_cmdBufFlushed == true) && m_commandBuffer->OsResource.bo)
1558 {
1559 m_commandBuffer->OsResource.bo = nullptr;
1560 }
1561 }
1562
AllocateGPUStatusBuf()1563 MOS_STATUS GpuContextSpecific::AllocateGPUStatusBuf()
1564 {
1565 MOS_OS_FUNCTION_ENTER;
1566
1567 m_statusBufferMosResource = (MOS_RESOURCE_HANDLE)MOS_AllocAndZeroMemory(sizeof(MOS_RESOURCE));
1568 MOS_OS_CHK_NULL_RETURN(m_statusBufferMosResource);
1569
1570 GraphicsResource::CreateParams params;
1571 params.m_tileType = MOS_TILE_LINEAR;
1572 params.m_type = MOS_GFXRES_BUFFER;
1573 params.m_format = Format_Buffer;
1574 params.m_width = sizeof(MOS_GPU_STATUS_DATA);
1575 params.m_height = 1;
1576 params.m_depth = 1;
1577 params.m_arraySize = 1;
1578 params.m_name = "GPU Status Buffer";
1579
1580 GraphicsResource *graphicsResource = GraphicsResource::CreateGraphicResource(GraphicsResource::osSpecificResource);
1581 MOS_OS_CHK_NULL_RETURN(graphicsResource);
1582
1583 MOS_OS_CHK_STATUS_RETURN(graphicsResource->Allocate(m_osContext, params));
1584
1585 GraphicsResource::LockParams lockParams;
1586 lockParams.m_writeRequest = true;
1587 auto gpuStatusData = (MOS_GPU_STATUS_DATA *)graphicsResource->Lock(m_osContext, lockParams);
1588 if (gpuStatusData == nullptr)
1589 {
1590 MOS_OS_ASSERTMESSAGE("Unable to lock gpu eStatus buffer for read.");
1591 graphicsResource->Free(m_osContext);
1592 MOS_Delete(graphicsResource);
1593 return MOS_STATUS_UNKNOWN;
1594 }
1595
1596 m_statusBufferResource = graphicsResource;
1597 return MOS_STATUS_SUCCESS;
1598 }
1599
1600 #if (_DEBUG || _RELEASE_INTERNAL)
SelectEngineInstanceByUser(struct i915_engine_class_instance * engineMap,uint32_t * engineNum,uint32_t userEngineInstance,MOS_GPU_NODE gpuNode)1601 bool GpuContextSpecific::SelectEngineInstanceByUser(struct i915_engine_class_instance *engineMap,
1602 uint32_t *engineNum, uint32_t userEngineInstance, MOS_GPU_NODE gpuNode)
1603 {
1604 uint32_t engineInstance = 0x0;
1605
1606 if(gpuNode == MOS_GPU_NODE_COMPUTE)
1607 {
1608 engineInstance = (userEngineInstance >> ENGINE_INSTANCE_SELECT_COMPUTE_INSTANCE_SHIFT)
1609 & (ENGINE_INSTANCE_SELECT_ENABLE_MASK >> (MAX_ENGINE_INSTANCE_NUM - *engineNum));
1610 }
1611 else if(gpuNode == MOS_GPU_NODE_VE)
1612 {
1613 engineInstance = (userEngineInstance >> ENGINE_INSTANCE_SELECT_VEBOX_INSTANCE_SHIFT)
1614 & (ENGINE_INSTANCE_SELECT_ENABLE_MASK >> (MAX_ENGINE_INSTANCE_NUM - *engineNum));
1615 }
1616 else if(gpuNode == MOS_GPU_NODE_VIDEO || gpuNode == MOS_GPU_NODE_VIDEO2)
1617 {
1618 engineInstance = (userEngineInstance >> ENGINE_INSTANCE_SELECT_VDBOX_INSTANCE_SHIFT)
1619 & (ENGINE_INSTANCE_SELECT_ENABLE_MASK >> (MAX_ENGINE_INSTANCE_NUM - *engineNum));
1620 }
1621 else
1622 {
1623 MOS_OS_NORMALMESSAGE("Invalid gpu node in use.");
1624 }
1625
1626 if(engineInstance)
1627 {
1628 auto unSelectIndex = 0;
1629 for(auto bit = 0; bit < *engineNum; bit++)
1630 {
1631 if(((engineInstance >> bit) & 0x1) && (bit > unSelectIndex))
1632 {
1633 engineMap[unSelectIndex].engine_class = engineMap[bit].engine_class;
1634 engineMap[unSelectIndex].engine_instance = engineMap[bit].engine_instance;
1635 engineMap[bit].engine_class = 0;
1636 engineMap[bit].engine_instance = 0;
1637 unSelectIndex++;
1638 }
1639 else if(((engineInstance >> bit) & 0x1) && (bit == unSelectIndex))
1640 {
1641 unSelectIndex++;
1642 }
1643 else if(!((engineInstance >> bit) & 0x1))
1644 {
1645 engineMap[bit].engine_class = 0;
1646 engineMap[bit].engine_instance = 0;
1647 }
1648 }
1649 *engineNum = unSelectIndex;
1650 }
1651 return engineInstance;
1652 }
1653 #endif
1654