1 /*
2 * Copyright (c) 2013-2017, Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 */
22 //!
23 //! \file codechal_memdecomp.cpp
24 //! \brief This module sets up a kernel for media memory decompression.
25
26 #include "codechal_memdecomp.h"
27 #include "codeckrnheader.h"
28
//!
//! \class    MediaObjectCopyCurbe
//! \brief    Curbe (constant URB entry) payload for the media object memory
//!           decompress copy kernel.
//! \details  The curbe data DW0-6 must be declared at the beginning of the
//!           class: the constructor clears exactly m_byteSize bytes starting
//!           at the object address.
//!
class MediaObjectCopyCurbe
{
public:
    // DW 0: index of source surface 0
    union
    {
        struct
        {
            uint32_t srcSurface0Index;
        };
        struct
        {
            uint32_t value;
        };
    } m_dw0;

    // DW 1: index of source surface 1
    union
    {
        struct
        {
            uint32_t srcSurface1Index;
        };
        struct
        {
            uint32_t value;
        };
    } m_dw1;

    // DW 2: index of source surface 2
    union
    {
        struct
        {
            uint32_t srcSurface2Index;
        };
        struct
        {
            uint32_t value;
        };
    } m_dw2;

    // DW 3: index of destination surface 0
    union
    {
        struct
        {
            uint32_t dstSurface0Index;
        };
        struct
        {
            uint32_t value;
        };
    } m_dw3;

    // DW 4: index of destination surface 1
    union
    {
        struct
        {
            uint32_t dstSurface1Index;
        };
        struct
        {
            uint32_t value;
        };
    } m_dw4;

    // DW 5: index of destination surface 2
    union
    {
        struct
        {
            uint32_t dstSurface2Index;
        };
        struct
        {
            uint32_t value;
        };
    } m_dw5;

    // DW 6: surface width
    union
    {
        struct
        {
            uint32_t surfaceWidth;
        };
        struct
        {
            uint32_t value;
        };
    } m_dw6;

    //!
    //! \brief    Constructor
    //!
    MediaObjectCopyCurbe();

    //!
    //! \brief    Destructor
    //!
    ~MediaObjectCopyCurbe() {}

    //! Byte size of the curbe data DW0-6 (7 DWORDs of 4 bytes each).
    static const size_t m_byteSize = 28;
};
140
MediaObjectCopyCurbe()141 MediaObjectCopyCurbe::MediaObjectCopyCurbe()
142 {
143 MOS_ZeroMemory(this, m_byteSize);
144 }
145
~MediaMemDecompState()146 MediaMemDecompState::~MediaMemDecompState()
147 {
148 MHW_FUNCTION_ENTER;
149
150 Delete_MhwCpInterface(m_cpInterface);
151 m_cpInterface = nullptr;
152
153 if (m_cmdBufIdGlobal)
154 {
155 m_osInterface->pfnUnlockResource(m_osInterface, &m_resCmdBufIdGlobal);
156 m_osInterface->pfnFreeResource(m_osInterface, &m_resCmdBufIdGlobal);
157 m_cmdBufIdGlobal = nullptr;
158 }
159
160 if (m_miInterface)
161 {
162 MOS_Delete(m_miInterface);
163 m_miInterface = nullptr;
164 }
165
166 if (m_renderInterface)
167 {
168 MOS_Delete(m_renderInterface);
169 m_renderInterface = nullptr;
170 }
171
172 if (m_osInterface)
173 {
174 m_osInterface->pfnDestroy(m_osInterface, false);
175 MOS_FreeMemory(m_osInterface);
176 m_osInterface = nullptr;
177 }
178 }
179
MediaMemDecompState()180 MediaMemDecompState::MediaMemDecompState() :
181 MediaMemDecompBaseState(),
182 m_currCmdBufId(0)
183 {
184 MHW_FUNCTION_ENTER;
185 m_stateHeapSettings.m_ishBehavior = HeapManager::Behavior::clientControlled;
186 m_stateHeapSettings.m_dshBehavior = HeapManager::Behavior::destructiveExtend;
187 m_stateHeapSettings.m_keepDshLocked = true;
188 m_stateHeapSettings.dwDshIncrement = 2 * MOS_PAGE_SIZE;
189
190 MOS_ZeroMemory(&m_renderContext, sizeof(m_renderContext));
191 MOS_ZeroMemory(&m_krnUniId, sizeof(m_krnUniId));
192 MOS_ZeroMemory(&m_kernelSize, sizeof(m_kernelSize));
193 MOS_ZeroMemory(&m_resCmdBufIdGlobal, sizeof(m_resCmdBufIdGlobal));
194
195 for (uint8_t idx = decompKernelStatePa; idx < decompKernelStateMax; idx++)
196 {
197 m_kernelBinary[idx] = nullptr;
198 m_kernelStates[idx] = MHW_KERNEL_STATE();
199 }
200
201 m_krnUniId[decompKernelStatePa] = IDR_CODEC_ALLPACopy;
202 m_krnUniId[decompKernelStatePl2] = IDR_CODEC_ALLPL2Copy;
203
204 }
205
GetKernelBinaryAndSize(uint8_t * kernelBase,uint32_t krnUniId,uint8_t ** kernelBinary,uint32_t * kernelSize)206 MOS_STATUS MediaMemDecompState::GetKernelBinaryAndSize(
207 uint8_t *kernelBase,
208 uint32_t krnUniId,
209 uint8_t **kernelBinary,
210 uint32_t *kernelSize)
211 {
212 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
213
214 MHW_CHK_NULL_RETURN(kernelBase);
215 MHW_CHK_NULL_RETURN(kernelBinary);
216 MHW_CHK_NULL_RETURN(kernelSize);
217
218 if (krnUniId >= IDR_CODEC_TOTAL_NUM_KERNELS)
219 {
220 eStatus = MOS_STATUS_INVALID_PARAMETER;
221 return eStatus;
222 }
223
224 uint32_t *kernelOffsetTable = (uint32_t*)kernelBase;
225 uint8_t *base = (uint8_t*)(kernelOffsetTable + IDR_CODEC_TOTAL_NUM_KERNELS + 1);
226
227 *kernelSize =
228 kernelOffsetTable[krnUniId + 1] -
229 kernelOffsetTable[krnUniId];
230 *kernelBinary =
231 ((*kernelSize) > 0) ? (base + kernelOffsetTable[krnUniId]) : nullptr;
232
233 return eStatus;
234 }
235
InitKernelState(uint32_t kernelStateIdx)236 MOS_STATUS MediaMemDecompState::InitKernelState(
237 uint32_t kernelStateIdx)
238 {
239 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
240
241 MHW_FUNCTION_ENTER;
242
243 if (kernelStateIdx >= decompKernelStateMax)
244 {
245 eStatus = MOS_STATUS_INVALID_PARAMETER;
246 return eStatus;
247 }
248
249 uint8_t **kernelBase = &m_kernelBinary[kernelStateIdx];
250 uint32_t *kernelSize = &m_kernelSize[kernelStateIdx];
251
252 MHW_CHK_STATUS_RETURN(GetKernelBinaryAndSize(
253 m_kernelBase,
254 m_krnUniId[kernelStateIdx],
255 kernelBase,
256 kernelSize));
257
258 m_stateHeapSettings.dwIshSize +=
259 MOS_ALIGN_CEIL(*kernelSize, (1 << MHW_KERNEL_OFFSET_SHIFT));
260 m_stateHeapSettings.dwDshSize += MHW_CACHELINE_SIZE* m_numMemDecompSyncTags;
261 m_stateHeapSettings.dwNumSyncTags += m_numMemDecompSyncTags;
262
263 return eStatus;
264 }
265
MemoryDecompress(PMOS_RESOURCE targetResource)266 MOS_STATUS MediaMemDecompState::MemoryDecompress(
267 PMOS_RESOURCE targetResource)
268 {
269 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
270
271 MHW_FUNCTION_ENTER;
272
273 MHW_CHK_NULL_RETURN(targetResource);
274
275 MOS_SURFACE targetSurface;
276 MOS_ZeroMemory(&targetSurface, sizeof(MOS_SURFACE));
277 targetSurface.Format = Format_Invalid;
278 targetSurface.OsResource = *targetResource;
279 MHW_CHK_STATUS_RETURN(GetResourceInfo(&targetSurface));
280
281 //Set context before proceeding
282 auto gpuContext = m_osInterface->CurrentGpuContextOrdinal;
283 m_osInterface->pfnSetGpuContext(m_osInterface, m_renderContext);
284 m_osInterface->pfnResetOsStates(m_osInterface);
285
286 DecompKernelStateIdx kernelStateIdx;
287 bool useUVPlane;
288 if ((targetSurface.Format == Format_YUY2) || (targetSurface.Format == Format_UYVY))
289 {
290 kernelStateIdx = decompKernelStatePa;
291 useUVPlane = false;
292 }
293 else if ((targetSurface.Format == Format_NV12) || (targetSurface.Format == Format_P010))
294 {
295 kernelStateIdx = decompKernelStatePl2;
296 useUVPlane = true;
297 }
298 else
299 {
300 eStatus = MOS_STATUS_INVALID_PARAMETER;
301 return eStatus;
302 }
303
304 auto kernelState = &m_kernelStates[kernelStateIdx];
305 kernelState->m_currTrackerId = m_currCmdBufId;
306
307 // preprocess in cp first
308 m_osInterface->osCpInterface->PrepareResources((void **)&targetResource, 1, nullptr, 0);
309
310 if (kernelStateIdx == decompKernelStatePl2)
311 {
312 if (m_osInterface->osCpInterface->IsSMEnabled())
313 {
314 uint32_t *kernelBase = nullptr;
315 uint32_t kernelSize = 0;
316 m_osInterface->osCpInterface->GetTK(
317 &kernelBase,
318 &kernelSize,
319 nullptr);
320 if (nullptr == kernelBase || 0 == kernelSize)
321 {
322 MHW_ASSERT("Could not get TK kernels for MMC!");
323 eStatus = MOS_STATUS_INVALID_PARAMETER;
324 return eStatus;
325 }
326
327 kernelState->KernelParams.pBinary = (uint8_t *)kernelBase;
328 }
329 else
330 {
331 kernelState->KernelParams.pBinary = m_kernelBinary[kernelStateIdx];
332 }
333 MHW_CHK_STATUS_RETURN(kernelState->m_ishRegion.AddData(
334 kernelState->KernelParams.pBinary,
335 0,
336 kernelState->KernelParams.iSize));
337 }
338
339 MHW_CHK_STATUS_RETURN(m_stateHeapInterface->pfnRequestSshSpaceForCmdBuf(
340 m_stateHeapInterface,
341 kernelState->KernelParams.iBTCount));
342
343 uint32_t dshSize = m_stateHeapInterface->pStateHeapInterface->GetSizeofCmdInterfaceDescriptorData() +
344 MOS_ALIGN_CEIL(kernelState->KernelParams.iCurbeLength,
345 m_stateHeapInterface->pStateHeapInterface->GetCurbeAlignment());
346
347 eStatus = m_stateHeapInterface->pfnAssignSpaceInStateHeap(
348 m_stateHeapInterface,
349 MHW_DSH_TYPE,
350 kernelState,
351 dshSize,
352 false,
353 true);
354
355 if (eStatus == MOS_STATUS_CLIENT_AR_NO_SPACE)
356 {
357 MHW_CHK_STATUS_RETURN(m_stateHeapInterface->pfnAssignSpaceInStateHeap(
358 m_stateHeapInterface,
359 MHW_DSH_TYPE,
360 kernelState,
361 dshSize,
362 false,
363 true));
364 }
365 else if (eStatus != MOS_STATUS_SUCCESS)
366 {
367 return eStatus;
368 }
369
370 MHW_CHK_STATUS_RETURN(m_stateHeapInterface->pfnAssignSpaceInStateHeap(
371 m_stateHeapInterface,
372 MHW_SSH_TYPE,
373 kernelState,
374 kernelState->dwSshSize,
375 false,
376 false));
377
378 MHW_INTERFACE_DESCRIPTOR_PARAMS idParams;
379 MOS_ZeroMemory(&idParams, sizeof(idParams));
380 idParams.pKernelState = kernelState;
381 MHW_CHK_STATUS_RETURN(m_stateHeapInterface->pfnSetInterfaceDescriptor(
382 m_stateHeapInterface,
383 1,
384 &idParams));
385
386 MHW_CHK_STATUS_RETURN(SetMediaObjectCopyCurbe(kernelStateIdx));
387
388 MOS_COMMAND_BUFFER cmdBuffer;
389 // Send HW commands (including SSH)
390 MHW_CHK_STATUS_RETURN(m_osInterface->pfnGetCommandBuffer(m_osInterface, &cmdBuffer, 0));
391
392 MHW_GENERIC_PROLOG_PARAMS genericPrologParams;
393 MOS_ZeroMemory(&genericPrologParams, sizeof(genericPrologParams));
394 genericPrologParams.pOsInterface = m_osInterface;
395 genericPrologParams.pvMiInterface = m_miInterface;
396 genericPrologParams.bMmcEnabled = true;
397 MHW_CHK_STATUS_RETURN(Mhw_SendGenericPrologCmd(&cmdBuffer, &genericPrologParams));
398
399 MHW_CHK_NULL_RETURN(m_renderInterface);
400 if (m_renderInterface->GetL3CacheConfig()->bL3CachingEnabled)
401 {
402 MHW_CHK_STATUS_RETURN(m_renderInterface->SetL3Cache(&cmdBuffer));
403 }
404
405 MHW_CHK_STATUS_RETURN(m_renderInterface->EnablePreemption(&cmdBuffer));
406
407 MHW_CHK_STATUS_RETURN(m_renderInterface->AddPipelineSelectCmd(&cmdBuffer, false));
408
409 MHW_CHK_STATUS_RETURN(m_stateHeapInterface->pfnSetBindingTable(
410 m_stateHeapInterface,
411 kernelState));
412
413 MHW_RCS_SURFACE_PARAMS surfaceParams;
414 MOS_ZeroMemory(&surfaceParams, sizeof(surfaceParams));
415 surfaceParams.dwNumPlanes = useUVPlane ? 2 : 1; // Y+UV : Y
416 surfaceParams.psSurface = &targetSurface;
417 // Y Plane
418 surfaceParams.dwBindingTableOffset[MHW_Y_PLANE] = copySurfaceSrcY;
419
420 if (surfaceParams.psSurface->Format == Format_YUY2)
421 {
422 surfaceParams.ForceSurfaceFormat[MHW_Y_PLANE] = MHW_GFX3DSTATE_SURFACEFORMAT_YCRCB_NORMAL;
423 }
424 else if (surfaceParams.psSurface->Format == Format_UYVY)
425 {
426 surfaceParams.ForceSurfaceFormat[MHW_Y_PLANE] = MHW_GFX3DSTATE_SURFACEFORMAT_YCRCB_SWAPY;
427 }
428 else if (surfaceParams.psSurface->Format == Format_P010)
429 {
430 surfaceParams.ForceSurfaceFormat[MHW_Y_PLANE] = MHW_GFX3DSTATE_SURFACEFORMAT_R16_UNORM;
431 }
432 else //NV12
433 {
434 surfaceParams.ForceSurfaceFormat[MHW_Y_PLANE] = MHW_GFX3DSTATE_SURFACEFORMAT_R8_UNORM;
435 }
436
437 uint32_t widthInBytes = GetSurfaceWidthInBytes(surfaceParams.psSurface);
438 surfaceParams.dwWidthToUse[MHW_Y_PLANE] = WIDTH_IN_DW(widthInBytes);
439
440 // UV Plane
441 if (useUVPlane)
442 {
443 surfaceParams.dwBindingTableOffset[MHW_U_PLANE] = copySurfaceSrcU;
444 if (surfaceParams.psSurface->Format == Format_P010)
445 {
446 surfaceParams.ForceSurfaceFormat[MHW_U_PLANE] = MHW_GFX3DSTATE_SURFACEFORMAT_YCRCB_SWAPUVY;
447 }
448 else //NV12
449 {
450 surfaceParams.ForceSurfaceFormat[MHW_U_PLANE] = MHW_GFX3DSTATE_SURFACEFORMAT_R16_UINT;
451 }
452 surfaceParams.dwBaseAddrOffset[MHW_U_PLANE] =
453 targetSurface.dwPitch *
454 MOS_ALIGN_FLOOR(targetSurface.UPlaneOffset.iYOffset, MOS_YTILE_H_ALIGNMENT);
455 surfaceParams.dwWidthToUse[MHW_U_PLANE] = WIDTH_IN_DW(widthInBytes);
456 surfaceParams.dwHeightToUse[MHW_U_PLANE] = surfaceParams.psSurface->dwHeight / 2;
457 surfaceParams.dwYOffset[MHW_U_PLANE] =
458 (targetSurface.UPlaneOffset.iYOffset % MOS_YTILE_H_ALIGNMENT);
459 }
460 m_osInterface->pfnGetMemoryCompressionMode(
461 m_osInterface, &targetSurface.OsResource, (PMOS_MEMCOMP_STATE)&surfaceParams.psSurface->CompressionMode);
462 MHW_CHK_STATUS_RETURN(m_stateHeapInterface->pfnSetSurfaceState(
463 m_stateHeapInterface,
464 kernelState,
465 &cmdBuffer,
466 1,
467 &surfaceParams));
468
469 //In place decompression: src shares the same surface with dst.
470 surfaceParams.bIsWritable = true;
471 surfaceParams.dwBindingTableOffset[MHW_Y_PLANE] = copySurfaceDstY;
472 if (useUVPlane)
473 {
474 surfaceParams.dwBindingTableOffset[MHW_U_PLANE] = copySurfaceDstU;
475 }
476 MHW_CHK_STATUS_RETURN(m_stateHeapInterface->pfnSetSurfaceState(
477 m_stateHeapInterface,
478 kernelState,
479 &cmdBuffer,
480 1,
481 &surfaceParams));
482
483 MHW_STATE_BASE_ADDR_PARAMS stateBaseAddrParams;
484 MOS_ZeroMemory(&stateBaseAddrParams, sizeof(stateBaseAddrParams));
485 MOS_RESOURCE *dsh = nullptr, *ish = nullptr;
486 MHW_CHK_NULL_RETURN(dsh = kernelState->m_dshRegion.GetResource());
487 MHW_CHK_NULL_RETURN(ish = kernelState->m_ishRegion.GetResource());
488 stateBaseAddrParams.presDynamicState = dsh;
489 stateBaseAddrParams.dwDynamicStateSize = kernelState->m_dshRegion.GetHeapSize();
490 stateBaseAddrParams.presInstructionBuffer = ish;
491 stateBaseAddrParams.dwInstructionBufferSize = kernelState->m_ishRegion.GetHeapSize();
492 MHW_CHK_STATUS_RETURN(m_renderInterface->AddStateBaseAddrCmd(
493 &cmdBuffer,
494 &stateBaseAddrParams));
495
496 MHW_VFE_PARAMS vfeParams = {};
497 vfeParams.pKernelState = kernelState;
498 auto waTable = m_osInterface->pfnGetWaTable(m_osInterface);
499
500 vfeParams.eVfeSliceDisable = MHW_VFE_SLICE_ALL;
501
502 MHW_CHK_STATUS_RETURN(m_renderInterface->AddMediaVfeCmd(
503 &cmdBuffer,
504 &vfeParams));
505
506 MHW_CURBE_LOAD_PARAMS curbeLoadParams;
507 MOS_ZeroMemory(&curbeLoadParams, sizeof(curbeLoadParams));
508 curbeLoadParams.pKernelState = kernelState;
509 MHW_CHK_STATUS_RETURN(m_renderInterface->AddMediaCurbeLoadCmd(
510 &cmdBuffer,
511 &curbeLoadParams));
512
513 MHW_ID_LOAD_PARAMS idLoadParams;
514 MOS_ZeroMemory(&idLoadParams, sizeof(idLoadParams));
515 idLoadParams.pKernelState = kernelState;
516 idLoadParams.dwNumKernelsLoaded = 1;
517 MHW_CHK_STATUS_RETURN(m_renderInterface->AddMediaIDLoadCmd(
518 &cmdBuffer,
519 &idLoadParams));
520
521 uint32_t resolutionX;
522 if (kernelStateIdx == decompKernelStatePa) // Format_YUY2, Format_UYVY
523 {
524 resolutionX = MOS_ROUNDUP_DIVIDE(targetSurface.dwWidth * 2, 32);
525 }
526 else // DecompKernelStatePl2: Format_NV12, Format_P010
527 {
528 if (targetSurface.Format == Format_P010) // Format_P010
529 {
530 resolutionX = MOS_ROUNDUP_DIVIDE(targetSurface.dwWidth * 2, 32);
531 }
532 else // Format_NV12
533 {
534 resolutionX = MOS_ROUNDUP_DIVIDE(targetSurface.dwWidth, 32);
535 }
536 }
537 uint32_t resolutionY = MOS_ROUNDUP_DIVIDE(targetSurface.dwHeight, 16);
538
539 MHW_WALKER_PARAMS walkerParams;
540 MOS_ZeroMemory(&walkerParams, sizeof(walkerParams));
541 walkerParams.WalkerMode = MHW_WALKER_MODE_SINGLE;
542 walkerParams.BlockResolution.x = resolutionX;
543 walkerParams.BlockResolution.y = resolutionY;
544 walkerParams.GlobalResolution.x = resolutionX;
545 walkerParams.GlobalResolution.y = resolutionY;
546 walkerParams.GlobalOutlerLoopStride.x = resolutionX;
547 walkerParams.GlobalOutlerLoopStride.y = 0;
548 walkerParams.GlobalInnerLoopUnit.x = 0;
549 walkerParams.GlobalInnerLoopUnit.y = resolutionY;
550 walkerParams.dwLocalLoopExecCount = 0xFFFF; //MAX VALUE
551 walkerParams.dwGlobalLoopExecCount = 0xFFFF; //MAX VALUE
552
553 // No dependency
554 walkerParams.ScoreboardMask = 0;
555 // Raster scan walking pattern
556 walkerParams.LocalOutLoopStride.x = 0;
557 walkerParams.LocalOutLoopStride.y = 1;
558 walkerParams.LocalInnerLoopUnit.x = 1;
559 walkerParams.LocalInnerLoopUnit.y = 0;
560 walkerParams.LocalEnd.x = resolutionX - 1;
561 walkerParams.LocalEnd.y = 0;
562
563 MHW_CHK_STATUS_RETURN(m_renderInterface->AddMediaObjectWalkerCmd(
564 &cmdBuffer,
565 &walkerParams));
566
567 // Check if destination surface needs to be synchronized, before command buffer submission
568 MOS_SYNC_PARAMS syncParams;
569 MOS_ZeroMemory(&syncParams, sizeof(syncParams));
570 syncParams.uiSemaphoreCount = 1;
571 syncParams.GpuContext = m_renderContext;
572 syncParams.presSyncResource = &targetSurface.OsResource;
573 syncParams.bReadOnly = false;
574 syncParams.bDisableDecodeSyncLock = m_disableDecodeSyncLock;
575 syncParams.bDisableLockForTranscode = m_disableLockForTranscode;
576
577 MHW_CHK_STATUS_RETURN(m_osInterface->pfnPerformOverlaySync(m_osInterface, &syncParams));
578 MHW_CHK_STATUS_RETURN(m_osInterface->pfnResourceWait(m_osInterface, &syncParams));
579
580 // Update the resource tag (s/w tag) for On-Demand Sync
581 m_osInterface->pfnSetResourceSyncTag(m_osInterface, &syncParams);
582
583 // Update the tag in GPU Sync eStatus buffer (H/W Tag) to match the current S/W tag
584 if (m_osInterface->bTagResourceSync)
585 {
586 MHW_PIPE_CONTROL_PARAMS pipeControlParams;
587 MOS_ZeroMemory(&pipeControlParams, sizeof(pipeControlParams));
588
589 pipeControlParams.dwFlushMode = MHW_FLUSH_WRITE_CACHE;
590 MHW_CHK_STATUS_RETURN(m_miInterface->AddPipeControl(
591 &cmdBuffer,
592 nullptr,
593 &pipeControlParams));
594
595 MHW_CHK_STATUS_RETURN(WriteSyncTagToResourceCmd(&cmdBuffer));
596 }
597
598 MHW_MI_STORE_DATA_PARAMS miStoreDataParams;
599 MOS_ZeroMemory(&miStoreDataParams, sizeof(miStoreDataParams));
600 miStoreDataParams.pOsResource = &m_resCmdBufIdGlobal;
601 miStoreDataParams.dwValue = m_currCmdBufId;
602 MHW_CHK_STATUS_RETURN(m_miInterface->AddMiStoreDataImmCmd(
603 &cmdBuffer,
604 &miStoreDataParams));
605
606 MHW_CHK_STATUS_RETURN(m_stateHeapInterface->pfnSubmitBlocks(
607 m_stateHeapInterface,
608 kernelState));
609 MHW_CHK_STATUS_RETURN(m_stateHeapInterface->pfnUpdateGlobalCmdBufId(
610 m_stateHeapInterface));
611
612 // Add PipeControl to invalidate ISP and MediaState to avoid PageFault issue
613 // This code is temporal and it will be moved to batch buffer end in short
614 PLATFORM platform;
615 m_osInterface->pfnGetPlatform(m_osInterface, &platform);
616 if (GFX_IS_GEN_9_OR_LATER(platform))
617 {
618 MHW_PIPE_CONTROL_PARAMS pipeControlParams;
619
620 MOS_ZeroMemory(&pipeControlParams, sizeof(pipeControlParams));
621 pipeControlParams.dwFlushMode = MHW_FLUSH_WRITE_CACHE;
622 pipeControlParams.bGenericMediaStateClear = true;
623 pipeControlParams.bIndirectStatePointersDisable = true;
624 pipeControlParams.bDisableCSStall = false;
625 MHW_CHK_STATUS_RETURN(m_miInterface->AddPipeControl(&cmdBuffer, NULL, &pipeControlParams));
626
627 if (MEDIA_IS_WA(m_osInterface->pfnGetWaTable(m_osInterface), WaSendDummyVFEafterPipelineSelect))
628 {
629 MHW_VFE_PARAMS vfeStateParams = {};
630 vfeStateParams.dwNumberofURBEntries = 1;
631 MHW_CHK_STATUS_RETURN(m_renderInterface->AddMediaVfeCmd(&cmdBuffer, &vfeStateParams));
632 }
633 }
634
635 MHW_CHK_STATUS_RETURN(m_miInterface->AddMiBatchBufferEnd(
636 &cmdBuffer,
637 nullptr));
638
639 m_osInterface->pfnReturnCommandBuffer(m_osInterface, &cmdBuffer, 0);
640
641 MHW_CHK_STATUS_RETURN(m_osInterface->pfnSubmitCommandBuffer(
642 m_osInterface,
643 &cmdBuffer,
644 m_renderContextUsesNullHw));
645
646 // Update the compression mode
647 MHW_CHK_STATUS_RETURN(m_osInterface->pfnSetMemoryCompressionMode(
648 m_osInterface,
649 targetResource,
650 MOS_MEMCOMP_DISABLED));
651 MHW_CHK_STATUS_RETURN(m_osInterface->pfnSetMemoryCompressionHint(
652 m_osInterface,
653 targetResource,
654 false));
655
656 //Update CmdBufId...
657 m_currCmdBufId++;
658 if (m_currCmdBufId == MemoryBlock::m_invalidTrackerId)
659 {
660 m_currCmdBufId++;
661 }
662
663 // Send the signal to indicate decode completion, in case On-Demand Sync is not present
664 MHW_CHK_STATUS_RETURN(m_osInterface->pfnResourceSignal(m_osInterface, &syncParams));
665
666 if (gpuContext != m_renderContext)
667 {
668 m_osInterface->pfnSetGpuContext(m_osInterface, gpuContext);
669 }
670
671 return eStatus;
672 }
673
GetResourceInfo(PMOS_SURFACE surface)674 MOS_STATUS MediaMemDecompState::GetResourceInfo(
675 PMOS_SURFACE surface)
676 {
677 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
678
679 MHW_CHK_NULL_RETURN(m_osInterface);
680 MHW_CHK_NULL_RETURN(surface);
681
682 MOS_SURFACE details;
683 MOS_ZeroMemory(&details, sizeof(details));
684 details.Format = Format_Invalid;
685
686 MHW_CHK_STATUS_RETURN(m_osInterface->pfnGetResourceInfo(
687 m_osInterface,
688 &surface->OsResource,
689 &details));
690
691 surface->Format = details.Format;
692 surface->dwWidth = details.dwWidth;
693 surface->dwHeight = details.dwHeight;
694 surface->dwPitch = details.dwPitch;
695 surface->dwDepth = details.dwDepth;
696 surface->bArraySpacing = details.bArraySpacing;
697 surface->TileType = details.TileType;
698 surface->TileModeGMM = details.TileModeGMM;
699 surface->bGMMTileEnabled = details.bGMMTileEnabled;
700 surface->dwOffset = details.RenderOffset.YUV.Y.BaseOffset;
701 surface->UPlaneOffset.iSurfaceOffset = details.RenderOffset.YUV.U.BaseOffset;
702 surface->UPlaneOffset.iXOffset = details.RenderOffset.YUV.U.XOffset;
703 surface->UPlaneOffset.iYOffset =
704 (surface->UPlaneOffset.iSurfaceOffset - surface->dwOffset) / surface->dwPitch +
705 details.RenderOffset.YUV.U.YOffset;
706 surface->VPlaneOffset.iSurfaceOffset = details.RenderOffset.YUV.V.BaseOffset;
707 surface->VPlaneOffset.iXOffset = details.RenderOffset.YUV.V.XOffset;
708 surface->VPlaneOffset.iYOffset =
709 (surface->VPlaneOffset.iSurfaceOffset - surface->dwOffset) / surface->dwPitch +
710 details.RenderOffset.YUV.V.YOffset;
711 surface->bCompressible = details.bCompressible;
712 surface->bIsCompressed = details.bIsCompressed;
713 surface->CompressionMode = details.CompressionMode;
714
715 return eStatus;
716 }
717
GetSurfaceWidthInBytes(PMOS_SURFACE surface)718 uint32_t MediaMemDecompState::GetSurfaceWidthInBytes(PMOS_SURFACE surface)
719 {
720 uint32_t widthInBytes;
721
722 switch (surface->Format)
723 {
724 case Format_IMC1:
725 case Format_IMC3:
726 case Format_IMC2:
727 case Format_IMC4:
728 case Format_NV12:
729 case Format_YV12:
730 case Format_I420:
731 case Format_IYUV:
732 case Format_400P:
733 case Format_411P:
734 case Format_422H:
735 case Format_422V:
736 case Format_444P:
737 case Format_RGBP:
738 case Format_BGRP:
739 widthInBytes = surface->dwWidth;
740 break;
741 case Format_YUY2:
742 case Format_YUYV:
743 case Format_YVYU:
744 case Format_UYVY:
745 case Format_VYUY:
746 case Format_P010:
747 widthInBytes = surface->dwWidth << 1;
748 break;
749 case Format_A8R8G8B8:
750 case Format_X8R8G8B8:
751 case Format_A8B8G8R8:
752 widthInBytes = surface->dwWidth << 2;
753 break;
754 default:
755 widthInBytes = surface->dwWidth;
756 break;
757 }
758
759 return widthInBytes;
760 }
761
WriteSyncTagToResourceCmd(PMOS_COMMAND_BUFFER cmdBuffer)762 MOS_STATUS MediaMemDecompState::WriteSyncTagToResourceCmd(
763 PMOS_COMMAND_BUFFER cmdBuffer)
764 {
765 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
766
767 MHW_FUNCTION_ENTER;
768
769 MOS_RESOURCE globalGpuContextSyncTagBuffer;
770 MHW_CHK_STATUS_RETURN(m_osInterface->pfnGetGpuStatusBufferResource(
771 m_osInterface,
772 &globalGpuContextSyncTagBuffer));
773
774 uint32_t offset = m_osInterface->pfnGetGpuStatusTagOffset(
775 m_osInterface,
776 m_osInterface->CurrentGpuContextOrdinal);
777 uint32_t value = m_osInterface->pfnGetGpuStatusTag(
778 m_osInterface,
779 m_osInterface->CurrentGpuContextOrdinal);
780
781 MHW_MI_STORE_DATA_PARAMS params;
782 params.pOsResource = &globalGpuContextSyncTagBuffer;
783 params.dwResourceOffset = offset;
784 params.dwValue = value;
785
786 MHW_CHK_STATUS_RETURN(m_miInterface->AddMiStoreDataImmCmd(cmdBuffer, ¶ms));
787
788 // Increment GPU Context Tag for next use
789 m_osInterface->pfnIncrementGpuStatusTag(m_osInterface, m_osInterface->CurrentGpuContextOrdinal);
790
791 return eStatus;
792 }
793
SetMediaObjectCopyCurbe(DecompKernelStateIdx kernelStateIdx)794 MOS_STATUS MediaMemDecompState::SetMediaObjectCopyCurbe(
795 DecompKernelStateIdx kernelStateIdx)
796 {
797 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
798
799 MHW_FUNCTION_ENTER;
800
801 if ((kernelStateIdx >= decompKernelStateMax))
802 {
803 eStatus = MOS_STATUS_INVALID_PARAMETER;
804 return eStatus;
805 }
806
807 MediaObjectCopyCurbe cmd;
808
809 cmd.m_dw0.srcSurface0Index = copySurfaceSrcY;
810 cmd.m_dw3.dstSurface0Index = copySurfaceDstY;
811
812 if (kernelStateIdx == decompKernelStatePl2)
813 {
814 cmd.m_dw1.srcSurface1Index = copySurfaceSrcU;
815 cmd.m_dw4.dstSurface1Index = copySurfaceDstU;
816 }
817
818 MHW_CHK_STATUS_RETURN(m_kernelStates[kernelStateIdx].m_dshRegion.AddData(
819 &cmd,
820 m_kernelStates[kernelStateIdx].dwCurbeOffset,
821 sizeof(cmd)));
822
823 return eStatus;
824 }
825
SetKernelStateParams()826 MOS_STATUS MediaMemDecompState::SetKernelStateParams()
827 {
828 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
829
830 MHW_FUNCTION_ENTER;
831
832 MHW_CHK_NULL_RETURN(m_renderInterface->GetHwCaps());
833
834 for (uint32_t krnIdx = 0; krnIdx < decompKernelStateMax; krnIdx++)
835 {
836 auto kernelState = &m_kernelStates[krnIdx];
837 kernelState->KernelParams.pBinary = m_kernelBinary[krnIdx];
838 kernelState->KernelParams.iSize = m_kernelSize[krnIdx];
839 kernelState->KernelParams.iBTCount = copySurfaceNum;
840 kernelState->KernelParams.iThreadCount = m_renderInterface->GetHwCaps()->dwMaxThreads;
841 kernelState->KernelParams.iCurbeLength = MOS_ALIGN_CEIL(
842 MediaObjectCopyCurbe::m_byteSize,
843 m_stateHeapInterface->pStateHeapInterface->GetCurbeAlignment());
844 kernelState->KernelParams.iBlockWidth = 32;
845 kernelState->KernelParams.iBlockHeight = 16;
846 kernelState->KernelParams.iIdCount = 1;
847
848 kernelState->dwCurbeOffset =
849 m_stateHeapInterface->pStateHeapInterface->GetSizeofCmdInterfaceDescriptorData();
850
851 MHW_CHK_STATUS_RETURN(m_stateHeapInterface->pfnCalculateSshAndBtSizesRequested(
852 m_stateHeapInterface,
853 kernelState->KernelParams.iBTCount,
854 &kernelState->dwSshSize,
855 &kernelState->dwBindingTableSize));
856
857 kernelState->dwKernelBinaryOffset = 0;
858
859 eStatus = m_stateHeapInterface->pfnAssignSpaceInStateHeap(
860 m_stateHeapInterface,
861 MHW_ISH_TYPE,
862 kernelState,
863 kernelState->KernelParams.iSize,
864 true,
865 false);
866
867 if (eStatus == MOS_STATUS_CLIENT_AR_NO_SPACE)
868 {
869 MHW_ASSERTMESSAGE("CodecHal does not handle this case");
870 return eStatus;
871 }
872 else if (eStatus != MOS_STATUS_SUCCESS)
873 {
874 return eStatus;
875 }
876
877 MHW_CHK_STATUS_RETURN(kernelState->m_ishRegion.AddData(
878 kernelState->KernelParams.pBinary,
879 0,
880 kernelState->KernelParams.iSize));
881 }
882
883 return eStatus;
884 }
885
Initialize(PMOS_INTERFACE osInterface,MhwCpInterface * cpInterface,MhwMiInterface * miInterface,MhwRenderInterface * renderInterface)886 MOS_STATUS MediaMemDecompState::Initialize(
887 PMOS_INTERFACE osInterface,
888 MhwCpInterface *cpInterface,
889 MhwMiInterface *miInterface,
890 MhwRenderInterface *renderInterface)
891 {
892 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
893
894 MHW_FUNCTION_ENTER;
895
896 MHW_CHK_NULL_RETURN(osInterface);
897 MHW_CHK_NULL_RETURN(cpInterface);
898 MHW_CHK_NULL_RETURN(miInterface);
899 MHW_CHK_NULL_RETURN(renderInterface);
900
901 m_osInterface = osInterface;
902 m_cpInterface = cpInterface;
903 m_miInterface = miInterface;
904 m_renderInterface = renderInterface;
905
906 for (uint8_t kernelIdx = decompKernelStatePa; kernelIdx < decompKernelStateMax; kernelIdx++)
907 {
908 MHW_CHK_STATUS_RETURN(InitKernelState(kernelIdx));
909 }
910
911 if (m_stateHeapSettings.dwIshSize > 0 &&
912 m_stateHeapSettings.dwDshSize > 0 &&
913 m_stateHeapSettings.dwNumSyncTags > 0)
914 {
915 MHW_CHK_STATUS_RETURN(m_renderInterface->AllocateHeaps(
916 m_stateHeapSettings));
917 }
918
919 m_stateHeapInterface = m_renderInterface->m_stateHeapInterface;
920 MHW_CHK_NULL_RETURN(m_stateHeapInterface);
921
922 if (m_osInterface->pfnIsGpuContextValid(m_osInterface, MOS_GPU_CONTEXT_RENDER) == MOS_STATUS_SUCCESS)
923 {
924 m_renderContext = MOS_GPU_CONTEXT_RENDER;
925 }
926 else
927 {
928 MOS_GPUCTX_CREATOPTIONS createOption;
929 MHW_CHK_STATUS_RETURN(m_osInterface->pfnCreateGpuContext(
930 m_osInterface,
931 MOS_GPU_CONTEXT_RENDER,
932 MOS_GPU_NODE_3D,
933 &createOption));
934
935 m_renderContext = MOS_GPU_CONTEXT_RENDER;
936 }
937
938 MOS_NULL_RENDERING_FLAGS nullHWAccelerationEnable;
939 nullHWAccelerationEnable.Value = 0;
940 m_disableDecodeSyncLock = false;
941 #if (_DEBUG || _RELEASE_INTERNAL)
942 MOS_USER_FEATURE_VALUE_DATA userFeatureData;
943 MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
944 MOS_UserFeature_ReadValue_ID(
945 nullptr,
946 __MEDIA_USER_FEATURE_VALUE_NULL_HW_ACCELERATION_ENABLE_ID,
947 &userFeatureData);
948 nullHWAccelerationEnable.Value = userFeatureData.u32Data;
949
950 MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
951 MOS_UserFeature_ReadValue_ID(
952 nullptr,
953 __MEDIA_USER_FEATURE_VALUE_DECODE_LOCK_DISABLE_ID,
954 &userFeatureData);
955 m_disableDecodeSyncLock = userFeatureData.u32Data ? true : false;
956 #endif // _DEBUG || _RELEASE_INTERNAL
957
958 m_disableLockForTranscode =
959 MEDIA_IS_WA(m_osInterface->pfnGetWaTable(m_osInterface), WaDisableLockForTranscodePerf);
960
961 MHW_CHK_STATUS_RETURN(SetKernelStateParams());
962
963 m_renderContextUsesNullHw =
964 ((m_renderContext == MOS_GPU_CONTEXT_RENDER) ? nullHWAccelerationEnable.CtxRender : nullHWAccelerationEnable.CtxRender2) ||
965 nullHWAccelerationEnable.Mmc;
966
967 MOS_ALLOC_GFXRES_PARAMS allocParams;
968 MOS_ZeroMemory(&allocParams, sizeof(allocParams));
969 allocParams.Type = MOS_GFXRES_BUFFER;
970 allocParams.TileType = MOS_TILE_LINEAR;
971 allocParams.Format = Format_Buffer;
972 allocParams.dwBytes = MHW_CACHELINE_SIZE;
973 allocParams.pBufName = "CmdBufIdGlobal";
974 MHW_CHK_STATUS_RETURN(m_osInterface->pfnAllocateResource(
975 m_osInterface,
976 &allocParams,
977 &m_resCmdBufIdGlobal));
978 m_currCmdBufId = MemoryBlock::m_invalidTrackerId + 1;
979
980 MOS_LOCK_PARAMS lockParams;
981 MOS_ZeroMemory(&lockParams, sizeof(lockParams));
982 lockParams.WriteOnly = 1;
983 m_cmdBufIdGlobal = (uint32_t *)m_osInterface->pfnLockResource(
984 m_osInterface,
985 &m_resCmdBufIdGlobal,
986 &lockParams);
987 MHW_CHK_NULL_RETURN(m_cmdBufIdGlobal);
988 MOS_ZeroMemory(m_cmdBufIdGlobal, allocParams.dwBytes);
989
990 MHW_CHK_STATUS_RETURN(m_stateHeapInterface->pfnSetCmdBufStatusPtr(
991 m_stateHeapInterface,
992 m_cmdBufIdGlobal));
993
994 return eStatus;
995 }
996