1 /*
2 * Copyright (c) 2013-2017, Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 */
22 //!
23 //! \file     codechal_memdecomp.cpp
24 //! \brief    This module sets up a kernel for media memory decompression.
25 
26 #include "codechal_memdecomp.h"
27 #include "codeckrnheader.h"
28 
29 //!
30 //! \class MediaObjectCopyCurbe
//! \brief Media object memory decompress copy kernel curbe.
//!        Note: Curbe data DW0-6 must be defined at the beginning of the class.
33 //!
class MediaObjectCopyCurbe
{
public:
    // Every DWORD below can be accessed either through its named field or
    // through the raw 32-bit `value` alias that shares the same storage.

    // DW 0
    union
    {
        uint32_t srcSurface0Index;
        uint32_t value;
    } m_dw0;

    // DW 1
    union
    {
        uint32_t srcSurface1Index;
        uint32_t value;
    } m_dw1;

    // DW 2
    union
    {
        uint32_t srcSurface2Index;
        uint32_t value;
    } m_dw2;

    // DW 3
    union
    {
        uint32_t dstSurface0Index;
        uint32_t value;
    } m_dw3;

    // DW 4
    union
    {
        uint32_t dstSurface1Index;
        uint32_t value;
    } m_dw4;

    // DW 5
    union
    {
        uint32_t dstSurface2Index;
        uint32_t value;
    } m_dw5;

    // DW 6
    union
    {
        uint32_t surfaceWidth;
        uint32_t value;
    } m_dw6;

    //!
    //! \brief    Constructor
    //!
    MediaObjectCopyCurbe();

    //!
    //! \brief    Destructor
    //!
    ~MediaObjectCopyCurbe() {}

    static const size_t m_byteSize = 28;  //!< Byte size of curbe data DW0-6 (7 DWORDs).
};
140 
MediaObjectCopyCurbe()141 MediaObjectCopyCurbe::MediaObjectCopyCurbe()
142 {
143     MOS_ZeroMemory(this, m_byteSize);
144 }
145 
~MediaMemDecompState()146 MediaMemDecompState::~MediaMemDecompState()
147 {
148     MHW_FUNCTION_ENTER;
149 
150     Delete_MhwCpInterface(m_cpInterface);
151     m_cpInterface = nullptr;
152 
153     if (m_cmdBufIdGlobal)
154     {
155         m_osInterface->pfnUnlockResource(m_osInterface, &m_resCmdBufIdGlobal);
156         m_osInterface->pfnFreeResource(m_osInterface, &m_resCmdBufIdGlobal);
157         m_cmdBufIdGlobal = nullptr;
158     }
159 
160     if (m_miInterface)
161     {
162         MOS_Delete(m_miInterface);
163         m_miInterface = nullptr;
164     }
165 
166     if (m_renderInterface)
167     {
168         MOS_Delete(m_renderInterface);
169         m_renderInterface = nullptr;
170     }
171 
172     if (m_osInterface)
173     {
174         m_osInterface->pfnDestroy(m_osInterface, false);
175         MOS_FreeMemory(m_osInterface);
176         m_osInterface = nullptr;
177     }
178 }
179 
MediaMemDecompState()180 MediaMemDecompState::MediaMemDecompState() :
181     MediaMemDecompBaseState(),
182     m_currCmdBufId(0)
183 {
184     MHW_FUNCTION_ENTER;
185     m_stateHeapSettings.m_ishBehavior = HeapManager::Behavior::clientControlled;
186     m_stateHeapSettings.m_dshBehavior = HeapManager::Behavior::destructiveExtend;
187     m_stateHeapSettings.m_keepDshLocked = true;
188     m_stateHeapSettings.dwDshIncrement = 2 * MOS_PAGE_SIZE;
189 
190     MOS_ZeroMemory(&m_renderContext, sizeof(m_renderContext));
191     MOS_ZeroMemory(&m_krnUniId, sizeof(m_krnUniId));
192     MOS_ZeroMemory(&m_kernelSize, sizeof(m_kernelSize));
193     MOS_ZeroMemory(&m_resCmdBufIdGlobal, sizeof(m_resCmdBufIdGlobal));
194 
195     for (uint8_t idx = decompKernelStatePa; idx < decompKernelStateMax; idx++)
196     {
197         m_kernelBinary[idx] = nullptr;
198         m_kernelStates[idx] = MHW_KERNEL_STATE();
199     }
200 
201      m_krnUniId[decompKernelStatePa] = IDR_CODEC_ALLPACopy;
202      m_krnUniId[decompKernelStatePl2] = IDR_CODEC_ALLPL2Copy;
203 
204 }
205 
GetKernelBinaryAndSize(uint8_t * kernelBase,uint32_t krnUniId,uint8_t ** kernelBinary,uint32_t * kernelSize)206 MOS_STATUS MediaMemDecompState::GetKernelBinaryAndSize(
207     uint8_t  *kernelBase,
208     uint32_t krnUniId,
209     uint8_t  **kernelBinary,
210     uint32_t *kernelSize)
211 {
212     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
213 
214     MHW_CHK_NULL_RETURN(kernelBase);
215     MHW_CHK_NULL_RETURN(kernelBinary);
216     MHW_CHK_NULL_RETURN(kernelSize);
217 
218     if (krnUniId >= IDR_CODEC_TOTAL_NUM_KERNELS)
219     {
220         eStatus = MOS_STATUS_INVALID_PARAMETER;
221         return eStatus;
222     }
223 
224     uint32_t *kernelOffsetTable = (uint32_t*)kernelBase;
225     uint8_t  *base              = (uint8_t*)(kernelOffsetTable + IDR_CODEC_TOTAL_NUM_KERNELS + 1);
226 
227     *kernelSize =
228         kernelOffsetTable[krnUniId + 1] -
229         kernelOffsetTable[krnUniId];
230     *kernelBinary =
231         ((*kernelSize) > 0) ? (base + kernelOffsetTable[krnUniId]) : nullptr;
232 
233     return eStatus;
234 }
235 
InitKernelState(uint32_t kernelStateIdx)236 MOS_STATUS MediaMemDecompState::InitKernelState(
237     uint32_t                 kernelStateIdx)
238 {
239     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
240 
241     MHW_FUNCTION_ENTER;
242 
243     if (kernelStateIdx >= decompKernelStateMax)
244     {
245         eStatus = MOS_STATUS_INVALID_PARAMETER;
246         return eStatus;
247     }
248 
249     uint8_t **kernelBase  = &m_kernelBinary[kernelStateIdx];
250     uint32_t *kernelSize = &m_kernelSize[kernelStateIdx];
251 
252     MHW_CHK_STATUS_RETURN(GetKernelBinaryAndSize(
253         m_kernelBase,
254         m_krnUniId[kernelStateIdx],
255         kernelBase,
256         kernelSize));
257 
258     m_stateHeapSettings.dwIshSize +=
259         MOS_ALIGN_CEIL(*kernelSize, (1 << MHW_KERNEL_OFFSET_SHIFT));
260     m_stateHeapSettings.dwDshSize += MHW_CACHELINE_SIZE* m_numMemDecompSyncTags;
261     m_stateHeapSettings.dwNumSyncTags += m_numMemDecompSyncTags;
262 
263     return eStatus;
264 }
265 
MemoryDecompress(PMOS_RESOURCE targetResource)266 MOS_STATUS MediaMemDecompState::MemoryDecompress(
267     PMOS_RESOURCE targetResource)
268 {
269     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
270 
271     MHW_FUNCTION_ENTER;
272 
273     MHW_CHK_NULL_RETURN(targetResource);
274 
275     MOS_SURFACE targetSurface;
276     MOS_ZeroMemory(&targetSurface, sizeof(MOS_SURFACE));
277     targetSurface.Format     = Format_Invalid;
278     targetSurface.OsResource = *targetResource;
279     MHW_CHK_STATUS_RETURN(GetResourceInfo(&targetSurface));
280 
281     //Set context before proceeding
282     auto gpuContext = m_osInterface->CurrentGpuContextOrdinal;
283     m_osInterface->pfnSetGpuContext(m_osInterface, m_renderContext);
284     m_osInterface->pfnResetOsStates(m_osInterface);
285 
286     DecompKernelStateIdx kernelStateIdx;
287     bool                 useUVPlane;
288     if ((targetSurface.Format == Format_YUY2) || (targetSurface.Format == Format_UYVY))
289     {
290         kernelStateIdx = decompKernelStatePa;
291         useUVPlane     = false;
292     }
293     else if ((targetSurface.Format == Format_NV12) || (targetSurface.Format == Format_P010))
294     {
295         kernelStateIdx = decompKernelStatePl2;
296         useUVPlane     = true;
297     }
298     else
299     {
300         eStatus = MOS_STATUS_INVALID_PARAMETER;
301         return eStatus;
302     }
303 
304     auto kernelState = &m_kernelStates[kernelStateIdx];
305     kernelState->m_currTrackerId = m_currCmdBufId;
306 
307     // preprocess in cp first
308     m_osInterface->osCpInterface->PrepareResources((void **)&targetResource, 1, nullptr, 0);
309 
310     if (kernelStateIdx == decompKernelStatePl2)
311     {
312         if (m_osInterface->osCpInterface->IsSMEnabled())
313         {
314             uint32_t *kernelBase = nullptr;
315             uint32_t  kernelSize = 0;
316             m_osInterface->osCpInterface->GetTK(
317                 &kernelBase,
318                 &kernelSize,
319                 nullptr);
320             if (nullptr == kernelBase || 0 == kernelSize)
321             {
322                 MHW_ASSERT("Could not get TK kernels for MMC!");
323                 eStatus = MOS_STATUS_INVALID_PARAMETER;
324                 return eStatus;
325             }
326 
327             kernelState->KernelParams.pBinary = (uint8_t *)kernelBase;
328         }
329         else
330         {
331             kernelState->KernelParams.pBinary = m_kernelBinary[kernelStateIdx];
332         }
333         MHW_CHK_STATUS_RETURN(kernelState->m_ishRegion.AddData(
334             kernelState->KernelParams.pBinary,
335             0,
336             kernelState->KernelParams.iSize));
337     }
338 
339     MHW_CHK_STATUS_RETURN(m_stateHeapInterface->pfnRequestSshSpaceForCmdBuf(
340         m_stateHeapInterface,
341         kernelState->KernelParams.iBTCount));
342 
343     uint32_t dshSize = m_stateHeapInterface->pStateHeapInterface->GetSizeofCmdInterfaceDescriptorData() +
344         MOS_ALIGN_CEIL(kernelState->KernelParams.iCurbeLength,
345         m_stateHeapInterface->pStateHeapInterface->GetCurbeAlignment());
346 
347     eStatus = m_stateHeapInterface->pfnAssignSpaceInStateHeap(
348         m_stateHeapInterface,
349         MHW_DSH_TYPE,
350         kernelState,
351         dshSize,
352         false,
353         true);
354 
355     if (eStatus == MOS_STATUS_CLIENT_AR_NO_SPACE)
356     {
357         MHW_CHK_STATUS_RETURN(m_stateHeapInterface->pfnAssignSpaceInStateHeap(
358             m_stateHeapInterface,
359             MHW_DSH_TYPE,
360             kernelState,
361             dshSize,
362             false,
363             true));
364     }
365     else if (eStatus != MOS_STATUS_SUCCESS)
366     {
367         return eStatus;
368     }
369 
370     MHW_CHK_STATUS_RETURN(m_stateHeapInterface->pfnAssignSpaceInStateHeap(
371         m_stateHeapInterface,
372         MHW_SSH_TYPE,
373         kernelState,
374         kernelState->dwSshSize,
375         false,
376         false));
377 
378     MHW_INTERFACE_DESCRIPTOR_PARAMS idParams;
379     MOS_ZeroMemory(&idParams, sizeof(idParams));
380     idParams.pKernelState = kernelState;
381     MHW_CHK_STATUS_RETURN(m_stateHeapInterface->pfnSetInterfaceDescriptor(
382         m_stateHeapInterface,
383         1,
384         &idParams));
385 
386     MHW_CHK_STATUS_RETURN(SetMediaObjectCopyCurbe(kernelStateIdx));
387 
388     MOS_COMMAND_BUFFER cmdBuffer;
389     // Send HW commands (including SSH)
390     MHW_CHK_STATUS_RETURN(m_osInterface->pfnGetCommandBuffer(m_osInterface, &cmdBuffer, 0));
391 
392     MHW_GENERIC_PROLOG_PARAMS genericPrologParams;
393     MOS_ZeroMemory(&genericPrologParams, sizeof(genericPrologParams));
394     genericPrologParams.pOsInterface        = m_osInterface;
395     genericPrologParams.pvMiInterface       = m_miInterface;
396     genericPrologParams.bMmcEnabled         = true;
397     MHW_CHK_STATUS_RETURN(Mhw_SendGenericPrologCmd(&cmdBuffer, &genericPrologParams));
398 
399     MHW_CHK_NULL_RETURN(m_renderInterface);
400     if (m_renderInterface->GetL3CacheConfig()->bL3CachingEnabled)
401     {
402         MHW_CHK_STATUS_RETURN(m_renderInterface->SetL3Cache(&cmdBuffer));
403     }
404 
405     MHW_CHK_STATUS_RETURN(m_renderInterface->EnablePreemption(&cmdBuffer));
406 
407     MHW_CHK_STATUS_RETURN(m_renderInterface->AddPipelineSelectCmd(&cmdBuffer, false));
408 
409     MHW_CHK_STATUS_RETURN(m_stateHeapInterface->pfnSetBindingTable(
410         m_stateHeapInterface,
411         kernelState));
412 
413     MHW_RCS_SURFACE_PARAMS surfaceParams;
414     MOS_ZeroMemory(&surfaceParams, sizeof(surfaceParams));
415     surfaceParams.dwNumPlanes = useUVPlane ? 2 : 1;  // Y+UV : Y
416     surfaceParams.psSurface   = &targetSurface;
417     // Y Plane
418     surfaceParams.dwBindingTableOffset[MHW_Y_PLANE] = copySurfaceSrcY;
419 
420     if (surfaceParams.psSurface->Format == Format_YUY2)
421     {
422         surfaceParams.ForceSurfaceFormat[MHW_Y_PLANE] = MHW_GFX3DSTATE_SURFACEFORMAT_YCRCB_NORMAL;
423     }
424     else if (surfaceParams.psSurface->Format == Format_UYVY)
425     {
426         surfaceParams.ForceSurfaceFormat[MHW_Y_PLANE] = MHW_GFX3DSTATE_SURFACEFORMAT_YCRCB_SWAPY;
427     }
428     else if (surfaceParams.psSurface->Format == Format_P010)
429     {
430         surfaceParams.ForceSurfaceFormat[MHW_Y_PLANE] = MHW_GFX3DSTATE_SURFACEFORMAT_R16_UNORM;
431     }
432     else  //NV12
433     {
434         surfaceParams.ForceSurfaceFormat[MHW_Y_PLANE] = MHW_GFX3DSTATE_SURFACEFORMAT_R8_UNORM;
435     }
436 
437     uint32_t widthInBytes = GetSurfaceWidthInBytes(surfaceParams.psSurface);
438     surfaceParams.dwWidthToUse[MHW_Y_PLANE] = WIDTH_IN_DW(widthInBytes);
439 
440     // UV Plane
441     if (useUVPlane)
442     {
443         surfaceParams.dwBindingTableOffset[MHW_U_PLANE] = copySurfaceSrcU;
444         if (surfaceParams.psSurface->Format == Format_P010)
445         {
446             surfaceParams.ForceSurfaceFormat[MHW_U_PLANE] = MHW_GFX3DSTATE_SURFACEFORMAT_YCRCB_SWAPUVY;
447         }
448         else  //NV12
449         {
450             surfaceParams.ForceSurfaceFormat[MHW_U_PLANE] = MHW_GFX3DSTATE_SURFACEFORMAT_R16_UINT;
451         }
452         surfaceParams.dwBaseAddrOffset[MHW_U_PLANE] =
453             targetSurface.dwPitch *
454             MOS_ALIGN_FLOOR(targetSurface.UPlaneOffset.iYOffset, MOS_YTILE_H_ALIGNMENT);
455         surfaceParams.dwWidthToUse[MHW_U_PLANE]  = WIDTH_IN_DW(widthInBytes);
456         surfaceParams.dwHeightToUse[MHW_U_PLANE] = surfaceParams.psSurface->dwHeight / 2;
457         surfaceParams.dwYOffset[MHW_U_PLANE] =
458             (targetSurface.UPlaneOffset.iYOffset % MOS_YTILE_H_ALIGNMENT);
459     }
460     m_osInterface->pfnGetMemoryCompressionMode(
461         m_osInterface, &targetSurface.OsResource, (PMOS_MEMCOMP_STATE)&surfaceParams.psSurface->CompressionMode);
462     MHW_CHK_STATUS_RETURN(m_stateHeapInterface->pfnSetSurfaceState(
463         m_stateHeapInterface,
464         kernelState,
465         &cmdBuffer,
466         1,
467         &surfaceParams));
468 
469     //In place decompression: src shares the same surface with dst.
470     surfaceParams.bIsWritable                       = true;
471     surfaceParams.dwBindingTableOffset[MHW_Y_PLANE] = copySurfaceDstY;
472     if (useUVPlane)
473     {
474         surfaceParams.dwBindingTableOffset[MHW_U_PLANE] = copySurfaceDstU;
475     }
476     MHW_CHK_STATUS_RETURN(m_stateHeapInterface->pfnSetSurfaceState(
477         m_stateHeapInterface,
478         kernelState,
479         &cmdBuffer,
480         1,
481         &surfaceParams));
482 
483     MHW_STATE_BASE_ADDR_PARAMS stateBaseAddrParams;
484     MOS_ZeroMemory(&stateBaseAddrParams, sizeof(stateBaseAddrParams));
485     MOS_RESOURCE *dsh = nullptr, *ish = nullptr;
486     MHW_CHK_NULL_RETURN(dsh = kernelState->m_dshRegion.GetResource());
487     MHW_CHK_NULL_RETURN(ish = kernelState->m_ishRegion.GetResource());
488     stateBaseAddrParams.presDynamicState = dsh;
489     stateBaseAddrParams.dwDynamicStateSize = kernelState->m_dshRegion.GetHeapSize();
490     stateBaseAddrParams.presInstructionBuffer = ish;
491     stateBaseAddrParams.dwInstructionBufferSize = kernelState->m_ishRegion.GetHeapSize();
492     MHW_CHK_STATUS_RETURN(m_renderInterface->AddStateBaseAddrCmd(
493         &cmdBuffer,
494         &stateBaseAddrParams));
495 
496     MHW_VFE_PARAMS vfeParams = {};
497     vfeParams.pKernelState = kernelState;
498     auto waTable          = m_osInterface->pfnGetWaTable(m_osInterface);
499 
500     vfeParams.eVfeSliceDisable = MHW_VFE_SLICE_ALL;
501 
502     MHW_CHK_STATUS_RETURN(m_renderInterface->AddMediaVfeCmd(
503         &cmdBuffer,
504         &vfeParams));
505 
506     MHW_CURBE_LOAD_PARAMS curbeLoadParams;
507     MOS_ZeroMemory(&curbeLoadParams, sizeof(curbeLoadParams));
508     curbeLoadParams.pKernelState = kernelState;
509     MHW_CHK_STATUS_RETURN(m_renderInterface->AddMediaCurbeLoadCmd(
510         &cmdBuffer,
511         &curbeLoadParams));
512 
513     MHW_ID_LOAD_PARAMS idLoadParams;
514     MOS_ZeroMemory(&idLoadParams, sizeof(idLoadParams));
515     idLoadParams.pKernelState = kernelState;
516     idLoadParams.dwNumKernelsLoaded = 1;
517     MHW_CHK_STATUS_RETURN(m_renderInterface->AddMediaIDLoadCmd(
518         &cmdBuffer,
519         &idLoadParams));
520 
521     uint32_t resolutionX;
522     if (kernelStateIdx == decompKernelStatePa)  // Format_YUY2, Format_UYVY
523     {
524         resolutionX = MOS_ROUNDUP_DIVIDE(targetSurface.dwWidth * 2, 32);
525     }
526     else  // DecompKernelStatePl2: Format_NV12, Format_P010
527     {
528         if (targetSurface.Format == Format_P010)  // Format_P010
529         {
530             resolutionX = MOS_ROUNDUP_DIVIDE(targetSurface.dwWidth * 2, 32);
531         }
532         else  // Format_NV12
533         {
534             resolutionX = MOS_ROUNDUP_DIVIDE(targetSurface.dwWidth, 32);
535         }
536     }
537     uint32_t resolutionY = MOS_ROUNDUP_DIVIDE(targetSurface.dwHeight, 16);
538 
539     MHW_WALKER_PARAMS walkerParams;
540     MOS_ZeroMemory(&walkerParams, sizeof(walkerParams));
541     walkerParams.WalkerMode               = MHW_WALKER_MODE_SINGLE;
542     walkerParams.BlockResolution.x        = resolutionX;
543     walkerParams.BlockResolution.y        = resolutionY;
544     walkerParams.GlobalResolution.x       = resolutionX;
545     walkerParams.GlobalResolution.y       = resolutionY;
546     walkerParams.GlobalOutlerLoopStride.x = resolutionX;
547     walkerParams.GlobalOutlerLoopStride.y = 0;
548     walkerParams.GlobalInnerLoopUnit.x    = 0;
549     walkerParams.GlobalInnerLoopUnit.y    = resolutionY;
550     walkerParams.dwLocalLoopExecCount     = 0xFFFF;  //MAX VALUE
551     walkerParams.dwGlobalLoopExecCount    = 0xFFFF;  //MAX VALUE
552 
553     // No dependency
554     walkerParams.ScoreboardMask = 0;
555     // Raster scan walking pattern
556     walkerParams.LocalOutLoopStride.x = 0;
557     walkerParams.LocalOutLoopStride.y = 1;
558     walkerParams.LocalInnerLoopUnit.x = 1;
559     walkerParams.LocalInnerLoopUnit.y = 0;
560     walkerParams.LocalEnd.x           = resolutionX - 1;
561     walkerParams.LocalEnd.y           = 0;
562 
563     MHW_CHK_STATUS_RETURN(m_renderInterface->AddMediaObjectWalkerCmd(
564         &cmdBuffer,
565         &walkerParams));
566 
567     // Check if destination surface needs to be synchronized, before command buffer submission
568     MOS_SYNC_PARAMS    syncParams;
569     MOS_ZeroMemory(&syncParams, sizeof(syncParams));
570     syncParams.uiSemaphoreCount         = 1;
571     syncParams.GpuContext               = m_renderContext;
572     syncParams.presSyncResource         = &targetSurface.OsResource;
573     syncParams.bReadOnly                = false;
574     syncParams.bDisableDecodeSyncLock   = m_disableDecodeSyncLock;
575     syncParams.bDisableLockForTranscode = m_disableLockForTranscode;
576 
577     MHW_CHK_STATUS_RETURN(m_osInterface->pfnPerformOverlaySync(m_osInterface, &syncParams));
578     MHW_CHK_STATUS_RETURN(m_osInterface->pfnResourceWait(m_osInterface, &syncParams));
579 
580     // Update the resource tag (s/w tag) for On-Demand Sync
581     m_osInterface->pfnSetResourceSyncTag(m_osInterface, &syncParams);
582 
583     // Update the tag in GPU Sync eStatus buffer (H/W Tag) to match the current S/W tag
584     if (m_osInterface->bTagResourceSync)
585     {
586         MHW_PIPE_CONTROL_PARAMS pipeControlParams;
587         MOS_ZeroMemory(&pipeControlParams, sizeof(pipeControlParams));
588 
589         pipeControlParams.dwFlushMode = MHW_FLUSH_WRITE_CACHE;
590         MHW_CHK_STATUS_RETURN(m_miInterface->AddPipeControl(
591             &cmdBuffer,
592             nullptr,
593             &pipeControlParams));
594 
595         MHW_CHK_STATUS_RETURN(WriteSyncTagToResourceCmd(&cmdBuffer));
596     }
597 
598     MHW_MI_STORE_DATA_PARAMS        miStoreDataParams;
599     MOS_ZeroMemory(&miStoreDataParams, sizeof(miStoreDataParams));
600     miStoreDataParams.pOsResource = &m_resCmdBufIdGlobal;
601     miStoreDataParams.dwValue = m_currCmdBufId;
602     MHW_CHK_STATUS_RETURN(m_miInterface->AddMiStoreDataImmCmd(
603         &cmdBuffer,
604         &miStoreDataParams));
605 
606     MHW_CHK_STATUS_RETURN(m_stateHeapInterface->pfnSubmitBlocks(
607         m_stateHeapInterface,
608         kernelState));
609     MHW_CHK_STATUS_RETURN(m_stateHeapInterface->pfnUpdateGlobalCmdBufId(
610         m_stateHeapInterface));
611 
612     // Add PipeControl to invalidate ISP and MediaState to avoid PageFault issue
613     // This code is temporal and it will be moved to batch buffer end in short
614     PLATFORM platform;
615     m_osInterface->pfnGetPlatform(m_osInterface, &platform);
616     if (GFX_IS_GEN_9_OR_LATER(platform))
617     {
618         MHW_PIPE_CONTROL_PARAMS pipeControlParams;
619 
620         MOS_ZeroMemory(&pipeControlParams, sizeof(pipeControlParams));
621         pipeControlParams.dwFlushMode = MHW_FLUSH_WRITE_CACHE;
622         pipeControlParams.bGenericMediaStateClear = true;
623         pipeControlParams.bIndirectStatePointersDisable = true;
624         pipeControlParams.bDisableCSStall = false;
625         MHW_CHK_STATUS_RETURN(m_miInterface->AddPipeControl(&cmdBuffer, NULL, &pipeControlParams));
626 
627         if (MEDIA_IS_WA(m_osInterface->pfnGetWaTable(m_osInterface), WaSendDummyVFEafterPipelineSelect))
628         {
629             MHW_VFE_PARAMS vfeStateParams = {};
630             vfeStateParams.dwNumberofURBEntries = 1;
631             MHW_CHK_STATUS_RETURN(m_renderInterface->AddMediaVfeCmd(&cmdBuffer, &vfeStateParams));
632         }
633     }
634 
635     MHW_CHK_STATUS_RETURN(m_miInterface->AddMiBatchBufferEnd(
636         &cmdBuffer,
637         nullptr));
638 
639     m_osInterface->pfnReturnCommandBuffer(m_osInterface, &cmdBuffer, 0);
640 
641     MHW_CHK_STATUS_RETURN(m_osInterface->pfnSubmitCommandBuffer(
642         m_osInterface,
643         &cmdBuffer,
644         m_renderContextUsesNullHw));
645 
646     // Update the compression mode
647     MHW_CHK_STATUS_RETURN(m_osInterface->pfnSetMemoryCompressionMode(
648         m_osInterface,
649         targetResource,
650         MOS_MEMCOMP_DISABLED));
651     MHW_CHK_STATUS_RETURN(m_osInterface->pfnSetMemoryCompressionHint(
652         m_osInterface,
653         targetResource,
654         false));
655 
656     //Update CmdBufId...
657     m_currCmdBufId++;
658     if (m_currCmdBufId == MemoryBlock::m_invalidTrackerId)
659     {
660         m_currCmdBufId++;
661     }
662 
663     // Send the signal to indicate decode completion, in case On-Demand Sync is not present
664     MHW_CHK_STATUS_RETURN(m_osInterface->pfnResourceSignal(m_osInterface, &syncParams));
665 
666     if (gpuContext != m_renderContext)
667     {
668         m_osInterface->pfnSetGpuContext(m_osInterface, gpuContext);
669     }
670 
671     return eStatus;
672 }
673 
GetResourceInfo(PMOS_SURFACE surface)674 MOS_STATUS MediaMemDecompState::GetResourceInfo(
675     PMOS_SURFACE   surface)
676 {
677     MOS_STATUS  eStatus = MOS_STATUS_SUCCESS;
678 
679     MHW_CHK_NULL_RETURN(m_osInterface);
680     MHW_CHK_NULL_RETURN(surface);
681 
682     MOS_SURFACE details;
683     MOS_ZeroMemory(&details, sizeof(details));
684     details.Format = Format_Invalid;
685 
686     MHW_CHK_STATUS_RETURN(m_osInterface->pfnGetResourceInfo(
687         m_osInterface,
688         &surface->OsResource,
689         &details));
690 
691     surface->Format                      = details.Format;
692     surface->dwWidth                     = details.dwWidth;
693     surface->dwHeight                    = details.dwHeight;
694     surface->dwPitch                     = details.dwPitch;
695     surface->dwDepth                     = details.dwDepth;
696     surface->bArraySpacing               = details.bArraySpacing;
697     surface->TileType                    = details.TileType;
698     surface->TileModeGMM                 = details.TileModeGMM;
699     surface->bGMMTileEnabled             = details.bGMMTileEnabled;
700     surface->dwOffset                    = details.RenderOffset.YUV.Y.BaseOffset;
701     surface->UPlaneOffset.iSurfaceOffset = details.RenderOffset.YUV.U.BaseOffset;
702     surface->UPlaneOffset.iXOffset       = details.RenderOffset.YUV.U.XOffset;
703     surface->UPlaneOffset.iYOffset =
704         (surface->UPlaneOffset.iSurfaceOffset - surface->dwOffset) / surface->dwPitch +
705         details.RenderOffset.YUV.U.YOffset;
706     surface->VPlaneOffset.iSurfaceOffset = details.RenderOffset.YUV.V.BaseOffset;
707     surface->VPlaneOffset.iXOffset       = details.RenderOffset.YUV.V.XOffset;
708     surface->VPlaneOffset.iYOffset =
709         (surface->VPlaneOffset.iSurfaceOffset - surface->dwOffset) / surface->dwPitch +
710         details.RenderOffset.YUV.V.YOffset;
711     surface->bCompressible   = details.bCompressible;
712     surface->bIsCompressed   = details.bIsCompressed;
713     surface->CompressionMode = details.CompressionMode;
714 
715     return eStatus;
716 }
717 
GetSurfaceWidthInBytes(PMOS_SURFACE surface)718 uint32_t MediaMemDecompState::GetSurfaceWidthInBytes(PMOS_SURFACE surface)
719 {
720     uint32_t widthInBytes;
721 
722     switch (surface->Format)
723     {
724     case Format_IMC1:
725     case Format_IMC3:
726     case Format_IMC2:
727     case Format_IMC4:
728     case Format_NV12:
729     case Format_YV12:
730     case Format_I420:
731     case Format_IYUV:
732     case Format_400P:
733     case Format_411P:
734     case Format_422H:
735     case Format_422V:
736     case Format_444P:
737     case Format_RGBP:
738     case Format_BGRP:
739         widthInBytes = surface->dwWidth;
740         break;
741     case Format_YUY2:
742     case Format_YUYV:
743     case Format_YVYU:
744     case Format_UYVY:
745     case Format_VYUY:
746     case Format_P010:
747         widthInBytes = surface->dwWidth << 1;
748         break;
749     case Format_A8R8G8B8:
750     case Format_X8R8G8B8:
751     case Format_A8B8G8R8:
752         widthInBytes = surface->dwWidth << 2;
753         break;
754     default:
755         widthInBytes = surface->dwWidth;
756         break;
757     }
758 
759     return widthInBytes;
760 }
761 
WriteSyncTagToResourceCmd(PMOS_COMMAND_BUFFER cmdBuffer)762 MOS_STATUS MediaMemDecompState::WriteSyncTagToResourceCmd(
763     PMOS_COMMAND_BUFFER   cmdBuffer)
764 {
765     MOS_STATUS               eStatus = MOS_STATUS_SUCCESS;
766 
767     MHW_FUNCTION_ENTER;
768 
769     MOS_RESOURCE globalGpuContextSyncTagBuffer;
770     MHW_CHK_STATUS_RETURN(m_osInterface->pfnGetGpuStatusBufferResource(
771         m_osInterface,
772         &globalGpuContextSyncTagBuffer));
773 
774     uint32_t offset = m_osInterface->pfnGetGpuStatusTagOffset(
775         m_osInterface,
776         m_osInterface->CurrentGpuContextOrdinal);
777     uint32_t value  = m_osInterface->pfnGetGpuStatusTag(
778         m_osInterface,
779         m_osInterface->CurrentGpuContextOrdinal);
780 
781     MHW_MI_STORE_DATA_PARAMS params;
782     params.pOsResource      = &globalGpuContextSyncTagBuffer;
783     params.dwResourceOffset = offset;
784     params.dwValue          = value;
785 
786     MHW_CHK_STATUS_RETURN(m_miInterface->AddMiStoreDataImmCmd(cmdBuffer, &params));
787 
788     // Increment GPU Context Tag for next use
789     m_osInterface->pfnIncrementGpuStatusTag(m_osInterface, m_osInterface->CurrentGpuContextOrdinal);
790 
791     return eStatus;
792 }
793 
SetMediaObjectCopyCurbe(DecompKernelStateIdx kernelStateIdx)794 MOS_STATUS MediaMemDecompState::SetMediaObjectCopyCurbe(
795     DecompKernelStateIdx kernelStateIdx)
796 {
797     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
798 
799     MHW_FUNCTION_ENTER;
800 
801     if ((kernelStateIdx >= decompKernelStateMax))
802     {
803         eStatus = MOS_STATUS_INVALID_PARAMETER;
804         return eStatus;
805     }
806 
807     MediaObjectCopyCurbe cmd;
808 
809     cmd.m_dw0.srcSurface0Index = copySurfaceSrcY;
810     cmd.m_dw3.dstSurface0Index = copySurfaceDstY;
811 
812     if (kernelStateIdx == decompKernelStatePl2)
813     {
814         cmd.m_dw1.srcSurface1Index = copySurfaceSrcU;
815         cmd.m_dw4.dstSurface1Index = copySurfaceDstU;
816     }
817 
818     MHW_CHK_STATUS_RETURN(m_kernelStates[kernelStateIdx].m_dshRegion.AddData(
819         &cmd,
820         m_kernelStates[kernelStateIdx].dwCurbeOffset,
821         sizeof(cmd)));
822 
823     return eStatus;
824 }
825 
SetKernelStateParams()826 MOS_STATUS MediaMemDecompState::SetKernelStateParams()
827 {
828     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
829 
830     MHW_FUNCTION_ENTER;
831 
832     MHW_CHK_NULL_RETURN(m_renderInterface->GetHwCaps());
833 
834     for (uint32_t krnIdx = 0; krnIdx < decompKernelStateMax; krnIdx++)
835     {
836         auto kernelState = &m_kernelStates[krnIdx];
837         kernelState->KernelParams.pBinary = m_kernelBinary[krnIdx];
838         kernelState->KernelParams.iSize   = m_kernelSize[krnIdx];
839         kernelState->KernelParams.iBTCount     = copySurfaceNum;
840         kernelState->KernelParams.iThreadCount = m_renderInterface->GetHwCaps()->dwMaxThreads;
841         kernelState->KernelParams.iCurbeLength = MOS_ALIGN_CEIL(
842             MediaObjectCopyCurbe::m_byteSize,
843             m_stateHeapInterface->pStateHeapInterface->GetCurbeAlignment());
844         kernelState->KernelParams.iBlockWidth  = 32;
845         kernelState->KernelParams.iBlockHeight = 16;
846         kernelState->KernelParams.iIdCount     = 1;
847 
848         kernelState->dwCurbeOffset =
849             m_stateHeapInterface->pStateHeapInterface->GetSizeofCmdInterfaceDescriptorData();
850 
851         MHW_CHK_STATUS_RETURN(m_stateHeapInterface->pfnCalculateSshAndBtSizesRequested(
852             m_stateHeapInterface,
853             kernelState->KernelParams.iBTCount,
854             &kernelState->dwSshSize,
855             &kernelState->dwBindingTableSize));
856 
857         kernelState->dwKernelBinaryOffset = 0;
858 
859         eStatus = m_stateHeapInterface->pfnAssignSpaceInStateHeap(
860             m_stateHeapInterface,
861             MHW_ISH_TYPE,
862             kernelState,
863             kernelState->KernelParams.iSize,
864             true,
865             false);
866 
867         if (eStatus == MOS_STATUS_CLIENT_AR_NO_SPACE)
868         {
869             MHW_ASSERTMESSAGE("CodecHal does not handle this case");
870             return eStatus;
871         }
872         else if (eStatus != MOS_STATUS_SUCCESS)
873         {
874             return eStatus;
875         }
876 
877         MHW_CHK_STATUS_RETURN(kernelState->m_ishRegion.AddData(
878             kernelState->KernelParams.pBinary,
879             0,
880             kernelState->KernelParams.iSize));
881     }
882 
883     return eStatus;
884 }
885 
Initialize(PMOS_INTERFACE osInterface,MhwCpInterface * cpInterface,MhwMiInterface * miInterface,MhwRenderInterface * renderInterface)886 MOS_STATUS MediaMemDecompState::Initialize(
887     PMOS_INTERFACE                  osInterface,
888     MhwCpInterface                  *cpInterface,
889     MhwMiInterface                  *miInterface,
890     MhwRenderInterface              *renderInterface)
891 {
892     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
893 
894     MHW_FUNCTION_ENTER;
895 
896     MHW_CHK_NULL_RETURN(osInterface);
897     MHW_CHK_NULL_RETURN(cpInterface);
898     MHW_CHK_NULL_RETURN(miInterface);
899     MHW_CHK_NULL_RETURN(renderInterface);
900 
901     m_osInterface = osInterface;
902     m_cpInterface = cpInterface;
903     m_miInterface = miInterface;
904     m_renderInterface = renderInterface;
905 
906     for (uint8_t kernelIdx = decompKernelStatePa; kernelIdx < decompKernelStateMax; kernelIdx++)
907     {
908         MHW_CHK_STATUS_RETURN(InitKernelState(kernelIdx));
909     }
910 
911     if (m_stateHeapSettings.dwIshSize > 0 &&
912         m_stateHeapSettings.dwDshSize > 0 &&
913         m_stateHeapSettings.dwNumSyncTags > 0)
914     {
915         MHW_CHK_STATUS_RETURN(m_renderInterface->AllocateHeaps(
916             m_stateHeapSettings));
917     }
918 
919     m_stateHeapInterface = m_renderInterface->m_stateHeapInterface;
920     MHW_CHK_NULL_RETURN(m_stateHeapInterface);
921 
922     if (m_osInterface->pfnIsGpuContextValid(m_osInterface, MOS_GPU_CONTEXT_RENDER) == MOS_STATUS_SUCCESS)
923     {
924         m_renderContext = MOS_GPU_CONTEXT_RENDER;
925     }
926     else
927     {
928         MOS_GPUCTX_CREATOPTIONS createOption;
929         MHW_CHK_STATUS_RETURN(m_osInterface->pfnCreateGpuContext(
930             m_osInterface,
931             MOS_GPU_CONTEXT_RENDER,
932             MOS_GPU_NODE_3D,
933             &createOption));
934 
935         m_renderContext = MOS_GPU_CONTEXT_RENDER;
936     }
937 
938     MOS_NULL_RENDERING_FLAGS nullHWAccelerationEnable;
939     nullHWAccelerationEnable.Value = 0;
940     m_disableDecodeSyncLock        = false;
941 #if (_DEBUG || _RELEASE_INTERNAL)
942     MOS_USER_FEATURE_VALUE_DATA userFeatureData;
943     MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
944     MOS_UserFeature_ReadValue_ID(
945         nullptr,
946         __MEDIA_USER_FEATURE_VALUE_NULL_HW_ACCELERATION_ENABLE_ID,
947         &userFeatureData);
948     nullHWAccelerationEnable.Value = userFeatureData.u32Data;
949 
950     MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
951     MOS_UserFeature_ReadValue_ID(
952         nullptr,
953         __MEDIA_USER_FEATURE_VALUE_DECODE_LOCK_DISABLE_ID,
954         &userFeatureData);
955     m_disableDecodeSyncLock = userFeatureData.u32Data ? true : false;
956 #endif  // _DEBUG || _RELEASE_INTERNAL
957 
958     m_disableLockForTranscode =
959         MEDIA_IS_WA(m_osInterface->pfnGetWaTable(m_osInterface), WaDisableLockForTranscodePerf);
960 
961     MHW_CHK_STATUS_RETURN(SetKernelStateParams());
962 
963     m_renderContextUsesNullHw =
964         ((m_renderContext == MOS_GPU_CONTEXT_RENDER) ? nullHWAccelerationEnable.CtxRender : nullHWAccelerationEnable.CtxRender2) ||
965         nullHWAccelerationEnable.Mmc;
966 
967     MOS_ALLOC_GFXRES_PARAMS allocParams;
968     MOS_ZeroMemory(&allocParams, sizeof(allocParams));
969     allocParams.Type = MOS_GFXRES_BUFFER;
970     allocParams.TileType = MOS_TILE_LINEAR;
971     allocParams.Format = Format_Buffer;
972     allocParams.dwBytes = MHW_CACHELINE_SIZE;
973     allocParams.pBufName = "CmdBufIdGlobal";
974     MHW_CHK_STATUS_RETURN(m_osInterface->pfnAllocateResource(
975         m_osInterface,
976         &allocParams,
977         &m_resCmdBufIdGlobal));
978     m_currCmdBufId = MemoryBlock::m_invalidTrackerId + 1;
979 
980     MOS_LOCK_PARAMS lockParams;
981     MOS_ZeroMemory(&lockParams, sizeof(lockParams));
982     lockParams.WriteOnly = 1;
983     m_cmdBufIdGlobal = (uint32_t *)m_osInterface->pfnLockResource(
984         m_osInterface,
985         &m_resCmdBufIdGlobal,
986         &lockParams);
987     MHW_CHK_NULL_RETURN(m_cmdBufIdGlobal);
988     MOS_ZeroMemory(m_cmdBufIdGlobal, allocParams.dwBytes);
989 
990     MHW_CHK_STATUS_RETURN(m_stateHeapInterface->pfnSetCmdBufStatusPtr(
991         m_stateHeapInterface,
992         m_cmdBufIdGlobal));
993 
994     return eStatus;
995 }
996