1 /*
2 * Copyright (c) 2017-2019, Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 */
22 //!
23 //! \file     codechal_encode_csc_ds_g11.cpp
24 //! \brief    This file implements the Csc+Ds feature for all codecs on Gen11 platform
25 //!
26 
27 #include "codechal_encoder_base.h"
28 #include "codechal_encode_sfc_g11.h"
29 #include "codechal_encode_csc_ds_g11.h"
30 #include "codechal_kernel_header_g11.h"
31 #include "codeckrnheader.h"
32 #if defined(ENABLE_KERNELS)
33 #include "igcodeckrn_g11.h"
34 #endif
35 #if USE_CODECHAL_DEBUG_TOOL
36 #include "codechal_debug_encode_par_g11.h"
37 #endif
38 
GetBTCount() const39 uint8_t CodechalEncodeCscDsG11::GetBTCount() const
40 {
41     return (uint8_t)cscNumSurfaces;
42 }
43 
AllocateSurfaceCsc()44 MOS_STATUS CodechalEncodeCscDsG11::AllocateSurfaceCsc()
45 {
46     CODECHAL_ENCODE_FUNCTION_ENTER;
47 
48     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
49 
50     CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalEncodeCscDs::AllocateSurfaceCsc());
51 
52     // allocate the MbStats surface
53     if (Mos_ResourceIsNull(&m_resMbStatsBuffer))
54     {
55         MOS_ALLOC_GFXRES_PARAMS    allocParamsForBufferLinear;
56         MOS_ZeroMemory(&allocParamsForBufferLinear, sizeof(MOS_ALLOC_GFXRES_PARAMS));
57         allocParamsForBufferLinear.Type = MOS_GFXRES_BUFFER;
58         allocParamsForBufferLinear.TileType = MOS_TILE_LINEAR;
59         allocParamsForBufferLinear.Format = Format_Buffer;
60         uint32_t alignedWidth = MOS_ALIGN_CEIL(CODECHAL_GET_WIDTH_IN_MACROBLOCKS(m_encoder->m_oriFrameWidth), 64);
61         uint32_t alignedHeight = MOS_ALIGN_CEIL(CODECHAL_GET_WIDTH_IN_MACROBLOCKS(m_encoder->m_oriFrameHeight), 64);
62         allocParamsForBufferLinear.dwBytes = m_hwInterface->m_avcMbStatBufferSize =
63             MOS_ALIGN_CEIL((alignedWidth * alignedHeight << 6) , 1024);
64         allocParamsForBufferLinear.pBufName = "MB Statistics Buffer";
65 
66         CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(m_osInterface->pfnAllocateResource(
67             m_osInterface,
68             &allocParamsForBufferLinear,
69             &m_resMbStatsBuffer), "Failed to allocate  MB Statistics Buffer.");
70     }
71 
72     return eStatus;
73 }
74 
CheckRawColorFormat(MOS_FORMAT format,MOS_TILE_TYPE tileType)75 MOS_STATUS CodechalEncodeCscDsG11::CheckRawColorFormat(MOS_FORMAT format, MOS_TILE_TYPE tileType)
76 {
77     CODECHAL_ENCODE_FUNCTION_ENTER;
78 
79     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
80 
81     // check input color format, and set target traverse thread space size
82     switch (format)
83     {
84     case Format_NV12:
85         m_colorRawSurface = cscColorNv12Linear;
86         m_cscRequireColor = 1;
87         break;
88     case Format_YUY2:
89     case Format_YUYV:
90         m_colorRawSurface = cscColorYUY2;
91         m_cscRequireColor = (uint8_t)HCP_CHROMA_FORMAT_YUV420 == m_outputChromaFormat;
92         m_cscRequireConvTo8bPlanar = (uint8_t)HCP_CHROMA_FORMAT_YUV422 == m_outputChromaFormat;
93         break;
94     case Format_A8R8G8B8:
95         m_colorRawSurface = cscColorARGB;
96         m_cscUsingSfc = IsSfcEnabled() ? 1 : 0;
97         m_cscRequireColor = 1;
98         //Use EU for better performance in big resolution cases
99         if (m_cscRawSurfWidth * m_cscRawSurfHeight > 1920 * 1088)
100         {
101             m_cscUsingSfc = 0;
102         }
103         break;
104     case Format_A8B8G8R8:
105         m_colorRawSurface = cscColorABGR;
106         m_cscRequireColor = 1;
107         m_cscUsingSfc     = IsSfcEnabled() ? 1 : 0;
108         // Use EU for better performance in big resolution cases or TU1
109         if (m_cscRawSurfWidth * m_cscRawSurfHeight > 1920 * 1088)
110         {
111             m_cscUsingSfc = 0;
112         }
113         break;
114     case Format_P010:
115         m_colorRawSurface = cscColorP010;
116         m_cscRequireConvTo8bPlanar = 1;
117         break;
118     case Format_Y210:
119         m_colorRawSurface = cscColorY210;
120         if (m_encoder->m_vdencEnabled)
121         {
122             CODECHAL_ENCODE_ASSERTMESSAGE("Input color format Y210 Linear or Tile X not yet supported!");
123             eStatus = MOS_STATUS_PLATFORM_NOT_SUPPORTED;
124         }
125         else
126         {
127             m_cscRequireConvTo8bPlanar = 1;
128         }
129         break;
130     case Format_P210:
131         // not supported yet so fall-thru to default
132         m_colorRawSurface = cscColorP210;
133         m_cscRequireConvTo8bPlanar = 1;
134     default:
135         CODECHAL_ENCODE_ASSERTMESSAGE("Input color format = %d not yet supported!", format);
136         eStatus = MOS_STATUS_INVALID_PARAMETER;
137         break;
138     }
139 
140     return eStatus;
141 }
142 
InitKernelStateCsc()143 MOS_STATUS CodechalEncodeCscDsG11::InitKernelStateCsc()
144 {
145     CODECHAL_ENCODE_FUNCTION_ENTER;
146 
147     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
148 
149     CODECHAL_KERNEL_HEADER currKrnHeader;
150     auto kernelSize = m_combinedKernelSize;
151     CODECHAL_ENCODE_CHK_STATUS_RETURN(GetCommonKernelHeaderAndSizeG11(
152         m_kernelBase,
153         ENC_SCALING_CONVERSION,
154         0,
155         &currKrnHeader,
156         &kernelSize));
157 
158     m_cscKernelState->KernelParams.iBTCount = cscNumSurfaces;
159     m_cscKernelState->KernelParams.iThreadCount = m_hwInterface->GetRenderInterface()->GetHwCaps()->dwMaxThreads;
160     m_cscKernelState->KernelParams.iCurbeLength = m_cscCurbeLength;
161     m_cscKernelState->KernelParams.iBlockWidth = CODECHAL_MACROBLOCK_WIDTH;
162     m_cscKernelState->KernelParams.iBlockHeight = CODECHAL_MACROBLOCK_HEIGHT;
163     m_cscKernelState->KernelParams.iIdCount = 1;
164     m_cscKernelState->KernelParams.iInlineDataLength = m_cscCurbeLength;
165     m_cscKernelState->dwCurbeOffset = m_stateHeapInterface->GetSizeofCmdInterfaceDescriptorData();
166     m_cscKernelState->KernelParams.pBinary =
167         m_kernelBase + (currKrnHeader.KernelStartPointer << MHW_KERNEL_OFFSET_SHIFT);
168     m_cscKernelState->KernelParams.iSize = kernelSize;
169 
170     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->CalculateSshAndBtSizesRequested(
171         m_cscKernelState->KernelParams.iBTCount,
172         &m_cscKernelState->dwSshSize,
173         &m_cscKernelState->dwBindingTableSize));
174 
175     CODECHAL_ENCODE_CHK_NULL_RETURN(m_renderInterface->m_stateHeapInterface);
176     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->MhwInitISH(m_renderInterface->m_stateHeapInterface, m_cscKernelState));
177 
178     return eStatus;
179 }
180 
SetKernelParamsCsc(KernelParams * params)181 MOS_STATUS CodechalEncodeCscDsG11::SetKernelParamsCsc(KernelParams* params)
182 {
183     CODECHAL_ENCODE_FUNCTION_ENTER;
184 
185     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
186 
187     CODECHAL_ENCODE_CHK_NULL_RETURN(params);
188 
189     m_lastTaskInPhase = params->bLastTaskInPhaseCSC;
190 
191     auto inputFrameWidth = m_encoder->m_frameWidth;
192     auto inputFrameHeight = m_encoder->m_frameHeight;
193     auto inputSurface = m_rawSurfaceToEnc;
194     auto output4xDsSurface = m_encoder->m_trackedBuf->Get4xDsSurface(CODEC_CURR_TRACKED_BUFFER);
195     auto output2xDsSurface = m_encoder->m_trackedBuf->Get2xDsSurface(CODEC_CURR_TRACKED_BUFFER);
196     auto mbStatsSurface = &m_resMbStatsBuffer;
197 
198     m_curbeParams.bHevcEncHistorySum = false;
199     m_surfaceParamsCsc.hevcExtParams = nullptr;
200 
201     if (dsDisabled == params->stageDsConversion)
202     {
203         m_curbeParams.bConvertFlag = m_cscFlag != 0;
204 
205         if (m_2xScalingEnabled && m_scalingEnabled)
206         {
207             m_curbeParams.downscaleStage = dsStage2x4x;
208             m_currRefList->b4xScalingUsed =
209             m_currRefList->b2xScalingUsed = true;
210             m_surfaceParamsCsc.bScalingInUses16UnormSurfFmt = false;
211             m_surfaceParamsCsc.bScalingInUses32UnormSurfFmt = false;
212         }
213         else if (m_2xScalingEnabled)
214         {
215             m_curbeParams.downscaleStage = dsStage2x;
216             m_currRefList->b2xScalingUsed = true;
217             output4xDsSurface = nullptr;
218             mbStatsSurface = nullptr;
219             m_surfaceParamsCsc.bScalingInUses16UnormSurfFmt = true;
220             m_surfaceParamsCsc.bScalingInUses32UnormSurfFmt = false;
221         }
222         else if (m_scalingEnabled)
223         {
224             m_curbeParams.downscaleStage = dsStage4x;
225             m_currRefList->b4xScalingUsed = true;
226             output2xDsSurface = nullptr;
227             m_surfaceParamsCsc.bScalingInUses16UnormSurfFmt = false;
228             m_surfaceParamsCsc.bScalingInUses32UnormSurfFmt = true;
229         }
230         else
231         {
232             // do CSC only
233             m_curbeParams.downscaleStage = dsDisabled;
234             output4xDsSurface = nullptr;
235             output2xDsSurface = nullptr;
236             mbStatsSurface = nullptr;
237             m_surfaceParamsCsc.bScalingInUses16UnormSurfFmt = false;
238             m_surfaceParamsCsc.bScalingInUses32UnormSurfFmt = false;
239         }
240 
241         // history sum to be enabled only for the 4x stage
242         if (params->hevcExtParams)
243         {
244             auto hevcExtParam = (HevcExtKernelParams*)params->hevcExtParams;
245             m_curbeParams.bUseLCU32 = hevcExtParam->bUseLCU32;
246             m_curbeParams.bHevcEncHistorySum = hevcExtParam->bHevcEncHistorySum;
247             m_surfaceParamsCsc.hevcExtParams = params->hevcExtParams;
248         }
249     }
250     else
251     {
252         // do 16x/32x downscaling
253         inputFrameWidth = m_encoder->m_downscaledWidth4x;
254         inputFrameHeight = m_encoder->m_downscaledHeight4x;
255         m_curbeParams.bConvertFlag = false;
256         mbStatsSurface = nullptr;
257 
258         if (dsStage16x == params->stageDsConversion)
259         {
260             m_currRefList->b16xScalingUsed = true;
261             m_lastTaskInPhase = params->bLastTaskInPhase16xDS;
262             m_curbeParams.downscaleStage = dsStage16x;
263             inputFrameWidth = m_encoder->m_downscaledWidth4x << 2;
264             inputFrameHeight = m_encoder->m_downscaledHeight4x << 2;
265 
266             inputSurface = m_encoder->m_trackedBuf->Get4xDsSurface(CODEC_CURR_TRACKED_BUFFER);
267             output4xDsSurface = m_encoder->m_trackedBuf->Get16xDsSurface(CODEC_CURR_TRACKED_BUFFER);
268             output2xDsSurface = nullptr;
269             m_surfaceParamsCsc.bScalingInUses16UnormSurfFmt = false;
270             m_surfaceParamsCsc.bScalingInUses32UnormSurfFmt = true;
271         }
272         else if (dsStage32x == params->stageDsConversion)
273         {
274             m_currRefList->b32xScalingUsed = true;
275             m_lastTaskInPhase = params->bLastTaskInPhase32xDS;
276             m_curbeParams.downscaleStage = dsStage2x;
277             inputFrameWidth = m_encoder->m_downscaledWidth16x;
278             inputFrameHeight = m_encoder->m_downscaledHeight16x;
279             inputSurface = m_encoder->m_trackedBuf->Get16xDsSurface(CODEC_CURR_TRACKED_BUFFER);
280             output4xDsSurface = nullptr;
281             output2xDsSurface = m_encoder->m_trackedBuf->Get32xDsSurface(CODEC_CURR_TRACKED_BUFFER);
282             m_surfaceParamsCsc.bScalingInUses16UnormSurfFmt = true;
283             m_surfaceParamsCsc.bScalingInUses32UnormSurfFmt = false;
284         }
285     }
286 
287     // setup Curbe
288     m_curbeParams.dwInputPictureWidth = inputFrameWidth;
289     m_curbeParams.dwInputPictureHeight = inputFrameHeight;
290 
291     // setup surface states
292     m_surfaceParamsCsc.psInputSurface = inputSurface;
293     m_surfaceParamsCsc.psOutputCopiedSurface = m_curbeParams.bConvertFlag ? m_encoder->m_trackedBuf->GetCscSurface(CODEC_CURR_TRACKED_BUFFER) : nullptr;
294     m_surfaceParamsCsc.psOutput4xDsSurface = output4xDsSurface;
295     m_surfaceParamsCsc.psOutput2xDsSurface = output2xDsSurface;
296     m_surfaceParamsCsc.presMBVProcStatsBuffer = mbStatsSurface;
297     m_surfaceParamsCsc.hevcExtParams = params->hevcExtParams;
298 
299     if (dsStage16x == params->stageDsConversion)
300     {
301         // here to calculate the walkder resolution, we need to use the input surface resolution.
302         // it is inputFrameWidth/height / 4 in 16xStage, becasue kernel internally will do this.
303         inputFrameWidth = inputFrameWidth >> 2;
304         inputFrameHeight = inputFrameHeight >> 2;
305     }
306 
307     // setup walker param
308     m_walkerResolutionX = CODECHAL_GET_4xDS_SIZE_32ALIGNED(inputFrameWidth) >> 3;
309     m_walkerResolutionY = CODECHAL_GET_4xDS_SIZE_32ALIGNED(inputFrameHeight) >> 3;
310 
311     return eStatus;
312 }
313 
SetCurbeCsc()314 MOS_STATUS CodechalEncodeCscDsG11::SetCurbeCsc()
315 {
316     CODECHAL_ENCODE_FUNCTION_ENTER;
317 
318     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
319 
320     CscKernelCurbeData curbe;
321 
322     curbe.DW0_OutputBitDepthForChroma = m_curbeParams.ucEncBitDepthChroma;
323     curbe.DW0_OutputBitDepthForLuma = m_curbeParams.ucEncBitDepthLuma;
324     curbe.DW0_RoundingEnable = 1;
325 
326     curbe.DW1_PictureFormat = (uint8_t)((m_colorRawSurface == cscColorABGR) ? cscColorARGB : m_colorRawSurface); // Use cscColorARGB for ABGR CSC, just switch B and R coefficients
327     curbe.DW1_ConvertFlag = m_curbeParams.bConvertFlag;
328     curbe.DW1_DownscaleStage = (uint8_t)m_curbeParams.downscaleStage;
329     curbe.DW1_MbStatisticsDumpFlag = (m_curbeParams.downscaleStage == dsStage4x || m_curbeParams.downscaleStage == dsStage2x4x);
330     curbe.DW1_YUY2ConversionFlag = (m_colorRawSurface == cscColorYUY2) && m_cscRequireColor;
331     curbe.DW1_HevcEncHistorySum = m_curbeParams.bHevcEncHistorySum;
332     curbe.DW1_LCUSize = m_curbeParams.bUseLCU32;
333 
334     curbe.DW2_OriginalPicWidthInSamples = m_curbeParams.dwInputPictureWidth;
335     curbe.DW2_OriginalPicHeightInSamples = m_curbeParams.dwInputPictureHeight;
336 
337     // when the input surface is NV12 tiled format and not aligned with 4 bytes,
338     // need kernel to do the padding copy with force to linear format, it's
339     // transparent to kernel and hw can handle it
340     if (m_colorRawSurface == cscColorNv12TileY && m_cscFlag == 1)
341         curbe.DW1_PictureFormat = cscColorNv12Linear;
342 
343     // RGB->YUV CSC coefficients
344     if (m_curbeParams.inputColorSpace == ECOLORSPACE_P709)
345     {
346         curbe.DW4_CSC_Coefficient_C0 = 0xFFCD;
347         curbe.DW5_CSC_Coefficient_C3 = 0x0080;
348         curbe.DW6_CSC_Coefficient_C4 = 0x004F;
349         curbe.DW7_CSC_Coefficient_C7 = 0x0010;
350         curbe.DW8_CSC_Coefficient_C8 = 0xFFD5;
351         curbe.DW9_CSC_Coefficient_C11 = 0x0080;
352         if (cscColorARGB == m_colorRawSurface)
353         {
354             curbe.DW4_CSC_Coefficient_C1 = 0xFFFB;
355             curbe.DW5_CSC_Coefficient_C2 = 0x0038;
356             curbe.DW6_CSC_Coefficient_C5 = 0x0008;
357             curbe.DW7_CSC_Coefficient_C6 = 0x0017;
358             curbe.DW8_CSC_Coefficient_C9 = 0x0038;
359             curbe.DW9_CSC_Coefficient_C10 = 0xFFF3;
360         }
361         else // cscColorABGR == m_colorRawSurface
362         {
363             curbe.DW4_CSC_Coefficient_C1 = 0x0038;
364             curbe.DW5_CSC_Coefficient_C2 = 0xFFFB;
365             curbe.DW6_CSC_Coefficient_C5 = 0x0017;
366             curbe.DW7_CSC_Coefficient_C6 = 0x0008;
367             curbe.DW8_CSC_Coefficient_C9 = 0xFFF3;
368             curbe.DW9_CSC_Coefficient_C10 = 0x0038;
369         }
370     }
371     else if (m_curbeParams.inputColorSpace == ECOLORSPACE_P601)
372     {
373         curbe.DW4_CSC_Coefficient_C0 = 0xFFD1;
374         curbe.DW5_CSC_Coefficient_C3 = 0x0080;
375         curbe.DW6_CSC_Coefficient_C4 = 0x0041;
376         curbe.DW7_CSC_Coefficient_C7 = 0x0010;
377         curbe.DW8_CSC_Coefficient_C8 = 0xFFDB;
378         curbe.DW9_CSC_Coefficient_C11 = 0x0080;
379         if (cscColorARGB == m_colorRawSurface)
380         {
381             curbe.DW4_CSC_Coefficient_C1 = 0xFFF7;
382             curbe.DW5_CSC_Coefficient_C2 = 0x0038;
383             curbe.DW6_CSC_Coefficient_C5 = 0x000D;
384             curbe.DW7_CSC_Coefficient_C6 = 0x0021;
385             curbe.DW8_CSC_Coefficient_C9 = 0x0038;
386             curbe.DW9_CSC_Coefficient_C10 = 0xFFED;
387         }
388         else // cscColorABGR == m_colorRawSurface
389         {
390             curbe.DW4_CSC_Coefficient_C1 = 0x0038;
391             curbe.DW5_CSC_Coefficient_C2 = 0xFFF7;
392             curbe.DW6_CSC_Coefficient_C5 = 0x0021;
393             curbe.DW7_CSC_Coefficient_C6 = 0x000D;
394             curbe.DW8_CSC_Coefficient_C9 = 0xFFED;
395             curbe.DW9_CSC_Coefficient_C10 = 0x0038;
396         }
397     }
398     else
399     {
400         CODECHAL_ENCODE_ASSERTMESSAGE("Unsupported ARGB input color space = %d!", m_curbeParams.inputColorSpace);
401         return MOS_STATUS_INVALID_PARAMETER;
402     }
403 
404     curbe.DW10_BTI_InputSurface = cscSrcYPlane;
405     curbe.DW11_BTI_Enc8BitSurface = cscDstConvYPlane;
406     curbe.DW12_BTI_4xDsSurface = cscDst4xDs;
407     curbe.DW13_BTI_MbStatsSurface = cscDstMbStats;
408     curbe.DW14_BTI_2xDsSurface = cscDst2xDs;
409     curbe.DW15_BTI_HistoryBuffer = cscDstHistBuffer;
410     curbe.DW16_BTI_HistorySumBuffer = cscDstHistSum;
411     curbe.DW17_BTI_MultiTaskBuffer = cscDstMultiTask;
412 
413     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_cscKernelState->m_dshRegion.AddData(
414         &curbe,
415         m_cscKernelState->dwCurbeOffset,
416         sizeof(curbe)));
417 
418     return eStatus;
419 }
420 
SendSurfaceCsc(PMOS_COMMAND_BUFFER cmdBuffer)421 MOS_STATUS CodechalEncodeCscDsG11::SendSurfaceCsc(PMOS_COMMAND_BUFFER cmdBuffer)
422 {
423     CODECHAL_ENCODE_FUNCTION_ENTER;
424 
425     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
426 
427     // PAK input surface (could be 10-bit)
428     CODECHAL_SURFACE_CODEC_PARAMS surfaceParams;
429     MOS_ZeroMemory(&surfaceParams, sizeof(surfaceParams));
430     surfaceParams.bIs2DSurface = true;
431     surfaceParams.bUseUVPlane = (cscColorNv12TileY == m_colorRawSurface ||
432         cscColorP010 == m_colorRawSurface ||
433         cscColorP210 == m_colorRawSurface ||
434         cscColorNv12Linear == m_colorRawSurface);
435     surfaceParams.bMediaBlockRW = true;
436 
437     // Configure to R16/32 for input surface
438     if (m_surfaceParamsCsc.bScalingInUses16UnormSurfFmt)
439     {
440         // 32x scaling requires R16_UNROM
441         surfaceParams.bUse16UnormSurfaceFormat = true;
442     }
443     else if (m_surfaceParamsCsc.bScalingInUses32UnormSurfFmt)
444     {
445         surfaceParams.bUse32UnormSurfaceFormat = true;
446     }
447     else
448     {
449         /*
450         * Unify surface format to avoid mismatches introduced by DS kernel between MMC on and off cases.
451         * bUseCommonKernel        | FormatIsNV12 | MmcdOn | SurfaceFormatToUse
452         *            1            |       1      |  0/1   |        R8
453         *            1            |       0      |  0/1   |        R16
454         *            0            |       1      |  0/1   |        R8
455         *            0            |       0      |   1    |        R8
456         *            0            |       0      |   0    |        R32
457         */
458         surfaceParams.bUse16UnormSurfaceFormat = !(cscColorNv12TileY == m_colorRawSurface ||
459                                                    cscColorNv12Linear == m_colorRawSurface);
460     }
461 
462     // when input surface is NV12 tiled and not aligned by 4 bytes, need kernel to do the
463     // padding copy by forcing to linear format and set the HeightInUse as Linear format
464     // kernel will use this info to calucate UV offset
465     surfaceParams.psSurface = m_surfaceParamsCsc.psInputSurface;
466     if (cscColorNv12Linear == m_colorRawSurface ||
467         (cscColorNv12TileY == m_colorRawSurface && m_cscFlag == 1))
468     {
469         surfaceParams.dwHeightInUse = (surfaceParams.psSurface->dwHeight * 3) / 2;
470     }
471     surfaceParams.dwCacheabilityControl = m_hwInterface->ComposeSurfaceCacheabilityControl(
472         MOS_CODEC_RESOURCE_USAGE_ORIGINAL_UNCOMPRESSED_PICTURE_ENCODE,
473         (codechalL3 | codechalLLC));
474 
475     surfaceParams.dwBindingTableOffset = cscSrcYPlane;
476     surfaceParams.dwUVBindingTableOffset = cscSrcUVPlane;
477     CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
478         m_hwInterface,
479         cmdBuffer,
480         &surfaceParams,
481         m_cscKernelState));
482 
483     // Converted NV12 output surface, or ENC 8-bit output surface
484     if (m_surfaceParamsCsc.psOutputCopiedSurface)
485     {
486         MOS_ZeroMemory(&surfaceParams, sizeof(surfaceParams));
487         surfaceParams.bIs2DSurface =
488         surfaceParams.bUseUVPlane =
489         surfaceParams.bMediaBlockRW =
490         surfaceParams.bIsWritable = true;
491         surfaceParams.psSurface = m_surfaceParamsCsc.psOutputCopiedSurface;
492         surfaceParams.dwCacheabilityControl = m_hwInterface->ComposeSurfaceCacheabilityControl(
493             MOS_CODEC_RESOURCE_USAGE_SURFACE_HME_DOWNSAMPLED_ENCODE,
494             codechalLLC);
495 
496         surfaceParams.dwBindingTableOffset = cscDstConvYPlane;
497         surfaceParams.dwUVBindingTableOffset = cscDstConvUVlane;
498         CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
499             m_hwInterface,
500             cmdBuffer,
501             &surfaceParams,
502             m_cscKernelState));
503     }
504 
505     // 4x downscaled surface
506     if (m_surfaceParamsCsc.psOutput4xDsSurface)
507     {
508         MOS_ZeroMemory(&surfaceParams, sizeof(surfaceParams));
509         surfaceParams.bIs2DSurface =
510         surfaceParams.bMediaBlockRW =
511         surfaceParams.bIsWritable = true;
512         surfaceParams.psSurface = m_surfaceParamsCsc.psOutput4xDsSurface;
513         surfaceParams.dwCacheabilityControl = m_hwInterface->ComposeSurfaceCacheabilityControl(
514             MOS_CODEC_RESOURCE_USAGE_SURFACE_HME_DOWNSAMPLED_ENCODE,
515             codechalLLC);
516         surfaceParams.dwBindingTableOffset = cscDst4xDs;
517         CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
518             m_hwInterface,
519             cmdBuffer,
520             &surfaceParams,
521             m_cscKernelState));
522     }
523 
524     // MB Stats surface
525     if (m_surfaceParamsCsc.presMBVProcStatsBuffer)
526     {
527         MOS_ZeroMemory(&surfaceParams, sizeof(surfaceParams));
528         surfaceParams.dwSize = MOS_BYTES_TO_DWORDS(m_hwInterface->m_avcMbStatBufferSize);
529         surfaceParams.bIsWritable = true;
530         surfaceParams.presBuffer = m_surfaceParamsCsc.presMBVProcStatsBuffer;
531         surfaceParams.dwCacheabilityControl = m_hwInterface->ComposeSurfaceCacheabilityControl(
532             MOS_CODEC_RESOURCE_USAGE_SURFACE_HME_DOWNSAMPLED_ENCODE,
533             codechalLLC);
534         surfaceParams.dwBindingTableOffset = cscDstMbStats;
535         CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
536             m_hwInterface,
537             cmdBuffer,
538             &surfaceParams,
539             m_cscKernelState));
540     }
541 
542     // 2x downscaled surface
543     if (m_surfaceParamsCsc.psOutput2xDsSurface)
544     {
545         MOS_ZeroMemory(&surfaceParams, sizeof(surfaceParams));
546         surfaceParams.bIs2DSurface =
547         surfaceParams.bMediaBlockRW =
548         surfaceParams.bIsWritable = true;
549         surfaceParams.psSurface = m_surfaceParamsCsc.psOutput2xDsSurface;
550         surfaceParams.dwCacheabilityControl = m_hwInterface->ComposeSurfaceCacheabilityControl(
551             MOS_CODEC_RESOURCE_USAGE_SURFACE_HME_DOWNSAMPLED_ENCODE,
552             codechalLLC);
553         surfaceParams.dwBindingTableOffset = cscDst2xDs;
554         CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
555             m_hwInterface,
556             cmdBuffer,
557             &surfaceParams,
558             m_cscKernelState));
559     }
560 
561     if (m_surfaceParamsCsc.hevcExtParams)
562     {
563         auto hevcExtParams = (HevcExtKernelParams*)m_surfaceParamsCsc.hevcExtParams;
564 
565         // History buffer
566         if (hevcExtParams->presHistoryBuffer)
567         {
568             MOS_ZeroMemory(&surfaceParams, sizeof(surfaceParams));
569             surfaceParams.dwSize = MOS_BYTES_TO_DWORDS(hevcExtParams->dwSizeHistoryBuffer);
570             surfaceParams.dwOffset = hevcExtParams->dwOffsetHistoryBuffer;
571             surfaceParams.bIsWritable = true;
572             surfaceParams.presBuffer = hevcExtParams->presHistoryBuffer;
573             surfaceParams.dwCacheabilityControl = m_hwInterface->ComposeSurfaceCacheabilityControl(
574                 MOS_CODEC_RESOURCE_USAGE_SURFACE_HME_DOWNSAMPLED_ENCODE,
575                 codechalLLC);
576             surfaceParams.dwBindingTableOffset = cscDstHistBuffer;
577             CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
578                 m_hwInterface,
579                 cmdBuffer,
580                 &surfaceParams,
581                 m_cscKernelState));
582         }
583 
584         // History sum output buffer
585         if (hevcExtParams->presHistorySumBuffer)
586         {
587             MOS_ZeroMemory(&surfaceParams, sizeof(surfaceParams));
588             surfaceParams.dwSize = MOS_BYTES_TO_DWORDS(hevcExtParams->dwSizeHistorySumBuffer);
589             surfaceParams.dwOffset = hevcExtParams->dwOffsetHistorySumBuffer;
590             surfaceParams.bIsWritable = true;
591             surfaceParams.presBuffer = hevcExtParams->presHistorySumBuffer;
592             surfaceParams.dwCacheabilityControl = m_hwInterface->ComposeSurfaceCacheabilityControl(
593                 MOS_CODEC_RESOURCE_USAGE_SURFACE_HME_DOWNSAMPLED_ENCODE,
594                 codechalLLC);
595             surfaceParams.dwBindingTableOffset = cscDstHistSum;
596             CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
597                 m_hwInterface,
598                 cmdBuffer,
599                 &surfaceParams,
600                 m_cscKernelState));
601         }
602 
603         // multi-thread task buffer
604         if (hevcExtParams->presMultiThreadTaskBuffer)
605         {
606             MOS_ZeroMemory(&surfaceParams, sizeof(surfaceParams));
607             surfaceParams.dwSize = MOS_BYTES_TO_DWORDS(hevcExtParams->dwSizeMultiThreadTaskBuffer);
608             surfaceParams.dwOffset = hevcExtParams->dwOffsetMultiThreadTaskBuffer;
609             surfaceParams.bIsWritable = true;
610             surfaceParams.presBuffer = hevcExtParams->presMultiThreadTaskBuffer;
611             surfaceParams.dwCacheabilityControl = m_hwInterface->ComposeSurfaceCacheabilityControl(
612                 MOS_CODEC_RESOURCE_USAGE_SURFACE_HME_DOWNSAMPLED_ENCODE,
613                 codechalLLC);
614             surfaceParams.dwBindingTableOffset = cscDstMultiTask;
615             CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
616                 m_hwInterface,
617                 cmdBuffer,
618                 &surfaceParams,
619                 m_cscKernelState));
620         }
621     }
622 
623     return eStatus;
624 }
625 
InitKernelStateDS()626 MOS_STATUS CodechalEncodeCscDsG11::InitKernelStateDS()
627 {
628     CODECHAL_ENCODE_FUNCTION_ENTER;
629 
630     m_dsBTCount[0] = ds4xNumSurfaces;
631     m_dsCurbeLength[0] =
632     m_dsInlineDataLength = sizeof(Ds4xKernelCurbeData);
633     m_dsBTISrcY = ds4xSrcYPlane;
634     m_dsBTIDstY = ds4xDstYPlane;
635     m_dsBTISrcYTopField = ds4xSrcYPlaneTopField;
636     m_dsBTIDstYTopField = ds4xDstYPlaneTopField;
637     m_dsBTISrcYBtmField = ds4xSrcYPlaneBtmField;
638     m_dsBTIDstYBtmField = ds4xDstYPlaneBtmField;
639     m_dsBTIDstMbVProc = ds4xDstMbVProc;
640     m_dsBTIDstMbVProcTopField = ds4xDstMbVProcTopField;
641     m_dsBTIDstMbVProcBtmField = ds4xDstMbVProcBtmField;
642 
643     uint32_t kernelSize, numKernelsToLoad = m_encoder->m_interlacedFieldDisabled ? 1 : CODEC_NUM_FIELDS_PER_FRAME;
644     m_dsKernelBase = m_kernelBase;
645     CODECHAL_KERNEL_HEADER currKrnHeader;
646     for (uint32_t krnStateIdx = 0; krnStateIdx < numKernelsToLoad; krnStateIdx++)
647     {
648         kernelSize = m_combinedKernelSize;
649         m_dsKernelState = &m_encoder->m_scaling4xKernelStates[krnStateIdx];
650 
651         CODECHAL_ENCODE_CHK_STATUS_RETURN(GetCommonKernelHeaderAndSizeG11(
652             m_dsKernelBase,
653             ENC_SCALING4X,
654             krnStateIdx,
655             &currKrnHeader,
656             &kernelSize))
657 
658         m_dsKernelState->KernelParams.iBTCount = m_dsBTCount[0];
659         m_dsKernelState->KernelParams.iThreadCount = m_renderInterface->GetHwCaps()->dwMaxThreads;
660         m_dsKernelState->KernelParams.iCurbeLength = m_dsCurbeLength[0];
661         m_dsKernelState->KernelParams.iBlockWidth = CODECHAL_MACROBLOCK_WIDTH;
662         m_dsKernelState->KernelParams.iBlockHeight = CODECHAL_MACROBLOCK_HEIGHT;
663         m_dsKernelState->KernelParams.iIdCount = 1;
664         m_dsKernelState->KernelParams.iInlineDataLength = m_dsInlineDataLength;
665 
666         m_dsKernelState->dwCurbeOffset = m_stateHeapInterface->GetSizeofCmdInterfaceDescriptorData();
667         m_dsKernelState->KernelParams.pBinary = m_dsKernelBase + (currKrnHeader.KernelStartPointer << MHW_KERNEL_OFFSET_SHIFT);
668         m_dsKernelState->KernelParams.iSize = kernelSize;
669         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->CalculateSshAndBtSizesRequested(
670             m_dsKernelState->KernelParams.iBTCount,
671             &m_dsKernelState->dwSshSize,
672             &m_dsKernelState->dwBindingTableSize));
673 
674         CODECHAL_ENCODE_CHK_NULL_RETURN(m_renderInterface->m_stateHeapInterface);
675         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->MhwInitISH(m_renderInterface->m_stateHeapInterface, m_dsKernelState));
676 
677         if (m_32xMeSupported)
678         {
679             m_dsKernelState = &m_encoder->m_scaling2xKernelStates[krnStateIdx];
680 
681             CODECHAL_ENCODE_CHK_STATUS_RETURN(GetCommonKernelHeaderAndSizeG11(
682                 m_dsKernelBase,
683                 ENC_SCALING2X,
684                 krnStateIdx,
685                 &currKrnHeader,
686                 &kernelSize))
687 
688             m_dsKernelState->KernelParams.iBTCount = m_dsBTCount[1];
689             m_dsKernelState->KernelParams.iThreadCount = m_renderInterface->GetHwCaps()->dwMaxThreads;
690             m_dsKernelState->KernelParams.iCurbeLength = m_dsCurbeLength[1];
691             m_dsKernelState->KernelParams.iBlockWidth = CODECHAL_MACROBLOCK_WIDTH;
692             m_dsKernelState->KernelParams.iBlockHeight = CODECHAL_MACROBLOCK_HEIGHT;
693 
694             m_dsKernelState->dwCurbeOffset = m_stateHeapInterface->GetSizeofCmdInterfaceDescriptorData();
695             m_dsKernelState->KernelParams.pBinary = m_dsKernelBase + (currKrnHeader.KernelStartPointer << MHW_KERNEL_OFFSET_SHIFT);
696             m_dsKernelState->KernelParams.iSize = kernelSize;
697             CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->CalculateSshAndBtSizesRequested(
698                 m_dsKernelState->KernelParams.iBTCount,
699                 &m_dsKernelState->dwSshSize,
700                 &m_dsKernelState->dwBindingTableSize));
701 
702             CODECHAL_ENCODE_CHK_NULL_RETURN(m_renderInterface->m_stateHeapInterface);
703             CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->MhwInitISH(m_renderInterface->m_stateHeapInterface, m_dsKernelState));
704         }
705 
706         if (m_encoder->m_interlacedFieldDisabled)
707         {
708             m_encoder->m_scaling4xKernelStates[1] = m_encoder->m_scaling4xKernelStates[0];
709 
710             if (m_32xMeSupported)
711             {
712                 m_encoder->m_scaling2xKernelStates[1] = m_encoder->m_scaling2xKernelStates[0];
713             }
714         }
715     }
716 
717     return MOS_STATUS_SUCCESS;
718 }
719 
SetCurbeDS4x()720 MOS_STATUS CodechalEncodeCscDsG11::SetCurbeDS4x()
721 {
722     CODECHAL_ENCODE_FUNCTION_ENTER;
723 
724     if (CODECHAL_AVC != m_standard)
725     {
726         return CodechalEncodeCscDs::SetCurbeDS4x();
727     }
728 
729     Ds4xKernelCurbeData curbe;
730 
731     curbe.DW0_InputPictureWidth = m_curbeParams.dwInputPictureWidth;
732     curbe.DW0_InputPictureHeight = m_curbeParams.dwInputPictureHeight;
733 
734     curbe.DW1_InputYBTIFrame = ds4xSrcYPlane;
735     curbe.DW2_OutputYBTIFrame = ds4xDstYPlane;
736 
737     if (m_curbeParams.bFieldPicture)
738     {
739         curbe.DW3_InputYBTIBottomField = ds4xSrcYPlaneBtmField;
740         curbe.DW4_OutputYBTIBottomField = ds4xDstYPlaneBtmField;
741     }
742 
743     if ((curbe.DW6_EnableMBFlatnessCheck = m_curbeParams.bFlatnessCheckEnabled))
744     {
745         curbe.DW5_FlatnessThreshold = 128;
746     }
747 
748     // For gen10 DS kernel, If Flatness Check enabled, need enable MBVariance as well. Otherwise will not output MbIsFlat.
749     curbe.DW6_EnableMBVarianceOutput = curbe.DW6_EnableMBFlatnessCheck || m_curbeParams.bMBVarianceOutputEnabled;
750     curbe.DW6_EnableMBPixelAverageOutput = m_curbeParams.bMBPixelAverageOutputEnabled;
751     curbe.DW6_EnableBlock8x8StatisticsOutput = m_curbeParams.bBlock8x8StatisticsEnabled;
752 
753     if (curbe.DW6_EnableMBVarianceOutput || curbe.DW6_EnableMBPixelAverageOutput)
754     {
755         curbe.DW8_MBVProcStatsBTIFrame = ds4xDstMbVProc;
756 
757         if (m_curbeParams.bFieldPicture)
758         {
759             curbe.DW9_MBVProcStatsBTIBottomField = ds4xDstMbVProcBtmField;
760         }
761     }
762 
763     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_dsKernelState->m_dshRegion.AddData(
764         &curbe,
765         m_dsKernelState->dwCurbeOffset,
766         sizeof(curbe)));
767 
768     CODECHAL_DEBUG_TOOL(
769         if (m_encoder->m_encodeParState)
770         {
771             CODECHAL_ENCODE_CHK_STATUS_RETURN(m_encoder->m_encodeParState->PopulateDsParam(&curbe));
772         }
773     )
774 
775     return MOS_STATUS_SUCCESS;
776 }
777 
InitSfcState()778 MOS_STATUS CodechalEncodeCscDsG11::InitSfcState()
779 {
780     CODECHAL_ENCODE_FUNCTION_ENTER;
781 
782     if (!m_sfcState)
783     {
784         m_sfcState = (CodecHalEncodeSfc*)MOS_New(CodecHalEncodeSfcG11);
785         CODECHAL_ENCODE_CHK_NULL_RETURN(m_sfcState);
786 
787         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_sfcState->Initialize(m_hwInterface, m_osInterface));
788 
789         m_sfcState->SetInputColorSpace(MHW_CSpace_sRGB);
790     }
791     return MOS_STATUS_SUCCESS;
792 }
793 
794 
CodechalEncodeCscDsG11(CodechalEncoderState * encoder)795 CodechalEncodeCscDsG11::CodechalEncodeCscDsG11(CodechalEncoderState* encoder)
796     : CodechalEncodeCscDs(encoder)
797 {
798     m_cscKernelUID = IDR_CODEC_HME_DS_SCOREBOARD_KERNEL;
799     m_cscCurbeLength = sizeof(CscKernelCurbeData);
800 #if defined(ENABLE_KERNELS)
801     m_kernelBase = (uint8_t*)IGCODECKRN_G11;
802 #endif
803 }
804 
~CodechalEncodeCscDsG11()805 CodechalEncodeCscDsG11::~CodechalEncodeCscDsG11()
806 {
807     // free the MbStats surface
808     m_osInterface->pfnFreeResource(m_osInterface, &m_resMbStatsBuffer);
809 }
810