1 /*
2 * Copyright (c) 2017-2019, Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 */
22 //!
23 //! \file codechal_encode_csc_ds_g11.cpp
24 //! \brief This file implements the Csc+Ds feature for all codecs on Gen11 platform
25 //!
26
27 #include "codechal_encoder_base.h"
28 #include "codechal_encode_sfc_g11.h"
29 #include "codechal_encode_csc_ds_g11.h"
30 #include "codechal_kernel_header_g11.h"
31 #include "codeckrnheader.h"
32 #if defined(ENABLE_KERNELS)
33 #include "igcodeckrn_g11.h"
34 #endif
35 #if USE_CODECHAL_DEBUG_TOOL
36 #include "codechal_debug_encode_par_g11.h"
37 #endif
38
GetBTCount() const39 uint8_t CodechalEncodeCscDsG11::GetBTCount() const
40 {
41 return (uint8_t)cscNumSurfaces;
42 }
43
AllocateSurfaceCsc()44 MOS_STATUS CodechalEncodeCscDsG11::AllocateSurfaceCsc()
45 {
46 CODECHAL_ENCODE_FUNCTION_ENTER;
47
48 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
49
50 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalEncodeCscDs::AllocateSurfaceCsc());
51
52 // allocate the MbStats surface
53 if (Mos_ResourceIsNull(&m_resMbStatsBuffer))
54 {
55 MOS_ALLOC_GFXRES_PARAMS allocParamsForBufferLinear;
56 MOS_ZeroMemory(&allocParamsForBufferLinear, sizeof(MOS_ALLOC_GFXRES_PARAMS));
57 allocParamsForBufferLinear.Type = MOS_GFXRES_BUFFER;
58 allocParamsForBufferLinear.TileType = MOS_TILE_LINEAR;
59 allocParamsForBufferLinear.Format = Format_Buffer;
60 uint32_t alignedWidth = MOS_ALIGN_CEIL(CODECHAL_GET_WIDTH_IN_MACROBLOCKS(m_encoder->m_oriFrameWidth), 64);
61 uint32_t alignedHeight = MOS_ALIGN_CEIL(CODECHAL_GET_WIDTH_IN_MACROBLOCKS(m_encoder->m_oriFrameHeight), 64);
62 allocParamsForBufferLinear.dwBytes = m_hwInterface->m_avcMbStatBufferSize =
63 MOS_ALIGN_CEIL((alignedWidth * alignedHeight << 6) , 1024);
64 allocParamsForBufferLinear.pBufName = "MB Statistics Buffer";
65
66 CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(m_osInterface->pfnAllocateResource(
67 m_osInterface,
68 &allocParamsForBufferLinear,
69 &m_resMbStatsBuffer), "Failed to allocate MB Statistics Buffer.");
70 }
71
72 return eStatus;
73 }
74
CheckRawColorFormat(MOS_FORMAT format,MOS_TILE_TYPE tileType)75 MOS_STATUS CodechalEncodeCscDsG11::CheckRawColorFormat(MOS_FORMAT format, MOS_TILE_TYPE tileType)
76 {
77 CODECHAL_ENCODE_FUNCTION_ENTER;
78
79 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
80
81 // check input color format, and set target traverse thread space size
82 switch (format)
83 {
84 case Format_NV12:
85 m_colorRawSurface = cscColorNv12Linear;
86 m_cscRequireColor = 1;
87 break;
88 case Format_YUY2:
89 case Format_YUYV:
90 m_colorRawSurface = cscColorYUY2;
91 m_cscRequireColor = (uint8_t)HCP_CHROMA_FORMAT_YUV420 == m_outputChromaFormat;
92 m_cscRequireConvTo8bPlanar = (uint8_t)HCP_CHROMA_FORMAT_YUV422 == m_outputChromaFormat;
93 break;
94 case Format_A8R8G8B8:
95 m_colorRawSurface = cscColorARGB;
96 m_cscUsingSfc = IsSfcEnabled() ? 1 : 0;
97 m_cscRequireColor = 1;
98 //Use EU for better performance in big resolution cases
99 if (m_cscRawSurfWidth * m_cscRawSurfHeight > 1920 * 1088)
100 {
101 m_cscUsingSfc = 0;
102 }
103 break;
104 case Format_A8B8G8R8:
105 m_colorRawSurface = cscColorABGR;
106 m_cscRequireColor = 1;
107 m_cscUsingSfc = IsSfcEnabled() ? 1 : 0;
108 // Use EU for better performance in big resolution cases or TU1
109 if (m_cscRawSurfWidth * m_cscRawSurfHeight > 1920 * 1088)
110 {
111 m_cscUsingSfc = 0;
112 }
113 break;
114 case Format_P010:
115 m_colorRawSurface = cscColorP010;
116 m_cscRequireConvTo8bPlanar = 1;
117 break;
118 case Format_Y210:
119 m_colorRawSurface = cscColorY210;
120 if (m_encoder->m_vdencEnabled)
121 {
122 CODECHAL_ENCODE_ASSERTMESSAGE("Input color format Y210 Linear or Tile X not yet supported!");
123 eStatus = MOS_STATUS_PLATFORM_NOT_SUPPORTED;
124 }
125 else
126 {
127 m_cscRequireConvTo8bPlanar = 1;
128 }
129 break;
130 case Format_P210:
131 // not supported yet so fall-thru to default
132 m_colorRawSurface = cscColorP210;
133 m_cscRequireConvTo8bPlanar = 1;
134 default:
135 CODECHAL_ENCODE_ASSERTMESSAGE("Input color format = %d not yet supported!", format);
136 eStatus = MOS_STATUS_INVALID_PARAMETER;
137 break;
138 }
139
140 return eStatus;
141 }
142
InitKernelStateCsc()143 MOS_STATUS CodechalEncodeCscDsG11::InitKernelStateCsc()
144 {
145 CODECHAL_ENCODE_FUNCTION_ENTER;
146
147 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
148
149 CODECHAL_KERNEL_HEADER currKrnHeader;
150 auto kernelSize = m_combinedKernelSize;
151 CODECHAL_ENCODE_CHK_STATUS_RETURN(GetCommonKernelHeaderAndSizeG11(
152 m_kernelBase,
153 ENC_SCALING_CONVERSION,
154 0,
155 &currKrnHeader,
156 &kernelSize));
157
158 m_cscKernelState->KernelParams.iBTCount = cscNumSurfaces;
159 m_cscKernelState->KernelParams.iThreadCount = m_hwInterface->GetRenderInterface()->GetHwCaps()->dwMaxThreads;
160 m_cscKernelState->KernelParams.iCurbeLength = m_cscCurbeLength;
161 m_cscKernelState->KernelParams.iBlockWidth = CODECHAL_MACROBLOCK_WIDTH;
162 m_cscKernelState->KernelParams.iBlockHeight = CODECHAL_MACROBLOCK_HEIGHT;
163 m_cscKernelState->KernelParams.iIdCount = 1;
164 m_cscKernelState->KernelParams.iInlineDataLength = m_cscCurbeLength;
165 m_cscKernelState->dwCurbeOffset = m_stateHeapInterface->GetSizeofCmdInterfaceDescriptorData();
166 m_cscKernelState->KernelParams.pBinary =
167 m_kernelBase + (currKrnHeader.KernelStartPointer << MHW_KERNEL_OFFSET_SHIFT);
168 m_cscKernelState->KernelParams.iSize = kernelSize;
169
170 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->CalculateSshAndBtSizesRequested(
171 m_cscKernelState->KernelParams.iBTCount,
172 &m_cscKernelState->dwSshSize,
173 &m_cscKernelState->dwBindingTableSize));
174
175 CODECHAL_ENCODE_CHK_NULL_RETURN(m_renderInterface->m_stateHeapInterface);
176 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->MhwInitISH(m_renderInterface->m_stateHeapInterface, m_cscKernelState));
177
178 return eStatus;
179 }
180
SetKernelParamsCsc(KernelParams * params)181 MOS_STATUS CodechalEncodeCscDsG11::SetKernelParamsCsc(KernelParams* params)
182 {
183 CODECHAL_ENCODE_FUNCTION_ENTER;
184
185 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
186
187 CODECHAL_ENCODE_CHK_NULL_RETURN(params);
188
189 m_lastTaskInPhase = params->bLastTaskInPhaseCSC;
190
191 auto inputFrameWidth = m_encoder->m_frameWidth;
192 auto inputFrameHeight = m_encoder->m_frameHeight;
193 auto inputSurface = m_rawSurfaceToEnc;
194 auto output4xDsSurface = m_encoder->m_trackedBuf->Get4xDsSurface(CODEC_CURR_TRACKED_BUFFER);
195 auto output2xDsSurface = m_encoder->m_trackedBuf->Get2xDsSurface(CODEC_CURR_TRACKED_BUFFER);
196 auto mbStatsSurface = &m_resMbStatsBuffer;
197
198 m_curbeParams.bHevcEncHistorySum = false;
199 m_surfaceParamsCsc.hevcExtParams = nullptr;
200
201 if (dsDisabled == params->stageDsConversion)
202 {
203 m_curbeParams.bConvertFlag = m_cscFlag != 0;
204
205 if (m_2xScalingEnabled && m_scalingEnabled)
206 {
207 m_curbeParams.downscaleStage = dsStage2x4x;
208 m_currRefList->b4xScalingUsed =
209 m_currRefList->b2xScalingUsed = true;
210 m_surfaceParamsCsc.bScalingInUses16UnormSurfFmt = false;
211 m_surfaceParamsCsc.bScalingInUses32UnormSurfFmt = false;
212 }
213 else if (m_2xScalingEnabled)
214 {
215 m_curbeParams.downscaleStage = dsStage2x;
216 m_currRefList->b2xScalingUsed = true;
217 output4xDsSurface = nullptr;
218 mbStatsSurface = nullptr;
219 m_surfaceParamsCsc.bScalingInUses16UnormSurfFmt = true;
220 m_surfaceParamsCsc.bScalingInUses32UnormSurfFmt = false;
221 }
222 else if (m_scalingEnabled)
223 {
224 m_curbeParams.downscaleStage = dsStage4x;
225 m_currRefList->b4xScalingUsed = true;
226 output2xDsSurface = nullptr;
227 m_surfaceParamsCsc.bScalingInUses16UnormSurfFmt = false;
228 m_surfaceParamsCsc.bScalingInUses32UnormSurfFmt = true;
229 }
230 else
231 {
232 // do CSC only
233 m_curbeParams.downscaleStage = dsDisabled;
234 output4xDsSurface = nullptr;
235 output2xDsSurface = nullptr;
236 mbStatsSurface = nullptr;
237 m_surfaceParamsCsc.bScalingInUses16UnormSurfFmt = false;
238 m_surfaceParamsCsc.bScalingInUses32UnormSurfFmt = false;
239 }
240
241 // history sum to be enabled only for the 4x stage
242 if (params->hevcExtParams)
243 {
244 auto hevcExtParam = (HevcExtKernelParams*)params->hevcExtParams;
245 m_curbeParams.bUseLCU32 = hevcExtParam->bUseLCU32;
246 m_curbeParams.bHevcEncHistorySum = hevcExtParam->bHevcEncHistorySum;
247 m_surfaceParamsCsc.hevcExtParams = params->hevcExtParams;
248 }
249 }
250 else
251 {
252 // do 16x/32x downscaling
253 inputFrameWidth = m_encoder->m_downscaledWidth4x;
254 inputFrameHeight = m_encoder->m_downscaledHeight4x;
255 m_curbeParams.bConvertFlag = false;
256 mbStatsSurface = nullptr;
257
258 if (dsStage16x == params->stageDsConversion)
259 {
260 m_currRefList->b16xScalingUsed = true;
261 m_lastTaskInPhase = params->bLastTaskInPhase16xDS;
262 m_curbeParams.downscaleStage = dsStage16x;
263 inputFrameWidth = m_encoder->m_downscaledWidth4x << 2;
264 inputFrameHeight = m_encoder->m_downscaledHeight4x << 2;
265
266 inputSurface = m_encoder->m_trackedBuf->Get4xDsSurface(CODEC_CURR_TRACKED_BUFFER);
267 output4xDsSurface = m_encoder->m_trackedBuf->Get16xDsSurface(CODEC_CURR_TRACKED_BUFFER);
268 output2xDsSurface = nullptr;
269 m_surfaceParamsCsc.bScalingInUses16UnormSurfFmt = false;
270 m_surfaceParamsCsc.bScalingInUses32UnormSurfFmt = true;
271 }
272 else if (dsStage32x == params->stageDsConversion)
273 {
274 m_currRefList->b32xScalingUsed = true;
275 m_lastTaskInPhase = params->bLastTaskInPhase32xDS;
276 m_curbeParams.downscaleStage = dsStage2x;
277 inputFrameWidth = m_encoder->m_downscaledWidth16x;
278 inputFrameHeight = m_encoder->m_downscaledHeight16x;
279 inputSurface = m_encoder->m_trackedBuf->Get16xDsSurface(CODEC_CURR_TRACKED_BUFFER);
280 output4xDsSurface = nullptr;
281 output2xDsSurface = m_encoder->m_trackedBuf->Get32xDsSurface(CODEC_CURR_TRACKED_BUFFER);
282 m_surfaceParamsCsc.bScalingInUses16UnormSurfFmt = true;
283 m_surfaceParamsCsc.bScalingInUses32UnormSurfFmt = false;
284 }
285 }
286
287 // setup Curbe
288 m_curbeParams.dwInputPictureWidth = inputFrameWidth;
289 m_curbeParams.dwInputPictureHeight = inputFrameHeight;
290
291 // setup surface states
292 m_surfaceParamsCsc.psInputSurface = inputSurface;
293 m_surfaceParamsCsc.psOutputCopiedSurface = m_curbeParams.bConvertFlag ? m_encoder->m_trackedBuf->GetCscSurface(CODEC_CURR_TRACKED_BUFFER) : nullptr;
294 m_surfaceParamsCsc.psOutput4xDsSurface = output4xDsSurface;
295 m_surfaceParamsCsc.psOutput2xDsSurface = output2xDsSurface;
296 m_surfaceParamsCsc.presMBVProcStatsBuffer = mbStatsSurface;
297 m_surfaceParamsCsc.hevcExtParams = params->hevcExtParams;
298
299 if (dsStage16x == params->stageDsConversion)
300 {
301 // here to calculate the walkder resolution, we need to use the input surface resolution.
302 // it is inputFrameWidth/height / 4 in 16xStage, becasue kernel internally will do this.
303 inputFrameWidth = inputFrameWidth >> 2;
304 inputFrameHeight = inputFrameHeight >> 2;
305 }
306
307 // setup walker param
308 m_walkerResolutionX = CODECHAL_GET_4xDS_SIZE_32ALIGNED(inputFrameWidth) >> 3;
309 m_walkerResolutionY = CODECHAL_GET_4xDS_SIZE_32ALIGNED(inputFrameHeight) >> 3;
310
311 return eStatus;
312 }
313
SetCurbeCsc()314 MOS_STATUS CodechalEncodeCscDsG11::SetCurbeCsc()
315 {
316 CODECHAL_ENCODE_FUNCTION_ENTER;
317
318 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
319
320 CscKernelCurbeData curbe;
321
322 curbe.DW0_OutputBitDepthForChroma = m_curbeParams.ucEncBitDepthChroma;
323 curbe.DW0_OutputBitDepthForLuma = m_curbeParams.ucEncBitDepthLuma;
324 curbe.DW0_RoundingEnable = 1;
325
326 curbe.DW1_PictureFormat = (uint8_t)((m_colorRawSurface == cscColorABGR) ? cscColorARGB : m_colorRawSurface); // Use cscColorARGB for ABGR CSC, just switch B and R coefficients
327 curbe.DW1_ConvertFlag = m_curbeParams.bConvertFlag;
328 curbe.DW1_DownscaleStage = (uint8_t)m_curbeParams.downscaleStage;
329 curbe.DW1_MbStatisticsDumpFlag = (m_curbeParams.downscaleStage == dsStage4x || m_curbeParams.downscaleStage == dsStage2x4x);
330 curbe.DW1_YUY2ConversionFlag = (m_colorRawSurface == cscColorYUY2) && m_cscRequireColor;
331 curbe.DW1_HevcEncHistorySum = m_curbeParams.bHevcEncHistorySum;
332 curbe.DW1_LCUSize = m_curbeParams.bUseLCU32;
333
334 curbe.DW2_OriginalPicWidthInSamples = m_curbeParams.dwInputPictureWidth;
335 curbe.DW2_OriginalPicHeightInSamples = m_curbeParams.dwInputPictureHeight;
336
337 // when the input surface is NV12 tiled format and not aligned with 4 bytes,
338 // need kernel to do the padding copy with force to linear format, it's
339 // transparent to kernel and hw can handle it
340 if (m_colorRawSurface == cscColorNv12TileY && m_cscFlag == 1)
341 curbe.DW1_PictureFormat = cscColorNv12Linear;
342
343 // RGB->YUV CSC coefficients
344 if (m_curbeParams.inputColorSpace == ECOLORSPACE_P709)
345 {
346 curbe.DW4_CSC_Coefficient_C0 = 0xFFCD;
347 curbe.DW5_CSC_Coefficient_C3 = 0x0080;
348 curbe.DW6_CSC_Coefficient_C4 = 0x004F;
349 curbe.DW7_CSC_Coefficient_C7 = 0x0010;
350 curbe.DW8_CSC_Coefficient_C8 = 0xFFD5;
351 curbe.DW9_CSC_Coefficient_C11 = 0x0080;
352 if (cscColorARGB == m_colorRawSurface)
353 {
354 curbe.DW4_CSC_Coefficient_C1 = 0xFFFB;
355 curbe.DW5_CSC_Coefficient_C2 = 0x0038;
356 curbe.DW6_CSC_Coefficient_C5 = 0x0008;
357 curbe.DW7_CSC_Coefficient_C6 = 0x0017;
358 curbe.DW8_CSC_Coefficient_C9 = 0x0038;
359 curbe.DW9_CSC_Coefficient_C10 = 0xFFF3;
360 }
361 else // cscColorABGR == m_colorRawSurface
362 {
363 curbe.DW4_CSC_Coefficient_C1 = 0x0038;
364 curbe.DW5_CSC_Coefficient_C2 = 0xFFFB;
365 curbe.DW6_CSC_Coefficient_C5 = 0x0017;
366 curbe.DW7_CSC_Coefficient_C6 = 0x0008;
367 curbe.DW8_CSC_Coefficient_C9 = 0xFFF3;
368 curbe.DW9_CSC_Coefficient_C10 = 0x0038;
369 }
370 }
371 else if (m_curbeParams.inputColorSpace == ECOLORSPACE_P601)
372 {
373 curbe.DW4_CSC_Coefficient_C0 = 0xFFD1;
374 curbe.DW5_CSC_Coefficient_C3 = 0x0080;
375 curbe.DW6_CSC_Coefficient_C4 = 0x0041;
376 curbe.DW7_CSC_Coefficient_C7 = 0x0010;
377 curbe.DW8_CSC_Coefficient_C8 = 0xFFDB;
378 curbe.DW9_CSC_Coefficient_C11 = 0x0080;
379 if (cscColorARGB == m_colorRawSurface)
380 {
381 curbe.DW4_CSC_Coefficient_C1 = 0xFFF7;
382 curbe.DW5_CSC_Coefficient_C2 = 0x0038;
383 curbe.DW6_CSC_Coefficient_C5 = 0x000D;
384 curbe.DW7_CSC_Coefficient_C6 = 0x0021;
385 curbe.DW8_CSC_Coefficient_C9 = 0x0038;
386 curbe.DW9_CSC_Coefficient_C10 = 0xFFED;
387 }
388 else // cscColorABGR == m_colorRawSurface
389 {
390 curbe.DW4_CSC_Coefficient_C1 = 0x0038;
391 curbe.DW5_CSC_Coefficient_C2 = 0xFFF7;
392 curbe.DW6_CSC_Coefficient_C5 = 0x0021;
393 curbe.DW7_CSC_Coefficient_C6 = 0x000D;
394 curbe.DW8_CSC_Coefficient_C9 = 0xFFED;
395 curbe.DW9_CSC_Coefficient_C10 = 0x0038;
396 }
397 }
398 else
399 {
400 CODECHAL_ENCODE_ASSERTMESSAGE("Unsupported ARGB input color space = %d!", m_curbeParams.inputColorSpace);
401 return MOS_STATUS_INVALID_PARAMETER;
402 }
403
404 curbe.DW10_BTI_InputSurface = cscSrcYPlane;
405 curbe.DW11_BTI_Enc8BitSurface = cscDstConvYPlane;
406 curbe.DW12_BTI_4xDsSurface = cscDst4xDs;
407 curbe.DW13_BTI_MbStatsSurface = cscDstMbStats;
408 curbe.DW14_BTI_2xDsSurface = cscDst2xDs;
409 curbe.DW15_BTI_HistoryBuffer = cscDstHistBuffer;
410 curbe.DW16_BTI_HistorySumBuffer = cscDstHistSum;
411 curbe.DW17_BTI_MultiTaskBuffer = cscDstMultiTask;
412
413 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_cscKernelState->m_dshRegion.AddData(
414 &curbe,
415 m_cscKernelState->dwCurbeOffset,
416 sizeof(curbe)));
417
418 return eStatus;
419 }
420
SendSurfaceCsc(PMOS_COMMAND_BUFFER cmdBuffer)421 MOS_STATUS CodechalEncodeCscDsG11::SendSurfaceCsc(PMOS_COMMAND_BUFFER cmdBuffer)
422 {
423 CODECHAL_ENCODE_FUNCTION_ENTER;
424
425 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
426
427 // PAK input surface (could be 10-bit)
428 CODECHAL_SURFACE_CODEC_PARAMS surfaceParams;
429 MOS_ZeroMemory(&surfaceParams, sizeof(surfaceParams));
430 surfaceParams.bIs2DSurface = true;
431 surfaceParams.bUseUVPlane = (cscColorNv12TileY == m_colorRawSurface ||
432 cscColorP010 == m_colorRawSurface ||
433 cscColorP210 == m_colorRawSurface ||
434 cscColorNv12Linear == m_colorRawSurface);
435 surfaceParams.bMediaBlockRW = true;
436
437 // Configure to R16/32 for input surface
438 if (m_surfaceParamsCsc.bScalingInUses16UnormSurfFmt)
439 {
440 // 32x scaling requires R16_UNROM
441 surfaceParams.bUse16UnormSurfaceFormat = true;
442 }
443 else if (m_surfaceParamsCsc.bScalingInUses32UnormSurfFmt)
444 {
445 surfaceParams.bUse32UnormSurfaceFormat = true;
446 }
447 else
448 {
449 /*
450 * Unify surface format to avoid mismatches introduced by DS kernel between MMC on and off cases.
451 * bUseCommonKernel | FormatIsNV12 | MmcdOn | SurfaceFormatToUse
452 * 1 | 1 | 0/1 | R8
453 * 1 | 0 | 0/1 | R16
454 * 0 | 1 | 0/1 | R8
455 * 0 | 0 | 1 | R8
456 * 0 | 0 | 0 | R32
457 */
458 surfaceParams.bUse16UnormSurfaceFormat = !(cscColorNv12TileY == m_colorRawSurface ||
459 cscColorNv12Linear == m_colorRawSurface);
460 }
461
462 // when input surface is NV12 tiled and not aligned by 4 bytes, need kernel to do the
463 // padding copy by forcing to linear format and set the HeightInUse as Linear format
464 // kernel will use this info to calucate UV offset
465 surfaceParams.psSurface = m_surfaceParamsCsc.psInputSurface;
466 if (cscColorNv12Linear == m_colorRawSurface ||
467 (cscColorNv12TileY == m_colorRawSurface && m_cscFlag == 1))
468 {
469 surfaceParams.dwHeightInUse = (surfaceParams.psSurface->dwHeight * 3) / 2;
470 }
471 surfaceParams.dwCacheabilityControl = m_hwInterface->ComposeSurfaceCacheabilityControl(
472 MOS_CODEC_RESOURCE_USAGE_ORIGINAL_UNCOMPRESSED_PICTURE_ENCODE,
473 (codechalL3 | codechalLLC));
474
475 surfaceParams.dwBindingTableOffset = cscSrcYPlane;
476 surfaceParams.dwUVBindingTableOffset = cscSrcUVPlane;
477 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
478 m_hwInterface,
479 cmdBuffer,
480 &surfaceParams,
481 m_cscKernelState));
482
483 // Converted NV12 output surface, or ENC 8-bit output surface
484 if (m_surfaceParamsCsc.psOutputCopiedSurface)
485 {
486 MOS_ZeroMemory(&surfaceParams, sizeof(surfaceParams));
487 surfaceParams.bIs2DSurface =
488 surfaceParams.bUseUVPlane =
489 surfaceParams.bMediaBlockRW =
490 surfaceParams.bIsWritable = true;
491 surfaceParams.psSurface = m_surfaceParamsCsc.psOutputCopiedSurface;
492 surfaceParams.dwCacheabilityControl = m_hwInterface->ComposeSurfaceCacheabilityControl(
493 MOS_CODEC_RESOURCE_USAGE_SURFACE_HME_DOWNSAMPLED_ENCODE,
494 codechalLLC);
495
496 surfaceParams.dwBindingTableOffset = cscDstConvYPlane;
497 surfaceParams.dwUVBindingTableOffset = cscDstConvUVlane;
498 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
499 m_hwInterface,
500 cmdBuffer,
501 &surfaceParams,
502 m_cscKernelState));
503 }
504
505 // 4x downscaled surface
506 if (m_surfaceParamsCsc.psOutput4xDsSurface)
507 {
508 MOS_ZeroMemory(&surfaceParams, sizeof(surfaceParams));
509 surfaceParams.bIs2DSurface =
510 surfaceParams.bMediaBlockRW =
511 surfaceParams.bIsWritable = true;
512 surfaceParams.psSurface = m_surfaceParamsCsc.psOutput4xDsSurface;
513 surfaceParams.dwCacheabilityControl = m_hwInterface->ComposeSurfaceCacheabilityControl(
514 MOS_CODEC_RESOURCE_USAGE_SURFACE_HME_DOWNSAMPLED_ENCODE,
515 codechalLLC);
516 surfaceParams.dwBindingTableOffset = cscDst4xDs;
517 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
518 m_hwInterface,
519 cmdBuffer,
520 &surfaceParams,
521 m_cscKernelState));
522 }
523
524 // MB Stats surface
525 if (m_surfaceParamsCsc.presMBVProcStatsBuffer)
526 {
527 MOS_ZeroMemory(&surfaceParams, sizeof(surfaceParams));
528 surfaceParams.dwSize = MOS_BYTES_TO_DWORDS(m_hwInterface->m_avcMbStatBufferSize);
529 surfaceParams.bIsWritable = true;
530 surfaceParams.presBuffer = m_surfaceParamsCsc.presMBVProcStatsBuffer;
531 surfaceParams.dwCacheabilityControl = m_hwInterface->ComposeSurfaceCacheabilityControl(
532 MOS_CODEC_RESOURCE_USAGE_SURFACE_HME_DOWNSAMPLED_ENCODE,
533 codechalLLC);
534 surfaceParams.dwBindingTableOffset = cscDstMbStats;
535 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
536 m_hwInterface,
537 cmdBuffer,
538 &surfaceParams,
539 m_cscKernelState));
540 }
541
542 // 2x downscaled surface
543 if (m_surfaceParamsCsc.psOutput2xDsSurface)
544 {
545 MOS_ZeroMemory(&surfaceParams, sizeof(surfaceParams));
546 surfaceParams.bIs2DSurface =
547 surfaceParams.bMediaBlockRW =
548 surfaceParams.bIsWritable = true;
549 surfaceParams.psSurface = m_surfaceParamsCsc.psOutput2xDsSurface;
550 surfaceParams.dwCacheabilityControl = m_hwInterface->ComposeSurfaceCacheabilityControl(
551 MOS_CODEC_RESOURCE_USAGE_SURFACE_HME_DOWNSAMPLED_ENCODE,
552 codechalLLC);
553 surfaceParams.dwBindingTableOffset = cscDst2xDs;
554 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
555 m_hwInterface,
556 cmdBuffer,
557 &surfaceParams,
558 m_cscKernelState));
559 }
560
561 if (m_surfaceParamsCsc.hevcExtParams)
562 {
563 auto hevcExtParams = (HevcExtKernelParams*)m_surfaceParamsCsc.hevcExtParams;
564
565 // History buffer
566 if (hevcExtParams->presHistoryBuffer)
567 {
568 MOS_ZeroMemory(&surfaceParams, sizeof(surfaceParams));
569 surfaceParams.dwSize = MOS_BYTES_TO_DWORDS(hevcExtParams->dwSizeHistoryBuffer);
570 surfaceParams.dwOffset = hevcExtParams->dwOffsetHistoryBuffer;
571 surfaceParams.bIsWritable = true;
572 surfaceParams.presBuffer = hevcExtParams->presHistoryBuffer;
573 surfaceParams.dwCacheabilityControl = m_hwInterface->ComposeSurfaceCacheabilityControl(
574 MOS_CODEC_RESOURCE_USAGE_SURFACE_HME_DOWNSAMPLED_ENCODE,
575 codechalLLC);
576 surfaceParams.dwBindingTableOffset = cscDstHistBuffer;
577 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
578 m_hwInterface,
579 cmdBuffer,
580 &surfaceParams,
581 m_cscKernelState));
582 }
583
584 // History sum output buffer
585 if (hevcExtParams->presHistorySumBuffer)
586 {
587 MOS_ZeroMemory(&surfaceParams, sizeof(surfaceParams));
588 surfaceParams.dwSize = MOS_BYTES_TO_DWORDS(hevcExtParams->dwSizeHistorySumBuffer);
589 surfaceParams.dwOffset = hevcExtParams->dwOffsetHistorySumBuffer;
590 surfaceParams.bIsWritable = true;
591 surfaceParams.presBuffer = hevcExtParams->presHistorySumBuffer;
592 surfaceParams.dwCacheabilityControl = m_hwInterface->ComposeSurfaceCacheabilityControl(
593 MOS_CODEC_RESOURCE_USAGE_SURFACE_HME_DOWNSAMPLED_ENCODE,
594 codechalLLC);
595 surfaceParams.dwBindingTableOffset = cscDstHistSum;
596 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
597 m_hwInterface,
598 cmdBuffer,
599 &surfaceParams,
600 m_cscKernelState));
601 }
602
603 // multi-thread task buffer
604 if (hevcExtParams->presMultiThreadTaskBuffer)
605 {
606 MOS_ZeroMemory(&surfaceParams, sizeof(surfaceParams));
607 surfaceParams.dwSize = MOS_BYTES_TO_DWORDS(hevcExtParams->dwSizeMultiThreadTaskBuffer);
608 surfaceParams.dwOffset = hevcExtParams->dwOffsetMultiThreadTaskBuffer;
609 surfaceParams.bIsWritable = true;
610 surfaceParams.presBuffer = hevcExtParams->presMultiThreadTaskBuffer;
611 surfaceParams.dwCacheabilityControl = m_hwInterface->ComposeSurfaceCacheabilityControl(
612 MOS_CODEC_RESOURCE_USAGE_SURFACE_HME_DOWNSAMPLED_ENCODE,
613 codechalLLC);
614 surfaceParams.dwBindingTableOffset = cscDstMultiTask;
615 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
616 m_hwInterface,
617 cmdBuffer,
618 &surfaceParams,
619 m_cscKernelState));
620 }
621 }
622
623 return eStatus;
624 }
625
InitKernelStateDS()626 MOS_STATUS CodechalEncodeCscDsG11::InitKernelStateDS()
627 {
628 CODECHAL_ENCODE_FUNCTION_ENTER;
629
630 m_dsBTCount[0] = ds4xNumSurfaces;
631 m_dsCurbeLength[0] =
632 m_dsInlineDataLength = sizeof(Ds4xKernelCurbeData);
633 m_dsBTISrcY = ds4xSrcYPlane;
634 m_dsBTIDstY = ds4xDstYPlane;
635 m_dsBTISrcYTopField = ds4xSrcYPlaneTopField;
636 m_dsBTIDstYTopField = ds4xDstYPlaneTopField;
637 m_dsBTISrcYBtmField = ds4xSrcYPlaneBtmField;
638 m_dsBTIDstYBtmField = ds4xDstYPlaneBtmField;
639 m_dsBTIDstMbVProc = ds4xDstMbVProc;
640 m_dsBTIDstMbVProcTopField = ds4xDstMbVProcTopField;
641 m_dsBTIDstMbVProcBtmField = ds4xDstMbVProcBtmField;
642
643 uint32_t kernelSize, numKernelsToLoad = m_encoder->m_interlacedFieldDisabled ? 1 : CODEC_NUM_FIELDS_PER_FRAME;
644 m_dsKernelBase = m_kernelBase;
645 CODECHAL_KERNEL_HEADER currKrnHeader;
646 for (uint32_t krnStateIdx = 0; krnStateIdx < numKernelsToLoad; krnStateIdx++)
647 {
648 kernelSize = m_combinedKernelSize;
649 m_dsKernelState = &m_encoder->m_scaling4xKernelStates[krnStateIdx];
650
651 CODECHAL_ENCODE_CHK_STATUS_RETURN(GetCommonKernelHeaderAndSizeG11(
652 m_dsKernelBase,
653 ENC_SCALING4X,
654 krnStateIdx,
655 &currKrnHeader,
656 &kernelSize))
657
658 m_dsKernelState->KernelParams.iBTCount = m_dsBTCount[0];
659 m_dsKernelState->KernelParams.iThreadCount = m_renderInterface->GetHwCaps()->dwMaxThreads;
660 m_dsKernelState->KernelParams.iCurbeLength = m_dsCurbeLength[0];
661 m_dsKernelState->KernelParams.iBlockWidth = CODECHAL_MACROBLOCK_WIDTH;
662 m_dsKernelState->KernelParams.iBlockHeight = CODECHAL_MACROBLOCK_HEIGHT;
663 m_dsKernelState->KernelParams.iIdCount = 1;
664 m_dsKernelState->KernelParams.iInlineDataLength = m_dsInlineDataLength;
665
666 m_dsKernelState->dwCurbeOffset = m_stateHeapInterface->GetSizeofCmdInterfaceDescriptorData();
667 m_dsKernelState->KernelParams.pBinary = m_dsKernelBase + (currKrnHeader.KernelStartPointer << MHW_KERNEL_OFFSET_SHIFT);
668 m_dsKernelState->KernelParams.iSize = kernelSize;
669 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->CalculateSshAndBtSizesRequested(
670 m_dsKernelState->KernelParams.iBTCount,
671 &m_dsKernelState->dwSshSize,
672 &m_dsKernelState->dwBindingTableSize));
673
674 CODECHAL_ENCODE_CHK_NULL_RETURN(m_renderInterface->m_stateHeapInterface);
675 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->MhwInitISH(m_renderInterface->m_stateHeapInterface, m_dsKernelState));
676
677 if (m_32xMeSupported)
678 {
679 m_dsKernelState = &m_encoder->m_scaling2xKernelStates[krnStateIdx];
680
681 CODECHAL_ENCODE_CHK_STATUS_RETURN(GetCommonKernelHeaderAndSizeG11(
682 m_dsKernelBase,
683 ENC_SCALING2X,
684 krnStateIdx,
685 &currKrnHeader,
686 &kernelSize))
687
688 m_dsKernelState->KernelParams.iBTCount = m_dsBTCount[1];
689 m_dsKernelState->KernelParams.iThreadCount = m_renderInterface->GetHwCaps()->dwMaxThreads;
690 m_dsKernelState->KernelParams.iCurbeLength = m_dsCurbeLength[1];
691 m_dsKernelState->KernelParams.iBlockWidth = CODECHAL_MACROBLOCK_WIDTH;
692 m_dsKernelState->KernelParams.iBlockHeight = CODECHAL_MACROBLOCK_HEIGHT;
693
694 m_dsKernelState->dwCurbeOffset = m_stateHeapInterface->GetSizeofCmdInterfaceDescriptorData();
695 m_dsKernelState->KernelParams.pBinary = m_dsKernelBase + (currKrnHeader.KernelStartPointer << MHW_KERNEL_OFFSET_SHIFT);
696 m_dsKernelState->KernelParams.iSize = kernelSize;
697 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->CalculateSshAndBtSizesRequested(
698 m_dsKernelState->KernelParams.iBTCount,
699 &m_dsKernelState->dwSshSize,
700 &m_dsKernelState->dwBindingTableSize));
701
702 CODECHAL_ENCODE_CHK_NULL_RETURN(m_renderInterface->m_stateHeapInterface);
703 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->MhwInitISH(m_renderInterface->m_stateHeapInterface, m_dsKernelState));
704 }
705
706 if (m_encoder->m_interlacedFieldDisabled)
707 {
708 m_encoder->m_scaling4xKernelStates[1] = m_encoder->m_scaling4xKernelStates[0];
709
710 if (m_32xMeSupported)
711 {
712 m_encoder->m_scaling2xKernelStates[1] = m_encoder->m_scaling2xKernelStates[0];
713 }
714 }
715 }
716
717 return MOS_STATUS_SUCCESS;
718 }
719
SetCurbeDS4x()720 MOS_STATUS CodechalEncodeCscDsG11::SetCurbeDS4x()
721 {
722 CODECHAL_ENCODE_FUNCTION_ENTER;
723
724 if (CODECHAL_AVC != m_standard)
725 {
726 return CodechalEncodeCscDs::SetCurbeDS4x();
727 }
728
729 Ds4xKernelCurbeData curbe;
730
731 curbe.DW0_InputPictureWidth = m_curbeParams.dwInputPictureWidth;
732 curbe.DW0_InputPictureHeight = m_curbeParams.dwInputPictureHeight;
733
734 curbe.DW1_InputYBTIFrame = ds4xSrcYPlane;
735 curbe.DW2_OutputYBTIFrame = ds4xDstYPlane;
736
737 if (m_curbeParams.bFieldPicture)
738 {
739 curbe.DW3_InputYBTIBottomField = ds4xSrcYPlaneBtmField;
740 curbe.DW4_OutputYBTIBottomField = ds4xDstYPlaneBtmField;
741 }
742
743 if ((curbe.DW6_EnableMBFlatnessCheck = m_curbeParams.bFlatnessCheckEnabled))
744 {
745 curbe.DW5_FlatnessThreshold = 128;
746 }
747
748 // For gen10 DS kernel, If Flatness Check enabled, need enable MBVariance as well. Otherwise will not output MbIsFlat.
749 curbe.DW6_EnableMBVarianceOutput = curbe.DW6_EnableMBFlatnessCheck || m_curbeParams.bMBVarianceOutputEnabled;
750 curbe.DW6_EnableMBPixelAverageOutput = m_curbeParams.bMBPixelAverageOutputEnabled;
751 curbe.DW6_EnableBlock8x8StatisticsOutput = m_curbeParams.bBlock8x8StatisticsEnabled;
752
753 if (curbe.DW6_EnableMBVarianceOutput || curbe.DW6_EnableMBPixelAverageOutput)
754 {
755 curbe.DW8_MBVProcStatsBTIFrame = ds4xDstMbVProc;
756
757 if (m_curbeParams.bFieldPicture)
758 {
759 curbe.DW9_MBVProcStatsBTIBottomField = ds4xDstMbVProcBtmField;
760 }
761 }
762
763 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_dsKernelState->m_dshRegion.AddData(
764 &curbe,
765 m_dsKernelState->dwCurbeOffset,
766 sizeof(curbe)));
767
768 CODECHAL_DEBUG_TOOL(
769 if (m_encoder->m_encodeParState)
770 {
771 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_encoder->m_encodeParState->PopulateDsParam(&curbe));
772 }
773 )
774
775 return MOS_STATUS_SUCCESS;
776 }
777
InitSfcState()778 MOS_STATUS CodechalEncodeCscDsG11::InitSfcState()
779 {
780 CODECHAL_ENCODE_FUNCTION_ENTER;
781
782 if (!m_sfcState)
783 {
784 m_sfcState = (CodecHalEncodeSfc*)MOS_New(CodecHalEncodeSfcG11);
785 CODECHAL_ENCODE_CHK_NULL_RETURN(m_sfcState);
786
787 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_sfcState->Initialize(m_hwInterface, m_osInterface));
788
789 m_sfcState->SetInputColorSpace(MHW_CSpace_sRGB);
790 }
791 return MOS_STATUS_SUCCESS;
792 }
793
794
CodechalEncodeCscDsG11(CodechalEncoderState * encoder)795 CodechalEncodeCscDsG11::CodechalEncodeCscDsG11(CodechalEncoderState* encoder)
796 : CodechalEncodeCscDs(encoder)
797 {
798 m_cscKernelUID = IDR_CODEC_HME_DS_SCOREBOARD_KERNEL;
799 m_cscCurbeLength = sizeof(CscKernelCurbeData);
800 #if defined(ENABLE_KERNELS)
801 m_kernelBase = (uint8_t*)IGCODECKRN_G11;
802 #endif
803 }
804
~CodechalEncodeCscDsG11()805 CodechalEncodeCscDsG11::~CodechalEncodeCscDsG11()
806 {
807 // free the MbStats surface
808 m_osInterface->pfnFreeResource(m_osInterface, &m_resMbStatsBuffer);
809 }
810