1 /*
2 * Copyright (c) 2017-2019, Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 */
22 //!
23 //! \file codechal_encoder_base.cpp
24 //! \brief Implements the encode interface for CodecHal.
25 //! \details The encode interface is further sub-divided by standard, this file is for the base interface which is shared by all encode standards.
26 //!
27
28 #include "codechal_encoder_base.h"
29 #include "codechal_encode_tracked_buffer_hevc.h"
30 #include "mos_solo_generic.h"
31 #include "hal_oca_interface.h"
32 #include "codechal_encode_csc_ds.h"
33
PrepareNodes(MOS_GPU_NODE & videoGpuNode,bool & setVideoNode)34 void CodechalEncoderState::PrepareNodes(
35 MOS_GPU_NODE& videoGpuNode,
36 bool& setVideoNode)
37 {
38 if (MOS_VE_MULTINODESCALING_SUPPORTED(m_osInterface))
39 return;
40
41 if (m_vdboxOneDefaultUsed)
42 {
43 setVideoNode = true;
44 videoGpuNode = MOS_GPU_NODE_VIDEO;
45 }
46 else if (m_needCheckCpEnabled)
47 {
48 if (m_osInterface->osCpInterface->IsCpEnabled() ||
49 m_vdencEnabled)
50 {
51 setVideoNode = true;
52 videoGpuNode = MOS_GPU_NODE_VIDEO;
53 }
54 }
55 }
56
SetGpuCtxCreatOption()57 MOS_STATUS CodechalEncoderState::SetGpuCtxCreatOption()
58 {
59 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
60
61 m_gpuCtxCreatOpt = MOS_New(MOS_GPUCTX_CREATOPTIONS);
62 CODECHAL_ENCODE_CHK_NULL_RETURN(m_gpuCtxCreatOpt);
63
64 return eStatus;
65 }
66
CreateGpuContexts()67 MOS_STATUS CodechalEncoderState::CreateGpuContexts()
68 {
69 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
70
71 if (CodecHalUsesVideoEngine(m_codecFunction))
72 {
73 MOS_GPU_NODE videoGpuNode = MOS_GPU_NODE_VIDEO;
74 bool setVideoNode = false;
75
76 // Create Video Context
77 if (MEDIA_IS_SKU(m_skuTable, FtrVcs2) ||
78 (MOS_VE_MULTINODESCALING_SUPPORTED(m_osInterface) && m_numVdbox > 1)) // Eventually move this functionality to Mhw
79 {
80 setVideoNode = false;
81
82 PrepareNodes(videoGpuNode, setVideoNode);
83
84 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnCreateVideoNodeAssociation(
85 m_osInterface,
86 setVideoNode,
87 &videoGpuNode));
88 m_videoNodeAssociationCreated = true;
89 }
90 m_videoGpuNode = videoGpuNode;
91
92 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetGpuCtxCreatOption());
93 CODECHAL_ENCODE_CHK_NULL_RETURN(m_gpuCtxCreatOpt);
94
95 MOS_GPU_CONTEXT gpuContext = (videoGpuNode == MOS_GPU_NODE_VIDEO2) && !MOS_VE_MULTINODESCALING_SUPPORTED(m_osInterface) ? MOS_GPU_CONTEXT_VDBOX2_VIDEO3 : MOS_GPU_CONTEXT_VIDEO3;
96
97 eStatus = (MOS_STATUS)m_osInterface->pfnCreateGpuContext(
98 m_osInterface,
99 gpuContext,
100 videoGpuNode,
101 m_gpuCtxCreatOpt);
102
103 if (eStatus != MOS_STATUS_SUCCESS)
104 {
105 // Failed to create new context. Try to reuse the existing one on the same VDBox.
106 if (videoGpuNode == MOS_GPU_NODE_VIDEO2)
107 {
108 // check other GPU contexts on VDBox2
109 gpuContext = MOS_GPU_CONTEXT_VDBOX2_VIDEO;
110 if (m_osInterface->pfnIsGpuContextValid(m_osInterface, gpuContext) != MOS_STATUS_SUCCESS)
111 {
112 gpuContext = MOS_GPU_CONTEXT_VDBOX2_VIDEO2;
113 eStatus = (MOS_STATUS)m_osInterface->pfnIsGpuContextValid(m_osInterface, gpuContext);
114 }
115 }
116 else // videoGpuNode == MOS_GPU_NODE_VIDEO
117 {
118 // check other GPU contexts on VDBox1
119 gpuContext = MOS_GPU_CONTEXT_VIDEO;
120 if (m_osInterface->pfnIsGpuContextValid(m_osInterface, gpuContext) != MOS_STATUS_SUCCESS)
121 {
122 gpuContext = MOS_GPU_CONTEXT_VIDEO2;
123 eStatus = (MOS_STATUS)m_osInterface->pfnIsGpuContextValid(m_osInterface, gpuContext);
124 }
125 }
126
127 if (eStatus != MOS_STATUS_SUCCESS)
128 {
129 // No valid GPU context on current VDBox, so destroy the video node association.
130 if (MEDIA_IS_SKU(m_skuTable, FtrVcs2))
131 {
132 m_osInterface->pfnDestroyVideoNodeAssociation(m_osInterface, videoGpuNode);
133 m_videoNodeAssociationCreated = false;
134 }
135
136 if (videoGpuNode == MOS_GPU_NODE_VIDEO2)
137 {
138 // If no valid GPU context on VDBox2, check GPU contexts on VDBox1
139 gpuContext = MOS_GPU_CONTEXT_VIDEO3;
140 if (m_osInterface->pfnIsGpuContextValid(m_osInterface, gpuContext) != MOS_STATUS_SUCCESS)
141 {
142 gpuContext = MOS_GPU_CONTEXT_VIDEO;
143 if (m_osInterface->pfnIsGpuContextValid(m_osInterface, gpuContext) != MOS_STATUS_SUCCESS)
144 {
145 // If this context is also invalid, return an error as no context for the video engine
146 // is available, so PAK cannot occur
147 gpuContext = MOS_GPU_CONTEXT_VIDEO2;
148 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnIsGpuContextValid(m_osInterface, gpuContext));
149 }
150 }
151
152 // When using existing VDBOX1, UMD needs to notify KMD to increase the VDBOX1 counter
153 setVideoNode = true;
154 videoGpuNode = MOS_GPU_NODE_VIDEO;
155 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnCreateVideoNodeAssociation(
156 m_osInterface,
157 setVideoNode,
158 &videoGpuNode));
159 m_videoNodeAssociationCreated = true;
160 }
161 else // videoGpuNode == MOS_GPU_NODE_VIDEO
162 {
163 // We won't check GPU contexts on VDBox2 if there is no valid GPU context on VDBox1
164 // since VDBox2 is not full featured.
165 CODECHAL_ENCODE_CHK_STATUS_RETURN(eStatus);
166 }
167 }
168
169 // save the updated VDBox ordinal
170 m_videoGpuNode = videoGpuNode;
171 }
172
173 if (m_videoNodeAssociationCreated)
174 {
175 CODECHAL_UPDATE_VDBOX_USER_FEATURE(videoGpuNode);
176 }
177
178 m_videoContext = gpuContext;
179
180 m_osInterface->pfnSetEncodePakContext(m_osInterface, m_videoContext);
181 }
182
183 if (CodecHalUsesRenderEngine(m_codecFunction, m_standard))
184 {
185 MOS_GPU_CONTEXT gpuContext = MOS_GPU_CONTEXT_RENDER2;
186 MOS_GPU_NODE renderGpuNode = MOS_GPU_NODE_3D;
187
188 if (!MEDIA_IS_SKU(m_skuTable, FtrCCSNode))
189 {
190 m_computeContextEnabled = false;
191 }
192
193 if (m_computeContextEnabled)
194 {
195 gpuContext = MOS_GPU_CONTEXT_COMPUTE;
196 renderGpuNode = MOS_GPU_NODE_COMPUTE;
197 }
198 else
199 {
200 gpuContext = MOS_GPU_CONTEXT_RENDER2;
201 renderGpuNode = MOS_GPU_NODE_3D;
202 }
203 MOS_GPUCTX_CREATOPTIONS createOption;
204
205 if (m_hwInterface->m_slicePowerGate)
206 {
207 createOption.packed.SubSliceCount = (m_gtSystemInfo->SubSliceCount / m_gtSystemInfo->SliceCount);
208 // If there are multiply sub slices, disable half of sub slices.
209 if (createOption.packed.SubSliceCount > 1)
210 createOption.packed.SubSliceCount >>= 1;
211 createOption.packed.SliceCount = (uint8_t)m_gtSystemInfo->SliceCount;
212 createOption.packed.MaxEUcountPerSubSlice = (uint8_t)(m_gtSystemInfo->EUCount / m_gtSystemInfo->SubSliceCount);
213 createOption.packed.MinEUcountPerSubSlice = (uint8_t)(m_gtSystemInfo->EUCount / m_gtSystemInfo->SubSliceCount);
214 }
215
216 eStatus = (MOS_STATUS)m_osInterface->pfnCreateGpuContext(m_osInterface, gpuContext, renderGpuNode, &createOption);
217
218 if (eStatus != MOS_STATUS_SUCCESS)
219 {
220 // If this context is also invalid, return an error as no context for the 3D engine
221 // is available, so ENC cannot occur
222 gpuContext = MOS_GPU_CONTEXT_RENDER;
223 CODECHAL_ENCODE_ASSERTMESSAGE("create gpu context failure for Render Engine!");
224 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnIsGpuContextValid(m_osInterface, gpuContext));
225 }
226
227 m_renderContext = gpuContext;
228 m_osInterface->pfnSetEncodeEncContext(m_osInterface, m_renderContext);
229 }
230
231 // Set Vdbox index in use
232 m_vdboxIndex = (m_videoGpuNode == MOS_GPU_NODE_VIDEO2)? MHW_VDBOX_NODE_2 : MHW_VDBOX_NODE_1;
233
234 return eStatus;
235 }
236
DestroyMeResources(HmeParams * param)237 MOS_STATUS CodechalEncoderState::DestroyMeResources(
238 HmeParams* param)
239 {
240 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
241
242 CODECHAL_ENCODE_FUNCTION_ENTER;
243
244 CODECHAL_ENCODE_CHK_NULL_RETURN(param);
245
246 if (param->ps16xMeMvDataBuffer != nullptr)
247 {
248 m_osInterface->pfnFreeResource(
249 m_osInterface,
250 ¶m->ps16xMeMvDataBuffer->OsResource);
251 }
252
253 if (param->ps32xMeMvDataBuffer != nullptr)
254 {
255 m_osInterface->pfnFreeResource(
256 m_osInterface,
257 ¶m->ps32xMeMvDataBuffer->OsResource);
258 }
259
260 if (param->ps4xMeDistortionBuffer != nullptr)
261 {
262 m_osInterface->pfnFreeResource(
263 m_osInterface,
264 ¶m->ps4xMeDistortionBuffer->OsResource);
265 }
266
267 if (param->ps4xMeMvDataBuffer != nullptr)
268 {
269 m_osInterface->pfnFreeResource(
270 m_osInterface,
271 ¶m->ps4xMeMvDataBuffer->OsResource);
272 }
273
274 if (param->presMvAndDistortionSumSurface != nullptr)
275 {
276 m_osInterface->pfnFreeResource(
277 m_osInterface,
278 param->presMvAndDistortionSumSurface);
279 }
280
281 return eStatus;
282 }
283
CleanUpResource(PMOS_RESOURCE resource,PMOS_ALLOC_GFXRES_PARAMS allocParams)284 MOS_STATUS CodechalEncoderState::CleanUpResource(
285 PMOS_RESOURCE resource,
286 PMOS_ALLOC_GFXRES_PARAMS allocParams)
287 {
288 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
289
290 CODECHAL_ENCODE_FUNCTION_ENTER;
291
292 CODECHAL_ENCODE_CHK_NULL_RETURN(resource);
293 CODECHAL_ENCODE_CHK_NULL_RETURN(allocParams);
294
295 MOS_LOCK_PARAMS lockFlag;
296 MOS_ZeroMemory(&lockFlag, sizeof(lockFlag));
297 lockFlag.WriteOnly = true;
298 uint8_t* data = (uint8_t*)m_osInterface->pfnLockResource(m_osInterface, resource, &lockFlag);
299 if(data == nullptr)
300 {
301 return MOS_STATUS_NULL_POINTER;
302 }
303
304 if(allocParams->Format == Format_Buffer)
305 {
306 MOS_ZeroMemory(data, allocParams->dwBytes);
307 }
308 else if(allocParams->Format == Format_Buffer_2D)
309 {
310 MOS_ZeroMemory(data, allocParams->dwHeight * allocParams->dwWidth);
311 }
312 else
313 {
314 eStatus = MOS_STATUS_INVALID_PARAMETER;
315 }
316
317 m_osInterface->pfnUnlockResource(m_osInterface, resource);
318
319 return eStatus;
320 }
321
AllocateResources4xMe(HmeParams * param)322 MOS_STATUS CodechalEncoderState::AllocateResources4xMe(
323 HmeParams* param)
324 {
325 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
326
327 CODECHAL_ENCODE_FUNCTION_ENTER;
328
329 CODECHAL_ENCODE_CHK_NULL_RETURN(param);
330
331 if(!m_encEnabled || !m_hmeSupported)
332 {
333 return eStatus;
334 }
335
336 MOS_ALLOC_GFXRES_PARAMS allocParamsForBuffer2D;
337 MOS_ZeroMemory(&allocParamsForBuffer2D, sizeof(MOS_ALLOC_GFXRES_PARAMS));
338 allocParamsForBuffer2D.Type = MOS_GFXRES_2D;
339 allocParamsForBuffer2D.TileType = MOS_TILE_LINEAR;
340 allocParamsForBuffer2D.Format = Format_Buffer_2D;
341
342 MOS_ZeroMemory(param->ps4xMeMvDataBuffer, sizeof(MOS_SURFACE));
343 param->ps4xMeMvDataBuffer->TileType = MOS_TILE_LINEAR;
344 param->ps4xMeMvDataBuffer->bArraySpacing = true;
345 param->ps4xMeMvDataBuffer->Format = Format_Buffer_2D;
346 param->ps4xMeMvDataBuffer->dwWidth = MOS_ALIGN_CEIL((m_downscaledWidthInMb4x * 32), 64); // MediaBlockRW requires pitch multiple of 64 bytes when linear.
347 param->ps4xMeMvDataBuffer->dwHeight = (m_downscaledHeightInMb4x * 2 * 4 * CODECHAL_ENCODE_ME_DATA_SIZE_MULTIPLIER);
348 param->ps4xMeMvDataBuffer->dwPitch = param->ps4xMeMvDataBuffer->dwWidth;
349
350 allocParamsForBuffer2D.dwWidth = param->ps4xMeMvDataBuffer->dwWidth;
351 allocParamsForBuffer2D.dwHeight = param->ps4xMeMvDataBuffer->dwHeight;
352 allocParamsForBuffer2D.pBufName = "4xME MV Data Buffer";
353
354 eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource(
355 m_osInterface,
356 &allocParamsForBuffer2D,
357 ¶m->ps4xMeMvDataBuffer->OsResource);
358
359 if (eStatus != MOS_STATUS_SUCCESS)
360 {
361 CODECHAL_ENCODE_ASSERTMESSAGE("Failed to allocate 4xME MV Data Buffer.");
362 return eStatus;
363 }
364
365 CleanUpResource(¶m->ps4xMeMvDataBuffer->OsResource, &allocParamsForBuffer2D);
366
367 if (param->b4xMeDistortionBufferSupported)
368 {
369 uint32_t adjustedHeight =
370 m_downscaledHeightInMb4x * CODECHAL_MACROBLOCK_HEIGHT * SCALE_FACTOR_4x;
371 uint32_t downscaledFieldHeightInMb4x =
372 CODECHAL_GET_HEIGHT_IN_MACROBLOCKS(((adjustedHeight + 1) >> 1)/4);
373
374 MOS_ZeroMemory(param->ps4xMeDistortionBuffer, sizeof(MOS_SURFACE));
375 param->ps4xMeDistortionBuffer->TileType = MOS_TILE_LINEAR;
376 param->ps4xMeDistortionBuffer->bArraySpacing = true;
377 param->ps4xMeDistortionBuffer->Format = Format_Buffer_2D;
378 param->ps4xMeDistortionBuffer->dwWidth = MOS_ALIGN_CEIL((m_downscaledWidthInMb4x * 8), 64);
379 param->ps4xMeDistortionBuffer->dwHeight = 2 * MOS_ALIGN_CEIL((downscaledFieldHeightInMb4x * 4 * 10), 8);
380 param->ps4xMeDistortionBuffer->dwPitch = MOS_ALIGN_CEIL((m_downscaledWidthInMb4x * 8), 64);
381
382 allocParamsForBuffer2D.dwWidth = param->ps4xMeDistortionBuffer->dwWidth;
383 allocParamsForBuffer2D.dwHeight = param->ps4xMeDistortionBuffer->dwHeight;
384 allocParamsForBuffer2D.pBufName = "4xME Distortion Buffer";
385
386 eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource(
387 m_osInterface,
388 &allocParamsForBuffer2D,
389 ¶m->ps4xMeDistortionBuffer->OsResource);
390
391 if (eStatus != MOS_STATUS_SUCCESS)
392 {
393 CODECHAL_ENCODE_ASSERTMESSAGE("Failed to allocate 4xME Distortion Buffer.");
394 return eStatus;
395 }
396 CleanUpResource(¶m->ps4xMeDistortionBuffer->OsResource, &allocParamsForBuffer2D);
397 }
398
399 return eStatus;
400 }
401
AllocateResources16xMe(HmeParams * param)402 MOS_STATUS CodechalEncoderState::AllocateResources16xMe(
403 HmeParams* param)
404 {
405 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
406
407 CODECHAL_ENCODE_FUNCTION_ENTER;
408
409 CODECHAL_ENCODE_CHK_NULL_RETURN(param);
410
411 if (!m_encEnabled || !m_hmeSupported)
412 {
413 return eStatus;
414 }
415
416 MOS_ALLOC_GFXRES_PARAMS allocParamsForBuffer2D;
417 MOS_ZeroMemory(&allocParamsForBuffer2D, sizeof(MOS_ALLOC_GFXRES_PARAMS));
418 allocParamsForBuffer2D.Type = MOS_GFXRES_2D;
419 allocParamsForBuffer2D.TileType = MOS_TILE_LINEAR;
420 allocParamsForBuffer2D.Format = Format_Buffer_2D;
421
422 if (m_16xMeSupported)
423 {
424 MOS_ZeroMemory(param->ps16xMeMvDataBuffer, sizeof(MOS_SURFACE));
425 param->ps16xMeMvDataBuffer->TileType = MOS_TILE_LINEAR;
426 param->ps16xMeMvDataBuffer->bArraySpacing = true;
427 param->ps16xMeMvDataBuffer->Format = Format_Buffer_2D;
428 param->ps16xMeMvDataBuffer->dwWidth = MOS_ALIGN_CEIL((m_downscaledWidthInMb16x * 32), 64); // MediaBlockRW requires pitch multiple of 64 bytes when linear
429 param->ps16xMeMvDataBuffer->dwHeight = (m_downscaledHeightInMb16x * 2 * 4 * CODECHAL_ENCODE_ME_DATA_SIZE_MULTIPLIER);
430 param->ps16xMeMvDataBuffer->dwPitch = param->ps16xMeMvDataBuffer->dwWidth;
431
432 allocParamsForBuffer2D.dwWidth = param->ps16xMeMvDataBuffer->dwWidth;
433 allocParamsForBuffer2D.dwHeight = param->ps16xMeMvDataBuffer->dwHeight;
434 allocParamsForBuffer2D.pBufName = "16xME MV Data Buffer";
435
436 eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource(
437 m_osInterface,
438 &allocParamsForBuffer2D,
439 ¶m->ps16xMeMvDataBuffer->OsResource);
440
441 if (eStatus != MOS_STATUS_SUCCESS)
442 {
443 CODECHAL_ENCODE_ASSERTMESSAGE("Failed to allocate 16xME MV Data Buffer.");
444 return eStatus;
445 }
446 CleanUpResource(¶m->ps16xMeMvDataBuffer->OsResource, &allocParamsForBuffer2D);
447 }
448
449 return eStatus;
450 }
451
AllocateResources32xMe(HmeParams * param)452 MOS_STATUS CodechalEncoderState::AllocateResources32xMe(
453 HmeParams* param)
454 {
455 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
456
457 CODECHAL_ENCODE_FUNCTION_ENTER;
458
459 CODECHAL_ENCODE_CHK_NULL_RETURN(param);
460 if (!m_encEnabled || !m_hmeSupported)
461 {
462 return eStatus;
463 }
464
465 MOS_ALLOC_GFXRES_PARAMS allocParamsForBuffer2D;
466 MOS_ZeroMemory(&allocParamsForBuffer2D, sizeof(MOS_ALLOC_GFXRES_PARAMS));
467 allocParamsForBuffer2D.Type = MOS_GFXRES_2D;
468 allocParamsForBuffer2D.TileType = MOS_TILE_LINEAR;
469 allocParamsForBuffer2D.Format = Format_Buffer_2D;
470
471 if (m_32xMeSupported)
472 {
473 MOS_ZeroMemory(param->ps32xMeMvDataBuffer, sizeof(MOS_SURFACE));
474 param->ps32xMeMvDataBuffer->TileType = MOS_TILE_LINEAR;
475 param->ps32xMeMvDataBuffer->bArraySpacing = true;
476 param->ps32xMeMvDataBuffer->Format = Format_Buffer_2D;
477 param->ps32xMeMvDataBuffer->dwWidth = MOS_ALIGN_CEIL((m_downscaledWidthInMb32x * 32), 64); // MediaBlockRW requires pitch multiple of 64 bytes when linear
478 param->ps32xMeMvDataBuffer->dwHeight = (m_downscaledHeightInMb32x * 2 * 4 * CODECHAL_ENCODE_ME_DATA_SIZE_MULTIPLIER);
479 param->ps32xMeMvDataBuffer->dwPitch = param->ps32xMeMvDataBuffer->dwWidth;
480
481 allocParamsForBuffer2D.dwWidth = param->ps32xMeMvDataBuffer->dwWidth;
482 allocParamsForBuffer2D.dwHeight = param->ps32xMeMvDataBuffer->dwHeight;
483 allocParamsForBuffer2D.pBufName = "32xME MV Data Buffer";
484
485 eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource(
486 m_osInterface,
487 &allocParamsForBuffer2D,
488 ¶m->ps32xMeMvDataBuffer->OsResource);
489
490 if (eStatus != MOS_STATUS_SUCCESS)
491 {
492 CODECHAL_ENCODE_ASSERTMESSAGE("%s: Failed to allocate 32xME MV Data Buffer\n", __FUNCTION__);
493 return eStatus;
494 }
495 CleanUpResource(¶m->ps32xMeMvDataBuffer->OsResource, &allocParamsForBuffer2D);
496 }
497
498 return eStatus;
499 }
500
501 // Encode Public Interface Functions
Allocate(CodechalSetting * codecHalSettings)502 MOS_STATUS CodechalEncoderState::Allocate(CodechalSetting * codecHalSettings)
503 {
504 CODECHAL_ENCODE_FUNCTION_ENTER;
505
506 if (m_cscDsState)
507 {
508 // call before m_hwInterface->Initialize() to reserve ISH space for CscDs kernel
509 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_cscDsState->Initialize());
510 }
511
512 CODECHAL_ENCODE_CHK_STATUS_RETURN(Codechal::Allocate(codecHalSettings));
513
514 CODECHAL_ENCODE_CHK_STATUS_RETURN(Initialize(codecHalSettings));
515
516 // Create MMC state
517 if (m_mmcState == nullptr)
518 {
519 CODECHAL_ENCODE_CHK_NULL_RETURN(m_mmcState = MOS_New(CodecHalMmcState, m_hwInterface));
520 }
521
522 // create resource allocator
523 CODECHAL_ENCODE_CHK_NULL_RETURN(m_allocator = MOS_New(CodechalEncodeAllocator, this));
524
525 // create tracked buffer state
526 if (m_standard == CODECHAL_HEVC)
527 {
528 CODECHAL_ENCODE_CHK_NULL_RETURN(m_trackedBuf = MOS_New(CodechalEncodeTrackedBufferHevc, this));
529 }
530 else
531 {
532 CODECHAL_ENCODE_CHK_NULL_RETURN(m_trackedBuf = MOS_New(CodechalEncodeTrackedBuffer, this));
533 }
534
535 MotionEstimationDisableCheck();
536
537 CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateResources());
538
539 CODECHAL_ENCODE_CHK_STATUS_RETURN(CreateGpuContexts());
540
541 if (CodecHalUsesRenderEngine(codecHalSettings->codecFunction, codecHalSettings->standard))
542 {
543 m_renderContextUsesNullHw = m_useNullHw[m_renderContext];
544 }
545
546 if (CodecHalUsesVideoEngine(codecHalSettings->codecFunction))
547 {
548 m_videoContextUsesNullHw = m_useNullHw[m_videoContext];
549 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnRegisterBBCompleteNotifyEvent(
550 m_osInterface,
551 m_videoContext));
552 }
553 else
554 {
555 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnRegisterBBCompleteNotifyEvent(
556 m_osInterface,
557 m_renderContext));
558 }
559
560 if (!m_perfProfiler)
561 {
562 m_perfProfiler = MediaPerfProfiler::Instance();
563 CODECHAL_ENCODE_CHK_NULL_RETURN(m_perfProfiler);
564
565 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_perfProfiler->Initialize((void*)this, m_osInterface));
566 }
567 return MOS_STATUS_SUCCESS;
568 }
569
Execute(void * params)570 MOS_STATUS CodechalEncoderState::Execute(void *params)
571 {
572 CODECHAL_ENCODE_FUNCTION_ENTER;
573
574 CODECHAL_ENCODE_CHK_STATUS_RETURN(Codechal::Execute(params));
575
576 EncoderParams *encodeParams = (EncoderParams *)params;
577 // MSDK event handling
578 CODECHAL_ENCODE_CHK_STATUS_RETURN(Mos_Solo_SetGpuAppTaskEvent(m_osInterface,encodeParams->gpuAppTaskEvent));
579
580 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->SetWatchdogTimerThreshold(m_frameWidth, m_frameHeight));
581
582 if (m_codecFunction == CODECHAL_FUNCTION_FEI_PRE_ENC)
583 {
584 CODECHAL_ENCODE_CHK_STATUS_RETURN(ExecutePreEnc(encodeParams));
585 }
586 else
587 {
588 CODECHAL_ENCODE_CHK_STATUS_RETURN(ExecuteEnc(encodeParams));
589 }
590
591 return MOS_STATUS_SUCCESS;
592 }
593
594 // Encoder Public Interface Functions
Initialize(CodechalSetting * settings)595 MOS_STATUS CodechalEncoderState::Initialize(
596 CodechalSetting * settings)
597 {
598 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
599 MOS_STATUS statusKey = MOS_STATUS_SUCCESS;
600
601 CODECHAL_ENCODE_FUNCTION_ENTER;
602
603 CODECHAL_ENCODE_CHK_NULL_RETURN(settings);
604
605 m_storeData = 1;
606 m_firstFrame = true;
607 m_firstTwoFrames = true;
608 m_standard = settings->standard;
609 m_mode = settings->mode;
610 m_codecFunction = settings->codecFunction;
611
612 if (CodecHalUsesVideoEngine(m_codecFunction))
613 {
614 m_pakEnabled = true;
615 }
616
617 if (CodecHalUsesRenderEngine(m_codecFunction, m_standard))
618 {
619 m_encEnabled = true;
620 }
621
622 MOS_USER_FEATURE_VALUE_DATA userFeatureData;
623 if (m_encEnabled)
624 {
625 m_brcPakStatisticsSize = CODECHAL_ENCODE_BRC_PAK_STATISTICS_SIZE;
626
627 m_hwScoreboardType = 1;
628
629 m_encodeVfeMaxThreads = 0;
630 #if (_DEBUG || _RELEASE_INTERNAL)
631 MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
632 MOS_UserFeature_ReadValue_ID(
633 nullptr,
634 __MEDIA_USER_FEATURE_VALUE_ENCODE_VFE_MAX_THREADS_ID,
635 &userFeatureData);
636 m_encodeVfeMaxThreads = (uint32_t)userFeatureData.u32Data;
637 #endif // _DEBUG || _RELEASE_INTERNAL
638
639 m_encodeVfeMaxThreadsScaling = 0;
640 #if (_DEBUG || _RELEASE_INTERNAL)
641 MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
642 MOS_UserFeature_ReadValue_ID(
643 nullptr,
644 __MEDIA_USER_FEATURE_VALUE_ENCODE_VFE_MAX_THREADS_SCALING_ID,
645 &userFeatureData);
646 m_encodeVfeMaxThreadsScaling = (uint32_t)userFeatureData.i32Data;
647 #endif // _DEBUG || _RELEASE_INTERNAL
648
649 {
650 MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
651 MOS_UserFeature_ReadValue_ID(
652 nullptr,
653 __MEDIA_USER_FEATURE_VALUE_ENCODE_HW_WALKER_ID,
654 &userFeatureData);
655
656 m_hwWalker = (userFeatureData.i32Data) ? true : false;
657
658 if (m_hwWalker)
659 {
660 m_walkerMode = (MHW_WALKER_MODE)0;
661 #if (_DEBUG || _RELEASE_INTERNAL)
662 MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
663 MOS_UserFeature_ReadValue_ID(
664 nullptr,
665 __MEDIA_USER_FEATURE_VALUE_ENCODE_HW_WALKER_MODE_ID,
666 &userFeatureData);
667 m_walkerMode = (MHW_WALKER_MODE)userFeatureData.u32Data;
668 #endif // _DEBUG || _RELEASE_INTERNAL
669
670 if (MEDIA_IS_SKU(m_skuTable, FtrSliceShutdownOverride))
671 {
672 //Default Slice State
673 m_sliceShutdownDefaultState = (uint32_t)0;
674 #if (_DEBUG || _RELEASE_INTERNAL)
675 MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
676 MOS_UserFeature_ReadValue_ID(
677 nullptr,
678 __MEDIA_USER_FEATURE_VALUE_SLICE_SHUTDOWN_DEFAULT_STATE_ID,
679 &userFeatureData);
680 m_sliceShutdownDefaultState = (uint32_t)userFeatureData.u32Data;
681 #endif // _DEBUG || _RELEASE_INTERNAL
682
683 //Requested Slice State
684 m_sliceShutdownRequestState = (uint32_t)0;
685 #if (_DEBUG || _RELEASE_INTERNAL)
686 MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
687 MOS_UserFeature_ReadValue_ID(
688 nullptr,
689 __MEDIA_USER_FEATURE_VALUE_SLICE_SHUTDOWN_REQUEST_STATE_ID,
690 &userFeatureData);
691 m_sliceShutdownRequestState = (uint32_t)userFeatureData.u32Data;
692 #endif // _DEBUG || _RELEASE_INTERNAL
693
694 //Slice Shutdown Resolution Threshold
695 m_ssdResolutionThreshold = (uint32_t)0;
696 #if (_DEBUG || _RELEASE_INTERNAL)
697 MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
698 MOS_UserFeature_ReadValue_ID(
699 nullptr,
700 __MEDIA_USER_FEATURE_VALUE_SLICE_SHUTDOWN_RESOLUTION_THRESHOLD_ID,
701 &userFeatureData);
702 m_ssdResolutionThreshold = (uint32_t)userFeatureData.i32Data;
703 #endif // _DEBUG || _RELEASE_INTERNAL
704
705 //Slice Shutdown Target Usage Threshold
706 m_ssdTargetUsageThreshold = (uint32_t)0;
707 #if (_DEBUG || _RELEASE_INTERNAL)
708 MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
709 MOS_UserFeature_ReadValue_ID(
710 nullptr,
711 __MEDIA_USER_FEATURE_VALUE_SLICE_SHUTDOWN_TARGET_USAGE_THRESHOLD_ID,
712 &userFeatureData);
713 m_ssdTargetUsageThreshold = (uint32_t)userFeatureData.i32Data;
714 #endif // _DEBUG || _RELEASE_INTERNAL
715
716 #if (_DEBUG || _RELEASE_INTERNAL)
717 MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
718 MOS_UserFeature_ReadValue_ID(
719 nullptr,
720 __MEDIA_USER_FEATURE_VALUE_ENCODE_BRC_SOFTWARE_ID,
721 &userFeatureData);
722
723 if (userFeatureData.i32Data)
724 {
725 char path_buffer[256];
726 MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
727 MOS_ZeroMemory(path_buffer, 256);
728 userFeatureData.StringData.pStringData = path_buffer;
729
730 statusKey = MOS_UserFeature_ReadValue_ID(
731 nullptr,
732 __MEDIA_USER_FEATURE_VALUE_ENCODE_BRC_SOFTWARE_PATH_ID,
733 &userFeatureData);
734
735 if (statusKey == MOS_STATUS_SUCCESS && userFeatureData.StringData.uSize > 0)
736 {
737 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnLoadLibrary(m_osInterface, path_buffer, &m_swBrcMode));
738 }
739 }
740 // SW BRC DLL Reporting
741 CodecHalEncode_WriteKey(__MEDIA_USER_FEATURE_VALUE_ENCODE_BRC_SOFTWARE_IN_USE_ID, (m_swBrcMode == nullptr) ? false : true);
742 #endif // _DEBUG || _RELEASE_INTERNAL
743
744 if (!m_sliceShutdownDefaultState &&
745 !m_sliceShutdownRequestState &&
746 !m_ssdTargetUsageThreshold &&
747 !m_ssdResolutionThreshold)
748 {
749 // slice shutdown used for power efficiency
750 // use it in case of ult and if hw has more than 2 slices
751 if (MEDIA_IS_SKU(m_skuTable, FtrULT))
752 {
753 if ((GFX_IS_GEN_10_OR_LATER(m_platform) && m_gtSystemInfo->SliceCount >= 2) ||
754 MEDIA_IS_SKU(m_skuTable, FtrGT3))
755 {
756 m_sliceShutdownDefaultState = CODECHAL_SLICE_SHUTDOWN_ONE_SLICE;
757 m_sliceShutdownRequestState = CODECHAL_SLICE_SHUTDOWN_TWO_SLICES;
758 m_ssdResolutionThreshold = m_hwInterface->m_ssdResolutionThreshold;
759 m_ssdTargetUsageThreshold = m_hwInterface->m_ssdTargetUsageThreshold;
760 }
761 }
762 else if (MEDIA_IS_SKU(m_skuTable, FtrGT4))
763 {
764 m_sliceShutdownDefaultState = CODECHAL_SLICE_SHUTDOWN_ONE_SLICE;
765 m_sliceShutdownRequestState = CODECHAL_SLICE_SHUTDOWN_TWO_SLICES;
766 m_ssdResolutionThreshold = m_hwInterface->m_ssdResolutionThreshold;
767 m_ssdTargetUsageThreshold = m_hwInterface->m_ssdTargetUsageThreshold;
768 }
769 }
770 }
771 }
772 }
773
774 if (MEDIA_IS_SKU(m_skuTable, FtrSliceShutdown))
775 {
776 MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
777 MOS_UserFeature_ReadValue_ID(
778 nullptr,
779 __MEDIA_USER_FEATURE_VALUE_SLICE_SHUTDOWN_ENABLE_ID,
780 &userFeatureData);
781 m_sliceShutdownEnable = (userFeatureData.i32Data) ? true : false;
782 }
783
784 m_targetUsageOverride = (uint8_t)0;
785 #if (_DEBUG || _RELEASE_INTERNAL)
786 MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
787 MOS_UserFeature_ReadValue_ID(
788 nullptr,
789 __MEDIA_USER_FEATURE_VALUE_ENCODE_TARGET_USAGE_OVERRIDE_ID,
790 &userFeatureData);
791 m_targetUsageOverride = (uint8_t)userFeatureData.u32Data;
792 #endif // _DEBUG || _RELEASE_INTERNAL
793 }
794
795 if (m_pakEnabled)
796 {
797 //RCPanic settings
798 MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
799 MOS_UserFeature_ReadValue_ID(
800 nullptr,
801 __MEDIA_USER_FEATURE_VALUE_RC_PANIC_ENABLE_ID,
802 &userFeatureData);
803 m_panicEnable = (userFeatureData.i32Data) ? true : false;
804
805 MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
806 userFeatureData.i32Data = 1;
807 userFeatureData.i32DataFlag = MOS_USER_FEATURE_VALUE_DATA_FLAG_CUSTOM_DEFAULT_VALUE_TYPE;
808 MOS_UserFeature_ReadValue_ID(
809 nullptr,
810 __MEDIA_USER_FEATURE_VALUE_ENCODE_SUPPRESS_RECON_PIC_ENABLE_ID,
811 &userFeatureData);
812 m_suppressReconPicSupported = (userFeatureData.u32Data) ? true : false;
813 }
814
815 #if (_DEBUG || _RELEASE_INTERNAL)
816 MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
817 MOS_UserFeature_ReadValue_ID(
818 NULL,
819 __MEDIA_USER_FEATURE_VALUE_ENCODE_ENABLE_COMPUTE_CONTEXT_ID,
820 &userFeatureData);
821 m_computeContextEnabled = (userFeatureData.u32Data) ? true : false;
822 #endif
823
824 #if USE_CODECHAL_DEBUG_TOOL
825 MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
826 MOS_UserFeature_ReadValue_ID(
827 nullptr,
828 __MEDIA_USER_FEATURE_VALUE_CODECHAL_ENABLE_FAKE_HEADER_SIZE_ID,
829 &userFeatureData);
830 m_enableFakeHrdSize = (uint32_t)userFeatureData.u32Data;
831
832 MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
833 MOS_UserFeature_ReadValue_ID(
834 nullptr,
835 __MEDIA_USER_FEATURE_VALUE_CODECHAL_FAKE_IFRAME_HEADER_SIZE_ID,
836 &userFeatureData);
837 m_fakeIFrameHrdSize = (uint32_t)userFeatureData.u32Data;
838
839 MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
840 MOS_UserFeature_ReadValue_ID(
841 nullptr,
842 __MEDIA_USER_FEATURE_VALUE_CODECHAL_FAKE_PBFRAME_HEADER_SIZE_ID,
843 &userFeatureData);
844 m_fakePBFrameHrdSize = (uint32_t)userFeatureData.u32Data;
845 #endif
846
847 m_oriFrameWidth = settings->width;
848 m_oriFrameHeight = settings->height;
849 m_picWidthInMb = (uint16_t)CODECHAL_GET_WIDTH_IN_MACROBLOCKS(m_oriFrameWidth);
850 m_picHeightInMb = (uint16_t)CODECHAL_GET_HEIGHT_IN_MACROBLOCKS(m_oriFrameHeight);
851 m_frameWidth = m_picWidthInMb * CODECHAL_MACROBLOCK_WIDTH;
852 m_frameHeight = m_picHeightInMb * CODECHAL_MACROBLOCK_HEIGHT;
853 m_createWidth = m_frameWidth;
854 m_createHeight = m_frameHeight;
855
856 // HME Scaling WxH
857 m_downscaledWidthInMb4x =
858 CODECHAL_GET_WIDTH_IN_MACROBLOCKS(m_frameWidth / SCALE_FACTOR_4x);
859 m_downscaledHeightInMb4x =
860 CODECHAL_GET_HEIGHT_IN_MACROBLOCKS(m_frameHeight / SCALE_FACTOR_4x);
861 m_downscaledWidth4x =
862 m_downscaledWidthInMb4x * CODECHAL_MACROBLOCK_WIDTH;
863 m_downscaledHeight4x =
864 m_downscaledHeightInMb4x * CODECHAL_MACROBLOCK_HEIGHT;
865
866 // SuperHME Scaling WxH
867 m_downscaledWidthInMb16x =
868 CODECHAL_GET_WIDTH_IN_MACROBLOCKS(m_frameWidth / SCALE_FACTOR_16x);
869 m_downscaledHeightInMb16x =
870 CODECHAL_GET_HEIGHT_IN_MACROBLOCKS(m_frameHeight / SCALE_FACTOR_16x);
871 m_downscaledWidth16x =
872 m_downscaledWidthInMb16x * CODECHAL_MACROBLOCK_WIDTH;
873 m_downscaledHeight16x =
874 m_downscaledHeightInMb16x * CODECHAL_MACROBLOCK_HEIGHT;
875
876 // UltraHME Scaling WxH
877 m_downscaledWidthInMb32x =
878 CODECHAL_GET_WIDTH_IN_MACROBLOCKS(m_frameWidth / SCALE_FACTOR_32x);
879 m_downscaledHeightInMb32x =
880 CODECHAL_GET_HEIGHT_IN_MACROBLOCKS(m_frameHeight / SCALE_FACTOR_32x);
881 m_downscaledWidth32x =
882 m_downscaledWidthInMb32x * CODECHAL_MACROBLOCK_WIDTH;
883 m_downscaledHeight32x =
884 m_downscaledHeightInMb32x * CODECHAL_MACROBLOCK_HEIGHT;
885
886 m_minScaledDimension = CODECHAL_ENCODE_MIN_SCALED_SURFACE_SIZE;
887 m_minScaledDimensionInMb = (CODECHAL_ENCODE_MIN_SCALED_SURFACE_SIZE + 15) >> 4;
888
889 m_currOriginalPic.PicFlags = PICTURE_INVALID;
890 m_currOriginalPic.FrameIdx = 0;
891 m_currOriginalPic.PicEntry = 0;
892
893 m_hwInterface->GetCpInterface()->RegisterParams(settings->GetCpParams());
894
895 // flag to enable kmd for the frame tracking (so encoder driver doesn't need to send a separate command buffer
896 // for frame tracking purpose). Currently this feature is disabled for HEVC.
897 // For HEVC, this feature will be enabled later.
898
899 MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
900 statusKey = MOS_UserFeature_ReadValue_ID(
901 nullptr,
902 __MEDIA_USER_FEATURE_VALUE_ENCODE_ENABLE_FRAME_TRACKING_ID,
903 &userFeatureData);
904 if (statusKey == MOS_STATUS_SUCCESS)
905 {
906 m_frameTrackingEnabled = userFeatureData.i32Data ? true : false;
907 }
908 else
909 {
910 m_frameTrackingEnabled = m_osInterface->bEnableKmdMediaFrameTracking ? true: false;
911 }
912
913 if (m_standard == CODECHAL_AVC)
914 {
915 if (CodecHalUsesVideoEngine(m_codecFunction))
916 {
917 m_inlineEncodeStatusUpdate = m_osInterface->bInlineCodecStatusUpdate ? true: false;
918 }
919 }
920
921 // Disable SHME and UHME if HME is disabled
922 if(!m_hmeSupported)
923 {
924 m_16xMeSupported = false;
925 m_32xMeSupported = false;
926 }
927 // Disable UHME if SHME is disabled
928 else if(!m_16xMeSupported)
929 {
930 m_32xMeSupported = false;
931 }
932
933 // Set Vdbox index in use
934 m_vdboxIndex = (m_videoGpuNode == MOS_GPU_NODE_VIDEO2)? MHW_VDBOX_NODE_2 : MHW_VDBOX_NODE_1;
935
936 if (eStatus != MOS_STATUS_SUCCESS)
937 {
938 Destroy();
939 }
940
941 if (!m_feiEnable)
942 {
943 CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateMDFResources());
944 }
945
946 return eStatus;
947 }
948
AllocateMDFResources()949 MOS_STATUS CodechalEncoderState::AllocateMDFResources()
950 {
951 uint32_t devOp;
952
953 if (CodecHalIsFeiEncode(m_codecFunction) && m_codecFunction != CODECHAL_FUNCTION_FEI_PAK)
954 {
955 devOp = CM_DEVICE_CREATE_OPTION_SCRATCH_SPACE_DISABLE;
956
957 if (m_cmDev == nullptr)
958 {
959 m_osInterface->pfnNotifyStreamIndexSharing(m_osInterface);
960 CODECHAL_ENCODE_CHK_STATUS_RETURN(CreateCmDevice(m_osInterface->pOsContext, m_cmDev, devOp));
961 }
962 //just WA for issues in MDF null support
963 if (!m_cmQueue)
964 {
965 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_cmDev->CreateQueue(m_cmQueue));
966 }
967 if (!m_cmTask)
968 {
969 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_cmDev->CreateTask(m_cmTask));
970 }
971 }
972 return MOS_STATUS_SUCCESS;
973 }
974
DestroyMDFResources()975 MOS_STATUS CodechalEncoderState::DestroyMDFResources()
976 {
977 uint32_t i;
978
979 if (m_cmDev && m_cmTask)
980 {
981 m_cmDev->DestroyTask(m_cmTask);
982 m_cmTask = nullptr;
983 }
984 if (m_cmDev)
985 {
986 DestroyCmDevice(m_cmDev);
987 m_cmDev = nullptr;
988 }
989
990 return MOS_STATUS_SUCCESS;
991 }
992
SetMfeSharedState(MfeSharedState * pMfeSharedState)993 MOS_STATUS CodechalEncoderState::SetMfeSharedState(MfeSharedState *pMfeSharedState)
994 {
995 CODECHAL_ENCODE_FUNCTION_ENTER;
996
997 CODECHAL_ENCODE_CHK_NULL_RETURN(pMfeSharedState);
998
999 m_mfeEncodeSharedState = pMfeSharedState;
1000
1001 return MOS_STATUS_SUCCESS;
1002 }
1003
1004
AddKernelMdf(CmDevice * device,CmQueue * queue,CmKernel * kernel,CmTask * task,CmThreadSpace * threadspace,CmEvent * & event,bool isEnqueue)1005 MOS_STATUS CodechalEncoderState::AddKernelMdf(
1006 CmDevice * device,
1007 CmQueue * queue,
1008 CmKernel * kernel,
1009 CmTask * task,
1010 CmThreadSpace *threadspace,
1011 CmEvent *& event,
1012 bool isEnqueue)
1013 {
1014 CODECHAL_ENCODE_CHK_NULL_RETURN(device);
1015 CODECHAL_ENCODE_CHK_NULL_RETURN(kernel);
1016 CODECHAL_ENCODE_CHK_NULL_RETURN(queue);
1017 CODECHAL_ENCODE_CHK_NULL_RETURN(task);
1018 CODECHAL_ENCODE_CHK_NULL_RETURN(threadspace);
1019
1020 CODECHAL_ENCODE_CHK_STATUS_RETURN(kernel->AssociateThreadSpace(threadspace));
1021 CODECHAL_ENCODE_CHK_STATUS_RETURN(task->AddKernel(kernel));
1022 if (isEnqueue)
1023 {
1024 queue->Enqueue(task, event);
1025 task->Reset();
1026 }
1027 else
1028 {
1029 CODECHAL_ENCODE_CHK_STATUS_RETURN(task->AddSync());
1030 }
1031
1032 return MOS_STATUS_SUCCESS;
1033 }
1034
CreateMDFKernelResource(CodechalEncodeMdfKernelResource * resource,uint8_t kernelNum,uint8_t bufNum,uint8_t surfNum,uint8_t vmeSurfNum,uint16_t curbeSize)1035 MOS_STATUS CodechalEncoderState::CreateMDFKernelResource(
1036 CodechalEncodeMdfKernelResource *resource,
1037 uint8_t kernelNum,
1038 uint8_t bufNum,
1039 uint8_t surfNum,
1040 uint8_t vmeSurfNum,
1041 uint16_t curbeSize)
1042 {
1043 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
1044
1045 CODECHAL_ENCODE_CHK_NULL_RETURN(resource);
1046 if (kernelNum > 0)
1047 {
1048 resource->ppKernel = (CmKernel **)MOS_AllocAndZeroMemory(sizeof(CmKernel *) * kernelNum);
1049 resource->KernelNum = kernelNum;
1050 }
1051 if (bufNum > 0)
1052 {
1053 resource->ppCmBuf = (CmBuffer **)MOS_AllocAndZeroMemory(sizeof(CmBuffer *) * bufNum);
1054 resource->BufNum = bufNum;
1055 }
1056 if (surfNum > 0)
1057 {
1058 resource->ppCmSurf = (CmSurface2D **)MOS_AllocAndZeroMemory(sizeof(CmSurface2D *) * surfNum);
1059 resource->SurfNum = surfNum;
1060 }
1061 if (vmeSurfNum > 0)
1062 {
1063 resource->ppCmVmeSurf = (SurfaceIndex **)MOS_AllocAndZeroMemory(sizeof(SurfaceIndex *) * vmeSurfNum);
1064 resource->VmeSurfNum = vmeSurfNum;
1065 }
1066 if (curbeSize > 0)
1067 {
1068 resource->pCurbe = (uint8_t *)MOS_AllocAndZeroMemory(curbeSize);
1069 resource->wCurbeSize = curbeSize;
1070 }
1071
1072 resource->e = nullptr;
1073
1074 return MOS_STATUS_SUCCESS;
1075 }
1076
DestroyMDFKernelResource(CodechalEncodeMdfKernelResource * resource)1077 MOS_STATUS CodechalEncoderState::DestroyMDFKernelResource(
1078 CodechalEncodeMdfKernelResource *resource)
1079 {
1080 int i;
1081 CODECHAL_ENCODE_CHK_NULL_RETURN(resource);
1082
1083 CODECHAL_ENCODE_CHK_STATUS_RETURN(FreeMDFKernelSurfaces(resource));
1084
1085 if (resource->ppKernel && resource->KernelNum)
1086 {
1087 for (i = 0; i < resource->KernelNum; i++)
1088 {
1089 if (resource->ppKernel != nullptr)
1090 {
1091 m_cmDev->DestroyKernel(resource->ppKernel[i]);
1092 resource->ppKernel[i] = nullptr;
1093 }
1094 }
1095 MOS_FreeMemory(resource->ppKernel);
1096 resource->ppKernel = nullptr;
1097 }
1098 if (resource->pTS)
1099 {
1100 m_cmDev->DestroyThreadSpace(resource->pTS);
1101 resource->pTS = nullptr;
1102 }
1103 if (resource->ppCmBuf && resource->BufNum)
1104 {
1105 MOS_FreeMemory(resource->ppCmBuf);
1106 resource->ppCmBuf = nullptr;
1107 resource->BufNum = 0;
1108 }
1109 if (resource->ppCmSurf && resource->SurfNum)
1110 {
1111 MOS_FreeMemory(resource->ppCmSurf);
1112 resource->ppCmSurf = nullptr;
1113 resource->SurfNum = 0;
1114 }
1115 if (resource->ppCmVmeSurf && resource->VmeSurfNum)
1116 {
1117 MOS_FreeMemory(resource->ppCmVmeSurf);
1118 resource->ppCmVmeSurf = nullptr;
1119 resource->VmeSurfNum = 0;
1120 }
1121 if (resource->ppKernel && resource->KernelNum)
1122 {
1123 MOS_FreeMemory(resource->ppKernel);
1124 resource->ppKernel = nullptr;
1125 resource->KernelNum = 0;
1126 }
1127 if (resource->pCurbe && resource->wCurbeSize)
1128 {
1129 MOS_FreeMemory(resource->pCurbe);
1130 resource->pCurbe = nullptr;
1131 resource->wCurbeSize = 0;
1132 }
1133 if (resource->pCommonISA)
1134 {
1135 MOS_FreeMemory(resource->pCommonISA);
1136 resource->pCommonISA = nullptr;
1137 }
1138
1139 return MOS_STATUS_SUCCESS;
1140 }
1141
FreeMDFKernelSurfaces(CodechalEncodeMdfKernelResource * resource)1142 MOS_STATUS CodechalEncoderState::FreeMDFKernelSurfaces(
1143 CodechalEncodeMdfKernelResource* resource)
1144 {
1145 int i;
1146
1147 for (i = 0; i < resource->VmeSurfNum; i++)
1148 {
1149 if (resource->ppCmVmeSurf[i] != nullptr && resource->ppCmVmeSurf[i] != (SurfaceIndex *)CM_NULL_SURFACE)
1150 {
1151 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_cmDev->DestroyVmeSurfaceG7_5(resource->ppCmVmeSurf[i]));
1152 resource->ppCmVmeSurf[i] = nullptr;
1153 }
1154 }
1155 for (i = 0; i < resource->BufNum; i++)
1156 {
1157 if (resource->ppCmBuf[i] != nullptr)
1158 {
1159 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_cmDev->DestroySurface(resource->ppCmBuf[i]));
1160 resource->ppCmBuf[i] = nullptr;
1161 }
1162 }
1163 for (i = 0; i < resource->SurfNum; i++)
1164 {
1165 if (resource->ppCmSurf[i] != nullptr)
1166 {
1167 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_cmDev->DestroySurface(resource->ppCmSurf[i]));
1168 resource->ppCmSurf[i] = nullptr;
1169 }
1170 }
1171
1172 return MOS_STATUS_SUCCESS;
1173 }
1174
InitCommon()1175 MOS_STATUS CodechalEncoderState::InitCommon()
1176 {
1177 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
1178
1179 CODECHAL_ENCODE_FUNCTION_ENTER;
1180
1181 EncoderParams* encodeParams = &m_encodeParams;
1182 m_newSeq = encodeParams->bNewSeq ? true: false; // used by all except JPEG
1183 m_mbDataBufferSize = encodeParams->dwMbDataBufferSize; // used by all except JPEG
1184 m_newVuiData = encodeParams->bNewVuiData ? true: false; // used by AVC and MPEG2
1185 m_picQuant = encodeParams->bPicQuant ? true: false; // used by AVC and MPEG2
1186 m_newQmatrixData = encodeParams->bNewQmatrixData ? true: false; // used by AVC and MPEG2
1187 m_numSlices = encodeParams->dwNumSlices; // used by all except VP9
1188 m_slcData =
1189 (PCODEC_ENCODER_SLCDATA)(encodeParams->pSlcHeaderData); // used by AVC, MPEG2, and HEVC
1190
1191 CODECHAL_ENCODE_CHK_NULL_RETURN(encodeParams->presBitstreamBuffer);
1192 m_rawSurface = *(encodeParams->psRawSurface); // used by all
1193 m_resBitstreamBuffer = *(encodeParams->presBitstreamBuffer); // used by all
1194
1195 CODECHAL_ENCODE_CHK_COND_RETURN(
1196 Mos_ResourceIsNull(&m_rawSurface.OsResource),
1197 "Raw surface is nullptr!");
1198
1199 m_rawSurfaceToEnc =
1200 m_rawSurfaceToPak = &m_rawSurface;
1201
1202 if(encodeParams->psReconSurface)
1203 {
1204 m_reconSurface = *(encodeParams->psReconSurface); // used by all except JPEG
1205 }
1206
1207 if(encodeParams->pBSBuffer)
1208 {
1209 m_bsBuffer = *(encodeParams->pBSBuffer); // used by all except VP9
1210 }
1211
1212 return eStatus;
1213 }
1214
ResizeOnResChange()1215 void CodechalEncoderState::ResizeOnResChange()
1216 {
1217 CODECHAL_ENCODE_FUNCTION_ENTER;
1218
1219 // if resolution changed, free existing tracked buffer resources
1220 m_trackedBuf->Resize();
1221 }
1222
CheckResChangeAndCsc()1223 MOS_STATUS CodechalEncoderState::CheckResChangeAndCsc()
1224 {
1225 CODECHAL_ENCODE_FUNCTION_ENTER;
1226
1227 if (m_resolutionChanged)
1228 {
1229 ResizeOnResChange();
1230 }
1231
1232 if (m_cscDsState)
1233 {
1234 // check recon surface's alignment meet HW requirement
1235 CODECHAL_ENCODE_CHK_STATUS_RETURN(
1236 m_cscDsState->CheckReconSurfaceAlignment(&m_reconSurface));
1237
1238 if (!m_cscDsState->IsEnabled() ||
1239 CodecHal_PictureIsField(m_currOriginalPic) ||
1240 CodecHal_PictureIsInterlacedFrame(m_currOriginalPic))
1241 {
1242 // CSC disabled for interlaced frame
1243 m_cscDsState->ResetCscFlag();
1244
1245 // check raw surface's alignment meet HW requirement
1246 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_cscDsState->CheckRawSurfaceAlignment(m_rawSurfaceToEnc));
1247 }
1248 else
1249 {
1250 // check if we need to do CSC or copy non-aligned surface
1251 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_cscDsState->CheckCondition());
1252 }
1253 }
1254
1255 return MOS_STATUS_SUCCESS;
1256 }
1257
1258 // Function to allocate all resources common to all encoders
AllocateResources()1259 MOS_STATUS CodechalEncoderState::AllocateResources()
1260 {
1261 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
1262
1263 CODECHAL_ENCODE_FUNCTION_ENTER;
1264
1265 uint32_t numMbs = m_picWidthInMb * ((m_picHeightInMb+1)>>1)<<1;
1266
1267 // initiate allocation paramters and lock flags
1268 MOS_ALLOC_GFXRES_PARAMS allocParamsForBufferLinear;
1269 MOS_ZeroMemory(&allocParamsForBufferLinear, sizeof(MOS_ALLOC_GFXRES_PARAMS));
1270 allocParamsForBufferLinear.Type = MOS_GFXRES_BUFFER;
1271 allocParamsForBufferLinear.TileType = MOS_TILE_LINEAR;
1272 allocParamsForBufferLinear.Format = Format_Buffer;
1273
1274 MOS_ALLOC_GFXRES_PARAMS allocParams2D;
1275 MOS_ZeroMemory(&allocParams2D, sizeof(allocParams2D));
1276 allocParams2D.Type = MOS_GFXRES_2D;
1277 allocParams2D.TileType = MOS_TILE_LINEAR;
1278 allocParams2D.Format = Format_Buffer_2D;
1279
1280 MOS_LOCK_PARAMS lockFlagsNoOverWrite;;
1281 MOS_ZeroMemory(&lockFlagsNoOverWrite, sizeof(MOS_LOCK_PARAMS));
1282 lockFlagsNoOverWrite.WriteOnly = 1;
1283 lockFlagsNoOverWrite.NoOverWrite = 1;
1284
1285 MOS_LOCK_PARAMS lockFlags;
1286 MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
1287 lockFlags.WriteOnly = 1;
1288
1289 // create VME and MFX sync objects
1290 if ((m_codecFunction == CODECHAL_FUNCTION_ENC_PAK) ||
1291 (m_codecFunction == (CODECHAL_FUNCTION_ENC | CODECHAL_FUNCTION_ENC_PAK)) ||
1292 (m_codecFunction == CODECHAL_FUNCTION_FEI_ENC_PAK) ||
1293 (m_codecFunction == (CODECHAL_FUNCTION_FEI_ENC | CODECHAL_FUNCTION_FEI_ENC_PAK)))
1294 {
1295 // Create OS synchronization object to sync between MFX => VME
1296 // if 3 is not good enough, need to increase MBCode buffer number
1297 m_semaphoreMaxCount = MOS_MAX_SEMAPHORE_COUNT;
1298 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnCreateSyncResource(m_osInterface, &m_resSyncObjectRenderContextInUse));
1299
1300 // Create OS synchronization object to sync between VME => MFX
1301 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnCreateSyncResource(m_osInterface, &m_resSyncObjectVideoContextInUse));
1302 }
1303
1304 // Create VME and VDENC/PAK sync objects
1305 if (m_codecFunction == CODECHAL_FUNCTION_ENC_VDENC_PAK)
1306 {
1307 m_semaphoreMaxCount = MOS_MAX_SEMAPHORE_COUNT;
1308 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnCreateSyncResource(m_osInterface, &m_resSyncObjectRenderContextInUse));
1309 }
1310
1311 //For HEVC, moved to standard specific as LCU size is not available here
1312 if (m_hwInterface->GetMfxInterface()->IsRowStoreCachingSupported() &&
1313 ((m_mode == CODECHAL_ENCODE_MODE_AVC) ||
1314 (m_mode == CODECHAL_ENCODE_MODE_VP9 && m_vdencEnabled)))
1315 {
1316 MHW_VDBOX_ROWSTORE_PARAMS rowstoreParams = {};
1317 rowstoreParams.Mode = m_mode;
1318 rowstoreParams.dwPicWidth = m_frameWidth;
1319 rowstoreParams.bMbaff = false;
1320 m_hwInterface->SetRowstoreCachingOffsets(&rowstoreParams);
1321 }
1322
1323 if (m_osInterface->osCpInterface->IsCpEnabled() && m_hwInterface->GetCpInterface()->IsHWCounterAutoIncrementEnforced(m_osInterface) && m_skipFrameBasedHWCounterRead == false)
1324 {
1325 // eStatus query reporting
1326 m_encodeStatusBuf.dwReportSize = MOS_ALIGN_CEIL(sizeof(EncodeStatus), MHW_CACHELINE_SIZE);
1327 uint32_t size = sizeof(HwCounter) * CODECHAL_ENCODE_STATUS_NUM + sizeof(HwCounter);
1328 allocParamsForBufferLinear.dwBytes = size;
1329 allocParamsForBufferLinear.pBufName = "HWCounterQueryBuffer";
1330 allocParamsForBufferLinear.bIsPersistent = true; // keeping status buffer persistent since its used in all command buffers
1331
1332 eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource(
1333 m_osInterface,
1334 &allocParamsForBufferLinear,
1335 &m_resHwCount);
1336
1337 if (eStatus != MOS_STATUS_SUCCESS)
1338 {
1339 CODECHAL_ENCODE_ASSERTMESSAGE("Failed to allocate Encode eStatus Buffer.");
1340 return eStatus;
1341 }
1342
1343 CODECHAL_ENCODE_CHK_STATUS_RETURN(
1344 m_osInterface->pfnSkipResourceSync(
1345 &m_resHwCount));
1346
1347 uint8_t *dataHwCount = (uint8_t *)m_osInterface->pfnLockResource(
1348 m_osInterface,
1349 &(m_resHwCount),
1350 &lockFlagsNoOverWrite);
1351
1352 if (!dataHwCount)
1353 {
1354 CODECHAL_ENCODE_ASSERTMESSAGE("Failed to Local Resource for MbEnc Adv Count Query Buffer.");
1355 return eStatus;
1356 }
1357
1358 MOS_ZeroMemory(dataHwCount, size);
1359 m_dataHwCount = (uint32_t*)dataHwCount;
1360 }
1361
1362 // eStatus query reporting
1363 // HW requires the MI_CONDITIONAL_BATCH_BUFFER_END compare address aligned with cache line since TGL,
1364 // this change will guarantee the multi pak pass BRC works correctly
1365 m_encodeStatusBuf.dwReportSize = MOS_ALIGN_CEIL(sizeof(EncodeStatus), MHW_CACHELINE_SIZE);
1366 uint32_t size = m_encodeStatusBuf.dwReportSize * CODECHAL_ENCODE_STATUS_NUM + sizeof(uint32_t) * 2;
1367 allocParamsForBufferLinear.dwBytes = size;
1368 allocParamsForBufferLinear.pBufName = "StatusQueryBuffer";
1369 allocParamsForBufferLinear.bIsPersistent = true; // keeping status buffer persistent since its used in all command buffers
1370
1371 CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(
1372 m_osInterface->pfnAllocateResource(
1373 m_osInterface,
1374 &allocParamsForBufferLinear,
1375 &m_encodeStatusBuf.resStatusBuffer),
1376 "Failed to allocate Encode eStatus Buffer.");
1377
1378 CODECHAL_ENCODE_CHK_STATUS_RETURN(
1379 m_osInterface->pfnSkipResourceSync(
1380 &m_encodeStatusBuf.resStatusBuffer));
1381
1382 uint8_t* data = (uint8_t*)m_osInterface->pfnLockResource(
1383 m_osInterface,
1384 &(m_encodeStatusBuf.resStatusBuffer),
1385 &lockFlagsNoOverWrite);
1386
1387 MOS_ZeroMemory(data, size);
1388 m_encodeStatusBuf.pData = (uint32_t*)data;
1389 m_encodeStatusBuf.pEncodeStatus = (uint8_t*)(data + sizeof(uint32_t) * 2);
1390 m_encodeStatusBuf.dwSize = size;
1391
1392 // Addresses writen to by HW commands (MI_STORE_DATA_IMM, MI_FLUSH_DW, PIPE_CONTROL) must be QW aligned since these
1393 // commands are capable of writing QWs so the least significant 3 bits of the address field are not used for the
1394 // actual address
1395 m_encodeStatusBuf.dwStoreDataOffset = 0;
1396 m_encodeStatusBuf.dwBSByteCountOffset = CODECHAL_OFFSETOF(EncodeStatus, dwMFCBitstreamByteCountPerFrame);
1397 m_encodeStatusBuf.dwBSSEBitCountOffset = CODECHAL_OFFSETOF(EncodeStatus, dwMFCBitstreamSyntaxElementOnlyBitCount);
1398 m_encodeStatusBuf.dwImageStatusMaskOffset = CODECHAL_OFFSETOF(EncodeStatus, dwImageStatusMask);
1399 m_encodeStatusBuf.dwImageStatusCtrlOffset = CODECHAL_OFFSETOF(EncodeStatus, ImageStatusCtrl);
1400 m_encodeStatusBuf.dwNumSlicesOffset = CODECHAL_OFFSETOF(EncodeStatus, NumSlices);
1401 m_encodeStatusBuf.dwErrorFlagOffset = CODECHAL_OFFSETOF(EncodeStatus, dwErrorFlags);
1402 m_encodeStatusBuf.dwBRCQPReportOffset = CODECHAL_OFFSETOF(EncodeStatus, BrcQPReport);
1403 m_encodeStatusBuf.dwNumPassesOffset = CODECHAL_OFFSETOF(EncodeStatus, dwNumberPasses);
1404 m_encodeStatusBuf.dwQpStatusCountOffset = CODECHAL_OFFSETOF(EncodeStatus, QpStatusCount);
1405 m_encodeStatusBuf.dwImageStatusCtrlOfLastBRCPassOffset = CODECHAL_OFFSETOF(EncodeStatus, ImageStatusCtrlOfLastBRCPass);
1406 m_encodeStatusBuf.dwSceneChangedOffset = CODECHAL_OFFSETOF(EncodeStatus, dwSceneChangedFlag);
1407 m_encodeStatusBuf.dwSumSquareErrorOffset = CODECHAL_OFFSETOF(EncodeStatus, sumSquareError[0]);
1408 m_encodeStatusBuf.dwSliceReportOffset = CODECHAL_OFFSETOF(EncodeStatus, sliceReport);
1409 m_encodeStatusBuf.dwHuCStatusMaskOffset = CODECHAL_OFFSETOF(EncodeStatus, HuCStatusRegMask);
1410 m_encodeStatusBuf.dwHuCStatusRegOffset = CODECHAL_OFFSETOF(EncodeStatus, HuCStatusReg);
1411 m_encodeStatusBuf.dwLookaheadStatusOffset = CODECHAL_OFFSETOF(EncodeStatus, lookaheadStatus);
1412
1413 m_encodeStatusBuf.wCurrIndex = 0;
1414 m_encodeStatusBuf.wFirstIndex = 0;
1415
1416 if (m_encEnabled)
1417 {
1418 m_encodeStatusBufRcs.dwReportSize = MOS_ALIGN_CEIL(sizeof(EncodeStatus), sizeof(uint64_t));
1419 size = m_encodeStatusBufRcs.dwReportSize * CODECHAL_ENCODE_STATUS_NUM + sizeof(uint32_t) * 2;
1420 allocParamsForBufferLinear.dwBytes = size;
1421 allocParamsForBufferLinear.pBufName = "StatusQueryBufferRcs";
1422 allocParamsForBufferLinear.bIsPersistent = true; // keeping status buffer persistent since its used in all command buffers
1423 eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource(
1424 m_osInterface,
1425 &allocParamsForBufferLinear,
1426 &m_encodeStatusBufRcs.resStatusBuffer);
1427
1428 if (eStatus != MOS_STATUS_SUCCESS)
1429 {
1430 CODECHAL_ENCODE_ASSERTMESSAGE("Failed to allocate Encode eStatus Buffer.");
1431 return eStatus;
1432 }
1433
1434 data = (uint8_t*)m_osInterface->pfnLockResource(
1435 m_osInterface,
1436 &(m_encodeStatusBufRcs.resStatusBuffer),
1437 &lockFlagsNoOverWrite);
1438
1439 if (data == nullptr)
1440 {
1441 CODECHAL_ENCODE_ASSERTMESSAGE("Failed to lock Encode eStatus Buffer RCS.");
1442 return eStatus;
1443 }
1444
1445 MOS_ZeroMemory(data, size);
1446 m_encodeStatusBufRcs.pData = (uint32_t*)data;
1447 m_encodeStatusBufRcs.pEncodeStatus = (uint8_t*)(data + sizeof(uint32_t) * 2);
1448 m_encodeStatusBufRcs.dwSize = size;
1449 m_encodeStatusBufRcs.dwStoreDataOffset = 0;
1450 m_encodeStatusBufRcs.wCurrIndex = 0;
1451 m_encodeStatusBufRcs.wFirstIndex = 0;
1452 }
1453
1454 if (m_pakEnabled)
1455 {
1456 m_stateHeapInterface->pfnSetCmdBufStatusPtr(m_stateHeapInterface, m_encodeStatusBuf.pData);
1457 }
1458 else
1459 {
1460 m_stateHeapInterface->pfnSetCmdBufStatusPtr(m_stateHeapInterface, m_encodeStatusBufRcs.pData);
1461 }
1462
1463 MOS_LOCK_PARAMS lockFlagsWriteOnly;
1464 MOS_ZeroMemory(&lockFlagsWriteOnly, sizeof(MOS_LOCK_PARAMS));
1465 lockFlagsWriteOnly.WriteOnly = 1;
1466
1467 if(m_inlineEncodeStatusUpdate)
1468 {
1469 m_atomicScratchBuf.dwSize = MOS_ALIGN_CEIL(sizeof(AtomicScratchBuffer), sizeof(uint64_t));
1470 allocParamsForBufferLinear.Type = MOS_GFXRES_BUFFER;
1471 allocParamsForBufferLinear.TileType = MOS_TILE_LINEAR;
1472 allocParamsForBufferLinear.Format = Format_Buffer;
1473
1474 size = MHW_CACHELINE_SIZE * 4 * 2; // each set of scratch is 4 cacheline size, and allocate 2 set.
1475 allocParamsForBufferLinear.dwBytes = size;
1476 allocParamsForBufferLinear.pBufName = "atomic sratch buffer";
1477
1478 eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource(
1479 m_osInterface,
1480 &allocParamsForBufferLinear,
1481 &(m_atomicScratchBuf.resAtomicScratchBuffer));
1482
1483 if (eStatus != MOS_STATUS_SUCCESS)
1484 {
1485 CODECHAL_ENCODE_ASSERTMESSAGE("Failed to allocate Finger Print Source Buffer.");
1486 return eStatus;
1487 }
1488
1489 data = (uint8_t*)m_osInterface->pfnLockResource(
1490 m_osInterface,
1491 &(m_atomicScratchBuf.resAtomicScratchBuffer),
1492 &lockFlagsWriteOnly);
1493
1494 if (data == nullptr)
1495 {
1496 CODECHAL_ENCODE_ASSERTMESSAGE("Failed to lock Finger Print Source Buffer.");
1497 return eStatus;
1498 }
1499
1500 MOS_ZeroMemory(data, size);
1501 m_atomicScratchBuf.pData = (uint32_t*)data;
1502 m_atomicScratchBuf.dwSize = size;
1503 m_atomicScratchBuf.dwZeroValueOffset = 0;
1504 m_atomicScratchBuf.dwOperand1Offset = MHW_CACHELINE_SIZE;
1505 m_atomicScratchBuf.dwOperand2Offset = MHW_CACHELINE_SIZE * 2;
1506 m_atomicScratchBuf.dwOperand3Offset = MHW_CACHELINE_SIZE * 3;
1507 m_atomicScratchBuf.wEncodeUpdateIndex = 0;
1508 m_atomicScratchBuf.wTearDownIndex = 1;
1509 m_atomicScratchBuf.dwOperandSetSize = MHW_CACHELINE_SIZE * 4;
1510 }
1511
1512 if (m_pakEnabled)
1513 {
1514 if(m_hwInterface->GetMfxInterface()->IsDeblockingFilterRowstoreCacheEnabled() == false)
1515 {
1516 // Deblocking Filter Row Store Scratch buffer
1517 allocParamsForBufferLinear.dwBytes = m_picWidthInMb * 4 * CODECHAL_CACHELINE_SIZE; // 4 cachelines per MB
1518 allocParamsForBufferLinear.pBufName = "Deblocking Filter Row Store Scratch Buffer";
1519
1520 CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(
1521 m_osInterface->pfnAllocateResource(
1522 m_osInterface,
1523 &allocParamsForBufferLinear,
1524 &m_resDeblockingFilterRowStoreScratchBuffer),
1525 "Failed to allocate Deblocking Filter Row Store Scratch Buffer.");
1526 }
1527
1528 if(m_hwInterface->GetMfxInterface()->IsBsdMpcRowstoreCacheEnabled() == false)
1529 {
1530 // MPC Row Store Scratch buffer
1531 allocParamsForBufferLinear.dwBytes = m_picWidthInMb * 2 * 64; // 2 cachelines per MB
1532 allocParamsForBufferLinear.pBufName = "MPC Row Store Scratch Buffer";
1533
1534 CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(
1535 m_osInterface->pfnAllocateResource(
1536 m_osInterface,
1537 &allocParamsForBufferLinear,
1538 &m_resMPCRowStoreScratchBuffer),
1539 "Failed to allocate MPC Row Store Scratch Buffer.");
1540 }
1541
1542 if (!m_vdencEnabled && m_standard != CODECHAL_HEVC) // StreamOut is needed for HEVC VDEnc
1543 {
1544 // streamout data buffer
1545 allocParamsForBufferLinear.dwBytes = numMbs * MFX_PAK_STREAMOUT_DATA_BYTE_SIZE * sizeof(uint32_t);
1546 allocParamsForBufferLinear.pBufName = "Pak StreamOut Buffer";
1547
1548 for (auto i = 0; i < CODECHAL_ENCODE_RECYCLED_BUFFER_NUM; i++)
1549 {
1550 CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(
1551 m_osInterface->pfnAllocateResource(
1552 m_osInterface,
1553 &allocParamsForBufferLinear,
1554 &m_resStreamOutBuffer[i]),
1555 "Failed to allocate Pak Stream Out Buffer.");
1556 }
1557 }
1558 }
1559
1560 if (m_encEnabled || m_vdencEnabled)
1561 {
1562 // Scaled surfaces are required to run both HME and IFrameDist
1563 CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateScalingResources());
1564 }
1565
1566 if(m_encEnabled && (!m_vdencEnabled))
1567 {
1568 for (auto i = 0; i < CODECHAL_ENCODE_MAX_NUM_MAD_BUFFERS; i++)
1569 {
1570 allocParamsForBufferLinear.dwBytes = CODECHAL_MAD_BUFFER_SIZE;
1571 allocParamsForBufferLinear.pBufName = "MAD Data Buffer";
1572
1573 CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(
1574 m_osInterface->pfnAllocateResource(
1575 m_osInterface,
1576 &allocParamsForBufferLinear,
1577 &m_resMadDataBuffer[i]),
1578 "Failed to allocate MAD Data Buffer.");
1579 }
1580 }
1581
1582 if (m_vdencEnabled)
1583 {
1584 // VDENC BRC PAK MMIO buffer
1585 allocParamsForBufferLinear.dwBytes = sizeof(VdencBrcPakMmio);
1586 allocParamsForBufferLinear.pBufName = "VDENC BRC PAK MMIO Buffer";
1587
1588 CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(
1589 m_osInterface->pfnAllocateResource(
1590 m_osInterface,
1591 &allocParamsForBufferLinear,
1592 &m_resPakMmioBuffer),
1593 "%s: Failed to allocate VDENC BRC PAK MMIO Buffer\n", __FUNCTION__);
1594
1595 // VDEnc StreamIn data buffers, shared between driver/ME kernel/VDEnc
1596 if ((m_mode == CODECHAL_ENCODE_MODE_HEVC) || (m_mode == CODECHAL_ENCODE_MODE_VP9))
1597 {
1598 allocParamsForBufferLinear.dwBytes = (MOS_ALIGN_CEIL(m_frameWidth, 64)/32) * (MOS_ALIGN_CEIL(m_frameHeight, 64)/32) * CODECHAL_CACHELINE_SIZE;
1599 }
1600 else
1601 {
1602 allocParamsForBufferLinear.dwBytes = m_picWidthInMb * m_picHeightInMb * CODECHAL_CACHELINE_SIZE;
1603 }
1604 allocParamsForBufferLinear.pBufName = "VDEnc StreamIn Data Buffer";
1605
1606 for (auto i = 0; i < CODECHAL_ENCODE_RECYCLED_BUFFER_NUM; i++)
1607 {
1608 CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(
1609 m_osInterface->pfnAllocateResource(
1610 m_osInterface,
1611 &allocParamsForBufferLinear,
1612 &m_resVdencStreamInBuffer[i]),
1613 "Failed to allocate VDEnc StreamIn Data Buffer.");
1614
1615 data = (uint8_t*)m_osInterface->pfnLockResource(
1616 m_osInterface,
1617 &m_resVdencStreamInBuffer[i],
1618 &lockFlags);
1619
1620 CODECHAL_ENCODE_CHK_NULL_RETURN(data);
1621
1622 MOS_ZeroMemory(
1623 data,
1624 allocParamsForBufferLinear.dwBytes);
1625
1626 m_osInterface->pfnUnlockResource(m_osInterface, &m_resVdencStreamInBuffer[i]);
1627 }
1628 }
1629
1630 if (m_vdencEnabled)
1631 {
1632 // HUC STATUS 2 Buffer for HuC status check in COND_BB_END
1633 allocParamsForBufferLinear.dwBytes = sizeof(uint64_t);
1634 allocParamsForBufferLinear.pBufName = "HUC STATUS 2 Buffer";
1635
1636 CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(
1637 m_osInterface->pfnAllocateResource(
1638 m_osInterface,
1639 &allocParamsForBufferLinear,
1640 &m_resHucStatus2Buffer),
1641 "%s: Failed to allocate HUC STATUS 2 Buffer\n", __FUNCTION__);
1642 }
1643
1644 return eStatus;
1645 }
1646
AllocateScalingResources()1647 MOS_STATUS CodechalEncoderState::AllocateScalingResources()
1648 {
1649 uint32_t numMBs, size;
1650 MOS_ALLOC_GFXRES_PARAMS allocParamsForBuffer2D;
1651 MOS_ALLOC_GFXRES_PARAMS allocParamsForBufferLinear;
1652 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
1653
1654 CODECHAL_ENCODE_FUNCTION_ENTER;
1655
1656 //Allocate the Batch Buffer for scaling Kernel.
1657 numMBs = m_picWidthInMb * ((m_picHeightInMb + 1) >> 1) << 1;
1658 size = m_hwInterface->GetMediaObjectBufferSize(
1659 numMBs,
1660 64);
1661
1662 for (int i = 0; i < CODECHAL_ENCODE_VME_BBUF_NUM; i++)
1663 {
1664 MOS_ZeroMemory(&m_scalingBBUF[i].BatchBuffer, sizeof(m_scalingBBUF[0].BatchBuffer));
1665
1666 /* For CM based Downscale kernel, unlike the old asm based downscale kernel,
1667 HW walker can be used as no inline data is required by the kernel. */
1668 if (!m_useCmScalingKernel && !m_useMwWlkrForAsmScalingKernel)
1669 {
1670 m_scalingBBUF[i].BatchBuffer.bSecondLevel = true;
1671 CODECHAL_ENCODE_CHK_STATUS_RETURN(Mhw_AllocateBb(
1672 m_osInterface,
1673 &m_scalingBBUF[i].BatchBuffer,
1674 NULL,
1675 size));
1676
1677 m_scalingBBUF[i].dwSize = size;
1678 m_scalingBBUF[i].dwNumMbsInBBuf = 0;
1679 m_scalingBBufIdx = CODECHAL_ENCODE_VME_BBUF_NUM - 1;
1680 }
1681 }
1682
1683 //MB stats buffer is supported by AVC kernels on g9+.
1684 if(m_mbStatsSupported)
1685 {
1686 MOS_ZeroMemory(&allocParamsForBufferLinear, sizeof(MOS_ALLOC_GFXRES_PARAMS));
1687 allocParamsForBufferLinear.Type = MOS_GFXRES_BUFFER;
1688 allocParamsForBufferLinear.TileType = MOS_TILE_LINEAR;
1689 allocParamsForBufferLinear.Format = Format_Buffer;
1690
1691 // Starting from g9 HVS kernel, MBEnc Curbe is decoupled from BRC kernel and a new MBEnc BRC surface is added.
1692 // new HVS-based BRC kernel requires size of MBStat surface be 1024-aligned
1693 m_hwInterface->m_avcMbStatBufferSize = MOS_ALIGN_CEIL(m_picWidthInMb * 16 * sizeof(uint32_t)* (4 * m_downscaledHeightInMb4x), 1024);
1694
1695 allocParamsForBufferLinear.dwBytes = m_hwInterface->m_avcMbStatBufferSize;
1696 allocParamsForBufferLinear.pBufName = "MB Statistics Buffer";
1697
1698 CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(m_osInterface->pfnAllocateResource(
1699 m_osInterface,
1700 &allocParamsForBufferLinear,
1701 &m_resMbStatsBuffer), "Failed to allocate MB Statistics Buffer.");
1702
1703 m_mbStatsBottomFieldOffset = m_picWidthInMb * 16 * sizeof(uint32_t) * (2 * m_downscaledHeightInMb4x);
1704
1705 MOS_LOCK_PARAMS lockFlagsWriteOnly;
1706 MOS_ZeroMemory(&lockFlagsWriteOnly, sizeof(MOS_LOCK_PARAMS));
1707 lockFlagsWriteOnly.WriteOnly = 1;
1708
1709 uint8_t* pData = (uint8_t*)m_osInterface->pfnLockResource(
1710 m_osInterface,
1711 &m_resMbStatsBuffer,
1712 &lockFlagsWriteOnly);
1713
1714 if (pData == nullptr)
1715 {
1716 CODECHAL_ENCODE_ASSERTMESSAGE("Failed to Lock m_resMbStatsBuffer");
1717 eStatus = MOS_STATUS_UNKNOWN;
1718 return eStatus;
1719 }
1720
1721 MOS_ZeroMemory(pData, m_hwInterface->m_avcMbStatBufferSize);
1722 m_osInterface->pfnUnlockResource(
1723 m_osInterface, &m_resMbStatsBuffer);
1724 }
1725 else if(m_flatnessCheckSupported)
1726 {
1727 MOS_ZeroMemory(&m_flatnessCheckSurface, sizeof(MOS_SURFACE));
1728 m_flatnessCheckSurface.TileType = MOS_TILE_LINEAR;
1729 m_flatnessCheckSurface.bArraySpacing = true;
1730 m_flatnessCheckSurface.Format = Format_Buffer_2D;
1731
1732 MOS_ZeroMemory(&allocParamsForBuffer2D, sizeof(MOS_ALLOC_GFXRES_PARAMS));
1733 allocParamsForBuffer2D.Type = MOS_GFXRES_2D;
1734 allocParamsForBuffer2D.TileType = MOS_TILE_LINEAR;
1735 allocParamsForBuffer2D.Format = Format_Buffer_2D;
1736 // Data size for 1MB is 1DWORDs (4Bytes)
1737 allocParamsForBuffer2D.dwWidth = MOS_ALIGN_CEIL(m_picWidthInMb * 4, 64);
1738 // Because FlatnessCheckSurface was referenced and filled during 4xDownScaling operation,
1739 // the height should be fit to MediaWalker height setting for 4xDS Kernel.
1740 allocParamsForBuffer2D.dwHeight = MOS_ALIGN_CEIL(4 * m_downscaledHeightInMb4x, 64);
1741 allocParamsForBuffer2D.pBufName = "Flatness Check Surface";
1742
1743 eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource(
1744 m_osInterface,
1745 &allocParamsForBuffer2D,
1746 &m_flatnessCheckSurface.OsResource);
1747
1748 if (eStatus != MOS_STATUS_SUCCESS)
1749 {
1750 CODECHAL_ENCODE_ASSERTMESSAGE("Failed to allocate FlatnessCheck Surface.");
1751 return eStatus;
1752 }
1753
1754 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalGetResourceInfo(
1755 m_osInterface,
1756 &m_flatnessCheckSurface));
1757
1758 m_flatnessCheckBottomFieldOffset = m_flatnessCheckSurface.dwPitch * m_flatnessCheckSurface.dwHeight >> 1;
1759 }
1760
1761 return eStatus;
1762 }
1763
//!
//! \brief    Program the ME kernel for one HME level into the render command buffer
//! \details  Selects the kernel state for the requested level, assigns DSH/SSH space,
//!           programs the CURBE and surfaces through m_encoderGenState, adds a media
//!           object walker and, when single task phase is not in use or this is the
//!           last task in the phase, terminates and submits the command buffer.
//! \param    [in,out] meParams
//!           ME CURBE parameters; hmeLvl and pKernelState are filled in here
//! \param    [in,out] meSurfaceParams
//!           ME surface parameters; downscaled dimensions, 16x/32x flags and
//!           pKernelState are filled in here
//! \param    [in] hmeLevel
//!           HME level to execute (32x, 16x, otherwise treated as 4x)
//! \return   MOS_STATUS
//!           MOS_STATUS_SUCCESS if success, else the first failing status
//!
MOS_STATUS CodechalEncoderState::ExecuteMeKernel(
    MeCurbeParams *meParams,
    MeSurfaceParams *meSurfaceParams,
    HmeLevel hmeLevel)
{
    MOS_STATUS eStatus = MOS_STATUS_SUCCESS;

    CODECHAL_ENCODE_FUNCTION_ENTER;

    CODECHAL_ENCODE_CHK_NULL_RETURN(meParams);
    CODECHAL_ENCODE_CHK_NULL_RETURN(meSurfaceParams);

    // Tag this submission for perf profiling; under single task phase the ME kernel
    // is reported with the scaling-kernel call type.
    PerfTagSetting perfTag;
    perfTag.Value = 0;
    perfTag.Mode = (uint16_t)m_mode & CODECHAL_ENCODE_MODE_BIT_MASK;
    perfTag.CallType = m_singleTaskPhaseSupported ? CODECHAL_ENCODE_PERFTAG_CALL_SCALING_KERNEL : CODECHAL_ENCODE_PERFTAG_CALL_ME_KERNEL;
    perfTag.PictureCodingType = m_pictureCodingType;
    m_osInterface->pfnSetPerfTag(m_osInterface, perfTag.Value);
    // Each ME kernel buffer counts as a separate perf task
    m_osInterface->pfnResetPerfBufferID(m_osInterface);

    // Any level other than 32x/16x is handled as 4x
    CODECHAL_MEDIA_STATE_TYPE encFunctionType = (hmeLevel == HME_LEVEL_32x) ? CODECHAL_MEDIA_STATE_32X_ME :
        (hmeLevel == HME_LEVEL_16x) ? CODECHAL_MEDIA_STATE_16X_ME : CODECHAL_MEDIA_STATE_4X_ME;

    // With VDEnc enabled, the 4x level uses the dedicated VDEnc ME kernel state
    bool vdencMeInUse = false;
    if (m_vdencEnabled && (encFunctionType == CODECHAL_MEDIA_STATE_4X_ME))
    {
        vdencMeInUse = true;
        // Non legacy stream in is for hevc vp9 streamin kernel
        encFunctionType = m_useNonLegacyStreamin ? CODECHAL_MEDIA_STATE_4X_ME : CODECHAL_MEDIA_STATE_ME_VDENC_STREAMIN;
    }

    // Non-VDEnc case: P pictures use the P kernel, everything else uses the B kernel
    uint32_t krnStateIdx = vdencMeInUse ?
        CODECHAL_ENCODE_ME_IDX_VDENC :
        ((m_pictureCodingType == P_TYPE) ? CODECHAL_ENCODE_ME_IDX_P : CODECHAL_ENCODE_ME_IDX_B);

    PMHW_KERNEL_STATE kernelState = &m_meKernelStates[krnStateIdx];

    // If Single Task Phase is not enabled, use BT count for the kernel state.
    // Under single task phase this is done once, for the first task, with m_maxBtCount.
    if (m_firstTaskInPhase == true || !m_singleTaskPhaseSupported)
    {
        uint32_t maxBtCount = m_singleTaskPhaseSupported ?
            m_maxBtCount : kernelState->KernelParams.iBTCount;
        CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnRequestSshSpaceForCmdBuf(
            m_stateHeapInterface,
            maxBtCount));
        // m_vmeStatesSize is what VerifySpaceAvailable() checks on the render context
        m_vmeStatesSize = m_hwInterface->GetKernelLoadCommandSize(maxBtCount);
        CODECHAL_ENCODE_CHK_STATUS_RETURN(VerifySpaceAvailable());
    }

    CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->AssignDshAndSshSpace(
        m_stateHeapInterface,
        kernelState,
        false,
        0,
        false,
        m_storeData));
    MHW_INTERFACE_DESCRIPTOR_PARAMS idParams;
    MOS_ZeroMemory(&idParams, sizeof(idParams));
    idParams.pKernelState = kernelState;
    CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnSetInterfaceDescriptor(
        m_stateHeapInterface,
        1,
        &idParams));

    // Setup Additional MeParams (Most of them set up in codec specific function, so don't zero out here)
    meParams->hmeLvl = hmeLevel;
    meParams->pKernelState = kernelState;

    // NOTE(review): m_encoderGenState is dereferenced without a null check - confirm it is always set here
    CODECHAL_ENCODE_CHK_STATUS_RETURN(m_encoderGenState->SetCurbeMe(meParams));

    // Debug-only dumps of the DSH region, the CURBE and the ISH region of this kernel
    CODECHAL_DEBUG_TOOL(
        CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpKernelRegion(
            encFunctionType,
            MHW_DSH_TYPE,
            kernelState));
        CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpCurbe(
            encFunctionType,
            kernelState));
        CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpKernelRegion(
            encFunctionType,
            MHW_ISH_TYPE,
            kernelState));
    )
    // NOTE(review): from here until pfnReturnCommandBuffer below, the *_RETURN macros
    // presumably return on failure without handing the command buffer back - confirm
    // whether that is acceptable for the OS interface.
    MOS_COMMAND_BUFFER cmdBuffer;
    CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnGetCommandBuffer(m_osInterface, &cmdBuffer, 0));
    SendKernelCmdsParams sendKernelCmdsParams;
    sendKernelCmdsParams = SendKernelCmdsParams();
    sendKernelCmdsParams.EncFunctionType = encFunctionType;
    sendKernelCmdsParams.pKernelState = kernelState;

    CODECHAL_ENCODE_CHK_STATUS_RETURN(SendGenericKernelCmds(&cmdBuffer, &sendKernelCmdsParams));

    // Add binding table
    CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnSetBindingTable(
        m_stateHeapInterface,
        kernelState));

    // Setup Additional ME surface params (Most of them set up in codec specific function, so don't zero out here)
    meSurfaceParams->dwDownscaledWidthInMb = (hmeLevel == HME_LEVEL_32x) ? m_downscaledWidthInMb32x :
        (hmeLevel == HME_LEVEL_16x) ? m_downscaledWidthInMb16x : m_downscaledWidthInMb4x;
    meSurfaceParams->dwDownscaledHeightInMb = (hmeLevel == HME_LEVEL_32x) ? m_downscaledFrameFieldHeightInMb32x :
        (hmeLevel == HME_LEVEL_16x) ? m_downscaledFrameFieldHeightInMb16x : m_downscaledFrameFieldHeightInMb4x;
    meSurfaceParams->b32xMeInUse = (hmeLevel == HME_LEVEL_32x) ? true : false;
    meSurfaceParams->b16xMeInUse = (hmeLevel == HME_LEVEL_16x) ? true : false;
    meSurfaceParams->pKernelState = kernelState;

    CODECHAL_ENCODE_CHK_STATUS_RETURN(m_encoderGenState->SendMeSurfaces(&cmdBuffer, meSurfaceParams));

    // Dump SSH for ME kernel
    CODECHAL_DEBUG_TOOL(
        CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpKernelRegion(
            encFunctionType,
            MHW_SSH_TYPE,
            kernelState)));

    /* Zero out the MV data memory and ME distortion buffer for the driver ULT.
       The kernel only writes out the data used for the current frame, so in some cases data from
       previous frames would be left in the buffer (for example, the L1 MVs of a B frame would still
       show up in the MV data buffer of a P frame). */

    // Zeroing out the buffers has perf impact, so zero it out only when dumps are actually enabled
    CODECHAL_DEBUG_TOOL(
        CODECHAL_ENCODE_CHK_NULL_RETURN(m_debugInterface);
        uint8_t* data = NULL;
        uint32_t size = 0;
        bool driverMeDumpEnabled = m_debugInterface->DumpIsEnabled(CodechalDbgAttr::attrOutput, encFunctionType);

        if (driverMeDumpEnabled)
        {
            MOS_LOCK_PARAMS lockFlags;
            MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
            lockFlags.WriteOnly = 1;

            // Clear the MV data buffer that belongs to the level being executed
            switch (hmeLevel)
            {
            case HME_LEVEL_32x:
                data = (uint8_t*)m_osInterface->pfnLockResource(
                    m_osInterface,
                    &meSurfaceParams->ps32xMeMvDataBuffer->OsResource,
                    &lockFlags);
                CODECHAL_ENCODE_CHK_NULL_RETURN(data);
                size = MOS_ALIGN_CEIL((m_downscaledWidthInMb32x * 32), 64) *
                    (m_downscaledHeightInMb32x * 2 * 4 * CODECHAL_ENCODE_ME_DATA_SIZE_MULTIPLIER);
                MOS_ZeroMemory(data, size);
                m_osInterface->pfnUnlockResource(
                    m_osInterface,
                    &meSurfaceParams->ps32xMeMvDataBuffer->OsResource);
                break;
            case HME_LEVEL_16x:
                data = (uint8_t*)m_osInterface->pfnLockResource(
                    m_osInterface,
                    &meSurfaceParams->ps16xMeMvDataBuffer->OsResource,
                    &lockFlags);
                CODECHAL_ENCODE_CHK_NULL_RETURN(data);
                size = MOS_ALIGN_CEIL((m_downscaledWidthInMb16x * 32), 64) *
                    (m_downscaledHeightInMb16x * 2 * 4 * CODECHAL_ENCODE_ME_DATA_SIZE_MULTIPLIER);
                MOS_ZeroMemory(data, size);
                m_osInterface->pfnUnlockResource(
                    m_osInterface,
                    &meSurfaceParams->ps16xMeMvDataBuffer->OsResource);
                break;
            case HME_LEVEL_4x:
                // The 4x MV data buffer is only cleared when VDEnc is not enabled
                if (!m_vdencEnabled)
                {
                    data = (uint8_t*)m_osInterface->pfnLockResource(
                        m_osInterface,
                        &meSurfaceParams->ps4xMeMvDataBuffer->OsResource,
                        &lockFlags);
                    CODECHAL_ENCODE_CHK_NULL_RETURN(data);
                    size = MOS_ALIGN_CEIL((m_downscaledWidthInMb4x * 32), 64) *
                        (m_downscaledHeightInMb4x * 2 * 4 * CODECHAL_ENCODE_ME_DATA_SIZE_MULTIPLIER);
                    MOS_ZeroMemory(data, size);
                    m_osInterface->pfnUnlockResource(
                        m_osInterface,
                        &meSurfaceParams->ps4xMeMvDataBuffer->OsResource);
                }
                break;
            default:
                return MOS_STATUS_INVALID_PARAMETER;
            }

            // zeroing out ME dist buffer
            if (meSurfaceParams->b4xMeDistortionBufferSupported)
            {
                data = (uint8_t*)m_osInterface->pfnLockResource(
                    m_osInterface, &meSurfaceParams->psMeDistortionBuffer->OsResource, &lockFlags);
                CODECHAL_ENCODE_CHK_NULL_RETURN(data);
                size = meSurfaceParams->psMeDistortionBuffer->dwHeight * meSurfaceParams->psMeDistortionBuffer->dwPitch;
                MOS_ZeroMemory(data, size);
                m_osInterface->pfnUnlockResource(
                    m_osInterface,
                    &meSurfaceParams->psMeDistortionBuffer->OsResource);
            }
        }
    );

    uint32_t scalingFactor = (hmeLevel == HME_LEVEL_32x) ? SCALE_FACTOR_32x :
        (hmeLevel == HME_LEVEL_16x) ? SCALE_FACTOR_16x : SCALE_FACTOR_4x;

    // Walker dimensions: the downscaled frame size expressed in macroblocks
    uint32_t resolutionX = CODECHAL_GET_WIDTH_IN_MACROBLOCKS(m_frameWidth / scalingFactor);
    uint32_t resolutionY = CODECHAL_GET_HEIGHT_IN_MACROBLOCKS(m_frameFieldHeight / scalingFactor);

    CODECHAL_WALKER_CODEC_PARAMS walkerCodecParams;
    MOS_ZeroMemory(&walkerCodecParams, sizeof(walkerCodecParams));
    walkerCodecParams.WalkerMode = m_walkerMode;
    walkerCodecParams.dwResolutionX = resolutionX;
    walkerCodecParams.dwResolutionY = resolutionY;
    walkerCodecParams.bNoDependency = true;
    walkerCodecParams.bMbaff = meSurfaceParams->bMbaff;
    walkerCodecParams.bGroupIdSelectSupported = m_groupIdSelectSupported;
    walkerCodecParams.ucGroupId = m_groupId;

    MHW_WALKER_PARAMS walkerParams;
    CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalInitMediaObjectWalkerParams(
        m_hwInterface,
        &walkerParams,
        &walkerCodecParams));

    CODECHAL_ENCODE_CHK_STATUS_RETURN(m_renderEngineInterface->AddMediaObjectWalkerCmd(
        &cmdBuffer,
        &walkerParams));

    CODECHAL_ENCODE_CHK_STATUS_RETURN(EndStatusReport(&cmdBuffer, encFunctionType));

    CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnSubmitBlocks(
        m_stateHeapInterface,
        kernelState));
    // The batch buffer is only terminated when this kernel ends the submission
    if (!m_singleTaskPhaseSupported || m_lastTaskInPhase)
    {
        CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnUpdateGlobalCmdBufId(
            m_stateHeapInterface));
        CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiBatchBufferEnd(&cmdBuffer, nullptr));
    }

    CODECHAL_DEBUG_TOOL(CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpCmdBuffer(
        &cmdBuffer,
        encFunctionType,
        nullptr)));

    m_hwInterface->UpdateSSEuForCmdBuffer(&cmdBuffer, m_singleTaskPhaseSupported, m_lastTaskInPhase);

    m_osInterface->pfnReturnCommandBuffer(m_osInterface, &cmdBuffer, 0);

    // Submit only when not batching under single task phase, or for the last task of the phase
    if (!m_singleTaskPhaseSupported || m_lastTaskInPhase)
    {
        HalOcaInterface::On1stLevelBBEnd(cmdBuffer, *m_osInterface->pOsContext);
        // NOTE(review): the result of pfnSubmitCommandBuffer is not checked here
        m_osInterface->pfnSubmitCommandBuffer(m_osInterface, &cmdBuffer, m_renderContextUsesNullHw);
        m_lastTaskInPhase = false;
    }

    return eStatus;
}
2017
CheckSupportedFormat(PMOS_SURFACE surface)2018 bool CodechalEncoderState::CheckSupportedFormat(
2019 PMOS_SURFACE surface)
2020 {
2021 bool isColorFormatSupported = true;
2022
2023 if (!surface)
2024 {
2025 CODECHAL_ENCODE_ASSERTMESSAGE("Invalid (NULL) Pointer.");
2026 return isColorFormatSupported;
2027 }
2028
2029 // if input is not Tile-Y, or color format not NV12, invoke Ds+Copy kernel
2030 if (!IS_Y_MAJOR_TILE_FORMAT(surface->TileType) ||
2031 surface->Format != Format_NV12)
2032 {
2033 isColorFormatSupported = false;
2034 }
2035
2036 return isColorFormatSupported;
2037 }
2038
FreeResources()2039 void CodechalEncoderState::FreeResources()
2040 {
2041 CODECHAL_ENCODE_FUNCTION_ENTER;
2042
2043 // destroy sync objects
2044 if (!Mos_ResourceIsNull(&m_resSyncObjectRenderContextInUse))
2045 {
2046 m_osInterface->pfnDestroySyncResource(m_osInterface, &m_resSyncObjectRenderContextInUse);
2047 }
2048 if (!Mos_ResourceIsNull(&m_resSyncObjectVideoContextInUse))
2049 {
2050 m_osInterface->pfnDestroySyncResource(m_osInterface, &m_resSyncObjectVideoContextInUse);
2051 }
2052
2053 // Release eStatus buffer
2054 if (!Mos_ResourceIsNull(&m_encodeStatusBuf.resStatusBuffer))
2055 {
2056 if(m_encodeStatusBuf.pEncodeStatus != nullptr)
2057 {
2058 EncodeStatus* tmpEncodeStatus = nullptr;
2059 for(int i = 0; i < CODECHAL_ENCODE_STATUS_NUM; i++)
2060 {
2061 tmpEncodeStatus = (EncodeStatus*)(m_encodeStatusBuf.pEncodeStatus + i * m_encodeStatusBuf.dwReportSize);
2062 if(tmpEncodeStatus != nullptr && tmpEncodeStatus->encodeStatusReport.pHEVCTileinfo != nullptr)
2063 {
2064 MOS_FreeMemory(tmpEncodeStatus->encodeStatusReport.pHEVCTileinfo);
2065 tmpEncodeStatus->encodeStatusReport.pHEVCTileinfo = nullptr;
2066 }
2067 }
2068 }
2069
2070 m_osInterface->pfnUnlockResource(
2071 m_osInterface,
2072 &(m_encodeStatusBuf.resStatusBuffer));
2073
2074 m_osInterface->pfnFreeResource(
2075 m_osInterface,
2076 &m_encodeStatusBuf.resStatusBuffer);
2077
2078 m_encodeStatusBuf.pData = nullptr;
2079 m_encodeStatusBuf.pEncodeStatus = nullptr;
2080 }
2081
2082 // Release HW Counter buffer
2083 if (m_osInterface->osCpInterface->IsCpEnabled() && m_hwInterface->GetCpInterface()->IsHWCounterAutoIncrementEnforced(m_osInterface) && m_skipFrameBasedHWCounterRead == false)
2084 {
2085 if (!Mos_ResourceIsNull(&m_resHwCount))
2086 {
2087 m_osInterface->pfnUnlockResource(
2088 m_osInterface,
2089 &(m_resHwCount));
2090
2091 m_osInterface->pfnFreeResource(
2092 m_osInterface,
2093 &m_resHwCount);
2094 }
2095 }
2096
2097 if (!Mos_ResourceIsNull(&m_encodeStatusBufRcs.resStatusBuffer))
2098 {
2099 if(m_encodeStatusBufRcs.pEncodeStatus != nullptr)
2100 {
2101 EncodeStatus* tmpEncodeStatus = nullptr;
2102 for(int i = 0; i < CODECHAL_ENCODE_STATUS_NUM; i++)
2103 {
2104 tmpEncodeStatus = (EncodeStatus*)(m_encodeStatusBufRcs.pEncodeStatus + i * m_encodeStatusBufRcs.dwReportSize);
2105 if(tmpEncodeStatus != nullptr && tmpEncodeStatus->encodeStatusReport.pHEVCTileinfo != nullptr)
2106 {
2107 MOS_FreeMemory(tmpEncodeStatus->encodeStatusReport.pHEVCTileinfo);
2108 tmpEncodeStatus->encodeStatusReport.pHEVCTileinfo = nullptr;
2109 }
2110 }
2111 }
2112
2113 m_osInterface->pfnUnlockResource(
2114 m_osInterface,
2115 &(m_encodeStatusBufRcs.resStatusBuffer));
2116
2117 m_osInterface->pfnFreeResource(
2118 m_osInterface,
2119 &m_encodeStatusBufRcs.resStatusBuffer);
2120
2121 m_encodeStatusBufRcs.pData = nullptr;
2122 m_encodeStatusBufRcs.pEncodeStatus = nullptr;
2123 }
2124
2125 if (m_pakEnabled)
2126 {
2127 if (!Mos_ResourceIsNull(&m_resDeblockingFilterRowStoreScratchBuffer))
2128 {
2129 m_osInterface->pfnFreeResource(
2130 m_osInterface,
2131 &m_resDeblockingFilterRowStoreScratchBuffer);
2132 }
2133 if (!Mos_ResourceIsNull(&m_resMPCRowStoreScratchBuffer))
2134 {
2135 m_osInterface->pfnFreeResource(
2136 m_osInterface,
2137 &m_resMPCRowStoreScratchBuffer);
2138 }
2139
2140 for (auto i = 0; i < CODECHAL_ENCODE_RECYCLED_BUFFER_NUM; i++)
2141 {
2142 if (!Mos_ResourceIsNull(&m_resStreamOutBuffer[i]))
2143 {
2144 m_osInterface->pfnFreeResource(
2145 m_osInterface,
2146 &m_resStreamOutBuffer[i]);
2147 }
2148
2149 if (!Mos_ResourceIsNull(&m_sliceMapSurface[i].OsResource))
2150 {
2151 m_osInterface->pfnFreeResource(
2152 m_osInterface,
2153 &m_sliceMapSurface[i].OsResource);
2154 }
2155 }
2156 }
2157
2158 // release CSC Downscaling kernel resources
2159 if (m_cscDsState)
2160 {
2161 MOS_Delete(m_cscDsState);
2162 m_cscDsState = nullptr;
2163 }
2164
2165 if (m_encoderGenState)
2166 {
2167 MOS_Delete(m_encoderGenState);
2168 m_encoderGenState = nullptr;
2169 }
2170
2171 if(m_inlineEncodeStatusUpdate)
2172 {
2173 if (!Mos_ResourceIsNull(&m_atomicScratchBuf.resAtomicScratchBuffer))
2174 {
2175 m_osInterface->pfnUnlockResource(
2176 m_osInterface,
2177 &(m_atomicScratchBuf.resAtomicScratchBuffer));
2178
2179 m_osInterface->pfnFreeResource(
2180 m_osInterface,
2181 &m_atomicScratchBuf.resAtomicScratchBuffer);
2182 }
2183 }
2184
2185 if (m_encEnabled)
2186 {
2187 for (auto i = 0; i < CODECHAL_ENCODE_VME_BBUF_NUM; i++)
2188 {
2189 if (!Mos_ResourceIsNull(&m_scalingBBUF[i].BatchBuffer.OsResource))
2190 {
2191 Mhw_FreeBb(m_osInterface, &m_scalingBBUF[i].BatchBuffer, nullptr);
2192 }
2193 }
2194
2195 if(!Mos_ResourceIsNull(&m_flatnessCheckSurface.OsResource))
2196 {
2197 m_osInterface->pfnFreeResource(
2198 m_osInterface,
2199 &m_flatnessCheckSurface.OsResource);
2200 }
2201
2202 if(!Mos_ResourceIsNull(&m_resMbStatsBuffer))
2203 {
2204 m_osInterface->pfnFreeResource(
2205 m_osInterface,
2206 &m_resMbStatsBuffer);
2207 }
2208
2209 for (auto i = 0; i < CODECHAL_ENCODE_MAX_NUM_MAD_BUFFERS; i++)
2210 {
2211 if (!Mos_ResourceIsNull(&m_resMadDataBuffer[i]))
2212 {
2213 m_osInterface->pfnFreeResource(
2214 m_osInterface,
2215 &m_resMadDataBuffer[i]);
2216 }
2217 }
2218 }
2219
2220 if (m_vdencEnabled)
2221 {
2222 m_osInterface->pfnFreeResource(
2223 m_osInterface,
2224 &m_resPakMmioBuffer);
2225
2226 m_osInterface->pfnFreeResource(m_osInterface, &m_resHucFwBuffer);
2227
2228 for (auto i = 0; i < CODECHAL_ENCODE_RECYCLED_BUFFER_NUM; i++)
2229 {
2230 m_osInterface->pfnFreeResource(
2231 m_osInterface,
2232 &m_resVdencStreamInBuffer[i]);
2233 }
2234 }
2235
2236 if (m_vdencEnabled)
2237 {
2238 m_osInterface->pfnFreeResource(m_osInterface, &m_resHucStatus2Buffer);
2239 }
2240
2241 m_osInterface->pfnFreeResource(m_osInterface, &m_resVdencCmdInitializerDmemBuffer);
2242 for (auto i = 0; i < 2; i++)
2243 {
2244 m_osInterface->pfnFreeResource(m_osInterface, &m_resVdencCmdInitializerDataBuffer[i]);
2245 }
2246
2247 return;
2248 }
2249
Destroy()2250 void CodechalEncoderState::Destroy()
2251 {
2252 CODECHAL_ENCODE_FUNCTION_ENTER;
2253
2254 if (m_videoNodeAssociationCreated &&
2255 MEDIA_IS_SKU(m_skuTable, FtrVcs2) &&
2256 (m_videoGpuNode < MOS_GPU_NODE_MAX))
2257 {
2258 // Destroy encode video node associations
2259 m_osInterface->pfnDestroyVideoNodeAssociation(m_osInterface, m_videoGpuNode);
2260 }
2261
2262 if (m_mmcState != nullptr)
2263 {
2264 MOS_Delete(m_mmcState);
2265 m_mmcState = nullptr;
2266 }
2267
2268 MOS_Delete(m_allocator);
2269 m_allocator = nullptr;
2270
2271 MOS_Delete(m_trackedBuf);
2272 m_trackedBuf = nullptr;
2273
2274 // Release encoder resources
2275 FreeResources();
2276 return;
2277 }
2278
CalculateCommandBufferSize()2279 uint32_t CodechalEncoderState::CalculateCommandBufferSize()
2280 {
2281 uint32_t commandBufferSize =
2282 m_pictureStatesSize +
2283 m_extraPictureStatesSize +
2284 (m_sliceStatesSize * m_numSlices);
2285
2286 if (m_singleTaskPhaseSupported)
2287 {
2288 commandBufferSize *= (m_numPasses + 1);
2289 }
2290
2291 // 4K align since allocation is in chunks of 4K bytes.
2292 commandBufferSize = MOS_ALIGN_CEIL(commandBufferSize, 0x1000);
2293
2294 return commandBufferSize;
2295 }
2296
VerifySpaceAvailable()2297 MOS_STATUS CodechalEncoderState::VerifySpaceAvailable()
2298 {
2299 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
2300
2301 CODECHAL_ENCODE_FUNCTION_ENTER;
2302
2303 uint32_t requestedSize = 0;
2304 if (m_osInterface->pfnGetGpuContext(m_osInterface) == m_renderContext)
2305 {
2306 requestedSize = m_vmeStatesSize;
2307
2308 eStatus = (MOS_STATUS)m_osInterface->pfnVerifyCommandBufferSize(
2309 m_osInterface,
2310 requestedSize,
2311 0);
2312
2313 return eStatus;
2314 }
2315
2316 uint32_t requestedPatchListSize = 0;
2317 MOS_STATUS statusPatchList = MOS_STATUS_SUCCESS, statusCmdBuf;
2318
2319 bool m_usePatchList = m_osInterface->bUsesPatchList || MEDIA_IS_SKU(m_skuTable, FtrMediaPatchless);
2320 if (m_osInterface->pfnGetGpuContext(m_osInterface) == m_videoContext)
2321 {
2322 if (m_usePatchList)
2323 {
2324 requestedPatchListSize =
2325 m_picturePatchListSize +
2326 m_extraPicturePatchListSize +
2327 (m_slicePatchListSize * m_numSlices);
2328
2329 if (m_singleTaskPhaseSupported)
2330 {
2331 requestedPatchListSize *= (m_numPasses + 1);
2332 }
2333 }
2334
2335 requestedSize = CalculateCommandBufferSize();
2336
2337 // Try a maximum of 3 attempts to request the required sizes from OS
2338 // OS could reset the sizes if necessary, therefore, requires to re-verify
2339 for (auto i = 0; i < 3; i++)
2340 {
2341 //Experiment shows resizing CmdBuf size and PatchList size in two calls one after the other would cause previously
2342 //successfully requested size to fallback to wrong value, hence never satisfying the requirement. So we call pfnResize()
2343 //only once depending on whether CmdBuf size not enough, or PatchList size not enough, or both.
2344 if (m_usePatchList)
2345 {
2346 statusPatchList = (MOS_STATUS)m_osInterface->pfnVerifyPatchListSize(
2347 m_osInterface,
2348 requestedPatchListSize);
2349 }
2350
2351 statusCmdBuf = (MOS_STATUS)m_osInterface->pfnVerifyCommandBufferSize(
2352 m_osInterface,
2353 requestedSize,
2354 0);
2355
2356 if (statusPatchList != MOS_STATUS_SUCCESS && statusCmdBuf != MOS_STATUS_SUCCESS)
2357 {
2358 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->ResizeCommandBufferAndPatchList(requestedSize + COMMAND_BUFFER_RESERVED_SPACE, requestedPatchListSize));
2359 }
2360 else if (statusPatchList != MOS_STATUS_SUCCESS)
2361 {
2362 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->ResizeCommandBufferAndPatchList(0, requestedPatchListSize));
2363 }
2364 else if (statusCmdBuf != MOS_STATUS_SUCCESS)
2365 {
2366 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->ResizeCommandBufferAndPatchList(requestedSize + COMMAND_BUFFER_RESERVED_SPACE, 0));
2367 }
2368 else
2369 {
2370 m_singleTaskPhaseSupportedInPak = m_singleTaskPhaseSupported;
2371 return eStatus;
2372 }
2373 }
2374 }
2375
2376 if (m_usePatchList)
2377 {
2378 statusPatchList = (MOS_STATUS)m_osInterface->pfnVerifyPatchListSize(
2379 m_osInterface,
2380 requestedPatchListSize);
2381 }
2382
2383 statusCmdBuf = (MOS_STATUS)m_osInterface->pfnVerifyCommandBufferSize(
2384 m_osInterface,
2385 requestedSize,
2386 0);
2387
2388 if ((statusCmdBuf == MOS_STATUS_SUCCESS) && (statusPatchList == MOS_STATUS_SUCCESS))
2389 {
2390 m_singleTaskPhaseSupportedInPak = m_singleTaskPhaseSupported;
2391 return eStatus;
2392 }
2393
2394 if (m_singleTaskPhaseSupported)
2395 {
2396 uint32_t requestedSizeOriginal = 0, requestedPatchListSizeOriginal = 0;
2397 for (auto i = 0; i < 3; i++)
2398 {
2399 //Experiment shows resizing CmdBuf size and PatchList size in two calls one after the other would cause previously
2400 //successfully requested size to fallback to wrong value, hence never satisfying the requirement. So we call pfnResize()
2401 //only once depending on whether CmdBuf size not enough, or PatchList size not enough, or both.
2402 if (m_usePatchList)
2403 {
2404 statusPatchList = (MOS_STATUS)m_osInterface->pfnVerifyPatchListSize(
2405 m_osInterface,
2406 requestedPatchListSizeOriginal);
2407 }
2408
2409 statusCmdBuf = (MOS_STATUS)m_osInterface->pfnVerifyCommandBufferSize(
2410 m_osInterface,
2411 requestedSizeOriginal,
2412 0);
2413
2414 if (statusPatchList != MOS_STATUS_SUCCESS && statusCmdBuf != MOS_STATUS_SUCCESS)
2415 {
2416 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->ResizeCommandBufferAndPatchList(requestedSizeOriginal + COMMAND_BUFFER_RESERVED_SPACE, requestedPatchListSizeOriginal));
2417 }
2418 else if (statusPatchList != MOS_STATUS_SUCCESS)
2419 {
2420 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->ResizeCommandBufferAndPatchList(0, requestedPatchListSizeOriginal));
2421 }
2422 else if (statusCmdBuf != MOS_STATUS_SUCCESS)
2423 {
2424 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->ResizeCommandBufferAndPatchList(requestedSizeOriginal + COMMAND_BUFFER_RESERVED_SPACE, 0));
2425 }
2426 else
2427 {
2428 m_singleTaskPhaseSupportedInPak = false;
2429 return eStatus;
2430 }
2431 }
2432 if (m_usePatchList)
2433 {
2434 statusPatchList = (MOS_STATUS)m_osInterface->pfnVerifyPatchListSize(
2435 m_osInterface,
2436 requestedPatchListSizeOriginal);
2437 }
2438
2439 statusCmdBuf = (MOS_STATUS)m_osInterface->pfnVerifyCommandBufferSize(
2440 m_osInterface,
2441 requestedSizeOriginal,
2442 0);
2443
2444 if (statusPatchList == MOS_STATUS_SUCCESS && statusCmdBuf == MOS_STATUS_SUCCESS)
2445 {
2446 m_singleTaskPhaseSupportedInPak = false;
2447 }
2448 else
2449 {
2450 eStatus = MOS_STATUS_NO_SPACE;
2451 }
2452 }
2453 else
2454 {
2455 eStatus = MOS_STATUS_NO_SPACE;
2456 }
2457
2458 return eStatus;
2459 }
2460
AddMediaVfeCmd(PMOS_COMMAND_BUFFER cmdBuffer,SendKernelCmdsParams * params)2461 MOS_STATUS CodechalEncoderState::AddMediaVfeCmd(
2462 PMOS_COMMAND_BUFFER cmdBuffer,
2463 SendKernelCmdsParams *params)
2464 {
2465 CODECHAL_ENCODE_CHK_NULL_RETURN(params);
2466
2467 MHW_VFE_PARAMS vfeParams = {};
2468 vfeParams.pKernelState = params->pKernelState;
2469 vfeParams.eVfeSliceDisable = MHW_VFE_SLICE_ALL;
2470 vfeParams.Scoreboard.ScoreboardEnable = m_useHwScoreboard;
2471 vfeParams.Scoreboard.ScoreboardType = m_hwScoreboardType;
2472 vfeParams.dwMaximumNumberofThreads = m_encodeVfeMaxThreads;
2473
2474 if (!m_useHwScoreboard)
2475 {
2476 vfeParams.Scoreboard.ScoreboardMask = 0;
2477 }
2478 else if (params->bEnableCustomScoreBoard == true)
2479 {
2480 MOS_SecureMemcpy(&vfeParams.Scoreboard, sizeof(vfeParams.Scoreboard),
2481 params->pCustomScoreBoard, sizeof(*params->pCustomScoreBoard));
2482 }
2483 else if (params->bEnable45ZWalkingPattern == true)
2484 {
2485 vfeParams.Scoreboard.ScoreboardMask = 0x0F;
2486 vfeParams.Scoreboard.ScoreboardType = 1;
2487
2488 // Scoreboard 0
2489 vfeParams.Scoreboard.ScoreboardDelta[0].x = 0;
2490 vfeParams.Scoreboard.ScoreboardDelta[0].y = 0xF;
2491 // Scoreboard 1
2492 vfeParams.Scoreboard.ScoreboardDelta[1].x = 0;
2493 vfeParams.Scoreboard.ScoreboardDelta[1].y = 0xE;
2494 // Scoreboard 2
2495 vfeParams.Scoreboard.ScoreboardDelta[2].x = 0xF;
2496 vfeParams.Scoreboard.ScoreboardDelta[2].y = 3;
2497 // Scoreboard 3
2498 vfeParams.Scoreboard.ScoreboardDelta[3].x = 0xF;
2499 vfeParams.Scoreboard.ScoreboardDelta[3].y = 1;
2500 }
2501 else
2502 {
2503 vfeParams.Scoreboard.ScoreboardMask = 0xFF;
2504
2505 // Scoreboard 0
2506 vfeParams.Scoreboard.ScoreboardDelta[0].x = 0xF;
2507 vfeParams.Scoreboard.ScoreboardDelta[0].y = 0;
2508
2509 // Scoreboard 1
2510 vfeParams.Scoreboard.ScoreboardDelta[1].x = 0;
2511 vfeParams.Scoreboard.ScoreboardDelta[1].y = 0xF;
2512
2513 // Scoreboard 2
2514 vfeParams.Scoreboard.ScoreboardDelta[2].x = 1;
2515 vfeParams.Scoreboard.ScoreboardDelta[2].y = 0xF;
2516 // Scoreboard 3
2517 vfeParams.Scoreboard.ScoreboardDelta[3].x = 0xF;
2518 vfeParams.Scoreboard.ScoreboardDelta[3].y = 0xF;
2519 // Scoreboard 4
2520 vfeParams.Scoreboard.ScoreboardDelta[4].x = 0xF;
2521 vfeParams.Scoreboard.ScoreboardDelta[4].y = 1;
2522 // Scoreboard 5
2523 vfeParams.Scoreboard.ScoreboardDelta[5].x = 0;
2524 vfeParams.Scoreboard.ScoreboardDelta[5].y = 0xE;
2525 // Scoreboard 6
2526 vfeParams.Scoreboard.ScoreboardDelta[6].x = 1;
2527 vfeParams.Scoreboard.ScoreboardDelta[6].y = 0xE;
2528 // Scoreboard 7
2529 vfeParams.Scoreboard.ScoreboardDelta[7].x = 0xF;
2530 vfeParams.Scoreboard.ScoreboardDelta[7].y = 0xE;
2531 }
2532
2533 if (MEDIA_IS_WA(m_waTable, WaUseStallingScoreBoard))
2534 vfeParams.Scoreboard.ScoreboardType = 0;
2535
2536 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_renderEngineInterface->AddMediaVfeCmd(cmdBuffer, &vfeParams));
2537
2538 return MOS_STATUS_SUCCESS;
2539 }
2540
SendGenericKernelCmds(PMOS_COMMAND_BUFFER cmdBuffer,SendKernelCmdsParams * params)2541 MOS_STATUS CodechalEncoderState::SendGenericKernelCmds(
2542 PMOS_COMMAND_BUFFER cmdBuffer,
2543 SendKernelCmdsParams *params)
2544 {
2545 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
2546
2547 CODECHAL_ENCODE_CHK_NULL_RETURN(params->pKernelState);
2548
2549 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->GetDefaultSSEuSetting(params->EncFunctionType, m_setRequestedEUSlices, m_setRequestedSubSlices, m_setRequestedEUs));
2550
2551 if (!m_singleTaskPhaseSupported || m_firstTaskInPhase)
2552 {
2553 bool requestFrameTracking = false;
2554
2555 if (CodecHalUsesOnlyRenderEngine(m_codecFunction) && m_lastEncPhase)
2556 {
2557 // frame tracking tag is only added in the last command buffer header
2558 requestFrameTracking = m_singleTaskPhaseSupported ? m_firstTaskInPhase : m_lastTaskInPhase;
2559 }
2560 // Send command buffer header at the beginning (OS dependent)
2561 CODECHAL_ENCODE_CHK_STATUS_RETURN(SendPrologWithFrameTracking(cmdBuffer, requestFrameTracking));
2562
2563 m_firstTaskInPhase = false;
2564 }
2565
2566 CODECHAL_ENCODE_CHK_STATUS_RETURN(StartStatusReport(cmdBuffer, params->EncFunctionType));
2567
2568 if (m_renderEngineInterface->GetL3CacheConfig()->bL3CachingEnabled)
2569 {
2570 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_renderEngineInterface->SetL3Cache(cmdBuffer));
2571 }
2572
2573 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_renderEngineInterface->EnablePreemption(cmdBuffer));
2574
2575 // Add Pipeline select command
2576 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_renderEngineInterface->AddPipelineSelectCmd(cmdBuffer, m_computeContextEnabled));
2577
2578 // Add State Base Addr command
2579 MHW_STATE_BASE_ADDR_PARAMS stateBaseAddrParams;
2580 MOS_ZeroMemory(&stateBaseAddrParams, sizeof(stateBaseAddrParams));
2581 // This bit will not be used in Driver ID but it will be used to determine if Render Target Flag has to be Clear or Set
2582 // Read this bit in pfnAddStateBaseAddrCmd and propagate it using ResourceParams via bRenderTarget
2583 stateBaseAddrParams.bDynamicStateRenderTarget = params->bDshInUse;
2584
2585 MOS_RESOURCE* dsh = params->pKernelState->m_dshRegion.GetResource();
2586 CODECHAL_ENCODE_CHK_NULL_RETURN(dsh);
2587 MOS_RESOURCE* ish = params->pKernelState->m_ishRegion.GetResource();
2588 CODECHAL_ENCODE_CHK_NULL_RETURN(ish);
2589 stateBaseAddrParams.presDynamicState = dsh;
2590 stateBaseAddrParams.dwDynamicStateSize = params->pKernelState->m_dshRegion.GetHeapSize();
2591 stateBaseAddrParams.presInstructionBuffer = ish;
2592 stateBaseAddrParams.dwInstructionBufferSize = params->pKernelState->m_ishRegion.GetHeapSize();
2593
2594 if (m_computeContextEnabled)
2595 {
2596 stateBaseAddrParams.presGeneralState = dsh;
2597 stateBaseAddrParams.dwGeneralStateSize = params->pKernelState->m_dshRegion.GetHeapSize();
2598 stateBaseAddrParams.presIndirectObjectBuffer = dsh;
2599 stateBaseAddrParams.dwIndirectObjectBufferSize = params->pKernelState->m_dshRegion.GetHeapSize();
2600 stateBaseAddrParams.bDynamicStateRenderTarget = false;
2601 }
2602
2603 if (m_standard == CODECHAL_HEVC)
2604 {
2605 stateBaseAddrParams.mocs4InstructionCache = m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_ELLC_LLC_L3].Value;
2606 }
2607
2608 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_renderEngineInterface->AddStateBaseAddrCmd(cmdBuffer, &stateBaseAddrParams));
2609
2610 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetupWalkerContext(cmdBuffer, params));
2611
2612 return eStatus;
2613 }
2614
2615 // Refer to layout of EncodeBRCPAKStatistics_g7
ReadBrcPakStatistics(PMOS_COMMAND_BUFFER cmdBuffer,EncodeReadBrcPakStatsParams * params)2616 MOS_STATUS CodechalEncoderState::ReadBrcPakStatistics(
2617 PMOS_COMMAND_BUFFER cmdBuffer,
2618 EncodeReadBrcPakStatsParams* params)
2619 {
2620 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
2621
2622 CODECHAL_ENCODE_FUNCTION_ENTER;
2623
2624 CODECHAL_ENCODE_CHK_NULL_RETURN(cmdBuffer);
2625 CODECHAL_ENCODE_CHK_NULL_RETURN(params);
2626 CODECHAL_ENCODE_CHK_NULL_RETURN(params->presBrcPakStatisticBuffer);
2627 CODECHAL_ENCODE_CHK_NULL_RETURN(params->presStatusBuffer);
2628
2629 CODECHAL_ENCODE_CHK_COND_RETURN((m_vdboxIndex > m_hwInterface->GetMfxInterface()->GetMaxVdboxIndex()),"ERROR - vdbox index exceed the maximum");
2630 MmioRegistersMfx* mmioRegisters = m_hwInterface->SelectVdboxAndGetMmioRegister(m_vdboxIndex, cmdBuffer);
2631
2632 MHW_MI_STORE_REGISTER_MEM_PARAMS miStoreRegMemParams;
2633 MOS_ZeroMemory(&miStoreRegMemParams, sizeof(miStoreRegMemParams));
2634
2635 miStoreRegMemParams.presStoreBuffer = params->presBrcPakStatisticBuffer;
2636 miStoreRegMemParams.dwOffset = 0;
2637 miStoreRegMemParams.dwRegister = mmioRegisters->mfcBitstreamBytecountFrameRegOffset;
2638 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreRegisterMemCmd(cmdBuffer, &miStoreRegMemParams));
2639
2640 miStoreRegMemParams.presStoreBuffer = params->presBrcPakStatisticBuffer;
2641 miStoreRegMemParams.dwOffset = sizeof(uint32_t);
2642 miStoreRegMemParams.dwRegister = mmioRegisters->mfcBitstreamBytecountSliceRegOffset;
2643 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreRegisterMemCmd(cmdBuffer, &miStoreRegMemParams));
2644
2645 MHW_MI_STORE_DATA_PARAMS storeDataParams;
2646 storeDataParams.pOsResource = params->presBrcPakStatisticBuffer;
2647 storeDataParams.dwResourceOffset = sizeof(uint32_t) * 2;
2648 storeDataParams.dwValue = params->ucPass + 1;
2649 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreDataImmCmd(cmdBuffer, &storeDataParams));
2650
2651 storeDataParams.pOsResource = params->presStatusBuffer;
2652 storeDataParams.dwResourceOffset = params->dwStatusBufNumPassesOffset;
2653 storeDataParams.dwValue = params->ucPass + 1;
2654 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreDataImmCmd(cmdBuffer, &storeDataParams));
2655
2656 miStoreRegMemParams.presStoreBuffer = params->presBrcPakStatisticBuffer;
2657 miStoreRegMemParams.dwOffset = sizeof(uint32_t) * (4 + params->ucPass);
2658 miStoreRegMemParams.dwRegister = mmioRegisters->mfcImageStatusCtrlRegOffset;
2659 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreRegisterMemCmd(cmdBuffer, &miStoreRegMemParams));
2660
2661 return eStatus;
2662 }
2663
//------------------------------------------------------------------------------
//| Purpose:    Retrieves the MFC image status information
//| Return:     MOS_STATUS_SUCCESS if success, else fail reason
//------------------------------------------------------------------------------
ReadImageStatus(PMOS_COMMAND_BUFFER cmdBuffer)2668 MOS_STATUS CodechalEncoderState::ReadImageStatus(
2669 PMOS_COMMAND_BUFFER cmdBuffer)
2670 {
2671 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
2672
2673 CODECHAL_ENCODE_FUNCTION_ENTER;
2674
2675 CODECHAL_ENCODE_CHK_NULL_RETURN(cmdBuffer);
2676
2677 CODECHAL_ENCODE_CHK_COND_RETURN((m_vdboxIndex > m_hwInterface->GetMfxInterface()->GetMaxVdboxIndex()),"ERROR - vdbox index exceed the maximum");
2678 MmioRegistersMfx* mmioRegisters = m_hwInterface->SelectVdboxAndGetMmioRegister(m_vdboxIndex, cmdBuffer);
2679
2680 EncodeStatusBuffer* encodeStatusBuf = &m_encodeStatusBuf;
2681
2682 uint32_t baseOffset =
2683 (encodeStatusBuf->wCurrIndex * m_encodeStatusBuf.dwReportSize) +
2684 sizeof(uint32_t) * 2; // pEncodeStatus is offset by 2 DWs in the resource
2685
2686 MHW_MI_STORE_REGISTER_MEM_PARAMS miStoreRegMemParams;
2687 MOS_ZeroMemory(&miStoreRegMemParams, sizeof(miStoreRegMemParams));
2688
2689 miStoreRegMemParams.presStoreBuffer = &encodeStatusBuf->resStatusBuffer;
2690 miStoreRegMemParams.dwOffset = baseOffset + encodeStatusBuf->dwImageStatusMaskOffset;
2691 miStoreRegMemParams.dwRegister = mmioRegisters->mfcImageStatusMaskRegOffset;
2692 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreRegisterMemCmd(cmdBuffer, &miStoreRegMemParams));
2693
2694 miStoreRegMemParams.presStoreBuffer = &encodeStatusBuf->resStatusBuffer;
2695 miStoreRegMemParams.dwOffset = baseOffset + encodeStatusBuf->dwImageStatusCtrlOffset;
2696 miStoreRegMemParams.dwRegister = mmioRegisters->mfcImageStatusCtrlRegOffset;
2697 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreRegisterMemCmd(cmdBuffer, &miStoreRegMemParams));
2698
2699 // VDEnc dynamic slice overflow semaphore, DW0 is SW programmed mask(MFX_IMAGE_MASK does not support), DW1 is MFX_IMAGE_STATUS_CONTROL
2700 if (m_vdencBrcEnabled)
2701 {
2702 MHW_VDBOX_PIPE_MODE_SELECT_PARAMS pipeModeSelectParams;
2703
2704 // Added for VDEnc slice overflow bit in MFC_IMAGE_STATUS_CONTROL
2705 // The bit is connected on the non-AVC encoder side of MMIO register.
2706 // Need a dummy MFX_PIPE_MODE_SELECT to decoder and read this register.
2707 if (m_waReadVDEncOverflowStatus)
2708 {
2709 pipeModeSelectParams = {};
2710 pipeModeSelectParams.Mode = CODECHAL_DECODE_MODE_AVCVLD;
2711 m_hwInterface->GetMfxInterface()->SetDecodeInUse(true);
2712 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->GetMfxInterface()->AddMfxPipeModeSelectCmd(cmdBuffer, &pipeModeSelectParams));
2713 }
2714
2715 // Store MFC_IMAGE_STATUS_CONTROL MMIO to DMEM for HuC next BRC pass of current frame and first pass of next frame.
2716 for (int i = 0; i < 2; i++)
2717 {
2718 if (m_resVdencBrcUpdateDmemBufferPtr[i])
2719 {
2720 miStoreRegMemParams.presStoreBuffer = m_resVdencBrcUpdateDmemBufferPtr[i];
2721 miStoreRegMemParams.dwOffset = 7 * sizeof(uint32_t); // offset of SliceSizeViolation in HUC_BRC_UPDATE_DMEM
2722 miStoreRegMemParams.dwRegister = mmioRegisters->mfcImageStatusCtrlRegOffset;
2723 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreRegisterMemCmd(cmdBuffer, &miStoreRegMemParams));
2724 }
2725 }
2726
2727 // Restore MFX_PIPE_MODE_SELECT to encode mode
2728 if (m_waReadVDEncOverflowStatus)
2729 {
2730 pipeModeSelectParams = {};
2731 pipeModeSelectParams.Mode = m_mode;
2732 pipeModeSelectParams.bVdencEnabled = true;
2733 m_hwInterface->GetMfxInterface()->SetDecodeInUse(false);
2734 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->GetMfxInterface()->AddMfxPipeModeSelectCmd(cmdBuffer, &pipeModeSelectParams));
2735 }
2736 }
2737
2738 MHW_MI_FLUSH_DW_PARAMS flushDwParams;
2739 MOS_ZeroMemory(&flushDwParams, sizeof(flushDwParams));
2740 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiFlushDwCmd(cmdBuffer, &flushDwParams));
2741
2742 return eStatus;
2743 }
2744
//------------------------------------------------------------------------------
//| Purpose:    Retrieves the MFC registers and stores them in the status report
//| Return:     MOS_STATUS_SUCCESS if success, else fail reason
//------------------------------------------------------------------------------
ReadMfcStatus(PMOS_COMMAND_BUFFER cmdBuffer)2749 MOS_STATUS CodechalEncoderState::ReadMfcStatus(
2750 PMOS_COMMAND_BUFFER cmdBuffer)
2751 {
2752 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
2753
2754 CODECHAL_ENCODE_FUNCTION_ENTER;
2755
2756 CODECHAL_ENCODE_CHK_NULL_RETURN(cmdBuffer);
2757
2758 CODECHAL_ENCODE_CHK_COND_RETURN((m_vdboxIndex > m_hwInterface->GetMfxInterface()->GetMaxVdboxIndex()),"ERROR - vdbox index exceed the maximum");
2759 MmioRegistersMfx* mmioRegisters = m_hwInterface->SelectVdboxAndGetMmioRegister(m_vdboxIndex, cmdBuffer);
2760
2761 EncodeStatusBuffer* encodeStatusBuf = &m_encodeStatusBuf;
2762
2763 uint32_t baseOffset =
2764 (encodeStatusBuf->wCurrIndex * m_encodeStatusBuf.dwReportSize) +
2765 sizeof(uint32_t) * 2; // pEncodeStatus is offset by 2 DWs in the resource
2766
2767 MHW_MI_FLUSH_DW_PARAMS flushDwParams;
2768 MOS_ZeroMemory(&flushDwParams, sizeof(flushDwParams));
2769 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiFlushDwCmd(cmdBuffer, &flushDwParams));
2770
2771 MHW_MI_STORE_REGISTER_MEM_PARAMS miStoreRegMemParams;
2772 MOS_ZeroMemory(&miStoreRegMemParams, sizeof(miStoreRegMemParams));
2773
2774 miStoreRegMemParams.presStoreBuffer = &encodeStatusBuf->resStatusBuffer;
2775 miStoreRegMemParams.dwOffset = baseOffset + encodeStatusBuf->dwBSByteCountOffset;
2776 miStoreRegMemParams.dwRegister = mmioRegisters->mfcBitstreamBytecountFrameRegOffset;
2777 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreRegisterMemCmd(cmdBuffer, &miStoreRegMemParams));
2778
2779 miStoreRegMemParams.presStoreBuffer = &encodeStatusBuf->resStatusBuffer;
2780 miStoreRegMemParams.dwOffset = baseOffset + encodeStatusBuf->dwBSSEBitCountOffset;
2781 miStoreRegMemParams.dwRegister = mmioRegisters->mfcBitstreamSeBitcountFrameRegOffset;
2782 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreRegisterMemCmd(cmdBuffer, &miStoreRegMemParams));
2783
2784 miStoreRegMemParams.presStoreBuffer = &encodeStatusBuf->resStatusBuffer;
2785 miStoreRegMemParams.dwOffset = baseOffset + encodeStatusBuf->dwQpStatusCountOffset;
2786 miStoreRegMemParams.dwRegister = mmioRegisters->mfcQPStatusCountOffset;
2787 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreRegisterMemCmd(cmdBuffer, &miStoreRegMemParams));
2788
2789 if (mmioRegisters->mfcAvcNumSlicesRegOffset > 0)
2790 {
2791 //read MFC_AVC_NUM_SLICES register to status report
2792 miStoreRegMemParams.presStoreBuffer = &encodeStatusBuf->resStatusBuffer;
2793 miStoreRegMemParams.dwOffset = baseOffset + encodeStatusBuf->dwNumSlicesOffset;
2794 miStoreRegMemParams.dwRegister = mmioRegisters->mfcAvcNumSlicesRegOffset;
2795 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreRegisterMemCmd(cmdBuffer, &miStoreRegMemParams));
2796 }
2797
2798 if (m_vdencBrcEnabled)
2799 {
2800 // Store PAK FrameSize MMIO to DMEM for HuC next BRC pass of current frame and first pass of next frame.
2801 for (int i = 0; i < 2; i++)
2802 {
2803 if (m_resVdencBrcUpdateDmemBufferPtr[i])
2804 {
2805 miStoreRegMemParams.presStoreBuffer = m_resVdencBrcUpdateDmemBufferPtr[i];
2806 miStoreRegMemParams.dwOffset = 5 * sizeof(uint32_t);
2807 miStoreRegMemParams.dwRegister = mmioRegisters->mfcBitstreamBytecountFrameRegOffset;
2808 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreRegisterMemCmd(cmdBuffer, &miStoreRegMemParams));
2809
2810 if (m_vdencBrcNumOfSliceOffset)
2811 {
2812 miStoreRegMemParams.presStoreBuffer = m_resVdencBrcUpdateDmemBufferPtr[i];
2813 miStoreRegMemParams.dwOffset = m_vdencBrcNumOfSliceOffset;
2814 miStoreRegMemParams.dwRegister = mmioRegisters->mfcAvcNumSlicesRegOffset;
2815 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreRegisterMemCmd(cmdBuffer, &miStoreRegMemParams));
2816 }
2817 }
2818 }
2819 }
2820
2821 CODECHAL_ENCODE_CHK_STATUS_RETURN(ReadImageStatus(cmdBuffer));
2822
2823 return eStatus;
2824 }
2825
//------------------------------------------------------------------------------
//| Purpose:    Fills the current encode status report entry with the frame-level
//|             parameters (stored data, report number, picture, function ID, ...)
//| Return:     MOS_STATUS_SUCCESS if success, else fail reason
//------------------------------------------------------------------------------
SetStatusReportParams(PCODEC_REF_LIST currRefList)2830 MOS_STATUS CodechalEncoderState::SetStatusReportParams(
2831 PCODEC_REF_LIST currRefList)
2832 {
2833 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
2834
2835 CODECHAL_ENCODE_FUNCTION_ENTER;
2836
2837 EncodeStatusBuffer* encodeStatusBuf = nullptr;
2838 if ((m_codecFunction == CODECHAL_FUNCTION_ENC) ||
2839 (m_codecFunction == CODECHAL_FUNCTION_FEI_PRE_ENC) ||
2840 (m_codecFunction == CODECHAL_FUNCTION_FEI_ENC) ||
2841 (m_codecFunction == CODECHAL_FUNCTION_HYBRIDPAK))
2842 {
2843 encodeStatusBuf = &m_encodeStatusBufRcs;
2844 }
2845 else
2846 {
2847 encodeStatusBuf = &m_encodeStatusBuf;
2848 }
2849
2850 EncodeStatus* encodeStatus =
2851 (EncodeStatus*)(encodeStatusBuf->pEncodeStatus +
2852 encodeStatusBuf->wCurrIndex * encodeStatusBuf->dwReportSize);
2853
2854 EncodeStatusReport* encodeStatusReport = &encodeStatus->encodeStatusReport;
2855
2856 encodeStatus->dwStoredData = m_storeData;
2857 encodeStatusReport->StatusReportNumber = m_statusReportFeedbackNumber;
2858 encodeStatusReport->CurrOriginalPic = m_currOriginalPic;
2859 encodeStatus->wPictureCodingType = m_pictureCodingType;
2860 switch (m_codecFunction)
2861 {
2862 case CODECHAL_FUNCTION_ENC:
2863 encodeStatusReport->Func = CODECHAL_ENCODE_ENC_ID;
2864 break;
2865 case CODECHAL_FUNCTION_PAK:
2866 encodeStatusReport->Func = CODECHAL_ENCODE_PAK_ID;
2867 break;
2868 case CODECHAL_FUNCTION_ENC_PAK:
2869 case CODECHAL_FUNCTION_ENC_VDENC_PAK:
2870 encodeStatusReport->Func = CODECHAL_ENCODE_ENC_PAK_ID;
2871 break;
2872 case CODECHAL_FUNCTION_FEI_PRE_ENC:
2873 encodeStatusReport->Func = CODECHAL_ENCODE_FEI_PRE_ENC_ID;
2874 break;
2875 case CODECHAL_FUNCTION_FEI_ENC:
2876 encodeStatusReport->Func = CODECHAL_ENCODE_FEI_ENC_ID;
2877 break;
2878 case CODECHAL_FUNCTION_FEI_PAK:
2879 encodeStatusReport->Func = CODECHAL_ENCODE_FEI_PAK_ID;
2880 break;
2881 case CODECHAL_FUNCTION_FEI_ENC_PAK:
2882 encodeStatusReport->Func = CODECHAL_ENCODE_FEI_ENC_PAK_ID;
2883 break;
2884 case CODECHAL_FUNCTION_HYBRIDPAK:
2885 encodeStatusReport->Func = CODECHAL_ENCODE_ENC_ID; /* Only the render engine(EU) is used, MFX is not used */
2886 break;
2887 default:
2888 break;
2889 }
2890 encodeStatusReport->pCurrRefList = m_currRefList;
2891 encodeStatusReport->NumberTilesInFrame = m_numberTilesInFrame;
2892 encodeStatusReport->UsedVdBoxNumber = m_numUsedVdbox;
2893
2894 return eStatus;
2895 }
2896
//------------------------------------------------------------------------------
//| Purpose:    Sets every per-media-state entry of the current status report
//|             to the completed status (only render context)
//| Return:     MOS_STATUS_SUCCESS if success, else fail reason
//------------------------------------------------------------------------------
InitStatusReport()2901 MOS_STATUS CodechalEncoderState::InitStatusReport()
2902 {
2903 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
2904
2905 CODECHAL_ENCODE_FUNCTION_ENTER;
2906
2907 EncodeStatusBuffer* encodeStatusBuf = &m_encodeStatusBuf;
2908 CODECHAL_ENCODE_CHK_NULL_RETURN(encodeStatusBuf);
2909
2910 EncodeStatus* encodeStatus = (EncodeStatus*)(encodeStatusBuf->pEncodeStatus + encodeStatusBuf->wCurrIndex * encodeStatusBuf->dwReportSize);
2911 CODECHAL_ENCODE_CHK_NULL_RETURN(encodeStatus);
2912
2913 for (auto i = 0; i < CODECHAL_NUM_MEDIA_STATES; i += 1)
2914 {
2915 encodeStatus->qwStoredDataEnc[i].dwStoredData = CODECHAL_STATUS_QUERY_END_FLAG;
2916 }
2917
2918 if (m_encEnabled)
2919 {
2920 EncodeStatusBuffer* encodeStatusBufRcs = &m_encodeStatusBufRcs;
2921 CODECHAL_ENCODE_CHK_NULL_RETURN(encodeStatusBufRcs);
2922
2923 encodeStatus = (EncodeStatus*)(encodeStatusBufRcs->pEncodeStatus + encodeStatusBufRcs->wCurrIndex * encodeStatusBufRcs->dwReportSize);
2924 CODECHAL_ENCODE_CHK_NULL_RETURN(encodeStatus);
2925
2926 for (auto i = 0; i < CODECHAL_NUM_MEDIA_STATES; i += 1)
2927 {
2928 encodeStatus->qwStoredDataEnc[i].dwStoredData = CODECHAL_STATUS_QUERY_END_FLAG;
2929 }
2930 }
2931
2932 return eStatus;
2933 }
2934
//------------------------------------------------------------------------------
//| Purpose:    Indicates to the driver that the batch buffer has started processing
//| Return:     MOS_STATUS_SUCCESS if success, else fail reason
//------------------------------------------------------------------------------
StartStatusReport(PMOS_COMMAND_BUFFER cmdBuffer,CODECHAL_MEDIA_STATE_TYPE encFunctionType)2939 MOS_STATUS CodechalEncoderState::StartStatusReport(
2940 PMOS_COMMAND_BUFFER cmdBuffer,
2941 CODECHAL_MEDIA_STATE_TYPE encFunctionType)
2942 {
2943 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
2944
2945 CODECHAL_ENCODE_FUNCTION_ENTER;
2946
2947 CODECHAL_ENCODE_CHK_NULL_RETURN(cmdBuffer);
2948
2949 EncodeStatusBuffer* encodeStatusBuf = &m_encodeStatusBuf;
2950 EncodeStatusBuffer* encodeStatusBufRcs = &m_encodeStatusBufRcs;
2951
2952 if (m_osInterface->pfnGetGpuContext(m_osInterface) == m_renderContext)
2953 {
2954 uint32_t offset =
2955 (encodeStatusBufRcs->wCurrIndex * m_encodeStatusBufRcs.dwReportSize) +
2956 encodeStatusBufRcs->dwStoreDataOffset + 8 + // VME stored data offset is 2nd
2957 sizeof(uint32_t) * 2 * encFunctionType + // Each VME stored data is 1 QW
2958 sizeof(uint32_t) * 2; // encodeStatus is offset by 2 DWs in the resource
2959
2960 MHW_PIPE_CONTROL_PARAMS pipeControlParams;
2961 MOS_ZeroMemory(&pipeControlParams, sizeof(pipeControlParams));
2962 pipeControlParams.presDest = &encodeStatusBufRcs->resStatusBuffer;
2963 pipeControlParams.dwPostSyncOp = MHW_FLUSH_WRITE_IMMEDIATE_DATA;
2964 pipeControlParams.dwResourceOffset = offset;
2965 pipeControlParams.dwDataDW1 = CODECHAL_STATUS_QUERY_START_FLAG;
2966 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddPipeControl(
2967 cmdBuffer,
2968 nullptr,
2969 &pipeControlParams));
2970 }
2971
2972 if (m_osInterface->pfnGetGpuContext(m_osInterface) == m_videoContext)
2973 {
2974 uint32_t offset =
2975 (encodeStatusBuf->wCurrIndex * m_encodeStatusBuf.dwReportSize) +
2976 encodeStatusBuf->dwStoreDataOffset + // MFX stored data offset is 1st, so no additional offset is needed
2977 sizeof(uint32_t) * 2; // encodeStatus is offset by 2 DWs in the resource
2978
2979 MHW_MI_STORE_DATA_PARAMS storeDataParams;
2980 storeDataParams.pOsResource = &encodeStatusBuf->resStatusBuffer;
2981 storeDataParams.dwResourceOffset = offset;
2982 storeDataParams.dwValue = CODECHAL_STATUS_QUERY_START_FLAG;
2983
2984 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreDataImmCmd(
2985 cmdBuffer,
2986 &storeDataParams));
2987
2988 if (m_osInterface->osCpInterface->IsCpEnabled() && m_hwInterface->GetCpInterface()->IsHWCounterAutoIncrementEnforced(m_osInterface) && m_skipFrameBasedHWCounterRead == false )
2989 {
2990 uint32_t writeOffset = sizeof(HwCounter) * CODECHAL_ENCODE_STATUS_NUM;
2991
2992 CODECHAL_ENCODE_CHK_NULL_RETURN(m_hwInterface->GetCpInterface());
2993
2994 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->GetCpInterface()->ReadEncodeCounterFromHW(
2995 m_osInterface,
2996 cmdBuffer,
2997 &m_resHwCount,
2998 encodeStatusBuf->wCurrIndex));
2999 }
3000 }
3001
3002 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_perfProfiler->AddPerfCollectStartCmd((void*)this, m_osInterface, m_miInterface, cmdBuffer));
3003
3004 return eStatus;
3005 }
3006
//------------------------------------------------------------------------------
//| Purpose:    Indicates to the driver that the batch buffer has completed processing
//| Return:     MOS_STATUS_SUCCESS if success, else fail reason
//------------------------------------------------------------------------------
EndStatusReport(PMOS_COMMAND_BUFFER cmdBuffer,CODECHAL_MEDIA_STATE_TYPE encFunctionType)3011 MOS_STATUS CodechalEncoderState::EndStatusReport(
3012 PMOS_COMMAND_BUFFER cmdBuffer,
3013 CODECHAL_MEDIA_STATE_TYPE encFunctionType)
3014 {
3015 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
3016
3017 CODECHAL_ENCODE_FUNCTION_ENTER;
3018
3019 CODECHAL_ENCODE_CHK_NULL_RETURN(cmdBuffer);
3020
3021 // Update the tag in GPU Sync eStatus buffer (H/W Tag) to match the current S/W tag if applicable
3022 if (m_frameTrackingEnabled && m_osInterface->bTagResourceSync)
3023 {
3024 MOS_SYNC_PARAMS syncParams = g_cInitSyncParams;
3025 bool writeResourceSyncTag = false;
3026
3027 if (m_osInterface->pfnGetGpuContext(m_osInterface) == m_renderContext)
3028 {
3029 syncParams.GpuContext = m_renderContext;
3030
3031 // Enc only and VDEnc SHME requires render engine GPU tag
3032 if (CodecHalUsesOnlyRenderEngine(m_codecFunction) ||
3033 (m_vdencEnabled && m_16xMeSupported))
3034 {
3035 writeResourceSyncTag = m_lastEncPhase && m_lastTaskInPhase;
3036 }
3037 }
3038 else if (m_osInterface->pfnGetGpuContext(m_osInterface) == m_videoContext)
3039 {
3040 syncParams.GpuContext = m_videoContext;
3041 writeResourceSyncTag = m_lastTaskInPhase;
3042 }
3043
3044 if (writeResourceSyncTag)
3045 {
3046 if (!m_firstField || CodecHal_PictureIsFrame(m_currOriginalPic))
3047 {
3048 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->WriteSyncTagToResource(cmdBuffer, &syncParams));
3049 }
3050 }
3051 }
3052
3053 MHW_MI_STORE_DATA_PARAMS storeDataParams;
3054 uint32_t offset = 0;
3055 if (m_osInterface->pfnGetGpuContext(m_osInterface) == m_renderContext)
3056 {
3057 // Flush the write cache for ENC output
3058 MHW_PIPE_CONTROL_PARAMS pipeControlParams;
3059 MOS_ZeroMemory(&pipeControlParams, sizeof(pipeControlParams));
3060 pipeControlParams.dwFlushMode = MHW_FLUSH_WRITE_CACHE;
3061 pipeControlParams.bGenericMediaStateClear = true;
3062 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddPipeControl(cmdBuffer, nullptr, &pipeControlParams));
3063
3064 if (MEDIA_IS_WA(m_waTable, WaSendDummyVFEafterPipelineSelect))
3065 {
3066 MHW_VFE_PARAMS vfeStateParams = {};
3067 vfeStateParams.dwNumberofURBEntries = 1;
3068 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_renderEngineInterface->AddMediaVfeCmd(cmdBuffer, &vfeStateParams));
3069 }
3070
3071 offset =
3072 (m_encodeStatusBufRcs.wCurrIndex * m_encodeStatusBufRcs.dwReportSize) +
3073 m_encodeStatusBufRcs.dwStoreDataOffset + 8 + // VME stored data offset is 2nd
3074 sizeof(uint32_t) * 2 * encFunctionType + // Each VME stored data is 1 QW
3075 sizeof(uint32_t) * 2; // encodeStatus is offset by 2 DWs in the resource
3076 storeDataParams.pOsResource = &m_encodeStatusBufRcs.resStatusBuffer;
3077 }
3078 else if (m_osInterface->pfnGetGpuContext(m_osInterface) == m_videoContext)
3079 {
3080 offset =
3081 m_encodeStatusBuf.wCurrIndex * m_encodeStatusBuf.dwReportSize +
3082 m_encodeStatusBuf.dwStoreDataOffset + // MFX stored data offset is 1st, so no additional offset is needed
3083 sizeof(uint32_t) * 2; // encodeStatus is offset by 2 DWs in the resource
3084 storeDataParams.pOsResource = &m_encodeStatusBuf.resStatusBuffer;
3085 }
3086
3087 storeDataParams.dwResourceOffset = offset;
3088 storeDataParams.dwValue = CODECHAL_STATUS_QUERY_END_FLAG;
3089 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreDataImmCmd(
3090 cmdBuffer,
3091 &storeDataParams));
3092
3093 if (encFunctionType == CODECHAL_NUM_MEDIA_STATES && m_inlineEncodeStatusUpdate)
3094 {
3095 if (m_currPass < m_numPasses)
3096 {
3097 if(m_vdencBrcEnabled)
3098 {
3099 //delay to check at the beginning of next pass util huc status updated;
3100 }
3101 else
3102 {
3103 // inc m_storeData conditionaly
3104 UpdateEncodeStatus(cmdBuffer, false);
3105 }
3106 }
3107 else
3108 {
3109 // inc m_storeData forcely
3110 UpdateEncodeStatus(cmdBuffer, true);
3111 }
3112 }
3113
3114 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_perfProfiler->AddPerfCollectEndCmd((void*)this, m_osInterface, m_miInterface, cmdBuffer));
3115
3116 return eStatus;
3117 }
3118
//!
//! \brief    Update m_storeData in offset 0 of statusReport.
//! \details  Adds a conditional encode status report update, which avoids an extra
//!           small batch buffer and therefore an extra context switch interrupt.
//!           If the image status register shows that encoding has completed,
//!           m_storeData is updated; otherwise it is left unchanged and
//!           execution continues.
//!
//!                  n0                n1                n2                n3
//!           +-------+--------+--------+--------+--------+--------+--------+--------+
//!           |   0       0    |    0            |  val/0    1/0   |   0       1     |
//!           +-------+--------+--------+--------+--------+--------+--------+--------+
//!              low     high     low     high      low     high     low     high
//!
//!           if (forceOperation == true)
//!               step-1: m_storeData = m_storeData + 1                                  // ADD directly
//!           else
//!               step-1: n2_lo = ImageStatusCtrl & dwImageStatusMask                    // AND
//!               step-2: n2_lo = (n2_lo == 0) ? 0 : n2_lo                               // uint32_t CMP
//!               step-3: n2_lo:n2_hi = (n2_lo:n2_hi == 0:1) ? 0:0 : n2_lo:n2_hi         // uint64_t CMP
//!               step-4: n2_hi = n2_hi ^ n3_hi                                          // XOR
//!               step-5: m_storeData = m_storeData + n2_hi                              // ADD conditionally
//!
//! \param    [in] cmdBuffer
//!           Command buffer
//! \param    [in] forceOperation
//!           Whether to increment m_storeData directly (unconditionally)
//! \return   MOS_STATUS
//!           MOS_STATUS_SUCCESS if success, else fail reason
//!
UpdateEncodeStatus(PMOS_COMMAND_BUFFER cmdBuffer,bool forceOperation)3146 MOS_STATUS CodechalEncoderState::UpdateEncodeStatus(
3147 PMOS_COMMAND_BUFFER cmdBuffer,
3148 bool forceOperation)
3149 {
3150 MmioRegistersMfx *mmioRegisters;
3151 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
3152
3153 CODECHAL_ENCODE_CHK_NULL_RETURN(cmdBuffer);
3154
3155 CODECHAL_ENCODE_CHK_COND_RETURN((m_vdboxIndex > m_hwInterface->GetMfxInterface()->GetMaxVdboxIndex()),"ERROR - vdbox index exceed the maximum");
3156 mmioRegisters = m_hwInterface->SelectVdboxAndGetMmioRegister(m_vdboxIndex, cmdBuffer);
3157
3158 // Get the right offset of EncodeStatusUpdate Operand scratch buffer
3159 uint32_t baseOffset = m_atomicScratchBuf.dwOperandSetSize * m_atomicScratchBuf.wEncodeUpdateIndex;
3160 uint32_t zeroValueOffset = baseOffset;
3161 uint32_t operand1Offset = baseOffset + m_atomicScratchBuf.dwOperand1Offset;
3162 uint32_t operand2Offset = baseOffset + m_atomicScratchBuf.dwOperand2Offset;
3163 uint32_t operand3Offset = baseOffset + m_atomicScratchBuf.dwOperand3Offset;
3164
3165 // Forcely ADD m_storeData, always happened in last pass.
3166 if(forceOperation)
3167 {
3168 // Make Flush DW call to make sure all previous work is done
3169 MHW_MI_FLUSH_DW_PARAMS flushDwParams;
3170 MOS_ZeroMemory(&flushDwParams , sizeof(flushDwParams));
3171 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiFlushDwCmd(
3172 cmdBuffer,
3173 &flushDwParams));
3174
3175 // n2_hi = 0x1
3176 MHW_MI_STORE_DATA_PARAMS storeDataParams;
3177 MOS_ZeroMemory(&storeDataParams, sizeof(storeDataParams));
3178 storeDataParams.pOsResource = &(m_atomicScratchBuf.resAtomicScratchBuffer);
3179 storeDataParams.dwResourceOffset = operand2Offset + sizeof(uint32_t) ;
3180 storeDataParams.dwValue = 0x1;
3181 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreDataImmCmd(
3182 cmdBuffer,
3183 &storeDataParams));
3184
3185 // VCS_GPR0_Lo = n2_hi = 0x1
3186 MHW_MI_STORE_REGISTER_MEM_PARAMS registerMemParams;
3187 MOS_ZeroMemory(®isterMemParams, sizeof(registerMemParams));
3188 registerMemParams.presStoreBuffer = &(m_atomicScratchBuf.resAtomicScratchBuffer);
3189 registerMemParams.dwOffset = operand2Offset + sizeof(uint32_t) ;
3190 registerMemParams.dwRegister = mmioRegisters->generalPurposeRegister0LoOffset; // VCS_GPR0_Lo
3191 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiLoadRegisterMemCmd(
3192 cmdBuffer,
3193 ®isterMemParams));
3194
3195 // Make Flush DW call to make sure all previous work is done
3196 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiFlushDwCmd(
3197 cmdBuffer,
3198 &flushDwParams));
3199
3200 // m_storeData = m_storeData + VCS_GPR0_Lo = m_storeData + 1
3201 MHW_MI_ATOMIC_PARAMS atomicParams;
3202 MOS_ZeroMemory(&atomicParams, sizeof(atomicParams));
3203 atomicParams.pOsResource =&m_encodeStatusBuf.resStatusBuffer;
3204 atomicParams.dwResourceOffset = 0;
3205 atomicParams.dwDataSize = sizeof(uint32_t);
3206 atomicParams.Operation = MHW_MI_ATOMIC_ADD;
3207 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiAtomicCmd(
3208 cmdBuffer,
3209 &atomicParams));
3210
3211 // Make Flush DW call to make sure all previous work is done
3212 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiFlushDwCmd(
3213 cmdBuffer,
3214 &flushDwParams));
3215 return MOS_STATUS_SUCCESS;
3216 }
3217 else
3218 {
3219 // Make Flush DW call to make sure all previous work is done
3220 MHW_MI_FLUSH_DW_PARAMS flushDwParams;
3221 MOS_ZeroMemory(&flushDwParams , sizeof(flushDwParams));
3222 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiFlushDwCmd(
3223 cmdBuffer,
3224 &flushDwParams));
3225
3226 // n1_lo = 0x00
3227 MHW_MI_STORE_DATA_PARAMS storeDataParams;
3228 MOS_ZeroMemory(&storeDataParams, sizeof(storeDataParams));
3229 storeDataParams.pOsResource = &(m_atomicScratchBuf.resAtomicScratchBuffer);
3230 storeDataParams.dwResourceOffset = operand1Offset;
3231 storeDataParams.dwValue = 0x00;
3232 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreDataImmCmd(
3233 cmdBuffer,
3234 &storeDataParams));
3235
3236 // n2_lo = dwImageStatusMask
3237 MHW_MI_COPY_MEM_MEM_PARAMS copyMemMemParams;
3238 MOS_ZeroMemory(©MemMemParams , sizeof(copyMemMemParams));
3239 if(!m_vdencBrcEnabled)
3240 {
3241 copyMemMemParams.presSrc = &m_encodeStatusBuf.resStatusBuffer;
3242 copyMemMemParams.dwSrcOffset = (m_encodeStatusBuf.wCurrIndex * m_encodeStatusBuf.dwReportSize) +
3243 m_encodeStatusBuf.dwImageStatusMaskOffset +
3244 (sizeof(uint32_t) * 2);
3245 }
3246 else
3247 {
3248 copyMemMemParams.presSrc = &m_resPakMmioBuffer;
3249 copyMemMemParams.dwSrcOffset = (sizeof(uint32_t) * 1);
3250 }
3251 copyMemMemParams.presDst = &m_atomicScratchBuf.resAtomicScratchBuffer;
3252 copyMemMemParams.dwDstOffset = operand2Offset;
3253 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiCopyMemMemCmd(
3254 cmdBuffer,
3255 ©MemMemParams));
3256
3257 // VCS_GPR0_Lo = ImageStatusCtrl
3258 MHW_MI_STORE_REGISTER_MEM_PARAMS registerMemParams;
3259 MOS_ZeroMemory(®isterMemParams, sizeof(registerMemParams));
3260 if(!m_vdencBrcEnabled)
3261 {
3262 registerMemParams.presStoreBuffer = &m_encodeStatusBuf.resStatusBuffer;
3263 registerMemParams.dwOffset = (m_encodeStatusBuf.wCurrIndex * m_encodeStatusBuf.dwReportSize) +
3264 m_encodeStatusBuf.dwImageStatusMaskOffset +
3265 (sizeof(uint32_t) * 2) + sizeof(uint32_t);
3266 }
3267 else
3268 {
3269 registerMemParams.presStoreBuffer = &m_resPakMmioBuffer;
3270 registerMemParams.dwOffset = (sizeof(uint32_t) * 0);
3271 }
3272 registerMemParams.dwRegister = mmioRegisters->generalPurposeRegister0LoOffset; // VCS_GPR0_Lo
3273 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiLoadRegisterMemCmd(
3274 cmdBuffer,
3275 ®isterMemParams));
3276
3277 // Reset GPR4_Lo
3278 MOS_ZeroMemory(®isterMemParams, sizeof(registerMemParams));
3279 registerMemParams.presStoreBuffer = &(m_atomicScratchBuf.resAtomicScratchBuffer);
3280 registerMemParams.dwOffset = zeroValueOffset; //Offset 0, has value of 0.
3281 registerMemParams.dwRegister = mmioRegisters->generalPurposeRegister4LoOffset; // VCS_GPR4
3282 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiLoadRegisterMemCmd(
3283 cmdBuffer,
3284 ®isterMemParams));
3285
3286 // Make Flush DW call to make sure all previous work is done
3287 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiFlushDwCmd(
3288 cmdBuffer,
3289 &flushDwParams));
3290
3291 // step-1: n2_lo = n2_lo & VCS_GPR0_Lo = dwImageStatusMask & ImageStatusCtrl
3292 MHW_MI_ATOMIC_PARAMS atomicParams;
3293 MOS_ZeroMemory(&atomicParams, sizeof(atomicParams));
3294 atomicParams.pOsResource = &m_atomicScratchBuf.resAtomicScratchBuffer;
3295 atomicParams.dwResourceOffset = operand2Offset;
3296 atomicParams.dwDataSize = sizeof(uint32_t);
3297 atomicParams.Operation = MHW_MI_ATOMIC_AND;
3298 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiAtomicCmd(
3299 cmdBuffer,
3300 &atomicParams));
3301
3302 // n3_lo = 0
3303 MOS_ZeroMemory(&storeDataParams, sizeof(storeDataParams));
3304 storeDataParams.pOsResource = &(m_atomicScratchBuf.resAtomicScratchBuffer);
3305 storeDataParams.dwResourceOffset = operand3Offset;
3306 storeDataParams.dwValue = 0;
3307 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreDataImmCmd(
3308 cmdBuffer,
3309 &storeDataParams));
3310
3311 // Make Flush DW call to make sure all previous work is done
3312 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiFlushDwCmd(
3313 cmdBuffer,
3314 &flushDwParams));
3315
3316 // GPR0_lo = n1_lo = 0
3317 MOS_ZeroMemory(®isterMemParams, sizeof(registerMemParams));
3318 registerMemParams.presStoreBuffer = &(m_atomicScratchBuf.resAtomicScratchBuffer);
3319 registerMemParams.dwOffset = operand1Offset;
3320 registerMemParams.dwRegister = mmioRegisters->generalPurposeRegister0LoOffset; // VCS_GPR0
3321 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiLoadRegisterMemCmd(
3322 cmdBuffer,
3323 ®isterMemParams));
3324
3325 // Reset GPR4_Lo
3326 MOS_ZeroMemory(®isterMemParams, sizeof(registerMemParams));
3327 registerMemParams.presStoreBuffer = &(m_atomicScratchBuf.resAtomicScratchBuffer);
3328 registerMemParams.dwOffset = zeroValueOffset; //Offset 0, has value of 0.
3329 registerMemParams.dwRegister = mmioRegisters->generalPurposeRegister4LoOffset; // VCS_GPR4
3330 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiLoadRegisterMemCmd(
3331 cmdBuffer,
3332 ®isterMemParams));
3333
3334 // Make Flush DW call to make sure all previous work is done
3335 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiFlushDwCmd(
3336 cmdBuffer,
3337 &flushDwParams));
3338
3339 // step-2: n2_lo == n1_lo ? 0 : n2_lo
3340 // compare n1 vs n2. i.e. GRP0 vs. memory of operand2
3341 MOS_ZeroMemory(&atomicParams, sizeof(atomicParams));
3342 atomicParams.pOsResource = &(m_atomicScratchBuf.resAtomicScratchBuffer);
3343 atomicParams.dwResourceOffset = operand2Offset;
3344 atomicParams.dwDataSize = sizeof(uint32_t);
3345 atomicParams.Operation = MHW_MI_ATOMIC_CMP;
3346 atomicParams.bReturnData = true;
3347
3348 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiAtomicCmd(
3349 cmdBuffer,
3350 &atomicParams));
3351
3352 // n2_hi = 1
3353 MOS_ZeroMemory(&storeDataParams, sizeof(storeDataParams));
3354 storeDataParams.pOsResource =&(m_atomicScratchBuf.resAtomicScratchBuffer);
3355 storeDataParams.dwResourceOffset = operand2Offset + sizeof(uint32_t);
3356 storeDataParams.dwValue = 1;
3357 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreDataImmCmd(
3358 cmdBuffer,
3359 &storeDataParams));
3360
3361 // n3_hi = 1
3362 MOS_ZeroMemory(&storeDataParams, sizeof(storeDataParams));
3363 storeDataParams.pOsResource =&(m_atomicScratchBuf.resAtomicScratchBuffer);
3364 storeDataParams.dwResourceOffset = operand3Offset + sizeof(uint32_t);
3365 storeDataParams.dwValue = 1;
3366 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreDataImmCmd(
3367 cmdBuffer,
3368 &storeDataParams));
3369
3370 // VCS_GPR0_Lo = n3_lo = 0
3371 MOS_ZeroMemory(®isterMemParams, sizeof(registerMemParams));
3372 registerMemParams.presStoreBuffer = &(m_atomicScratchBuf.resAtomicScratchBuffer);
3373 registerMemParams.dwOffset = operand3Offset;
3374 registerMemParams.dwRegister = mmioRegisters->generalPurposeRegister0LoOffset; // VCS_GPR0_Lo
3375 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiLoadRegisterMemCmd(
3376 cmdBuffer,
3377 ®isterMemParams));
3378
3379 // GPR0_Hi = n2_hi = 1
3380 MOS_ZeroMemory(®isterMemParams, sizeof(registerMemParams));
3381 registerMemParams.presStoreBuffer = &(m_atomicScratchBuf.resAtomicScratchBuffer);
3382 registerMemParams.dwOffset = operand2Offset + sizeof(uint32_t) ; // update 1
3383 registerMemParams.dwRegister = mmioRegisters->generalPurposeRegister0HiOffset; // VCS_GPR0_Hi
3384 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiLoadRegisterMemCmd(
3385 cmdBuffer,
3386 ®isterMemParams));
3387
3388 // Reset GPR4_Lo and GPR4_Hi
3389 MOS_ZeroMemory(®isterMemParams, sizeof(registerMemParams));
3390 registerMemParams.presStoreBuffer =&(m_atomicScratchBuf.resAtomicScratchBuffer);
3391 registerMemParams.dwOffset = zeroValueOffset ;
3392 registerMemParams.dwRegister = mmioRegisters->generalPurposeRegister4LoOffset; // VCS_GPR4_Hi
3393 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiLoadRegisterMemCmd(
3394 cmdBuffer,
3395 ®isterMemParams));
3396 MOS_ZeroMemory(®isterMemParams, sizeof(registerMemParams));
3397 registerMemParams.presStoreBuffer = &(m_atomicScratchBuf.resAtomicScratchBuffer);
3398 registerMemParams.dwOffset = zeroValueOffset ;
3399 registerMemParams.dwRegister = mmioRegisters->generalPurposeRegister4HiOffset; // VCS_GPR4_Hi
3400 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiLoadRegisterMemCmd(
3401 cmdBuffer,
3402 ®isterMemParams));
3403
3404 // Make Flush DW call to make sure all previous work is done
3405 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiFlushDwCmd(
3406 cmdBuffer,
3407 &flushDwParams));
3408
        // step-3: n2 = (n2 == 0:1) ? 0:0 : n2      // uint64_t CMP
        // If n2 == 0 (Lo) and 1 (Hi), convert n2 to 0 (Lo) and 0 (Hi), else no change.
        // n2 == 0:1 means encoding completion; the n2 memory will be updated with 0:0, otherwise it is left unchanged.
3412 MOS_ZeroMemory(&atomicParams, sizeof(atomicParams));
3413 atomicParams.pOsResource = &(m_atomicScratchBuf.resAtomicScratchBuffer);
3414 atomicParams.dwResourceOffset = operand2Offset;
3415 atomicParams.dwDataSize = sizeof(uint64_t);
3416 atomicParams.Operation = MHW_MI_ATOMIC_CMP;
3417 atomicParams.bReturnData = true;
3418 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiAtomicCmd(
3419 cmdBuffer,
3420 &atomicParams));
3421
3422 // Make Flush DW call to make sure all previous work is done
3423 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiFlushDwCmd(
3424 cmdBuffer,
3425 &flushDwParams));
3426
3427 // VCS_GPR0_Lo = n3_hi = 1
3428 MOS_ZeroMemory(®isterMemParams, sizeof(registerMemParams));
3429 registerMemParams.presStoreBuffer = &(m_atomicScratchBuf.resAtomicScratchBuffer);
3430 registerMemParams.dwOffset = operand3Offset + sizeof(uint32_t);
3431 registerMemParams.dwRegister = mmioRegisters->generalPurposeRegister0LoOffset; // VCS_GPR0_Lo
3432 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiLoadRegisterMemCmd(
3433 cmdBuffer,
3434 ®isterMemParams));
3435
3436 // Make Flush DW call to make sure all previous work is done
3437 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiFlushDwCmd(
3438 cmdBuffer,
3439 &flushDwParams));
3440
3441 // step-4: n2_hi = n2_hi ^ VCS_GPR0_Lo = n2_hi ^ n3_hi
3442 MOS_ZeroMemory(&atomicParams, sizeof(atomicParams));
3443 atomicParams.pOsResource =&(m_atomicScratchBuf.resAtomicScratchBuffer);
3444 atomicParams.dwResourceOffset = operand2Offset + sizeof(uint32_t);
3445 atomicParams.dwDataSize = sizeof(uint32_t);
3446 atomicParams.Operation = MHW_MI_ATOMIC_XOR;
3447 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiAtomicCmd(
3448 cmdBuffer,
3449 &atomicParams));
3450
3451 // VCS_GPR0_Lo = n2_hi
3452 MOS_ZeroMemory(®isterMemParams, sizeof(registerMemParams));
3453 registerMemParams.presStoreBuffer = &(m_atomicScratchBuf.resAtomicScratchBuffer);
3454 registerMemParams.dwOffset = operand2Offset + sizeof(uint32_t) ;
3455 registerMemParams.dwRegister = mmioRegisters->generalPurposeRegister0LoOffset; // VCS_GPR0_Lo
3456 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiLoadRegisterMemCmd(
3457 cmdBuffer,
3458 ®isterMemParams));
3459
3460 // step-5: m_storeData = m_storeData + VCS_GPR0_Lo = m_storeData + n2_hi
3461 // if not completed n2_hi should be 0, then m_storeData = m_storeData + 0
3462 // if completed, n2_hi should be 1, then m_storeData = m_storeData + 1
3463 MOS_ZeroMemory(&atomicParams, sizeof(atomicParams));
3464 atomicParams.pOsResource =&m_encodeStatusBuf.resStatusBuffer;
3465 atomicParams.dwResourceOffset = 0;
3466 atomicParams.dwDataSize = sizeof(uint32_t);
3467 atomicParams.Operation = MHW_MI_ATOMIC_ADD;
3468 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiAtomicCmd(
3469 cmdBuffer,
3470 &atomicParams));
3471
3472 // Make Flush DW call to make sure all previous work is done
3473 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiFlushDwCmd(
3474 cmdBuffer,
3475 &flushDwParams));
3476 }
3477
3478 return eStatus;
3479 }
3480
3481 //------------------------------------------------------------------------------
//| Purpose:    Sets up the status reporting values for the next frame
//| Return:     MOS_STATUS_SUCCESS on success, otherwise an error code
3484 //------------------------------------------------------------------------------
ResetStatusReport()3485 MOS_STATUS CodechalEncoderState::ResetStatusReport()
3486 {
3487 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
3488
3489 CODECHAL_ENCODE_FUNCTION_ENTER;
3490
3491 CODECHAL_ENCODE_CHK_NULL_RETURN(m_encodeStatusBuf.pEncodeStatus);
3492
3493 EncodeStatusBuffer* encodeStatusBuf = &m_encodeStatusBuf;
3494 EncodeStatusBuffer* encodeStatusBufRcs = &m_encodeStatusBufRcs;
3495
3496 EncodeStatus* encodeStatus =
3497 (EncodeStatus*)(encodeStatusBuf->pEncodeStatus +
3498 encodeStatusBuf->wCurrIndex * encodeStatusBuf->dwReportSize);
3499
3500 if (!m_frameTrackingEnabled && !m_inlineEncodeStatusUpdate)
3501 {
3502 bool renderEngineInUse = m_osInterface->pfnGetGpuContext(m_osInterface) == m_renderContext;
3503 bool nullRendering = false;
3504
3505 MOS_SYNC_PARAMS syncParams = g_cInitSyncParams;
3506 if (renderEngineInUse)
3507 {
3508 syncParams.GpuContext = m_renderContext;
3509 nullRendering = m_renderContextUsesNullHw;
3510 }
3511 else
3512 {
3513 syncParams.GpuContext = m_videoContext;
3514 nullRendering = m_videoContextUsesNullHw;
3515 }
3516
3517 m_osInterface->pfnResetOsStates(m_osInterface);
3518 MOS_COMMAND_BUFFER cmdBuffer;
3519 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnGetCommandBuffer(m_osInterface, &cmdBuffer, 0));
3520
3521 cmdBuffer.Attributes.bTurboMode = m_hwInterface->m_turboMode;
3522 cmdBuffer.Attributes.dwNumRequestedEUSlices = m_hwInterface->m_numRequestedEuSlices;
3523
3524 CODECHAL_ENCODE_CHK_STATUS_RETURN(UpdateCmdBufAttribute(&cmdBuffer, renderEngineInUse));
3525
3526 MHW_GENERIC_PROLOG_PARAMS genericPrologParams;
3527 MOS_ZeroMemory(&genericPrologParams, sizeof(genericPrologParams));
3528 genericPrologParams.pOsInterface = m_osInterface;
3529 genericPrologParams.pvMiInterface = m_miInterface;
3530 genericPrologParams.bMmcEnabled = CodecHalMmcState::IsMmcEnabled();
3531 genericPrologParams.presStoreData = (renderEngineInUse) ?
3532 &encodeStatusBufRcs->resStatusBuffer : &encodeStatusBuf->resStatusBuffer;
3533 genericPrologParams.dwStoreDataValue = m_storeData;
3534 CODECHAL_ENCODE_CHK_STATUS_RETURN(Mhw_SendGenericPrologCmd(&cmdBuffer, &genericPrologParams));
3535
3536 // Update the tag in GPU Sync eStatus buffer (H/W Tag) to match the current S/W tag
3537 if (m_osInterface->bTagResourceSync)
3538 {
3539 if (!m_firstField || CodecHal_PictureIsFrame(m_currOriginalPic))
3540 {
3541 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->WriteSyncTagToResource(&cmdBuffer, &syncParams));
3542 }
3543 }
3544
3545 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiBatchBufferEnd(
3546 &cmdBuffer,
3547 nullptr));
3548 CODECHAL_DEBUG_TOOL(CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpCmdBuffer(
3549 &cmdBuffer,
3550 CODECHAL_NUM_MEDIA_STATES,
3551 "_RESET_STATUS")));
3552
3553 m_osInterface->pfnReturnCommandBuffer(m_osInterface, &cmdBuffer, 0);
3554
3555 HalOcaInterface::On1stLevelBBEnd(cmdBuffer, *m_osInterface->pOsContext);
3556 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnSubmitCommandBuffer(m_osInterface, &cmdBuffer, nullRendering));
3557 }
3558
3559 if (m_videoContextUsesNullHw ||
3560 m_renderContextUsesNullHw)
3561 {
3562 if (CodecHalUsesOnlyRenderEngine(m_codecFunction))
3563 {
3564 *(encodeStatusBufRcs->pData) = m_storeData;
3565 }
3566 else
3567 {
3568 *(encodeStatusBuf->pData) = m_storeData;
3569 }
3570 }
3571
3572 encodeStatus->dwHeaderBytesInserted = m_headerBytesInserted;
3573 m_headerBytesInserted = 0;
3574
3575 if (!m_disableStatusReport)
3576 {
3577 m_storeData++;
3578 encodeStatusBuf->wCurrIndex = (encodeStatusBuf->wCurrIndex + 1) % CODECHAL_ENCODE_STATUS_NUM;
3579 encodeStatusBufRcs->wCurrIndex = (encodeStatusBufRcs->wCurrIndex + 1) % CODECHAL_ENCODE_STATUS_NUM;
3580 }
3581
3582 // clean up the Status for next frame
3583 encodeStatus =
3584 (EncodeStatus*)(encodeStatusBuf->pEncodeStatus +
3585 encodeStatusBuf->wCurrIndex * encodeStatusBuf->dwReportSize);
3586 MOS_ZeroMemory((uint8_t*)encodeStatus, sizeof(EncodeStatus));
3587
3588 if (m_encEnabled)
3589 {
3590 EncodeStatus* pEncodeStatusRcs =
3591 (EncodeStatus*)(encodeStatusBufRcs->pEncodeStatus +
3592 encodeStatusBufRcs->wCurrIndex * encodeStatusBufRcs->dwReportSize);
3593 MOS_ZeroMemory((uint8_t*)pEncodeStatusRcs, sizeof(EncodeStatus));
3594 }
3595
3596 return eStatus;
3597 }
3598
ReadCounterValue(uint16_t index,EncodeStatusReport * encodeStatusReport)3599 MOS_STATUS CodechalEncoderState::ReadCounterValue(uint16_t index, EncodeStatusReport* encodeStatusReport)
3600 {
3601 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
3602 CODECHAL_ENCODE_FUNCTION_ENTER;
3603 CODECHAL_ENCODE_CHK_NULL_RETURN(encodeStatusReport);
3604 uint64_t *address2Counter = nullptr;
3605
3606 if (m_hwInterface->GetCpInterface()->IsHWCounterAutoIncrementEnforced(m_osInterface))
3607 {
3608 if(MEDIA_IS_WA(m_waTable, WaReadCtrNounceRegister))
3609 {
3610 //Report counter from register
3611 CODECHAL_ENCODE_CHK_STATUS_RETURN(
3612 m_osInterface->osCpInterface->ReadCtrNounceRegister(
3613 true,
3614 (uint32_t *)&m_regHwCount[index]));
3615 address2Counter = (uint64_t *)&m_regHwCount[index];
3616 CODECHAL_ENCODE_NORMALMESSAGE("MMIO returns end ctr is %llx", *address2Counter);
3617 CODECHAL_ENCODE_NORMALMESSAGE("bitstream size = %d.", encodeStatusReport->bitstreamSize);
3618
3619 // Here gets the end counter of current bit stream, which should minus counter increment.
3620 *address2Counter = *address2Counter - (((encodeStatusReport->bitstreamSize + 63) >> 6) << 2);
3621 }
3622 else
3623 {
3624 //Report HW counter by command output resource
3625 address2Counter = (uint64_t *)(((char *)(m_dataHwCount)) + (index * sizeof(HwCounter)));
3626 }
3627 }
3628 else
3629 {
3630 //Report driver generated counter which was submitted to HW by command
3631 uint32_t ctr[4] = { 0 };
3632 eStatus = m_hwInterface->GetCpInterface()->GetCounterValue(ctr);
3633 if (MOS_STATUS_SUCCESS == eStatus)
3634 {
3635 address2Counter = (uint64_t *)ctr;
3636 }
3637 else
3638 {
3639 return eStatus;
3640 }
3641 }
3642 encodeStatusReport->HWCounterValue.Count = *address2Counter;
3643 //Report back in Big endian
3644 encodeStatusReport->HWCounterValue.Count = SwapEndianness(encodeStatusReport->HWCounterValue.Count);
3645 //IV value computation
3646 encodeStatusReport->HWCounterValue.IV = *(++address2Counter);
3647 encodeStatusReport->HWCounterValue.IV = SwapEndianness(encodeStatusReport->HWCounterValue.IV);
3648 CODECHAL_ENCODE_NORMALMESSAGE(
3649 "encodeStatusReport->HWCounterValue.Count = 0x%llx, encodeStatusReport->HWCounterValue.IV = 0x%llx",
3650 encodeStatusReport->HWCounterValue.Count,
3651 encodeStatusReport->HWCounterValue.IV);
3652 return eStatus;
3653 }
3654
3655 //------------------------------------------------------------------------------
//| Purpose:    Gets available status report data
//| Return:     MOS_STATUS_SUCCESS on success, otherwise an error code
3658 //------------------------------------------------------------------------------
GetStatusReport(void * status,uint16_t numStatus)3659 MOS_STATUS CodechalEncoderState::GetStatusReport(
3660 void *status,
3661 uint16_t numStatus)
3662 {
3663 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
3664
3665 CODECHAL_ENCODE_FUNCTION_ENTER;
3666
3667 CODECHAL_ENCODE_CHK_NULL_RETURN(status);
3668 EncodeStatusReport *codecStatus = (EncodeStatusReport *)status;
3669
3670 EncodeStatusBuffer* encodeStatusBuf = nullptr;
3671 if (m_pakEnabled)
3672 {
3673 encodeStatusBuf = &m_encodeStatusBuf;
3674 }
3675 else
3676 {
3677 encodeStatusBuf = &m_encodeStatusBufRcs;
3678 }
3679
3680 CODECHAL_ENCODE_CHK_NULL_RETURN(encodeStatusBuf->pEncodeStatus);
3681
3682 uint16_t numReportsAvailable =
3683 (encodeStatusBuf->wCurrIndex - encodeStatusBuf->wFirstIndex) &
3684 (CODECHAL_ENCODE_STATUS_NUM - 1); // max is (CODECHAL_ENCODE_STATUS_NUM - 1)
3685
3686 uint32_t globalHWStoredData = 0;
3687 if (m_pakEnabled)
3688 {
3689 globalHWStoredData = *(m_encodeStatusBuf.pData); // HW stored Data
3690 }
3691 else
3692 {
3693 globalHWStoredData = *(m_encodeStatusBufRcs.pData); // HW stored Data
3694 }
3695 uint32_t globalCount = m_storeData - globalHWStoredData;
3696
3697 uint16_t reportsGenerated = 0;
3698 if (m_videoContextUsesNullHw ||
3699 m_renderContextUsesNullHw)
3700 {
3701 for (auto i = 0; i < numReportsAvailable; i++)
3702 {
3703 codecStatus[i].CodecStatus = CODECHAL_STATUS_SUCCESSFUL;
3704 // Set fake bitstream size to avoid DDI report error
3705 codecStatus[i].bitstreamSize = 1024;
3706 reportsGenerated++;
3707 }
3708
3709 encodeStatusBuf->wFirstIndex =
3710 (encodeStatusBuf->wFirstIndex + reportsGenerated) % CODECHAL_ENCODE_STATUS_NUM;
3711
3712 return eStatus;
3713 }
3714
3715 CODECHAL_ENCODE_VERBOSEMESSAGE(" numStatus = %d, dwNumReportsAvailable = %d.", numStatus, numReportsAvailable);
3716 CODECHAL_ENCODE_VERBOSEMESSAGE(" hwstoreData = %d, globalCount = %d", globalHWStoredData, globalCount);
3717
3718 if (numReportsAvailable < numStatus && numStatus < CODECHAL_ENCODE_STATUS_NUM)
3719 {
3720 for (auto i = numReportsAvailable; i < numStatus; i++)
3721 {
3722 codecStatus[i].CodecStatus = CODECHAL_STATUS_UNAVAILABLE;
3723 }
3724 numStatus = numReportsAvailable;
3725 }
3726
3727 if (numReportsAvailable == 0)
3728 {
3729 CODECHAL_ENCODE_ASSERTMESSAGE("No reports available, wCurrIndex = %d, wFirstIndex = %d", encodeStatusBuf->wCurrIndex, encodeStatusBuf->wFirstIndex);
3730 return MOS_STATUS_SUCCESS;
3731 }
3732
3733 uint16_t index = 0;
3734
3735 for (auto i = 0; i < numStatus; i++)
3736 {
3737 if(codecStatus->bSequential)
3738 {
3739 index = (encodeStatusBuf->wFirstIndex + i) & (CODECHAL_ENCODE_STATUS_NUM - 1);
3740 }
3741 else
3742 {
3743 index = (encodeStatusBuf->wFirstIndex + numStatus - i - 1) & (CODECHAL_ENCODE_STATUS_NUM - 1);
3744 }
3745
3746 EncodeStatus* encodeStatus =
3747 (EncodeStatus*)(encodeStatusBuf->pEncodeStatus +
3748 index * encodeStatusBuf->dwReportSize);
3749 EncodeStatusReport* encodeStatusReport = &encodeStatus->encodeStatusReport;
3750 PCODEC_REF_LIST refList = encodeStatusReport->pCurrRefList;
3751 PMHW_VDBOX_IMAGE_STATUS_CONTROL imgStatusCtrl = &encodeStatus->ImageStatusCtrl;
3752 PMHW_VDBOX_PAK_NUM_OF_SLICES numSlices = &encodeStatus->NumSlices;
3753 uint32_t localCount = encodeStatus->dwStoredData - globalHWStoredData;
3754
3755 if (localCount == 0 || localCount > globalCount)
3756 {
3757 CODECHAL_DEBUG_TOOL(
3758 m_statusReportDebugInterface->m_bufferDumpFrameNum = encodeStatus->dwStoredData;
3759 )
3760
3761 // Current command is executed
3762 if (m_osInterface->pfnIsGPUHung(m_osInterface))
3763 {
3764 encodeStatusReport->CodecStatus = CODECHAL_STATUS_ERROR;
3765 *(encodeStatusBuf->pData) += 1;
3766 }
3767 else if (encodeStatusReport->Func != CODECHAL_ENCODE_ENC_ID &&
3768 encodeStatusReport->Func != CODECHAL_ENCODE_FEI_ENC_ID &&
3769 encodeStatus->dwStoredDataMfx != CODECHAL_STATUS_QUERY_END_FLAG)
3770 {
3771 if(encodeStatusReport->Func == CODECHAL_ENCODE_FEI_PRE_ENC_ID)
3772 {
3773 CODECHAL_DEBUG_TOOL(
3774 m_statusReportDebugInterface->m_scaledBottomFieldOffset = m_scaledBottomFieldOffset;
3775 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_statusReportDebugInterface->DumpYUVSurface(
3776 m_trackedBuf->Get4xDsSurface(CODEC_CURR_TRACKED_BUFFER),
3777 CodechalDbgAttr::attrReconstructedSurface,
3778 "4xScaledSurf"));
3779
3780 /*CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHal_DbgDumpEncode1Dbuffer(
3781 m_debugInterface,
3782 pEncoder));*/
3783
3784 // dump EncodeFeiPreproc
3785 FeiPreEncParams PreEncParams;
3786 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_statusReportDebugInterface->DumpBuffer(
3787 CodecHal_PictureIsBottomField(m_currOriginalPic) ? &PreEncParams.resStatsBotFieldBuffer
3788 : &PreEncParams.resStatsBotFieldBuffer,
3789 CodechalDbgAttr::attrOutput,
3790 "MbStats",
3791 m_picWidthInMb * m_frameFieldHeightInMb * 64,
3792 CodecHal_PictureIsBottomField(m_currOriginalPic) ? m_mbvProcStatsBottomFieldOffset : 0,
3793 CODECHAL_MEDIA_STATE_PREPROC));)
3794 encodeStatusReport->CodecStatus = CODECHAL_STATUS_SUCCESSFUL;
3795 }
3796 else
3797 {
3798 CODECHAL_ENCODE_NORMALMESSAGE("Media reset may have occured.");
3799 encodeStatusReport->CodecStatus = CODECHAL_STATUS_ERROR;
3800 }
3801 }
3802 else if (m_hwWalker && encodeStatusReport->Func == CODECHAL_ENCODE_ENC_ID)
3803 {
3804 // iterate over all media states and check that all of them completed
3805 for (auto j = 0; j < CODECHAL_NUM_MEDIA_STATES; j += 1)
3806 {
3807 if (encodeStatus->qwStoredDataEnc[j].dwStoredData != CODECHAL_STATUS_QUERY_END_FLAG)
3808 {
3809 // some media state failed to complete
3810 CODECHAL_ENCODE_ASSERTMESSAGE("Error: Unable to finish encoding");
3811 encodeStatusReport->CodecStatus = CODECHAL_STATUS_ERROR;
3812 break;
3813 }
3814 }
3815
3816 encodeStatusReport->CodecStatus = CODECHAL_STATUS_SUCCESSFUL;
3817
3818 if (m_codecFunction == CODECHAL_FUNCTION_HYBRIDPAK && m_mode == CODECHAL_ENCODE_MODE_VP9 &&
3819 encodeStatusReport->CodecStatus != CODECHAL_STATUS_ERROR)
3820 {
3821 unsigned int size = ((m_frameWidth + 63) >> 6) * ((m_frameHeight + 63) >> 6) + 1;
3822 encodeStatusReport->bitstreamSize = CODECHAL_VP9_MB_CODE_SIZE * sizeof(uint32_t) * size;
3823 }
3824 }
3825 // The huffman tables sent by application were incorrect (used only for JPEG encoding)
3826 else if(m_standard == CODECHAL_JPEG && imgStatusCtrl->MissingHuffmanCode == 1)
3827 {
3828 CODECHAL_ENCODE_ASSERTMESSAGE("Error: JPEG standard encoding: missing huffman code");
3829 encodeStatusReport->CodecStatus = CODECHAL_STATUS_ERROR;
3830 }
3831 else
3832 {
3833 if (m_codecGetStatusReportDefined)
3834 {
3835 // Call corresponding CODEC's status report function if existing
3836 eStatus = GetStatusReport(encodeStatus, encodeStatusReport);
3837 if (MOS_STATUS_SUCCESS != eStatus)
3838 {
3839 return eStatus;
3840 }
3841
3842 if (m_osInterface->osCpInterface->IsCpEnabled() && m_skipFrameBasedHWCounterRead == false)
3843 {
3844 eStatus = ReadCounterValue(index, encodeStatusReport);
3845 if (MOS_STATUS_SUCCESS != eStatus)
3846 {
3847 return eStatus;
3848 }
3849 }
3850 }
3851 else
3852 {
3853 encodeStatusReport->CodecStatus = CODECHAL_STATUS_SUCCESSFUL;
3854 encodeStatusReport->bitstreamSize =
3855 encodeStatus->dwMFCBitstreamByteCountPerFrame + encodeStatus->dwHeaderBytesInserted;
3856
3857 // dwHeaderBytesInserted is for WAAVCSWHeaderInsertion
3858 // and is 0 otherwise
3859 encodeStatusReport->QpY = encodeStatus->BrcQPReport.DW0.QPPrimeY;
3860 encodeStatusReport->SuggestedQpYDelta =
3861 encodeStatus->ImageStatusCtrl.CumulativeSliceDeltaQP;
3862 encodeStatusReport->NumberPasses = (uint8_t)(encodeStatus->ImageStatusCtrl.TotalNumPass + 1);
3863 encodeStatusReport->SceneChangeDetected =
3864 (encodeStatus->dwSceneChangedFlag & CODECHAL_ENCODE_SCENE_CHANGE_DETECTED_MASK) ? 1 : 0;
3865
3866 CODECHAL_ENCODE_CHK_NULL_RETURN(m_skuTable);
3867
3868 if (m_osInterface->osCpInterface->IsCpEnabled() && m_skipFrameBasedHWCounterRead == false)
3869 {
3870 eStatus = ReadCounterValue(index, encodeStatusReport);
3871 if (MOS_STATUS_SUCCESS != eStatus)
3872 {
3873 return eStatus;
3874 }
3875 }
3876
3877 if (m_picWidthInMb != 0 && m_frameFieldHeightInMb != 0)
3878 {
3879 encodeStatusReport->AverageQp = (unsigned char)(((uint32_t)encodeStatus->QpStatusCount.cumulativeQP)
3880 / (m_picWidthInMb * m_frameFieldHeightInMb));
3881 }
3882 encodeStatusReport->PanicMode = encodeStatus->ImageStatusCtrl.Panic;
3883
3884 // If Num slices is greater than spec limit set NumSlicesNonCompliant to 1 and report error
3885 if (numSlices->NumberOfSlices > m_maxNumSlicesAllowed)
3886 {
3887 encodeStatusReport->NumSlicesNonCompliant = 1;
3888 }
3889 encodeStatusReport->NumberSlices = numSlices->NumberOfSlices;
3890 }
3891
3892 if (encodeStatusReport->bitstreamSize > m_bitstreamUpperBound)
3893 {
3894 encodeStatusReport->CodecStatus = CODECHAL_STATUS_ERROR;
3895 encodeStatusReport->bitstreamSize = 0;
3896 CODECHAL_ENCODE_ASSERTMESSAGE("Bit-stream size exceeds upper bound!");
3897 return MOS_STATUS_NOT_ENOUGH_BUFFER;
3898 }
3899
3900 if(refList && refList->bMADEnabled)
3901 {
3902 // set lock flag to READ_ONLY
3903 MOS_LOCK_PARAMS lockFlags;
3904 MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
3905 lockFlags.ReadOnly = 1;
3906
3907 uint8_t* data = (uint8_t* )m_osInterface->pfnLockResource(
3908 m_osInterface,
3909 &m_resMadDataBuffer[refList->ucMADBufferIdx],
3910 &lockFlags);
3911
3912 CODECHAL_ENCODE_CHK_NULL_RETURN(data);
3913
3914 eStatus = MOS_SecureMemcpy(
3915 &encodeStatusReport->MAD,
3916 CODECHAL_MAD_BUFFER_SIZE,
3917 data,
3918 CODECHAL_MAD_BUFFER_SIZE);
3919 if(eStatus != MOS_STATUS_SUCCESS)
3920 {
3921 CODECHAL_ENCODE_ASSERTMESSAGE("Failed to copy memory.");
3922 return eStatus;
3923 }
3924
3925 m_osInterface->pfnUnlockResource(
3926 m_osInterface,
3927 &m_resMadDataBuffer[refList->ucMADBufferIdx]);
3928
3929 // The driver needs to divide the output distortion by 4 before sending to the app
3930 encodeStatusReport->MAD /= 4;
3931 }
3932 else
3933 {
3934 encodeStatusReport->MAD = 0;
3935 }
3936
3937 CODECHAL_DEBUG_TOOL(
3938 CODEC_REF_LIST currRefList = *refList;
3939 currRefList.RefPic = encodeStatusReport->CurrOriginalPic;
3940
3941 m_statusReportDebugInterface->m_currPic = encodeStatusReport->CurrOriginalPic;
3942 m_statusReportDebugInterface->m_bufferDumpFrameNum = encodeStatus->dwStoredData;
3943 m_statusReportDebugInterface->m_frameType = encodeStatus->wPictureCodingType;
3944
3945 if (!m_vdencEnabled) {
3946 if (currRefList.bMADEnabled)
3947 {
3948 CODECHAL_ENCODE_CHK_STATUS_RETURN(
3949 m_statusReportDebugInterface->DumpBuffer(
3950 &m_resMadDataBuffer[currRefList.ucMADBufferIdx],
3951 CodechalDbgAttr::attrInput,
3952 "MADWrite",
3953 CODECHAL_MAD_BUFFER_SIZE,
3954 0,
3955 CODECHAL_MEDIA_STATE_ENC_NORMAL));
3956 }
3957
3958 DumpMbEncPakOutput(refList, m_statusReportDebugInterface);
3959 }
3960
3961 if (CodecHalUsesVideoEngine(m_codecFunction)) {
3962 /* Only where the MFX engine is used the bitstream surface will be available */
3963 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_statusReportDebugInterface->DumpBuffer(
3964 &currRefList.resBitstreamBuffer,
3965 CodechalDbgAttr::attrBitstream,
3966 "_PAK",
3967 encodeStatusReport->bitstreamSize,
3968 0,
3969 CODECHAL_NUM_MEDIA_STATES));
3970
3971 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_statusReportDebugInterface->DumpData(
3972 encodeStatusReport,
3973 sizeof(EncodeStatusReport),
3974 CodechalDbgAttr::attrStatusReport,
3975 "EncodeStatusReport_Buffer"));
3976
3977 CODECHAL_ENCODE_CHK_STATUS_RETURN(DumpFrameStatsBuffer(m_statusReportDebugInterface));
3978
3979 if (m_vdencEnabled)
3980 {
3981 /*CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHal_DbgDumpEncodeVdencOutputs(
3982 m_debugInterface, pEncoder));
3983
3984 if (m_cmdGenHucUsed)
3985 {
3986 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHal_DbgDumpEncodeHucCmdGen(
3987 m_debugInterface, pEncoder));
3988 }*/
3989 }
3990 }
3991
3992 if (currRefList.b32xScalingUsed) {
3993 m_statusReportDebugInterface->m_scaledBottomFieldOffset = m_scaled32xBottomFieldOffset;
3994 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_statusReportDebugInterface->DumpYUVSurface(
3995 m_trackedBuf->Get32xDsSurface(currRefList.ucScalingIdx),
3996 CodechalDbgAttr::attrReconstructedSurface,
3997 "32xScaledSurf"))
3998 }
3999
4000 if (currRefList.b2xScalingUsed) // Currently only used for Gen10 Hevc Encode
4001 {
4002 m_statusReportDebugInterface->m_scaledBottomFieldOffset = 0; // No bottom field offset for Hevc
4003 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_statusReportDebugInterface->DumpYUVSurface(
4004 m_trackedBuf->Get2xDsSurface(currRefList.ucScalingIdx),
4005 CodechalDbgAttr::attrReconstructedSurface,
4006 "2xScaledSurf"))
4007 }
4008
4009 if (currRefList.b16xScalingUsed) {
4010 m_statusReportDebugInterface->m_scaledBottomFieldOffset = m_scaled16xBottomFieldOffset;
4011 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_statusReportDebugInterface->DumpYUVSurface(
4012 m_trackedBuf->Get16xDsSurface(currRefList.ucScalingIdx),
4013 CodechalDbgAttr::attrReconstructedSurface,
4014 "16xScaledSurf"))
4015 }
4016
4017 if (currRefList.b4xScalingUsed) {
4018 m_statusReportDebugInterface->m_scaledBottomFieldOffset = m_scaledBottomFieldOffset;
4019 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_statusReportDebugInterface->DumpYUVSurface(
4020 m_trackedBuf->Get4xDsSurface(currRefList.ucScalingIdx),
4021 CodechalDbgAttr::attrReconstructedSurface,
4022 "4xScaledSurf"))
4023 }
4024
4025 if (!(m_codecFunction == CODECHAL_FUNCTION_ENC || m_codecFunction == CODECHAL_FUNCTION_FEI_ENC)) {
4026 if (m_codecFunction == CODECHAL_FUNCTION_HYBRIDPAK)
4027 {
4028 m_statusReportDebugInterface->m_hybridPakP1 = false;
4029 }
4030
4031 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_statusReportDebugInterface->DumpYUVSurface(
4032 &currRefList.sRefReconBuffer,
4033 CodechalDbgAttr::attrReconstructedSurface,
4034 "ReconSurf"))
4035 })
4036 }
4037 CODECHAL_ENCODE_VERBOSEMESSAGE("Incrementing reports generated to %d.", (reportsGenerated + 1));
4038 reportsGenerated++;
4039 }
4040 else
4041 {
4042 //update GPU status, and skip the hang frame
4043 if(m_osInterface->pfnIsGPUHung(m_osInterface))
4044 {
4045 *(encodeStatusBuf->pData) += 1;
4046 reportsGenerated++;
4047 }
4048
4049 CODECHAL_ENCODE_VERBOSEMESSAGE("Status buffer %d is INCOMPLETE.", i);
4050 encodeStatusReport->CodecStatus = CODECHAL_STATUS_INCOMPLETE;
4051 }
4052 codecStatus[i] = *encodeStatusReport;
4053 }
4054
4055 encodeStatusBuf->wFirstIndex =
4056 (encodeStatusBuf->wFirstIndex + reportsGenerated) % CODECHAL_ENCODE_STATUS_NUM;
4057 CODECHAL_ENCODE_VERBOSEMESSAGE("wFirstIndex now becomes %d.", encodeStatusBuf->wFirstIndex);
4058
4059 return eStatus;
4060 }
4061
4062 //------------------------------------------------------------------------------
//| Purpose:    Reports user feature keys used for encoding
//| Return:     MOS_STATUS_SUCCESS
4065 //------------------------------------------------------------------------------
UserFeatureKeyReport()4066 MOS_STATUS CodechalEncoderState::UserFeatureKeyReport()
4067 {
4068 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
4069
4070 CODECHAL_ENCODE_FUNCTION_ENTER;
4071
4072 MOS_USER_FEATURE_VALUE_WRITE_DATA userFeatureWriteData;
4073
4074 // Encode HW Walker Reporting
4075 userFeatureWriteData = __NULL_USER_FEATURE_VALUE_WRITE_DATA__;
4076 userFeatureWriteData.Value.i32Data = m_hwWalker;
4077 userFeatureWriteData.ValueID = __MEDIA_USER_FEATURE_VALUE_ENCODE_HW_WALKER_ID;
4078 MOS_UserFeature_WriteValues_ID(nullptr, &userFeatureWriteData, 1);
4079
4080 if (m_hwWalker)
4081 {
4082 // Encode HW Walker m_mode Reporting
4083 #if (_DEBUG || _RELEASE_INTERNAL)
4084 userFeatureWriteData = __NULL_USER_FEATURE_VALUE_WRITE_DATA__;
4085 userFeatureWriteData.Value.i32Data = m_walkerMode;
4086 userFeatureWriteData.ValueID = __MEDIA_USER_FEATURE_VALUE_ENCODE_HW_WALKER_MODE_ID;
4087 MOS_UserFeature_WriteValues_ID(nullptr, &userFeatureWriteData, 1);
4088 #endif // _DEBUG || _RELEASE_INTERNAL
4089 }
4090
4091 if (MEDIA_IS_SKU(m_skuTable, FtrSliceShutdown))
4092 {
4093 // SliceShutdownEnable Reporting
4094 userFeatureWriteData = __NULL_USER_FEATURE_VALUE_WRITE_DATA__;
4095 userFeatureWriteData.Value.i32Data = m_sliceShutdownEnable;
4096 userFeatureWriteData.ValueID = __MEDIA_USER_FEATURE_VALUE_SLICE_SHUTDOWN_ENABLE_ID;
4097 MOS_UserFeature_WriteValues_ID(nullptr, &userFeatureWriteData, 1);
4098 }
4099
4100 #if (_DEBUG || _RELEASE_INTERNAL)
4101 // report encode CSC method
4102 if (m_cscDsState)
4103 {
4104 userFeatureWriteData = __NULL_USER_FEATURE_VALUE_WRITE_DATA__;
4105 userFeatureWriteData.Value.i32Data = m_cscDsState->CscMethod();
4106 userFeatureWriteData.ValueID = __MEDIA_USER_FEATURE_VALUE_ENCODE_CSC_METHOD_ID;
4107 MOS_UserFeature_WriteValues_ID(nullptr, &userFeatureWriteData, 1);
4108
4109 userFeatureWriteData = __NULL_USER_FEATURE_VALUE_WRITE_DATA__;
4110 userFeatureWriteData.Value.u32Data = (uint32_t)m_rawSurface.TileType;
4111 userFeatureWriteData.ValueID = __MEDIA_USER_FEATURE_VALUE_ENCODE_RAW_TILE_ID;
4112 MOS_UserFeature_WriteValues_ID(nullptr, &userFeatureWriteData, 1);
4113
4114 userFeatureWriteData = __NULL_USER_FEATURE_VALUE_WRITE_DATA__;
4115 userFeatureWriteData.Value.u32Data = (uint32_t)m_rawSurface.Format;
4116 userFeatureWriteData.ValueID = __MEDIA_USER_FEATURE_VALUE_ENCODE_RAW_FORMAT_ID;
4117 MOS_UserFeature_WriteValues_ID(nullptr, &userFeatureWriteData, 1);
4118 }
4119
4120 // Encode compute context Reporting
4121 userFeatureWriteData = __NULL_USER_FEATURE_VALUE_WRITE_DATA__;
4122 userFeatureWriteData.Value.i32Data = m_computeContextEnabled;
4123 userFeatureWriteData.ValueID = __MEDIA_USER_FEATURE_VALUE_ENCODE_ENABLE_COMPUTE_CONTEXT_ID;
4124 MOS_UserFeature_WriteValues_ID(nullptr, &userFeatureWriteData, 1);
4125 #endif
4126
4127 return eStatus;
4128 }
4129
SubmitCommandBuffer(PMOS_COMMAND_BUFFER cmdBuffer,int32_t nullRendering)4130 MOS_STATUS CodechalEncoderState::SubmitCommandBuffer(
4131 PMOS_COMMAND_BUFFER cmdBuffer,
4132 int32_t nullRendering)
4133 {
4134 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
4135
4136 CODECHAL_ENCODE_FUNCTION_ENTER;
4137
4138 CODECHAL_ENCODE_CHK_NULL_RETURN(cmdBuffer);
4139
4140 HalOcaInterface::On1stLevelBBEnd(*cmdBuffer, *m_osInterface->pOsContext);
4141 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnSubmitCommandBuffer(m_osInterface, cmdBuffer, nullRendering));
4142 return eStatus;
4143 }
4144
MotionEstimationDisableCheck()4145 void CodechalEncoderState::MotionEstimationDisableCheck()
4146 {
4147 CODECHAL_ENCODE_FUNCTION_ENTER;
4148
4149 if (m_downscaledWidth4x < m_minScaledDimension || m_downscaledWidthInMb4x < m_minScaledDimensionInMb ||
4150 m_downscaledHeight4x < m_minScaledDimension || m_downscaledHeightInMb4x < m_minScaledDimensionInMb)
4151 {
4152 m_32xMeSupported = false;
4153 m_16xMeSupported = false;
4154 if (m_downscaledWidth4x < m_minScaledDimension || m_downscaledWidthInMb4x < m_minScaledDimensionInMb)
4155 {
4156 m_downscaledWidth4x = m_minScaledDimension;
4157 m_downscaledWidthInMb4x = CODECHAL_GET_WIDTH_IN_MACROBLOCKS(m_downscaledWidth4x);
4158 }
4159 if (m_downscaledHeight4x < m_minScaledDimension || m_downscaledHeightInMb4x < m_minScaledDimensionInMb)
4160 {
4161 m_downscaledHeight4x = m_minScaledDimension;
4162 m_downscaledHeightInMb4x = CODECHAL_GET_HEIGHT_IN_MACROBLOCKS(m_downscaledHeight4x);
4163 }
4164 }
4165 else if (m_downscaledWidth16x < m_minScaledDimension || m_downscaledWidthInMb16x < m_minScaledDimensionInMb ||
4166 m_downscaledHeight16x < m_minScaledDimension || m_downscaledHeightInMb16x < m_minScaledDimensionInMb)
4167 {
4168 m_32xMeSupported = false;
4169 if (m_downscaledWidth16x < m_minScaledDimension || m_downscaledWidthInMb16x < m_minScaledDimensionInMb)
4170 {
4171 m_downscaledWidth16x = m_minScaledDimension;
4172 m_downscaledWidthInMb16x = CODECHAL_GET_WIDTH_IN_MACROBLOCKS(m_downscaledWidth16x);
4173 }
4174 if (m_downscaledHeight16x < m_minScaledDimension || m_downscaledHeightInMb16x < m_minScaledDimensionInMb)
4175 {
4176 m_downscaledHeight16x = m_minScaledDimension;
4177 m_downscaledHeightInMb16x = CODECHAL_GET_HEIGHT_IN_MACROBLOCKS(m_downscaledHeight16x);
4178 }
4179 }
4180 else
4181 {
4182 if (m_downscaledWidth32x < m_minScaledDimension || m_downscaledWidthInMb32x < m_minScaledDimensionInMb)
4183 {
4184 m_downscaledWidth32x = m_minScaledDimension;
4185 m_downscaledWidthInMb32x = CODECHAL_GET_WIDTH_IN_MACROBLOCKS(m_downscaledWidth32x);
4186 }
4187 if (m_downscaledHeight32x < m_minScaledDimension || m_downscaledHeightInMb32x < m_minScaledDimensionInMb)
4188 {
4189 m_downscaledHeight32x = m_minScaledDimension;
4190 m_downscaledHeightInMb32x = CODECHAL_GET_HEIGHT_IN_MACROBLOCKS(m_downscaledHeight32x);
4191 }
4192 }
4193 }
4194
SendPrologWithFrameTracking(PMOS_COMMAND_BUFFER cmdBuffer,bool frameTrackingRequested,MHW_MI_MMIOREGISTERS * mmioRegister)4195 MOS_STATUS CodechalEncoderState::SendPrologWithFrameTracking(
4196 PMOS_COMMAND_BUFFER cmdBuffer,
4197 bool frameTrackingRequested,
4198 MHW_MI_MMIOREGISTERS* mmioRegister)
4199 {
4200 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
4201
4202 CODECHAL_ENCODE_FUNCTION_ENTER;
4203
4204 CODECHAL_ENCODE_CHK_NULL_RETURN(cmdBuffer);
4205
4206 MOS_GPU_CONTEXT gpuContext = m_osInterface->pfnGetGpuContext(m_osInterface);
4207
4208 // initialize command buffer attributes
4209 cmdBuffer->Attributes.bTurboMode = m_hwInterface->m_turboMode;
4210 cmdBuffer->Attributes.bMediaPreemptionEnabled = MOS_RCS_ENGINE_USED(gpuContext) ?
4211 m_renderEngineInterface->IsPreemptionEnabled() : 0;
4212 cmdBuffer->Attributes.dwNumRequestedEUSlices = m_hwInterface->m_numRequestedEuSlices;
4213 cmdBuffer->Attributes.dwNumRequestedSubSlices = m_hwInterface->m_numRequestedSubSlices;
4214 cmdBuffer->Attributes.dwNumRequestedEUs = m_hwInterface->m_numRequestedEus;
4215 cmdBuffer->Attributes.bValidPowerGatingRequest = true;
4216
4217 if (frameTrackingRequested && m_frameTrackingEnabled)
4218 {
4219 cmdBuffer->Attributes.bEnableMediaFrameTracking = true;
4220 cmdBuffer->Attributes.resMediaFrameTrackingSurface =
4221 m_encodeStatusBuf.resStatusBuffer;
4222 cmdBuffer->Attributes.dwMediaFrameTrackingTag = m_storeData;
4223 // Set media frame tracking address offset(the offset from the encoder status buffer page)
4224 cmdBuffer->Attributes.dwMediaFrameTrackingAddrOffset = 0;
4225 }
4226
4227 #ifdef _MMC_SUPPORTED
4228 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_mmcState->SendPrologCmd(m_miInterface, cmdBuffer, gpuContext));
4229 #endif
4230
4231 MHW_GENERIC_PROLOG_PARAMS genericPrologParams;
4232 MOS_ZeroMemory(&genericPrologParams, sizeof(genericPrologParams));
4233 genericPrologParams.pOsInterface = m_osInterface;
4234 genericPrologParams.pvMiInterface = m_miInterface;
4235 genericPrologParams.bMmcEnabled = CodecHalMmcState::IsMmcEnabled();
4236 genericPrologParams.dwStoreDataValue = m_storeData - 1;
4237 CODECHAL_ENCODE_CHK_STATUS_RETURN(Mhw_SendGenericPrologCmd(cmdBuffer, &genericPrologParams, mmioRegister));
4238
4239 return eStatus;
4240 }
4241
UpdateCmdBufAttribute(PMOS_COMMAND_BUFFER cmdBuffer,bool renderEngineInUse)4242 MOS_STATUS CodechalEncoderState::UpdateCmdBufAttribute(
4243 PMOS_COMMAND_BUFFER cmdBuffer,
4244 bool renderEngineInUse)
4245 {
4246 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
4247
4248 return eStatus;
4249 }
4250
ExecuteEnc(EncoderParams * encodeParams)4251 MOS_STATUS CodechalEncoderState::ExecuteEnc(
4252 EncoderParams* encodeParams)
4253 {
4254 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
4255
4256 CODECHAL_ENCODE_FUNCTION_ENTER;
4257
4258 CODECHAL_ENCODE_CHK_NULL_RETURN(m_hwInterface->GetCpInterface());
4259
4260 if (m_mfeEnabled == false || encodeParams->ExecCodecFunction == CODECHAL_FUNCTION_ENC
4261 || encodeParams->ExecCodecFunction == CODECHAL_FUNCTION_FEI_ENC)
4262 {
4263 // No need to wait if the driver is executing on a simulator
4264 EncodeStatusBuffer* pencodeStatusBuf = CodecHalUsesOnlyRenderEngine(m_codecFunction) ? &m_encodeStatusBufRcs : &m_encodeStatusBuf;
4265 if (!m_osInterface->bSimIsActive &&
4266 m_recycledBufStatusNum[m_currRecycledBufIdx] >
4267 *(pencodeStatusBuf->pData))
4268 {
4269 uint32_t waitMs;
4270
4271 // Wait for Batch Buffer complete event OR timeout
4272 for (waitMs = MHW_TIMEOUT_MS_DEFAULT; waitMs > 0; waitMs -= MHW_EVENT_TIMEOUT_MS)
4273 {
4274 if (m_recycledBufStatusNum[m_currRecycledBufIdx] <= *(pencodeStatusBuf->pData))
4275 {
4276 break;
4277 }
4278
4279 MOS_Sleep(MHW_EVENT_TIMEOUT_MS);
4280 }
4281
4282 CODECHAL_ENCODE_VERBOSEMESSAGE("Waited for %d ms", (MHW_TIMEOUT_MS_DEFAULT - waitMs));
4283
4284 if (m_recycledBufStatusNum[m_currRecycledBufIdx] >
4285 *(pencodeStatusBuf->pData))
4286 {
4287 CODECHAL_ENCODE_ASSERTMESSAGE("No recycled buffers available, wait timed out at %d ms!", MHW_TIMEOUT_MS_DEFAULT);
4288 CODECHAL_ENCODE_ASSERTMESSAGE("m_storeData = %d, m_recycledBufStatusNum[%d] = %d, data = %d", m_storeData, m_currRecycledBufIdx, m_recycledBufStatusNum[m_currRecycledBufIdx], *(pencodeStatusBuf->pData));
4289 return MOS_STATUS_CLIENT_AR_NO_SPACE;
4290 }
4291 }
4292
4293 m_recycledBufStatusNum[m_currRecycledBufIdx] = m_storeData;
4294
4295 // These parameters are updated at the DDI level
4296 if (encodeParams->bMbDisableSkipMapEnabled)
4297 {
4298 CodecHalGetResourceInfo(m_osInterface, encodeParams->psMbDisableSkipMapSurface);
4299 }
4300
4301 CODECHAL_ENCODE_CHK_NULL_RETURN(encodeParams->psRawSurface);
4302 CodecHalGetResourceInfo(m_osInterface, encodeParams->psRawSurface);
4303 if (encodeParams->bMbQpDataEnabled)
4304 {
4305 CodecHalGetResourceInfo(m_osInterface, encodeParams->psMbQpDataSurface);
4306 }
4307
4308 if (m_standard != CODECHAL_JPEG)
4309 {
4310 CODECHAL_ENCODE_CHK_NULL_RETURN(encodeParams->psReconSurface);
4311 CodecHalGetResourceInfo(m_osInterface, encodeParams->psReconSurface);
4312 }
4313
4314 m_encodeParams = *encodeParams;
4315
4316 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->GetCpInterface()->UpdateParams(true));
4317
4318 if (CodecHalUsesVideoEngine(m_codecFunction))
4319 {
4320 // Get resource details of the bitstream resource
4321 MOS_SURFACE details;
4322 MOS_ZeroMemory(&details, sizeof(details));
4323 details.Format = Format_Invalid;
4324 CODECHAL_ENCODE_CHK_NULL_RETURN(encodeParams->presBitstreamBuffer);
4325 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnGetResourceInfo(m_osInterface, encodeParams->presBitstreamBuffer, &details));
4326
4327 m_encodeParams.dwBitstreamSize = details.dwHeight * details.dwWidth;
4328 }
4329
4330 m_osInterface->pfnIncPerfFrameID(m_osInterface);
4331
4332 // init function common to all codecs, before encode each frame
4333 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitCommon());
4334
4335 CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(InitializePicture(m_encodeParams),
4336 "Encoding initialization failed.");
4337
4338 if (m_newSeq)
4339 {
4340 CODECHAL_ENCODE_CHK_STATUS_RETURN(CheckResChangeAndCsc());
4341 }
4342
4343 if (FRAME_SKIP_NORMAL == m_skipFrameFlag)
4344 {
4345 if (m_standard == CODECHAL_MPEG2)
4346 {
4347 CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(EncodeCopySkipFrame(), "Skip-frame failed.\n");
4348 m_skipFrameFlag = FRAME_NO_SKIP;
4349 CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(ResetStatusReport(), "Flushing encode status buffer for skipped frame failed.\n");
4350 m_firstFrame = false;
4351 return eStatus;
4352 }
4353 }
4354
4355 MOS_SYNC_PARAMS syncParams = g_cInitSyncParams;
4356 syncParams.bReadOnly = true;
4357
4358 // Synchronize MB QP data surface resource if any.
4359 if (encodeParams->bMbQpDataEnabled)
4360 {
4361 syncParams.presSyncResource = &encodeParams->psMbQpDataSurface->OsResource;
4362 syncParams.GpuContext = m_renderContext;
4363 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnResourceWait(m_osInterface, &syncParams));
4364 }
4365
4366 // Check if source surface needs to be synchronized and should wait for decode or VPP or any other context
4367 syncParams.presSyncResource = &m_rawSurface.OsResource;
4368
4369 if (CodecHalUsesRenderEngine(m_codecFunction, m_standard) &&
4370 m_firstField)
4371 {
4372 syncParams.GpuContext = m_renderContext;
4373 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnResourceWait(m_osInterface, &syncParams));
4374
4375 if (CodecHalUsesVideoEngine(m_codecFunction))
4376 {
4377 // Perform Sync on PAK context if it is not ENC only case.
4378 // This is done to set the read mask for PAK context for on demand sync
4379 syncParams.GpuContext = m_videoContext;
4380 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnResourceWait(m_osInterface, &syncParams));
4381 }
4382 // Update the resource tag (s/w tag) for On-Demand Sync
4383 // set the tag on render context for ENC case only, else set it on video context for ENC+PAK case
4384 m_osInterface->pfnSetResourceSyncTag(m_osInterface, &syncParams);
4385 }
4386 else if (CodecHalUsesVideoEngine(m_codecFunction))
4387 {
4388 // Perform resource sync for encode uses only video engine
4389 syncParams.GpuContext = m_videoContext;
4390 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnResourceWait(m_osInterface, &syncParams));
4391 m_osInterface->pfnSetResourceSyncTag(m_osInterface, &syncParams);
4392 }
4393
4394 CODECHAL_ENCODE_CHK_NULL_RETURN(m_currRefList);
4395
4396 if (CODECHAL_JPEG != m_standard && m_firstField)
4397 {
4398 for (int i = 0; i < m_currRefList->ucNumRef; i++)
4399 {
4400 CODECHAL_ENCODE_CHK_COND_RETURN(
4401 m_currReconstructedPic.FrameIdx == m_currRefList->RefList[i].FrameIdx,
4402 "the same frame (FrameIdx = %d) cannot be used as both Recon surface and ref frame",
4403 m_currReconstructedPic.FrameIdx);
4404 }
4405
4406 // clear flags
4407 m_currRefList->b2xScalingUsed =
4408 m_currRefList->b4xScalingUsed =
4409 m_currRefList->b16xScalingUsed =
4410 m_currRefList->b32xScalingUsed = false;
4411
4412 // allocate tracked buffer for current frame
4413 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_trackedBuf->AllocateForCurrFrame());
4414 m_currRefList->ucScalingIdx = m_trackedBuf->GetCurrIndex();
4415
4416 if (m_trackedBuf->IsMbCodeAllocationNeeded())
4417 {
4418 // MbCode/MvData buffer can be tracked using the same index as DS surface
4419 m_currRefList->ucMbCodeIdx = m_currMbCodeIdx = m_trackedBuf->GetCurrIndexMbCode();
4420
4421 m_resMbCodeSurface = m_currRefList->resRefMbCodeBuffer = *m_trackedBuf->GetCurrMbCodeBuffer();
4422 if (m_trackedBuf->GetCurrMvDataBuffer())
4423 {
4424 m_resMvDataSurface = m_currRefList->resRefMvDataBuffer = *m_trackedBuf->GetCurrMvDataBuffer();
4425 }
4426 }
4427 else
4428 {
4429 CODECHAL_ENCODE_NORMALMESSAGE("App provides MbCode and MvData buffer!");
4430 if(CODECHAL_AVC == m_standard)
4431 {
4432 m_currRefList->resRefMbCodeBuffer = m_resMbCodeSurface;
4433 m_currRefList->resRefMvDataBuffer = m_resMvDataSurface;
4434 }
4435 }
4436
4437 m_trackedBuf->SetAllocationFlag(false);
4438 }
4439
4440 if (CodecHalUsesRenderEngine(m_codecFunction, m_standard))
4441 {
4442 // set render engine context
4443 m_osInterface->pfnSetGpuContext(m_osInterface, m_renderContext);
4444 m_osInterface->pfnResetOsStates(m_osInterface);
4445
4446 // set all status reports to completed state
4447 InitStatusReport();
4448
4449 // on-demand sync for tracked buffer
4450 syncParams = g_cInitSyncParams;
4451 syncParams.GpuContext = m_renderContext;
4452 syncParams.bReadOnly = false;
4453 if (m_trackedBuf->GetWait() && !Mos_ResourceIsNull(&m_resMbCodeSurface))
4454 {
4455 syncParams.presSyncResource = &m_resMbCodeSurface;
4456 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnResourceWait(m_osInterface, &syncParams));
4457 m_osInterface->pfnSetResourceSyncTag(m_osInterface, &syncParams);
4458 }
4459
4460 // Call ENC Kernels
4461 CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(ExecuteKernelFunctions(),
4462 "ENC failed.");
4463 }
4464 }
4465
4466 if (m_mfeEnabled == false || encodeParams->ExecCodecFunction == CODECHAL_FUNCTION_PAK
4467 || encodeParams->ExecCodecFunction == CODECHAL_FUNCTION_FEI_PAK)
4468 {
4469 CODECHAL_ENCODE_CHK_STATUS_RETURN(Mos_Solo_PreProcessEncode(m_osInterface, &m_resBitstreamBuffer, &m_reconSurface));
4470
4471 if (CodecHalUsesVideoEngine(m_codecFunction))
4472 {
4473 // Set to video context
4474 m_osInterface->pfnSetGpuContext(m_osInterface, m_videoContext);
4475 m_osInterface->pfnResetOsStates(m_osInterface);
4476 m_currPass = 0;
4477
4478 for (m_currPass = 0; m_currPass <= m_numPasses; m_currPass++)
4479 {
4480 m_firstTaskInPhase = (m_currPass == 0);
4481 m_lastTaskInPhase = (m_currPass == m_numPasses);
4482
4483 if (m_firstTaskInPhase || !m_singleTaskPhaseSupported)
4484 CODECHAL_ENCODE_CHK_STATUS_RETURN(VerifySpaceAvailable());
4485
4486 // Setup picture level PAK commands
4487 CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(ExecutePictureLevel(),
4488 "Picture level encoding failed.");
4489
4490 // Setup slice level PAK commands
4491 CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(ExecuteSliceLevel(),
4492 "Slice level encoding failed.");
4493
4494 m_lastTaskInPhase = false;
4495 }
4496 }
4497
4498 m_prevRawSurface = *m_rawSurfaceToPak;
4499
4500 // User Feature Key Reporting - only happens after first frame
4501 if (m_firstFrame == true)
4502 {
4503 CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(UserFeatureKeyReport(),
4504 "Reporting user feature keys failed.");
4505 }
4506
4507 m_currRecycledBufIdx =
4508 (m_currRecycledBufIdx + 1) % CODECHAL_ENCODE_RECYCLED_BUFFER_NUM;
4509
4510 if (m_currRecycledBufIdx == 0)
4511 {
4512 MOS_ZeroMemory(m_recycledBufStatusNum, sizeof(m_recycledBufStatusNum));
4513 }
4514
4515 m_currLaDataIdx = (m_currLaDataIdx + 1) % m_numLaDataEntry;
4516
4517 // Flush encode eStatus buffer
4518 CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(ResetStatusReport(),
4519 "Flushing encode eStatus buffer failed.");
4520
4521 if (m_firstFrame == false && m_firstTwoFrames == true)
4522 {
4523 m_firstTwoFrames = false;
4524 }
4525 m_firstFrame = false;
4526
4527 CODECHAL_ENCODE_CHK_STATUS_RETURN(Mos_Solo_PostProcessEncode(m_osInterface, &m_resBitstreamBuffer, &m_reconSurface));
4528 }
4529 return eStatus;
4530 }
4531
GetNumBrcPakPasses(uint16_t usBRCPrecision)4532 uint8_t CodechalEncoderState::GetNumBrcPakPasses(uint16_t usBRCPrecision)
4533 {
4534 uint8_t numBRCPAKPasses = CODECHAL_ENCODE_BRC_DEFAULT_NUM_PASSES;
4535
4536 switch (usBRCPrecision)
4537 {
4538 case 0:
4539 case 2: numBRCPAKPasses = CODECHAL_ENCODE_BRC_DEFAULT_NUM_PASSES;
4540 break;
4541
4542 case 1: numBRCPAKPasses = CODECHAL_ENCODE_BRC_MINIMUM_NUM_PASSES;
4543 break;
4544
4545 case 3: numBRCPAKPasses = CODECHAL_ENCODE_BRC_MAXIMUM_NUM_PASSES;
4546 break;
4547
4548 default: CODECHAL_ENCODE_ASSERT("Invalid BRC Precision value in Pic Params.");
4549 numBRCPAKPasses = CODECHAL_ENCODE_BRC_DEFAULT_NUM_PASSES;
4550 break;
4551 }
4552
4553 return numBRCPAKPasses;
4554 }
4555
CodechalEncoderGenState(CodechalEncoderState * encoder)4556 CodechalEncoderGenState::CodechalEncoderGenState(CodechalEncoderState* encoder)
4557 {
4558 CODECHAL_ENCODE_ASSERT(encoder);
4559 m_encoder = encoder;
4560 m_hwInterface = encoder->GetHwInterface();
4561 m_osInterface = encoder->GetOsInterface();
4562 m_miInterface = encoder->m_miInterface;
4563 m_renderEngineInterface = encoder->m_renderEngineInterface;
4564 m_stateHeapInterface = encoder->m_stateHeapInterface;
4565 }
4566
CodechalEncoderState(CodechalHwInterface * hwInterface,CodechalDebugInterface * debugInterface,PCODECHAL_STANDARD_INFO standardInfo)4567 CodechalEncoderState::CodechalEncoderState(
4568 CodechalHwInterface* hwInterface,
4569 CodechalDebugInterface* debugInterface,
4570 PCODECHAL_STANDARD_INFO standardInfo):
4571 Codechal(hwInterface, debugInterface)
4572 {
4573 // Add Null checks here for all interfaces.
4574 CODECHAL_ENCODE_CHK_NULL_NO_STATUS_RETURN(m_hwInterface);
4575 m_mfxInterface = m_hwInterface->GetMfxInterface();
4576 m_hcpInterface = m_hwInterface->GetHcpInterface();
4577 m_hucInterface = m_hwInterface->GetHucInterface();
4578 m_vdencInterface = m_hwInterface->GetVdencInterface();
4579 m_miInterface = hwInterface->GetMiInterface();
4580 m_renderEngineInterface = hwInterface->GetRenderInterface();
4581 CODECHAL_ENCODE_CHK_NULL_NO_STATUS_RETURN(m_renderEngineInterface);
4582 m_stateHeapInterface = m_renderEngineInterface->m_stateHeapInterface;
4583 CODECHAL_ENCODE_ASSERT(m_renderEngineInterface->GetHwCaps());
4584
4585 CODECHAL_ENCODE_CHK_NULL_NO_STATUS_RETURN(m_osInterface);
4586 m_osInterface->pfnGetPlatform(m_osInterface, &m_platform);
4587 m_skuTable = m_osInterface->pfnGetSkuTable(m_osInterface);
4588 m_waTable = m_osInterface->pfnGetWaTable(m_osInterface);
4589 m_gtSystemInfo = m_osInterface->pfnGetGtSystemInfo(m_osInterface);
4590 m_videoGpuNode = MOS_GPU_NODE_MAX;
4591 m_renderContext = MOS_GPU_CONTEXT_INVALID_HANDLE;
4592 m_videoContext = MOS_GPU_CONTEXT_INVALID_HANDLE;
4593
4594 m_vdencEnabled = CodecHalUsesVdencEngine(standardInfo->CodecFunction);
4595 m_codecFunction = standardInfo->CodecFunction;
4596
4597 m_vdencMeKernelState = MHW_KERNEL_STATE();
4598 m_vdencStreaminKernelState = MHW_KERNEL_STATE();
4599 m_vdencMeKernelStateRAB = MHW_KERNEL_STATE();
4600 m_vdencStreaminKernelStateRAB = MHW_KERNEL_STATE();
4601
4602 for (auto i = 0; i < CODEC_NUM_FIELDS_PER_FRAME; i++)
4603 {
4604 m_scaling2xKernelStates[i] = MHW_KERNEL_STATE();
4605 m_scaling4xKernelStates[i] = MHW_KERNEL_STATE();
4606 }
4607 for (auto i = 0; i < CODECHAL_ENCODE_ME_IDX_NUM; i++)
4608 {
4609 m_meKernelStates[i] = MHW_KERNEL_STATE();
4610 }
4611
4612 pfnGetKernelHeaderAndSize = nullptr;
4613
4614 MOS_ZeroMemory(&m_encodeParams, sizeof(m_encodeParams));
4615 MOS_ZeroMemory(&m_resHwCount, sizeof(m_resHwCount));
4616 MOS_ZeroMemory(&m_rawSurface, sizeof(m_rawSurface)); // Pointer to MOS_SURFACE of raw surface
4617 MOS_ZeroMemory(&m_reconSurface, sizeof(m_reconSurface)); // Pointer to MOS_SURFACE of reconstructed surface
4618 MOS_ZeroMemory(&m_resBitstreamBuffer, sizeof(m_resBitstreamBuffer)); // Pointer to MOS_SURFACE of bitstream surface
4619 MOS_ZeroMemory(&m_resMbCodeSurface, sizeof(m_resMbCodeSurface)); // Pointer to MOS_SURFACE of MbCode surface
4620 MOS_ZeroMemory(&m_resMvDataSurface, sizeof(m_resMvDataSurface)); // Pointer to MOS_SURFACE of MvData surface
4621
4622 MOS_ZeroMemory(&m_resSyncObjectRenderContextInUse, sizeof(m_resSyncObjectRenderContextInUse));
4623 MOS_ZeroMemory(&m_resSyncObjectVideoContextInUse, sizeof(m_resSyncObjectVideoContextInUse));
4624 MOS_ZeroMemory(&m_encodeStatusBuf, sizeof(m_encodeStatusBuf)); // Stores all the status_query related data for PAK engine
4625 MOS_ZeroMemory(&m_encodeStatusBufRcs, sizeof(m_encodeStatusBufRcs)); // Stores all the status_query related data for render ring (RCS)
4626 MOS_ZeroMemory(&m_imgStatusControlBuffer, sizeof(m_imgStatusControlBuffer)); // Stores image eStatus control data
4627 MOS_ZeroMemory(&m_atomicScratchBuf, sizeof(m_atomicScratchBuf)); // Stores atomic operands and result
4628 MOS_ZeroMemory(&m_bsBuffer, sizeof(m_bsBuffer));
4629
4630 MOS_ZeroMemory(&m_resVdencCmdInitializerDmemBuffer, sizeof(m_resVdencCmdInitializerDmemBuffer));
4631 MOS_ZeroMemory(&m_resVdencCmdInitializerDataBuffer, sizeof(m_resVdencCmdInitializerDataBuffer));
4632
4633 MOS_ZeroMemory(&m_resDistortionBuffer, sizeof(m_resDistortionBuffer)); // MBEnc Distortion Buffer
4634 for (auto i = 0; i < CODECHAL_ENCODE_MAX_NUM_MAD_BUFFERS; i++)
4635 {
4636 MOS_ZeroMemory(&m_resMadDataBuffer[i], sizeof(m_resMadDataBuffer[i])); // Buffers to store Mean of Absolute Differences
4637 }
4638 for (auto i = 0; i < CODECHAL_ENCODE_RECYCLED_BUFFER_NUM; i++)
4639 {
4640 MOS_ZeroMemory(&m_sliceMapSurface[i], sizeof(m_sliceMapSurface[i]));
4641 }
4642
4643 for (auto i = 0; i < CODECHAL_ENCODE_RECYCLED_BUFFER_NUM; i++)
4644 {
4645 MOS_ZeroMemory(&m_resVdencStreamInBuffer[i], sizeof(m_resVdencStreamInBuffer[i]));
4646 }
4647 MOS_ZeroMemory(&m_resPakMmioBuffer, sizeof(m_resPakMmioBuffer));
4648 MOS_ZeroMemory(&m_resHucStatus2Buffer, sizeof(m_resHucStatus2Buffer));
4649 MOS_ZeroMemory(&m_resHucFwBuffer, sizeof(m_resHucFwBuffer));
4650
4651 MOS_ZeroMemory(&m_resDeblockingFilterRowStoreScratchBuffer, sizeof(m_resDeblockingFilterRowStoreScratchBuffer)); // Handle of deblock row store surface
4652 MOS_ZeroMemory(&m_resMPCRowStoreScratchBuffer, sizeof(m_resMPCRowStoreScratchBuffer)); // Handle of mpc row store surface
4653 for (auto i = 0; i < CODECHAL_ENCODE_RECYCLED_BUFFER_NUM; i++)
4654 {
4655 MOS_ZeroMemory(&m_resStreamOutBuffer[i], sizeof(m_resStreamOutBuffer[i])); // Handle of streamout data surface
4656 }
4657
4658 MOS_ZeroMemory(&m_scaling4xBindingTable, sizeof(m_scaling4xBindingTable));
4659 MOS_ZeroMemory(&m_scaling2xBindingTable, sizeof(m_scaling2xBindingTable));
4660 for (auto i = 0; i < CODECHAL_ENCODE_VME_BBUF_NUM; i++)
4661 {
4662 MOS_ZeroMemory(&m_scalingBBUF[i], sizeof(m_scalingBBUF[i])); // This Batch Buffer is used for scaling kernel.
4663 }
4664 MOS_ZeroMemory(&m_flatnessCheckSurface, sizeof(m_flatnessCheckSurface));
4665 MOS_ZeroMemory(&m_resMbStatisticsSurface, sizeof(m_resMbStatisticsSurface));
4666 MOS_ZeroMemory(&m_resMbStatsBuffer, sizeof(m_resMbStatsBuffer));
4667
4668 MOS_ZeroMemory(&m_meBindingTable, sizeof(m_meBindingTable));
4669
4670 MOS_ZeroMemory(&m_vdencMeKernelBindingTable, sizeof(m_vdencMeKernelBindingTable));
4671
4672 MOS_ZeroMemory(&m_vdencStreaminKernelBindingTable, sizeof(m_vdencStreaminKernelBindingTable));
4673 }
4674
~CodechalEncoderState()4675 CodechalEncoderState::~CodechalEncoderState()
4676 {
4677 if (m_gpuCtxCreatOpt)
4678 {
4679 MOS_Delete(m_gpuCtxCreatOpt);
4680 m_gpuCtxCreatOpt = nullptr;
4681 }
4682
4683 DestroyMDFResources();
4684
4685 if (m_perfProfiler)
4686 {
4687 MediaPerfProfiler::Destroy(m_perfProfiler, (void*)this, m_osInterface);
4688 m_perfProfiler = nullptr;
4689 }
4690 }
4691
SetupWalkerContext(MOS_COMMAND_BUFFER * cmdBuffer,SendKernelCmdsParams * params)4692 MOS_STATUS CodechalEncoderState::SetupWalkerContext(
4693 MOS_COMMAND_BUFFER* cmdBuffer,
4694 SendKernelCmdsParams* params)
4695 {
4696 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
4697
4698 CODECHAL_ENCODE_CHK_NULL_RETURN(params);
4699 CODECHAL_ENCODE_CHK_NULL_RETURN(params->pKernelState);
4700
4701 MOS_RESOURCE* dsh = params->pKernelState->m_dshRegion.GetResource();
4702 CODECHAL_ENCODE_CHK_NULL_RETURN(dsh);
4703
4704 // Add Media VFE command
4705 CODECHAL_ENCODE_CHK_STATUS_RETURN(AddMediaVfeCmd(cmdBuffer, params));
4706
4707 // Add Media Curbe Load command
4708 if (params->pKernelState->KernelParams.iCurbeLength)
4709 {
4710 MHW_CURBE_LOAD_PARAMS curbeLoadParams;
4711 MOS_ZeroMemory(&curbeLoadParams, sizeof(curbeLoadParams));
4712 curbeLoadParams.pKernelState = params->pKernelState;
4713 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_renderEngineInterface->AddMediaCurbeLoadCmd(cmdBuffer, &curbeLoadParams));
4714
4715 HalOcaInterface::OnIndirectState(
4716 *cmdBuffer,
4717 *m_osInterface->pOsContext,
4718 dsh,
4719 params->pKernelState->m_dshRegion.GetOffset() + params->pKernelState->dwCurbeOffset,
4720 false,
4721 params->pKernelState->KernelParams.iCurbeLength);
4722 }
4723
4724 MHW_ID_LOAD_PARAMS idLoadParams;
4725 MOS_ZeroMemory(&idLoadParams, sizeof(idLoadParams));
4726 idLoadParams.pKernelState = params->pKernelState;
4727 idLoadParams.dwNumKernelsLoaded = 1;
4728 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_renderEngineInterface->AddMediaIDLoadCmd(cmdBuffer, &idLoadParams));
4729
4730 uint32_t InterfaceDescriptorTotalLength = m_stateHeapInterface->pStateHeapInterface->GetSizeofCmdInterfaceDescriptorData();
4731 uint32_t InterfaceDescriptorDataStartOffset = MOS_ALIGN_CEIL(
4732 params->pKernelState->m_dshRegion.GetOffset() + params->pKernelState->dwIdOffset,
4733 m_stateHeapInterface->pStateHeapInterface->GetIdAlignment());
4734
4735 HalOcaInterface::OnIndirectState(
4736 *cmdBuffer,
4737 *m_osInterface->pOsContext,
4738 dsh,
4739 InterfaceDescriptorDataStartOffset,
4740 false,
4741 InterfaceDescriptorTotalLength);
4742
4743 return eStatus;
4744 }
4745
4746 #if USE_CODECHAL_DEBUG_TOOL
DumpMbEncPakOutput(PCODEC_REF_LIST currRefList,CodechalDebugInterface * debugInterface)4747 MOS_STATUS CodechalEncoderState::DumpMbEncPakOutput(PCODEC_REF_LIST currRefList, CodechalDebugInterface* debugInterface)
4748 {
4749 CODECHAL_ENCODE_FUNCTION_ENTER;
4750 CODECHAL_ENCODE_CHK_NULL_RETURN(currRefList);
4751 CODECHAL_ENCODE_CHK_NULL_RETURN(debugInterface);
4752
4753 CODECHAL_ENCODE_CHK_STATUS_RETURN(debugInterface->DumpBuffer(
4754 &currRefList->resRefMbCodeBuffer,
4755 CodechalDbgAttr::attrOutput,
4756 "MbCode",
4757 m_picWidthInMb * m_frameFieldHeightInMb * 64,
4758 CodecHal_PictureIsBottomField(currRefList->RefPic) ? m_frameFieldHeightInMb * m_picWidthInMb * 64 : 0,
4759 (m_codecFunction != CODECHAL_FUNCTION_HYBRIDPAK) ?
4760 CODECHAL_MEDIA_STATE_ENC_NORMAL : CODECHAL_MEDIA_STATE_HYBRID_PAK_P2));
4761
4762 if (m_mvDataSize)
4763 {
4764 CODECHAL_ENCODE_CHK_STATUS_RETURN(debugInterface->DumpBuffer(
4765 &currRefList->resRefMvDataBuffer,
4766 CodechalDbgAttr::attrOutput,
4767 "MbData",
4768 m_picWidthInMb * m_frameFieldHeightInMb * (32 * 4),
4769 CodecHal_PictureIsBottomField(currRefList->RefPic) ? MOS_ALIGN_CEIL(m_frameFieldHeightInMb * m_picWidthInMb * (32 * 4), 0x1000) : 0,
4770 (m_codecFunction != CODECHAL_FUNCTION_HYBRIDPAK) ?
4771 CODECHAL_MEDIA_STATE_ENC_NORMAL : CODECHAL_MEDIA_STATE_HYBRID_PAK_P2));
4772 }
4773 if (CodecHalIsFeiEncode(m_codecFunction))
4774 {
4775 CODECHAL_ENCODE_CHK_STATUS_RETURN(debugInterface->DumpBuffer(
4776 &m_resDistortionBuffer,
4777 CodechalDbgAttr::attrOutput,
4778 "DistortionSurf",
4779 m_picWidthInMb * m_frameFieldHeightInMb * 48,
4780 CodecHal_PictureIsBottomField(currRefList->RefPic) ? MOS_ALIGN_CEIL(m_frameFieldHeightInMb * m_picWidthInMb * 48, 0x1000) : 0,
4781 (m_codecFunction != CODECHAL_FUNCTION_HYBRIDPAK) ?
4782 CODECHAL_MEDIA_STATE_ENC_NORMAL : CODECHAL_MEDIA_STATE_HYBRID_PAK_P2));
4783 }
4784 return MOS_STATUS_SUCCESS;
4785 }
4786
AddBufferWithIMMValue(PMOS_COMMAND_BUFFER cmdBuffer,PMOS_RESOURCE presStoreBuffer,uint32_t offset,uint32_t value,bool bAdd)4787 MOS_STATUS CodechalEncoderState::AddBufferWithIMMValue(
4788 PMOS_COMMAND_BUFFER cmdBuffer,
4789 PMOS_RESOURCE presStoreBuffer,
4790 uint32_t offset,
4791 uint32_t value,
4792 bool bAdd)
4793 {
4794 MHW_MI_STORE_REGISTER_MEM_PARAMS StoreRegParams;
4795 MHW_MI_STORE_DATA_PARAMS StoreDataParams;
4796 MHW_MI_LOAD_REGISTER_REG_PARAMS LoadRegRegParams;
4797 MHW_MI_LOAD_REGISTER_IMM_PARAMS LoadRegisterImmParams;
4798 MHW_MI_FLUSH_DW_PARAMS FlushDwParams;
4799 MHW_MI_MATH_PARAMS MiMathParams;
4800 MHW_MI_ALU_PARAMS MiAluParams[4];
4801 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
4802
4803 CODECHAL_ENCODE_FUNCTION_ENTER;
4804
4805 if (m_vdboxIndex > m_mfxInterface->GetMaxVdboxIndex()) \
4806 {
4807 CODECHAL_ENCODE_ASSERTMESSAGE("ERROR - vdbox index exceed the maximum");
4808 eStatus = MOS_STATUS_INVALID_PARAMETER;
4809 return eStatus;
4810 }
4811
4812 auto pMmioRegistersMfx = m_mfxInterface->GetMmioRegisters(m_vdboxIndex);
4813 auto pMmioRegistersHcp = m_hcpInterface->GetMmioRegisters(m_vdboxIndex);
4814
4815 MOS_ZeroMemory(&FlushDwParams, sizeof(FlushDwParams));
4816 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiFlushDwCmd(cmdBuffer, &FlushDwParams));
4817
4818 MOS_ZeroMemory(&LoadRegRegParams, sizeof(LoadRegRegParams));
4819
4820 MHW_MI_LOAD_REGISTER_MEM_PARAMS miLoadRegMemParams;
4821 MOS_ZeroMemory(&miLoadRegMemParams, sizeof(miLoadRegMemParams));
4822
4823 miLoadRegMemParams.presStoreBuffer = presStoreBuffer;
4824 miLoadRegMemParams.dwOffset = offset;
4825 miLoadRegMemParams.dwRegister = pMmioRegistersMfx->generalPurposeRegister0LoOffset;
4826 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiLoadRegisterMemCmd(cmdBuffer, &miLoadRegMemParams));
4827
4828 MOS_ZeroMemory(&LoadRegisterImmParams, sizeof(LoadRegisterImmParams));
4829 LoadRegisterImmParams.dwData = 0;
4830 LoadRegisterImmParams.dwRegister = pMmioRegistersMfx->generalPurposeRegister0HiOffset;
4831 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiLoadRegisterImmCmd(
4832 cmdBuffer,
4833 &LoadRegisterImmParams));
4834
4835 MOS_ZeroMemory(&LoadRegisterImmParams, sizeof(LoadRegisterImmParams));
4836 LoadRegisterImmParams.dwData = value;
4837 LoadRegisterImmParams.dwRegister = pMmioRegistersMfx->generalPurposeRegister4LoOffset;
4838 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiLoadRegisterImmCmd(
4839 cmdBuffer,
4840 &LoadRegisterImmParams));
4841 MOS_ZeroMemory(&LoadRegisterImmParams, sizeof(LoadRegisterImmParams));
4842 LoadRegisterImmParams.dwData = 0;
4843 LoadRegisterImmParams.dwRegister = pMmioRegistersMfx->generalPurposeRegister4HiOffset;
4844 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiLoadRegisterImmCmd(
4845 cmdBuffer,
4846 &LoadRegisterImmParams));
4847
4848 MOS_ZeroMemory(&MiMathParams, sizeof(MiMathParams));
4849 MOS_ZeroMemory(&MiAluParams, sizeof(MiAluParams));
4850 // load srcA, reg0
4851 MiAluParams[0].AluOpcode = MHW_MI_ALU_LOAD;
4852 MiAluParams[0].Operand1 = MHW_MI_ALU_SRCA;
4853 MiAluParams[0].Operand2 = MHW_MI_ALU_GPREG0;
4854 // load srcB, reg4
4855 MiAluParams[1].AluOpcode = MHW_MI_ALU_LOAD;
4856 MiAluParams[1].Operand1 = MHW_MI_ALU_SRCB;
4857 MiAluParams[1].Operand2 = MHW_MI_ALU_GPREG4;
4858
4859 if (bAdd)
4860 {
4861 // add srcA, srcB
4862 MiAluParams[2].AluOpcode = MHW_MI_ALU_ADD;
4863 }
4864 else
4865 {
4866 // sub srcA, srcB
4867 MiAluParams[2].AluOpcode = MHW_MI_ALU_SUB;
4868 }
4869
4870 // store reg0, ACCU
4871 MiAluParams[3].AluOpcode = MHW_MI_ALU_STORE;
4872 MiAluParams[3].Operand1 = MHW_MI_ALU_GPREG0;
4873 MiAluParams[3].Operand2 = MHW_MI_ALU_ACCU;
4874
4875 MiMathParams.pAluPayload = MiAluParams;
4876 MiMathParams.dwNumAluParams = 4; // four ALU commands needed for this substract opertaion. see following ALU commands.
4877 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiMathCmd(
4878 cmdBuffer,
4879 &MiMathParams));
4880
4881 // update the value
4882 MOS_ZeroMemory(&StoreRegParams, sizeof(StoreRegParams));
4883 StoreRegParams.presStoreBuffer = presStoreBuffer;
4884 StoreRegParams.dwOffset = offset;
4885 StoreRegParams.dwRegister = pMmioRegistersMfx->generalPurposeRegister0LoOffset;
4886 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreRegisterMemCmd(cmdBuffer, &StoreRegParams));
4887
4888 return eStatus;
4889 }
4890 #endif
4891