1 /*
2 * Copyright (c) 2017, Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 */
22 //!
23 //! \file     codechal_kernel_hme_g10.cpp
24 //! \brief    Hme kernel implementation for Gen10 platform
25 //!
26 #include "codechal_kernel_hme_g10.h"
27 
28 // clang-format off
//! Static table of 48 DWORD values belonging to CodechalKernelHmeG10::Curbe.
//! NOTE(review): presumably the default CURBE image that Curbe::m_data starts from --
//! confirm against the Curbe definition in the header.
//! Only entries 1, 2, 3, 5 and 15 carry non-zero values among the first 40 DWORDs;
//! the last eight DWORDs (40-47) are all 0xffffffff.
const uint32_t CodechalKernelHmeG10::Curbe::m_initCurbe[48] =
{
    0x00000000, 0x00200010, 0x00003939, 0x77a43000, 0x00000000, 0x28300000, 0x00000000, 0x00000000,
    0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000200,
    0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
    0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
    0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
    0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff
};
38 // clang-format on
39 
CodechalKernelHmeG10(CodechalEncoderState * encoder,bool me4xDistBufferSupported)40 CodechalKernelHmeG10::CodechalKernelHmeG10(
41     CodechalEncoderState *encoder,
42     bool     me4xDistBufferSupported)
43         : CodechalKernelHme(encoder, me4xDistBufferSupported)
44 {
45 }
46 
SetCurbe(MHW_KERNEL_STATE * kernelState)47 MOS_STATUS CodechalKernelHmeG10::SetCurbe(MHW_KERNEL_STATE *kernelState)
48 {
49     CODECHAL_ENCODE_CHK_NULL_RETURN(kernelState);
50 
51     Curbe     curbe;
52     uint32_t  mvShiftFactor       = 0;
53     uint32_t  prevMvReadPosFactor = 0;
54     uint32_t  scaleFactor;
55     bool      useMvFromPrevStep;
56     bool      writeDistortions;
57 
58     if (m_32xMeInUse)
59     {
60         useMvFromPrevStep   = false;
61         writeDistortions    = false;
62         scaleFactor         = scalingFactor32X;
63         mvShiftFactor       = 1;
64         prevMvReadPosFactor = 0;
65     }
66     else if (m_16xMeInUse)
67     {
68         useMvFromPrevStep   = Is32xMeEnabled() ? true : false;
69         writeDistortions    = false;
70         scaleFactor         = scalingFactor16X;
71         mvShiftFactor       = 2;
72         prevMvReadPosFactor = 1;
73     }
74     else if (m_4xMeInUse)
75     {
76         useMvFromPrevStep   = Is16xMeEnabled() ? true : false;
77         writeDistortions    = true;
78         scaleFactor         = scalingFactor4X;
79         mvShiftFactor       = 2;
80         prevMvReadPosFactor = 0;
81     }
82     else
83     {
84         return MOS_STATUS_INVALID_PARAMETER;
85     }
86 
87     curbe.m_data.DW3.SubPelMode = m_curbeParam.subPelMode;
88 
89     if (m_fieldScalingOutputInterleaved)
90     {
91         curbe.m_data.DW3.SrcAccess = curbe.m_data.DW3.RefAccess = CodecHal_PictureIsField(m_curbeParam.currOriginalPic);
92         curbe.m_data.DW7.SrcFieldPolarity                = CodecHal_PictureIsBottomField(m_curbeParam.currOriginalPic);
93     }
94     curbe.m_data.DW4.PictureHeightMinus1 = CODECHAL_GET_HEIGHT_IN_MACROBLOCKS(m_frameFieldHeight / scaleFactor) - 1;
95     curbe.m_data.DW4.PictureWidth        = CODECHAL_GET_HEIGHT_IN_MACROBLOCKS(m_frameWidth / scaleFactor);
96     curbe.m_data.DW5.QpPrimeY            = m_curbeParam.qpPrimeY;
97     curbe.m_data.DW6.WriteDistortions    = writeDistortions;
98     curbe.m_data.DW6.UseMvFromPrevStep   = useMvFromPrevStep;
99     if (m_vdencEnabled &&
100         (m_standard == CODECHAL_HEVC ||
101             m_standard == CODECHAL_VP9))
102     {
103         curbe.m_data.DW6.SuperCombineDist = 5;  //SuperCombineDist[m_curbeParam.targetUsage]; harded coded in KCM
104     }
105     else
106     {
107         curbe.m_data.DW6.SuperCombineDist = SuperCombineDist[m_curbeParam.targetUsage];
108     }
109     curbe.m_data.DW6.MaxVmvR = CodecHal_PictureIsFrame(m_curbeParam.currOriginalPic) ? m_curbeParam.maxMvLen * 4 : (m_curbeParam.maxMvLen >> 1) * 4;
110 
111     if (m_pictureCodingType == B_TYPE)
112     {
113         curbe.m_data.DW1.BiWeight             = 32;
114         curbe.m_data.DW13.NumRefIdxL1MinusOne = m_curbeParam.numRefIdxL1Minus1;
115     }
116 
117     if (m_pictureCodingType == B_TYPE || m_pictureCodingType == P_TYPE)
118     {
119         curbe.m_data.DW13.NumRefIdxL0MinusOne = m_curbeParam.numRefIdxL0Minus1;
120     }
121 
122     if (Is16xMeEnabled() && m_surfaceParam.vdencStreamInEnabled)
123     {
124         curbe.m_data.DW30.ActualMBHeight = CODECHAL_GET_HEIGHT_IN_MACROBLOCKS(m_frameFieldHeight);
125         curbe.m_data.DW30.ActualMBWidth  = CODECHAL_GET_HEIGHT_IN_MACROBLOCKS(m_frameWidth);
126     }
127 
128     curbe.m_data.DW13.RefStreaminCost = 0;
129     // This flag is to indicate the ROI source type instead of indicating ROI is enabled or not
130     curbe.m_data.DW13.ROIEnable = 0;
131 
132     if (!CodecHal_PictureIsFrame(m_curbeParam.currOriginalPic))
133     {
134         if (m_pictureCodingType != I_TYPE)
135         {
136             curbe.m_data.DW14.List0RefID0FieldParity = m_curbeParam.list0RefID0FieldParity;
137             curbe.m_data.DW14.List0RefID1FieldParity = m_curbeParam.list0RefID1FieldParity;
138             curbe.m_data.DW14.List0RefID2FieldParity = m_curbeParam.list0RefID2FieldParity;
139             curbe.m_data.DW14.List0RefID3FieldParity = m_curbeParam.list0RefID3FieldParity;
140             curbe.m_data.DW14.List0RefID4FieldParity = m_curbeParam.list0RefID4FieldParity;
141             curbe.m_data.DW14.List0RefID5FieldParity = m_curbeParam.list0RefID5FieldParity;
142             curbe.m_data.DW14.List0RefID6FieldParity = m_curbeParam.list0RefID6FieldParity;
143             curbe.m_data.DW14.List0RefID7FieldParity = m_curbeParam.list0RefID7FieldParity;
144         }
145         if (m_pictureCodingType == B_TYPE)
146         {
147             curbe.m_data.DW14.List1RefID0FieldParity = m_curbeParam.list1RefID0FieldParity;
148             curbe.m_data.DW14.List1RefID1FieldParity = m_curbeParam.list1RefID1FieldParity;
149         }
150     }
151     curbe.m_data.DW15.MvShiftFactor       = mvShiftFactor;
152     curbe.m_data.DW15.PrevMvReadPosFactor = prevMvReadPosFactor;
153 
154     // r3 & r4
155     uint8_t methodIndex;
156     if (m_pictureCodingType == B_TYPE)
157     {
158         CODECHAL_ENCODE_CHK_NULL_RETURN(m_bmeMethodTable);
159         methodIndex = m_curbeParam.bmeMethodTable ?
160             m_curbeParam.bmeMethodTable[m_curbeParam.targetUsage] : m_bmeMethodTable[m_curbeParam.targetUsage];
161     }
162     else
163     {
164         CODECHAL_ENCODE_CHK_NULL_RETURN(m_meMethodTable);
165         methodIndex = m_curbeParam.meMethodTable ?
166             m_curbeParam.meMethodTable[m_curbeParam.targetUsage] : m_meMethodTable[m_curbeParam.targetUsage];
167     }
168 
169     uint8_t tableIndex = (m_pictureCodingType == B_TYPE) ? 1 : 0;
170     MOS_SecureMemcpy(&curbe.m_data.SpDelta, 14 * sizeof(uint32_t), codechalEncodeSearchPath[tableIndex][methodIndex], 14 * sizeof(uint32_t));
171 
172     if (m_4xMeInUse && m_vdencEnabled &&
173         (m_standard == CODECHAL_HEVC ||
174             m_standard == CODECHAL_VP9))
175     {
176         curbe.m_data.DW6.LCUSize           = 1;  // only LCU64 is supported by VDEnc HW
177         curbe.m_data.DW6.InputStreamInEn   = 0;
178         curbe.m_data.DW31.NumImePredictors = 8;
179         curbe.m_data.DW31.MaxCuSize        = 3;
180         curbe.m_data.DW31.MaxTuSize        = 3;
181 
182         switch (m_curbeParam.targetUsage)
183         {
184         case 1:
185         case 4:
186             curbe.m_data.DW36.NumMergeCandCu64x64 = 4;
187             curbe.m_data.DW36.NumMergeCandCu32x32 = 3;
188             curbe.m_data.DW36.NumMergeCandCu16x16 = 2;
189             curbe.m_data.DW36.NumMergeCandCu8x8   = 1;
190             break;
191         case 7:
192             curbe.m_data.DW36.NumMergeCandCu64x64 = 2;
193             curbe.m_data.DW36.NumMergeCandCu32x32 = 2;
194             curbe.m_data.DW36.NumMergeCandCu16x16 = 2;
195             curbe.m_data.DW36.NumMergeCandCu8x8   = 0;
196             break;
197         default:
198             break;
199         }
200     }
201 
202     //r5
203     curbe.m_data.DW40._4xMeMvOutputDataSurfIndex      = BindingTableOffset::meOutputMvDataSurface;
204     curbe.m_data.DW41._16xOr32xMeMvInputDataSurfIndex = BindingTableOffset::meInputMvDataSurface;
205     curbe.m_data.DW42._4xMeOutputDistSurfIndex        = BindingTableOffset::meDistortionSurface;
206     curbe.m_data.DW43._4xMeOutputBrcDistSurfIndex     = BindingTableOffset::meBrcDistortion;
207     curbe.m_data.DW44.VMEFwdInterPredictionSurfIndex  = BindingTableOffset::meCurrForFwdRef;
208     curbe.m_data.DW45.VMEBwdInterPredictionSurfIndex  = BindingTableOffset::meCurrForBwdRef;
209     curbe.m_data.DW46.VDEncStreamInOutputSurfIndex    = BindingTableOffset::meVdencStreamInOutputBuffer;
210     curbe.m_data.DW47.VDEncStreamInInputSurfIndex     = BindingTableOffset::meVdencStreamInInputBuffer;
211 
212     CODECHAL_ENCODE_CHK_STATUS_RETURN(kernelState->m_dshRegion.AddData(&curbe.m_data, kernelState->dwCurbeOffset, Curbe::m_curbeSize));
213 
214     return MOS_STATUS_SUCCESS;
215 }
216 
SendSurfaces(PMOS_COMMAND_BUFFER cmd,MHW_KERNEL_STATE * kernelState)217 MOS_STATUS CodechalKernelHmeG10::SendSurfaces(PMOS_COMMAND_BUFFER cmd, MHW_KERNEL_STATE *kernelState)
218 {
219     CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalKernelHme::SendSurfaces(cmd, kernelState));
220 
221     if (m_vdencEnabled && m_4xMeInUse)
222     {
223         CODECHAL_SURFACE_CODEC_PARAMS surfaceParams;
224         MOS_ZeroMemory(&surfaceParams, sizeof(surfaceParams));
225         surfaceParams.dwSize                = m_surfaceParam.vdencStreamInSurfaceSize;
226         surfaceParams.bIs2DSurface          = false;
227         surfaceParams.presBuffer            = m_surfaceParam.meVdencStreamInBuffer;
228         surfaceParams.dwBindingTableOffset  = BindingTableOffset::meVdencStreamInOutputBuffer;
229         surfaceParams.dwCacheabilityControl = m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_BRC_ME_DISTORTION_ENCODE].Value;
230         surfaceParams.bIsWritable           = true;
231         surfaceParams.bRenderTarget         = true;
232         CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
233             m_hwInterface,
234             cmd,
235             &surfaceParams,
236             kernelState));
237     }
238     return MOS_STATUS_SUCCESS;
239 }
240 
GetActiveKernelState()241 MHW_KERNEL_STATE *CodechalKernelHmeG10::GetActiveKernelState()
242 {
243     EncOperation operation;
244 
245     uint32_t kernelOffset = 0;
246     uint32_t kernelIndex;
247 
248     if (m_pictureCodingType == P_TYPE)
249     {
250         kernelIndex  = KernelIndex::hmeP;
251         operation    = ENC_ME;
252         kernelOffset = 0;
253     }
254     else
255     {
256         kernelIndex  = KernelIndex::hmeB;
257         operation    = ENC_ME;
258         kernelOffset = 1;
259     }
260     if (m_vdencEnabled && m_4xMeInUse)
261     {
262         if (m_standard == CODECHAL_AVC)
263         {
264             kernelIndex  = KernelIndex::hmeVDEncStreamIn;
265             operation    = VDENC_ME;
266             kernelOffset = 0;
267         }
268         else
269         {
270             kernelIndex  = KernelIndex::hmeVDEncStreamIn;
271             operation    = VDENC_STREAMIN;
272             kernelOffset = 0;
273         }
274     }
275 
276     auto it = m_kernelStatePool.find(kernelIndex);
277     if (it != m_kernelStatePool.end())
278     {
279         return it->second;
280     }
281     MHW_KERNEL_STATE *kernelState = nullptr;
282     CreateKernelState(&kernelState, kernelIndex, operation, kernelOffset);
283 
284     return kernelState;
285 }
286 
GetMediaStateType()287 CODECHAL_MEDIA_STATE_TYPE CodechalKernelHmeG10::GetMediaStateType()
288 {
289     CODECHAL_MEDIA_STATE_TYPE mediaStateType;
290     mediaStateType = m_32xMeInUse ? CODECHAL_MEDIA_STATE_32X_ME : m_16xMeInUse ? CODECHAL_MEDIA_STATE_16X_ME : CODECHAL_MEDIA_STATE_4X_ME;
291     if (m_4xMeInUse && m_vdencEnabled && m_standard == CODECHAL_AVC)
292     {
293         mediaStateType = CODECHAL_MEDIA_STATE_ME_VDENC_STREAMIN;
294     }
295 
296     return mediaStateType;
297 }
298