1 /*
2 * Copyright (c) 2017, Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 */
22 //!
23 //! \file codechal_kernel_hme_g10.cpp
24 //! \brief Hme kernel implementation for Gen10 platform
25 //!
26 #include "codechal_kernel_hme_g10.h"
27
28 // clang-format off
29 const uint32_t CodechalKernelHmeG10::Curbe::m_initCurbe[48] =
30 {
31 0x00000000, 0x00200010, 0x00003939, 0x77a43000, 0x00000000, 0x28300000, 0x00000000, 0x00000000,
32 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000200,
33 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
34 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
35 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
36 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff
37 };
38 // clang-format on
39
CodechalKernelHmeG10(CodechalEncoderState * encoder,bool me4xDistBufferSupported)40 CodechalKernelHmeG10::CodechalKernelHmeG10(
41 CodechalEncoderState *encoder,
42 bool me4xDistBufferSupported)
43 : CodechalKernelHme(encoder, me4xDistBufferSupported)
44 {
45 }
46
SetCurbe(MHW_KERNEL_STATE * kernelState)47 MOS_STATUS CodechalKernelHmeG10::SetCurbe(MHW_KERNEL_STATE *kernelState)
48 {
49 CODECHAL_ENCODE_CHK_NULL_RETURN(kernelState);
50
51 Curbe curbe;
52 uint32_t mvShiftFactor = 0;
53 uint32_t prevMvReadPosFactor = 0;
54 uint32_t scaleFactor;
55 bool useMvFromPrevStep;
56 bool writeDistortions;
57
58 if (m_32xMeInUse)
59 {
60 useMvFromPrevStep = false;
61 writeDistortions = false;
62 scaleFactor = scalingFactor32X;
63 mvShiftFactor = 1;
64 prevMvReadPosFactor = 0;
65 }
66 else if (m_16xMeInUse)
67 {
68 useMvFromPrevStep = Is32xMeEnabled() ? true : false;
69 writeDistortions = false;
70 scaleFactor = scalingFactor16X;
71 mvShiftFactor = 2;
72 prevMvReadPosFactor = 1;
73 }
74 else if (m_4xMeInUse)
75 {
76 useMvFromPrevStep = Is16xMeEnabled() ? true : false;
77 writeDistortions = true;
78 scaleFactor = scalingFactor4X;
79 mvShiftFactor = 2;
80 prevMvReadPosFactor = 0;
81 }
82 else
83 {
84 return MOS_STATUS_INVALID_PARAMETER;
85 }
86
87 curbe.m_data.DW3.SubPelMode = m_curbeParam.subPelMode;
88
89 if (m_fieldScalingOutputInterleaved)
90 {
91 curbe.m_data.DW3.SrcAccess = curbe.m_data.DW3.RefAccess = CodecHal_PictureIsField(m_curbeParam.currOriginalPic);
92 curbe.m_data.DW7.SrcFieldPolarity = CodecHal_PictureIsBottomField(m_curbeParam.currOriginalPic);
93 }
94 curbe.m_data.DW4.PictureHeightMinus1 = CODECHAL_GET_HEIGHT_IN_MACROBLOCKS(m_frameFieldHeight / scaleFactor) - 1;
95 curbe.m_data.DW4.PictureWidth = CODECHAL_GET_HEIGHT_IN_MACROBLOCKS(m_frameWidth / scaleFactor);
96 curbe.m_data.DW5.QpPrimeY = m_curbeParam.qpPrimeY;
97 curbe.m_data.DW6.WriteDistortions = writeDistortions;
98 curbe.m_data.DW6.UseMvFromPrevStep = useMvFromPrevStep;
99 if (m_vdencEnabled &&
100 (m_standard == CODECHAL_HEVC ||
101 m_standard == CODECHAL_VP9))
102 {
103 curbe.m_data.DW6.SuperCombineDist = 5; //SuperCombineDist[m_curbeParam.targetUsage]; harded coded in KCM
104 }
105 else
106 {
107 curbe.m_data.DW6.SuperCombineDist = SuperCombineDist[m_curbeParam.targetUsage];
108 }
109 curbe.m_data.DW6.MaxVmvR = CodecHal_PictureIsFrame(m_curbeParam.currOriginalPic) ? m_curbeParam.maxMvLen * 4 : (m_curbeParam.maxMvLen >> 1) * 4;
110
111 if (m_pictureCodingType == B_TYPE)
112 {
113 curbe.m_data.DW1.BiWeight = 32;
114 curbe.m_data.DW13.NumRefIdxL1MinusOne = m_curbeParam.numRefIdxL1Minus1;
115 }
116
117 if (m_pictureCodingType == B_TYPE || m_pictureCodingType == P_TYPE)
118 {
119 curbe.m_data.DW13.NumRefIdxL0MinusOne = m_curbeParam.numRefIdxL0Minus1;
120 }
121
122 if (Is16xMeEnabled() && m_surfaceParam.vdencStreamInEnabled)
123 {
124 curbe.m_data.DW30.ActualMBHeight = CODECHAL_GET_HEIGHT_IN_MACROBLOCKS(m_frameFieldHeight);
125 curbe.m_data.DW30.ActualMBWidth = CODECHAL_GET_HEIGHT_IN_MACROBLOCKS(m_frameWidth);
126 }
127
128 curbe.m_data.DW13.RefStreaminCost = 0;
129 // This flag is to indicate the ROI source type instead of indicating ROI is enabled or not
130 curbe.m_data.DW13.ROIEnable = 0;
131
132 if (!CodecHal_PictureIsFrame(m_curbeParam.currOriginalPic))
133 {
134 if (m_pictureCodingType != I_TYPE)
135 {
136 curbe.m_data.DW14.List0RefID0FieldParity = m_curbeParam.list0RefID0FieldParity;
137 curbe.m_data.DW14.List0RefID1FieldParity = m_curbeParam.list0RefID1FieldParity;
138 curbe.m_data.DW14.List0RefID2FieldParity = m_curbeParam.list0RefID2FieldParity;
139 curbe.m_data.DW14.List0RefID3FieldParity = m_curbeParam.list0RefID3FieldParity;
140 curbe.m_data.DW14.List0RefID4FieldParity = m_curbeParam.list0RefID4FieldParity;
141 curbe.m_data.DW14.List0RefID5FieldParity = m_curbeParam.list0RefID5FieldParity;
142 curbe.m_data.DW14.List0RefID6FieldParity = m_curbeParam.list0RefID6FieldParity;
143 curbe.m_data.DW14.List0RefID7FieldParity = m_curbeParam.list0RefID7FieldParity;
144 }
145 if (m_pictureCodingType == B_TYPE)
146 {
147 curbe.m_data.DW14.List1RefID0FieldParity = m_curbeParam.list1RefID0FieldParity;
148 curbe.m_data.DW14.List1RefID1FieldParity = m_curbeParam.list1RefID1FieldParity;
149 }
150 }
151 curbe.m_data.DW15.MvShiftFactor = mvShiftFactor;
152 curbe.m_data.DW15.PrevMvReadPosFactor = prevMvReadPosFactor;
153
154 // r3 & r4
155 uint8_t methodIndex;
156 if (m_pictureCodingType == B_TYPE)
157 {
158 CODECHAL_ENCODE_CHK_NULL_RETURN(m_bmeMethodTable);
159 methodIndex = m_curbeParam.bmeMethodTable ?
160 m_curbeParam.bmeMethodTable[m_curbeParam.targetUsage] : m_bmeMethodTable[m_curbeParam.targetUsage];
161 }
162 else
163 {
164 CODECHAL_ENCODE_CHK_NULL_RETURN(m_meMethodTable);
165 methodIndex = m_curbeParam.meMethodTable ?
166 m_curbeParam.meMethodTable[m_curbeParam.targetUsage] : m_meMethodTable[m_curbeParam.targetUsage];
167 }
168
169 uint8_t tableIndex = (m_pictureCodingType == B_TYPE) ? 1 : 0;
170 MOS_SecureMemcpy(&curbe.m_data.SpDelta, 14 * sizeof(uint32_t), codechalEncodeSearchPath[tableIndex][methodIndex], 14 * sizeof(uint32_t));
171
172 if (m_4xMeInUse && m_vdencEnabled &&
173 (m_standard == CODECHAL_HEVC ||
174 m_standard == CODECHAL_VP9))
175 {
176 curbe.m_data.DW6.LCUSize = 1; // only LCU64 is supported by VDEnc HW
177 curbe.m_data.DW6.InputStreamInEn = 0;
178 curbe.m_data.DW31.NumImePredictors = 8;
179 curbe.m_data.DW31.MaxCuSize = 3;
180 curbe.m_data.DW31.MaxTuSize = 3;
181
182 switch (m_curbeParam.targetUsage)
183 {
184 case 1:
185 case 4:
186 curbe.m_data.DW36.NumMergeCandCu64x64 = 4;
187 curbe.m_data.DW36.NumMergeCandCu32x32 = 3;
188 curbe.m_data.DW36.NumMergeCandCu16x16 = 2;
189 curbe.m_data.DW36.NumMergeCandCu8x8 = 1;
190 break;
191 case 7:
192 curbe.m_data.DW36.NumMergeCandCu64x64 = 2;
193 curbe.m_data.DW36.NumMergeCandCu32x32 = 2;
194 curbe.m_data.DW36.NumMergeCandCu16x16 = 2;
195 curbe.m_data.DW36.NumMergeCandCu8x8 = 0;
196 break;
197 default:
198 break;
199 }
200 }
201
202 //r5
203 curbe.m_data.DW40._4xMeMvOutputDataSurfIndex = BindingTableOffset::meOutputMvDataSurface;
204 curbe.m_data.DW41._16xOr32xMeMvInputDataSurfIndex = BindingTableOffset::meInputMvDataSurface;
205 curbe.m_data.DW42._4xMeOutputDistSurfIndex = BindingTableOffset::meDistortionSurface;
206 curbe.m_data.DW43._4xMeOutputBrcDistSurfIndex = BindingTableOffset::meBrcDistortion;
207 curbe.m_data.DW44.VMEFwdInterPredictionSurfIndex = BindingTableOffset::meCurrForFwdRef;
208 curbe.m_data.DW45.VMEBwdInterPredictionSurfIndex = BindingTableOffset::meCurrForBwdRef;
209 curbe.m_data.DW46.VDEncStreamInOutputSurfIndex = BindingTableOffset::meVdencStreamInOutputBuffer;
210 curbe.m_data.DW47.VDEncStreamInInputSurfIndex = BindingTableOffset::meVdencStreamInInputBuffer;
211
212 CODECHAL_ENCODE_CHK_STATUS_RETURN(kernelState->m_dshRegion.AddData(&curbe.m_data, kernelState->dwCurbeOffset, Curbe::m_curbeSize));
213
214 return MOS_STATUS_SUCCESS;
215 }
216
SendSurfaces(PMOS_COMMAND_BUFFER cmd,MHW_KERNEL_STATE * kernelState)217 MOS_STATUS CodechalKernelHmeG10::SendSurfaces(PMOS_COMMAND_BUFFER cmd, MHW_KERNEL_STATE *kernelState)
218 {
219 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalKernelHme::SendSurfaces(cmd, kernelState));
220
221 if (m_vdencEnabled && m_4xMeInUse)
222 {
223 CODECHAL_SURFACE_CODEC_PARAMS surfaceParams;
224 MOS_ZeroMemory(&surfaceParams, sizeof(surfaceParams));
225 surfaceParams.dwSize = m_surfaceParam.vdencStreamInSurfaceSize;
226 surfaceParams.bIs2DSurface = false;
227 surfaceParams.presBuffer = m_surfaceParam.meVdencStreamInBuffer;
228 surfaceParams.dwBindingTableOffset = BindingTableOffset::meVdencStreamInOutputBuffer;
229 surfaceParams.dwCacheabilityControl = m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_BRC_ME_DISTORTION_ENCODE].Value;
230 surfaceParams.bIsWritable = true;
231 surfaceParams.bRenderTarget = true;
232 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
233 m_hwInterface,
234 cmd,
235 &surfaceParams,
236 kernelState));
237 }
238 return MOS_STATUS_SUCCESS;
239 }
240
GetActiveKernelState()241 MHW_KERNEL_STATE *CodechalKernelHmeG10::GetActiveKernelState()
242 {
243 EncOperation operation;
244
245 uint32_t kernelOffset = 0;
246 uint32_t kernelIndex;
247
248 if (m_pictureCodingType == P_TYPE)
249 {
250 kernelIndex = KernelIndex::hmeP;
251 operation = ENC_ME;
252 kernelOffset = 0;
253 }
254 else
255 {
256 kernelIndex = KernelIndex::hmeB;
257 operation = ENC_ME;
258 kernelOffset = 1;
259 }
260 if (m_vdencEnabled && m_4xMeInUse)
261 {
262 if (m_standard == CODECHAL_AVC)
263 {
264 kernelIndex = KernelIndex::hmeVDEncStreamIn;
265 operation = VDENC_ME;
266 kernelOffset = 0;
267 }
268 else
269 {
270 kernelIndex = KernelIndex::hmeVDEncStreamIn;
271 operation = VDENC_STREAMIN;
272 kernelOffset = 0;
273 }
274 }
275
276 auto it = m_kernelStatePool.find(kernelIndex);
277 if (it != m_kernelStatePool.end())
278 {
279 return it->second;
280 }
281 MHW_KERNEL_STATE *kernelState = nullptr;
282 CreateKernelState(&kernelState, kernelIndex, operation, kernelOffset);
283
284 return kernelState;
285 }
286
GetMediaStateType()287 CODECHAL_MEDIA_STATE_TYPE CodechalKernelHmeG10::GetMediaStateType()
288 {
289 CODECHAL_MEDIA_STATE_TYPE mediaStateType;
290 mediaStateType = m_32xMeInUse ? CODECHAL_MEDIA_STATE_32X_ME : m_16xMeInUse ? CODECHAL_MEDIA_STATE_16X_ME : CODECHAL_MEDIA_STATE_4X_ME;
291 if (m_4xMeInUse && m_vdencEnabled && m_standard == CODECHAL_AVC)
292 {
293 mediaStateType = CODECHAL_MEDIA_STATE_ME_VDENC_STREAMIN;
294 }
295
296 return mediaStateType;
297 }
298