1 /******************************************************************************\
2 Copyright (c) 2017-2018, Intel Corporation
3 All rights reserved.
4
5 Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
6
7 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
8
9 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
10
11 3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
12
13 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
14
15 This sample was distributed or derived from the Intel's Media Samples package.
16 The original version of this sample may be obtained from https://software.intel.com/en-us/intel-media-server-studio
17 or https://software.intel.com/en-us/media-client-solutions-support.
18 \**********************************************************************************/
19
20 #include "fei_predictors_repacking.h"
21 #include <algorithm>
22
23 const mfxU8 ZigzagOrder[16] = { 0, 1, 4, 5, 2, 3, 6, 7, 8, 9, 12, 13, 10, 11, 14, 15 };
24
PredictorsRepaking()25 PredictorsRepaking::PredictorsRepaking() :
26 m_max_fei_enc_mvp_num(4),
27 m_repakingMode(PERFORMANCE),
28 m_width(0),
29 m_height(0),
30 m_downsample_power2(0),
31 m_widthCU_ds(0),
32 m_heightCU_ds(0),
33 m_widthCU_enc(0),
34 m_heightCU_enc(0),
35 m_maxNumMvPredictorsL0(0),
36 m_maxNumMvPredictorsL1(0)
37 {}
38
Init(const mfxVideoParam & videoParams,mfxU16 preencDSfactor,const mfxU16 numMvPredictors[2])39 mfxStatus PredictorsRepaking::Init(const mfxVideoParam& videoParams, mfxU16 preencDSfactor, const mfxU16 numMvPredictors[2])
40 {
41 if (videoParams.mfx.FrameInfo.Width == 0 || videoParams.mfx.FrameInfo.Height == 0)
42 return MFX_ERR_INVALID_VIDEO_PARAM;
43
44 m_width = videoParams.mfx.FrameInfo.Width;
45 m_height = videoParams.mfx.FrameInfo.Height;
46 m_downsample_power2 = ConvertDSratioPower2(preencDSfactor);
47
48 m_widthCU_ds = (MSDK_ALIGN16((MSDK_ALIGN16(m_width) >> m_downsample_power2))) >> 4;
49 m_heightCU_ds = (MSDK_ALIGN16((MSDK_ALIGN16(m_height) >> m_downsample_power2))) >> 4;
50 m_widthCU_enc = (MSDK_ALIGN32(m_width)) >> 4;
51 m_heightCU_enc = (MSDK_ALIGN32(m_height)) >> 4;
52
53 m_maxNumMvPredictorsL0 = numMvPredictors[0];
54 m_maxNumMvPredictorsL1 = numMvPredictors[1];
55
56 return MFX_ERR_NONE;
57 }
58
ConvertDSratioPower2(mfxU8 downsample_ratio)59 mfxU8 PredictorsRepaking::ConvertDSratioPower2(mfxU8 downsample_ratio)
60 {
61 switch (downsample_ratio)
62 {
63 case 1:
64 return 0;
65 case 2:
66 return 1;
67 case 4:
68 return 2;
69 case 8:
70 return 3;
71 default:
72 return 0;
73 }
74 }
75
RepackPredictors(const HevcTask & task,mfxExtFeiHevcEncMVPredictors & mvp,mfxU16 nMvPredictors[2])76 mfxStatus PredictorsRepaking::RepackPredictors(const HevcTask& task, mfxExtFeiHevcEncMVPredictors& mvp, mfxU16 nMvPredictors[2])
77 {
78 mfxStatus sts = MFX_ERR_NONE;
79
80 switch (m_repakingMode)
81 {
82 case PERFORMANCE:
83 sts = RepackPredictorsPerformance(task, mvp, nMvPredictors);
84 break;
85 case QUALITY:
86 sts = RepackPredictorsQuality(task, mvp, nMvPredictors);
87 break;
88 default:
89 return MFX_ERR_UNSUPPORTED;
90 }
91
92 return sts;
93 }
94
RepackPredictorsPerformance(const HevcTask & task,mfxExtFeiHevcEncMVPredictors & mvp,mfxU16 nMvPredictors[2])95 mfxStatus PredictorsRepaking::RepackPredictorsPerformance(const HevcTask& task, mfxExtFeiHevcEncMVPredictors& mvp, mfxU16 nMvPredictors[2])
96 {
97 std::vector<mfxExtFeiPreEncMVExtended*> mvs_vec;
98 std::vector<const RefIdxPair*> refIdx_vec;
99
100 mfxU8 numFinalL0Predictors = (std::min)(task.m_numRefActive[0], (mfxU8)m_maxNumMvPredictorsL0);
101 mfxU8 numFinalL1Predictors = (std::min)(task.m_numRefActive[1], (mfxU8)m_maxNumMvPredictorsL1);
102 mfxU8 numPredPairs = (std::min)(m_max_fei_enc_mvp_num, (std::max)(numFinalL0Predictors, numFinalL1Predictors));
103
104 // I-frames, nothing to do
105 if (numPredPairs == 0 || (task.m_frameType & MFX_FRAMETYPE_I))
106 return MFX_ERR_NONE;
107
108 mvs_vec.reserve(m_max_fei_enc_mvp_num);
109 refIdx_vec.reserve(m_max_fei_enc_mvp_num);
110
111 // PreENC parameters reading
112 for (std::list<PreENCOutput>::const_iterator it = task.m_preEncOutput.begin(); it != task.m_preEncOutput.end(); ++it)
113 {
114 if (!it->m_mv)
115 return MFX_ERR_UNDEFINED_BEHAVIOR;
116
117 mvs_vec.push_back((*it).m_mv);
118 refIdx_vec.push_back(&(*it).m_activeRefIdxPair);
119 }
120
121 // check that task has enough PreENC motion vectors dumps to create MVPredictors for Encode
122 if (numPredPairs > mvs_vec.size())
123 return MFX_ERR_UNDEFINED_BEHAVIOR;
124
125 if (m_widthCU_enc > mvp.Pitch || m_heightCU_enc > mvp.Height)
126 MSDK_CHECK_STATUS(MFX_ERR_UNDEFINED_BEHAVIOR, "Invalid MVP buffer size");
127
128 const mfxI16Pair zeroPair = { 0, 0 };
129
130 // disable all MVP blocks at first
131 std::for_each(mvp.Data, mvp.Data + mvp.Pitch * mvp.Height,
132 [](mfxFeiHevcEncMVPredictors& block)
133 {
134 block.BlockSize = 0;
135 block.RefIdx[0].RefL0 = block.RefIdx[0].RefL1 = 0xf;
136 block.RefIdx[1].RefL0 = block.RefIdx[1].RefL1 = 0xf;
137 block.RefIdx[2].RefL0 = block.RefIdx[2].RefL1 = 0xf;
138 block.RefIdx[3].RefL0 = block.RefIdx[3].RefL1 = 0xf;
139 }
140 );
141
142 // the main loop thru all blocks
143 for (mfxU32 rowIdx = 0; rowIdx < m_heightCU_enc; ++rowIdx) // row index for full surface (raster-scan order)
144 {
145 for (mfxU32 colIdx = 0; colIdx < m_widthCU_enc; ++colIdx) // column index for full surface (raster-scan order)
146 {
147 // calculation of the input index for encoder after permutation from raster scan order index into 32x32 layout
148 // HEVC encoder works with 32x32 layout
149 mfxU32 permutEncIdx =
150 ((colIdx >> 1) << 2) // column offset;
151 + (rowIdx & ~1) * m_widthCU_enc // offset for new line of 32x32 blocks layout;
152 + (colIdx & 1) // zero or single offset depending on the number of comumn index;
153 + ((rowIdx & 1) << 1); // zero or double offset depending on the number of row index,
154 // zero shift for top 16x16 blocks into 32x32 layout and double for bottom blocks;
155
156 mfxFeiHevcEncMVPredictors& block = mvp.Data[permutEncIdx];
157
158 // BlockSize is used only when mfxExtFeiHevcEncFrameCtrl::MVPredictor = 7
159 // 0 - MV predictor is disabled
160 // 1 - enabled per 16x16 block
161 // 2 - enabled per 32x32 block (used only first 16x16 block data)
162 block.BlockSize = 1; // Using finest granularity
163
164 mfxU32 linearPreEncIdx = rowIdx * m_widthCU_ds + colIdx;
165 for (mfxU32 j = 0; j < numPredPairs; ++j)
166 {
167 block.RefIdx[j].RefL0 = refIdx_vec[j]->RefL0;
168 block.RefIdx[j].RefL1 = refIdx_vec[j]->RefL1;
169
170 if (m_downsample_power2 == 0)// w/o VPP
171 {
172 if (colIdx >= m_widthCU_ds || rowIdx >= m_heightCU_ds)
173 {
174 block.MV[j][0] = zeroPair;
175 block.MV[j][1] = zeroPair;
176 }
177 else
178 {
179 block.MV[j][0] = mvs_vec[j]->MB[linearPreEncIdx].MV[0][0];
180 block.MV[j][1] = mvs_vec[j]->MB[linearPreEncIdx].MV[0][1];
181 }
182 }
183 else
184 {
185 mfxU32 preencCUIdx = 0; // index CU from PreENC output
186 mfxU32 rowMVIdx; // row index for motion vector
187 mfxU32 colMVIdx; // column index for motion vector
188 mfxU32 preencMVIdx = 0; // linear index for motion vector
189
190 switch (m_downsample_power2)
191 {
192 case 1:
193 preencCUIdx = (rowIdx >> 1) * m_widthCU_ds + (colIdx >> 1);
194 rowMVIdx = rowIdx & 1;
195 colMVIdx = colIdx & 1;
196 preencMVIdx = rowMVIdx * 8 + colMVIdx * 2;
197 break;
198 case 2:
199 preencCUIdx = (rowIdx >> 2) * m_widthCU_ds + (colIdx >> 2);
200 rowMVIdx = rowIdx & 3;
201 colMVIdx = colIdx & 3;
202 preencMVIdx = rowMVIdx * 4 + colMVIdx;
203 break;
204 case 3:
205 preencCUIdx = (rowIdx >> 3) * m_widthCU_ds + (colIdx >> 3);
206 rowMVIdx = rowIdx & 7;
207 colMVIdx = colIdx & 7;
208 preencMVIdx = rowMVIdx / 2 * 4 + colMVIdx / 2;
209 break;
210 default:
211 break;
212 }
213
214 block.MV[j][0] = mvs_vec[j]->MB[preencCUIdx].MV[ZigzagOrder[preencMVIdx]][0];
215 block.MV[j][1] = mvs_vec[j]->MB[preencCUIdx].MV[ZigzagOrder[preencMVIdx]][1];
216
217 block.MV[j][0].x <<= m_downsample_power2;
218 block.MV[j][0].y <<= m_downsample_power2;
219 block.MV[j][1].x <<= m_downsample_power2;
220 block.MV[j][1].y <<= m_downsample_power2;
221 }
222 }
223
224 // Duplicate predictors to the first L0 reference in the first L1 MVP slot
225 if (task.m_ldb)
226 {
227 assert(m_maxNumMvPredictorsL1 == 1);
228
229 block.RefIdx[0].RefL1 = block.RefIdx[0].RefL0;
230 block.MV[0][1] = block.MV[0][0];
231 numFinalL1Predictors = 1;
232 }
233 }
234 }
235
236 nMvPredictors[0] = numFinalL0Predictors;
237 nMvPredictors[1] = numFinalL1Predictors;
238
239 return MFX_ERR_NONE;
240 }
241
242 void SelectFromMV(const mfxI16Pair(*mv)[2], mfxI32 count, mfxI16Pair(&res)[2]);
243
RepackPredictorsQuality(const HevcTask & task,mfxExtFeiHevcEncMVPredictors & mvp,mfxU16 nMvPredictors[2])244 mfxStatus PredictorsRepaking::RepackPredictorsQuality(const HevcTask& task, mfxExtFeiHevcEncMVPredictors& mvp, mfxU16 nMvPredictors[2])
245 {
246 std::vector<mfxExtFeiPreEncMVExtended*> mvs_vec;
247 std::vector<mfxExtFeiPreEncMBStatExtended*> mbs_vec;
248 std::vector<const RefIdxPair*> refIdx_vec;
249
250 mfxU8 numFinalL0Predictors = (std::min)(task.m_numRefActive[0], (mfxU8)m_maxNumMvPredictorsL0);
251 // Currently RepackPredictorsQuality() doesn't have logic to handle L1 predictors of GPB frames
252 mfxU8 numFinalL1Predictors = (std::min)((mfxU8)(task.m_ldb ? 0 : task.m_numRefActive[1]), (mfxU8)m_maxNumMvPredictorsL1);
253 mfxU8 numPredPairs = (std::min)(m_max_fei_enc_mvp_num, (std::max)(numFinalL0Predictors, numFinalL1Predictors));
254
255 // I-frames, nothing to do
256 if (numPredPairs == 0 || (task.m_frameType & MFX_FRAMETYPE_I))
257 return MFX_ERR_NONE;
258
259 mvs_vec.reserve(m_max_fei_enc_mvp_num);
260 mbs_vec.reserve(m_max_fei_enc_mvp_num);
261 refIdx_vec.reserve(m_max_fei_enc_mvp_num);
262
263 // PreENC parameters reading
264 for (std::list<PreENCOutput>::const_iterator it = task.m_preEncOutput.begin(); it != task.m_preEncOutput.end(); ++it)
265 {
266 if (!it->m_mv || !it->m_mb)
267 return MFX_ERR_UNDEFINED_BEHAVIOR;
268 mvs_vec.push_back((*it).m_mv);
269 mbs_vec.push_back((*it).m_mb);
270 refIdx_vec.push_back(&(*it).m_activeRefIdxPair);
271 }
272
273 // check that task has enough PreENC motion vectors dumps to create MVPredictors for Encode
274 if (numPredPairs > mvs_vec.size())
275 return MFX_ERR_UNDEFINED_BEHAVIOR;
276
277 if (m_widthCU_enc > mvp.Pitch || m_heightCU_enc > mvp.Height)
278 MSDK_CHECK_STATUS(MFX_ERR_UNDEFINED_BEHAVIOR, "Invalid MVP buffer size");
279
280 const mfxI16Pair zeroPair = { 0, 0 };
281
282 // disable all MVP blocks at first
283 std::for_each(mvp.Data, mvp.Data + mvp.Pitch * mvp.Height,
284 [](mfxFeiHevcEncMVPredictors& block)
285 {
286 block.BlockSize = 0;
287 block.RefIdx[0].RefL0 = block.RefIdx[0].RefL1 = 0xf;
288 block.RefIdx[1].RefL0 = block.RefIdx[1].RefL1 = 0xf;
289 block.RefIdx[2].RefL0 = block.RefIdx[2].RefL1 = 0xf;
290 block.RefIdx[3].RefL0 = block.RefIdx[3].RefL1 = 0xf;
291 }
292 );
293
294 // the main loop thru all blocks
295 for (mfxU32 rowIdx = 0; rowIdx < m_heightCU_enc; ++rowIdx) // row index for full surface (raster-scan order)
296 {
297 for (mfxU32 colIdx = 0; colIdx < m_widthCU_enc; ++colIdx) // column index for full surface (raster-scan order)
298 {
299 // intermediate arrays to be sorted by distortion
300 mfxU8 ref[4][2];
301 mfxI16Pair mv[4][2];
302 mfxU16 distortion[4][2];
303
304 // calculation of the input index for encoder after permutation from raster scan order index into 32x32 layout
305 // HEVC encoder works with 32x32 layout
306 mfxU32 permutEncIdx =
307 ((colIdx >> 1) << 2) // column offset;
308 + (rowIdx & ~1) * m_widthCU_enc // offset for new line of 32x32 blocks layout;
309 + (colIdx & 1) // zero or single offset depending on the number of comumn index;
310 + ((rowIdx & 1) << 1); // zero or double offset depending on the number of row index,
311 // zero shift for top 16x16 blocks into 32x32 layout and double for bottom blocks;
312
313 mfxFeiHevcEncMVPredictors& block = mvp.Data[permutEncIdx];
314
315 // BlockSize is used only when mfxExtFeiHevcEncFrameCtrl::MVPredictor = 7
316 // 0 - MV predictor disabled
317 // 1 - enabled per 16x16 block
318 // 2 - enabled per 32x32 block (used only first 16x16 block data)
319 block.BlockSize = 1; // Using finest granularity
320
321 mfxU32 linearPreEncIdx = rowIdx * m_widthCU_ds + colIdx;
322 for (mfxU32 j = 0; j < numPredPairs; ++j)
323 {
324 ref[j][0] = refIdx_vec[j]->RefL0;
325 ref[j][1] = refIdx_vec[j]->RefL1;
326
327 if (m_downsample_power2 == 0)// w/o VPP
328 {
329 if (colIdx >= m_widthCU_ds || rowIdx >= m_heightCU_ds) // TODO move check to the beginning of the loop
330 {
331 mv[j][0] = zeroPair;
332 mv[j][1] = zeroPair;
333 }
334 else
335 {
336 SelectFromMV(&mvs_vec[j]->MB[linearPreEncIdx].MV[0], 16, mv[j]);
337 distortion[j][0] = mbs_vec[j]->MB[linearPreEncIdx].Inter[0].BestDistortion;
338 distortion[j][1] = mbs_vec[j]->MB[linearPreEncIdx].Inter[1].BestDistortion;
339 }
340 }
341 else
342 {
343 mfxU32 preencCUIdx = 0; // index CU from PreENC output
344 mfxU32 rowMVIdx; // row index for motion vector
345 mfxU32 colMVIdx; // column index for motion vector
346 mfxU32 preencMVIdx = 0; // linear index for motion vector
347
348 switch (m_downsample_power2)
349 {
350 case 1:
351 preencCUIdx = (rowIdx >> 1) * m_widthCU_ds + (colIdx >> 1);
352 rowMVIdx = rowIdx & 1;
353 colMVIdx = colIdx & 1;
354 preencMVIdx = rowMVIdx * 8 + colMVIdx * 4;
355 SelectFromMV(&mvs_vec[j]->MB[preencCUIdx].MV[preencMVIdx], 4, mv[j]);
356 break;
357 case 2:
358 preencCUIdx = (rowIdx >> 2) * m_widthCU_ds + (colIdx >> 2);
359 rowMVIdx = rowIdx & 3;
360 colMVIdx = colIdx & 3;
361 preencMVIdx = rowMVIdx * 4 + colMVIdx;
362 mv[j][0] = mvs_vec[j]->MB[preencCUIdx].MV[ZigzagOrder[preencMVIdx]][0];
363 mv[j][1] = mvs_vec[j]->MB[preencCUIdx].MV[ZigzagOrder[preencMVIdx]][1];
364 break;
365 case 3:
366 preencCUIdx = (rowIdx >> 3) * m_widthCU_ds + (colIdx >> 3);
367 rowMVIdx = rowIdx & 7;
368 colMVIdx = colIdx & 7;
369 preencMVIdx = rowMVIdx / 2 * 4 + colMVIdx / 2;
370 mv[j][0] = mvs_vec[j]->MB[preencCUIdx].MV[ZigzagOrder[preencMVIdx]][0];
371 mv[j][1] = mvs_vec[j]->MB[preencCUIdx].MV[ZigzagOrder[preencMVIdx]][1];
372 break;
373 default:
374 break;
375 }
376
377 mv[j][0].x <<= m_downsample_power2;
378 mv[j][0].y <<= m_downsample_power2;
379 mv[j][1].x <<= m_downsample_power2;
380 mv[j][1].y <<= m_downsample_power2;
381
382 distortion[j][0] = (j < numFinalL0Predictors) ? mbs_vec[j]->MB[preencCUIdx].Inter[0].BestDistortion : 0xffff;
383 distortion[j][1] = (j < numFinalL1Predictors) ? mbs_vec[j]->MB[preencCUIdx].Inter[1].BestDistortion : 0xffff;
384 }
385 }
386
387 // sort predictors by ascending distortion
388 if (numPredPairs < 2) // nothing to sort
389 {
390 block.MV[0][0] = mv[0][0];
391 block.MV[0][1] = mv[0][1];
392 block.RefIdx[0].RefL0 = ref[0][0];
393 block.RefIdx[0].RefL1 = ref[0][1];
394 continue;
395 }
396
397 // smaller idx to be first argument to be preferred if equal
398 #define CMP_DIST(k,l) { \
399 mfxU8 res0 = distortion[k][0] > distortion[l][0]; \
400 mfxU8 res1 = distortion[k][1] > distortion[l][1]; \
401 worse[k][0] += res0; worse[l][0] += res0 ^ 1; \
402 worse[k][1] += res1; worse[l][1] += res1 ^ 1; \
403 }
404
405 // fill unused
406 for (mfxU32 j = numPredPairs; j < 4; ++j)
407 {
408 distortion[j][1] = distortion[j][0] = 0xffff;
409 ref[j][1] = ref[j][0] = 0xff;
410 mv[j][0].y = mv[j][0].x = 0x8000;
411 mv[j][1].y = mv[j][1].x = 0x8000;
412 }
413
414 mfxU8 worse[4][2] = { {0,} };
415 CMP_DIST(0, 1); CMP_DIST(2, 3);
416 CMP_DIST(0, 2); CMP_DIST(1, 3);
417 CMP_DIST(0, 3); CMP_DIST(1, 2);
418 // here 'worse' tells how many cases are better, so it is position in sorted array
419 for (mfxU32 j = 0; j < 4; j++)
420 {
421 block.MV[worse[j][0]][0] = mv[j][0];
422 block.MV[worse[j][1]][1] = mv[j][1];
423 block.RefIdx[worse[j][0]].RefL0 = ref[j][0];
424 block.RefIdx[worse[j][1]].RefL1 = ref[j][1];
425 }
426 }
427 }
428
429 nMvPredictors[0] = numFinalL0Predictors;
430 nMvPredictors[1] = numFinalL1Predictors;
431
432 return MFX_ERR_NONE;
433 }
434
435 // Selects best MV pair from set of consequent MV pairs
436 // Count is expected to be 4 or 16
437 // May be improved later
SelectFromMV(const mfxI16Pair (* mv)[2],mfxI32 count,mfxI16Pair (& res)[2])438 void SelectFromMV(const mfxI16Pair(* mv)[2], mfxI32 count, mfxI16Pair (&res)[2])
439 {
440 for (int ref = 0; ref < 2; ref++)
441 {
442 mfxI32 found = 0, xsum = 0, ysum = 0;
443 for (int i = 0; i < count; i++)
444 {
445 if (mv[i][ref].x == -0x8000) // ignore intra
446 continue;
447 found++;
448 xsum += mv[i][ref].x;
449 ysum += mv[i][ref].y;
450 }
451 if (!found)
452 res[ref] = mv[0][ref]; // all MV are fill with 0x8000
453 else {
454 res[ref].x = xsum / found;
455 res[ref].y = ysum / found;
456 }
457 }
458 }
459