1 /*
2 * Copyright(c) 2018 Intel Corporation
* SPDX-License-Identifier: BSD-2-Clause-Patent
4 */
5
6 #include "EbDefinitions.h"
7 #include "EbModeDecisionProcess.h"
8 #include "EbTransforms.h"
9 #include "EbFullLoop.h"
10 #include "EbRateDistortionCost.h"
11 #include "EbErrorCodes.h"
12 #include "EbErrorHandling.h"
13
14 static const EB_U64 depth0Th[2][MAX_HIERARCHICAL_LEVEL][MAX_TEMPORAL_LAYERS] = {
15 {
16 { 1000 },
17 { 1000, 4000 },
18 { 1000, 4000, 9500 },
19 { 1000, 4000, 9500, 3000 },
20 { 1000, 4000, 9500, 3000, 3000 },
21 { 1000, 4000, 9500, 3000, 3000, 3000 }
22 },
23
24 {
25 { 0 },
26 { 0, 1000 },
27 { 0, 1000, 7000 },
28 { 0, 1000, 7000, 9500 },
29 { 0, 1000, 7000, 9500, 9500 },
30 { 0, 1000, 7000, 9500, 9500, 9500 }
31 }
32 };
33 static const EB_U64 depth1Th[2][MAX_HIERARCHICAL_LEVEL][MAX_TEMPORAL_LAYERS] = {
34 {
35 { 0 },
36 { 0, 2000 },
37 { 0, 2000, 5500 },
38 { 0, 2000, 5500, 9500 },
39 { 0, 2000, 5500, 9500, 9500 },
40 { 0, 2000, 5500, 9500, 9500, 9500 }
41 },
42
43 {
44 { 0 },
45 { 0, 1500 },
46 { 0, 1500, 1500 },
47 { 0, 1500, 1500, 1500 },
48 { 0, 1500, 1500, 1500, 1500 },
49 { 0, 1500, 1500, 1500, 1500, 1500 }
50 }
51 };
52 static const EB_U64 depth2Th[2][MAX_HIERARCHICAL_LEVEL][MAX_TEMPORAL_LAYERS] = {
53 {
54 { 0 },
55 { 0, 500 },
56 { 0, 500, 2000 },
57 { 0, 500, 2000, 2500 },
58 { 0, 500, 2000, 2500, 2500 },
59 { 0, 500, 2000, 2500, 2500, 2500 }
60 },
61
62 {
63 { 0 },
64 { 0, 1500 },
65 { 0, 1500, 1000 },
66 { 0, 1500, 1000, 4500 },
67 { 0, 1500, 1000, 4500, 4500 },
68 { 0, 1500, 1000, 4500, 4500, 4500 }
69 }
70 };
71
72 /*********************************************************************
73 * UnifiedQuantizeInvQuantize
74 *
75 * Unified Quant +iQuant
76 *********************************************************************/
ProductUnifiedQuantizeInvQuantizeMd(PictureControlSet_t * pictureControlSetPtr,EB_S16 * coeff,const EB_U32 coeffStride,EB_S16 * quantCoeff,EB_S16 * reconCoeff,EB_U32 qp,EB_U32 areaSize,EB_U32 * yCountNonZeroCoeffs,EB_PF_MODE pfMode,EB_U8 enableContouringQCUpdateFlag,EB_U32 componentType,EB_RDOQ_PMCORE_TYPE rdoqPmCoreMethod,CabacEncodeContext_t * cabacEncodeCtxPtr,EB_U64 lambda,EB_MODETYPE type,EB_U32 intraLumaMode,EB_U32 intraChromaMode,CabacCost_t * CabacCost)77 void ProductUnifiedQuantizeInvQuantizeMd(
78 PictureControlSet_t *pictureControlSetPtr,
79 EB_S16 *coeff,
80 const EB_U32 coeffStride,
81 EB_S16 *quantCoeff,
82 EB_S16 *reconCoeff,
83 EB_U32 qp,
84 EB_U32 areaSize,
85 EB_U32 *yCountNonZeroCoeffs,
86 EB_PF_MODE pfMode,
87 EB_U8 enableContouringQCUpdateFlag,
88 EB_U32 componentType,
89 EB_RDOQ_PMCORE_TYPE rdoqPmCoreMethod,
90 CabacEncodeContext_t *cabacEncodeCtxPtr,
91 EB_U64 lambda,
92 EB_MODETYPE type,
93 EB_U32 intraLumaMode,
94 EB_U32 intraChromaMode,
95 CabacCost_t *CabacCost)
96
97 {
98 EB_PICTURE sliceType = pictureControlSetPtr->sliceType;
99 EB_U32 temporalLayerIndex = pictureControlSetPtr->temporalLayerIndex;
100 //for the Quant
101 const EB_S32 qpRem = (EB_S32)QpModSix[qp]; //the output is between 0-5
102 const EB_S32 qpPer = (EB_S32)QpDivSix[qp] + TRANS_BIT_INCREMENT; //the output is between 0 and 8+TRANS_BIT_INCREMENT (CHKN TRANS_BIT_INCREMENT = 0)
103 const EB_U32 qFunc = QFunc[qpRem]; // 15 bits
104
105 const EB_U32 transformShiftNum = 7 - Log2f(areaSize);
106 const EB_S32 shiftedQBits = QUANT_SHIFT + qpPer + transformShiftNum;
107 const EB_U32 q_offset = ((sliceType == EB_I_PICTURE || sliceType == EB_IDR_PICTURE) ? QUANT_OFFSET_I : QUANT_OFFSET_P) << (shiftedQBits - 9);
108
109 //for the iQuant
110 const EB_S32 shiftedFFunc = (qpPer > 8) ? (EB_S32)FFunc[qpRem] << (qpPer - 2) : (EB_S32)FFunc[qpRem] << qpPer; // this is 6+8+TRANS_BIT_INCREMENT
111 const EB_S32 shiftNum = (qpPer > 8) ? QUANT_IQUANT_SHIFT - QUANT_SHIFT - transformShiftNum - 2 : QUANT_IQUANT_SHIFT - QUANT_SHIFT - transformShiftNum;
112 const EB_S32 iq_offset = 1 << (shiftNum - 1);
113
114
115 if (pfMode == PF_N2) {
116 areaSize = areaSize >> 1;
117 } else if (pfMode == PF_N4) {
118 areaSize = areaSize >> 2;
119 }
120
121 if (rdoqPmCoreMethod){
122 DecoupledQuantizeInvQuantizeLoops(
123 coeff,
124 coeffStride,
125 quantCoeff,
126 reconCoeff,
127 cabacEncodeCtxPtr,
128 lambda,
129 type,
130 intraLumaMode,
131 intraChromaMode,
132 componentType,
133 pictureControlSetPtr->temporalLayerIndex,
134 pictureControlSetPtr->ParentPcsPtr->isUsedAsReferenceFlag,
135 (EB_U8) 0,
136 (EB_U16)qp,
137 (EB_U32)EB_8BIT,
138 CabacCost,
139 qFunc,
140 q_offset,
141 shiftedQBits,
142 shiftedFFunc,
143 iq_offset,
144 shiftNum,
145 areaSize,
146 &(*yCountNonZeroCoeffs),
147 rdoqPmCoreMethod);
148 }
149 else{
150
151 QiQ_funcPtrArray[!!(ASM_TYPES & AVX2_MASK)][areaSize >> 3](
152 coeff,
153 coeffStride,
154 quantCoeff,
155 reconCoeff,
156 qFunc,
157 q_offset,
158 shiftedQBits,
159 shiftedFFunc,
160 iq_offset,
161 shiftNum,
162 areaSize,
163 &(*yCountNonZeroCoeffs));
164
165 UpdateQiQCoef(
166 quantCoeff,
167 reconCoeff,
168 coeffStride,
169 shiftedFFunc,
170 iq_offset,
171 shiftNum,
172 areaSize,
173 &(*yCountNonZeroCoeffs),
174 componentType,
175 sliceType,
176 temporalLayerIndex,
177 0,
178 enableContouringQCUpdateFlag);
179 }
180 }
181
182 /****************************************
183 ************ Full loop ****************
184 ****************************************/
/*
 * ProductFullLoop
 *
 * Luma full loop for one mode-decision candidate: transform, quantize /
 * inverse-quantize, measure distortion, estimate coefficient bits and
 * compute the luma TU cost.
 *
 * A CU of MAX_LCU_SIZE is processed as four 32x32 TUs (TU indices 1..4);
 * any other CU is processed as a single TU (TU index 0) of the CU size.
 *
 *  inputPicturePtr / inputOriginIndex - source picture and luma offset, read
 *                                       only on the spatial-SSE path
 *  candidateBuffer      - candidate whose residual, recon-coefficient,
 *                         prediction and recon buffers are used; its
 *                         yDc[] and yCountNonZeroCoeffs[] are reset and
 *                         refilled here
 *  contextPtr           - mode-decision context (scratch transform
 *                         buffers, PF mode, lambdas, entropy coder)
 *  cuStatsPtr           - CU size and origin
 *  pictureControlSetPtr - forwarded to the quantizer
 *  qp                   - quantization parameter for the quantizer
 *  yCountNonZeroCoeffs  - per-TU non-zero coefficient counts (output)
 *  yCoeffBits           - accumulated (+=) with the estimated TU bits
 *  yFullDistortion      - accumulated (+=) on the four-TU path, assigned
 *                         (=) on the single-TU path
 */
void ProductFullLoop(
    EbPictureBufferDesc_t         *inputPicturePtr,
    EB_U32                         inputOriginIndex,
    ModeDecisionCandidateBuffer_t *candidateBuffer,
    ModeDecisionContext_t         *contextPtr,
    const CodedUnitStats_t        *cuStatsPtr,
    PictureControlSet_t           *pictureControlSetPtr,
    EB_U32                         qp,
    EB_U32                        *yCountNonZeroCoeffs,
    EB_U64                        *yCoeffBits,
    EB_U64                        *yFullDistortion)
{
    EB_U64 tuCoeffBits;
    EB_U64 tuDistortion[3][DIST_CALC_TOTAL];
    EB_U32 slot;

    /* Clear the per-TU DC magnitudes and non-zero counts of the candidate. */
    for (slot = 0; slot < 4; ++slot) {
        candidateBuffer->yDc[slot]                 = 0;
        candidateBuffer->yCountNonZeroCoeffs[slot] = 0;
    }

    if (cuStatsPtr->size == MAX_LCU_SIZE) {

        EB_U32 quadrant;

        for (quadrant = 0; quadrant < 4; ++quadrant) {

            /* +32 columns for the right quadrants, +2048 samples for the
             * bottom quadrants. */
            const EB_U32 tuOffset = ((quadrant & 1) << 5) + ((quadrant >> 1) << 11);
            const EB_U32 tuSlot   = quadrant + 1;

            EB_S16 *residualQuant = &(((EB_S16*)candidateBuffer->residualQuantCoeffPtr->bufferY)[tuOffset]);
            EB_S16 *transCoeff    = &(((EB_S16*)contextPtr->transQuantBuffersPtr->tuTransCoeffNxNPtr->bufferY)[tuOffset]);
            EB_S16 *reconCoeff    = &(((EB_S16*)candidateBuffer->reconCoeffPtr->bufferY)[tuOffset]);

            tuCoeffBits = 0;

            EstimateTransform(
                residualQuant,
                MAX_LCU_SIZE,
                transCoeff,
                MAX_LCU_SIZE,
                32,
                contextPtr->transformInnerArrayPtr,
                0,
                EB_FALSE,
                contextPtr->pfMdMode);

            ProductUnifiedQuantizeInvQuantizeMd(
                pictureControlSetPtr,
                transCoeff,
                MAX_LCU_SIZE,
                residualQuant,
                reconCoeff,
                qp,
                32,
                &(yCountNonZeroCoeffs[tuSlot]),
                contextPtr->pfMdMode,
                0,
                COMPONENT_LUMA,
                contextPtr->rdoqPmCoreMethod,
                (CabacEncodeContext_t*)contextPtr->coeffEstEntropyCoderPtr->cabacEncodeContextPtr,
                contextPtr->fullLambda,
                candidateBuffer->candidatePtr->type,
                candidateBuffer->candidatePtr->intraLumaMode,
                EB_INTRA_CHROMA_DM,
                pictureControlSetPtr->cabacCost);

            /* Distortion between transform and recon coefficients. */
            PictureFullDistortionLuma(
                contextPtr->transQuantBuffersPtr->tuTransCoeffNxNPtr,
                tuOffset,
                candidateBuffer->reconCoeffPtr,
                tuOffset,
                (32 >> contextPtr->pfMdMode),
                tuDistortion[0],
                yCountNonZeroCoeffs[tuSlot],
                candidateBuffer->candidatePtr->type);

            /* Rounded right shift by 4 for the 32x32 TU. */
            tuDistortion[0][DIST_CALC_RESIDUAL]   = (tuDistortion[0][DIST_CALC_RESIDUAL] + 8) >> 4;
            tuDistortion[0][DIST_CALC_PREDICTION] = (tuDistortion[0][DIST_CALC_PREDICTION] + 8) >> 4;

            TuEstimateCoeffBitsLuma(
                tuOffset,
                contextPtr->coeffEstEntropyCoderPtr,
                candidateBuffer->residualQuantCoeffPtr,
                yCountNonZeroCoeffs[tuSlot],
                &tuCoeffBits,
                32,
                candidateBuffer->candidatePtr->type,
                candidateBuffer->candidatePtr->intraLumaMode,
                contextPtr->pfMdMode,
                contextPtr->coeffCabacUpdate,
                &(candidateBuffer->candBuffCoeffCtxModel),
                contextPtr->CabacCost);

            TuCalcCostLuma(
                MAX_LCU_SIZE,
                candidateBuffer->candidatePtr,
                tuSlot,
                32,
                yCountNonZeroCoeffs[tuSlot],
                tuDistortion[0],
                &tuCoeffBits,
                contextPtr->qp,
                contextPtr->fullLambda,
                contextPtr->fullChromaLambda);

            *yCoeffBits                           += tuCoeffBits;
            yFullDistortion[DIST_CALC_RESIDUAL]   += tuDistortion[0][DIST_CALC_RESIDUAL];
            yFullDistortion[DIST_CALC_PREDICTION] += tuDistortion[0][DIST_CALC_PREDICTION];

            candidateBuffer->yDc[quadrant]                 = ABS(residualQuant[0]);
            candidateBuffer->yCountNonZeroCoeffs[quadrant] = (EB_U16)yCountNonZeroCoeffs[tuSlot];
        }
    }
    else {

        const EB_U32 tuOffset = cuStatsPtr->originX + (cuStatsPtr->originY << 6);

        EB_S16 *residualQuant = &(((EB_S16*)candidateBuffer->residualQuantCoeffPtr->bufferY)[tuOffset]);
        EB_S16 *transCoeff    = &(((EB_S16*)contextPtr->transQuantBuffersPtr->tuTransCoeff2Nx2NPtr->bufferY)[tuOffset]);
        EB_S16 *reconCoeff    = &(((EB_S16*)candidateBuffer->reconCoeffPtr->bufferY)[tuOffset]);

        tuCoeffBits = 0;

        EstimateTransform(
            residualQuant,
            MAX_LCU_SIZE,
            transCoeff,
            MAX_LCU_SIZE,
            cuStatsPtr->size,
            contextPtr->transformInnerArrayPtr,
            0,
            EB_FALSE,
            contextPtr->pfMdMode);

        ProductUnifiedQuantizeInvQuantizeMd(
            pictureControlSetPtr,
            transCoeff,
            MAX_LCU_SIZE,
            residualQuant,
            reconCoeff,
            qp,
            cuStatsPtr->size,
            &(yCountNonZeroCoeffs[0]),
            contextPtr->pfMdMode,
            0,
            COMPONENT_LUMA,
            contextPtr->rdoqPmCoreMethod,
            (CabacEncodeContext_t*)contextPtr->coeffEstEntropyCoderPtr->cabacEncodeContextPtr,
            contextPtr->fullLambda,
            candidateBuffer->candidatePtr->type,
            candidateBuffer->candidatePtr->intraLumaMode,
            EB_INTRA_CHROMA_DM,
            pictureControlSetPtr->cabacCost);

        if (contextPtr->spatialSseFullLoop != EB_TRUE) {

            /* Distortion between transform and recon coefficients. */
            PictureFullDistortionLuma(
                contextPtr->transQuantBuffersPtr->tuTransCoeff2Nx2NPtr,
                tuOffset,
                candidateBuffer->reconCoeffPtr,
                tuOffset,
                (contextPtr->cuStats->size >> contextPtr->pfMdMode),
                tuDistortion[0],
                yCountNonZeroCoeffs[0],
                candidateBuffer->candidatePtr->type);

            /* Rounded right shift that depends on the TU size. */
            const EB_U32 lumaShift = 2 * (7 - Log2f(cuStatsPtr->size));
            tuDistortion[0][DIST_CALC_RESIDUAL]   = (tuDistortion[0][DIST_CALC_RESIDUAL] + (EB_U64)(1 << (lumaShift - 1))) >> lumaShift;
            tuDistortion[0][DIST_CALC_PREDICTION] = (tuDistortion[0][DIST_CALC_PREDICTION] + (EB_U64)(1 << (lumaShift - 1))) >> lumaShift;
        }
        else {

            /* Spatial SSE: rebuild the reconstruction, then compare both
             * reconstruction and prediction against the source samples. */
            if (yCountNonZeroCoeffs[0]) {

                /* No PF-N2 inverse transform exists for 16x16 and 8x8, so
                 * the unused quadrants are zeroed out instead. */
                if (cuStatsPtr->size < 32 && contextPtr->pfMdMode == PF_N2) {
                    PfZeroOutUselessQuadrants(
                        reconCoeff,
                        candidateBuffer->reconCoeffPtr->strideY,
                        (cuStatsPtr->size >> 1));
                }

                /* The inverse transform reuses the 2Nx2N transform scratch
                 * buffer as its destination. */
                EstimateInvTransform(
                    reconCoeff,
                    candidateBuffer->reconCoeffPtr->strideY,
                    transCoeff,
                    contextPtr->transQuantBuffersPtr->tuTransCoeff2Nx2NPtr->strideY,
                    cuStatsPtr->size,
                    contextPtr->transformInnerArrayPtr,
                    BIT_INCREMENT_8BIT,
                    EB_FALSE,
                    cuStatsPtr->size < 32 ? PF_OFF : contextPtr->pfMdMode);

                /* Guard on the index used to select the addition kernel. */
                if ((cuStatsPtr->size >> 3) < 9) {
                    AdditionKernel_funcPtrArray[!!(ASM_TYPES & PREAVX2_MASK)][cuStatsPtr->size >> 3](
                        &(candidateBuffer->predictionPtr->bufferY[tuOffset]),
                        64,
                        transCoeff,
                        contextPtr->transQuantBuffersPtr->tuTransCoeff2Nx2NPtr->strideY,
                        &(candidateBuffer->reconPtr->bufferY[tuOffset]),
                        candidateBuffer->reconPtr->strideY,
                        cuStatsPtr->size,
                        cuStatsPtr->size);
                }
            }
            else {
                /* No coefficients: reconstruction is the prediction. */
                PictureCopy8Bit(
                    candidateBuffer->predictionPtr,
                    tuOffset,
                    0,
                    candidateBuffer->reconPtr,
                    tuOffset,
                    0,
                    cuStatsPtr->size,
                    cuStatsPtr->size,
                    0,
                    0,
                    PICTURE_BUFFER_DESC_Y_FLAG);
            }

            tuDistortion[0][DIST_CALC_RESIDUAL] = SpatialFullDistortionKernel_funcPtrArray[!!(ASM_TYPES & PREAVX2_MASK)][Log2f(cuStatsPtr->size) - 2](
                &(inputPicturePtr->bufferY[inputOriginIndex]),
                inputPicturePtr->strideY,
                &(candidateBuffer->reconPtr->bufferY[tuOffset]),
                candidateBuffer->reconPtr->strideY,
                cuStatsPtr->size,
                cuStatsPtr->size);

            tuDistortion[0][DIST_CALC_PREDICTION] = SpatialFullDistortionKernel_funcPtrArray[!!(ASM_TYPES & PREAVX2_MASK)][Log2f(cuStatsPtr->size) - 2](
                &(inputPicturePtr->bufferY[inputOriginIndex]),
                inputPicturePtr->strideY,
                &(candidateBuffer->predictionPtr->bufferY[tuOffset]),
                candidateBuffer->predictionPtr->strideY,
                cuStatsPtr->size,
                cuStatsPtr->size);
        }

        TuEstimateCoeffBitsLuma(
            tuOffset,
            contextPtr->coeffEstEntropyCoderPtr,
            candidateBuffer->residualQuantCoeffPtr,
            yCountNonZeroCoeffs[0],
            &tuCoeffBits,
            contextPtr->cuStats->size,
            candidateBuffer->candidatePtr->type,
            candidateBuffer->candidatePtr->intraLumaMode,
            contextPtr->pfMdMode,
            contextPtr->coeffCabacUpdate,
            &(candidateBuffer->candBuffCoeffCtxModel),
            contextPtr->CabacCost);

        TuCalcCostLuma(
            cuStatsPtr->size,
            candidateBuffer->candidatePtr,
            0,
            cuStatsPtr->size,
            yCountNonZeroCoeffs[0],
            tuDistortion[0],
            &tuCoeffBits,
            contextPtr->qp,
            contextPtr->fullLambda,
            contextPtr->fullChromaLambda);

        *yCoeffBits                          += tuCoeffBits;
        yFullDistortion[DIST_CALC_RESIDUAL]   = tuDistortion[0][DIST_CALC_RESIDUAL];
        yFullDistortion[DIST_CALC_PREDICTION] = tuDistortion[0][DIST_CALC_PREDICTION];

        candidateBuffer->yDc[0]                 = ABS(residualQuant[0]);
        candidateBuffer->yCountNonZeroCoeffs[0] = (EB_U16)yCountNonZeroCoeffs[0];
    }
}
450
451
UnifiedQuantizeInvQuantize_R(EB_S16 * coeff,const EB_U32 coeffStride,EB_S16 * quantCoeff,EB_S16 * reconCoeff,EB_U32 qp,EB_U32 bitDepth,EB_U32 areaSize,EB_PICTURE sliceType,EB_U32 * yCountNonZeroCoeffs,EB_S8 mdNonZeroCoeff,EB_PF_MODE pfMode,EB_U32 tuOriginX,EB_U32 tuOriginY,EB_U32 lcuOriginY,EB_U32 enableCbflag,EB_U8 enableContouringQCUpdateFlag,EB_MODETYPE type,EB_U32 componentType,EB_U32 temporalLayerIndex,EB_BOOL encDecFlag,EB_U32 dZoffset,EB_RDOQ_PMCORE_TYPE rdoqPmCoreMethod,CabacEncodeContext_t * cabacEncodeCtxPtr,EB_U64 lambda,EB_U32 intraLumaMode,EB_U32 intraChromaMode,CabacCost_t * CabacCost)452 void UnifiedQuantizeInvQuantize_R(
453 EB_S16 *coeff,
454 const EB_U32 coeffStride,
455 EB_S16 *quantCoeff,
456 EB_S16 *reconCoeff,
457 EB_U32 qp,
458 EB_U32 bitDepth,
459 EB_U32 areaSize,
460 EB_PICTURE sliceType,
461 EB_U32 *yCountNonZeroCoeffs,
462 EB_S8 mdNonZeroCoeff,
463 EB_PF_MODE pfMode,
464 EB_U32 tuOriginX,
465 EB_U32 tuOriginY,
466 EB_U32 lcuOriginY,
467 EB_U32 enableCbflag,
468 EB_U8 enableContouringQCUpdateFlag,
469 EB_MODETYPE type,
470 EB_U32 componentType,
471 EB_U32 temporalLayerIndex,
472 EB_BOOL encDecFlag,
473 EB_U32 dZoffset,
474 EB_RDOQ_PMCORE_TYPE rdoqPmCoreMethod,
475 CabacEncodeContext_t *cabacEncodeCtxPtr,
476 EB_U64 lambda,
477 EB_U32 intraLumaMode,
478 EB_U32 intraChromaMode,
479 CabacCost_t *CabacCost)
480
481 {
482
483 //for the Quant
484 const EB_S32 qpRem = (EB_S32)QpModSix[qp]; //the output is between 0-5
485 const EB_S32 qpPer = (EB_S32)QpDivSix[qp] + TRANS_BIT_INCREMENT; //the output is between 0 and 8+TRANS_BIT_INCREMENT (CHKN TRANS_BIT_INCREMENT = 0)
486 const EB_U32 qFunc = QFunc[qpRem]; // 15 bits
487
488 const EB_U32 internalBitDepth = (EB_U32)bitDepth + TRANS_BIT_INCREMENT; //CHKN always 8 for 8 bit
489
490 const EB_U32 transformShiftNum = MAX_TR_DYNAMIC_RANGE - internalBitDepth - Log2f(areaSize);
491 const EB_S32 shiftedQBits = QUANT_SHIFT + qpPer + transformShiftNum;
492 const EB_U32 q_offset = ((sliceType == EB_I_PICTURE || sliceType == EB_IDR_PICTURE) ? QUANT_OFFSET_I : QUANT_OFFSET_P) << (shiftedQBits - 9);
493
494 //for the iQuant
495 const EB_S32 shiftedFFunc = (qpPer > 8) ? (EB_S32)FFunc[qpRem] << (qpPer - 2) : (EB_S32)FFunc[qpRem] << qpPer; // this is 6+8+TRANS_BIT_INCREMENT
496 const EB_S32 shiftNum = (qpPer > 8) ? QUANT_IQUANT_SHIFT - QUANT_SHIFT - transformShiftNum - 2 : QUANT_IQUANT_SHIFT - QUANT_SHIFT - transformShiftNum;
497 const EB_S32 iq_offset = 1 << (shiftNum - 1);
498 EB_U32 adptive_qp_offset;
499
500 adptive_qp_offset = q_offset;
501
502 (void)(encDecFlag);
503 (void)(mdNonZeroCoeff);
504 adptive_qp_offset = dZoffset ? (dZoffset * (1 << shiftedQBits) / 20) : adptive_qp_offset;
505
506 if (pfMode == PF_N2) {
507 areaSize = areaSize >> 1;
508 } else if (pfMode == PF_N4) {
509 areaSize = areaSize >> 2;
510 }
511
512 if (rdoqPmCoreMethod){
513
514 DecoupledQuantizeInvQuantizeLoops(
515 coeff,
516 coeffStride,
517 quantCoeff,
518 reconCoeff,
519 cabacEncodeCtxPtr,
520 lambda,
521 type,
522 intraLumaMode,
523 intraChromaMode,
524 componentType,
525 (EB_U8)temporalLayerIndex,
526 temporalLayerIndex < 3 ? EB_TRUE : EB_FALSE,
527 (EB_U8)0,
528 (EB_U16)qp,
529 bitDepth,
530 CabacCost,
531 qFunc,
532 q_offset,
533 shiftedQBits,
534 shiftedFFunc,
535 iq_offset,
536 shiftNum,
537 areaSize,
538 &(*yCountNonZeroCoeffs),
539 rdoqPmCoreMethod);
540 }else{
541
542 QiQ_funcPtrArray[!!(ASM_TYPES & AVX2_MASK)][areaSize >> 3](
543 coeff,
544 coeffStride,
545 quantCoeff,
546 reconCoeff,
547 qFunc,
548 adptive_qp_offset,
549 shiftedQBits,
550 shiftedFFunc,
551 iq_offset,
552 shiftNum,
553 areaSize,
554 &(*yCountNonZeroCoeffs));
555
556 UpdateQiQCoef_R(
557 quantCoeff,
558 reconCoeff,
559 coeffStride,
560 shiftedFFunc,
561 iq_offset,
562 shiftNum,
563 areaSize,
564 &(*yCountNonZeroCoeffs),
565 componentType,
566 sliceType,
567 temporalLayerIndex,
568 enableCbflag,
569 enableContouringQCUpdateFlag);
570 }
571
572 (void)tuOriginX;
573 (void)tuOriginY;
574 (void)lcuOriginY;
575 (void)type;
576 }
577 /****************************************
578 ************ Full loop ****************
579 ****************************************/
/*
 * FullLoop_R
 *
 * Chroma full loop for one mode-decision candidate. For every TU of the CU
 * and for each chroma plane selected by `componentMask`, the residual is
 * transformed and quantized / inverse-quantized. When
 * contextPtr->spatialSseFullLoop is EB_TRUE the chroma reconstruction is
 * rebuilt as well (inverse transform + prediction, or a plain copy of the
 * prediction when the TU has no non-zero coefficients).
 *
 * A CU of MAX_LCU_SIZE is processed as four TUs starting at TU index 1;
 * any other CU is processed as a single TU with index 0.
 *
 *  lcuPtr               - unused
 *  candidateBuffer      - candidate whose residual, recon-coefficient,
 *                         prediction and recon buffers are used
 *  contextPtr           - mode-decision context (scratch transform
 *                         buffers, PF mode, lambda, entropy coder)
 *  cuStatsPtr           - CU size and origin
 *  inputPicturePtr      - only its bitDepth is read
 *  pictureControlSetPtr - supplies sliceType, temporalLayerIndex, cabacCost
 *  componentMask        - PICTURE_BUFFER_DESC_Cb_FLAG / _Cr_FLAG selection
 *  cbQp / crQp          - quantization parameters of the two planes
 *  cbCountNonZeroCoeffs,
 *  crCountNonZeroCoeffs - per-TU non-zero coefficient counts (output)
 *
 * Fixes relative to the previous revision:
 *  - The Cr reconstruction path indexed the Cr buffers with the Cb origin
 *    index; it now uses tuCrOriginIndex.
 *  - The reconstruction paths no longer re-declare correctedPFMode and
 *    chromatTuSize. The shadowing copies ignored the tuSize == 4 special
 *    case, so the inverse transform could run with a different size than
 *    the forward transform. Both paths now share the per-TU values.
 */
void FullLoop_R (
    LargestCodingUnit_t            *lcuPtr,
    ModeDecisionCandidateBuffer_t  *candidateBuffer,
    ModeDecisionContext_t          *contextPtr,
    const CodedUnitStats_t         *cuStatsPtr,
    EbPictureBufferDesc_t          *inputPicturePtr,
    PictureControlSet_t            *pictureControlSetPtr,
    EB_U32                          componentMask,
    EB_U32                          cbQp,
    EB_U32                          crQp,
    EB_U32                         *cbCountNonZeroCoeffs,
    EB_U32                         *crCountNonZeroCoeffs)
{
    (void)lcuPtr;

    EB_S16                     *chromaResidualPtr;
    EB_U32                      tuIndex;
    EB_U32                      tuOriginIndex;
    EB_U32                      tuCbOriginIndex;
    EB_U32                      tuCrOriginIndex;
    EB_U32                      tuCount;
    const TransformUnitStats_t *tuStatPtr;
    EB_U32                      tuItr;
    EB_U32                      tuSize;
    EB_U32                      chromatTuSize;
    EB_U32                      tuOriginX;
    EB_U32                      tuOriginY;

    EbPictureBufferDesc_t      *tuTransCoeffTmpPtr;
    EbPictureBufferDesc_t      *tuQuantCoeffTmpPtr;

    /* Select the TU layout and the transform scratch buffer. */
    if (cuStatsPtr->size == MAX_LCU_SIZE) {
        tuCount            = 4;
        tuIndex            = 1;
        tuTransCoeffTmpPtr = contextPtr->transQuantBuffersPtr->tuTransCoeffNxNPtr;
        tuQuantCoeffTmpPtr = candidateBuffer->residualQuantCoeffPtr;
    }
    else {
        tuCount            = 1;
        tuIndex            = 0;
        tuTransCoeffTmpPtr = contextPtr->transQuantBuffersPtr->tuTransCoeff2Nx2NPtr;
        tuQuantCoeffTmpPtr = candidateBuffer->residualQuantCoeffPtr;
    }

    tuItr = 0;
    do {
        tuStatPtr     = GetTransformUnitStats(tuIndex);
        tuOriginX     = TU_ORIGIN_ADJUST(cuStatsPtr->originX, cuStatsPtr->size, tuStatPtr->offsetX);
        tuOriginY     = TU_ORIGIN_ADJUST(cuStatsPtr->originY, cuStatsPtr->size, tuStatPtr->offsetY);
        tuSize        = cuStatsPtr->size >> tuStatPtr->depth;

        /* A 4x4 luma TU keeps a 4x4 chroma TU; otherwise chroma is half
         * the luma size. */
        chromatTuSize = tuSize == 4 ? tuSize : (tuSize >> 1);
        tuOriginIndex = tuOriginX + tuOriginY * candidateBuffer->residualQuantCoeffPtr->strideY;
        tuCbOriginIndex = tuSize == 4 ?
            tuOriginIndex :
            ((tuOriginX + tuOriginY * candidateBuffer->residualQuantCoeffPtr->strideCb) >> 1);
        tuCrOriginIndex = tuSize == 4 ?
            tuOriginIndex :
            ((tuOriginX + tuOriginY * candidateBuffer->residualQuantCoeffPtr->strideCr) >> 1);

        /* Restrict the partial-frequency mode for small chroma TUs:
         * none for 4x4, at most N2 for 8x8. */
        EB_PF_MODE correctedPFMode = contextPtr->pfMdMode;

        if (chromatTuSize == 4)
            correctedPFMode = PF_OFF;
        else if (chromatTuSize == 8 && contextPtr->pfMdMode == PF_N4)
            correctedPFMode = PF_N2;

        /* ---------------------------- Cb ---------------------------- */
        if (componentMask & PICTURE_BUFFER_DESC_Cb_FLAG) {

            chromaResidualPtr = &(((EB_S16*)candidateBuffer->residualQuantCoeffPtr->bufferCb)[tuCbOriginIndex]);

            // Cb Transform
            EstimateTransform(
                chromaResidualPtr,
                candidateBuffer->residualQuantCoeffPtr->strideCb,
                &(((EB_S16*)tuTransCoeffTmpPtr->bufferCb)[tuCbOriginIndex]),
                tuTransCoeffTmpPtr->strideCb,
                chromatTuSize,
                contextPtr->transformInnerArrayPtr,
                0,
                EB_FALSE,
                correctedPFMode);

            // Cb Quantization + inverse quantization
            UnifiedQuantizeInvQuantize_R(
                &(((EB_S16*)tuTransCoeffTmpPtr->bufferCb)[tuCbOriginIndex]),
                tuTransCoeffTmpPtr->strideCb,
                &(((EB_S16*)tuQuantCoeffTmpPtr->bufferCb)[tuCbOriginIndex]),
                &(((EB_S16*)candidateBuffer->reconCoeffPtr->bufferCb)[tuCbOriginIndex]),
                cbQp,
                inputPicturePtr->bitDepth,
                chromatTuSize,
                pictureControlSetPtr->sliceType,
                &(cbCountNonZeroCoeffs[tuIndex]),
                -1,
                correctedPFMode,
                0,
                0,
                0,
                0,
                0,
                candidateBuffer->candidatePtr->type,
                COMPONENT_CHROMA,
                pictureControlSetPtr->temporalLayerIndex,
                EB_FALSE,
                0,
                contextPtr->rdoqPmCoreMethod,
                (CabacEncodeContext_t*)contextPtr->coeffEstEntropyCoderPtr->cabacEncodeContextPtr,
                contextPtr->fullLambda,
                candidateBuffer->candidatePtr->intraLumaMode,
                EB_INTRA_CHROMA_DM,
                pictureControlSetPtr->cabacCost);

            if (contextPtr->spatialSseFullLoop == EB_TRUE) {
                if (cbCountNonZeroCoeffs[tuIndex]) {

                    /* The inverse transform below runs without partial
                     * frequency support, so zero the quadrants the
                     * forward path did not produce. */
                    if (correctedPFMode) {
                        PfZeroOutUselessQuadrants(
                            &(((EB_S16*)candidateBuffer->reconCoeffPtr->bufferCb)[tuCbOriginIndex]),
                            candidateBuffer->reconCoeffPtr->strideCb,
                            (chromatTuSize >> 1));
                    }

                    /* The 2Nx2N transform scratch buffer receives the
                     * inverse-transformed residual. */
                    EstimateInvTransform(
                        &(((EB_S16*)candidateBuffer->reconCoeffPtr->bufferCb)[tuCbOriginIndex]),
                        candidateBuffer->reconCoeffPtr->strideCb,
                        &(((EB_S16*)contextPtr->transQuantBuffersPtr->tuTransCoeff2Nx2NPtr->bufferCb)[tuCbOriginIndex]),
                        contextPtr->transQuantBuffersPtr->tuTransCoeff2Nx2NPtr->strideCb,
                        chromatTuSize,
                        contextPtr->transformInnerArrayPtr,
                        BIT_INCREMENT_8BIT,
                        EB_FALSE,
                        EB_FALSE);

                    PictureAddition(
                        &(candidateBuffer->predictionPtr->bufferCb[tuCbOriginIndex]),
                        candidateBuffer->predictionPtr->strideCb,
                        &(((EB_S16*)(contextPtr->transQuantBuffersPtr->tuTransCoeff2Nx2NPtr->bufferCb))[tuCbOriginIndex]),
                        contextPtr->transQuantBuffersPtr->tuTransCoeff2Nx2NPtr->strideCb,
                        &(candidateBuffer->reconPtr->bufferCb[tuCbOriginIndex]),
                        candidateBuffer->reconPtr->strideCb,
                        chromatTuSize,
                        chromatTuSize);
                }
                else {
                    /* No coefficients: reconstruction is the prediction. */
                    PictureCopy8Bit(
                        candidateBuffer->predictionPtr,
                        tuOriginIndex,
                        tuCbOriginIndex,
                        candidateBuffer->reconPtr,
                        tuOriginIndex,
                        tuCbOriginIndex,
                        tuSize,
                        tuSize,
                        chromatTuSize,
                        chromatTuSize,
                        PICTURE_BUFFER_DESC_Cb_FLAG);
                }
            }
        }

        /* ---------------------------- Cr ---------------------------- */
        if (componentMask & PICTURE_BUFFER_DESC_Cr_FLAG) {

            chromaResidualPtr = &(((EB_S16*)candidateBuffer->residualQuantCoeffPtr->bufferCr)[tuCrOriginIndex]);

            // Cr Transform
            EstimateTransform(
                chromaResidualPtr,
                candidateBuffer->residualQuantCoeffPtr->strideCr,
                &(((EB_S16*)tuTransCoeffTmpPtr->bufferCr)[tuCrOriginIndex]),
                tuTransCoeffTmpPtr->strideCr,
                chromatTuSize,
                contextPtr->transformInnerArrayPtr,
                0,
                EB_FALSE,
                correctedPFMode);

            // Cr Quantization + inverse quantization
            UnifiedQuantizeInvQuantize_R(
                &(((EB_S16*)tuTransCoeffTmpPtr->bufferCr)[tuCrOriginIndex]),
                tuTransCoeffTmpPtr->strideCr,
                &(((EB_S16*)tuQuantCoeffTmpPtr->bufferCr)[tuCrOriginIndex]),
                &(((EB_S16*)candidateBuffer->reconCoeffPtr->bufferCr)[tuCrOriginIndex]),
                crQp,
                inputPicturePtr->bitDepth,
                chromatTuSize,
                pictureControlSetPtr->sliceType,
                &(crCountNonZeroCoeffs[tuIndex]),
                -1,
                correctedPFMode,
                0,
                0,
                0,
                0,
                0,
                candidateBuffer->candidatePtr->type,
                COMPONENT_CHROMA,
                pictureControlSetPtr->temporalLayerIndex,
                EB_FALSE,
                0,
                contextPtr->rdoqPmCoreMethod,
                (CabacEncodeContext_t*)contextPtr->coeffEstEntropyCoderPtr->cabacEncodeContextPtr,
                contextPtr->fullLambda,
                candidateBuffer->candidatePtr->intraLumaMode,
                EB_INTRA_CHROMA_DM,
                pictureControlSetPtr->cabacCost);

            if (contextPtr->spatialSseFullLoop == EB_TRUE) {
                if (crCountNonZeroCoeffs[tuIndex]) {

                    /* Same handling as for Cb, on the Cr plane and with
                     * the Cr origin index. */
                    if (correctedPFMode) {
                        PfZeroOutUselessQuadrants(
                            &(((EB_S16*)candidateBuffer->reconCoeffPtr->bufferCr)[tuCrOriginIndex]),
                            candidateBuffer->reconCoeffPtr->strideCr,
                            (chromatTuSize >> 1));
                    }

                    EstimateInvTransform(
                        &(((EB_S16*)candidateBuffer->reconCoeffPtr->bufferCr)[tuCrOriginIndex]),
                        candidateBuffer->reconCoeffPtr->strideCr,
                        &(((EB_S16*)contextPtr->transQuantBuffersPtr->tuTransCoeff2Nx2NPtr->bufferCr)[tuCrOriginIndex]),
                        contextPtr->transQuantBuffersPtr->tuTransCoeff2Nx2NPtr->strideCr,
                        chromatTuSize,
                        contextPtr->transformInnerArrayPtr,
                        BIT_INCREMENT_8BIT,
                        EB_FALSE,
                        EB_FALSE);

                    PictureAddition(
                        &(candidateBuffer->predictionPtr->bufferCr[tuCrOriginIndex]),
                        candidateBuffer->predictionPtr->strideCr,
                        &(((EB_S16*)(contextPtr->transQuantBuffersPtr->tuTransCoeff2Nx2NPtr->bufferCr))[tuCrOriginIndex]),
                        contextPtr->transQuantBuffersPtr->tuTransCoeff2Nx2NPtr->strideCr,
                        &(candidateBuffer->reconPtr->bufferCr[tuCrOriginIndex]),
                        candidateBuffer->reconPtr->strideCr,
                        chromatTuSize,
                        chromatTuSize);
                }
                else {
                    /* No coefficients: reconstruction is the prediction. */
                    PictureCopy8Bit(
                        candidateBuffer->predictionPtr,
                        tuOriginIndex,
                        tuCrOriginIndex,
                        candidateBuffer->reconPtr,
                        tuOriginIndex,
                        tuCrOriginIndex,
                        tuSize,
                        tuSize,
                        chromatTuSize,
                        chromatTuSize,
                        PICTURE_BUFFER_DESC_Cr_FLAG);
                }
            }
        }

        /* Advance to the next TU of this depth. The lookup happens before
         * the loop condition is tested, so it is also evaluated once with
         * tuItr == tuCount.
         * NOTE(review): confirm tuIndexList has an entry at that position. */
        ++tuItr;
        tuIndex = tuIndexList[tuStatPtr->depth][tuItr];

    } while (tuItr < tuCount);
}
867
868 //****************************************
869 // ************ CuFullDistortionFastTuMode ****************
870 //****************************************/
CuFullDistortionFastTuMode_R(EbPictureBufferDesc_t * inputPicturePtr,EB_U32 inputCbOriginIndex,LargestCodingUnit_t * lcuPtr,ModeDecisionCandidateBuffer_t * candidateBuffer,ModeDecisionContext_t * contextPtr,ModeDecisionCandidate_t * candidatePtr,const CodedUnitStats_t * cuStatsPtr,EB_U64 cbFullDistortion[DIST_CALC_TOTAL],EB_U64 crFullDistortion[DIST_CALC_TOTAL],EB_U32 countNonZeroCoeffs[3][MAX_NUM_OF_TU_PER_CU],EB_U32 componentMask,EB_U64 * cbCoeffBits,EB_U64 * crCoeffBits)871 void CuFullDistortionFastTuMode_R (
872 EbPictureBufferDesc_t *inputPicturePtr,
873 EB_U32 inputCbOriginIndex,
874 LargestCodingUnit_t *lcuPtr,
875 ModeDecisionCandidateBuffer_t *candidateBuffer,
876 ModeDecisionContext_t *contextPtr ,
877 ModeDecisionCandidate_t *candidatePtr,
878 const CodedUnitStats_t *cuStatsPtr,
879 EB_U64 cbFullDistortion[DIST_CALC_TOTAL] ,
880 EB_U64 crFullDistortion[DIST_CALC_TOTAL] ,
881 EB_U32 countNonZeroCoeffs[3][MAX_NUM_OF_TU_PER_CU],
882 EB_U32 componentMask,
883 EB_U64 *cbCoeffBits,
884 EB_U64 *crCoeffBits)
885 {
886 (void)lcuPtr;
887
888 EB_U64 yTuCoeffBits;
889 EB_U64 cbTuCoeffBits;
890 EB_U64 crTuCoeffBits;
891 EB_U32 tuOriginIndex;
892 EB_U32 tuOriginX;
893 EB_U32 tuOriginY;
894 EB_U32 currentTuIndex;
895 EB_U32 chromaShift;
896 EB_U32 tuChromaOriginIndex;
897 EB_U64 tuFullDistortion[3][DIST_CALC_TOTAL];
898 EbPictureBufferDesc_t *transformBuffer;
899 EB_U32 tuTotalCount;
900 EB_U32 tuSize;
901 EB_U32 chromaTuSize;
902 const TransformUnitStats_t *tuStatPtr;
903 EB_U32 tuItr = 0;
904
905 if (cuStatsPtr->size == MAX_LCU_SIZE){
906 currentTuIndex = 1;
907 transformBuffer = contextPtr->transQuantBuffersPtr->tuTransCoeffNxNPtr;
908 tuTotalCount = 4;
909
910 }
911 else{
912 currentTuIndex = 0;
913 transformBuffer = contextPtr->transQuantBuffersPtr->tuTransCoeff2Nx2NPtr;
914 tuTotalCount = 1;
915 }
916
917 do {
918 tuStatPtr = GetTransformUnitStats(currentTuIndex);
919
920 tuOriginX = TU_ORIGIN_ADJUST(cuStatsPtr->originX, cuStatsPtr->size, tuStatPtr->offsetX);
921 tuOriginY = TU_ORIGIN_ADJUST(cuStatsPtr->originY, cuStatsPtr->size, tuStatPtr->offsetY);
922 tuSize = cuStatsPtr->size >> tuStatPtr->depth;
923 chromaTuSize = tuSize == 4 ? tuSize : (tuSize >> 1);
924 tuOriginIndex = tuOriginX + tuOriginY * candidateBuffer->residualQuantCoeffPtr->strideY ;
925 tuChromaOriginIndex = tuSize == 4 ?
926 tuOriginIndex :
927 ((tuOriginX + tuOriginY * candidateBuffer->residualQuantCoeffPtr->strideCb) >> 1);
928
929 // Reset the Bit Costs
930 yTuCoeffBits = 0;
931 cbTuCoeffBits = 0;
932 crTuCoeffBits = 0;
933
934 if (componentMask & PICTURE_BUFFER_DESC_CHROMA_MASK){
935
936 EB_U32 countNonZeroCoeffsAll[3];
937 countNonZeroCoeffsAll[0] = countNonZeroCoeffs[0][currentTuIndex];
938 countNonZeroCoeffsAll[1] = countNonZeroCoeffs[1][currentTuIndex];
939 countNonZeroCoeffsAll[2] = countNonZeroCoeffs[2][currentTuIndex];
940
941 EB_PF_MODE correctedPFMode = contextPtr->pfMdMode;
942
943 if(chromaTuSize == 4)
944 correctedPFMode = PF_OFF;
945 else if(chromaTuSize == 8 && contextPtr->pfMdMode == PF_N4)
946 correctedPFMode = PF_N2;
947
948 if (contextPtr->spatialSseFullLoop == EB_TRUE) {
949
950 tuFullDistortion[1][DIST_CALC_RESIDUAL] = SpatialFullDistortionKernel_funcPtrArray[!!(ASM_TYPES & PREAVX2_MASK)][Log2f(chromaTuSize) - 2](
951 &(inputPicturePtr->bufferCb[inputCbOriginIndex]),
952 inputPicturePtr->strideCb,
953 &(candidateBuffer->reconPtr->bufferCb[tuChromaOriginIndex]),
954 candidateBuffer->reconPtr->strideCb,
955 chromaTuSize,
956 chromaTuSize);
957
958
959 tuFullDistortion[1][DIST_CALC_PREDICTION] = SpatialFullDistortionKernel_funcPtrArray[!!(ASM_TYPES & PREAVX2_MASK)][Log2f(chromaTuSize) - 2](
960 &(inputPicturePtr->bufferCb[inputCbOriginIndex]),
961 inputPicturePtr->strideCb,
962 &(candidateBuffer->predictionPtr->bufferCb[tuChromaOriginIndex]),
963 candidateBuffer->predictionPtr->strideCb,
964 chromaTuSize,
965 chromaTuSize);
966
967 tuFullDistortion[2][DIST_CALC_RESIDUAL] = SpatialFullDistortionKernel_funcPtrArray[!!(ASM_TYPES & PREAVX2_MASK)][Log2f(chromaTuSize) - 2](
968 &(inputPicturePtr->bufferCr[inputCbOriginIndex]),
969 inputPicturePtr->strideCr,
970 &(candidateBuffer->reconPtr->bufferCr[tuChromaOriginIndex]),
971 candidateBuffer->reconPtr->strideCr,
972 chromaTuSize,
973 chromaTuSize);
974
975 tuFullDistortion[2][DIST_CALC_PREDICTION] = SpatialFullDistortionKernel_funcPtrArray[!!(ASM_TYPES & PREAVX2_MASK)][Log2f(chromaTuSize) - 2](
976 &(inputPicturePtr->bufferCr[inputCbOriginIndex]),
977 inputPicturePtr->strideCr,
978 &(candidateBuffer->predictionPtr->bufferCr[tuChromaOriginIndex]),
979 candidateBuffer->predictionPtr->strideCr,
980 chromaTuSize,
981 chromaTuSize);
982
983 }
984 else {
985 // *Full Distortion (SSE)
986 // *Note - there are known issues with how this distortion metric is currently
987 // calculated. The amount of scaling between the two arrays is not
988 // equivalent.
989 PictureFullDistortion_R(
990 transformBuffer,
991 tuOriginIndex,
992 tuChromaOriginIndex,
993 candidateBuffer->reconCoeffPtr,
994 (tuSize >> contextPtr->pfMdMode),
995 (chromaTuSize >> correctedPFMode),
996 PICTURE_BUFFER_DESC_CHROMA_MASK,//componentMask,
997 tuFullDistortion[0],
998 tuFullDistortion[1],
999 tuFullDistortion[2],
1000 countNonZeroCoeffsAll,
1001 candidateBuffer->candidatePtr->type);
1002
1003
1004 chromaShift = 2 * (7 - Log2f(chromaTuSize));
1005 tuFullDistortion[1][DIST_CALC_RESIDUAL] = (tuFullDistortion[1][DIST_CALC_RESIDUAL] + (EB_U64)(1 << (chromaShift - 1))) >> chromaShift;
1006 tuFullDistortion[1][DIST_CALC_PREDICTION] = (tuFullDistortion[1][DIST_CALC_PREDICTION] + (EB_U64)(1 << (chromaShift - 1))) >> chromaShift;
1007 tuFullDistortion[2][DIST_CALC_RESIDUAL] = (tuFullDistortion[2][DIST_CALC_RESIDUAL] + (EB_U64)(1 << (chromaShift - 1))) >> chromaShift;
1008 tuFullDistortion[2][DIST_CALC_PREDICTION] = (tuFullDistortion[2][DIST_CALC_PREDICTION] + (EB_U64)(1 << (chromaShift - 1))) >> chromaShift;
1009
1010 }
1011
1012 TuEstimateCoeffBits_R(
1013 tuOriginIndex,
1014 tuChromaOriginIndex,
1015 PICTURE_BUFFER_DESC_CHROMA_MASK,//componentMask,
1016 contextPtr->coeffEstEntropyCoderPtr,
1017 candidateBuffer->residualQuantCoeffPtr,
1018 countNonZeroCoeffs[0][currentTuIndex],
1019 countNonZeroCoeffs[1][currentTuIndex],
1020 countNonZeroCoeffs[2][currentTuIndex],
1021 &yTuCoeffBits,
1022 &cbTuCoeffBits,
1023 &crTuCoeffBits,
1024 candidateBuffer->candidatePtr->transformSize,
1025 candidateBuffer->candidatePtr->transformChromaSize,
1026 candidateBuffer->candidatePtr->type,
1027 candidateBuffer->candidatePtr->intraLumaMode,
1028 EB_INTRA_CHROMA_DM,
1029 correctedPFMode,
1030 contextPtr->coeffCabacUpdate,
1031 &(candidateBuffer->candBuffCoeffCtxModel),
1032 contextPtr->CabacCost);
1033
1034 TuCalcCost(
1035 contextPtr->cuSize,
1036 candidatePtr,
1037 currentTuIndex,
1038 tuSize,
1039 chromaTuSize,
1040 countNonZeroCoeffs[0][currentTuIndex],
1041 countNonZeroCoeffs[1][currentTuIndex],
1042 countNonZeroCoeffs[2][currentTuIndex],
1043 tuFullDistortion[0],
1044 tuFullDistortion[1],
1045 tuFullDistortion[2],
1046 PICTURE_BUFFER_DESC_CHROMA_MASK,//componentMask,
1047 &yTuCoeffBits,
1048 &cbTuCoeffBits,
1049 &crTuCoeffBits,
1050 contextPtr->qp,
1051 contextPtr->fullLambda,
1052 contextPtr->fullChromaLambda);
1053
1054 *cbCoeffBits += cbTuCoeffBits;
1055 *crCoeffBits += crTuCoeffBits;
1056 cbFullDistortion[DIST_CALC_RESIDUAL] += tuFullDistortion[1][DIST_CALC_RESIDUAL];
1057 crFullDistortion[DIST_CALC_RESIDUAL] += tuFullDistortion[2][DIST_CALC_RESIDUAL];
1058 cbFullDistortion[DIST_CALC_PREDICTION] += tuFullDistortion[1][DIST_CALC_PREDICTION];
1059 crFullDistortion[DIST_CALC_PREDICTION] += tuFullDistortion[2][DIST_CALC_PREDICTION];
1060
1061 }
1062
1063 ++tuItr;
1064 currentTuIndex = tuIndexList[tuStatPtr->depth][tuItr];
1065
1066 } while (tuItr < tuTotalCount);
1067 }
1068
1069
/*********************************************************************
 * ExitInterDepthDecision
 *
 * Bottom-up inter-depth decision run after the CU at leafIndex has
 * been evaluated.
 *
 *  1. The "no split" flag rate of the leaf is added to its cost and the
 *     8x8 / 16x16 group counters are updated from the leaf depth.
 *  2. Up to three parent-vs-children comparisons are performed
 *     (depth 2 vs 3, depth 1 vs 2, depth 0 vs 1). Each one is gated by
 *     the GROUP_OF_4_* origin test and, for the upper two stages, by the
 *     corresponding group counter having reached 4.
 *  3. Whenever the parent cost (own cost + no-split rate) is not larger
 *     than the sum of the four children costs + split rate, the parent
 *     split flag is cleared and the parent becomes the returned index.
 *
 * Parameters:
 *   contextPtr           - mode decision context (per-CU costs, counters)
 *   leafIndex            - index of the CU that was just evaluated
 *   tbPtr                - LCU whose codedLeafArrayPtr holds the CUs
 *   lcuAddr              - unused
 *   tbOriginX/tbOriginY  - origin of the LCU, added to the CU offsets
 *   fullLambda           - lambda forwarded to SplitFlagRate
 *   mdRateEstimationPtr  - rate tables forwarded to SplitFlagRate
 *   pictureControlSetPtr - picture-level data (slice type, sequence set)
 *
 * Returns the index of the shallowest parent CU that won a comparison,
 * or leafIndex when no merge took place.
 *********************************************************************/
EB_U32 ExitInterDepthDecision(
    ModeDecisionContext_t          *contextPtr,
    EB_U32                          leafIndex,
    LargestCodingUnit_t            *tbPtr,
    EB_U32                          lcuAddr,
    EB_U32                          tbOriginX,
    EB_U32                          tbOriginY,
    EB_U64                          fullLambda,
    MdRateEstimationContext_t      *mdRateEstimationPtr,
    PictureControlSet_t            *pictureControlSetPtr)
{
    SequenceControlSet_t   *sequenceControlSetPtr = (SequenceControlSet_t*)pictureControlSetPtr->sequenceControlSetWrapperPtr->objectPtr;
    EncodeContext_t        *encodeContextPtr      = sequenceControlSetPtr->encodeContextPtr;
    EB_U32                  tbMaxDepth            = sequenceControlSetPtr->maxLcuDepth;

    // Index returned to the caller; moves up each time a parent wins
    EB_U32                  resultCuIndex         = leafIndex;

    // Merge candidates per depth; default to the leaf when a stage is skipped
    EB_U32                  parentD2Index         = leafIndex;
    EB_U32                  parentD1Index         = leafIndex;
    EB_U32                  parentD0Index;

    // The three siblings that precede the current block in coding order
    EB_U32                  sibLeftIndex;
    EB_U32                  sibTopIndex;
    EB_U32                  sibTopLeftIndex;

    EB_U64                  leafNoSplitRate       = 0;
    EB_U64                  noSplitRate           = 0;
    EB_U64                  splitRate             = 0;
    EB_U64                  parentCost            = 0;
    EB_U64                  childrenCost          = 0;

    EB_U32                  cuOriginX;
    EB_U32                  cuOriginY;

    const CodedUnitStats_t *curCuStatsPtr;
    const CodedUnitStats_t *depthTwoCuStatsPtr;
    const CodedUnitStats_t *depthOneCuStatsPtr;
    const CodedUnitStats_t *depthZeroCuStatsPtr;

    (void)lcuAddr;

    curCuStatsPtr = GetCodedUnitStats(leafIndex);
    cuOriginX     = tbOriginX + curCuStatsPtr->originX;
    cuOriginY     = tbOriginY + curCuStatsPtr->originY;

    // The leaf is kept unsplit: charge the "no split" flag rate to its cost
    SplitFlagRate(
        contextPtr,
        tbPtr->codedLeafArrayPtr[leafIndex],
        0,
        &leafNoSplitRate,
        contextPtr->fullLambda,
        contextPtr->mdRateEstimationPtr,
        sequenceControlSetPtr->maxLcuDepth);

    contextPtr->mdLocalCuUnit[leafIndex].cost += leafNoSplitRate;

    // Keep the group counters in step with the depth of the leaf
    switch (curCuStatsPtr->depth) {
    case 0:
        contextPtr->groupOf16x16BlocksCount = 0;
        break;
    case 1:
        contextPtr->groupOf16x16BlocksCount++;
        contextPtr->groupOf8x8BlocksCount = 0;
        break;
    case 2:
        contextPtr->groupOf8x8BlocksCount++;
        break;
    default:
        break;
    }

    /*** Stage 0: depth 2 vs depth 3 ***/
    if (GROUP_OF_4_8x8_BLOCKS(cuOriginX, cuOriginY)) {

        contextPtr->groupOf8x8BlocksCount++;

        // Step back through the three previously coded siblings; the parent
        // candidate sits right before the top-left one
        sibLeftIndex    = leafIndex - DEPTH_THREE_STEP;
        sibTopIndex     = sibLeftIndex - DEPTH_THREE_STEP;
        sibTopLeftIndex = sibTopIndex - DEPTH_THREE_STEP;
        parentD2Index   = sibTopLeftIndex - 1;

        // The parent may never have been evaluated: borrow the neighbor info
        // of its top-left child for the split-context derivation
        contextPtr->mdLocalCuUnit[parentD2Index].leftNeighborMode  = contextPtr->mdLocalCuUnit[sibTopLeftIndex].leftNeighborMode;
        contextPtr->mdLocalCuUnit[parentD2Index].leftNeighborDepth = contextPtr->mdLocalCuUnit[sibTopLeftIndex].leftNeighborDepth;
        contextPtr->mdLocalCuUnit[parentD2Index].topNeighborMode   = contextPtr->mdLocalCuUnit[sibTopLeftIndex].topNeighborMode;
        contextPtr->mdLocalCuUnit[parentD2Index].topNeighborDepth  = contextPtr->mdLocalCuUnit[sibTopLeftIndex].topNeighborDepth;

        // Parent cost (not split)
        SplitFlagRate(
            contextPtr,
            tbPtr->codedLeafArrayPtr[parentD2Index],
            0,
            &noSplitRate,
            fullLambda,
            mdRateEstimationPtr,
            tbMaxDepth);

        // An untested parent gets the maximum cost
        if (contextPtr->mdLocalCuUnit[parentD2Index].testedCuFlag == EB_FALSE) {
            contextPtr->mdLocalCuUnit[parentD2Index].cost = MAX_CU_COST;
        }

        parentCost = contextPtr->mdLocalCuUnit[parentD2Index].cost + noSplitRate;

        // Children cost (split)
        SplitFlagRate(
            contextPtr,
            tbPtr->codedLeafArrayPtr[parentD2Index],
            1,
            &splitRate,
            fullLambda,
            mdRateEstimationPtr,
            tbMaxDepth);

        childrenCost =
            contextPtr->mdLocalCuUnit[leafIndex].cost +
            contextPtr->mdLocalCuUnit[sibLeftIndex].cost +
            contextPtr->mdLocalCuUnit[sibTopIndex].cost +
            contextPtr->mdLocalCuUnit[sibTopLeftIndex].cost +
            splitRate;

        if (parentCost > childrenCost) {
            // Split wins: the parent carries the children cost into the next stage
            contextPtr->mdLocalCuUnit[parentD2Index].cost = childrenCost;
        }
        else {
            // Parent wins: clear its split flag and report it as the last CU
            tbPtr->codedLeafArrayPtr[parentD2Index]->splitFlag = EB_FALSE;
            contextPtr->mdLocalCuUnit[parentD2Index].cost      = parentCost;
            resultCuIndex                                      = parentD2Index;
        }
    }

    /*** Stage 1: depth 1 vs depth 2 ***/
    depthTwoCuStatsPtr = GetCodedUnitStats(parentD2Index);
    cuOriginX          = tbOriginX + depthTwoCuStatsPtr->originX;
    cuOriginY          = tbOriginY + depthTwoCuStatsPtr->originY;

    if (GROUP_OF_4_16x16_BLOCKS(cuOriginX, cuOriginY) &&
        (contextPtr->groupOf8x8BlocksCount == 4)) {

        contextPtr->groupOf8x8BlocksCount = 0;
        contextPtr->groupOf16x16BlocksCount++;

        sibLeftIndex    = parentD2Index - DEPTH_TWO_STEP;
        sibTopIndex     = sibLeftIndex - DEPTH_TWO_STEP;
        sibTopLeftIndex = sibTopIndex - DEPTH_TWO_STEP;
        parentD1Index   = sibTopLeftIndex - 1;

        // Borrow the neighbor info of the top-left child for the split context
        contextPtr->mdLocalCuUnit[parentD1Index].leftNeighborMode  = contextPtr->mdLocalCuUnit[sibTopLeftIndex].leftNeighborMode;
        contextPtr->mdLocalCuUnit[parentD1Index].leftNeighborDepth = contextPtr->mdLocalCuUnit[sibTopLeftIndex].leftNeighborDepth;
        contextPtr->mdLocalCuUnit[parentD1Index].topNeighborMode   = contextPtr->mdLocalCuUnit[sibTopLeftIndex].topNeighborMode;
        contextPtr->mdLocalCuUnit[parentD1Index].topNeighborDepth  = contextPtr->mdLocalCuUnit[sibTopLeftIndex].topNeighborDepth;

        depthOneCuStatsPtr = GetCodedUnitStats(parentD1Index);

        // Only compare when the derived index really is a depth-1 CU
        if (depthOneCuStatsPtr->depth == 1) {

            // Parent cost (not split)
            SplitFlagRate(
                contextPtr,
                tbPtr->codedLeafArrayPtr[parentD1Index],
                0,
                &noSplitRate,
                fullLambda,
                mdRateEstimationPtr,
                tbMaxDepth);

            if (contextPtr->mdLocalCuUnit[parentD1Index].testedCuFlag == EB_FALSE) {
                contextPtr->mdLocalCuUnit[parentD1Index].cost = MAX_CU_COST;
            }

            parentCost = contextPtr->mdLocalCuUnit[parentD1Index].cost + noSplitRate;

            // Children cost (split)
            SplitFlagRate(
                contextPtr,
                tbPtr->codedLeafArrayPtr[parentD1Index],
                1,
                &splitRate,
                fullLambda,
                mdRateEstimationPtr,
                tbMaxDepth);

            childrenCost =
                contextPtr->mdLocalCuUnit[parentD2Index].cost +
                contextPtr->mdLocalCuUnit[sibLeftIndex].cost +
                contextPtr->mdLocalCuUnit[sibTopIndex].cost +
                contextPtr->mdLocalCuUnit[sibTopLeftIndex].cost +
                splitRate;

            // Every child is expected to carry a real cost at this point
            CHECK_REPORT_ERROR(
                (contextPtr->mdLocalCuUnit[parentD2Index].cost != MAX_CU_COST),
                encodeContextPtr->appCallbackPtr,
                EB_ENC_FL_ERROR4);
            CHECK_REPORT_ERROR(
                (contextPtr->mdLocalCuUnit[sibLeftIndex].cost != MAX_CU_COST),
                encodeContextPtr->appCallbackPtr,
                EB_ENC_FL_ERROR4);
            CHECK_REPORT_ERROR(
                (contextPtr->mdLocalCuUnit[sibTopIndex].cost != MAX_CU_COST),
                encodeContextPtr->appCallbackPtr,
                EB_ENC_FL_ERROR4);
            CHECK_REPORT_ERROR(
                (contextPtr->mdLocalCuUnit[sibTopLeftIndex].cost != MAX_CU_COST),
                encodeContextPtr->appCallbackPtr,
                EB_ENC_FL_ERROR4);

            if (parentCost > childrenCost) {
                // Split wins: the parent carries the children cost into the next stage
                contextPtr->mdLocalCuUnit[parentD1Index].cost = childrenCost;
            }
            else {
                // Parent wins: clear its split flag and report it as the last CU
                tbPtr->codedLeafArrayPtr[parentD1Index]->splitFlag = EB_FALSE;
                contextPtr->mdLocalCuUnit[parentD1Index].cost      = parentCost;
                resultCuIndex                                      = parentD1Index;
            }
        }
    }

    /*** Stage 2: depth 0 vs depth 1 ***/
    // Only P and B slices take part in this stage (see the slice-type test below).
    // NOTE(review): the depth-1 stats are fetched here, yet the origin test
    // below is still built from the depth-2 candidate's origin - confirm
    // whether depthOneCuStatsPtr was the intended source.
    depthOneCuStatsPtr = GetCodedUnitStats(parentD1Index);
    cuOriginX          = tbOriginX + depthTwoCuStatsPtr->originX;
    cuOriginY          = tbOriginY + depthTwoCuStatsPtr->originY;

    if ((pictureControlSetPtr->sliceType == EB_P_PICTURE || pictureControlSetPtr->sliceType == EB_B_PICTURE) &&
        GROUP_OF_4_32x32_BLOCKS(cuOriginX, cuOriginY) &&
        (contextPtr->groupOf16x16BlocksCount == 4)) {

        contextPtr->groupOf16x16BlocksCount = 0;

        sibLeftIndex    = parentD1Index - DEPTH_ONE_STEP;
        sibTopIndex     = sibLeftIndex - DEPTH_ONE_STEP;
        sibTopLeftIndex = sibTopIndex - DEPTH_ONE_STEP;
        parentD0Index   = sibTopLeftIndex - 1;

        // Borrow the neighbor info of the top-left child for the split context
        contextPtr->mdLocalCuUnit[parentD0Index].leftNeighborMode  = contextPtr->mdLocalCuUnit[sibTopLeftIndex].leftNeighborMode;
        contextPtr->mdLocalCuUnit[parentD0Index].leftNeighborDepth = contextPtr->mdLocalCuUnit[sibTopLeftIndex].leftNeighborDepth;
        contextPtr->mdLocalCuUnit[parentD0Index].topNeighborMode   = contextPtr->mdLocalCuUnit[sibTopLeftIndex].topNeighborMode;
        contextPtr->mdLocalCuUnit[parentD0Index].topNeighborDepth  = contextPtr->mdLocalCuUnit[sibTopLeftIndex].topNeighborDepth;

        depthZeroCuStatsPtr = GetCodedUnitStats(parentD0Index);

        // Only compare when the derived index really is the depth-0 CU
        if (depthZeroCuStatsPtr->depth == 0) {

            // Parent cost (not split)
            SplitFlagRate(
                contextPtr,
                tbPtr->codedLeafArrayPtr[parentD0Index],
                0,
                &noSplitRate,
                fullLambda,
                mdRateEstimationPtr,
                tbMaxDepth);

            if (contextPtr->mdLocalCuUnit[parentD0Index].testedCuFlag == EB_FALSE) {
                contextPtr->mdLocalCuUnit[parentD0Index].cost = MAX_CU_COST;
            }

            parentCost = contextPtr->mdLocalCuUnit[parentD0Index].cost + noSplitRate;

            // Children cost (split)
            SplitFlagRate(
                contextPtr,
                tbPtr->codedLeafArrayPtr[parentD0Index],
                1,
                &splitRate,
                fullLambda,
                mdRateEstimationPtr,
                tbMaxDepth);

            childrenCost =
                contextPtr->mdLocalCuUnit[parentD1Index].cost +
                contextPtr->mdLocalCuUnit[sibLeftIndex].cost +
                contextPtr->mdLocalCuUnit[sibTopIndex].cost +
                contextPtr->mdLocalCuUnit[sibTopLeftIndex].cost +
                splitRate;

            // Topmost stage: no cost needs to be propagated any further
            if (!(parentCost > childrenCost)) {
                tbPtr->codedLeafArrayPtr[parentD0Index]->splitFlag = EB_FALSE;
                resultCuIndex                                      = parentD0Index;
            }
        }
    }

    return resultCuIndex;
}
1391
StopSplitCondition(SequenceControlSet_t * sequenceControlSetPtr,PictureControlSet_t * pictureControlSetPtr,ModeDecisionContext_t * contextPtr,const CodedUnitStats_t * curCuStatsPtr,EB_U32 lcuAddr,EB_U32 leafIndex)1392 EB_BOOL StopSplitCondition(
1393 SequenceControlSet_t *sequenceControlSetPtr,
1394 PictureControlSet_t *pictureControlSetPtr,
1395 ModeDecisionContext_t *contextPtr,
1396 const CodedUnitStats_t *curCuStatsPtr,
1397 EB_U32 lcuAddr,
1398 EB_U32 leafIndex)
1399 {
1400
1401 LcuParams_t *lcuParams = &sequenceControlSetPtr->lcuParamsArray[lcuAddr];
1402
1403 EB_BOOL stopSplitFlag = EB_TRUE;
1404
1405 if ( pictureControlSetPtr->ParentPcsPtr->depthMode == PICT_FULL85_DEPTH_MODE ||
1406 pictureControlSetPtr->ParentPcsPtr->depthMode == PICT_FULL84_DEPTH_MODE ||
1407 (pictureControlSetPtr->ParentPcsPtr->depthMode == PICT_LCU_SWITCH_DEPTH_MODE && (pictureControlSetPtr->ParentPcsPtr->lcuMdModeArray[lcuAddr] == LCU_FULL85_DEPTH_MODE || pictureControlSetPtr->ParentPcsPtr->lcuMdModeArray[lcuAddr] == LCU_FULL84_DEPTH_MODE || pictureControlSetPtr->ParentPcsPtr->lcuMdModeArray[lcuAddr] == LCU_AVC_DEPTH_MODE))
1408 ) {
1409
1410 stopSplitFlag = EB_FALSE;
1411 }
1412 else if (pictureControlSetPtr->temporalLayerIndex == 0) {
1413 stopSplitFlag = EB_FALSE;
1414 }
1415 else{
1416 if (sequenceControlSetPtr->staticConfig.qp >= 20 &&
1417 pictureControlSetPtr->sliceType != EB_I_PICTURE &&
1418 pictureControlSetPtr->temporalLayerIndex == 0 &&
1419 pictureControlSetPtr->ParentPcsPtr->logoPicFlag &&
1420 pictureControlSetPtr->ParentPcsPtr->edgeResultsPtr[lcuAddr].edgeBlockNum) {
1421
1422 stopSplitFlag = EB_FALSE;
1423 }
1424
1425
1426 if (stopSplitFlag != EB_FALSE)
1427 {
1428
1429 EB_U32 lcuEdgeFlag = pictureControlSetPtr->ParentPcsPtr->edgeResultsPtr[lcuAddr].edgeBlockNum == 0 ? 0 : 1;
1430 EB_U64 d0Th;
1431 EB_U64 d1Th;
1432 EB_U64 d2Th;
1433
1434 d0Th = depth0Th[lcuEdgeFlag][pictureControlSetPtr->ParentPcsPtr->hierarchicalLevels][pictureControlSetPtr->temporalLayerIndex];
1435 d1Th = depth1Th[lcuEdgeFlag][pictureControlSetPtr->ParentPcsPtr->hierarchicalLevels][pictureControlSetPtr->temporalLayerIndex];
1436 d2Th = depth2Th[lcuEdgeFlag][pictureControlSetPtr->ParentPcsPtr->hierarchicalLevels][pictureControlSetPtr->temporalLayerIndex];
1437
1438 EB_BOOL interSlice = (pictureControlSetPtr->sliceType == EB_P_PICTURE) || (pictureControlSetPtr->sliceType == EB_B_PICTURE) ? EB_TRUE : EB_FALSE;
1439 EB_BOOL stopAtDepth0 = ((curCuStatsPtr->depth == 0) && (contextPtr->mdLocalCuUnit[leafIndex].fullDistortion < d0Th)) ? EB_TRUE : EB_FALSE;
1440 EB_BOOL stopAtDepth1 = ((curCuStatsPtr->depth == 1) && (contextPtr->mdLocalCuUnit[leafIndex].fullDistortion < d1Th)) ? EB_TRUE : EB_FALSE;
1441 EB_BOOL stopAtDepth2 = ((curCuStatsPtr->depth == 2) && (contextPtr->mdLocalCuUnit[leafIndex].fullDistortion < d2Th)) ? EB_TRUE : EB_FALSE;
1442
1443 stopSplitFlag = (interSlice && (stopAtDepth0 || stopAtDepth1 || stopAtDepth2)) ? EB_TRUE : EB_FALSE;
1444
1445 if (!lcuParams->isCompleteLcu ||
1446 pictureControlSetPtr->ParentPcsPtr->lcuIsolatedNonHomogeneousAreaArray[lcuAddr] ||
1447 (sequenceControlSetPtr->inputResolution < INPUT_SIZE_4K_RANGE && pictureControlSetPtr->lcuPtrArray[lcuAddr]->auraStatus == AURA_STATUS_1)) {
1448
1449 stopSplitFlag = EB_FALSE;
1450 }
1451
1452 }
1453
1454 }
1455 return stopSplitFlag;
1456 }
1457
1458 /**********************************************
1459 * Inter Depth Split Decision
1460 **********************************************/
ProductPerformInterDepthDecision(ModeDecisionContext_t * contextPtr,EB_U32 leafIndex,LargestCodingUnit_t * tbPtr,EB_U32 lcuAddr,EB_U32 tbOriginX,EB_U32 tbOriginY,EB_U64 fullLambda,MdRateEstimationContext_t * mdRateEstimationPtr,PictureControlSet_t * pictureControlSetPtr)1461 EB_U32 ProductPerformInterDepthDecision(
1462 ModeDecisionContext_t *contextPtr,
1463 EB_U32 leafIndex,
1464 LargestCodingUnit_t *tbPtr,
1465 EB_U32 lcuAddr,
1466 EB_U32 tbOriginX,
1467 EB_U32 tbOriginY,
1468 EB_U64 fullLambda,
1469 MdRateEstimationContext_t *mdRateEstimationPtr,
1470 PictureControlSet_t *pictureControlSetPtr)
1471 {
1472 EB_U32 lastCuIndex;
1473 EB_U32 leftCuIndex;
1474 EB_U32 topCuIndex;
1475 EB_U32 topLeftCuIndex;
1476 EB_U32 depthZeroCandidateCuIndex;
1477 EB_U32 depthOneCandidateCuIndex = leafIndex;
1478 EB_U32 depthTwoCandidateCuIndex = leafIndex;
1479 EB_U64 depthNRate = 0;
1480 EB_U64 depthNPlusOneRate = 0;
1481 EB_U64 depthNCost = 0;
1482 EB_U64 depthNPlusOneCost = 0;
1483 EB_U32 cuOriginX;
1484 EB_U32 cuOriginY;
1485
1486 EB_U32 tbMaxDepth = ((SequenceControlSet_t*)pictureControlSetPtr->sequenceControlSetWrapperPtr->objectPtr)->maxLcuDepth;
1487 EB_BOOL stopSplitFlag ;
1488 EB_BOOL lastDepthFlag = tbPtr->codedLeafArrayPtr[leafIndex]->splitFlag == EB_FALSE ? EB_TRUE : EB_FALSE;
1489 EncodeContext_t *encodeContextPtr = ((SequenceControlSet_t*)(pictureControlSetPtr->sequenceControlSetWrapperPtr->objectPtr))->encodeContextPtr;
1490 SequenceControlSet_t *sequenceControlSetPtr = (SequenceControlSet_t*)pictureControlSetPtr->sequenceControlSetWrapperPtr->objectPtr;
1491 const CodedUnitStats_t *curCuStatsPtr;
1492 const CodedUnitStats_t *depthTwoCuStatsPtr;
1493 const CodedUnitStats_t *depthOneCuStatsPtr;
1494 const CodedUnitStats_t *depthZeroCuStatsPtr;
1495
1496 lastCuIndex = leafIndex;
1497 curCuStatsPtr = GetCodedUnitStats(leafIndex);
1498 cuOriginX = tbOriginX + curCuStatsPtr->originX;
1499 cuOriginY = tbOriginY + curCuStatsPtr->originY;
1500 EB_U8 interDepthW12 = 0;
1501 EB_U8 interDepthW01 = 0;
1502
1503 stopSplitFlag = StopSplitCondition(
1504 sequenceControlSetPtr,
1505 pictureControlSetPtr,
1506 contextPtr,
1507 curCuStatsPtr,
1508 lcuAddr,
1509 leafIndex);
1510
1511 if (lastDepthFlag || stopSplitFlag) {
1512 tbPtr->codedLeafArrayPtr[leafIndex]->splitFlag = EB_FALSE;
1513
1514
1515 if (curCuStatsPtr->depth == 1) {
1516 contextPtr->groupOf16x16BlocksCount ++;
1517 } else if (curCuStatsPtr->depth == 2) {
1518 contextPtr->groupOf8x8BlocksCount ++;
1519 }
1520 }
1521
1522 /*** Stage 0: Inter depth decision: depth 2 vs depth 3 ***/
1523
1524 // Walks to the last coded 8x8 block for merging
1525 if ((GROUP_OF_4_8x8_BLOCKS(cuOriginX, cuOriginY))) {
1526
1527 depthTwoCandidateCuIndex = leafIndex - DEPTH_THREE_STEP - DEPTH_THREE_STEP - DEPTH_THREE_STEP - 1;
1528
1529 contextPtr->groupOf8x8BlocksCount ++;
1530
1531 // From the last coded cu index, get the indices of the left, top, and top left cus
1532 leftCuIndex = leafIndex - DEPTH_THREE_STEP;
1533 topCuIndex = leftCuIndex - DEPTH_THREE_STEP;
1534 topLeftCuIndex = topCuIndex - DEPTH_THREE_STEP;
1535
1536 // From the top left index, get the index of the candidate pu for merging
1537 depthTwoCandidateCuIndex = topLeftCuIndex - 1;
1538
1539 // Copy the Mode & Depth of the Top-Left N+1 block to the N block for the SplitContext calculation
1540 // This needs to be done in the case that the N block was initially not calculated.
1541
1542 contextPtr->mdLocalCuUnit[depthTwoCandidateCuIndex].leftNeighborMode = contextPtr->mdLocalCuUnit[topLeftCuIndex].leftNeighborMode;
1543 contextPtr->mdLocalCuUnit[depthTwoCandidateCuIndex].leftNeighborDepth = contextPtr->mdLocalCuUnit[topLeftCuIndex].leftNeighborDepth;
1544 contextPtr->mdLocalCuUnit[depthTwoCandidateCuIndex].topNeighborMode = contextPtr->mdLocalCuUnit[topLeftCuIndex].topNeighborMode;
1545 contextPtr->mdLocalCuUnit[depthTwoCandidateCuIndex].topNeighborDepth = contextPtr->mdLocalCuUnit[topLeftCuIndex].topNeighborDepth;
1546
1547 // Compute depth N cost
1548 SplitFlagRate(
1549 contextPtr,
1550 tbPtr->codedLeafArrayPtr[depthTwoCandidateCuIndex],
1551 0,
1552 &depthNRate,
1553 fullLambda,
1554 mdRateEstimationPtr,
1555 tbMaxDepth);
1556 if (contextPtr->mdLocalCuUnit[depthTwoCandidateCuIndex].testedCuFlag == EB_FALSE)
1557 contextPtr->mdLocalCuUnit[depthTwoCandidateCuIndex].cost = MAX_CU_COST;
1558
1559 depthNCost = contextPtr->mdLocalCuUnit[depthTwoCandidateCuIndex].cost + depthNRate;
1560
1561 // Compute depth N+1 cost
1562 SplitFlagRate(
1563 contextPtr,
1564 tbPtr->codedLeafArrayPtr[depthTwoCandidateCuIndex],
1565 1,
1566 &depthNPlusOneRate,
1567 fullLambda,
1568 mdRateEstimationPtr,
1569 tbMaxDepth);
1570 depthNPlusOneCost =
1571 contextPtr->mdLocalCuUnit[leafIndex].cost +
1572 contextPtr->mdLocalCuUnit[leftCuIndex].cost +
1573 contextPtr->mdLocalCuUnit[topCuIndex].cost +
1574 contextPtr->mdLocalCuUnit[topLeftCuIndex].cost +
1575 depthNPlusOneRate;
1576 // Inter depth comparison: depth 2 vs depth 3
1577 if (depthNCost <= depthNPlusOneCost){
1578
1579 // If the cost is low enough to warrant not spliting further:
1580 // 1. set the split flag of the candidate pu for merging to false
1581 // 2. update the last pu index
1582 tbPtr->codedLeafArrayPtr[depthTwoCandidateCuIndex]->splitFlag = EB_FALSE;
1583 contextPtr->mdLocalCuUnit[depthTwoCandidateCuIndex].cost = depthNCost;
1584 lastCuIndex = depthTwoCandidateCuIndex;
1585 }
1586 else {
1587 // If the cost is not low enough:
1588 // update the cost of the candidate pu for merging
1589 // this update is required for the next inter depth decision
1590 contextPtr->mdLocalCuUnit[depthTwoCandidateCuIndex].cost = depthNPlusOneCost;
1591 }
1592
1593
1594 }
1595
1596 // Stage 1: Inter depth decision: depth 1 vs depth 2
1597
1598 // Walks to the last coded 16x16 block for merging
1599 depthTwoCuStatsPtr = GetCodedUnitStats(depthTwoCandidateCuIndex);
1600 cuOriginX = tbOriginX + depthTwoCuStatsPtr->originX;
1601 cuOriginY = tbOriginY + depthTwoCuStatsPtr->originY;
1602 if (GROUP_OF_4_16x16_BLOCKS(cuOriginX, cuOriginY) &&
1603 (contextPtr->groupOf8x8BlocksCount == 4 ) ){
1604
1605 depthOneCandidateCuIndex = depthTwoCandidateCuIndex - DEPTH_TWO_STEP - DEPTH_TWO_STEP - DEPTH_TWO_STEP - 1;
1606
1607 contextPtr->groupOf8x8BlocksCount = 0;
1608 contextPtr->groupOf16x16BlocksCount ++;
1609
1610 // From the last coded pu index, get the indices of the left, top, and top left pus
1611 leftCuIndex = depthTwoCandidateCuIndex - DEPTH_TWO_STEP;
1612 topCuIndex = leftCuIndex - DEPTH_TWO_STEP;
1613 topLeftCuIndex = topCuIndex - DEPTH_TWO_STEP;
1614
1615 // Copy the Mode & Depth of the Top-Left N+1 block to the N block for the SplitContext calculation
1616 // This needs to be done in the case that the N block was initially not calculated.
1617
1618 contextPtr->mdLocalCuUnit[depthOneCandidateCuIndex].leftNeighborMode = contextPtr->mdLocalCuUnit[topLeftCuIndex].leftNeighborMode;
1619 contextPtr->mdLocalCuUnit[depthOneCandidateCuIndex].leftNeighborDepth = contextPtr->mdLocalCuUnit[topLeftCuIndex].leftNeighborDepth;
1620 contextPtr->mdLocalCuUnit[depthOneCandidateCuIndex].topNeighborMode = contextPtr->mdLocalCuUnit[topLeftCuIndex].topNeighborMode;
1621 contextPtr->mdLocalCuUnit[depthOneCandidateCuIndex].topNeighborDepth = contextPtr->mdLocalCuUnit[topLeftCuIndex].topNeighborDepth;
1622
1623 // From the top left index, get the index of the candidate pu for merging
1624 depthOneCandidateCuIndex = topLeftCuIndex - 1;
1625
1626 depthOneCuStatsPtr = GetCodedUnitStats(depthOneCandidateCuIndex);
1627 if (depthOneCuStatsPtr->depth == 1) {
1628
1629 // Compute depth N cost
1630 SplitFlagRate(
1631 contextPtr,
1632 tbPtr->codedLeafArrayPtr[depthOneCandidateCuIndex],
1633 0,
1634 &depthNRate,
1635 fullLambda,
1636 mdRateEstimationPtr,
1637 tbMaxDepth);
1638 if (contextPtr->mdLocalCuUnit[depthOneCandidateCuIndex].testedCuFlag == EB_FALSE)
1639 contextPtr->mdLocalCuUnit[depthOneCandidateCuIndex].cost = MAX_CU_COST;
1640 depthNCost = contextPtr->mdLocalCuUnit[depthOneCandidateCuIndex].cost + depthNRate;
1641
1642 // Compute depth N+1 cost
1643 SplitFlagRate(
1644 contextPtr,
1645 tbPtr->codedLeafArrayPtr[depthOneCandidateCuIndex],
1646 1,
1647 &depthNPlusOneRate,
1648 fullLambda,
1649 mdRateEstimationPtr,
1650 tbMaxDepth);
1651 depthNPlusOneCost =
1652 contextPtr->mdLocalCuUnit[depthTwoCandidateCuIndex].cost +
1653 contextPtr->mdLocalCuUnit[leftCuIndex].cost +
1654 contextPtr->mdLocalCuUnit[topCuIndex].cost +
1655 contextPtr->mdLocalCuUnit[topLeftCuIndex].cost +
1656 depthNPlusOneRate;
1657 CHECK_REPORT_ERROR(
1658 (contextPtr->mdLocalCuUnit[depthTwoCandidateCuIndex].cost != MAX_CU_COST),
1659 encodeContextPtr->appCallbackPtr,
1660 EB_ENC_FL_ERROR4);
1661 CHECK_REPORT_ERROR(
1662 (contextPtr->mdLocalCuUnit[leftCuIndex].cost != MAX_CU_COST),
1663 encodeContextPtr->appCallbackPtr,
1664 EB_ENC_FL_ERROR4);
1665 CHECK_REPORT_ERROR(
1666 (contextPtr->mdLocalCuUnit[topCuIndex].cost != MAX_CU_COST),
1667 encodeContextPtr->appCallbackPtr,
1668 EB_ENC_FL_ERROR4);
1669 CHECK_REPORT_ERROR(
1670 (contextPtr->mdLocalCuUnit[topLeftCuIndex].cost != MAX_CU_COST),
1671 encodeContextPtr->appCallbackPtr,
1672 EB_ENC_FL_ERROR4);
1673
1674 if (depthNPlusOneCost < MAX_CU_COST)
1675 depthNPlusOneCost = depthNPlusOneCost + ((EB_S64)depthNPlusOneCost*interDepthW12) / 100;
1676
1677 // Inter depth comparison: depth 1 vs depth 2
1678 if (depthNCost <= depthNPlusOneCost){
1679
1680 // If the cost is low enough to warrant not spliting further:
1681 // 1. set the split flag of the candidate pu for merging to false
1682 // 2. update the last pu index
1683 tbPtr->codedLeafArrayPtr[depthOneCandidateCuIndex]->splitFlag = EB_FALSE;
1684 contextPtr->mdLocalCuUnit[depthOneCandidateCuIndex].cost = depthNCost;
1685 lastCuIndex = depthOneCandidateCuIndex;
1686 }
1687 else {
1688 // If the cost is not low enough:
1689 // update the cost of the candidate pu for merging
1690 // this update is required for the next inter depth decision
1691 contextPtr->mdLocalCuUnit[depthOneCandidateCuIndex].cost = depthNPlusOneCost;
1692 }
1693
1694
1695 }
1696 }
1697
1698 // Stage 2: Inter depth decision: depth 0 vs depth 1
1699
1700 // Walks to the last coded 32x32 block for merging
1701 // Stage 2 isn't performed in I slices since the abcense of 64x64 candidates
1702 depthOneCuStatsPtr = GetCodedUnitStats(depthOneCandidateCuIndex);
1703 cuOriginX = tbOriginX + depthTwoCuStatsPtr->originX;
1704 cuOriginY = tbOriginY + depthTwoCuStatsPtr->originY;
1705 if ((pictureControlSetPtr->sliceType == EB_P_PICTURE || pictureControlSetPtr->sliceType == EB_B_PICTURE )
1706 && GROUP_OF_4_32x32_BLOCKS(cuOriginX, cuOriginY) &&
1707 (contextPtr->groupOf16x16BlocksCount == 4)) {
1708
1709 depthZeroCandidateCuIndex = depthOneCandidateCuIndex - DEPTH_ONE_STEP - DEPTH_ONE_STEP - DEPTH_ONE_STEP - 1;
1710
1711 contextPtr->groupOf16x16BlocksCount = 0;
1712
1713 // From the last coded pu index, get the indices of the left, top, and top left pus
1714 leftCuIndex = depthOneCandidateCuIndex - DEPTH_ONE_STEP;
1715 topCuIndex = leftCuIndex - DEPTH_ONE_STEP;
1716 topLeftCuIndex = topCuIndex - DEPTH_ONE_STEP;
1717
1718 // Copy the Mode & Depth of the Top-Left N+1 block to the N block for the SplitContext calculation
1719 // This needs to be done in the case that the N block was initially not calculated.
1720
1721 contextPtr->mdLocalCuUnit[depthZeroCandidateCuIndex].leftNeighborMode = contextPtr->mdLocalCuUnit[topLeftCuIndex].leftNeighborMode;
1722 contextPtr->mdLocalCuUnit[depthZeroCandidateCuIndex].leftNeighborDepth = contextPtr->mdLocalCuUnit[topLeftCuIndex].leftNeighborDepth;
1723 contextPtr->mdLocalCuUnit[depthZeroCandidateCuIndex].topNeighborMode = contextPtr->mdLocalCuUnit[topLeftCuIndex].topNeighborMode;
1724 contextPtr->mdLocalCuUnit[depthZeroCandidateCuIndex].topNeighborDepth = contextPtr->mdLocalCuUnit[topLeftCuIndex].topNeighborDepth;
1725
1726 // From the top left index, get the index of the candidate pu for merging
1727 depthZeroCandidateCuIndex = topLeftCuIndex - 1;
1728
1729 depthZeroCuStatsPtr = GetCodedUnitStats(depthZeroCandidateCuIndex);
1730 if (depthZeroCuStatsPtr->depth == 0) {
1731
1732 // Compute depth N cost
1733 SplitFlagRate(
1734 contextPtr,
1735 tbPtr->codedLeafArrayPtr[depthZeroCandidateCuIndex],
1736 0,
1737 &depthNRate,
1738 fullLambda,
1739 mdRateEstimationPtr,
1740 tbMaxDepth);
1741 if (contextPtr->mdLocalCuUnit[depthZeroCandidateCuIndex].testedCuFlag == EB_FALSE)
1742 contextPtr->mdLocalCuUnit[depthZeroCandidateCuIndex].cost = MAX_CU_COST;
1743 depthNCost = contextPtr->mdLocalCuUnit[depthZeroCandidateCuIndex].cost + depthNRate;
1744 // Compute depth N+1 cost
1745 SplitFlagRate(
1746 contextPtr,
1747 tbPtr->codedLeafArrayPtr[depthZeroCandidateCuIndex],
1748 1,
1749 &depthNPlusOneRate,
1750 fullLambda,
1751 mdRateEstimationPtr,
1752 tbMaxDepth);
1753 depthNPlusOneCost =
1754 contextPtr->mdLocalCuUnit[depthOneCandidateCuIndex].cost +
1755 contextPtr->mdLocalCuUnit[leftCuIndex].cost +
1756 contextPtr->mdLocalCuUnit[topCuIndex].cost +
1757 contextPtr->mdLocalCuUnit[topLeftCuIndex].cost +
1758 depthNPlusOneRate;
1759 if (depthNPlusOneCost < MAX_CU_COST)
1760 depthNPlusOneCost = depthNPlusOneCost + ((EB_S64)depthNPlusOneCost*interDepthW01) / 100;
1761
1762 // Inter depth comparison: depth 0 vs depth 1
1763 if (depthNCost <= depthNPlusOneCost){
1764
1765 // If the cost is low enough to warrant not spliting further:
1766 // 1. set the split flag of the candidate pu for merging to false
1767 // 2. update the last pu index
1768 tbPtr->codedLeafArrayPtr[depthZeroCandidateCuIndex]->splitFlag = EB_FALSE;
1769 lastCuIndex = depthZeroCandidateCuIndex;
1770 }
1771
1772
1773 }
1774 }
1775
1776 return lastCuIndex;
1777 }
1778
/*********************************************************************
 * PillarCopyNeighborContext
 *
 * Copies the left/top neighbor mode and depth stored for the CU at
 * srcCuIndex onto the CU at dstCuIndex. Used so that the split-flag
 * context of a parent (depth N) CU mirrors that of its top-left
 * depth N+1 child, which matters when the parent itself was never
 * evaluated.
 *********************************************************************/
static void PillarCopyNeighborContext(
    ModeDecisionContext_t *contextPtr,
    EB_U32                 dstCuIndex,
    EB_U32                 srcCuIndex)
{
    contextPtr->mdLocalCuUnit[dstCuIndex].leftNeighborMode  = contextPtr->mdLocalCuUnit[srcCuIndex].leftNeighborMode;
    contextPtr->mdLocalCuUnit[dstCuIndex].leftNeighborDepth = contextPtr->mdLocalCuUnit[srcCuIndex].leftNeighborDepth;
    contextPtr->mdLocalCuUnit[dstCuIndex].topNeighborMode   = contextPtr->mdLocalCuUnit[srcCuIndex].topNeighborMode;
    contextPtr->mdLocalCuUnit[dstCuIndex].topNeighborDepth  = contextPtr->mdLocalCuUnit[srcCuIndex].topNeighborDepth;
}

/*********************************************************************
 * PillarInterDepthDecision
 *
 * Bottom-up inter-depth decision performed for the CU at leafIndex.
 * Whenever the current CU closes a group of four sibling blocks, the
 * summed cost of the four children (plus the "split" flag rate) is
 * compared with the cost of their parent (plus the "no split" flag
 * rate). Three stages are chained: depth 2 vs 3, depth 1 vs 2 and
 * depth 0 vs 1.
 *
 * Side effects:
 *   - updates contextPtr->groupOf8x8BlocksCount / groupOf16x16BlocksCount
 *   - overwrites neighbor info and cost of the parent candidates in
 *     contextPtr->mdLocalCuUnit[]
 *   - clears splitFlag on every parent that wins its comparison
 *
 * Returns the index of the shallowest parent CU that won its
 * comparison, or leafIndex when no parent was selected.
 *********************************************************************/
EB_U32 PillarInterDepthDecision(
    ModeDecisionContext_t          *contextPtr,
    EB_U32                          leafIndex,
    LargestCodingUnit_t            *tbPtr,
    EB_U32                          tbOriginX,
    EB_U32                          tbOriginY,
    EB_U64                          fullLambda,
    MdRateEstimationContext_t      *mdRateEstimationPtr,
    PictureControlSet_t            *pictureControlSetPtr)
{
    SequenceControlSet_t   *sequenceControlSetPtr = (SequenceControlSet_t*)pictureControlSetPtr->sequenceControlSetWrapperPtr->objectPtr;
    EncodeContext_t        *encodeContextPtr      = sequenceControlSetPtr->encodeContextPtr;
    EB_U32                  tbMaxDepth            = sequenceControlSetPtr->maxLcuDepth;

    // The leaf is a final-depth CU when its split flag is already cleared
    EB_BOOL                 leafIsFinalDepth      = (tbPtr->codedLeafArrayPtr[leafIndex]->splitFlag == EB_FALSE) ? EB_TRUE : EB_FALSE;

    // Result: starts at the leaf and moves up each time a parent is kept
    EB_U32                  selectedCuIndex       = leafIndex;

    // Parent candidates of each stage (default to the leaf when a stage is skipped)
    EB_U32                  depth2Idx             = leafIndex;
    EB_U32                  depth1Idx             = leafIndex;
    EB_U32                  depth0Idx;

    // Siblings of the block that closes a group of four
    EB_U32                  leftIdx;
    EB_U32                  topIdx;
    EB_U32                  topLeftIdx;

    // "keep" = depth N (no split), "split" = depth N+1 (four children)
    EB_U64                  keepRate              = 0;
    EB_U64                  splitRate             = 0;
    EB_U64                  keepCost              = 0;
    EB_U64                  splitCost             = 0;

    // Percentage added to the split cost in stages 1 and 2; both are zero
    // here, so the scaling below leaves the split cost unchanged.
    EB_U8                   weightDepth1Vs2       = 0;
    EB_U8                   weightDepth0Vs1       = 0;

    EB_BOOL                 isInterSlice;

    const CodedUnitStats_t *leafStatsPtr;
    const CodedUnitStats_t *depth2StatsPtr;
    const CodedUnitStats_t *depth1StatsPtr;

    EB_U32                  blockX;
    EB_U32                  blockY;

    leafStatsPtr = GetCodedUnitStats(leafIndex);
    blockX       = tbOriginX + leafStatsPtr->originX;
    blockY       = tbOriginY + leafStatsPtr->originY;

    if (leafIsFinalDepth) {
        tbPtr->codedLeafArrayPtr[leafIndex]->splitFlag = EB_FALSE;

        // Count final-depth blocks towards the group they belong to
        switch (leafStatsPtr->depth) {
        case 1:
            contextPtr->groupOf16x16BlocksCount++;
            break;
        case 2:
            contextPtr->groupOf8x8BlocksCount++;
            break;
        default:
            break;
        }
    }

    /*** Stage 0: depth 2 vs depth 3 ***/

    // Entered when the leaf closes a group of four 8x8 blocks
    if (GROUP_OF_4_8x8_BLOCKS(blockX, blockY)) {

        // Walk back from the leaf to its three siblings, then to the parent
        leftIdx    = leafIndex  - DEPTH_THREE_STEP;
        topIdx     = leftIdx    - DEPTH_THREE_STEP;
        topLeftIdx = topIdx     - DEPTH_THREE_STEP;
        depth2Idx  = topLeftIdx - 1;

        contextPtr->groupOf8x8BlocksCount++;

        // Parent inherits the split context of its top-left child
        PillarCopyNeighborContext(contextPtr, depth2Idx, topLeftIdx);

        // Depth N cost: parent cost + "no split" flag rate
        SplitFlagRate(
            contextPtr,
            tbPtr->codedLeafArrayPtr[depth2Idx],
            0,
            &keepRate,
            fullLambda,
            mdRateEstimationPtr,
            tbMaxDepth);

        // A parent that was never tested cannot win the comparison
        if (contextPtr->mdLocalCuUnit[depth2Idx].testedCuFlag == EB_FALSE) {
            contextPtr->mdLocalCuUnit[depth2Idx].cost = MAX_CU_COST;
        }
        keepCost = contextPtr->mdLocalCuUnit[depth2Idx].cost + keepRate;

        // Depth N+1 cost: four children + "split" flag rate
        SplitFlagRate(
            contextPtr,
            tbPtr->codedLeafArrayPtr[depth2Idx],
            1,
            &splitRate,
            fullLambda,
            mdRateEstimationPtr,
            tbMaxDepth);
        splitCost =
            contextPtr->mdLocalCuUnit[leafIndex].cost +
            contextPtr->mdLocalCuUnit[leftIdx].cost +
            contextPtr->mdLocalCuUnit[topIdx].cost +
            contextPtr->mdLocalCuUnit[topLeftIdx].cost +
            splitRate;

        if (splitCost < keepCost) {
            // Splitting wins: the parent carries the children's cost so
            // the next stage compares against the best known cost.
            contextPtr->mdLocalCuUnit[depth2Idx].cost = splitCost;
        }
        else {
            // Parent wins: stop splitting here and report the parent
            tbPtr->codedLeafArrayPtr[depth2Idx]->splitFlag = EB_FALSE;
            contextPtr->mdLocalCuUnit[depth2Idx].cost      = keepCost;
            selectedCuIndex                                = depth2Idx;
        }
    }

    /*** Stage 1: depth 1 vs depth 2 ***/

    // Entered when the depth-2 candidate closes a group of four 16x16
    // blocks and all four have been accounted for.
    depth2StatsPtr = GetCodedUnitStats(depth2Idx);
    blockX         = tbOriginX + depth2StatsPtr->originX;
    blockY         = tbOriginY + depth2StatsPtr->originY;
    if (GROUP_OF_4_16x16_BLOCKS(blockX, blockY) &&
        (contextPtr->groupOf8x8BlocksCount == 4)) {

        contextPtr->groupOf8x8BlocksCount = 0;
        contextPtr->groupOf16x16BlocksCount++;

        // Walk back from the depth-2 candidate to its siblings and parent
        leftIdx    = depth2Idx  - DEPTH_TWO_STEP;
        topIdx     = leftIdx    - DEPTH_TWO_STEP;
        topLeftIdx = topIdx     - DEPTH_TWO_STEP;
        depth1Idx  = topLeftIdx - 1;

        // Parent inherits the split context of its top-left child
        PillarCopyNeighborContext(contextPtr, depth1Idx, topLeftIdx);

        depth1StatsPtr = GetCodedUnitStats(depth1Idx);
        if (depth1StatsPtr->depth == 1) {

            // Depth N cost: parent cost + "no split" flag rate
            SplitFlagRate(
                contextPtr,
                tbPtr->codedLeafArrayPtr[depth1Idx],
                0,
                &keepRate,
                fullLambda,
                mdRateEstimationPtr,
                tbMaxDepth);

            // A parent that was never tested cannot win the comparison
            if (contextPtr->mdLocalCuUnit[depth1Idx].testedCuFlag == EB_FALSE) {
                contextPtr->mdLocalCuUnit[depth1Idx].cost = MAX_CU_COST;
            }
            keepCost = contextPtr->mdLocalCuUnit[depth1Idx].cost + keepRate;

            // Depth N+1 cost: four children + "split" flag rate
            SplitFlagRate(
                contextPtr,
                tbPtr->codedLeafArrayPtr[depth1Idx],
                1,
                &splitRate,
                fullLambda,
                mdRateEstimationPtr,
                tbMaxDepth);
            splitCost =
                contextPtr->mdLocalCuUnit[depth2Idx].cost +
                contextPtr->mdLocalCuUnit[leftIdx].cost +
                contextPtr->mdLocalCuUnit[topIdx].cost +
                contextPtr->mdLocalCuUnit[topLeftIdx].cost +
                splitRate;

            // Every child is expected to hold a real cost at this point
            CHECK_REPORT_ERROR(
                (contextPtr->mdLocalCuUnit[depth2Idx].cost != MAX_CU_COST),
                encodeContextPtr->appCallbackPtr,
                EB_ENC_FL_ERROR4);
            CHECK_REPORT_ERROR(
                (contextPtr->mdLocalCuUnit[leftIdx].cost != MAX_CU_COST),
                encodeContextPtr->appCallbackPtr,
                EB_ENC_FL_ERROR4);
            CHECK_REPORT_ERROR(
                (contextPtr->mdLocalCuUnit[topIdx].cost != MAX_CU_COST),
                encodeContextPtr->appCallbackPtr,
                EB_ENC_FL_ERROR4);
            CHECK_REPORT_ERROR(
                (contextPtr->mdLocalCuUnit[topLeftIdx].cost != MAX_CU_COST),
                encodeContextPtr->appCallbackPtr,
                EB_ENC_FL_ERROR4);

            if (splitCost < MAX_CU_COST) {
                splitCost += ((EB_S64)splitCost*weightDepth1Vs2) / 100;
            }

            if (splitCost < keepCost) {
                // Splitting wins: the parent carries the children's cost
                // so the next stage compares against the best known cost.
                contextPtr->mdLocalCuUnit[depth1Idx].cost = splitCost;
            }
            else {
                // Parent wins: stop splitting here and report the parent
                tbPtr->codedLeafArrayPtr[depth1Idx]->splitFlag = EB_FALSE;
                contextPtr->mdLocalCuUnit[depth1Idx].cost      = keepCost;
                selectedCuIndex                                = depth1Idx;
            }
        }
    }

    /*** Stage 2: depth 0 vs depth 1 ***/

    // Only performed for P and B slices, since I slices have no 64x64
    // candidates.
    // NOTE(review): the depth-1 stats are fetched here but the origin is
    // still derived from the depth-2 candidate stats - confirm whether
    // depth1StatsPtr was intended. Behavior is kept as-is.
    depth1StatsPtr = GetCodedUnitStats(depth1Idx);
    blockX         = tbOriginX + depth2StatsPtr->originX;
    blockY         = tbOriginY + depth2StatsPtr->originY;
    isInterSlice   = (pictureControlSetPtr->sliceType == EB_P_PICTURE || pictureControlSetPtr->sliceType == EB_B_PICTURE) ? EB_TRUE : EB_FALSE;
    if (isInterSlice
        && GROUP_OF_4_32x32_BLOCKS(blockX, blockY) &&
        (contextPtr->groupOf16x16BlocksCount == 4)) {

        contextPtr->groupOf16x16BlocksCount = 0;

        // Walk back from the depth-1 candidate to its siblings and parent
        leftIdx    = depth1Idx  - DEPTH_ONE_STEP;
        topIdx     = leftIdx    - DEPTH_ONE_STEP;
        topLeftIdx = topIdx     - DEPTH_ONE_STEP;
        depth0Idx  = topLeftIdx - 1;

        // Parent inherits the split context of its top-left child
        PillarCopyNeighborContext(contextPtr, depth0Idx, topLeftIdx);

        if (GetCodedUnitStats(depth0Idx)->depth == 0) {

            // Depth N cost: parent cost + "no split" flag rate
            SplitFlagRate(
                contextPtr,
                tbPtr->codedLeafArrayPtr[depth0Idx],
                0,
                &keepRate,
                fullLambda,
                mdRateEstimationPtr,
                tbMaxDepth);

            // A parent that was never tested cannot win the comparison
            if (contextPtr->mdLocalCuUnit[depth0Idx].testedCuFlag == EB_FALSE) {
                contextPtr->mdLocalCuUnit[depth0Idx].cost = MAX_CU_COST;
            }
            keepCost = contextPtr->mdLocalCuUnit[depth0Idx].cost + keepRate;

            // Depth N+1 cost: four children + "split" flag rate
            SplitFlagRate(
                contextPtr,
                tbPtr->codedLeafArrayPtr[depth0Idx],
                1,
                &splitRate,
                fullLambda,
                mdRateEstimationPtr,
                tbMaxDepth);
            splitCost =
                contextPtr->mdLocalCuUnit[depth1Idx].cost +
                contextPtr->mdLocalCuUnit[leftIdx].cost +
                contextPtr->mdLocalCuUnit[topIdx].cost +
                contextPtr->mdLocalCuUnit[topLeftIdx].cost +
                splitRate;

            if (splitCost < MAX_CU_COST) {
                splitCost += ((EB_S64)splitCost*weightDepth0Vs1) / 100;
            }

            // Top of the tree: no cost bookkeeping is needed for a later
            // stage, so only the winning parent is recorded.
            if (keepCost <= splitCost) {
                tbPtr->codedLeafArrayPtr[depth0Idx]->splitFlag = EB_FALSE;
                selectedCuIndex                                = depth0Idx;
            }
        }
    }

    return selectedCuIndex;
}
2085
2086