1 /*
2 * Copyright(c) 2018 Intel Corporation
3 * SPDX - License - Identifier: BSD - 2 - Clause - Patent
4 */
5
6 #include <string.h>
7
8 #include "EbDefinitions.h"
9 #include "EbUtility.h"
10 #include "EbTransformUnit.h"
11 #include "EbRateDistortionCost.h"
12 #include "EbDeblockingFilter.h"
13 #include "EbSampleAdaptiveOffset.h"
14 #include "EbPictureOperators.h"
15
16 #include "EbModeDecisionProcess.h"
17 #include "EbEncDecProcess.h"
18 #include "EbErrorCodes.h"
19 #include "EbErrorHandling.h"
20 #include "EbComputeSAD.h"
21 #include "EbTransforms.h"
22 #include "EbModeDecisionConfiguration.h"
23 #include "emmintrin.h"
24
25 //#define DEBUG_REF_INFO
26 //#define DUMP_RECON
27 #ifdef DUMP_RECON
/*
 * Writes `rows` rows of `width` bytes each, starting at `plane` and advancing
 * by `stride` bytes per row, to `fp`.
 * Returns 0 on success, -1 as soon as a row is written short.
 */
static int dump_plane_rows(FILE *fp, const unsigned char *plane, int stride, int width, int rows)
{
    for (int row = 0; row < rows; row++) {
        if (fwrite(plane, 1, (size_t)width, fp) != (size_t)width) {
            return -1;
        }
        plane += stride;
    }
    return 0;
}

/*
 * Debug helper (only compiled when DUMP_RECON is defined): stores the Y, Cb
 * and Cr planes of reconBuffer as frame number POC of a raw planar file.
 *
 *  reconBuffer  picture to dump; rows are written as `width` bytes each,
 *               i.e. the samples are treated as one byte wide
 *  filename     output file; created/truncated when POC == 0, otherwise it
 *               must already exist
 *  POC          frame slot; the frame is written at byte offset
 *               POC * (size of one dumped frame)
 *
 * If the file is shorter than the target offset, the gap is filled with zero
 * bytes so that frames may arrive out of order.  Any I/O failure is reported
 * on stderr and the frame is dropped; the function never aborts the encoder.
 */
static void dump_buf_desc_to_file(EbPictureBufferDesc_t* reconBuffer, const char* filename, int POC)
{
    static const unsigned char zeroPad[4096] = { 0 };
    int failed = 1;
    long fileSize;
    long missing;

    // POC 0 starts a fresh dump: create the file or truncate an old one.
    // Binary mode keeps the sample bytes untouched on every platform.
    if (POC == 0) {
        FILE *fresh = fopen(filename, "wb");
        if (fresh == NULL) {
            fprintf(stderr, "dump_buf_desc_to_file: cannot create %s\n", filename);
            return;
        }
        fclose(fresh);
    }

    FILE *fp = fopen(filename, "rb+");
    if (fp == NULL) {
        fprintf(stderr, "dump_buf_desc_to_file: cannot open %s\n", filename);
        return;
    }

    // Bytes per dumped frame: the luma plane plus two chroma planes.
    long descSize = reconBuffer->height * reconBuffer->width; //Luma
    descSize += 2 * ((reconBuffer->height * reconBuffer->width) >> (3 - reconBuffer->colorFormat));
    const long offset = descSize * POC;

    // Chroma subsampling shifts derived from the colour format.
    EB_COLOR_FORMAT colorFormat = reconBuffer->colorFormat;
    EB_U16 subWidthCMinus1 = (colorFormat == EB_YUV444 ? 1 : 2) - 1;
    EB_U16 subHeightCMinus1 = (colorFormat >= EB_YUV422 ? 1 : 2) - 1;

    // First visible sample of each plane (skips the buffer's origin padding).
    const unsigned char *luma_ptr = reconBuffer->bufferY + reconBuffer->strideY * (reconBuffer->originY) + reconBuffer->originX;
    const unsigned char *cb_ptr = reconBuffer->bufferCb + reconBuffer->strideCb * (reconBuffer->originY >> subHeightCMinus1) + (reconBuffer->originX >> subWidthCMinus1);
    const unsigned char *cr_ptr = reconBuffer->bufferCr + reconBuffer->strideCr * (reconBuffer->originY >> subHeightCMinus1) + (reconBuffer->originX >> subWidthCMinus1);

    if (fseek(fp, 0, SEEK_END) != 0) {
        goto done;
    }
    fileSize = ftell(fp);
    if (fileSize < 0) {
        goto done;
    }

    // Zero-fill up to the target slot when earlier frames are still missing.
    for (missing = offset - fileSize; missing > 0; ) {
        size_t chunk = sizeof zeroPad;
        if (missing < (long)sizeof zeroPad) {
            chunk = (size_t)missing;
        }
        if (fwrite(zeroPad, 1, chunk, fp) != chunk) {
            goto done;
        }
        missing -= (long)chunk;
    }

    if (fseek(fp, offset, SEEK_SET) != 0) {
        goto done;
    }

    if (dump_plane_rows(fp, luma_ptr, reconBuffer->strideY, reconBuffer->width, reconBuffer->height) != 0) {
        goto done;
    }
    if (dump_plane_rows(fp, cb_ptr, reconBuffer->strideCb, reconBuffer->width >> subWidthCMinus1, reconBuffer->height >> subHeightCMinus1) != 0) {
        goto done;
    }
    if (dump_plane_rows(fp, cr_ptr, reconBuffer->strideCr, reconBuffer->width >> subWidthCMinus1, reconBuffer->height >> subHeightCMinus1) != 0) {
        goto done;
    }
    failed = 0;

done:
    // fclose flushes buffered data, so its result decides success as well.
    if (fclose(fp) != 0) {
        failed = 1;
    }
    if (failed) {
        fprintf(stderr, "dump_buf_desc_to_file: I/O error while writing POC %d to %s\n", POC, filename);
    }
}
78 #endif
79
80 #ifdef DEBUG_REF_INFO
/*
 * Debug helper (only compiled when DEBUG_REF_INFO is defined): prints `size`
 * consecutive entries of neighbor->leftArray, starting at index y_pos, on a
 * single line followed by a separator line.
 */
static void dump_left_array(NeighborArrayUnit_t *neighbor, int y_pos, int size)
{
    int idx = 0;

    printf("*Dump left array\n");
    while (idx < size) {
        printf("%3u ", neighbor->leftArray[y_pos + idx]);
        ++idx;
    }
    printf("\n----------------------\n");
}
89
/*
 * Debug helper (only compiled when DEBUG_REF_INFO is defined): prints the
 * first `size` entries of one intra reference array.
 *
 *  ref   holder of the three reference arrays
 *  size  number of entries to print
 *  mask  selects the array: 0 = yIntraReferenceArray,
 *        1 = cbIntraReferenceArray, 2 = crIntraReferenceArray
 *
 * Any other mask trips assert(0); when asserts are compiled out the function
 * returns without printing instead of reading through a NULL pointer.
 */
static void dump_intra_ref(IntraReferenceSamples_t* ref, int size, int mask)
{
    const unsigned char* ptr = NULL;

    switch (mask) {
    case 0:
        ptr = ref->yIntraReferenceArray;
        break;
    case 1:
        ptr = ref->cbIntraReferenceArray;
        break;
    case 2:
        ptr = ref->crIntraReferenceArray;
        break;
    default:
        assert(0);
        return; // still reached when NDEBUG removes the assert
    }

    printf("*Dumping intra reference array for component %d\n", mask);
    for (int i = 0; i < size; i++) {
        printf("%3u ", ptr[i]);
    }
    printf("\n----------------------\n");
}
109
/*
 * Debug helper (only compiled when DEBUG_REF_INFO is defined): prints a
 * square block of samples taken from one plane of a picture buffer.
 *
 *  size           block dimension; `size` rows are printed
 *  buf_tmp        picture buffer to read from
 *  startX/startY  block position in luma coordinates, relative to the
 *                 buffer's originX/originY
 *  componentMask  0 = luma, 1 = Cb, 2 = Cr
 *
 * Samples are read as bytes when bitDepth is 8 and as EB_S16 when it is 16.
 * Every row prints size + 1 values: a "|" precedes every fourth column, and
 * the last value, printed after the "|||" marker, is the sample one column
 * to the right of the block.
 *
 * An invalid componentMask or an unsupported bitDepth trips assert(0); when
 * asserts are compiled out the function returns instead of dereferencing a
 * NULL plane pointer or repeating the error message for every sample.
 */
static void dump_block_from_desc(int size, EbPictureBufferDesc_t *buf_tmp, int startX, int startY, int componentMask)
{
    unsigned char* buf = NULL;
    int stride = 0;
    int bitDepth = buf_tmp->bitDepth;
    int val = (bitDepth == 8) ? 1 : 2; // bytes per stored sample
    EB_COLOR_FORMAT colorFormat = buf_tmp->colorFormat; // Chroma format
    EB_U16 subWidthCMinus1 = (colorFormat == EB_YUV444 ? 1 : 2) - 1;
    EB_U16 subHeightCMinus1 = (colorFormat >= EB_YUV422 ? 1 : 2) - 1;

    switch (componentMask) {
    case 0:
        buf = buf_tmp->bufferY;
        stride = buf_tmp->strideY;
        // Luma is never subsampled.
        subWidthCMinus1 = 0;
        subHeightCMinus1 = 0;
        break;
    case 1:
        buf = buf_tmp->bufferCb;
        stride = buf_tmp->strideCb;
        break;
    case 2:
        buf = buf_tmp->bufferCr;
        stride = buf_tmp->strideCr;
        break;
    default:
        assert(0);
        return; // still reached when NDEBUG removes the assert
    }

    // Sample offset of the block's top-left corner inside the selected plane.
    int offset = ((stride * (buf_tmp->originY + startY)) >> subHeightCMinus1) + ((startX + buf_tmp->originX) >> subWidthCMinus1);
    printf("bitDepth is %d, dump block size %d at offset %d, (%d, %d), component is %s\n",
        bitDepth, size, offset, startX, startY, componentMask == 0 ? "luma" : (componentMask == 1 ? "Cb" : "Cr"));

    unsigned char* start_tmp = buf + offset * val;
    for (int i = 0; i < size; i++) {
        for (int j = 0; j < size + 1; j++) {
            if (j == size) {
                printf("|||");
            } else if (j % 4 == 0) {
                printf("|");
            }

            if (bitDepth == 8) {
                printf("%4u ", start_tmp[j]);
            } else if (bitDepth == 16) {
                printf("%4d ", *((EB_S16*)start_tmp + j));
            } else {
                printf("bitDepth is %d\n", bitDepth);
                assert(0);
                return; // do not keep looping once the depth is known to be unsupported
            }
        }
        printf("\n");
        start_tmp += stride * val;
    }
    printf("------------------------\n");
}
160 #endif
161 /*******************************************
162 * set Penalize Skip Flag
163 *
164 * Summary: Set the PenalizeSkipFlag to true
165 * When there is luminance/chrominance change
* or in noisy clip with low motion at medium
* variance area
168 *
169 *******************************************/
170 typedef void (*EB_ENCODE_LOOP_FUNC_PTR)(
171 EncDecContext_t *contextPtr,
172 LargestCodingUnit_t *lcuPtr,
173 EB_U32 originX,
174 EB_U32 originY,
175 EB_U32 cbQp,
176 EbPictureBufferDesc_t *predSamples, // no basis/offset
177 EbPictureBufferDesc_t *coeffSamplesTB, // lcu based
178 EbPictureBufferDesc_t *residual16bit, // no basis/offset
179 EbPictureBufferDesc_t *transform16bit, // no basis/offset
180 EB_S16 *transformScratchBuffer,
181 EB_U32 *countNonZeroCoeffs,
182 EB_U32 useDeltaQp,
183 CabacEncodeContext_t *cabacEncodeCtxPtr,
184 EB_U32 intraLumaMode,
185 EB_U32 componentMask,
186 EB_COLOR_FORMAT colorFormat,
187 EB_BOOL secondChroma,
188 EB_U32 tuSize,
189 CabacCost_t *CabacCost,
190 EB_U32 dZoffset) ;
191
192 typedef void (*EB_GENERATE_RECON_FUNC_PTR)(
193 EncDecContext_t *contextPtr,
194 EB_U32 originX,
195 EB_U32 originY,
196 EB_U32 componentMask,
197 EB_COLOR_FORMAT colorFormat,
198 EB_BOOL secondChroma,
199 EB_U32 tuSize,
200 EbPictureBufferDesc_t *predSamples, // no basis/offset
201 EbPictureBufferDesc_t *residual16bit, // no basis/offset
202 EB_S16 *transformScratchBuffer);
203
204 typedef void (*EB_ENCODE_LOOP_INTRA_4x4_FUNC_PTR)(
205 EncDecContext_t *contextPtr,
206 LargestCodingUnit_t *lcuPtr,
207 EB_U32 originX,
208 EB_U32 originY,
209 EB_U32 cbQp,
210 EbPictureBufferDesc_t *predSamples, // no basis/offset
211 EbPictureBufferDesc_t *coeffSamplesTB, // lcu based
212 EbPictureBufferDesc_t *residual16bit, // no basis/offset
213 EbPictureBufferDesc_t *transform16bit, // no basis/offset
214 EB_S16 *transformScratchBuffer,
215 EB_U32 *countNonZeroCoeffs,
216 EB_U32 componentMask,
217 EB_U32 useDeltaQp,
218 CabacEncodeContext_t *cabacEncodeCtxPtr,
219 EB_U32 intraLumaMode,
220 CabacCost_t *CabacCost,
221 EB_U32 dZoffset) ;
222
223 typedef void (*EB_GENERATE_RECON_INTRA_4x4_FUNC_PTR)(
224 EncDecContext_t *contextPtr,
225 EB_U32 originX,
226 EB_U32 originY,
227 EbPictureBufferDesc_t *predSamples, // no basis/offset
228 EbPictureBufferDesc_t *residual16bit, // no basis/offset
229 EB_S16 *transformScratchBuffer,
230 EB_U32 componentMask);
231
232 typedef EB_ERRORTYPE(*EB_GENERATE_INTRA_SAMPLES_FUNC_PTR)(
233 EB_BOOL constrainedIntraFlag, //input parameter, indicates if constrained intra is switched on/off
234 EB_BOOL strongIntraSmoothingFlag,
235 EB_U32 originX,
236 EB_U32 originY,
237 EB_U32 size,
238 EB_U32 lcuSize,
239 EB_U32 cuDepth,
240 NeighborArrayUnit_t *modeTypeNeighborArray,
241 NeighborArrayUnit_t *lumaReconNeighborArray,
242 NeighborArrayUnit_t *cbReconNeighborArray,
243 NeighborArrayUnit_t *crReconNeighborArray,
244 void *refWrapperPtr,
245 EB_COLOR_FORMAT colorFormat,
246 EB_BOOL pictureLeftBoundary,
247 EB_BOOL pictureTopBoundary,
248 EB_BOOL pictureRightBoundary);
249
250 typedef EB_ERRORTYPE(*EB_GENERATE_LUMA_INTRA_SAMPLES_FUNC_PTR)(
251 EB_BOOL constrainedIntraFlag, //input parameter, indicates if constrained intra is switched on/off
252 EB_BOOL strongIntraSmoothingFlag,
253 EB_U32 originX,
254 EB_U32 originY,
255 EB_U32 size,
256 EB_U32 lcuSize,
257 EB_U32 cuDepth,
258 NeighborArrayUnit_t *modeTypeNeighborArray,
259 NeighborArrayUnit_t *lumaReconNeighborArray,
260 NeighborArrayUnit_t *cbReconNeighborArray,
261 NeighborArrayUnit_t *crReconNeighborArray,
262 void *refWrapperPtr,
263 EB_BOOL pictureLeftBoundary,
264 EB_BOOL pictureTopBoundary,
265 EB_BOOL pictureRightBoundary);
266
267 typedef EB_ERRORTYPE(*EB_GENERATE_CHROMA_INTRA_SAMPLES_FUNC_PTR)(
268 EB_BOOL constrainedIntraFlag, //input parameter, indicates if constrained intra is switched on/off
269 EB_BOOL strongIntraSmoothingFlag,
270 EB_U32 originX,
271 EB_U32 originY,
272 EB_U32 size,
273 EB_U32 lcuSize,
274 EB_U32 cuDepth,
275 NeighborArrayUnit_t *modeTypeNeighborArray,
276 NeighborArrayUnit_t *lumaReconNeighborArray,
277 NeighborArrayUnit_t *cbReconNeighborArray,
278 NeighborArrayUnit_t *crReconNeighborArray,
279 void *refWrapperPtr,
280 EB_COLOR_FORMAT colorFormat,
281 EB_BOOL secondChroma,
282 EB_BOOL pictureLeftBoundary,
283 EB_BOOL pictureTopBoundary,
284 EB_BOOL pictureRightBoundary);
285
286 typedef EB_ERRORTYPE(*EB_ENC_PASS_INTRA_FUNC_PTR)(
287 void *refSamples,
288 EB_U32 originX,
289 EB_U32 originY,
290 EB_U32 puSize,
291 EB_U32 puChromaSize,
292 EbPictureBufferDesc_t *predictionPtr,
293 EB_COLOR_FORMAT colorFormat,
294 EB_BOOL secondChroma,
295 EB_U32 lumaMode,
296 EB_U32 chromaMode,
297 EB_U32 componentMask);
298
299 typedef EB_ERRORTYPE(*EB_ENC_PASS_INTRA4X4_FUNC_PTR)(
300 void *referenceSamples,
301 EB_U32 originX,
302 EB_U32 originY,
303 EB_U32 puSize,
304 EB_U32 chromaPuSize,
305 EbPictureBufferDesc_t *predictionPtr,
306 EB_U32 lumaMode,
307 EB_U32 chromaMode,
308 EB_COLOR_FORMAT colorFormat,
309 EB_BOOL secondChroma,
310 EB_U32 componentMask);
311
312 typedef EB_ERRORTYPE (*EB_LCU_INTERNAL_DLF_FUNC_PTR)(
313 EbPictureBufferDesc_t *reconpicture,
314 EB_U32 lcuPosx,
315 EB_U32 lcuPosy,
316 EB_U32 lcuWidth,
317 EB_U32 lcuHeight,
318 EB_U8 *verticalEdgeBSArray,
319 EB_U8 *horizontalEdgeBSArray,
320 PictureControlSet_t *reconPictureControlSet);
321 typedef void (*EB_LCU_BOUNDARY_DLF_FUNC_PTR)(
322 EbPictureBufferDesc_t *reconpicture,
323 EB_U32 lcuPos_x,
324 EB_U32 lcuPos_y,
325 EB_U32 lcuWidth,
326 EB_U32 lcuHeight,
327 EB_U8 *lcuVerticalEdgeBSArray,
328 EB_U8 *lcuHorizontalEdgeBSArray,
329 EB_U8 *topLcuVerticalEdgeBSArray,
330 EB_U8 *leftLcuHorizontalEdgeBSArray,
331 PictureControlSet_t *pictureControlSetPtr);
332 typedef void (*EB_LCU_PIC_EDGE_DLF_FUNC_PTR)(
333 EbPictureBufferDesc_t *reconPic,
334 EB_U32 lcuIdx,
335 EB_U32 lcuPos_x,
336 EB_U32 lcuPos_y,
337 EB_U32 lcuWidth,
338 EB_U32 lcuHeight,
339 PictureControlSet_t *pictureControlSetPtr);
340
341 void AddChromaEncDec(
342 PictureControlSet_t *pictureControlSetPtr,
343 LargestCodingUnit_t *lcuPtr,
344 CodingUnit_t *cuPtr,
345 ModeDecisionContext_t *contextPtr,
346 EncDecContext_t *contextPtrED,
347 EbPictureBufferDesc_t *inputPicturePtr,
348 EB_U32 inputCbOriginIndex,
349 EB_U32 cuChromaOriginIndex,
350 EB_U32 candIdxInput);
351 /***************************************************
352 * Update Coding Unit Neighbor Arrays
353 ***************************************************/
/*
 * Records `depth` in leafDepthNeighborArray for the square block of side
 * `size` located at (originX, originY).  Only the top and left arrays are
 * updated (NEIGHBOR_ARRAY_UNIT_TOP_AND_LEFT_ONLY_MASK).
 */
static void EncodePassUpdateLeafDepthNeighborArrays(
    NeighborArrayUnit_t *leafDepthNeighborArray,
    EB_U8 depth,
    EB_U32 originX,
    EB_U32 originY,
    EB_U32 size)
{
    // Leaf depth update; `size` is passed for both block extents.
    NeighborArrayUnitModeWrite(
        leafDepthNeighborArray, &depth,
        originX, originY,
        size, size,
        NEIGHBOR_ARRAY_UNIT_TOP_AND_LEFT_ONLY_MASK);
}
373
374 /***************************************************
375 * Update Intra Mode Neighbor Arrays
376 ***************************************************/
/*
 * Marks the square block of side `size` at (originX, originY) as intra coded
 * and records its luma prediction mode:
 *   - INTRA_MODE goes to modeTypeNeighborArray with the full mask,
 *   - lumaMode goes to intraLumaModeNeighborArray, top and left arrays only.
 */
static void EncodePassUpdateIntraModeNeighborArrays(
    NeighborArrayUnit_t *modeTypeNeighborArray,
    NeighborArrayUnit_t *intraLumaModeNeighborArray,
    EB_U8 lumaMode,
    EB_U32 originX,
    EB_U32 originY,
    EB_U32 size)
{
    EB_U8 codedAsIntra = INTRA_MODE;

    // Prediction mode type of the block.
    NeighborArrayUnitModeWrite(
        modeTypeNeighborArray, &codedAsIntra,
        originX, originY,
        size, size,
        NEIGHBOR_ARRAY_UNIT_FULL_MASK);

    // Intra luma mode of the block.
    NeighborArrayUnitModeWrite(
        intraLumaModeNeighborArray, &lumaMode,
        originX, originY,
        size, size,
        NEIGHBOR_ARRAY_UNIT_TOP_AND_LEFT_ONLY_MASK);
}
409
410 /***************************************************
411 * Update Inter Mode Neighbor Arrays
412 ***************************************************/
/*
 * Marks the square block of side `size` at (originX, originY) as inter coded
 * and records its motion data:
 *   - INTER_MODE goes to modeTypeNeighborArray with the full mask,
 *   - *mvUnit (handed over as raw bytes) goes to mvNeighborArray with the
 *     full mask,
 *   - *skipFlag goes to skipNeighborArray, top and left arrays only.
 */
static void EncodePassUpdateInterModeNeighborArrays(
    NeighborArrayUnit_t *modeTypeNeighborArray,
    NeighborArrayUnit_t *mvNeighborArray,
    NeighborArrayUnit_t *skipNeighborArray,
    MvUnit_t *mvUnit,
    EB_U8 *skipFlag,
    EB_U32 originX,
    EB_U32 originY,
    EB_U32 size)
{
    EB_U8 codedAsInter = INTER_MODE;

    // Prediction mode type of the block.
    NeighborArrayUnitModeWrite(
        modeTypeNeighborArray, &codedAsInter,
        originX, originY,
        size, size,
        NEIGHBOR_ARRAY_UNIT_FULL_MASK);

    // Motion vector unit of the block.
    NeighborArrayUnitModeWrite(
        mvNeighborArray, (EB_U8*)mvUnit,
        originX, originY,
        size, size,
        NEIGHBOR_ARRAY_UNIT_FULL_MASK);

    // Skip flag of the block.
    NeighborArrayUnitModeWrite(
        skipNeighborArray, skipFlag,
        originX, originY,
        size, size,
        NEIGHBOR_ARRAY_UNIT_TOP_AND_LEFT_ONLY_MASK);
}
457
458 /***************************************************
459 * Update Recon Samples Neighbor Arrays
460 ***************************************************/
/*
 * Copies reconstructed samples of the block at (originX, originY) from
 * reconBuffer into the recon-sample neighbor arrays.
 *
 *  componentMask  PICTURE_BUFFER_DESC_LUMA_MASK selects the luma array,
 *                 PICTURE_BUFFER_DESC_CHROMA_MASK selects Cb and Cr
 *  colorFormat    determines the chroma subsampling shifts
 *  is16bit        EB_TRUE: planes are read as EB_U16 through the 16-bit
 *                 writer; anything else: the plain sample writer is used
 *
 * Source coordinates include the buffer's originX/originY; destination
 * coordinates do not.  The chroma block is square: its side is `size`
 * shifted by the horizontal subsampling, except that blocks no larger than
 * MIN_PU_SIZE keep `size` unchanged.  All writes use the full mask.
 */
static void EncodePassUpdateReconSampleNeighborArrays(
    NeighborArrayUnit_t *lumaReconSampleNeighborArray,
    NeighborArrayUnit_t *cbReconSampleNeighborArray,
    NeighborArrayUnit_t *crReconSampleNeighborArray,
    EbPictureBufferDesc_t *reconBuffer,
    EB_U32 originX,
    EB_U32 originY,
    EB_U32 size,
    EB_U32 componentMask,
    EB_COLOR_FORMAT colorFormat,
    EB_BOOL is16bit)
{
    const EB_U16 subWidthCMinus1 = (colorFormat == EB_YUV444 ? 1 : 2) - 1;
    const EB_U16 subHeightCMinus1 = (colorFormat >= EB_YUV422 ? 1 : 2) - 1;
    const int wideSamples = (is16bit == EB_TRUE);

    //**********************************
    // Luma
    //**********************************
    if (componentMask & PICTURE_BUFFER_DESC_LUMA_MASK) {
        const EB_U32 srcX = reconBuffer->originX + originX;
        const EB_U32 srcY = reconBuffer->originY + originY;

        if (wideSamples) {
            NeighborArrayUnit16bitSampleWrite(
                lumaReconSampleNeighborArray,
                (EB_U16*)(reconBuffer->bufferY),
                reconBuffer->strideY,
                srcX, srcY,
                originX, originY,
                size, size,
                NEIGHBOR_ARRAY_UNIT_FULL_MASK);
        } else {
            NeighborArrayUnitSampleWrite(
                lumaReconSampleNeighborArray,
                reconBuffer->bufferY,
                reconBuffer->strideY,
                srcX, srcY,
                originX, originY,
                size, size,
                NEIGHBOR_ARRAY_UNIT_FULL_MASK);
        }
    }

    //**********************************
    // Chroma (Cb first, then Cr)
    //**********************************
    if (componentMask & PICTURE_BUFFER_DESC_CHROMA_MASK) {
        const EB_U32 srcX = (reconBuffer->originX + originX) >> subWidthCMinus1;
        const EB_U32 srcY = (reconBuffer->originY + originY) >> subHeightCMinus1;
        const EB_U32 dstX = originX >> subWidthCMinus1;
        const EB_U32 dstY = originY >> subHeightCMinus1;
        const EB_U32 chromaSize = size > MIN_PU_SIZE ? (size >> subWidthCMinus1) : size;

        if (wideSamples) {
            NeighborArrayUnit16bitSampleWrite(
                cbReconSampleNeighborArray,
                (EB_U16*)(reconBuffer->bufferCb),
                reconBuffer->strideCb,
                srcX, srcY,
                dstX, dstY,
                chromaSize, chromaSize,
                NEIGHBOR_ARRAY_UNIT_FULL_MASK);

            NeighborArrayUnit16bitSampleWrite(
                crReconSampleNeighborArray,
                (EB_U16*)(reconBuffer->bufferCr),
                reconBuffer->strideCr,
                srcX, srcY,
                dstX, dstY,
                chromaSize, chromaSize,
                NEIGHBOR_ARRAY_UNIT_FULL_MASK);
        } else {
            NeighborArrayUnitSampleWrite(
                cbReconSampleNeighborArray,
                reconBuffer->bufferCb,
                reconBuffer->strideCb,
                srcX, srcY,
                dstX, dstY,
                chromaSize, chromaSize,
                NEIGHBOR_ARRAY_UNIT_FULL_MASK);

            NeighborArrayUnitSampleWrite(
                crReconSampleNeighborArray,
                reconBuffer->bufferCr,
                reconBuffer->strideCr,
                srcX, srcY,
                dstX, dstY,
                chromaSize, chromaSize,
                NEIGHBOR_ARRAY_UNIT_FULL_MASK);
        }
    }
}
569
570
571
572
573 /************************************************************
574 * Update Intra Luma Neighbor Modes
575 ************************************************************/
/*
 * Fills intraLumaLeftMode and intraLumaTopMode of the first prediction unit
 * of cuPtr from the neighbor arrays.
 *
 * A neighbor mode defaults to EB_INTRA_DC and is replaced by the stored
 * intra luma mode only when the mode-type array marks that neighbor as
 * INTRA_MODE.  The top neighbor additionally stays EB_INTRA_DC when
 * puOriginY is a multiple of lcuSize, i.e. when the PU sits on the top row
 * of its LCU (the mask test assumes lcuSize is a power of two).
 */
void EbHevcGeneratePuIntraLumaNeighborModes(
    CodingUnit_t *cuPtr,
    EB_U32 puOriginX,
    EB_U32 puOriginY,
    EB_U32 lcuSize,
    NeighborArrayUnit_t *intraLumaNeighborArray,
    NeighborArrayUnit_t *modeTypeNeighborArray)
{
    // Positions of this PU inside the left / top neighbor arrays.
    const EB_U32 typeLeftIdx = GetNeighborArrayUnitLeftIndex(modeTypeNeighborArray, puOriginY);
    const EB_U32 typeTopIdx = GetNeighborArrayUnitTopIndex(modeTypeNeighborArray, puOriginX);
    const EB_U32 lumaLeftIdx = GetNeighborArrayUnitLeftIndex(intraLumaNeighborArray, puOriginY);
    const EB_U32 lumaTopIdx = GetNeighborArrayUnitTopIndex(intraLumaNeighborArray, puOriginX);

    EB_U32 leftMode = EB_INTRA_DC;
    EB_U32 topMode = EB_INTRA_DC;

    if (modeTypeNeighborArray->leftArray[typeLeftIdx] == INTRA_MODE) {
        leftMode = (EB_U32)intraLumaNeighborArray->leftArray[lumaLeftIdx];
    }

    // The top neighbor is ignored across the upper LCU boundary.
    if (modeTypeNeighborArray->topArray[typeTopIdx] == INTRA_MODE &&
        (puOriginY & (lcuSize - 1)) != 0) {
        topMode = (EB_U32)intraLumaNeighborArray->topArray[lumaTopIdx];
    }

    cuPtr->predictionUnitArray[0].intraLumaLeftMode = leftMode;
    cuPtr->predictionUnitArray[0].intraLumaTopMode = topMode;
}
609
610 /**********************************************************
611 * Encode Pass - Update Sao Parameter Neighbor Array
612 **********************************************************/
/*
 * Records *saoParams (handed over as raw bytes) in saoParamNeighborArray for
 * the square block of side `size` at (originX, originY).  Only the top and
 * left arrays are updated.
 */
static void EncodePassUpdateSaoNeighborArrays(
    NeighborArrayUnit_t *saoParamNeighborArray,
    SaoParameters_t *saoParams,
    EB_U32 originX,
    EB_U32 originY,
    EB_U32 size)
{
    EB_U8 *rawSaoParams = (EB_U8*)saoParams;

    NeighborArrayUnitModeWrite(
        saoParamNeighborArray, rawSaoParams,
        originX, originY,
        size, size,
        NEIGHBOR_ARRAY_UNIT_TOP_AND_LEFT_ONLY_MASK);
}
631
632 /**********************************************************
633 * Encode Loop
634 *
635 * Summary: Performs a H.265 conformant
636 * Transform, Quantization and Inverse Quantization of a TU.
637 *
638 * Inputs:
639 * originX
640 * originY
641 * tuSize
642 * lcuSize
643 * input - input samples (position sensitive)
644 * pred - prediction samples (position independent)
645 *
646 * Outputs:
647 * Inverse quantized coeff - quantization indices (position sensitive)
648 *
649 **********************************************************/
650
EncodeLoop(EncDecContext_t * contextPtr,LargestCodingUnit_t * lcuPtr,EB_U32 originX,EB_U32 originY,EB_U32 cbQp,EbPictureBufferDesc_t * predSamples,EbPictureBufferDesc_t * coeffSamplesTB,EbPictureBufferDesc_t * residual16bit,EbPictureBufferDesc_t * transform16bit,EB_S16 * transformScratchBuffer,EB_U32 * countNonZeroCoeffs,EB_U32 useDeltaQp,CabacEncodeContext_t * cabacEncodeCtxPtr,EB_U32 intraLumaMode,EB_U32 componentMask,EB_COLOR_FORMAT colorFormat,EB_BOOL secondChroma,EB_U32 tuSize,CabacCost_t * CabacCost,EB_U32 dZoffset)651 static void EncodeLoop(
652 EncDecContext_t *contextPtr,
653 LargestCodingUnit_t *lcuPtr,
654 EB_U32 originX,
655 EB_U32 originY,
656 EB_U32 cbQp,
657 EbPictureBufferDesc_t *predSamples, // no basis/offset
658 EbPictureBufferDesc_t *coeffSamplesTB, // lcu based
659 EbPictureBufferDesc_t *residual16bit, // no basis/offset
660 EbPictureBufferDesc_t *transform16bit, // no basis/offset
661 EB_S16 *transformScratchBuffer,
662 EB_U32 *countNonZeroCoeffs,
663 EB_U32 useDeltaQp,
664 CabacEncodeContext_t *cabacEncodeCtxPtr,
665 EB_U32 intraLumaMode,
666 EB_U32 componentMask,
667 EB_COLOR_FORMAT colorFormat,
668 EB_BOOL secondChroma,
669 EB_U32 tuSize,
670 CabacCost_t *CabacCost,
671 EB_U32 dZoffset)
672 {
673
674 EB_U32 chromaQp = cbQp;
675 CodingUnit_t *cuPtr = contextPtr->cuPtr;
676 TransformUnit_t *tuPtr = &cuPtr->transformUnitArray[contextPtr->tuItr];
677 EB_PICTURE sliceType = lcuPtr->pictureControlSetPtr->sliceType;
678 EB_U32 temporalLayerIndex = lcuPtr->pictureControlSetPtr->temporalLayerIndex;
679 EB_U32 qp = cuPtr->qp;
680 EbPictureBufferDesc_t *inputSamples = contextPtr->inputSamples;
681
682 const EB_U16 subWidthCMinus1 = (colorFormat == EB_YUV444 ? 1 : 2) - 1;
683 const EB_U16 subHeightCMinus1 = (colorFormat >= EB_YUV422 ? 1 : 2) - 1;
684 EB_U16 tuChromaOffset = 0;
685 if (colorFormat == EB_YUV422 && secondChroma) {
686 tuChromaOffset = tuSize >> 1;
687 }
688
689 const EB_U32 inputLumaOffset = ((originY + inputSamples->originY) * inputSamples->strideY) + (originX + inputSamples->originX);
690 const EB_U32 predLumaOffset = ((predSamples->originY+originY) * predSamples->strideY) + (predSamples->originX+originX);
691 const EB_U32 scratchLumaOffset = ((originY & (64 - 1)) * 64) + (originX & (64 - 1));
692
693 const EB_U32 inputCbOffset = ((originX + inputSamples->originX) >> subWidthCMinus1) +
694 (((originY + tuChromaOffset + inputSamples->originY) >> subHeightCMinus1) * inputSamples->strideCb);
695 const EB_U32 inputCrOffset = ((originX + inputSamples->originX) >> subWidthCMinus1) +
696 (((originY + tuChromaOffset + inputSamples->originY) >> subHeightCMinus1) * inputSamples->strideCr);
697
698 const EB_U32 predCbOffset = ((predSamples->originX+originX) >> subWidthCMinus1) +
699 (((predSamples->originY+originY+tuChromaOffset) >> subHeightCMinus1) * predSamples->strideCb);
700 const EB_U32 predCrOffset = ((predSamples->originX+originX) >> subWidthCMinus1) +
701 (((predSamples->originY+originY+tuChromaOffset) >> subHeightCMinus1) * predSamples->strideCr);
702
703 const EB_U32 scratchCbOffset = ((originX & (64 - 1)) >> subWidthCMinus1) +
704 ((((originY + tuChromaOffset) & (64 - 1)) >> subHeightCMinus1) * (64 >> subWidthCMinus1));
705 const EB_U32 scratchCrOffset = ((originX & (64 - 1)) >> subWidthCMinus1) +
706 ((((originY + tuChromaOffset) & (64 - 1)) >> subHeightCMinus1) * (64 >> subWidthCMinus1));
707
708 EB_U8 enableContouringQCUpdateFlag;
709
710 enableContouringQCUpdateFlag = DeriveContouringClass(
711 lcuPtr->pictureControlSetPtr->ParentPcsPtr,
712 lcuPtr->index,
713 cuPtr->leafIndex) && (cuPtr->qp < lcuPtr->pictureControlSetPtr->pictureQp);
714
715 //**********************************
716 // Luma
717 //**********************************
718 if (componentMask & PICTURE_BUFFER_DESC_LUMA_MASK) {
719 PictureResidual(
720 inputSamples->bufferY + inputLumaOffset,
721 inputSamples->strideY,
722 predSamples->bufferY + predLumaOffset,
723 predSamples->strideY,
724 ((EB_S16*)residual16bit->bufferY) + scratchLumaOffset,
725 residual16bit->strideY, //64,
726 tuSize,
727 tuSize);
728
729 EstimateTransform(
730 ((EB_S16*)residual16bit->bufferY) + scratchLumaOffset,
731 residual16bit->strideY, //64,
732 ((EB_S16*)transform16bit->bufferY) + scratchLumaOffset,
733 transform16bit->strideY, //64,
734 tuSize,
735 transformScratchBuffer,
736 BIT_INCREMENT_8BIT,
737 (EB_BOOL)(tuSize == MIN_PU_SIZE),
738 contextPtr->transCoeffShapeLuma);
739
740 UnifiedQuantizeInvQuantize(
741 contextPtr,
742 lcuPtr->pictureControlSetPtr,
743 ((EB_S16*)transform16bit->bufferY) + scratchLumaOffset,
744 transform16bit->strideY, //64,
745 ((EB_S16*)coeffSamplesTB->bufferY) + scratchLumaOffset,
746 ((EB_S16*)residual16bit->bufferY) + scratchLumaOffset,
747 qp,
748 inputSamples->bitDepth,
749 tuSize,
750 sliceType,
751 &(countNonZeroCoeffs[0]),
752 contextPtr->transCoeffShapeLuma,
753 contextPtr->cleanSparseCeoffPfEncDec,
754 contextPtr->pmpMaskingLevelEncDec,
755 cuPtr->predictionModeFlag,
756 0,
757 enableContouringQCUpdateFlag,
758 COMPONENT_LUMA,
759 temporalLayerIndex,
760 dZoffset,
761 cabacEncodeCtxPtr,
762 contextPtr->fullLambda,
763 intraLumaMode,
764 EB_INTRA_CHROMA_DM,
765 CabacCost);
766
767 tuPtr->lumaCbf = countNonZeroCoeffs[0] ? EB_TRUE : EB_FALSE;
768
769 if (tuSize > MIN_PU_SIZE) {
770 tuPtr->isOnlyDc[0] = (countNonZeroCoeffs[0] == 1 && (((EB_S16*)residual16bit->bufferY) + scratchLumaOffset)[0] != 0 && tuSize != 32) ?
771 EB_TRUE :
772 EB_FALSE;
773
774 if (contextPtr->transCoeffShapeLuma && tuPtr->lumaCbf && tuPtr->isOnlyDc[0] == EB_FALSE) {
775 if (contextPtr->transCoeffShapeLuma == N2_SHAPE || contextPtr->transCoeffShapeLuma == N4_SHAPE) {
776 PfZeroOutUselessQuadrants(
777 ((EB_S16*)residual16bit->bufferY) + scratchLumaOffset,
778 residual16bit->strideY, //64,
779 (tuSize >> 1));
780 }
781
782 if (contextPtr->transCoeffShapeLuma == N4_SHAPE) {
783 PfZeroOutUselessQuadrants(
784 ((EB_S16*)residual16bit->bufferY) + scratchLumaOffset,
785 residual16bit->strideY, //64,
786 (tuSize >> 2));
787 }
788 }
789 } else {
790 if (contextPtr->transCoeffShapeLuma && tuPtr->lumaCbf) {
791 PfZeroOutUselessQuadrants(
792 ((EB_S16*)residual16bit->bufferY) + scratchLumaOffset,
793 residual16bit->strideY, //64,
794 (tuSize >> 1));
795
796 if (contextPtr->transCoeffShapeLuma == N4_SHAPE) {
797 PfZeroOutUselessQuadrants(
798 ((EB_S16*)residual16bit->bufferY) + scratchLumaOffset,
799 residual16bit->strideY, //64,
800 (tuSize >> 2));
801 }
802 }
803 }
804 }
805
806 if (componentMask & PICTURE_BUFFER_DESC_CHROMA_MASK) {
807 //**********************************
808 // Cb
809 //**********************************
810 PictureResidual(
811 inputSamples->bufferCb + inputCbOffset,
812 inputSamples->strideCb,
813 predSamples->bufferCb + predCbOffset,
814 predSamples->strideCb,
815 ((EB_S16*)residual16bit->bufferCb) + scratchCbOffset,
816 residual16bit->strideCb,
817 tuSize > MIN_PU_SIZE? (tuSize >> subWidthCMinus1): tuSize,
818 tuSize > MIN_PU_SIZE? (tuSize >> subWidthCMinus1): tuSize);
819
820 // For the case that DC path chosen for chroma, we check the DC values and determine to use DC or N2Shape for chroma. Since there is only one flag for ChromaShaping, we do the prediction of Cr and Cb and decide on the chroma shaping
821 if (tuSize > MIN_PU_SIZE && contextPtr->transCoeffShapeChroma == ONLY_DC_SHAPE) {
822 EB_S64 sumResidual = SumResidual_funcPtrArray[!!(ASM_TYPES & AVX2_MASK)](
823 ((EB_S16*)residual16bit->bufferCb) + scratchCbOffset,
824 tuSize >> subWidthCMinus1,
825 residual16bit->strideCb);
826
827 // Normalized based on the size.
828 sumResidual = (ABS(sumResidual) / (tuSize >> subWidthCMinus1) / (tuSize >> subWidthCMinus1));
829 if (sumResidual > 0) {
830 contextPtr->transCoeffShapeChroma = N2_SHAPE;
831 }
832 }
833
834 EstimateTransform(
835 ((EB_S16*)residual16bit->bufferCb) + scratchCbOffset,
836 residual16bit->strideCb,
837 ((EB_S16*)transform16bit->bufferCb) + scratchCbOffset,
838 transform16bit->strideCb,
839 tuSize > MIN_PU_SIZE? (tuSize >> subWidthCMinus1): tuSize,
840 transformScratchBuffer,
841 BIT_INCREMENT_8BIT,
842 EB_FALSE,
843 contextPtr->transCoeffShapeChroma);
844
845 UnifiedQuantizeInvQuantize(
846 contextPtr,
847 lcuPtr->pictureControlSetPtr,
848 ((EB_S16*)transform16bit->bufferCb) + scratchCbOffset,
849 transform16bit->strideCb,
850 ((EB_S16*)coeffSamplesTB->bufferCb) + scratchCbOffset,
851 ((EB_S16*)residual16bit->bufferCb) + scratchCbOffset,
852 chromaQp,
853 inputSamples->bitDepth,
854 tuSize > MIN_PU_SIZE? (tuSize >> subWidthCMinus1): tuSize,
855 sliceType,
856 &(countNonZeroCoeffs[1]),
857 contextPtr->transCoeffShapeChroma,
858 contextPtr->cleanSparseCeoffPfEncDec,
859 contextPtr->pmpMaskingLevelEncDec,
860 cuPtr->predictionModeFlag,
861 useDeltaQp == EB_TRUE ? contextPtr->forceCbfFlag : 0,
862 enableContouringQCUpdateFlag,
863 COMPONENT_CHROMA,
864 temporalLayerIndex,
865 0,
866 cabacEncodeCtxPtr,
867 contextPtr->fullLambda,
868 intraLumaMode,
869 EB_INTRA_CHROMA_DM,
870 CabacCost);
871
872 if (secondChroma) {
873 tuPtr->cbCbf2 = countNonZeroCoeffs[1] ? EB_TRUE : EB_FALSE;
874 tuPtr->isOnlyDc2[0] = (countNonZeroCoeffs[1] == 1 && (((EB_S16*)residual16bit->bufferCb) + scratchCbOffset)[0] != 0) ?
875 EB_TRUE :
876 EB_FALSE;
877 } else {
878 tuPtr->cbCbf = countNonZeroCoeffs[1] ? EB_TRUE : EB_FALSE;
879
880 if (tuSize > MIN_PU_SIZE) {
881 tuPtr->isOnlyDc[1] = (countNonZeroCoeffs[1] == 1 && (((EB_S16*)residual16bit->bufferCb) + scratchCbOffset)[0] != 0) ?
882 EB_TRUE :
883 EB_FALSE;
884
885 if (contextPtr->transCoeffShapeChroma && tuPtr->cbCbf && tuPtr->isOnlyDc[1] == EB_FALSE) {
886 if (contextPtr->transCoeffShapeChroma == PF_N2 || contextPtr->transCoeffShapeChroma == PF_N4) {
887 PfZeroOutUselessQuadrants(
888 ((EB_S16*)residual16bit->bufferCb) + scratchCbOffset,
889 residual16bit->strideCb,
890 (tuSize >> (1 + subWidthCMinus1)));
891 }
892
893 if (contextPtr->transCoeffShapeChroma == PF_N4) {
894 PfZeroOutUselessQuadrants(
895 ((EB_S16*)residual16bit->bufferCb) + scratchCbOffset,
896 residual16bit->strideCb,
897 (tuSize >> (2 + subWidthCMinus1)));
898 }
899 }
900 } else {
901 if (contextPtr->transCoeffShapeChroma && tuPtr->cbCbf) {
902 PfZeroOutUselessQuadrants(
903 ((EB_S16*)residual16bit->bufferCb) + scratchCbOffset,
904 residual16bit->strideCb,
905 (tuSize >> 1));
906
907 if (contextPtr->transCoeffShapeChroma == PF_N4) {
908 PfZeroOutUselessQuadrants(
909 ((EB_S16*)residual16bit->bufferCb) + scratchCbOffset,
910 residual16bit->strideCb,
911 (tuSize >> 2));
912 }
913 }
914 }
915 }
916
917
918 //**********************************
919 // Cr
920 //**********************************
921 PictureResidual(
922 inputSamples->bufferCr + inputCrOffset,
923 inputSamples->strideCr,
924 predSamples->bufferCr + predCrOffset,
925 predSamples->strideCr,
926 ((EB_S16*)residual16bit->bufferCr) + scratchCrOffset,
927 residual16bit->strideCr,
928 tuSize > MIN_PU_SIZE? (tuSize >> subWidthCMinus1): tuSize,
929 tuSize > MIN_PU_SIZE? (tuSize >> subWidthCMinus1): tuSize);
930
931 if (tuSize > MIN_PU_SIZE && contextPtr->transCoeffShapeChroma == ONLY_DC_SHAPE) {
932 EB_S64 sumResidual = SumResidual_funcPtrArray[!!(ASM_TYPES & AVX2_MASK)](
933 ((EB_S16*)residual16bit->bufferCr) + scratchCrOffset,
934 tuSize >> subWidthCMinus1,
935 residual16bit->strideCr);
936
937 sumResidual = (ABS(sumResidual) / (tuSize >> subWidthCMinus1) / (tuSize >> subWidthCMinus1));
938 if (sumResidual > 0) {
939 contextPtr->transCoeffShapeChroma = N2_SHAPE;
940 }
941 }
942
943 EstimateTransform(
944 ((EB_S16*)residual16bit->bufferCr) + scratchCrOffset,
945 residual16bit->strideCr,
946 ((EB_S16*)transform16bit->bufferCr) + scratchCrOffset,
947 transform16bit->strideCr,
948 tuSize > MIN_PU_SIZE? (tuSize >> subWidthCMinus1): tuSize,
949 transformScratchBuffer,
950 BIT_INCREMENT_8BIT,
951 EB_FALSE,
952 contextPtr->transCoeffShapeChroma);
953
954 UnifiedQuantizeInvQuantize(
955 contextPtr,
956 lcuPtr->pictureControlSetPtr,
957 ((EB_S16*)transform16bit->bufferCr) + scratchCrOffset,
958 transform16bit->strideCr,
959 ((EB_S16*)coeffSamplesTB->bufferCr) + scratchCrOffset,
960 ((EB_S16*)residual16bit->bufferCr) + scratchCrOffset,
961 chromaQp,
962 inputSamples->bitDepth,
963 tuSize > MIN_PU_SIZE? (tuSize >> subWidthCMinus1): tuSize,
964 sliceType,
965 &(countNonZeroCoeffs[2]),
966 contextPtr->transCoeffShapeChroma,
967 contextPtr->cleanSparseCeoffPfEncDec,
968 contextPtr->pmpMaskingLevelEncDec,
969 cuPtr->predictionModeFlag,
970 0,
971 enableContouringQCUpdateFlag,
972 COMPONENT_CHROMA,
973 temporalLayerIndex,
974 0,
975 cabacEncodeCtxPtr,
976 contextPtr->fullLambda,
977 intraLumaMode,
978 EB_INTRA_CHROMA_DM,
979 CabacCost);
980
981 if ((componentMask & PICTURE_BUFFER_DESC_CHROMA_MASK) && secondChroma) {
982 tuPtr->crCbf2 = countNonZeroCoeffs[2] ? EB_TRUE : EB_FALSE;
983 tuPtr->isOnlyDc2[1] = (countNonZeroCoeffs[2] == 1 && (((EB_S16*)residual16bit->bufferCr) + scratchCbOffset)[0] != 0) ?
984 EB_TRUE :
985 EB_FALSE;
986 } else {
987 tuPtr->crCbf = countNonZeroCoeffs[2] ? EB_TRUE : EB_FALSE;
988
989 if (tuSize > MIN_PU_SIZE) {
990 tuPtr->isOnlyDc[2] = (countNonZeroCoeffs[2] == 1 && (((EB_S16*)residual16bit->bufferCr) + scratchCbOffset)[0] != 0) ?
991 EB_TRUE :
992 EB_FALSE;
993 if (contextPtr->transCoeffShapeChroma && tuPtr->crCbf && tuPtr->isOnlyDc[2] == EB_FALSE) {
994
995 if (contextPtr->transCoeffShapeChroma == PF_N2 || contextPtr->transCoeffShapeChroma == PF_N4) {
996 PfZeroOutUselessQuadrants(
997 ((EB_S16*)residual16bit->bufferCr) + scratchCrOffset,
998 residual16bit->strideCr,
999 (tuSize >> (1 + subWidthCMinus1)));
1000 }
1001
1002 if (contextPtr->transCoeffShapeChroma == PF_N4) {
1003 PfZeroOutUselessQuadrants(
1004 ((EB_S16*)residual16bit->bufferCr) + scratchCrOffset,
1005 residual16bit->strideCr,
1006 (tuSize >> (2 + subWidthCMinus1)));
1007 }
1008 }
1009 } else {
1010 if (contextPtr->transCoeffShapeChroma && tuPtr->crCbf) {
1011 PfZeroOutUselessQuadrants(
1012 ((EB_S16*)residual16bit->bufferCr) + scratchCrOffset,
1013 residual16bit->strideCr,
1014 (tuSize >> 1));
1015
1016 if (contextPtr->transCoeffShapeChroma == PF_N4) {
1017 PfZeroOutUselessQuadrants(
1018 ((EB_S16*)residual16bit->bufferCr) + scratchCrOffset,
1019 residual16bit->strideCr,
1020 (tuSize >> 2));
1021 }
1022 }
1023 }
1024 }
1025 }
1026 #ifdef DEBUG_REF_INFO
1027 if (lcuPtr->pictureControlSetPtr->pictureNumber == 0) {
1028 {
1029 int chroma_size = tuSize > MIN_PU_SIZE? (tuSize >> subWidthCMinus1): tuSize;
1030
1031 printf("\n----- Dump coeff for 1st loop at (%d, %d), qp is %d -----\n", originX, originY, qp);
1032 if (componentMask & PICTURE_BUFFER_DESC_LUMA_MASK) {
1033 dump_block_from_desc(tuSize, coeffSamplesTB, originX&63, originY&63, 0);
1034 }
1035 //if (componentMask & PICTURE_BUFFER_DESC_CHROMA_MASK) {
1036 // dump_block_from_desc(chroma_size, coeffSamplesTB, originX&63, originY&63, 1);
1037 //}
1038
1039 printf("\n----- Dump residual for 1st loop at (%d, %d)-----\n", originX, originY);
1040 if (componentMask & PICTURE_BUFFER_DESC_LUMA_MASK) {
1041 dump_block_from_desc(tuSize, residual16bit, originX&63, originY&63, 0);
1042 }
1043 //if (componentMask & PICTURE_BUFFER_DESC_CHROMA_MASK) {
1044 // dump_block_from_desc(chroma_size, residual16bit, originX&63, originY&63, 1);
1045 //}
1046 }
1047 }
1048 #endif
1049
1050 if ((componentMask & PICTURE_BUFFER_DESC_CHROMA_MASK) && secondChroma) {
1051 tuPtr->nzCoefCount2[0] = (EB_U16)countNonZeroCoeffs[1];
1052 tuPtr->nzCoefCount2[1] = (EB_U16)countNonZeroCoeffs[2];
1053 tuPtr->transCoeffShapeChroma2 = contextPtr->transCoeffShapeChroma;
1054 } else {
1055 tuPtr->transCoeffShapeLuma = contextPtr->transCoeffShapeLuma;
1056 tuPtr->transCoeffShapeChroma = contextPtr->transCoeffShapeChroma;
1057 tuPtr->nzCoefCount[0] = (EB_U16)countNonZeroCoeffs[0];
1058 tuPtr->nzCoefCount[1] = (EB_U16)countNonZeroCoeffs[1];
1059 tuPtr->nzCoefCount[2] = (EB_U16)countNonZeroCoeffs[2];
1060 }
1061
1062 return;
1063 }
1064
1065 /**********************************************************
1066 * Encode Generate Recon
1067 *
1068 * Summary: Performs a H.265 conformant
1069 * Inverse Transform and generate
1070 * the reconstructed samples of a TU.
1071 *
1072 * Inputs:
1073 * originX
1074 * originY
1075 * tuSize
1076 * lcuSize
1077 * input - Inverse Qunatized Coeff (position sensitive)
1078 * pred - prediction samples (position independent)
1079 *
1080 * Outputs:
1081 * Recon (position independent)
1082 *
1083 **********************************************************/
EncodeGenerateRecon(EncDecContext_t * contextPtr,EB_U32 originX,EB_U32 originY,EB_U32 componentMask,EB_COLOR_FORMAT colorFormat,EB_BOOL secondChroma,EB_U32 tuSize,EbPictureBufferDesc_t * predSamples,EbPictureBufferDesc_t * residual16bit,EB_S16 * transformScratchBuffer)1084 static void EncodeGenerateRecon(
1085 EncDecContext_t *contextPtr,
1086 EB_U32 originX,
1087 EB_U32 originY,
1088 EB_U32 componentMask,
1089 EB_COLOR_FORMAT colorFormat,
1090 EB_BOOL secondChroma,
1091 EB_U32 tuSize,
1092 EbPictureBufferDesc_t *predSamples, // no basis/offset
1093 EbPictureBufferDesc_t *residual16bit, // no basis/offset
1094 EB_S16 *transformScratchBuffer)
1095 {
1096 EB_U32 predLumaOffset;
1097 EB_U32 predChromaOffset;
1098 EB_U32 scratchLumaOffset;
1099 EB_U32 scratchChromaOffset;
1100 EB_U32 reconLumaOffset;
1101 EB_U32 reconChromaOffset;
1102
1103 CodingUnit_t *cuPtr = contextPtr->cuPtr;
1104 TransformUnit_t *tuPtr = &cuPtr->transformUnitArray[contextPtr->tuItr];
1105 EbPictureBufferDesc_t *reconSamples = predSamples;
1106
1107 const EB_U16 subWidthCMinus1 = (colorFormat == EB_YUV444 ? 1 : 2) - 1;
1108 const EB_U16 subHeightCMinus1 = (colorFormat >= EB_YUV422 ? 1 : 2) - 1;
1109 const EB_U16 shift_bit = (tuSize == MIN_PU_SIZE) ? 0 : subWidthCMinus1;
1110 EB_U16 tuChromaOffset = 0;
1111 if (colorFormat == EB_YUV422 && secondChroma) {
1112 tuChromaOffset = tuSize >> 1;
1113 }
1114 EB_BOOL cbCbf=secondChroma?tuPtr->cbCbf2:tuPtr->cbCbf;
1115 EB_BOOL crCbf=secondChroma?tuPtr->crCbf2:tuPtr->crCbf;
1116 // *Note - The prediction is built in-place in the Recon buffer. It is overwritten with Reconstructed
1117 // samples if the CBF==1 && SKIP==False
1118
1119 //**********************************
1120 // Luma
1121 //**********************************
1122 if (componentMask & PICTURE_BUFFER_DESC_LUMA_MASK) {
1123 predLumaOffset = (predSamples->originY+originY) * predSamples->strideY + (predSamples->originX+originX);
1124 scratchLumaOffset = ((originY & (63)) * 64) + (originX & (63));
1125 reconLumaOffset = (reconSamples->originY+originY) * reconSamples->strideY + (reconSamples->originX+originX);
1126 if (tuPtr->lumaCbf == EB_TRUE && cuPtr->skipFlag == EB_FALSE) {
1127
1128 EncodeInvTransform(
1129 (tuSize==MIN_PU_SIZE)?EB_FALSE:(tuPtr->transCoeffShapeLuma == ONLY_DC_SHAPE || tuPtr->isOnlyDc[0]),
1130 ((EB_S16*)residual16bit->bufferY) + scratchLumaOffset,
1131 residual16bit->strideY,
1132 ((EB_S16*)residual16bit->bufferY) + scratchLumaOffset,
1133 residual16bit->strideY,
1134 tuSize,
1135 transformScratchBuffer,
1136 BIT_INCREMENT_8BIT,
1137 (EB_BOOL)(tuSize == MIN_PU_SIZE));
1138
1139 AdditionKernel_funcPtrArray[!!(ASM_TYPES & PREAVX2_MASK)][tuSize >> 3](
1140 predSamples->bufferY + predLumaOffset,
1141 predSamples->strideY,
1142 ((EB_S16*)residual16bit->bufferY) + scratchLumaOffset,
1143 residual16bit->strideY,
1144 reconSamples->bufferY + reconLumaOffset,
1145 reconSamples->strideY,
1146 tuSize,
1147 tuSize);
1148 }
1149 }
1150
1151 //**********************************
1152 // Chroma
1153 //**********************************
1154
1155 if (componentMask & PICTURE_BUFFER_DESC_CHROMA_MASK) {
1156 predChromaOffset = ((predSamples->originX + originX) >> subWidthCMinus1) +
1157 (((predSamples->originY + originY + tuChromaOffset) >> subHeightCMinus1) * predSamples->strideCb);
1158 scratchChromaOffset = ((originX & 63) >> subWidthCMinus1) +
1159 (((originY+tuChromaOffset) & 63) >> subHeightCMinus1) * (64 >> subWidthCMinus1);
1160 reconChromaOffset = ((reconSamples->originX + originX) >> subWidthCMinus1) +
1161 (((reconSamples->originY + originY + tuChromaOffset) >> subHeightCMinus1) * reconSamples->strideCb);
1162
1163 //**********************************
1164 // Cb
1165 //**********************************
1166 if (cbCbf== EB_TRUE && cuPtr->skipFlag == EB_FALSE) {
1167 EncodeInvTransform(
1168 (tuSize==MIN_PU_SIZE)?EB_FALSE:(secondChroma ? (tuPtr->transCoeffShapeChroma2 == ONLY_DC_SHAPE || tuPtr->isOnlyDc2[0]) : (tuPtr->transCoeffShapeChroma == ONLY_DC_SHAPE || tuPtr->isOnlyDc[1])),
1169 ((EB_S16*)residual16bit->bufferCb) + scratchChromaOffset,
1170 residual16bit->strideCb,
1171 ((EB_S16*)residual16bit->bufferCb) + scratchChromaOffset,
1172 residual16bit->strideCb,
1173 tuSize >> shift_bit,
1174 transformScratchBuffer,
1175 BIT_INCREMENT_8BIT,
1176 EB_FALSE);
1177
1178 AdditionKernel_funcPtrArray[!!(ASM_TYPES & PREAVX2_MASK)][tuSize >> (3 + shift_bit)](
1179 predSamples->bufferCb + predChromaOffset,
1180 predSamples->strideCb,
1181 ((EB_S16*)residual16bit->bufferCb) + scratchChromaOffset,
1182 residual16bit->strideCb,
1183 reconSamples->bufferCb + reconChromaOffset,
1184 reconSamples->strideCb,
1185 tuSize >> shift_bit,
1186 tuSize >> shift_bit);
1187 }
1188
1189 //**********************************
1190 // Cr
1191 //**********************************
1192 predChromaOffset = ((predSamples->originX+originX) >> subWidthCMinus1) +
1193 (((predSamples->originY + originY + tuChromaOffset) >> subHeightCMinus1) * predSamples->strideCr);
1194 scratchChromaOffset = ((originX & (63)) >> subWidthCMinus1) +
1195 (((originY+tuChromaOffset) & 63) >> subHeightCMinus1) * (64 >> subWidthCMinus1);
1196 reconChromaOffset = ((reconSamples->originX+originX) >> subWidthCMinus1) +
1197 (((reconSamples->originY + originY + tuChromaOffset) >> subHeightCMinus1) * reconSamples->strideCr);
1198
1199 if (crCbf == EB_TRUE && cuPtr->skipFlag == EB_FALSE) {
1200 EncodeInvTransform(
1201 (tuSize==MIN_PU_SIZE)?EB_FALSE:(secondChroma ? (tuPtr->transCoeffShapeChroma2 == ONLY_DC_SHAPE || tuPtr->isOnlyDc2[1]) : (tuPtr->transCoeffShapeChroma == ONLY_DC_SHAPE || tuPtr->isOnlyDc[2])),
1202 ((EB_S16*)residual16bit->bufferCr) + scratchChromaOffset,
1203 residual16bit->strideCr,
1204 ((EB_S16*)residual16bit->bufferCr) + scratchChromaOffset,
1205 residual16bit->strideCr,
1206 tuSize >> shift_bit,
1207 transformScratchBuffer,
1208 BIT_INCREMENT_8BIT,
1209 EB_FALSE);
1210
1211 AdditionKernel_funcPtrArray[!!(ASM_TYPES & PREAVX2_MASK)][tuSize >> (3 + shift_bit)](
1212 predSamples->bufferCr + predChromaOffset,
1213 predSamples->strideCr,
1214 ((EB_S16*)residual16bit->bufferCr) + scratchChromaOffset,
1215 residual16bit->strideCr,
1216 reconSamples->bufferCr + reconChromaOffset,
1217 reconSamples->strideCr,
1218 tuSize >> shift_bit,
1219 tuSize >> shift_bit);
1220 }
1221 }
1222
1223 return;
1224 }
1225
1226 /**********************************************************
1227 * Encode Loop
1228 *
1229 * Summary: Performs a H.265 conformant
1230 * Transform, Quantization and Inverse Quantization of a TU.
1231 *
1232 * Inputs:
1233 * originX
1234 * originY
1235 * tuSize
1236 * lcuSize
1237 * input - input samples (position sensitive)
1238 * pred - prediction samples (position independent)
1239 *
1240 * Outputs:
1241 * Inverse quantized coeff - quantization indices (position sensitive)
1242 *
1243 **********************************************************/
EncodeLoop16bit(EncDecContext_t * contextPtr,LargestCodingUnit_t * lcuPtr,EB_U32 originX,EB_U32 originY,EB_U32 cbQp,EbPictureBufferDesc_t * predSamples,EbPictureBufferDesc_t * coeffSamplesTB,EbPictureBufferDesc_t * residual16bit,EbPictureBufferDesc_t * transform16bit,EB_S16 * transformScratchBuffer,EB_U32 * countNonZeroCoeffs,EB_U32 useDeltaQp,CabacEncodeContext_t * cabacEncodeCtxPtr,EB_U32 intraLumaMode,EB_U32 componentMask,EB_COLOR_FORMAT colorFormat,EB_BOOL secondChroma,EB_U32 tuSize,CabacCost_t * CabacCost,EB_U32 dZoffset)1244 static void EncodeLoop16bit(
1245 EncDecContext_t *contextPtr,
1246 LargestCodingUnit_t *lcuPtr,
1247 EB_U32 originX,
1248 EB_U32 originY,
1249 EB_U32 cbQp,
1250 EbPictureBufferDesc_t *predSamples, // no basis/offset
1251 EbPictureBufferDesc_t *coeffSamplesTB, // lcu based
1252 EbPictureBufferDesc_t *residual16bit, // no basis/offset
1253 EbPictureBufferDesc_t *transform16bit, // no basis/offset
1254 EB_S16 *transformScratchBuffer,
1255 EB_U32 *countNonZeroCoeffs,
1256 EB_U32 useDeltaQp,
1257 CabacEncodeContext_t *cabacEncodeCtxPtr,
1258 EB_U32 intraLumaMode,
1259 EB_U32 componentMask,
1260 EB_COLOR_FORMAT colorFormat,
1261 EB_BOOL secondChroma,
1262 EB_U32 tuSize,
1263 CabacCost_t *CabacCost,
1264 EB_U32 dZoffset)
1265 {
1266 EB_U32 chromaQp = cbQp;
1267 CodingUnit_t *cuPtr = contextPtr->cuPtr;
1268 TransformUnit_t *tuPtr = &cuPtr->transformUnitArray[contextPtr->tuItr];
1269 EB_PICTURE sliceType = lcuPtr->pictureControlSetPtr->sliceType;
1270 EB_U32 temporalLayerIndex = lcuPtr->pictureControlSetPtr->temporalLayerIndex;
1271 EB_U32 qp = cuPtr->qp;
1272 EbPictureBufferDesc_t *inputSamples16bit = contextPtr->inputSample16bitBuffer; //64x64 for 16bit, whole frame for 8bit
1273 EbPictureBufferDesc_t *predSamples16bit = predSamples;
1274
1275 const EB_U16 subWidthCMinus1 = (colorFormat == EB_YUV444 ? 1 : 2) - 1;
1276 const EB_U16 subHeightCMinus1 = (colorFormat >= EB_YUV422 ? 1 : 2) - 1;
1277 EB_U16 tuChromaOffset = 0;
1278 if (colorFormat == EB_YUV422 && secondChroma) {
1279 tuChromaOffset = tuSize >> 1;
1280 }
1281
1282 const EB_U32 inputLumaOffset = ((originY & 63) * inputSamples16bit->strideY) + (originX & 63);
1283 const EB_U32 predLumaOffset = ((predSamples16bit->originY + originY) * predSamples16bit->strideY) + (predSamples16bit->originX + originX);
1284 const EB_U32 scratchLumaOffset = ((originY & 63) * 64) + (originX & 63);
1285
1286 const EB_U32 inputCbOffset = ((((originY + tuChromaOffset) & 63) >> subHeightCMinus1) * inputSamples16bit->strideCb) + ((originX & 63) >> subWidthCMinus1);
1287 const EB_U32 inputCrOffset = ((((originY + tuChromaOffset) & 63) >> subHeightCMinus1) * inputSamples16bit->strideCr) + ((originX & 63) >> subWidthCMinus1);
1288
1289 const EB_U32 predCbOffset = ((predSamples->originX + originX) >> subWidthCMinus1) +
1290 (((predSamples->originY + originY + tuChromaOffset) >> subHeightCMinus1) * predSamples->strideCb);
1291 const EB_U32 predCrOffset = ((predSamples->originX + originX) >> subWidthCMinus1) +
1292 (((predSamples->originY + originY + tuChromaOffset) >> subHeightCMinus1) * predSamples->strideCr);
1293
1294 const EB_U32 scratchCbOffset = ((originX & (64 - 1)) >> subWidthCMinus1) +
1295 ((((originY + tuChromaOffset) & (64 - 1)) >> subHeightCMinus1) * (64 >> subWidthCMinus1));
1296 const EB_U32 scratchCrOffset = ((originX & (64 - 1)) >> subWidthCMinus1) +
1297 ((((originY + tuChromaOffset) & (64 - 1)) >> subHeightCMinus1) * (64 >> subWidthCMinus1));
1298
1299 EB_U8 enableContouringQCUpdateFlag;
1300
1301 enableContouringQCUpdateFlag = DeriveContouringClass(
1302 lcuPtr->pictureControlSetPtr->ParentPcsPtr,
1303 lcuPtr->index,
1304 cuPtr->leafIndex) && (cuPtr->qp < lcuPtr->pictureControlSetPtr->pictureQp);
1305
1306 //Update QP for Quant
1307 qp += QP_BD_OFFSET;
1308 chromaQp += QP_BD_OFFSET;
1309
1310
1311 if (componentMask & PICTURE_BUFFER_DESC_LUMA_MASK) {
1312 PictureResidual16bit(
1313 ((EB_U16*)inputSamples16bit->bufferY) + inputLumaOffset,
1314 inputSamples16bit->strideY,
1315 ((EB_U16*)predSamples16bit->bufferY) + predLumaOffset,
1316 predSamples16bit->strideY,
1317 ((EB_S16*)residual16bit->bufferY) + scratchLumaOffset,
1318 64,
1319 tuSize,
1320 tuSize);
1321
1322 EncodeTransform(
1323 ((EB_S16*)residual16bit->bufferY) + scratchLumaOffset,
1324 64,
1325 ((EB_S16*)transform16bit->bufferY) + scratchLumaOffset,
1326 64,
1327 tuSize,
1328 transformScratchBuffer,
1329 BIT_INCREMENT_10BIT,
1330 (EB_BOOL)(tuSize == MIN_PU_SIZE),
1331 contextPtr->transCoeffShapeLuma);
1332
1333 UnifiedQuantizeInvQuantize(
1334 contextPtr,
1335 lcuPtr->pictureControlSetPtr,
1336 ((EB_S16*)transform16bit->bufferY) + scratchLumaOffset,
1337 64,
1338 ((EB_S16*)coeffSamplesTB->bufferY) + scratchLumaOffset,
1339 ((EB_S16*)residual16bit->bufferY) + scratchLumaOffset,
1340 qp,
1341 EB_10BIT,
1342 tuSize,
1343 sliceType,
1344 &(countNonZeroCoeffs[0]),
1345 contextPtr->transCoeffShapeLuma,
1346 contextPtr->cleanSparseCeoffPfEncDec,
1347 contextPtr->pmpMaskingLevelEncDec,
1348 cuPtr->predictionModeFlag,
1349 0,
1350 enableContouringQCUpdateFlag,
1351 COMPONENT_LUMA,
1352 temporalLayerIndex,
1353 dZoffset,
1354 cabacEncodeCtxPtr,
1355 contextPtr->fullLambda,
1356 intraLumaMode,
1357 EB_INTRA_CHROMA_DM,
1358 CabacCost);
1359
1360 tuPtr->lumaCbf = countNonZeroCoeffs[0] ? EB_TRUE : EB_FALSE;
1361
1362 if (tuSize > MIN_PU_SIZE) {
1363 tuPtr->isOnlyDc[0] = (countNonZeroCoeffs[0] == 1 && (((EB_S16*)residual16bit->bufferY) + scratchLumaOffset)[0] != 0 && tuSize != 32) ?
1364 EB_TRUE :
1365 EB_FALSE;
1366
1367 if (contextPtr->transCoeffShapeLuma && tuPtr->lumaCbf && tuPtr->isOnlyDc[0] == EB_FALSE) {
1368 if (contextPtr->transCoeffShapeLuma == N2_SHAPE || contextPtr->transCoeffShapeLuma == N4_SHAPE) {
1369 PfZeroOutUselessQuadrants(
1370 ((EB_S16*)residual16bit->bufferY) + scratchLumaOffset,
1371 64,
1372 (tuSize >> 1));
1373 }
1374
1375 if (contextPtr->transCoeffShapeLuma == N4_SHAPE) {
1376 PfZeroOutUselessQuadrants(
1377 ((EB_S16*)residual16bit->bufferY) + scratchLumaOffset,
1378 64,
1379 (tuSize >> 2));
1380 }
1381 }
1382 } else {
1383 if (contextPtr->transCoeffShapeLuma && tuPtr->lumaCbf) {
1384
1385 PfZeroOutUselessQuadrants(
1386 ((EB_S16*)residual16bit->bufferY) + scratchLumaOffset,
1387 64,
1388 (tuSize >> 1));
1389
1390 if (contextPtr->transCoeffShapeLuma == N4_SHAPE) {
1391
1392 PfZeroOutUselessQuadrants(
1393 ((EB_S16*)residual16bit->bufferY) + scratchLumaOffset,
1394 64,
1395 (tuSize >> 2));
1396 }
1397 }
1398 }
1399 }
1400
1401 if (componentMask & PICTURE_BUFFER_DESC_CHROMA_MASK) {
1402 //**********************************
1403 // Cb
1404 //**********************************
1405 PictureResidual16bit(
1406 ((EB_U16*)inputSamples16bit->bufferCb) + inputCbOffset,
1407 inputSamples16bit->strideCb,
1408 ((EB_U16*)predSamples16bit->bufferCb) + predCbOffset,
1409 predSamples16bit->strideCb,
1410 ((EB_S16*)residual16bit->bufferCb) + scratchCbOffset,
1411 residual16bit->strideCb,
1412 tuSize > MIN_PU_SIZE? (tuSize >> subWidthCMinus1): tuSize,
1413 tuSize > MIN_PU_SIZE? (tuSize >> subWidthCMinus1): tuSize);
1414
1415 // For the case that DC path chosen for chroma, we check the DC values and determine to use DC or N2Shape for chroma. Since there is only one flag for ChromaShaping, we do the prediction of Cr and Cb and decide on the chroma shaping
1416 if (tuSize > MIN_PU_SIZE && contextPtr->transCoeffShapeChroma == ONLY_DC_SHAPE) {
1417 EB_S64 sumResidual = SumResidual_funcPtrArray[!!(ASM_TYPES & AVX2_MASK)](
1418 ((EB_S16*)residual16bit->bufferCb) + scratchCbOffset,
1419 tuSize >> subWidthCMinus1,
1420 residual16bit->strideCb);
1421 sumResidual = (ABS(sumResidual) / (tuSize >> subWidthCMinus1) / (tuSize >> subWidthCMinus1)); // Normalized based on the size. For chroma, tusize/2 +Tusize/2
1422 if (sumResidual > (1 << BIT_INCREMENT_10BIT)) {
1423 contextPtr->transCoeffShapeChroma = N2_SHAPE;
1424 }
1425 }
1426
1427 EncodeTransform(
1428 ((EB_S16*)residual16bit->bufferCb) + scratchCbOffset,
1429 residual16bit->strideCb,
1430 ((EB_S16*)transform16bit->bufferCb) + scratchCbOffset,
1431 transform16bit->strideCb,
1432 tuSize > MIN_PU_SIZE? (tuSize >> subWidthCMinus1): tuSize,
1433 transformScratchBuffer,
1434 BIT_INCREMENT_10BIT,
1435 EB_FALSE,
1436 contextPtr->transCoeffShapeChroma);
1437
1438 UnifiedQuantizeInvQuantize(
1439 contextPtr,
1440 lcuPtr->pictureControlSetPtr,
1441 ((EB_S16*)transform16bit->bufferCb) + scratchCbOffset,
1442 transform16bit->strideCb,
1443 ((EB_S16*)coeffSamplesTB->bufferCb) + scratchCbOffset,
1444 ((EB_S16*)residual16bit->bufferCb) + scratchCbOffset,
1445 chromaQp,
1446 EB_10BIT,
1447 tuSize > MIN_PU_SIZE? (tuSize >> subWidthCMinus1): tuSize,
1448 sliceType,
1449 &(countNonZeroCoeffs[1]),
1450 contextPtr->transCoeffShapeChroma,
1451 contextPtr->cleanSparseCeoffPfEncDec,
1452 contextPtr->pmpMaskingLevelEncDec,
1453 cuPtr->predictionModeFlag,
1454 0, //useDeltaQp == EB_TRUE ? contextPtr->forceCbfFlag : 0
1455 enableContouringQCUpdateFlag,
1456 COMPONENT_CHROMA,
1457 temporalLayerIndex,
1458 0,
1459 cabacEncodeCtxPtr,
1460 contextPtr->fullLambda,
1461 intraLumaMode,
1462 EB_INTRA_CHROMA_DM,
1463 CabacCost);
1464
1465
1466 if ((componentMask & PICTURE_BUFFER_DESC_CHROMA_MASK) && secondChroma) {
1467 tuPtr->cbCbf2 = countNonZeroCoeffs[1] ? EB_TRUE : EB_FALSE;
1468 tuPtr->isOnlyDc2[0] = (countNonZeroCoeffs[1] == 1 && (((EB_S16*)residual16bit->bufferCb) + scratchCbOffset)[0] != 0) ?
1469 EB_TRUE :
1470 EB_FALSE;
1471 } else {
1472 tuPtr->cbCbf = countNonZeroCoeffs[1] ? EB_TRUE : EB_FALSE;
1473
1474 if (tuSize > MIN_PU_SIZE) {
1475 tuPtr->isOnlyDc[1] = (countNonZeroCoeffs[1] == 1 && (((EB_S16*)residual16bit->bufferCb) + scratchCbOffset)[0] != 0) ?
1476 EB_TRUE :
1477 EB_FALSE;
1478
1479 if (contextPtr->transCoeffShapeChroma && tuPtr->cbCbf && tuPtr->isOnlyDc[1] == EB_FALSE) {
1480 if (contextPtr->transCoeffShapeChroma == PF_N2 || contextPtr->transCoeffShapeChroma == PF_N4) {
1481 PfZeroOutUselessQuadrants(
1482 ((EB_S16*)residual16bit->bufferCb) + scratchCbOffset,
1483 residual16bit->strideCb,
1484 (tuSize >> (1 + subWidthCMinus1)));
1485 }
1486
1487 if (contextPtr->transCoeffShapeChroma == PF_N4) {
1488 PfZeroOutUselessQuadrants(
1489 ((EB_S16*)residual16bit->bufferCb) + scratchCbOffset,
1490 residual16bit->strideCb,
1491 (tuSize >> (2 + subWidthCMinus1)));
1492 }
1493 }
1494 } else {
1495 if (contextPtr->transCoeffShapeChroma && tuPtr->cbCbf) {
1496 PfZeroOutUselessQuadrants(
1497 ((EB_S16*)residual16bit->bufferCb) + scratchCbOffset,
1498 residual16bit->strideCb,
1499 (tuSize >> 1));
1500
1501 if (contextPtr->transCoeffShapeChroma == PF_N4) {
1502 PfZeroOutUselessQuadrants(
1503 ((EB_S16*)residual16bit->bufferCb) + scratchCbOffset,
1504 residual16bit->strideCb,
1505 (tuSize >> 2));
1506 }
1507 }
1508 }
1509 }
1510
1511
1512 //**********************************
1513 // Cr
1514 //**********************************
1515 PictureResidual16bit(
1516 ((EB_U16*)inputSamples16bit->bufferCr) + inputCrOffset,
1517 inputSamples16bit->strideCr,
1518 ((EB_U16*)predSamples16bit->bufferCr) + predCrOffset,
1519 predSamples16bit->strideCr,
1520 ((EB_S16*)residual16bit->bufferCr) + scratchCrOffset,
1521 residual16bit->strideCr,
1522 tuSize > MIN_PU_SIZE? (tuSize >> subWidthCMinus1): tuSize,
1523 tuSize > MIN_PU_SIZE? (tuSize >> subWidthCMinus1): tuSize);
1524
1525 if (tuSize > MIN_PU_SIZE && contextPtr->transCoeffShapeChroma == ONLY_DC_SHAPE) {
1526 EB_S64 sumResidual = SumResidual_funcPtrArray[!!(ASM_TYPES & AVX2_MASK)](
1527 ((EB_S16*)residual16bit->bufferCr) + scratchCrOffset,
1528 tuSize >> subWidthCMinus1,
1529 residual16bit->strideCr);
1530
1531 sumResidual = (ABS(sumResidual) / (tuSize >> subWidthCMinus1) / (tuSize >> subWidthCMinus1)); // Normalized based on the size. For chroma, tusize/2 +Tusize/2
1532 if (sumResidual > (1 << BIT_INCREMENT_10BIT)) {
1533 contextPtr->transCoeffShapeChroma = N2_SHAPE;
1534 }
1535 }
1536
1537 EncodeTransform(
1538 ((EB_S16*)residual16bit->bufferCr) + scratchCrOffset,
1539 residual16bit->strideCr,
1540 ((EB_S16*)transform16bit->bufferCr) + scratchCrOffset,
1541 transform16bit->strideCr,
1542 tuSize > MIN_PU_SIZE? (tuSize >> subWidthCMinus1): tuSize,
1543 transformScratchBuffer,
1544 BIT_INCREMENT_10BIT,
1545 EB_FALSE,
1546 contextPtr->transCoeffShapeChroma);
1547
1548
1549 {
1550 UnifiedQuantizeInvQuantize(
1551 contextPtr,
1552 lcuPtr->pictureControlSetPtr,
1553 ((EB_S16*)transform16bit->bufferCr) + scratchCrOffset,
1554 transform16bit->strideCr,
1555 ((EB_S16*)coeffSamplesTB->bufferCr) + scratchCrOffset,
1556 ((EB_S16*)residual16bit->bufferCr) + scratchCrOffset,
1557 chromaQp,
1558 EB_10BIT,
1559 tuSize > MIN_PU_SIZE? (tuSize >> subWidthCMinus1): tuSize,
1560 sliceType,
1561 &(countNonZeroCoeffs[2]),
1562 contextPtr->transCoeffShapeChroma,
1563 contextPtr->cleanSparseCeoffPfEncDec,
1564 contextPtr->pmpMaskingLevelEncDec,
1565 cuPtr->predictionModeFlag,
1566 useDeltaQp == EB_TRUE ? contextPtr->forceCbfFlag : 0, //Jing: double check here, not align with Cb
1567 enableContouringQCUpdateFlag,
1568 COMPONENT_CHROMA,
1569 temporalLayerIndex,
1570 0,
1571 cabacEncodeCtxPtr,
1572 contextPtr->fullLambda,
1573 intraLumaMode,
1574 EB_INTRA_CHROMA_DM,
1575 CabacCost);
1576 }
1577
1578 if ((componentMask & PICTURE_BUFFER_DESC_CHROMA_MASK) && secondChroma) {
1579
1580 tuPtr->crCbf2 = countNonZeroCoeffs[2] ? EB_TRUE : EB_FALSE;
1581 tuPtr->isOnlyDc2[1] = (countNonZeroCoeffs[2] == 1 && (((EB_S16*)residual16bit->bufferCr) + scratchCbOffset)[0] != 0) ?
1582 EB_TRUE :
1583 EB_FALSE;
1584 } else {
1585 tuPtr->crCbf = countNonZeroCoeffs[2] ? EB_TRUE : EB_FALSE;
1586
1587 if (tuSize > MIN_PU_SIZE) {
1588 tuPtr->isOnlyDc[2] = (countNonZeroCoeffs[2] == 1 && (((EB_S16*)residual16bit->bufferCr) + scratchCbOffset)[0] != 0) ?
1589 EB_TRUE :
1590 EB_FALSE;
1591 if (contextPtr->transCoeffShapeChroma && tuPtr->crCbf && tuPtr->isOnlyDc[2] == EB_FALSE) {
1592
1593 if (contextPtr->transCoeffShapeChroma == PF_N2 || contextPtr->transCoeffShapeChroma == PF_N4) {
1594 PfZeroOutUselessQuadrants(
1595 ((EB_S16*)residual16bit->bufferCr) + scratchCrOffset,
1596 residual16bit->strideCr,
1597 (tuSize >> (1 + subWidthCMinus1)));
1598 }
1599
1600 if (contextPtr->transCoeffShapeChroma == PF_N4) {
1601 PfZeroOutUselessQuadrants(
1602 ((EB_S16*)residual16bit->bufferCr) + scratchCrOffset,
1603 residual16bit->strideCr,
1604 (tuSize >> (2 + subWidthCMinus1)));
1605 }
1606 }
1607 } else {
1608 if (contextPtr->transCoeffShapeChroma && tuPtr->crCbf) {
1609 PfZeroOutUselessQuadrants(
1610 ((EB_S16*)residual16bit->bufferCr) + scratchCrOffset,
1611 residual16bit->strideCr,
1612 (tuSize >> 1));
1613
1614 if (contextPtr->transCoeffShapeChroma == PF_N4) {
1615 PfZeroOutUselessQuadrants(
1616 ((EB_S16*)residual16bit->bufferCr) + scratchCrOffset,
1617 residual16bit->strideCr,
1618 (tuSize >> 2));
1619 }
1620 }
1621 }
1622 }
1623 }
1624
1625
1626 if ((componentMask & PICTURE_BUFFER_DESC_CHROMA_MASK) && secondChroma) {
1627 tuPtr->nzCoefCount2[0] = (EB_U16)countNonZeroCoeffs[1];
1628 tuPtr->nzCoefCount2[1] = (EB_U16)countNonZeroCoeffs[2];
1629 tuPtr->transCoeffShapeChroma2 = contextPtr->transCoeffShapeChroma;
1630 } else {
1631 tuPtr->transCoeffShapeLuma = contextPtr->transCoeffShapeLuma;
1632 tuPtr->transCoeffShapeChroma = contextPtr->transCoeffShapeChroma;
1633 tuPtr->nzCoefCount[0] = (EB_U16)countNonZeroCoeffs[0];
1634 tuPtr->nzCoefCount[1] = (EB_U16)countNonZeroCoeffs[1];
1635 tuPtr->nzCoefCount[2] = (EB_U16)countNonZeroCoeffs[2];
1636 }
1637 return;
1638 }
1639
1640
1641 /**********************************************************
1642 * Encode Generate Recon
1643 *
1644 * Summary: Performs a H.265 conformant
1645 * Inverse Transform and generate
1646 * the reconstructed samples of a TU.
1647 *
1648 * Inputs:
1649 * originX
1650 * originY
1651 * tuSize
1652 * lcuSize
* input - Inverse Quantized Coeff (position sensitive)
1654 * pred - prediction samples (position independent)
1655 *
1656 * Outputs:
1657 * Recon (position independent)
1658 *
1659 **********************************************************/
EncodeGenerateRecon16bit(EncDecContext_t * contextPtr,EB_U32 originX,EB_U32 originY,EB_U32 componentMask,EB_COLOR_FORMAT colorFormat,EB_BOOL secondChroma,EB_U32 tuSize,EbPictureBufferDesc_t * predSamples,EbPictureBufferDesc_t * residual16bit,EB_S16 * transformScratchBuffer)1660 static void EncodeGenerateRecon16bit(
1661 EncDecContext_t *contextPtr,
1662 EB_U32 originX,
1663 EB_U32 originY,
1664 EB_U32 componentMask,
1665 EB_COLOR_FORMAT colorFormat,
1666 EB_BOOL secondChroma,
1667 EB_U32 tuSize,
1668 EbPictureBufferDesc_t *predSamples, // no basis/offset
1669 EbPictureBufferDesc_t *residual16bit, // no basis/offset
1670 EB_S16 *transformScratchBuffer)
1671 {
1672 EB_U32 predLumaOffset;
1673 EB_U32 predChromaOffset;
1674 EB_U32 scratchLumaOffset;
1675 EB_U32 scratchChromaOffset;
1676 EB_U32 reconLumaOffset;
1677 EB_U32 reconChromaOffset;
1678
1679 CodingUnit_t *cuPtr = contextPtr->cuPtr;
1680 TransformUnit_t *tuPtr = &cuPtr->transformUnitArray[contextPtr->tuItr];
1681
1682 const EB_U16 subWidthCMinus1 = (colorFormat == EB_YUV444 ? 1 : 2) - 1;
1683 const EB_U16 subHeightCMinus1 = (colorFormat >= EB_YUV422 ? 1 : 2) - 1;
1684 const EB_U16 shift_bit = (tuSize == MIN_PU_SIZE) ? 0 : subWidthCMinus1;
1685 EB_U16 tuChromaOffset = 0;
1686 if (colorFormat == EB_YUV422 && secondChroma) {
1687 tuChromaOffset = tuSize >> 1;
1688 }
1689 EB_BOOL cbCbf=secondChroma?tuPtr->cbCbf2:tuPtr->cbCbf;
1690 EB_BOOL crCbf=secondChroma?tuPtr->crCbf2:tuPtr->crCbf;
1691 // *Note - The prediction is built in-place in the Recon buffer. It is overwritten with Reconstructed
1692 // samples if the CBF==1 && SKIP==False
1693
1694 //**********************************
1695 // Luma
1696 //**********************************
1697 if (componentMask & PICTURE_BUFFER_DESC_LUMA_MASK) {
1698 predLumaOffset = (predSamples->originY+originY) * predSamples->strideY + (predSamples->originX+originX);
1699 scratchLumaOffset = ((originY & (63)) * 64) + (originX & (63));
1700 reconLumaOffset = (predSamples->originY + originY)* predSamples->strideY + (predSamples->originX + originX);
1701
1702 if (tuPtr->lumaCbf == EB_TRUE && cuPtr->skipFlag == EB_FALSE) {
1703 EncodeInvTransform(
1704 (tuSize==MIN_PU_SIZE)?EB_FALSE:(tuPtr->transCoeffShapeLuma == ONLY_DC_SHAPE || tuPtr->isOnlyDc[0]),
1705 ((EB_S16*)residual16bit->bufferY) + scratchLumaOffset,
1706 64,
1707 ((EB_S16*)residual16bit->bufferY) + scratchLumaOffset,
1708 64,
1709 tuSize,
1710 transformScratchBuffer,
1711 BIT_INCREMENT_10BIT,
1712 (EB_BOOL)(tuSize == MIN_PU_SIZE));
1713
1714 AdditionKernel_funcPtrArray16bit[!!(ASM_TYPES & PREAVX2_MASK)](
1715 (EB_U16*)predSamples->bufferY + predLumaOffset,
1716 predSamples->strideY,
1717 ((EB_S16*)residual16bit->bufferY) + scratchLumaOffset,
1718 64,
1719 (EB_U16*)predSamples->bufferY + reconLumaOffset,
1720 predSamples->strideY,
1721 tuSize,
1722 tuSize);
1723 }
1724 }
1725
1726 //**********************************
1727 // Chroma
1728 //**********************************
1729
1730 if (componentMask & PICTURE_BUFFER_DESC_CHROMA_MASK) {
1731 predChromaOffset = (((predSamples->originY + originY + tuChromaOffset) >> subHeightCMinus1) * predSamples->strideCb) +
1732 ((predSamples->originX + originX) >> subWidthCMinus1);
1733 scratchChromaOffset = (((originY + tuChromaOffset) & 63) >> subHeightCMinus1) * (64 >> subWidthCMinus1) +
1734 ((originX & 63) >> subWidthCMinus1);
1735 reconChromaOffset = (((predSamples->originY + originY + tuChromaOffset) >> subHeightCMinus1) * predSamples->strideCb) +
1736 ((predSamples->originX + originX) >> subWidthCMinus1);
1737
1738 //**********************************
1739 // Cb
1740 //**********************************
1741 if (cbCbf== EB_TRUE && cuPtr->skipFlag == EB_FALSE) {
1742 EncodeInvTransform(
1743 (tuSize==MIN_PU_SIZE)?EB_FALSE:(secondChroma ? (tuPtr->transCoeffShapeChroma2 == ONLY_DC_SHAPE || tuPtr->isOnlyDc2[0]) : (tuPtr->transCoeffShapeChroma == ONLY_DC_SHAPE || tuPtr->isOnlyDc[1])),
1744 ((EB_S16*)residual16bit->bufferCb) + scratchChromaOffset,
1745 residual16bit->strideCb,
1746 ((EB_S16*)residual16bit->bufferCb) + scratchChromaOffset,
1747 residual16bit->strideCb,
1748 tuSize >> shift_bit,
1749 transformScratchBuffer,
1750 BIT_INCREMENT_10BIT,
1751 EB_FALSE);
1752
1753 AdditionKernel_funcPtrArray16bit[!!(ASM_TYPES & PREAVX2_MASK)](
1754 (EB_U16*)predSamples->bufferCb + predChromaOffset,
1755 predSamples->strideCb,
1756 ((EB_S16*)residual16bit->bufferCb) + scratchChromaOffset,
1757 residual16bit->strideCb,
1758 (EB_U16*)predSamples->bufferCb + reconChromaOffset,
1759 predSamples->strideCb,
1760 tuSize >> shift_bit,
1761 tuSize >> shift_bit);
1762 }
1763
1764 //**********************************
1765 // Cr
1766 //**********************************
1767 predChromaOffset = (((predSamples->originY + originY + tuChromaOffset) >> subHeightCMinus1) * predSamples->strideCr) +
1768 ((predSamples->originX + originX) >> subWidthCMinus1);
1769 scratchChromaOffset = (((originY + tuChromaOffset) & 63) >> subHeightCMinus1) * (64 >> subWidthCMinus1) +
1770 ((originX & 63) >> subWidthCMinus1);
1771 reconChromaOffset = (((predSamples->originY + originY + tuChromaOffset) >> subHeightCMinus1) * predSamples->strideCr) +
1772 ((predSamples->originX + originX) >> subWidthCMinus1);
1773
1774 if (crCbf == EB_TRUE && cuPtr->skipFlag == EB_FALSE) {
1775 EncodeInvTransform(
1776 (tuSize==MIN_PU_SIZE)?EB_FALSE:(secondChroma ? (tuPtr->transCoeffShapeChroma2 == ONLY_DC_SHAPE || tuPtr->isOnlyDc2[1]) : (tuPtr->transCoeffShapeChroma == ONLY_DC_SHAPE || tuPtr->isOnlyDc[2])),
1777 ((EB_S16*)residual16bit->bufferCr) + scratchChromaOffset,
1778 residual16bit->strideCr,
1779 ((EB_S16*)residual16bit->bufferCr) + scratchChromaOffset,
1780 residual16bit->strideCr,
1781 tuSize >> shift_bit,
1782 transformScratchBuffer,
1783 BIT_INCREMENT_10BIT,
1784 EB_FALSE);
1785
1786 AdditionKernel_funcPtrArray16bit[!!(ASM_TYPES & PREAVX2_MASK)](
1787 (EB_U16*)predSamples->bufferCr + predChromaOffset,
1788 predSamples->strideCr,
1789 ((EB_S16*)residual16bit->bufferCr) + scratchChromaOffset,
1790 residual16bit->strideCr,
1791 (EB_U16*)predSamples->bufferCr + reconChromaOffset,
1792 predSamples->strideCr,
1793 tuSize >> shift_bit,
1794 tuSize >> shift_bit);
1795 }
1796 }
1797
1798 return;
1799 }
1800
1801 static EB_ENCODE_LOOP_FUNC_PTR EncodeLoopFunctionTable[2] =
1802 {
1803 EncodeLoop,
1804 EncodeLoop16bit
1805 };
1806
1807 EB_GENERATE_RECON_FUNC_PTR EncodeGenerateReconFunctionPtr[2] =
1808 {
1809 EncodeGenerateRecon,
1810 EncodeGenerateRecon16bit
1811 };
1812
1813
1814 EB_GENERATE_INTRA_SAMPLES_FUNC_PTR GenerateIntraReferenceSamplesFuncTable[2] =
1815 {
1816 GenerateIntraReferenceSamplesEncodePass,
1817 GenerateIntraReference16bitSamplesEncodePass
1818 };
1819
1820 EB_GENERATE_LUMA_INTRA_SAMPLES_FUNC_PTR GenerateLumaIntraReferenceSamplesFuncTable[2] =
1821 {
1822 GenerateLumaIntraReferenceSamplesEncodePass,
1823 GenerateLumaIntraReference16bitSamplesEncodePass
1824 };
1825
1826 EB_GENERATE_CHROMA_INTRA_SAMPLES_FUNC_PTR GenerateChromaIntraReferenceSamplesFuncTable[2] =
1827 {
1828 GenerateChromaIntraReferenceSamplesEncodePass,
1829 GenerateChromaIntraReference16bitSamplesEncodePass
1830 };
1831
1832 EB_ENC_PASS_INTRA_FUNC_PTR EncodePassIntraPredictionFuncTable[2] =
1833 {
1834 EncodePassIntraPrediction,
1835 EncodePassIntraPrediction16bit
1836 };
1837
1838 EB_LCU_INTERNAL_DLF_FUNC_PTR LcuInternalAreaDLFCoreFuncTable[2] =
1839 {
1840 LCUInternalAreaDLFCore,
1841 LCUInternalAreaDLFCore16bit
1842 };
1843
1844 EB_LCU_BOUNDARY_DLF_FUNC_PTR LcuBoundaryDLFCoreFuncTable[2] =
1845 {
1846 LCUBoundaryDLFCore,
1847 LCUBoundaryDLFCore16bit
1848 };
1849
1850 EB_LCU_PIC_EDGE_DLF_FUNC_PTR LcuPicEdgeDLFCoreFuncTable[2] =
1851 {
1852 LCUPictureEdgeDLFCore,
1853 LCUPictureEdgeDLFCore16bit
1854 };
1855
1856
1857
1858 /*************************************************
1859 * Encode Pass Motion Vector Prediction
1860 *************************************************/
EncodePassMvPrediction(SequenceControlSet_t * sequenceControlSetPtr,PictureControlSet_t * pictureControlSetPtr,EB_U32 lcuIndex,EncDecContext_t * contextPtr)1861 static void EncodePassMvPrediction(
1862 SequenceControlSet_t *sequenceControlSetPtr,
1863 PictureControlSet_t *pictureControlSetPtr,
1864 EB_U32 lcuIndex,
1865 EncDecContext_t *contextPtr)
1866 {
1867 // AMVP Signaled, or we failed to find a Merge MV match
1868 if (contextPtr->cuPtr->predictionUnitArray->mergeFlag == EB_FALSE)
1869 {
1870 EB_U64 mvdBitsIdx0;
1871 EB_U64 mvdBitsIdx1;
1872 EB_S32 xMvdIdx0;
1873 EB_S32 yMvdIdx0;
1874 EB_S32 xMvdIdx1;
1875 EB_S32 yMvdIdx1;
1876
1877 contextPtr->cuPtr->predictionUnitArray->mergeFlag = EB_FALSE;
1878 contextPtr->cuPtr->skipFlag = EB_FALSE;
1879
1880 // Generate AMVP List
1881 if (contextPtr->cuPtr->predictionUnitArray->interPredDirectionIndex == UNI_PRED_LIST_0 ||
1882 contextPtr->cuPtr->predictionUnitArray->interPredDirectionIndex == BI_PRED)
1883 {
1884 FillAMVPCandidates(
1885 pictureControlSetPtr->epMvNeighborArray[contextPtr->encDecTileIndex],
1886 pictureControlSetPtr->epModeTypeNeighborArray[contextPtr->encDecTileIndex],
1887 contextPtr->cuOriginX,
1888 contextPtr->cuOriginY,
1889 contextPtr->cuStats->size,
1890 contextPtr->cuStats->size,
1891 contextPtr->cuStats->size,
1892 contextPtr->cuStats->depth,
1893 sequenceControlSetPtr->lcuSize,
1894 pictureControlSetPtr,
1895 pictureControlSetPtr->ParentPcsPtr->disableTmvpFlag ? EB_FALSE : EB_TRUE,
1896 lcuIndex,
1897 REF_LIST_0,
1898 contextPtr->xMvAmvpCandidateArrayList0,
1899 contextPtr->yMvAmvpCandidateArrayList0,
1900 &contextPtr->amvpCandidateCountRefList0);
1901
1902 xMvdIdx0 = (contextPtr->cuPtr->predictionUnitArray->mv[REF_LIST_0].x - contextPtr->xMvAmvpCandidateArrayList0[0]);
1903 yMvdIdx0 = (contextPtr->cuPtr->predictionUnitArray->mv[REF_LIST_0].y - contextPtr->yMvAmvpCandidateArrayList0[0]);
1904 EbHevcGetMvdFractionBits(xMvdIdx0, yMvdIdx0, contextPtr->mdRateEstimationPtr, &mvdBitsIdx0);
1905
1906 if (contextPtr->amvpCandidateCountRefList0 > 1) {
1907 xMvdIdx1 = (contextPtr->cuPtr->predictionUnitArray->mv[REF_LIST_0].x - contextPtr->xMvAmvpCandidateArrayList0[1]);
1908 yMvdIdx1 = (contextPtr->cuPtr->predictionUnitArray->mv[REF_LIST_0].y - contextPtr->yMvAmvpCandidateArrayList0[1]);
1909 EbHevcGetMvdFractionBits(xMvdIdx1, yMvdIdx1, contextPtr->mdRateEstimationPtr, &mvdBitsIdx1);
1910
1911 // Assign the AMVP predictor index
1912 contextPtr->cuPtr->predictionUnitArray->mvd[REF_LIST_0].predIdx = (mvdBitsIdx1 < mvdBitsIdx0);
1913
1914 // Assign the MV Predictor
1915 contextPtr->cuPtr->predictionUnitArray->mvd[REF_LIST_0].mvdX = contextPtr->cuPtr->predictionUnitArray->mvd[REF_LIST_0].predIdx ? xMvdIdx1 : xMvdIdx0;
1916 contextPtr->cuPtr->predictionUnitArray->mvd[REF_LIST_0].mvdY = contextPtr->cuPtr->predictionUnitArray->mvd[REF_LIST_0].predIdx ? yMvdIdx1 : yMvdIdx0;
1917 }
1918 else {
1919 contextPtr->cuPtr->predictionUnitArray->mvd[REF_LIST_0].predIdx = 0;
1920
1921 // Assign the MV Predictor
1922 contextPtr->cuPtr->predictionUnitArray->mvd[REF_LIST_0].mvdX = xMvdIdx0;
1923 contextPtr->cuPtr->predictionUnitArray->mvd[REF_LIST_0].mvdY = yMvdIdx0;
1924 }
1925 }
1926
1927 // Generate AMVP List
1928 if (contextPtr->cuPtr->predictionUnitArray->interPredDirectionIndex == UNI_PRED_LIST_1 ||
1929 contextPtr->cuPtr->predictionUnitArray->interPredDirectionIndex == BI_PRED)
1930 {
1931 FillAMVPCandidates(
1932 pictureControlSetPtr->epMvNeighborArray[contextPtr->encDecTileIndex],
1933 pictureControlSetPtr->epModeTypeNeighborArray[contextPtr->encDecTileIndex],
1934 contextPtr->cuOriginX,
1935 contextPtr->cuOriginY,
1936 contextPtr->cuStats->size,
1937 contextPtr->cuStats->size,
1938 contextPtr->cuStats->size,
1939 contextPtr->cuStats->depth,
1940 sequenceControlSetPtr->lcuSize,
1941 pictureControlSetPtr,
1942 pictureControlSetPtr->ParentPcsPtr->disableTmvpFlag ? EB_FALSE : EB_TRUE,
1943 lcuIndex,
1944 REF_LIST_1,
1945 contextPtr->xMvAmvpCandidateArrayList1,
1946 contextPtr->yMvAmvpCandidateArrayList1,
1947 &contextPtr->amvpCandidateCountRefList1);
1948
1949 // Assign the MV Predictor
1950 xMvdIdx0 = (contextPtr->cuPtr->predictionUnitArray->mv[REF_LIST_1].x - contextPtr->xMvAmvpCandidateArrayList1[0]);
1951 yMvdIdx0 = (contextPtr->cuPtr->predictionUnitArray->mv[REF_LIST_1].y - contextPtr->yMvAmvpCandidateArrayList1[0]);
1952 EbHevcGetMvdFractionBits(xMvdIdx0, yMvdIdx0, contextPtr->mdRateEstimationPtr, &mvdBitsIdx0);
1953
1954 if (contextPtr->amvpCandidateCountRefList1 > 1) {
1955 xMvdIdx1 = (contextPtr->cuPtr->predictionUnitArray->mv[REF_LIST_1].x - contextPtr->xMvAmvpCandidateArrayList1[1]);
1956 yMvdIdx1 = (contextPtr->cuPtr->predictionUnitArray->mv[REF_LIST_1].y - contextPtr->yMvAmvpCandidateArrayList1[1]);
1957 EbHevcGetMvdFractionBits(xMvdIdx1, yMvdIdx1, contextPtr->mdRateEstimationPtr, &mvdBitsIdx1);
1958
1959 // Assign the AMVP predictor index
1960 contextPtr->cuPtr->predictionUnitArray->mvd[REF_LIST_1].predIdx = (mvdBitsIdx1 < mvdBitsIdx0);
1961 contextPtr->cuPtr->predictionUnitArray->mvd[REF_LIST_1].mvdX = contextPtr->cuPtr->predictionUnitArray->mvd[REF_LIST_1].predIdx ? xMvdIdx1 : xMvdIdx0;
1962 contextPtr->cuPtr->predictionUnitArray->mvd[REF_LIST_1].mvdY = contextPtr->cuPtr->predictionUnitArray->mvd[REF_LIST_1].predIdx ? yMvdIdx1 : yMvdIdx0;
1963 }
1964 else {
1965 contextPtr->cuPtr->predictionUnitArray->mvd[REF_LIST_1].predIdx = 0;
1966 contextPtr->cuPtr->predictionUnitArray->mvd[REF_LIST_1].mvdX = xMvdIdx0;
1967 contextPtr->cuPtr->predictionUnitArray->mvd[REF_LIST_1].mvdY = yMvdIdx0;
1968 }
1969
1970 // Assign the MV Predictor
1971
1972 }
1973
1974
1975 }
1976
1977 return;
1978 }
1979
1980 /*******************************************
1981 * Encode Pass - Assign Delta Qp
1982 *******************************************/
EncodePassUpdateQp(PictureControlSet_t * pictureControlSetPtr,EncDecContext_t * contextPtr,EB_BOOL availableCoeff,EB_BOOL isDeltaQpEnable,EB_BOOL * isDeltaQpNotCoded,EB_U32 difCuDeltaQpDepth,EB_U8 * prevCodedQp,EB_U8 * prevQuantGroupCodedQp,EB_U32 tileOriginX,EB_U32 tileOriginY,EB_U32 lcuQp)1983 static void EncodePassUpdateQp(
1984 PictureControlSet_t *pictureControlSetPtr,
1985 EncDecContext_t *contextPtr,
1986 EB_BOOL availableCoeff,
1987 EB_BOOL isDeltaQpEnable,
1988 EB_BOOL *isDeltaQpNotCoded,
1989 EB_U32 difCuDeltaQpDepth,
1990 EB_U8 *prevCodedQp,
1991 EB_U8 *prevQuantGroupCodedQp,
1992 EB_U32 tileOriginX,
1993 EB_U32 tileOriginY,
1994 EB_U32 lcuQp
1995 )
1996 {
1997
1998 EB_U32 refQp;
1999 EB_U8 qp;
2000
2001 EB_U32 log2MinCuQpDeltaSize = LOG2F_MAX_LCU_SIZE - difCuDeltaQpDepth;
2002 EB_S32 qpTopNeighbor = 0;
2003 EB_S32 qpLeftNeighbor = 0;
2004 EB_BOOL newQuantGroup;
2005 EB_U32 quantGroupX = contextPtr->cuOriginX - (contextPtr->cuOriginX & ((1 << log2MinCuQpDeltaSize) - 1));
2006 EB_U32 quantGroupY = contextPtr->cuOriginY - (contextPtr->cuOriginY & ((1 << log2MinCuQpDeltaSize) - 1));
2007 EB_BOOL sameLcuCheckTop = (((quantGroupY - 1) >> LOG2F_MAX_LCU_SIZE) == ((quantGroupY) >> LOG2F_MAX_LCU_SIZE)) ? EB_TRUE : EB_FALSE;
2008 EB_BOOL sameLcuCheckLeft = (((quantGroupX - 1) >> LOG2F_MAX_LCU_SIZE) == ((quantGroupX) >> LOG2F_MAX_LCU_SIZE)) ? EB_TRUE : EB_FALSE;
2009 // Neighbor Array
2010 EB_U32 qpLeftNeighborIndex = 0;
2011 EB_U32 qpTopNeighborIndex = 0;
2012
2013 // CU larger than the quantization group
2014 if (Log2f(contextPtr->cuStats->size) >= log2MinCuQpDeltaSize){
2015 *isDeltaQpNotCoded = EB_TRUE;
2016 }
2017
2018 // At the beginning of a new quantization group
2019 if (((contextPtr->cuOriginX & ((1 << log2MinCuQpDeltaSize) - 1)) == 0) &&
2020 ((contextPtr->cuOriginY & ((1 << log2MinCuQpDeltaSize) - 1)) == 0))
2021 {
2022 *isDeltaQpNotCoded = EB_TRUE;
2023 newQuantGroup = EB_TRUE;
2024 }
2025 else {
2026 newQuantGroup = EB_FALSE;
2027 }
2028
2029 // setting the previous Quantization Group QP
2030 if (newQuantGroup == EB_TRUE) {
2031 *prevCodedQp = *prevQuantGroupCodedQp;
2032 }
2033
2034 if ((quantGroupY > tileOriginY) && sameLcuCheckTop) {
2035 qpTopNeighborIndex =
2036 LUMA_SAMPLE_PIC_WISE_LOCATION_TO_QP_ARRAY_IDX(
2037 quantGroupX,
2038 quantGroupY - 1,
2039 pictureControlSetPtr->qpArrayStride);
2040 qpTopNeighbor = pictureControlSetPtr->qpArray[qpTopNeighborIndex];
2041 }
2042 else {
2043 qpTopNeighbor = *prevCodedQp;
2044 }
2045
2046 if ((quantGroupX > tileOriginX) && sameLcuCheckLeft) {
2047 qpLeftNeighborIndex =
2048 LUMA_SAMPLE_PIC_WISE_LOCATION_TO_QP_ARRAY_IDX(
2049 quantGroupX - 1,
2050 quantGroupY,
2051 pictureControlSetPtr->qpArrayStride);
2052
2053 qpLeftNeighbor = pictureControlSetPtr->qpArray[qpLeftNeighborIndex];
2054 }
2055 else {
2056 qpLeftNeighbor = *prevCodedQp;
2057 }
2058
2059 refQp = (qpLeftNeighbor + qpTopNeighbor + 1) >> 1;
2060
2061 qp = (EB_U8)contextPtr->cuPtr->qp;
2062 // Update the State info
2063 if (isDeltaQpEnable) {
2064 if (*isDeltaQpNotCoded) {
2065 if (availableCoeff){
2066 qp = (EB_U8)contextPtr->cuPtr->qp;
2067 *prevCodedQp = qp;
2068 *prevQuantGroupCodedQp = qp;
2069 *isDeltaQpNotCoded = EB_FALSE;
2070 }
2071 else{
2072 qp = (EB_U8)refQp;
2073 *prevQuantGroupCodedQp = qp;
2074 }
2075 }
2076 }
2077 else{
2078 qp = (EB_U8)lcuQp;
2079 }
2080 contextPtr->cuPtr->qp = qp;
2081 return;
2082 }
2083
/*******************************************
 * SetPmEncDecMode
 *
 * Derives two per-CU settings on the EncDec context:
 *   - contextPtr->pmpMaskingLevelEncDec    (0..7, 0 by default)
 *   - contextPtr->cleanSparseCeoffPfEncDec (0/1,  0 by default)
 *
 * Both stay at 0 unless bitRateReduction is enabled, forceCbfFlag is
 * clear and the block is not excluded (8x8 CU of an I picture, stationary
 * edge over time, skin area, complex-contrast area). The level is then
 * chosen from the input resolution class (4K range or not), the slice
 * type, the temporal layer and the CU prediction mode.
 * Finally, when segment overrides are enabled and this LCU carries
 * EB_TU_FILTER_OV, filterOv is added to the level and the sum is clipped
 * to [0, 7].
 *******************************************/
void SetPmEncDecMode(
    PictureControlSet_t     *pictureControlSetPtr,
    EncDecContext_t         *contextPtr,
    EB_U32                   lcuIndex,
    EB_U8                    stationaryEdgeOverTimeFlag,
    EB_U8                    pmStationaryEdgeOverTimeFlag)
{
    SequenceControlSet_t *sequenceControlSetPtr = (SequenceControlSet_t*)pictureControlSetPtr->ParentPcsPtr->sequenceControlSetWrapperPtr->objectPtr;
    LcuStat_t            *lcuStatPtr            = &(pictureControlSetPtr->ParentPcsPtr->lcuStatArray[lcuIndex]);

    EB_BOOL uncoveredBackground = EB_FALSE;
    EB_BOOL complexArea;
    EB_BOOL skinArea;
    EB_BOOL cmplxContrastArea;

    // Defaults
    contextPtr->cleanSparseCeoffPfEncDec = 0;
    contextPtr->pmpMaskingLevelEncDec = 0;

    // Sensitive uncovered background: derived for reference P & B pictures only, false otherwise
    // (for temporal distance equal to 1 uncovered areas are easier to handle)
    if (pictureControlSetPtr->sliceType != EB_I_PICTURE && pictureControlSetPtr->ParentPcsPtr->isUsedAsReferenceFlag) {
        EbReferenceObject_t *refObjL0 = (EbReferenceObject_t*)pictureControlSetPtr->refPicPtrArray[REF_LIST_0]->objectPtr;

        uncoveredBackground = ((pictureControlSetPtr->ParentPcsPtr->failingMotionLcuFlag[lcuIndex] || contextPtr->cuPtr->predictionModeFlag == INTRA_MODE) && (pictureControlSetPtr->ParentPcsPtr->nonMovingIndexArray[lcuIndex] < PM_NON_MOVING_INDEX_TH || refObjL0->nonMovingIndexArray[lcuIndex] < PM_NON_MOVING_INDEX_TH));
    }

    // Sensitive complex area: derived for all pictures
    complexArea = pictureControlSetPtr->highIntraSlection && pictureControlSetPtr->ParentPcsPtr->complexLcuArray[lcuIndex] == LCU_COMPLEXITY_STATUS_1;

    // Sensitive skin area: scene characteristic 1 only
    skinArea = (pictureControlSetPtr->sceneCaracteristicId == EB_FRAME_CARAC_1 &&
                lcuStatPtr->cuStatArray[0].skinArea) ? EB_TRUE : EB_FALSE;

    // Sensitive complex-contrast area: scene characteristic 2 only
    cmplxContrastArea = (pictureControlSetPtr->sceneCaracteristicId == EB_FRAME_CARAC_2 &&
                         pictureControlSetPtr->ParentPcsPtr->lcuCmplxContrastArray[lcuIndex]) ? EB_TRUE : EB_FALSE;

    if (sequenceControlSetPtr->staticConfig.bitRateReduction == EB_TRUE &&
        !contextPtr->forceCbfFlag &&
        !(pictureControlSetPtr->sliceType == EB_I_PICTURE && contextPtr->cuStats->size == 8) &&
        !stationaryEdgeOverTimeFlag &&
        !skinArea &&
        !cmplxContrastArea)
    {
        if (sequenceControlSetPtr->inputResolution == INPUT_SIZE_4K_RANGE) {

            const EB_BOOL isIntraCu = (contextPtr->cuPtr->predictionModeFlag == INTRA_MODE) ? EB_TRUE : EB_FALSE;

            if (pictureControlSetPtr->sliceType != EB_I_PICTURE) {

                if (stationaryEdgeOverTimeFlag || pictureControlSetPtr->ParentPcsPtr->logoPicFlag || uncoveredBackground || complexArea) {
                    contextPtr->pmpMaskingLevelEncDec = 1;
                }
                else if (pmStationaryEdgeOverTimeFlag) {
                    contextPtr->pmpMaskingLevelEncDec = 2;
                }
                else if (pictureControlSetPtr->temporalLayerIndex == 0) {
                    contextPtr->pmpMaskingLevelEncDec = isIntraCu ? 2 : 3;
                }
                else {
                    contextPtr->cleanSparseCeoffPfEncDec = 1;

                    if (pictureControlSetPtr->ParentPcsPtr->highDarkLowLightAreaDensityFlag && pictureControlSetPtr->ParentPcsPtr->sharpEdgeLcuFlag[lcuIndex] && !pictureControlSetPtr->ParentPcsPtr->similarColocatedLcuArrayAllLayers[lcuIndex]) {
                        contextPtr->pmpMaskingLevelEncDec = 2;
                    }
                    else if (pictureControlSetPtr->temporalLayerIndex == 3) {
                        contextPtr->pmpMaskingLevelEncDec = isIntraCu ? 6 : 7;
                    }
                    else {
                        contextPtr->pmpMaskingLevelEncDec = isIntraCu ? 4 : 5;
                    }
                }
            }
            else if (stationaryEdgeOverTimeFlag == 0 && !pictureControlSetPtr->ParentPcsPtr->logoPicFlag) {
                contextPtr->pmpMaskingLevelEncDec = 1;
            }

            // 32x32 intra CU with an angular (neither DC nor planar) luma mode: level forced to 0
            if (isIntraCu && contextPtr->cuStats->size == 32 && contextPtr->cuPtr->predictionUnitArray->intraLumaMode != EB_INTRA_DC && contextPtr->cuPtr->predictionUnitArray->intraLumaMode != EB_INTRA_PLANAR) {
                contextPtr->pmpMaskingLevelEncDec = 0;
            }

            // P pictures always end up at level 1
            if (pictureControlSetPtr->sliceType == EB_P_PICTURE) {
                contextPtr->pmpMaskingLevelEncDec = 1;
            }
        }
        else if (stationaryEdgeOverTimeFlag == 0 && !pictureControlSetPtr->ParentPcsPtr->logoPicFlag) {

            if (pictureControlSetPtr->temporalLayerIndex > 0 && !uncoveredBackground && !complexArea) {
                contextPtr->cleanSparseCeoffPfEncDec = 1;
            }

            if (pictureControlSetPtr->sliceType == EB_I_PICTURE) {
                contextPtr->pmpMaskingLevelEncDec = 0;
            }
            else {
                // Kept from the original flow; redundant with the enclosing condition
                if (stationaryEdgeOverTimeFlag || pictureControlSetPtr->ParentPcsPtr->logoPicFlag) {
                    contextPtr->pmpMaskingLevelEncDec = 0;
                }

                if (uncoveredBackground || complexArea) {
                    contextPtr->pmpMaskingLevelEncDec = 1;
                }
                else {
                    const EB_BOOL isIntraCu = (contextPtr->cuPtr->predictionModeFlag == INTRA_MODE) ? EB_TRUE : EB_FALSE;

                    if (pictureControlSetPtr->temporalLayerIndex == 0) {
                        contextPtr->pmpMaskingLevelEncDec = isIntraCu ? 2 : 3;
                    }
                    else if (pictureControlSetPtr->temporalLayerIndex == 3) {
                        contextPtr->pmpMaskingLevelEncDec = isIntraCu ? 6 : 7;
                    }
                    else {
                        contextPtr->pmpMaskingLevelEncDec = isIntraCu ? 4 : 5;
                    }

                    // 32x32 intra CU with an angular (neither DC nor planar) luma mode: level forced to 0
                    if (isIntraCu && contextPtr->cuStats->size == 32 && contextPtr->cuPtr->predictionUnitArray->intraLumaMode != EB_INTRA_DC && contextPtr->cuPtr->predictionUnitArray->intraLumaMode != EB_INTRA_PLANAR) {
                        contextPtr->pmpMaskingLevelEncDec = 0;
                    }

                    // P pictures always end up at level 1
                    if (pictureControlSetPtr->sliceType == EB_P_PICTURE) {
                        contextPtr->pmpMaskingLevelEncDec = 1;
                    }
                }
            }
        }
    }

    // Per-LCU segment override of the level
    if (pictureControlSetPtr->ParentPcsPtr->segmentOvArray != NULL && sequenceControlSetPtr->staticConfig.segmentOvEnabled) {
        SegmentOverride_t *segmentOvPtr = pictureControlSetPtr->ParentPcsPtr->segmentOvArray;

        if (segmentOvPtr[lcuIndex].ovFlags & EB_TU_FILTER_OV) {
            contextPtr->pmpMaskingLevelEncDec = CLIP3(0, 7, contextPtr->pmpMaskingLevelEncDec + segmentOvPtr[lcuIndex].filterOv);
        }
    }
}
2283
2284
/*******************************************
 * Pack2DBlock
 *
 * Hands one block of the input picture to Pack2D_SRC, once per plane
 * (Y, Cb, Cr). Each call receives the plane's base samples
 * (bufferY/Cb/Cr), its bit-increment samples (bufferBitIncY/Cb/Cr) and a
 * destination inside contextPtr->inputSample16bitBuffer addressed as
 * EB_U16 samples.
 *
 *   inputPicture      source picture; its originX/originY are added to
 *                     the block position
 *   originX, originY  luma position of the block in the picture; the
 *                     destination position is this value modulo 64
 *   width, height     luma block size
 *
 * NOTE: chroma positions and sizes are always halved in both directions,
 * i.e. 4:2:0 subsampling is hard-coded here.
 * NOTE(review): other chroma formats would need the shifts to depend on
 * the picture's color format - confirm whether such callers exist.
 *******************************************/
void Pack2DBlock(
    EncDecContext_t         *contextPtr,
    EbPictureBufferDesc_t   *inputPicture,
    EB_U32                   originX,
    EB_U32                   originY,
    EB_U32                   width,
    EB_U32                   height) {

    // Source offsets (base samples and bit-increment samples), per plane
    const EB_U32 inputLumaOffset = ((originY + inputPicture->originY) * inputPicture->strideY) + (originX + inputPicture->originX);
    const EB_U32 inputBitIncLumaOffset = ((originY + inputPicture->originY) * inputPicture->strideBitIncY) + (originX + inputPicture->originX);
    const EB_U32 inputCbOffset = (((originY + inputPicture->originY) >> 1) * inputPicture->strideCb) + ((originX + inputPicture->originX) >> 1);
    const EB_U32 inputBitIncCbOffset = (((originY + inputPicture->originY) >> 1) * inputPicture->strideBitIncCb) + ((originX + inputPicture->originX) >> 1);
    const EB_U32 inputCrOffset = (((originY + inputPicture->originY) >> 1) * inputPicture->strideCr) + ((originX + inputPicture->originX) >> 1);
    const EB_U32 inputBitIncCrOffset = (((originY + inputPicture->originY) >> 1) * inputPicture->strideBitIncCr) + ((originX + inputPicture->originX) >> 1);

    // Destination offsets inside the 16-bit block buffer
    const EB_U32 blockLumaOffset = ((originY % 64) * contextPtr->inputSample16bitBuffer->strideY) + (originX % 64);
    const EB_U32 blockCbOffset = (((originY % 64) >> 1) * contextPtr->inputSample16bitBuffer->strideCb) + ((originX % 64) >> 1);
    const EB_U32 blockCrOffset = (((originY % 64) >> 1) * contextPtr->inputSample16bitBuffer->strideCr) + ((originX % 64) >> 1);

    // Luma
    Pack2D_SRC(
        inputPicture->bufferY + inputLumaOffset,
        inputPicture->strideY,
        inputPicture->bufferBitIncY + inputBitIncLumaOffset,
        inputPicture->strideBitIncY,
        ((EB_U16 *)(contextPtr->inputSample16bitBuffer->bufferY)) + blockLumaOffset,
        contextPtr->inputSample16bitBuffer->strideY,
        width,
        height); //this should be depending on a configuration param

    // Cb: use the Cb strides, matching the strides the Cb offsets above were
    // computed with (the Cr strides were passed here before)
    Pack2D_SRC(
        inputPicture->bufferCb + inputCbOffset,
        inputPicture->strideCb,
        inputPicture->bufferBitIncCb + inputBitIncCbOffset,
        inputPicture->strideBitIncCb,
        ((EB_U16 *)(contextPtr->inputSample16bitBuffer->bufferCb)) + blockCbOffset,
        contextPtr->inputSample16bitBuffer->strideCb,
        width >> 1,
        height >> 1); //this should be depending on a configuration param

    // Cr
    Pack2D_SRC(
        inputPicture->bufferCr + inputCrOffset,
        inputPicture->strideCr,
        inputPicture->bufferBitIncCr + inputBitIncCrOffset,
        inputPicture->strideBitIncCr,
        ((EB_U16 *)(contextPtr->inputSample16bitBuffer->bufferCr)) + blockCrOffset,
        contextPtr->inputSample16bitBuffer->strideCr,
        width >> 1,
        height >> 1); //this should be depending on a configuration param
}
2339
QpmDeriveBeaAndSkipQpmFlagLcu(SequenceControlSet_t * sequenceControlSetPtr,PictureControlSet_t * pictureControlSetPtr,LargestCodingUnit_t * lcuPtr,EB_U32 lcuIndex,EncDecContext_t * contextPtr)2340 EB_ERRORTYPE QpmDeriveBeaAndSkipQpmFlagLcu(
2341 SequenceControlSet_t *sequenceControlSetPtr,
2342 PictureControlSet_t *pictureControlSetPtr,
2343 LargestCodingUnit_t *lcuPtr,
2344 EB_U32 lcuIndex,
2345 EncDecContext_t *contextPtr)
2346 {
2347
2348 EB_ERRORTYPE return_error = EB_ErrorNone;
2349 EB_S8 pictureQp = pictureControlSetPtr->pictureQp;
2350 EB_U8 minQpAllowed = (EB_U8)sequenceControlSetPtr->staticConfig.minQpAllowed;
2351 EB_U8 maxQpAllowed = (EB_U8)sequenceControlSetPtr->staticConfig.maxQpAllowed;
2352
2353
2354 if (sequenceControlSetPtr->staticConfig.segmentOvEnabled && pictureControlSetPtr->ParentPcsPtr->segmentOvArray != NULL) {
2355 SegmentOverride_t *segmentOvPtr = pictureControlSetPtr->ParentPcsPtr->segmentOvArray;
2356 if (segmentOvPtr[lcuIndex].ovFlags & EB_QP_OV_DIRECT)
2357 pictureQp = segmentOvPtr[lcuIndex].qpOv;
2358 else if (segmentOvPtr[lcuIndex].ovFlags & EB_QP_OV_DELTA)
2359 pictureQp += segmentOvPtr[lcuIndex].qpOv;
2360 }
2361 contextPtr->qpmQp = CLIP3(minQpAllowed, maxQpAllowed, pictureQp);
2362
2363 LcuStat_t *lcuStatPtr = &(pictureControlSetPtr->ParentPcsPtr->lcuStatArray[lcuIndex]);
2364
2365
2366 contextPtr->nonMovingDeltaQp = 0;
2367
2368 contextPtr->grassEnhancementFlag = ((pictureControlSetPtr->sceneCaracteristicId == EB_FRAME_CARAC_1) && (lcuStatPtr->cuStatArray[0].grassArea)
2369 && (lcuPtr->pictureControlSetPtr->ParentPcsPtr->edgeResultsPtr[lcuIndex].edgeBlockNum > 0))
2370 ? EB_TRUE : EB_FALSE;
2371
2372 contextPtr->backgorundEnhancement = EB_FALSE;
2373
2374
2375 contextPtr->backgorundEnhancement = EB_FALSE;
2376
2377 contextPtr->skipQpmFlag = (sequenceControlSetPtr->staticConfig.improveSharpness || sequenceControlSetPtr->staticConfig.bitRateReduction) ? EB_FALSE : EB_TRUE;
2378
2379 if ((pictureControlSetPtr->ParentPcsPtr->logoPicFlag == EB_FALSE) && ((pictureControlSetPtr->ParentPcsPtr->picNoiseClass >= PIC_NOISE_CLASS_3_1) || (pictureControlSetPtr->ParentPcsPtr->highDarkLowLightAreaDensityFlag) || (pictureControlSetPtr->ParentPcsPtr->intraCodedBlockProbability > 90))){
2380 contextPtr->skipQpmFlag = EB_TRUE;
2381 }
2382
2383 if (contextPtr->skipQpmFlag == EB_FALSE) {
2384 if (pictureControlSetPtr->ParentPcsPtr->picHomogenousOverTimeLcuPercentage > 30 && pictureControlSetPtr->sliceType != EB_I_PICTURE){
2385 contextPtr->qpmQp = CLIP3(minQpAllowed, maxQpAllowed, pictureQp + 1);
2386 }
2387 }
2388
2389 return return_error;
2390 }
2391
EncQpmDeriveDeltaQPForEachLeafLcu(SequenceControlSet_t * sequenceControlSetPtr,PictureControlSet_t * pictureControlSetPtr,LargestCodingUnit_t * lcuPtr,EB_U32 lcuIndex,CodingUnit_t * cuPtr,EB_U32 cuDepth,EB_U32 cuIndex,EB_U32 cuSize,EB_U8 type,EB_U8 parent32x32Index,EncDecContext_t * contextPtr)2392 EB_ERRORTYPE EncQpmDeriveDeltaQPForEachLeafLcu(
2393 SequenceControlSet_t *sequenceControlSetPtr,
2394 PictureControlSet_t *pictureControlSetPtr,
2395 LargestCodingUnit_t *lcuPtr,
2396 EB_U32 lcuIndex,
2397 CodingUnit_t *cuPtr,
2398 EB_U32 cuDepth,
2399 EB_U32 cuIndex,
2400 EB_U32 cuSize,
2401 EB_U8 type,
2402 EB_U8 parent32x32Index,
2403 EncDecContext_t *contextPtr)
2404 {
2405 EB_ERRORTYPE return_error = EB_ErrorNone;
2406
2407
2408 //LcuParams_t lcuParams;
2409 EB_S64 complexityDistance;
2410 EB_S8 deltaQp = 0;
2411 EB_U8 qpmQp = contextPtr->qpmQp;
2412 EB_U8 minQpAllowed = (EB_U8)sequenceControlSetPtr->staticConfig.minQpAllowed;
2413 EB_U8 maxQpAllowed = (EB_U8)sequenceControlSetPtr->staticConfig.maxQpAllowed;
2414 EB_S16 cuQP;
2415
2416 EB_BOOL skipOis8x8 = (pictureControlSetPtr->ParentPcsPtr->skipOis8x8 && cuSize == 8);
2417
2418 EB_U32 usedDepth = cuDepth;
2419 if (skipOis8x8)
2420 usedDepth = 2;
2421
2422 EB_U32 cuIndexInRaterScan = MD_SCAN_TO_RASTER_SCAN[cuIndex];
2423
2424 EB_BOOL acEnergyBasedAntiContouring = pictureControlSetPtr->sliceType == EB_I_PICTURE ? EB_TRUE : EB_FALSE;
2425 EB_U8 lowerQPClass;
2426
2427 EB_S8 nonMovingDeltaQp = contextPtr->nonMovingDeltaQp;
2428
2429 EB_S8 bea64x64DeltaQp;
2430
2431 cuQP = qpmQp;
2432 cuPtr->qp = qpmQp;
2433
2434 EB_U32 distortion = 0;
2435
2436 if (!contextPtr->skipQpmFlag){
2437
2438 // INTRA MODE
2439 if (type == INTRA_MODE){
2440
2441 OisCu32Cu16Results_t *oisCu32Cu16ResultsPtr = pictureControlSetPtr->ParentPcsPtr->oisCu32Cu16Results[lcuIndex];
2442 OisCu8Results_t *oisCu8ResultsPtr = pictureControlSetPtr->ParentPcsPtr->oisCu8Results[lcuIndex];
2443
2444 if (cuSize > 32){
2445 distortion =
2446 oisCu32Cu16ResultsPtr->sortedOisCandidate[1][0].distortion +
2447 oisCu32Cu16ResultsPtr->sortedOisCandidate[2][0].distortion +
2448 oisCu32Cu16ResultsPtr->sortedOisCandidate[3][0].distortion +
2449 oisCu32Cu16ResultsPtr->sortedOisCandidate[4][0].distortion;
2450 }
2451 else if (cuSize == 32) {
2452 const EB_U32 me2Nx2NTableOffset = contextPtr->cuStats->cuNumInDepth + me2Nx2NOffset[contextPtr->cuStats->depth];
2453 distortion = oisCu32Cu16ResultsPtr->sortedOisCandidate[me2Nx2NTableOffset][0].distortion;
2454 }
2455 else{
2456 if (cuSize > 8){
2457 const EB_U32 me2Nx2NTableOffset = contextPtr->cuStats->cuNumInDepth + me2Nx2NOffset[contextPtr->cuStats->depth];
2458 distortion = oisCu32Cu16ResultsPtr->sortedOisCandidate[me2Nx2NTableOffset][0].distortion;
2459 }
2460 else{
2461 if (skipOis8x8){
2462
2463 const CodedUnitStats_t *cuStats = GetCodedUnitStats(ParentBlockIndex[cuIndex]);
2464 const EB_U32 me2Nx2NTableOffset = cuStats->cuNumInDepth + me2Nx2NOffset[cuStats->depth];
2465
2466 distortion = oisCu32Cu16ResultsPtr->sortedOisCandidate[me2Nx2NTableOffset][0].distortion;
2467 }
2468 else {
2469
2470 const EB_U32 me2Nx2NTableOffset = contextPtr->cuStats->cuNumInDepth;
2471
2472 if (oisCu8ResultsPtr->sortedOisCandidate[me2Nx2NTableOffset][0].validDistortion){
2473 distortion = oisCu8ResultsPtr->sortedOisCandidate[me2Nx2NTableOffset][0].distortion;
2474 }
2475 else{
2476
2477 const CodedUnitStats_t *cuStats = GetCodedUnitStats(ParentBlockIndex[cuIndex]);
2478 const EB_U32 me2Nx2NTableOffset = cuStats->cuNumInDepth + me2Nx2NOffset[cuStats->depth];
2479
2480 if (oisCu32Cu16ResultsPtr->sortedOisCandidate[me2Nx2NTableOffset][0].validDistortion){
2481 distortion = oisCu32Cu16ResultsPtr->sortedOisCandidate[me2Nx2NTableOffset][0].distortion;
2482 }
2483 else {
2484 distortion = 0;
2485 }
2486 }
2487
2488 }
2489 }
2490 }
2491
2492
2493 distortion = (EB_U32)CLIP3(pictureControlSetPtr->ParentPcsPtr->intraComplexityMin[usedDepth], pictureControlSetPtr->ParentPcsPtr->intraComplexityMax[usedDepth], distortion);
2494 complexityDistance = ((EB_S32)distortion - (EB_S32)pictureControlSetPtr->ParentPcsPtr->intraComplexityAvg[usedDepth]);
2495
2496 if (complexityDistance < 0){
2497
2498 deltaQp = (pictureControlSetPtr->ParentPcsPtr->intraMinDistance[usedDepth] != 0) ? (EB_S8)((contextPtr->minDeltaQpWeight * contextPtr->minDeltaQp[usedDepth] * complexityDistance) / (100 * pictureControlSetPtr->ParentPcsPtr->intraMinDistance[usedDepth])) : 0;
2499 }
2500 else{
2501
2502 deltaQp = (pictureControlSetPtr->ParentPcsPtr->intraMaxDistance[usedDepth] != 0) ? (EB_S8)((contextPtr->maxDeltaQpWeight * contextPtr->maxDeltaQp[usedDepth] * complexityDistance) / (100 * pictureControlSetPtr->ParentPcsPtr->intraMaxDistance[usedDepth])) : 0;
2503 }
2504 // QPM action
2505 if (lcuPtr->pictureControlSetPtr->sceneCaracteristicId == EB_FRAME_CARAC_2) {
2506 if (lcuPtr->pictureControlSetPtr->ParentPcsPtr->lcuCmplxContrastArray[lcuIndex] && deltaQp > 0) {
2507 deltaQp = 0;
2508 }
2509 }
2510 }
2511 // INTER MODE
2512 else{
2513 distortion = pictureControlSetPtr->ParentPcsPtr->meResults[lcuIndex][cuIndexInRaterScan].distortionDirection[0].distortion;
2514 if (skipOis8x8){
2515 EB_U32 cuIndexRScan = MD_SCAN_TO_RASTER_SCAN[ParentBlockIndex[cuIndex]];
2516
2517 distortion = pictureControlSetPtr->ParentPcsPtr->meResults[lcuIndex][cuIndexRScan].distortionDirection[0].distortion;
2518
2519 }
2520 distortion = (EB_U32)CLIP3(pictureControlSetPtr->ParentPcsPtr->interComplexityMin[usedDepth], pictureControlSetPtr->ParentPcsPtr->interComplexityMax[usedDepth], distortion);
2521 complexityDistance = ((EB_S32)distortion - (EB_S32)pictureControlSetPtr->ParentPcsPtr->interComplexityAvg[usedDepth]);
2522
2523 if (complexityDistance < 0){
2524
2525 deltaQp = (pictureControlSetPtr->ParentPcsPtr->interMinDistance[usedDepth] != 0) ? (EB_S8)((contextPtr->minDeltaQpWeight * contextPtr->minDeltaQp[usedDepth] * complexityDistance) / (100 * pictureControlSetPtr->ParentPcsPtr->interMinDistance[usedDepth])) : 0;
2526 }
2527 else{
2528
2529 deltaQp = (pictureControlSetPtr->ParentPcsPtr->interMaxDistance[usedDepth] != 0) ? (EB_S8)((contextPtr->maxDeltaQpWeight * contextPtr->maxDeltaQp[usedDepth] * complexityDistance) / (100 * pictureControlSetPtr->ParentPcsPtr->interMaxDistance[usedDepth])) : 0;
2530 }
2531 }
2532
2533 if (contextPtr->backgorundEnhancement){
2534 // Use the 8x8 background enhancement only for the Intra slice, otherwise, use the existing LCU based BEA results
2535 bea64x64DeltaQp = nonMovingDeltaQp;
2536
2537 if (((cuIndex > 0) && ((pictureControlSetPtr->ParentPcsPtr->yMean[lcuIndex][parent32x32Index]) > ANTI_CONTOURING_LUMA_T2 || (pictureControlSetPtr->ParentPcsPtr->yMean[lcuIndex][parent32x32Index]) < ANTI_CONTOURING_LUMA_T1)) ||
2538 ((cuIndex == 0) && ((pictureControlSetPtr->ParentPcsPtr->yMean[lcuIndex][0]) > ANTI_CONTOURING_LUMA_T2 || (pictureControlSetPtr->ParentPcsPtr->yMean[lcuIndex][0]) < ANTI_CONTOURING_LUMA_T1))) {
2539
2540 if (bea64x64DeltaQp < 0){
2541 bea64x64DeltaQp = 0;
2542 }
2543
2544 }
2545
2546 deltaQp += bea64x64DeltaQp;
2547 }
2548
2549 if ((pictureControlSetPtr->ParentPcsPtr->logoPicFlag)){
2550 deltaQp = (deltaQp < contextPtr->minDeltaQp[0]) ? deltaQp : contextPtr->minDeltaQp[0];
2551 }
2552
2553 LcuStat_t *lcuStatPtr = &(pictureControlSetPtr->ParentPcsPtr->lcuStatArray[lcuIndex]);
2554 if (lcuStatPtr->stationaryEdgeOverTimeFlag && deltaQp > 0){
2555 deltaQp = 0;
2556 }
2557 // QPM action
2558 if (lcuPtr->pictureControlSetPtr->sceneCaracteristicId == EB_FRAME_CARAC_2) {
2559 if (lcuPtr->pictureControlSetPtr->ParentPcsPtr->lcuCmplxContrastArray[lcuIndex] && deltaQp > 0) {
2560 deltaQp = 0;
2561 }
2562 }
2563
2564 if (acEnergyBasedAntiContouring) {
2565
2566 lowerQPClass = DeriveContouringClass(
2567 lcuPtr->pictureControlSetPtr->ParentPcsPtr,
2568 lcuPtr->index,
2569 (EB_U8) cuIndex);
2570
2571 if (lowerQPClass){
2572 if (lowerQPClass == 3)
2573 deltaQp = ANTI_CONTOURING_DELTA_QP_0;
2574 else if (lowerQPClass == 2)
2575 deltaQp = ANTI_CONTOURING_DELTA_QP_1;
2576 else if (lowerQPClass == 1)
2577 deltaQp = ANTI_CONTOURING_DELTA_QP_2;
2578 }
2579 }
2580
2581
2582 deltaQp -= contextPtr->grassEnhancementFlag ? 3 : 0;
2583 if (sequenceControlSetPtr->inputResolution == INPUT_SIZE_4K_RANGE)
2584 deltaQp = ((deltaQp < 0 && sequenceControlSetPtr->staticConfig.bitRateReduction && !sequenceControlSetPtr->staticConfig.improveSharpness) ||
2585 (deltaQp > 0 && sequenceControlSetPtr->staticConfig.improveSharpness && !sequenceControlSetPtr->staticConfig.bitRateReduction)) ? 0 : deltaQp;
2586 else
2587 deltaQp = (deltaQp > 0 && sequenceControlSetPtr->staticConfig.improveSharpness) ? 0 : deltaQp;
2588 if (sequenceControlSetPtr->staticConfig.rateControlMode == 1 || sequenceControlSetPtr->staticConfig.rateControlMode == 2){
2589
2590 if (qpmQp > RC_QPMOD_MAXQP){
2591 deltaQp = MIN(0, deltaQp);
2592 }
2593
2594 cuQP = (qpmQp + deltaQp);
2595
2596
2597 if ((qpmQp <= RC_QPMOD_MAXQP)){
2598 cuQP = (EB_U8)CLIP3(
2599 minQpAllowed,
2600 RC_QPMOD_MAXQP,
2601 cuQP);
2602 }
2603 }
2604 else{
2605 cuQP = (qpmQp + deltaQp);
2606 }
2607
2608 cuQP = (EB_U8)CLIP3(
2609 minQpAllowed,
2610 maxQpAllowed,
2611 cuQP);
2612
2613
2614 }
2615
2616 cuPtr->qp = cuQP ;
2617
2618 lcuPtr->qp = (cuSize == 64) ? (EB_U8)cuPtr->qp : lcuPtr->qp;
2619
2620
2621 cuPtr->deltaQp = (EB_S16)cuPtr->qp - (EB_S16)qpmQp;
2622
2623 cuPtr->orgDeltaQp = cuPtr->deltaQp;
2624
2625
2626
2627 return return_error;
2628 }
2629
2630
2631 /************************************
2632 this function checks whether any intra
2633 CU is present in the current LCU
2634 *************************************/
isIntraPresent(LargestCodingUnit_t * lcuPtr)2635 EB_BOOL isIntraPresent(
2636 LargestCodingUnit_t *lcuPtr)
2637 {
2638 EB_U8 leafIndex = 0;
2639 while (leafIndex < CU_MAX_COUNT) {
2640
2641 CodingUnit_t * const cuPtr = lcuPtr->codedLeafArrayPtr[leafIndex];
2642
2643 if (cuPtr->splitFlag == EB_FALSE) {
2644
2645 const CodedUnitStats_t *cuStatsPtr = GetCodedUnitStats(leafIndex);
2646 if (cuPtr->predictionModeFlag == INTRA_MODE)
2647 return EB_TRUE;
2648
2649
2650 leafIndex += DepthOffset[cuStatsPtr->depth];
2651 }
2652 else {
2653 leafIndex++;
2654 }
2655 }
2656
2657 return EB_FALSE;
2658
2659 }
2660
2661
EncodePassPreFetchRef(PictureControlSet_t * pictureControlSetPtr,EncDecContext_t * contextPtr,CodingUnit_t * cuPtr,const CodedUnitStats_t * cuStats,PredictionUnit_t * puPtr,EB_BOOL is16bit)2662 void EncodePassPreFetchRef(
2663 PictureControlSet_t *pictureControlSetPtr,
2664 EncDecContext_t *contextPtr,
2665 CodingUnit_t *cuPtr,
2666 const CodedUnitStats_t *cuStats,
2667 PredictionUnit_t *puPtr,
2668 EB_BOOL is16bit)
2669 {
2670
2671 if (cuPtr->predictionModeFlag == INTER_MODE){
2672
2673 if (is16bit)
2674 {
2675 puPtr = cuPtr->predictionUnitArray;
2676 contextPtr->mvUnit.predDirection = (EB_U8)puPtr->interPredDirectionIndex;
2677 contextPtr->mvUnit.mv[REF_LIST_0].mvUnion = puPtr->mv[REF_LIST_0].mvUnion;
2678 contextPtr->mvUnit.mv[REF_LIST_1].mvUnion = puPtr->mv[REF_LIST_1].mvUnion;
2679
2680 if ((contextPtr->mvUnit.predDirection == UNI_PRED_LIST_0) || (contextPtr->mvUnit.predDirection == BI_PRED))
2681 {
2682
2683 EbPictureBufferDesc_t *refPicList0 = 0;
2684 EbReferenceObject_t *referenceObject;
2685 EB_U16 refList0PosX = 0;
2686 EB_U16 refList0PosY = 0;
2687 EB_U8 counter;
2688 EB_U16 *src0Ptr;
2689
2690 referenceObject = (EbReferenceObject_t*)pictureControlSetPtr->refPicPtrArray[REF_LIST_0]->objectPtr;
2691 refPicList0 = (EbPictureBufferDesc_t*)referenceObject->referencePicture16bit;
2692
2693 refList0PosX = (EB_U32)CLIP3(
2694 (EB_S32)((refPicList0->originX - 71) << 2),
2695 (EB_S32)((refPicList0->width + refPicList0->originX + 7) << 2),
2696 (EB_S32)((contextPtr->cuOriginX + refPicList0->originX) << 2) + contextPtr->mvUnit.mv[REF_LIST_0].x);
2697
2698 refList0PosY = (EB_U32)CLIP3(
2699 (EB_S32)((refPicList0->originY - 71) << 2),
2700 (EB_S32)((refPicList0->height + refPicList0->originY + 7) << 2),
2701 (EB_S32)((contextPtr->cuOriginY + refPicList0->originY) << 2) + contextPtr->mvUnit.mv[REF_LIST_0].y);
2702
2703 EB_U32 lumaOffSet = ((refList0PosX >> 2) - 4) * 2 + ((refList0PosY >> 2) - 4) * 2 * refPicList0->strideY;
2704 EB_U32 cbOffset = ((refList0PosX >> 3) - 2) * 2 + ((refList0PosY >> 3) - 2) * 2 * refPicList0->strideCb;
2705 EB_U32 crOffset = ((refList0PosX >> 3) - 2) * 2 + ((refList0PosY >> 3) - 2) * 2 * refPicList0->strideCr;
2706
2707
2708 contextPtr->mcpContext->localReferenceBlockL0->bufferY = refPicList0->bufferY + lumaOffSet;
2709 contextPtr->mcpContext->localReferenceBlockL0->bufferCb = refPicList0->bufferCb + cbOffset;
2710 contextPtr->mcpContext->localReferenceBlockL0->bufferCr = refPicList0->bufferCr + crOffset;
2711 contextPtr->mcpContext->localReferenceBlockL0->strideY = refPicList0->strideY;
2712 contextPtr->mcpContext->localReferenceBlockL0->strideCb = refPicList0->strideCb;
2713 contextPtr->mcpContext->localReferenceBlockL0->strideCr = refPicList0->strideCr;
2714
2715
2716 src0Ptr = (EB_U16 *)contextPtr->mcpContext->localReferenceBlockL0->bufferY + 4 + 4 * contextPtr->mcpContext->localReferenceBlockL0->strideY;
2717
2718 for (counter = 0; counter < cuStats->size; counter++)
2719 {
2720 char const* p0 = (char const*)(src0Ptr + counter*contextPtr->mcpContext->localReferenceBlockL0->strideY);
2721 _mm_prefetch(p0, _MM_HINT_T2);
2722 char const* p1 = (char const*)(src0Ptr + counter*contextPtr->mcpContext->localReferenceBlockL0->strideY + (cuStats->size >> 1));
2723 _mm_prefetch(p1, _MM_HINT_T2);
2724 }
2725
2726 }
2727
2728 if ((contextPtr->mvUnit.predDirection == UNI_PRED_LIST_1) || (contextPtr->mvUnit.predDirection == BI_PRED))
2729 {
2730 // Setup List 0
2731 EbPictureBufferDesc_t *refPicList1 = 0;
2732 EbReferenceObject_t *referenceObject;
2733 EB_U16 refList1PosX = 0;
2734 EB_U16 refList1PosY = 0;
2735 EB_U8 counter;
2736 EB_U16 *src1Ptr;
2737
2738 referenceObject = (EbReferenceObject_t*)pictureControlSetPtr->refPicPtrArray[REF_LIST_1]->objectPtr;
2739 refPicList1 = (EbPictureBufferDesc_t*)referenceObject->referencePicture16bit;
2740
2741 refList1PosX = (EB_U32)CLIP3(
2742 (EB_S32)((refPicList1->originX - 71) << 2),
2743 (EB_S32)((refPicList1->width + refPicList1->originX + 7) << 2),
2744 (EB_S32)((contextPtr->cuOriginX + refPicList1->originX) << 2) + contextPtr->mvUnit.mv[REF_LIST_1].x);
2745
2746 refList1PosY = (EB_U32)CLIP3(
2747 (EB_S32)((refPicList1->originY - 71) << 2),
2748 (EB_S32)((refPicList1->height + refPicList1->originY + 7) << 2),
2749 (EB_S32)((contextPtr->cuOriginY + refPicList1->originY) << 2) + contextPtr->mvUnit.mv[REF_LIST_1].y);
2750
2751 EB_U32 lumaOffSet = ((refList1PosX >> 2) - 4) * 2 + ((refList1PosY >> 2) - 4) * 2 * refPicList1->strideY; //refPicList0->originX + refPicList0->originY*refPicList0->strideY; //
2752 EB_U32 cbOffset = ((refList1PosX >> 3) - 2) * 2 + ((refList1PosY >> 3) - 2) * 2 * refPicList1->strideCb;
2753 EB_U32 crOffset = ((refList1PosX >> 3) - 2) * 2 + ((refList1PosY >> 3) - 2) * 2 * refPicList1->strideCr;
2754
2755
2756 contextPtr->mcpContext->localReferenceBlockL1->bufferY = refPicList1->bufferY + lumaOffSet;
2757 contextPtr->mcpContext->localReferenceBlockL1->bufferCb = refPicList1->bufferCb + cbOffset;
2758 contextPtr->mcpContext->localReferenceBlockL1->bufferCr = refPicList1->bufferCr + crOffset;
2759 contextPtr->mcpContext->localReferenceBlockL1->strideY = refPicList1->strideY;
2760 contextPtr->mcpContext->localReferenceBlockL1->strideCb = refPicList1->strideCb;
2761 contextPtr->mcpContext->localReferenceBlockL1->strideCr = refPicList1->strideCr;
2762
2763
2764 src1Ptr = (EB_U16 *)contextPtr->mcpContext->localReferenceBlockL1->bufferY + 4 + 4 * contextPtr->mcpContext->localReferenceBlockL1->strideY;
2765
2766 for (counter = 0; counter < cuStats->size; counter++)
2767 {
2768 char const* p0 = (char const*)(src1Ptr + counter*contextPtr->mcpContext->localReferenceBlockL1->strideY);
2769 _mm_prefetch(p0, _MM_HINT_T2);
2770 char const* p1 = (char const*)(src1Ptr + counter*contextPtr->mcpContext->localReferenceBlockL1->strideY + (cuStats->size >> 1));
2771 _mm_prefetch(p1, _MM_HINT_T2);
2772 }
2773
2774 }
2775 }
2776 else
2777 {
2778 puPtr = cuPtr->predictionUnitArray;
2779 contextPtr->mvUnit.predDirection = (EB_U8)puPtr->interPredDirectionIndex;
2780 contextPtr->mvUnit.mv[REF_LIST_0].mvUnion = puPtr->mv[REF_LIST_0].mvUnion;
2781 contextPtr->mvUnit.mv[REF_LIST_1].mvUnion = puPtr->mv[REF_LIST_1].mvUnion;
2782
2783 if ((contextPtr->mvUnit.predDirection == UNI_PRED_LIST_0) || (contextPtr->mvUnit.predDirection == BI_PRED))
2784 {
2785 // Setup List 0
2786 EbPictureBufferDesc_t *refPicList0 = 0;
2787 EbReferenceObject_t *referenceObject;
2788 EB_U16 refList0PosX = 0;
2789 EB_U16 refList0PosY = 0;
2790 EB_U32 integPosL0x;
2791 EB_U32 integPosL0y;
2792 EB_U8 counter;
2793 EB_U8 *src0Ptr;
2794
2795 referenceObject = (EbReferenceObject_t*)pictureControlSetPtr->refPicPtrArray[REF_LIST_0]->objectPtr;
2796 refPicList0 = (EbPictureBufferDesc_t*)referenceObject->referencePicture;
2797
2798 refList0PosX = (EB_U32)CLIP3(
2799 (EB_S32)((refPicList0->originX - 71) << 2),
2800 (EB_S32)((refPicList0->width + refPicList0->originX + 7) << 2),
2801 (EB_S32)((contextPtr->cuOriginX + refPicList0->originX) << 2) + contextPtr->mvUnit.mv[REF_LIST_0].x);
2802
2803 refList0PosY = (EB_U32)CLIP3(
2804 (EB_S32)((refPicList0->originY - 71) << 2),
2805 (EB_S32)((refPicList0->height + refPicList0->originY + 7) << 2),
2806 (EB_S32)((contextPtr->cuOriginY + refPicList0->originY) << 2) + contextPtr->mvUnit.mv[REF_LIST_0].y);
2807
2808
2809 //compute the luma fractional position
2810 integPosL0x = (refList0PosX >> 2);
2811 integPosL0y = (refList0PosY >> 2);
2812
2813
2814 src0Ptr = refPicList0->bufferY + integPosL0x + integPosL0y*refPicList0->strideY;
2815 for (counter = 0; counter < cuStats->size; counter++)
2816 {
2817 char const* p0 = (char const*)(src0Ptr + counter*refPicList0->strideY);
2818 _mm_prefetch(p0, _MM_HINT_T2);
2819 }
2820
2821 }
2822
2823 if ((contextPtr->mvUnit.predDirection == UNI_PRED_LIST_1) || (contextPtr->mvUnit.predDirection == BI_PRED))
2824 {
2825 // Setup List 0
2826 EbPictureBufferDesc_t *refPicList1 = 0;
2827 EbReferenceObject_t *referenceObject;
2828 EB_U16 refList1PosX = 0;
2829 EB_U16 refList1PosY = 0;
2830 EB_U32 integPosL1x;
2831 EB_U32 integPosL1y;
2832 EB_U8 counter;
2833 EB_U8 *src1Ptr;
2834
2835 referenceObject = (EbReferenceObject_t*)pictureControlSetPtr->refPicPtrArray[REF_LIST_1]->objectPtr;
2836 refPicList1 = (EbPictureBufferDesc_t*)referenceObject->referencePicture;
2837
2838 refList1PosX = (EB_U32)CLIP3(
2839 (EB_S32)((refPicList1->originX - 71) << 2),
2840 (EB_S32)((refPicList1->width + refPicList1->originX + 7) << 2),
2841 (EB_S32)((contextPtr->cuOriginX + refPicList1->originX) << 2) + contextPtr->mvUnit.mv[REF_LIST_1].x);
2842
2843 refList1PosY = (EB_U32)CLIP3(
2844 (EB_S32)((refPicList1->originY - 71) << 2),
2845 (EB_S32)((refPicList1->height + refPicList1->originY + 7) << 2),
2846 (EB_S32)((contextPtr->cuOriginY + refPicList1->originY) << 2) + contextPtr->mvUnit.mv[REF_LIST_1].y);
2847
2848
2849 //uni-prediction List1 luma
2850 integPosL1x = (refList1PosX >> 2);
2851 integPosL1y = (refList1PosY >> 2);
2852
2853
2854 src1Ptr = refPicList1->bufferY + integPosL1x + integPosL1y*refPicList1->strideY;
2855 for (counter = 0; counter < cuStats->size; counter++)
2856 {
2857 char const* p1 = (char const*)(src1Ptr + counter*refPicList1->strideY);
2858 _mm_prefetch(p1, _MM_HINT_T2);
2859 }
2860
2861 }
2862 }
2863 }
2864 }
2865
2866
/*
 * EncodePassPackLcu
 *
 * Fills contextPtr->inputSample16bitBuffer (Y, Cb, Cr, written through
 * EB_U16 pointers) for one LCU from the base planes (bufferY/Cb/Cr) and the
 * bit-increment planes (bufferBitIncY/Cb/Cr) of inputPicture.
 *
 * When staticConfig.compressedTenBitFormat == 1 the planes are handed to
 * CompressedPackLcu, otherwise to Pack2D_SRC; the two paths differ in how
 * the bit-increment plane is addressed.
 *
 *   sequenceControlSetPtr : read for staticConfig.compressedTenBitFormat.
 *   inputPicture          : source picture (planes, strides, origin, width,
 *                           colorFormat).
 *   contextPtr            : owns the destination inputSample16bitBuffer.
 *   lcuOriginX/Y          : LCU position, before the picture origin is added.
 *   lcuWidth/lcuHeight    : luma dimensions of the LCU.
 *
 * Chroma positions and dimensions are derived from the luma ones by shifting
 * with subWidthCMinus1 / subHeightCMinus1, which depend on colorFormat.
 * The destination strides are MAX_LCU_SIZE (luma) and
 * MAX_LCU_SIZE >> subWidthCMinus1 (chroma).
 */
void EncodePassPackLcu(
    SequenceControlSet_t *sequenceControlSetPtr,
    EbPictureBufferDesc_t *inputPicture,
    EncDecContext_t *contextPtr,
    EB_U32 lcuOriginX,
    EB_U32 lcuOriginY,
    EB_U32 lcuWidth,
    EB_U32 lcuHeight)
{
    const EB_COLOR_FORMAT colorFormat = inputPicture->colorFormat;
    const EB_U16 subWidthCMinus1 = (colorFormat == EB_YUV444 ? 1 : 2) - 1;
    const EB_U16 subHeightCMinus1 = (colorFormat >= EB_YUV422 ? 1 : 2) - 1;

    if (sequenceControlSetPtr->staticConfig.compressedTenBitFormat == 1)
    {
        const EB_U32 inputLumaOffset = ((lcuOriginY + inputPicture->originY) * inputPicture->strideY) + (lcuOriginX + inputPicture->originX);
        const EB_U32 inputCbOffset = (((lcuOriginY + inputPicture->originY) >> subHeightCMinus1) * inputPicture->strideCb) + ((lcuOriginX + inputPicture->originX) >> subWidthCMinus1);
        const EB_U32 inputCrOffset = (((lcuOriginY + inputPicture->originY) >> subHeightCMinus1) * inputPicture->strideCr) + ((lcuOriginX + inputPicture->originX) >> subWidthCMinus1);

        // In this path the bit-increment planes are addressed with a row
        // width of a quarter of the picture width and without the picture
        // origin being added.
        const EB_U16 luma2BitWidth = inputPicture->width / 4;
        const EB_U16 chroma2BitWidth = (inputPicture->width / 4) >> subWidthCMinus1;

        CompressedPackLcu(
            inputPicture->bufferY + inputLumaOffset,
            inputPicture->strideY,
            inputPicture->bufferBitIncY + lcuOriginY*luma2BitWidth + (lcuOriginX / 4)*lcuHeight,
            lcuWidth / 4,
            (EB_U16 *)contextPtr->inputSample16bitBuffer->bufferY,
            MAX_LCU_SIZE,
            lcuWidth,
            lcuHeight);

        CompressedPackLcu(
            inputPicture->bufferCb + inputCbOffset,
            inputPicture->strideCb,
            inputPicture->bufferBitIncCb + (lcuOriginY >> subHeightCMinus1) * chroma2BitWidth + ((lcuOriginX >> subWidthCMinus1) / 4)*(lcuHeight >> subHeightCMinus1),
            (lcuWidth >> subWidthCMinus1) / 4,
            (EB_U16 *)contextPtr->inputSample16bitBuffer->bufferCb,
            MAX_LCU_SIZE >> subWidthCMinus1,
            lcuWidth >> subWidthCMinus1,
            lcuHeight >> subHeightCMinus1);

        CompressedPackLcu(
            inputPicture->bufferCr + inputCrOffset,
            inputPicture->strideCr,
            inputPicture->bufferBitIncCr + (lcuOriginY >> subHeightCMinus1) * chroma2BitWidth + ((lcuOriginX >> subWidthCMinus1) / 4)*(lcuHeight >> subHeightCMinus1),
            (lcuWidth >> subWidthCMinus1) / 4,
            (EB_U16 *)contextPtr->inputSample16bitBuffer->bufferCr,
            MAX_LCU_SIZE >> subWidthCMinus1,
            lcuWidth >> subWidthCMinus1,
            lcuHeight >> subHeightCMinus1);
    }
    else {

        // Here the bit-increment planes are addressed like the base planes,
        // each with its own stride.
        const EB_U32 inputLumaOffset = ((lcuOriginY + inputPicture->originY) * inputPicture->strideY) + (lcuOriginX + inputPicture->originX);
        const EB_U32 inputBitIncLumaOffset = ((lcuOriginY + inputPicture->originY) * inputPicture->strideBitIncY) + (lcuOriginX + inputPicture->originX);
        const EB_U32 inputCbOffset = ((lcuOriginX + inputPicture->originX) >> subWidthCMinus1) +
            (((lcuOriginY + inputPicture->originY) >> subHeightCMinus1) * inputPicture->strideCb);
        const EB_U32 inputCrOffset = ((lcuOriginX + inputPicture->originX) >> subWidthCMinus1) +
            (((lcuOriginY + inputPicture->originY) >> subHeightCMinus1) * inputPicture->strideCr);

        const EB_U32 inputBitIncCrOffset = ((lcuOriginX + inputPicture->originX) >> subWidthCMinus1) +
            (((lcuOriginY + inputPicture->originY) >> subHeightCMinus1) * inputPicture->strideBitIncCr);
        const EB_U32 inputBitIncCbOffset = ((lcuOriginX + inputPicture->originX) >> subWidthCMinus1) +
            (((lcuOriginY + inputPicture->originY) >> subHeightCMinus1) * inputPicture->strideBitIncCb);

        Pack2D_SRC(
            inputPicture->bufferY + inputLumaOffset,
            inputPicture->strideY,
            inputPicture->bufferBitIncY + inputBitIncLumaOffset,
            inputPicture->strideBitIncY,
            (EB_U16 *)contextPtr->inputSample16bitBuffer->bufferY,
            MAX_LCU_SIZE,
            lcuWidth,
            lcuHeight);

        // The Cb plane is walked with the Cb strides, matching the strides
        // used to compute inputCbOffset / inputBitIncCbOffset above.
        Pack2D_SRC(
            inputPicture->bufferCb + inputCbOffset,
            inputPicture->strideCb,
            inputPicture->bufferBitIncCb + inputBitIncCbOffset,
            inputPicture->strideBitIncCb,
            (EB_U16 *)contextPtr->inputSample16bitBuffer->bufferCb,
            MAX_LCU_SIZE >> subWidthCMinus1,
            lcuWidth >> subWidthCMinus1,
            lcuHeight >> subHeightCMinus1);

        Pack2D_SRC(
            inputPicture->bufferCr + inputCrOffset,
            inputPicture->strideCr,
            inputPicture->bufferBitIncCr + inputBitIncCrOffset,
            inputPicture->strideBitIncCr,
            (EB_U16 *)contextPtr->inputSample16bitBuffer->bufferCr,
            MAX_LCU_SIZE >> subWidthCMinus1,
            lcuWidth >> subWidthCMinus1,
            lcuHeight >> subHeightCMinus1);
    }
}
2968
2969
2970 /*******************************************
2971 * Encode Pass
2972 *
2973 * Summary: Performs a H.265 conformant
2974 * reconstruction based on the LCU
2975 * mode decision.
2976 *
2977 * Inputs:
2978 * SourcePic
2979 * Coding Results
2980 * LCU Location
2981 * Sequence Control Set
2982 * Picture Control Set
2983 *
2984 * Outputs:
2985 * Reconstructed Samples
2986 * Coefficient Samples
2987 *
2988 *******************************************/
EncodePass(SequenceControlSet_t * sequenceControlSetPtr,PictureControlSet_t * pictureControlSetPtr,LargestCodingUnit_t * lcuPtr,EB_U32 tbAddr,EB_U32 lcuOriginX,EB_U32 lcuOriginY,EB_U32 lcuQp,EB_BOOL enableSaoFlag,EncDecContext_t * contextPtr)2989 EB_EXTERN void EncodePass(
2990 SequenceControlSet_t *sequenceControlSetPtr,
2991 PictureControlSet_t *pictureControlSetPtr,
2992 LargestCodingUnit_t *lcuPtr,
2993 EB_U32 tbAddr,
2994 EB_U32 lcuOriginX,
2995 EB_U32 lcuOriginY,
2996 EB_U32 lcuQp,
2997 EB_BOOL enableSaoFlag,
2998 EncDecContext_t *contextPtr)
2999 {
3000 EB_BOOL is16bit = contextPtr->is16bit;
3001 EB_COLOR_FORMAT colorFormat = contextPtr->colorFormat;
3002 const EB_U16 subWidthCMinus1 = (colorFormat == EB_YUV444 ? 1 : 2) - 1;
3003
3004 EB_U32 tileIdx = contextPtr->encDecTileIndex;
3005 EbPictureBufferDesc_t *reconBuffer = is16bit ? pictureControlSetPtr->reconPicture16bitPtr : pictureControlSetPtr->reconPicturePtr;
3006 EbPictureBufferDesc_t *coeffBufferTB = lcuPtr->quantizedCoeff;
3007
3008 EbPictureBufferDesc_t *inputPicture;
3009 ModeDecisionContext_t *mdcontextPtr;
3010
3011 mdcontextPtr = contextPtr->mdContext;
3012 inputPicture = contextPtr->inputSamples = (EbPictureBufferDesc_t*)pictureControlSetPtr->ParentPcsPtr->enhancedPicturePtr;
3013
3014 LcuStat_t *lcuStatPtr = &(pictureControlSetPtr->ParentPcsPtr->lcuStatArray[tbAddr]);
3015
3016
3017 // TMVP
3018 TmvpUnit_t *tmvpMapWritePtr;
3019 EB_U32 tmvpMapHorizontalStartIndex;
3020 EB_U32 tmvpMapVerticalStartIndex;
3021 EB_U32 tmvpMapHorizontalEndIndex;
3022 EB_U32 tmvpMapVerticalEndIndex;
3023 EB_U32 tmvpMapIndex;
3024 EB_U32 mvCompressionUnitSizeMinus1 = (1 << LOG_MV_COMPRESS_UNIT_SIZE) - 1;
3025
3026 // DLF
3027 EB_U32 startIndex;
3028 EB_U8 blk4x4IndexX;
3029 EB_U8 blk4x4IndexY;
3030 EB_BOOL availableCoeff;
3031
3032 // QP Neighbor Arrays
3033 EB_BOOL isDeltaQpNotCoded = EB_TRUE;
3034
3035 // LCU Stats
3036 EB_U32 lcuWidth = MIN(sequenceControlSetPtr->lcuSize, sequenceControlSetPtr->lumaWidth - lcuOriginX);
3037 EB_U32 lcuHeight = MIN(sequenceControlSetPtr->lcuSize, sequenceControlSetPtr->lumaHeight - lcuOriginY);
3038
3039 // SAO
3040 EB_S64 saoLumaBestCost;
3041 EB_S64 saoChromaBestCost;
3042
3043 // MV merge mode
3044 EB_U32 yCbf=0;
3045 EB_U32 cbCbf=0;
3046 EB_U32 crCbf=0;
3047 EB_U32 cbCbf2=0;
3048 EB_U32 crCbf2=0;
3049 EB_U64 yCoeffBits;
3050 EB_U64 cbCoeffBits;
3051 EB_U64 crCoeffBits;
3052 EB_U64 yFullDistortion[DIST_CALC_TOTAL];
3053 EB_U64 yTuFullDistortion[DIST_CALC_TOTAL];
3054 EB_U32 countNonZeroCoeffs[3];
3055 EB_U64 yTuCoeffBits;
3056 EB_U64 cbTuCoeffBits;
3057 EB_U64 crTuCoeffBits;
3058 EB_U32 lumaShift;
3059 EB_U32 scratchLumaOffset;
3060 EB_U32 lcuRowIndex = lcuOriginY / MAX_LCU_SIZE;
3061 EncodeContext_t *encodeContextPtr = NULL;
3062
3063 // Dereferencing early
3064 NeighborArrayUnit_t *epModeTypeNeighborArray = pictureControlSetPtr->epModeTypeNeighborArray[tileIdx];
3065 NeighborArrayUnit_t *epIntraLumaModeNeighborArray = pictureControlSetPtr->epIntraLumaModeNeighborArray[tileIdx];
3066 NeighborArrayUnit_t *epMvNeighborArray = pictureControlSetPtr->epMvNeighborArray[tileIdx];
3067 NeighborArrayUnit_t *epLumaReconNeighborArray = is16bit ? pictureControlSetPtr->epLumaReconNeighborArray16bit[tileIdx] : pictureControlSetPtr->epLumaReconNeighborArray[tileIdx];
3068 NeighborArrayUnit_t *epCbReconNeighborArray = is16bit ? pictureControlSetPtr->epCbReconNeighborArray16bit[tileIdx] : pictureControlSetPtr->epCbReconNeighborArray[tileIdx];
3069 NeighborArrayUnit_t *epCrReconNeighborArray = is16bit ? pictureControlSetPtr->epCrReconNeighborArray16bit[tileIdx] : pictureControlSetPtr->epCrReconNeighborArray[tileIdx];
3070 NeighborArrayUnit_t *epSkipFlagNeighborArray = pictureControlSetPtr->epSkipFlagNeighborArray[tileIdx];
3071 NeighborArrayUnit_t *epLeafDepthNeighborArray = pictureControlSetPtr->epLeafDepthNeighborArray[tileIdx];
3072
3073 EB_BOOL constrainedIntraFlag = pictureControlSetPtr->constrainedIntraFlag;
3074 EB_BOOL enableStrongIntraSmoothing = sequenceControlSetPtr->enableStrongIntraSmoothing;
3075 CodingUnit_t **codedLeafArrayPtr = lcuPtr->codedLeafArrayPtr;
3076
3077 EB_BOOL dlfEnableFlag = (EB_BOOL)(!sequenceControlSetPtr->staticConfig.disableDlfFlag) &&
3078 (pictureControlSetPtr->ParentPcsPtr->isUsedAsReferenceFlag ||
3079 sequenceControlSetPtr->staticConfig.reconEnabled);
3080
3081 dlfEnableFlag = contextPtr->allowEncDecMismatch ? EB_FALSE : dlfEnableFlag;
3082
3083 const EB_BOOL isIntraLCU = contextPtr->mdContext->limitIntra ? isIntraPresent(lcuPtr) : EB_TRUE;
3084
3085 EB_BOOL doRecon = (EB_BOOL)(contextPtr->mdContext->limitIntra == 0 || isIntraLCU == 1) ||
3086 pictureControlSetPtr->ParentPcsPtr->isUsedAsReferenceFlag ||
3087 sequenceControlSetPtr->staticConfig.reconEnabled;
3088
3089 CabacCost_t *cabacCost = pictureControlSetPtr->cabacCost;
3090 EntropyCoder_t *coeffEstEntropyCoderPtr = pictureControlSetPtr->coeffEstEntropyCoderPtr;
3091 EB_U8 cuItr;
3092 EB_U32 dZoffset = 0;
3093
3094 if (!lcuStatPtr->stationaryEdgeOverTimeFlag && sequenceControlSetPtr->staticConfig.improveSharpness && pictureControlSetPtr->ParentPcsPtr->picNoiseClass < PIC_NOISE_CLASS_3_1) {
3095 EB_S16 cuDeltaQp = (EB_S16)(lcuPtr->qp - pictureControlSetPtr->ParentPcsPtr->averageQp);
3096 EB_U32 dzCondition = cuDeltaQp > 0 ? 0 : 1;
3097
3098 if (sequenceControlSetPtr->inputResolution == INPUT_SIZE_4K_RANGE) {
3099
3100 if (!(pictureControlSetPtr->ParentPcsPtr->isPan ||
3101 (pictureControlSetPtr->ParentPcsPtr->nonMovingIndexAverage < 10 && lcuPtr->auraStatus == AURA_STATUS_1) ||
3102 (lcuStatPtr->cuStatArray[0].skinArea) ||
3103 (pictureControlSetPtr->ParentPcsPtr->intraCodedBlockProbability > 90) ||
3104 (pictureControlSetPtr->ParentPcsPtr->highDarkAreaDensityFlag))) {
3105
3106 if (pictureControlSetPtr->sliceType != EB_I_PICTURE &&
3107 pictureControlSetPtr->temporalLayerIndex == 0 &&
3108 pictureControlSetPtr->ParentPcsPtr->intraCodedBlockProbability > 60 &&
3109 !pictureControlSetPtr->ParentPcsPtr->isTilt &&
3110 pictureControlSetPtr->ParentPcsPtr->picHomogenousOverTimeLcuPercentage > 40)
3111 {
3112 dZoffset = 10;
3113 }
3114
3115 if (dzCondition) {
3116 if (pictureControlSetPtr->sceneCaracteristicId == EB_FRAME_CARAC_1) {
3117 if (pictureControlSetPtr->sliceType == EB_I_PICTURE) {
3118 dZoffset = lcuStatPtr->cuStatArray[0].grassArea ? 10 : dZoffset;
3119 }
3120 else if (pictureControlSetPtr->temporalLayerIndex == 0) {
3121 dZoffset = lcuStatPtr->cuStatArray[0].grassArea ? 9 : dZoffset;
3122 }
3123 else if (pictureControlSetPtr->temporalLayerIndex == 1) {
3124 dZoffset = lcuStatPtr->cuStatArray[0].grassArea ? 5 : dZoffset;
3125 }
3126 }
3127
3128 }
3129 }
3130 }
3131 }
3132
3133 QpmDeriveBeaAndSkipQpmFlagLcu(
3134 sequenceControlSetPtr,
3135 pictureControlSetPtr,
3136 lcuPtr,
3137 tbAddr,
3138 contextPtr);
3139
3140 encodeContextPtr = ((SequenceControlSet_t*)(pictureControlSetPtr->sequenceControlSetWrapperPtr->objectPtr))->encodeContextPtr;
3141
3142 if (pictureControlSetPtr->ParentPcsPtr->isUsedAsReferenceFlag == EB_TRUE) {
3143 // TMVP init
3144 tmvpMapWritePtr = &(contextPtr->referenceObjectWritePtr->tmvpMap[tbAddr]);
3145 tmvpMapIndex = 0;
3146
3147 //get the 16bit form of the input LCU
3148 if (is16bit) {
3149
3150 reconBuffer = ((EbReferenceObject_t*)pictureControlSetPtr->ParentPcsPtr->referencePictureWrapperPtr->objectPtr)->referencePicture16bit;
3151 } else {
3152 reconBuffer = ((EbReferenceObject_t*)pictureControlSetPtr->ParentPcsPtr->referencePictureWrapperPtr->objectPtr)->referencePicture;
3153 }
3154 }
3155 else { // non ref pictures
3156 reconBuffer = is16bit ? pictureControlSetPtr->reconPicture16bitPtr : pictureControlSetPtr->reconPicturePtr;
3157 tmvpMapWritePtr = (TmvpUnit_t*)EB_NULL;
3158 }
3159
3160
3161 EB_BOOL useDeltaQp = (EB_BOOL)(sequenceControlSetPtr->staticConfig.improveSharpness || sequenceControlSetPtr->staticConfig.bitRateReduction || sequenceControlSetPtr->staticConfig.segmentOvEnabled);
3162
3163 EB_BOOL singleSegment = (sequenceControlSetPtr->encDecSegmentColCountArray[pictureControlSetPtr->temporalLayerIndex] == 1) && (sequenceControlSetPtr->encDecSegmentRowCountArray[pictureControlSetPtr->temporalLayerIndex] == 1);
3164
3165 EB_BOOL useDeltaQpSegments = singleSegment ? 0 : (EB_BOOL)(sequenceControlSetPtr->staticConfig.improveSharpness || sequenceControlSetPtr->staticConfig.bitRateReduction || sequenceControlSetPtr->staticConfig.segmentOvEnabled);
3166
3167 if (is16bit) {
3168 EncodePassPackLcu(
3169 sequenceControlSetPtr,
3170 inputPicture,
3171 contextPtr,
3172 lcuOriginX,
3173 lcuOriginY,
3174 lcuWidth,
3175 lcuHeight);
3176 }
3177
3178 contextPtr->intraCodedAreaLCU[tbAddr] = 0;
3179
3180 // CU Loop
3181 cuItr = 0;
3182 while (cuItr < CU_MAX_COUNT) {
3183 if (codedLeafArrayPtr[cuItr]->splitFlag == EB_FALSE){
3184 // PU Stack variables
3185 PredictionUnit_t *puPtr = (PredictionUnit_t *)EB_NULL; // done
3186 EbPictureBufferDesc_t *residualBuffer = contextPtr->residualBuffer;
3187 EbPictureBufferDesc_t *transformBuffer = contextPtr->transformBuffer;
3188 EB_S16 *transformInnerArrayPtr = contextPtr->transformInnerArrayPtr;
3189 const CodedUnitStats_t *cuStats = contextPtr->cuStats = GetCodedUnitStats(cuItr);
3190 CodingUnit_t *cuPtr = contextPtr->cuPtr = lcuPtr->codedLeafArrayPtr[cuItr];
3191
3192 _mm_prefetch((const char *)cuStats, _MM_HINT_T0);
3193
3194 contextPtr->cuOriginX = (EB_U16)(lcuOriginX + cuStats->originX);
3195 contextPtr->cuOriginY = (EB_U16)(lcuOriginY + cuStats->originY);
3196
3197 EB_BOOL tileLeftBoundary = (lcuPtr->lcuEdgeInfoPtr->tileLeftEdgeFlag == EB_TRUE && ((contextPtr->cuOriginX & (lcuPtr->size - 1)) == 0)) ? EB_TRUE : EB_FALSE;
3198 EB_BOOL tileTopBoundary = (lcuPtr->lcuEdgeInfoPtr->tileTopEdgeFlag == EB_TRUE && ((contextPtr->cuOriginY & (lcuPtr->size - 1)) == 0)) ? EB_TRUE : EB_FALSE;
3199 EB_BOOL tileRightBoundary = (lcuPtr->lcuEdgeInfoPtr->tileRightEdgeFlag == EB_TRUE && (((contextPtr->cuOriginX + cuStats->size) & (lcuPtr->size - 1)) == 0)) ? EB_TRUE : EB_FALSE;
3200 //printf("LCU (%d, %d), left/top/right boundary %d/%d/%d\n", lcuOriginX, lcuOriginY,
3201 // tileLeftBoundary, tileTopBoundary, tileRightBoundary);
3202
3203 EncodePassPreFetchRef(
3204 pictureControlSetPtr,
3205 contextPtr,
3206 cuPtr,
3207 cuStats,
3208 puPtr,
3209 is16bit);
3210
3211 cuPtr->deltaQp = 0;
3212
3213 cuPtr->qp = (sequenceControlSetPtr->staticConfig.improveSharpness || sequenceControlSetPtr->staticConfig.bitRateReduction || sequenceControlSetPtr->staticConfig.segmentOvEnabled) ? contextPtr->qpmQp : pictureControlSetPtr->pictureQp;
3214 lcuPtr->qp = (sequenceControlSetPtr->staticConfig.improveSharpness || sequenceControlSetPtr->staticConfig.bitRateReduction || sequenceControlSetPtr->staticConfig.segmentOvEnabled) ? contextPtr->qpmQp : pictureControlSetPtr->pictureQp;
3215 cuPtr->orgDeltaQp = cuPtr->deltaQp;
3216
3217 if (!contextPtr->skipQpmFlag &&
3218 (sequenceControlSetPtr->staticConfig.improveSharpness || sequenceControlSetPtr->staticConfig.bitRateReduction) &&
3219 (contextPtr->cuStats->depth <= pictureControlSetPtr->difCuDeltaQpDepth)) {
3220 EncQpmDeriveDeltaQPForEachLeafLcu(
3221 sequenceControlSetPtr,
3222 pictureControlSetPtr,
3223 lcuPtr,
3224 tbAddr,
3225 cuPtr,
3226 contextPtr->cuStats->depth,
3227 cuItr,
3228 cuStats->size,
3229 cuPtr->predictionModeFlag,
3230 contextPtr->cuStats->parent32x32Index,
3231 contextPtr);
3232 }
3233
3234 EB_U8 fastEl = (contextPtr->fastEl && contextPtr->cuStats->size > 8);
3235 EB_U64 yCoeffBitsTemp = contextPtr->mdContext->mdEpPipeLcu[cuPtr->leafIndex].yCoeffBits;
3236 EB_S16 yDc = 0;
3237 EB_U16 yCountNonZeroCoeffs = 0;
3238 EB_U32 yBitsThsld = (contextPtr->cuStats->size > 32) ? contextPtr->yBitsThsld : (contextPtr->cuStats->size > 16) ? (contextPtr->yBitsThsld >> 1) : (contextPtr->yBitsThsld >> 2);
3239
3240 EB_U8 qpScaled = CLIP3((EB_S8)MIN_QP_VALUE, (EB_S8)MAX_CHROMA_MAP_QP_VALUE, (EB_S8)(cuPtr->qp + pictureControlSetPtr->cbQpOffset + pictureControlSetPtr->sliceCbQpOffset));
3241 EB_U8 cbQp = 0;
3242
3243 if (colorFormat == EB_YUV420) {
3244 cbQp = MapChromaQp(qpScaled);
3245 } else {
3246 cbQp = MIN(qpScaled, 51);
3247 }
3248
3249 //if (pictureControlSetPtr->pictureNumber == 1) {
3250 // printf("POC %d, ", pictureControlSetPtr->pictureNumber);
3251 // if (cuPtr->predictionModeFlag == INTRA_MODE) {
3252 // printf("(%d, %d), pu size %d, intraLumaMode %d\n",
3253 // contextPtr->cuOriginX, contextPtr->cuOriginY, cuStats->size, cuPtr->predictionUnitArray->intraLumaMode);
3254 // } else {
3255 // printf("(%d, %d), pu size %d, inter mode, merge flag %d, mvp (%d, %d), tileIdx %d\n",
3256 // contextPtr->cuOriginX, contextPtr->cuOriginY, cuStats->size,
3257 // cuPtr->predictionUnitArray[0].mergeFlag,
3258 // cuPtr->predictionUnitArray->mv[0].x,
3259 // cuPtr->predictionUnitArray->mv[0].y, tileIdx);
3260 // }
3261 //}
3262
3263
3264 if (cuPtr->predictionModeFlag == INTRA_MODE &&
3265 cuPtr->predictionUnitArray->intraLumaMode != EB_INTRA_MODE_4x4) {
3266 contextPtr->totIntraCodedArea += cuStats->size*cuStats->size;
3267 if (pictureControlSetPtr->sliceType != EB_I_PICTURE){
3268 contextPtr->intraCodedAreaLCU[tbAddr] += cuStats->size*cuStats->size;
3269 }
3270
3271 // *Note - Transforms are the same size as predictions
3272 // Partition Loop
3273 contextPtr->tuItr = 0;
3274
3275 {
3276 // Set the PU Loop Variables
3277 puPtr = cuPtr->predictionUnitArray;
3278 // Generate Intra Luma Neighbor Modes
3279 EbHevcGeneratePuIntraLumaNeighborModes( // HT done
3280 cuPtr,
3281 contextPtr->cuOriginX,
3282 contextPtr->cuOriginY,
3283 MAX_LCU_SIZE,
3284 epIntraLumaModeNeighborArray,
3285 epModeTypeNeighborArray);
3286
3287
3288 // Transform Loop (not supported)
3289 {
3290 // Generate Intra Reference Samples
3291 if (colorFormat == EB_YUV420) {
3292 GenerateIntraReferenceSamplesFuncTable[is16bit](
3293 constrainedIntraFlag,
3294 enableStrongIntraSmoothing,
3295 contextPtr->cuOriginX,
3296 contextPtr->cuOriginY,
3297 cuStats->size,
3298 MAX_LCU_SIZE,
3299 cuStats->depth,
3300 epModeTypeNeighborArray,
3301 epLumaReconNeighborArray,
3302 epCbReconNeighborArray,
3303 epCrReconNeighborArray,
3304 is16bit ? (void*)contextPtr->intraRefPtr16 : (void*)contextPtr->intraRefPtr,
3305 colorFormat,
3306 tileLeftBoundary, tileTopBoundary, tileRightBoundary);
3307 } else if (colorFormat == EB_YUV422 || colorFormat == EB_YUV444) {
3308 //Jing: TODO, add tiles support
3309 GenerateLumaIntraReferenceSamplesFuncTable[is16bit](
3310 constrainedIntraFlag,
3311 enableStrongIntraSmoothing,
3312 contextPtr->cuOriginX,
3313 contextPtr->cuOriginY,
3314 cuStats->size,
3315 MAX_LCU_SIZE,
3316 cuStats->depth,
3317 epModeTypeNeighborArray,
3318 epLumaReconNeighborArray,
3319 epCbReconNeighborArray,
3320 epCrReconNeighborArray,
3321 is16bit ? (void*)contextPtr->intraRefPtr16 : (void*)contextPtr->intraRefPtr,
3322 tileLeftBoundary, tileTopBoundary, tileRightBoundary);
3323
3324 GenerateChromaIntraReferenceSamplesFuncTable[is16bit](
3325 constrainedIntraFlag,
3326 enableStrongIntraSmoothing,
3327 contextPtr->cuOriginX,
3328 contextPtr->cuOriginY,
3329 cuStats->size,
3330 MAX_LCU_SIZE,
3331 cuStats->depth,
3332 epModeTypeNeighborArray,
3333 epLumaReconNeighborArray,
3334 epCbReconNeighborArray,
3335 epCrReconNeighborArray,
3336 is16bit ? (void*)contextPtr->intraRefPtr16 : (void*)contextPtr->intraRefPtr,
3337 colorFormat,
3338 EB_FALSE,
3339 tileLeftBoundary, tileTopBoundary, tileRightBoundary);
3340 }
3341
3342 // Prediction
3343 EncodePassIntraPredictionFuncTable[is16bit](
3344 is16bit ? (void*)contextPtr->intraRefPtr16 : (void*)contextPtr->intraRefPtr,
3345 contextPtr->cuOriginX + reconBuffer->originX,
3346 contextPtr->cuOriginY + reconBuffer->originY,
3347 cuStats->size,
3348 cuStats->size >> subWidthCMinus1, //chroma PU size, for 444 chroma PU size is the same as luma
3349 reconBuffer,
3350 colorFormat,
3351 EB_FALSE,
3352 (EB_U32)puPtr->intraLumaMode,
3353 EB_INTRA_CHROMA_DM,
3354 PICTURE_BUFFER_DESC_FULL_MASK );
3355
3356 #ifdef DEBUG_REF_INFO
3357 int originX = contextPtr->cuOriginX;
3358 int originY = contextPtr->cuOriginY;
3359 int tuSize = cuStats->size;
3360 printf("\n----- Dump prediction for 1st loop at (%d, %d)-----\n", originX, originY);
3361
3362 int chroma_size = tuSize > MIN_PU_SIZE? (tuSize >> subWidthCMinus1): tuSize;
3363
3364 //dump_block_from_desc(chroma_size, reconBuffer, originX, originY, 1);
3365 dump_block_from_desc(tuSize, reconBuffer, originX, originY, 0);
3366 #endif
3367 // Encode Transform Unit -INTRA-
3368 {
3369 contextPtr->forceCbfFlag = (contextPtr->skipQpmFlag) ?
3370 EB_FALSE :
3371 lcuPtr->lcuEdgeInfoPtr->tileLeftEdgeFlag && ((contextPtr->cuOriginX & (63)) == 0) && (contextPtr->cuOriginY == lcuOriginY);
3372 SetPmEncDecMode(
3373 pictureControlSetPtr,
3374 contextPtr,
3375 tbAddr,
3376 lcuStatPtr->stationaryEdgeOverTimeFlag,
3377 pictureControlSetPtr->temporalLayerIndex > 0 ? lcuStatPtr->pmStationaryEdgeOverTimeFlag : lcuStatPtr->stationaryEdgeOverTimeFlag);
3378
3379 // Set Fast El coef shaping method
3380 contextPtr->transCoeffShapeLuma = DEFAULT_SHAPE;
3381 contextPtr->transCoeffShapeChroma = DEFAULT_SHAPE;
3382 if (fastEl && contextPtr->pmpMaskingLevelEncDec > MASK_THSHLD_1) {
3383 yDc = contextPtr->mdContext->mdEpPipeLcu[cuPtr->leafIndex].yDc[0];
3384 yCountNonZeroCoeffs = contextPtr->mdContext->mdEpPipeLcu[cuPtr->leafIndex].yCountNonZeroCoeffs[0];
3385
3386 if ((cuPtr->rootCbf == 0) ||
3387 ((yCoeffBitsTemp <= yBitsThsld) && yDc < YDC_THSHLD_1 && yCountNonZeroCoeffs <= 1)) {
3388 // Skip pass for cuPtr->rootCbf == 0 caused some VQ issues in chroma, so DC path is used instead
3389 contextPtr->transCoeffShapeLuma = ONLY_DC_SHAPE;
3390 contextPtr->transCoeffShapeChroma = ONLY_DC_SHAPE;
3391 } else if ((yCoeffBitsTemp <= yBitsThsld * 4)) {
3392 contextPtr->transCoeffShapeLuma = N4_SHAPE;
3393 if ((cuStats->size >> 1) > 8) {
3394 contextPtr->transCoeffShapeChroma = N4_SHAPE;
3395 } else {
3396 contextPtr->transCoeffShapeChroma = N2_SHAPE;
3397 }
3398 } else if ((yCoeffBitsTemp <= yBitsThsld * 16)) {
3399 contextPtr->transCoeffShapeLuma = N2_SHAPE;
3400 contextPtr->transCoeffShapeChroma = N2_SHAPE;
3401 }
3402 }
3403
3404 EncodeLoopFunctionTable[is16bit](
3405 contextPtr,
3406 lcuPtr,
3407 contextPtr->cuOriginX,
3408 contextPtr->cuOriginY,
3409 cbQp,
3410 reconBuffer,
3411 coeffBufferTB,
3412 residualBuffer,
3413 transformBuffer,
3414 transformInnerArrayPtr,
3415 countNonZeroCoeffs,
3416 useDeltaQpSegments,
3417 (CabacEncodeContext_t*)coeffEstEntropyCoderPtr->cabacEncodeContextPtr,
3418 (EB_U32)puPtr->intraLumaMode,
3419 PICTURE_BUFFER_DESC_FULL_MASK,
3420 colorFormat,
3421 EB_FALSE,
3422 (contextPtr->cuStats->size == 64) ? 32 : contextPtr->cuStats->size,
3423 pictureControlSetPtr->cabacCost,
3424 cuPtr->deltaQp > 0 ? 0 : dZoffset);
3425
3426 EncodeGenerateReconFunctionPtr[is16bit](
3427 contextPtr,
3428 contextPtr->cuOriginX,
3429 contextPtr->cuOriginY,
3430 PICTURE_BUFFER_DESC_FULL_MASK,
3431 colorFormat,
3432 EB_FALSE,
3433 (contextPtr->cuStats->size == 64) ? 32 : contextPtr->cuStats->size,
3434 reconBuffer,
3435 residualBuffer,
3436 transformInnerArrayPtr);
3437 }
3438
3439 // Update Recon Samples-INTRA-
3440 EncodePassUpdateReconSampleNeighborArrays(
3441 epLumaReconNeighborArray,
3442 epCbReconNeighborArray,
3443 epCrReconNeighborArray,
3444 reconBuffer,
3445 contextPtr->cuOriginX,
3446 contextPtr->cuOriginY,
3447 cuStats->size,
3448 PICTURE_BUFFER_DESC_FULL_MASK,
3449 colorFormat,
3450 is16bit);
3451 } // Transform Loop
3452
3453 if (colorFormat == EB_YUV422) {
3454 GenerateChromaIntraReferenceSamplesFuncTable[is16bit](
3455 constrainedIntraFlag,
3456 enableStrongIntraSmoothing,
3457 contextPtr->cuOriginX,
3458 contextPtr->cuOriginY,
3459 cuStats->size,
3460 MAX_LCU_SIZE,
3461 cuStats->depth,
3462 epModeTypeNeighborArray,
3463 epLumaReconNeighborArray,
3464 epCbReconNeighborArray,
3465 epCrReconNeighborArray,
3466 is16bit ? (void*)contextPtr->intraRefPtr16 : (void*)contextPtr->intraRefPtr,
3467 colorFormat,
3468 EB_TRUE,
3469 tileLeftBoundary, EB_FALSE, tileRightBoundary);
3470
3471 // Prediction
3472 EncodePassIntraPredictionFuncTable[is16bit](
3473 is16bit ? (void*)contextPtr->intraRefPtr16 : (void*)contextPtr->intraRefPtr,
3474 contextPtr->cuOriginX + reconBuffer->originX,
3475 contextPtr->cuOriginY + reconBuffer->originY,
3476 cuStats->size,
3477 cuStats->size>>1,
3478 reconBuffer,
3479 colorFormat,
3480 EB_TRUE,
3481 (EB_U32)puPtr->intraLumaMode,
3482 EB_INTRA_CHROMA_DM,
3483 PICTURE_BUFFER_DESC_CHROMA_MASK);
3484
3485 //EncodeLoop
3486 {
3487 EncodeLoopFunctionTable[is16bit](
3488 contextPtr,
3489 lcuPtr,
3490 contextPtr->cuOriginX,
3491 contextPtr->cuOriginY,
3492 cbQp,
3493 reconBuffer,
3494 coeffBufferTB,
3495 residualBuffer,
3496 transformBuffer,
3497 transformInnerArrayPtr,
3498 countNonZeroCoeffs,
3499 useDeltaQpSegments,
3500 (CabacEncodeContext_t*)coeffEstEntropyCoderPtr->cabacEncodeContextPtr,
3501 (EB_U32)puPtr->intraLumaMode,
3502 PICTURE_BUFFER_DESC_CHROMA_MASK,
3503 colorFormat,
3504 EB_TRUE,
3505 (contextPtr->cuStats->size == 64) ? 32 : contextPtr->cuStats->size,
3506 pictureControlSetPtr->cabacCost,
3507 cuPtr->deltaQp > 0 ? 0 : dZoffset);
3508
3509 EncodeGenerateReconFunctionPtr[is16bit](
3510 contextPtr,
3511 contextPtr->cuOriginX,
3512 contextPtr->cuOriginY,
3513 PICTURE_BUFFER_DESC_CHROMA_MASK,
3514 colorFormat,
3515 EB_TRUE,
3516 (contextPtr->cuStats->size == 64) ? 32 : contextPtr->cuStats->size,
3517 reconBuffer,
3518 residualBuffer,
3519 transformInnerArrayPtr);
3520
3521 // Update Recon Samples-INTRA-
3522 EncodePassUpdateReconSampleNeighborArrays(
3523 epLumaReconNeighborArray,
3524 epCbReconNeighborArray,
3525 epCrReconNeighborArray,
3526 reconBuffer,
3527 contextPtr->cuOriginX,
3528 contextPtr->cuOriginY+(cuStats->size>>1),
3529 cuStats->size,
3530 PICTURE_BUFFER_DESC_CHROMA_MASK,
3531 colorFormat,
3532 is16bit);
3533 }
3534 }
3535
3536 // Update the Intra-specific Neighbor Arrays
3537 EncodePassUpdateIntraModeNeighborArrays(
3538 epModeTypeNeighborArray,
3539 epIntraLumaModeNeighborArray,
3540 (EB_U8)cuPtr->predictionUnitArray->intraLumaMode,
3541 contextPtr->cuOriginX,
3542 contextPtr->cuOriginY,
3543 cuStats->size);
3544
3545 // set up the bS based on PU boundary for DLF
3546 if (dlfEnableFlag){
3547 // Update the cbf map for DLF
3548 startIndex = (contextPtr->cuOriginY >> 2) * (sequenceControlSetPtr->lumaWidth >> 2) + (contextPtr->cuOriginX >> 2);
3549 for (blk4x4IndexY = 0; blk4x4IndexY < (cuStats->size >> 2); ++blk4x4IndexY){
3550 EB_MEMSET(&pictureControlSetPtr->cbfMapArray[startIndex], (EB_U8)(cuPtr->transformUnitArray[contextPtr->tuItr].lumaCbf), (cuStats->size >> 2));
3551 startIndex += (sequenceControlSetPtr->lumaWidth >> 2);
3552 }
3553
3554 SetBSArrayBasedOnPUBoundary(
3555 epModeTypeNeighborArray,
3556 epMvNeighborArray,
3557 puPtr,
3558 cuPtr,
3559 cuStats,
3560 lcuOriginX,
3561 lcuOriginY,
3562 tileLeftBoundary,
3563 tileTopBoundary,
3564 pictureControlSetPtr,
3565 pictureControlSetPtr->horizontalEdgeBSArray[tbAddr],
3566 pictureControlSetPtr->verticalEdgeBSArray[tbAddr]);
3567
3568 }
3569 } // Partition Loop
3570 } else if (cuPtr->predictionModeFlag == INTRA_MODE) {
3571 //*************************
3572 // INTRA 4x4
3573 //*************************
3574 contextPtr->totIntraCodedArea += cuStats->size*cuStats->size;
3575 if (pictureControlSetPtr->sliceType != EB_I_PICTURE) {
3576 contextPtr->intraCodedAreaLCU[tbAddr] += cuStats->size*cuStats->size;
3577 }
3578
3579 // Partition Loop
3580 EB_U8 partitionIndex;
3581 EB_U8 componentMask = PICTURE_BUFFER_DESC_LUMA_MASK;
3582
3583 for (partitionIndex = 0; partitionIndex < 4; partitionIndex++) {
3584 // Partition Loop
3585 contextPtr->tuItr = partitionIndex + 1;
3586
3587 EB_U16 partitionOriginX = contextPtr->cuOriginX + INTRA_4x4_OFFSET_X[partitionIndex];
3588 EB_U16 partitionOriginY = contextPtr->cuOriginY + INTRA_4x4_OFFSET_Y[partitionIndex];
3589
3590 EB_BOOL pictureLeftBoundary = (lcuPtr->lcuEdgeInfoPtr->tileLeftEdgeFlag == EB_TRUE && ((partitionOriginX & (lcuPtr->size - 1)) == 0)) ? EB_TRUE : EB_FALSE;
3591 EB_BOOL pictureTopBoundary = (lcuPtr->lcuEdgeInfoPtr->tileTopEdgeFlag == EB_TRUE && ((partitionOriginY & (lcuPtr->size - 1)) == 0)) ? EB_TRUE : EB_FALSE;
3592 EB_BOOL pictureRightBoundary = (lcuPtr->lcuEdgeInfoPtr->tileRightEdgeFlag == EB_TRUE && (((partitionOriginX + MIN_PU_SIZE) & (lcuPtr->size - 1)) == 0)) ? EB_TRUE : EB_FALSE;
3593
3594 EB_U8 intraLumaMode = lcuPtr->intra4x4Mode[((MD_SCAN_TO_RASTER_SCAN[cuItr] - 21) << 2) + partitionIndex];
3595 EB_U8 intraLumaModeForChroma = lcuPtr->intra4x4Mode[((MD_SCAN_TO_RASTER_SCAN[cuItr] - 21) << 2)];
3596
3597 //printf("Intra 4x4 block (%d, %d), luma mode is %d\n", partitionOriginX, partitionOriginY, intraLumaMode);
3598
3599 // Set the PU Loop Variables
3600 puPtr = cuPtr->predictionUnitArray;
3601
3602 // Generate Intra Luma Neighbor Modes
3603 EbHevcGeneratePuIntraLumaNeighborModes( // HT done
3604 cuPtr,
3605 partitionOriginX,
3606 partitionOriginY,
3607 MAX_LCU_SIZE,
3608 epIntraLumaModeNeighborArray,
3609 epModeTypeNeighborArray);
3610
3611 // Generate Intra Reference Samples
3612 GenerateLumaIntraReferenceSamplesFuncTable[is16bit](
3613 constrainedIntraFlag,
3614 enableStrongIntraSmoothing,
3615 partitionOriginX,
3616 partitionOriginY,
3617 MIN_PU_SIZE,
3618 MAX_LCU_SIZE,
3619 cuStats->depth,
3620 epModeTypeNeighborArray,
3621 epLumaReconNeighborArray,
3622 epCbReconNeighborArray,
3623 epCrReconNeighborArray,
3624 is16bit ? (void*)contextPtr->intraRefPtr16 : (void*)contextPtr->intraRefPtr,
3625 pictureLeftBoundary,
3626 pictureTopBoundary,
3627 pictureRightBoundary);
3628
3629 componentMask = PICTURE_BUFFER_DESC_LUMA_MASK;
3630 if (partitionIndex == 0 ||
3631 (colorFormat == EB_YUV422 && partitionIndex == 2) ||
3632 (colorFormat == EB_YUV444)) {
3633 // For the Intra4x4 case, the Chroma for the CU is coded as a single 4x4 block.
3634 // This changes how the right picture boundary is interpreted for the Luma and Chroma blocks
3635 // as there is not a one-to-one relationship between the luma/chroma blocks. This affects
3636 // only the right picture edge check and not the left or top boundary checks as the block size
3637 // has no influence on those checks.
3638 if (colorFormat == EB_YUV444) {
3639 pictureRightBoundary = (lcuPtr->lcuEdgeInfoPtr->tileRightEdgeFlag == EB_TRUE && (((partitionOriginX + MIN_PU_SIZE) & (MAX_LCU_SIZE - 1)) == 0)) ? EB_TRUE : EB_FALSE;
3640 } else {
3641 pictureRightBoundary = (lcuPtr->lcuEdgeInfoPtr->tileRightEdgeFlag == EB_TRUE && ((((partitionOriginX / 2) + MIN_PU_SIZE) & ((MAX_LCU_SIZE / 2) - 1)) == 0)) ? EB_TRUE : EB_FALSE;
3642 }
3643 componentMask = PICTURE_BUFFER_DESC_FULL_MASK;
3644 GenerateChromaIntraReferenceSamplesFuncTable[is16bit](
3645 constrainedIntraFlag,
3646 enableStrongIntraSmoothing,
3647 (colorFormat == EB_YUV444) ? partitionOriginX : contextPtr->cuOriginX,
3648 (colorFormat == EB_YUV444) ? partitionOriginY : contextPtr->cuOriginY,
3649 (colorFormat == EB_YUV444) ? MIN_PU_SIZE: (MIN_PU_SIZE << 1), //Jing: really a mess here, clean up later
3650 MAX_LCU_SIZE,
3651 cuStats->depth,
3652 epModeTypeNeighborArray,
3653 epLumaReconNeighborArray,
3654 epCbReconNeighborArray,
3655 epCrReconNeighborArray,
3656 is16bit ? (void*)contextPtr->intraRefPtr16 : (void*)contextPtr->intraRefPtr,
3657 colorFormat,
3658 (colorFormat == EB_YUV444) ? EB_FALSE : (EB_BOOL)partitionIndex,
3659 pictureLeftBoundary,
3660 pictureTopBoundary,
3661 pictureRightBoundary);
3662 }
3663
3664 // Prediction
3665 if (componentMask & PICTURE_BUFFER_DESC_LUMA_MASK) {
3666 EncodePassIntraPredictionFuncTable[is16bit](
3667 is16bit ? (void*)contextPtr->intraRefPtr16 : (void*)contextPtr->intraRefPtr,
3668 partitionOriginX + reconBuffer->originX,
3669 partitionOriginY + reconBuffer->originY,
3670 MIN_PU_SIZE,
3671 MIN_PU_SIZE,
3672 reconBuffer,
3673 colorFormat,
3674 EB_FALSE, //4x4, always 1st block
3675 intraLumaMode,
3676 EB_INTRA_CHROMA_DM,
3677 PICTURE_BUFFER_DESC_LUMA_MASK);
3678 }
3679
3680 if (componentMask & PICTURE_BUFFER_DESC_CHROMA_MASK) {
3681 // Jing:
3682 // For 422 intra4x4, the mode for chroma is the mode of 1st luma 4x4
3683 EncodePassIntraPredictionFuncTable[is16bit](
3684 is16bit ? (void*)contextPtr->intraRefPtr16 : (void*)contextPtr->intraRefPtr,
3685 partitionOriginX + reconBuffer->originX,
3686 partitionOriginY + reconBuffer->originY,
3687 MIN_PU_SIZE,
3688 MIN_PU_SIZE,
3689 reconBuffer,
3690 colorFormat,
3691 EB_FALSE, //4x4, always 1st block
3692 (colorFormat == EB_YUV444) ? intraLumaMode : intraLumaModeForChroma,//420/422 use 1st luma 4x4 mode
3693 EB_INTRA_CHROMA_DM,
3694 PICTURE_BUFFER_DESC_CHROMA_MASK);
3695 }
3696
3697 // Encode Transform Unit -INTRA-
3698 contextPtr->forceCbfFlag = (contextPtr->skipQpmFlag) ?
3699 EB_FALSE :
3700 lcuPtr->lcuEdgeInfoPtr->tileLeftEdgeFlag && ((contextPtr->cuOriginX & (63)) == 0) && (contextPtr->cuOriginY == lcuOriginY);
3701
3702 SetPmEncDecMode(
3703 pictureControlSetPtr,
3704 contextPtr,
3705 tbAddr,
3706
3707 lcuStatPtr->stationaryEdgeOverTimeFlag,
3708 pictureControlSetPtr->temporalLayerIndex > 0 ? lcuStatPtr->pmStationaryEdgeOverTimeFlag : lcuStatPtr->stationaryEdgeOverTimeFlag);
3709
3710 contextPtr->transCoeffShapeLuma = DEFAULT_SHAPE;
3711 contextPtr->transCoeffShapeChroma = DEFAULT_SHAPE;
3712
3713 EncodeLoopFunctionTable[is16bit](
3714 contextPtr,
3715 lcuPtr,
3716 partitionOriginX,
3717 partitionOriginY,
3718 cbQp,
3719 reconBuffer,
3720 coeffBufferTB,
3721 residualBuffer,
3722 transformBuffer,
3723 transformInnerArrayPtr,
3724 countNonZeroCoeffs,
3725 useDeltaQpSegments,
3726 (CabacEncodeContext_t*)coeffEstEntropyCoderPtr->cabacEncodeContextPtr,
3727 (EB_U32)puPtr->intraLumaMode,
3728 componentMask,
3729 colorFormat,
3730 EB_FALSE, //always 1st chroma block for 4x4
3731 MIN_PU_SIZE,
3732 pictureControlSetPtr->cabacCost,
3733 cuPtr->deltaQp > 0 ? 0 : dZoffset);
3734
3735 EncodeGenerateReconFunctionPtr[is16bit](
3736 contextPtr,
3737 partitionOriginX,
3738 partitionOriginY,
3739 componentMask,
3740 colorFormat,
3741 EB_FALSE,
3742 MIN_PU_SIZE,
3743 reconBuffer,
3744 residualBuffer,
3745 transformInnerArrayPtr);
3746
3747 // Update the Intra-specific Neighbor Arrays
3748 EncodePassUpdateIntraModeNeighborArrays(
3749 epModeTypeNeighborArray,
3750 epIntraLumaModeNeighborArray,
3751 intraLumaMode,
3752 partitionOriginX,
3753 partitionOriginY,
3754 MIN_PU_SIZE);
3755
3756 // Update Recon Samples-INTRA-
3757 EncodePassUpdateReconSampleNeighborArrays(
3758 epLumaReconNeighborArray,
3759 epCbReconNeighborArray,
3760 epCrReconNeighborArray,
3761 reconBuffer,
3762 partitionOriginX,
3763 partitionOriginY,
3764 MIN_PU_SIZE,
3765 componentMask,
3766 colorFormat,
3767 is16bit);
3768
3769 // set up the bS based on PU boundary for DLF
3770 if (dlfEnableFlag){
3771 // Update the cbf map for DLF
3772 startIndex = (partitionOriginY >> 2) * (sequenceControlSetPtr->lumaWidth >> 2) + (partitionOriginX >> 2);
3773 for (blk4x4IndexY = 0; blk4x4IndexY < (MIN_PU_SIZE >> 2); ++blk4x4IndexY){
3774 for (blk4x4IndexX = 0; blk4x4IndexX < (MIN_PU_SIZE >> 2); ++blk4x4IndexX){
3775 pictureControlSetPtr->cbfMapArray[startIndex + blk4x4IndexX] = (EB_U8)cuPtr->transformUnitArray[contextPtr->tuItr].lumaCbf;
3776 }
3777 startIndex += (sequenceControlSetPtr->lumaWidth >> 2);
3778 }
3779
3780 // Set the bS on TU boundary for DLF
3781 Intra4x4SetBSArrayBasedOnTUBoundary(
3782 partitionOriginX,
3783 partitionOriginY,
3784 MIN_PU_SIZE,
3785 MIN_PU_SIZE,
3786 partitionOriginY == contextPtr->cuOriginY ? EB_TRUE : EB_FALSE,
3787 partitionOriginX == contextPtr->cuOriginX ? EB_TRUE : EB_FALSE,
3788 contextPtr->cuStats,
3789 (EB_PART_MODE)contextPtr->cuPtr->predictionModeFlag,
3790 lcuOriginX,
3791 lcuOriginY,
3792 pictureControlSetPtr,
3793 pictureControlSetPtr->horizontalEdgeBSArray[tbAddr],
3794 pictureControlSetPtr->verticalEdgeBSArray[tbAddr]);
3795
3796
3797 Intra4x4SetBSArrayBasedOnPUBoundary(
3798 epModeTypeNeighborArray,
3799 epMvNeighborArray,
3800 puPtr,
3801 cuPtr,
3802 cuStats,
3803 partitionOriginX & (MAX_LCU_SIZE - 1),
3804 partitionOriginY & (MAX_LCU_SIZE - 1),
3805 MIN_PU_SIZE,
3806 MIN_PU_SIZE,
3807 lcuOriginX,
3808 lcuOriginY,
3809 EB_FALSE,
3810 EB_FALSE,
3811 pictureControlSetPtr,
3812 pictureControlSetPtr->horizontalEdgeBSArray[tbAddr],
3813 pictureControlSetPtr->verticalEdgeBSArray[tbAddr]);
3814
3815 }
3816 } // Partition Loop
3817 } else if (cuPtr->predictionModeFlag == INTER_MODE) {
3818 EB_U16 tuOriginX;
3819 EB_U16 tuOriginY;
3820 EB_U8 tuSize = 0;
3821 EB_U8 tuSizeChroma;
3822
3823 EB_BOOL isCuSkip = EB_FALSE;
3824
3825 //********************************
3826 // INTER
3827 //********************************
3828 EB_BOOL doMVpred = EB_TRUE;
3829 //if QPM and Segments are used, First Cu in LCU row should have at least one coeff.
3830 EB_BOOL isFirstCUinRow = (useDeltaQp == 1) &&
3831 !singleSegment &&
3832 lcuPtr->lcuEdgeInfoPtr->tileLeftEdgeFlag && ((contextPtr->cuOriginX & (63)) == 0) && (contextPtr->cuOriginY == lcuOriginY) ? EB_TRUE : EB_FALSE;;
3833 //Motion Compensation could be avoided in the case below
3834 EB_BOOL doMC = EB_TRUE;
3835
3836 // Perform Merge/Skip Decision if the mode coming from MD is merge. For the first CU in a row, merge will remain as is.
3837
3838 if (cuPtr->predictionUnitArray[0].mergeFlag == EB_TRUE) {
3839 if (isFirstCUinRow == EB_FALSE) {
3840 if (lcuPtr->chromaEncodeMode == CHROMA_MODE_BEST) {
3841 // Jing: using 420 for MD related stuff
3842 //EbPictureBufferDesc_t *inputPicturePtr = pictureControlSetPtr->ParentPcsPtr->enhancedPicturePtr;
3843 EbPictureBufferDesc_t *inputPicturePtr = pictureControlSetPtr->ParentPcsPtr->chromaDownSamplePicturePtr;
3844 const EB_U32 inputCbOriginIndex = ((contextPtr->cuOriginY >> 1) + (inputPicturePtr->originY >> 1)) * inputPicturePtr->strideCb + ((contextPtr->cuOriginX >> 1) + (inputPicturePtr->originX >> 1));
3845 const EB_U32 cuChromaOriginIndex = (((contextPtr->cuOriginY & 63) * 32) + (contextPtr->cuOriginX & 63)) >> 1;
3846
3847 contextPtr->mdContext->cuOriginX = contextPtr->cuOriginX;
3848 contextPtr->mdContext->cuOriginY = contextPtr->cuOriginY;
3849 contextPtr->mdContext->puItr = 0;
3850 contextPtr->mdContext->cuSize = contextPtr->cuStats->size;
3851 contextPtr->mdContext->cuSizeLog2 = contextPtr->cuStats->sizeLog2;
3852 contextPtr->mdContext->cuStats = contextPtr->cuStats;
3853
3854 AddChromaEncDec(
3855 pictureControlSetPtr,
3856 lcuPtr,
3857 cuPtr,
3858 contextPtr->mdContext,
3859 contextPtr,
3860 inputPicturePtr,
3861 inputCbOriginIndex,
3862 cuChromaOriginIndex,
3863 0);
3864 }
3865
3866 if (pictureControlSetPtr->sliceType == EB_B_PICTURE &&
3867 pictureControlSetPtr->ParentPcsPtr->isUsedAsReferenceFlag == EB_FALSE) {
3868 EbReferenceObject_t * refObjL0, *refObjL1;
3869 EB_U16 cuVar = (pictureControlSetPtr->ParentPcsPtr->variance[lcuPtr->index][0]);
3870 EB_U8 INTRA_AREA_TH[MAX_TEMPORAL_LAYERS] = { 40, 30, 30, 0, 0, 0 };
3871 refObjL0 = (EbReferenceObject_t*)pictureControlSetPtr->refPicPtrArray[REF_LIST_0]->objectPtr;
3872 refObjL1 = (EbReferenceObject_t*)pictureControlSetPtr->refPicPtrArray[REF_LIST_1]->objectPtr;
3873
3874 if (cuVar < 200 && (refObjL0->intraCodedArea > INTRA_AREA_TH[refObjL0->tmpLayerIdx] ||
3875 refObjL1->intraCodedArea > INTRA_AREA_TH[refObjL1->tmpLayerIdx])) {
3876 mdcontextPtr->mdEpPipeLcu[cuPtr->leafIndex].skipCost +=
3877 (mdcontextPtr->mdEpPipeLcu[cuPtr->leafIndex].skipCost * 70) / 100;
3878 }
3879 }
3880 isCuSkip = mdcontextPtr->mdEpPipeLcu[cuPtr->leafIndex].skipCost <= mdcontextPtr->mdEpPipeLcu[cuPtr->leafIndex].mergeCost ? 1 : 0;
3881 }
3882 }
3883
3884 //MC could be avoided in some cases below
3885 if (isFirstCUinRow == EB_FALSE) {
3886 if (pictureControlSetPtr->ParentPcsPtr->isUsedAsReferenceFlag == EB_FALSE &&
3887 constrainedIntraFlag == EB_TRUE &&
3888 cuPtr->predictionUnitArray[0].mergeFlag == EB_TRUE) {
3889 if (isCuSkip) {
3890 //here merge is decided to be skip in nonRef frame.
3891 doMC = EB_FALSE;
3892 doMVpred = EB_FALSE;
3893 }
3894 } else if (contextPtr->mdContext->limitIntra && isIntraLCU == EB_FALSE) {
3895 if (isCuSkip) {
3896 doMC = EB_FALSE;
3897 doMVpred = EB_FALSE;
3898 }
3899 }
3900 }
3901
3902 doMC = (EB_BOOL)(doRecon | doMC);
3903 doMVpred = (EB_BOOL)(doRecon | doMVpred);
3904 {
3905 // 1st Partition Loop
3906 puPtr = cuPtr->predictionUnitArray;
3907 if (doMVpred)
3908 EncodePassMvPrediction( //AMVP, not merge
3909 sequenceControlSetPtr,
3910 pictureControlSetPtr,
3911 tbAddr,
3912 contextPtr);
3913
3914 // Set MvUnit
3915 contextPtr->mvUnit.predDirection = (EB_U8)puPtr->interPredDirectionIndex;
3916 contextPtr->mvUnit.mv[REF_LIST_0].mvUnion = puPtr->mv[REF_LIST_0].mvUnion;
3917 contextPtr->mvUnit.mv[REF_LIST_1].mvUnion = puPtr->mv[REF_LIST_1].mvUnion;
3918 // Inter Prediction
3919 if (is16bit) {
3920 if (doMC)
3921 EncodePassInterPrediction16bit(
3922 &contextPtr->mvUnit,
3923 contextPtr->cuOriginX,
3924 contextPtr->cuOriginY,
3925 cuStats->size,
3926 cuStats->size,
3927 pictureControlSetPtr,
3928 reconBuffer,
3929 contextPtr->mcpContext);
3930 } else{
3931 if (doMC) {
3932 EncodePassInterPrediction(
3933 &contextPtr->mvUnit,
3934 contextPtr->cuOriginX,
3935 contextPtr->cuOriginY,
3936 cuStats->size,
3937 cuStats->size,
3938 pictureControlSetPtr,
3939 reconBuffer,
3940 contextPtr->mcpContext);
3941 }
3942 }
3943 }
3944 contextPtr->tuItr = (cuStats->size < MAX_LCU_SIZE) ? 0 : 1;
3945
3946 // Transform Loop
3947 cuPtr->transformUnitArray[0].lumaCbf = EB_FALSE;
3948 cuPtr->transformUnitArray[0].cbCbf = EB_FALSE;
3949 cuPtr->transformUnitArray[0].crCbf = EB_FALSE;
3950 cuPtr->transformUnitArray[0].cbCbf2 = EB_FALSE;
3951 cuPtr->transformUnitArray[0].crCbf2 = EB_FALSE;
3952
3953 // initialize TU Split
3954 yFullDistortion[DIST_CALC_RESIDUAL] = 0;
3955 yFullDistortion[DIST_CALC_PREDICTION] = 0;
3956
3957 yCoeffBits = 0;
3958 cbCoeffBits = 0;
3959 crCoeffBits = 0;
3960
3961 //printf("sizeof %i \n",sizeof(CodingUnit_t));
3962 EB_U32 totTu = (cuStats->size < MAX_LCU_SIZE) ? 1 : 4;
3963 EB_U8 tuIt;
3964
3965 EB_U32 componentMask = PICTURE_BUFFER_DESC_FULL_MASK;
3966 EB_MODETYPE predictionModeFlag = (EB_MODETYPE)cuPtr->predictionModeFlag;
3967
3968 if (cuPtr->predictionUnitArray[0].mergeFlag == EB_FALSE) {
3969 for (tuIt = 0; tuIt < totTu; tuIt++) {
3970 contextPtr->tuItr = (cuStats->size < MAX_LCU_SIZE) ? 0 : tuIt + 1;
3971 if (cuStats->size < MAX_LCU_SIZE) {
3972 tuOriginX = contextPtr->cuOriginX;
3973 tuOriginY = contextPtr->cuOriginY;
3974 tuSize = cuStats->size;
3975 tuSizeChroma = (cuStats->size >> (colorFormat==EB_YUV444 ? 0 : 1));
3976 } else {
3977 tuOriginX = contextPtr->cuOriginX + ((tuIt & 1) << 5);
3978 tuOriginY = contextPtr->cuOriginY + ((tuIt > 1) << 5);
3979 tuSize = 32;
3980 tuSizeChroma = (colorFormat == EB_YUV444 ? 32: 16);
3981 }
3982
3983 //TU LOOP for MV mode + Luma CBF decision.
3984 contextPtr->forceCbfFlag = (contextPtr->skipQpmFlag) ?
3985 EB_FALSE :
3986 lcuPtr->lcuEdgeInfoPtr->tileLeftEdgeFlag && ((tuOriginX & 63) == 0) && (tuOriginY == lcuOriginY);
3987
3988 SetPmEncDecMode(
3989 pictureControlSetPtr,
3990 contextPtr,
3991 tbAddr,
3992 lcuStatPtr->stationaryEdgeOverTimeFlag,
3993 pictureControlSetPtr->temporalLayerIndex > 0 ? lcuStatPtr->pmStationaryEdgeOverTimeFlag : lcuStatPtr->stationaryEdgeOverTimeFlag);
3994
3995 // Set Fast El coef shaping method
3996 contextPtr->transCoeffShapeLuma = DEFAULT_SHAPE;
3997 contextPtr->transCoeffShapeChroma = DEFAULT_SHAPE;
3998
3999 if (fastEl && isFirstCUinRow == EB_FALSE && contextPtr->pmpMaskingLevelEncDec > MASK_THSHLD_1) {
4000 yDc = contextPtr->mdContext->mdEpPipeLcu[cuPtr->leafIndex].yDc[tuIt];
4001 yCountNonZeroCoeffs = contextPtr->mdContext->mdEpPipeLcu[cuPtr->leafIndex].yCountNonZeroCoeffs[tuIt];
4002
4003 if ((cuPtr->rootCbf == 0) || ((yCoeffBitsTemp <= yBitsThsld) && yDc < YDC_THSHLD_1 && yCountNonZeroCoeffs <= 1)) {
4004 // Skip pass for cuPtr->rootCbf == 0 caused some VQ issues in chroma, so DC path is used instead
4005 contextPtr->transCoeffShapeLuma = ONLY_DC_SHAPE;
4006 contextPtr->transCoeffShapeChroma = ONLY_DC_SHAPE;
4007 } else if ((yCoeffBitsTemp <= yBitsThsld * 4)) {
4008 contextPtr->transCoeffShapeLuma = N4_SHAPE;
4009 if ((cuStats->size >> 1) > 8) {
4010 contextPtr->transCoeffShapeChroma = N4_SHAPE;
4011 } else {
4012 contextPtr->transCoeffShapeChroma = N2_SHAPE;
4013 }
4014 } else if ((yCoeffBitsTemp <= yBitsThsld * 16)) {
4015 contextPtr->transCoeffShapeLuma = N2_SHAPE;
4016 contextPtr->transCoeffShapeChroma = N2_SHAPE;
4017 }
4018 }
4019
4020 EncodeLoopFunctionTable[is16bit](
4021 contextPtr,
4022 lcuPtr,
4023 tuOriginX,
4024 tuOriginY,
4025 cbQp,
4026 reconBuffer,
4027 coeffBufferTB,
4028 residualBuffer,
4029 transformBuffer,
4030 transformInnerArrayPtr,
4031 countNonZeroCoeffs,
4032 useDeltaQpSegments,
4033 (CabacEncodeContext_t*)coeffEstEntropyCoderPtr->cabacEncodeContextPtr,
4034 0,
4035 PICTURE_BUFFER_DESC_FULL_MASK,
4036 colorFormat,
4037 EB_FALSE,
4038 tuSize,
4039 pictureControlSetPtr->cabacCost,
4040 cuPtr->deltaQp > 0 ? 0 : dZoffset);
4041
4042 //Jing: For 422, do for the 2nd chroma
4043 if (colorFormat == EB_YUV422) {
4044 EB_U32 tmpCountNonZeroCoeffs[3];
4045 EncodeLoopFunctionTable[is16bit](
4046 contextPtr,
4047 lcuPtr,
4048 tuOriginX,
4049 tuOriginY,
4050 cbQp,
4051 reconBuffer,
4052 coeffBufferTB,
4053 residualBuffer,
4054 transformBuffer,
4055 transformInnerArrayPtr,
4056 tmpCountNonZeroCoeffs, //Jing: beware of this
4057 useDeltaQpSegments,
4058 (CabacEncodeContext_t*)coeffEstEntropyCoderPtr->cabacEncodeContextPtr,
4059 0,
4060 PICTURE_BUFFER_DESC_CHROMA_MASK,
4061 colorFormat,
4062 EB_TRUE,
4063 tuSize,
4064 pictureControlSetPtr->cabacCost,
4065 cuPtr->deltaQp > 0 ? 0 : dZoffset);
4066 // Jing, seems not useful here, never used ...
4067 //countNonZeroCoeffs[1] += tmpCountNonZeroCoeffs[1];
4068 //countNonZeroCoeffs[2] += tmpCountNonZeroCoeffs[2];
4069 }
4070
4071 // SKIP the CBF zero mode for DC path. There are problems with cost calculations
4072 if (contextPtr->transCoeffShapeLuma != ONLY_DC_SHAPE && colorFormat == EB_YUV420) {
4073 // Jing: A bit of a mess here: it seems only luma is used, but chroma is handled everywhere and wastes computation.
4074 // Will clean it up later for 422; only enabled for 420 for now
4075 scratchLumaOffset = ((tuOriginY & (63)) * 64) + (tuOriginX & (63));
4076
4077 // Compute Tu distortion
4078 PictureFullDistortionLuma(
4079 transformBuffer,
4080 scratchLumaOffset,
4081 residualBuffer,
4082 scratchLumaOffset,
4083 contextPtr->transCoeffShapeLuma == ONLY_DC_SHAPE || cuPtr->transformUnitArray[contextPtr->tuItr].isOnlyDc[0] == EB_TRUE ? 1 : (tuSize >> contextPtr->transCoeffShapeLuma),
4084 yTuFullDistortion,
4085 countNonZeroCoeffs[0],
4086 predictionModeFlag);
4087
4088
4089 lumaShift = 2 * (7 - Log2f(tuSize));
4090
4091 // Note: for square Transform, the scale is 1/(2^(7-Log2(Transform size)))
4092 // For NSQT the scale would be 1/ (2^(7-(Log2(first Transform size)+Log2(second Transform size))/2))
4093 // Add Log2 of Transform size in order to avoid calculating it multiple times in this function
4094
4095 yTuFullDistortion[DIST_CALC_RESIDUAL] = (yTuFullDistortion[DIST_CALC_RESIDUAL] + (EB_U64)(1 << (lumaShift - 1))) >> lumaShift;
4096 yTuFullDistortion[DIST_CALC_PREDICTION] = (yTuFullDistortion[DIST_CALC_PREDICTION] + (EB_U64)(1 << (lumaShift - 1))) >> lumaShift;
4097
4098 yTuCoeffBits = 0;
4099 cbTuCoeffBits = 0;
4100 crTuCoeffBits = 0;
4101
4102 // Estimate Tu Coeff bits
4103 TuEstimateCoeffBitsEncDec(
4104 (tuOriginY & (63)) * MAX_LCU_SIZE + (tuOriginX & (63)),
4105 ((tuOriginY & (63)) * MAX_LCU_SIZE_CHROMA + (tuOriginX & (63))) >> 1, //Jing: 444 is different
4106 coeffEstEntropyCoderPtr,
4107 coeffBufferTB,
4108 countNonZeroCoeffs,
4109 &yTuCoeffBits,
4110 &cbTuCoeffBits,
4111 &crTuCoeffBits,
4112 contextPtr->transCoeffShapeLuma == ONLY_DC_SHAPE ? 1 : (tuSize >> contextPtr->transCoeffShapeLuma),
4113 contextPtr->transCoeffShapeChroma == ONLY_DC_SHAPE ? 1 : (tuSizeChroma >> contextPtr->transCoeffShapeChroma),
4114 predictionModeFlag,
4115 cabacCost);
4116
4117 // CBF Tu decision
4118 EncodeTuCalcCost(
4119 contextPtr,
4120 countNonZeroCoeffs,
4121 yTuFullDistortion,
4122 &yTuCoeffBits,
4123 componentMask);
4124
4125 yCoeffBits += yTuCoeffBits;
4126 cbCoeffBits += cbTuCoeffBits;
4127 crCoeffBits += crTuCoeffBits;
4128
4129 yFullDistortion[DIST_CALC_RESIDUAL] += yTuFullDistortion[DIST_CALC_RESIDUAL];
4130 yFullDistortion[DIST_CALC_PREDICTION] += yTuFullDistortion[DIST_CALC_PREDICTION];
4131 //-------------------------------------------------
4132 }
4133 } // Transform Loop
4134 }
4135
4136 //Set Final CU data flags after skip/Merge decision.
4137 if (isFirstCUinRow == EB_FALSE) {
4138 if (cuPtr->predictionUnitArray[0].mergeFlag == EB_TRUE) {
4139 cuPtr->skipFlag = (isCuSkip) ? EB_TRUE : EB_FALSE;
4140 cuPtr->predictionUnitArray[0].mergeFlag = (isCuSkip) ? EB_FALSE : EB_TRUE;
4141 }
4142 }
4143
4144 // Initialize the Transform Loop
4145 contextPtr->tuItr = (cuStats->size < MAX_LCU_SIZE) ? 0 : 1;
4146 yCbf = 0;
4147 cbCbf = 0;
4148 crCbf = 0;
4149 cbCbf2 = 0;
4150 crCbf2 = 0;
4151
4152 for (tuIt = 0; tuIt < totTu; tuIt++) {
4153 contextPtr->tuItr = (cuStats->size < MAX_LCU_SIZE) ? 0 : tuIt + 1;
4154 if (cuStats->size < MAX_LCU_SIZE) {
4155 tuOriginX = contextPtr->cuOriginX;
4156 tuOriginY = contextPtr->cuOriginY;
4157 tuSize = cuStats->size;
4158 tuSizeChroma = (tuSize >> (colorFormat==EB_YUV444 ? 0 : 1));
4159 } else {
4160 tuOriginX = contextPtr->cuOriginX + ((tuIt & 1) << 5);
4161 tuOriginY = contextPtr->cuOriginY + ((tuIt > 1) << 5);
4162 tuSize = 32;
4163 tuSizeChroma = colorFormat==EB_YUV444 ? 32 : 16;
4164 }
4165
4166 if (cuPtr->skipFlag == EB_TRUE){
4167 cuPtr->transformUnitArray[contextPtr->tuItr].lumaCbf = EB_FALSE;
4168 cuPtr->transformUnitArray[contextPtr->tuItr].cbCbf = EB_FALSE;
4169 cuPtr->transformUnitArray[contextPtr->tuItr].crCbf = EB_FALSE;
4170 cuPtr->transformUnitArray[contextPtr->tuItr].cbCbf2 = EB_FALSE;
4171 cuPtr->transformUnitArray[contextPtr->tuItr].crCbf2 = EB_FALSE;
4172 } else if (cuPtr->predictionUnitArray[0].mergeFlag == EB_TRUE) {
4173 contextPtr->forceCbfFlag = (contextPtr->skipQpmFlag) ?
4174 EB_FALSE :
4175 lcuPtr->lcuEdgeInfoPtr->tileLeftEdgeFlag && ((tuOriginX & 63) == 0) && (tuOriginY == lcuOriginY);
4176
4177 SetPmEncDecMode(
4178 pictureControlSetPtr,
4179 contextPtr,
4180 tbAddr,
4181 lcuStatPtr->stationaryEdgeOverTimeFlag,
4182 pictureControlSetPtr->temporalLayerIndex > 0 ? lcuStatPtr->pmStationaryEdgeOverTimeFlag : lcuStatPtr->stationaryEdgeOverTimeFlag);
4183
4184 // Set Fast El coef shaping method
4185 contextPtr->transCoeffShapeLuma = DEFAULT_SHAPE;
4186 contextPtr->transCoeffShapeChroma = DEFAULT_SHAPE;
4187
4188 if (fastEl && isFirstCUinRow == EB_FALSE && contextPtr->pmpMaskingLevelEncDec > MASK_THSHLD_1) {
4189 yDc = contextPtr->mdContext->mdEpPipeLcu[cuPtr->leafIndex].yDc[tuIt];
4190 yCountNonZeroCoeffs = contextPtr->mdContext->mdEpPipeLcu[cuPtr->leafIndex].yCountNonZeroCoeffs[tuIt];
4191
4192 if ((cuPtr->rootCbf == 0) || ((yCoeffBitsTemp <= yBitsThsld) && yDc < YDC_THSHLD_1 && yCountNonZeroCoeffs <= 1)) {
4193 // Skip pass for cuPtr->rootCbf == 0 caused some VQ issues in chroma, so DC path is used instead
4194 contextPtr->transCoeffShapeLuma = ONLY_DC_SHAPE;
4195 contextPtr->transCoeffShapeChroma = ONLY_DC_SHAPE;
4196 } else if ((yCoeffBitsTemp <= yBitsThsld * 4)) {
4197 contextPtr->transCoeffShapeLuma = N4_SHAPE;
4198 if ((cuStats->size >> 1) > 8) {
4199 contextPtr->transCoeffShapeChroma = N4_SHAPE;
4200 } else {
4201 contextPtr->transCoeffShapeChroma = N2_SHAPE;
4202 }
4203 } else if ((yCoeffBitsTemp <= yBitsThsld * 16)) {
4204 contextPtr->transCoeffShapeLuma = N2_SHAPE;
4205 contextPtr->transCoeffShapeChroma = N2_SHAPE;
4206 }
4207 }
4208
4209 EncodeLoopFunctionTable[is16bit](
4210 contextPtr,
4211 lcuPtr,
4212 tuOriginX,
4213 tuOriginY,
4214 cbQp,
4215 reconBuffer,
4216 coeffBufferTB,
4217 residualBuffer,
4218 transformBuffer,
4219 transformInnerArrayPtr,
4220 countNonZeroCoeffs,
4221 useDeltaQpSegments,
4222 (CabacEncodeContext_t*)coeffEstEntropyCoderPtr->cabacEncodeContextPtr,
4223 0,
4224 PICTURE_BUFFER_DESC_FULL_MASK,
4225 colorFormat,
4226 EB_FALSE,
4227 tuSize,
4228 pictureControlSetPtr->cabacCost,
4229 cuPtr->deltaQp > 0 ? 0 : dZoffset);
4230
4231 if (colorFormat == EB_YUV422) {
4232 EncodeLoopFunctionTable[is16bit](
4233 contextPtr,
4234 lcuPtr,
4235 tuOriginX,
4236 tuOriginY,
4237 cbQp,
4238 reconBuffer,
4239 coeffBufferTB,
4240 residualBuffer,
4241 transformBuffer,
4242 transformInnerArrayPtr,
4243 countNonZeroCoeffs,
4244 useDeltaQpSegments,
4245 (CabacEncodeContext_t*)coeffEstEntropyCoderPtr->cabacEncodeContextPtr,
4246 0,
4247 PICTURE_BUFFER_DESC_CHROMA_MASK,
4248 colorFormat,
4249 EB_TRUE,
4250 tuSize,
4251 pictureControlSetPtr->cabacCost,
4252 cuPtr->deltaQp > 0 ? 0 : dZoffset);
4253 }
4254 }
4255
4256 cuPtr->rootCbf = cuPtr->rootCbf |
4257 cuPtr->transformUnitArray[contextPtr->tuItr].lumaCbf |
4258 cuPtr->transformUnitArray[contextPtr->tuItr].cbCbf |
4259 cuPtr->transformUnitArray[contextPtr->tuItr].cbCbf2 |
4260 cuPtr->transformUnitArray[contextPtr->tuItr].crCbf |
4261 cuPtr->transformUnitArray[contextPtr->tuItr].crCbf2;
4262
4263 if (cuPtr->transformUnitArray[contextPtr->tuItr].cbCbf) {
4264 cuPtr->transformUnitArray[0].cbCbf = EB_TRUE;
4265 }
4266
4267 if (cuPtr->transformUnitArray[contextPtr->tuItr].cbCbf2) {
4268 cuPtr->transformUnitArray[0].cbCbf2 = EB_TRUE;
4269 }
4270
4271 contextPtr->forceCbfFlag = (contextPtr->skipQpmFlag) ?
4272 EB_FALSE :
4273 lcuPtr->lcuEdgeInfoPtr->tileLeftEdgeFlag && ((tuOriginX & 63) == 0) && (tuOriginY == lcuOriginY);
4274
4275 if (cuPtr->transformUnitArray[contextPtr->tuItr].crCbf) {
4276 cuPtr->transformUnitArray[0].crCbf = EB_TRUE;
4277 }
4278
4279 if (cuPtr->transformUnitArray[contextPtr->tuItr].crCbf2) {
4280 cuPtr->transformUnitArray[0].crCbf2 = EB_TRUE;
4281 }
4282
4283 if (doRecon) {
4284 EncodeGenerateReconFunctionPtr[is16bit](
4285 contextPtr,
4286 tuOriginX,
4287 tuOriginY,
4288 PICTURE_BUFFER_DESC_FULL_MASK,
4289 colorFormat,
4290 EB_FALSE,
4291 tuSize,
4292 reconBuffer,
4293 residualBuffer,
4294 transformInnerArrayPtr);
4295 if (colorFormat == EB_YUV422) {
4296 EncodeGenerateReconFunctionPtr[is16bit](
4297 contextPtr,
4298 tuOriginX,
4299 tuOriginY,
4300 PICTURE_BUFFER_DESC_CHROMA_MASK,
4301 colorFormat,
4302 EB_TRUE,
4303 tuSize,
4304 reconBuffer,
4305 residualBuffer,
4306 transformInnerArrayPtr);
4307 }
4308 }
4309 yCbf |= cuPtr->transformUnitArray[contextPtr->tuItr].lumaCbf;
4310 cbCbf |= cuPtr->transformUnitArray[contextPtr->tuItr].cbCbf;
4311 crCbf |= cuPtr->transformUnitArray[contextPtr->tuItr].crCbf;
4312 cbCbf2 |= cuPtr->transformUnitArray[contextPtr->tuItr].cbCbf2;
4313 crCbf2 |= cuPtr->transformUnitArray[contextPtr->tuItr].crCbf2;
4314
4315 if (dlfEnableFlag) {
4316
4317 EB_U32 lumaStride = (sequenceControlSetPtr->lumaWidth >> 2);
4318 TransformUnit_t *tuPtr = &cuPtr->transformUnitArray[contextPtr->tuItr];
4319
4320 // Update the cbf map for DLF
4321 startIndex = (tuOriginY >> 2) * lumaStride + (tuOriginX >> 2);
4322 for (blk4x4IndexY = 0; blk4x4IndexY < (tuSize >> 2); ++blk4x4IndexY){
4323 EB_MEMSET(&pictureControlSetPtr->cbfMapArray[startIndex], (EB_U8)tuPtr->lumaCbf, (tuSize >> 2));
4324 startIndex += lumaStride;
4325 }
4326
4327 if (cuStats->size == MAX_LCU_SIZE)
4328 // Set the bS on TU boundary for DLF
4329 SetBSArrayBasedOnTUBoundary(
4330 tuOriginX,
4331 tuOriginY,
4332 tuSize,
4333 tuSize,
4334 cuStats,
4335 (EB_PART_MODE)cuPtr->predictionModeFlag,
4336 lcuOriginX,
4337 lcuOriginY,
4338 pictureControlSetPtr,
4339 pictureControlSetPtr->horizontalEdgeBSArray[tbAddr],
4340 pictureControlSetPtr->verticalEdgeBSArray[tbAddr]);
4341 }
4342 } // Transform Loop
4343
4344 // Calculate Root CBF
4345 cuPtr->rootCbf = (yCbf | cbCbf | cbCbf2 | crCbf | crCbf2 ) ? EB_TRUE : EB_FALSE;
4346
4347 // Force Skip if MergeFlag == TRUE && RootCbf == 0
4348 if (cuPtr->skipFlag == EB_FALSE &&
4349 cuPtr->predictionUnitArray[0].mergeFlag == EB_TRUE &&
4350 cuPtr->rootCbf == EB_FALSE ) {
4351 cuPtr->skipFlag = EB_TRUE;
4352 }
4353
4354 {
4355 // Set the PU Loop Variables
4356 puPtr = cuPtr->predictionUnitArray;
4357
4358 // Set MvUnit
4359 contextPtr->mvUnit.predDirection = (EB_U8)puPtr->interPredDirectionIndex;
4360 contextPtr->mvUnit.mv[REF_LIST_0].mvUnion = puPtr->mv[REF_LIST_0].mvUnion;
4361 contextPtr->mvUnit.mv[REF_LIST_1].mvUnion = puPtr->mv[REF_LIST_1].mvUnion;
4362 // set up the bS based on PU boundary for DLF
4363 if (dlfEnableFlag /*&& cuStats->size < MAX_LCU_SIZE*/ ) {
4364 SetBSArrayBasedOnPUBoundary(
4365 epModeTypeNeighborArray,
4366 epMvNeighborArray,
4367 puPtr,
4368 cuPtr,
4369 cuStats,
4370 lcuOriginX,
4371 lcuOriginY,
4372 tileLeftBoundary,
4373 tileTopBoundary,
4374 pictureControlSetPtr,
4375 pictureControlSetPtr->horizontalEdgeBSArray[tbAddr],
4376 pictureControlSetPtr->verticalEdgeBSArray[tbAddr]);
4377 }
4378
4379 // Update Neighbor Arrays (Mode Type, MVs, SKIP)
4380 {
4381 EB_U8 skipFlag = (EB_U8)cuPtr->skipFlag;
4382 EncodePassUpdateInterModeNeighborArrays(
4383 epModeTypeNeighborArray,
4384 epMvNeighborArray,
4385 epSkipFlagNeighborArray,
4386 &contextPtr->mvUnit,
4387 &skipFlag,
4388 contextPtr->cuOriginX,
4389 contextPtr->cuOriginY,
4390 cuStats->size);
4391
4392 }
4393
4394 } // 2nd Partition Loop
4395
4396
4397 // Update Recon Samples Neighbor Arrays -INTER-
4398 if (doRecon) {
4399 EncodePassUpdateReconSampleNeighborArrays(
4400 epLumaReconNeighborArray,
4401 epCbReconNeighborArray,
4402 epCrReconNeighborArray,
4403 reconBuffer,
4404 contextPtr->cuOriginX,
4405 contextPtr->cuOriginY,
4406 cuStats->size,
4407 PICTURE_BUFFER_DESC_FULL_MASK,
4408 colorFormat,
4409 is16bit);
4410
4411 if (colorFormat == EB_YUV422) {
4412 //Here need to update the 2nd chroma for neighbour
4413 EncodePassUpdateReconSampleNeighborArrays(
4414 epLumaReconNeighborArray,
4415 epCbReconNeighborArray,
4416 epCrReconNeighborArray,
4417 reconBuffer,
4418 contextPtr->cuOriginX,
4419 contextPtr->cuOriginY+(cuStats->size>>1),
4420 cuStats->size,
4421 PICTURE_BUFFER_DESC_CHROMA_MASK,
4422 colorFormat,
4423 is16bit);
4424 }
4425 }
4426 } else {
4427 CHECK_REPORT_ERROR_NC(
4428 encodeContextPtr->appCallbackPtr,
4429 EB_ENC_CL_ERROR2);
4430 }
4431
4432
4433 if (dlfEnableFlag) {
4434 // Assign the LCU-level QP
4435 if (cuPtr->predictionModeFlag == INTRA_MODE && puPtr->intraLumaMode == EB_INTRA_MODE_4x4) {
4436 availableCoeff = (
4437 contextPtr->cuPtr->transformUnitArray[1].lumaCbf ||
4438 contextPtr->cuPtr->transformUnitArray[2].lumaCbf ||
4439 contextPtr->cuPtr->transformUnitArray[3].lumaCbf ||
4440 contextPtr->cuPtr->transformUnitArray[4].lumaCbf ||
4441 contextPtr->cuPtr->transformUnitArray[1].crCbf ||
4442 contextPtr->cuPtr->transformUnitArray[1].cbCbf ||
4443 contextPtr->cuPtr->transformUnitArray[2].crCbf ||
4444 contextPtr->cuPtr->transformUnitArray[2].cbCbf ||
4445 contextPtr->cuPtr->transformUnitArray[3].crCbf ||
4446 contextPtr->cuPtr->transformUnitArray[3].cbCbf ||
4447 contextPtr->cuPtr->transformUnitArray[4].crCbf || // 422 case will use 3rd 4x4 for the 2nd chroma
4448 contextPtr->cuPtr->transformUnitArray[4].cbCbf) ? EB_TRUE : EB_FALSE;
4449 } else {
4450 availableCoeff = (cuPtr->predictionModeFlag == INTER_MODE) ? (EB_BOOL)cuPtr->rootCbf :
4451 (cuPtr->transformUnitArray[cuStats->size == MAX_LCU_SIZE ? 1 : 0].lumaCbf ||
4452 cuPtr->transformUnitArray[cuStats->size == MAX_LCU_SIZE ? 1 : 0].crCbf ||
4453 cuPtr->transformUnitArray[cuStats->size == MAX_LCU_SIZE ? 1 : 0].crCbf2 ||
4454 cuPtr->transformUnitArray[cuStats->size == MAX_LCU_SIZE ? 1 : 0].cbCbf ||
4455 cuPtr->transformUnitArray[cuStats->size == MAX_LCU_SIZE ? 1 : 0].cbCbf2) ? EB_TRUE : EB_FALSE;
4456 }
4457
4458
4459 // Assign the LCU-level QP
4460 EncodePassUpdateQp(
4461 pictureControlSetPtr,
4462 contextPtr,
4463 availableCoeff,
4464 useDeltaQp,
4465 &isDeltaQpNotCoded,
4466 pictureControlSetPtr->difCuDeltaQpDepth,
4467 &(pictureControlSetPtr->encPrevCodedQp[tileIdx][singleSegment ? 0 : lcuRowIndex]),
4468 &(pictureControlSetPtr->encPrevQuantGroupCodedQp[tileIdx][singleSegment ? 0 : lcuRowIndex]),
4469 lcuPtr->tileInfoPtr->tilePxlOriginX,
4470 lcuPtr->tileInfoPtr->tilePxlOriginY,
4471 lcuQp);
4472
4473 // Assign DLF QP
4474 SetQpArrayBasedOnCU(
4475 pictureControlSetPtr,
4476 contextPtr->cuOriginX,
4477 contextPtr->cuOriginY,
4478 cuStats->size / MIN_CU_SIZE,
4479 cuPtr->qp);
4480 }
4481
4482 {
4483 // Update Neighbor Arrays (Leaf Depth)
4484 EncodePassUpdateLeafDepthNeighborArrays(
4485 epLeafDepthNeighborArray,
4486 cuStats->depth,
4487 contextPtr->cuOriginX,
4488 contextPtr->cuOriginY,
4489 cuStats->size);
4490 {
4491 // Set the PU Loop Variables
4492 puPtr = cuPtr->predictionUnitArray;
4493 // Set MvUnit
4494 contextPtr->mvUnit.predDirection = (EB_U8)puPtr->interPredDirectionIndex;
4495 contextPtr->mvUnit.mv[REF_LIST_0].mvUnion = puPtr->mv[REF_LIST_0].mvUnion;
4496 contextPtr->mvUnit.mv[REF_LIST_1].mvUnion = puPtr->mv[REF_LIST_1].mvUnion;
4497 }
4498
4499
4500 // Update TMVP Map (create new one and compare to the old one!!!)
4501 if (tmvpMapWritePtr != EB_NULL){
4502
4503 puPtr = cuPtr->predictionUnitArray;
4504 tmvpMapVerticalStartIndex = (cuStats->originY + mvCompressionUnitSizeMinus1) >> LOG_MV_COMPRESS_UNIT_SIZE; //elemPU's vertical index relative to current LCU on 16x16 basic unit
4505 tmvpMapHorizontalEndIndex = (cuStats->originX + cuStats->size + mvCompressionUnitSizeMinus1) >> LOG_MV_COMPRESS_UNIT_SIZE;
4506 tmvpMapVerticalEndIndex = (cuStats->originY + cuStats->size + mvCompressionUnitSizeMinus1) >> LOG_MV_COMPRESS_UNIT_SIZE; // the problem is at this line, in 64x48 PU, this value turns out to be 4 while it is supposed to be 3
4507 tmvpMapHorizontalStartIndex = (cuStats->originX + mvCompressionUnitSizeMinus1) >> LOG_MV_COMPRESS_UNIT_SIZE;
4508
4509 while (tmvpMapVerticalStartIndex < tmvpMapVerticalEndIndex){
4510 tmvpMapHorizontalStartIndex = (cuStats->originX + mvCompressionUnitSizeMinus1) >> LOG_MV_COMPRESS_UNIT_SIZE;
4511 tmvpMapIndex = (tmvpMapVerticalStartIndex * (MAX_LCU_SIZE >> LOG_MV_COMPRESS_UNIT_SIZE)) + tmvpMapHorizontalStartIndex;
4512
4513 while ((tmvpMapHorizontalStartIndex) < tmvpMapHorizontalEndIndex){
4514 switch (cuPtr->predictionModeFlag){
4515 case INTER_MODE:
4516 switch (cuPtr->predictionUnitArray->interPredDirectionIndex){
4517
4518 case UNI_PRED_LIST_0:
4519 tmvpMapWritePtr->availabilityFlag[tmvpMapIndex] = EB_TRUE;
4520 tmvpMapWritePtr->mv[REF_LIST_0][tmvpMapIndex].mvUnion = puPtr->mv[REF_LIST_0].mvUnion;
4521 tmvpMapWritePtr->predictionDirection[tmvpMapIndex] = UNI_PRED_LIST_0;
4522 tmvpMapWritePtr->refPicPOC[REF_LIST_0][tmvpMapIndex] = ((EbReferenceObject_t*)
4523 pictureControlSetPtr->refPicPtrArray[REF_LIST_0]->objectPtr
4524 )->refPOC;
4525 break;
4526
4527 case UNI_PRED_LIST_1:
4528 tmvpMapWritePtr->availabilityFlag[tmvpMapIndex] = EB_TRUE;
4529 tmvpMapWritePtr->mv[REF_LIST_1][tmvpMapIndex].mvUnion = puPtr->mv[REF_LIST_1].mvUnion;
4530 tmvpMapWritePtr->predictionDirection[tmvpMapIndex] = UNI_PRED_LIST_1;
4531 tmvpMapWritePtr->refPicPOC[REF_LIST_1][tmvpMapIndex] = ((EbReferenceObject_t*)
4532 pictureControlSetPtr->refPicPtrArray[REF_LIST_1]->objectPtr
4533 )->refPOC;
4534 break;
4535
4536 case BI_PRED:
4537 if (puPtr->interPredDirectionIndex == BI_PRED || puPtr->interPredDirectionIndex == UNI_PRED_LIST_0){
4538 tmvpMapWritePtr->availabilityFlag[tmvpMapIndex] = EB_TRUE;
4539 tmvpMapWritePtr->mv[REF_LIST_0][tmvpMapIndex].mvUnion = puPtr->mv[REF_LIST_0].mvUnion;
4540 tmvpMapWritePtr->predictionDirection[tmvpMapIndex] = (EB_PREDDIRECTION)puPtr->interPredDirectionIndex;
4541 tmvpMapWritePtr->refPicPOC[REF_LIST_0][tmvpMapIndex] = ((EbReferenceObject_t*)
4542 pictureControlSetPtr->refPicPtrArray[REF_LIST_0]->objectPtr
4543 )->refPOC;
4544 }
4545
4546 if (puPtr->interPredDirectionIndex == BI_PRED || puPtr->interPredDirectionIndex == UNI_PRED_LIST_1){
4547 tmvpMapWritePtr->availabilityFlag[tmvpMapIndex] = EB_TRUE;
4548 tmvpMapWritePtr->mv[REF_LIST_1][tmvpMapIndex].mvUnion = puPtr->mv[REF_LIST_1].mvUnion;
4549 tmvpMapWritePtr->predictionDirection[tmvpMapIndex] = (EB_PREDDIRECTION)puPtr->interPredDirectionIndex;
4550 tmvpMapWritePtr->refPicPOC[REF_LIST_1][tmvpMapIndex] = ((EbReferenceObject_t*)
4551 pictureControlSetPtr->refPicPtrArray[REF_LIST_1]->objectPtr
4552 )->refPOC;
4553 }
4554
4555 break;
4556
4557 default:
4558 CHECK_REPORT_ERROR_NC(
4559 encodeContextPtr->appCallbackPtr,
4560 EB_ENC_INTER_PRED_ERROR0);
4561
4562 }
4563 break;
4564
4565 case INTRA_MODE:
4566 tmvpMapWritePtr->availabilityFlag[tmvpMapIndex] = EB_FALSE;
4567 break;
4568
4569 default:
4570
4571 CHECK_REPORT_ERROR_NC(
4572 encodeContextPtr->appCallbackPtr,
4573 EB_ENC_CL_ERROR2);
4574 break;
4575 }
4576
4577 //*Note- Filling the map for list 1 motion info will be added when B-slices are ready
4578
4579 ++tmvpMapHorizontalStartIndex;
4580 ++tmvpMapIndex;
4581 }
4582 ++tmvpMapVerticalStartIndex;
4583 }
4584
4585 }
4586 }
4587
4588 cuItr += DepthOffset[cuStats->depth];
4589 }
4590 else{
4591 cuItr++;
4592 }
4593
4594 } // CU Loop
4595
4596 contextPtr->codedLcuCount++;
4597 //Jing:
4598 //For true tile mode, need to change DLF accordingly
4599 // First Pass Deblocking
4600 if (dlfEnableFlag){
4601
4602 EB_U32 pictureWidthInLcu = (sequenceControlSetPtr->lumaWidth + 63) >> LOG2F_MAX_LCU_SIZE;
4603
4604 LcuInternalAreaDLFCoreFuncTable[is16bit](
4605 reconBuffer,
4606 lcuOriginX,
4607 lcuOriginY,
4608 lcuWidth,
4609 lcuHeight,
4610 pictureControlSetPtr->verticalEdgeBSArray[tbAddr],
4611 pictureControlSetPtr->horizontalEdgeBSArray[tbAddr],
4612 pictureControlSetPtr);
4613
4614 LcuBoundaryDLFCoreFuncTable[is16bit](
4615 reconBuffer,
4616 lcuOriginX,
4617 lcuOriginY,
4618 lcuWidth,
4619 lcuHeight,
4620 pictureControlSetPtr->verticalEdgeBSArray[tbAddr],
4621 pictureControlSetPtr->horizontalEdgeBSArray[tbAddr],
4622 //lcuOriginY == 0 ? (EB_U8*)EB_NULL : pictureControlSetPtr->verticalEdgeBSArray[tbAddr - pictureWidthInLcu],
4623 //lcuOriginX == 0 ? (EB_U8*)EB_NULL : pictureControlSetPtr->horizontalEdgeBSArray[tbAddr - 1],
4624 lcuPtr->lcuEdgeInfoPtr->tileTopEdgeFlag ? (EB_U8*)EB_NULL : pictureControlSetPtr->verticalEdgeBSArray[tbAddr - pictureWidthInLcu],
4625 lcuPtr->lcuEdgeInfoPtr->tileLeftEdgeFlag ? (EB_U8*)EB_NULL : pictureControlSetPtr->horizontalEdgeBSArray[tbAddr - 1],
4626 pictureControlSetPtr);
4627
4628 LcuPicEdgeDLFCoreFuncTable[is16bit](
4629 reconBuffer,
4630 tbAddr,
4631 lcuOriginX,
4632 lcuOriginY,
4633 lcuWidth,
4634 lcuHeight,
4635 pictureControlSetPtr);
4636
4637 }
4638
4639
4640 // SAO Parameter Generation
4641 if (enableSaoFlag) {
4642
4643 EB_S16 lcuDeltaQp = (EB_S16)(lcuPtr->qp - pictureControlSetPtr->ParentPcsPtr->averageQp);
4644
4645 SaoParameters_t *leftSaoPtr;
4646 SaoParameters_t *topSaoPtr;
4647
4648 //Jing: Double check for multi-tile
4649 //if (lcuOriginY != 0){
4650 if (!lcuPtr->lcuEdgeInfoPtr->tileTopEdgeFlag) {
4651 EB_U32 topSaoIndex = GetNeighborArrayUnitTopIndex(
4652 pictureControlSetPtr->epSaoNeighborArray[tileIdx],
4653 lcuOriginX);
4654
4655 topSaoPtr = ((SaoParameters_t*)pictureControlSetPtr->epSaoNeighborArray[tileIdx]->topArray) + topSaoIndex;
4656 }
4657 else{
4658 topSaoPtr = (SaoParameters_t*)EB_NULL;
4659 }
4660 //if (lcuOriginX != 0){
4661 if (!lcuPtr->lcuEdgeInfoPtr->tileLeftEdgeFlag) {
4662 EB_U32 leftSaoIndex = GetNeighborArrayUnitLeftIndex(
4663 pictureControlSetPtr->epSaoNeighborArray[tileIdx],
4664 lcuOriginY);
4665
4666 leftSaoPtr = ((SaoParameters_t*)pictureControlSetPtr->epSaoNeighborArray[tileIdx]->leftArray) + leftSaoIndex;
4667 }
4668 else{
4669 leftSaoPtr = (SaoParameters_t*)EB_NULL;
4670 }
4671
4672
4673 EB_U8 varCount32x32 = 0;
4674 varCount32x32 = ((pictureControlSetPtr->ParentPcsPtr->variance[tbAddr][1]) > 1000) +
4675 ((pictureControlSetPtr->ParentPcsPtr->variance[tbAddr][2]) > 1000) +
4676 ((pictureControlSetPtr->ParentPcsPtr->variance[tbAddr][3]) > 1000) +
4677 ((pictureControlSetPtr->ParentPcsPtr->variance[tbAddr][4]) > 1000);
4678
4679 EB_BOOL shutSaoCondition0;
4680 EB_BOOL shutSaoCondition1;
4681
4682 shutSaoCondition0 = (sequenceControlSetPtr->inputResolution < INPUT_SIZE_4K_RANGE || contextPtr->saoMode) ?
4683 EB_FALSE :
4684 ((pictureControlSetPtr->ParentPcsPtr->edgeResultsPtr[tbAddr].edgeBlockNum == 0 || (pictureControlSetPtr->sceneCaracteristicId != 0)) && (contextPtr->skipQpmFlag == EB_FALSE) && pictureControlSetPtr->ParentPcsPtr->picNoiseClass >= PIC_NOISE_CLASS_1 && !lcuStatPtr->stationaryEdgeOverTimeFlag);
4685
4686 shutSaoCondition1 = (contextPtr->saoMode) ?
4687 EB_FALSE :
4688 (sequenceControlSetPtr->inputResolution < INPUT_SIZE_4K_RANGE) ?
4689 (varCount32x32 < 1 && lcuDeltaQp <= 0 && pictureControlSetPtr->sliceType != EB_I_PICTURE && !lcuStatPtr->stationaryEdgeOverTimeFlag) :
4690 (((varCount32x32 < 1) && (lcuDeltaQp <= 0 && pictureControlSetPtr->sliceType != EB_I_PICTURE) && (contextPtr->skipQpmFlag == EB_FALSE)) && pictureControlSetPtr->ParentPcsPtr->picNoiseClass >= PIC_NOISE_CLASS_1 && !lcuStatPtr->stationaryEdgeOverTimeFlag);
4691
4692 if (doRecon == EB_FALSE || shutSaoCondition0 || shutSaoCondition1) {
4693
4694 lcuPtr->saoParams.saoTypeIndex[SAO_COMPONENT_LUMA] = 0;
4695 lcuPtr->saoParams.saoTypeIndex[SAO_COMPONENT_CHROMA] = 0;
4696 lcuPtr->saoParams.saoOffset[SAO_COMPONENT_LUMA][0] = 0;
4697 lcuPtr->saoParams.saoOffset[SAO_COMPONENT_LUMA][1] = 0;
4698 lcuPtr->saoParams.saoOffset[SAO_COMPONENT_LUMA][2] = 0;
4699 lcuPtr->saoParams.saoOffset[SAO_COMPONENT_LUMA][3] = 0;
4700 lcuPtr->saoParams.saoBandPosition[SAO_COMPONENT_LUMA] = 0;
4701 lcuPtr->saoParams.saoMergeLeftFlag = EB_FALSE;
4702 lcuPtr->saoParams.saoMergeUpFlag = EB_FALSE;
4703
4704 saoLumaBestCost = 0xFFFFFFFFFFFFFFFFull;
4705 saoChromaBestCost = 0xFFFFFFFFFFFFFFFFull;
4706
4707 }
4708 else {
4709 // Generate the SAO Parameters
4710 if (is16bit){
4711 SaoGenerationDecision16bit(
4712 contextPtr->inputSample16bitBuffer,
4713 contextPtr->saoStats,
4714 &lcuPtr->saoParams,
4715 contextPtr->mdRateEstimationPtr,
4716 contextPtr->fullLambda,
4717 contextPtr->fullChromaLambdaSao,
4718 contextPtr->saoMode,
4719 pictureControlSetPtr,
4720 lcuOriginX,
4721 lcuOriginY,
4722 lcuWidth,
4723 lcuHeight,
4724 &lcuPtr->saoParams,
4725 leftSaoPtr,
4726 topSaoPtr,
4727 &saoLumaBestCost,
4728 &saoChromaBestCost);
4729
4730 }
4731 else{
4732 SaoGenerationDecision(
4733 contextPtr->saoStats,
4734 &lcuPtr->saoParams,
4735 contextPtr->mdRateEstimationPtr,
4736 contextPtr->fullLambda,
4737 contextPtr->fullChromaLambdaSao,
4738 contextPtr->saoMode,
4739 pictureControlSetPtr,
4740 lcuOriginX,
4741 lcuOriginY,
4742 lcuWidth,
4743 lcuHeight,
4744 &lcuPtr->saoParams,
4745 leftSaoPtr,
4746 topSaoPtr,
4747 &saoLumaBestCost,
4748 &saoChromaBestCost);
4749
4750 if (contextPtr->skipQpmFlag == EB_FALSE){
4751 if (pictureControlSetPtr->ParentPcsPtr->picNoiseClass >= PIC_NOISE_CLASS_3_1 && pictureControlSetPtr->pictureQp >= 37) {
4752 lcuPtr->saoParams.saoTypeIndex[SAO_COMPONENT_LUMA] = 0;
4753 lcuPtr->saoParams.saoTypeIndex[SAO_COMPONENT_CHROMA] = 0;
4754 lcuPtr->saoParams.saoMergeLeftFlag = EB_FALSE;
4755 lcuPtr->saoParams.saoMergeUpFlag = EB_FALSE;
4756 }
4757 }
4758 }
4759 }
4760
4761 // Update the SAO Neighbor Array
4762 EncodePassUpdateSaoNeighborArrays(
4763 pictureControlSetPtr->epSaoNeighborArray[tileIdx],
4764 &lcuPtr->saoParams,
4765 lcuOriginX,
4766 lcuOriginY,
4767 contextPtr->lcuSize);
4768 }
4769 return;
4770 }
4771
4772
UnusedVariablevoidFunc_CodingLoop()4773 void UnusedVariablevoidFunc_CodingLoop()
4774 {
4775 (void)NxMSadLoopKernel_funcPtrArray;
4776 (void)NxMSadAveragingKernel_funcPtrArray;
4777 }
4778