1 /*
2 * Copyright(c) 2018 Intel Corporation
3 * SPDX - License - Identifier: BSD - 2 - Clause - Patent
4 */
5 
6 #include <stdlib.h>
7 
8 #include "EbUtility.h"
9 #include "EbPictureControlSet.h"
10 #include "EbSequenceControlSet.h"
11 #include "EbPictureDecisionResults.h"
12 #include "EbMotionEstimationProcess.h"
13 #include "EbMotionEstimationResults.h"
14 #include "EbReferenceObject.h"
15 #include "EbMotionEstimation.h"
16 #include "EbIntraPrediction.h"
17 #include "EbLambdaRateTables.h"
18 #include "EbComputeSAD.h"
19 
20 #include "emmintrin.h"
21 
/* Number of square PUs handled per LCU.
 * NOTE(review): presumably 1 (64x64) + 4 (32x32) + 16 (16x16) + 64 (8x8) = 85 -- confirm. */
#define SQUARE_PU_NUM             85
/* NOTE(review): no use of this constant is visible in this part of the file. */
#define BUFF_CHECK_SIZE           128

/* Compile-time switch, currently 0 (original annotation: "CHKN 1"). */
#define DERIVE_INTRA_32_FROM_16   0
26 
27 /* --32x32-
28 |00||01|
29 |02||03|
30 --------*/
31 /* ------16x16-----
32 |00||01||04||05|
33 |02||03||06||07|
34 |08||09||12||13|
35 |10||11||14||15|
36 ----------------*/
37 /* ------8x8----------------------------
38 |00||01||04||05|     |16||17||20||21|
39 |02||03||06||07|     |18||19||22||23|
40 |08||09||12||13|     |24||25||28||29|
41 |10||11||14||15|     |26||27||30||31|
42 
43 |32||33||36||37|     |48||49||52||53|
44 |34||35||38||39|     |50||51||54||55|
45 |40||41||44||45|     |56||57||60||61|
46 |42||43||46||47|     |58||59||62||63|
47 -------------------------------------*/
48 
49 
50 /************************************************
51  * Set ME/HME Params
52  ************************************************/
/*
 * Loads the HME (levels 0-2) and ME search-area fields of meContextPtr from
 * the *Oq lookup tables. The table row is a resolution class derived from
 * inputResolution and the picture aspect ratio; the column is the picture's
 * encoder mode.
 *
 *   meContextPtr          : ME context whose search-region fields are written
 *   pictureControlSetPtr  : supplies encMode
 *   sequenceControlSetPtr : supplies lumaWidth/lumaHeight, frameRate and tune
 *   inputResolution       : resolution range of the input
 *
 * Always returns EB_NULL.
 */
static void* SetMeHmeParamsOq(
    MeContext_t                 *meContextPtr,
    PictureParentControlSet_t   *pictureControlSetPtr,
    SequenceControlSet_t        *sequenceControlSetPtr,
    EB_INPUT_RESOLUTION          inputResolution)
{
    EB_U8  encModeIdx  = pictureControlSetPtr->encMode;

    // NOTE(review): this is a truncating division, so "aspectRatio > 3" below
    // only holds once the width is at least 4x the height -- confirm intended.
    EB_U32 aspectRatio = sequenceControlSetPtr->lumaWidth / sequenceControlSetPtr->lumaHeight;

    // Resolution class used as the row of the lookup tables
    EB_U8  resIdx;
    if (inputResolution <= INPUT_SIZE_576p_RANGE_OR_LOWER) {
        resIdx = 0;     // 480P
    }
    else if (inputResolution <= INPUT_SIZE_1080i_RANGE && aspectRatio < 2) {
        resIdx = 1;     // 720P
    }
    else if (inputResolution <= INPUT_SIZE_1080i_RANGE && aspectRatio > 3) {
        resIdx = 2;     // 1080I
    }
    else if (inputResolution <= INPUT_SIZE_1080p_RANGE) {
        resIdx = 3;     // up to the 1080p range
    }
    else {
        resIdx = 4;     // 4K
    }

    // Number of HME search regions: always the maximum
    meContextPtr->numberHmeSearchRegionInWidth  = EB_HME_SEARCH_AREA_COLUMN_MAX_COUNT;
    meContextPtr->numberHmeSearchRegionInHeight = EB_HME_SEARCH_AREA_ROW_MAX_COUNT;

    // HME Level0: total area, then per-region width ([0]=right, [1]=left)
    // and height ([0]=top, [1]=bottom)
    meContextPtr->hmeLevel0TotalSearchAreaWidth       = HmeLevel0TotalSearchAreaWidthOq[resIdx][encModeIdx];
    meContextPtr->hmeLevel0TotalSearchAreaHeight      = HmeLevel0TotalSearchAreaHeightOq[resIdx][encModeIdx];
    meContextPtr->hmeLevel0SearchAreaInWidthArray[0]  = HmeLevel0SearchAreaInWidthArrayRightOq[resIdx][encModeIdx];
    meContextPtr->hmeLevel0SearchAreaInWidthArray[1]  = HmeLevel0SearchAreaInWidthArrayLeftOq[resIdx][encModeIdx];
    meContextPtr->hmeLevel0SearchAreaInHeightArray[0] = HmeLevel0SearchAreaInHeightArrayTopOq[resIdx][encModeIdx];
    meContextPtr->hmeLevel0SearchAreaInHeightArray[1] = HmeLevel0SearchAreaInHeightArrayBottomOq[resIdx][encModeIdx];

    // HME Level1
    meContextPtr->hmeLevel1SearchAreaInWidthArray[0]  = HmeLevel1SearchAreaInWidthArrayRightOq[resIdx][encModeIdx];
    meContextPtr->hmeLevel1SearchAreaInWidthArray[1]  = HmeLevel1SearchAreaInWidthArrayLeftOq[resIdx][encModeIdx];
    meContextPtr->hmeLevel1SearchAreaInHeightArray[0] = HmeLevel1SearchAreaInHeightArrayTopOq[resIdx][encModeIdx];
    meContextPtr->hmeLevel1SearchAreaInHeightArray[1] = HmeLevel1SearchAreaInHeightArrayBottomOq[resIdx][encModeIdx];

    // HME Level2
    meContextPtr->hmeLevel2SearchAreaInWidthArray[0]  = HmeLevel2SearchAreaInWidthArrayRightOq[resIdx][encModeIdx];
    meContextPtr->hmeLevel2SearchAreaInWidthArray[1]  = HmeLevel2SearchAreaInWidthArrayLeftOq[resIdx][encModeIdx];
    meContextPtr->hmeLevel2SearchAreaInHeightArray[0] = HmeLevel2SearchAreaInHeightArrayTopOq[resIdx][encModeIdx];
    meContextPtr->hmeLevel2SearchAreaInHeightArray[1] = HmeLevel2SearchAreaInHeightArrayBottomOq[resIdx][encModeIdx];

    // ME
    meContextPtr->searchAreaWidth  = SearchAreaWidthOq[resIdx][encModeIdx];
    meContextPtr->searchAreaHeight = SearchAreaHeightOq[resIdx][encModeIdx];

    // HME Level0 floor for low frame rate 4K content (frame rate <= 30).
    // NOTE(review): the ">> 16" suggests frameRate is 16.16 fixed point -- confirm.
    if (inputResolution == INPUT_SIZE_4K_RANGE &&
        (sequenceControlSetPtr->staticConfig.frameRate >> 16) <= 30) {

        if (encModeIdx == ENC_MODE_6 || encModeIdx == ENC_MODE_7) {
            meContextPtr->hmeLevel0TotalSearchAreaWidth       = MAX(96, meContextPtr->hmeLevel0TotalSearchAreaWidth);
            meContextPtr->hmeLevel0TotalSearchAreaHeight      = MAX(64, meContextPtr->hmeLevel0TotalSearchAreaHeight);
            meContextPtr->hmeLevel0SearchAreaInWidthArray[0]  = MAX(48, meContextPtr->hmeLevel0SearchAreaInWidthArray[0]);
            meContextPtr->hmeLevel0SearchAreaInWidthArray[1]  = MAX(48, meContextPtr->hmeLevel0SearchAreaInWidthArray[1]);
            meContextPtr->hmeLevel0SearchAreaInHeightArray[0] = MAX(32, meContextPtr->hmeLevel0SearchAreaInHeightArray[0]);
            meContextPtr->hmeLevel0SearchAreaInHeightArray[1] = MAX(32, meContextPtr->hmeLevel0SearchAreaInHeightArray[1]);
        }
        else if (encModeIdx >= ENC_MODE_8) {
            meContextPtr->hmeLevel0TotalSearchAreaWidth       = MAX(64, meContextPtr->hmeLevel0TotalSearchAreaWidth);
            meContextPtr->hmeLevel0TotalSearchAreaHeight      = MAX(48, meContextPtr->hmeLevel0TotalSearchAreaHeight);
            meContextPtr->hmeLevel0SearchAreaInWidthArray[0]  = MAX(32, meContextPtr->hmeLevel0SearchAreaInWidthArray[0]);
            meContextPtr->hmeLevel0SearchAreaInWidthArray[1]  = MAX(32, meContextPtr->hmeLevel0SearchAreaInWidthArray[1]);
            meContextPtr->hmeLevel0SearchAreaInHeightArray[0] = MAX(24, meContextPtr->hmeLevel0SearchAreaInHeightArray[0]);
            meContextPtr->hmeLevel0SearchAreaInHeightArray[1] = MAX(24, meContextPtr->hmeLevel0SearchAreaInHeightArray[1]);
        }
    }

    // The flag is only ever raised here; it is left untouched otherwise.
    if ((inputResolution > INPUT_SIZE_576p_RANGE_OR_LOWER) &&
        (sequenceControlSetPtr->staticConfig.tune > 0)) {
        meContextPtr->updateHmeSearchCenter = EB_TRUE;
    }

    return EB_NULL;
}
125 
126 
127 
128 
129 /************************************************
130  * Set ME/HME Params from Config
131  ************************************************/
/*
 * Replaces the ME search-area width and height in meContextPtr with the
 * values held in the sequence's static configuration.
 *
 * NOTE(review): both values go through an (EB_U8) cast before being stored,
 * so a configured value that does not fit in EB_U8 is truncated -- confirm
 * the allowed configuration range.
 */
static void SetMeHmeParamsFromConfig(
    SequenceControlSet_t        *sequenceControlSetPtr,
    MeContext_t                 *meContextPtr)
{
    const EB_U8 cfgSearchWidth  = (EB_U8)sequenceControlSetPtr->staticConfig.searchAreaWidth;
    const EB_U8 cfgSearchHeight = (EB_U8)sequenceControlSetPtr->staticConfig.searchAreaHeight;

    meContextPtr->searchAreaWidth  = cfgSearchWidth;
    meContextPtr->searchAreaHeight = cfgSearchHeight;
}
140 
141 
/*
 * Destructor callback for a MotionEstimationContext_t (installed by
 * MotionEstimationContextCtor). Releases the intra reference-sample object
 * and the ME context through EB_DELETE, in that order.
 *
 *   p : pointer to the MotionEstimationContext_t being torn down
 */
static void MotionEstimationContextDctor(EB_PTR p)
{
    MotionEstimationContext_t *contextPtr = (MotionEstimationContext_t *)p;

    EB_DELETE(contextPtr->intraRefPtr);
    EB_DELETE(contextPtr->meContextPtr);
}
148 
149 /************************************************
150  * Motion Analysis Context Constructor
151  ************************************************/
152 
MotionEstimationContextCtor(MotionEstimationContext_t * contextPtr,EbFifo_t * pictureDecisionResultsInputFifoPtr,EbFifo_t * motionEstimationResultsOutputFifoPtr)153 EB_ERRORTYPE MotionEstimationContextCtor(
154 	MotionEstimationContext_t    *contextPtr,
155 	EbFifo_t                     *pictureDecisionResultsInputFifoPtr,
156 	EbFifo_t                     *motionEstimationResultsOutputFifoPtr)
157 {
158     contextPtr->dctor = MotionEstimationContextDctor;
159 	contextPtr->pictureDecisionResultsInputFifoPtr = pictureDecisionResultsInputFifoPtr;
160 	contextPtr->motionEstimationResultsOutputFifoPtr = motionEstimationResultsOutputFifoPtr;
161 
162     EB_NEW(
163         contextPtr->intraRefPtr,
164         IntraOpenLoopReferenceSamplesCtor);
165 
166     EB_NEW(
167         contextPtr->meContextPtr,
168         MeContextCtor);
169 
170 	return EB_ErrorNone;
171 }
172 
173 /***************************************************************************************************
174 * ZZ Decimated SAD Computation
175 ***************************************************************************************************/
ComputeDecimatedZzSad(MotionEstimationContext_t * contextPtr,SequenceControlSet_t * sequenceControlSetPtr,PictureParentControlSet_t * pictureControlSetPtr,EbPictureBufferDesc_t * sixteenthDecimatedPicturePtr,EB_U32 xLcuStartIndex,EB_U32 xLcuEndIndex,EB_U32 yLcuStartIndex,EB_U32 yLcuEndIndex)176 static EB_ERRORTYPE ComputeDecimatedZzSad(
177 	MotionEstimationContext_t   *contextPtr,
178 	SequenceControlSet_t        *sequenceControlSetPtr,
179 	PictureParentControlSet_t   *pictureControlSetPtr,
180 	EbPictureBufferDesc_t       *sixteenthDecimatedPicturePtr,
181 	EB_U32						 xLcuStartIndex,
182 	EB_U32						 xLcuEndIndex,
183 	EB_U32						 yLcuStartIndex,
184 	EB_U32						 yLcuEndIndex) {
185 
186 	EB_ERRORTYPE return_error = EB_ErrorNone;
187 
188 	PictureParentControlSet_t	*previousPictureControlSetWrapperPtr = ((PictureParentControlSet_t*)pictureControlSetPtr->previousPictureControlSetWrapperPtr->objectPtr);
189 	EbPictureBufferDesc_t		*previousInputPictureFull = previousPictureControlSetWrapperPtr->enhancedPicturePtr;
190 
191 	EB_U32 lcuIndex;
192 
193 	EB_U32 lcuWidth;
194 	EB_U32 lcuHeight;
195 
196 	EB_U32 decimatedLcuWidth;
197 	EB_U32 decimatedLcuHeight;
198 
199 	EB_U32 lcuOriginX;
200 	EB_U32 lcuOriginY;
201 
202 	EB_U32 blkDisplacementDecimated;
203 	EB_U32 blkDisplacementFull;
204 
205 	EB_U32 decimatedLcuCollocatedSad;
206 
207 	EB_U32 xLcuIndex;
208 	EB_U32 yLcuIndex;
209 
210 	for (yLcuIndex = yLcuStartIndex; yLcuIndex < yLcuEndIndex; ++yLcuIndex) {
211 		for (xLcuIndex = xLcuStartIndex; xLcuIndex < xLcuEndIndex; ++xLcuIndex) {
212 
213 			lcuIndex = xLcuIndex + yLcuIndex * sequenceControlSetPtr->pictureWidthInLcu;
214             LcuParams_t *lcuParams = &sequenceControlSetPtr->lcuParamsArray[lcuIndex];
215 
216 			lcuWidth = lcuParams->width;
217 			lcuHeight = lcuParams->height;
218 
219 			lcuOriginX = lcuParams->originX;
220 			lcuOriginY = lcuParams->originY;
221 
222 			lcuWidth = lcuParams->width;
223 			lcuHeight = lcuParams->height;
224 
225 
226 			decimatedLcuWidth = lcuWidth >> 2;
227 			decimatedLcuHeight = lcuHeight >> 2;
228 
229 			decimatedLcuCollocatedSad = 0;
230 
231             if (lcuParams->isCompleteLcu)
232 			{
233 
234 				blkDisplacementDecimated = (sixteenthDecimatedPicturePtr->originY + (lcuOriginY >> 2)) * sixteenthDecimatedPicturePtr->strideY + sixteenthDecimatedPicturePtr->originX + (lcuOriginX >> 2);
235                 blkDisplacementFull = (previousInputPictureFull->originY + lcuOriginY)* previousInputPictureFull->strideY + (previousInputPictureFull->originX + lcuOriginX);
236 
237 				// 1/16 collocated LCU decimation
238 				Decimation2D(
239 					&previousInputPictureFull->bufferY[blkDisplacementFull],
240 					previousInputPictureFull->strideY,
241 					MAX_LCU_SIZE,
242 					MAX_LCU_SIZE,
243 					contextPtr->meContextPtr->sixteenthLcuBuffer,
244 					contextPtr->meContextPtr->sixteenthLcuBufferStride,
245 					4);
246 
247 				// ZZ SAD between 1/16 current & 1/16 collocated
248 				decimatedLcuCollocatedSad = NxMSadKernel_funcPtrArray[!!(ASM_TYPES & AVX2_MASK)][2](
249 					&(sixteenthDecimatedPicturePtr->bufferY[blkDisplacementDecimated]),
250 					sixteenthDecimatedPicturePtr->strideY,
251 					contextPtr->meContextPtr->sixteenthLcuBuffer,
252 					contextPtr->meContextPtr->sixteenthLcuBufferStride,
253 					16, 16);
254 
255 				// Background Enhancement Algorithm
256 				// Classification is important to:
257 				// 1. Avoid improving moving objects.
258 				// 2. Do not modulate when all the picture is background
259 				// 3. Do give different importance to different regions
260 				if (decimatedLcuCollocatedSad < BEA_CLASS_0_0_DEC_TH) {
261 					previousPictureControlSetWrapperPtr->zzCostArray[lcuIndex] = BEA_CLASS_0_ZZ_COST;
262 				}
263 				else if (decimatedLcuCollocatedSad < BEA_CLASS_0_DEC_TH) {
264 					previousPictureControlSetWrapperPtr->zzCostArray[lcuIndex] = BEA_CLASS_0_1_ZZ_COST;
265 				}
266 				else if (decimatedLcuCollocatedSad < BEA_CLASS_1_DEC_TH) {
267 					previousPictureControlSetWrapperPtr->zzCostArray[lcuIndex] = BEA_CLASS_1_ZZ_COST;
268 				}
269 				else if (decimatedLcuCollocatedSad < BEA_CLASS_2_DEC_TH) {
270 					previousPictureControlSetWrapperPtr->zzCostArray[lcuIndex] = BEA_CLASS_2_ZZ_COST;
271 				}
272 				else {
273 					previousPictureControlSetWrapperPtr->zzCostArray[lcuIndex] = BEA_CLASS_3_ZZ_COST;
274 				}
275 
276 
277 			}
278 			else {
279 				previousPictureControlSetWrapperPtr->zzCostArray[lcuIndex] = INVALID_ZZ_COST;
280 				decimatedLcuCollocatedSad = (EB_U32)~0;
281 			}
282 
283 
284 			// Keep track of non moving LCUs for QP modulation
285 			if (decimatedLcuCollocatedSad < ((decimatedLcuWidth * decimatedLcuHeight) * 2)) {
286 				previousPictureControlSetWrapperPtr->nonMovingIndexArray[lcuIndex] = BEA_CLASS_0_ZZ_COST;
287 			}
288 			else if (decimatedLcuCollocatedSad < ((decimatedLcuWidth * decimatedLcuHeight) * 4)) {
289 				previousPictureControlSetWrapperPtr->nonMovingIndexArray[lcuIndex] = BEA_CLASS_1_ZZ_COST;
290 			}
291 			else if (decimatedLcuCollocatedSad < ((decimatedLcuWidth * decimatedLcuHeight) * 8)) {
292 				previousPictureControlSetWrapperPtr->nonMovingIndexArray[lcuIndex] = BEA_CLASS_2_ZZ_COST;
293 			}
294 			else { //if (decimatedLcuCollocatedSad < ((decimatedLcuWidth * decimatedLcuHeight) * 4)) {
295 				previousPictureControlSetWrapperPtr->nonMovingIndexArray[lcuIndex] = BEA_CLASS_3_ZZ_COST;
296 			}
297 		}
298 	}
299 
300 	return return_error;
301 }
302 
303 /******************************************************
304 * Derive ME Settings for OQ
305   Input   : encoder mode and tune
306   Output  : ME Kernel signal(s)
307 ******************************************************/
SignalDerivationMeKernelOq(SequenceControlSet_t * sequenceControlSetPtr,PictureParentControlSet_t * pictureControlSetPtr,MotionEstimationContext_t * contextPtr)308 EB_ERRORTYPE SignalDerivationMeKernelOq(
309     SequenceControlSet_t        *sequenceControlSetPtr,
310     PictureParentControlSet_t   *pictureControlSetPtr,
311     MotionEstimationContext_t   *contextPtr) {
312 
313     EB_ERRORTYPE return_error = EB_ErrorNone;
314 
315     // Set ME/HME search regions
316     SetMeHmeParamsOq(
317         contextPtr->meContextPtr,
318         pictureControlSetPtr,
319         sequenceControlSetPtr,
320         sequenceControlSetPtr->inputResolution);
321     if (!sequenceControlSetPtr->staticConfig.useDefaultMeHme) {
322         SetMeHmeParamsFromConfig(
323             sequenceControlSetPtr,
324             contextPtr->meContextPtr);
325     }
326 
327     // Set number of quadrant(s)
328     if (pictureControlSetPtr->encMode <= ENC_MODE_7) {
329         contextPtr->meContextPtr->oneQuadrantHME = EB_FALSE;
330     }
331     else {
332         if (sequenceControlSetPtr->inputResolution >= INPUT_SIZE_4K_RANGE) {
333             contextPtr->meContextPtr->oneQuadrantHME = EB_TRUE;
334         }
335         else {
336             contextPtr->meContextPtr->oneQuadrantHME = EB_FALSE;
337         }
338     }
339 
340     // Set ME Fractional Search Method
341     if (pictureControlSetPtr->encMode <= ENC_MODE_4) {
342         contextPtr->meContextPtr->fractionalSearchMethod = SSD_SEARCH;
343     }
344     else {
345         contextPtr->meContextPtr->fractionalSearchMethod = SUB_SAD_SEARCH;
346     }
347 
348     // Set 64x64 Fractional Search Flag
349 	if (pictureControlSetPtr->encMode <= ENC_MODE_2) {
350 		contextPtr->meContextPtr->fractionalSearch64x64 = EB_TRUE;
351 	}
352 	else {
353 		contextPtr->meContextPtr->fractionalSearch64x64 = EB_FALSE;
354 	}
355 
356     // Set OIS Kernel
357 	if (pictureControlSetPtr->encMode <= ENC_MODE_4) {
358 		if (sequenceControlSetPtr->inputResolution < INPUT_SIZE_4K_RANGE) {
359 			contextPtr->oisKernelLevel = (pictureControlSetPtr->temporalLayerIndex == 0) ? EB_TRUE : EB_FALSE;
360 		}
361 		else {
362 			contextPtr->oisKernelLevel = EB_FALSE;
363 		}
364 	}
365     else {
366         contextPtr->oisKernelLevel = EB_FALSE;
367     }
368 
369     // Set OIS TH
370     // 0: Agressive
371     // 1: Default
372     // 2: Conservative
373     if (sequenceControlSetPtr->inputResolution == INPUT_SIZE_4K_RANGE) {
374         if (pictureControlSetPtr->encMode <= ENC_MODE_5) {
375             if (pictureControlSetPtr->isUsedAsReferenceFlag == EB_TRUE) {
376                 contextPtr->oisThSet = 2;
377             }
378             else {
379                 contextPtr->oisThSet = 1;
380             }
381         }
382         else {
383             contextPtr->oisThSet = 1;
384         }
385     }
386     else {
387 		if (pictureControlSetPtr->encMode <= ENC_MODE_6) {
388 			contextPtr->oisThSet = 2;
389 		}
390         else {
391             contextPtr->oisThSet = 1;
392         }
393     }
394 
395     // Set valid flag for the best OIS
396 	contextPtr->setBestOisDistortionToValid = EB_FALSE;
397 
398     // Set fractional search model
399     // 0: search all blocks
400     // 1: selective based on Full-Search SAD & MV.
401     // 2: off
402     if (pictureControlSetPtr->useSubpelFlag == 1) {
403         if (pictureControlSetPtr->encMode <= ENC_MODE_5) {
404             contextPtr->meContextPtr->fractionalSearchModel = 0;
405         }
406 		else if (pictureControlSetPtr->encMode <= ENC_MODE_6) {
407 			if (sequenceControlSetPtr->inputResolution == INPUT_SIZE_4K_RANGE) {
408 				contextPtr->meContextPtr->fractionalSearchModel = 1;
409 			}
410 			else {
411 				contextPtr->meContextPtr->fractionalSearchModel = 0;
412 			}
413 		}
414         else {
415             contextPtr->meContextPtr->fractionalSearchModel = 1;
416         }
417     }
418     else {
419         contextPtr->meContextPtr->fractionalSearchModel = 2;
420     }
421 
422     return return_error;
423 }
424 
425 
426 /******************************************************
427 * EbHevcGetMv
428   Input   : LCU Index
429   Output  : List0 MV
430 ******************************************************/
/*
 * Reads the list-0 motion vector stored in the first ME result entry of an LCU.
 *
 *   pictureControlSetPtr : picture whose meResults are read
 *   lcuIndex             : index of the LCU
 *   xCurrentMv           : receives xMvL0
 *   yCurrentMv           : receives yMvL0
 */
void EbHevcGetMv(
    PictureParentControlSet_t   *pictureControlSetPtr,
    EB_U32                       lcuIndex,
    EB_S32                      *xCurrentMv,
    EB_S32                      *yCurrentMv)
{
    *xCurrentMv = pictureControlSetPtr->meResults[lcuIndex][0].xMvL0;
    *yCurrentMv = pictureControlSetPtr->meResults[lcuIndex][0].yMvL0;
}
443 
444 /******************************************************
445 * EbHevcGetMeDist
446  Input   : LCU Index
447  Output  : Best ME Distortion
448 ******************************************************/
/*
 * Reads the distortion of the first distortionDirection entry of the first
 * ME result of an LCU.
 *
 *   pictureControlSetPtr : picture whose meResults are read
 *   lcuIndex             : index of the LCU
 *   distortion           : receives the distortion, converted to EB_U32
 */
void EbHevcGetMeDist(
    PictureParentControlSet_t   *pictureControlSetPtr,
    EB_U32                       lcuIndex,
    EB_U32                      *distortion)
{
    const MeCuResults_t *lcuMeResults = &pictureControlSetPtr->meResults[lcuIndex][0];

    *distortion = (EB_U32)(lcuMeResults->distortionDirection[0].distortion);
}
458 
459 /******************************************************
460 * Derive Similar Collocated Flag
461 ******************************************************/
/*
 * Sets the two "similar collocated LCU" flags of one LCU by comparing its
 * 64x64 luma mean and variance with those of the same LCU in the list-0
 * reference picture.
 *
 * Both flags default to EB_FALSE and stay so for I pictures. Otherwise the
 * LCU is "similar" when the mean difference is below MEAN_DIFF_THRSHOLD and
 * either the relative (percent) or the absolute variance difference is below
 * VAR_DIFF_THRSHOLD. The all-layers flag is then set; the other flag is set
 * only when the picture is used as a reference.
 *
 *   pictureControlSetPtr : current picture (flags are written here)
 *   lcuIndex             : index of the LCU
 */
static void DeriveSimilarCollocatedFlag(
    PictureParentControlSet_t    *pictureControlSetPtr,
    EB_U32                        lcuIndex)
{
    // Similarity detectors default to "not similar"
    pictureControlSetPtr->similarColocatedLcuArray[lcuIndex]          = EB_FALSE;
    pictureControlSetPtr->similarColocatedLcuArrayAllLayers[lcuIndex] = EB_FALSE;

    if (pictureControlSetPtr->sliceType == EB_I_PICTURE) {
        return;
    }

    const EbPaReferenceObject_t *refObjL0 =
        (EbPaReferenceObject_t*)pictureControlSetPtr->refPaPicPtrArray[REF_LIST_0]->objectPtr;

    const EB_U8  refMean = refObjL0->yMean[lcuIndex];
    const EB_U8  curMean = pictureControlSetPtr->yMean[lcuIndex][RASTER_SCAN_CU_INDEX_64x64];
    const EB_U16 curVar  = pictureControlSetPtr->variance[lcuIndex][RASTER_SCAN_CU_INDEX_64x64];
    EB_U16       refVar  = refObjL0->variance[lcuIndex];

    // Clamp to at least 1: refVar is used as a divisor below
    if (refVar == 0) {
        refVar = 1;
    }

    const EB_S64 meanDelta       = ABS((EB_S64)curMean - (EB_S64)refMean);
    const EB_S64 varPercentDelta = ABS((EB_S64)curVar * 100 / (EB_S64)refVar - 100);
    const EB_S64 varDelta        = ABS((EB_S64)curVar - (EB_S64)refVar);

    if ((meanDelta < MEAN_DIFF_THRSHOLD) &&
        ((varPercentDelta < VAR_DIFF_THRSHOLD) || (varDelta < VAR_DIFF_THRSHOLD))) {

        if (pictureControlSetPtr->isUsedAsReferenceFlag) {
            pictureControlSetPtr->similarColocatedLcuArray[lcuIndex] = EB_TRUE;
        }
        pictureControlSetPtr->similarColocatedLcuArrayAllLayers[lcuIndex] = EB_TRUE;
    }
}
501 
/*
 * First logo / stationary-edge check for one LCU.
 *
 * For a complete LCU marked as a potential logo LCU, computes the variance
 * of the four 32x32 luma variances ("variance of variance") and sets:
 *   - check1ForLogoStationaryEdgeOverTimeFlag   = 1 when varOfVar > 50000
 *     and the LCU has low motion, else 0;
 *   - pmCheck1ForLogoStationaryEdgeOverTimeFlag = 1 when varOfVar > 1000,
 *     else 0.
 * Low motion means temporal layer 0, or both list-0 MV components below 16
 * in magnitude. For any other LCU both flags are cleared.
 *
 *   sequenceControlSetPtr : supplies lcuParamsArray
 *   pictureControlSetPtr  : supplies variance, ME results; owns lcuStatArray
 *   lcuIndex              : index of the LCU
 */
static void StationaryEdgeOverUpdateOverTimeLcuPart1(
    SequenceControlSet_t        *sequenceControlSetPtr,
    PictureParentControlSet_t   *pictureControlSetPtr,
    EB_U32                       lcuIndex)
{
    EB_S32 xCurrentMv = 0;
    EB_S32 yCurrentMv = 0;

    LcuParams_t *lcuParams  = &sequenceControlSetPtr->lcuParamsArray[lcuIndex];
    LcuStat_t   *lcuStatPtr = &pictureControlSetPtr->lcuStatArray[lcuIndex];

    if (lcuParams->potentialLogoLcu && lcuParams->isCompleteLcu) {

        // Current MV (left at zero on temporal layer 0, where it is not used)
        if (pictureControlSetPtr->temporalLayerIndex > 0) {
            EbHevcGetMv(pictureControlSetPtr, lcuIndex, &xCurrentMv, &yCurrentMv);
        }

        EB_BOOL lowMotion =
            (pictureControlSetPtr->temporalLayerIndex == 0 ||
             ((ABS(xCurrentMv) < 16) && (ABS(yCurrentMv) < 16))) ? EB_TRUE : EB_FALSE;

        EB_U16 *yVariancePtr = pictureControlSetPtr->variance[lcuIndex];

        // All arithmetic below is done in 64-bit signed integers: with 16-bit
        // variances the squared deviations (and their sum) can exceed the
        // range of a 32-bit int, which would be signed overflow.
        const EB_S64 var0 = yVariancePtr[ME_TIER_ZERO_PU_32x32_0];
        const EB_S64 var1 = yVariancePtr[ME_TIER_ZERO_PU_32x32_1];
        const EB_S64 var2 = yVariancePtr[ME_TIER_ZERO_PU_32x32_2];
        const EB_S64 var3 = yVariancePtr[ME_TIER_ZERO_PU_32x32_3];

        const EB_S64 averageVar = (var0 + var1 + var2 + var3) >> 2;

        const EB_S64 dev0 = var0 - averageVar;
        const EB_S64 dev1 = var1 - averageVar;
        const EB_S64 dev2 = var2 - averageVar;
        const EB_S64 dev3 = var3 - averageVar;

        const EB_U64 varOfVar =
            (EB_U64)(dev0 * dev0 + dev1 * dev1 + dev2 * dev2 + dev3 * dev3) >> 2;

        lcuStatPtr->check1ForLogoStationaryEdgeOverTimeFlag =
            ((varOfVar <= 50000) || !lowMotion) ? 0 : 1;

        lcuStatPtr->pmCheck1ForLogoStationaryEdgeOverTimeFlag =
            (varOfVar <= 1000) ? 0 : 1;
    }
    else {
        lcuStatPtr->check1ForLogoStationaryEdgeOverTimeFlag   = 0;
        lcuStatPtr->pmCheck1ForLogoStationaryEdgeOverTimeFlag = 0;
    }
}
553 
/*
 * Second logo / stationary-edge check for one LCU.
 *
 * For a complete LCU marked as a potential logo LCU in a B picture, the best
 * ME distortion is compared with 64*64*lowSadTh (lowSadTh is 5 below the
 * 1080p range, 2 otherwise); lowDistLogo is set to 1 when the distortion is
 * below that threshold and to 0 in every other case.
 *
 * NOTE(review): check2ForLogoStationaryEdgeOverTimeFlag is derived per LCU
 * and then unconditionally overwritten with 1 at the end of the function, so
 * the derived value never survives. Confirm whether the override is intended.
 *
 *   sequenceControlSetPtr : supplies inputResolution and lcuParamsArray
 *   pictureControlSetPtr  : supplies sliceType, ME results; owns lcuStatArray
 *   lcuIndex              : index of the LCU
 */
static void StationaryEdgeOverUpdateOverTimeLcuPart2(
    SequenceControlSet_t        *sequenceControlSetPtr,
    PictureParentControlSet_t   *pictureControlSetPtr,
    EB_U32                       lcuIndex)
{
    const EB_U32 lowSadTh =
        (sequenceControlSetPtr->inputResolution < INPUT_SIZE_1080p_RANGE) ? 5 : 2;

    LcuParams_t *lcuParams  = &sequenceControlSetPtr->lcuParamsArray[lcuIndex];
    LcuStat_t   *lcuStatPtr = &pictureControlSetPtr->lcuStatArray[lcuIndex];

    if (lcuParams->potentialLogoLcu && lcuParams->isCompleteLcu) {

        EB_BOOL lowSad = EB_FALSE;

        // Only B pictures can qualify as low SAD
        if (pictureControlSetPtr->sliceType == EB_B_PICTURE) {
            EB_U32 meDist = 0;

            EbHevcGetMeDist(pictureControlSetPtr, lcuIndex, &meDist);
            if (meDist < 64 * 64 * lowSadTh) {
                lowSad = EB_TRUE;
            }
        }

        lcuStatPtr->check2ForLogoStationaryEdgeOverTimeFlag = lowSad ? 0 : 1;
        lcuStatPtr->lowDistLogo                             = lowSad ? 1 : 0;
    }
    else {
        lcuStatPtr->check2ForLogoStationaryEdgeOverTimeFlag = 0;
        lcuStatPtr->lowDistLogo                             = 0;
    }

    // Unconditional override of the value derived above (see NOTE in the header)
    lcuStatPtr->check2ForLogoStationaryEdgeOverTimeFlag = 1;
}
594 
595 /************************************************
596  * Motion Analysis Kernel
597  * The Motion Analysis performs  Motion Estimation
598  * This process has access to the current input picture as well as
599  * the input pictures, which the current picture references according
600  * to the prediction structure pattern.  The Motion Analysis process is multithreaded,
601  * so pictures can be processed out of order as long as all inputs are available.
602  ************************************************/
MotionEstimationKernel(void * inputPtr)603 void* MotionEstimationKernel(void *inputPtr)
604 {
605 	MotionEstimationContext_t   *contextPtr = (MotionEstimationContext_t*)inputPtr;
606 
607 	PictureParentControlSet_t   *pictureControlSetPtr;
608 	SequenceControlSet_t        *sequenceControlSetPtr;
609 
610 	EbObjectWrapper_t           *inputResultsWrapperPtr;
611 	PictureDecisionResults_t    *inputResultsPtr;
612 
613 	EbObjectWrapper_t           *outputResultsWrapperPtr;
614 	MotionEstimationResults_t   *outputResultsPtr;
615 
616 	EbPictureBufferDesc_t       *inputPicturePtr;
617 
618     EbPictureBufferDesc_t       *inputPaddedPicturePtr;
619 
620 	EB_U32                       bufferIndex;
621 
622 	EB_U32                       lcuIndex;
623 	EB_U32                       xLcuIndex;
624 	EB_U32                       yLcuIndex;
625 	EB_U32                       pictureWidthInLcu;
626 	EB_U32                       pictureHeightInLcu;
627 	EB_U32                       lcuOriginX;
628 	EB_U32                       lcuOriginY;
629 	EB_U32                       lcuWidth;
630 	EB_U32                       lcuHeight;
631 	EB_U32                       lcuRow;
632 
633 
634 
635 	EbPaReferenceObject_t       *paReferenceObject;
636 	EbPictureBufferDesc_t       *quarterDecimatedPicturePtr;
637 	EbPictureBufferDesc_t       *sixteenthDecimatedPicturePtr;
638 
639 	// Segments
640 	EB_U32                      segmentIndex;
641 	EB_U32                      xSegmentIndex;
642 	EB_U32                      ySegmentIndex;
643 	EB_U32                      xLcuStartIndex;
644 	EB_U32                      xLcuEndIndex;
645 	EB_U32                      yLcuStartIndex;
646 	EB_U32                      yLcuEndIndex;
647 
648 	EB_U32                      intraSadIntervalIndex;
649 
650 	MdRateEstimationContext_t   *mdRateEstimationArray;
651 
652 
653 	for (;;) {
654 
655 
656 		// Get Input Full Object
657 		EbGetFullObject(
658 			contextPtr->pictureDecisionResultsInputFifoPtr,
659 			&inputResultsWrapperPtr);
660         EB_CHECK_END_OBJ(inputResultsWrapperPtr);
661 
662 		inputResultsPtr = (PictureDecisionResults_t*)inputResultsWrapperPtr->objectPtr;
663 		pictureControlSetPtr = (PictureParentControlSet_t*)inputResultsPtr->pictureControlSetWrapperPtr->objectPtr;
664 		sequenceControlSetPtr = (SequenceControlSet_t*)pictureControlSetPtr->sequenceControlSetWrapperPtr->objectPtr;
665 		paReferenceObject = (EbPaReferenceObject_t*)pictureControlSetPtr->paReferencePictureWrapperPtr->objectPtr;
666 		quarterDecimatedPicturePtr = (EbPictureBufferDesc_t*)paReferenceObject->quarterDecimatedPicturePtr;
667 		sixteenthDecimatedPicturePtr = (EbPictureBufferDesc_t*)paReferenceObject->sixteenthDecimatedPicturePtr;
668         inputPaddedPicturePtr = (EbPictureBufferDesc_t*)paReferenceObject->inputPaddedPicturePtr;
669 		inputPicturePtr = pictureControlSetPtr->enhancedPicturePtr;
670 		// Segments
671 		segmentIndex = inputResultsPtr->segmentIndex;
672 #if DEADLOCK_DEBUG
673         if ((pictureControlSetPtr->pictureNumber >= MIN_POC) && (pictureControlSetPtr->pictureNumber <= MAX_POC))
674             if (segmentIndex == 0)
675                 SVT_LOG("POC %lu ME IN \n", pictureControlSetPtr->pictureNumber);
676 #endif
677 		pictureWidthInLcu = (sequenceControlSetPtr->lumaWidth + sequenceControlSetPtr->lcuSize - 1) / sequenceControlSetPtr->lcuSize;
678 		pictureHeightInLcu = (sequenceControlSetPtr->lumaHeight + sequenceControlSetPtr->lcuSize - 1) / sequenceControlSetPtr->lcuSize;
679 		SEGMENT_CONVERT_IDX_TO_XY(segmentIndex, xSegmentIndex, ySegmentIndex, pictureControlSetPtr->meSegmentsColumnCount);
680 		xLcuStartIndex = SEGMENT_START_IDX(xSegmentIndex, pictureWidthInLcu, pictureControlSetPtr->meSegmentsColumnCount);
681 		xLcuEndIndex = SEGMENT_END_IDX(xSegmentIndex, pictureWidthInLcu, pictureControlSetPtr->meSegmentsColumnCount);
682 		yLcuStartIndex = SEGMENT_START_IDX(ySegmentIndex, pictureHeightInLcu, pictureControlSetPtr->meSegmentsRowCount);
683 		yLcuEndIndex = SEGMENT_END_IDX(ySegmentIndex, pictureHeightInLcu, pictureControlSetPtr->meSegmentsRowCount);
684 		// Increment the MD Rate Estimation array pointer to point to the right address based on the QP and slice type
685 		mdRateEstimationArray = (MdRateEstimationContext_t*)sequenceControlSetPtr->encodeContextPtr->mdRateEstimationArray;
686 		mdRateEstimationArray += pictureControlSetPtr->sliceType * TOTAL_NUMBER_OF_QP_VALUES + pictureControlSetPtr->pictureQp;
687 		// Reset MD rate Estimation table to initial values by copying from mdRateEstimationArray
688 		EB_MEMCPY(&(contextPtr->meContextPtr->mvdBitsArray[0]), &(mdRateEstimationArray->mvdBits[0]), sizeof(EB_BitFraction)*NUMBER_OF_MVD_CASES);
689 
690         SignalDerivationMeKernelOq(
691                 sequenceControlSetPtr,
692                 pictureControlSetPtr,
693                 contextPtr);
694 
695 		// Lambda Assignement
696         if (pictureControlSetPtr->temporalLayerIndex == 0) {
697             contextPtr->meContextPtr->lambda = lambdaModeDecisionRaSadBase[pictureControlSetPtr->pictureQp];
698         }
699         else if (pictureControlSetPtr->isUsedAsReferenceFlag) {
700             contextPtr->meContextPtr->lambda = lambdaModeDecisionRaSadRefNonBase[pictureControlSetPtr->pictureQp];
701         }
702         else {
703             contextPtr->meContextPtr->lambda = lambdaModeDecisionRaSadNonRef[pictureControlSetPtr->pictureQp];
704         }
705 
706         // Motion Estimation
707         if (pictureControlSetPtr->sliceType != EB_I_PICTURE) {
708 
709             // LCU Loop
710             for (yLcuIndex = yLcuStartIndex; yLcuIndex < yLcuEndIndex; ++yLcuIndex) {
711                 for (xLcuIndex = xLcuStartIndex; xLcuIndex < xLcuEndIndex; ++xLcuIndex) {
712 
713                     lcuIndex = (EB_U16)(xLcuIndex + yLcuIndex * pictureWidthInLcu);
714                     lcuOriginX = xLcuIndex * sequenceControlSetPtr->lcuSize;
715                     lcuOriginY = yLcuIndex * sequenceControlSetPtr->lcuSize;
716 
717                     lcuWidth = (sequenceControlSetPtr->lumaWidth - lcuOriginX) < MAX_LCU_SIZE ? sequenceControlSetPtr->lumaWidth - lcuOriginX : MAX_LCU_SIZE;
718                     lcuHeight = (sequenceControlSetPtr->lumaHeight - lcuOriginY) < MAX_LCU_SIZE ? sequenceControlSetPtr->lumaHeight - lcuOriginY : MAX_LCU_SIZE;
719 
720                     // Load the LCU from the input to the intermediate LCU buffer
721                     bufferIndex = (inputPicturePtr->originY + lcuOriginY) * inputPicturePtr->strideY + inputPicturePtr->originX + lcuOriginX;
722 
723                     contextPtr->meContextPtr->hmeSearchType = HME_RECTANGULAR;
724 
725                     for (lcuRow = 0; lcuRow < MAX_LCU_SIZE; lcuRow++) {
726                         EB_MEMCPY((&(contextPtr->meContextPtr->lcuBuffer[lcuRow * MAX_LCU_SIZE])), (&(inputPicturePtr->bufferY[bufferIndex + lcuRow * inputPicturePtr->strideY])), MAX_LCU_SIZE * sizeof(EB_U8));
727 
728                     }
729 
730                     EB_U8 * srcPtr = &inputPaddedPicturePtr->bufferY[bufferIndex];
731 
732                     //_MM_HINT_T0 	//_MM_HINT_T1	//_MM_HINT_T2//_MM_HINT_NTA
733                     EB_U32 i;
734                     for (i = 0; i < lcuHeight; i++)
735                     {
736                         char const* p = (char const*)(srcPtr + i*inputPaddedPicturePtr->strideY);
737                         _mm_prefetch(p, _MM_HINT_T2);
738                     }
739 
740 
741                     contextPtr->meContextPtr->lcuSrcPtr = &inputPaddedPicturePtr->bufferY[bufferIndex];
742                     contextPtr->meContextPtr->lcuSrcStride = inputPaddedPicturePtr->strideY;
743 
744 
745                     // Load the 1/4 decimated LCU from the 1/4 decimated input to the 1/4 intermediate LCU buffer
746                     if (pictureControlSetPtr->enableHmeLevel1Flag) {
747 
748                         bufferIndex = (quarterDecimatedPicturePtr->originY + (lcuOriginY >> 1)) * quarterDecimatedPicturePtr->strideY + quarterDecimatedPicturePtr->originX + (lcuOriginX >> 1);
749 
750                         for (lcuRow = 0; lcuRow < (lcuHeight >> 1); lcuRow++) {
751                             EB_MEMCPY((&(contextPtr->meContextPtr->quarterLcuBuffer[lcuRow * contextPtr->meContextPtr->quarterLcuBufferStride])), (&(quarterDecimatedPicturePtr->bufferY[bufferIndex + lcuRow * quarterDecimatedPicturePtr->strideY])), (lcuWidth >> 1) * sizeof(EB_U8));
752 
753                         }
754                     }
755 
756                     // Load the 1/16 decimated LCU from the 1/16 decimated input to the 1/16 intermediate LCU buffer
757                     if (pictureControlSetPtr->enableHmeLevel0Flag) {
758 
759                         bufferIndex = (sixteenthDecimatedPicturePtr->originY + (lcuOriginY >> 2)) * sixteenthDecimatedPicturePtr->strideY + sixteenthDecimatedPicturePtr->originX + (lcuOriginX >> 2);
760 
761                         {
762                             EB_U8  *framePtr = &sixteenthDecimatedPicturePtr->bufferY[bufferIndex];
763                             EB_U8  *localPtr = contextPtr->meContextPtr->sixteenthLcuBuffer;
764 
765                             for (lcuRow = 0; lcuRow < (lcuHeight >> 2); lcuRow += 2) {
766                                 EB_MEMCPY(localPtr, framePtr, (lcuWidth >> 2) * sizeof(EB_U8));
767                                 localPtr += 16;
768                                 framePtr += sixteenthDecimatedPicturePtr->strideY << 1;
769                             }
770                         }
771                     }
772 
773                     MotionEstimateLcu(
774                         pictureControlSetPtr,
775                         lcuIndex,
776                         lcuOriginX,
777                         lcuOriginY,
778                         contextPtr->meContextPtr,
779                         inputPicturePtr);
780                 }
781             }
782         }
783 
784 	    // OIS + Similar Collocated Checks + Stationary Edge Over Time Check
785         // LCU Loop
786 		for (yLcuIndex = yLcuStartIndex; yLcuIndex < yLcuEndIndex; ++yLcuIndex) {
787 			for (xLcuIndex = xLcuStartIndex; xLcuIndex < xLcuEndIndex; ++xLcuIndex) {
788 
789 				lcuOriginX = xLcuIndex * sequenceControlSetPtr->lcuSize;
790 				lcuOriginY = yLcuIndex * sequenceControlSetPtr->lcuSize;
791                 lcuIndex = (EB_U16)(xLcuIndex + yLcuIndex * pictureWidthInLcu);
792 
793 				OpenLoopIntraSearchLcu(
794 					pictureControlSetPtr,
795 					lcuIndex,
796 					contextPtr,
797 					inputPicturePtr);
798 
799                 // Derive Similar Collocated Flag
800                 DeriveSimilarCollocatedFlag(
801                     pictureControlSetPtr,
802                     lcuIndex);
803 
804                 //Check conditions for stationary edge over time Part 1
805                 StationaryEdgeOverUpdateOverTimeLcuPart1(
806                     sequenceControlSetPtr,
807                     pictureControlSetPtr,
808                     lcuIndex);
809 
810                 //Check conditions for stationary edge over time Part 2
811                 if (!pictureControlSetPtr->endOfSequenceFlag && sequenceControlSetPtr->staticConfig.lookAheadDistance != 0) {
812                     StationaryEdgeOverUpdateOverTimeLcuPart2(
813                         sequenceControlSetPtr,
814                         pictureControlSetPtr,
815                         lcuIndex);
816                 }
817 			}
818 		}
819 
820 		// ZZ SADs Computation
821 		// 1 lookahead frame is needed to get valid (0,0) SAD
822 		if (sequenceControlSetPtr->staticConfig.lookAheadDistance != 0) {
823 			// when DG is ON, the ZZ SADs are computed @ the PD process
824 			{
825 				// ZZ SADs Computation using decimated picture
826 				if (pictureControlSetPtr->pictureNumber > 0) {
827 
828                     ComputeDecimatedZzSad(
829                         contextPtr,
830                         sequenceControlSetPtr,
831                         pictureControlSetPtr,
832                         sixteenthDecimatedPicturePtr,
833                         xLcuStartIndex,
834                         xLcuEndIndex,
835                         yLcuStartIndex,
836                         yLcuEndIndex);
837 
838 				}
839 			}
840 		}
841 
842 
		// Calculate the ME Distortion and OIS Histograms
844         EbBlockOnMutex(pictureControlSetPtr->rcDistortionHistogramMutex);
845 		if (sequenceControlSetPtr->staticConfig.rateControlMode){
846 			if (pictureControlSetPtr->sliceType != EB_I_PICTURE){
847 				EB_U16 sadIntervalIndex;
848 				for (yLcuIndex = yLcuStartIndex; yLcuIndex < yLcuEndIndex; ++yLcuIndex) {
849 					for (xLcuIndex = xLcuStartIndex; xLcuIndex < xLcuEndIndex; ++xLcuIndex) {
850 
851 						lcuOriginX = xLcuIndex * sequenceControlSetPtr->lcuSize;
852 						lcuOriginY = yLcuIndex * sequenceControlSetPtr->lcuSize;
853 						lcuWidth = (sequenceControlSetPtr->lumaWidth - lcuOriginX) < MAX_LCU_SIZE ? sequenceControlSetPtr->lumaWidth - lcuOriginX : MAX_LCU_SIZE;
854 						lcuHeight = (sequenceControlSetPtr->lumaHeight - lcuOriginY) < MAX_LCU_SIZE ? sequenceControlSetPtr->lumaHeight - lcuOriginY : MAX_LCU_SIZE;
855 
856                         lcuIndex = (EB_U16)(xLcuIndex + yLcuIndex * pictureWidthInLcu);
857                         pictureControlSetPtr->interSadIntervalIndex[lcuIndex] = 0;
858                         pictureControlSetPtr->intraSadIntervalIndex[lcuIndex] = 0;
859 
860 						if (lcuWidth == MAX_LCU_SIZE && lcuHeight == MAX_LCU_SIZE) {
861 
862 
863 							sadIntervalIndex = (EB_U16)(pictureControlSetPtr->rcMEdistortion[lcuIndex] >> (12 - SAD_PRECISION_INTERVAL));//change 12 to 2*log2(64)
864 
865                             sadIntervalIndex = (EB_U16)(sadIntervalIndex >> 2);
866                             if (sadIntervalIndex > (NUMBER_OF_SAD_INTERVALS>>1) -1){
867                                 EB_U16 sadIntervalIndexTemp = sadIntervalIndex - ((NUMBER_OF_SAD_INTERVALS >> 1) - 1);
868 
869                                 sadIntervalIndex = ((NUMBER_OF_SAD_INTERVALS >> 1) - 1) + (sadIntervalIndexTemp >> 3);
870 
871                             }
872                             if (sadIntervalIndex >= NUMBER_OF_SAD_INTERVALS - 1)
873                                 sadIntervalIndex = NUMBER_OF_SAD_INTERVALS - 1;
874 
875 
876 
877                             pictureControlSetPtr->interSadIntervalIndex[lcuIndex] = sadIntervalIndex;
878                             pictureControlSetPtr->meDistortionHistogram[sadIntervalIndex] ++;
879 
880                             EB_U32                       bestOisCuIndex = 0;
881 
882 							//DOUBLE CHECK THIS PIECE OF CODE
883                             intraSadIntervalIndex = (EB_U32)
884 								(((pictureControlSetPtr->oisCu32Cu16Results[lcuIndex]->sortedOisCandidate[1][bestOisCuIndex].distortion +
885 								pictureControlSetPtr->oisCu32Cu16Results[lcuIndex]->sortedOisCandidate[2][bestOisCuIndex].distortion +
886 								pictureControlSetPtr->oisCu32Cu16Results[lcuIndex]->sortedOisCandidate[3][bestOisCuIndex].distortion +
887 								pictureControlSetPtr->oisCu32Cu16Results[lcuIndex]->sortedOisCandidate[4][bestOisCuIndex].distortion)) >> (12 - SAD_PRECISION_INTERVAL));//change 12 to 2*log2(64) ;
888 
889                             intraSadIntervalIndex = (EB_U16)(intraSadIntervalIndex >> 2);
890                             if (intraSadIntervalIndex > (NUMBER_OF_SAD_INTERVALS >> 1) - 1){
891                                 EB_U32 sadIntervalIndexTemp = intraSadIntervalIndex - ((NUMBER_OF_SAD_INTERVALS >> 1) - 1);
892 
893                                 intraSadIntervalIndex = ((NUMBER_OF_SAD_INTERVALS >> 1) - 1) + (sadIntervalIndexTemp >> 3);
894 
895                             }
896                             if (intraSadIntervalIndex >= NUMBER_OF_SAD_INTERVALS - 1)
897                                 intraSadIntervalIndex = NUMBER_OF_SAD_INTERVALS - 1;
898 
899 
900                             pictureControlSetPtr->intraSadIntervalIndex[lcuIndex] = intraSadIntervalIndex;
901                             pictureControlSetPtr->oisDistortionHistogram[intraSadIntervalIndex] ++;
902 
903 
904 
905 
906 							++pictureControlSetPtr->fullLcuCount;
907 						}
908 
909 					}
910 				}
911 			}
912 			else{
913 				EB_U32                       bestOisCuIndex = 0;
914 
915 
916 				for (yLcuIndex = yLcuStartIndex; yLcuIndex < yLcuEndIndex; ++yLcuIndex) {
917 					for (xLcuIndex = xLcuStartIndex; xLcuIndex < xLcuEndIndex; ++xLcuIndex) {
918 						lcuOriginX = xLcuIndex * sequenceControlSetPtr->lcuSize;
919 						lcuOriginY = yLcuIndex * sequenceControlSetPtr->lcuSize;
920 						lcuWidth = (sequenceControlSetPtr->lumaWidth - lcuOriginX) < MAX_LCU_SIZE ? sequenceControlSetPtr->lumaWidth - lcuOriginX : MAX_LCU_SIZE;
921 						lcuHeight = (sequenceControlSetPtr->lumaHeight - lcuOriginY) < MAX_LCU_SIZE ? sequenceControlSetPtr->lumaHeight - lcuOriginY : MAX_LCU_SIZE;
922 
923                         lcuIndex = (EB_U16)(xLcuIndex + yLcuIndex * pictureWidthInLcu);
924 
925                         pictureControlSetPtr->interSadIntervalIndex[lcuIndex] = 0;
926                         pictureControlSetPtr->intraSadIntervalIndex[lcuIndex] = 0;
927 
928 						if (lcuWidth == MAX_LCU_SIZE && lcuHeight == MAX_LCU_SIZE) {
929 
930 
931 							//DOUBLE CHECK THIS PIECE OF CODE
932 
933 							intraSadIntervalIndex = (EB_U32)
934 								(((pictureControlSetPtr->oisCu32Cu16Results[lcuIndex]->sortedOisCandidate[1][bestOisCuIndex].distortion +
935 								pictureControlSetPtr->oisCu32Cu16Results[lcuIndex]->sortedOisCandidate[2][bestOisCuIndex].distortion +
936 								pictureControlSetPtr->oisCu32Cu16Results[lcuIndex]->sortedOisCandidate[3][bestOisCuIndex].distortion +
937 								pictureControlSetPtr->oisCu32Cu16Results[lcuIndex]->sortedOisCandidate[4][bestOisCuIndex].distortion)) >> (12 - SAD_PRECISION_INTERVAL));//change 12 to 2*log2(64) ;
938 
939                             intraSadIntervalIndex = (EB_U16)(intraSadIntervalIndex >> 2);
940                             if (intraSadIntervalIndex > (NUMBER_OF_SAD_INTERVALS >> 1) - 1){
941                                 EB_U32 sadIntervalIndexTemp = intraSadIntervalIndex - ((NUMBER_OF_SAD_INTERVALS >> 1) - 1);
942 
943                                 intraSadIntervalIndex = ((NUMBER_OF_SAD_INTERVALS >> 1) - 1) + (sadIntervalIndexTemp >> 3);
944 
945                             }
946                             if (intraSadIntervalIndex >= NUMBER_OF_SAD_INTERVALS - 1)
947                                 intraSadIntervalIndex = NUMBER_OF_SAD_INTERVALS - 1;
948 
949                             pictureControlSetPtr->intraSadIntervalIndex[lcuIndex] = intraSadIntervalIndex;
950 							pictureControlSetPtr->oisDistortionHistogram[intraSadIntervalIndex] ++;
951 							++pictureControlSetPtr->fullLcuCount;
952 						}
953 
954 					}
955 				}
956 			}
957 		}
958         EbReleaseMutex(pictureControlSetPtr->rcDistortionHistogramMutex);
959 		// Get Empty Results Object
960 		EbGetEmptyObject(
961 			contextPtr->motionEstimationResultsOutputFifoPtr,
962 			&outputResultsWrapperPtr);
963 
964 		outputResultsPtr = (MotionEstimationResults_t*)outputResultsWrapperPtr->objectPtr;
965 		outputResultsPtr->pictureControlSetWrapperPtr = inputResultsPtr->pictureControlSetWrapperPtr;
966 		outputResultsPtr->segmentIndex = segmentIndex;
967 
968 		// Release the Input Results
969 		EbReleaseObject(inputResultsWrapperPtr);
970 
971 		// Post the Full Results Object
972 		EbPostFullObject(outputResultsWrapperPtr);
973 #if DEADLOCK_DEBUG
974         if ((pictureControlSetPtr->pictureNumber >= MIN_POC) && (pictureControlSetPtr->pictureNumber <= MAX_POC))
975             if (segmentIndex == (EB_U32)(pictureControlSetPtr->meSegmentsTotalCount - 1))
976                 SVT_LOG("POC %lu ME OUT \n", pictureControlSetPtr->pictureNumber);
977 #endif
978 	}
979 	return EB_NULL;
980 }
981