1 /*!
2  * \copy
3  *     Copyright (c)  2009-2013, Cisco Systems
4  *     All rights reserved.
5  *
6  *     Redistribution and use in source and binary forms, with or without
7  *     modification, are permitted provided that the following conditions
8  *     are met:
9  *
10  *        * Redistributions of source code must retain the above copyright
11  *          notice, this list of conditions and the following disclaimer.
12  *
13  *        * Redistributions in binary form must reproduce the above copyright
14  *          notice, this list of conditions and the following disclaimer in
15  *          the documentation and/or other materials provided with the
16  *          distribution.
17  *
18  *     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19  *     "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20  *     LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
21  *     FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
22  *     COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
23  *     INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
24  *     BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
25  *     LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
26  *     CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27  *     LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
28  *     ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  *     POSSIBILITY OF SUCH DAMAGE.
30  *
31  *
32  * \file  svc motion estimate.c
33  *
34  * \brief  Interfaces introduced in svc mb motion estimation
35  *
36  * \date  08/11/2009 Created
37  *
38  *************************************************************************************
39  */
40 
41 #include "cpu_core.h"
42 #include "ls_defines.h"
43 #include "svc_motion_estimate.h"
44 #include "wels_transpose_matrix.h"
45 
46 namespace WelsEnc {
47 
/*
 * QStep << 4, stored as int32_t, indexed by QP (0..51).
 * Every entry is exactly twice the entry six positions earlier.
 */
const int32_t QStepx16ByQp[52] = {
  /* QP  0.. 5 */   10,   11,   13,   14,   16,   18,
  /* QP  6..11 */   20,   22,   26,   28,   32,   36,
  /* QP 12..17 */   40,   44,   52,   56,   64,   72,
  /* QP 18..23 */   80,   88,  104,  112,  128,  144,
  /* QP 24..29 */  160,  176,  208,  224,  256,  288,
  /* QP 30..35 */  320,  352,  416,  448,  512,  576,
  /* QP 36..41 */  640,  704,  832,  896, 1024, 1152,
  /* QP 42..47 */ 1280, 1408, 1664, 1792, 2048, 2304,
  /* QP 48..51 */ 2560, 2816, 3328, 3584
};
59 
UpdateMeResults(const SMVUnitXY ksBestMv,const uint32_t kiBestSadCost,uint8_t * pRef,SWelsME * pMe)60 static inline void UpdateMeResults (const SMVUnitXY ksBestMv, const uint32_t kiBestSadCost, uint8_t* pRef,
61                                     SWelsME* pMe) {
62   pMe->sMv = ksBestMv;
63   pMe->pRefMb = pRef;
64   pMe->uiSadCost = kiBestSadCost;
65 }
MeEndIntepelSearch(SWelsME * pMe)66 static inline void MeEndIntepelSearch (SWelsME* pMe) {
67   /* -> qpel mv */
68   pMe->sMv.iMvX *= (1 << 2);
69   pMe->sMv.iMvY *= (1 << 2);
70   pMe->uiSatdCost = pMe->uiSadCost;
71 }
72 
WelsInitMeFunc(SWelsFuncPtrList * pFuncList,uint32_t uiCpuFlag,bool bScreenContent)73 void WelsInitMeFunc (SWelsFuncPtrList* pFuncList, uint32_t uiCpuFlag, bool bScreenContent) {
74   pFuncList->pfUpdateFMESwitch = UpdateFMESwitchNull;
75 
76   if (!bScreenContent) {
77     pFuncList->pfCheckDirectionalMv = CheckDirectionalMvFalse;
78     pFuncList->pfCalculateBlockFeatureOfFrame[0] =
79       pFuncList->pfCalculateBlockFeatureOfFrame[1] = NULL;
80     pFuncList->pfCalculateSingleBlockFeature[0] =
81       pFuncList->pfCalculateSingleBlockFeature[1] = NULL;
82 
83   } else {
84     pFuncList->pfCheckDirectionalMv = CheckDirectionalMv;
85 
86     //for cross serarch
87     pFuncList->pfVerticalFullSearch = LineFullSearch_c;
88     pFuncList->pfHorizontalFullSearch = LineFullSearch_c;
89 
90 #if defined (X86_ASM)
91     if (uiCpuFlag & WELS_CPU_SSE41) {
92       pFuncList->pfSampleSadHor8[0] = SampleSad8x8Hor8_sse41;
93       pFuncList->pfSampleSadHor8[1] = SampleSad16x16Hor8_sse41;
94       pFuncList->pfVerticalFullSearch = VerticalFullSearchUsingSSE41;
95       pFuncList->pfHorizontalFullSearch = HorizontalFullSearchUsingSSE41;
96     }
97 #endif
98 
99     //for feature search
100     pFuncList->pfInitializeHashforFeature = InitializeHashforFeature_c;
101     pFuncList->pfFillQpelLocationByFeatureValue = FillQpelLocationByFeatureValue_c;
102     pFuncList->pfCalculateBlockFeatureOfFrame[0] = SumOf8x8BlockOfFrame_c;
103     pFuncList->pfCalculateBlockFeatureOfFrame[1] = SumOf16x16BlockOfFrame_c;
104     //TODO: it is possible to differentiate width that is times of 8, so as to accelerate the speed when width is times of 8?
105     pFuncList->pfCalculateSingleBlockFeature[0] = SumOf8x8SingleBlock_c;
106     pFuncList->pfCalculateSingleBlockFeature[1] = SumOf16x16SingleBlock_c;
107 #if defined (X86_ASM)
108     if (uiCpuFlag & WELS_CPU_SSE2) {
109       //for feature search
110       pFuncList->pfInitializeHashforFeature = InitializeHashforFeature_sse2;
111       pFuncList->pfFillQpelLocationByFeatureValue = FillQpelLocationByFeatureValue_sse2;
112       pFuncList->pfCalculateBlockFeatureOfFrame[0] = SumOf8x8BlockOfFrame_sse2;
113       pFuncList->pfCalculateBlockFeatureOfFrame[1] = SumOf16x16BlockOfFrame_sse2;
114       //TODO: it is possible to differentiate width that is times of 8, so as to accelerate the speed when width is times of 8?
115       pFuncList->pfCalculateSingleBlockFeature[0] = SumOf8x8SingleBlock_sse2;
116       pFuncList->pfCalculateSingleBlockFeature[1] = SumOf16x16SingleBlock_sse2;
117     }
118     if (uiCpuFlag & WELS_CPU_SSE41) {
119       //for feature search
120       pFuncList->pfCalculateBlockFeatureOfFrame[0] = SumOf8x8BlockOfFrame_sse4;
121       pFuncList->pfCalculateBlockFeatureOfFrame[1] = SumOf16x16BlockOfFrame_sse4;
122     }
123 #endif
124 
125 #if defined (HAVE_NEON)
126     if (uiCpuFlag & WELS_CPU_NEON) {
127       //for feature search
128       pFuncList->pfInitializeHashforFeature = InitializeHashforFeature_neon;
129       pFuncList->pfFillQpelLocationByFeatureValue = FillQpelLocationByFeatureValue_neon;
130       pFuncList->pfCalculateBlockFeatureOfFrame[0] = SumOf8x8BlockOfFrame_neon;
131       pFuncList->pfCalculateBlockFeatureOfFrame[1] = SumOf16x16BlockOfFrame_neon;
132       //TODO: it is possible to differentiate width that is times of 8, so as to accelerate the speed when width is times of 8?
133       pFuncList->pfCalculateSingleBlockFeature[0] = SumOf8x8SingleBlock_neon;
134       pFuncList->pfCalculateSingleBlockFeature[1] = SumOf16x16SingleBlock_neon;
135     }
136 #endif
137 
138 #if defined (HAVE_NEON_AARCH64)
139     if (uiCpuFlag & WELS_CPU_NEON) {
140       //for feature search
141       pFuncList->pfInitializeHashforFeature = InitializeHashforFeature_AArch64_neon;
142       pFuncList->pfFillQpelLocationByFeatureValue = FillQpelLocationByFeatureValue_AArch64_neon;
143       pFuncList->pfCalculateBlockFeatureOfFrame[0] = SumOf8x8BlockOfFrame_AArch64_neon;
144       pFuncList->pfCalculateBlockFeatureOfFrame[1] = SumOf16x16BlockOfFrame_AArch64_neon;
145       //TODO: it is possible to differentiate width that is times of 8, so as to accelerate the speed when width is times of 8?
146       pFuncList->pfCalculateSingleBlockFeature[0] = SumOf8x8SingleBlock_AArch64_neon;
147       pFuncList->pfCalculateSingleBlockFeature[1] = SumOf16x16SingleBlock_AArch64_neon;
148     }
149 #endif
150   }
151 }
152 
153 /*!
154  * \brief  BL mb motion estimate search
155  *
156  * \param  enc      Wels encoder context
157  * \param  pMe          Wels me information
158  *
159  * \return  NONE
160  */
161 
WelsMotionEstimateSearch(SWelsFuncPtrList * pFuncList,SDqLayer * pCurDqLayer,SWelsME * pMe,SSlice * pSlice)162 void WelsMotionEstimateSearch (SWelsFuncPtrList* pFuncList, SDqLayer* pCurDqLayer, SWelsME* pMe, SSlice* pSlice) {
163   const int32_t kiStrideEnc = pCurDqLayer->iEncStride[0];
164   const int32_t kiStrideRef = pCurDqLayer->pRefPic->iLineSize[0];
165 
166   //  Step 1: Initial point prediction
167   if (!WelsMotionEstimateInitialPoint (pFuncList, pMe, pSlice, kiStrideEnc, kiStrideRef)) {
168     pFuncList->pfSearchMethod[pMe->uiBlockSize] (pFuncList, pMe, pSlice, kiStrideEnc, kiStrideRef);
169     MeEndIntepelSearch (pMe);
170   }
171 
172   pFuncList->pfCalculateSatd (pFuncList->sSampleDealingFuncs.pfSampleSatd[pMe->uiBlockSize], pMe, kiStrideEnc,
173                               kiStrideRef);
174 }
175 
WelsMotionEstimateSearchStatic(SWelsFuncPtrList * pFuncList,SDqLayer * pCurDqLayer,SWelsME * pMe,SSlice * pLpslice)176 void WelsMotionEstimateSearchStatic (SWelsFuncPtrList* pFuncList, SDqLayer* pCurDqLayer, SWelsME* pMe,
177                                      SSlice* pLpslice) {
178   const int32_t kiStrideEnc = pCurDqLayer->iEncStride[0];
179   const int32_t kiStrideRef = pCurDqLayer->pRefPic->iLineSize[0];
180 
181   pMe->sMv.iMvX = pMe->sMv.iMvY = 0;
182   pMe->uiSadCost =
183     pFuncList->sSampleDealingFuncs.pfSampleSad[pMe->uiBlockSize] (pMe->pEncMb, kiStrideEnc, pMe->pRefMb, kiStrideRef) ;
184   pMe->uiSadCost += COST_MVD (pMe->pMvdCost, - pMe->sMvp.iMvX, - pMe->sMvp.iMvY);
185   MeEndIntepelSearch (pMe);
186   pFuncList->pfCalculateSatd (pFuncList->sSampleDealingFuncs.pfSampleSatd[pMe->uiBlockSize], pMe, kiStrideEnc,
187                               kiStrideRef);
188 }
189 
WelsMotionEstimateSearchScrolled(SWelsFuncPtrList * pFuncList,SDqLayer * pCurDqLayer,SWelsME * pMe,SSlice * pSlice)190 void WelsMotionEstimateSearchScrolled (SWelsFuncPtrList* pFuncList, SDqLayer* pCurDqLayer, SWelsME* pMe,
191                                        SSlice* pSlice) {
192   const int32_t kiStrideEnc = pCurDqLayer->iEncStride[0];
193   const int32_t kiStrideRef = pCurDqLayer->pRefPic->iLineSize[0];
194 
195   pMe->sMv = pMe->sDirectionalMv;
196   pMe->pRefMb = pMe->pColoRefMb + pMe->sMv.iMvY * kiStrideRef + pMe->sMv.iMvX;
197   pMe->uiSadCost =
198     pFuncList->sSampleDealingFuncs.pfSampleSad[pMe->uiBlockSize] (pMe->pEncMb, kiStrideEnc, pMe->pRefMb, kiStrideRef)
199     + COST_MVD (pMe->pMvdCost, (pMe->sMv.iMvX * (1 << 2)) - pMe->sMvp.iMvX, (pMe->sMv.iMvY * (1 << 2)) - pMe->sMvp.iMvY);
200   MeEndIntepelSearch (pMe);
201   pFuncList->pfCalculateSatd (pFuncList->sSampleDealingFuncs.pfSampleSatd[pMe->uiBlockSize], pMe, kiStrideEnc,
202                               kiStrideRef);
203 }
204 /*!
205  * \brief  EL mb motion estimate initial point testing
206  *
207  * \param  pix_pFuncList  SSampleDealingFunc
208  * \param  pMe          Wels me information
209  * \param  mv_range  search range in motion estimate
210  * \param  point      the best match point in motion estimation
211  *
212  * \return  NONE
213  */
WelsMotionEstimateInitialPoint(SWelsFuncPtrList * pFuncList,SWelsME * pMe,SSlice * pSlice,int32_t iStrideEnc,int32_t iStrideRef)214 bool WelsMotionEstimateInitialPoint (SWelsFuncPtrList* pFuncList, SWelsME* pMe, SSlice* pSlice, int32_t iStrideEnc,
215                                      int32_t iStrideRef) {
216   PSampleSadSatdCostFunc pSad    = pFuncList->sSampleDealingFuncs.pfSampleSad[pMe->uiBlockSize];
217   const uint16_t* kpMvdCost  = pMe->pMvdCost;
218   uint8_t* const kpEncMb    = pMe->pEncMb;
219   int16_t iMvc0, iMvc1;
220   int32_t iSadCost;
221   int32_t iBestSadCost;
222   uint8_t* pRefMb;
223   uint8_t* pFref2;
224   uint32_t i;
225   const uint32_t kuiMvcNum    = pSlice->uiMvcNum;
226   const SMVUnitXY* kpMvcList  = &pSlice->sMvc[0];
227   const SMVUnitXY ksMvStartMin    = pSlice->sMvStartMin;
228   const SMVUnitXY ksMvStartMax    = pSlice->sMvStartMax;
229   const SMVUnitXY ksMvp    = pMe->sMvp;
230   SMVUnitXY sMv;
231 
232   //  Step 1: Initial point prediction
233   // init with sMvp
234   sMv.iMvX  = WELS_CLIP3 ((2 + ksMvp.iMvX) >> 2, ksMvStartMin.iMvX, ksMvStartMax.iMvX);
235   sMv.iMvY  = WELS_CLIP3 ((2 + ksMvp.iMvY) >> 2, ksMvStartMin.iMvY, ksMvStartMax.iMvY);
236 
237   pRefMb = &pMe->pRefMb[sMv.iMvY * iStrideRef + sMv.iMvX];
238 
239   iBestSadCost = pSad (kpEncMb, iStrideEnc, pRefMb, iStrideRef);
240   iBestSadCost += COST_MVD (kpMvdCost, ((sMv.iMvX) * (1 << 2)) - ksMvp.iMvX, ((sMv.iMvY) * (1 << 2)) - ksMvp.iMvY);
241 
242   for (i = 0; i < kuiMvcNum; i++) {
243     //clipping here is essential since some pOut-of-range MVC may happen here (i.e., refer to baseMV)
244     iMvc0 = WELS_CLIP3 ((2 + kpMvcList[i].iMvX) >> 2, ksMvStartMin.iMvX, ksMvStartMax.iMvX);
245     iMvc1 = WELS_CLIP3 ((2 + kpMvcList[i].iMvY) >> 2, ksMvStartMin.iMvY, ksMvStartMax.iMvY);
246 
247     if (((iMvc0 - sMv.iMvX) || (iMvc1 - sMv.iMvY))) {
248       pFref2 = &pMe->pRefMb[iMvc1 * iStrideRef + iMvc0];
249 
250       iSadCost = pSad (kpEncMb, iStrideEnc, pFref2, iStrideRef) +
251                  COST_MVD (kpMvdCost, (iMvc0 * (1 << 2)) - ksMvp.iMvX, (iMvc1 * (1 << 2)) - ksMvp.iMvY);
252 
253       if (iSadCost < iBestSadCost) {
254         sMv.iMvX = iMvc0;
255         sMv.iMvY = iMvc1;
256         pRefMb = pFref2;
257         iBestSadCost = iSadCost;
258       }
259     }
260   }
261 
262   if (pFuncList->pfCheckDirectionalMv
263       (pSad, pMe, ksMvStartMin, ksMvStartMax, iStrideEnc, iStrideRef, iSadCost)) {
264     sMv = pMe->sDirectionalMv;
265     pRefMb =  &pMe->pColoRefMb[sMv.iMvY * iStrideRef + sMv.iMvX];
266     iBestSadCost = iSadCost;
267   }
268 
269   UpdateMeResults (sMv, iBestSadCost, pRefMb, pMe);
270   if (iBestSadCost < static_cast<int32_t> (pMe->uSadPredISatd.uiSadPred)) {
271     //Initial point early Stop
272     MeEndIntepelSearch (pMe);
273     return true;
274   }
275   return false;
276 }
277 
CalculateSatdCost(PSampleSadSatdCostFunc pSatd,SWelsME * pMe,const int32_t kiEncStride,const int32_t kiRefStride)278 void CalculateSatdCost (PSampleSadSatdCostFunc pSatd, SWelsME* pMe,
279                         const int32_t kiEncStride, const int32_t kiRefStride) {
280   pMe->uSadPredISatd.uiSatd = pSatd (pMe->pEncMb, kiEncStride, pMe->pRefMb, kiRefStride);
281   pMe->uiSatdCost = pMe->uSadPredISatd.uiSatd + COST_MVD (pMe->pMvdCost, pMe->sMv.iMvX - pMe->sMvp.iMvX,
282                     pMe->sMv.iMvY - pMe->sMvp.iMvY);
283 }
NotCalculateSatdCost(PSampleSadSatdCostFunc pSatd,SWelsME * pMe,const int32_t kiEncStride,const int32_t kiRefStride)284 void NotCalculateSatdCost (PSampleSadSatdCostFunc pSatd, SWelsME* pMe,
285                            const int32_t kiEncStride, const int32_t kiRefStride) {
286 }
287 
288 
289 /////////////////////////
290 // Diamond Search Basics
291 /////////////////////////
WelsMeSadCostSelect(int32_t * iSadCost,const uint16_t * kpMvdCost,int32_t * pBestCost,const int32_t kiDx,const int32_t kiDy,int32_t * pIx,int32_t * pIy)292 bool WelsMeSadCostSelect (int32_t* iSadCost, const uint16_t* kpMvdCost, int32_t* pBestCost, const int32_t kiDx,
293                           const int32_t kiDy, int32_t* pIx, int32_t* pIy) {
294   int32_t iTempSadCost[4];
295   int32_t iInputSadCost = *pBestCost;
296   iTempSadCost[0] = iSadCost[0] + COST_MVD (kpMvdCost, kiDx, kiDy - 4);
297   iTempSadCost[1] = iSadCost[1] + COST_MVD (kpMvdCost, kiDx, kiDy + 4);
298   iTempSadCost[2] = iSadCost[2] + COST_MVD (kpMvdCost, kiDx - 4, kiDy);
299   iTempSadCost[3] = iSadCost[3] + COST_MVD (kpMvdCost, kiDx + 4, kiDy);
300 
301   if (iTempSadCost[0] < *pBestCost) {
302     *pBestCost = iTempSadCost[0];
303     *pIx = 0;
304     *pIy = 1;
305   }
306 
307   if (iTempSadCost[1] < *pBestCost) {
308     *pBestCost = iTempSadCost[1];
309     *pIx = 0;
310     *pIy = -1;
311   }
312 
313   if (iTempSadCost[2] < *pBestCost) {
314     *pBestCost = iTempSadCost[2];
315     *pIx = 1;
316     *pIy = 0;
317   }
318 
319   if (iTempSadCost[3] < *pBestCost) {
320     *pBestCost = iTempSadCost[3];
321     *pIx = -1;
322     *pIy = 0;
323   }
324   return (*pBestCost == iInputSadCost);
325 }
326 
WelsDiamondSearch(SWelsFuncPtrList * pFuncList,SWelsME * pMe,SSlice * pSlice,const int32_t kiStrideEnc,const int32_t kiStrideRef)327 void WelsDiamondSearch (SWelsFuncPtrList* pFuncList, SWelsME* pMe, SSlice* pSlice,
328                         const int32_t kiStrideEnc,  const int32_t kiStrideRef) {
329   PSample4SadCostFunc      pSad          =  pFuncList->sSampleDealingFuncs.pfSample4Sad[pMe->uiBlockSize];
330 
331   uint8_t* pFref = pMe->pRefMb;
332   uint8_t* const kpEncMb = pMe->pEncMb;
333   const uint16_t* kpMvdCost = pMe->pMvdCost;
334 
335   const SMVUnitXY ksMvStartMin    = pSlice->sMvStartMin;
336   const SMVUnitXY ksMvStartMax    = pSlice->sMvStartMax;
337 
338   int32_t iMvDx = ((pMe->sMv.iMvX) * (1 << 2)) - pMe->sMvp.iMvX;
339   int32_t iMvDy = ((pMe->sMv.iMvY) * (1 << 2)) - pMe->sMvp.iMvY;
340 
341   uint8_t* pRefMb = pFref;
342   int32_t iBestCost = (pMe->uiSadCost);
343 
344   int32_t iTimeThreshold = ITERATIVE_TIMES;
345   ENFORCE_STACK_ALIGN_1D (int32_t, iSadCosts, 4, 16)
346 
347   while (iTimeThreshold--) {
348     pMe->sMv.iMvX = (iMvDx + pMe->sMvp.iMvX) >> 2;
349     pMe->sMv.iMvY = (iMvDy + pMe->sMvp.iMvY) >> 2;
350     if (!CheckMvInRange (pMe->sMv, ksMvStartMin, ksMvStartMax))
351       continue;
352     pSad (kpEncMb, kiStrideEnc, pRefMb, kiStrideRef, &iSadCosts[0]);
353 
354     int32_t iX, iY;
355 
356     const bool kbIsBestCostWorse = WelsMeSadCostSelect (iSadCosts, kpMvdCost, &iBestCost, iMvDx, iMvDy, &iX, &iY);
357     if (kbIsBestCostWorse)
358       break;
359 
360     iMvDx -= (iX * (1 << 2)) ;
361     iMvDy -= (iY * (1 << 2)) ;
362 
363     pRefMb -= (iX + iY * kiStrideRef);
364 
365   }
366 
367   /* integer-pel mv */
368   pMe->sMv.iMvX = (iMvDx + pMe->sMvp.iMvX) >> 2;
369   pMe->sMv.iMvY = (iMvDy + pMe->sMvp.iMvY) >> 2;
370   pMe->uiSatdCost = pMe->uiSadCost = (iBestCost);
371   pMe->pRefMb = pRefMb;
372 }
373 
374 /////////////////////////
375 // DirectionalMv Basics
376 /////////////////////////
CheckDirectionalMv(PSampleSadSatdCostFunc pSad,SWelsME * pMe,const SMVUnitXY ksMinMv,const SMVUnitXY ksMaxMv,const int32_t kiEncStride,const int32_t kiRefStride,int32_t & iBestSadCost)377 bool CheckDirectionalMv (PSampleSadSatdCostFunc pSad, SWelsME* pMe,
378                          const SMVUnitXY ksMinMv, const SMVUnitXY ksMaxMv, const int32_t kiEncStride, const int32_t kiRefStride,
379                          int32_t& iBestSadCost) {
380   const int16_t kiMvX = pMe->sDirectionalMv.iMvX;
381   const int16_t kiMvY = pMe->sDirectionalMv.iMvY;
382 
383   //Check MV from scrolling detection
384   if ((BLOCK_16x16 != pMe->uiBlockSize) //scrolled_MV with P16x16 is checked SKIP checking function
385       && (kiMvX | kiMvY)   //(0,0) checked in ordinary initial point checking
386       && CheckMvInRange (pMe->sDirectionalMv, ksMinMv, ksMaxMv)) {
387     uint8_t* pRef = &pMe->pColoRefMb[kiMvY * kiRefStride + kiMvX];
388     uint32_t uiCurrentSadCost = pSad (pMe->pEncMb, kiEncStride,  pRef, kiRefStride) +
389                                 COST_MVD (pMe->pMvdCost, (kiMvX * (1 << 2)) - pMe->sMvp.iMvX, (kiMvY * (1 << 2)) - pMe->sMvp.iMvY);
390     if (uiCurrentSadCost < pMe->uiSadCost) {
391       iBestSadCost = uiCurrentSadCost;
392       return true;
393     }
394   }
395   return false;
396 }
397 
CheckDirectionalMvFalse(PSampleSadSatdCostFunc pSad,SWelsME * vpMe,const SMVUnitXY ksMinMv,const SMVUnitXY ksMaxMv,const int32_t kiEncStride,const int32_t kiRefStride,int32_t & iBestSadCost)398 bool CheckDirectionalMvFalse (PSampleSadSatdCostFunc pSad, SWelsME* vpMe,
399                               const SMVUnitXY ksMinMv, const SMVUnitXY ksMaxMv, const int32_t kiEncStride, const int32_t kiRefStride,
400                               int32_t& iBestSadCost) {
401   return false;
402 }
403 
404 /////////////////////////
405 // Cross Search Basics
406 /////////////////////////
407 #if defined (X86_ASM)
/*!
 * \brief  Build the MVD costs of 8 consecutive integer-pel candidates
 *
 * \param  pMvdCost     out: 8 costs
 * \param  kiStartMv    integer-pel offset of the first candidate; the table is read at kiStartMv * 4
 * \param  pMvdTable    MVD cost table; consecutive candidates are 4 entries apart
 * \param  kiFixedCost  constant added to every entry
 *
 * \return  NONE
 */
void CalcMvdCostx8_c (uint16_t* pMvdCost, const int32_t kiStartMv, uint16_t* pMvdTable, const uint16_t kiFixedCost) {
  const uint16_t* pFirstEntry = pMvdTable + kiStartMv * 4;
  for (int32_t iLane = 0; iLane < 8; ++iLane) {
    pMvdCost[iLane] = pFirstEntry[iLane * 4] + kiFixedCost;
  }
}
VerticalFullSearchUsingSSE41(SWelsFuncPtrList * pFuncList,SWelsME * pMe,uint16_t * pMvdTable,const int32_t kiEncStride,const int32_t kiRefStride,const int16_t kiMinMv,const int16_t kiMaxMv,const bool bVerticalSearch)417 void VerticalFullSearchUsingSSE41 (SWelsFuncPtrList* pFuncList, SWelsME* pMe,
418                                    uint16_t* pMvdTable,
419                                    const int32_t kiEncStride, const int32_t kiRefStride,
420                                    const int16_t kiMinMv, const int16_t kiMaxMv,
421                                    const bool bVerticalSearch) {
422   uint8_t*  kpEncMb = pMe->pEncMb;
423   const int32_t kiCurMeBlockPix = pMe->iCurMeBlockPixY;
424   uint8_t* pRef         = &pMe->pColoRefMb[kiMinMv * kiRefStride];
425 
426   const int32_t kiCurMeBlockPixY = pMe->iCurMeBlockPixY;
427 
428   int32_t iMinPos = kiCurMeBlockPixY + kiMinMv;
429   int32_t iMaxPos = kiCurMeBlockPixY + kiMaxMv;
430   int32_t iFixedMvd = * (pMvdTable - pMe->sMvp.iMvX);
431   uint16_t* pMvdCost  = & (pMvdTable[ (kiMinMv * (1 << 2)) - pMe->sMvp.iMvY]);
432   int16_t iStartMv = 0;
433 
434 
435   const int32_t kIsBlock16x16 = pMe->uiBlockSize == BLOCK_16x16;
436   const int32_t kiEdgeBlocks = kIsBlock16x16 ? 16 : 8;
437   PSampleSadHor8Func pSampleSadHor8 = pFuncList->pfSampleSadHor8[kIsBlock16x16];
438   PSampleSadSatdCostFunc pSad = pFuncList->sSampleDealingFuncs.pfSampleSad[pMe->uiBlockSize];
439   PTransposeMatrixBlockFunc TransposeMatrixBlock = kIsBlock16x16 ? TransposeMatrixBlock16x16_sse2 :
440       TransposeMatrixBlock8x8_mmx;
441   PTransposeMatrixBlocksFunc TransposeMatrixBlocks = kIsBlock16x16 ? TransposeMatrixBlocksx16_sse2 :
442       TransposeMatrixBlocksx8_mmx;
443 
444   const int32_t kiDiff   = iMaxPos - iMinPos;
445   const int32_t kiRowNum  = WELS_ALIGN ((kiDiff - kiEdgeBlocks + 1), kiEdgeBlocks);
446   const int32_t kiBlocksNum  = kIsBlock16x16 ? (kiRowNum >> 4) : (kiRowNum >> 3);
447   int32_t iCountLoop8  = (kiRowNum - kiEdgeBlocks) >> 3;
448   const int32_t kiRemainingVectors  = kiDiff - (iCountLoop8 << 3);
449   const int32_t kiMatrixStride  = MAX_VERTICAL_MV_RANGE;
450   ENFORCE_STACK_ALIGN_2D (uint8_t, uiMatrixRef, 16, kiMatrixStride, 16);  // transpose matrix result for ref
451   ENFORCE_STACK_ALIGN_2D (uint8_t, uiMatrixEnc, 16, 16, 16);     // transpose matrix result for enc
452   assert (kiRowNum <= kiMatrixStride); // make sure effective memory
453 
454   TransposeMatrixBlock (&uiMatrixEnc[0][0], 16, kpEncMb, kiEncStride);
455   TransposeMatrixBlocks (&uiMatrixRef[0][0], kiMatrixStride, pRef, kiRefStride, kiBlocksNum);
456   ENFORCE_STACK_ALIGN_1D (uint16_t, uiBaseCost, 8, 16);
457   int32_t iTargetPos   = iMinPos;
458   int16_t iBestPos    = pMe->sMv.iMvX;
459   uint32_t uiBestCost   = pMe->uiSadCost;
460   uint32_t uiCostMin;
461   int32_t iIndexMinPos;
462   kpEncMb = &uiMatrixEnc[0][0];
463   pRef = &uiMatrixRef[0][0];
464 
465   while (iCountLoop8 > 0) {
466     CalcMvdCostx8_c (uiBaseCost, iStartMv, pMvdCost, iFixedMvd);
467     uiCostMin = pSampleSadHor8 (kpEncMb, 16, pRef, kiMatrixStride, uiBaseCost, &iIndexMinPos);
468     if (uiCostMin < uiBestCost) {
469       uiBestCost = uiCostMin;
470       iBestPos  = iTargetPos + iIndexMinPos;
471     }
472     iTargetPos += 8;
473     pRef += 8;
474     iStartMv += 8;
475     -- iCountLoop8;
476   }
477   if (kiRemainingVectors > 0) {
478     kpEncMb = pMe->pEncMb;
479     pRef = &pMe->pColoRefMb[ (iTargetPos - kiCurMeBlockPix) * kiRefStride];
480     while (iTargetPos < iMaxPos) {
481       const uint16_t uiMvdCost = pMvdCost[iStartMv * (1 << 2)];
482       uint32_t uiSadCost = pSad (kpEncMb, kiEncStride, pRef, kiRefStride) + (iFixedMvd + uiMvdCost);
483       if (uiSadCost < uiBestCost) {
484         uiBestCost = uiSadCost;
485         iBestPos = iTargetPos;
486       }
487       iStartMv++;
488       pRef += kiRefStride;
489       ++iTargetPos;
490     }
491   }
492   if (uiBestCost < pMe->uiSadCost) {
493     SMVUnitXY sBestMv;
494     sBestMv.iMvX = 0;
495     sBestMv.iMvY = iBestPos - kiCurMeBlockPix;
496     UpdateMeResults (sBestMv, uiBestCost, &pMe->pColoRefMb[sBestMv.iMvY * kiRefStride], pMe);
497   }
498 }
499 
HorizontalFullSearchUsingSSE41(SWelsFuncPtrList * pFuncList,SWelsME * pMe,uint16_t * pMvdTable,const int32_t kiEncStride,const int32_t kiRefStride,const int16_t kiMinMv,const int16_t kiMaxMv,const bool bVerticalSearch)500 void HorizontalFullSearchUsingSSE41 (SWelsFuncPtrList* pFuncList, SWelsME* pMe,
501                                      uint16_t* pMvdTable,
502                                      const int32_t kiEncStride, const int32_t kiRefStride,
503                                      const int16_t kiMinMv, const int16_t kiMaxMv,
504                                      const bool bVerticalSearch) {
505   uint8_t* kpEncMb = pMe->pEncMb;
506 
507   const int32_t iCurMeBlockPixX = pMe->iCurMeBlockPixX;
508   int32_t iMinPos = iCurMeBlockPixX + kiMinMv;
509   int32_t iMaxPos = iCurMeBlockPixX + kiMaxMv;
510   int32_t iFixedMvd = * (pMvdTable - pMe->sMvp.iMvY);
511   uint16_t* pMvdCost  = & (pMvdTable[ (kiMinMv * (1 << 2)) - pMe->sMvp.iMvX]);
512   int16_t iStartMv = 0;
513   uint8_t* pRef         = &pMe->pColoRefMb[kiMinMv];
514   const int32_t kIsBlock16x16 = pMe->uiBlockSize == BLOCK_16x16;
515   PSampleSadHor8Func pSampleSadHor8 = pFuncList->pfSampleSadHor8[kIsBlock16x16];
516   PSampleSadSatdCostFunc pSad = pFuncList->sSampleDealingFuncs.pfSampleSad[pMe->uiBlockSize];
517   ENFORCE_STACK_ALIGN_1D (uint16_t, uiBaseCost, 8, 16);
518   const int32_t kiNumVector = iMaxPos - iMinPos;
519   int32_t iCountLoop8 = kiNumVector >> 3;
520   const int32_t kiRemainingLoop8 = kiNumVector & 7;
521   int32_t iTargetPos   = iMinPos;
522   int16_t iBestPos    = pMe->sMv.iMvX;
523   uint32_t uiBestCost   = pMe->uiSadCost;
524   uint32_t uiCostMin;
525   int32_t iIndexMinPos;
526 
527   while (iCountLoop8 > 0) {
528     CalcMvdCostx8_c (uiBaseCost, iStartMv, pMvdCost, iFixedMvd);
529     uiCostMin = pSampleSadHor8 (kpEncMb, kiEncStride, pRef, kiRefStride, uiBaseCost, &iIndexMinPos);
530     if (uiCostMin < uiBestCost) {
531       uiBestCost = uiCostMin;
532       iBestPos  = iTargetPos + iIndexMinPos;
533     }
534     iTargetPos += 8;
535     pRef += 8;
536     iStartMv += 8;
537     -- iCountLoop8;
538   }
539   if (kiRemainingLoop8 > 0) {
540     while (iTargetPos < iMaxPos) {
541       const uint16_t uiMvdCost = pMvdCost[iStartMv * (1 << 2)];
542       uint32_t uiSadCost = pSad (kpEncMb, kiEncStride, pRef, kiRefStride) + (iFixedMvd + uiMvdCost);
543       if (uiSadCost < uiBestCost) {
544         uiBestCost = uiSadCost;
545         iBestPos = iTargetPos;
546       }
547       iStartMv++;
548       ++pRef;
549       ++iTargetPos;
550     }
551   }
552   if (uiBestCost < pMe->uiSadCost) {
553     SMVUnitXY sBestMv;
554     sBestMv.iMvX = iBestPos - iCurMeBlockPixX;
555     sBestMv.iMvY = 0;
556     UpdateMeResults (sBestMv, uiBestCost, &pMe->pColoRefMb[sBestMv.iMvX], pMe);
557   }
558 }
559 #endif
LineFullSearch_c(SWelsFuncPtrList * pFuncList,SWelsME * pMe,uint16_t * pMvdTable,const int32_t kiEncStride,const int32_t kiRefStride,const int16_t iMinMv,const int16_t iMaxMv,const bool bVerticalSearch)560 void LineFullSearch_c (SWelsFuncPtrList* pFuncList, SWelsME* pMe,
561                        uint16_t* pMvdTable,
562                        const int32_t kiEncStride, const int32_t kiRefStride,
563                        const int16_t iMinMv, const int16_t iMaxMv,
564                        const bool bVerticalSearch) {
565   PSampleSadSatdCostFunc pSad = pFuncList->sSampleDealingFuncs.pfSampleSad[pMe->uiBlockSize];
566   const int32_t kiCurMeBlockPixX = pMe->iCurMeBlockPixX;
567   const int32_t kiCurMeBlockPixY = pMe->iCurMeBlockPixY;
568   int32_t iMinPos, iMaxPos;
569   int32_t iFixedMvd;
570   int32_t iCurMeBlockPix;
571   int32_t iStride;
572   uint16_t* pMvdCost;
573 
574   if (bVerticalSearch) {
575     iMinPos = kiCurMeBlockPixY + iMinMv;
576     iMaxPos = kiCurMeBlockPixY + iMaxMv;
577     iFixedMvd = * (pMvdTable - pMe->sMvp.iMvX);
578     iCurMeBlockPix = pMe->iCurMeBlockPixY;
579     iStride = kiRefStride;
580     pMvdCost  = & (pMvdTable[ (iMinMv * (1 << 2)) - pMe->sMvp.iMvY]);
581   } else {
582     iMinPos = kiCurMeBlockPixX + iMinMv;
583     iMaxPos = kiCurMeBlockPixX + iMaxMv;
584     iFixedMvd = * (pMvdTable - pMe->sMvp.iMvY);
585     iCurMeBlockPix = pMe->iCurMeBlockPixX;
586     iStride = 1;
587     pMvdCost  = & (pMvdTable[ (iMinMv * (1 << 2)) - pMe->sMvp.iMvX]);
588   }
589   uint8_t* pRef            = &pMe->pColoRefMb[ iMinMv * iStride];
590   uint32_t uiBestCost    = 0xFFFFFFFF;
591   int32_t iBestPos       = 0;
592 
593   for (int32_t iTargetPos = iMinPos; iTargetPos < iMaxPos; ++ iTargetPos) {
594     uint8_t* const kpEncMb  = pMe->pEncMb;
595     uint32_t uiSadCost = pSad (kpEncMb, kiEncStride, pRef, kiRefStride) + (iFixedMvd + *pMvdCost);
596     if (uiSadCost < uiBestCost) {
597       uiBestCost  = uiSadCost;
598       iBestPos  = iTargetPos;
599     }
600     pRef += iStride;
601     pMvdCost += 4;
602   }
603 
604   if (uiBestCost < pMe->uiSadCost) {
605     SMVUnitXY sBestMv;
606     sBestMv.iMvX = bVerticalSearch ? 0 : (iBestPos - iCurMeBlockPix);
607     sBestMv.iMvY = bVerticalSearch ? (iBestPos - iCurMeBlockPix) : 0;
608     UpdateMeResults (sBestMv, uiBestCost, &pMe->pColoRefMb[sBestMv.iMvY * kiRefStride + sBestMv.iMvX], pMe);
609   }
610 }
611 
WelsMotionCrossSearch(SWelsFuncPtrList * pFuncList,SWelsME * pMe,SSlice * pSlice,const int32_t kiEncStride,const int32_t kiRefStride)612 void WelsMotionCrossSearch (SWelsFuncPtrList* pFuncList, SWelsME* pMe, SSlice* pSlice,
613                             const int32_t kiEncStride,  const int32_t kiRefStride) {
614   PLineFullSearchFunc pfVerticalFullSearchFunc = pFuncList->pfVerticalFullSearch;
615   PLineFullSearchFunc pfHorizontalFullSearchFunc = pFuncList->pfHorizontalFullSearch;
616 
617   //vertical search
618   pfVerticalFullSearchFunc (pFuncList, pMe,
619                             pMe->pMvdCost,
620                             kiEncStride, kiRefStride,
621                             pSlice->sMvStartMin.iMvY,
622                             pSlice->sMvStartMax.iMvY, true);
623 
624   //horizontal search
625   if (pMe->uiSadCost >= pMe->uiSadCostThreshold) {
626     pfHorizontalFullSearchFunc (pFuncList, pMe,
627                                 pMe->pMvdCost,
628                                 kiEncStride, kiRefStride,
629                                 pSlice->sMvStartMin.iMvX,
630                                 pSlice->sMvStartMax.iMvX,
631                                 false);
632   }
633 }
634 
635 
636 /////////////////////////
637 // Feature Search Basics
638 /////////////////////////
639 //memory related
RequestFeatureSearchPreparation(CMemoryAlign * pMa,const int32_t kiFrameWidth,const int32_t kiFrameHeight,const int32_t iNeedFeatureStorage,SFeatureSearchPreparation * pFeatureSearchPreparation)640 int32_t RequestFeatureSearchPreparation (CMemoryAlign* pMa, const int32_t kiFrameWidth,  const int32_t kiFrameHeight,
641     const int32_t iNeedFeatureStorage,
642     SFeatureSearchPreparation* pFeatureSearchPreparation) {
643   const int32_t kiFeatureStrategyIndex = iNeedFeatureStorage >> 16;
644   const bool bFme8x8 = ((iNeedFeatureStorage & 0x0000FF & ME_FME) == ME_FME);
645   const int32_t kiMarginSize = bFme8x8 ? 8 : 16;
646   const int32_t kiFrameSize = (kiFrameWidth - kiMarginSize) * (kiFrameHeight - kiMarginSize);
647   int32_t iListOfFeatureOfBlock;
648 
649   if (0 == kiFeatureStrategyIndex) {
650     iListOfFeatureOfBlock = sizeof (uint16_t) * kiFrameSize;
651   } else {
652     iListOfFeatureOfBlock = sizeof (uint16_t) * kiFrameSize +
653                             (kiFrameWidth - kiMarginSize) * sizeof (uint32_t) + kiFrameWidth * 8 * sizeof (uint8_t);
654   }
655   pFeatureSearchPreparation->pFeatureOfBlock =
656     (uint16_t*)pMa->WelsMallocz (iListOfFeatureOfBlock, "pFeatureOfBlock");
657   WELS_VERIFY_RETURN_IF (ENC_RETURN_MEMALLOCERR, NULL == (pFeatureSearchPreparation->pFeatureOfBlock))
658 
659   pFeatureSearchPreparation->uiFeatureStrategyIndex = kiFeatureStrategyIndex;
660   pFeatureSearchPreparation->bFMESwitchFlag = true;
661   pFeatureSearchPreparation->uiFMEGoodFrameCount = FMESWITCH_DEFAULT_GOODFRAME_NUM;
662   pFeatureSearchPreparation->iHighFreMbCount = 0;
663 
664   return ENC_RETURN_SUCCESS;
665 }
ReleaseFeatureSearchPreparation(CMemoryAlign * pMa,uint16_t * & pFeatureOfBlock)666 int32_t ReleaseFeatureSearchPreparation (CMemoryAlign* pMa, uint16_t*& pFeatureOfBlock) {
667   if (pMa && pFeatureOfBlock) {
668     pMa->WelsFree (pFeatureOfBlock, "pFeatureOfBlock");
669     pFeatureOfBlock = NULL;
670     return ENC_RETURN_SUCCESS;
671   }
672   return ENC_RETURN_UNEXPECTED;
673 }
674 
RequestScreenBlockFeatureStorage(CMemoryAlign * pMa,const int32_t kiFrameWidth,const int32_t kiFrameHeight,const int32_t iNeedFeatureStorage,SScreenBlockFeatureStorage * pScreenBlockFeatureStorage)675 int32_t RequestScreenBlockFeatureStorage (CMemoryAlign* pMa, const int32_t kiFrameWidth,  const int32_t kiFrameHeight,
676     const int32_t iNeedFeatureStorage,
677     SScreenBlockFeatureStorage* pScreenBlockFeatureStorage) {
678 
679   const int32_t kiFeatureStrategyIndex = iNeedFeatureStorage >> 16;
680   const int32_t kiMe8x8FME = iNeedFeatureStorage & 0x0000FF & ME_FME;
681   const int32_t kiMe16x16FME = ((iNeedFeatureStorage & 0x00FF00) >> 8) & ME_FME;
682   if ((kiMe8x8FME == ME_FME) && (kiMe16x16FME == ME_FME)) {
683     return ENC_RETURN_UNSUPPORTED_PARA;
684     //the following memory allocation cannot support when FME at both size
685   }
686 
687   const bool bIsBlock8x8 = (kiMe8x8FME == ME_FME);
688   const int32_t kiMarginSize = bIsBlock8x8 ? 8 : 16;
689   const int32_t kiFrameSize = (kiFrameWidth - kiMarginSize) * (kiFrameHeight - kiMarginSize);
690   const int32_t kiListSize  = (0 == kiFeatureStrategyIndex) ? (bIsBlock8x8 ? LIST_SIZE_SUM_8x8 : LIST_SIZE_SUM_16x16) :
691                               256;
692 
693   pScreenBlockFeatureStorage->pTimesOfFeatureValue = (uint32_t*)pMa->WelsMallocz (kiListSize * sizeof (uint32_t),
694       "pScreenBlockFeatureStorage->pTimesOfFeatureValue");
695   WELS_VERIFY_RETURN_IF (ENC_RETURN_MEMALLOCERR, NULL == pScreenBlockFeatureStorage->pTimesOfFeatureValue)
696 
697   pScreenBlockFeatureStorage->pLocationOfFeature = (uint16_t**)pMa->WelsMallocz (kiListSize * sizeof (uint16_t*),
698       "pScreenBlockFeatureStorage->pLocationOfFeature");
699   WELS_VERIFY_RETURN_IF (ENC_RETURN_MEMALLOCERR, NULL == pScreenBlockFeatureStorage->pLocationOfFeature)
700 
701   pScreenBlockFeatureStorage->pLocationPointer = (uint16_t*)pMa->WelsMallocz (2 * kiFrameSize * sizeof (uint16_t),
702       "pScreenBlockFeatureStorage->pLocationPointer");
703   WELS_VERIFY_RETURN_IF (ENC_RETURN_MEMALLOCERR, NULL == pScreenBlockFeatureStorage->pLocationPointer)
704   //  uint16_t* pFeatureValuePointerList[WELS_MAX (LIST_SIZE_SUM_16x16, LIST_SIZE_MSE_16x16)] = {0};
705   pScreenBlockFeatureStorage->pFeatureValuePointerList = (uint16_t**)pMa->WelsMallocz (WELS_MAX (LIST_SIZE_SUM_16x16,
706       LIST_SIZE_MSE_16x16) * sizeof (uint16_t*),
707       "pScreenBlockFeatureStorage->pFeatureValuePointerList");
708   WELS_VERIFY_RETURN_IF (ENC_RETURN_MEMALLOCERR, NULL == pScreenBlockFeatureStorage->pFeatureValuePointerList)
709 
710   pScreenBlockFeatureStorage->pFeatureOfBlockPointer = NULL;
711   pScreenBlockFeatureStorage->iIs16x16 = !bIsBlock8x8;
712   pScreenBlockFeatureStorage->uiFeatureStrategyIndex = kiFeatureStrategyIndex;
713   pScreenBlockFeatureStorage->iActualListSize = kiListSize;
714   WelsSetMemMultiplebytes_c (pScreenBlockFeatureStorage->uiSadCostThreshold, UINT_MAX, BLOCK_SIZE_ALL, sizeof (uint32_t));
715   pScreenBlockFeatureStorage->bRefBlockFeatureCalculated = false;
716 
717   return ENC_RETURN_SUCCESS;
718 }
ReleaseScreenBlockFeatureStorage(CMemoryAlign * pMa,SScreenBlockFeatureStorage * pScreenBlockFeatureStorage)719 int32_t ReleaseScreenBlockFeatureStorage (CMemoryAlign* pMa, SScreenBlockFeatureStorage* pScreenBlockFeatureStorage) {
720   if (pMa && pScreenBlockFeatureStorage) {
721     if (pScreenBlockFeatureStorage->pTimesOfFeatureValue) {
722       pMa->WelsFree (pScreenBlockFeatureStorage->pTimesOfFeatureValue, "pScreenBlockFeatureStorage->pTimesOfFeatureValue");
723       pScreenBlockFeatureStorage->pTimesOfFeatureValue = NULL;
724     }
725 
726     if (pScreenBlockFeatureStorage->pLocationOfFeature) {
727       pMa->WelsFree (pScreenBlockFeatureStorage->pLocationOfFeature, "pScreenBlockFeatureStorage->pLocationOfFeature");
728       pScreenBlockFeatureStorage->pLocationOfFeature = NULL;
729     }
730 
731     if (pScreenBlockFeatureStorage->pLocationPointer) {
732       pMa->WelsFree (pScreenBlockFeatureStorage->pLocationPointer, "pScreenBlockFeatureStorage->pLocationPointer");
733       pScreenBlockFeatureStorage->pLocationPointer = NULL;
734     }
735 
736     if (pScreenBlockFeatureStorage->pFeatureValuePointerList) {
737       pMa->WelsFree (pScreenBlockFeatureStorage->pFeatureValuePointerList,
738                      "pScreenBlockFeatureStorage->pFeatureValuePointerList");
739       pScreenBlockFeatureStorage->pFeatureValuePointerList = NULL;
740     }
741 
742     return ENC_RETURN_SUCCESS;
743   }
744   return ENC_RETURN_UNEXPECTED;
745 }
746 
747 //preprocess related
/*!
 * \brief  Sum the 64 samples of an 8x8 block.
 *
 * \param  pRef         top-left sample of the block
 * \param  kiRefStride  distance in samples between consecutive rows
 * \return sum of all samples in the block
 */
int32_t SumOf8x8SingleBlock_c (uint8_t* pRef, const int32_t kiRefStride) {
  int32_t iTotal = 0;
  const uint8_t* pRow = pRef;
  for (int32_t iRow = 0; iRow < 8; ++iRow, pRow += kiRefStride) {
    for (int32_t iCol = 0; iCol < 8; ++iCol) {
      iTotal += pRow[iCol];
    }
  }
  return iTotal;
}
/*!
 * \brief  Sum the 256 samples of a 16x16 block.
 *
 * \param  pRef         top-left sample of the block
 * \param  kiRefStride  distance in samples between consecutive rows
 * \return sum of all samples in the block
 */
int32_t SumOf16x16SingleBlock_c (uint8_t* pRef, const int32_t kiRefStride) {
  int32_t iTotal = 0;
  const uint8_t* pRow = pRef;
  for (int32_t iRow = 0; iRow < 16; ++iRow, pRow += kiRefStride) {
    for (int32_t iCol = 0; iCol < 16; ++iCol) {
      iTotal += pRow[iCol];
    }
  }
  return iTotal;
}
768 
SumOf8x8BlockOfFrame_c(uint8_t * pRefPicture,const int32_t kiWidth,const int32_t kiHeight,const int32_t kiRefStride,uint16_t * pFeatureOfBlock,uint32_t pTimesOfFeatureValue[])769 void SumOf8x8BlockOfFrame_c (uint8_t* pRefPicture, const int32_t kiWidth, const int32_t kiHeight,
770                              const int32_t kiRefStride,
771                              uint16_t* pFeatureOfBlock, uint32_t pTimesOfFeatureValue[]) {
772   int32_t x, y;
773   uint8_t* pRef;
774   uint16_t* pBuffer;
775   int32_t iSum;
776   for (y = 0; y < kiHeight; y++) {
777     pRef = pRefPicture  + kiRefStride * y;
778     pBuffer  = pFeatureOfBlock + kiWidth * y;
779     for (x = 0; x < kiWidth; x++) {
780       iSum = SumOf8x8SingleBlock_c (pRef + x, kiRefStride);
781 
782       pBuffer[x] = iSum;
783       pTimesOfFeatureValue[iSum]++;
784     }
785   }
786 }
787 
SumOf16x16BlockOfFrame_c(uint8_t * pRefPicture,const int32_t kiWidth,const int32_t kiHeight,const int32_t kiRefStride,uint16_t * pFeatureOfBlock,uint32_t pTimesOfFeatureValue[])788 void SumOf16x16BlockOfFrame_c (uint8_t* pRefPicture, const int32_t kiWidth, const int32_t kiHeight,
789                                const int32_t kiRefStride,
790                                uint16_t* pFeatureOfBlock, uint32_t pTimesOfFeatureValue[]) {
791   //TODO: this is similar to SumOf8x8BlockOfFrame_c expect the calling of single block func, refactor-able?
792   int32_t x, y;
793   uint8_t* pRef;
794   uint16_t* pBuffer;
795   int32_t iSum;
796   for (y = 0; y < kiHeight; y++) {
797     pRef = pRefPicture  + kiRefStride * y;
798     pBuffer  = pFeatureOfBlock + kiWidth * y;
799     for (x = 0; x < kiWidth; x++) {
800       iSum = SumOf16x16SingleBlock_c (pRef + x, kiRefStride);
801 
802       pBuffer[x] = iSum;
803       pTimesOfFeatureValue[iSum]++;
804     }
805   }
806 }
807 
/*!
 * \brief  Carve pBuf into one run per feature value.
 *
 * Entry i of both pointer tables is set to the start of run i, and run i is
 * given 2 * pTimesOfFeatureValue[i] uint16_t slots (an x/y pair per occurrence).
 *
 * \param  pTimesOfFeatureValue      occurrence count per feature value
 * \param  pBuf                      backing buffer that is partitioned
 * \param  kiListSize                number of feature values
 * \param  pLocationOfFeature        output: start of each run
 * \param  pFeatureValuePointerList  output: same starts, used later as write cursors
 */
void InitializeHashforFeature_c (uint32_t* pTimesOfFeatureValue, uint16_t* pBuf, const int32_t kiListSize,
                                 uint16_t** pLocationOfFeature, uint16_t** pFeatureValuePointerList) {
  uint16_t* pRunStart = pBuf;
  int32_t iFeature = 0;
  while (iFeature < kiListSize) {
    pFeatureValuePointerList[iFeature] = pRunStart;
    pLocationOfFeature[iFeature] = pRunStart;
    pRunStart += (pTimesOfFeatureValue[iFeature] << 1);
    ++iFeature;
  }
}
/*!
 * \brief  Record the position of every block under its feature value.
 *
 * Walks the feature map row by row; for each entry the pair (x * 4, y * 4) is
 * written at the current cursor of that feature value, and the cursor is then
 * advanced by two slots.
 *
 * \param  pFeatureOfBlock           feature map, kiWidth entries per row
 * \param  kiWidth                   entries per row
 * \param  kiHeight                  number of rows
 * \param  pFeatureValuePointerList  in/out: write cursor per feature value
 */
void FillQpelLocationByFeatureValue_c (uint16_t* pFeatureOfBlock, const int32_t kiWidth, const int32_t kiHeight,
                                       uint16_t** pFeatureValuePointerList) {
  for (int32_t iRow = 0; iRow < kiHeight; ++iRow) {
    const uint16_t* pFeatureRow = pFeatureOfBlock + iRow * kiWidth;
    const int32_t kiQpelY = iRow * 4;

    for (int32_t iCol = 0; iCol < kiWidth; ++iCol) {
      const uint16_t kuiFeature = pFeatureRow[iCol];
      uint16_t* pSlot = pFeatureValuePointerList[kuiFeature];

      pSlot[0] = iCol << 2;
      pSlot[1] = kiQpelY;
      pFeatureValuePointerList[kuiFeature] = pSlot + 2;
    }
  }
}
834 
CalculateFeatureOfBlock(SWelsFuncPtrList * pFunc,SPicture * pRef,SScreenBlockFeatureStorage * pScreenBlockFeatureStorage)835 bool CalculateFeatureOfBlock (SWelsFuncPtrList* pFunc, SPicture* pRef,
836                               SScreenBlockFeatureStorage* pScreenBlockFeatureStorage) {
837   uint16_t* pFeatureOfBlock = pScreenBlockFeatureStorage->pFeatureOfBlockPointer;
838   uint32_t* pTimesOfFeatureValue = pScreenBlockFeatureStorage->pTimesOfFeatureValue;
839   uint16_t** pLocationOfFeature  = pScreenBlockFeatureStorage->pLocationOfFeature;
840   uint16_t* pBuf = pScreenBlockFeatureStorage->pLocationPointer;
841 
842   if (NULL == pFeatureOfBlock || NULL == pTimesOfFeatureValue || NULL == pLocationOfFeature || NULL == pBuf
843       || NULL == pRef->pData[0]) {
844     return false;
845   }
846 
847   uint8_t* pRefData = pRef->pData[0];
848   const int32_t iRefStride = pRef->iLineSize[0];
849   int32_t iIs16x16 = pScreenBlockFeatureStorage->iIs16x16;
850   const int32_t iEdgeDiscard = (iIs16x16 ? 16 : 8); //this is to save complexity of padding on pRef
851   const int32_t iWidth = pRef->iWidthInPixel - iEdgeDiscard;
852   const int32_t kiHeight = pRef->iHeightInPixel - iEdgeDiscard;
853   const int32_t kiActualListSize = pScreenBlockFeatureStorage->iActualListSize;
854 
855   memset (pTimesOfFeatureValue, 0, sizeof (int32_t)*kiActualListSize);
856   (pFunc->pfCalculateBlockFeatureOfFrame[iIs16x16]) (pRefData, iWidth, kiHeight, iRefStride, pFeatureOfBlock,
857       pTimesOfFeatureValue);
858 
859   //assign pLocationOfFeature pointer
860   pFunc->pfInitializeHashforFeature (pTimesOfFeatureValue, pBuf, kiActualListSize,
861                                      pLocationOfFeature, pScreenBlockFeatureStorage->pFeatureValuePointerList);
862 
863   //assign each pixel's pLocationOfFeature
864   pFunc->pfFillQpelLocationByFeatureValue (pFeatureOfBlock, iWidth, kiHeight,
865       pScreenBlockFeatureStorage->pFeatureValuePointerList);
866   return true;
867 }
868 
PerformFMEPreprocess(SWelsFuncPtrList * pFunc,SPicture * pRef,uint16_t * pFeatureOfBlock,SScreenBlockFeatureStorage * pScreenBlockFeatureStorage)869 void PerformFMEPreprocess (SWelsFuncPtrList* pFunc, SPicture* pRef, uint16_t* pFeatureOfBlock,
870                            SScreenBlockFeatureStorage* pScreenBlockFeatureStorage) {
871   pScreenBlockFeatureStorage->pFeatureOfBlockPointer = pFeatureOfBlock;
872   pScreenBlockFeatureStorage->bRefBlockFeatureCalculated = CalculateFeatureOfBlock (pFunc, pRef,
873       pScreenBlockFeatureStorage);
874 
875   if (pScreenBlockFeatureStorage->bRefBlockFeatureCalculated) {
876     uint32_t uiRefPictureAvgQstepx16 = QStepx16ByQp[WelsMedian (0, pRef->iFrameAverageQp, 51)];
877     uint32_t uiSadCostThreshold16x16 = ((30 * (uiRefPictureAvgQstepx16 + 160)) >> 3);
878     pScreenBlockFeatureStorage->uiSadCostThreshold[BLOCK_16x16] = uiSadCostThreshold16x16;
879     pScreenBlockFeatureStorage->uiSadCostThreshold[BLOCK_8x8] = (uiSadCostThreshold16x16 >> 2);
880     pScreenBlockFeatureStorage->uiSadCostThreshold[BLOCK_16x8]
881       = pScreenBlockFeatureStorage->uiSadCostThreshold[BLOCK_8x16]
882         = pScreenBlockFeatureStorage->uiSadCostThreshold[BLOCK_4x4] = UINT_MAX;
883   }
884 }
885 
886 //search related
SetFeatureSearchIn(SWelsFuncPtrList * pFunc,const SWelsME & sMe,const SSlice * pSlice,SScreenBlockFeatureStorage * pRefFeatureStorage,const int32_t kiEncStride,const int32_t kiRefStride,SFeatureSearchIn * pFeatureSearchIn)887 bool SetFeatureSearchIn (SWelsFuncPtrList* pFunc,  const SWelsME& sMe,
888                          const SSlice* pSlice, SScreenBlockFeatureStorage* pRefFeatureStorage,
889                          const int32_t kiEncStride, const int32_t kiRefStride,
890                          SFeatureSearchIn* pFeatureSearchIn) {
891   pFeatureSearchIn->pSad = pFunc->sSampleDealingFuncs.pfSampleSad[sMe.uiBlockSize];
892   pFeatureSearchIn->iFeatureOfCurrent = pFunc->pfCalculateSingleBlockFeature[BLOCK_16x16 == sMe.uiBlockSize] (sMe.pEncMb,
893                                         kiEncStride);
894 
895   pFeatureSearchIn->pEnc       = sMe.pEncMb;
896   pFeatureSearchIn->pColoRef = sMe.pColoRefMb;
897   pFeatureSearchIn->iEncStride = kiEncStride;
898   pFeatureSearchIn->iRefStride = kiRefStride;
899   pFeatureSearchIn->uiSadCostThresh = sMe.uiSadCostThreshold;
900 
901   pFeatureSearchIn->iCurPixX = sMe.iCurMeBlockPixX;
902   pFeatureSearchIn->iCurPixXQpel = (pFeatureSearchIn->iCurPixX << 2);
903   pFeatureSearchIn->iCurPixY = sMe.iCurMeBlockPixY;
904   pFeatureSearchIn->iCurPixYQpel = (pFeatureSearchIn->iCurPixY << 2);
905 
906   pFeatureSearchIn->pTimesOfFeature = pRefFeatureStorage->pTimesOfFeatureValue;
907   pFeatureSearchIn->pQpelLocationOfFeature = pRefFeatureStorage->pLocationOfFeature;
908   pFeatureSearchIn->pMvdCostX = sMe.pMvdCost - pFeatureSearchIn->iCurPixXQpel - sMe.sMvp.iMvX;
909   pFeatureSearchIn->pMvdCostY = sMe.pMvdCost - pFeatureSearchIn->iCurPixYQpel - sMe.sMvp.iMvY;
910 
911   pFeatureSearchIn->iMinQpelX = pFeatureSearchIn->iCurPixXQpel + ((pSlice->sMvStartMin.iMvX) * (1 << 2));
912   pFeatureSearchIn->iMinQpelY = pFeatureSearchIn->iCurPixYQpel + ((pSlice->sMvStartMin.iMvY) * (1 << 2));
913   pFeatureSearchIn->iMaxQpelX = pFeatureSearchIn->iCurPixXQpel + ((pSlice->sMvStartMax.iMvX) * (1 << 2));
914   pFeatureSearchIn->iMaxQpelY = pFeatureSearchIn->iCurPixYQpel + ((pSlice->sMvStartMax.iMvY) * (1 << 2));
915 
916   if (NULL == pFeatureSearchIn->pSad || NULL == pFeatureSearchIn->pTimesOfFeature
917       || NULL == pFeatureSearchIn->pQpelLocationOfFeature) {
918     return false;
919   }
920   return true;
921 }
SaveFeatureSearchOut(const SMVUnitXY sBestMv,const uint32_t uiBestSadCost,uint8_t * pRef,SFeatureSearchOut * pFeatureSearchOut)922 void SaveFeatureSearchOut (const SMVUnitXY sBestMv, const uint32_t uiBestSadCost, uint8_t* pRef,
923                            SFeatureSearchOut* pFeatureSearchOut) {
924   pFeatureSearchOut->sBestMv = sBestMv;
925   pFeatureSearchOut->uiBestSadCost = uiBestSadCost;
926   pFeatureSearchOut->pBestRef = pRef;
927 }
928 
FeatureSearchOne(SFeatureSearchIn & sFeatureSearchIn,const int32_t iFeatureDifference,const uint32_t kuiExpectedSearchTimes,SFeatureSearchOut * pFeatureSearchOut)929 bool FeatureSearchOne (SFeatureSearchIn& sFeatureSearchIn, const int32_t iFeatureDifference,
930                        const uint32_t kuiExpectedSearchTimes,
931                        SFeatureSearchOut* pFeatureSearchOut) {
932   const int32_t iFeatureOfRef = (sFeatureSearchIn.iFeatureOfCurrent + iFeatureDifference);
933   if (iFeatureOfRef < 0 || iFeatureOfRef >= LIST_SIZE)
934     return true;
935 
936   PSampleSadSatdCostFunc pSad = sFeatureSearchIn.pSad;
937   uint8_t* pEnc =  sFeatureSearchIn.pEnc;
938   uint8_t* pColoRef = sFeatureSearchIn.pColoRef;
939   const int32_t iEncStride =  sFeatureSearchIn.iEncStride;
940   const int32_t iRefStride =  sFeatureSearchIn.iRefStride;
941   const uint16_t uiSadCostThresh = sFeatureSearchIn.uiSadCostThresh;
942 
943   const int32_t iCurPixX = sFeatureSearchIn.iCurPixX;
944   const int32_t iCurPixY = sFeatureSearchIn.iCurPixY;
945   const int32_t iCurPixXQpel = sFeatureSearchIn.iCurPixXQpel;
946   const int32_t iCurPixYQpel = sFeatureSearchIn.iCurPixYQpel;
947 
948   const int32_t iMinQpelX =  sFeatureSearchIn.iMinQpelX;
949   const int32_t iMinQpelY =  sFeatureSearchIn.iMinQpelY;
950   const int32_t iMaxQpelX =  sFeatureSearchIn.iMaxQpelX;
951   const int32_t iMaxQpelY =  sFeatureSearchIn.iMaxQpelY;
952 
953   const int32_t iSearchTimes = WELS_MIN (sFeatureSearchIn.pTimesOfFeature[iFeatureOfRef], kuiExpectedSearchTimes);
954   const int32_t iSearchTimesx2 = (iSearchTimes << 1);
955   const uint16_t* pQpelPosition = sFeatureSearchIn.pQpelLocationOfFeature[iFeatureOfRef];
956 
957   SMVUnitXY sBestMv;
958   uint32_t uiBestCost, uiTmpCost;
959   uint8_t* pBestRef, *pCurRef;
960   int32_t iQpelX, iQpelY;
961   int32_t iIntepelX, iIntepelY;
962   int32_t i;
963 
964   sBestMv.iMvX = pFeatureSearchOut->sBestMv.iMvX;
965   sBestMv.iMvY = pFeatureSearchOut->sBestMv.iMvY;
966   uiBestCost = pFeatureSearchOut->uiBestSadCost;
967   pBestRef = pFeatureSearchOut->pBestRef;
968 
969   for (i = 0; i < iSearchTimesx2; i += 2) {
970     iQpelX = pQpelPosition[i];
971     iQpelY = pQpelPosition[i + 1];
972 
973     if ((iQpelX > iMaxQpelX) || (iQpelX < iMinQpelX)
974         || (iQpelY > iMaxQpelY) || (iQpelY < iMinQpelY)
975         || (iQpelX == iCurPixXQpel) || (iQpelY == iCurPixYQpel))
976       continue;
977 
978     uiTmpCost = sFeatureSearchIn.pMvdCostX[ iQpelX ] + sFeatureSearchIn.pMvdCostY[ iQpelY ];
979     if (uiTmpCost + iFeatureDifference >= uiBestCost)
980       continue;
981 
982     iIntepelX = (iQpelX >> 2) - iCurPixX;
983     iIntepelY = (iQpelY >> 2) - iCurPixY;
984     pCurRef = &pColoRef[iIntepelX + iIntepelY * iRefStride];
985     uiTmpCost += pSad (pEnc, iEncStride, pCurRef, iRefStride);
986     if (uiTmpCost < uiBestCost) {
987       sBestMv.iMvX = iIntepelX;
988       sBestMv.iMvY = iIntepelY;
989       uiBestCost = uiTmpCost;
990       pBestRef = pCurRef;
991 
992       if (uiBestCost < uiSadCostThresh)
993         break;
994     }
995   }
996   SaveFeatureSearchOut (sBestMv, uiBestCost, pBestRef, pFeatureSearchOut);
997   return (i < iSearchTimesx2);
998 }
999 
1000 
MotionEstimateFeatureFullSearch(SFeatureSearchIn & sFeatureSearchIn,const uint32_t kuiMaxSearchPoint,SWelsME * pMe)1001 void MotionEstimateFeatureFullSearch (SFeatureSearchIn& sFeatureSearchIn,
1002                                       const uint32_t kuiMaxSearchPoint,
1003                                       SWelsME* pMe) {
1004   SFeatureSearchOut sFeatureSearchOut = { { 0 } };//TODO: this can be refactored and removed
1005   sFeatureSearchOut.uiBestSadCost = pMe->uiSadCost;
1006   sFeatureSearchOut.sBestMv = pMe->sMv;
1007   sFeatureSearchOut.pBestRef = pMe->pRefMb;
1008 
1009   int32_t iFeatureDifference = 0;//TODO: change it according to computational-complexity setting when needed
1010   FeatureSearchOne (sFeatureSearchIn, iFeatureDifference, kuiMaxSearchPoint, &sFeatureSearchOut);
1011   if (sFeatureSearchOut.uiBestSadCost < pMe->uiSadCost) {  //TODO: this may be refactored and removed
1012     UpdateMeResults (sFeatureSearchOut.sBestMv,
1013                      sFeatureSearchOut.uiBestSadCost, sFeatureSearchOut.pBestRef,
1014                      pMe);
1015   }
1016 }
1017 
1018 //switch related
CountFMECostDown(const SDqLayer * pCurLayer)1019 static uint32_t CountFMECostDown (const SDqLayer* pCurLayer) {
1020   uint32_t uiCostDownSum      = 0;
1021   const int32_t kiSliceCount  = GetCurrentSliceNum (pCurLayer);
1022   if (kiSliceCount >= 1) {
1023     int32_t iSliceIndex  = 0;
1024     SSlice* pSlice    = pCurLayer->ppSliceInLayer[iSliceIndex];
1025     while (iSliceIndex < kiSliceCount) {
1026       pSlice        = pCurLayer->ppSliceInLayer[iSliceIndex];
1027       uiCostDownSum += pSlice->uiSliceFMECostDown;
1028       ++ iSliceIndex;
1029     }
1030   }
1031   return uiCostDownSum;
1032 }
#define FMESWITCH_MBAVERCOSTSAVING_THRESHOLD (2) //empirically set.
#define FMESWITCH_GOODFRAMECOUNT_MAX (5) //empirically set.
/*!
 * \brief  Move the good-frame counter one step up or down.
 *
 * A frame whose average per-MB cost saving exceeds
 * FMESWITCH_MBAVERCOSTSAVING_THRESHOLD raises the counter (capped at
 * FMESWITCH_GOODFRAMECOUNT_MAX); any other frame lowers it (floored at 0).
 * The numbers are empirical and the strategy may change.
 *
 * \param  iAvMBNormalizedRDcostDown  average cost saving per macroblock
 * \param  uiFMEGoodFrameCount        in/out counter, kept in [0, FMESWITCH_GOODFRAMECOUNT_MAX]
 */
static void UpdateFMEGoodFrameCount (const uint32_t iAvMBNormalizedRDcostDown, uint8_t& uiFMEGoodFrameCount) {
  const bool kbGoodFrame = (iAvMBNormalizedRDcostDown > FMESWITCH_MBAVERCOSTSAVING_THRESHOLD);

  if (kbGoodFrame && uiFMEGoodFrameCount < FMESWITCH_GOODFRAMECOUNT_MAX) {
    ++ uiFMEGoodFrameCount;
  } else if (!kbGoodFrame && uiFMEGoodFrameCount > 0) {
    -- uiFMEGoodFrameCount;
  }
}
UpdateFMESwitch(SDqLayer * pCurLayer)1046 void UpdateFMESwitch (SDqLayer* pCurLayer) {
1047   const uint32_t iFMECost = CountFMECostDown (pCurLayer);
1048   const uint32_t iAvMBNormalizedRDcostDown  = iFMECost / (pCurLayer->iMbWidth * pCurLayer->iMbHeight);
1049   UpdateFMEGoodFrameCount (iAvMBNormalizedRDcostDown, pCurLayer->pFeatureSearchPreparation->uiFMEGoodFrameCount);
1050 }
UpdateFMESwitchNull(SDqLayer * pCurLayer)1051 void UpdateFMESwitchNull (SDqLayer* pCurLayer) {
1052 }
1053 /////////////////////////
1054 // Search function options
1055 /////////////////////////
WelsDiamondCrossSearch(SWelsFuncPtrList * pFunc,SWelsME * pMe,SSlice * pSlice,const int32_t kiEncStride,const int32_t kiRefStride)1056 void WelsDiamondCrossSearch (SWelsFuncPtrList* pFunc, SWelsME* pMe, SSlice* pSlice, const int32_t kiEncStride,
1057                              const int32_t kiRefStride) {
1058   //  Step 1: diamond search
1059   WelsDiamondSearch (pFunc, pMe, pSlice, kiEncStride, kiRefStride);
1060 
1061   //  Step 2: CROSS search
1062   pMe->uiSadCostThreshold = pMe->pRefFeatureStorage->uiSadCostThreshold[pMe->uiBlockSize];
1063   if (pMe->uiSadCost >= pMe->uiSadCostThreshold) {
1064     WelsMotionCrossSearch (pFunc, pMe, pSlice, kiEncStride, kiRefStride);
1065   }
1066 }
WelsDiamondCrossFeatureSearch(SWelsFuncPtrList * pFunc,SWelsME * pMe,SSlice * pSlice,const int32_t kiEncStride,const int32_t kiRefStride)1067 void WelsDiamondCrossFeatureSearch (SWelsFuncPtrList* pFunc, SWelsME* pMe, SSlice* pSlice, const int32_t kiEncStride,
1068                                     const int32_t kiRefStride) {
1069   //  Step 1: diamond search + cross
1070   WelsDiamondCrossSearch (pFunc, pMe, pSlice, kiEncStride, kiRefStride);
1071 
1072   // Step 2: FeatureSearch
1073   if (pMe->uiSadCost >= pMe->uiSadCostThreshold) {
1074     pSlice->uiSliceFMECostDown += pMe->uiSadCost;
1075 
1076     uint32_t uiMaxSearchPoint = INT_MAX;//TODO: change it according to computational-complexity setting
1077     SFeatureSearchIn sFeatureSearchIn = {0};
1078     if (SetFeatureSearchIn (pFunc, *pMe, pSlice, pMe->pRefFeatureStorage,
1079                             kiEncStride, kiRefStride,
1080                             &sFeatureSearchIn)) {
1081       MotionEstimateFeatureFullSearch (sFeatureSearchIn, uiMaxSearchPoint, pMe);
1082     }
1083     pSlice->uiSliceFMECostDown -= pMe->uiSadCost;
1084   }
1085 }
1086 
1087 
1088 } // namespace WelsEnc
1089 
1090