1 
2 /*!
3  ************************************************************************
4  *
5  * \file me_umhex.c
6  *
7  * \brief
8  *   Fast integer pel motion estimation and fractional pel motion estimation
9  *   algorithms are described in this file.
10  *   1. UMHEX_get_mem() and UMHEX_free_mem() are functions for allocation and release
11  *      of memories about motion estimation
12  *   2. UMHEX_BlockMotionSearch() is the function for fast integer pel motion
13  *      estimation and fractional pel motion estimation
14  *   3. UMHEX_DefineThreshold() defined thresholds for early termination
15  * \author
16  *    Main contributors: (see contributors.h for copyright, address and affiliation details)
17  *    - Zhibo Chen         <chenzhibo@tsinghua.org.cn>
18  *    - JianFeng Xu        <fenax@video.mdc.tsinghua.edu.cn>
19  *    - Wenfang Fu         <fwf@video.mdc.tsinghua.edu.cn>
20  *    - Xiaozhong Xu       <xxz@video.mdc.tsinghua.edu.cn>
21  * \date
22  *    2006.1
23  ************************************************************************
24  */
25 
26 #include <limits.h>
27 
28 #include "global.h"
29 #include "memalloc.h"
30 #include "me_umhex.h"
31 #include "refbuf.h"
32 #include "mb_access.h"
33 #include "image.h"
34 #include "enc_statistics.h"
35 #include "macroblock.h"
36 #include "me_distortion.h"
37 #include "mv_search.h"
38 #include "me_fullsearch.h"
39 
/* Base number of quantiser bits; UMHEX_DefineThresholdMB() adds qp/6 to it. */
#define Q_BITS          15
/* Picture width that the size-dependent threshold scale factor is measured against. */
#define MIN_IMG_WIDTH   176

/* Four-point diamond pattern: one step of 4 units left, right, up and down
 * (presumably quarter-pel units, i.e. one integer pel -- confirm). */
static const MotionVector Diamond[4] = {{-4, 0}, {4, 0}, {0, -4}, {0, 4}};
/* Six-point hexagon pattern used by the small-hexagon refinement step. */
static const MotionVector Hexagon[6] = {{-8, 0}, {8, 0},{-4, -8}, {4, 8}, {-4, 8}, {4 , -8}};
/*
 * Offsets of the 16 points on the innermost "big hexagon" contour used by
 * the multi-hexagon-grid search.  Entry m of Big_Hexagon_X pairs with entry
 * m of Big_Hexagon_Y.  The contour is point-symmetric: entry m + 8 is the
 * negation of entry m, so entry 0 is (0, 16) opposite entry 8 at (0, -16).
 */
static const short Big_Hexagon_X[16] = {  0,  -8, -16, -16, -16, -16, -16,  -8,
                                          0,   8,  16,  16,  16,  16,  16,   8};
static const short Big_Hexagon_Y[16] = { 16,  12,   8,   4,   0,  -4,  -8, -12,
                                        -16, -12,  -8,  -4,   0,   4,   8,  12};
47 
/*
 * Base early-termination thresholds, indexed by block type (1 ... 7).
 * UMHEX_DefineThresholdMB() scales entries 1..7 into the per-sequence
 * *_MB tables; entry 0 is never read by that loop.
 */

/* threshold for the multi reference case */
static const int Multi_Ref_Thd[8] =
{
  0, 300, 120, 120, 60, 30, 30, 15
};

/* early termination after every circle of the 16 point big-hexagon search */
static const int Big_Hexagon_Thd[8] =
{
  0, 3000, 1500, 1500, 800, 400, 400, 200
};

/* early termination after median prediction */
static const int Median_Pred_Thd[8] =
{
  0, 750, 350, 350, 170, 80, 80, 40
};

/* threshold for usage of the DSR technique */
static const int Threshold_DSR[8] =
{
  0, 2200, 1000, 1000, 500, 250, 250, 120
};
52 
53 
/*!
************************************************************************
* \brief
*    Fill the block-type dependent constants of the UMHex state: the two
*    AlphaFourth tables (entries 1..7) and the BlockType_LUT entries for
*    the seven block shapes.
*
* \param p_Vid
*    video parameters; only p_Vid->p_UMHex is written
************************************************************************
*/
void UMHEX_DefineThreshold(VideoParameters *p_Vid)
{
  /* Entry 0 is a placeholder and is never copied: the loop starts at 1. */
  static const float alpha_first[8]  = {0.0f, 0.01f, 0.01f, 0.01f, 0.02f, 0.03f, 0.03f, 0.04f};
  static const float alpha_second[8] = {0.0f, 0.06f, 0.07f, 0.07f, 0.08f, 0.12f, 0.11f, 0.15f};

  UMHexStruct *um = p_Vid->p_UMHex;
  int type;

  for (type = 1; type < 8; ++type)
  {
    um->AlphaFourth_1[type] = alpha_first[type];
    um->AlphaFourth_2[type] = alpha_second[type];
  }

  /* Block shape -> block type.  The indices look like (size >> 2) - 1 per
   * dimension -- confirm against the code that reads the table. */
  um->BlockType_LUT[3][3] = 1; // 16x16
  um->BlockType_LUT[3][1] = 2; // 16x8
  um->BlockType_LUT[1][3] = 3; // 8x16
  um->BlockType_LUT[1][1] = 4; // 8x8
  um->BlockType_LUT[1][0] = 5; // 8x4
  um->BlockType_LUT[0][1] = 6; // 4x8
  um->BlockType_LUT[0][0] = 7; // 4x4
}
84 /*!
85 ************************************************************************
86 * \brief
87 *    Set MB thresholds for fast motion estimation
88 *    Those thresholds may be adjusted to trade off rate-distortion
89 *    performance and UMHEX speed
90 ************************************************************************
91 */
92 
UMHEX_DefineThresholdMB(VideoParameters * p_Vid,InputParameters * p_Inp)93 void UMHEX_DefineThresholdMB(VideoParameters *p_Vid, InputParameters *p_Inp)
94 {
95   UMHexStruct *p_UMHex = p_Vid->p_UMHex;
96   int gb_qp_per    = (p_Inp->qp[P_SLICE])/6;
97   int gb_qp_rem    = (p_Inp->qp[P_SLICE])%6;
98 
99   int gb_q_bits    = Q_BITS+gb_qp_per;
100   int gb_qp_const,Thresh4x4;
101 
102   float Quantize_step;
103   int i;
104   // scale factor: defined for different image sizes
105   float scale_factor = (float)((1-p_Inp->UMHexScale*0.1)+p_Inp->UMHexScale*0.1*(p_Vid->width/MIN_IMG_WIDTH));
106   // QP factor: defined for different quantization steps
107   float QP_factor = (float)((1.0-0.90*(p_Inp->qp[P_SLICE]/51.0f)));
108   distblk dbScalar = dist_scale(1);
109 
110   gb_qp_const=(1<<gb_q_bits)/6;
111   Thresh4x4 =   ((1<<gb_q_bits) - gb_qp_const)/imax(1, p_Vid->p_Quant->q_params_4x4[0][1][gb_qp_rem][0][0].ScaleComp);
112   Quantize_step = Thresh4x4/(4*5.61f)*2.0f*scale_factor;
113   p_UMHex->Bsize[7]=(16*16)*Quantize_step*dbScalar;
114 
115   p_UMHex->Bsize[6] = p_UMHex->Bsize[7]*4*dbScalar;
116   p_UMHex->Bsize[5] = p_UMHex->Bsize[7]*4*dbScalar;
117   p_UMHex->Bsize[4] = p_UMHex->Bsize[5]*4*dbScalar;
118   p_UMHex->Bsize[3] = p_UMHex->Bsize[4]*4*dbScalar;
119   p_UMHex->Bsize[2] = p_UMHex->Bsize[4]*4*dbScalar;
120   p_UMHex->Bsize[1] = p_UMHex->Bsize[2]*4*dbScalar;
121 
122   for(i=1;i<8;i++)
123   {
124     //ET_Thd1: early termination after median prediction
125     p_UMHex->Median_Pred_Thd_MB[i]  = (distblk) (Median_Pred_Thd[i]* scale_factor*QP_factor*dbScalar);
126     //ET_thd2: early termination after every circle of 16 points Big-Hex Search
127     p_UMHex->Big_Hexagon_Thd_MB[i]  = (distblk) (Big_Hexagon_Thd[i]* scale_factor*QP_factor*dbScalar);
128     //threshold for multi ref case
129     p_UMHex->Multi_Ref_Thd_MB[i]    = (distblk) (Multi_Ref_Thd[i]  * scale_factor*QP_factor*dbScalar);
130     //threshold for usage of DSR technique. DSR ref to JVT-R088
131     p_UMHex->Threshold_DSR_MB[i]    = (distblk) (Threshold_DSR[i]  * scale_factor*QP_factor*dbScalar);
132   }
133 }
134 
135 /*!
136 ************************************************************************
137 * \brief
138 *    Allocation of space for fast motion estimation
139 ************************************************************************
140 */
UMHEX_get_mem(VideoParameters * p_Vid,InputParameters * p_Inp)141 int UMHEX_get_mem(VideoParameters *p_Vid, InputParameters *p_Inp)
142 {
143   UMHexStruct *p_UMHex = p_Vid->p_UMHex;
144 
145   int memory_size = 0;
146   int search_range = p_Inp->SearchMode[0] == UM_HEX ? p_Inp->search_range[0] : p_Inp->search_range[1];
147 
148   if (NULL==(p_UMHex->flag_intra = calloc ((p_Vid->width>>4)+1,sizeof(byte)))) no_mem_exit("UMHEX_get_mem: p_UMHex->flag_intra"); //fwf 20050330
149 
150   memory_size += get_mem2D(&p_UMHex->McostState, 2*search_range+1, 2*search_range+1);
151 
152   memory_size += get_mem4Ddistblk(&(p_UMHex->fastme_ref_cost), p_Vid->max_num_references, 9, 4, 4);
153   memory_size += get_mem3Ddistblk(&(p_UMHex->fastme_l0_cost), 9, p_Vid->height >> 2, p_Vid->width >> 2);
154   memory_size += get_mem3Ddistblk(&(p_UMHex->fastme_l1_cost), 9, p_Vid->height >> 2, p_Vid->width >> 2);
155   memory_size += get_mem2Ddistblk(&(p_UMHex->fastme_best_cost), 7, p_Vid->width >> 2);
156 
157   memory_size += get_mem2D(&p_UMHex->SearchState, 7, 7);
158   if(p_Inp->BiPredMotionEstimation == 1)//memory allocation for bipred mode
159   {
160     memory_size += get_mem3Ddistblk(&(p_UMHex->fastme_l0_cost_bipred), 9, p_Vid->height >> 2, p_Vid->width >> 2);//for bipred
161     memory_size += get_mem3Ddistblk(&(p_UMHex->fastme_l1_cost_bipred), 9, p_Vid->height >> 2, p_Vid->width >> 2);//for bipred
162   }
163 
164   return memory_size;
165 }
166 
167 /*!
168 ************************************************************************
169 * \brief
170 *    Free space for fast motion estimation
171 ************************************************************************
172 */
UMHEX_free_mem(VideoParameters * p_Vid,InputParameters * p_Inp)173 void UMHEX_free_mem(VideoParameters *p_Vid, InputParameters *p_Inp)
174 {
175   UMHexStruct *p_UMHex = p_Vid->p_UMHex;
176   free_mem2D(p_UMHex->McostState);
177 
178   free_mem4Ddistblk(p_UMHex->fastme_ref_cost);
179   free_mem3Ddistblk(p_UMHex->fastme_l0_cost );
180   free_mem3Ddistblk(p_UMHex->fastme_l1_cost);
181   free_mem2Ddistblk(p_UMHex->fastme_best_cost);
182 
183   free_mem2D(p_UMHex->SearchState);
184   free (p_UMHex->flag_intra);
185   if(p_Inp->BiPredMotionEstimation == 1)
186   {
187     free_mem3Ddistblk(p_UMHex->fastme_l0_cost_bipred);//for bipred
188     free_mem3Ddistblk(p_UMHex->fastme_l1_cost_bipred);//for bipred
189   }
190   free(p_UMHex);
191 }
192 
193 /*!
194 ************************************************************************
195 * \brief
196 *    UMHEXIntegerPelBlockMotionSearch: fast pixel block motion search
197 *    this algorithm is called UMHexagonS(see JVT-D016),which includes
198 *    four steps with different kinds of search patterns
199 * \par Input:
200 * imgpel*   orig_pic,     // <--  original picture
201 * int       ref,          // <--  reference frame (0... or -1 (backward))
202 * int       pic_pix_x,    // <--  absolute x-coordinate of regarded AxB block
203 * int       pic_pix_y,    // <--  absolute y-coordinate of regarded AxB block
204 * int       blocktype,    // <--  block type (1-16x16 ... 7-4x4)
205 * int       pred_mv[2],   // <--  motion vector predictor (x|y) in sub-pel units
206 * MotionVector   *mv,        //  --> motion vector (x|y) - in sub-pel units
207 * int       search_range, // <--  1-d search range in sub-pel units
208 * int       min_mcost,    // <--  minimum motion cost (cost for center or huge value)
209 * int       lambda_factor // <--  lagrangian parameter for determining motion cost
210 * \par
211 * Two macro definitions defined in this program:
212 * 1. EARLY_TERMINATION: early termination algrithm, refer to JVT-D016.doc
213 * 2. SEARCH_ONE_PIXEL: search one pixel in search range
214 * \author
215 *   Main contributors: (see contributors.h for copyright, address and affiliation details)
216 *   - Zhibo Chen         <chenzhibo@tsinghua.org.cn>
217 *   - JianFeng Xu        <fenax@video.mdc.tsinghua.edu.cn>
218 *   - Xiaozhong Xu       <xxz@video.mdc.tsinghua.edu.cn>
219 * \date   :
220 *   2006.1
221 ************************************************************************
222 */
223 distblk                                     //  ==> minimum motion cost after search
UMHEXIntegerPelBlockMotionSearch(Macroblock * currMB,MotionVector * pred_mv,MEBlock * mv_block,distblk min_mcost,int lambda_factor)224 UMHEXIntegerPelBlockMotionSearch  (Macroblock *currMB,     // <--  current Macroblock
225                                    MotionVector *pred_mv,    // < <--  motion vector predictor (x|y) in sub-pel units
226                                    MEBlock *mv_block,
227                                    distblk     min_mcost,     // < <--  minimum motion cost (cost for center or huge value)
228                                    int       lambda_factor  // < <--  lagrangian parameter for determining motion cost
229                                    )
230 {
231   Slice *currSlice = currMB->p_Slice;
232   VideoParameters *p_Vid = currMB->p_Vid;
233   InputParameters *p_Inp = currMB->p_Inp;
234   UMHexStruct *p_UMHex = p_Vid->p_UMHex;
235 
236   int   blocktype     = mv_block->blocktype;
237   short blocksize_x   = mv_block->blocksize_x;  // horizontal block size
238   short blocksize_y   = mv_block->blocksize_y;  // vertical block size
239   short pic_pix_x2    = mv_block->pos_x2;
240   short block_x       = mv_block->block_x;
241   short block_y       = mv_block->block_y;
242 
243   int   list = mv_block->list;
244   int   cur_list = list + currMB->list_offset;
245   short ref = mv_block->ref_idx;
246   StorablePicture *ref_picture = currSlice->listX[cur_list][ref];
247 
248   MotionVector *mv = &mv_block->mv[list];
249   MotionVector iMinNow, cand, center, pred, best = {0, 0};
250 
251   int   search_step;
252   int   pos;
253   distblk mcost;
254   distblk   *SAD_prediction = p_UMHex->fastme_best_cost[blocktype-1];//multi ref SAD prediction
255   int   i, j, m;
256   float betaFourth_1,betaFourth_2;
257   int  temp_Big_Hexagon_X[16];//  temp for Big_Hexagon_X;
258   int  temp_Big_Hexagon_Y[16];//  temp for Big_Hexagon_Y;
259   distblk ET_Thred = p_UMHex->Median_Pred_Thd_MB[blocktype];//ET threshold in use
260   int  search_range = mv_block->searchRange.max_x >> 2;
261 
262   short pic_pix_x = mv_block->pos_x_padded;
263   short pic_pix_y = mv_block->pos_y_padded;
264   pred.mv_x   = pic_pix_x + pred_mv->mv_x;       // predicted position x (in sub-pel units)
265   pred.mv_y   = pic_pix_y + pred_mv->mv_y;       // predicted position y (in sub-pel units)
266   center.mv_x = pic_pix_x + mv->mv_x;            // center position x (in sub-pel units)
267   center.mv_y = pic_pix_y + mv->mv_y;            // center position y (in sub-pel units)
268 
269 
270 
271   //////allocate memory for search state//////////////////////////
272   memset(p_UMHex->McostState[0],0,(2*p_Inp->search_range[p_Vid->view_id]+1)*(2*p_Inp->search_range[p_Vid->view_id]+1));
273 
274 
275   //check the center median predictor
276 
277   cand = center;
278   mcost = mv_cost (p_Vid, lambda_factor, &cand, &pred);
279 
280   mcost += mv_block->computePredFPel(ref_picture, mv_block, min_mcost - mcost, &cand);
281 
282   p_UMHex->McostState[search_range][search_range] = 1;
283   if (mcost < min_mcost)
284   {
285     min_mcost = mcost;
286     best = cand;
287   }
288 
289   iMinNow = best;
290 
291   for (m = 0; m < 4; m++)
292   {
293     cand.mv_x = iMinNow.mv_x + Diamond[m].mv_x;
294     cand.mv_y = iMinNow.mv_y + Diamond[m].mv_y;
295     SEARCH_ONE_PIXEL
296   }
297 
298   if(center.mv_x != pic_pix_x || center.mv_y != pic_pix_y)
299   {
300     cand.mv_x = pic_pix_x ;
301     cand.mv_y = pic_pix_y ;
302     SEARCH_ONE_PIXEL
303       iMinNow = best;
304     for (m = 0; m < 4; m++)
305     {
306       cand.mv_x = iMinNow.mv_x + Diamond[m].mv_x;
307       cand.mv_y = iMinNow.mv_y + Diamond[m].mv_y;
308       SEARCH_ONE_PIXEL
309     }
310   }
311   /***********************************init process*************************/
312   //for multi ref
313   if(ref>0 && currSlice->structure == FRAME  && min_mcost > ET_Thred && SAD_prediction[pic_pix_x2] < p_UMHex->Multi_Ref_Thd_MB[blocktype])
314     goto terminate_step;
315 
316   //ET_Thd1: early termination for low motion case
317   if( min_mcost < ET_Thred)
318   {
319     goto terminate_step;
320   }
321   else // hybrid search for main search loop
322   {
323     /****************************(MV and SAD prediction)********************************/
324     UMHEX_setup(currMB, ref, mv_block->list, block_y, block_x, blocktype, currSlice->all_mv );
325     ET_Thred = p_UMHex->Big_Hexagon_Thd_MB[blocktype];  // ET_Thd2: early termination Threshold for strong motion
326 
327     // Threshold defined for EARLY_TERMINATION
328     if (p_UMHex->pred_SAD == 0)
329     {
330       betaFourth_1=0;
331       betaFourth_2=0;
332     }
333     else
334     {
335       betaFourth_1 = p_UMHex->Bsize[blocktype]/((float)p_UMHex->pred_SAD * p_UMHex->pred_SAD)-p_UMHex->AlphaFourth_1[blocktype];
336       betaFourth_2 = p_UMHex->Bsize[blocktype]/((float)p_UMHex->pred_SAD * p_UMHex->pred_SAD)-p_UMHex->AlphaFourth_2[blocktype];
337 
338     }
339     /*********************************************end of init ***********************************************/
340   }
341   // first_step: initial start point prediction
342 
343   if(blocktype>1)
344   {
345     cand.mv_x = (short) (pic_pix_x + (p_UMHex->pred_MV_uplayer[0] / 4) * 4);
346     cand.mv_y = (short) (pic_pix_y + (p_UMHex->pred_MV_uplayer[1] / 4) * 4);
347     SEARCH_ONE_PIXEL
348   }
349 
350 
351   //prediction using mV of last ref moiton vector
352   if(p_UMHex->pred_MV_ref_flag == 1)      //Notes: for interlace case, ref==1 should be added
353   {
354     cand.mv_x = (short) (pic_pix_x + (p_UMHex->pred_MV_ref[0] / 4) * 4);
355     cand.mv_y = (short) (pic_pix_y + (p_UMHex->pred_MV_ref[1] / 4) * 4);
356     SEARCH_ONE_PIXEL
357   }
358   // Small local search
359   iMinNow = best;
360   for (m = 0; m < 4; m++)
361   {
362     cand.mv_x = iMinNow.mv_x + Diamond[m].mv_x;
363     cand.mv_y = iMinNow.mv_y + Diamond[m].mv_y;
364     SEARCH_ONE_PIXEL
365   }
366 
367   //early termination algorithm, refer to JVT-G016
368   EARLY_TERMINATION
369 
370     if(blocktype>6)
371       goto fourth_1_step;
372     else
373       goto sec_step;
374 
375 sec_step: //Unsymmetrical-cross search
376   iMinNow = best;
377 
378   for(i = 4; i < search_range << 2; i+=8)
379   {
380     search_step = i;
381     cand.mv_x = (short) (iMinNow.mv_x + search_step);
382     cand.mv_y = iMinNow.mv_y ;
383     SEARCH_ONE_PIXEL
384       cand.mv_x = (short) (iMinNow.mv_x - search_step);
385     cand.mv_y = iMinNow.mv_y ;
386     SEARCH_ONE_PIXEL
387   }
388   for(i = 4; i < (search_range << 1);i+=8)
389   {
390     search_step = i;
391     cand.mv_x = iMinNow.mv_x ;
392     cand.mv_y = (short) (iMinNow.mv_y + search_step);
393     SEARCH_ONE_PIXEL
394       cand.mv_x = iMinNow.mv_x ;
395     cand.mv_y = (short) (iMinNow.mv_y - search_step);
396     SEARCH_ONE_PIXEL
397   }
398 
399 
400   //early termination alogrithm, refer to JVT-G016
401   EARLY_TERMINATION
402 
403     iMinNow = best;
404 
405   //third_step:    // Uneven Multi-Hexagon-grid Search
406   //sub step 1: 5x5 squre search
407   for(pos=1;pos<25;pos++)
408   {
409     cand.mv_x = iMinNow.mv_x + p_Vid->spiral_qpel_search[pos].mv_x;
410     cand.mv_y = iMinNow.mv_y + p_Vid->spiral_qpel_search[pos].mv_y;
411     SEARCH_ONE_PIXEL
412   }
413 
414   //early termination alogrithm, refer to JVT-G016
415   EARLY_TERMINATION
416 
417     //sub step 2:  Multi-Hexagon-grid search
418     memcpy(temp_Big_Hexagon_X,Big_Hexagon_X,64);
419   memcpy(temp_Big_Hexagon_Y,Big_Hexagon_Y,64);
420   for(i=1;i<=(search_range >> 2); i++)
421   {
422 
423     for (m = 0; m < 16; m++)
424     {
425       cand.mv_x = (short) (iMinNow.mv_x + temp_Big_Hexagon_X[m]);
426       cand.mv_y = (short) (iMinNow.mv_y + temp_Big_Hexagon_Y[m]);
427       temp_Big_Hexagon_X[m] += Big_Hexagon_X[m];
428       temp_Big_Hexagon_Y[m] += Big_Hexagon_Y[m];
429 
430       SEARCH_ONE_PIXEL
431     }
432     // ET_Thd2: early termination Threshold for strong motion
433     if(min_mcost < ET_Thred)
434     {
435       goto terminate_step;
436     }
437   }
438 
439 
440   //fourth_step:  //Extended Hexagon-based Search
441   // the fourth step with a small search pattern
442 fourth_1_step:  //sub step 1: small Hexagon search
443   for(i = 0; i < search_range; i++)
444   {
445     iMinNow = best;
446     for (m = 0; m < 6; m++)
447     {
448       cand.mv_x = iMinNow.mv_x + Hexagon[m].mv_x;
449       cand.mv_y = iMinNow.mv_y + Hexagon[m].mv_y;
450       SEARCH_ONE_PIXEL
451     }
452 
453     if (best.mv_x == iMinNow.mv_x && best.mv_y == iMinNow.mv_y)
454     {
455       break;
456     }
457   }
458 fourth_2_step: //sub step 2: small Diamond search
459 
460   for(i = 0; i < search_range; i++)
461   {
462     iMinNow = best;
463     for (m = 0; m < 4; m++)
464     {
465       cand.mv_x = iMinNow.mv_x + Diamond[m].mv_x;
466       cand.mv_y = iMinNow.mv_y + Diamond[m].mv_y;
467       SEARCH_ONE_PIXEL
468     }
469     if(best.mv_x == iMinNow.mv_x && best.mv_y == iMinNow.mv_y)
470       break;
471   }
472 
473 terminate_step:
474 
475   // store SAD infomation for prediction
476   //FAST MOTION ESTIMATION. ZHIBO CHEN 2003.3
477   for (i=0; i < (blocksize_x>>2); i++)
478   {
479     for (j=0; j < (blocksize_y>>2); j++)
480     {
481       if(mv_block->list == 0)
482       {
483         p_UMHex->fastme_ref_cost[ref][blocktype][block_y+j][block_x+i] = min_mcost;
484         if (ref==0)
485           p_UMHex->fastme_l0_cost[blocktype][(currMB->block_y)+block_y+j][(currMB->block_x)+block_x+i] = min_mcost;
486       }
487       else
488       {
489         p_UMHex->fastme_l1_cost[blocktype][(currMB->block_y)+block_y+j][(currMB->block_x)+block_x+i] = min_mcost;
490       }
491     }
492   }
493   //for multi ref SAD prediction
494   if ((ref==0) || (SAD_prediction[pic_pix_x2] > min_mcost))
495     SAD_prediction[pic_pix_x2] = min_mcost;
496 
497   mv->mv_x = (short) (best.mv_x - pic_pix_x);
498   mv->mv_y = (short) (best.mv_y - pic_pix_y);
499   return min_mcost;
500 }
501 
502 distblk                                                   //  ==> minimum motion cost after search
UMHEXSubPelBlockMotionSearch(Macroblock * currMB,MotionVector * pred_mv,MEBlock * mv_block,distblk min_mcost,int lambda_factor)503 UMHEXSubPelBlockMotionSearch (Macroblock *currMB,     // <--  current Macroblock
504                               MotionVector *pred_mv,    // < <--  motion vector predictor (x|y) in sub-pel units
505                               MEBlock *mv_block,
506                               distblk     min_mcost,     // <--  minimum motion cost (cost for center or huge value)
507                               int       lambda_factor  // <--  lagrangian parameter for determining motion cost
508                               )
509 {
510   VideoParameters *p_Vid = currMB->p_Vid;
511   Slice *currSlice = currMB->p_Slice;
512   UMHexStruct *p_UMHex = p_Vid->p_UMHex;
513   static const MotionVector DiamondQ[4] = {{-1, 0}, { 0, 1}, { 1, 0}, { 0, -1}};
514   distblk mcost;
515   MotionVector cand, iMinNow, currmv = {0, 0}, cand_pad;
516 
517   int   list          = mv_block->list;
518   int   list_offset   = currMB->list_offset;
519   short ref = mv_block->ref_idx;
520   MotionVector *mv    = &mv_block->mv[list];
521 
522   StorablePicture *ref_picture = currSlice->listX[list+list_offset][ref];
523 
524   int   dynamic_search_range = 3, i;
525   int   m;
526   int   pred_frac_mv_x,pred_frac_mv_y,abort_search;
527 
528   //int   pred_frac_up_mv_x, pred_frac_up_mv_y;
529 
530   pred_frac_mv_x = (pred_mv->mv_x - mv->mv_x) & 0x03;
531   pred_frac_mv_y = (pred_mv->mv_y - mv->mv_y) & 0x03;
532 
533   //pred_frac_up_mv_x = (p_UMHex->pred_MV_uplayer[0] - mv->mv_x) & 0x03;
534   //pred_frac_up_mv_y = (p_UMHex->pred_MV_uplayer[1] - mv->mv_y) & 0x03;
535 
536 
537   memset(p_UMHex->SearchState[0], 0,(2 * dynamic_search_range + 1)*(2 * dynamic_search_range + 1));
538 
539   if( !p_Vid->start_me_refinement_hp )
540   {
541     p_UMHex->SearchState[dynamic_search_range][dynamic_search_range] = 1;
542     cand = *mv;
543     mcost = mv_cost (p_Vid, lambda_factor, &cand, pred_mv);
544     cand_pad = pad_MVs (cand, mv_block); //cand = pad_MVs (cand, mv_block);
545     mcost += mv_block->computePredQPel( ref_picture, mv_block, min_mcost - mcost, &cand_pad); //&cand);
546 
547     if (mcost < min_mcost)
548     {
549       min_mcost = mcost;
550       currmv = cand;
551     }
552   }
553   else
554   {
555     p_UMHex->SearchState[dynamic_search_range][dynamic_search_range] = 1;
556     currmv = *mv;
557   }
558 
559   if(pred_frac_mv_x!=0 || pred_frac_mv_y!=0)
560   {
561     cand.mv_x = (short) (mv->mv_x + pred_frac_mv_x);
562     cand.mv_y = (short) (mv->mv_y + pred_frac_mv_y);
563     mcost = mv_cost (p_Vid, lambda_factor, &cand, pred_mv);
564     p_UMHex->SearchState[cand.mv_y -mv->mv_y + dynamic_search_range][cand.mv_x - mv->mv_x + dynamic_search_range] = 1;
565     cand_pad = pad_MVs (cand, mv_block); //cand = pad_MVs (cand, mv_block);
566 
567     mcost += mv_block->computePredQPel( ref_picture, mv_block, min_mcost - mcost, &cand_pad); //&cand);
568 
569     if (mcost < min_mcost)
570     {
571       min_mcost = mcost;
572       currmv = cand;
573     }
574   }
575 
576   iMinNow = currmv;
577 
578   for(i = 0; i < dynamic_search_range; i++)
579   {
580     abort_search=1;
581     for (m = 0; m < 4; m++)
582     {
583       cand.mv_x = iMinNow.mv_x + DiamondQ[m].mv_x;
584       cand.mv_y = iMinNow.mv_y + DiamondQ[m].mv_y;
585 
586       if(iabs(cand.mv_x - mv->mv_x) <= dynamic_search_range && iabs(cand.mv_y - mv->mv_y) <= dynamic_search_range)
587       {
588         if(!p_UMHex->SearchState[cand.mv_y -mv->mv_y + dynamic_search_range][cand.mv_x -mv->mv_x + dynamic_search_range])
589         {
590           p_UMHex->SearchState[cand.mv_y -mv->mv_y + dynamic_search_range][cand.mv_x -mv->mv_x + dynamic_search_range] = 1;
591           mcost = mv_cost (p_Vid, lambda_factor, &cand, pred_mv);
592           cand_pad = pad_MVs (cand, mv_block); //cand = pad_MVs (cand, mv_block);
593 
594           mcost += mv_block->computePredQPel( ref_picture, mv_block, min_mcost - mcost, &cand_pad); // &cand);
595           if (mcost < min_mcost)
596           {
597             min_mcost = mcost;
598             currmv = cand;
599             abort_search = 0;
600           }
601         }
602       }
603     }
604     iMinNow = currmv;
605     if(abort_search)
606     {
607       break;
608     }
609   }
610 
611   *mv = currmv;
612 
613   //===== return minimum motion cost =====
614   return min_mcost;
615 }
616 
617 distblk                                                   //  ==> minimum motion cost after search
UMHEXSubPelBlockME(Macroblock * currMB,MotionVector * pred_mv,MEBlock * mv_block,distblk min_mcost,int * lambda)618 UMHEXSubPelBlockME (Macroblock *currMB,        // <-- Current Macroblock
619                     MotionVector  *pred_mv,    // <--  motion vector predictor (x|y) in sub-pel units
620                     MEBlock *mv_block,
621                     distblk     min_mcost,     // <--  minimum motion cost (cost for center or huge value)
622                     int*      lambda
623                     )
624 {
625   if(mv_block->blocktype >3)
626   {
627     min_mcost =  UMHEXSubPelBlockMotionSearch (currMB, pred_mv, mv_block, min_mcost, lambda[Q_PEL]);
628   }
629   else
630   {
631     min_mcost =  sub_pel_motion_estimation (currMB, pred_mv, mv_block, min_mcost, lambda);
632   }
633 
634   return min_mcost;
635 }
636 
637 /*!
638 ************************************************************************
639 * \brief
640 * Functions for SAD prediction of intra block cases.
641 * 1. void UMHEX_decide_intrabk_SAD() judges the block coding type(intra/inter)
642 *    of neibouring blocks
643 * 2. void UMHEX_skip_intrabk_SAD() set the SAD to zero if neigouring block coding
644 *    type is intra
645 * \date
646 *    2003.4
647 ************************************************************************
648 */
UMHEX_decide_intrabk_SAD(Macroblock * currMB)649 void UMHEX_decide_intrabk_SAD(Macroblock *currMB)
650 {
651   Slice *currSlice = currMB->p_Slice;
652   VideoParameters *p_Vid = currMB->p_Vid;
653   UMHexStruct *p_UMHex = p_Vid->p_UMHex;
654   if (currSlice->slice_type != I_SLICE && currSlice->slice_type != SI_SLICE)
655   {
656     if (currMB->pix_x == 0 && currMB->pix_y == 0)
657     {
658       p_UMHex->flag_intra_SAD = 0;
659     }
660     else if (currMB->pix_x == 0)
661     {
662       p_UMHex->flag_intra_SAD = p_UMHex->flag_intra[(currMB->pix_x)>>4];
663     }
664     else if (currMB->pix_y == 0)
665     {
666       p_UMHex->flag_intra_SAD = p_UMHex->flag_intra[((currMB->pix_x)>>4)-1];
667     }
668     else
669     {
670       p_UMHex->flag_intra_SAD = ((p_UMHex->flag_intra[(currMB->pix_x)>>4])||(p_UMHex->flag_intra[((currMB->pix_x)>>4)-1])||(p_UMHex->flag_intra[((currMB->pix_x)>>4)+1])) ;
671     }
672   }
673   return;
674 }
675 
/*!
************************************************************************
* \brief
*    Record whether the current macroblock chose best_mode 9 or 10 in
*    the per-column flag_intra array and, if it did in a slice that is
*    neither I nor SI, reset stored costs to zero.
*
* \param currMB
*    current macroblock
* \param ref_max
*    number of reference entries of fastme_ref_cost to reset
************************************************************************
*/
void UMHEX_skip_intrabk_SAD(Macroblock *currMB, int ref_max)
{
  Slice *slice = currMB->p_Slice;
  VideoParameters *p_Vid = currMB->p_Vid;
  UMHexStruct *um = p_Vid->p_UMHex;
  /* best_mode 9 and 10 are presumably the intra modes (see the file's
   * "intra block cases" note) -- confirm against the mode enum */
  const int intra_mb = (currMB->best_mode == 9 || currMB->best_mode == 10);
  int type, row, col, ref;

  if (p_Vid->number > 0)
    um->flag_intra[(currMB->pix_x) >> 4] = intra_mb ? 1 : 0;

  if (!intra_mb || slice->slice_type == I_SLICE || slice->slice_type == SI_SLICE)
    return;

  /* NOTE(review): the reset always addresses rows/columns 0..3 of the
   * picture-sized l0/l1 cost maps, independent of the macroblock position
   * -- confirm this is intended. */
  for (type = 0; type < 9; type++)
  {
    for (row = 0; row < 4; row++)
    {
      distblk *l0_row = um->fastme_l0_cost[type][row];
      distblk *l1_row = um->fastme_l1_cost[type][row];

      for (col = 0; col < 4; col++)
      {
        l0_row[col] = 0;
        l1_row[col] = 0;
      }

      for (ref = 0; ref < ref_max; ref++)
      {
        distblk *ref_row = um->fastme_ref_cost[ref][type][row];

        for (col = 0; col < 4; col++)
          ref_row[col] = 0;
      }
    }
  }
}
707 
708 
/*!
************************************************************************
* \brief
*    Prepare the motion vector and SAD predictions used by the integer
*    pel search.  Writes pred_MV_uplayer, pred_MV_ref, pred_MV_ref_flag
*    and pred_SAD of p_Vid->p_UMHex.
*
* \param currMB     current macroblock
* \param ref        reference index
* \param list       reference list (0 or 1)
* \param block_y    block row inside the macroblock
* \param block_x    block column inside the macroblock
* \param blocktype  block type (1 ... 7)
* \param all_mv     motion vectors, indexed [list][ref][blocktype][block_y][block_x]
************************************************************************
*/
void UMHEX_setup(Macroblock *currMB, short ref, int list, int block_y, int block_x, int blocktype, MotionVector  *****all_mv)
{
  /* block type whose results serve as "upper layer" prediction for each block type */
  static const int upper_type_of[8] = {0,0,1,1,2,4,4,5};

  Slice *currSlice = currMB->p_Slice;
  VideoParameters *p_Vid = currMB->p_Vid;
  InputParameters *p_Inp = currMB->p_Inp;
  UMHexStruct *um = p_Vid->p_UMHex;

  const int N_Bframe = p_Inp->NumberBFrames;
  /* value of the B-slice frame counter modulo (N_Bframe + 1), or 0 without B frames */
  const int n_Bframe = (N_Bframe) ? (p_Vid->p_Stats->frame_ctr[B_SLICE]%(N_Bframe+1)) : 0;
  int upper_type = 0;

  /**************************** MV prediction **********************/
  //MV uplayer prediction
  if (blocktype > 1)
  {
    const MotionVector *upper_mv;

    upper_type = upper_type_of[blocktype];
    upper_mv   = &all_mv[list][ref][upper_type][block_y][block_x];
    um->pred_MV_uplayer[0] = upper_mv->mv_x;
    um->pred_MV_uplayer[1] = upper_mv->mv_y;
  }

  //MV ref-frame prediction
  um->pred_MV_ref_flag = 0;
  if (list == 0)
  {
    /* the source reference is 2 indices back for field pictures and 1 for frames */
    const int ref_step = p_Vid->field_picture ? 2 : 1;

    if (ref >= ref_step)
    {
      /* scale the vector of the earlier reference by num/den */
      const MotionVector *prev_mv = &all_mv[0][ref - ref_step][blocktype][block_y][block_x];
      const int num = p_Vid->field_picture ? ((ref>>1)+1) : (ref+1);
      const int den = p_Vid->field_picture ? (ref>>1)     : ref;

      um->pred_MV_ref[0] = prev_mv->mv_x;
      um->pred_MV_ref[0] = (int)(um->pred_MV_ref[0]*num/(float)(den));
      um->pred_MV_ref[1] = prev_mv->mv_y;
      um->pred_MV_ref[1] = (int)(um->pred_MV_ref[1]*num/(float)(den));
      um->pred_MV_ref_flag = 1;
    }
    else if (currSlice->slice_type == B_SLICE && ref >= 0)
    {
      //B frame forward prediction from the list 1 vector of reference 0
      um->pred_MV_ref[0] =(int) (all_mv[1][0][blocktype][block_y][block_x].mv_x * (-n_Bframe)/(N_Bframe-n_Bframe+1.0f));
      um->pred_MV_ref[1] =(int) (all_mv[1][0][blocktype][block_y][block_x].mv_y * (-n_Bframe)/(N_Bframe-n_Bframe+1.0f));
      um->pred_MV_ref_flag = 1;
    }
  }

  /******************************SAD prediction**********************************/
  if (um->flag_intra_SAD)
  {
    // no SAD prediction when flag_intra_SAD is set (added for irregular motion)
    um->pred_SAD = 0;
  }
  else if (list == 0 && ref > 0)  //pred_SAD_ref
  {
    int src_ref;

    if (p_Vid->field_picture)
      src_ref = (ref > 1) ? (ref - 2) : 0;
    else
      src_ref = ref - 1;

    um->pred_SAD = um->fastme_ref_cost[src_ref][blocktype][block_y][block_x];
  }
  else if (blocktype > 1)  // pred_SAD_uplayer
  {
    const int pic_block_y = (currMB->block_y)+block_y;
    const int pic_block_x = (currMB->block_x)+block_x;

    um->pred_SAD = (list==1) ? (um->fastme_l1_cost[upper_type][pic_block_y][pic_block_x])
                             : (um->fastme_l0_cost[upper_type][pic_block_y][pic_block_x]);
    um->pred_SAD /= 2;
  }
  else
  {
    um->pred_SAD = 0;  // pred_SAD_space
  }
}
816 
817 /*!
818 ************************************************************************
819 * \brief
820 *    UMHEXBipredIntegerPelBlockMotionSearch: fast pixel block motion search for bipred mode
821 *    this algrithm is called UMHexagonS(see JVT-D016),which includes
822 *    four steps with different kinds of search patterns
823 * \author
824 *   Main contributors: (see contributors.h for copyright, address and affiliation details)
825 *   - Zhibo Chen         <chenzhibo@tsinghua.org.cn>
826 *   - JianFeng Xu        <fenax@video.mdc.tsinghua.edu.cn>
827 *   - Xiaozhong Xu       <xxz@video.mdc.tsinghua.edu.cn>
828 * \date   :
829 *   2006.1
830 ************************************************************************
831 */
832 distblk                                                //  ==> minimum motion cost after search
UMHEXBipredIntegerPelBlockMotionSearch(Macroblock * currMB,int list,MotionVector * pred_mv1,MotionVector * pred_mv2,MotionVector * mv1,MotionVector * mv2,MEBlock * mv_block,int search_range,distblk min_mcost,int lambda_factor)833 UMHEXBipredIntegerPelBlockMotionSearch (Macroblock *currMB,      // <--  current Macroblock
834                                         int       list,          // <--  current reference list
835                                         MotionVector *pred_mv1,  // <--  motion vector predictor (x|y) in sub-pel units
836                                         MotionVector *pred_mv2,  // <--  motion vector predictor (x|y) in sub-pel units
837                                         MotionVector  *mv1,      // <--> in: search center (x|y) / out: motion vector (x|y) - in sub-pel units
838                                         MotionVector *mv2,       // <--> in: search center (x|y)
839                                         MEBlock *mv_block,       // <--  motion vector information
840                                         int       search_range,  // <--  1-d search range in sub-pel units
841                                         distblk       min_mcost,     // <--  minimum motion cost (cost for center or huge value)
842                                         int       lambda_factor  // <--  lagrangian parameter for determining motion cost
843                                         )
844 {
845   Slice *currSlice = currMB->p_Slice;
846   VideoParameters *p_Vid = currMB->p_Vid;
847   InputParameters *p_Inp = currMB->p_Inp;
848   UMHexStruct *p_UMHex = p_Vid->p_UMHex;
849   int   temp_Big_Hexagon_X[16];// = Big_Hexagon_X;
850   int   temp_Big_Hexagon_Y[16];// = Big_Hexagon_Y;
851 
852   int   search_step;
853   int   i,m,j;
854   float betaFourth_1,betaFourth_2;
855   int   pos;
856   distblk mcost;
857   short blocktype   = mv_block->blocktype;
858   short blocksize_x = mv_block->blocksize_x;        // horizontal block size
859   short blocksize_y = mv_block->blocksize_y;        // vertical block size
860 
861   short pic_pix_x     = mv_block->pos_x_padded;
862   short pic_pix_y     = mv_block->pos_y_padded;
863 
864   short block_x       = mv_block->block_x;
865   short block_y       = mv_block->block_y;
866   distblk   ET_Thred      = p_UMHex->Median_Pred_Thd_MB[blocktype];
867   short ref           = mv_block->ref_idx;
868 
869   StorablePicture *ref_picture1 = currSlice->listX[list + currMB->list_offset][ref];
870   StorablePicture *ref_picture2 = currSlice->listX[list == 0 ? 1 + currMB->list_offset: currMB->list_offset][ 0 ];
871 
872   MotionVector iMinNow, best, cand;
873 
874   MotionVector pred1 = pad_MVs(*pred_mv1, mv_block);       // predicted position x (in sub-pel units)
875   MotionVector pred2 = pad_MVs(*pred_mv2, mv_block);       // predicted position x (in sub-pel units)
876   MotionVector center1 = pad_MVs(*mv1, mv_block);       // predicted position x (in sub-pel units)
877   MotionVector center2 = pad_MVs(*mv2, mv_block);       // predicted position x (in sub-pel units)
878 
879 
880   search_range >>= 2;
881   //////////////////////////////////////////////////////////////////////////
882 
883   //////allocate memory for search state//////////////////////////
884   memset(p_UMHex->McostState[0],0,(2*search_range+1)*(2*search_range+1));
885 
886   //check the center median predictor
887   best = cand = center2;
888   mcost  = mv_cost (p_Vid, lambda_factor, &center1, &pred1);
889   mcost += mv_cost (p_Vid, lambda_factor, &cand, &pred2);
890   mcost += mv_block->computeBiPredFPel(ref_picture1, ref_picture2, mv_block, DISTBLK_MAX-mcost, &center1, &cand);
891 
892   p_UMHex->McostState[search_range][search_range] = 1;
893 
894   if (mcost < min_mcost)
895   {
896     min_mcost = mcost;
897     best = cand;
898   }
899 
900   iMinNow = best;
901   for (m = 0; m < 4; m++)
902   {
903     cand.mv_x = iMinNow.mv_x + Diamond[m].mv_x;
904     cand.mv_y = iMinNow.mv_y + Diamond[m].mv_y;
905     SEARCH_ONE_PIXEL_BIPRED;
906   }
907 
908   if(center2.mv_x != pic_pix_x || center2.mv_y != pic_pix_y)
909   {
910     cand.mv_x = pic_pix_x ;
911     cand.mv_y = pic_pix_y ;
912     SEARCH_ONE_PIXEL_BIPRED;
913 
914     iMinNow = best;
915 
916     for (m = 0; m < 4; m++)
917     {
918       cand.mv_x = iMinNow.mv_x + Diamond[m].mv_x;
919       cand.mv_y = iMinNow.mv_y + Diamond[m].mv_y;
920       SEARCH_ONE_PIXEL_BIPRED;
921     }
922   }
923   /***********************************init process*************************/
924 
925   if( min_mcost < ET_Thred)
926   {
927     goto terminate_step;
928   }
929   else
930   {
931     int  N_Bframe=0;
932     int  n_Bframe=0;
933     MotionVector *****bipred_mv = currSlice->bipred_mv[list];
934     N_Bframe = p_Inp->NumberBFrames;
935     n_Bframe = p_Vid->p_Stats->frame_ctr[B_SLICE]%(N_Bframe+1);
936 
937 
938     /**************************** MV prediction **********************/
939     //MV uplayer prediction
940     // non for bipred mode
941 
942     //MV ref-frame prediction
943 
944     if(list==0)
945     {
946       if (p_Vid->field_picture)
947       {
948         p_UMHex->pred_MV_ref[0] =(int) (bipred_mv[1][0][blocktype][block_y][block_x].mv_x * (-n_Bframe)/(N_Bframe-n_Bframe+1.0f));
949         p_UMHex->pred_MV_ref[1] =(int) (bipred_mv[1][0][blocktype][block_y][block_x].mv_y * (-n_Bframe)/(N_Bframe-n_Bframe+1.0f));
950       }
951       else //frame case
952       {
953         p_UMHex->pred_MV_ref[0] =(int) (bipred_mv[1][0][blocktype][block_y][block_x].mv_x * (-n_Bframe)/(N_Bframe-n_Bframe+1.0f));
954         p_UMHex->pred_MV_ref[1] =(int) (bipred_mv[1][0][blocktype][block_y][block_x].mv_y * (-n_Bframe)/(N_Bframe-n_Bframe+1.0f));
955       }
956     }
957     /******************************SAD prediction**********************************/
958     p_UMHex->pred_SAD =distblkmin(distblkmin(p_UMHex->SAD_a,p_UMHex->SAD_b),p_UMHex->SAD_c);  // pred_SAD_space
959     ET_Thred = p_UMHex->Big_Hexagon_Thd_MB[blocktype];
960 
961     ///////Threshold defined for early termination///////////////////
962     if (p_UMHex->pred_SAD == 0)
963     {
964       betaFourth_1=0;
965       betaFourth_2=0;
966     }
967     else
968     {
969       betaFourth_1 = p_UMHex->Bsize[blocktype]/(p_UMHex->pred_SAD * p_UMHex->pred_SAD)-p_UMHex->AlphaFourth_1[blocktype];
970       betaFourth_2 = p_UMHex->Bsize[blocktype]/(p_UMHex->pred_SAD * p_UMHex->pred_SAD)-p_UMHex->AlphaFourth_2[blocktype];
971     }
972   }
973 
974   /***********************************end of init *************************/
975 
976 
977 
978   // first_step: initial start point prediction
979   //prediction using mV of last ref moiton vector
980   if(list == 0)
981   {
982     cand.mv_x = (short) (pic_pix_x + (p_UMHex->pred_MV_ref[0] / 4) * 4);
983     cand.mv_y = (short) (pic_pix_y + (p_UMHex->pred_MV_ref[1] / 4) * 4);
984     SEARCH_ONE_PIXEL_BIPRED;
985   }
986 
987 
988   //small local search
989   iMinNow = best;
990   for (m = 0; m < 4; m++)
991   {
992     cand.mv_x = iMinNow.mv_x + Diamond[m].mv_x;
993     cand.mv_y = iMinNow.mv_y + Diamond[m].mv_y;
994     SEARCH_ONE_PIXEL_BIPRED;
995   }
996 
997   //early termination alogrithm, refer to JVT-G016
998   EARLY_TERMINATION;
999 
1000 
1001   //sec_step: //Unsymmetrical-cross search
1002   iMinNow = best;
1003 
1004   for(i = 1; i < search_range; i+=2)
1005   {
1006     search_step = i;
1007     cand.mv_x = (short) (iMinNow.mv_x + search_step);
1008     cand.mv_y = iMinNow.mv_y ;
1009     SEARCH_ONE_PIXEL_BIPRED;
1010     cand.mv_x = (short) (iMinNow.mv_x - search_step);
1011     SEARCH_ONE_PIXEL_BIPRED;
1012   }
1013 
1014   for(i = 1; i < (search_range >> 1);i+=2)
1015   {
1016     search_step = i;
1017     cand.mv_x = iMinNow.mv_x ;
1018     cand.mv_y = (short) (iMinNow.mv_y + search_step);
1019     SEARCH_ONE_PIXEL_BIPRED;
1020     cand.mv_y = (short) (iMinNow.mv_y - search_step);
1021     SEARCH_ONE_PIXEL_BIPRED;
1022   }
1023   //early termination alogrithm, refer to JVT-G016
1024   EARLY_TERMINATION;
1025 
1026   //third_step:     // Uneven Multi-Hexagon-grid Search
1027   iMinNow = best;
1028   //sub step1: 5x5 square search
1029   for(pos=1;pos<25;pos++)
1030   {
1031     cand.mv_x = iMinNow.mv_x + p_Vid->spiral_qpel_search[pos].mv_x;
1032     cand.mv_y = iMinNow.mv_y + p_Vid->spiral_qpel_search[pos].mv_y;
1033     SEARCH_ONE_PIXEL_BIPRED;
1034   }
1035 
1036   //early termination alogrithm, refer to JVT-G016
1037   EARLY_TERMINATION;      //added back by xxz
1038 
1039   //sub step2: multi-grid-hexagon-search
1040   memcpy(temp_Big_Hexagon_X,Big_Hexagon_X,64);
1041   memcpy(temp_Big_Hexagon_Y,Big_Hexagon_Y,64);
1042   for(i=1;i<=(p_Inp->search_range[p_Vid->view_id] >> 2); i++)
1043   {
1044 
1045     for (m = 0; m < 16; m++)
1046     {
1047       cand.mv_x = (short) (iMinNow.mv_x + temp_Big_Hexagon_X[m]);
1048       cand.mv_y = (short) (iMinNow.mv_y + temp_Big_Hexagon_Y[m]);
1049       temp_Big_Hexagon_X[m] += Big_Hexagon_X[m];
1050       temp_Big_Hexagon_Y[m] += Big_Hexagon_Y[m];
1051 
1052       SEARCH_ONE_PIXEL_BIPRED;
1053     }
1054     if(min_mcost < ET_Thred)
1055     {
1056       goto terminate_step;
1057 
1058     }
1059   }
1060   //fourth step: Local Refinement: Extended Hexagon-based Search
1061 fourth_1_step:
1062 
1063   for(i=0; i < search_range; i++)
1064   {
1065     iMinNow = best;
1066     for (m = 0; m < 6; m++)
1067     {
1068       cand.mv_x = iMinNow.mv_x + Hexagon[m].mv_x;
1069       cand.mv_y = iMinNow.mv_y + Hexagon[m].mv_y;
1070       SEARCH_ONE_PIXEL_BIPRED;
1071     }
1072     if(best.mv_x == iMinNow.mv_x && best.mv_y == iMinNow.mv_y)
1073       break;
1074   }
1075 fourth_2_step:
1076 
1077   for(i = 0; i < search_range; i++)
1078   {
1079     iMinNow = best;
1080     for (m = 0; m < 4; m++)
1081     {
1082       cand.mv_x = iMinNow.mv_x + Diamond[m].mv_x;
1083       cand.mv_y = iMinNow.mv_y + Diamond[m].mv_y;
1084       SEARCH_ONE_PIXEL_BIPRED;
1085     }
1086     if(best.mv_x == iMinNow.mv_x && best.mv_y == iMinNow.mv_y)
1087       break;
1088   }
1089 
1090 terminate_step:
1091   for (i=0; i < (blocksize_x>>2); i++)
1092   {
1093     for (j=0; j < (blocksize_y>>2); j++)
1094     {
1095       if(list == 0)
1096       {
1097         p_UMHex->fastme_l0_cost_bipred[blocktype][(currMB->block_y)+block_y+j][(currMB->block_x)+block_x+i] = min_mcost;
1098       }
1099       else
1100       {
1101         p_UMHex->fastme_l1_cost_bipred[blocktype][(currMB->block_y)+block_y+j][(currMB->block_x)+block_x+i] = min_mcost;
1102       }
1103     }
1104   }
1105 
1106   mv1->mv_x = (short) (best.mv_x - pic_pix_x);
1107   mv1->mv_y = (short) (best.mv_y - pic_pix_y);
1108 
1109   return min_mcost;
1110 }
1111 
1112 /*!
1113 ************************************************************************
1114 * \brief
1115 *    Set motion vector predictor
1116 ************************************************************************
1117 */
UMHEXSetMotionVectorPredictor(Macroblock * currMB,MotionVector * pmv,struct pic_motion_params ** mv_info,short ref_frame,int list,int mb_x,int mb_y,int blockshape_x,int blockshape_y,MEBlock * mv_block)1118 void UMHEXSetMotionVectorPredictor (Macroblock *currMB,
1119                                     MotionVector *pmv,
1120                                     struct pic_motion_params **mv_info,
1121                                     short       ref_frame,
1122                                     int         list,
1123                                     int         mb_x,
1124                                     int         mb_y,
1125                                     int         blockshape_x,
1126                                     int         blockshape_y,
1127                                     MEBlock    *mv_block)
1128 {
1129   VideoParameters *p_Vid = currMB->p_Vid;
1130   InputParameters *p_Inp = currMB->p_Inp;
1131   UMHexStruct *p_UMHex = p_Vid->p_UMHex;
1132   int mv_a, mv_b, mv_c;
1133   short pred_vec=0;
1134   int mvPredType, rFrameL, rFrameU, rFrameUR;
1135   int hv;
1136 
1137   PixelPos block_a, block_b, block_c, block_d;
1138 
1139   // added for bipred mode
1140   distblk *** fastme_l0_cost_flag = (p_UMHex->bipred_flag ? p_UMHex->fastme_l0_cost_bipred : p_UMHex->fastme_l0_cost);
1141   distblk *** fastme_l1_cost_flag = (p_UMHex->bipred_flag ? p_UMHex->fastme_l1_cost_bipred : p_UMHex->fastme_l1_cost);
1142   //Dynamic Search Range
1143 
1144   int dsr_temp_search_range[2];
1145   int dsr_mv_avail, dsr_mv_max, dsr_mv_sum, dsr_small_search_range;
1146   int *mb_size = p_Vid->mb_size[IS_LUMA];
1147 
1148   // neighborhood SAD init
1149   p_UMHex->SAD_a = 0;
1150   p_UMHex->SAD_b = 0;
1151   p_UMHex->SAD_c = 0;
1152   p_UMHex->SAD_d = 0;
1153 
1154   get4x4Neighbour(currMB, mb_x - 1           , mb_y    , mb_size, &block_a);
1155   get4x4Neighbour(currMB, mb_x               , mb_y - 1, mb_size, &block_b);
1156   get4x4Neighbour(currMB, mb_x + blockshape_x, mb_y - 1, mb_size, &block_c);
1157   get4x4Neighbour(currMB, mb_x - 1           , mb_y - 1, mb_size, &block_d);
1158 
1159   if (mb_y > 0)
1160   {
1161     if (mb_x < 8)  // first column of 8x8 blocks
1162     {
1163       if (mb_y==8)
1164       {
1165         if (blockshape_x == 16)      block_c.available  = 0;
1166       }
1167       else
1168       {
1169         if (mb_x+blockshape_x == 8)  block_c.available = 0;
1170       }
1171     }
1172     else
1173     {
1174       if (mb_x+blockshape_x == 16)   block_c.available = 0;
1175     }
1176   }
1177 
1178   if (!block_c.available)
1179   {
1180     block_c=block_d;
1181   }
1182 
1183   mvPredType = MVPRED_MEDIAN;
1184 
1185   if (!p_Vid->mb_aff_frame_flag)
1186   {
1187     rFrameL    = block_a.available    ? mv_info[block_a.pos_y][block_a.pos_x].ref_idx[list] : -1;
1188     rFrameU    = block_b.available    ? mv_info[block_b.pos_y][block_b.pos_x].ref_idx[list] : -1;
1189     rFrameUR   = block_c.available    ? mv_info[block_c.pos_y][block_c.pos_x].ref_idx[list] : -1;
1190   }
1191   else
1192   {
1193     if (p_Vid->mb_data[currMB->mbAddrX].mb_field)
1194     {
1195       rFrameL  = block_a.available
1196         ? (p_Vid->mb_data[block_a.mb_addr].mb_field
1197         ? mv_info[block_a.pos_y][block_a.pos_x].ref_idx[list]
1198       : mv_info[block_a.pos_y][block_a.pos_x].ref_idx[list] * 2) : -1;
1199       rFrameU  = block_b.available
1200         ? (p_Vid->mb_data[block_b.mb_addr].mb_field
1201         ? mv_info[block_b.pos_y][block_b.pos_x].ref_idx[list]
1202       : mv_info[block_b.pos_y][block_b.pos_x].ref_idx[list] * 2) : -1;
1203       rFrameUR = block_c.available
1204         ? (p_Vid->mb_data[block_c.mb_addr].mb_field
1205         ? mv_info[block_c.pos_y][block_c.pos_x].ref_idx[list]
1206       : mv_info[block_c.pos_y][block_c.pos_x].ref_idx[list] * 2) : -1;
1207     }
1208     else
1209     {
1210       rFrameL = block_a.available
1211         ? (p_Vid->mb_data[block_a.mb_addr].mb_field
1212         ? mv_info[block_a.pos_y][block_a.pos_x].ref_idx[list] >>1
1213         : mv_info[block_a.pos_y][block_a.pos_x].ref_idx[list]) : -1;
1214       rFrameU    = block_b.available    ?
1215         p_Vid->mb_data[block_b.mb_addr].mb_field ?
1216         mv_info[block_b.pos_y][block_b.pos_x].ref_idx[list] >>1:
1217       mv_info[block_b.pos_y][block_b.pos_x].ref_idx[list] :
1218       -1;
1219       rFrameUR    = block_c.available    ?
1220         p_Vid->mb_data[block_c.mb_addr].mb_field ?
1221         mv_info[block_c.pos_y][block_c.pos_x].ref_idx[list] >>1:
1222       mv_info[block_c.pos_y][block_c.pos_x].ref_idx[list] :
1223       -1;
1224     }
1225   }
1226 
1227   /* Prediction if only one of the neighbors uses the reference frame
1228   * we are checking
1229   */
1230   if(rFrameL == ref_frame && rFrameU != ref_frame && rFrameUR != ref_frame)       mvPredType = MVPRED_L;
1231   else if(rFrameL != ref_frame && rFrameU == ref_frame && rFrameUR != ref_frame)  mvPredType = MVPRED_U;
1232   else if(rFrameL != ref_frame && rFrameU != ref_frame && rFrameUR == ref_frame)  mvPredType = MVPRED_UR;
1233   // Directional predictions
1234   if(blockshape_x == 8 && blockshape_y == 16)
1235   {
1236     if(mb_x == 0)
1237     {
1238       if(rFrameL == ref_frame)
1239         mvPredType = MVPRED_L;
1240     }
1241     else
1242     {
1243       if( rFrameUR == ref_frame)
1244         mvPredType = MVPRED_UR;
1245     }
1246   }
1247   else if(blockshape_x == 16 && blockshape_y == 8)
1248   {
1249     if(mb_y == 0)
1250     {
1251       if(rFrameU == ref_frame)
1252         mvPredType = MVPRED_U;
1253     }
1254     else
1255     {
1256       if(rFrameL == ref_frame)
1257         mvPredType = MVPRED_L;
1258     }
1259   }
1260 
1261   // neighborhood SAD prediction
1262   if((p_Inp->UMHexDSR == 1 || p_Inp->BiPredMotionEstimation == 1))
1263   {
1264     p_UMHex->SAD_a = block_a.available ? ((list==1) ? (fastme_l1_cost_flag[p_UMHex->UMHEX_blocktype][block_a.pos_y][block_a.pos_x]) : (fastme_l0_cost_flag[p_UMHex->UMHEX_blocktype][block_a.pos_y][block_a.pos_x])) : 0;
1265     p_UMHex->SAD_b = block_b.available ? ((list==1) ? (fastme_l1_cost_flag[p_UMHex->UMHEX_blocktype][block_b.pos_y][block_b.pos_x]) : (fastme_l0_cost_flag[p_UMHex->UMHEX_blocktype][block_b.pos_y][block_b.pos_x])) : 0;
1266     p_UMHex->SAD_d = block_d.available ? ((list==1) ? (fastme_l1_cost_flag[p_UMHex->UMHEX_blocktype][block_d.pos_y][block_d.pos_x]) : (fastme_l0_cost_flag[p_UMHex->UMHEX_blocktype][block_d.pos_y][block_d.pos_x])) : 0;
1267     p_UMHex->SAD_c = block_c.available ? ((list==1) ? (fastme_l1_cost_flag[p_UMHex->UMHEX_blocktype][block_c.pos_y][block_c.pos_x]) : (fastme_l0_cost_flag[p_UMHex->UMHEX_blocktype][block_c.pos_y][block_c.pos_x])) : p_UMHex->SAD_d;
1268   }
1269   for (hv=0; hv < 2; hv++)
1270   {
1271     if (!p_Vid->mb_aff_frame_flag || hv==0)
1272     {
1273       mv_a = block_a.available  ? mv_info[block_a.pos_y][block_a.pos_x].mv[list].mv_x : 0;
1274       mv_b = block_b.available  ? mv_info[block_b.pos_y][block_b.pos_x].mv[list].mv_x : 0;
1275       mv_c = block_c.available  ? mv_info[block_c.pos_y][block_c.pos_x].mv[list].mv_x : 0;
1276     }
1277     else
1278     {
1279       if (p_Vid->mb_data[currMB->mbAddrX].mb_field)
1280       {
1281         mv_a = block_a.available  ? p_Vid->mb_data[block_a.mb_addr].mb_field
1282           ? mv_info[block_a.pos_y][block_a.pos_x].mv[list].mv_y
1283         : mv_info[block_a.pos_y][block_a.pos_x].mv[list].mv_y / 2
1284           : 0;
1285         mv_b = block_b.available  ? p_Vid->mb_data[block_b.mb_addr].mb_field
1286           ? mv_info[block_b.pos_y][block_b.pos_x].mv[list].mv_y
1287         : mv_info[block_b.pos_y][block_b.pos_x].mv[list].mv_y / 2
1288           : 0;
1289         mv_c = block_c.available  ? p_Vid->mb_data[block_c.mb_addr].mb_field
1290           ? mv_info[block_c.pos_y][block_c.pos_x].mv[list].mv_y
1291         : mv_info[block_c.pos_y][block_c.pos_x].mv[list].mv_y / 2
1292           : 0;
1293       }
1294       else
1295       {
1296         mv_a = block_a.available  ? p_Vid->mb_data[block_a.mb_addr].mb_field
1297           ? mv_info[block_a.pos_y][block_a.pos_x].mv[list].mv_y * 2
1298           : mv_info[block_a.pos_y][block_a.pos_x].mv[list].mv_y
1299         : 0;
1300         mv_b = block_b.available  ? p_Vid->mb_data[block_b.mb_addr].mb_field
1301           ? mv_info[block_b.pos_y][block_b.pos_x].mv[list].mv_y * 2
1302           : mv_info[block_b.pos_y][block_b.pos_x].mv[list].mv_y
1303         : 0;
1304         mv_c = block_c.available  ? p_Vid->mb_data[block_c.mb_addr].mb_field
1305           ? mv_info[block_c.pos_y][block_c.pos_x].mv[list].mv_y * 2
1306           : mv_info[block_c.pos_y][block_c.pos_x].mv[list].mv_y
1307         : 0;
1308       }
1309     }
1310 
1311     switch (mvPredType)
1312     {
1313     case MVPRED_MEDIAN:
1314       if(!(block_b.available || block_c.available))
1315       {
1316         pred_vec = (short) mv_a;
1317       }
1318       else
1319       {
1320         pred_vec = (short) (mv_a+mv_b+mv_c-imin(mv_a,imin(mv_b,mv_c))-imax(mv_a,imax(mv_b,mv_c)));
1321       }
1322       break;
1323     case MVPRED_L:
1324       pred_vec = (short) mv_a;
1325       break;
1326     case MVPRED_U:
1327       pred_vec = (short) mv_b;
1328       break;
1329     case MVPRED_UR:
1330       pred_vec = (short) mv_c;
1331       break;
1332     default:
1333       break;
1334     }
1335 
1336     if (hv == 0)
1337       pmv->mv_x = pred_vec;
1338     else
1339       pmv->mv_y = pred_vec;
1340 
1341     //Dynamic Search Range
1342     if (p_Inp->UMHexDSR)
1343     {
1344       dsr_mv_avail=block_a.available+block_b.available+block_c.available;
1345       if(dsr_mv_avail < 2)
1346       {
1347         dsr_temp_search_range[hv] = p_Inp->search_range[p_Vid->view_id];
1348       }
1349       else
1350       {
1351         dsr_mv_max = imax(iabs(mv_a),imax(iabs(mv_b),iabs(mv_c)));
1352         dsr_mv_sum = (iabs(mv_a)+iabs(mv_b)+iabs(mv_c));
1353         if(dsr_mv_sum == 0) dsr_small_search_range = (p_Inp->search_range[p_Vid->view_id] + 4) >> 3;
1354         else if(dsr_mv_sum > 3 ) dsr_small_search_range = (p_Inp->search_range[p_Vid->view_id] + 2) >>2;
1355         else dsr_small_search_range = (3*p_Inp->search_range[p_Vid->view_id] + 8) >> 4;
1356         dsr_temp_search_range[hv]=imin(p_Inp->search_range[p_Vid->view_id],imax(dsr_small_search_range,dsr_mv_max<<1));
1357         if(distblkmax(p_UMHex->SAD_a, distblkmax(p_UMHex->SAD_b,p_UMHex->SAD_c)) > p_UMHex->Threshold_DSR_MB[p_UMHex->UMHEX_blocktype])
1358           dsr_temp_search_range[hv] = p_Inp->search_range[p_Vid->view_id];
1359       }
1360     }
1361   }
1362 
1363   //Dynamic Search Range
1364   if (p_Inp->UMHexDSR)
1365   {
1366     int search_range = imax(dsr_temp_search_range[0],dsr_temp_search_range[1]);
1367     search_range <<= 2;
1368 
1369     if      (p_Inp->full_search == 2)
1370     {
1371       mv_block->searchRange.min_x = -search_range;
1372       mv_block->searchRange.max_x =  search_range;
1373       mv_block->searchRange.min_y = -search_range;
1374       mv_block->searchRange.max_y =  search_range;
1375     }
1376     else if (p_Inp->full_search == 1)
1377     {
1378       int scale = (imin(ref_frame,1)+1);
1379       mv_block->searchRange.min_x = -search_range / scale;
1380       mv_block->searchRange.max_x =  search_range / scale;
1381       mv_block->searchRange.min_y = -search_range / scale;
1382       mv_block->searchRange.max_y =  search_range / scale;
1383     }
1384     else
1385     {
1386       int scale = ((imin(ref_frame,1)+1) * imin(2,p_UMHex->BlockType_LUT[(blockshape_y >> 2) - 1][(blockshape_x >> 2) - 1]));
1387       mv_block->searchRange.min_x = -search_range / scale;
1388       mv_block->searchRange.max_x =  search_range / scale;
1389       mv_block->searchRange.min_y = -search_range / scale;
1390       mv_block->searchRange.max_y =  search_range / scale;
1391     }
1392   }
1393 }
1394 
1395 #undef SEARCH_ONE_PIXEL
1396 
1397