1 /*
2  * Motion estimation
3  * Copyright (c) 2002-2004 Michael Niedermayer
4  *
5  * This file is part of FFmpeg.
6  *
7  * FFmpeg is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public
9  * License as published by the Free Software Foundation; either
10  * version 2.1 of the License, or (at your option) any later version.
11  *
12  * FFmpeg is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15  * Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public
18  * License along with FFmpeg; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20  */
21 
22 /**
23  * @file
24  * Motion estimation template.
25  */
26 
27 #include "libavutil/qsort.h"
28 #include "mpegvideo.h"
29 
30 //Let us hope gcc will remove the unused vars ...(gcc 3.2.2 seems to do it ...)
31 #define LOAD_COMMON\
32     uint32_t av_unused * const score_map= c->score_map;\
33     const int av_unused xmin= c->xmin;\
34     const int av_unused ymin= c->ymin;\
35     const int av_unused xmax= c->xmax;\
36     const int av_unused ymax= c->ymax;\
37     const uint8_t *mv_penalty = c->current_mv_penalty; \
38     const int pred_x= c->pred_x;\
39     const int pred_y= c->pred_y;\
40 
41 #define CHECK_HALF_MV(dx, dy, x, y)\
42 {\
43     const int hx= 2*(x)+(dx);\
44     const int hy= 2*(y)+(dy);\
45     d= cmp_hpel(s, x, y, dx, dy, size, h, ref_index, src_index, cmp_sub, chroma_cmp_sub, flags);\
46     d += (mv_penalty[hx - pred_x] + mv_penalty[hy - pred_y])*penalty_factor;\
47     COPY3_IF_LT(dmin, d, bx, hx, by, hy)\
48 }
49 
hpel_motion_search(MpegEncContext * s,int * mx_ptr,int * my_ptr,int dmin,int src_index,int ref_index,int size,int h)50 static int hpel_motion_search(MpegEncContext * s,
51                                   int *mx_ptr, int *my_ptr, int dmin,
52                                   int src_index, int ref_index,
53                                   int size, int h)
54 {
55     MotionEstContext * const c= &s->me;
56     const int mx = *mx_ptr;
57     const int my = *my_ptr;
58     const int penalty_factor= c->sub_penalty_factor;
59     me_cmp_func cmp_sub, chroma_cmp_sub;
60     int bx=2*mx, by=2*my;
61 
62     LOAD_COMMON
63     int flags= c->sub_flags;
64 
65  //FIXME factorize
66 
67     cmp_sub        = s->mecc.me_sub_cmp[size];
68     chroma_cmp_sub = s->mecc.me_sub_cmp[size + 1];
69 
70     if(c->skip){ //FIXME move out of hpel?
71         *mx_ptr = 0;
72         *my_ptr = 0;
73         return dmin;
74     }
75 
76     if(c->avctx->me_cmp != c->avctx->me_sub_cmp){
77         dmin= cmp(s, mx, my, 0, 0, size, h, ref_index, src_index, cmp_sub, chroma_cmp_sub, flags);
78         if(mx || my || size>0)
79             dmin += (mv_penalty[2*mx - pred_x] + mv_penalty[2*my - pred_y])*penalty_factor;
80     }
81 
82     if (mx > xmin && mx < xmax &&
83         my > ymin && my < ymax) {
84         int d= dmin;
85         const int index = my * (1 << ME_MAP_SHIFT) + mx;
86         const int t= score_map[(index-(1<<ME_MAP_SHIFT))&(ME_MAP_SIZE-1)]
87                      + (mv_penalty[bx   - pred_x] + mv_penalty[by-2 - pred_y])*c->penalty_factor;
88         const int l= score_map[(index- 1               )&(ME_MAP_SIZE-1)]
89                      + (mv_penalty[bx-2 - pred_x] + mv_penalty[by   - pred_y])*c->penalty_factor;
90         const int r= score_map[(index+ 1               )&(ME_MAP_SIZE-1)]
91                      + (mv_penalty[bx+2 - pred_x] + mv_penalty[by   - pred_y])*c->penalty_factor;
92         const int b= score_map[(index+(1<<ME_MAP_SHIFT))&(ME_MAP_SIZE-1)]
93                      + (mv_penalty[bx   - pred_x] + mv_penalty[by+2 - pred_y])*c->penalty_factor;
94 
95 #if defined(ASSERT_LEVEL) && ASSERT_LEVEL > 1
96         unsigned key;
97         unsigned map_generation= c->map_generation;
98         key = (my - 1) * (1 << ME_MAP_MV_BITS) + (mx) + map_generation;
99         av_assert2(c->map[(index-(1<<ME_MAP_SHIFT))&(ME_MAP_SIZE-1)] == key);
100         key = (my + 1) * (1 << ME_MAP_MV_BITS) + (mx) + map_generation;
101         av_assert2(c->map[(index+(1<<ME_MAP_SHIFT))&(ME_MAP_SIZE-1)] == key);
102         key = (my) * (1 << ME_MAP_MV_BITS) + (mx + 1) + map_generation;
103         av_assert2(c->map[(index+1)&(ME_MAP_SIZE-1)] == key);
104         key = (my) * (1 << ME_MAP_MV_BITS) + (mx - 1) + map_generation;
105         av_assert2(c->map[(index-1)&(ME_MAP_SIZE-1)] == key);
106 #endif
107         if(t<=b){
108             CHECK_HALF_MV(0, 1, mx  ,my-1)
109             if(l<=r){
110                 CHECK_HALF_MV(1, 1, mx-1, my-1)
111                 if(t+r<=b+l){
112                     CHECK_HALF_MV(1, 1, mx  , my-1)
113                 }else{
114                     CHECK_HALF_MV(1, 1, mx-1, my  )
115                 }
116                 CHECK_HALF_MV(1, 0, mx-1, my  )
117             }else{
118                 CHECK_HALF_MV(1, 1, mx  , my-1)
119                 if(t+l<=b+r){
120                     CHECK_HALF_MV(1, 1, mx-1, my-1)
121                 }else{
122                     CHECK_HALF_MV(1, 1, mx  , my  )
123                 }
124                 CHECK_HALF_MV(1, 0, mx  , my  )
125             }
126         }else{
127             if(l<=r){
128                 if(t+l<=b+r){
129                     CHECK_HALF_MV(1, 1, mx-1, my-1)
130                 }else{
131                     CHECK_HALF_MV(1, 1, mx  , my  )
132                 }
133                 CHECK_HALF_MV(1, 0, mx-1, my)
134                 CHECK_HALF_MV(1, 1, mx-1, my)
135             }else{
136                 if(t+r<=b+l){
137                     CHECK_HALF_MV(1, 1, mx  , my-1)
138                 }else{
139                     CHECK_HALF_MV(1, 1, mx-1, my)
140                 }
141                 CHECK_HALF_MV(1, 0, mx  , my)
142                 CHECK_HALF_MV(1, 1, mx  , my)
143             }
144             CHECK_HALF_MV(0, 1, mx  , my)
145         }
146         av_assert2(bx >= xmin*2 && bx <= xmax*2 && by >= ymin*2 && by <= ymax*2);
147     }
148 
149     *mx_ptr = bx;
150     *my_ptr = by;
151 
152     return dmin;
153 }
154 
no_sub_motion_search(MpegEncContext * s,int * mx_ptr,int * my_ptr,int dmin,int src_index,int ref_index,int size,int h)155 static int no_sub_motion_search(MpegEncContext * s,
156           int *mx_ptr, int *my_ptr, int dmin,
157                                   int src_index, int ref_index,
158                                   int size, int h)
159 {
160     (*mx_ptr) *= 2;
161     (*my_ptr) *= 2;
162     return dmin;
163 }
164 
get_mb_score(MpegEncContext * s,int mx,int my,int src_index,int ref_index,int size,int h,int add_rate)165 static inline int get_mb_score(MpegEncContext *s, int mx, int my,
166                                int src_index, int ref_index, int size,
167                                int h, int add_rate)
168 {
169     MotionEstContext * const c= &s->me;
170     const int penalty_factor= c->mb_penalty_factor;
171     const int flags= c->mb_flags;
172     const int qpel= flags & FLAG_QPEL;
173     const int mask= 1+2*qpel;
174     me_cmp_func cmp_sub, chroma_cmp_sub;
175     int d;
176 
177     LOAD_COMMON
178 
179  //FIXME factorize
180 
181     cmp_sub        = s->mecc.mb_cmp[size];
182     chroma_cmp_sub = s->mecc.mb_cmp[size + 1];
183 
184     d= cmp(s, mx>>(qpel+1), my>>(qpel+1), mx&mask, my&mask, size, h, ref_index, src_index, cmp_sub, chroma_cmp_sub, flags);
185     //FIXME check cbp before adding penalty for (0,0) vector
186     if(add_rate && (mx || my || size>0))
187         d += (mv_penalty[mx - pred_x] + mv_penalty[my - pred_y])*penalty_factor;
188 
189     return d;
190 }
191 
ff_get_mb_score(MpegEncContext * s,int mx,int my,int src_index,int ref_index,int size,int h,int add_rate)192 int ff_get_mb_score(MpegEncContext *s, int mx, int my, int src_index,
193                     int ref_index, int size, int h, int add_rate)
194 {
195     return get_mb_score(s, mx, my, src_index, ref_index, size, h, add_rate);
196 }
197 
198 #define CHECK_QUARTER_MV(dx, dy, x, y)\
199 {\
200     const int hx= 4*(x)+(dx);\
201     const int hy= 4*(y)+(dy);\
202     d= cmp_qpel(s, x, y, dx, dy, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);\
203     d += (mv_penalty[hx - pred_x] + mv_penalty[hy - pred_y])*penalty_factor;\
204     COPY3_IF_LT(dmin, d, bx, hx, by, hy)\
205 }
206 
qpel_motion_search(MpegEncContext * s,int * mx_ptr,int * my_ptr,int dmin,int src_index,int ref_index,int size,int h)207 static int qpel_motion_search(MpegEncContext * s,
208                                   int *mx_ptr, int *my_ptr, int dmin,
209                                   int src_index, int ref_index,
210                                   int size, int h)
211 {
212     MotionEstContext * const c= &s->me;
213     const int mx = *mx_ptr;
214     const int my = *my_ptr;
215     const int penalty_factor= c->sub_penalty_factor;
216     const unsigned map_generation = c->map_generation;
217     const int subpel_quality= c->avctx->me_subpel_quality;
218     uint32_t *map= c->map;
219     me_cmp_func cmpf, chroma_cmpf;
220     me_cmp_func cmp_sub, chroma_cmp_sub;
221 
222     LOAD_COMMON
223     int flags= c->sub_flags;
224 
225     cmpf        = s->mecc.me_cmp[size];
226     chroma_cmpf = s->mecc.me_cmp[size + 1]; // FIXME: factorize
227  //FIXME factorize
228 
229     cmp_sub        = s->mecc.me_sub_cmp[size];
230     chroma_cmp_sub = s->mecc.me_sub_cmp[size + 1];
231 
232     if(c->skip){ //FIXME somehow move up (benchmark)
233         *mx_ptr = 0;
234         *my_ptr = 0;
235         return dmin;
236     }
237 
238     if(c->avctx->me_cmp != c->avctx->me_sub_cmp){
239         dmin= cmp(s, mx, my, 0, 0, size, h, ref_index, src_index, cmp_sub, chroma_cmp_sub, flags);
240         if(mx || my || size>0)
241             dmin += (mv_penalty[4*mx - pred_x] + mv_penalty[4*my - pred_y])*penalty_factor;
242     }
243 
244     if (mx > xmin && mx < xmax &&
245         my > ymin && my < ymax) {
246         int bx=4*mx, by=4*my;
247         int d= dmin;
248         int i, nx, ny;
249         const int index = my * (1 << ME_MAP_SHIFT) + mx;
250         const int t= score_map[(index-(1<<ME_MAP_SHIFT)  )&(ME_MAP_SIZE-1)];
251         const int l= score_map[(index- 1                 )&(ME_MAP_SIZE-1)];
252         const int r= score_map[(index+ 1                 )&(ME_MAP_SIZE-1)];
253         const int b= score_map[(index+(1<<ME_MAP_SHIFT)  )&(ME_MAP_SIZE-1)];
254         const int c= score_map[(index                    )&(ME_MAP_SIZE-1)];
255         int best[8];
256         int best_pos[8][2];
257 
258         memset(best, 64, sizeof(int)*8);
259         if(s->me.dia_size>=2){
260             const int tl= score_map[(index-(1<<ME_MAP_SHIFT)-1)&(ME_MAP_SIZE-1)];
261             const int bl= score_map[(index+(1<<ME_MAP_SHIFT)-1)&(ME_MAP_SIZE-1)];
262             const int tr= score_map[(index-(1<<ME_MAP_SHIFT)+1)&(ME_MAP_SIZE-1)];
263             const int br= score_map[(index+(1<<ME_MAP_SHIFT)+1)&(ME_MAP_SIZE-1)];
264 
265             for(ny= -3; ny <= 3; ny++){
266                 for(nx= -3; nx <= 3; nx++){
267                     //FIXME this could overflow (unlikely though)
268                     const int64_t t2= nx*nx*(tr + tl - 2*t) + 4*nx*(tr-tl) + 32*t;
269                     const int64_t c2= nx*nx*( r +  l - 2*c) + 4*nx*( r- l) + 32*c;
270                     const int64_t b2= nx*nx*(br + bl - 2*b) + 4*nx*(br-bl) + 32*b;
271                     int score= (ny*ny*(b2 + t2 - 2*c2) + 4*ny*(b2 - t2) + 32*c2 + 512)>>10;
272                     int i;
273 
274                     if((nx&3)==0 && (ny&3)==0) continue;
275 
276                     score += (mv_penalty[4*mx + nx - pred_x] + mv_penalty[4*my + ny - pred_y])*penalty_factor;
277 
278 //                    if(nx&1) score-=1024*c->penalty_factor;
279 //                    if(ny&1) score-=1024*c->penalty_factor;
280 
281                     for(i=0; i<8; i++){
282                         if(score < best[i]){
283                             memmove(&best[i+1], &best[i], sizeof(int)*(7-i));
284                             memmove(&best_pos[i+1][0], &best_pos[i][0], sizeof(int)*2*(7-i));
285                             best[i]= score;
286                             best_pos[i][0]= nx + 4*mx;
287                             best_pos[i][1]= ny + 4*my;
288                             break;
289                         }
290                     }
291                 }
292             }
293         }else{
294             int tl;
295             //FIXME this could overflow (unlikely though)
296             const int cx = 4*(r - l);
297             const int cx2= r + l - 2*c;
298             const int cy = 4*(b - t);
299             const int cy2= b + t - 2*c;
300             int cxy;
301 
302             if (map[(index - (1 << ME_MAP_SHIFT) - 1) & (ME_MAP_SIZE - 1)] ==
303                 (my - 1) * (1 << ME_MAP_MV_BITS) + (mx - 1) + map_generation) {
304                 tl= score_map[(index-(1<<ME_MAP_SHIFT)-1)&(ME_MAP_SIZE-1)];
305             }else{
306                 tl= cmp(s, mx-1, my-1, 0, 0, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);//FIXME wrong if chroma me is different
307             }
308 
309             cxy= 2*tl + (cx + cy)/4 - (cx2 + cy2) - 2*c;
310 
311             av_assert2(16*cx2 + 4*cx + 32*c == 32*r);
312             av_assert2(16*cx2 - 4*cx + 32*c == 32*l);
313             av_assert2(16*cy2 + 4*cy + 32*c == 32*b);
314             av_assert2(16*cy2 - 4*cy + 32*c == 32*t);
315             av_assert2(16*cxy + 16*cy2 + 16*cx2 - 4*cy - 4*cx + 32*c == 32*tl);
316 
317             for(ny= -3; ny <= 3; ny++){
318                 for(nx= -3; nx <= 3; nx++){
319                     //FIXME this could overflow (unlikely though)
320                     int score= ny*nx*cxy + nx*nx*cx2 + ny*ny*cy2 + nx*cx + ny*cy + 32*c; //FIXME factor
321                     int i;
322 
323                     if((nx&3)==0 && (ny&3)==0) continue;
324 
325                     score += 32*(mv_penalty[4*mx + nx - pred_x] + mv_penalty[4*my + ny - pred_y])*penalty_factor;
326 //                    if(nx&1) score-=32*c->penalty_factor;
327   //                  if(ny&1) score-=32*c->penalty_factor;
328 
329                     for(i=0; i<8; i++){
330                         if(score < best[i]){
331                             memmove(&best[i+1], &best[i], sizeof(int)*(7-i));
332                             memmove(best_pos[i + 1], best_pos[i], sizeof(best_pos[0]) * (7 - i));
333                             best[i]= score;
334                             best_pos[i][0]= nx + 4*mx;
335                             best_pos[i][1]= ny + 4*my;
336                             break;
337                         }
338                     }
339                 }
340             }
341         }
342         for(i=0; i<subpel_quality; i++){
343             nx= best_pos[i][0];
344             ny= best_pos[i][1];
345             CHECK_QUARTER_MV(nx&3, ny&3, nx>>2, ny>>2)
346         }
347 
348         av_assert2(bx >= xmin*4 && bx <= xmax*4 && by >= ymin*4 && by <= ymax*4);
349 
350         *mx_ptr = bx;
351         *my_ptr = by;
352     }else{
353         *mx_ptr =4*mx;
354         *my_ptr =4*my;
355     }
356 
357     return dmin;
358 }
359 
360 
361 #define CHECK_MV(x,y)\
362 {\
363     const unsigned key = ((unsigned)(y)<<ME_MAP_MV_BITS) + (x) + map_generation;\
364     const int index= (((unsigned)(y)<<ME_MAP_SHIFT) + (x))&(ME_MAP_SIZE-1);\
365     av_assert2((x) >= xmin);\
366     av_assert2((x) <= xmax);\
367     av_assert2((y) >= ymin);\
368     av_assert2((y) <= ymax);\
369     if(map[index]!=key){\
370         d= cmp(s, x, y, 0, 0, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);\
371         map[index]= key;\
372         score_map[index]= d;\
373         d += (mv_penalty[((x)*(1<<shift))-pred_x] + mv_penalty[((y)*(1<<shift))-pred_y])*penalty_factor;\
374         COPY3_IF_LT(dmin, d, best[0], x, best[1], y)\
375     }\
376 }
377 
378 #define CHECK_CLIPPED_MV(ax,ay)\
379 {\
380     const int Lx= ax;\
381     const int Ly= ay;\
382     const int Lx2= FFMAX(xmin, FFMIN(Lx, xmax));\
383     const int Ly2= FFMAX(ymin, FFMIN(Ly, ymax));\
384     CHECK_MV(Lx2, Ly2)\
385 }
386 
387 #define CHECK_MV_DIR(x,y,new_dir)\
388 {\
389     const unsigned key = ((unsigned)(y)<<ME_MAP_MV_BITS) + (x) + map_generation;\
390     const int index= (((unsigned)(y)<<ME_MAP_SHIFT) + (x))&(ME_MAP_SIZE-1);\
391     if(map[index]!=key){\
392         d= cmp(s, x, y, 0, 0, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);\
393         map[index]= key;\
394         score_map[index]= d;\
395         d += (mv_penalty[(int)((unsigned)(x)<<shift)-pred_x] + mv_penalty[(int)((unsigned)(y)<<shift)-pred_y])*penalty_factor;\
396         if(d<dmin){\
397             best[0]=x;\
398             best[1]=y;\
399             dmin=d;\
400             next_dir= new_dir;\
401         }\
402     }\
403 }
404 
405 #define check(x,y,S,v)\
406 if( (x)<(xmin<<(S)) ) av_log(NULL, AV_LOG_ERROR, "%d %d %d %d %d xmin" #v, xmin, (x), (y), s->mb_x, s->mb_y);\
407 if( (x)>(xmax<<(S)) ) av_log(NULL, AV_LOG_ERROR, "%d %d %d %d %d xmax" #v, xmax, (x), (y), s->mb_x, s->mb_y);\
408 if( (y)<(ymin<<(S)) ) av_log(NULL, AV_LOG_ERROR, "%d %d %d %d %d ymin" #v, ymin, (x), (y), s->mb_x, s->mb_y);\
409 if( (y)>(ymax<<(S)) ) av_log(NULL, AV_LOG_ERROR, "%d %d %d %d %d ymax" #v, ymax, (x), (y), s->mb_x, s->mb_y);\
410 
411 #define LOAD_COMMON2\
412     uint32_t *map= c->map;\
413     const int qpel= flags&FLAG_QPEL;\
414     const int shift= 1+qpel;\
415 
small_diamond_search(MpegEncContext * s,int * best,int dmin,int src_index,int ref_index,const int penalty_factor,int size,int h,int flags)416 static av_always_inline int small_diamond_search(MpegEncContext * s, int *best, int dmin,
417                                        int src_index, int ref_index, const int penalty_factor,
418                                        int size, int h, int flags)
419 {
420     MotionEstContext * const c= &s->me;
421     me_cmp_func cmpf, chroma_cmpf;
422     int next_dir=-1;
423     LOAD_COMMON
424     LOAD_COMMON2
425     unsigned map_generation = c->map_generation;
426 
427     cmpf        = s->mecc.me_cmp[size];
428     chroma_cmpf = s->mecc.me_cmp[size + 1];
429 
430     { /* ensure that the best point is in the MAP as h/qpel refinement needs it */
431         const unsigned key = ((unsigned)best[1]<<ME_MAP_MV_BITS) + best[0] + map_generation;
432         const int index= (((unsigned)best[1]<<ME_MAP_SHIFT) + best[0])&(ME_MAP_SIZE-1);
433         if (map[index] != key) { // this will be executed only very rarely
434             score_map[index]= cmp(s, best[0], best[1], 0, 0, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);
435             map[index]= key;
436         }
437     }
438 
439     for(;;){
440         int d;
441         const int dir= next_dir;
442         const int x= best[0];
443         const int y= best[1];
444         next_dir=-1;
445 
446         if(dir!=2 && x>xmin) CHECK_MV_DIR(x-1, y  , 0)
447         if(dir!=3 && y>ymin) CHECK_MV_DIR(x  , y-1, 1)
448         if(dir!=0 && x<xmax) CHECK_MV_DIR(x+1, y  , 2)
449         if(dir!=1 && y<ymax) CHECK_MV_DIR(x  , y+1, 3)
450 
451         if(next_dir==-1){
452             return dmin;
453         }
454     }
455 }
456 
funny_diamond_search(MpegEncContext * s,int * best,int dmin,int src_index,int ref_index,const int penalty_factor,int size,int h,int flags)457 static int funny_diamond_search(MpegEncContext * s, int *best, int dmin,
458                                        int src_index, int ref_index, const int penalty_factor,
459                                        int size, int h, int flags)
460 {
461     MotionEstContext * const c= &s->me;
462     me_cmp_func cmpf, chroma_cmpf;
463     int dia_size;
464     LOAD_COMMON
465     LOAD_COMMON2
466     unsigned map_generation = c->map_generation;
467 
468     cmpf        = s->mecc.me_cmp[size];
469     chroma_cmpf = s->mecc.me_cmp[size + 1];
470 
471     for(dia_size=1; dia_size<=4; dia_size++){
472         int dir;
473         const int x= best[0];
474         const int y= best[1];
475 
476         if(dia_size&(dia_size-1)) continue;
477 
478         if(   x + dia_size > xmax
479            || x - dia_size < xmin
480            || y + dia_size > ymax
481            || y - dia_size < ymin)
482            continue;
483 
484         for(dir= 0; dir<dia_size; dir+=2){
485             int d;
486 
487             CHECK_MV(x + dir           , y + dia_size - dir);
488             CHECK_MV(x + dia_size - dir, y - dir           );
489             CHECK_MV(x - dir           , y - dia_size + dir);
490             CHECK_MV(x - dia_size + dir, y + dir           );
491         }
492 
493         if(x!=best[0] || y!=best[1])
494             dia_size=0;
495     }
496     return dmin;
497 }
498 
hex_search(MpegEncContext * s,int * best,int dmin,int src_index,int ref_index,const int penalty_factor,int size,int h,int flags,int dia_size)499 static int hex_search(MpegEncContext * s, int *best, int dmin,
500                                        int src_index, int ref_index, const int penalty_factor,
501                                        int size, int h, int flags, int dia_size)
502 {
503     MotionEstContext * const c= &s->me;
504     me_cmp_func cmpf, chroma_cmpf;
505     LOAD_COMMON
506     LOAD_COMMON2
507     unsigned map_generation = c->map_generation;
508     int x,y,d;
509     const int dec= dia_size & (dia_size-1);
510 
511     cmpf        = s->mecc.me_cmp[size];
512     chroma_cmpf = s->mecc.me_cmp[size + 1];
513 
514     for(;dia_size; dia_size= dec ? dia_size-1 : dia_size>>1){
515         do{
516             x= best[0];
517             y= best[1];
518 
519             CHECK_CLIPPED_MV(x  -dia_size    , y);
520             CHECK_CLIPPED_MV(x+  dia_size    , y);
521             CHECK_CLIPPED_MV(x+( dia_size>>1), y+dia_size);
522             CHECK_CLIPPED_MV(x+( dia_size>>1), y-dia_size);
523             if(dia_size>1){
524                 CHECK_CLIPPED_MV(x+(-dia_size>>1), y+dia_size);
525                 CHECK_CLIPPED_MV(x+(-dia_size>>1), y-dia_size);
526             }
527         }while(best[0] != x || best[1] != y);
528     }
529 
530     return dmin;
531 }
532 
l2s_dia_search(MpegEncContext * s,int * best,int dmin,int src_index,int ref_index,const int penalty_factor,int size,int h,int flags)533 static int l2s_dia_search(MpegEncContext * s, int *best, int dmin,
534                                        int src_index, int ref_index, const int penalty_factor,
535                                        int size, int h, int flags)
536 {
537     MotionEstContext * const c= &s->me;
538     me_cmp_func cmpf, chroma_cmpf;
539     LOAD_COMMON
540     LOAD_COMMON2
541     unsigned map_generation = c->map_generation;
542     int x,y,i,d;
543     int dia_size= c->dia_size&0xFF;
544     const int dec= dia_size & (dia_size-1);
545     static const int hex[8][2]={{-2, 0}, {-1,-1}, { 0,-2}, { 1,-1},
546                                 { 2, 0}, { 1, 1}, { 0, 2}, {-1, 1}};
547 
548     cmpf        = s->mecc.me_cmp[size];
549     chroma_cmpf = s->mecc.me_cmp[size + 1];
550 
551     for(; dia_size; dia_size= dec ? dia_size-1 : dia_size>>1){
552         do{
553             x= best[0];
554             y= best[1];
555             for(i=0; i<8; i++){
556                 CHECK_CLIPPED_MV(x+hex[i][0]*dia_size, y+hex[i][1]*dia_size);
557             }
558         }while(best[0] != x || best[1] != y);
559     }
560 
561     x= best[0];
562     y= best[1];
563     CHECK_CLIPPED_MV(x+1, y);
564     CHECK_CLIPPED_MV(x, y+1);
565     CHECK_CLIPPED_MV(x-1, y);
566     CHECK_CLIPPED_MV(x, y-1);
567 
568     return dmin;
569 }
570 
umh_search(MpegEncContext * s,int * best,int dmin,int src_index,int ref_index,const int penalty_factor,int size,int h,int flags)571 static int umh_search(MpegEncContext * s, int *best, int dmin,
572                                        int src_index, int ref_index, const int penalty_factor,
573                                        int size, int h, int flags)
574 {
575     MotionEstContext * const c= &s->me;
576     me_cmp_func cmpf, chroma_cmpf;
577     LOAD_COMMON
578     LOAD_COMMON2
579     unsigned map_generation = c->map_generation;
580     int x,y,x2,y2, i, j, d;
581     const int dia_size= c->dia_size&0xFE;
582     static const int hex[16][2]={{-4,-2}, {-4,-1}, {-4, 0}, {-4, 1}, {-4, 2},
583                                  { 4,-2}, { 4,-1}, { 4, 0}, { 4, 1}, { 4, 2},
584                                  {-2, 3}, { 0, 4}, { 2, 3},
585                                  {-2,-3}, { 0,-4}, { 2,-3},};
586 
587     cmpf        = s->mecc.me_cmp[size];
588     chroma_cmpf = s->mecc.me_cmp[size + 1];
589 
590     x= best[0];
591     y= best[1];
592     for(x2=FFMAX(x-dia_size+1, xmin); x2<=FFMIN(x+dia_size-1,xmax); x2+=2){
593         CHECK_MV(x2, y);
594     }
595     for(y2=FFMAX(y-dia_size/2+1, ymin); y2<=FFMIN(y+dia_size/2-1,ymax); y2+=2){
596         CHECK_MV(x, y2);
597     }
598 
599     x= best[0];
600     y= best[1];
601     for(y2=FFMAX(y-2, ymin); y2<=FFMIN(y+2,ymax); y2++){
602         for(x2=FFMAX(x-2, xmin); x2<=FFMIN(x+2,xmax); x2++){
603             CHECK_MV(x2, y2);
604         }
605     }
606 
607 //FIXME prevent the CLIP stuff
608 
609     for(j=1; j<=dia_size/4; j++){
610         for(i=0; i<16; i++){
611             CHECK_CLIPPED_MV(x+hex[i][0]*j, y+hex[i][1]*j);
612         }
613     }
614 
615     return hex_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags, 2);
616 }
617 
full_search(MpegEncContext * s,int * best,int dmin,int src_index,int ref_index,const int penalty_factor,int size,int h,int flags)618 static int full_search(MpegEncContext * s, int *best, int dmin,
619                                        int src_index, int ref_index, const int penalty_factor,
620                                        int size, int h, int flags)
621 {
622     MotionEstContext * const c= &s->me;
623     me_cmp_func cmpf, chroma_cmpf;
624     LOAD_COMMON
625     LOAD_COMMON2
626     unsigned map_generation = c->map_generation;
627     int x,y, d;
628     const int dia_size= c->dia_size&0xFF;
629 
630     cmpf        = s->mecc.me_cmp[size];
631     chroma_cmpf = s->mecc.me_cmp[size + 1];
632 
633     for(y=FFMAX(-dia_size, ymin); y<=FFMIN(dia_size,ymax); y++){
634         for(x=FFMAX(-dia_size, xmin); x<=FFMIN(dia_size,xmax); x++){
635             CHECK_MV(x, y);
636         }
637     }
638 
639     x= best[0];
640     y= best[1];
641     d= dmin;
642     CHECK_CLIPPED_MV(x  , y);
643     CHECK_CLIPPED_MV(x+1, y);
644     CHECK_CLIPPED_MV(x, y+1);
645     CHECK_CLIPPED_MV(x-1, y);
646     CHECK_CLIPPED_MV(x, y-1);
647     best[0]= x;
648     best[1]= y;
649 
650     return d;
651 }
652 
653 #define SAB_CHECK_MV(ax,ay)\
654 {\
655     const unsigned key = ((ay)<<ME_MAP_MV_BITS) + (ax) + map_generation;\
656     const int index= (((ay)<<ME_MAP_SHIFT) + (ax))&(ME_MAP_SIZE-1);\
657     if(map[index]!=key){\
658         d= cmp(s, ax, ay, 0, 0, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);\
659         map[index]= key;\
660         score_map[index]= d;\
661         d += (mv_penalty[((ax)<<shift)-pred_x] + mv_penalty[((ay)<<shift)-pred_y])*penalty_factor;\
662         if(d < minima[minima_count-1].height){\
663             int j=0;\
664             \
665             while(d >= minima[j].height) j++;\
666 \
667             memmove(&minima [j+1], &minima [j], (minima_count - j - 1)*sizeof(Minima));\
668 \
669             minima[j].checked= 0;\
670             minima[j].height= d;\
671             minima[j].x= ax;\
672             minima[j].y= ay;\
673             \
674             i=-1;\
675             continue;\
676         }\
677     }\
678 }
679 
680 #define MAX_SAB_SIZE ME_MAP_SIZE
sab_diamond_search(MpegEncContext * s,int * best,int dmin,int src_index,int ref_index,const int penalty_factor,int size,int h,int flags)681 static int sab_diamond_search(MpegEncContext * s, int *best, int dmin,
682                                        int src_index, int ref_index, const int penalty_factor,
683                                        int size, int h, int flags)
684 {
685     MotionEstContext * const c= &s->me;
686     me_cmp_func cmpf, chroma_cmpf;
687     Minima minima[MAX_SAB_SIZE];
688     const int minima_count= FFABS(c->dia_size);
689     int i, j;
690     LOAD_COMMON
691     LOAD_COMMON2
692     unsigned map_generation = c->map_generation;
693 
694     av_assert1(minima_count <= MAX_SAB_SIZE);
695 
696     cmpf        = s->mecc.me_cmp[size];
697     chroma_cmpf = s->mecc.me_cmp[size + 1];
698 
699     /*Note j<MAX_SAB_SIZE is needed if MAX_SAB_SIZE < ME_MAP_SIZE as j can
700       become larger due to MVs overflowing their ME_MAP_MV_BITS bits space in map
701      */
702     for(j=i=0; i<ME_MAP_SIZE && j<MAX_SAB_SIZE; i++){
703         uint32_t key= map[i];
704 
705         key += (1<<(ME_MAP_MV_BITS-1)) + (1<<(2*ME_MAP_MV_BITS-1));
706 
707         if ((key & (-(1 << (2 * ME_MAP_MV_BITS)))) != map_generation)
708             continue;
709 
710         minima[j].height= score_map[i];
711         minima[j].x= key & ((1<<ME_MAP_MV_BITS)-1); key>>=ME_MAP_MV_BITS;
712         minima[j].y= key & ((1<<ME_MAP_MV_BITS)-1);
713         minima[j].x-= (1<<(ME_MAP_MV_BITS-1));
714         minima[j].y-= (1<<(ME_MAP_MV_BITS-1));
715 
716         // all entries in map should be in range except if the mv overflows their ME_MAP_MV_BITS bits space
717         if(   minima[j].x > xmax || minima[j].x < xmin
718            || minima[j].y > ymax || minima[j].y < ymin)
719             continue;
720 
721         minima[j].checked=0;
722         if(minima[j].x || minima[j].y)
723             minima[j].height+= (mv_penalty[((minima[j].x)<<shift)-pred_x] + mv_penalty[((minima[j].y)<<shift)-pred_y])*penalty_factor;
724 
725         j++;
726     }
727 
728     AV_QSORT(minima, j, Minima, minima_cmp);
729 
730     for(; j<minima_count; j++){
731         minima[j].height=256*256*256*64;
732         minima[j].checked=0;
733         minima[j].x= minima[j].y=0;
734     }
735 
736     for(i=0; i<minima_count; i++){
737         const int x= minima[i].x;
738         const int y= minima[i].y;
739         int d;
740 
741         if(minima[i].checked) continue;
742 
743         if(   x >= xmax || x <= xmin
744            || y >= ymax || y <= ymin)
745            continue;
746 
747         SAB_CHECK_MV(x-1, y)
748         SAB_CHECK_MV(x+1, y)
749         SAB_CHECK_MV(x  , y-1)
750         SAB_CHECK_MV(x  , y+1)
751 
752         minima[i].checked= 1;
753     }
754 
755     best[0]= minima[0].x;
756     best[1]= minima[0].y;
757     dmin= minima[0].height;
758 
759     if(   best[0] < xmax && best[0] > xmin
760        && best[1] < ymax && best[1] > ymin){
761         int d;
762         // ensure that the reference samples for hpel refinement are in the map
763         CHECK_MV(best[0]-1, best[1])
764         CHECK_MV(best[0]+1, best[1])
765         CHECK_MV(best[0], best[1]-1)
766         CHECK_MV(best[0], best[1]+1)
767     }
768     return dmin;
769 }
770 
var_diamond_search(MpegEncContext * s,int * best,int dmin,int src_index,int ref_index,const int penalty_factor,int size,int h,int flags)771 static int var_diamond_search(MpegEncContext * s, int *best, int dmin,
772                                        int src_index, int ref_index, const int penalty_factor,
773                                        int size, int h, int flags)
774 {
775     MotionEstContext * const c= &s->me;
776     me_cmp_func cmpf, chroma_cmpf;
777     int dia_size;
778     LOAD_COMMON
779     LOAD_COMMON2
780     unsigned map_generation = c->map_generation;
781 
782     cmpf        = s->mecc.me_cmp[size];
783     chroma_cmpf = s->mecc.me_cmp[size + 1];
784 
785     for(dia_size=1; dia_size<=c->dia_size; dia_size++){
786         int dir, start, end;
787         const int x= best[0];
788         const int y= best[1];
789 
790         start= FFMAX(0, y + dia_size - ymax);
791         end  = FFMIN(dia_size, xmax - x + 1);
792         for(dir= start; dir<end; dir++){
793             int d;
794 
795 //check(x + dir,y + dia_size - dir,0, a0)
796             CHECK_MV(x + dir           , y + dia_size - dir);
797         }
798 
799         start= FFMAX(0, x + dia_size - xmax);
800         end  = FFMIN(dia_size, y - ymin + 1);
801         for(dir= start; dir<end; dir++){
802             int d;
803 
804 //check(x + dia_size - dir, y - dir,0, a1)
805             CHECK_MV(x + dia_size - dir, y - dir           );
806         }
807 
808         start= FFMAX(0, -y + dia_size + ymin );
809         end  = FFMIN(dia_size, x - xmin + 1);
810         for(dir= start; dir<end; dir++){
811             int d;
812 
813 //check(x - dir,y - dia_size + dir,0, a2)
814             CHECK_MV(x - dir           , y - dia_size + dir);
815         }
816 
817         start= FFMAX(0, -x + dia_size + xmin );
818         end  = FFMIN(dia_size, ymax - y + 1);
819         for(dir= start; dir<end; dir++){
820             int d;
821 
822 //check(x - dia_size + dir, y + dir,0, a3)
823             CHECK_MV(x - dia_size + dir, y + dir           );
824         }
825 
826         if(x!=best[0] || y!=best[1])
827             dia_size=0;
828     }
829     return dmin;
830 }
831 
diamond_search(MpegEncContext * s,int * best,int dmin,int src_index,int ref_index,const int penalty_factor,int size,int h,int flags)832 static av_always_inline int diamond_search(MpegEncContext * s, int *best, int dmin,
833                                        int src_index, int ref_index, const int penalty_factor,
834                                        int size, int h, int flags){
835     MotionEstContext * const c= &s->me;
836     if(c->dia_size==-1)
837         return funny_diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
838     else if(c->dia_size<-1)
839         return   sab_diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
840     else if(c->dia_size<2)
841         return small_diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
842     else if(c->dia_size>1024)
843         return          full_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
844     else if(c->dia_size>768)
845         return           umh_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
846     else if(c->dia_size>512)
847         return           hex_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags, c->dia_size&0xFF);
848     else if(c->dia_size>256)
849         return       l2s_dia_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
850     else
851         return   var_diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
852 }
853 
854 /**
855    @param P a list of candidate mvs to check before starting the
856    iterative search. If one of the candidates is close to the optimal mv, then
857    it takes fewer iterations. And it increases the chance that we find the
858    optimal mv.
859  */
epzs_motion_search_internal(MpegEncContext * s,int * mx_ptr,int * my_ptr,int P[10][2],int src_index,int ref_index,int16_t (* last_mv)[2],int ref_mv_scale,int flags,int size,int h)860 static av_always_inline int epzs_motion_search_internal(MpegEncContext * s, int *mx_ptr, int *my_ptr,
861                              int P[10][2], int src_index, int ref_index, int16_t (*last_mv)[2],
862                              int ref_mv_scale, int flags, int size, int h)
863 {
864     MotionEstContext * const c= &s->me;
865     int best[2]={0, 0};      /**< x and y coordinates of the best motion vector.
866                                i.e. the difference between the position of the
867                                block currently being encoded and the position of
868                                the block chosen to predict it from. */
869     int d;                   ///< the score (cmp + penalty) of any given mv
870     int dmin;                /**< the best value of d, i.e. the score
871                                corresponding to the mv stored in best[]. */
872     unsigned map_generation;
873     int penalty_factor;
874     const int ref_mv_stride= s->mb_stride; //pass as arg  FIXME
875     const int ref_mv_xy = s->mb_x + s->mb_y * ref_mv_stride; // add to last_mv before passing FIXME
876     me_cmp_func cmpf, chroma_cmpf;
877 
878     LOAD_COMMON
879     LOAD_COMMON2
880 
881     if(c->pre_pass){
882         penalty_factor= c->pre_penalty_factor;
883         cmpf           = s->mecc.me_pre_cmp[size];
884         chroma_cmpf    = s->mecc.me_pre_cmp[size + 1];
885     }else{
886         penalty_factor= c->penalty_factor;
887         cmpf           = s->mecc.me_cmp[size];
888         chroma_cmpf    = s->mecc.me_cmp[size + 1];
889     }
890 
891     map_generation= update_map_generation(c);
892 
893     av_assert2(cmpf);
894     dmin= cmp(s, 0, 0, 0, 0, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);
895     map[0]= map_generation;
896     score_map[0]= dmin;
897 
898     //FIXME precalc first term below?
899     if ((s->pict_type == AV_PICTURE_TYPE_B && !(c->flags & FLAG_DIRECT)) ||
900         s->mpv_flags & FF_MPV_FLAG_MV0)
901         dmin += (mv_penalty[pred_x] + mv_penalty[pred_y])*penalty_factor;
902 
903     /* first line */
904     if (s->first_slice_line) {
905         CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
906         CHECK_CLIPPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16,
907                         (last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16)
908     }else{
909         if(dmin<((h*h*s->avctx->mv0_threshold)>>8)
910                     && ( P_LEFT[0]    |P_LEFT[1]
911                         |P_TOP[0]     |P_TOP[1]
912                         |P_TOPRIGHT[0]|P_TOPRIGHT[1])==0){
913             *mx_ptr= 0;
914             *my_ptr= 0;
915             c->skip=1;
916             return dmin;
917         }
918         CHECK_MV(    P_MEDIAN[0] >>shift ,    P_MEDIAN[1] >>shift)
919         CHECK_CLIPPED_MV((P_MEDIAN[0]>>shift)  , (P_MEDIAN[1]>>shift)-1)
920         CHECK_CLIPPED_MV((P_MEDIAN[0]>>shift)  , (P_MEDIAN[1]>>shift)+1)
921         CHECK_CLIPPED_MV((P_MEDIAN[0]>>shift)-1, (P_MEDIAN[1]>>shift)  )
922         CHECK_CLIPPED_MV((P_MEDIAN[0]>>shift)+1, (P_MEDIAN[1]>>shift)  )
923         CHECK_CLIPPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16,
924                         (last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16)
925         CHECK_MV(P_LEFT[0]    >>shift, P_LEFT[1]    >>shift)
926         CHECK_MV(P_TOP[0]     >>shift, P_TOP[1]     >>shift)
927         CHECK_MV(P_TOPRIGHT[0]>>shift, P_TOPRIGHT[1]>>shift)
928     }
929     if(dmin>h*h*4){
930         if(c->pre_pass){
931             CHECK_CLIPPED_MV((last_mv[ref_mv_xy-1][0]*ref_mv_scale + (1<<15))>>16,
932                             (last_mv[ref_mv_xy-1][1]*ref_mv_scale + (1<<15))>>16)
933             if(!s->first_slice_line)
934                 CHECK_CLIPPED_MV((last_mv[ref_mv_xy-ref_mv_stride][0]*ref_mv_scale + (1<<15))>>16,
935                                 (last_mv[ref_mv_xy-ref_mv_stride][1]*ref_mv_scale + (1<<15))>>16)
936         }else{
937             CHECK_CLIPPED_MV((last_mv[ref_mv_xy+1][0]*ref_mv_scale + (1<<15))>>16,
938                             (last_mv[ref_mv_xy+1][1]*ref_mv_scale + (1<<15))>>16)
939             if(s->mb_y+1<s->end_mb_y)  //FIXME replace at least with last_slice_line
940                 CHECK_CLIPPED_MV((last_mv[ref_mv_xy+ref_mv_stride][0]*ref_mv_scale + (1<<15))>>16,
941                                 (last_mv[ref_mv_xy+ref_mv_stride][1]*ref_mv_scale + (1<<15))>>16)
942         }
943     }
944 
945     if(c->avctx->last_predictor_count){
946         const int count= c->avctx->last_predictor_count;
947         const int xstart= FFMAX(0, s->mb_x - count);
948         const int ystart= FFMAX(0, s->mb_y - count);
949         const int xend= FFMIN(s->mb_width , s->mb_x + count + 1);
950         const int yend= FFMIN(s->mb_height, s->mb_y + count + 1);
951         int mb_y;
952 
953         for(mb_y=ystart; mb_y<yend; mb_y++){
954             int mb_x;
955             for(mb_x=xstart; mb_x<xend; mb_x++){
956                 const int xy= mb_x + 1 + (mb_y + 1)*ref_mv_stride;
957                 int mx= (last_mv[xy][0]*ref_mv_scale + (1<<15))>>16;
958                 int my= (last_mv[xy][1]*ref_mv_scale + (1<<15))>>16;
959 
960                 if(mx>xmax || mx<xmin || my>ymax || my<ymin) continue;
961                 CHECK_MV(mx,my)
962             }
963         }
964     }
965 
966 //check(best[0],best[1],0, b0)
967     dmin= diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
968 
969 //check(best[0],best[1],0, b1)
970     *mx_ptr= best[0];
971     *my_ptr= best[1];
972 
973     return dmin;
974 }
975 
976 //this function is dedicated to the brain damaged gcc
ff_epzs_motion_search(MpegEncContext * s,int * mx_ptr,int * my_ptr,int P[10][2],int src_index,int ref_index,int16_t (* last_mv)[2],int ref_mv_scale,int size,int h)977 int ff_epzs_motion_search(MpegEncContext *s, int *mx_ptr, int *my_ptr,
978                           int P[10][2], int src_index, int ref_index,
979                           int16_t (*last_mv)[2], int ref_mv_scale,
980                           int size, int h)
981 {
982     MotionEstContext * const c= &s->me;
983 //FIXME convert other functions in the same way if faster
984     if(c->flags==0 && h==16 && size==0){
985         return epzs_motion_search_internal(s, mx_ptr, my_ptr, P, src_index, ref_index, last_mv, ref_mv_scale, 0, 0, 16);
986 //    case FLAG_QPEL:
987 //        return epzs_motion_search_internal(s, mx_ptr, my_ptr, P, src_index, ref_index, last_mv, ref_mv_scale, FLAG_QPEL);
988     }else{
989         return epzs_motion_search_internal(s, mx_ptr, my_ptr, P, src_index, ref_index, last_mv, ref_mv_scale, c->flags, size, h);
990     }
991 }
992 
epzs_motion_search2(MpegEncContext * s,int * mx_ptr,int * my_ptr,int P[10][2],int src_index,int ref_index,int16_t (* last_mv)[2],int ref_mv_scale,const int size)993 static int epzs_motion_search2(MpegEncContext * s,
994                              int *mx_ptr, int *my_ptr, int P[10][2],
995                              int src_index, int ref_index, int16_t (*last_mv)[2],
996                              int ref_mv_scale, const int size)
997 {
998     MotionEstContext * const c= &s->me;
999     int best[2]={0, 0};
1000     int d, dmin;
1001     unsigned map_generation;
1002     const int penalty_factor= c->penalty_factor;
1003     const int h=8;
1004     const int ref_mv_stride= s->mb_stride;
1005     const int ref_mv_xy= s->mb_x + s->mb_y *ref_mv_stride;
1006     me_cmp_func cmpf, chroma_cmpf;
1007     LOAD_COMMON
1008     int flags= c->flags;
1009     LOAD_COMMON2
1010 
1011     cmpf        = s->mecc.me_cmp[size];
1012     chroma_cmpf = s->mecc.me_cmp[size + 1];
1013 
1014     map_generation= update_map_generation(c);
1015 
1016     dmin = 1000000;
1017 
1018     /* first line */
1019     if (s->first_slice_line) {
1020         CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
1021         CHECK_CLIPPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16,
1022                         (last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16)
1023         CHECK_MV(P_MV1[0]>>shift, P_MV1[1]>>shift)
1024     }else{
1025         CHECK_MV(P_MV1[0]>>shift, P_MV1[1]>>shift)
1026         //FIXME try some early stop
1027         CHECK_MV(P_MEDIAN[0]>>shift, P_MEDIAN[1]>>shift)
1028         CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
1029         CHECK_MV(P_TOP[0]>>shift, P_TOP[1]>>shift)
1030         CHECK_MV(P_TOPRIGHT[0]>>shift, P_TOPRIGHT[1]>>shift)
1031         CHECK_CLIPPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16,
1032                         (last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16)
1033     }
1034     if(dmin>64*4){
1035         CHECK_CLIPPED_MV((last_mv[ref_mv_xy+1][0]*ref_mv_scale + (1<<15))>>16,
1036                         (last_mv[ref_mv_xy+1][1]*ref_mv_scale + (1<<15))>>16)
1037         if(s->mb_y+1<s->end_mb_y)  //FIXME replace at least with last_slice_line
1038             CHECK_CLIPPED_MV((last_mv[ref_mv_xy+ref_mv_stride][0]*ref_mv_scale + (1<<15))>>16,
1039                             (last_mv[ref_mv_xy+ref_mv_stride][1]*ref_mv_scale + (1<<15))>>16)
1040     }
1041 
1042     dmin= diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
1043 
1044     *mx_ptr= best[0];
1045     *my_ptr= best[1];
1046 
1047     return dmin;
1048 }
1049