1 /*
2  * $Id: y4mstabilizer.c,v 1.12 2009/09/19 19:01:43 sms00 Exp $
3  *
4  * written by J. Macropol <jm@wx.gd-ais.com>
5  *	Framework and shifting code adapted from y4mshift by Steve Schultz.
6  *	Motion detection was adapted from yuvdenoise.
7  *
8  * Program to stabilize a video stream.
9  * Works only non-interlaced yuv4mpeg streams for now.
10  *
11  * Usage: y4mstabilizer [-v] [-a <alpha>] [-r <srchRadius>]
12  *
13  *	-v		Verbose.  Add additional -v to increase verbosity/debug
14  *	-a <alpha>	The alpha value is a "viscosity" measure (see below).
15  *	-r <srchRadius>	How far to look for movement.
16  *	-s <stride>	How far apart the motion search points are.
17  *	-n		Do not supersample the chroma to get 1-pixel shifts.
18  *	-i		Try alternate interlaced mode (does not work yet)
19  *
20  * Legal <alpha> values are beween 0 and 1.
21  * Useful <alpha> values are beween 0.7 (or so) and .95.
22  * Higher values resist panning more at the expense of greater
23  * shifting.  <alpha> defaults to 0.95, a fairly high value, on the theory
24  * that you probably have some significant vibrations that need smoothing.
25  * Separate alpha values for the X- and Y-axes can be specified by separating
26  * them with a colon.  For example,
27  * 	-a 0.7:0.9
28  * would set the X-axis alpha to 0.7, and the Y-axis alpha to 0.9.  Thus,
29  * the stabilizer would be much more responsive to side-to-side movement,
30  * while resisting up-and-down motion.
31  *
32  * The <srchRadius> defaults to 15.  Smaller values speed things up,
33  * but won't be able to cope with large/fast movements as well.
34  *
35  * The <stride> defaults to 48 pixels.  Giving a larger number here will
36  * speed the process tremendously, but makes it easier for the motion search
37  * to be fooled by local movement.  Use a smaller number if the search seems
38  * to be following local movements.
39  *
40  * No file arguments are needed since this is a filter only program.
41  *
42  * TODO:
43  * 	Get alternate interlace method working.
44  * 	Get chroma super/subsampleing working better.
45  */
46 
47 #ifdef HAVE_CONFIG_H
48 #include "config.h"
49 #endif
50 
51 #include <stdio.h>
52 #include <sys/types.h>
53 #include <stdlib.h>
54 #include <unistd.h>
55 #include <string.h>
56 #include <limits.h>
57 
58 #include "yuv4mpeg.h"
59 #include "subsample.h"
60 
61 struct
62     {
63     int		verbose;	/* Talkative flag */
64     int		nosuper;	/* Flag not to supersample chroma on shift */
65     int		rad, diam;	/* Search radius and diameter */
66     int		stride;		/* Stride between motion points */
67     float	alphax;		/* X Viscosity */
68     float	alphay;		/* Y Viscosity */
69     float	gsX, gsY;	/* Accumulated shift */
70     int		ss_h, ss_v;	/* UV decimation factors */
71     } Stab;
72 #define SS_H Stab.ss_h
73 #define SS_V Stab.ss_v
74 
75 typedef struct { int x, y; } vec;
76 
77 static void usage(void);
78 static void alloc_yuv(u_char**, int, int);
79 static void subsample(uint8_t*, uint8_t*, int, int);
80 static void gmotion(u_char**, u_char**, int, int, int, vec*);
81 static void motion(u_char*, u_char*, int, int, int, int, vec*);
82 static void motion0(u_char*, u_char*, int, int, int, vec*);
83 static uint32_t calc_SAD_noaccel(uint8_t*, uint8_t*, int, int);
84 static uint32_t calc_SAD_half_noaccel(uint8_t*, uint8_t*, uint8_t*, int, int);
85 static void calcshift(vec*, vec*);
86 static int xround(float, int);
87 static void doshift(u_char**, u_char**, int, int, int, vec*);
88 static void hshift(u_char*, u_char*, int, int, int, int, int);
89 static void vertical_shift(u_char*, int, int, int, int, int);
90 
91 int
main(int argc,char ** argv)92 main (int argc, char **argv)
93     {
94     int i, c, width, height, frames, err;
95     vec g, shift;
96     int interlace, iflag = 0, chroma_ss;
97     u_char *yuv0[10], *yuv1[10], *yuv2[10], *line1;
98     y4m_stream_info_t istream, ostream;
99     y4m_frame_info_t iframe;
100     int fdin = fileno(stdin);
101 
102     Stab.rad = 15;		/* Default search radius */
103     Stab.stride = 48;		/* Default stride between motion points */
104     Stab.alphax = Stab.alphay = 0.95; /* Default viscosity */
105 
106     y4m_accept_extensions(1);
107 
108     opterr = 0;
109     while ((c = getopt(argc, argv, "va:r:bis:n")) != EOF)
110 	switch  (c)
111 	    {
112 	  case  'n':
113 	    Stab.nosuper = 1;
114 	    break;
115 	  case  'i':
116 	    iflag |= 0200;
117 	    break;
118 	  case  'v':
119 	    Stab.verbose++;
120 	    break;
121 	  case 's':
122 	    Stab.stride = atoi(optarg);
123 	    break;
124 	  case 'a':
125 	    if (strchr(optarg, ':'))
126 		{
127 		if (sscanf(optarg, "%g:%g", &Stab.alphax, &Stab.alphay) != 2)
128 		    usage();
129 		}
130 	    else
131 		{
132 		if (sscanf(optarg, "%g", &Stab.alphax) != 1)
133 		    usage();
134 		Stab.alphay = Stab.alphax;
135 		}
136 	    break;
137 	  case 'r':
138 	    Stab.rad = atoi(optarg);
139 	    break;
140 	  case    '?':
141 	  case    'h':
142 	  default:
143 	    usage();
144 	    }
145 
146     /* Initialize your input stream */
147     y4m_init_stream_info(&istream);
148     y4m_init_frame_info(&iframe);
149     err = y4m_read_stream_header(fdin, &istream);
150     if (err != Y4M_OK)
151 	mjpeg_error_exit1("Input stream error: %s\n", y4m_strerr(err));
152     if (y4m_si_get_plane_count(&istream) != 3)
153 	mjpeg_error_exit1("Only 3 plane formats supported");
154     switch (interlace = y4m_si_get_interlace(&istream))
155 	{
156       case Y4M_ILACE_NONE:
157 	break;
158       case Y4M_ILACE_TOP_FIRST:
159       case Y4M_ILACE_BOTTOM_FIRST:
160 	interlace |= iflag;
161 	break;
162       case Y4M_ILACE_MIXED:
163 	mjpeg_error_exit1("No mixed-interlaced streams!\n");
164       default:
165 	mjpeg_error_exit1("Unknown interlace!\n");
166 	}
167     chroma_ss = y4m_si_get_chroma(&istream);
168     SS_H = y4m_chroma_ss_x_ratio(chroma_ss).d;
169     SS_V = y4m_chroma_ss_y_ratio(chroma_ss).d;
170     switch (chroma_ss)
171 	{
172       case Y4M_CHROMA_420JPEG:
173       case Y4M_CHROMA_420MPEG2:
174       case Y4M_CHROMA_444:
175 	break;
176       case Y4M_CHROMA_MONO:
177 	 mjpeg_error_exit1("MONO (1 plane) chroma not supported!\n");
178       case Y4M_CHROMA_444ALPHA:
179 	 mjpeg_error_exit1("444ALPHA (4 plane) chroma not supported!\n");
180       default:
181 	if (!Stab.nosuper)
182 	    mjpeg_info("Cannot supersample %s chroma",
183 		y4m_chroma_description(chroma_ss));
184 	Stab.nosuper = 1;
185 	break;
186 	}
187     width = y4m_si_get_width(&istream);
188     height = y4m_si_get_height(&istream);
189     if (Stab.verbose)
190 	y4m_log_stream_info(mjpeg_loglev_t("info"), "", &istream);
191 
192     /* Initialize output stream */
193     y4m_init_stream_info(&ostream);
194     y4m_copy_stream_info(&ostream, &istream);
195     y4m_write_stream_header(fileno(stdout), &ostream);
196 
197     /* Allocate our frame arrays */
198     alloc_yuv(yuv0, height, width);
199     alloc_yuv(yuv1, height, width);
200     alloc_yuv(yuv2, height, width);
201 
202     /* Set up the search diameter. */
203     Stab.diam = Stab.rad + Stab.rad + 1;
204 
205     /* Fetch 1st frame - nothing to compare, so just copy it out.
206      * (Note that this is not strictly true if we have interlace
207      * and use the -i modified mode where we treat the fields separately.
208      * But I am *SURE* nobody will notice...  err...) */
209     frames = 1;
210     if (y4m_read_frame(fdin,&istream,&iframe,yuv0) != Y4M_OK)
211 	goto endit;
212     subsample(yuv0[0], yuv0[3], width, height);
213     subsample(yuv0[3], yuv0[4], width/2, height/2);
214     y4m_write_frame(fileno(stdout), &ostream, &iframe, yuv0);
215     for (; y4m_read_frame(fdin,&istream,&iframe,yuv1) == Y4M_OK; frames++)
216 	{
217 	if ((Stab.verbose > 1) || (Stab.verbose && ((frames % 100) == 0)))
218 	    mjpeg_info("Frame %d", frames);
219 	subsample(yuv1[0], yuv1[3], width, height);
220 	subsample(yuv1[3], yuv1[4], width/2, height/2);
221 	switch (interlace)
222 	    {
223 	  /* Easy - non-interlaced */
224 	  case Y4M_ILACE_NONE:
225 	    /* Find out how much this frame has changed from the previous */
226 	    gmotion(yuv0, yuv1, width, width, height, &g);
227 	    /* Figure out how much to shift this frame to compensate */
228 	    calcshift(&g, &shift);
229 	    /* If nothing to shift, just dump this frame and continue */
230 	    if ((shift.x == 0) && (shift.y == 0))
231 		y4m_write_frame(fileno(stdout), &ostream, &iframe, yuv1);
232 	    /* Else shift frame & write it out */
233 	    else
234 		{
235 		doshift(yuv1, yuv2, height, width, width, &shift);
236 		y4m_write_frame(fileno(stdout), &ostream, &iframe, yuv2);
237 		}
238 	    break;
239 	  /* Default interlaced method.
240 	   * Treat fields as one wide field & shift both the same.  */
241 	  case Y4M_ILACE_TOP_FIRST    | 0:
242 	  case Y4M_ILACE_BOTTOM_FIRST | 0:
243 	    /* Find out how much this frame has changed from the previous */
244 	    gmotion(yuv0, yuv1, width*2, width*2, height/2, &g);
245 	    /* Figure out how much to shift this frame to compensate */
246 	    calcshift(&g, &shift);
247 	    /* If nothing to shift, just dump this frame and continue */
248 	    if ((shift.x == 0) && (shift.y == 0))
249 		y4m_write_frame(fileno(stdout), &ostream, &iframe, yuv1);
250 	    /* Shift the fields separately & write the frame */
251 	    else
252 		{
253 		doshift(yuv1,   yuv2,   height/2, width,width*2,&shift);
254 		doshift(yuv1+5, yuv2+5, height/2, width,width*2,&shift);
255 		y4m_write_frame(fileno(stdout), &ostream, &iframe, yuv2);
256 		}
257 	    break;
258 	    /* Alternate interlaced method:
259 	     * Treat fields as separate frames, one half pixel apart vertically. */
260 	  case Y4M_ILACE_TOP_FIRST    | 0200:
261 	    /* Last bottom half -> Top half */
262 	    gmotion(yuv0+5, yuv1, width, width*2, height/2, &g);
263 	    g.y += 0.5;
264 	    calcshift(&g, &shift);
265 	    doshift(yuv1, yuv2, height/2, width, width*2, &shift);
266 	    /* Top half -> Bottom half */
267 	    gmotion(yuv1, yuv1+5, width, width*2, height/2, &g);
268 	    g.y -= 0.5;
269 	    calcshift(&g, &shift);
270 	    doshift(yuv1+5, yuv2+5, height/2, width, width*2, &shift);
271 	    y4m_write_frame(fileno(stdout), &ostream, &iframe, yuv2);
272 	    break;
273 	  case Y4M_ILACE_BOTTOM_FIRST | 0200:
274 	    /* Last top half -> Bottom half */
275 	    gmotion(yuv0, yuv1+5, width, width*2, height/2, &g);
276 	    g.y -= 0.5;
277 	    calcshift(&g, &shift);
278 	    doshift(yuv1+5, yuv2+5, height/2, width, width*2, &shift);
279 	    /* Bottom half -> Top half */
280 	    gmotion(yuv1+5, yuv1, width, width*2, height/2, &g);
281 	    g.y += 0.5;
282 	    calcshift(&g, &shift);
283 	    doshift(yuv1, yuv2, height/2, width, width*2, &shift);
284 	    y4m_write_frame(fileno(stdout), &ostream, &iframe, yuv2);
285 	    break;
286 	    }
287 	/* swap yuv0 and yuv1, so yuv1 becomes the old reference frame
288 	 * for the motion search. */
289 	for (i = 0; i < 10; i++)
290 	    { line1 = yuv0[i]; yuv0[i] = yuv1[i]; yuv1[i] = line1; }
291 	}
292     /* All done - close out the streams and exit */
293 endit:
294     y4m_fini_frame_info(&iframe);
295     y4m_fini_stream_info(&istream);
296     y4m_fini_stream_info(&ostream);
297     exit(0);
298     }
299 
300 static void
usage(void)301 usage (void)
302         {
303 	fputs(
304 "Program to stabilize a video stream.\n"
305 "Works only non-interlaced yuv4mpeg streams for now.\n"
306 "\n"
307 "Usage: y4mstabilizer [-v] [-a <alpha>] [-r <srchRadius>]\n"
308 "\n"
309 "	-v		Verbose.  Repeat -v to increase verbosity/debug info\n"
310 "	-a <alpha>	A \"viscosity\" measure (see below).\n"
311 "	-r <srchRadius>	How far to look for movement.\n"
312 "	-s <stride>	How far apart the motion search points are.\n"
313 "	-n		Do not supersample the chroma to get 1-pixel shifts.\n"
314 "	-i		Try alternate interlaced mode (does not work yet)\n"
315 "\n"
316 "Legal <alpha> values are beween 0 and 1.\n"
317 "Useful <alpha> values are beween 0.7 (or so) and .95.\n"
318 "Higher values resist panning more at the expense of greater\n"
319 "shifting.  <alpha> defaults to 0.95, a fairly high value, on the theory\n"
320 "that you probably have some significant vibrations that need smoothing.\n"
321 "Separate alpha values for the X- and Y-axes can be specified by separating\n"
322 "them with a colon.  For example,\n"
323 "	-a 0.7:0.9\n"
324 "would set the X-axis alpha to 0.7, and the Y-axis alpha to 0.9.  Thus,\n"
325 "the stabilizer would be much more responsive to side-to-side movement,\n"
326 "while resisting up-and-down motion.\n"
327 "\n"
328 "The <srchRadius> defaults to 15.  Smaller values speed things up,\n"
329 "but won't be able to cope with large/fast movements as well.\n"
330 "\n"
331 "The <stride> defaults to 48 pixels.  Giving a larger number here will\n"
332 "speed the process tremendously, but makes it easier for the motion search\n"
333 "to be fooled by local movement.  Use a smaller number if the search seems\n"
334 "to be following local movements.\n"
335 "\n"
336 "No file arguments are needed since this is a filter only program.\n"
337 "\n"
338 "This program presently works best when given 444, deinterlaced input.\n"
339 "Very good results can be obtained with the following pipeline:\n"
340 "    ... | yuvdeinterlace | \\\n"
341 "	   y4mscaler -v 0 -O sar=src -O chromass=444 | \\\n"
342 "	   y4mstabilizer | \\\n"
343 "	   y4mscaler -v 0 -O sar=src -O chromass=420_MPEG2 | ...\n"
344 , stderr);
345 exit(1);
346 }
347 
348 static void
alloc_yuv(u_char ** yuv,int h,int w)349 alloc_yuv (u_char **yuv, int h, int w)
350 {
351 int len = h * w * 2;	/* Double the amount - overkill but it's easier than figuring out how much (off by one?) is really needed */
352 int uvlen = Stab.nosuper ? (len / (SS_H * SS_V)) : len;
353 yuv[0] = malloc(len);
354 if (yuv[0] == NULL)
355 mjpeg_error_exit1(" malloc(%d) failed\n", len);
356 yuv[1] = malloc(uvlen);
357 if (yuv[1] == NULL)
358 mjpeg_error_exit1(" malloc(%d) failed\n", uvlen);
359 yuv[2] = malloc(uvlen);
360 if (yuv[2] == NULL)
361 mjpeg_error_exit1(" malloc(%d) failed\n", uvlen);
362 yuv[3] = malloc(len/4);
363 if (yuv[3] == NULL)
364 mjpeg_error_exit1(" malloc(%d) failed\n", len/4);
365 yuv[4] = malloc(len/16);
366 if (yuv[4] == NULL)
367 mjpeg_error_exit1(" malloc(%d) failed\n", len/16);
368 yuv[5] = yuv[0] + w;
369 yuv[6] = yuv[1] + w/SS_H;
370 yuv[7] = yuv[2] + w/SS_H;
371 yuv[8] = yuv[3] + w/2;
372 yuv[9] = yuv[3] + w/4;
373 }
374 
375 /*****************************************************************************
376 * generate a lowpassfiltered and subsampled copy                            *
377 * of the source image (src) at the destination                              *
378 * image location.                                                           *
379 * Lowpass-filtering is important, as this subsampled                        *
380 * image is used for motion estimation and the result                        *
381 * is more reliable when filtered.                                           *
382 * only subsample actual data, but keeping full buffer size for simplicity   *
383 *****************************************************************************/
384 static void
subsample(uint8_t * src,uint8_t * dst,int w,int h)385 subsample (uint8_t *src, uint8_t *dst, int w, int h)
386 {
387 int c, x, w2 = w / 2;
388 uint8_t *s1 = src;
389 uint8_t *s2 = src + w;
390 for (h /= 2; h >= 0; h--)
391 {
392 for (x = 0; x < w2; x++)
393 {
394 c = *s1++ + *s2++;
395 c += *s1++ + *s2++;
396 *dst++ = c >> 2;
397 }
398 s1 += w;
399 s2 += w;
400 }
401 }
402 
403 /*
404 * Determine global motion.
405 * Note that only the Y-plane is used.
406 * The global motion is taken as the median of the individual motions.
407 *
408 * Note that w (frame width) should equal ws (frame width stride) unless
409 * we are treating an interlaced frame as two subframes, in which case
410 * ws should be twice w.
411 */
412 static void
gmotion(u_char ** y0,u_char ** y1,int w,int ws,int h,vec * dij)413 gmotion (u_char **y0, u_char **y1, int w, int ws, int h, vec *dij)
414 {
415 int i, j, di, dj;
416 int we = w - (Stab.rad+8);
417 int he = h - (Stab.rad+8);
418 int xs[Stab.diam+Stab.diam], ys[Stab.diam+Stab.diam], t = 0;
419 vec ij;
420 bzero(xs, sizeof xs);
421 bzero(ys, sizeof ys);
422 /* Determine local motions for all blocks */
423 for (i = Stab.rad; i < we; i += Stab.stride)
424 for (j = Stab.rad; j < he; j += Stab.stride)
425 {
426 ij.x = i/4; ij.y = j/4;
427 motion(y0[4], y1[4], ws/4, Stab.rad/4, i/4,  j/4,  &ij);
428 ij.x += ij.x; ij.y += ij.y;
429 motion(y0[3], y1[3], ws/2, 3,          i/2, j/2, &ij);
430 ij.x += ij.x; ij.y += ij.y;
431 motion(y0[0], y1[0], ws,   3,          i,   j,   &ij);
432 motion0(y0[0],y1[0], ws,               i,   j,   &ij);
433 di = ij.x - (i+i); dj = ij.y - (j+j);
434 /*if ((abs(di) <= Stab.rad) && (abs(dj) <= Stab.rad))*/
435 {
436 xs[di+Stab.diam]++;
437 ys[dj+Stab.diam]++;
438 t++;
439 }
440 }
441 /* Determine median motions */
442 t /= 2;
443 for (di = i = 0; di < Stab.diam+Stab.diam; i += xs[di++])
444 if (i >= t)
445 break;
446 dij->x = di - Stab.diam;
447 for (dj = j = 0; dj < Stab.diam+Stab.diam; j += ys[dj++])
448 if (j >= t)
449 break;
450 dij->y = dj - Stab.diam;
451 }
452 
453 /*********************************************************************
454 *                                                                   *
455 * Estimate Motion Vectors                                           *
456 *                                                                   *
457 *********************************************************************/
458 static void
motion(u_char * y0,u_char * y1,int w,int r,int ri,int rj,vec * dij)459 motion (u_char *y0, u_char *y1, int w, int r, int ri, int rj, vec *dij)
460 {
461 uint32_t best_SAD=INT_MAX, SAD=INT_MAX;
462 int i = dij->x, j = dij->y;
463 int ii, jj;
464 y0 += (rj * w) + ri;
465 for (ii = -r; ii <= r; ii++)
466 for (jj = -r; jj <= r; jj++)
467 {
468 SAD = calc_SAD_noaccel(y0, y1 + (j+jj) * w + i+ii, w, best_SAD);
469 SAD += ii*ii + jj*jj; /* favour center matches... */
470 if (SAD <= best_SAD)
471 {
472 best_SAD = SAD;
473 dij->x = i + ii;
474 dij->y = j + jj;
475 }
476 }
477 }
478 
479 /*********************************************************************
480 *                                                                   *
481 * Estimate Motion Vectors in not subsampled frames                  *
482 *                                                                   *
483 *********************************************************************/
484 static void
motion0(u_char * y0,u_char * y1,int w,int ri,int rj,vec * dij)485 motion0 (u_char *y0, u_char *y1, int w, int ri, int rj, vec *dij)
486 {
487 uint32_t SAD, best_SAD = INT_MAX;
488 int adjw;
489 int i = dij->x, j = dij->y;
490 int ii, jj;
491 u_char *y1r = y1 + j*w + i;
492 
493 y0 += rj*w + ri;
494 y1 += (j-1)*w + i - 1;
495 adjw = w - 3;
496 for (jj = -1; jj <= 1; jj++, y1 += adjw)
497 for (ii = -1; ii <= 1; ii++, y1++)
498 {
499 SAD = calc_SAD_half_noaccel(y0, y1r, y1, w, best_SAD);
500 if (SAD < best_SAD)
501 {
502 best_SAD = SAD;
503 dij->x = ii+i+i-1;
504 dij->y = jj+j+j-1;
505 }
506 }
507 }
508 
509 /*********************************************************************
510 *                                                                   *
511 * SAD-function for Y without MMX/MMXE                               *
512 *                                                                   *
513 *********************************************************************/
514 static uint32_t
calc_SAD_noaccel(uint8_t * frm,uint8_t * ref,int w,int limit)515 calc_SAD_noaccel (uint8_t *frm, uint8_t *ref, int w, int limit)
516 {
517 uint32_t d = 0;
518 uint32_t adj = w - 8;
519 #define LINE \
520 d += abs(*frm++ - *ref++); d += abs(*frm++ - *ref++); \
521 d += abs(*frm++ - *ref++); d += abs(*frm++ - *ref++); \
522 d += abs(*frm++ - *ref++); d += abs(*frm++ - *ref++); \
523 d += abs(*frm++ - *ref++); d += abs(*frm++ - *ref++); \
524 if (d > limit) \
525 return INT_MAX; \
526 frm += adj; ref += adj
527 LINE; LINE; LINE; LINE;
528 LINE; LINE; LINE; LINE;
529 #undef LINE
530 return d;
531 }
532 
533 /*********************************************************************
534 *                                                                   *
535 * halfpel SAD-function for Y without MMX/MMXE                       *
536 *                                                                   *
537 *********************************************************************/
538 static uint32_t
calc_SAD_half_noaccel(uint8_t * ref,uint8_t * frm1,uint8_t * frm2,int w,int limit)539 calc_SAD_half_noaccel(uint8_t*ref, uint8_t*frm1, uint8_t*frm2, int w, int limit)
540 {
541 uint32_t d = 0;
542 uint32_t adj = w - 8;
543 #define LINE \
544 d += abs(((*frm1++ + *frm2++) >> 1) - *ref++); \
545 d += abs(((*frm1++ + *frm2++) >> 1) - *ref++); \
546 d += abs(((*frm1++ + *frm2++) >> 1) - *ref++); \
547 d += abs(((*frm1++ + *frm2++) >> 1) - *ref++); \
548 d += abs(((*frm1++ + *frm2++) >> 1) - *ref++); \
549 d += abs(((*frm1++ + *frm2++) >> 1) - *ref++); \
550 d += abs(((*frm1++ + *frm2++) >> 1) - *ref++); \
551 d += abs(((*frm1++ + *frm2++) >> 1) - *ref++); \
552 if (d > limit) \
553 return INT_MAX; \
554 frm1 += adj; frm2 += adj; ref += adj
555 LINE; LINE; LINE; LINE;
556 LINE; LINE; LINE; LINE;
557 #undef LINE
558 return d;
559 }
560 
561 static void
calcshift(vec * gp,vec * shftp)562 calcshift (vec *gp, vec *shftp)
563 {
564 int ss_h = Stab.nosuper ? SS_H : 1;
565 int ss_v = Stab.nosuper ? SS_V : 1;
566 /* gmotion() returns motion in half-pixels */
567 /* Factor in <alpha> the "viscosity"... */
568 Stab.gsX = (Stab.gsX * Stab.alphax) + gp->x/2.0;
569 Stab.gsY = (Stab.gsY * Stab.alphay) + gp->y/2.0;
570 /* Now that we know the movement, shift to counteract it */
571 shftp->x = -xround(Stab.gsX, ss_h);
572 shftp->y = -xround(Stab.gsY, ss_v);
573 if (Stab.verbose > 1)
574 mjpeg_info("global motion xy*2=<%d,%d>"
575 " Accumulated xy=<%g,%g> shift xy=%d,%d>\n",
576 gp->x, gp->y, Stab.gsX, Stab.gsY, shftp->x, shftp->y);
577 }
578 
579 /* Round the given float value to the nearest multiple of <r>. */
580 static int
xround(float v,int r)581 xround (float v, int r)
582 {
583 if (v < 0)
584 return (-xround(-v, r));
585 return (((int)((v + r/2.0) / r)) * r);
586 }
587 
588 /*
589 * Shift a frame.
590 * The frame is always copied to the destination.
591 *
592 * Note that w (frame width) should equal ws (frame width stride) unless
593 * we are treating an interlaced frame as two subframes, in which case
594 * ws should be twice w.
595 */
596 static void
doshift(u_char ** yuv1,u_char ** yuv2,int h,int w,int ws,vec * shift)597 doshift (u_char**yuv1, u_char**yuv2, int h, int w, int ws, vec *shift)
598 {
599 int dosuper = (shift->x % SS_H) | (shift->y % SS_V);
600 int ss_h = dosuper ? 1 : SS_H;
601 int ss_v = dosuper ? 1 : SS_V;
602 /* If we have to supersample the chroma, then do it now, before shifting */
603 if (dosuper)
604 chroma_supersample(Y4M_CHROMA_420JPEG, yuv1, w, h);
605 /* Do the horizontal shifting first.  The frame is shifted into
606 * the yuv2 frame. Even if there is no horizontal shifting to do,
607 * we copy the frame because the vertical shift is desctructive,
608 * and we want to preserve the yuv1 frame to compare against the
609 * next one. */
610 hshift(yuv1[0],yuv2[0],h,     w,     shift->x,      16, ws);
611 hshift(yuv1[1],yuv2[1],h/ss_v,w/ss_h,shift->x/ss_h,128, ws/ss_h);
612 hshift(yuv1[2],yuv2[2],h/ss_v,w/ss_h,shift->x/ss_h,128, ws/ss_h);
613 /* Vertical shift, then write the frame */
614 if (shift->y)
615 {
616 vertical_shift(yuv2[0], shift->y,      w,      ws,      h,      16);
617 vertical_shift(yuv2[1], shift->y/ss_v, w/ss_h, ws/ss_h, h/ss_v, 128);
618 vertical_shift(yuv2[2], shift->y/ss_v, w/ss_h, ws/ss_h, h/ss_v, 128);
619 }
620 /* Undo the supersampling */
621 if (dosuper)
622 chroma_subsample(Y4M_CHROMA_420JPEG, yuv1, w, h);
623 }
624 
625 /*
626 * Shift one plane of the frame by the given horizontal shift amount.
627 * The shifted frame is left in the <vdst> buffer.
628 */
629 static void
hshift(u_char * vsrc,u_char * vdst,int h,int w,int shift,int blk,int ws)630 hshift (u_char *vsrc, u_char *vdst, int h, int w, int shift, int blk,int ws)
631 {
632 int i;
633 for (i = 0; i < h; i++)
634 {
635 if (shift > 0)
636 {
637 memmove(vdst + shift, vsrc, w - shift);
638 memset(vdst, blk, shift); /* black */
639 }
640 else if (shift < 0)
641 {
642 memmove(vdst, vsrc - shift, w + shift);
643 memset(vdst+w+shift, blk, -shift);
644 }
645 else
646 memmove(vdst, vsrc, w);
647 vsrc += ws;
648 vdst += ws;
649 }
650 }
651 
652 /*
653 * Shift the frame vertically.  The frame data is modified in place.
654 */
655 static void
vertical_shift(u_char * y,int vshift,int w,int ws,int h,int blk)656 vertical_shift(u_char *y, int vshift, int w, int ws, int h, int blk)
657 {
658 /* Easy case - we can shift everything at once */
659 if (w == ws)
660 {
661 if (vshift > 0)
662 {
663 memmove(y + vshift*ws, y, (h-vshift) * ws);
664 memset(y, blk, vshift * ws);
665 }
666 else
667 {
668 memmove(y, y - vshift * ws, (h+vshift) * ws);
669 memset(y + (h + vshift) * ws, blk, -vshift * w);
670 }
671 }
672 /* Must go line-by-line */
673 else
674 {
675 u_char *dst, *src;
676 int i, n;
677 if (vshift > 0)
678 {
679 dst = y + (h - 1) * ws;
680 src = dst - vshift * ws;
681 n = h - vshift;
682 for (i = 0; i < n; i++, (dst -= ws), (src -= ws))
683 memcpy(dst, src, w);
684 for ( ; i < h; i++, dst -= ws)
685 memset(dst, blk, w);
686 }
687 else
688 {
689 dst = y;
690 src = y - vshift * ws;
691 n = h + vshift;
692 for (i = 0; i < n; i++, (dst += ws), (src += ws))
693 memcpy(dst, src, w);
694 for ( ; i < h; i++, dst += ws)
695 memset(dst, blk, w);
696 }
697 }
698 }
699