1 /*
2     filter_smartyuv.c
3 
4     This file is part of transcode, a video stream processing tool
5 
6     2003 by Tilmann Bitterberg, based on code by Donald Graft.
7 
8     This program is free software; you can redistribute it and/or modify
9     it under the terms of the GNU General Public License as published by
10     the Free Software Foundation.
11 
12     This program is distributed in the hope that it will be useful,
13     but WITHOUT ANY WARRANTY; without even the implied warranty of
14     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15     GNU General Public License for more details.
16 
17     You should have received a copy of the GNU General Public License
18     along with this program; if not, write to the Free Software
19     Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20 
21 */
22 
23 #define MOD_NAME    "filter_smartyuv.so"
24 #define MOD_VERSION "0.1.6 (2007-05-31)"
25 #define MOD_CAP     "Motion-adaptive deinterlacing"
26 #define MOD_AUTHOR  "Tilmann Bitterberg"
27 
28 #include "transcode.h"
29 #include "filter.h"
30 #include "libtc/libtc.h"
31 #include "libtc/optstr.h"
32 
33 //#undef HAVE_ASM_MMX
34 //#undef CAN_COMPILE_C_ALTIVEC
35 
36 // mmx gives a speedup of about 3 fps
37 // when running without highq, mmx gives 12 fps
38 
39 // altivec does not give much, about 1 fps
40 
41 #ifdef HAVE_ASM_MMX
42 # include "mmx.h"
43 #endif
44 
45 #ifndef HAVE_ASM_MMX
46 # define emms() do{}while(0)
47 #endif
48 
49 #define rdtscll(val) __asm__ __volatile__("rdtsc" : "=A" (val))
50 
51 
52 static vob_t *vob  = NULL;
53 
54 ///////////////////////////////////////////////////////////////////////////
55 
/*
 * Compile-time constants used throughout the filter, grouped by purpose.
 * All of them are plain int enumerators.
 */

/* Motion detection modes; mfd->diffmode is compared against these. */
enum {
    FRAME_ONLY      = 0,
    FIELD_ONLY      = 1,
    FRAME_AND_FIELD = 2
};

/*
 * Motion map denoising parameters.  For speed reasons the optimized
 * Erode_Dilate() code has the 5x5 window and the "more than 7" test
 * written out literally instead of reading these names.
 */
enum {
    DENOISE_DIAMETER = 5,
    DENOISE_THRESH   = 7
};

/* Byte values written for "black" in the luma and chroma planes. */
enum {
    BLACK_BYTE_Y  = 16,
    BLACK_BYTE_UV = 128
};

/* Limits applied by clamp_Y(). */
enum {
    MIN_Y = 16,
    MAX_Y = 240
};

/* Threshold values; they match the defaults printed by help_optstr(). */
enum {
    LUMA_THRESHOLD   = 14,
    CHROMA_THRESHOLD = 7,
    SCENE_THRESHOLD  = 31
};

/*
 * Extra bytes added to every row of the moving maps: 16 on the left and
 * 16 on the right.  The padding makes sure that aligned loads and stores
 * at a multiple of 16 are always possible, which is especially important
 * for altivec but might help in other cases as well.
 */
enum {
    PAD = 2 * 16
};
83 
/*
 * Signature shared by clamp_Y() and clamp_UV(): takes an int sample value
 * and returns the byte to store.  smartyuv_core() receives one of these as
 * its clamp_f argument.
 */
typedef uint8_t (*yuv_clamp_fn)(int x);
85 
clamp_Y(int x)86 static uint8_t clamp_Y(int x) {
87     return ((TC_CLAMP(x, MIN_Y, MAX_Y)) & 0xFF);
88 }
89 
/*
 * Saturate a chroma value to the 8-bit range [0, 255].
 *
 * The previous implementation only masked with 0xFF, so an out-of-range
 * interpolation result wrapped around (e.g. 318 became 62 and -3 became
 * 253) instead of being limited.  Values already inside the range are
 * returned unchanged, exactly as before.
 */
static uint8_t clamp_UV(int x) {
    if (x < 0)
        return 0;
    if (x > 0xFF)
        return 0xFF;
    return (uint8_t)x;
}
93 
94 
95 /*
96 size: 1-32
97 count: 1-256
98 stride: -32000 - 320000
99 */
100 
/*
 * Forward declaration of smartyuv_core(); the definition appears further
 * down in this file.  clamp_f is one of the yuv_clamp_fn helpers
 * (clamp_Y or clamp_UV).
 */
static void smartyuv_core (char *_src, char *_dst, char *_prev, int _width, int _height,
                           int _srcpitch, int _dstpitch,
                           unsigned char *_moving, unsigned char *_fmoving,
                           yuv_clamp_fn clamp_f, int _threshold );
105 
/*
 * Per-filter state and user options.
 *
 * The option fields carry the same names as the options listed by
 * help_optstr().  Fields marked "presumably" are not read anywhere in the
 * part of the file reviewed here, so their description is an assumption
 * that should be confirmed against the code that fills in this struct.
 */
typedef struct MyFilterData {
    char            *buf;             /* presumably a scratch frame buffer -- TODO confirm */
    char            *prevFrame;       /* presumably the previous frame handed to smartyuv_core() as _prev -- TODO confirm */
    unsigned char   *movingY;         /* presumably the motion map for the Y plane -- TODO confirm */
    unsigned char   *movingU;         /* presumably the motion map for the U plane -- TODO confirm */
    unsigned char   *movingV;         /* presumably the motion map for the V plane -- TODO confirm */
    unsigned char   *fmovingY;        /* presumably the filtered (denoised) map for Y -- TODO confirm */
    unsigned char   *fmovingU;        /* presumably the filtered (denoised) map for U -- TODO confirm */
    unsigned char   *fmovingV;        /* presumably the filtered (denoised) map for V -- TODO confirm */
    int             motionOnly;       /* non-zero: smartyuv_core() blacks out non-moving pixels */
    int             threshold;        /* presumably the luma motion threshold option -- TODO confirm */
    int             chromathres;      /* presumably the chroma motion threshold option -- TODO confirm */
    int             codec;            /* not referenced in the reviewed code; meaning to be confirmed */
    int             diffmode;         /* FRAME_ONLY, FIELD_ONLY or FRAME_AND_FIELD */
    int             scenethreshold;   /* percentage of moving pixels at or above which a scene change is flagged */
    int             cubic;            /* non-zero: use cubic instead of linear line interpolation */
    int             highq;            /* non-zero: denoise the motion map unless a scene change was flagged */
    int             Blend;            /* non-zero: deinterlace by blending lines */
    int             doChroma;         /* presumably enables processing of the chroma planes -- TODO confirm */
    int             verbose;          /* non-zero: log detected scene changes */
} MyFilterData;

/* Filter state shared by the functions in this file; smartyuv_core() reads its options from here. */
static MyFilterData *mfd = NULL;
129 
help_optstr(void)130 static void help_optstr(void)
131 {
132    tc_log_info (MOD_NAME, "(%s) help\n"
133 "* Overview\n"
134 "   This filter is basically a rewrite of the\n"
135 "   smartdeinter filter by Donald Graft (without advanced processing\n"
136 "   options) for YUV mode only. Its faster than using the smartdeinter\n"
137 "   in YUV mode and is also tuned with its threshold settings for YUV\n"
138 "   mode. The filter detects motion and static areas in an image and\n"
139 "   only deinterlaces (either by blending or by cubic interpolation)\n"
140 "   the moving areas. The result is an image with high detail in\n"
141 "   static areas, no information is lost there.\n"
142 "\n"
143 "   The threshold settings should be sufficent for most users. As a\n"
144 "   rule of thumb, I recommend setting the chroma threshold to about\n"
145 "   the half of the luma threshold. If you want more deinterlacing,\n"
146 "   lower the thresholds. The scene threshold can be easily found by\n"
147 "   turning on verbose mode and the preview filter. In verbose mode,\n"
148 "   the filter will print out, when it detects a scene change. If\n"
149 "   scenechanges go by unnoticed, lower the scene threshold. You can\n"
150 "   completly disable chroma processing with the doChroma=0 option.\n"
151 "   Here is a sample commandline\n"
152 "   -J smartyuv=highq=1:diffmode=2:cubic=1:Blend=1:chromathres=4:threshold=8:doChroma=1\n"
153 "* Options\n"
154 "  'motionOnly' Show motion areas only (0=off, 1=on) [0]\n"
155 "    'diffmode' Motion Detection (0=frame, 1=field, 2=both) [0]\n"
156 "   'threshold' Motion Threshold (luma) (0-255) [14]\n"
157 " 'chromathres' Motion Threshold (chroma) (0-255) [7]\n"
158 "  'scenethres' Threshold for detecting scenechanges (0-255) [31]\n"
159 "       'cubic' Do cubic interpolation (0=off 1=on) [1]\n"
160 "       'highq' High-Quality processing (motion Map denoising) (0=off 1=on) [1]\n"
161 "       'Blend' Blend the frames for deinterlacing (0=off 1=on) [1]\n"
162 "    'doChroma' Enable chroma processing (slower but more accurate) (0=off 1=on) [1]\n"
163 "     'verbose' Verbose mode (0=off 1=on) [1]\n"
164 		, MOD_CAP);
165 }
166 
/*
 * Erode_Dilate -- denoise a motion map in two passes.
 *
 *   _moving   motion map; read by the erode pass, rewritten by the dilate
 *             pass.  The summing code treats every cell as 0 or 1.
 *   _fmoving  second map that receives the eroded result.
 *   width     number of cells processed per row.
 *   height    number of rows processed.
 *
 * Both maps are walked with a row stride of width+PAD.
 *
 * Erode:  a set cell stays set in _fmoving only if more than 7 cells of the
 *         5x5 window centred on it are set in _moving.
 * Dilate: _fmoving is copied back to _moving cell by cell, and every set
 *         cell fills the 5x5 window around it in _moving with 1.
 *
 * Neither pass checks bounds: the 5x5 window reaches two rows above/below
 * and two columns left/right of the processed area (the MMX loads reach
 * further to the right), so both pointers must refer into buffers with
 * enough margin around the width x height area.
 */
static void Erode_Dilate (uint8_t *_moving, uint8_t *_fmoving, int width, int height)
{
    int sum, x, y;
    uint8_t  *m, *fmoving, *moving, *p;
    int w4 = width+PAD;     /* row stride of both maps */
#ifdef HAVE_ASM_MMX
    int can_use_mmx = !(width%4);   /* the MMX loop handles 4 cells per step */
#endif

    // Erode.
    fmoving = _fmoving;
    moving = _moving;
    /* top-left corner of the 5x5 window around cell (0,0); only the MMX path uses it */
    p = moving - 2*w4 -2;

    for (y = 0; y < height; y++)
    {
#ifdef HAVE_ASM_MMX
	/*
	 * The motion map as either 1 or 0.
	 * moving[x] is the current position.
	 * to decide if fmoving[x] should be 1, we need to sum up all 24 values.
	 * Because of mmx, we can do that also with the next 3 positions since
	 * the values are read in memory anyway.
	 */

	if (can_use_mmx) {
	    for (x = 0; x < width; x+=4)
	    {
		uint8_t  res[8];

		/* default: copy the four cells; set cells are re-evaluated below */
		ac_memcpy(fmoving, moving, 4);

		m = p;

		/* add up five rows of 8 bytes each: res[k] becomes the column sum for column k of the window */
		movq_m2r   (*m, mm0); m += w4;
		paddusb_m2r(*m, mm0); m += w4;
		paddusb_m2r(*m, mm0); m += w4;
		paddusb_m2r(*m, mm0); m += w4;
		paddusb_m2r(*m, mm0);

		movq_r2m(mm0, *res);

		/* cell 0: window covers columns 0..4 */
		if (*moving++) {
		    res[0]+=res[1];
		    res[0]+=res[2];
		    res[0]+=res[3];
		    res[0]+=res[4];
		    *fmoving = (res[0] > 7);
		}
		fmoving++;

		/* cell 1: window covers columns 1..5 (res[1..5] are still unmodified) */
		if (*moving++) {
		    res[1]+=res[2];
		    res[1]+=res[3];
		    res[1]+=res[4];
		    res[1]+=res[5];
		    *fmoving = (res[1] > 7);
		}
		fmoving++;

		/* cell 2: window covers columns 2..6 */
		if (*moving++) {
		    res[2]+=res[3];
		    res[2]+=res[4];
		    res[2]+=res[5];
		    res[2]+=res[6];
		    *fmoving = (res[2] > 7);
		}
		fmoving++;

		/* cell 3: window covers columns 3..7 */
		if (*moving++) {
		    res[3]+=res[4];
		    res[3]+=res[5];
		    res[3]+=res[6];
		    res[3]+=res[7];
		    *fmoving = (res[3] > 7);
		}
		fmoving++;

		p += 4;

	    }
	    /* the pointers already advanced by width inside the loop; skip the padding to reach the next row */
	    fmoving += PAD;
	    moving += PAD;
	    p += PAD;
	} else
#endif
	{
	    for (x = 0; x < width; x++)
	    {

		/* copy the cell; a clear cell stays clear and needs no window sum */
		if (!(fmoving[x] = moving[x]) )
		    continue;

		m = moving + x - 2*w4 -2;
		/* sum starts as 1 and is shifted left once per set cell, so it ends up as 1 << (number of set cells) */
		sum = 1;

		//sum += m[0] + m[1] + m[2] + m[3] + m[4];
		//max sum is 25 or better 1<<25
		sum <<= m[0]; sum <<= m[1]; sum <<= m[2]; sum <<= m[3]; sum <<= m[4];
		m += w4;
		sum <<= m[0]; sum <<= m[1]; sum <<= m[2]; sum <<= m[3]; sum <<= m[4];
		m += w4;
		sum <<= m[0]; sum <<= m[1]; sum <<= m[2]; sum <<= m[3]; sum <<= m[4];
		m += w4;
		sum <<= m[0]; sum <<= m[1]; sum <<= m[2]; sum <<= m[3]; sum <<= m[4];
		m += w4;
		sum <<= m[0]; sum <<= m[1]; sum <<= m[2]; sum <<= m[3]; sum <<= m[4];

		// check if the only bit set has an index of 8 or greater (threshold is 7)
		fmoving[x] = (sum > 128);
	    }
	    fmoving += w4;
	    moving += w4;

	} // else can use mmx

    }
    emms();


    // Dilate.
    /*
     * NOTE(review): moving[x] is assigned from fmoving[x] when the scan
     * reaches each cell, so a 1 written by the memset calls into a cell that
     * comes later in scan order (to the right on the same row, or on the two
     * rows below) is replaced by that cell's fmoving value afterwards.
     * Confirm that this is the intended behaviour.
     */
    fmoving = _fmoving;
    moving = _moving;
    for (y = 0; y < height; y++)
    {
	for (x = 0; x < width; x++)
	{
	    if ((moving[x] = fmoving[x])) {

		/* fill the 5x5 window around the cell, one row of 5 bytes at a time */
		m = moving + x - 2*w4 -2;

		memset(m, 1, 5);
		m += w4;
		memset(m, 1, 5);
		m += w4;
		memset(m, 1, 5);
		m += w4;
		memset(m, 1, 5);
		m += w4;
		memset(m, 1, 5);
	    }
	}
	moving += w4;
	fmoving += w4;
    }
}
/*
 * Blendline_c -- plain C version of the linear blend for one line.
 *
 *   dst          output line, w bytes are written
 *   src          current source line
 *   srcminus     source line above
 *   srcplus      source line below
 *   moving       motion flags for the current line
 *   movingminus  motion flags for the line above
 *   movingplus   motion flags for the line below
 *   w            number of pixels to process; nothing is touched if w <= 0
 *   scenechange  non-zero forces blending of every pixel
 *
 * A pixel is blended as src/2 + srcminus/4 + srcplus/4 when any of the
 * three motion flags or scenechange is non-zero; otherwise it is copied
 * from src unchanged.
 *
 * The loop used to be a do/while that ran its body once even for w <= 0,
 * reading and writing index 0 of every buffer; the for loop avoids that.
 */
static inline void Blendline_c (uint8_t *dst, uint8_t *src, uint8_t *srcminus, uint8_t *srcplus,
                                uint8_t *moving, uint8_t *movingminus, uint8_t *movingplus,
                                const int w, const int scenechange)
{
    int x;

    for (x = 0; x < w; x++) {
        if (movingminus[x] | moving[x] | movingplus[x] | scenechange) {
            /* Blend fields; the largest possible sum is 127 + 63 + 63 = 253. */
            dst[x] = (uint8_t)((src[x] >> 1) + (srcminus[x] >> 2) + (srcplus[x] >> 2));
        } else {
            dst[x] = src[x];
        }
    }
}
327 
/*
 * Absolute value of a signed int difference between two 8-bit samples
 * (i.e. any value in -255..255).
 *
 * The former shift-based form, ((a)^((a)>>7))-((a)>>7), is only a correct
 * abs() for -128..127: 129 evaluated to 127 and -129 to 131, which matters
 * for thresholds of about 126 and above, and it relied on the
 * implementation-defined right shift of negative values.  The MMX code
 * paths compute the exact absolute value on 16-bit words, so this form
 * also makes the C fallback agree with them.
 *
 * The argument is evaluated more than once; pass a plain variable.
 */
#define ABS_u8(a) (((a) < 0) ? -(a) : (a))
330 
smartyuv_core(char * _src,char * _dst,char * _prev,int _width,int _height,int _srcpitch,int _dstpitch,unsigned char * _moving,unsigned char * _fmoving,yuv_clamp_fn clamp_f,int _threshold)331 static void smartyuv_core (char *_src, char *_dst, char *_prev, int _width, int _height,
332                            int _srcpitch, int _dstpitch,
333                            unsigned char *_moving, unsigned char *_fmoving,
334                            yuv_clamp_fn clamp_f, int _threshold )
335 {
336 	const int		srcpitch = _srcpitch;
337 	const int		dstpitch = _dstpitch;
338 
339 	const int		w = _width;
340 	const int		wminus1 = w - 1;
341 
342 	const int		h = _height;
343 	const int		hminus1 = h - 1;
344 	const int		hminus3 = h - 3;
345 
346 	char			*src, *dst, *srcminus=NULL, *srcplus, *srcminusminus=NULL, *srcplusplus=NULL;
347 	unsigned char		*moving, *movingminus, *movingplus;
348 	unsigned char		*fmoving;
349 	char    		*prev;
350 	int			scenechange=0;
351 	long			count=0;
352 	int			x, y;
353 	int			luma, luman, lumap, T;
354 	int 			p1, p2;
355 	int 			rp, rn, rpp, rnn, R;
356 	unsigned char		fiMotion;
357 	int			cubic = mfd->cubic;
358 	static int 		counter=0;
359 #ifdef HAVE_ASM_MMX
360 	const int		can_use_mmx = !(w%8); // width must a multiple of 8
361 #endif
362 #ifdef CAN_COMPILE_C_ALTIVEC
363 	const int		can_use_altivec = !(w%16); // width must a multiple of 16
364 #endif
365 
366 
367 	char * dst_buf;
368 	char * src_buf;
369 
370 	//memset(ptr->video_buf+h*w, BLACK_BYTE_UV, h*w/2);
371 	src_buf = _src;
372 	dst_buf = _dst;
373 
374 	/* Not much deinterlacing to do if there aren't at least 2 lines. */
375 	if (h < 2) return;
376 
377 	/* Skip first and last lines, they'll get a free ride. */
378 	src = src_buf + srcpitch;
379 	srcminus = src - srcpitch;
380 	srcplus = src + srcpitch;
381 	moving = _moving + w+PAD;
382 	prev = _prev + w;
383 
384 	if (mfd->diffmode == FRAME_ONLY || mfd->diffmode == FRAME_AND_FIELD)
385 	{
386 		if (mfd->diffmode == FRAME_ONLY) {
387 
388 #ifdef HAVE_ASM_MMX
389 		  if (can_use_mmx) {
390 
391 		    uint64_t mask1 = 0x00FF00FF00FF00FFULL;
392 
393 		    uint64_t thres = (_threshold<<16) | (_threshold);
394 		    thres = (thres << 32) | (thres);
395 
396 		    movq_m2r (mask1, mm6);
397 		    movq_m2r (thres, mm5);        // thres -> mm6
398 
399 		    count = 0;
400 		    for (y = 1; y < hminus1; y++)
401 		    {
402 			for (x=0; x<w; x+=4) {
403 
404 			    movd_m2r (*src, mm0);         // a b c d 0 0 0 0
405 
406 			    punpcklbw_r2r (mm0, mm0);     // a a b b c c d d
407 			    pand_r2r (mm6, mm0);          // 0 a 0 b 0 c 0 d
408 
409 			    movd_m2r(*prev, mm1);         // e f g h 0 0 0 0
410 
411 			    punpcklbw_r2r (mm1, mm1);     // e e f f g g h h
412 			    pand_r2r (mm6, mm1);          // 0 e 0 f 0 g 0 h
413 
414 			    psubsw_r2r(mm1, mm0);         // mm0 = mm0 - mm1; !!
415 
416 			    movq_r2r(mm0, mm3);
417 
418 			    // abs()
419 			    psraw_i2r(15, mm3);
420 			    pxor_r2r(mm3, mm0);
421 			    psubw_r2r(mm3, mm0);
422 
423 			    // compare if greater than thres
424 			    pcmpgtw_r2r(mm5, mm0);
425 
426 			    // norm
427 			    psrlw_i2r(15, mm0);
428 			    // pack to bytes
429 			    packuswb_r2r(mm0, mm0);
430 
431 			    // write to moving
432 			    movd_r2m(mm0, *moving);
433 
434 			    ac_memcpy(prev, src, 4);
435 
436 			    src+=4;
437 			    prev+=4;
438 
439 			    count += *moving++;
440 			    count += *moving++;
441 			    count += *moving++;
442 			    count += *moving++;
443 
444 			}
445 
446 			moving += PAD;
447 		    }
448 		    emms();
449 
450 		  }  else  // cannot use mmx
451 #elif CAN_COMPILE_C_ALTIVEC
452 		  if (can_use_altivec) {
453 		      vector unsigned char vthres;
454 		      vector unsigned char shift = vec_splat_u8(7);
455 		      unsigned char __attribute__ ((aligned(16))) tdata[16];
456 		      int i;
457 		      memset(tdata, _threshold, 16);
458 		      vthres = vec_ld(0, tdata);
459 
460 		      count = 0;
461 		      for (y = 1; y < hminus1; y++)
462 		      {
463 			  for (x=0; x<w; x+=16) {
464 
465 			      vector unsigned char luma = vec_ld(0, (unsigned char *)src);
466 			      vector unsigned char prv = vec_ld(0, (unsigned char *)prev);
467 			      vector unsigned char vmov;
468 			      vmov = vec_sub (vec_max (luma, prv), vec_min(luma, prv));
469 
470 			      // FF -> 01
471 			      vmov = (vector unsigned char)vec_cmpgt(vmov, vthres);
472 			      vmov = vec_sr(vmov, shift);
473 
474 			      vec_st(vmov, 0, (unsigned char *)moving);
475 
476 			      /* Keep a count of the number of moving pixels for the
477 				 scene change detection. */
478 			      for (i=0; i<16; i++) {
479 				  count += *moving++;
480 				  *prev++ = *src++;
481 			      }
482 
483 			  }
484 
485 			  moving += PAD;
486 		    }
487 
488 		  } else
489 #endif
490 		  {
491 		    count = 0;
492 		    for (y = 1; y < hminus1; y++)
493 		    {
494 			for (x=0; x<w; x++) {
495 				// First check frame motion.
496 				// Set the moving flag if the diff exceeds the configured
497 				// threshold.
498 				int luma = *src++&0xff;
499 				int p0 = luma - (*prev&0xff);
500 
501 				*prev++ = luma;
502 				*moving = ((ABS_u8(p0) > _threshold));
503 
504 				/* Keep a count of the number of moving pixels for the
505 				   scene change detection. */
506 				count += *moving++;
507 
508 			}
509 
510 			moving += PAD;
511 		    }
512 		  } // cannot use mmx
513 
514 		} else if (mfd->diffmode == FRAME_AND_FIELD) {
515 
516 #ifdef HAVE_ASM_MMX
517 		  if (can_use_mmx) {
518 
519 		    uint64_t mask1 = 0x00FF00FF00FF00FFULL;
520 
521 		    uint64_t thres = (_threshold<<16) | (_threshold);
522 		    thres = (thres << 32) | (thres);
523 
524 		    movq_m2r (mask1, mm6);
525 		    movq_m2r (thres, mm5);        // thres -> mm6
526 
527 		    // ---------------------
528 		    // create the motion map
529 		    // ---------------------
530 
531 		    count = 0;
532 		    for (y = 1; y < hminus1; y++)
533 		    {
534 			if (y & 1) { // odd lines
535 
536 			    for (x=0; x<w; x+=4) {
537 
538 				movd_m2r(*src, mm0);          // a b c d 0 0 0 0
539 				movd_m2r(*srcminus, mm1); // e f g h 0 0 0 0
540 				movd_m2r(*prev, mm2);
541 
542 				punpcklbw_r2r (mm0, mm0);     // a a b b c c d d
543 				punpcklbw_r2r (mm1, mm1);     // e e f f g g h h
544 				punpcklbw_r2r (mm2, mm2);
545 				pand_r2r (mm6, mm0);          // 0 a 0 b 0 c 0 d
546 				pand_r2r (mm6, mm1);          // 0 e 0 f 0 g 0 h
547 				pand_r2r (mm6, mm2);
548 
549 				movq_r2r (mm0, mm7);          // save in mm7
550 
551 				psubsw_r2r(mm1, mm0);         // mm0 = mm0 - mm1; !!
552 				psubsw_r2r(mm2, mm7);
553 				movq_r2r(mm0, mm3);
554 				movq_r2r(mm7, mm4);
555 
556 				// abs() ((mm0^(mm0>>15))-(mm0>>15))
557 
558 				psraw_i2r(15, mm3);
559 				psraw_i2r(15, mm4);
560 				pxor_r2r(mm3, mm0);
561 				pxor_r2r(mm4, mm7);
562 				psubw_r2r(mm3, mm0);
563 				psubw_r2r(mm4, mm7);
564 
565 				pcmpgtw_r2r(mm5, mm0);     //compare if greater than thres
566 				pcmpgtw_r2r(mm5, mm7);
567 				psrlw_i2r(15, mm0);       // norm
568 				psrlw_i2r(15, mm7);       // norm
569 				packuswb_r2r(mm0, mm0);   // pack to bytes
570 				packuswb_r2r(mm7, mm7);   // pack to bytes
571 
572 				// mm0: result first compare
573 				// mm1-mm4: free
574 				// mm5: threshold
575 				// mm6: mask
576 				// mm7: copy of src
577 
578 				pand_r2r(mm7, mm0);
579 
580 				// write to moving
581 				movd_r2m(mm0, *moving);
582 
583 				ac_memcpy(prev, src, 4);
584 
585 				src+=4;
586 				prev+=4;
587 				srcminus+=4;
588 
589 				count += *moving++;
590 				count += *moving++;
591 				count += *moving++;
592 				count += *moving++;
593 
594 			    }
595 
596 			} else { // even lines
597 
598 			    for (x=0; x<w; x+=4) {
599 				movd_m2r(*src, mm0);         // a b c d 0 0 0 0
600 				movd_m2r(*(prev+w), mm1);    // e f g h 0 0 0 0
601 				movd_m2r(*prev, mm2);
602 
603 				punpcklbw_r2r (mm0, mm0);     // a a b b c c d d
604 				punpcklbw_r2r (mm1, mm1);     // e e f f g g h h
605 				punpcklbw_r2r (mm2, mm2);
606 				pand_r2r (mm6, mm0);          // 0 a 0 b 0 c 0 d
607 				pand_r2r (mm6, mm1);          // 0 e 0 f 0 g 0 h
608 				pand_r2r (mm6, mm2);
609 
610 				movq_r2r (mm0, mm7);          // save in mm7
611 
612 				psubsw_r2r(mm1, mm0);         // mm0 = mm0 - mm1; !!
613 				psubsw_r2r(mm2, mm7);
614 				movq_r2r(mm0, mm3);
615 				movq_r2r(mm7, mm4);
616 
617 				// abs() ((mm0^(mm0>>15))-(mm0>>15))
618 
619 				psraw_i2r(15, mm3);
620 				psraw_i2r(15, mm4);
621 				pxor_r2r(mm3, mm0);
622 				pxor_r2r(mm4, mm7);
623 				psubw_r2r(mm3, mm0);
624 				psubw_r2r(mm4, mm7);
625 
626 				pcmpgtw_r2r(mm5, mm0);     //compare if greater than thres
627 				pcmpgtw_r2r(mm5, mm7);
628 				psrlw_i2r(15, mm0);       // norm
629 				psrlw_i2r(15, mm7);       // norm
630 				packuswb_r2r(mm0, mm0);   // pack to bytes
631 				packuswb_r2r(mm7, mm7);   // pack to bytes
632 
633 				// mm0: result first compare
634 				// mm1-mm4: free
635 				// mm5: threshold
636 				// mm6: mask
637 				// mm7: copy of src
638 
639 				pand_r2r(mm7, mm0);
640 
641 				// write to moving
642 				movd_r2m(mm0, *moving);
643 
644 				ac_memcpy(prev, src, 4);
645 
646 				src+=4;
647 				prev+=4;
648 
649 				count += *moving++;
650 				count += *moving++;
651 				count += *moving++;
652 				count += *moving++;
653 
654 			    }
655 
656 			}
657 			srcminus += srcpitch;
658 			moving += PAD;
659 		    }
660 
661 		    emms();
662 
663 		  } else // cannot use mmx
664 #elif CAN_COMPILE_C_ALTIVEC_FIXME_BROKEN
665 		  if (can_use_altivec) {
666 
667 		    vector unsigned char vthres;
668 		    vector unsigned char shift = vec_splat_u8(7);
669 		    unsigned char __attribute__ ((aligned(16))) tdata[16];
670 		    int i;
671 		    memset(tdata, _threshold, 16);
672 		    vthres = vec_ld(0, tdata);
673 
674 		    count = 0;
675 		    //tc_log_msg(MOD_NAME, "Align: %p %p %p", src, srcminus, prev);
676 		    for (y = 1; y < hminus1; y++)
677 		    {
678 			if (y & 1) { // odd lines
679 
680 			    for (x=0; x<w; x+=16) {
681 
682 				vector unsigned char luma = vec_ld(0, (unsigned char *)src);
683 				vector unsigned char p0 = vec_ld(x, (unsigned char *)srcminus);
684 				vector unsigned char p1 = vec_ld(0, (unsigned char *)prev);
685 				vector unsigned char vmov;
686 
687 				p0 = vec_sub (vec_max (luma, p0), vec_min (luma, p0));
688 				p1 = vec_sub (vec_max (luma, p1), vec_min (luma, p1));
689 				p0 = (vector unsigned char)vec_cmpgt(p0, vthres);
690 				p1 = (vector unsigned char)vec_cmpgt(p1, vthres);
691 				vmov = vec_and(p0, p1);
692 
693 				// FF -> 01
694 				vmov = vec_sr(vmov, shift);
695 
696 				vec_st(vmov, 0, (unsigned char *)moving);
697 
698 				/* Keep a count of the number of moving pixels for the
699 				   scene change detection. */
700 				for (i=0; i<16; i++) {
701 				    count += *moving++;
702 				    *prev++ = *src++;
703 				}
704 
705 			    }
706 
707 			} else { // even lines
708 
709 			    for (x=0; x<w; x+=16) {
710 
711 				vector unsigned char luma = vec_ld(0, (unsigned char *)src);
712 				vector unsigned char p0 = vec_ld(w, (unsigned char *)prev);
713 				vector unsigned char p1 = vec_ld(0, (unsigned char *)prev);
714 				vector unsigned char vmov;
715 
716 				p0 = vec_sub (vec_max (luma, p0), vec_min (luma, p0));
717 				p1 = vec_sub (vec_max (luma, p1), vec_min (luma, p1));
718 				vmov = vec_and(
719 					(vector unsigned char)vec_cmpgt(p0, vthres),
720 					(vector unsigned char)vec_cmpgt(p1, vthres));
721 
722 				// FF -> 01
723 				vmov = vec_sr(vmov, shift);
724 
725 				vec_st(vmov, 0, (unsigned char *)moving);
726 
727 				/* Keep a count of the number of moving pixels for the
728 				   scene change detection. */
729 				for (i=0; i<16; i++) {
730 				    count += *moving++;
731 				    *prev++ = *src++;
732 				}
733 			    }
734 			} // odd vs. even
735 
736 			srcminus += srcpitch;
737 			moving += PAD;
738 
739 		    } // height
740 
741 		    tc_log_msg(MOD_NAME, "COUNT %d|", count);
742 
743 		  } else
744 #endif
745 		  {
746 		    count = 0;
747 		    for (y = 1; y < hminus1; y++)
748 		    {
749 			x = 0;
750 			if (y & 1) {
751 
752 			    do {
753 
754 				int luma = *src++&0xff;
755 				int p0 = luma - (*(srcminus+x)&0xff);
756 				int p1 = luma - (*prev&0xff);
757 				/* 15:11 < GomGom> abs can be replaced by i^(i>>31)-(i>>31) */
758 
759 				*prev++ = luma;
760 				*moving = ((ABS_u8(p0) > _threshold) & (ABS_u8(p1) > _threshold));
761 				count += *moving++;
762 
763 			    } while(++x < w);
764 
765 			} else {
766 
767 			    do {
768 
769 				int luma = *src++ & 0xff;
770 				int p0 = luma - (*(prev+w)&0xff);
771 				int p1 = luma - (*prev&0xff);
772 
773 				*prev++ = luma;
774 				*moving = ((ABS_u8(p0) > _threshold) & (ABS_u8(p1) > _threshold));
775 				count += *moving++;
776 
777 			    } while(++x < w);
778 			}
779 
780 			moving += PAD;
781 			srcminus += srcpitch;
782 		    }
783 		  }
784 		}
785 
786 		/* Determine whether a scene change has occurred. */
787 		if ((100L * count) / (h * w) >= mfd->scenethreshold) scenechange = 1;
788 		else scenechange = 0;
789 
790 		if (scenechange && mfd->verbose)
791 		    tc_log_info(MOD_NAME, "Scenechange at %6d (%6ld moving pixels)", counter, count);
792 		/*
793 		tc_log_msg(MOD_NAME, "Frame (%04d) count (%8ld) sc (%d) calc (%02ld)",
794 				counter, count, scenechange, (100 * count) / (h * w));
795 				*/
796 
797 
798 		/* Perform a denoising of the motion map if enabled. */
799 		if (!scenechange && mfd->highq)
800 		{
801 		    //uint64_t before = 0;
802 		    //uint64_t after = 0;
803 		    //rdtscll(before);
804 
805 		    Erode_Dilate(_moving, _fmoving, w, h);
806 
807 		    //rdtscll(after);
808 		    //tc_log_msg(MOD_NAME, "%6d : %8lld", count, after-before);
809 		}
810 	}
811 	if (mfd->diffmode == FIELD_ONLY) {
812 
813 		/* Field differencing only mode. */
814 		T = _threshold * _threshold;
815 		for (y = 1; y < hminus1; y++)
816 		{
817 			x = 0;
818 			do
819 			{
820 				// Set the moving flag if the diff exceeds the configured
821 				// threshold.
822 				moving[x] = 0;
823 				if (y & 1)
824 				{
825 					// Now check field motion.
826 					fiMotion = 0;
827 					luma = (src[x]) & 0xff;
828 					lumap= (srcminus[x]) & 0xff;
829 					luman = (srcplus[x]) & 0xff;
830 						if ((lumap - luma) * (luman - luma) > T)
831 							moving[x] = 1;
832 				}
833 				/* Keep a count of the number of moving pixels for the
834 				   scene change detection. */
835 				if (moving[x]) count++;
836 			} while(++x < w);
837 			src = src + srcpitch;
838 			srcminus = srcminus + srcpitch;
839 			srcplus = srcplus + srcpitch;
840 			moving += (w+PAD);
841 		}
842 
843 		/* Determine whether a scene change has occurred. */
844 		if ((100L * count) / (h * w) >= mfd->scenethreshold) scenechange = 1;
845 		else scenechange = 0;
846 
847 		/* Perform a denoising of the motion map if enabled. */
848 		if (!scenechange && mfd->highq)
849 		{
850 			int xlo, xhi, ylo, yhi;
851 			int u, v;
852 			int N = 5;
853 			int Nover2 = N/2;
854 			int sum;
855 			unsigned char *m;
856 
857 			// Erode.
858 			fmoving = _fmoving;
859 			for (y = 0; y < h; y++)
860 			{
861 				for (x = 0; x < w; x++)
862 				{
863 					if (!((_moving + y * (w+PAD))[x]))
864 					{
865 						fmoving[x] = 0;
866 						continue;
867 					}
868 					xlo = x - Nover2; if (xlo < 0) xlo = 0;
869 					xhi = x + Nover2; if (xhi >= w) xhi = wminus1;
870 					ylo = y - Nover2; if (ylo < 0) ylo = 0;
871 					yhi = y + Nover2; if (yhi >= h) yhi = hminus1;
872 					m = _moving + ylo * (w+PAD);
873 					sum = 0;
874 					for (u = ylo; u <= yhi; u++)
875 					{
876 						for (v = xlo; v <= xhi; v++)
877 						{
878 							sum += m[v];
879 						}
880 						m += w;
881 					}
882 					if (sum > 9)
883 						fmoving[x] = 1;
884 					else
885 						fmoving[x] = 0;
886 				}
887 				fmoving += (w+PAD);
888 			}
889 
890 			// Dilate.
891 			N = 5;
892 			Nover2 = N/2;
893 			moving = _moving;
894 			for (y = 0; y < h; y++)
895 			{
896 				for (x = 0; x < w; x++)
897 				{
898 					if (!((_fmoving + y * (w+PAD))[x]))
899 					{
900 						moving[x] = 0;
901 						continue;
902 					}
903 					xlo = x - Nover2; if (xlo < 0) xlo = 0;
904 					xhi = x + Nover2; if (xhi >= w) xhi = wminus1;
905 					ylo = y - Nover2; if (ylo < 0) ylo = 0;
906 					yhi = y + Nover2; if (yhi >= h) yhi = hminus1;
907 					m = _moving + ylo * (w+PAD);
908 					for (u = ylo; u <= yhi; u++)
909 					{
910 						for (v = xlo; v <= xhi; v++)
911 						{
912 							m[v] = 1;
913 						}
914 						m += (w+PAD);
915 					}
916 				}
917 				moving += (w+PAD);
918 			}
919 		}
920 	}
921 
922 	// -----------------
923 	// Render.
924 	// -----------------
925 
926 	// The first line gets a free ride.
927 	src = src_buf;
928 	dst = dst_buf;
929 
930 	ac_memcpy(dst, src, w);
931 	src = src_buf + srcpitch;
932 	srcminus = src - srcpitch;
933 	srcplus = src + srcpitch;
934 
935 	if (cubic)
936 	{
937 		srcminusminus = src - 3 * srcpitch;
938 		srcplusplus = src + 3 * srcpitch;
939 	}
940 
941 	dst = dst_buf + dstpitch;
942 	moving = _moving + w+PAD;
943 	movingminus = _moving;
944 	movingplus = moving + w+PAD;
945 
946 	/*
947 	*/
948 
949 	if (mfd->motionOnly)
950 	{
951 	    for (y = 1; y < hminus1; y++)
952 	    {
953 		if (mfd->Blend)
954 		{
955 		    x = 0;
956 		    do {
957 			if (!(movingminus[x] | moving[x] | movingplus[x]) && !scenechange)
958 			    dst[x] = (clamp_f==clamp_Y)?BLACK_BYTE_Y:BLACK_BYTE_UV;
959 			else
960 			{
961 			    /* Blend fields. */
962 			    dst[x] = (((src[x]&0xff)>>1) + ((srcminus[x]&0xff)>>2) + ((srcplus[x]&0xff)>>2))&0xff;
963 			}
964 		    } while(++x < w);
965 		}
966 		else
967 		{
968 		    x = 0;
969 		    do {
970 			if (!(movingminus[x] | moving[x] | movingplus[x]) && !scenechange)
971 			    dst[x] = (clamp_f==clamp_Y)?BLACK_BYTE_Y:BLACK_BYTE_UV;
972 			else if (y & 1)
973 			{
974 			    if (cubic && (y > 2) && (y < hminus3))
975 			    {
976 				rpp = (srcminusminus[x]) & 0xff;
977 				rp =  (srcminus[x]) & 0xff;
978 				rn =  (srcplus[x]) & 0xff;
979 				rnn = (srcplusplus[x]) & 0xff;
980 				R = (5 * (rp + rn) - (rpp + rnn)) >> 3;
981 				dst[x] = clamp_f(R);
982 			    }
983 			    else
984 			    {
985 				p1 = srcminus[x] &0xff;
986 				p1 &= 0xfe;
987 
988 				p2 = srcplus[x] &0xff;
989 				p2 &= 0xfe;
990 				dst[x] = ((p1>>1) + (p2>>1)) &0xff;
991 			    }
992 			}
993 			else
994 			    dst[x] = src[x];
995 		    } while(++x < w);
996 		}
997 		src = src + srcpitch;
998 		srcminus = srcminus + srcpitch;
999 		srcplus = srcplus + srcpitch;
1000 
1001 		if (cubic)
1002 		{
1003 		    srcminusminus = srcminusminus + srcpitch;
1004 		    srcplusplus = srcplusplus + srcpitch;
1005 		}
1006 
1007 		dst = dst + dstpitch;
1008 		moving += (w+PAD);
1009 		movingminus += (w+PAD);
1010 		movingplus += (w+PAD);
1011 	    }
1012 	    // The last line gets a free ride.
1013 	    ac_memcpy(dst, src, w);
1014 
1015 	    if (clamp_f == clamp_Y)
1016 		counter++;
1017 
1018 	    return;
1019 
1020 	}
1021 
1022 	if (mfd->Blend)
1023 	{
1024 	    // linear blend, see Blendline_c for a plainC version
1025 	    for (y = 1; y < hminus1; y++)
1026 	    {
1027 #ifdef HAVE_ASM_MMX
1028 	      if (can_use_mmx) {
1029 
1030 		uint64_t scmask = (scenechange<<24) | (scenechange<<16) | (scenechange<<8) | scenechange;
1031 		scmask = (scmask << 32) | scmask;
1032 
1033 		pcmpeqw_r2r(mm4, mm4);
1034 		psrlw_i2r(9,mm4);
1035 		packuswb_r2r(mm4, mm4);         // build 0x7f7f7f7f7f7f7f7f
1036 
1037 		pcmpeqw_r2r(mm6, mm6);
1038 		psrlw_i2r(10,mm6);
1039 		packuswb_r2r(mm6, mm6);         // build 0x3f3f3f3f3f3f3f3f
1040 
1041 		for (x=0; x<w; x+=8) {
1042 
1043 		    movq_m2r(scmask, mm0);          // has a scenechange happend?
1044 
1045 		    pxor_r2r(mm5, mm5);             // clear mm5
1046 
1047 		    por_m2r (moving     [x], mm0);
1048 		    movq_m2r(src        [x], mm1);   // load src
1049 		    por_m2r (movingminus[x], mm0);   // motion detected?
1050 		    movq_m2r(src        [x-w], mm2);   // load srcminus
1051 		    por_m2r (movingplus [x], mm0);
1052 		    movq_m2r(src        [x+w], mm3);   // load srcplus
1053 
1054 		    movq_r2r (mm1, mm7);
1055 
1056 		    pcmpgtb_r2r(mm5, mm0);  // make FF out 1 and 0 out of 0
1057 
1058 		    pcmpeqw_r2r(mm5, mm5);  // make all ff's (recycle mm5)
1059 		    psubb_r2r  (mm0, mm5);  // inverse mask
1060 		    pand_r2r   (mm0, mm7);
1061 		    pand_r2r   (mm5, mm1);
1062 		    psrlw_i2r  (1,   mm7);
1063 
1064 		    pand_r2r   (mm4, mm7);  // clear highest bit
1065 		    por_r2r    (mm7, mm1);  // merge src>>1 and src together dependand on moving mask
1066 
1067 		    // mm0: mask, if 0 don't shift, if ff shift
1068 		    // mm1: complete src
1069 		    // mm2: srcminus
1070 		    // mm3: srcplus
1071 		    // mm4: 0x7f mask
1072 		    // mm5: free
1073 		    // mm6: 0x3f mask
1074 		    // mm7: free
1075 
1076 		    // handle srcm(inus) and srcp(lus)
1077 
1078 		    pand_r2r (mm0, mm2);
1079 		    pand_r2r (mm0, mm3);
1080 
1081 		    psrlw_i2r(2,   mm2);  // srcm>>2
1082 		    psrlw_i2r(2,   mm3);  // srcp>>2
1083 		    pand_r2r (mm6, mm2);  // clear highest two bits
1084 		    pand_r2r (mm6, mm3);
1085 
1086 		    paddusb_r2r (mm2, mm1);   // src>>1 + srcn>>2 + srcp>>2
1087 		    paddusb_r2r (mm3, mm1);
1088 
1089 		    movq_r2m(mm1, dst[x]);
1090 
1091 		}
1092 	      } else // cannot use mmx
1093 #elif CAN_COMPILE_C_ALTIVEC
1094 	      if (can_use_altivec) {
1095 		  unsigned char tdata[16];
1096 		  memset (tdata, scenechange, 16);
1097 		  vector unsigned char vscene = vec_ld(0, tdata);
1098 		  vector unsigned char vmov, vsrc2, vdest;
1099 		  vector unsigned char vsrc, vsrcminus, vsrcplus;
1100 		  vector unsigned char zero = vec_splat_u8(0);
1101 		  vector unsigned char ones = vec_splat_u8(1);
1102 		  vector unsigned char twos = vec_splat_u8(2);
1103 
1104 
1105 		  for (x=0; x<w; x+=16) {
1106 		      vmov = vec_xor(vmov, vmov);
1107 		      vmov = vec_or (vmov, vec_ld(x, moving));
1108 		      vsrc = vec_ld(x, (unsigned char *)src);
1109 		      vmov = vec_or (vmov, vec_ld(x, movingminus));
1110 		      vsrcminus = vec_ld(x-w, (unsigned char *)src);
1111 		      vmov = vec_or (vmov, vec_ld(x, movingplus));
1112 		      vsrcplus = vec_ld(x+w, (unsigned char *)src);
1113 		      vmov = vec_or(vmov, vscene);
1114 
1115 		      vsrc2 = vec_sr(vsrc, ones);
1116 		      vsrc2 = vec_add(vsrc2, vec_sr(vsrcminus, twos));
1117 		      vsrc2 = vec_add(vsrc2, vec_sr(vsrcplus, twos));
1118 		      vmov = (vector unsigned char)vec_cmpgt (vmov, zero);
1119 		 vdest = vec_or (vec_sel(vsrc, zero, vmov), vec_sel (vsrc2, zero, vec_nor(vmov, vmov)));
1120 		      vec_st(vdest, x, (unsigned char *)dst);
1121 		  }
1122 
1123 
1124 
1125 
1126 
1127 	      } else
1128 #endif
1129 	      {
1130 		Blendline_c (dst, src, srcminus, srcplus, moving, movingminus, movingplus, w, scenechange);
1131 	      }
1132 
1133 		src +=  srcpitch;
1134 		srcminus += srcpitch;
1135 		srcplus += srcpitch;
1136 
1137 		dst += dstpitch;
1138 		moving += (w+PAD);
1139 		movingminus += (w+PAD);
1140 		movingplus += (w+PAD);
1141 	    }
1142 
1143 	    emms();
1144 	    return;
1145 	}
1146 
1147 	emms();
1148 
1149 	// Doing line interpolate. Thus, even lines are going through
1150 	// for moving and non-moving mode. Odd line pixels will be subject
1151 	// to the motion test.
1152 
1153 	for (y = 1; y < hminus1; y++)
1154 	{
1155 	    if (y&1)
1156 	    {
1157 		x = 0;
1158 		do {
1159 		    if (movingminus[x] | moving[x] | movingplus[x] | scenechange)
1160 			if (cubic & (y > 2) & (y < hminus3))
1161 			{
1162 			    R = (5 * ((srcminus[x] & 0xff) + (srcplus[x] & 0xff))
1163 				    - ((srcminusminus[x] & 0xff) + (srcplusplus[x] & 0xff))) >> 3;
1164 			    dst[x] = clamp_f(R);
1165 			}
1166 			else
1167 			{
1168 			    dst[x] = (((srcminus[x]&0xff) >> 1) + ((srcplus[x]&0xff) >> 1)) & 0xff;
1169 			}
1170 		    else
1171 		    {
1172 			dst[x] = src[x];
1173 		    }
1174 		} while(++x < w);
1175 	    }
1176 	    else
1177 	    {
1178 		// Even line; pass it through.
1179 		ac_memcpy(dst, src, w);
1180 	    }
1181 	    src +=  srcpitch;
1182 	    srcminus += srcpitch;
1183 	    srcplus += srcpitch;
1184 
1185 	    if (cubic)
1186 	    {
1187 		srcminusminus += srcpitch;
1188 		srcplusplus += srcpitch;
1189 	    }
1190 
1191 	    dst += dstpitch;
1192 	    moving += (w+PAD);
1193 	    movingminus += (w+PAD);
1194 	    movingplus += (w+PAD);
1195 	}
1196 
1197 	// The last line gets a free ride.
1198 	ac_memcpy(dst, src, w);
1199 	if (clamp_f == clamp_Y)
1200 	    counter++;
1201 
1202 	return;
1203 }
1204 
tc_filter(frame_list_t * ptr_,char * options)1205 int tc_filter(frame_list_t *ptr_, char *options)
1206 {
1207   vframe_list_t *ptr = (vframe_list_t *)ptr_;
1208 
1209   //----------------------------------
1210   //
1211   // filter init
1212   //
1213   //----------------------------------
1214 
1215 
1216   if(ptr->tag & TC_FILTER_INIT) {
1217 
1218 	unsigned int width, height;
1219 	int msize;
1220 
1221 	if((vob = tc_get_vob())==NULL) return(-1);
1222 
1223 
1224 	mfd = tc_zalloc(sizeof(MyFilterData));
1225 
1226 	if (!mfd) {
1227 		tc_log_error(MOD_NAME, "No memory!");
1228 	        return (-1);
1229 	}
1230 
1231 	width  = vob->im_v_width;
1232 	height = vob->im_v_height;
1233 
1234 	/* default values */
1235 	mfd->motionOnly     = 0;
1236 	mfd->threshold      = LUMA_THRESHOLD;
1237 	mfd->chromathres    = CHROMA_THRESHOLD;
1238 	mfd->scenethreshold = SCENE_THRESHOLD;
1239 	mfd->diffmode       = FRAME_ONLY;
1240 	mfd->codec          = vob->im_v_codec;
1241 	mfd->highq          = 1;
1242 	mfd->cubic          = 1;
1243 	mfd->doChroma       = 1;
1244 	mfd->Blend          = 1;
1245 	mfd->verbose        = 0;
1246 
1247 	if (mfd->codec != CODEC_YUV) {
1248 	    tc_log_error (MOD_NAME, "This filter is only capable of YUV mode");
1249 	    return -1;
1250 	}
1251 
1252 	if (options != NULL) {
1253 
1254 	  if(verbose) tc_log_info(MOD_NAME, "options=%s", options);
1255 
1256 	  optstr_get (options, "motionOnly",     "%d",  &mfd->motionOnly     );
1257 	  optstr_get (options, "threshold",      "%d",  &mfd->threshold      );
1258 	  optstr_get (options, "chromathres",    "%d",  &mfd->chromathres    );
1259 	  optstr_get (options, "Blend",          "%d",  &mfd->Blend          );
1260 	  optstr_get (options, "scenethres",     "%d",  &mfd->scenethreshold );
1261 	  optstr_get (options, "highq",          "%d",  &mfd->highq          );
1262 	  optstr_get (options, "cubic",          "%d",  &mfd->cubic          );
1263 	  optstr_get (options, "diffmode",       "%d",  &mfd->diffmode       );
1264 	  optstr_get (options, "doChroma",       "%d",  &mfd->doChroma       );
1265 	  optstr_get (options, "verbose",        "%d",  &mfd->verbose        );
1266 
1267 	  if (optstr_lookup (options, "help") != NULL) {
1268 		  help_optstr();
1269 	  }
1270 	}
1271 
1272 	if (verbose > 1) {
1273 
1274 	  tc_log_info (MOD_NAME, " Smart YUV Deinterlacer Test Filter Settings (%dx%d):", width, height);
1275 	  tc_log_info (MOD_NAME, "        motionOnly = %d", mfd->motionOnly);
1276 	  tc_log_info (MOD_NAME, "          diffmode = %d", mfd->diffmode);
1277 	  tc_log_info (MOD_NAME, "         threshold = %d", mfd->threshold);
1278 	  tc_log_info (MOD_NAME, "       chromathres = %d", mfd->chromathres);
1279 	  tc_log_info (MOD_NAME, "        scenethres = %d", mfd->scenethreshold);
1280 	  tc_log_info (MOD_NAME, "             cubic = %d", mfd->cubic);
1281 	  tc_log_info (MOD_NAME, "             highq = %d", mfd->highq);
1282 	  tc_log_info (MOD_NAME, "             Blend = %d", mfd->Blend);
1283 	  tc_log_info (MOD_NAME, "          doChroma = %d", mfd->doChroma);
1284 	  tc_log_info (MOD_NAME, "           verbose = %d", mfd->verbose);
1285 	}
1286 
1287 	/* fetch memory */
1288 
1289 	mfd->buf =  tc_bufalloc (width*height*3);
1290 	mfd->prevFrame =  tc_bufalloc (width*height*3);
1291 
1292 	msize = width*height + 4*(width+PAD) + PAD*height;
1293 	mfd->movingY = (unsigned char *) tc_bufalloc(sizeof(unsigned char)*msize);
1294 	mfd->fmovingY = (unsigned char *) tc_bufalloc(sizeof(unsigned char)*msize);
1295 
1296 	msize = width*height/4 + 4*(width+PAD) + PAD*height;
1297 	mfd->movingU  = (unsigned char *) tc_bufalloc(sizeof(unsigned char)*msize);
1298 	mfd->movingV  = (unsigned char *) tc_bufalloc(sizeof(unsigned char)*msize);
1299 	mfd->fmovingU = (unsigned char *) tc_bufalloc(sizeof(unsigned char)*msize);
1300 	mfd->fmovingV = (unsigned char *) tc_bufalloc(sizeof(unsigned char)*msize);
1301 
1302 	if ( !mfd->movingY || !mfd->movingU || !mfd->movingV || !mfd->fmovingY ||
1303 	      !mfd->fmovingU || !mfd->fmovingV || !mfd->buf || !mfd->prevFrame) {
1304 	    tc_log_msg(MOD_NAME, "Memory allocation error");
1305 	    return -1;
1306 	}
1307 
1308 	memset(mfd->prevFrame, BLACK_BYTE_Y, width*height);
1309 	memset(mfd->prevFrame+width*height, BLACK_BYTE_UV, width*height/2);
1310 
1311 	memset(mfd->buf, BLACK_BYTE_Y, width*height);
1312 	memset(mfd->buf+width*height, BLACK_BYTE_UV, width*height/2);
1313 
1314 	msize = width*height + 4*(width+PAD) + PAD*height;
1315 	memset(mfd->movingY,  0, msize);
1316 	memset(mfd->fmovingY, 0, msize);
1317 
1318 	msize = width*height/4 + 4*(width+PAD) + PAD*height;
1319 	memset(mfd->movingU,  0, msize);
1320 	memset(mfd->movingV,  0, msize);
1321 	memset(mfd->fmovingU, 0, msize);
1322 	memset(mfd->fmovingV, 0, msize);
1323 
1324 	// Optimisation
1325 	// For the motion maps a little bit more than the needed memory is
1326 	// allocated. This is done, because than we don't have to use
1327 	// conditional borders int the erode and dilate routines. 2 extra lines
1328 	// on top and bottom and 2 pixels left and right for each line.
1329 	// This is also the reason for the w+4's all over the place.
1330 	//
1331 	// This gives an speedup factor in erode+denoise of about 3.
1332 	//
1333 	// A lot of brain went into the optimisations, here are some numbers of
1334 	// the separate steps. Note, to get these numbers I used the rdtsc
1335 	// instruction to read the CPU cycle counter in seperate programms:
1336 	// o  Motion map creation
1337 	//      orig: 26.283.387 Cycles
1338 	//       now:  8.991.686 Cycles
1339 	//       mmx:  5.062.952
1340 	// o  Erode+dilate
1341 	//      orig: 55.847.077
1342 	//       now: 21.764.997
1343 	//  Erodemmx: 18.765.878
1344 	// o  Blending
1345 	//      orig: 8.162.287
1346 	//       now: 5.384.433
1347 	//       mmx: 4.569.875
1348 	//   new mmx: 3.656.537
1349 	// o  Cubic interpolation
1350 	//      orig: 7.487.338
1351 	//       now: 6.684.908
1352 	//      more: 3.554.580
1353 	//
1354 	// Overall improvement in transcode:
1355 	// 11.57 -> 22.78 frames per second for the test clip.
1356 	//
1357 
1358 	// filter init ok.
1359 
1360 	if(verbose) tc_log_info(MOD_NAME,
1361 #ifdef HAVE_ASM_MMX
1362 		"(MMX) "
1363 #endif
1364 #ifdef CAN_COMPILE_C_ALTIVEC
1365 		"(ALTIVEC) "
1366 #endif
1367 		"%s %s", MOD_VERSION, MOD_CAP);
1368 
1369 	return 0;
1370 
1371   } /* TC_FILTER_INIT */
1372 
1373 
1374   if(ptr->tag & TC_FILTER_GET_CONFIG) {
1375       char buf[255];
1376       optstr_filter_desc (options, MOD_NAME, MOD_CAP, MOD_VERSION, MOD_AUTHOR, "VYE", "1");
1377 
1378       tc_snprintf (buf, sizeof(buf), "%d", mfd->motionOnly);
1379       optstr_param (options, "motionOnly", "Show motion areas only, blacking out static areas" ,"%d", buf, "0", "1");
1380       tc_snprintf (buf, sizeof(buf), "%d", mfd->diffmode);
1381       optstr_param (options, "diffmode", "Motion Detection (0=frame, 1=field, 2=both)", "%d", buf, "0", "2" );
1382       tc_snprintf (buf, sizeof(buf), "%d", mfd->threshold);
1383       optstr_param (options, "threshold", "Motion Threshold (luma)", "%d", buf, "0", "255" );
1384       tc_snprintf (buf, sizeof(buf), "%d", mfd->chromathres);
1385       optstr_param (options, "chromathres", "Motion Threshold (chroma)", "%d", buf, "0", "255" );
1386       tc_snprintf (buf, sizeof(buf), "%d", mfd->scenethreshold);
1387       optstr_param (options, "scenethres", "Threshold for detecting scenechanges", "%d", buf, "0", "255" );
1388       tc_snprintf (buf, sizeof(buf), "%d", mfd->highq);
1389       optstr_param (options, "highq", "High-Quality processing (motion Map denoising)", "%d", buf, "0", "1" );
1390       tc_snprintf (buf, sizeof(buf), "%d", mfd->cubic);
1391       optstr_param (options, "cubic", "Do cubic interpolation", "%d", buf, "0", "1" );
1392       tc_snprintf (buf, sizeof(buf), "%d", mfd->Blend);
1393       optstr_param (options, "Blend", "Blend the frames for deinterlacing", "%d", buf, "0", "1" );
1394       tc_snprintf (buf, sizeof(buf), "%d", mfd->doChroma);
1395       optstr_param (options, "doChroma", "Enable chroma processing (slower but more accurate)", "%d", buf, "0", "1" );
1396       tc_snprintf (buf, sizeof(buf), "%d", mfd->verbose);
1397       optstr_param (options, "verbose", "Verbose mode", "%d", buf, "0", "1" );
1398 
1399       return (0);
1400   }
1401 
1402   if(ptr->tag & TC_FILTER_CLOSE) {
1403 
1404 	if (!mfd)
1405 		return 0;
1406 
1407 	tc_buffree (mfd->buf);
1408 	mfd->buf = NULL;
1409 
1410 	tc_buffree (mfd->prevFrame);
1411 	mfd->prevFrame = NULL;
1412 
1413 	tc_buffree (mfd->movingY);
1414 	mfd->movingY = NULL;
1415 	tc_buffree (mfd->movingU);
1416 	mfd->movingU = NULL;
1417 	tc_buffree (mfd->movingV);
1418 	mfd->movingV = NULL;
1419 
1420 	tc_buffree (mfd->fmovingY);
1421 	mfd->fmovingY = NULL;
1422 	tc_buffree (mfd->fmovingU);
1423 	mfd->fmovingU = NULL;
1424 	tc_buffree (mfd->fmovingV);
1425 	mfd->fmovingV = NULL;
1426 
1427 	if (mfd)
1428 		free(mfd);
1429 
1430 	return 0;
1431 
1432   } /* TC_FILTER_CLOSE */
1433 
1434 ///////////////////////////////////////////////////////////////////////////
1435 
1436   //if(ptr->tag & TC_PRE_S_PROCESS && ptr->tag & TC_VIDEO && !(ptr->attributes & TC_FRAME_IS_SKIPPED)) {
1437   if(ptr->tag & TC_PRE_M_PROCESS && ptr->tag & TC_VIDEO && !(ptr->attributes & TC_FRAME_IS_SKIPPED)) {
1438 
1439 	  int U  = ptr->v_width*ptr->v_height;
1440 	  int V  = ptr->v_width*ptr->v_height*5/4;
1441 	  int w2 = ptr->v_width/2;
1442 	  int h2 = ptr->v_height/2;
1443 	  int msize = ptr->v_width*ptr->v_height + 4*(ptr->v_width+PAD) + PAD*ptr->v_height;
1444 	  int off = 2*(ptr->v_width+PAD)+PAD/2;
1445 
1446 	  memset(mfd->movingY,  0, msize);
1447 	  memset(mfd->fmovingY, 0, msize);
1448 	  /*
1449 	  */
1450 
1451 
1452 	  smartyuv_core(ptr->video_buf, mfd->buf, mfd->prevFrame,
1453 		        ptr->v_width, ptr->v_height, ptr->v_width, ptr->v_width,
1454 		        mfd->movingY+off, mfd->fmovingY+off, clamp_Y, mfd->threshold);
1455 
1456 
1457 	  if (mfd->doChroma) {
1458 	      msize = ptr->v_width*ptr->v_height/4 + 4*(ptr->v_width+PAD) + PAD*ptr->v_height;
1459 	      off = 2*(ptr->v_width/2+PAD)+PAD/2;
1460 
1461 	      memset(mfd->movingU,  0, msize);
1462 	      memset(mfd->fmovingU, 0, msize);
1463 	      memset(mfd->movingV,  0, msize);
1464 	      memset(mfd->fmovingV, 0, msize);
1465 	      /*
1466 	      */
1467 
1468 	      smartyuv_core(ptr->video_buf+U, mfd->buf+U, mfd->prevFrame+U,
1469 			  w2, h2, w2, w2,
1470 			  mfd->movingU+off, mfd->fmovingU+off, clamp_UV, mfd->chromathres);
1471 
1472 	      smartyuv_core(ptr->video_buf+V, mfd->buf+V, mfd->prevFrame+V,
1473 			  w2, h2, w2, w2,
1474 			  mfd->movingV+off, mfd->fmovingV+off, clamp_UV, mfd->chromathres);
1475 	  } else {
1476 	      //pass through
1477 	      ac_memcpy(mfd->buf+U, ptr->video_buf+U, ptr->v_width*ptr->v_height/2);
1478 	      //memset(mfd->buf+U, BLACK_BYTE_UV, ptr->v_width*ptr->v_height/2);
1479 	  }
1480 
1481 	  /*
1482 	  memset(mfd->buf, BLACK_BYTE_Y, ptr->v_width*ptr->v_height);
1483 	  memset(mfd->buf+U, BLACK_BYTE_UV, ptr->v_width*ptr->v_height/2);
1484 			  */
1485 
1486 	  ac_memcpy (ptr->video_buf, mfd->buf, ptr->video_size);
1487 
1488 	  return 0;
1489   }
1490   return 0;
1491 }
1492 
1493