1 /***********************************************************
2 * YUVDENOISER for the mjpegtools *
3 * ------------------------------------------------------- *
4 * (C) 2001-2004 Stefan Fendt *
5 * *
6 * Licensed and protected by the GNU-General-Public- *
7 * License version 2 or if you prefer any later version of *
8 * that license). See the file LICENSE for detailed infor- *
9 * mation. *
10 * *
11 * FILE: main.c *
12 * *
13 ***********************************************************/
14
15 /* 2010-09-22, Franz Brauße <dev@karlchenofhell.org>
16 * - added SSE2-accelerated versions of filter_plane_median()
17 * and temporal_filter_planes()
18 */
19
20 #include <stdio.h>
21 #include <stdlib.h>
22 #include <string.h>
23 #include <math.h>
24 #include "config.h"
25 #include "mjpeg_types.h"
26 #include "yuv4mpeg.h"
27 #include "mjpeg_logging.h"
28 #include "cpu_accel.h"
29 #include "motionsearch.h"
30
31 #if defined(__SSE3__)
32 # include <pmmintrin.h>
33 #elif defined(__SSE2__)
34 # include <emmintrin.h>
35 #endif
36
37 int verbose = 1;
38 int width = 0;
39 int height = 0;
40 int lwidth = 0;
41 int lheight = 0;
42 int cwidth = 0;
43 int cheight = 0;
44 int input_chroma_subsampling = 0;
45 int input_interlaced = 0;
46 int hq_mode = 0;
47
48 int renoise_Y=0;
49 int renoise_U=0;
50 int renoise_V=0;
51
52 int temp_Y_thres = 0;
53 int temp_U_thres = 0;
54 int temp_V_thres = 0;
55
56 int med_pre_Y_thres = 0;
57 int med_pre_U_thres = 0;
58 int med_pre_V_thres = 0;
59
60 int med_post_Y_thres = 0;
61 int med_post_U_thres = 0;
62 int med_post_V_thres = 0;
63
64 int gauss_Y = 0;
65 int gauss_U = 0;
66 int gauss_V = 0;
67
68 uint8_t *frame1[3];
69 uint8_t *frame2[3];
70 uint8_t *frame3[3];
71 uint8_t *frame4[3];
72 uint8_t *frame5[3];
73 uint8_t *frame6[3];
74 uint8_t *frame7[3];
75
76 uint8_t *scratchplane1;
77 uint8_t *scratchplane2;
78 uint8_t *outframe[3];
79
80 int buff_offset;
81 int buff_size;
82
83 uint16_t transform_L16[256];
84 uint8_t transform_G8[65536];
85
86 /***********************************************************
87 * helper-functions *
88 ***********************************************************/
89
90 static void (*filter_plane_median)(uint8_t *, int, int, int);
91 static void (*temporal_filter_planes)(int, int, int, int);
92
93
94 void
gauss_filter_plane(uint8_t * frame,int w,int h,int t)95 gauss_filter_plane (uint8_t * frame, int w, int h, int t)
96 {
97 int i;
98 int v;
99 uint8_t * src = frame;
100 uint8_t * dst = scratchplane1;
101
102 if(t==0) return;
103
104 memcpy ( src-w*2, src, w );
105 memcpy ( src-w , src, w );
106
107 memcpy ( src+(w*h) , src+(w*h)-w, w );
108 memcpy ( src+(w*h)+w, src+(w*h)-w, w );
109
110 for(i=0;i<(w*h);i++)
111 {
112
113 v = *(src -2)*1;
114
115 v += *(src-w -1)*1;
116 v += *(src -1)*2;
117 v += *(src+w -1)*1;
118
119 v += *(src-w*2 )*1;
120 v += *(src-w )*2;
121 v += *(src )*4;
122 v += *(src+w )*2;
123 v += *(src+w*2 )*1;
124
125 v += *(src-w +1)*1;
126 v += *(src +1)*2;
127 v += *(src+w +1)*1;
128
129 v += *(src +2)*1;
130
131 v /= 20;
132
133 v = *(src)*(256-t) + (v)*(t);
134 v /= 256;
135
136 *(dst)=v;
137
138 dst++;
139 src++;
140 }
141
142 memcpy ( frame,scratchplane1,w*h );
143 }
144
145 void
temporal_filter_planes_MC(int idx,int w,int h,int t)146 temporal_filter_planes_MC (int idx, int w, int h, int t)
147 {
148 uint32_t sad,min;
149 uint32_t r, c, m;
150 int32_t d;
151 int x,y,sx,sy;
152
153 uint32_t v;
154 int x1,y1;
155 int x2,y2;
156 int x3,y3;
157 int x5,y5;
158 int x6,y6;
159 int x7,y7;
160
161 uint8_t *f1 = frame1[idx];
162 uint8_t *f2 = frame2[idx];
163 uint8_t *f3 = frame3[idx];
164 uint8_t *f4 = frame4[idx];
165 uint8_t *f5 = frame5[idx];
166 uint8_t *f6 = frame6[idx];
167 uint8_t *f7 = frame7[idx];
168 uint8_t *of = outframe[idx];
169
170 #if 1
171
172 if (t == 0) // shortcircuit filter if t = 0...
173 {
174 memcpy (of, f4, w * h);
175 return;
176 }
177 #endif
178
179 for (y = 0; y < h; y+=16)
180 for (x = 0; x < w; x+=16)
181 {
182
183 // find best matching 16x16 block for f3
184 min=psad_00 ( f4+(x)+(y)*w,f3+(x)+(y)*w,w,16,0x00ffffff );
185 x3=y3=0;
186 for (sy=-4; sy < 4; sy++)
187 for (sx=-4; sx < 4; sx++)
188 {
189 sad = psad_00 ( f4+(x)+(y)*w,f3+(x+sx)+(y+sy)*w,w,16,0x00ffffff );
190 sad += psad_00 ( f4+(x+8)+(y)*w,f3+(x+sx+8)+(y+sy)*w,w,16,0x00ffffff );
191 if(sad<min)
192 {
193 x3 = sx;
194 y3 = sy;
195 min = sad;
196 }
197 }
198
199 // find best matching 16x16 block for f5
200 min=psad_00 ( f4+(x)+(y)*w,f5+(x)+(y)*w,w,16,0x00ffffff );
201 x5=y5=0;
202 for (sy=-4; sy < 4; sy++)
203 for (sx=-4; sx < 4; sx++)
204 {
205 sad = psad_00 ( f4+(x)+(y)*w,f5+(x+sx)+(y+sy)*w,w,16,0x00ffffff );
206 sad += psad_00 ( f4+(x+8)+(y)*w,f5+(x+sx+8)+(y+sy)*w,w,16,0x00ffffff );
207 if(sad<min)
208 {
209 x5 = sx;
210 y5 = sy;
211 min = sad;
212 }
213 }
214
215 // find best matching 16x16 block for f2
216 min=psad_00 ( f4+(x)+(y)*w,f2+(x)+(y)*w,w,16,0x00ffffff );
217 x2=y2=0;
218 for (sy=(y3-4); sy < (y3+4); sy++)
219 for (sx=(x3-4); sx < (x3+4); sx++)
220 {
221 sad = psad_00 ( f4+(x)+(y)*w,f2+(x+sx)+(y+sy)*w,w,16,0x00ffffff );
222 sad += psad_00 ( f4+(x+8)+(y)*w,f2+(x+sx+8)+(y+sy)*w,w,16,0x00ffffff );
223 if(sad<min)
224 {
225 x2 = sx;
226 y2 = sy;
227 min = sad;
228 }
229 }
230
231 // find best matching 16x16 block for f6
232 min=psad_00 ( f4+(x)+(y)*w,f6+(x)+(y)*w,w,16,0x00ffffff );
233 x6=y6=0;
234 for (sy=(y5-4); sy < (y5+4); sy++)
235 for (sx=(x5-4); sx < (x5+4); sx++)
236 {
237 sad = psad_00 ( f4+(x)+(y)*w,f6+(x+sx)+(y+sy)*w,w,16,0x00ffffff );
238 sad += psad_00 ( f4+(x+8)+(y)*w,f6+(x+sx+8)+(y+sy)*w,w,16,0x00ffffff );
239 if(sad<min)
240 {
241 x6 = sx;
242 y6 = sy;
243 min = sad;
244 }
245 }
246
247 // find best matching 16x16 block for f2
248 min=psad_00 ( f4+(x)+(y)*w,f1+(x)+(y)*w,w,16,0x00ffffff );
249 x1=y1=0;
250 for (sy=(y2-4); sy < (y2+4); sy++)
251 for (sx=(x2-4); sx < (x2+4); sx++)
252 {
253 sad = psad_00 ( f4+(x)+(y)*w,f1+(x+sx)+(y+sy)*w,w,16,0x00ffffff );
254 sad += psad_00 ( f4+(x+8)+(y)*w,f1+(x+sx+8)+(y+sy)*w,w,16,0x00ffffff );
255 if(sad<min)
256 {
257 x1 = sx;
258 y1 = sy;
259 min = sad;
260 }
261 }
262
263 // find best matching 16x16 block for f7
264 min=psad_00 ( f4+(x)+(y)*w,f7+(x)+(y)*w,w,16,0x00ffffff );
265 x7=y7=0;
266 for (sy=(y6-4); sy < (y6+4); sy++)
267 for (sx=(x6-4); sx < (x6+4); sx++)
268 {
269 sad = psad_00 ( f4+(x)+(y)*w,f7+(x+sx)+(y+sy)*w,w,16,0x00ffffff );
270 sad += psad_00 ( f4+(x+8)+(y)*w,f7+(x+sx+8)+(y+sy)*w,w,16,0x00ffffff );
271 if(sad<min)
272 {
273 x7 = sx;
274 y7 = sy;
275 min = sad;
276 }
277 }
278
279 for (sy=0; sy < 16; sy++)
280 for (sx=0; sx < 16; sx++)
281 {
282 // gauss-filtered reference pixel
283 r = *(f4+(x+sx-1)+(y+sy-1)*w);
284 r += *(f4+(x+sx )+(y+sy-1)*w)*2;
285 r += *(f4+(x+sx+1)+(y+sy-1)*w);
286 r += *(f4+(x+sx-1)+(y+sy )*w)*2;
287 r += *(f4+(x+sx )+(y+sy )*w)*4;
288 r += *(f4+(x+sx+1)+(y+sy )*w)*2;
289 r += *(f4+(x+sx-1)+(y+sy+1)*w);
290 r += *(f4+(x+sx )+(y+sy+1)*w)*2;
291 r += *(f4+(x+sx+1)+(y+sy+1)*w);
292 r /= 16;
293
294 // add non-filtered reference to the accummulator
295 m = *(f4+(x+sx )+(y+sy )*w)*t;
296 c = t;
297
298 // gauss-filtered and translated test pixel
299 sx += x1;
300 sy += y1;
301 v = *(f1+(x+sx-1)+(y+sy-1)*w);
302 v += *(f1+(x+sx )+(y+sy-1)*w)*2;
303 v += *(f1+(x+sx+1)+(y+sy-1)*w);
304 v += *(f1+(x+sx-1)+(y+sy )*w)*2;
305 v += *(f1+(x+sx )+(y+sy )*w)*4;
306 v += *(f1+(x+sx+1)+(y+sy )*w)*2;
307 v += *(f1+(x+sx-1)+(y+sy+1)*w);
308 v += *(f1+(x+sx )+(y+sy+1)*w)*2;
309 v += *(f1+(x+sx+1)+(y+sy+1)*w);
310 sx -= x1;
311 sy -= y1;
312 v /= 16;
313
314 // add weighted and translated but non-filtered test-pixel
315 d = t - abs (r-v);
316 d = d<0? 0:d;
317 c += d;
318 m += *(f1+(x+sx+x1)+(y+sy+y1)*w)*d;
319
320 // gauss-filtered and translated test pixel
321 sx += x2;
322 sy += y2;
323 v = *(f2+(x+sx-1)+(y+sy-1)*w);
324 v += *(f2+(x+sx )+(y+sy-1)*w)*2;
325 v += *(f2+(x+sx+1)+(y+sy-1)*w);
326 v += *(f2+(x+sx-1)+(y+sy )*w)*2;
327 v += *(f2+(x+sx )+(y+sy )*w)*4;
328 v += *(f2+(x+sx+1)+(y+sy )*w)*2;
329 v += *(f2+(x+sx-1)+(y+sy+1)*w);
330 v += *(f2+(x+sx )+(y+sy+1)*w)*2;
331 v += *(f2+(x+sx+1)+(y+sy+1)*w);
332 sx -= x2;
333 sy -= y2;
334 v /= 16;
335
336 // add weighted and translated but non-filtered test-pixel
337 d = t - abs (r-v);
338 d = d<0? 0:d;
339 c += d;
340 m += *(f2+(x+sx+x2)+(y+sy+y2)*w)*d;
341
342 // gauss-filtered and translated test pixel
343 sx += x3;
344 sy += y3;
345 v = *(f3+(x+sx-1)+(y+sy-1)*w);
346 v += *(f3+(x+sx )+(y+sy-1)*w)*2;
347 v += *(f3+(x+sx+1)+(y+sy-1)*w);
348 v += *(f3+(x+sx-1)+(y+sy )*w)*2;
349 v += *(f3+(x+sx )+(y+sy )*w)*4;
350 v += *(f3+(x+sx+1)+(y+sy )*w)*2;
351 v += *(f3+(x+sx-1)+(y+sy+1)*w);
352 v += *(f3+(x+sx )+(y+sy+1)*w)*2;
353 v += *(f3+(x+sx+1)+(y+sy+1)*w);
354 sx -= x3;
355 sy -= y3;
356 v /= 16;
357
358 // add weighted and translated but non-filtered test-pixel
359 d = t - abs (r-v);
360 d = d<0? 0:d;
361 c += d;
362 m += *(f3+(x+sx+x3)+(y+sy+y3)*w)*d;
363
364 // gauss-filtered and translated test pixel
365 sx += x5;
366 sy += y5;
367 v = *(f5+(x+sx-1)+(y+sy-1)*w);
368 v += *(f5+(x+sx )+(y+sy-1)*w)*2;
369 v += *(f5+(x+sx+1)+(y+sy-1)*w);
370 v += *(f5+(x+sx-1)+(y+sy )*w)*2;
371 v += *(f5+(x+sx )+(y+sy )*w)*4;
372 v += *(f5+(x+sx+1)+(y+sy )*w)*2;
373 v += *(f5+(x+sx-1)+(y+sy+1)*w);
374 v += *(f5+(x+sx )+(y+sy+1)*w)*2;
375 v += *(f5+(x+sx+1)+(y+sy+1)*w);
376 sx -= x5;
377 sy -= y5;
378 v /= 16;
379
380 // add weighted and translated but non-filtered test-pixel
381 d = t - abs (r-v);
382 d = d<0? 0:d;
383 c += d;
384 m += *(f5+(x+sx+x5)+(y+sy+y5)*w)*d;
385
386 // gauss-filtered and translated test pixel
387 sx += x6;
388 sy += y6;
389 v = *(f6+(x+sx-1)+(y+sy-1)*w);
390 v += *(f6+(x+sx )+(y+sy-1)*w)*2;
391 v += *(f6+(x+sx+1)+(y+sy-1)*w);
392 v += *(f6+(x+sx-1)+(y+sy )*w)*2;
393 v += *(f6+(x+sx )+(y+sy )*w)*4;
394 v += *(f6+(x+sx+1)+(y+sy )*w)*2;
395 v += *(f6+(x+sx-1)+(y+sy+1)*w);
396 v += *(f6+(x+sx )+(y+sy+1)*w)*2;
397 v += *(f6+(x+sx+1)+(y+sy+1)*w);
398 sx -= x6;
399 sy -= y6;
400 v /= 16;
401
402 // add weighted and translated but non-filtered test-pixel
403 d = t - abs (r-v);
404 d = d<0? 0:d;
405 c += d;
406 m += *(f6+(x+sx+x6)+(y+sy+y6)*w)*d;
407
408 // gauss-filtered and translated test pixel
409 sx += x7;
410 sy += y7;
411 v = *(f7+(x+sx-1)+(y+sy-1)*w);
412 v += *(f7+(x+sx )+(y+sy-1)*w)*2;
413 v += *(f7+(x+sx+1)+(y+sy-1)*w);
414 v += *(f7+(x+sx-1)+(y+sy )*w)*2;
415 v += *(f7+(x+sx )+(y+sy )*w)*4;
416 v += *(f7+(x+sx+1)+(y+sy )*w)*2;
417 v += *(f7+(x+sx-1)+(y+sy+1)*w);
418 v += *(f7+(x+sx )+(y+sy+1)*w)*2;
419 v += *(f7+(x+sx+1)+(y+sy+1)*w);
420 sx -= x7;
421 sy -= y7;
422 v /= 16;
423
424 // add weighted and translated but non-filtered test-pixel
425 d = t - abs (r-v);
426 d = d<0? 0:d;
427 c += d;
428 m += *(f7+(x+sx+x7)+(y+sy+y7)*w)*d;
429
430 *(of+(x+sx)+(y+sy)*w) = m/c;
431
432 }
433 }
434 }
435
renoise(uint8_t * frame,int w,int h,int level)436 void renoise (uint8_t * frame, int w, int h, int level )
437 {
438 uint8_t random[8192];
439 int i;
440 static int cnt=0;
441
442 for(i=0;i<8192;i++)
443 random[(i+cnt/2)&8191]=(i+i+i+i*i*i+1-random[i-1]+random[i-20]*random[i-5]*random[i-25])&255;
444 cnt++;
445
446 for(i=0;i<(w*h);i++)
447 *(frame+i)=(*(frame+i)*(255-level)+random[i&8191]*level)/255;
448 }
449
450 #if defined(__SSE2__)
451
tf0(const __m128i mask,const __m128i l0,const __m128i vt,const __m128i vc,const __m128i vb)452 static inline __m128i tf0(const __m128i mask, const __m128i l0, const __m128i vt, const __m128i vc, const __m128i vb) {
453 __m128i k0, k1, k2, k3, d0; /* temp storage, pixel surroundings, 16-bit words */
454
455 /* even pixels */
456 /* extract and add the pixels above and below the current one */
457 k0 = _mm_and_si128(_mm_srli_si128(vt, 1), mask);
458 k1 = _mm_and_si128(_mm_srli_si128(vb, 1), mask);
459 k0 = _mm_add_epi16(k0, k1);
460
461 /* add together the 4 corner pixels diagonal of the current one */
462 k2 = _mm_add_epi16(_mm_and_si128(vt, mask), _mm_and_si128(vb, mask));
463 k2 = _mm_add_epi16(k2, _mm_srli_si128(k2, 2));
464
465 /* add pixels left and right of the current */
466 k3 = _mm_and_si128(vc, mask);
467 k3 = _mm_add_epi16(k3, _mm_srli_si128(k3, 2));
468
469 /* add weighted current pixel and the above results */
470 k1 = _mm_and_si128(_mm_srli_si128(vc, 1), mask);
471 d0 = _mm_slli_epi16(k1, 1); /* center * 4 */
472 d0 = _mm_add_epi16(d0, k0);
473 d0 = _mm_add_epi16(d0, k3);
474 d0 = _mm_slli_epi16(d0, 1); /* + above,below,left,right * 2 */
475 d0 = _mm_add_epi16(d0, k2); /* + diagonal * 1 */
476 d0 = _mm_srli_epi16(d0, 4); /* all / 16 */
477 return d0;
478 }
479
tf1(const __m128i mask,const __m128i l0,const __m128i vt,const __m128i vc,const __m128i vb)480 static inline __m128i tf1(const __m128i mask, const __m128i l0, const __m128i vt, const __m128i vc, const __m128i vb) {
481 __m128i k0, k1, k2, k3, d1;
482
483 k0 = _mm_srli_si128(_mm_add_epi16(_mm_and_si128(vt, mask), _mm_and_si128(vb, mask)), 2);
484
485 k1 = _mm_and_si128(_mm_srli_si128(vt, 1), mask);
486 k2 = _mm_and_si128(_mm_srli_si128(vb, 1), mask);
487 k2 = _mm_add_epi16(k1, k2);
488 k2 = _mm_add_epi16(k2, _mm_srli_si128(k2, 2));
489
490 k3 = _mm_and_si128(_mm_srli_si128(vc, 1), mask);
491 k3 = _mm_add_epi16(k3, _mm_srli_si128(k3, 2));
492
493 k1 = _mm_and_si128(_mm_srli_si128(vc, 2), mask);
494 d1 = _mm_slli_epi16(k1, 1);
495 d1 = _mm_add_epi16(d1, k0);
496 d1 = _mm_add_epi16(d1, k3);
497 d1 = _mm_slli_epi16(d1, 1);
498 d1 = _mm_add_epi16(d1, k2);
499 d1 = _mm_srli_epi16(d1, 4);
500 return d1;
501 }
502
503 /* 8 times as fast on x86_64, 2.2 times as fast on i686 */
temporal_filter_planes_sse2(int idx,int w,int h,int t)504 void temporal_filter_planes_sse2(int idx, int w, int h, int t)
505 {
506 int x, k;
507
508 uint8_t *f4 = frame4[idx];
509 uint8_t *of = outframe[idx];
510
511 uint8_t *f[6] = {
512 frame3[idx], frame2[idx], frame1[idx], frame5[idx], frame6[idx], frame7[idx]
513 };
514
515 if (t == 0) // shortcircuit filter if t = 0...
516 {
517 memcpy (of, f4, w * h);
518 return;
519 }
520
521 /* vt: x x x x x x x x x x x x x x x x
522 * vc: x 0 1 2 3 4 5 6 7 8 9 a b c d x
523 * vb: x x x x x x x x x x x x x x x x
524 *
525 * c0, d0, m0 and m1 store the respective values for even pixels,
526 * m0 for 0, 2, 4 and 6, m1 for 8, a and c in its lower dwords;
527 * c1, d1, m2 and m3 store the values for the odd pixels,
528 * m2 for 1, 3, 5 and 7, m3 for 9, b and d in its lower dwords;
529 * whereas computation for each pixel is as follows (equivalent to the
530 * non-SSE-accelerated variant of this function):
531 *
532 * for each pixel k1 of the 14 pixels per block:
533 * vt: k2 k0 k2
534 * vc: k3 k1 k3
535 * vb: k2 k0 k2
536 *
537 * r = *f4++;
538 * c = t + 1;
539 * m = r * (t+1);
540 * for (i=0; i<6; i++) {
541 * d = sum(k2) + 2 * (sum(k0) + sum(k3) + 2 * k1)
542 * d = saturate(t - abs(r-d));
543 * c += d;
544 * m += *f[i]++ * d;
545 * }
546 * m *= 2;
547 * *of++ = (m / c + 1) / 2;
548 *
549 * For each frame, first the 7 even pixels are being processed, then the 7
550 * odd ones.
551 */
552
553 __m128i vt, vc, vb; /* top-, center- and bottom-line of 3x16 block */
554 __m128i c0, c1, m0, m1, m2, m3; /* c: 16-bit words, m: 32-bit dwords */
555 __m128i d0, d1, r0, r1; /* 16-bit words, 0: even, 1: odd */
556 const __m128i mask = _mm_set1_epi16(0x00ff);
557 const __m128i l0 = _mm_set1_epi16(t);
558
559 #ifndef OLD_ROUNDING
560 _MM_SET_ROUNDING_MODE(_MM_ROUND_NEAREST);
561 #endif
562
563 for (x = 0; x < (w * h); x+=14)
564 {
565 vt = _mm_loadu_si128((__m128i *)(f4 - 1 - w));
566 vc = _mm_loadu_si128((__m128i *)(f4 - 1 ));
567 vb = _mm_loadu_si128((__m128i *)(f4 - 1 + w));
568 f4 += 14;
569
570 r0 = tf0(mask, l0, vt, vc, vb); /* even pixels */
571 r1 = tf1(mask, l0, vt, vc, vb); /* odd pixels */
572
573 c0 = c1 = _mm_set1_epi16(t + 1);
574
575 /* m = *f4 * (t+1) */
576 /* The low 16-bit of the multiplication suffice, because both operands are
577 * only 8-bit-values */
578 d0 = _mm_mullo_epi16(_mm_and_si128(_mm_srli_si128(vc, 1), mask), c0);
579 m0 = _mm_unpacklo_epi16(d0, _mm_setzero_si128());
580 m1 = _mm_unpackhi_epi16(d0, _mm_setzero_si128());
581 d1 = _mm_mullo_epi16(_mm_and_si128(_mm_srli_si128(vc, 2), mask), c0);
582 m2 = _mm_unpacklo_epi16(d1, _mm_setzero_si128());
583 m3 = _mm_unpackhi_epi16(d1, _mm_setzero_si128());
584
585 for (k=0; k<sizeof(f)/sizeof(*f); k++) {
586 vt = _mm_loadu_si128((__m128i *)(f[k] - 1 - w));
587 vc = _mm_loadu_si128((__m128i *)(f[k] - 1 ));
588 vb = _mm_loadu_si128((__m128i *)(f[k] - 1 + w));
589 f[k] += 14;
590
591 /* even pixels */
592 d0 = tf0(mask, l0, vt, vc, vb);
593 /* d = l - abs(r-d) */
594 d0 = _mm_subs_epu16(l0, _mm_sub_epi16(_mm_max_epi16(r0, d0), _mm_min_epi16(r0, d0)));
595 c0 = _mm_add_epi16(c0, d0);
596 /* d *= *f[k] */
597 d0 = _mm_mullo_epi16(_mm_and_si128(_mm_srli_si128(vc, 1), mask), d0);
598 m0 = _mm_add_epi32(m0, _mm_unpacklo_epi16(d0, _mm_setzero_si128()));
599 m1 = _mm_add_epi32(m1, _mm_unpackhi_epi16(d0, _mm_setzero_si128()));
600
601 /* odd pixels */
602 d1 = tf1(mask, l0, vt, vc, vb);
603 d1 = _mm_subs_epu16(l0, _mm_sub_epi16(_mm_max_epi16(r1, d1), _mm_min_epi16(r1, d1)));
604 c1 = _mm_add_epi16(c1, d1);
605 d1 = _mm_mullo_epi16(_mm_and_si128(_mm_srli_si128(vc, 2), mask), d1);
606 m2 = _mm_add_epi32(m2, _mm_unpacklo_epi16(d1, _mm_setzero_si128()));
607 m3 = _mm_add_epi32(m3, _mm_unpackhi_epi16(d1, _mm_setzero_si128()));
608 }
609
610 /* extract results from c0, c1 and m0 to m3:
611 * 14 byte result = interleave_bytes((m0,m1) / c0, (m2,m3) / c1) */
612 #ifdef OLD_ROUNDING
613 /* r = m*2/c */
614 /* multiply each m with 2 */
615 m0 = _mm_slli_epi32(m0, 1);
616 m1 = _mm_slli_epi32(m1, 1);
617 m2 = _mm_slli_epi32(m2, 1);
618 m3 = _mm_slli_epi32(m3, 1);
619 #endif
620
621 /* m0-m3 each contain 4 19-bit values ((8-bit)^2 * 8), so a single precision
622 * float with 23-bit mantissa can hold these without precision loss */
623 /* r = m/c */
624 __m128i k0 = _mm_setzero_si128();
625 __m128 f0, f1, f2, f3;
626 f0 = _mm_div_ps(_mm_cvtepi32_ps(m0), _mm_cvtepi32_ps(_mm_unpacklo_epi16(c0, k0)));
627 f1 = _mm_div_ps(_mm_cvtepi32_ps(m1), _mm_cvtepi32_ps(_mm_unpackhi_epi16(c0, k0)));
628 f2 = _mm_div_ps(_mm_cvtepi32_ps(m2), _mm_cvtepi32_ps(_mm_unpacklo_epi16(c1, k0)));
629 f3 = _mm_div_ps(_mm_cvtepi32_ps(m3), _mm_cvtepi32_ps(_mm_unpackhi_epi16(c1, k0)));
630
631 #ifdef OLD_ROUNDING
632 m0 = _mm_cvttps_epi32(f0);
633 m1 = _mm_cvttps_epi32(f1);
634 m2 = _mm_cvttps_epi32(f2);
635 m3 = _mm_cvttps_epi32(f3);
636 #else
637 m0 = _mm_cvtps_epi32(f0);
638 m1 = _mm_cvtps_epi32(f1);
639 m2 = _mm_cvtps_epi32(f2);
640 m3 = _mm_cvtps_epi32(f3);
641 #endif
642
643 r0 = _mm_packs_epi32(m0, m1); /* 7 words f0,f1 */
644 r1 = _mm_packs_epi32(m2, m3); /* 7 words f2,f3 */
645
646 #ifdef OLD_ROUNDING
647 /* (r+1)/2 */
648 k0 = _mm_set1_epi16(1);
649 r0 = _mm_srli_epi16(_mm_add_epi16(r0, k0), 1);
650 r1 = _mm_srli_epi16(_mm_add_epi16(r1, k0), 1);
651 #endif
652
653 /* 7 words r0 interleaved with 7 words r1, all converted to bytes */
654 r0 = _mm_packus_epi16(_mm_unpacklo_epi16(r0, r1), _mm_unpackhi_epi16(r0, r1));
655 /* write 16, but the 2 bytes overlap will be overwritten by the next pass */
656 _mm_storeu_si128((__m128i *)of, r0);
657 of += 14;
658 }
659 _mm_empty();
660 }
661 #endif
662
temporal_filter_planes_p(int idx,int w,int h,int t)663 void temporal_filter_planes_p (int idx, int w, int h, int t)
664 {
665 uint32_t r, c, m;
666 int32_t d;
667 int x;
668
669 uint8_t *f1 = frame1[idx];
670 uint8_t *f2 = frame2[idx];
671 uint8_t *f3 = frame3[idx];
672 uint8_t *f4 = frame4[idx];
673 uint8_t *f5 = frame5[idx];
674 uint8_t *f6 = frame6[idx];
675 uint8_t *f7 = frame7[idx];
676 uint8_t *of = outframe[idx];
677
678 if (t == 0) // shortcircuit filter if t = 0...
679 {
680 memcpy (of, f4, w * h);
681 return;
682 }
683
684 for (x = 0; x < (w * h); x++)
685 {
686 r = *(f4-1-w);
687 r += *(f4 -w)*2;
688 r += *(f4+1-w);
689 r += *(f4-1 )*2;
690 r += *(f4 )*4;
691 r += *(f4+1 )*2;
692 r += *(f4-1+w);
693 r += *(f4 +w)*2;
694 r += *(f4+1+w);
695 r /= 16;
696
697 m = *(f4)*(t+1)*2;
698 c = t+1;
699
700 d = *(f3-1-w);
701 d += *(f3 -w)*2;
702 d += *(f3+1-w);
703 d += *(f3-1 )*2;
704 d += *(f3 )*4;
705 d += *(f3+1 )*2;
706 d += *(f3-1+w);
707 d += *(f3 +w)*2;
708 d += *(f3+1+w);
709 d /= 16;
710
711 d = t - abs (r-d);
712 d = d<0? 0:d;
713 c += d;
714 m += *(f3)*d*2;
715
716 d = *(f2-1-w);
717 d += *(f2 -w)*2;
718 d += *(f2+1-w);
719 d += *(f2-1 )*2;
720 d += *(f2 )*4;
721 d += *(f2+1 )*2;
722 d += *(f2-1+w);
723 d += *(f2 +w)*2;
724 d += *(f2+1+w);
725 d /= 16;
726
727 d = t - abs (r-d);
728 d = d<0? 0:d;
729 c += d;
730 m += *(f2)*d*2;
731
732 d = *(f1-1-w);
733 d += *(f1 -w)*2;
734 d += *(f1+1-w);
735 d += *(f1-1 )*2;
736 d += *(f1 )*4;
737 d += *(f1+1 )*2;
738 d += *(f1-1+w);
739 d += *(f1 +w)*2;
740 d += *(f1+1+w);
741 d /= 16;
742
743 d = t - abs (r-d);
744 d = d<0? 0:d;
745 c += d;
746 m += *(f1)*d*2;
747
748 d = *(f5-1-w);
749 d += *(f5 -w)*2;
750 d += *(f5+1-w);
751 d += *(f5-1 )*2;
752 d += *(f5 )*4;
753 d += *(f5+1 )*2;
754 d += *(f5-1+w);
755 d += *(f5 +w)*2;
756 d += *(f5+1+w);
757 d /= 16;
758
759 d = t - abs (r-d);
760 d = d<0? 0:d;
761 c += d;
762 m += *(f5)*d*2;
763
764 d = *(f6-1-w);
765 d += *(f6 -w)*2;
766 d += *(f6+1-w);
767 d += *(f6-1 )*2;
768 d += *(f6 )*4;
769 d += *(f6+1 )*2;
770 d += *(f6-1+w);
771 d += *(f6 +w)*2;
772 d += *(f6+1+w);
773 d /= 16;
774
775 d = t - abs (r-d);
776 d = d<0? 0:d;
777 c += d;
778 m += *(f6)*d*2;
779
780 d = *(f7-1-w);
781 d += *(f7 -w)*2;
782 d += *(f7+1-w);
783 d += *(f7-1 )*2;
784 d += *(f7 )*4;
785 d += *(f7+1 )*2;
786 d += *(f7-1+w);
787 d += *(f7 +w)*2;
788 d += *(f7+1+w);
789 d /= 16;
790
791 d = t - abs (r-d);
792 d = d<0? 0:d;
793 c += d;
794 m += *(f7)*d*2;
795
796 *(of) = ((m/c)+1)/2;
797
798 f1++;
799 f2++;
800 f3++;
801 f4++;
802 f5++;
803 f6++;
804 f7++;
805 of++;
806 }
807 }
808
809 #if defined(__SSE2__)
810 /* 4 to 5 times faster */
filter_plane_median_sse2(uint8_t * plane,int w,int h,int level)811 void filter_plane_median_sse2(uint8_t *plane, int w, int h, int level) {
812 int i;
813 int avg; /*should not be needed any more */
814 int cnt; /* should not be needed any more */
815 uint8_t * p;
816 uint8_t * d;
817
818 if(level==0) return;
819
820 p = plane;
821 d = scratchplane1;
822
823 // remove strong outliers from the image. An outlier is a pixel which lies outside
824 // of max-thres and min+thres of the surrounding pixels. This should not cause blurring
825 // and it should leave an evenly spread noise-floor to the image.
826 for (i=0; i<=(w*h); i+=14) {
827 __m128i t, c, b, min, max, minmin, maxmax;
828
829 t = _mm_loadu_si128((__m128i *)&p[i-1-w]);
830 c = _mm_loadu_si128((__m128i *)&p[i-1 ]);
831 b = _mm_loadu_si128((__m128i *)&p[i-1+w]);
832 min = _mm_min_epu8(t, b); /* k: (0,k), (2,k) */
833 max = _mm_max_epu8(t, b);
834 minmin = _mm_min_epu8(min, c); /* k: (0,k), (1,k), (2,k) */
835 maxmax = _mm_max_epu8(max, c);
836
837 /* k: (0,k), (1,k), (2,k). (0,k+2), (1,k+2), (2,k+2) */
838 minmin = _mm_min_epu8(minmin, _mm_srli_si128(minmin, 2));
839 maxmax = _mm_max_epu8(maxmax, _mm_srli_si128(maxmax, 2));
840 /* k: (0,k), (1,k), (2,k). (0,k+2), (1,k+2), (2,k+2), (0,k+1), (2,k+1) */
841 min = _mm_min_epu8(minmin, _mm_srli_si128(min, 1));
842 max = _mm_max_epu8(maxmax, _mm_srli_si128(max, 1));
843
844 /* limit c to range [min,max] */
845 c = _mm_max_epu8(min, _mm_min_epu8(max, _mm_srli_si128(c, 1)));
846 /* write 14 valid pixels, the 2 remaining bytes are overwritten subsequently
847 * or lie outside the frame area */
848 _mm_storeu_si128((__m128i *)&d[i], c);
849 }
850
851 // in the second stage we try to average similar spatial pixels, only. This, like
852 // a median, should also not reduce sharpness but flatten the noisefloor. This
853 // part is quite similar to what 2dclean/yuvmedianfilter do. But because of the
854 // different weights given to the pixels it is less aggressive...
855
856 p = scratchplane1;
857 d = scratchplane2;
858
859 // this filter needs values outside of the imageplane, so we just copy the first line
860 // and the last line into the out-of-range area...
861
862 memcpy ( p-w , p, w );
863 memcpy ( p-w*2, p, w );
864
865 memcpy ( p+(w*h) , p+(w*h)-w, w );
866 memcpy ( p+(w*h)+w, p+(w*h)-w, w );
867
868 __m128i lvl = _mm_set1_epi16(level);
869
870 #ifndef OLD_ROUNDING
871 _MM_SET_ROUNDING_MODE(_MM_ROUND_NEAREST);
872 #endif
873
874 for (i=0; i<=(w*h); i+=4)
875 {
876 uint64_t k0, k1, k2, k3, k6;
877 __m128i c0, c1, v[4], t[4], e[4], a[4];
878
879 /* p points to pixel a. There are 3 stages, each processing 8 surrounding
880 * pixels, resulting in the complete neighbourhood of 24 pixels.
881 *
882 * 0 1 2 3 4 5 6 7
883 * k0: x x x x x x x x
884 * k1: x x x x x x x x
885 * k6: x x a b c d x x
886 * k2: x x x x x x x x
887 * k3: x x x x x x x x
888 *
889 * a,b and c,d each share two 2x4-blocks in their surrounding area, which
890 * are processed by stages 1 and 2. These blocks are referred to as c0 and
891 * c1 by the code below. The remaining surrounding pixels are processed for
892 * each pixel out of a,b,c,d individually in stage 3.
893 * Stage 4 assembles the results, adds the weighted averages and weights
894 * together for each pixel, computes the median and stores it in the
895 * scratch plane.
896 * Coordinates (y,x) originate from the top left of the above diagram. */
897
898 k0 = *(uint64_t *)(p-w*2-2);
899 k1 = *(uint64_t *)(p-w*1-2);
900 k6 = *(uint64_t *)(p -2);
901 k2 = *(uint64_t *)(p+w*1-2);
902 k3 = *(uint64_t *)(p+w*2-2);
903
904 v[0] = _mm_set1_epi16((k6 >> 16) & 0xff); /* pixel a */
905 v[1] = _mm_set1_epi16((k6 >> 24) & 0xff); /* pixel b */
906 v[2] = _mm_set1_epi16((k6 >> 32) & 0xff); /* pixel c */
907 v[3] = _mm_set1_epi16((k6 >> 40) & 0xff); /* pixel d */
908
909 // stage 1: c0 for a,b: (0,1) -> (1,4), c1 for c,d: (0,3) -> (1,6)
910 c0 = _mm_set_epi32((k0 >> 8) & 0xff00ff, (k0 >> 16) & 0xff00ff,
911 (k1 >> 8) & 0xff00ff, (k1 >> 16) & 0xff00ff);
912 c1 = _mm_set_epi32((k0 >> 24) & 0xff00ff, (k0 >> 32) & 0xff00ff,
913 (k1 >> 24) & 0xff00ff, (k1 >> 32) & 0xff00ff);
914 t[0] = _mm_sub_epi16(_mm_max_epu8(c0, v[0]), _mm_min_epu8(c0, v[0]));
915 t[1] = _mm_sub_epi16(_mm_max_epu8(c0, v[1]), _mm_min_epu8(c0, v[1]));
916 t[2] = _mm_sub_epi16(_mm_max_epu8(c1, v[2]), _mm_min_epu8(c1, v[2]));
917 t[3] = _mm_sub_epi16(_mm_max_epu8(c1, v[3]), _mm_min_epu8(c1, v[3]));
918 e[0] = _mm_subs_epu16(lvl, t[0]);
919 e[1] = _mm_subs_epu16(lvl, t[1]);
920 e[2] = _mm_subs_epu16(lvl, t[2]);
921 e[3] = _mm_subs_epu16(lvl, t[3]);
922 a[0] = _mm_madd_epi16(e[0], c0);
923 a[1] = _mm_madd_epi16(e[1], c0);
924 a[2] = _mm_madd_epi16(e[2], c1);
925 a[3] = _mm_madd_epi16(e[3], c1);
926
927 // stage 2: c0 for a,b: (3,1) -> (4,4), c1 for c,d: (3,3) -> (4,6)
928 c0 = _mm_set_epi32((k2 >> 8) & 0xff00ff, (k2 >> 16) & 0xff00ff,
929 (k3 >> 8) & 0xff00ff, (k3 >> 16) & 0xff00ff);
930 c1 = _mm_set_epi32((k2 >> 24) & 0xff00ff, (k2 >> 32) & 0xff00ff,
931 (k3 >> 24) & 0xff00ff, (k3 >> 32) & 0xff00ff);
932 t[0] = _mm_sub_epi16(_mm_max_epu8(c0, v[0]), _mm_min_epu8(c0, v[0]));
933 t[1] = _mm_sub_epi16(_mm_max_epu8(c0, v[1]), _mm_min_epu8(c0, v[1]));
934 t[2] = _mm_sub_epi16(_mm_max_epu8(c1, v[2]), _mm_min_epu8(c1, v[2]));
935 t[3] = _mm_sub_epi16(_mm_max_epu8(c1, v[3]), _mm_min_epu8(c1, v[3]));
936 t[0] = _mm_subs_epu16(lvl, t[0]);
937 t[1] = _mm_subs_epu16(lvl, t[1]);
938 t[2] = _mm_subs_epu16(lvl, t[2]);
939 t[3] = _mm_subs_epu16(lvl, t[3]);
940 e[0] = _mm_add_epi16(t[0], e[0]);
941 e[1] = _mm_add_epi16(t[1], e[1]);
942 e[2] = _mm_add_epi16(t[2], e[2]);
943 e[3] = _mm_add_epi16(t[3], e[3]);
944 a[0] = _mm_add_epi32(_mm_madd_epi16(t[0], c0), a[0]);
945 a[1] = _mm_add_epi32(_mm_madd_epi16(t[1], c0), a[1]);
946 a[2] = _mm_add_epi32(_mm_madd_epi16(t[2], c1), a[2]);
947 a[3] = _mm_add_epi32(_mm_madd_epi16(t[3], c1), a[3]);
948
949 // stage 3:
950 // pixel a: (0,0) -> (4,0), (2,1), (2,3), (2,4)
951 c0 = _mm_set_epi32(((k0 & 0xff) << 16) | (k1 & 0xff),
952 ((k2 & 0xff) << 16) | (k3 & 0xff),
953 (k6 >> 8) & 0xff00ff,
954 (k6 & 0xff) | ((k6 >> 16) & 0xff0000));
955 t[0] = _mm_sub_epi16(_mm_max_epu8(c0, v[0]), _mm_min_epu8(c0, v[0]));
956 t[0] = _mm_subs_epu16(lvl, t[0]);
957 e[0] = _mm_add_epi16(t[0], e[0]);
958 a[0] = _mm_add_epi32(_mm_madd_epi16(t[0], c0), a[0]);
959
960 // pixel c: (0,2) -> (4,2), (2,3), (2,5), (2,6)
961 c1 = _mm_set_epi32((k0 & 0xff0000) | ((k1 >> 16) & 0xff),
962 (k2 & 0xff0000) | ((k3 >> 16) & 0xff),
963 (k6 >> 24) & 0xff00ff,
964 ((k6 >> 16) & 0xff) | ((k6 >> 32) & 0xff0000));
965 t[2] = _mm_sub_epi16(_mm_max_epu8(c1, v[2]), _mm_min_epu8(c1, v[2]));
966 t[2] = _mm_subs_epu16(lvl, t[2]);
967 e[2] = _mm_add_epi16(t[2], e[2]);
968 a[2] = _mm_add_epi32(_mm_madd_epi16(t[2], c1), a[2]);
969
970 // pixel b: (0,5) -> (4,5), (2,1), (2,2), (2,4), (2,5)
971 c0 = _mm_set_epi32(((k0 >> 24) & 0xff0000) | ((k1 >> 40) & 0xff),
972 ((k2 >> 24) & 0xff0000) | ((k3 >> 40) & 0xff),
973 (k6 >> 16) & 0xff00ff,
974 ((k6 >> 8) & 0xff) | ((k6 >> 24) & 0xff0000));
975 t[1] = _mm_sub_epi16(_mm_max_epu8(c0, v[1]), _mm_min_epu8(c0, v[1]));
976 t[1] = _mm_subs_epu16(lvl, t[1]);
977 e[1] = _mm_add_epi16(t[1], e[1]);
978 a[1] = _mm_add_epi32(_mm_madd_epi16(t[1], c0), a[1]);
979
980 // pixel d: (0,7) -> (4,7), (2,3), (2,4), (2,6), (2,7)
981 c1 = _mm_set_epi32(((k0 >> 40) & 0xff0000) | (k1 >> 56),
982 ((k2 >> 40) & 0xff0000) | (k3 >> 56),
983 (k6 >> 32) & 0xff00ff,
984 ((k6 >> 24) & 0xff) | ((k6 >> 40) & 0xff0000));
985 t[3] = _mm_sub_epi16(_mm_max_epu8(c1, v[3]), _mm_min_epu8(c1, v[3]));
986 t[3] = _mm_subs_epu16(lvl, t[3]);
987 e[3] = _mm_add_epi16(t[3], e[3]);
988 a[3] = _mm_add_epi32(_mm_madd_epi16(t[3], c1), a[3]);
989
990 /* add them all together (a loop j=0 to 4 slows things down with gcc 4.4.4) */
991 uint32_t tmp;
992
993 #if defined(__SSE3__)
994 int j;
995 __m128 f0, f1, f2, flvl;
996 __m128i zero, vv;
997 flvl = _mm_set1_ps((float)level);
998 zero = _mm_setzero_si128();
999 vv = _mm_set_epi32((k6 >> 40) & 0xff, (k6 >> 32) & 0xff, (k6 >> 24) & 0xff, (k6 >> 16) & 0xff);
1000
1001 f0 = _mm_hadd_ps(_mm_cvtepi32_ps(a[0]), _mm_cvtepi32_ps(a[1]));
1002 f1 = _mm_hadd_ps(_mm_cvtepi32_ps(a[2]), _mm_cvtepi32_ps(a[3]));
1003 f0 = _mm_hadd_ps(f0, f1);
1004 f0 = _mm_add_ps(f0, _mm_mul_ps(flvl, _mm_cvtepi32_ps(vv)));
1005
1006 # ifdef OLD_ROUNDING
1007 /* avg *= 2 */
1008 f0 = _mm_mul_ps(f0, _mm_set1_ps(2.0f));
1009 # endif
1010
1011 for (j=0; j<4; j++)
1012 e[j] = _mm_unpacklo_epi16(_mm_add_epi16(e[j], _mm_srli_si128(e[j], 8)), zero);
1013 f1 = _mm_hadd_ps(_mm_cvtepi32_ps(e[0]), _mm_cvtepi32_ps(e[1]));
1014 f2 = _mm_hadd_ps(_mm_cvtepi32_ps(e[2]), _mm_cvtepi32_ps(e[3]));
1015 f1 = _mm_hadd_ps(f1, f2);
1016 f1 = _mm_add_ps(f1, flvl);
1017
1018 f0 = _mm_div_ps(f0, f1);
1019 # ifdef OLD_ROUNDING
1020 /* r = (r+1) / 2 */
1021 vv = _mm_cvttps_epi32(f0);
1022 vv = _mm_srli_epi32(_mm_add_epi32(vv, _mm_set1_epi32(1)), 1);
1023 # else
1024 vv = _mm_cvtps_epi32(f0);
1025 # endif
1026 vv = _mm_packus_epi16(_mm_packs_epi32(vv, zero), zero);
1027 tmp = _mm_cvtsi128_si32(vv);
1028
1029 #else
1030
1031 // p[0]
1032 e[0] = _mm_add_epi16(e[0], _mm_srli_si128(e[0], 8)); /* 8 words */
1033 e[0] = _mm_add_epi16(e[0], _mm_srli_si128(e[0], 4));
1034 e[0] = _mm_add_epi16(e[0], _mm_srli_si128(e[0], 2));
1035 cnt = level + (_mm_cvtsi128_si32(e[0]) & 0xffff);
1036 a[0] = _mm_add_epi32(a[0], _mm_srli_si128(a[0], 8)); /* 4 dwords */
1037 a[0] = _mm_add_epi32(a[0], _mm_srli_si128(a[0], 4));
1038 avg = p[0] * level * 2 + (_mm_cvtsi128_si32(a[0]) << 1);
1039 tmp = ((avg/cnt) + 1) / 2;
1040
1041 // p[1]
1042 e[1] = _mm_add_epi16(e[1], _mm_srli_si128(e[1], 8)); /* 8 words */
1043 e[1] = _mm_add_epi16(e[1], _mm_srli_si128(e[1], 4));
1044 e[1] = _mm_add_epi16(e[1], _mm_srli_si128(e[1], 2));
1045 cnt = level + (_mm_cvtsi128_si32(e[1]) & 0xffff);
1046 a[1] = _mm_add_epi32(a[1], _mm_srli_si128(a[1], 8)); /* 4 dwords */
1047 a[1] = _mm_add_epi32(a[1], _mm_srli_si128(a[1], 4));
1048 avg = p[1] * level * 2 + (_mm_cvtsi128_si32(a[1]) << 1);
1049 tmp |= (((avg/cnt) + 1) / 2) << 8;
1050
1051 // p[2]
1052 e[2] = _mm_add_epi16(e[2], _mm_srli_si128(e[2], 8)); /* 8 words */
1053 e[2] = _mm_add_epi16(e[2], _mm_srli_si128(e[2], 4));
1054 e[2] = _mm_add_epi16(e[2], _mm_srli_si128(e[2], 2));
1055 cnt = level + (_mm_cvtsi128_si32(e[2]) & 0xffff);
1056 a[2] = _mm_add_epi32(a[2], _mm_srli_si128(a[2], 8)); /* 4 dwords */
1057 a[2] = _mm_add_epi32(a[2], _mm_srli_si128(a[2], 4));
1058 avg = p[2] * level * 2 + (_mm_cvtsi128_si32(a[2]) << 1);
1059 tmp |= (((avg/cnt) + 1) / 2) << 16;
1060
1061 // p[3]
1062 e[3] = _mm_add_epi16(e[3], _mm_srli_si128(e[3], 8)); /* 8 words */
1063 e[3] = _mm_add_epi16(e[3], _mm_srli_si128(e[3], 4));
1064 e[3] = _mm_add_epi16(e[3], _mm_srli_si128(e[3], 2));
1065 cnt = level + (_mm_cvtsi128_si32(e[3]) & 0xffff);
1066 a[3] = _mm_add_epi32(a[3], _mm_srli_si128(a[3], 8)); /* 4 dwords */
1067 a[3] = _mm_add_epi32(a[3], _mm_srli_si128(a[3], 4));
1068 avg = p[3] * level * 2 + (_mm_cvtsi128_si32(a[3]) << 1);
1069 tmp |= (((avg/cnt) + 1) / 2) << 24;
1070 #endif
1071
1072 *(uint32_t *)d = tmp;
1073
1074 d += 4;
1075 p += 4;
1076 }
1077 _mm_empty();
1078
1079 memcpy(plane,scratchplane2,w*h);
1080 }
1081 #endif
1082
filter_plane_median_p(uint8_t * plane,int w,int h,int level)1083 void filter_plane_median_p ( uint8_t * plane, int w, int h, int level)
1084 {
1085 int i;
1086 int min;
1087 int max;
1088 int avg;
1089 int cnt;
1090 int c;
1091 int e;
1092 uint8_t * p;
1093 uint8_t * d;
1094
1095 if(level==0) return;
1096
1097 p = plane;
1098 d = scratchplane1;
1099
1100 // remove strong outliers from the image. An outlier is a pixel which lies outside
1101 // of max-thres and min+thres of the surrounding pixels. This should not cause blurring
1102 // and it should leave an evenly spread noise-floor to the image.
1103 for(i=0;i<=(w*h);i++)
1104 {
1105 // reset min/max-filter
1106 min=255;
1107 max=0;
1108 // check every remaining position arround the reference-pixel for being min/max...
1109
1110 min=(min>*(p-w-1))? *(p-w-1):min;
1111 max=(max<*(p-w-1))? *(p-w-1):max;
1112 min=(min>*(p-w+0))? *(p-w+0):min;
1113 max=(max<*(p-w+0))? *(p-w+0):max;
1114 min=(min>*(p-w+1))? *(p-w+1):min;
1115 max=(max<*(p-w+1))? *(p-w+1):max;
1116
1117 min=(min>*(p -1))? *(p -1):min;
1118 max=(max<*(p -1))? *(p -1):max;
1119 min=(min>*(p +1))? *(p +1):min;
1120 max=(max<*(p +1))? *(p +1):max;
1121
1122 min=(min>*(p+w-1))? *(p+w-1):min;
1123 max=(max<*(p+w-1))? *(p+w-1):max;
1124 min=(min>*(p+w+0))? *(p+w+0):min;
1125 max=(max<*(p+w+0))? *(p+w+0):max;
1126 min=(min>*(p+w+1))? *(p+w+1):min;
1127 max=(max<*(p+w+1))? *(p+w+1):max;
1128
1129 if( *(p)<(min) )
1130 {
1131 *(d)=min;
1132 }
1133 else
1134 if( *(p)>(max) )
1135 {
1136 *(d)=max;
1137 }
1138 else
1139 {
1140 *(d)=*(p);
1141 }
1142
1143 d++;
1144 p++;
1145 }
1146
1147 // in the second stage we try to average similar spatial pixels, only. This, like
1148 // a median, should also not reduce sharpness but flatten the noisefloor. This
1149 // part is quite similar to what 2dclean/yuvmedianfilter do. But because of the
1150 // different weights given to the pixels it is less aggressive...
1151
1152 p = scratchplane1;
1153 d = scratchplane2;
1154
1155 // this filter needs values outside of the imageplane, so we just copy the first line
1156 // and the last line into the out-of-range area...
1157
1158 memcpy ( p-w , p, w );
1159 memcpy ( p-w*2, p, w );
1160
1161 memcpy ( p+(w*h) , p+(w*h)-w, w );
1162 memcpy ( p+(w*h)+w, p+(w*h)-w, w );
1163
1164 for(i=0;i<=(w*h);i++)
1165 {
1166 avg=*(p)*level*2;
1167 cnt=level;
1168
1169 c = *(p-w*2-2);
1170 e = abs(c-*(p));
1171 e = ((level-e)<0)? 0:level-e;
1172 avg += e*c*2;
1173 cnt += e;
1174
1175 c = *(p-w*2-1);
1176 e = abs(c-*(p));
1177 e = ((level-e)<0)? 0:level-e;
1178 avg += e*c*2;
1179 cnt += e;
1180
1181 c = *(p-w*2);
1182 e = abs(c-*(p));
1183 e = ((level-e)<0)? 0:level-e;
1184 avg += e*c*2;
1185 cnt += e;
1186
1187 c = *(p-w*2+1);
1188 e = abs(c-*(p));
1189 e = ((level-e)<0)? 0:level-e;
1190 avg += e*c*2;
1191 cnt += e;
1192
1193 c = *(p-w*2+2);
1194 e = abs(c-*(p));
1195 e = ((level-e)<0)? 0:level-e;
1196 avg += e*c*2;
1197 cnt += e;
1198
1199 c = *(p-w*1-2);
1200 e = abs(c-*(p));
1201 e = ((level-e)<0)? 0:level-e;
1202 avg += e*c*2;
1203 cnt += e;
1204
1205 c = *(p-w*1-1);
1206 e = abs(c-*(p));
1207 e = ((level-e)<0)? 0:level-e;
1208 avg += e*c*2;
1209 cnt += e;
1210
1211 c = *(p-w*1);
1212 e = abs(c-*(p));
1213 e = ((level-e)<0)? 0:level-e;
1214 avg += e*c*2;
1215 cnt += e;
1216
1217 c = *(p-w*1+1);
1218 e = abs(c-*(p));
1219 e = ((level-e)<0)? 0:level-e;
1220 avg += e*c*2;
1221 cnt += e;
1222
1223 c = *(p-w*1+2);
1224 e = abs(c-*(p));
1225 e = ((level-e)<0)? 0:level-e;
1226 avg += e*c*2;
1227 cnt += e;
1228
1229 c = *(p-2);
1230 e = abs(c-*(p));
1231 e = ((level-e)<0)? 0:level-e;
1232 avg += e*c*2;
1233 cnt += e;
1234
1235 c = *(p-1);
1236 e = abs(c-*(p));
1237 e = ((level-e)<0)? 0:level-e;
1238 avg += e*c*2;
1239 cnt += e;
1240
1241 c = *(p+1);
1242 e = abs(c-*(p));
1243 e = ((level-e)<0)? 0:level-e;
1244 avg += e*c*2;
1245 cnt += e;
1246
1247 c = *(p+2);
1248 e = abs(c-*(p));
1249 e = ((level-e)<0)? 0:level-e;
1250 avg += e*c*2;
1251 cnt += e;
1252
1253 c = *(p+w*1-2);
1254 e = abs(c-*(p));
1255 e = ((level-e)<0)? 0:level-e;
1256 avg += e*c*2;
1257 cnt += e;
1258
1259 c = *(p+w*1-1);
1260 e = abs(c-*(p));
1261 e = ((level-e)<0)? 0:level-e;
1262 avg += e*c*2;
1263 cnt += e;
1264
1265 c = *(p+w*1);
1266 e = abs(c-*(p));
1267 e = ((level-e)<0)? 0:level-e;
1268 avg += e*c*2;
1269 cnt += e;
1270
1271 c = *(p+w*1+1);
1272 e = abs(c-*(p));
1273 e = ((level-e)<0)? 0:level-e;
1274 avg += e*c*2;
1275 cnt += e;
1276
1277 c = *(p+w*1+2);
1278 e = abs(c-*(p));
1279 e = ((level-e)<0)? 0:level-e;
1280 avg += e*c*2;
1281 cnt += e;
1282
1283 c = *(p+w*2-2);
1284 e = abs(c-*(p));
1285 e = ((level-e)<0)? 0:level-e;
1286 avg += e*c*2;
1287 cnt += e;
1288
1289 c = *(p+w*2-1);
1290 e = abs(c-*(p));
1291 e = ((level-e)<0)? 0:level-e;
1292 avg += e*c*2;
1293 cnt += e;
1294
1295 c = *(p+w*2);
1296 e = abs(c-*(p));
1297 e = ((level-e)<0)? 0:level-e;
1298 avg += e*c*2;
1299 cnt += e;
1300
1301 c = *(p+w*2+1);
1302 e = abs(c-*(p));
1303 e = ((level-e)<0)? 0:level-e;
1304 avg += e*c*2;
1305 cnt += e;
1306
1307 c = *(p+w*2+2);
1308 e = abs(c-*(p));
1309 e = ((level-e)<0)? 0:level-e;
1310 avg += e*c*2;
1311 cnt += e;
1312
1313 *(d)=(((avg/cnt)+1)/2);
1314
1315 d++;
1316 p++;
1317 }
1318
1319 memcpy(plane,scratchplane2,w*h);
1320 }
1321
1322 /***********************************************************
1323 * Main Loop *
1324 ***********************************************************/
1325
init_accel()1326 static void init_accel() {
1327 filter_plane_median = filter_plane_median_p;
1328 temporal_filter_planes = temporal_filter_planes_p;
1329 uint32_t tmp;
1330
1331 #if defined(__SSE2__)
1332 int d = 0;
1333 /* __asm__ volatile("cpuid" : "=d"(d) : "a"(1) : "ebx", "ecx"); */
1334 __asm__ volatile("movl %%ebx, %1; cpuid; movl %1, %%ebx" : "=d"(d), "=&g"(tmp) : "a"(1) : "ecx");
1335 if ((d & (1 << 26))) {
1336 mjpeg_info("SETTING SSE2 for standard Temporal-Noise-Filter");
1337 temporal_filter_planes = temporal_filter_planes_sse2;
1338
1339 /*__asm__ volatile("cpuid" : "=d"(d) : "a"(0x80000001) : "ebx", "ecx");*/
1340 __asm__ volatile("movl %%ebx, %1; cpuid; movl %1, %%ebx" : "=d"(d), "=&g"(tmp) : "a"(0x80000001) : "ecx");
1341 if ((d & (1 << 29))) {
1342 /* x86_64 processor */
1343 mjpeg_info("SETTING SSE2 for Median-Filter");
1344 filter_plane_median = filter_plane_median_sse2;
1345 }
1346 }
1347 #endif
1348 }
1349
1350 int
main(int argc,char * argv[])1351 main (int argc, char *argv[])
1352 {
1353 int c;
1354 int fd_in = fileno(stdin);
1355 int fd_out = fileno(stdout);
1356 int err = 0;
1357 char *msg = NULL;
1358 y4m_ratio_t rx, ry;
1359 y4m_frame_info_t iframeinfo;
1360 y4m_stream_info_t istreaminfo;
1361 y4m_frame_info_t oframeinfo;
1362 y4m_stream_info_t ostreaminfo;
1363
1364 mjpeg_info("yuvdenoise version %s", VERSION);
1365
1366 while ((c = getopt (argc, argv, "qhvt:g:m:M:r:G:")) != -1)
1367 {
1368 switch (c)
1369 {
1370 case 'h':
1371 {
1372 mjpeg_info("... | yuvdenoise [OPTIONS] | ...");
1373 mjpeg_info("Brief description of the accepted options:\n");
1374 mjpeg_info("-g [Y=0...255],[U=0...255],[V=0...255]");
1375 mjpeg_info(" This sets the parameters [Y,U,V] for the gauss-filter. 0 means no filtering,");
1376 mjpeg_info(" which is the default, 255 is maximum gaussfiltering. Some camera manufacturers");
1377 mjpeg_info(" think it's a good idea to sharpen the frames extremly. This however will raise");
1378 mjpeg_info(" typical video-compression-artifacts (ringing, blocking, alias, etc...). If you");
1379 mjpeg_info(" desire to have a video free of these, you can raise the gauss-filter-values");
1380 mjpeg_info(" until your image is undesirable soft... (Short: setting decent values helps");
1381 mjpeg_info(" both: the viewer and the encoder -- setting too high values is ugly.) This may");
1382 mjpeg_info(" also help with extremly noisy images from a weak air-captured signal...");
1383 mjpeg_info(" Unlike y4mspatialfilter this will not boost ringing artifacts.\n");
1384 mjpeg_info("-m [0...255],[0...255],[0...255]");
1385 mjpeg_info(" Spatial-Pre-Filter. This one helps with really noisy signals. You should not");
1386 mjpeg_info(" use this with moderate to low noise material. Used with care it helps the tem-");
1387 mjpeg_info(" poral-filter a lot. Misuse will lead to rather dull images (like an overly median-filtered image...");
1388 mjpeg_info("-t [0...255],[0...255],[0...255]");
1389 mjpeg_info(" Temporal-Noise-Filter. This one dramaticaly reduces noise without loosing sharpness. If set too high, however, it may introduce visable ghost-images or smear. ");
1390 mjpeg_info("-M [0...255],[0...255],[0...255]");
1391 mjpeg_info(" Spatial-Post-Filter. This one removes spatial noise left by the temporal-filter.");
1392 mjpeg_info(" Used with care it can dramaticaly lower the bitrate. Using it with to high settings will lead to the same artifacts as the spatial-pre-filter produces.");
1393 mjpeg_info("-G [0...255],[0...255],[0...255]");
1394 mjpeg_info(" Set -m,-t,-M values at once. -t is boosted by a factor of 2. Useful if you do");
1395 mjpeg_info(" not want to tweak all the values by hand... which is better.");
1396 mjpeg_info("-r [0...255],[0...255],[0...255]");
1397 mjpeg_info(" Add some static masking noise. Might be used as an effect, too... *g*");
1398 mjpeg_info("-q HighQuality-Mode. Warning: On almost any machine this is dead slow...");
1399 exit (0);
1400 break;
1401 }
1402 case 'v':
1403 {
1404 verbose = 1;
1405 break;
1406 }
1407 case 'q':
1408 {
1409 hq_mode = 1;
1410 break;
1411 }
1412 case 't':
1413 {
1414 sscanf (optarg, "%i,%i,%i", &temp_Y_thres, &temp_U_thres,
1415 &temp_V_thres);
1416 break;
1417 }
1418 case 'g':
1419 {
1420 sscanf (optarg, "%i,%i,%i", &gauss_Y, &gauss_U, &gauss_V);
1421 break;
1422 }
1423 case 'm':
1424 {
1425 sscanf (optarg, "%i,%i,%i", &med_pre_Y_thres, &med_pre_U_thres, &med_pre_V_thres);
1426 break;
1427 }
1428 case 'M':
1429 {
1430 sscanf (optarg, "%i,%i,%i", &med_post_Y_thres, &med_post_U_thres, &med_post_V_thres);
1431 break;
1432 }
1433 case 'G':
1434 {
1435 sscanf (optarg, "%i,%i,%i", &med_pre_Y_thres, &med_pre_U_thres, &med_pre_V_thres);
1436 med_post_Y_thres = temp_Y_thres = med_pre_Y_thres;
1437 med_post_U_thres = temp_U_thres = med_pre_U_thres;
1438 med_post_V_thres = temp_V_thres = med_pre_V_thres;
1439 temp_Y_thres *= 2;
1440 temp_U_thres *= 2;
1441 temp_V_thres *= 2;
1442 break;
1443 }
1444 case 'r':
1445 {
1446 sscanf (optarg, "%i,%i,%i", &renoise_Y, &renoise_U, &renoise_V);
1447 break;
1448 }
1449 case '?':
1450 default:
1451 exit (1);
1452 }
1453 }
1454
1455 mjpeg_info("Using the following thresholds/settings:");
1456 mjpeg_info("Gauss-Pre-Filter [Y,U,V] : [%i,%i,%i]",
1457 gauss_Y, gauss_U, gauss_V);
1458 mjpeg_info("Median-Pre-Filter [Y,U,V] : [%i,%i,%i]",
1459 med_pre_Y_thres, med_pre_U_thres, med_pre_V_thres);
1460 mjpeg_info("Temporal-Noise-Filter [Y,U,V] : [%i,%i,%i]",
1461 temp_Y_thres, temp_U_thres, temp_V_thres);
1462 mjpeg_info("Median-Post-Filter [Y,U,V] : [%i,%i,%i]",
1463 med_post_Y_thres, med_post_U_thres, med_post_V_thres);
1464 mjpeg_info("Renoise [Y,U,V] : [%i,%i,%i]",
1465 renoise_Y, renoise_U, renoise_V);
1466 mjpeg_info("HQ-Mode : %s",
1467 (hq_mode==0? "off":"on"));
1468
1469 /* initialize stream-information */
1470 y4m_accept_extensions (1);
1471 y4m_init_stream_info (&istreaminfo);
1472 y4m_init_frame_info (&iframeinfo);
1473 y4m_init_stream_info (&ostreaminfo);
1474 y4m_init_frame_info (&oframeinfo);
1475
1476 /* open input stream */
1477 if ((err = y4m_read_stream_header (fd_in, &istreaminfo)) != Y4M_OK)
1478 mjpeg_error_exit1("Couldn't read YUV4MPEG header: %s!", y4m_strerr (err));
1479
1480 /* get format information */
1481 width = y4m_si_get_width (&istreaminfo);
1482 height = y4m_si_get_height (&istreaminfo);
1483 input_chroma_subsampling = y4m_si_get_chroma (&istreaminfo);
1484 input_interlaced = y4m_si_get_interlace (&istreaminfo);
1485 mjpeg_info("Y4M-Stream %ix%i(%s)",
1486 width,
1487 height, y4m_chroma_description (input_chroma_subsampling));
1488
1489 lwidth = width;
1490 lheight = height;
1491
1492 // Setup the denoiser to use the appropriate chroma processing
1493 if (input_chroma_subsampling == Y4M_CHROMA_420JPEG ||
1494 input_chroma_subsampling == Y4M_CHROMA_420MPEG2 ||
1495 input_chroma_subsampling == Y4M_CHROMA_420PALDV)
1496 msg = "Processing Mode : 4:2:0";
1497 else if (input_chroma_subsampling == Y4M_CHROMA_411)
1498 msg = "Processing Mode : 4:1:1";
1499 else if (input_chroma_subsampling == Y4M_CHROMA_422)
1500 msg = "Processing Mode : 4:2:2";
1501 else if (input_chroma_subsampling == Y4M_CHROMA_444)
1502 msg = "Processing Mode : 4:4:4";
1503 else
1504 mjpeg_error_exit1 (" ### Unsupported Y4M Chroma sampling ### ");
1505
1506 rx = y4m_chroma_ss_x_ratio (input_chroma_subsampling);
1507 ry = y4m_chroma_ss_y_ratio (input_chroma_subsampling);
1508 cwidth = width / rx.d;
1509 cheight = height / ry.d;
1510
1511 mjpeg_info("%s %s", msg,
1512 (input_interlaced ==
1513 Y4M_ILACE_NONE) ? "progressive" : "interlaced");
1514 mjpeg_info("Luma-Plane : %ix%i pixels", lwidth, lheight);
1515 mjpeg_info("Chroma-Plane : %ix%i pixels", cwidth, cheight);
1516
1517 if (input_interlaced != Y4M_ILACE_NONE)
1518 {
1519 // process the fields as images side by side
1520 lwidth *= 2;
1521 cwidth *= 2;
1522 lheight /= 2;
1523 cheight /= 2;
1524 }
1525
1526 y4m_si_set_interlace (&ostreaminfo, y4m_si_get_interlace (&istreaminfo));
1527 y4m_si_set_chroma (&ostreaminfo, y4m_si_get_chroma (&istreaminfo));
1528 y4m_si_set_width (&ostreaminfo, width);
1529 y4m_si_set_height (&ostreaminfo, height);
1530 y4m_si_set_framerate (&ostreaminfo, y4m_si_get_framerate (&istreaminfo));
1531 y4m_si_set_sampleaspect (&ostreaminfo,
1532 y4m_si_get_sampleaspect (&istreaminfo));
1533
1534 /* write the outstream header */
1535 y4m_write_stream_header (fd_out, &ostreaminfo);
1536
1537 /* now allocate the needed buffers */
1538 {
1539 /* calculate the memory offset needed to allow the processing
1540 * functions to overshot. The biggest overshot is needed for the
1541 * MC-functions, so we'll use 8*width...
1542 */
1543 buff_offset = lwidth * 8;
1544 buff_size = buff_offset * 2 + lwidth * lheight;
1545
1546 frame1[0] = buff_offset + (uint8_t *) malloc (buff_size);
1547 frame1[1] = buff_offset + (uint8_t *) malloc (buff_size);
1548 frame1[2] = buff_offset + (uint8_t *) malloc (buff_size);
1549
1550 frame2[0] = buff_offset + (uint8_t *) malloc (buff_size);
1551 frame2[1] = buff_offset + (uint8_t *) malloc (buff_size);
1552 frame2[2] = buff_offset + (uint8_t *) malloc (buff_size);
1553
1554 frame3[0] = buff_offset + (uint8_t *) malloc (buff_size);
1555 frame3[1] = buff_offset + (uint8_t *) malloc (buff_size);
1556 frame3[2] = buff_offset + (uint8_t *) malloc (buff_size);
1557
1558 frame4[0] = buff_offset + (uint8_t *) malloc (buff_size);
1559 frame4[1] = buff_offset + (uint8_t *) malloc (buff_size);
1560 frame4[2] = buff_offset + (uint8_t *) malloc (buff_size);
1561
1562 frame5[0] = buff_offset + (uint8_t *) malloc (buff_size);
1563 frame5[1] = buff_offset + (uint8_t *) malloc (buff_size);
1564 frame5[2] = buff_offset + (uint8_t *) malloc (buff_size);
1565
1566 frame6[0] = buff_offset + (uint8_t *) malloc (buff_size);
1567 frame6[1] = buff_offset + (uint8_t *) malloc (buff_size);
1568 frame6[2] = buff_offset + (uint8_t *) malloc (buff_size);
1569
1570 frame7[0] = buff_offset + (uint8_t *) malloc (buff_size);
1571 frame7[1] = buff_offset + (uint8_t *) malloc (buff_size);
1572 frame7[2] = buff_offset + (uint8_t *) malloc (buff_size);
1573
1574 outframe[0] = buff_offset + (uint8_t *) malloc (buff_size);
1575 outframe[1] = buff_offset + (uint8_t *) malloc (buff_size);
1576 outframe[2] = buff_offset + (uint8_t *) malloc (buff_size);
1577
1578 scratchplane1 = buff_offset + (uint8_t *) malloc (buff_size);
1579 scratchplane2 = buff_offset + (uint8_t *) malloc (buff_size);
1580
1581 mjpeg_info("Buffers allocated.");
1582 }
1583
1584 /* initialize motion_library */
1585 init_motion_search ();
1586
1587 init_accel();
1588
1589 /* read every frame until the end of the input stream and process it */
1590 while (Y4M_OK == (err = y4m_read_frame (fd_in,
1591 &istreaminfo,
1592 &iframeinfo, frame1)))
1593 {
1594
1595 static uint32_t frame_nr = 0;
1596 uint8_t *temp[3];
1597
1598 frame_nr++;
1599
1600 gauss_filter_plane (frame1[0], lwidth, lheight, gauss_Y);
1601 gauss_filter_plane (frame1[1], cwidth, cheight, gauss_U);
1602 gauss_filter_plane (frame1[2], cwidth, cheight, gauss_V);
1603
1604 filter_plane_median (frame1[0], lwidth, lheight, med_pre_Y_thres);
1605 filter_plane_median (frame1[1], cwidth, cheight, med_pre_U_thres);
1606 filter_plane_median (frame1[2], cwidth, cheight, med_pre_V_thres);
1607
1608 if(hq_mode==1)
1609 {
1610 temporal_filter_planes_MC (0, lwidth, lheight, temp_Y_thres);
1611 temporal_filter_planes_MC (1, cwidth, cheight, temp_U_thres);
1612 temporal_filter_planes_MC (2, cwidth, cheight, temp_V_thres);
1613 }
1614 else
1615 {
1616 temporal_filter_planes (0, lwidth, lheight, temp_Y_thres);
1617 temporal_filter_planes (1, cwidth, cheight, temp_U_thres);
1618 temporal_filter_planes (2, cwidth, cheight, temp_V_thres);
1619 }
1620
1621 filter_plane_median (outframe[0], lwidth, lheight, med_post_Y_thres);
1622 filter_plane_median (outframe[1], cwidth, cheight, med_post_U_thres);
1623 filter_plane_median (outframe[2], cwidth, cheight, med_post_V_thres);
1624
1625 renoise (outframe[0], lwidth, lheight, renoise_Y );
1626 renoise (outframe[1], cwidth, cheight, renoise_U );
1627 renoise (outframe[2], cwidth, cheight, renoise_V );
1628
1629 if (frame_nr >= 4)
1630 y4m_write_frame (fd_out, &ostreaminfo, &oframeinfo, outframe);
1631
1632 // rotate buffer pointers to rotate input-buffers
1633 temp[0] = frame7[0];
1634 temp[1] = frame7[1];
1635 temp[2] = frame7[2];
1636
1637 frame7[0] = frame6[0];
1638 frame7[1] = frame6[1];
1639 frame7[2] = frame6[2];
1640
1641 frame6[0] = frame5[0];
1642 frame6[1] = frame5[1];
1643 frame6[2] = frame5[2];
1644
1645 frame5[0] = frame4[0];
1646 frame5[1] = frame4[1];
1647 frame5[2] = frame4[2];
1648
1649 frame4[0] = frame3[0];
1650 frame4[1] = frame3[1];
1651 frame4[2] = frame3[2];
1652
1653 frame3[0] = frame2[0];
1654 frame3[1] = frame2[1];
1655 frame3[2] = frame2[2];
1656
1657 frame2[0] = frame1[0];
1658 frame2[1] = frame1[1];
1659 frame2[2] = frame1[2];
1660
1661 frame1[0] = temp[0];
1662 frame1[1] = temp[1];
1663 frame1[2] = temp[2];
1664
1665 }
1666 // write out the left frames...
1667 y4m_write_frame (fd_out, &ostreaminfo, &oframeinfo, frame4);
1668 y4m_write_frame (fd_out, &ostreaminfo, &oframeinfo, frame3);
1669 y4m_write_frame (fd_out, &ostreaminfo, &oframeinfo, frame2);
1670 y4m_write_frame (fd_out, &ostreaminfo, &oframeinfo, frame1);
1671
1672 /* free allocated buffers */
1673 {
1674 free (frame1[0] - buff_offset);
1675 free (frame1[1] - buff_offset);
1676 free (frame1[2] - buff_offset);
1677
1678 free (frame2[0] - buff_offset);
1679 free (frame2[1] - buff_offset);
1680 free (frame2[2] - buff_offset);
1681
1682 free (frame3[0] - buff_offset);
1683 free (frame3[1] - buff_offset);
1684 free (frame3[2] - buff_offset);
1685
1686 free (frame4[0] - buff_offset);
1687 free (frame4[1] - buff_offset);
1688 free (frame4[2] - buff_offset);
1689
1690 free (frame5[0] - buff_offset);
1691 free (frame5[1] - buff_offset);
1692 free (frame5[2] - buff_offset);
1693
1694 free (frame6[0] - buff_offset);
1695 free (frame6[1] - buff_offset);
1696 free (frame6[2] - buff_offset);
1697
1698 free (frame7[0] - buff_offset);
1699 free (frame7[1] - buff_offset);
1700 free (frame7[2] - buff_offset);
1701
1702 free (outframe[0] - buff_offset);
1703 free (outframe[1] - buff_offset);
1704 free (outframe[2] - buff_offset);
1705
1706 free (scratchplane1 - buff_offset);
1707 free (scratchplane2 - buff_offset);
1708
1709 mjpeg_info("Buffers freed.");
1710 }
1711
1712 /* did stream end unexpectedly ? */
1713 if (err != Y4M_ERR_EOF)
1714 mjpeg_error_exit1 ("%s", y4m_strerr (err));
1715
1716 /* Exit gently */
1717 return (0);
1718 }
1719