1 /*****************************************************************************
2 *
3 * XVID MPEG-4 VIDEO CODEC
4 * - Postprocessing functions -
5 *
6 * Copyright(C) 2003-2010 Michael Militzer <isibaar@xvid.org>
7 * 2004 Marc Fauconneau
8 *
9 * This program is free software ; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation ; either version 2 of the License, or
12 * (at your option) any later version.
13 *
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY ; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
18 *
19 * You should have received a copy of the GNU General Public License
20 * along with this program ; if not, write to the Free Software
21 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22 *
23 * $Id: postprocessing.c 1985 2011-05-18 09:02:35Z Isibaar $
24 *
25 ****************************************************************************/
26
27 #include <stdlib.h>
28 #include <string.h>
29 #include <math.h>
30
31 #include "../portab.h"
32 #include "../global.h"
33 #include "image.h"
34 #include "../utils/emms.h"
35 #include "postprocessing.h"
36
/*
 * Function pointer through which image_postproc() applies the luma
 * brightness offset (called only when the requested offset is non-zero).
 * It is not assigned anywhere in this file - presumably it is pointed at
 * image_brightness_c or an optimised variant during codec initialisation
 * (TODO confirm against the init code).
 */
IMAGEBRIGHTNESS_PTR image_brightness;
39
40
/* Some useful (and fast) branch-free macros.
 *
 * FAST_MAX, FAST_MIN and FAST_ABS rely on an arithmetic (sign-propagating)
 * right shift of negative 32-bit ints - if your compiler doesn't do signed
 * shifts, use the default MIN/MAX macros from global.h instead.
 *
 * Every macro below evaluates its arguments more than once, so never pass
 * an expression with side effects.
 */

#define FAST_MAX(x,y) ((x) - ((((x) - (y))>>(32 - 1)) & ((x) - (y))))
#define FAST_MIN(x,y) ((x) + ((((y) - (x))>>(32 - 1)) & ((y) - (x))))
/* The whole expansion is parenthesised so that uses such as
 * FAST_ABS(a) * b or c - FAST_ABS(a) group the way the caller expects. */
#define FAST_ABS(x) (((((int)(x)) >> 31) ^ ((int)(x))) - (((int)(x)) >> 31))
#define ABS(X) (((X)>0)?(X):-(X))
49
/*
 * Prepare every lookup table stored in *tbls: the deblocking
 * threshold/absolute-value tables first, then the film-effect noise tables.
 */
void
init_postproc(XVID_POSTPROC *tbls)
{
	init_deblock(tbls);	/* xvid_thresh_tbl / xvid_abs_tbl */
	init_noise(tbls);	/* xvid_noise1 / xvid_noise2 / xvid_prev_shift */
}
55
56 void
stripe_deblock_h(SMPDeblock * h)57 stripe_deblock_h(SMPDeblock *h)
58 {
59 const int stride = h->stride;
60 const int stride2 = stride /2;
61
62 int i,j;
63 int quant;
64
65 /* luma: j,i in block units */
66 if ((h->flags & XVID_DEBLOCKY))
67 {
68 int dering = h->flags & XVID_DERINGY;
69
70 for (j = 1; j < h->stop_y; j++) /* horizontal luma deblocking */
71 for (i = h->start_x; i < h->stop_x; i++)
72 {
73 quant = h->mbs[(j+0)/2*h->mb_stride + (i/2)].quant;
74 deblock8x8_h(h->tbls, h->img->y + j*8*stride + i*8, stride, quant, dering);
75 }
76 }
77
78 /* chroma */
79 if ((h->flags & XVID_DEBLOCKUV))
80 {
81 int dering = h->flags & XVID_DERINGUV;
82
83 for (j = 1; j < h->stop_y/2; j++) /* horizontal deblocking */
84 for (i = h->start_x/2; i < h->stop_x/2; i++)
85 {
86 quant = h->mbs[(j+0)*h->mb_stride + i].quant;
87 deblock8x8_h(h->tbls, h->img->u + j*8*stride2 + i*8, stride2, quant, dering);
88 deblock8x8_h(h->tbls, h->img->v + j*8*stride2 + i*8, stride2, quant, dering);
89 }
90 }
91 }
92
93 void
stripe_deblock_v(SMPDeblock * h)94 stripe_deblock_v(SMPDeblock *h)
95 {
96 const int stride = h->stride;
97 const int stride2 = stride /2;
98
99 int i,j;
100 int quant;
101
102 /* luma: j,i in block units */
103 if ((h->flags & XVID_DEBLOCKY))
104 {
105 int dering = h->flags & XVID_DERINGY;
106
107 for (j = h->start_y; j < h->stop_y; j++) /* vertical deblocking */
108 for (i = 1; i < h->stop_x; i++)
109 {
110 quant = h->mbs[(j+0)/2*h->mb_stride + (i/2)].quant;
111 deblock8x8_v(h->tbls, h->img->y + j*8*stride + i*8, stride, quant, dering);
112 }
113 }
114
115 /* chroma */
116 if ((h->flags & XVID_DEBLOCKUV))
117 {
118 int dering = h->flags & XVID_DERINGUV;
119
120 for (j = h->start_y/2; j < h->stop_y/2; j++) /* vertical deblocking */
121 for (i = 1; i < h->stop_x/2; i++)
122 {
123 quant = h->mbs[(j+0)*h->mb_stride + i].quant;
124 deblock8x8_v(h->tbls, h->img->u + j*8*stride2 + i*8, stride2, quant, dering);
125 deblock8x8_v(h->tbls, h->img->v + j*8*stride2 + i*8, stride2, quant, dering);
126 }
127 }
128 }
129
130 void
image_postproc(XVID_POSTPROC * tbls,IMAGE * img,int edged_width,const MACROBLOCK * mbs,int mb_width,int mb_height,int mb_stride,int flags,int brightness,int frame_num,int bvop,int threads)131 image_postproc(XVID_POSTPROC *tbls, IMAGE * img, int edged_width,
132 const MACROBLOCK * mbs, int mb_width, int mb_height, int mb_stride,
133 int flags, int brightness, int frame_num, int bvop, int threads)
134 {
135 int k;
136 #ifndef HAVE_PTHREAD
137 int num_threads = 1;
138 #else
139 int num_threads = MAX(1, MIN(threads, 4));
140 void *status = NULL;
141 #endif
142 SMPDeblock data[4];
143
144 /* horizontal deblocking, dispatch threads */
145 for (k = 0; k < num_threads; k++) {
146 data[k].flags = flags;
147 data[k].img = img;
148 data[k].mb_stride = mb_stride;
149 data[k].mbs = mbs;
150 data[k].stride = edged_width;
151 data[k].tbls = tbls;
152
153 data[k].start_x = (k*mb_width / num_threads)*2;
154 data[k].stop_x = ((k+1)*mb_width / num_threads)*2;
155
156 data[k].stop_y = mb_height*2;
157 }
158 #ifdef HAVE_PTHREAD
159 /* create threads */
160 for (k = 1; k < num_threads; k++) {
161 pthread_create(&data[k].handle, NULL,
162 (void*)stripe_deblock_h, (void*)&data[k]);
163 }
164 #endif
165 stripe_deblock_h(&data[0]);
166
167 #ifdef HAVE_PTHREAD
168 /* wait until all threads are finished */
169 for (k = 1; k < num_threads; k++) {
170 pthread_join(data[k].handle, &status);
171 }
172 #endif
173
174 /* vertical deblocking, dispatch threads */
175 for (k = 0; k < num_threads; k++) {
176 data[k].start_y = (k*mb_height / num_threads)*2;
177 data[k].stop_y = ((k+1)*mb_height / num_threads)*2;
178 data[k].stop_x = mb_width*2;
179 }
180
181 #ifdef HAVE_PTHREAD
182 /* create threads */
183 for (k = 1; k < num_threads; k++) {
184 pthread_create(&data[k].handle, NULL,
185 (void*)stripe_deblock_v, (void*)&data[k]);
186 }
187 #endif
188 stripe_deblock_v(&data[0]);
189
190 #ifdef HAVE_PTHREAD
191 /* wait until all threads are finished */
192 for (k = 1; k < num_threads; k++) {
193 pthread_join(data[k].handle, &status);
194 }
195 #endif
196
197 if (!bvop)
198 tbls->prev_quant = mbs->quant;
199
200 if ((flags & XVID_FILMEFFECT))
201 {
202 add_noise(tbls, img->y, img->y, edged_width, mb_width*16,
203 mb_height*16, frame_num % 3, tbls->prev_quant);
204 }
205
206 if (brightness != 0) {
207 image_brightness(img->y, edged_width, mb_width*16, mb_height*16, brightness);
208 }
209 }
210
211 /******************************************************************************/
212
/*
 * Fill the two deblocking lookup tables, both indexed by a pixel
 * difference d in -255..255 stored at position d + 255:
 *   xvid_thresh_tbl[d + 255] = 1 when |d| < THR1, otherwise 0
 *   xvid_abs_tbl[d + 255]    = |d|
 */
void init_deblock(XVID_POSTPROC *tbls)
{
	int diff;

	for (diff = -255; diff <= 255; diff++) {
		const int magnitude = (diff > 0) ? diff : -diff;

		tbls->xvid_thresh_tbl[diff + 255] = (magnitude < THR1) ? 1 : 0;
		tbls->xvid_abs_tbl[diff + 255] = magnitude;
	}
}
224
/*
 * Gather the ten vertically adjacent pixels that straddle a horizontal
 * block edge in column (x): v[k] receives the address and s[k] the value of
 * the pixel (k - 5) rows away from img, so v[4]/v[5] sit directly above and
 * on the edge row. Uses `img`, `stride`, `v` and `s` from the enclosing
 * scope. The argument is parenthesised so compound expressions are safe.
 * Expands to plain statements - invoke it without a trailing semicolon.
 */
#define LOAD_DATA_HOR(x) \
	/* Load pixel addresses and data for filtering */ \
	s[0] = *(v[0] = img - 5*stride + (x)); \
	s[1] = *(v[1] = img - 4*stride + (x)); \
	s[2] = *(v[2] = img - 3*stride + (x)); \
	s[3] = *(v[3] = img - 2*stride + (x)); \
	s[4] = *(v[4] = img - 1*stride + (x)); \
	s[5] = *(v[5] = img + 0*stride + (x)); \
	s[6] = *(v[6] = img + 1*stride + (x)); \
	s[7] = *(v[7] = img + 2*stride + (x)); \
	s[8] = *(v[8] = img + 3*stride + (x)); \
	s[9] = *(v[9] = img + 4*stride + (x));
237
/*
 * Gather the ten horizontally adjacent pixels that straddle a vertical
 * block edge in row (x): v[k] receives the address and s[k] the value of
 * the pixel (k - 5) columns away from img in that row, so v[4]/v[5] sit
 * directly left of and on the edge column. Uses `img`, `stride`, `v` and
 * `s` from the enclosing scope. The argument is parenthesised so that a
 * compound expression such as r + 1 is multiplied by stride as a whole.
 * Expands to plain statements - invoke it without a trailing semicolon.
 */
#define LOAD_DATA_VER(x) \
	/* Load pixel addresses and data for filtering */ \
	s[0] = *(v[0] = img + (x)*stride - 5); \
	s[1] = *(v[1] = img + (x)*stride - 4); \
	s[2] = *(v[2] = img + (x)*stride - 3); \
	s[3] = *(v[3] = img + (x)*stride - 2); \
	s[4] = *(v[4] = img + (x)*stride - 1); \
	s[5] = *(v[5] = img + (x)*stride + 0); \
	s[6] = *(v[6] = img + (x)*stride + 1); \
	s[7] = *(v[7] = img + (x)*stride + 2); \
	s[8] = *(v[8] = img + (x)*stride + 3); \
	s[9] = *(v[9] = img + (x)*stride + 4);
250
/*
 * Deringing for sample x, using the edge flags e[] and the sample snapshot
 * s[] of the enclosing scope; the result is stored through v[x]:
 *   e[x] set            -> sample is written back unchanged (s[x])
 *   e[x-1] set          -> average of s[x] and s[x+1]
 *   e[x+1] set          -> average of s[x-1] and s[x]
 *   no flag set         -> (s[x-1] + 2*s[x] + s[x+1]) / 4
 * The cases are tested in that order. Expands to one braced statement, so
 * it is invoked without a trailing semicolon.
 */
#define APPLY_DERING(x) \
	{ \
		if (e[x] != 0) \
			*v[x] = s[x]; \
		else if (e[x-1] != 0) \
			*v[x] = ((s[x]+s[x+1])>>1); \
		else if (e[x+1] != 0) \
			*v[x] = ((s[x-1]+s[x])>>1); \
		else \
			*v[x] = ((s[x-1]+s[x]*2+s[x+1])>>2); \
	}
259
/*
 * Core 1-D deblocking / deringing filter for one line of ten samples
 * across a block edge. It works on the snapshot s[0..9] and the matching
 * pixel addresses v[0..9] filled in by LOAD_DATA_HOR / LOAD_DATA_VER (the
 * edge lies between s[4] and s[5]) and additionally uses `tbls`, `quant`,
 * `dering`, `eq_cnt` and `e` from the enclosing scope.
 *
 * eq_cnt counts how many of the eight neighbouring pairs in s[0..8] differ
 * by less than THR1 (xvid_thresh_tbl lookup).
 *   - eq_cnt < THR2  ("default mode"): if the step over the edge is smaller
 *     than quant, *v[4] and *v[5] are moved towards each other by a
 *     correction that is clamped between 0 and half the step. When `dering`
 *     is non-zero the samples 1..8 are then passed through APPLY_DERING.
 *   - otherwise ("DC-offset mode"): if the range of s[1..8] is below
 *     2*quant, *v[1]..*v[8] are replaced by a weighted average of their
 *     neighbours (weights sum to 16, +8 for rounding).
 *
 * Expands to an if/else statement - invoke it without a trailing semicolon.
 */
#define APPLY_FILTER_CORE \
	/* First, decide whether to use default or DC-offset mode */ \
	\
	eq_cnt = 0; \
	\
	eq_cnt += tbls->xvid_thresh_tbl[s[0] - s[1] + 255]; \
	eq_cnt += tbls->xvid_thresh_tbl[s[1] - s[2] + 255]; \
	eq_cnt += tbls->xvid_thresh_tbl[s[2] - s[3] + 255]; \
	eq_cnt += tbls->xvid_thresh_tbl[s[3] - s[4] + 255]; \
	eq_cnt += tbls->xvid_thresh_tbl[s[4] - s[5] + 255]; \
	eq_cnt += tbls->xvid_thresh_tbl[s[5] - s[6] + 255]; \
	eq_cnt += tbls->xvid_thresh_tbl[s[6] - s[7] + 255]; \
	eq_cnt += tbls->xvid_thresh_tbl[s[7] - s[8] + 255]; \
	\
	if(eq_cnt < THR2) { /* Default mode */  \
		int a30, a31, a32;					\
		int diff, limit;					\
		\
		/* only touch the edge when the step across it is below quant */ \
		if(tbls->xvid_abs_tbl[(s[4] - s[5]) + 255] < quant) { \
			/* the same 4-tap measure (2,-5,5,-2) centred on the edge (a30) */ \
			/* and inside the block on either side of it (a31, a32) */ \
			a30 = ((s[3]<<1) - s[4] * 5 + s[5] * 5 - (s[6]<<1));	\
			a31 = ((s[1]<<1) - s[2] * 5 + s[3] * 5 - (s[4]<<1));	\
			a32 = ((s[5]<<1) - s[6] * 5 + s[7] * 5 - (s[8]<<1));	\
			\
			/* a30 reduced to the smallest of the three magnitudes, minus a30, scaled by 5/64 with rounding */ \
			diff = (5 * ((SIGN(a30) * MIN(FAST_ABS(a30), MIN(FAST_ABS(a31), FAST_ABS(a32)))) - a30) + 32) >> 6;	\
			limit = (s[4] - s[5]) / 2;	\
			\
			/* clamp diff to the interval between 0 and limit (either sign) */ \
			if (limit > 0)				\
				diff = (diff < 0) ? 0 : ((diff > limit) ? limit : diff);	\
			else	\
				diff = (diff > 0) ? 0 : ((diff < limit) ? limit : diff);	\
			\
			*v[4] -= diff;	\
			*v[5] += diff;	\
		}	\
		if (dering) { \
			/* e[k] = 1 where neighbours k and k+1 differ by more than quant + DERING_STRENGTH */ \
			e[0] = (tbls->xvid_abs_tbl[(s[0] - s[1]) + 255] > quant + DERING_STRENGTH) ? 1 : 0; \
			e[1] = (tbls->xvid_abs_tbl[(s[1] - s[2]) + 255] > quant + DERING_STRENGTH) ? 1 : 0; \
			e[2] = (tbls->xvid_abs_tbl[(s[2] - s[3]) + 255] > quant + DERING_STRENGTH) ? 1 : 0; \
			e[3] = (tbls->xvid_abs_tbl[(s[3] - s[4]) + 255] > quant + DERING_STRENGTH) ? 1 : 0; \
			e[4] = (tbls->xvid_abs_tbl[(s[4] - s[5]) + 255] > quant + DERING_STRENGTH) ? 1 : 0; \
			e[5] = (tbls->xvid_abs_tbl[(s[5] - s[6]) + 255] > quant + DERING_STRENGTH) ? 1 : 0; \
			e[6] = (tbls->xvid_abs_tbl[(s[6] - s[7]) + 255] > quant + DERING_STRENGTH) ? 1 : 0; \
			e[7] = (tbls->xvid_abs_tbl[(s[7] - s[8]) + 255] > quant + DERING_STRENGTH) ? 1 : 0; \
			e[8] = (tbls->xvid_abs_tbl[(s[8] - s[9]) + 255] > quant + DERING_STRENGTH) ? 1 : 0; \
			\
			/* running OR: once a flag is set, every later flag is set too */ \
			e[1] |= e[0]; \
			e[2] |= e[1]; \
			e[3] |= e[2]; \
			e[4] |= e[3]; \
			e[5] |= e[4]; \
			e[6] |= e[5]; \
			e[7] |= e[6]; \
			e[8] |= e[7]; \
			e[9]  = e[8]; \
			\
			/* NOTE(review): APPLY_DERING stores values derived from the s[] */ \
			/* snapshot, so it overwrites the *v[4] / *v[5] correction made */ \
			/* above - confirm that this is intended. */ \
			APPLY_DERING(1) \
			APPLY_DERING(2) \
			APPLY_DERING(3) \
			APPLY_DERING(4) \
			APPLY_DERING(5) \
			APPLY_DERING(6) \
			APPLY_DERING(7) \
			APPLY_DERING(8) \
		} \
	}	\
	else {	/* DC-offset mode */	\
		uint8_t p0, p9;	\
		int min, max;	\
		\
		/* Now decide whether to apply smoothing filter or not */	\
		max = FAST_MAX(s[1], FAST_MAX(s[2], FAST_MAX(s[3], FAST_MAX(s[4], FAST_MAX(s[5], FAST_MAX(s[6], FAST_MAX(s[7], s[8])))))));	\
		min = FAST_MIN(s[1], FAST_MIN(s[2], FAST_MIN(s[3], FAST_MIN(s[4], FAST_MIN(s[5], FAST_MIN(s[6], FAST_MIN(s[7], s[8])))))));	\
		\
		if(((max-min)) < 2*quant) {	\
			\
			/* Choose edge pixels */	\
			/* use the outermost sample only if it is within quant of its neighbour */ \
			p0 = (tbls->xvid_abs_tbl[(s[1] - s[0]) + 255] < quant) ? s[0] : s[1];	\
			p9 = (tbls->xvid_abs_tbl[(s[8] - s[9]) + 255] < quant) ? s[9] : s[8];	\
			\
			*v[1] = (uint8_t) ((6*p0 + (s[1]<<2) + (s[2]<<1) + (s[3]<<1) + s[4] + s[5] + 8) >> 4);	\
			*v[2] = (uint8_t) (((p0<<2) + (s[1]<<1) + (s[2]<<2) + (s[3]<<1) + (s[4]<<1) + s[5] + s[6] + 8) >> 4);	\
			*v[3] = (uint8_t) (((p0<<1) + (s[1]<<1) + (s[2]<<1) + (s[3]<<2) + (s[4]<<1) + (s[5]<<1) + s[6] + s[7] + 8) >> 4);	\
			*v[4] = (uint8_t) ((p0 + s[1] + (s[2]<<1) + (s[3]<<1) + (s[4]<<2) + (s[5]<<1) + (s[6]<<1) + s[7] + s[8] + 8) >> 4);	\
			*v[5] = (uint8_t) ((s[1] + s[2] + (s[3]<<1) + (s[4]<<1) + (s[5]<<2) + (s[6]<<1) + (s[7]<<1) + s[8] + p9 + 8) >> 4);	\
			*v[6] = (uint8_t) ((s[2] + s[3] + (s[4]<<1) + (s[5]<<1) + (s[6]<<2) + (s[7]<<1) + (s[8]<<1) + (p9<<1) + 8) >> 4);	\
			*v[7] = (uint8_t) ((s[3] + s[4] + (s[5]<<1) + (s[6]<<1) + (s[7]<<2) + (s[8]<<1) + (p9<<2) + 8) >> 4);	\
			*v[8] = (uint8_t) ((s[4] + s[5] + (s[6]<<1) + (s[7]<<1) + (s[8]<<2) + 6*p9 + 8) >> 4);	\
		}	\
	}
349
/*
 * Deblock (and optionally dering) the horizontal edge at the top of the
 * 8-pixel-wide block whose top-left pixel is `img`.
 *
 *  tbls    lookup tables from init_deblock()
 *  img     first pixel of the row just below the edge; five rows above and
 *          four rows below it are read, samples 1..8 of each column may be
 *          rewritten
 *  stride  distance in bytes between two rows
 *  quant   quantiser steering the filter thresholds
 *  dering  non-zero to run the deringing step as well
 *
 * The eight columns are filtered one after another, left to right.
 */
void deblock8x8_h(XVID_POSTPROC *tbls, uint8_t *img, int stride, int quant, int dering)
{
	int eq_cnt;
	uint8_t *v[10];	/* addresses of the ten samples of the current column */
	int s[10];		/* their values before filtering */
	int e[10];		/* dering edge flags */
	int col;

	for (col = 0; col < 8; col++) {
		LOAD_DATA_HOR(col)
		APPLY_FILTER_CORE
	}
}
381
382
/*
 * Deblock (and optionally dering) the vertical edge at the left of the
 * 8-pixel-high block whose top-left pixel is `img`.
 *
 *  tbls    lookup tables from init_deblock()
 *  img     first pixel of the column just right of the edge; five pixels to
 *          its left and four to its right are read in every row, samples
 *          1..8 of each row may be rewritten
 *  stride  distance in bytes between two rows
 *  quant   quantiser steering the filter thresholds
 *  dering  non-zero to run the deringing step as well
 *
 * The eight rows are filtered one after another, top to bottom.
 */
void deblock8x8_v(XVID_POSTPROC *tbls, uint8_t *img, int stride, int quant, int dering)
{
	int eq_cnt;
	uint8_t *v[10];	/* addresses of the ten samples of the current row */
	int s[10];		/* their values before filtering */
	int e[10];		/* dering edge flags */
	int row;

	for (row = 0; row < 8; row++) {
		LOAD_DATA_VER(row)
		APPLY_FILTER_CORE
	}
}
414
415 /******************************************************************************
416 * *
417 * Noise code below taken from MPlayer: http://www.mplayerhq.hu/ *
418 * Copyright (C) 2002 Michael Niedermayer <michaelni@gmx.at> *
419 * *
420 ******************************************************************************/
421
/* Pseudo-random integer in [0, range) derived from rand(); the argument is
 * parenthesised so that the cast applies to the whole expression. */
#define RAND_N(range) ((int) ((double)(range) * rand() / (RAND_MAX + 1.0)))
/* Amplitude scale of xvid_noise1, the table add_noise() uses for quant >= 5 */
#define STRENGTH1 12
/* Amplitude scale of xvid_noise2, the table add_noise() uses for quant < 5 */
#define STRENGTH2 8
425
/*
 * Build the film-effect noise tables in *tbls.
 *
 * xvid_noise1 / xvid_noise2 each receive MAX_NOISE samples of Gaussian
 * noise (Marsaglia polar method) scaled by STRENGTH1 / STRENGTH2, with a
 * small -1,0,1,0 pattern mixed in. xvid_prev_shift[row][0..2] is pointed at
 * random offsets into xvid_noise1 and [row][3..5] into xvid_noise2 for the
 * first MAX_RES rows.
 *
 * Side effect: reseeds the C library generator with srand(123457), so the
 * tables are the same on every call for a given C library.
 */
void init_noise(XVID_POSTPROC *tbls)
{
	int i, j;
	static const int patt[4] = { -1,0,1,0 };

	/* NOTE(review): presumably clears MMX state before the floating-point
	   maths below - confirm against utils/emms.h */
	emms();

	srand(123457);

	for(i = 0, j = 0; i < MAX_NOISE; i++, j++)
	{
		double x1, x2, w, y1, y2;

		/* pick a point inside the unit circle; the origin is rejected too,
		   because w == 0 would give log(0) and a division by zero below
		   (NaN that must not be converted to int) */
		do {
			x1 = 2.0 * rand() / (float) RAND_MAX - 1.0;
			x2 = 2.0 * rand() / (float) RAND_MAX - 1.0;
			w = x1 * x1 + x2 * x2;
		} while (w >= 1.0 || w == 0.0);

		w = sqrt((-2.0 * log(w)) / w);
		y1 = x1 * w;
		/* NOTE(review): the polar method's second deviate would be x2 * w;
		   using x1 makes xvid_noise2 a rescaled copy of xvid_noise1's
		   Gaussian part. Kept as is to preserve the existing output -
		   confirm whether this is intentional. */
		y2 = x1 * w;

		y1 *= STRENGTH1 / sqrt(3.0);
		y2 *= STRENGTH2 / sqrt(3.0);

		y1 /= 2;
		y2 /= 2;
		y1 += patt[j%4] * STRENGTH1 * 0.35;
		y2 += patt[j%4] * STRENGTH2 * 0.35;

		/* clamp to the int8_t range before the final scaling */
		if (y1 < -128) {
			y1=-128;
		}
		else if (y1 > 127) {
			y1= 127;
		}

		if (y2 < -128) {
			y2=-128;
		}
		else if (y2 > 127) {
			y2= 127;
		}

		y1 /= 3.0;
		y2 /= 3.0;
		tbls->xvid_noise1[i] = (int) y1;
		tbls->xvid_noise2[i] = (int) y2;

		/* one time in six the pattern index is held back by one step */
		if (RAND_N(6) == 0) {
			j--;
		}
	}

	/* initial per-row noise offsets, always below MAX_SHIFT */
	for (i = 0; i < MAX_RES; i++)
		for (j = 0; j < 3; j++) {
			tbls->xvid_prev_shift[i][j] = tbls->xvid_noise1 + (rand() & (MAX_SHIFT - 1));
			tbls->xvid_prev_shift[i][3 + j] = tbls->xvid_noise2 + (rand() & (MAX_SHIFT - 1));
		}
}
487
/*
 * Add film-effect noise to a plane of width x height pixels.
 *
 *  tbls      noise tables from init_noise(); one xvid_prev_shift slot per
 *            row is updated on every call
 *  dst, src  destination / source plane (may be the same buffer)
 *  stride    distance in bytes between two rows of both planes
 *  shiftptr  slot (0..2) that receives this call's fresh random offset
 *  quant     quant < 5 selects xvid_noise2 and slots 3..5, otherwise
 *            xvid_noise1 and slots 0..2
 *
 * For every pixel the three stored noise streams of its row are summed to
 * n and the pixel becomes p + ((n * p) >> 7).
 *
 * init_noise() prepares only MAX_RES rows of xvid_prev_shift and MAX_NOISE
 * samples per noise table, and every stored pointer is a table base plus an
 * offset below MAX_SHIFT. The row slot and the noise column are therefore
 * wrapped so that pictures taller than MAX_RES or wider than
 * MAX_NOISE - MAX_SHIFT stay inside the tables; smaller pictures are
 * processed exactly as before.
 */
void add_noise(XVID_POSTPROC *tbls, uint8_t *dst, uint8_t *src, int stride, int width, int height, int shiftptr, int quant)
{
	/* samples that are readable behind any stored noise pointer */
	const int noise_span = MAX_NOISE - MAX_SHIFT;
	const int add = (quant < 5) ? 3 : 0;
	int8_t *noise = (quant < 5) ? tbls->xvid_noise2 : tbls->xvid_noise1;
	int x, y;

	for(y = 0; y < height; y++)
	{
		/* NOTE(review): the source bytes are deliberately read as signed
		   here, as in the original code - pixels >= 128 scale the noise by
		   (p - 256); confirm that this is wanted. */
		int8_t *src2 = (int8_t *) src;
		const int slot = y % (MAX_RES);
		int nx = 0;	/* noise column, wraps at noise_span */
		int shift;

		/* new offset for this row, aligned down to a multiple of 8 */
		shift = rand() & (MAX_SHIFT - 1);
		shift &= ~7;

		for(x = 0; x < width; x++)
		{
			const int n = tbls->xvid_prev_shift[slot][0 + add][nx] +
						  tbls->xvid_prev_shift[slot][1 + add][nx] +
						  tbls->xvid_prev_shift[slot][2 + add][nx];

			dst[x] = src2[x] + ((n * src2[x]) >> 7);

			if (++nx == noise_span)
				nx = 0;
		}

		tbls->xvid_prev_shift[slot][shiftptr + add] = noise + shift;

		dst += stride;
		src += stride;
	}
}
516
517
/*
 * Plain C brightness adjustment: add `offset` to every pixel of a
 * width x height plane and saturate the result to 0..255.
 *
 *  dst     plane to modify in place
 *  stride  distance in bytes between two rows
 *  offset  signed value added to each pixel
 */
void image_brightness_c(uint8_t *dst, int stride, int width, int height, int offset)
{
	int row, col;

	for (row = 0; row < height; row++) {
		uint8_t *const line = dst + row * stride;

		for (col = 0; col < width; col++) {
			int value = line[col] + offset;

			if (value < 0)
				value = 0;
			else if (value > 255)
				value = 255;

			line[col] = value;
		}
	}
}
531