1 /*
2  *  Copyright (c) 2012 The WebM project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 #include <assert.h>
12 #include <limits.h>
13 #include "vpx_scale/yv12config.h"
14 #include "vpx/vpx_integer.h"
15 #include "vp9/common/vp9_reconinter.h"
16 #include "vp9/encoder/vp9_context_tree.h"
17 #include "vp9/encoder/vp9_denoiser.h"
18 
19 /* The VP9 denoiser is a work-in-progress. It currently is only designed to work
20  * with speed 6, though it (inexplicably) seems to also work with speed 5 (one
21  * would need to modify the source code in vp9_pickmode.c and vp9_encoder.c to
22  * make the calls to the vp9_denoiser_* functions when in speed 5).
23  *
24  * The implementation is very similar to that of the VP8 denoiser. While
25  * choosing the motion vectors / reference frames, the denoiser is run, and if
26  * it did not modify the signal too much, the denoised block is copied to the
27  * signal.
28  */
29 
30 #ifdef OUTPUT_YUV_DENOISED
31 static void make_grayscale(YV12_BUFFER_CONFIG *yuv);
32 #endif
33 
// Largest per-pixel absolute difference for which the strong filter takes the
// motion-compensated value unchanged: 3, or 4 when increase_denoising is set.
// The block size does not influence the result.
static int absdiff_thresh(BLOCK_SIZE bs, int increase_denoising) {
  (void)bs;
  if (increase_denoising) {
    return 4;
  }
  return 3;
}
38 
// Limit on the dampening delta: the filter gives up (COPY_BLOCK) once the
// delta reaches this value. Currently a constant 4; both arguments are unused.
static int delta_thresh(BLOCK_SIZE bs, int increase_denoising) {
  const int limit = 4;
  (void)increase_denoising;
  (void)bs;
  return limit;
}
44 
// Threshold compared against the squared motion-vector magnitude
// (row * row + col * col) computed in this file. Currently a constant 625;
// both arguments are unused.
static int noise_motion_thresh(BLOCK_SIZE bs, int increase_denoising) {
  const int limit = 625;
  (void)increase_denoising;
  (void)bs;
  return limit;
}
50 
// Upper bound on the SSE of the chosen prediction: the pixel count of the
// block (1 << num_pels_log2_lookup[bs]) times 40, or times 60 when
// increase_denoising is set.
static unsigned int sse_thresh(BLOCK_SIZE bs, int increase_denoising) {
  const int per_pixel = increase_denoising ? 60 : 40;
  return (1 << num_pels_log2_lookup[bs]) * per_pixel;
}
54 
// Minimum SSE gain the non-zero motion vector must show over the zero motion
// vector. Returns 0 when motion_magnitude exceeds noise_motion_thresh();
// otherwise 20 times the number of pixels in the block.
static int sse_diff_thresh(BLOCK_SIZE bs, int increase_denoising,
                           int motion_magnitude) {
  const int high_motion =
      motion_magnitude > noise_motion_thresh(bs, increase_denoising);
  return high_motion ? 0 : (1 << num_pels_log2_lookup[bs]) * 20;
}
64 
// Largest absolute total adjustment accepted from the strong filter pass:
// the block's pixel count times 2, or times 3 when increase_denoising is set.
// Kept with external linkage, as in the original definition.
int total_adj_strong_thresh(BLOCK_SIZE bs, int increase_denoising) {
  const int per_pixel = increase_denoising ? 3 : 2;
  return (1 << num_pels_log2_lookup[bs]) * per_pixel;
}
68 
// Largest absolute total adjustment accepted after the dampening pass:
// the block's pixel count times 2, or times 3 when increase_denoising is set.
static int total_adj_weak_thresh(BLOCK_SIZE bs, int increase_denoising) {
  const int per_pixel = increase_denoising ? 3 : 2;
  return (1 << num_pels_log2_lookup[bs]) * per_pixel;
}
72 
73 // TODO(jackychen): If increase_denoising is enabled in the future,
74 // we might need to update the code for calculating 'total_adj' in
75 // case the C code is not bit-exact with corresponding sse2 code.
// Temporal denoising filter for one block (C implementation).
//
// sig / sig_stride:        source block and its row stride.
// mc_avg / mc_avg_stride:  motion-compensated running average for the block.
// avg / avg_stride:        output; receives the denoised block.
// increase_denoising:      non-zero selects the more aggressive thresholds.
// bs:                      block size; determines the number of rows/columns.
// motion_magnitude:        squared motion magnitude supplied by the caller.
//
// Returns FILTER_BLOCK when the accumulated adjustment stays within the
// thresholds (avg then holds the filtered block), COPY_BLOCK otherwise.
int vp9_denoiser_filter_c(const uint8_t *sig, int sig_stride,
                          const uint8_t *mc_avg,
                          int mc_avg_stride,
                          uint8_t *avg, int avg_stride,
                          int increase_denoising,
                          BLOCK_SIZE bs,
                          int motion_magnitude) {
  // These depend only on bs / increase_denoising, so compute them once.
  const int rows = 4 << b_height_log2_lookup[bs];
  const int cols = 4 << b_width_log2_lookup[bs];
  const int keep_limit = absdiff_thresh(bs, increase_denoising);
  const int strong_limit = total_adj_strong_thresh(bs, increase_denoising);
  const uint8_t *sig_row = sig;
  const uint8_t *mc_row = mc_avg;
  uint8_t *avg_row = avg;
  // Adjustment applied for small, medium and large differences.
  int step[3] = {3, 4, 6};
  int total_adj = 0;
  int delta;
  int r, c;

  // With little motion the filter is made more aggressive: every level is
  // raised by 1, or by 2 for blocks labeled for increased denoising.
  if (motion_magnitude <= MOTION_MAGNITUDE_THRESHOLD) {
    const int bump = increase_denoising ? 2 : 1;
    step[0] += bump;
    step[1] += bump;
    step[2] += bump;
  }

  // Pass 1: strong temporal filter.
  for (r = 0; r < rows; ++r) {
    for (c = 0; c < cols; ++c) {
      const int diff = mc_row[c] - sig_row[c];
      const int absdiff = abs(diff);
      int adj;

      if (absdiff <= keep_limit) {
        // Close enough: take the motion-compensated value as is.
        avg_row[c] = mc_row[c];
        total_adj += diff;
        continue;
      }

      if (absdiff >= 4 && absdiff <= 7) {
        adj = step[0];
      } else if (absdiff >= 8 && absdiff <= 15) {
        adj = step[1];
      } else {
        adj = step[2];
      }

      // Move the source pixel towards the running average, clamped to 8 bits.
      if (diff > 0) {
        avg_row[c] = MIN(UINT8_MAX, sig_row[c] + adj);
        total_adj += adj;
      } else {
        avg_row[c] = MAX(0, sig_row[c] - adj);
        total_adj -= adj;
      }
    }
    sig_row += sig_stride;
    avg_row += avg_stride;
    mc_row += mc_avg_stride;
  }

  // The strong filter result is acceptable if it changed the block little.
  if (abs(total_adj) <= strong_limit) {
    return FILTER_BLOCK;
  }

  // Otherwise derive a per-pixel dampening amount from the excess adjustment;
  // if it is too large, give up on filtering this block.
  delta = ((abs(total_adj) - strong_limit) >> num_pels_log2_lookup[bs]) + 1;
  if (delta >= delta_thresh(bs, increase_denoising)) {
    return COPY_BLOCK;
  }

  // Pass 2: pull the pass-1 output back towards the source by at most delta.
  sig_row = sig;
  mc_row = mc_avg;
  avg_row = avg;
  for (r = 0; r < rows; ++r) {
    for (c = 0; c < cols; ++c) {
      const int diff = mc_row[c] - sig_row[c];
      int adj = abs(diff);
      adj = (adj > delta) ? delta : adj;

      if (diff > 0) {
        // Pass 1 adjusted upwards here, so bring the denoised value down.
        avg_row[c] = MAX(0, avg_row[c] - adj);
        total_adj -= adj;
      } else {
        // Pass 1 adjusted downwards here, so bring the denoised value up.
        avg_row[c] = MIN(UINT8_MAX, avg_row[c] + adj);
        total_adj += adj;
      }
    }
    sig_row += sig_stride;
    avg_row += avg_stride;
    mc_row += mc_avg_stride;
  }

  // Accept the dampened result only if the net adjustment is now small enough.
  return (abs(total_adj) <= total_adj_weak_thresh(bs, increase_denoising))
             ? FILTER_BLOCK
             : COPY_BLOCK;
}
187 
// Returns a pointer to the top-left pixel of the block at (mi_row, mi_col)
// inside framebuf. Both coordinates are scaled by 8 to obtain pixel offsets;
// stride is the distance between consecutive rows of framebuf.
static uint8_t *block_start(uint8_t *framebuf, int stride,
                            int mi_row, int mi_col) {
  const int row_offset = stride * mi_row * 8;
  const int col_offset = mi_col * 8;
  return framebuf + row_offset + col_offset;
}
192 
perform_motion_compensation(VP9_DENOISER * denoiser,MACROBLOCK * mb,BLOCK_SIZE bs,int increase_denoising,int mi_row,int mi_col,PICK_MODE_CONTEXT * ctx,int * motion_magnitude)193 static VP9_DENOISER_DECISION perform_motion_compensation(VP9_DENOISER *denoiser,
194                                                          MACROBLOCK *mb,
195                                                          BLOCK_SIZE bs,
196                                                          int increase_denoising,
197                                                          int mi_row,
198                                                          int mi_col,
199                                                          PICK_MODE_CONTEXT *ctx,
200                                                          int *motion_magnitude
201                                                          ) {
202   int mv_col, mv_row;
203   int sse_diff = ctx->zeromv_sse - ctx->newmv_sse;
204   MV_REFERENCE_FRAME frame;
205   MACROBLOCKD *filter_mbd = &mb->e_mbd;
206   MB_MODE_INFO *mbmi = &filter_mbd->mi[0]->mbmi;
207   MB_MODE_INFO saved_mbmi;
208   int i, j;
209   struct buf_2d saved_dst[MAX_MB_PLANE];
210   struct buf_2d saved_pre[MAX_MB_PLANE][2];  // 2 pre buffers
211 
212   mv_col = ctx->best_sse_mv.as_mv.col;
213   mv_row = ctx->best_sse_mv.as_mv.row;
214   *motion_magnitude = mv_row * mv_row + mv_col * mv_col;
215   frame = ctx->best_reference_frame;
216 
217   saved_mbmi = *mbmi;
218 
219   // If the best reference frame uses inter-prediction and there is enough of a
220   // difference in sum-squared-error, use it.
221   if (frame != INTRA_FRAME &&
222       sse_diff > sse_diff_thresh(bs, increase_denoising, *motion_magnitude)) {
223     mbmi->ref_frame[0] = ctx->best_reference_frame;
224     mbmi->mode = ctx->best_sse_inter_mode;
225     mbmi->mv[0] = ctx->best_sse_mv;
226   } else {
227     // Otherwise, use the zero reference frame.
228     frame = ctx->best_zeromv_reference_frame;
229 
230     mbmi->ref_frame[0] = ctx->best_zeromv_reference_frame;
231     mbmi->mode = ZEROMV;
232     mbmi->mv[0].as_int = 0;
233 
234     ctx->best_sse_inter_mode = ZEROMV;
235     ctx->best_sse_mv.as_int = 0;
236     ctx->newmv_sse = ctx->zeromv_sse;
237   }
238 
239   if (ctx->newmv_sse > sse_thresh(bs, increase_denoising)) {
240     // Restore everything to its original state
241     *mbmi = saved_mbmi;
242     return COPY_BLOCK;
243   }
244   if (*motion_magnitude >
245      (noise_motion_thresh(bs, increase_denoising) << 3)) {
246     // Restore everything to its original state
247     *mbmi = saved_mbmi;
248     return COPY_BLOCK;
249   }
250 
251   // We will restore these after motion compensation.
252   for (i = 0; i < MAX_MB_PLANE; ++i) {
253     for (j = 0; j < 2; ++j) {
254       saved_pre[i][j] = filter_mbd->plane[i].pre[j];
255     }
256     saved_dst[i] = filter_mbd->plane[i].dst;
257   }
258 
259   // Set the pointers in the MACROBLOCKD to point to the buffers in the denoiser
260   // struct.
261   for (j = 0; j < 2; ++j) {
262     filter_mbd->plane[0].pre[j].buf =
263         block_start(denoiser->running_avg_y[frame].y_buffer,
264                     denoiser->running_avg_y[frame].y_stride,
265                     mi_row, mi_col);
266     filter_mbd->plane[0].pre[j].stride =
267         denoiser->running_avg_y[frame].y_stride;
268     filter_mbd->plane[1].pre[j].buf =
269         block_start(denoiser->running_avg_y[frame].u_buffer,
270                     denoiser->running_avg_y[frame].uv_stride,
271                     mi_row, mi_col);
272     filter_mbd->plane[1].pre[j].stride =
273         denoiser->running_avg_y[frame].uv_stride;
274     filter_mbd->plane[2].pre[j].buf =
275         block_start(denoiser->running_avg_y[frame].v_buffer,
276                     denoiser->running_avg_y[frame].uv_stride,
277                     mi_row, mi_col);
278     filter_mbd->plane[2].pre[j].stride =
279         denoiser->running_avg_y[frame].uv_stride;
280   }
281   filter_mbd->plane[0].dst.buf =
282       block_start(denoiser->mc_running_avg_y.y_buffer,
283                   denoiser->mc_running_avg_y.y_stride,
284                   mi_row, mi_col);
285   filter_mbd->plane[0].dst.stride = denoiser->mc_running_avg_y.y_stride;
286   filter_mbd->plane[1].dst.buf =
287       block_start(denoiser->mc_running_avg_y.u_buffer,
288                   denoiser->mc_running_avg_y.uv_stride,
289                   mi_row, mi_col);
290   filter_mbd->plane[1].dst.stride = denoiser->mc_running_avg_y.uv_stride;
291   filter_mbd->plane[2].dst.buf =
292       block_start(denoiser->mc_running_avg_y.v_buffer,
293                   denoiser->mc_running_avg_y.uv_stride,
294                   mi_row, mi_col);
295   filter_mbd->plane[2].dst.stride = denoiser->mc_running_avg_y.uv_stride;
296 
297   vp9_build_inter_predictors_sby(filter_mbd, mv_row, mv_col, bs);
298 
299   // Restore everything to its original state
300   *mbmi = saved_mbmi;
301   for (i = 0; i < MAX_MB_PLANE; ++i) {
302     for (j = 0; j < 2; ++j) {
303       filter_mbd->plane[i].pre[j] = saved_pre[i][j];
304     }
305     filter_mbd->plane[i].dst = saved_dst[i];
306   }
307 
308   mv_row = ctx->best_sse_mv.as_mv.row;
309   mv_col = ctx->best_sse_mv.as_mv.col;
310 
311   return FILTER_BLOCK;
312 }
313 
// Denoises the luma of one block.
//
// Runs motion compensation against the denoiser's running averages and, if
// that succeeds, the temporal filter. Depending on the final decision the
// block is then copied between the running average stored at index
// INTRA_FRAME and the macroblock's source buffer:
//   FILTER_BLOCK: vp9_convolve_copy(running average, ..., source, ...)
//   COPY_BLOCK:   vp9_convolve_copy(source, ..., running average, ...)
// NOTE(review): presumably vp9_convolve_copy takes (src, dst) in that order,
// i.e. a filtered block overwrites the source -- confirm.
void vp9_denoiser_denoise(VP9_DENOISER *denoiser, MACROBLOCK *mb,
                          int mi_row, int mi_col, BLOCK_SIZE bs,
                          PICK_MODE_CONTEXT *ctx) {
  const YV12_BUFFER_CONFIG *const running =
      &denoiser->running_avg_y[INTRA_FRAME];
  const YV12_BUFFER_CONFIG *const predicted = &denoiser->mc_running_avg_y;
  const int running_stride = running->y_stride;
  const int predicted_stride = predicted->y_stride;
  uint8_t *const running_blk =
      block_start(running->y_buffer, running_stride, mi_row, mi_col);
  uint8_t *const predicted_blk =
      block_start(predicted->y_buffer, predicted_stride, mi_row, mi_col);
  // Snapshot of the source plane descriptor, taken before motion compensation.
  const struct buf_2d src = mb->plane[0].src;
  const int width = num_4x4_blocks_wide_lookup[bs] << 2;
  const int height = num_4x4_blocks_high_lookup[bs] << 2;
  int motion_magnitude = 0;
  VP9_DENOISER_DECISION decision;

  decision = perform_motion_compensation(denoiser, mb, bs,
                                         denoiser->increase_denoising,
                                         mi_row, mi_col, ctx,
                                         &motion_magnitude);

  if (decision == FILTER_BLOCK) {
    // The filter is always run with increase_denoising disabled (0).
    decision = vp9_denoiser_filter(src.buf, src.stride,
                                   predicted_blk, predicted_stride,
                                   running_blk, running_stride,
                                   0, bs, motion_magnitude);
  }

  if (decision != FILTER_BLOCK) {
    // COPY_BLOCK
    vp9_convolve_copy(src.buf, src.stride, running_blk, running_stride,
                      NULL, 0, NULL, 0, width, height);
    return;
  }

  vp9_convolve_copy(running_blk, running_stride, src.buf, src.stride,
                    NULL, 0, NULL, 0, width, height);
}
350 
// Copies the luma (Y) plane of src into dest, one row at a time. Only the Y
// plane is copied. Both buffers must have the same luma width and height
// (checked with assert).
static void copy_frame(YV12_BUFFER_CONFIG dest, const YV12_BUFFER_CONFIG src) {
  const uint8_t *from = src.y_buffer;
  uint8_t *to = dest.y_buffer;
  int rows_left;

  assert(dest.y_width == src.y_width);
  assert(dest.y_height == src.y_height);

  for (rows_left = dest.y_height; rows_left > 0; --rows_left) {
    memcpy(to, from, dest.y_width);
    to += dest.y_stride;
    from += src.y_stride;
  }
}
365 
// Exchanges the luma buffer pointers (y_buffer) of dest and src; no pixel
// data is copied and no other field is touched. Both buffers must have the
// same luma width and height (checked with assert).
static void swap_frame_buffer(YV12_BUFFER_CONFIG *dest,
                              YV12_BUFFER_CONFIG *src) {
  uint8_t *const src_luma = src->y_buffer;
  assert(dest->y_width == src->y_width);
  assert(dest->y_height == src->y_height);
  src->y_buffer = dest->y_buffer;
  dest->y_buffer = src_luma;
}
374 
// Updates the denoiser's per-reference running averages after a frame.
//
// On a key frame, the luma of src is copied into every running average
// except the one at index INTRA_FRAME.
//
// On other frames, each reference flagged for refresh receives the running
// average currently held at index INTRA_FRAME. With a single refresh the
// luma buffer pointers are simply swapped. With more than one refresh a swap
// cannot be used: after the first swap the INTRA_FRAME slot holds the old
// buffer of the first reference, so later swaps would hand stale data to the
// remaining references. In that case the luma is copied instead.
void vp9_denoiser_update_frame_info(VP9_DENOISER *denoiser,
                                    YV12_BUFFER_CONFIG src,
                                    FRAME_TYPE frame_type,
                                    int refresh_alt_ref_frame,
                                    int refresh_golden_frame,
                                    int refresh_last_frame) {
  int refresh_count;

  if (frame_type == KEY_FRAME) {
    int i;
    // Start at 1 so as not to overwrite the INTRA_FRAME
    for (i = 1; i < MAX_REF_FRAMES; ++i)
      copy_frame(denoiser->running_avg_y[i], src);
    return;
  }

  /* For non key frames */
  refresh_count = (refresh_alt_ref_frame != 0) +
                  (refresh_golden_frame != 0) +
                  (refresh_last_frame != 0);

  if (refresh_count > 1) {
    // Several references are refreshed: each needs its own copy of the data.
    if (refresh_alt_ref_frame) {
      copy_frame(denoiser->running_avg_y[ALTREF_FRAME],
                 denoiser->running_avg_y[INTRA_FRAME]);
    }
    if (refresh_golden_frame) {
      copy_frame(denoiser->running_avg_y[GOLDEN_FRAME],
                 denoiser->running_avg_y[INTRA_FRAME]);
    }
    if (refresh_last_frame) {
      copy_frame(denoiser->running_avg_y[LAST_FRAME],
                 denoiser->running_avg_y[INTRA_FRAME]);
    }
    return;
  }

  // At most one reference is refreshed: exchanging buffer pointers suffices.
  if (refresh_alt_ref_frame) {
    swap_frame_buffer(&denoiser->running_avg_y[ALTREF_FRAME],
                      &denoiser->running_avg_y[INTRA_FRAME]);
  }
  if (refresh_golden_frame) {
    swap_frame_buffer(&denoiser->running_avg_y[GOLDEN_FRAME],
                      &denoiser->running_avg_y[INTRA_FRAME]);
  }
  if (refresh_last_frame) {
    swap_frame_buffer(&denoiser->running_avg_y[LAST_FRAME],
                      &denoiser->running_avg_y[INTRA_FRAME]);
  }
}
403 
// Resets the best-SSE trackers in ctx to UINT_MAX so that any SSE reported
// later to vp9_denoiser_update_frame_stats() compares as smaller.
void vp9_denoiser_reset_frame_stats(PICK_MODE_CONTEXT *ctx) {
  ctx->newmv_sse = UINT_MAX;
  ctx->zeromv_sse = UINT_MAX;
}
408 
// Records a candidate prediction in ctx if it beats the best one seen so far.
//
// Candidates whose first motion vector is zero compete for the zero-MV slot
// (zeromv_sse, best_zeromv_reference_frame); all others compete for the
// non-zero-MV slot (newmv_sse, best_sse_inter_mode, best_sse_mv,
// best_reference_frame). A slot is updated only when sse is strictly smaller
// than the stored value.
void vp9_denoiser_update_frame_stats(MB_MODE_INFO *mbmi, unsigned int sse,
                                     PREDICTION_MODE mode,
                                     PICK_MODE_CONTEXT *ctx) {
  // TODO(tkopp): Use both MVs if possible
  if (mbmi->mv[0].as_int == 0) {
    if (sse < ctx->zeromv_sse) {
      ctx->zeromv_sse = sse;
      ctx->best_zeromv_reference_frame = mbmi->ref_frame[0];
    }
    return;
  }

  if (sse < ctx->newmv_sse) {
    ctx->newmv_sse = sse;
    ctx->best_sse_inter_mode = mode;
    ctx->best_sse_mv = mbmi->mv[0];
    ctx->best_reference_frame = mbmi->ref_frame[0];
  }
}
425 
// Allocates all frame buffers owned by the denoiser: one running average per
// reference frame slot (MAX_REF_FRAMES of them) plus the motion-compensated
// prediction buffer.
//
// denoiser:          must not be NULL (checked with assert).
// width, height:     frame dimensions passed to vp9_alloc_frame_buffer().
// ssx, ssy:          passed through to vp9_alloc_frame_buffer().
// use_highbitdepth:  only present when CONFIG_VP9_HIGHBITDEPTH is enabled.
// border:            passed through to vp9_alloc_frame_buffer().
//
// Returns 0 on success. On any allocation failure vp9_denoiser_free() is
// called and 1 is returned. On success increase_denoising is cleared and
// frame_buffer_initialized is set.
int vp9_denoiser_alloc(VP9_DENOISER *denoiser, int width, int height,
                       int ssx, int ssy,
#if CONFIG_VP9_HIGHBITDEPTH
                       int use_highbitdepth,
#endif
                       int border) {
  int i, fail;
  const int legacy_byte_alignment = 0;
  assert(denoiser != NULL);

  for (i = 0; i < MAX_REF_FRAMES; ++i) {
    fail = vp9_alloc_frame_buffer(&denoiser->running_avg_y[i], width, height,
                                  ssx, ssy,
#if CONFIG_VP9_HIGHBITDEPTH
                                  use_highbitdepth,
#endif
                                  border, legacy_byte_alignment);
    if (fail) {
      vp9_denoiser_free(denoiser);
      return 1;
    }
#ifdef OUTPUT_YUV_DENOISED
    make_grayscale(&denoiser->running_avg_y[i]);
#endif
  }

  fail = vp9_alloc_frame_buffer(&denoiser->mc_running_avg_y, width, height,
                                ssx, ssy,
#if CONFIG_VP9_HIGHBITDEPTH
                                use_highbitdepth,
#endif
                                border, legacy_byte_alignment);
  if (fail) {
    vp9_denoiser_free(denoiser);
    return 1;
  }
#ifdef OUTPUT_YUV_DENOISED
  // Gray out the chroma of the buffer that was just allocated. Indexing
  // running_avg_y with i here would read one past the last element, because
  // i == MAX_REF_FRAMES once the loop above has finished.
  make_grayscale(&denoiser->mc_running_avg_y);
#endif
  denoiser->increase_denoising = 0;
  denoiser->frame_buffer_initialized = 1;

  return 0;
}
470 
// Releases every frame buffer owned by the denoiser and marks it as
// uninitialized. Passing NULL is allowed and does nothing.
void vp9_denoiser_free(VP9_DENOISER *denoiser) {
  int i;
  // The NULL check must come before any access through the pointer.
  if (denoiser == NULL) {
    return;
  }
  denoiser->frame_buffer_initialized = 0;
  for (i = 0; i < MAX_REF_FRAMES; ++i) {
    vp9_free_frame_buffer(&denoiser->running_avg_y[i]);
  }
  vp9_free_frame_buffer(&denoiser->mc_running_avg_y);
}
482 
#ifdef OUTPUT_YUV_DENOISED
// Fills both chroma planes of yuv with UINT8_MAX / 2, over uv_width columns
// and uv_height rows. The luma plane is not touched.
static void make_grayscale(YV12_BUFFER_CONFIG *yuv) {
  uint8_t *u_row = yuv->u_buffer;
  uint8_t *v_row = yuv->v_buffer;
  int row;

  for (row = 0; row < yuv->uv_height; ++row) {
    int col;
    for (col = 0; col < yuv->uv_width; ++col) {
      u_row[col] = UINT8_MAX / 2;
      v_row[col] = UINT8_MAX / 2;
    }
    u_row += yuv->uv_stride;
    v_row += yuv->uv_stride;
  }
}
#endif
499