1 /*
2  *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 
12 #include "vp8/common/onyxc_int.h"
13 #include "onyx_int.h"
14 #include "vp8/common/systemdependent.h"
15 #include "quantize.h"
16 #include "vp8/common/alloccommon.h"
17 #include "mcomp.h"
18 #include "firstpass.h"
19 #include "vpx_scale/vpx_scale.h"
20 #include "vp8/common/extend.h"
21 #include "ratectrl.h"
22 #include "vp8/common/quant_common.h"
23 #include "segmentation.h"
24 #include "vpx_mem/vpx_mem.h"
25 #include "vp8/common/swapyv12buffer.h"
26 #include "vp8/common/threading.h"
27 #include "vpx_ports/vpx_timer.h"
28 
29 #include <math.h>
30 #include <limits.h>
31 
/* Compile-time switches for the alt-ref temporal filter.
 *  ALT_REF_MC_ENABLED     - 1 enables, 0 disables the motion search
 *                           (motion compensation) used by the filter.
 *  ALT_REF_SUBPEL_ENABLED - 1 enables, 0 disables the sub-pixel refinement
 *                           step of that motion search.
 */
#define ALT_REF_MC_ENABLED 1
#define ALT_REF_SUBPEL_ENABLED 1
34 
35 #if VP8_TEMPORAL_ALT_REF
36 
37 static void vp8_temporal_filter_predictors_mb_c
38 (
39     MACROBLOCKD *x,
40     unsigned char *y_mb_ptr,
41     unsigned char *u_mb_ptr,
42     unsigned char *v_mb_ptr,
43     int stride,
44     int mv_row,
45     int mv_col,
46     unsigned char *pred
47 )
48 {
49     int offset;
50     unsigned char *yptr, *uptr, *vptr;
51 
52     /* Y */
53     yptr = y_mb_ptr + (mv_row >> 3) * stride + (mv_col >> 3);
54 
55     if ((mv_row | mv_col) & 7)
56     {
57         x->subpixel_predict16x16(yptr, stride,
58                                     mv_col & 7, mv_row & 7, &pred[0], 16);
59     }
60     else
61     {
62         vp8_copy_mem16x16(yptr, stride, &pred[0], 16);
63     }
64 
65     /* U & V */
66     mv_row >>= 1;
readstr(FILE * f,char * buf,int nlabort)67     mv_col >>= 1;
68     stride = (stride + 1) >> 1;
69     offset = (mv_row >> 3) * stride + (mv_col >> 3);
70     uptr = u_mb_ptr + offset;
71     vptr = v_mb_ptr + offset;
72 
73     if ((mv_row | mv_col) & 7)
74     {
75         x->subpixel_predict8x8(uptr, stride,
76                             mv_col & 7, mv_row & 7, &pred[256], 8);
77         x->subpixel_predict8x8(vptr, stride,
78                             mv_col & 7, mv_row & 7, &pred[320], 8);
79     }
80     else
81     {
82         vp8_copy_mem8x8(uptr, stride, &pred[256], 8);
83         vp8_copy_mem8x8(vptr, stride, &pred[320], 8);
84     }
85 }
/* Accumulate one predicted block into the running filter sums.
 *
 * For every sample a weight in the range 0..16 is derived from the squared
 * difference between the reference sample (frame1) and the predicted sample
 * (frame2), scaled by filter_weight, and then added to count[] while
 * weight * predicted sample is added to accumulator[].
 *
 *  frame1        - reference block, read with the given stride
 *  stride        - distance in bytes between rows of frame1
 *  frame2        - predicted block, tightly packed
 *                  (block_size * block_size bytes, read sequentially)
 *  block_size    - width and height of the block in samples
 *  strength      - right shift applied to the scaled squared difference;
 *                  larger values tolerate larger differences
 *  filter_weight - multiplier applied to the per-sample weight
 *  accumulator   - block_size * block_size running weighted sums (updated)
 *  count         - block_size * block_size running weight totals (updated)
 */
void vp8_temporal_filter_apply_c
(
    unsigned char *frame1,
    unsigned int stride,
    unsigned char *frame2,
    unsigned int block_size,
    int strength,
    int filter_weight,
    unsigned int *accumulator,
    unsigned short *count
)
{
    unsigned int i, j, k;
    int modifier;
    int byte = 0;
    /* Half of the divisor implied by the shift, so the shift rounds */
    const int rounding = strength > 0 ? 1 << (strength - 1) : 0;

    for (i = 0, k = 0; i < block_size; i++)
    {
        for (j = 0; j < block_size; j++, k++)
        {
            int src_byte = frame1[byte];
            int pixel_value = *frame2++;

            modifier   = src_byte - pixel_value;
            /* This is an integer approximation of:
             * float coeff = (3.0 * modifier * modifier) / pow(2, strength);
             * modifier =  (int)roundf(coeff > 16 ? 0 : 16-coeff);
             */
            modifier  *= modifier;
            modifier  *= 3;
            modifier  += rounding;
            modifier >>= strength;

            if (modifier > 16)
                modifier = 16;

            modifier = 16 - modifier;
            modifier *= filter_weight;

            count[k] += modifier;
            accumulator[k] += modifier * pixel_value;

            byte++;
        }

        /* Skip the remainder of the row in the strided reference block */
        byte += stride - block_size;
    }
}
136 
#if ALT_REF_MC_ENABLED

/* Motion-search one macroblock of the alt-ref frame against another frame.
 *
 * The encoder's macroblock state (cpi->mb) is temporarily pointed at the two
 * frames, a hex search followed by an optional sub-pixel refinement is run,
 * and the state is restored before returning.
 *
 *  cpi          - encoder instance; cpi->mb is borrowed for the search
 *  arf_frame    - frame providing the source block
 *  frame_ptr    - frame that is searched
 *  mb_offset    - offset of the macroblock within the Y planes
 *  error_thresh - unused
 *
 * Returns the value produced by the last search step that ran.  The motion
 * vector is passed to both searches as &cpi->mb.e_mbd.block[0].bmi.mv;
 * NOTE(review): the searches presumably store the resulting vector there
 * for the caller to read - confirm against their definitions.
 */
static int vp8_temporal_filter_find_matching_mb_c
(
    VP8_COMP *cpi,
    YV12_BUFFER_CONFIG *arf_frame,
    YV12_BUFFER_CONFIG *frame_ptr,
    int mb_offset,
    int error_thresh
)
{
    MACROBLOCK *x = &cpi->mb;
    int step_param;
    int sadpb = x->sadperbit16;
    int bestsme = INT_MAX;

    BLOCK *b = &x->block[0];
    BLOCKD *d = &x->e_mbd.block[0];
    int_mv best_ref_mv1;
    int_mv best_ref_mv1_full; /* full-pixel value of best_ref_mv1 */

    /* Save input state */
    unsigned char **base_src = b->base_src;
    int src = b->src;
    int src_stride = b->src_stride;
    unsigned char *base_pre = x->e_mbd.pre.y_buffer;
    int pre = d->offset;
    int pre_stride = x->e_mbd.pre.y_stride;

    (void)error_thresh;

    /* The search is centred on the zero vector */
    best_ref_mv1.as_int = 0;
    best_ref_mv1_full.as_mv.col = best_ref_mv1.as_mv.col >> 3;
    best_ref_mv1_full.as_mv.row = best_ref_mv1.as_mv.row >> 3;

    /* Setup frame pointers */
    b->base_src = &arf_frame->y_buffer;
    b->src_stride = arf_frame->y_stride;
    b->src = mb_offset;

    x->e_mbd.pre.y_buffer = frame_ptr->y_buffer;
    x->e_mbd.pre.y_stride = frame_ptr->y_stride;
    d->offset = mb_offset;

    /* Further step/diamond searches as necessary */
    if (cpi->Speed < 8)
    {
        step_param = cpi->sf.first_step + (cpi->Speed > 5);
    }
    else
    {
        step_param = cpi->sf.first_step + 2;
    }

    /* TODO Check that the 16x16 vf & sdf are selected here */
    /* Ignore mv costing by sending NULL cost arrays */
    bestsme = vp8_hex_search(x, b, d, &best_ref_mv1_full, &d->bmi.mv,
                             step_param, sadpb,
                             &cpi->fn_ptr[BLOCK_16X16],
                             NULL, NULL, &best_ref_mv1);

#if ALT_REF_SUBPEL_ENABLED
    /* Try sub-pixel MC? */
    {
        int distortion;
        unsigned int sse;
        /* Ignore mv costing by sending NULL cost array */
        bestsme = cpi->find_fractional_mv_step(x, b, d,
                                               &d->bmi.mv,
                                               &best_ref_mv1,
                                               x->errorperbit,
                                               &cpi->fn_ptr[BLOCK_16X16],
                                               NULL, &distortion, &sse);
    }
#endif

    /* Restore input state */
    b->base_src = base_src;
    b->src = src;
    b->src_stride = src_stride;
    x->e_mbd.pre.y_buffer = base_pre;
    d->offset = pre;
    x->e_mbd.pre.y_stride = pre_stride;

    return bestsme;
}
#endif
224 
225 static void vp8_temporal_filter_iterate_c
226 (
227     VP8_COMP *cpi,
228     int frame_count,
main()229     int alt_ref_index,
230     int strength
231 )
232 {
233     int byte;
234     int frame;
235     int mb_col, mb_row;
236     unsigned int filter_weight;
237     int mb_cols = cpi->common.mb_cols;
238     int mb_rows = cpi->common.mb_rows;
239     int mb_y_offset = 0;
240     int mb_uv_offset = 0;
241     DECLARE_ALIGNED(16, unsigned int, accumulator[16*16 + 8*8 + 8*8]);
242     DECLARE_ALIGNED(16, unsigned short, count[16*16 + 8*8 + 8*8]);
243     MACROBLOCKD *mbd = &cpi->mb.e_mbd;
244     YV12_BUFFER_CONFIG *f = cpi->frames[alt_ref_index];
245     unsigned char *dst1, *dst2;
246     DECLARE_ALIGNED(16, unsigned char,  predictor[16*16 + 8*8 + 8*8]);
247 
248     /* Save input state */
249     unsigned char *y_buffer = mbd->pre.y_buffer;
250     unsigned char *u_buffer = mbd->pre.u_buffer;
251     unsigned char *v_buffer = mbd->pre.v_buffer;
252 
253     for (mb_row = 0; mb_row < mb_rows; mb_row++)
254     {
255 #if ALT_REF_MC_ENABLED
256         /* Source frames are extended to 16 pixels.  This is different than
257          *  L/A/G reference frames that have a border of 32 (VP8BORDERINPIXELS)
258          * A 6 tap filter is used for motion search.  This requires 2 pixels
259          *  before and 3 pixels after.  So the largest Y mv on a border would
260          *  then be 16 - 3.  The UV blocks are half the size of the Y and
261          *  therefore only extended by 8.  The largest mv that a UV block
262          *  can support is 8 - 3.  A UV mv is half of a Y mv.
263          *  (16 - 3) >> 1 == 6 which is greater than 8 - 3.
264          * To keep the mv in play for both Y and UV planes the max that it
265          *  can be on a border is therefore 16 - 5.
266          */
267         cpi->mb.mv_row_min = -((mb_row * 16) + (16 - 5));
268         cpi->mb.mv_row_max = ((cpi->common.mb_rows - 1 - mb_row) * 16)
269                                 + (16 - 5);
270 #endif
271 
272         for (mb_col = 0; mb_col < mb_cols; mb_col++)
273         {
274             int i, j, k;
275             int stride;
276 
277             memset(accumulator, 0, 384*sizeof(unsigned int));
278             memset(count, 0, 384*sizeof(unsigned short));
279 
280 #if ALT_REF_MC_ENABLED
281             cpi->mb.mv_col_min = -((mb_col * 16) + (16 - 5));
282             cpi->mb.mv_col_max = ((cpi->common.mb_cols - 1 - mb_col) * 16)
283                                     + (16 - 5);
284 #endif
285 
286             for (frame = 0; frame < frame_count; frame++)
287             {
288                 if (cpi->frames[frame] == NULL)
289                     continue;
290 
291                 mbd->block[0].bmi.mv.as_mv.row = 0;
292                 mbd->block[0].bmi.mv.as_mv.col = 0;
293 
294                 if (frame == alt_ref_index)
295                 {
296                     filter_weight = 2;
297                 }
298                 else
299                 {
300                     int err = 0;
301 #if ALT_REF_MC_ENABLED
302 #define THRESH_LOW   10000
303 #define THRESH_HIGH  20000
304                     /* Find best match in this frame by MC */
305                     err = vp8_temporal_filter_find_matching_mb_c
306                               (cpi,
307                                cpi->frames[alt_ref_index],
308                                cpi->frames[frame],
309                                mb_y_offset,
310                                THRESH_LOW);
311 #endif
312                     /* Assign higher weight to matching MB if it's error
313                      * score is lower. If not applying MC default behavior
314                      * is to weight all MBs equal.
315                      */
316                     filter_weight = err<THRESH_LOW
317                                        ? 2 : err<THRESH_HIGH ? 1 : 0;
318                 }
319 
320                 if (filter_weight != 0)
321                 {
322                     /* Construct the predictors */
323                     vp8_temporal_filter_predictors_mb_c
324                         (mbd,
325                          cpi->frames[frame]->y_buffer + mb_y_offset,
326                          cpi->frames[frame]->u_buffer + mb_uv_offset,
327                          cpi->frames[frame]->v_buffer + mb_uv_offset,
328                          cpi->frames[frame]->y_stride,
329                          mbd->block[0].bmi.mv.as_mv.row,
330                          mbd->block[0].bmi.mv.as_mv.col,
331                          predictor);
332 
333                     /* Apply the filter (YUV) */
334                     vp8_temporal_filter_apply
335                         (f->y_buffer + mb_y_offset,
336                          f->y_stride,
337                          predictor,
338                          16,
339                          strength,
340                          filter_weight,
341                          accumulator,
342                          count);
343 
344                     vp8_temporal_filter_apply
345                         (f->u_buffer + mb_uv_offset,
346                          f->uv_stride,
347                          predictor + 256,
348                          8,
349                          strength,
350                          filter_weight,
351                          accumulator + 256,
352                          count + 256);
353 
354                     vp8_temporal_filter_apply
355                         (f->v_buffer + mb_uv_offset,
356                          f->uv_stride,
357                          predictor + 320,
358                          8,
359                          strength,
360                          filter_weight,
361                          accumulator + 320,
362                          count + 320);
363                 }
364             }
365 
366             /* Normalize filter output to produce AltRef frame */
367             dst1 = cpi->alt_ref_buffer.y_buffer;
368             stride = cpi->alt_ref_buffer.y_stride;
369             byte = mb_y_offset;
370             for (i = 0,k = 0; i < 16; i++)
371             {
372                 for (j = 0; j < 16; j++, k++)
373                 {
374                     unsigned int pval = accumulator[k] + (count[k] >> 1);
375                     pval *= cpi->fixed_divide[count[k]];
376                     pval >>= 19;
377 
378                     dst1[byte] = (unsigned char)pval;
379 
380                     /* move to next pixel */
381                     byte++;
382                 }
383 
384                 byte += stride - 16;
385             }
386 
387             dst1 = cpi->alt_ref_buffer.u_buffer;
388             dst2 = cpi->alt_ref_buffer.v_buffer;
389             stride = cpi->alt_ref_buffer.uv_stride;
390             byte = mb_uv_offset;
391             for (i = 0,k = 256; i < 8; i++)
392             {
393                 for (j = 0; j < 8; j++, k++)
394                 {
395                     int m=k+64;
396 
397                     /* U */
398                     unsigned int pval = accumulator[k] + (count[k] >> 1);
399                     pval *= cpi->fixed_divide[count[k]];
400                     pval >>= 19;
401                     dst1[byte] = (unsigned char)pval;
402 
403                     /* V */
404                     pval = accumulator[m] + (count[m] >> 1);
405                     pval *= cpi->fixed_divide[count[m]];
406                     pval >>= 19;
407                     dst2[byte] = (unsigned char)pval;
408 
409                     /* move to next pixel */
410                     byte++;
411                 }
412 
413                 byte += stride - 8;
414             }
415 
416             mb_y_offset += 16;
417             mb_uv_offset += 8;
418         }
419 
420         mb_y_offset += 16*(f->y_stride-mb_cols);
421         mb_uv_offset += 8*(f->uv_stride-mb_cols);
422     }
423 
424     /* Restore input state */
425     mbd->pre.y_buffer = y_buffer;
426     mbd->pre.u_buffer = u_buffer;
427     mbd->pre.v_buffer = v_buffer;
428 }
429 
430 void vp8_temporal_filter_prepare_c
431 (
432     VP8_COMP *cpi,
433     int distance
434 )
435 {
436     int frame = 0;
437 
438     int num_frames_backward = 0;
439     int num_frames_forward = 0;
440     int frames_to_blur_backward = 0;
441     int frames_to_blur_forward = 0;
442     int frames_to_blur = 0;
443     int start_frame = 0;
444 
445     int strength = cpi->oxcf.arnr_strength;
446 
447     int blur_type = cpi->oxcf.arnr_type;
448 
449     int max_frames = cpi->active_arnr_frames;
450 
451     num_frames_backward = distance;
452     num_frames_forward = vp8_lookahead_depth(cpi->lookahead)
453                          - (num_frames_backward + 1);
454 
455     switch (blur_type)
456     {
457     case 1:
458         /* Backward Blur */
459 
460         frames_to_blur_backward = num_frames_backward;
461 
462         if (frames_to_blur_backward >= max_frames)
463             frames_to_blur_backward = max_frames - 1;
464 
465         frames_to_blur = frames_to_blur_backward + 1;
466         break;
467 
468     case 2:
469         /* Forward Blur */
470 
471         frames_to_blur_forward = num_frames_forward;
472 
473         if (frames_to_blur_forward >= max_frames)
474             frames_to_blur_forward = max_frames - 1;
475 
476         frames_to_blur = frames_to_blur_forward + 1;
477         break;
478 
479     case 3:
480     default:
481         /* Center Blur */
482         frames_to_blur_forward = num_frames_forward;
483         frames_to_blur_backward = num_frames_backward;
484 
485         if (frames_to_blur_forward > frames_to_blur_backward)
486             frames_to_blur_forward = frames_to_blur_backward;
487 
488         if (frames_to_blur_backward > frames_to_blur_forward)
489             frames_to_blur_backward = frames_to_blur_forward;
490 
491         /* When max_frames is even we have 1 more frame backward than forward */
492         if (frames_to_blur_forward > (max_frames - 1) / 2)
493             frames_to_blur_forward = ((max_frames - 1) / 2);
494 
495         if (frames_to_blur_backward > (max_frames / 2))
496             frames_to_blur_backward = (max_frames / 2);
497 
498         frames_to_blur = frames_to_blur_backward + frames_to_blur_forward + 1;
499         break;
500     }
501 
502     start_frame = distance + frames_to_blur_forward;
503 
504     /* Setup frame pointers, NULL indicates frame not included in filter */
505     memset(cpi->frames, 0, max_frames*sizeof(YV12_BUFFER_CONFIG *));
506     for (frame = 0; frame < frames_to_blur; frame++)
507     {
508         int which_buffer =  start_frame - frame;
509         struct lookahead_entry* buf = vp8_lookahead_peek(cpi->lookahead,
510                                                          which_buffer,
511                                                          PEEK_FORWARD);
512         cpi->frames[frames_to_blur-1-frame] = &buf->img;
513     }
514 
515     vp8_temporal_filter_iterate_c (
516         cpi,
517         frames_to_blur,
518         frames_to_blur_backward,
519         strength );
520 }
521 #endif
522