1 /*
2  * VP7/VP8 compatible video decoder
3  *
4  * Copyright (C) 2010 David Conrad
5  * Copyright (C) 2010 Ronald S. Bultje
6  * Copyright (C) 2010 Fiona Glaser
7  * Copyright (C) 2012 Daniel Kang
8  * Copyright (C) 2014 Peter Ross
9  *
10  * This file is part of FFmpeg.
11  *
12  * FFmpeg is free software; you can redistribute it and/or
13  * modify it under the terms of the GNU Lesser General Public
14  * License as published by the Free Software Foundation; either
15  * version 2.1 of the License, or (at your option) any later version.
16  *
17  * FFmpeg is distributed in the hope that it will be useful,
18  * but WITHOUT ANY WARRANTY; without even the implied warranty of
19  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
20  * Lesser General Public License for more details.
21  *
22  * You should have received a copy of the GNU Lesser General Public
23  * License along with FFmpeg; if not, write to the Free Software
24  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
25  */
26 
27 #include "libavutil/imgutils.h"
28 
29 #include "avcodec.h"
30 #include "hwconfig.h"
31 #include "internal.h"
32 #include "mathops.h"
33 #include "rectangle.h"
34 #include "thread.h"
35 #include "vp8.h"
36 #include "vp8data.h"
37 
38 #if ARCH_ARM
39 #   include "arm/vp8.h"
40 #endif
41 
42 #if CONFIG_VP7_DECODER && CONFIG_VP8_DECODER
43 #define VPX(vp7, f) (vp7 ? vp7_ ## f : vp8_ ## f)
44 #elif CONFIG_VP7_DECODER
45 #define VPX(vp7, f) vp7_ ## f
46 #else // CONFIG_VP8_DECODER
47 #define VPX(vp7, f) vp8_ ## f
48 #endif
49 
/**
 * Free all dimension-dependent decoder buffers: per-thread state,
 * macroblock array, and the top-row prediction caches.  Safe to call on
 * a partially initialized context (av_freep() tolerates NULL and
 * s->thread_data is checked before use).
 */
static void free_buffers(VP8Context *s)
{
    int i;
    if (s->thread_data)
        for (i = 0; i < MAX_THREADS; i++) {
#if HAVE_THREADS
            /* tear down the sync primitives created in update_dimensions() */
            pthread_cond_destroy(&s->thread_data[i].cond);
            pthread_mutex_destroy(&s->thread_data[i].lock);
#endif
            av_freep(&s->thread_data[i].filter_strength);
        }
    av_freep(&s->thread_data);
    av_freep(&s->macroblocks_base);
    av_freep(&s->intra4x4_pred_mode_top);
    av_freep(&s->top_nnz);
    av_freep(&s->top_border);

    /* s->macroblocks aliases macroblocks_base + 1; clear the alias too */
    s->macroblocks = NULL;
}
69 
/**
 * Allocate the picture buffer for a frame plus its per-frame metadata
 * (segment map, optional hwaccel private data).
 *
 * @param ref non-zero if the frame may be held as a reference
 * @return 0 on success, negative AVERROR on failure (frame fully released)
 */
static int vp8_alloc_frame(VP8Context *s, VP8Frame *f, int ref)
{
    int ret;
    if ((ret = ff_thread_get_buffer(s->avctx, &f->tf,
                                    ref ? AV_GET_BUFFER_FLAG_REF : 0)) < 0)
        return ret;
    /* one segment-id byte per macroblock */
    if (!(f->seg_map = av_buffer_allocz(s->mb_width * s->mb_height)))
        goto fail;
    if (s->avctx->hwaccel) {
        const AVHWAccel *hwaccel = s->avctx->hwaccel;
        if (hwaccel->frame_priv_data_size) {
            f->hwaccel_priv_buf = av_buffer_allocz(hwaccel->frame_priv_data_size);
            if (!f->hwaccel_priv_buf)
                goto fail;
            f->hwaccel_picture_private = f->hwaccel_priv_buf->data;
        }
    }
    return 0;

fail:
    av_buffer_unref(&f->seg_map);
    ff_thread_release_buffer(s->avctx, &f->tf);
    return AVERROR(ENOMEM);
}
94 
/** Release the picture buffer and all per-frame metadata attached to f. */
static void vp8_release_frame(VP8Context *s, VP8Frame *f)
{
    av_buffer_unref(&f->seg_map);
    av_buffer_unref(&f->hwaccel_priv_buf);
    f->hwaccel_picture_private = NULL;
    ff_thread_release_buffer(s->avctx, &f->tf);
}
102 
103 #if CONFIG_VP8_DECODER
/**
 * Make dst a new reference to the frame held by src (picture buffer,
 * segment map and hwaccel private data).
 *
 * @return 0 on success, negative AVERROR on failure; dst is fully
 *         released on any failure path.
 */
static int vp8_ref_frame(VP8Context *s, VP8Frame *dst, VP8Frame *src)
{
    int ret;

    vp8_release_frame(s, dst);

    if ((ret = ff_thread_ref_frame(&dst->tf, &src->tf)) < 0)
        return ret;
    if (src->seg_map &&
        !(dst->seg_map = av_buffer_ref(src->seg_map))) {
        vp8_release_frame(s, dst);
        return AVERROR(ENOMEM);
    }
    if (src->hwaccel_picture_private) {
        dst->hwaccel_priv_buf = av_buffer_ref(src->hwaccel_priv_buf);
        if (!dst->hwaccel_priv_buf) {
            /* fix: release the tf/seg_map references already taken,
             * matching the seg_map failure path above */
            vp8_release_frame(s, dst);
            return AVERROR(ENOMEM);
        }
        dst->hwaccel_picture_private = dst->hwaccel_priv_buf->data;
    }

    return 0;
}
126 #endif /* CONFIG_VP8_DECODER */
127 
/**
 * Drop all frame references and forget the reference-frame pointers.
 *
 * @param free_mem if non-zero, also free the dimension-dependent buffers
 *                 (used on close and before a size change)
 */
static void vp8_decode_flush_impl(AVCodecContext *avctx, int free_mem)
{
    VP8Context *s = avctx->priv_data;
    int i;

    for (i = 0; i < FF_ARRAY_ELEMS(s->frames); i++)
        vp8_release_frame(s, &s->frames[i]);
    memset(s->framep, 0, sizeof(s->framep));

    if (free_mem)
        free_buffers(s);
}
140 
/** Codec flush callback: drop references but keep allocated buffers. */
static void vp8_decode_flush(AVCodecContext *avctx)
{
    vp8_decode_flush_impl(avctx, 0);
}
145 
vp8_find_free_buffer(VP8Context * s)146 static VP8Frame *vp8_find_free_buffer(VP8Context *s)
147 {
148     VP8Frame *frame = NULL;
149     int i;
150 
151     // find a free buffer
152     for (i = 0; i < 5; i++)
153         if (&s->frames[i] != s->framep[VP56_FRAME_CURRENT]  &&
154             &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
155             &s->frames[i] != s->framep[VP56_FRAME_GOLDEN]   &&
156             &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2]) {
157             frame = &s->frames[i];
158             break;
159         }
160     if (i == 5) {
161         av_log(s->avctx, AV_LOG_FATAL, "Ran out of free frames!\n");
162         abort();
163     }
164     if (frame->tf.f->buf[0])
165         vp8_release_frame(s, frame);
166 
167     return frame;
168 }
169 
/**
 * Negotiate the output pixel format via ff_get_format(), listing the
 * compiled-in hwaccel formats (VAAPI, NVDEC) before software YUV420P.
 */
static enum AVPixelFormat get_pixel_format(VP8Context *s)
{
    enum AVPixelFormat pix_fmts[] = {
#if CONFIG_VP8_VAAPI_HWACCEL
        AV_PIX_FMT_VAAPI,
#endif
#if CONFIG_VP8_NVDEC_HWACCEL
        AV_PIX_FMT_CUDA,
#endif
        AV_PIX_FMT_YUV420P,
        AV_PIX_FMT_NONE,
    };

    return ff_get_format(s->avctx, pix_fmts);
}
185 
/**
 * (Re)configure the decoder for the given coded dimensions: flush and
 * resize if they changed, negotiate the pixel format, pick the
 * macroblock layout and (re)allocate all dimension-dependent buffers.
 *
 * @return 0 on success, negative AVERROR on error (buffers freed)
 */
static av_always_inline
int update_dimensions(VP8Context *s, int width, int height, int is_vp7)
{
    AVCodecContext *avctx = s->avctx;
    int i, ret, dim_reset = 0;

    /* flush if the display size changed, or if the macroblock-aligned
     * size changed while buffers are allocated (&& binds tighter than ||
     * here — the mb-size check only matters with live buffers) */
    if (width  != s->avctx->width || ((width+15)/16 != s->mb_width || (height+15)/16 != s->mb_height) && s->macroblocks_base ||
        height != s->avctx->height) {
        vp8_decode_flush_impl(s->avctx, 1);

        ret = ff_set_dimensions(s->avctx, width, height);
        if (ret < 0)
            return ret;

        dim_reset = (s->macroblocks_base != NULL);
    }

    /* WebP and VP7 never use hwaccel; only renegotiate for VP8 */
    if ((s->pix_fmt == AV_PIX_FMT_NONE || dim_reset) &&
         !s->actually_webp && !is_vp7) {
        s->pix_fmt = get_pixel_format(s);
        if (s->pix_fmt < 0)
            return AVERROR(EINVAL);
        avctx->pix_fmt = s->pix_fmt;
    }

    s->mb_width  = (s->avctx->coded_width  + 15) / 16;
    s->mb_height = (s->avctx->coded_height + 15) / 16;

    /* VP7 and slice-threaded VP8 use the full-plane macroblock layout */
    s->mb_layout = is_vp7 || avctx->active_thread_type == FF_THREAD_SLICE &&
                   avctx->thread_count > 1;
    if (!s->mb_layout) { // Frame threading and one thread
        s->macroblocks_base       = av_mallocz((s->mb_width + s->mb_height * 2 + 1) *
                                               sizeof(*s->macroblocks));
        s->intra4x4_pred_mode_top = av_mallocz(s->mb_width * 4);
    } else // Sliced threading
        s->macroblocks_base = av_mallocz((s->mb_width + 2) * (s->mb_height + 2) *
                                         sizeof(*s->macroblocks));
    s->top_nnz     = av_mallocz(s->mb_width * sizeof(*s->top_nnz));
    s->top_border  = av_mallocz((s->mb_width + 1) * sizeof(*s->top_border));
    s->thread_data = av_mallocz(MAX_THREADS * sizeof(VP8ThreadData));

    /* intra4x4_pred_mode_top is only needed (and allocated) when
     * !mb_layout, hence the combined NULL check */
    if (!s->macroblocks_base || !s->top_nnz || !s->top_border ||
        !s->thread_data || (!s->intra4x4_pred_mode_top && !s->mb_layout)) {
        free_buffers(s);
        return AVERROR(ENOMEM);
    }

    for (i = 0; i < MAX_THREADS; i++) {
        s->thread_data[i].filter_strength =
            av_mallocz(s->mb_width * sizeof(*s->thread_data[0].filter_strength));
        if (!s->thread_data[i].filter_strength) {
            free_buffers(s);
            return AVERROR(ENOMEM);
        }
#if HAVE_THREADS
        pthread_mutex_init(&s->thread_data[i].lock, NULL);
        pthread_cond_init(&s->thread_data[i].cond, NULL);
#endif
    }

    s->macroblocks = s->macroblocks_base + 1;

    return 0;
}
250 
/** VP7 wrapper around update_dimensions(). */
static int vp7_update_dimensions(VP8Context *s, int width, int height)
{
    return update_dimensions(s, width, height, IS_VP7);
}
255 
/** VP8 wrapper around update_dimensions(). */
static int vp8_update_dimensions(VP8Context *s, int width, int height)
{
    return update_dimensions(s, width, height, IS_VP8);
}
260 
261 
/**
 * Parse the segmentation part of the frame header: per-segment
 * quantizer / filter-level values and the segment-id tree probabilities.
 * Bitstream reads must stay in exactly this order.
 */
static void parse_segment_info(VP8Context *s)
{
    VP56RangeCoder *c = &s->c;
    int i;

    s->segmentation.update_map = vp8_rac_get(c);
    s->segmentation.update_feature_data = vp8_rac_get(c);

    if (s->segmentation.update_feature_data) {
        /* absolute_vals: values replace the frame-level ones rather than
         * being applied as deltas */
        s->segmentation.absolute_vals = vp8_rac_get(c);

        for (i = 0; i < 4; i++)
            s->segmentation.base_quant[i]   = vp8_rac_get_sint(c, 7);

        for (i = 0; i < 4; i++)
            s->segmentation.filter_level[i] = vp8_rac_get_sint(c, 6);
    }
    if (s->segmentation.update_map)
        for (i = 0; i < 3; i++)
            /* absent probabilities default to 255 */
            s->prob->segmentid[i] = vp8_rac_get(c) ? vp8_rac_get_uint(c, 8) : 255;
}
283 
/**
 * Parse loop-filter delta updates: four reference-frame deltas followed
 * by four mode deltas, each coded as magnitude plus optional sign bit.
 * Values without an update flag keep their previous contents.
 */
static void update_lf_deltas(VP8Context *s)
{
    VP56RangeCoder *c = &s->c;
    int i;

    for (i = 0; i < 4; i++) {
        if (vp8_rac_get(c)) {
            s->lf_delta.ref[i] = vp8_rac_get_uint(c, 6);

            /* sign bit follows the magnitude */
            if (vp8_rac_get(c))
                s->lf_delta.ref[i] = -s->lf_delta.ref[i];
        }
    }

    for (i = MODE_I4x4; i <= VP8_MVMODE_SPLIT; i++) {
        if (vp8_rac_get(c)) {
            s->lf_delta.mode[i] = vp8_rac_get_uint(c, 6);

            if (vp8_rac_get(c))
                s->lf_delta.mode[i] = -s->lf_delta.mode[i];
        }
    }
}
307 
/**
 * Parse the coefficient-partition layout and initialize one range
 * decoder per partition.  The first (num_partitions - 1) partition
 * sizes are stored explicitly as 24-bit LE values; the last partition
 * takes whatever data remains.
 *
 * @return 0 on success, negative AVERROR on malformed sizes or range
 *         decoder init failure
 */
static int setup_partitions(VP8Context *s, const uint8_t *buf, int buf_size)
{
    const uint8_t *sizes = buf;
    int i;
    int ret;

    s->num_coeff_partitions = 1 << vp8_rac_get_uint(&s->c, 2);

    buf      += 3 * (s->num_coeff_partitions - 1);
    buf_size -= 3 * (s->num_coeff_partitions - 1);
    if (buf_size < 0)
        return AVERROR_INVALIDDATA;

    for (i = 0; i < s->num_coeff_partitions - 1; i++) {
        int size = AV_RL24(sizes + 3 * i);
        if (buf_size - size < 0)
            return AVERROR_INVALIDDATA;
        s->coeff_partition_size[i] = size;

        ret = ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, size);
        if (ret < 0)
            return ret;
        buf      += size;
        buf_size -= size;
    }

    s->coeff_partition_size[i] = buf_size;

    /* fix: propagate the init result for the last partition too; it was
     * previously ignored, unlike every other partition above */
    return ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, buf_size);
}
339 
/**
 * Read the VP7 quantizer indices and translate them through the VP7
 * lookup tables.  VP7 has no per-segment quantizers, so only qmat[0]
 * is filled.
 */
static void vp7_get_quants(VP8Context *s)
{
    VP56RangeCoder *c = &s->c;

    int yac_qi  = vp8_rac_get_uint(c, 7);
    /* each optional index falls back to the luma AC index */
    int ydc_qi  = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi;
    int y2dc_qi = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi;
    int y2ac_qi = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi;
    int uvdc_qi = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi;
    int uvac_qi = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi;

    s->qmat[0].luma_qmul[0]    =       vp7_ydc_qlookup[ydc_qi];
    s->qmat[0].luma_qmul[1]    =       vp7_yac_qlookup[yac_qi];
    s->qmat[0].luma_dc_qmul[0] =       vp7_y2dc_qlookup[y2dc_qi];
    s->qmat[0].luma_dc_qmul[1] =       vp7_y2ac_qlookup[y2ac_qi];
    /* chroma DC is capped at 132, as in VP8 */
    s->qmat[0].chroma_qmul[0]  = FFMIN(vp7_ydc_qlookup[uvdc_qi], 132);
    s->qmat[0].chroma_qmul[1]  =       vp7_yac_qlookup[uvac_qi];
}
358 
/**
 * Read the VP8 quantization header (a base AC index plus five signed
 * deltas) and build the dequant factor tables for all four segments.
 */
static void vp8_get_quants(VP8Context *s)
{
    VP56RangeCoder *c = &s->c;
    int i, base_qi;

    s->quant.yac_qi     = vp8_rac_get_uint(c, 7);
    s->quant.ydc_delta  = vp8_rac_get_sint(c, 4);
    s->quant.y2dc_delta = vp8_rac_get_sint(c, 4);
    s->quant.y2ac_delta = vp8_rac_get_sint(c, 4);
    s->quant.uvdc_delta = vp8_rac_get_sint(c, 4);
    s->quant.uvac_delta = vp8_rac_get_sint(c, 4);

    for (i = 0; i < 4; i++) {
        if (s->segmentation.enabled) {
            base_qi = s->segmentation.base_quant[i];
            /* segment values are deltas unless absolute_vals is set */
            if (!s->segmentation.absolute_vals)
                base_qi += s->quant.yac_qi;
        } else
            base_qi = s->quant.yac_qi;

        s->qmat[i].luma_qmul[0]    = vp8_dc_qlookup[av_clip_uintp2(base_qi + s->quant.ydc_delta,  7)];
        s->qmat[i].luma_qmul[1]    = vp8_ac_qlookup[av_clip_uintp2(base_qi,              7)];
        s->qmat[i].luma_dc_qmul[0] = vp8_dc_qlookup[av_clip_uintp2(base_qi + s->quant.y2dc_delta, 7)] * 2;
        /* 101581>>16 is equivalent to 155/100 */
        s->qmat[i].luma_dc_qmul[1] = vp8_ac_qlookup[av_clip_uintp2(base_qi + s->quant.y2ac_delta, 7)] * 101581 >> 16;
        s->qmat[i].chroma_qmul[0]  = vp8_dc_qlookup[av_clip_uintp2(base_qi + s->quant.uvdc_delta, 7)];
        s->qmat[i].chroma_qmul[1]  = vp8_ac_qlookup[av_clip_uintp2(base_qi + s->quant.uvac_delta, 7)];

        /* spec-mandated floors/caps */
        s->qmat[i].luma_dc_qmul[1] = FFMAX(s->qmat[i].luma_dc_qmul[1], 8);
        s->qmat[i].chroma_qmul[0]  = FFMIN(s->qmat[i].chroma_qmul[0], 132);
    }
}
391 
392 /**
393  * Determine which buffers golden and altref should be updated with after this frame.
394  * The spec isn't clear here, so I'm going by my understanding of what libvpx does
395  *
396  * Intra frames update all 3 references
397  * Inter frames update VP56_FRAME_PREVIOUS if the update_last flag is set
398  * If the update (golden|altref) flag is set, it's updated with the current frame
399  *      if update_last is set, and VP56_FRAME_PREVIOUS otherwise.
400  * If the flag is not set, the number read means:
401  *      0: no update
402  *      1: VP56_FRAME_PREVIOUS
403  *      2: update golden with altref, or update altref with golden
404  */
static VP56Frame ref_to_update(VP8Context *s, int update, VP56Frame ref)
{
    VP56RangeCoder *c = &s->c;

    /* explicit update flag: the reference gets the current frame */
    if (update)
        return VP56_FRAME_CURRENT;

    /* otherwise a 2-bit code selects the source (see comment above) */
    switch (vp8_rac_get_uint(c, 2)) {
    case 1:
        return VP56_FRAME_PREVIOUS;
    case 2:
        /* swap: golden takes altref, altref takes golden */
        return (ref == VP56_FRAME_GOLDEN) ? VP56_FRAME_GOLDEN2 : VP56_FRAME_GOLDEN;
    }
    /* code 0 (or 3): no update */
    return VP56_FRAME_NONE;
}
420 
/** Reset the DCT token probabilities to the per-band spec defaults. */
static void vp78_reset_probability_tables(VP8Context *s)
{
    int i, j;
    for (i = 0; i < 4; i++)
        for (j = 0; j < 16; j++)
            /* defaults are indexed per coefficient band, not position */
            memcpy(s->prob->token[i][j], vp8_token_default_probs[i][vp8_coeff_band[j]],
                   sizeof(s->prob->token[i][j]));
}
429 
/**
 * Parse DCT token probability updates (13.3 Token Probability Updates).
 * Each updated value is replicated to every coefficient position
 * belonging to the same band.
 */
static void vp78_update_probability_tables(VP8Context *s)
{
    VP56RangeCoder *c = &s->c;
    int i, j, k, l, m;

    for (i = 0; i < 4; i++)
        for (j = 0; j < 8; j++)
            for (k = 0; k < 3; k++)
                for (l = 0; l < NUM_DCT_TOKENS-1; l++)
                    if (vp56_rac_get_prob_branchy(c, vp8_token_update_probs[i][j][k][l])) {
                        int prob = vp8_rac_get_uint(c, 8);
                        for (m = 0; vp8_coeff_band_indexes[j][m] >= 0; m++)
                            s->prob->token[i][vp8_coeff_band_indexes[j][m]][k][l] = prob;
                    }
}
445 
446 #define VP7_MVC_SIZE 17
447 #define VP8_MVC_SIZE 19
448 
/**
 * Parse intra 16x16 / chroma prediction probability updates and the
 * motion-vector probability updates (17.2).
 *
 * @param mvc_size number of MV probabilities per component:
 *                 VP7_MVC_SIZE (17) or VP8_MVC_SIZE (19)
 */
static void vp78_update_pred16x16_pred8x8_mvc_probabilities(VP8Context *s,
                                                            int mvc_size)
{
    VP56RangeCoder *c = &s->c;
    int i, j;

    if (vp8_rac_get(c))
        for (i = 0; i < 4; i++)
            s->prob->pred16x16[i] = vp8_rac_get_uint(c, 8);
    if (vp8_rac_get(c))
        for (i = 0; i < 3; i++)
            s->prob->pred8x8c[i]  = vp8_rac_get_uint(c, 8);

    // 17.2 MV probability update
    for (i = 0; i < 2; i++)
        for (j = 0; j < mvc_size; j++)
            if (vp56_rac_get_prob_branchy(c, vp8_mv_update_prob[i][j]))
                s->prob->mvc[i][j] = vp8_rac_get_nn(c);
}
468 
/**
 * Read the golden/altref update flags and resolve which frame each
 * reference will be updated from (see ref_to_update()).
 */
static void update_refs(VP8Context *s)
{
    VP56RangeCoder *c = &s->c;

    int update_golden = vp8_rac_get(c);
    int update_altref = vp8_rac_get(c);

    s->update_golden = ref_to_update(s, update_golden, VP56_FRAME_GOLDEN);
    s->update_altref = ref_to_update(s, update_altref, VP56_FRAME_GOLDEN2);
}
479 
/**
 * Copy both chroma planes (half luma width/height) from src to dst,
 * row by row so each frame's own line stride is honoured.
 */
static void copy_chroma(AVFrame *dst, AVFrame *src, int width, int height)
{
    int plane, row;
    const int rows  = height / 2;
    const int bytes = width / 2;

    for (plane = 1; plane < 3; plane++) {
        uint8_t       *dst_plane = dst->data[plane];
        const uint8_t *src_plane = src->data[plane];

        for (row = 0; row < rows; row++)
            memcpy(dst_plane + row * dst->linesize[plane],
                   src_plane + row * src->linesize[plane], bytes);
    }
}
490 
/**
 * Apply the VP7 fade to a luma plane:
 *     out = clip_uint8(y + y * beta / 256 + alpha)
 */
static void fade(uint8_t *dst, ptrdiff_t dst_linesize,
                 const uint8_t *src, ptrdiff_t src_linesize,
                 int width, int height,
                 int alpha, int beta)
{
    int row, col;

    for (row = 0; row < height; row++) {
        const uint8_t *src_row = src + row * src_linesize;
        uint8_t *dst_row       = dst + row * dst_linesize;

        for (col = 0; col < width; col++) {
            int y = src_row[col];
            int v = y + ((y * beta) >> 8) + alpha;

            /* clamp to 0..255 (equivalent to av_clip_uint8()) */
            dst_row[col] = v < 0 ? 0 : (v > 255 ? 255 : v);
        }
    }
}
506 
/**
 * Apply the VP7 inter-frame fade to the previous frame.  If the previous
 * frame is shared with the golden frame (which must stay untouched), a
 * fresh previous frame is allocated and the chroma planes copied before
 * fading the luma plane.
 *
 * @return 0 on success, negative AVERROR on error
 */
static int vp7_fade_frame(VP8Context *s, int alpha, int beta)
{
    int ret;

    /* keyframes and zero fades need no work */
    if (!s->keyframe && (alpha || beta)) {
        int width  = s->mb_width * 16;
        int height = s->mb_height * 16;
        AVFrame *src, *dst;

        if (!s->framep[VP56_FRAME_PREVIOUS] ||
            !s->framep[VP56_FRAME_GOLDEN]) {
            av_log(s->avctx, AV_LOG_WARNING, "Discarding interframe without a prior keyframe!\n");
            return AVERROR_INVALIDDATA;
        }

        /* fade in place by default */
        dst =
        src = s->framep[VP56_FRAME_PREVIOUS]->tf.f;

        /* preserve the golden frame, write a new previous frame */
        if (s->framep[VP56_FRAME_GOLDEN] == s->framep[VP56_FRAME_PREVIOUS]) {
            s->framep[VP56_FRAME_PREVIOUS] = vp8_find_free_buffer(s);
            if ((ret = vp8_alloc_frame(s, s->framep[VP56_FRAME_PREVIOUS], 1)) < 0)
                return ret;

            dst = s->framep[VP56_FRAME_PREVIOUS]->tf.f;

            copy_chroma(dst, src, width, height);
        }

        fade(dst->data[0], dst->linesize[0],
             src->data[0], src->linesize[0],
             width, height, alpha, beta);
    }

    return 0;
}
543 
/**
 * Parse a complete VP7 frame header and prepare the context for
 * macroblock decoding: probabilities, quantizers, features, loop filter
 * parameters and the optional fade of the previous frame.
 *
 * @return 0 on success, negative AVERROR on malformed or truncated input
 */
static int vp7_decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size)
{
    VP56RangeCoder *c = &s->c;
    int part1_size, hscale, vscale, i, j, ret;
    int width  = s->avctx->width;
    int height = s->avctx->height;
    int alpha = 0;
    int beta  = 0;

    if (buf_size < 4) {
        return AVERROR_INVALIDDATA;
    }

    s->profile = (buf[0] >> 1) & 7;
    if (s->profile > 1) {
        avpriv_request_sample(s->avctx, "Unknown profile %d", s->profile);
        return AVERROR_INVALIDDATA;
    }

    s->keyframe  = !(buf[0] & 1);
    s->invisible = 0;
    part1_size   = AV_RL24(buf) >> 4;

    if (buf_size < 4 - s->profile + part1_size) {
        av_log(s->avctx, AV_LOG_ERROR, "Buffer size %d is too small, needed : %d\n", buf_size, 4 - s->profile + part1_size);
        return AVERROR_INVALIDDATA;
    }

    /* profile 1 uses a 3-byte uncompressed header, profile 0 four bytes */
    buf      += 4 - s->profile;
    buf_size -= 4 - s->profile;

    memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_epel_pixels_tab, sizeof(s->put_pixels_tab));

    ret = ff_vp56_init_range_decoder(c, buf, part1_size);
    if (ret < 0)
        return ret;
    buf      += part1_size;
    buf_size -= part1_size;

    /* A. Dimension information (keyframes only) */
    if (s->keyframe) {
        width  = vp8_rac_get_uint(c, 12);
        height = vp8_rac_get_uint(c, 12);
        hscale = vp8_rac_get_uint(c, 2);
        vscale = vp8_rac_get_uint(c, 2);
        if (hscale || vscale)
            avpriv_request_sample(s->avctx, "Upscaling");

        /* keyframes reset all references and probability tables */
        s->update_golden = s->update_altref = VP56_FRAME_CURRENT;
        vp78_reset_probability_tables(s);
        memcpy(s->prob->pred16x16, vp8_pred16x16_prob_inter,
               sizeof(s->prob->pred16x16));
        memcpy(s->prob->pred8x8c, vp8_pred8x8c_prob_inter,
               sizeof(s->prob->pred8x8c));
        for (i = 0; i < 2; i++)
            memcpy(s->prob->mvc[i], vp7_mv_default_prob[i],
                   sizeof(vp7_mv_default_prob[i]));
        memset(&s->segmentation, 0, sizeof(s->segmentation));
        memset(&s->lf_delta, 0, sizeof(s->lf_delta));
        memcpy(s->prob[0].scan, ff_zigzag_scan, sizeof(s->prob[0].scan));
    }

    if (s->keyframe || s->profile > 0)
        memset(s->inter_dc_pred, 0 , sizeof(s->inter_dc_pred));

    /* B. Decoding information for all four macroblock-level features */
    for (i = 0; i < 4; i++) {
        s->feature_enabled[i] = vp8_rac_get(c);
        if (s->feature_enabled[i]) {
             s->feature_present_prob[i] = vp8_rac_get_uint(c, 8);

             for (j = 0; j < 3; j++)
                 s->feature_index_prob[i][j] =
                     vp8_rac_get(c) ? vp8_rac_get_uint(c, 8) : 255;

             if (vp7_feature_value_size[s->profile][i])
                 for (j = 0; j < 4; j++)
                     s->feature_value[i][j] =
                        vp8_rac_get(c) ? vp8_rac_get_uint(c, vp7_feature_value_size[s->profile][i]) : 0;
        }
    }

    /* VP7 has no segmentation or loop-filter deltas */
    s->segmentation.enabled    = 0;
    s->segmentation.update_map = 0;
    s->lf_delta.enabled        = 0;

    /* VP7 always uses a single coefficient partition */
    s->num_coeff_partitions = 1;
    ret = ff_vp56_init_range_decoder(&s->coeff_partition[0], buf, buf_size);
    if (ret < 0)
        return ret;

    if (!s->macroblocks_base || /* first frame */
        width != s->avctx->width || height != s->avctx->height ||
        (width + 15) / 16 != s->mb_width || (height + 15) / 16 != s->mb_height) {
        if ((ret = vp7_update_dimensions(s, width, height)) < 0)
            return ret;
    }

    /* C. Dequantization indices */
    vp7_get_quants(s);

    /* D. Golden frame update flag (a Flag) for interframes only */
    if (!s->keyframe) {
        s->update_golden = vp8_rac_get(c) ? VP56_FRAME_CURRENT : VP56_FRAME_NONE;
        s->sign_bias[VP56_FRAME_GOLDEN] = 0;
    }

    s->update_last          = 1;
    s->update_probabilities = 1;
    s->fade_present         = 1;

    if (s->profile > 0) {
        s->update_probabilities = vp8_rac_get(c);
        if (!s->update_probabilities)
            s->prob[1] = s->prob[0];

        if (!s->keyframe)
            s->fade_present = vp8_rac_get(c);
    }

    if (vpX_rac_is_end(c))
        return AVERROR_INVALIDDATA;
    /* E. Fading information for previous frame */
    if (s->fade_present && vp8_rac_get(c)) {
        alpha = (int8_t) vp8_rac_get_uint(c, 8);
        beta  = (int8_t) vp8_rac_get_uint(c, 8);
    }

    /* F. Loop filter type */
    if (!s->profile)
        s->filter.simple = vp8_rac_get(c);

    /* G. DCT coefficient ordering specification */
    if (vp8_rac_get(c))
        for (i = 1; i < 16; i++)
            s->prob[0].scan[i] = ff_zigzag_scan[vp8_rac_get_uint(c, 4)];

    /* H. Loop filter levels  */
    if (s->profile > 0)
        s->filter.simple = vp8_rac_get(c);
    s->filter.level     = vp8_rac_get_uint(c, 6);
    s->filter.sharpness = vp8_rac_get_uint(c, 3);

    /* I. DCT coefficient probability update; 13.3 Token Probability Updates */
    vp78_update_probability_tables(s);

    s->mbskip_enabled = 0;

    /* J. The remaining frame header data occurs ONLY FOR INTERFRAMES */
    if (!s->keyframe) {
        s->prob->intra  = vp8_rac_get_uint(c, 8);
        s->prob->last   = vp8_rac_get_uint(c, 8);
        vp78_update_pred16x16_pred8x8_mvc_probabilities(s, VP7_MVC_SIZE);
    }

    if (vpX_rac_is_end(c))
        return AVERROR_INVALIDDATA;

    if ((ret = vp7_fade_frame(s, alpha, beta)) < 0)
        return ret;

    return 0;
}
707 
/**
 * Parse a complete VP8 frame header: uncompressed preamble, keyframe
 * dimensions, segmentation, loop filter, partitions, quantizers,
 * reference updates and probability updates.  Also records the range
 * coder state at the end of the header for hwaccels.
 *
 * @return 0 on success, negative AVERROR on malformed or truncated input
 */
static int vp8_decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size)
{
    VP56RangeCoder *c = &s->c;
    int header_size, hscale, vscale, ret;
    int width  = s->avctx->width;
    int height = s->avctx->height;

    if (buf_size < 3) {
        av_log(s->avctx, AV_LOG_ERROR, "Insufficent data (%d) for header\n", buf_size);
        return AVERROR_INVALIDDATA;
    }

    /* 3-byte uncompressed frame tag */
    s->keyframe  = !(buf[0] & 1);
    s->profile   =  (buf[0]>>1) & 7;
    s->invisible = !(buf[0] & 0x10);
    header_size  = AV_RL24(buf) >> 5;
    buf      += 3;
    buf_size -= 3;

    s->header_partition_size = header_size;

    if (s->profile > 3)
        av_log(s->avctx, AV_LOG_WARNING, "Unknown profile %d\n", s->profile);

    if (!s->profile)
        memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_epel_pixels_tab,
               sizeof(s->put_pixels_tab));
    else    // profile 1-3 use bilinear, 4+ aren't defined so whatever
        memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_bilinear_pixels_tab,
               sizeof(s->put_pixels_tab));

    if (header_size > buf_size - 7 * s->keyframe) {
        av_log(s->avctx, AV_LOG_ERROR, "Header size larger than data provided\n");
        return AVERROR_INVALIDDATA;
    }

    if (s->keyframe) {
        /* keyframes carry a 7-byte start code + dimensions block */
        if (AV_RL24(buf) != 0x2a019d) {
            av_log(s->avctx, AV_LOG_ERROR,
                   "Invalid start code 0x%x\n", AV_RL24(buf));
            return AVERROR_INVALIDDATA;
        }
        width     = AV_RL16(buf + 3) & 0x3fff;
        height    = AV_RL16(buf + 5) & 0x3fff;
        hscale    = buf[4] >> 6;
        vscale    = buf[6] >> 6;
        buf      += 7;
        buf_size -= 7;

        if (hscale || vscale)
            avpriv_request_sample(s->avctx, "Upscaling");

        /* keyframes reset all references and probability tables */
        s->update_golden = s->update_altref = VP56_FRAME_CURRENT;
        vp78_reset_probability_tables(s);
        memcpy(s->prob->pred16x16, vp8_pred16x16_prob_inter,
               sizeof(s->prob->pred16x16));
        memcpy(s->prob->pred8x8c, vp8_pred8x8c_prob_inter,
               sizeof(s->prob->pred8x8c));
        memcpy(s->prob->mvc, vp8_mv_default_prob,
               sizeof(s->prob->mvc));
        memset(&s->segmentation, 0, sizeof(s->segmentation));
        memset(&s->lf_delta, 0, sizeof(s->lf_delta));
    }

    ret = ff_vp56_init_range_decoder(c, buf, header_size);
    if (ret < 0)
        return ret;
    buf      += header_size;
    buf_size -= header_size;

    if (s->keyframe) {
        s->colorspace = vp8_rac_get(c);
        if (s->colorspace)
            av_log(s->avctx, AV_LOG_WARNING, "Unspecified colorspace\n");
        s->fullrange = vp8_rac_get(c);
    }

    if ((s->segmentation.enabled = vp8_rac_get(c)))
        parse_segment_info(s);
    else
        s->segmentation.update_map = 0; // FIXME: move this to some init function?

    s->filter.simple    = vp8_rac_get(c);
    s->filter.level     = vp8_rac_get_uint(c, 6);
    s->filter.sharpness = vp8_rac_get_uint(c, 3);

    if ((s->lf_delta.enabled = vp8_rac_get(c))) {
        s->lf_delta.update = vp8_rac_get(c);
        if (s->lf_delta.update)
            update_lf_deltas(s);
    }

    if (setup_partitions(s, buf, buf_size)) {
        av_log(s->avctx, AV_LOG_ERROR, "Invalid partitions\n");
        return AVERROR_INVALIDDATA;
    }

    if (!s->macroblocks_base || /* first frame */
        width != s->avctx->width || height != s->avctx->height ||
        (width+15)/16 != s->mb_width || (height+15)/16 != s->mb_height)
        if ((ret = vp8_update_dimensions(s, width, height)) < 0)
            return ret;

    vp8_get_quants(s);

    if (!s->keyframe) {
        update_refs(s);
        s->sign_bias[VP56_FRAME_GOLDEN]               = vp8_rac_get(c);
        s->sign_bias[VP56_FRAME_GOLDEN2 /* altref */] = vp8_rac_get(c);
    }

    // if we aren't saving this frame's probabilities for future frames,
    // make a copy of the current probabilities
    if (!(s->update_probabilities = vp8_rac_get(c)))
        s->prob[1] = s->prob[0];

    s->update_last = s->keyframe || vp8_rac_get(c);

    vp78_update_probability_tables(s);

    if ((s->mbskip_enabled = vp8_rac_get(c)))
        s->prob->mbskip = vp8_rac_get_uint(c, 8);

    if (!s->keyframe) {
        s->prob->intra  = vp8_rac_get_uint(c, 8);
        s->prob->last   = vp8_rac_get_uint(c, 8);
        s->prob->golden = vp8_rac_get_uint(c, 8);
        vp78_update_pred16x16_pred8x8_mvc_probabilities(s, VP8_MVC_SIZE);
    }

    // Record the entropy coder state here so that hwaccels can use it.
    s->c.code_word = vp56_rac_renorm(&s->c);
    s->coder_state_at_header_end.input     = s->c.buffer - (-s->c.bits / 8);
    s->coder_state_at_header_end.range     = s->c.high;
    s->coder_state_at_header_end.value     = s->c.code_word >> 16;
    s->coder_state_at_header_end.bit_count = -s->c.bits % 8;

    return 0;
}
847 
/**
 * Clamp the motion vector in src to [mv_min, mv_max] and store it in
 * dst; each bound is itself clipped to the int16 range first.
 */
static av_always_inline
void clamp_mv(VP8mvbounds *s, VP56mv *dst, const VP56mv *src)
{
    dst->x = av_clip(src->x, av_clip(s->mv_min.x, INT16_MIN, INT16_MAX),
                             av_clip(s->mv_max.x, INT16_MIN, INT16_MAX));
    dst->y = av_clip(src->y, av_clip(s->mv_min.y, INT16_MIN, INT16_MAX),
                             av_clip(s->mv_max.y, INT16_MIN, INT16_MAX));
}
856 
857 /**
858  * Motion vector coding, 17.1.
859  */
/* Decode one motion-vector component, section 17.1.
 * p points to the per-component probability set: p[0] selects between the
 * "large" coded form and the small_mvtree form, p[1] is the sign
 * probability, p[2..8] hold the small tree, p[9..] the per-bit
 * probabilities of the large form. */
static av_always_inline int read_mv_component(VP56RangeCoder *c, const uint8_t *p, int vp7)
{
    int bit, x = 0;

    if (vp56_rac_get_prob_branchy(c, p[0])) {
        /* "large" magnitude: low 3 bits first, then the high bits in
         * descending order (VP7 codes 8 magnitude bits, VP8 codes 10) */
        int i;

        for (i = 0; i < 3; i++)
            x += vp56_rac_get_prob(c, p[9 + i]) << i;
        for (i = (vp7 ? 7 : 9); i > 3; i--)
            x += vp56_rac_get_prob(c, p[9 + i]) << i;
        /* bit 3 is implicit (set) unless no higher bit is set, in which
         * case it is coded explicitly */
        if (!(x & (vp7 ? 0xF0 : 0xFFF0)) || vp56_rac_get_prob(c, p[12]))
            x += 8;
    } else {
        // small_mvtree: magnitudes 0..7 via a three-level binary tree
        const uint8_t *ps = p + 2;
        bit = vp56_rac_get_prob(c, *ps);
        ps += 1 + 3 * bit;
        x  += 4 * bit;
        bit = vp56_rac_get_prob(c, *ps);
        ps += 1 + bit;
        x  += 2 * bit;
        x  += vp56_rac_get_prob(c, *ps);
    }

    /* the sign bit is only present for nonzero magnitudes */
    return (x && vp56_rac_get_prob(c, p[1])) ? -x : x;
}
887 
/* VP7 entry point for read_mv_component() (vp7 = 1). */
static int vp7_read_mv_component(VP56RangeCoder *c, const uint8_t *p)
{
    const int component = read_mv_component(c, p, 1);
    return component;
}
892 
/* VP8 entry point for read_mv_component() (vp7 = 0). */
static int vp8_read_mv_component(VP56RangeCoder *c, const uint8_t *p)
{
    const int component = read_mv_component(c, p, 0);
    return component;
}
897 
898 static av_always_inline
get_submv_prob(uint32_t left,uint32_t top,int is_vp7)899 const uint8_t *get_submv_prob(uint32_t left, uint32_t top, int is_vp7)
900 {
901     if (is_vp7)
902         return vp7_submv_prob;
903 
904     if (left == top)
905         return vp8_submv_prob[4 - !!left];
906     if (!top)
907         return vp8_submv_prob[2];
908     return vp8_submv_prob[1 - !!left];
909 }
910 
911 /**
912  * Split motion vector prediction, 16.4.
913  * @returns the number of motion vectors parsed (2, 4 or 16)
914  */
915 static av_always_inline
decode_splitmvs(VP8Context * s,VP56RangeCoder * c,VP8Macroblock * mb,int layout,int is_vp7)916 int decode_splitmvs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb,
917                     int layout, int is_vp7)
918 {
919     int part_idx;
920     int n, num;
921     VP8Macroblock *top_mb;
922     VP8Macroblock *left_mb = &mb[-1];
923     const uint8_t *mbsplits_left = vp8_mbsplits[left_mb->partitioning];
924     const uint8_t *mbsplits_top, *mbsplits_cur, *firstidx;
925     VP56mv *top_mv;
926     VP56mv *left_mv = left_mb->bmv;
927     VP56mv *cur_mv  = mb->bmv;
928 
929     if (!layout) // layout is inlined, s->mb_layout is not
930         top_mb = &mb[2];
931     else
932         top_mb = &mb[-s->mb_width - 1];
933     mbsplits_top = vp8_mbsplits[top_mb->partitioning];
934     top_mv       = top_mb->bmv;
935 
936     if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[0])) {
937         if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[1]))
938             part_idx = VP8_SPLITMVMODE_16x8 + vp56_rac_get_prob(c, vp8_mbsplit_prob[2]);
939         else
940             part_idx = VP8_SPLITMVMODE_8x8;
941     } else {
942         part_idx = VP8_SPLITMVMODE_4x4;
943     }
944 
945     num              = vp8_mbsplit_count[part_idx];
946     mbsplits_cur     = vp8_mbsplits[part_idx],
947     firstidx         = vp8_mbfirstidx[part_idx];
948     mb->partitioning = part_idx;
949 
950     for (n = 0; n < num; n++) {
951         int k = firstidx[n];
952         uint32_t left, above;
953         const uint8_t *submv_prob;
954 
955         if (!(k & 3))
956             left = AV_RN32A(&left_mv[mbsplits_left[k + 3]]);
957         else
958             left = AV_RN32A(&cur_mv[mbsplits_cur[k - 1]]);
959         if (k <= 3)
960             above = AV_RN32A(&top_mv[mbsplits_top[k + 12]]);
961         else
962             above = AV_RN32A(&cur_mv[mbsplits_cur[k - 4]]);
963 
964         submv_prob = get_submv_prob(left, above, is_vp7);
965 
966         if (vp56_rac_get_prob_branchy(c, submv_prob[0])) {
967             if (vp56_rac_get_prob_branchy(c, submv_prob[1])) {
968                 if (vp56_rac_get_prob_branchy(c, submv_prob[2])) {
969                     mb->bmv[n].y = mb->mv.y +
970                                    read_mv_component(c, s->prob->mvc[0], is_vp7);
971                     mb->bmv[n].x = mb->mv.x +
972                                    read_mv_component(c, s->prob->mvc[1], is_vp7);
973                 } else {
974                     AV_ZERO32(&mb->bmv[n]);
975                 }
976             } else {
977                 AV_WN32A(&mb->bmv[n], above);
978             }
979         } else {
980             AV_WN32A(&mb->bmv[n], left);
981         }
982     }
983 
984     return num;
985 }
986 
987 /**
988  * The vp7 reference decoder uses a padding macroblock column (added to right
989  * edge of the frame) to guard against illegal macroblock offsets. The
990  * algorithm has bugs that permit offsets to straddle the padding column.
991  * This function replicates those bugs.
992  *
993  * @param[out] edge_x macroblock x address
994  * @param[out] edge_y macroblock y address
995  *
996  * @return macroblock offset legal (boolean)
997  */
static int vp7_calculate_mb_offset(int mb_x, int mb_y, int mb_width,
                                   int xoffset, int yoffset, int boundary,
                                   int *edge_x, int *edge_y)
{
    const int stride = mb_width + 1;                 /* grid incl. guard column */
    const int linear = (mb_y + yoffset) * stride + mb_x + xoffset;

    /* reject offsets before the boundary or landing in the guard column */
    if (linear < boundary)
        return 0;
    if (linear % stride == stride - 1)
        return 0;

    *edge_y = linear / stride;
    *edge_x = linear % stride;
    return 1;
}
1010 
get_bmv_ptr(const VP8Macroblock * mb,int subblock)1011 static const VP56mv *get_bmv_ptr(const VP8Macroblock *mb, int subblock)
1012 {
1013     return &mb->bmv[mb->mode == VP8_MVMODE_SPLIT ? vp8_mbsplits[mb->partitioning][subblock] : 0];
1014 }
1015 
/* Decode the motion vector(s) for one VP7 inter macroblock.
 * Builds a zero/nearest/near candidate list by scanning up to
 * VP7_MV_PRED_COUNT fixed neighbour positions (vp7_mv_pred[]), scoring
 * each slot, then reads the MV mode and any residual from the coder. */
static av_always_inline
void vp7_decode_mvs(VP8Context *s, VP8Macroblock *mb,
                    int mb_x, int mb_y, int layout)
{
    VP8Macroblock *mb_edge[12];
    enum { CNT_ZERO, CNT_NEAREST, CNT_NEAR };
    enum { VP8_EDGE_TOP, VP8_EDGE_LEFT, VP8_EDGE_TOPLEFT };
    int idx = CNT_ZERO;
    VP56mv near_mv[3];
    uint8_t cnt[3] = { 0 };
    VP56RangeCoder *c = &s->c;
    int i;

    AV_ZERO32(&near_mv[0]);
    AV_ZERO32(&near_mv[1]);
    AV_ZERO32(&near_mv[2]);

    /* scan the fixed neighbour positions; equal MVs share a slot, and each
     * position's score accumulates on the slot it matched */
    for (i = 0; i < VP7_MV_PRED_COUNT; i++) {
        const VP7MVPred * pred = &vp7_mv_pred[i];
        int edge_x, edge_y;

        if (vp7_calculate_mb_offset(mb_x, mb_y, s->mb_width, pred->xoffset,
                                    pred->yoffset, !s->profile, &edge_x, &edge_y)) {
            VP8Macroblock *edge = mb_edge[i] = (s->mb_layout == 1)
                                             ? s->macroblocks_base + 1 + edge_x +
                                               (s->mb_width + 1) * (edge_y + 1)
                                             : s->macroblocks + edge_x +
                                               (s->mb_height - edge_y - 1) * 2;
            uint32_t mv = AV_RN32A(get_bmv_ptr(edge, vp7_mv_pred[i].subblock));
            if (mv) {
                if (AV_RN32A(&near_mv[CNT_NEAREST])) {
                    if (mv == AV_RN32A(&near_mv[CNT_NEAREST])) {
                        idx = CNT_NEAREST;
                    } else if (AV_RN32A(&near_mv[CNT_NEAR])) {
                        /* both slots taken: only matching MVs add score */
                        if (mv != AV_RN32A(&near_mv[CNT_NEAR]))
                            continue;
                        idx = CNT_NEAR;
                    } else {
                        AV_WN32A(&near_mv[CNT_NEAR], mv);
                        idx = CNT_NEAR;
                    }
                } else {
                    AV_WN32A(&near_mv[CNT_NEAREST], mv);
                    idx = CNT_NEAREST;
                }
            } else {
                idx = CNT_ZERO;
            }
        } else {
            /* illegal neighbour offset counts towards the zero slot */
            idx = CNT_ZERO;
        }
        cnt[idx] += vp7_mv_pred[i].score;
    }

    mb->partitioning = VP8_SPLITMVMODE_NONE;

    /* mode decision: each branch's probability is selected by the score of
     * the candidate it rules out (vp7_mode_contexts) */
    if (vp56_rac_get_prob_branchy(c, vp7_mode_contexts[cnt[CNT_ZERO]][0])) {
        mb->mode = VP8_MVMODE_MV;

        if (vp56_rac_get_prob_branchy(c, vp7_mode_contexts[cnt[CNT_NEAREST]][1])) {

            if (vp56_rac_get_prob_branchy(c, vp7_mode_contexts[cnt[CNT_NEAR]][2])) {

                /* base MV is the best-scoring of zero/nearest or zero/near */
                if (cnt[CNT_NEAREST] > cnt[CNT_NEAR])
                    AV_WN32A(&mb->mv, cnt[CNT_ZERO] > cnt[CNT_NEAREST] ? 0 : AV_RN32A(&near_mv[CNT_NEAREST]));
                else
                    AV_WN32A(&mb->mv, cnt[CNT_ZERO] > cnt[CNT_NEAR]    ? 0 : AV_RN32A(&near_mv[CNT_NEAR]));

                if (vp56_rac_get_prob_branchy(c, vp7_mode_contexts[cnt[CNT_NEAR]][3])) {
                    mb->mode = VP8_MVMODE_SPLIT;
                    mb->mv = mb->bmv[decode_splitmvs(s, c, mb, layout, IS_VP7) - 1];
                } else {
                    /* NEWMV: explicit delta on top of the base MV */
                    mb->mv.y += vp7_read_mv_component(c, s->prob->mvc[0]);
                    mb->mv.x += vp7_read_mv_component(c, s->prob->mvc[1]);
                    mb->bmv[0] = mb->mv;
                }
            } else {
                mb->mv = near_mv[CNT_NEAR];
                mb->bmv[0] = mb->mv;
            }
        } else {
            mb->mv = near_mv[CNT_NEAREST];
            mb->bmv[0] = mb->mv;
        }
    } else {
        mb->mode = VP8_MVMODE_ZERO;
        AV_ZERO32(&mb->mv);
        mb->bmv[0] = mb->mv;
    }
}
1106 
/* Decode the motion vector(s) for one VP8 inter macroblock.
 * Candidates come from the top, left and top-left neighbours; their MVs
 * are compared as raw 32-bit words and weighted (top/left count double
 * relative to top-left). The decoded MV is clamped to mv_bounds. */
static av_always_inline
void vp8_decode_mvs(VP8Context *s, VP8mvbounds *mv_bounds, VP8Macroblock *mb,
                    int mb_x, int mb_y, int layout)
{
    VP8Macroblock *mb_edge[3] = { 0      /* top */,
                                  mb - 1 /* left */,
                                  0      /* top-left */ };
    enum { CNT_ZERO, CNT_NEAREST, CNT_NEAR, CNT_SPLITMV };
    enum { VP8_EDGE_TOP, VP8_EDGE_LEFT, VP8_EDGE_TOPLEFT };
    int idx = CNT_ZERO;
    int cur_sign_bias = s->sign_bias[mb->ref_frame];
    int8_t *sign_bias = s->sign_bias;
    VP56mv near_mv[4];
    uint8_t cnt[4] = { 0 };
    VP56RangeCoder *c = &s->c;

    /* locate the top and top-left neighbour macroblocks; their position
     * relative to mb depends on the macroblock memory layout */
    if (!layout) { // layout is inlined (s->mb_layout is not)
        mb_edge[0] = mb + 2;
        mb_edge[2] = mb + 1;
    } else {
        mb_edge[0] = mb - s->mb_width - 1;
        mb_edge[2] = mb - s->mb_width - 2;
    }

    AV_ZERO32(&near_mv[0]);
    AV_ZERO32(&near_mv[1]);
    AV_ZERO32(&near_mv[2]);

    /* Process MB on top, left and top-left */
#define MV_EDGE_CHECK(n)                                                      \
    {                                                                         \
        VP8Macroblock *edge = mb_edge[n];                                     \
        int edge_ref = edge->ref_frame;                                       \
        if (edge_ref != VP56_FRAME_CURRENT) {                                 \
            uint32_t mv = AV_RN32A(&edge->mv);                                \
            if (mv) {                                                         \
                if (cur_sign_bias != sign_bias[edge_ref]) {                   \
                    /* SWAR negate of the values in mv. */                    \
                    mv = ~mv;                                                 \
                    mv = ((mv & 0x7fff7fff) +                                 \
                          0x00010001) ^ (mv & 0x80008000);                    \
                }                                                             \
                if (!n || mv != AV_RN32A(&near_mv[idx]))                      \
                    AV_WN32A(&near_mv[++idx], mv);                            \
                cnt[idx] += 1 + (n != 2);                                     \
            } else                                                            \
                cnt[CNT_ZERO] += 1 + (n != 2);                                \
        }                                                                     \
    }

    MV_EDGE_CHECK(0)
    MV_EDGE_CHECK(1)
    MV_EDGE_CHECK(2)

    mb->partitioning = VP8_SPLITMVMODE_NONE;
    if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_ZERO]][0])) {
        mb->mode = VP8_MVMODE_MV;

        /* If we have three distinct MVs, merge first and last if they're the same */
        if (cnt[CNT_SPLITMV] &&
            AV_RN32A(&near_mv[1 + VP8_EDGE_TOP]) == AV_RN32A(&near_mv[1 + VP8_EDGE_TOPLEFT]))
            cnt[CNT_NEAREST] += 1;

        /* Swap near and nearest if necessary */
        if (cnt[CNT_NEAR] > cnt[CNT_NEAREST]) {
            FFSWAP(uint8_t,     cnt[CNT_NEAREST],     cnt[CNT_NEAR]);
            FFSWAP( VP56mv, near_mv[CNT_NEAREST], near_mv[CNT_NEAR]);
        }

        if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAREST]][1])) {
            if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAR]][2])) {
                /* Choose the best mv out of 0,0 and the nearest mv */
                clamp_mv(mv_bounds, &mb->mv, &near_mv[CNT_ZERO + (cnt[CNT_NEAREST] >= cnt[CNT_ZERO])]);
                /* split-mode context: how many neighbours are themselves split */
                cnt[CNT_SPLITMV] = ((mb_edge[VP8_EDGE_LEFT]->mode    == VP8_MVMODE_SPLIT) +
                                    (mb_edge[VP8_EDGE_TOP]->mode     == VP8_MVMODE_SPLIT)) * 2 +
                                    (mb_edge[VP8_EDGE_TOPLEFT]->mode == VP8_MVMODE_SPLIT);

                if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_SPLITMV]][3])) {
                    mb->mode = VP8_MVMODE_SPLIT;
                    mb->mv = mb->bmv[decode_splitmvs(s, c, mb, layout, IS_VP8) - 1];
                } else {
                    /* NEWMV: explicit delta on top of the chosen base MV */
                    mb->mv.y  += vp8_read_mv_component(c, s->prob->mvc[0]);
                    mb->mv.x  += vp8_read_mv_component(c, s->prob->mvc[1]);
                    mb->bmv[0] = mb->mv;
                }
            } else {
                clamp_mv(mv_bounds, &mb->mv, &near_mv[CNT_NEAR]);
                mb->bmv[0] = mb->mv;
            }
        } else {
            clamp_mv(mv_bounds, &mb->mv, &near_mv[CNT_NEAREST]);
            mb->bmv[0] = mb->mv;
        }
    } else {
        mb->mode = VP8_MVMODE_ZERO;
        AV_ZERO32(&mb->mv);
        mb->bmv[0] = mb->mv;
    }
}
1206 
/* Decode the 16 4x4 intra prediction modes of an I4x4 macroblock into
 * mb->intra4x4_pred_mode_mb. On keyframes the mode tree is contextual on
 * the modes above and to the left; on inter frames a single fixed
 * probability set is used and no left/top context is maintained. */
static av_always_inline
void decode_intra4x4_modes(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb,
                           int mb_x, int mb_y, int keyframe, int layout)
{
    uint8_t *intra4x4 = mb->intra4x4_pred_mode_mb;

    if (layout) {
        /* per-MB layout: pull the top row's modes from the MB above */
        VP8Macroblock *mb_top = mb - s->mb_width - 1;
        memcpy(mb->intra4x4_pred_mode_top, mb_top->intra4x4_pred_mode_top, 4);
    }
    if (keyframe) {
        int x, y;
        uint8_t *top;
        uint8_t *const left = s->intra4x4_pred_mode_left;
        if (layout)
            top = mb->intra4x4_pred_mode_top;
        else
            top = s->intra4x4_pred_mode_top + 4 * mb_x;
        for (y = 0; y < 4; y++) {
            for (x = 0; x < 4; x++) {
                const uint8_t *ctx;
                /* context = (mode above this subblock, mode to its left) */
                ctx       = vp8_pred4x4_prob_intra[top[x]][left[y]];
                *intra4x4 = vp8_rac_get_tree(c, vp8_pred4x4_tree, ctx);
                left[y]   = top[x] = *intra4x4; /* becomes context for the next subblocks */
                intra4x4++;
            }
        }
    } else {
        int i;
        for (i = 0; i < 16; i++)
            intra4x4[i] = vp8_rac_get_tree(c, vp8_pred4x4_tree,
                                           vp8_pred4x4_prob_inter);
    }
}
1241 
/* Decode the per-macroblock header: segment id (or VP7 features), skip
 * flag, prediction mode, reference frame, and — for inter MBs — the
 * motion vectors.
 * @param segment in/out: carries the segment id between macroblocks when
 *                the segment map is not updated this frame
 * @param ref     if non-NULL, segment id carried over from the previous
 *                frame's map (used when segmentation is enabled but the
 *                map is not updated) */
static av_always_inline
void decode_mb_mode(VP8Context *s, VP8mvbounds *mv_bounds,
                    VP8Macroblock *mb, int mb_x, int mb_y,
                    uint8_t *segment, uint8_t *ref, int layout, int is_vp7)
{
    VP56RangeCoder *c = &s->c;
    static const char * const vp7_feature_name[] = { "q-index",
                                                     "lf-delta",
                                                     "partial-golden-update",
                                                     "blit-pitch" };
    if (is_vp7) {
        /* VP7 codes per-MB "features" instead of a segment id; they are
         * parsed (to keep the coder in sync) but not applied — just logged */
        int i;
        *segment = 0;
        for (i = 0; i < 4; i++) {
            if (s->feature_enabled[i]) {
                if (vp56_rac_get_prob_branchy(c, s->feature_present_prob[i])) {
                      int index = vp8_rac_get_tree(c, vp7_feature_index_tree,
                                                   s->feature_index_prob[i]);
                      av_log(s->avctx, AV_LOG_WARNING,
                             "Feature %s present in macroblock (value 0x%x)\n",
                             vp7_feature_name[i], s->feature_value[i][index]);
                }
           }
        }
    } else if (s->segmentation.update_map) {
        /* segment id 0..3, coded as two binary decisions */
        int bit  = vp56_rac_get_prob(c, s->prob->segmentid[0]);
        *segment = vp56_rac_get_prob(c, s->prob->segmentid[1+bit]) + 2*bit;
    } else if (s->segmentation.enabled)
        *segment = ref ? *ref : *segment;
    mb->segment = *segment;

    mb->skip = s->mbskip_enabled ? vp56_rac_get_prob(c, s->prob->mbskip) : 0;

    if (s->keyframe) {
        mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_intra,
                                    vp8_pred16x16_prob_intra);

        if (mb->mode == MODE_I4x4) {
            decode_intra4x4_modes(s, c, mb, mb_x, 1, layout);
        } else {
            /* whole-MB intra mode: replicate the equivalent 4x4 mode into
             * the left/top context rows for future I4x4 neighbours */
            const uint32_t modes = (is_vp7 ? vp7_pred4x4_mode
                                           : vp8_pred4x4_mode)[mb->mode] * 0x01010101u;
            if (s->mb_layout)
                AV_WN32A(mb->intra4x4_pred_mode_top, modes);
            else
                AV_WN32A(s->intra4x4_pred_mode_top + 4 * mb_x, modes);
            AV_WN32A(s->intra4x4_pred_mode_left, modes);
        }

        mb->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree,
                                                vp8_pred8x8c_prob_intra);
        mb->ref_frame        = VP56_FRAME_CURRENT;
    } else if (vp56_rac_get_prob_branchy(c, s->prob->intra)) {
        // inter MB, 16.2
        if (vp56_rac_get_prob_branchy(c, s->prob->last))
            mb->ref_frame =
                (!is_vp7 && vp56_rac_get_prob(c, s->prob->golden)) ? VP56_FRAME_GOLDEN2 /* altref */
                                                                   : VP56_FRAME_GOLDEN;
        else
            mb->ref_frame = VP56_FRAME_PREVIOUS;
        s->ref_count[mb->ref_frame - 1]++;

        // motion vectors, 16.3
        if (is_vp7)
            vp7_decode_mvs(s, mb, mb_x, mb_y, layout);
        else
            vp8_decode_mvs(s, mv_bounds, mb, mb_x, mb_y, layout);
    } else {
        // intra MB, 16.1
        mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_inter, s->prob->pred16x16);

        if (mb->mode == MODE_I4x4)
            decode_intra4x4_modes(s, c, mb, mb_x, 0, layout);

        mb->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree,
                                                s->prob->pred8x8c);
        mb->ref_frame        = VP56_FRAME_CURRENT;
        mb->partitioning     = VP8_SPLITMVMODE_NONE;
        AV_ZERO32(&mb->bmv[0]);
    }
}
1323 
1324 /**
1325  * @param r     arithmetic bitstream reader context
1326  * @param block destination for block coefficients
1327  * @param probs probabilities to use when reading trees from the bitstream
1328  * @param i     initial coeff index, 0 unless a separate DC block is coded
1329  * @param qmul  array holding the dc/ac dequant factor at position 0/1
1330  *
1331  * @return 0 if no coeffs were decoded
1332  *         otherwise, the index of the last coeff decoded plus one
1333  */
/* Core DCT token decoding loop (see the doc comment above for the
 * parameter contract). Works on a local copy of the range coder so the
 * hot loop can keep it in registers, writing it back on exit. The goto
 * structure mirrors the token tree: EOB is not tested again immediately
 * after a zero coefficient (skip_eob), except in VP7 (restart). */
static av_always_inline
int decode_block_coeffs_internal(VP56RangeCoder *r, int16_t block[16],
                                 uint8_t probs[16][3][NUM_DCT_TOKENS - 1],
                                 int i, uint8_t *token_prob, int16_t qmul[2],
                                 const uint8_t scan[16], int vp7)
{
    VP56RangeCoder c = *r;
    goto skip_eob;
    do {
        int coeff;
restart:
        if (!vp56_rac_get_prob_branchy(&c, token_prob[0]))   // DCT_EOB
            break;

skip_eob:
        if (!vp56_rac_get_prob_branchy(&c, token_prob[1])) { // DCT_0
            if (++i == 16)
                break; // invalid input; blocks should end with EOB
            token_prob = probs[i][0];
            if (vp7)
                goto restart;
            goto skip_eob;
        }

        if (!vp56_rac_get_prob_branchy(&c, token_prob[2])) { // DCT_1
            coeff = 1;
            token_prob = probs[i + 1][1];
        } else {
            if (!vp56_rac_get_prob_branchy(&c, token_prob[3])) { // DCT 2,3,4
                coeff = vp56_rac_get_prob_branchy(&c, token_prob[4]);
                if (coeff)
                    coeff += vp56_rac_get_prob(&c, token_prob[5]);
                coeff += 2;
            } else {
                // DCT_CAT*: magnitude ranges coded as base + extra bits
                if (!vp56_rac_get_prob_branchy(&c, token_prob[6])) {
                    if (!vp56_rac_get_prob_branchy(&c, token_prob[7])) { // DCT_CAT1
                        coeff = 5 + vp56_rac_get_prob(&c, vp8_dct_cat1_prob[0]);
                    } else {                                    // DCT_CAT2
                        coeff  = 7;
                        coeff += vp56_rac_get_prob(&c, vp8_dct_cat2_prob[0]) << 1;
                        coeff += vp56_rac_get_prob(&c, vp8_dct_cat2_prob[1]);
                    }
                } else {    // DCT_CAT3 and up
                    int a   = vp56_rac_get_prob(&c, token_prob[8]);
                    int b   = vp56_rac_get_prob(&c, token_prob[9 + a]);
                    int cat = (a << 1) + b;
                    coeff  = 3 + (8 << cat);
                    coeff += vp8_rac_get_coeff(&c, ff_vp8_dct_cat_prob[cat]);
                }
            }
            token_prob = probs[i + 1][2];
        }
        /* sign, dequantize (DC factor for i==0, AC otherwise), place by scan order */
        block[scan[i]] = (vp8_rac_get(&c) ? -coeff : coeff) * qmul[!!i];
    } while (++i < 16);

    *r = c;
    return i;
}
1393 
/* VP7 inter-block DC prediction.
 * pred[0] holds the previous DC value and pred[1] counts how often it has
 * repeated; once it has repeated more than 3 times, the coded DC is
 * treated as a delta against pred[0]. The counter restarts whenever
 * either value is zero or the sign changes.
 * @return 1 when the prediction was applied to block[0], else 0 */
static av_always_inline
int inter_predict_dc(int16_t block[16], int16_t pred[2])
{
    int16_t dc      = block[0];
    int     applied = 0;

    if (pred[1] > 3) {
        dc     += pred[0];
        applied = 1;
    }

    if (pred[0] != 0 && dc != 0 &&
        ((int32_t)pred[0] ^ (int32_t)dc) >= 0) {
        /* same nonzero sign: bump the repeat counter on an exact match */
        if (pred[0] == dc)
            pred[1]++;
    } else {
        pred[1] = 0;
    }
    block[0] = pred[0] = dc;

    return applied;
}
1416 
/* VP7 instantiation of the shared coefficient decoder: passes the caller's
 * custom scan order and selects the VP7 token-loop behaviour. */
static int vp7_decode_block_coeffs_internal(VP56RangeCoder *r,
                                            int16_t block[16],
                                            uint8_t probs[16][3][NUM_DCT_TOKENS - 1],
                                            int i, uint8_t *token_prob,
                                            int16_t qmul[2],
                                            const uint8_t scan[16])
{
    return decode_block_coeffs_internal(r, block, probs, i, token_prob,
                                        qmul, scan, IS_VP7);
}
1427 
#ifndef vp8_decode_block_coeffs_internal
/* VP8 instantiation of the shared coefficient decoder; always uses the
 * fixed zigzag scan. The #ifndef guard lets an arch-specific macro
 * (presumably provided via arm/vp8.h — see the ARCH_ARM include above)
 * replace this C version. */
static int vp8_decode_block_coeffs_internal(VP56RangeCoder *r,
                                            int16_t block[16],
                                            uint8_t probs[16][3][NUM_DCT_TOKENS - 1],
                                            int i, uint8_t *token_prob,
                                            int16_t qmul[2])
{
    return decode_block_coeffs_internal(r, block, probs, i,
                                        token_prob, qmul, ff_zigzag_scan, IS_VP8);
}
#endif
1439 
1440 /**
1441  * @param c          arithmetic bitstream reader context
1442  * @param block      destination for block coefficients
1443  * @param probs      probabilities to use when reading trees from the bitstream
1444  * @param i          initial coeff index, 0 unless a separate DC block is coded
1445  * @param zero_nhood the initial prediction context for number of surrounding
1446  *                   all-zero blocks (only left/top, so 0-2)
1447  * @param qmul       array holding the dc/ac dequant factor at position 0/1
1448  * @param scan       scan pattern (VP7 only)
1449  *
1450  * @return 0 if no coeffs were decoded
1451  *         otherwise, the index of the last coeff decoded plus one
1452  */
1453 static av_always_inline
decode_block_coeffs(VP56RangeCoder * c,int16_t block[16],uint8_t probs[16][3][NUM_DCT_TOKENS-1],int i,int zero_nhood,int16_t qmul[2],const uint8_t scan[16],int vp7)1454 int decode_block_coeffs(VP56RangeCoder *c, int16_t block[16],
1455                         uint8_t probs[16][3][NUM_DCT_TOKENS - 1],
1456                         int i, int zero_nhood, int16_t qmul[2],
1457                         const uint8_t scan[16], int vp7)
1458 {
1459     uint8_t *token_prob = probs[i][zero_nhood];
1460     if (!vp56_rac_get_prob_branchy(c, token_prob[0]))   // DCT_EOB
1461         return 0;
1462     return vp7 ? vp7_decode_block_coeffs_internal(c, block, probs, i,
1463                                                   token_prob, qmul, scan)
1464                : vp8_decode_block_coeffs_internal(c, block, probs, i,
1465                                                   token_prob, qmul);
1466 }
1467 
/* Decode all DCT coefficients of one macroblock: the optional luma DC
 * (Y2/WHT) block, the 16 luma blocks, then the 8 chroma blocks. Updates
 * the top/left non-zero context arrays (t_nnz/l_nnz, index 8 = Y2) and
 * td->non_zero_count_cache for the IDCT/loop-filter stages. */
static av_always_inline
void decode_mb_coeffs(VP8Context *s, VP8ThreadData *td, VP56RangeCoder *c,
                      VP8Macroblock *mb, uint8_t t_nnz[9], uint8_t l_nnz[9],
                      int is_vp7)
{
    int i, x, y, luma_start = 0, luma_ctx = 3;
    int nnz_pred, nnz, nnz_total = 0;
    int segment = mb->segment;
    int block_dc = 0;

    /* a separate DC block is coded unless this is I4x4 or (VP8) SPLITMV */
    if (mb->mode != MODE_I4x4 && (is_vp7 || mb->mode != VP8_MVMODE_SPLIT)) {
        nnz_pred = t_nnz[8] + l_nnz[8];

        // decode DC values and do hadamard
        nnz = decode_block_coeffs(c, td->block_dc, s->prob->token[1], 0,
                                  nnz_pred, s->qmat[segment].luma_dc_qmul,
                                  ff_zigzag_scan, is_vp7);
        l_nnz[8] = t_nnz[8] = !!nnz;

        if (is_vp7 && mb->mode > MODE_I4x4) {
            nnz |=  inter_predict_dc(td->block_dc,
                                     s->inter_dc_pred[mb->ref_frame - 1]);
        }

        if (nnz) {
            nnz_total += nnz;
            block_dc   = 1;
            /* inverse WHT scatters the DC values into the 16 luma blocks;
             * a DC-only specialization is used when only one coeff is set */
            if (nnz == 1)
                s->vp8dsp.vp8_luma_dc_wht_dc(td->block, td->block_dc);
            else
                s->vp8dsp.vp8_luma_dc_wht(td->block, td->block_dc);
        }
        /* luma AC blocks then start at coeff 1 and use token context 0 */
        luma_start = 1;
        luma_ctx   = 0;
    }

    // luma blocks
    for (y = 0; y < 4; y++)
        for (x = 0; x < 4; x++) {
            nnz_pred = l_nnz[y] + t_nnz[x];
            nnz = decode_block_coeffs(c, td->block[y][x],
                                      s->prob->token[luma_ctx],
                                      luma_start, nnz_pred,
                                      s->qmat[segment].luma_qmul,
                                      s->prob[0].scan, is_vp7);
            /* nnz+block_dc may be one more than the actual last index,
             * but we don't care */
            td->non_zero_count_cache[y][x] = nnz + block_dc;
            t_nnz[x] = l_nnz[y] = !!nnz;
            nnz_total += nnz;
        }

    // chroma blocks
    // TODO: what to do about dimensions? 2nd dim for luma is x,
    // but for chroma it's (y<<1)|x
    for (i = 4; i < 6; i++)
        for (y = 0; y < 2; y++)
            for (x = 0; x < 2; x++) {
                nnz_pred = l_nnz[i + 2 * y] + t_nnz[i + 2 * x];
                nnz = decode_block_coeffs(c, td->block[i][(y << 1) + x],
                                          s->prob->token[2], 0, nnz_pred,
                                          s->qmat[segment].chroma_qmul,
                                          s->prob[0].scan, is_vp7);
                td->non_zero_count_cache[i][(y << 1) + x] = nnz;
                t_nnz[i + 2 * x] = l_nnz[i + 2 * y] = !!nnz;
                nnz_total += nnz;
            }

    // if there were no coded coeffs despite the macroblock not being marked skip,
    // we MUST not do the inner loop filter and should not do IDCT
    // Since skip isn't used for bitstream prediction, just manually set it.
    if (!nnz_total)
        mb->skip = 1;
}
1542 
1543 static av_always_inline
backup_mb_border(uint8_t * top_border,uint8_t * src_y,uint8_t * src_cb,uint8_t * src_cr,ptrdiff_t linesize,ptrdiff_t uvlinesize,int simple)1544 void backup_mb_border(uint8_t *top_border, uint8_t *src_y,
1545                       uint8_t *src_cb, uint8_t *src_cr,
1546                       ptrdiff_t linesize, ptrdiff_t uvlinesize, int simple)
1547 {
1548     AV_COPY128(top_border, src_y + 15 * linesize);
1549     if (!simple) {
1550         AV_COPY64(top_border + 16, src_cb + 7 * uvlinesize);
1551         AV_COPY64(top_border + 24, src_cr + 7 * uvlinesize);
1552     }
1553 }
1554 
/* Swap (xchg=1) or copy (xchg=0) the pixel row above this macroblock with the
 * saved top-border buffer.  Called once with xchg=1 before intra prediction so
 * prediction sees pre-loop-filter pixels, and once with xchg=0 afterwards to
 * write the border buffer back over the frame row. */
static av_always_inline
void xchg_mb_border(uint8_t *top_border, uint8_t *src_y, uint8_t *src_cb,
                    uint8_t *src_cr, ptrdiff_t linesize, ptrdiff_t uvlinesize, int mb_x,
                    int mb_y, int mb_width, int simple, int xchg)
{
    uint8_t *top_border_m1 = top_border - 32;     // for TL prediction
    /* step back one row: we operate on the row just above the macroblock */
    src_y  -= linesize;
    src_cb -= uvlinesize;
    src_cr -= uvlinesize;

/* swap or copy 8 bytes between border buffer (a) and frame (b);
 * xchg=1 entries below always copy a -> b regardless of the caller's mode */
#define XCHG(a, b, xchg)                                                      \
    do {                                                                      \
        if (xchg)                                                             \
            AV_SWAP64(b, a);                                                  \
        else                                                                  \
            AV_COPY64(b, a);                                                  \
    } while (0)

    XCHG(top_border_m1 + 8, src_y - 8, xchg);
    XCHG(top_border, src_y, xchg);
    XCHG(top_border + 8, src_y + 8, 1);
    /* top-right neighbour exists only when not on the last macroblock column */
    if (mb_x < mb_width - 1)
        XCHG(top_border + 32, src_y + 16, 1);

    // only copy chroma for normal loop filter
    // or to initialize the top row to 127
    if (!simple || !mb_y) {
        XCHG(top_border_m1 + 16, src_cb - 8, xchg);
        XCHG(top_border_m1 + 24, src_cr - 8, xchg);
        XCHG(top_border + 16, src_cb, 1);
        XCHG(top_border + 24, src_cr, 1);
    }
}
1588 
1589 static av_always_inline
check_dc_pred8x8_mode(int mode,int mb_x,int mb_y)1590 int check_dc_pred8x8_mode(int mode, int mb_x, int mb_y)
1591 {
1592     if (!mb_x)
1593         return mb_y ? TOP_DC_PRED8x8 : DC_128_PRED8x8;
1594     else
1595         return mb_y ? mode : LEFT_DC_PRED8x8;
1596 }
1597 
1598 static av_always_inline
check_tm_pred8x8_mode(int mode,int mb_x,int mb_y,int vp7)1599 int check_tm_pred8x8_mode(int mode, int mb_x, int mb_y, int vp7)
1600 {
1601     if (!mb_x)
1602         return mb_y ? VERT_PRED8x8 : (vp7 ? DC_128_PRED8x8 : DC_129_PRED8x8);
1603     else
1604         return mb_y ? mode : HOR_PRED8x8;
1605 }
1606 
1607 static av_always_inline
check_intra_pred8x8_mode_emuedge(int mode,int mb_x,int mb_y,int vp7)1608 int check_intra_pred8x8_mode_emuedge(int mode, int mb_x, int mb_y, int vp7)
1609 {
1610     switch (mode) {
1611     case DC_PRED8x8:
1612         return check_dc_pred8x8_mode(mode, mb_x, mb_y);
1613     case VERT_PRED8x8:
1614         return !mb_y ? (vp7 ? DC_128_PRED8x8 : DC_127_PRED8x8) : mode;
1615     case HOR_PRED8x8:
1616         return !mb_x ? (vp7 ? DC_128_PRED8x8 : DC_129_PRED8x8) : mode;
1617     case PLANE_PRED8x8: /* TM */
1618         return check_tm_pred8x8_mode(mode, mb_x, mb_y, vp7);
1619     }
1620     return mode;
1621 }
1622 
1623 static av_always_inline
check_tm_pred4x4_mode(int mode,int mb_x,int mb_y,int vp7)1624 int check_tm_pred4x4_mode(int mode, int mb_x, int mb_y, int vp7)
1625 {
1626     if (!mb_x) {
1627         return mb_y ? VERT_VP8_PRED : (vp7 ? DC_128_PRED : DC_129_PRED);
1628     } else {
1629         return mb_y ? mode : HOR_VP8_PRED;
1630     }
1631 }
1632 
1633 static av_always_inline
check_intra_pred4x4_mode_emuedge(int mode,int mb_x,int mb_y,int * copy_buf,int vp7)1634 int check_intra_pred4x4_mode_emuedge(int mode, int mb_x, int mb_y,
1635                                      int *copy_buf, int vp7)
1636 {
1637     switch (mode) {
1638     case VERT_PRED:
1639         if (!mb_x && mb_y) {
1640             *copy_buf = 1;
1641             return mode;
1642         }
1643         /* fall-through */
1644     case DIAG_DOWN_LEFT_PRED:
1645     case VERT_LEFT_PRED:
1646         return !mb_y ? (vp7 ? DC_128_PRED : DC_127_PRED) : mode;
1647     case HOR_PRED:
1648         if (!mb_y) {
1649             *copy_buf = 1;
1650             return mode;
1651         }
1652         /* fall-through */
1653     case HOR_UP_PRED:
1654         return !mb_x ? (vp7 ? DC_128_PRED : DC_129_PRED) : mode;
1655     case TM_VP8_PRED:
1656         return check_tm_pred4x4_mode(mode, mb_x, mb_y, vp7);
1657     case DC_PRED: /* 4x4 DC doesn't use the same "H.264-style" exceptions
1658                    * as 16x16/8x8 DC */
1659     case DIAG_DOWN_RIGHT_PRED:
1660     case VERT_RIGHT_PRED:
1661     case HOR_DOWN_PRED:
1662         if (!mb_y || !mb_x)
1663             *copy_buf = 1;
1664         return mode;
1665     }
1666     return mode;
1667 }
1668 
/* Perform intra prediction for one macroblock (luma + chroma) and add the
 * IDCT residual for 4x4-mode luma blocks. */
static av_always_inline
void intra_predict(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
                   VP8Macroblock *mb, int mb_x, int mb_y, int is_vp7)
{
    int x, y, mode, nnz;
    uint32_t tr;

    /* for the first row, we need to run xchg_mb_border to init the top edge
     * to 127 otherwise, skip it if we aren't going to deblock */
    if (mb_y && (s->deblock_filter || !mb_y) && td->thread_nr == 0)
        xchg_mb_border(s->top_border[mb_x + 1], dst[0], dst[1], dst[2],
                       s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
                       s->filter.simple, 1);

    if (mb->mode < MODE_I4x4) {
        /* whole-macroblock 16x16 prediction */
        mode = check_intra_pred8x8_mode_emuedge(mb->mode, mb_x, mb_y, is_vp7);
        s->hpc.pred16x16[mode](dst[0], s->linesize);
    } else {
        /* per-4x4-subblock prediction */
        uint8_t *ptr = dst[0];
        uint8_t *intra4x4 = mb->intra4x4_pred_mode_mb;
        const uint8_t lo = is_vp7 ? 128 : 127;   // fill value for a missing top edge
        const uint8_t hi = is_vp7 ? 128 : 129;   // fill value for a missing left edge
        uint8_t tr_top[4] = { lo, lo, lo, lo };

        // all blocks on the right edge of the macroblock use the bottom edge
        // of the top macroblock for their topright edge
        uint8_t *tr_right = ptr - s->linesize + 16;

        // if we're on the right edge of the frame, said edge is extended
        // from the top macroblock
        if (mb_y && mb_x == s->mb_width - 1) {
            tr       = tr_right[-1] * 0x01010101u;   // replicate last available pixel
            tr_right = (uint8_t *) &tr;
        }

        if (mb->skip)
            AV_ZERO128(td->non_zero_count_cache);

        for (y = 0; y < 4; y++) {
            uint8_t *topright = ptr + 4 - s->linesize;
            for (x = 0; x < 4; x++) {
                int copy = 0;
                ptrdiff_t linesize = s->linesize;
                uint8_t *dst = ptr + 4 * x;
                /* 8x5 scratch area: byte 3 = top-left, bytes 4-7 = top edge,
                 * bytes 11/19/27/35 = left edge column, and the predicted
                 * 4x4 block lives at offsets 12/20/28/36 (stride 8) */
                LOCAL_ALIGNED(4, uint8_t, copy_dst, [5 * 8]);

                if ((y == 0 || x == 3) && mb_y == 0) {
                    topright = tr_top;
                } else if (x == 3)
                    topright = tr_right;

                mode = check_intra_pred4x4_mode_emuedge(intra4x4[x], mb_x + x,
                                                        mb_y + y, &copy, is_vp7);
                if (copy) {
                    /* edge block: synthesize borders in the scratch buffer,
                     * predict there, then copy the 4x4 result back below */
                    dst      = copy_dst + 12;
                    linesize = 8;
                    if (!(mb_y + y)) {
                        /* no row above: constant top edge */
                        copy_dst[3] = lo;
                        AV_WN32A(copy_dst + 4, lo * 0x01010101U);
                    } else {
                        AV_COPY32(copy_dst + 4, ptr + 4 * x - s->linesize);
                        if (!(mb_x + x)) {
                            copy_dst[3] = hi;
                        } else {
                            copy_dst[3] = ptr[4 * x - s->linesize - 1];
                        }
                    }
                    if (!(mb_x + x)) {
                        /* no column to the left: constant left edge */
                        copy_dst[11] =
                        copy_dst[19] =
                        copy_dst[27] =
                        copy_dst[35] = hi;
                    } else {
                        copy_dst[11] = ptr[4 * x                   - 1];
                        copy_dst[19] = ptr[4 * x + s->linesize     - 1];
                        copy_dst[27] = ptr[4 * x + s->linesize * 2 - 1];
                        copy_dst[35] = ptr[4 * x + s->linesize * 3 - 1];
                    }
                }
                s->hpc.pred4x4[mode](dst, topright, linesize);
                if (copy) {
                    AV_COPY32(ptr + 4 * x,                   copy_dst + 12);
                    AV_COPY32(ptr + 4 * x + s->linesize,     copy_dst + 20);
                    AV_COPY32(ptr + 4 * x + s->linesize * 2, copy_dst + 28);
                    AV_COPY32(ptr + 4 * x + s->linesize * 3, copy_dst + 36);
                }

                /* add the residual on top of the prediction
                 * (nnz == 1 means only the DC coefficient is non-zero) */
                nnz = td->non_zero_count_cache[y][x];
                if (nnz) {
                    if (nnz == 1)
                        s->vp8dsp.vp8_idct_dc_add(ptr + 4 * x,
                                                  td->block[y][x], s->linesize);
                    else
                        s->vp8dsp.vp8_idct_add(ptr + 4 * x,
                                               td->block[y][x], s->linesize);
                }
                topright += 4;
            }

            ptr      += 4 * s->linesize;
            intra4x4 += 4;
        }
    }

    /* chroma prediction: one 8x8 prediction per plane, same mode for both */
    mode = check_intra_pred8x8_mode_emuedge(mb->chroma_pred_mode,
                                            mb_x, mb_y, is_vp7);
    s->hpc.pred8x8[mode](dst[1], s->uvlinesize);
    s->hpc.pred8x8[mode](dst[2], s->uvlinesize);

    /* second pass (xchg = 0) writes the border buffer back over the frame */
    if (mb_y && (s->deblock_filter || !mb_y) && td->thread_nr == 0)
        xchg_mb_border(s->top_border[mb_x + 1], dst[0], dst[1], dst[2],
                       s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
                       s->filter.simple, 0);
}
1783 
/* Motion-compensation parameters per subpel position, indexed by the 3-bit
 * fractional part of the motion vector. */
static const uint8_t subpel_idx[3][8] = {
    { 0, 1, 2, 1, 2, 1, 2, 1 }, // nr. of left extra pixels,
                                // also function pointer index
    { 0, 3, 5, 3, 5, 3, 5, 3 }, // nr. of extra pixels required
    { 0, 2, 3, 2, 3, 2, 3, 2 }, // nr. of right extra pixels
};
1790 
/**
 * luma MC function
 *
 * @param s        VP8 decoding context
 * @param td       per-thread decoder state (supplies the edge emulation buffer)
 * @param dst      target buffer for block data at block position
 * @param ref      reference picture buffer at origin (0, 0)
 * @param mv       motion vector (relative to block position) to get pixel data from
 * @param x_off    horizontal position of block from origin (0, 0)
 * @param y_off    vertical position of block from origin (0, 0)
 * @param block_w  width of block (16, 8 or 4)
 * @param block_h  height of block (always same as block_w)
 * @param width    width of src/dst plane data
 * @param height   height of src/dst plane data
 * @param linesize size of a single line of plane data, including padding
 * @param mc_func  motion compensation function pointers (bilinear or sixtap MC)
 */
static av_always_inline
void vp8_mc_luma(VP8Context *s, VP8ThreadData *td, uint8_t *dst,
                 ThreadFrame *ref, const VP56mv *mv,
                 int x_off, int y_off, int block_w, int block_h,
                 int width, int height, ptrdiff_t linesize,
                 vp8_mc_func mc_func[3][3])
{
    uint8_t *src = ref->f->data[0];

    if (AV_RN32A(mv)) {
        ptrdiff_t src_linesize = linesize;

        /* luma MVs carry a 2-bit fractional part; *2 rescales it to the
         * 8-entry subpel tables (only even indices occur here) */
        int mx = (mv->x * 2) & 7, mx_idx = subpel_idx[0][mx];
        int my = (mv->y * 2) & 7, my_idx = subpel_idx[0][my];

        x_off += mv->x >> 2;
        y_off += mv->y >> 2;

        // edge emulation
        /* frame-threading: wait until the reference rows we read (including
         * the subpel filter margin) have been decoded */
        ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 4, 0);
        src += y_off * linesize + x_off;
        if (x_off < mx_idx || x_off >= width  - block_w - subpel_idx[2][mx] ||
            y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
            /* source area (incl. filter margins) crosses the plane border:
             * build a padded copy and read from that instead */
            s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
                                     src - my_idx * linesize - mx_idx,
                                     EDGE_EMU_LINESIZE, linesize,
                                     block_w + subpel_idx[1][mx],
                                     block_h + subpel_idx[1][my],
                                     x_off - mx_idx, y_off - my_idx,
                                     width, height);
            src = td->edge_emu_buffer + mx_idx + EDGE_EMU_LINESIZE * my_idx;
            src_linesize = EDGE_EMU_LINESIZE;
        }
        mc_func[my_idx][mx_idx](dst, linesize, src, src_linesize, block_h, mx, my);
    } else {
        /* zero MV: plain copy, no subpel filtering or margins needed */
        ff_thread_await_progress(ref, (3 + y_off + block_h) >> 4, 0);
        mc_func[0][0](dst, linesize, src + y_off * linesize + x_off,
                      linesize, block_h, 0, 0);
    }
}
1847 
/**
 * chroma MC function
 *
 * @param s        VP8 decoding context
 * @param td       per-thread decoder state (supplies the edge emulation buffer)
 * @param dst1     target buffer for block data at block position (U plane)
 * @param dst2     target buffer for block data at block position (V plane)
 * @param ref      reference picture buffer at origin (0, 0)
 * @param mv       motion vector (relative to block position) to get pixel data from
 * @param x_off    horizontal position of block from origin (0, 0)
 * @param y_off    vertical position of block from origin (0, 0)
 * @param block_w  width of block (16, 8 or 4)
 * @param block_h  height of block (always same as block_w)
 * @param width    width of src/dst plane data
 * @param height   height of src/dst plane data
 * @param linesize size of a single line of plane data, including padding
 * @param mc_func  motion compensation function pointers (bilinear or sixtap MC)
 */
static av_always_inline
void vp8_mc_chroma(VP8Context *s, VP8ThreadData *td, uint8_t *dst1,
                   uint8_t *dst2, ThreadFrame *ref, const VP56mv *mv,
                   int x_off, int y_off, int block_w, int block_h,
                   int width, int height, ptrdiff_t linesize,
                   vp8_mc_func mc_func[3][3])
{
    uint8_t *src1 = ref->f->data[1], *src2 = ref->f->data[2];

    if (AV_RN32A(mv)) {
        /* chroma MVs carry a 3-bit fractional part, indexing the subpel
         * tables directly */
        int mx = mv->x & 7, mx_idx = subpel_idx[0][mx];
        int my = mv->y & 7, my_idx = subpel_idx[0][my];

        x_off += mv->x >> 3;
        y_off += mv->y >> 3;

        // edge emulation
        src1 += y_off * linesize + x_off;
        src2 += y_off * linesize + x_off;
        /* frame-threading: wait for the reference rows (plus filter margin) */
        ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 3, 0);
        if (x_off < mx_idx || x_off >= width  - block_w - subpel_idx[2][mx] ||
            y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
            /* border case: pad U and V separately through the shared
             * edge emulation buffer, running MC right after each pad */
            s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
                                     src1 - my_idx * linesize - mx_idx,
                                     EDGE_EMU_LINESIZE, linesize,
                                     block_w + subpel_idx[1][mx],
                                     block_h + subpel_idx[1][my],
                                     x_off - mx_idx, y_off - my_idx, width, height);
            src1 = td->edge_emu_buffer + mx_idx + EDGE_EMU_LINESIZE * my_idx;
            mc_func[my_idx][mx_idx](dst1, linesize, src1, EDGE_EMU_LINESIZE, block_h, mx, my);

            s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
                                     src2 - my_idx * linesize - mx_idx,
                                     EDGE_EMU_LINESIZE, linesize,
                                     block_w + subpel_idx[1][mx],
                                     block_h + subpel_idx[1][my],
                                     x_off - mx_idx, y_off - my_idx, width, height);
            src2 = td->edge_emu_buffer + mx_idx + EDGE_EMU_LINESIZE * my_idx;
            mc_func[my_idx][mx_idx](dst2, linesize, src2, EDGE_EMU_LINESIZE, block_h, mx, my);
        } else {
            mc_func[my_idx][mx_idx](dst1, linesize, src1, linesize, block_h, mx, my);
            mc_func[my_idx][mx_idx](dst2, linesize, src2, linesize, block_h, mx, my);
        }
    } else {
        /* zero MV: plain copy of both chroma planes */
        ff_thread_await_progress(ref, (3 + y_off + block_h) >> 3, 0);
        mc_func[0][0](dst1, linesize, src1 + y_off * linesize + x_off, linesize, block_h, 0, 0);
        mc_func[0][0](dst2, linesize, src2 + y_off * linesize + x_off, linesize, block_h, 0, 0);
    }
}
1914 
1915 static av_always_inline
vp8_mc_part(VP8Context * s,VP8ThreadData * td,uint8_t * dst[3],ThreadFrame * ref_frame,int x_off,int y_off,int bx_off,int by_off,int block_w,int block_h,int width,int height,VP56mv * mv)1916 void vp8_mc_part(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
1917                  ThreadFrame *ref_frame, int x_off, int y_off,
1918                  int bx_off, int by_off, int block_w, int block_h,
1919                  int width, int height, VP56mv *mv)
1920 {
1921     VP56mv uvmv = *mv;
1922 
1923     /* Y */
1924     vp8_mc_luma(s, td, dst[0] + by_off * s->linesize + bx_off,
1925                 ref_frame, mv, x_off + bx_off, y_off + by_off,
1926                 block_w, block_h, width, height, s->linesize,
1927                 s->put_pixels_tab[block_w == 8]);
1928 
1929     /* U/V */
1930     if (s->profile == 3) {
1931         /* this block only applies VP8; it is safe to check
1932          * only the profile, as VP7 profile <= 1 */
1933         uvmv.x &= ~7;
1934         uvmv.y &= ~7;
1935     }
1936     x_off   >>= 1;
1937     y_off   >>= 1;
1938     bx_off  >>= 1;
1939     by_off  >>= 1;
1940     width   >>= 1;
1941     height  >>= 1;
1942     block_w >>= 1;
1943     block_h >>= 1;
1944     vp8_mc_chroma(s, td, dst[1] + by_off * s->uvlinesize + bx_off,
1945                   dst[2] + by_off * s->uvlinesize + bx_off, ref_frame,
1946                   &uvmv, x_off + bx_off, y_off + by_off,
1947                   block_w, block_h, width, height, s->uvlinesize,
1948                   s->put_pixels_tab[1 + (block_w == 4)]);
1949 }
1950 
/* Fetch pixels for estimated mv 4 macroblocks ahead.
 * Optimized for 64-byte cache lines. Inspired by ffh264 prefetch_motion. */
static av_always_inline
void prefetch_motion(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y,
                     int mb_xy, int ref)
{
    /* Don't prefetch refs that haven't been used very often this frame. */
    if (s->ref_count[ref - 1] > (mb_xy >> 5)) {
        int x_off = mb_x << 4, y_off = mb_y << 4;
        /* estimated position: current MV applied 4 MBs (+8 px of slack) ahead */
        int mx = (mb->mv.x >> 2) + x_off + 8;
        int my = (mb->mv.y >> 2) + y_off;
        uint8_t **src = s->framep[ref]->tf.f->data;
        /* (mb_x & 3) * 4 staggers the prefetched rows across macroblocks */
        int off = mx + (my + (mb_x & 3) * 4) * s->linesize + 64;
        /* For threading, a ff_thread_await_progress here might be useful, but
         * it actually slows down the decoder. Since a bad prefetch doesn't
         * generate bad decoder output, we don't run it here. */
        s->vdsp.prefetch(src[0] + off, s->linesize, 4);
        off = (mx >> 1) + ((my >> 1) + (mb_x & 7)) * s->uvlinesize + 64;
        /* stride = distance between U and V planes, so one call touches the
         * corresponding line of both chroma planes — presumably the planes
         * are allocated close together (NOTE(review): relies on allocator
         * layout, not something this function verifies) */
        s->vdsp.prefetch(src[1] + off, src[2] - src[1], 2);
    }
}
1972 
/**
 * Apply motion vectors to prediction buffer, chapter 18.
 *
 * Dispatches on the macroblock's partitioning: whole-MB, 16x8, 8x16 and 8x8
 * splits go through vp8_mc_part(); the 4x4 split does per-subblock luma MC
 * and derives each chroma MV from the four luma MVs it covers.
 */
static av_always_inline
void inter_predict(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
                   VP8Macroblock *mb, int mb_x, int mb_y)
{
    int x_off = mb_x << 4, y_off = mb_y << 4;
    int width = 16 * s->mb_width, height = 16 * s->mb_height;
    ThreadFrame *ref = &s->framep[mb->ref_frame]->tf;
    VP56mv *bmv = mb->bmv;

    switch (mb->partitioning) {
    case VP8_SPLITMVMODE_NONE:
        /* single MV for the whole 16x16 macroblock */
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
                    0, 0, 16, 16, width, height, &mb->mv);
        break;
    case VP8_SPLITMVMODE_4x4: {
        int x, y;
        VP56mv uvmv;

        /* Y: one MC call per 4x4 subblock, each with its own MV */
        for (y = 0; y < 4; y++) {
            for (x = 0; x < 4; x++) {
                vp8_mc_luma(s, td, dst[0] + 4 * y * s->linesize + x * 4,
                            ref, &bmv[4 * y + x],
                            4 * x + x_off, 4 * y + y_off, 4, 4,
                            width, height, s->linesize,
                            s->put_pixels_tab[2]);
            }
        }

        /* U/V: half resolution, MV derived per 4x4 chroma subblock */
        x_off  >>= 1;
        y_off  >>= 1;
        width  >>= 1;
        height >>= 1;
        for (y = 0; y < 2; y++) {
            for (x = 0; x < 2; x++) {
                /* sum the four luma MVs covering this chroma subblock ... */
                uvmv.x = mb->bmv[2 * y       * 4 + 2 * x    ].x +
                         mb->bmv[2 * y       * 4 + 2 * x + 1].x +
                         mb->bmv[(2 * y + 1) * 4 + 2 * x    ].x +
                         mb->bmv[(2 * y + 1) * 4 + 2 * x + 1].x;
                uvmv.y = mb->bmv[2 * y       * 4 + 2 * x    ].y +
                         mb->bmv[2 * y       * 4 + 2 * x + 1].y +
                         mb->bmv[(2 * y + 1) * 4 + 2 * x    ].y +
                         mb->bmv[(2 * y + 1) * 4 + 2 * x + 1].y;
                /* ... and average, with FF_SIGNBIT correcting the rounding
                 * bias for negative sums */
                uvmv.x = (uvmv.x + 2 + FF_SIGNBIT(uvmv.x)) >> 2;
                uvmv.y = (uvmv.y + 2 + FF_SIGNBIT(uvmv.y)) >> 2;
                if (s->profile == 3) {
                    /* profile 3: chroma MVs are full-pel only */
                    uvmv.x &= ~7;
                    uvmv.y &= ~7;
                }
                vp8_mc_chroma(s, td, dst[1] + 4 * y * s->uvlinesize + x * 4,
                              dst[2] + 4 * y * s->uvlinesize + x * 4, ref,
                              &uvmv, 4 * x + x_off, 4 * y + y_off, 4, 4,
                              width, height, s->uvlinesize,
                              s->put_pixels_tab[2]);
            }
        }
        break;
    }
    case VP8_SPLITMVMODE_16x8:
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
                    0, 0, 16, 8, width, height, &bmv[0]);
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
                    0, 8, 16, 8, width, height, &bmv[1]);
        break;
    case VP8_SPLITMVMODE_8x16:
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
                    0, 0, 8, 16, width, height, &bmv[0]);
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
                    8, 0, 8, 16, width, height, &bmv[1]);
        break;
    case VP8_SPLITMVMODE_8x8:
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
                    0, 0, 8, 8, width, height, &bmv[0]);
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
                    8, 0, 8, 8, width, height, &bmv[1]);
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
                    0, 8, 8, 8, width, height, &bmv[2]);
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
                    8, 8, 8, 8, width, height, &bmv[3]);
        break;
    }
}
2059 
/* Add the inverse-transformed residual of one inter macroblock to the
 * prediction in dst.  Reads the per-subblock coefficient counts from
 * td->non_zero_count_cache, four counts packed per 32-bit word. */
static av_always_inline
void idct_mb(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3], VP8Macroblock *mb)
{
    int x, y, ch;

    /* luma residual (MODE_I4x4 adds its residual during intra_predict) */
    if (mb->mode != MODE_I4x4) {
        uint8_t *y_dst = dst[0];
        for (y = 0; y < 4; y++) {
            /* one byte of nnz4 per 4x4 block in this row, little-endian */
            uint32_t nnz4 = AV_RL32(td->non_zero_count_cache[y]);
            if (nnz4) {
                if (nnz4 & ~0x01010101) {
                    /* at least one block has AC coefficients: per-block path */
                    for (x = 0; x < 4; x++) {
                        if ((uint8_t) nnz4 == 1)
                            s->vp8dsp.vp8_idct_dc_add(y_dst + 4 * x,
                                                      td->block[y][x],
                                                      s->linesize);
                        else if ((uint8_t) nnz4 > 1)
                            s->vp8dsp.vp8_idct_add(y_dst + 4 * x,
                                                   td->block[y][x],
                                                   s->linesize);
                        nnz4 >>= 8;
                        if (!nnz4)
                            break;   // remaining blocks are all empty
                    }
                } else {
                    /* every block is DC-only (count 0 or 1): batched DC add */
                    s->vp8dsp.vp8_idct_dc_add4y(y_dst, td->block[y], s->linesize);
                }
            }
            y_dst += 4 * s->linesize;
        }
    }

    /* chroma residual: rows 4 (U) and 5 (V) of the nnz cache */
    for (ch = 0; ch < 2; ch++) {
        uint32_t nnz4 = AV_RL32(td->non_zero_count_cache[4 + ch]);
        if (nnz4) {
            uint8_t *ch_dst = dst[1 + ch];
            if (nnz4 & ~0x01010101) {
                for (y = 0; y < 2; y++) {
                    for (x = 0; x < 2; x++) {
                        if ((uint8_t) nnz4 == 1)
                            s->vp8dsp.vp8_idct_dc_add(ch_dst + 4 * x,
                                                      td->block[4 + ch][(y << 1) + x],
                                                      s->uvlinesize);
                        else if ((uint8_t) nnz4 > 1)
                            s->vp8dsp.vp8_idct_add(ch_dst + 4 * x,
                                                   td->block[4 + ch][(y << 1) + x],
                                                   s->uvlinesize);
                        nnz4 >>= 8;
                        if (!nnz4)
                            goto chroma_idct_end;   // rest of this plane is empty
                    }
                    ch_dst += 4 * s->uvlinesize;
                }
            } else {
                /* all four chroma blocks DC-only */
                s->vp8dsp.vp8_idct_dc_add4uv(ch_dst, td->block[4 + ch], s->uvlinesize);
            }
        }
chroma_idct_end:
        ;
    }
}
2121 
2122 static av_always_inline
filter_level_for_mb(VP8Context * s,VP8Macroblock * mb,VP8FilterStrength * f,int is_vp7)2123 void filter_level_for_mb(VP8Context *s, VP8Macroblock *mb,
2124                          VP8FilterStrength *f, int is_vp7)
2125 {
2126     int interior_limit, filter_level;
2127 
2128     if (s->segmentation.enabled) {
2129         filter_level = s->segmentation.filter_level[mb->segment];
2130         if (!s->segmentation.absolute_vals)
2131             filter_level += s->filter.level;
2132     } else
2133         filter_level = s->filter.level;
2134 
2135     if (s->lf_delta.enabled) {
2136         filter_level += s->lf_delta.ref[mb->ref_frame];
2137         filter_level += s->lf_delta.mode[mb->mode];
2138     }
2139 
2140     filter_level = av_clip_uintp2(filter_level, 6);
2141 
2142     interior_limit = filter_level;
2143     if (s->filter.sharpness) {
2144         interior_limit >>= (s->filter.sharpness + 3) >> 2;
2145         interior_limit = FFMIN(interior_limit, 9 - s->filter.sharpness);
2146     }
2147     interior_limit = FFMAX(interior_limit, 1);
2148 
2149     f->filter_level = filter_level;
2150     f->inner_limit = interior_limit;
2151     f->inner_filter = is_vp7 || !mb->skip || mb->mode == MODE_I4x4 ||
2152                       mb->mode == VP8_MVMODE_SPLIT;
2153 }
2154 
/* Run the normal (non-simple) loop filter over one macroblock: macroblock
 * edges first where a left/top neighbour exists, then interior subblock
 * edges when f->inner_filter is set. */
static av_always_inline
void filter_mb(VP8Context *s, uint8_t *dst[3], VP8FilterStrength *f,
               int mb_x, int mb_y, int is_vp7)
{
    int mbedge_lim, bedge_lim_y, bedge_lim_uv, hev_thresh;
    int filter_level = f->filter_level;
    int inner_limit = f->inner_limit;
    int inner_filter = f->inner_filter;
    ptrdiff_t linesize   = s->linesize;
    ptrdiff_t uvlinesize = s->uvlinesize;
    /* high-edge-variance threshold, indexed by [keyframe][filter_level] */
    static const uint8_t hev_thresh_lut[2][64] = {
        { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
          2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
          3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
          3, 3, 3, 3 },
        { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
          2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
          2, 2, 2, 2 }
    };

    if (!filter_level)
        return;

    /* edge limits: VP7 and VP8 derive them differently from the level */
    if (is_vp7) {
        bedge_lim_y  = filter_level;
        bedge_lim_uv = filter_level * 2;
        mbedge_lim   = filter_level + 2;
    } else {
        bedge_lim_y  =
        bedge_lim_uv = filter_level * 2 + inner_limit;
        mbedge_lim   = bedge_lim_y + 4;
    }

    hev_thresh = hev_thresh_lut[s->keyframe][filter_level];

    /* left macroblock edge (exists unless we are in the first column) */
    if (mb_x) {
        s->vp8dsp.vp8_h_loop_filter16y(dst[0], linesize,
                                       mbedge_lim, inner_limit, hev_thresh);
        s->vp8dsp.vp8_h_loop_filter8uv(dst[1], dst[2], uvlinesize,
                                       mbedge_lim, inner_limit, hev_thresh);
    }

/* interior vertical edges at x = 4, 8, 12 (luma) and x = 4 (chroma);
 * guarded by cond so VP7 and VP8 can run it at different points below */
#define H_LOOP_FILTER_16Y_INNER(cond)                                         \
    if (cond && inner_filter) {                                               \
        s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0] +  4, linesize,           \
                                             bedge_lim_y, inner_limit,        \
                                             hev_thresh);                     \
        s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0] +  8, linesize,           \
                                             bedge_lim_y, inner_limit,        \
                                             hev_thresh);                     \
        s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0] + 12, linesize,           \
                                             bedge_lim_y, inner_limit,        \
                                             hev_thresh);                     \
        s->vp8dsp.vp8_h_loop_filter8uv_inner(dst[1] +  4, dst[2] + 4,         \
                                             uvlinesize,  bedge_lim_uv,       \
                                             inner_limit, hev_thresh);        \
    }

    /* VP8 filters inner horizontal edges here, before the vertical pass */
    H_LOOP_FILTER_16Y_INNER(!is_vp7)

    /* top macroblock edge (exists unless we are in the first row) */
    if (mb_y) {
        s->vp8dsp.vp8_v_loop_filter16y(dst[0], linesize,
                                       mbedge_lim, inner_limit, hev_thresh);
        s->vp8dsp.vp8_v_loop_filter8uv(dst[1], dst[2], uvlinesize,
                                       mbedge_lim, inner_limit, hev_thresh);
    }

    /* interior horizontal edges at y = 4, 8, 12 (luma) and y = 4 (chroma) */
    if (inner_filter) {
        s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0] +  4 * linesize,
                                             linesize, bedge_lim_y,
                                             inner_limit, hev_thresh);
        s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0] +  8 * linesize,
                                             linesize, bedge_lim_y,
                                             inner_limit, hev_thresh);
        s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0] + 12 * linesize,
                                             linesize, bedge_lim_y,
                                             inner_limit, hev_thresh);
        s->vp8dsp.vp8_v_loop_filter8uv_inner(dst[1] +  4 * uvlinesize,
                                             dst[2] +  4 * uvlinesize,
                                             uvlinesize, bedge_lim_uv,
                                             inner_limit, hev_thresh);
    }

    /* VP7 runs the inner horizontal-edge pass last instead */
    H_LOOP_FILTER_16Y_INNER(is_vp7)
}
2241 
2242 static av_always_inline
filter_mb_simple(VP8Context * s,uint8_t * dst,VP8FilterStrength * f,int mb_x,int mb_y)2243 void filter_mb_simple(VP8Context *s, uint8_t *dst, VP8FilterStrength *f,
2244                       int mb_x, int mb_y)
2245 {
2246     int mbedge_lim, bedge_lim;
2247     int filter_level = f->filter_level;
2248     int inner_limit  = f->inner_limit;
2249     int inner_filter = f->inner_filter;
2250     ptrdiff_t linesize = s->linesize;
2251 
2252     if (!filter_level)
2253         return;
2254 
2255     bedge_lim  = 2 * filter_level + inner_limit;
2256     mbedge_lim = bedge_lim + 4;
2257 
2258     if (mb_x)
2259         s->vp8dsp.vp8_h_loop_filter_simple(dst, linesize, mbedge_lim);
2260     if (inner_filter) {
2261         s->vp8dsp.vp8_h_loop_filter_simple(dst +  4, linesize, bedge_lim);
2262         s->vp8dsp.vp8_h_loop_filter_simple(dst +  8, linesize, bedge_lim);
2263         s->vp8dsp.vp8_h_loop_filter_simple(dst + 12, linesize, bedge_lim);
2264     }
2265 
2266     if (mb_y)
2267         s->vp8dsp.vp8_v_loop_filter_simple(dst, linesize, mbedge_lim);
2268     if (inner_filter) {
2269         s->vp8dsp.vp8_v_loop_filter_simple(dst +  4 * linesize, linesize, bedge_lim);
2270         s->vp8dsp.vp8_v_loop_filter_simple(dst +  8 * linesize, linesize, bedge_lim);
2271         s->vp8dsp.vp8_v_loop_filter_simple(dst + 12 * linesize, linesize, bedge_lim);
2272     }
2273 }
2274 
2275 #define MARGIN (16 << 2)
/**
 * Pre-parse pass used with the alternate macroblock layout (frame
 * threading): decode mode and motion-vector information for every
 * macroblock in the frame without touching coefficient data.
 *
 * @return 0 on success, AVERROR_INVALIDDATA if the range coder ran dry.
 */
static av_always_inline
int vp78_decode_mv_mb_modes(AVCodecContext *avctx, VP8Frame *curframe,
                            VP8Frame *prev_frame, int is_vp7)
{
    VP8Context *s = avctx->priv_data;
    int mb_x, mb_y;

    /* Vertical MV clamp window; MARGIN is one macroblock (16 << 2) and the
     * window slides down by 64 units after each row. */
    s->mv_bounds.mv_min.y = -MARGIN;
    s->mv_bounds.mv_max.y = ((s->mb_height - 1) << 6) + MARGIN;
    for (mb_y = 0; mb_y < s->mb_height; mb_y++) {
        /* Row pointer into the padded macroblock array: one guard column on
         * the left and one guard row on top. */
        VP8Macroblock *mb = s->macroblocks_base +
                            ((s->mb_width + 1) * (mb_y + 1) + 1);
        int mb_xy = mb_y * s->mb_width;

        /* Reset the left intra4x4 prediction context at each row start. */
        AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED * 0x01010101);

        s->mv_bounds.mv_min.x = -MARGIN;
        s->mv_bounds.mv_max.x = ((s->mb_width - 1) << 6) + MARGIN;

        if (vpX_rac_is_end(&s->c)) {
            return AVERROR_INVALIDDATA;
        }
        for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
            /* First row: seed the top prediction context in the guard row
             * above the frame with DC_PRED. */
            if (mb_y == 0)
                AV_WN32A((mb - s->mb_width - 1)->intra4x4_pred_mode_top,
                         DC_PRED * 0x01010101);
            decode_mb_mode(s, &s->mv_bounds, mb, mb_x, mb_y, curframe->seg_map->data + mb_xy,
                           prev_frame && prev_frame->seg_map ?
                           prev_frame->seg_map->data + mb_xy : NULL, 1, is_vp7);
            s->mv_bounds.mv_min.x -= 64;
            s->mv_bounds.mv_max.x -= 64;
        }
        s->mv_bounds.mv_min.y -= 64;
        s->mv_bounds.mv_max.y -= 64;
    }
    return 0;
}
2313 
vp7_decode_mv_mb_modes(AVCodecContext * avctx,VP8Frame * cur_frame,VP8Frame * prev_frame)2314 static int vp7_decode_mv_mb_modes(AVCodecContext *avctx, VP8Frame *cur_frame,
2315                                    VP8Frame *prev_frame)
2316 {
2317     return vp78_decode_mv_mb_modes(avctx, cur_frame, prev_frame, IS_VP7);
2318 }
2319 
vp8_decode_mv_mb_modes(AVCodecContext * avctx,VP8Frame * cur_frame,VP8Frame * prev_frame)2320 static int vp8_decode_mv_mb_modes(AVCodecContext *avctx, VP8Frame *cur_frame,
2321                                    VP8Frame *prev_frame)
2322 {
2323     return vp78_decode_mv_mb_modes(avctx, cur_frame, prev_frame, IS_VP8);
2324 }
2325 
#if HAVE_THREADS
/*
 * Sliced-threading synchronization helpers.  A thread's decoding progress
 * is packed into one int as (mb_y << 16) | mb_x so it can be published and
 * compared with a single atomic operation.
 *
 * check_thread_pos(td, otd, x, y): block until thread `otd` has advanced to
 * at least macroblock (x, y).  The predicate is re-tested in a loop around
 * pthread_cond_wait(), as required to cope with spurious wakeups.
 *
 * NOTE(review): callers may pass a negative mb_y_check near the top of the
 * frame; left-shifting a negative int is formally undefined in C and this
 * relies on the usual two's-complement result — confirm before touching.
 */
#define check_thread_pos(td, otd, mb_x_check, mb_y_check)                     \
    do {                                                                      \
        int tmp = (mb_y_check << 16) | (mb_x_check & 0xFFFF);                 \
        if (atomic_load(&otd->thread_mb_pos) < tmp) {                         \
            pthread_mutex_lock(&otd->lock);                                   \
            atomic_store(&td->wait_mb_pos, tmp);                              \
            do {                                                              \
                if (atomic_load(&otd->thread_mb_pos) >= tmp)                  \
                    break;                                                    \
                pthread_cond_wait(&otd->cond, &otd->lock);                    \
            } while (1);                                                      \
            atomic_store(&td->wait_mb_pos, INT_MAX);                          \
            pthread_mutex_unlock(&otd->lock);                                 \
        }                                                                     \
    } while (0)

/*
 * update_pos(td, mb_y, mb_x): publish this thread's progress and wake any
 * neighbor that may be blocked on a position we have now passed.  The
 * broadcast only happens under sliced threading with more than one job,
 * and only when some waiter could actually be released (pos_check).
 */
#define update_pos(td, mb_y, mb_x)                                            \
    do {                                                                      \
        int pos              = (mb_y << 16) | (mb_x & 0xFFFF);                \
        int sliced_threading = (avctx->active_thread_type == FF_THREAD_SLICE) && \
                               (num_jobs > 1);                                \
        int is_null          = !next_td || !prev_td;                          \
        int pos_check        = (is_null) ? 1 :                                \
            (next_td != td && pos >= atomic_load(&next_td->wait_mb_pos)) ||   \
            (prev_td != td && pos >= atomic_load(&prev_td->wait_mb_pos));     \
        atomic_store(&td->thread_mb_pos, pos);                                \
        if (sliced_threading && pos_check) {                                  \
            pthread_mutex_lock(&td->lock);                                    \
            pthread_cond_broadcast(&td->cond);                                \
            pthread_mutex_unlock(&td->lock);                                  \
        }                                                                     \
    } while (0)
#else
/* Thread-less builds: compile to no-op statements ("while(0)" consumes the
 * macro invocation's trailing semicolon). */
#define check_thread_pos(td, otd, mb_x_check, mb_y_check) while(0)
#define update_pos(td, mb_y, mb_x) while(0)
#endif
2363 
decode_mb_row_no_filter(AVCodecContext * avctx,void * tdata,int jobnr,int threadnr,int is_vp7)2364 static av_always_inline int decode_mb_row_no_filter(AVCodecContext *avctx, void *tdata,
2365                                         int jobnr, int threadnr, int is_vp7)
2366 {
2367     VP8Context *s = avctx->priv_data;
2368     VP8ThreadData *prev_td, *next_td, *td = &s->thread_data[threadnr];
2369     int mb_y = atomic_load(&td->thread_mb_pos) >> 16;
2370     int mb_x, mb_xy = mb_y * s->mb_width;
2371     int num_jobs = s->num_jobs;
2372     VP8Frame *curframe = s->curframe, *prev_frame = s->prev_frame;
2373     VP56RangeCoder *c  = &s->coeff_partition[mb_y & (s->num_coeff_partitions - 1)];
2374     VP8Macroblock *mb;
2375     uint8_t *dst[3] = {
2376         curframe->tf.f->data[0] + 16 * mb_y * s->linesize,
2377         curframe->tf.f->data[1] +  8 * mb_y * s->uvlinesize,
2378         curframe->tf.f->data[2] +  8 * mb_y * s->uvlinesize
2379     };
2380 
2381     if (vpX_rac_is_end(c))
2382          return AVERROR_INVALIDDATA;
2383 
2384     if (mb_y == 0)
2385         prev_td = td;
2386     else
2387         prev_td = &s->thread_data[(jobnr + num_jobs - 1) % num_jobs];
2388     if (mb_y == s->mb_height - 1)
2389         next_td = td;
2390     else
2391         next_td = &s->thread_data[(jobnr + 1) % num_jobs];
2392     if (s->mb_layout == 1)
2393         mb = s->macroblocks_base + ((s->mb_width + 1) * (mb_y + 1) + 1);
2394     else {
2395         // Make sure the previous frame has read its segmentation map,
2396         // if we re-use the same map.
2397         if (prev_frame && s->segmentation.enabled &&
2398             !s->segmentation.update_map)
2399             ff_thread_await_progress(&prev_frame->tf, mb_y, 0);
2400         mb = s->macroblocks + (s->mb_height - mb_y - 1) * 2;
2401         memset(mb - 1, 0, sizeof(*mb)); // zero left macroblock
2402         AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED * 0x01010101);
2403     }
2404 
2405     if (!is_vp7 || mb_y == 0)
2406         memset(td->left_nnz, 0, sizeof(td->left_nnz));
2407 
2408     td->mv_bounds.mv_min.x = -MARGIN;
2409     td->mv_bounds.mv_max.x = ((s->mb_width - 1) << 6) + MARGIN;
2410 
2411     for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
2412         if (vpX_rac_is_end(c))
2413             return AVERROR_INVALIDDATA;
2414         // Wait for previous thread to read mb_x+2, and reach mb_y-1.
2415         if (prev_td != td) {
2416             if (threadnr != 0) {
2417                 check_thread_pos(td, prev_td,
2418                                  mb_x + (is_vp7 ? 2 : 1),
2419                                  mb_y - (is_vp7 ? 2 : 1));
2420             } else {
2421                 check_thread_pos(td, prev_td,
2422                                  mb_x + (is_vp7 ? 2 : 1) + s->mb_width + 3,
2423                                  mb_y - (is_vp7 ? 2 : 1));
2424             }
2425         }
2426 
2427         s->vdsp.prefetch(dst[0] + (mb_x & 3) * 4 * s->linesize + 64,
2428                          s->linesize, 4);
2429         s->vdsp.prefetch(dst[1] + (mb_x & 7) * s->uvlinesize + 64,
2430                          dst[2] - dst[1], 2);
2431 
2432         if (!s->mb_layout)
2433             decode_mb_mode(s, &td->mv_bounds, mb, mb_x, mb_y, curframe->seg_map->data + mb_xy,
2434                            prev_frame && prev_frame->seg_map ?
2435                            prev_frame->seg_map->data + mb_xy : NULL, 0, is_vp7);
2436 
2437         prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_PREVIOUS);
2438 
2439         if (!mb->skip)
2440             decode_mb_coeffs(s, td, c, mb, s->top_nnz[mb_x], td->left_nnz, is_vp7);
2441 
2442         if (mb->mode <= MODE_I4x4)
2443             intra_predict(s, td, dst, mb, mb_x, mb_y, is_vp7);
2444         else
2445             inter_predict(s, td, dst, mb, mb_x, mb_y);
2446 
2447         prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN);
2448 
2449         if (!mb->skip) {
2450             idct_mb(s, td, dst, mb);
2451         } else {
2452             AV_ZERO64(td->left_nnz);
2453             AV_WN64(s->top_nnz[mb_x], 0);   // array of 9, so unaligned
2454 
2455             /* Reset DC block predictors if they would exist
2456              * if the mb had coefficients */
2457             if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) {
2458                 td->left_nnz[8]     = 0;
2459                 s->top_nnz[mb_x][8] = 0;
2460             }
2461         }
2462 
2463         if (s->deblock_filter)
2464             filter_level_for_mb(s, mb, &td->filter_strength[mb_x], is_vp7);
2465 
2466         if (s->deblock_filter && num_jobs != 1 && threadnr == num_jobs - 1) {
2467             if (s->filter.simple)
2468                 backup_mb_border(s->top_border[mb_x + 1], dst[0],
2469                                  NULL, NULL, s->linesize, 0, 1);
2470             else
2471                 backup_mb_border(s->top_border[mb_x + 1], dst[0],
2472                                  dst[1], dst[2], s->linesize, s->uvlinesize, 0);
2473         }
2474 
2475         prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN2);
2476 
2477         dst[0]      += 16;
2478         dst[1]      += 8;
2479         dst[2]      += 8;
2480         td->mv_bounds.mv_min.x -= 64;
2481         td->mv_bounds.mv_max.x -= 64;
2482 
2483         if (mb_x == s->mb_width + 1) {
2484             update_pos(td, mb_y, s->mb_width + 3);
2485         } else {
2486             update_pos(td, mb_y, mb_x);
2487         }
2488     }
2489     return 0;
2490 }
2491 
vp7_decode_mb_row_no_filter(AVCodecContext * avctx,void * tdata,int jobnr,int threadnr)2492 static int vp7_decode_mb_row_no_filter(AVCodecContext *avctx, void *tdata,
2493                                         int jobnr, int threadnr)
2494 {
2495     return decode_mb_row_no_filter(avctx, tdata, jobnr, threadnr, 1);
2496 }
2497 
vp8_decode_mb_row_no_filter(AVCodecContext * avctx,void * tdata,int jobnr,int threadnr)2498 static int vp8_decode_mb_row_no_filter(AVCodecContext *avctx, void *tdata,
2499                                         int jobnr, int threadnr)
2500 {
2501     return decode_mb_row_no_filter(avctx, tdata, jobnr, threadnr, 0);
2502 }
2503 
filter_mb_row(AVCodecContext * avctx,void * tdata,int jobnr,int threadnr,int is_vp7)2504 static av_always_inline void filter_mb_row(AVCodecContext *avctx, void *tdata,
2505                               int jobnr, int threadnr, int is_vp7)
2506 {
2507     VP8Context *s = avctx->priv_data;
2508     VP8ThreadData *td = &s->thread_data[threadnr];
2509     int mb_x, mb_y = atomic_load(&td->thread_mb_pos) >> 16, num_jobs = s->num_jobs;
2510     AVFrame *curframe = s->curframe->tf.f;
2511     VP8Macroblock *mb;
2512     VP8ThreadData *prev_td, *next_td;
2513     uint8_t *dst[3] = {
2514         curframe->data[0] + 16 * mb_y * s->linesize,
2515         curframe->data[1] +  8 * mb_y * s->uvlinesize,
2516         curframe->data[2] +  8 * mb_y * s->uvlinesize
2517     };
2518 
2519     if (s->mb_layout == 1)
2520         mb = s->macroblocks_base + ((s->mb_width + 1) * (mb_y + 1) + 1);
2521     else
2522         mb = s->macroblocks + (s->mb_height - mb_y - 1) * 2;
2523 
2524     if (mb_y == 0)
2525         prev_td = td;
2526     else
2527         prev_td = &s->thread_data[(jobnr + num_jobs - 1) % num_jobs];
2528     if (mb_y == s->mb_height - 1)
2529         next_td = td;
2530     else
2531         next_td = &s->thread_data[(jobnr + 1) % num_jobs];
2532 
2533     for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb++) {
2534         VP8FilterStrength *f = &td->filter_strength[mb_x];
2535         if (prev_td != td)
2536             check_thread_pos(td, prev_td,
2537                              (mb_x + 1) + (s->mb_width + 3), mb_y - 1);
2538         if (next_td != td)
2539             if (next_td != &s->thread_data[0])
2540                 check_thread_pos(td, next_td, mb_x + 1, mb_y + 1);
2541 
2542         if (num_jobs == 1) {
2543             if (s->filter.simple)
2544                 backup_mb_border(s->top_border[mb_x + 1], dst[0],
2545                                  NULL, NULL, s->linesize, 0, 1);
2546             else
2547                 backup_mb_border(s->top_border[mb_x + 1], dst[0],
2548                                  dst[1], dst[2], s->linesize, s->uvlinesize, 0);
2549         }
2550 
2551         if (s->filter.simple)
2552             filter_mb_simple(s, dst[0], f, mb_x, mb_y);
2553         else
2554             filter_mb(s, dst, f, mb_x, mb_y, is_vp7);
2555         dst[0] += 16;
2556         dst[1] += 8;
2557         dst[2] += 8;
2558 
2559         update_pos(td, mb_y, (s->mb_width + 3) + mb_x);
2560     }
2561 }
2562 
vp7_filter_mb_row(AVCodecContext * avctx,void * tdata,int jobnr,int threadnr)2563 static void vp7_filter_mb_row(AVCodecContext *avctx, void *tdata,
2564                               int jobnr, int threadnr)
2565 {
2566     filter_mb_row(avctx, tdata, jobnr, threadnr, 1);
2567 }
2568 
vp8_filter_mb_row(AVCodecContext * avctx,void * tdata,int jobnr,int threadnr)2569 static void vp8_filter_mb_row(AVCodecContext *avctx, void *tdata,
2570                               int jobnr, int threadnr)
2571 {
2572     filter_mb_row(avctx, tdata, jobnr, threadnr, 0);
2573 }
2574 
/**
 * Worker for avctx->execute2(): decode (and optionally loop-filter) every
 * num_jobs-th macroblock row starting at row jobnr.  Under frame threading
 * num_jobs is 1, so one call covers the whole frame and reports per-row
 * progress to decoders of dependent frames.
 *
 * @return 0 on success, or the first row decode error.
 */
static av_always_inline
int vp78_decode_mb_row_sliced(AVCodecContext *avctx, void *tdata, int jobnr,
                              int threadnr, int is_vp7)
{
    VP8Context *s = avctx->priv_data;
    VP8ThreadData *td = &s->thread_data[jobnr];
    VP8ThreadData *next_td = NULL, *prev_td = NULL;
    VP8Frame *curframe = s->curframe;
    int mb_y, num_jobs = s->num_jobs;
    int ret;

    td->thread_nr = threadnr;
    /* Vertical MV clamp window for this thread's first row (64 units per
     * macroblock row), advanced by num_jobs rows per iteration below. */
    td->mv_bounds.mv_min.y   = -MARGIN - 64 * threadnr;
    td->mv_bounds.mv_max.y   = ((s->mb_height - 1) << 6) + MARGIN - 64 * threadnr;
    for (mb_y = jobnr; mb_y < s->mb_height; mb_y += num_jobs) {
        /* Publish which row we are on; the row workers read it back out of
         * td->thread_mb_pos. */
        atomic_store(&td->thread_mb_pos, mb_y << 16);
        ret = s->decode_mb_row_no_filter(avctx, tdata, jobnr, threadnr);
        if (ret < 0) {
            /* Publish a maximal position so sibling threads blocked in
             * check_thread_pos() are released before we bail out. */
            update_pos(td, s->mb_height, INT_MAX & 0xFFFF);
            return ret;
        }
        if (s->deblock_filter)
            s->filter_mb_row(avctx, tdata, jobnr, threadnr);
        update_pos(td, mb_y, INT_MAX & 0xFFFF);

        td->mv_bounds.mv_min.y -= 64 * num_jobs;
        td->mv_bounds.mv_max.y -= 64 * num_jobs;

        /* Frame threading: let consumers of this frame use the row now. */
        if (avctx->active_thread_type == FF_THREAD_FRAME)
            ff_thread_report_progress(&curframe->tf, mb_y, 0);
    }

    return 0;
}
2609 
vp7_decode_mb_row_sliced(AVCodecContext * avctx,void * tdata,int jobnr,int threadnr)2610 static int vp7_decode_mb_row_sliced(AVCodecContext *avctx, void *tdata,
2611                                     int jobnr, int threadnr)
2612 {
2613     return vp78_decode_mb_row_sliced(avctx, tdata, jobnr, threadnr, IS_VP7);
2614 }
2615 
vp8_decode_mb_row_sliced(AVCodecContext * avctx,void * tdata,int jobnr,int threadnr)2616 static int vp8_decode_mb_row_sliced(AVCodecContext *avctx, void *tdata,
2617                                     int jobnr, int threadnr)
2618 {
2619     return vp78_decode_mb_row_sliced(avctx, tdata, jobnr, threadnr, IS_VP8);
2620 }
2621 
/**
 * Decode one VP7/VP8 frame from avpkt and, for visible frames, return it
 * via *data / *got_frame.
 *
 * Responsibilities: frame-header parsing, the skip-frame policy, reference
 * frame rotation (last / golden / altref), and dispatching macroblock
 * decoding either to a hardware accelerator or to the sliced row workers.
 *
 * @return avpkt->size on success, a negative AVERROR code on failure.
 */
static av_always_inline
int vp78_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
                      AVPacket *avpkt, int is_vp7)
{
    VP8Context *s = avctx->priv_data;
    int ret, i, referenced, num_jobs;
    enum AVDiscard skip_thresh;
    VP8Frame *av_uninit(curframe), *prev_frame;

    if (is_vp7)
        ret = vp7_decode_frame_header(s, avpkt->data, avpkt->size);
    else
        ret = vp8_decode_frame_header(s, avpkt->data, avpkt->size);

    if (ret < 0)
        goto err;

    if (s->actually_webp) {
        // avctx->pix_fmt already set in caller.
    } else if (!is_vp7 && s->pix_fmt == AV_PIX_FMT_NONE) {
        /* Negotiate the output pixel format once, on the first frame. */
        s->pix_fmt = get_pixel_format(s);
        if (s->pix_fmt < 0) {
            ret = AVERROR(EINVAL);
            goto err;
        }
        avctx->pix_fmt = s->pix_fmt;
    }

    prev_frame = s->framep[VP56_FRAME_CURRENT];

    /* A frame is "referenced" if any later frame may predict from it. */
    referenced = s->update_last || s->update_golden == VP56_FRAME_CURRENT ||
                 s->update_altref == VP56_FRAME_CURRENT;

    /* Lowest skip_frame setting at which this frame may be dropped. */
    skip_thresh = !referenced ? AVDISCARD_NONREF
                              : !s->keyframe ? AVDISCARD_NONKEY
                                             : AVDISCARD_ALL;

    if (avctx->skip_frame >= skip_thresh) {
        s->invisible = 1;
        memcpy(&s->next_framep[0], &s->framep[0], sizeof(s->framep[0]) * 4);
        goto skip_decode;
    }
    s->deblock_filter = s->filter.level && avctx->skip_loop_filter < skip_thresh;

    // release no longer referenced frames
    for (i = 0; i < 5; i++)
        if (s->frames[i].tf.f->buf[0] &&
            &s->frames[i] != prev_frame &&
            &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
            &s->frames[i] != s->framep[VP56_FRAME_GOLDEN]   &&
            &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2])
            vp8_release_frame(s, &s->frames[i]);

    curframe = s->framep[VP56_FRAME_CURRENT] = vp8_find_free_buffer(s);

    /* Color properties: 0 means the stream signalled the default. */
    if (!s->colorspace)
        avctx->colorspace = AVCOL_SPC_BT470BG;
    if (s->fullrange)
        avctx->color_range = AVCOL_RANGE_JPEG;
    else
        avctx->color_range = AVCOL_RANGE_MPEG;

    /* Given that arithmetic probabilities are updated every frame, it's quite
     * likely that the values we have on a random interframe are complete
     * junk if we didn't start decode on a keyframe. So just don't display
     * anything rather than junk. */
    if (!s->keyframe && (!s->framep[VP56_FRAME_PREVIOUS] ||
                         !s->framep[VP56_FRAME_GOLDEN]   ||
                         !s->framep[VP56_FRAME_GOLDEN2])) {
        av_log(avctx, AV_LOG_WARNING,
               "Discarding interframe without a prior keyframe!\n");
        ret = AVERROR_INVALIDDATA;
        goto err;
    }

    curframe->tf.f->key_frame = s->keyframe;
    curframe->tf.f->pict_type = s->keyframe ? AV_PICTURE_TYPE_I
                                            : AV_PICTURE_TYPE_P;
    if ((ret = vp8_alloc_frame(s, curframe, referenced)) < 0)
        goto err;

    // check if golden and altref are swapped
    if (s->update_altref != VP56_FRAME_NONE)
        s->next_framep[VP56_FRAME_GOLDEN2] = s->framep[s->update_altref];
    else
        s->next_framep[VP56_FRAME_GOLDEN2] = s->framep[VP56_FRAME_GOLDEN2];

    if (s->update_golden != VP56_FRAME_NONE)
        s->next_framep[VP56_FRAME_GOLDEN] = s->framep[s->update_golden];
    else
        s->next_framep[VP56_FRAME_GOLDEN] = s->framep[VP56_FRAME_GOLDEN];

    if (s->update_last)
        s->next_framep[VP56_FRAME_PREVIOUS] = curframe;
    else
        s->next_framep[VP56_FRAME_PREVIOUS] = s->framep[VP56_FRAME_PREVIOUS];

    s->next_framep[VP56_FRAME_CURRENT] = curframe;

    /* Frame threading: all per-frame setup is done, so the next frame's
     * decode is allowed to start now. */
    if (avctx->codec->update_thread_context)
        ff_thread_finish_setup(avctx);

    if (avctx->hwaccel) {
        /* Hardware decode path: hand the whole packet to the hwaccel. */
        ret = avctx->hwaccel->start_frame(avctx, avpkt->data, avpkt->size);
        if (ret < 0)
            goto err;

        ret = avctx->hwaccel->decode_slice(avctx, avpkt->data, avpkt->size);
        if (ret < 0)
            goto err;

        ret = avctx->hwaccel->end_frame(avctx);
        if (ret < 0)
            goto err;

    } else {
        s->linesize   = curframe->tf.f->linesize[0];
        s->uvlinesize = curframe->tf.f->linesize[1];

        memset(s->top_nnz, 0, s->mb_width * sizeof(*s->top_nnz));
        /* Zero macroblock structures for top/top-left prediction
         * from outside the frame. */
        if (!s->mb_layout)
            memset(s->macroblocks + s->mb_height * 2 - 1, 0,
                   (s->mb_width + 1) * sizeof(*s->macroblocks));
        if (!s->mb_layout && s->keyframe)
            memset(s->intra4x4_pred_mode_top, DC_PRED, s->mb_width * 4);

        memset(s->ref_count, 0, sizeof(s->ref_count));

        if (s->mb_layout == 1) {
            // Make sure the previous frame has read its segmentation map,
            // if we re-use the same map.
            if (prev_frame && s->segmentation.enabled &&
                !s->segmentation.update_map)
                ff_thread_await_progress(&prev_frame->tf, 1, 0);
            if (is_vp7)
                ret = vp7_decode_mv_mb_modes(avctx, curframe, prev_frame);
            else
                ret = vp8_decode_mv_mb_modes(avctx, curframe, prev_frame);
            if (ret < 0)
                goto err;
        }

        /* Frame threading decodes with a single job; otherwise use up to
         * one job per coefficient partition, capped by the thread count. */
        if (avctx->active_thread_type == FF_THREAD_FRAME)
            num_jobs = 1;
        else
            num_jobs = FFMIN(s->num_coeff_partitions, avctx->thread_count);
        s->num_jobs   = num_jobs;
        s->curframe   = curframe;
        s->prev_frame = prev_frame;
        s->mv_bounds.mv_min.y   = -MARGIN;
        s->mv_bounds.mv_max.y   = ((s->mb_height - 1) << 6) + MARGIN;
        for (i = 0; i < MAX_THREADS; i++) {
            VP8ThreadData *td = &s->thread_data[i];
            atomic_init(&td->thread_mb_pos, 0);
            atomic_init(&td->wait_mb_pos, INT_MAX);
        }
        if (is_vp7)
            avctx->execute2(avctx, vp7_decode_mb_row_sliced, s->thread_data, NULL,
                            num_jobs);
        else
            avctx->execute2(avctx, vp8_decode_mb_row_sliced, s->thread_data, NULL,
                            num_jobs);
    }

    ff_thread_report_progress(&curframe->tf, INT_MAX, 0);
    /* Commit the reference-frame rotation computed above. */
    memcpy(&s->framep[0], &s->next_framep[0], sizeof(s->framep[0]) * 4);

skip_decode:
    // if future frames don't use the updated probabilities,
    // reset them to the values we saved
    if (!s->update_probabilities)
        s->prob[0] = s->prob[1];

    if (!s->invisible) {
        if ((ret = av_frame_ref(data, curframe->tf.f)) < 0)
            return ret;
        *got_frame = 1;
    }

    return avpkt->size;
err:
    /* Keep the reference-frame state self-consistent even on failure. */
    memcpy(&s->next_framep[0], &s->framep[0], sizeof(s->framep[0]) * 4);
    return ret;
}
2808 
ff_vp8_decode_frame(AVCodecContext * avctx,void * data,int * got_frame,AVPacket * avpkt)2809 int ff_vp8_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
2810                         AVPacket *avpkt)
2811 {
2812     return vp78_decode_frame(avctx, data, got_frame, avpkt, IS_VP8);
2813 }
2814 
#if CONFIG_VP7_DECODER
/* Decode callback for the VP7 flavor of the shared frame decoder. */
static int vp7_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
                            AVPacket *avpkt)
{
    return vp78_decode_frame(avctx, data, got_frame, avpkt, IS_VP7);
}
#endif /* CONFIG_VP7_DECODER */
2822 
ff_vp8_decode_free(AVCodecContext * avctx)2823 av_cold int ff_vp8_decode_free(AVCodecContext *avctx)
2824 {
2825     VP8Context *s = avctx->priv_data;
2826     int i;
2827 
2828     if (!s)
2829         return 0;
2830 
2831     vp8_decode_flush_impl(avctx, 1);
2832     for (i = 0; i < FF_ARRAY_ELEMS(s->frames); i++)
2833         av_frame_free(&s->frames[i].tf.f);
2834 
2835     return 0;
2836 }
2837 
vp8_init_frames(VP8Context * s)2838 static av_cold int vp8_init_frames(VP8Context *s)
2839 {
2840     int i;
2841     for (i = 0; i < FF_ARRAY_ELEMS(s->frames); i++) {
2842         s->frames[i].tf.f = av_frame_alloc();
2843         if (!s->frames[i].tf.f)
2844             return AVERROR(ENOMEM);
2845     }
2846     return 0;
2847 }
2848 
/**
 * Shared VP7/VP8 decoder initialization: DSP tables, intra prediction
 * helpers, the per-flavor row-worker callbacks and the frame pool.
 *
 * @return 0 on success, or a negative AVERROR (frame allocation failure).
 */
static av_always_inline
int vp78_decode_init(AVCodecContext *avctx, int is_vp7)
{
    VP8Context *s = avctx->priv_data;
    int ret;

    s->avctx = avctx;
    s->vp7   = avctx->codec->id == AV_CODEC_ID_VP7;
    /* The actual pix_fmt is negotiated on the first frame (see
     * vp78_decode_frame); advertise the common default until then. */
    s->pix_fmt = AV_PIX_FMT_NONE;
    avctx->pix_fmt = AV_PIX_FMT_YUV420P;

    ff_videodsp_init(&s->vdsp, 8);

    /* Shared VP7/VP8 DSP first, then the flavor-specific overrides and the
     * row workers used by vp78_decode_mb_row_sliced(). */
    ff_vp78dsp_init(&s->vp8dsp);
    if (CONFIG_VP7_DECODER && is_vp7) {
        ff_h264_pred_init(&s->hpc, AV_CODEC_ID_VP7, 8, 1);
        ff_vp7dsp_init(&s->vp8dsp);
        s->decode_mb_row_no_filter = vp7_decode_mb_row_no_filter;
        s->filter_mb_row           = vp7_filter_mb_row;
    } else if (CONFIG_VP8_DECODER && !is_vp7) {
        ff_h264_pred_init(&s->hpc, AV_CODEC_ID_VP8, 8, 1);
        ff_vp8dsp_init(&s->vp8dsp);
        s->decode_mb_row_no_filter = vp8_decode_mb_row_no_filter;
        s->filter_mb_row           = vp8_filter_mb_row;
    }

    /* does not change for VP8 */
    memcpy(s->prob[0].scan, ff_zigzag_scan, sizeof(s->prob[0].scan));

    /* Allocate the frame pool; tear everything down again on failure. */
    if ((ret = vp8_init_frames(s)) < 0) {
        ff_vp8_decode_free(avctx);
        return ret;
    }

    return 0;
}
2885 
#if CONFIG_VP7_DECODER
/* Init callback for the VP7 decoder.  Marked av_cold for consistency with
 * its VP8 counterpart ff_vp8_decode_init (init code is not on a hot path). */
static av_cold int vp7_decode_init(AVCodecContext *avctx)
{
    return vp78_decode_init(avctx, IS_VP7);
}
#endif /* CONFIG_VP7_DECODER */
2892 
ff_vp8_decode_init(AVCodecContext * avctx)2893 av_cold int ff_vp8_decode_init(AVCodecContext *avctx)
2894 {
2895     return vp78_decode_init(avctx, IS_VP8);
2896 }
2897 
#if CONFIG_VP8_DECODER
#if HAVE_THREADS
/* Translate a frame pointer out of the source context's frames[] array
 * into the corresponding slot of this context's frames[] array. */
#define REBASE(pic) ((pic) ? (pic) - &s_src->frames[0] + &s->frames[0] : NULL)

/**
 * Frame threading: import the inter-frame decoder state from the source
 * thread's context (probabilities, segmentation, loop-filter deltas, sign
 * biases and the reference frames).
 */
static int vp8_decode_update_thread_context(AVCodecContext *dst,
                                            const AVCodecContext *src)
{
    VP8Context *s = dst->priv_data, *s_src = src->priv_data;
    int i;

    /* If the source saw a resolution change, drop our per-size buffers so
     * they get reallocated for the new dimensions. */
    if (s->macroblocks_base &&
        (s_src->mb_width != s->mb_width || s_src->mb_height != s->mb_height)) {
        free_buffers(s);
        s->mb_width  = s_src->mb_width;
        s->mb_height = s_src->mb_height;
    }

    s->pix_fmt      = s_src->pix_fmt;
    /* Take the probabilities the source carries into its next frame: slot 0
     * if it kept its updates, otherwise the saved copy in slot 1. */
    s->prob[0]      = s_src->prob[!s_src->update_probabilities];
    s->segmentation = s_src->segmentation;
    s->lf_delta     = s_src->lf_delta;
    memcpy(s->sign_bias, s_src->sign_bias, sizeof(s->sign_bias));

    /* Re-reference every allocated frame of the source context. */
    for (i = 0; i < FF_ARRAY_ELEMS(s_src->frames); i++) {
        if (s_src->frames[i].tf.f->buf[0]) {
            int ret = vp8_ref_frame(s, &s->frames[i], &s_src->frames[i]);
            if (ret < 0)
                return ret;
        }
    }

    /* Adopt the source's post-decode frame pointers, rebased onto our own
     * frames[] storage. */
    s->framep[0] = REBASE(s_src->next_framep[0]);
    s->framep[1] = REBASE(s_src->next_framep[1]);
    s->framep[2] = REBASE(s_src->next_framep[2]);
    s->framep[3] = REBASE(s_src->next_framep[3]);

    return 0;
}
#endif /* HAVE_THREADS */
#endif /* CONFIG_VP8_DECODER */
2938 
#if CONFIG_VP7_DECODER
/* On2 VP7 decoder definition.  Unlike the VP8 entry below, it advertises
 * no threading capabilities and no hardware acceleration. */
AVCodec ff_vp7_decoder = {
    .name                  = "vp7",
    .long_name             = NULL_IF_CONFIG_SMALL("On2 VP7"),
    .type                  = AVMEDIA_TYPE_VIDEO,
    .id                    = AV_CODEC_ID_VP7,
    .priv_data_size        = sizeof(VP8Context),
    .init                  = vp7_decode_init,
    .close                 = ff_vp8_decode_free,
    .decode                = vp7_decode_frame,
    .capabilities          = AV_CODEC_CAP_DR1,
    .flush                 = vp8_decode_flush,
};
#endif /* CONFIG_VP7_DECODER */
2953 
#if CONFIG_VP8_DECODER
/* On2 VP8 decoder definition: frame and slice threading, plus VAAPI/NVDEC
 * hardware acceleration when configured in. */
AVCodec ff_vp8_decoder = {
    .name                  = "vp8",
    .long_name             = NULL_IF_CONFIG_SMALL("On2 VP8"),
    .type                  = AVMEDIA_TYPE_VIDEO,
    .id                    = AV_CODEC_ID_VP8,
    .priv_data_size        = sizeof(VP8Context),
    .init                  = ff_vp8_decode_init,
    .close                 = ff_vp8_decode_free,
    .decode                = ff_vp8_decode_frame,
    .capabilities          = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_FRAME_THREADS |
                             AV_CODEC_CAP_SLICE_THREADS,
    .flush                 = vp8_decode_flush,
    .update_thread_context = ONLY_IF_THREADS_ENABLED(vp8_decode_update_thread_context),
    .hw_configs            = (const AVCodecHWConfigInternal*[]) {
#if CONFIG_VP8_VAAPI_HWACCEL
                               HWACCEL_VAAPI(vp8),
#endif
#if CONFIG_VP8_NVDEC_HWACCEL
                               HWACCEL_NVDEC(vp8),
#endif
                               NULL
                           },
    .caps_internal         = FF_CODEC_CAP_ALLOCATE_PROGRESS,
};
#endif /* CONFIG_VP8_DECODER */
2980