/*
 * VP7/VP8 compatible video decoder
 *
 * Copyright (C) 2010 David Conrad
 * Copyright (C) 2010 Ronald S. Bultje
 * Copyright (C) 2010 Fiona Glaser
 * Copyright (C) 2012 Daniel Kang
 * Copyright (C) 2014 Peter Ross
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "libavutil/imgutils.h"

#include "avcodec.h"
#include "hwconfig.h"
#include "internal.h"
#include "mathops.h"
#include "rectangle.h"
#include "thread.h"
#include "vp8.h"
#include "vp8data.h"

#if ARCH_ARM
#   include "arm/vp8.h"
#endif

#if CONFIG_VP7_DECODER && CONFIG_VP8_DECODER
#define VPX(vp7, f) (vp7 ? vp7_ ## f : vp8_ ## f)
#elif CONFIG_VP7_DECODER
#define VPX(vp7, f) vp7_ ## f
#else // CONFIG_VP8_DECODER
#define VPX(vp7, f) vp8_ ## f
#endif
static void free_buffers(VP8Context *s)
{
    int i;
    if (s->thread_data)
        for (i = 0; i < MAX_THREADS; i++) {
#if HAVE_THREADS
            pthread_cond_destroy(&s->thread_data[i].cond);
            pthread_mutex_destroy(&s->thread_data[i].lock);
#endif
            av_freep(&s->thread_data[i].filter_strength);
        }
    av_freep(&s->thread_data);
    av_freep(&s->macroblocks_base);
    av_freep(&s->intra4x4_pred_mode_top);
    av_freep(&s->top_nnz);
    av_freep(&s->top_border);

    s->macroblocks = NULL;
}

static int vp8_alloc_frame(VP8Context *s, VP8Frame *f, int ref)
{
    int ret;
    if ((ret = ff_thread_get_buffer(s->avctx, &f->tf,
                                    ref ? AV_GET_BUFFER_FLAG_REF : 0)) < 0)
        return ret;
    if (!(f->seg_map = av_buffer_allocz(s->mb_width * s->mb_height)))
        goto fail;
    if (s->avctx->hwaccel) {
        const AVHWAccel *hwaccel = s->avctx->hwaccel;
        if (hwaccel->frame_priv_data_size) {
            f->hwaccel_priv_buf = av_buffer_allocz(hwaccel->frame_priv_data_size);
            if (!f->hwaccel_priv_buf)
                goto fail;
            f->hwaccel_picture_private = f->hwaccel_priv_buf->data;
        }
    }
    return 0;

fail:
    av_buffer_unref(&f->seg_map);
    ff_thread_release_buffer(s->avctx, &f->tf);
    return AVERROR(ENOMEM);
}

static void vp8_release_frame(VP8Context *s, VP8Frame *f)
{
    av_buffer_unref(&f->seg_map);
    av_buffer_unref(&f->hwaccel_priv_buf);
    f->hwaccel_picture_private = NULL;
    ff_thread_release_buffer(s->avctx, &f->tf);
}

#if CONFIG_VP8_DECODER
static int vp8_ref_frame(VP8Context *s, VP8Frame *dst, VP8Frame *src)
{
    int ret;

    vp8_release_frame(s, dst);

    if ((ret = ff_thread_ref_frame(&dst->tf, &src->tf)) < 0)
        return ret;
    if (src->seg_map &&
        !(dst->seg_map = av_buffer_ref(src->seg_map))) {
        vp8_release_frame(s, dst);
        return AVERROR(ENOMEM);
    }
    if (src->hwaccel_picture_private) {
        dst->hwaccel_priv_buf = av_buffer_ref(src->hwaccel_priv_buf);
        if (!dst->hwaccel_priv_buf)
            return AVERROR(ENOMEM);
        dst->hwaccel_picture_private = dst->hwaccel_priv_buf->data;
    }

    return 0;
}
#endif /* CONFIG_VP8_DECODER */

static void vp8_decode_flush_impl(AVCodecContext *avctx, int free_mem)
{
    VP8Context *s = avctx->priv_data;
    int i;

    for (i = 0; i < FF_ARRAY_ELEMS(s->frames); i++)
        vp8_release_frame(s, &s->frames[i]);
    memset(s->framep, 0, sizeof(s->framep));

    if (free_mem)
        free_buffers(s);
}

static void vp8_decode_flush(AVCodecContext *avctx)
{
    vp8_decode_flush_impl(avctx, 0);
}

static VP8Frame *vp8_find_free_buffer(VP8Context *s)
{
    VP8Frame *frame = NULL;
    int i;

    // find a free buffer
    for (i = 0; i < 5; i++)
        if (&s->frames[i] != s->framep[VP56_FRAME_CURRENT]  &&
            &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
            &s->frames[i] != s->framep[VP56_FRAME_GOLDEN]   &&
            &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2]) {
            frame = &s->frames[i];
            break;
        }
    if (i == 5) {
        av_log(s->avctx, AV_LOG_FATAL, "Ran out of free frames!\n");
        abort();
    }
    if (frame->tf.f->buf[0])
        vp8_release_frame(s, frame);

    return frame;
}

static enum AVPixelFormat get_pixel_format(VP8Context *s)
{
    enum AVPixelFormat pix_fmts[] = {
#if CONFIG_VP8_VAAPI_HWACCEL
        AV_PIX_FMT_VAAPI,
#endif
#if CONFIG_VP8_NVDEC_HWACCEL
        AV_PIX_FMT_CUDA,
#endif
        AV_PIX_FMT_YUV420P,
        AV_PIX_FMT_NONE,
    };

    return ff_get_format(s->avctx, pix_fmts);
}

static av_always_inline
int update_dimensions(VP8Context *s, int width, int height, int is_vp7)
{
    AVCodecContext *avctx = s->avctx;
    int i, ret, dim_reset = 0;

    if (width  != s->avctx->width || ((width+15)/16 != s->mb_width || (height+15)/16 != s->mb_height) && s->macroblocks_base ||
        height != s->avctx->height) {
        vp8_decode_flush_impl(s->avctx, 1);

        ret = ff_set_dimensions(s->avctx, width, height);
        if (ret < 0)
            return ret;

        dim_reset = (s->macroblocks_base != NULL);
    }

    if ((s->pix_fmt == AV_PIX_FMT_NONE || dim_reset) &&
        !s->actually_webp && !is_vp7) {
        s->pix_fmt = get_pixel_format(s);
        if (s->pix_fmt < 0)
            return AVERROR(EINVAL);
        avctx->pix_fmt = s->pix_fmt;
    }

    s->mb_width  = (s->avctx->coded_width  + 15) / 16;
    s->mb_height = (s->avctx->coded_height + 15) / 16;

    s->mb_layout = is_vp7 || avctx->active_thread_type == FF_THREAD_SLICE &&
                             avctx->thread_count > 1;
    if (!s->mb_layout) { // Frame threading and one thread
        s->macroblocks_base       = av_mallocz((s->mb_width + s->mb_height * 2 + 1) *
                                               sizeof(*s->macroblocks));
        s->intra4x4_pred_mode_top = av_mallocz(s->mb_width * 4);
    } else // Sliced threading
        s->macroblocks_base = av_mallocz((s->mb_width + 2) * (s->mb_height + 2) *
                                         sizeof(*s->macroblocks));
    s->top_nnz     = av_mallocz(s->mb_width * sizeof(*s->top_nnz));
    s->top_border  = av_mallocz((s->mb_width + 1) * sizeof(*s->top_border));
    s->thread_data = av_mallocz(MAX_THREADS * sizeof(VP8ThreadData));

    if (!s->macroblocks_base || !s->top_nnz || !s->top_border ||
        !s->thread_data || (!s->intra4x4_pred_mode_top && !s->mb_layout)) {
        free_buffers(s);
        return AVERROR(ENOMEM);
    }

    for (i = 0; i < MAX_THREADS; i++) {
        s->thread_data[i].filter_strength =
            av_mallocz(s->mb_width * sizeof(*s->thread_data[0].filter_strength));
        if (!s->thread_data[i].filter_strength) {
            free_buffers(s);
            return AVERROR(ENOMEM);
        }
#if HAVE_THREADS
        pthread_mutex_init(&s->thread_data[i].lock, NULL);
        pthread_cond_init(&s->thread_data[i].cond, NULL);
#endif
    }

    s->macroblocks = s->macroblocks_base + 1;

    return 0;
}

static int vp7_update_dimensions(VP8Context *s, int width, int height)
{
    return update_dimensions(s, width, height, IS_VP7);
}

static int vp8_update_dimensions(VP8Context *s, int width, int height)
{
    return update_dimensions(s, width, height, IS_VP8);
}


static void parse_segment_info(VP8Context *s)
{
    VP56RangeCoder *c = &s->c;
    int i;

    s->segmentation.update_map          = vp8_rac_get(c);
    s->segmentation.update_feature_data = vp8_rac_get(c);

    if (s->segmentation.update_feature_data) {
        s->segmentation.absolute_vals = vp8_rac_get(c);

        for (i = 0; i < 4; i++)
            s->segmentation.base_quant[i] = vp8_rac_get_sint(c, 7);

        for (i = 0; i < 4; i++)
            s->segmentation.filter_level[i] = vp8_rac_get_sint(c, 6);
    }
    if (s->segmentation.update_map)
        for (i = 0; i < 3; i++)
            s->prob->segmentid[i] = vp8_rac_get(c) ? vp8_rac_get_uint(c, 8) : 255;
}

static void update_lf_deltas(VP8Context *s)
{
    VP56RangeCoder *c = &s->c;
    int i;

    for (i = 0; i < 4; i++) {
        if (vp8_rac_get(c)) {
            s->lf_delta.ref[i] = vp8_rac_get_uint(c, 6);

            if (vp8_rac_get(c))
                s->lf_delta.ref[i] = -s->lf_delta.ref[i];
        }
    }

    for (i = MODE_I4x4; i <= VP8_MVMODE_SPLIT; i++) {
        if (vp8_rac_get(c)) {
            s->lf_delta.mode[i] = vp8_rac_get_uint(c, 6);

            if (vp8_rac_get(c))
                s->lf_delta.mode[i] = -s->lf_delta.mode[i];
        }
    }
}

static int setup_partitions(VP8Context *s, const uint8_t *buf, int buf_size)
{
    const uint8_t *sizes = buf;
    int i;
    int ret;

    s->num_coeff_partitions = 1 << vp8_rac_get_uint(&s->c, 2);

    buf      += 3 * (s->num_coeff_partitions - 1);
    buf_size -= 3 * (s->num_coeff_partitions - 1);
    if (buf_size < 0)
        return -1;

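    /* Each of the first (num_coeff_partitions - 1) partition sizes is stored
     * as a 3-byte little-endian value; the last partition simply takes
     * whatever remains of the buffer (see below). */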
    for (i = 0; i < s->num_coeff_partitions - 1; i++) {
        int size = AV_RL24(sizes + 3 * i);
        if (buf_size - size < 0)
            return -1;
        s->coeff_partition_size[i] = size;

        ret = ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, size);
        if (ret < 0)
            return ret;
        buf      += size;
        buf_size -= size;
    }

    s->coeff_partition_size[i] = buf_size;
    ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, buf_size);

    return 0;
}

static void vp7_get_quants(VP8Context *s)
{
    VP56RangeCoder *c = &s->c;

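    /* VP7 codes a 7-bit AC index for luma; each of the remaining planes may
     * optionally code its own index, and falls back to yac_qi otherwise. */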
    int yac_qi  = vp8_rac_get_uint(c, 7);
    int ydc_qi  = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi;
    int y2dc_qi = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi;
    int y2ac_qi = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi;
    int uvdc_qi = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi;
    int uvac_qi = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi;

    s->qmat[0].luma_qmul[0]    = vp7_ydc_qlookup[ydc_qi];
    s->qmat[0].luma_qmul[1]    = vp7_yac_qlookup[yac_qi];
    s->qmat[0].luma_dc_qmul[0] = vp7_y2dc_qlookup[y2dc_qi];
    s->qmat[0].luma_dc_qmul[1] = vp7_y2ac_qlookup[y2ac_qi];
    s->qmat[0].chroma_qmul[0]  = FFMIN(vp7_ydc_qlookup[uvdc_qi], 132);
    s->qmat[0].chroma_qmul[1]  = vp7_yac_qlookup[uvac_qi];
}

static void vp8_get_quants(VP8Context *s)
{
    VP56RangeCoder *c = &s->c;
    int i, base_qi;

    s->quant.yac_qi     = vp8_rac_get_uint(c, 7);
    s->quant.ydc_delta  = vp8_rac_get_sint(c, 4);
    s->quant.y2dc_delta = vp8_rac_get_sint(c, 4);
    s->quant.y2ac_delta = vp8_rac_get_sint(c, 4);
    s->quant.uvdc_delta = vp8_rac_get_sint(c, 4);
    s->quant.uvac_delta = vp8_rac_get_sint(c, 4);

    for (i = 0; i < 4; i++) {
        if (s->segmentation.enabled) {
            base_qi = s->segmentation.base_quant[i];
            if (!s->segmentation.absolute_vals)
                base_qi += s->quant.yac_qi;
        } else
            base_qi = s->quant.yac_qi;

        s->qmat[i].luma_qmul[0]    = vp8_dc_qlookup[av_clip_uintp2(base_qi + s->quant.ydc_delta,  7)];
        s->qmat[i].luma_qmul[1]    = vp8_ac_qlookup[av_clip_uintp2(base_qi,                       7)];
        s->qmat[i].luma_dc_qmul[0] = vp8_dc_qlookup[av_clip_uintp2(base_qi + s->quant.y2dc_delta, 7)] * 2;
        /* 101581>>16 is equivalent to 155/100 */
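        /* (101581 = round(155 / 100 * 65536)) */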
        s->qmat[i].luma_dc_qmul[1] = vp8_ac_qlookup[av_clip_uintp2(base_qi + s->quant.y2ac_delta, 7)] * 101581 >> 16;
        s->qmat[i].chroma_qmul[0]  = vp8_dc_qlookup[av_clip_uintp2(base_qi + s->quant.uvdc_delta, 7)];
        s->qmat[i].chroma_qmul[1]  = vp8_ac_qlookup[av_clip_uintp2(base_qi + s->quant.uvac_delta, 7)];

        s->qmat[i].luma_dc_qmul[1] = FFMAX(s->qmat[i].luma_dc_qmul[1], 8);
        s->qmat[i].chroma_qmul[0]  = FFMIN(s->qmat[i].chroma_qmul[0], 132);
    }
}

/**
 * Determine which buffers golden and altref should be updated with after this frame.
 * The spec isn't clear here, so I'm going by my understanding of what libvpx does.
 *
 * Intra frames update all 3 references.
 * Inter frames update VP56_FRAME_PREVIOUS if the update_last flag is set.
 * If the update (golden|altref) flag is set, it's updated with the current frame
 *     if update_last is set, and VP56_FRAME_PREVIOUS otherwise.
 * If the flag is not set, the number read means:
 *     0: no update
 *     1: VP56_FRAME_PREVIOUS
 *     2: update golden with altref, or update altref with golden
 */
static VP56Frame ref_to_update(VP8Context *s, int update, VP56Frame ref)
{
    VP56RangeCoder *c = &s->c;

    if (update)
        return VP56_FRAME_CURRENT;

    switch (vp8_rac_get_uint(c, 2)) {
    case 1:
        return VP56_FRAME_PREVIOUS;
    case 2:
        return (ref == VP56_FRAME_GOLDEN) ? VP56_FRAME_GOLDEN2 : VP56_FRAME_GOLDEN;
    }
    return VP56_FRAME_NONE;
}

static void vp78_reset_probability_tables(VP8Context *s)
{
    int i, j;
    for (i = 0; i < 4; i++)
        for (j = 0; j < 16; j++)
            memcpy(s->prob->token[i][j], vp8_token_default_probs[i][vp8_coeff_band[j]],
                   sizeof(s->prob->token[i][j]));
}

static void vp78_update_probability_tables(VP8Context *s)
{
    VP56RangeCoder *c = &s->c;
    int i, j, k, l, m;

    for (i = 0; i < 4; i++)
        for (j = 0; j < 8; j++)
            for (k = 0; k < 3; k++)
                for (l = 0; l < NUM_DCT_TOKENS - 1; l++)
                    if (vp56_rac_get_prob_branchy(c, vp8_token_update_probs[i][j][k][l])) {
                        int prob = vp8_rac_get_uint(c, 8);
                        for (m = 0; vp8_coeff_band_indexes[j][m] >= 0; m++)
                            s->prob->token[i][vp8_coeff_band_indexes[j][m]][k][l] = prob;
                    }
}

#define VP7_MVC_SIZE 17
#define VP8_MVC_SIZE 19

static void vp78_update_pred16x16_pred8x8_mvc_probabilities(VP8Context *s,
                                                            int mvc_size)
{
    VP56RangeCoder *c = &s->c;
    int i, j;

    if (vp8_rac_get(c))
        for (i = 0; i < 4; i++)
            s->prob->pred16x16[i] = vp8_rac_get_uint(c, 8);
    if (vp8_rac_get(c))
        for (i = 0; i < 3; i++)
            s->prob->pred8x8c[i] = vp8_rac_get_uint(c, 8);

    // 17.2 MV probability update
    for (i = 0; i < 2; i++)
        for (j = 0; j < mvc_size; j++)
            if (vp56_rac_get_prob_branchy(c, vp8_mv_update_prob[i][j]))
                s->prob->mvc[i][j] = vp8_rac_get_nn(c);
}

static void update_refs(VP8Context *s)
{
    VP56RangeCoder *c = &s->c;

    int update_golden = vp8_rac_get(c);
    int update_altref = vp8_rac_get(c);

    s->update_golden = ref_to_update(s, update_golden, VP56_FRAME_GOLDEN);
    s->update_altref = ref_to_update(s, update_altref, VP56_FRAME_GOLDEN2);
}

static void copy_chroma(AVFrame *dst, AVFrame *src, int width, int height)
{
    int i, j;

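    /* 4:2:0: planes 1 and 2 are chroma, at half width and half height */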
    for (j = 1; j < 3; j++) {
        for (i = 0; i < height / 2; i++)
            memcpy(dst->data[j] + i * dst->linesize[j],
                   src->data[j] + i * src->linesize[j], width / 2);
    }
}

static void fade(uint8_t *dst, ptrdiff_t dst_linesize,
                 const uint8_t *src, ptrdiff_t src_linesize,
                 int width, int height,
                 int alpha, int beta)
{
    int i, j;
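    /* alpha is an additive bias, beta a Q8 multiplicative gain:
     * dst = clip_uint8(y + y * beta / 256 + alpha) */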
    for (j = 0; j < height; j++) {
        const uint8_t *src2 = src + j * src_linesize;
        uint8_t *dst2 = dst + j * dst_linesize;
        for (i = 0; i < width; i++) {
            uint8_t y = src2[i];
            dst2[i] = av_clip_uint8(y + ((y * beta) >> 8) + alpha);
        }
    }
}

static int vp7_fade_frame(VP8Context *s, int alpha, int beta)
{
    int ret;

    if (!s->keyframe && (alpha || beta)) {
        int width  = s->mb_width * 16;
        int height = s->mb_height * 16;
        AVFrame *src, *dst;

        if (!s->framep[VP56_FRAME_PREVIOUS] ||
            !s->framep[VP56_FRAME_GOLDEN]) {
            av_log(s->avctx, AV_LOG_WARNING, "Discarding interframe without a prior keyframe!\n");
            return AVERROR_INVALIDDATA;
        }

        dst =
        src = s->framep[VP56_FRAME_PREVIOUS]->tf.f;

        /* preserve the golden frame, write a new previous frame */
        if (s->framep[VP56_FRAME_GOLDEN] == s->framep[VP56_FRAME_PREVIOUS]) {
            s->framep[VP56_FRAME_PREVIOUS] = vp8_find_free_buffer(s);
            if ((ret = vp8_alloc_frame(s, s->framep[VP56_FRAME_PREVIOUS], 1)) < 0)
                return ret;

            dst = s->framep[VP56_FRAME_PREVIOUS]->tf.f;

            copy_chroma(dst, src, width, height);
        }

        fade(dst->data[0], dst->linesize[0],
             src->data[0], src->linesize[0],
             width, height, alpha, beta);
    }

    return 0;
}

static int vp7_decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size)
{
    VP56RangeCoder *c = &s->c;
    int part1_size, hscale, vscale, i, j, ret;
    int width  = s->avctx->width;
    int height = s->avctx->height;
    int alpha  = 0;
    int beta   = 0;

    if (buf_size < 4) {
        return AVERROR_INVALIDDATA;
    }

    s->profile = (buf[0] >> 1) & 7;
    if (s->profile > 1) {
        avpriv_request_sample(s->avctx, "Unknown profile %d", s->profile);
        return AVERROR_INVALIDDATA;
    }

    s->keyframe  = !(buf[0] & 1);
    s->invisible = 0;
    part1_size   = AV_RL24(buf) >> 4;

    if (buf_size < 4 - s->profile + part1_size) {
        av_log(s->avctx, AV_LOG_ERROR, "Buffer size %d is too small, needed: %d\n", buf_size, 4 - s->profile + part1_size);
        return AVERROR_INVALIDDATA;
    }

    buf      += 4 - s->profile;
    buf_size -= 4 - s->profile;

    memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_epel_pixels_tab, sizeof(s->put_pixels_tab));

    ret = ff_vp56_init_range_decoder(c, buf, part1_size);
    if (ret < 0)
        return ret;
    buf      += part1_size;
    buf_size -= part1_size;

    /* A. Dimension information (keyframes only) */
    if (s->keyframe) {
        width  = vp8_rac_get_uint(c, 12);
        height = vp8_rac_get_uint(c, 12);
        hscale = vp8_rac_get_uint(c, 2);
        vscale = vp8_rac_get_uint(c, 2);
        if (hscale || vscale)
            avpriv_request_sample(s->avctx, "Upscaling");

        s->update_golden = s->update_altref = VP56_FRAME_CURRENT;
        vp78_reset_probability_tables(s);
        memcpy(s->prob->pred16x16, vp8_pred16x16_prob_inter,
               sizeof(s->prob->pred16x16));
        memcpy(s->prob->pred8x8c, vp8_pred8x8c_prob_inter,
               sizeof(s->prob->pred8x8c));
        for (i = 0; i < 2; i++)
            memcpy(s->prob->mvc[i], vp7_mv_default_prob[i],
                   sizeof(vp7_mv_default_prob[i]));
        memset(&s->segmentation, 0, sizeof(s->segmentation));
        memset(&s->lf_delta, 0, sizeof(s->lf_delta));
        memcpy(s->prob[0].scan, ff_zigzag_scan, sizeof(s->prob[0].scan));
    }

    if (s->keyframe || s->profile > 0)
        memset(s->inter_dc_pred, 0, sizeof(s->inter_dc_pred));

    /* B. Decoding information for all four macroblock-level features */
    for (i = 0; i < 4; i++) {
        s->feature_enabled[i] = vp8_rac_get(c);
        if (s->feature_enabled[i]) {
            s->feature_present_prob[i] = vp8_rac_get_uint(c, 8);

            for (j = 0; j < 3; j++)
                s->feature_index_prob[i][j] =
                    vp8_rac_get(c) ? vp8_rac_get_uint(c, 8) : 255;

            if (vp7_feature_value_size[s->profile][i])
                for (j = 0; j < 4; j++)
                    s->feature_value[i][j] =
                        vp8_rac_get(c) ? vp8_rac_get_uint(c, vp7_feature_value_size[s->profile][i]) : 0;
        }
    }

    s->segmentation.enabled    = 0;
    s->segmentation.update_map = 0;
    s->lf_delta.enabled        = 0;

    s->num_coeff_partitions = 1;
    ret = ff_vp56_init_range_decoder(&s->coeff_partition[0], buf, buf_size);
    if (ret < 0)
        return ret;

    if (!s->macroblocks_base || /* first frame */
        width != s->avctx->width || height != s->avctx->height ||
        (width + 15) / 16 != s->mb_width || (height + 15) / 16 != s->mb_height) {
        if ((ret = vp7_update_dimensions(s, width, height)) < 0)
            return ret;
    }

    /* C. Dequantization indices */
    vp7_get_quants(s);

    /* D. Golden frame update flag (a Flag) for interframes only */
    if (!s->keyframe) {
        s->update_golden = vp8_rac_get(c) ? VP56_FRAME_CURRENT : VP56_FRAME_NONE;
        s->sign_bias[VP56_FRAME_GOLDEN] = 0;
    }

    s->update_last          = 1;
    s->update_probabilities = 1;
    s->fade_present         = 1;

    if (s->profile > 0) {
        s->update_probabilities = vp8_rac_get(c);
        if (!s->update_probabilities)
            s->prob[1] = s->prob[0];

        if (!s->keyframe)
            s->fade_present = vp8_rac_get(c);
    }

    if (vpX_rac_is_end(c))
        return AVERROR_INVALIDDATA;
    /* E. Fading information for previous frame */
    if (s->fade_present && vp8_rac_get(c)) {
        alpha = (int8_t) vp8_rac_get_uint(c, 8);
        beta  = (int8_t) vp8_rac_get_uint(c, 8);
    }

    /* F. Loop filter type */
    if (!s->profile)
        s->filter.simple = vp8_rac_get(c);

    /* G. DCT coefficient ordering specification */
    if (vp8_rac_get(c))
        for (i = 1; i < 16; i++)
            s->prob[0].scan[i] = ff_zigzag_scan[vp8_rac_get_uint(c, 4)];

    /* H. Loop filter levels */
    if (s->profile > 0)
        s->filter.simple = vp8_rac_get(c);
    s->filter.level     = vp8_rac_get_uint(c, 6);
    s->filter.sharpness = vp8_rac_get_uint(c, 3);

    /* I. DCT coefficient probability update; 13.3 Token Probability Updates */
    vp78_update_probability_tables(s);

    s->mbskip_enabled = 0;

    /* J. The remaining frame header data occurs ONLY FOR INTERFRAMES */
    if (!s->keyframe) {
        s->prob->intra = vp8_rac_get_uint(c, 8);
        s->prob->last  = vp8_rac_get_uint(c, 8);
        vp78_update_pred16x16_pred8x8_mvc_probabilities(s, VP7_MVC_SIZE);
    }

    if (vpX_rac_is_end(c))
        return AVERROR_INVALIDDATA;

    if ((ret = vp7_fade_frame(s, alpha, beta)) < 0)
        return ret;

    return 0;
}

static int vp8_decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size)
{
    VP56RangeCoder *c = &s->c;
    int header_size, hscale, vscale, ret;
    int width  = s->avctx->width;
    int height = s->avctx->height;

    if (buf_size < 3) {
        av_log(s->avctx, AV_LOG_ERROR, "Insufficient data (%d) for header\n", buf_size);
        return AVERROR_INVALIDDATA;
    }

    s->keyframe  = !(buf[0] & 1);
    s->profile   = (buf[0] >> 1) & 7;
    s->invisible = !(buf[0] & 0x10);
    header_size  = AV_RL24(buf) >> 5;
    buf      += 3;
    buf_size -= 3;

    s->header_partition_size = header_size;

    if (s->profile > 3)
        av_log(s->avctx, AV_LOG_WARNING, "Unknown profile %d\n", s->profile);

    if (!s->profile)
        memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_epel_pixels_tab,
               sizeof(s->put_pixels_tab));
    else // profile 1-3 use bilinear, 4+ aren't defined so whatever
        memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_bilinear_pixels_tab,
               sizeof(s->put_pixels_tab));

    if (header_size > buf_size - 7 * s->keyframe) {
        av_log(s->avctx, AV_LOG_ERROR, "Header size larger than data provided\n");
        return AVERROR_INVALIDDATA;
    }

    if (s->keyframe) {
        if (AV_RL24(buf) != 0x2a019d) {
            av_log(s->avctx, AV_LOG_ERROR,
                   "Invalid start code 0x%x\n", AV_RL24(buf));
            return AVERROR_INVALIDDATA;
        }
        width  = AV_RL16(buf + 3) & 0x3fff;
        height = AV_RL16(buf + 5) & 0x3fff;
        hscale = buf[4] >> 6;
        vscale = buf[6] >> 6;
        buf      += 7;
        buf_size -= 7;

        if (hscale || vscale)
            avpriv_request_sample(s->avctx, "Upscaling");

        s->update_golden = s->update_altref = VP56_FRAME_CURRENT;
        vp78_reset_probability_tables(s);
        memcpy(s->prob->pred16x16, vp8_pred16x16_prob_inter,
               sizeof(s->prob->pred16x16));
        memcpy(s->prob->pred8x8c, vp8_pred8x8c_prob_inter,
               sizeof(s->prob->pred8x8c));
        memcpy(s->prob->mvc, vp8_mv_default_prob,
               sizeof(s->prob->mvc));
        memset(&s->segmentation, 0, sizeof(s->segmentation));
        memset(&s->lf_delta, 0, sizeof(s->lf_delta));
    }

    ret = ff_vp56_init_range_decoder(c, buf, header_size);
    if (ret < 0)
        return ret;
    buf      += header_size;
    buf_size -= header_size;

    if (s->keyframe) {
        s->colorspace = vp8_rac_get(c);
        if (s->colorspace)
            av_log(s->avctx, AV_LOG_WARNING, "Unspecified colorspace\n");
        s->fullrange = vp8_rac_get(c);
    }

    if ((s->segmentation.enabled = vp8_rac_get(c)))
        parse_segment_info(s);
    else
        s->segmentation.update_map = 0; // FIXME: move this to some init function?

    s->filter.simple    = vp8_rac_get(c);
    s->filter.level     = vp8_rac_get_uint(c, 6);
    s->filter.sharpness = vp8_rac_get_uint(c, 3);

    if ((s->lf_delta.enabled = vp8_rac_get(c))) {
        s->lf_delta.update = vp8_rac_get(c);
        if (s->lf_delta.update)
            update_lf_deltas(s);
    }

    if (setup_partitions(s, buf, buf_size)) {
        av_log(s->avctx, AV_LOG_ERROR, "Invalid partitions\n");
        return AVERROR_INVALIDDATA;
    }

    if (!s->macroblocks_base || /* first frame */
        width != s->avctx->width || height != s->avctx->height ||
        (width + 15) / 16 != s->mb_width || (height + 15) / 16 != s->mb_height)
        if ((ret = vp8_update_dimensions(s, width, height)) < 0)
            return ret;

    vp8_get_quants(s);

    if (!s->keyframe) {
        update_refs(s);
        s->sign_bias[VP56_FRAME_GOLDEN]               = vp8_rac_get(c);
        s->sign_bias[VP56_FRAME_GOLDEN2 /* altref */] = vp8_rac_get(c);
    }

    // if we aren't saving this frame's probabilities for future frames,
    // make a copy of the current probabilities
    if (!(s->update_probabilities = vp8_rac_get(c)))
        s->prob[1] = s->prob[0];

    s->update_last = s->keyframe || vp8_rac_get(c);

    vp78_update_probability_tables(s);

    if ((s->mbskip_enabled = vp8_rac_get(c)))
        s->prob->mbskip = vp8_rac_get_uint(c, 8);

    if (!s->keyframe) {
        s->prob->intra  = vp8_rac_get_uint(c, 8);
        s->prob->last   = vp8_rac_get_uint(c, 8);
        s->prob->golden = vp8_rac_get_uint(c, 8);
        vp78_update_pred16x16_pred8x8_mvc_probabilities(s, VP8_MVC_SIZE);
    }

    // Record the entropy coder state here so that hwaccels can use it.
    s->c.code_word = vp56_rac_renorm(&s->c);
    s->coder_state_at_header_end.input     = s->c.buffer - (-s->c.bits / 8);
    s->coder_state_at_header_end.range     = s->c.high;
    s->coder_state_at_header_end.value     = s->c.code_word >> 16;
    s->coder_state_at_header_end.bit_count = -s->c.bits % 8;

    return 0;
}

static av_always_inline
void clamp_mv(VP8mvbounds *s, VP56mv *dst, const VP56mv *src)
{
    dst->x = av_clip(src->x, av_clip(s->mv_min.x, INT16_MIN, INT16_MAX),
                             av_clip(s->mv_max.x, INT16_MIN, INT16_MAX));
    dst->y = av_clip(src->y, av_clip(s->mv_min.y, INT16_MIN, INT16_MAX),
                             av_clip(s->mv_max.y, INT16_MIN, INT16_MAX));
}

/**
 * Motion vector coding, 17.1.
 */
static av_always_inline int read_mv_component(VP56RangeCoder *c, const uint8_t *p, int vp7)
{
    int bit, x = 0;

    if (vp56_rac_get_prob_branchy(c, p[0])) {
        int i;

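        /* Large-magnitude branch: bits 0-2 are read first, then the top bits
         * down to bit 4. Bit 3 is implicitly 1 when no higher bit is set
         * (this branch only codes magnitudes beyond the small-MV range), and
         * is read explicitly otherwise. */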
        for (i = 0; i < 3; i++)
            x += vp56_rac_get_prob(c, p[9 + i]) << i;
        for (i = (vp7 ? 7 : 9); i > 3; i--)
            x += vp56_rac_get_prob(c, p[9 + i]) << i;
        if (!(x & (vp7 ? 0xF0 : 0xFFF0)) || vp56_rac_get_prob(c, p[12]))
            x += 8;
    } else {
        // small_mvtree
        const uint8_t *ps = p + 2;
        bit = vp56_rac_get_prob(c, *ps);
        ps += 1 + 3 * bit;
        x  += 4 * bit;
        bit = vp56_rac_get_prob(c, *ps);
        ps += 1 + bit;
        x  += 2 * bit;
        x  += vp56_rac_get_prob(c, *ps);
    }

    return (x && vp56_rac_get_prob(c, p[1])) ? -x : x;
}

static int vp7_read_mv_component(VP56RangeCoder *c, const uint8_t *p)
{
    return read_mv_component(c, p, 1);
}

static int vp8_read_mv_component(VP56RangeCoder *c, const uint8_t *p)
{
    return read_mv_component(c, p, 0);
}

static av_always_inline
const uint8_t *get_submv_prob(uint32_t left, uint32_t top, int is_vp7)
{
    if (is_vp7)
        return vp7_submv_prob;

    if (left == top)
        return vp8_submv_prob[4 - !!left];
    if (!top)
        return vp8_submv_prob[2];
    return vp8_submv_prob[1 - !!left];
}

/**
 * Split motion vector prediction, 16.4.
 * @returns the number of motion vectors parsed (2, 4 or 16)
 */
static av_always_inline
int decode_splitmvs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb,
                    int layout, int is_vp7)
{
    int part_idx;
    int n, num;
    VP8Macroblock *top_mb;
    VP8Macroblock *left_mb = &mb[-1];
    const uint8_t *mbsplits_left = vp8_mbsplits[left_mb->partitioning];
    const uint8_t *mbsplits_top, *mbsplits_cur, *firstidx;
    VP56mv *top_mv;
    VP56mv *left_mv = left_mb->bmv;
    VP56mv *cur_mv  = mb->bmv;

    if (!layout) // layout is inlined, s->mb_layout is not
        top_mb = &mb[2];
    else
        top_mb = &mb[-s->mb_width - 1];
    mbsplits_top = vp8_mbsplits[top_mb->partitioning];
    top_mv       = top_mb->bmv;

    if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[0])) {
        if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[1]))
            part_idx = VP8_SPLITMVMODE_16x8 + vp56_rac_get_prob(c, vp8_mbsplit_prob[2]);
        else
            part_idx = VP8_SPLITMVMODE_8x8;
    } else {
        part_idx = VP8_SPLITMVMODE_4x4;
    }

    num          = vp8_mbsplit_count[part_idx];
    mbsplits_cur = vp8_mbsplits[part_idx],
    firstidx     = vp8_mbfirstidx[part_idx];
    mb->partitioning = part_idx;

    for (n = 0; n < num; n++) {
        int k = firstidx[n];
        uint32_t left, above;
        const uint8_t *submv_prob;

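        /* Take the left/above neighbour MV from the adjacent macroblock when
         * the subblock sits on the MB edge (k + 3: rightmost subblock in the
         * same row of the left MB; k + 12: bottom row of the top MB);
         * otherwise reuse an already-decoded subblock of this MB. */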
        if (!(k & 3))
            left = AV_RN32A(&left_mv[mbsplits_left[k + 3]]);
        else
            left = AV_RN32A(&cur_mv[mbsplits_cur[k - 1]]);
        if (k <= 3)
            above = AV_RN32A(&top_mv[mbsplits_top[k + 12]]);
        else
            above = AV_RN32A(&cur_mv[mbsplits_cur[k - 4]]);

        submv_prob = get_submv_prob(left, above, is_vp7);

        if (vp56_rac_get_prob_branchy(c, submv_prob[0])) {
            if (vp56_rac_get_prob_branchy(c, submv_prob[1])) {
                if (vp56_rac_get_prob_branchy(c, submv_prob[2])) {
                    mb->bmv[n].y = mb->mv.y +
                                   read_mv_component(c, s->prob->mvc[0], is_vp7);
                    mb->bmv[n].x = mb->mv.x +
                                   read_mv_component(c, s->prob->mvc[1], is_vp7);
                } else {
                    AV_ZERO32(&mb->bmv[n]);
                }
            } else {
                AV_WN32A(&mb->bmv[n], above);
            }
        } else {
            AV_WN32A(&mb->bmv[n], left);
        }
    }

    return num;
}

/**
 * The vp7 reference decoder uses a padding macroblock column (added to right
 * edge of the frame) to guard against illegal macroblock offsets. The
 * algorithm has bugs that permit offsets to straddle the padding column.
 * This function replicates those bugs.
 *
 * @param[out] edge_x macroblock x address
 * @param[out] edge_y macroblock y address
 *
 * @return macroblock offset legal (boolean)
 */
static int vp7_calculate_mb_offset(int mb_x, int mb_y, int mb_width,
                                   int xoffset, int yoffset, int boundary,
                                   int *edge_x, int *edge_y)
{
    int vwidth = mb_width + 1;
    int new = (mb_y + yoffset) * vwidth + mb_x + xoffset;
    if (new < boundary || new % vwidth == vwidth - 1)
        return 0;
    *edge_y = new / vwidth;
    *edge_x = new % vwidth;
    return 1;
}

static const VP56mv *get_bmv_ptr(const VP8Macroblock *mb, int subblock)
{
    return &mb->bmv[mb->mode == VP8_MVMODE_SPLIT ? vp8_mbsplits[mb->partitioning][subblock] : 0];
}

static av_always_inline
void vp7_decode_mvs(VP8Context *s, VP8Macroblock *mb,
                    int mb_x, int mb_y, int layout)
{
    VP8Macroblock *mb_edge[12];
    enum { CNT_ZERO, CNT_NEAREST, CNT_NEAR };
    enum { VP8_EDGE_TOP, VP8_EDGE_LEFT, VP8_EDGE_TOPLEFT };
    int idx = CNT_ZERO;
    VP56mv near_mv[3];
    uint8_t cnt[3] = { 0 };
    VP56RangeCoder *c = &s->c;
    int i;

    AV_ZERO32(&near_mv[0]);
    AV_ZERO32(&near_mv[1]);
    AV_ZERO32(&near_mv[2]);

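    /* Accumulate a score for each candidate predictor; the zero / nearest /
     * near totals then index vp7_mode_contexts below to select the mode
     * probabilities. */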
    for (i = 0; i < VP7_MV_PRED_COUNT; i++) {
        const VP7MVPred *pred = &vp7_mv_pred[i];
        int edge_x, edge_y;

        if (vp7_calculate_mb_offset(mb_x, mb_y, s->mb_width, pred->xoffset,
                                    pred->yoffset, !s->profile, &edge_x, &edge_y)) {
            VP8Macroblock *edge = mb_edge[i] = (s->mb_layout == 1)
                                          ? s->macroblocks_base + 1 + edge_x +
                                            (s->mb_width + 1) * (edge_y + 1)
                                          : s->macroblocks + edge_x +
                                            (s->mb_height - edge_y - 1) * 2;
            uint32_t mv = AV_RN32A(get_bmv_ptr(edge, vp7_mv_pred[i].subblock));
            if (mv) {
                if (AV_RN32A(&near_mv[CNT_NEAREST])) {
                    if (mv == AV_RN32A(&near_mv[CNT_NEAREST])) {
                        idx = CNT_NEAREST;
                    } else if (AV_RN32A(&near_mv[CNT_NEAR])) {
                        if (mv != AV_RN32A(&near_mv[CNT_NEAR]))
                            continue;
                        idx = CNT_NEAR;
                    } else {
                        AV_WN32A(&near_mv[CNT_NEAR], mv);
                        idx = CNT_NEAR;
                    }
                } else {
                    AV_WN32A(&near_mv[CNT_NEAREST], mv);
                    idx = CNT_NEAREST;
                }
            } else {
                idx = CNT_ZERO;
            }
        } else {
            idx = CNT_ZERO;
        }
        cnt[idx] += vp7_mv_pred[i].score;
    }

    mb->partitioning = VP8_SPLITMVMODE_NONE;

    if (vp56_rac_get_prob_branchy(c, vp7_mode_contexts[cnt[CNT_ZERO]][0])) {
        mb->mode = VP8_MVMODE_MV;

        if (vp56_rac_get_prob_branchy(c, vp7_mode_contexts[cnt[CNT_NEAREST]][1])) {

            if (vp56_rac_get_prob_branchy(c, vp7_mode_contexts[cnt[CNT_NEAR]][2])) {

                if (cnt[CNT_NEAREST] > cnt[CNT_NEAR])
                    AV_WN32A(&mb->mv, cnt[CNT_ZERO] > cnt[CNT_NEAREST] ? 0 : AV_RN32A(&near_mv[CNT_NEAREST]));
                else
                    AV_WN32A(&mb->mv, cnt[CNT_ZERO] > cnt[CNT_NEAR]    ? 0 : AV_RN32A(&near_mv[CNT_NEAR]));

                if (vp56_rac_get_prob_branchy(c, vp7_mode_contexts[cnt[CNT_NEAR]][3])) {
                    mb->mode = VP8_MVMODE_SPLIT;
                    mb->mv = mb->bmv[decode_splitmvs(s, c, mb, layout, IS_VP7) - 1];
                } else {
                    mb->mv.y += vp7_read_mv_component(c, s->prob->mvc[0]);
                    mb->mv.x += vp7_read_mv_component(c, s->prob->mvc[1]);
                    mb->bmv[0] = mb->mv;
                }
            } else {
                mb->mv = near_mv[CNT_NEAR];
                mb->bmv[0] = mb->mv;
            }
        } else {
            mb->mv = near_mv[CNT_NEAREST];
            mb->bmv[0] = mb->mv;
        }
    } else {
        mb->mode = VP8_MVMODE_ZERO;
        AV_ZERO32(&mb->mv);
        mb->bmv[0] = mb->mv;
    }
}

static av_always_inline
void vp8_decode_mvs(VP8Context *s, VP8mvbounds *mv_bounds, VP8Macroblock *mb,
                    int mb_x, int mb_y, int layout)
{
    VP8Macroblock *mb_edge[3] = { 0      /* top */,
                                  mb - 1 /* left */,
                                  0      /* top-left */ };
    enum { CNT_ZERO, CNT_NEAREST, CNT_NEAR, CNT_SPLITMV };
    enum { VP8_EDGE_TOP, VP8_EDGE_LEFT, VP8_EDGE_TOPLEFT };
    int idx = CNT_ZERO;
    int cur_sign_bias = s->sign_bias[mb->ref_frame];
    int8_t *sign_bias = s->sign_bias;
    VP56mv near_mv[4];
    uint8_t cnt[4] = { 0 };
    VP56RangeCoder *c = &s->c;

    if (!layout) { // layout is inlined (s->mb_layout is not)
        mb_edge[0] = mb + 2;
        mb_edge[2] = mb + 1;
    } else {
        mb_edge[0] = mb - s->mb_width - 1;
        mb_edge[2] = mb - s->mb_width - 2;
    }

    AV_ZERO32(&near_mv[0]);
    AV_ZERO32(&near_mv[1]);
    AV_ZERO32(&near_mv[2]);

    /* Process MB on top, left and top-left */
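    /* The SWAR negate below flips the sign of both packed int16 MV
     * components at once: per-halfword ~mv + 1, with the masking keeping the
     * +1 carry from crossing into the upper halfword. */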
#define MV_EDGE_CHECK(n)                                                      \
    {                                                                         \
        VP8Macroblock *edge = mb_edge[n];                                     \
        int edge_ref = edge->ref_frame;                                       \
        if (edge_ref != VP56_FRAME_CURRENT) {                                 \
            uint32_t mv = AV_RN32A(&edge->mv);                                \
            if (mv) {                                                         \
                if (cur_sign_bias != sign_bias[edge_ref]) {                   \
                    /* SWAR negate of the values in mv. */                    \
                    mv = ~mv;                                                 \
                    mv = ((mv & 0x7fff7fff) +                                 \
                          0x00010001) ^ (mv & 0x80008000);                    \
                }                                                             \
                if (!n || mv != AV_RN32A(&near_mv[idx]))                      \
                    AV_WN32A(&near_mv[++idx], mv);                            \
                cnt[idx] += 1 + (n != 2);                                     \
            } else                                                            \
                cnt[CNT_ZERO] += 1 + (n != 2);                                \
        }                                                                     \
    }

    MV_EDGE_CHECK(0)
    MV_EDGE_CHECK(1)
    MV_EDGE_CHECK(2)

    mb->partitioning = VP8_SPLITMVMODE_NONE;
    if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_ZERO]][0])) {
        mb->mode = VP8_MVMODE_MV;

        /* If we have three distinct MVs, merge first and last if they're the same */
        if (cnt[CNT_SPLITMV] &&
            AV_RN32A(&near_mv[1 + VP8_EDGE_TOP]) == AV_RN32A(&near_mv[1 + VP8_EDGE_TOPLEFT]))
            cnt[CNT_NEAREST] += 1;

        /* Swap near and nearest if necessary */
        if (cnt[CNT_NEAR] > cnt[CNT_NEAREST]) {
            FFSWAP(uint8_t, cnt[CNT_NEAREST],     cnt[CNT_NEAR]);
            FFSWAP(VP56mv,  near_mv[CNT_NEAREST], near_mv[CNT_NEAR]);
        }

        if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAREST]][1])) {
            if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAR]][2])) {
                /* Choose the best mv out of 0,0 and the nearest mv */
                clamp_mv(mv_bounds, &mb->mv, &near_mv[CNT_ZERO + (cnt[CNT_NEAREST] >= cnt[CNT_ZERO])]);
                cnt[CNT_SPLITMV] = ((mb_edge[VP8_EDGE_LEFT]->mode    == VP8_MVMODE_SPLIT) +
                                    (mb_edge[VP8_EDGE_TOP]->mode     == VP8_MVMODE_SPLIT)) * 2 +
                                    (mb_edge[VP8_EDGE_TOPLEFT]->mode == VP8_MVMODE_SPLIT);

                if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_SPLITMV]][3])) {
                    mb->mode = VP8_MVMODE_SPLIT;
                    mb->mv = mb->bmv[decode_splitmvs(s, c, mb, layout, IS_VP8) - 1];
                } else {
                    mb->mv.y  += vp8_read_mv_component(c, s->prob->mvc[0]);
                    mb->mv.x  += vp8_read_mv_component(c, s->prob->mvc[1]);
                    mb->bmv[0] = mb->mv;
                }
            } else {
                clamp_mv(mv_bounds, &mb->mv, &near_mv[CNT_NEAR]);
                mb->bmv[0] = mb->mv;
            }
        } else {
            clamp_mv(mv_bounds, &mb->mv, &near_mv[CNT_NEAREST]);
            mb->bmv[0] = mb->mv;
        }
    } else {
        mb->mode = VP8_MVMODE_ZERO;
        AV_ZERO32(&mb->mv);
        mb->bmv[0] = mb->mv;
    }
}

static av_always_inline
void decode_intra4x4_modes(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb,
                           int mb_x, int keyframe, int layout)
{
    uint8_t *intra4x4 = mb->intra4x4_pred_mode_mb;

    if (layout) {
        VP8Macroblock *mb_top = mb - s->mb_width - 1;
        memcpy(mb->intra4x4_pred_mode_top, mb_top->intra4x4_pred_mode_top, 4);
    }
    if (keyframe) {
        int x, y;
        uint8_t *top;
        uint8_t *const left = s->intra4x4_pred_mode_left;
        if (layout)
            top = mb->intra4x4_pred_mode_top;
        else
            top = s->intra4x4_pred_mode_top + 4 * mb_x;
        for (y = 0; y < 4; y++) {
            for (x = 0; x < 4; x++) {
                const uint8_t *ctx;
                ctx       = vp8_pred4x4_prob_intra[top[x]][left[y]];
                *intra4x4 = vp8_rac_get_tree(c, vp8_pred4x4_tree, ctx);
                left[y]   = top[x] = *intra4x4;
                intra4x4++;
            }
        }
    } else {
        int i;
        for (i = 0; i < 16; i++)
            intra4x4[i] = vp8_rac_get_tree(c, vp8_pred4x4_tree,
                                           vp8_pred4x4_prob_inter);
    }
}

static av_always_inline
void decode_mb_mode(VP8Context *s, VP8mvbounds *mv_bounds,
                    VP8Macroblock *mb, int mb_x, int mb_y,
                    uint8_t *segment, uint8_t *ref, int layout, int is_vp7)
{
    VP56RangeCoder *c = &s->c;
    static const char * const vp7_feature_name[] = { "q-index",
                                                     "lf-delta",
                                                     "partial-golden-update",
                                                     "blit-pitch" };
    if (is_vp7) {
        int i;
        *segment = 0;
        for (i = 0; i < 4; i++) {
            if (s->feature_enabled[i]) {
                if (vp56_rac_get_prob_branchy(c, s->feature_present_prob[i])) {
                    int index = vp8_rac_get_tree(c, vp7_feature_index_tree,
                                                 s->feature_index_prob[i]);
                    av_log(s->avctx, AV_LOG_WARNING,
                           "Feature %s present in macroblock (value 0x%x)\n",
                           vp7_feature_name[i], s->feature_value[i][index]);
                }
            }
        }
    } else if (s->segmentation.update_map) {
        int bit  = vp56_rac_get_prob(c, s->prob->segmentid[0]);
        *segment = vp56_rac_get_prob(c, s->prob->segmentid[1 + bit]) + 2 * bit;
    } else if (s->segmentation.enabled)
        *segment = ref ? *ref : *segment;
    mb->segment = *segment;

    mb->skip = s->mbskip_enabled ? vp56_rac_get_prob(c, s->prob->mbskip) : 0;

    if (s->keyframe) {
        mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_intra,
                                    vp8_pred16x16_prob_intra);

        if (mb->mode == MODE_I4x4) {
            decode_intra4x4_modes(s, c, mb, mb_x, 1, layout);
        } else {
            const uint32_t modes = (is_vp7 ? vp7_pred4x4_mode
                                           : vp8_pred4x4_mode)[mb->mode] * 0x01010101u;
            if (s->mb_layout)
                AV_WN32A(mb->intra4x4_pred_mode_top, modes);
            else
                AV_WN32A(s->intra4x4_pred_mode_top + 4 * mb_x, modes);
            AV_WN32A(s->intra4x4_pred_mode_left, modes);
        }

        mb->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree,
                                                vp8_pred8x8c_prob_intra);
        mb->ref_frame        = VP56_FRAME_CURRENT;
    } else if (vp56_rac_get_prob_branchy(c, s->prob->intra)) {
        // inter MB, 16.2
        if (vp56_rac_get_prob_branchy(c, s->prob->last))
            mb->ref_frame =
                (!is_vp7 && vp56_rac_get_prob(c, s->prob->golden)) ? VP56_FRAME_GOLDEN2 /* altref */
                                                                   : VP56_FRAME_GOLDEN;
        else
            mb->ref_frame = VP56_FRAME_PREVIOUS;
        s->ref_count[mb->ref_frame - 1]++;

        // motion vectors, 16.3
        if (is_vp7)
            vp7_decode_mvs(s, mb, mb_x, mb_y, layout);
        else
            vp8_decode_mvs(s, mv_bounds, mb, mb_x, mb_y, layout);
    } else {
        // intra MB, 16.1
        mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_inter, s->prob->pred16x16);

        if (mb->mode == MODE_I4x4)
            decode_intra4x4_modes(s, c, mb, mb_x, 0, layout);

        mb->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree,
                                                s->prob->pred8x8c);
        mb->ref_frame        = VP56_FRAME_CURRENT;
        mb->partitioning     = VP8_SPLITMVMODE_NONE;
        AV_ZERO32(&mb->bmv[0]);
    }
}

/**
 * @param r arithmetic bitstream reader context
 * @param block destination for block coefficients
 * @param probs probabilities to use when reading trees from the bitstream
 * @param i initial coeff index, 0 unless a separate DC block is coded
 * @param qmul array holding the dc/ac dequant factor at position 0/1
 *
 * @return 0 if no coeffs were decoded
 *         otherwise, the index of the last coeff decoded plus one
 */
static av_always_inline
int decode_block_coeffs_internal(VP56RangeCoder *r, int16_t block[16],
                                 uint8_t probs[16][3][NUM_DCT_TOKENS - 1],
                                 int i, uint8_t *token_prob, int16_t qmul[2],
                                 const uint8_t scan[16], int vp7)
{
    VP56RangeCoder c = *r;
    goto skip_eob;
    do {
        int coeff;
restart:
        if (!vp56_rac_get_prob_branchy(&c, token_prob[0])) // DCT_EOB
            break;

skip_eob:
        if (!vp56_rac_get_prob_branchy(&c, token_prob[1])) { // DCT_0
            if (++i == 16)
                break; // invalid input; blocks should end with EOB
            token_prob = probs[i][0];
            if (vp7)
                goto restart;
            goto skip_eob;
        }

        if (!vp56_rac_get_prob_branchy(&c, token_prob[2])) { // DCT_1
            coeff = 1;
            token_prob = probs[i + 1][1];
        } else {
            if (!vp56_rac_get_prob_branchy(&c, token_prob[3])) { // DCT 2,3,4
                coeff = vp56_rac_get_prob_branchy(&c, token_prob[4]);
                if (coeff)
                    coeff += vp56_rac_get_prob(&c, token_prob[5]);
                coeff += 2;
            } else {
                // DCT_CAT*
                if (!vp56_rac_get_prob_branchy(&c, token_prob[6])) {
                    if (!vp56_rac_get_prob_branchy(&c, token_prob[7])) { // DCT_CAT1
                        coeff = 5 + vp56_rac_get_prob(&c, vp8_dct_cat1_prob[0]);
                    } else { // DCT_CAT2
                        coeff  = 7;
                        coeff += vp56_rac_get_prob(&c, vp8_dct_cat2_prob[0]) << 1;
                        coeff += vp56_rac_get_prob(&c, vp8_dct_cat2_prob[1]);
                    }
                } else { // DCT_CAT3 and up
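                    /* a and b select DCT_CAT3..DCT_CAT6; the base magnitude
                     * is 3 + (8 << cat), i.e. 11, 19, 35, 67. */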
                    int a = vp56_rac_get_prob(&c, token_prob[8]);
                    int b = vp56_rac_get_prob(&c, token_prob[9 + a]);
                    int cat = (a << 1) + b;
                    coeff  = 3 + (8 << cat);
                    coeff += vp8_rac_get_coeff(&c, ff_vp8_dct_cat_prob[cat]);
                }
            }
            token_prob = probs[i + 1][2];
        }
        block[scan[i]] = (vp8_rac_get(&c) ? -coeff : coeff) * qmul[!!i];
    } while (++i < 16);

    *r = c;
    return i;
}

static av_always_inline
int inter_predict_dc(int16_t block[16], int16_t pred[2])
{
    int16_t dc = block[0];
    int ret = 0;

    if (pred[1] > 3) {
        dc += pred[0];
        ret = 1;
    }

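    /* Branchless test: restart the match counter if pred[0] is zero, dc is
     * zero, or the two differ in sign (sign bit of the XOR). */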
    if (!pred[0] | !dc | ((int32_t)pred[0] ^ (int32_t)dc) >> 31) {
        block[0] = pred[0] = dc;
        pred[1] = 0;
    } else {
        if (pred[0] == dc)
            pred[1]++;
        block[0] = pred[0] = dc;
    }

    return ret;
}

static int vp7_decode_block_coeffs_internal(VP56RangeCoder *r,
                                            int16_t block[16],
                                            uint8_t probs[16][3][NUM_DCT_TOKENS - 1],
                                            int i, uint8_t *token_prob,
                                            int16_t qmul[2],
                                            const uint8_t scan[16])
{
    return decode_block_coeffs_internal(r, block, probs, i,
                                        token_prob, qmul, scan, IS_VP7);
}

#ifndef vp8_decode_block_coeffs_internal
static int vp8_decode_block_coeffs_internal(VP56RangeCoder *r,
                                            int16_t block[16],
                                            uint8_t probs[16][3][NUM_DCT_TOKENS - 1],
                                            int i, uint8_t *token_prob,
                                            int16_t qmul[2])
{
    return decode_block_coeffs_internal(r, block, probs, i,
                                        token_prob, qmul, ff_zigzag_scan, IS_VP8);
}
#endif

/**
 * @param c arithmetic bitstream reader context
 * @param block destination for block coefficients
 * @param probs probabilities to use when reading trees from the bitstream
 * @param i initial coeff index, 0 unless a separate DC block is coded
 * @param zero_nhood the initial prediction context for number of surrounding
 *                   all-zero blocks (only left/top, so 0-2)
 * @param qmul array holding the dc/ac dequant factor at position 0/1
 * @param scan scan pattern (VP7 only)
 *
 * @return 0 if no coeffs were decoded
 *         otherwise, the index of the last coeff decoded plus one
 */
static av_always_inline
int decode_block_coeffs(VP56RangeCoder *c, int16_t block[16],
                        uint8_t probs[16][3][NUM_DCT_TOKENS - 1],
                        int i, int zero_nhood, int16_t qmul[2],
                        const uint8_t scan[16], int vp7)
{
    uint8_t *token_prob = probs[i][zero_nhood];
    if (!vp56_rac_get_prob_branchy(c, token_prob[0])) // DCT_EOB
        return 0;
    return vp7 ? vp7_decode_block_coeffs_internal(c, block, probs, i,
                                                  token_prob, qmul, scan)
               : vp8_decode_block_coeffs_internal(c, block, probs, i,
                                                  token_prob, qmul);
}

static av_always_inline
void decode_mb_coeffs(VP8Context *s, VP8ThreadData *td, VP56RangeCoder *c,
                      VP8Macroblock *mb, uint8_t t_nnz[9], uint8_t l_nnz[9],
                      int is_vp7)
{
    int i, x, y, luma_start = 0, luma_ctx = 3;
    int nnz_pred, nnz, nnz_total = 0;
    int segment = mb->segment;
    int block_dc = 0;

    if (mb->mode != MODE_I4x4 && (is_vp7 || mb->mode != VP8_MVMODE_SPLIT)) {
        nnz_pred = t_nnz[8] + l_nnz[8];

        // decode DC values and do hadamard
        nnz = decode_block_coeffs(c, td->block_dc, s->prob->token[1], 0,
                                  nnz_pred, s->qmat[segment].luma_dc_qmul,
                                  ff_zigzag_scan, is_vp7);
        l_nnz[8] = t_nnz[8] = !!nnz;

        if (is_vp7 && mb->mode > MODE_I4x4) {
            nnz |= inter_predict_dc(td->block_dc,
                                    s->inter_dc_pred[mb->ref_frame - 1]);
        }

        if (nnz) {
            nnz_total += nnz;
            block_dc   = 1;
            if (nnz == 1)
                s->vp8dsp.vp8_luma_dc_wht_dc(td->block, td->block_dc);
            else
                s->vp8dsp.vp8_luma_dc_wht(td->block, td->block_dc);
        }
        luma_start = 1;
        luma_ctx   = 0;
    }

    // luma blocks
    for (y = 0; y < 4; y++)
        for (x = 0; x < 4; x++) {
            nnz_pred = l_nnz[y] + t_nnz[x];
            nnz = decode_block_coeffs(c, td->block[y][x],
                                      s->prob->token[luma_ctx],
                                      luma_start, nnz_pred,
                                      s->qmat[segment].luma_qmul,
                                      s->prob[0].scan, is_vp7);
            /* nnz+block_dc may be one more than the actual last index,
             * but we don't care */
            td->non_zero_count_cache[y][x] = nnz + block_dc;
            t_nnz[x] = l_nnz[y] = !!nnz;
            nnz_total += nnz;
        }

    // chroma blocks
    // TODO: what to do about dimensions? 2nd dim for luma is x,
    // but for chroma it's (y<<1)|x
    for (i = 4; i < 6; i++)
        for (y = 0; y < 2; y++)
            for (x = 0; x < 2; x++) {
                nnz_pred = l_nnz[i + 2 * y] + t_nnz[i + 2 * x];
                nnz = decode_block_coeffs(c, td->block[i][(y << 1) + x],
                                          s->prob->token[2], 0, nnz_pred,
                                          s->qmat[segment].chroma_qmul,
                                          s->prob[0].scan, is_vp7);
                td->non_zero_count_cache[i][(y << 1) + x] = nnz;
                t_nnz[i + 2 * x] = l_nnz[i + 2 * y] = !!nnz;
                nnz_total += nnz;
            }

    // if there were no coded coeffs despite the macroblock not being marked skip,
    // we MUST not do the inner loop filter and should not do IDCT
    // Since skip isn't used for bitstream prediction, just manually set it.
    if (!nnz_total)
        mb->skip = 1;
}

static av_always_inline
void backup_mb_border(uint8_t *top_border, uint8_t *src_y,
                      uint8_t *src_cb, uint8_t *src_cr,
                      ptrdiff_t linesize, ptrdiff_t uvlinesize, int simple)
{
    AV_COPY128(top_border, src_y + 15 * linesize);
    if (!simple) {
        AV_COPY64(top_border + 16, src_cb + 7 * uvlinesize);
        AV_COPY64(top_border + 24, src_cr + 7 * uvlinesize);
    }
}

static av_always_inline
void xchg_mb_border(uint8_t *top_border, uint8_t *src_y, uint8_t *src_cb,
                    uint8_t *src_cr, ptrdiff_t linesize, ptrdiff_t uvlinesize, int mb_x,
                    int mb_y, int mb_width, int simple, int xchg)
{
    uint8_t *top_border_m1 = top_border - 32;     // for TL prediction
    src_y  -= linesize;
    src_cb -= uvlinesize;
    src_cr -= uvlinesize;

#define XCHG(a, b, xchg)                                                      \
    do {                                                                      \
        if (xchg)                                                             \
            AV_SWAP64(b, a);                                                  \
        else                                                                  \
            AV_COPY64(b, a);                                                  \
    } while (0)

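    /* Each XCHG either copies the saved border into the row above the
     * macroblock (its prediction source) or, with xchg set, swaps the two so
     * that the border buffer is updated in the same pass. */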
1573 XCHG(top_border_m1 + 8, src_y - 8, xchg);
1574 XCHG(top_border, src_y, xchg);
1575 XCHG(top_border + 8, src_y + 8, 1);
1576 if (mb_x < mb_width - 1)
1577 XCHG(top_border + 32, src_y + 16, 1);
1578
1579 // only copy chroma for normal loop filter
1580 // or to initialize the top row to 127
1581 if (!simple || !mb_y) {
1582 XCHG(top_border_m1 + 16, src_cb - 8, xchg);
1583 XCHG(top_border_m1 + 24, src_cr - 8, xchg);
1584 XCHG(top_border + 16, src_cb, 1);
1585 XCHG(top_border + 24, src_cr, 1);
1586 }
1587 }
1588
1589 static av_always_inline
check_dc_pred8x8_mode(int mode,int mb_x,int mb_y)1590 int check_dc_pred8x8_mode(int mode, int mb_x, int mb_y)
1591 {
1592 if (!mb_x)
1593 return mb_y ? TOP_DC_PRED8x8 : DC_128_PRED8x8;
1594 else
1595 return mb_y ? mode : LEFT_DC_PRED8x8;
1596 }
1597
1598 static av_always_inline
check_tm_pred8x8_mode(int mode,int mb_x,int mb_y,int vp7)1599 int check_tm_pred8x8_mode(int mode, int mb_x, int mb_y, int vp7)
1600 {
1601 if (!mb_x)
1602 return mb_y ? VERT_PRED8x8 : (vp7 ? DC_128_PRED8x8 : DC_129_PRED8x8);
1603 else
1604 return mb_y ? mode : HOR_PRED8x8;
1605 }
1606
1607 static av_always_inline
check_intra_pred8x8_mode_emuedge(int mode,int mb_x,int mb_y,int vp7)1608 int check_intra_pred8x8_mode_emuedge(int mode, int mb_x, int mb_y, int vp7)
1609 {
1610 switch (mode) {
1611 case DC_PRED8x8:
1612 return check_dc_pred8x8_mode(mode, mb_x, mb_y);
1613 case VERT_PRED8x8:
1614 return !mb_y ? (vp7 ? DC_128_PRED8x8 : DC_127_PRED8x8) : mode;
1615 case HOR_PRED8x8:
1616 return !mb_x ? (vp7 ? DC_128_PRED8x8 : DC_129_PRED8x8) : mode;
1617 case PLANE_PRED8x8: /* TM */
1618 return check_tm_pred8x8_mode(mode, mb_x, mb_y, vp7);
1619 }
1620 return mode;
1621 }
1622
1623 static av_always_inline
check_tm_pred4x4_mode(int mode,int mb_x,int mb_y,int vp7)1624 int check_tm_pred4x4_mode(int mode, int mb_x, int mb_y, int vp7)
1625 {
1626 if (!mb_x) {
1627 return mb_y ? VERT_VP8_PRED : (vp7 ? DC_128_PRED : DC_129_PRED);
1628 } else {
1629 return mb_y ? mode : HOR_VP8_PRED;
1630 }
1631 }
1632
1633 static av_always_inline
check_intra_pred4x4_mode_emuedge(int mode,int mb_x,int mb_y,int * copy_buf,int vp7)1634 int check_intra_pred4x4_mode_emuedge(int mode, int mb_x, int mb_y,
1635 int *copy_buf, int vp7)
1636 {
1637 switch (mode) {
1638 case VERT_PRED:
1639 if (!mb_x && mb_y) {
1640 *copy_buf = 1;
1641 return mode;
1642 }
1643 /* fall-through */
1644 case DIAG_DOWN_LEFT_PRED:
1645 case VERT_LEFT_PRED:
1646 return !mb_y ? (vp7 ? DC_128_PRED : DC_127_PRED) : mode;
1647 case HOR_PRED:
1648 if (!mb_y) {
1649 *copy_buf = 1;
1650 return mode;
1651 }
1652 /* fall-through */
1653 case HOR_UP_PRED:
1654 return !mb_x ? (vp7 ? DC_128_PRED : DC_129_PRED) : mode;
1655 case TM_VP8_PRED:
1656 return check_tm_pred4x4_mode(mode, mb_x, mb_y, vp7);
1657 case DC_PRED: /* 4x4 DC doesn't use the same "H.264-style" exceptions
1658 * as 16x16/8x8 DC */
1659 case DIAG_DOWN_RIGHT_PRED:
1660 case VERT_RIGHT_PRED:
1661 case HOR_DOWN_PRED:
1662 if (!mb_y || !mb_x)
1663 *copy_buf = 1;
1664 return mode;
1665 }
1666 return mode;
1667 }
1668
1669 static av_always_inline
intra_predict(VP8Context * s,VP8ThreadData * td,uint8_t * dst[3],VP8Macroblock * mb,int mb_x,int mb_y,int is_vp7)1670 void intra_predict(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
1671 VP8Macroblock *mb, int mb_x, int mb_y, int is_vp7)
1672 {
1673 int x, y, mode, nnz;
1674 uint32_t tr;
1675
1676 /* for the first row, we need to run xchg_mb_border to init the top edge
1677 * to 127 otherwise, skip it if we aren't going to deblock */
1678 if (mb_y && (s->deblock_filter || !mb_y) && td->thread_nr == 0)
1679 xchg_mb_border(s->top_border[mb_x + 1], dst[0], dst[1], dst[2],
1680 s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
1681 s->filter.simple, 1);
1682
    if (mb->mode < MODE_I4x4) {
        mode = check_intra_pred8x8_mode_emuedge(mb->mode, mb_x, mb_y, is_vp7);
        s->hpc.pred16x16[mode](dst[0], s->linesize);
    } else {
        uint8_t *ptr = dst[0];
        uint8_t *intra4x4 = mb->intra4x4_pred_mode_mb;
        const uint8_t lo = is_vp7 ? 128 : 127;
        const uint8_t hi = is_vp7 ? 128 : 129;
        uint8_t tr_top[4] = { lo, lo, lo, lo };

        // all blocks on the right edge of the macroblock use the bottom edge
        // of the top macroblock for their topright edge
        uint8_t *tr_right = ptr - s->linesize + 16;

        // if we're on the right edge of the frame, said edge is extended
        // from the top macroblock
        if (mb_y && mb_x == s->mb_width - 1) {
            tr = tr_right[-1] * 0x01010101u;
            tr_right = (uint8_t *) &tr;
        }

        if (mb->skip)
            AV_ZERO128(td->non_zero_count_cache);

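        /* copy_dst is a 5x8 scratch area: one row of top pixels followed by
         * the 4x4 block at offset 12 (row stride 8). Index 3 holds the
         * top-left pixel and indices 11/19/27/35 hold the left column. */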
        for (y = 0; y < 4; y++) {
            uint8_t *topright = ptr + 4 - s->linesize;
            for (x = 0; x < 4; x++) {
                int copy = 0;
                ptrdiff_t linesize = s->linesize;
                uint8_t *dst = ptr + 4 * x;
                LOCAL_ALIGNED(4, uint8_t, copy_dst, [5 * 8]);

                if ((y == 0 || x == 3) && mb_y == 0) {
                    topright = tr_top;
                } else if (x == 3)
                    topright = tr_right;

                mode = check_intra_pred4x4_mode_emuedge(intra4x4[x], mb_x + x,
                                                        mb_y + y, &copy, is_vp7);
                if (copy) {
                    dst = copy_dst + 12;
                    linesize = 8;
                    if (!(mb_y + y)) {
                        copy_dst[3] = lo;
                        AV_WN32A(copy_dst + 4, lo * 0x01010101U);
                    } else {
                        AV_COPY32(copy_dst + 4, ptr + 4 * x - s->linesize);
                        if (!(mb_x + x)) {
                            copy_dst[3] = hi;
                        } else {
                            copy_dst[3] = ptr[4 * x - s->linesize - 1];
                        }
                    }
                    if (!(mb_x + x)) {
                        copy_dst[11] =
                        copy_dst[19] =
                        copy_dst[27] =
                        copy_dst[35] = hi;
                    } else {
                        copy_dst[11] = ptr[4 * x                   - 1];
                        copy_dst[19] = ptr[4 * x + s->linesize     - 1];
                        copy_dst[27] = ptr[4 * x + s->linesize * 2 - 1];
                        copy_dst[35] = ptr[4 * x + s->linesize * 3 - 1];
                    }
                }
                s->hpc.pred4x4[mode](dst, topright, linesize);
                if (copy) {
                    AV_COPY32(ptr + 4 * x,                   copy_dst + 12);
                    AV_COPY32(ptr + 4 * x + s->linesize,     copy_dst + 20);
                    AV_COPY32(ptr + 4 * x + s->linesize * 2, copy_dst + 28);
                    AV_COPY32(ptr + 4 * x + s->linesize * 3, copy_dst + 36);
                }

                nnz = td->non_zero_count_cache[y][x];
                if (nnz) {
                    if (nnz == 1)
                        s->vp8dsp.vp8_idct_dc_add(ptr + 4 * x,
                                                  td->block[y][x], s->linesize);
                    else
                        s->vp8dsp.vp8_idct_add(ptr + 4 * x,
                                               td->block[y][x], s->linesize);
                }
                topright += 4;
            }

            ptr += 4 * s->linesize;
            intra4x4 += 4;
        }
    }

    mode = check_intra_pred8x8_mode_emuedge(mb->chroma_pred_mode,
                                            mb_x, mb_y, is_vp7);
    s->hpc.pred8x8[mode](dst[1], s->uvlinesize);
    s->hpc.pred8x8[mode](dst[2], s->uvlinesize);

    if (mb_y && (s->deblock_filter || !mb_y) && td->thread_nr == 0)
        xchg_mb_border(s->top_border[mb_x + 1], dst[0], dst[1], dst[2],
                       s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
                       s->filter.simple, 0);
}

static const uint8_t subpel_idx[3][8] = {
    { 0, 1, 2, 1, 2, 1, 2, 1 }, // nr. of left extra pixels,
                                // also function pointer index
    { 0, 3, 5, 3, 5, 3, 5, 3 }, // nr. of extra pixels required
    { 0, 2, 3, 2, 3, 2, 3, 2 }, // nr. of right extra pixels
};
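
/* The first row doubles as the MC function selector: 0 = full-pel copy,
 * 1 = the 4-tap filter variant, 2 = the 6-tap variant. The odd eighth-pel
 * phases have zero-valued outermost taps, so they only need 1 left +
 * 2 right extra pixels, while the even phases use all six taps and need
 * 2 left + 3 right. */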

/**
 * luma MC function
 *
 * @param s        VP8 decoding context
 * @param td       thread data
 * @param dst      target buffer for block data at block position
 * @param ref      reference picture buffer at origin (0, 0)
 * @param mv       motion vector (relative to block position) to get pixel data from
 * @param x_off    horizontal position of block from origin (0, 0)
 * @param y_off    vertical position of block from origin (0, 0)
 * @param block_w  width of block (16, 8 or 4)
 * @param block_h  height of block (16, 8 or 4)
 * @param width    width of src/dst plane data
 * @param height   height of src/dst plane data
 * @param linesize size of a single line of plane data, including padding
 * @param mc_func  motion compensation function pointers (bilinear or sixtap MC)
 */
static av_always_inline
void vp8_mc_luma(VP8Context *s, VP8ThreadData *td, uint8_t *dst,
                 ThreadFrame *ref, const VP56mv *mv,
                 int x_off, int y_off, int block_w, int block_h,
                 int width, int height, ptrdiff_t linesize,
                 vp8_mc_func mc_func[3][3])
{
    uint8_t *src = ref->f->data[0];

    if (AV_RN32A(mv)) {
        ptrdiff_t src_linesize = linesize;

        int mx = (mv->x * 2) & 7, mx_idx = subpel_idx[0][mx];
        int my = (mv->y * 2) & 7, my_idx = subpel_idx[0][my];

        x_off += mv->x >> 2;
        y_off += mv->y >> 2;

        // edge emulation
        ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 4, 0);
        src += y_off * linesize + x_off;
        if (x_off < mx_idx || x_off >= width  - block_w - subpel_idx[2][mx] ||
            y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
            s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
                                     src - my_idx * linesize - mx_idx,
                                     EDGE_EMU_LINESIZE, linesize,
                                     block_w + subpel_idx[1][mx],
                                     block_h + subpel_idx[1][my],
                                     x_off - mx_idx, y_off - my_idx,
                                     width, height);
            src = td->edge_emu_buffer + mx_idx + EDGE_EMU_LINESIZE * my_idx;
            src_linesize = EDGE_EMU_LINESIZE;
        }
        mc_func[my_idx][mx_idx](dst, linesize, src, src_linesize, block_h, mx, my);
    } else {
        ff_thread_await_progress(ref, (3 + y_off + block_h) >> 4, 0);
        mc_func[0][0](dst, linesize, src + y_off * linesize + x_off,
                      linesize, block_h, 0, 0);
    }
}

/**
 * chroma MC function
 *
 * @param s        VP8 decoding context
 * @param td       thread data
 * @param dst1     target buffer for block data at block position (U plane)
 * @param dst2     target buffer for block data at block position (V plane)
 * @param ref      reference picture buffer at origin (0, 0)
 * @param mv       motion vector (relative to block position) to get pixel data from
 * @param x_off    horizontal position of block from origin (0, 0)
 * @param y_off    vertical position of block from origin (0, 0)
 * @param block_w  width of block (8 or 4)
 * @param block_h  height of block (8 or 4)
 * @param width    width of src/dst plane data
 * @param height   height of src/dst plane data
 * @param linesize size of a single line of plane data, including padding
 * @param mc_func  motion compensation function pointers (bilinear or sixtap MC)
 */
static av_always_inline
void vp8_mc_chroma(VP8Context *s, VP8ThreadData *td, uint8_t *dst1,
                   uint8_t *dst2, ThreadFrame *ref, const VP56mv *mv,
                   int x_off, int y_off, int block_w, int block_h,
                   int width, int height, ptrdiff_t linesize,
                   vp8_mc_func mc_func[3][3])
{
    uint8_t *src1 = ref->f->data[1], *src2 = ref->f->data[2];

    if (AV_RN32A(mv)) {
        int mx = mv->x & 7, mx_idx = subpel_idx[0][mx];
        int my = mv->y & 7, my_idx = subpel_idx[0][my];

        x_off += mv->x >> 3;
        y_off += mv->y >> 3;

        // edge emulation
        src1 += y_off * linesize + x_off;
        src2 += y_off * linesize + x_off;
        ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 3, 0);
        if (x_off < mx_idx || x_off >= width  - block_w - subpel_idx[2][mx] ||
            y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
            s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
                                     src1 - my_idx * linesize - mx_idx,
                                     EDGE_EMU_LINESIZE, linesize,
                                     block_w + subpel_idx[1][mx],
                                     block_h + subpel_idx[1][my],
                                     x_off - mx_idx, y_off - my_idx, width, height);
            src1 = td->edge_emu_buffer + mx_idx + EDGE_EMU_LINESIZE * my_idx;
            mc_func[my_idx][mx_idx](dst1, linesize, src1, EDGE_EMU_LINESIZE, block_h, mx, my);

            s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
                                     src2 - my_idx * linesize - mx_idx,
                                     EDGE_EMU_LINESIZE, linesize,
                                     block_w + subpel_idx[1][mx],
                                     block_h + subpel_idx[1][my],
                                     x_off - mx_idx, y_off - my_idx, width, height);
            src2 = td->edge_emu_buffer + mx_idx + EDGE_EMU_LINESIZE * my_idx;
            mc_func[my_idx][mx_idx](dst2, linesize, src2, EDGE_EMU_LINESIZE, block_h, mx, my);
        } else {
            mc_func[my_idx][mx_idx](dst1, linesize, src1, linesize, block_h, mx, my);
            mc_func[my_idx][mx_idx](dst2, linesize, src2, linesize, block_h, mx, my);
        }
    } else {
        ff_thread_await_progress(ref, (3 + y_off + block_h) >> 3, 0);
        mc_func[0][0](dst1, linesize, src1 + y_off * linesize + x_off, linesize, block_h, 0, 0);
        mc_func[0][0](dst2, linesize, src2 + y_off * linesize + x_off, linesize, block_h, 0, 0);
    }
}

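/* Motion-compensate one prediction partition: run luma MC at the given
 * block offset, then derive the chroma block by halving all offsets and
 * dimensions. Profile 3 (VP8 only) restricts chroma MVs to full-pel. */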
static av_always_inline
void vp8_mc_part(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
                 ThreadFrame *ref_frame, int x_off, int y_off,
                 int bx_off, int by_off, int block_w, int block_h,
                 int width, int height, VP56mv *mv)
{
    VP56mv uvmv = *mv;

    /* Y */
    vp8_mc_luma(s, td, dst[0] + by_off * s->linesize + bx_off,
                ref_frame, mv, x_off + bx_off, y_off + by_off,
                block_w, block_h, width, height, s->linesize,
                s->put_pixels_tab[block_w == 8]);

    /* U/V */
    if (s->profile == 3) {
        /* This block only applies to VP8; it is safe to check
         * only the profile, as VP7's profile is always <= 1. */
        uvmv.x &= ~7;
        uvmv.y &= ~7;
    }
    x_off   >>= 1;
    y_off   >>= 1;
    bx_off  >>= 1;
    by_off  >>= 1;
    width   >>= 1;
    height  >>= 1;
    block_w >>= 1;
    block_h >>= 1;
    vp8_mc_chroma(s, td, dst[1] + by_off * s->uvlinesize + bx_off,
                  dst[2] + by_off * s->uvlinesize + bx_off, ref_frame,
                  &uvmv, x_off + bx_off, y_off + by_off,
                  block_w, block_h, width, height, s->uvlinesize,
                  s->put_pixels_tab[1 + (block_w == 4)]);
}

/* Fetch pixels for estimated mv 4 macroblocks ahead.
 * Optimized for 64-byte cache lines. Inspired by ffh264 prefetch_motion. */
static av_always_inline
void prefetch_motion(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y,
                     int mb_xy, int ref)
{
    /* Don't prefetch refs that haven't been used very often this frame. */
    if (s->ref_count[ref - 1] > (mb_xy >> 5)) {
        int x_off = mb_x << 4, y_off = mb_y << 4;
        int mx = (mb->mv.x >> 2) + x_off + 8;
        int my = (mb->mv.y >> 2) + y_off;
        uint8_t **src = s->framep[ref]->tf.f->data;
        int off = mx + (my + (mb_x & 3) * 4) * s->linesize + 64;
        /* For threading, a ff_thread_await_progress here might be useful, but
         * it actually slows down the decoder. Since a bad prefetch doesn't
         * generate bad decoder output, we don't run it here. */
        s->vdsp.prefetch(src[0] + off, s->linesize, 4);
        off = (mx >> 1) + ((my >> 1) + (mb_x & 7)) * s->uvlinesize + 64;
        s->vdsp.prefetch(src[1] + off, src[2] - src[1], 2);
    }
}

/**
 * Apply motion vectors to the prediction buffer; see chapter 18
 * ("Interframe Prediction") of the VP8 bitstream guide, RFC 6386.
 */
static av_always_inline
void inter_predict(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
                   VP8Macroblock *mb, int mb_x, int mb_y)
{
    int x_off = mb_x << 4, y_off = mb_y << 4;
    int width = 16 * s->mb_width, height = 16 * s->mb_height;
    ThreadFrame *ref = &s->framep[mb->ref_frame]->tf;
    VP56mv *bmv = mb->bmv;

    switch (mb->partitioning) {
    case VP8_SPLITMVMODE_NONE:
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
                    0, 0, 16, 16, width, height, &mb->mv);
        break;
    case VP8_SPLITMVMODE_4x4: {
        int x, y;
        VP56mv uvmv;

        /* Y */
        for (y = 0; y < 4; y++) {
            for (x = 0; x < 4; x++) {
                vp8_mc_luma(s, td, dst[0] + 4 * y * s->linesize + x * 4,
                            ref, &bmv[4 * y + x],
                            4 * x + x_off, 4 * y + y_off, 4, 4,
                            width, height, s->linesize,
                            s->put_pixels_tab[2]);
            }
        }

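        /* Each 4x4 chroma block covers a 2x2 group of luma blocks, so its
         * chroma MV is the sum of the four corresponding luma MVs divided
         * by four with rounding, in eighth-pel units. */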
        /* U/V */
        x_off  >>= 1;
        y_off  >>= 1;
        width  >>= 1;
        height >>= 1;
        for (y = 0; y < 2; y++) {
            for (x = 0; x < 2; x++) {
                uvmv.x = mb->bmv[2 * y       * 4 + 2 * x    ].x +
                         mb->bmv[2 * y       * 4 + 2 * x + 1].x +
                         mb->bmv[(2 * y + 1) * 4 + 2 * x    ].x +
                         mb->bmv[(2 * y + 1) * 4 + 2 * x + 1].x;
                uvmv.y = mb->bmv[2 * y       * 4 + 2 * x    ].y +
                         mb->bmv[2 * y       * 4 + 2 * x + 1].y +
                         mb->bmv[(2 * y + 1) * 4 + 2 * x    ].y +
                         mb->bmv[(2 * y + 1) * 4 + 2 * x + 1].y;
                uvmv.x = (uvmv.x + 2 + FF_SIGNBIT(uvmv.x)) >> 2;
                uvmv.y = (uvmv.y + 2 + FF_SIGNBIT(uvmv.y)) >> 2;
                if (s->profile == 3) {
                    uvmv.x &= ~7;
                    uvmv.y &= ~7;
                }
                vp8_mc_chroma(s, td, dst[1] + 4 * y * s->uvlinesize + x * 4,
                              dst[2] + 4 * y * s->uvlinesize + x * 4, ref,
                              &uvmv, 4 * x + x_off, 4 * y + y_off, 4, 4,
                              width, height, s->uvlinesize,
                              s->put_pixels_tab[2]);
            }
        }
        break;
    }
    case VP8_SPLITMVMODE_16x8:
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
                    0, 0, 16, 8, width, height, &bmv[0]);
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
                    0, 8, 16, 8, width, height, &bmv[1]);
        break;
    case VP8_SPLITMVMODE_8x16:
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
                    0, 0, 8, 16, width, height, &bmv[0]);
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
                    8, 0, 8, 16, width, height, &bmv[1]);
        break;
    case VP8_SPLITMVMODE_8x8:
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
                    0, 0, 8, 8, width, height, &bmv[0]);
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
                    8, 0, 8, 8, width, height, &bmv[1]);
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
                    0, 8, 8, 8, width, height, &bmv[2]);
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
                    8, 8, 8, 8, width, height, &bmv[3]);
        break;
    }
}

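/* Add the inverse-transformed residual for all coded blocks of one
 * macroblock. The four per-row non-zero counts are read as one packed
 * 32-bit word; if every byte is 0 or 1 (at most a DC coefficient per
 * block), the whole row can take the faster DC-only path. */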
static av_always_inline
void idct_mb(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3], VP8Macroblock *mb)
{
    int x, y, ch;

    if (mb->mode != MODE_I4x4) {
        uint8_t *y_dst = dst[0];
        for (y = 0; y < 4; y++) {
            uint32_t nnz4 = AV_RL32(td->non_zero_count_cache[y]);
            if (nnz4) {
                if (nnz4 & ~0x01010101) {
                    for (x = 0; x < 4; x++) {
                        if ((uint8_t) nnz4 == 1)
                            s->vp8dsp.vp8_idct_dc_add(y_dst + 4 * x,
                                                      td->block[y][x],
                                                      s->linesize);
                        else if ((uint8_t) nnz4 > 1)
                            s->vp8dsp.vp8_idct_add(y_dst + 4 * x,
                                                   td->block[y][x],
                                                   s->linesize);
                        nnz4 >>= 8;
                        if (!nnz4)
                            break;
                    }
                } else {
                    s->vp8dsp.vp8_idct_dc_add4y(y_dst, td->block[y], s->linesize);
                }
            }
            y_dst += 4 * s->linesize;
        }
    }

    for (ch = 0; ch < 2; ch++) {
        uint32_t nnz4 = AV_RL32(td->non_zero_count_cache[4 + ch]);
        if (nnz4) {
            uint8_t *ch_dst = dst[1 + ch];
            if (nnz4 & ~0x01010101) {
                for (y = 0; y < 2; y++) {
                    for (x = 0; x < 2; x++) {
                        if ((uint8_t) nnz4 == 1)
                            s->vp8dsp.vp8_idct_dc_add(ch_dst + 4 * x,
                                                      td->block[4 + ch][(y << 1) + x],
                                                      s->uvlinesize);
                        else if ((uint8_t) nnz4 > 1)
                            s->vp8dsp.vp8_idct_add(ch_dst + 4 * x,
                                                   td->block[4 + ch][(y << 1) + x],
                                                   s->uvlinesize);
                        nnz4 >>= 8;
                        if (!nnz4)
                            goto chroma_idct_end;
                    }
                    ch_dst += 4 * s->uvlinesize;
                }
            } else {
                s->vp8dsp.vp8_idct_dc_add4uv(ch_dst, td->block[4 + ch], s->uvlinesize);
            }
        }
chroma_idct_end:
        ;
    }
}

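/* Compute the loop-filter strength for one macroblock from the segment
 * filter level, the reference/mode LF deltas and the sharpness setting;
 * this follows the loop-filter section of the VP8 spec (RFC 6386, ch. 15). */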
static av_always_inline
void filter_level_for_mb(VP8Context *s, VP8Macroblock *mb,
                         VP8FilterStrength *f, int is_vp7)
{
    int interior_limit, filter_level;

    if (s->segmentation.enabled) {
        filter_level = s->segmentation.filter_level[mb->segment];
        if (!s->segmentation.absolute_vals)
            filter_level += s->filter.level;
    } else
        filter_level = s->filter.level;

    if (s->lf_delta.enabled) {
        filter_level += s->lf_delta.ref[mb->ref_frame];
        filter_level += s->lf_delta.mode[mb->mode];
    }

    filter_level = av_clip_uintp2(filter_level, 6);

    interior_limit = filter_level;
    if (s->filter.sharpness) {
        interior_limit >>= (s->filter.sharpness + 3) >> 2;
        interior_limit = FFMIN(interior_limit, 9 - s->filter.sharpness);
    }
    interior_limit = FFMAX(interior_limit, 1);

    f->filter_level = filter_level;
    f->inner_limit  = interior_limit;
    f->inner_filter = is_vp7 || !mb->skip || mb->mode == MODE_I4x4 ||
                      mb->mode == VP8_MVMODE_SPLIT;
}

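/* Filter the left and top edges of one macroblock plus its inner block
 * edges. The high-edge-variance thresholds below are indexed by
 * [keyframe][filter_level], mirroring the hev_threshold derivation of the
 * VP7/VP8 loop filter. */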
static av_always_inline
void filter_mb(VP8Context *s, uint8_t *dst[3], VP8FilterStrength *f,
               int mb_x, int mb_y, int is_vp7)
{
    int mbedge_lim, bedge_lim_y, bedge_lim_uv, hev_thresh;
    int filter_level = f->filter_level;
    int inner_limit  = f->inner_limit;
    int inner_filter = f->inner_filter;
    ptrdiff_t linesize   = s->linesize;
    ptrdiff_t uvlinesize = s->uvlinesize;
    static const uint8_t hev_thresh_lut[2][64] = {
        { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
          2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
          3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
          3, 3, 3, 3 },
        { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
          2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
          2, 2, 2, 2 }
    };

    if (!filter_level)
        return;

    if (is_vp7) {
        bedge_lim_y  = filter_level;
        bedge_lim_uv = filter_level * 2;
        mbedge_lim   = filter_level + 2;
    } else {
        bedge_lim_y  =
        bedge_lim_uv = filter_level * 2 + inner_limit;
        mbedge_lim   = bedge_lim_y + 4;
    }

    hev_thresh = hev_thresh_lut[s->keyframe][filter_level];

    if (mb_x) {
        s->vp8dsp.vp8_h_loop_filter16y(dst[0], linesize,
                                       mbedge_lim, inner_limit, hev_thresh);
        s->vp8dsp.vp8_h_loop_filter8uv(dst[1], dst[2], uvlinesize,
                                       mbedge_lim, inner_limit, hev_thresh);
    }

#define H_LOOP_FILTER_16Y_INNER(cond)                                        \
    if (cond && inner_filter) {                                              \
        s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0] + 4, linesize,           \
                                             bedge_lim_y, inner_limit,       \
                                             hev_thresh);                    \
        s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0] + 8, linesize,           \
                                             bedge_lim_y, inner_limit,       \
                                             hev_thresh);                    \
        s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0] + 12, linesize,          \
                                             bedge_lim_y, inner_limit,       \
                                             hev_thresh);                    \
        s->vp8dsp.vp8_h_loop_filter8uv_inner(dst[1] + 4, dst[2] + 4,         \
                                             uvlinesize, bedge_lim_uv,       \
                                             inner_limit, hev_thresh);       \
    }

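    /* The inner pass across vertical edges (the "h" filters) runs before
     * the horizontal-edge pass for VP8 but after it for VP7, hence the two
     * guarded invocations of this macro. */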
    H_LOOP_FILTER_16Y_INNER(!is_vp7)

    if (mb_y) {
        s->vp8dsp.vp8_v_loop_filter16y(dst[0], linesize,
                                       mbedge_lim, inner_limit, hev_thresh);
        s->vp8dsp.vp8_v_loop_filter8uv(dst[1], dst[2], uvlinesize,
                                       mbedge_lim, inner_limit, hev_thresh);
    }

    if (inner_filter) {
        s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0] + 4 * linesize,
                                             linesize, bedge_lim_y,
                                             inner_limit, hev_thresh);
        s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0] + 8 * linesize,
                                             linesize, bedge_lim_y,
                                             inner_limit, hev_thresh);
        s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0] + 12 * linesize,
                                             linesize, bedge_lim_y,
                                             inner_limit, hev_thresh);
        s->vp8dsp.vp8_v_loop_filter8uv_inner(dst[1] + 4 * uvlinesize,
                                             dst[2] + 4 * uvlinesize,
                                             uvlinesize, bedge_lim_uv,
                                             inner_limit, hev_thresh);
    }

    H_LOOP_FILTER_16Y_INNER(is_vp7)
}

static av_always_inline
void filter_mb_simple(VP8Context *s, uint8_t *dst, VP8FilterStrength *f,
                      int mb_x, int mb_y)
{
    int mbedge_lim, bedge_lim;
    int filter_level = f->filter_level;
    int inner_limit  = f->inner_limit;
    int inner_filter = f->inner_filter;
    ptrdiff_t linesize = s->linesize;

    if (!filter_level)
        return;

    bedge_lim  = 2 * filter_level + inner_limit;
    mbedge_lim = bedge_lim + 4;

    if (mb_x)
        s->vp8dsp.vp8_h_loop_filter_simple(dst, linesize, mbedge_lim);
    if (inner_filter) {
        s->vp8dsp.vp8_h_loop_filter_simple(dst +  4, linesize, bedge_lim);
        s->vp8dsp.vp8_h_loop_filter_simple(dst +  8, linesize, bedge_lim);
        s->vp8dsp.vp8_h_loop_filter_simple(dst + 12, linesize, bedge_lim);
    }

    if (mb_y)
        s->vp8dsp.vp8_v_loop_filter_simple(dst, linesize, mbedge_lim);
    if (inner_filter) {
        s->vp8dsp.vp8_v_loop_filter_simple(dst +  4 * linesize, linesize, bedge_lim);
        s->vp8dsp.vp8_v_loop_filter_simple(dst +  8 * linesize, linesize, bedge_lim);
        s->vp8dsp.vp8_v_loop_filter_simple(dst + 12 * linesize, linesize, bedge_lim);
    }
}

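/* MVs may point up to one macroblock (16 pixels = 64 quarter-pel units)
 * beyond the frame edge; the mv_min/mv_max bounds below are kept in the
 * same 64-units-per-macroblock scale and shifted as decoding advances. */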
#define MARGIN (16 << 2)
static av_always_inline
int vp78_decode_mv_mb_modes(AVCodecContext *avctx, VP8Frame *curframe,
                            VP8Frame *prev_frame, int is_vp7)
{
    VP8Context *s = avctx->priv_data;
    int mb_x, mb_y;

    s->mv_bounds.mv_min.y = -MARGIN;
    s->mv_bounds.mv_max.y = ((s->mb_height - 1) << 6) + MARGIN;
    for (mb_y = 0; mb_y < s->mb_height; mb_y++) {
        VP8Macroblock *mb = s->macroblocks_base +
                            ((s->mb_width + 1) * (mb_y + 1) + 1);
        int mb_xy = mb_y * s->mb_width;

        AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED * 0x01010101);

        s->mv_bounds.mv_min.x = -MARGIN;
        s->mv_bounds.mv_max.x = ((s->mb_width - 1) << 6) + MARGIN;

        if (vpX_rac_is_end(&s->c)) {
            return AVERROR_INVALIDDATA;
        }
        for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
            if (mb_y == 0)
                AV_WN32A((mb - s->mb_width - 1)->intra4x4_pred_mode_top,
                         DC_PRED * 0x01010101);
            decode_mb_mode(s, &s->mv_bounds, mb, mb_x, mb_y, curframe->seg_map->data + mb_xy,
                           prev_frame && prev_frame->seg_map ?
                           prev_frame->seg_map->data + mb_xy : NULL, 1, is_vp7);
            s->mv_bounds.mv_min.x -= 64;
            s->mv_bounds.mv_max.x -= 64;
        }
        s->mv_bounds.mv_min.y -= 64;
        s->mv_bounds.mv_max.y -= 64;
    }
    return 0;
}

static int vp7_decode_mv_mb_modes(AVCodecContext *avctx, VP8Frame *cur_frame,
                                  VP8Frame *prev_frame)
{
    return vp78_decode_mv_mb_modes(avctx, cur_frame, prev_frame, IS_VP7);
}

static int vp8_decode_mv_mb_modes(AVCodecContext *avctx, VP8Frame *cur_frame,
                                  VP8Frame *prev_frame)
{
    return vp78_decode_mv_mb_modes(avctx, cur_frame, prev_frame, IS_VP8);
}

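/* Sliced-threading synchronization: each thread publishes its decoding
 * position packed as (mb_y << 16) | mb_x. check_thread_pos() blocks on the
 * other thread's condition variable until that thread has passed the given
 * position; update_pos() stores the new position and wakes any waiter. */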
#if HAVE_THREADS
#define check_thread_pos(td, otd, mb_x_check, mb_y_check)                     \
    do {                                                                      \
        int tmp = (mb_y_check << 16) | (mb_x_check & 0xFFFF);                 \
        if (atomic_load(&otd->thread_mb_pos) < tmp) {                         \
            pthread_mutex_lock(&otd->lock);                                   \
            atomic_store(&td->wait_mb_pos, tmp);                              \
            do {                                                              \
                if (atomic_load(&otd->thread_mb_pos) >= tmp)                  \
                    break;                                                    \
                pthread_cond_wait(&otd->cond, &otd->lock);                    \
            } while (1);                                                      \
            atomic_store(&td->wait_mb_pos, INT_MAX);                          \
            pthread_mutex_unlock(&otd->lock);                                 \
        }                                                                     \
    } while (0)

#define update_pos(td, mb_y, mb_x)                                            \
    do {                                                                      \
        int pos = (mb_y << 16) | (mb_x & 0xFFFF);                             \
        int sliced_threading = (avctx->active_thread_type == FF_THREAD_SLICE) && \
                               (num_jobs > 1);                                \
        int is_null          = !next_td || !prev_td;                          \
        int pos_check        = (is_null) ? 1 :                                \
            (next_td != td && pos >= atomic_load(&next_td->wait_mb_pos)) ||   \
            (prev_td != td && pos >= atomic_load(&prev_td->wait_mb_pos));     \
        atomic_store(&td->thread_mb_pos, pos);                                \
        if (sliced_threading && pos_check) {                                  \
            pthread_mutex_lock(&td->lock);                                    \
            pthread_cond_broadcast(&td->cond);                                \
            pthread_mutex_unlock(&td->lock);                                  \
        }                                                                     \
    } while (0)
#else
#define check_thread_pos(td, otd, mb_x_check, mb_y_check) do {} while (0)
#define update_pos(td, mb_y, mb_x)                        do {} while (0)
#endif

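/**
 * Decode one macroblock row without applying the loop filter. With sliced
 * threading, job N handles rows N, N + num_jobs, ...; each row must stay
 * a codec-dependent number of macroblocks behind the row above it.
 */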
static av_always_inline int decode_mb_row_no_filter(AVCodecContext *avctx, void *tdata,
                                                    int jobnr, int threadnr, int is_vp7)
{
    VP8Context *s = avctx->priv_data;
    VP8ThreadData *prev_td, *next_td, *td = &s->thread_data[threadnr];
    int mb_y = atomic_load(&td->thread_mb_pos) >> 16;
    int mb_x, mb_xy = mb_y * s->mb_width;
    int num_jobs = s->num_jobs;
    VP8Frame *curframe = s->curframe, *prev_frame = s->prev_frame;
    VP56RangeCoder *c  = &s->coeff_partition[mb_y & (s->num_coeff_partitions - 1)];
    VP8Macroblock *mb;
    uint8_t *dst[3] = {
        curframe->tf.f->data[0] + 16 * mb_y * s->linesize,
        curframe->tf.f->data[1] +  8 * mb_y * s->uvlinesize,
        curframe->tf.f->data[2] +  8 * mb_y * s->uvlinesize
    };

    if (vpX_rac_is_end(c))
        return AVERROR_INVALIDDATA;

    if (mb_y == 0)
        prev_td = td;
    else
        prev_td = &s->thread_data[(jobnr + num_jobs - 1) % num_jobs];
    if (mb_y == s->mb_height - 1)
        next_td = td;
    else
        next_td = &s->thread_data[(jobnr + 1) % num_jobs];
    if (s->mb_layout == 1)
        mb = s->macroblocks_base + ((s->mb_width + 1) * (mb_y + 1) + 1);
    else {
        // Make sure the previous frame has read its segmentation map,
        // if we re-use the same map.
        if (prev_frame && s->segmentation.enabled &&
            !s->segmentation.update_map)
            ff_thread_await_progress(&prev_frame->tf, mb_y, 0);
        mb = s->macroblocks + (s->mb_height - mb_y - 1) * 2;
        memset(mb - 1, 0, sizeof(*mb)); // zero left macroblock
        AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED * 0x01010101);
    }

    if (!is_vp7 || mb_y == 0)
        memset(td->left_nnz, 0, sizeof(td->left_nnz));

    td->mv_bounds.mv_min.x = -MARGIN;
    td->mv_bounds.mv_max.x = ((s->mb_width - 1) << 6) + MARGIN;

    for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
        if (vpX_rac_is_end(c))
            return AVERROR_INVALIDDATA;
        // Wait for the previous thread to read mb_x+2 and reach mb_y-1.
        if (prev_td != td) {
            if (threadnr != 0) {
                check_thread_pos(td, prev_td,
                                 mb_x + (is_vp7 ? 2 : 1),
                                 mb_y - (is_vp7 ? 2 : 1));
            } else {
                check_thread_pos(td, prev_td,
                                 mb_x + (is_vp7 ? 2 : 1) + s->mb_width + 3,
                                 mb_y - (is_vp7 ? 2 : 1));
            }
        }

        s->vdsp.prefetch(dst[0] + (mb_x & 3) * 4 * s->linesize + 64,
                         s->linesize, 4);
        s->vdsp.prefetch(dst[1] + (mb_x & 7) * s->uvlinesize + 64,
                         dst[2] - dst[1], 2);

        if (!s->mb_layout)
            decode_mb_mode(s, &td->mv_bounds, mb, mb_x, mb_y, curframe->seg_map->data + mb_xy,
                           prev_frame && prev_frame->seg_map ?
                           prev_frame->seg_map->data + mb_xy : NULL, 0, is_vp7);

        prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_PREVIOUS);

        if (!mb->skip)
            decode_mb_coeffs(s, td, c, mb, s->top_nnz[mb_x], td->left_nnz, is_vp7);

        if (mb->mode <= MODE_I4x4)
            intra_predict(s, td, dst, mb, mb_x, mb_y, is_vp7);
        else
            inter_predict(s, td, dst, mb, mb_x, mb_y);

        prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN);

        if (!mb->skip) {
            idct_mb(s, td, dst, mb);
        } else {
            AV_ZERO64(td->left_nnz);
            AV_WN64(s->top_nnz[mb_x], 0); // array of 9, so unaligned

            /* Reset DC block predictors if they would exist
             * if the mb had coefficients */
            if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) {
                td->left_nnz[8]     = 0;
                s->top_nnz[mb_x][8] = 0;
            }
        }

        if (s->deblock_filter)
            filter_level_for_mb(s, mb, &td->filter_strength[mb_x], is_vp7);

        if (s->deblock_filter && num_jobs != 1 && threadnr == num_jobs - 1) {
            if (s->filter.simple)
                backup_mb_border(s->top_border[mb_x + 1], dst[0],
                                 NULL, NULL, s->linesize, 0, 1);
            else
                backup_mb_border(s->top_border[mb_x + 1], dst[0],
                                 dst[1], dst[2], s->linesize, s->uvlinesize, 0);
        }

        prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN2);

        dst[0] += 16;
        dst[1] += 8;
        dst[2] += 8;
        td->mv_bounds.mv_min.x -= 64;
        td->mv_bounds.mv_max.x -= 64;

        if (mb_x == s->mb_width + 1) {
            update_pos(td, mb_y, s->mb_width + 3);
        } else {
            update_pos(td, mb_y, mb_x);
        }
    }
    return 0;
}

static int vp7_decode_mb_row_no_filter(AVCodecContext *avctx, void *tdata,
                                       int jobnr, int threadnr)
{
    return decode_mb_row_no_filter(avctx, tdata, jobnr, threadnr, 1);
}

static int vp8_decode_mb_row_no_filter(AVCodecContext *avctx, void *tdata,
                                       int jobnr, int threadnr)
{
    return decode_mb_row_no_filter(avctx, tdata, jobnr, threadnr, 0);
}

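/* Apply the loop filter to one already-decoded macroblock row, staying
 * behind the decoding pass and ahead of the row below via the same
 * position-based synchronization. */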
static av_always_inline void filter_mb_row(AVCodecContext *avctx, void *tdata,
                                           int jobnr, int threadnr, int is_vp7)
{
    VP8Context *s = avctx->priv_data;
    VP8ThreadData *td = &s->thread_data[threadnr];
    int mb_x, mb_y = atomic_load(&td->thread_mb_pos) >> 16, num_jobs = s->num_jobs;
    AVFrame *curframe = s->curframe->tf.f;
    VP8Macroblock *mb;
    VP8ThreadData *prev_td, *next_td;
    uint8_t *dst[3] = {
        curframe->data[0] + 16 * mb_y * s->linesize,
        curframe->data[1] +  8 * mb_y * s->uvlinesize,
        curframe->data[2] +  8 * mb_y * s->uvlinesize
    };

    if (s->mb_layout == 1)
        mb = s->macroblocks_base + ((s->mb_width + 1) * (mb_y + 1) + 1);
    else
        mb = s->macroblocks + (s->mb_height - mb_y - 1) * 2;

    if (mb_y == 0)
        prev_td = td;
    else
        prev_td = &s->thread_data[(jobnr + num_jobs - 1) % num_jobs];
    if (mb_y == s->mb_height - 1)
        next_td = td;
    else
        next_td = &s->thread_data[(jobnr + 1) % num_jobs];

    for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb++) {
        VP8FilterStrength *f = &td->filter_strength[mb_x];
        if (prev_td != td)
            check_thread_pos(td, prev_td,
                             (mb_x + 1) + (s->mb_width + 3), mb_y - 1);
        if (next_td != td)
            if (next_td != &s->thread_data[0])
                check_thread_pos(td, next_td, mb_x + 1, mb_y + 1);

        if (num_jobs == 1) {
            if (s->filter.simple)
                backup_mb_border(s->top_border[mb_x + 1], dst[0],
                                 NULL, NULL, s->linesize, 0, 1);
            else
                backup_mb_border(s->top_border[mb_x + 1], dst[0],
                                 dst[1], dst[2], s->linesize, s->uvlinesize, 0);
        }

        if (s->filter.simple)
            filter_mb_simple(s, dst[0], f, mb_x, mb_y);
        else
            filter_mb(s, dst, f, mb_x, mb_y, is_vp7);
        dst[0] += 16;
        dst[1] += 8;
        dst[2] += 8;

        update_pos(td, mb_y, (s->mb_width + 3) + mb_x);
    }
}

static void vp7_filter_mb_row(AVCodecContext *avctx, void *tdata,
                              int jobnr, int threadnr)
{
    filter_mb_row(avctx, tdata, jobnr, threadnr, 1);
}

static void vp8_filter_mb_row(AVCodecContext *avctx, void *tdata,
                              int jobnr, int threadnr)
{
    filter_mb_row(avctx, tdata, jobnr, threadnr, 0);
}

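/* Per-job worker for execute2(): decode (and optionally filter) every
 * num_jobs-th macroblock row, reporting per-row progress so that
 * frame-threaded consumers of this frame can proceed. */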
static av_always_inline
int vp78_decode_mb_row_sliced(AVCodecContext *avctx, void *tdata, int jobnr,
                              int threadnr, int is_vp7)
{
    VP8Context *s = avctx->priv_data;
    VP8ThreadData *td = &s->thread_data[jobnr];
    VP8ThreadData *next_td = NULL, *prev_td = NULL;
    VP8Frame *curframe = s->curframe;
    int mb_y, num_jobs = s->num_jobs;
    int ret;

    td->thread_nr = threadnr;
    td->mv_bounds.mv_min.y = -MARGIN - 64 * threadnr;
    td->mv_bounds.mv_max.y = ((s->mb_height - 1) << 6) + MARGIN - 64 * threadnr;
    for (mb_y = jobnr; mb_y < s->mb_height; mb_y += num_jobs) {
        atomic_store(&td->thread_mb_pos, mb_y << 16);
        ret = s->decode_mb_row_no_filter(avctx, tdata, jobnr, threadnr);
        if (ret < 0) {
            update_pos(td, s->mb_height, INT_MAX & 0xFFFF);
            return ret;
        }
        if (s->deblock_filter)
            s->filter_mb_row(avctx, tdata, jobnr, threadnr);
        update_pos(td, mb_y, INT_MAX & 0xFFFF);

        td->mv_bounds.mv_min.y -= 64 * num_jobs;
        td->mv_bounds.mv_max.y -= 64 * num_jobs;

        if (avctx->active_thread_type == FF_THREAD_FRAME)
            ff_thread_report_progress(&curframe->tf, mb_y, 0);
    }

    return 0;
}

static int vp7_decode_mb_row_sliced(AVCodecContext *avctx, void *tdata,
                                    int jobnr, int threadnr)
{
    return vp78_decode_mb_row_sliced(avctx, tdata, jobnr, threadnr, IS_VP7);
}

static int vp8_decode_mb_row_sliced(AVCodecContext *avctx, void *tdata,
                                    int jobnr, int threadnr)
{
    return vp78_decode_mb_row_sliced(avctx, tdata, jobnr, threadnr, IS_VP8);
}

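/* Frame-level decode entry point shared by VP7 and VP8: parse the frame
 * header, rotate the reference-frame pointers, then decode either through
 * a hwaccel or via the row workers above. */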
static av_always_inline
int vp78_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
                      AVPacket *avpkt, int is_vp7)
{
    VP8Context *s = avctx->priv_data;
    int ret, i, referenced, num_jobs;
    enum AVDiscard skip_thresh;
    VP8Frame *av_uninit(curframe), *prev_frame;

    if (is_vp7)
        ret = vp7_decode_frame_header(s, avpkt->data, avpkt->size);
    else
        ret = vp8_decode_frame_header(s, avpkt->data, avpkt->size);

    if (ret < 0)
        goto err;

    if (s->actually_webp) {
        // avctx->pix_fmt already set in caller.
    } else if (!is_vp7 && s->pix_fmt == AV_PIX_FMT_NONE) {
        s->pix_fmt = get_pixel_format(s);
        if (s->pix_fmt < 0) {
            ret = AVERROR(EINVAL);
            goto err;
        }
        avctx->pix_fmt = s->pix_fmt;
    }

    prev_frame = s->framep[VP56_FRAME_CURRENT];

    referenced = s->update_last || s->update_golden == VP56_FRAME_CURRENT ||
                 s->update_altref == VP56_FRAME_CURRENT;

    skip_thresh = !referenced ? AVDISCARD_NONREF
                : !s->keyframe ? AVDISCARD_NONKEY
                               : AVDISCARD_ALL;

    if (avctx->skip_frame >= skip_thresh) {
        s->invisible = 1;
        memcpy(&s->next_framep[0], &s->framep[0], sizeof(s->framep[0]) * 4);
        goto skip_decode;
    }
    s->deblock_filter = s->filter.level && avctx->skip_loop_filter < skip_thresh;

    // release no longer referenced frames
    for (i = 0; i < 5; i++)
        if (s->frames[i].tf.f->buf[0] &&
            &s->frames[i] != prev_frame &&
            &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
            &s->frames[i] != s->framep[VP56_FRAME_GOLDEN] &&
            &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2])
            vp8_release_frame(s, &s->frames[i]);

    curframe = s->framep[VP56_FRAME_CURRENT] = vp8_find_free_buffer(s);

    if (!s->colorspace)
        avctx->colorspace = AVCOL_SPC_BT470BG;
    if (s->fullrange)
        avctx->color_range = AVCOL_RANGE_JPEG;
    else
        avctx->color_range = AVCOL_RANGE_MPEG;

    /* Given that arithmetic probabilities are updated every frame, it's quite
     * likely that the values we have on a random interframe are complete
     * junk if we didn't start decoding from a keyframe. So just don't
     * display anything rather than junk. */
    if (!s->keyframe && (!s->framep[VP56_FRAME_PREVIOUS] ||
                         !s->framep[VP56_FRAME_GOLDEN]   ||
                         !s->framep[VP56_FRAME_GOLDEN2])) {
        av_log(avctx, AV_LOG_WARNING,
               "Discarding interframe without a prior keyframe!\n");
        ret = AVERROR_INVALIDDATA;
        goto err;
    }

    curframe->tf.f->key_frame = s->keyframe;
    curframe->tf.f->pict_type = s->keyframe ? AV_PICTURE_TYPE_I
                                            : AV_PICTURE_TYPE_P;
    if ((ret = vp8_alloc_frame(s, curframe, referenced)) < 0)
        goto err;

    // check if golden and altref are swapped
    if (s->update_altref != VP56_FRAME_NONE)
        s->next_framep[VP56_FRAME_GOLDEN2] = s->framep[s->update_altref];
    else
        s->next_framep[VP56_FRAME_GOLDEN2] = s->framep[VP56_FRAME_GOLDEN2];

    if (s->update_golden != VP56_FRAME_NONE)
        s->next_framep[VP56_FRAME_GOLDEN] = s->framep[s->update_golden];
    else
        s->next_framep[VP56_FRAME_GOLDEN] = s->framep[VP56_FRAME_GOLDEN];

    if (s->update_last)
        s->next_framep[VP56_FRAME_PREVIOUS] = curframe;
    else
        s->next_framep[VP56_FRAME_PREVIOUS] = s->framep[VP56_FRAME_PREVIOUS];

    s->next_framep[VP56_FRAME_CURRENT] = curframe;

    if (avctx->codec->update_thread_context)
        ff_thread_finish_setup(avctx);

    if (avctx->hwaccel) {
        ret = avctx->hwaccel->start_frame(avctx, avpkt->data, avpkt->size);
        if (ret < 0)
            goto err;

        ret = avctx->hwaccel->decode_slice(avctx, avpkt->data, avpkt->size);
        if (ret < 0)
            goto err;

        ret = avctx->hwaccel->end_frame(avctx);
        if (ret < 0)
            goto err;
    } else {
        s->linesize   = curframe->tf.f->linesize[0];
        s->uvlinesize = curframe->tf.f->linesize[1];

        memset(s->top_nnz, 0, s->mb_width * sizeof(*s->top_nnz));
        /* Zero macroblock structures for top/top-left prediction
         * from outside the frame. */
        if (!s->mb_layout)
            memset(s->macroblocks + s->mb_height * 2 - 1, 0,
                   (s->mb_width + 1) * sizeof(*s->macroblocks));
        if (!s->mb_layout && s->keyframe)
            memset(s->intra4x4_pred_mode_top, DC_PRED, s->mb_width * 4);

        memset(s->ref_count, 0, sizeof(s->ref_count));

        if (s->mb_layout == 1) {
            // Make sure the previous frame has read its segmentation map,
            // if we re-use the same map.
            if (prev_frame && s->segmentation.enabled &&
                !s->segmentation.update_map)
                ff_thread_await_progress(&prev_frame->tf, 1, 0);
            if (is_vp7)
                ret = vp7_decode_mv_mb_modes(avctx, curframe, prev_frame);
            else
                ret = vp8_decode_mv_mb_modes(avctx, curframe, prev_frame);
            if (ret < 0)
                goto err;
        }

        if (avctx->active_thread_type == FF_THREAD_FRAME)
            num_jobs = 1;
        else
            num_jobs = FFMIN(s->num_coeff_partitions, avctx->thread_count);
        s->num_jobs   = num_jobs;
        s->curframe   = curframe;
        s->prev_frame = prev_frame;
        s->mv_bounds.mv_min.y = -MARGIN;
        s->mv_bounds.mv_max.y = ((s->mb_height - 1) << 6) + MARGIN;
        for (i = 0; i < MAX_THREADS; i++) {
            VP8ThreadData *td = &s->thread_data[i];
            atomic_init(&td->thread_mb_pos, 0);
            atomic_init(&td->wait_mb_pos, INT_MAX);
        }
        if (is_vp7)
            avctx->execute2(avctx, vp7_decode_mb_row_sliced, s->thread_data, NULL,
                            num_jobs);
        else
            avctx->execute2(avctx, vp8_decode_mb_row_sliced, s->thread_data, NULL,
                            num_jobs);
    }

    ff_thread_report_progress(&curframe->tf, INT_MAX, 0);
    memcpy(&s->framep[0], &s->next_framep[0], sizeof(s->framep[0]) * 4);

skip_decode:
    // if future frames don't use the updated probabilities,
    // reset them to the values we saved
    if (!s->update_probabilities)
        s->prob[0] = s->prob[1];

    if (!s->invisible) {
        if ((ret = av_frame_ref(data, curframe->tf.f)) < 0)
            return ret;
        *got_frame = 1;
    }

    return avpkt->size;
err:
    memcpy(&s->next_framep[0], &s->framep[0], sizeof(s->framep[0]) * 4);
    return ret;
}

int ff_vp8_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
                        AVPacket *avpkt)
{
    return vp78_decode_frame(avctx, data, got_frame, avpkt, IS_VP8);
}

#if CONFIG_VP7_DECODER
static int vp7_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
                            AVPacket *avpkt)
{
    return vp78_decode_frame(avctx, data, got_frame, avpkt, IS_VP7);
}
#endif /* CONFIG_VP7_DECODER */

av_cold int ff_vp8_decode_free(AVCodecContext *avctx)
{
    VP8Context *s = avctx->priv_data;
    int i;

    if (!s)
        return 0;

    vp8_decode_flush_impl(avctx, 1);
    for (i = 0; i < FF_ARRAY_ELEMS(s->frames); i++)
        av_frame_free(&s->frames[i].tf.f);

    return 0;
}

static av_cold int vp8_init_frames(VP8Context *s)
{
    int i;
    for (i = 0; i < FF_ARRAY_ELEMS(s->frames); i++) {
        s->frames[i].tf.f = av_frame_alloc();
        if (!s->frames[i].tf.f)
            return AVERROR(ENOMEM);
    }
    return 0;
}

static av_always_inline
int vp78_decode_init(AVCodecContext *avctx, int is_vp7)
{
    VP8Context *s = avctx->priv_data;
    int ret;

    s->avctx   = avctx;
    s->vp7     = avctx->codec->id == AV_CODEC_ID_VP7;
    s->pix_fmt = AV_PIX_FMT_NONE;
    avctx->pix_fmt = AV_PIX_FMT_YUV420P;

    ff_videodsp_init(&s->vdsp, 8);

    ff_vp78dsp_init(&s->vp8dsp);
    if (CONFIG_VP7_DECODER && is_vp7) {
        ff_h264_pred_init(&s->hpc, AV_CODEC_ID_VP7, 8, 1);
        ff_vp7dsp_init(&s->vp8dsp);
        s->decode_mb_row_no_filter = vp7_decode_mb_row_no_filter;
        s->filter_mb_row           = vp7_filter_mb_row;
    } else if (CONFIG_VP8_DECODER && !is_vp7) {
        ff_h264_pred_init(&s->hpc, AV_CODEC_ID_VP8, 8, 1);
        ff_vp8dsp_init(&s->vp8dsp);
        s->decode_mb_row_no_filter = vp8_decode_mb_row_no_filter;
        s->filter_mb_row           = vp8_filter_mb_row;
    }

    /* does not change for VP8 */
    memcpy(s->prob[0].scan, ff_zigzag_scan, sizeof(s->prob[0].scan));

    if ((ret = vp8_init_frames(s)) < 0) {
        ff_vp8_decode_free(avctx);
        return ret;
    }

    return 0;
}

#if CONFIG_VP7_DECODER
static int vp7_decode_init(AVCodecContext *avctx)
{
    return vp78_decode_init(avctx, IS_VP7);
}
#endif /* CONFIG_VP7_DECODER */

av_cold int ff_vp8_decode_init(AVCodecContext *avctx)
{
    return vp78_decode_init(avctx, IS_VP8);
}

#if CONFIG_VP8_DECODER
#if HAVE_THREADS
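/* Translate a frame pointer from the source thread context into the
 * corresponding slot of the destination context's frames[] array. */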
#define REBASE(pic) ((pic) ? (pic) - &s_src->frames[0] + &s->frames[0] : NULL)

static int vp8_decode_update_thread_context(AVCodecContext *dst,
                                            const AVCodecContext *src)
{
    VP8Context *s = dst->priv_data, *s_src = src->priv_data;
    int i;

    if (s->macroblocks_base &&
        (s_src->mb_width != s->mb_width || s_src->mb_height != s->mb_height)) {
        free_buffers(s);
        s->mb_width  = s_src->mb_width;
        s->mb_height = s_src->mb_height;
    }

    s->pix_fmt      = s_src->pix_fmt;
    s->prob[0]      = s_src->prob[!s_src->update_probabilities];
    s->segmentation = s_src->segmentation;
    s->lf_delta     = s_src->lf_delta;
    memcpy(s->sign_bias, s_src->sign_bias, sizeof(s->sign_bias));

    for (i = 0; i < FF_ARRAY_ELEMS(s_src->frames); i++) {
        if (s_src->frames[i].tf.f->buf[0]) {
            int ret = vp8_ref_frame(s, &s->frames[i], &s_src->frames[i]);
            if (ret < 0)
                return ret;
        }
    }

    s->framep[0] = REBASE(s_src->next_framep[0]);
    s->framep[1] = REBASE(s_src->next_framep[1]);
    s->framep[2] = REBASE(s_src->next_framep[2]);
    s->framep[3] = REBASE(s_src->next_framep[3]);

    return 0;
}
#endif /* HAVE_THREADS */
#endif /* CONFIG_VP8_DECODER */

#if CONFIG_VP7_DECODER
AVCodec ff_vp7_decoder = {
    .name           = "vp7",
    .long_name      = NULL_IF_CONFIG_SMALL("On2 VP7"),
    .type           = AVMEDIA_TYPE_VIDEO,
    .id             = AV_CODEC_ID_VP7,
    .priv_data_size = sizeof(VP8Context),
    .init           = vp7_decode_init,
    .close          = ff_vp8_decode_free,
    .decode         = vp7_decode_frame,
    .capabilities   = AV_CODEC_CAP_DR1,
    .flush          = vp8_decode_flush,
};
#endif /* CONFIG_VP7_DECODER */

#if CONFIG_VP8_DECODER
AVCodec ff_vp8_decoder = {
    .name                  = "vp8",
    .long_name             = NULL_IF_CONFIG_SMALL("On2 VP8"),
    .type                  = AVMEDIA_TYPE_VIDEO,
    .id                    = AV_CODEC_ID_VP8,
    .priv_data_size        = sizeof(VP8Context),
    .init                  = ff_vp8_decode_init,
    .close                 = ff_vp8_decode_free,
    .decode                = ff_vp8_decode_frame,
    .capabilities          = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_FRAME_THREADS |
                             AV_CODEC_CAP_SLICE_THREADS,
    .flush                 = vp8_decode_flush,
    .update_thread_context = ONLY_IF_THREADS_ENABLED(vp8_decode_update_thread_context),
    .hw_configs            = (const AVCodecHWConfigInternal *[]) {
#if CONFIG_VP8_VAAPI_HWACCEL
                                 HWACCEL_VAAPI(vp8),
#endif
#if CONFIG_VP8_NVDEC_HWACCEL
                                 HWACCEL_NVDEC(vp8),
#endif
                                 NULL
                             },
    .caps_internal         = FF_CODEC_CAP_ALLOCATE_PROGRESS,
};
#endif /* CONFIG_VP8_DECODER */
