1 /*
2 * VP9 compatible video decoder
3 *
4 * Copyright (C) 2013 Ronald S. Bultje <rsbultje gmail com>
5 * Copyright (C) 2013 Clément Bœsch <u pkh me>
6 *
7 * This file is part of FFmpeg.
8 *
9 * FFmpeg is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
13 *
14 * FFmpeg is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
18 *
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with FFmpeg; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22 */
23
#include <limits.h>

#include "avcodec.h"
#include "get_bits.h"
#include "hwconfig.h"
#include "internal.h"
#include "profiles.h"
#include "thread.h"
#include "videodsp.h"
#include "vp56.h"
#include "vp9.h"
#include "vp9data.h"
#include "vp9dec.h"
#include "libavutil/avassert.h"
#include "libavutil/pixdesc.h"
#include "libavutil/video_enc_params.h"
38
39 #define VP9_SYNCCODE 0x498342
40
41 #if HAVE_THREADS
// Tear down the slice-thread progress state (counters, mutex, condvar).
// No-op when slice threading is not in use, since nothing was allocated.
static void vp9_free_entries(AVCodecContext *avctx) {
    VP9Context *ctx = avctx->priv_data;

    if (!(avctx->active_thread_type & FF_THREAD_SLICE))
        return;

    av_freep(&ctx->entries);
    pthread_cond_destroy(&ctx->progress_cond);
    pthread_mutex_destroy(&ctx->progress_mutex);
}
51
/**
 * Allocate n atomic progress counters plus the mutex/condvar used to
 * publish and await per-row tile decoding progress.
 *
 * Only does anything when slice threading is active; the caller is
 * expected to pair this with vp9_free_entries().
 *
 * @param avctx codec context (priv_data is the VP9Context)
 * @param n     number of counters (one per superblock row)
 * @return 0 on success, AVERROR(ENOMEM) on allocation failure
 */
static int vp9_alloc_entries(AVCodecContext *avctx, int n) {
    VP9Context *s = avctx->priv_data;
    int i;

    if (avctx->active_thread_type & FF_THREAD_SLICE) {
        // av_freep() is NULL-safe and clears the pointer, so no guard
        // is needed here (and none on the failure path below either).
        av_freep(&s->entries);

        s->entries = av_malloc_array(n, sizeof(atomic_int));
        if (!s->entries)
            return AVERROR(ENOMEM);

        for (i = 0; i < n; i++)
            atomic_init(&s->entries[i], 0);

        pthread_mutex_init(&s->progress_mutex, NULL);
        pthread_cond_init(&s->progress_cond, NULL);
    }
    return 0;
}
75
// Publish that n more rows of progress counter `field` are complete.
// The add is performed under the mutex so a waiter cannot miss the
// signal between its atomic re-check and pthread_cond_wait().
static void vp9_report_tile_progress(VP9Context *s, int field, int n) {
    pthread_mutex_lock(&s->progress_mutex);
    // release order pairs with the acquire load in vp9_await_tile_progress()
    atomic_fetch_add_explicit(&s->entries[field], n, memory_order_release);
    pthread_cond_signal(&s->progress_cond);
    pthread_mutex_unlock(&s->progress_mutex);
}
82
/**
 * Block until progress counter `field` has reached at least n.
 *
 * Lock-free fast path first; otherwise wait on the condvar that
 * vp9_report_tile_progress() signals.
 */
static void vp9_await_tile_progress(VP9Context *s, int field, int n) {
    // acquire order pairs with the release add in vp9_report_tile_progress()
    if (atomic_load_explicit(&s->entries[field], memory_order_acquire) >= n)
        return;

    pthread_mutex_lock(&s->progress_mutex);
    // Wait for "at least n" ('<', not '!='): if the producer overshoots n
    // between the fast-path check above and acquiring the mutex, an
    // equality test would never become true and we would hang forever.
    while (atomic_load_explicit(&s->entries[field], memory_order_relaxed) < n)
        pthread_cond_wait(&s->progress_cond, &s->progress_mutex);
    pthread_mutex_unlock(&s->progress_mutex);
}
92 #else
// Stub for builds without thread support: no progress state exists.
static void vp9_free_entries(AVCodecContext *avctx) {}
// Stub for builds without thread support: nothing to allocate, always succeeds.
static int vp9_alloc_entries(AVCodecContext *avctx, int n) { return 0; }
95 #endif
96
// Free a tile's scratch buffers (block structs, coefficient/EOB storage,
// optional block-structure side data). Safe to call repeatedly since
// av_freep() clears the pointers.
static void vp9_tile_data_free(VP9TileData *td)
{
    av_freep(&td->b_base);
    av_freep(&td->block_base);
    av_freep(&td->block_structure);
}
103
// Drop all references held by f: the frame buffer, the extradata buffer
// (segmentation map + motion vectors) and any hwaccel private buffer.
// The raw pointers are cleared because they alias the unref'd buffers.
static void vp9_frame_unref(AVCodecContext *avctx, VP9Frame *f)
{
    ff_thread_release_buffer(avctx, &f->tf);
    av_buffer_unref(&f->extradata);
    av_buffer_unref(&f->hwaccel_priv_buf);
    f->segmentation_map = NULL;
    f->hwaccel_picture_private = NULL;
}
112
/**
 * Allocate a frame buffer plus its per-frame extradata: sz bytes of
 * segmentation map followed by sz VP9mvrefPair entries, served from a
 * buffer pool that is (re)created whenever the frame geometry changes.
 *
 * @return 0 on success, a negative AVERROR code on failure
 */
static int vp9_frame_alloc(AVCodecContext *avctx, VP9Frame *f)
{
    VP9Context *s = avctx->priv_data;
    int ret, sz;

    ret = ff_thread_get_buffer(avctx, &f->tf, AV_GET_BUFFER_FLAG_REF);
    if (ret < 0)
        return ret;

    sz = 64 * s->sb_cols * s->sb_rows;
    if (sz != s->frame_extradata_pool_size) {
        av_buffer_pool_uninit(&s->frame_extradata_pool);
        // Guard the pool element size computation against int overflow
        // for pathologically large frame dimensions.
        if (sz > INT_MAX / (int)(1 + sizeof(VP9mvrefPair))) {
            s->frame_extradata_pool_size = 0;
            goto fail;
        }
        s->frame_extradata_pool = av_buffer_pool_init(sz * (1 + sizeof(VP9mvrefPair)), NULL);
        if (!s->frame_extradata_pool) {
            s->frame_extradata_pool_size = 0;
            goto fail;
        }
        s->frame_extradata_pool_size = sz;
    }
    f->extradata = av_buffer_pool_get(s->frame_extradata_pool);
    if (!f->extradata) {
        goto fail;
    }
    // Pool buffers are recycled, so clear stale contents from earlier frames.
    memset(f->extradata->data, 0, f->extradata->size);

    f->segmentation_map = f->extradata->data;
    f->mv = (VP9mvrefPair *) (f->extradata->data + sz);

    if (avctx->hwaccel) {
        const AVHWAccel *hwaccel = avctx->hwaccel;
        av_assert0(!f->hwaccel_picture_private);
        if (hwaccel->frame_priv_data_size) {
            f->hwaccel_priv_buf = av_buffer_allocz(hwaccel->frame_priv_data_size);
            if (!f->hwaccel_priv_buf)
                goto fail;
            f->hwaccel_picture_private = f->hwaccel_priv_buf->data;
        }
    }

    return 0;

fail:
    vp9_frame_unref(avctx, f);
    return AVERROR(ENOMEM);
}
158
// Make dst a new reference to src: ref-count the frame buffer, the
// extradata and (if present) the hwaccel private buffer, and copy the
// raw pointers that alias them. On failure dst is fully unref'd.
static int vp9_frame_ref(AVCodecContext *avctx, VP9Frame *dst, VP9Frame *src)
{
    int res = ff_thread_ref_frame(&dst->tf, &src->tf);
    if (res < 0)
        return res;

    dst->extradata = av_buffer_ref(src->extradata);
    if (!dst->extradata)
        goto fail;

    // These point into the extradata buffer referenced above.
    dst->segmentation_map = src->segmentation_map;
    dst->mv               = src->mv;
    dst->uses_2pass       = src->uses_2pass;

    if (src->hwaccel_picture_private) {
        dst->hwaccel_priv_buf = av_buffer_ref(src->hwaccel_priv_buf);
        if (!dst->hwaccel_priv_buf)
            goto fail;
        dst->hwaccel_picture_private = dst->hwaccel_priv_buf->data;
    }

    return 0;

fail:
    vp9_frame_unref(avctx, dst);
    return AVERROR(ENOMEM);
}
188
// (Re)configure the decoder for a new frame size / pixel format:
// negotiate the output pix_fmt (offering hwaccel formats first), then
// reallocate the per-superblock-column "above" context arrays and the
// loop-filter level storage. Returns 0 or a negative AVERROR code.
static int update_size(AVCodecContext *avctx, int w, int h)
{
#define HWACCEL_MAX (CONFIG_VP9_DXVA2_HWACCEL + \
                     CONFIG_VP9_D3D11VA_HWACCEL * 2 + \
                     CONFIG_VP9_NVDEC_HWACCEL + \
                     CONFIG_VP9_VAAPI_HWACCEL + \
                     CONFIG_VP9_VDPAU_HWACCEL)
    enum AVPixelFormat pix_fmts[HWACCEL_MAX + 2], *fmtp = pix_fmts;
    VP9Context *s = avctx->priv_data;
    uint8_t *p;
    int bytesperpixel = s->bytesperpixel, ret, cols, rows;
    int lflvl_len, i;

    av_assert0(w > 0 && h > 0);

    if (!(s->pix_fmt == s->gf_fmt && w == s->w && h == s->h)) {
        if ((ret = ff_set_dimensions(avctx, w, h)) < 0)
            return ret;

        // Build the candidate list, hardware formats before software.
        switch (s->pix_fmt) {
        case AV_PIX_FMT_YUV420P:
#if CONFIG_VP9_VDPAU_HWACCEL
            *fmtp++ = AV_PIX_FMT_VDPAU;
#endif
            /* fallthrough */ // 8-bit 4:2:0 shares the hwaccels below with 10-bit
        case AV_PIX_FMT_YUV420P10:
#if CONFIG_VP9_DXVA2_HWACCEL
            *fmtp++ = AV_PIX_FMT_DXVA2_VLD;
#endif
#if CONFIG_VP9_D3D11VA_HWACCEL
            *fmtp++ = AV_PIX_FMT_D3D11VA_VLD;
            *fmtp++ = AV_PIX_FMT_D3D11;
#endif
#if CONFIG_VP9_NVDEC_HWACCEL
            *fmtp++ = AV_PIX_FMT_CUDA;
#endif
#if CONFIG_VP9_VAAPI_HWACCEL
            *fmtp++ = AV_PIX_FMT_VAAPI;
#endif
            break;
        case AV_PIX_FMT_YUV420P12:
#if CONFIG_VP9_NVDEC_HWACCEL
            *fmtp++ = AV_PIX_FMT_CUDA;
#endif
#if CONFIG_VP9_VAAPI_HWACCEL
            *fmtp++ = AV_PIX_FMT_VAAPI;
#endif
            break;
        }

        // The software format is always the last (fallback) entry.
        *fmtp++ = s->pix_fmt;
        *fmtp = AV_PIX_FMT_NONE;

        ret = ff_thread_get_format(avctx, pix_fmts);
        if (ret < 0)
            return ret;

        avctx->pix_fmt = ret;
        s->gf_fmt = s->pix_fmt;
        s->w = w;
        s->h = h;
    }

    // Geometry in 8x8-pixel block units.
    cols = (w + 7) >> 3;
    rows = (h + 7) >> 3;

    // Existing context buffers remain valid if neither the 8x8-block
    // geometry nor the pixel format changed.
    if (s->intra_pred_data[0] && cols == s->cols && rows == s->rows && s->pix_fmt == s->last_fmt)
        return 0;

    s->last_fmt = s->pix_fmt;
    s->sb_cols = (w + 63) >> 6; // 64x64 superblock units
    s->sb_rows = (h + 63) >> 6;
    s->cols = (w + 7) >> 3;
    s->rows = (h + 7) >> 3;
    // Per-superblock-row lflvl storage is only needed with slice threading.
    lflvl_len = avctx->active_thread_type == FF_THREAD_SLICE ? s->sb_rows : 1;

// Carve successive arrays out of the single allocation p; each array is
// sized per superblock column.
#define assign(var, type, n) var = (type) p; p += s->sb_cols * (n) * sizeof(*var)
    av_freep(&s->intra_pred_data[0]);
    // FIXME we slightly over-allocate here for subsampled chroma, but a little
    // bit of padding shouldn't affect performance...
    p = av_malloc(s->sb_cols * (128 + 192 * bytesperpixel +
                                lflvl_len * sizeof(*s->lflvl) + 16 * sizeof(*s->above_mv_ctx)));
    if (!p)
        return AVERROR(ENOMEM);
    assign(s->intra_pred_data[0], uint8_t *, 64 * bytesperpixel);
    assign(s->intra_pred_data[1], uint8_t *, 64 * bytesperpixel);
    assign(s->intra_pred_data[2], uint8_t *, 64 * bytesperpixel);
    assign(s->above_y_nnz_ctx, uint8_t *, 16);
    assign(s->above_mode_ctx, uint8_t *, 16);
    assign(s->above_mv_ctx, VP56mv(*)[2], 16);
    assign(s->above_uv_nnz_ctx[0], uint8_t *, 16);
    assign(s->above_uv_nnz_ctx[1], uint8_t *, 16);
    assign(s->above_partition_ctx, uint8_t *, 8);
    assign(s->above_skip_ctx, uint8_t *, 8);
    assign(s->above_txfm_ctx, uint8_t *, 8);
    assign(s->above_segpred_ctx, uint8_t *, 8);
    assign(s->above_intra_ctx, uint8_t *, 8);
    assign(s->above_comp_ctx, uint8_t *, 8);
    assign(s->above_ref_ctx, uint8_t *, 8);
    assign(s->above_filter_ctx, uint8_t *, 8);
    assign(s->lflvl, VP9Filter *, lflvl_len);
#undef assign

    // Tile scratch buffers depend on geometry; free them here so they are
    // reallocated lazily by update_block_buffers().
    if (s->td) {
        for (i = 0; i < s->active_tile_cols; i++)
            vp9_tile_data_free(&s->td[i]);
    }

    if (s->s.h.bpp != s->last_bpp) {
        // Bit depth changed: reinitialize the DSP function tables.
        ff_vp9dsp_init(&s->dsp, s->s.h.bpp, avctx->flags & AV_CODEC_FLAG_BITEXACT);
        ff_videodsp_init(&s->vdsp, s->s.h.bpp);
        s->last_bpp = s->s.h.bpp;
    }

    return 0;
}
304
// (Re)allocate per-tile block and coefficient buffers. In 2-pass mode one
// tile context holds buffers for the entire frame; in single-pass mode
// each active tile column gets one superblock's worth of scratch space.
// Returns 0 on success or AVERROR(ENOMEM).
static int update_block_buffers(AVCodecContext *avctx)
{
    int i;
    VP9Context *s = avctx->priv_data;
    int chroma_blocks, chroma_eobs, bytesperpixel = s->bytesperpixel;
    VP9TileData *td = &s->td[0];

    // Fast path: buffers exist and the pass mode has not changed.
    if (td->b_base && td->block_base && s->block_alloc_using_2pass == s->s.frames[CUR_FRAME].uses_2pass)
        return 0;

    vp9_tile_data_free(td);
    // Chroma sizes shrink by the subsampling factor in each direction.
    chroma_blocks = 64 * 64 >> (s->ss_h + s->ss_v);
    chroma_eobs = 16 * 16 >> (s->ss_h + s->ss_v);
    if (s->s.frames[CUR_FRAME].uses_2pass) {
        int sbs = s->sb_cols * s->sb_rows;

        td->b_base = av_malloc_array(s->cols * s->rows, sizeof(VP9Block));
        // One allocation laid out as: luma coeffs, both chroma planes'
        // coeffs, luma EOBs, both chroma EOB arrays — for every superblock.
        td->block_base = av_mallocz(((64 * 64 + 2 * chroma_blocks) * bytesperpixel * sizeof(int16_t) +
                                     16 * 16 + 2 * chroma_eobs) * sbs);
        if (!td->b_base || !td->block_base)
            return AVERROR(ENOMEM);
        td->uvblock_base[0] = td->block_base + sbs * 64 * 64 * bytesperpixel;
        td->uvblock_base[1] = td->uvblock_base[0] + sbs * chroma_blocks * bytesperpixel;
        td->eob_base = (uint8_t *) (td->uvblock_base[1] + sbs * chroma_blocks * bytesperpixel);
        td->uveob_base[0] = td->eob_base + 16 * 16 * sbs;
        td->uveob_base[1] = td->uveob_base[0] + chroma_eobs * sbs;

        if (avctx->export_side_data & AV_CODEC_EXPORT_DATA_VIDEO_ENC_PARAMS) {
            td->block_structure = av_malloc_array(s->cols * s->rows, sizeof(*td->block_structure));
            if (!td->block_structure)
                return AVERROR(ENOMEM);
        }
    } else {
        // Single-pass: drop any extra tile buffers left over from 2-pass...
        for (i = 1; i < s->active_tile_cols; i++)
            vp9_tile_data_free(&s->td[i]);

        // ...then give every active tile column its own superblock-sized
        // scratch buffers, with the same internal layout as above.
        for (i = 0; i < s->active_tile_cols; i++) {
            s->td[i].b_base = av_malloc(sizeof(VP9Block));
            s->td[i].block_base = av_mallocz((64 * 64 + 2 * chroma_blocks) * bytesperpixel * sizeof(int16_t) +
                                             16 * 16 + 2 * chroma_eobs);
            if (!s->td[i].b_base || !s->td[i].block_base)
                return AVERROR(ENOMEM);
            s->td[i].uvblock_base[0] = s->td[i].block_base + 64 * 64 * bytesperpixel;
            s->td[i].uvblock_base[1] = s->td[i].uvblock_base[0] + chroma_blocks * bytesperpixel;
            s->td[i].eob_base = (uint8_t *) (s->td[i].uvblock_base[1] + chroma_blocks * bytesperpixel);
            s->td[i].uveob_base[0] = s->td[i].eob_base + 16 * 16;
            s->td[i].uveob_base[1] = s->td[i].uveob_base[0] + chroma_eobs;

            if (avctx->export_side_data & AV_CODEC_EXPORT_DATA_VIDEO_ENC_PARAMS) {
                s->td[i].block_structure = av_malloc_array(s->cols * s->rows, sizeof(*td->block_structure));
                if (!s->td[i].block_structure)
                    return AVERROR(ENOMEM);
            }
        }
    }
    s->block_alloc_using_2pass = s->s.frames[CUR_FRAME].uses_2pass;

    return 0;
}
364
365 // The sign bit is at the end, not the start, of a bit sequence
get_sbits_inv(GetBitContext * gb,int n)366 static av_always_inline int get_sbits_inv(GetBitContext *gb, int n)
367 {
368 int v = get_bits(gb, n);
369 return get_bits1(gb) ? -v : v;
370 }
371
inv_recenter_nonneg(int v,int m)372 static av_always_inline int inv_recenter_nonneg(int v, int m)
373 {
374 if (v > 2 * m)
375 return v;
376 if (v & 1)
377 return m - ((v + 1) >> 1);
378 return m + (v >> 1);
379 }
380
381 // differential forward probability updates
// Decode a differentially coded probability update relative to the
// current probability p (range [1, 255]) and return the new value.
static int update_prob(VP56RangeCoder *c, int p)
{
    // Maps the VLC-decoded index back to an absolute delta; the first 20
    // entries provide the 'cheap, rough' updates mentioned below.
    static const uint8_t inv_map_table[255] = {
          7,  20,  33,  46,  59,  72,  85,  98, 111, 124, 137, 150, 163, 176,
        189, 202, 215, 228, 241, 254,   1,   2,   3,   4,   5,   6,   8,   9,
         10,  11,  12,  13,  14,  15,  16,  17,  18,  19,  21,  22,  23,  24,
         25,  26,  27,  28,  29,  30,  31,  32,  34,  35,  36,  37,  38,  39,
         40,  41,  42,  43,  44,  45,  47,  48,  49,  50,  51,  52,  53,  54,
         55,  56,  57,  58,  60,  61,  62,  63,  64,  65,  66,  67,  68,  69,
         70,  71,  73,  74,  75,  76,  77,  78,  79,  80,  81,  82,  83,  84,
         86,  87,  88,  89,  90,  91,  92,  93,  94,  95,  96,  97,  99, 100,
        101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 112, 113, 114, 115,
        116, 117, 118, 119, 120, 121, 122, 123, 125, 126, 127, 128, 129, 130,
        131, 132, 133, 134, 135, 136, 138, 139, 140, 141, 142, 143, 144, 145,
        146, 147, 148, 149, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160,
        161, 162, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
        177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 190, 191,
        192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 203, 204, 205, 206,
        207, 208, 209, 210, 211, 212, 213, 214, 216, 217, 218, 219, 220, 221,
        222, 223, 224, 225, 226, 227, 229, 230, 231, 232, 233, 234, 235, 236,
        237, 238, 239, 240, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251,
        252, 253, 253,
    };
    int d;

    /* This code is trying to do a differential probability update. For a
     * current probability A in the range [1, 255], the difference to a new
     * probability of any value can be expressed differentially as 1-A, 255-A
     * where some part of this (absolute range) exists both in positive as
     * well as the negative part, whereas another part only exists in one
     * half. We're trying to code this shared part differentially, i.e.
     * times two where the value of the lowest bit specifies the sign, and
     * the single part is then coded on top of this. This absolute difference
     * then again has a value of [0, 254], but a bigger value in this range
     * indicates that we're further away from the original value A, so we
     * can code this as a VLC code, since higher values are increasingly
     * unlikely. The first 20 values in inv_map_table[] allow 'cheap, rough'
     * updates vs. the 'fine, exact' updates further down the range, which
     * adds one extra dimension to this differential update model. */

    // Escape-coded delta index: each extra rac bit opens a larger range.
    if (!vp8_rac_get(c)) {
        d = vp8_rac_get_uint(c, 4) + 0;      // index 0..15
    } else if (!vp8_rac_get(c)) {
        d = vp8_rac_get_uint(c, 4) + 16;     // index 16..31
    } else if (!vp8_rac_get(c)) {
        d = vp8_rac_get_uint(c, 5) + 32;     // index 32..63
    } else {
        d = vp8_rac_get_uint(c, 7);
        if (d >= 65)
            d = (d << 1) - 65 + vp8_rac_get(c);
        d += 64;
        av_assert2(d < FF_ARRAY_ELEMS(inv_map_table));
    }

    // Recenter the mapped delta around p, keeping the result in [1, 255].
    return p <= 128 ? 1 + inv_recenter_nonneg(inv_map_table[d], p - 1) :
                    255 - inv_recenter_nonneg(inv_map_table[d], 255 - p);
}
439
// Parse bit depth, colorspace, color range and chroma subsampling from
// the uncompressed frame header and derive the decoder's pixel format.
// Returns 0 on success, AVERROR_INVALIDDATA on reserved/invalid signalling.
static int read_colorspace_details(AVCodecContext *avctx)
{
    static const enum AVColorSpace colorspaces[8] = {
        AVCOL_SPC_UNSPECIFIED, AVCOL_SPC_BT470BG, AVCOL_SPC_BT709, AVCOL_SPC_SMPTE170M,
        AVCOL_SPC_SMPTE240M, AVCOL_SPC_BT2020_NCL, AVCOL_SPC_RESERVED, AVCOL_SPC_RGB,
    };
    VP9Context *s = avctx->priv_data;
    // Profiles 0/1 are always 8-bit; profiles 2/3 signal 10 vs 12 bit.
    int bits = avctx->profile <= 1 ? 0 : 1 + get_bits1(&s->gb); // 0:8, 1:10, 2:12

    s->bpp_index = bits;
    s->s.h.bpp = 8 + bits * 2;
    s->bytesperpixel = (7 + s->s.h.bpp) >> 3;
    avctx->colorspace = colorspaces[get_bits(&s->gb, 3)];
    if (avctx->colorspace == AVCOL_SPC_RGB) { // RGB = profile 1
        static const enum AVPixelFormat pix_fmt_rgb[3] = {
            AV_PIX_FMT_GBRP, AV_PIX_FMT_GBRP10, AV_PIX_FMT_GBRP12
        };
        // RGB is always 4:4:4 full range.
        s->ss_h = s->ss_v = 0;
        avctx->color_range = AVCOL_RANGE_JPEG;
        s->pix_fmt = pix_fmt_rgb[bits];
        if (avctx->profile & 1) {
            // Odd profiles carry a reserved bit here that must be zero.
            if (get_bits1(&s->gb)) {
                av_log(avctx, AV_LOG_ERROR, "Reserved bit set in RGB\n");
                return AVERROR_INVALIDDATA;
            }
        } else {
            av_log(avctx, AV_LOG_ERROR, "RGB not supported in profile %d\n",
                   avctx->profile);
            return AVERROR_INVALIDDATA;
        }
    } else {
        // Pixel format by [bit depth][vertical ss][horizontal ss].
        static const enum AVPixelFormat pix_fmt_for_ss[3][2 /* v */][2 /* h */] = {
            { { AV_PIX_FMT_YUV444P, AV_PIX_FMT_YUV422P },
              { AV_PIX_FMT_YUV440P, AV_PIX_FMT_YUV420P } },
            { { AV_PIX_FMT_YUV444P10, AV_PIX_FMT_YUV422P10 },
              { AV_PIX_FMT_YUV440P10, AV_PIX_FMT_YUV420P10 } },
            { { AV_PIX_FMT_YUV444P12, AV_PIX_FMT_YUV422P12 },
              { AV_PIX_FMT_YUV440P12, AV_PIX_FMT_YUV420P12 } }
        };
        avctx->color_range = get_bits1(&s->gb) ? AVCOL_RANGE_JPEG : AVCOL_RANGE_MPEG;
        if (avctx->profile & 1) {
            // Odd profiles signal the subsampling mode explicitly.
            s->ss_h = get_bits1(&s->gb);
            s->ss_v = get_bits1(&s->gb);
            s->pix_fmt = pix_fmt_for_ss[bits][s->ss_v][s->ss_h];
            if (s->pix_fmt == AV_PIX_FMT_YUV420P) {
                av_log(avctx, AV_LOG_ERROR, "YUV 4:2:0 not supported in profile %d\n",
                       avctx->profile);
                return AVERROR_INVALIDDATA;
            } else if (get_bits1(&s->gb)) { // reserved bit, must be zero
                av_log(avctx, AV_LOG_ERROR, "Profile %d color details reserved bit set\n",
                       avctx->profile);
                return AVERROR_INVALIDDATA;
            }
        } else {
            // Even profiles are always 4:2:0.
            s->ss_h = s->ss_v = 1;
            s->pix_fmt = pix_fmt_for_ss[bits][1][1];
        }
    }

    return 0;
}
501
decode_frame_header(AVCodecContext * avctx,const uint8_t * data,int size,int * ref)502 static int decode_frame_header(AVCodecContext *avctx,
503 const uint8_t *data, int size, int *ref)
504 {
505 VP9Context *s = avctx->priv_data;
506 int c, i, j, k, l, m, n, w, h, max, size2, ret, sharp;
507 int last_invisible;
508 const uint8_t *data2;
509
510 /* general header */
511 if ((ret = init_get_bits8(&s->gb, data, size)) < 0) {
512 av_log(avctx, AV_LOG_ERROR, "Failed to initialize bitstream reader\n");
513 return ret;
514 }
515 if (get_bits(&s->gb, 2) != 0x2) { // frame marker
516 av_log(avctx, AV_LOG_ERROR, "Invalid frame marker\n");
517 return AVERROR_INVALIDDATA;
518 }
519 avctx->profile = get_bits1(&s->gb);
520 avctx->profile |= get_bits1(&s->gb) << 1;
521 if (avctx->profile == 3) avctx->profile += get_bits1(&s->gb);
522 if (avctx->profile > 3) {
523 av_log(avctx, AV_LOG_ERROR, "Profile %d is not yet supported\n", avctx->profile);
524 return AVERROR_INVALIDDATA;
525 }
526 s->s.h.profile = avctx->profile;
527 if (get_bits1(&s->gb)) {
528 *ref = get_bits(&s->gb, 3);
529 return 0;
530 }
531
532 s->last_keyframe = s->s.h.keyframe;
533 s->s.h.keyframe = !get_bits1(&s->gb);
534
535 last_invisible = s->s.h.invisible;
536 s->s.h.invisible = !get_bits1(&s->gb);
537 s->s.h.errorres = get_bits1(&s->gb);
538 s->s.h.use_last_frame_mvs = !s->s.h.errorres && !last_invisible;
539
540 if (s->s.h.keyframe) {
541 if (get_bits(&s->gb, 24) != VP9_SYNCCODE) { // synccode
542 av_log(avctx, AV_LOG_ERROR, "Invalid sync code\n");
543 return AVERROR_INVALIDDATA;
544 }
545 if ((ret = read_colorspace_details(avctx)) < 0)
546 return ret;
547 // for profile 1, here follows the subsampling bits
548 s->s.h.refreshrefmask = 0xff;
549 w = get_bits(&s->gb, 16) + 1;
550 h = get_bits(&s->gb, 16) + 1;
551 if (get_bits1(&s->gb)) // display size
552 skip_bits(&s->gb, 32);
553 } else {
554 s->s.h.intraonly = s->s.h.invisible ? get_bits1(&s->gb) : 0;
555 s->s.h.resetctx = s->s.h.errorres ? 0 : get_bits(&s->gb, 2);
556 if (s->s.h.intraonly) {
557 if (get_bits(&s->gb, 24) != VP9_SYNCCODE) { // synccode
558 av_log(avctx, AV_LOG_ERROR, "Invalid sync code\n");
559 return AVERROR_INVALIDDATA;
560 }
561 if (avctx->profile >= 1) {
562 if ((ret = read_colorspace_details(avctx)) < 0)
563 return ret;
564 } else {
565 s->ss_h = s->ss_v = 1;
566 s->s.h.bpp = 8;
567 s->bpp_index = 0;
568 s->bytesperpixel = 1;
569 s->pix_fmt = AV_PIX_FMT_YUV420P;
570 avctx->colorspace = AVCOL_SPC_BT470BG;
571 avctx->color_range = AVCOL_RANGE_MPEG;
572 }
573 s->s.h.refreshrefmask = get_bits(&s->gb, 8);
574 w = get_bits(&s->gb, 16) + 1;
575 h = get_bits(&s->gb, 16) + 1;
576 if (get_bits1(&s->gb)) // display size
577 skip_bits(&s->gb, 32);
578 } else {
579 s->s.h.refreshrefmask = get_bits(&s->gb, 8);
580 s->s.h.refidx[0] = get_bits(&s->gb, 3);
581 s->s.h.signbias[0] = get_bits1(&s->gb) && !s->s.h.errorres;
582 s->s.h.refidx[1] = get_bits(&s->gb, 3);
583 s->s.h.signbias[1] = get_bits1(&s->gb) && !s->s.h.errorres;
584 s->s.h.refidx[2] = get_bits(&s->gb, 3);
585 s->s.h.signbias[2] = get_bits1(&s->gb) && !s->s.h.errorres;
586 if (!s->s.refs[s->s.h.refidx[0]].f->buf[0] ||
587 !s->s.refs[s->s.h.refidx[1]].f->buf[0] ||
588 !s->s.refs[s->s.h.refidx[2]].f->buf[0]) {
589 av_log(avctx, AV_LOG_ERROR, "Not all references are available\n");
590 return AVERROR_INVALIDDATA;
591 }
592 if (get_bits1(&s->gb)) {
593 w = s->s.refs[s->s.h.refidx[0]].f->width;
594 h = s->s.refs[s->s.h.refidx[0]].f->height;
595 } else if (get_bits1(&s->gb)) {
596 w = s->s.refs[s->s.h.refidx[1]].f->width;
597 h = s->s.refs[s->s.h.refidx[1]].f->height;
598 } else if (get_bits1(&s->gb)) {
599 w = s->s.refs[s->s.h.refidx[2]].f->width;
600 h = s->s.refs[s->s.h.refidx[2]].f->height;
601 } else {
602 w = get_bits(&s->gb, 16) + 1;
603 h = get_bits(&s->gb, 16) + 1;
604 }
605 // Note that in this code, "CUR_FRAME" is actually before we
606 // have formally allocated a frame, and thus actually represents
607 // the _last_ frame
608 s->s.h.use_last_frame_mvs &= s->s.frames[CUR_FRAME].tf.f->width == w &&
609 s->s.frames[CUR_FRAME].tf.f->height == h;
610 if (get_bits1(&s->gb)) // display size
611 skip_bits(&s->gb, 32);
612 s->s.h.highprecisionmvs = get_bits1(&s->gb);
613 s->s.h.filtermode = get_bits1(&s->gb) ? FILTER_SWITCHABLE :
614 get_bits(&s->gb, 2);
615 s->s.h.allowcompinter = s->s.h.signbias[0] != s->s.h.signbias[1] ||
616 s->s.h.signbias[0] != s->s.h.signbias[2];
617 if (s->s.h.allowcompinter) {
618 if (s->s.h.signbias[0] == s->s.h.signbias[1]) {
619 s->s.h.fixcompref = 2;
620 s->s.h.varcompref[0] = 0;
621 s->s.h.varcompref[1] = 1;
622 } else if (s->s.h.signbias[0] == s->s.h.signbias[2]) {
623 s->s.h.fixcompref = 1;
624 s->s.h.varcompref[0] = 0;
625 s->s.h.varcompref[1] = 2;
626 } else {
627 s->s.h.fixcompref = 0;
628 s->s.h.varcompref[0] = 1;
629 s->s.h.varcompref[1] = 2;
630 }
631 }
632 }
633 }
634 s->s.h.refreshctx = s->s.h.errorres ? 0 : get_bits1(&s->gb);
635 s->s.h.parallelmode = s->s.h.errorres ? 1 : get_bits1(&s->gb);
636 s->s.h.framectxid = c = get_bits(&s->gb, 2);
637 if (s->s.h.keyframe || s->s.h.intraonly)
638 s->s.h.framectxid = 0; // BUG: libvpx ignores this field in keyframes
639
640 /* loopfilter header data */
641 if (s->s.h.keyframe || s->s.h.errorres || s->s.h.intraonly) {
642 // reset loopfilter defaults
643 s->s.h.lf_delta.ref[0] = 1;
644 s->s.h.lf_delta.ref[1] = 0;
645 s->s.h.lf_delta.ref[2] = -1;
646 s->s.h.lf_delta.ref[3] = -1;
647 s->s.h.lf_delta.mode[0] = 0;
648 s->s.h.lf_delta.mode[1] = 0;
649 memset(s->s.h.segmentation.feat, 0, sizeof(s->s.h.segmentation.feat));
650 }
651 s->s.h.filter.level = get_bits(&s->gb, 6);
652 sharp = get_bits(&s->gb, 3);
653 // if sharpness changed, reinit lim/mblim LUTs. if it didn't change, keep
654 // the old cache values since they are still valid
655 if (s->s.h.filter.sharpness != sharp) {
656 for (i = 1; i <= 63; i++) {
657 int limit = i;
658
659 if (sharp > 0) {
660 limit >>= (sharp + 3) >> 2;
661 limit = FFMIN(limit, 9 - sharp);
662 }
663 limit = FFMAX(limit, 1);
664
665 s->filter_lut.lim_lut[i] = limit;
666 s->filter_lut.mblim_lut[i] = 2 * (i + 2) + limit;
667 }
668 }
669 s->s.h.filter.sharpness = sharp;
670 if ((s->s.h.lf_delta.enabled = get_bits1(&s->gb))) {
671 if ((s->s.h.lf_delta.updated = get_bits1(&s->gb))) {
672 for (i = 0; i < 4; i++)
673 if (get_bits1(&s->gb))
674 s->s.h.lf_delta.ref[i] = get_sbits_inv(&s->gb, 6);
675 for (i = 0; i < 2; i++)
676 if (get_bits1(&s->gb))
677 s->s.h.lf_delta.mode[i] = get_sbits_inv(&s->gb, 6);
678 }
679 }
680
681 /* quantization header data */
682 s->s.h.yac_qi = get_bits(&s->gb, 8);
683 s->s.h.ydc_qdelta = get_bits1(&s->gb) ? get_sbits_inv(&s->gb, 4) : 0;
684 s->s.h.uvdc_qdelta = get_bits1(&s->gb) ? get_sbits_inv(&s->gb, 4) : 0;
685 s->s.h.uvac_qdelta = get_bits1(&s->gb) ? get_sbits_inv(&s->gb, 4) : 0;
686 s->s.h.lossless = s->s.h.yac_qi == 0 && s->s.h.ydc_qdelta == 0 &&
687 s->s.h.uvdc_qdelta == 0 && s->s.h.uvac_qdelta == 0;
688 if (s->s.h.lossless)
689 avctx->properties |= FF_CODEC_PROPERTY_LOSSLESS;
690
691 /* segmentation header info */
692 if ((s->s.h.segmentation.enabled = get_bits1(&s->gb))) {
693 if ((s->s.h.segmentation.update_map = get_bits1(&s->gb))) {
694 for (i = 0; i < 7; i++)
695 s->s.h.segmentation.prob[i] = get_bits1(&s->gb) ?
696 get_bits(&s->gb, 8) : 255;
697 if ((s->s.h.segmentation.temporal = get_bits1(&s->gb)))
698 for (i = 0; i < 3; i++)
699 s->s.h.segmentation.pred_prob[i] = get_bits1(&s->gb) ?
700 get_bits(&s->gb, 8) : 255;
701 }
702
703 if (get_bits1(&s->gb)) {
704 s->s.h.segmentation.absolute_vals = get_bits1(&s->gb);
705 for (i = 0; i < 8; i++) {
706 if ((s->s.h.segmentation.feat[i].q_enabled = get_bits1(&s->gb)))
707 s->s.h.segmentation.feat[i].q_val = get_sbits_inv(&s->gb, 8);
708 if ((s->s.h.segmentation.feat[i].lf_enabled = get_bits1(&s->gb)))
709 s->s.h.segmentation.feat[i].lf_val = get_sbits_inv(&s->gb, 6);
710 if ((s->s.h.segmentation.feat[i].ref_enabled = get_bits1(&s->gb)))
711 s->s.h.segmentation.feat[i].ref_val = get_bits(&s->gb, 2);
712 s->s.h.segmentation.feat[i].skip_enabled = get_bits1(&s->gb);
713 }
714 }
715 }
716
717 // set qmul[] based on Y/UV, AC/DC and segmentation Q idx deltas
718 for (i = 0; i < (s->s.h.segmentation.enabled ? 8 : 1); i++) {
719 int qyac, qydc, quvac, quvdc, lflvl, sh;
720
721 if (s->s.h.segmentation.enabled && s->s.h.segmentation.feat[i].q_enabled) {
722 if (s->s.h.segmentation.absolute_vals)
723 qyac = av_clip_uintp2(s->s.h.segmentation.feat[i].q_val, 8);
724 else
725 qyac = av_clip_uintp2(s->s.h.yac_qi + s->s.h.segmentation.feat[i].q_val, 8);
726 } else {
727 qyac = s->s.h.yac_qi;
728 }
729 qydc = av_clip_uintp2(qyac + s->s.h.ydc_qdelta, 8);
730 quvdc = av_clip_uintp2(qyac + s->s.h.uvdc_qdelta, 8);
731 quvac = av_clip_uintp2(qyac + s->s.h.uvac_qdelta, 8);
732 qyac = av_clip_uintp2(qyac, 8);
733
734 s->s.h.segmentation.feat[i].qmul[0][0] = ff_vp9_dc_qlookup[s->bpp_index][qydc];
735 s->s.h.segmentation.feat[i].qmul[0][1] = ff_vp9_ac_qlookup[s->bpp_index][qyac];
736 s->s.h.segmentation.feat[i].qmul[1][0] = ff_vp9_dc_qlookup[s->bpp_index][quvdc];
737 s->s.h.segmentation.feat[i].qmul[1][1] = ff_vp9_ac_qlookup[s->bpp_index][quvac];
738
739 sh = s->s.h.filter.level >= 32;
740 if (s->s.h.segmentation.enabled && s->s.h.segmentation.feat[i].lf_enabled) {
741 if (s->s.h.segmentation.absolute_vals)
742 lflvl = av_clip_uintp2(s->s.h.segmentation.feat[i].lf_val, 6);
743 else
744 lflvl = av_clip_uintp2(s->s.h.filter.level + s->s.h.segmentation.feat[i].lf_val, 6);
745 } else {
746 lflvl = s->s.h.filter.level;
747 }
748 if (s->s.h.lf_delta.enabled) {
749 s->s.h.segmentation.feat[i].lflvl[0][0] =
750 s->s.h.segmentation.feat[i].lflvl[0][1] =
751 av_clip_uintp2(lflvl + (s->s.h.lf_delta.ref[0] * (1 << sh)), 6);
752 for (j = 1; j < 4; j++) {
753 s->s.h.segmentation.feat[i].lflvl[j][0] =
754 av_clip_uintp2(lflvl + ((s->s.h.lf_delta.ref[j] +
755 s->s.h.lf_delta.mode[0]) * (1 << sh)), 6);
756 s->s.h.segmentation.feat[i].lflvl[j][1] =
757 av_clip_uintp2(lflvl + ((s->s.h.lf_delta.ref[j] +
758 s->s.h.lf_delta.mode[1]) * (1 << sh)), 6);
759 }
760 } else {
761 memset(s->s.h.segmentation.feat[i].lflvl, lflvl,
762 sizeof(s->s.h.segmentation.feat[i].lflvl));
763 }
764 }
765
766 /* tiling info */
767 if ((ret = update_size(avctx, w, h)) < 0) {
768 av_log(avctx, AV_LOG_ERROR, "Failed to initialize decoder for %dx%d @ %d\n",
769 w, h, s->pix_fmt);
770 return ret;
771 }
772 for (s->s.h.tiling.log2_tile_cols = 0;
773 s->sb_cols > (64 << s->s.h.tiling.log2_tile_cols);
774 s->s.h.tiling.log2_tile_cols++) ;
775 for (max = 0; (s->sb_cols >> max) >= 4; max++) ;
776 max = FFMAX(0, max - 1);
777 while (max > s->s.h.tiling.log2_tile_cols) {
778 if (get_bits1(&s->gb))
779 s->s.h.tiling.log2_tile_cols++;
780 else
781 break;
782 }
783 s->s.h.tiling.log2_tile_rows = decode012(&s->gb);
784 s->s.h.tiling.tile_rows = 1 << s->s.h.tiling.log2_tile_rows;
785 if (s->s.h.tiling.tile_cols != (1 << s->s.h.tiling.log2_tile_cols)) {
786 int n_range_coders;
787 VP56RangeCoder *rc;
788
789 if (s->td) {
790 for (i = 0; i < s->active_tile_cols; i++)
791 vp9_tile_data_free(&s->td[i]);
792 av_free(s->td);
793 }
794
795 s->s.h.tiling.tile_cols = 1 << s->s.h.tiling.log2_tile_cols;
796 vp9_free_entries(avctx);
797 s->active_tile_cols = avctx->active_thread_type == FF_THREAD_SLICE ?
798 s->s.h.tiling.tile_cols : 1;
799 vp9_alloc_entries(avctx, s->sb_rows);
800 if (avctx->active_thread_type == FF_THREAD_SLICE) {
801 n_range_coders = 4; // max_tile_rows
802 } else {
803 n_range_coders = s->s.h.tiling.tile_cols;
804 }
805 s->td = av_mallocz_array(s->active_tile_cols, sizeof(VP9TileData) +
806 n_range_coders * sizeof(VP56RangeCoder));
807 if (!s->td)
808 return AVERROR(ENOMEM);
809 rc = (VP56RangeCoder *) &s->td[s->active_tile_cols];
810 for (i = 0; i < s->active_tile_cols; i++) {
811 s->td[i].s = s;
812 s->td[i].c_b = rc;
813 rc += n_range_coders;
814 }
815 }
816
817 /* check reference frames */
818 if (!s->s.h.keyframe && !s->s.h.intraonly) {
819 int valid_ref_frame = 0;
820 for (i = 0; i < 3; i++) {
821 AVFrame *ref = s->s.refs[s->s.h.refidx[i]].f;
822 int refw = ref->width, refh = ref->height;
823
824 if (ref->format != avctx->pix_fmt) {
825 av_log(avctx, AV_LOG_ERROR,
826 "Ref pixfmt (%s) did not match current frame (%s)",
827 av_get_pix_fmt_name(ref->format),
828 av_get_pix_fmt_name(avctx->pix_fmt));
829 return AVERROR_INVALIDDATA;
830 } else if (refw == w && refh == h) {
831 s->mvscale[i][0] = s->mvscale[i][1] = 0;
832 } else {
833 /* Check to make sure at least one of frames that */
834 /* this frame references has valid dimensions */
835 if (w * 2 < refw || h * 2 < refh || w > 16 * refw || h > 16 * refh) {
836 av_log(avctx, AV_LOG_WARNING,
837 "Invalid ref frame dimensions %dx%d for frame size %dx%d\n",
838 refw, refh, w, h);
839 s->mvscale[i][0] = s->mvscale[i][1] = REF_INVALID_SCALE;
840 continue;
841 }
842 s->mvscale[i][0] = (refw << 14) / w;
843 s->mvscale[i][1] = (refh << 14) / h;
844 s->mvstep[i][0] = 16 * s->mvscale[i][0] >> 14;
845 s->mvstep[i][1] = 16 * s->mvscale[i][1] >> 14;
846 }
847 valid_ref_frame++;
848 }
849 if (!valid_ref_frame) {
850 av_log(avctx, AV_LOG_ERROR, "No valid reference frame is found, bitstream not supported\n");
851 return AVERROR_INVALIDDATA;
852 }
853 }
854
855 if (s->s.h.keyframe || s->s.h.errorres || (s->s.h.intraonly && s->s.h.resetctx == 3)) {
856 s->prob_ctx[0].p = s->prob_ctx[1].p = s->prob_ctx[2].p =
857 s->prob_ctx[3].p = ff_vp9_default_probs;
858 memcpy(s->prob_ctx[0].coef, ff_vp9_default_coef_probs,
859 sizeof(ff_vp9_default_coef_probs));
860 memcpy(s->prob_ctx[1].coef, ff_vp9_default_coef_probs,
861 sizeof(ff_vp9_default_coef_probs));
862 memcpy(s->prob_ctx[2].coef, ff_vp9_default_coef_probs,
863 sizeof(ff_vp9_default_coef_probs));
864 memcpy(s->prob_ctx[3].coef, ff_vp9_default_coef_probs,
865 sizeof(ff_vp9_default_coef_probs));
866 } else if (s->s.h.intraonly && s->s.h.resetctx == 2) {
867 s->prob_ctx[c].p = ff_vp9_default_probs;
868 memcpy(s->prob_ctx[c].coef, ff_vp9_default_coef_probs,
869 sizeof(ff_vp9_default_coef_probs));
870 }
871
872 // next 16 bits is size of the rest of the header (arith-coded)
873 s->s.h.compressed_header_size = size2 = get_bits(&s->gb, 16);
874 s->s.h.uncompressed_header_size = (get_bits_count(&s->gb) + 7) / 8;
875
876 data2 = align_get_bits(&s->gb);
877 if (size2 > size - (data2 - data)) {
878 av_log(avctx, AV_LOG_ERROR, "Invalid compressed header size\n");
879 return AVERROR_INVALIDDATA;
880 }
881 ret = ff_vp56_init_range_decoder(&s->c, data2, size2);
882 if (ret < 0)
883 return ret;
884
885 if (vp56_rac_get_prob_branchy(&s->c, 128)) { // marker bit
886 av_log(avctx, AV_LOG_ERROR, "Marker bit was set\n");
887 return AVERROR_INVALIDDATA;
888 }
889
890 for (i = 0; i < s->active_tile_cols; i++) {
891 if (s->s.h.keyframe || s->s.h.intraonly) {
892 memset(s->td[i].counts.coef, 0, sizeof(s->td[0].counts.coef));
893 memset(s->td[i].counts.eob, 0, sizeof(s->td[0].counts.eob));
894 } else {
895 memset(&s->td[i].counts, 0, sizeof(s->td[0].counts));
896 }
897 s->td[i].nb_block_structure = 0;
898 }
899
900 /* FIXME is it faster to not copy here, but do it down in the fw updates
901 * as explicit copies if the fw update is missing (and skip the copy upon
902 * fw update)? */
903 s->prob.p = s->prob_ctx[c].p;
904
905 // txfm updates
906 if (s->s.h.lossless) {
907 s->s.h.txfmmode = TX_4X4;
908 } else {
909 s->s.h.txfmmode = vp8_rac_get_uint(&s->c, 2);
910 if (s->s.h.txfmmode == 3)
911 s->s.h.txfmmode += vp8_rac_get(&s->c);
912
913 if (s->s.h.txfmmode == TX_SWITCHABLE) {
914 for (i = 0; i < 2; i++)
915 if (vp56_rac_get_prob_branchy(&s->c, 252))
916 s->prob.p.tx8p[i] = update_prob(&s->c, s->prob.p.tx8p[i]);
917 for (i = 0; i < 2; i++)
918 for (j = 0; j < 2; j++)
919 if (vp56_rac_get_prob_branchy(&s->c, 252))
920 s->prob.p.tx16p[i][j] =
921 update_prob(&s->c, s->prob.p.tx16p[i][j]);
922 for (i = 0; i < 2; i++)
923 for (j = 0; j < 3; j++)
924 if (vp56_rac_get_prob_branchy(&s->c, 252))
925 s->prob.p.tx32p[i][j] =
926 update_prob(&s->c, s->prob.p.tx32p[i][j]);
927 }
928 }
929
930 // coef updates
931 for (i = 0; i < 4; i++) {
932 uint8_t (*ref)[2][6][6][3] = s->prob_ctx[c].coef[i];
933 if (vp8_rac_get(&s->c)) {
934 for (j = 0; j < 2; j++)
935 for (k = 0; k < 2; k++)
936 for (l = 0; l < 6; l++)
937 for (m = 0; m < 6; m++) {
938 uint8_t *p = s->prob.coef[i][j][k][l][m];
939 uint8_t *r = ref[j][k][l][m];
940 if (m >= 3 && l == 0) // dc only has 3 pt
941 break;
942 for (n = 0; n < 3; n++) {
943 if (vp56_rac_get_prob_branchy(&s->c, 252))
944 p[n] = update_prob(&s->c, r[n]);
945 else
946 p[n] = r[n];
947 }
948 memcpy(&p[3], ff_vp9_model_pareto8[p[2]], 8);
949 }
950 } else {
951 for (j = 0; j < 2; j++)
952 for (k = 0; k < 2; k++)
953 for (l = 0; l < 6; l++)
954 for (m = 0; m < 6; m++) {
955 uint8_t *p = s->prob.coef[i][j][k][l][m];
956 uint8_t *r = ref[j][k][l][m];
957 if (m > 3 && l == 0) // dc only has 3 pt
958 break;
959 memcpy(p, r, 3);
960 memcpy(&p[3], ff_vp9_model_pareto8[p[2]], 8);
961 }
962 }
963 if (s->s.h.txfmmode == i)
964 break;
965 }
966
967 // mode updates
968 for (i = 0; i < 3; i++)
969 if (vp56_rac_get_prob_branchy(&s->c, 252))
970 s->prob.p.skip[i] = update_prob(&s->c, s->prob.p.skip[i]);
971 if (!s->s.h.keyframe && !s->s.h.intraonly) {
972 for (i = 0; i < 7; i++)
973 for (j = 0; j < 3; j++)
974 if (vp56_rac_get_prob_branchy(&s->c, 252))
975 s->prob.p.mv_mode[i][j] =
976 update_prob(&s->c, s->prob.p.mv_mode[i][j]);
977
978 if (s->s.h.filtermode == FILTER_SWITCHABLE)
979 for (i = 0; i < 4; i++)
980 for (j = 0; j < 2; j++)
981 if (vp56_rac_get_prob_branchy(&s->c, 252))
982 s->prob.p.filter[i][j] =
983 update_prob(&s->c, s->prob.p.filter[i][j]);
984
985 for (i = 0; i < 4; i++)
986 if (vp56_rac_get_prob_branchy(&s->c, 252))
987 s->prob.p.intra[i] = update_prob(&s->c, s->prob.p.intra[i]);
988
989 if (s->s.h.allowcompinter) {
990 s->s.h.comppredmode = vp8_rac_get(&s->c);
991 if (s->s.h.comppredmode)
992 s->s.h.comppredmode += vp8_rac_get(&s->c);
993 if (s->s.h.comppredmode == PRED_SWITCHABLE)
994 for (i = 0; i < 5; i++)
995 if (vp56_rac_get_prob_branchy(&s->c, 252))
996 s->prob.p.comp[i] =
997 update_prob(&s->c, s->prob.p.comp[i]);
998 } else {
999 s->s.h.comppredmode = PRED_SINGLEREF;
1000 }
1001
1002 if (s->s.h.comppredmode != PRED_COMPREF) {
1003 for (i = 0; i < 5; i++) {
1004 if (vp56_rac_get_prob_branchy(&s->c, 252))
1005 s->prob.p.single_ref[i][0] =
1006 update_prob(&s->c, s->prob.p.single_ref[i][0]);
1007 if (vp56_rac_get_prob_branchy(&s->c, 252))
1008 s->prob.p.single_ref[i][1] =
1009 update_prob(&s->c, s->prob.p.single_ref[i][1]);
1010 }
1011 }
1012
1013 if (s->s.h.comppredmode != PRED_SINGLEREF) {
1014 for (i = 0; i < 5; i++)
1015 if (vp56_rac_get_prob_branchy(&s->c, 252))
1016 s->prob.p.comp_ref[i] =
1017 update_prob(&s->c, s->prob.p.comp_ref[i]);
1018 }
1019
1020 for (i = 0; i < 4; i++)
1021 for (j = 0; j < 9; j++)
1022 if (vp56_rac_get_prob_branchy(&s->c, 252))
1023 s->prob.p.y_mode[i][j] =
1024 update_prob(&s->c, s->prob.p.y_mode[i][j]);
1025
1026 for (i = 0; i < 4; i++)
1027 for (j = 0; j < 4; j++)
1028 for (k = 0; k < 3; k++)
1029 if (vp56_rac_get_prob_branchy(&s->c, 252))
1030 s->prob.p.partition[3 - i][j][k] =
1031 update_prob(&s->c,
1032 s->prob.p.partition[3 - i][j][k]);
1033
1034 // mv fields don't use the update_prob subexp model for some reason
1035 for (i = 0; i < 3; i++)
1036 if (vp56_rac_get_prob_branchy(&s->c, 252))
1037 s->prob.p.mv_joint[i] = (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1038
1039 for (i = 0; i < 2; i++) {
1040 if (vp56_rac_get_prob_branchy(&s->c, 252))
1041 s->prob.p.mv_comp[i].sign =
1042 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1043
1044 for (j = 0; j < 10; j++)
1045 if (vp56_rac_get_prob_branchy(&s->c, 252))
1046 s->prob.p.mv_comp[i].classes[j] =
1047 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1048
1049 if (vp56_rac_get_prob_branchy(&s->c, 252))
1050 s->prob.p.mv_comp[i].class0 =
1051 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1052
1053 for (j = 0; j < 10; j++)
1054 if (vp56_rac_get_prob_branchy(&s->c, 252))
1055 s->prob.p.mv_comp[i].bits[j] =
1056 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1057 }
1058
1059 for (i = 0; i < 2; i++) {
1060 for (j = 0; j < 2; j++)
1061 for (k = 0; k < 3; k++)
1062 if (vp56_rac_get_prob_branchy(&s->c, 252))
1063 s->prob.p.mv_comp[i].class0_fp[j][k] =
1064 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1065
1066 for (j = 0; j < 3; j++)
1067 if (vp56_rac_get_prob_branchy(&s->c, 252))
1068 s->prob.p.mv_comp[i].fp[j] =
1069 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1070 }
1071
1072 if (s->s.h.highprecisionmvs) {
1073 for (i = 0; i < 2; i++) {
1074 if (vp56_rac_get_prob_branchy(&s->c, 252))
1075 s->prob.p.mv_comp[i].class0_hp =
1076 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1077
1078 if (vp56_rac_get_prob_branchy(&s->c, 252))
1079 s->prob.p.mv_comp[i].hp =
1080 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1081 }
1082 }
1083 }
1084
1085 return (data2 - data) + size2;
1086 }
1087
/*
 * Recursively parse the partition tree for one superblock and decode the
 * resulting leaf blocks.
 *
 * row/col are in 8x8-block units; yoff/uvoff are byte offsets of the
 * block's top-left pixel in the current frame's luma/chroma planes; bl is
 * the level in the partition tree (BL_64X64 at the root, BL_8X8 at the
 * leaves). Partition decisions are read from td->c and counted for later
 * probability adaptation.
 */
static void decode_sb(VP9TileData *td, int row, int col, VP9Filter *lflvl,
                      ptrdiff_t yoff, ptrdiff_t uvoff, enum BlockLevel bl)
{
    const VP9Context *s = td->s;
    /* 2-bit partition context, one bit each from the above and left ctx */
    int c = ((s->above_partition_ctx[col] >> (3 - bl)) & 1) |
            (((td->left_partition_ctx[row & 0x7] >> (3 - bl)) & 1) << 1);
    /* keyframes/intra-only frames use fixed default partition probabilities */
    const uint8_t *p = s->s.h.keyframe || s->s.h.intraonly ? ff_vp9_default_kf_partition_probs[bl][c] :
                       s->prob.p.partition[bl][c];
    enum BlockPartition bp;
    ptrdiff_t hbs = 4 >> bl; /* half the block size, in 8x8-block units */
    AVFrame *f = s->s.frames[CUR_FRAME].tf.f;
    ptrdiff_t y_stride = f->linesize[0], uv_stride = f->linesize[1];
    int bytesperpixel = s->bytesperpixel;

    if (bl == BL_8X8) {
        bp = vp8_rac_get_tree(td->c, ff_vp9_partition_tree, p);
        ff_vp9_decode_block(td, row, col, lflvl, yoff, uvoff, bl, bp);
    } else if (col + hbs < s->cols) { // FIXME why not <=?
        if (row + hbs < s->rows) { // FIXME why not <=?
            /* block fully inside the frame: all four partitions possible */
            bp = vp8_rac_get_tree(td->c, ff_vp9_partition_tree, p);
            switch (bp) {
            case PARTITION_NONE:
                ff_vp9_decode_block(td, row, col, lflvl, yoff, uvoff, bl, bp);
                break;
            case PARTITION_H:
                ff_vp9_decode_block(td, row, col, lflvl, yoff, uvoff, bl, bp);
                yoff += hbs * 8 * y_stride;
                uvoff += hbs * 8 * uv_stride >> s->ss_v;
                ff_vp9_decode_block(td, row + hbs, col, lflvl, yoff, uvoff, bl, bp);
                break;
            case PARTITION_V:
                ff_vp9_decode_block(td, row, col, lflvl, yoff, uvoff, bl, bp);
                yoff += hbs * 8 * bytesperpixel;
                uvoff += hbs * 8 * bytesperpixel >> s->ss_h;
                ff_vp9_decode_block(td, row, col + hbs, lflvl, yoff, uvoff, bl, bp);
                break;
            case PARTITION_SPLIT:
                decode_sb(td, row, col, lflvl, yoff, uvoff, bl + 1);
                decode_sb(td, row, col + hbs, lflvl,
                          yoff + 8 * hbs * bytesperpixel,
                          uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
                yoff += hbs * 8 * y_stride;
                uvoff += hbs * 8 * uv_stride >> s->ss_v;
                decode_sb(td, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
                decode_sb(td, row + hbs, col + hbs, lflvl,
                          yoff + 8 * hbs * bytesperpixel,
                          uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
                break;
            default:
                av_assert0(0);
            }
        } else if (vp56_rac_get_prob_branchy(td->c, p[1])) {
            /* bottom edge: only SPLIT or H are representable here */
            bp = PARTITION_SPLIT;
            decode_sb(td, row, col, lflvl, yoff, uvoff, bl + 1);
            decode_sb(td, row, col + hbs, lflvl,
                      yoff + 8 * hbs * bytesperpixel,
                      uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
        } else {
            bp = PARTITION_H;
            ff_vp9_decode_block(td, row, col, lflvl, yoff, uvoff, bl, bp);
        }
    } else if (row + hbs < s->rows) { // FIXME why not <=?
        if (vp56_rac_get_prob_branchy(td->c, p[2])) {
            /* right edge: only SPLIT or V are representable here */
            bp = PARTITION_SPLIT;
            decode_sb(td, row, col, lflvl, yoff, uvoff, bl + 1);
            yoff += hbs * 8 * y_stride;
            uvoff += hbs * 8 * uv_stride >> s->ss_v;
            decode_sb(td, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
        } else {
            bp = PARTITION_V;
            ff_vp9_decode_block(td, row, col, lflvl, yoff, uvoff, bl, bp);
        }
    } else {
        /* bottom-right corner: SPLIT is implied, no bits are read */
        bp = PARTITION_SPLIT;
        decode_sb(td, row, col, lflvl, yoff, uvoff, bl + 1);
    }
    td->counts.partition[bl][c][bp]++;
}
1166
/*
 * Second-pass variant of decode_sb(): instead of parsing the partition
 * tree from the bitstream, replay the block structure recorded in pass 1
 * (td->b walks through the stored blocks) and re-run reconstruction.
 */
static void decode_sb_mem(VP9TileData *td, int row, int col, VP9Filter *lflvl,
                          ptrdiff_t yoff, ptrdiff_t uvoff, enum BlockLevel bl)
{
    const VP9Context *s = td->s;
    VP9Block *b = td->b;
    ptrdiff_t hbs = 4 >> bl; /* half the block size, in 8x8-block units */
    AVFrame *f = s->s.frames[CUR_FRAME].tf.f;
    ptrdiff_t y_stride = f->linesize[0], uv_stride = f->linesize[1];
    int bytesperpixel = s->bytesperpixel;

    if (bl == BL_8X8) {
        av_assert2(b->bl == BL_8X8);
        ff_vp9_decode_block(td, row, col, lflvl, yoff, uvoff, b->bl, b->bp);
    } else if (td->b->bl == bl) {
        /* the recorded block lives at this level: decode it, plus its
         * second half for H/V partitions when inside the frame */
        ff_vp9_decode_block(td, row, col, lflvl, yoff, uvoff, b->bl, b->bp);
        if (b->bp == PARTITION_H && row + hbs < s->rows) {
            yoff += hbs * 8 * y_stride;
            uvoff += hbs * 8 * uv_stride >> s->ss_v;
            ff_vp9_decode_block(td, row + hbs, col, lflvl, yoff, uvoff, b->bl, b->bp);
        } else if (b->bp == PARTITION_V && col + hbs < s->cols) {
            yoff += hbs * 8 * bytesperpixel;
            uvoff += hbs * 8 * bytesperpixel >> s->ss_h;
            ff_vp9_decode_block(td, row, col + hbs, lflvl, yoff, uvoff, b->bl, b->bp);
        }
    } else {
        /* split: recurse into the (up to four) quadrants inside the frame */
        decode_sb_mem(td, row, col, lflvl, yoff, uvoff, bl + 1);
        if (col + hbs < s->cols) { // FIXME why not <=?
            if (row + hbs < s->rows) {
                decode_sb_mem(td, row, col + hbs, lflvl, yoff + 8 * hbs * bytesperpixel,
                              uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
                yoff += hbs * 8 * y_stride;
                uvoff += hbs * 8 * uv_stride >> s->ss_v;
                decode_sb_mem(td, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
                decode_sb_mem(td, row + hbs, col + hbs, lflvl,
                              yoff + 8 * hbs * bytesperpixel,
                              uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
            } else {
                yoff += hbs * 8 * bytesperpixel;
                uvoff += hbs * 8 * bytesperpixel >> s->ss_h;
                decode_sb_mem(td, row, col + hbs, lflvl, yoff, uvoff, bl + 1);
            }
        } else if (row + hbs < s->rows) {
            yoff += hbs * 8 * y_stride;
            uvoff += hbs * 8 * uv_stride >> s->ss_v;
            decode_sb_mem(td, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
        }
    }
}
1215
/*
 * Derive the [start, end) boundaries of tile number idx, in 8x8-block
 * units. The frame is n superblocks wide/high and is split into 2^log2_n
 * tiles; each superblock spans 8 such blocks, hence the final scaling.
 */
static void set_tile_offset(int *start, int *end, int idx, int log2_n, int n)
{
    int first_sb = (idx * n) >> log2_n;
    int last_sb = ((idx + 1) * n) >> log2_n;

    /* clamp to the frame edge */
    if (first_sb > n)
        first_sb = n;
    if (last_sb > n)
        last_sb = n;

    *start = first_sb * 8;
    *end   = last_sb * 8;
}
1223
/*
 * Release the per-frame scratch allocations: the per-tile-column worker
 * buffers and the saved intra-prediction edge pixels.
 */
static void free_buffers(VP9Context *s)
{
    int n;

    for (n = 0; n < s->active_tile_cols; n++)
        vp9_tile_data_free(&s->td[n]);
    av_freep(&s->intra_pred_data[0]);
}
1232
/*
 * Free all persistent decoder state: the three internal frames, the
 * extradata pool, every reference slot (current and pending), the tile
 * scratch buffers and the slice-threading entries. Also used as the
 * error path of init_frames(), so it must tolerate a partially
 * initialized context.
 */
static av_cold int vp9_decode_free(AVCodecContext *avctx)
{
    VP9Context *s = avctx->priv_data;
    int i;

    for (i = 0; i < 3; i++) {
        vp9_frame_unref(avctx, &s->s.frames[i]);
        av_frame_free(&s->s.frames[i].tf.f);
    }
    av_buffer_pool_uninit(&s->frame_extradata_pool);
    for (i = 0; i < 8; i++) {
        ff_thread_release_buffer(avctx, &s->s.refs[i]);
        av_frame_free(&s->s.refs[i].f);
        ff_thread_release_buffer(avctx, &s->next_refs[i]);
        av_frame_free(&s->next_refs[i].f);
    }

    free_buffers(s);
    vp9_free_entries(avctx);
    av_freep(&s->td);
    return 0;
}
1255
/*
 * Single-threaded (or frame-threaded) tile decode loop.
 *
 * For each tile row, one range decoder per tile column is initialized
 * from the packet, then superblock rows are decoded left-to-right across
 * all tile columns. After each superblock row (except in pass 1), the
 * pre-loopfilter pixels are saved for intra prediction of the next row,
 * the row is loop-filtered, and decode progress is reported for frame
 * threading. Returns 0 on success or a negative AVERROR code.
 */
static int decode_tiles(AVCodecContext *avctx,
                        const uint8_t *data, int size)
{
    VP9Context *s = avctx->priv_data;
    VP9TileData *td = &s->td[0];
    int row, col, tile_row, tile_col, ret;
    int bytesperpixel;
    int tile_row_start, tile_row_end, tile_col_start, tile_col_end;
    AVFrame *f;
    ptrdiff_t yoff, uvoff, ls_y, ls_uv;

    f = s->s.frames[CUR_FRAME].tf.f;
    ls_y = f->linesize[0];
    ls_uv =f->linesize[1];
    bytesperpixel = s->bytesperpixel;

    yoff = uvoff = 0;
    for (tile_row = 0; tile_row < s->s.h.tiling.tile_rows; tile_row++) {
        set_tile_offset(&tile_row_start, &tile_row_end,
                        tile_row, s->s.h.tiling.log2_tile_rows, s->sb_rows);

        /* set up one range decoder per tile column for this tile row */
        for (tile_col = 0; tile_col < s->s.h.tiling.tile_cols; tile_col++) {
            int64_t tile_size;

            /* the very last tile has no explicit 32-bit size field */
            if (tile_col == s->s.h.tiling.tile_cols - 1 &&
                tile_row == s->s.h.tiling.tile_rows - 1) {
                tile_size = size;
            } else {
                tile_size = AV_RB32(data);
                data += 4;
                size -= 4;
            }
            if (tile_size > size) {
                ff_thread_report_progress(&s->s.frames[CUR_FRAME].tf, INT_MAX, 0);
                return AVERROR_INVALIDDATA;
            }
            ret = ff_vp56_init_range_decoder(&td->c_b[tile_col], data, tile_size);
            if (ret < 0)
                return ret;
            if (vp56_rac_get_prob_branchy(&td->c_b[tile_col], 128)) { // marker bit
                ff_thread_report_progress(&s->s.frames[CUR_FRAME].tf, INT_MAX, 0);
                return AVERROR_INVALIDDATA;
            }
            data += tile_size;
            size -= tile_size;
        }

        for (row = tile_row_start; row < tile_row_end;
             row += 8, yoff += ls_y * 64, uvoff += ls_uv * 64 >> s->ss_v) {
            VP9Filter *lflvl_ptr = s->lflvl;
            ptrdiff_t yoff2 = yoff, uvoff2 = uvoff;

            for (tile_col = 0; tile_col < s->s.h.tiling.tile_cols; tile_col++) {
                set_tile_offset(&tile_col_start, &tile_col_end,
                                tile_col, s->s.h.tiling.log2_tile_cols, s->sb_cols);
                td->tile_col_start = tile_col_start;
                if (s->pass != 2) {
                    /* reset left-edge prediction contexts at the start of
                     * each tile (tiles are independently decodable) */
                    memset(td->left_partition_ctx, 0, 8);
                    memset(td->left_skip_ctx, 0, 8);
                    if (s->s.h.keyframe || s->s.h.intraonly) {
                        memset(td->left_mode_ctx, DC_PRED, 16);
                    } else {
                        memset(td->left_mode_ctx, NEARESTMV, 8);
                    }
                    memset(td->left_y_nnz_ctx, 0, 16);
                    memset(td->left_uv_nnz_ctx, 0, 32);
                    memset(td->left_segpred_ctx, 0, 8);

                    td->c = &td->c_b[tile_col];
                }

                for (col = tile_col_start;
                     col < tile_col_end;
                     col += 8, yoff2 += 64 * bytesperpixel,
                     uvoff2 += 64 * bytesperpixel >> s->ss_h, lflvl_ptr++) {
                    // FIXME integrate with lf code (i.e. zero after each
                    // use, similar to invtxfm coefficients, or similar)
                    if (s->pass != 1) {
                        memset(lflvl_ptr->mask, 0, sizeof(lflvl_ptr->mask));
                    }

                    if (s->pass == 2) {
                        /* pass 2: replay the block structure from pass 1 */
                        decode_sb_mem(td, row, col, lflvl_ptr,
                                      yoff2, uvoff2, BL_64X64);
                    } else {
                        /* guard against truncated input before parsing */
                        if (vpX_rac_is_end(td->c)) {
                            return AVERROR_INVALIDDATA;
                        }
                        decode_sb(td, row, col, lflvl_ptr,
                                  yoff2, uvoff2, BL_64X64);
                    }
                }
            }

            if (s->pass == 1)
                continue;

            // backup pre-loopfilter reconstruction data for intra
            // prediction of next row of sb64s
            if (row + 8 < s->rows) {
                memcpy(s->intra_pred_data[0],
                       f->data[0] + yoff + 63 * ls_y,
                       8 * s->cols * bytesperpixel);
                memcpy(s->intra_pred_data[1],
                       f->data[1] + uvoff + ((64 >> s->ss_v) - 1) * ls_uv,
                       8 * s->cols * bytesperpixel >> s->ss_h);
                memcpy(s->intra_pred_data[2],
                       f->data[2] + uvoff + ((64 >> s->ss_v) - 1) * ls_uv,
                       8 * s->cols * bytesperpixel >> s->ss_h);
            }

            // loopfilter one row
            if (s->s.h.filter.level) {
                yoff2 = yoff;
                uvoff2 = uvoff;
                lflvl_ptr = s->lflvl;
                for (col = 0; col < s->cols;
                     col += 8, yoff2 += 64 * bytesperpixel,
                     uvoff2 += 64 * bytesperpixel >> s->ss_h, lflvl_ptr++) {
                    ff_vp9_loopfilter_sb(avctx, lflvl_ptr, row, col,
                                         yoff2, uvoff2);
                }
            }

            // FIXME maybe we can make this more finegrained by running the
            // loopfilter per-block instead of after each sbrow
            // In fact that would also make intra pred left preparation easier?
            ff_thread_report_progress(&s->s.frames[CUR_FRAME].tf, row >> 3, 0);
        }
    }
    return 0;
}
1388
1389 #if HAVE_THREADS
/*
 * Slice-thread worker: decodes one tile column (selected by jobnr) across
 * all tile rows. Per-superblock-row progress is reported via
 * vp9_report_tile_progress() so loopfilter_proc() can follow behind once
 * every tile column has finished a given row.
 */
static av_always_inline
int decode_tiles_mt(AVCodecContext *avctx, void *tdata, int jobnr,
                    int threadnr)
{
    VP9Context *s = avctx->priv_data;
    VP9TileData *td = &s->td[jobnr];
    ptrdiff_t uvoff, yoff, ls_y, ls_uv;
    int bytesperpixel = s->bytesperpixel, row, col, tile_row;
    unsigned tile_cols_len;
    int tile_row_start, tile_row_end, tile_col_start, tile_col_end;
    VP9Filter *lflvl_ptr_base;
    AVFrame *f;

    f = s->s.frames[CUR_FRAME].tf.f;
    ls_y = f->linesize[0];
    ls_uv =f->linesize[1];

    set_tile_offset(&tile_col_start, &tile_col_end,
                    jobnr, s->s.h.tiling.log2_tile_cols, s->sb_cols);
    td->tile_col_start = tile_col_start;
    uvoff = (64 * bytesperpixel >> s->ss_h)*(tile_col_start >> 3);
    yoff = (64 * bytesperpixel)*(tile_col_start >> 3);
    lflvl_ptr_base = s->lflvl+(tile_col_start >> 3);

    for (tile_row = 0; tile_row < s->s.h.tiling.tile_rows; tile_row++) {
        set_tile_offset(&tile_row_start, &tile_row_end,
                        tile_row, s->s.h.tiling.log2_tile_rows, s->sb_rows);

        /* range coders per tile row were initialized by the caller */
        td->c = &td->c_b[tile_row];
        for (row = tile_row_start; row < tile_row_end;
             row += 8, yoff += ls_y * 64, uvoff += ls_uv * 64 >> s->ss_v) {
            ptrdiff_t yoff2 = yoff, uvoff2 = uvoff;
            VP9Filter *lflvl_ptr = lflvl_ptr_base+s->sb_cols*(row >> 3);

            /* reset left-edge prediction contexts at the tile boundary */
            memset(td->left_partition_ctx, 0, 8);
            memset(td->left_skip_ctx, 0, 8);
            if (s->s.h.keyframe || s->s.h.intraonly) {
                memset(td->left_mode_ctx, DC_PRED, 16);
            } else {
                memset(td->left_mode_ctx, NEARESTMV, 8);
            }
            memset(td->left_y_nnz_ctx, 0, 16);
            memset(td->left_uv_nnz_ctx, 0, 32);
            memset(td->left_segpred_ctx, 0, 8);

            for (col = tile_col_start;
                 col < tile_col_end;
                 col += 8, yoff2 += 64 * bytesperpixel,
                 uvoff2 += 64 * bytesperpixel >> s->ss_h, lflvl_ptr++) {
                // FIXME integrate with lf code (i.e. zero after each
                // use, similar to invtxfm coefficients, or similar)
                memset(lflvl_ptr->mask, 0, sizeof(lflvl_ptr->mask));
                decode_sb(td, row, col, lflvl_ptr,
                          yoff2, uvoff2, BL_64X64);
            }

            // backup pre-loopfilter reconstruction data for intra
            // prediction of next row of sb64s
            tile_cols_len = tile_col_end - tile_col_start;
            if (row + 8 < s->rows) {
                memcpy(s->intra_pred_data[0] + (tile_col_start * 8 * bytesperpixel),
                       f->data[0] + yoff + 63 * ls_y,
                       8 * tile_cols_len * bytesperpixel);
                memcpy(s->intra_pred_data[1] + (tile_col_start * 8 * bytesperpixel >> s->ss_h),
                       f->data[1] + uvoff + ((64 >> s->ss_v) - 1) * ls_uv,
                       8 * tile_cols_len * bytesperpixel >> s->ss_h);
                memcpy(s->intra_pred_data[2] + (tile_col_start * 8 * bytesperpixel >> s->ss_h),
                       f->data[2] + uvoff + ((64 >> s->ss_v) - 1) * ls_uv,
                       8 * tile_cols_len * bytesperpixel >> s->ss_h);
            }

            /* signal that this tile column finished superblock row (row>>3) */
            vp9_report_tile_progress(s, row >> 3, 1);
        }
    }
    return 0;
}
1466
/*
 * Slice-thread main function: for each superblock row, wait until every
 * tile column has decoded it, then run the loop filter over the full row.
 * Runs concurrently with the decode_tiles_mt() workers.
 */
static av_always_inline
int loopfilter_proc(AVCodecContext *avctx)
{
    VP9Context *s = avctx->priv_data;
    ptrdiff_t uvoff, yoff, ls_y, ls_uv;
    VP9Filter *lflvl_ptr;
    int bytesperpixel = s->bytesperpixel, col, i;
    AVFrame *f;

    f = s->s.frames[CUR_FRAME].tf.f;
    ls_y = f->linesize[0];
    ls_uv =f->linesize[1];

    for (i = 0; i < s->sb_rows; i++) {
        /* block until all tile columns have finished superblock row i */
        vp9_await_tile_progress(s, i, s->s.h.tiling.tile_cols);

        if (s->s.h.filter.level) {
            yoff = (ls_y * 64)*i;
            uvoff = (ls_uv * 64 >> s->ss_v)*i;
            lflvl_ptr = s->lflvl+s->sb_cols*i;
            for (col = 0; col < s->cols;
                 col += 8, yoff += 64 * bytesperpixel,
                 uvoff += 64 * bytesperpixel >> s->ss_h, lflvl_ptr++) {
                ff_vp9_loopfilter_sb(avctx, lflvl_ptr, i << 3, col,
                                     yoff, uvoff);
            }
        }
    }
    return 0;
}
1497 #endif
1498
/*
 * Attach AV_VIDEO_ENC_PARAMS_VP9 side data to the output frame: the
 * frame-level quantizer/deltas, plus one AVVideoBlockParams entry per
 * recorded block when segmentation is enabled (each carrying the block's
 * position, size and segment QP delta).
 * Returns 0 on success or AVERROR(ENOMEM).
 */
static int vp9_export_enc_params(VP9Context *s, VP9Frame *frame)
{
    AVVideoEncParams *par;
    unsigned int tile_idx, nb_blocks = 0;

    /* block structure is only recorded while segmentation is active */
    if (s->s.h.segmentation.enabled) {
        for (tile_idx = 0; tile_idx < s->active_tile_cols; tile_idx++)
            nb_blocks += s->td[tile_idx].nb_block_structure;
    }

    par = av_video_enc_params_create_side_data(frame->tf.f,
                                               AV_VIDEO_ENC_PARAMS_VP9, nb_blocks);
    if (!par)
        return AVERROR(ENOMEM);

    /* frame-level quantizer and the per-plane DC/AC deltas */
    par->qp             = s->s.h.yac_qi;
    par->delta_qp[0][0] = s->s.h.ydc_qdelta;
    par->delta_qp[1][0] = s->s.h.uvdc_qdelta;
    par->delta_qp[2][0] = s->s.h.uvdc_qdelta;
    par->delta_qp[1][1] = s->s.h.uvac_qdelta;
    par->delta_qp[2][1] = s->s.h.uvac_qdelta;

    if (nb_blocks) {
        unsigned int idx = 0, blk;

        for (tile_idx = 0; tile_idx < s->active_tile_cols; tile_idx++) {
            const VP9TileData *td = &s->td[tile_idx];

            for (blk = 0; blk < td->nb_block_structure; blk++) {
                AVVideoBlockParams *b = av_video_enc_params_block(par, idx++);
                unsigned int row = td->block_structure[blk].row;
                unsigned int col = td->block_structure[blk].col;
                uint8_t seg_id = frame->segmentation_map[row * 8 * s->sb_cols + col];

                b->src_x = col * 8;
                b->src_y = row * 8;
                b->w = 1 << (3 + td->block_structure[blk].block_size_idx_x);
                b->h = 1 << (3 + td->block_structure[blk].block_size_idx_y);

                if (s->s.h.segmentation.feat[seg_id].q_enabled) {
                    b->delta_qp = s->s.h.segmentation.feat[seg_id].q_val;
                    /* absolute segment QP is exported relative to par->qp */
                    if (s->s.h.segmentation.absolute_vals)
                        b->delta_qp -= par->qp;
                }
            }
        }
    }

    return 0;
}
1550
/*
 * Decode one VP9 packet into a frame.
 *
 * decode_frame_header() returning 0 means "show existing frame": the
 * referenced frame is output directly without tile decoding. Otherwise
 * the current/reference frame buffers are set up, all tiles are decoded
 * (via hwaccel, slice threading, or the plain loop), probabilities are
 * optionally adapted backward, the reference slots are rotated, and the
 * frame is output unless the bitstream marks it invisible.
 * Returns pkt->size on success or a negative AVERROR code.
 */
static int vp9_decode_frame(AVCodecContext *avctx, void *frame,
                            int *got_frame, AVPacket *pkt)
{
    const uint8_t *data = pkt->data;
    int size = pkt->size;
    VP9Context *s = avctx->priv_data;
    int ret, i, j, ref;
    /* keep the previous segmentation map if this frame does not update it */
    int retain_segmap_ref = s->s.frames[REF_FRAME_SEGMAP].segmentation_map &&
                            (!s->s.h.segmentation.enabled || !s->s.h.segmentation.update_map);
    AVFrame *f;

    if ((ret = decode_frame_header(avctx, data, size, &ref)) < 0) {
        return ret;
    } else if (ret == 0) {
        /* "show existing frame": re-output reference slot 'ref' as-is */
        if (!s->s.refs[ref].f->buf[0]) {
            av_log(avctx, AV_LOG_ERROR, "Requested reference %d not available\n", ref);
            return AVERROR_INVALIDDATA;
        }
        if ((ret = av_frame_ref(frame, s->s.refs[ref].f)) < 0)
            return ret;
        ((AVFrame *)frame)->pts = pkt->pts;
#if FF_API_PKT_PTS
FF_DISABLE_DEPRECATION_WARNINGS
        ((AVFrame *)frame)->pkt_pts = pkt->pts;
FF_ENABLE_DEPRECATION_WARNINGS
#endif
        ((AVFrame *)frame)->pkt_dts = pkt->dts;
        /* carry all reference slots over unchanged */
        for (i = 0; i < 8; i++) {
            if (s->next_refs[i].f->buf[0])
                ff_thread_release_buffer(avctx, &s->next_refs[i]);
            if (s->s.refs[i].f->buf[0] &&
                (ret = ff_thread_ref_frame(&s->next_refs[i], &s->s.refs[i])) < 0)
                return ret;
        }
        *got_frame = 1;
        return pkt->size;
    }
    data += ret;
    size -= ret;

    /* rotate the previous CUR_FRAME into the SEGMAP/MVPAIR helper slots
     * used for segmentation-map inheritance and temporal MV prediction */
    if (!retain_segmap_ref || s->s.h.keyframe || s->s.h.intraonly) {
        if (s->s.frames[REF_FRAME_SEGMAP].tf.f->buf[0])
            vp9_frame_unref(avctx, &s->s.frames[REF_FRAME_SEGMAP]);
        if (!s->s.h.keyframe && !s->s.h.intraonly && !s->s.h.errorres && s->s.frames[CUR_FRAME].tf.f->buf[0] &&
            (ret = vp9_frame_ref(avctx, &s->s.frames[REF_FRAME_SEGMAP], &s->s.frames[CUR_FRAME])) < 0)
            return ret;
    }
    if (s->s.frames[REF_FRAME_MVPAIR].tf.f->buf[0])
        vp9_frame_unref(avctx, &s->s.frames[REF_FRAME_MVPAIR]);
    if (!s->s.h.intraonly && !s->s.h.keyframe && !s->s.h.errorres && s->s.frames[CUR_FRAME].tf.f->buf[0] &&
        (ret = vp9_frame_ref(avctx, &s->s.frames[REF_FRAME_MVPAIR], &s->s.frames[CUR_FRAME])) < 0)
        return ret;
    if (s->s.frames[CUR_FRAME].tf.f->buf[0])
        vp9_frame_unref(avctx, &s->s.frames[CUR_FRAME]);
    if ((ret = vp9_frame_alloc(avctx, &s->s.frames[CUR_FRAME])) < 0)
        return ret;
    f = s->s.frames[CUR_FRAME].tf.f;
    f->key_frame = s->s.h.keyframe;
    f->pict_type = (s->s.h.keyframe || s->s.h.intraonly) ? AV_PICTURE_TYPE_I : AV_PICTURE_TYPE_P;

    /* the saved segmentation map cannot survive a resolution change */
    if (s->s.frames[REF_FRAME_SEGMAP].tf.f->buf[0] &&
        (s->s.frames[REF_FRAME_MVPAIR].tf.f->width != s->s.frames[CUR_FRAME].tf.f->width ||
         s->s.frames[REF_FRAME_MVPAIR].tf.f->height != s->s.frames[CUR_FRAME].tf.f->height)) {
        vp9_frame_unref(avctx, &s->s.frames[REF_FRAME_SEGMAP]);
    }

    // ref frame setup
    for (i = 0; i < 8; i++) {
        if (s->next_refs[i].f->buf[0])
            ff_thread_release_buffer(avctx, &s->next_refs[i]);
        if (s->s.h.refreshrefmask & (1 << i)) {
            ret = ff_thread_ref_frame(&s->next_refs[i], &s->s.frames[CUR_FRAME].tf);
        } else if (s->s.refs[i].f->buf[0]) {
            ret = ff_thread_ref_frame(&s->next_refs[i], &s->s.refs[i]);
        }
        /* NOTE(review): when neither branch runs, ret keeps its previous
         * non-negative value (from decode_frame_header() or an earlier
         * iteration), so this check is then effectively a no-op */
        if (ret < 0)
            return ret;
    }

    if (avctx->hwaccel) {
        /* hardware decode path: hand the whole packet to the hwaccel */
        ret = avctx->hwaccel->start_frame(avctx, NULL, 0);
        if (ret < 0)
            return ret;
        ret = avctx->hwaccel->decode_slice(avctx, pkt->data, pkt->size);
        if (ret < 0)
            return ret;
        ret = avctx->hwaccel->end_frame(avctx);
        if (ret < 0)
            return ret;
        goto finish;
    }

    // main tile decode loop
    /* reset top-edge prediction contexts for the whole frame */
    memset(s->above_partition_ctx, 0, s->cols);
    memset(s->above_skip_ctx, 0, s->cols);
    if (s->s.h.keyframe || s->s.h.intraonly) {
        memset(s->above_mode_ctx, DC_PRED, s->cols * 2);
    } else {
        memset(s->above_mode_ctx, NEARESTMV, s->cols);
    }
    memset(s->above_y_nnz_ctx, 0, s->sb_cols * 16);
    memset(s->above_uv_nnz_ctx[0], 0, s->sb_cols * 16 >> s->ss_h);
    memset(s->above_uv_nnz_ctx[1], 0, s->sb_cols * 16 >> s->ss_h);
    memset(s->above_segpred_ctx, 0, s->cols);
    /* with frame threading, frames that adapt probabilities need a second
     * pass so the adaptation can happen before the next frame starts */
    s->pass = s->s.frames[CUR_FRAME].uses_2pass =
        avctx->active_thread_type == FF_THREAD_FRAME && s->s.h.refreshctx && !s->s.h.parallelmode;
    if ((ret = update_block_buffers(avctx)) < 0) {
        av_log(avctx, AV_LOG_ERROR,
               "Failed to allocate block buffers\n");
        return ret;
    }
    if (s->s.h.refreshctx && s->s.h.parallelmode) {
        int j, k, l, m;

        /* parallel mode: store the forward-updated probabilities right
         * away, no backward adaptation will follow after decoding */
        for (i = 0; i < 4; i++) {
            for (j = 0; j < 2; j++)
                for (k = 0; k < 2; k++)
                    for (l = 0; l < 6; l++)
                        for (m = 0; m < 6; m++)
                            memcpy(s->prob_ctx[s->s.h.framectxid].coef[i][j][k][l][m],
                                   s->prob.coef[i][j][k][l][m], 3);
            if (s->s.h.txfmmode == i)
                break;
        }
        s->prob_ctx[s->s.h.framectxid].p = s->prob.p;
        ff_thread_finish_setup(avctx);
    } else if (!s->s.h.refreshctx) {
        ff_thread_finish_setup(avctx);
    }

#if HAVE_THREADS
    if (avctx->active_thread_type & FF_THREAD_SLICE) {
        for (i = 0; i < s->sb_rows; i++)
            atomic_store(&s->entries[i], 0);
    }
#endif

    do {
        /* rewind the per-tile scratch pointers for this pass */
        for (i = 0; i < s->active_tile_cols; i++) {
            s->td[i].b = s->td[i].b_base;
            s->td[i].block = s->td[i].block_base;
            s->td[i].uvblock[0] = s->td[i].uvblock_base[0];
            s->td[i].uvblock[1] = s->td[i].uvblock_base[1];
            s->td[i].eob = s->td[i].eob_base;
            s->td[i].uveob[0] = s->td[i].uveob_base[0];
            s->td[i].uveob[1] = s->td[i].uveob_base[1];
            s->td[i].error_info = 0;
        }

#if HAVE_THREADS
        if (avctx->active_thread_type == FF_THREAD_SLICE) {
            int tile_row, tile_col;

            av_assert1(!s->pass);

            /* split the packet into one range coder per tile up front */
            for (tile_row = 0; tile_row < s->s.h.tiling.tile_rows; tile_row++) {
                for (tile_col = 0; tile_col < s->s.h.tiling.tile_cols; tile_col++) {
                    int64_t tile_size;

                    /* the very last tile has no explicit size field */
                    if (tile_col == s->s.h.tiling.tile_cols - 1 &&
                        tile_row == s->s.h.tiling.tile_rows - 1) {
                        tile_size = size;
                    } else {
                        tile_size = AV_RB32(data);
                        data += 4;
                        size -= 4;
                    }
                    if (tile_size > size)
                        return AVERROR_INVALIDDATA;
                    ret = ff_vp56_init_range_decoder(&s->td[tile_col].c_b[tile_row], data, tile_size);
                    if (ret < 0)
                        return ret;
                    if (vp56_rac_get_prob_branchy(&s->td[tile_col].c_b[tile_row], 128)) // marker bit
                        return AVERROR_INVALIDDATA;
                    data += tile_size;
                    size -= tile_size;
                }
            }

            ff_slice_thread_execute_with_mainfunc(avctx, decode_tiles_mt, loopfilter_proc, s->td, NULL, s->s.h.tiling.tile_cols);
        } else
#endif
        {
            ret = decode_tiles(avctx, data, size);
            if (ret < 0) {
                ff_thread_report_progress(&s->s.frames[CUR_FRAME].tf, INT_MAX, 0);
                return ret;
            }
        }

        // Sum all counts fields into td[0].counts for tile threading
        if (avctx->active_thread_type == FF_THREAD_SLICE)
            for (i = 1; i < s->s.h.tiling.tile_cols; i++)
                for (j = 0; j < sizeof(s->td[i].counts) / sizeof(unsigned); j++)
                    ((unsigned *)&s->td[0].counts)[j] += ((unsigned *)&s->td[i].counts)[j];

        if (s->pass < 2 && s->s.h.refreshctx && !s->s.h.parallelmode) {
            /* backward adaptation from the collected symbol counts */
            ff_vp9_adapt_probs(s);
            ff_thread_finish_setup(avctx);
        }
    } while (s->pass++ == 1);
    ff_thread_report_progress(&s->s.frames[CUR_FRAME].tf, INT_MAX, 0);

    if (s->td->error_info < 0) {
        av_log(avctx, AV_LOG_ERROR, "Failed to decode tile data\n");
        s->td->error_info = 0;
        return AVERROR_INVALIDDATA;
    }
    if (avctx->export_side_data & AV_CODEC_EXPORT_DATA_VIDEO_ENC_PARAMS) {
        ret = vp9_export_enc_params(s, &s->s.frames[CUR_FRAME]);
        if (ret < 0)
            return ret;
    }

finish:
    // ref frame setup
    /* promote next_refs into the active reference slots */
    for (i = 0; i < 8; i++) {
        if (s->s.refs[i].f->buf[0])
            ff_thread_release_buffer(avctx, &s->s.refs[i]);
        if (s->next_refs[i].f->buf[0] &&
            (ret = ff_thread_ref_frame(&s->s.refs[i], &s->next_refs[i])) < 0)
            return ret;
    }

    if (!s->s.h.invisible) {
        if ((ret = av_frame_ref(frame, s->s.frames[CUR_FRAME].tf.f)) < 0)
            return ret;
        *got_frame = 1;
    }

    return pkt->size;
}
1783
/**
 * Flush callback: drop all retained decoder state so that decoding can
 * restart cleanly (e.g. after a seek).
 */
static void vp9_decode_flush(AVCodecContext *avctx)
{
    VP9Context *s = avctx->priv_data;
    int n;

    /* Release the three internal frame slots (current/previous/blank). */
    for (n = 0; n < 3; n++)
        vp9_frame_unref(avctx, &s->s.frames[n]);

    /* Release all eight VP9 reference-frame slots. */
    for (n = 0; n < 8; n++)
        ff_thread_release_buffer(avctx, &s->s.refs[n]);
}
1794
/**
 * Allocate the AVFrame shells for the decoder's internal frame slots and
 * reference slots. The underlying data buffers are allocated later, per
 * frame; only the AVFrame containers are created here.
 *
 * @return 0 on success, AVERROR(ENOMEM) on allocation failure (the decoder
 *         context is fully torn down via vp9_decode_free() before returning).
 */
static int init_frames(AVCodecContext *avctx)
{
    VP9Context *s = avctx->priv_data;
    int i;

    for (i = 0; i < 3; i++) {
        s->s.frames[i].tf.f = av_frame_alloc();
        if (!s->s.frames[i].tf.f)
            goto fail;
    }

    for (i = 0; i < 8; i++) {
        s->s.refs[i].f      = av_frame_alloc();
        s->next_refs[i].f   = av_frame_alloc();
        if (!s->s.refs[i].f || !s->next_refs[i].f)
            goto fail;
    }

    return 0;

fail:
    /* vp9_decode_free() tolerates the partially-initialized state and
     * releases everything allocated so far. */
    vp9_decode_free(avctx);
    av_log(avctx, AV_LOG_ERROR, "Failed to allocate frame buffer %d\n", i);
    return AVERROR(ENOMEM);
}
1820
vp9_decode_init(AVCodecContext * avctx)1821 static av_cold int vp9_decode_init(AVCodecContext *avctx)
1822 {
1823 VP9Context *s = avctx->priv_data;
1824
1825 s->last_bpp = 0;
1826 s->s.h.filter.sharpness = -1;
1827
1828 return init_frames(avctx);
1829 }
1830
1831 #if HAVE_THREADS
/**
 * Frame-threading callback: copy the decoding state a future frame needs
 * from the source (just-finished) thread context into the destination one.
 *
 * @return 0 on success, a negative AVERROR on reference failure.
 */
static int vp9_decode_update_thread_context(AVCodecContext *dst, const AVCodecContext *src)
{
    VP9Context *s = dst->priv_data, *ssrc = src->priv_data;
    int i, ret;

    /* Re-reference the three internal frame slots from the source. */
    for (i = 0; i < 3; i++) {
        if (s->s.frames[i].tf.f->buf[0])
            vp9_frame_unref(dst, &s->s.frames[i]);
        if (ssrc->s.frames[i].tf.f->buf[0]) {
            ret = vp9_frame_ref(dst, &s->s.frames[i], &ssrc->s.frames[i]);
            if (ret < 0)
                return ret;
        }
    }

    /* The source's next_refs (post-frame reference set) become our
     * current reference slots. */
    for (i = 0; i < 8; i++) {
        if (s->s.refs[i].f->buf[0])
            ff_thread_release_buffer(dst, &s->s.refs[i]);
        if (ssrc->next_refs[i].f->buf[0]) {
            ret = ff_thread_ref_frame(&s->s.refs[i], &ssrc->next_refs[i]);
            if (ret < 0)
                return ret;
        }
    }

    /* Frame-header flags. */
    s->s.h.keyframe   = ssrc->s.h.keyframe;
    s->s.h.invisible  = ssrc->s.h.invisible;
    s->s.h.intraonly  = ssrc->s.h.intraonly;

    /* Pixel-format / geometry state. */
    s->ss_v           = ssrc->ss_v;
    s->ss_h           = ssrc->ss_h;
    s->w              = ssrc->w;
    s->h              = ssrc->h;
    s->s.h.bpp        = ssrc->s.h.bpp;
    s->bpp_index      = ssrc->bpp_index;
    s->bytesperpixel  = ssrc->bytesperpixel;
    s->gf_fmt         = ssrc->gf_fmt;
    s->pix_fmt        = ssrc->pix_fmt;

    /* Segmentation state. */
    s->s.h.segmentation.enabled       = ssrc->s.h.segmentation.enabled;
    s->s.h.segmentation.update_map    = ssrc->s.h.segmentation.update_map;
    s->s.h.segmentation.absolute_vals = ssrc->s.h.segmentation.absolute_vals;

    /* Bulk-copy the probability context and per-feature tables. */
    memcpy(&s->prob_ctx, &ssrc->prob_ctx, sizeof(s->prob_ctx));
    memcpy(&s->s.h.lf_delta, &ssrc->s.h.lf_delta, sizeof(s->s.h.lf_delta));
    memcpy(&s->s.h.segmentation.feat, &ssrc->s.h.segmentation.feat,
           sizeof(s->s.h.segmentation.feat));

    return 0;
}
1876 #endif
1877
/* Public registration entry for the native VP9 decoder. */
AVCodec ff_vp9_decoder = {
    .name                  = "vp9",
    .long_name             = NULL_IF_CONFIG_SMALL("Google VP9"),
    .type                  = AVMEDIA_TYPE_VIDEO,
    .id                    = AV_CODEC_ID_VP9,
    .priv_data_size        = sizeof(VP9Context),
    .init                  = vp9_decode_init,
    .close                 = vp9_decode_free,
    .decode                = vp9_decode_frame,
    /* Supports direct rendering plus both frame and slice (tile) threading. */
    .capabilities          = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_FRAME_THREADS | AV_CODEC_CAP_SLICE_THREADS,
    .caps_internal         = FF_CODEC_CAP_SLICE_THREAD_HAS_MF |
                             FF_CODEC_CAP_ALLOCATE_PROGRESS,
    .flush                 = vp9_decode_flush,
    /* Only meaningful when threading is compiled in. */
    .update_thread_context = ONLY_IF_THREADS_ENABLED(vp9_decode_update_thread_context),
    .profiles              = NULL_IF_CONFIG_SMALL(ff_vp9_profiles),
    /* Superframes are split into individual frames before reaching decode(). */
    .bsfs                  = "vp9_superframe_split",
    /* NULL-terminated list of the hardware acceleration backends compiled in. */
    .hw_configs            = (const AVCodecHWConfigInternal*[]) {
#if CONFIG_VP9_DXVA2_HWACCEL
                               HWACCEL_DXVA2(vp9),
#endif
#if CONFIG_VP9_D3D11VA_HWACCEL
                               HWACCEL_D3D11VA(vp9),
#endif
#if CONFIG_VP9_D3D11VA2_HWACCEL
                               HWACCEL_D3D11VA2(vp9),
#endif
#if CONFIG_VP9_NVDEC_HWACCEL
                               HWACCEL_NVDEC(vp9),
#endif
#if CONFIG_VP9_VAAPI_HWACCEL
                               HWACCEL_VAAPI(vp9),
#endif
#if CONFIG_VP9_VDPAU_HWACCEL
                               HWACCEL_VDPAU(vp9),
#endif
                               NULL
                           },
};
1916