/*
 * VP9 compatible video decoder
 *
 * Copyright (C) 2013 Ronald S. Bultje <rsbultje gmail com>
 * Copyright (C) 2013 Clément Bœsch <u pkh me>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
23
#include <limits.h>

#include "libavutil/avassert.h"
#include "libavutil/pixdesc.h"

#include "avcodec.h"
#include "get_bits.h"
#include "internal.h"
#include "profiles.h"
#include "thread.h"
#include "videodsp.h"
#include "vp56.h"
#include "vp9.h"
#include "vp9data.h"
#include "vp9dec.h"
36
37 #define VP9_SYNCCODE 0x498342
38
39 #if HAVE_THREADS
/* Tear down the slice-threading progress state: destroy the mutex and
 * condition variable and free the per-row progress counters. */
static void vp9_free_entries(AVCodecContext *avctx) {
    VP9Context *s = avctx->priv_data;

    if (!(avctx->active_thread_type & FF_THREAD_SLICE))
        return;

    pthread_mutex_destroy(&s->progress_mutex);
    pthread_cond_destroy(&s->progress_cond);
    av_freep(&s->entries);
}
49
/**
 * Allocate and zero the per-sb-row tile progress counters used for
 * slice threading, and (re)initialize the progress mutex/cond.
 *
 * @param n number of counters (superblock rows)
 * @return 0 on success, AVERROR(ENOMEM) on allocation failure
 */
static int vp9_alloc_entries(AVCodecContext *avctx, int n) {
    VP9Context *s = avctx->priv_data;
    int i;

    if (avctx->active_thread_type & FF_THREAD_SLICE) {
        // av_freep() is NULL-safe, so the old "if (s->entries)" guard was
        // redundant; freeing unconditionally also nulls the pointer so a
        // failed re-allocation below cannot leave it dangling.
        av_freep(&s->entries);

        s->entries = av_malloc_array(n, sizeof(atomic_int));
        if (!s->entries)
            return AVERROR(ENOMEM);

        for (i = 0; i < n; i++)
            atomic_init(&s->entries[i], 0);

        // NOTE(review): if this function is called twice without an
        // intervening vp9_free_entries(), the mutex/cond are re-initialized
        // without being destroyed first — verify callers pair alloc/free.
        pthread_mutex_init(&s->progress_mutex, NULL);
        pthread_cond_init(&s->progress_cond, NULL);
    }
    return 0;
}
73
// Advance the decode-progress counter for tile row `field` by `n` and wake
// any threads blocked in vp9_await_tile_progress().
static void vp9_report_tile_progress(VP9Context *s, int field, int n) {
    // The mutex is held around the add+signal so a waiter cannot miss the
    // update between re-checking the counter and calling pthread_cond_wait().
    pthread_mutex_lock(&s->progress_mutex);
    // memory_order_release pairs with the acquire load on the lock-free fast
    // path of vp9_await_tile_progress().
    atomic_fetch_add_explicit(&s->entries[field], n, memory_order_release);
    pthread_cond_signal(&s->progress_cond);
    pthread_mutex_unlock(&s->progress_mutex);
}
80
/**
 * Block until the progress counter for tile row `field` reaches at least n.
 */
static void vp9_await_tile_progress(VP9Context *s, int field, int n) {
    // Lock-free fast path; acquire pairs with the release-add in
    // vp9_report_tile_progress().
    if (atomic_load_explicit(&s->entries[field], memory_order_acquire) >= n)
        return;

    pthread_mutex_lock(&s->progress_mutex);
    // Fix: wait on "< n" instead of "!= n". The counter only ever grows, so
    // if it advanced past n between the unlocked check above and acquiring
    // the mutex, the old "!= n" condition would never become false and this
    // thread would wait forever. "< n" also matches the fast-path test.
    while (atomic_load_explicit(&s->entries[field], memory_order_relaxed) < n)
        pthread_cond_wait(&s->progress_cond, &s->progress_mutex);
    pthread_mutex_unlock(&s->progress_mutex);
}
90 #else
/* No-op fallbacks used when FFmpeg is built without thread support. */
static void vp9_free_entries(AVCodecContext *avctx) {}
static int vp9_alloc_entries(AVCodecContext *avctx, int n) { return 0; }
93 #endif
94
// Release all references held by a VP9Frame (picture buffer, extradata,
// hwaccel private data) and clear the pointers derived from them.
static void vp9_frame_unref(AVCodecContext *avctx, VP9Frame *f)
{
    ff_thread_release_buffer(avctx, &f->tf);
    av_buffer_unref(&f->extradata);
    av_buffer_unref(&f->hwaccel_priv_buf);
    // These aliased into the buffers released just above.
    f->segmentation_map = NULL;
    f->hwaccel_picture_private = NULL;
}
103
/**
 * Allocate a VP9Frame: the picture buffer itself, one extradata buffer that
 * packs the segmentation map (1 byte per 8x8 block) followed by the mv-ref
 * pairs, and — if a hwaccel is active — its per-frame private data.
 *
 * @return 0 on success, a negative AVERROR on failure (everything acquired
 *         so far is released via vp9_frame_unref()).
 */
static int vp9_frame_alloc(AVCodecContext *avctx, VP9Frame *f)
{
    VP9Context *s = avctx->priv_data;
    int ret, sz;

    ret = ff_thread_get_buffer(avctx, &f->tf, AV_GET_BUFFER_FLAG_REF);
    if (ret < 0)
        return ret;

    // One 8x8 block per map byte: 64 blocks per 64x64 superblock.
    sz = 64 * s->sb_cols * s->sb_rows;
    // Fix: guard the byte-size computation before it is narrowed into
    // av_buffer_allocz()'s int argument; the product is evaluated in size_t
    // and could silently exceed INT_MAX for extreme frame dimensions.
    if ((int64_t)sz * (1 + sizeof(VP9mvrefPair)) > INT_MAX)
        goto fail;
    f->extradata = av_buffer_allocz(sz * (1 + sizeof(VP9mvrefPair)));
    if (!f->extradata)
        goto fail;

    f->segmentation_map = f->extradata->data;
    f->mv = (VP9mvrefPair *) (f->extradata->data + sz);

    if (avctx->hwaccel) {
        const AVHWAccel *hwaccel = avctx->hwaccel;
        av_assert0(!f->hwaccel_picture_private);
        if (hwaccel->frame_priv_data_size) {
            f->hwaccel_priv_buf = av_buffer_allocz(hwaccel->frame_priv_data_size);
            if (!f->hwaccel_priv_buf)
                goto fail;
            f->hwaccel_picture_private = f->hwaccel_priv_buf->data;
        }
    }

    return 0;

fail:
    vp9_frame_unref(avctx, f);
    return AVERROR(ENOMEM);
}
139
/**
 * Make dst a new reference to the frame held by src: the picture buffer,
 * the extradata buffer, and (when present) the hwaccel private buffer.
 * On failure everything acquired so far is released and AVERROR(ENOMEM)
 * is returned.
 */
static int vp9_frame_ref(AVCodecContext *avctx, VP9Frame *dst, VP9Frame *src)
{
    int ret = ff_thread_ref_frame(&dst->tf, &src->tf);

    if (ret < 0)
        return ret;

    if (!(dst->extradata = av_buffer_ref(src->extradata)))
        goto fail;

    // These pointers alias into the (now shared) extradata buffer.
    dst->segmentation_map = src->segmentation_map;
    dst->mv               = src->mv;
    dst->uses_2pass       = src->uses_2pass;

    if (src->hwaccel_picture_private) {
        if (!(dst->hwaccel_priv_buf = av_buffer_ref(src->hwaccel_priv_buf)))
            goto fail;
        dst->hwaccel_picture_private = dst->hwaccel_priv_buf->data;
    }

    return 0;

fail:
    vp9_frame_unref(avctx, dst);
    return AVERROR(ENOMEM);
}
169
/**
 * (Re)configure the decoder for a new frame size and/or pixel format:
 * negotiate the output format (incl. hwaccel formats), then (re)allocate
 * the per-column "above" context arrays and intra prediction edge buffers.
 *
 * @return 0 on success or if nothing changed, negative AVERROR on failure
 */
static int update_size(AVCodecContext *avctx, int w, int h)
{
#define HWACCEL_MAX (CONFIG_VP9_DXVA2_HWACCEL + CONFIG_VP9_D3D11VA_HWACCEL * 2 + CONFIG_VP9_VAAPI_HWACCEL)
    enum AVPixelFormat pix_fmts[HWACCEL_MAX + 2], *fmtp = pix_fmts;
    VP9Context *s = avctx->priv_data;
    uint8_t *p;
    int bytesperpixel = s->bytesperpixel, ret, cols, rows;
    int lflvl_len, i;

    av_assert0(w > 0 && h > 0);

    // Re-run format negotiation only when size or format actually changed.
    if (!(s->pix_fmt == s->gf_fmt && w == s->w && h == s->h)) {
        if ((ret = ff_set_dimensions(avctx, w, h)) < 0)
            return ret;

        // Offer hwaccel formats (most preferred first) for the software
        // formats that have hardware decoding support compiled in.
        switch (s->pix_fmt) {
        case AV_PIX_FMT_YUV420P:
#if CONFIG_VP9_DXVA2_HWACCEL
            *fmtp++ = AV_PIX_FMT_DXVA2_VLD;
#endif
#if CONFIG_VP9_D3D11VA_HWACCEL
            *fmtp++ = AV_PIX_FMT_D3D11VA_VLD;
            *fmtp++ = AV_PIX_FMT_D3D11;
#endif
#if CONFIG_VP9_VAAPI_HWACCEL
            *fmtp++ = AV_PIX_FMT_VAAPI;
#endif
            break;
        case AV_PIX_FMT_YUV420P10:
        case AV_PIX_FMT_YUV420P12:
#if CONFIG_VP9_VAAPI_HWACCEL
            *fmtp++ = AV_PIX_FMT_VAAPI;
#endif
            break;
        }

        // The software format is always the last real entry, then the
        // AV_PIX_FMT_NONE terminator.
        *fmtp++ = s->pix_fmt;
        *fmtp = AV_PIX_FMT_NONE;

        ret = ff_thread_get_format(avctx, pix_fmts);
        if (ret < 0)
            return ret;

        avctx->pix_fmt = ret;
        s->gf_fmt = s->pix_fmt;
        s->w = w;
        s->h = h;
    }

    cols = (w + 7) >> 3;
    rows = (h + 7) >> 3;

    // Context buffers are still valid if the 8x8-block grid and format
    // are unchanged.
    if (s->intra_pred_data[0] && cols == s->cols && rows == s->rows && s->pix_fmt == s->last_fmt)
        return 0;

    s->last_fmt = s->pix_fmt;
    s->sb_cols = (w + 63) >> 6;  // 64x64 superblock grid
    s->sb_rows = (h + 63) >> 6;
    s->cols = (w + 7) >> 3;      // 8x8 block grid
    s->rows = (h + 7) >> 3;
    // With slice threading each sb row needs its own loopfilter state.
    lflvl_len = avctx->active_thread_type == FF_THREAD_SLICE ? s->sb_rows : 1;

    // Carve all per-column context arrays out of one allocation; `assign`
    // hands out successive sub-ranges of `p`, each s->sb_cols * n elements.
#define assign(var, type, n) var = (type) p; p += s->sb_cols * (n) * sizeof(*var)
    av_freep(&s->intra_pred_data[0]);
    // FIXME we slightly over-allocate here for subsampled chroma, but a little
    // bit of padding shouldn't affect performance...
    p = av_malloc(s->sb_cols * (128 + 192 * bytesperpixel +
                  lflvl_len * sizeof(*s->lflvl) + 16 * sizeof(*s->above_mv_ctx)));
    if (!p)
        return AVERROR(ENOMEM);
    assign(s->intra_pred_data[0], uint8_t *, 64 * bytesperpixel);
    assign(s->intra_pred_data[1], uint8_t *, 64 * bytesperpixel);
    assign(s->intra_pred_data[2], uint8_t *, 64 * bytesperpixel);
    assign(s->above_y_nnz_ctx, uint8_t *, 16);
    assign(s->above_mode_ctx, uint8_t *, 16);
    assign(s->above_mv_ctx, VP56mv(*)[2], 16);
    assign(s->above_uv_nnz_ctx[0], uint8_t *, 16);
    assign(s->above_uv_nnz_ctx[1], uint8_t *, 16);
    assign(s->above_partition_ctx, uint8_t *, 8);
    assign(s->above_skip_ctx, uint8_t *, 8);
    assign(s->above_txfm_ctx, uint8_t *, 8);
    assign(s->above_segpred_ctx, uint8_t *, 8);
    assign(s->above_intra_ctx, uint8_t *, 8);
    assign(s->above_comp_ctx, uint8_t *, 8);
    assign(s->above_ref_ctx, uint8_t *, 8);
    assign(s->above_filter_ctx, uint8_t *, 8);
    assign(s->lflvl, VP9Filter *, lflvl_len);
#undef assign

    // Per-tile block buffers are sized from the (now stale) old dimensions;
    // drop them so update_block_buffers() reallocates.
    if (s->td) {
        for (i = 0; i < s->active_tile_cols; i++) {
            av_freep(&s->td[i].b_base);
            av_freep(&s->td[i].block_base);
        }
    }

    // DSP function tables depend on bit depth only; reinit on change.
    if (s->s.h.bpp != s->last_bpp) {
        ff_vp9dsp_init(&s->dsp, s->s.h.bpp, avctx->flags & AV_CODEC_FLAG_BITEXACT);
        ff_videodsp_init(&s->vdsp, s->s.h.bpp);
        s->last_bpp = s->s.h.bpp;
    }

    return 0;
}
274
/**
 * (Re)allocate the per-tile coefficient/eob scratch buffers. In 2-pass mode
 * a single buffer covering the whole frame is used (tile 0 only); otherwise
 * each active tile column gets a one-superblock scratch buffer.
 *
 * @return 0 on success, AVERROR(ENOMEM) on allocation failure
 */
static int update_block_buffers(AVCodecContext *avctx)
{
    int i;
    VP9Context *s = avctx->priv_data;
    int chroma_blocks, chroma_eobs, bytesperpixel = s->bytesperpixel;
    VP9TileData *td = &s->td[0];

    // Nothing to do if buffers exist and the 2-pass mode didn't change.
    if (td->b_base && td->block_base && s->block_alloc_using_2pass == s->s.frames[CUR_FRAME].uses_2pass)
        return 0;

    av_free(td->b_base);
    av_free(td->block_base);
    // Chroma planes shrink with each subsampling direction.
    chroma_blocks = 64 * 64 >> (s->ss_h + s->ss_v);
    chroma_eobs = 16 * 16 >> (s->ss_h + s->ss_v);
    if (s->s.frames[CUR_FRAME].uses_2pass) {
        int sbs = s->sb_cols * s->sb_rows;

        td->b_base = av_malloc_array(s->cols * s->rows, sizeof(VP9Block));
        td->block_base = av_mallocz(((64 * 64 + 2 * chroma_blocks) * bytesperpixel * sizeof(int16_t) +
                                     16 * 16 + 2 * chroma_eobs) * sbs);
        if (!td->b_base || !td->block_base)
            return AVERROR(ENOMEM);
        td->uvblock_base[0] = td->block_base + sbs * 64 * 64 * bytesperpixel;
        td->uvblock_base[1] = td->uvblock_base[0] + sbs * chroma_blocks * bytesperpixel;
        td->eob_base = (uint8_t *) (td->uvblock_base[1] + sbs * chroma_blocks * bytesperpixel);
        td->uveob_base[0] = td->eob_base + 16 * 16 * sbs;
        td->uveob_base[1] = td->uveob_base[0] + chroma_eobs * sbs;
    } else {
        // Fix: free unconditionally with av_freep(). The old
        // "if (b_base && block_base)" guard leaked whichever buffer was
        // non-NULL when only one of the pair had been allocated, and
        // av_free() left stale pointers behind that could be double-freed
        // if an allocation below fails. av_freep(NULL-safe) fixes both.
        for (i = 1; i < s->active_tile_cols; i++) {
            av_freep(&s->td[i].b_base);
            av_freep(&s->td[i].block_base);
        }
        for (i = 0; i < s->active_tile_cols; i++) {
            s->td[i].b_base = av_malloc(sizeof(VP9Block));
            s->td[i].block_base = av_mallocz((64 * 64 + 2 * chroma_blocks) * bytesperpixel * sizeof(int16_t) +
                                             16 * 16 + 2 * chroma_eobs);
            if (!s->td[i].b_base || !s->td[i].block_base)
                return AVERROR(ENOMEM);
            s->td[i].uvblock_base[0] = s->td[i].block_base + 64 * 64 * bytesperpixel;
            s->td[i].uvblock_base[1] = s->td[i].uvblock_base[0] + chroma_blocks * bytesperpixel;
            s->td[i].eob_base = (uint8_t *) (s->td[i].uvblock_base[1] + chroma_blocks * bytesperpixel);
            s->td[i].uveob_base[0] = s->td[i].eob_base + 16 * 16;
            s->td[i].uveob_base[1] = s->td[i].uveob_base[0] + chroma_eobs;
        }
    }
    s->block_alloc_using_2pass = s->s.frames[CUR_FRAME].uses_2pass;

    return 0;
}
326
327 // The sign bit is at the end, not the start, of a bit sequence
get_sbits_inv(GetBitContext * gb,int n)328 static av_always_inline int get_sbits_inv(GetBitContext *gb, int n)
329 {
330 int v = get_bits(gb, n);
331 return get_bits1(gb) ? -v : v;
332 }
333
/* Undo the "recentering" of a non-negative value around m: small deltas v
 * alternate below/above m, while v > 2*m maps to itself. Inverse of the
 * recenter_nonneg transform used by VP9 probability updates. */
static inline int inv_recenter_nonneg(int v, int m)
{
    if (v > 2 * m)
        return v;
    return (v & 1) ? m - ((v + 1) >> 1)
                   : m + (v >> 1);
}
342
// differential forward probability updates
//
// Decode a delta against the current probability p (both in [1, 255]) and
// return the updated probability. The delta's magnitude is read with a
// 4-level VLC (cheaper codes for smaller changes), mapped through
// inv_map_table[] and re-centered around p by inv_recenter_nonneg().
static int update_prob(VP56RangeCoder *c, int p)
{
    // Maps the VLC-decoded index to the actual absolute delta. The first 20
    // entries are the coarse "cheap" update targets; the rest fill in the
    // remaining values of [1, 254] in order.
    static const int inv_map_table[255] = {
          7,  20,  33,  46,  59,  72,  85,  98, 111, 124, 137, 150, 163, 176,
        189, 202, 215, 228, 241, 254,   1,   2,   3,   4,   5,   6,   8,   9,
         10,  11,  12,  13,  14,  15,  16,  17,  18,  19,  21,  22,  23,  24,
         25,  26,  27,  28,  29,  30,  31,  32,  34,  35,  36,  37,  38,  39,
         40,  41,  42,  43,  44,  45,  47,  48,  49,  50,  51,  52,  53,  54,
         55,  56,  57,  58,  60,  61,  62,  63,  64,  65,  66,  67,  68,  69,
         70,  71,  73,  74,  75,  76,  77,  78,  79,  80,  81,  82,  83,  84,
         86,  87,  88,  89,  90,  91,  92,  93,  94,  95,  96,  97,  99, 100,
        101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 112, 113, 114, 115,
        116, 117, 118, 119, 120, 121, 122, 123, 125, 126, 127, 128, 129, 130,
        131, 132, 133, 134, 135, 136, 138, 139, 140, 141, 142, 143, 144, 145,
        146, 147, 148, 149, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160,
        161, 162, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
        177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 190, 191,
        192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 203, 204, 205, 206,
        207, 208, 209, 210, 211, 212, 213, 214, 216, 217, 218, 219, 220, 221,
        222, 223, 224, 225, 226, 227, 229, 230, 231, 232, 233, 234, 235, 236,
        237, 238, 239, 240, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251,
        252, 253, 253,
    };
    int d;

    /* This code is trying to do a differential probability update. For a
     * current probability A in the range [1, 255], the difference to a new
     * probability of any value can be expressed differentially as 1-A, 255-A
     * where some part of this (absolute range) exists both in positive as
     * well as the negative part, whereas another part only exists in one
     * half. We're trying to code this shared part differentially, i.e.
     * times two where the value of the lowest bit specifies the sign, and
     * the single part is then coded on top of this. This absolute difference
     * then again has a value of [0, 254], but a bigger value in this range
     * indicates that we're further away from the original value A, so we
     * can code this as a VLC code, since higher values are increasingly
     * unlikely. The first 20 values in inv_map_table[] allow 'cheap, rough'
     * updates vs. the 'fine, exact' updates further down the range, which
     * adds one extra dimension to this differential update model. */

    // 4-level VLC: index ranges [0,16), [16,32), [32,64), [64,255).
    if (!vp8_rac_get(c)) {
        d = vp8_rac_get_uint(c, 4) + 0;
    } else if (!vp8_rac_get(c)) {
        d = vp8_rac_get_uint(c, 4) + 16;
    } else if (!vp8_rac_get(c)) {
        d = vp8_rac_get_uint(c, 5) + 32;
    } else {
        d = vp8_rac_get_uint(c, 7);
        // Values >= 65 get one extra resolution bit.
        if (d >= 65)
            d = (d << 1) - 65 + vp8_rac_get(c);
        d += 64;
        av_assert2(d < FF_ARRAY_ELEMS(inv_map_table));
    }

    // Re-center the delta around p, mirroring for p > 128 so the result
    // stays within [1, 255].
    return p <= 128 ? 1 + inv_recenter_nonneg(inv_map_table[d], p - 1) :
                    255 - inv_recenter_nonneg(inv_map_table[d], 255 - p);
}
401
/**
 * Parse the VP9 color config from the uncompressed header: bit depth
 * (profiles 2/3 only), colorspace, color range and chroma subsampling.
 * Sets s->bpp_index/bpp/bytesperpixel/ss_h/ss_v/pix_fmt and the avctx
 * colorspace/color_range fields.
 *
 * @return 0 on success, AVERROR_INVALIDDATA on reserved/invalid combinations
 */
static int read_colorspace_details(AVCodecContext *avctx)
{
    static const enum AVColorSpace colorspaces[8] = {
        AVCOL_SPC_UNSPECIFIED, AVCOL_SPC_BT470BG, AVCOL_SPC_BT709, AVCOL_SPC_SMPTE170M,
        AVCOL_SPC_SMPTE240M, AVCOL_SPC_BT2020_NCL, AVCOL_SPC_RESERVED, AVCOL_SPC_RGB,
    };
    VP9Context *s = avctx->priv_data;
    // Profiles 0/1 are always 8-bit; profiles 2/3 signal 10 vs 12 bit.
    int bits = avctx->profile <= 1 ? 0 : 1 + get_bits1(&s->gb); // 0:8, 1:10, 2:12

    s->bpp_index = bits;
    s->s.h.bpp = 8 + bits * 2;
    s->bytesperpixel = (7 + s->s.h.bpp) >> 3;
    avctx->colorspace = colorspaces[get_bits(&s->gb, 3)];
    if (avctx->colorspace == AVCOL_SPC_RGB) { // RGB = profile 1
        static const enum AVPixelFormat pix_fmt_rgb[3] = {
            AV_PIX_FMT_GBRP, AV_PIX_FMT_GBRP10, AV_PIX_FMT_GBRP12
        };
        // RGB is never subsampled and always full range.
        s->ss_h = s->ss_v = 0;
        avctx->color_range = AVCOL_RANGE_JPEG;
        s->pix_fmt = pix_fmt_rgb[bits];
        if (avctx->profile & 1) {
            // Odd profiles carry a reserved bit that must be zero.
            if (get_bits1(&s->gb)) {
                av_log(avctx, AV_LOG_ERROR, "Reserved bit set in RGB\n");
                return AVERROR_INVALIDDATA;
            }
        } else {
            av_log(avctx, AV_LOG_ERROR, "RGB not supported in profile %d\n",
                   avctx->profile);
            return AVERROR_INVALIDDATA;
        }
    } else {
        // Indexed by [bit-depth][vertical subsampling][horizontal subsampling].
        static const enum AVPixelFormat pix_fmt_for_ss[3][2 /* v */][2 /* h */] = {
            { { AV_PIX_FMT_YUV444P, AV_PIX_FMT_YUV422P },
              { AV_PIX_FMT_YUV440P, AV_PIX_FMT_YUV420P } },
            { { AV_PIX_FMT_YUV444P10, AV_PIX_FMT_YUV422P10 },
              { AV_PIX_FMT_YUV440P10, AV_PIX_FMT_YUV420P10 } },
            { { AV_PIX_FMT_YUV444P12, AV_PIX_FMT_YUV422P12 },
              { AV_PIX_FMT_YUV440P12, AV_PIX_FMT_YUV420P12 } }
        };
        avctx->color_range = get_bits1(&s->gb) ? AVCOL_RANGE_JPEG : AVCOL_RANGE_MPEG;
        if (avctx->profile & 1) {
            // Odd profiles signal subsampling explicitly; 4:2:0 is reserved
            // for even profiles, and a trailing reserved bit must be zero.
            s->ss_h = get_bits1(&s->gb);
            s->ss_v = get_bits1(&s->gb);
            s->pix_fmt = pix_fmt_for_ss[bits][s->ss_v][s->ss_h];
            if (s->pix_fmt == AV_PIX_FMT_YUV420P) {
                av_log(avctx, AV_LOG_ERROR, "YUV 4:2:0 not supported in profile %d\n",
                       avctx->profile);
                return AVERROR_INVALIDDATA;
            } else if (get_bits1(&s->gb)) {
                av_log(avctx, AV_LOG_ERROR, "Profile %d color details reserved bit set\n",
                       avctx->profile);
                return AVERROR_INVALIDDATA;
            }
        } else {
            // Even profiles are implicitly 4:2:0.
            s->ss_h = s->ss_v = 1;
            s->pix_fmt = pix_fmt_for_ss[bits][1][1];
        }
    }

    return 0;
}
463
decode_frame_header(AVCodecContext * avctx,const uint8_t * data,int size,int * ref)464 static int decode_frame_header(AVCodecContext *avctx,
465 const uint8_t *data, int size, int *ref)
466 {
467 VP9Context *s = avctx->priv_data;
468 int c, i, j, k, l, m, n, w, h, max, size2, ret, sharp;
469 int last_invisible;
470 const uint8_t *data2;
471
472 /* general header */
473 if ((ret = init_get_bits8(&s->gb, data, size)) < 0) {
474 av_log(avctx, AV_LOG_ERROR, "Failed to initialize bitstream reader\n");
475 return ret;
476 }
477 if (get_bits(&s->gb, 2) != 0x2) { // frame marker
478 av_log(avctx, AV_LOG_ERROR, "Invalid frame marker\n");
479 return AVERROR_INVALIDDATA;
480 }
481 avctx->profile = get_bits1(&s->gb);
482 avctx->profile |= get_bits1(&s->gb) << 1;
483 if (avctx->profile == 3) avctx->profile += get_bits1(&s->gb);
484 if (avctx->profile > 3) {
485 av_log(avctx, AV_LOG_ERROR, "Profile %d is not yet supported\n", avctx->profile);
486 return AVERROR_INVALIDDATA;
487 }
488 s->s.h.profile = avctx->profile;
489 if (get_bits1(&s->gb)) {
490 *ref = get_bits(&s->gb, 3);
491 return 0;
492 }
493
494 s->last_keyframe = s->s.h.keyframe;
495 s->s.h.keyframe = !get_bits1(&s->gb);
496
497 last_invisible = s->s.h.invisible;
498 s->s.h.invisible = !get_bits1(&s->gb);
499 s->s.h.errorres = get_bits1(&s->gb);
500 s->s.h.use_last_frame_mvs = !s->s.h.errorres && !last_invisible;
501
502 if (s->s.h.keyframe) {
503 if (get_bits_long(&s->gb, 24) != VP9_SYNCCODE) { // synccode
504 av_log(avctx, AV_LOG_ERROR, "Invalid sync code\n");
505 return AVERROR_INVALIDDATA;
506 }
507 if ((ret = read_colorspace_details(avctx)) < 0)
508 return ret;
509 // for profile 1, here follows the subsampling bits
510 s->s.h.refreshrefmask = 0xff;
511 w = get_bits(&s->gb, 16) + 1;
512 h = get_bits(&s->gb, 16) + 1;
513 if (get_bits1(&s->gb)) // display size
514 skip_bits(&s->gb, 32);
515 } else {
516 s->s.h.intraonly = s->s.h.invisible ? get_bits1(&s->gb) : 0;
517 s->s.h.resetctx = s->s.h.errorres ? 0 : get_bits(&s->gb, 2);
518 if (s->s.h.intraonly) {
519 if (get_bits_long(&s->gb, 24) != VP9_SYNCCODE) { // synccode
520 av_log(avctx, AV_LOG_ERROR, "Invalid sync code\n");
521 return AVERROR_INVALIDDATA;
522 }
523 if (avctx->profile >= 1) {
524 if ((ret = read_colorspace_details(avctx)) < 0)
525 return ret;
526 } else {
527 s->ss_h = s->ss_v = 1;
528 s->s.h.bpp = 8;
529 s->bpp_index = 0;
530 s->bytesperpixel = 1;
531 s->pix_fmt = AV_PIX_FMT_YUV420P;
532 avctx->colorspace = AVCOL_SPC_BT470BG;
533 avctx->color_range = AVCOL_RANGE_MPEG;
534 }
535 s->s.h.refreshrefmask = get_bits(&s->gb, 8);
536 w = get_bits(&s->gb, 16) + 1;
537 h = get_bits(&s->gb, 16) + 1;
538 if (get_bits1(&s->gb)) // display size
539 skip_bits(&s->gb, 32);
540 } else {
541 s->s.h.refreshrefmask = get_bits(&s->gb, 8);
542 s->s.h.refidx[0] = get_bits(&s->gb, 3);
543 s->s.h.signbias[0] = get_bits1(&s->gb) && !s->s.h.errorres;
544 s->s.h.refidx[1] = get_bits(&s->gb, 3);
545 s->s.h.signbias[1] = get_bits1(&s->gb) && !s->s.h.errorres;
546 s->s.h.refidx[2] = get_bits(&s->gb, 3);
547 s->s.h.signbias[2] = get_bits1(&s->gb) && !s->s.h.errorres;
548 if (!s->s.refs[s->s.h.refidx[0]].f->buf[0] ||
549 !s->s.refs[s->s.h.refidx[1]].f->buf[0] ||
550 !s->s.refs[s->s.h.refidx[2]].f->buf[0]) {
551 av_log(avctx, AV_LOG_ERROR, "Not all references are available\n");
552 return AVERROR_INVALIDDATA;
553 }
554 if (get_bits1(&s->gb)) {
555 w = s->s.refs[s->s.h.refidx[0]].f->width;
556 h = s->s.refs[s->s.h.refidx[0]].f->height;
557 } else if (get_bits1(&s->gb)) {
558 w = s->s.refs[s->s.h.refidx[1]].f->width;
559 h = s->s.refs[s->s.h.refidx[1]].f->height;
560 } else if (get_bits1(&s->gb)) {
561 w = s->s.refs[s->s.h.refidx[2]].f->width;
562 h = s->s.refs[s->s.h.refidx[2]].f->height;
563 } else {
564 w = get_bits(&s->gb, 16) + 1;
565 h = get_bits(&s->gb, 16) + 1;
566 }
567 // Note that in this code, "CUR_FRAME" is actually before we
568 // have formally allocated a frame, and thus actually represents
569 // the _last_ frame
570 s->s.h.use_last_frame_mvs &= s->s.frames[CUR_FRAME].tf.f->width == w &&
571 s->s.frames[CUR_FRAME].tf.f->height == h;
572 if (get_bits1(&s->gb)) // display size
573 skip_bits(&s->gb, 32);
574 s->s.h.highprecisionmvs = get_bits1(&s->gb);
575 s->s.h.filtermode = get_bits1(&s->gb) ? FILTER_SWITCHABLE :
576 get_bits(&s->gb, 2);
577 s->s.h.allowcompinter = s->s.h.signbias[0] != s->s.h.signbias[1] ||
578 s->s.h.signbias[0] != s->s.h.signbias[2];
579 if (s->s.h.allowcompinter) {
580 if (s->s.h.signbias[0] == s->s.h.signbias[1]) {
581 s->s.h.fixcompref = 2;
582 s->s.h.varcompref[0] = 0;
583 s->s.h.varcompref[1] = 1;
584 } else if (s->s.h.signbias[0] == s->s.h.signbias[2]) {
585 s->s.h.fixcompref = 1;
586 s->s.h.varcompref[0] = 0;
587 s->s.h.varcompref[1] = 2;
588 } else {
589 s->s.h.fixcompref = 0;
590 s->s.h.varcompref[0] = 1;
591 s->s.h.varcompref[1] = 2;
592 }
593 }
594 }
595 }
596 s->s.h.refreshctx = s->s.h.errorres ? 0 : get_bits1(&s->gb);
597 s->s.h.parallelmode = s->s.h.errorres ? 1 : get_bits1(&s->gb);
598 s->s.h.framectxid = c = get_bits(&s->gb, 2);
599 if (s->s.h.keyframe || s->s.h.intraonly)
600 s->s.h.framectxid = 0; // BUG: libvpx ignores this field in keyframes
601
602 /* loopfilter header data */
603 if (s->s.h.keyframe || s->s.h.errorres || s->s.h.intraonly) {
604 // reset loopfilter defaults
605 s->s.h.lf_delta.ref[0] = 1;
606 s->s.h.lf_delta.ref[1] = 0;
607 s->s.h.lf_delta.ref[2] = -1;
608 s->s.h.lf_delta.ref[3] = -1;
609 s->s.h.lf_delta.mode[0] = 0;
610 s->s.h.lf_delta.mode[1] = 0;
611 memset(s->s.h.segmentation.feat, 0, sizeof(s->s.h.segmentation.feat));
612 }
613 s->s.h.filter.level = get_bits(&s->gb, 6);
614 sharp = get_bits(&s->gb, 3);
615 // if sharpness changed, reinit lim/mblim LUTs. if it didn't change, keep
616 // the old cache values since they are still valid
617 if (s->s.h.filter.sharpness != sharp) {
618 for (i = 1; i <= 63; i++) {
619 int limit = i;
620
621 if (sharp > 0) {
622 limit >>= (sharp + 3) >> 2;
623 limit = FFMIN(limit, 9 - sharp);
624 }
625 limit = FFMAX(limit, 1);
626
627 s->filter_lut.lim_lut[i] = limit;
628 s->filter_lut.mblim_lut[i] = 2 * (i + 2) + limit;
629 }
630 }
631 s->s.h.filter.sharpness = sharp;
632 if ((s->s.h.lf_delta.enabled = get_bits1(&s->gb))) {
633 if ((s->s.h.lf_delta.updated = get_bits1(&s->gb))) {
634 for (i = 0; i < 4; i++)
635 if (get_bits1(&s->gb))
636 s->s.h.lf_delta.ref[i] = get_sbits_inv(&s->gb, 6);
637 for (i = 0; i < 2; i++)
638 if (get_bits1(&s->gb))
639 s->s.h.lf_delta.mode[i] = get_sbits_inv(&s->gb, 6);
640 }
641 }
642
643 /* quantization header data */
644 s->s.h.yac_qi = get_bits(&s->gb, 8);
645 s->s.h.ydc_qdelta = get_bits1(&s->gb) ? get_sbits_inv(&s->gb, 4) : 0;
646 s->s.h.uvdc_qdelta = get_bits1(&s->gb) ? get_sbits_inv(&s->gb, 4) : 0;
647 s->s.h.uvac_qdelta = get_bits1(&s->gb) ? get_sbits_inv(&s->gb, 4) : 0;
648 s->s.h.lossless = s->s.h.yac_qi == 0 && s->s.h.ydc_qdelta == 0 &&
649 s->s.h.uvdc_qdelta == 0 && s->s.h.uvac_qdelta == 0;
650 if (s->s.h.lossless)
651 avctx->properties |= FF_CODEC_PROPERTY_LOSSLESS;
652
653 /* segmentation header info */
654 if ((s->s.h.segmentation.enabled = get_bits1(&s->gb))) {
655 if ((s->s.h.segmentation.update_map = get_bits1(&s->gb))) {
656 for (i = 0; i < 7; i++)
657 s->s.h.segmentation.prob[i] = get_bits1(&s->gb) ?
658 get_bits(&s->gb, 8) : 255;
659 if ((s->s.h.segmentation.temporal = get_bits1(&s->gb)))
660 for (i = 0; i < 3; i++)
661 s->s.h.segmentation.pred_prob[i] = get_bits1(&s->gb) ?
662 get_bits(&s->gb, 8) : 255;
663 }
664
665 if (get_bits1(&s->gb)) {
666 s->s.h.segmentation.absolute_vals = get_bits1(&s->gb);
667 for (i = 0; i < 8; i++) {
668 if ((s->s.h.segmentation.feat[i].q_enabled = get_bits1(&s->gb)))
669 s->s.h.segmentation.feat[i].q_val = get_sbits_inv(&s->gb, 8);
670 if ((s->s.h.segmentation.feat[i].lf_enabled = get_bits1(&s->gb)))
671 s->s.h.segmentation.feat[i].lf_val = get_sbits_inv(&s->gb, 6);
672 if ((s->s.h.segmentation.feat[i].ref_enabled = get_bits1(&s->gb)))
673 s->s.h.segmentation.feat[i].ref_val = get_bits(&s->gb, 2);
674 s->s.h.segmentation.feat[i].skip_enabled = get_bits1(&s->gb);
675 }
676 }
677 }
678
679 // set qmul[] based on Y/UV, AC/DC and segmentation Q idx deltas
680 for (i = 0; i < (s->s.h.segmentation.enabled ? 8 : 1); i++) {
681 int qyac, qydc, quvac, quvdc, lflvl, sh;
682
683 if (s->s.h.segmentation.enabled && s->s.h.segmentation.feat[i].q_enabled) {
684 if (s->s.h.segmentation.absolute_vals)
685 qyac = av_clip_uintp2(s->s.h.segmentation.feat[i].q_val, 8);
686 else
687 qyac = av_clip_uintp2(s->s.h.yac_qi + s->s.h.segmentation.feat[i].q_val, 8);
688 } else {
689 qyac = s->s.h.yac_qi;
690 }
691 qydc = av_clip_uintp2(qyac + s->s.h.ydc_qdelta, 8);
692 quvdc = av_clip_uintp2(qyac + s->s.h.uvdc_qdelta, 8);
693 quvac = av_clip_uintp2(qyac + s->s.h.uvac_qdelta, 8);
694 qyac = av_clip_uintp2(qyac, 8);
695
696 s->s.h.segmentation.feat[i].qmul[0][0] = ff_vp9_dc_qlookup[s->bpp_index][qydc];
697 s->s.h.segmentation.feat[i].qmul[0][1] = ff_vp9_ac_qlookup[s->bpp_index][qyac];
698 s->s.h.segmentation.feat[i].qmul[1][0] = ff_vp9_dc_qlookup[s->bpp_index][quvdc];
699 s->s.h.segmentation.feat[i].qmul[1][1] = ff_vp9_ac_qlookup[s->bpp_index][quvac];
700
701 sh = s->s.h.filter.level >= 32;
702 if (s->s.h.segmentation.enabled && s->s.h.segmentation.feat[i].lf_enabled) {
703 if (s->s.h.segmentation.absolute_vals)
704 lflvl = av_clip_uintp2(s->s.h.segmentation.feat[i].lf_val, 6);
705 else
706 lflvl = av_clip_uintp2(s->s.h.filter.level + s->s.h.segmentation.feat[i].lf_val, 6);
707 } else {
708 lflvl = s->s.h.filter.level;
709 }
710 if (s->s.h.lf_delta.enabled) {
711 s->s.h.segmentation.feat[i].lflvl[0][0] =
712 s->s.h.segmentation.feat[i].lflvl[0][1] =
713 av_clip_uintp2(lflvl + (s->s.h.lf_delta.ref[0] * (1 << sh)), 6);
714 for (j = 1; j < 4; j++) {
715 s->s.h.segmentation.feat[i].lflvl[j][0] =
716 av_clip_uintp2(lflvl + ((s->s.h.lf_delta.ref[j] +
717 s->s.h.lf_delta.mode[0]) * (1 << sh)), 6);
718 s->s.h.segmentation.feat[i].lflvl[j][1] =
719 av_clip_uintp2(lflvl + ((s->s.h.lf_delta.ref[j] +
720 s->s.h.lf_delta.mode[1]) * (1 << sh)), 6);
721 }
722 } else {
723 memset(s->s.h.segmentation.feat[i].lflvl, lflvl,
724 sizeof(s->s.h.segmentation.feat[i].lflvl));
725 }
726 }
727
728 /* tiling info */
729 if ((ret = update_size(avctx, w, h)) < 0) {
730 av_log(avctx, AV_LOG_ERROR, "Failed to initialize decoder for %dx%d @ %d\n",
731 w, h, s->pix_fmt);
732 return ret;
733 }
734 for (s->s.h.tiling.log2_tile_cols = 0;
735 s->sb_cols > (64 << s->s.h.tiling.log2_tile_cols);
736 s->s.h.tiling.log2_tile_cols++) ;
737 for (max = 0; (s->sb_cols >> max) >= 4; max++) ;
738 max = FFMAX(0, max - 1);
739 while (max > s->s.h.tiling.log2_tile_cols) {
740 if (get_bits1(&s->gb))
741 s->s.h.tiling.log2_tile_cols++;
742 else
743 break;
744 }
745 s->s.h.tiling.log2_tile_rows = decode012(&s->gb);
746 s->s.h.tiling.tile_rows = 1 << s->s.h.tiling.log2_tile_rows;
747 if (s->s.h.tiling.tile_cols != (1 << s->s.h.tiling.log2_tile_cols)) {
748 int n_range_coders;
749 VP56RangeCoder *rc;
750
751 if (s->td) {
752 for (i = 0; i < s->active_tile_cols; i++) {
753 av_free(s->td[i].b_base);
754 av_free(s->td[i].block_base);
755 }
756 av_free(s->td);
757 }
758
759 s->s.h.tiling.tile_cols = 1 << s->s.h.tiling.log2_tile_cols;
760 vp9_free_entries(avctx);
761 s->active_tile_cols = avctx->active_thread_type == FF_THREAD_SLICE ?
762 s->s.h.tiling.tile_cols : 1;
763 vp9_alloc_entries(avctx, s->sb_rows);
764 if (avctx->active_thread_type == FF_THREAD_SLICE) {
765 n_range_coders = 4; // max_tile_rows
766 } else {
767 n_range_coders = s->s.h.tiling.tile_cols;
768 }
769 s->td = av_mallocz_array(s->active_tile_cols, sizeof(VP9TileData) +
770 n_range_coders * sizeof(VP56RangeCoder));
771 if (!s->td)
772 return AVERROR(ENOMEM);
773 rc = (VP56RangeCoder *) &s->td[s->active_tile_cols];
774 for (i = 0; i < s->active_tile_cols; i++) {
775 s->td[i].s = s;
776 s->td[i].c_b = rc;
777 rc += n_range_coders;
778 }
779 }
780
781 /* check reference frames */
782 if (!s->s.h.keyframe && !s->s.h.intraonly) {
783 for (i = 0; i < 3; i++) {
784 AVFrame *ref = s->s.refs[s->s.h.refidx[i]].f;
785 int refw = ref->width, refh = ref->height;
786
787 if (ref->format != avctx->pix_fmt) {
788 av_log(avctx, AV_LOG_ERROR,
789 "Ref pixfmt (%s) did not match current frame (%s)",
790 av_get_pix_fmt_name(ref->format),
791 av_get_pix_fmt_name(avctx->pix_fmt));
792 return AVERROR_INVALIDDATA;
793 } else if (refw == w && refh == h) {
794 s->mvscale[i][0] = s->mvscale[i][1] = 0;
795 } else {
796 if (w * 2 < refw || h * 2 < refh || w > 16 * refw || h > 16 * refh) {
797 av_log(avctx, AV_LOG_ERROR,
798 "Invalid ref frame dimensions %dx%d for frame size %dx%d\n",
799 refw, refh, w, h);
800 return AVERROR_INVALIDDATA;
801 }
802 s->mvscale[i][0] = (refw << 14) / w;
803 s->mvscale[i][1] = (refh << 14) / h;
804 s->mvstep[i][0] = 16 * s->mvscale[i][0] >> 14;
805 s->mvstep[i][1] = 16 * s->mvscale[i][1] >> 14;
806 }
807 }
808 }
809
810 if (s->s.h.keyframe || s->s.h.errorres || (s->s.h.intraonly && s->s.h.resetctx == 3)) {
811 s->prob_ctx[0].p = s->prob_ctx[1].p = s->prob_ctx[2].p =
812 s->prob_ctx[3].p = ff_vp9_default_probs;
813 memcpy(s->prob_ctx[0].coef, ff_vp9_default_coef_probs,
814 sizeof(ff_vp9_default_coef_probs));
815 memcpy(s->prob_ctx[1].coef, ff_vp9_default_coef_probs,
816 sizeof(ff_vp9_default_coef_probs));
817 memcpy(s->prob_ctx[2].coef, ff_vp9_default_coef_probs,
818 sizeof(ff_vp9_default_coef_probs));
819 memcpy(s->prob_ctx[3].coef, ff_vp9_default_coef_probs,
820 sizeof(ff_vp9_default_coef_probs));
821 } else if (s->s.h.intraonly && s->s.h.resetctx == 2) {
822 s->prob_ctx[c].p = ff_vp9_default_probs;
823 memcpy(s->prob_ctx[c].coef, ff_vp9_default_coef_probs,
824 sizeof(ff_vp9_default_coef_probs));
825 }
826
827 // next 16 bits is size of the rest of the header (arith-coded)
828 s->s.h.compressed_header_size = size2 = get_bits(&s->gb, 16);
829 s->s.h.uncompressed_header_size = (get_bits_count(&s->gb) + 7) / 8;
830
831 data2 = align_get_bits(&s->gb);
832 if (size2 > size - (data2 - data)) {
833 av_log(avctx, AV_LOG_ERROR, "Invalid compressed header size\n");
834 return AVERROR_INVALIDDATA;
835 }
836 ret = ff_vp56_init_range_decoder(&s->c, data2, size2);
837 if (ret < 0)
838 return ret;
839
840 if (vp56_rac_get_prob_branchy(&s->c, 128)) { // marker bit
841 av_log(avctx, AV_LOG_ERROR, "Marker bit was set\n");
842 return AVERROR_INVALIDDATA;
843 }
844
845 for (i = 0; i < s->active_tile_cols; i++) {
846 if (s->s.h.keyframe || s->s.h.intraonly) {
847 memset(s->td[i].counts.coef, 0, sizeof(s->td[0].counts.coef));
848 memset(s->td[i].counts.eob, 0, sizeof(s->td[0].counts.eob));
849 } else {
850 memset(&s->td[i].counts, 0, sizeof(s->td[0].counts));
851 }
852 }
853
854 /* FIXME is it faster to not copy here, but do it down in the fw updates
855 * as explicit copies if the fw update is missing (and skip the copy upon
856 * fw update)? */
857 s->prob.p = s->prob_ctx[c].p;
858
859 // txfm updates
860 if (s->s.h.lossless) {
861 s->s.h.txfmmode = TX_4X4;
862 } else {
863 s->s.h.txfmmode = vp8_rac_get_uint(&s->c, 2);
864 if (s->s.h.txfmmode == 3)
865 s->s.h.txfmmode += vp8_rac_get(&s->c);
866
867 if (s->s.h.txfmmode == TX_SWITCHABLE) {
868 for (i = 0; i < 2; i++)
869 if (vp56_rac_get_prob_branchy(&s->c, 252))
870 s->prob.p.tx8p[i] = update_prob(&s->c, s->prob.p.tx8p[i]);
871 for (i = 0; i < 2; i++)
872 for (j = 0; j < 2; j++)
873 if (vp56_rac_get_prob_branchy(&s->c, 252))
874 s->prob.p.tx16p[i][j] =
875 update_prob(&s->c, s->prob.p.tx16p[i][j]);
876 for (i = 0; i < 2; i++)
877 for (j = 0; j < 3; j++)
878 if (vp56_rac_get_prob_branchy(&s->c, 252))
879 s->prob.p.tx32p[i][j] =
880 update_prob(&s->c, s->prob.p.tx32p[i][j]);
881 }
882 }
883
884 // coef updates
885 for (i = 0; i < 4; i++) {
886 uint8_t (*ref)[2][6][6][3] = s->prob_ctx[c].coef[i];
887 if (vp8_rac_get(&s->c)) {
888 for (j = 0; j < 2; j++)
889 for (k = 0; k < 2; k++)
890 for (l = 0; l < 6; l++)
891 for (m = 0; m < 6; m++) {
892 uint8_t *p = s->prob.coef[i][j][k][l][m];
893 uint8_t *r = ref[j][k][l][m];
894 if (m >= 3 && l == 0) // dc only has 3 pt
895 break;
896 for (n = 0; n < 3; n++) {
897 if (vp56_rac_get_prob_branchy(&s->c, 252))
898 p[n] = update_prob(&s->c, r[n]);
899 else
900 p[n] = r[n];
901 }
902 memcpy(&p[3], ff_vp9_model_pareto8[p[2]], 8);
903 }
904 } else {
905 for (j = 0; j < 2; j++)
906 for (k = 0; k < 2; k++)
907 for (l = 0; l < 6; l++)
908 for (m = 0; m < 6; m++) {
909 uint8_t *p = s->prob.coef[i][j][k][l][m];
910 uint8_t *r = ref[j][k][l][m];
911 if (m > 3 && l == 0) // dc only has 3 pt
912 break;
913 memcpy(p, r, 3);
914 memcpy(&p[3], ff_vp9_model_pareto8[p[2]], 8);
915 }
916 }
917 if (s->s.h.txfmmode == i)
918 break;
919 }
920
921 // mode updates
922 for (i = 0; i < 3; i++)
923 if (vp56_rac_get_prob_branchy(&s->c, 252))
924 s->prob.p.skip[i] = update_prob(&s->c, s->prob.p.skip[i]);
925 if (!s->s.h.keyframe && !s->s.h.intraonly) {
926 for (i = 0; i < 7; i++)
927 for (j = 0; j < 3; j++)
928 if (vp56_rac_get_prob_branchy(&s->c, 252))
929 s->prob.p.mv_mode[i][j] =
930 update_prob(&s->c, s->prob.p.mv_mode[i][j]);
931
932 if (s->s.h.filtermode == FILTER_SWITCHABLE)
933 for (i = 0; i < 4; i++)
934 for (j = 0; j < 2; j++)
935 if (vp56_rac_get_prob_branchy(&s->c, 252))
936 s->prob.p.filter[i][j] =
937 update_prob(&s->c, s->prob.p.filter[i][j]);
938
939 for (i = 0; i < 4; i++)
940 if (vp56_rac_get_prob_branchy(&s->c, 252))
941 s->prob.p.intra[i] = update_prob(&s->c, s->prob.p.intra[i]);
942
943 if (s->s.h.allowcompinter) {
944 s->s.h.comppredmode = vp8_rac_get(&s->c);
945 if (s->s.h.comppredmode)
946 s->s.h.comppredmode += vp8_rac_get(&s->c);
947 if (s->s.h.comppredmode == PRED_SWITCHABLE)
948 for (i = 0; i < 5; i++)
949 if (vp56_rac_get_prob_branchy(&s->c, 252))
950 s->prob.p.comp[i] =
951 update_prob(&s->c, s->prob.p.comp[i]);
952 } else {
953 s->s.h.comppredmode = PRED_SINGLEREF;
954 }
955
956 if (s->s.h.comppredmode != PRED_COMPREF) {
957 for (i = 0; i < 5; i++) {
958 if (vp56_rac_get_prob_branchy(&s->c, 252))
959 s->prob.p.single_ref[i][0] =
960 update_prob(&s->c, s->prob.p.single_ref[i][0]);
961 if (vp56_rac_get_prob_branchy(&s->c, 252))
962 s->prob.p.single_ref[i][1] =
963 update_prob(&s->c, s->prob.p.single_ref[i][1]);
964 }
965 }
966
967 if (s->s.h.comppredmode != PRED_SINGLEREF) {
968 for (i = 0; i < 5; i++)
969 if (vp56_rac_get_prob_branchy(&s->c, 252))
970 s->prob.p.comp_ref[i] =
971 update_prob(&s->c, s->prob.p.comp_ref[i]);
972 }
973
974 for (i = 0; i < 4; i++)
975 for (j = 0; j < 9; j++)
976 if (vp56_rac_get_prob_branchy(&s->c, 252))
977 s->prob.p.y_mode[i][j] =
978 update_prob(&s->c, s->prob.p.y_mode[i][j]);
979
980 for (i = 0; i < 4; i++)
981 for (j = 0; j < 4; j++)
982 for (k = 0; k < 3; k++)
983 if (vp56_rac_get_prob_branchy(&s->c, 252))
984 s->prob.p.partition[3 - i][j][k] =
985 update_prob(&s->c,
986 s->prob.p.partition[3 - i][j][k]);
987
988 // mv fields don't use the update_prob subexp model for some reason
989 for (i = 0; i < 3; i++)
990 if (vp56_rac_get_prob_branchy(&s->c, 252))
991 s->prob.p.mv_joint[i] = (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
992
993 for (i = 0; i < 2; i++) {
994 if (vp56_rac_get_prob_branchy(&s->c, 252))
995 s->prob.p.mv_comp[i].sign =
996 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
997
998 for (j = 0; j < 10; j++)
999 if (vp56_rac_get_prob_branchy(&s->c, 252))
1000 s->prob.p.mv_comp[i].classes[j] =
1001 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1002
1003 if (vp56_rac_get_prob_branchy(&s->c, 252))
1004 s->prob.p.mv_comp[i].class0 =
1005 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1006
1007 for (j = 0; j < 10; j++)
1008 if (vp56_rac_get_prob_branchy(&s->c, 252))
1009 s->prob.p.mv_comp[i].bits[j] =
1010 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1011 }
1012
1013 for (i = 0; i < 2; i++) {
1014 for (j = 0; j < 2; j++)
1015 for (k = 0; k < 3; k++)
1016 if (vp56_rac_get_prob_branchy(&s->c, 252))
1017 s->prob.p.mv_comp[i].class0_fp[j][k] =
1018 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1019
1020 for (j = 0; j < 3; j++)
1021 if (vp56_rac_get_prob_branchy(&s->c, 252))
1022 s->prob.p.mv_comp[i].fp[j] =
1023 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1024 }
1025
1026 if (s->s.h.highprecisionmvs) {
1027 for (i = 0; i < 2; i++) {
1028 if (vp56_rac_get_prob_branchy(&s->c, 252))
1029 s->prob.p.mv_comp[i].class0_hp =
1030 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1031
1032 if (vp56_rac_get_prob_branchy(&s->c, 252))
1033 s->prob.p.mv_comp[i].hp =
1034 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1035 }
1036 }
1037 }
1038
1039 return (data2 - data) + size2;
1040 }
1041
/*
 * Recursively decode one superblock partition subtree (starting at 64x64 for
 * bl == BL_64X64): read the partition decision from the range coder and
 * dispatch each leaf to ff_vp9_decode_block().
 *
 * row/col index 8-pixel units; yoff/uvoff are byte offsets of this block's
 * top-left sample in the luma/chroma planes of the current frame.
 */
static void decode_sb(VP9TileData *td, int row, int col, VP9Filter *lflvl,
                      ptrdiff_t yoff, ptrdiff_t uvoff, enum BlockLevel bl)
{
    const VP9Context *s = td->s;
    /* partition probability context: bit 0 from the above row's context,
     * bit 1 from the left column's context, both at the current level */
    int c = ((s->above_partition_ctx[col] >> (3 - bl)) & 1) |
            (((td->left_partition_ctx[row & 0x7] >> (3 - bl)) & 1) << 1);
    /* keyframes/intra-only frames use the fixed default partition probs,
     * inter frames use the (possibly adapted) per-frame probabilities */
    const uint8_t *p = s->s.h.keyframe || s->s.h.intraonly ? ff_vp9_default_kf_partition_probs[bl][c] :
                                                     s->prob.p.partition[bl][c];
    enum BlockPartition bp;
    ptrdiff_t hbs = 4 >> bl; // half block size at this level, in 8px units
    AVFrame *f = s->s.frames[CUR_FRAME].tf.f;
    ptrdiff_t y_stride = f->linesize[0], uv_stride = f->linesize[1];
    int bytesperpixel = s->bytesperpixel;

    if (bl == BL_8X8) {
        /* smallest level: partition choice is decoded but always a leaf */
        bp = vp8_rac_get_tree(td->c, ff_vp9_partition_tree, p);
        ff_vp9_decode_block(td, row, col, lflvl, yoff, uvoff, bl, bp);
    } else if (col + hbs < s->cols) { // FIXME why not <=?
        if (row + hbs < s->rows) { // FIXME why not <=?
            /* block fully inside the frame: all four partition types possible */
            bp = vp8_rac_get_tree(td->c, ff_vp9_partition_tree, p);
            switch (bp) {
            case PARTITION_NONE:
                ff_vp9_decode_block(td, row, col, lflvl, yoff, uvoff, bl, bp);
                break;
            case PARTITION_H:
                ff_vp9_decode_block(td, row, col, lflvl, yoff, uvoff, bl, bp);
                yoff  += hbs * 8 * y_stride;
                uvoff += hbs * 8 * uv_stride >> s->ss_v;
                ff_vp9_decode_block(td, row + hbs, col, lflvl, yoff, uvoff, bl, bp);
                break;
            case PARTITION_V:
                ff_vp9_decode_block(td, row, col, lflvl, yoff, uvoff, bl, bp);
                yoff  += hbs * 8 * bytesperpixel;
                uvoff += hbs * 8 * bytesperpixel >> s->ss_h;
                ff_vp9_decode_block(td, row, col + hbs, lflvl, yoff, uvoff, bl, bp);
                break;
            case PARTITION_SPLIT:
                /* recurse into the four quadrants at the next smaller level */
                decode_sb(td, row, col, lflvl, yoff, uvoff, bl + 1);
                decode_sb(td, row, col + hbs, lflvl,
                          yoff + 8 * hbs * bytesperpixel,
                          uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
                yoff  += hbs * 8 * y_stride;
                uvoff += hbs * 8 * uv_stride >> s->ss_v;
                decode_sb(td, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
                decode_sb(td, row + hbs, col + hbs, lflvl,
                          yoff + 8 * hbs * bytesperpixel,
                          uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
                break;
            default:
                av_assert0(0);
            }
        } else if (vp56_rac_get_prob_branchy(td->c, p[1])) {
            /* bottom edge: only SPLIT or H are representable, one bit decides */
            bp = PARTITION_SPLIT;
            decode_sb(td, row, col, lflvl, yoff, uvoff, bl + 1);
            decode_sb(td, row, col + hbs, lflvl,
                      yoff + 8 * hbs * bytesperpixel,
                      uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
        } else {
            bp = PARTITION_H;
            ff_vp9_decode_block(td, row, col, lflvl, yoff, uvoff, bl, bp);
        }
    } else if (row + hbs < s->rows) { // FIXME why not <=?
        /* right edge: only SPLIT or V are representable */
        if (vp56_rac_get_prob_branchy(td->c, p[2])) {
            bp = PARTITION_SPLIT;
            decode_sb(td, row, col, lflvl, yoff, uvoff, bl + 1);
            yoff  += hbs * 8 * y_stride;
            uvoff += hbs * 8 * uv_stride >> s->ss_v;
            decode_sb(td, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
        } else {
            bp = PARTITION_V;
            ff_vp9_decode_block(td, row, col, lflvl, yoff, uvoff, bl, bp);
        }
    } else {
        /* bottom-right corner: partition is implicitly SPLIT, no bits read */
        bp = PARTITION_SPLIT;
        decode_sb(td, row, col, lflvl, yoff, uvoff, bl + 1);
    }
    /* update per-tile statistics used later for probability adaptation */
    td->counts.partition[bl][c][bp]++;
}
1120
/*
 * Second-pass variant of decode_sb(): instead of reading partition decisions
 * from the bitstream, replay the block level/partition (b->bl, b->bp) that
 * pass 1 stored per block, and re-run block decoding (reconstruction).
 */
static void decode_sb_mem(VP9TileData *td, int row, int col, VP9Filter *lflvl,
                          ptrdiff_t yoff, ptrdiff_t uvoff, enum BlockLevel bl)
{
    const VP9Context *s = td->s;
    VP9Block *b = td->b;
    ptrdiff_t hbs = 4 >> bl; // half block size at this level, in 8px units
    AVFrame *f = s->s.frames[CUR_FRAME].tf.f;
    ptrdiff_t y_stride = f->linesize[0], uv_stride = f->linesize[1];
    int bytesperpixel = s->bytesperpixel;

    if (bl == BL_8X8) {
        av_assert2(b->bl == BL_8X8);
        ff_vp9_decode_block(td, row, col, lflvl, yoff, uvoff, b->bl, b->bp);
    } else if (td->b->bl == bl) {
        /* stored partition bottoms out at this level: decode the leaf, plus
         * the second half for H/V partitions when it lies inside the frame */
        ff_vp9_decode_block(td, row, col, lflvl, yoff, uvoff, b->bl, b->bp);
        if (b->bp == PARTITION_H && row + hbs < s->rows) {
            yoff  += hbs * 8 * y_stride;
            uvoff += hbs * 8 * uv_stride >> s->ss_v;
            ff_vp9_decode_block(td, row + hbs, col, lflvl, yoff, uvoff, b->bl, b->bp);
        } else if (b->bp == PARTITION_V && col + hbs < s->cols) {
            yoff  += hbs * 8 * bytesperpixel;
            uvoff += hbs * 8 * bytesperpixel >> s->ss_h;
            ff_vp9_decode_block(td, row, col + hbs, lflvl, yoff, uvoff, b->bl, b->bp);
        }
    } else {
        /* split: recurse into the quadrants that lie inside the frame,
         * mirroring the edge handling of decode_sb() */
        decode_sb_mem(td, row, col, lflvl, yoff, uvoff, bl + 1);
        if (col + hbs < s->cols) { // FIXME why not <=?
            if (row + hbs < s->rows) {
                decode_sb_mem(td, row, col + hbs, lflvl, yoff + 8 * hbs * bytesperpixel,
                              uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
                yoff  += hbs * 8 * y_stride;
                uvoff += hbs * 8 * uv_stride >> s->ss_v;
                decode_sb_mem(td, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
                decode_sb_mem(td, row + hbs, col + hbs, lflvl,
                              yoff + 8 * hbs * bytesperpixel,
                              uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
            } else {
                yoff  += hbs * 8 * bytesperpixel;
                uvoff += hbs * 8 * bytesperpixel >> s->ss_h;
                decode_sb_mem(td, row, col + hbs, lflvl, yoff, uvoff, bl + 1);
            }
        } else if (row + hbs < s->rows) {
            yoff  += hbs * 8 * y_stride;
            uvoff += hbs * 8 * uv_stride >> s->ss_v;
            decode_sb_mem(td, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
        }
    }
}
1169
/*
 * Compute the [start, end) range of tile `idx` out of 2^log2_n tiles that
 * evenly partition n superblocks. Results are clamped to n and returned in
 * 8-pixel units (superblock index << 3).
 */
static void set_tile_offset(int *start, int *end, int idx, int log2_n, int n)
{
    int first_sb = ( idx      * n) >> log2_n;
    int last_sb  = ((idx + 1) * n) >> log2_n;

    if (first_sb > n)
        first_sb = n;
    if (last_sb > n)
        last_sb = n;

    *start = first_sb << 3;
    *end   = last_sb  << 3;
}
1177
/*
 * Free per-decoder scratch buffers: the intra prediction backup plane and
 * each tile thread's block/coefficient base buffers.
 */
static void free_buffers(VP9Context *s)
{
    int tile;

    av_freep(&s->intra_pred_data[0]);
    for (tile = 0; tile < s->active_tile_cols; tile++) {
        VP9TileData *td = &s->td[tile];

        av_freep(&td->b_base);
        av_freep(&td->block_base);
    }
}
1188
/*
 * Codec close callback: release all frames, reference slots, scratch
 * buffers, slice-threading entries and the tile data array.
 * Also called by init_frames() on allocation failure, so it must cope
 * with partially initialized state.
 */
static av_cold int vp9_decode_free(AVCodecContext *avctx)
{
    VP9Context *s = avctx->priv_data;
    int i;

    for (i = 0; i < 3; i++) {
        if (s->s.frames[i].tf.f->buf[0])
            vp9_frame_unref(avctx, &s->s.frames[i]);
        av_frame_free(&s->s.frames[i].tf.f);
    }
    for (i = 0; i < 8; i++) {
        if (s->s.refs[i].f->buf[0])
            ff_thread_release_buffer(avctx, &s->s.refs[i]);
        av_frame_free(&s->s.refs[i].f);
        if (s->next_refs[i].f->buf[0])
            ff_thread_release_buffer(avctx, &s->next_refs[i]);
        av_frame_free(&s->next_refs[i].f);
    }

    free_buffers(s);
    vp9_free_entries(avctx);
    av_freep(&s->td);
    return 0;
}
1213
/*
 * Single-threaded tile decode loop: for each tile row, initialize one range
 * decoder per tile column, then decode superblock rows across all tile
 * columns, loop-filtering and reporting frame-threading progress one sb row
 * at a time. `data`/`size` point at the tile data following the headers.
 * Returns 0 on success or a negative AVERROR code.
 */
static int decode_tiles(AVCodecContext *avctx,
                        const uint8_t *data, int size)
{
    VP9Context *s = avctx->priv_data;
    VP9TileData *td = &s->td[0];
    int row, col, tile_row, tile_col, ret;
    int bytesperpixel;
    int tile_row_start, tile_row_end, tile_col_start, tile_col_end;
    AVFrame *f;
    ptrdiff_t yoff, uvoff, ls_y, ls_uv;

    f = s->s.frames[CUR_FRAME].tf.f;
    ls_y = f->linesize[0];
    ls_uv =f->linesize[1];
    bytesperpixel = s->bytesperpixel;

    yoff = uvoff = 0;
    for (tile_row = 0; tile_row < s->s.h.tiling.tile_rows; tile_row++) {
        set_tile_offset(&tile_row_start, &tile_row_end,
                        tile_row, s->s.h.tiling.log2_tile_rows, s->sb_rows);

        /* set up one range decoder per tile column; every tile except the
         * last is preceded by a 32-bit big-endian size field */
        for (tile_col = 0; tile_col < s->s.h.tiling.tile_cols; tile_col++) {
            int64_t tile_size;

            if (tile_col == s->s.h.tiling.tile_cols - 1 &&
                tile_row == s->s.h.tiling.tile_rows - 1) {
                tile_size = size;
            } else {
                tile_size = AV_RB32(data);
                data += 4;
                size -= 4;
            }
            if (tile_size > size) {
                /* unblock any waiting frame threads before erroring out */
                ff_thread_report_progress(&s->s.frames[CUR_FRAME].tf, INT_MAX, 0);
                return AVERROR_INVALIDDATA;
            }
            ret = ff_vp56_init_range_decoder(&td->c_b[tile_col], data, tile_size);
            if (ret < 0)
                return ret;
            if (vp56_rac_get_prob_branchy(&td->c_b[tile_col], 128)) { // marker bit
                ff_thread_report_progress(&s->s.frames[CUR_FRAME].tf, INT_MAX, 0);
                return AVERROR_INVALIDDATA;
            }
            data += tile_size;
            size -= tile_size;
        }

        /* walk superblock rows (8 mi units == 64 luma pixels each) */
        for (row = tile_row_start; row < tile_row_end;
             row += 8, yoff += ls_y * 64, uvoff += ls_uv * 64 >> s->ss_v) {
            VP9Filter *lflvl_ptr = s->lflvl;
            ptrdiff_t yoff2 = yoff, uvoff2 = uvoff;

            for (tile_col = 0; tile_col < s->s.h.tiling.tile_cols; tile_col++) {
                set_tile_offset(&tile_col_start, &tile_col_end,
                                tile_col, s->s.h.tiling.log2_tile_cols, s->sb_cols);
                td->tile_col_start = tile_col_start;
                if (s->pass != 2) {
                    /* reset the left-edge contexts at each tile boundary;
                     * pass 2 replays stored decisions and does not use them */
                    memset(td->left_partition_ctx, 0, 8);
                    memset(td->left_skip_ctx, 0, 8);
                    if (s->s.h.keyframe || s->s.h.intraonly) {
                        memset(td->left_mode_ctx, DC_PRED, 16);
                    } else {
                        memset(td->left_mode_ctx, NEARESTMV, 8);
                    }
                    memset(td->left_y_nnz_ctx, 0, 16);
                    memset(td->left_uv_nnz_ctx, 0, 32);
                    memset(td->left_segpred_ctx, 0, 8);

                    td->c = &td->c_b[tile_col];
                }

                for (col = tile_col_start;
                     col < tile_col_end;
                     col += 8, yoff2 += 64 * bytesperpixel,
                     uvoff2 += 64 * bytesperpixel >> s->ss_h, lflvl_ptr++) {
                    // FIXME integrate with lf code (i.e. zero after each
                    // use, similar to invtxfm coefficients, or similar)
                    if (s->pass != 1) {
                        memset(lflvl_ptr->mask, 0, sizeof(lflvl_ptr->mask));
                    }

                    if (s->pass == 2) {
                        decode_sb_mem(td, row, col, lflvl_ptr,
                                      yoff2, uvoff2, BL_64X64);
                    } else {
                        decode_sb(td, row, col, lflvl_ptr,
                                  yoff2, uvoff2, BL_64X64);
                    }
                }
            }

            /* pass 1 only parses; reconstruction/loopfilter happen in pass 2 */
            if (s->pass == 1)
                continue;

            // backup pre-loopfilter reconstruction data for intra
            // prediction of next row of sb64s
            if (row + 8 < s->rows) {
                memcpy(s->intra_pred_data[0],
                       f->data[0] + yoff + 63 * ls_y,
                       8 * s->cols * bytesperpixel);
                memcpy(s->intra_pred_data[1],
                       f->data[1] + uvoff + ((64 >> s->ss_v) - 1) * ls_uv,
                       8 * s->cols * bytesperpixel >> s->ss_h);
                memcpy(s->intra_pred_data[2],
                       f->data[2] + uvoff + ((64 >> s->ss_v) - 1) * ls_uv,
                       8 * s->cols * bytesperpixel >> s->ss_h);
            }

            // loopfilter one row
            if (s->s.h.filter.level) {
                yoff2 = yoff;
                uvoff2 = uvoff;
                lflvl_ptr = s->lflvl;
                for (col = 0; col < s->cols;
                     col += 8, yoff2 += 64 * bytesperpixel,
                     uvoff2 += 64 * bytesperpixel >> s->ss_h, lflvl_ptr++) {
                    ff_vp9_loopfilter_sb(avctx, lflvl_ptr, row, col,
                                         yoff2, uvoff2);
                }
            }

            // FIXME maybe we can make this more finegrained by running the
            // loopfilter per-block instead of after each sbrow
            // In fact that would also make intra pred left preparation easier?
            ff_thread_report_progress(&s->s.frames[CUR_FRAME].tf, row >> 3, 0);
        }
    }
    return 0;
}
1343
1344 #if HAVE_THREADS
static av_always_inline
/*
 * Slice-threading worker: decode one tile column (selected by jobnr) across
 * all tile rows. The range decoders in td->c_b[] were initialized by the
 * caller. After each superblock row, per-row progress is reported so the
 * loopfilter_proc() main thread can trail behind.
 */
int decode_tiles_mt(AVCodecContext *avctx, void *tdata, int jobnr,
                    int threadnr)
{
    VP9Context *s = avctx->priv_data;
    VP9TileData *td = &s->td[jobnr];
    ptrdiff_t uvoff, yoff, ls_y, ls_uv;
    int bytesperpixel = s->bytesperpixel, row, col, tile_row;
    unsigned tile_cols_len;
    int tile_row_start, tile_row_end, tile_col_start, tile_col_end;
    VP9Filter *lflvl_ptr_base;
    AVFrame *f;

    f = s->s.frames[CUR_FRAME].tf.f;
    ls_y = f->linesize[0];
    ls_uv =f->linesize[1];

    /* this job handles the jobnr-th tile column only */
    set_tile_offset(&tile_col_start, &tile_col_end,
                    jobnr, s->s.h.tiling.log2_tile_cols, s->sb_cols);
    td->tile_col_start  = tile_col_start;
    uvoff = (64 * bytesperpixel >> s->ss_h)*(tile_col_start >> 3);
    yoff  = (64 * bytesperpixel)*(tile_col_start >> 3);
    lflvl_ptr_base = s->lflvl+(tile_col_start >> 3);

    for (tile_row = 0; tile_row < s->s.h.tiling.tile_rows; tile_row++) {
        set_tile_offset(&tile_row_start, &tile_row_end,
                        tile_row, s->s.h.tiling.log2_tile_rows, s->sb_rows);

        /* one range decoder per tile row for this column */
        td->c = &td->c_b[tile_row];
        for (row = tile_row_start; row < tile_row_end;
             row += 8, yoff += ls_y * 64, uvoff += ls_uv * 64 >> s->ss_v) {
            ptrdiff_t yoff2 = yoff, uvoff2 = uvoff;
            VP9Filter *lflvl_ptr = lflvl_ptr_base+s->sb_cols*(row >> 3);

            /* reset left-edge contexts at the start of each sb row */
            memset(td->left_partition_ctx, 0, 8);
            memset(td->left_skip_ctx, 0, 8);
            if (s->s.h.keyframe || s->s.h.intraonly) {
                memset(td->left_mode_ctx, DC_PRED, 16);
            } else {
                memset(td->left_mode_ctx, NEARESTMV, 8);
            }
            memset(td->left_y_nnz_ctx, 0, 16);
            memset(td->left_uv_nnz_ctx, 0, 32);
            memset(td->left_segpred_ctx, 0, 8);

            for (col = tile_col_start;
                 col < tile_col_end;
                 col += 8, yoff2 += 64 * bytesperpixel,
                 uvoff2 += 64 * bytesperpixel >> s->ss_h, lflvl_ptr++) {
                // FIXME integrate with lf code (i.e. zero after each
                // use, similar to invtxfm coefficients, or similar)
                memset(lflvl_ptr->mask, 0, sizeof(lflvl_ptr->mask));
                decode_sb(td, row, col, lflvl_ptr,
                            yoff2, uvoff2, BL_64X64);
            }

            // backup pre-loopfilter reconstruction data for intra
            // prediction of next row of sb64s
            tile_cols_len = tile_col_end - tile_col_start;
            if (row + 8 < s->rows) {
                memcpy(s->intra_pred_data[0] + (tile_col_start * 8 * bytesperpixel),
                       f->data[0] + yoff + 63 * ls_y,
                       8 * tile_cols_len * bytesperpixel);
                memcpy(s->intra_pred_data[1] + (tile_col_start * 8 * bytesperpixel >> s->ss_h),
                       f->data[1] + uvoff + ((64 >> s->ss_v) - 1) * ls_uv,
                       8 * tile_cols_len * bytesperpixel >> s->ss_h);
                memcpy(s->intra_pred_data[2] + (tile_col_start * 8 * bytesperpixel >> s->ss_h),
                       f->data[2] + uvoff + ((64 >> s->ss_v) - 1) * ls_uv,
                       8 * tile_cols_len * bytesperpixel >> s->ss_h);
            }

            /* tell the loopfilter thread this sb row is done for this column */
            vp9_report_tile_progress(s, row >> 3, 1);
        }
    }
    return 0;
}
1421
1422 static av_always_inline
loopfilter_proc(AVCodecContext * avctx)1423 int loopfilter_proc(AVCodecContext *avctx)
1424 {
1425 VP9Context *s = avctx->priv_data;
1426 ptrdiff_t uvoff, yoff, ls_y, ls_uv;
1427 VP9Filter *lflvl_ptr;
1428 int bytesperpixel = s->bytesperpixel, col, i;
1429 AVFrame *f;
1430
1431 f = s->s.frames[CUR_FRAME].tf.f;
1432 ls_y = f->linesize[0];
1433 ls_uv =f->linesize[1];
1434
1435 for (i = 0; i < s->sb_rows; i++) {
1436 vp9_await_tile_progress(s, i, s->s.h.tiling.tile_cols);
1437
1438 if (s->s.h.filter.level) {
1439 yoff = (ls_y * 64)*i;
1440 uvoff = (ls_uv * 64 >> s->ss_v)*i;
1441 lflvl_ptr = s->lflvl+s->sb_cols*i;
1442 for (col = 0; col < s->cols;
1443 col += 8, yoff += 64 * bytesperpixel,
1444 uvoff += 64 * bytesperpixel >> s->ss_h, lflvl_ptr++) {
1445 ff_vp9_loopfilter_sb(avctx, lflvl_ptr, i << 3, col,
1446 yoff, uvoff);
1447 }
1448 }
1449 }
1450 return 0;
1451 }
1452 #endif
1453
/*
 * Decode one VP9 packet: parse the frame header, set up current/reference
 * frames, then either hand the frame to a hwaccel or run the tile decode
 * loop (one or two passes; slice-threaded when enabled), adapt the
 * probabilities, and rotate the reference slots.
 * Returns the number of bytes consumed or a negative AVERROR code.
 */
static int vp9_decode_frame(AVCodecContext *avctx, void *frame,
                            int *got_frame, AVPacket *pkt)
{
    const uint8_t *data = pkt->data;
    int size = pkt->size;
    VP9Context *s = avctx->priv_data;
    int ret, i, j, ref;
    /* the segmentation map from the previous frame can be reused when the
     * current frame does not update it */
    int retain_segmap_ref = s->s.frames[REF_FRAME_SEGMAP].segmentation_map &&
                            (!s->s.h.segmentation.enabled || !s->s.h.segmentation.update_map);
    AVFrame *f;

    if ((ret = decode_frame_header(avctx, data, size, &ref)) < 0) {
        return ret;
    } else if (ret == 0) {
        /* show-existing-frame: output reference frame `ref` directly,
         * no coded frame data follows */
        if (!s->s.refs[ref].f->buf[0]) {
            av_log(avctx, AV_LOG_ERROR, "Requested reference %d not available\n", ref);
            return AVERROR_INVALIDDATA;
        }
        if ((ret = av_frame_ref(frame, s->s.refs[ref].f)) < 0)
            return ret;
        ((AVFrame *)frame)->pts = pkt->pts;
#if FF_API_PKT_PTS
FF_DISABLE_DEPRECATION_WARNINGS
        ((AVFrame *)frame)->pkt_pts = pkt->pts;
FF_ENABLE_DEPRECATION_WARNINGS
#endif
        ((AVFrame *)frame)->pkt_dts = pkt->dts;
        /* references are unchanged; re-ref them into next_refs */
        for (i = 0; i < 8; i++) {
            if (s->next_refs[i].f->buf[0])
                ff_thread_release_buffer(avctx, &s->next_refs[i]);
            if (s->s.refs[i].f->buf[0] &&
                (ret = ff_thread_ref_frame(&s->next_refs[i], &s->s.refs[i])) < 0)
                return ret;
        }
        *got_frame = 1;
        return pkt->size;
    }
    /* skip the uncompressed+compressed headers; `data` now points at tiles */
    data += ret;
    size -= ret;

    /* rotate the internal frames: current becomes the mvpair/segmap
     * reference for the next frame where applicable */
    if (!retain_segmap_ref || s->s.h.keyframe || s->s.h.intraonly) {
        if (s->s.frames[REF_FRAME_SEGMAP].tf.f->buf[0])
            vp9_frame_unref(avctx, &s->s.frames[REF_FRAME_SEGMAP]);
        if (!s->s.h.keyframe && !s->s.h.intraonly && !s->s.h.errorres && s->s.frames[CUR_FRAME].tf.f->buf[0] &&
            (ret = vp9_frame_ref(avctx, &s->s.frames[REF_FRAME_SEGMAP], &s->s.frames[CUR_FRAME])) < 0)
            return ret;
    }
    if (s->s.frames[REF_FRAME_MVPAIR].tf.f->buf[0])
        vp9_frame_unref(avctx, &s->s.frames[REF_FRAME_MVPAIR]);
    if (!s->s.h.intraonly && !s->s.h.keyframe && !s->s.h.errorres && s->s.frames[CUR_FRAME].tf.f->buf[0] &&
        (ret = vp9_frame_ref(avctx, &s->s.frames[REF_FRAME_MVPAIR], &s->s.frames[CUR_FRAME])) < 0)
        return ret;
    if (s->s.frames[CUR_FRAME].tf.f->buf[0])
        vp9_frame_unref(avctx, &s->s.frames[CUR_FRAME]);
    if ((ret = vp9_frame_alloc(avctx, &s->s.frames[CUR_FRAME])) < 0)
        return ret;
    f = s->s.frames[CUR_FRAME].tf.f;
    f->key_frame = s->s.h.keyframe;
    f->pict_type = (s->s.h.keyframe || s->s.h.intraonly) ? AV_PICTURE_TYPE_I : AV_PICTURE_TYPE_P;

    /* drop the segmap reference if the frame size changed */
    if (s->s.frames[REF_FRAME_SEGMAP].tf.f->buf[0] &&
        (s->s.frames[REF_FRAME_MVPAIR].tf.f->width  != s->s.frames[CUR_FRAME].tf.f->width ||
         s->s.frames[REF_FRAME_MVPAIR].tf.f->height != s->s.frames[CUR_FRAME].tf.f->height)) {
        vp9_frame_unref(avctx, &s->s.frames[REF_FRAME_SEGMAP]);
    }

    // ref frame setup
    for (i = 0; i < 8; i++) {
        if (s->next_refs[i].f->buf[0])
            ff_thread_release_buffer(avctx, &s->next_refs[i]);
        if (s->s.h.refreshrefmask & (1 << i)) {
            ret = ff_thread_ref_frame(&s->next_refs[i], &s->s.frames[CUR_FRAME].tf);
        } else if (s->s.refs[i].f->buf[0]) {
            /* NOTE(review): if neither branch is taken, `ret` keeps its value
             * from the preceding call (non-negative here) — looks intentional
             * but worth confirming */
            ret = ff_thread_ref_frame(&s->next_refs[i], &s->s.refs[i]);
        }
        if (ret < 0)
            return ret;
    }

    if (avctx->hwaccel) {
        /* hardware decode path: submit the whole packet and skip the
         * software tile loop */
        ret = avctx->hwaccel->start_frame(avctx, NULL, 0);
        if (ret < 0)
            return ret;
        ret = avctx->hwaccel->decode_slice(avctx, pkt->data, pkt->size);
        if (ret < 0)
            return ret;
        ret = avctx->hwaccel->end_frame(avctx);
        if (ret < 0)
            return ret;
        goto finish;
    }

    // main tile decode loop
    /* reset the above-row contexts for the whole frame */
    memset(s->above_partition_ctx, 0, s->cols);
    memset(s->above_skip_ctx, 0, s->cols);
    if (s->s.h.keyframe || s->s.h.intraonly) {
        memset(s->above_mode_ctx, DC_PRED, s->cols * 2);
    } else {
        memset(s->above_mode_ctx, NEARESTMV, s->cols);
    }
    memset(s->above_y_nnz_ctx, 0, s->sb_cols * 16);
    memset(s->above_uv_nnz_ctx[0], 0, s->sb_cols * 16 >> s->ss_h);
    memset(s->above_uv_nnz_ctx[1], 0, s->sb_cols * 16 >> s->ss_h);
    memset(s->above_segpred_ctx, 0, s->cols);
    /* two-pass decoding is used with frame threading when the context is
     * adapted (refreshctx) and not in parallel mode */
    s->pass = s->s.frames[CUR_FRAME].uses_2pass =
        avctx->active_thread_type == FF_THREAD_FRAME && s->s.h.refreshctx && !s->s.h.parallelmode;
    if ((ret = update_block_buffers(avctx)) < 0) {
        av_log(avctx, AV_LOG_ERROR,
               "Failed to allocate block buffers\n");
        return ret;
    }
    if (s->s.h.refreshctx && s->s.h.parallelmode) {
        /* parallel mode: store the (unadapted) probabilities immediately so
         * dependent frame threads can start */
        int j, k, l, m;

        for (i = 0; i < 4; i++) {
            for (j = 0; j < 2; j++)
                for (k = 0; k < 2; k++)
                    for (l = 0; l < 6; l++)
                        for (m = 0; m < 6; m++)
                            memcpy(s->prob_ctx[s->s.h.framectxid].coef[i][j][k][l][m],
                                   s->prob.coef[i][j][k][l][m], 3);
            if (s->s.h.txfmmode == i)
                break;
        }
        s->prob_ctx[s->s.h.framectxid].p = s->prob.p;
        ff_thread_finish_setup(avctx);
    } else if (!s->s.h.refreshctx) {
        ff_thread_finish_setup(avctx);
    }

#if HAVE_THREADS
    if (avctx->active_thread_type & FF_THREAD_SLICE) {
        for (i = 0; i < s->sb_rows; i++)
            atomic_store(&s->entries[i], 0);
    }
#endif

    do {
        /* reset per-tile scratch pointers at the start of each pass */
        for (i = 0; i < s->active_tile_cols; i++) {
            s->td[i].b = s->td[i].b_base;
            s->td[i].block = s->td[i].block_base;
            s->td[i].uvblock[0] = s->td[i].uvblock_base[0];
            s->td[i].uvblock[1] = s->td[i].uvblock_base[1];
            s->td[i].eob = s->td[i].eob_base;
            s->td[i].uveob[0] = s->td[i].uveob_base[0];
            s->td[i].uveob[1] = s->td[i].uveob_base[1];
        }

#if HAVE_THREADS
        if (avctx->active_thread_type == FF_THREAD_SLICE) {
            int tile_row, tile_col;

            av_assert1(!s->pass);

            /* set up all tile range decoders, then run one worker per tile
             * column with the loopfilter on the main thread */
            for (tile_row = 0; tile_row < s->s.h.tiling.tile_rows; tile_row++) {
                for (tile_col = 0; tile_col < s->s.h.tiling.tile_cols; tile_col++) {
                    int64_t tile_size;

                    if (tile_col == s->s.h.tiling.tile_cols - 1 &&
                        tile_row == s->s.h.tiling.tile_rows - 1) {
                        tile_size = size;
                    } else {
                        tile_size = AV_RB32(data);
                        data += 4;
                        size -= 4;
                    }
                    if (tile_size > size)
                        return AVERROR_INVALIDDATA;
                    ret = ff_vp56_init_range_decoder(&s->td[tile_col].c_b[tile_row], data, tile_size);
                    if (ret < 0)
                        return ret;
                    if (vp56_rac_get_prob_branchy(&s->td[tile_col].c_b[tile_row], 128)) // marker bit
                        return AVERROR_INVALIDDATA;
                    data += tile_size;
                    size -= tile_size;
                }
            }

            ff_slice_thread_execute_with_mainfunc(avctx, decode_tiles_mt, loopfilter_proc, s->td, NULL, s->s.h.tiling.tile_cols);
        } else
#endif
        {
            ret = decode_tiles(avctx, data, size);
            if (ret < 0)
                return ret;
        }

        // Sum all counts fields into td[0].counts for tile threading
        if (avctx->active_thread_type == FF_THREAD_SLICE)
            for (i = 1; i < s->s.h.tiling.tile_cols; i++)
                for (j = 0; j < sizeof(s->td[i].counts) / sizeof(unsigned); j++)
                    ((unsigned *)&s->td[0].counts)[j] += ((unsigned *)&s->td[i].counts)[j];

        if (s->pass < 2 && s->s.h.refreshctx && !s->s.h.parallelmode) {
            /* backward adaptation of probabilities from symbol counts */
            ff_vp9_adapt_probs(s);
            ff_thread_finish_setup(avctx);
        }
    } while (s->pass++ == 1);
    ff_thread_report_progress(&s->s.frames[CUR_FRAME].tf, INT_MAX, 0);

finish:
    // ref frame setup
    /* commit next_refs into the visible reference slots */
    for (i = 0; i < 8; i++) {
        if (s->s.refs[i].f->buf[0])
            ff_thread_release_buffer(avctx, &s->s.refs[i]);
        if (s->next_refs[i].f->buf[0] &&
            (ret = ff_thread_ref_frame(&s->s.refs[i], &s->next_refs[i])) < 0)
            return ret;
    }

    if (!s->s.h.invisible) {
        if ((ret = av_frame_ref(frame, s->s.frames[CUR_FRAME].tf.f)) < 0)
            return ret;
        *got_frame = 1;
    }

    return pkt->size;
}
1672
/*
 * Flush callback: drop the three internal frames and all eight reference
 * slots so decoding can restart cleanly (e.g. after a seek).
 */
static void vp9_decode_flush(AVCodecContext *avctx)
{
    VP9Context *s = avctx->priv_data;
    int n;

    for (n = 0; n < 3; n++)
        vp9_frame_unref(avctx, &s->s.frames[n]);
    for (n = 0; n < 8; n++)
        ff_thread_release_buffer(avctx, &s->s.refs[n]);
}
1683
/*
 * Allocate the AVFrame shells for the three internal frames and both sets
 * of eight reference slots. On any allocation failure, everything already
 * allocated is torn down via vp9_decode_free() and AVERROR(ENOMEM) is
 * returned.
 */
static int init_frames(AVCodecContext *avctx)
{
    VP9Context *s = avctx->priv_data;
    int n;

    for (n = 0; n < 3; n++) {
        s->s.frames[n].tf.f = av_frame_alloc();
        if (!s->s.frames[n].tf.f) {
            vp9_decode_free(avctx);
            av_log(avctx, AV_LOG_ERROR, "Failed to allocate frame buffer %d\n", n);
            return AVERROR(ENOMEM);
        }
    }
    for (n = 0; n < 8; n++) {
        s->s.refs[n].f    = av_frame_alloc();
        s->next_refs[n].f = av_frame_alloc();
        if (!s->s.refs[n].f || !s->next_refs[n].f) {
            vp9_decode_free(avctx);
            av_log(avctx, AV_LOG_ERROR, "Failed to allocate frame buffer %d\n", n);
            return AVERROR(ENOMEM);
        }
    }

    return 0;
}
1709
vp9_decode_init(AVCodecContext * avctx)1710 static av_cold int vp9_decode_init(AVCodecContext *avctx)
1711 {
1712 VP9Context *s = avctx->priv_data;
1713
1714 avctx->internal->allocate_progress = 1;
1715 s->last_bpp = 0;
1716 s->s.h.filter.sharpness = -1;
1717
1718 return init_frames(avctx);
1719 }
1720
1721 #if HAVE_THREADS
/* Frame-threading per-thread init: each thread copy only needs its own
 * AVFrame shells; the rest of the state arrives via
 * vp9_decode_update_thread_context(). */
static av_cold int vp9_decode_init_thread_copy(AVCodecContext *avctx)
{
    return init_frames(avctx);
}
1726
/*
 * Frame-threading context transfer: copy frame/reference buffers and the
 * header fields a dependent thread needs from the source decoder `src`
 * into `dst`. Only state that persists across frames is copied.
 */
static int vp9_decode_update_thread_context(AVCodecContext *dst, const AVCodecContext *src)
{
    int i, ret;
    VP9Context *s = dst->priv_data, *ssrc = src->priv_data;

    /* re-reference the three internal frames */
    for (i = 0; i < 3; i++) {
        if (s->s.frames[i].tf.f->buf[0])
            vp9_frame_unref(dst, &s->s.frames[i]);
        if (ssrc->s.frames[i].tf.f->buf[0]) {
            if ((ret = vp9_frame_ref(dst, &s->s.frames[i], &ssrc->s.frames[i])) < 0)
                return ret;
        }
    }
    /* take the source's *next* refs: they are the refs the following frame
     * will see */
    for (i = 0; i < 8; i++) {
        if (s->s.refs[i].f->buf[0])
            ff_thread_release_buffer(dst, &s->s.refs[i]);
        if (ssrc->next_refs[i].f->buf[0]) {
            if ((ret = ff_thread_ref_frame(&s->s.refs[i], &ssrc->next_refs[i])) < 0)
                return ret;
        }
    }

    /* scalar header/state fields needed by the next frame's header parse */
    s->s.h.invisible = ssrc->s.h.invisible;
    s->s.h.keyframe = ssrc->s.h.keyframe;
    s->s.h.intraonly = ssrc->s.h.intraonly;
    s->ss_v = ssrc->ss_v;
    s->ss_h = ssrc->ss_h;
    s->s.h.segmentation.enabled = ssrc->s.h.segmentation.enabled;
    s->s.h.segmentation.update_map = ssrc->s.h.segmentation.update_map;
    s->s.h.segmentation.absolute_vals = ssrc->s.h.segmentation.absolute_vals;
    s->bytesperpixel = ssrc->bytesperpixel;
    s->gf_fmt = ssrc->gf_fmt;
    s->w = ssrc->w;
    s->h = ssrc->h;
    s->s.h.bpp = ssrc->s.h.bpp;
    s->bpp_index = ssrc->bpp_index;
    s->pix_fmt = ssrc->pix_fmt;
    memcpy(&s->prob_ctx, &ssrc->prob_ctx, sizeof(s->prob_ctx));
    memcpy(&s->s.h.lf_delta, &ssrc->s.h.lf_delta, sizeof(s->s.h.lf_delta));
    memcpy(&s->s.h.segmentation.feat, &ssrc->s.h.segmentation.feat,
           sizeof(s->s.h.segmentation.feat));

    return 0;
}
1771 #endif
1772
/* AVCodec registration: VP9 software decoder supporting both frame and
 * slice threading (slice threading via the main-function mechanism). */
AVCodec ff_vp9_decoder = {
    .name                  = "vp9",
    .long_name             = NULL_IF_CONFIG_SMALL("Google VP9"),
    .type                  = AVMEDIA_TYPE_VIDEO,
    .id                    = AV_CODEC_ID_VP9,
    .priv_data_size        = sizeof(VP9Context),
    .init                  = vp9_decode_init,
    .close                 = vp9_decode_free,
    .decode                = vp9_decode_frame,
    .capabilities          = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_FRAME_THREADS | AV_CODEC_CAP_SLICE_THREADS,
    .caps_internal         = FF_CODEC_CAP_SLICE_THREAD_HAS_MF,
    .flush                 = vp9_decode_flush,
    .init_thread_copy      = ONLY_IF_THREADS_ENABLED(vp9_decode_init_thread_copy),
    .update_thread_context = ONLY_IF_THREADS_ENABLED(vp9_decode_update_thread_context),
    .profiles              = NULL_IF_CONFIG_SMALL(ff_vp9_profiles),
};
1789