1 /*
2 * NewTek SpeedHQ codec
3 * Copyright 2017 Steinar H. Gunderson
4 *
5 * This file is part of FFmpeg.
6 *
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
11 *
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
16 *
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20 */
21
22 /**
23 * @file
24 * NewTek SpeedHQ decoder.
25 */
26
27 #define BITSTREAM_READER_LE
28
29 #include "config.h"
30 #include "libavutil/attributes.h"
31 #include "libavutil/mem_internal.h"
32
33 #include "avcodec.h"
34 #include "get_bits.h"
35 #include "internal.h"
36 #include "libavutil/thread.h"
37 #include "mathops.h"
38 #include "mpeg12.h"
39 #include "mpeg12data.h"
40 #include "mpeg12vlc.h"
41
42 #define MAX_INDEX (64 - 1)
43
44 /*
45 * 5 bits makes for very small tables, with no more than two lookups needed
46 * for the longest (10-bit) codes.
47 */
48 #define ALPHA_VLC_BITS 5
49
50 typedef struct SHQContext {
51 AVCodecContext *avctx;
52 BlockDSPContext bdsp;
53 IDCTDSPContext idsp;
54 ScanTable intra_scantable;
55 int quant_matrix[64];
56 enum { SHQ_SUBSAMPLING_420, SHQ_SUBSAMPLING_422, SHQ_SUBSAMPLING_444 }
57 subsampling;
58 enum { SHQ_NO_ALPHA, SHQ_RLE_ALPHA, SHQ_DCT_ALPHA } alpha_type;
59 } SHQContext;
60
61
62 /* AC codes: Very similar but not identical to MPEG-2. */
/*
 * AC code table. The first 121 rows line up index-for-index with
 * speedhq_run[] and speedhq_level[] (all three are bundled in ff_rl_speedhq);
 * the final two rows are the escape and end-of-block codes.
 * NOTE(review): each row looks like a {code, length in bits} pair -- confirm
 * against the RLTable consumer.
 */
static const uint16_t speedhq_vlc[123][2] = {
    /* run 0, levels 1..40 */
    {0x0001,  2}, {0x0003,  3}, {0x000E,  4}, {0x0007,  5},
    {0x0017,  5}, {0x0028,  6}, {0x0008,  6}, {0x006F,  7},
    {0x001F,  7}, {0x00C4,  8}, {0x0044,  8}, {0x005F,  8},
    {0x00DF,  8}, {0x007F,  8}, {0x00FF,  8}, {0x3E00, 14},
    {0x1E00, 14}, {0x2E00, 14}, {0x0E00, 14}, {0x3600, 14},
    {0x1600, 14}, {0x2600, 14}, {0x0600, 14}, {0x3A00, 14},
    {0x1A00, 14}, {0x2A00, 14}, {0x0A00, 14}, {0x3200, 14},
    {0x1200, 14}, {0x2200, 14}, {0x0200, 14}, {0x0C00, 15},
    {0x7400, 15}, {0x3400, 15}, {0x5400, 15}, {0x1400, 15},
    {0x6400, 15}, {0x2400, 15}, {0x4400, 15}, {0x0400, 15},

    /* run 1, levels 1..20 */
    {0x0002,  3}, {0x000C,  5}, {0x004F,  7}, {0x00E4,  8},
    {0x0004,  8}, {0x0D00, 13}, {0x1500, 13}, {0x7C00, 15},
    {0x3C00, 15}, {0x5C00, 15}, {0x1C00, 15}, {0x6C00, 15},
    {0x2C00, 15}, {0x4C00, 15}, {0xC800, 16}, {0x4800, 16},
    {0x8800, 16}, {0x0800, 16}, {0x0300, 13}, {0x1D00, 13},

    /* run 2, levels 1..11 */
    {0x0014,  5}, {0x0070,  7}, {0x003F,  8}, {0x00C0, 10},
    {0x0500, 13}, {0x0180, 12}, {0x0280, 12}, {0x0C80, 12},
    {0x0080, 12}, {0x0B00, 13}, {0x1300, 13},

    /* run 3, levels 1..5 */
    {0x001C,  5}, {0x0064,  8}, {0x0380, 12}, {0x1900, 13},
    {0x0D80, 12},

    /* run 4, levels 1..4 */
    {0x0018,  6}, {0x00BF,  8}, {0x0480, 12}, {0x0B80, 12},

    /* run 5, levels 1..3 */
    {0x0038,  6}, {0x0040,  9}, {0x0900, 13},

    /* run 6, levels 1..3 */
    {0x0030,  7}, {0x0780, 12}, {0x2800, 16},

    /* runs 7..16, levels 1..2 each (one run per line) */
    {0x0010,  7}, {0x0A80, 12},
    {0x0050,  7}, {0x0880, 12},
    {0x000F,  7}, {0x1100, 13},
    {0x002F,  7}, {0x0100, 13},
    {0x0084,  8}, {0x5800, 16},
    {0x00A4,  8}, {0x9800, 16},
    {0x0024,  8}, {0x1800, 16},
    {0x0140,  9}, {0xE800, 16},
    {0x01C0,  9}, {0x6800, 16},
    {0x02C0, 10}, {0xA800, 16},

    /* runs 17..31, level 1 only */
    {0x0F80, 12}, {0x0580, 12}, {0x0980, 12}, {0x0E80, 12},
    {0x0680, 12}, {0x1F00, 13}, {0x0F00, 13}, {0x1700, 13},
    {0x0700, 13}, {0x1B00, 13}, {0xF800, 16}, {0x7800, 16},
    {0xB800, 16}, {0x3800, 16}, {0xD800, 16},

    {0x0020,  6}, /* escape */
    {0x0006,  4}  /* EOB */
};
98
/*
 * Level for each of the 121 run/level codes, grouped here by the run value
 * found at the same index in speedhq_run[].
 */
static const uint8_t speedhq_level[121] = {
    /* run 0: levels 1..40 */
     1,  2,  3,  4,  5,  6,  7,  8,  9, 10,
    11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
    21, 22, 23, 24, 25, 26, 27, 28, 29, 30,
    31, 32, 33, 34, 35, 36, 37, 38, 39, 40,

    /* run 1: levels 1..20 */
     1,  2,  3,  4,  5,  6,  7,  8,  9, 10,
    11, 12, 13, 14, 15, 16, 17, 18, 19, 20,

    /* run 2: levels 1..11 */
     1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11,

    /* run 3: levels 1..5 */
     1,  2,  3,  4,  5,

    /* run 4: levels 1..4 */
     1,  2,  3,  4,

    /* runs 5 and 6: levels 1..3 each */
     1,  2,  3,
     1,  2,  3,

    /* runs 7..16: levels 1..2 each */
     1,  2,    1,  2,    1,  2,    1,  2,    1,  2,
     1,  2,    1,  2,    1,  2,    1,  2,    1,  2,

    /* runs 17..31: level 1 only */
     1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
     1,  1,  1,  1,  1,
};
117
/*
 * Run for each of the 121 run/level codes; the level for the same index
 * lives in speedhq_level[]. Runs are stored in non-decreasing order.
 */
static const uint8_t speedhq_run[121] = {
    /* 40 codes with run 0 */
     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,

    /* 20 codes with run 1 */
     1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
     1,  1,  1,  1,  1,  1,  1,  1,  1,  1,

    /* 11 codes with run 2 */
     2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,

    /* 5 codes with run 3 */
     3,  3,  3,  3,  3,

    /* 4 codes with run 4 */
     4,  4,  4,  4,

    /* 3 codes each with run 5 and run 6 */
     5,  5,  5,
     6,  6,  6,

    /* 2 codes each with runs 7..16 */
     7,  7,    8,  8,    9,  9,   10, 10,   11, 11,
    12, 12,   13, 13,   14, 14,   15, 15,   16, 16,

    /* 1 code each with runs 17..31 */
    17, 18, 19, 20, 21, 22, 23, 24, 25, 26,
    27, 28, 29, 30, 31,
};
136
137 RLTable ff_rl_speedhq = {
138 121,
139 121,
140 speedhq_vlc,
141 speedhq_run,
142 speedhq_level,
143 };
144
145 #if CONFIG_SPEEDHQ_DECODER
146 /* NOTE: The first element is always 16, unscaled. */
/*
 * Base quantization matrix, 8 rows of 8 entries. compute_quant_matrix()
 * reads it through ff_zigzag_direct[] and multiplies every entry by the
 * per-frame scale.
 */
static const uint8_t unscaled_quant_matrix[64] = {
    /* row 0 */ 16, 16, 19, 22, 26, 27, 29, 34,
    /* row 1 */ 16, 16, 22, 24, 27, 29, 34, 37,
    /* row 2 */ 19, 22, 26, 27, 29, 34, 34, 38,
    /* row 3 */ 22, 22, 26, 27, 29, 34, 37, 40,
    /* row 4 */ 22, 26, 27, 29, 32, 35, 40, 48,
    /* row 5 */ 26, 27, 29, 32, 35, 40, 48, 58,
    /* row 6 */ 26, 27, 29, 34, 38, 46, 56, 69,
    /* row 7 */ 27, 29, 35, 38, 46, 56, 69, 83
};
157
158 static uint8_t speedhq_static_rl_table_store[2][2*MAX_RUN + MAX_LEVEL + 3];
159
160 static VLC dc_lum_vlc_le;
161 static VLC dc_chroma_vlc_le;
162 static VLC dc_alpha_run_vlc_le;
163 static VLC dc_alpha_level_vlc_le;
164
/**
 * Read one DC differential with the little-endian bit reader.
 *
 * @param gb         bit reader to consume from
 * @param component  plane index; 0 and 3 use dc_lum_vlc_le, any other value
 *                   uses dc_chroma_vlc_le
 * @return 0 when the decoded VLC code is 0, otherwise the value returned by
 *         get_xbits_le(gb, code)
 */
static inline int decode_dc_le(GetBitContext *gb, int component)
{
    const VLC *dc_vlc = (component == 0 || component == 3) ? &dc_lum_vlc_le
                                                           : &dc_chroma_vlc_le;
    const int code = get_vlc2(gb, dc_vlc->table, DC_VLC_BITS, 2);

    /* A zero code is not followed by any extra bits. */
    if (!code)
        return 0;

    return get_xbits_le(gb, code);
}
181
/**
 * Decode one run-length coded alpha block and store it as 8 rows of 16 bytes.
 *
 * The bitstream carries (run, level) pairs that fill a 128-entry delta
 * array; entries that are skipped stay 0. Each row of 16 deltas is then
 * subtracted (with 8-bit wrap-around) from last_alpha, and the updated
 * last_alpha is copied out as the row.
 *
 * @param s           decoder context (not used by this function)
 * @param gb          bit reader to consume from
 * @param last_alpha  running 16-byte row predictor; updated in place
 * @param dest        first output row
 * @param linesize    distance in bytes between consecutive output rows
 * @return 0 on success, AVERROR_INVALIDDATA if a run moves past entry 127
 */
static inline int decode_alpha_block(const SHQContext *s, GetBitContext *gb, uint8_t last_alpha[16], uint8_t *dest, int linesize)
{
    uint8_t deltas[128] = { 0 };
    int pos = 0;

    {
        OPEN_READER(re, gb);

        while (1) {
            int run, level;

            UPDATE_CACHE_LE(re, gb);
            GET_VLC(run, re, gb, dc_alpha_run_vlc_le.table, ALPHA_VLC_BITS, 2);

            /* A negative run ends the block (the EOB symbol is -1). */
            if (run < 0)
                break;

            pos += run;
            if (pos >= 128)
                return AVERROR_INVALIDDATA;

            UPDATE_CACHE_LE(re, gb);
            GET_VLC(level, re, gb, dc_alpha_level_vlc_le.table, ALPHA_VLC_BITS, 2);

            /* Only the low 8 bits of the level are kept. */
            deltas[pos] = level;
            pos++;
        }

        CLOSE_READER(re, gb);
    }

    for (int row = 0; row < 8; row++) {
        const uint8_t *row_deltas = deltas + 16 * row;

        for (int col = 0; col < 16; col++)
            last_alpha[col] -= row_deltas[col];

        memcpy(dest, last_alpha, 16);
        dest += linesize;
    }

    return 0;
}
221
decode_dct_block(const SHQContext * s,GetBitContext * gb,int last_dc[4],int component,uint8_t * dest,int linesize)222 static inline int decode_dct_block(const SHQContext *s, GetBitContext *gb, int last_dc[4], int component, uint8_t *dest, int linesize)
223 {
224 const int *quant_matrix = s->quant_matrix;
225 const uint8_t *scantable = s->intra_scantable.permutated;
226 LOCAL_ALIGNED_32(int16_t, block, [64]);
227 int dc_offset;
228
229 s->bdsp.clear_block(block);
230
231 dc_offset = decode_dc_le(gb, component);
232 last_dc[component] -= dc_offset; /* Note: Opposite of most codecs. */
233 block[scantable[0]] = last_dc[component]; /* quant_matrix[0] is always 16. */
234
235 /* Read AC coefficients. */
236 {
237 int i = 0;
238 OPEN_READER(re, gb);
239 for ( ;; ) {
240 int level, run;
241 UPDATE_CACHE_LE(re, gb);
242 GET_RL_VLC(level, run, re, gb, ff_rl_speedhq.rl_vlc[0],
243 TEX_VLC_BITS, 2, 0);
244 if (level == 127) {
245 break;
246 } else if (level) {
247 i += run;
248 if (i > MAX_INDEX)
249 return AVERROR_INVALIDDATA;
250 /* If next bit is 1, level = -level */
251 level = (level ^ SHOW_SBITS(re, gb, 1)) -
252 SHOW_SBITS(re, gb, 1);
253 LAST_SKIP_BITS(re, gb, 1);
254 } else {
255 /* Escape. */
256 #if MIN_CACHE_BITS < 6 + 6 + 12
257 #error MIN_CACHE_BITS is too small for the escape code, add UPDATE_CACHE
258 #endif
259 run = SHOW_UBITS(re, gb, 6) + 1;
260 SKIP_BITS(re, gb, 6);
261 level = SHOW_UBITS(re, gb, 12) - 2048;
262 LAST_SKIP_BITS(re, gb, 12);
263
264 i += run;
265 if (i > MAX_INDEX)
266 return AVERROR_INVALIDDATA;
267 }
268
269 block[scantable[i]] = (level * quant_matrix[i]) >> 4;
270 }
271 CLOSE_READER(re, gb);
272 }
273
274 s->idsp.idct_put(dest, linesize, block);
275
276 return 0;
277 }
278
/**
 * Decode the right-hand border macroblock of every macroblock row.
 *
 * decode_speedhq_field() calls this for non-4:4:4 content whose width is not
 * a multiple of 16. Each macroblock starts at x = frame->width - 8 and is
 * read from the bit reader passed in by the caller.
 *
 * @param s             decoder context
 * @param gb            bit reader to continue reading from
 * @param frame         destination frame
 * @param field_number  row offset added to every destination row
 * @param line_stride   multiplier applied to the frame line sizes and to the
 *                      macroblock row step
 * @return 0 on success, or the negative error returned by a block decoder
 */
static int decode_speedhq_border(const SHQContext *s, GetBitContext *gb, AVFrame *frame, int field_number, int line_stride)
{
    const int has_alpha   = s->alpha_type != SHQ_NO_ALPHA;
    const int linesize_y  = frame->linesize[0] * line_stride;
    const int linesize_cb = frame->linesize[1] * line_stride;
    const int linesize_cr = frame->linesize[2] * line_stride;
    /* Plane 3 is only touched when the stream has alpha. */
    const int linesize_a  = has_alpha ? frame->linesize[3] * line_stride : 0;
    const int x           = frame->width - 8;
    int ret;

    for (int y = 0; y < frame->height; y += 16 * line_stride) {
        int last_dc[4] = { 1024, 1024, 1024, 1024 };
        uint8_t last_alpha[16];
        uint8_t *dest_y, *dest_cb, *dest_cr, *dest_a = NULL;
        /*
         * 4:2:0 chroma uses half the luma row index. Every other layout uses
         * the luma row index, so the chroma pointers are always assigned
         * (previously they were left indeterminate unless the layout was
         * exactly 4:2:0 or 4:2:2).
         */
        const int chroma_y = s->subsampling == SHQ_SUBSAMPLING_420 ? y / 2 : y;

        dest_y  = frame->data[0] + frame->linesize[0] * (y + field_number) + x;
        dest_cb = frame->data[1] + frame->linesize[1] * (chroma_y + field_number) + x / 2;
        dest_cr = frame->data[2] + frame->linesize[2] * (chroma_y + field_number) + x / 2;
        if (has_alpha) {
            memset(last_alpha, 255, sizeof(last_alpha));
            dest_a = frame->data[3] + frame->linesize[3] * (y + field_number) + x;
        }

        /* Four luma blocks: top-left, top-right, bottom-left, bottom-right. */
        if ((ret = decode_dct_block(s, gb, last_dc, 0, dest_y, linesize_y)) < 0)
            return ret;
        if ((ret = decode_dct_block(s, gb, last_dc, 0, dest_y + 8, linesize_y)) < 0)
            return ret;
        if ((ret = decode_dct_block(s, gb, last_dc, 0, dest_y + 8 * linesize_y, linesize_y)) < 0)
            return ret;
        if ((ret = decode_dct_block(s, gb, last_dc, 0, dest_y + 8 * linesize_y + 8, linesize_y)) < 0)
            return ret;

        /* First Cb and Cr block. */
        if ((ret = decode_dct_block(s, gb, last_dc, 1, dest_cb, linesize_cb)) < 0)
            return ret;
        if ((ret = decode_dct_block(s, gb, last_dc, 2, dest_cr, linesize_cr)) < 0)
            return ret;

        /* Second (lower) Cb and Cr block when chroma is not vertically subsampled. */
        if (s->subsampling != SHQ_SUBSAMPLING_420) {
            if ((ret = decode_dct_block(s, gb, last_dc, 1, dest_cb + 8 * linesize_cb, linesize_cb)) < 0)
                return ret;
            if ((ret = decode_dct_block(s, gb, last_dc, 2, dest_cr + 8 * linesize_cr, linesize_cr)) < 0)
                return ret;
        }

        if (s->alpha_type == SHQ_RLE_ALPHA) {
            /* Alpha coded using 16x8 RLE blocks. */
            if ((ret = decode_alpha_block(s, gb, last_alpha, dest_a, linesize_a)) < 0)
                return ret;
            if ((ret = decode_alpha_block(s, gb, last_alpha, dest_a + 8 * linesize_a, linesize_a)) < 0)
                return ret;
        } else if (s->alpha_type == SHQ_DCT_ALPHA) {
            /* Alpha encoded exactly like luma. */
            if ((ret = decode_dct_block(s, gb, last_dc, 3, dest_a, linesize_a)) < 0)
                return ret;
            if ((ret = decode_dct_block(s, gb, last_dc, 3, dest_a + 8, linesize_a)) < 0)
                return ret;
            if ((ret = decode_dct_block(s, gb, last_dc, 3, dest_a + 8 * linesize_a, linesize_a)) < 0)
                return ret;
            if ((ret = decode_dct_block(s, gb, last_dc, 3, dest_a + 8 * linesize_a + 8, linesize_a)) < 0)
                return ret;
        }
    }

    return 0;
}
350
/**
 * Decode one field (or a whole progressive picture) into frame.
 *
 * The byte range [start, end) of buf holds four slices. Each slice begins
 * with a 24-bit little-endian length that includes those three bytes.
 * Slice k carries macroblock rows k, k + 4, k + 8, ... After the last
 * slice, non-4:4:4 content whose width is not a multiple of 16 continues
 * with decode_speedhq_border() on the same bit reader.
 *
 * @param s             decoder context
 * @param buf           packet data
 * @param buf_size      size of buf in bytes
 * @param frame         destination frame
 * @param field_number  row offset added to every destination row
 * @param start         offset of the first slice header in buf
 * @param end           offset one past the last byte of the field in buf
 * @param line_stride   multiplier applied to the frame line sizes and to the
 *                      macroblock row step
 * @return 0 on success, a negative AVERROR code on invalid data
 */
static int decode_speedhq_field(const SHQContext *s, const uint8_t *buf, int buf_size, AVFrame *frame, int field_number, int start, int end, int line_stride)
{
    const int is_420      = s->subsampling == SHQ_SUBSAMPLING_420;
    const int is_444      = s->subsampling == SHQ_SUBSAMPLING_444;
    const int has_alpha   = s->alpha_type != SHQ_NO_ALPHA;
    const int linesize_y  = frame->linesize[0] * line_stride;
    const int linesize_cb = frame->linesize[1] * line_stride;
    const int linesize_cr = frame->linesize[2] * line_stride;
    /* Plane 3 is only touched when the stream has alpha. */
    const int linesize_a  = has_alpha ? frame->linesize[3] * line_stride : 0;
    /* Subsampled layouts stop 8 pixels early; see the border call at the end. */
    const int x_end       = frame->width - (is_444 ? 0 : 8);
    /* Chroma advances a full 16 pixels per macroblock only for 4:4:4. */
    const int chroma_step = is_444 ? 16 : 8;
    int ret, slice_number, slice_offsets[5];
    GetBitContext gb;

    if (end < start || end - start < 3 || end > buf_size)
        return AVERROR_INVALIDDATA;

    slice_offsets[0] = start;
    slice_offsets[4] = end;
    for (slice_number = 1; slice_number < 4; slice_number++) {
        const int last_offset = slice_offsets[slice_number - 1];
        const int slice_len   = AV_RL24(buf + last_offset);

        /*
         * last_offset <= end - 3 always holds here, so the room left is
         * non-negative. Checking the length against it before adding keeps
         * the sum from overflowing, and every later slice keeps 3 bytes for
         * its own header.
         */
        if (slice_len < 3 || slice_len > end - 3 - last_offset)
            return AVERROR_INVALIDDATA;

        slice_offsets[slice_number] = last_offset + slice_len;
    }

    for (slice_number = 0; slice_number < 4; slice_number++) {
        const int slice_begin = slice_offsets[slice_number];
        const int slice_end   = slice_offsets[slice_number + 1];
        int x, y;

        /* Skip the 3-byte length header. */
        if ((ret = init_get_bits8(&gb, buf + slice_begin + 3, slice_end - slice_begin - 3)) < 0)
            return ret;

        for (y = slice_number * 16 * line_stride; y < frame->height; y += line_stride * 64) {
            uint8_t *dest_y, *dest_cb, *dest_cr, *dest_a = NULL;
            int last_dc[4] = { 1024, 1024, 1024, 1024 };
            uint8_t last_alpha[16];
            /* 4:2:0 chroma uses half the luma row index. */
            const int chroma_y = is_420 ? y / 2 : y;

            memset(last_alpha, 255, sizeof(last_alpha));

            dest_y  = frame->data[0] + frame->linesize[0] * (y + field_number);
            dest_cb = frame->data[1] + frame->linesize[1] * (chroma_y + field_number);
            dest_cr = frame->data[2] + frame->linesize[2] * (chroma_y + field_number);
            if (has_alpha)
                dest_a = frame->data[3] + frame->linesize[3] * (y + field_number);

            for (x = 0; x < x_end; x += 16) {
                /* Decode the four luma blocks. */
                if ((ret = decode_dct_block(s, &gb, last_dc, 0, dest_y, linesize_y)) < 0)
                    return ret;
                if ((ret = decode_dct_block(s, &gb, last_dc, 0, dest_y + 8, linesize_y)) < 0)
                    return ret;
                if ((ret = decode_dct_block(s, &gb, last_dc, 0, dest_y + 8 * linesize_y, linesize_y)) < 0)
                    return ret;
                if ((ret = decode_dct_block(s, &gb, last_dc, 0, dest_y + 8 * linesize_y + 8, linesize_y)) < 0)
                    return ret;

                /*
                 * Decode the first chroma block. For 4:2:0, this is the only one;
                 * for 4:2:2, it's the top block; for 4:4:4, it's the top-left block.
                 */
                if ((ret = decode_dct_block(s, &gb, last_dc, 1, dest_cb, linesize_cb)) < 0)
                    return ret;
                if ((ret = decode_dct_block(s, &gb, last_dc, 2, dest_cr, linesize_cr)) < 0)
                    return ret;

                if (!is_420) {
                    /* For 4:2:2, this is the bottom block; for 4:4:4, it's the bottom-left block. */
                    if ((ret = decode_dct_block(s, &gb, last_dc, 1, dest_cb + 8 * linesize_cb, linesize_cb)) < 0)
                        return ret;
                    if ((ret = decode_dct_block(s, &gb, last_dc, 2, dest_cr + 8 * linesize_cr, linesize_cr)) < 0)
                        return ret;
                }

                if (is_444) {
                    /* Top-right and bottom-right blocks. */
                    if ((ret = decode_dct_block(s, &gb, last_dc, 1, dest_cb + 8, linesize_cb)) < 0)
                        return ret;
                    if ((ret = decode_dct_block(s, &gb, last_dc, 2, dest_cr + 8, linesize_cr)) < 0)
                        return ret;
                    if ((ret = decode_dct_block(s, &gb, last_dc, 1, dest_cb + 8 * linesize_cb + 8, linesize_cb)) < 0)
                        return ret;
                    if ((ret = decode_dct_block(s, &gb, last_dc, 2, dest_cr + 8 * linesize_cr + 8, linesize_cr)) < 0)
                        return ret;
                }

                dest_y  += 16;
                dest_cb += chroma_step;
                dest_cr += chroma_step;

                if (s->alpha_type == SHQ_RLE_ALPHA) {
                    /* Alpha coded using 16x8 RLE blocks. */
                    if ((ret = decode_alpha_block(s, &gb, last_alpha, dest_a, linesize_a)) < 0)
                        return ret;
                    if ((ret = decode_alpha_block(s, &gb, last_alpha, dest_a + 8 * linesize_a, linesize_a)) < 0)
                        return ret;
                    dest_a += 16;
                } else if (s->alpha_type == SHQ_DCT_ALPHA) {
                    /* Alpha encoded exactly like luma. */
                    if ((ret = decode_dct_block(s, &gb, last_dc, 3, dest_a, linesize_a)) < 0)
                        return ret;
                    if ((ret = decode_dct_block(s, &gb, last_dc, 3, dest_a + 8, linesize_a)) < 0)
                        return ret;
                    if ((ret = decode_dct_block(s, &gb, last_dc, 3, dest_a + 8 * linesize_a, linesize_a)) < 0)
                        return ret;
                    if ((ret = decode_dct_block(s, &gb, last_dc, 3, dest_a + 8 * linesize_a + 8, linesize_a)) < 0)
                        return ret;
                    dest_a += 16;
                }
            }
        }
    }

    /* The border macroblocks continue in the last slice's bit reader. */
    if (!is_444 && (frame->width & 15))
        return decode_speedhq_border(s, &gb, frame, field_number, line_stride);

    return 0;
}
482
compute_quant_matrix(int * output,int qscale)483 static void compute_quant_matrix(int *output, int qscale)
484 {
485 int i;
486 for (i = 0; i < 64; i++) output[i] = unscaled_quant_matrix[ff_zigzag_direct[i]] * qscale;
487 }
488
/**
 * Decode one SpeedHQ packet into a frame.
 *
 * Packet header as read here: byte 0 is the quality (must be below 100),
 * bytes 1..3 are the 24-bit little-endian offset of the second field. The
 * first field starts at offset 4.
 *
 * @param avctx      codec context
 * @param data       AVFrame to fill
 * @param got_frame  set to 1 when a frame was produced
 * @param avpkt      input packet
 * @return the packet size on success, a negative AVERROR code on failure
 */
static int speedhq_decode_frame(AVCodecContext *avctx,
                                void *data, int *got_frame,
                                AVPacket *avpkt)
{
    SHQContext * const s = avctx->priv_data;
    const uint8_t *buf   = avpkt->data;
    int buf_size         = avpkt->size;
    AVFrame *frame       = data;
    uint8_t quality;
    uint32_t second_field_offset;
    int ret;

    /*
     * The right-hand border macroblock is placed at x = width - 8 and spans
     * 16 luma columns. That only stays inside coded_width (width rounded up
     * to 16) when the width is a multiple of 8, so other widths are rejected.
     */
    if (buf_size < 4 || avctx->width < 8 || avctx->width % 8 != 0)
        return AVERROR_INVALIDDATA;

    quality = buf[0];
    if (quality >= 100)
        return AVERROR_INVALIDDATA;

    compute_quant_matrix(s->quant_matrix, 100 - quality);

    second_field_offset = AV_RL24(buf + 1);
    if (second_field_offset >= buf_size - 3)
        return AVERROR_INVALIDDATA;

    avctx->coded_width  = FFALIGN(avctx->width, 16);
    avctx->coded_height = FFALIGN(avctx->height, 16);

    if ((ret = ff_get_buffer(avctx, frame, 0)) < 0)
        return ret;
    frame->key_frame = 1;

    if (second_field_offset == 4 || second_field_offset == (buf_size-4)) {
        /*
         * Overlapping first and second fields is used to signal
         * encoding only a single field. In this case, "height"
         * is ambiguous; it could mean either the height of the
         * frame as a whole, or of the field. The former would make
         * more sense for compatibility with legacy decoders,
         * but this matches the convention used in NDI, which is
         * the primary user of this trick.
         */
        if ((ret = decode_speedhq_field(s, buf, buf_size, frame, 0, 4, buf_size, 1)) < 0)
            return ret;
    } else {
        /* Two fields, written to alternating rows (line stride 2). */
        if ((ret = decode_speedhq_field(s, buf, buf_size, frame, 0, 4, second_field_offset, 2)) < 0)
            return ret;
        if ((ret = decode_speedhq_field(s, buf, buf_size, frame, 1, second_field_offset, buf_size, 2)) < 0)
            return ret;
    }

    *got_frame = 1;
    return buf_size;
}
546
547 /*
548 * Alpha VLC. Run and level are independently coded, and would be
549 * outside the default limits for MAX_RUN/MAX_LEVEL, so we don't
550 * bother with combining them into one table.
551 */
compute_alpha_vlcs(void)552 static av_cold void compute_alpha_vlcs(void)
553 {
554 uint16_t run_code[134], level_code[266];
555 uint8_t run_bits[134], level_bits[266];
556 int16_t run_symbols[134], level_symbols[266];
557 int entry, i, sign;
558
559 /* Initialize VLC for alpha run. */
560 entry = 0;
561
562 /* 0 -> 0. */
563 run_code[entry] = 0;
564 run_bits[entry] = 1;
565 run_symbols[entry] = 0;
566 ++entry;
567
568 /* 10xx -> xx plus 1. */
569 for (i = 0; i < 4; ++i) {
570 run_code[entry] = (i << 2) | 1;
571 run_bits[entry] = 4;
572 run_symbols[entry] = i + 1;
573 ++entry;
574 }
575
576 /* 111xxxxxxx -> xxxxxxx. */
577 for (i = 0; i < 128; ++i) {
578 run_code[entry] = (i << 3) | 7;
579 run_bits[entry] = 10;
580 run_symbols[entry] = i;
581 ++entry;
582 }
583
584 /* 110 -> EOB. */
585 run_code[entry] = 3;
586 run_bits[entry] = 3;
587 run_symbols[entry] = -1;
588 ++entry;
589
590 av_assert0(entry == FF_ARRAY_ELEMS(run_code));
591
592 INIT_LE_VLC_SPARSE_STATIC(&dc_alpha_run_vlc_le, ALPHA_VLC_BITS,
593 FF_ARRAY_ELEMS(run_code),
594 run_bits, 1, 1,
595 run_code, 2, 2,
596 run_symbols, 2, 2, 160);
597
598 /* Initialize VLC for alpha level. */
599 entry = 0;
600
601 for (sign = 0; sign <= 1; ++sign) {
602 /* 1s -> -1 or +1 (depending on sign bit). */
603 level_code[entry] = (sign << 1) | 1;
604 level_bits[entry] = 2;
605 level_symbols[entry] = sign ? -1 : 1;
606 ++entry;
607
608 /* 01sxx -> xx plus 2 (2..5 or -2..-5, depending on sign bit). */
609 for (i = 0; i < 4; ++i) {
610 level_code[entry] = (i << 3) | (sign << 2) | 2;
611 level_bits[entry] = 5;
612 level_symbols[entry] = sign ? -(i + 2) : (i + 2);
613 ++entry;
614 }
615 }
616
617 /*
618 * 00xxxxxxxx -> xxxxxxxx, in two's complement. There are many codes
619 * here that would better be encoded in other ways (e.g. 0 would be
620 * encoded by increasing run, and +/- 1 would be encoded with a
621 * shorter code), but it doesn't hurt to allow everything.
622 */
623 for (i = 0; i < 256; ++i) {
624 level_code[entry] = i << 2;
625 level_bits[entry] = 10;
626 level_symbols[entry] = i;
627 ++entry;
628 }
629
630 av_assert0(entry == FF_ARRAY_ELEMS(level_code));
631
632 INIT_LE_VLC_SPARSE_STATIC(&dc_alpha_level_vlc_le, ALPHA_VLC_BITS,
633 FF_ARRAY_ELEMS(level_code),
634 level_bits, 1, 1,
635 level_code, 2, 2,
636 level_symbols, 2, 2, 288);
637 }
638
speedhq_static_init(void)639 static av_cold void speedhq_static_init(void)
640 {
641 /* Exactly the same as MPEG-2, except for a little-endian reader. */
642 INIT_CUSTOM_VLC_STATIC(&dc_lum_vlc_le, DC_VLC_BITS, 12,
643 ff_mpeg12_vlc_dc_lum_bits, 1, 1,
644 ff_mpeg12_vlc_dc_lum_code, 2, 2,
645 INIT_VLC_OUTPUT_LE, 512);
646 INIT_CUSTOM_VLC_STATIC(&dc_chroma_vlc_le, DC_VLC_BITS, 12,
647 ff_mpeg12_vlc_dc_chroma_bits, 1, 1,
648 ff_mpeg12_vlc_dc_chroma_code, 2, 2,
649 INIT_VLC_OUTPUT_LE, 514);
650
651 ff_rl_init(&ff_rl_speedhq, speedhq_static_rl_table_store);
652 INIT_2D_VLC_RL(ff_rl_speedhq, 674, INIT_VLC_LE);
653
654 compute_alpha_vlcs();
655 }
656
speedhq_decode_init(AVCodecContext * avctx)657 static av_cold int speedhq_decode_init(AVCodecContext *avctx)
658 {
659 int ret;
660 static AVOnce init_once = AV_ONCE_INIT;
661 SHQContext * const s = avctx->priv_data;
662
663 s->avctx = avctx;
664
665 ret = ff_thread_once(&init_once, speedhq_static_init);
666 if (ret)
667 return AVERROR_UNKNOWN;
668
669 ff_blockdsp_init(&s->bdsp, avctx);
670 ff_idctdsp_init(&s->idsp, avctx);
671 ff_init_scantable(s->idsp.idct_permutation, &s->intra_scantable, ff_zigzag_direct);
672
673 switch (avctx->codec_tag) {
674 case MKTAG('S', 'H', 'Q', '0'):
675 s->subsampling = SHQ_SUBSAMPLING_420;
676 s->alpha_type = SHQ_NO_ALPHA;
677 avctx->pix_fmt = AV_PIX_FMT_YUV420P;
678 break;
679 case MKTAG('S', 'H', 'Q', '1'):
680 s->subsampling = SHQ_SUBSAMPLING_420;
681 s->alpha_type = SHQ_RLE_ALPHA;
682 avctx->pix_fmt = AV_PIX_FMT_YUVA420P;
683 break;
684 case MKTAG('S', 'H', 'Q', '2'):
685 s->subsampling = SHQ_SUBSAMPLING_422;
686 s->alpha_type = SHQ_NO_ALPHA;
687 avctx->pix_fmt = AV_PIX_FMT_YUV422P;
688 break;
689 case MKTAG('S', 'H', 'Q', '3'):
690 s->subsampling = SHQ_SUBSAMPLING_422;
691 s->alpha_type = SHQ_RLE_ALPHA;
692 avctx->pix_fmt = AV_PIX_FMT_YUVA422P;
693 break;
694 case MKTAG('S', 'H', 'Q', '4'):
695 s->subsampling = SHQ_SUBSAMPLING_444;
696 s->alpha_type = SHQ_NO_ALPHA;
697 avctx->pix_fmt = AV_PIX_FMT_YUV444P;
698 break;
699 case MKTAG('S', 'H', 'Q', '5'):
700 s->subsampling = SHQ_SUBSAMPLING_444;
701 s->alpha_type = SHQ_RLE_ALPHA;
702 avctx->pix_fmt = AV_PIX_FMT_YUVA444P;
703 break;
704 case MKTAG('S', 'H', 'Q', '7'):
705 s->subsampling = SHQ_SUBSAMPLING_422;
706 s->alpha_type = SHQ_DCT_ALPHA;
707 avctx->pix_fmt = AV_PIX_FMT_YUVA422P;
708 break;
709 case MKTAG('S', 'H', 'Q', '9'):
710 s->subsampling = SHQ_SUBSAMPLING_444;
711 s->alpha_type = SHQ_DCT_ALPHA;
712 avctx->pix_fmt = AV_PIX_FMT_YUVA444P;
713 break;
714 default:
715 av_log(avctx, AV_LOG_ERROR, "Unknown NewTek SpeedHQ FOURCC provided (%08X)\n",
716 avctx->codec_tag);
717 return AVERROR_INVALIDDATA;
718 }
719
720 /* This matches what NDI's RGB -> Y'CbCr 4:2:2 converter uses. */
721 avctx->colorspace = AVCOL_SPC_BT470BG;
722 avctx->chroma_sample_location = AVCHROMA_LOC_CENTER;
723
724 return 0;
725 }
726
727 AVCodec ff_speedhq_decoder = {
728 .name = "speedhq",
729 .long_name = NULL_IF_CONFIG_SMALL("NewTek SpeedHQ"),
730 .type = AVMEDIA_TYPE_VIDEO,
731 .id = AV_CODEC_ID_SPEEDHQ,
732 .priv_data_size = sizeof(SHQContext),
733 .init = speedhq_decode_init,
734 .decode = speedhq_decode_frame,
735 .capabilities = AV_CODEC_CAP_DR1,
736 };
737 #endif /* CONFIG_SPEEDHQ_DECODER */
738