1 /********************************************************************
2 * *
3 * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. *
4 * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
5 * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
6 * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
7 * *
8 * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 *
9 * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
10 * *
11 ********************************************************************
12
13 function:
14 last mod: $Id: decode.c 16581 2009-09-25 22:56:16Z gmaxwell $
15
16 ********************************************************************/
17
18 #include <stdlib.h>
19 #include <string.h>
20 #include <ogg/ogg.h>
21 #include "decint.h"
22 #if defined(OC_DUMP_IMAGES)
23 # include <stdio.h>
24 # include "png.h"
25 #endif
26 #if defined(HAVE_CAIRO)
27 # include <cairo.h>
28 #endif
29
30
/*Post-processing levels, in increasing order of strength.*/
/*No post-processing.*/
#define OC_PP_LEVEL_DISABLED  (0)
/*Keep track of DC qi for each block only.*/
#define OC_PP_LEVEL_TRACKDCQI (1)
/*Deblock the luma plane.*/
#define OC_PP_LEVEL_DEBLOCKY  (2)
/*Dering the luma plane.*/
#define OC_PP_LEVEL_DERINGY   (3)
/*Stronger luma plane deringing.*/
#define OC_PP_LEVEL_SDERINGY  (4)
/*Deblock the chroma planes.*/
#define OC_PP_LEVEL_DEBLOCKC  (5)
/*Dering the chroma planes.*/
#define OC_PP_LEVEL_DERINGC   (6)
/*Stronger chroma plane deringing.*/
#define OC_PP_LEVEL_SDERINGC  (7)
/*Maximum valid post-processing level.*/
#define OC_PP_LEVEL_MAX       (7)
49
50
51
/*The mode alphabets for the various mode coding schemes.
  Scheme 0 uses a custom alphabet, which is not stored in this table.
  Entry [s-1] is the alphabet for coding scheme s (1...7); each alphabet
   lists all OC_NMODES macro block modes, most-frequent first.*/
static const unsigned char OC_MODE_ALPHABETS[7][OC_NMODES]={
  /*Last MV dominates.*/
  /*Scheme 1.*/
  {
    OC_MODE_INTER_MV_LAST,OC_MODE_INTER_MV_LAST2,OC_MODE_INTER_MV,
    OC_MODE_INTER_NOMV,OC_MODE_INTRA,OC_MODE_GOLDEN_NOMV,OC_MODE_GOLDEN_MV,
    OC_MODE_INTER_MV_FOUR
  },
  /*Scheme 2.*/
  {
    OC_MODE_INTER_MV_LAST,OC_MODE_INTER_MV_LAST2,OC_MODE_INTER_NOMV,
    OC_MODE_INTER_MV,OC_MODE_INTRA,OC_MODE_GOLDEN_NOMV,OC_MODE_GOLDEN_MV,
    OC_MODE_INTER_MV_FOUR
  },
  /*Scheme 3.*/
  {
    OC_MODE_INTER_MV_LAST,OC_MODE_INTER_MV,OC_MODE_INTER_MV_LAST2,
    OC_MODE_INTER_NOMV,OC_MODE_INTRA,OC_MODE_GOLDEN_NOMV,OC_MODE_GOLDEN_MV,
    OC_MODE_INTER_MV_FOUR
  },
  /*Scheme 4.*/
  {
    OC_MODE_INTER_MV_LAST,OC_MODE_INTER_MV,OC_MODE_INTER_NOMV,
    OC_MODE_INTER_MV_LAST2,OC_MODE_INTRA,OC_MODE_GOLDEN_NOMV,
    OC_MODE_GOLDEN_MV,OC_MODE_INTER_MV_FOUR
  },
  /*No MV dominates.*/
  /*Scheme 5.*/
  {
    OC_MODE_INTER_NOMV,OC_MODE_INTER_MV_LAST,OC_MODE_INTER_MV_LAST2,
    OC_MODE_INTER_MV,OC_MODE_INTRA,OC_MODE_GOLDEN_NOMV,OC_MODE_GOLDEN_MV,
    OC_MODE_INTER_MV_FOUR
  },
  /*Scheme 6.*/
  {
    OC_MODE_INTER_NOMV,OC_MODE_GOLDEN_NOMV,OC_MODE_INTER_MV_LAST,
    OC_MODE_INTER_MV_LAST2,OC_MODE_INTER_MV,OC_MODE_INTRA,OC_MODE_GOLDEN_MV,
    OC_MODE_INTER_MV_FOUR
  },
  /*Default ordering.*/
  /*Scheme 7 (also used for scheme 0's custom alphabet decode).*/
  {
    OC_MODE_INTER_NOMV,OC_MODE_INTRA,OC_MODE_INTER_MV,OC_MODE_INTER_MV_LAST,
    OC_MODE_INTER_MV_LAST2,OC_MODE_GOLDEN_NOMV,OC_MODE_GOLDEN_MV,
    OC_MODE_INTER_MV_FOUR
  }
};
94
95
96 /*The original DCT tokens are extended and reordered during the construction of
97 the Huffman tables.
98 The extension means more bits can be read with fewer calls to the bitpacker
99 during the Huffman decoding process (at the cost of larger Huffman tables),
100 and fewer tokens require additional extra bits (reducing the average storage
101 per decoded token).
102 The revised ordering reveals essential information in the token value
103 itself; specifically, whether or not there are additional extra bits to read
104 and the parameter to which those extra bits are applied.
105 The token is used to fetch a code word from the OC_DCT_CODE_WORD table below.
106 The extra bits are added into code word at the bit position inferred from the
107 token value, giving the final code word from which all required parameters
108 are derived.
109 The number of EOBs and the leading zero run length can be extracted directly.
110 The coefficient magnitude is optionally negated before extraction, according
111 to a 'flip' bit.*/
112
/*The number of additional extra bits that are decoded with each of the
   internal DCT tokens.*/
static const unsigned char OC_INTERNAL_DCT_TOKEN_EXTRA_BITS[15]={
  12,4,3,3,4,4,5,5,8,8,8,8,3,3,6
};

/*Whether or not an internal token needs any additional extra bits.
  The token values are ordered so that exactly the first 15 take extra
   bits.*/
#define OC_DCT_TOKEN_NEEDS_MORE(token) \
 (token<(sizeof(OC_INTERNAL_DCT_TOKEN_EXTRA_BITS)/ \
  sizeof(*OC_INTERNAL_DCT_TOKEN_EXTRA_BITS)))

/*This token (OC_DCT_REPEAT_RUN3_TOKEN) requires more than 8 extra bits.*/
#define OC_DCT_TOKEN_FAT_EOB (0)

/*The number of EOBs to use for an end-of-frame token.
  Note: We want to set eobs to PTRDIFF_MAX here, but that requires C99, which
   is not yet available everywhere; this should be equivalent.*/
#define OC_DCT_EOB_FINISH (~(size_t)0>>1)

/*The location of the (6) run length bits in the code word.
  These are placed at index 0 and given 8 bits (even though 6 would suffice)
   because it may be faster to extract the lower byte on some platforms.*/
#define OC_DCT_CW_RLEN_SHIFT (0)
/*The location of the (12) EOB bits in the code word.*/
#define OC_DCT_CW_EOB_SHIFT  (8)
/*The location of the (1) flip bit in the code word.
  This must be right under the magnitude bits.*/
#define OC_DCT_CW_FLIP_BIT   (20)
/*The location of the (11) token magnitude bits in the code word.
  These must be last, and rely on a sign-extending right shift.*/
#define OC_DCT_CW_MAG_SHIFT  (21)

/*Pack the given fields into a code word.
  Note: in C, '+'/'-' bind tighter than '<<', so the value shifted into the
   magnitude field is (_mag)-(_flip); adding the flip bit back in during
   decode restores the original magnitude.*/
#define OC_DCT_CW_PACK(_eobs,_rlen,_mag,_flip) \
 ((_eobs)<<OC_DCT_CW_EOB_SHIFT| \
 (_rlen)<<OC_DCT_CW_RLEN_SHIFT| \
 (_flip)<<OC_DCT_CW_FLIP_BIT| \
 (_mag)-(_flip)<<OC_DCT_CW_MAG_SHIFT)

/*A special code word value that signals the end of the frame (a long EOB run
   of zero).*/
#define OC_DCT_CW_FINISH (0)

/*The position at which to insert the extra bits in the code word.
  We use this formulation because Intel has no useful cmov.
  A real architecture would probably do better with two of those.
  This translates to 11 instructions(!), and is _still_ faster than either a
   table lookup (just barely) or the naive double-ternary implementation
   (which gcc translates to a jump and a cmov).
  This assumes OC_DCT_CW_RLEN_SHIFT is zero, but could easily be reworked if
   you want to make one of the other shifts zero.*/
#define OC_DCT_TOKEN_EB_POS(_token) \
 ((OC_DCT_CW_EOB_SHIFT-OC_DCT_CW_MAG_SHIFT&-((_token)<2)) \
 +(OC_DCT_CW_MAG_SHIFT&-((_token)<12)))
167
/*The code words for each internal token.
  See the notes at OC_DCT_TOKEN_MAP for the reasons why things are out of
   order.
  Extra bits, when present, are added into the code word at the bit position
   given by OC_DCT_TOKEN_EB_POS(token).*/
static const ogg_int32_t OC_DCT_CODE_WORD[92]={
  /*These tokens require additional extra bits for the EOB count.*/
  /*OC_DCT_REPEAT_RUN3_TOKEN (12 extra bits)*/
  OC_DCT_CW_FINISH,
  /*OC_DCT_REPEAT_RUN2_TOKEN (4 extra bits)*/
  OC_DCT_CW_PACK(16, 0,  0,0),
  /*These tokens require additional extra bits for the magnitude.*/
  /*OC_DCT_VAL_CAT5 (4 extra bits-1 already read)*/
  OC_DCT_CW_PACK( 0, 0, 13,0),
  OC_DCT_CW_PACK( 0, 0, 13,1),
  /*OC_DCT_VAL_CAT6 (5 extra bits-1 already read)*/
  OC_DCT_CW_PACK( 0, 0, 21,0),
  OC_DCT_CW_PACK( 0, 0, 21,1),
  /*OC_DCT_VAL_CAT7 (6 extra bits-1 already read)*/
  OC_DCT_CW_PACK( 0, 0, 37,0),
  OC_DCT_CW_PACK( 0, 0, 37,1),
  /*OC_DCT_VAL_CAT8 (10 extra bits-2 already read)*/
  OC_DCT_CW_PACK( 0, 0, 69,0),
  OC_DCT_CW_PACK( 0, 0,325,0),
  OC_DCT_CW_PACK( 0, 0, 69,1),
  OC_DCT_CW_PACK( 0, 0,325,1),
  /*These tokens require additional extra bits for the run length.*/
  /*OC_DCT_RUN_CAT1C (4 extra bits-1 already read)*/
  OC_DCT_CW_PACK( 0,10, +1,0),
  OC_DCT_CW_PACK( 0,10, -1,0),
  /*OC_DCT_ZRL_TOKEN (6 extra bits)
    Flip is set to distinguish this from OC_DCT_CW_FINISH.*/
  OC_DCT_CW_PACK( 0, 0,  0,1),
  /*The remaining tokens require no additional extra bits.*/
  /*OC_DCT_EOB1_TOKEN (0 extra bits)*/
  OC_DCT_CW_PACK( 1, 0,  0,0),
  /*OC_DCT_EOB2_TOKEN (0 extra bits)*/
  OC_DCT_CW_PACK( 2, 0,  0,0),
  /*OC_DCT_EOB3_TOKEN (0 extra bits)*/
  OC_DCT_CW_PACK( 3, 0,  0,0),
  /*OC_DCT_RUN_CAT1A (1 extra bit-1 already read)x5*/
  OC_DCT_CW_PACK( 0, 1, +1,0),
  OC_DCT_CW_PACK( 0, 1, -1,0),
  OC_DCT_CW_PACK( 0, 2, +1,0),
  OC_DCT_CW_PACK( 0, 2, -1,0),
  OC_DCT_CW_PACK( 0, 3, +1,0),
  OC_DCT_CW_PACK( 0, 3, -1,0),
  OC_DCT_CW_PACK( 0, 4, +1,0),
  OC_DCT_CW_PACK( 0, 4, -1,0),
  OC_DCT_CW_PACK( 0, 5, +1,0),
  OC_DCT_CW_PACK( 0, 5, -1,0),
  /*OC_DCT_RUN_CAT2A (2 extra bits-2 already read)*/
  OC_DCT_CW_PACK( 0, 1, +2,0),
  OC_DCT_CW_PACK( 0, 1, +3,0),
  OC_DCT_CW_PACK( 0, 1, -2,0),
  OC_DCT_CW_PACK( 0, 1, -3,0),
  /*OC_DCT_RUN_CAT1B (3 extra bits-3 already read)*/
  OC_DCT_CW_PACK( 0, 6, +1,0),
  OC_DCT_CW_PACK( 0, 7, +1,0),
  OC_DCT_CW_PACK( 0, 8, +1,0),
  OC_DCT_CW_PACK( 0, 9, +1,0),
  OC_DCT_CW_PACK( 0, 6, -1,0),
  OC_DCT_CW_PACK( 0, 7, -1,0),
  OC_DCT_CW_PACK( 0, 8, -1,0),
  OC_DCT_CW_PACK( 0, 9, -1,0),
  /*OC_DCT_RUN_CAT2B (3 extra bits-3 already read)*/
  OC_DCT_CW_PACK( 0, 2, +2,0),
  OC_DCT_CW_PACK( 0, 3, +2,0),
  OC_DCT_CW_PACK( 0, 2, +3,0),
  OC_DCT_CW_PACK( 0, 3, +3,0),
  OC_DCT_CW_PACK( 0, 2, -2,0),
  OC_DCT_CW_PACK( 0, 3, -2,0),
  OC_DCT_CW_PACK( 0, 2, -3,0),
  OC_DCT_CW_PACK( 0, 3, -3,0),
  /*OC_DCT_SHORT_ZRL_TOKEN (3 extra bits-3 already read)
    Flip is set on the first one to distinguish it from OC_DCT_CW_FINISH.*/
  OC_DCT_CW_PACK( 0, 0,  0,1),
  OC_DCT_CW_PACK( 0, 1,  0,0),
  OC_DCT_CW_PACK( 0, 2,  0,0),
  OC_DCT_CW_PACK( 0, 3,  0,0),
  OC_DCT_CW_PACK( 0, 4,  0,0),
  OC_DCT_CW_PACK( 0, 5,  0,0),
  OC_DCT_CW_PACK( 0, 6,  0,0),
  OC_DCT_CW_PACK( 0, 7,  0,0),
  /*OC_ONE_TOKEN (0 extra bits)*/
  OC_DCT_CW_PACK( 0, 0, +1,0),
  /*OC_MINUS_ONE_TOKEN (0 extra bits)*/
  OC_DCT_CW_PACK( 0, 0, -1,0),
  /*OC_TWO_TOKEN (0 extra bits)*/
  OC_DCT_CW_PACK( 0, 0, +2,0),
  /*OC_MINUS_TWO_TOKEN (0 extra bits)*/
  OC_DCT_CW_PACK( 0, 0, -2,0),
  /*OC_DCT_VAL_CAT2 (1 extra bit-1 already read)x4*/
  OC_DCT_CW_PACK( 0, 0, +3,0),
  OC_DCT_CW_PACK( 0, 0, -3,0),
  OC_DCT_CW_PACK( 0, 0, +4,0),
  OC_DCT_CW_PACK( 0, 0, -4,0),
  OC_DCT_CW_PACK( 0, 0, +5,0),
  OC_DCT_CW_PACK( 0, 0, -5,0),
  OC_DCT_CW_PACK( 0, 0, +6,0),
  OC_DCT_CW_PACK( 0, 0, -6,0),
  /*OC_DCT_VAL_CAT3 (2 extra bits-2 already read)*/
  OC_DCT_CW_PACK( 0, 0, +7,0),
  OC_DCT_CW_PACK( 0, 0, +8,0),
  OC_DCT_CW_PACK( 0, 0, -7,0),
  OC_DCT_CW_PACK( 0, 0, -8,0),
  /*OC_DCT_VAL_CAT4 (3 extra bits-3 already read)*/
  OC_DCT_CW_PACK( 0, 0, +9,0),
  OC_DCT_CW_PACK( 0, 0,+10,0),
  OC_DCT_CW_PACK( 0, 0,+11,0),
  OC_DCT_CW_PACK( 0, 0,+12,0),
  OC_DCT_CW_PACK( 0, 0, -9,0),
  OC_DCT_CW_PACK( 0, 0,-10,0),
  OC_DCT_CW_PACK( 0, 0,-11,0),
  OC_DCT_CW_PACK( 0, 0,-12,0),
  /*OC_DCT_REPEAT_RUN1_TOKEN (3 extra bits-3 already read)*/
  OC_DCT_CW_PACK( 8, 0,  0,0),
  OC_DCT_CW_PACK( 9, 0,  0,0),
  OC_DCT_CW_PACK(10, 0,  0,0),
  OC_DCT_CW_PACK(11, 0,  0,0),
  OC_DCT_CW_PACK(12, 0,  0,0),
  OC_DCT_CW_PACK(13, 0,  0,0),
  OC_DCT_CW_PACK(14, 0,  0,0),
  OC_DCT_CW_PACK(15, 0,  0,0),
  /*OC_DCT_REPEAT_RUN0_TOKEN (2 extra bits-2 already read)*/
  OC_DCT_CW_PACK( 4, 0,  0,0),
  OC_DCT_CW_PACK( 5, 0,  0,0),
  OC_DCT_CW_PACK( 6, 0,  0,0),
  OC_DCT_CW_PACK( 7, 0,  0,0),
};
296
297
298
/*Unpacks a run length for a run of (partially or fully) coded super blocks.
  Coding scheme:
   Codeword            Run Length
   0                   1
   10x                 2-3
   110x                4-5
   1110xx              6-9
   11110xxx            10-17
   111110xxxx          18-33
   111111xxxxxxxxxxxx  34-4129
  Return: The run length, in 1...4129.*/
static int oc_sb_run_unpack(oc_pack_buf *_opb){
  long code;
  if(oc_pack_read1(_opb)==0)return 1;
  code=oc_pack_read(_opb,2);
  if(!(code&2))return 2+(int)code;
  if(!(code&1))return 4+(int)oc_pack_read1(_opb);
  code=oc_pack_read(_opb,3);
  if(!(code&4))return 6+(int)code;
  if(!(code&2)){
    int base;
    /*The last bit of this group is the high bit of the 3-bit offset; the
       low two bits follow.*/
    base=10+(((int)code&1)<<2);
    return base+(int)oc_pack_read(_opb,2);
  }
  if(!(code&1))return 18+(int)oc_pack_read(_opb,4);
  return 34+(int)oc_pack_read(_opb,12);
}
333
/*Unpacks a run length for a run of coded/uncoded blocks within a partially
   coded super block.
  Coding scheme:
   Codeword   Run Length
   0x         1-2
   10x        3-4
   110x       5-6
   1110xx     7-10
   11110xx    11-14
   11111xxxx  15-30
  Return: The run length, in 1...30.*/
static int oc_block_run_unpack(oc_pack_buf *_opb){
  long bits;
  long bits2;
  bits=oc_pack_read(_opb,2);
  if((bits&2)==0)return 1+(int)bits;
  else if((bits&1)==0){
    bits=oc_pack_read1(_opb);
    return 3+(int)bits;
  }
  bits=oc_pack_read(_opb,2);
  if((bits&2)==0)return 5+(int)bits;
  else if((bits&1)==0){
    bits=oc_pack_read(_opb,2);
    return 7+(int)bits;
  }
  bits=oc_pack_read(_opb,3);
  /*Cast explicitly: the values fit in an int, and the implicit long->int
     narrowing on these two return paths was inconsistent with the rest of
     this function and with oc_sb_run_unpack().*/
  if((bits&4)==0)return 11+(int)bits;
  bits2=oc_pack_read(_opb,2);
  return 15+(int)(((bits&3)<<2)+bits2);
}
362
363
364
/*Initializes a decoder context from the info and setup headers.
  _dec:   The decoder context to initialize.
  _info:  The stream parameters from the info header.
  _setup: The Huffman tables and quantization parameters from the setup
           header.
  Return: 0 on success, TH_EFAULT on allocation failure, or a negative value
           propagated from oc_state_init()/oc_huff_trees_copy().
  On failure, everything allocated so far is released before returning.*/
static int oc_dec_init(oc_dec_ctx *_dec,const th_info *_info,
 const th_setup_info *_setup){
  int qti;
  int pli;
  int qi;
  int ret;
  /*The '3' is passed through to oc_state_init; presumably the number of
     frame buffers to allocate (current, previous and golden reference)
     -- confirm against the oc_state_init implementation.*/
  ret=oc_state_init(&_dec->state,_info,3);
  if(ret<0)return ret;
  ret=oc_huff_trees_copy(_dec->huff_tables,
   (const oc_huff_node *const *)_setup->huff_tables);
  if(ret<0){
    oc_state_clear(&_dec->state);
    return ret;
  }
  /*For each fragment, allocate one byte for every DCT coefficient token, plus
     one byte for extra-bits for each token, plus one more byte for the long
     EOB run, just in case it's the very last token and has a run length of
     one.*/
  _dec->dct_tokens=(unsigned char *)_ogg_malloc((64+64+1)*
   _dec->state.nfrags*sizeof(_dec->dct_tokens[0]));
  if(_dec->dct_tokens==NULL){
    oc_huff_trees_clear(_dec->huff_tables);
    oc_state_clear(&_dec->state);
    return TH_EFAULT;
  }
  /*Point the dequant table pointers at their backing storage before they are
     filled in.*/
  for(qi=0;qi<64;qi++)for(pli=0;pli<3;pli++)for(qti=0;qti<2;qti++){
    _dec->state.dequant_tables[qi][pli][qti]=
     _dec->state.dequant_table_data[qi][pli][qti];
  }
  oc_dequant_tables_init(_dec->state.dequant_tables,_dec->pp_dc_scale,
   &_setup->qinfo);
  /*Precompute a post-processing sharpness modifier for each qi from a few
     dequantizer entries.*/
  for(qi=0;qi<64;qi++){
    int qsum;
    qsum=0;
    for(qti=0;qti<2;qti++)for(pli=0;pli<3;pli++){
      /*NOTE: '+' binds tighter than '<<', so the shift applies to the whole
         four-entry sum, doubling this plane's contribution when pli==0
         (luma), not just the last entry.*/
      qsum+=_dec->state.dequant_tables[qi][pli][qti][12]+
       _dec->state.dequant_tables[qi][pli][qti][17]+
       _dec->state.dequant_tables[qi][pli][qti][18]+
       _dec->state.dequant_tables[qi][pli][qti][24]<<(pli==0);
    }
    _dec->pp_sharp_mod[qi]=-(qsum>>11);
  }
  memcpy(_dec->state.loop_filter_limits,_setup->qinfo.loop_filter_limits,
   sizeof(_dec->state.loop_filter_limits));
  /*Post-processing buffers are not allocated until a pp level is requested.*/
  _dec->pp_level=OC_PP_LEVEL_DISABLED;
  _dec->dc_qis=NULL;
  _dec->variances=NULL;
  _dec->pp_frame_data=NULL;
  _dec->stripe_cb.ctx=NULL;
  _dec->stripe_cb.stripe_decoded=NULL;
#if defined(HAVE_CAIRO)
  _dec->telemetry=0;
  _dec->telemetry_bits=0;
  _dec->telemetry_qi=0;
  _dec->telemetry_mbmode=0;
  _dec->telemetry_mv=0;
  _dec->telemetry_frame_data=NULL;
#endif
  return 0;
}
425
/*Releases all resources allocated by oc_dec_init().
  The pointers freed here are either allocated or set to NULL by
   oc_dec_init(); this assumes _ogg_free(NULL) is a no-op, like free(NULL).*/
static void oc_dec_clear(oc_dec_ctx *_dec){
#if defined(HAVE_CAIRO)
  _ogg_free(_dec->telemetry_frame_data);
#endif
  _ogg_free(_dec->pp_frame_data);
  _ogg_free(_dec->variances);
  _ogg_free(_dec->dc_qis);
  _ogg_free(_dec->dct_tokens);
  oc_huff_trees_clear(_dec->huff_tables);
  oc_state_clear(&_dec->state);
}
437
438
/*Unpacks the header fields of a data packet: frame type and the qi list.
  Return: 0 on success, TH_EBADPACKET if this is not a data packet, or
   TH_EIMPL if a keyframe has non-zero reserved bits.*/
static int oc_dec_frame_header_unpack(oc_dec_ctx *_dec){
  int nqis;
  /*A data packet must begin with a zero bit.*/
  if(oc_pack_read1(&_dec->opb)!=0)return TH_EBADPACKET;
  /*One bit selects the frame type (I or P).*/
  _dec->state.frame_type=(int)oc_pack_read1(&_dec->opb);
  /*Read in the qi list: one mandatory 6-bit entry, then up to two more, each
     announced by a continuation bit.*/
  _dec->state.qis[0]=(unsigned char)oc_pack_read(&_dec->opb,6);
  nqis=1;
  while(nqis<3&&oc_pack_read1(&_dec->opb)){
    _dec->state.qis[nqis++]=(unsigned char)oc_pack_read(&_dec->opb,6);
  }
  _dec->state.nqis=nqis;
  if(_dec->state.frame_type==OC_INTRA_FRAME){
    /*Keyframes have 3 unused configuration bits, holdovers from VP3 days.
      Most of the other unused bits in the VP3 headers were eliminated.
      They must be zero, or decoding is not supported.*/
    if(oc_pack_read(&_dec->opb,3)!=0)return TH_EIMPL;
  }
  return 0;
}
474
475 /*Mark all fragments as coded and in OC_MODE_INTRA.
476 This also builds up the coded fragment list (in coded order), and clears the
477 uncoded fragment list.
478 It does not update the coded macro block list nor the super block flags, as
479 those are not used when decoding INTRA frames.*/
oc_dec_mark_all_intra(oc_dec_ctx * _dec)480 static void oc_dec_mark_all_intra(oc_dec_ctx *_dec){
481 const oc_sb_map *sb_maps;
482 const oc_sb_flags *sb_flags;
483 oc_fragment *frags;
484 ptrdiff_t *coded_fragis;
485 ptrdiff_t ncoded_fragis;
486 ptrdiff_t prev_ncoded_fragis;
487 unsigned nsbs;
488 unsigned sbi;
489 int pli;
490 coded_fragis=_dec->state.coded_fragis;
491 prev_ncoded_fragis=ncoded_fragis=0;
492 sb_maps=(const oc_sb_map *)_dec->state.sb_maps;
493 sb_flags=_dec->state.sb_flags;
494 frags=_dec->state.frags;
495 sbi=nsbs=0;
496 for(pli=0;pli<3;pli++){
497 nsbs+=_dec->state.fplanes[pli].nsbs;
498 for(;sbi<nsbs;sbi++){
499 int quadi;
500 for(quadi=0;quadi<4;quadi++)if(sb_flags[sbi].quad_valid&1<<quadi){
501 int bi;
502 for(bi=0;bi<4;bi++){
503 ptrdiff_t fragi;
504 fragi=sb_maps[sbi][quadi][bi];
505 if(fragi>=0){
506 frags[fragi].coded=1;
507 frags[fragi].mb_mode=OC_MODE_INTRA;
508 coded_fragis[ncoded_fragis++]=fragi;
509 }
510 }
511 }
512 }
513 _dec->state.ncoded_fragis[pli]=ncoded_fragis-prev_ncoded_fragis;
514 prev_ncoded_fragis=ncoded_fragis;
515 }
516 _dec->state.ntotal_coded_fragis=ncoded_fragis;
517 }
518
519 /*Decodes the bit flags indicating whether each super block is partially coded
520 or not.
521 Return: The number of partially coded super blocks.*/
oc_dec_partial_sb_flags_unpack(oc_dec_ctx * _dec)522 static unsigned oc_dec_partial_sb_flags_unpack(oc_dec_ctx *_dec){
523 oc_sb_flags *sb_flags;
524 unsigned nsbs;
525 unsigned sbi;
526 unsigned npartial;
527 unsigned run_count;
528 long val;
529 int flag;
530 val=oc_pack_read1(&_dec->opb);
531 flag=(int)val;
532 sb_flags=_dec->state.sb_flags;
533 nsbs=_dec->state.nsbs;
534 sbi=npartial=0;
535 while(sbi<nsbs){
536 int full_run;
537 run_count=oc_sb_run_unpack(&_dec->opb);
538 full_run=run_count>=4129;
539 do{
540 sb_flags[sbi].coded_partially=flag;
541 sb_flags[sbi].coded_fully=0;
542 npartial+=flag;
543 sbi++;
544 }
545 while(--run_count>0&&sbi<nsbs);
546 if(full_run&&sbi<nsbs){
547 val=oc_pack_read1(&_dec->opb);
548 flag=(int)val;
549 }
550 else flag=!flag;
551 }
552 /*TODO: run_count should be 0 here.
553 If it's not, we should issue a warning of some kind.*/
554 return npartial;
555 }
556
/*Decodes the bit flags for whether or not each non-partially-coded super
   block is fully coded or not.
  This function should only be called if there is at least one
   non-partially-coded super block.
  (The stale "Return:" note was removed: this function returns nothing.)*/
static void oc_dec_coded_sb_flags_unpack(oc_dec_ctx *_dec){
  oc_sb_flags *sb_flags;
  unsigned     nsbs;
  unsigned     sbi;
  unsigned     run_count;
  long         val;
  int          flag;
  sb_flags=_dec->state.sb_flags;
  nsbs=_dec->state.nsbs;
  /*Skip partially coded super blocks.*/
  for(sbi=0;sb_flags[sbi].coded_partially;sbi++);
  /*The first flag is coded explicitly; subsequent runs alternate, except
     after a maximum-length run (4129), which is followed by another explicit
     flag bit.*/
  val=oc_pack_read1(&_dec->opb);
  flag=(int)val;
  do{
    int full_run;
    run_count=oc_sb_run_unpack(&_dec->opb);
    full_run=run_count>=4129;
    for(;sbi<nsbs;sbi++){
      /*Partially coded super blocks were already handled; they do not
         consume entries from this run.*/
      if(sb_flags[sbi].coded_partially)continue;
      if(run_count--<=0)break;
      sb_flags[sbi].coded_fully=flag;
    }
    if(full_run&&sbi<nsbs){
      val=oc_pack_read1(&_dec->opb);
      flag=(int)val;
    }
    else flag=!flag;
  }
  while(sbi<nsbs);
  /*TODO: run_count should be 0 here.
    If it's not, we should issue a warning of some kind.*/
}
594
/*Unpacks the coded/uncoded flags for every fragment and builds the coded and
   uncoded fragment lists.
  The coded list grows from the front of the coded_fragis array and the
   uncoded list grows downward from index nfrags of the same array.*/
static void oc_dec_coded_flags_unpack(oc_dec_ctx *_dec){
  const oc_sb_map   *sb_maps;
  const oc_sb_flags *sb_flags;
  oc_fragment       *frags;
  unsigned           nsbs;
  unsigned           sbi;
  unsigned           npartial;
  long               val;
  int                pli;
  int                flag;
  int                run_count;
  ptrdiff_t         *coded_fragis;
  ptrdiff_t         *uncoded_fragis;
  ptrdiff_t          ncoded_fragis;
  ptrdiff_t          nuncoded_fragis;
  ptrdiff_t          prev_ncoded_fragis;
  npartial=oc_dec_partial_sb_flags_unpack(_dec);
  /*Unless every super block is partially coded, unpack the fully-coded flags
     for the remainder.*/
  if(npartial<_dec->state.nsbs)oc_dec_coded_sb_flags_unpack(_dec);
  if(npartial>0){
    /*The first block-level flag is the complement of this bit: the flag is
       toggled once before the first run is consumed below.*/
    val=oc_pack_read1(&_dec->opb);
    flag=!(int)val;
  }
  else flag=0;
  sb_maps=(const oc_sb_map *)_dec->state.sb_maps;
  sb_flags=_dec->state.sb_flags;
  frags=_dec->state.frags;
  sbi=nsbs=run_count=0;
  coded_fragis=_dec->state.coded_fragis;
  uncoded_fragis=coded_fragis+_dec->state.nfrags;
  prev_ncoded_fragis=ncoded_fragis=nuncoded_fragis=0;
  for(pli=0;pli<3;pli++){
    nsbs+=_dec->state.fplanes[pli].nsbs;
    for(;sbi<nsbs;sbi++){
      int quadi;
      for(quadi=0;quadi<4;quadi++)if(sb_flags[sbi].quad_valid&1<<quadi){
        int bi;
        for(bi=0;bi<4;bi++){
          ptrdiff_t fragi;
          fragi=sb_maps[sbi][quadi][bi];
          if(fragi>=0){
            int coded;
            if(sb_flags[sbi].coded_fully)coded=1;
            else if(!sb_flags[sbi].coded_partially)coded=0;
            else{
              /*Partially coded super block: consume block-level runs, with
                 the flag alternating between runs.*/
              if(run_count<=0){
                run_count=oc_block_run_unpack(&_dec->opb);
                flag=!flag;
              }
              run_count--;
              coded=flag;
            }
            if(coded)coded_fragis[ncoded_fragis++]=fragi;
            else *(uncoded_fragis-++nuncoded_fragis)=fragi;
            frags[fragi].coded=coded;
          }
        }
      }
    }
    /*Record the per-plane count of coded fragments.*/
    _dec->state.ncoded_fragis[pli]=ncoded_fragis-prev_ncoded_fragis;
    prev_ncoded_fragis=ncoded_fragis;
  }
  _dec->state.ntotal_coded_fragis=ncoded_fragis;
  /*TODO: run_count should be 0 here.
    If it's not, we should issue a warning of some kind.*/
}
660
661
662
/*Reads one macro block mode index from the packet.*/
typedef int (*oc_mode_unpack_func)(oc_pack_buf *_opb);
664
/*Reads a mode index coded in unary: i one-bits followed by a zero, capped at
   7 (no terminating zero is coded for the last index).
  Return: The mode index, in 0...7.*/
static int oc_vlc_mode_unpack(oc_pack_buf *_opb){
  int i;
  i=0;
  while(i<7&&oc_pack_read1(_opb))i++;
  return i;
}
674
/*Reads a mode index coded directly with a fixed-length 3-bit code.
  Return: The mode index, in 0...7.*/
static int oc_clc_mode_unpack(oc_pack_buf *_opb){
  return (int)oc_pack_read(_opb,3);
}
680
/*Unpacks the list of macro block modes for INTER frames.
  Modes may be coded with one of 8 schemes: scheme 0 transmits its own
   alphabet, schemes 1...6 use the fixed alphabets in OC_MODE_ALPHABETS with
   a unary code, and scheme 7 codes each mode directly with 3 bits.*/
static void oc_dec_mb_modes_unpack(oc_dec_ctx *_dec){
  const oc_mb_map     *mb_maps;
  signed char         *mb_modes;
  const oc_fragment   *frags;
  const unsigned char *alphabet;
  unsigned char        scheme0_alphabet[8];
  oc_mode_unpack_func  mode_unpack;
  size_t               nmbs;
  size_t               mbi;
  long                 val;
  int                  mode_scheme;
  val=oc_pack_read(&_dec->opb,3);
  mode_scheme=(int)val;
  if(mode_scheme==0){
    int mi;
    /*Just in case, initialize the modes to something.
      If the bitstream doesn't contain each index exactly once, it's likely
       corrupt and the rest of the packet is garbage anyway, but this way we
       won't crash, and we'll decode SOMETHING.*/
    /*LOOP VECTORIZES*/
    for(mi=0;mi<OC_NMODES;mi++)scheme0_alphabet[mi]=OC_MODE_INTER_NOMV;
    /*Each mode's rank in the custom alphabet is coded with 3 bits.*/
    for(mi=0;mi<OC_NMODES;mi++){
      val=oc_pack_read(&_dec->opb,3);
      scheme0_alphabet[val]=OC_MODE_ALPHABETS[6][mi];
    }
    alphabet=scheme0_alphabet;
  }
  else alphabet=OC_MODE_ALPHABETS[mode_scheme-1];
  /*Scheme 7 uses the fixed-length code; all others use the unary code.*/
  if(mode_scheme==7)mode_unpack=oc_clc_mode_unpack;
  else mode_unpack=oc_vlc_mode_unpack;
  mb_modes=_dec->state.mb_modes;
  mb_maps=(const oc_mb_map *)_dec->state.mb_maps;
  nmbs=_dec->state.nmbs;
  frags=_dec->state.frags;
  for(mbi=0;mbi<nmbs;mbi++){
    if(mb_modes[mbi]!=OC_MODE_INVALID){
      int bi;
      /*Check for a coded luma block in this macro block.*/
      for(bi=0;bi<4&&!frags[mb_maps[mbi][0][bi]].coded;bi++);
      /*We found one, decode a mode.*/
      if(bi<4)mb_modes[mbi]=alphabet[(*mode_unpack)(&_dec->opb)];
      /*There were none: INTER_NOMV is forced.*/
      else mb_modes[mbi]=OC_MODE_INTER_NOMV;
    }
  }
}
728
729
730
/*Reads one motion vector component from the packet.*/
typedef int (*oc_mv_comp_unpack_func)(oc_pack_buf *_opb);
732
/*Reads one MV component with the variable-length code.
  A 3-bit prefix selects 0, +/-1, a magnitude of 2...3 plus a sign bit, or a
   larger magnitude class whose offset and sign are packed in extra bits.
  Return: The signed MV component.*/
static int oc_vlc_mv_comp_unpack(oc_pack_buf *_opb){
  long code;
  long sign;
  int  mag;
  int  mask;
  code=oc_pack_read(_opb,3);
  /*Codes 0, 1 and 2 encode the values 0, +1 and -1 directly.*/
  if(code==0)return 0;
  if(code==1)return 1;
  if(code==2)return -1;
  if(code<5){
    /*Codes 3 and 4: magnitude 2 or 3, followed by a sign bit.*/
    mag=(int)(code-1);
    sign=oc_pack_read1(_opb);
  }
  else{
    /*Codes 5...7: magnitude in [2^(code-3),2^(code-2)), with the offset and
       the sign (in the LSB) packed together in code-2 extra bits.*/
    long extra;
    mag=1<<(code-3);
    extra=oc_pack_read(_opb,(int)(code-2));
    mag+=(int)(extra>>1);
    sign=extra&1;
  }
  /*Branchless negation: (mag+mask)^mask is -mag when mask==-1, mag when
     mask==0.*/
  mask=-(int)sign;
  return (mag+mask)^mask;
}
760
/*Reads one MV component with the fixed-length code: a 6-bit value whose
   upper 5 bits are the magnitude and whose LSB is the sign.
  Return: The signed MV component.*/
static int oc_clc_mv_comp_unpack(oc_pack_buf *_opb){
  long code;
  int  mag;
  int  mask;
  code=oc_pack_read(_opb,6);
  mag=(int)code>>1;
  /*Branchless negation via a mask of the sign bit.*/
  mask=-((int)code&1);
  return (mag+mask)^mask;
}
770
/*Unpacks the list of motion vectors for INTER frames, and propagates the
   macro block modes and motion vectors to the individual fragments.*/
static void oc_dec_mv_unpack_and_frag_modes_fill(oc_dec_ctx *_dec){
  const oc_mb_map        *mb_maps;
  const signed char      *mb_modes;
  oc_set_chroma_mvs_func  set_chroma_mvs;
  oc_mv_comp_unpack_func  mv_comp_unpack;
  oc_fragment            *frags;
  oc_mv                  *frag_mvs;
  const unsigned char    *map_idxs;
  int                     map_nidxs;
  oc_mv                   last_mv[2];
  oc_mv                   cbmvs[4];
  size_t                  nmbs;
  size_t                  mbi;
  long                    val;
  set_chroma_mvs=OC_SET_CHROMA_MVS_TABLE[_dec->state.info.pixel_fmt];
  /*One bit selects between the variable-length and fixed-length (6-bit) MV
     component codes for the whole frame.*/
  val=oc_pack_read1(&_dec->opb);
  mv_comp_unpack=val?oc_clc_mv_comp_unpack:oc_vlc_mv_comp_unpack;
  map_idxs=OC_MB_MAP_IDXS[_dec->state.info.pixel_fmt];
  map_nidxs=OC_MB_MAP_NIDXS[_dec->state.info.pixel_fmt];
  /*last_mv[0] is the most recently decoded MV, last_mv[1] the one before.*/
  memset(last_mv,0,sizeof(last_mv));
  frags=_dec->state.frags;
  frag_mvs=_dec->state.frag_mvs;
  mb_maps=(const oc_mb_map *)_dec->state.mb_maps;
  mb_modes=_dec->state.mb_modes;
  nmbs=_dec->state.nmbs;
  for(mbi=0;mbi<nmbs;mbi++){
    int mb_mode;
    mb_mode=mb_modes[mbi];
    if(mb_mode!=OC_MODE_INVALID){
      oc_mv     mbmv;
      ptrdiff_t fragi;
      /*coded[] holds the map indices of the coded fragments in this macro
         block; 13 entries presumably covers the largest map (12 for 4:4:4)
         plus the -1 terminator -- confirm against OC_MB_MAP_NIDXS.*/
      int       coded[13];
      int       codedi;
      int       ncoded;
      int       mapi;
      int       mapii;
      /*Search for at least one coded fragment.*/
      ncoded=mapii=0;
      do{
        mapi=map_idxs[mapii];
        fragi=mb_maps[mbi][mapi>>2][mapi&3];
        if(frags[fragi].coded)coded[ncoded++]=mapi;
      }
      while(++mapii<map_nidxs);
      if(ncoded<=0)continue;
      switch(mb_mode){
        case OC_MODE_INTER_MV_FOUR:{
          oc_mv lbmvs[4];
          int   bi;
          /*Mark the tail of the list, so we don't accidentally go past it.*/
          coded[ncoded]=-1;
          /*Decode one MV for each coded luma block; uncoded luma blocks get
             a zero MV so the chroma MVs can be derived from all four.*/
          for(bi=codedi=0;bi<4;bi++){
            if(coded[codedi]==bi){
              codedi++;
              fragi=mb_maps[mbi][0][bi];
              frags[fragi].mb_mode=mb_mode;
              lbmvs[bi][0]=(signed char)(*mv_comp_unpack)(&_dec->opb);
              lbmvs[bi][1]=(signed char)(*mv_comp_unpack)(&_dec->opb);
              memcpy(frag_mvs[fragi],lbmvs[bi],sizeof(lbmvs[bi]));
            }
            else lbmvs[bi][0]=lbmvs[bi][1]=0;
          }
          /*The last decoded luma MV is pushed onto the last-MV history.*/
          if(codedi>0){
            memcpy(last_mv[1],last_mv[0],sizeof(last_mv[1]));
            memcpy(last_mv[0],lbmvs[coded[codedi-1]],sizeof(last_mv[0]));
          }
          /*Any remaining coded fragments are chroma; derive their MVs from
             the four luma MVs.*/
          if(codedi<ncoded){
            (*set_chroma_mvs)(cbmvs,(const oc_mv *)lbmvs);
            for(;codedi<ncoded;codedi++){
              mapi=coded[codedi];
              bi=mapi&3;
              fragi=mb_maps[mbi][mapi>>2][bi];
              frags[fragi].mb_mode=mb_mode;
              memcpy(frag_mvs[fragi],cbmvs[bi],sizeof(cbmvs[bi]));
            }
          }
        }break;
        case OC_MODE_INTER_MV:{
          /*Decode a new MV and push it onto the last-MV history.*/
          memcpy(last_mv[1],last_mv[0],sizeof(last_mv[1]));
          mbmv[0]=last_mv[0][0]=(signed char)(*mv_comp_unpack)(&_dec->opb);
          mbmv[1]=last_mv[0][1]=(signed char)(*mv_comp_unpack)(&_dec->opb);
        }break;
        /*Re-use the most recent MV; the history is unchanged.*/
        case OC_MODE_INTER_MV_LAST:memcpy(mbmv,last_mv[0],sizeof(mbmv));break;
        case OC_MODE_INTER_MV_LAST2:{
          /*Re-use the second most recent MV and swap the two history
             entries.*/
          memcpy(mbmv,last_mv[1],sizeof(mbmv));
          memcpy(last_mv[1],last_mv[0],sizeof(last_mv[1]));
          memcpy(last_mv[0],mbmv,sizeof(last_mv[0]));
        }break;
        case OC_MODE_GOLDEN_MV:{
          /*Golden-frame MVs do not update the last-MV history.*/
          mbmv[0]=(signed char)(*mv_comp_unpack)(&_dec->opb);
          mbmv[1]=(signed char)(*mv_comp_unpack)(&_dec->opb);
        }break;
        /*All remaining modes use a zero MV.*/
        default:memset(mbmv,0,sizeof(mbmv));break;
      }
      /*4MV mode fills in the fragments itself.
        For all other modes we can use this common code.*/
      if(mb_mode!=OC_MODE_INTER_MV_FOUR){
        for(codedi=0;codedi<ncoded;codedi++){
          mapi=coded[codedi];
          fragi=mb_maps[mbi][mapi>>2][mapi&3];
          frags[fragi].mb_mode=mb_mode;
          memcpy(frag_mvs[fragi],mbmv,sizeof(mbmv));
        }
      }
    }
  }
}
880
/*Unpacks the qi index (0, 1 or 2) for each coded fragment.
  With a single frame qi there is nothing to decode; otherwise up to two
   passes of the same binary RLE scheme used for super-block coded bits are
   made over the coded fragment list.*/
static void oc_dec_block_qis_unpack(oc_dec_ctx *_dec){
  oc_fragment     *frags;
  const ptrdiff_t *coded_fragis;
  ptrdiff_t        ncoded_fragis;
  ptrdiff_t        fragii;
  ptrdiff_t        fragi;
  ncoded_fragis=_dec->state.ntotal_coded_fragis;
  if(ncoded_fragis<=0)return;
  frags=_dec->state.frags;
  coded_fragis=_dec->state.coded_fragis;
  if(_dec->state.nqis==1){
    /*If this frame has only a single qi value, then just use it for all coded
       fragments.*/
    for(fragii=0;fragii<ncoded_fragis;fragii++){
      frags[coded_fragis[fragii]].qii=0;
    }
  }
  else{
    long val;
    int  flag;
    int  nqi1;
    int  run_count;
    /*Otherwise, we decode a qi index for each fragment, using two passes of
       the same binary RLE scheme used for super-block coded bits.
      The first pass marks each fragment as having a qii of 0 or greater than
       0, and the second pass (if necessary), distinguishes between a qii of
       1 and 2.
      At first we just store the qii in the fragment.
      After all the qii's are decoded, we make a final pass to replace them
       with the corresponding qi's for this frame.*/
    val=oc_pack_read1(&_dec->opb);
    flag=(int)val;
    nqi1=0;
    fragii=0;
    while(fragii<ncoded_fragis){
      int full_run;
      run_count=oc_sb_run_unpack(&_dec->opb);
      full_run=run_count>=4129;
      do{
        frags[coded_fragis[fragii++]].qii=flag;
        nqi1+=flag;
      }
      while(--run_count>0&&fragii<ncoded_fragis);
      /*A maximum-length run is followed by another explicit flag bit;
         otherwise the flag simply alternates.*/
      if(full_run&&fragii<ncoded_fragis){
        val=oc_pack_read1(&_dec->opb);
        flag=(int)val;
      }
      else flag=!flag;
    }
    /*TODO: run_count should be 0 here.
      If it's not, we should issue a warning of some kind.*/
    /*If we have 3 different qi's for this frame, and there was at least one
       fragment with a non-zero qi, make the second pass.*/
    if(_dec->state.nqis==3&&nqi1>0){
      /*Skip qii==0 fragments.*/
      for(fragii=0;frags[coded_fragis[fragii]].qii==0;fragii++);
      val=oc_pack_read1(&_dec->opb);
      flag=(int)val;
      do{
        int full_run;
        run_count=oc_sb_run_unpack(&_dec->opb);
        full_run=run_count>=4129;
        for(;fragii<ncoded_fragis;fragii++){
          fragi=coded_fragis[fragii];
          /*qii==0 fragments were settled in the first pass and do not
             consume entries from this run.*/
          if(frags[fragi].qii==0)continue;
          if(run_count--<=0)break;
          frags[fragi].qii+=flag;
        }
        if(full_run&&fragii<ncoded_fragis){
          val=oc_pack_read1(&_dec->opb);
          flag=(int)val;
        }
        else flag=!flag;
      }
      while(fragii<ncoded_fragis);
      /*TODO: run_count should be 0 here.
        If it's not, we should issue a warning of some kind.*/
    }
  }
}
961
962
963
964 /*Unpacks the DC coefficient tokens.
965 Unlike when unpacking the AC coefficient tokens, we actually need to decode
966 the DC coefficient values now so that we can do DC prediction.
967 _huff_idx: The index of the Huffman table to use for each color plane.
968 _ntoks_left: The number of tokens left to be decoded in each color plane for
969 each coefficient.
970 This is updated as EOB tokens and zero run tokens are decoded.
971 Return: The length of any outstanding EOB run.*/
static ptrdiff_t oc_dec_dc_coeff_unpack(oc_dec_ctx *_dec,int _huff_idxs[2],
 ptrdiff_t _ntoks_left[3][64]){
  unsigned char   *dct_tokens;
  oc_fragment     *frags;
  const ptrdiff_t *coded_fragis;
  ptrdiff_t        ncoded_fragis;
  ptrdiff_t        fragii;
  ptrdiff_t        eobs;
  ptrdiff_t        ti;
  int              pli;
  dct_tokens=_dec->dct_tokens;
  frags=_dec->state.frags;
  coded_fragis=_dec->state.coded_fragis;
  ncoded_fragis=fragii=eobs=ti=0;
  for(pli=0;pli<3;pli++){
    ptrdiff_t run_counts[64];
    ptrdiff_t eob_count;
    ptrdiff_t eobi;
    int       rli;
    ncoded_fragis+=_dec->state.ncoded_fragis[pli];
    memset(run_counts,0,sizeof(run_counts));
    /*Record where this plane's DC tokens begin and the EOB run carried in
       from the previous plane, so the reconstruction pass can replay them.*/
    _dec->eob_runs[pli][0]=eobs;
    _dec->ti0[pli][0]=ti;
    /*Continue any previous EOB run, if there was one.*/
    eobi=eobs;
    /*Clamp the run to the number of coded fragments left in this plane.*/
    if(ncoded_fragis-fragii<eobi)eobi=ncoded_fragis-fragii;
    eob_count=eobi;
    eobs-=eobi;
    while(eobi-->0)frags[coded_fragis[fragii++]].dc=0;
    while(fragii<ncoded_fragis){
      int token;
      int cw;
      int eb;
      int skip;
      /*(pli+1>>1) maps plane 0 to the first Huffman table choice and both
         chroma planes to the second.*/
      token=oc_huff_token_decode(&_dec->opb,
       _dec->huff_tables[_huff_idxs[pli+1>>1]]);
      /*Store the token (and below, any extra bits) so the AC reconstruction
         pass can re-read them without re-parsing the bitstream.*/
      dct_tokens[ti++]=(unsigned char)token;
      if(OC_DCT_TOKEN_NEEDS_MORE(token)){
        eb=(int)oc_pack_read(&_dec->opb,
         OC_INTERNAL_DCT_TOKEN_EXTRA_BITS[token]);
        dct_tokens[ti++]=(unsigned char)eb;
        /*A fat EOB token has more than 8 extra bits; store the high byte too.*/
        if(token==OC_DCT_TOKEN_FAT_EOB)dct_tokens[ti++]=(unsigned char)(eb>>8);
        eb<<=OC_DCT_TOKEN_EB_POS(token);
      }
      else eb=0;
      cw=OC_DCT_CODE_WORD[token]+eb;
      /*Extract the 12-bit EOB run length from the code word.*/
      eobs=cw>>OC_DCT_CW_EOB_SHIFT&0xFFF;
      if(cw==OC_DCT_CW_FINISH)eobs=OC_DCT_EOB_FINISH;
      if(eobs){
        /*An EOB run: zero the DC of each fragment it covers in this plane.*/
        eobi=OC_MINI(eobs,ncoded_fragis-fragii);
        eob_count+=eobi;
        eobs-=eobi;
        while(eobi-->0)frags[coded_fragis[fragii++]].dc=0;
      }
      else{
        int coeff;
        skip=(unsigned char)(cw>>OC_DCT_CW_RLEN_SHIFT);
        /*Conditionally negate the magnitude using the code word's flip bit.*/
        cw^=-(cw&1<<OC_DCT_CW_FLIP_BIT);
        coeff=cw>>OC_DCT_CW_MAG_SHIFT;
        /*A zero-run token starting at DC means this fragment's DC is zero.*/
        if(skip)coeff=0;
        run_counts[skip]++;
        frags[coded_fragis[fragii++]].dc=coeff;
      }
    }
    /*Add the total EOB count to the longest run length.*/
    run_counts[63]+=eob_count;
    /*And convert the run_counts array to a moment table.*/
    for(rli=63;rli-->0;)run_counts[rli]+=run_counts[rli+1];
    /*Finally, subtract off the number of coefficients that have been
       accounted for by runs started in this coefficient.*/
    for(rli=64;rli-->0;)_ntoks_left[pli][rli]-=run_counts[rli];
  }
  _dec->dct_tokens_count=ti;
  return eobs;
}
1047
1048 /*Unpacks the AC coefficient tokens.
1049 This can completely discard coefficient values while unpacking, and so is
1050 somewhat simpler than unpacking the DC coefficient tokens.
1051 _huff_idx: The index of the Huffman table to use for each color plane.
1052 _ntoks_left: The number of tokens left to be decoded in each color plane for
1053 each coefficient.
1054 This is updated as EOB tokens and zero run tokens are decoded.
1055 _eobs: The length of any outstanding EOB run from previous
1056 coefficients.
1057 Return: The length of any outstanding EOB run.*/
static int oc_dec_ac_coeff_unpack(oc_dec_ctx *_dec,int _zzi,int _huff_idxs[2],
 ptrdiff_t _ntoks_left[3][64],ptrdiff_t _eobs){
  unsigned char *dct_tokens;
  ptrdiff_t      ti;
  int            pli;
  dct_tokens=_dec->dct_tokens;
  ti=_dec->dct_tokens_count;
  for(pli=0;pli<3;pli++){
    ptrdiff_t run_counts[64];
    ptrdiff_t eob_count;
    size_t    ntoks_left;
    size_t    ntoks;
    int       rli;
    /*Record where this plane/coefficient's tokens begin, and the EOB run
       carried in, so reconstruction can replay them later.*/
    _dec->eob_runs[pli][_zzi]=_eobs;
    _dec->ti0[pli][_zzi]=ti;
    ntoks_left=_ntoks_left[pli][_zzi];
    memset(run_counts,0,sizeof(run_counts));
    eob_count=0;
    ntoks=0;
    /*Stop once the outstanding EOB run covers all remaining tokens.*/
    while(ntoks+_eobs<ntoks_left){
      int token;
      int cw;
      int eb;
      int skip;
      /*Consume the outstanding EOB run before reading the next token.*/
      ntoks+=_eobs;
      eob_count+=_eobs;
      /*(pli+1>>1): plane 0 uses the first table choice, chroma the second.*/
      token=oc_huff_token_decode(&_dec->opb,
       _dec->huff_tables[_huff_idxs[pli+1>>1]]);
      dct_tokens[ti++]=(unsigned char)token;
      if(OC_DCT_TOKEN_NEEDS_MORE(token)){
        eb=(int)oc_pack_read(&_dec->opb,
         OC_INTERNAL_DCT_TOKEN_EXTRA_BITS[token]);
        dct_tokens[ti++]=(unsigned char)eb;
        /*A fat EOB token has more than 8 extra bits; store the high byte.*/
        if(token==OC_DCT_TOKEN_FAT_EOB)dct_tokens[ti++]=(unsigned char)(eb>>8);
        eb<<=OC_DCT_TOKEN_EB_POS(token);
      }
      else eb=0;
      cw=OC_DCT_CODE_WORD[token]+eb;
      skip=(unsigned char)(cw>>OC_DCT_CW_RLEN_SHIFT);
      /*Extract the 12-bit EOB run length from the code word.*/
      _eobs=cw>>OC_DCT_CW_EOB_SHIFT&0xFFF;
      if(cw==OC_DCT_CW_FINISH)_eobs=OC_DCT_EOB_FINISH;
      if(_eobs==0){
        /*A value or zero-run token; no coefficient value is needed here.*/
        run_counts[skip]++;
        ntoks++;
      }
    }
    /*Add the portion of the last EOB run actually used by this coefficient.*/
    eob_count+=ntoks_left-ntoks;
    /*And remove it from the remaining EOB count.*/
    _eobs-=ntoks_left-ntoks;
    /*Add the total EOB count to the longest run length.*/
    run_counts[63]+=eob_count;
    /*And convert the run_counts array to a moment table.*/
    for(rli=63;rli-->0;)run_counts[rli]+=run_counts[rli+1];
    /*Finally, subtract off the number of coefficients that have been
       accounted for by runs started in this coefficient.*/
    for(rli=64-_zzi;rli-->0;)_ntoks_left[pli][_zzi+rli]-=run_counts[rli];
  }
  _dec->dct_tokens_count=ti;
  return _eobs;
}
1119
1120 /*Tokens describing the DCT coefficients that belong to each fragment are
1121 stored in the bitstream grouped by coefficient, not by fragment.
1122
1123 This means that we either decode all the tokens in order, building up a
1124 separate coefficient list for each fragment as we go, and then go back and
1125 do the iDCT on each fragment, or we have to create separate lists of tokens
1126 for each coefficient, so that we can pull the next token required off the
1127 head of the appropriate list when decoding a specific fragment.
1128
1129 The former was VP3's choice, and it meant 2*w*h extra storage for all the
1130 decoded coefficient values.
1131
1132 We take the second option, which lets us store just one to three bytes per
1133 token (generally far fewer than the number of coefficients, due to EOB
1134 tokens and zero runs), and which requires us to only maintain a counter for
1135 each of the 64 coefficients, instead of a counter for every fragment to
1136 determine where the next token goes.
1137
1138 We actually use 3 counters per coefficient, one for each color plane, so we
1139 can decode all color planes simultaneously.
1140 This lets color conversion, etc., be done as soon as a full MCU (one or
1141 two super block rows) is decoded, while the image data is still in cache.*/
1142
static void oc_dec_residual_tokens_unpack(oc_dec_ctx *_dec){
  /*First zig-zag index NOT covered by each of the five Huffman groups.*/
  static const unsigned char OC_HUFF_LIST_MAX[5]={1,6,15,28,64};
  ptrdiff_t tokens_rem[3][64];
  int       huff_pair[2];
  ptrdiff_t eob_rem;
  long      bits;
  int       plane;
  int       coeff;
  int       group;
  /*Every coefficient of every plane starts out expecting one token per coded
     fragment in that plane.*/
  for(plane=0;plane<3;plane++){
    for(coeff=0;coeff<64;coeff++){
      tokens_rem[plane][coeff]=_dec->state.ncoded_fragis[plane];
    }
  }
  /*Read the pair of Huffman table selectors used for the DC tokens.*/
  bits=oc_pack_read(&_dec->opb,4);
  huff_pair[0]=(int)bits;
  bits=oc_pack_read(&_dec->opb,4);
  huff_pair[1]=(int)bits;
  /*No EOB run is outstanding when DC decoding begins.*/
  _dec->eob_runs[0][0]=0;
  eob_rem=oc_dec_dc_coeff_unpack(_dec,huff_pair,tokens_rem);
#if defined(HAVE_CAIRO)
  _dec->telemetry_dc_bytes=oc_pack_bytes_left(&_dec->opb);
#endif
  /*Read the pair of base Huffman table selectors used for the AC tokens.*/
  bits=oc_pack_read(&_dec->opb,4);
  huff_pair[0]=(int)bits;
  bits=oc_pack_read(&_dec->opb,4);
  huff_pair[1]=(int)bits;
  coeff=1;
  for(group=1;group<5;group++){
    /*Each successive coefficient group uses the next bank of 16 tables.*/
    huff_pair[0]+=16;
    huff_pair[1]+=16;
    while(coeff<OC_HUFF_LIST_MAX[group]){
      eob_rem=oc_dec_ac_coeff_unpack(_dec,coeff,huff_pair,tokens_rem,eob_rem);
      coeff++;
    }
  }
  /*TODO: eob_rem should be exactly zero, or 4096 or greater.
    The second case occurs when an EOB run of size zero is encountered, which
     gets treated as an infinite EOB run (where infinity is PTRDIFF_MAX).
    If neither of these conditions holds, then a warning should be issued.*/
}
1181
1182
/*Allocates or frees the auxiliary buffers needed for the requested
   post-processing level and refreshes the per-block DC quantizer history.
  Return: 0 on success, or 1 if post-processing cannot or should not be
   performed (it was disabled, there is insufficient history, or an
   allocation failed).*/
static int oc_dec_postprocess_init(oc_dec_ctx *_dec){
  /*pp_level 0: disabled; free any memory used and return*/
  if(_dec->pp_level<=OC_PP_LEVEL_DISABLED){
    if(_dec->dc_qis!=NULL){
      _ogg_free(_dec->dc_qis);
      _dec->dc_qis=NULL;
      _ogg_free(_dec->variances);
      _dec->variances=NULL;
      _ogg_free(_dec->pp_frame_data);
      _dec->pp_frame_data=NULL;
    }
    return 1;
  }
  if(_dec->dc_qis==NULL){
    /*If we haven't been tracking DC quantization indices, there's no point in
       starting now.*/
    if(_dec->state.frame_type!=OC_INTRA_FRAME)return 1;
    _dec->dc_qis=(unsigned char *)_ogg_malloc(
     _dec->state.nfrags*sizeof(_dec->dc_qis[0]));
    if(_dec->dc_qis==NULL)return 1;
    /*dc_qis holds one byte per fragment, so memset with the qi value works.*/
    memset(_dec->dc_qis,_dec->state.qis[0],_dec->state.nfrags);
  }
  else{
    unsigned char   *dc_qis;
    const ptrdiff_t *coded_fragis;
    ptrdiff_t        ncoded_fragis;
    ptrdiff_t        fragii;
    unsigned char    qi0;
    /*Update the DC quantization index of each coded block.*/
    dc_qis=_dec->dc_qis;
    coded_fragis=_dec->state.coded_fragis;
    ncoded_fragis=_dec->state.ncoded_fragis[0]+
     _dec->state.ncoded_fragis[1]+_dec->state.ncoded_fragis[2];
    qi0=(unsigned char)_dec->state.qis[0];
    for(fragii=0;fragii<ncoded_fragis;fragii++){
      dc_qis[coded_fragis[fragii]]=qi0;
    }
  }
  /*pp_level 1: Stop after updating DC quantization indices.*/
  if(_dec->pp_level<=OC_PP_LEVEL_TRACKDCQI){
    if(_dec->variances!=NULL){
      _ogg_free(_dec->variances);
      _dec->variances=NULL;
      _ogg_free(_dec->pp_frame_data);
      _dec->pp_frame_data=NULL;
    }
    return 1;
  }
  if(_dec->variances==NULL){
    size_t frame_sz;
    size_t c_sz;
    int    c_w;
    int    c_h;
    frame_sz=_dec->state.info.frame_width*(size_t)_dec->state.info.frame_height;
    /*Chroma plane dimensions, halved in each direction unless the
       corresponding pixel_fmt subsampling bit is set.*/
    c_w=_dec->state.info.frame_width>>!(_dec->state.info.pixel_fmt&1);
    c_h=_dec->state.info.frame_height>>!(_dec->state.info.pixel_fmt&2);
    c_sz=c_w*(size_t)c_h;
    /*Allocate space for the chroma planes, even if we're not going to use
       them; this simplifies allocation state management, though it may waste
       memory on the few systems that don't overcommit pages.*/
    frame_sz+=c_sz<<1;
    _dec->pp_frame_data=(unsigned char *)_ogg_malloc(
     frame_sz*sizeof(_dec->pp_frame_data[0]));
    _dec->variances=(int *)_ogg_malloc(
     _dec->state.nfrags*sizeof(_dec->variances[0]));
    /*On any allocation failure, release both buffers and give up.*/
    if(_dec->variances==NULL||_dec->pp_frame_data==NULL){
      _ogg_free(_dec->pp_frame_data);
      _dec->pp_frame_data=NULL;
      _ogg_free(_dec->variances);
      _dec->variances=NULL;
      return 1;
    }
    /*Force an update of the PP buffer pointers.*/
    _dec->pp_frame_state=0;
  }
  /*Update the PP buffer pointers if necessary.
    pp_frame_state is 1 when set up for luma only, 2 for all three planes.*/
  if(_dec->pp_frame_state!=1+(_dec->pp_level>=OC_PP_LEVEL_DEBLOCKC)){
    if(_dec->pp_level<OC_PP_LEVEL_DEBLOCKC){
      /*If chroma processing is disabled, just use the PP luma plane.*/
      _dec->pp_frame_buf[0].width=_dec->state.info.frame_width;
      _dec->pp_frame_buf[0].height=_dec->state.info.frame_height;
      /*Negative stride with data pointing at the last row stores the plane
         vertically flipped.*/
      _dec->pp_frame_buf[0].stride=-_dec->pp_frame_buf[0].width;
      _dec->pp_frame_buf[0].data=_dec->pp_frame_data+
       (1-_dec->pp_frame_buf[0].height)*(ptrdiff_t)_dec->pp_frame_buf[0].stride;
    }
    else{
      size_t y_sz;
      size_t c_sz;
      int    c_w;
      int    c_h;
      /*Otherwise, set up pointers to all three PP planes.*/
      y_sz=_dec->state.info.frame_width*(size_t)_dec->state.info.frame_height;
      c_w=_dec->state.info.frame_width>>!(_dec->state.info.pixel_fmt&1);
      c_h=_dec->state.info.frame_height>>!(_dec->state.info.pixel_fmt&2);
      c_sz=c_w*(size_t)c_h;
      _dec->pp_frame_buf[0].width=_dec->state.info.frame_width;
      _dec->pp_frame_buf[0].height=_dec->state.info.frame_height;
      _dec->pp_frame_buf[0].stride=_dec->pp_frame_buf[0].width;
      _dec->pp_frame_buf[0].data=_dec->pp_frame_data;
      _dec->pp_frame_buf[1].width=c_w;
      _dec->pp_frame_buf[1].height=c_h;
      _dec->pp_frame_buf[1].stride=_dec->pp_frame_buf[1].width;
      _dec->pp_frame_buf[1].data=_dec->pp_frame_buf[0].data+y_sz;
      _dec->pp_frame_buf[2].width=c_w;
      _dec->pp_frame_buf[2].height=c_h;
      _dec->pp_frame_buf[2].stride=_dec->pp_frame_buf[2].width;
      _dec->pp_frame_buf[2].data=_dec->pp_frame_buf[1].data+c_sz;
      oc_ycbcr_buffer_flip(_dec->pp_frame_buf,_dec->pp_frame_buf);
    }
    _dec->pp_frame_state=1+(_dec->pp_level>=OC_PP_LEVEL_DEBLOCKC);
  }
  /*If we're not processing chroma, copy the reference frame's chroma planes.*/
  if(_dec->pp_level<OC_PP_LEVEL_DEBLOCKC){
    memcpy(_dec->pp_frame_buf+1,
     _dec->state.ref_frame_bufs[_dec->state.ref_frame_idx[OC_FRAME_SELF]]+1,
     sizeof(_dec->pp_frame_buf[1])*2);
  }
  return 0;
}
1302
1303
1304
/*Per-frame state for the main decoding pipeline, covering one MCU (one or
   two super block rows) at a time.*/
typedef struct{
  /*Bounding values used by the loop filter.*/
  int                 bounding_values[256];
  /*Next token index to read, per color plane and zig-zag coefficient.*/
  ptrdiff_t           ti[3][64];
  /*Outstanding EOB run, per color plane and zig-zag coefficient.*/
  ptrdiff_t           eob_runs[3][64];
  /*Next coded fragment to reconstruct, per color plane.*/
  const ptrdiff_t    *coded_fragis[3];
  /*Next uncoded fragment to copy from the reference frame, per color plane.*/
  const ptrdiff_t    *uncoded_fragis[3];
  /*Number of coded fragments in the current MCU, per color plane.*/
  ptrdiff_t           ncoded_fragis[3];
  /*Number of uncoded fragments in the current MCU, per color plane.*/
  ptrdiff_t           nuncoded_fragis[3];
  /*Condensed dequantization tables: [plane][qi index][inter/intra].*/
  const ogg_uint16_t *dequant[3][3][2];
  /*First fragment row of the current MCU, per color plane.*/
  int                 fragy0[3];
  /*One past the last fragment row of the current MCU, per color plane.*/
  int                 fragy_end[3];
  /*Last DC predictor, per color plane and reference frame type.*/
  int                 pred_last[3][3];
  /*Number of luma fragment rows in one MCU (4, or 8 with vertically
     subsampled chroma).*/
  int                 mcu_nvfrags;
  /*Whether or not the loop filter is enabled.*/
  int                 loop_filter;
  /*The post-processing level, saved at pipeline setup to guard against the
     user changing it mid-frame.*/
  int                 pp_level;
}oc_dec_pipeline_state;
1321
1322
1323
1324 /*Initialize the main decoding pipeline.*/
static void oc_dec_pipeline_init(oc_dec_ctx *_dec,
 oc_dec_pipeline_state *_pipe){
  const ptrdiff_t *next_coded;
  const ptrdiff_t *next_uncoded;
  int plane;
  int qii;
  int qti;
  /*With vertically subsampled chroma, each Cb/Cr super block row spans two
     super block rows of Y', so an MCU covers eight luma fragment rows
     instead of four.*/
  _pipe->mcu_nvfrags=4<<!(_dec->state.info.pixel_fmt&2);
  /*Seed the per-coefficient token indices and EOB run counts from the values
     recorded while unpacking.*/
  memcpy(_pipe->ti,_dec->ti0,sizeof(_pipe->ti));
  memcpy(_pipe->eob_runs,_dec->eob_runs,sizeof(_pipe->eob_runs));
  /*Partition the coded and uncoded fragment lists by color plane.*/
  next_coded=_dec->state.coded_fragis;
  next_uncoded=next_coded+_dec->state.nfrags;
  for(plane=0;plane<3;plane++){
    ptrdiff_t ncoded;
    _pipe->coded_fragis[plane]=next_coded;
    _pipe->uncoded_fragis[plane]=next_uncoded;
    ncoded=_dec->state.ncoded_fragis[plane];
    next_coded+=ncoded;
    /*The uncoded list grows downward from the end of the coded list; step
       back by this plane's number of uncoded fragments.*/
    next_uncoded+=ncoded-_dec->state.fplanes[plane].nfrags;
  }
  /*Build the condensed dequantization tables for each plane, qi slot, and
     inter/intra type.*/
  for(plane=0;plane<3;plane++){
    for(qii=0;qii<_dec->state.nqis;qii++){
      for(qti=0;qti<2;qti++){
        _pipe->dequant[plane][qii][qti]=
         _dec->state.dequant_tables[_dec->state.qis[qii]][plane][qti];
      }
    }
  }
  /*Reset the DC predictors for every color plane and reference frame type.*/
  memset(_pipe->pred_last,0,sizeof(_pipe->pred_last));
  /*Compute the loop filter's bounding values, noting whether it is active.*/
  _pipe->loop_filter=!oc_state_loop_filter_init(&_dec->state,
   _pipe->bounding_values);
  /*Initialize any buffers needed for post-processing.
    We also save the current post-processing level, to guard against the user
     changing it from a callback.*/
  if(!oc_dec_postprocess_init(_dec))_pipe->pp_level=_dec->pp_level;
  else{
    /*Without the PP buffers we cannot post-process, regardless of the
       user-requested level; decode straight into the reference frame.*/
    _pipe->pp_level=OC_PP_LEVEL_DISABLED;
    memcpy(_dec->pp_frame_buf,
     _dec->state.ref_frame_bufs[_dec->state.ref_frame_idx[OC_FRAME_SELF]],
     sizeof(_dec->pp_frame_buf[0])*3);
  }
}
1378
1379 /*Undo the DC prediction in a single plane of an MCU (one or two super block
1380 rows).
1381 As a side effect, the number of coded and uncoded fragments in this plane of
1382 the MCU is also computed.*/
static void oc_dec_dc_unpredict_mcu_plane(oc_dec_ctx *_dec,
 oc_dec_pipeline_state *_pipe,int _pli){
  const oc_fragment_plane *fplane;
  oc_fragment             *frags;
  int                     *pred_last;
  ptrdiff_t                ncoded_fragis;
  ptrdiff_t                fragi;
  int                      fragx;
  int                      fragy;
  int                      fragy0;
  int                      fragy_end;
  int                      nhfrags;
  /*Compute the first and last fragment row of the current MCU for this
     plane.*/
  fplane=_dec->state.fplanes+_pli;
  fragy0=_pipe->fragy0[_pli];
  fragy_end=_pipe->fragy_end[_pli];
  nhfrags=fplane->nhfrags;
  pred_last=_pipe->pred_last[_pli];
  frags=_dec->state.frags;
  ncoded_fragis=0;
  fragi=fplane->froffset+fragy0*(ptrdiff_t)nhfrags;
  for(fragy=fragy0;fragy<fragy_end;fragy++){
    if(fragy==0){
      /*For the first row, all of the cases reduce to just using the previous
         predictor for the same reference frame.*/
      for(fragx=0;fragx<nhfrags;fragx++,fragi++){
        if(frags[fragi].coded){
          int ref;
          ref=OC_FRAME_FOR_MODE(frags[fragi].mb_mode);
          pred_last[ref]=frags[fragi].dc+=pred_last[ref];
          ncoded_fragis++;
        }
      }
    }
    else{
      oc_fragment *u_frags;
      int          l_ref;
      int          ul_ref;
      int          u_ref;
      /*u_frags indexes the row of fragments directly above the current one.*/
      u_frags=frags-nhfrags;
      /*Reference frame of each neighbor, or -1 if it is uncoded/absent.*/
      l_ref=-1;
      ul_ref=-1;
      u_ref=u_frags[fragi].coded?OC_FRAME_FOR_MODE(u_frags[fragi].mb_mode):-1;
      for(fragx=0;fragx<nhfrags;fragx++,fragi++){
        int ur_ref;
        if(fragx+1>=nhfrags)ur_ref=-1;
        else{
          ur_ref=u_frags[fragi+1].coded?
           OC_FRAME_FOR_MODE(u_frags[fragi+1].mb_mode):-1;
        }
        if(frags[fragi].coded){
          int pred;
          int ref;
          ref=OC_FRAME_FOR_MODE(frags[fragi].mb_mode);
          /*We break out a separate case based on which of our neighbors use
             the same reference frames.
            This is somewhat faster than trying to make a generic case which
             handles all of them, since it reduces lots of poorly predicted
             jumps to one switch statement, and also lets a number of the
             multiplications be optimized out by strength reduction.
            Switch index bits: 1=left, 2=up-left, 4=up, 8=up-right matches
             this fragment's reference frame.*/
          switch((l_ref==ref)|(ul_ref==ref)<<1|
           (u_ref==ref)<<2|(ur_ref==ref)<<3){
            /*No usable neighbor: fall back to the last predictor.*/
            default:pred=pred_last[ref];break;
            case 1:
            case 3:pred=frags[fragi-1].dc;break;
            case 2:pred=u_frags[fragi-1].dc;break;
            case 4:
            case 6:
            case 12:pred=u_frags[fragi].dc;break;
            case 5:pred=(frags[fragi-1].dc+u_frags[fragi].dc)/2;break;
            case 8:pred=u_frags[fragi+1].dc;break;
            case 9:
            case 11:
            case 13:{
              pred=(75*frags[fragi-1].dc+53*u_frags[fragi+1].dc)/128;
            }break;
            case 10:pred=(u_frags[fragi-1].dc+u_frags[fragi+1].dc)/2;break;
            case 14:{
              pred=(3*(u_frags[fragi-1].dc+u_frags[fragi+1].dc)
               +10*u_frags[fragi].dc)/16;
            }break;
            case 7:
            case 15:{
              int p0;
              int p1;
              int p2;
              p0=frags[fragi-1].dc;
              p1=u_frags[fragi-1].dc;
              p2=u_frags[fragi].dc;
              /*Weighted prediction, clamped back to a neighbor's value when
                 it strays too far from any of them.*/
              pred=(29*(p0+p2)-26*p1)/32;
              if(abs(pred-p2)>128)pred=p2;
              else if(abs(pred-p0)>128)pred=p0;
              else if(abs(pred-p1)>128)pred=p1;
            }break;
          }
          pred_last[ref]=frags[fragi].dc+=pred;
          ncoded_fragis++;
          l_ref=ref;
        }
        else l_ref=-1;
        /*Slide the neighbor references over for the next fragment.*/
        ul_ref=u_ref;
        u_ref=ur_ref;
      }
    }
  }
  _pipe->ncoded_fragis[_pli]=ncoded_fragis;
  /*Also save the number of uncoded fragments so we know how many to copy.*/
  _pipe->nuncoded_fragis[_pli]=
   (fragy_end-fragy0)*(ptrdiff_t)nhfrags-ncoded_fragis;
}
1494
1495 /*Reconstructs all coded fragments in a single MCU (one or two super block
1496 rows).
1497 This requires that each coded fragment have a proper macro block mode and
1498 motion vector (if not in INTRA mode), and have it's DC value decoded, with
1499 the DC prediction process reversed, and the number of coded and uncoded
1500 fragments in this plane of the MCU be counted.
1501 The token lists for each color plane and coefficient should also be filled
1502 in, along with initial token offsets, extra bits offsets, and EOB run
1503 counts.*/
static void oc_dec_frags_recon_mcu_plane(oc_dec_ctx *_dec,
 oc_dec_pipeline_state *_pipe,int _pli){
  unsigned char       *dct_tokens;
  const unsigned char *dct_fzig_zag;
  ogg_uint16_t         dc_quant[2];
  const oc_fragment   *frags;
  const ptrdiff_t     *coded_fragis;
  ptrdiff_t            ncoded_fragis;
  ptrdiff_t            fragii;
  ptrdiff_t           *ti;
  ptrdiff_t           *eob_runs;
  int                  qti;
  dct_tokens=_dec->dct_tokens;
  dct_fzig_zag=_dec->state.opt_data.dct_fzig_zag;
  frags=_dec->state.frags;
  coded_fragis=_pipe->coded_fragis[_pli];
  ncoded_fragis=_pipe->ncoded_fragis[_pli];
  ti=_pipe->ti[_pli];
  eob_runs=_pipe->eob_runs[_pli];
  /*DC dequantizers for the intra (0) and inter (1) cases.*/
  for(qti=0;qti<2;qti++)dc_quant[qti]=_pipe->dequant[_pli][0][qti][0];
  for(fragii=0;fragii<ncoded_fragis;fragii++){
    /*This array is made one element larger because the zig-zag index array
       uses the final element as a dumping ground for out-of-range indices
       to protect us from buffer overflow.*/
    OC_ALIGN8(ogg_int16_t dct_coeffs[65]);
    const ogg_uint16_t *ac_quant;
    ptrdiff_t fragi;
    int       last_zzi;
    int       zzi;
    fragi=coded_fragis[fragii];
    for(zzi=0;zzi<64;zzi++)dct_coeffs[zzi]=0;
    /*Select the inter (1) or intra (0) quantizer type for this fragment.*/
    qti=frags[fragi].mb_mode!=OC_MODE_INTRA;
    ac_quant=_pipe->dequant[_pli][frags[fragi].qii][qti];
    /*Decode the AC coefficients.*/
    for(zzi=0;zzi<64;){
      int token;
      last_zzi=zzi;
      /*An outstanding EOB run ends this fragment's coefficients here.*/
      if(eob_runs[zzi]){
        eob_runs[zzi]--;
        break;
      }
      else{
        ptrdiff_t eob;
        int       cw;
        int       rlen;
        int       coeff;
        int       lti;
        lti=ti[zzi];
        token=dct_tokens[lti++];
        cw=OC_DCT_CODE_WORD[token];
        /*These parts could be done branchless, but the branches are fairly
           predictable and the C code translates into more than a few
           instructions, so it's worth it to avoid them.*/
        if(OC_DCT_TOKEN_NEEDS_MORE(token)){
          cw+=dct_tokens[lti++]<<OC_DCT_TOKEN_EB_POS(token);
        }
        eob=cw>>OC_DCT_CW_EOB_SHIFT&0xFFF;
        if(token==OC_DCT_TOKEN_FAT_EOB){
          /*Fat EOB tokens store a second extra-bits byte (the high byte).*/
          eob+=dct_tokens[lti++]<<8;
          if(eob==0)eob=OC_DCT_EOB_FINISH;
        }
        rlen=(unsigned char)(cw>>OC_DCT_CW_RLEN_SHIFT);
        /*Conditionally negate the magnitude using the code word's flip bit.*/
        cw^=-(cw&1<<OC_DCT_CW_FLIP_BIT);
        coeff=cw>>OC_DCT_CW_MAG_SHIFT;
        eob_runs[zzi]=eob;
        ti[zzi]=lti;
        zzi+=rlen;
        /*Dequantize and store in natural (un-zig-zagged) order.*/
        dct_coeffs[dct_fzig_zag[zzi]]=(ogg_int16_t)(coeff*(int)ac_quant[zzi]);
        zzi+=!eob;
      }
    }
    /*TODO: zzi should be exactly 64 here.
      If it's not, we should report some kind of warning.*/
    zzi=OC_MINI(zzi,64);
    /*The DC value was already decoded and DC-unpredicted.*/
    dct_coeffs[0]=(ogg_int16_t)frags[fragi].dc;
    /*last_zzi is always initialized.
      If your compiler thinks otherwise, it is dumb.*/
    oc_state_frag_recon(&_dec->state,fragi,_pli,
     dct_coeffs,last_zzi,dc_quant[qti]);
  }
  _pipe->coded_fragis[_pli]+=ncoded_fragis;
  /*Right now the reconstructed MCU has only the coded blocks in it.*/
  /*TODO: We make the decision here to always copy the uncoded blocks into it
     from the reference frame.
    We could also copy the coded blocks back over the reference frame, if we
     wait for an additional MCU to be decoded, which might be faster if only a
     small number of blocks are coded.
    However, this introduces more latency, creating a larger cache footprint.
    It's unknown which decision is better, but this one results in simpler
     code, and the hard case (high bitrate, high resolution) is handled
     correctly.*/
  /*Copy the uncoded blocks from the previous reference frame.*/
  _pipe->uncoded_fragis[_pli]-=_pipe->nuncoded_fragis[_pli];
  oc_state_frag_copy_list(&_dec->state,_pipe->uncoded_fragis[_pli],
   _pipe->nuncoded_fragis[_pli],OC_FRAME_SELF,OC_FRAME_PREV,_pli);
}
1600
1601 /*Filter a horizontal block edge.*/
/*Filter one 8-pixel-wide horizontal block edge.
  _dst/_dst_ystride:  Destination pixels and row stride.
  _src/_src_ystride:  Source pixels (a 10-row window straddling the edge) and
                       row stride.
  _qstep:             Quantizer step; edges differing by this much or more are
                       assumed to be real and left alone.
  _flimit:            Activity limit; busy areas are left alone.
  _variance0/1:       Accumulators for the measured activity above/below the
                       edge.*/
static void oc_filter_hedge(unsigned char *_dst,int _dst_ystride,
 const unsigned char *_src,int _src_ystride,int _qstep,int _flimit,
 int *_variance0,int *_variance1){
  unsigned char       *rdst;
  const unsigned char *rsrc;
  unsigned char       *cdst;
  const unsigned char *csrc;
  int r[10];
  int sum0;
  int sum1;
  int bx;
  int by;
  rdst=_dst;
  rsrc=_src;
  /*Process one column of the edge at a time.*/
  for(bx=0;bx<8;bx++){
    cdst=rdst;
    csrc=rsrc;
    /*Gather the 10-pixel vertical window around the block edge.*/
    for(by=0;by<10;by++){
      r[by]=*csrc;
      csrc+=_src_ystride;
    }
    /*Measure the activity on each side of the edge.*/
    sum0=sum1=0;
    for(by=0;by<4;by++){
      sum0+=abs(r[by+1]-r[by]);
      sum1+=abs(r[by+5]-r[by+6]);
    }
    *_variance0+=OC_MINI(255,sum0);
    *_variance1+=OC_MINI(255,sum1);
    /*Only filter smooth regions whose edge step is smaller than a quantizer
       step (likely blocking artifacts rather than real detail).*/
    if(sum0<_flimit&&sum1<_flimit&&r[5]-r[4]<_qstep&&r[4]-r[5]<_qstep){
      *cdst=(unsigned char)(r[0]*3+r[1]*2+r[2]+r[3]+r[4]+4>>3);
      cdst+=_dst_ystride;
      *cdst=(unsigned char)(r[0]*2+r[1]+r[2]*2+r[3]+r[4]+r[5]+4>>3);
      cdst+=_dst_ystride;
      for(by=0;by<4;by++){
        *cdst=(unsigned char)(r[by]+r[by+1]+r[by+2]+r[by+3]*2+
         r[by+4]+r[by+5]+r[by+6]+4>>3);
        cdst+=_dst_ystride;
      }
      *cdst=(unsigned char)(r[4]+r[5]+r[6]+r[7]*2+r[8]+r[9]*2+4>>3);
      cdst+=_dst_ystride;
      *cdst=(unsigned char)(r[5]+r[6]+r[7]+r[8]*2+r[9]*3+4>>3);
    }
    else{
      /*Not filtering: copy the 8 interior pixels through unchanged.*/
      for(by=1;by<=8;by++){
        *cdst=(unsigned char)r[by];
        cdst+=_dst_ystride;
      }
    }
    rdst++;
    rsrc++;
  }
}
1654
1655 /*Filter a vertical block edge.*/
/*Filter one 8-pixel-tall vertical block edge, in place.
  _dst/_dst_ystride: Pixels straddling the edge and the row stride.
  _qstep:            Quantizer step threshold, as in oc_filter_hedge().
  _flimit:           Activity limit, as in oc_filter_hedge().
  _variances:        Two activity accumulators (left/right of the edge).*/
static void oc_filter_vedge(unsigned char *_dst,int _dst_ystride,
 int _qstep,int _flimit,int *_variances){
  unsigned char       *rdst;
  const unsigned char *rsrc;
  unsigned char       *cdst;
  int r[10];
  int sum0;
  int sum1;
  int bx;
  int by;
  cdst=_dst;
  /*Process one row of the edge at a time.*/
  for(by=0;by<8;by++){
    rsrc=cdst-1;
    rdst=cdst;
    /*Gather the 10-pixel horizontal window around the block edge.*/
    for(bx=0;bx<10;bx++)r[bx]=*rsrc++;
    /*Measure the activity on each side of the edge.*/
    sum0=sum1=0;
    for(bx=0;bx<4;bx++){
      sum0+=abs(r[bx+1]-r[bx]);
      sum1+=abs(r[bx+5]-r[bx+6]);
    }
    _variances[0]+=OC_MINI(255,sum0);
    _variances[1]+=OC_MINI(255,sum1);
    /*Only filter smooth regions whose edge step is smaller than a quantizer
       step; since we operate in place, the else case needs no copy.*/
    if(sum0<_flimit&&sum1<_flimit&&r[5]-r[4]<_qstep&&r[4]-r[5]<_qstep){
      *rdst++=(unsigned char)(r[0]*3+r[1]*2+r[2]+r[3]+r[4]+4>>3);
      *rdst++=(unsigned char)(r[0]*2+r[1]+r[2]*2+r[3]+r[4]+r[5]+4>>3);
      for(bx=0;bx<4;bx++){
        *rdst++=(unsigned char)(r[bx]+r[bx+1]+r[bx+2]+r[bx+3]*2+
         r[bx+4]+r[bx+5]+r[bx+6]+4>>3);
      }
      *rdst++=(unsigned char)(r[4]+r[5]+r[6]+r[7]*2+r[8]+r[9]*2+4>>3);
      *rdst=(unsigned char)(r[5]+r[6]+r[7]+r[8]*2+r[9]*3+4>>3);
    }
    cdst+=_dst_ystride;
  }
}
1691
/*Deblock the rows of fragments [_fragy0,_fragy_end) of plane _pli, copying
   from _src into _dst and accumulating per-fragment activity in
   _dec->variances.*/
static void oc_dec_deblock_frag_rows(oc_dec_ctx *_dec,
 th_img_plane *_dst,th_img_plane *_src,int _pli,int _fragy0,
 int _fragy_end){
  oc_fragment_plane   *fplane;
  int                 *variance;
  unsigned char       *dc_qi;
  unsigned char       *dst;
  const unsigned char *src;
  ptrdiff_t            froffset;
  int                  dst_ystride;
  int                  src_ystride;
  int                  nhfrags;
  int                  width;
  int                  notstart;
  int                  notdone;
  int                  flimit;
  int                  qstep;
  int                  y_end;
  int                  y;
  int                  x;
  _dst+=_pli;
  _src+=_pli;
  fplane=_dec->state.fplanes+_pli;
  nhfrags=fplane->nhfrags;
  froffset=fplane->froffset+_fragy0*(ptrdiff_t)nhfrags;
  variance=_dec->variances+froffset;
  dc_qi=_dec->dc_qis+froffset;
  /*Flags for whether this span touches the first/last fragment row.*/
  notstart=_fragy0>0;
  notdone=_fragy_end<fplane->nvfrags;
  /*We want to clear an extra row of variances, except at the end.*/
  memset(variance+(nhfrags&-notstart),0,
   (_fragy_end+notdone-_fragy0-notstart)*(nhfrags*sizeof(variance[0])));
  /*Except for the first time, we want to point to the middle of the row.*/
  y=(_fragy0<<3)+(notstart<<2);
  dst_ystride=_dst->stride;
  src_ystride=_src->stride;
  dst=_dst->data+y*(ptrdiff_t)dst_ystride;
  src=_src->data+y*(ptrdiff_t)src_ystride;
  width=_dst->width;
  /*The first 4 pixel rows of the frame have no edge above them; copy them.*/
  for(;y<4;y++){
    memcpy(dst,src,width*sizeof(dst[0]));
    dst+=dst_ystride;
    src+=src_ystride;
  }
  /*We also want to skip the last row in the frame for this loop.*/
  y_end=_fragy_end-!notdone<<3;
  for(;y<y_end;y+=8){
    /*Derive the filter thresholds from the block's tracked DC qi.*/
    qstep=_dec->pp_dc_scale[*dc_qi];
    flimit=(qstep*3)>>2;
    /*First block of the row: only a horizontal edge to filter.*/
    oc_filter_hedge(dst,dst_ystride,src-src_ystride,src_ystride,
     qstep,flimit,variance,variance+nhfrags);
    variance++;
    dc_qi++;
    for(x=8;x<width;x+=8){
      qstep=_dec->pp_dc_scale[*dc_qi];
      flimit=(qstep*3)>>2;
      /*Filter the horizontal edge above this block, then the vertical edge
         to its left (which now has valid pixels on both sides).*/
      oc_filter_hedge(dst+x,dst_ystride,src+x-src_ystride,src_ystride,
       qstep,flimit,variance,variance+nhfrags);
      oc_filter_vedge(dst+x-(dst_ystride<<2)-4,dst_ystride,
       qstep,flimit,variance-1);
      variance++;
      dc_qi++;
    }
    dst+=dst_ystride<<3;
    src+=src_ystride<<3;
  }
  /*And finally, handle the last row in the frame, if it's in the range.*/
  if(!notdone){
    int height;
    height=_dst->height;
    for(;y<height;y++){
      memcpy(dst,src,width*sizeof(dst[0]));
      dst+=dst_ystride;
      src+=src_ystride;
    }
    /*Filter the last row of vertical block edges.*/
    dc_qi++;
    for(x=8;x<width;x+=8){
      qstep=_dec->pp_dc_scale[*dc_qi++];
      flimit=(qstep*3)>>2;
      oc_filter_vedge(dst+x-(dst_ystride<<3)-4,dst_ystride,
       qstep,flimit,variance++);
    }
  }
}
1777
/*Dering one 8x8 block in place.
  _idata/_ystride: The block's pixels and the image row stride.
  _b:              Frame border flags: 1=left, 2=right, 4=top, 8=bottom edge
                    of the frame (border neighbors are replicated).
  _dc_scale:       DC quantizer scale used to size the smoothing window.
  _sharp_mod:      Replacement weight used where a sharp edge was detected.
  _strong:         Use the stronger (1) or normal (0) deringing variant.*/
static void oc_dering_block(unsigned char *_idata,int _ystride,int _b,
 int _dc_scale,int _sharp_mod,int _strong){
  static const unsigned char OC_MOD_MAX[2]={24,32};
  static const unsigned char OC_MOD_SHIFT[2]={1,0};
  const unsigned char *psrc;
  const unsigned char *src;
  const unsigned char *nsrc;
  unsigned char       *dst;
  int vmod[72];
  int hmod[72];
  int mod_hi;
  int by;
  int bx;
  mod_hi=OC_MINI(3*_dc_scale,OC_MOD_MAX[_strong]);
  dst=_idata;
  src=dst;
  /*Compute the vertical-neighbor weights (9 rows x 8 columns).
    At a frame border (_b&4: top, _b&8: bottom) the edge row is replicated
     by zeroing the stride via the mask expressions.*/
  psrc=src-(_ystride&-!(_b&4));
  for(by=0;by<9;by++){
    for(bx=0;bx<8;bx++){
      int mod;
      mod=32+_dc_scale-(abs(src[bx]-psrc[bx])<<OC_MOD_SHIFT[_strong]);
      /*A very large difference marks a sharp edge: use _sharp_mod instead.*/
      vmod[(by<<3)+bx]=mod<-64?_sharp_mod:OC_CLAMPI(0,mod,mod_hi);
    }
    psrc=src;
    src+=_ystride&-(!(_b&8)|by<7);
  }
  /*Compute the horizontal-neighbor weights (9 columns x 8 rows), with the
     same border replication for the left (_b&1) and right (_b&2) edges.*/
  nsrc=dst;
  psrc=dst-!(_b&1);
  for(bx=0;bx<9;bx++){
    src=nsrc;
    for(by=0;by<8;by++){
      int mod;
      mod=32+_dc_scale-(abs(*src-*psrc)<<OC_MOD_SHIFT[_strong]);
      hmod[(bx<<3)+by]=mod<-64?_sharp_mod:OC_CLAMPI(0,mod,mod_hi);
      psrc+=_ystride;
      src+=_ystride;
    }
    psrc=nsrc;
    nsrc+=!(_b&2)|bx<7;
  }
  /*Apply the weighted 4-neighbor smoothing to each pixel of the block.*/
  src=dst;
  psrc=src-(_ystride&-!(_b&4));
  nsrc=src+_ystride;
  for(by=0;by<8;by++){
    int a;
    int b;
    int w;
    /*First column: the left neighbor may be a replicated border pixel.*/
    a=128;
    b=64;
    w=hmod[by];
    a-=w;
    b+=w**(src-!(_b&1));
    w=vmod[by<<3];
    a-=w;
    b+=w*psrc[0];
    w=vmod[by+1<<3];
    a-=w;
    b+=w*nsrc[0];
    w=hmod[(1<<3)+by];
    a-=w;
    b+=w*src[1];
    dst[0]=OC_CLAMP255(a*src[0]+b>>7);
    /*Interior columns.*/
    for(bx=1;bx<7;bx++){
      a=128;
      b=64;
      w=hmod[(bx<<3)+by];
      a-=w;
      b+=w*src[bx-1];
      w=vmod[(by<<3)+bx];
      a-=w;
      b+=w*psrc[bx];
      w=vmod[(by+1<<3)+bx];
      a-=w;
      b+=w*nsrc[bx];
      w=hmod[(bx+1<<3)+by];
      a-=w;
      b+=w*src[bx+1];
      dst[bx]=OC_CLAMP255(a*src[bx]+b>>7);
    }
    /*Last column: the right neighbor may be a replicated border pixel.*/
    a=128;
    b=64;
    w=hmod[(7<<3)+by];
    a-=w;
    b+=w*src[6];
    w=vmod[(by<<3)+7];
    a-=w;
    b+=w*psrc[7];
    w=vmod[(by+1<<3)+7];
    a-=w;
    b+=w*nsrc[7];
    w=hmod[(8<<3)+by];
    a-=w;
    b+=w*src[7+!(_b&2)];
    dst[7]=OC_CLAMP255(a*src[7]+b>>7);
    dst+=_ystride;
    psrc=src;
    src=nsrc;
    nsrc+=_ystride&-(!(_b&8)|by<6);
  }
}
1878
1879 #define OC_DERING_THRESH1 (384)
1880 #define OC_DERING_THRESH2 (4*OC_DERING_THRESH1)
1881 #define OC_DERING_THRESH3 (5*OC_DERING_THRESH1)
1882 #define OC_DERING_THRESH4 (10*OC_DERING_THRESH1)
1883
/*Applies the deringing post-processor to a range of fragment rows in one
   image plane.
  The per-fragment variance estimate selects between no filtering, the normal
   filter, a single strong pass, or (when the neighborhood is also very noisy)
   three strong passes.
  _dec:       The decoding context.
  _img:       The frame buffer to filter in place.
  _pli:       The color plane index (0 for luma).
  _fragy0:    The first fragment row to process.
  _fragy_end: One past the last fragment row to process.*/
static void oc_dec_dering_frag_rows(oc_dec_ctx *_dec,th_img_plane *_img,
 int _pli,int _fragy0,int _fragy_end){
  th_img_plane      *plane;
  oc_fragment_plane *fplane;
  oc_fragment       *frag;
  int               *var_ptr;
  unsigned char     *row_data;
  ptrdiff_t          frag_offs;
  int                stride;
  int                frags_per_row;
  int                strong_thresh;
  int                use_strong;
  int                row_end;
  int                pwidth;
  int                pheight;
  int                py;
  int                px;
  plane=_img+_pli;
  fplane=_dec->state.fplanes+_pli;
  frags_per_row=fplane->nhfrags;
  frag_offs=fplane->froffset+_fragy0*(ptrdiff_t)frags_per_row;
  var_ptr=_dec->variances+frag_offs;
  frag=_dec->state.frags+frag_offs;
  /*Strong deringing only kicks in at the higher post-processing levels, and
     chroma requires a higher level than luma.*/
  use_strong=_dec->pp_level>=(_pli?OC_PP_LEVEL_SDERINGC:OC_PP_LEVEL_SDERINGY);
  strong_thresh=_pli?OC_DERING_THRESH4:OC_DERING_THRESH3;
  py=_fragy0<<3;
  stride=plane->stride;
  row_data=plane->data+py*(ptrdiff_t)stride;
  row_end=_fragy_end<<3;
  pwidth=plane->width;
  pheight=plane->height;
  for(;py<row_end;py+=8){
    for(px=0;px<pwidth;px+=8){
      int b;
      int qi;
      int var;
      qi=_dec->state.qis[frag->qii];
      var=*var_ptr;
      /*Frame-border flags: bit 0 left, bit 1 right, bit 2 top, bit 3
         bottom.*/
      b=(px<=0)|((px+8>=pwidth)<<1)|((py<=0)<<2)|((py+8>=pheight)<<3);
      if(use_strong&&var>strong_thresh){
        oc_dering_block(row_data+px,stride,b,
         _dec->pp_dc_scale[qi],_dec->pp_sharp_mod[qi],1);
        /*Chroma always gets the extra passes; luma gets them only when some
           in-frame neighbor is also above the strong threshold.*/
        if(_pli||(!(b&1)&&*(var_ptr-1)>OC_DERING_THRESH4)||
         (!(b&2)&&var_ptr[1]>OC_DERING_THRESH4)||
         (!(b&4)&&*(var_ptr-frags_per_row)>OC_DERING_THRESH4)||
         (!(b&8)&&var_ptr[frags_per_row]>OC_DERING_THRESH4)){
          oc_dering_block(row_data+px,stride,b,
           _dec->pp_dc_scale[qi],_dec->pp_sharp_mod[qi],1);
          oc_dering_block(row_data+px,stride,b,
           _dec->pp_dc_scale[qi],_dec->pp_sharp_mod[qi],1);
        }
      }
      else if(var>OC_DERING_THRESH2){
        /*Moderately noisy block: one strong pass.*/
        oc_dering_block(row_data+px,stride,b,
         _dec->pp_dc_scale[qi],_dec->pp_sharp_mod[qi],1);
      }
      else if(var>OC_DERING_THRESH1){
        /*Mildly noisy block: one normal pass.*/
        oc_dering_block(row_data+px,stride,b,
         _dec->pp_dc_scale[qi],_dec->pp_sharp_mod[qi],0);
      }
      frag++;
      var_ptr++;
    }
    row_data+=stride<<3;
  }
}
1950
1951
1952
th_decode_alloc(const th_info * _info,const th_setup_info * _setup)1953 th_dec_ctx *th_decode_alloc(const th_info *_info,const th_setup_info *_setup){
1954 oc_dec_ctx *dec;
1955 if(_info==NULL||_setup==NULL)return NULL;
1956 dec=_ogg_malloc(sizeof(*dec));
1957 if(dec==NULL||oc_dec_init(dec,_info,_setup)<0){
1958 _ogg_free(dec);
1959 return NULL;
1960 }
1961 dec->state.curframe_num=0;
1962 return dec;
1963 }
1964
/*Frees a decoder context and all resources it owns.
  Passing NULL is a harmless no-op.*/
void th_decode_free(th_dec_ctx *_dec){
  if(_dec==NULL)return;
  oc_dec_clear(_dec);
  _ogg_free(_dec);
}
1971
/*Decoder control dispatch.
  _dec:    The decoding context.
  _req:    The control code (one of the TH_DECCTL_* values).
  _buf:    The parameter buffer; its required size depends on _req.
  _buf_sz: The size of the parameter buffer.
  Return: 0 on success, TH_EFAULT if a required pointer is NULL, TH_EINVAL if
   the buffer size or value is wrong, or TH_EIMPL for unknown requests.*/
int th_decode_ctl(th_dec_ctx *_dec,int _req,void *_buf,
 size_t _buf_sz){
  switch(_req){
    /*Report the maximum post-processing level the decoder supports.*/
    case TH_DECCTL_GET_PPLEVEL_MAX:{
      if(_dec==NULL||_buf==NULL)return TH_EFAULT;
      if(_buf_sz!=sizeof(int))return TH_EINVAL;
      (*(int *)_buf)=OC_PP_LEVEL_MAX;
      return 0;
    }break;
    /*Set the current post-processing level.*/
    case TH_DECCTL_SET_PPLEVEL:{
      int pp_level;
      if(_dec==NULL||_buf==NULL)return TH_EFAULT;
      if(_buf_sz!=sizeof(int))return TH_EINVAL;
      pp_level=*(int *)_buf;
      if(pp_level<0||pp_level>OC_PP_LEVEL_MAX)return TH_EINVAL;
      _dec->pp_level=pp_level;
      return 0;
    }break;
    /*Seek support: reset the granule position and derive the key frame and
       current frame numbers from it.*/
    case TH_DECCTL_SET_GRANPOS:{
      ogg_int64_t granpos;
      if(_dec==NULL||_buf==NULL)return TH_EFAULT;
      if(_buf_sz!=sizeof(ogg_int64_t))return TH_EINVAL;
      granpos=*(ogg_int64_t *)_buf;
      if(granpos<0)return TH_EINVAL;
      _dec->state.granpos=granpos;
      _dec->state.keyframe_num=(granpos>>_dec->state.info.keyframe_granule_shift)
       -_dec->state.granpos_bias;
      /*Use a 64-bit 1 when building the frame-count mask:
         keyframe_granule_shift can be as large as 31, and 1<<31 overflows a
         32-bit signed int (undefined behavior); the mask is applied to a
         64-bit granpos anyway.*/
      _dec->state.curframe_num=_dec->state.keyframe_num
       +(granpos&((ogg_int64_t)1<<_dec->state.info.keyframe_granule_shift)-1);
      return 0;
    }break;
    /*Install (or clear) the striped-decode callback.*/
    case TH_DECCTL_SET_STRIPE_CB:{
      th_stripe_callback *cb;
      if(_dec==NULL||_buf==NULL)return TH_EFAULT;
      if(_buf_sz!=sizeof(th_stripe_callback))return TH_EINVAL;
      cb=(th_stripe_callback *)_buf;
      _dec->stripe_cb.ctx=cb->ctx;
      _dec->stripe_cb.stripe_decoded=cb->stripe_decoded;
      return 0;
    }break;
#if defined(HAVE_CAIRO)
    /*Telemetry visualizations: each request stores a bitmask selecting which
       overlays to draw and enables telemetry rendering.*/
    case TH_DECCTL_SET_TELEMETRY_MBMODE:{
      if(_dec==NULL||_buf==NULL)return TH_EFAULT;
      if(_buf_sz!=sizeof(int))return TH_EINVAL;
      _dec->telemetry=1;
      _dec->telemetry_mbmode=*(int *)_buf;
      return 0;
    }break;
    case TH_DECCTL_SET_TELEMETRY_MV:{
      if(_dec==NULL||_buf==NULL)return TH_EFAULT;
      if(_buf_sz!=sizeof(int))return TH_EINVAL;
      _dec->telemetry=1;
      _dec->telemetry_mv=*(int *)_buf;
      return 0;
    }break;
    case TH_DECCTL_SET_TELEMETRY_QI:{
      if(_dec==NULL||_buf==NULL)return TH_EFAULT;
      if(_buf_sz!=sizeof(int))return TH_EINVAL;
      _dec->telemetry=1;
      _dec->telemetry_qi=*(int *)_buf;
      return 0;
    }break;
    case TH_DECCTL_SET_TELEMETRY_BITS:{
      if(_dec==NULL||_buf==NULL)return TH_EFAULT;
      if(_buf_sz!=sizeof(int))return TH_EINVAL;
      _dec->telemetry=1;
      _dec->telemetry_bits=*(int *)_buf;
      return 0;
    }break;
#endif
    default:return TH_EIMPL;
  }
}
2045
2046 /*We're decoding an INTER frame, but have no initialized reference
2047 buffers (i.e., decoding did not start on a key frame).
2048 We initialize them to a solid gray here.*/
/*We're decoding an INTER frame, but have no initialized reference
   buffers (i.e., decoding did not start on a key frame).
  We initialize them to a solid gray here.*/
static void oc_dec_init_dummy_frame(th_dec_ctx *_dec){
  th_info *info;
  size_t   luma_sz;
  size_t   chroma_sz;
  int      luma_stride;
  int      luma_rows;
  int      chroma_stride;
  int      chroma_rows;
  _dec->state.ref_frame_idx[OC_FRAME_GOLD]=0;
  _dec->state.ref_frame_idx[OC_FRAME_PREV]=0;
  _dec->state.ref_frame_idx[OC_FRAME_SELF]=1;
  info=&_dec->state.info;
  /*Plane dimensions include the unrestricted motion vector padding on every
     side.*/
  luma_stride=info->frame_width+2*OC_UMV_PADDING;
  luma_rows=info->frame_height+2*OC_UMV_PADDING;
  /*The chroma planes are subsampled according to the pixel format bits.*/
  chroma_stride=luma_stride>>!(info->pixel_fmt&1);
  chroma_rows=luma_rows>>!(info->pixel_fmt&2);
  luma_sz=luma_stride*(size_t)luma_rows;
  chroma_sz=chroma_stride*(size_t)chroma_rows;
  /*0x80 is mid-gray for luma and the neutral value for chroma.*/
  memset(_dec->state.ref_frame_data[0],0x80,luma_sz+2*chroma_sz);
}
2069
/*Submits one packet to the decoder and decodes it into the current frame.
  _dec:     The decoding context.
  _op:      The packet to decode; a zero-byte packet indicates a dropped
             (duplicate) frame.
  _granpos: Returns the granule position of the decoded frame, if non-NULL.
  Return: 0 on success, TH_DUPFRAME for a dropped frame, TH_EFAULT if _dec or
   _op is NULL, or a negative value from the frame header unpacker on a
   malformed packet.*/
int th_decode_packetin(th_dec_ctx *_dec,const ogg_packet *_op,
 ogg_int64_t *_granpos){
  int ret;
  if(_dec==NULL||_op==NULL)return TH_EFAULT;
  /*A completely empty packet indicates a dropped frame and is treated exactly
     like an inter frame with no coded blocks.
    Only proceed if we have a non-empty packet.*/
  if(_op->bytes!=0){
    oc_dec_pipeline_state pipe;
    th_ycbcr_buffer       stripe_buf;
    int                   stripe_fragy;
    int                   refi;
    int                   pli;
    int                   notstart;
    int                   notdone;
    oc_pack_readinit(&_dec->opb,_op->packet,_op->bytes);
#if defined(HAVE_CAIRO)
    _dec->telemetry_frame_bytes=_op->bytes;
#endif
    ret=oc_dec_frame_header_unpack(_dec);
    if(ret<0)return ret;
    /*Select a free buffer to use for the reconstructed version of this
       frame.*/
    if(_dec->state.frame_type!=OC_INTRA_FRAME&&
     (_dec->state.ref_frame_idx[OC_FRAME_GOLD]<0||
     _dec->state.ref_frame_idx[OC_FRAME_PREV]<0)){
      /*No reference frames yet!*/
      oc_dec_init_dummy_frame(_dec);
      refi=_dec->state.ref_frame_idx[OC_FRAME_SELF];
    }
    else{
      /*Pick the smallest buffer index not in use as a reference.*/
      for(refi=0;refi==_dec->state.ref_frame_idx[OC_FRAME_GOLD]||
       refi==_dec->state.ref_frame_idx[OC_FRAME_PREV];refi++);
      _dec->state.ref_frame_idx[OC_FRAME_SELF]=refi;
    }
    /*Unpack the per-frame side information: coded-block flags, macro block
       modes, and motion vectors (intra frames have all blocks coded in INTRA
       mode, so they skip straight ahead).*/
    if(_dec->state.frame_type==OC_INTRA_FRAME){
      oc_dec_mark_all_intra(_dec);
      _dec->state.keyframe_num=_dec->state.curframe_num;
#if defined(HAVE_CAIRO)
      _dec->telemetry_coding_bytes=
       _dec->telemetry_mode_bytes=
       _dec->telemetry_mv_bytes=oc_pack_bytes_left(&_dec->opb);
#endif
    }
    else{
      oc_dec_coded_flags_unpack(_dec);
#if defined(HAVE_CAIRO)
      _dec->telemetry_coding_bytes=oc_pack_bytes_left(&_dec->opb);
#endif
      oc_dec_mb_modes_unpack(_dec);
#if defined(HAVE_CAIRO)
      _dec->telemetry_mode_bytes=oc_pack_bytes_left(&_dec->opb);
#endif
      oc_dec_mv_unpack_and_frag_modes_fill(_dec);
#if defined(HAVE_CAIRO)
      _dec->telemetry_mv_bytes=oc_pack_bytes_left(&_dec->opb);
#endif
    }
    oc_dec_block_qis_unpack(_dec);
#if defined(HAVE_CAIRO)
    _dec->telemetry_qi_bytes=oc_pack_bytes_left(&_dec->opb);
#endif
    oc_dec_residual_tokens_unpack(_dec);
    /*Update granule position.
      This must be done before the striped decode callbacks so that the
       application knows what to do with the frame data.
      Note: + binds tighter than <<, so the keyframe number plus the bias is
       what gets shifted.*/
    _dec->state.granpos=(_dec->state.keyframe_num+_dec->state.granpos_bias<<
     _dec->state.info.keyframe_granule_shift)
     +(_dec->state.curframe_num-_dec->state.keyframe_num);
    _dec->state.curframe_num++;
    if(_granpos!=NULL)*_granpos=_dec->state.granpos;
    /*All of the rest of the operations -- DC prediction reversal,
       reconstructing coded fragments, copying uncoded fragments, loop
       filtering, extending borders, and out-of-loop post-processing -- should
       be pipelined.
      I.e., DC prediction reversal, reconstruction, and uncoded fragment
       copying are done for one or two super block rows, then loop filtering is
       run as far as it can, then bordering copying, then post-processing.
      For 4:2:0 video a Minimum Codable Unit or MCU contains two luma super
       block rows, and one chroma.
      Otherwise, an MCU consists of one super block row from each plane.
      Inside each MCU, we perform all of the steps on one color plane before
       moving on to the next.
      After reconstruction, the additional filtering stages introduce a delay
       since they need some pixels from the next fragment row.
      Thus the actual number of decoded rows available is slightly smaller for
       the first MCU, and slightly larger for the last.

      This entire process allows us to operate on the data while it is still in
       cache, resulting in big performance improvements.
      An application callback allows further application processing (blitting
       to video memory, color conversion, etc.) to also use the data while it's
       in cache.*/
    oc_dec_pipeline_init(_dec,&pipe);
    oc_ycbcr_buffer_flip(stripe_buf,_dec->pp_frame_buf);
    notstart=0;
    notdone=1;
    for(stripe_fragy=0;notdone;stripe_fragy+=pipe.mcu_nvfrags){
      int avail_fragy0;
      int avail_fragy_end;
      avail_fragy0=avail_fragy_end=_dec->state.fplanes[0].nvfrags;
      notdone=stripe_fragy+pipe.mcu_nvfrags<avail_fragy_end;
      for(pli=0;pli<3;pli++){
        oc_fragment_plane *fplane;
        int                frag_shift;
        int                pp_offset;
        int                sdelay;
        int                edelay;
        fplane=_dec->state.fplanes+pli;
        /*Compute the first and last fragment row of the current MCU for this
           plane.*/
        frag_shift=pli!=0&&!(_dec->state.info.pixel_fmt&2);
        pipe.fragy0[pli]=stripe_fragy>>frag_shift;
        pipe.fragy_end[pli]=OC_MINI(fplane->nvfrags,
         pipe.fragy0[pli]+(pipe.mcu_nvfrags>>frag_shift));
        oc_dec_dc_unpredict_mcu_plane(_dec,&pipe,pli);
        oc_dec_frags_recon_mcu_plane(_dec,&pipe,pli);
        /*sdelay/edelay track how many fragment rows each filtering stage must
           lag behind reconstruction at the start and end of the stripe.*/
        sdelay=edelay=0;
        if(pipe.loop_filter){
          sdelay+=notstart;
          edelay+=notdone;
          oc_state_loop_filter_frag_rows(&_dec->state,pipe.bounding_values,
           refi,pli,pipe.fragy0[pli]-sdelay,pipe.fragy_end[pli]-edelay);
        }
        /*To fill the borders, we have an additional two pixel delay, since a
           fragment in the next row could filter its top edge, using two pixels
           from a fragment in this row.
          But there's no reason to delay a full fragment between the two.*/
        oc_state_borders_fill_rows(&_dec->state,refi,pli,
         (pipe.fragy0[pli]-sdelay<<3)-(sdelay<<1),
         (pipe.fragy_end[pli]-edelay<<3)-(edelay<<1));
        /*Out-of-loop post-processing.*/
        pp_offset=3*(pli!=0);
        if(pipe.pp_level>=OC_PP_LEVEL_DEBLOCKY+pp_offset){
          /*Perform de-blocking in one plane.*/
          sdelay+=notstart;
          edelay+=notdone;
          oc_dec_deblock_frag_rows(_dec,_dec->pp_frame_buf,
           _dec->state.ref_frame_bufs[refi],pli,
           pipe.fragy0[pli]-sdelay,pipe.fragy_end[pli]-edelay);
          if(pipe.pp_level>=OC_PP_LEVEL_DERINGY+pp_offset){
            /*Perform de-ringing in one plane.*/
            sdelay+=notstart;
            edelay+=notdone;
            oc_dec_dering_frag_rows(_dec,_dec->pp_frame_buf,pli,
             pipe.fragy0[pli]-sdelay,pipe.fragy_end[pli]-edelay);
          }
        }
        /*If no post-processing is done, we still need to delay a row for the
           loop filter, thanks to the strange filtering order VP3 chose.*/
        else if(pipe.loop_filter){
          sdelay+=notstart;
          edelay+=notdone;
        }
        /*Compute the intersection of the available rows in all planes.
          If chroma is sub-sampled, the effect of each of its delays is
           doubled, but luma might have more post-processing filters enabled
           than chroma, so we don't know up front which one is the limiting
           factor.*/
        avail_fragy0=OC_MINI(avail_fragy0,pipe.fragy0[pli]-sdelay<<frag_shift);
        avail_fragy_end=OC_MINI(avail_fragy_end,
         pipe.fragy_end[pli]-edelay<<frag_shift);
      }
      if(_dec->stripe_cb.stripe_decoded!=NULL){
        /*The callback might want to use the FPU, so let's make sure they can.
          We violate all kinds of ABI restrictions by not doing this until
           now, but none of them actually matter since we don't use floating
           point ourselves.*/
        oc_restore_fpu(&_dec->state);
        /*Make the callback, ensuring we flip the sense of the "start" and
           "end" of the available region upside down.*/
        (*_dec->stripe_cb.stripe_decoded)(_dec->stripe_cb.ctx,stripe_buf,
         _dec->state.fplanes[0].nvfrags-avail_fragy_end,
         _dec->state.fplanes[0].nvfrags-avail_fragy0);
      }
      notstart=1;
    }
    /*Finish filling in the reference frame borders.*/
    for(pli=0;pli<3;pli++)oc_state_borders_fill_caps(&_dec->state,refi,pli);
    /*Update the reference frame indices.*/
    if(_dec->state.frame_type==OC_INTRA_FRAME){
      /*The new frame becomes both the previous and gold reference frames.*/
      _dec->state.ref_frame_idx[OC_FRAME_GOLD]=
       _dec->state.ref_frame_idx[OC_FRAME_PREV]=
       _dec->state.ref_frame_idx[OC_FRAME_SELF];
    }
    else{
      /*Otherwise, just replace the previous reference frame.*/
      _dec->state.ref_frame_idx[OC_FRAME_PREV]=
       _dec->state.ref_frame_idx[OC_FRAME_SELF];
    }
    /*Restore the FPU before dump_frame, since that _does_ use the FPU (for PNG
       gamma values, if nothing else).*/
    oc_restore_fpu(&_dec->state);
#if defined(OC_DUMP_IMAGES)
    /*Don't dump images for dropped frames.*/
    oc_state_dump_frame(&_dec->state,OC_FRAME_SELF,"dec");
#endif
    return 0;
  }
  else{
    /*Dropped frame: the output is simply a copy of the previous frame.*/
    if(_dec->state.ref_frame_idx[OC_FRAME_GOLD]<0||
     _dec->state.ref_frame_idx[OC_FRAME_PREV]<0){
      int refi;
      /*No reference frames yet!*/
      oc_dec_init_dummy_frame(_dec);
      refi=_dec->state.ref_frame_idx[OC_FRAME_PREV];
      _dec->state.ref_frame_idx[OC_FRAME_SELF]=refi;
      memcpy(_dec->pp_frame_buf,_dec->state.ref_frame_bufs[refi],
       sizeof(_dec->pp_frame_buf[0])*3);
    }
    /*Just update the granule position and return.*/
    _dec->state.granpos=(_dec->state.keyframe_num+_dec->state.granpos_bias<<
     _dec->state.info.keyframe_granule_shift)
     +(_dec->state.curframe_num-_dec->state.keyframe_num);
    _dec->state.curframe_num++;
    if(_granpos!=NULL)*_granpos=_dec->state.granpos;
    return TH_DUPFRAME;
  }
}
2290
th_decode_ycbcr_out(th_dec_ctx * _dec,th_ycbcr_buffer _ycbcr)2291 int th_decode_ycbcr_out(th_dec_ctx *_dec,th_ycbcr_buffer _ycbcr){
2292 if(_dec==NULL||_ycbcr==NULL)return TH_EFAULT;
2293 oc_ycbcr_buffer_flip(_ycbcr,_dec->pp_frame_buf);
2294 #if defined(HAVE_CAIRO)
2295 /*If telemetry ioctls are active, we need to draw to the output buffer.
2296 Stuff the plane into cairo.*/
2297 if(_dec->telemetry){
2298 cairo_surface_t *cs;
2299 unsigned char *data;
2300 unsigned char *y_row;
2301 unsigned char *u_row;
2302 unsigned char *v_row;
2303 unsigned char *rgb_row;
2304 int cstride;
2305 int w;
2306 int h;
2307 int x;
2308 int y;
2309 int hdec;
2310 int vdec;
2311 w=_ycbcr[0].width;
2312 h=_ycbcr[0].height;
2313 hdec=!(_dec->state.info.pixel_fmt&1);
2314 vdec=!(_dec->state.info.pixel_fmt&2);
2315 /*Lazy data buffer init.
2316 We could try to re-use the post-processing buffer, which would save
2317 memory, but complicate the allocation logic there.
2318 I don't think anyone cares about memory usage when using telemetry; it is
2319 not meant for embedded devices.*/
2320 if(_dec->telemetry_frame_data==NULL){
2321 _dec->telemetry_frame_data=_ogg_malloc(
2322 (w*h+2*(w>>hdec)*(h>>vdec))*sizeof(*_dec->telemetry_frame_data));
2323 if(_dec->telemetry_frame_data==NULL)return 0;
2324 }
2325 cs=cairo_image_surface_create(CAIRO_FORMAT_RGB24,w,h);
2326 /*Sadly, no YUV support in Cairo (yet); convert into the RGB buffer.*/
2327 data=cairo_image_surface_get_data(cs);
2328 if(data==NULL){
2329 cairo_surface_destroy(cs);
2330 return 0;
2331 }
2332 cstride=cairo_image_surface_get_stride(cs);
2333 y_row=_ycbcr[0].data;
2334 u_row=_ycbcr[1].data;
2335 v_row=_ycbcr[2].data;
2336 rgb_row=data;
2337 for(y=0;y<h;y++){
2338 for(x=0;x<w;x++){
2339 int r;
2340 int g;
2341 int b;
2342 r=(1904000*y_row[x]+2609823*v_row[x>>hdec]-363703744)/1635200;
2343 g=(3827562*y_row[x]-1287801*u_row[x>>hdec]
2344 -2672387*v_row[x>>hdec]+447306710)/3287200;
2345 b=(952000*y_row[x]+1649289*u_row[x>>hdec]-225932192)/817600;
2346 rgb_row[4*x+0]=OC_CLAMP255(b);
2347 rgb_row[4*x+1]=OC_CLAMP255(g);
2348 rgb_row[4*x+2]=OC_CLAMP255(r);
2349 }
2350 y_row+=_ycbcr[0].stride;
2351 u_row+=_ycbcr[1].stride&-((y&1)|!vdec);
2352 v_row+=_ycbcr[2].stride&-((y&1)|!vdec);
2353 rgb_row+=cstride;
2354 }
2355 /*Draw coded identifier for each macroblock (stored in Hilbert order).*/
2356 {
2357 cairo_t *c;
2358 const oc_fragment *frags;
2359 oc_mv *frag_mvs;
2360 const signed char *mb_modes;
2361 oc_mb_map *mb_maps;
2362 size_t nmbs;
2363 size_t mbi;
2364 int row2;
2365 int col2;
2366 int qim[3]={0,0,0};
2367 if(_dec->state.nqis==2){
2368 int bqi;
2369 bqi=_dec->state.qis[0];
2370 if(_dec->state.qis[1]>bqi)qim[1]=1;
2371 if(_dec->state.qis[1]<bqi)qim[1]=-1;
2372 }
2373 if(_dec->state.nqis==3){
2374 int bqi;
2375 int cqi;
2376 int dqi;
2377 bqi=_dec->state.qis[0];
2378 cqi=_dec->state.qis[1];
2379 dqi=_dec->state.qis[2];
2380 if(cqi>bqi&&dqi>bqi){
2381 if(dqi>cqi){
2382 qim[1]=1;
2383 qim[2]=2;
2384 }
2385 else{
2386 qim[1]=2;
2387 qim[2]=1;
2388 }
2389 }
2390 else if(cqi<bqi&&dqi<bqi){
2391 if(dqi<cqi){
2392 qim[1]=-1;
2393 qim[2]=-2;
2394 }
2395 else{
2396 qim[1]=-2;
2397 qim[2]=-1;
2398 }
2399 }
2400 else{
2401 if(cqi<bqi)qim[1]=-1;
2402 else qim[1]=1;
2403 if(dqi<bqi)qim[2]=-1;
2404 else qim[2]=1;
2405 }
2406 }
2407 c=cairo_create(cs);
2408 frags=_dec->state.frags;
2409 frag_mvs=_dec->state.frag_mvs;
2410 mb_modes=_dec->state.mb_modes;
2411 mb_maps=_dec->state.mb_maps;
2412 nmbs=_dec->state.nmbs;
2413 row2=0;
2414 col2=0;
2415 for(mbi=0;mbi<nmbs;mbi++){
2416 float x;
2417 float y;
2418 int bi;
2419 y=h-(row2+((col2+1>>1)&1))*16-16;
2420 x=(col2>>1)*16;
2421 cairo_set_line_width(c,1.);
2422 /*Keyframe (all intra) red box.*/
2423 if(_dec->state.frame_type==OC_INTRA_FRAME){
2424 if(_dec->telemetry_mbmode&0x02){
2425 cairo_set_source_rgba(c,1.,0,0,.5);
2426 cairo_rectangle(c,x+2.5,y+2.5,11,11);
2427 cairo_stroke_preserve(c);
2428 cairo_set_source_rgba(c,1.,0,0,.25);
2429 cairo_fill(c);
2430 }
2431 }
2432 else{
2433 const signed char *frag_mv;
2434 ptrdiff_t fragi;
2435 for(bi=0;bi<4;bi++){
2436 fragi=mb_maps[mbi][0][bi];
2437 if(fragi>=0&&frags[fragi].coded){
2438 frag_mv=frag_mvs[fragi];
2439 break;
2440 }
2441 }
2442 if(bi<4){
2443 switch(mb_modes[mbi]){
2444 case OC_MODE_INTRA:{
2445 if(_dec->telemetry_mbmode&0x02){
2446 cairo_set_source_rgba(c,1.,0,0,.5);
2447 cairo_rectangle(c,x+2.5,y+2.5,11,11);
2448 cairo_stroke_preserve(c);
2449 cairo_set_source_rgba(c,1.,0,0,.25);
2450 cairo_fill(c);
2451 }
2452 }break;
2453 case OC_MODE_INTER_NOMV:{
2454 if(_dec->telemetry_mbmode&0x01){
2455 cairo_set_source_rgba(c,0,0,1.,.5);
2456 cairo_rectangle(c,x+2.5,y+2.5,11,11);
2457 cairo_stroke_preserve(c);
2458 cairo_set_source_rgba(c,0,0,1.,.25);
2459 cairo_fill(c);
2460 }
2461 }break;
2462 case OC_MODE_INTER_MV:{
2463 if(_dec->telemetry_mbmode&0x04){
2464 cairo_rectangle(c,x+2.5,y+2.5,11,11);
2465 cairo_set_source_rgba(c,0,1.,0,.5);
2466 cairo_stroke(c);
2467 }
2468 if(_dec->telemetry_mv&0x04){
2469 cairo_move_to(c,x+8+frag_mv[0],y+8-frag_mv[1]);
2470 cairo_set_source_rgba(c,1.,1.,1.,.9);
2471 cairo_set_line_width(c,3.);
2472 cairo_line_to(c,x+8+frag_mv[0]*.66,y+8-frag_mv[1]*.66);
2473 cairo_stroke_preserve(c);
2474 cairo_set_line_width(c,2.);
2475 cairo_line_to(c,x+8+frag_mv[0]*.33,y+8-frag_mv[1]*.33);
2476 cairo_stroke_preserve(c);
2477 cairo_set_line_width(c,1.);
2478 cairo_line_to(c,x+8,y+8);
2479 cairo_stroke(c);
2480 }
2481 }break;
2482 case OC_MODE_INTER_MV_LAST:{
2483 if(_dec->telemetry_mbmode&0x08){
2484 cairo_rectangle(c,x+2.5,y+2.5,11,11);
2485 cairo_set_source_rgba(c,0,1.,0,.5);
2486 cairo_move_to(c,x+13.5,y+2.5);
2487 cairo_line_to(c,x+2.5,y+8);
2488 cairo_line_to(c,x+13.5,y+13.5);
2489 cairo_stroke(c);
2490 }
2491 if(_dec->telemetry_mv&0x08){
2492 cairo_move_to(c,x+8+frag_mv[0],y+8-frag_mv[1]);
2493 cairo_set_source_rgba(c,1.,1.,1.,.9);
2494 cairo_set_line_width(c,3.);
2495 cairo_line_to(c,x+8+frag_mv[0]*.66,y+8-frag_mv[1]*.66);
2496 cairo_stroke_preserve(c);
2497 cairo_set_line_width(c,2.);
2498 cairo_line_to(c,x+8+frag_mv[0]*.33,y+8-frag_mv[1]*.33);
2499 cairo_stroke_preserve(c);
2500 cairo_set_line_width(c,1.);
2501 cairo_line_to(c,x+8,y+8);
2502 cairo_stroke(c);
2503 }
2504 }break;
2505 case OC_MODE_INTER_MV_LAST2:{
2506 if(_dec->telemetry_mbmode&0x10){
2507 cairo_rectangle(c,x+2.5,y+2.5,11,11);
2508 cairo_set_source_rgba(c,0,1.,0,.5);
2509 cairo_move_to(c,x+8,y+2.5);
2510 cairo_line_to(c,x+2.5,y+8);
2511 cairo_line_to(c,x+8,y+13.5);
2512 cairo_move_to(c,x+13.5,y+2.5);
2513 cairo_line_to(c,x+8,y+8);
2514 cairo_line_to(c,x+13.5,y+13.5);
2515 cairo_stroke(c);
2516 }
2517 if(_dec->telemetry_mv&0x10){
2518 cairo_move_to(c,x+8+frag_mv[0],y+8-frag_mv[1]);
2519 cairo_set_source_rgba(c,1.,1.,1.,.9);
2520 cairo_set_line_width(c,3.);
2521 cairo_line_to(c,x+8+frag_mv[0]*.66,y+8-frag_mv[1]*.66);
2522 cairo_stroke_preserve(c);
2523 cairo_set_line_width(c,2.);
2524 cairo_line_to(c,x+8+frag_mv[0]*.33,y+8-frag_mv[1]*.33);
2525 cairo_stroke_preserve(c);
2526 cairo_set_line_width(c,1.);
2527 cairo_line_to(c,x+8,y+8);
2528 cairo_stroke(c);
2529 }
2530 }break;
2531 case OC_MODE_GOLDEN_NOMV:{
2532 if(_dec->telemetry_mbmode&0x20){
2533 cairo_set_source_rgba(c,1.,1.,0,.5);
2534 cairo_rectangle(c,x+2.5,y+2.5,11,11);
2535 cairo_stroke_preserve(c);
2536 cairo_set_source_rgba(c,1.,1.,0,.25);
2537 cairo_fill(c);
2538 }
2539 }break;
2540 case OC_MODE_GOLDEN_MV:{
2541 if(_dec->telemetry_mbmode&0x40){
2542 cairo_rectangle(c,x+2.5,y+2.5,11,11);
2543 cairo_set_source_rgba(c,1.,1.,0,.5);
2544 cairo_stroke(c);
2545 }
2546 if(_dec->telemetry_mv&0x40){
2547 cairo_move_to(c,x+8+frag_mv[0],y+8-frag_mv[1]);
2548 cairo_set_source_rgba(c,1.,1.,1.,.9);
2549 cairo_set_line_width(c,3.);
2550 cairo_line_to(c,x+8+frag_mv[0]*.66,y+8-frag_mv[1]*.66);
2551 cairo_stroke_preserve(c);
2552 cairo_set_line_width(c,2.);
2553 cairo_line_to(c,x+8+frag_mv[0]*.33,y+8-frag_mv[1]*.33);
2554 cairo_stroke_preserve(c);
2555 cairo_set_line_width(c,1.);
2556 cairo_line_to(c,x+8,y+8);
2557 cairo_stroke(c);
2558 }
2559 }break;
2560 case OC_MODE_INTER_MV_FOUR:{
2561 if(_dec->telemetry_mbmode&0x80){
2562 cairo_rectangle(c,x+2.5,y+2.5,4,4);
2563 cairo_rectangle(c,x+9.5,y+2.5,4,4);
2564 cairo_rectangle(c,x+2.5,y+9.5,4,4);
2565 cairo_rectangle(c,x+9.5,y+9.5,4,4);
2566 cairo_set_source_rgba(c,0,1.,0,.5);
2567 cairo_stroke(c);
2568 }
2569 /*4mv is odd, coded in raster order.*/
2570 fragi=mb_maps[mbi][0][0];
2571 if(frags[fragi].coded&&_dec->telemetry_mv&0x80){
2572 frag_mv=frag_mvs[fragi];
2573 cairo_move_to(c,x+4+frag_mv[0],y+12-frag_mv[1]);
2574 cairo_set_source_rgba(c,1.,1.,1.,.9);
2575 cairo_set_line_width(c,3.);
2576 cairo_line_to(c,x+4+frag_mv[0]*.66,y+12-frag_mv[1]*.66);
2577 cairo_stroke_preserve(c);
2578 cairo_set_line_width(c,2.);
2579 cairo_line_to(c,x+4+frag_mv[0]*.33,y+12-frag_mv[1]*.33);
2580 cairo_stroke_preserve(c);
2581 cairo_set_line_width(c,1.);
2582 cairo_line_to(c,x+4,y+12);
2583 cairo_stroke(c);
2584 }
2585 fragi=mb_maps[mbi][0][1];
2586 if(frags[fragi].coded&&_dec->telemetry_mv&0x80){
2587 frag_mv=frag_mvs[fragi];
2588 cairo_move_to(c,x+12+frag_mv[0],y+12-frag_mv[1]);
2589 cairo_set_source_rgba(c,1.,1.,1.,.9);
2590 cairo_set_line_width(c,3.);
2591 cairo_line_to(c,x+12+frag_mv[0]*.66,y+12-frag_mv[1]*.66);
2592 cairo_stroke_preserve(c);
2593 cairo_set_line_width(c,2.);
2594 cairo_line_to(c,x+12+frag_mv[0]*.33,y+12-frag_mv[1]*.33);
2595 cairo_stroke_preserve(c);
2596 cairo_set_line_width(c,1.);
2597 cairo_line_to(c,x+12,y+12);
2598 cairo_stroke(c);
2599 }
2600 fragi=mb_maps[mbi][0][2];
2601 if(frags[fragi].coded&&_dec->telemetry_mv&0x80){
2602 frag_mv=frag_mvs[fragi];
2603 cairo_move_to(c,x+4+frag_mv[0],y+4-frag_mv[1]);
2604 cairo_set_source_rgba(c,1.,1.,1.,.9);
2605 cairo_set_line_width(c,3.);
2606 cairo_line_to(c,x+4+frag_mv[0]*.66,y+4-frag_mv[1]*.66);
2607 cairo_stroke_preserve(c);
2608 cairo_set_line_width(c,2.);
2609 cairo_line_to(c,x+4+frag_mv[0]*.33,y+4-frag_mv[1]*.33);
2610 cairo_stroke_preserve(c);
2611 cairo_set_line_width(c,1.);
2612 cairo_line_to(c,x+4,y+4);
2613 cairo_stroke(c);
2614 }
2615 fragi=mb_maps[mbi][0][3];
2616 if(frags[fragi].coded&&_dec->telemetry_mv&0x80){
2617 frag_mv=frag_mvs[fragi];
2618 cairo_move_to(c,x+12+frag_mv[0],y+4-frag_mv[1]);
2619 cairo_set_source_rgba(c,1.,1.,1.,.9);
2620 cairo_set_line_width(c,3.);
2621 cairo_line_to(c,x+12+frag_mv[0]*.66,y+4-frag_mv[1]*.66);
2622 cairo_stroke_preserve(c);
2623 cairo_set_line_width(c,2.);
2624 cairo_line_to(c,x+12+frag_mv[0]*.33,y+4-frag_mv[1]*.33);
2625 cairo_stroke_preserve(c);
2626 cairo_set_line_width(c,1.);
2627 cairo_line_to(c,x+12,y+4);
2628 cairo_stroke(c);
2629 }
2630 }break;
2631 }
2632 }
2633 }
2634 /*qii illustration.*/
2635 if(_dec->telemetry_qi&0x2){
2636 cairo_set_line_cap(c,CAIRO_LINE_CAP_SQUARE);
2637 for(bi=0;bi<4;bi++){
2638 ptrdiff_t fragi;
2639 int qiv;
2640 int xp;
2641 int yp;
2642 xp=x+(bi&1)*8;
2643 yp=y+8-(bi&2)*4;
2644 fragi=mb_maps[mbi][0][bi];
2645 if(fragi>=0&&frags[fragi].coded){
2646 qiv=qim[frags[fragi].qii];
2647 cairo_set_line_width(c,3.);
2648 cairo_set_source_rgba(c,0.,0.,0.,.5);
2649 switch(qiv){
2650 /*Double plus:*/
2651 case 2:{
2652 if((bi&1)^((bi&2)>>1)){
2653 cairo_move_to(c,xp+2.5,yp+1.5);
2654 cairo_line_to(c,xp+2.5,yp+3.5);
2655 cairo_move_to(c,xp+1.5,yp+2.5);
2656 cairo_line_to(c,xp+3.5,yp+2.5);
2657 cairo_move_to(c,xp+5.5,yp+4.5);
2658 cairo_line_to(c,xp+5.5,yp+6.5);
2659 cairo_move_to(c,xp+4.5,yp+5.5);
2660 cairo_line_to(c,xp+6.5,yp+5.5);
2661 cairo_stroke_preserve(c);
2662 cairo_set_source_rgba(c,0.,1.,1.,1.);
2663 }
2664 else{
2665 cairo_move_to(c,xp+5.5,yp+1.5);
2666 cairo_line_to(c,xp+5.5,yp+3.5);
2667 cairo_move_to(c,xp+4.5,yp+2.5);
2668 cairo_line_to(c,xp+6.5,yp+2.5);
2669 cairo_move_to(c,xp+2.5,yp+4.5);
2670 cairo_line_to(c,xp+2.5,yp+6.5);
2671 cairo_move_to(c,xp+1.5,yp+5.5);
2672 cairo_line_to(c,xp+3.5,yp+5.5);
2673 cairo_stroke_preserve(c);
2674 cairo_set_source_rgba(c,0.,1.,1.,1.);
2675 }
2676 }break;
2677 /*Double minus:*/
2678 case -2:{
2679 cairo_move_to(c,xp+2.5,yp+2.5);
2680 cairo_line_to(c,xp+5.5,yp+2.5);
2681 cairo_move_to(c,xp+2.5,yp+5.5);
2682 cairo_line_to(c,xp+5.5,yp+5.5);
2683 cairo_stroke_preserve(c);
2684 cairo_set_source_rgba(c,1.,1.,1.,1.);
2685 }break;
2686 /*Plus:*/
2687 case 1:{
2688 if(bi&2==0)yp-=2;
2689 if(bi&1==0)xp-=2;
2690 cairo_move_to(c,xp+4.5,yp+2.5);
2691 cairo_line_to(c,xp+4.5,yp+6.5);
2692 cairo_move_to(c,xp+2.5,yp+4.5);
2693 cairo_line_to(c,xp+6.5,yp+4.5);
2694 cairo_stroke_preserve(c);
2695 cairo_set_source_rgba(c,.1,1.,.3,1.);
2696 break;
2697 }
2698 /*Fall through.*/
2699 /*Minus:*/
2700 case -1:{
2701 cairo_move_to(c,xp+2.5,yp+4.5);
2702 cairo_line_to(c,xp+6.5,yp+4.5);
2703 cairo_stroke_preserve(c);
2704 cairo_set_source_rgba(c,1.,.3,.1,1.);
2705 }break;
2706 default:continue;
2707 }
2708 cairo_set_line_width(c,1.);
2709 cairo_stroke(c);
2710 }
2711 }
2712 }
2713 col2++;
2714 if((col2>>1)>=_dec->state.nhmbs){
2715 col2=0;
2716 row2+=2;
2717 }
2718 }
2719 /*Bit usage indicator[s]:*/
2720 if(_dec->telemetry_bits){
2721 int widths[6];
2722 int fpsn;
2723 int fpsd;
2724 int mult;
2725 int fullw;
2726 int padw;
2727 int i;
2728 fpsn=_dec->state.info.fps_numerator;
2729 fpsd=_dec->state.info.fps_denominator;
2730 mult=(_dec->telemetry_bits>=0xFF?1:_dec->telemetry_bits);
2731 fullw=250.f*h*fpsd*mult/fpsn;
2732 padw=w-24;
2733 /*Header and coded block bits.*/
2734 if(_dec->telemetry_frame_bytes<0||
2735 _dec->telemetry_frame_bytes==OC_LOTS_OF_BITS){
2736 _dec->telemetry_frame_bytes=0;
2737 }
2738 if(_dec->telemetry_coding_bytes<0||
2739 _dec->telemetry_coding_bytes>_dec->telemetry_frame_bytes){
2740 _dec->telemetry_coding_bytes=0;
2741 }
2742 if(_dec->telemetry_mode_bytes<0||
2743 _dec->telemetry_mode_bytes>_dec->telemetry_frame_bytes){
2744 _dec->telemetry_mode_bytes=0;
2745 }
2746 if(_dec->telemetry_mv_bytes<0||
2747 _dec->telemetry_mv_bytes>_dec->telemetry_frame_bytes){
2748 _dec->telemetry_mv_bytes=0;
2749 }
2750 if(_dec->telemetry_qi_bytes<0||
2751 _dec->telemetry_qi_bytes>_dec->telemetry_frame_bytes){
2752 _dec->telemetry_qi_bytes=0;
2753 }
2754 if(_dec->telemetry_dc_bytes<0||
2755 _dec->telemetry_dc_bytes>_dec->telemetry_frame_bytes){
2756 _dec->telemetry_dc_bytes=0;
2757 }
2758 widths[0]=padw*(_dec->telemetry_frame_bytes-_dec->telemetry_coding_bytes)/fullw;
2759 widths[1]=padw*(_dec->telemetry_coding_bytes-_dec->telemetry_mode_bytes)/fullw;
2760 widths[2]=padw*(_dec->telemetry_mode_bytes-_dec->telemetry_mv_bytes)/fullw;
2761 widths[3]=padw*(_dec->telemetry_mv_bytes-_dec->telemetry_qi_bytes)/fullw;
2762 widths[4]=padw*(_dec->telemetry_qi_bytes-_dec->telemetry_dc_bytes)/fullw;
2763 widths[5]=padw*(_dec->telemetry_dc_bytes)/fullw;
2764 for(i=0;i<6;i++)if(widths[i]>w)widths[i]=w;
2765 cairo_set_source_rgba(c,.0,.0,.0,.6);
2766 cairo_rectangle(c,10,h-33,widths[0]+1,5);
2767 cairo_rectangle(c,10,h-29,widths[1]+1,5);
2768 cairo_rectangle(c,10,h-25,widths[2]+1,5);
2769 cairo_rectangle(c,10,h-21,widths[3]+1,5);
2770 cairo_rectangle(c,10,h-17,widths[4]+1,5);
2771 cairo_rectangle(c,10,h-13,widths[5]+1,5);
2772 cairo_fill(c);
2773 cairo_set_source_rgb(c,1,0,0);
2774 cairo_rectangle(c,10.5,h-32.5,widths[0],4);
2775 cairo_fill(c);
2776 cairo_set_source_rgb(c,0,1,0);
2777 cairo_rectangle(c,10.5,h-28.5,widths[1],4);
2778 cairo_fill(c);
2779 cairo_set_source_rgb(c,0,0,1);
2780 cairo_rectangle(c,10.5,h-24.5,widths[2],4);
2781 cairo_fill(c);
2782 cairo_set_source_rgb(c,.6,.4,.0);
2783 cairo_rectangle(c,10.5,h-20.5,widths[3],4);
2784 cairo_fill(c);
2785 cairo_set_source_rgb(c,.3,.3,.3);
2786 cairo_rectangle(c,10.5,h-16.5,widths[4],4);
2787 cairo_fill(c);
2788 cairo_set_source_rgb(c,.5,.5,.8);
2789 cairo_rectangle(c,10.5,h-12.5,widths[5],4);
2790 cairo_fill(c);
2791 }
2792 /*Master qi indicator[s]:*/
2793 if(_dec->telemetry_qi&0x1){
2794 cairo_text_extents_t extents;
2795 char buffer[10];
2796 int p;
2797 int y;
2798 p=0;
2799 y=h-7.5;
2800 if(_dec->state.qis[0]>=10)buffer[p++]=48+_dec->state.qis[0]/10;
2801 buffer[p++]=48+_dec->state.qis[0]%10;
2802 if(_dec->state.nqis>=2){
2803 buffer[p++]=' ';
2804 if(_dec->state.qis[1]>=10)buffer[p++]=48+_dec->state.qis[1]/10;
2805 buffer[p++]=48+_dec->state.qis[1]%10;
2806 }
2807 if(_dec->state.nqis==3){
2808 buffer[p++]=' ';
2809 if(_dec->state.qis[2]>=10)buffer[p++]=48+_dec->state.qis[2]/10;
2810 buffer[p++]=48+_dec->state.qis[2]%10;
2811 }
2812 buffer[p++]='\0';
2813 cairo_select_font_face(c,"sans",
2814 CAIRO_FONT_SLANT_NORMAL,CAIRO_FONT_WEIGHT_BOLD);
2815 cairo_set_font_size(c,18);
2816 cairo_text_extents(c,buffer,&extents);
2817 cairo_set_source_rgb(c,1,1,1);
2818 cairo_move_to(c,w-extents.x_advance-10,y);
2819 cairo_show_text(c,buffer);
2820 cairo_set_source_rgb(c,0,0,0);
2821 cairo_move_to(c,w-extents.x_advance-10,y);
2822 cairo_text_path(c,buffer);
2823 cairo_set_line_width(c,.8);
2824 cairo_set_line_join(c,CAIRO_LINE_JOIN_ROUND);
2825 cairo_stroke(c);
2826 }
2827 cairo_destroy(c);
2828 }
2829 /*Out of the Cairo plane into the telemetry YUV buffer.*/
2830 _ycbcr[0].data=_dec->telemetry_frame_data;
2831 _ycbcr[0].stride=_ycbcr[0].width;
2832 _ycbcr[1].data=_ycbcr[0].data+h*_ycbcr[0].stride;
2833 _ycbcr[1].stride=_ycbcr[1].width;
2834 _ycbcr[2].data=_ycbcr[1].data+(h>>vdec)*_ycbcr[1].stride;
2835 _ycbcr[2].stride=_ycbcr[2].width;
2836 y_row=_ycbcr[0].data;
2837 u_row=_ycbcr[1].data;
2838 v_row=_ycbcr[2].data;
2839 rgb_row=data;
2840 /*This is one of the few places it's worth handling chroma on a
2841 case-by-case basis.*/
2842 switch(_dec->state.info.pixel_fmt){
2843 case TH_PF_420:{
2844 for(y=0;y<h;y+=2){
2845 unsigned char *y_row2;
2846 unsigned char *rgb_row2;
2847 y_row2=y_row+_ycbcr[0].stride;
2848 rgb_row2=rgb_row+cstride;
2849 for(x=0;x<w;x+=2){
2850 int y;
2851 int u;
2852 int v;
2853 y=(65481*rgb_row[4*x+2]+128553*rgb_row[4*x+1]
2854 +24966*rgb_row[4*x+0]+4207500)/255000;
2855 y_row[x]=OC_CLAMP255(y);
2856 y=(65481*rgb_row[4*x+6]+128553*rgb_row[4*x+5]
2857 +24966*rgb_row[4*x+4]+4207500)/255000;
2858 y_row[x+1]=OC_CLAMP255(y);
2859 y=(65481*rgb_row2[4*x+2]+128553*rgb_row2[4*x+1]
2860 +24966*rgb_row2[4*x+0]+4207500)/255000;
2861 y_row2[x]=OC_CLAMP255(y);
2862 y=(65481*rgb_row2[4*x+6]+128553*rgb_row2[4*x+5]
2863 +24966*rgb_row2[4*x+4]+4207500)/255000;
2864 y_row2[x+1]=OC_CLAMP255(y);
2865 u=(-8372*(rgb_row[4*x+2]+rgb_row[4*x+6]
2866 +rgb_row2[4*x+2]+rgb_row2[4*x+6])
2867 -16436*(rgb_row[4*x+1]+rgb_row[4*x+5]
2868 +rgb_row2[4*x+1]+rgb_row2[4*x+5])
2869 +24808*(rgb_row[4*x+0]+rgb_row[4*x+4]
2870 +rgb_row2[4*x+0]+rgb_row2[4*x+4])+29032005)/225930;
2871 v=(39256*(rgb_row[4*x+2]+rgb_row[4*x+6]
2872 +rgb_row2[4*x+2]+rgb_row2[4*x+6])
2873 -32872*(rgb_row[4*x+1]+rgb_row[4*x+5]
2874 +rgb_row2[4*x+1]+rgb_row2[4*x+5])
2875 -6384*(rgb_row[4*x+0]+rgb_row[4*x+4]
2876 +rgb_row2[4*x+0]+rgb_row2[4*x+4])+45940035)/357510;
2877 u_row[x>>1]=OC_CLAMP255(u);
2878 v_row[x>>1]=OC_CLAMP255(v);
2879 }
2880 y_row+=_ycbcr[0].stride<<1;
2881 u_row+=_ycbcr[1].stride;
2882 v_row+=_ycbcr[2].stride;
2883 rgb_row+=cstride<<1;
2884 }
2885 }break;
2886 case TH_PF_422:{
2887 for(y=0;y<h;y++){
2888 for(x=0;x<w;x+=2){
2889 int y;
2890 int u;
2891 int v;
2892 y=(65481*rgb_row[4*x+2]+128553*rgb_row[4*x+1]
2893 +24966*rgb_row[4*x+0]+4207500)/255000;
2894 y_row[x]=OC_CLAMP255(y);
2895 y=(65481*rgb_row[4*x+6]+128553*rgb_row[4*x+5]
2896 +24966*rgb_row[4*x+4]+4207500)/255000;
2897 y_row[x+1]=OC_CLAMP255(y);
2898 u=(-16744*(rgb_row[4*x+2]+rgb_row[4*x+6])
2899 -32872*(rgb_row[4*x+1]+rgb_row[4*x+5])
2900 +49616*(rgb_row[4*x+0]+rgb_row[4*x+4])+29032005)/225930;
2901 v=(78512*(rgb_row[4*x+2]+rgb_row[4*x+6])
2902 -65744*(rgb_row[4*x+1]+rgb_row[4*x+5])
2903 -12768*(rgb_row[4*x+0]+rgb_row[4*x+4])+45940035)/357510;
2904 u_row[x>>1]=OC_CLAMP255(u);
2905 v_row[x>>1]=OC_CLAMP255(v);
2906 }
2907 y_row+=_ycbcr[0].stride;
2908 u_row+=_ycbcr[1].stride;
2909 v_row+=_ycbcr[2].stride;
2910 rgb_row+=cstride;
2911 }
2912 }break;
2913 /*case TH_PF_444:*/
2914 default:{
2915 for(y=0;y<h;y++){
2916 for(x=0;x<w;x++){
2917 int y;
2918 int u;
2919 int v;
2920 y=(65481*rgb_row[4*x+2]+128553*rgb_row[4*x+1]
2921 +24966*rgb_row[4*x+0]+4207500)/255000;
2922 u=(-33488*rgb_row[4*x+2]-65744*rgb_row[4*x+1]
2923 +99232*rgb_row[4*x+0]+29032005)/225930;
2924 v=(157024*rgb_row[4*x+2]-131488*rgb_row[4*x+1]
2925 -25536*rgb_row[4*x+0]+45940035)/357510;
2926 y_row[x]=OC_CLAMP255(y);
2927 u_row[x]=OC_CLAMP255(u);
2928 v_row[x]=OC_CLAMP255(v);
2929 }
2930 y_row+=_ycbcr[0].stride;
2931 u_row+=_ycbcr[1].stride;
2932 v_row+=_ycbcr[2].stride;
2933 rgb_row+=cstride;
2934 }
2935 }break;
2936 }
2937 /*Finished.
2938 Destroy the surface.*/
2939 cairo_surface_destroy(cs);
2940 }
2941 #endif
2942 return 0;
2943 }
2944