1 /********************************************************************
2 * *
3 * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. *
4 * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
5 * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
6 * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
7 * *
8 * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 *
9 * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
10 * *
11 ********************************************************************
12
13 function:
14 last mod: $Id$
15
16 ********************************************************************/
17
18 #include <stdlib.h>
19 #include <string.h>
20 #include <ogg/ogg.h>
21 #include "decint.h"
22 #if defined(OC_DUMP_IMAGES)
23 # include <stdio.h>
24 # include "png.h"
25 #endif
26 #if defined(HAVE_CAIRO)
27 # include <cairo.h>
28 #endif
29
30
/*Post-processing levels, in increasing order of strength (each level
   implies the processing of the ones below it).*/
/*No post-processing.*/
#define OC_PP_LEVEL_DISABLED  (0)
/*Keep track of DC qi for each block only.*/
#define OC_PP_LEVEL_TRACKDCQI (1)
/*Deblock the luma plane.*/
#define OC_PP_LEVEL_DEBLOCKY  (2)
/*Dering the luma plane.*/
#define OC_PP_LEVEL_DERINGY   (3)
/*Stronger luma plane deringing.*/
#define OC_PP_LEVEL_SDERINGY  (4)
/*Deblock the chroma planes.*/
#define OC_PP_LEVEL_DEBLOCKC  (5)
/*Dering the chroma planes.*/
#define OC_PP_LEVEL_DERINGC   (6)
/*Stronger chroma plane deringing.*/
#define OC_PP_LEVEL_SDERINGC  (7)
/*Maximum valid post-processing level.*/
#define OC_PP_LEVEL_MAX       (7)
49
50
51
/*The mode alphabets for the various mode coding schemes.
  Scheme 0 uses a custom alphabet, which is not stored in this table.
  Row i holds the alphabet for scheme i+1 (see oc_dec_mb_modes_unpack()).*/
static const unsigned char OC_MODE_ALPHABETS[7][OC_NMODES]={
  /*Scheme 1: Last MV dominates.*/
  {
    OC_MODE_INTER_MV_LAST,OC_MODE_INTER_MV_LAST2,OC_MODE_INTER_MV,
    OC_MODE_INTER_NOMV,OC_MODE_INTRA,OC_MODE_GOLDEN_NOMV,OC_MODE_GOLDEN_MV,
    OC_MODE_INTER_MV_FOUR
  },
  /*Scheme 2.*/
  {
    OC_MODE_INTER_MV_LAST,OC_MODE_INTER_MV_LAST2,OC_MODE_INTER_NOMV,
    OC_MODE_INTER_MV,OC_MODE_INTRA,OC_MODE_GOLDEN_NOMV,OC_MODE_GOLDEN_MV,
    OC_MODE_INTER_MV_FOUR
  },
  /*Scheme 3.*/
  {
    OC_MODE_INTER_MV_LAST,OC_MODE_INTER_MV,OC_MODE_INTER_MV_LAST2,
    OC_MODE_INTER_NOMV,OC_MODE_INTRA,OC_MODE_GOLDEN_NOMV,OC_MODE_GOLDEN_MV,
    OC_MODE_INTER_MV_FOUR
  },
  /*Scheme 4.*/
  {
    OC_MODE_INTER_MV_LAST,OC_MODE_INTER_MV,OC_MODE_INTER_NOMV,
    OC_MODE_INTER_MV_LAST2,OC_MODE_INTRA,OC_MODE_GOLDEN_NOMV,
    OC_MODE_GOLDEN_MV,OC_MODE_INTER_MV_FOUR
  },
  /*Scheme 5: No MV dominates.*/
  {
    OC_MODE_INTER_NOMV,OC_MODE_INTER_MV_LAST,OC_MODE_INTER_MV_LAST2,
    OC_MODE_INTER_MV,OC_MODE_INTRA,OC_MODE_GOLDEN_NOMV,OC_MODE_GOLDEN_MV,
    OC_MODE_INTER_MV_FOUR
  },
  /*Scheme 6.*/
  {
    OC_MODE_INTER_NOMV,OC_MODE_GOLDEN_NOMV,OC_MODE_INTER_MV_LAST,
    OC_MODE_INTER_MV_LAST2,OC_MODE_INTER_MV,OC_MODE_INTRA,OC_MODE_GOLDEN_MV,
    OC_MODE_INTER_MV_FOUR
  },
  /*Scheme 7: Default ordering.*/
  {
    OC_MODE_INTER_NOMV,OC_MODE_INTRA,OC_MODE_INTER_MV,OC_MODE_INTER_MV_LAST,
    OC_MODE_INTER_MV_LAST2,OC_MODE_GOLDEN_NOMV,OC_MODE_GOLDEN_MV,
    OC_MODE_INTER_MV_FOUR
  }
};
94
95
96 /*The original DCT tokens are extended and reordered during the construction of
97 the Huffman tables.
98 The extension means more bits can be read with fewer calls to the bitpacker
99 during the Huffman decoding process (at the cost of larger Huffman tables),
100 and fewer tokens require additional extra bits (reducing the average storage
101 per decoded token).
102 The revised ordering reveals essential information in the token value
103 itself; specifically, whether or not there are additional extra bits to read
104 and the parameter to which those extra bits are applied.
105 The token is used to fetch a code word from the OC_DCT_CODE_WORD table below.
106 The extra bits are added into code word at the bit position inferred from the
107 token value, giving the final code word from which all required parameters
108 are derived.
109 The number of EOBs and the leading zero run length can be extracted directly.
110 The coefficient magnitude is optionally negated before extraction, according
111 to a 'flip' bit.*/
112
/*The number of additional extra bits that are decoded with each of the
   internal DCT tokens.*/
static const unsigned char OC_INTERNAL_DCT_TOKEN_EXTRA_BITS[15]={
  12,4,3,3,4,4,5,5,8,8,8,8,3,3,6
};

/*Whether or not an internal token needs any additional extra bits.
  Only the first 15 internal tokens (those with entries in
   OC_INTERNAL_DCT_TOKEN_EXTRA_BITS above) do.*/
#define OC_DCT_TOKEN_NEEDS_MORE(token) \
 ((token)<(int)(sizeof(OC_INTERNAL_DCT_TOKEN_EXTRA_BITS)/ \
  sizeof(*OC_INTERNAL_DCT_TOKEN_EXTRA_BITS)))

/*This token (OC_DCT_REPEAT_RUN3_TOKEN) requires more than 8 extra bits.*/
#define OC_DCT_TOKEN_FAT_EOB (0)

/*The number of EOBs to use for an end-of-frame token.
  Note: We want to set eobs to PTRDIFF_MAX here, but that requires C99, which
   is not yet available everywhere; this should be equivalent.*/
#define OC_DCT_EOB_FINISH (~(size_t)0>>1)

/*The location of the (6) run length bits in the code word.
  These are placed at index 0 and given 8 bits (even though 6 would suffice)
   because it may be faster to extract the lower byte on some platforms.*/
#define OC_DCT_CW_RLEN_SHIFT (0)
/*The location of the (12) EOB bits in the code word.*/
#define OC_DCT_CW_EOB_SHIFT  (8)
/*The location of the (1) flip bit in the code word.
  This must be right under the magnitude bits.*/
#define OC_DCT_CW_FLIP_BIT   (20)
/*The location of the (11) token magnitude bits in the code word.
  These must be last, and rely on a sign-extending right shift.*/
#define OC_DCT_CW_MAG_SHIFT  (21)

/*Pack the given fields into a code word.
  Note that the magnitude is stored with the flip bit subtracted; the extra
   parentheses make the intended (and original) operator precedence
   explicit.*/
#define OC_DCT_CW_PACK(_eobs,_rlen,_mag,_flip) \
 ((_eobs)<<OC_DCT_CW_EOB_SHIFT| \
 (_rlen)<<OC_DCT_CW_RLEN_SHIFT| \
 (_flip)<<OC_DCT_CW_FLIP_BIT| \
 ((_mag)-(_flip))<<OC_DCT_CW_MAG_SHIFT)

/*A special code word value that signals the end of the frame (a long EOB run
   of zero).*/
#define OC_DCT_CW_FINISH (0)

/*The position at which to insert the extra bits in the code word.
  We use this formulation because Intel has no useful cmov.
  A real architecture would probably do better with two of those.
  This translates to 11 instructions(!), and is _still_ faster than either a
   table lookup (just barely) or the naive double-ternary implementation
   (which gcc translates to a jump and a cmov).
  This assumes OC_DCT_CW_RLEN_SHIFT is zero, but could easily be reworked if
   you want to make one of the other shifts zero.
  Result: EOB position for tokens 0...1, magnitude position for 2...11, and
   run-length position (0) for 12 and up.*/
#define OC_DCT_TOKEN_EB_POS(_token) \
 (((OC_DCT_CW_EOB_SHIFT-OC_DCT_CW_MAG_SHIFT)&-((_token)<2)) \
 +(OC_DCT_CW_MAG_SHIFT&-((_token)<12)))
167
/*The code words for each internal token.
  See the notes at OC_DCT_TOKEN_MAP for the reasons why things are out of
   order.
  Any extra bits for a token are inserted into its code word at the bit
   position given by OC_DCT_TOKEN_EB_POS() before the fields are
   extracted.*/
static const ogg_int32_t OC_DCT_CODE_WORD[92]={
  /*These tokens require additional extra bits for the EOB count.*/
  /*OC_DCT_REPEAT_RUN3_TOKEN (12 extra bits)*/
  OC_DCT_CW_FINISH,
  /*OC_DCT_REPEAT_RUN2_TOKEN (4 extra bits)*/
  OC_DCT_CW_PACK(16, 0,  0,0),
  /*These tokens require additional extra bits for the magnitude.*/
  /*OC_DCT_VAL_CAT5 (4 extra bits-1 already read)*/
  OC_DCT_CW_PACK( 0, 0, 13,0),
  OC_DCT_CW_PACK( 0, 0, 13,1),
  /*OC_DCT_VAL_CAT6 (5 extra bits-1 already read)*/
  OC_DCT_CW_PACK( 0, 0, 21,0),
  OC_DCT_CW_PACK( 0, 0, 21,1),
  /*OC_DCT_VAL_CAT7 (6 extra bits-1 already read)*/
  OC_DCT_CW_PACK( 0, 0, 37,0),
  OC_DCT_CW_PACK( 0, 0, 37,1),
  /*OC_DCT_VAL_CAT8 (10 extra bits-2 already read)*/
  OC_DCT_CW_PACK( 0, 0, 69,0),
  OC_DCT_CW_PACK( 0, 0,325,0),
  OC_DCT_CW_PACK( 0, 0, 69,1),
  OC_DCT_CW_PACK( 0, 0,325,1),
  /*These tokens require additional extra bits for the run length.*/
  /*OC_DCT_RUN_CAT1C (4 extra bits-1 already read)*/
  OC_DCT_CW_PACK( 0,10, +1,0),
  OC_DCT_CW_PACK( 0,10, -1,0),
  /*OC_DCT_ZRL_TOKEN (6 extra bits)
    Flip is set to distinguish this from OC_DCT_CW_FINISH.*/
  OC_DCT_CW_PACK( 0, 0,  0,1),
  /*The remaining tokens require no additional extra bits.*/
  /*OC_DCT_EOB1_TOKEN (0 extra bits)*/
  OC_DCT_CW_PACK( 1, 0,  0,0),
  /*OC_DCT_EOB2_TOKEN (0 extra bits)*/
  OC_DCT_CW_PACK( 2, 0,  0,0),
  /*OC_DCT_EOB3_TOKEN (0 extra bits)*/
  OC_DCT_CW_PACK( 3, 0,  0,0),
  /*OC_DCT_RUN_CAT1A (1 extra bit-1 already read)x5*/
  OC_DCT_CW_PACK( 0, 1, +1,0),
  OC_DCT_CW_PACK( 0, 1, -1,0),
  OC_DCT_CW_PACK( 0, 2, +1,0),
  OC_DCT_CW_PACK( 0, 2, -1,0),
  OC_DCT_CW_PACK( 0, 3, +1,0),
  OC_DCT_CW_PACK( 0, 3, -1,0),
  OC_DCT_CW_PACK( 0, 4, +1,0),
  OC_DCT_CW_PACK( 0, 4, -1,0),
  OC_DCT_CW_PACK( 0, 5, +1,0),
  OC_DCT_CW_PACK( 0, 5, -1,0),
  /*OC_DCT_RUN_CAT2A (2 extra bits-2 already read)*/
  OC_DCT_CW_PACK( 0, 1, +2,0),
  OC_DCT_CW_PACK( 0, 1, +3,0),
  OC_DCT_CW_PACK( 0, 1, -2,0),
  OC_DCT_CW_PACK( 0, 1, -3,0),
  /*OC_DCT_RUN_CAT1B (3 extra bits-3 already read)*/
  OC_DCT_CW_PACK( 0, 6, +1,0),
  OC_DCT_CW_PACK( 0, 7, +1,0),
  OC_DCT_CW_PACK( 0, 8, +1,0),
  OC_DCT_CW_PACK( 0, 9, +1,0),
  OC_DCT_CW_PACK( 0, 6, -1,0),
  OC_DCT_CW_PACK( 0, 7, -1,0),
  OC_DCT_CW_PACK( 0, 8, -1,0),
  OC_DCT_CW_PACK( 0, 9, -1,0),
  /*OC_DCT_RUN_CAT2B (3 extra bits-3 already read)*/
  OC_DCT_CW_PACK( 0, 2, +2,0),
  OC_DCT_CW_PACK( 0, 3, +2,0),
  OC_DCT_CW_PACK( 0, 2, +3,0),
  OC_DCT_CW_PACK( 0, 3, +3,0),
  OC_DCT_CW_PACK( 0, 2, -2,0),
  OC_DCT_CW_PACK( 0, 3, -2,0),
  OC_DCT_CW_PACK( 0, 2, -3,0),
  OC_DCT_CW_PACK( 0, 3, -3,0),
  /*OC_DCT_SHORT_ZRL_TOKEN (3 extra bits-3 already read)
    Flip is set on the first one to distinguish it from OC_DCT_CW_FINISH.*/
  OC_DCT_CW_PACK( 0, 0,  0,1),
  OC_DCT_CW_PACK( 0, 1,  0,0),
  OC_DCT_CW_PACK( 0, 2,  0,0),
  OC_DCT_CW_PACK( 0, 3,  0,0),
  OC_DCT_CW_PACK( 0, 4,  0,0),
  OC_DCT_CW_PACK( 0, 5,  0,0),
  OC_DCT_CW_PACK( 0, 6,  0,0),
  OC_DCT_CW_PACK( 0, 7,  0,0),
  /*OC_ONE_TOKEN (0 extra bits)*/
  OC_DCT_CW_PACK( 0, 0, +1,0),
  /*OC_MINUS_ONE_TOKEN (0 extra bits)*/
  OC_DCT_CW_PACK( 0, 0, -1,0),
  /*OC_TWO_TOKEN (0 extra bits)*/
  OC_DCT_CW_PACK( 0, 0, +2,0),
  /*OC_MINUS_TWO_TOKEN (0 extra bits)*/
  OC_DCT_CW_PACK( 0, 0, -2,0),
  /*OC_DCT_VAL_CAT2 (1 extra bit-1 already read)x4*/
  OC_DCT_CW_PACK( 0, 0, +3,0),
  OC_DCT_CW_PACK( 0, 0, -3,0),
  OC_DCT_CW_PACK( 0, 0, +4,0),
  OC_DCT_CW_PACK( 0, 0, -4,0),
  OC_DCT_CW_PACK( 0, 0, +5,0),
  OC_DCT_CW_PACK( 0, 0, -5,0),
  OC_DCT_CW_PACK( 0, 0, +6,0),
  OC_DCT_CW_PACK( 0, 0, -6,0),
  /*OC_DCT_VAL_CAT3 (2 extra bits-2 already read)*/
  OC_DCT_CW_PACK( 0, 0, +7,0),
  OC_DCT_CW_PACK( 0, 0, +8,0),
  OC_DCT_CW_PACK( 0, 0, -7,0),
  OC_DCT_CW_PACK( 0, 0, -8,0),
  /*OC_DCT_VAL_CAT4 (3 extra bits-3 already read)*/
  OC_DCT_CW_PACK( 0, 0, +9,0),
  OC_DCT_CW_PACK( 0, 0,+10,0),
  OC_DCT_CW_PACK( 0, 0,+11,0),
  OC_DCT_CW_PACK( 0, 0,+12,0),
  OC_DCT_CW_PACK( 0, 0, -9,0),
  OC_DCT_CW_PACK( 0, 0,-10,0),
  OC_DCT_CW_PACK( 0, 0,-11,0),
  OC_DCT_CW_PACK( 0, 0,-12,0),
  /*OC_DCT_REPEAT_RUN1_TOKEN (3 extra bits-3 already read)*/
  OC_DCT_CW_PACK( 8, 0,  0,0),
  OC_DCT_CW_PACK( 9, 0,  0,0),
  OC_DCT_CW_PACK(10, 0,  0,0),
  OC_DCT_CW_PACK(11, 0,  0,0),
  OC_DCT_CW_PACK(12, 0,  0,0),
  OC_DCT_CW_PACK(13, 0,  0,0),
  OC_DCT_CW_PACK(14, 0,  0,0),
  OC_DCT_CW_PACK(15, 0,  0,0),
  /*OC_DCT_REPEAT_RUN0_TOKEN (2 extra bits-2 already read)*/
  OC_DCT_CW_PACK( 4, 0,  0,0),
  OC_DCT_CW_PACK( 5, 0,  0,0),
  OC_DCT_CW_PACK( 6, 0,  0,0),
  OC_DCT_CW_PACK( 7, 0,  0,0),
};
296
297
298
/*Decodes one run length for the super block coded-flag RLE scheme.
  _opb: The pack buffer to read from.
  Return: The run length, in the range 1...4129.*/
static int oc_sb_run_unpack(oc_pack_buf *_opb){
  /*Coding scheme:
     Codeword             Run Length
     0                       1
     10x                    2-3
     110x                   4-5
     1110xx                 6-9
     11110xxx              10-17
     111110xxxx            18-33
     111111xxxxxxxxxxxx    34-4129*/
  static const ogg_int16_t OC_SB_RUN_TREE[22]={
    4,
    -(1<<8|1),-(1<<8|1),-(1<<8|1),-(1<<8|1),
    -(1<<8|1),-(1<<8|1),-(1<<8|1),-(1<<8|1),
    -(3<<8|2),-(3<<8|2),-(3<<8|3),-(3<<8|3),
    -(4<<8|4),-(4<<8|5),-(4<<8|2<<4|6-6),17,
    2,
    -(2<<8|2<<4|10-6),-(2<<8|2<<4|14-6),-(2<<8|4<<4|18-6),-(2<<8|12<<4|34-6)
  };
  int ret;
  ret=oc_huff_token_decode(_opb,OC_SB_RUN_TREE);
  if(ret>=0x10){
    /*Tokens >= 0x10 encode the number of extra bits to read in their upper
       bits and the run-length offset (biased by 6) in their low 5 bits.*/
    int offs;
    offs=ret&0x1F;
    ret=6+offs+(int)oc_pack_read(_opb,ret-offs>>4);
  }
  return ret;
}
327
/*Decodes one run length for the block-level coded-flag RLE scheme used
   inside partially coded super blocks.
  _opb: The pack buffer to read from.
  Return: The run length, in the range 1...30.*/
static int oc_block_run_unpack(oc_pack_buf *_opb){
  /*Coding scheme:
     Codeword             Run Length
     0x                      1-2
     10x                     3-4
     110x                    5-6
     1110xx                  7-10
     11110xx                11-14
     11111xxxx              15-30*/
  static const ogg_int16_t OC_BLOCK_RUN_TREE[61]={
    5,
    -(2<<8|1),-(2<<8|1),-(2<<8|1),-(2<<8|1),
    -(2<<8|1),-(2<<8|1),-(2<<8|1),-(2<<8|1),
    -(2<<8|2),-(2<<8|2),-(2<<8|2),-(2<<8|2),
    -(2<<8|2),-(2<<8|2),-(2<<8|2),-(2<<8|2),
    -(3<<8|3),-(3<<8|3),-(3<<8|3),-(3<<8|3),
    -(3<<8|4),-(3<<8|4),-(3<<8|4),-(3<<8|4),
    -(4<<8|5),-(4<<8|5),-(4<<8|6),-(4<<8|6),
    33,       36,       39,       44,
    1,-(1<<8|7),-(1<<8|8),
    1,-(1<<8|9),-(1<<8|10),
    2,-(2<<8|11),-(2<<8|12),-(2<<8|13),-(2<<8|14),
    4,
    -(4<<8|15),-(4<<8|16),-(4<<8|17),-(4<<8|18),
    -(4<<8|19),-(4<<8|20),-(4<<8|21),-(4<<8|22),
    -(4<<8|23),-(4<<8|24),-(4<<8|25),-(4<<8|26),
    -(4<<8|27),-(4<<8|28),-(4<<8|29),-(4<<8|30)
  };
  return oc_huff_token_decode(_opb,OC_BLOCK_RUN_TREE);
}
358
359
360
/*Installs the plain-C implementations of the accelerated decoder routines.
  When OC_DEC_USE_VTABLE is not defined, there is no vtable to fill in and
   this function does nothing.*/
void oc_dec_accel_init_c(oc_dec_ctx *_dec){
# if defined(OC_DEC_USE_VTABLE)
  _dec->opt_vtable.dc_unpredict_mcu_plane=
   oc_dec_dc_unpredict_mcu_plane_c;
# endif
}
367
/*Initializes a decoder context from a validated info header and the setup
   header data.
  _dec:   The decoder context to initialize.
  _info:  The stream parameters from the info header.
  _setup: The Huffman tables and quantization parameters from the setup
           header.
  Return: 0 on success, TH_EFAULT on allocation failure, or the (negative)
           error code propagated from oc_state_init()/oc_huff_trees_copy().
  On failure, everything allocated up to that point is released.*/
static int oc_dec_init(oc_dec_ctx *_dec,const th_info *_info,
 const th_setup_info *_setup){
  int qti;
  int pli;
  int qi;
  int ret;
  ret=oc_state_init(&_dec->state,_info,3);
  if(ret<0)return ret;
  ret=oc_huff_trees_copy(_dec->huff_tables,
   (const ogg_int16_t *const *)_setup->huff_tables);
  if(ret<0){
    oc_state_clear(&_dec->state);
    return ret;
  }
  /*For each fragment, allocate one byte for every DCT coefficient token, plus
     one byte for extra-bits for each token, plus one more byte for the long
     EOB run, just in case it's the very last token and has a run length of
     one.*/
  _dec->dct_tokens=(unsigned char *)_ogg_malloc((64+64+1)*
   _dec->state.nfrags*sizeof(_dec->dct_tokens[0]));
  if(_dec->dct_tokens==NULL){
    oc_huff_trees_clear(_dec->huff_tables);
    oc_state_clear(&_dec->state);
    return TH_EFAULT;
  }
  /*Point each dequantization table at its backing storage before filling
     the tables in.*/
  for(qi=0;qi<64;qi++)for(pli=0;pli<3;pli++)for(qti=0;qti<2;qti++){
    _dec->state.dequant_tables[qi][pli][qti]=
     _dec->state.dequant_table_data[qi][pli][qti];
  }
  oc_dequant_tables_init(_dec->state.dequant_tables,_dec->pp_dc_scale,
   &_setup->qinfo);
  /*Derive a per-qi sharpness modifier for post-processing from a few AC
     quantizer entries.
    Note that '+' binds tighter than '<<', so the shift doubles the sum of
     the four entries for the luma plane (pli==0).*/
  for(qi=0;qi<64;qi++){
    int qsum;
    qsum=0;
    for(qti=0;qti<2;qti++)for(pli=0;pli<3;pli++){
      qsum+=_dec->state.dequant_tables[qi][pli][qti][12]+
       _dec->state.dequant_tables[qi][pli][qti][17]+
       _dec->state.dequant_tables[qi][pli][qti][18]+
       _dec->state.dequant_tables[qi][pli][qti][24]<<(pli==0);
    }
    _dec->pp_sharp_mod[qi]=-(qsum>>11);
  }
  memcpy(_dec->state.loop_filter_limits,_setup->qinfo.loop_filter_limits,
   sizeof(_dec->state.loop_filter_limits));
  oc_dec_accel_init(_dec);
  /*Post-processing starts out disabled, and its buffers start out
     unallocated.*/
  _dec->pp_level=OC_PP_LEVEL_DISABLED;
  _dec->dc_qis=NULL;
  _dec->variances=NULL;
  _dec->pp_frame_data=NULL;
  _dec->stripe_cb.ctx=NULL;
  _dec->stripe_cb.stripe_decoded=NULL;
#if defined(HAVE_CAIRO)
  _dec->telemetry=0;
  _dec->telemetry_bits=0;
  _dec->telemetry_qi=0;
  _dec->telemetry_mbmode=0;
  _dec->telemetry_mv=0;
  _dec->telemetry_frame_data=NULL;
#endif
  return 0;
}
429
/*Releases all memory owned by the decoder context, in roughly the reverse
   order of allocation in oc_dec_init().
  Members that were never allocated are NULL here (see oc_dec_init()).*/
static void oc_dec_clear(oc_dec_ctx *_dec){
#if defined(HAVE_CAIRO)
  _ogg_free(_dec->telemetry_frame_data);
#endif
  _ogg_free(_dec->pp_frame_data);
  _ogg_free(_dec->variances);
  _ogg_free(_dec->dc_qis);
  _ogg_free(_dec->dct_tokens);
  oc_huff_trees_clear(_dec->huff_tables);
  oc_state_clear(&_dec->state);
}
441
442
/*Unpacks the frame header from the packet currently loaded in the pack
   buffer.
  This reads the frame type and the list of up to three qi values used by
   the frame.
  Return: 0 on success, TH_EBADPACKET if this is not a data packet, or
           TH_EIMPL if reserved keyframe bits are non-zero.*/
static int oc_dec_frame_header_unpack(oc_dec_ctx *_dec){
  long bits;
  /*A data packet must start with a zero bit.*/
  bits=oc_pack_read1(&_dec->opb);
  if(bits!=0)return TH_EBADPACKET;
  /*Read the frame type bit (I or P).*/
  bits=oc_pack_read1(&_dec->opb);
  _dec->state.frame_type=(int)bits;
  /*Read the qi list: a 6-bit qi, then up to two more, each preceded by a
     continuation bit.*/
  bits=oc_pack_read(&_dec->opb,6);
  _dec->state.qis[0]=(unsigned char)bits;
  _dec->state.nqis=1;
  while(_dec->state.nqis<3){
    bits=oc_pack_read1(&_dec->opb);
    if(!bits)break;
    bits=oc_pack_read(&_dec->opb,6);
    _dec->state.qis[_dec->state.nqis++]=(unsigned char)bits;
  }
  if(_dec->state.frame_type==OC_INTRA_FRAME){
    /*Keyframes have 3 unused configuration bits, holdovers from VP3 days.
      Most of the other unused bits in the VP3 headers were eliminated.
      A non-zero value here comes from a bitstream we do not know how to
       decode.*/
    bits=oc_pack_read(&_dec->opb,3);
    if(bits!=0)return TH_EIMPL;
  }
  return 0;
}
478
479 /*Mark all fragments as coded and in OC_MODE_INTRA.
480 This also builds up the coded fragment list (in coded order), and clears the
481 uncoded fragment list.
482 It does not update the coded macro block list nor the super block flags, as
483 those are not used when decoding INTRA frames.*/
static void oc_dec_mark_all_intra(oc_dec_ctx *_dec){
  const oc_sb_map   *sb_maps;
  const oc_sb_flags *sb_flags;
  oc_fragment       *frags;
  ptrdiff_t         *coded_fragis;
  ptrdiff_t          ncoded_fragis;
  ptrdiff_t          prev_ncoded_fragis;
  unsigned           nsbs;
  unsigned           sbi;
  int                pli;
  coded_fragis=_dec->state.coded_fragis;
  prev_ncoded_fragis=ncoded_fragis=0;
  sb_maps=(const oc_sb_map *)_dec->state.sb_maps;
  sb_flags=_dec->state.sb_flags;
  frags=_dec->state.frags;
  sbi=nsbs=0;
  for(pli=0;pli<3;pli++){
    /*Super blocks are stored plane by plane; nsbs accumulates so that sbi
       walks each plane's range in turn.*/
    nsbs+=_dec->state.fplanes[pli].nsbs;
    for(;sbi<nsbs;sbi++){
      int quadi;
      /*Only visit quadrants flagged valid for this super block.*/
      for(quadi=0;quadi<4;quadi++)if(sb_flags[sbi].quad_valid&1<<quadi){
        int bi;
        for(bi=0;bi<4;bi++){
          ptrdiff_t fragi;
          fragi=sb_maps[sbi][quadi][bi];
          /*Skip negative (unused) map entries.*/
          if(fragi>=0){
            frags[fragi].coded=1;
            frags[fragi].refi=OC_FRAME_SELF;
            frags[fragi].mb_mode=OC_MODE_INTRA;
            coded_fragis[ncoded_fragis++]=fragi;
          }
        }
      }
    }
    /*Record how many of the coded fragments belong to this plane.*/
    _dec->state.ncoded_fragis[pli]=ncoded_fragis-prev_ncoded_fragis;
    prev_ncoded_fragis=ncoded_fragis;
  }
  _dec->state.ntotal_coded_fragis=ncoded_fragis;
}
523
524 /*Decodes the bit flags indicating whether each super block is partially coded
525 or not.
526 Return: The number of partially coded super blocks.*/
static unsigned oc_dec_partial_sb_flags_unpack(oc_dec_ctx *_dec){
  oc_sb_flags *sb_flags;
  unsigned     nsbs;
  unsigned     sbi;
  unsigned     npartial;
  unsigned     run_count;
  long         val;
  int          flag;
  /*The initial flag value is coded explicitly; subsequent runs alternate
     it, except after a maximum-length run.*/
  val=oc_pack_read1(&_dec->opb);
  flag=(int)val;
  sb_flags=_dec->state.sb_flags;
  nsbs=_dec->state.nsbs;
  sbi=npartial=0;
  while(sbi<nsbs){
    int full_run;
    run_count=oc_sb_run_unpack(&_dec->opb);
    /*4129 is the longest run the scheme can code; such a run does not
       toggle the flag, and a fresh flag bit is read instead.*/
    full_run=run_count>=4129;
    do{
      sb_flags[sbi].coded_partially=flag;
      sb_flags[sbi].coded_fully=0;
      npartial+=flag;
      sbi++;
    }
    while(--run_count>0&&sbi<nsbs);
    if(full_run&&sbi<nsbs){
      val=oc_pack_read1(&_dec->opb);
      flag=(int)val;
    }
    else flag=!flag;
  }
  /*TODO: run_count should be 0 here.
    If it's not, we should issue a warning of some kind.*/
  return npartial;
}
561
/*Decodes the bit flags for whether or not each non-partially-coded super
   block is fully coded or not.
  This function should only be called if there is at least one
   non-partially-coded super block.*/
static void oc_dec_coded_sb_flags_unpack(oc_dec_ctx *_dec){
  oc_sb_flags *sb_flags;
  unsigned     nsbs;
  unsigned     sbi;
  unsigned     run_count;
  long         val;
  int          flag;
  sb_flags=_dec->state.sb_flags;
  nsbs=_dec->state.nsbs;
  /*Skip partially coded super blocks; their coded_fully flag was already
     cleared by oc_dec_partial_sb_flags_unpack().*/
  for(sbi=0;sb_flags[sbi].coded_partially;sbi++);
  val=oc_pack_read1(&_dec->opb);
  flag=(int)val;
  do{
    int full_run;
    run_count=oc_sb_run_unpack(&_dec->opb);
    full_run=run_count>=4129;
    for(;sbi<nsbs;sbi++){
      /*Runs only cover the non-partially-coded super blocks.*/
      if(sb_flags[sbi].coded_partially)continue;
      if(run_count--<=0)break;
      sb_flags[sbi].coded_fully=flag;
    }
    /*A maximum-length run does not toggle the flag; a fresh flag bit is
       read instead.*/
    if(full_run&&sbi<nsbs){
      val=oc_pack_read1(&_dec->opb);
      flag=(int)val;
    }
    else flag=!flag;
  }
  while(sbi<nsbs);
  /*TODO: run_count should be 0 here.
    If it's not, we should issue a warning of some kind.*/
}
599
/*Unpacks the coded/not-coded flag for every fragment.
  This builds the coded fragment list (filled forward from the start of the
   array) and the uncoded fragment list (filled backward from the end), and
   records in mb_modes[] whether each luma macro block has at least one
   coded block.*/
static void oc_dec_coded_flags_unpack(oc_dec_ctx *_dec){
  const oc_sb_map   *sb_maps;
  const oc_sb_flags *sb_flags;
  signed char       *mb_modes;
  oc_fragment       *frags;
  unsigned           nsbs;
  unsigned           sbi;
  unsigned           npartial;
  long               val;
  int                pli;
  int                flag;
  int                run_count;
  ptrdiff_t         *coded_fragis;
  ptrdiff_t         *uncoded_fragis;
  ptrdiff_t          ncoded_fragis;
  ptrdiff_t          nuncoded_fragis;
  ptrdiff_t          prev_ncoded_fragis;
  npartial=oc_dec_partial_sb_flags_unpack(_dec);
  /*Fully-coded flags are only present when some super block is not
     partially coded.*/
  if(npartial<_dec->state.nsbs)oc_dec_coded_sb_flags_unpack(_dec);
  if(npartial>0){
    /*Read the initial block-level flag; it is negated here because the
       first run decoded below toggles it before use.*/
    val=oc_pack_read1(&_dec->opb);
    flag=!(int)val;
  }
  else flag=0;
  sb_maps=(const oc_sb_map *)_dec->state.sb_maps;
  sb_flags=_dec->state.sb_flags;
  mb_modes=_dec->state.mb_modes;
  frags=_dec->state.frags;
  sbi=nsbs=run_count=0;
  coded_fragis=_dec->state.coded_fragis;
  /*The uncoded list shares the coded list's storage, growing down from the
     end.*/
  uncoded_fragis=coded_fragis+_dec->state.nfrags;
  prev_ncoded_fragis=ncoded_fragis=nuncoded_fragis=0;
  for(pli=0;pli<3;pli++){
    nsbs+=_dec->state.fplanes[pli].nsbs;
    for(;sbi<nsbs;sbi++){
      int quadi;
      for(quadi=0;quadi<4;quadi++)if(sb_flags[sbi].quad_valid&1<<quadi){
        int quad_coded;
        int bi;
        quad_coded=0;
        for(bi=0;bi<4;bi++){
          ptrdiff_t fragi;
          fragi=sb_maps[sbi][quadi][bi];
          if(fragi>=0){
            int coded;
            if(sb_flags[sbi].coded_fully)coded=1;
            else if(!sb_flags[sbi].coded_partially)coded=0;
            else{
              /*Partially coded super block: consume the block-level RLE
                 runs; each new run toggles the flag.*/
              if(run_count<=0){
                run_count=oc_block_run_unpack(&_dec->opb);
                flag=!flag;
              }
              run_count--;
              coded=flag;
            }
            if(coded)coded_fragis[ncoded_fragis++]=fragi;
            else *(uncoded_fragis-++nuncoded_fragis)=fragi;
            quad_coded|=coded;
            frags[fragi].coded=coded;
            frags[fragi].refi=OC_FRAME_NONE;
          }
        }
        /*Remember if there's a coded luma block in this macro block.*/
        if(!pli)mb_modes[sbi<<2|quadi]=quad_coded;
      }
    }
    _dec->state.ncoded_fragis[pli]=ncoded_fragis-prev_ncoded_fragis;
    prev_ncoded_fragis=ncoded_fragis;
  }
  _dec->state.ntotal_coded_fragis=ncoded_fragis;
  /*TODO: run_count should be 0 here.
    If it's not, we should issue a warning of some kind.*/
}
673
674
/*Coding scheme for the macro block mode indices (used with the alphabets of
   schemes 0...6):
   Codeword            Mode Index
   0                       0
   10                      1
   110                     2
   1110                    3
   11110                   4
   111110                  5
   1111110                 6
   1111111                 7*/
static const ogg_int16_t OC_VLC_MODE_TREE[26]={
  4,
  -(1<<8|0),-(1<<8|0),-(1<<8|0),-(1<<8|0),
  -(1<<8|0),-(1<<8|0),-(1<<8|0),-(1<<8|0),
  -(2<<8|1),-(2<<8|1),-(2<<8|1),-(2<<8|1),
  -(3<<8|2),-(3<<8|2),-(4<<8|3),17,
  3,
  -(1<<8|4),-(1<<8|4),-(1<<8|4),-(1<<8|4),
  -(2<<8|5),-(2<<8|5),-(3<<8|6),-(3<<8|7)
};

/*Fixed-length (3-bit) mode index codes, used when mode_scheme==7.*/
static const ogg_int16_t OC_CLC_MODE_TREE[9]={
  3,
  -(3<<8|0),-(3<<8|1),-(3<<8|2),-(3<<8|3),
  -(3<<8|4),-(3<<8|5),-(3<<8|6),-(3<<8|7)
};
701
/*Unpacks the list of macro block modes for INTER frames.
  A mode is only coded for macro blocks with at least one coded luma block;
   mb_modes[] was pre-filled with that flag by
   oc_dec_coded_flags_unpack().*/
static void oc_dec_mb_modes_unpack(oc_dec_ctx *_dec){
  signed char         *mb_modes;
  const unsigned char *alphabet;
  unsigned char        scheme0_alphabet[8];
  const ogg_int16_t   *mode_tree;
  size_t               nmbs;
  size_t               mbi;
  long                 val;
  int                  mode_scheme;
  val=oc_pack_read(&_dec->opb,3);
  mode_scheme=(int)val;
  if(mode_scheme==0){
    int mi;
    /*Scheme 0 transmits a custom alphabet: the rank of each mode (in the
       default ordering) is coded explicitly with 3 bits.*/
    /*Just in case, initialize the modes to something.
      If the bitstream doesn't contain each index exactly once, it's likely
       corrupt and the rest of the packet is garbage anyway, but this way we
       won't crash, and we'll decode SOMETHING.*/
    /*LOOP VECTORIZES*/
    for(mi=0;mi<OC_NMODES;mi++)scheme0_alphabet[mi]=OC_MODE_INTER_NOMV;
    for(mi=0;mi<OC_NMODES;mi++){
      val=oc_pack_read(&_dec->opb,3);
      scheme0_alphabet[val]=OC_MODE_ALPHABETS[6][mi];
    }
    alphabet=scheme0_alphabet;
  }
  else alphabet=OC_MODE_ALPHABETS[mode_scheme-1];
  /*Scheme 7 uses fixed-length 3-bit codes; all other schemes share the same
     VLC with different alphabets.*/
  mode_tree=mode_scheme==7?OC_CLC_MODE_TREE:OC_VLC_MODE_TREE;
  mb_modes=_dec->state.mb_modes;
  nmbs=_dec->state.nmbs;
  for(mbi=0;mbi<nmbs;mbi++){
    if(mb_modes[mbi]>0){
      /*We have a coded luma block; decode a mode.*/
      mb_modes[mbi]=alphabet[oc_huff_token_decode(&_dec->opb,mode_tree)];
    }
    /*For other valid macro blocks, INTER_NOMV is forced, but we rely on the
       fact that OC_MODE_INTER_NOMV is already 0.*/
  }
}
741
742
743
/*The Huffman tree for the variable-length motion vector component scheme.
  Decoded tokens are the component value biased by 32 (the bias is removed
   in oc_mv_unpack()).*/
static const ogg_int16_t OC_VLC_MV_COMP_TREE[101]={
  5,
  -(3<<8|32+0),-(3<<8|32+0),-(3<<8|32+0),-(3<<8|32+0),
  -(3<<8|32+1),-(3<<8|32+1),-(3<<8|32+1),-(3<<8|32+1),
  -(3<<8|32-1),-(3<<8|32-1),-(3<<8|32-1),-(3<<8|32-1),
  -(4<<8|32+2),-(4<<8|32+2),-(4<<8|32-2),-(4<<8|32-2),
  -(4<<8|32+3),-(4<<8|32+3),-(4<<8|32-3),-(4<<8|32-3),
  33,          36,          39,          42,
  45,          50,          55,          60,
  65,          74,          83,          92,
  1,-(1<<8|32+4),-(1<<8|32-4),
  1,-(1<<8|32+5),-(1<<8|32-5),
  1,-(1<<8|32+6),-(1<<8|32-6),
  1,-(1<<8|32+7),-(1<<8|32-7),
  2,-(2<<8|32+8),-(2<<8|32-8),-(2<<8|32+9),-(2<<8|32-9),
  2,-(2<<8|32+10),-(2<<8|32-10),-(2<<8|32+11),-(2<<8|32-11),
  2,-(2<<8|32+12),-(2<<8|32-12),-(2<<8|32+13),-(2<<8|32-13),
  2,-(2<<8|32+14),-(2<<8|32-14),-(2<<8|32+15),-(2<<8|32-15),
  3,
  -(3<<8|32+16),-(3<<8|32-16),-(3<<8|32+17),-(3<<8|32-17),
  -(3<<8|32+18),-(3<<8|32-18),-(3<<8|32+19),-(3<<8|32-19),
  3,
  -(3<<8|32+20),-(3<<8|32-20),-(3<<8|32+21),-(3<<8|32-21),
  -(3<<8|32+22),-(3<<8|32-22),-(3<<8|32+23),-(3<<8|32-23),
  3,
  -(3<<8|32+24),-(3<<8|32-24),-(3<<8|32+25),-(3<<8|32-25),
  -(3<<8|32+26),-(3<<8|32-26),-(3<<8|32+27),-(3<<8|32-27),
  3,
  -(3<<8|32+28),-(3<<8|32-28),-(3<<8|32+29),-(3<<8|32-29),
  -(3<<8|32+30),-(3<<8|32-30),-(3<<8|32+31),-(3<<8|32-31)
};

/*The tree for the fixed-length (6-bit) motion vector component scheme.
  Values use the same bias of 32 as the VLC tree above.*/
static const ogg_int16_t OC_CLC_MV_COMP_TREE[65]={
  6,
  -(6<<8|32 +0),-(6<<8|32 -0),-(6<<8|32 +1),-(6<<8|32 -1),
  -(6<<8|32 +2),-(6<<8|32 -2),-(6<<8|32 +3),-(6<<8|32 -3),
  -(6<<8|32 +4),-(6<<8|32 -4),-(6<<8|32 +5),-(6<<8|32 -5),
  -(6<<8|32 +6),-(6<<8|32 -6),-(6<<8|32 +7),-(6<<8|32 -7),
  -(6<<8|32 +8),-(6<<8|32 -8),-(6<<8|32 +9),-(6<<8|32 -9),
  -(6<<8|32+10),-(6<<8|32-10),-(6<<8|32+11),-(6<<8|32-11),
  -(6<<8|32+12),-(6<<8|32-12),-(6<<8|32+13),-(6<<8|32-13),
  -(6<<8|32+14),-(6<<8|32-14),-(6<<8|32+15),-(6<<8|32-15),
  -(6<<8|32+16),-(6<<8|32-16),-(6<<8|32+17),-(6<<8|32-17),
  -(6<<8|32+18),-(6<<8|32-18),-(6<<8|32+19),-(6<<8|32-19),
  -(6<<8|32+20),-(6<<8|32-20),-(6<<8|32+21),-(6<<8|32-21),
  -(6<<8|32+22),-(6<<8|32-22),-(6<<8|32+23),-(6<<8|32-23),
  -(6<<8|32+24),-(6<<8|32-24),-(6<<8|32+25),-(6<<8|32-25),
  -(6<<8|32+26),-(6<<8|32-26),-(6<<8|32+27),-(6<<8|32-27),
  -(6<<8|32+28),-(6<<8|32-28),-(6<<8|32+29),-(6<<8|32-29),
  -(6<<8|32+30),-(6<<8|32-30),-(6<<8|32+31),-(6<<8|32-31)
};
795
796
oc_mv_unpack(oc_pack_buf * _opb,const ogg_int16_t * _tree)797 static oc_mv oc_mv_unpack(oc_pack_buf *_opb,const ogg_int16_t *_tree){
798 int dx;
799 int dy;
800 dx=oc_huff_token_decode(_opb,_tree)-32;
801 dy=oc_huff_token_decode(_opb,_tree)-32;
802 return OC_MV(dx,dy);
803 }
804
/*Unpacks the list of motion vectors for INTER frames, and propagates the
   macro block modes and motion vectors to the individual fragments.*/
static void oc_dec_mv_unpack_and_frag_modes_fill(oc_dec_ctx *_dec){
  const oc_mb_map     *mb_maps;
  const signed char   *mb_modes;
  oc_set_chroma_mvs_func set_chroma_mvs;
  const ogg_int16_t   *mv_comp_tree;
  oc_fragment         *frags;
  oc_mv               *frag_mvs;
  const unsigned char *map_idxs;
  int                  map_nidxs;
  oc_mv                last_mv;
  oc_mv                prior_mv;
  oc_mv                cbmvs[4];
  size_t               nmbs;
  size_t               mbi;
  long                 val;
  /*How chroma MVs are derived from the luma MVs depends on the pixel
     format.*/
  set_chroma_mvs=OC_SET_CHROMA_MVS_TABLE[_dec->state.info.pixel_fmt];
  /*One bit selects fixed-length (CLC) vs. variable-length (VLC) MV
     component codes for the whole frame.*/
  val=oc_pack_read1(&_dec->opb);
  mv_comp_tree=val?OC_CLC_MV_COMP_TREE:OC_VLC_MV_COMP_TREE;
  map_idxs=OC_MB_MAP_IDXS[_dec->state.info.pixel_fmt];
  map_nidxs=OC_MB_MAP_NIDXS[_dec->state.info.pixel_fmt];
  prior_mv=last_mv=0;
  frags=_dec->state.frags;
  frag_mvs=_dec->state.frag_mvs;
  mb_maps=(const oc_mb_map *)_dec->state.mb_maps;
  mb_modes=_dec->state.mb_modes;
  nmbs=_dec->state.nmbs;
  for(mbi=0;mbi<nmbs;mbi++){
    int mb_mode;
    mb_mode=mb_modes[mbi];
    if(mb_mode!=OC_MODE_INVALID){
      oc_mv     mbmv;
      ptrdiff_t fragi;
      int       mapi;
      int       mapii;
      int       refi;
      if(mb_mode==OC_MODE_INTER_MV_FOUR){
        oc_mv lbmvs[4];
        int   bi;
        prior_mv=last_mv;
        /*Decode one MV per coded luma block; the last one decoded becomes
           the new last_mv.*/
        for(bi=0;bi<4;bi++){
          fragi=mb_maps[mbi][0][bi];
          if(frags[fragi].coded){
            frags[fragi].refi=OC_FRAME_PREV;
            frags[fragi].mb_mode=OC_MODE_INTER_MV_FOUR;
            lbmvs[bi]=last_mv=oc_mv_unpack(&_dec->opb,mv_comp_tree);
            frag_mvs[fragi]=lbmvs[bi];
          }
          else lbmvs[bi]=0;
        }
        /*Derive the chroma MVs from the four luma MVs and assign them to
           the coded chroma fragments.*/
        (*set_chroma_mvs)(cbmvs,lbmvs);
        for(mapii=4;mapii<map_nidxs;mapii++){
          mapi=map_idxs[mapii];
          bi=mapi&3;
          fragi=mb_maps[mbi][mapi>>2][bi];
          if(frags[fragi].coded){
            frags[fragi].refi=OC_FRAME_PREV;
            frags[fragi].mb_mode=OC_MODE_INTER_MV_FOUR;
            frag_mvs[fragi]=cbmvs[bi];
          }
        }
      }
      else{
        switch(mb_mode){
          case OC_MODE_INTER_MV:{
            prior_mv=last_mv;
            last_mv=mbmv=oc_mv_unpack(&_dec->opb,mv_comp_tree);
          }break;
          case OC_MODE_INTER_MV_LAST:mbmv=last_mv;break;
          case OC_MODE_INTER_MV_LAST2:{
            /*Re-use the second-to-last MV, and swap it with the last.*/
            mbmv=prior_mv;
            prior_mv=last_mv;
            last_mv=mbmv;
          }break;
          case OC_MODE_GOLDEN_MV:{
            /*Golden-frame MVs do not update last_mv/prior_mv.*/
            mbmv=oc_mv_unpack(&_dec->opb,mv_comp_tree);
          }break;
          default:mbmv=0;break;
        }
        /*Fill in the MVs for the fragments.*/
        refi=OC_FRAME_FOR_MODE(mb_mode);
        mapii=0;
        do{
          mapi=map_idxs[mapii];
          fragi=mb_maps[mbi][mapi>>2][mapi&3];
          if(frags[fragi].coded){
            frags[fragi].refi=refi;
            frags[fragi].mb_mode=mb_mode;
            frag_mvs[fragi]=mbmv;
          }
        }
        while(++mapii<map_nidxs);
      }
    }
  }
}
902
/*Unpacks the per-block qi index (qii) for each coded fragment.
  When more than one qi value is in use for the frame, the selection is
   coded with up to two passes of the same binary RLE scheme used for the
   super block coded flags.*/
static void oc_dec_block_qis_unpack(oc_dec_ctx *_dec){
  oc_fragment     *frags;
  const ptrdiff_t *coded_fragis;
  ptrdiff_t        ncoded_fragis;
  ptrdiff_t        fragii;
  ptrdiff_t        fragi;
  ncoded_fragis=_dec->state.ntotal_coded_fragis;
  if(ncoded_fragis<=0)return;
  frags=_dec->state.frags;
  coded_fragis=_dec->state.coded_fragis;
  if(_dec->state.nqis==1){
    /*If this frame has only a single qi value, then just use it for all coded
       fragments.*/
    for(fragii=0;fragii<ncoded_fragis;fragii++){
      frags[coded_fragis[fragii]].qii=0;
    }
  }
  else{
    long val;
    int  flag;
    int  nqi1;
    int  run_count;
    /*Otherwise, we decode a qi index for each fragment, using two passes of
       the same binary RLE scheme used for super-block coded bits.
      The first pass marks each fragment as having a qii of 0 or greater than
       0, and the second pass (if necessary), distinguishes between a qii of
       1 and 2.
      At first we just store the qii in the fragment.
      After all the qii's are decoded, we make a final pass to replace them
       with the corresponding qi's for this frame.*/
    val=oc_pack_read1(&_dec->opb);
    flag=(int)val;
    nqi1=0;
    fragii=0;
    while(fragii<ncoded_fragis){
      int full_run;
      run_count=oc_sb_run_unpack(&_dec->opb);
      /*A maximum-length run (4129) does not toggle the flag; a fresh flag
         bit is read instead.*/
      full_run=run_count>=4129;
      do{
        frags[coded_fragis[fragii++]].qii=flag;
        nqi1+=flag;
      }
      while(--run_count>0&&fragii<ncoded_fragis);
      if(full_run&&fragii<ncoded_fragis){
        val=oc_pack_read1(&_dec->opb);
        flag=(int)val;
      }
      else flag=!flag;
    }
    /*TODO: run_count should be 0 here.
      If it's not, we should issue a warning of some kind.*/
    /*If we have 3 different qi's for this frame, and there was at least one
       fragment with a non-zero qi, make the second pass.*/
    if(_dec->state.nqis==3&&nqi1>0){
      /*Skip qii==0 fragments.*/
      for(fragii=0;frags[coded_fragis[fragii]].qii==0;fragii++);
      val=oc_pack_read1(&_dec->opb);
      flag=(int)val;
      do{
        int full_run;
        run_count=oc_sb_run_unpack(&_dec->opb);
        full_run=run_count>=4129;
        /*The second pass only covers fragments with a non-zero qii.*/
        for(;fragii<ncoded_fragis;fragii++){
          fragi=coded_fragis[fragii];
          if(frags[fragi].qii==0)continue;
          if(run_count--<=0)break;
          frags[fragi].qii+=flag;
        }
        if(full_run&&fragii<ncoded_fragis){
          val=oc_pack_read1(&_dec->opb);
          flag=(int)val;
        }
        else flag=!flag;
      }
      while(fragii<ncoded_fragis);
      /*TODO: run_count should be 0 here.
        If it's not, we should issue a warning of some kind.*/
    }
  }
}
983
984
985
/*Unpacks the DC coefficient tokens.
  Unlike when unpacking the AC coefficient tokens, we actually need to decode
   the DC coefficient values now so that we can do DC prediction.
  _huff_idxs:  The index of the Huffman table to use for each color plane
                (entry 0: luma, entry 1: chroma).
  _ntoks_left: The number of tokens left to be decoded in each color plane for
                each coefficient.
               This is updated as EOB tokens and zero run tokens are decoded.
  Return: The length of any outstanding EOB run.*/
static ptrdiff_t oc_dec_dc_coeff_unpack(oc_dec_ctx *_dec,int _huff_idxs[2],
 ptrdiff_t _ntoks_left[3][64]){
  unsigned char   *dct_tokens;
  oc_fragment     *frags;
  const ptrdiff_t *coded_fragis;
  ptrdiff_t        ncoded_fragis;
  ptrdiff_t        fragii;
  ptrdiff_t        eobs;
  ptrdiff_t        ti;
  int              pli;
  dct_tokens=_dec->dct_tokens;
  frags=_dec->state.frags;
  coded_fragis=_dec->state.coded_fragis;
  ncoded_fragis=fragii=eobs=ti=0;
  for(pli=0;pli<3;pli++){
    ptrdiff_t run_counts[64];
    ptrdiff_t eob_count;
    ptrdiff_t eobi;
    int       rli;
    ncoded_fragis+=_dec->state.ncoded_fragis[pli];
    memset(run_counts,0,sizeof(run_counts));
    /*Record the EOB run carried into this plane and the starting token
       index, so the reconstruction pass can resume from here.*/
    _dec->eob_runs[pli][0]=eobs;
    _dec->ti0[pli][0]=ti;
    /*Continue any previous EOB run, if there was one.*/
    eobi=eobs;
    if(ncoded_fragis-fragii<eobi)eobi=ncoded_fragis-fragii;
    eob_count=eobi;
    eobs-=eobi;
    /*Fragments covered by an EOB run have a DC coefficient of zero.*/
    while(eobi-->0)frags[coded_fragis[fragii++]].dc=0;
    while(fragii<ncoded_fragis){
      int token;
      int cw;
      int eb;
      int skip;
      /*pli+1>>1 parses as (pli+1)>>1: plane 0 uses the first (luma) table
         index and planes 1 and 2 use the second (chroma) index.*/
      token=oc_huff_token_decode(&_dec->opb,
       _dec->huff_tables[_huff_idxs[pli+1>>1]]);
      dct_tokens[ti++]=(unsigned char)token;
      if(OC_DCT_TOKEN_NEEDS_MORE(token)){
        eb=(int)oc_pack_read(&_dec->opb,
         OC_INTERNAL_DCT_TOKEN_EXTRA_BITS[token]);
        dct_tokens[ti++]=(unsigned char)eb;
        /*A FAT_EOB token's extra bits need a second byte of storage.*/
        if(token==OC_DCT_TOKEN_FAT_EOB)dct_tokens[ti++]=(unsigned char)(eb>>8);
        eb<<=OC_DCT_TOKEN_EB_POS(token);
      }
      else eb=0;
      cw=OC_DCT_CODE_WORD[token]+eb;
      eobs=cw>>OC_DCT_CW_EOB_SHIFT&0xFFF;
      if(cw==OC_DCT_CW_FINISH)eobs=OC_DCT_EOB_FINISH;
      if(eobs){
        eobi=OC_MINI(eobs,ncoded_fragis-fragii);
        eob_count+=eobi;
        eobs-=eobi;
        while(eobi-->0)frags[coded_fragis[fragii++]].dc=0;
      }
      else{
        int coeff;
        skip=(unsigned char)(cw>>OC_DCT_CW_RLEN_SHIFT);
        /*Conditionally negate the magnitude using the flip bit.*/
        cw^=-(cw&1<<OC_DCT_CW_FLIP_BIT);
        coeff=cw>>OC_DCT_CW_MAG_SHIFT;
        /*A non-zero run length means this fragment's DC itself is zero.*/
        if(skip)coeff=0;
        run_counts[skip]++;
        frags[coded_fragis[fragii++]].dc=coeff;
      }
    }
    /*Add the total EOB count to the longest run length.*/
    run_counts[63]+=eob_count;
    /*And convert the run_counts array to a moment table.*/
    for(rli=63;rli-->0;)run_counts[rli]+=run_counts[rli+1];
    /*Finally, subtract off the number of coefficients that have been
       accounted for by runs started in this coefficient.*/
    for(rli=64;rli-->0;)_ntoks_left[pli][rli]-=run_counts[rli];
  }
  _dec->dct_tokens_count=ti;
  return eobs;
}
1069
/*Unpacks the AC coefficient tokens.
  This can completely discard coefficient values while unpacking, and so is
   somewhat simpler than unpacking the DC coefficient tokens.
  _zzi:        The zig-zag coefficient index being decoded.
  _huff_idxs:  The index of the Huffman table to use for each color plane
                (entry 0: luma, entry 1: chroma).
  _ntoks_left: The number of tokens left to be decoded in each color plane for
                each coefficient.
               This is updated as EOB tokens and zero run tokens are decoded.
  _eobs:       The length of any outstanding EOB run from previous
                coefficients.
  Return: The length of any outstanding EOB run.*/
static int oc_dec_ac_coeff_unpack(oc_dec_ctx *_dec,int _zzi,int _huff_idxs[2],
 ptrdiff_t _ntoks_left[3][64],ptrdiff_t _eobs){
  unsigned char *dct_tokens;
  ptrdiff_t      ti;
  int            pli;
  dct_tokens=_dec->dct_tokens;
  ti=_dec->dct_tokens_count;
  for(pli=0;pli<3;pli++){
    ptrdiff_t run_counts[64];
    ptrdiff_t eob_count;
    size_t    ntoks_left;
    size_t    ntoks;
    int       rli;
    /*Record the carried-in EOB run and the starting token index for the
       reconstruction pass.*/
    _dec->eob_runs[pli][_zzi]=_eobs;
    _dec->ti0[pli][_zzi]=ti;
    ntoks_left=_ntoks_left[pli][_zzi];
    memset(run_counts,0,sizeof(run_counts));
    eob_count=0;
    ntoks=0;
    while(ntoks+_eobs<ntoks_left){
      int token;
      int cw;
      int eb;
      int skip;
      /*Account for the EOB run consumed before this token.*/
      ntoks+=_eobs;
      eob_count+=_eobs;
      /*pli+1>>1 parses as (pli+1)>>1: plane 0 uses the first (luma) table
         index and planes 1 and 2 use the second (chroma) index.*/
      token=oc_huff_token_decode(&_dec->opb,
       _dec->huff_tables[_huff_idxs[pli+1>>1]]);
      dct_tokens[ti++]=(unsigned char)token;
      if(OC_DCT_TOKEN_NEEDS_MORE(token)){
        eb=(int)oc_pack_read(&_dec->opb,
         OC_INTERNAL_DCT_TOKEN_EXTRA_BITS[token]);
        dct_tokens[ti++]=(unsigned char)eb;
        /*A FAT_EOB token's extra bits need a second byte of storage.*/
        if(token==OC_DCT_TOKEN_FAT_EOB)dct_tokens[ti++]=(unsigned char)(eb>>8);
        eb<<=OC_DCT_TOKEN_EB_POS(token);
      }
      else eb=0;
      cw=OC_DCT_CODE_WORD[token]+eb;
      skip=(unsigned char)(cw>>OC_DCT_CW_RLEN_SHIFT);
      _eobs=cw>>OC_DCT_CW_EOB_SHIFT&0xFFF;
      if(cw==OC_DCT_CW_FINISH)_eobs=OC_DCT_EOB_FINISH;
      if(_eobs==0){
        run_counts[skip]++;
        ntoks++;
      }
    }
    /*Add the portion of the last EOB run actually used by this coefficient.*/
    eob_count+=ntoks_left-ntoks;
    /*And remove it from the remaining EOB count.*/
    _eobs-=ntoks_left-ntoks;
    /*Add the total EOB count to the longest run length.*/
    run_counts[63]+=eob_count;
    /*And convert the run_counts array to a moment table.*/
    for(rli=63;rli-->0;)run_counts[rli]+=run_counts[rli+1];
    /*Finally, subtract off the number of coefficients that have been
       accounted for by runs started in this coefficient.*/
    for(rli=64-_zzi;rli-->0;)_ntoks_left[pli][_zzi+rli]-=run_counts[rli];
  }
  _dec->dct_tokens_count=ti;
  return _eobs;
}
1141
1142 /*Tokens describing the DCT coefficients that belong to each fragment are
1143 stored in the bitstream grouped by coefficient, not by fragment.
1144
1145 This means that we either decode all the tokens in order, building up a
1146 separate coefficient list for each fragment as we go, and then go back and
1147 do the iDCT on each fragment, or we have to create separate lists of tokens
1148 for each coefficient, so that we can pull the next token required off the
1149 head of the appropriate list when decoding a specific fragment.
1150
1151 The former was VP3's choice, and it meant 2*w*h extra storage for all the
1152 decoded coefficient values.
1153
1154 We take the second option, which lets us store just one to three bytes per
1155 token (generally far fewer than the number of coefficients, due to EOB
1156 tokens and zero runs), and which requires us to only maintain a counter for
1157 each of the 64 coefficients, instead of a counter for every fragment to
1158 determine where the next token goes.
1159
1160 We actually use 3 counters per coefficient, one for each color plane, so we
1161 can decode all color planes simultaneously.
1162 This lets color conversion, etc., be done as soon as a full MCU (one or
1163 two super block rows) is decoded, while the image data is still in cache.*/
1164
/*Unpacks all the DC and AC coefficient tokens for the frame, building the
   per-coefficient token lists described above.*/
static void oc_dec_residual_tokens_unpack(oc_dec_ctx *_dec){
  /*One-past-the-last zig-zag index covered by each of the AC Huffman table
     groups (group 0 is the DC group).*/
  static const unsigned char OC_HUFF_LIST_MAX[5]={1,6,15,28,64};
  ptrdiff_t ntoks_left[3][64];
  int       huff_idxs[2];
  ptrdiff_t eobs;
  long      val;
  int       pli;
  int       zzi;
  int       hgi;
  /*Initially, each coded fragment in a plane owes one token per
     coefficient.*/
  for(pli=0;pli<3;pli++)for(zzi=0;zzi<64;zzi++){
    ntoks_left[pli][zzi]=_dec->state.ncoded_fragis[pli];
  }
  /*Read the DC Huffman table indices: luma first, then chroma.*/
  val=oc_pack_read(&_dec->opb,4);
  huff_idxs[0]=(int)val;
  val=oc_pack_read(&_dec->opb,4);
  huff_idxs[1]=(int)val;
  _dec->eob_runs[0][0]=0;
  eobs=oc_dec_dc_coeff_unpack(_dec,huff_idxs,ntoks_left);
#if defined(HAVE_CAIRO)
  _dec->telemetry_dc_bytes=oc_pack_bytes_left(&_dec->opb);
#endif
  /*Read the AC Huffman table indices: luma first, then chroma.*/
  val=oc_pack_read(&_dec->opb,4);
  huff_idxs[0]=(int)val;
  val=oc_pack_read(&_dec->opb,4);
  huff_idxs[1]=(int)val;
  zzi=1;
  for(hgi=1;hgi<5;hgi++){
    /*Each successive AC table group lies 16 entries further into the table
       list.*/
    huff_idxs[0]+=16;
    huff_idxs[1]+=16;
    for(;zzi<OC_HUFF_LIST_MAX[hgi];zzi++){
      eobs=oc_dec_ac_coeff_unpack(_dec,zzi,huff_idxs,ntoks_left,eobs);
    }
  }
  /*TODO: eobs should be exactly zero, or 4096 or greater.
    The second case occurs when an EOB run of size zero is encountered, which
     gets treated as an infinite EOB run (where infinity is PTRDIFF_MAX).
    If neither of these conditions holds, then a warning should be issued.*/
}
1203
1204
/*Prepares the post-processing stage for the current frame.
  Depending on the requested pp_level this allocates or frees the DC qi
   tracking array, the per-fragment variance array, and the post-processed
   frame buffer, and points the PP frame planes at the proper storage.
  Return: 1 if post-processing should be skipped for this frame (disabled,
           not yet startable, or out of memory), or 0 if it is ready to run.*/
static int oc_dec_postprocess_init(oc_dec_ctx *_dec){
  /*musl libc malloc()/realloc() calls might use floating point, so make sure
     we've cleared the MMX state for them.*/
  oc_restore_fpu(&_dec->state);
  /*pp_level 0: disabled; free any memory used and return*/
  if(_dec->pp_level<=OC_PP_LEVEL_DISABLED){
    if(_dec->dc_qis!=NULL){
      _ogg_free(_dec->dc_qis);
      _dec->dc_qis=NULL;
      _ogg_free(_dec->variances);
      _dec->variances=NULL;
      _ogg_free(_dec->pp_frame_data);
      _dec->pp_frame_data=NULL;
    }
    return 1;
  }
  if(_dec->dc_qis==NULL){
    /*If we haven't been tracking DC quantization indices, there's no point in
       starting now.*/
    if(_dec->state.frame_type!=OC_INTRA_FRAME)return 1;
    _dec->dc_qis=(unsigned char *)_ogg_malloc(
     _dec->state.nfrags*sizeof(_dec->dc_qis[0]));
    if(_dec->dc_qis==NULL)return 1;
    /*On a key frame every fragment uses qi 0's quantizer.*/
    memset(_dec->dc_qis,_dec->state.qis[0],_dec->state.nfrags);
  }
  else{
    unsigned char   *dc_qis;
    const ptrdiff_t *coded_fragis;
    ptrdiff_t        ncoded_fragis;
    ptrdiff_t        fragii;
    unsigned char    qi0;
    /*Update the DC quantization index of each coded block.*/
    dc_qis=_dec->dc_qis;
    coded_fragis=_dec->state.coded_fragis;
    ncoded_fragis=_dec->state.ncoded_fragis[0]+
     _dec->state.ncoded_fragis[1]+_dec->state.ncoded_fragis[2];
    qi0=(unsigned char)_dec->state.qis[0];
    for(fragii=0;fragii<ncoded_fragis;fragii++){
      dc_qis[coded_fragis[fragii]]=qi0;
    }
  }
  /*pp_level 1: Stop after updating DC quantization indices.*/
  if(_dec->pp_level<=OC_PP_LEVEL_TRACKDCQI){
    if(_dec->variances!=NULL){
      _ogg_free(_dec->variances);
      _dec->variances=NULL;
      _ogg_free(_dec->pp_frame_data);
      _dec->pp_frame_data=NULL;
    }
    return 1;
  }
  if(_dec->variances==NULL){
    size_t frame_sz;
    size_t c_sz;
    int    c_w;
    int    c_h;
    frame_sz=_dec->state.info.frame_width*(size_t)_dec->state.info.frame_height;
    /*Each chroma plane is halved in a direction only when the corresponding
       pixel format bit is clear.*/
    c_w=_dec->state.info.frame_width>>!(_dec->state.info.pixel_fmt&1);
    c_h=_dec->state.info.frame_height>>!(_dec->state.info.pixel_fmt&2);
    c_sz=c_w*(size_t)c_h;
    /*Allocate space for the chroma planes, even if we're not going to use
       them; this simplifies allocation state management, though it may waste
       memory on the few systems that don't overcommit pages.*/
    frame_sz+=c_sz<<1;
    _dec->pp_frame_data=(unsigned char *)_ogg_malloc(
     frame_sz*sizeof(_dec->pp_frame_data[0]));
    _dec->variances=(int *)_ogg_malloc(
     _dec->state.nfrags*sizeof(_dec->variances[0]));
    if(_dec->variances==NULL||_dec->pp_frame_data==NULL){
      /*On failure, release both buffers (freeing NULL is harmless) and fall
         back to no post-processing.*/
      _ogg_free(_dec->pp_frame_data);
      _dec->pp_frame_data=NULL;
      _ogg_free(_dec->variances);
      _dec->variances=NULL;
      return 1;
    }
    /*Force an update of the PP buffer pointers.*/
    _dec->pp_frame_state=0;
  }
  /*Update the PP buffer pointers if necessary.*/
  if(_dec->pp_frame_state!=1+(_dec->pp_level>=OC_PP_LEVEL_DEBLOCKC)){
    if(_dec->pp_level<OC_PP_LEVEL_DEBLOCKC){
      /*If chroma processing is disabled, just use the PP luma plane.
        The negative stride stores the plane bottom-up; data points at the
         start of the last row of the buffer.*/
      _dec->pp_frame_buf[0].width=_dec->state.info.frame_width;
      _dec->pp_frame_buf[0].height=_dec->state.info.frame_height;
      _dec->pp_frame_buf[0].stride=-_dec->pp_frame_buf[0].width;
      _dec->pp_frame_buf[0].data=_dec->pp_frame_data+
       (1-_dec->pp_frame_buf[0].height)*(ptrdiff_t)_dec->pp_frame_buf[0].stride;
    }
    else{
      size_t y_sz;
      size_t c_sz;
      int    c_w;
      int    c_h;
      /*Otherwise, set up pointers to all three PP planes.*/
      y_sz=_dec->state.info.frame_width*(size_t)_dec->state.info.frame_height;
      c_w=_dec->state.info.frame_width>>!(_dec->state.info.pixel_fmt&1);
      c_h=_dec->state.info.frame_height>>!(_dec->state.info.pixel_fmt&2);
      c_sz=c_w*(size_t)c_h;
      _dec->pp_frame_buf[0].width=_dec->state.info.frame_width;
      _dec->pp_frame_buf[0].height=_dec->state.info.frame_height;
      _dec->pp_frame_buf[0].stride=_dec->pp_frame_buf[0].width;
      _dec->pp_frame_buf[0].data=_dec->pp_frame_data;
      _dec->pp_frame_buf[1].width=c_w;
      _dec->pp_frame_buf[1].height=c_h;
      _dec->pp_frame_buf[1].stride=_dec->pp_frame_buf[1].width;
      _dec->pp_frame_buf[1].data=_dec->pp_frame_buf[0].data+y_sz;
      _dec->pp_frame_buf[2].width=c_w;
      _dec->pp_frame_buf[2].height=c_h;
      _dec->pp_frame_buf[2].stride=_dec->pp_frame_buf[2].width;
      _dec->pp_frame_buf[2].data=_dec->pp_frame_buf[1].data+c_sz;
      oc_ycbcr_buffer_flip(_dec->pp_frame_buf,_dec->pp_frame_buf);
    }
    _dec->pp_frame_state=1+(_dec->pp_level>=OC_PP_LEVEL_DEBLOCKC);
  }
  /*If we're not processing chroma, copy the reference frame's chroma planes.*/
  if(_dec->pp_level<OC_PP_LEVEL_DEBLOCKC){
    memcpy(_dec->pp_frame_buf+1,
     _dec->state.ref_frame_bufs[_dec->state.ref_frame_idx[OC_FRAME_SELF]]+1,
     sizeof(_dec->pp_frame_buf[1])*2);
  }
  return 0;
}
1327
1328
/*Initialize the main decoding pipeline: per-plane token and fragment-list
   pointers, condensed quantizer tables, DC predictors, the loop filter, and
   post-processing state.*/
static void oc_dec_pipeline_init(oc_dec_ctx *_dec,
 oc_dec_pipeline_state *_pipe){
  const ptrdiff_t *coded_fragis;
  const ptrdiff_t *uncoded_fragis;
  int              flimit;
  int              pli;
  int              qii;
  int              qti;
  int              zzi;
  /*If chroma is sub-sampled in the vertical direction, we have to decode two
     super block rows of Y' for each super block row of Cb and Cr.*/
  _pipe->mcu_nvfrags=4<<!(_dec->state.info.pixel_fmt&2);
  /*Initialize the token and extra bits indices for each plane and
     coefficient.*/
  memcpy(_pipe->ti,_dec->ti0,sizeof(_pipe->ti));
  /*Also copy over the initial EOB run counts.*/
  memcpy(_pipe->eob_runs,_dec->eob_runs,sizeof(_pipe->eob_runs));
  /*Set up per-plane pointers to the coded and uncoded fragments lists.
    The uncoded list pointers are decremented before each use during
     reconstruction, so they start past the end of each plane's entries.*/
  coded_fragis=_dec->state.coded_fragis;
  uncoded_fragis=coded_fragis+_dec->state.nfrags;
  for(pli=0;pli<3;pli++){
    ptrdiff_t ncoded_fragis;
    _pipe->coded_fragis[pli]=coded_fragis;
    _pipe->uncoded_fragis[pli]=uncoded_fragis;
    ncoded_fragis=_dec->state.ncoded_fragis[pli];
    coded_fragis+=ncoded_fragis;
    uncoded_fragis+=ncoded_fragis-_dec->state.fplanes[pli].nfrags;
  }
  /*Set up condensed quantizer tables.*/
  for(pli=0;pli<3;pli++){
    for(qii=0;qii<_dec->state.nqis;qii++){
      for(qti=0;qti<2;qti++){
        _pipe->dequant[pli][qii][qti]=
         _dec->state.dequant_tables[_dec->state.qis[qii]][pli][qti];
      }
    }
  }
  /*Set the previous DC predictor to 0 for all color planes and frame types.*/
  memset(_pipe->pred_last,0,sizeof(_pipe->pred_last));
  /*Initialize the bounding value array for the loop filter.*/
  flimit=_dec->state.loop_filter_limits[_dec->state.qis[0]];
  _pipe->loop_filter=flimit!=0;
  if(flimit!=0)oc_loop_filter_init(&_dec->state,_pipe->bounding_values,flimit);
  /*Initialize any buffers needed for post-processing.
    We also save the current post-processing level, to guard against the user
     changing it from a callback.*/
  if(!oc_dec_postprocess_init(_dec))_pipe->pp_level=_dec->pp_level;
  /*If we don't have enough information to post-process, disable it, regardless
     of the user-requested level.*/
  else{
    _pipe->pp_level=OC_PP_LEVEL_DISABLED;
    memcpy(_dec->pp_frame_buf,
     _dec->state.ref_frame_bufs[_dec->state.ref_frame_idx[OC_FRAME_SELF]],
     sizeof(_dec->pp_frame_buf[0])*3);
  }
  /*Clear down the DCT coefficient buffer for the first block.*/
  for(zzi=0;zzi<64;zzi++)_pipe->dct_coeffs[zzi]=0;
}
1388
/*Undo the DC prediction in a single plane of an MCU (one or two super block
   rows).
  As a side effect, the number of coded and uncoded fragments in this plane of
   the MCU is also computed.*/
void oc_dec_dc_unpredict_mcu_plane_c(oc_dec_ctx *_dec,
 oc_dec_pipeline_state *_pipe,int _pli){
  const oc_fragment_plane *fplane;
  oc_fragment             *frags;
  int                     *pred_last;
  ptrdiff_t                ncoded_fragis;
  ptrdiff_t                fragi;
  int                      fragx;
  int                      fragy;
  int                      fragy0;
  int                      fragy_end;
  int                      nhfrags;
  /*Compute the first and last fragment row of the current MCU for this
     plane.*/
  fplane=_dec->state.fplanes+_pli;
  fragy0=_pipe->fragy0[_pli];
  fragy_end=_pipe->fragy_end[_pli];
  nhfrags=fplane->nhfrags;
  pred_last=_pipe->pred_last[_pli];
  frags=_dec->state.frags;
  ncoded_fragis=0;
  fragi=fplane->froffset+fragy0*(ptrdiff_t)nhfrags;
  for(fragy=fragy0;fragy<fragy_end;fragy++){
    if(fragy==0){
      /*For the first row, all of the cases reduce to just using the previous
         predictor for the same reference frame.*/
      for(fragx=0;fragx<nhfrags;fragx++,fragi++){
        if(frags[fragi].coded){
          int refi;
          refi=frags[fragi].refi;
          pred_last[refi]=frags[fragi].dc+=pred_last[refi];
          ncoded_fragis++;
        }
      }
    }
    else{
      oc_fragment *u_frags;
      int          l_ref;
      int          ul_ref;
      int          u_ref;
      /*u_frags indexes the fragment row directly above the current one.*/
      u_frags=frags-nhfrags;
      l_ref=-1;
      ul_ref=-1;
      u_ref=u_frags[fragi].refi;
      for(fragx=0;fragx<nhfrags;fragx++,fragi++){
        int ur_ref;
        if(fragx+1>=nhfrags)ur_ref=-1;
        else ur_ref=u_frags[fragi+1].refi;
        if(frags[fragi].coded){
          int pred;
          int refi;
          refi=frags[fragi].refi;
          /*We break out a separate case based on which of our neighbors use
             the same reference frames.
            This is somewhat faster than trying to make a generic case which
             handles all of them, since it reduces lots of poorly predicted
             jumps to one switch statement, and also lets a number of the
             multiplications be optimized out by strength reduction.
            Bit 0 of the case index is set when the left neighbor matches this
             fragment's reference, bit 1 for the upper-left, bit 2 for the one
             above, and bit 3 for the upper-right.*/
          switch((l_ref==refi)|(ul_ref==refi)<<1|
           (u_ref==refi)<<2|(ur_ref==refi)<<3){
            default:pred=pred_last[refi];break;
            case 1:
            case 3:pred=frags[fragi-1].dc;break;
            case 2:pred=u_frags[fragi-1].dc;break;
            case 4:
            case 6:
            case 12:pred=u_frags[fragi].dc;break;
            case 5:pred=(frags[fragi-1].dc+u_frags[fragi].dc)/2;break;
            case 8:pred=u_frags[fragi+1].dc;break;
            case 9:
            case 11:
            case 13:{
              /*The TI compiler mis-compiles this line.*/
              pred=(75*frags[fragi-1].dc+53*u_frags[fragi+1].dc)/128;
            }break;
            case 10:pred=(u_frags[fragi-1].dc+u_frags[fragi+1].dc)/2;break;
            case 14:{
              pred=(3*(u_frags[fragi-1].dc+u_frags[fragi+1].dc)
               +10*u_frags[fragi].dc)/16;
            }break;
            case 7:
            case 15:{
              int p0;
              int p1;
              int p2;
              p0=frags[fragi-1].dc;
              p1=u_frags[fragi-1].dc;
              p2=u_frags[fragi].dc;
              /*If the weighted prediction strays more than 128 from a
                 neighbor, fall back to the nearest one.*/
              pred=(29*(p0+p2)-26*p1)/32;
              if(abs(pred-p2)>128)pred=p2;
              else if(abs(pred-p0)>128)pred=p0;
              else if(abs(pred-p1)>128)pred=p1;
            }break;
          }
          pred_last[refi]=frags[fragi].dc+=pred;
          ncoded_fragis++;
          l_ref=refi;
        }
        else l_ref=-1;
        /*Shift the neighbor references over for the next column.*/
        ul_ref=u_ref;
        u_ref=ur_ref;
      }
    }
  }
  _pipe->ncoded_fragis[_pli]=ncoded_fragis;
  /*Also save the number of uncoded fragments so we know how many to copy.*/
  _pipe->nuncoded_fragis[_pli]=
   (fragy_end-fragy0)*(ptrdiff_t)nhfrags-ncoded_fragis;
}
1502
/*Reconstructs all coded fragments in a single MCU (one or two super block
   rows).
  This requires that each coded fragment have a proper macro block mode and
   motion vector (if not in INTRA mode), and have its DC value decoded, with
   the DC prediction process reversed, and the number of coded and uncoded
   fragments in this plane of the MCU be counted.
  The token lists for each color plane and coefficient should also be filled
   in, along with initial token offsets, extra bits offsets, and EOB run
   counts.*/
static void oc_dec_frags_recon_mcu_plane(oc_dec_ctx *_dec,
 oc_dec_pipeline_state *_pipe,int _pli){
  unsigned char       *dct_tokens;
  const unsigned char *dct_fzig_zag;
  ogg_uint16_t         dc_quant[2];
  const oc_fragment   *frags;
  const ptrdiff_t     *coded_fragis;
  ptrdiff_t            ncoded_fragis;
  ptrdiff_t            fragii;
  ptrdiff_t           *ti;
  ptrdiff_t           *eob_runs;
  int                  qti;
  dct_tokens=_dec->dct_tokens;
  dct_fzig_zag=_dec->state.opt_data.dct_fzig_zag;
  frags=_dec->state.frags;
  coded_fragis=_pipe->coded_fragis[_pli];
  ncoded_fragis=_pipe->ncoded_fragis[_pli];
  ti=_pipe->ti[_pli];
  eob_runs=_pipe->eob_runs[_pli];
  /*DC always dequantizes with qii 0's quantizer, one per frame type.*/
  for(qti=0;qti<2;qti++)dc_quant[qti]=_pipe->dequant[_pli][0][qti][0];
  for(fragii=0;fragii<ncoded_fragis;fragii++){
    const ogg_uint16_t *ac_quant;
    ptrdiff_t           fragi;
    int                 last_zzi;
    int                 zzi;
    fragi=coded_fragis[fragii];
    /*qti is 0 for INTRA blocks and 1 for every INTER mode.*/
    qti=frags[fragi].mb_mode!=OC_MODE_INTRA;
    ac_quant=_pipe->dequant[_pli][frags[fragi].qii][qti];
    /*Decode the AC coefficients.*/
    for(zzi=0;zzi<64;){
      int token;
      last_zzi=zzi;
      if(eob_runs[zzi]){
        /*This fragment is covered by an outstanding EOB run; consume one.*/
        eob_runs[zzi]--;
        break;
      }
      else{
        ptrdiff_t eob;
        int       cw;
        int       rlen;
        int       coeff;
        int       lti;
        /*Pull the next token off the list for this coefficient.*/
        lti=ti[zzi];
        token=dct_tokens[lti++];
        cw=OC_DCT_CODE_WORD[token];
        /*These parts could be done branchless, but the branches are fairly
           predictable and the C code translates into more than a few
           instructions, so it's worth it to avoid them.*/
        if(OC_DCT_TOKEN_NEEDS_MORE(token)){
          cw+=dct_tokens[lti++]<<OC_DCT_TOKEN_EB_POS(token);
        }
        eob=cw>>OC_DCT_CW_EOB_SHIFT&0xFFF;
        if(token==OC_DCT_TOKEN_FAT_EOB){
          /*FAT_EOB stores a second byte of run length; a stored value of zero
             means an "infinite" EOB run.*/
          eob+=dct_tokens[lti++]<<8;
          if(eob==0)eob=OC_DCT_EOB_FINISH;
        }
        rlen=(unsigned char)(cw>>OC_DCT_CW_RLEN_SHIFT);
        /*Conditionally negate the magnitude using the flip bit.*/
        cw^=-(cw&1<<OC_DCT_CW_FLIP_BIT);
        coeff=cw>>OC_DCT_CW_MAG_SHIFT;
        eob_runs[zzi]=eob;
        ti[zzi]=lti;
        /*Skip over the zero run, deposit the coefficient, then advance past
           it unless this token also started an EOB run.*/
        zzi+=rlen;
        _pipe->dct_coeffs[dct_fzig_zag[zzi]]=
         (ogg_int16_t)(coeff*(int)ac_quant[zzi]);
        zzi+=!eob;
      }
    }
    /*TODO: zzi should be exactly 64 here.
      If it's not, we should report some kind of warning.*/
    zzi=OC_MINI(zzi,64);
    _pipe->dct_coeffs[0]=(ogg_int16_t)frags[fragi].dc;
    /*last_zzi is always initialized.
      If your compiler thinks otherwise, it is dumb.*/
    oc_state_frag_recon(&_dec->state,fragi,_pli,
     _pipe->dct_coeffs,last_zzi,dc_quant[qti]);
  }
  _pipe->coded_fragis[_pli]+=ncoded_fragis;
  /*Right now the reconstructed MCU has only the coded blocks in it.*/
  /*TODO: We make the decision here to always copy the uncoded blocks into it
     from the reference frame.
    We could also copy the coded blocks back over the reference frame, if we
     wait for an additional MCU to be decoded, which might be faster if only a
     small number of blocks are coded.
    However, this introduces more latency, creating a larger cache footprint.
    It's unknown which decision is better, but this one results in simpler
     code, and the hard case (high bitrate, high resolution) is handled
     correctly.*/
  /*Copy the uncoded blocks from the previous reference frame.*/
  if(_pipe->nuncoded_fragis[_pli]>0){
    _pipe->uncoded_fragis[_pli]-=_pipe->nuncoded_fragis[_pli];
    oc_frag_copy_list(&_dec->state,
     _dec->state.ref_frame_data[OC_FRAME_SELF],
     _dec->state.ref_frame_data[OC_FRAME_PREV],
     _dec->state.ref_ystride[_pli],_pipe->uncoded_fragis[_pli],
     _pipe->nuncoded_fragis[_pli],_dec->state.frag_buf_offs);
  }
}
1609
/*Filter a horizontal block edge.
  For each of the 8 columns, ten vertically adjacent source pixels spanning
   the edge are examined; if the edge is smooth enough (and the step across it
   small enough), a low-pass kernel is written to the 8 output rows, otherwise
   the middle 8 source pixels are copied through unchanged.
  The clamped activity sums for the blocks above and below the edge are
   accumulated into *_variance0 and *_variance1.*/
static void oc_filter_hedge(unsigned char *_dst,int _dst_ystride,
 const unsigned char *_src,int _src_ystride,int _qstep,int _flimit,
 int *_variance0,int *_variance1){
  int x;
  for(x=0;x<8;x++){
    const unsigned char *s;
    unsigned char       *d;
    int                  r[10];
    int                  sum0;
    int                  sum1;
    int                  i;
    /*Gather the 10 pixels of this column into r[].*/
    s=_src+x;
    for(i=0;i<10;i++){
      r[i]=*s;
      s+=_src_ystride;
    }
    /*Measure activity on either side of the edge.*/
    sum0=0;
    sum1=0;
    for(i=0;i<4;i++){
      sum0+=abs(r[i+1]-r[i]);
      sum1+=abs(r[i+5]-r[i+6]);
    }
    *_variance0+=OC_MINI(255,sum0);
    *_variance1+=OC_MINI(255,sum1);
    d=_dst+x;
    if(sum0<_flimit&&sum1<_flimit&&r[5]-r[4]<_qstep&&r[4]-r[5]<_qstep){
      /*Smooth edge: apply the low-pass kernel down the column.*/
      *d=(unsigned char)((r[0]*3+r[1]*2+r[2]+r[3]+r[4]+4)>>3);
      d+=_dst_ystride;
      *d=(unsigned char)((r[0]*2+r[1]+r[2]*2+r[3]+r[4]+r[5]+4)>>3);
      d+=_dst_ystride;
      for(i=0;i<4;i++){
        *d=(unsigned char)((r[i]+r[i+1]+r[i+2]+r[i+3]*2+
         r[i+4]+r[i+5]+r[i+6]+4)>>3);
        d+=_dst_ystride;
      }
      *d=(unsigned char)((r[4]+r[5]+r[6]+r[7]*2+r[8]+r[9]*2+4)>>3);
      d+=_dst_ystride;
      *d=(unsigned char)((r[5]+r[6]+r[7]+r[8]*2+r[9]*3+4)>>3);
    }
    else{
      /*Busy edge: pass the middle 8 pixels through untouched.*/
      for(i=1;i<=8;i++){
        *d=(unsigned char)r[i];
        d+=_dst_ystride;
      }
    }
  }
}
1663
/*Filter a vertical block edge.
  For each of the 8 rows, ten horizontally adjacent pixels spanning the edge
   (starting one pixel to the left of _dst) are examined; if the edge is
   smooth enough, a low-pass kernel overwrites the 8 pixels starting at _dst,
   otherwise the row is left untouched.
  The clamped activity sums for the blocks left and right of the edge are
   accumulated into _variances[0] and _variances[1].*/
static void oc_filter_vedge(unsigned char *_dst,int _dst_ystride,
 int _qstep,int _flimit,int *_variances){
  unsigned char *row;
  int            y;
  row=_dst;
  for(y=0;y<8;y++){
    unsigned char *d;
    int            r[10];
    int            sum0;
    int            sum1;
    int            i;
    /*Gather the 10 pixels of this row, one left of the edge through eight
       past it.*/
    for(i=0;i<10;i++)r[i]=row[i-1];
    /*Measure activity on either side of the edge.*/
    sum0=0;
    sum1=0;
    for(i=0;i<4;i++){
      sum0+=abs(r[i+1]-r[i]);
      sum1+=abs(r[i+5]-r[i+6]);
    }
    _variances[0]+=OC_MINI(255,sum0);
    _variances[1]+=OC_MINI(255,sum1);
    if(sum0<_flimit&&sum1<_flimit&&r[5]-r[4]<_qstep&&r[4]-r[5]<_qstep){
      /*Smooth edge: apply the low-pass kernel across the row.*/
      d=row;
      *d++=(unsigned char)((r[0]*3+r[1]*2+r[2]+r[3]+r[4]+4)>>3);
      *d++=(unsigned char)((r[0]*2+r[1]+r[2]*2+r[3]+r[4]+r[5]+4)>>3);
      for(i=0;i<4;i++){
        *d++=(unsigned char)((r[i]+r[i+1]+r[i+2]+r[i+3]*2+
         r[i+4]+r[i+5]+r[i+6]+4)>>3);
      }
      *d++=(unsigned char)((r[4]+r[5]+r[6]+r[7]*2+r[8]+r[9]*2+4)>>3);
      *d=(unsigned char)((r[5]+r[6]+r[7]+r[8]*2+r[9]*3+4)>>3);
    }
    row+=_dst_ystride;
  }
}
1700
/*Deblocks a range of fragment rows of one plane, reading from _src and
   writing the filtered result into _dst.
  Horizontal edges are filtered in place by oc_filter_hedge (which also
   copies the pixels), and vertical edges are then filtered within _dst;
   rows with no edge above/below are copied through directly.
  Per-fragment activity sums are accumulated into _dec->variances.*/
static void oc_dec_deblock_frag_rows(oc_dec_ctx *_dec,
 th_img_plane *_dst,th_img_plane *_src,int _pli,int _fragy0,
 int _fragy_end){
  oc_fragment_plane   *fplane;
  int                 *variance;
  unsigned char       *dc_qi;
  unsigned char       *dst;
  const unsigned char *src;
  ptrdiff_t            froffset;
  int                  dst_ystride;
  int                  src_ystride;
  int                  nhfrags;
  int                  width;
  int                  notstart;
  int                  notdone;
  int                  flimit;
  int                  qstep;
  int                  y_end;
  int                  y;
  int                  x;
  _dst+=_pli;
  _src+=_pli;
  fplane=_dec->state.fplanes+_pli;
  nhfrags=fplane->nhfrags;
  froffset=fplane->froffset+_fragy0*(ptrdiff_t)nhfrags;
  variance=_dec->variances+froffset;
  dc_qi=_dec->dc_qis+froffset;
  notstart=_fragy0>0;
  notdone=_fragy_end<fplane->nvfrags;
  /*We want to clear an extra row of variances, except at the end.*/
  memset(variance+(nhfrags&-notstart),0,
   (_fragy_end+notdone-_fragy0-notstart)*(nhfrags*sizeof(variance[0])));
  /*Except for the first time, we want to point to the middle of the row.*/
  y=(_fragy0<<3)+(notstart<<2);
  dst_ystride=_dst->stride;
  src_ystride=_src->stride;
  dst=_dst->data+y*(ptrdiff_t)dst_ystride;
  src=_src->data+y*(ptrdiff_t)src_ystride;
  width=_dst->width;
  /*The first four rows of the frame have no horizontal edge above them;
     just copy them through.*/
  for(;y<4;y++){
    memcpy(dst,src,width*sizeof(dst[0]));
    dst+=dst_ystride;
    src+=src_ystride;
  }
  /*We also want to skip the last row in the frame for this loop.
    Note the shift binds after the subtraction: (_fragy_end-!notdone)<<3.*/
  y_end=_fragy_end-!notdone<<3;
  for(;y<y_end;y+=8){
    /*The filter strength is derived from the block's tracked DC quantizer
       scale.*/
    qstep=_dec->pp_dc_scale[*dc_qi];
    flimit=(qstep*3)>>2;
    oc_filter_hedge(dst,dst_ystride,src-src_ystride,src_ystride,
     qstep,flimit,variance,variance+nhfrags);
    variance++;
    dc_qi++;
    for(x=8;x<width;x+=8){
      qstep=_dec->pp_dc_scale[*dc_qi];
      flimit=(qstep*3)>>2;
      oc_filter_hedge(dst+x,dst_ystride,src+x-src_ystride,src_ystride,
       qstep,flimit,variance,variance+nhfrags);
      /*The vertical edge filtered here is the one 4 rows up and between the
         previous fragment and this one.*/
      oc_filter_vedge(dst+x-(dst_ystride<<2)-4,dst_ystride,
       qstep,flimit,variance-1);
      variance++;
      dc_qi++;
    }
    dst+=dst_ystride<<3;
    src+=src_ystride<<3;
  }
  /*And finally, handle the last row in the frame, if it's in the range.*/
  if(!notdone){
    int height;
    height=_dst->height;
    for(;y<height;y++){
      memcpy(dst,src,width*sizeof(dst[0]));
      dst+=dst_ystride;
      src+=src_ystride;
    }
    /*Filter the last row of vertical block edges.*/
    dc_qi++;
    for(x=8;x<width;x+=8){
      qstep=_dec->pp_dc_scale[*dc_qi++];
      flimit=(qstep*3)>>2;
      oc_filter_vedge(dst+x-(dst_ystride<<3)-4,dst_ystride,
       qstep,flimit,variance++);
    }
  }
}
1786
/*Applies a deringing filter to a single 8x8 block.
  Each pixel is blended with its four neighbors, weighted by per-pair
   modifiers derived from the local gradient and the DC quantizer scale.
  _idata:     Pointer to the top-left pixel of the block.
  _ystride:   The stride between image rows.
  _b:         Border flags; a set bit means the neighboring pixels on that
               side lie outside the frame and are clamped instead of read
               (1: left, 2: right, 4: top, 8: bottom).
  _dc_scale:  The DC quantizer scale used to derive the modifier limits.
  _sharp_mod: The modifier used for very sharp (mod<-64) transitions.
  _strong:    Selects the stronger variant (wider limit, no gradient
               down-shift) when non-zero.*/
static void oc_dering_block(unsigned char *_idata,int _ystride,int _b,
 int _dc_scale,int _sharp_mod,int _strong){
  static const unsigned char OC_MOD_MAX[2]={24,32};
  static const unsigned char OC_MOD_SHIFT[2]={1,0};
  const unsigned char *psrc;
  const unsigned char *src;
  const unsigned char *nsrc;
  unsigned char       *dst;
  int                  vmod[72];
  int                  hmod[72];
  int                  mod_hi;
  int                  by;
  int                  bx;
  mod_hi=OC_MINI(3*_dc_scale,OC_MOD_MAX[_strong]);
  dst=_idata;
  src=dst;
  /*Compute the modifier for each vertical neighbor pair (9 rows of pairs).
    When the top border flag is set, the row above is clamped to the first
     row (psrc==src).*/
  psrc=src-(_ystride&-!(_b&4));
  for(by=0;by<9;by++){
    for(bx=0;bx<8;bx++){
      int mod;
      mod=32+_dc_scale-(abs(src[bx]-psrc[bx])<<OC_MOD_SHIFT[_strong]);
      vmod[(by<<3)+bx]=mod<-64?_sharp_mod:OC_CLAMPI(0,mod,mod_hi);
    }
    psrc=src;
    /*Don't step past the last row when the bottom border flag is set.*/
    src+=_ystride&-(!(_b&8)|by<7);
  }
  /*Compute the modifier for each horizontal neighbor pair (9 columns of
     pairs); the left/right border flags clamp the outside columns.*/
  nsrc=dst;
  psrc=dst-!(_b&1);
  for(bx=0;bx<9;bx++){
    src=nsrc;
    for(by=0;by<8;by++){
      int mod;
      mod=32+_dc_scale-(abs(*src-*psrc)<<OC_MOD_SHIFT[_strong]);
      hmod[(bx<<3)+by]=mod<-64?_sharp_mod:OC_CLAMPI(0,mod,mod_hi);
      psrc+=_ystride;
      src+=_ystride;
    }
    psrc=nsrc;
    /*Don't step past the last column when the right border flag is set.*/
    nsrc+=!(_b&2)|bx<7;
  }
  /*Blend each pixel with its four neighbors using the modifiers above.
    a accumulates the center weight, b the weighted neighbor sum (plus
     rounding); the result is (a*center+b)>>7, clamped to 8 bits.*/
  src=dst;
  psrc=src-(_ystride&-!(_b&4));
  nsrc=src+_ystride;
  for(by=0;by<8;by++){
    int a;
    int b;
    int w;
    /*First pixel of the row; the left neighbor may be clamped.*/
    a=128;
    b=64;
    w=hmod[by];
    a-=w;
    b+=w**(src-!(_b&1));
    w=vmod[by<<3];
    a-=w;
    b+=w*psrc[0];
    /*Note: by+1<<3 parses as (by+1)<<3.*/
    w=vmod[by+1<<3];
    a-=w;
    b+=w*nsrc[0];
    w=hmod[(1<<3)+by];
    a-=w;
    b+=w*src[1];
    dst[0]=OC_CLAMP255(a*src[0]+b>>7);
    /*Interior pixels.*/
    for(bx=1;bx<7;bx++){
      a=128;
      b=64;
      w=hmod[(bx<<3)+by];
      a-=w;
      b+=w*src[bx-1];
      w=vmod[(by<<3)+bx];
      a-=w;
      b+=w*psrc[bx];
      w=vmod[(by+1<<3)+bx];
      a-=w;
      b+=w*nsrc[bx];
      w=hmod[(bx+1<<3)+by];
      a-=w;
      b+=w*src[bx+1];
      dst[bx]=OC_CLAMP255(a*src[bx]+b>>7);
    }
    /*Last pixel of the row; the right neighbor may be clamped.*/
    a=128;
    b=64;
    w=hmod[(7<<3)+by];
    a-=w;
    b+=w*src[6];
    w=vmod[(by<<3)+7];
    a-=w;
    b+=w*psrc[7];
    w=vmod[(by+1<<3)+7];
    a-=w;
    b+=w*nsrc[7];
    w=hmod[(8<<3)+by];
    a-=w;
    b+=w*src[7+!(_b&2)];
    dst[7]=OC_CLAMP255(a*src[7]+b>>7);
    dst+=_ystride;
    psrc=src;
    src=nsrc;
    /*Don't step past the last row when the bottom border flag is set.*/
    nsrc+=_ystride&-(!(_b&8)|by<6);
  }
}
1887
/*Variance thresholds that select the deringing strength applied to a block
   in oc_dec_dering_frag_rows; higher variance gets stronger (and repeated)
   filtering.*/
#define OC_DERING_THRESH1 (384)
#define OC_DERING_THRESH2 (4*OC_DERING_THRESH1)
#define OC_DERING_THRESH3 (5*OC_DERING_THRESH1)
#define OC_DERING_THRESH4 (10*OC_DERING_THRESH1)
1892
oc_dec_dering_frag_rows(oc_dec_ctx * _dec,th_img_plane * _img,int _pli,int _fragy0,int _fragy_end)1893 static void oc_dec_dering_frag_rows(oc_dec_ctx *_dec,th_img_plane *_img,
1894 int _pli,int _fragy0,int _fragy_end){
1895 th_img_plane *iplane;
1896 oc_fragment_plane *fplane;
1897 oc_fragment *frag;
1898 int *variance;
1899 unsigned char *idata;
1900 ptrdiff_t froffset;
1901 int ystride;
1902 int nhfrags;
1903 int sthresh;
1904 int strong;
1905 int y_end;
1906 int width;
1907 int height;
1908 int y;
1909 int x;
1910 iplane=_img+_pli;
1911 fplane=_dec->state.fplanes+_pli;
1912 nhfrags=fplane->nhfrags;
1913 froffset=fplane->froffset+_fragy0*(ptrdiff_t)nhfrags;
1914 variance=_dec->variances+froffset;
1915 frag=_dec->state.frags+froffset;
1916 strong=_dec->pp_level>=(_pli?OC_PP_LEVEL_SDERINGC:OC_PP_LEVEL_SDERINGY);
1917 sthresh=_pli?OC_DERING_THRESH4:OC_DERING_THRESH3;
1918 y=_fragy0<<3;
1919 ystride=iplane->stride;
1920 idata=iplane->data+y*(ptrdiff_t)ystride;
1921 y_end=_fragy_end<<3;
1922 width=iplane->width;
1923 height=iplane->height;
1924 for(;y<y_end;y+=8){
1925 for(x=0;x<width;x+=8){
1926 int b;
1927 int qi;
1928 int var;
1929 qi=_dec->state.qis[frag->qii];
1930 var=*variance;
1931 b=(x<=0)|(x+8>=width)<<1|(y<=0)<<2|(y+8>=height)<<3;
1932 if(strong&&var>sthresh){
1933 oc_dering_block(idata+x,ystride,b,
1934 _dec->pp_dc_scale[qi],_dec->pp_sharp_mod[qi],1);
1935 if(_pli||!(b&1)&&*(variance-1)>OC_DERING_THRESH4||
1936 !(b&2)&&variance[1]>OC_DERING_THRESH4||
1937 !(b&4)&&*(variance-nhfrags)>OC_DERING_THRESH4||
1938 !(b&8)&&variance[nhfrags]>OC_DERING_THRESH4){
1939 oc_dering_block(idata+x,ystride,b,
1940 _dec->pp_dc_scale[qi],_dec->pp_sharp_mod[qi],1);
1941 oc_dering_block(idata+x,ystride,b,
1942 _dec->pp_dc_scale[qi],_dec->pp_sharp_mod[qi],1);
1943 }
1944 }
1945 else if(var>OC_DERING_THRESH2){
1946 oc_dering_block(idata+x,ystride,b,
1947 _dec->pp_dc_scale[qi],_dec->pp_sharp_mod[qi],1);
1948 }
1949 else if(var>OC_DERING_THRESH1){
1950 oc_dering_block(idata+x,ystride,b,
1951 _dec->pp_dc_scale[qi],_dec->pp_sharp_mod[qi],0);
1952 }
1953 frag++;
1954 variance++;
1955 }
1956 idata+=ystride<<3;
1957 }
1958 }
1959
1960
1961
th_decode_alloc(const th_info * _info,const th_setup_info * _setup)1962 th_dec_ctx *th_decode_alloc(const th_info *_info,const th_setup_info *_setup){
1963 oc_dec_ctx *dec;
1964 if(_info==NULL||_setup==NULL)return NULL;
1965 dec=oc_aligned_malloc(sizeof(*dec),16);
1966 if(dec==NULL||oc_dec_init(dec,_info,_setup)<0){
1967 oc_aligned_free(dec);
1968 return NULL;
1969 }
1970 dec->state.curframe_num=0;
1971 return dec;
1972 }
1973
th_decode_free(th_dec_ctx * _dec)1974 void th_decode_free(th_dec_ctx *_dec){
1975 if(_dec!=NULL){
1976 oc_dec_clear(_dec);
1977 oc_aligned_free(_dec);
1978 }
1979 }
1980
th_decode_ctl(th_dec_ctx * _dec,int _req,void * _buf,size_t _buf_sz)1981 int th_decode_ctl(th_dec_ctx *_dec,int _req,void *_buf,
1982 size_t _buf_sz){
1983 switch(_req){
1984 case TH_DECCTL_GET_PPLEVEL_MAX:{
1985 if(_dec==NULL||_buf==NULL)return TH_EFAULT;
1986 if(_buf_sz!=sizeof(int))return TH_EINVAL;
1987 (*(int *)_buf)=OC_PP_LEVEL_MAX;
1988 return 0;
1989 }break;
1990 case TH_DECCTL_SET_PPLEVEL:{
1991 int pp_level;
1992 if(_dec==NULL||_buf==NULL)return TH_EFAULT;
1993 if(_buf_sz!=sizeof(int))return TH_EINVAL;
1994 pp_level=*(int *)_buf;
1995 if(pp_level<0||pp_level>OC_PP_LEVEL_MAX)return TH_EINVAL;
1996 _dec->pp_level=pp_level;
1997 return 0;
1998 }break;
1999 case TH_DECCTL_SET_GRANPOS:{
2000 ogg_int64_t granpos;
2001 if(_dec==NULL||_buf==NULL)return TH_EFAULT;
2002 if(_buf_sz!=sizeof(ogg_int64_t))return TH_EINVAL;
2003 granpos=*(ogg_int64_t *)_buf;
2004 if(granpos<0)return TH_EINVAL;
2005 _dec->state.granpos=granpos;
2006 _dec->state.keyframe_num=(granpos>>_dec->state.info.keyframe_granule_shift)
2007 -_dec->state.granpos_bias;
2008 _dec->state.curframe_num=_dec->state.keyframe_num
2009 +(granpos&(1<<_dec->state.info.keyframe_granule_shift)-1);
2010 return 0;
2011 }break;
2012 case TH_DECCTL_SET_STRIPE_CB:{
2013 th_stripe_callback *cb;
2014 if(_dec==NULL||_buf==NULL)return TH_EFAULT;
2015 if(_buf_sz!=sizeof(th_stripe_callback))return TH_EINVAL;
2016 cb=(th_stripe_callback *)_buf;
2017 _dec->stripe_cb.ctx=cb->ctx;
2018 _dec->stripe_cb.stripe_decoded=cb->stripe_decoded;
2019 return 0;
2020 }break;
2021 #ifdef HAVE_CAIRO
2022 case TH_DECCTL_SET_TELEMETRY_MBMODE:{
2023 if(_dec==NULL||_buf==NULL)return TH_EFAULT;
2024 if(_buf_sz!=sizeof(int))return TH_EINVAL;
2025 _dec->telemetry=1;
2026 _dec->telemetry_mbmode=*(int *)_buf;
2027 return 0;
2028 }break;
2029 case TH_DECCTL_SET_TELEMETRY_MV:{
2030 if(_dec==NULL||_buf==NULL)return TH_EFAULT;
2031 if(_buf_sz!=sizeof(int))return TH_EINVAL;
2032 _dec->telemetry=1;
2033 _dec->telemetry_mv=*(int *)_buf;
2034 return 0;
2035 }break;
2036 case TH_DECCTL_SET_TELEMETRY_QI:{
2037 if(_dec==NULL||_buf==NULL)return TH_EFAULT;
2038 if(_buf_sz!=sizeof(int))return TH_EINVAL;
2039 _dec->telemetry=1;
2040 _dec->telemetry_qi=*(int *)_buf;
2041 return 0;
2042 }break;
2043 case TH_DECCTL_SET_TELEMETRY_BITS:{
2044 if(_dec==NULL||_buf==NULL)return TH_EFAULT;
2045 if(_buf_sz!=sizeof(int))return TH_EINVAL;
2046 _dec->telemetry=1;
2047 _dec->telemetry_bits=*(int *)_buf;
2048 return 0;
2049 }break;
2050 #endif
2051 default:return TH_EIMPL;
2052 }
2053 }
2054
2055 /*We're decoding an INTER frame, but have no initialized reference
2056 buffers (i.e., decoding did not start on a key frame).
2057 We initialize them to a solid gray here.*/
oc_dec_init_dummy_frame(th_dec_ctx * _dec)2058 static void oc_dec_init_dummy_frame(th_dec_ctx *_dec){
2059 th_info *info;
2060 size_t yplane_sz;
2061 size_t cplane_sz;
2062 ptrdiff_t yoffset;
2063 int yhstride;
2064 int yheight;
2065 int chstride;
2066 int cheight;
2067 _dec->state.ref_frame_idx[OC_FRAME_GOLD]=0;
2068 _dec->state.ref_frame_idx[OC_FRAME_PREV]=0;
2069 _dec->state.ref_frame_idx[OC_FRAME_SELF]=0;
2070 _dec->state.ref_frame_data[OC_FRAME_GOLD]=
2071 _dec->state.ref_frame_data[OC_FRAME_PREV]=
2072 _dec->state.ref_frame_data[OC_FRAME_SELF]=
2073 _dec->state.ref_frame_bufs[0][0].data;
2074 memcpy(_dec->pp_frame_buf,_dec->state.ref_frame_bufs[0],
2075 sizeof(_dec->pp_frame_buf[0])*3);
2076 info=&_dec->state.info;
2077 yhstride=abs(_dec->state.ref_ystride[0]);
2078 yheight=info->frame_height+2*OC_UMV_PADDING;
2079 chstride=abs(_dec->state.ref_ystride[1]);
2080 cheight=yheight>>!(info->pixel_fmt&2);
2081 yplane_sz=yhstride*(size_t)yheight+16;
2082 cplane_sz=chstride*(size_t)cheight;
2083 yoffset=yhstride*(ptrdiff_t)(yheight-OC_UMV_PADDING-1)+OC_UMV_PADDING;
2084 memset(_dec->state.ref_frame_data[0]-yoffset,0x80,yplane_sz+2*cplane_sz);
2085 }
2086
2087 #if defined(HAVE_CAIRO)
oc_render_telemetry(th_dec_ctx * _dec,th_ycbcr_buffer _ycbcr,int _telemetry)2088 static void oc_render_telemetry(th_dec_ctx *_dec,th_ycbcr_buffer _ycbcr,
2089 int _telemetry){
2090 /*Stuff the plane into cairo.*/
2091 cairo_surface_t *cs;
2092 unsigned char *data;
2093 unsigned char *y_row;
2094 unsigned char *u_row;
2095 unsigned char *v_row;
2096 unsigned char *rgb_row;
2097 int cstride;
2098 int w;
2099 int h;
2100 int x;
2101 int y;
2102 int hdec;
2103 int vdec;
2104 w=_ycbcr[0].width;
2105 h=_ycbcr[0].height;
2106 hdec=!(_dec->state.info.pixel_fmt&1);
2107 vdec=!(_dec->state.info.pixel_fmt&2);
2108 /*Lazy data buffer init.
2109 We could try to re-use the post-processing buffer, which would save
2110 memory, but complicate the allocation logic there.
2111 I don't think anyone cares about memory usage when using telemetry; it is
2112 not meant for embedded devices.*/
2113 if(_dec->telemetry_frame_data==NULL){
2114 _dec->telemetry_frame_data=_ogg_malloc(
2115 (w*h+2*(w>>hdec)*(h>>vdec))*sizeof(*_dec->telemetry_frame_data));
2116 if(_dec->telemetry_frame_data==NULL)return;
2117 }
2118 cs=cairo_image_surface_create(CAIRO_FORMAT_RGB24,w,h);
2119 /*Sadly, no YUV support in Cairo (yet); convert into the RGB buffer.*/
2120 data=cairo_image_surface_get_data(cs);
2121 if(data==NULL){
2122 cairo_surface_destroy(cs);
2123 return;
2124 }
2125 cstride=cairo_image_surface_get_stride(cs);
2126 y_row=_ycbcr[0].data;
2127 u_row=_ycbcr[1].data;
2128 v_row=_ycbcr[2].data;
2129 rgb_row=data;
2130 for(y=0;y<h;y++){
2131 for(x=0;x<w;x++){
2132 int r;
2133 int g;
2134 int b;
2135 r=(1904000*y_row[x]+2609823*v_row[x>>hdec]-363703744)/1635200;
2136 g=(3827562*y_row[x]-1287801*u_row[x>>hdec]
2137 -2672387*v_row[x>>hdec]+447306710)/3287200;
2138 b=(952000*y_row[x]+1649289*u_row[x>>hdec]-225932192)/817600;
2139 rgb_row[4*x+0]=OC_CLAMP255(b);
2140 rgb_row[4*x+1]=OC_CLAMP255(g);
2141 rgb_row[4*x+2]=OC_CLAMP255(r);
2142 }
2143 y_row+=_ycbcr[0].stride;
2144 u_row+=_ycbcr[1].stride&-((y&1)|!vdec);
2145 v_row+=_ycbcr[2].stride&-((y&1)|!vdec);
2146 rgb_row+=cstride;
2147 }
2148 /*Draw coded identifier for each macroblock (stored in Hilbert order).*/
2149 {
2150 cairo_t *c;
2151 const oc_fragment *frags;
2152 oc_mv *frag_mvs;
2153 const signed char *mb_modes;
2154 oc_mb_map *mb_maps;
2155 size_t nmbs;
2156 size_t mbi;
2157 int row2;
2158 int col2;
2159 int qim[3]={0,0,0};
2160 if(_dec->state.nqis==2){
2161 int bqi;
2162 bqi=_dec->state.qis[0];
2163 if(_dec->state.qis[1]>bqi)qim[1]=1;
2164 if(_dec->state.qis[1]<bqi)qim[1]=-1;
2165 }
2166 if(_dec->state.nqis==3){
2167 int bqi;
2168 int cqi;
2169 int dqi;
2170 bqi=_dec->state.qis[0];
2171 cqi=_dec->state.qis[1];
2172 dqi=_dec->state.qis[2];
2173 if(cqi>bqi&&dqi>bqi){
2174 if(dqi>cqi){
2175 qim[1]=1;
2176 qim[2]=2;
2177 }
2178 else{
2179 qim[1]=2;
2180 qim[2]=1;
2181 }
2182 }
2183 else if(cqi<bqi&&dqi<bqi){
2184 if(dqi<cqi){
2185 qim[1]=-1;
2186 qim[2]=-2;
2187 }
2188 else{
2189 qim[1]=-2;
2190 qim[2]=-1;
2191 }
2192 }
2193 else{
2194 if(cqi<bqi)qim[1]=-1;
2195 else qim[1]=1;
2196 if(dqi<bqi)qim[2]=-1;
2197 else qim[2]=1;
2198 }
2199 }
2200 c=cairo_create(cs);
2201 frags=_dec->state.frags;
2202 frag_mvs=_dec->state.frag_mvs;
2203 mb_modes=_dec->state.mb_modes;
2204 mb_maps=_dec->state.mb_maps;
2205 nmbs=_dec->state.nmbs;
2206 row2=0;
2207 col2=0;
2208 for(mbi=0;mbi<nmbs;mbi++){
2209 float x;
2210 float y;
2211 int bi;
2212 y=h-(row2+((col2+1>>1)&1))*16-16;
2213 x=(col2>>1)*16;
2214 cairo_set_line_width(c,1.);
2215 /*Keyframe (all intra) red box.*/
2216 if(_dec->state.frame_type==OC_INTRA_FRAME){
2217 if(_dec->telemetry_mbmode&0x02){
2218 cairo_set_source_rgba(c,1.,0,0,.5);
2219 cairo_rectangle(c,x+2.5,y+2.5,11,11);
2220 cairo_stroke_preserve(c);
2221 cairo_set_source_rgba(c,1.,0,0,.25);
2222 cairo_fill(c);
2223 }
2224 }
2225 else{
2226 ptrdiff_t fragi;
2227 int frag_mvx;
2228 int frag_mvy;
2229 for(bi=0;bi<4;bi++){
2230 fragi=mb_maps[mbi][0][bi];
2231 if(fragi>=0&&frags[fragi].coded){
2232 frag_mvx=OC_MV_X(frag_mvs[fragi]);
2233 frag_mvy=OC_MV_Y(frag_mvs[fragi]);
2234 break;
2235 }
2236 }
2237 if(bi<4){
2238 switch(mb_modes[mbi]){
2239 case OC_MODE_INTRA:{
2240 if(_dec->telemetry_mbmode&0x02){
2241 cairo_set_source_rgba(c,1.,0,0,.5);
2242 cairo_rectangle(c,x+2.5,y+2.5,11,11);
2243 cairo_stroke_preserve(c);
2244 cairo_set_source_rgba(c,1.,0,0,.25);
2245 cairo_fill(c);
2246 }
2247 }break;
2248 case OC_MODE_INTER_NOMV:{
2249 if(_dec->telemetry_mbmode&0x01){
2250 cairo_set_source_rgba(c,0,0,1.,.5);
2251 cairo_rectangle(c,x+2.5,y+2.5,11,11);
2252 cairo_stroke_preserve(c);
2253 cairo_set_source_rgba(c,0,0,1.,.25);
2254 cairo_fill(c);
2255 }
2256 }break;
2257 case OC_MODE_INTER_MV:{
2258 if(_dec->telemetry_mbmode&0x04){
2259 cairo_rectangle(c,x+2.5,y+2.5,11,11);
2260 cairo_set_source_rgba(c,0,1.,0,.5);
2261 cairo_stroke(c);
2262 }
2263 if(_dec->telemetry_mv&0x04){
2264 cairo_move_to(c,x+8+frag_mvx,y+8-frag_mvy);
2265 cairo_set_source_rgba(c,1.,1.,1.,.9);
2266 cairo_set_line_width(c,3.);
2267 cairo_line_to(c,x+8+frag_mvx*.66,y+8-frag_mvy*.66);
2268 cairo_stroke_preserve(c);
2269 cairo_set_line_width(c,2.);
2270 cairo_line_to(c,x+8+frag_mvx*.33,y+8-frag_mvy*.33);
2271 cairo_stroke_preserve(c);
2272 cairo_set_line_width(c,1.);
2273 cairo_line_to(c,x+8,y+8);
2274 cairo_stroke(c);
2275 }
2276 }break;
2277 case OC_MODE_INTER_MV_LAST:{
2278 if(_dec->telemetry_mbmode&0x08){
2279 cairo_rectangle(c,x+2.5,y+2.5,11,11);
2280 cairo_set_source_rgba(c,0,1.,0,.5);
2281 cairo_move_to(c,x+13.5,y+2.5);
2282 cairo_line_to(c,x+2.5,y+8);
2283 cairo_line_to(c,x+13.5,y+13.5);
2284 cairo_stroke(c);
2285 }
2286 if(_dec->telemetry_mv&0x08){
2287 cairo_move_to(c,x+8+frag_mvx,y+8-frag_mvy);
2288 cairo_set_source_rgba(c,1.,1.,1.,.9);
2289 cairo_set_line_width(c,3.);
2290 cairo_line_to(c,x+8+frag_mvx*.66,y+8-frag_mvy*.66);
2291 cairo_stroke_preserve(c);
2292 cairo_set_line_width(c,2.);
2293 cairo_line_to(c,x+8+frag_mvx*.33,y+8-frag_mvy*.33);
2294 cairo_stroke_preserve(c);
2295 cairo_set_line_width(c,1.);
2296 cairo_line_to(c,x+8,y+8);
2297 cairo_stroke(c);
2298 }
2299 }break;
2300 case OC_MODE_INTER_MV_LAST2:{
2301 if(_dec->telemetry_mbmode&0x10){
2302 cairo_rectangle(c,x+2.5,y+2.5,11,11);
2303 cairo_set_source_rgba(c,0,1.,0,.5);
2304 cairo_move_to(c,x+8,y+2.5);
2305 cairo_line_to(c,x+2.5,y+8);
2306 cairo_line_to(c,x+8,y+13.5);
2307 cairo_move_to(c,x+13.5,y+2.5);
2308 cairo_line_to(c,x+8,y+8);
2309 cairo_line_to(c,x+13.5,y+13.5);
2310 cairo_stroke(c);
2311 }
2312 if(_dec->telemetry_mv&0x10){
2313 cairo_move_to(c,x+8+frag_mvx,y+8-frag_mvy);
2314 cairo_set_source_rgba(c,1.,1.,1.,.9);
2315 cairo_set_line_width(c,3.);
2316 cairo_line_to(c,x+8+frag_mvx*.66,y+8-frag_mvy*.66);
2317 cairo_stroke_preserve(c);
2318 cairo_set_line_width(c,2.);
2319 cairo_line_to(c,x+8+frag_mvx*.33,y+8-frag_mvy*.33);
2320 cairo_stroke_preserve(c);
2321 cairo_set_line_width(c,1.);
2322 cairo_line_to(c,x+8,y+8);
2323 cairo_stroke(c);
2324 }
2325 }break;
2326 case OC_MODE_GOLDEN_NOMV:{
2327 if(_dec->telemetry_mbmode&0x20){
2328 cairo_set_source_rgba(c,1.,1.,0,.5);
2329 cairo_rectangle(c,x+2.5,y+2.5,11,11);
2330 cairo_stroke_preserve(c);
2331 cairo_set_source_rgba(c,1.,1.,0,.25);
2332 cairo_fill(c);
2333 }
2334 }break;
2335 case OC_MODE_GOLDEN_MV:{
2336 if(_dec->telemetry_mbmode&0x40){
2337 cairo_rectangle(c,x+2.5,y+2.5,11,11);
2338 cairo_set_source_rgba(c,1.,1.,0,.5);
2339 cairo_stroke(c);
2340 }
2341 if(_dec->telemetry_mv&0x40){
2342 cairo_move_to(c,x+8+frag_mvx,y+8-frag_mvy);
2343 cairo_set_source_rgba(c,1.,1.,1.,.9);
2344 cairo_set_line_width(c,3.);
2345 cairo_line_to(c,x+8+frag_mvx*.66,y+8-frag_mvy*.66);
2346 cairo_stroke_preserve(c);
2347 cairo_set_line_width(c,2.);
2348 cairo_line_to(c,x+8+frag_mvx*.33,y+8-frag_mvy*.33);
2349 cairo_stroke_preserve(c);
2350 cairo_set_line_width(c,1.);
2351 cairo_line_to(c,x+8,y+8);
2352 cairo_stroke(c);
2353 }
2354 }break;
2355 case OC_MODE_INTER_MV_FOUR:{
2356 if(_dec->telemetry_mbmode&0x80){
2357 cairo_rectangle(c,x+2.5,y+2.5,4,4);
2358 cairo_rectangle(c,x+9.5,y+2.5,4,4);
2359 cairo_rectangle(c,x+2.5,y+9.5,4,4);
2360 cairo_rectangle(c,x+9.5,y+9.5,4,4);
2361 cairo_set_source_rgba(c,0,1.,0,.5);
2362 cairo_stroke(c);
2363 }
2364 /*4mv is odd, coded in raster order.*/
2365 fragi=mb_maps[mbi][0][0];
2366 if(frags[fragi].coded&&_dec->telemetry_mv&0x80){
2367 frag_mvx=OC_MV_X(frag_mvs[fragi]);
2368 frag_mvx=OC_MV_Y(frag_mvs[fragi]);
2369 cairo_move_to(c,x+4+frag_mvx,y+12-frag_mvy);
2370 cairo_set_source_rgba(c,1.,1.,1.,.9);
2371 cairo_set_line_width(c,3.);
2372 cairo_line_to(c,x+4+frag_mvx*.66,y+12-frag_mvy*.66);
2373 cairo_stroke_preserve(c);
2374 cairo_set_line_width(c,2.);
2375 cairo_line_to(c,x+4+frag_mvx*.33,y+12-frag_mvy*.33);
2376 cairo_stroke_preserve(c);
2377 cairo_set_line_width(c,1.);
2378 cairo_line_to(c,x+4,y+12);
2379 cairo_stroke(c);
2380 }
2381 fragi=mb_maps[mbi][0][1];
2382 if(frags[fragi].coded&&_dec->telemetry_mv&0x80){
2383 frag_mvx=OC_MV_X(frag_mvs[fragi]);
2384 frag_mvx=OC_MV_Y(frag_mvs[fragi]);
2385 cairo_move_to(c,x+12+frag_mvx,y+12-frag_mvy);
2386 cairo_set_source_rgba(c,1.,1.,1.,.9);
2387 cairo_set_line_width(c,3.);
2388 cairo_line_to(c,x+12+frag_mvx*.66,y+12-frag_mvy*.66);
2389 cairo_stroke_preserve(c);
2390 cairo_set_line_width(c,2.);
2391 cairo_line_to(c,x+12+frag_mvx*.33,y+12-frag_mvy*.33);
2392 cairo_stroke_preserve(c);
2393 cairo_set_line_width(c,1.);
2394 cairo_line_to(c,x+12,y+12);
2395 cairo_stroke(c);
2396 }
2397 fragi=mb_maps[mbi][0][2];
2398 if(frags[fragi].coded&&_dec->telemetry_mv&0x80){
2399 frag_mvx=OC_MV_X(frag_mvs[fragi]);
2400 frag_mvx=OC_MV_Y(frag_mvs[fragi]);
2401 cairo_move_to(c,x+4+frag_mvx,y+4-frag_mvy);
2402 cairo_set_source_rgba(c,1.,1.,1.,.9);
2403 cairo_set_line_width(c,3.);
2404 cairo_line_to(c,x+4+frag_mvx*.66,y+4-frag_mvy*.66);
2405 cairo_stroke_preserve(c);
2406 cairo_set_line_width(c,2.);
2407 cairo_line_to(c,x+4+frag_mvx*.33,y+4-frag_mvy*.33);
2408 cairo_stroke_preserve(c);
2409 cairo_set_line_width(c,1.);
2410 cairo_line_to(c,x+4,y+4);
2411 cairo_stroke(c);
2412 }
2413 fragi=mb_maps[mbi][0][3];
2414 if(frags[fragi].coded&&_dec->telemetry_mv&0x80){
2415 frag_mvx=OC_MV_X(frag_mvs[fragi]);
2416 frag_mvx=OC_MV_Y(frag_mvs[fragi]);
2417 cairo_move_to(c,x+12+frag_mvx,y+4-frag_mvy);
2418 cairo_set_source_rgba(c,1.,1.,1.,.9);
2419 cairo_set_line_width(c,3.);
2420 cairo_line_to(c,x+12+frag_mvx*.66,y+4-frag_mvy*.66);
2421 cairo_stroke_preserve(c);
2422 cairo_set_line_width(c,2.);
2423 cairo_line_to(c,x+12+frag_mvx*.33,y+4-frag_mvy*.33);
2424 cairo_stroke_preserve(c);
2425 cairo_set_line_width(c,1.);
2426 cairo_line_to(c,x+12,y+4);
2427 cairo_stroke(c);
2428 }
2429 }break;
2430 }
2431 }
2432 }
2433 /*qii illustration.*/
2434 if(_dec->telemetry_qi&0x2){
2435 cairo_set_line_cap(c,CAIRO_LINE_CAP_SQUARE);
2436 for(bi=0;bi<4;bi++){
2437 ptrdiff_t fragi;
2438 int qiv;
2439 int xp;
2440 int yp;
2441 xp=x+(bi&1)*8;
2442 yp=y+8-(bi&2)*4;
2443 fragi=mb_maps[mbi][0][bi];
2444 if(fragi>=0&&frags[fragi].coded){
2445 qiv=qim[frags[fragi].qii];
2446 cairo_set_line_width(c,3.);
2447 cairo_set_source_rgba(c,0.,0.,0.,.5);
2448 switch(qiv){
2449 /*Double plus:*/
2450 case 2:{
2451 if((bi&1)^((bi&2)>>1)){
2452 cairo_move_to(c,xp+2.5,yp+1.5);
2453 cairo_line_to(c,xp+2.5,yp+3.5);
2454 cairo_move_to(c,xp+1.5,yp+2.5);
2455 cairo_line_to(c,xp+3.5,yp+2.5);
2456 cairo_move_to(c,xp+5.5,yp+4.5);
2457 cairo_line_to(c,xp+5.5,yp+6.5);
2458 cairo_move_to(c,xp+4.5,yp+5.5);
2459 cairo_line_to(c,xp+6.5,yp+5.5);
2460 cairo_stroke_preserve(c);
2461 cairo_set_source_rgba(c,0.,1.,1.,1.);
2462 }
2463 else{
2464 cairo_move_to(c,xp+5.5,yp+1.5);
2465 cairo_line_to(c,xp+5.5,yp+3.5);
2466 cairo_move_to(c,xp+4.5,yp+2.5);
2467 cairo_line_to(c,xp+6.5,yp+2.5);
2468 cairo_move_to(c,xp+2.5,yp+4.5);
2469 cairo_line_to(c,xp+2.5,yp+6.5);
2470 cairo_move_to(c,xp+1.5,yp+5.5);
2471 cairo_line_to(c,xp+3.5,yp+5.5);
2472 cairo_stroke_preserve(c);
2473 cairo_set_source_rgba(c,0.,1.,1.,1.);
2474 }
2475 }break;
2476 /*Double minus:*/
2477 case -2:{
2478 cairo_move_to(c,xp+2.5,yp+2.5);
2479 cairo_line_to(c,xp+5.5,yp+2.5);
2480 cairo_move_to(c,xp+2.5,yp+5.5);
2481 cairo_line_to(c,xp+5.5,yp+5.5);
2482 cairo_stroke_preserve(c);
2483 cairo_set_source_rgba(c,1.,1.,1.,1.);
2484 }break;
2485 /*Plus:*/
2486 case 1:{
2487 if((bi&2)==0)yp-=2;
2488 if((bi&1)==0)xp-=2;
2489 cairo_move_to(c,xp+4.5,yp+2.5);
2490 cairo_line_to(c,xp+4.5,yp+6.5);
2491 cairo_move_to(c,xp+2.5,yp+4.5);
2492 cairo_line_to(c,xp+6.5,yp+4.5);
2493 cairo_stroke_preserve(c);
2494 cairo_set_source_rgba(c,.1,1.,.3,1.);
2495 break;
2496 }
2497 /*Fall through.*/
2498 /*Minus:*/
2499 case -1:{
2500 cairo_move_to(c,xp+2.5,yp+4.5);
2501 cairo_line_to(c,xp+6.5,yp+4.5);
2502 cairo_stroke_preserve(c);
2503 cairo_set_source_rgba(c,1.,.3,.1,1.);
2504 }break;
2505 default:continue;
2506 }
2507 cairo_set_line_width(c,1.);
2508 cairo_stroke(c);
2509 }
2510 }
2511 }
2512 col2++;
2513 if((col2>>1)>=_dec->state.nhmbs){
2514 col2=0;
2515 row2+=2;
2516 }
2517 }
2518 /*Bit usage indicator[s]:*/
2519 if(_dec->telemetry_bits){
2520 int widths[6];
2521 int fpsn;
2522 int fpsd;
2523 int mult;
2524 int fullw;
2525 int padw;
2526 int i;
2527 fpsn=_dec->state.info.fps_numerator;
2528 fpsd=_dec->state.info.fps_denominator;
2529 mult=(_dec->telemetry_bits>=0xFF?1:_dec->telemetry_bits);
2530 fullw=250.f*h*fpsd*mult/fpsn;
2531 padw=w-24;
2532 /*Header and coded block bits.*/
2533 if(_dec->telemetry_frame_bytes<0||
2534 _dec->telemetry_frame_bytes==OC_LOTS_OF_BITS){
2535 _dec->telemetry_frame_bytes=0;
2536 }
2537 if(_dec->telemetry_coding_bytes<0||
2538 _dec->telemetry_coding_bytes>_dec->telemetry_frame_bytes){
2539 _dec->telemetry_coding_bytes=0;
2540 }
2541 if(_dec->telemetry_mode_bytes<0||
2542 _dec->telemetry_mode_bytes>_dec->telemetry_frame_bytes){
2543 _dec->telemetry_mode_bytes=0;
2544 }
2545 if(_dec->telemetry_mv_bytes<0||
2546 _dec->telemetry_mv_bytes>_dec->telemetry_frame_bytes){
2547 _dec->telemetry_mv_bytes=0;
2548 }
2549 if(_dec->telemetry_qi_bytes<0||
2550 _dec->telemetry_qi_bytes>_dec->telemetry_frame_bytes){
2551 _dec->telemetry_qi_bytes=0;
2552 }
2553 if(_dec->telemetry_dc_bytes<0||
2554 _dec->telemetry_dc_bytes>_dec->telemetry_frame_bytes){
2555 _dec->telemetry_dc_bytes=0;
2556 }
2557 widths[0]=padw*
2558 (_dec->telemetry_frame_bytes-_dec->telemetry_coding_bytes)/fullw;
2559 widths[1]=padw*
2560 (_dec->telemetry_coding_bytes-_dec->telemetry_mode_bytes)/fullw;
2561 widths[2]=padw*
2562 (_dec->telemetry_mode_bytes-_dec->telemetry_mv_bytes)/fullw;
2563 widths[3]=padw*(_dec->telemetry_mv_bytes-_dec->telemetry_qi_bytes)/fullw;
2564 widths[4]=padw*(_dec->telemetry_qi_bytes-_dec->telemetry_dc_bytes)/fullw;
2565 widths[5]=padw*(_dec->telemetry_dc_bytes)/fullw;
2566 for(i=0;i<6;i++)if(widths[i]>w)widths[i]=w;
2567 cairo_set_source_rgba(c,.0,.0,.0,.6);
2568 cairo_rectangle(c,10,h-33,widths[0]+1,5);
2569 cairo_rectangle(c,10,h-29,widths[1]+1,5);
2570 cairo_rectangle(c,10,h-25,widths[2]+1,5);
2571 cairo_rectangle(c,10,h-21,widths[3]+1,5);
2572 cairo_rectangle(c,10,h-17,widths[4]+1,5);
2573 cairo_rectangle(c,10,h-13,widths[5]+1,5);
2574 cairo_fill(c);
2575 cairo_set_source_rgb(c,1,0,0);
2576 cairo_rectangle(c,10.5,h-32.5,widths[0],4);
2577 cairo_fill(c);
2578 cairo_set_source_rgb(c,0,1,0);
2579 cairo_rectangle(c,10.5,h-28.5,widths[1],4);
2580 cairo_fill(c);
2581 cairo_set_source_rgb(c,0,0,1);
2582 cairo_rectangle(c,10.5,h-24.5,widths[2],4);
2583 cairo_fill(c);
2584 cairo_set_source_rgb(c,.6,.4,.0);
2585 cairo_rectangle(c,10.5,h-20.5,widths[3],4);
2586 cairo_fill(c);
2587 cairo_set_source_rgb(c,.3,.3,.3);
2588 cairo_rectangle(c,10.5,h-16.5,widths[4],4);
2589 cairo_fill(c);
2590 cairo_set_source_rgb(c,.5,.5,.8);
2591 cairo_rectangle(c,10.5,h-12.5,widths[5],4);
2592 cairo_fill(c);
2593 }
2594 /*Master qi indicator[s]:*/
2595 if(_dec->telemetry_qi&0x1){
2596 cairo_text_extents_t extents;
2597 char buffer[10];
2598 int p;
2599 int y;
2600 p=0;
2601 y=h-7.5;
2602 if(_dec->state.qis[0]>=10)buffer[p++]=48+_dec->state.qis[0]/10;
2603 buffer[p++]=48+_dec->state.qis[0]%10;
2604 if(_dec->state.nqis>=2){
2605 buffer[p++]=' ';
2606 if(_dec->state.qis[1]>=10)buffer[p++]=48+_dec->state.qis[1]/10;
2607 buffer[p++]=48+_dec->state.qis[1]%10;
2608 }
2609 if(_dec->state.nqis==3){
2610 buffer[p++]=' ';
2611 if(_dec->state.qis[2]>=10)buffer[p++]=48+_dec->state.qis[2]/10;
2612 buffer[p++]=48+_dec->state.qis[2]%10;
2613 }
2614 buffer[p++]='\0';
2615 cairo_select_font_face(c,"sans",
2616 CAIRO_FONT_SLANT_NORMAL,CAIRO_FONT_WEIGHT_BOLD);
2617 cairo_set_font_size(c,18);
2618 cairo_text_extents(c,buffer,&extents);
2619 cairo_set_source_rgb(c,1,1,1);
2620 cairo_move_to(c,w-extents.x_advance-10,y);
2621 cairo_show_text(c,buffer);
2622 cairo_set_source_rgb(c,0,0,0);
2623 cairo_move_to(c,w-extents.x_advance-10,y);
2624 cairo_text_path(c,buffer);
2625 cairo_set_line_width(c,.8);
2626 cairo_set_line_join(c,CAIRO_LINE_JOIN_ROUND);
2627 cairo_stroke(c);
2628 }
2629 cairo_destroy(c);
2630 }
2631 /*Out of the Cairo plane into the telemetry YUV buffer.*/
2632 _ycbcr[0].data=_dec->telemetry_frame_data;
2633 _ycbcr[0].stride=_ycbcr[0].width;
2634 _ycbcr[1].data=_ycbcr[0].data+h*_ycbcr[0].stride;
2635 _ycbcr[1].stride=_ycbcr[1].width;
2636 _ycbcr[2].data=_ycbcr[1].data+(h>>vdec)*_ycbcr[1].stride;
2637 _ycbcr[2].stride=_ycbcr[2].width;
2638 y_row=_ycbcr[0].data;
2639 u_row=_ycbcr[1].data;
2640 v_row=_ycbcr[2].data;
2641 rgb_row=data;
2642 /*This is one of the few places it's worth handling chroma on a
2643 case-by-case basis.*/
2644 switch(_dec->state.info.pixel_fmt){
2645 case TH_PF_420:{
2646 for(y=0;y<h;y+=2){
2647 unsigned char *y_row2;
2648 unsigned char *rgb_row2;
2649 y_row2=y_row+_ycbcr[0].stride;
2650 rgb_row2=rgb_row+cstride;
2651 for(x=0;x<w;x+=2){
2652 int y;
2653 int u;
2654 int v;
2655 y=(65481*rgb_row[4*x+2]+128553*rgb_row[4*x+1]
2656 +24966*rgb_row[4*x+0]+4207500)/255000;
2657 y_row[x]=OC_CLAMP255(y);
2658 y=(65481*rgb_row[4*x+6]+128553*rgb_row[4*x+5]
2659 +24966*rgb_row[4*x+4]+4207500)/255000;
2660 y_row[x+1]=OC_CLAMP255(y);
2661 y=(65481*rgb_row2[4*x+2]+128553*rgb_row2[4*x+1]
2662 +24966*rgb_row2[4*x+0]+4207500)/255000;
2663 y_row2[x]=OC_CLAMP255(y);
2664 y=(65481*rgb_row2[4*x+6]+128553*rgb_row2[4*x+5]
2665 +24966*rgb_row2[4*x+4]+4207500)/255000;
2666 y_row2[x+1]=OC_CLAMP255(y);
2667 u=(-8372*(rgb_row[4*x+2]+rgb_row[4*x+6]
2668 +rgb_row2[4*x+2]+rgb_row2[4*x+6])
2669 -16436*(rgb_row[4*x+1]+rgb_row[4*x+5]
2670 +rgb_row2[4*x+1]+rgb_row2[4*x+5])
2671 +24808*(rgb_row[4*x+0]+rgb_row[4*x+4]
2672 +rgb_row2[4*x+0]+rgb_row2[4*x+4])+29032005)/225930;
2673 v=(39256*(rgb_row[4*x+2]+rgb_row[4*x+6]
2674 +rgb_row2[4*x+2]+rgb_row2[4*x+6])
2675 -32872*(rgb_row[4*x+1]+rgb_row[4*x+5]
2676 +rgb_row2[4*x+1]+rgb_row2[4*x+5])
2677 -6384*(rgb_row[4*x+0]+rgb_row[4*x+4]
2678 +rgb_row2[4*x+0]+rgb_row2[4*x+4])+45940035)/357510;
2679 u_row[x>>1]=OC_CLAMP255(u);
2680 v_row[x>>1]=OC_CLAMP255(v);
2681 }
2682 y_row+=_ycbcr[0].stride<<1;
2683 u_row+=_ycbcr[1].stride;
2684 v_row+=_ycbcr[2].stride;
2685 rgb_row+=cstride<<1;
2686 }
2687 }break;
2688 case TH_PF_422:{
2689 for(y=0;y<h;y++){
2690 for(x=0;x<w;x+=2){
2691 int y;
2692 int u;
2693 int v;
2694 y=(65481*rgb_row[4*x+2]+128553*rgb_row[4*x+1]
2695 +24966*rgb_row[4*x+0]+4207500)/255000;
2696 y_row[x]=OC_CLAMP255(y);
2697 y=(65481*rgb_row[4*x+6]+128553*rgb_row[4*x+5]
2698 +24966*rgb_row[4*x+4]+4207500)/255000;
2699 y_row[x+1]=OC_CLAMP255(y);
2700 u=(-16744*(rgb_row[4*x+2]+rgb_row[4*x+6])
2701 -32872*(rgb_row[4*x+1]+rgb_row[4*x+5])
2702 +49616*(rgb_row[4*x+0]+rgb_row[4*x+4])+29032005)/225930;
2703 v=(78512*(rgb_row[4*x+2]+rgb_row[4*x+6])
2704 -65744*(rgb_row[4*x+1]+rgb_row[4*x+5])
2705 -12768*(rgb_row[4*x+0]+rgb_row[4*x+4])+45940035)/357510;
2706 u_row[x>>1]=OC_CLAMP255(u);
2707 v_row[x>>1]=OC_CLAMP255(v);
2708 }
2709 y_row+=_ycbcr[0].stride;
2710 u_row+=_ycbcr[1].stride;
2711 v_row+=_ycbcr[2].stride;
2712 rgb_row+=cstride;
2713 }
2714 }break;
2715 /*case TH_PF_444:*/
2716 default:{
2717 for(y=0;y<h;y++){
2718 for(x=0;x<w;x++){
2719 int y;
2720 int u;
2721 int v;
2722 y=(65481*rgb_row[4*x+2]+128553*rgb_row[4*x+1]
2723 +24966*rgb_row[4*x+0]+4207500)/255000;
2724 u=(-33488*rgb_row[4*x+2]-65744*rgb_row[4*x+1]
2725 +99232*rgb_row[4*x+0]+29032005)/225930;
2726 v=(157024*rgb_row[4*x+2]-131488*rgb_row[4*x+1]
2727 -25536*rgb_row[4*x+0]+45940035)/357510;
2728 y_row[x]=OC_CLAMP255(y);
2729 u_row[x]=OC_CLAMP255(u);
2730 v_row[x]=OC_CLAMP255(v);
2731 }
2732 y_row+=_ycbcr[0].stride;
2733 u_row+=_ycbcr[1].stride;
2734 v_row+=_ycbcr[2].stride;
2735 rgb_row+=cstride;
2736 }
2737 }break;
2738 }
2739 /*Finished.
2740 Destroy the surface.*/
2741 cairo_surface_destroy(cs);
2742 }
2743 #endif
2744
th_decode_packetin(th_dec_ctx * _dec,const ogg_packet * _op,ogg_int64_t * _granpos)2745 int th_decode_packetin(th_dec_ctx *_dec,const ogg_packet *_op,
2746 ogg_int64_t *_granpos){
2747 int ret;
2748 if(_dec==NULL||_op==NULL)return TH_EFAULT;
2749 /*A completely empty packet indicates a dropped frame and is treated exactly
2750 like an inter frame with no coded blocks.*/
2751 if(_op->bytes==0){
2752 _dec->state.frame_type=OC_INTER_FRAME;
2753 _dec->state.ntotal_coded_fragis=0;
2754 }
2755 else{
2756 oc_pack_readinit(&_dec->opb,_op->packet,_op->bytes);
2757 ret=oc_dec_frame_header_unpack(_dec);
2758 if(ret<0)return ret;
2759 if(_dec->state.frame_type==OC_INTRA_FRAME)oc_dec_mark_all_intra(_dec);
2760 else oc_dec_coded_flags_unpack(_dec);
2761 }
2762 /*If there have been no reference frames, and we need one, initialize one.*/
2763 if(_dec->state.frame_type!=OC_INTRA_FRAME&&
2764 (_dec->state.ref_frame_idx[OC_FRAME_GOLD]<0||
2765 _dec->state.ref_frame_idx[OC_FRAME_PREV]<0)){
2766 oc_dec_init_dummy_frame(_dec);
2767 }
2768 /*If this was an inter frame with no coded blocks...*/
2769 if(_dec->state.ntotal_coded_fragis<=0){
2770 /*Just update the granule position and return.*/
2771 _dec->state.granpos=(_dec->state.keyframe_num+_dec->state.granpos_bias<<
2772 _dec->state.info.keyframe_granule_shift)
2773 +(_dec->state.curframe_num-_dec->state.keyframe_num);
2774 _dec->state.curframe_num++;
2775 if(_granpos!=NULL)*_granpos=_dec->state.granpos;
2776 return TH_DUPFRAME;
2777 }
2778 else{
2779 th_ycbcr_buffer stripe_buf;
2780 int stripe_fragy;
2781 int refi;
2782 int pli;
2783 int notstart;
2784 int notdone;
2785 #ifdef HAVE_CAIRO
2786 int telemetry;
2787 /*Save the current telemetry state.
2788 This prevents it from being modified in the middle of decoding this
2789 frame, which could cause us to skip calls to the striped decoding
2790 callback.*/
2791 telemetry=_dec->telemetry;
2792 #endif
2793 /*Select a free buffer to use for the reconstructed version of this frame.*/
2794 for(refi=0;refi==_dec->state.ref_frame_idx[OC_FRAME_GOLD]||
2795 refi==_dec->state.ref_frame_idx[OC_FRAME_PREV];refi++);
2796 _dec->state.ref_frame_idx[OC_FRAME_SELF]=refi;
2797 _dec->state.ref_frame_data[OC_FRAME_SELF]=
2798 _dec->state.ref_frame_bufs[refi][0].data;
2799 #if defined(HAVE_CAIRO)
2800 _dec->telemetry_frame_bytes=_op->bytes;
2801 #endif
2802 if(_dec->state.frame_type==OC_INTRA_FRAME){
2803 _dec->state.keyframe_num=_dec->state.curframe_num;
2804 #if defined(HAVE_CAIRO)
2805 _dec->telemetry_coding_bytes=
2806 _dec->telemetry_mode_bytes=
2807 _dec->telemetry_mv_bytes=oc_pack_bytes_left(&_dec->opb);
2808 #endif
2809 }
2810 else{
2811 #if defined(HAVE_CAIRO)
2812 _dec->telemetry_coding_bytes=oc_pack_bytes_left(&_dec->opb);
2813 #endif
2814 oc_dec_mb_modes_unpack(_dec);
2815 #if defined(HAVE_CAIRO)
2816 _dec->telemetry_mode_bytes=oc_pack_bytes_left(&_dec->opb);
2817 #endif
2818 oc_dec_mv_unpack_and_frag_modes_fill(_dec);
2819 #if defined(HAVE_CAIRO)
2820 _dec->telemetry_mv_bytes=oc_pack_bytes_left(&_dec->opb);
2821 #endif
2822 }
2823 oc_dec_block_qis_unpack(_dec);
2824 #if defined(HAVE_CAIRO)
2825 _dec->telemetry_qi_bytes=oc_pack_bytes_left(&_dec->opb);
2826 #endif
2827 oc_dec_residual_tokens_unpack(_dec);
2828 /*Update granule position.
2829 This must be done before the striped decode callbacks so that the
2830 application knows what to do with the frame data.*/
2831 _dec->state.granpos=(_dec->state.keyframe_num+_dec->state.granpos_bias<<
2832 _dec->state.info.keyframe_granule_shift)
2833 +(_dec->state.curframe_num-_dec->state.keyframe_num);
2834 _dec->state.curframe_num++;
2835 if(_granpos!=NULL)*_granpos=_dec->state.granpos;
2836 /*All of the rest of the operations -- DC prediction reversal,
2837 reconstructing coded fragments, copying uncoded fragments, loop
2838 filtering, extending borders, and out-of-loop post-processing -- should
2839 be pipelined.
2840 I.e., DC prediction reversal, reconstruction, and uncoded fragment
2841 copying are done for one or two super block rows, then loop filtering is
2842 run as far as it can, then bordering copying, then post-processing.
2843 For 4:2:0 video a Minimum Codable Unit or MCU contains two luma super
2844 block rows, and one chroma.
2845 Otherwise, an MCU consists of one super block row from each plane.
2846 Inside each MCU, we perform all of the steps on one color plane before
2847 moving on to the next.
2848 After reconstruction, the additional filtering stages introduce a delay
2849 since they need some pixels from the next fragment row.
2850 Thus the actual number of decoded rows available is slightly smaller for
2851 the first MCU, and slightly larger for the last.
2852
2853 This entire process allows us to operate on the data while it is still in
2854 cache, resulting in big performance improvements.
2855 An application callback allows further application processing (blitting
2856 to video memory, color conversion, etc.) to also use the data while it's
2857 in cache.*/
2858 oc_dec_pipeline_init(_dec,&_dec->pipe);
2859 oc_ycbcr_buffer_flip(stripe_buf,_dec->pp_frame_buf);
2860 notstart=0;
2861 notdone=1;
2862 for(stripe_fragy=0;notdone;stripe_fragy+=_dec->pipe.mcu_nvfrags){
2863 int avail_fragy0;
2864 int avail_fragy_end;
2865 avail_fragy0=avail_fragy_end=_dec->state.fplanes[0].nvfrags;
2866 notdone=stripe_fragy+_dec->pipe.mcu_nvfrags<avail_fragy_end;
2867 for(pli=0;pli<3;pli++){
2868 oc_fragment_plane *fplane;
2869 int frag_shift;
2870 int pp_offset;
2871 int sdelay;
2872 int edelay;
2873 fplane=_dec->state.fplanes+pli;
2874 /*Compute the first and last fragment row of the current MCU for this
2875 plane.*/
2876 frag_shift=pli!=0&&!(_dec->state.info.pixel_fmt&2);
2877 _dec->pipe.fragy0[pli]=stripe_fragy>>frag_shift;
2878 _dec->pipe.fragy_end[pli]=OC_MINI(fplane->nvfrags,
2879 _dec->pipe.fragy0[pli]+(_dec->pipe.mcu_nvfrags>>frag_shift));
2880 oc_dec_dc_unpredict_mcu_plane(_dec,&_dec->pipe,pli);
2881 oc_dec_frags_recon_mcu_plane(_dec,&_dec->pipe,pli);
2882 sdelay=edelay=0;
2883 if(_dec->pipe.loop_filter){
2884 sdelay+=notstart;
2885 edelay+=notdone;
2886 oc_state_loop_filter_frag_rows(&_dec->state,
2887 _dec->pipe.bounding_values,OC_FRAME_SELF,pli,
2888 _dec->pipe.fragy0[pli]-sdelay,_dec->pipe.fragy_end[pli]-edelay);
2889 }
2890 /*To fill the borders, we have an additional two pixel delay, since a
2891 fragment in the next row could filter its top edge, using two pixels
2892 from a fragment in this row.
2893 But there's no reason to delay a full fragment between the two.*/
2894 oc_state_borders_fill_rows(&_dec->state,refi,pli,
2895 (_dec->pipe.fragy0[pli]-sdelay<<3)-(sdelay<<1),
2896 (_dec->pipe.fragy_end[pli]-edelay<<3)-(edelay<<1));
2897 /*Out-of-loop post-processing.*/
2898 pp_offset=3*(pli!=0);
2899 if(_dec->pipe.pp_level>=OC_PP_LEVEL_DEBLOCKY+pp_offset){
2900 /*Perform de-blocking in one plane.*/
2901 sdelay+=notstart;
2902 edelay+=notdone;
2903 oc_dec_deblock_frag_rows(_dec,_dec->pp_frame_buf,
2904 _dec->state.ref_frame_bufs[refi],pli,
2905 _dec->pipe.fragy0[pli]-sdelay,_dec->pipe.fragy_end[pli]-edelay);
2906 if(_dec->pipe.pp_level>=OC_PP_LEVEL_DERINGY+pp_offset){
2907 /*Perform de-ringing in one plane.*/
2908 sdelay+=notstart;
2909 edelay+=notdone;
2910 oc_dec_dering_frag_rows(_dec,_dec->pp_frame_buf,pli,
2911 _dec->pipe.fragy0[pli]-sdelay,_dec->pipe.fragy_end[pli]-edelay);
2912 }
2913 }
2914 /*If no post-processing is done, we still need to delay a row for the
2915 loop filter, thanks to the strange filtering order VP3 chose.*/
2916 else if(_dec->pipe.loop_filter){
2917 sdelay+=notstart;
2918 edelay+=notdone;
2919 }
2920 /*Compute the intersection of the available rows in all planes.
2921 If chroma is sub-sampled, the effect of each of its delays is
2922 doubled, but luma might have more post-processing filters enabled
2923 than chroma, so we don't know up front which one is the limiting
2924 factor.*/
2925 avail_fragy0=OC_MINI(avail_fragy0,
2926 _dec->pipe.fragy0[pli]-sdelay<<frag_shift);
2927 avail_fragy_end=OC_MINI(avail_fragy_end,
2928 _dec->pipe.fragy_end[pli]-edelay<<frag_shift);
2929 }
2930 #ifdef HAVE_CAIRO
2931 if(_dec->stripe_cb.stripe_decoded!=NULL&&!telemetry){
2932 #else
2933 if(_dec->stripe_cb.stripe_decoded!=NULL){
2934 #endif
2935 /*The callback might want to use the FPU, so let's make sure they can.
2936 We violate all kinds of ABI restrictions by not doing this until
2937 now, but none of them actually matter since we don't use floating
2938 point ourselves.*/
2939 oc_restore_fpu(&_dec->state);
2940 /*Make the callback, ensuring we flip the sense of the "start" and
2941 "end" of the available region upside down.*/
2942 (*_dec->stripe_cb.stripe_decoded)(_dec->stripe_cb.ctx,stripe_buf,
2943 _dec->state.fplanes[0].nvfrags-avail_fragy_end,
2944 _dec->state.fplanes[0].nvfrags-avail_fragy0);
2945 }
2946 notstart=1;
2947 }
2948 /*Finish filling in the reference frame borders.*/
2949 for(pli=0;pli<3;pli++)oc_state_borders_fill_caps(&_dec->state,refi,pli);
2950 /*Update the reference frame indices.*/
2951 if(_dec->state.frame_type==OC_INTRA_FRAME){
2952 /*The new frame becomes both the previous and gold reference frames.*/
2953 _dec->state.ref_frame_idx[OC_FRAME_GOLD]=
2954 _dec->state.ref_frame_idx[OC_FRAME_PREV]=
2955 _dec->state.ref_frame_idx[OC_FRAME_SELF];
2956 _dec->state.ref_frame_data[OC_FRAME_GOLD]=
2957 _dec->state.ref_frame_data[OC_FRAME_PREV]=
2958 _dec->state.ref_frame_data[OC_FRAME_SELF];
2959 }
2960 else{
2961 /*Otherwise, just replace the previous reference frame.*/
2962 _dec->state.ref_frame_idx[OC_FRAME_PREV]=
2963 _dec->state.ref_frame_idx[OC_FRAME_SELF];
2964 _dec->state.ref_frame_data[OC_FRAME_PREV]=
2965 _dec->state.ref_frame_data[OC_FRAME_SELF];
2966 }
2967 /*Restore the FPU before dump_frame, since that _does_ use the FPU (for PNG
2968 gamma values, if nothing else).*/
2969 oc_restore_fpu(&_dec->state);
2970 #ifdef HAVE_CAIRO
2971 /*If telemetry ioctls are active, we need to draw to the output buffer.*/
2972 if(telemetry){
2973 oc_render_telemetry(_dec,stripe_buf,telemetry);
2974 oc_ycbcr_buffer_flip(_dec->pp_frame_buf,stripe_buf);
2975 /*If we had a striped decoding callback, we skipped calling it above
2976 (because the telemetry wasn't rendered yet).
2977 Call it now with the whole frame.*/
2978 if(_dec->stripe_cb.stripe_decoded!=NULL){
2979 (*_dec->stripe_cb.stripe_decoded)(_dec->stripe_cb.ctx,
2980 stripe_buf,0,_dec->state.fplanes[0].nvfrags);
2981 }
2982 }
2983 #endif
2984 #if defined(OC_DUMP_IMAGES)
2985 /*We only dump images if there were some coded blocks.*/
2986 oc_state_dump_frame(&_dec->state,OC_FRAME_SELF,"dec");
2987 #endif
2988 return 0;
2989 }
2990 }
2991
2992 int th_decode_ycbcr_out(th_dec_ctx *_dec,th_ycbcr_buffer _ycbcr){
2993 if(_dec==NULL||_ycbcr==NULL)return TH_EFAULT;
2994 oc_ycbcr_buffer_flip(_ycbcr,_dec->pp_frame_buf);
2995 return 0;
2996 }
2997