1 /********************************************************************
2 * *
3 * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. *
4 * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
5 * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
6 * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
7 * *
8 * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 *
9 * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
10 * *
11 ********************************************************************
12
13 function:
14 last mod: $Id: decode.c 17576 2010-10-29 01:07:51Z tterribe $
15
16 ********************************************************************/
17
18 #include <stdlib.h>
19 #include <string.h>
20 #include <ogg/ogg.h>
21 #include "decint.h"
22 #if defined(OC_DUMP_IMAGES)
23 # include <stdio.h>
24 # include "png.h"
25 #endif
26 #if defined(HAVE_CAIRO)
27 # include <cairo.h>
28 #endif
29
30
/*Post-processing level identifiers.
  The values run consecutively from OC_PP_LEVEL_DISABLED (0) up to
   OC_PP_LEVEL_MAX (7).*/
/*No post-processing.*/
#define OC_PP_LEVEL_DISABLED  (0)
/*Keep track of DC qi for each block only.*/
#define OC_PP_LEVEL_TRACKDCQI (1)
/*Deblock the luma plane.*/
#define OC_PP_LEVEL_DEBLOCKY  (2)
/*Dering the luma plane.*/
#define OC_PP_LEVEL_DERINGY   (3)
/*Stronger luma plane deringing.*/
#define OC_PP_LEVEL_SDERINGY  (4)
/*Deblock the chroma planes.*/
#define OC_PP_LEVEL_DEBLOCKC  (5)
/*Dering the chroma planes.*/
#define OC_PP_LEVEL_DERINGC   (6)
/*Stronger chroma plane deringing.*/
#define OC_PP_LEVEL_SDERINGC  (7)
/*Maximum valid post-processing level.
  This has the same value as OC_PP_LEVEL_SDERINGC.*/
#define OC_PP_LEVEL_MAX       (7)
49
50
51
/*The mode alphabets for the various mode coding schemes.
  Scheme 0 uses a custom alphabet, which is not stored in this table.
  Row i holds the alphabet for scheme i+1: oc_dec_mb_modes_unpack() indexes
   this table with mode_scheme-1.
  Each row lists all OC_NMODES modes; a decoded mode index selects the entry at
   that position in the row.
  The last row is also the order in which oc_dec_mb_modes_unpack() assigns
   modes to the slots transmitted for a scheme 0 custom alphabet.*/
static const unsigned char OC_MODE_ALPHABETS[7][OC_NMODES]={
  /*Last MV dominates */
  {
    OC_MODE_INTER_MV_LAST,OC_MODE_INTER_MV_LAST2,OC_MODE_INTER_MV,
    OC_MODE_INTER_NOMV,OC_MODE_INTRA,OC_MODE_GOLDEN_NOMV,OC_MODE_GOLDEN_MV,
    OC_MODE_INTER_MV_FOUR
  },
  {
    OC_MODE_INTER_MV_LAST,OC_MODE_INTER_MV_LAST2,OC_MODE_INTER_NOMV,
    OC_MODE_INTER_MV,OC_MODE_INTRA,OC_MODE_GOLDEN_NOMV,OC_MODE_GOLDEN_MV,
    OC_MODE_INTER_MV_FOUR
  },
  {
    OC_MODE_INTER_MV_LAST,OC_MODE_INTER_MV,OC_MODE_INTER_MV_LAST2,
    OC_MODE_INTER_NOMV,OC_MODE_INTRA,OC_MODE_GOLDEN_NOMV,OC_MODE_GOLDEN_MV,
    OC_MODE_INTER_MV_FOUR
  },
  {
    OC_MODE_INTER_MV_LAST,OC_MODE_INTER_MV,OC_MODE_INTER_NOMV,
    OC_MODE_INTER_MV_LAST2,OC_MODE_INTRA,OC_MODE_GOLDEN_NOMV,
    OC_MODE_GOLDEN_MV,OC_MODE_INTER_MV_FOUR
  },
  /*No MV dominates.*/
  {
    OC_MODE_INTER_NOMV,OC_MODE_INTER_MV_LAST,OC_MODE_INTER_MV_LAST2,
    OC_MODE_INTER_MV,OC_MODE_INTRA,OC_MODE_GOLDEN_NOMV,OC_MODE_GOLDEN_MV,
    OC_MODE_INTER_MV_FOUR
  },
  {
    OC_MODE_INTER_NOMV,OC_MODE_GOLDEN_NOMV,OC_MODE_INTER_MV_LAST,
    OC_MODE_INTER_MV_LAST2,OC_MODE_INTER_MV,OC_MODE_INTRA,OC_MODE_GOLDEN_MV,
    OC_MODE_INTER_MV_FOUR
  },
  /*Default ordering.*/
  {
    OC_MODE_INTER_NOMV,OC_MODE_INTRA,OC_MODE_INTER_MV,OC_MODE_INTER_MV_LAST,
    OC_MODE_INTER_MV_LAST2,OC_MODE_GOLDEN_NOMV,OC_MODE_GOLDEN_MV,
    OC_MODE_INTER_MV_FOUR
  }
};
94
95
96 /*The original DCT tokens are extended and reordered during the construction of
97 the Huffman tables.
98 The extension means more bits can be read with fewer calls to the bitpacker
99 during the Huffman decoding process (at the cost of larger Huffman tables),
100 and fewer tokens require additional extra bits (reducing the average storage
101 per decoded token).
102 The revised ordering reveals essential information in the token value
103 itself; specifically, whether or not there are additional extra bits to read
104 and the parameter to which those extra bits are applied.
105 The token is used to fetch a code word from the OC_DCT_CODE_WORD table below.
106 The extra bits are added into code word at the bit position inferred from the
107 token value, giving the final code word from which all required parameters
108 are derived.
109 The number of EOBs and the leading zero run length can be extracted directly.
110 The coefficient magnitude is optionally negated before extraction, according
111 to a 'flip' bit.*/
112
/*The number of additional extra bits that are decoded with each of the
   internal DCT tokens.
  Only the first 15 internal tokens have an entry; every token with a larger
   value needs no additional extra bits.*/
static const unsigned char OC_INTERNAL_DCT_TOKEN_EXTRA_BITS[15]={
  12,4,3,3,4,4,5,5,8,8,8,8,3,3,6
};

/*Whether or not an internal token needs any additional extra bits.
  This is true exactly when the token indexes into
   OC_INTERNAL_DCT_TOKEN_EXTRA_BITS.
  The argument is parenthesized so that any expression (e.g., a conditional)
   may be passed safely.*/
#define OC_DCT_TOKEN_NEEDS_MORE(_token) \
 ((_token)<(int)(sizeof(OC_INTERNAL_DCT_TOKEN_EXTRA_BITS)/ \
  sizeof(*OC_INTERNAL_DCT_TOKEN_EXTRA_BITS)))

/*This token (OC_DCT_REPEAT_RUN3_TOKEN) requires more than 8 extra bits.*/
#define OC_DCT_TOKEN_FAT_EOB (0)

/*The number of EOBs to use for an end-of-frame token.
  Note: We want to set eobs to PTRDIFF_MAX here, but that requires C99, which
   is not yet available everywhere; this should be equivalent.*/
#define OC_DCT_EOB_FINISH (~(size_t)0>>1)
131
/*Layout of a DCT code word, from least to most significant bit:
   bits  0- 7: zero run length (OC_DCT_CW_RLEN_SHIFT)
   bits  8-19: EOB run count   (OC_DCT_CW_EOB_SHIFT)
   bit     20: flip bit        (OC_DCT_CW_FLIP_BIT)
   bits 21-31: signed coefficient magnitude (OC_DCT_CW_MAG_SHIFT)*/

/*The location of the (6) run length bits in the code word.
  These are placed at index 0 and given 8 bits (even though 6 would suffice)
   because it may be faster to extract the lower byte on some platforms.*/
#define OC_DCT_CW_RLEN_SHIFT (0)
/*The location of the (12) EOB bits in the code word.*/
#define OC_DCT_CW_EOB_SHIFT  (8)
/*The location of the (1) flip bit in the code word.
  This must be right under the magnitude bits.*/
#define OC_DCT_CW_FLIP_BIT   (20)
/*The location of the (11) token magnitude bits in the code word.
  These must be last, and rely on a sign-extending right shift.*/
#define OC_DCT_CW_MAG_SHIFT  (21)

/*Pack the given fields into a code word.
  _eobs: The EOB run count.
  _rlen: The zero run length.
  _mag:  The (possibly negative) coefficient magnitude.
  _flip: The flip bit (0 or 1); it is also subtracted from the stored
          magnitude.
  The magnitude is moved into place by multiplying by a power of two rather
   than by shifting, because left-shifting a negative value is undefined
   behavior in C.
  For the non-negative case the result is identical to the shift.*/
#define OC_DCT_CW_PACK(_eobs,_rlen,_mag,_flip) \
 ((_eobs)<<OC_DCT_CW_EOB_SHIFT| \
  (_rlen)<<OC_DCT_CW_RLEN_SHIFT| \
  (_flip)<<OC_DCT_CW_FLIP_BIT| \
  ((_mag)-(_flip))*(1<<OC_DCT_CW_MAG_SHIFT))

/*A special code word value that signals the end of the frame (a long EOB run
   of zero).*/
#define OC_DCT_CW_FINISH (0)

/*The position at which to insert the extra bits in the code word.
  We use this formulation because Intel has no useful cmov.
  A real architecture would probably do better with two of those.
  This translates to 11 instructions(!), and is _still_ faster than either a
   table lookup (just barely) or the naive double-ternary implementation (which
   gcc translates to a jump and a cmov).
  This assumes OC_DCT_CW_RLEN_SHIFT is zero, but could easily be reworked if
   you want to make one of the other shifts zero.
  The result is OC_DCT_CW_EOB_SHIFT for tokens 0 and 1, OC_DCT_CW_MAG_SHIFT for
   tokens 2 through 11, and 0 (OC_DCT_CW_RLEN_SHIFT) for all other tokens.*/
#define OC_DCT_TOKEN_EB_POS(_token) \
 (((OC_DCT_CW_EOB_SHIFT-OC_DCT_CW_MAG_SHIFT)&-((_token)<2)) \
  +(OC_DCT_CW_MAG_SHIFT&-((_token)<12)))
167
/*The code words for each internal token.
  See the notes at OC_DCT_TOKEN_MAP for the reasons why things are out of
   order.
  Each entry is built with OC_DCT_CW_PACK(eobs,rlen,mag,flip), except for the
   first, which is the special OC_DCT_CW_FINISH value.
  The grouping of the first 15 entries matches OC_DCT_TOKEN_EB_POS(): entries
   0-1 take their extra bits in the EOB field, entries 2-11 in the magnitude
   field, and entries 12-14 in the run length field.
  The remaining 77 entries need no additional extra bits.*/
static const ogg_int32_t OC_DCT_CODE_WORD[92]={
  /*These tokens require additional extra bits for the EOB count.*/
  /*OC_DCT_REPEAT_RUN3_TOKEN (12 extra bits)*/
  OC_DCT_CW_FINISH,
  /*OC_DCT_REPEAT_RUN2_TOKEN (4 extra bits)*/
  OC_DCT_CW_PACK(16, 0,  0,0),
  /*These tokens require additional extra bits for the magnitude.*/
  /*OC_DCT_VAL_CAT5 (4 extra bits-1 already read)*/
  OC_DCT_CW_PACK( 0, 0, 13,0),
  OC_DCT_CW_PACK( 0, 0, 13,1),
  /*OC_DCT_VAL_CAT6 (5 extra bits-1 already read)*/
  OC_DCT_CW_PACK( 0, 0, 21,0),
  OC_DCT_CW_PACK( 0, 0, 21,1),
  /*OC_DCT_VAL_CAT7 (6 extra bits-1 already read)*/
  OC_DCT_CW_PACK( 0, 0, 37,0),
  OC_DCT_CW_PACK( 0, 0, 37,1),
  /*OC_DCT_VAL_CAT8 (10 extra bits-2 already read)*/
  OC_DCT_CW_PACK( 0, 0, 69,0),
  OC_DCT_CW_PACK( 0, 0,325,0),
  OC_DCT_CW_PACK( 0, 0, 69,1),
  OC_DCT_CW_PACK( 0, 0,325,1),
  /*These tokens require additional extra bits for the run length.*/
  /*OC_DCT_RUN_CAT1C (4 extra bits-1 already read)*/
  OC_DCT_CW_PACK( 0,10, +1,0),
  OC_DCT_CW_PACK( 0,10, -1,0),
  /*OC_DCT_ZRL_TOKEN (6 extra bits)
    Flip is set to distinguish this from OC_DCT_CW_FINISH.*/
  OC_DCT_CW_PACK( 0, 0,  0,1),
  /*The remaining tokens require no additional extra bits.*/
  /*OC_DCT_EOB1_TOKEN (0 extra bits)*/
  OC_DCT_CW_PACK( 1, 0,  0,0),
  /*OC_DCT_EOB2_TOKEN (0 extra bits)*/
  OC_DCT_CW_PACK( 2, 0,  0,0),
  /*OC_DCT_EOB3_TOKEN (0 extra bits)*/
  OC_DCT_CW_PACK( 3, 0,  0,0),
  /*OC_DCT_RUN_CAT1A (1 extra bit-1 already read)x5*/
  OC_DCT_CW_PACK( 0, 1, +1,0),
  OC_DCT_CW_PACK( 0, 1, -1,0),
  OC_DCT_CW_PACK( 0, 2, +1,0),
  OC_DCT_CW_PACK( 0, 2, -1,0),
  OC_DCT_CW_PACK( 0, 3, +1,0),
  OC_DCT_CW_PACK( 0, 3, -1,0),
  OC_DCT_CW_PACK( 0, 4, +1,0),
  OC_DCT_CW_PACK( 0, 4, -1,0),
  OC_DCT_CW_PACK( 0, 5, +1,0),
  OC_DCT_CW_PACK( 0, 5, -1,0),
  /*OC_DCT_RUN_CAT2A (2 extra bits-2 already read)*/
  OC_DCT_CW_PACK( 0, 1, +2,0),
  OC_DCT_CW_PACK( 0, 1, +3,0),
  OC_DCT_CW_PACK( 0, 1, -2,0),
  OC_DCT_CW_PACK( 0, 1, -3,0),
  /*OC_DCT_RUN_CAT1B (3 extra bits-3 already read)*/
  OC_DCT_CW_PACK( 0, 6, +1,0),
  OC_DCT_CW_PACK( 0, 7, +1,0),
  OC_DCT_CW_PACK( 0, 8, +1,0),
  OC_DCT_CW_PACK( 0, 9, +1,0),
  OC_DCT_CW_PACK( 0, 6, -1,0),
  OC_DCT_CW_PACK( 0, 7, -1,0),
  OC_DCT_CW_PACK( 0, 8, -1,0),
  OC_DCT_CW_PACK( 0, 9, -1,0),
  /*OC_DCT_RUN_CAT2B (3 extra bits-3 already read)*/
  OC_DCT_CW_PACK( 0, 2, +2,0),
  OC_DCT_CW_PACK( 0, 3, +2,0),
  OC_DCT_CW_PACK( 0, 2, +3,0),
  OC_DCT_CW_PACK( 0, 3, +3,0),
  OC_DCT_CW_PACK( 0, 2, -2,0),
  OC_DCT_CW_PACK( 0, 3, -2,0),
  OC_DCT_CW_PACK( 0, 2, -3,0),
  OC_DCT_CW_PACK( 0, 3, -3,0),
  /*OC_DCT_SHORT_ZRL_TOKEN (3 extra bits-3 already read)
    Flip is set on the first one to distinguish it from OC_DCT_CW_FINISH.*/
  OC_DCT_CW_PACK( 0, 0,  0,1),
  OC_DCT_CW_PACK( 0, 1,  0,0),
  OC_DCT_CW_PACK( 0, 2,  0,0),
  OC_DCT_CW_PACK( 0, 3,  0,0),
  OC_DCT_CW_PACK( 0, 4,  0,0),
  OC_DCT_CW_PACK( 0, 5,  0,0),
  OC_DCT_CW_PACK( 0, 6,  0,0),
  OC_DCT_CW_PACK( 0, 7,  0,0),
  /*OC_ONE_TOKEN (0 extra bits)*/
  OC_DCT_CW_PACK( 0, 0, +1,0),
  /*OC_MINUS_ONE_TOKEN (0 extra bits)*/
  OC_DCT_CW_PACK( 0, 0, -1,0),
  /*OC_TWO_TOKEN (0 extra bits)*/
  OC_DCT_CW_PACK( 0, 0, +2,0),
  /*OC_MINUS_TWO_TOKEN (0 extra bits)*/
  OC_DCT_CW_PACK( 0, 0, -2,0),
  /*OC_DCT_VAL_CAT2 (1 extra bit-1 already read)x4*/
  OC_DCT_CW_PACK( 0, 0, +3,0),
  OC_DCT_CW_PACK( 0, 0, -3,0),
  OC_DCT_CW_PACK( 0, 0, +4,0),
  OC_DCT_CW_PACK( 0, 0, -4,0),
  OC_DCT_CW_PACK( 0, 0, +5,0),
  OC_DCT_CW_PACK( 0, 0, -5,0),
  OC_DCT_CW_PACK( 0, 0, +6,0),
  OC_DCT_CW_PACK( 0, 0, -6,0),
  /*OC_DCT_VAL_CAT3 (2 extra bits-2 already read)*/
  OC_DCT_CW_PACK( 0, 0, +7,0),
  OC_DCT_CW_PACK( 0, 0, +8,0),
  OC_DCT_CW_PACK( 0, 0, -7,0),
  OC_DCT_CW_PACK( 0, 0, -8,0),
  /*OC_DCT_VAL_CAT4 (3 extra bits-3 already read)*/
  OC_DCT_CW_PACK( 0, 0, +9,0),
  OC_DCT_CW_PACK( 0, 0,+10,0),
  OC_DCT_CW_PACK( 0, 0,+11,0),
  OC_DCT_CW_PACK( 0, 0,+12,0),
  OC_DCT_CW_PACK( 0, 0, -9,0),
  OC_DCT_CW_PACK( 0, 0,-10,0),
  OC_DCT_CW_PACK( 0, 0,-11,0),
  OC_DCT_CW_PACK( 0, 0,-12,0),
  /*OC_DCT_REPEAT_RUN1_TOKEN (3 extra bits-3 already read)*/
  OC_DCT_CW_PACK( 8, 0,  0,0),
  OC_DCT_CW_PACK( 9, 0,  0,0),
  OC_DCT_CW_PACK(10, 0,  0,0),
  OC_DCT_CW_PACK(11, 0,  0,0),
  OC_DCT_CW_PACK(12, 0,  0,0),
  OC_DCT_CW_PACK(13, 0,  0,0),
  OC_DCT_CW_PACK(14, 0,  0,0),
  OC_DCT_CW_PACK(15, 0,  0,0),
  /*OC_DCT_REPEAT_RUN0_TOKEN (2 extra bits-2 already read)*/
  OC_DCT_CW_PACK( 4, 0,  0,0),
  OC_DCT_CW_PACK( 5, 0,  0,0),
  OC_DCT_CW_PACK( 6, 0,  0,0),
  OC_DCT_CW_PACK( 7, 0,  0,0),
};
296
297
298
/*Decodes one run length using the "long run" code shared by the super block
   flags and the block-level qi flags.
  _opb: The pack buffer to read from.
  Return: The decoded run length (1...4129 according to the code table below).*/
static int oc_sb_run_unpack(oc_pack_buf *_opb){
  /*Coding scheme:
       Codeword            Run Length
     0                       1
     10x                     2-3
     110x                    4-5
     1110xx                  6-9
     11110xxx                10-17
     111110xxxx              18-33
     111111xxxxxxxxxxxx      34-4129*/
  /*Token values below 0x10 are the run length itself.
    Larger token values pack the number of extra bits still to be read in the
     bits above the low nibble and the base run length, less 6, in the low 5
     bits.*/
  static const ogg_int16_t OC_SB_RUN_TREE[22]={
    4,
     -(1<<8|1),-(1<<8|1),-(1<<8|1),-(1<<8|1),
     -(1<<8|1),-(1<<8|1),-(1<<8|1),-(1<<8|1),
     -(3<<8|2),-(3<<8|2),-(3<<8|3),-(3<<8|3),
     -(4<<8|4),-(4<<8|5),-(4<<8|2<<4|6-6),17,
      2,
       -(2<<8|2<<4|10-6),-(2<<8|2<<4|14-6),-(2<<8|4<<4|18-6),-(2<<8|12<<4|34-6)
  };
  int token;
  int base;
  int nbits;
  token=oc_huff_token_decode(_opb,OC_SB_RUN_TREE);
  /*Short runs are stored directly in the tree.*/
  if(token<0x10)return token;
  base=token&0x1F;
  nbits=(token-base)>>4;
  return 6+base+(int)oc_pack_read(_opb,nbits);
}
327
/*Decodes one run length using the "short run" code used for the coded flags
   of the blocks in partially coded super blocks.
  _opb: The pack buffer to read from.
  Return: The value produced by oc_huff_token_decode() for OC_BLOCK_RUN_TREE
           (a run length of 1...30 according to the code table below).*/
static int oc_block_run_unpack(oc_pack_buf *_opb){
  /*Coding scheme:
     Codeword             Run Length
     0x                      1-2
     10x                     3-4
     110x                    5-6
     1110xx                  7-10
     11110xx                 11-14
     11111xxxx               15-30*/
  /*NOTE(review): the table layout appears to be a node's bit count followed by
     1<<bits entries, where negative entries are leaves of the form
     -(code length<<8|value) and positive entries are indices of child nodes
     within this array; confirm against oc_huff_token_decode().*/
  static const ogg_int16_t OC_BLOCK_RUN_TREE[61]={
    5,
     -(2<<8|1),-(2<<8|1),-(2<<8|1),-(2<<8|1),
     -(2<<8|1),-(2<<8|1),-(2<<8|1),-(2<<8|1),
     -(2<<8|2),-(2<<8|2),-(2<<8|2),-(2<<8|2),
     -(2<<8|2),-(2<<8|2),-(2<<8|2),-(2<<8|2),
     -(3<<8|3),-(3<<8|3),-(3<<8|3),-(3<<8|3),
     -(3<<8|4),-(3<<8|4),-(3<<8|4),-(3<<8|4),
     -(4<<8|5),-(4<<8|5),-(4<<8|6),-(4<<8|6),
     33,       36,       39,       44,
      1,-(1<<8|7),-(1<<8|8),
      1,-(1<<8|9),-(1<<8|10),
      2,-(2<<8|11),-(2<<8|12),-(2<<8|13),-(2<<8|14),
      4,
       -(4<<8|15),-(4<<8|16),-(4<<8|17),-(4<<8|18),
       -(4<<8|19),-(4<<8|20),-(4<<8|21),-(4<<8|22),
       -(4<<8|23),-(4<<8|24),-(4<<8|25),-(4<<8|26),
       -(4<<8|27),-(4<<8|28),-(4<<8|29),-(4<<8|30)
  };
  return oc_huff_token_decode(_opb,OC_BLOCK_RUN_TREE);
}
358
359
360
/*Installs the plain C implementations of the decoder's accelerated routines.
  When OC_DEC_USE_VTABLE is defined, this points the dc_unpredict_mcu_plane
   entry of the decoder's function table at the C version.
  Otherwise this function does nothing.
  _dec: The decoder context to initialize.*/
void oc_dec_accel_init_c(oc_dec_ctx *_dec){
# if defined(OC_DEC_USE_VTABLE)
  _dec->opt_vtable.dc_unpredict_mcu_plane=
   oc_dec_dc_unpredict_mcu_plane_c;
# endif
}
367
/*Initializes a decoder context from the stream info and setup headers.
  _dec:   The decoder context to initialize.
  _info:  The stream parameters, passed on to oc_state_init().
  _setup: The setup information supplying the Huffman tables, quantization
           parameters and loop filter limits.
  Return: 0 on success, the negative error code from oc_state_init() or
           oc_huff_trees_copy() if one of them fails, or TH_EFAULT if the
           token buffer could not be allocated.
          On failure everything acquired so far has been released again.*/
static int oc_dec_init(oc_dec_ctx *_dec,const th_info *_info,
 const th_setup_info *_setup){
  int qi;
  int pli;
  int qti;
  int err;
  err=oc_state_init(&_dec->state,_info,3);
  if(err<0)return err;
  err=oc_huff_trees_copy(_dec->huff_tables,
   (const ogg_int16_t *const *)_setup->huff_tables);
  if(err<0)goto fail_state;
  /*For each fragment, allocate one byte for every DCT coefficient token, plus
     one byte for extra-bits for each token, plus one more byte for the long
     EOB run, just in case it's the very last token and has a run length of
     one.*/
  _dec->dct_tokens=(unsigned char *)_ogg_malloc((64+64+1)*
   _dec->state.nfrags*sizeof(_dec->dct_tokens[0]));
  if(_dec->dct_tokens==NULL){
    err=TH_EFAULT;
    goto fail_huff;
  }
  /*Point every dequantization table at its backing storage before filling
     the tables in.*/
  for(qi=0;qi<64;qi++){
    for(pli=0;pli<3;pli++){
      for(qti=0;qti<2;qti++){
        _dec->state.dequant_tables[qi][pli][qti]=
         _dec->state.dequant_table_data[qi][pli][qti];
      }
    }
  }
  oc_dequant_tables_init(_dec->state.dequant_tables,_dec->pp_dc_scale,
   &_setup->qinfo);
  /*Derive the post-processing sharpness modifier for each qi from four
     entries of every dequantization table, with plane 0 counted twice.*/
  for(qi=0;qi<64;qi++){
    int qsum;
    qsum=0;
    for(qti=0;qti<2;qti++){
      for(pli=0;pli<3;pli++){
        int plane_shift;
        plane_shift=pli==0;
        qsum+=(_dec->state.dequant_tables[qi][pli][qti][12]+
         _dec->state.dequant_tables[qi][pli][qti][17]+
         _dec->state.dequant_tables[qi][pli][qti][18]+
         _dec->state.dequant_tables[qi][pli][qti][24])<<plane_shift;
      }
    }
    _dec->pp_sharp_mod[qi]=-(qsum>>11);
  }
  memcpy(_dec->state.loop_filter_limits,_setup->qinfo.loop_filter_limits,
   sizeof(_dec->state.loop_filter_limits));
  oc_dec_accel_init(_dec);
  /*Post-processing starts out disabled, with none of its buffers allocated
     and no stripe callback registered.*/
  _dec->pp_level=OC_PP_LEVEL_DISABLED;
  _dec->dc_qis=NULL;
  _dec->variances=NULL;
  _dec->pp_frame_data=NULL;
  _dec->stripe_cb.ctx=NULL;
  _dec->stripe_cb.stripe_decoded=NULL;
#if defined(HAVE_CAIRO)
  _dec->telemetry=0;
  _dec->telemetry_bits=0;
  _dec->telemetry_qi=0;
  _dec->telemetry_mbmode=0;
  _dec->telemetry_mv=0;
  _dec->telemetry_frame_data=NULL;
#endif
  return 0;
fail_huff:
  oc_huff_trees_clear(_dec->huff_tables);
fail_state:
  oc_state_clear(&_dec->state);
  return err;
}
429
/*Releases everything owned by a decoder context: the telemetry frame buffer
   (when built with HAVE_CAIRO), the post-processing buffers, the DCT token
   buffer, the Huffman tables and the shared codec state.
  The context structure itself is not freed here.
  _dec: The decoder context to clear.*/
static void oc_dec_clear(oc_dec_ctx *_dec){
#if defined(HAVE_CAIRO)
  _ogg_free(_dec->telemetry_frame_data);
#endif
  _ogg_free(_dec->pp_frame_data);
  _ogg_free(_dec->variances);
  _ogg_free(_dec->dc_qis);
  _ogg_free(_dec->dct_tokens);
  oc_huff_trees_clear(_dec->huff_tables);
  oc_state_clear(&_dec->state);
}
441
442
/*Reads the frame header from the decoder's pack buffer.
  This stores the frame type and the list of 1 to 3 qi values in the decoder
   state.
  _dec: The decoder context.
  Return: 0 on success, TH_EBADPACKET if the packet is not a data packet, or
           TH_EIMPL if a keyframe sets any of its 3 reserved bits.*/
static int oc_dec_frame_header_unpack(oc_dec_ctx *_dec){
  int nqis;
  /*Check to make sure this is a data packet.*/
  if(oc_pack_read1(&_dec->opb)!=0)return TH_EBADPACKET;
  /*Read in the frame type (I or P).*/
  _dec->state.frame_type=(int)oc_pack_read1(&_dec->opb);
  /*Read in the qi list: each 6-bit qi is followed by a "more" flag, except
     for the third one, which always ends the list.*/
  nqis=0;
  do{
    _dec->state.qis[nqis++]=(unsigned char)oc_pack_read(&_dec->opb,6);
  }
  while(nqis<3&&oc_pack_read1(&_dec->opb));
  _dec->state.nqis=nqis;
  if(_dec->state.frame_type==OC_INTRA_FRAME){
    /*Keyframes have 3 unused configuration bits, holdovers from VP3 days.
      Most of the other unused bits in the VP3 headers were eliminated.
      I don't know why these remain.*/
    /*I wanted to eliminate wasted bits, but not all config wiggle room
       --Monty.*/
    if(oc_pack_read(&_dec->opb,3)!=0)return TH_EIMPL;
  }
  return 0;
}
478
479 /*Mark all fragments as coded and in OC_MODE_INTRA.
480 This also builds up the coded fragment list (in coded order), and clears the
481 uncoded fragment list.
482 It does not update the coded macro block list nor the super block flags, as
483 those are not used when decoding INTRA frames.*/
oc_dec_mark_all_intra(oc_dec_ctx * _dec)484 static void oc_dec_mark_all_intra(oc_dec_ctx *_dec){
485 const oc_sb_map *sb_maps;
486 const oc_sb_flags *sb_flags;
487 oc_fragment *frags;
488 ptrdiff_t *coded_fragis;
489 ptrdiff_t ncoded_fragis;
490 ptrdiff_t prev_ncoded_fragis;
491 unsigned nsbs;
492 unsigned sbi;
493 int pli;
494 coded_fragis=_dec->state.coded_fragis;
495 prev_ncoded_fragis=ncoded_fragis=0;
496 sb_maps=(const oc_sb_map *)_dec->state.sb_maps;
497 sb_flags=_dec->state.sb_flags;
498 frags=_dec->state.frags;
499 sbi=nsbs=0;
500 for(pli=0;pli<3;pli++){
501 nsbs+=_dec->state.fplanes[pli].nsbs;
502 for(;sbi<nsbs;sbi++){
503 int quadi;
504 for(quadi=0;quadi<4;quadi++)if(sb_flags[sbi].quad_valid&1<<quadi){
505 int bi;
506 for(bi=0;bi<4;bi++){
507 ptrdiff_t fragi;
508 fragi=sb_maps[sbi][quadi][bi];
509 if(fragi>=0){
510 frags[fragi].coded=1;
511 frags[fragi].refi=OC_FRAME_SELF;
512 frags[fragi].mb_mode=OC_MODE_INTRA;
513 coded_fragis[ncoded_fragis++]=fragi;
514 }
515 }
516 }
517 }
518 _dec->state.ncoded_fragis[pli]=ncoded_fragis-prev_ncoded_fragis;
519 prev_ncoded_fragis=ncoded_fragis;
520 }
521 _dec->state.ntotal_coded_fragis=ncoded_fragis;
522 }
523
524 /*Decodes the bit flags indicating whether each super block is partially coded
525 or not.
526 Return: The number of partially coded super blocks.*/
oc_dec_partial_sb_flags_unpack(oc_dec_ctx * _dec)527 static unsigned oc_dec_partial_sb_flags_unpack(oc_dec_ctx *_dec){
528 oc_sb_flags *sb_flags;
529 unsigned nsbs;
530 unsigned sbi;
531 unsigned npartial;
532 unsigned run_count;
533 long val;
534 int flag;
535 val=oc_pack_read1(&_dec->opb);
536 flag=(int)val;
537 sb_flags=_dec->state.sb_flags;
538 nsbs=_dec->state.nsbs;
539 sbi=npartial=0;
540 while(sbi<nsbs){
541 int full_run;
542 run_count=oc_sb_run_unpack(&_dec->opb);
543 full_run=run_count>=4129;
544 do{
545 sb_flags[sbi].coded_partially=flag;
546 sb_flags[sbi].coded_fully=0;
547 npartial+=flag;
548 sbi++;
549 }
550 while(--run_count>0&&sbi<nsbs);
551 if(full_run&&sbi<nsbs){
552 val=oc_pack_read1(&_dec->opb);
553 flag=(int)val;
554 }
555 else flag=!flag;
556 }
557 /*TODO: run_count should be 0 here.
558 If it's not, we should issue a warning of some kind.*/
559 return npartial;
560 }
561
/*Decodes the bit flags for whether or not each non-partially-coded super
   block is fully coded or not.
  This function should only be called if there is at least one
   non-partially-coded super block: the initial skip loop below has no bounds
   check and relies on finding one.
  The result is stored in the coded_fully flag of each non-partially-coded
   super block; partially coded super blocks are left untouched.
  Nothing is returned.*/
static void oc_dec_coded_sb_flags_unpack(oc_dec_ctx *_dec){
  oc_sb_flags *sb_flags;
  unsigned     nsbs;
  unsigned     sbi;
  unsigned     run_count;
  long         val;
  int          flag;
  sb_flags=_dec->state.sb_flags;
  nsbs=_dec->state.nsbs;
  /*Skip partially coded super blocks.*/
  for(sbi=0;sb_flags[sbi].coded_partially;sbi++);
  /*The value of the first run is sent explicitly.*/
  val=oc_pack_read1(&_dec->opb);
  flag=(int)val;
  do{
    int full_run;
    run_count=oc_sb_run_unpack(&_dec->opb);
    /*A maximum-length run is followed by a fresh flag bit instead of an
       implicit toggle.*/
    full_run=run_count>=4129;
    for(;sbi<nsbs;sbi++){
      /*Partially coded super blocks do not consume any of the run.*/
      if(sb_flags[sbi].coded_partially)continue;
      /*run_count is unsigned, so this tests for an exhausted run (zero)
         before decrementing.*/
      if(run_count--<=0)break;
      sb_flags[sbi].coded_fully=flag;
    }
    if(full_run&&sbi<nsbs){
      val=oc_pack_read1(&_dec->opb);
      flag=(int)val;
    }
    else flag=!flag;
  }
  while(sbi<nsbs);
  /*TODO: run_count should be 0 here.
    If it's not, we should issue a warning of some kind.*/
}
599
/*Decodes the coded/not coded flag of every fragment in an INTER frame.
  The super block flags are unpacked first, then the per-block flags for the
   blocks of partially coded super blocks.
  This fills in the coded fragment list (growing up from the start of
   coded_fragis), the uncoded fragment list (growing down from the end of the
   same array), the per-plane and total coded fragment counts, and the coded
   flag of each fragment, and resets each fragment's refi to OC_FRAME_NONE.
  For the first plane it also records in mb_modes whether each quadrant has at
   least one coded block.
  _dec: The decoder context.*/
static void oc_dec_coded_flags_unpack(oc_dec_ctx *_dec){
  const oc_sb_map   *sb_maps;
  const oc_sb_flags *sb_flags;
  signed char       *mb_modes;
  oc_fragment       *frags;
  unsigned           nsbs;
  unsigned           sbi;
  unsigned           npartial;
  long               val;
  int                pli;
  int                flag;
  int                run_count;
  ptrdiff_t         *coded_fragis;
  ptrdiff_t         *uncoded_fragis;
  ptrdiff_t          ncoded_fragis;
  ptrdiff_t          nuncoded_fragis;
  ptrdiff_t          prev_ncoded_fragis;
  npartial=oc_dec_partial_sb_flags_unpack(_dec);
  /*The fully-coded flags are only present if some super block is not
     partially coded.*/
  if(npartial<_dec->state.nsbs)oc_dec_coded_sb_flags_unpack(_dec);
  if(npartial>0){
    val=oc_pack_read1(&_dec->opb);
    /*The flag is stored inverted because it is toggled again when the first
       block run is read below.*/
    flag=!(int)val;
  }
  else flag=0;
  sb_maps=(const oc_sb_map *)_dec->state.sb_maps;
  sb_flags=_dec->state.sb_flags;
  mb_modes=_dec->state.mb_modes;
  frags=_dec->state.frags;
  sbi=nsbs=run_count=0;
  coded_fragis=_dec->state.coded_fragis;
  /*The uncoded list is written backwards from one past the last slot.*/
  uncoded_fragis=coded_fragis+_dec->state.nfrags;
  prev_ncoded_fragis=ncoded_fragis=nuncoded_fragis=0;
  for(pli=0;pli<3;pli++){
    nsbs+=_dec->state.fplanes[pli].nsbs;
    for(;sbi<nsbs;sbi++){
      int quadi;
      for(quadi=0;quadi<4;quadi++)if(sb_flags[sbi].quad_valid&1<<quadi){
        int quad_coded;
        int bi;
        quad_coded=0;
        for(bi=0;bi<4;bi++){
          ptrdiff_t fragi;
          fragi=sb_maps[sbi][quadi][bi];
          if(fragi>=0){
            int coded;
            if(sb_flags[sbi].coded_fully)coded=1;
            else if(!sb_flags[sbi].coded_partially)coded=0;
            else{
              /*Start a new run, which always has the opposite value of the
                 previous one.*/
              if(run_count<=0){
                run_count=oc_block_run_unpack(&_dec->opb);
                flag=!flag;
              }
              run_count--;
              coded=flag;
            }
            if(coded)coded_fragis[ncoded_fragis++]=fragi;
            else *(uncoded_fragis-++nuncoded_fragis)=fragi;
            quad_coded|=coded;
            frags[fragi].coded=coded;
            frags[fragi].refi=OC_FRAME_NONE;
          }
        }
        /*Remember if there's a coded luma block in this macro block.*/
        if(!pli)mb_modes[sbi<<2|quadi]=quad_coded;
      }
    }
    _dec->state.ncoded_fragis[pli]=ncoded_fragis-prev_ncoded_fragis;
    prev_ncoded_fragis=ncoded_fragis;
  }
  _dec->state.ntotal_coded_fragis=ncoded_fragis;
  /*TODO: run_count should be 0 here.
    If it's not, we should issue a warning of some kind.*/
}
673
674
/*The variable-length code for macro block mode indices.
  oc_dec_mb_modes_unpack() uses it for every mode coding scheme except
   scheme 7.
  Coding scheme:
   Codeword            Mode Index
   0                       0
   10                      1
   110                     2
   1110                    3
   11110                   4
   111110                  5
   1111110                 6
   1111111                 7*/
static const ogg_int16_t OC_VLC_MODE_TREE[26]={
  4,
   -(1<<8|0),-(1<<8|0),-(1<<8|0),-(1<<8|0),
   -(1<<8|0),-(1<<8|0),-(1<<8|0),-(1<<8|0),
   -(2<<8|1),-(2<<8|1),-(2<<8|1),-(2<<8|1),
   -(3<<8|2),-(3<<8|2),-(4<<8|3),17,
    3,
     -(1<<8|4),-(1<<8|4),-(1<<8|4),-(1<<8|4),
     -(2<<8|5),-(2<<8|5),-(3<<8|6),-(3<<8|7)
};
695
/*The constant-length code for macro block mode indices: every index 0...7 is
   a 3-bit codeword.
  oc_dec_mb_modes_unpack() uses it for mode coding scheme 7.*/
static const ogg_int16_t OC_CLC_MODE_TREE[9]={
  3,
   -(3<<8|0),-(3<<8|1),-(3<<8|2),-(3<<8|3),
   -(3<<8|4),-(3<<8|5),-(3<<8|6),-(3<<8|7)
};
701
702 /*Unpacks the list of macro block modes for INTER frames.*/
oc_dec_mb_modes_unpack(oc_dec_ctx * _dec)703 static void oc_dec_mb_modes_unpack(oc_dec_ctx *_dec){
704 signed char *mb_modes;
705 const unsigned char *alphabet;
706 unsigned char scheme0_alphabet[8];
707 const ogg_int16_t *mode_tree;
708 size_t nmbs;
709 size_t mbi;
710 long val;
711 int mode_scheme;
712 val=oc_pack_read(&_dec->opb,3);
713 mode_scheme=(int)val;
714 if(mode_scheme==0){
715 int mi;
716 /*Just in case, initialize the modes to something.
717 If the bitstream doesn't contain each index exactly once, it's likely
718 corrupt and the rest of the packet is garbage anyway, but this way we
719 won't crash, and we'll decode SOMETHING.*/
720 /*LOOP VECTORIZES*/
721 for(mi=0;mi<OC_NMODES;mi++)scheme0_alphabet[mi]=OC_MODE_INTER_NOMV;
722 for(mi=0;mi<OC_NMODES;mi++){
723 val=oc_pack_read(&_dec->opb,3);
724 scheme0_alphabet[val]=OC_MODE_ALPHABETS[6][mi];
725 }
726 alphabet=scheme0_alphabet;
727 }
728 else alphabet=OC_MODE_ALPHABETS[mode_scheme-1];
729 mode_tree=mode_scheme==7?OC_CLC_MODE_TREE:OC_VLC_MODE_TREE;
730 mb_modes=_dec->state.mb_modes;
731 nmbs=_dec->state.nmbs;
732 for(mbi=0;mbi<nmbs;mbi++){
733 if(mb_modes[mbi]>0){
734 /*We have a coded luma block; decode a mode.*/
735 mb_modes[mbi]=alphabet[oc_huff_token_decode(&_dec->opb,mode_tree)];
736 }
737 /*For other valid macro blocks, INTER_NOMV is forced, but we rely on the
738 fact that OC_MODE_INTER_NOMV is already 0.*/
739 }
740 }
741
742
743
/*The variable-length code for a single motion vector component.
  The decoded values cover -31...31 and are stored with a bias of 32, which
   oc_mv_unpack() subtracts again.
  Codeword lengths range from 3 bits (for 0, +1 and -1) up to 8 bits.*/
static const ogg_int16_t OC_VLC_MV_COMP_TREE[101]={
  5,
   -(3<<8|32+0),-(3<<8|32+0),-(3<<8|32+0),-(3<<8|32+0),
   -(3<<8|32+1),-(3<<8|32+1),-(3<<8|32+1),-(3<<8|32+1),
   -(3<<8|32-1),-(3<<8|32-1),-(3<<8|32-1),-(3<<8|32-1),
   -(4<<8|32+2),-(4<<8|32+2),-(4<<8|32-2),-(4<<8|32-2),
   -(4<<8|32+3),-(4<<8|32+3),-(4<<8|32-3),-(4<<8|32-3),
   33,          36,          39,          42,
   45,          50,          55,          60,
   65,          74,          83,          92,
    1,-(1<<8|32+4),-(1<<8|32-4),
    1,-(1<<8|32+5),-(1<<8|32-5),
    1,-(1<<8|32+6),-(1<<8|32-6),
    1,-(1<<8|32+7),-(1<<8|32-7),
    2,-(2<<8|32+8),-(2<<8|32-8),-(2<<8|32+9),-(2<<8|32-9),
    2,-(2<<8|32+10),-(2<<8|32-10),-(2<<8|32+11),-(2<<8|32-11),
    2,-(2<<8|32+12),-(2<<8|32-12),-(2<<8|32+13),-(2<<8|32-13),
    2,-(2<<8|32+14),-(2<<8|32-14),-(2<<8|32+15),-(2<<8|32-15),
    3,
     -(3<<8|32+16),-(3<<8|32-16),-(3<<8|32+17),-(3<<8|32-17),
     -(3<<8|32+18),-(3<<8|32-18),-(3<<8|32+19),-(3<<8|32-19),
    3,
     -(3<<8|32+20),-(3<<8|32-20),-(3<<8|32+21),-(3<<8|32-21),
     -(3<<8|32+22),-(3<<8|32-22),-(3<<8|32+23),-(3<<8|32-23),
    3,
     -(3<<8|32+24),-(3<<8|32-24),-(3<<8|32+25),-(3<<8|32-25),
     -(3<<8|32+26),-(3<<8|32-26),-(3<<8|32+27),-(3<<8|32-27),
    3,
     -(3<<8|32+28),-(3<<8|32-28),-(3<<8|32+29),-(3<<8|32-29),
     -(3<<8|32+30),-(3<<8|32-30),-(3<<8|32+31),-(3<<8|32-31)
};
775
/*The constant-length code for a single motion vector component: every
   codeword is 6 bits long.
  The 64 entries alternate between +k and -k for k=0...31, stored with a bias
   of 32, which oc_mv_unpack() subtracts again.
  Both codewords for k=0 decode to the same value.*/
static const ogg_int16_t OC_CLC_MV_COMP_TREE[65]={
  6,
   -(6<<8|32 +0),-(6<<8|32 -0),-(6<<8|32 +1),-(6<<8|32 -1),
   -(6<<8|32 +2),-(6<<8|32 -2),-(6<<8|32 +3),-(6<<8|32 -3),
   -(6<<8|32 +4),-(6<<8|32 -4),-(6<<8|32 +5),-(6<<8|32 -5),
   -(6<<8|32 +6),-(6<<8|32 -6),-(6<<8|32 +7),-(6<<8|32 -7),
   -(6<<8|32 +8),-(6<<8|32 -8),-(6<<8|32 +9),-(6<<8|32 -9),
   -(6<<8|32+10),-(6<<8|32-10),-(6<<8|32+11),-(6<<8|32-11),
   -(6<<8|32+12),-(6<<8|32-12),-(6<<8|32+13),-(6<<8|32-13),
   -(6<<8|32+14),-(6<<8|32-14),-(6<<8|32+15),-(6<<8|32-15),
   -(6<<8|32+16),-(6<<8|32-16),-(6<<8|32+17),-(6<<8|32-17),
   -(6<<8|32+18),-(6<<8|32-18),-(6<<8|32+19),-(6<<8|32-19),
   -(6<<8|32+20),-(6<<8|32-20),-(6<<8|32+21),-(6<<8|32-21),
   -(6<<8|32+22),-(6<<8|32-22),-(6<<8|32+23),-(6<<8|32-23),
   -(6<<8|32+24),-(6<<8|32-24),-(6<<8|32+25),-(6<<8|32-25),
   -(6<<8|32+26),-(6<<8|32-26),-(6<<8|32+27),-(6<<8|32-27),
   -(6<<8|32+28),-(6<<8|32-28),-(6<<8|32+29),-(6<<8|32-29),
   -(6<<8|32+30),-(6<<8|32-30),-(6<<8|32+31),-(6<<8|32-31)
};
795
796
/*Decodes one motion vector from the packet.
  _opb:  The pack buffer to read from.
  _tree: The motion vector component code tree to use (OC_VLC_MV_COMP_TREE or
          OC_CLC_MV_COMP_TREE).
  Return: The two decoded components combined with OC_MV().*/
static oc_mv oc_mv_unpack(oc_pack_buf *_opb,const ogg_int16_t *_tree){
  int dx;
  int dy;
  /*The x component comes first in the bitstream.
    Both trees store their values with a bias of 32, which is removed here.*/
  dx=oc_huff_token_decode(_opb,_tree)-32;
  dy=oc_huff_token_decode(_opb,_tree)-32;
  return OC_MV(dx,dy);
}
804
/*Unpacks the list of motion vectors for INTER frames, and propagates the macro
   block modes and motion vectors to the individual fragments.
  For every coded fragment of every valid macro block this sets the reference
   frame index, the coding mode and the motion vector.
  Uncoded fragments are left untouched.
  _dec: The decoder context.*/
static void oc_dec_mv_unpack_and_frag_modes_fill(oc_dec_ctx *_dec){
  const oc_mb_map        *mb_maps;
  const signed char      *mb_modes;
  oc_set_chroma_mvs_func  set_chroma_mvs;
  const ogg_int16_t      *mv_comp_tree;
  oc_fragment            *frags;
  oc_mv                  *frag_mvs;
  const unsigned char    *map_idxs;
  int                     map_nidxs;
  oc_mv                   last_mv;
  oc_mv                   prior_mv;
  oc_mv                   cbmvs[4];
  size_t                  nmbs;
  size_t                  mbi;
  long                    val;
  set_chroma_mvs=OC_SET_CHROMA_MVS_TABLE[_dec->state.info.pixel_fmt];
  /*A single bit selects the constant-length (1) or variable-length (0) MV
     component code for the whole frame.*/
  val=oc_pack_read1(&_dec->opb);
  mv_comp_tree=val?OC_CLC_MV_COMP_TREE:OC_VLC_MV_COMP_TREE;
  map_idxs=OC_MB_MAP_IDXS[_dec->state.info.pixel_fmt];
  map_nidxs=OC_MB_MAP_NIDXS[_dec->state.info.pixel_fmt];
  /*last_mv and prior_mv hold the two most recently used MVs, which the
     OC_MODE_INTER_MV_LAST and OC_MODE_INTER_MV_LAST2 modes reuse.*/
  prior_mv=last_mv=0;
  frags=_dec->state.frags;
  frag_mvs=_dec->state.frag_mvs;
  mb_maps=(const oc_mb_map *)_dec->state.mb_maps;
  mb_modes=_dec->state.mb_modes;
  nmbs=_dec->state.nmbs;
  for(mbi=0;mbi<nmbs;mbi++){
    int mb_mode;
    mb_mode=mb_modes[mbi];
    if(mb_mode!=OC_MODE_INVALID){
      oc_mv     mbmv;
      ptrdiff_t fragi;
      int       mapi;
      int       mapii;
      int       refi;
      if(mb_mode==OC_MODE_INTER_MV_FOUR){
        oc_mv lbmvs[4];
        int   bi;
        prior_mv=last_mv;
        /*One MV is read for each coded block of plane 0; uncoded blocks get a
           zero MV.*/
        for(bi=0;bi<4;bi++){
          fragi=mb_maps[mbi][0][bi];
          if(frags[fragi].coded){
            frags[fragi].refi=OC_FRAME_PREV;
            frags[fragi].mb_mode=OC_MODE_INTER_MV_FOUR;
            lbmvs[bi]=last_mv=oc_mv_unpack(&_dec->opb,mv_comp_tree);
            frag_mvs[fragi]=lbmvs[bi];
          }
          else lbmvs[bi]=0;
        }
        /*The MVs of the other planes are derived from the four above by the
           routine selected for this pixel format.*/
        (*set_chroma_mvs)(cbmvs,lbmvs);
        /*The first four map entries were handled by the loop above.*/
        for(mapii=4;mapii<map_nidxs;mapii++){
          mapi=map_idxs[mapii];
          bi=mapi&3;
          fragi=mb_maps[mbi][mapi>>2][bi];
          if(frags[fragi].coded){
            frags[fragi].refi=OC_FRAME_PREV;
            frags[fragi].mb_mode=OC_MODE_INTER_MV_FOUR;
            frag_mvs[fragi]=cbmvs[bi];
          }
        }
      }
      else{
        switch(mb_mode){
          case OC_MODE_INTER_MV:{
            prior_mv=last_mv;
            last_mv=mbmv=oc_mv_unpack(&_dec->opb,mv_comp_tree);
          }break;
          case OC_MODE_INTER_MV_LAST:mbmv=last_mv;break;
          case OC_MODE_INTER_MV_LAST2:{
            /*Using the second-to-last MV swaps it with the last one.*/
            mbmv=prior_mv;
            prior_mv=last_mv;
            last_mv=mbmv;
          }break;
          case OC_MODE_GOLDEN_MV:{
            /*This MV does not update last_mv or prior_mv.*/
            mbmv=oc_mv_unpack(&_dec->opb,mv_comp_tree);
          }break;
          default:mbmv=0;break;
        }
        /*Fill in the MVs for the fragments.*/
        refi=OC_FRAME_FOR_MODE(mb_mode);
        mapii=0;
        do{
          mapi=map_idxs[mapii];
          fragi=mb_maps[mbi][mapi>>2][mapi&3];
          if(frags[fragi].coded){
            frags[fragi].refi=refi;
            frags[fragi].mb_mode=mb_mode;
            frag_mvs[fragi]=mbmv;
          }
        }
        while(++mapii<map_nidxs);
      }
    }
  }
}
902
/*Decodes the qi index (qii) of every coded fragment.
  When the frame has a single qi value, every coded fragment gets a qii of 0
   and nothing is read from the packet.
  Otherwise the indices are decoded in one or two run-length coded passes over
   the coded fragment list.
  Nothing is done if the frame has no coded fragments.
  _dec: The decoder context.*/
static void oc_dec_block_qis_unpack(oc_dec_ctx *_dec){
  oc_fragment     *frags;
  const ptrdiff_t *coded_fragis;
  ptrdiff_t        ncoded_fragis;
  ptrdiff_t        fragii;
  ptrdiff_t        fragi;
  ncoded_fragis=_dec->state.ntotal_coded_fragis;
  if(ncoded_fragis<=0)return;
  frags=_dec->state.frags;
  coded_fragis=_dec->state.coded_fragis;
  if(_dec->state.nqis==1){
    /*If this frame has only a single qi value, then just use it for all coded
       fragments.*/
    for(fragii=0;fragii<ncoded_fragis;fragii++){
      frags[coded_fragis[fragii]].qii=0;
    }
  }
  else{
    long val;
    int  flag;
    int  nqi1;
    int  run_count;
    /*Otherwise, we decode a qi index for each fragment, using two passes of
       the same binary RLE scheme used for super-block coded bits.
      The first pass marks each fragment as having a qii of 0 or greater than
       0, and the second pass (if necessary), distinguishes between a qii of
       1 and 2.
      At first we just store the qii in the fragment.
      After all the qii's are decoded, we make a final pass to replace them
       with the corresponding qi's for this frame.*/
    val=oc_pack_read1(&_dec->opb);
    flag=(int)val;
    /*nqi1 counts the fragments flagged with a non-zero qii in the first
       pass.*/
    nqi1=0;
    fragii=0;
    while(fragii<ncoded_fragis){
      int full_run;
      run_count=oc_sb_run_unpack(&_dec->opb);
      /*A maximum-length run is followed by a fresh flag bit instead of an
         implicit toggle.*/
      full_run=run_count>=4129;
      do{
        frags[coded_fragis[fragii++]].qii=flag;
        nqi1+=flag;
      }
      while(--run_count>0&&fragii<ncoded_fragis);
      if(full_run&&fragii<ncoded_fragis){
        val=oc_pack_read1(&_dec->opb);
        flag=(int)val;
      }
      else flag=!flag;
    }
    /*TODO: run_count should be 0 here.
      If it's not, we should issue a warning of some kind.*/
    /*If we have 3 different qi's for this frame, and there was at least one
       fragment with a non-zero qi, make the second pass.*/
    if(_dec->state.nqis==3&&nqi1>0){
      /*Skip qii==0 fragments.
        This loop has no bounds check: nqi1>0 guarantees that a fragment with a
         non-zero qii exists.*/
      for(fragii=0;frags[coded_fragis[fragii]].qii==0;fragii++);
      val=oc_pack_read1(&_dec->opb);
      flag=(int)val;
      do{
        int full_run;
        run_count=oc_sb_run_unpack(&_dec->opb);
        full_run=run_count>=4129;
        for(;fragii<ncoded_fragis;fragii++){
          fragi=coded_fragis[fragii];
          /*Fragments with a qii of 0 do not consume any of the run.*/
          if(frags[fragi].qii==0)continue;
          if(run_count--<=0)break;
          /*Turns a qii of 1 into 2 when the flag is set.*/
          frags[fragi].qii+=flag;
        }
        if(full_run&&fragii<ncoded_fragis){
          val=oc_pack_read1(&_dec->opb);
          flag=(int)val;
        }
        else flag=!flag;
      }
      while(fragii<ncoded_fragis);
      /*TODO: run_count should be 0 here.
        If it's not, we should issue a warning of some kind.*/
    }
  }
}
983
984
985
986 /*Unpacks the DC coefficient tokens.
987 Unlike when unpacking the AC coefficient tokens, we actually need to decode
988 the DC coefficient values now so that we can do DC prediction.
989 _huff_idx: The index of the Huffman table to use for each color plane.
990 _ntoks_left: The number of tokens left to be decoded in each color plane for
991 each coefficient.
992 This is updated as EOB tokens and zero run tokens are decoded.
993 Return: The length of any outstanding EOB run.*/
oc_dec_dc_coeff_unpack(oc_dec_ctx * _dec,int _huff_idxs[2],ptrdiff_t _ntoks_left[3][64])994 static ptrdiff_t oc_dec_dc_coeff_unpack(oc_dec_ctx *_dec,int _huff_idxs[2],
995 ptrdiff_t _ntoks_left[3][64]){
996 unsigned char *dct_tokens;
997 oc_fragment *frags;
998 const ptrdiff_t *coded_fragis;
999 ptrdiff_t ncoded_fragis;
1000 ptrdiff_t fragii;
1001 ptrdiff_t eobs;
1002 ptrdiff_t ti;
1003 int pli;
1004 dct_tokens=_dec->dct_tokens;
1005 frags=_dec->state.frags;
1006 coded_fragis=_dec->state.coded_fragis;
1007 ncoded_fragis=fragii=eobs=ti=0;
1008 for(pli=0;pli<3;pli++){
1009 ptrdiff_t run_counts[64];
1010 ptrdiff_t eob_count;
1011 ptrdiff_t eobi;
1012 int rli;
1013 ncoded_fragis+=_dec->state.ncoded_fragis[pli];
1014 memset(run_counts,0,sizeof(run_counts));
1015 _dec->eob_runs[pli][0]=eobs;
1016 _dec->ti0[pli][0]=ti;
1017 /*Continue any previous EOB run, if there was one.*/
1018 eobi=eobs;
1019 if(ncoded_fragis-fragii<eobi)eobi=ncoded_fragis-fragii;
1020 eob_count=eobi;
1021 eobs-=eobi;
1022 while(eobi-->0)frags[coded_fragis[fragii++]].dc=0;
1023 while(fragii<ncoded_fragis){
1024 int token;
1025 int cw;
1026 int eb;
1027 int skip;
1028 token=oc_huff_token_decode(&_dec->opb,
1029 _dec->huff_tables[_huff_idxs[pli+1>>1]]);
1030 dct_tokens[ti++]=(unsigned char)token;
1031 if(OC_DCT_TOKEN_NEEDS_MORE(token)){
1032 eb=(int)oc_pack_read(&_dec->opb,
1033 OC_INTERNAL_DCT_TOKEN_EXTRA_BITS[token]);
1034 dct_tokens[ti++]=(unsigned char)eb;
1035 if(token==OC_DCT_TOKEN_FAT_EOB)dct_tokens[ti++]=(unsigned char)(eb>>8);
1036 eb<<=OC_DCT_TOKEN_EB_POS(token);
1037 }
1038 else eb=0;
1039 cw=OC_DCT_CODE_WORD[token]+eb;
1040 eobs=cw>>OC_DCT_CW_EOB_SHIFT&0xFFF;
1041 if(cw==OC_DCT_CW_FINISH)eobs=OC_DCT_EOB_FINISH;
1042 if(eobs){
1043 eobi=OC_MINI(eobs,ncoded_fragis-fragii);
1044 eob_count+=eobi;
1045 eobs-=eobi;
1046 while(eobi-->0)frags[coded_fragis[fragii++]].dc=0;
1047 }
1048 else{
1049 int coeff;
1050 skip=(unsigned char)(cw>>OC_DCT_CW_RLEN_SHIFT);
1051 cw^=-(cw&1<<OC_DCT_CW_FLIP_BIT);
1052 coeff=cw>>OC_DCT_CW_MAG_SHIFT;
1053 if(skip)coeff=0;
1054 run_counts[skip]++;
1055 frags[coded_fragis[fragii++]].dc=coeff;
1056 }
1057 }
1058 /*Add the total EOB count to the longest run length.*/
1059 run_counts[63]+=eob_count;
1060 /*And convert the run_counts array to a moment table.*/
1061 for(rli=63;rli-->0;)run_counts[rli]+=run_counts[rli+1];
1062 /*Finally, subtract off the number of coefficients that have been
1063 accounted for by runs started in this coefficient.*/
1064 for(rli=64;rli-->0;)_ntoks_left[pli][rli]-=run_counts[rli];
1065 }
1066 _dec->dct_tokens_count=ti;
1067 return eobs;
1068 }
1069
1070 /*Unpacks the AC coefficient tokens.
1071 This can completely discard coefficient values while unpacking, and so is
1072 somewhat simpler than unpacking the DC coefficient tokens.
1073 _huff_idx: The index of the Huffman table to use for each color plane.
1074 _ntoks_left: The number of tokens left to be decoded in each color plane for
1075 each coefficient.
1076 This is updated as EOB tokens and zero run tokens are decoded.
1077 _eobs: The length of any outstanding EOB run from previous
1078 coefficients.
1079 Return: The length of any outstanding EOB run.*/
oc_dec_ac_coeff_unpack(oc_dec_ctx * _dec,int _zzi,int _huff_idxs[2],ptrdiff_t _ntoks_left[3][64],ptrdiff_t _eobs)1080 static int oc_dec_ac_coeff_unpack(oc_dec_ctx *_dec,int _zzi,int _huff_idxs[2],
1081 ptrdiff_t _ntoks_left[3][64],ptrdiff_t _eobs){
1082 unsigned char *dct_tokens;
1083 ptrdiff_t ti;
1084 int pli;
1085 dct_tokens=_dec->dct_tokens;
1086 ti=_dec->dct_tokens_count;
1087 for(pli=0;pli<3;pli++){
1088 ptrdiff_t run_counts[64];
1089 ptrdiff_t eob_count;
1090 size_t ntoks_left;
1091 size_t ntoks;
1092 int rli;
1093 _dec->eob_runs[pli][_zzi]=_eobs;
1094 _dec->ti0[pli][_zzi]=ti;
1095 ntoks_left=_ntoks_left[pli][_zzi];
1096 memset(run_counts,0,sizeof(run_counts));
1097 eob_count=0;
1098 ntoks=0;
1099 while(ntoks+_eobs<ntoks_left){
1100 int token;
1101 int cw;
1102 int eb;
1103 int skip;
1104 ntoks+=_eobs;
1105 eob_count+=_eobs;
1106 token=oc_huff_token_decode(&_dec->opb,
1107 _dec->huff_tables[_huff_idxs[pli+1>>1]]);
1108 dct_tokens[ti++]=(unsigned char)token;
1109 if(OC_DCT_TOKEN_NEEDS_MORE(token)){
1110 eb=(int)oc_pack_read(&_dec->opb,
1111 OC_INTERNAL_DCT_TOKEN_EXTRA_BITS[token]);
1112 dct_tokens[ti++]=(unsigned char)eb;
1113 if(token==OC_DCT_TOKEN_FAT_EOB)dct_tokens[ti++]=(unsigned char)(eb>>8);
1114 eb<<=OC_DCT_TOKEN_EB_POS(token);
1115 }
1116 else eb=0;
1117 cw=OC_DCT_CODE_WORD[token]+eb;
1118 skip=(unsigned char)(cw>>OC_DCT_CW_RLEN_SHIFT);
1119 _eobs=cw>>OC_DCT_CW_EOB_SHIFT&0xFFF;
1120 if(cw==OC_DCT_CW_FINISH)_eobs=OC_DCT_EOB_FINISH;
1121 if(_eobs==0){
1122 run_counts[skip]++;
1123 ntoks++;
1124 }
1125 }
1126 /*Add the portion of the last EOB run actually used by this coefficient.*/
1127 eob_count+=ntoks_left-ntoks;
1128 /*And remove it from the remaining EOB count.*/
1129 _eobs-=ntoks_left-ntoks;
1130 /*Add the total EOB count to the longest run length.*/
1131 run_counts[63]+=eob_count;
1132 /*And convert the run_counts array to a moment table.*/
1133 for(rli=63;rli-->0;)run_counts[rli]+=run_counts[rli+1];
1134 /*Finally, subtract off the number of coefficients that have been
1135 accounted for by runs started in this coefficient.*/
1136 for(rli=64-_zzi;rli-->0;)_ntoks_left[pli][_zzi+rli]-=run_counts[rli];
1137 }
1138 _dec->dct_tokens_count=ti;
1139 return _eobs;
1140 }
1141
1142 /*Tokens describing the DCT coefficients that belong to each fragment are
1143 stored in the bitstream grouped by coefficient, not by fragment.
1144
1145 This means that we either decode all the tokens in order, building up a
1146 separate coefficient list for each fragment as we go, and then go back and
1147 do the iDCT on each fragment, or we have to create separate lists of tokens
1148 for each coefficient, so that we can pull the next token required off the
1149 head of the appropriate list when decoding a specific fragment.
1150
1151 The former was VP3's choice, and it meant 2*w*h extra storage for all the
1152 decoded coefficient values.
1153
1154 We take the second option, which lets us store just one to three bytes per
1155 token (generally far fewer than the number of coefficients, due to EOB
1156 tokens and zero runs), and which requires us to only maintain a counter for
1157 each of the 64 coefficients, instead of a counter for every fragment to
1158 determine where the next token goes.
1159
1160 We actually use 3 counters per coefficient, one for each color plane, so we
1161 can decode all color planes simultaneously.
1162 This lets color conversion, etc., be done as soon as a full MCU (one or
1163 two super block rows) is decoded, while the image data is still in cache.*/
1164
/*Unpacks the DCT tokens for the whole frame: the DC tokens first, then the AC
   tokens for zig-zag indices 1 through 63 in order.
  Two 4-bit Huffman table indices are read before the DC tokens and two more
   before the AC tokens.
  The AC indices are advanced by 16 each time the zig-zag index enters a new
   group of coefficients.
  _dec: The decoder context.*/
static void oc_dec_residual_tokens_unpack(oc_dec_ctx *_dec){
  /*One past the last zig-zag index of each Huffman table group.*/
  static const unsigned char OC_HUFF_LIST_MAX[5]={1,6,15,28,64};
  ptrdiff_t ntoks_left[3][64];
  ptrdiff_t eobs;
  int       huff_idxs[2];
  int       pli;
  int       zzi;
  int       hgi;
  /*Initially every coefficient needs one token per coded fragment.*/
  for(pli=0;pli<3;pli++){
    for(zzi=0;zzi<64;zzi++){
      ntoks_left[pli][zzi]=_dec->state.ncoded_fragis[pli];
    }
  }
  huff_idxs[0]=(int)oc_pack_read(&_dec->opb,4);
  huff_idxs[1]=(int)oc_pack_read(&_dec->opb,4);
  _dec->eob_runs[0][0]=0;
  eobs=oc_dec_dc_coeff_unpack(_dec,huff_idxs,ntoks_left);
#if defined(HAVE_CAIRO)
  _dec->telemetry_dc_bytes=oc_pack_bytes_left(&_dec->opb);
#endif
  huff_idxs[0]=(int)oc_pack_read(&_dec->opb,4);
  huff_idxs[1]=(int)oc_pack_read(&_dec->opb,4);
  hgi=0;
  for(zzi=1;zzi<64;zzi++){
    /*Crossing into the next group selects the next bank of 16 tables.*/
    if(zzi>=OC_HUFF_LIST_MAX[hgi]){
      hgi++;
      huff_idxs[0]+=16;
      huff_idxs[1]+=16;
    }
    eobs=oc_dec_ac_coeff_unpack(_dec,zzi,huff_idxs,ntoks_left,eobs);
  }
  /*TODO: eobs should be exactly zero, or 4096 or greater.
    The second case occurs when an EOB run of size zero is encountered, which
     gets treated as an infinite EOB run (where infinity is PTRDIFF_MAX).
    If neither of these conditions holds, then a warning should be issued.*/
}
1203
1204
/*Brings the post-processing state in line with _dec->pp_level for the current
   frame, allocating or releasing buffers as needed.
  The DC qi tracking array (dc_qis) is only started on an intra frame; once it
   exists it is refreshed for every coded fragment.
  The variance array and the post-processed frame storage are only kept for
   levels above OC_PP_LEVEL_TRACKDCQI.
  _dec: The decoder context.
  Return: 0 if the post-processing buffers are set up for this frame, or 1 if
           post-processing should not be done (it is disabled, only DC qi
           tracking was requested, tracking has not started yet, or an
           allocation failed).*/
static int oc_dec_postprocess_init(oc_dec_ctx *_dec){
  int chroma_pp;
  int want_state;
  /*pp_level 0: disabled; free any memory used and return.*/
  if(_dec->pp_level<=OC_PP_LEVEL_DISABLED){
    if(_dec->dc_qis!=NULL){
      _ogg_free(_dec->dc_qis);
      _ogg_free(_dec->variances);
      _ogg_free(_dec->pp_frame_data);
      _dec->dc_qis=NULL;
      _dec->variances=NULL;
      _dec->pp_frame_data=NULL;
    }
    return 1;
  }
  if(_dec->dc_qis!=NULL){
    const ptrdiff_t *fragis;
    unsigned char   *dc_qis;
    ptrdiff_t        nfragis;
    ptrdiff_t        i;
    unsigned char    qi0;
    /*Update the DC quantization index of each coded block.*/
    dc_qis=_dec->dc_qis;
    fragis=_dec->state.coded_fragis;
    nfragis=_dec->state.ncoded_fragis[0]+_dec->state.ncoded_fragis[1]
     +_dec->state.ncoded_fragis[2];
    qi0=(unsigned char)_dec->state.qis[0];
    for(i=0;i<nfragis;i++)dc_qis[fragis[i]]=qi0;
  }
  else{
    /*If we haven't been tracking DC quantization indices, there's no point in
       starting anywhere but on an intra frame.*/
    if(_dec->state.frame_type!=OC_INTRA_FRAME)return 1;
    _dec->dc_qis=(unsigned char *)_ogg_malloc(
     _dec->state.nfrags*sizeof(_dec->dc_qis[0]));
    if(_dec->dc_qis==NULL)return 1;
    memset(_dec->dc_qis,_dec->state.qis[0],_dec->state.nfrags);
  }
  /*pp_level 1: Stop after updating DC quantization indices.*/
  if(_dec->pp_level<=OC_PP_LEVEL_TRACKDCQI){
    if(_dec->variances!=NULL){
      _ogg_free(_dec->variances);
      _ogg_free(_dec->pp_frame_data);
      _dec->variances=NULL;
      _dec->pp_frame_data=NULL;
    }
    return 1;
  }
  if(_dec->variances==NULL){
    size_t y_sz;
    size_t c_sz;
    int    c_w;
    int    c_h;
    y_sz=_dec->state.info.frame_width*(size_t)_dec->state.info.frame_height;
    c_w=_dec->state.info.frame_width>>!(_dec->state.info.pixel_fmt&1);
    c_h=_dec->state.info.frame_height>>!(_dec->state.info.pixel_fmt&2);
    c_sz=c_w*(size_t)c_h;
    /*Allocate space for the chroma planes, even if we're not going to use
       them; this simplifies allocation state management, though it may waste
       memory on the few systems that don't overcommit pages.*/
    _dec->pp_frame_data=(unsigned char *)_ogg_malloc(
     (y_sz+c_sz*2)*sizeof(_dec->pp_frame_data[0]));
    _dec->variances=(int *)_ogg_malloc(
     _dec->state.nfrags*sizeof(_dec->variances[0]));
    if(_dec->variances==NULL||_dec->pp_frame_data==NULL){
      /*Keep the two allocations all-or-nothing.*/
      _ogg_free(_dec->pp_frame_data);
      _ogg_free(_dec->variances);
      _dec->pp_frame_data=NULL;
      _dec->variances=NULL;
      return 1;
    }
    /*Force an update of the PP buffer pointers.*/
    _dec->pp_frame_state=0;
  }
  chroma_pp=_dec->pp_level>=OC_PP_LEVEL_DEBLOCKC;
  /*pp_frame_state records which layout the buffer pointers currently have:
     1 for luma only, 2 for all three planes.*/
  want_state=1+chroma_pp;
  /*Update the PP buffer pointers if necessary.*/
  if(_dec->pp_frame_state!=want_state){
    if(chroma_pp){
      size_t y_sz;
      size_t c_sz;
      int    c_w;
      int    c_h;
      int    pli;
      /*Set up pointers to all three PP planes.*/
      y_sz=_dec->state.info.frame_width*(size_t)_dec->state.info.frame_height;
      c_w=_dec->state.info.frame_width>>!(_dec->state.info.pixel_fmt&1);
      c_h=_dec->state.info.frame_height>>!(_dec->state.info.pixel_fmt&2);
      c_sz=c_w*(size_t)c_h;
      _dec->pp_frame_buf[0].width=_dec->state.info.frame_width;
      _dec->pp_frame_buf[0].height=_dec->state.info.frame_height;
      _dec->pp_frame_buf[0].stride=_dec->pp_frame_buf[0].width;
      _dec->pp_frame_buf[0].data=_dec->pp_frame_data;
      for(pli=1;pli<3;pli++){
        _dec->pp_frame_buf[pli].width=c_w;
        _dec->pp_frame_buf[pli].height=c_h;
        _dec->pp_frame_buf[pli].stride=_dec->pp_frame_buf[pli].width;
      }
      _dec->pp_frame_buf[1].data=_dec->pp_frame_buf[0].data+y_sz;
      _dec->pp_frame_buf[2].data=_dec->pp_frame_buf[1].data+c_sz;
      oc_ycbcr_buffer_flip(_dec->pp_frame_buf,_dec->pp_frame_buf);
    }
    else{
      /*If chroma processing is disabled, just use the PP luma plane.
        The plane is described with a negative stride, so data points at the
         start of the last row of storage.*/
      _dec->pp_frame_buf[0].width=_dec->state.info.frame_width;
      _dec->pp_frame_buf[0].height=_dec->state.info.frame_height;
      _dec->pp_frame_buf[0].stride=-_dec->pp_frame_buf[0].width;
      _dec->pp_frame_buf[0].data=_dec->pp_frame_data+
       (1-_dec->pp_frame_buf[0].height)*(ptrdiff_t)_dec->pp_frame_buf[0].stride;
    }
    _dec->pp_frame_state=want_state;
  }
  /*If we're not processing chroma, copy the reference frame's chroma planes.*/
  if(!chroma_pp){
    memcpy(_dec->pp_frame_buf+1,
     _dec->state.ref_frame_bufs[_dec->state.ref_frame_idx[OC_FRAME_SELF]]+1,
     sizeof(_dec->pp_frame_buf[1])*2);
  }
  return 0;
}
1324
1325
1326 /*Initialize the main decoding pipeline.*/
oc_dec_pipeline_init(oc_dec_ctx * _dec,oc_dec_pipeline_state * _pipe)1327 static void oc_dec_pipeline_init(oc_dec_ctx *_dec,
1328 oc_dec_pipeline_state *_pipe){
1329 const ptrdiff_t *coded_fragis;
1330 const ptrdiff_t *uncoded_fragis;
1331 int flimit;
1332 int pli;
1333 int qii;
1334 int qti;
1335 int zzi;
1336 /*If chroma is sub-sampled in the vertical direction, we have to decode two
1337 super block rows of Y' for each super block row of Cb and Cr.*/
1338 _pipe->mcu_nvfrags=4<<!(_dec->state.info.pixel_fmt&2);
1339 /*Initialize the token and extra bits indices for each plane and
1340 coefficient.*/
1341 memcpy(_pipe->ti,_dec->ti0,sizeof(_pipe->ti));
1342 /*Also copy over the initial the EOB run counts.*/
1343 memcpy(_pipe->eob_runs,_dec->eob_runs,sizeof(_pipe->eob_runs));
1344 /*Set up per-plane pointers to the coded and uncoded fragments lists.*/
1345 coded_fragis=_dec->state.coded_fragis;
1346 uncoded_fragis=coded_fragis+_dec->state.nfrags;
1347 for(pli=0;pli<3;pli++){
1348 ptrdiff_t ncoded_fragis;
1349 _pipe->coded_fragis[pli]=coded_fragis;
1350 _pipe->uncoded_fragis[pli]=uncoded_fragis;
1351 ncoded_fragis=_dec->state.ncoded_fragis[pli];
1352 coded_fragis+=ncoded_fragis;
1353 uncoded_fragis+=ncoded_fragis-_dec->state.fplanes[pli].nfrags;
1354 }
1355 /*Set up condensed quantizer tables.*/
1356 for(pli=0;pli<3;pli++){
1357 for(qii=0;qii<_dec->state.nqis;qii++){
1358 for(qti=0;qti<2;qti++){
1359 _pipe->dequant[pli][qii][qti]=
1360 _dec->state.dequant_tables[_dec->state.qis[qii]][pli][qti];
1361 }
1362 }
1363 }
1364 /*Set the previous DC predictor to 0 for all color planes and frame types.*/
1365 memset(_pipe->pred_last,0,sizeof(_pipe->pred_last));
1366 /*Initialize the bounding value array for the loop filter.*/
1367 flimit=_dec->state.loop_filter_limits[_dec->state.qis[0]];
1368 _pipe->loop_filter=flimit!=0;
1369 if(flimit!=0)oc_loop_filter_init(&_dec->state,_pipe->bounding_values,flimit);
1370 /*Initialize any buffers needed for post-processing.
1371 We also save the current post-processing level, to guard against the user
1372 changing it from a callback.*/
1373 if(!oc_dec_postprocess_init(_dec))_pipe->pp_level=_dec->pp_level;
1374 /*If we don't have enough information to post-process, disable it, regardless
1375 of the user-requested level.*/
1376 else{
1377 _pipe->pp_level=OC_PP_LEVEL_DISABLED;
1378 memcpy(_dec->pp_frame_buf,
1379 _dec->state.ref_frame_bufs[_dec->state.ref_frame_idx[OC_FRAME_SELF]],
1380 sizeof(_dec->pp_frame_buf[0])*3);
1381 }
1382 /*Clear down the DCT coefficient buffer for the first block.*/
1383 for(zzi=0;zzi<64;zzi++)_pipe->dct_coeffs[zzi]=0;
1384 }
1385
1386 /*Undo the DC prediction in a single plane of an MCU (one or two super block
1387 rows).
1388 As a side effect, the number of coded and uncoded fragments in this plane of
1389 the MCU is also computed.*/
oc_dec_dc_unpredict_mcu_plane_c(oc_dec_ctx * _dec,oc_dec_pipeline_state * _pipe,int _pli)1390 void oc_dec_dc_unpredict_mcu_plane_c(oc_dec_ctx *_dec,
1391 oc_dec_pipeline_state *_pipe,int _pli){
1392 const oc_fragment_plane *fplane;
1393 oc_fragment *frags;
1394 int *pred_last;
1395 ptrdiff_t ncoded_fragis;
1396 ptrdiff_t fragi;
1397 int fragx;
1398 int fragy;
1399 int fragy0;
1400 int fragy_end;
1401 int nhfrags;
1402 /*Compute the first and last fragment row of the current MCU for this
1403 plane.*/
1404 fplane=_dec->state.fplanes+_pli;
1405 fragy0=_pipe->fragy0[_pli];
1406 fragy_end=_pipe->fragy_end[_pli];
1407 nhfrags=fplane->nhfrags;
1408 pred_last=_pipe->pred_last[_pli];
1409 frags=_dec->state.frags;
1410 ncoded_fragis=0;
1411 fragi=fplane->froffset+fragy0*(ptrdiff_t)nhfrags;
1412 for(fragy=fragy0;fragy<fragy_end;fragy++){
1413 if(fragy==0){
1414 /*For the first row, all of the cases reduce to just using the previous
1415 predictor for the same reference frame.*/
1416 for(fragx=0;fragx<nhfrags;fragx++,fragi++){
1417 if(frags[fragi].coded){
1418 int refi;
1419 refi=frags[fragi].refi;
1420 pred_last[refi]=frags[fragi].dc+=pred_last[refi];
1421 ncoded_fragis++;
1422 }
1423 }
1424 }
1425 else{
1426 oc_fragment *u_frags;
1427 int l_ref;
1428 int ul_ref;
1429 int u_ref;
1430 u_frags=frags-nhfrags;
1431 l_ref=-1;
1432 ul_ref=-1;
1433 u_ref=u_frags[fragi].refi;
1434 for(fragx=0;fragx<nhfrags;fragx++,fragi++){
1435 int ur_ref;
1436 if(fragx+1>=nhfrags)ur_ref=-1;
1437 else ur_ref=u_frags[fragi+1].refi;
1438 if(frags[fragi].coded){
1439 int pred;
1440 int refi;
1441 refi=frags[fragi].refi;
1442 /*We break out a separate case based on which of our neighbors use
1443 the same reference frames.
1444 This is somewhat faster than trying to make a generic case which
1445 handles all of them, since it reduces lots of poorly predicted
1446 jumps to one switch statement, and also lets a number of the
1447 multiplications be optimized out by strength reduction.*/
1448 switch((l_ref==refi)|(ul_ref==refi)<<1|
1449 (u_ref==refi)<<2|(ur_ref==refi)<<3){
1450 default:pred=pred_last[refi];break;
1451 case 1:
1452 case 3:pred=frags[fragi-1].dc;break;
1453 case 2:pred=u_frags[fragi-1].dc;break;
1454 case 4:
1455 case 6:
1456 case 12:pred=u_frags[fragi].dc;break;
1457 case 5:pred=(frags[fragi-1].dc+u_frags[fragi].dc)/2;break;
1458 case 8:pred=u_frags[fragi+1].dc;break;
1459 case 9:
1460 case 11:
1461 case 13:{
1462 /*The TI compiler mis-compiles this line.*/
1463 pred=(75*frags[fragi-1].dc+53*u_frags[fragi+1].dc)/128;
1464 }break;
1465 case 10:pred=(u_frags[fragi-1].dc+u_frags[fragi+1].dc)/2;break;
1466 case 14:{
1467 pred=(3*(u_frags[fragi-1].dc+u_frags[fragi+1].dc)
1468 +10*u_frags[fragi].dc)/16;
1469 }break;
1470 case 7:
1471 case 15:{
1472 int p0;
1473 int p1;
1474 int p2;
1475 p0=frags[fragi-1].dc;
1476 p1=u_frags[fragi-1].dc;
1477 p2=u_frags[fragi].dc;
1478 pred=(29*(p0+p2)-26*p1)/32;
1479 if(abs(pred-p2)>128)pred=p2;
1480 else if(abs(pred-p0)>128)pred=p0;
1481 else if(abs(pred-p1)>128)pred=p1;
1482 }break;
1483 }
1484 pred_last[refi]=frags[fragi].dc+=pred;
1485 ncoded_fragis++;
1486 l_ref=refi;
1487 }
1488 else l_ref=-1;
1489 ul_ref=u_ref;
1490 u_ref=ur_ref;
1491 }
1492 }
1493 }
1494 _pipe->ncoded_fragis[_pli]=ncoded_fragis;
1495 /*Also save the number of uncoded fragments so we know how many to copy.*/
1496 _pipe->nuncoded_fragis[_pli]=
1497 (fragy_end-fragy0)*(ptrdiff_t)nhfrags-ncoded_fragis;
1498 }
1499
1500 /*Reconstructs all coded fragments in a single MCU (one or two super block
1501 rows).
1502 This requires that each coded fragment have a proper macro block mode and
1503 motion vector (if not in INTRA mode), and have its DC value decoded, with
1504 the DC prediction process reversed, and the number of coded and uncoded
1505 fragments in this plane of the MCU be counted.
1506 The token lists for each color plane and coefficient should also be filled
1507 in, along with initial token offsets, extra bits offsets, and EOB run
1508 counts.*/
oc_dec_frags_recon_mcu_plane(oc_dec_ctx * _dec,oc_dec_pipeline_state * _pipe,int _pli)1509 static void oc_dec_frags_recon_mcu_plane(oc_dec_ctx *_dec,
1510 oc_dec_pipeline_state *_pipe,int _pli){
1511 unsigned char *dct_tokens;
1512 const unsigned char *dct_fzig_zag;
1513 ogg_uint16_t dc_quant[2];
1514 const oc_fragment *frags;
1515 const ptrdiff_t *coded_fragis;
1516 ptrdiff_t ncoded_fragis;
1517 ptrdiff_t fragii;
1518 ptrdiff_t *ti;
1519 ptrdiff_t *eob_runs;
1520 int qti;
1521 dct_tokens=_dec->dct_tokens;
1522 dct_fzig_zag=_dec->state.opt_data.dct_fzig_zag;
1523 frags=_dec->state.frags;
1524 coded_fragis=_pipe->coded_fragis[_pli];
1525 ncoded_fragis=_pipe->ncoded_fragis[_pli];
1526 ti=_pipe->ti[_pli];
1527 eob_runs=_pipe->eob_runs[_pli];
1528 for(qti=0;qti<2;qti++)dc_quant[qti]=_pipe->dequant[_pli][0][qti][0];
1529 for(fragii=0;fragii<ncoded_fragis;fragii++){
1530 const ogg_uint16_t *ac_quant;
1531 ptrdiff_t fragi;
1532 int last_zzi;
1533 int zzi;
1534 fragi=coded_fragis[fragii];
1535 qti=frags[fragi].mb_mode!=OC_MODE_INTRA;
1536 ac_quant=_pipe->dequant[_pli][frags[fragi].qii][qti];
1537 /*Decode the AC coefficients.*/
1538 for(zzi=0;zzi<64;){
1539 int token;
1540 last_zzi=zzi;
1541 if(eob_runs[zzi]){
1542 eob_runs[zzi]--;
1543 break;
1544 }
1545 else{
1546 ptrdiff_t eob;
1547 int cw;
1548 int rlen;
1549 int coeff;
1550 int lti;
1551 lti=ti[zzi];
1552 token=dct_tokens[lti++];
1553 cw=OC_DCT_CODE_WORD[token];
1554 /*These parts could be done branchless, but the branches are fairly
1555 predictable and the C code translates into more than a few
1556 instructions, so it's worth it to avoid them.*/
1557 if(OC_DCT_TOKEN_NEEDS_MORE(token)){
1558 cw+=dct_tokens[lti++]<<OC_DCT_TOKEN_EB_POS(token);
1559 }
1560 eob=cw>>OC_DCT_CW_EOB_SHIFT&0xFFF;
1561 if(token==OC_DCT_TOKEN_FAT_EOB){
1562 eob+=dct_tokens[lti++]<<8;
1563 if(eob==0)eob=OC_DCT_EOB_FINISH;
1564 }
1565 rlen=(unsigned char)(cw>>OC_DCT_CW_RLEN_SHIFT);
1566 cw^=-(cw&1<<OC_DCT_CW_FLIP_BIT);
1567 coeff=cw>>OC_DCT_CW_MAG_SHIFT;
1568 eob_runs[zzi]=eob;
1569 ti[zzi]=lti;
1570 zzi+=rlen;
1571 _pipe->dct_coeffs[dct_fzig_zag[zzi]]=
1572 (ogg_int16_t)(coeff*(int)ac_quant[zzi]);
1573 zzi+=!eob;
1574 }
1575 }
1576 /*TODO: zzi should be exactly 64 here.
1577 If it's not, we should report some kind of warning.*/
1578 zzi=OC_MINI(zzi,64);
1579 _pipe->dct_coeffs[0]=(ogg_int16_t)frags[fragi].dc;
1580 /*last_zzi is always initialized.
1581 If your compiler thinks otherwise, it is dumb.*/
1582 oc_state_frag_recon(&_dec->state,fragi,_pli,
1583 _pipe->dct_coeffs,last_zzi,dc_quant[qti]);
1584 }
1585 _pipe->coded_fragis[_pli]+=ncoded_fragis;
1586 /*Right now the reconstructed MCU has only the coded blocks in it.*/
1587 /*TODO: We make the decision here to always copy the uncoded blocks into it
1588 from the reference frame.
1589 We could also copy the coded blocks back over the reference frame, if we
1590 wait for an additional MCU to be decoded, which might be faster if only a
1591 small number of blocks are coded.
1592 However, this introduces more latency, creating a larger cache footprint.
1593 It's unknown which decision is better, but this one results in simpler
1594 code, and the hard case (high bitrate, high resolution) is handled
1595 correctly.*/
1596 /*Copy the uncoded blocks from the previous reference frame.*/
1597 if(_pipe->nuncoded_fragis[_pli]>0){
1598 _pipe->uncoded_fragis[_pli]-=_pipe->nuncoded_fragis[_pli];
1599 oc_frag_copy_list(&_dec->state,
1600 _dec->state.ref_frame_data[OC_FRAME_SELF],
1601 _dec->state.ref_frame_data[OC_FRAME_PREV],
1602 _dec->state.ref_ystride[_pli],_pipe->uncoded_fragis[_pli],
1603 _pipe->nuncoded_fragis[_pli],_dec->state.frag_buf_offs);
1604 }
1605 }
1606
/*Filters a horizontal block edge.
  For each of 8 columns, 10 vertically adjacent pixels are read from _src and
   8 pixels are written to _dst; the outputs correspond to source rows 1
   through 8, so the edge lies between output rows 3 and 4.
  A column is smoothed only if the activity on both sides of the edge is below
   _flimit and the step across the edge is below _qstep; otherwise the source
   pixels are copied unchanged.
  _dst:         The first output pixel.
  _dst_ystride: The offset between output rows.
  _src:         The first input pixel (one row above the first output row).
  _src_ystride: The offset between input rows.
  _qstep:       The limit on the step across the edge.
  _flimit:      The limit on the activity on either side of the edge.
  _variance0:   Accumulates the activity (capped at 255 per column) on the
                 first side of the edge.
  _variance1:   Accumulates the activity (capped at 255 per column) on the
                 second side of the edge.*/
static void oc_filter_hedge(unsigned char *_dst,int _dst_ystride,
 const unsigned char *_src,int _src_ystride,int _qstep,int _flimit,
 int *_variance0,int *_variance1){
  int x;
  for(x=0;x<8;x++){
    const unsigned char *in;
    unsigned char       *out;
    int                  p[10];
    int                  q[8];
    int                  act0;
    int                  act1;
    int                  i;
    /*Gather the whole column before anything is written.*/
    in=_src+x;
    for(i=0;i<10;i++){
      p[i]=*in;
      in+=_src_ystride;
    }
    act0=0;
    act1=0;
    for(i=0;i<4;i++){
      act0+=abs(p[i+1]-p[i]);
      act1+=abs(p[i+5]-p[i+6]);
    }
    *_variance0+=OC_MINI(255,act0);
    *_variance1+=OC_MINI(255,act1);
    if(act0<_flimit&&act1<_flimit&&p[5]-p[4]<_qstep&&p[4]-p[5]<_qstep){
      /*The taps of each output sum to 8; the two outputs at each end reuse
         the outermost pixel in place of the ones we did not read.*/
      q[0]=p[0]*3+p[1]*2+p[2]+p[3]+p[4]+4>>3;
      q[1]=p[0]*2+p[1]+p[2]*2+p[3]+p[4]+p[5]+4>>3;
      for(i=0;i<4;i++){
        q[i+2]=p[i]+p[i+1]+p[i+2]+p[i+3]*2+p[i+4]+p[i+5]+p[i+6]+4>>3;
      }
      q[6]=p[4]+p[5]+p[6]+p[7]*2+p[8]+p[9]*2+4>>3;
      q[7]=p[5]+p[6]+p[7]+p[8]*2+p[9]*3+4>>3;
    }
    else{
      for(i=0;i<8;i++)q[i]=p[i+1];
    }
    out=_dst+x;
    for(i=0;i<8;i++){
      *out=(unsigned char)q[i];
      out+=_dst_ystride;
    }
  }
}
1660
/*Filters a vertical block edge in place.
  For each of 8 rows, the 10 pixels from _dst[-1] to _dst[8] are read, and, if
   the row qualifies, the 8 pixels _dst[0] to _dst[7] are overwritten with
   smoothed values; the edge lies between _dst[3] and _dst[4].
  A row qualifies if the activity on both sides of the edge is below _flimit
   and the step across the edge is below _qstep.
  _dst:         The first pixel to be filtered in the first row.
  _dst_ystride: The offset between rows.
  _qstep:       The limit on the step across the edge.
  _flimit:      The limit on the activity on either side of the edge.
  _variances:   _variances[0] and _variances[1] accumulate the activity
                 (capped at 255 per row) on the first and second side of the
                 edge, respectively.*/
static void oc_filter_vedge(unsigned char *_dst,int _dst_ystride,
 int _qstep,int _flimit,int *_variances){
  unsigned char *row;
  int            y;
  row=_dst;
  for(y=0;y<8;y++){
    int p[10];
    int act0;
    int act1;
    int i;
    /*Gather the whole row before anything is overwritten.*/
    for(i=0;i<10;i++)p[i]=row[i-1];
    act0=0;
    act1=0;
    for(i=0;i<4;i++){
      act0+=abs(p[i+1]-p[i]);
      act1+=abs(p[i+5]-p[i+6]);
    }
    _variances[0]+=OC_MINI(255,act0);
    _variances[1]+=OC_MINI(255,act1);
    if(act0<_flimit&&act1<_flimit&&p[5]-p[4]<_qstep&&p[4]-p[5]<_qstep){
      row[0]=(unsigned char)(p[0]*3+p[1]*2+p[2]+p[3]+p[4]+4>>3);
      row[1]=(unsigned char)(p[0]*2+p[1]+p[2]*2+p[3]+p[4]+p[5]+4>>3);
      for(i=0;i<4;i++){
        row[i+2]=(unsigned char)(p[i]+p[i+1]+p[i+2]+p[i+3]*2+
         p[i+4]+p[i+5]+p[i+6]+4>>3);
      }
      row[6]=(unsigned char)(p[4]+p[5]+p[6]+p[7]*2+p[8]+p[9]*2+4>>3);
      row[7]=(unsigned char)(p[5]+p[6]+p[7]+p[8]*2+p[9]*3+4>>3);
    }
    row+=_dst_ystride;
  }
}
1697
/*Deblocks a range of fragment rows of one plane, reading from _src and
   writing the result to _dst.
  Horizontal edges are filtered from _src into _dst, and vertical edges are
   then filtered in place in _dst.
  Because each horizontal edge filter covers the 4 rows on either side of a
   block boundary, the rows processed are offset by 4 pixels from the fragment
   rows: the first 4 rows of the plane and the last rows of the plane are
   copied unfiltered.
  The per-fragment activity sums gathered by the filters are accumulated in
   _dec->variances.
  _dec:       The decoder context.
  _dst:       The destination image planes (indexed by _pli).
  _src:       The source image planes (indexed by _pli).
  _pli:       The color plane to process.
  _fragy0:    The first fragment row to process.
  _fragy_end: One past the last fragment row to process.*/
static void oc_dec_deblock_frag_rows(oc_dec_ctx *_dec,
 th_img_plane *_dst,th_img_plane *_src,int _pli,int _fragy0,
 int _fragy_end){
  oc_fragment_plane   *fplane;
  int                 *variance;
  unsigned char       *dc_qi;
  unsigned char       *dst;
  const unsigned char *src;
  ptrdiff_t            froffset;
  int                  dst_ystride;
  int                  src_ystride;
  int                  nhfrags;
  int                  width;
  int                  notstart;
  int                  notdone;
  int                  flimit;
  int                  qstep;
  int                  y_end;
  int                  y;
  int                  x;
  _dst+=_pli;
  _src+=_pli;
  fplane=_dec->state.fplanes+_pli;
  nhfrags=fplane->nhfrags;
  froffset=fplane->froffset+_fragy0*(ptrdiff_t)nhfrags;
  variance=_dec->variances+froffset;
  dc_qi=_dec->dc_qis+froffset;
  notstart=_fragy0>0;
  notdone=_fragy_end<fplane->nvfrags;
  /*We want to clear an extra row of variances, except at the end.
    When this is not the first call, the first row was already started by the
     previous call, so clearing begins one row later.*/
  memset(variance+(nhfrags&-notstart),0,
   (_fragy_end+notdone-_fragy0-notstart)*(nhfrags*sizeof(variance[0])));
  /*Except for the first time, we want to point to the middle of the row.*/
  y=(_fragy0<<3)+(notstart<<2);
  dst_ystride=_dst->stride;
  src_ystride=_src->stride;
  dst=_dst->data+y*(ptrdiff_t)dst_ystride;
  src=_src->data+y*(ptrdiff_t)src_ystride;
  width=_dst->width;
  /*The top 4 rows of the plane have no edge above them; copy them as is.*/
  for(;y<4;y++){
    memcpy(dst,src,width*sizeof(dst[0]));
    dst+=dst_ystride;
    src+=src_ystride;
  }
  /*We also want to skip the last row in the frame for this loop.*/
  y_end=_fragy_end-!notdone<<3;
  for(;y<y_end;y+=8){
    qstep=_dec->pp_dc_scale[*dc_qi];
    flimit=(qstep*3)>>2;
    oc_filter_hedge(dst,dst_ystride,src-src_ystride,src_ystride,
     qstep,flimit,variance,variance+nhfrags);
    variance++;
    dc_qi++;
    for(x=8;x<width;x+=8){
      qstep=_dec->pp_dc_scale[*dc_qi];
      flimit=(qstep*3)>>2;
      oc_filter_hedge(dst+x,dst_ystride,src+x-src_ystride,src_ystride,
       qstep,flimit,variance,variance+nhfrags);
      /*The vertical edge to the left of this block is filtered in place,
         over the 8 rows ending just above the current position.
        The strides may be negative, so they are scaled by multiplication
         rather than by a left shift.*/
      oc_filter_vedge(dst+x-dst_ystride*4-4,dst_ystride,
       qstep,flimit,variance-1);
      variance++;
      dc_qi++;
    }
    dst+=dst_ystride*8;
    src+=src_ystride*8;
  }
  /*And finally, handle the last row in the frame, if it's in the range.*/
  if(!notdone){
    int height;
    height=_dst->height;
    /*The remaining rows have no edge below them; copy them as is.*/
    for(;y<height;y++){
      memcpy(dst,src,width*sizeof(dst[0]));
      dst+=dst_ystride;
      src+=src_ystride;
    }
    /*Filter the last row of vertical block edges.*/
    dc_qi++;
    for(x=8;x<width;x+=8){
      qstep=_dec->pp_dc_scale[*dc_qi++];
      flimit=(qstep*3)>>2;
      oc_filter_vedge(dst+x-dst_ystride*8-4,dst_ystride,
       qstep,flimit,variance++);
    }
  }
}
1783
/*De-rings a single 8x8 block in place.
  Each pixel is replaced by a weighted average of itself and its four
   neighbors.
  A neighbor's weight shrinks as the absolute difference across the pixel
   boundary grows, is clamped to the range 0 to mod_hi, and becomes
   _sharp_mod when the unclamped value falls below -64.
  _idata:     The top-left pixel of the block.
  _ystride:   The offset between rows.
  _b:         A mask of sides across which no pixels may be read: bit 0 is
               the left, bit 1 the right, bit 2 the top and bit 3 the bottom.
              On such a side the block's own border pixel is used instead.
  _dc_scale:  Added to every weight; three times this value also caps the
               weights.
  _sharp_mod: The weight to use across very strong edges.
  _strong:    Selects the stronger (1) or weaker (0) variant of the filter.*/
static void oc_dering_block(unsigned char *_idata,int _ystride,int _b,
 int _dc_scale,int _sharp_mod,int _strong){
  static const unsigned char OC_MOD_MAX[2]={24,32};
  static const unsigned char OC_MOD_SHIFT[2]={1,0};
  const unsigned char *above;
  const unsigned char *cur;
  const unsigned char *below;
  const unsigned char *pcol;
  const unsigned char *col;
  unsigned char       *out;
  int                  vmod[72];
  int                  hmod[72];
  int                  mod_hi;
  int                  shift;
  int                  no_left;
  int                  no_right;
  int                  no_top;
  int                  no_bottom;
  int                  by;
  int                  bx;
  no_left=(_b&1)!=0;
  no_right=(_b&2)!=0;
  no_top=(_b&4)!=0;
  no_bottom=(_b&8)!=0;
  shift=OC_MOD_SHIFT[_strong];
  mod_hi=OC_MINI(3*_dc_scale,OC_MOD_MAX[_strong]);
  /*vmod[by*8+bx] is the weight across the boundary above row by (so row 8 is
     the boundary below the block).*/
  cur=_idata;
  above=no_top?cur:cur-_ystride;
  for(by=0;by<9;by++){
    for(bx=0;bx<8;bx++){
      int mod;
      mod=32+_dc_scale-(abs(cur[bx]-above[bx])<<shift);
      vmod[by*8+bx]=mod<-64?_sharp_mod:OC_CLAMPI(0,mod,mod_hi);
    }
    above=cur;
    /*Don't step past the last row of the block if we can't read below it.*/
    if(!no_bottom||by<7)cur+=_ystride;
  }
  /*hmod[bx*8+by] is the weight across the boundary to the left of column bx
     (so column 8 is the boundary to the right of the block).*/
  col=_idata;
  pcol=no_left?col:col-1;
  for(bx=0;bx<9;bx++){
    for(by=0;by<8;by++){
      int mod;
      mod=32+_dc_scale-(abs(col[by*_ystride]-pcol[by*_ystride])<<shift);
      hmod[bx*8+by]=mod<-64?_sharp_mod:OC_CLAMPI(0,mod,mod_hi);
    }
    pcol=col;
    /*Don't step past the last column if we can't read to its right.*/
    if(!no_right||bx<7)col++;
  }
  /*Apply the filter.
    This runs in place, so the left and upper neighbors of a pixel have
     already been filtered when they are read.*/
  out=_idata;
  cur=_idata;
  above=no_top?cur:cur-_ystride;
  below=cur+_ystride;
  for(by=0;by<8;by++){
    for(bx=0;bx<8;bx++){
      int left;
      int right;
      int a;
      int b;
      int w;
      if(bx>0)left=cur[bx-1];
      else left=no_left?cur[0]:cur[-1];
      if(bx<7)right=cur[bx+1];
      else right=no_right?cur[7]:cur[8];
      /*a is the weight left for the center pixel out of 128; b collects the
         weighted neighbors plus the rounding offset.*/
      a=128;
      b=64;
      w=hmod[bx*8+by];
      a-=w;
      b+=w*left;
      w=vmod[by*8+bx];
      a-=w;
      b+=w*above[bx];
      w=vmod[(by+1)*8+bx];
      a-=w;
      b+=w*below[bx];
      w=hmod[(bx+1)*8+by];
      a-=w;
      b+=w*right;
      out[bx]=OC_CLAMP255(a*cur[bx]+b>>7);
    }
    out+=_ystride;
    above=cur;
    cur=below;
    /*For the last row, below stays on that row if we can't read past it.*/
    if(!no_bottom||by<6)below+=_ystride;
  }
}
1884
/*Thresholds compared against the per-fragment values in _dec->variances by
   oc_dec_dering_frag_rows() to choose how much deringing a block receives.*/
/*Above this value (and at or below OC_DERING_THRESH2), a block gets one pass
   of the weak filter.*/
#define OC_DERING_THRESH1 (384)
/*Above this value, a block gets one pass of the strong filter.*/
#define OC_DERING_THRESH2 (4*OC_DERING_THRESH1)
/*When the stronger deringing level is enabled, luma blocks above this value
   become candidates for additional strong passes.*/
#define OC_DERING_THRESH3 (5*OC_DERING_THRESH1)
/*The equivalent of OC_DERING_THRESH3 for the chroma planes; a luma block's
   neighbors are also compared against this value to decide whether it
   receives the additional passes.*/
#define OC_DERING_THRESH4 (10*OC_DERING_THRESH1)
1889
/*Runs the deringing filter, in place, over a range of fragment rows of one
   plane of an image.
  For every 8x8 block the per-fragment variance value selects between no
   filtering, one weak pass, one strong pass, or three strong passes.
  _dec:       The decoder context (supplies the variances, fragments, quantizer
               indices and per-qi filter parameters).
  _img:       The array of image planes to filter; plane _pli is modified.
  _pli:       The index of the plane to process.
  _fragy0:    The first fragment row to process.
  _fragy_end: One past the last fragment row to process.*/
static void oc_dec_dering_frag_rows(oc_dec_ctx *_dec,th_img_plane *_img,
 int _pli,int _fragy0,int _fragy_end){
  th_img_plane      *plane;
  oc_fragment_plane *frag_plane;
  oc_fragment       *cur_frag;
  int               *cur_var;
  unsigned char     *row;
  ptrdiff_t          first_fragi;
  int                stride;
  int                frags_per_row;
  int                strong_thresh;
  int                use_strong;
  int                row_end;
  int                plane_w;
  int                plane_h;
  int                py;
  int                px;
  plane=_img+_pli;
  frag_plane=_dec->state.fplanes+_pli;
  frags_per_row=frag_plane->nhfrags;
  first_fragi=frag_plane->froffset+_fragy0*(ptrdiff_t)frags_per_row;
  cur_var=_dec->variances+first_fragi;
  cur_frag=_dec->state.frags+first_fragi;
  /*The stronger mode is a separate post-processing level for luma and for
     chroma, each with its own variance threshold.*/
  if(_pli){
    use_strong=_dec->pp_level>=OC_PP_LEVEL_SDERINGC;
    strong_thresh=OC_DERING_THRESH4;
  }
  else{
    use_strong=_dec->pp_level>=OC_PP_LEVEL_SDERINGY;
    strong_thresh=OC_DERING_THRESH3;
  }
  stride=plane->stride;
  plane_w=plane->width;
  plane_h=plane->height;
  row_end=_fragy_end<<3;
  py=_fragy0<<3;
  row=plane->data+py*(ptrdiff_t)stride;
  while(py<row_end){
    for(px=0;px<plane_w;px+=8){
      int edges;
      int qi;
      int var;
      int npasses;
      int strength;
      int pass;
      qi=_dec->state.qis[cur_frag->qii];
      var=*cur_var;
      /*Flag each side of the block that touches the plane boundary.*/
      edges=0;
      if(px<=0)edges|=1;
      if(px+8>=plane_w)edges|=2;
      if(py<=0)edges|=4;
      if(py+8>=plane_h)edges|=8;
      /*Decide how many passes to run and at which strength.*/
      npasses=0;
      strength=1;
      if(use_strong&&var>strong_thresh){
        npasses=1;
        /*Chroma blocks always get the extra passes; luma blocks only when an
           existing neighbor is itself very noisy.*/
        if(_pli
         ||(!(edges&1)&&cur_var[-1]>OC_DERING_THRESH4)
         ||(!(edges&2)&&cur_var[1]>OC_DERING_THRESH4)
         ||(!(edges&4)&&cur_var[-frags_per_row]>OC_DERING_THRESH4)
         ||(!(edges&8)&&cur_var[frags_per_row]>OC_DERING_THRESH4)){
          npasses=3;
        }
      }
      else if(var>OC_DERING_THRESH2)npasses=1;
      else if(var>OC_DERING_THRESH1){
        npasses=1;
        strength=0;
      }
      for(pass=0;pass<npasses;pass++){
        oc_dering_block(row+px,stride,edges,
         _dec->pp_dc_scale[qi],_dec->pp_sharp_mod[qi],strength);
      }
      cur_frag++;
      cur_var++;
    }
    row+=stride<<3;
    py+=8;
  }
}
1956
1957
1958
th_decode_alloc(const th_info * _info,const th_setup_info * _setup)1959 th_dec_ctx *th_decode_alloc(const th_info *_info,const th_setup_info *_setup){
1960 oc_dec_ctx *dec;
1961 if(_info==NULL||_setup==NULL)return NULL;
1962 dec=oc_aligned_malloc(sizeof(*dec),16);
1963 if(dec==NULL||oc_dec_init(dec,_info,_setup)<0){
1964 oc_aligned_free(dec);
1965 return NULL;
1966 }
1967 dec->state.curframe_num=0;
1968 return dec;
1969 }
1970
/*Clears and releases a decoder context.
  _dec: The context to free; NULL is accepted and ignored.*/
void th_decode_free(th_dec_ctx *_dec){
  if(_dec==NULL)return;
  oc_dec_clear(_dec);
  oc_aligned_free(_dec);
}
1977
/*Performs a decoder control request.
  _dec:    The decoder context; must not be NULL for any request handled here.
  _req:    The TH_DECCTL_* request code.
  _buf:    The request-specific argument buffer; must not be NULL for any
            request handled here.
  _buf_sz: The size of _buf in bytes; it must exactly match the size of the
            type the request expects.
  Return: 0 on success, TH_EFAULT if _dec or _buf is NULL, TH_EINVAL if
           _buf_sz or the supplied value is out of range, or TH_EIMPL if the
           request code is not recognized.*/
int th_decode_ctl(th_dec_ctx *_dec,int _req,void *_buf,
 size_t _buf_sz){
  switch(_req){
    /*Report the highest post-processing level accepted by SET_PPLEVEL.*/
    case TH_DECCTL_GET_PPLEVEL_MAX:{
      if(_dec==NULL||_buf==NULL)return TH_EFAULT;
      if(_buf_sz!=sizeof(int))return TH_EINVAL;
      (*(int *)_buf)=OC_PP_LEVEL_MAX;
      return 0;
    }break;
    /*Select the post-processing level, in the range 0...OC_PP_LEVEL_MAX.*/
    case TH_DECCTL_SET_PPLEVEL:{
      int pp_level;
      if(_dec==NULL||_buf==NULL)return TH_EFAULT;
      if(_buf_sz!=sizeof(int))return TH_EINVAL;
      pp_level=*(int *)_buf;
      if(pp_level<0||pp_level>OC_PP_LEVEL_MAX)return TH_EINVAL;
      _dec->pp_level=pp_level;
      return 0;
    }break;
    /*Reposition the decoder's frame counters from a granule position.*/
    case TH_DECCTL_SET_GRANPOS:{
      ogg_int64_t granpos;
      int         shift;
      if(_dec==NULL||_buf==NULL)return TH_EFAULT;
      if(_buf_sz!=sizeof(ogg_int64_t))return TH_EINVAL;
      granpos=*(ogg_int64_t *)_buf;
      if(granpos<0)return TH_EINVAL;
      shift=_dec->state.info.keyframe_granule_shift;
      _dec->state.granpos=granpos;
      /*The high bits hold the (biased) key frame number.*/
      _dec->state.keyframe_num=(granpos>>shift)-_dec->state.granpos_bias;
      /*The low bits hold the offset from that key frame.
        The mask is built in 64 bits: with a shift of 31, 1<<31 evaluated as
         an int would overflow.*/
      _dec->state.curframe_num=_dec->state.keyframe_num
       +(granpos&(((ogg_int64_t)1<<shift)-1));
      return 0;
    }break;
    /*Install (or clear) the striped-decode callback.*/
    case TH_DECCTL_SET_STRIPE_CB:{
      th_stripe_callback *cb;
      if(_dec==NULL||_buf==NULL)return TH_EFAULT;
      if(_buf_sz!=sizeof(th_stripe_callback))return TH_EINVAL;
      cb=(th_stripe_callback *)_buf;
      _dec->stripe_cb.ctx=cb->ctx;
      _dec->stripe_cb.stripe_decoded=cb->stripe_decoded;
      return 0;
    }break;
#ifdef HAVE_CAIRO
    /*Each telemetry request turns telemetry on and stores its own selector
       value.*/
    case TH_DECCTL_SET_TELEMETRY_MBMODE:{
      if(_dec==NULL||_buf==NULL)return TH_EFAULT;
      if(_buf_sz!=sizeof(int))return TH_EINVAL;
      _dec->telemetry=1;
      _dec->telemetry_mbmode=*(int *)_buf;
      return 0;
    }break;
    case TH_DECCTL_SET_TELEMETRY_MV:{
      if(_dec==NULL||_buf==NULL)return TH_EFAULT;
      if(_buf_sz!=sizeof(int))return TH_EINVAL;
      _dec->telemetry=1;
      _dec->telemetry_mv=*(int *)_buf;
      return 0;
    }break;
    case TH_DECCTL_SET_TELEMETRY_QI:{
      if(_dec==NULL||_buf==NULL)return TH_EFAULT;
      if(_buf_sz!=sizeof(int))return TH_EINVAL;
      _dec->telemetry=1;
      _dec->telemetry_qi=*(int *)_buf;
      return 0;
    }break;
    case TH_DECCTL_SET_TELEMETRY_BITS:{
      if(_dec==NULL||_buf==NULL)return TH_EFAULT;
      if(_buf_sz!=sizeof(int))return TH_EINVAL;
      _dec->telemetry=1;
      _dec->telemetry_bits=*(int *)_buf;
      return 0;
    }break;
#endif
    default:return TH_EIMPL;
  }
}
2051
2052 /*We're decoding an INTER frame, but have no initialized reference
2053 buffers (i.e., decoding did not start on a key frame).
2054 We initialize them to a solid gray here.*/
oc_dec_init_dummy_frame(th_dec_ctx * _dec)2055 static void oc_dec_init_dummy_frame(th_dec_ctx *_dec){
2056 th_info *info;
2057 size_t yplane_sz;
2058 size_t cplane_sz;
2059 ptrdiff_t yoffset;
2060 int yhstride;
2061 int yheight;
2062 int chstride;
2063 int cheight;
2064 _dec->state.ref_frame_idx[OC_FRAME_GOLD]=0;
2065 _dec->state.ref_frame_idx[OC_FRAME_PREV]=0;
2066 _dec->state.ref_frame_idx[OC_FRAME_SELF]=0;
2067 _dec->state.ref_frame_data[OC_FRAME_GOLD]=
2068 _dec->state.ref_frame_data[OC_FRAME_PREV]=
2069 _dec->state.ref_frame_data[OC_FRAME_SELF]=
2070 _dec->state.ref_frame_bufs[0][0].data;
2071 memcpy(_dec->pp_frame_buf,_dec->state.ref_frame_bufs[0],
2072 sizeof(_dec->pp_frame_buf[0])*3);
2073 info=&_dec->state.info;
2074 yhstride=abs(_dec->state.ref_ystride[0]);
2075 yheight=info->frame_height+2*OC_UMV_PADDING;
2076 chstride=abs(_dec->state.ref_ystride[1]);
2077 cheight=yheight>>!(info->pixel_fmt&2);
2078 yplane_sz=yhstride*(size_t)yheight+16;
2079 cplane_sz=chstride*(size_t)cheight;
2080 yoffset=yhstride*(ptrdiff_t)(yheight-OC_UMV_PADDING-1)+OC_UMV_PADDING;
2081 memset(_dec->state.ref_frame_data[0]-yoffset,0x80,yplane_sz+2*cplane_sz);
2082 }
2083
/*Decodes one video data packet.
  _dec:     The decoder context.
  _op:      The packet to decode; a packet with zero bytes is treated as a
             dropped frame.
  _granpos: If non-NULL, receives the granule position computed for this
             frame.
  Return: 0 if a frame with coded blocks was decoded, TH_DUPFRAME if the
           packet carried no coded blocks (the frame counters and granule
           position are still advanced), TH_EFAULT if _dec or _op is NULL, or
           the negative value returned by oc_dec_frame_header_unpack() if the
           frame header could not be unpacked.*/
int th_decode_packetin(th_dec_ctx *_dec,const ogg_packet *_op,
 ogg_int64_t *_granpos){
  int ret;
  if(_dec==NULL||_op==NULL)return TH_EFAULT;
  /*A completely empty packet indicates a dropped frame and is treated exactly
     like an inter frame with no coded blocks.*/
  if(_op->bytes==0){
    _dec->state.frame_type=OC_INTER_FRAME;
    _dec->state.ntotal_coded_fragis=0;
  }
  else{
    oc_pack_readinit(&_dec->opb,_op->packet,_op->bytes);
    ret=oc_dec_frame_header_unpack(_dec);
    if(ret<0)return ret;
    if(_dec->state.frame_type==OC_INTRA_FRAME)oc_dec_mark_all_intra(_dec);
    else oc_dec_coded_flags_unpack(_dec);
  }
  /*If there have been no reference frames, and we need one, initialize one.*/
  if(_dec->state.frame_type!=OC_INTRA_FRAME&&
   (_dec->state.ref_frame_idx[OC_FRAME_GOLD]<0||
   _dec->state.ref_frame_idx[OC_FRAME_PREV]<0)){
    oc_dec_init_dummy_frame(_dec);
  }
  /*If this was an inter frame with no coded blocks...*/
  if(_dec->state.ntotal_coded_fragis<=0){
    /*Just update the granule position and return.*/
    _dec->state.granpos=(_dec->state.keyframe_num+_dec->state.granpos_bias<<
     _dec->state.info.keyframe_granule_shift)
     +(_dec->state.curframe_num-_dec->state.keyframe_num);
    _dec->state.curframe_num++;
    if(_granpos!=NULL)*_granpos=_dec->state.granpos;
    return TH_DUPFRAME;
  }
  else{
    th_ycbcr_buffer stripe_buf;
    int stripe_fragy;
    int refi;
    int pli;
    /*notstart is 0 only for the first MCU of the frame; notdone is 0 only for
       the last one.
      They are added to the start/end delays of each filtering stage below.*/
    int notstart;
    int notdone;
    /*Select a free buffer to use for the reconstructed version of this frame.
      This picks the lowest index not currently used by the golden or previous
       reference frame.*/
    for(refi=0;refi==_dec->state.ref_frame_idx[OC_FRAME_GOLD]||
     refi==_dec->state.ref_frame_idx[OC_FRAME_PREV];refi++);
    _dec->state.ref_frame_idx[OC_FRAME_SELF]=refi;
    _dec->state.ref_frame_data[OC_FRAME_SELF]=
     _dec->state.ref_frame_bufs[refi][0].data;
#if defined(HAVE_CAIRO)
    _dec->telemetry_frame_bytes=_op->bytes;
#endif
    if(_dec->state.frame_type==OC_INTRA_FRAME){
      /*A key frame restarts the key frame numbering; it has no modes or
         motion vectors to unpack.*/
      _dec->state.keyframe_num=_dec->state.curframe_num;
#if defined(HAVE_CAIRO)
      _dec->telemetry_coding_bytes=
       _dec->telemetry_mode_bytes=
       _dec->telemetry_mv_bytes=oc_pack_bytes_left(&_dec->opb);
#endif
    }
    else{
      /*The telemetry counters record how many bytes remain in the packet
         after each unpacking stage.*/
#if defined(HAVE_CAIRO)
      _dec->telemetry_coding_bytes=oc_pack_bytes_left(&_dec->opb);
#endif
      oc_dec_mb_modes_unpack(_dec);
#if defined(HAVE_CAIRO)
      _dec->telemetry_mode_bytes=oc_pack_bytes_left(&_dec->opb);
#endif
      oc_dec_mv_unpack_and_frag_modes_fill(_dec);
#if defined(HAVE_CAIRO)
      _dec->telemetry_mv_bytes=oc_pack_bytes_left(&_dec->opb);
#endif
    }
    oc_dec_block_qis_unpack(_dec);
#if defined(HAVE_CAIRO)
    _dec->telemetry_qi_bytes=oc_pack_bytes_left(&_dec->opb);
#endif
    oc_dec_residual_tokens_unpack(_dec);
    /*Update granule position.
      This must be done before the striped decode callbacks so that the
       application knows what to do with the frame data.*/
    _dec->state.granpos=(_dec->state.keyframe_num+_dec->state.granpos_bias<<
     _dec->state.info.keyframe_granule_shift)
     +(_dec->state.curframe_num-_dec->state.keyframe_num);
    _dec->state.curframe_num++;
    if(_granpos!=NULL)*_granpos=_dec->state.granpos;
    /*All of the rest of the operations -- DC prediction reversal,
       reconstructing coded fragments, copying uncoded fragments, loop
       filtering, extending borders, and out-of-loop post-processing -- should
       be pipelined.
      I.e., DC prediction reversal, reconstruction, and uncoded fragment
       copying are done for one or two super block rows, then loop filtering is
       run as far as it can, then border copying, then post-processing.
      For 4:2:0 video a Minimum Codable Unit or MCU contains two luma super
       block rows, and one chroma.
      Otherwise, an MCU consists of one super block row from each plane.
      Inside each MCU, we perform all of the steps on one color plane before
       moving on to the next.
      After reconstruction, the additional filtering stages introduce a delay
       since they need some pixels from the next fragment row.
      Thus the actual number of decoded rows available is slightly smaller for
       the first MCU, and slightly larger for the last.

      This entire process allows us to operate on the data while it is still in
       cache, resulting in big performance improvements.
      An application callback allows further application processing (blitting
       to video memory, color conversion, etc.) to also use the data while it's
       in cache.*/
    oc_dec_pipeline_init(_dec,&_dec->pipe);
    oc_ycbcr_buffer_flip(stripe_buf,_dec->pp_frame_buf);
    notstart=0;
    notdone=1;
    for(stripe_fragy=0;notdone;stripe_fragy+=_dec->pipe.mcu_nvfrags){
      int avail_fragy0;
      int avail_fragy_end;
      /*Both bounds start at the luma plane's fragment row count and are
         reduced by OC_MINI() as each plane is processed.*/
      avail_fragy0=avail_fragy_end=_dec->state.fplanes[0].nvfrags;
      notdone=stripe_fragy+_dec->pipe.mcu_nvfrags<avail_fragy_end;
      for(pli=0;pli<3;pli++){
        oc_fragment_plane *fplane;
        int frag_shift;
        int pp_offset;
        int sdelay;
        int edelay;
        fplane=_dec->state.fplanes+pli;
        /*Compute the first and last fragment row of the current MCU for this
           plane.
          frag_shift is 1 for a chroma plane when bit 1 of pixel_fmt is clear,
           and 0 otherwise.*/
        frag_shift=pli!=0&&!(_dec->state.info.pixel_fmt&2);
        _dec->pipe.fragy0[pli]=stripe_fragy>>frag_shift;
        _dec->pipe.fragy_end[pli]=OC_MINI(fplane->nvfrags,
         _dec->pipe.fragy0[pli]+(_dec->pipe.mcu_nvfrags>>frag_shift));
        oc_dec_dc_unpredict_mcu_plane(_dec,&_dec->pipe,pli);
        oc_dec_frags_recon_mcu_plane(_dec,&_dec->pipe,pli);
        /*sdelay and edelay accumulate the number of fragment rows by which
           each later stage lags behind reconstruction at the start and end of
           the MCU.*/
        sdelay=edelay=0;
        if(_dec->pipe.loop_filter){
          sdelay+=notstart;
          edelay+=notdone;
          oc_state_loop_filter_frag_rows(&_dec->state,
           _dec->pipe.bounding_values,OC_FRAME_SELF,pli,
           _dec->pipe.fragy0[pli]-sdelay,_dec->pipe.fragy_end[pli]-edelay);
        }
        /*To fill the borders, we have an additional two pixel delay, since a
           fragment in the next row could filter its top edge, using two pixels
           from a fragment in this row.
          But there's no reason to delay a full fragment between the two.*/
        oc_state_borders_fill_rows(&_dec->state,refi,pli,
         (_dec->pipe.fragy0[pli]-sdelay<<3)-(sdelay<<1),
         (_dec->pipe.fragy_end[pli]-edelay<<3)-(edelay<<1));
        /*Out-of-loop post-processing.
          The chroma planes use the post-processing levels three above the
           corresponding luma ones.*/
        pp_offset=3*(pli!=0);
        if(_dec->pipe.pp_level>=OC_PP_LEVEL_DEBLOCKY+pp_offset){
          /*Perform de-blocking in one plane.*/
          sdelay+=notstart;
          edelay+=notdone;
          oc_dec_deblock_frag_rows(_dec,_dec->pp_frame_buf,
           _dec->state.ref_frame_bufs[refi],pli,
           _dec->pipe.fragy0[pli]-sdelay,_dec->pipe.fragy_end[pli]-edelay);
          if(_dec->pipe.pp_level>=OC_PP_LEVEL_DERINGY+pp_offset){
            /*Perform de-ringing in one plane.*/
            sdelay+=notstart;
            edelay+=notdone;
            oc_dec_dering_frag_rows(_dec,_dec->pp_frame_buf,pli,
             _dec->pipe.fragy0[pli]-sdelay,_dec->pipe.fragy_end[pli]-edelay);
          }
        }
        /*If no post-processing is done, we still need to delay a row for the
           loop filter, thanks to the strange filtering order VP3 chose.*/
        else if(_dec->pipe.loop_filter){
          sdelay+=notstart;
          edelay+=notdone;
        }
        /*Compute the intersection of the available rows in all planes.
          If chroma is sub-sampled, the effect of each of its delays is
           doubled, but luma might have more post-processing filters enabled
           than chroma, so we don't know up front which one is the limiting
           factor.*/
        avail_fragy0=OC_MINI(avail_fragy0,
         _dec->pipe.fragy0[pli]-sdelay<<frag_shift);
        avail_fragy_end=OC_MINI(avail_fragy_end,
         _dec->pipe.fragy_end[pli]-edelay<<frag_shift);
      }
      if(_dec->stripe_cb.stripe_decoded!=NULL){
        /*The callback might want to use the FPU, so let's make sure they can.
          We violate all kinds of ABI restrictions by not doing this until
           now, but none of them actually matter since we don't use floating
           point ourselves.*/
        oc_restore_fpu(&_dec->state);
        /*Make the callback, ensuring we flip the sense of the "start" and
           "end" of the available region upside down.*/
        (*_dec->stripe_cb.stripe_decoded)(_dec->stripe_cb.ctx,stripe_buf,
         _dec->state.fplanes[0].nvfrags-avail_fragy_end,
         _dec->state.fplanes[0].nvfrags-avail_fragy0);
      }
      notstart=1;
    }
    /*Finish filling in the reference frame borders.*/
    for(pli=0;pli<3;pli++)oc_state_borders_fill_caps(&_dec->state,refi,pli);
    /*Update the reference frame indices.*/
    if(_dec->state.frame_type==OC_INTRA_FRAME){
      /*The new frame becomes both the previous and gold reference frames.*/
      _dec->state.ref_frame_idx[OC_FRAME_GOLD]=
       _dec->state.ref_frame_idx[OC_FRAME_PREV]=
       _dec->state.ref_frame_idx[OC_FRAME_SELF];
      _dec->state.ref_frame_data[OC_FRAME_GOLD]=
       _dec->state.ref_frame_data[OC_FRAME_PREV]=
       _dec->state.ref_frame_data[OC_FRAME_SELF];
    }
    else{
      /*Otherwise, just replace the previous reference frame.*/
      _dec->state.ref_frame_idx[OC_FRAME_PREV]=
       _dec->state.ref_frame_idx[OC_FRAME_SELF];
      _dec->state.ref_frame_data[OC_FRAME_PREV]=
       _dec->state.ref_frame_data[OC_FRAME_SELF];
    }
    /*Restore the FPU before dump_frame, since that _does_ use the FPU (for PNG
       gamma values, if nothing else).*/
    oc_restore_fpu(&_dec->state);
#if defined(OC_DUMP_IMAGES)
    /*We only dump images if there were some coded blocks.*/
    oc_state_dump_frame(&_dec->state,OC_FRAME_SELF,"dec");
#endif
    return 0;
  }
}
2304
th_decode_ycbcr_out(th_dec_ctx * _dec,th_ycbcr_buffer _ycbcr)2305 int th_decode_ycbcr_out(th_dec_ctx *_dec,th_ycbcr_buffer _ycbcr){
2306 if(_dec==NULL||_ycbcr==NULL)return TH_EFAULT;
2307 oc_ycbcr_buffer_flip(_ycbcr,_dec->pp_frame_buf);
2308 #if defined(HAVE_CAIRO)
2309 /*If telemetry ioctls are active, we need to draw to the output buffer.
2310 Stuff the plane into cairo.*/
2311 if(_dec->telemetry){
2312 cairo_surface_t *cs;
2313 unsigned char *data;
2314 unsigned char *y_row;
2315 unsigned char *u_row;
2316 unsigned char *v_row;
2317 unsigned char *rgb_row;
2318 int cstride;
2319 int w;
2320 int h;
2321 int x;
2322 int y;
2323 int hdec;
2324 int vdec;
2325 w=_ycbcr[0].width;
2326 h=_ycbcr[0].height;
2327 hdec=!(_dec->state.info.pixel_fmt&1);
2328 vdec=!(_dec->state.info.pixel_fmt&2);
2329 /*Lazy data buffer init.
2330 We could try to re-use the post-processing buffer, which would save
2331 memory, but complicate the allocation logic there.
2332 I don't think anyone cares about memory usage when using telemetry; it is
2333 not meant for embedded devices.*/
2334 if(_dec->telemetry_frame_data==NULL){
2335 _dec->telemetry_frame_data=_ogg_malloc(
2336 (w*h+2*(w>>hdec)*(h>>vdec))*sizeof(*_dec->telemetry_frame_data));
2337 if(_dec->telemetry_frame_data==NULL)return 0;
2338 }
2339 cs=cairo_image_surface_create(CAIRO_FORMAT_RGB24,w,h);
2340 /*Sadly, no YUV support in Cairo (yet); convert into the RGB buffer.*/
2341 data=cairo_image_surface_get_data(cs);
2342 if(data==NULL){
2343 cairo_surface_destroy(cs);
2344 return 0;
2345 }
2346 cstride=cairo_image_surface_get_stride(cs);
2347 y_row=_ycbcr[0].data;
2348 u_row=_ycbcr[1].data;
2349 v_row=_ycbcr[2].data;
2350 rgb_row=data;
2351 for(y=0;y<h;y++){
2352 for(x=0;x<w;x++){
2353 int r;
2354 int g;
2355 int b;
2356 r=(1904000*y_row[x]+2609823*v_row[x>>hdec]-363703744)/1635200;
2357 g=(3827562*y_row[x]-1287801*u_row[x>>hdec]
2358 -2672387*v_row[x>>hdec]+447306710)/3287200;
2359 b=(952000*y_row[x]+1649289*u_row[x>>hdec]-225932192)/817600;
2360 rgb_row[4*x+0]=OC_CLAMP255(b);
2361 rgb_row[4*x+1]=OC_CLAMP255(g);
2362 rgb_row[4*x+2]=OC_CLAMP255(r);
2363 }
2364 y_row+=_ycbcr[0].stride;
2365 u_row+=_ycbcr[1].stride&-((y&1)|!vdec);
2366 v_row+=_ycbcr[2].stride&-((y&1)|!vdec);
2367 rgb_row+=cstride;
2368 }
2369 /*Draw coded identifier for each macroblock (stored in Hilbert order).*/
2370 {
2371 cairo_t *c;
2372 const oc_fragment *frags;
2373 oc_mv *frag_mvs;
2374 const signed char *mb_modes;
2375 oc_mb_map *mb_maps;
2376 size_t nmbs;
2377 size_t mbi;
2378 int row2;
2379 int col2;
2380 int qim[3]={0,0,0};
2381 if(_dec->state.nqis==2){
2382 int bqi;
2383 bqi=_dec->state.qis[0];
2384 if(_dec->state.qis[1]>bqi)qim[1]=1;
2385 if(_dec->state.qis[1]<bqi)qim[1]=-1;
2386 }
2387 if(_dec->state.nqis==3){
2388 int bqi;
2389 int cqi;
2390 int dqi;
2391 bqi=_dec->state.qis[0];
2392 cqi=_dec->state.qis[1];
2393 dqi=_dec->state.qis[2];
2394 if(cqi>bqi&&dqi>bqi){
2395 if(dqi>cqi){
2396 qim[1]=1;
2397 qim[2]=2;
2398 }
2399 else{
2400 qim[1]=2;
2401 qim[2]=1;
2402 }
2403 }
2404 else if(cqi<bqi&&dqi<bqi){
2405 if(dqi<cqi){
2406 qim[1]=-1;
2407 qim[2]=-2;
2408 }
2409 else{
2410 qim[1]=-2;
2411 qim[2]=-1;
2412 }
2413 }
2414 else{
2415 if(cqi<bqi)qim[1]=-1;
2416 else qim[1]=1;
2417 if(dqi<bqi)qim[2]=-1;
2418 else qim[2]=1;
2419 }
2420 }
2421 c=cairo_create(cs);
2422 frags=_dec->state.frags;
2423 frag_mvs=_dec->state.frag_mvs;
2424 mb_modes=_dec->state.mb_modes;
2425 mb_maps=_dec->state.mb_maps;
2426 nmbs=_dec->state.nmbs;
2427 row2=0;
2428 col2=0;
2429 for(mbi=0;mbi<nmbs;mbi++){
2430 float x;
2431 float y;
2432 int bi;
2433 y=h-(row2+((col2+1>>1)&1))*16-16;
2434 x=(col2>>1)*16;
2435 cairo_set_line_width(c,1.);
2436 /*Keyframe (all intra) red box.*/
2437 if(_dec->state.frame_type==OC_INTRA_FRAME){
2438 if(_dec->telemetry_mbmode&0x02){
2439 cairo_set_source_rgba(c,1.,0,0,.5);
2440 cairo_rectangle(c,x+2.5,y+2.5,11,11);
2441 cairo_stroke_preserve(c);
2442 cairo_set_source_rgba(c,1.,0,0,.25);
2443 cairo_fill(c);
2444 }
2445 }
2446 else{
2447 ptrdiff_t fragi;
2448 int frag_mvx;
2449 int frag_mvy;
2450 for(bi=0;bi<4;bi++){
2451 fragi=mb_maps[mbi][0][bi];
2452 if(fragi>=0&&frags[fragi].coded){
2453 frag_mvx=OC_MV_X(frag_mvs[fragi]);
2454 frag_mvy=OC_MV_Y(frag_mvs[fragi]);
2455 break;
2456 }
2457 }
2458 if(bi<4){
2459 switch(mb_modes[mbi]){
2460 case OC_MODE_INTRA:{
2461 if(_dec->telemetry_mbmode&0x02){
2462 cairo_set_source_rgba(c,1.,0,0,.5);
2463 cairo_rectangle(c,x+2.5,y+2.5,11,11);
2464 cairo_stroke_preserve(c);
2465 cairo_set_source_rgba(c,1.,0,0,.25);
2466 cairo_fill(c);
2467 }
2468 }break;
2469 case OC_MODE_INTER_NOMV:{
2470 if(_dec->telemetry_mbmode&0x01){
2471 cairo_set_source_rgba(c,0,0,1.,.5);
2472 cairo_rectangle(c,x+2.5,y+2.5,11,11);
2473 cairo_stroke_preserve(c);
2474 cairo_set_source_rgba(c,0,0,1.,.25);
2475 cairo_fill(c);
2476 }
2477 }break;
2478 case OC_MODE_INTER_MV:{
2479 if(_dec->telemetry_mbmode&0x04){
2480 cairo_rectangle(c,x+2.5,y+2.5,11,11);
2481 cairo_set_source_rgba(c,0,1.,0,.5);
2482 cairo_stroke(c);
2483 }
2484 if(_dec->telemetry_mv&0x04){
2485 cairo_move_to(c,x+8+frag_mvx,y+8-frag_mvy);
2486 cairo_set_source_rgba(c,1.,1.,1.,.9);
2487 cairo_set_line_width(c,3.);
2488 cairo_line_to(c,x+8+frag_mvx*.66,y+8-frag_mvy*.66);
2489 cairo_stroke_preserve(c);
2490 cairo_set_line_width(c,2.);
2491 cairo_line_to(c,x+8+frag_mvx*.33,y+8-frag_mvy*.33);
2492 cairo_stroke_preserve(c);
2493 cairo_set_line_width(c,1.);
2494 cairo_line_to(c,x+8,y+8);
2495 cairo_stroke(c);
2496 }
2497 }break;
2498 case OC_MODE_INTER_MV_LAST:{
2499 if(_dec->telemetry_mbmode&0x08){
2500 cairo_rectangle(c,x+2.5,y+2.5,11,11);
2501 cairo_set_source_rgba(c,0,1.,0,.5);
2502 cairo_move_to(c,x+13.5,y+2.5);
2503 cairo_line_to(c,x+2.5,y+8);
2504 cairo_line_to(c,x+13.5,y+13.5);
2505 cairo_stroke(c);
2506 }
2507 if(_dec->telemetry_mv&0x08){
2508 cairo_move_to(c,x+8+frag_mvx,y+8-frag_mvy);
2509 cairo_set_source_rgba(c,1.,1.,1.,.9);
2510 cairo_set_line_width(c,3.);
2511 cairo_line_to(c,x+8+frag_mvx*.66,y+8-frag_mvy*.66);
2512 cairo_stroke_preserve(c);
2513 cairo_set_line_width(c,2.);
2514 cairo_line_to(c,x+8+frag_mvx*.33,y+8-frag_mvy*.33);
2515 cairo_stroke_preserve(c);
2516 cairo_set_line_width(c,1.);
2517 cairo_line_to(c,x+8,y+8);
2518 cairo_stroke(c);
2519 }
2520 }break;
2521 case OC_MODE_INTER_MV_LAST2:{
2522 if(_dec->telemetry_mbmode&0x10){
2523 cairo_rectangle(c,x+2.5,y+2.5,11,11);
2524 cairo_set_source_rgba(c,0,1.,0,.5);
2525 cairo_move_to(c,x+8,y+2.5);
2526 cairo_line_to(c,x+2.5,y+8);
2527 cairo_line_to(c,x+8,y+13.5);
2528 cairo_move_to(c,x+13.5,y+2.5);
2529 cairo_line_to(c,x+8,y+8);
2530 cairo_line_to(c,x+13.5,y+13.5);
2531 cairo_stroke(c);
2532 }
2533 if(_dec->telemetry_mv&0x10){
2534 cairo_move_to(c,x+8+frag_mvx,y+8-frag_mvy);
2535 cairo_set_source_rgba(c,1.,1.,1.,.9);
2536 cairo_set_line_width(c,3.);
2537 cairo_line_to(c,x+8+frag_mvx*.66,y+8-frag_mvy*.66);
2538 cairo_stroke_preserve(c);
2539 cairo_set_line_width(c,2.);
2540 cairo_line_to(c,x+8+frag_mvx*.33,y+8-frag_mvy*.33);
2541 cairo_stroke_preserve(c);
2542 cairo_set_line_width(c,1.);
2543 cairo_line_to(c,x+8,y+8);
2544 cairo_stroke(c);
2545 }
2546 }break;
2547 case OC_MODE_GOLDEN_NOMV:{
2548 if(_dec->telemetry_mbmode&0x20){
2549 cairo_set_source_rgba(c,1.,1.,0,.5);
2550 cairo_rectangle(c,x+2.5,y+2.5,11,11);
2551 cairo_stroke_preserve(c);
2552 cairo_set_source_rgba(c,1.,1.,0,.25);
2553 cairo_fill(c);
2554 }
2555 }break;
2556 case OC_MODE_GOLDEN_MV:{
2557 if(_dec->telemetry_mbmode&0x40){
2558 cairo_rectangle(c,x+2.5,y+2.5,11,11);
2559 cairo_set_source_rgba(c,1.,1.,0,.5);
2560 cairo_stroke(c);
2561 }
2562 if(_dec->telemetry_mv&0x40){
2563 cairo_move_to(c,x+8+frag_mvx,y+8-frag_mvy);
2564 cairo_set_source_rgba(c,1.,1.,1.,.9);
2565 cairo_set_line_width(c,3.);
2566 cairo_line_to(c,x+8+frag_mvx*.66,y+8-frag_mvy*.66);
2567 cairo_stroke_preserve(c);
2568 cairo_set_line_width(c,2.);
2569 cairo_line_to(c,x+8+frag_mvx*.33,y+8-frag_mvy*.33);
2570 cairo_stroke_preserve(c);
2571 cairo_set_line_width(c,1.);
2572 cairo_line_to(c,x+8,y+8);
2573 cairo_stroke(c);
2574 }
2575 }break;
2576 case OC_MODE_INTER_MV_FOUR:{
2577 if(_dec->telemetry_mbmode&0x80){
2578 cairo_rectangle(c,x+2.5,y+2.5,4,4);
2579 cairo_rectangle(c,x+9.5,y+2.5,4,4);
2580 cairo_rectangle(c,x+2.5,y+9.5,4,4);
2581 cairo_rectangle(c,x+9.5,y+9.5,4,4);
2582 cairo_set_source_rgba(c,0,1.,0,.5);
2583 cairo_stroke(c);
2584 }
2585 /*4mv is odd, coded in raster order.*/
2586 fragi=mb_maps[mbi][0][0];
2587 if(frags[fragi].coded&&_dec->telemetry_mv&0x80){
2588 frag_mvx=OC_MV_X(frag_mvs[fragi]);
2589 frag_mvx=OC_MV_Y(frag_mvs[fragi]);
2590 cairo_move_to(c,x+4+frag_mvx,y+12-frag_mvy);
2591 cairo_set_source_rgba(c,1.,1.,1.,.9);
2592 cairo_set_line_width(c,3.);
2593 cairo_line_to(c,x+4+frag_mvx*.66,y+12-frag_mvy*.66);
2594 cairo_stroke_preserve(c);
2595 cairo_set_line_width(c,2.);
2596 cairo_line_to(c,x+4+frag_mvx*.33,y+12-frag_mvy*.33);
2597 cairo_stroke_preserve(c);
2598 cairo_set_line_width(c,1.);
2599 cairo_line_to(c,x+4,y+12);
2600 cairo_stroke(c);
2601 }
2602 fragi=mb_maps[mbi][0][1];
2603 if(frags[fragi].coded&&_dec->telemetry_mv&0x80){
2604 frag_mvx=OC_MV_X(frag_mvs[fragi]);
2605 frag_mvx=OC_MV_Y(frag_mvs[fragi]);
2606 cairo_move_to(c,x+12+frag_mvx,y+12-frag_mvy);
2607 cairo_set_source_rgba(c,1.,1.,1.,.9);
2608 cairo_set_line_width(c,3.);
2609 cairo_line_to(c,x+12+frag_mvx*.66,y+12-frag_mvy*.66);
2610 cairo_stroke_preserve(c);
2611 cairo_set_line_width(c,2.);
2612 cairo_line_to(c,x+12+frag_mvx*.33,y+12-frag_mvy*.33);
2613 cairo_stroke_preserve(c);
2614 cairo_set_line_width(c,1.);
2615 cairo_line_to(c,x+12,y+12);
2616 cairo_stroke(c);
2617 }
2618 fragi=mb_maps[mbi][0][2];
2619 if(frags[fragi].coded&&_dec->telemetry_mv&0x80){
2620 frag_mvx=OC_MV_X(frag_mvs[fragi]);
2621 frag_mvx=OC_MV_Y(frag_mvs[fragi]);
2622 cairo_move_to(c,x+4+frag_mvx,y+4-frag_mvy);
2623 cairo_set_source_rgba(c,1.,1.,1.,.9);
2624 cairo_set_line_width(c,3.);
2625 cairo_line_to(c,x+4+frag_mvx*.66,y+4-frag_mvy*.66);
2626 cairo_stroke_preserve(c);
2627 cairo_set_line_width(c,2.);
2628 cairo_line_to(c,x+4+frag_mvx*.33,y+4-frag_mvy*.33);
2629 cairo_stroke_preserve(c);
2630 cairo_set_line_width(c,1.);
2631 cairo_line_to(c,x+4,y+4);
2632 cairo_stroke(c);
2633 }
2634 fragi=mb_maps[mbi][0][3];
2635 if(frags[fragi].coded&&_dec->telemetry_mv&0x80){
2636 frag_mvx=OC_MV_X(frag_mvs[fragi]);
2637 frag_mvx=OC_MV_Y(frag_mvs[fragi]);
2638 cairo_move_to(c,x+12+frag_mvx,y+4-frag_mvy);
2639 cairo_set_source_rgba(c,1.,1.,1.,.9);
2640 cairo_set_line_width(c,3.);
2641 cairo_line_to(c,x+12+frag_mvx*.66,y+4-frag_mvy*.66);
2642 cairo_stroke_preserve(c);
2643 cairo_set_line_width(c,2.);
2644 cairo_line_to(c,x+12+frag_mvx*.33,y+4-frag_mvy*.33);
2645 cairo_stroke_preserve(c);
2646 cairo_set_line_width(c,1.);
2647 cairo_line_to(c,x+12,y+4);
2648 cairo_stroke(c);
2649 }
2650 }break;
2651 }
2652 }
2653 }
2654 /*qii illustration.*/
2655 if(_dec->telemetry_qi&0x2){
2656 cairo_set_line_cap(c,CAIRO_LINE_CAP_SQUARE);
2657 for(bi=0;bi<4;bi++){
2658 ptrdiff_t fragi;
2659 int qiv;
2660 int xp;
2661 int yp;
2662 xp=x+(bi&1)*8;
2663 yp=y+8-(bi&2)*4;
2664 fragi=mb_maps[mbi][0][bi];
2665 if(fragi>=0&&frags[fragi].coded){
2666 qiv=qim[frags[fragi].qii];
2667 cairo_set_line_width(c,3.);
2668 cairo_set_source_rgba(c,0.,0.,0.,.5);
2669 switch(qiv){
2670 /*Double plus:*/
2671 case 2:{
2672 if((bi&1)^((bi&2)>>1)){
2673 cairo_move_to(c,xp+2.5,yp+1.5);
2674 cairo_line_to(c,xp+2.5,yp+3.5);
2675 cairo_move_to(c,xp+1.5,yp+2.5);
2676 cairo_line_to(c,xp+3.5,yp+2.5);
2677 cairo_move_to(c,xp+5.5,yp+4.5);
2678 cairo_line_to(c,xp+5.5,yp+6.5);
2679 cairo_move_to(c,xp+4.5,yp+5.5);
2680 cairo_line_to(c,xp+6.5,yp+5.5);
2681 cairo_stroke_preserve(c);
2682 cairo_set_source_rgba(c,0.,1.,1.,1.);
2683 }
2684 else{
2685 cairo_move_to(c,xp+5.5,yp+1.5);
2686 cairo_line_to(c,xp+5.5,yp+3.5);
2687 cairo_move_to(c,xp+4.5,yp+2.5);
2688 cairo_line_to(c,xp+6.5,yp+2.5);
2689 cairo_move_to(c,xp+2.5,yp+4.5);
2690 cairo_line_to(c,xp+2.5,yp+6.5);
2691 cairo_move_to(c,xp+1.5,yp+5.5);
2692 cairo_line_to(c,xp+3.5,yp+5.5);
2693 cairo_stroke_preserve(c);
2694 cairo_set_source_rgba(c,0.,1.,1.,1.);
2695 }
2696 }break;
2697 /*Double minus:*/
2698 case -2:{
2699 cairo_move_to(c,xp+2.5,yp+2.5);
2700 cairo_line_to(c,xp+5.5,yp+2.5);
2701 cairo_move_to(c,xp+2.5,yp+5.5);
2702 cairo_line_to(c,xp+5.5,yp+5.5);
2703 cairo_stroke_preserve(c);
2704 cairo_set_source_rgba(c,1.,1.,1.,1.);
2705 }break;
2706 /*Plus:*/
2707 case 1:{
2708 if(bi&2==0)yp-=2;
2709 if(bi&1==0)xp-=2;
2710 cairo_move_to(c,xp+4.5,yp+2.5);
2711 cairo_line_to(c,xp+4.5,yp+6.5);
2712 cairo_move_to(c,xp+2.5,yp+4.5);
2713 cairo_line_to(c,xp+6.5,yp+4.5);
2714 cairo_stroke_preserve(c);
2715 cairo_set_source_rgba(c,.1,1.,.3,1.);
2716 break;
2717 }
2718 /*Fall through.*/
2719 /*Minus:*/
2720 case -1:{
2721 cairo_move_to(c,xp+2.5,yp+4.5);
2722 cairo_line_to(c,xp+6.5,yp+4.5);
2723 cairo_stroke_preserve(c);
2724 cairo_set_source_rgba(c,1.,.3,.1,1.);
2725 }break;
2726 default:continue;
2727 }
2728 cairo_set_line_width(c,1.);
2729 cairo_stroke(c);
2730 }
2731 }
2732 }
2733 col2++;
2734 if((col2>>1)>=_dec->state.nhmbs){
2735 col2=0;
2736 row2+=2;
2737 }
2738 }
2739 /*Bit usage indicator[s]:*/
2740 if(_dec->telemetry_bits){
2741 int widths[6];
2742 int fpsn;
2743 int fpsd;
2744 int mult;
2745 int fullw;
2746 int padw;
2747 int i;
2748 fpsn=_dec->state.info.fps_numerator;
2749 fpsd=_dec->state.info.fps_denominator;
2750 mult=(_dec->telemetry_bits>=0xFF?1:_dec->telemetry_bits);
2751 fullw=250.f*h*fpsd*mult/fpsn;
2752 padw=w-24;
2753 /*Header and coded block bits.*/
2754 if(_dec->telemetry_frame_bytes<0||
2755 _dec->telemetry_frame_bytes==OC_LOTS_OF_BITS){
2756 _dec->telemetry_frame_bytes=0;
2757 }
2758 if(_dec->telemetry_coding_bytes<0||
2759 _dec->telemetry_coding_bytes>_dec->telemetry_frame_bytes){
2760 _dec->telemetry_coding_bytes=0;
2761 }
2762 if(_dec->telemetry_mode_bytes<0||
2763 _dec->telemetry_mode_bytes>_dec->telemetry_frame_bytes){
2764 _dec->telemetry_mode_bytes=0;
2765 }
2766 if(_dec->telemetry_mv_bytes<0||
2767 _dec->telemetry_mv_bytes>_dec->telemetry_frame_bytes){
2768 _dec->telemetry_mv_bytes=0;
2769 }
2770 if(_dec->telemetry_qi_bytes<0||
2771 _dec->telemetry_qi_bytes>_dec->telemetry_frame_bytes){
2772 _dec->telemetry_qi_bytes=0;
2773 }
2774 if(_dec->telemetry_dc_bytes<0||
2775 _dec->telemetry_dc_bytes>_dec->telemetry_frame_bytes){
2776 _dec->telemetry_dc_bytes=0;
2777 }
2778 widths[0]=padw*(_dec->telemetry_frame_bytes-_dec->telemetry_coding_bytes)/fullw;
2779 widths[1]=padw*(_dec->telemetry_coding_bytes-_dec->telemetry_mode_bytes)/fullw;
2780 widths[2]=padw*(_dec->telemetry_mode_bytes-_dec->telemetry_mv_bytes)/fullw;
2781 widths[3]=padw*(_dec->telemetry_mv_bytes-_dec->telemetry_qi_bytes)/fullw;
2782 widths[4]=padw*(_dec->telemetry_qi_bytes-_dec->telemetry_dc_bytes)/fullw;
2783 widths[5]=padw*(_dec->telemetry_dc_bytes)/fullw;
2784 for(i=0;i<6;i++)if(widths[i]>w)widths[i]=w;
2785 cairo_set_source_rgba(c,.0,.0,.0,.6);
2786 cairo_rectangle(c,10,h-33,widths[0]+1,5);
2787 cairo_rectangle(c,10,h-29,widths[1]+1,5);
2788 cairo_rectangle(c,10,h-25,widths[2]+1,5);
2789 cairo_rectangle(c,10,h-21,widths[3]+1,5);
2790 cairo_rectangle(c,10,h-17,widths[4]+1,5);
2791 cairo_rectangle(c,10,h-13,widths[5]+1,5);
2792 cairo_fill(c);
2793 cairo_set_source_rgb(c,1,0,0);
2794 cairo_rectangle(c,10.5,h-32.5,widths[0],4);
2795 cairo_fill(c);
2796 cairo_set_source_rgb(c,0,1,0);
2797 cairo_rectangle(c,10.5,h-28.5,widths[1],4);
2798 cairo_fill(c);
2799 cairo_set_source_rgb(c,0,0,1);
2800 cairo_rectangle(c,10.5,h-24.5,widths[2],4);
2801 cairo_fill(c);
2802 cairo_set_source_rgb(c,.6,.4,.0);
2803 cairo_rectangle(c,10.5,h-20.5,widths[3],4);
2804 cairo_fill(c);
2805 cairo_set_source_rgb(c,.3,.3,.3);
2806 cairo_rectangle(c,10.5,h-16.5,widths[4],4);
2807 cairo_fill(c);
2808 cairo_set_source_rgb(c,.5,.5,.8);
2809 cairo_rectangle(c,10.5,h-12.5,widths[5],4);
2810 cairo_fill(c);
2811 }
2812 /*Master qi indicator[s]:*/
2813 if(_dec->telemetry_qi&0x1){
2814 cairo_text_extents_t extents;
2815 char buffer[10];
2816 int p;
2817 int y;
2818 p=0;
2819 y=h-7.5;
2820 if(_dec->state.qis[0]>=10)buffer[p++]=48+_dec->state.qis[0]/10;
2821 buffer[p++]=48+_dec->state.qis[0]%10;
2822 if(_dec->state.nqis>=2){
2823 buffer[p++]=' ';
2824 if(_dec->state.qis[1]>=10)buffer[p++]=48+_dec->state.qis[1]/10;
2825 buffer[p++]=48+_dec->state.qis[1]%10;
2826 }
2827 if(_dec->state.nqis==3){
2828 buffer[p++]=' ';
2829 if(_dec->state.qis[2]>=10)buffer[p++]=48+_dec->state.qis[2]/10;
2830 buffer[p++]=48+_dec->state.qis[2]%10;
2831 }
2832 buffer[p++]='\0';
2833 cairo_select_font_face(c,"sans",
2834 CAIRO_FONT_SLANT_NORMAL,CAIRO_FONT_WEIGHT_BOLD);
2835 cairo_set_font_size(c,18);
2836 cairo_text_extents(c,buffer,&extents);
2837 cairo_set_source_rgb(c,1,1,1);
2838 cairo_move_to(c,w-extents.x_advance-10,y);
2839 cairo_show_text(c,buffer);
2840 cairo_set_source_rgb(c,0,0,0);
2841 cairo_move_to(c,w-extents.x_advance-10,y);
2842 cairo_text_path(c,buffer);
2843 cairo_set_line_width(c,.8);
2844 cairo_set_line_join(c,CAIRO_LINE_JOIN_ROUND);
2845 cairo_stroke(c);
2846 }
2847 cairo_destroy(c);
2848 }
2849 /*Out of the Cairo plane into the telemetry YUV buffer.*/
2850 _ycbcr[0].data=_dec->telemetry_frame_data;
2851 _ycbcr[0].stride=_ycbcr[0].width;
2852 _ycbcr[1].data=_ycbcr[0].data+h*_ycbcr[0].stride;
2853 _ycbcr[1].stride=_ycbcr[1].width;
2854 _ycbcr[2].data=_ycbcr[1].data+(h>>vdec)*_ycbcr[1].stride;
2855 _ycbcr[2].stride=_ycbcr[2].width;
2856 y_row=_ycbcr[0].data;
2857 u_row=_ycbcr[1].data;
2858 v_row=_ycbcr[2].data;
2859 rgb_row=data;
2860 /*This is one of the few places it's worth handling chroma on a
2861 case-by-case basis.*/
2862 switch(_dec->state.info.pixel_fmt){
2863 case TH_PF_420:{
2864 for(y=0;y<h;y+=2){
2865 unsigned char *y_row2;
2866 unsigned char *rgb_row2;
2867 y_row2=y_row+_ycbcr[0].stride;
2868 rgb_row2=rgb_row+cstride;
2869 for(x=0;x<w;x+=2){
2870 int y;
2871 int u;
2872 int v;
2873 y=(65481*rgb_row[4*x+2]+128553*rgb_row[4*x+1]
2874 +24966*rgb_row[4*x+0]+4207500)/255000;
2875 y_row[x]=OC_CLAMP255(y);
2876 y=(65481*rgb_row[4*x+6]+128553*rgb_row[4*x+5]
2877 +24966*rgb_row[4*x+4]+4207500)/255000;
2878 y_row[x+1]=OC_CLAMP255(y);
2879 y=(65481*rgb_row2[4*x+2]+128553*rgb_row2[4*x+1]
2880 +24966*rgb_row2[4*x+0]+4207500)/255000;
2881 y_row2[x]=OC_CLAMP255(y);
2882 y=(65481*rgb_row2[4*x+6]+128553*rgb_row2[4*x+5]
2883 +24966*rgb_row2[4*x+4]+4207500)/255000;
2884 y_row2[x+1]=OC_CLAMP255(y);
2885 u=(-8372*(rgb_row[4*x+2]+rgb_row[4*x+6]
2886 +rgb_row2[4*x+2]+rgb_row2[4*x+6])
2887 -16436*(rgb_row[4*x+1]+rgb_row[4*x+5]
2888 +rgb_row2[4*x+1]+rgb_row2[4*x+5])
2889 +24808*(rgb_row[4*x+0]+rgb_row[4*x+4]
2890 +rgb_row2[4*x+0]+rgb_row2[4*x+4])+29032005)/225930;
2891 v=(39256*(rgb_row[4*x+2]+rgb_row[4*x+6]
2892 +rgb_row2[4*x+2]+rgb_row2[4*x+6])
2893 -32872*(rgb_row[4*x+1]+rgb_row[4*x+5]
2894 +rgb_row2[4*x+1]+rgb_row2[4*x+5])
2895 -6384*(rgb_row[4*x+0]+rgb_row[4*x+4]
2896 +rgb_row2[4*x+0]+rgb_row2[4*x+4])+45940035)/357510;
2897 u_row[x>>1]=OC_CLAMP255(u);
2898 v_row[x>>1]=OC_CLAMP255(v);
2899 }
2900 y_row+=_ycbcr[0].stride<<1;
2901 u_row+=_ycbcr[1].stride;
2902 v_row+=_ycbcr[2].stride;
2903 rgb_row+=cstride<<1;
2904 }
2905 }break;
2906 case TH_PF_422:{
2907 for(y=0;y<h;y++){
2908 for(x=0;x<w;x+=2){
2909 int y;
2910 int u;
2911 int v;
2912 y=(65481*rgb_row[4*x+2]+128553*rgb_row[4*x+1]
2913 +24966*rgb_row[4*x+0]+4207500)/255000;
2914 y_row[x]=OC_CLAMP255(y);
2915 y=(65481*rgb_row[4*x+6]+128553*rgb_row[4*x+5]
2916 +24966*rgb_row[4*x+4]+4207500)/255000;
2917 y_row[x+1]=OC_CLAMP255(y);
2918 u=(-16744*(rgb_row[4*x+2]+rgb_row[4*x+6])
2919 -32872*(rgb_row[4*x+1]+rgb_row[4*x+5])
2920 +49616*(rgb_row[4*x+0]+rgb_row[4*x+4])+29032005)/225930;
2921 v=(78512*(rgb_row[4*x+2]+rgb_row[4*x+6])
2922 -65744*(rgb_row[4*x+1]+rgb_row[4*x+5])
2923 -12768*(rgb_row[4*x+0]+rgb_row[4*x+4])+45940035)/357510;
2924 u_row[x>>1]=OC_CLAMP255(u);
2925 v_row[x>>1]=OC_CLAMP255(v);
2926 }
2927 y_row+=_ycbcr[0].stride;
2928 u_row+=_ycbcr[1].stride;
2929 v_row+=_ycbcr[2].stride;
2930 rgb_row+=cstride;
2931 }
2932 }break;
2933 /*case TH_PF_444:*/
2934 default:{
2935 for(y=0;y<h;y++){
2936 for(x=0;x<w;x++){
2937 int y;
2938 int u;
2939 int v;
2940 y=(65481*rgb_row[4*x+2]+128553*rgb_row[4*x+1]
2941 +24966*rgb_row[4*x+0]+4207500)/255000;
2942 u=(-33488*rgb_row[4*x+2]-65744*rgb_row[4*x+1]
2943 +99232*rgb_row[4*x+0]+29032005)/225930;
2944 v=(157024*rgb_row[4*x+2]-131488*rgb_row[4*x+1]
2945 -25536*rgb_row[4*x+0]+45940035)/357510;
2946 y_row[x]=OC_CLAMP255(y);
2947 u_row[x]=OC_CLAMP255(u);
2948 v_row[x]=OC_CLAMP255(v);
2949 }
2950 y_row+=_ycbcr[0].stride;
2951 u_row+=_ycbcr[1].stride;
2952 v_row+=_ycbcr[2].stride;
2953 rgb_row+=cstride;
2954 }
2955 }break;
2956 }
2957 /*Finished.
2958 Destroy the surface.*/
2959 cairo_surface_destroy(cs);
2960 }
2961 #endif
2962 return 0;
2963 }
2964