1 /********************************************************************
2  *                                                                  *
3  * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
4  * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
5  * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
6  * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
7  *                                                                  *
8  * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009                *
9  * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
10  *                                                                  *
11  ********************************************************************
12 
13   function:
14     last mod: $Id: decode.c 17576 2010-10-29 01:07:51Z tterribe $
15 
16  ********************************************************************/
17 
18 #include <stdlib.h>
19 #include <string.h>
20 #include <ogg/ogg.h>
21 #include "decint.h"
22 #if defined(OC_DUMP_IMAGES)
23 # include <stdio.h>
24 # include "png.h"
25 #endif
26 #if defined(HAVE_CAIRO)
27 # include <cairo.h>
28 #endif
29 
30 
/*Post-processing levels.
  The values are consecutive integers running from OC_PP_LEVEL_DISABLED (0) up
   to OC_PP_LEVEL_MAX (7).*/
/*No post-processing.*/
#define OC_PP_LEVEL_DISABLED  (0)
/*Keep track of DC qi for each block only.*/
#define OC_PP_LEVEL_TRACKDCQI (1)
/*Deblock the luma plane.*/
#define OC_PP_LEVEL_DEBLOCKY  (2)
/*Dering the luma plane.*/
#define OC_PP_LEVEL_DERINGY   (3)
/*Stronger luma plane deringing.*/
#define OC_PP_LEVEL_SDERINGY  (4)
/*Deblock the chroma planes.*/
#define OC_PP_LEVEL_DEBLOCKC  (5)
/*Dering the chroma planes.*/
#define OC_PP_LEVEL_DERINGC   (6)
/*Stronger chroma plane deringing.*/
#define OC_PP_LEVEL_SDERINGC  (7)
/*Maximum valid post-processing level (the same value as
   OC_PP_LEVEL_SDERINGC).*/
#define OC_PP_LEVEL_MAX       (7)
49 
50 
51 
/*The mode alphabets for the various mode coding schemes.
  Scheme 0 uses a custom alphabet, which is not stored in this table.
  Row i therefore holds the alphabet for scheme i+1:
   oc_dec_mb_modes_unpack() indexes this table with mode_scheme-1.
  The last row is also the order in which modes are assigned to the indices
   read from the bitstream when building the custom alphabet for scheme 0.*/
static const unsigned char OC_MODE_ALPHABETS[7][OC_NMODES]={
  /*Last MV dominates.*/
  {
    OC_MODE_INTER_MV_LAST,OC_MODE_INTER_MV_LAST2,OC_MODE_INTER_MV,
    OC_MODE_INTER_NOMV,OC_MODE_INTRA,OC_MODE_GOLDEN_NOMV,OC_MODE_GOLDEN_MV,
    OC_MODE_INTER_MV_FOUR
  },
  {
    OC_MODE_INTER_MV_LAST,OC_MODE_INTER_MV_LAST2,OC_MODE_INTER_NOMV,
    OC_MODE_INTER_MV,OC_MODE_INTRA,OC_MODE_GOLDEN_NOMV,OC_MODE_GOLDEN_MV,
    OC_MODE_INTER_MV_FOUR
  },
  {
    OC_MODE_INTER_MV_LAST,OC_MODE_INTER_MV,OC_MODE_INTER_MV_LAST2,
    OC_MODE_INTER_NOMV,OC_MODE_INTRA,OC_MODE_GOLDEN_NOMV,OC_MODE_GOLDEN_MV,
    OC_MODE_INTER_MV_FOUR
  },
  {
    OC_MODE_INTER_MV_LAST,OC_MODE_INTER_MV,OC_MODE_INTER_NOMV,
    OC_MODE_INTER_MV_LAST2,OC_MODE_INTRA,OC_MODE_GOLDEN_NOMV,
    OC_MODE_GOLDEN_MV,OC_MODE_INTER_MV_FOUR
  },
  /*No MV dominates.*/
  {
    OC_MODE_INTER_NOMV,OC_MODE_INTER_MV_LAST,OC_MODE_INTER_MV_LAST2,
    OC_MODE_INTER_MV,OC_MODE_INTRA,OC_MODE_GOLDEN_NOMV,OC_MODE_GOLDEN_MV,
    OC_MODE_INTER_MV_FOUR
  },
  {
    OC_MODE_INTER_NOMV,OC_MODE_GOLDEN_NOMV,OC_MODE_INTER_MV_LAST,
    OC_MODE_INTER_MV_LAST2,OC_MODE_INTER_MV,OC_MODE_INTRA,OC_MODE_GOLDEN_MV,
    OC_MODE_INTER_MV_FOUR
  },
  /*Default ordering.*/
  {
    OC_MODE_INTER_NOMV,OC_MODE_INTRA,OC_MODE_INTER_MV,OC_MODE_INTER_MV_LAST,
    OC_MODE_INTER_MV_LAST2,OC_MODE_GOLDEN_NOMV,OC_MODE_GOLDEN_MV,
    OC_MODE_INTER_MV_FOUR
  }
};
94 
95 
96 /*The original DCT tokens are extended and reordered during the construction of
97    the Huffman tables.
98   The extension means more bits can be read with fewer calls to the bitpacker
99    during the Huffman decoding process (at the cost of larger Huffman tables),
100    and fewer tokens require additional extra bits (reducing the average storage
101    per decoded token).
102   The revised ordering reveals essential information in the token value
103    itself; specifically, whether or not there are additional extra bits to read
104    and the parameter to which those extra bits are applied.
  The token is used to fetch a code word from the OC_DCT_CODE_WORD table below.
  The extra bits are added into the code word at the bit position inferred from
   the token value, giving the final code word from which all required
   parameters are derived.
109   The number of EOBs and the leading zero run length can be extracted directly.
110   The coefficient magnitude is optionally negated before extraction, according
111    to a 'flip' bit.*/
112 
/*The number of additional extra bits that are decoded with each of the
   internal DCT tokens.
  Only the leading internal tokens take additional extra bits, so the table
   has one entry for each of those and stops there.*/
static const unsigned char OC_INTERNAL_DCT_TOKEN_EXTRA_BITS[15]={
  12,4,3,3,4,4,5,5,8,8,8,8,3,3,6
};

/*Whether or not an internal token needs any additional extra bits.
  This is true exactly when the token is a valid index into
   OC_INTERNAL_DCT_TOKEN_EXTRA_BITS.
  The argument is parenthesized so that any expression (e.g., a conditional
   expression) may be passed without being re-associated by the comparison.*/
#define OC_DCT_TOKEN_NEEDS_MORE(token) \
 ((token)<(int)(sizeof(OC_INTERNAL_DCT_TOKEN_EXTRA_BITS)/ \
  sizeof(*OC_INTERNAL_DCT_TOKEN_EXTRA_BITS)))
123 
/*This token (OC_DCT_REPEAT_RUN3_TOKEN) requires more than 8 extra bits.
  It is internal token 0, the one listed with 12 extra bits in
   OC_INTERNAL_DCT_TOKEN_EXTRA_BITS.*/
#define OC_DCT_TOKEN_FAT_EOB (0)

/*The number of EOBs to use for an end-of-frame token.
  Note: We want to set eobs to PTRDIFF_MAX here, but that requires C99, which
   is not yet available everywhere; this should be equivalent.
  The expression is the all-ones size_t value shifted right by one bit.*/
#define OC_DCT_EOB_FINISH (~(size_t)0>>1)
131 
/*The location of the (6) run length bits in the code word.
  These are placed at index 0 and given 8 bits (even though 6 would suffice)
   because it may be faster to extract the lower byte on some platforms.*/
#define OC_DCT_CW_RLEN_SHIFT (0)
/*The location of the (12) EOB bits in the code word.*/
#define OC_DCT_CW_EOB_SHIFT  (8)
/*The location of the (1) flip bit in the code word.
  This must be right under the magnitude bits.*/
#define OC_DCT_CW_FLIP_BIT   (20)
/*The location of the (11) token magnitude bits in the code word.
  These must be last, and rely on a sign-extending right shift.*/
#define OC_DCT_CW_MAG_SHIFT  (21)

/*Pack the given fields into a code word.
  _eobs: The EOB run count.
  _rlen: The zero run length.
  _mag:  The (signed) coefficient value.
  _flip: The flip bit (0 or 1); it is subtracted from _mag before packing.
  The magnitude field can be negative, and left-shifting a negative value is
   undefined behavior in C, so it is moved into place by multiplying by the
   corresponding power of two instead.
  The result is still an integer constant expression, so the macro remains
   usable in static initializers.*/
#define OC_DCT_CW_PACK(_eobs,_rlen,_mag,_flip) \
 (((_eobs)<<OC_DCT_CW_EOB_SHIFT)| \
 ((_rlen)<<OC_DCT_CW_RLEN_SHIFT)| \
 ((_flip)<<OC_DCT_CW_FLIP_BIT)| \
 (((_mag)-(_flip))*(1<<OC_DCT_CW_MAG_SHIFT)))

/*A special code word value that signals the end of the frame (a long EOB run
   of zero).*/
#define OC_DCT_CW_FINISH (0)

/*The position at which to insert the extra bits in the code word.
  This evaluates to OC_DCT_CW_EOB_SHIFT for tokens 0 and 1, to
   OC_DCT_CW_MAG_SHIFT for tokens 2 through 11, and to 0 (which is
   OC_DCT_CW_RLEN_SHIFT) for all other tokens.
  We use this formulation because Intel has no useful cmov.
  A real architecture would probably do better with two of those.
  This translates to 11 instructions(!), and is _still_ faster than either a
   table lookup (just barely) or the naive double-ternary implementation (which
   gcc translates to a jump and a cmov).
  This assumes OC_DCT_CW_RLEN_SHIFT is zero, but could easily be reworked if
   you want to make one of the other shifts zero.*/
#define OC_DCT_TOKEN_EB_POS(_token) \
 (((OC_DCT_CW_EOB_SHIFT-OC_DCT_CW_MAG_SHIFT)&-((_token)<2)) \
 +(OC_DCT_CW_MAG_SHIFT&-((_token)<12)))
167 
/*The code words for each internal token.
  See the notes at OC_DCT_TOKEN_MAP for the reasons why things are out of
   order.
  The first 15 entries are the tokens that take additional extra bits; they
   line up one-to-one with the entries of OC_INTERNAL_DCT_TOKEN_EXTRA_BITS.
  Each entry is built with OC_DCT_CW_PACK(eobs,rlen,mag,flip).*/
static const ogg_int32_t OC_DCT_CODE_WORD[92]={
  /*These tokens require additional extra bits for the EOB count.*/
  /*OC_DCT_REPEAT_RUN3_TOKEN (12 extra bits)*/
  OC_DCT_CW_FINISH,
  /*OC_DCT_REPEAT_RUN2_TOKEN (4 extra bits)*/
  OC_DCT_CW_PACK(16, 0,  0,0),
  /*These tokens require additional extra bits for the magnitude.*/
  /*OC_DCT_VAL_CAT5 (4 extra bits-1 already read)*/
  OC_DCT_CW_PACK( 0, 0, 13,0),
  OC_DCT_CW_PACK( 0, 0, 13,1),
  /*OC_DCT_VAL_CAT6 (5 extra bits-1 already read)*/
  OC_DCT_CW_PACK( 0, 0, 21,0),
  OC_DCT_CW_PACK( 0, 0, 21,1),
  /*OC_DCT_VAL_CAT7 (6 extra bits-1 already read)*/
  OC_DCT_CW_PACK( 0, 0, 37,0),
  OC_DCT_CW_PACK( 0, 0, 37,1),
  /*OC_DCT_VAL_CAT8 (10 extra bits-2 already read)*/
  OC_DCT_CW_PACK( 0, 0, 69,0),
  OC_DCT_CW_PACK( 0, 0,325,0),
  OC_DCT_CW_PACK( 0, 0, 69,1),
  OC_DCT_CW_PACK( 0, 0,325,1),
  /*These tokens require additional extra bits for the run length.*/
  /*OC_DCT_RUN_CAT1C (4 extra bits-1 already read)*/
  OC_DCT_CW_PACK( 0,10, +1,0),
  OC_DCT_CW_PACK( 0,10, -1,0),
  /*OC_DCT_ZRL_TOKEN (6 extra bits)
    Flip is set to distinguish this from OC_DCT_CW_FINISH.*/
  OC_DCT_CW_PACK( 0, 0,  0,1),
  /*The remaining tokens require no additional extra bits.*/
  /*OC_DCT_EOB1_TOKEN (0 extra bits)*/
  OC_DCT_CW_PACK( 1, 0,  0,0),
  /*OC_DCT_EOB2_TOKEN (0 extra bits)*/
  OC_DCT_CW_PACK( 2, 0,  0,0),
  /*OC_DCT_EOB3_TOKEN (0 extra bits)*/
  OC_DCT_CW_PACK( 3, 0,  0,0),
  /*OC_DCT_RUN_CAT1A (1 extra bit-1 already read)x5*/
  OC_DCT_CW_PACK( 0, 1, +1,0),
  OC_DCT_CW_PACK( 0, 1, -1,0),
  OC_DCT_CW_PACK( 0, 2, +1,0),
  OC_DCT_CW_PACK( 0, 2, -1,0),
  OC_DCT_CW_PACK( 0, 3, +1,0),
  OC_DCT_CW_PACK( 0, 3, -1,0),
  OC_DCT_CW_PACK( 0, 4, +1,0),
  OC_DCT_CW_PACK( 0, 4, -1,0),
  OC_DCT_CW_PACK( 0, 5, +1,0),
  OC_DCT_CW_PACK( 0, 5, -1,0),
  /*OC_DCT_RUN_CAT2A (2 extra bits-2 already read)*/
  OC_DCT_CW_PACK( 0, 1, +2,0),
  OC_DCT_CW_PACK( 0, 1, +3,0),
  OC_DCT_CW_PACK( 0, 1, -2,0),
  OC_DCT_CW_PACK( 0, 1, -3,0),
  /*OC_DCT_RUN_CAT1B (3 extra bits-3 already read)*/
  OC_DCT_CW_PACK( 0, 6, +1,0),
  OC_DCT_CW_PACK( 0, 7, +1,0),
  OC_DCT_CW_PACK( 0, 8, +1,0),
  OC_DCT_CW_PACK( 0, 9, +1,0),
  OC_DCT_CW_PACK( 0, 6, -1,0),
  OC_DCT_CW_PACK( 0, 7, -1,0),
  OC_DCT_CW_PACK( 0, 8, -1,0),
  OC_DCT_CW_PACK( 0, 9, -1,0),
  /*OC_DCT_RUN_CAT2B (3 extra bits-3 already read)*/
  OC_DCT_CW_PACK( 0, 2, +2,0),
  OC_DCT_CW_PACK( 0, 3, +2,0),
  OC_DCT_CW_PACK( 0, 2, +3,0),
  OC_DCT_CW_PACK( 0, 3, +3,0),
  OC_DCT_CW_PACK( 0, 2, -2,0),
  OC_DCT_CW_PACK( 0, 3, -2,0),
  OC_DCT_CW_PACK( 0, 2, -3,0),
  OC_DCT_CW_PACK( 0, 3, -3,0),
  /*OC_DCT_SHORT_ZRL_TOKEN (3 extra bits-3 already read)
    Flip is set on the first one to distinguish it from OC_DCT_CW_FINISH.*/
  OC_DCT_CW_PACK( 0, 0,  0,1),
  OC_DCT_CW_PACK( 0, 1,  0,0),
  OC_DCT_CW_PACK( 0, 2,  0,0),
  OC_DCT_CW_PACK( 0, 3,  0,0),
  OC_DCT_CW_PACK( 0, 4,  0,0),
  OC_DCT_CW_PACK( 0, 5,  0,0),
  OC_DCT_CW_PACK( 0, 6,  0,0),
  OC_DCT_CW_PACK( 0, 7,  0,0),
  /*OC_ONE_TOKEN (0 extra bits)*/
  OC_DCT_CW_PACK( 0, 0, +1,0),
  /*OC_MINUS_ONE_TOKEN (0 extra bits)*/
  OC_DCT_CW_PACK( 0, 0, -1,0),
  /*OC_TWO_TOKEN (0 extra bits)*/
  OC_DCT_CW_PACK( 0, 0, +2,0),
  /*OC_MINUS_TWO_TOKEN (0 extra bits)*/
  OC_DCT_CW_PACK( 0, 0, -2,0),
  /*OC_DCT_VAL_CAT2 (1 extra bit-1 already read)x4*/
  OC_DCT_CW_PACK( 0, 0, +3,0),
  OC_DCT_CW_PACK( 0, 0, -3,0),
  OC_DCT_CW_PACK( 0, 0, +4,0),
  OC_DCT_CW_PACK( 0, 0, -4,0),
  OC_DCT_CW_PACK( 0, 0, +5,0),
  OC_DCT_CW_PACK( 0, 0, -5,0),
  OC_DCT_CW_PACK( 0, 0, +6,0),
  OC_DCT_CW_PACK( 0, 0, -6,0),
  /*OC_DCT_VAL_CAT3 (2 extra bits-2 already read)*/
  OC_DCT_CW_PACK( 0, 0, +7,0),
  OC_DCT_CW_PACK( 0, 0, +8,0),
  OC_DCT_CW_PACK( 0, 0, -7,0),
  OC_DCT_CW_PACK( 0, 0, -8,0),
  /*OC_DCT_VAL_CAT4 (3 extra bits-3 already read)*/
  OC_DCT_CW_PACK( 0, 0, +9,0),
  OC_DCT_CW_PACK( 0, 0,+10,0),
  OC_DCT_CW_PACK( 0, 0,+11,0),
  OC_DCT_CW_PACK( 0, 0,+12,0),
  OC_DCT_CW_PACK( 0, 0, -9,0),
  OC_DCT_CW_PACK( 0, 0,-10,0),
  OC_DCT_CW_PACK( 0, 0,-11,0),
  OC_DCT_CW_PACK( 0, 0,-12,0),
  /*OC_DCT_REPEAT_RUN1_TOKEN (3 extra bits-3 already read)*/
  OC_DCT_CW_PACK( 8, 0,  0,0),
  OC_DCT_CW_PACK( 9, 0,  0,0),
  OC_DCT_CW_PACK(10, 0,  0,0),
  OC_DCT_CW_PACK(11, 0,  0,0),
  OC_DCT_CW_PACK(12, 0,  0,0),
  OC_DCT_CW_PACK(13, 0,  0,0),
  OC_DCT_CW_PACK(14, 0,  0,0),
  OC_DCT_CW_PACK(15, 0,  0,0),
  /*OC_DCT_REPEAT_RUN0_TOKEN (2 extra bits-2 already read)*/
  OC_DCT_CW_PACK( 4, 0,  0,0),
  OC_DCT_CW_PACK( 5, 0,  0,0),
  OC_DCT_CW_PACK( 6, 0,  0,0),
  OC_DCT_CW_PACK( 7, 0,  0,0),
};
296 
297 
298 
/*Decodes one super block run length from the bitstream.
  _opb: The pack buffer to read from.
  Return: The decoded run length; the code covers the range 1...4129.*/
static int oc_sb_run_unpack(oc_pack_buf *_opb){
  /*Coding scheme:
       Codeword            Run Length
     0                       1
     10x                     2-3
     110x                    4-5
     1110xx                  6-9
     11110xxx                10-17
     111110xxxx              18-33
     111111xxxxxxxxxxxx      34-4129*/
  /*Run lengths below 6 are stored in the tree directly.
    Longer runs are stored as (extra_bits<<4|base-6), where base is the
     smallest run length of the codeword class and extra_bits is the number of
     bits that still have to be read and added to it.*/
  static const ogg_int16_t OC_SB_RUN_TREE[22]={
    4,
     -(1<<8|1),-(1<<8|1),-(1<<8|1),-(1<<8|1),
     -(1<<8|1),-(1<<8|1),-(1<<8|1),-(1<<8|1),
     -(3<<8|2),-(3<<8|2),-(3<<8|3),-(3<<8|3),
     -(4<<8|4),-(4<<8|5),-(4<<8|2<<4|6-6),17,
      2,
       -(2<<8|2<<4|10-6),-(2<<8|2<<4|14-6),-(2<<8|4<<4|18-6),-(2<<8|12<<4|34-6)
  };
  int token;
  int base_offs;
  int nextra_bits;
  token=oc_huff_token_decode(_opb,OC_SB_RUN_TREE);
  /*Short runs need no further bits.*/
  if(token<0x10)return token;
  /*Split the token into its base offset and its extra bit count.*/
  base_offs=token&0x1F;
  nextra_bits=(token-base_offs)>>4;
  return 6+base_offs+(int)oc_pack_read(_opb,nextra_bits);
}
327 
/*Decodes one block run length from the bitstream.
  _opb: The pack buffer to read from.
  Return: The token decoded by oc_huff_token_decode() from the tree below,
   whose leaf values are the run lengths 1...30.*/
static int oc_block_run_unpack(oc_pack_buf *_opb){
  /*Coding scheme:
     Codeword             Run Length
     0x                      1-2
     10x                     3-4
     110x                    5-6
     1110xx                  7-10
     11110xx                 11-14
     11111xxxx               15-30*/
  /*Unlike the super block run tree, every leaf here holds a complete run
     length, so no extra bits are read after the token is decoded.*/
  static const ogg_int16_t OC_BLOCK_RUN_TREE[61]={
    5,
     -(2<<8|1),-(2<<8|1),-(2<<8|1),-(2<<8|1),
     -(2<<8|1),-(2<<8|1),-(2<<8|1),-(2<<8|1),
     -(2<<8|2),-(2<<8|2),-(2<<8|2),-(2<<8|2),
     -(2<<8|2),-(2<<8|2),-(2<<8|2),-(2<<8|2),
     -(3<<8|3),-(3<<8|3),-(3<<8|3),-(3<<8|3),
     -(3<<8|4),-(3<<8|4),-(3<<8|4),-(3<<8|4),
     -(4<<8|5),-(4<<8|5),-(4<<8|6),-(4<<8|6),
     33,       36,       39,       44,
      1,-(1<<8|7),-(1<<8|8),
      1,-(1<<8|9),-(1<<8|10),
      2,-(2<<8|11),-(2<<8|12),-(2<<8|13),-(2<<8|14),
      4,
       -(4<<8|15),-(4<<8|16),-(4<<8|17),-(4<<8|18),
       -(4<<8|19),-(4<<8|20),-(4<<8|21),-(4<<8|22),
       -(4<<8|23),-(4<<8|24),-(4<<8|25),-(4<<8|26),
       -(4<<8|27),-(4<<8|28),-(4<<8|29),-(4<<8|30)
  };
  return oc_huff_token_decode(_opb,OC_BLOCK_RUN_TREE);
}
358 
359 
360 
/*Installs the C implementation of the decoder's accelerated routines.
  When OC_DEC_USE_VTABLE is defined, this points the dc_unpredict_mcu_plane
   entry of the decoder's vtable at oc_dec_dc_unpredict_mcu_plane_c.
  Otherwise this function does nothing.
  _dec: The decoder context to set up.*/
void oc_dec_accel_init_c(oc_dec_ctx *_dec){
# if defined(OC_DEC_USE_VTABLE)
  _dec->opt_vtable.dc_unpredict_mcu_plane=
   oc_dec_dc_unpredict_mcu_plane_c;
# endif
}
367 
/*Initializes a decoder context.
  This sets up the shared codec state, copies the Huffman trees, allocates the
   DCT token buffer, builds the dequantization tables and the per-qi
   post-processing sharpening modifiers, and resets the post-processing,
   stripe callback and (if built with Cairo) telemetry fields.
  _dec:   The decoder context to initialize.
  _info:  The stream info, passed through to oc_state_init().
  _setup: The setup data supplying the Huffman tables and quantization info.
  Return: 0 on success, the negative value returned by oc_state_init() or
   oc_huff_trees_copy() if either of them fails, or TH_EFAULT if the token
   buffer cannot be allocated.
  On failure, everything that was set up before the failing step is torn
   down again.*/
static int oc_dec_init(oc_dec_ctx *_dec,const th_info *_info,
 const th_setup_info *_setup){
  int qi;
  int pli;
  int qti;
  int err;
  err=oc_state_init(&_dec->state,_info,3);
  if(err<0)return err;
  err=oc_huff_trees_copy(_dec->huff_tables,
   (const ogg_int16_t *const *)_setup->huff_tables);
  if(err<0){
    oc_state_clear(&_dec->state);
    return err;
  }
  /*For each fragment, allocate one byte for every DCT coefficient token, plus
     one byte for extra-bits for each token, plus one more byte for the long
     EOB run, just in case it's the very last token and has a run length of
     one.*/
  _dec->dct_tokens=(unsigned char *)_ogg_malloc((64+64+1)*
   _dec->state.nfrags*sizeof(_dec->dct_tokens[0]));
  if(_dec->dct_tokens==NULL){
    oc_huff_trees_clear(_dec->huff_tables);
    oc_state_clear(&_dec->state);
    return TH_EFAULT;
  }
  /*Point each dequantization table at its backing storage before filling
     it in.*/
  for(qi=0;qi<64;qi++){
    for(pli=0;pli<3;pli++){
      for(qti=0;qti<2;qti++){
        _dec->state.dequant_tables[qi][pli][qti]=
         _dec->state.dequant_table_data[qi][pli][qti];
      }
    }
  }
  oc_dequant_tables_init(_dec->state.dequant_tables,_dec->pp_dc_scale,
   &_setup->qinfo);
  /*Derive the sharpening modifier for each qi from four quantizers (indices
     12, 17, 18 and 24) of every table, with the luma plane counted twice.*/
  for(qi=0;qi<64;qi++){
    int qsum;
    qsum=0;
    for(qti=0;qti<2;qti++){
      for(pli=0;pli<3;pli++){
        int luma_shift;
        luma_shift=pli==0;
        qsum+=(_dec->state.dequant_tables[qi][pli][qti][12]+
         _dec->state.dequant_tables[qi][pli][qti][17]+
         _dec->state.dequant_tables[qi][pli][qti][18]+
         _dec->state.dequant_tables[qi][pli][qti][24])<<luma_shift;
      }
    }
    _dec->pp_sharp_mod[qi]=-(qsum>>11);
  }
  memcpy(_dec->state.loop_filter_limits,_setup->qinfo.loop_filter_limits,
   sizeof(_dec->state.loop_filter_limits));
  oc_dec_accel_init(_dec);
  /*Post-processing starts out disabled, with none of its buffers
     allocated.*/
  _dec->pp_level=OC_PP_LEVEL_DISABLED;
  _dec->dc_qis=NULL;
  _dec->variances=NULL;
  _dec->pp_frame_data=NULL;
  /*No stripe callback is registered initially.*/
  _dec->stripe_cb.ctx=NULL;
  _dec->stripe_cb.stripe_decoded=NULL;
#if defined(HAVE_CAIRO)
  /*All telemetry is switched off initially.*/
  _dec->telemetry=0;
  _dec->telemetry_bits=0;
  _dec->telemetry_qi=0;
  _dec->telemetry_mbmode=0;
  _dec->telemetry_mv=0;
  _dec->telemetry_frame_data=NULL;
#endif
  return 0;
}
429 
/*Releases everything owned by a decoder context.
  This frees the buffers that oc_dec_init() allocates or sets to NULL
   (telemetry frame data when built with Cairo, post-processing frame data,
   variances, DC qis and the DCT token buffer), then clears the Huffman trees
   and the shared codec state.
  _dec: The decoder context to clear.*/
static void oc_dec_clear(oc_dec_ctx *_dec){
#if defined(HAVE_CAIRO)
  _ogg_free(_dec->telemetry_frame_data);
#endif
  _ogg_free(_dec->pp_frame_data);
  _ogg_free(_dec->variances);
  _ogg_free(_dec->dc_qis);
  _ogg_free(_dec->dct_tokens);
  oc_huff_trees_clear(_dec->huff_tables);
  oc_state_clear(&_dec->state);
}
441 
442 
/*Unpacks the frame header from the decoder's pack buffer.
  This fills in the frame type, the list of qi values and the number of qi
   values in the decoder state.
  _dec: The decoder context.
  Return: 0 on success, TH_EBADPACKET if the packet is not a data packet, or
   TH_EIMPL if the unused configuration bits of a keyframe are not all
   zero.*/
static int oc_dec_frame_header_unpack(oc_dec_ctx *_dec){
  int nqis;
  /*Check to make sure this is a data packet.*/
  if(oc_pack_read1(&_dec->opb)!=0)return TH_EBADPACKET;
  /*Read in the frame type (I or P).*/
  _dec->state.frame_type=(int)oc_pack_read1(&_dec->opb);
  /*Read in the qi list.
    There are between one and three 6-bit entries; after each of the first
     two, a single bit says whether or not another one follows.*/
  nqis=0;
  for(;;){
    _dec->state.qis[nqis]=(unsigned char)oc_pack_read(&_dec->opb,6);
    nqis++;
    if(nqis>=3)break;
    if(!oc_pack_read1(&_dec->opb))break;
  }
  _dec->state.nqis=nqis;
  if(_dec->state.frame_type==OC_INTRA_FRAME){
    /*Keyframes have 3 unused configuration bits, holdovers from VP3 days.
      Most of the other unused bits in the VP3 headers were eliminated.
      I don't know why these remain.*/
    /*I wanted to eliminate wasted bits, but not all config wiggle room
       --Monty.*/
    if(oc_pack_read(&_dec->opb,3)!=0)return TH_EIMPL;
  }
  return 0;
}
478 
479 /*Mark all fragments as coded and in OC_MODE_INTRA.
480   This also builds up the coded fragment list (in coded order), and clears the
481    uncoded fragment list.
482   It does not update the coded macro block list nor the super block flags, as
483    those are not used when decoding INTRA frames.*/
oc_dec_mark_all_intra(oc_dec_ctx * _dec)484 static void oc_dec_mark_all_intra(oc_dec_ctx *_dec){
485   const oc_sb_map   *sb_maps;
486   const oc_sb_flags *sb_flags;
487   oc_fragment       *frags;
488   ptrdiff_t         *coded_fragis;
489   ptrdiff_t          ncoded_fragis;
490   ptrdiff_t          prev_ncoded_fragis;
491   unsigned           nsbs;
492   unsigned           sbi;
493   int                pli;
494   coded_fragis=_dec->state.coded_fragis;
495   prev_ncoded_fragis=ncoded_fragis=0;
496   sb_maps=(const oc_sb_map *)_dec->state.sb_maps;
497   sb_flags=_dec->state.sb_flags;
498   frags=_dec->state.frags;
499   sbi=nsbs=0;
500   for(pli=0;pli<3;pli++){
501     nsbs+=_dec->state.fplanes[pli].nsbs;
502     for(;sbi<nsbs;sbi++){
503       int quadi;
504       for(quadi=0;quadi<4;quadi++)if(sb_flags[sbi].quad_valid&1<<quadi){
505         int bi;
506         for(bi=0;bi<4;bi++){
507           ptrdiff_t fragi;
508           fragi=sb_maps[sbi][quadi][bi];
509           if(fragi>=0){
510             frags[fragi].coded=1;
511             frags[fragi].refi=OC_FRAME_SELF;
512             frags[fragi].mb_mode=OC_MODE_INTRA;
513             coded_fragis[ncoded_fragis++]=fragi;
514           }
515         }
516       }
517     }
518     _dec->state.ncoded_fragis[pli]=ncoded_fragis-prev_ncoded_fragis;
519     prev_ncoded_fragis=ncoded_fragis;
520   }
521   _dec->state.ntotal_coded_fragis=ncoded_fragis;
522 }
523 
524 /*Decodes the bit flags indicating whether each super block is partially coded
525    or not.
526   Return: The number of partially coded super blocks.*/
oc_dec_partial_sb_flags_unpack(oc_dec_ctx * _dec)527 static unsigned oc_dec_partial_sb_flags_unpack(oc_dec_ctx *_dec){
528   oc_sb_flags *sb_flags;
529   unsigned     nsbs;
530   unsigned     sbi;
531   unsigned     npartial;
532   unsigned     run_count;
533   long         val;
534   int          flag;
535   val=oc_pack_read1(&_dec->opb);
536   flag=(int)val;
537   sb_flags=_dec->state.sb_flags;
538   nsbs=_dec->state.nsbs;
539   sbi=npartial=0;
540   while(sbi<nsbs){
541     int full_run;
542     run_count=oc_sb_run_unpack(&_dec->opb);
543     full_run=run_count>=4129;
544     do{
545       sb_flags[sbi].coded_partially=flag;
546       sb_flags[sbi].coded_fully=0;
547       npartial+=flag;
548       sbi++;
549     }
550     while(--run_count>0&&sbi<nsbs);
551     if(full_run&&sbi<nsbs){
552       val=oc_pack_read1(&_dec->opb);
553       flag=(int)val;
554     }
555     else flag=!flag;
556   }
557   /*TODO: run_count should be 0 here.
558     If it's not, we should issue a warning of some kind.*/
559   return npartial;
560 }
561 
562 /*Decodes the bit flags for whether or not each non-partially-coded super
563    block is fully coded or not.
564   This function should only be called if there is at least one
565    non-partially-coded super block.
566   Return: The number of partially coded super blocks.*/
oc_dec_coded_sb_flags_unpack(oc_dec_ctx * _dec)567 static void oc_dec_coded_sb_flags_unpack(oc_dec_ctx *_dec){
568   oc_sb_flags *sb_flags;
569   unsigned     nsbs;
570   unsigned     sbi;
571   unsigned     run_count;
572   long         val;
573   int          flag;
574   sb_flags=_dec->state.sb_flags;
575   nsbs=_dec->state.nsbs;
576   /*Skip partially coded super blocks.*/
577   for(sbi=0;sb_flags[sbi].coded_partially;sbi++);
578   val=oc_pack_read1(&_dec->opb);
579   flag=(int)val;
580   do{
581     int full_run;
582     run_count=oc_sb_run_unpack(&_dec->opb);
583     full_run=run_count>=4129;
584     for(;sbi<nsbs;sbi++){
585       if(sb_flags[sbi].coded_partially)continue;
586       if(run_count--<=0)break;
587       sb_flags[sbi].coded_fully=flag;
588     }
589     if(full_run&&sbi<nsbs){
590       val=oc_pack_read1(&_dec->opb);
591       flag=(int)val;
592     }
593     else flag=!flag;
594   }
595   while(sbi<nsbs);
596   /*TODO: run_count should be 0 here.
597     If it's not, we should issue a warning of some kind.*/
598 }
599 
/*Unpacks the coded/uncoded flag of every fragment for an INTER frame.
  The super block flags are decoded first; the individual block flags are then
   read only for fragments in partially coded super blocks.
  Coded fragments are appended to the front of the coded fragment list, and
   uncoded fragments are stored backwards from the end of the same array.
  For each luma macro block, mb_modes is set to whether or not it contains a
   coded block.
  _dec: The decoder context.*/
static void oc_dec_coded_flags_unpack(oc_dec_ctx *_dec){
  const oc_sb_map   *sb_maps;
  const oc_sb_flags *sb_flags;
  signed char       *mb_modes;
  oc_fragment       *frags;
  ptrdiff_t         *coded_list;
  ptrdiff_t         *uncoded_end;
  ptrdiff_t          ncoded;
  ptrdiff_t          nuncoded;
  ptrdiff_t          plane_start;
  unsigned           npartial;
  unsigned           sb_end;
  unsigned           sbi;
  int                pli;
  int                bit;
  int                run_left;
  npartial=oc_dec_partial_sb_flags_unpack(_dec);
  if(npartial<_dec->state.nsbs)oc_dec_coded_sb_flags_unpack(_dec);
  /*The initial block flag is stored inverted, since it is toggled when the
     first run is started.
    It is only present if there is at least one partially coded super
     block.*/
  bit=0;
  if(npartial>0)bit=!(int)oc_pack_read1(&_dec->opb);
  sb_maps=(const oc_sb_map *)_dec->state.sb_maps;
  sb_flags=_dec->state.sb_flags;
  mb_modes=_dec->state.mb_modes;
  frags=_dec->state.frags;
  coded_list=_dec->state.coded_fragis;
  uncoded_end=coded_list+_dec->state.nfrags;
  ncoded=0;
  nuncoded=0;
  run_left=0;
  sbi=0;
  sb_end=0;
  for(pli=0;pli<3;pli++){
    /*Remember where this plane's fragments begin in the coded list.*/
    plane_start=ncoded;
    sb_end+=_dec->state.fplanes[pli].nsbs;
    for(;sbi<sb_end;sbi++){
      int quadi;
      for(quadi=0;quadi<4;quadi++){
        int any_coded;
        int bi;
        /*Skip quadrants that are not valid in this super block.*/
        if(!(sb_flags[sbi].quad_valid&(1<<quadi)))continue;
        any_coded=0;
        for(bi=0;bi<4;bi++){
          ptrdiff_t fragi;
          int       coded;
          fragi=sb_maps[sbi][quadi][bi];
          /*Negative entries do not map to a fragment.*/
          if(fragi<0)continue;
          if(sb_flags[sbi].coded_fully)coded=1;
          else if(sb_flags[sbi].coded_partially){
            /*Start a new run, with the opposite value, once the current one
               is used up.*/
            if(run_left<=0){
              run_left=oc_block_run_unpack(&_dec->opb);
              bit=!bit;
            }
            run_left--;
            coded=bit;
          }
          else coded=0;
          if(coded)coded_list[ncoded++]=fragi;
          else{
            nuncoded++;
            uncoded_end[-nuncoded]=fragi;
          }
          any_coded|=coded;
          frags[fragi].coded=coded;
          frags[fragi].refi=OC_FRAME_NONE;
        }
        /*Remember if there's a coded luma block in this macro block.*/
        if(pli==0)mb_modes[(sbi<<2)|quadi]=any_coded;
      }
    }
    _dec->state.ncoded_fragis[pli]=ncoded-plane_start;
  }
  _dec->state.ntotal_coded_fragis=ncoded;
  /*TODO: run_left should be 0 here.
    If it's not, we should issue a warning of some kind.*/
}
673 
674 
/*The variable-length code for mode indices, used by oc_dec_mb_modes_unpack()
   for every mode scheme except 7.
  Coding scheme:
   Codeword            Mode Index
   0                       0
   10                      1
   110                     2
   1110                    3
   11110                   4
   111110                  5
   1111110                 6
   1111111                 7*/
static const ogg_int16_t OC_VLC_MODE_TREE[26]={
  4,
   -(1<<8|0),-(1<<8|0),-(1<<8|0),-(1<<8|0),
   -(1<<8|0),-(1<<8|0),-(1<<8|0),-(1<<8|0),
   -(2<<8|1),-(2<<8|1),-(2<<8|1),-(2<<8|1),
   -(3<<8|2),-(3<<8|2),-(4<<8|3),17,
    3,
     -(1<<8|4),-(1<<8|4),-(1<<8|4),-(1<<8|4),
     -(2<<8|5),-(2<<8|5),-(3<<8|6),-(3<<8|7)
};

/*The fixed-length code for mode indices, used by oc_dec_mb_modes_unpack()
   for mode scheme 7.
  Every leaf is tagged with a length of 3 and the leaf values run 0...7 in
   order, i.e., the mode index is presumably read as a plain 3-bit value
   (oc_huff_token_decode() is defined elsewhere).*/
static const ogg_int16_t OC_CLC_MODE_TREE[9]={
  3,
   -(3<<8|0),-(3<<8|1),-(3<<8|2),-(3<<8|3),
   -(3<<8|4),-(3<<8|5),-(3<<8|6),-(3<<8|7)
};
701 
702 /*Unpacks the list of macro block modes for INTER frames.*/
oc_dec_mb_modes_unpack(oc_dec_ctx * _dec)703 static void oc_dec_mb_modes_unpack(oc_dec_ctx *_dec){
704   signed char         *mb_modes;
705   const unsigned char *alphabet;
706   unsigned char        scheme0_alphabet[8];
707   const ogg_int16_t   *mode_tree;
708   size_t               nmbs;
709   size_t               mbi;
710   long                 val;
711   int                  mode_scheme;
712   val=oc_pack_read(&_dec->opb,3);
713   mode_scheme=(int)val;
714   if(mode_scheme==0){
715     int mi;
716     /*Just in case, initialize the modes to something.
717       If the bitstream doesn't contain each index exactly once, it's likely
718        corrupt and the rest of the packet is garbage anyway, but this way we
719        won't crash, and we'll decode SOMETHING.*/
720     /*LOOP VECTORIZES*/
721     for(mi=0;mi<OC_NMODES;mi++)scheme0_alphabet[mi]=OC_MODE_INTER_NOMV;
722     for(mi=0;mi<OC_NMODES;mi++){
723       val=oc_pack_read(&_dec->opb,3);
724       scheme0_alphabet[val]=OC_MODE_ALPHABETS[6][mi];
725     }
726     alphabet=scheme0_alphabet;
727   }
728   else alphabet=OC_MODE_ALPHABETS[mode_scheme-1];
729   mode_tree=mode_scheme==7?OC_CLC_MODE_TREE:OC_VLC_MODE_TREE;
730   mb_modes=_dec->state.mb_modes;
731   nmbs=_dec->state.nmbs;
732   for(mbi=0;mbi<nmbs;mbi++){
733     if(mb_modes[mbi]>0){
734       /*We have a coded luma block; decode a mode.*/
735       mb_modes[mbi]=alphabet[oc_huff_token_decode(&_dec->opb,mode_tree)];
736     }
737     /*For other valid macro blocks, INTER_NOMV is forced, but we rely on the
738        fact that OC_MODE_INTER_NOMV is already 0.*/
739   }
740 }
741 
742 
743 
/*The variable-length code for a single motion vector component.
  The leaf values are the component values -31...31 biased by 32;
   oc_mv_unpack() subtracts the bias again after decoding.
  Components of smaller magnitude are tagged with shorter lengths.*/
static const ogg_int16_t OC_VLC_MV_COMP_TREE[101]={
  5,
   -(3<<8|32+0),-(3<<8|32+0),-(3<<8|32+0),-(3<<8|32+0),
   -(3<<8|32+1),-(3<<8|32+1),-(3<<8|32+1),-(3<<8|32+1),
   -(3<<8|32-1),-(3<<8|32-1),-(3<<8|32-1),-(3<<8|32-1),
   -(4<<8|32+2),-(4<<8|32+2),-(4<<8|32-2),-(4<<8|32-2),
   -(4<<8|32+3),-(4<<8|32+3),-(4<<8|32-3),-(4<<8|32-3),
   33,          36,          39,          42,
   45,          50,          55,          60,
   65,          74,          83,          92,
    1,-(1<<8|32+4),-(1<<8|32-4),
    1,-(1<<8|32+5),-(1<<8|32-5),
    1,-(1<<8|32+6),-(1<<8|32-6),
    1,-(1<<8|32+7),-(1<<8|32-7),
    2,-(2<<8|32+8),-(2<<8|32-8),-(2<<8|32+9),-(2<<8|32-9),
    2,-(2<<8|32+10),-(2<<8|32-10),-(2<<8|32+11),-(2<<8|32-11),
    2,-(2<<8|32+12),-(2<<8|32-12),-(2<<8|32+13),-(2<<8|32-13),
    2,-(2<<8|32+14),-(2<<8|32-14),-(2<<8|32+15),-(2<<8|32-15),
    3,
     -(3<<8|32+16),-(3<<8|32-16),-(3<<8|32+17),-(3<<8|32-17),
     -(3<<8|32+18),-(3<<8|32-18),-(3<<8|32+19),-(3<<8|32-19),
    3,
     -(3<<8|32+20),-(3<<8|32-20),-(3<<8|32+21),-(3<<8|32-21),
     -(3<<8|32+22),-(3<<8|32-22),-(3<<8|32+23),-(3<<8|32-23),
    3,
     -(3<<8|32+24),-(3<<8|32-24),-(3<<8|32+25),-(3<<8|32-25),
     -(3<<8|32+26),-(3<<8|32-26),-(3<<8|32+27),-(3<<8|32-27),
    3,
     -(3<<8|32+28),-(3<<8|32-28),-(3<<8|32+29),-(3<<8|32-29),
     -(3<<8|32+30),-(3<<8|32-30),-(3<<8|32+31),-(3<<8|32-31)
};
775 
/*The fixed-length code for a single motion vector component.
  Every leaf is tagged with a length of 6.
  The leaf values are the component values biased by 32 (oc_mv_unpack()
   subtracts the bias again), laid out as magnitude pairs: the even entries
   hold +magnitude and the odd entries hold -magnitude.
  Both of the first two entries therefore decode to a component of 0.*/
static const ogg_int16_t OC_CLC_MV_COMP_TREE[65]={
  6,
   -(6<<8|32 +0),-(6<<8|32 -0),-(6<<8|32 +1),-(6<<8|32 -1),
   -(6<<8|32 +2),-(6<<8|32 -2),-(6<<8|32 +3),-(6<<8|32 -3),
   -(6<<8|32 +4),-(6<<8|32 -4),-(6<<8|32 +5),-(6<<8|32 -5),
   -(6<<8|32 +6),-(6<<8|32 -6),-(6<<8|32 +7),-(6<<8|32 -7),
   -(6<<8|32 +8),-(6<<8|32 -8),-(6<<8|32 +9),-(6<<8|32 -9),
   -(6<<8|32+10),-(6<<8|32-10),-(6<<8|32+11),-(6<<8|32-11),
   -(6<<8|32+12),-(6<<8|32-12),-(6<<8|32+13),-(6<<8|32-13),
   -(6<<8|32+14),-(6<<8|32-14),-(6<<8|32+15),-(6<<8|32-15),
   -(6<<8|32+16),-(6<<8|32-16),-(6<<8|32+17),-(6<<8|32-17),
   -(6<<8|32+18),-(6<<8|32-18),-(6<<8|32+19),-(6<<8|32-19),
   -(6<<8|32+20),-(6<<8|32-20),-(6<<8|32+21),-(6<<8|32-21),
   -(6<<8|32+22),-(6<<8|32-22),-(6<<8|32+23),-(6<<8|32-23),
   -(6<<8|32+24),-(6<<8|32-24),-(6<<8|32+25),-(6<<8|32-25),
   -(6<<8|32+26),-(6<<8|32-26),-(6<<8|32+27),-(6<<8|32-27),
   -(6<<8|32+28),-(6<<8|32-28),-(6<<8|32+29),-(6<<8|32-29),
   -(6<<8|32+30),-(6<<8|32-30),-(6<<8|32+31),-(6<<8|32-31)
};
795 
796 
oc_mv_unpack(oc_pack_buf * _opb,const ogg_int16_t * _tree)797 static oc_mv oc_mv_unpack(oc_pack_buf *_opb,const ogg_int16_t *_tree){
798   int dx;
799   int dy;
800   dx=oc_huff_token_decode(_opb,_tree)-32;
801   dy=oc_huff_token_decode(_opb,_tree)-32;
802   return OC_MV(dx,dy);
803 }
804 
/*Unpacks the list of motion vectors for INTER frames, and propagates the macro
   block modes and motion vectors to the individual fragments.*/
oc_dec_mv_unpack_and_frag_modes_fill(oc_dec_ctx * _dec)807 static void oc_dec_mv_unpack_and_frag_modes_fill(oc_dec_ctx *_dec){
808   const oc_mb_map        *mb_maps;
809   const signed char      *mb_modes;
810   oc_set_chroma_mvs_func  set_chroma_mvs;
811   const ogg_int16_t      *mv_comp_tree;
812   oc_fragment            *frags;
813   oc_mv                  *frag_mvs;
814   const unsigned char    *map_idxs;
815   int                     map_nidxs;
816   oc_mv                   last_mv;
817   oc_mv                   prior_mv;
818   oc_mv                   cbmvs[4];
819   size_t                  nmbs;
820   size_t                  mbi;
821   long                    val;
822   set_chroma_mvs=OC_SET_CHROMA_MVS_TABLE[_dec->state.info.pixel_fmt];
823   val=oc_pack_read1(&_dec->opb);
824   mv_comp_tree=val?OC_CLC_MV_COMP_TREE:OC_VLC_MV_COMP_TREE;
825   map_idxs=OC_MB_MAP_IDXS[_dec->state.info.pixel_fmt];
826   map_nidxs=OC_MB_MAP_NIDXS[_dec->state.info.pixel_fmt];
827   prior_mv=last_mv=0;
828   frags=_dec->state.frags;
829   frag_mvs=_dec->state.frag_mvs;
830   mb_maps=(const oc_mb_map *)_dec->state.mb_maps;
831   mb_modes=_dec->state.mb_modes;
832   nmbs=_dec->state.nmbs;
833   for(mbi=0;mbi<nmbs;mbi++){
834     int mb_mode;
835     mb_mode=mb_modes[mbi];
836     if(mb_mode!=OC_MODE_INVALID){
837       oc_mv     mbmv;
838       ptrdiff_t fragi;
839       int       mapi;
840       int       mapii;
841       int       refi;
842       if(mb_mode==OC_MODE_INTER_MV_FOUR){
843         oc_mv lbmvs[4];
844         int   bi;
845         prior_mv=last_mv;
846         for(bi=0;bi<4;bi++){
847           fragi=mb_maps[mbi][0][bi];
848           if(frags[fragi].coded){
849             frags[fragi].refi=OC_FRAME_PREV;
850             frags[fragi].mb_mode=OC_MODE_INTER_MV_FOUR;
851             lbmvs[bi]=last_mv=oc_mv_unpack(&_dec->opb,mv_comp_tree);
852             frag_mvs[fragi]=lbmvs[bi];
853           }
854           else lbmvs[bi]=0;
855         }
856         (*set_chroma_mvs)(cbmvs,lbmvs);
857         for(mapii=4;mapii<map_nidxs;mapii++){
858           mapi=map_idxs[mapii];
859           bi=mapi&3;
860           fragi=mb_maps[mbi][mapi>>2][bi];
861           if(frags[fragi].coded){
862             frags[fragi].refi=OC_FRAME_PREV;
863             frags[fragi].mb_mode=OC_MODE_INTER_MV_FOUR;
864             frag_mvs[fragi]=cbmvs[bi];
865           }
866         }
867       }
868       else{
869         switch(mb_mode){
870           case OC_MODE_INTER_MV:{
871             prior_mv=last_mv;
872             last_mv=mbmv=oc_mv_unpack(&_dec->opb,mv_comp_tree);
873           }break;
874           case OC_MODE_INTER_MV_LAST:mbmv=last_mv;break;
875           case OC_MODE_INTER_MV_LAST2:{
876             mbmv=prior_mv;
877             prior_mv=last_mv;
878             last_mv=mbmv;
879           }break;
880           case OC_MODE_GOLDEN_MV:{
881             mbmv=oc_mv_unpack(&_dec->opb,mv_comp_tree);
882           }break;
883           default:mbmv=0;break;
884         }
885         /*Fill in the MVs for the fragments.*/
886         refi=OC_FRAME_FOR_MODE(mb_mode);
887         mapii=0;
888         do{
889           mapi=map_idxs[mapii];
890           fragi=mb_maps[mbi][mapi>>2][mapi&3];
891           if(frags[fragi].coded){
892             frags[fragi].refi=refi;
893             frags[fragi].mb_mode=mb_mode;
894             frag_mvs[fragi]=mbmv;
895           }
896         }
897         while(++mapii<map_nidxs);
898       }
899     }
900   }
901 }
902 
/*Unpacks the quantizer index selector (qii) of every coded fragment.
  When the frame has a single qi, nothing is read from the packet and every
   coded fragment gets a qii of 0.
  Otherwise one or two run-length coded passes are read, as described
   below.*/
static void oc_dec_block_qis_unpack(oc_dec_ctx *_dec){
  oc_fragment     *frags;
  const ptrdiff_t *coded_fragis;
  ptrdiff_t        ncoded_fragis;
  ptrdiff_t        fragii;
  int              flag;
  int              nnonzero;
  int              run_count;
  int              full_run;
  ncoded_fragis=_dec->state.ntotal_coded_fragis;
  if(ncoded_fragis<=0)return;
  frags=_dec->state.frags;
  coded_fragis=_dec->state.coded_fragis;
  /*If this frame has only a single qi value, then just use it for all coded
     fragments.*/
  if(_dec->state.nqis==1){
    for(fragii=0;fragii<ncoded_fragis;fragii++){
      frags[coded_fragis[fragii]].qii=0;
    }
    return;
  }
  /*Otherwise, we decode a qi index for each fragment, using two passes of
    the same binary RLE scheme used for super-block coded bits.
   The first pass marks each fragment as having a qii of 0 or greater than
    0, and the second pass (if necessary), distinguishes between a qii of
    1 and 2.*/
  flag=(int)oc_pack_read1(&_dec->opb);
  nnonzero=0;
  fragii=0;
  while(fragii<ncoded_fragis){
    run_count=oc_sb_run_unpack(&_dec->opb);
    /*A run of 4129 or more is followed by an explicit flag bit instead of an
       implicit toggle.*/
    full_run=run_count>=4129;
    /*At least one fragment is consumed per run, even if the run length read
       was not positive.*/
    do{
      frags[coded_fragis[fragii]].qii=flag;
      fragii++;
      nnonzero+=flag;
      run_count--;
    }
    while(run_count>0&&fragii<ncoded_fragis);
    if(full_run&&fragii<ncoded_fragis)flag=(int)oc_pack_read1(&_dec->opb);
    else flag=!flag;
  }
  /*TODO: run_count should be 0 here.
    If it's not, we should issue a warning of some kind.*/
  /*The second pass is only needed if we have 3 different qi's for this frame
     and at least one fragment was marked with a non-zero qii.*/
  if(_dec->state.nqis!=3||nnonzero<=0)return;
  /*Skip the leading qii==0 fragments.
    This terminates because at least one fragment has a non-zero qii.*/
  fragii=0;
  while(frags[coded_fragis[fragii]].qii==0)fragii++;
  flag=(int)oc_pack_read1(&_dec->opb);
  do{
    run_count=oc_sb_run_unpack(&_dec->opb);
    full_run=run_count>=4129;
    /*Only fragments with a non-zero qii count against the run.*/
    while(fragii<ncoded_fragis){
      oc_fragment *frag;
      frag=frags+coded_fragis[fragii];
      if(frag->qii!=0){
        if(run_count--<=0)break;
        frag->qii+=flag;
      }
      fragii++;
    }
    if(full_run&&fragii<ncoded_fragis)flag=(int)oc_pack_read1(&_dec->opb);
    else flag=!flag;
  }
  while(fragii<ncoded_fragis);
  /*TODO: run_count should be 0 here.
    If it's not, we should issue a warning of some kind.*/
}
983 
984 
985 
/*Unpacks the DC coefficient tokens.
  Unlike when unpacking the AC coefficient tokens, we actually need to decode
   the DC coefficient values now so that we can do DC prediction.
  Each token byte (plus its extra bits, if any) is also appended to
   _dec->dct_tokens, and for each plane the starting token offset and the EOB
   run carried into it are saved in _dec->ti0[pli][0] and
   _dec->eob_runs[pli][0].
  _dec:        The decoder context.
  _huff_idxs:  The indices of the Huffman tables to use: entry 0 is used for
                plane 0 and entry 1 for planes 1 and 2.
  _ntoks_left: The number of tokens left to be decoded in each color plane for
                each coefficient.
               This is updated as EOB tokens and zero run tokens are decoded.
  Return: The length of any outstanding EOB run.*/
static ptrdiff_t oc_dec_dc_coeff_unpack(oc_dec_ctx *_dec,int _huff_idxs[2],
 ptrdiff_t _ntoks_left[3][64]){
  unsigned char   *dct_tokens;
  oc_fragment     *frags;
  const ptrdiff_t *coded_fragis;
  ptrdiff_t        ncoded_fragis;
  ptrdiff_t        fragii;
  ptrdiff_t        eobs;
  ptrdiff_t        ti;
  int              pli;
  dct_tokens=_dec->dct_tokens;
  frags=_dec->state.frags;
  coded_fragis=_dec->state.coded_fragis;
  ncoded_fragis=fragii=eobs=ti=0;
  for(pli=0;pli<3;pli++){
    ptrdiff_t run_counts[64];
    ptrdiff_t eob_count;
    ptrdiff_t eobi;
    int       rli;
    /*ncoded_fragis accumulates over the planes, so it is the index one past
       the last coded fragment of this plane in coded_fragis.*/
    ncoded_fragis+=_dec->state.ncoded_fragis[pli];
    memset(run_counts,0,sizeof(run_counts));
    _dec->eob_runs[pli][0]=eobs;
    _dec->ti0[pli][0]=ti;
    /*Continue any previous EOB run, if there was one.*/
    eobi=eobs;
    /*Clamp the run to the fragments remaining in this plane; the rest stays
       in eobs for the next plane (or the caller).*/
    if(ncoded_fragis-fragii<eobi)eobi=ncoded_fragis-fragii;
    eob_count=eobi;
    eobs-=eobi;
    while(eobi-->0)frags[coded_fragis[fragii++]].dc=0;
    while(fragii<ncoded_fragis){
      int token;
      int cw;
      int eb;
      int skip;
      /*pli+1>>1 is (pli+1)>>1: 0 for plane 0, and 1 for planes 1 and 2.*/
      token=oc_huff_token_decode(&_dec->opb,
       _dec->huff_tables[_huff_idxs[pli+1>>1]]);
      dct_tokens[ti++]=(unsigned char)token;
      if(OC_DCT_TOKEN_NEEDS_MORE(token)){
        eb=(int)oc_pack_read(&_dec->opb,
         OC_INTERNAL_DCT_TOKEN_EXTRA_BITS[token]);
        /*The low byte of the extra bits is always stored; the next byte is
           stored as well only for OC_DCT_TOKEN_FAT_EOB.*/
        dct_tokens[ti++]=(unsigned char)eb;
        if(token==OC_DCT_TOKEN_FAT_EOB)dct_tokens[ti++]=(unsigned char)(eb>>8);
        eb<<=OC_DCT_TOKEN_EB_POS(token);
      }
      else eb=0;
      /*Combine the token's base code word with its positioned extra bits.*/
      cw=OC_DCT_CODE_WORD[token]+eb;
      eobs=cw>>OC_DCT_CW_EOB_SHIFT&0xFFF;
      if(cw==OC_DCT_CW_FINISH)eobs=OC_DCT_EOB_FINISH;
      if(eobs){
        /*An EOB run: every fragment it covers in this plane gets a DC of 0,
           and whatever is left over is carried forward in eobs.*/
        eobi=OC_MINI(eobs,ncoded_fragis-fragii);
        eob_count+=eobi;
        eobs-=eobi;
        while(eobi-->0)frags[coded_fragis[fragii++]].dc=0;
      }
      else{
        int coeff;
        skip=(unsigned char)(cw>>OC_DCT_CW_RLEN_SHIFT);
        /*If the flip bit is set, invert it and every bit above it in the
           code word before extracting the magnitude field.*/
        cw^=-(cw&1<<OC_DCT_CW_FLIP_BIT);
        coeff=cw>>OC_DCT_CW_MAG_SHIFT;
        /*A non-zero run length forces the DC value stored here to 0
           (presumably the coded value then belongs to a later
           coefficient).*/
        if(skip)coeff=0;
        run_counts[skip]++;
        frags[coded_fragis[fragii++]].dc=coeff;
      }
    }
    /*Add the total EOB count to the longest run length.*/
    run_counts[63]+=eob_count;
    /*And convert the run_counts array to a moment table.*/
    for(rli=63;rli-->0;)run_counts[rli]+=run_counts[rli+1];
    /*Finally, subtract off the number of coefficients that have been
       accounted for by runs started in this coefficient.*/
    for(rli=64;rli-->0;)_ntoks_left[pli][rli]-=run_counts[rli];
  }
  _dec->dct_tokens_count=ti;
  return eobs;
}
1069 
/*Unpacks the AC coefficient tokens.
  This can completely discard coefficient values while unpacking, and so is
   somewhat simpler than unpacking the DC coefficient tokens.
  Token bytes are appended to _dec->dct_tokens starting at
   _dec->dct_tokens_count, and for each plane the starting token offset and the
   EOB run carried into it are saved in _dec->ti0[pli][_zzi] and
   _dec->eob_runs[pli][_zzi].
  _dec:        The decoder context.
  _zzi:        The index of the coefficient whose tokens are being unpacked.
  _huff_idxs:  The indices of the Huffman tables to use: entry 0 is used for
                plane 0 and entry 1 for planes 1 and 2.
  _ntoks_left: The number of tokens left to be decoded in each color plane for
                each coefficient.
               This is updated as EOB tokens and zero run tokens are decoded.
  _eobs:       The length of any outstanding EOB run from previous
                coefficients.
  Return: The length of any outstanding EOB run.
  NOTE(review): the return type is int while _eobs is a ptrdiff_t (and the DC
   variant returns ptrdiff_t); confirm the run length always fits in an int.*/
static int oc_dec_ac_coeff_unpack(oc_dec_ctx *_dec,int _zzi,int _huff_idxs[2],
 ptrdiff_t _ntoks_left[3][64],ptrdiff_t _eobs){
  unsigned char *dct_tokens;
  ptrdiff_t      ti;
  int            pli;
  dct_tokens=_dec->dct_tokens;
  ti=_dec->dct_tokens_count;
  for(pli=0;pli<3;pli++){
    ptrdiff_t run_counts[64];
    ptrdiff_t eob_count;
    size_t    ntoks_left;
    size_t    ntoks;
    int       rli;
    _dec->eob_runs[pli][_zzi]=_eobs;
    _dec->ti0[pli][_zzi]=ti;
    ntoks_left=_ntoks_left[pli][_zzi];
    memset(run_counts,0,sizeof(run_counts));
    eob_count=0;
    ntoks=0;
    /*Keep reading tokens until the fragments accounted for so far, plus the
       pending EOB run, cover everything left for this coefficient.*/
    while(ntoks+_eobs<ntoks_left){
      int token;
      int cw;
      int eb;
      int skip;
      /*The pending EOB run fits entirely within this plane: consume it.*/
      ntoks+=_eobs;
      eob_count+=_eobs;
      /*pli+1>>1 is (pli+1)>>1: 0 for plane 0, and 1 for planes 1 and 2.*/
      token=oc_huff_token_decode(&_dec->opb,
       _dec->huff_tables[_huff_idxs[pli+1>>1]]);
      dct_tokens[ti++]=(unsigned char)token;
      if(OC_DCT_TOKEN_NEEDS_MORE(token)){
        eb=(int)oc_pack_read(&_dec->opb,
         OC_INTERNAL_DCT_TOKEN_EXTRA_BITS[token]);
        /*The low byte of the extra bits is always stored; the next byte is
           stored as well only for OC_DCT_TOKEN_FAT_EOB.*/
        dct_tokens[ti++]=(unsigned char)eb;
        if(token==OC_DCT_TOKEN_FAT_EOB)dct_tokens[ti++]=(unsigned char)(eb>>8);
        eb<<=OC_DCT_TOKEN_EB_POS(token);
      }
      else eb=0;
      cw=OC_DCT_CODE_WORD[token]+eb;
      skip=(unsigned char)(cw>>OC_DCT_CW_RLEN_SHIFT);
      _eobs=cw>>OC_DCT_CW_EOB_SHIFT&0xFFF;
      if(cw==OC_DCT_CW_FINISH)_eobs=OC_DCT_EOB_FINISH;
      /*A token that does not start an EOB run accounts for exactly one
         fragment at this coefficient, and is tallied by its run length.*/
      if(_eobs==0){
        run_counts[skip]++;
        ntoks++;
      }
    }
    /*Add the portion of the last EOB run actually used by this coefficient.*/
    eob_count+=ntoks_left-ntoks;
    /*And remove it from the remaining EOB count.*/
    _eobs-=ntoks_left-ntoks;
    /*Add the total EOB count to the longest run length.*/
    run_counts[63]+=eob_count;
    /*And convert the run_counts array to a moment table.*/
    for(rli=63;rli-->0;)run_counts[rli]+=run_counts[rli+1];
    /*Finally, subtract off the number of coefficients that have been
       accounted for by runs started in this coefficient.*/
    for(rli=64-_zzi;rli-->0;)_ntoks_left[pli][_zzi+rli]-=run_counts[rli];
  }
  _dec->dct_tokens_count=ti;
  return _eobs;
}
1141 
1142 /*Tokens describing the DCT coefficients that belong to each fragment are
1143    stored in the bitstream grouped by coefficient, not by fragment.
1144 
1145   This means that we either decode all the tokens in order, building up a
1146    separate coefficient list for each fragment as we go, and then go back and
1147    do the iDCT on each fragment, or we have to create separate lists of tokens
1148    for each coefficient, so that we can pull the next token required off the
1149    head of the appropriate list when decoding a specific fragment.
1150 
1151   The former was VP3's choice, and it meant 2*w*h extra storage for all the
1152    decoded coefficient values.
1153 
1154   We take the second option, which lets us store just one to three bytes per
1155    token (generally far fewer than the number of coefficients, due to EOB
1156    tokens and zero runs), and which requires us to only maintain a counter for
1157    each of the 64 coefficients, instead of a counter for every fragment to
1158    determine where the next token goes.
1159 
1160   We actually use 3 counters per coefficient, one for each color plane, so we
1161    can decode all color planes simultaneously.
1162   This lets color conversion, etc., be done as soon as a full MCU (one or
1163    two super block rows) is decoded, while the image data is still in cache.*/
1164 
/*Unpacks all of the DCT tokens for the current frame: the DC tokens first,
   followed by the AC tokens for coefficient indices 1 through 63.
  Two 4-bit Huffman table indices are read from the packet before the DC
   tokens, and two more before the AC tokens.*/
static void oc_dec_residual_tokens_unpack(oc_dec_ctx *_dec){
  /*The exclusive upper coefficient index of each Huffman table group.*/
  static const unsigned char OC_HUFF_LIST_MAX[5]={1,6,15,28,64};
  ptrdiff_t  ntoks_left[3][64];
  int        huff_idxs[2];
  ptrdiff_t  eobs;
  int        pli;
  int        zzi;
  int        hgi;
  /*Every coefficient of every plane starts out with one token slot per coded
     fragment in that plane.*/
  for(pli=0;pli<3;pli++){
    ptrdiff_t nplane_toks;
    nplane_toks=_dec->state.ncoded_fragis[pli];
    for(zzi=0;zzi<64;zzi++)ntoks_left[pli][zzi]=nplane_toks;
  }
  /*The DC table indices.*/
  huff_idxs[0]=(int)oc_pack_read(&_dec->opb,4);
  huff_idxs[1]=(int)oc_pack_read(&_dec->opb,4);
  _dec->eob_runs[0][0]=0;
  eobs=oc_dec_dc_coeff_unpack(_dec,huff_idxs,ntoks_left);
#if defined(HAVE_CAIRO)
  _dec->telemetry_dc_bytes=oc_pack_bytes_left(&_dec->opb);
#endif
  /*The AC table indices.*/
  huff_idxs[0]=(int)oc_pack_read(&_dec->opb,4);
  huff_idxs[1]=(int)oc_pack_read(&_dec->opb,4);
  zzi=1;
  for(hgi=1;hgi<5;hgi++){
    int zzi_end;
    /*Each successive group of coefficients uses a table 16 entries further
       along.*/
    huff_idxs[0]+=16;
    huff_idxs[1]+=16;
    zzi_end=OC_HUFF_LIST_MAX[hgi];
    while(zzi<zzi_end){
      eobs=oc_dec_ac_coeff_unpack(_dec,zzi,huff_idxs,ntoks_left,eobs);
      zzi++;
    }
  }
  /*TODO: eobs should be exactly zero, or 4096 or greater.
    The second case occurs when an EOB run of size zero is encountered, which
     gets treated as an infinite EOB run (where infinity is PTRDIFF_MAX).
    If neither of these conditions holds, then a warning should be issued.*/
}
1203 
1204 
/*Prepares (or releases) the post-processing buffers for the current frame,
   according to _dec->pp_level.
  Return: 0 if the post-processing buffers are set up and may be used.
          1 if post-processing must be skipped for this frame: the level is
           OC_PP_LEVEL_DISABLED or OC_PP_LEVEL_TRACKDCQI, DC quantizer
           tracking could not be started on a non-intra frame, or an
           allocation failed.*/
static int oc_dec_postprocess_init(oc_dec_ctx *_dec){
  int chroma_pp;
  /*pp_level 0: disabled; free any memory used and return.*/
  if(_dec->pp_level<=OC_PP_LEVEL_DISABLED){
    if(_dec->dc_qis!=NULL){
      _ogg_free(_dec->dc_qis);
      _ogg_free(_dec->variances);
      _ogg_free(_dec->pp_frame_data);
      _dec->dc_qis=NULL;
      _dec->variances=NULL;
      _dec->pp_frame_data=NULL;
    }
    return 1;
  }
  if(_dec->dc_qis!=NULL){
    const ptrdiff_t *coded_fragis;
    unsigned char   *dc_qis;
    ptrdiff_t        ncoded_fragis;
    ptrdiff_t        fragii;
    unsigned char    qi0;
    /*We are already tracking DC quantization indices: update the entry of
       each block coded in this frame.*/
    dc_qis=_dec->dc_qis;
    coded_fragis=_dec->state.coded_fragis;
    ncoded_fragis=_dec->state.ncoded_fragis[0];
    ncoded_fragis+=_dec->state.ncoded_fragis[1];
    ncoded_fragis+=_dec->state.ncoded_fragis[2];
    qi0=(unsigned char)_dec->state.qis[0];
    for(fragii=0;fragii<ncoded_fragis;fragii++){
      dc_qis[coded_fragis[fragii]]=qi0;
    }
  }
  else{
    /*If we haven't been tracking DC quantization indices, there's no point in
       starting anywhere but on an intra frame.*/
    if(_dec->state.frame_type!=OC_INTRA_FRAME)return 1;
    _dec->dc_qis=(unsigned char *)_ogg_malloc(
     _dec->state.nfrags*sizeof(_dec->dc_qis[0]));
    if(_dec->dc_qis==NULL)return 1;
    /*Every fragment starts out with this frame's first qi.*/
    memset(_dec->dc_qis,_dec->state.qis[0],_dec->state.nfrags);
  }
  /*pp_level 1: Stop after updating DC quantization indices.*/
  if(_dec->pp_level<=OC_PP_LEVEL_TRACKDCQI){
    if(_dec->variances!=NULL){
      _ogg_free(_dec->variances);
      _ogg_free(_dec->pp_frame_data);
      _dec->variances=NULL;
      _dec->pp_frame_data=NULL;
    }
    return 1;
  }
  if(_dec->variances==NULL){
    size_t y_sz;
    size_t c_sz;
    size_t frame_sz;
    int    c_w;
    int    c_h;
    y_sz=_dec->state.info.frame_width*(size_t)_dec->state.info.frame_height;
    c_w=_dec->state.info.frame_width>>!(_dec->state.info.pixel_fmt&1);
    c_h=_dec->state.info.frame_height>>!(_dec->state.info.pixel_fmt&2);
    c_sz=c_w*(size_t)c_h;
    /*Allocate space for the chroma planes, even if we're not going to use
       them; this simplifies allocation state management, though it may waste
       memory on the few systems that don't overcommit pages.*/
    frame_sz=y_sz+(c_sz<<1);
    _dec->pp_frame_data=(unsigned char *)_ogg_malloc(
     frame_sz*sizeof(_dec->pp_frame_data[0]));
    _dec->variances=(int *)_ogg_malloc(
     _dec->state.nfrags*sizeof(_dec->variances[0]));
    /*If either allocation failed, release both and give up for this frame.*/
    if(_dec->pp_frame_data==NULL||_dec->variances==NULL){
      _ogg_free(_dec->pp_frame_data);
      _ogg_free(_dec->variances);
      _dec->pp_frame_data=NULL;
      _dec->variances=NULL;
      return 1;
    }
    /*Force an update of the PP buffer pointers.*/
    _dec->pp_frame_state=0;
  }
  /*pp_frame_state records which layout the PP buffer pointers currently
     describe: 1 for plane 0 only, 2 for all three planes.*/
  chroma_pp=_dec->pp_level>=OC_PP_LEVEL_DEBLOCKC;
  /*Update the PP buffer pointers if necessary.*/
  if(_dec->pp_frame_state!=1+chroma_pp){
    if(chroma_pp){
      size_t y_sz;
      size_t c_sz;
      int    c_w;
      int    c_h;
      /*Set up pointers to all three PP planes, packed one after another in
         pp_frame_data, and then flip the buffer in place.*/
      y_sz=_dec->state.info.frame_width*(size_t)_dec->state.info.frame_height;
      c_w=_dec->state.info.frame_width>>!(_dec->state.info.pixel_fmt&1);
      c_h=_dec->state.info.frame_height>>!(_dec->state.info.pixel_fmt&2);
      c_sz=c_w*(size_t)c_h;
      _dec->pp_frame_buf[0].width=_dec->state.info.frame_width;
      _dec->pp_frame_buf[0].height=_dec->state.info.frame_height;
      _dec->pp_frame_buf[0].stride=_dec->pp_frame_buf[0].width;
      _dec->pp_frame_buf[0].data=_dec->pp_frame_data;
      _dec->pp_frame_buf[1].width=c_w;
      _dec->pp_frame_buf[1].height=c_h;
      _dec->pp_frame_buf[1].stride=_dec->pp_frame_buf[1].width;
      _dec->pp_frame_buf[1].data=_dec->pp_frame_buf[0].data+y_sz;
      _dec->pp_frame_buf[2].width=c_w;
      _dec->pp_frame_buf[2].height=c_h;
      _dec->pp_frame_buf[2].stride=_dec->pp_frame_buf[2].width;
      _dec->pp_frame_buf[2].data=_dec->pp_frame_buf[1].data+c_sz;
      oc_ycbcr_buffer_flip(_dec->pp_frame_buf,_dec->pp_frame_buf);
    }
    else{
      /*If chroma processing is disabled, just use the PP luma plane.
        The stride is negative and the data pointer addresses the last row of
         the allocation.*/
      _dec->pp_frame_buf[0].width=_dec->state.info.frame_width;
      _dec->pp_frame_buf[0].height=_dec->state.info.frame_height;
      _dec->pp_frame_buf[0].stride=-_dec->pp_frame_buf[0].width;
      _dec->pp_frame_buf[0].data=_dec->pp_frame_data+
       (1-_dec->pp_frame_buf[0].height)*(ptrdiff_t)_dec->pp_frame_buf[0].stride;
    }
    _dec->pp_frame_state=1+chroma_pp;
  }
  /*If we're not processing chroma, copy the reference frame's chroma plane
     descriptors.*/
  if(!chroma_pp){
    memcpy(_dec->pp_frame_buf+1,
     _dec->state.ref_frame_bufs[_dec->state.ref_frame_idx[OC_FRAME_SELF]]+1,
     sizeof(_dec->pp_frame_buf[1])*2);
  }
  return 0;
}
1324 
1325 
1326 /*Initialize the main decoding pipeline.*/
oc_dec_pipeline_init(oc_dec_ctx * _dec,oc_dec_pipeline_state * _pipe)1327 static void oc_dec_pipeline_init(oc_dec_ctx *_dec,
1328  oc_dec_pipeline_state *_pipe){
1329   const ptrdiff_t *coded_fragis;
1330   const ptrdiff_t *uncoded_fragis;
1331   int              flimit;
1332   int              pli;
1333   int              qii;
1334   int              qti;
1335   int              zzi;
1336   /*If chroma is sub-sampled in the vertical direction, we have to decode two
1337      super block rows of Y' for each super block row of Cb and Cr.*/
1338   _pipe->mcu_nvfrags=4<<!(_dec->state.info.pixel_fmt&2);
1339   /*Initialize the token and extra bits indices for each plane and
1340      coefficient.*/
1341   memcpy(_pipe->ti,_dec->ti0,sizeof(_pipe->ti));
1342   /*Also copy over the initial the EOB run counts.*/
1343   memcpy(_pipe->eob_runs,_dec->eob_runs,sizeof(_pipe->eob_runs));
1344   /*Set up per-plane pointers to the coded and uncoded fragments lists.*/
1345   coded_fragis=_dec->state.coded_fragis;
1346   uncoded_fragis=coded_fragis+_dec->state.nfrags;
1347   for(pli=0;pli<3;pli++){
1348     ptrdiff_t ncoded_fragis;
1349     _pipe->coded_fragis[pli]=coded_fragis;
1350     _pipe->uncoded_fragis[pli]=uncoded_fragis;
1351     ncoded_fragis=_dec->state.ncoded_fragis[pli];
1352     coded_fragis+=ncoded_fragis;
1353     uncoded_fragis+=ncoded_fragis-_dec->state.fplanes[pli].nfrags;
1354   }
1355   /*Set up condensed quantizer tables.*/
1356   for(pli=0;pli<3;pli++){
1357     for(qii=0;qii<_dec->state.nqis;qii++){
1358       for(qti=0;qti<2;qti++){
1359         _pipe->dequant[pli][qii][qti]=
1360          _dec->state.dequant_tables[_dec->state.qis[qii]][pli][qti];
1361       }
1362     }
1363   }
1364   /*Set the previous DC predictor to 0 for all color planes and frame types.*/
1365   memset(_pipe->pred_last,0,sizeof(_pipe->pred_last));
1366   /*Initialize the bounding value array for the loop filter.*/
1367   flimit=_dec->state.loop_filter_limits[_dec->state.qis[0]];
1368   _pipe->loop_filter=flimit!=0;
1369   if(flimit!=0)oc_loop_filter_init(&_dec->state,_pipe->bounding_values,flimit);
1370   /*Initialize any buffers needed for post-processing.
1371     We also save the current post-processing level, to guard against the user
1372      changing it from a callback.*/
1373   if(!oc_dec_postprocess_init(_dec))_pipe->pp_level=_dec->pp_level;
1374   /*If we don't have enough information to post-process, disable it, regardless
1375      of the user-requested level.*/
1376   else{
1377     _pipe->pp_level=OC_PP_LEVEL_DISABLED;
1378     memcpy(_dec->pp_frame_buf,
1379      _dec->state.ref_frame_bufs[_dec->state.ref_frame_idx[OC_FRAME_SELF]],
1380      sizeof(_dec->pp_frame_buf[0])*3);
1381   }
1382   /*Clear down the DCT coefficient buffer for the first block.*/
1383   for(zzi=0;zzi<64;zzi++)_pipe->dct_coeffs[zzi]=0;
1384 }
1385 
/*Undo the DC prediction in a single plane of an MCU (one or two super block
   rows).
  As a side effect, the number of coded and uncoded fragments in this plane of
   the MCU is also computed.
  _dec:  The decoder context; the dc field of each coded fragment in the MCU is
          replaced by its value with the prediction added back.
  _pipe: The pipeline state; supplies the fragment row range and the running
          per-reference-frame predictors for this plane, and receives the
          fragment counts.
  _pli:  The color plane to process.*/
void oc_dec_dc_unpredict_mcu_plane_c(oc_dec_ctx *_dec,
 oc_dec_pipeline_state *_pipe,int _pli){
  const oc_fragment_plane *fplane;
  oc_fragment             *frags;
  int                     *pred_last;
  ptrdiff_t                ncoded_fragis;
  ptrdiff_t                fragi;
  int                      fragx;
  int                      fragy;
  int                      fragy0;
  int                      fragy_end;
  int                      nhfrags;
  /*Compute the first and last fragment row of the current MCU for this
     plane.*/
  fplane=_dec->state.fplanes+_pli;
  fragy0=_pipe->fragy0[_pli];
  fragy_end=_pipe->fragy_end[_pli];
  nhfrags=fplane->nhfrags;
  pred_last=_pipe->pred_last[_pli];
  frags=_dec->state.frags;
  ncoded_fragis=0;
  /*The index of the first fragment of row fragy0 in this plane.*/
  fragi=fplane->froffset+fragy0*(ptrdiff_t)nhfrags;
  for(fragy=fragy0;fragy<fragy_end;fragy++){
    if(fragy==0){
      /*For the first row, all of the cases reduce to just using the previous
         predictor for the same reference frame.*/
      for(fragx=0;fragx<nhfrags;fragx++,fragi++){
        if(frags[fragi].coded){
          int refi;
          refi=frags[fragi].refi;
          pred_last[refi]=frags[fragi].dc+=pred_last[refi];
          ncoded_fragis++;
        }
      }
    }
    else{
      oc_fragment *u_frags;
      int          l_ref;
      int          ul_ref;
      int          u_ref;
      /*u_frags is offset by one row, so that u_frags[fragi] is the fragment
         directly above frags[fragi].*/
      u_frags=frags-nhfrags;
      /*-1 marks a neighbor that is not available; this relies on it never
         comparing equal to the refi of a coded fragment.*/
      l_ref=-1;
      ul_ref=-1;
      u_ref=u_frags[fragi].refi;
      for(fragx=0;fragx<nhfrags;fragx++,fragi++){
        int ur_ref;
        /*There is no up-right neighbor in the last column.*/
        if(fragx+1>=nhfrags)ur_ref=-1;
        else ur_ref=u_frags[fragi+1].refi;
        if(frags[fragi].coded){
          int pred;
          int refi;
          refi=frags[fragi].refi;
          /*We break out a separate case based on which of our neighbors use
             the same reference frames.
            This is somewhat faster than trying to make a generic case which
             handles all of them, since it reduces lots of poorly predicted
             jumps to one switch statement, and also lets a number of the
             multiplications be optimized out by strength reduction.
            The switch value has bit 0 set for a matching left neighbor, bit 1
             for up-left, bit 2 for up, and bit 3 for up-right.*/
          switch((l_ref==refi)|(ul_ref==refi)<<1|
           (u_ref==refi)<<2|(ur_ref==refi)<<3){
            /*No matching neighbors: use the last value predicted for this
               reference frame.*/
            default:pred=pred_last[refi];break;
            case  1:
            case  3:pred=frags[fragi-1].dc;break;
            case  2:pred=u_frags[fragi-1].dc;break;
            case  4:
            case  6:
            case 12:pred=u_frags[fragi].dc;break;
            case  5:pred=(frags[fragi-1].dc+u_frags[fragi].dc)/2;break;
            case  8:pred=u_frags[fragi+1].dc;break;
            case  9:
            case 11:
            case 13:{
              /*The TI compiler mis-compiles this line.*/
              pred=(75*frags[fragi-1].dc+53*u_frags[fragi+1].dc)/128;
            }break;
            case 10:pred=(u_frags[fragi-1].dc+u_frags[fragi+1].dc)/2;break;
            case 14:{
              pred=(3*(u_frags[fragi-1].dc+u_frags[fragi+1].dc)
               +10*u_frags[fragi].dc)/16;
            }break;
            case  7:
            case 15:{
              int p0;
              int p1;
              int p2;
              p0=frags[fragi-1].dc;
              p1=u_frags[fragi-1].dc;
              p2=u_frags[fragi].dc;
              pred=(29*(p0+p2)-26*p1)/32;
              /*If the weighted prediction strays more than 128 from a
                 neighbor, fall back to that neighbor's value, checking up,
                 then left, then up-left.*/
              if(abs(pred-p2)>128)pred=p2;
              else if(abs(pred-p0)>128)pred=p0;
              else if(abs(pred-p1)>128)pred=p1;
            }break;
          }
          /*Add the prediction back in, and remember the result as the latest
             value for this reference frame.*/
          pred_last[refi]=frags[fragi].dc+=pred;
          ncoded_fragis++;
          l_ref=refi;
        }
        else l_ref=-1;
        /*Slide the neighbor window one fragment to the right.*/
        ul_ref=u_ref;
        u_ref=ur_ref;
      }
    }
  }
  _pipe->ncoded_fragis[_pli]=ncoded_fragis;
  /*Also save the number of uncoded fragments so we know how many to copy.*/
  _pipe->nuncoded_fragis[_pli]=
   (fragy_end-fragy0)*(ptrdiff_t)nhfrags-ncoded_fragis;
}
1499 
1500 /*Reconstructs all coded fragments in a single MCU (one or two super block
1501    rows).
1502   This requires that each coded fragment have a proper macro block mode and
1503    motion vector (if not in INTRA mode), and have its DC value decoded, with
1504    the DC prediction process reversed, and the number of coded and uncoded
1505    fragments in this plane of the MCU be counted.
1506   The token lists for each color plane and coefficient should also be filled
1507    in, along with initial token offsets, extra bits offsets, and EOB run
1508    counts.*/
oc_dec_frags_recon_mcu_plane(oc_dec_ctx * _dec,oc_dec_pipeline_state * _pipe,int _pli)1509 static void oc_dec_frags_recon_mcu_plane(oc_dec_ctx *_dec,
1510  oc_dec_pipeline_state *_pipe,int _pli){
1511   unsigned char       *dct_tokens;
1512   const unsigned char *dct_fzig_zag;
1513   ogg_uint16_t         dc_quant[2];
1514   const oc_fragment   *frags;
1515   const ptrdiff_t     *coded_fragis;
1516   ptrdiff_t            ncoded_fragis;
1517   ptrdiff_t            fragii;
1518   ptrdiff_t           *ti;
1519   ptrdiff_t           *eob_runs;
1520   int                  qti;
1521   dct_tokens=_dec->dct_tokens;
1522   dct_fzig_zag=_dec->state.opt_data.dct_fzig_zag;
1523   frags=_dec->state.frags;
1524   coded_fragis=_pipe->coded_fragis[_pli];
1525   ncoded_fragis=_pipe->ncoded_fragis[_pli];
1526   ti=_pipe->ti[_pli];
1527   eob_runs=_pipe->eob_runs[_pli];
1528   for(qti=0;qti<2;qti++)dc_quant[qti]=_pipe->dequant[_pli][0][qti][0];
1529   for(fragii=0;fragii<ncoded_fragis;fragii++){
1530     const ogg_uint16_t *ac_quant;
1531     ptrdiff_t           fragi;
1532     int                 last_zzi;
1533     int                 zzi;
1534     fragi=coded_fragis[fragii];
1535     qti=frags[fragi].mb_mode!=OC_MODE_INTRA;
1536     ac_quant=_pipe->dequant[_pli][frags[fragi].qii][qti];
1537     /*Decode the AC coefficients.*/
1538     for(zzi=0;zzi<64;){
1539       int token;
1540       last_zzi=zzi;
1541       if(eob_runs[zzi]){
1542         eob_runs[zzi]--;
1543         break;
1544       }
1545       else{
1546         ptrdiff_t eob;
1547         int       cw;
1548         int       rlen;
1549         int       coeff;
1550         int       lti;
1551         lti=ti[zzi];
1552         token=dct_tokens[lti++];
1553         cw=OC_DCT_CODE_WORD[token];
1554         /*These parts could be done branchless, but the branches are fairly
1555            predictable and the C code translates into more than a few
1556            instructions, so it's worth it to avoid them.*/
1557         if(OC_DCT_TOKEN_NEEDS_MORE(token)){
1558           cw+=dct_tokens[lti++]<<OC_DCT_TOKEN_EB_POS(token);
1559         }
1560         eob=cw>>OC_DCT_CW_EOB_SHIFT&0xFFF;
1561         if(token==OC_DCT_TOKEN_FAT_EOB){
1562           eob+=dct_tokens[lti++]<<8;
1563           if(eob==0)eob=OC_DCT_EOB_FINISH;
1564         }
1565         rlen=(unsigned char)(cw>>OC_DCT_CW_RLEN_SHIFT);
1566         cw^=-(cw&1<<OC_DCT_CW_FLIP_BIT);
1567         coeff=cw>>OC_DCT_CW_MAG_SHIFT;
1568         eob_runs[zzi]=eob;
1569         ti[zzi]=lti;
1570         zzi+=rlen;
1571         _pipe->dct_coeffs[dct_fzig_zag[zzi]]=
1572          (ogg_int16_t)(coeff*(int)ac_quant[zzi]);
1573         zzi+=!eob;
1574       }
1575     }
1576     /*TODO: zzi should be exactly 64 here.
1577       If it's not, we should report some kind of warning.*/
1578     zzi=OC_MINI(zzi,64);
1579     _pipe->dct_coeffs[0]=(ogg_int16_t)frags[fragi].dc;
1580     /*last_zzi is always initialized.
1581       If your compiler thinks otherwise, it is dumb.*/
1582     oc_state_frag_recon(&_dec->state,fragi,_pli,
1583      _pipe->dct_coeffs,last_zzi,dc_quant[qti]);
1584   }
1585   _pipe->coded_fragis[_pli]+=ncoded_fragis;
1586   /*Right now the reconstructed MCU has only the coded blocks in it.*/
1587   /*TODO: We make the decision here to always copy the uncoded blocks into it
1588      from the reference frame.
1589     We could also copy the coded blocks back over the reference frame, if we
1590      wait for an additional MCU to be decoded, which might be faster if only a
1591      small number of blocks are coded.
1592     However, this introduces more latency, creating a larger cache footprint.
1593     It's unknown which decision is better, but this one results in simpler
1594      code, and the hard case (high bitrate, high resolution) is handled
1595      correctly.*/
1596   /*Copy the uncoded blocks from the previous reference frame.*/
1597   if(_pipe->nuncoded_fragis[_pli]>0){
1598     _pipe->uncoded_fragis[_pli]-=_pipe->nuncoded_fragis[_pli];
1599     oc_frag_copy_list(&_dec->state,
1600      _dec->state.ref_frame_data[OC_FRAME_SELF],
1601      _dec->state.ref_frame_data[OC_FRAME_PREV],
1602      _dec->state.ref_ystride[_pli],_pipe->uncoded_fragis[_pli],
1603      _pipe->nuncoded_fragis[_pli],_dec->state.frag_buf_offs);
1604   }
1605 }
1606 
/*Deblock the horizontal edge shared by two vertically adjacent blocks.
  Each of the 8 pixel columns is handled independently: 10 pixels straddling
   the edge are read from _src, and 8 pixels are written to _dst.
  A column is smoothed only if the activity on both sides of the edge is
   strictly below _flimit and the step across the edge is strictly below
   _qstep in magnitude; otherwise the 8 interior input pixels are copied
   through unchanged.
  _dst:         The first of the 8 output rows.
  _dst_ystride: The offset between consecutive output rows.
  _src:         The first of the 10 input rows; input row 1 corresponds to
                 output row 0.
  _src_ystride: The offset between consecutive input rows.
  _qstep:       The limit on the step across the edge.
  _flimit:      The limit on the activity on either side of the edge.
  _variance0:   Incremented by the activity of the first five input rows of
                 each column (at most 255 per column).
  _variance1:   Incremented by the activity of the last five input rows of
                 each column (at most 255 per column).*/
static void oc_filter_hedge(unsigned char *_dst,int _dst_ystride,
 const unsigned char *_src,int _src_ystride,int _qstep,int _flimit,
 int *_variance0,int *_variance1){
  int col;
  for(col=0;col<8;col++){
    const unsigned char *in;
    unsigned char       *out;
    int                  p[10];
    int                  act0;
    int                  act1;
    int                  i;
    /*Gather the column of pixels that straddles the edge.*/
    in=_src+col;
    for(i=0;i<10;i++){
      p[i]=in[0];
      in+=_src_ystride;
    }
    /*Measure the activity on each side of the edge.*/
    act0=0;
    act1=0;
    for(i=0;i<4;i++){
      act0+=abs(p[i+1]-p[i]);
      act1+=abs(p[i+5]-p[i+6]);
    }
    *_variance0+=OC_MINI(255,act0);
    *_variance1+=OC_MINI(255,act1);
    out=_dst+col;
    if(act0>=_flimit||act1>=_flimit||p[5]-p[4]>=_qstep||p[4]-p[5]>=_qstep){
      /*Too much detail, or a real edge: pass the pixels through untouched.*/
      for(i=1;i<9;i++){
        out[0]=(unsigned char)p[i];
        out+=_dst_ystride;
      }
    }
    else{
      /*The two outermost taps on each side use shortened, re-weighted
         kernels; the four in the middle share one 7-tap kernel.*/
      out[0]=(unsigned char)((p[0]*3+p[1]*2+p[2]+p[3]+p[4]+4)>>3);
      out+=_dst_ystride;
      out[0]=(unsigned char)((p[0]*2+p[1]+p[2]*2+p[3]+p[4]+p[5]+4)>>3);
      out+=_dst_ystride;
      for(i=0;i<4;i++){
        out[0]=(unsigned char)((p[i]+p[i+1]+p[i+2]+p[i+3]*2
         +p[i+4]+p[i+5]+p[i+6]+4)>>3);
        out+=_dst_ystride;
      }
      out[0]=(unsigned char)((p[4]+p[5]+p[6]+p[7]*2+p[8]+p[9]*2+4)>>3);
      out+=_dst_ystride;
      out[0]=(unsigned char)((p[5]+p[6]+p[7]+p[8]*2+p[9]*3+4)>>3);
    }
  }
}
1660 
/*Deblock the vertical edge shared by two horizontally adjacent blocks, in
   place.
  Each of the 8 pixel rows is handled independently: 10 pixels are read
   starting one pixel to the left of _dst, and, if the row passes the
   flatness tests, the 8 pixels starting at _dst are overwritten with their
   smoothed values.
  Rows that fail the tests are left untouched.
  _dst:         The first pixel to (potentially) modify in the first row.
  _dst_ystride: The offset between consecutive rows.
  _qstep:       The limit on the step across the edge.
  _flimit:      The limit on the activity on either side of the edge.
  _variances:   _variances[0] is incremented by the activity of the first
                 five pixels of each row and _variances[1] by that of the
                 last five (each at most 255 per row).*/
static void oc_filter_vedge(unsigned char *_dst,int _dst_ystride,
 int _qstep,int _flimit,int *_variances){
  unsigned char *row;
  int            line;
  row=_dst;
  for(line=0;line<8;line++){
    int p[10];
    int act0;
    int act1;
    int i;
    /*Gather the run of pixels that straddles the edge.*/
    for(i=0;i<10;i++)p[i]=row[i-1];
    /*Measure the activity on each side of the edge.*/
    act0=0;
    act1=0;
    for(i=0;i<4;i++){
      act0+=abs(p[i+1]-p[i]);
      act1+=abs(p[i+5]-p[i+6]);
    }
    _variances[0]+=OC_MINI(255,act0);
    _variances[1]+=OC_MINI(255,act1);
    if(act0<_flimit&&act1<_flimit&&p[5]-p[4]<_qstep&&p[4]-p[5]<_qstep){
      /*All inputs were copied into p[] above, so overwriting the row here
         cannot disturb the taps still to be computed.*/
      row[0]=(unsigned char)((p[0]*3+p[1]*2+p[2]+p[3]+p[4]+4)>>3);
      row[1]=(unsigned char)((p[0]*2+p[1]+p[2]*2+p[3]+p[4]+p[5]+4)>>3);
      for(i=0;i<4;i++){
        row[i+2]=(unsigned char)((p[i]+p[i+1]+p[i+2]+p[i+3]*2
         +p[i+4]+p[i+5]+p[i+6]+4)>>3);
      }
      row[6]=(unsigned char)((p[4]+p[5]+p[6]+p[7]*2+p[8]+p[9]*2+4)>>3);
      row[7]=(unsigned char)((p[5]+p[6]+p[7]+p[8]*2+p[9]*3+4)>>3);
    }
    row+=_dst_ystride;
  }
}
1697 
/*Apply the out-of-loop deblocking filter to a range of fragment rows in one
   plane, reading from _src and writing the result to _dst.
  As a side effect, the per-fragment variance estimates in _dec->variances
   are cleared and re-accumulated for the rows that are touched.
  _dec:       The decoder context; supplies the fragment plane layout, the
               variance and DC qi arrays, and the pp_dc_scale table.
  _dst:       The array of output image planes (indexed by _pli).
  _src:       The array of input image planes (indexed by _pli).
  _pli:       The plane to filter.
  _fragy0:    The first fragment row of the range.
  _fragy_end: The fragment row just past the end of the range.*/
static void oc_dec_deblock_frag_rows(oc_dec_ctx *_dec,
 th_img_plane *_dst,th_img_plane *_src,int _pli,int _fragy0,
 int _fragy_end){
  oc_fragment_plane   *fplane;
  int                 *variance;
  unsigned char       *dc_qi;
  unsigned char       *dst;
  const unsigned char *src;
  ptrdiff_t            froffset;
  int                  dst_ystride;
  int                  src_ystride;
  int                  nhfrags;
  int                  width;
  int                  notstart;
  int                  notdone;
  int                  flimit;
  int                  qstep;
  int                  y_end;
  int                  y;
  int                  x;
  /*Select the requested plane in both buffers.*/
  _dst+=_pli;
  _src+=_pli;
  fplane=_dec->state.fplanes+_pli;
  nhfrags=fplane->nhfrags;
  /*Index of the first fragment of row _fragy0 in this plane.*/
  froffset=fplane->froffset+_fragy0*(ptrdiff_t)nhfrags;
  variance=_dec->variances+froffset;
  dc_qi=_dec->dc_qis+froffset;
  /*notstart is 1 unless the range begins at the top of the plane; notdone is
     1 unless the range reaches the bottom of the plane.*/
  notstart=_fragy0>0;
  notdone=_fragy_end<fplane->nvfrags;
  /*We want to clear an extra row of variances, except at the end.*/
  memset(variance+(nhfrags&-notstart),0,
   (_fragy_end+notdone-_fragy0-notstart)*(nhfrags*sizeof(variance[0])));
  /*Except for the first time, we want to point to the middle of the row.*/
  y=(_fragy0<<3)+(notstart<<2);
  dst_ystride=_dst->stride;
  src_ystride=_src->stride;
  dst=_dst->data+y*(ptrdiff_t)dst_ystride;
  src=_src->data+y*(ptrdiff_t)src_ystride;
  width=_dst->width;
  /*Copy the top four rows of the plane unfiltered.
    This loop only runs when the range starts at the top (y begins at 0).*/
  for(;y<4;y++){
    memcpy(dst,src,width*sizeof(dst[0]));
    dst+=dst_ystride;
    src+=src_ystride;
  }
  /*We also want to skip the last row in the frame for this loop.*/
  y_end=_fragy_end-!notdone<<3;
  for(;y<y_end;y+=8){
    /*The filter limits are derived from the DC quantizer of the current
       fragment: flimit is 3/4 of qstep.*/
    qstep=_dec->pp_dc_scale[*dc_qi];
    flimit=(qstep*3)>>2;
    /*Filter the horizontal edge below the first fragment in this row.
      The source pointer is backed up one row because the filter reads one
       pixel beyond each end of the 8 pixels it writes.*/
    oc_filter_hedge(dst,dst_ystride,src-src_ystride,src_ystride,
     qstep,flimit,variance,variance+nhfrags);
    variance++;
    dc_qi++;
    for(x=8;x<width;x+=8){
      qstep=_dec->pp_dc_scale[*dc_qi];
      flimit=(qstep*3)>>2;
      oc_filter_hedge(dst+x,dst_ystride,src+x-src_ystride,src_ystride,
       qstep,flimit,variance,variance+nhfrags);
      /*Filter, in place in the output, the vertical edge between this
         fragment and the one to its left.
        The pointer is moved up four rows and left four pixels, so the 8x8
         region modified is centered on that edge within the fragment row.*/
      oc_filter_vedge(dst+x-(dst_ystride<<2)-4,dst_ystride,
       qstep,flimit,variance-1);
      variance++;
      dc_qi++;
    }
    dst+=dst_ystride<<3;
    src+=src_ystride<<3;
  }
  /*And finally, handle the last row in the frame, if it's in the range.*/
  if(!notdone){
    int height;
    height=_dst->height;
    /*Copy the remaining rows at the bottom of the plane unfiltered.*/
    for(;y<height;y++){
      memcpy(dst,src,width*sizeof(dst[0]));
      dst+=dst_ystride;
      src+=src_ystride;
    }
    /*Filter the last row of vertical block edges.*/
    /*Skip the first fragment's DC qi: there is no edge to its left.*/
    dc_qi++;
    for(x=8;x<width;x+=8){
      qstep=_dec->pp_dc_scale[*dc_qi++];
      flimit=(qstep*3)>>2;
      /*dst now points just past the last row, so back up a full 8 rows.*/
      oc_filter_vedge(dst+x-(dst_ystride<<3)-4,dst_ystride,
       qstep,flimit,variance++);
    }
  }
}
1783 
/*Apply the de-ringing filter to one 8x8 block, in place.
  Every pixel is replaced by a weighted average of itself and its left, top,
   bottom, and right neighbors.
  The weight given to each neighbor shrinks as the absolute difference across
   the corresponding pixel boundary grows, so flat areas are smoothed more
   than edges.
  _idata:     The top-left pixel of the block.
  _ystride:   The offset between consecutive rows.
  _b:         A bitmask of the block sides that must not be read across:
               bit 0 (1) is the left side, bit 1 (2) the right, bit 2 (4) the
               top, and bit 3 (8) the bottom.
              When a bit is set, the pixel on the block boundary is used in
               place of the missing neighbor.
  _dc_scale:  Added to the base value of every weight, and bounds the
               maximum weight (3*_dc_scale).
  _sharp_mod: The weight to use when a difference is so large that the
               computed weight falls below -64.
  _strong:    Selects the filter strength (0 or 1); indexes OC_MOD_MAX and
               OC_MOD_SHIFT.*/
static void oc_dering_block(unsigned char *_idata,int _ystride,int _b,
 int _dc_scale,int _sharp_mod,int _strong){
  /*The upper bound on a weight, indexed by _strong.*/
  static const unsigned char OC_MOD_MAX[2]={24,32};
  /*The left shift applied to a pixel difference, indexed by _strong.*/
  static const unsigned char OC_MOD_SHIFT[2]={1,0};
  const unsigned char *psrc;
  const unsigned char *src;
  const unsigned char *nsrc;
  unsigned char       *dst;
  /*vmod[(by<<3)+bx] is the weight for the boundary between rows by-1 and by
     in column bx (9 boundaries per column).*/
  int                  vmod[72];
  /*hmod[(bx<<3)+by] is the weight for the boundary between columns bx-1 and
     bx in row by (9 boundaries per row).*/
  int                  hmod[72];
  int                  mod_hi;
  int                  by;
  int                  bx;
  mod_hi=OC_MINI(3*_dc_scale,OC_MOD_MAX[_strong]);
  dst=_idata;
  src=dst;
  /*Start from the row above the block, unless this is the top edge, in which
     case psrc==src and the first row of differences is zero.*/
  psrc=src-(_ystride&-!(_b&4));
  /*Compute the weights for the 9 horizontal boundaries (vertical
     neighbors).*/
  for(by=0;by<9;by++){
    for(bx=0;bx<8;bx++){
      int mod;
      mod=32+_dc_scale-(abs(src[bx]-psrc[bx])<<OC_MOD_SHIFT[_strong]);
      vmod[(by<<3)+bx]=mod<-64?_sharp_mod:OC_CLAMPI(0,mod,mod_hi);
    }
    psrc=src;
    /*Do not step past the last row of the block on the bottom edge.*/
    src+=_ystride&-(!(_b&8)|by<7);
  }
  nsrc=dst;
  /*Start from the column left of the block, unless this is the left edge.*/
  psrc=dst-!(_b&1);
  /*Compute the weights for the 9 vertical boundaries (horizontal
     neighbors).*/
  for(bx=0;bx<9;bx++){
    src=nsrc;
    for(by=0;by<8;by++){
      int mod;
      mod=32+_dc_scale-(abs(*src-*psrc)<<OC_MOD_SHIFT[_strong]);
      hmod[(bx<<3)+by]=mod<-64?_sharp_mod:OC_CLAMPI(0,mod,mod_hi);
      psrc+=_ystride;
      src+=_ystride;
    }
    psrc=nsrc;
    /*Do not step past the last column of the block on the right edge.*/
    nsrc+=!(_b&2)|bx<7;
  }
  /*Apply the weights.
    NOTE(review): dst and src both walk the same buffer, so the left and top
     neighbors read for a pixel (other than those outside the block) have
     already been filtered by the time they are used.*/
  src=dst;
  psrc=src-(_ystride&-!(_b&4));
  nsrc=src+_ystride;
  for(by=0;by<8;by++){
    /*a is the weight left for the center pixel (the weights sum to 128); b
       accumulates the weighted neighbors plus a rounding offset of 64.*/
    int a;
    int b;
    int w;
    /*The first column: the left neighbor may lie outside the block.*/
    a=128;
    b=64;
    w=hmod[by];
    a-=w;
    b+=w**(src-!(_b&1));
    w=vmod[by<<3];
    a-=w;
    b+=w*psrc[0];
    w=vmod[by+1<<3];
    a-=w;
    b+=w*nsrc[0];
    w=hmod[(1<<3)+by];
    a-=w;
    b+=w*src[1];
    dst[0]=OC_CLAMP255(a*src[0]+b>>7);
    /*The interior columns.*/
    for(bx=1;bx<7;bx++){
      a=128;
      b=64;
      w=hmod[(bx<<3)+by];
      a-=w;
      b+=w*src[bx-1];
      w=vmod[(by<<3)+bx];
      a-=w;
      b+=w*psrc[bx];
      w=vmod[(by+1<<3)+bx];
      a-=w;
      b+=w*nsrc[bx];
      w=hmod[(bx+1<<3)+by];
      a-=w;
      b+=w*src[bx+1];
      dst[bx]=OC_CLAMP255(a*src[bx]+b>>7);
    }
    /*The last column: the right neighbor may lie outside the block.*/
    a=128;
    b=64;
    w=hmod[(7<<3)+by];
    a-=w;
    b+=w*src[6];
    w=vmod[(by<<3)+7];
    a-=w;
    b+=w*psrc[7];
    w=vmod[(by+1<<3)+7];
    a-=w;
    b+=w*nsrc[7];
    w=hmod[(8<<3)+by];
    a-=w;
    b+=w*src[7+!(_b&2)];
    dst[7]=OC_CLAMP255(a*src[7]+b>>7);
    dst+=_ystride;
    psrc=src;
    src=nsrc;
    /*Do not step past the last row of the block on the bottom edge.*/
    nsrc+=_ystride&-(!(_b&8)|by<6);
  }
}
1884 
/*Thresholds on a fragment's accumulated variance, used by
   oc_dec_dering_frag_rows() to decide how much de-ringing to apply.*/
/*Above this, a block gets one pass of the weaker filter.*/
#define OC_DERING_THRESH1 (384)
/*Above this, a block gets one pass of the stronger filter.*/
#define OC_DERING_THRESH2 (4*OC_DERING_THRESH1)
/*When strong de-ringing is enabled, the threshold above which a luma block
   takes the multi-pass path.*/
#define OC_DERING_THRESH3 (5*OC_DERING_THRESH1)
/*When strong de-ringing is enabled, the threshold above which a chroma
   block takes the multi-pass path; also the threshold on a luma block's
   neighbors that triggers its extra passes.*/
#define OC_DERING_THRESH4 (10*OC_DERING_THRESH1)
1889 
/*Apply the de-ringing filter, in place, to a range of fragment rows in one
   plane.
  The number of filter passes and the filter strength used for each block
   are chosen from the block's accumulated variance (and, for luma, that of
   its neighbors).
  _dec:       The decoder context; supplies the fragment list, the variances,
               the qi values, the post-processing level, and the
               pp_dc_scale and pp_sharp_mod tables.
  _img:       The array of image planes to filter (indexed by _pli).
  _pli:       The plane to filter.
  _fragy0:    The first fragment row of the range.
  _fragy_end: The fragment row just past the end of the range.*/
static void oc_dec_dering_frag_rows(oc_dec_ctx *_dec,th_img_plane *_img,
 int _pli,int _fragy0,int _fragy_end){
  const th_img_plane      *plane;
  const oc_fragment_plane *fplane;
  const oc_fragment       *frag;
  const int               *vp;
  unsigned char           *row;
  ptrdiff_t                froffset;
  int                      ystride;
  int                      nhfrags;
  int                      sthresh;
  int                      strong;
  int                      width;
  int                      height;
  int                      y_end;
  int                      y;
  int                      x;
  plane=_img+_pli;
  fplane=_dec->state.fplanes+_pli;
  nhfrags=fplane->nhfrags;
  /*Index of the first fragment of row _fragy0 in this plane.*/
  froffset=fplane->froffset+_fragy0*(ptrdiff_t)nhfrags;
  vp=_dec->variances+froffset;
  frag=_dec->state.frags+froffset;
  /*Luma and chroma enable strong de-ringing at different post-processing
     levels, and use different variance thresholds for it.*/
  if(_pli){
    strong=_dec->pp_level>=OC_PP_LEVEL_SDERINGC;
    sthresh=OC_DERING_THRESH4;
  }
  else{
    strong=_dec->pp_level>=OC_PP_LEVEL_SDERINGY;
    sthresh=OC_DERING_THRESH3;
  }
  ystride=plane->stride;
  width=plane->width;
  height=plane->height;
  y=_fragy0<<3;
  y_end=_fragy_end<<3;
  row=plane->data+y*(ptrdiff_t)ystride;
  for(;y<y_end;y+=8){
    for(x=0;x<width;x+=8,frag++,vp++){
      int edges;
      int npasses;
      int strength;
      int qi;
      int v;
      qi=_dec->state.qis[frag->qii];
      v=*vp;
      /*Flag the sides of this block that lie on the plane boundary:
         1 is left, 2 is right, 4 is top, 8 is bottom.*/
      edges=(x<=0)|((x+8>=width)<<1)|((y<=0)<<2)|((y+8>=height)<<3);
      npasses=0;
      strength=1;
      if(strong&&v>sthresh){
        npasses=1;
        /*Chroma always gets the extra passes; luma only if one of the
           neighbors inside the plane is also very busy.*/
        if(_pli||!(edges&1)&&*(vp-1)>OC_DERING_THRESH4||
         !(edges&2)&&vp[1]>OC_DERING_THRESH4||
         !(edges&4)&&*(vp-nhfrags)>OC_DERING_THRESH4||
         !(edges&8)&&vp[nhfrags]>OC_DERING_THRESH4){
          npasses=3;
        }
      }
      else if(v>OC_DERING_THRESH2)npasses=1;
      else if(v>OC_DERING_THRESH1){
        npasses=1;
        strength=0;
      }
      for(;npasses>0;npasses--){
        oc_dering_block(row+x,ystride,edges,
         _dec->pp_dc_scale[qi],_dec->pp_sharp_mod[qi],strength);
      }
    }
    row+=ystride<<3;
  }
}
1956 
1957 
1958 
th_decode_alloc(const th_info * _info,const th_setup_info * _setup)1959 th_dec_ctx *th_decode_alloc(const th_info *_info,const th_setup_info *_setup){
1960   oc_dec_ctx *dec;
1961   if(_info==NULL||_setup==NULL)return NULL;
1962   dec=oc_aligned_malloc(sizeof(*dec),16);
1963   if(dec==NULL||oc_dec_init(dec,_info,_setup)<0){
1964     oc_aligned_free(dec);
1965     return NULL;
1966   }
1967   dec->state.curframe_num=0;
1968   return dec;
1969 }
1970 
/*Release a decoder instance: clears it with oc_dec_clear() and then frees
   the context itself.
  _dec: The decoder context to free; NULL is accepted and ignored.*/
void th_decode_free(th_dec_ctx *_dec){
  if(_dec==NULL)return;
  oc_dec_clear(_dec);
  oc_aligned_free(_dec);
}
1977 
/*Decoder control function: queries or changes a decoder setting.
  _dec:    The decoder context.
  _req:    The control code to process (one of the TH_DECCTL_* values).
  _buf:    The parameters for the control code; its expected type depends on
            _req (see each case below).
  _buf_sz: The size of the object pointed to by _buf, in bytes.
  Return: 0 on success, TH_EFAULT if _dec or _buf is NULL, TH_EINVAL if
           _buf_sz does not match the expected type or the supplied value is
           out of range, or TH_EIMPL if _req is not handled.*/
int th_decode_ctl(th_dec_ctx *_dec,int _req,void *_buf,
 size_t _buf_sz){
  switch(_req){
  /*Writes the maximum post-processing level to an int.*/
  case TH_DECCTL_GET_PPLEVEL_MAX:{
    if(_dec==NULL||_buf==NULL)return TH_EFAULT;
    if(_buf_sz!=sizeof(int))return TH_EINVAL;
    (*(int *)_buf)=OC_PP_LEVEL_MAX;
    return 0;
  }break;
  /*Reads the post-processing level from an int in [0,OC_PP_LEVEL_MAX].*/
  case TH_DECCTL_SET_PPLEVEL:{
    int pp_level;
    if(_dec==NULL||_buf==NULL)return TH_EFAULT;
    if(_buf_sz!=sizeof(int))return TH_EINVAL;
    pp_level=*(int *)_buf;
    if(pp_level<0||pp_level>OC_PP_LEVEL_MAX)return TH_EINVAL;
    _dec->pp_level=pp_level;
    return 0;
  }break;
  /*Reads a non-negative granule position from an ogg_int64_t and derives
     the key frame and current frame numbers from it.*/
  case TH_DECCTL_SET_GRANPOS:{
    ogg_int64_t granpos;
    if(_dec==NULL||_buf==NULL)return TH_EFAULT;
    if(_buf_sz!=sizeof(ogg_int64_t))return TH_EINVAL;
    granpos=*(ogg_int64_t *)_buf;
    if(granpos<0)return TH_EINVAL;
    _dec->state.granpos=granpos;
    _dec->state.keyframe_num=(granpos>>_dec->state.info.keyframe_granule_shift)
     -_dec->state.granpos_bias;
    /*The mask is built in 64 bits: with a plain int, a shift of 31 would
       overflow, which is undefined behavior.*/
    _dec->state.curframe_num=_dec->state.keyframe_num
     +(granpos&(((ogg_int64_t)1<<_dec->state.info.keyframe_granule_shift)-1));
    return 0;
  }break;
  /*Reads the striped decode callback and its context from a
     th_stripe_callback.*/
  case TH_DECCTL_SET_STRIPE_CB:{
    th_stripe_callback *cb;
    if(_dec==NULL||_buf==NULL)return TH_EFAULT;
    if(_buf_sz!=sizeof(th_stripe_callback))return TH_EINVAL;
    cb=(th_stripe_callback *)_buf;
    _dec->stripe_cb.ctx=cb->ctx;
    _dec->stripe_cb.stripe_decoded=cb->stripe_decoded;
    return 0;
  }break;
#ifdef HAVE_CAIRO
  /*Each telemetry request enables telemetry and stores an int setting.*/
  case TH_DECCTL_SET_TELEMETRY_MBMODE:{
    if(_dec==NULL||_buf==NULL)return TH_EFAULT;
    if(_buf_sz!=sizeof(int))return TH_EINVAL;
    _dec->telemetry=1;
    _dec->telemetry_mbmode=*(int *)_buf;
    return 0;
  }break;
  case TH_DECCTL_SET_TELEMETRY_MV:{
    if(_dec==NULL||_buf==NULL)return TH_EFAULT;
    if(_buf_sz!=sizeof(int))return TH_EINVAL;
    _dec->telemetry=1;
    _dec->telemetry_mv=*(int *)_buf;
    return 0;
  }break;
  case TH_DECCTL_SET_TELEMETRY_QI:{
    if(_dec==NULL||_buf==NULL)return TH_EFAULT;
    if(_buf_sz!=sizeof(int))return TH_EINVAL;
    _dec->telemetry=1;
    _dec->telemetry_qi=*(int *)_buf;
    return 0;
  }break;
  case TH_DECCTL_SET_TELEMETRY_BITS:{
    if(_dec==NULL||_buf==NULL)return TH_EFAULT;
    if(_buf_sz!=sizeof(int))return TH_EINVAL;
    _dec->telemetry=1;
    _dec->telemetry_bits=*(int *)_buf;
    return 0;
  }break;
#endif
  default:return TH_EIMPL;
  }
}
2051 
2052 /*We're decoding an INTER frame, but have no initialized reference
2053    buffers (i.e., decoding did not start on a key frame).
2054   We initialize them to a solid gray here.*/
oc_dec_init_dummy_frame(th_dec_ctx * _dec)2055 static void oc_dec_init_dummy_frame(th_dec_ctx *_dec){
2056   th_info   *info;
2057   size_t     yplane_sz;
2058   size_t     cplane_sz;
2059   ptrdiff_t  yoffset;
2060   int        yhstride;
2061   int        yheight;
2062   int        chstride;
2063   int        cheight;
2064   _dec->state.ref_frame_idx[OC_FRAME_GOLD]=0;
2065   _dec->state.ref_frame_idx[OC_FRAME_PREV]=0;
2066   _dec->state.ref_frame_idx[OC_FRAME_SELF]=0;
2067   _dec->state.ref_frame_data[OC_FRAME_GOLD]=
2068    _dec->state.ref_frame_data[OC_FRAME_PREV]=
2069    _dec->state.ref_frame_data[OC_FRAME_SELF]=
2070    _dec->state.ref_frame_bufs[0][0].data;
2071   memcpy(_dec->pp_frame_buf,_dec->state.ref_frame_bufs[0],
2072    sizeof(_dec->pp_frame_buf[0])*3);
2073   info=&_dec->state.info;
2074   yhstride=abs(_dec->state.ref_ystride[0]);
2075   yheight=info->frame_height+2*OC_UMV_PADDING;
2076   chstride=abs(_dec->state.ref_ystride[1]);
2077   cheight=yheight>>!(info->pixel_fmt&2);
2078   yplane_sz=yhstride*(size_t)yheight+16;
2079   cplane_sz=chstride*(size_t)cheight;
2080   yoffset=yhstride*(ptrdiff_t)(yheight-OC_UMV_PADDING-1)+OC_UMV_PADDING;
2081   memset(_dec->state.ref_frame_data[0]-yoffset,0x80,yplane_sz+2*cplane_sz);
2082 }
2083 
/*Submit a packet containing one encoded frame to the decoder.
  _dec:     The decoder context.
  _op:      The packet to decode; a packet with zero bytes is treated as a
             dropped frame.
  _granpos: If not NULL, receives the granule position of the frame.
  Return: 0 if a frame with coded blocks was decoded, TH_DUPFRAME if the
           packet was empty or the frame had no coded blocks (only the
           granule position and frame counter are updated), TH_EFAULT if
           _dec or _op is NULL, or the negative value returned by
           oc_dec_frame_header_unpack() if the frame header is rejected.*/
int th_decode_packetin(th_dec_ctx *_dec,const ogg_packet *_op,
 ogg_int64_t *_granpos){
  int ret;
  if(_dec==NULL||_op==NULL)return TH_EFAULT;
  /*A completely empty packet indicates a dropped frame and is treated exactly
     like an inter frame with no coded blocks.*/
  if(_op->bytes==0){
    _dec->state.frame_type=OC_INTER_FRAME;
    _dec->state.ntotal_coded_fragis=0;
  }
  else{
    oc_pack_readinit(&_dec->opb,_op->packet,_op->bytes);
    ret=oc_dec_frame_header_unpack(_dec);
    if(ret<0)return ret;
    if(_dec->state.frame_type==OC_INTRA_FRAME)oc_dec_mark_all_intra(_dec);
    else oc_dec_coded_flags_unpack(_dec);
  }
  /*If there have been no reference frames, and we need one, initialize one.*/
  if(_dec->state.frame_type!=OC_INTRA_FRAME&&
   (_dec->state.ref_frame_idx[OC_FRAME_GOLD]<0||
   _dec->state.ref_frame_idx[OC_FRAME_PREV]<0)){
    oc_dec_init_dummy_frame(_dec);
  }
  /*If this was an inter frame with no coded blocks...*/
  if(_dec->state.ntotal_coded_fragis<=0){
    /*Just update the granule position and return.*/
    _dec->state.granpos=(_dec->state.keyframe_num+_dec->state.granpos_bias<<
     _dec->state.info.keyframe_granule_shift)
     +(_dec->state.curframe_num-_dec->state.keyframe_num);
    _dec->state.curframe_num++;
    if(_granpos!=NULL)*_granpos=_dec->state.granpos;
    return TH_DUPFRAME;
  }
  else{
    th_ycbcr_buffer stripe_buf;
    int             stripe_fragy;
    int             refi;
    int             pli;
    int             notstart;
    int             notdone;
    /*Select a free buffer to use for the reconstructed version of this frame.*/
    /*The loop stops at the smallest index that is neither the golden nor the
       previous reference.*/
    for(refi=0;refi==_dec->state.ref_frame_idx[OC_FRAME_GOLD]||
     refi==_dec->state.ref_frame_idx[OC_FRAME_PREV];refi++);
    _dec->state.ref_frame_idx[OC_FRAME_SELF]=refi;
    _dec->state.ref_frame_data[OC_FRAME_SELF]=
     _dec->state.ref_frame_bufs[refi][0].data;
#if defined(HAVE_CAIRO)
    _dec->telemetry_frame_bytes=_op->bytes;
#endif
    if(_dec->state.frame_type==OC_INTRA_FRAME){
      _dec->state.keyframe_num=_dec->state.curframe_num;
#if defined(HAVE_CAIRO)
      _dec->telemetry_coding_bytes=
       _dec->telemetry_mode_bytes=
       _dec->telemetry_mv_bytes=oc_pack_bytes_left(&_dec->opb);
#endif
    }
    else{
#if defined(HAVE_CAIRO)
      _dec->telemetry_coding_bytes=oc_pack_bytes_left(&_dec->opb);
#endif
      oc_dec_mb_modes_unpack(_dec);
#if defined(HAVE_CAIRO)
      _dec->telemetry_mode_bytes=oc_pack_bytes_left(&_dec->opb);
#endif
      oc_dec_mv_unpack_and_frag_modes_fill(_dec);
#if defined(HAVE_CAIRO)
      _dec->telemetry_mv_bytes=oc_pack_bytes_left(&_dec->opb);
#endif
    }
    oc_dec_block_qis_unpack(_dec);
#if defined(HAVE_CAIRO)
    _dec->telemetry_qi_bytes=oc_pack_bytes_left(&_dec->opb);
#endif
    oc_dec_residual_tokens_unpack(_dec);
    /*Update granule position.
      This must be done before the striped decode callbacks so that the
       application knows what to do with the frame data.*/
    _dec->state.granpos=(_dec->state.keyframe_num+_dec->state.granpos_bias<<
     _dec->state.info.keyframe_granule_shift)
     +(_dec->state.curframe_num-_dec->state.keyframe_num);
    _dec->state.curframe_num++;
    if(_granpos!=NULL)*_granpos=_dec->state.granpos;
    /*All of the rest of the operations -- DC prediction reversal,
       reconstructing coded fragments, copying uncoded fragments, loop
       filtering, extending borders, and out-of-loop post-processing -- should
       be pipelined.
      I.e., DC prediction reversal, reconstruction, and uncoded fragment
       copying are done for one or two super block rows, then loop filtering is
       run as far as it can, then border copying, then post-processing.
      For 4:2:0 video a Minimum Codable Unit or MCU contains two luma super
       block rows, and one chroma.
      Otherwise, an MCU consists of one super block row from each plane.
      Inside each MCU, we perform all of the steps on one color plane before
       moving on to the next.
      After reconstruction, the additional filtering stages introduce a delay
       since they need some pixels from the next fragment row.
      Thus the actual number of decoded rows available is slightly smaller for
       the first MCU, and slightly larger for the last.

      This entire process allows us to operate on the data while it is still in
       cache, resulting in big performance improvements.
      An application callback allows further application processing (blitting
       to video memory, color conversion, etc.) to also use the data while it's
       in cache.*/
    oc_dec_pipeline_init(_dec,&_dec->pipe);
    oc_ycbcr_buffer_flip(stripe_buf,_dec->pp_frame_buf);
    /*notstart is 0 only for the first MCU; notdone drops to 0 on the MCU that
       reaches the last luma fragment row.*/
    notstart=0;
    notdone=1;
    for(stripe_fragy=0;notdone;stripe_fragy+=_dec->pipe.mcu_nvfrags){
      int avail_fragy0;
      int avail_fragy_end;
      avail_fragy0=avail_fragy_end=_dec->state.fplanes[0].nvfrags;
      notdone=stripe_fragy+_dec->pipe.mcu_nvfrags<avail_fragy_end;
      for(pli=0;pli<3;pli++){
        oc_fragment_plane *fplane;
        int                frag_shift;
        int                pp_offset;
        int                sdelay;
        int                edelay;
        fplane=_dec->state.fplanes+pli;
        /*Compute the first and last fragment row of the current MCU for this
           plane.*/
        /*frag_shift is 1 for a chroma plane with half the vertical
           resolution, and 0 otherwise.*/
        frag_shift=pli!=0&&!(_dec->state.info.pixel_fmt&2);
        _dec->pipe.fragy0[pli]=stripe_fragy>>frag_shift;
        _dec->pipe.fragy_end[pli]=OC_MINI(fplane->nvfrags,
         _dec->pipe.fragy0[pli]+(_dec->pipe.mcu_nvfrags>>frag_shift));
        oc_dec_dc_unpredict_mcu_plane(_dec,&_dec->pipe,pli);
        oc_dec_frags_recon_mcu_plane(_dec,&_dec->pipe,pli);
        /*sdelay and edelay count how many fragment rows the start and end
           of the processed range lag behind the MCU; each enabled filtering
           stage adds one row (except at the top and bottom of the frame).*/
        sdelay=edelay=0;
        if(_dec->pipe.loop_filter){
          sdelay+=notstart;
          edelay+=notdone;
          oc_state_loop_filter_frag_rows(&_dec->state,
           _dec->pipe.bounding_values,OC_FRAME_SELF,pli,
           _dec->pipe.fragy0[pli]-sdelay,_dec->pipe.fragy_end[pli]-edelay);
        }
        /*To fill the borders, we have an additional two pixel delay, since a
           fragment in the next row could filter its top edge, using two pixels
           from a fragment in this row.
          But there's no reason to delay a full fragment between the two.*/
        oc_state_borders_fill_rows(&_dec->state,refi,pli,
         (_dec->pipe.fragy0[pli]-sdelay<<3)-(sdelay<<1),
         (_dec->pipe.fragy_end[pli]-edelay<<3)-(edelay<<1));
        /*Out-of-loop post-processing.*/
        /*The chroma post-processing levels sit three above the
           corresponding luma levels.*/
        pp_offset=3*(pli!=0);
        if(_dec->pipe.pp_level>=OC_PP_LEVEL_DEBLOCKY+pp_offset){
          /*Perform de-blocking in one plane.*/
          sdelay+=notstart;
          edelay+=notdone;
          oc_dec_deblock_frag_rows(_dec,_dec->pp_frame_buf,
           _dec->state.ref_frame_bufs[refi],pli,
           _dec->pipe.fragy0[pli]-sdelay,_dec->pipe.fragy_end[pli]-edelay);
          if(_dec->pipe.pp_level>=OC_PP_LEVEL_DERINGY+pp_offset){
            /*Perform de-ringing in one plane.*/
            sdelay+=notstart;
            edelay+=notdone;
            oc_dec_dering_frag_rows(_dec,_dec->pp_frame_buf,pli,
             _dec->pipe.fragy0[pli]-sdelay,_dec->pipe.fragy_end[pli]-edelay);
          }
        }
        /*If no post-processing is done, we still need to delay a row for the
           loop filter, thanks to the strange filtering order VP3 chose.*/
        else if(_dec->pipe.loop_filter){
          sdelay+=notstart;
          edelay+=notdone;
        }
        /*Compute the intersection of the available rows in all planes.
          If chroma is sub-sampled, the effect of each of its delays is
           doubled, but luma might have more post-processing filters enabled
           than chroma, so we don't know up front which one is the limiting
           factor.*/
        avail_fragy0=OC_MINI(avail_fragy0,
         _dec->pipe.fragy0[pli]-sdelay<<frag_shift);
        avail_fragy_end=OC_MINI(avail_fragy_end,
         _dec->pipe.fragy_end[pli]-edelay<<frag_shift);
      }
      if(_dec->stripe_cb.stripe_decoded!=NULL){
        /*The callback might want to use the FPU, so let's make sure they can.
          We violate all kinds of ABI restrictions by not doing this until
           now, but none of them actually matter since we don't use floating
           point ourselves.*/
        oc_restore_fpu(&_dec->state);
        /*Make the callback, ensuring we flip the sense of the "start" and
           "end" of the available region upside down.*/
        (*_dec->stripe_cb.stripe_decoded)(_dec->stripe_cb.ctx,stripe_buf,
         _dec->state.fplanes[0].nvfrags-avail_fragy_end,
         _dec->state.fplanes[0].nvfrags-avail_fragy0);
      }
      notstart=1;
    }
    /*Finish filling in the reference frame borders.*/
    for(pli=0;pli<3;pli++)oc_state_borders_fill_caps(&_dec->state,refi,pli);
    /*Update the reference frame indices.*/
    if(_dec->state.frame_type==OC_INTRA_FRAME){
      /*The new frame becomes both the previous and gold reference frames.*/
      _dec->state.ref_frame_idx[OC_FRAME_GOLD]=
       _dec->state.ref_frame_idx[OC_FRAME_PREV]=
       _dec->state.ref_frame_idx[OC_FRAME_SELF];
      _dec->state.ref_frame_data[OC_FRAME_GOLD]=
       _dec->state.ref_frame_data[OC_FRAME_PREV]=
       _dec->state.ref_frame_data[OC_FRAME_SELF];
    }
    else{
      /*Otherwise, just replace the previous reference frame.*/
      _dec->state.ref_frame_idx[OC_FRAME_PREV]=
       _dec->state.ref_frame_idx[OC_FRAME_SELF];
      _dec->state.ref_frame_data[OC_FRAME_PREV]=
       _dec->state.ref_frame_data[OC_FRAME_SELF];
    }
    /*Restore the FPU before dump_frame, since that _does_ use the FPU (for PNG
       gamma values, if nothing else).*/
    oc_restore_fpu(&_dec->state);
#if defined(OC_DUMP_IMAGES)
    /*We only dump images if there were some coded blocks.*/
    oc_state_dump_frame(&_dec->state,OC_FRAME_SELF,"dec");
#endif
    return 0;
  }
}
2304 
#if defined(HAVE_CAIRO)
/*Draws the solid-style telemetry marker for one macro block: the outline of
   an 11x11 box inset 2.5 pixels from (_x,_y) is stroked in the color
   (_r,_g,_b) at 50% opacity, and its interior is then filled in the same
   color at 25% opacity.
  _c:          The cairo context to draw with.
  _x,_y:       The top-left corner of the macro block, in surface pixels.
  _r,_g,_b:    The marker color.*/
static void oc_telemetry_draw_filled_box(cairo_t *_c,double _x,double _y,
 double _r,double _g,double _b){
  cairo_set_source_rgba(_c,_r,_g,_b,.5);
  cairo_rectangle(_c,_x+2.5,_y+2.5,11,11);
  cairo_stroke_preserve(_c);
  cairo_set_source_rgba(_c,_r,_g,_b,.25);
  cairo_fill(_c);
}

/*Draws a motion vector as a tapered, nearly opaque white line.
  The path runs from (_x+_mvx,_y-_mvy) back to the anchor (_x,_y) in three
   segments; it is re-stroked with widths 3, 2 and 1 as each segment is
   appended, so the displaced end is the thickest.
  The line width of _c is left at 1 on return.
  _c:        The cairo context to draw with.
  _x,_y:     The anchor point (block or macro block center), in surface
              pixels.
  _mvx,_mvy: The vector components, as returned by OC_MV_X()/OC_MV_Y().*/
static void oc_telemetry_draw_mv(cairo_t *_c,double _x,double _y,
 int _mvx,int _mvy){
  cairo_move_to(_c,_x+_mvx,_y-_mvy);
  cairo_set_source_rgba(_c,1.,1.,1.,.9);
  cairo_set_line_width(_c,3.);
  cairo_line_to(_c,_x+_mvx*.66,_y-_mvy*.66);
  cairo_stroke_preserve(_c);
  cairo_set_line_width(_c,2.);
  cairo_line_to(_c,_x+_mvx*.33,_y-_mvy*.33);
  cairo_stroke_preserve(_c);
  cairo_set_line_width(_c,1.);
  cairo_line_to(_c,_x,_y);
  cairo_stroke(_c);
}
#endif

/*Retrieves the current output frame from the decoder.
  _ycbcr is first filled in from the decoder's post-processing frame buffer
   (via oc_ycbcr_buffer_flip()).
  When built with HAVE_CAIRO and telemetry is enabled, the frame is converted
   to RGB, annotated (macro block modes, motion vectors, qi choices, bit usage
   bars and the master qi values, depending on which telemetry options are
   set), converted back to Y'CbCr into a lazily allocated telemetry buffer
   owned by the decoder, and _ycbcr is redirected to that buffer with tightly
   packed strides.
  If the telemetry buffer or the cairo surface cannot be allocated, the
   annotation is silently skipped and _ycbcr still describes the plain frame.
  _dec:   The decoder context.
  _ycbcr: The buffer description to fill in.
  Return: 0 on success, or TH_EFAULT if _dec or _ycbcr is NULL.*/
int th_decode_ycbcr_out(th_dec_ctx *_dec,th_ycbcr_buffer _ycbcr){
  if(_dec==NULL||_ycbcr==NULL)return TH_EFAULT;
  oc_ycbcr_buffer_flip(_ycbcr,_dec->pp_frame_buf);
#if defined(HAVE_CAIRO)
  /*If telemetry ioctls are active, we need to draw to the output buffer.
    Stuff the plane into cairo.*/
  if(_dec->telemetry){
    cairo_surface_t *cs;
    unsigned char   *data;
    unsigned char   *y_row;
    unsigned char   *u_row;
    unsigned char   *v_row;
    unsigned char   *rgb_row;
    int              cstride;
    int              w;
    int              h;
    int              x;
    int              y;
    int              hdec;
    int              vdec;
    w=_ycbcr[0].width;
    h=_ycbcr[0].height;
    /*Chroma decimation flags derived from the two low bits of the pixel
       format.*/
    hdec=!(_dec->state.info.pixel_fmt&1);
    vdec=!(_dec->state.info.pixel_fmt&2);
    /*Lazy data buffer init.
      We could try to re-use the post-processing buffer, which would save
       memory, but complicate the allocation logic there.
      I don't think anyone cares about memory usage when using telemetry; it is
       not meant for embedded devices.*/
    if(_dec->telemetry_frame_data==NULL){
      _dec->telemetry_frame_data=_ogg_malloc(
       (w*h+2*(w>>hdec)*(h>>vdec))*sizeof(*_dec->telemetry_frame_data));
      /*Out of memory: hand back the un-annotated frame.*/
      if(_dec->telemetry_frame_data==NULL)return 0;
    }
    cs=cairo_image_surface_create(CAIRO_FORMAT_RGB24,w,h);
    /*Sadly, no YUV support in Cairo (yet); convert into the RGB buffer.*/
    data=cairo_image_surface_get_data(cs);
    if(data==NULL){
      /*Surface creation failed: hand back the un-annotated frame.*/
      cairo_surface_destroy(cs);
      return 0;
    }
    cstride=cairo_image_surface_get_stride(cs);
    y_row=_ycbcr[0].data;
    u_row=_ycbcr[1].data;
    v_row=_ycbcr[2].data;
    rgb_row=data;
    for(y=0;y<h;y++){
      for(x=0;x<w;x++){
        int r;
        int g;
        int b;
        r=(1904000*y_row[x]+2609823*v_row[x>>hdec]-363703744)/1635200;
        g=(3827562*y_row[x]-1287801*u_row[x>>hdec]
         -2672387*v_row[x>>hdec]+447306710)/3287200;
        b=(952000*y_row[x]+1649289*u_row[x>>hdec]-225932192)/817600;
        /*Each pixel occupies four bytes; only the first three are written.*/
        rgb_row[4*x+0]=OC_CLAMP255(b);
        rgb_row[4*x+1]=OC_CLAMP255(g);
        rgb_row[4*x+2]=OC_CLAMP255(r);
      }
      y_row+=_ycbcr[0].stride;
      /*Advance the chroma rows on every line when there is no vertical
         decimation, and only after odd lines otherwise.*/
      u_row+=_ycbcr[1].stride&-((y&1)|!vdec);
      v_row+=_ycbcr[2].stride&-((y&1)|!vdec);
      rgb_row+=cstride;
    }
    /*We wrote the pixels behind cairo's back; tell it before drawing.*/
    cairo_surface_mark_dirty(cs);
    /*Draw coded identifier for each macroblock (stored in Hilbert order).*/
    {
      cairo_t           *c;
      const oc_fragment *frags;
      oc_mv             *frag_mvs;
      const signed char *mb_modes;
      oc_mb_map         *mb_maps;
      size_t             nmbs;
      size_t             mbi;
      int                row2;
      int                col2;
      /*Maps each qi index in use to a symbol relative to the first qi:
         0 for none, +/-1 for plus/minus, +/-2 for double plus/minus.*/
      int                qim[3]={0,0,0};
      if(_dec->state.nqis==2){
        int bqi;
        bqi=_dec->state.qis[0];
        if(_dec->state.qis[1]>bqi)qim[1]=1;
        if(_dec->state.qis[1]<bqi)qim[1]=-1;
      }
      if(_dec->state.nqis==3){
        int bqi;
        int cqi;
        int dqi;
        bqi=_dec->state.qis[0];
        cqi=_dec->state.qis[1];
        dqi=_dec->state.qis[2];
        if(cqi>bqi&&dqi>bqi){
          /*Both above the base: the larger one gets the double plus.*/
          if(dqi>cqi){
            qim[1]=1;
            qim[2]=2;
          }
          else{
            qim[1]=2;
            qim[2]=1;
          }
        }
        else if(cqi<bqi&&dqi<bqi){
          /*Both below the base: the smaller one gets the double minus.*/
          if(dqi<cqi){
            qim[1]=-1;
            qim[2]=-2;
          }
          else{
            qim[1]=-2;
            qim[2]=-1;
          }
        }
        else{
          if(cqi<bqi)qim[1]=-1;
          else qim[1]=1;
          if(dqi<bqi)qim[2]=-1;
          else qim[2]=1;
        }
      }
      c=cairo_create(cs);
      frags=_dec->state.frags;
      frag_mvs=_dec->state.frag_mvs;
      mb_modes=_dec->state.mb_modes;
      mb_maps=_dec->state.mb_maps;
      nmbs=_dec->state.nmbs;
      row2=0;
      col2=0;
      for(mbi=0;mbi<nmbs;mbi++){
        float mbx;
        float mby;
        int   bi;
        /*Top-left corner of this macro block on the surface.*/
        mby=h-(row2+(((col2+1)>>1)&1))*16-16;
        mbx=(col2>>1)*16;
        cairo_set_line_width(c,1.);
        /*Keyframe (all intra) red box.*/
        if(_dec->state.frame_type==OC_INTRA_FRAME){
          if(_dec->telemetry_mbmode&0x02){
            oc_telemetry_draw_filled_box(c,mbx,mby,1.,0,0);
          }
        }
        else{
          ptrdiff_t fragi;
          int       frag_mvx;
          int       frag_mvy;
          int       bj;
          /*Find the first coded luma block; its motion vector stands in for
             the whole macro block in the single-vector modes.*/
          frag_mvx=0;
          frag_mvy=0;
          for(bi=0;bi<4;bi++){
            fragi=mb_maps[mbi][0][bi];
            if(fragi>=0&&frags[fragi].coded){
              frag_mvx=OC_MV_X(frag_mvs[fragi]);
              frag_mvy=OC_MV_Y(frag_mvs[fragi]);
              break;
            }
          }
          /*Only annotate macro blocks with at least one coded luma block.*/
          if(bi<4){
            switch(mb_modes[mbi]){
              case OC_MODE_INTRA:{
                if(_dec->telemetry_mbmode&0x02){
                  oc_telemetry_draw_filled_box(c,mbx,mby,1.,0,0);
                }
              }break;
              case OC_MODE_INTER_NOMV:{
                if(_dec->telemetry_mbmode&0x01){
                  oc_telemetry_draw_filled_box(c,mbx,mby,0,0,1.);
                }
              }break;
              case OC_MODE_INTER_MV:{
                if(_dec->telemetry_mbmode&0x04){
                  cairo_rectangle(c,mbx+2.5,mby+2.5,11,11);
                  cairo_set_source_rgba(c,0,1.,0,.5);
                  cairo_stroke(c);
                }
                if(_dec->telemetry_mv&0x04){
                  oc_telemetry_draw_mv(c,mbx+8,mby+8,frag_mvx,frag_mvy);
                }
              }break;
              case OC_MODE_INTER_MV_LAST:{
                if(_dec->telemetry_mbmode&0x08){
                  cairo_rectangle(c,mbx+2.5,mby+2.5,11,11);
                  cairo_set_source_rgba(c,0,1.,0,.5);
                  /*A single chevron inside the box.*/
                  cairo_move_to(c,mbx+13.5,mby+2.5);
                  cairo_line_to(c,mbx+2.5,mby+8);
                  cairo_line_to(c,mbx+13.5,mby+13.5);
                  cairo_stroke(c);
                }
                if(_dec->telemetry_mv&0x08){
                  oc_telemetry_draw_mv(c,mbx+8,mby+8,frag_mvx,frag_mvy);
                }
              }break;
              case OC_MODE_INTER_MV_LAST2:{
                if(_dec->telemetry_mbmode&0x10){
                  cairo_rectangle(c,mbx+2.5,mby+2.5,11,11);
                  cairo_set_source_rgba(c,0,1.,0,.5);
                  /*A double chevron inside the box.*/
                  cairo_move_to(c,mbx+8,mby+2.5);
                  cairo_line_to(c,mbx+2.5,mby+8);
                  cairo_line_to(c,mbx+8,mby+13.5);
                  cairo_move_to(c,mbx+13.5,mby+2.5);
                  cairo_line_to(c,mbx+8,mby+8);
                  cairo_line_to(c,mbx+13.5,mby+13.5);
                  cairo_stroke(c);
                }
                if(_dec->telemetry_mv&0x10){
                  oc_telemetry_draw_mv(c,mbx+8,mby+8,frag_mvx,frag_mvy);
                }
              }break;
              case OC_MODE_GOLDEN_NOMV:{
                if(_dec->telemetry_mbmode&0x20){
                  oc_telemetry_draw_filled_box(c,mbx,mby,1.,1.,0);
                }
              }break;
              case OC_MODE_GOLDEN_MV:{
                if(_dec->telemetry_mbmode&0x40){
                  cairo_rectangle(c,mbx+2.5,mby+2.5,11,11);
                  cairo_set_source_rgba(c,1.,1.,0,.5);
                  cairo_stroke(c);
                }
                if(_dec->telemetry_mv&0x40){
                  oc_telemetry_draw_mv(c,mbx+8,mby+8,frag_mvx,frag_mvy);
                }
              }break;
              case OC_MODE_INTER_MV_FOUR:{
                if(_dec->telemetry_mbmode&0x80){
                  cairo_rectangle(c,mbx+2.5,mby+2.5,4,4);
                  cairo_rectangle(c,mbx+9.5,mby+2.5,4,4);
                  cairo_rectangle(c,mbx+2.5,mby+9.5,4,4);
                  cairo_rectangle(c,mbx+9.5,mby+9.5,4,4);
                  cairo_set_source_rgba(c,0,1.,0,.5);
                  cairo_stroke(c);
                }
                /*4mv is odd, coded in raster order.
                  Each coded block gets its own vector, anchored at the
                   center of that block: blocks 0 and 1 are drawn in the
                   lower half of the macro block, blocks 2 and 3 in the upper
                   half.*/
                if(_dec->telemetry_mv&0x80){
                  for(bj=0;bj<4;bj++){
                    fragi=mb_maps[mbi][0][bj];
                    if(fragi>=0&&frags[fragi].coded){
                      oc_telemetry_draw_mv(c,
                       mbx+4+(bj&1)*8,mby+12-(bj&2)*4,
                       OC_MV_X(frag_mvs[fragi]),OC_MV_Y(frag_mvs[fragi]));
                    }
                  }
                }
              }break;
              /*Any other mode gets no annotation.*/
              default:break;
            }
          }
        }
        /*qii illustration.*/
        if(_dec->telemetry_qi&0x2){
          cairo_set_line_cap(c,CAIRO_LINE_CAP_SQUARE);
          for(bi=0;bi<4;bi++){
            ptrdiff_t fragi;
            int       qiv;
            int       xp;
            int       yp;
            /*Top-left corner of this 8x8 block on the surface.*/
            xp=mbx+(bi&1)*8;
            yp=mby+8-(bi&2)*4;
            fragi=mb_maps[mbi][0][bi];
            if(fragi>=0&&frags[fragi].coded){
              qiv=qim[frags[fragi].qii];
              /*Each symbol is first stroked as a wide translucent black
                 shadow, then re-stroked below as a thin colored line.*/
              cairo_set_line_width(c,3.);
              cairo_set_source_rgba(c,0.,0.,0.,.5);
              switch(qiv){
                /*Double plus:*/
                case 2:{
                  /*Alternate the diagonal the two pluses lie on from block
                     to block.*/
                  if((bi&1)^((bi&2)>>1)){
                    cairo_move_to(c,xp+2.5,yp+1.5);
                    cairo_line_to(c,xp+2.5,yp+3.5);
                    cairo_move_to(c,xp+1.5,yp+2.5);
                    cairo_line_to(c,xp+3.5,yp+2.5);
                    cairo_move_to(c,xp+5.5,yp+4.5);
                    cairo_line_to(c,xp+5.5,yp+6.5);
                    cairo_move_to(c,xp+4.5,yp+5.5);
                    cairo_line_to(c,xp+6.5,yp+5.5);
                  }
                  else{
                    cairo_move_to(c,xp+5.5,yp+1.5);
                    cairo_line_to(c,xp+5.5,yp+3.5);
                    cairo_move_to(c,xp+4.5,yp+2.5);
                    cairo_line_to(c,xp+6.5,yp+2.5);
                    cairo_move_to(c,xp+2.5,yp+4.5);
                    cairo_line_to(c,xp+2.5,yp+6.5);
                    cairo_move_to(c,xp+1.5,yp+5.5);
                    cairo_line_to(c,xp+3.5,yp+5.5);
                  }
                  cairo_stroke_preserve(c);
                  cairo_set_source_rgba(c,0.,1.,1.,1.);
                }break;
                /*Double minus:*/
                case -2:{
                  cairo_move_to(c,xp+2.5,yp+2.5);
                  cairo_line_to(c,xp+5.5,yp+2.5);
                  cairo_move_to(c,xp+2.5,yp+5.5);
                  cairo_line_to(c,xp+5.5,yp+5.5);
                  cairo_stroke_preserve(c);
                  cairo_set_source_rgba(c,1.,1.,1.,1.);
                }break;
                /*Plus:*/
                case 1:{
                  /*Nudge the symbol by two pixels for blocks with the
                     corresponding index bit clear.
                    The mask must be parenthesized: == binds tighter than &.*/
                  if((bi&2)==0)yp-=2;
                  if((bi&1)==0)xp-=2;
                  cairo_move_to(c,xp+4.5,yp+2.5);
                  cairo_line_to(c,xp+4.5,yp+6.5);
                  cairo_move_to(c,xp+2.5,yp+4.5);
                  cairo_line_to(c,xp+6.5,yp+4.5);
                  cairo_stroke_preserve(c);
                  cairo_set_source_rgba(c,.1,1.,.3,1.);
                }break;
                /*Minus:*/
                case -1:{
                  cairo_move_to(c,xp+2.5,yp+4.5);
                  cairo_line_to(c,xp+6.5,yp+4.5);
                  cairo_stroke_preserve(c);
                  cairo_set_source_rgba(c,1.,.3,.1,1.);
                }break;
                /*Same qi as the base: nothing to draw for this block.*/
                default:continue;
              }
              cairo_set_line_width(c,1.);
              cairo_stroke(c);
            }
          }
        }
        /*Step to the next macro block position; two rows of macro blocks are
           walked together.*/
        col2++;
        if((col2>>1)>=_dec->state.nhmbs){
          col2=0;
          row2+=2;
        }
      }
      /*Bit usage indicator[s]:*/
      if(_dec->telemetry_bits){
        int widths[6];
        int fpsn;
        int fpsd;
        int mult;
        int fullw;
        int padw;
        int i;
        fpsn=_dec->state.info.fps_numerator;
        fpsd=_dec->state.info.fps_denominator;
        mult=(_dec->telemetry_bits>=0xFF?1:_dec->telemetry_bits);
        fullw=250.f*h*fpsd*mult/fpsn;
        /*Small frames at high frame rates can truncate the scale to zero;
           avoid dividing by it below.*/
        if(fullw<1)fullw=1;
        padw=w-24;
        /*Header and coded block bits.
          Discard any counter that is out of range.*/
        if(_dec->telemetry_frame_bytes<0||
         _dec->telemetry_frame_bytes==OC_LOTS_OF_BITS){
          _dec->telemetry_frame_bytes=0;
        }
        if(_dec->telemetry_coding_bytes<0||
         _dec->telemetry_coding_bytes>_dec->telemetry_frame_bytes){
          _dec->telemetry_coding_bytes=0;
        }
        if(_dec->telemetry_mode_bytes<0||
         _dec->telemetry_mode_bytes>_dec->telemetry_frame_bytes){
          _dec->telemetry_mode_bytes=0;
        }
        if(_dec->telemetry_mv_bytes<0||
         _dec->telemetry_mv_bytes>_dec->telemetry_frame_bytes){
          _dec->telemetry_mv_bytes=0;
        }
        if(_dec->telemetry_qi_bytes<0||
         _dec->telemetry_qi_bytes>_dec->telemetry_frame_bytes){
          _dec->telemetry_qi_bytes=0;
        }
        if(_dec->telemetry_dc_bytes<0||
         _dec->telemetry_dc_bytes>_dec->telemetry_frame_bytes){
          _dec->telemetry_dc_bytes=0;
        }
        /*Each bar shows the difference between two consecutive counters.*/
        widths[0]=padw*
         (_dec->telemetry_frame_bytes-_dec->telemetry_coding_bytes)/fullw;
        widths[1]=padw*
         (_dec->telemetry_coding_bytes-_dec->telemetry_mode_bytes)/fullw;
        widths[2]=padw*
         (_dec->telemetry_mode_bytes-_dec->telemetry_mv_bytes)/fullw;
        widths[3]=padw*
         (_dec->telemetry_mv_bytes-_dec->telemetry_qi_bytes)/fullw;
        widths[4]=padw*
         (_dec->telemetry_qi_bytes-_dec->telemetry_dc_bytes)/fullw;
        widths[5]=padw*(_dec->telemetry_dc_bytes)/fullw;
        for(i=0;i<6;i++)if(widths[i]>w)widths[i]=w;
        /*Translucent black backing for all six bars.*/
        cairo_set_source_rgba(c,.0,.0,.0,.6);
        cairo_rectangle(c,10,h-33,widths[0]+1,5);
        cairo_rectangle(c,10,h-29,widths[1]+1,5);
        cairo_rectangle(c,10,h-25,widths[2]+1,5);
        cairo_rectangle(c,10,h-21,widths[3]+1,5);
        cairo_rectangle(c,10,h-17,widths[4]+1,5);
        cairo_rectangle(c,10,h-13,widths[5]+1,5);
        cairo_fill(c);
        /*The colored bars themselves.*/
        cairo_set_source_rgb(c,1,0,0);
        cairo_rectangle(c,10.5,h-32.5,widths[0],4);
        cairo_fill(c);
        cairo_set_source_rgb(c,0,1,0);
        cairo_rectangle(c,10.5,h-28.5,widths[1],4);
        cairo_fill(c);
        cairo_set_source_rgb(c,0,0,1);
        cairo_rectangle(c,10.5,h-24.5,widths[2],4);
        cairo_fill(c);
        cairo_set_source_rgb(c,.6,.4,.0);
        cairo_rectangle(c,10.5,h-20.5,widths[3],4);
        cairo_fill(c);
        cairo_set_source_rgb(c,.3,.3,.3);
        cairo_rectangle(c,10.5,h-16.5,widths[4],4);
        cairo_fill(c);
        cairo_set_source_rgb(c,.5,.5,.8);
        cairo_rectangle(c,10.5,h-12.5,widths[5],4);
        cairo_fill(c);
      }
      /*Master qi indicator[s]:*/
      if(_dec->telemetry_qi&0x1){
        cairo_text_extents_t extents;
        char                 buffer[10];
        int                  p;
        int                  text_y;
        p=0;
        text_y=h-7.5;
        /*Format up to three one- or two-digit qi values, separated by
           spaces ('0' is 48).*/
        if(_dec->state.qis[0]>=10)buffer[p++]=48+_dec->state.qis[0]/10;
        buffer[p++]=48+_dec->state.qis[0]%10;
        if(_dec->state.nqis>=2){
          buffer[p++]=' ';
          if(_dec->state.qis[1]>=10)buffer[p++]=48+_dec->state.qis[1]/10;
          buffer[p++]=48+_dec->state.qis[1]%10;
        }
        if(_dec->state.nqis==3){
          buffer[p++]=' ';
          if(_dec->state.qis[2]>=10)buffer[p++]=48+_dec->state.qis[2]/10;
          buffer[p++]=48+_dec->state.qis[2]%10;
        }
        buffer[p++]='\0';
        cairo_select_font_face(c,"sans",
         CAIRO_FONT_SLANT_NORMAL,CAIRO_FONT_WEIGHT_BOLD);
        cairo_set_font_size(c,18);
        cairo_text_extents(c,buffer,&extents);
        /*White text, right-aligned 10 pixels from the edge...*/
        cairo_set_source_rgb(c,1,1,1);
        cairo_move_to(c,w-extents.x_advance-10,text_y);
        cairo_show_text(c,buffer);
        /*...with a thin black outline.*/
        cairo_set_source_rgb(c,0,0,0);
        cairo_move_to(c,w-extents.x_advance-10,text_y);
        cairo_text_path(c,buffer);
        cairo_set_line_width(c,.8);
        cairo_set_line_join(c,CAIRO_LINE_JOIN_ROUND);
        cairo_stroke(c);
      }
      cairo_destroy(c);
    }
    /*Make sure all drawing has landed in the pixel data before reading it.*/
    cairo_surface_flush(cs);
    /*Out of the Cairo plane into the telemetry YUV buffer.
      The three planes are stored back to back with tightly packed rows.*/
    _ycbcr[0].data=_dec->telemetry_frame_data;
    _ycbcr[0].stride=_ycbcr[0].width;
    _ycbcr[1].data=_ycbcr[0].data+h*_ycbcr[0].stride;
    _ycbcr[1].stride=_ycbcr[1].width;
    _ycbcr[2].data=_ycbcr[1].data+(h>>vdec)*_ycbcr[1].stride;
    _ycbcr[2].stride=_ycbcr[2].width;
    y_row=_ycbcr[0].data;
    u_row=_ycbcr[1].data;
    v_row=_ycbcr[2].data;
    rgb_row=data;
    /*This is one of the few places it's worth handling chroma on a
       case-by-case basis.*/
    switch(_dec->state.info.pixel_fmt){
      case TH_PF_420:{
        /*Two rows and two columns at a time: four luma samples and one
           chroma pair computed from the sum of the 2x2 RGB block.*/
        for(y=0;y<h;y+=2){
          unsigned char *y_row2;
          unsigned char *rgb_row2;
          y_row2=y_row+_ycbcr[0].stride;
          rgb_row2=rgb_row+cstride;
          for(x=0;x<w;x+=2){
            int luma;
            int u;
            int v;
            luma=(65481*rgb_row[4*x+2]+128553*rgb_row[4*x+1]
             +24966*rgb_row[4*x+0]+4207500)/255000;
            y_row[x]=OC_CLAMP255(luma);
            luma=(65481*rgb_row[4*x+6]+128553*rgb_row[4*x+5]
             +24966*rgb_row[4*x+4]+4207500)/255000;
            y_row[x+1]=OC_CLAMP255(luma);
            luma=(65481*rgb_row2[4*x+2]+128553*rgb_row2[4*x+1]
             +24966*rgb_row2[4*x+0]+4207500)/255000;
            y_row2[x]=OC_CLAMP255(luma);
            luma=(65481*rgb_row2[4*x+6]+128553*rgb_row2[4*x+5]
             +24966*rgb_row2[4*x+4]+4207500)/255000;
            y_row2[x+1]=OC_CLAMP255(luma);
            u=(-8372*(rgb_row[4*x+2]+rgb_row[4*x+6]
             +rgb_row2[4*x+2]+rgb_row2[4*x+6])
             -16436*(rgb_row[4*x+1]+rgb_row[4*x+5]
             +rgb_row2[4*x+1]+rgb_row2[4*x+5])
             +24808*(rgb_row[4*x+0]+rgb_row[4*x+4]
             +rgb_row2[4*x+0]+rgb_row2[4*x+4])+29032005)/225930;
            v=(39256*(rgb_row[4*x+2]+rgb_row[4*x+6]
             +rgb_row2[4*x+2]+rgb_row2[4*x+6])
             -32872*(rgb_row[4*x+1]+rgb_row[4*x+5]
              +rgb_row2[4*x+1]+rgb_row2[4*x+5])
             -6384*(rgb_row[4*x+0]+rgb_row[4*x+4]
              +rgb_row2[4*x+0]+rgb_row2[4*x+4])+45940035)/357510;
            u_row[x>>1]=OC_CLAMP255(u);
            v_row[x>>1]=OC_CLAMP255(v);
          }
          y_row+=_ycbcr[0].stride<<1;
          u_row+=_ycbcr[1].stride;
          v_row+=_ycbcr[2].stride;
          rgb_row+=cstride<<1;
        }
      }break;
      case TH_PF_422:{
        /*Two columns at a time: two luma samples and one chroma pair
           computed from the sum of the two RGB pixels.*/
        for(y=0;y<h;y++){
          for(x=0;x<w;x+=2){
            int luma;
            int u;
            int v;
            luma=(65481*rgb_row[4*x+2]+128553*rgb_row[4*x+1]
             +24966*rgb_row[4*x+0]+4207500)/255000;
            y_row[x]=OC_CLAMP255(luma);
            luma=(65481*rgb_row[4*x+6]+128553*rgb_row[4*x+5]
             +24966*rgb_row[4*x+4]+4207500)/255000;
            y_row[x+1]=OC_CLAMP255(luma);
            u=(-16744*(rgb_row[4*x+2]+rgb_row[4*x+6])
             -32872*(rgb_row[4*x+1]+rgb_row[4*x+5])
             +49616*(rgb_row[4*x+0]+rgb_row[4*x+4])+29032005)/225930;
            v=(78512*(rgb_row[4*x+2]+rgb_row[4*x+6])
             -65744*(rgb_row[4*x+1]+rgb_row[4*x+5])
             -12768*(rgb_row[4*x+0]+rgb_row[4*x+4])+45940035)/357510;
            u_row[x>>1]=OC_CLAMP255(u);
            v_row[x>>1]=OC_CLAMP255(v);
          }
          y_row+=_ycbcr[0].stride;
          u_row+=_ycbcr[1].stride;
          v_row+=_ycbcr[2].stride;
          rgb_row+=cstride;
        }
      }break;
      /*case TH_PF_444:*/
      default:{
        /*One luma sample and one chroma pair per pixel.*/
        for(y=0;y<h;y++){
          for(x=0;x<w;x++){
            int luma;
            int u;
            int v;
            luma=(65481*rgb_row[4*x+2]+128553*rgb_row[4*x+1]
             +24966*rgb_row[4*x+0]+4207500)/255000;
            u=(-33488*rgb_row[4*x+2]-65744*rgb_row[4*x+1]
             +99232*rgb_row[4*x+0]+29032005)/225930;
            v=(157024*rgb_row[4*x+2]-131488*rgb_row[4*x+1]
             -25536*rgb_row[4*x+0]+45940035)/357510;
            y_row[x]=OC_CLAMP255(luma);
            u_row[x]=OC_CLAMP255(u);
            v_row[x]=OC_CLAMP255(v);
          }
          y_row+=_ycbcr[0].stride;
          u_row+=_ycbcr[1].stride;
          v_row+=_ycbcr[2].stride;
          rgb_row+=cstride;
        }
      }break;
    }
    /*Finished.
      Destroy the surface.*/
    cairo_surface_destroy(cs);
  }
#endif
  return 0;
}
2964