1 /********************************************************************
2  *                                                                  *
3  * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
4  * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
5  * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
6  * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
7  *                                                                  *
8  * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009                *
9  * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
10  *                                                                  *
11  ********************************************************************
12 
13   function:
14     last mod: $Id$
15 
16  ********************************************************************/
17 
18 #include <stdlib.h>
19 #include <string.h>
20 #include <ogg/ogg.h>
21 #include "decint.h"
22 #if defined(OC_DUMP_IMAGES)
23 # include <stdio.h>
24 # include "png.h"
25 #endif
26 #if defined(HAVE_CAIRO)
27 # include <cairo.h>
28 #endif
29 
30 
/*Post-processing is switched off.*/
#define OC_PP_LEVEL_DISABLED  (0)
/*Only the DC qi of each block is tracked.*/
#define OC_PP_LEVEL_TRACKDCQI (1)
/*The luma plane is deblocked.*/
#define OC_PP_LEVEL_DEBLOCKY  (2)
/*The luma plane is deringed.*/
#define OC_PP_LEVEL_DERINGY   (3)
/*The luma plane is deringed more strongly.*/
#define OC_PP_LEVEL_SDERINGY  (4)
/*The chroma planes are deblocked.*/
#define OC_PP_LEVEL_DEBLOCKC  (5)
/*The chroma planes are deringed.*/
#define OC_PP_LEVEL_DERINGC   (6)
/*The chroma planes are deringed more strongly.*/
#define OC_PP_LEVEL_SDERINGC  (7)
/*The largest post-processing level that is valid (the last one above).*/
#define OC_PP_LEVEL_MAX       (OC_PP_LEVEL_SDERINGC)
49 
50 
51 
52 /*The mode alphabets for the various mode coding schemes.
53   Scheme 0 uses a custom alphabet, which is not stored in this table.*/
54 static const unsigned char OC_MODE_ALPHABETS[7][OC_NMODES]={
55   /*Last MV dominates */
56   {
57     OC_MODE_INTER_MV_LAST,OC_MODE_INTER_MV_LAST2,OC_MODE_INTER_MV,
58     OC_MODE_INTER_NOMV,OC_MODE_INTRA,OC_MODE_GOLDEN_NOMV,OC_MODE_GOLDEN_MV,
59     OC_MODE_INTER_MV_FOUR
60   },
61   {
62     OC_MODE_INTER_MV_LAST,OC_MODE_INTER_MV_LAST2,OC_MODE_INTER_NOMV,
63     OC_MODE_INTER_MV,OC_MODE_INTRA,OC_MODE_GOLDEN_NOMV,OC_MODE_GOLDEN_MV,
64     OC_MODE_INTER_MV_FOUR
65   },
66   {
67     OC_MODE_INTER_MV_LAST,OC_MODE_INTER_MV,OC_MODE_INTER_MV_LAST2,
68     OC_MODE_INTER_NOMV,OC_MODE_INTRA,OC_MODE_GOLDEN_NOMV,OC_MODE_GOLDEN_MV,
69     OC_MODE_INTER_MV_FOUR
70   },
71   {
72     OC_MODE_INTER_MV_LAST,OC_MODE_INTER_MV,OC_MODE_INTER_NOMV,
73     OC_MODE_INTER_MV_LAST2,OC_MODE_INTRA,OC_MODE_GOLDEN_NOMV,
74     OC_MODE_GOLDEN_MV,OC_MODE_INTER_MV_FOUR
75   },
76   /*No MV dominates.*/
77   {
78     OC_MODE_INTER_NOMV,OC_MODE_INTER_MV_LAST,OC_MODE_INTER_MV_LAST2,
79     OC_MODE_INTER_MV,OC_MODE_INTRA,OC_MODE_GOLDEN_NOMV,OC_MODE_GOLDEN_MV,
80     OC_MODE_INTER_MV_FOUR
81   },
82   {
83     OC_MODE_INTER_NOMV,OC_MODE_GOLDEN_NOMV,OC_MODE_INTER_MV_LAST,
84     OC_MODE_INTER_MV_LAST2,OC_MODE_INTER_MV,OC_MODE_INTRA,OC_MODE_GOLDEN_MV,
85     OC_MODE_INTER_MV_FOUR
86   },
87   /*Default ordering.*/
88   {
89     OC_MODE_INTER_NOMV,OC_MODE_INTRA,OC_MODE_INTER_MV,OC_MODE_INTER_MV_LAST,
90     OC_MODE_INTER_MV_LAST2,OC_MODE_GOLDEN_NOMV,OC_MODE_GOLDEN_MV,
91     OC_MODE_INTER_MV_FOUR
92   }
93 };
94 
95 
96 /*The original DCT tokens are extended and reordered during the construction of
97    the Huffman tables.
98   The extension means more bits can be read with fewer calls to the bitpacker
99    during the Huffman decoding process (at the cost of larger Huffman tables),
100    and fewer tokens require additional extra bits (reducing the average storage
101    per decoded token).
102   The revised ordering reveals essential information in the token value
103    itself; specifically, whether or not there are additional extra bits to read
104    and the parameter to which those extra bits are applied.
105   The token is used to fetch a code word from the OC_DCT_CODE_WORD table below.
  The extra bits are added into the code word at the bit position inferred from
   the token value, giving the final code word from which all required
   parameters are derived.
109   The number of EOBs and the leading zero run length can be extracted directly.
110   The coefficient magnitude is optionally negated before extraction, according
111    to a 'flip' bit.*/
112 
/*The number of additional extra bits that are decoded with each of the
   internal DCT tokens.
  Only the leading internal tokens (indices 0 through 14) have an entry here.*/
static const unsigned char OC_INTERNAL_DCT_TOKEN_EXTRA_BITS[15]={
  12,4,3,3,4,4,5,5,8,8,8,8,3,3,6
};

/*Whether or not an internal token needs any additional extra bits.
  This is true exactly when _token is less than the number of entries in
   OC_INTERNAL_DCT_TOKEN_EXTRA_BITS.
  The argument is parenthesized so that an expression with lower precedence
   than '<' (e.g., a conditional expression) is compared as a whole.
  _token: The internal token value (evaluated once).
  Return: Non-zero if more extra bits must be read, zero otherwise.*/
#define OC_DCT_TOKEN_NEEDS_MORE(_token) \
 ((_token)<(int)(sizeof(OC_INTERNAL_DCT_TOKEN_EXTRA_BITS)/ \
  sizeof(*OC_INTERNAL_DCT_TOKEN_EXTRA_BITS)))
123 
/*The internal token OC_DCT_REPEAT_RUN3_TOKEN, which is the one that requires
   more than 8 extra bits.*/
#define OC_DCT_TOKEN_FAT_EOB (0)

/*The EOB count used for an end-of-frame token.
  PTRDIFF_MAX would be the natural value, but it requires C99, which is not
   yet available everywhere; all-ones in a size_t shifted right by one should
   be equivalent.*/
#define OC_DCT_EOB_FINISH ((~(size_t)0)>>1)
131 
/*The location of the (6) run length bits in the code word.
  These are placed at index 0 and given 8 bits (even though 6 would suffice)
   because it may be faster to extract the lower byte on some platforms.*/
#define OC_DCT_CW_RLEN_SHIFT (0)
/*The location of the (12) EOB bits in the code word.*/
#define OC_DCT_CW_EOB_SHIFT  (8)
/*The location of the (1) flip bit in the code word.
  This must be right under the magnitude bits.*/
#define OC_DCT_CW_FLIP_BIT   (20)
/*The location of the (11) token magnitude bits in the code word.
  These must be last, and rely on a sign-extending right shift.*/
#define OC_DCT_CW_MAG_SHIFT  (21)

/*Pack the given fields into a code word.
  The magnitude field stores (_mag)-(_flip), which is negative for negative
   magnitudes and for a zero magnitude with the flip bit set.
  Left-shifting a negative value is undefined behavior in C, so the field is
   moved into place by multiplying by 1<<OC_DCT_CW_MAG_SHIFT instead; this
   produces the same code word values without relying on that behavior.
  _eobs: The EOB run count.
  _rlen: The zero run length.
  _mag:  The signed coefficient magnitude.
  _flip: The flip bit (0 or 1).*/
#define OC_DCT_CW_PACK(_eobs,_rlen,_mag,_flip) \
 ((_eobs)<<OC_DCT_CW_EOB_SHIFT| \
 (_rlen)<<OC_DCT_CW_RLEN_SHIFT| \
 (_flip)<<OC_DCT_CW_FLIP_BIT| \
 ((_mag)-(_flip))*(1<<OC_DCT_CW_MAG_SHIFT))
151 
/*The code word value reserved to signal the end of the frame (a long EOB run
   of zero).*/
#define OC_DCT_CW_FINISH (0)

/*The bit position in the code word at which a token's extra bits are
   inserted.
  The result is OC_DCT_CW_EOB_SHIFT for tokens below 2, OC_DCT_CW_MAG_SHIFT
   for tokens 2 through 11, and 0 for all other tokens.
  The branch-free formulation is deliberate: Intel has no useful cmov, and
   although this translates to 11 instructions(!), it is _still_ faster than
   either a table lookup (just barely) or the naive double-ternary
   implementation (which gcc translates to a jump and a cmov).
  A real architecture would probably do better with two cmovs.
  This assumes OC_DCT_CW_RLEN_SHIFT is zero, but could easily be reworked if
   one of the other shifts were made zero instead.*/
#define OC_DCT_TOKEN_EB_POS(_token) \
 (((OC_DCT_CW_EOB_SHIFT-OC_DCT_CW_MAG_SHIFT)&-((_token)<2)) \
 +(OC_DCT_CW_MAG_SHIFT&-((_token)<12)))
167 
168 /*The code words for each internal token.
169   See the notes at OC_DCT_TOKEN_MAP for the reasons why things are out of
170    order.*/
171 static const ogg_int32_t OC_DCT_CODE_WORD[92]={
172   /*These tokens require additional extra bits for the EOB count.*/
173   /*OC_DCT_REPEAT_RUN3_TOKEN (12 extra bits)*/
174   OC_DCT_CW_FINISH,
175   /*OC_DCT_REPEAT_RUN2_TOKEN (4 extra bits)*/
176   OC_DCT_CW_PACK(16, 0,  0,0),
177   /*These tokens require additional extra bits for the magnitude.*/
178   /*OC_DCT_VAL_CAT5 (4 extra bits-1 already read)*/
179   OC_DCT_CW_PACK( 0, 0, 13,0),
180   OC_DCT_CW_PACK( 0, 0, 13,1),
181   /*OC_DCT_VAL_CAT6 (5 extra bits-1 already read)*/
182   OC_DCT_CW_PACK( 0, 0, 21,0),
183   OC_DCT_CW_PACK( 0, 0, 21,1),
184   /*OC_DCT_VAL_CAT7 (6 extra bits-1 already read)*/
185   OC_DCT_CW_PACK( 0, 0, 37,0),
186   OC_DCT_CW_PACK( 0, 0, 37,1),
187   /*OC_DCT_VAL_CAT8 (10 extra bits-2 already read)*/
188   OC_DCT_CW_PACK( 0, 0, 69,0),
189   OC_DCT_CW_PACK( 0, 0,325,0),
190   OC_DCT_CW_PACK( 0, 0, 69,1),
191   OC_DCT_CW_PACK( 0, 0,325,1),
192   /*These tokens require additional extra bits for the run length.*/
193   /*OC_DCT_RUN_CAT1C (4 extra bits-1 already read)*/
194   OC_DCT_CW_PACK( 0,10, +1,0),
195   OC_DCT_CW_PACK( 0,10, -1,0),
196   /*OC_DCT_ZRL_TOKEN (6 extra bits)
197     Flip is set to distinguish this from OC_DCT_CW_FINISH.*/
198   OC_DCT_CW_PACK( 0, 0,  0,1),
199   /*The remaining tokens require no additional extra bits.*/
200   /*OC_DCT_EOB1_TOKEN (0 extra bits)*/
201   OC_DCT_CW_PACK( 1, 0,  0,0),
202   /*OC_DCT_EOB2_TOKEN (0 extra bits)*/
203   OC_DCT_CW_PACK( 2, 0,  0,0),
204   /*OC_DCT_EOB3_TOKEN (0 extra bits)*/
205   OC_DCT_CW_PACK( 3, 0,  0,0),
206   /*OC_DCT_RUN_CAT1A (1 extra bit-1 already read)x5*/
207   OC_DCT_CW_PACK( 0, 1, +1,0),
208   OC_DCT_CW_PACK( 0, 1, -1,0),
209   OC_DCT_CW_PACK( 0, 2, +1,0),
210   OC_DCT_CW_PACK( 0, 2, -1,0),
211   OC_DCT_CW_PACK( 0, 3, +1,0),
212   OC_DCT_CW_PACK( 0, 3, -1,0),
213   OC_DCT_CW_PACK( 0, 4, +1,0),
214   OC_DCT_CW_PACK( 0, 4, -1,0),
215   OC_DCT_CW_PACK( 0, 5, +1,0),
216   OC_DCT_CW_PACK( 0, 5, -1,0),
217   /*OC_DCT_RUN_CAT2A (2 extra bits-2 already read)*/
218   OC_DCT_CW_PACK( 0, 1, +2,0),
219   OC_DCT_CW_PACK( 0, 1, +3,0),
220   OC_DCT_CW_PACK( 0, 1, -2,0),
221   OC_DCT_CW_PACK( 0, 1, -3,0),
222   /*OC_DCT_RUN_CAT1B (3 extra bits-3 already read)*/
223   OC_DCT_CW_PACK( 0, 6, +1,0),
224   OC_DCT_CW_PACK( 0, 7, +1,0),
225   OC_DCT_CW_PACK( 0, 8, +1,0),
226   OC_DCT_CW_PACK( 0, 9, +1,0),
227   OC_DCT_CW_PACK( 0, 6, -1,0),
228   OC_DCT_CW_PACK( 0, 7, -1,0),
229   OC_DCT_CW_PACK( 0, 8, -1,0),
230   OC_DCT_CW_PACK( 0, 9, -1,0),
231   /*OC_DCT_RUN_CAT2B (3 extra bits-3 already read)*/
232   OC_DCT_CW_PACK( 0, 2, +2,0),
233   OC_DCT_CW_PACK( 0, 3, +2,0),
234   OC_DCT_CW_PACK( 0, 2, +3,0),
235   OC_DCT_CW_PACK( 0, 3, +3,0),
236   OC_DCT_CW_PACK( 0, 2, -2,0),
237   OC_DCT_CW_PACK( 0, 3, -2,0),
238   OC_DCT_CW_PACK( 0, 2, -3,0),
239   OC_DCT_CW_PACK( 0, 3, -3,0),
240   /*OC_DCT_SHORT_ZRL_TOKEN (3 extra bits-3 already read)
241     Flip is set on the first one to distinguish it from OC_DCT_CW_FINISH.*/
242   OC_DCT_CW_PACK( 0, 0,  0,1),
243   OC_DCT_CW_PACK( 0, 1,  0,0),
244   OC_DCT_CW_PACK( 0, 2,  0,0),
245   OC_DCT_CW_PACK( 0, 3,  0,0),
246   OC_DCT_CW_PACK( 0, 4,  0,0),
247   OC_DCT_CW_PACK( 0, 5,  0,0),
248   OC_DCT_CW_PACK( 0, 6,  0,0),
249   OC_DCT_CW_PACK( 0, 7,  0,0),
250   /*OC_ONE_TOKEN (0 extra bits)*/
251   OC_DCT_CW_PACK( 0, 0, +1,0),
252   /*OC_MINUS_ONE_TOKEN (0 extra bits)*/
253   OC_DCT_CW_PACK( 0, 0, -1,0),
254   /*OC_TWO_TOKEN (0 extra bits)*/
255   OC_DCT_CW_PACK( 0, 0, +2,0),
256   /*OC_MINUS_TWO_TOKEN (0 extra bits)*/
257   OC_DCT_CW_PACK( 0, 0, -2,0),
258   /*OC_DCT_VAL_CAT2 (1 extra bit-1 already read)x4*/
259   OC_DCT_CW_PACK( 0, 0, +3,0),
260   OC_DCT_CW_PACK( 0, 0, -3,0),
261   OC_DCT_CW_PACK( 0, 0, +4,0),
262   OC_DCT_CW_PACK( 0, 0, -4,0),
263   OC_DCT_CW_PACK( 0, 0, +5,0),
264   OC_DCT_CW_PACK( 0, 0, -5,0),
265   OC_DCT_CW_PACK( 0, 0, +6,0),
266   OC_DCT_CW_PACK( 0, 0, -6,0),
267   /*OC_DCT_VAL_CAT3 (2 extra bits-2 already read)*/
268   OC_DCT_CW_PACK( 0, 0, +7,0),
269   OC_DCT_CW_PACK( 0, 0, +8,0),
270   OC_DCT_CW_PACK( 0, 0, -7,0),
271   OC_DCT_CW_PACK( 0, 0, -8,0),
272   /*OC_DCT_VAL_CAT4 (3 extra bits-3 already read)*/
273   OC_DCT_CW_PACK( 0, 0, +9,0),
274   OC_DCT_CW_PACK( 0, 0,+10,0),
275   OC_DCT_CW_PACK( 0, 0,+11,0),
276   OC_DCT_CW_PACK( 0, 0,+12,0),
277   OC_DCT_CW_PACK( 0, 0, -9,0),
278   OC_DCT_CW_PACK( 0, 0,-10,0),
279   OC_DCT_CW_PACK( 0, 0,-11,0),
280   OC_DCT_CW_PACK( 0, 0,-12,0),
281   /*OC_DCT_REPEAT_RUN1_TOKEN (3 extra bits-3 already read)*/
282   OC_DCT_CW_PACK( 8, 0,  0,0),
283   OC_DCT_CW_PACK( 9, 0,  0,0),
284   OC_DCT_CW_PACK(10, 0,  0,0),
285   OC_DCT_CW_PACK(11, 0,  0,0),
286   OC_DCT_CW_PACK(12, 0,  0,0),
287   OC_DCT_CW_PACK(13, 0,  0,0),
288   OC_DCT_CW_PACK(14, 0,  0,0),
289   OC_DCT_CW_PACK(15, 0,  0,0),
290   /*OC_DCT_REPEAT_RUN0_TOKEN (2 extra bits-2 already read)*/
291   OC_DCT_CW_PACK( 4, 0,  0,0),
292   OC_DCT_CW_PACK( 5, 0,  0,0),
293   OC_DCT_CW_PACK( 6, 0,  0,0),
294   OC_DCT_CW_PACK( 7, 0,  0,0),
295 };
296 
297 
298 
/*Decodes one super block run length from the bitstream.
  _opb: The pack buffer to read from.
  Return: The run length (1 through 4129 according to the coding scheme
   below).*/
static int oc_sb_run_unpack(oc_pack_buf *_opb){
  /*Coding scheme:
       Codeword            Run Length
     0                       1
     10x                     2-3
     110x                    4-5
     1110xx                  6-9
     11110xxx                10-17
     111110xxxx              18-33
     111111xxxxxxxxxxxx      34-4129*/
  /*The tree is in the format consumed by oc_huff_token_decode().
    Entries that decode to 0x10 or more are not literal run lengths; see the
     decoding step after the table.*/
  static const ogg_int16_t OC_SB_RUN_TREE[22]={
    4,
    -(1<<8|1),-(1<<8|1),
    -(1<<8|1),-(1<<8|1),
    -(1<<8|1),-(1<<8|1),
    -(1<<8|1),-(1<<8|1),
    -(3<<8|2),-(3<<8|2),
    -(3<<8|3),-(3<<8|3),
    -(4<<8|4),-(4<<8|5),
    -(4<<8|2<<4|6-6),17,
    2,
    -(2<<8|2<<4|10-6),-(2<<8|2<<4|14-6),
    -(2<<8|4<<4|18-6),-(2<<8|12<<4|34-6)
  };
  int token;
  int offs;
  int nbits;
  token=oc_huff_token_decode(_opb,OC_SB_RUN_TREE);
  /*Small values are the run length itself.*/
  if(token<0x10)return token;
  /*Otherwise the low 5 bits are an offset relative to a run length of 6, and
     the bits above them (shifted down by 4) are the number of extra bits that
     still have to be read and added on.*/
  offs=token&0x1F;
  nbits=(token-offs)>>4;
  return 6+offs+(int)oc_pack_read(_opb,nbits);
}
327 
/*Decodes one block run length from the bitstream.
  _opb: The pack buffer to read from.
  Return: The value decoded by oc_huff_token_decode() from the tree below (1
   through 30 according to the coding scheme).*/
static int oc_block_run_unpack(oc_pack_buf *_opb){
  /*Coding scheme:
     Codeword             Run Length
     0x                      1-2
     10x                     3-4
     110x                    5-6
     1110xx                  7-10
     11110xx                 11-14
     11111xxxx               15-30*/
  static const ogg_int16_t OC_BLOCK_RUN_TREE[61]={
    5,
    -(2<<8|1),-(2<<8|1),-(2<<8|1),-(2<<8|1),
    -(2<<8|1),-(2<<8|1),-(2<<8|1),-(2<<8|1),
    -(2<<8|2),-(2<<8|2),-(2<<8|2),-(2<<8|2),
    -(2<<8|2),-(2<<8|2),-(2<<8|2),-(2<<8|2),
    -(3<<8|3),-(3<<8|3),-(3<<8|3),-(3<<8|3),
    -(3<<8|4),-(3<<8|4),-(3<<8|4),-(3<<8|4),
    -(4<<8|5),-(4<<8|5),
    -(4<<8|6),-(4<<8|6),
    33,36,39,44,
    /*Entries 33-35.*/
    1,-(1<<8|7),-(1<<8|8),
    /*Entries 36-38.*/
    1,-(1<<8|9),-(1<<8|10),
    /*Entries 39-43.*/
    2,-(2<<8|11),-(2<<8|12),-(2<<8|13),-(2<<8|14),
    /*Entries 44-60.*/
    4,
    -(4<<8|15),-(4<<8|16),-(4<<8|17),-(4<<8|18),
    -(4<<8|19),-(4<<8|20),-(4<<8|21),-(4<<8|22),
    -(4<<8|23),-(4<<8|24),-(4<<8|25),-(4<<8|26),
    -(4<<8|27),-(4<<8|28),-(4<<8|29),-(4<<8|30)
  };
  int run_length;
  run_length=oc_huff_token_decode(_opb,OC_BLOCK_RUN_TREE);
  return run_length;
}
358 
359 
360 
/*Fills in the decoder's optional function table.
  When OC_DEC_USE_VTABLE is defined, the dc_unpredict_mcu_plane entry is
   pointed at oc_dec_dc_unpredict_mcu_plane_c; otherwise this does nothing.
  _dec: The decoder context to set up.*/
void oc_dec_accel_init_c(oc_dec_ctx *_dec){
#if defined(OC_DEC_USE_VTABLE)
  _dec->opt_vtable.dc_unpredict_mcu_plane=oc_dec_dc_unpredict_mcu_plane_c;
#endif
}
367 
/*Initializes a decoder context from the stream info and setup headers.
  _dec:   The decoder context to initialize.
  _info:  The stream information, passed on to oc_state_init().
  _setup: The setup information supplying the Huffman tables, the
           quantization parameters and the loop filter limits.
  Return: 0 on success, the negative value returned by oc_state_init() or
   oc_huff_trees_copy() if one of them fails, or TH_EFAULT if the token buffer
   cannot be allocated.
  On failure everything that was set up before the failing step is released
   again.*/
static int oc_dec_init(oc_dec_ctx *_dec,const th_info *_info,
 const th_setup_info *_setup){
  int qi;
  int pli;
  int qti;
  int ret;
  ret=oc_state_init(&_dec->state,_info,3);
  if(ret<0)return ret;
  ret=oc_huff_trees_copy(_dec->huff_tables,
   (const ogg_int16_t *const *)_setup->huff_tables);
  if(ret<0)goto fail_state;
  /*Per fragment: one byte for each of the 64 DCT coefficient tokens, one byte
     of extra bits for each of those tokens, and one additional byte for the
     long EOB run, in case it is the very last token and has a run length of
     one.*/
  _dec->dct_tokens=(unsigned char *)_ogg_malloc((64+64+1)*
   _dec->state.nfrags*sizeof(_dec->dct_tokens[0]));
  if(_dec->dct_tokens==NULL){
    ret=TH_EFAULT;
    goto fail_huff;
  }
  /*Point every dequantization table at its backing storage.*/
  for(qi=0;qi<64;qi++){
    for(pli=0;pli<3;pli++){
      for(qti=0;qti<2;qti++){
        _dec->state.dequant_tables[qi][pli][qti]=
         _dec->state.dequant_table_data[qi][pli][qti];
      }
    }
  }
  oc_dequant_tables_init(_dec->state.dequant_tables,_dec->pp_dc_scale,
   &_setup->qinfo);
  /*Derive pp_sharp_mod for each qi from the sum of dequantizer entries 12,
     17, 18 and 24 over both quantizer types and all three planes, with the
     luma (plane 0) contribution doubled.*/
  for(qi=0;qi<64;qi++){
    int qsum;
    qsum=0;
    for(qti=0;qti<2;qti++){
      for(pli=0;pli<3;pli++){
        qsum+=(_dec->state.dequant_tables[qi][pli][qti][12]
         +_dec->state.dequant_tables[qi][pli][qti][17]
         +_dec->state.dequant_tables[qi][pli][qti][18]
         +_dec->state.dequant_tables[qi][pli][qti][24])<<(pli==0);
      }
    }
    _dec->pp_sharp_mod[qi]=-(qsum>>11);
  }
  memcpy(_dec->state.loop_filter_limits,_setup->qinfo.loop_filter_limits,
   sizeof(_dec->state.loop_filter_limits));
  oc_dec_accel_init(_dec);
  /*Post-processing starts out disabled, with none of its buffers
     allocated and no stripe callback installed.*/
  _dec->pp_level=OC_PP_LEVEL_DISABLED;
  _dec->dc_qis=NULL;
  _dec->variances=NULL;
  _dec->pp_frame_data=NULL;
  _dec->stripe_cb.ctx=NULL;
  _dec->stripe_cb.stripe_decoded=NULL;
#if defined(HAVE_CAIRO)
  /*All telemetry settings start out cleared.*/
  _dec->telemetry=0;
  _dec->telemetry_bits=0;
  _dec->telemetry_qi=0;
  _dec->telemetry_mbmode=0;
  _dec->telemetry_mv=0;
  _dec->telemetry_frame_data=NULL;
#endif
  return 0;
fail_huff:
  oc_huff_trees_clear(_dec->huff_tables);
fail_state:
  oc_state_clear(&_dec->state);
  return ret;
}
429 
/*Releases everything owned by a decoder context: the telemetry frame buffer
   (when built with HAVE_CAIRO), the post-processing buffers, the DCT token
   buffer, the Huffman trees and finally the shared codec state.
  _dec: The decoder context to clear.*/
static void oc_dec_clear(oc_dec_ctx *_dec){
#if defined(HAVE_CAIRO)
  _ogg_free(_dec->telemetry_frame_data);
#endif
  /*Post-processing buffers.*/
  _ogg_free(_dec->pp_frame_data);
  _ogg_free(_dec->variances);
  _ogg_free(_dec->dc_qis);
  /*Token storage and Huffman trees set up by oc_dec_init().*/
  _ogg_free(_dec->dct_tokens);
  oc_huff_trees_clear(_dec->huff_tables);
  oc_state_clear(&_dec->state);
}
441 
442 
/*Reads the frame header from the decoder's pack buffer.
  This stores the frame type and the list of one to three qi values in the
   decoder state.
  _dec: The decoder context.
  Return: 0 on success, TH_EBADPACKET if the first bit is set (not a data
   packet), or TH_EIMPL if the extra keyframe bits are not all zero.*/
static int oc_dec_frame_header_unpack(oc_dec_ctx *_dec){
  long val;
  int  nqis;
  /*A data packet starts with a zero bit.*/
  val=oc_pack_read1(&_dec->opb);
  if(val!=0)return TH_EBADPACKET;
  /*The next bit is the frame type (I or P).*/
  val=oc_pack_read1(&_dec->opb);
  _dec->state.frame_type=(int)val;
  /*The qi list: each entry is 6 bits, and each of the first two entries is
     followed by a 1-bit flag saying whether another entry follows.
    No flag is read after the third entry.*/
  nqis=0;
  for(;;){
    val=oc_pack_read(&_dec->opb,6);
    _dec->state.qis[nqis++]=(unsigned char)val;
    if(nqis>=3)break;
    val=oc_pack_read1(&_dec->opb);
    if(!val)break;
  }
  _dec->state.nqis=nqis;
  if(_dec->state.frame_type==OC_INTRA_FRAME){
    /*Keyframes carry 3 additional configuration bits, unused holdovers from
       the VP3 days that were kept as wiggle room.
      Anything other than zero is rejected.*/
    val=oc_pack_read(&_dec->opb,3);
    if(val!=0)return TH_EIMPL;
  }
  return 0;
}
478 
479 /*Mark all fragments as coded and in OC_MODE_INTRA.
480   This also builds up the coded fragment list (in coded order), and clears the
481    uncoded fragment list.
482   It does not update the coded macro block list nor the super block flags, as
483    those are not used when decoding INTRA frames.*/
static void oc_dec_mark_all_intra(oc_dec_ctx *_dec){
  const oc_sb_map   *sb_maps;
  const oc_sb_flags *sb_flags;
  oc_fragment       *frags;
  ptrdiff_t         *coded_fragis;
  ptrdiff_t          ncoded;
  ptrdiff_t          plane_start;
  unsigned           sb_end;
  unsigned           sbi;
  int                pli;
  sb_maps=(const oc_sb_map *)_dec->state.sb_maps;
  sb_flags=_dec->state.sb_flags;
  frags=_dec->state.frags;
  coded_fragis=_dec->state.coded_fragis;
  ncoded=0;
  sbi=0;
  sb_end=0;
  for(pli=0;pli<3;pli++){
    /*Remember where this plane's fragments begin in the coded list.*/
    plane_start=ncoded;
    /*Super blocks are numbered consecutively across the planes.*/
    sb_end+=_dec->state.fplanes[pli].nsbs;
    while(sbi<sb_end){
      int quadi;
      for(quadi=0;quadi<4;quadi++){
        int bi;
        /*Skip quadrants that are not marked valid.*/
        if(!(sb_flags[sbi].quad_valid&(1<<quadi)))continue;
        for(bi=0;bi<4;bi++){
          ptrdiff_t fragi;
          fragi=sb_maps[sbi][quadi][bi];
          /*A negative index means there is no fragment in this slot.*/
          if(fragi<0)continue;
          frags[fragi].coded=1;
          frags[fragi].refi=OC_FRAME_SELF;
          frags[fragi].mb_mode=OC_MODE_INTRA;
          coded_fragis[ncoded]=fragi;
          ncoded++;
        }
      }
      sbi++;
    }
    _dec->state.ncoded_fragis[pli]=ncoded-plane_start;
  }
  _dec->state.ntotal_coded_fragis=ncoded;
}
523 
524 /*Decodes the bit flags indicating whether each super block is partially coded
525    or not.
526   Return: The number of partially coded super blocks.*/
static unsigned oc_dec_partial_sb_flags_unpack(oc_dec_ctx *_dec){
  oc_sb_flags *sb_flags;
  unsigned     nsbs;
  unsigned     sbi;
  unsigned     npartial;
  int          flag;
  /*The first bit is the flag value of the first run.*/
  flag=(int)oc_pack_read1(&_dec->opb);
  sb_flags=_dec->state.sb_flags;
  nsbs=_dec->state.nsbs;
  npartial=0;
  for(sbi=0;sbi<nsbs;){
    unsigned run_left;
    int      full_run;
    run_left=oc_sb_run_unpack(&_dec->opb);
    full_run=run_left>=4129;
    /*Apply the flag to the whole run, but never past the last super block.
      Every super block visited here also has coded_fully reset.*/
    do{
      sb_flags[sbi].coded_partially=flag;
      sb_flags[sbi].coded_fully=0;
      npartial+=flag;
      sbi++;
      run_left--;
    }
    while(run_left>0&&sbi<nsbs);
    /*After a maximum-length run the next flag is read explicitly; after any
       other run it simply toggles.*/
    if(full_run&&sbi<nsbs)flag=(int)oc_pack_read1(&_dec->opb);
    else flag=!flag;
  }
  /*TODO: The last run should have been used up exactly at this point.
    If it was not, we should issue a warning of some kind.*/
  return npartial;
}
561 
/*Decodes the bit flags for whether or not each non-partially-coded super
   block is fully coded or not.
  This function should only be called if there is at least one
   non-partially-coded super block.
  The decoded flags are stored in the super block flags; nothing is returned.*/
static void oc_dec_coded_sb_flags_unpack(oc_dec_ctx *_dec){
  oc_sb_flags *sb_flags;
  unsigned     nsbs;
  unsigned     sbi;
  int          flag;
  sb_flags=_dec->state.sb_flags;
  nsbs=_dec->state.nsbs;
  /*Advance to the first super block that is not partially coded.
    There is no bound check here: the caller must guarantee that one
     exists.*/
  sbi=0;
  while(sb_flags[sbi].coded_partially)sbi++;
  /*The first bit is the flag value of the first run.*/
  flag=(int)oc_pack_read1(&_dec->opb);
  do{
    unsigned run_left;
    int      full_run;
    run_left=oc_sb_run_unpack(&_dec->opb);
    full_run=run_left>=4129;
    /*Partially coded super blocks are stepped over without consuming any of
       the run.*/
    while(sbi<nsbs){
      if(!sb_flags[sbi].coded_partially){
        if(run_left==0)break;
        run_left--;
        sb_flags[sbi].coded_fully=flag;
      }
      sbi++;
    }
    /*After a maximum-length run the next flag is read explicitly; after any
       other run it simply toggles.*/
    if(full_run&&sbi<nsbs)flag=(int)oc_pack_read1(&_dec->opb);
    else flag=!flag;
  }
  while(sbi<nsbs);
  /*TODO: The last run should have been used up exactly at this point.
    If it was not, we should issue a warning of some kind.*/
}
599 
/*Decodes the coded/uncoded flag of every fragment in an INTER frame.
  The super block flags are unpacked first; block-level runs are then read
   for the fragments of partially coded super blocks only.
  Coded fragment indices are appended to the front of the coded fragment
   list, and uncoded ones are written backwards from nfrags entries past its
   start.
  For the luma plane, each macro block's mb_modes entry is set to whether it
   contains at least one coded fragment.
  _dec: The decoder context.*/
static void oc_dec_coded_flags_unpack(oc_dec_ctx *_dec){
  const oc_sb_map   *sb_maps;
  const oc_sb_flags *sb_flags;
  signed char       *mb_modes;
  oc_fragment       *frags;
  ptrdiff_t         *coded_fragis;
  ptrdiff_t         *uncoded_end;
  ptrdiff_t          ncoded;
  ptrdiff_t          nuncoded;
  ptrdiff_t          plane_start;
  unsigned           npartial;
  unsigned           sb_end;
  unsigned           sbi;
  long               val;
  int                run_left;
  int                flag;
  int                pli;
  npartial=oc_dec_partial_sb_flags_unpack(_dec);
  /*The fully-coded flags are only present if some super block is not
     partially coded.*/
  if(npartial<_dec->state.nsbs)oc_dec_coded_sb_flags_unpack(_dec);
  /*The initial block flag is stored inverted, because it is toggled again
     when the first block run is read below.*/
  flag=0;
  if(npartial>0){
    val=oc_pack_read1(&_dec->opb);
    flag=!(int)val;
  }
  sb_maps=(const oc_sb_map *)_dec->state.sb_maps;
  sb_flags=_dec->state.sb_flags;
  mb_modes=_dec->state.mb_modes;
  frags=_dec->state.frags;
  coded_fragis=_dec->state.coded_fragis;
  uncoded_end=coded_fragis+_dec->state.nfrags;
  ncoded=0;
  nuncoded=0;
  run_left=0;
  sbi=0;
  sb_end=0;
  for(pli=0;pli<3;pli++){
    plane_start=ncoded;
    /*Super blocks are numbered consecutively across the planes.*/
    sb_end+=_dec->state.fplanes[pli].nsbs;
    for(;sbi<sb_end;sbi++){
      int quadi;
      for(quadi=0;quadi<4;quadi++){
        int quad_coded;
        int bi;
        /*Skip quadrants that are not marked valid.*/
        if(!(sb_flags[sbi].quad_valid&(1<<quadi)))continue;
        quad_coded=0;
        for(bi=0;bi<4;bi++){
          ptrdiff_t fragi;
          int       coded;
          fragi=sb_maps[sbi][quadi][bi];
          /*A negative index means there is no fragment in this slot.*/
          if(fragi<0)continue;
          if(sb_flags[sbi].coded_fully)coded=1;
          else if(sb_flags[sbi].coded_partially){
            /*Start a new block run, toggling the flag, whenever the
               previous run has been used up.*/
            if(run_left<=0){
              run_left=oc_block_run_unpack(&_dec->opb);
              flag=!flag;
            }
            run_left--;
            coded=flag;
          }
          else coded=0;
          if(coded){
            coded_fragis[ncoded]=fragi;
            ncoded++;
          }
          else{
            nuncoded++;
            uncoded_end[-nuncoded]=fragi;
          }
          quad_coded|=coded;
          frags[fragi].coded=coded;
          frags[fragi].refi=OC_FRAME_NONE;
        }
        /*Remember if there's a coded luma block in this macro block.*/
        if(pli==0)mb_modes[(sbi<<2)|quadi]=quad_coded;
      }
    }
    _dec->state.ncoded_fragis[pli]=ncoded-plane_start;
  }
  _dec->state.ntotal_coded_fragis=ncoded;
  /*TODO: The last block run should have been used up exactly at this point.
    If it was not, we should issue a warning of some kind.*/
}
673 
674 
675 /*Coding scheme:
676    Codeword            Mode Index
677    0                       0
678    10                      1
679    110                     2
680    1110                    3
681    11110                   4
682    111110                  5
683    1111110                 6
684    1111111                 7*/
685 static const ogg_int16_t OC_VLC_MODE_TREE[26]={
686   4,
687    -(1<<8|0),-(1<<8|0),-(1<<8|0),-(1<<8|0),
688    -(1<<8|0),-(1<<8|0),-(1<<8|0),-(1<<8|0),
689    -(2<<8|1),-(2<<8|1),-(2<<8|1),-(2<<8|1),
690    -(3<<8|2),-(3<<8|2),-(4<<8|3),17,
691     3,
692      -(1<<8|4),-(1<<8|4),-(1<<8|4),-(1<<8|4),
693      -(2<<8|5),-(2<<8|5),-(3<<8|6),-(3<<8|7)
694 };
695 
696 static const ogg_int16_t OC_CLC_MODE_TREE[9]={
697   3,
698    -(3<<8|0),-(3<<8|1),-(3<<8|2),-(3<<8|3),
699    -(3<<8|4),-(3<<8|5),-(3<<8|6),-(3<<8|7)
700 };
701 
702 /*Unpacks the list of macro block modes for INTER frames.*/
static void oc_dec_mb_modes_unpack(oc_dec_ctx *_dec){
  signed char         *mb_modes;
  const unsigned char *alphabet;
  const ogg_int16_t   *mode_tree;
  unsigned char        scheme0_alphabet[8];
  size_t               nmbs;
  size_t               mbi;
  long                 val;
  int                  mode_scheme;
  /*The first 3 bits select the mode coding scheme.*/
  val=oc_pack_read(&_dec->opb,3);
  mode_scheme=(int)val;
  if(mode_scheme!=0)alphabet=OC_MODE_ALPHABETS[mode_scheme-1];
  else{
    int mi;
    /*Scheme 0 sends a custom alphabet: for each mode of the default
       ordering, in turn, the 3-bit index it is assigned to.
      Every slot is given a value first, so that a (likely corrupt) stream
       that does not use each index exactly once still leaves the whole
       alphabet defined: we won't crash, and we'll decode SOMETHING.*/
    /*LOOP VECTORIZES*/
    for(mi=0;mi<OC_NMODES;mi++)scheme0_alphabet[mi]=OC_MODE_INTER_NOMV;
    for(mi=0;mi<OC_NMODES;mi++){
      val=oc_pack_read(&_dec->opb,3);
      scheme0_alphabet[val]=OC_MODE_ALPHABETS[6][mi];
    }
    alphabet=scheme0_alphabet;
  }
  /*Scheme 7 uses the fixed-length code; every other scheme uses the
     variable-length one.*/
  if(mode_scheme==7)mode_tree=OC_CLC_MODE_TREE;
  else mode_tree=OC_VLC_MODE_TREE;
  mb_modes=_dec->state.mb_modes;
  nmbs=_dec->state.nmbs;
  for(mbi=0;mbi<nmbs;mbi++){
    /*Only macro blocks with a coded luma block have a mode in the stream.
      The other valid macro blocks are forced to INTER_NOMV, which relies on
       OC_MODE_INTER_NOMV already being 0.*/
    if(mb_modes[mbi]<=0)continue;
    mb_modes[mbi]=alphabet[oc_huff_token_decode(&_dec->opb,mode_tree)];
  }
}
741 
742 
743 
744 static const ogg_int16_t OC_VLC_MV_COMP_TREE[101]={
745   5,
746    -(3<<8|32+0),-(3<<8|32+0),-(3<<8|32+0),-(3<<8|32+0),
747    -(3<<8|32+1),-(3<<8|32+1),-(3<<8|32+1),-(3<<8|32+1),
748    -(3<<8|32-1),-(3<<8|32-1),-(3<<8|32-1),-(3<<8|32-1),
749    -(4<<8|32+2),-(4<<8|32+2),-(4<<8|32-2),-(4<<8|32-2),
750    -(4<<8|32+3),-(4<<8|32+3),-(4<<8|32-3),-(4<<8|32-3),
751    33,          36,          39,          42,
752    45,          50,          55,          60,
753    65,          74,          83,          92,
754     1,-(1<<8|32+4),-(1<<8|32-4),
755     1,-(1<<8|32+5),-(1<<8|32-5),
756     1,-(1<<8|32+6),-(1<<8|32-6),
757     1,-(1<<8|32+7),-(1<<8|32-7),
758     2,-(2<<8|32+8),-(2<<8|32-8),-(2<<8|32+9),-(2<<8|32-9),
759     2,-(2<<8|32+10),-(2<<8|32-10),-(2<<8|32+11),-(2<<8|32-11),
760     2,-(2<<8|32+12),-(2<<8|32-12),-(2<<8|32+13),-(2<<8|32-13),
761     2,-(2<<8|32+14),-(2<<8|32-14),-(2<<8|32+15),-(2<<8|32-15),
762     3,
763      -(3<<8|32+16),-(3<<8|32-16),-(3<<8|32+17),-(3<<8|32-17),
764      -(3<<8|32+18),-(3<<8|32-18),-(3<<8|32+19),-(3<<8|32-19),
765     3,
766      -(3<<8|32+20),-(3<<8|32-20),-(3<<8|32+21),-(3<<8|32-21),
767      -(3<<8|32+22),-(3<<8|32-22),-(3<<8|32+23),-(3<<8|32-23),
768     3,
769      -(3<<8|32+24),-(3<<8|32-24),-(3<<8|32+25),-(3<<8|32-25),
770      -(3<<8|32+26),-(3<<8|32-26),-(3<<8|32+27),-(3<<8|32-27),
771     3,
772      -(3<<8|32+28),-(3<<8|32-28),-(3<<8|32+29),-(3<<8|32-29),
773      -(3<<8|32+30),-(3<<8|32-30),-(3<<8|32+31),-(3<<8|32-31)
774 };
775 
776 static const ogg_int16_t OC_CLC_MV_COMP_TREE[65]={
777   6,
778    -(6<<8|32 +0),-(6<<8|32 -0),-(6<<8|32 +1),-(6<<8|32 -1),
779    -(6<<8|32 +2),-(6<<8|32 -2),-(6<<8|32 +3),-(6<<8|32 -3),
780    -(6<<8|32 +4),-(6<<8|32 -4),-(6<<8|32 +5),-(6<<8|32 -5),
781    -(6<<8|32 +6),-(6<<8|32 -6),-(6<<8|32 +7),-(6<<8|32 -7),
782    -(6<<8|32 +8),-(6<<8|32 -8),-(6<<8|32 +9),-(6<<8|32 -9),
783    -(6<<8|32+10),-(6<<8|32-10),-(6<<8|32+11),-(6<<8|32-11),
784    -(6<<8|32+12),-(6<<8|32-12),-(6<<8|32+13),-(6<<8|32-13),
785    -(6<<8|32+14),-(6<<8|32-14),-(6<<8|32+15),-(6<<8|32-15),
786    -(6<<8|32+16),-(6<<8|32-16),-(6<<8|32+17),-(6<<8|32-17),
787    -(6<<8|32+18),-(6<<8|32-18),-(6<<8|32+19),-(6<<8|32-19),
788    -(6<<8|32+20),-(6<<8|32-20),-(6<<8|32+21),-(6<<8|32-21),
789    -(6<<8|32+22),-(6<<8|32-22),-(6<<8|32+23),-(6<<8|32-23),
790    -(6<<8|32+24),-(6<<8|32-24),-(6<<8|32+25),-(6<<8|32-25),
791    -(6<<8|32+26),-(6<<8|32-26),-(6<<8|32+27),-(6<<8|32-27),
792    -(6<<8|32+28),-(6<<8|32-28),-(6<<8|32+29),-(6<<8|32-29),
793    -(6<<8|32+30),-(6<<8|32-30),-(6<<8|32+31),-(6<<8|32-31)
794 };
795 
796 
oc_mv_unpack(oc_pack_buf * _opb,const ogg_int16_t * _tree)797 static oc_mv oc_mv_unpack(oc_pack_buf *_opb,const ogg_int16_t *_tree){
798   int dx;
799   int dy;
800   dx=oc_huff_token_decode(_opb,_tree)-32;
801   dy=oc_huff_token_decode(_opb,_tree)-32;
802   return OC_MV(dx,dy);
803 }
804 
/*Unpacks the list of motion vectors for INTER frames, and propagates the macro
   block modes and motion vectors to the individual fragments.*/
oc_dec_mv_unpack_and_frag_modes_fill(oc_dec_ctx * _dec)807 static void oc_dec_mv_unpack_and_frag_modes_fill(oc_dec_ctx *_dec){
808   const oc_mb_map        *mb_maps;
809   const signed char      *mb_modes;
810   oc_set_chroma_mvs_func  set_chroma_mvs;
811   const ogg_int16_t      *mv_comp_tree;
812   oc_fragment            *frags;
813   oc_mv                  *frag_mvs;
814   const unsigned char    *map_idxs;
815   int                     map_nidxs;
816   oc_mv                   last_mv;
817   oc_mv                   prior_mv;
818   oc_mv                   cbmvs[4];
819   size_t                  nmbs;
820   size_t                  mbi;
821   long                    val;
822   set_chroma_mvs=OC_SET_CHROMA_MVS_TABLE[_dec->state.info.pixel_fmt];
823   val=oc_pack_read1(&_dec->opb);
824   mv_comp_tree=val?OC_CLC_MV_COMP_TREE:OC_VLC_MV_COMP_TREE;
825   map_idxs=OC_MB_MAP_IDXS[_dec->state.info.pixel_fmt];
826   map_nidxs=OC_MB_MAP_NIDXS[_dec->state.info.pixel_fmt];
827   prior_mv=last_mv=0;
828   frags=_dec->state.frags;
829   frag_mvs=_dec->state.frag_mvs;
830   mb_maps=(const oc_mb_map *)_dec->state.mb_maps;
831   mb_modes=_dec->state.mb_modes;
832   nmbs=_dec->state.nmbs;
833   for(mbi=0;mbi<nmbs;mbi++){
834     int mb_mode;
835     mb_mode=mb_modes[mbi];
836     if(mb_mode!=OC_MODE_INVALID){
837       oc_mv     mbmv;
838       ptrdiff_t fragi;
839       int       mapi;
840       int       mapii;
841       int       refi;
842       if(mb_mode==OC_MODE_INTER_MV_FOUR){
843         oc_mv lbmvs[4];
844         int   bi;
845         prior_mv=last_mv;
846         for(bi=0;bi<4;bi++){
847           fragi=mb_maps[mbi][0][bi];
848           if(frags[fragi].coded){
849             frags[fragi].refi=OC_FRAME_PREV;
850             frags[fragi].mb_mode=OC_MODE_INTER_MV_FOUR;
851             lbmvs[bi]=last_mv=oc_mv_unpack(&_dec->opb,mv_comp_tree);
852             frag_mvs[fragi]=lbmvs[bi];
853           }
854           else lbmvs[bi]=0;
855         }
856         (*set_chroma_mvs)(cbmvs,lbmvs);
857         for(mapii=4;mapii<map_nidxs;mapii++){
858           mapi=map_idxs[mapii];
859           bi=mapi&3;
860           fragi=mb_maps[mbi][mapi>>2][bi];
861           if(frags[fragi].coded){
862             frags[fragi].refi=OC_FRAME_PREV;
863             frags[fragi].mb_mode=OC_MODE_INTER_MV_FOUR;
864             frag_mvs[fragi]=cbmvs[bi];
865           }
866         }
867       }
868       else{
869         switch(mb_mode){
870           case OC_MODE_INTER_MV:{
871             prior_mv=last_mv;
872             last_mv=mbmv=oc_mv_unpack(&_dec->opb,mv_comp_tree);
873           }break;
874           case OC_MODE_INTER_MV_LAST:mbmv=last_mv;break;
875           case OC_MODE_INTER_MV_LAST2:{
876             mbmv=prior_mv;
877             prior_mv=last_mv;
878             last_mv=mbmv;
879           }break;
880           case OC_MODE_GOLDEN_MV:{
881             mbmv=oc_mv_unpack(&_dec->opb,mv_comp_tree);
882           }break;
883           default:mbmv=0;break;
884         }
885         /*Fill in the MVs for the fragments.*/
886         refi=OC_FRAME_FOR_MODE(mb_mode);
887         mapii=0;
888         do{
889           mapi=map_idxs[mapii];
890           fragi=mb_maps[mbi][mapi>>2][mapi&3];
891           if(frags[fragi].coded){
892             frags[fragi].refi=refi;
893             frags[fragi].mb_mode=mb_mode;
894             frag_mvs[fragi]=mbmv;
895           }
896         }
897         while(++mapii<map_nidxs);
898       }
899     }
900   }
901 }
902 
/*Unpacks the qi index (qii) of every coded fragment in the frame.
  If the frame has a single qi value, every coded fragment gets a qii of 0 and
   nothing is read from the packet.
  Otherwise the qii's are decoded with up to two passes of the same binary RLE
   scheme used for super-block coded bits.
  The first pass marks each fragment as having a qii of 0 or greater than 0.
  The second pass is only made when the frame has 3 qi values and the first
   pass produced at least one non-zero qii; it distinguishes between a qii of
   1 and 2.
  _dec: The decoder context.*/
static void oc_dec_block_qis_unpack(oc_dec_ctx *_dec){
  oc_fragment     *frags;
  const ptrdiff_t *coded_fragis;
  ptrdiff_t        ncoded_fragis;
  ptrdiff_t        fragii;
  int              flag;
  int              nqi1;
  int              run_count;
  int              full_run;
  ncoded_fragis=_dec->state.ntotal_coded_fragis;
  if(ncoded_fragis<=0)return;
  frags=_dec->state.frags;
  coded_fragis=_dec->state.coded_fragis;
  if(_dec->state.nqis==1){
    /*Only one qi value in this frame: use it for all coded fragments.*/
    fragii=0;
    while(fragii<ncoded_fragis)frags[coded_fragis[fragii++]].qii=0;
    return;
  }
  /*First pass: qii==0 versus qii>0.
    nqi1 counts how many fragments were given a non-zero qii.*/
  flag=(int)oc_pack_read1(&_dec->opb);
  nqi1=0;
  fragii=0;
  while(fragii<ncoded_fragis){
    run_count=oc_sb_run_unpack(&_dec->opb);
    /*After a run of at least 4129 the next flag is read explicitly instead of
       being toggled (presumably because that is the longest run that can be
       coded).*/
    full_run=run_count>=4129;
    do{
      frags[coded_fragis[fragii]].qii=flag;
      fragii++;
      nqi1+=flag;
      run_count--;
    }
    while(run_count>0&&fragii<ncoded_fragis);
    if(full_run&&fragii<ncoded_fragis)flag=(int)oc_pack_read1(&_dec->opb);
    else flag=!flag;
  }
  /*TODO: run_count should be 0 here.
    If it's not, we should issue a warning of some kind.*/
  /*Without 3 different qi's, or without any non-zero qii, there is nothing
     left to refine.*/
  if(_dec->state.nqis!=3||nqi1<=0)return;
  /*Second pass: qii==1 versus qii==2.
    Start at the first fragment with a non-zero qii; one is guaranteed to
     exist because nqi1>0.*/
  fragii=0;
  while(frags[coded_fragis[fragii]].qii==0)fragii++;
  flag=(int)oc_pack_read1(&_dec->opb);
  do{
    run_count=oc_sb_run_unpack(&_dec->opb);
    full_run=run_count>=4129;
    /*Only fragments with a non-zero qii consume an element of the run.*/
    while(fragii<ncoded_fragis){
      ptrdiff_t fragi;
      fragi=coded_fragis[fragii];
      if(frags[fragi].qii!=0){
        if(run_count<=0)break;
        run_count--;
        frags[fragi].qii+=flag;
      }
      fragii++;
    }
    if(full_run&&fragii<ncoded_fragis)flag=(int)oc_pack_read1(&_dec->opb);
    else flag=!flag;
  }
  while(fragii<ncoded_fragis);
  /*TODO: run_count should be 0 here.
    If it's not, we should issue a warning of some kind.*/
}
983 
984 
985 
/*Unpacks the DC coefficient tokens.
  Unlike when unpacking the AC coefficient tokens, we actually need to decode
   the DC coefficient values now so that we can do DC prediction.
  The raw tokens (and their extra bits) are also appended to _dec->dct_tokens,
   and the starting token offset and EOB run for each plane are saved in
   _dec->ti0[pli][0] and _dec->eob_runs[pli][0].
  _dec:        The decoder context.
  _huff_idxs:  The indices of the Huffman tables to use: entry 0 is used for
                plane 0 and entry 1 for planes 1 and 2.
  _ntoks_left: The number of tokens left to be decoded in each color plane for
                each coefficient.
               This is updated as EOB tokens and zero run tokens are decoded.
  Return: The length of any outstanding EOB run.*/
static ptrdiff_t oc_dec_dc_coeff_unpack(oc_dec_ctx *_dec,int _huff_idxs[2],
 ptrdiff_t _ntoks_left[3][64]){
  unsigned char   *dct_tokens;
  oc_fragment     *frags;
  const ptrdiff_t *coded_fragis;
  ptrdiff_t        ncoded_fragis;
  ptrdiff_t        fragii;
  ptrdiff_t        eobs;
  ptrdiff_t        ti;
  int              pli;
  dct_tokens=_dec->dct_tokens;
  frags=_dec->state.frags;
  coded_fragis=_dec->state.coded_fragis;
  /*fragii, eobs, and ti all carry over from one plane to the next;
     ncoded_fragis accumulates the end of the current plane's range in the
     coded fragment list.*/
  ncoded_fragis=fragii=eobs=ti=0;
  for(pli=0;pli<3;pli++){
    ptrdiff_t run_counts[64];
    ptrdiff_t eob_count;
    ptrdiff_t eobi;
    int       rli;
    ncoded_fragis+=_dec->state.ncoded_fragis[pli];
    memset(run_counts,0,sizeof(run_counts));
    /*Record where this plane's DC tokens start and how much of an EOB run it
       inherits.*/
    _dec->eob_runs[pli][0]=eobs;
    _dec->ti0[pli][0]=ti;
    /*Continue any previous EOB run, if there was one.*/
    eobi=eobs;
    if(ncoded_fragis-fragii<eobi)eobi=ncoded_fragis-fragii;
    eob_count=eobi;
    eobs-=eobi;
    while(eobi-->0)frags[coded_fragis[fragii++]].dc=0;
    while(fragii<ncoded_fragis){
      int token;
      int cw;
      int eb;
      int skip;
      /*pli+1>>1 maps plane 0 to entry 0 and planes 1 and 2 to entry 1.*/
      token=oc_huff_token_decode(&_dec->opb,
       _dec->huff_tables[_huff_idxs[pli+1>>1]]);
      dct_tokens[ti++]=(unsigned char)token;
      if(OC_DCT_TOKEN_NEEDS_MORE(token)){
        /*Store the extra bits after the token: one byte, or two bytes for
           OC_DCT_TOKEN_FAT_EOB.*/
        eb=(int)oc_pack_read(&_dec->opb,
         OC_INTERNAL_DCT_TOKEN_EXTRA_BITS[token]);
        dct_tokens[ti++]=(unsigned char)eb;
        if(token==OC_DCT_TOKEN_FAT_EOB)dct_tokens[ti++]=(unsigned char)(eb>>8);
        eb<<=OC_DCT_TOKEN_EB_POS(token);
      }
      else eb=0;
      /*Combine the token's base code word with its positioned extra bits and
         pull out the EOB run length.*/
      cw=OC_DCT_CODE_WORD[token]+eb;
      eobs=cw>>OC_DCT_CW_EOB_SHIFT&0xFFF;
      if(cw==OC_DCT_CW_FINISH)eobs=OC_DCT_EOB_FINISH;
      if(eobs){
        /*An EOB run: zero the DC of as many of this plane's remaining
           fragments as the run covers, and keep the rest of the run.*/
        eobi=OC_MINI(eobs,ncoded_fragis-fragii);
        eob_count+=eobi;
        eobs-=eobi;
        while(eobi-->0)frags[coded_fragis[fragii++]].dc=0;
      }
      else{
        int coeff;
        /*A coefficient token: extract the zero run length and the value.
          A non-zero run length means the DC coefficient itself is zero.*/
        skip=(unsigned char)(cw>>OC_DCT_CW_RLEN_SHIFT);
        cw^=-(cw&1<<OC_DCT_CW_FLIP_BIT);
        coeff=cw>>OC_DCT_CW_MAG_SHIFT;
        if(skip)coeff=0;
        run_counts[skip]++;
        frags[coded_fragis[fragii++]].dc=coeff;
      }
    }
    /*Add the total EOB count to the longest run length.*/
    run_counts[63]+=eob_count;
    /*And convert the run_counts array to a moment table.
      Afterwards run_counts[i] holds the sum of the original counts for
       indices i through 63.*/
    for(rli=63;rli-->0;)run_counts[rli]+=run_counts[rli+1];
    /*Finally, subtract off the number of coefficients that have been
       accounted for by runs started in this coefficient.*/
    for(rli=64;rli-->0;)_ntoks_left[pli][rli]-=run_counts[rli];
  }
  _dec->dct_tokens_count=ti;
  return eobs;
}
1069 
1070 /*Unpacks the AC coefficient tokens.
1071   This can completely discard coefficient values while unpacking, and so is
1072    somewhat simpler than unpacking the DC coefficient tokens.
1073   _huff_idx:   The index of the Huffman table to use for each color plane.
1074   _ntoks_left: The number of tokens left to be decoded in each color plane for
1075                 each coefficient.
1076                This is updated as EOB tokens and zero run tokens are decoded.
1077   _eobs:       The length of any outstanding EOB run from previous
1078                 coefficients.
1079   Return: The length of any outstanding EOB run.*/
oc_dec_ac_coeff_unpack(oc_dec_ctx * _dec,int _zzi,int _huff_idxs[2],ptrdiff_t _ntoks_left[3][64],ptrdiff_t _eobs)1080 static int oc_dec_ac_coeff_unpack(oc_dec_ctx *_dec,int _zzi,int _huff_idxs[2],
1081  ptrdiff_t _ntoks_left[3][64],ptrdiff_t _eobs){
1082   unsigned char *dct_tokens;
1083   ptrdiff_t      ti;
1084   int            pli;
1085   dct_tokens=_dec->dct_tokens;
1086   ti=_dec->dct_tokens_count;
1087   for(pli=0;pli<3;pli++){
1088     ptrdiff_t run_counts[64];
1089     ptrdiff_t eob_count;
1090     size_t    ntoks_left;
1091     size_t    ntoks;
1092     int       rli;
1093     _dec->eob_runs[pli][_zzi]=_eobs;
1094     _dec->ti0[pli][_zzi]=ti;
1095     ntoks_left=_ntoks_left[pli][_zzi];
1096     memset(run_counts,0,sizeof(run_counts));
1097     eob_count=0;
1098     ntoks=0;
1099     while(ntoks+_eobs<ntoks_left){
1100       int token;
1101       int cw;
1102       int eb;
1103       int skip;
1104       ntoks+=_eobs;
1105       eob_count+=_eobs;
1106       token=oc_huff_token_decode(&_dec->opb,
1107        _dec->huff_tables[_huff_idxs[pli+1>>1]]);
1108       dct_tokens[ti++]=(unsigned char)token;
1109       if(OC_DCT_TOKEN_NEEDS_MORE(token)){
1110         eb=(int)oc_pack_read(&_dec->opb,
1111          OC_INTERNAL_DCT_TOKEN_EXTRA_BITS[token]);
1112         dct_tokens[ti++]=(unsigned char)eb;
1113         if(token==OC_DCT_TOKEN_FAT_EOB)dct_tokens[ti++]=(unsigned char)(eb>>8);
1114         eb<<=OC_DCT_TOKEN_EB_POS(token);
1115       }
1116       else eb=0;
1117       cw=OC_DCT_CODE_WORD[token]+eb;
1118       skip=(unsigned char)(cw>>OC_DCT_CW_RLEN_SHIFT);
1119       _eobs=cw>>OC_DCT_CW_EOB_SHIFT&0xFFF;
1120       if(cw==OC_DCT_CW_FINISH)_eobs=OC_DCT_EOB_FINISH;
1121       if(_eobs==0){
1122         run_counts[skip]++;
1123         ntoks++;
1124       }
1125     }
1126     /*Add the portion of the last EOB run actually used by this coefficient.*/
1127     eob_count+=ntoks_left-ntoks;
1128     /*And remove it from the remaining EOB count.*/
1129     _eobs-=ntoks_left-ntoks;
1130     /*Add the total EOB count to the longest run length.*/
1131     run_counts[63]+=eob_count;
1132     /*And convert the run_counts array to a moment table.*/
1133     for(rli=63;rli-->0;)run_counts[rli]+=run_counts[rli+1];
1134     /*Finally, subtract off the number of coefficients that have been
1135        accounted for by runs started in this coefficient.*/
1136     for(rli=64-_zzi;rli-->0;)_ntoks_left[pli][_zzi+rli]-=run_counts[rli];
1137   }
1138   _dec->dct_tokens_count=ti;
1139   return _eobs;
1140 }
1141 
1142 /*Tokens describing the DCT coefficients that belong to each fragment are
1143    stored in the bitstream grouped by coefficient, not by fragment.
1144 
1145   This means that we either decode all the tokens in order, building up a
1146    separate coefficient list for each fragment as we go, and then go back and
1147    do the iDCT on each fragment, or we have to create separate lists of tokens
1148    for each coefficient, so that we can pull the next token required off the
1149    head of the appropriate list when decoding a specific fragment.
1150 
1151   The former was VP3's choice, and it meant 2*w*h extra storage for all the
1152    decoded coefficient values.
1153 
1154   We take the second option, which lets us store just one to three bytes per
1155    token (generally far fewer than the number of coefficients, due to EOB
1156    tokens and zero runs), and which requires us to only maintain a counter for
1157    each of the 64 coefficients, instead of a counter for every fragment to
1158    determine where the next token goes.
1159 
1160   We actually use 3 counters per coefficient, one for each color plane, so we
1161    can decode all color planes simultaneously.
1162   This lets color conversion, etc., be done as soon as a full MCU (one or
1163    two super block rows) is decoded, while the image data is still in cache.*/
1164 
/*Unpacks the DCT tokens for every coefficient of every coded fragment.
  The DC tokens are unpacked first, using a pair of 4-bit Huffman table
   indices read from the packet.
  A second pair of 4-bit indices is then read for the AC coefficients.
  The 63 AC zig-zag indices are split into four groups that start at indices
   1, 6, 15, and 28, and the table indices are advanced by 16 on entry to each
   group.
  _dec: The decoder context.*/
static void oc_dec_residual_tokens_unpack(oc_dec_ctx *_dec){
  /*Entry hgi is the first zig-zag index that lies beyond Huffman group hgi.*/
  static const unsigned char OC_HUFF_LIST_MAX[5]={1,6,15,28,64};
  ptrdiff_t  ntoks_left[3][64];
  int        huff_idxs[2];
  ptrdiff_t  eobs;
  int        pli;
  int        zzi;
  int        hgi;
  /*To begin with, every coefficient of every coded fragment is outstanding.*/
  for(pli=0;pli<3;pli++){
    ptrdiff_t ncoded_fragis;
    ncoded_fragis=_dec->state.ncoded_fragis[pli];
    for(zzi=0;zzi<64;zzi++)ntoks_left[pli][zzi]=ncoded_fragis;
  }
  /*The DC table indices, followed by the DC tokens themselves.*/
  huff_idxs[0]=(int)oc_pack_read(&_dec->opb,4);
  huff_idxs[1]=(int)oc_pack_read(&_dec->opb,4);
  _dec->eob_runs[0][0]=0;
  eobs=oc_dec_dc_coeff_unpack(_dec,huff_idxs,ntoks_left);
#if defined(HAVE_CAIRO)
  _dec->telemetry_dc_bytes=oc_pack_bytes_left(&_dec->opb);
#endif
  /*The AC table indices, followed by the AC tokens, one zig-zag index at a
     time.*/
  huff_idxs[0]=(int)oc_pack_read(&_dec->opb,4);
  huff_idxs[1]=(int)oc_pack_read(&_dec->opb,4);
  hgi=0;
  for(zzi=1;zzi<64;zzi++){
    /*Crossing into the next Huffman group moves both indices on by 16.*/
    if(zzi>=OC_HUFF_LIST_MAX[hgi]){
      hgi++;
      huff_idxs[0]+=16;
      huff_idxs[1]+=16;
    }
    eobs=oc_dec_ac_coeff_unpack(_dec,zzi,huff_idxs,ntoks_left,eobs);
  }
  /*TODO: eobs should be exactly zero, or 4096 or greater.
    The second case occurs when an EOB run of size zero is encountered, which
     gets treated as an infinite EOB run (where infinity is PTRDIFF_MAX).
    If neither of these conditions holds, then a warning should be issued.*/
}
1203 
1204 
/*Prepares the decoder's post-processing state for the current frame.
  Depending on the requested post-processing level this frees buffers that
   are no longer needed, starts or updates the per-fragment DC quantization
   index tracking, allocates the variance and post-processed frame buffers,
   and points pp_frame_buf at the right planes.
  _dec: The decoder context.
  Return: 0 if the post-processing buffers are set up for this frame, or 1 if
           post-processing should not be done (it is disabled, only DC qi's are
           being tracked, tracking could not start on this frame, or an
           allocation failed).*/
static int oc_dec_postprocess_init(oc_dec_ctx *_dec){
  int pp_frame_state;
  /*musl libc malloc()/realloc() calls might use floating point, so make sure
     we've cleared the MMX state for them.*/
  oc_restore_fpu(&_dec->state);
  /*pp_level 0: disabled; release everything we were holding and return.*/
  if(_dec->pp_level<=OC_PP_LEVEL_DISABLED){
    if(_dec->dc_qis!=NULL){
      _ogg_free(_dec->dc_qis);
      _ogg_free(_dec->variances);
      _ogg_free(_dec->pp_frame_data);
      _dec->dc_qis=NULL;
      _dec->variances=NULL;
      _dec->pp_frame_data=NULL;
    }
    return 1;
  }
  if(_dec->dc_qis!=NULL){
    unsigned char   *dc_qis;
    const ptrdiff_t *coded_fragis;
    ptrdiff_t        ncoded_fragis;
    ptrdiff_t        fragii;
    unsigned char    qi0;
    /*Already tracking: refresh the DC quantization index of each block coded
       in this frame.*/
    dc_qis=_dec->dc_qis;
    coded_fragis=_dec->state.coded_fragis;
    qi0=(unsigned char)_dec->state.qis[0];
    ncoded_fragis=_dec->state.ncoded_fragis[0]+
     _dec->state.ncoded_fragis[1]+_dec->state.ncoded_fragis[2];
    fragii=0;
    while(fragii<ncoded_fragis)dc_qis[coded_fragis[fragii++]]=qi0;
  }
  else{
    /*If we haven't been tracking DC quantization indices, there's no point in
       starting on anything but an intra frame.*/
    if(_dec->state.frame_type!=OC_INTRA_FRAME)return 1;
    _dec->dc_qis=(unsigned char *)_ogg_malloc(
     _dec->state.nfrags*sizeof(_dec->dc_qis[0]));
    if(_dec->dc_qis==NULL)return 1;
    /*Every fragment starts out with this frame's first qi.*/
    memset(_dec->dc_qis,_dec->state.qis[0],_dec->state.nfrags);
  }
  /*pp_level 1: stop after updating the DC quantization indices, dropping the
     buffers only the higher levels need.*/
  if(_dec->pp_level<=OC_PP_LEVEL_TRACKDCQI){
    if(_dec->variances!=NULL){
      _ogg_free(_dec->variances);
      _ogg_free(_dec->pp_frame_data);
      _dec->variances=NULL;
      _dec->pp_frame_data=NULL;
    }
    return 1;
  }
  if(_dec->variances==NULL){
    size_t frame_sz;
    size_t c_sz;
    int    c_w;
    int    c_h;
    c_w=_dec->state.info.frame_width>>!(_dec->state.info.pixel_fmt&1);
    c_h=_dec->state.info.frame_height>>!(_dec->state.info.pixel_fmt&2);
    c_sz=c_w*(size_t)c_h;
    /*Allocate space for the chroma planes, even if we're not going to use
       them; this simplifies allocation state management, though it may waste
       memory on the few systems that don't overcommit pages.*/
    frame_sz=_dec->state.info.frame_width*(size_t)_dec->state.info.frame_height
     +(c_sz<<1);
    _dec->pp_frame_data=(unsigned char *)_ogg_malloc(
     frame_sz*sizeof(_dec->pp_frame_data[0]));
    _dec->variances=(int *)_ogg_malloc(
     _dec->state.nfrags*sizeof(_dec->variances[0]));
    /*Both buffers are needed; if either allocation failed, keep neither.*/
    if(_dec->pp_frame_data==NULL||_dec->variances==NULL){
      _ogg_free(_dec->pp_frame_data);
      _ogg_free(_dec->variances);
      _dec->pp_frame_data=NULL;
      _dec->variances=NULL;
      return 1;
    }
    /*Force an update of the PP buffer pointers.*/
    _dec->pp_frame_state=0;
  }
  /*The buffer state we want: 1 for luma only, 2 when chroma is processed
     too.*/
  pp_frame_state=1+(_dec->pp_level>=OC_PP_LEVEL_DEBLOCKC);
  /*Update the PP buffer pointers if necessary.*/
  if(_dec->pp_frame_state!=pp_frame_state){
    if(pp_frame_state==1){
      /*If chroma processing is disabled, just use the PP luma plane.
        The stride is negative, so the data pointer is set to the start of the
         last row in memory.*/
      _dec->pp_frame_buf[0].width=_dec->state.info.frame_width;
      _dec->pp_frame_buf[0].height=_dec->state.info.frame_height;
      _dec->pp_frame_buf[0].stride=-_dec->pp_frame_buf[0].width;
      _dec->pp_frame_buf[0].data=_dec->pp_frame_data+
       (1-_dec->pp_frame_buf[0].height)*(ptrdiff_t)_dec->pp_frame_buf[0].stride;
    }
    else{
      size_t y_sz;
      size_t c_sz;
      int    c_w;
      int    c_h;
      int    pli;
      /*Otherwise, set up pointers to all three PP planes, stored one after
         another in pp_frame_data, and then flip them.*/
      y_sz=_dec->state.info.frame_width*(size_t)_dec->state.info.frame_height;
      c_w=_dec->state.info.frame_width>>!(_dec->state.info.pixel_fmt&1);
      c_h=_dec->state.info.frame_height>>!(_dec->state.info.pixel_fmt&2);
      c_sz=c_w*(size_t)c_h;
      _dec->pp_frame_buf[0].width=_dec->state.info.frame_width;
      _dec->pp_frame_buf[0].height=_dec->state.info.frame_height;
      _dec->pp_frame_buf[0].stride=_dec->pp_frame_buf[0].width;
      _dec->pp_frame_buf[0].data=_dec->pp_frame_data;
      for(pli=1;pli<3;pli++){
        _dec->pp_frame_buf[pli].width=c_w;
        _dec->pp_frame_buf[pli].height=c_h;
        _dec->pp_frame_buf[pli].stride=_dec->pp_frame_buf[pli].width;
      }
      _dec->pp_frame_buf[1].data=_dec->pp_frame_buf[0].data+y_sz;
      _dec->pp_frame_buf[2].data=_dec->pp_frame_buf[1].data+c_sz;
      oc_ycbcr_buffer_flip(_dec->pp_frame_buf,_dec->pp_frame_buf);
    }
    _dec->pp_frame_state=pp_frame_state;
  }
  /*If we're not processing chroma, copy the reference frame's chroma planes.*/
  if(pp_frame_state==1){
    memcpy(_dec->pp_frame_buf+1,
     _dec->state.ref_frame_bufs[_dec->state.ref_frame_idx[OC_FRAME_SELF]]+1,
     sizeof(_dec->pp_frame_buf[1])*2);
  }
  return 0;
}
1327 
1328 
1329 /*Initialize the main decoding pipeline.*/
oc_dec_pipeline_init(oc_dec_ctx * _dec,oc_dec_pipeline_state * _pipe)1330 static void oc_dec_pipeline_init(oc_dec_ctx *_dec,
1331  oc_dec_pipeline_state *_pipe){
1332   const ptrdiff_t *coded_fragis;
1333   const ptrdiff_t *uncoded_fragis;
1334   int              flimit;
1335   int              pli;
1336   int              qii;
1337   int              qti;
1338   int              zzi;
1339   /*If chroma is sub-sampled in the vertical direction, we have to decode two
1340      super block rows of Y' for each super block row of Cb and Cr.*/
1341   _pipe->mcu_nvfrags=4<<!(_dec->state.info.pixel_fmt&2);
1342   /*Initialize the token and extra bits indices for each plane and
1343      coefficient.*/
1344   memcpy(_pipe->ti,_dec->ti0,sizeof(_pipe->ti));
1345   /*Also copy over the initial the EOB run counts.*/
1346   memcpy(_pipe->eob_runs,_dec->eob_runs,sizeof(_pipe->eob_runs));
1347   /*Set up per-plane pointers to the coded and uncoded fragments lists.*/
1348   coded_fragis=_dec->state.coded_fragis;
1349   uncoded_fragis=coded_fragis+_dec->state.nfrags;
1350   for(pli=0;pli<3;pli++){
1351     ptrdiff_t ncoded_fragis;
1352     _pipe->coded_fragis[pli]=coded_fragis;
1353     _pipe->uncoded_fragis[pli]=uncoded_fragis;
1354     ncoded_fragis=_dec->state.ncoded_fragis[pli];
1355     coded_fragis+=ncoded_fragis;
1356     uncoded_fragis+=ncoded_fragis-_dec->state.fplanes[pli].nfrags;
1357   }
1358   /*Set up condensed quantizer tables.*/
1359   for(pli=0;pli<3;pli++){
1360     for(qii=0;qii<_dec->state.nqis;qii++){
1361       for(qti=0;qti<2;qti++){
1362         _pipe->dequant[pli][qii][qti]=
1363          _dec->state.dequant_tables[_dec->state.qis[qii]][pli][qti];
1364       }
1365     }
1366   }
1367   /*Set the previous DC predictor to 0 for all color planes and frame types.*/
1368   memset(_pipe->pred_last,0,sizeof(_pipe->pred_last));
1369   /*Initialize the bounding value array for the loop filter.*/
1370   flimit=_dec->state.loop_filter_limits[_dec->state.qis[0]];
1371   _pipe->loop_filter=flimit!=0;
1372   if(flimit!=0)oc_loop_filter_init(&_dec->state,_pipe->bounding_values,flimit);
1373   /*Initialize any buffers needed for post-processing.
1374     We also save the current post-processing level, to guard against the user
1375      changing it from a callback.*/
1376   if(!oc_dec_postprocess_init(_dec))_pipe->pp_level=_dec->pp_level;
1377   /*If we don't have enough information to post-process, disable it, regardless
1378      of the user-requested level.*/
1379   else{
1380     _pipe->pp_level=OC_PP_LEVEL_DISABLED;
1381     memcpy(_dec->pp_frame_buf,
1382      _dec->state.ref_frame_bufs[_dec->state.ref_frame_idx[OC_FRAME_SELF]],
1383      sizeof(_dec->pp_frame_buf[0])*3);
1384   }
1385   /*Clear down the DCT coefficient buffer for the first block.*/
1386   for(zzi=0;zzi<64;zzi++)_pipe->dct_coeffs[zzi]=0;
1387 }
1388 
/*Undo the DC prediction in a single plane of an MCU (one or two super block
   rows).
  Each coded fragment's dc field holds a residual on entry and is replaced by
   the residual plus a predictor computed from already-processed neighbors
   that use the same reference frame.
  As a side effect, the number of coded and uncoded fragments in this plane of
   the MCU is also computed.
  _dec:  The decoder context.
  _pipe: The pipeline state; fragy0, fragy_end, and pred_last are read for
          _pli, and pred_last, ncoded_fragis, and nuncoded_fragis are updated.
  _pli:  The color plane to process.*/
void oc_dec_dc_unpredict_mcu_plane_c(oc_dec_ctx *_dec,
 oc_dec_pipeline_state *_pipe,int _pli){
  const oc_fragment_plane *fplane;
  oc_fragment             *frags;
  int                     *pred_last;
  ptrdiff_t                ncoded_fragis;
  ptrdiff_t                fragi;
  int                      fragx;
  int                      fragy;
  int                      fragy0;
  int                      fragy_end;
  int                      nhfrags;
  /*Compute the first and last fragment row of the current MCU for this
     plane.*/
  fplane=_dec->state.fplanes+_pli;
  fragy0=_pipe->fragy0[_pli];
  fragy_end=_pipe->fragy_end[_pli];
  nhfrags=fplane->nhfrags;
  pred_last=_pipe->pred_last[_pli];
  frags=_dec->state.frags;
  ncoded_fragis=0;
  /*fragi walks the plane's fragments in raster order, starting at the first
     fragment of row fragy0.*/
  fragi=fplane->froffset+fragy0*(ptrdiff_t)nhfrags;
  for(fragy=fragy0;fragy<fragy_end;fragy++){
    if(fragy==0){
      /*For the first row, all of the cases reduce to just using the previous
         predictor for the same reference frame.*/
      for(fragx=0;fragx<nhfrags;fragx++,fragi++){
        if(frags[fragi].coded){
          int refi;
          refi=frags[fragi].refi;
          pred_last[refi]=frags[fragi].dc+=pred_last[refi];
          ncoded_fragis++;
        }
      }
    }
    else{
      oc_fragment *u_frags;
      int          l_ref;
      int          ul_ref;
      int          u_ref;
      /*u_frags[i] is the fragment directly above frags[i].*/
      u_frags=frags-nhfrags;
      /*There is no left or upper-left neighbor in the first column; -1 is
         used so they never match (assuming refi is never -1).*/
      l_ref=-1;
      ul_ref=-1;
      u_ref=u_frags[fragi].refi;
      for(fragx=0;fragx<nhfrags;fragx++,fragi++){
        int ur_ref;
        /*Likewise, there is no upper-right neighbor in the last column.*/
        if(fragx+1>=nhfrags)ur_ref=-1;
        else ur_ref=u_frags[fragi+1].refi;
        if(frags[fragi].coded){
          int pred;
          int refi;
          refi=frags[fragi].refi;
          /*We break out a separate case based on which of our neighbors use
             the same reference frames.
            This is somewhat faster than trying to make a generic case which
             handles all of them, since it reduces lots of poorly predicted
             jumps to one switch statement, and also lets a number of the
             multiplications be optimized out by strength reduction.
            Bit 0 is set for the left neighbor, bit 1 for the upper-left, bit
             2 for the upper, and bit 3 for the upper-right.*/
          switch((l_ref==refi)|(ul_ref==refi)<<1|
           (u_ref==refi)<<2|(ur_ref==refi)<<3){
            /*No neighbor matches: fall back to the last DC value seen for
               this reference frame.*/
            default:pred=pred_last[refi];break;
            case  1:
            case  3:pred=frags[fragi-1].dc;break;
            case  2:pred=u_frags[fragi-1].dc;break;
            case  4:
            case  6:
            case 12:pred=u_frags[fragi].dc;break;
            case  5:pred=(frags[fragi-1].dc+u_frags[fragi].dc)/2;break;
            case  8:pred=u_frags[fragi+1].dc;break;
            case  9:
            case 11:
            case 13:{
              /*The TI compiler mis-compiles this line.*/
              pred=(75*frags[fragi-1].dc+53*u_frags[fragi+1].dc)/128;
            }break;
            case 10:pred=(u_frags[fragi-1].dc+u_frags[fragi+1].dc)/2;break;
            case 14:{
              pred=(3*(u_frags[fragi-1].dc+u_frags[fragi+1].dc)
               +10*u_frags[fragi].dc)/16;
            }break;
            case  7:
            case 15:{
              int p0;
              int p1;
              int p2;
              p0=frags[fragi-1].dc;
              p1=u_frags[fragi-1].dc;
              p2=u_frags[fragi].dc;
              pred=(29*(p0+p2)-26*p1)/32;
              /*If the weighted prediction is more than 128 away from a
                 neighbor, use that neighbor's value instead; the upper
                 neighbor is checked first, then the left, then the
                 upper-left.*/
              if(abs(pred-p2)>128)pred=p2;
              else if(abs(pred-p0)>128)pred=p0;
              else if(abs(pred-p1)>128)pred=p1;
            }break;
          }
          pred_last[refi]=frags[fragi].dc+=pred;
          ncoded_fragis++;
          l_ref=refi;
        }
        /*An uncoded fragment cannot serve as the next fragment's left
           neighbor.*/
        else l_ref=-1;
        /*Slide the upper-row references one fragment to the right.*/
        ul_ref=u_ref;
        u_ref=ur_ref;
      }
    }
  }
  _pipe->ncoded_fragis[_pli]=ncoded_fragis;
  /*Also save the number of uncoded fragments so we know how many to copy.*/
  _pipe->nuncoded_fragis[_pli]=
   (fragy_end-fragy0)*(ptrdiff_t)nhfrags-ncoded_fragis;
}
1502 
1503 /*Reconstructs all coded fragments in a single MCU (one or two super block
1504    rows).
1505   This requires that each coded fragment have a proper macro block mode and
1506    motion vector (if not in INTRA mode), and have its DC value decoded, with
1507    the DC prediction process reversed, and the number of coded and uncoded
1508    fragments in this plane of the MCU be counted.
1509   The token lists for each color plane and coefficient should also be filled
1510    in, along with initial token offsets, extra bits offsets, and EOB run
1511    counts.*/
oc_dec_frags_recon_mcu_plane(oc_dec_ctx * _dec,oc_dec_pipeline_state * _pipe,int _pli)1512 static void oc_dec_frags_recon_mcu_plane(oc_dec_ctx *_dec,
1513  oc_dec_pipeline_state *_pipe,int _pli){
1514   unsigned char       *dct_tokens;
1515   const unsigned char *dct_fzig_zag;
1516   ogg_uint16_t         dc_quant[2];
1517   const oc_fragment   *frags;
1518   const ptrdiff_t     *coded_fragis;
1519   ptrdiff_t            ncoded_fragis;
1520   ptrdiff_t            fragii;
1521   ptrdiff_t           *ti;
1522   ptrdiff_t           *eob_runs;
1523   int                  qti;
1524   dct_tokens=_dec->dct_tokens;
1525   dct_fzig_zag=_dec->state.opt_data.dct_fzig_zag;
1526   frags=_dec->state.frags;
1527   coded_fragis=_pipe->coded_fragis[_pli];
1528   ncoded_fragis=_pipe->ncoded_fragis[_pli];
1529   ti=_pipe->ti[_pli];
1530   eob_runs=_pipe->eob_runs[_pli];
1531   for(qti=0;qti<2;qti++)dc_quant[qti]=_pipe->dequant[_pli][0][qti][0];
1532   for(fragii=0;fragii<ncoded_fragis;fragii++){
1533     const ogg_uint16_t *ac_quant;
1534     ptrdiff_t           fragi;
1535     int                 last_zzi;
1536     int                 zzi;
1537     fragi=coded_fragis[fragii];
1538     qti=frags[fragi].mb_mode!=OC_MODE_INTRA;
1539     ac_quant=_pipe->dequant[_pli][frags[fragi].qii][qti];
1540     /*Decode the AC coefficients.*/
1541     for(zzi=0;zzi<64;){
1542       int token;
1543       last_zzi=zzi;
1544       if(eob_runs[zzi]){
1545         eob_runs[zzi]--;
1546         break;
1547       }
1548       else{
1549         ptrdiff_t eob;
1550         int       cw;
1551         int       rlen;
1552         int       coeff;
1553         int       lti;
1554         lti=ti[zzi];
1555         token=dct_tokens[lti++];
1556         cw=OC_DCT_CODE_WORD[token];
1557         /*These parts could be done branchless, but the branches are fairly
1558            predictable and the C code translates into more than a few
1559            instructions, so it's worth it to avoid them.*/
1560         if(OC_DCT_TOKEN_NEEDS_MORE(token)){
1561           cw+=dct_tokens[lti++]<<OC_DCT_TOKEN_EB_POS(token);
1562         }
1563         eob=cw>>OC_DCT_CW_EOB_SHIFT&0xFFF;
1564         if(token==OC_DCT_TOKEN_FAT_EOB){
1565           eob+=dct_tokens[lti++]<<8;
1566           if(eob==0)eob=OC_DCT_EOB_FINISH;
1567         }
1568         rlen=(unsigned char)(cw>>OC_DCT_CW_RLEN_SHIFT);
1569         cw^=-(cw&1<<OC_DCT_CW_FLIP_BIT);
1570         coeff=cw>>OC_DCT_CW_MAG_SHIFT;
1571         eob_runs[zzi]=eob;
1572         ti[zzi]=lti;
1573         zzi+=rlen;
1574         _pipe->dct_coeffs[dct_fzig_zag[zzi]]=
1575          (ogg_int16_t)(coeff*(int)ac_quant[zzi]);
1576         zzi+=!eob;
1577       }
1578     }
1579     /*TODO: zzi should be exactly 64 here.
1580       If it's not, we should report some kind of warning.*/
1581     zzi=OC_MINI(zzi,64);
1582     _pipe->dct_coeffs[0]=(ogg_int16_t)frags[fragi].dc;
1583     /*last_zzi is always initialized.
1584       If your compiler thinks otherwise, it is dumb.*/
1585     oc_state_frag_recon(&_dec->state,fragi,_pli,
1586      _pipe->dct_coeffs,last_zzi,dc_quant[qti]);
1587   }
1588   _pipe->coded_fragis[_pli]+=ncoded_fragis;
1589   /*Right now the reconstructed MCU has only the coded blocks in it.*/
1590   /*TODO: We make the decision here to always copy the uncoded blocks into it
1591      from the reference frame.
1592     We could also copy the coded blocks back over the reference frame, if we
1593      wait for an additional MCU to be decoded, which might be faster if only a
1594      small number of blocks are coded.
1595     However, this introduces more latency, creating a larger cache footprint.
1596     It's unknown which decision is better, but this one results in simpler
1597      code, and the hard case (high bitrate, high resolution) is handled
1598      correctly.*/
1599   /*Copy the uncoded blocks from the previous reference frame.*/
1600   if(_pipe->nuncoded_fragis[_pli]>0){
1601     _pipe->uncoded_fragis[_pli]-=_pipe->nuncoded_fragis[_pli];
1602     oc_frag_copy_list(&_dec->state,
1603      _dec->state.ref_frame_data[OC_FRAME_SELF],
1604      _dec->state.ref_frame_data[OC_FRAME_PREV],
1605      _dec->state.ref_ystride[_pli],_pipe->uncoded_fragis[_pli],
1606      _pipe->nuncoded_fragis[_pli],_dec->state.frag_buf_offs);
1607   }
1608 }
1609 
/*Filter a horizontal block edge.
  Each of the 8 columns is processed independently: 10 vertically adjacent
   pixels are read from the source, and 8 pixels (aligned with input rows 1
   through 8) are written to the destination.
  The edge being filtered lies between input rows 4 and 5.
  _dst:         The first of the 8 output rows.
  _dst_ystride: The offset between two consecutive output rows.
  _src:         The first of the 10 input rows.
  _src_ystride: The offset between two consecutive input rows.
  _qstep:       A column is only smoothed if the step across the edge is
                 strictly smaller than this.
  _flimit:      A column is only smoothed if the activity on both sides of
                 the edge is strictly smaller than this.
  _variance0:   Accumulates the activity of input rows 0 through 4 (clamped
                 to 255 per column).
  _variance1:   Accumulates the activity of input rows 5 through 9 (clamped
                 to 255 per column).*/
static void oc_filter_hedge(unsigned char *_dst,int _dst_ystride,
 const unsigned char *_src,int _src_ystride,int _qstep,int _flimit,
 int *_variance0,int *_variance1){
  int col;
  for(col=0;col<8;col++){
    const unsigned char *in;
    unsigned char       *out;
    int                  p[10];
    int                  q[8];
    int                  act0;
    int                  act1;
    int                  i;
    in=_src+col;
    out=_dst+col;
    /*Load the whole column before anything is written.*/
    for(i=0;i<10;i++)p[i]=in[i*_src_ystride];
    /*Sum of absolute differences between neighbors on each side of the
       edge.*/
    act0=act1=0;
    for(i=0;i<4;i++){
      act0+=abs(p[i+1]-p[i]);
      act1+=abs(p[i+5]-p[i+6]);
    }
    *_variance0+=OC_MINI(255,act0);
    *_variance1+=OC_MINI(255,act1);
    if(act0>=_flimit||act1>=_flimit||abs(p[5]-p[4])>=_qstep){
      /*Too much detail, or too large a step: pass the pixels through.*/
      for(i=0;i<8;i++)q[i]=p[i+1];
    }
    else{
      /*Low-pass the column; every set of taps sums to 8.*/
      q[0]=(3*p[0]+2*p[1]+p[2]+p[3]+p[4]+4)>>3;
      q[1]=(2*p[0]+p[1]+2*p[2]+p[3]+p[4]+p[5]+4)>>3;
      for(i=0;i<4;i++){
        q[i+2]=(p[i]+p[i+1]+p[i+2]+2*p[i+3]+p[i+4]+p[i+5]+p[i+6]+4)>>3;
      }
      q[6]=(p[4]+p[5]+p[6]+2*p[7]+p[8]+2*p[9]+4)>>3;
      q[7]=(p[5]+p[6]+p[7]+2*p[8]+3*p[9]+4)>>3;
    }
    for(i=0;i<8;i++)out[i*_dst_ystride]=(unsigned char)q[i];
  }
}
1663 
/*Filter a vertical block edge, in place.
  Each of the 8 rows is processed independently: 10 horizontally adjacent
   pixels are read starting one pixel to the left of _dst, and, if the row
   qualifies for smoothing, the 8 pixels starting at _dst are overwritten.
  Rows that do not qualify are left untouched.
  The edge being filtered lies between _dst[3] and _dst[4].
  _dst:         The first pixel to (potentially) overwrite in the first row.
  _dst_ystride: The offset between two consecutive rows.
  _qstep:       A row is only smoothed if the step across the edge is strictly
                 smaller than this.
  _flimit:      A row is only smoothed if the activity on both sides of the
                 edge is strictly smaller than this.
  _variances:   _variances[0] accumulates the activity to the left of the
                 edge and _variances[1] the activity to the right (each
                 clamped to 255 per row).*/
static void oc_filter_vedge(unsigned char *_dst,int _dst_ystride,
 int _qstep,int _flimit,int *_variances){
  unsigned char *row;
  int            y;
  row=_dst;
  for(y=0;y<8;y++){
    int p[10];
    int act0;
    int act1;
    int i;
    /*Load the whole row before anything is written.*/
    for(i=0;i<10;i++)p[i]=row[i-1];
    act0=act1=0;
    for(i=0;i<4;i++){
      act0+=abs(p[i+1]-p[i]);
      act1+=abs(p[i+5]-p[i+6]);
    }
    _variances[0]+=OC_MINI(255,act0);
    _variances[1]+=OC_MINI(255,act1);
    if(act0<_flimit&&act1<_flimit&&abs(p[5]-p[4])<_qstep){
      /*Low-pass the row; every set of taps sums to 8.*/
      row[0]=(unsigned char)((3*p[0]+2*p[1]+p[2]+p[3]+p[4]+4)>>3);
      row[1]=(unsigned char)((2*p[0]+p[1]+2*p[2]+p[3]+p[4]+p[5]+4)>>3);
      for(i=0;i<4;i++){
        row[i+2]=(unsigned char)((p[i]+p[i+1]+p[i+2]+2*p[i+3]+
         p[i+4]+p[i+5]+p[i+6]+4)>>3);
      }
      row[6]=(unsigned char)((p[4]+p[5]+p[6]+2*p[7]+p[8]+2*p[9]+4)>>3);
      row[7]=(unsigned char)((p[5]+p[6]+p[7]+2*p[8]+3*p[9]+4)>>3);
    }
    row+=_dst_ystride;
  }
}
1700 
/*Deblock a range of fragment rows of one plane, reading from _src and writing
   to _dst.
  Horizontal edges are filtered while copying from the source plane; vertical
   edges are then filtered in place in the destination plane.
  Activity measured by the edge filters is accumulated into _dec->variances.
  _dec:       The decoder context (supplies the fragment plane layout, the
               per-fragment DC qi values, and the pp_dc_scale table).
  _dst:       The destination image; plane _pli is written.
  _src:       The source image; plane _pli is read.
  _pli:       The index of the plane to process.
  _fragy0:    The first fragment row to process.
  _fragy_end: One past the last fragment row to process.*/
static void oc_dec_deblock_frag_rows(oc_dec_ctx *_dec,
 th_img_plane *_dst,th_img_plane *_src,int _pli,int _fragy0,
 int _fragy_end){
  const th_img_plane  *dplane;
  const th_img_plane  *splane;
  oc_fragment_plane   *fplane;
  int                 *variance;
  unsigned char       *dc_qi;
  unsigned char       *dst;
  const unsigned char *src;
  ptrdiff_t            froffset;
  int                  dst_ystride;
  int                  src_ystride;
  int                  nhfrags;
  int                  width;
  int                  height;
  int                  notstart;
  int                  notdone;
  int                  flimit;
  int                  qstep;
  int                  y_end;
  int                  y;
  int                  x;
  dplane=_dst+_pli;
  splane=_src+_pli;
  fplane=_dec->state.fplanes+_pli;
  nhfrags=fplane->nhfrags;
  froffset=fplane->froffset+_fragy0*(ptrdiff_t)nhfrags;
  variance=_dec->variances+froffset;
  dc_qi=_dec->dc_qis+froffset;
  notstart=_fragy0>0;
  notdone=_fragy_end<fplane->nvfrags;
  /*Clear one row of variances beyond the range, except at the end of the
     frame; when not starting at the top, the first row of the range is left
     alone.*/
  memset(notstart?variance+nhfrags:variance,0,
   (_fragy_end+notdone-_fragy0-notstart)*(nhfrags*sizeof(variance[0])));
  /*Except when starting at the top of the frame, begin in the middle of the
     first fragment row.*/
  y=(_fragy0<<3)+(notstart?4:0);
  dst_ystride=dplane->stride;
  src_ystride=splane->stride;
  dst=dplane->data+y*(ptrdiff_t)dst_ystride;
  src=splane->data+y*(ptrdiff_t)src_ystride;
  width=dplane->width;
  /*The top four rows of the frame are copied without filtering.*/
  while(y<4){
    memcpy(dst,src,width*sizeof(dst[0]));
    dst+=dst_ystride;
    src+=src_ystride;
    y++;
  }
  /*The last fragment row of the frame is excluded from the main loop.*/
  y_end=(_fragy_end-(notdone?0:1))<<3;
  while(y<y_end){
    x=0;
    do{
      qstep=_dec->pp_dc_scale[*dc_qi];
      flimit=(qstep*3)>>2;
      /*The horizontal edge filter reads 10 rows starting one row above.*/
      oc_filter_hedge(dst+x,dst_ystride,src+x-src_ystride,src_ystride,
       qstep,flimit,variance,variance+nhfrags);
      /*There is no vertical edge to the left of the first fragment.*/
      if(x>0){
        oc_filter_vedge(dst+x-(dst_ystride<<2)-4,dst_ystride,
         qstep,flimit,variance-1);
      }
      variance++;
      dc_qi++;
      x+=8;
    }
    while(x<width);
    dst+=dst_ystride<<3;
    src+=src_ystride<<3;
    y+=8;
  }
  /*Nothing more to do unless the range reaches the bottom of the frame.*/
  if(notdone)return;
  /*Copy the remaining rows without horizontal edge filtering.*/
  height=dplane->height;
  while(y<height){
    memcpy(dst,src,width*sizeof(dst[0]));
    dst+=dst_ystride;
    src+=src_ystride;
    y++;
  }
  /*Filter the last row of vertical block edges.*/
  dc_qi++;
  for(x=8;x<width;x+=8){
    qstep=_dec->pp_dc_scale[*dc_qi];
    dc_qi++;
    flimit=(qstep*3)>>2;
    oc_filter_vedge(dst+x-(dst_ystride<<3)-4,dst_ystride,
     qstep,flimit,variance);
    variance++;
  }
}
1786 
/*Dering a single 8x8 block, in place.
  A weight is first computed for every pair of vertically and horizontally
   adjacent pixels in and around the block; each pixel is then replaced by a
   weighted average of itself and its four neighbors.
  Because the filter runs in place, the left and upper neighbors inside the
   block have already been filtered by the time they are read.
  _idata:     The upper-left pixel of the block.
  _ystride:   The offset between two consecutive rows.
  _b:         Edge flags: bit 0 means there is no pixel to the left of the
               block to read, bit 1 none to the right, bit 2 none above, and
               bit 3 none below; the block's own border pixel is used in
               place of the missing neighbor.
  _dc_scale:  Added to the base weight; 3*_dc_scale also bounds the weights.
  _sharp_mod: The weight used when the difference between two neighbors is
               very large.
  _strong:    0 or 1; selects the weight ceiling and how strongly pixel
               differences reduce the weight.*/
static void oc_dering_block(unsigned char *_idata,int _ystride,int _b,
 int _dc_scale,int _sharp_mod,int _strong){
  static const unsigned char OC_MOD_MAX[2]={24,32};
  static const unsigned char OC_MOD_SHIFT[2]={1,0};
  const unsigned char *above;
  const unsigned char *cur;
  const unsigned char *below;
  const unsigned char *lcol;
  const unsigned char *ccol;
  unsigned char       *out;
  int                  vmod[72];
  int                  hmod[72];
  int                  mod_hi;
  int                  mod_shift;
  int                  lofs;
  int                  rofs;
  int                  by;
  int                  bx;
  mod_hi=OC_MINI(3*_dc_scale,OC_MOD_MAX[_strong]);
  mod_shift=OC_MOD_SHIFT[_strong];
  /*Weights between vertically adjacent pixels: vmod[8*by+bx] relates row
     by-1 to row by in column bx.*/
  cur=_idata;
  above=(_b&4)?cur:cur-_ystride;
  for(by=0;by<9;by++){
    for(bx=0;bx<8;bx++){
      int mod;
      mod=32+_dc_scale-(abs(cur[bx]-above[bx])<<mod_shift);
      vmod[by*8+bx]=mod<-64?_sharp_mod:OC_CLAMPI(0,mod,mod_hi);
    }
    above=cur;
    /*Do not step past the last row of the block when nothing lies below.*/
    if(!(_b&8)||by<7)cur+=_ystride;
  }
  /*Weights between horizontally adjacent pixels: hmod[8*bx+by] relates
     column bx-1 to column bx in row by.*/
  ccol=_idata;
  lcol=(_b&1)?ccol:ccol-1;
  for(bx=0;bx<9;bx++){
    for(by=0;by<8;by++){
      int ofs;
      int mod;
      ofs=by*_ystride;
      mod=32+_dc_scale-(abs(ccol[ofs]-lcol[ofs])<<mod_shift);
      hmod[bx*8+by]=mod<-64?_sharp_mod:OC_CLAMPI(0,mod,mod_hi);
    }
    lcol=ccol;
    /*Do not step past the last column of the block when nothing lies to the
       right.*/
    if(!(_b&2)||bx<7)ccol++;
  }
  /*Offsets of the left neighbor of column 0 and the right neighbor of column
     7, relative to the start of the row.*/
  lofs=(_b&1)?0:-1;
  rofs=(_b&2)?7:8;
  out=_idata;
  cur=_idata;
  above=(_b&4)?cur:cur-_ystride;
  below=cur+_ystride;
  for(by=0;by<8;by++){
    for(bx=0;bx<8;bx++){
      int cw;
      int acc;
      int val;
      int w;
      /*cw is the weight left for the center pixel; acc collects the weighted
         neighbors plus a rounding offset.*/
      cw=128;
      acc=64;
      w=hmod[bx*8+by];
      cw-=w;
      acc+=w*cur[bx>0?bx-1:lofs];
      w=vmod[by*8+bx];
      cw-=w;
      acc+=w*above[bx];
      w=vmod[(by+1)*8+bx];
      cw-=w;
      acc+=w*below[bx];
      w=hmod[(bx+1)*8+by];
      cw-=w;
      acc+=w*cur[bx<7?bx+1:rofs];
      val=(cw*cur[bx]+acc)>>7;
      out[bx]=OC_CLAMP255(val);
    }
    out+=_ystride;
    above=cur;
    cur=below;
    if(!(_b&8)||by<6)below+=_ystride;
  }
}
1887 
1888 #define OC_DERING_THRESH1 (384)
1889 #define OC_DERING_THRESH2 (4*OC_DERING_THRESH1)
1890 #define OC_DERING_THRESH3 (5*OC_DERING_THRESH1)
1891 #define OC_DERING_THRESH4 (10*OC_DERING_THRESH1)
1892 
oc_dec_dering_frag_rows(oc_dec_ctx * _dec,th_img_plane * _img,int _pli,int _fragy0,int _fragy_end)1893 static void oc_dec_dering_frag_rows(oc_dec_ctx *_dec,th_img_plane *_img,
1894  int _pli,int _fragy0,int _fragy_end){
1895   th_img_plane      *iplane;
1896   oc_fragment_plane *fplane;
1897   oc_fragment       *frag;
1898   int               *variance;
1899   unsigned char     *idata;
1900   ptrdiff_t          froffset;
1901   int                ystride;
1902   int                nhfrags;
1903   int                sthresh;
1904   int                strong;
1905   int                y_end;
1906   int                width;
1907   int                height;
1908   int                y;
1909   int                x;
1910   iplane=_img+_pli;
1911   fplane=_dec->state.fplanes+_pli;
1912   nhfrags=fplane->nhfrags;
1913   froffset=fplane->froffset+_fragy0*(ptrdiff_t)nhfrags;
1914   variance=_dec->variances+froffset;
1915   frag=_dec->state.frags+froffset;
1916   strong=_dec->pp_level>=(_pli?OC_PP_LEVEL_SDERINGC:OC_PP_LEVEL_SDERINGY);
1917   sthresh=_pli?OC_DERING_THRESH4:OC_DERING_THRESH3;
1918   y=_fragy0<<3;
1919   ystride=iplane->stride;
1920   idata=iplane->data+y*(ptrdiff_t)ystride;
1921   y_end=_fragy_end<<3;
1922   width=iplane->width;
1923   height=iplane->height;
1924   for(;y<y_end;y+=8){
1925     for(x=0;x<width;x+=8){
1926       int b;
1927       int qi;
1928       int var;
1929       qi=_dec->state.qis[frag->qii];
1930       var=*variance;
1931       b=(x<=0)|(x+8>=width)<<1|(y<=0)<<2|(y+8>=height)<<3;
1932       if(strong&&var>sthresh){
1933         oc_dering_block(idata+x,ystride,b,
1934          _dec->pp_dc_scale[qi],_dec->pp_sharp_mod[qi],1);
1935         if(_pli||!(b&1)&&*(variance-1)>OC_DERING_THRESH4||
1936          !(b&2)&&variance[1]>OC_DERING_THRESH4||
1937          !(b&4)&&*(variance-nhfrags)>OC_DERING_THRESH4||
1938          !(b&8)&&variance[nhfrags]>OC_DERING_THRESH4){
1939           oc_dering_block(idata+x,ystride,b,
1940            _dec->pp_dc_scale[qi],_dec->pp_sharp_mod[qi],1);
1941           oc_dering_block(idata+x,ystride,b,
1942            _dec->pp_dc_scale[qi],_dec->pp_sharp_mod[qi],1);
1943         }
1944       }
1945       else if(var>OC_DERING_THRESH2){
1946         oc_dering_block(idata+x,ystride,b,
1947          _dec->pp_dc_scale[qi],_dec->pp_sharp_mod[qi],1);
1948       }
1949       else if(var>OC_DERING_THRESH1){
1950         oc_dering_block(idata+x,ystride,b,
1951          _dec->pp_dc_scale[qi],_dec->pp_sharp_mod[qi],0);
1952       }
1953       frag++;
1954       variance++;
1955     }
1956     idata+=ystride<<3;
1957   }
1958 }
1959 
1960 
1961 
th_decode_alloc(const th_info * _info,const th_setup_info * _setup)1962 th_dec_ctx *th_decode_alloc(const th_info *_info,const th_setup_info *_setup){
1963   oc_dec_ctx *dec;
1964   if(_info==NULL||_setup==NULL)return NULL;
1965   dec=oc_aligned_malloc(sizeof(*dec),16);
1966   if(dec==NULL||oc_dec_init(dec,_info,_setup)<0){
1967     oc_aligned_free(dec);
1968     return NULL;
1969   }
1970   dec->state.curframe_num=0;
1971   return dec;
1972 }
1973 
/*Clear and release a decoder context.
  _dec: The decoder context to free; NULL is accepted and ignored.*/
void th_decode_free(th_dec_ctx *_dec){
  if(_dec==NULL)return;
  oc_dec_clear(_dec);
  oc_aligned_free(_dec);
}
1980 
/*Decoder control function.
  _dec:    The decoder context.
  _req:    The control code (one of the TH_DECCTL_* values) to process.
  _buf:    The parameters for this control code.
  _buf_sz: The size of the parameter buffer, in bytes.
  Return: 0 on success.
          TH_EFAULT if _req is recognized and _dec or _buf is NULL.
          TH_EINVAL if _buf_sz does not match the size expected for _req, or
           the supplied value is out of range.
          TH_EIMPL if _req is not recognized.*/
int th_decode_ctl(th_dec_ctx *_dec,int _req,void *_buf,
 size_t _buf_sz){
  switch(_req){
  case TH_DECCTL_GET_PPLEVEL_MAX:{
    if(_dec==NULL||_buf==NULL)return TH_EFAULT;
    if(_buf_sz!=sizeof(int))return TH_EINVAL;
    (*(int *)_buf)=OC_PP_LEVEL_MAX;
    return 0;
  }break;
  case TH_DECCTL_SET_PPLEVEL:{
    int pp_level;
    if(_dec==NULL||_buf==NULL)return TH_EFAULT;
    if(_buf_sz!=sizeof(int))return TH_EINVAL;
    pp_level=*(int *)_buf;
    if(pp_level<0||pp_level>OC_PP_LEVEL_MAX)return TH_EINVAL;
    _dec->pp_level=pp_level;
    return 0;
  }break;
  case TH_DECCTL_SET_GRANPOS:{
    ogg_int64_t granpos;
    ogg_int64_t offset_mask;
    if(_dec==NULL||_buf==NULL)return TH_EFAULT;
    if(_buf_sz!=sizeof(ogg_int64_t))return TH_EINVAL;
    granpos=*(ogg_int64_t *)_buf;
    if(granpos<0)return TH_EINVAL;
    _dec->state.granpos=granpos;
    /*The high bits of the granule position hold the key frame number, and the
       low keyframe_granule_shift bits the offset from that key frame.*/
    _dec->state.keyframe_num=(granpos>>_dec->state.info.keyframe_granule_shift)
     -_dec->state.granpos_bias;
    /*Build the mask in 64 bits: with a plain int, a shift of 31 would shift
       into the sign bit and then overflow on the subtraction.*/
    offset_mask=
     (((ogg_int64_t)1)<<_dec->state.info.keyframe_granule_shift)-1;
    _dec->state.curframe_num=_dec->state.keyframe_num+(granpos&offset_mask);
    return 0;
  }break;
  case TH_DECCTL_SET_STRIPE_CB:{
    th_stripe_callback *cb;
    if(_dec==NULL||_buf==NULL)return TH_EFAULT;
    if(_buf_sz!=sizeof(th_stripe_callback))return TH_EINVAL;
    cb=(th_stripe_callback *)_buf;
    _dec->stripe_cb.ctx=cb->ctx;
    _dec->stripe_cb.stripe_decoded=cb->stripe_decoded;
    return 0;
  }break;
#ifdef HAVE_CAIRO
  /*Each telemetry request turns telemetry on and stores the supplied int in
     the corresponding field.*/
  case TH_DECCTL_SET_TELEMETRY_MBMODE:{
    if(_dec==NULL||_buf==NULL)return TH_EFAULT;
    if(_buf_sz!=sizeof(int))return TH_EINVAL;
    _dec->telemetry=1;
    _dec->telemetry_mbmode=*(int *)_buf;
    return 0;
  }break;
  case TH_DECCTL_SET_TELEMETRY_MV:{
    if(_dec==NULL||_buf==NULL)return TH_EFAULT;
    if(_buf_sz!=sizeof(int))return TH_EINVAL;
    _dec->telemetry=1;
    _dec->telemetry_mv=*(int *)_buf;
    return 0;
  }break;
  case TH_DECCTL_SET_TELEMETRY_QI:{
    if(_dec==NULL||_buf==NULL)return TH_EFAULT;
    if(_buf_sz!=sizeof(int))return TH_EINVAL;
    _dec->telemetry=1;
    _dec->telemetry_qi=*(int *)_buf;
    return 0;
  }break;
  case TH_DECCTL_SET_TELEMETRY_BITS:{
    if(_dec==NULL||_buf==NULL)return TH_EFAULT;
    if(_buf_sz!=sizeof(int))return TH_EINVAL;
    _dec->telemetry=1;
    _dec->telemetry_bits=*(int *)_buf;
    return 0;
  }break;
#endif
  default:return TH_EIMPL;
  }
}
2054 
2055 /*We're decoding an INTER frame, but have no initialized reference
2056    buffers (i.e., decoding did not start on a key frame).
2057   We initialize them to a solid gray here.*/
oc_dec_init_dummy_frame(th_dec_ctx * _dec)2058 static void oc_dec_init_dummy_frame(th_dec_ctx *_dec){
2059   th_info   *info;
2060   size_t     yplane_sz;
2061   size_t     cplane_sz;
2062   ptrdiff_t  yoffset;
2063   int        yhstride;
2064   int        yheight;
2065   int        chstride;
2066   int        cheight;
2067   _dec->state.ref_frame_idx[OC_FRAME_GOLD]=0;
2068   _dec->state.ref_frame_idx[OC_FRAME_PREV]=0;
2069   _dec->state.ref_frame_idx[OC_FRAME_SELF]=0;
2070   _dec->state.ref_frame_data[OC_FRAME_GOLD]=
2071    _dec->state.ref_frame_data[OC_FRAME_PREV]=
2072    _dec->state.ref_frame_data[OC_FRAME_SELF]=
2073    _dec->state.ref_frame_bufs[0][0].data;
2074   memcpy(_dec->pp_frame_buf,_dec->state.ref_frame_bufs[0],
2075    sizeof(_dec->pp_frame_buf[0])*3);
2076   info=&_dec->state.info;
2077   yhstride=abs(_dec->state.ref_ystride[0]);
2078   yheight=info->frame_height+2*OC_UMV_PADDING;
2079   chstride=abs(_dec->state.ref_ystride[1]);
2080   cheight=yheight>>!(info->pixel_fmt&2);
2081   yplane_sz=yhstride*(size_t)yheight+16;
2082   cplane_sz=chstride*(size_t)cheight;
2083   yoffset=yhstride*(ptrdiff_t)(yheight-OC_UMV_PADDING-1)+OC_UMV_PADDING;
2084   memset(_dec->state.ref_frame_data[0]-yoffset,0x80,yplane_sz+2*cplane_sz);
2085 }
2086 
2087 #if defined(HAVE_CAIRO)
oc_render_telemetry(th_dec_ctx * _dec,th_ycbcr_buffer _ycbcr,int _telemetry)2088 static void oc_render_telemetry(th_dec_ctx *_dec,th_ycbcr_buffer _ycbcr,
2089  int _telemetry){
2090   /*Stuff the plane into cairo.*/
2091   cairo_surface_t *cs;
2092   unsigned char   *data;
2093   unsigned char   *y_row;
2094   unsigned char   *u_row;
2095   unsigned char   *v_row;
2096   unsigned char   *rgb_row;
2097   int              cstride;
2098   int              w;
2099   int              h;
2100   int              x;
2101   int              y;
2102   int              hdec;
2103   int              vdec;
2104   w=_ycbcr[0].width;
2105   h=_ycbcr[0].height;
2106   hdec=!(_dec->state.info.pixel_fmt&1);
2107   vdec=!(_dec->state.info.pixel_fmt&2);
2108   /*Lazy data buffer init.
2109     We could try to re-use the post-processing buffer, which would save
2110      memory, but complicate the allocation logic there.
2111     I don't think anyone cares about memory usage when using telemetry; it is
2112      not meant for embedded devices.*/
2113   if(_dec->telemetry_frame_data==NULL){
2114     _dec->telemetry_frame_data=_ogg_malloc(
2115      (w*h+2*(w>>hdec)*(h>>vdec))*sizeof(*_dec->telemetry_frame_data));
2116     if(_dec->telemetry_frame_data==NULL)return;
2117   }
2118   cs=cairo_image_surface_create(CAIRO_FORMAT_RGB24,w,h);
2119   /*Sadly, no YUV support in Cairo (yet); convert into the RGB buffer.*/
2120   data=cairo_image_surface_get_data(cs);
2121   if(data==NULL){
2122     cairo_surface_destroy(cs);
2123     return;
2124   }
2125   cstride=cairo_image_surface_get_stride(cs);
2126   y_row=_ycbcr[0].data;
2127   u_row=_ycbcr[1].data;
2128   v_row=_ycbcr[2].data;
2129   rgb_row=data;
2130   for(y=0;y<h;y++){
2131     for(x=0;x<w;x++){
2132       int r;
2133       int g;
2134       int b;
2135       r=(1904000*y_row[x]+2609823*v_row[x>>hdec]-363703744)/1635200;
2136       g=(3827562*y_row[x]-1287801*u_row[x>>hdec]
2137        -2672387*v_row[x>>hdec]+447306710)/3287200;
2138       b=(952000*y_row[x]+1649289*u_row[x>>hdec]-225932192)/817600;
2139       rgb_row[4*x+0]=OC_CLAMP255(b);
2140       rgb_row[4*x+1]=OC_CLAMP255(g);
2141       rgb_row[4*x+2]=OC_CLAMP255(r);
2142     }
2143     y_row+=_ycbcr[0].stride;
2144     u_row+=_ycbcr[1].stride&-((y&1)|!vdec);
2145     v_row+=_ycbcr[2].stride&-((y&1)|!vdec);
2146     rgb_row+=cstride;
2147   }
2148   /*Draw coded identifier for each macroblock (stored in Hilbert order).*/
2149   {
2150     cairo_t           *c;
2151     const oc_fragment *frags;
2152     oc_mv             *frag_mvs;
2153     const signed char *mb_modes;
2154     oc_mb_map         *mb_maps;
2155     size_t             nmbs;
2156     size_t             mbi;
2157     int                row2;
2158     int                col2;
2159     int                qim[3]={0,0,0};
2160     if(_dec->state.nqis==2){
2161       int bqi;
2162       bqi=_dec->state.qis[0];
2163       if(_dec->state.qis[1]>bqi)qim[1]=1;
2164       if(_dec->state.qis[1]<bqi)qim[1]=-1;
2165     }
2166     if(_dec->state.nqis==3){
2167       int bqi;
2168       int cqi;
2169       int dqi;
2170       bqi=_dec->state.qis[0];
2171       cqi=_dec->state.qis[1];
2172       dqi=_dec->state.qis[2];
2173       if(cqi>bqi&&dqi>bqi){
2174         if(dqi>cqi){
2175           qim[1]=1;
2176           qim[2]=2;
2177         }
2178         else{
2179           qim[1]=2;
2180           qim[2]=1;
2181         }
2182       }
2183       else if(cqi<bqi&&dqi<bqi){
2184         if(dqi<cqi){
2185           qim[1]=-1;
2186           qim[2]=-2;
2187         }
2188         else{
2189           qim[1]=-2;
2190           qim[2]=-1;
2191         }
2192       }
2193       else{
2194         if(cqi<bqi)qim[1]=-1;
2195         else qim[1]=1;
2196         if(dqi<bqi)qim[2]=-1;
2197         else qim[2]=1;
2198       }
2199     }
2200     c=cairo_create(cs);
2201     frags=_dec->state.frags;
2202     frag_mvs=_dec->state.frag_mvs;
2203     mb_modes=_dec->state.mb_modes;
2204     mb_maps=_dec->state.mb_maps;
2205     nmbs=_dec->state.nmbs;
2206     row2=0;
2207     col2=0;
2208     for(mbi=0;mbi<nmbs;mbi++){
2209       float x;
2210       float y;
2211       int   bi;
2212       y=h-(row2+((col2+1>>1)&1))*16-16;
2213       x=(col2>>1)*16;
2214       cairo_set_line_width(c,1.);
2215       /*Keyframe (all intra) red box.*/
2216       if(_dec->state.frame_type==OC_INTRA_FRAME){
2217         if(_dec->telemetry_mbmode&0x02){
2218           cairo_set_source_rgba(c,1.,0,0,.5);
2219           cairo_rectangle(c,x+2.5,y+2.5,11,11);
2220           cairo_stroke_preserve(c);
2221           cairo_set_source_rgba(c,1.,0,0,.25);
2222           cairo_fill(c);
2223         }
2224       }
2225       else{
2226         ptrdiff_t fragi;
2227         int       frag_mvx;
2228         int       frag_mvy;
2229         for(bi=0;bi<4;bi++){
2230           fragi=mb_maps[mbi][0][bi];
2231           if(fragi>=0&&frags[fragi].coded){
2232             frag_mvx=OC_MV_X(frag_mvs[fragi]);
2233             frag_mvy=OC_MV_Y(frag_mvs[fragi]);
2234             break;
2235           }
2236         }
2237         if(bi<4){
2238           switch(mb_modes[mbi]){
2239             case OC_MODE_INTRA:{
2240               if(_dec->telemetry_mbmode&0x02){
2241                 cairo_set_source_rgba(c,1.,0,0,.5);
2242                 cairo_rectangle(c,x+2.5,y+2.5,11,11);
2243                 cairo_stroke_preserve(c);
2244                 cairo_set_source_rgba(c,1.,0,0,.25);
2245                 cairo_fill(c);
2246               }
2247             }break;
2248             case OC_MODE_INTER_NOMV:{
2249               if(_dec->telemetry_mbmode&0x01){
2250                 cairo_set_source_rgba(c,0,0,1.,.5);
2251                 cairo_rectangle(c,x+2.5,y+2.5,11,11);
2252                 cairo_stroke_preserve(c);
2253                 cairo_set_source_rgba(c,0,0,1.,.25);
2254                 cairo_fill(c);
2255               }
2256             }break;
2257             case OC_MODE_INTER_MV:{
2258               if(_dec->telemetry_mbmode&0x04){
2259                 cairo_rectangle(c,x+2.5,y+2.5,11,11);
2260                 cairo_set_source_rgba(c,0,1.,0,.5);
2261                 cairo_stroke(c);
2262               }
2263               if(_dec->telemetry_mv&0x04){
2264                 cairo_move_to(c,x+8+frag_mvx,y+8-frag_mvy);
2265                 cairo_set_source_rgba(c,1.,1.,1.,.9);
2266                 cairo_set_line_width(c,3.);
2267                 cairo_line_to(c,x+8+frag_mvx*.66,y+8-frag_mvy*.66);
2268                 cairo_stroke_preserve(c);
2269                 cairo_set_line_width(c,2.);
2270                 cairo_line_to(c,x+8+frag_mvx*.33,y+8-frag_mvy*.33);
2271                 cairo_stroke_preserve(c);
2272                 cairo_set_line_width(c,1.);
2273                 cairo_line_to(c,x+8,y+8);
2274                 cairo_stroke(c);
2275               }
2276             }break;
2277             case OC_MODE_INTER_MV_LAST:{
2278               if(_dec->telemetry_mbmode&0x08){
2279                 cairo_rectangle(c,x+2.5,y+2.5,11,11);
2280                 cairo_set_source_rgba(c,0,1.,0,.5);
2281                 cairo_move_to(c,x+13.5,y+2.5);
2282                 cairo_line_to(c,x+2.5,y+8);
2283                 cairo_line_to(c,x+13.5,y+13.5);
2284                 cairo_stroke(c);
2285               }
2286               if(_dec->telemetry_mv&0x08){
2287                 cairo_move_to(c,x+8+frag_mvx,y+8-frag_mvy);
2288                 cairo_set_source_rgba(c,1.,1.,1.,.9);
2289                 cairo_set_line_width(c,3.);
2290                 cairo_line_to(c,x+8+frag_mvx*.66,y+8-frag_mvy*.66);
2291                 cairo_stroke_preserve(c);
2292                 cairo_set_line_width(c,2.);
2293                 cairo_line_to(c,x+8+frag_mvx*.33,y+8-frag_mvy*.33);
2294                 cairo_stroke_preserve(c);
2295                 cairo_set_line_width(c,1.);
2296                 cairo_line_to(c,x+8,y+8);
2297                 cairo_stroke(c);
2298               }
2299             }break;
2300             case OC_MODE_INTER_MV_LAST2:{
2301               if(_dec->telemetry_mbmode&0x10){
2302                 cairo_rectangle(c,x+2.5,y+2.5,11,11);
2303                 cairo_set_source_rgba(c,0,1.,0,.5);
2304                 cairo_move_to(c,x+8,y+2.5);
2305                 cairo_line_to(c,x+2.5,y+8);
2306                 cairo_line_to(c,x+8,y+13.5);
2307                 cairo_move_to(c,x+13.5,y+2.5);
2308                 cairo_line_to(c,x+8,y+8);
2309                 cairo_line_to(c,x+13.5,y+13.5);
2310                 cairo_stroke(c);
2311               }
2312               if(_dec->telemetry_mv&0x10){
2313                 cairo_move_to(c,x+8+frag_mvx,y+8-frag_mvy);
2314                 cairo_set_source_rgba(c,1.,1.,1.,.9);
2315                 cairo_set_line_width(c,3.);
2316                 cairo_line_to(c,x+8+frag_mvx*.66,y+8-frag_mvy*.66);
2317                 cairo_stroke_preserve(c);
2318                 cairo_set_line_width(c,2.);
2319                 cairo_line_to(c,x+8+frag_mvx*.33,y+8-frag_mvy*.33);
2320                 cairo_stroke_preserve(c);
2321                 cairo_set_line_width(c,1.);
2322                 cairo_line_to(c,x+8,y+8);
2323                 cairo_stroke(c);
2324               }
2325             }break;
2326             case OC_MODE_GOLDEN_NOMV:{
2327               if(_dec->telemetry_mbmode&0x20){
2328                 cairo_set_source_rgba(c,1.,1.,0,.5);
2329                 cairo_rectangle(c,x+2.5,y+2.5,11,11);
2330                 cairo_stroke_preserve(c);
2331                 cairo_set_source_rgba(c,1.,1.,0,.25);
2332                 cairo_fill(c);
2333               }
2334             }break;
2335             case OC_MODE_GOLDEN_MV:{
2336               if(_dec->telemetry_mbmode&0x40){
2337                 cairo_rectangle(c,x+2.5,y+2.5,11,11);
2338                 cairo_set_source_rgba(c,1.,1.,0,.5);
2339                 cairo_stroke(c);
2340               }
2341               if(_dec->telemetry_mv&0x40){
2342                 cairo_move_to(c,x+8+frag_mvx,y+8-frag_mvy);
2343                 cairo_set_source_rgba(c,1.,1.,1.,.9);
2344                 cairo_set_line_width(c,3.);
2345                 cairo_line_to(c,x+8+frag_mvx*.66,y+8-frag_mvy*.66);
2346                 cairo_stroke_preserve(c);
2347                 cairo_set_line_width(c,2.);
2348                 cairo_line_to(c,x+8+frag_mvx*.33,y+8-frag_mvy*.33);
2349                 cairo_stroke_preserve(c);
2350                 cairo_set_line_width(c,1.);
2351                 cairo_line_to(c,x+8,y+8);
2352                 cairo_stroke(c);
2353               }
2354             }break;
2355             case OC_MODE_INTER_MV_FOUR:{
2356               if(_dec->telemetry_mbmode&0x80){
2357                 cairo_rectangle(c,x+2.5,y+2.5,4,4);
2358                 cairo_rectangle(c,x+9.5,y+2.5,4,4);
2359                 cairo_rectangle(c,x+2.5,y+9.5,4,4);
2360                 cairo_rectangle(c,x+9.5,y+9.5,4,4);
2361                 cairo_set_source_rgba(c,0,1.,0,.5);
2362                 cairo_stroke(c);
2363               }
2364               /*4mv is odd, coded in raster order.*/
2365               fragi=mb_maps[mbi][0][0];
2366               if(frags[fragi].coded&&_dec->telemetry_mv&0x80){
2367                 frag_mvx=OC_MV_X(frag_mvs[fragi]);
2368                 frag_mvx=OC_MV_Y(frag_mvs[fragi]);
2369                 cairo_move_to(c,x+4+frag_mvx,y+12-frag_mvy);
2370                 cairo_set_source_rgba(c,1.,1.,1.,.9);
2371                 cairo_set_line_width(c,3.);
2372                 cairo_line_to(c,x+4+frag_mvx*.66,y+12-frag_mvy*.66);
2373                 cairo_stroke_preserve(c);
2374                 cairo_set_line_width(c,2.);
2375                 cairo_line_to(c,x+4+frag_mvx*.33,y+12-frag_mvy*.33);
2376                 cairo_stroke_preserve(c);
2377                 cairo_set_line_width(c,1.);
2378                 cairo_line_to(c,x+4,y+12);
2379                 cairo_stroke(c);
2380               }
2381               fragi=mb_maps[mbi][0][1];
2382               if(frags[fragi].coded&&_dec->telemetry_mv&0x80){
2383                 frag_mvx=OC_MV_X(frag_mvs[fragi]);
2384                 frag_mvx=OC_MV_Y(frag_mvs[fragi]);
2385                 cairo_move_to(c,x+12+frag_mvx,y+12-frag_mvy);
2386                 cairo_set_source_rgba(c,1.,1.,1.,.9);
2387                 cairo_set_line_width(c,3.);
2388                 cairo_line_to(c,x+12+frag_mvx*.66,y+12-frag_mvy*.66);
2389                 cairo_stroke_preserve(c);
2390                 cairo_set_line_width(c,2.);
2391                 cairo_line_to(c,x+12+frag_mvx*.33,y+12-frag_mvy*.33);
2392                 cairo_stroke_preserve(c);
2393                 cairo_set_line_width(c,1.);
2394                 cairo_line_to(c,x+12,y+12);
2395                 cairo_stroke(c);
2396               }
2397               fragi=mb_maps[mbi][0][2];
2398               if(frags[fragi].coded&&_dec->telemetry_mv&0x80){
2399                 frag_mvx=OC_MV_X(frag_mvs[fragi]);
2400                 frag_mvx=OC_MV_Y(frag_mvs[fragi]);
2401                 cairo_move_to(c,x+4+frag_mvx,y+4-frag_mvy);
2402                 cairo_set_source_rgba(c,1.,1.,1.,.9);
2403                 cairo_set_line_width(c,3.);
2404                 cairo_line_to(c,x+4+frag_mvx*.66,y+4-frag_mvy*.66);
2405                 cairo_stroke_preserve(c);
2406                 cairo_set_line_width(c,2.);
2407                 cairo_line_to(c,x+4+frag_mvx*.33,y+4-frag_mvy*.33);
2408                 cairo_stroke_preserve(c);
2409                 cairo_set_line_width(c,1.);
2410                 cairo_line_to(c,x+4,y+4);
2411                 cairo_stroke(c);
2412               }
2413               fragi=mb_maps[mbi][0][3];
2414               if(frags[fragi].coded&&_dec->telemetry_mv&0x80){
2415                 frag_mvx=OC_MV_X(frag_mvs[fragi]);
2416                 frag_mvx=OC_MV_Y(frag_mvs[fragi]);
2417                 cairo_move_to(c,x+12+frag_mvx,y+4-frag_mvy);
2418                 cairo_set_source_rgba(c,1.,1.,1.,.9);
2419                 cairo_set_line_width(c,3.);
2420                 cairo_line_to(c,x+12+frag_mvx*.66,y+4-frag_mvy*.66);
2421                 cairo_stroke_preserve(c);
2422                 cairo_set_line_width(c,2.);
2423                 cairo_line_to(c,x+12+frag_mvx*.33,y+4-frag_mvy*.33);
2424                 cairo_stroke_preserve(c);
2425                 cairo_set_line_width(c,1.);
2426                 cairo_line_to(c,x+12,y+4);
2427                 cairo_stroke(c);
2428               }
2429             }break;
2430           }
2431         }
2432       }
2433       /*qii illustration.*/
2434       if(_dec->telemetry_qi&0x2){
2435         cairo_set_line_cap(c,CAIRO_LINE_CAP_SQUARE);
2436         for(bi=0;bi<4;bi++){
2437           ptrdiff_t fragi;
2438           int       qiv;
2439           int       xp;
2440           int       yp;
2441           xp=x+(bi&1)*8;
2442           yp=y+8-(bi&2)*4;
2443           fragi=mb_maps[mbi][0][bi];
2444           if(fragi>=0&&frags[fragi].coded){
2445             qiv=qim[frags[fragi].qii];
2446             cairo_set_line_width(c,3.);
2447             cairo_set_source_rgba(c,0.,0.,0.,.5);
2448             switch(qiv){
2449               /*Double plus:*/
2450               case 2:{
2451                 if((bi&1)^((bi&2)>>1)){
2452                   cairo_move_to(c,xp+2.5,yp+1.5);
2453                   cairo_line_to(c,xp+2.5,yp+3.5);
2454                   cairo_move_to(c,xp+1.5,yp+2.5);
2455                   cairo_line_to(c,xp+3.5,yp+2.5);
2456                   cairo_move_to(c,xp+5.5,yp+4.5);
2457                   cairo_line_to(c,xp+5.5,yp+6.5);
2458                   cairo_move_to(c,xp+4.5,yp+5.5);
2459                   cairo_line_to(c,xp+6.5,yp+5.5);
2460                   cairo_stroke_preserve(c);
2461                   cairo_set_source_rgba(c,0.,1.,1.,1.);
2462                 }
2463                 else{
2464                   cairo_move_to(c,xp+5.5,yp+1.5);
2465                   cairo_line_to(c,xp+5.5,yp+3.5);
2466                   cairo_move_to(c,xp+4.5,yp+2.5);
2467                   cairo_line_to(c,xp+6.5,yp+2.5);
2468                   cairo_move_to(c,xp+2.5,yp+4.5);
2469                   cairo_line_to(c,xp+2.5,yp+6.5);
2470                   cairo_move_to(c,xp+1.5,yp+5.5);
2471                   cairo_line_to(c,xp+3.5,yp+5.5);
2472                   cairo_stroke_preserve(c);
2473                   cairo_set_source_rgba(c,0.,1.,1.,1.);
2474                 }
2475               }break;
2476               /*Double minus:*/
2477               case -2:{
2478                 cairo_move_to(c,xp+2.5,yp+2.5);
2479                 cairo_line_to(c,xp+5.5,yp+2.5);
2480                 cairo_move_to(c,xp+2.5,yp+5.5);
2481                 cairo_line_to(c,xp+5.5,yp+5.5);
2482                 cairo_stroke_preserve(c);
2483                 cairo_set_source_rgba(c,1.,1.,1.,1.);
2484               }break;
2485               /*Plus:*/
2486               case 1:{
2487                 if((bi&2)==0)yp-=2;
2488                 if((bi&1)==0)xp-=2;
2489                 cairo_move_to(c,xp+4.5,yp+2.5);
2490                 cairo_line_to(c,xp+4.5,yp+6.5);
2491                 cairo_move_to(c,xp+2.5,yp+4.5);
2492                 cairo_line_to(c,xp+6.5,yp+4.5);
2493                 cairo_stroke_preserve(c);
2494                 cairo_set_source_rgba(c,.1,1.,.3,1.);
2495                 break;
2496               }
2497               /*Fall through.*/
2498               /*Minus:*/
2499               case -1:{
2500                 cairo_move_to(c,xp+2.5,yp+4.5);
2501                 cairo_line_to(c,xp+6.5,yp+4.5);
2502                 cairo_stroke_preserve(c);
2503                 cairo_set_source_rgba(c,1.,.3,.1,1.);
2504               }break;
2505               default:continue;
2506             }
2507             cairo_set_line_width(c,1.);
2508             cairo_stroke(c);
2509           }
2510         }
2511       }
2512       col2++;
2513       if((col2>>1)>=_dec->state.nhmbs){
2514         col2=0;
2515         row2+=2;
2516       }
2517     }
2518     /*Bit usage indicator[s]:*/
2519     if(_dec->telemetry_bits){
2520       int widths[6];
2521       int fpsn;
2522       int fpsd;
2523       int mult;
2524       int fullw;
2525       int padw;
2526       int i;
2527       fpsn=_dec->state.info.fps_numerator;
2528       fpsd=_dec->state.info.fps_denominator;
2529       mult=(_dec->telemetry_bits>=0xFF?1:_dec->telemetry_bits);
2530       fullw=250.f*h*fpsd*mult/fpsn;
2531       padw=w-24;
2532       /*Header and coded block bits.*/
2533       if(_dec->telemetry_frame_bytes<0||
2534        _dec->telemetry_frame_bytes==OC_LOTS_OF_BITS){
2535         _dec->telemetry_frame_bytes=0;
2536       }
2537       if(_dec->telemetry_coding_bytes<0||
2538        _dec->telemetry_coding_bytes>_dec->telemetry_frame_bytes){
2539         _dec->telemetry_coding_bytes=0;
2540       }
2541       if(_dec->telemetry_mode_bytes<0||
2542        _dec->telemetry_mode_bytes>_dec->telemetry_frame_bytes){
2543         _dec->telemetry_mode_bytes=0;
2544       }
2545       if(_dec->telemetry_mv_bytes<0||
2546        _dec->telemetry_mv_bytes>_dec->telemetry_frame_bytes){
2547         _dec->telemetry_mv_bytes=0;
2548       }
2549       if(_dec->telemetry_qi_bytes<0||
2550        _dec->telemetry_qi_bytes>_dec->telemetry_frame_bytes){
2551         _dec->telemetry_qi_bytes=0;
2552       }
2553       if(_dec->telemetry_dc_bytes<0||
2554        _dec->telemetry_dc_bytes>_dec->telemetry_frame_bytes){
2555         _dec->telemetry_dc_bytes=0;
2556       }
2557       widths[0]=padw*
2558        (_dec->telemetry_frame_bytes-_dec->telemetry_coding_bytes)/fullw;
2559       widths[1]=padw*
2560        (_dec->telemetry_coding_bytes-_dec->telemetry_mode_bytes)/fullw;
2561       widths[2]=padw*
2562        (_dec->telemetry_mode_bytes-_dec->telemetry_mv_bytes)/fullw;
2563       widths[3]=padw*(_dec->telemetry_mv_bytes-_dec->telemetry_qi_bytes)/fullw;
2564       widths[4]=padw*(_dec->telemetry_qi_bytes-_dec->telemetry_dc_bytes)/fullw;
2565       widths[5]=padw*(_dec->telemetry_dc_bytes)/fullw;
2566       for(i=0;i<6;i++)if(widths[i]>w)widths[i]=w;
2567       cairo_set_source_rgba(c,.0,.0,.0,.6);
2568       cairo_rectangle(c,10,h-33,widths[0]+1,5);
2569       cairo_rectangle(c,10,h-29,widths[1]+1,5);
2570       cairo_rectangle(c,10,h-25,widths[2]+1,5);
2571       cairo_rectangle(c,10,h-21,widths[3]+1,5);
2572       cairo_rectangle(c,10,h-17,widths[4]+1,5);
2573       cairo_rectangle(c,10,h-13,widths[5]+1,5);
2574       cairo_fill(c);
2575       cairo_set_source_rgb(c,1,0,0);
2576       cairo_rectangle(c,10.5,h-32.5,widths[0],4);
2577       cairo_fill(c);
2578       cairo_set_source_rgb(c,0,1,0);
2579       cairo_rectangle(c,10.5,h-28.5,widths[1],4);
2580       cairo_fill(c);
2581       cairo_set_source_rgb(c,0,0,1);
2582       cairo_rectangle(c,10.5,h-24.5,widths[2],4);
2583       cairo_fill(c);
2584       cairo_set_source_rgb(c,.6,.4,.0);
2585       cairo_rectangle(c,10.5,h-20.5,widths[3],4);
2586       cairo_fill(c);
2587       cairo_set_source_rgb(c,.3,.3,.3);
2588       cairo_rectangle(c,10.5,h-16.5,widths[4],4);
2589       cairo_fill(c);
2590       cairo_set_source_rgb(c,.5,.5,.8);
2591       cairo_rectangle(c,10.5,h-12.5,widths[5],4);
2592       cairo_fill(c);
2593     }
2594     /*Master qi indicator[s]:*/
2595     if(_dec->telemetry_qi&0x1){
2596       cairo_text_extents_t extents;
2597       char                 buffer[10];
2598       int                  p;
2599       int                  y;
2600       p=0;
2601       y=h-7.5;
2602       if(_dec->state.qis[0]>=10)buffer[p++]=48+_dec->state.qis[0]/10;
2603       buffer[p++]=48+_dec->state.qis[0]%10;
2604       if(_dec->state.nqis>=2){
2605         buffer[p++]=' ';
2606         if(_dec->state.qis[1]>=10)buffer[p++]=48+_dec->state.qis[1]/10;
2607         buffer[p++]=48+_dec->state.qis[1]%10;
2608       }
2609       if(_dec->state.nqis==3){
2610         buffer[p++]=' ';
2611         if(_dec->state.qis[2]>=10)buffer[p++]=48+_dec->state.qis[2]/10;
2612         buffer[p++]=48+_dec->state.qis[2]%10;
2613       }
2614       buffer[p++]='\0';
2615       cairo_select_font_face(c,"sans",
2616        CAIRO_FONT_SLANT_NORMAL,CAIRO_FONT_WEIGHT_BOLD);
2617       cairo_set_font_size(c,18);
2618       cairo_text_extents(c,buffer,&extents);
2619       cairo_set_source_rgb(c,1,1,1);
2620       cairo_move_to(c,w-extents.x_advance-10,y);
2621       cairo_show_text(c,buffer);
2622       cairo_set_source_rgb(c,0,0,0);
2623       cairo_move_to(c,w-extents.x_advance-10,y);
2624       cairo_text_path(c,buffer);
2625       cairo_set_line_width(c,.8);
2626       cairo_set_line_join(c,CAIRO_LINE_JOIN_ROUND);
2627       cairo_stroke(c);
2628     }
2629     cairo_destroy(c);
2630   }
2631   /*Out of the Cairo plane into the telemetry YUV buffer.*/
2632   _ycbcr[0].data=_dec->telemetry_frame_data;
2633   _ycbcr[0].stride=_ycbcr[0].width;
2634   _ycbcr[1].data=_ycbcr[0].data+h*_ycbcr[0].stride;
2635   _ycbcr[1].stride=_ycbcr[1].width;
2636   _ycbcr[2].data=_ycbcr[1].data+(h>>vdec)*_ycbcr[1].stride;
2637   _ycbcr[2].stride=_ycbcr[2].width;
2638   y_row=_ycbcr[0].data;
2639   u_row=_ycbcr[1].data;
2640   v_row=_ycbcr[2].data;
2641   rgb_row=data;
2642   /*This is one of the few places it's worth handling chroma on a
2643      case-by-case basis.*/
2644   switch(_dec->state.info.pixel_fmt){
2645     case TH_PF_420:{
2646       for(y=0;y<h;y+=2){
2647         unsigned char *y_row2;
2648         unsigned char *rgb_row2;
2649         y_row2=y_row+_ycbcr[0].stride;
2650         rgb_row2=rgb_row+cstride;
2651         for(x=0;x<w;x+=2){
2652           int y;
2653           int u;
2654           int v;
2655           y=(65481*rgb_row[4*x+2]+128553*rgb_row[4*x+1]
2656            +24966*rgb_row[4*x+0]+4207500)/255000;
2657           y_row[x]=OC_CLAMP255(y);
2658           y=(65481*rgb_row[4*x+6]+128553*rgb_row[4*x+5]
2659            +24966*rgb_row[4*x+4]+4207500)/255000;
2660           y_row[x+1]=OC_CLAMP255(y);
2661           y=(65481*rgb_row2[4*x+2]+128553*rgb_row2[4*x+1]
2662            +24966*rgb_row2[4*x+0]+4207500)/255000;
2663           y_row2[x]=OC_CLAMP255(y);
2664           y=(65481*rgb_row2[4*x+6]+128553*rgb_row2[4*x+5]
2665            +24966*rgb_row2[4*x+4]+4207500)/255000;
2666           y_row2[x+1]=OC_CLAMP255(y);
2667           u=(-8372*(rgb_row[4*x+2]+rgb_row[4*x+6]
2668            +rgb_row2[4*x+2]+rgb_row2[4*x+6])
2669            -16436*(rgb_row[4*x+1]+rgb_row[4*x+5]
2670            +rgb_row2[4*x+1]+rgb_row2[4*x+5])
2671            +24808*(rgb_row[4*x+0]+rgb_row[4*x+4]
2672            +rgb_row2[4*x+0]+rgb_row2[4*x+4])+29032005)/225930;
2673           v=(39256*(rgb_row[4*x+2]+rgb_row[4*x+6]
2674            +rgb_row2[4*x+2]+rgb_row2[4*x+6])
2675            -32872*(rgb_row[4*x+1]+rgb_row[4*x+5]
2676             +rgb_row2[4*x+1]+rgb_row2[4*x+5])
2677            -6384*(rgb_row[4*x+0]+rgb_row[4*x+4]
2678             +rgb_row2[4*x+0]+rgb_row2[4*x+4])+45940035)/357510;
2679           u_row[x>>1]=OC_CLAMP255(u);
2680           v_row[x>>1]=OC_CLAMP255(v);
2681         }
2682         y_row+=_ycbcr[0].stride<<1;
2683         u_row+=_ycbcr[1].stride;
2684         v_row+=_ycbcr[2].stride;
2685         rgb_row+=cstride<<1;
2686       }
2687     }break;
2688     case TH_PF_422:{
2689       for(y=0;y<h;y++){
2690         for(x=0;x<w;x+=2){
2691           int y;
2692           int u;
2693           int v;
2694           y=(65481*rgb_row[4*x+2]+128553*rgb_row[4*x+1]
2695            +24966*rgb_row[4*x+0]+4207500)/255000;
2696           y_row[x]=OC_CLAMP255(y);
2697           y=(65481*rgb_row[4*x+6]+128553*rgb_row[4*x+5]
2698            +24966*rgb_row[4*x+4]+4207500)/255000;
2699           y_row[x+1]=OC_CLAMP255(y);
2700           u=(-16744*(rgb_row[4*x+2]+rgb_row[4*x+6])
2701            -32872*(rgb_row[4*x+1]+rgb_row[4*x+5])
2702            +49616*(rgb_row[4*x+0]+rgb_row[4*x+4])+29032005)/225930;
2703           v=(78512*(rgb_row[4*x+2]+rgb_row[4*x+6])
2704            -65744*(rgb_row[4*x+1]+rgb_row[4*x+5])
2705            -12768*(rgb_row[4*x+0]+rgb_row[4*x+4])+45940035)/357510;
2706           u_row[x>>1]=OC_CLAMP255(u);
2707           v_row[x>>1]=OC_CLAMP255(v);
2708         }
2709         y_row+=_ycbcr[0].stride;
2710         u_row+=_ycbcr[1].stride;
2711         v_row+=_ycbcr[2].stride;
2712         rgb_row+=cstride;
2713       }
2714     }break;
2715     /*case TH_PF_444:*/
2716     default:{
2717       for(y=0;y<h;y++){
2718         for(x=0;x<w;x++){
2719           int y;
2720           int u;
2721           int v;
2722           y=(65481*rgb_row[4*x+2]+128553*rgb_row[4*x+1]
2723            +24966*rgb_row[4*x+0]+4207500)/255000;
2724           u=(-33488*rgb_row[4*x+2]-65744*rgb_row[4*x+1]
2725            +99232*rgb_row[4*x+0]+29032005)/225930;
2726           v=(157024*rgb_row[4*x+2]-131488*rgb_row[4*x+1]
2727            -25536*rgb_row[4*x+0]+45940035)/357510;
2728           y_row[x]=OC_CLAMP255(y);
2729           u_row[x]=OC_CLAMP255(u);
2730           v_row[x]=OC_CLAMP255(v);
2731         }
2732         y_row+=_ycbcr[0].stride;
2733         u_row+=_ycbcr[1].stride;
2734         v_row+=_ycbcr[2].stride;
2735         rgb_row+=cstride;
2736       }
2737     }break;
2738   }
2739   /*Finished.
2740     Destroy the surface.*/
2741   cairo_surface_destroy(cs);
2742 }
2743 #endif
2744 
/*Decodes a single packet of compressed video.
  An empty packet, or a frame with no coded fragments, only advances the frame
   counter and the granule position.
  Any other packet is fully unpacked and reconstructed into the reference
   buffer that is not currently in use as the golden or previous frame.
  _dec:     The decoder context to update.
  _op:      The packet to decode.
  _granpos: If non-NULL, receives the granule position computed for this
             frame.
  Return: 0 if a new frame was reconstructed.
          TH_DUPFRAME if the packet had no coded fragments (a dropped or
           duplicated frame).
          TH_EFAULT if _dec or _op is NULL.
          Otherwise, the negative value returned by
           oc_dec_frame_header_unpack().*/
int th_decode_packetin(th_dec_ctx *_dec,const ogg_packet *_op,
 ogg_int64_t *_granpos){
  int ret;
  if(_dec==NULL||_op==NULL)return TH_EFAULT;
  /*A completely empty packet indicates a dropped frame and is treated exactly
     like an inter frame with no coded blocks.*/
  if(_op->bytes==0){
    _dec->state.frame_type=OC_INTER_FRAME;
    _dec->state.ntotal_coded_fragis=0;
  }
  else{
    oc_pack_readinit(&_dec->opb,_op->packet,_op->bytes);
    ret=oc_dec_frame_header_unpack(_dec);
    if(ret<0)return ret;
    if(_dec->state.frame_type==OC_INTRA_FRAME)oc_dec_mark_all_intra(_dec);
    else oc_dec_coded_flags_unpack(_dec);
  }
  /*If there have been no reference frames, and we need one, initialize one.*/
  if(_dec->state.frame_type!=OC_INTRA_FRAME&&
   (_dec->state.ref_frame_idx[OC_FRAME_GOLD]<0||
   _dec->state.ref_frame_idx[OC_FRAME_PREV]<0)){
    oc_dec_init_dummy_frame(_dec);
  }
  /*If this was an inter frame with no coded blocks...*/
  if(_dec->state.ntotal_coded_fragis<=0){
    /*Just update the granule position and return.
      The key frame number (plus bias) goes in the bits above
       keyframe_granule_shift, and the number of frames since that key frame
       is added below it.
      Note that + binds tighter than <<, so the sum is what gets shifted.*/
    _dec->state.granpos=(_dec->state.keyframe_num+_dec->state.granpos_bias<<
     _dec->state.info.keyframe_granule_shift)
     +(_dec->state.curframe_num-_dec->state.keyframe_num);
    _dec->state.curframe_num++;
    if(_granpos!=NULL)*_granpos=_dec->state.granpos;
    return TH_DUPFRAME;
  }
  else{
    th_ycbcr_buffer stripe_buf;
    int             stripe_fragy;
    int             refi;
    int             pli;
    int             notstart;
    int             notdone;
#ifdef HAVE_CAIRO
    int             telemetry;
    /*Save the current telemetry state.
      This prevents it from being modified in the middle of decoding this
       frame, which could cause us to skip calls to the striped decoding
       callback.*/
    telemetry=_dec->telemetry;
#endif
    /*Select a free buffer to use for the reconstructed version of this frame.
      The loop body is intentionally empty: refi advances past every index
       that matches the golden or previous reference and stops at the first
       one that matches neither.*/
    for(refi=0;refi==_dec->state.ref_frame_idx[OC_FRAME_GOLD]||
     refi==_dec->state.ref_frame_idx[OC_FRAME_PREV];refi++);
    _dec->state.ref_frame_idx[OC_FRAME_SELF]=refi;
    _dec->state.ref_frame_data[OC_FRAME_SELF]=
     _dec->state.ref_frame_bufs[refi][0].data;
#if defined(HAVE_CAIRO)
    /*Telemetry only: remember the total packet size.
      The snapshots taken below record how many bytes were still unread after
       each unpacking stage.*/
    _dec->telemetry_frame_bytes=_op->bytes;
#endif
    if(_dec->state.frame_type==OC_INTRA_FRAME){
      _dec->state.keyframe_num=_dec->state.curframe_num;
#if defined(HAVE_CAIRO)
      /*Nothing is unpacked between these three points on the intra path, so
         the snapshots coincide.*/
      _dec->telemetry_coding_bytes=
       _dec->telemetry_mode_bytes=
       _dec->telemetry_mv_bytes=oc_pack_bytes_left(&_dec->opb);
#endif
    }
    else{
#if defined(HAVE_CAIRO)
      _dec->telemetry_coding_bytes=oc_pack_bytes_left(&_dec->opb);
#endif
      oc_dec_mb_modes_unpack(_dec);
#if defined(HAVE_CAIRO)
      _dec->telemetry_mode_bytes=oc_pack_bytes_left(&_dec->opb);
#endif
      oc_dec_mv_unpack_and_frag_modes_fill(_dec);
#if defined(HAVE_CAIRO)
      _dec->telemetry_mv_bytes=oc_pack_bytes_left(&_dec->opb);
#endif
    }
    oc_dec_block_qis_unpack(_dec);
#if defined(HAVE_CAIRO)
    _dec->telemetry_qi_bytes=oc_pack_bytes_left(&_dec->opb);
#endif
    oc_dec_residual_tokens_unpack(_dec);
    /*Update granule position.
      This must be done before the striped decode callbacks so that the
       application knows what to do with the frame data.*/
    _dec->state.granpos=(_dec->state.keyframe_num+_dec->state.granpos_bias<<
     _dec->state.info.keyframe_granule_shift)
     +(_dec->state.curframe_num-_dec->state.keyframe_num);
    _dec->state.curframe_num++;
    if(_granpos!=NULL)*_granpos=_dec->state.granpos;
    /*All of the rest of the operations -- DC prediction reversal,
       reconstructing coded fragments, copying uncoded fragments, loop
       filtering, extending borders, and out-of-loop post-processing -- should
       be pipelined.
      I.e., DC prediction reversal, reconstruction, and uncoded fragment
       copying are done for one or two super block rows, then loop filtering is
       run as far as it can, then bordering copying, then post-processing.
      For 4:2:0 video a Minimum Codable Unit or MCU contains two luma super
       block rows, and one chroma.
      Otherwise, an MCU consists of one super block row from each plane.
      Inside each MCU, we perform all of the steps on one color plane before
       moving on to the next.
      After reconstruction, the additional filtering stages introduce a delay
       since they need some pixels from the next fragment row.
      Thus the actual number of decoded rows available is slightly smaller for
       the first MCU, and slightly larger for the last.

      This entire process allows us to operate on the data while it is still in
       cache, resulting in big performance improvements.
      An application callback allows further application processing (blitting
       to video memory, color conversion, etc.) to also use the data while it's
       in cache.*/
    oc_dec_pipeline_init(_dec,&_dec->pipe);
    oc_ycbcr_buffer_flip(stripe_buf,_dec->pp_frame_buf);
    /*notstart is 0 only for the first MCU and notdone is 0 only for the last.
      They are added to the delays below, so the first MCU has no start delay
       and the last MCU has no end delay.*/
    notstart=0;
    notdone=1;
    for(stripe_fragy=0;notdone;stripe_fragy+=_dec->pipe.mcu_nvfrags){
      int avail_fragy0;
      int avail_fragy_end;
      avail_fragy0=avail_fragy_end=_dec->state.fplanes[0].nvfrags;
      /*This is the last MCU if the next one would start at or beyond the end
         of the luma plane.*/
      notdone=stripe_fragy+_dec->pipe.mcu_nvfrags<avail_fragy_end;
      for(pli=0;pli<3;pli++){
        oc_fragment_plane *fplane;
        int                frag_shift;
        int                pp_offset;
        int                sdelay;
        int                edelay;
        fplane=_dec->state.fplanes+pli;
        /*Compute the first and last fragment row of the current MCU for this
           plane.
          frag_shift is 1 only for a chroma plane when bit 1 of pixel_fmt is
           clear, in which case the luma row numbers are halved.*/
        frag_shift=pli!=0&&!(_dec->state.info.pixel_fmt&2);
        _dec->pipe.fragy0[pli]=stripe_fragy>>frag_shift;
        _dec->pipe.fragy_end[pli]=OC_MINI(fplane->nvfrags,
         _dec->pipe.fragy0[pli]+(_dec->pipe.mcu_nvfrags>>frag_shift));
        oc_dec_dc_unpredict_mcu_plane(_dec,&_dec->pipe,pli);
        oc_dec_frags_recon_mcu_plane(_dec,&_dec->pipe,pli);
        /*Each enabled filtering stage below adds one fragment row of delay at
           the start (except in the first MCU) and at the end (except in the
           last MCU).*/
        sdelay=edelay=0;
        if(_dec->pipe.loop_filter){
          sdelay+=notstart;
          edelay+=notdone;
          oc_state_loop_filter_frag_rows(&_dec->state,
           _dec->pipe.bounding_values,OC_FRAME_SELF,pli,
           _dec->pipe.fragy0[pli]-sdelay,_dec->pipe.fragy_end[pli]-edelay);
        }
        /*To fill the borders, we have an additional two pixel delay, since a
           fragment in the next row could filter its top edge, using two pixels
           from a fragment in this row.
          But there's no reason to delay a full fragment between the two.*/
        oc_state_borders_fill_rows(&_dec->state,refi,pli,
         (_dec->pipe.fragy0[pli]-sdelay<<3)-(sdelay<<1),
         (_dec->pipe.fragy_end[pli]-edelay<<3)-(edelay<<1));
        /*Out-of-loop post-processing.
          The chroma levels sit three above the corresponding luma levels.*/
        pp_offset=3*(pli!=0);
        if(_dec->pipe.pp_level>=OC_PP_LEVEL_DEBLOCKY+pp_offset){
          /*Perform de-blocking in one plane.*/
          sdelay+=notstart;
          edelay+=notdone;
          oc_dec_deblock_frag_rows(_dec,_dec->pp_frame_buf,
           _dec->state.ref_frame_bufs[refi],pli,
           _dec->pipe.fragy0[pli]-sdelay,_dec->pipe.fragy_end[pli]-edelay);
          if(_dec->pipe.pp_level>=OC_PP_LEVEL_DERINGY+pp_offset){
            /*Perform de-ringing in one plane.*/
            sdelay+=notstart;
            edelay+=notdone;
            oc_dec_dering_frag_rows(_dec,_dec->pp_frame_buf,pli,
             _dec->pipe.fragy0[pli]-sdelay,_dec->pipe.fragy_end[pli]-edelay);
          }
        }
        /*If no post-processing is done, we still need to delay a row for the
           loop filter, thanks to the strange filtering order VP3 chose.*/
        else if(_dec->pipe.loop_filter){
          sdelay+=notstart;
          edelay+=notdone;
        }
        /*Compute the intersection of the available rows in all planes.
          If chroma is sub-sampled, the effect of each of its delays is
           doubled, but luma might have more post-processing filters enabled
           than chroma, so we don't know up front which one is the limiting
           factor.*/
        avail_fragy0=OC_MINI(avail_fragy0,
         _dec->pipe.fragy0[pli]-sdelay<<frag_shift);
        avail_fragy_end=OC_MINI(avail_fragy_end,
         _dec->pipe.fragy_end[pli]-edelay<<frag_shift);
      }
      /*The two preprocessor branches open the same block.
        With Cairo support, the per-stripe callback is suppressed while
         telemetry is active, and is made once for the whole frame below.*/
#ifdef HAVE_CAIRO
      if(_dec->stripe_cb.stripe_decoded!=NULL&&!telemetry){
#else
      if(_dec->stripe_cb.stripe_decoded!=NULL){
#endif
        /*The callback might want to use the FPU, so let's make sure they can.
          We violate all kinds of ABI restrictions by not doing this until
           now, but none of them actually matter since we don't use floating
           point ourselves.*/
        oc_restore_fpu(&_dec->state);
        /*Make the callback, ensuring we flip the sense of the "start" and
           "end" of the available region upside down.*/
        (*_dec->stripe_cb.stripe_decoded)(_dec->stripe_cb.ctx,stripe_buf,
         _dec->state.fplanes[0].nvfrags-avail_fragy_end,
         _dec->state.fplanes[0].nvfrags-avail_fragy0);
      }
      notstart=1;
    }
    /*Finish filling in the reference frame borders.*/
    for(pli=0;pli<3;pli++)oc_state_borders_fill_caps(&_dec->state,refi,pli);
    /*Update the reference frame indices.*/
    if(_dec->state.frame_type==OC_INTRA_FRAME){
      /*The new frame becomes both the previous and gold reference frames.*/
      _dec->state.ref_frame_idx[OC_FRAME_GOLD]=
       _dec->state.ref_frame_idx[OC_FRAME_PREV]=
       _dec->state.ref_frame_idx[OC_FRAME_SELF];
      _dec->state.ref_frame_data[OC_FRAME_GOLD]=
       _dec->state.ref_frame_data[OC_FRAME_PREV]=
       _dec->state.ref_frame_data[OC_FRAME_SELF];
    }
    else{
      /*Otherwise, just replace the previous reference frame.*/
      _dec->state.ref_frame_idx[OC_FRAME_PREV]=
       _dec->state.ref_frame_idx[OC_FRAME_SELF];
      _dec->state.ref_frame_data[OC_FRAME_PREV]=
       _dec->state.ref_frame_data[OC_FRAME_SELF];
    }
    /*Restore the FPU before dump_frame, since that _does_ use the FPU (for PNG
       gamma values, if nothing else).*/
    oc_restore_fpu(&_dec->state);
#ifdef HAVE_CAIRO
    /*If telemetry ioctls are active, we need to draw to the output buffer.*/
    if(telemetry){
      oc_render_telemetry(_dec,stripe_buf,telemetry);
      oc_ycbcr_buffer_flip(_dec->pp_frame_buf,stripe_buf);
      /*If we had a striped decoding callback, we skipped calling it above
         (because the telemetry wasn't rendered yet).
        Call it now with the whole frame.*/
      if(_dec->stripe_cb.stripe_decoded!=NULL){
        (*_dec->stripe_cb.stripe_decoded)(_dec->stripe_cb.ctx,
         stripe_buf,0,_dec->state.fplanes[0].nvfrags);
      }
    }
#endif
#if defined(OC_DUMP_IMAGES)
    /*We only dump images if there were some coded blocks.*/
    oc_state_dump_frame(&_dec->state,OC_FRAME_SELF,"dec");
#endif
    return 0;
  }
}
2991 
2992 int th_decode_ycbcr_out(th_dec_ctx *_dec,th_ycbcr_buffer _ycbcr){
2993   if(_dec==NULL||_ycbcr==NULL)return TH_EFAULT;
2994   oc_ycbcr_buffer_flip(_ycbcr,_dec->pp_frame_buf);
2995   return 0;
2996 }
2997