1 /********************************************************************
2  *                                                                  *
3  * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
4  * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
5  * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
6  * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
7  *                                                                  *
8  * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009                *
9  * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
10  *                                                                  *
11  ********************************************************************
12 
13   function:
14     last mod: $Id$
15 
16  ********************************************************************/
17 
18 #include <stdlib.h>
19 #include <string.h>
20 #include <ogg/ogg.h>
21 #include "decint.h"
22 #if defined(OC_DUMP_IMAGES)
23 # include <stdio.h>
24 # include "png.h"
25 #endif
26 #if defined(HAVE_CAIRO)
27 # include <cairo.h>
28 #endif
29 
30 
/*The post-processing levels.
  The decoder's pp_level field is set to one of these values.*/
#define OC_PP_LEVEL_DISABLED  (0) /*No post-processing.*/
#define OC_PP_LEVEL_TRACKDCQI (1) /*Keep track of DC qi for each block only.*/
#define OC_PP_LEVEL_DEBLOCKY  (2) /*Deblock the luma plane.*/
#define OC_PP_LEVEL_DERINGY   (3) /*Dering the luma plane.*/
#define OC_PP_LEVEL_SDERINGY  (4) /*Stronger luma plane deringing.*/
#define OC_PP_LEVEL_DEBLOCKC  (5) /*Deblock the chroma planes.*/
#define OC_PP_LEVEL_DERINGC   (6) /*Dering the chroma planes.*/
#define OC_PP_LEVEL_SDERINGC  (7) /*Stronger chroma plane deringing.*/
/*The largest valid post-processing level.*/
#define OC_PP_LEVEL_MAX       (7)
49 
50 
51 
52 /*The mode alphabets for the various mode coding schemes.
53   Scheme 0 uses a custom alphabet, which is not stored in this table.*/
54 static const unsigned char OC_MODE_ALPHABETS[7][OC_NMODES]={
55   /*Last MV dominates */
56   {
57     OC_MODE_INTER_MV_LAST,OC_MODE_INTER_MV_LAST2,OC_MODE_INTER_MV,
58     OC_MODE_INTER_NOMV,OC_MODE_INTRA,OC_MODE_GOLDEN_NOMV,OC_MODE_GOLDEN_MV,
59     OC_MODE_INTER_MV_FOUR
60   },
61   {
62     OC_MODE_INTER_MV_LAST,OC_MODE_INTER_MV_LAST2,OC_MODE_INTER_NOMV,
63     OC_MODE_INTER_MV,OC_MODE_INTRA,OC_MODE_GOLDEN_NOMV,OC_MODE_GOLDEN_MV,
64     OC_MODE_INTER_MV_FOUR
65   },
66   {
67     OC_MODE_INTER_MV_LAST,OC_MODE_INTER_MV,OC_MODE_INTER_MV_LAST2,
68     OC_MODE_INTER_NOMV,OC_MODE_INTRA,OC_MODE_GOLDEN_NOMV,OC_MODE_GOLDEN_MV,
69     OC_MODE_INTER_MV_FOUR
70   },
71   {
72     OC_MODE_INTER_MV_LAST,OC_MODE_INTER_MV,OC_MODE_INTER_NOMV,
73     OC_MODE_INTER_MV_LAST2,OC_MODE_INTRA,OC_MODE_GOLDEN_NOMV,
74     OC_MODE_GOLDEN_MV,OC_MODE_INTER_MV_FOUR
75   },
76   /*No MV dominates.*/
77   {
78     OC_MODE_INTER_NOMV,OC_MODE_INTER_MV_LAST,OC_MODE_INTER_MV_LAST2,
79     OC_MODE_INTER_MV,OC_MODE_INTRA,OC_MODE_GOLDEN_NOMV,OC_MODE_GOLDEN_MV,
80     OC_MODE_INTER_MV_FOUR
81   },
82   {
83     OC_MODE_INTER_NOMV,OC_MODE_GOLDEN_NOMV,OC_MODE_INTER_MV_LAST,
84     OC_MODE_INTER_MV_LAST2,OC_MODE_INTER_MV,OC_MODE_INTRA,OC_MODE_GOLDEN_MV,
85     OC_MODE_INTER_MV_FOUR
86   },
87   /*Default ordering.*/
88   {
89     OC_MODE_INTER_NOMV,OC_MODE_INTRA,OC_MODE_INTER_MV,OC_MODE_INTER_MV_LAST,
90     OC_MODE_INTER_MV_LAST2,OC_MODE_GOLDEN_NOMV,OC_MODE_GOLDEN_MV,
91     OC_MODE_INTER_MV_FOUR
92   }
93 };
94 
95 
96 /*The original DCT tokens are extended and reordered during the construction of
97    the Huffman tables.
98   The extension means more bits can be read with fewer calls to the bitpacker
99    during the Huffman decoding process (at the cost of larger Huffman tables),
100    and fewer tokens require additional extra bits (reducing the average storage
101    per decoded token).
102   The revised ordering reveals essential information in the token value
103    itself; specifically, whether or not there are additional extra bits to read
104    and the parameter to which those extra bits are applied.
105   The token is used to fetch a code word from the OC_DCT_CODE_WORD table below.
106   The extra bits are added into the code word at the bit position inferred from the
107    token value, giving the final code word from which all required parameters
108    are derived.
109   The number of EOBs and the leading zero run length can be extracted directly.
110   The coefficient magnitude is optionally negated before extraction, according
111    to a 'flip' bit.*/
112 
/*The number of additional extra bits that are decoded with each of the
   internal DCT tokens.
  Only the tokens that need extra bits have an entry, so the length of this
   table doubles as the index of the first token that needs none.*/
static const unsigned char OC_INTERNAL_DCT_TOKEN_EXTRA_BITS[15]={
  12,4,3,3,4,4,5,5,8,8,8,8,3,3,6
};

/*Whether or not an internal token needs any additional extra bits.
  This is true exactly when _token indexes into
   OC_INTERNAL_DCT_TOKEN_EXTRA_BITS.
  The argument is parenthesized so that an expression with low-precedence
   operators (e.g., a conditional) is compared as a whole.*/
#define OC_DCT_TOKEN_NEEDS_MORE(_token) \
 ((_token)<(int)(sizeof(OC_INTERNAL_DCT_TOKEN_EXTRA_BITS)/ \
  sizeof(*OC_INTERNAL_DCT_TOKEN_EXTRA_BITS)))
123 
/*The internal token for OC_DCT_REPEAT_RUN3_TOKEN.
  This token requires more than 8 extra bits.*/
#define OC_DCT_TOKEN_FAT_EOB (0)

/*The number of EOBs to use for an end-of-frame token.
  Note: PTRDIFF_MAX is what we want here, but that requires C99, which is not
   yet available everywhere; half of the largest size_t value should be
   equivalent.*/
#define OC_DCT_EOB_FINISH ((size_t)-1>>1)
131 
/*Layout of a DCT code word, from the least significant bit up:
    bits  0... 7: zero run length
    bits  8...19: EOB count
    bit       20: flip bit
    bits 21...31: token magnitude*/

/*The location of the (6) run length bits in the code word.
  These are placed at index 0 and given 8 bits (even though 6 would suffice)
   because it may be faster to extract the lower byte on some platforms.*/
#define OC_DCT_CW_RLEN_SHIFT (0)
/*The location of the (12) EOB bits in the code word.*/
#define OC_DCT_CW_EOB_SHIFT  (8)
/*The location of the (1) flip bit in the code word.
  This must be right under the magnitude bits.*/
#define OC_DCT_CW_FLIP_BIT   (20)
/*The location of the (11) token magnitude bits in the code word.
  These must be last, and rely on a sign-extending right shift.*/
#define OC_DCT_CW_MAG_SHIFT  (21)

/*Pack the given fields into a code word.
  _eobs: The EOB count.
  _rlen: The zero run length.
  _mag:  The (signed) coefficient magnitude.
  _flip: The flip bit (0 or 1); it is also subtracted from the magnitude
          before that is stored.
  The magnitude field is positioned with a multiplication instead of a left
   shift, because _mag-_flip may be negative and left-shifting a negative
   value is undefined behavior in C.
  The result is still an integer constant expression.*/
#define OC_DCT_CW_PACK(_eobs,_rlen,_mag,_flip) \
 ((_eobs)<<OC_DCT_CW_EOB_SHIFT| \
 (_rlen)<<OC_DCT_CW_RLEN_SHIFT| \
 (_flip)<<OC_DCT_CW_FLIP_BIT| \
 ((_mag)-(_flip))*(1<<OC_DCT_CW_MAG_SHIFT))

/*A special code word value that signals the end of the frame (a long EOB run
   of zero).*/
#define OC_DCT_CW_FINISH (0)

/*The position at which to insert the extra bits in the code word.
  This evaluates to OC_DCT_CW_EOB_SHIFT for tokens 0 and 1, to
   OC_DCT_CW_MAG_SHIFT for tokens 2 through 11, and to 0 (that is,
   OC_DCT_CW_RLEN_SHIFT) for every other token.
  We use this formulation because Intel has no useful cmov.
  A real architecture would probably do better with two of those.
  This translates to 11 instructions(!), and is _still_ faster than either a
   table lookup (just barely) or the naive double-ternary implementation (which
   gcc translates to a jump and a cmov).
  This assumes OC_DCT_CW_RLEN_SHIFT is zero, but could easily be reworked if
   you want to make one of the other shifts zero.*/
#define OC_DCT_TOKEN_EB_POS(_token) \
 (((OC_DCT_CW_EOB_SHIFT-OC_DCT_CW_MAG_SHIFT)&-((_token)<2)) \
 +(OC_DCT_CW_MAG_SHIFT&-((_token)<12)))
167 
168 /*The code words for each internal token.
169   See the notes at OC_DCT_TOKEN_MAP for the reasons why things are out of
170    order.*/
171 static const ogg_int32_t OC_DCT_CODE_WORD[92]={
172   /*These tokens require additional extra bits for the EOB count.*/
173   /*OC_DCT_REPEAT_RUN3_TOKEN (12 extra bits)*/
174   OC_DCT_CW_FINISH,
175   /*OC_DCT_REPEAT_RUN2_TOKEN (4 extra bits)*/
176   OC_DCT_CW_PACK(16, 0,  0,0),
177   /*These tokens require additional extra bits for the magnitude.*/
178   /*OC_DCT_VAL_CAT5 (4 extra bits-1 already read)*/
179   OC_DCT_CW_PACK( 0, 0, 13,0),
180   OC_DCT_CW_PACK( 0, 0, 13,1),
181   /*OC_DCT_VAL_CAT6 (5 extra bits-1 already read)*/
182   OC_DCT_CW_PACK( 0, 0, 21,0),
183   OC_DCT_CW_PACK( 0, 0, 21,1),
184   /*OC_DCT_VAL_CAT7 (6 extra bits-1 already read)*/
185   OC_DCT_CW_PACK( 0, 0, 37,0),
186   OC_DCT_CW_PACK( 0, 0, 37,1),
187   /*OC_DCT_VAL_CAT8 (10 extra bits-2 already read)*/
188   OC_DCT_CW_PACK( 0, 0, 69,0),
189   OC_DCT_CW_PACK( 0, 0,325,0),
190   OC_DCT_CW_PACK( 0, 0, 69,1),
191   OC_DCT_CW_PACK( 0, 0,325,1),
192   /*These tokens require additional extra bits for the run length.*/
193   /*OC_DCT_RUN_CAT1C (4 extra bits-1 already read)*/
194   OC_DCT_CW_PACK( 0,10, +1,0),
195   OC_DCT_CW_PACK( 0,10, -1,0),
196   /*OC_DCT_ZRL_TOKEN (6 extra bits)
197     Flip is set to distinguish this from OC_DCT_CW_FINISH.*/
198   OC_DCT_CW_PACK( 0, 0,  0,1),
199   /*The remaining tokens require no additional extra bits.*/
200   /*OC_DCT_EOB1_TOKEN (0 extra bits)*/
201   OC_DCT_CW_PACK( 1, 0,  0,0),
202   /*OC_DCT_EOB2_TOKEN (0 extra bits)*/
203   OC_DCT_CW_PACK( 2, 0,  0,0),
204   /*OC_DCT_EOB3_TOKEN (0 extra bits)*/
205   OC_DCT_CW_PACK( 3, 0,  0,0),
206   /*OC_DCT_RUN_CAT1A (1 extra bit-1 already read)x5*/
207   OC_DCT_CW_PACK( 0, 1, +1,0),
208   OC_DCT_CW_PACK( 0, 1, -1,0),
209   OC_DCT_CW_PACK( 0, 2, +1,0),
210   OC_DCT_CW_PACK( 0, 2, -1,0),
211   OC_DCT_CW_PACK( 0, 3, +1,0),
212   OC_DCT_CW_PACK( 0, 3, -1,0),
213   OC_DCT_CW_PACK( 0, 4, +1,0),
214   OC_DCT_CW_PACK( 0, 4, -1,0),
215   OC_DCT_CW_PACK( 0, 5, +1,0),
216   OC_DCT_CW_PACK( 0, 5, -1,0),
217   /*OC_DCT_RUN_CAT2A (2 extra bits-2 already read)*/
218   OC_DCT_CW_PACK( 0, 1, +2,0),
219   OC_DCT_CW_PACK( 0, 1, +3,0),
220   OC_DCT_CW_PACK( 0, 1, -2,0),
221   OC_DCT_CW_PACK( 0, 1, -3,0),
222   /*OC_DCT_RUN_CAT1B (3 extra bits-3 already read)*/
223   OC_DCT_CW_PACK( 0, 6, +1,0),
224   OC_DCT_CW_PACK( 0, 7, +1,0),
225   OC_DCT_CW_PACK( 0, 8, +1,0),
226   OC_DCT_CW_PACK( 0, 9, +1,0),
227   OC_DCT_CW_PACK( 0, 6, -1,0),
228   OC_DCT_CW_PACK( 0, 7, -1,0),
229   OC_DCT_CW_PACK( 0, 8, -1,0),
230   OC_DCT_CW_PACK( 0, 9, -1,0),
231   /*OC_DCT_RUN_CAT2B (3 extra bits-3 already read)*/
232   OC_DCT_CW_PACK( 0, 2, +2,0),
233   OC_DCT_CW_PACK( 0, 3, +2,0),
234   OC_DCT_CW_PACK( 0, 2, +3,0),
235   OC_DCT_CW_PACK( 0, 3, +3,0),
236   OC_DCT_CW_PACK( 0, 2, -2,0),
237   OC_DCT_CW_PACK( 0, 3, -2,0),
238   OC_DCT_CW_PACK( 0, 2, -3,0),
239   OC_DCT_CW_PACK( 0, 3, -3,0),
240   /*OC_DCT_SHORT_ZRL_TOKEN (3 extra bits-3 already read)
241     Flip is set on the first one to distinguish it from OC_DCT_CW_FINISH.*/
242   OC_DCT_CW_PACK( 0, 0,  0,1),
243   OC_DCT_CW_PACK( 0, 1,  0,0),
244   OC_DCT_CW_PACK( 0, 2,  0,0),
245   OC_DCT_CW_PACK( 0, 3,  0,0),
246   OC_DCT_CW_PACK( 0, 4,  0,0),
247   OC_DCT_CW_PACK( 0, 5,  0,0),
248   OC_DCT_CW_PACK( 0, 6,  0,0),
249   OC_DCT_CW_PACK( 0, 7,  0,0),
250   /*OC_ONE_TOKEN (0 extra bits)*/
251   OC_DCT_CW_PACK( 0, 0, +1,0),
252   /*OC_MINUS_ONE_TOKEN (0 extra bits)*/
253   OC_DCT_CW_PACK( 0, 0, -1,0),
254   /*OC_TWO_TOKEN (0 extra bits)*/
255   OC_DCT_CW_PACK( 0, 0, +2,0),
256   /*OC_MINUS_TWO_TOKEN (0 extra bits)*/
257   OC_DCT_CW_PACK( 0, 0, -2,0),
258   /*OC_DCT_VAL_CAT2 (1 extra bit-1 already read)x4*/
259   OC_DCT_CW_PACK( 0, 0, +3,0),
260   OC_DCT_CW_PACK( 0, 0, -3,0),
261   OC_DCT_CW_PACK( 0, 0, +4,0),
262   OC_DCT_CW_PACK( 0, 0, -4,0),
263   OC_DCT_CW_PACK( 0, 0, +5,0),
264   OC_DCT_CW_PACK( 0, 0, -5,0),
265   OC_DCT_CW_PACK( 0, 0, +6,0),
266   OC_DCT_CW_PACK( 0, 0, -6,0),
267   /*OC_DCT_VAL_CAT3 (2 extra bits-2 already read)*/
268   OC_DCT_CW_PACK( 0, 0, +7,0),
269   OC_DCT_CW_PACK( 0, 0, +8,0),
270   OC_DCT_CW_PACK( 0, 0, -7,0),
271   OC_DCT_CW_PACK( 0, 0, -8,0),
272   /*OC_DCT_VAL_CAT4 (3 extra bits-3 already read)*/
273   OC_DCT_CW_PACK( 0, 0, +9,0),
274   OC_DCT_CW_PACK( 0, 0,+10,0),
275   OC_DCT_CW_PACK( 0, 0,+11,0),
276   OC_DCT_CW_PACK( 0, 0,+12,0),
277   OC_DCT_CW_PACK( 0, 0, -9,0),
278   OC_DCT_CW_PACK( 0, 0,-10,0),
279   OC_DCT_CW_PACK( 0, 0,-11,0),
280   OC_DCT_CW_PACK( 0, 0,-12,0),
281   /*OC_DCT_REPEAT_RUN1_TOKEN (3 extra bits-3 already read)*/
282   OC_DCT_CW_PACK( 8, 0,  0,0),
283   OC_DCT_CW_PACK( 9, 0,  0,0),
284   OC_DCT_CW_PACK(10, 0,  0,0),
285   OC_DCT_CW_PACK(11, 0,  0,0),
286   OC_DCT_CW_PACK(12, 0,  0,0),
287   OC_DCT_CW_PACK(13, 0,  0,0),
288   OC_DCT_CW_PACK(14, 0,  0,0),
289   OC_DCT_CW_PACK(15, 0,  0,0),
290   /*OC_DCT_REPEAT_RUN0_TOKEN (2 extra bits-2 already read)*/
291   OC_DCT_CW_PACK( 4, 0,  0,0),
292   OC_DCT_CW_PACK( 5, 0,  0,0),
293   OC_DCT_CW_PACK( 6, 0,  0,0),
294   OC_DCT_CW_PACK( 7, 0,  0,0),
295 };
296 
297 
298 
/*Decodes one super block run length from the packet.
  Coding scheme:
       Codeword            Run Length
     0                       1
     10x                     2-3
     110x                    4-5
     1110xx                  6-9
     11110xxx                10-17
     111110xxxx              18-33
     111111xxxxxxxxxxxx      34-4129
  _opb:   The pack buffer to read from.
  Return: The decoded run length.*/
static int oc_sb_run_unpack(oc_pack_buf *_opb){
  /*Tokens below 0x10 are the run length itself.
    Larger tokens hold (smallest run length of the class)-6 in their low 5
     bits, and the number of extra bits to read above that.*/
  static const ogg_int16_t OC_SB_RUN_TREE[22]={
    4,
     -(1<<8|1),-(1<<8|1),-(1<<8|1),-(1<<8|1),
     -(1<<8|1),-(1<<8|1),-(1<<8|1),-(1<<8|1),
     -(3<<8|2),-(3<<8|2),-(3<<8|3),-(3<<8|3),
     -(4<<8|4),-(4<<8|5),-(4<<8|2<<4|6-6),17,
      2,
       -(2<<8|2<<4|10-6),-(2<<8|2<<4|14-6),
       -(2<<8|4<<4|18-6),-(2<<8|12<<4|34-6)
  };
  int token;
  int base;
  int nbits;
  token=oc_huff_token_decode(_opb,OC_SB_RUN_TREE);
  /*Short runs need no extra bits.*/
  if(token<0x10)return token;
  base=token&0x1F;
  nbits=token-base>>4;
  return 6+base+(int)oc_pack_read(_opb,nbits);
}
327 
/*Decodes one block run length from the packet.
  Coding scheme:
     Codeword             Run Length
     0x                      1-2
     10x                     3-4
     110x                    5-6
     1110xx                  7-10
     11110xx                 11-14
     11111xxxx               15-30
  _opb:   The pack buffer to read from.
  Return: The decoded run length; every token in the tree is the run length
           itself, so no extra bits are read here.*/
static int oc_block_run_unpack(oc_pack_buf *_opb){
  static const ogg_int16_t OC_BLOCK_RUN_TREE[61]={
    5,
     /*0x*/
     -(2<<8|1),-(2<<8|1),-(2<<8|1),-(2<<8|1),
     -(2<<8|1),-(2<<8|1),-(2<<8|1),-(2<<8|1),
     -(2<<8|2),-(2<<8|2),-(2<<8|2),-(2<<8|2),
     -(2<<8|2),-(2<<8|2),-(2<<8|2),-(2<<8|2),
     /*10x*/
     -(3<<8|3),-(3<<8|3),-(3<<8|3),-(3<<8|3),
     -(3<<8|4),-(3<<8|4),-(3<<8|4),-(3<<8|4),
     /*110x*/
     -(4<<8|5),-(4<<8|5),-(4<<8|6),-(4<<8|6),
     /*Longer codes continue in the sub-tables at these offsets.*/
     33,36,39,44,
      /*Offset 33.*/
      1,-(1<<8|7),-(1<<8|8),
      /*Offset 36.*/
      1,-(1<<8|9),-(1<<8|10),
      /*Offset 39.*/
      2,-(2<<8|11),-(2<<8|12),-(2<<8|13),-(2<<8|14),
      /*Offset 44.*/
      4,
       -(4<<8|15),-(4<<8|16),-(4<<8|17),-(4<<8|18),
       -(4<<8|19),-(4<<8|20),-(4<<8|21),-(4<<8|22),
       -(4<<8|23),-(4<<8|24),-(4<<8|25),-(4<<8|26),
       -(4<<8|27),-(4<<8|28),-(4<<8|29),-(4<<8|30)
  };
  int run_length;
  run_length=oc_huff_token_decode(_opb,OC_BLOCK_RUN_TREE);
  return run_length;
}
358 
359 
360 
/*Selects the plain C versions of the decoder routines that can be
   accelerated.
  When OC_DEC_USE_VTABLE is not defined this function does nothing.
  _dec: The decoder context whose function table is filled in.*/
void oc_dec_accel_init_c(oc_dec_ctx *_dec){
#if defined(OC_DEC_USE_VTABLE)
  _dec->opt_vtable.dc_unpredict_mcu_plane=oc_dec_dc_unpredict_mcu_plane_c;
#endif
}
367 
/*Initializes a decoder context from the stream info and setup headers.
  _dec:   The decoder context to initialize.
  _info:  The stream parameters, passed on to oc_state_init().
  _setup: The setup data supplying the Huffman tables, the quantization
           parameters and the loop filter limits.
  Return: 0 on success, the error returned by oc_state_init() or
           oc_huff_trees_copy() if one of them fails, or TH_EFAULT if the
           token buffer cannot be allocated.
          On failure, everything set up so far has been released again.*/
static int oc_dec_init(oc_dec_ctx *_dec,const th_info *_info,
 const th_setup_info *_setup){
  int status;
  int qi;
  int pli;
  int qti;
  status=oc_state_init(&_dec->state,_info,3);
  if(status<0)return status;
  status=oc_huff_trees_copy(_dec->huff_tables,
   (const ogg_int16_t *const *)_setup->huff_tables);
  if(status<0){
    oc_state_clear(&_dec->state);
    return status;
  }
  /*For each fragment, allocate one byte for every DCT coefficient token, plus
     one byte for extra-bits for each token, plus one more byte for the long
     EOB run, just in case it's the very last token and has a run length of
     one.*/
  _dec->dct_tokens=(unsigned char *)_ogg_malloc((64+64+1)*
   _dec->state.nfrags*sizeof(_dec->dct_tokens[0]));
  if(_dec->dct_tokens==NULL){
    oc_huff_trees_clear(_dec->huff_tables);
    oc_state_clear(&_dec->state);
    return TH_EFAULT;
  }
  /*Point every dequantization table at its backing storage before the tables
     are filled in.*/
  for(qi=0;qi<64;qi++){
    for(pli=0;pli<3;pli++){
      for(qti=0;qti<2;qti++){
        _dec->state.dequant_tables[qi][pli][qti]=
         _dec->state.dequant_table_data[qi][pli][qti];
      }
    }
  }
  oc_dequant_tables_init(_dec->state.dequant_tables,_dec->pp_dc_scale,
   &_setup->qinfo);
  /*Derive the post-processing sharpness modifier for each qi from four of
     the dequantizer entries, with the luma plane (pli 0) counted twice.*/
  for(qi=0;qi<64;qi++){
    int qsum;
    qsum=0;
    for(qti=0;qti<2;qti++){
      for(pli=0;pli<3;pli++){
        int luma_shift;
        luma_shift=pli==0;
        qsum+=(_dec->state.dequant_tables[qi][pli][qti][12]+
         _dec->state.dequant_tables[qi][pli][qti][17]+
         _dec->state.dequant_tables[qi][pli][qti][18]+
         _dec->state.dequant_tables[qi][pli][qti][24])<<luma_shift;
      }
    }
    _dec->pp_sharp_mod[qi]=-(qsum>>11);
  }
  memcpy(_dec->state.loop_filter_limits,_setup->qinfo.loop_filter_limits,
   sizeof(_dec->state.loop_filter_limits));
  oc_dec_accel_init(_dec);
  /*Post-processing starts out disabled, with none of its buffers allocated
     and no stripe callback registered.*/
  _dec->pp_level=OC_PP_LEVEL_DISABLED;
  _dec->dc_qis=NULL;
  _dec->variances=NULL;
  _dec->pp_frame_data=NULL;
  _dec->stripe_cb.ctx=NULL;
  _dec->stripe_cb.stripe_decoded=NULL;
#if defined(HAVE_CAIRO)
  /*All telemetry is off by default.*/
  _dec->telemetry_bits=0;
  _dec->telemetry_qi=0;
  _dec->telemetry_mbmode=0;
  _dec->telemetry_mv=0;
  _dec->telemetry_frame_data=NULL;
#endif
  return 0;
}
428 
/*Releases everything owned by a decoder context: the optional telemetry and
   post-processing buffers, the DCT token buffer, the Huffman trees and the
   shared codec state.
  _dec: The decoder context to clear.*/
static void oc_dec_clear(oc_dec_ctx *_dec){
#if defined(HAVE_CAIRO)
  /*The telemetry buffer only exists in Cairo-enabled builds.*/
  _ogg_free(_dec->telemetry_frame_data);
#endif
  /*The post-processing buffers.*/
  _ogg_free(_dec->pp_frame_data);
  _ogg_free(_dec->variances);
  _ogg_free(_dec->dc_qis);
  /*The state set up by oc_dec_init(), released in the reverse order.*/
  _ogg_free(_dec->dct_tokens);
  oc_huff_trees_clear(_dec->huff_tables);
  oc_state_clear(&_dec->state);
}
440 
441 
/*Unpacks the frame header at the start of a data packet.
  This stores the frame type and the list of one to three qi values in the
   decoder state.
  _dec:   The decoder context; its pack buffer must already be positioned at
           the start of the packet.
  Return: 0 on success, TH_EBADPACKET if this is not a data packet, or
           TH_EIMPL if the unused configuration bits of a keyframe are not
           all zero.*/
static int oc_dec_frame_header_unpack(oc_dec_ctx *_dec){
  oc_pack_buf *opb;
  int          nqis;
  opb=&_dec->opb;
  /*Check to make sure this is a data packet.*/
  if(oc_pack_read1(opb)!=0)return TH_EBADPACKET;
  /*Read in the frame type (I or P).*/
  _dec->state.frame_type=(int)oc_pack_read1(opb);
  /*Read in the qi list.
    Each of the first two entries is followed by a flag saying whether or not
     another one follows; the third entry always ends the list.*/
  nqis=0;
  do{
    _dec->state.qis[nqis++]=(unsigned char)oc_pack_read(opb,6);
  }
  while(nqis<3&&oc_pack_read1(opb));
  _dec->state.nqis=nqis;
  if(_dec->state.frame_type==OC_INTRA_FRAME){
    /*Keyframes have 3 unused configuration bits, holdovers from VP3 days.
      Most of the other unused bits in the VP3 headers were eliminated.
      I don't know why these remain.*/
    /*I wanted to eliminate wasted bits, but not all config wiggle room
       --Monty.*/
    if(oc_pack_read(opb,3)!=0)return TH_EIMPL;
  }
  return 0;
}
477 
478 /*Mark all fragments as coded and in OC_MODE_INTRA.
479   This also builds up the coded fragment list (in coded order), and clears the
480    uncoded fragment list.
481   It does not update the coded macro block list nor the super block flags, as
482    those are not used when decoding INTRA frames.*/
oc_dec_mark_all_intra(oc_dec_ctx * _dec)483 static void oc_dec_mark_all_intra(oc_dec_ctx *_dec){
484   const oc_sb_map   *sb_maps;
485   const oc_sb_flags *sb_flags;
486   oc_fragment       *frags;
487   ptrdiff_t         *coded_fragis;
488   ptrdiff_t          ncoded_fragis;
489   ptrdiff_t          prev_ncoded_fragis;
490   unsigned           nsbs;
491   unsigned           sbi;
492   int                pli;
493   coded_fragis=_dec->state.coded_fragis;
494   prev_ncoded_fragis=ncoded_fragis=0;
495   sb_maps=(const oc_sb_map *)_dec->state.sb_maps;
496   sb_flags=_dec->state.sb_flags;
497   frags=_dec->state.frags;
498   sbi=nsbs=0;
499   for(pli=0;pli<3;pli++){
500     nsbs+=_dec->state.fplanes[pli].nsbs;
501     for(;sbi<nsbs;sbi++){
502       int quadi;
503       for(quadi=0;quadi<4;quadi++)if(sb_flags[sbi].quad_valid&1<<quadi){
504         int bi;
505         for(bi=0;bi<4;bi++){
506           ptrdiff_t fragi;
507           fragi=sb_maps[sbi][quadi][bi];
508           if(fragi>=0){
509             frags[fragi].coded=1;
510             frags[fragi].refi=OC_FRAME_SELF;
511             frags[fragi].mb_mode=OC_MODE_INTRA;
512             coded_fragis[ncoded_fragis++]=fragi;
513           }
514         }
515       }
516     }
517     _dec->state.ncoded_fragis[pli]=ncoded_fragis-prev_ncoded_fragis;
518     prev_ncoded_fragis=ncoded_fragis;
519   }
520   _dec->state.ntotal_coded_fragis=ncoded_fragis;
521 }
522 
523 /*Decodes the bit flags indicating whether each super block is partially coded
524    or not.
525   Return: The number of partially coded super blocks.*/
oc_dec_partial_sb_flags_unpack(oc_dec_ctx * _dec)526 static unsigned oc_dec_partial_sb_flags_unpack(oc_dec_ctx *_dec){
527   oc_sb_flags *sb_flags;
528   unsigned     nsbs;
529   unsigned     sbi;
530   unsigned     npartial;
531   unsigned     run_count;
532   long         val;
533   int          flag;
534   val=oc_pack_read1(&_dec->opb);
535   flag=(int)val;
536   sb_flags=_dec->state.sb_flags;
537   nsbs=_dec->state.nsbs;
538   sbi=npartial=0;
539   while(sbi<nsbs){
540     int full_run;
541     run_count=oc_sb_run_unpack(&_dec->opb);
542     full_run=run_count>=4129;
543     do{
544       sb_flags[sbi].coded_partially=flag;
545       sb_flags[sbi].coded_fully=0;
546       npartial+=flag;
547       sbi++;
548     }
549     while(--run_count>0&&sbi<nsbs);
550     if(full_run&&sbi<nsbs){
551       val=oc_pack_read1(&_dec->opb);
552       flag=(int)val;
553     }
554     else flag=!flag;
555   }
556   /*TODO: run_count should be 0 here.
557     If it's not, we should issue a warning of some kind.*/
558   return npartial;
559 }
560 
561 /*Decodes the bit flags for whether or not each non-partially-coded super
562    block is fully coded or not.
563   This function should only be called if there is at least one
564    non-partially-coded super block.
565   Return: The number of partially coded super blocks.*/
oc_dec_coded_sb_flags_unpack(oc_dec_ctx * _dec)566 static void oc_dec_coded_sb_flags_unpack(oc_dec_ctx *_dec){
567   oc_sb_flags *sb_flags;
568   unsigned     nsbs;
569   unsigned     sbi;
570   unsigned     run_count;
571   long         val;
572   int          flag;
573   sb_flags=_dec->state.sb_flags;
574   nsbs=_dec->state.nsbs;
575   /*Skip partially coded super blocks.*/
576   for(sbi=0;sb_flags[sbi].coded_partially;sbi++);
577   val=oc_pack_read1(&_dec->opb);
578   flag=(int)val;
579   do{
580     int full_run;
581     run_count=oc_sb_run_unpack(&_dec->opb);
582     full_run=run_count>=4129;
583     for(;sbi<nsbs;sbi++){
584       if(sb_flags[sbi].coded_partially)continue;
585       if(run_count--<=0)break;
586       sb_flags[sbi].coded_fully=flag;
587     }
588     if(full_run&&sbi<nsbs){
589       val=oc_pack_read1(&_dec->opb);
590       flag=(int)val;
591     }
592     else flag=!flag;
593   }
594   while(sbi<nsbs);
595   /*TODO: run_count should be 0 here.
596     If it's not, we should issue a warning of some kind.*/
597 }
598 
/*Decodes the coded flag of every fragment.
  The super block flags are unpacked first; the fragments of a partially
   coded super block then take their flags from run-length coded block runs.
  Coded fragments are appended to the front of the coded fragment list, and
   uncoded ones are stored backwards from the end of the same buffer.
  Every visited fragment has its reference frame reset to OC_FRAME_NONE, and
   for the luma plane mb_modes records whether or not each macro block has a
   coded block.
  _dec: The decoder context.*/
static void oc_dec_coded_flags_unpack(oc_dec_ctx *_dec){
  const oc_sb_map   *sb_maps;
  const oc_sb_flags *sb_flags;
  signed char       *mb_modes;
  oc_fragment       *frags;
  ptrdiff_t         *coded_fragis;
  ptrdiff_t         *uncoded_fragis;
  ptrdiff_t          ncoded_fragis;
  ptrdiff_t          nuncoded_fragis;
  unsigned           npartial;
  unsigned           sb_end;
  unsigned           sbi;
  int                run_count;
  int                flag;
  int                pli;
  npartial=oc_dec_partial_sb_flags_unpack(_dec);
  if(npartial<_dec->state.nsbs)oc_dec_coded_sb_flags_unpack(_dec);
  /*The first block run toggles the flag before using it, so start from the
     inverse of the coded bit.*/
  flag=0;
  if(npartial>0)flag=!(int)oc_pack_read1(&_dec->opb);
  sb_maps=(const oc_sb_map *)_dec->state.sb_maps;
  sb_flags=_dec->state.sb_flags;
  mb_modes=_dec->state.mb_modes;
  frags=_dec->state.frags;
  coded_fragis=_dec->state.coded_fragis;
  uncoded_fragis=coded_fragis+_dec->state.nfrags;
  ncoded_fragis=nuncoded_fragis=0;
  run_count=0;
  sbi=sb_end=0;
  for(pli=0;pli<3;pli++){
    ptrdiff_t plane_start;
    plane_start=ncoded_fragis;
    /*The super blocks of all three planes are indexed consecutively.*/
    sb_end+=_dec->state.fplanes[pli].nsbs;
    for(;sbi<sb_end;sbi++){
      int quadi;
      for(quadi=0;quadi<4;quadi++){
        int quad_coded;
        int bi;
        /*Skip quadrants that are not flagged as valid.*/
        if(!(sb_flags[sbi].quad_valid&1<<quadi))continue;
        quad_coded=0;
        for(bi=0;bi<4;bi++){
          ptrdiff_t fragi;
          int       coded;
          fragi=sb_maps[sbi][quadi][bi];
          /*A negative index means there is no fragment in this slot.*/
          if(fragi<0)continue;
          if(sb_flags[sbi].coded_fully)coded=1;
          else if(sb_flags[sbi].coded_partially){
            /*Start a new block run when the current one is used up.*/
            if(run_count<=0){
              run_count=oc_block_run_unpack(&_dec->opb);
              flag=!flag;
            }
            run_count--;
            coded=flag;
          }
          else coded=0;
          if(coded)coded_fragis[ncoded_fragis++]=fragi;
          else{
            nuncoded_fragis++;
            uncoded_fragis[-nuncoded_fragis]=fragi;
          }
          quad_coded|=coded;
          frags[fragi].coded=coded;
          frags[fragi].refi=OC_FRAME_NONE;
        }
        /*Remember if there's a coded luma block in this macro block.*/
        if(pli==0)mb_modes[sbi<<2|quadi]=quad_coded;
      }
    }
    _dec->state.ncoded_fragis[pli]=ncoded_fragis-plane_start;
  }
  _dec->state.ntotal_coded_fragis=ncoded_fragis;
  /*TODO: run_count should be 0 here.
    If it's not, we should issue a warning of some kind.*/
}
672 
673 
674 /*Coding scheme:
675    Codeword            Mode Index
676    0                       0
677    10                      1
678    110                     2
679    1110                    3
680    11110                   4
681    111110                  5
682    1111110                 6
683    1111111                 7*/
684 static const ogg_int16_t OC_VLC_MODE_TREE[26]={
685   4,
686    -(1<<8|0),-(1<<8|0),-(1<<8|0),-(1<<8|0),
687    -(1<<8|0),-(1<<8|0),-(1<<8|0),-(1<<8|0),
688    -(2<<8|1),-(2<<8|1),-(2<<8|1),-(2<<8|1),
689    -(3<<8|2),-(3<<8|2),-(4<<8|3),17,
690     3,
691      -(1<<8|4),-(1<<8|4),-(1<<8|4),-(1<<8|4),
692      -(2<<8|5),-(2<<8|5),-(3<<8|6),-(3<<8|7)
693 };
694 
695 static const ogg_int16_t OC_CLC_MODE_TREE[9]={
696   3,
697    -(3<<8|0),-(3<<8|1),-(3<<8|2),-(3<<8|3),
698    -(3<<8|4),-(3<<8|5),-(3<<8|6),-(3<<8|7)
699 };
700 
701 /*Unpacks the list of macro block modes for INTER frames.*/
oc_dec_mb_modes_unpack(oc_dec_ctx * _dec)702 static void oc_dec_mb_modes_unpack(oc_dec_ctx *_dec){
703   signed char         *mb_modes;
704   const unsigned char *alphabet;
705   unsigned char        scheme0_alphabet[8];
706   const ogg_int16_t   *mode_tree;
707   size_t               nmbs;
708   size_t               mbi;
709   long                 val;
710   int                  mode_scheme;
711   val=oc_pack_read(&_dec->opb,3);
712   mode_scheme=(int)val;
713   if(mode_scheme==0){
714     int mi;
715     /*Just in case, initialize the modes to something.
716       If the bitstream doesn't contain each index exactly once, it's likely
717        corrupt and the rest of the packet is garbage anyway, but this way we
718        won't crash, and we'll decode SOMETHING.*/
719     /*LOOP VECTORIZES*/
720     for(mi=0;mi<OC_NMODES;mi++)scheme0_alphabet[mi]=OC_MODE_INTER_NOMV;
721     for(mi=0;mi<OC_NMODES;mi++){
722       val=oc_pack_read(&_dec->opb,3);
723       scheme0_alphabet[val]=OC_MODE_ALPHABETS[6][mi];
724     }
725     alphabet=scheme0_alphabet;
726   }
727   else alphabet=OC_MODE_ALPHABETS[mode_scheme-1];
728   mode_tree=mode_scheme==7?OC_CLC_MODE_TREE:OC_VLC_MODE_TREE;
729   mb_modes=_dec->state.mb_modes;
730   nmbs=_dec->state.nmbs;
731   for(mbi=0;mbi<nmbs;mbi++){
732     if(mb_modes[mbi]>0){
733       /*We have a coded luma block; decode a mode.*/
734       mb_modes[mbi]=alphabet[oc_huff_token_decode(&_dec->opb,mode_tree)];
735     }
736     /*For other valid macro blocks, INTER_NOMV is forced, but we rely on the
737        fact that OC_MODE_INTER_NOMV is already 0.*/
738   }
739 }
740 
741 
742 
743 static const ogg_int16_t OC_VLC_MV_COMP_TREE[101]={
744   5,
745    -(3<<8|32+0),-(3<<8|32+0),-(3<<8|32+0),-(3<<8|32+0),
746    -(3<<8|32+1),-(3<<8|32+1),-(3<<8|32+1),-(3<<8|32+1),
747    -(3<<8|32-1),-(3<<8|32-1),-(3<<8|32-1),-(3<<8|32-1),
748    -(4<<8|32+2),-(4<<8|32+2),-(4<<8|32-2),-(4<<8|32-2),
749    -(4<<8|32+3),-(4<<8|32+3),-(4<<8|32-3),-(4<<8|32-3),
750    33,          36,          39,          42,
751    45,          50,          55,          60,
752    65,          74,          83,          92,
753     1,-(1<<8|32+4),-(1<<8|32-4),
754     1,-(1<<8|32+5),-(1<<8|32-5),
755     1,-(1<<8|32+6),-(1<<8|32-6),
756     1,-(1<<8|32+7),-(1<<8|32-7),
757     2,-(2<<8|32+8),-(2<<8|32-8),-(2<<8|32+9),-(2<<8|32-9),
758     2,-(2<<8|32+10),-(2<<8|32-10),-(2<<8|32+11),-(2<<8|32-11),
759     2,-(2<<8|32+12),-(2<<8|32-12),-(2<<8|32+13),-(2<<8|32-13),
760     2,-(2<<8|32+14),-(2<<8|32-14),-(2<<8|32+15),-(2<<8|32-15),
761     3,
762      -(3<<8|32+16),-(3<<8|32-16),-(3<<8|32+17),-(3<<8|32-17),
763      -(3<<8|32+18),-(3<<8|32-18),-(3<<8|32+19),-(3<<8|32-19),
764     3,
765      -(3<<8|32+20),-(3<<8|32-20),-(3<<8|32+21),-(3<<8|32-21),
766      -(3<<8|32+22),-(3<<8|32-22),-(3<<8|32+23),-(3<<8|32-23),
767     3,
768      -(3<<8|32+24),-(3<<8|32-24),-(3<<8|32+25),-(3<<8|32-25),
769      -(3<<8|32+26),-(3<<8|32-26),-(3<<8|32+27),-(3<<8|32-27),
770     3,
771      -(3<<8|32+28),-(3<<8|32-28),-(3<<8|32+29),-(3<<8|32-29),
772      -(3<<8|32+30),-(3<<8|32-30),-(3<<8|32+31),-(3<<8|32-31)
773 };
774 
775 static const ogg_int16_t OC_CLC_MV_COMP_TREE[65]={
776   6,
777    -(6<<8|32 +0),-(6<<8|32 -0),-(6<<8|32 +1),-(6<<8|32 -1),
778    -(6<<8|32 +2),-(6<<8|32 -2),-(6<<8|32 +3),-(6<<8|32 -3),
779    -(6<<8|32 +4),-(6<<8|32 -4),-(6<<8|32 +5),-(6<<8|32 -5),
780    -(6<<8|32 +6),-(6<<8|32 -6),-(6<<8|32 +7),-(6<<8|32 -7),
781    -(6<<8|32 +8),-(6<<8|32 -8),-(6<<8|32 +9),-(6<<8|32 -9),
782    -(6<<8|32+10),-(6<<8|32-10),-(6<<8|32+11),-(6<<8|32-11),
783    -(6<<8|32+12),-(6<<8|32-12),-(6<<8|32+13),-(6<<8|32-13),
784    -(6<<8|32+14),-(6<<8|32-14),-(6<<8|32+15),-(6<<8|32-15),
785    -(6<<8|32+16),-(6<<8|32-16),-(6<<8|32+17),-(6<<8|32-17),
786    -(6<<8|32+18),-(6<<8|32-18),-(6<<8|32+19),-(6<<8|32-19),
787    -(6<<8|32+20),-(6<<8|32-20),-(6<<8|32+21),-(6<<8|32-21),
788    -(6<<8|32+22),-(6<<8|32-22),-(6<<8|32+23),-(6<<8|32-23),
789    -(6<<8|32+24),-(6<<8|32-24),-(6<<8|32+25),-(6<<8|32-25),
790    -(6<<8|32+26),-(6<<8|32-26),-(6<<8|32+27),-(6<<8|32-27),
791    -(6<<8|32+28),-(6<<8|32-28),-(6<<8|32+29),-(6<<8|32-29),
792    -(6<<8|32+30),-(6<<8|32-30),-(6<<8|32+31),-(6<<8|32-31)
793 };
794 
795 
oc_mv_unpack(oc_pack_buf * _opb,const ogg_int16_t * _tree)796 static oc_mv oc_mv_unpack(oc_pack_buf *_opb,const ogg_int16_t *_tree){
797   int dx;
798   int dy;
799   dx=oc_huff_token_decode(_opb,_tree)-32;
800   dy=oc_huff_token_decode(_opb,_tree)-32;
801   return OC_MV(dx,dy);
802 }
803 
804 /*Unpacks the list of motion vectors for INTER frames, and propagates the macro
805    block modes and motion vectors to the individual fragments.*/
oc_dec_mv_unpack_and_frag_modes_fill(oc_dec_ctx * _dec)806 static void oc_dec_mv_unpack_and_frag_modes_fill(oc_dec_ctx *_dec){
807   const oc_mb_map        *mb_maps;
808   const signed char      *mb_modes;
809   oc_set_chroma_mvs_func  set_chroma_mvs;
810   const ogg_int16_t      *mv_comp_tree;
811   oc_fragment            *frags;
812   oc_mv                  *frag_mvs;
813   const unsigned char    *map_idxs;
814   int                     map_nidxs;
815   oc_mv                   last_mv;
816   oc_mv                   prior_mv;
817   oc_mv                   cbmvs[4];
818   size_t                  nmbs;
819   size_t                  mbi;
820   long                    val;
821   set_chroma_mvs=OC_SET_CHROMA_MVS_TABLE[_dec->state.info.pixel_fmt];
822   val=oc_pack_read1(&_dec->opb);
823   mv_comp_tree=val?OC_CLC_MV_COMP_TREE:OC_VLC_MV_COMP_TREE;
824   map_idxs=OC_MB_MAP_IDXS[_dec->state.info.pixel_fmt];
825   map_nidxs=OC_MB_MAP_NIDXS[_dec->state.info.pixel_fmt];
826   prior_mv=last_mv=0;
827   frags=_dec->state.frags;
828   frag_mvs=_dec->state.frag_mvs;
829   mb_maps=(const oc_mb_map *)_dec->state.mb_maps;
830   mb_modes=_dec->state.mb_modes;
831   nmbs=_dec->state.nmbs;
832   for(mbi=0;mbi<nmbs;mbi++){
833     int mb_mode;
834     mb_mode=mb_modes[mbi];
835     if(mb_mode!=OC_MODE_INVALID){
836       oc_mv     mbmv;
837       ptrdiff_t fragi;
838       int       mapi;
839       int       mapii;
840       int       refi;
841       if(mb_mode==OC_MODE_INTER_MV_FOUR){
842         oc_mv lbmvs[4];
843         int   bi;
844         prior_mv=last_mv;
845         for(bi=0;bi<4;bi++){
846           fragi=mb_maps[mbi][0][bi];
847           if(frags[fragi].coded){
848             frags[fragi].refi=OC_FRAME_PREV;
849             frags[fragi].mb_mode=OC_MODE_INTER_MV_FOUR;
850             lbmvs[bi]=last_mv=oc_mv_unpack(&_dec->opb,mv_comp_tree);
851             frag_mvs[fragi]=lbmvs[bi];
852           }
853           else lbmvs[bi]=0;
854         }
855         (*set_chroma_mvs)(cbmvs,lbmvs);
856         for(mapii=4;mapii<map_nidxs;mapii++){
857           mapi=map_idxs[mapii];
858           bi=mapi&3;
859           fragi=mb_maps[mbi][mapi>>2][bi];
860           if(frags[fragi].coded){
861             frags[fragi].refi=OC_FRAME_PREV;
862             frags[fragi].mb_mode=OC_MODE_INTER_MV_FOUR;
863             frag_mvs[fragi]=cbmvs[bi];
864           }
865         }
866       }
867       else{
868         switch(mb_mode){
869           case OC_MODE_INTER_MV:{
870             prior_mv=last_mv;
871             last_mv=mbmv=oc_mv_unpack(&_dec->opb,mv_comp_tree);
872           }break;
873           case OC_MODE_INTER_MV_LAST:mbmv=last_mv;break;
874           case OC_MODE_INTER_MV_LAST2:{
875             mbmv=prior_mv;
876             prior_mv=last_mv;
877             last_mv=mbmv;
878           }break;
879           case OC_MODE_GOLDEN_MV:{
880             mbmv=oc_mv_unpack(&_dec->opb,mv_comp_tree);
881           }break;
882           default:mbmv=0;break;
883         }
884         /*Fill in the MVs for the fragments.*/
885         refi=OC_FRAME_FOR_MODE(mb_mode);
886         mapii=0;
887         do{
888           mapi=map_idxs[mapii];
889           fragi=mb_maps[mbi][mapi>>2][mapi&3];
890           if(frags[fragi].coded){
891             frags[fragi].refi=refi;
892             frags[fragi].mb_mode=mb_mode;
893             frag_mvs[fragi]=mbmv;
894           }
895         }
896         while(++mapii<map_nidxs);
897       }
898     }
899   }
900 }
901 
/*Unpacks the quantizer index selector (qii) of every coded fragment.
  If the frame has a single qi value, every coded fragment simply gets a qii
   of 0 and nothing is read from the packet.
  Otherwise the selectors are read in up to two passes of a binary run-length
   code: the first pass separates a qii of 0 from a non-zero qii, and the
   second pass (only when the frame has 3 qi values and at least one fragment
   was non-zero) bumps some of the non-zero ones from 1 to 2.
  A run of 4129 or more is treated as a full-length run: the flag that follows
   it is read explicitly instead of being toggled.*/
static void oc_dec_block_qis_unpack(oc_dec_ctx *_dec){
  oc_fragment     *frags;
  const ptrdiff_t *coded_fragis;
  ptrdiff_t        ncoded;
  ptrdiff_t        ci;
  ptrdiff_t        fragi;
  int              bit;
  int              nnonzero;
  int              run;
  int              long_run;
  ncoded=_dec->state.ntotal_coded_fragis;
  if(ncoded<=0)return;
  frags=_dec->state.frags;
  coded_fragis=_dec->state.coded_fragis;
  if(_dec->state.nqis==1){
    /*Only one qi value in this frame: every coded fragment uses it.*/
    for(ci=0;ci<ncoded;ci++)frags[coded_fragis[ci]].qii=0;
    return;
  }
  /*First pass: mark each coded fragment as qii==0 or qii>0, and count how
     many ended up non-zero.*/
  bit=(int)oc_pack_read1(&_dec->opb);
  nnonzero=0;
  ci=0;
  while(ci<ncoded){
    run=oc_sb_run_unpack(&_dec->opb);
    long_run=run>=4129;
    /*At least one fragment is always consumed per run.*/
    for(;;){
      frags[coded_fragis[ci++]].qii=bit;
      nnonzero+=bit;
      if(--run<=0||ci>=ncoded)break;
    }
    if(long_run&&ci<ncoded)bit=(int)oc_pack_read1(&_dec->opb);
    else bit=!bit;
  }
  /*TODO: run should be 0 here.
    If it's not, we should issue a warning of some kind.*/
  /*The second pass is only needed when there are 3 qi values to choose from
     and some fragment actually has a non-zero selector.*/
  if(_dec->state.nqis!=3||nnonzero<=0)return;
  /*Skip the leading qii==0 fragments; nnonzero>0 guarantees this stops.*/
  ci=0;
  while(frags[coded_fragis[ci]].qii==0)ci++;
  bit=(int)oc_pack_read1(&_dec->opb);
  do{
    run=oc_sb_run_unpack(&_dec->opb);
    long_run=run>=4129;
    /*Runs in this pass only count fragments with a non-zero qii; the others
       are stepped over without using up any of the run.*/
    while(ci<ncoded){
      fragi=coded_fragis[ci];
      if(frags[fragi].qii!=0){
        if(run--<=0)break;
        frags[fragi].qii+=bit;
      }
      ci++;
    }
    if(long_run&&ci<ncoded)bit=(int)oc_pack_read1(&_dec->opb);
    else bit=!bit;
  }
  while(ci<ncoded);
  /*TODO: run should be 0 here.
    If it's not, we should issue a warning of some kind.*/
}
982 
983 
984 
985 /*Unpacks the DC coefficient tokens.
986   Unlike when unpacking the AC coefficient tokens, we actually need to decode
987    the DC coefficient values now so that we can do DC prediction.
988   _huff_idx:   The index of the Huffman table to use for each color plane.
989   _ntoks_left: The number of tokens left to be decoded in each color plane for
990                 each coefficient.
991                This is updated as EOB tokens and zero run tokens are decoded.
992   Return: The length of any outstanding EOB run.*/
oc_dec_dc_coeff_unpack(oc_dec_ctx * _dec,int _huff_idxs[2],ptrdiff_t _ntoks_left[3][64])993 static ptrdiff_t oc_dec_dc_coeff_unpack(oc_dec_ctx *_dec,int _huff_idxs[2],
994  ptrdiff_t _ntoks_left[3][64]){
995   unsigned char   *dct_tokens;
996   oc_fragment     *frags;
997   const ptrdiff_t *coded_fragis;
998   ptrdiff_t        ncoded_fragis;
999   ptrdiff_t        fragii;
1000   ptrdiff_t        eobs;
1001   ptrdiff_t        ti;
1002   int              pli;
1003   dct_tokens=_dec->dct_tokens;
1004   frags=_dec->state.frags;
1005   coded_fragis=_dec->state.coded_fragis;
1006   ncoded_fragis=fragii=eobs=ti=0;
1007   for(pli=0;pli<3;pli++){
1008     ptrdiff_t run_counts[64];
1009     ptrdiff_t eob_count;
1010     ptrdiff_t eobi;
1011     int       rli;
1012     ncoded_fragis+=_dec->state.ncoded_fragis[pli];
1013     memset(run_counts,0,sizeof(run_counts));
1014     _dec->eob_runs[pli][0]=eobs;
1015     _dec->ti0[pli][0]=ti;
1016     /*Continue any previous EOB run, if there was one.*/
1017     eobi=eobs;
1018     if(ncoded_fragis-fragii<eobi)eobi=ncoded_fragis-fragii;
1019     eob_count=eobi;
1020     eobs-=eobi;
1021     while(eobi-->0)frags[coded_fragis[fragii++]].dc=0;
1022     while(fragii<ncoded_fragis){
1023       int token;
1024       int cw;
1025       int eb;
1026       int skip;
1027       token=oc_huff_token_decode(&_dec->opb,
1028        _dec->huff_tables[_huff_idxs[pli+1>>1]]);
1029       dct_tokens[ti++]=(unsigned char)token;
1030       if(OC_DCT_TOKEN_NEEDS_MORE(token)){
1031         eb=(int)oc_pack_read(&_dec->opb,
1032          OC_INTERNAL_DCT_TOKEN_EXTRA_BITS[token]);
1033         dct_tokens[ti++]=(unsigned char)eb;
1034         if(token==OC_DCT_TOKEN_FAT_EOB)dct_tokens[ti++]=(unsigned char)(eb>>8);
1035         eb<<=OC_DCT_TOKEN_EB_POS(token);
1036       }
1037       else eb=0;
1038       cw=OC_DCT_CODE_WORD[token]+eb;
1039       eobs=cw>>OC_DCT_CW_EOB_SHIFT&0xFFF;
1040       if(cw==OC_DCT_CW_FINISH)eobs=OC_DCT_EOB_FINISH;
1041       if(eobs){
1042         eobi=OC_MINI(eobs,ncoded_fragis-fragii);
1043         eob_count+=eobi;
1044         eobs-=eobi;
1045         while(eobi-->0)frags[coded_fragis[fragii++]].dc=0;
1046       }
1047       else{
1048         int coeff;
1049         skip=(unsigned char)(cw>>OC_DCT_CW_RLEN_SHIFT);
1050         cw^=-(cw&1<<OC_DCT_CW_FLIP_BIT);
1051         coeff=cw>>OC_DCT_CW_MAG_SHIFT;
1052         if(skip)coeff=0;
1053         run_counts[skip]++;
1054         frags[coded_fragis[fragii++]].dc=coeff;
1055       }
1056     }
1057     /*Add the total EOB count to the longest run length.*/
1058     run_counts[63]+=eob_count;
1059     /*And convert the run_counts array to a moment table.*/
1060     for(rli=63;rli-->0;)run_counts[rli]+=run_counts[rli+1];
1061     /*Finally, subtract off the number of coefficients that have been
1062        accounted for by runs started in this coefficient.*/
1063     for(rli=64;rli-->0;)_ntoks_left[pli][rli]-=run_counts[rli];
1064   }
1065   _dec->dct_tokens_count=ti;
1066   return eobs;
1067 }
1068 
1069 /*Unpacks the AC coefficient tokens.
1070   This can completely discard coefficient values while unpacking, and so is
1071    somewhat simpler than unpacking the DC coefficient tokens.
1072   _huff_idx:   The index of the Huffman table to use for each color plane.
1073   _ntoks_left: The number of tokens left to be decoded in each color plane for
1074                 each coefficient.
1075                This is updated as EOB tokens and zero run tokens are decoded.
1076   _eobs:       The length of any outstanding EOB run from previous
1077                 coefficients.
1078   Return: The length of any outstanding EOB run.*/
oc_dec_ac_coeff_unpack(oc_dec_ctx * _dec,int _zzi,int _huff_idxs[2],ptrdiff_t _ntoks_left[3][64],ptrdiff_t _eobs)1079 static int oc_dec_ac_coeff_unpack(oc_dec_ctx *_dec,int _zzi,int _huff_idxs[2],
1080  ptrdiff_t _ntoks_left[3][64],ptrdiff_t _eobs){
1081   unsigned char *dct_tokens;
1082   ptrdiff_t      ti;
1083   int            pli;
1084   dct_tokens=_dec->dct_tokens;
1085   ti=_dec->dct_tokens_count;
1086   for(pli=0;pli<3;pli++){
1087     ptrdiff_t run_counts[64];
1088     ptrdiff_t eob_count;
1089     size_t    ntoks_left;
1090     size_t    ntoks;
1091     int       rli;
1092     _dec->eob_runs[pli][_zzi]=_eobs;
1093     _dec->ti0[pli][_zzi]=ti;
1094     ntoks_left=_ntoks_left[pli][_zzi];
1095     memset(run_counts,0,sizeof(run_counts));
1096     eob_count=0;
1097     ntoks=0;
1098     while(ntoks+_eobs<ntoks_left){
1099       int token;
1100       int cw;
1101       int eb;
1102       int skip;
1103       ntoks+=_eobs;
1104       eob_count+=_eobs;
1105       token=oc_huff_token_decode(&_dec->opb,
1106        _dec->huff_tables[_huff_idxs[pli+1>>1]]);
1107       dct_tokens[ti++]=(unsigned char)token;
1108       if(OC_DCT_TOKEN_NEEDS_MORE(token)){
1109         eb=(int)oc_pack_read(&_dec->opb,
1110          OC_INTERNAL_DCT_TOKEN_EXTRA_BITS[token]);
1111         dct_tokens[ti++]=(unsigned char)eb;
1112         if(token==OC_DCT_TOKEN_FAT_EOB)dct_tokens[ti++]=(unsigned char)(eb>>8);
1113         eb<<=OC_DCT_TOKEN_EB_POS(token);
1114       }
1115       else eb=0;
1116       cw=OC_DCT_CODE_WORD[token]+eb;
1117       skip=(unsigned char)(cw>>OC_DCT_CW_RLEN_SHIFT);
1118       _eobs=cw>>OC_DCT_CW_EOB_SHIFT&0xFFF;
1119       if(cw==OC_DCT_CW_FINISH)_eobs=OC_DCT_EOB_FINISH;
1120       if(_eobs==0){
1121         run_counts[skip]++;
1122         ntoks++;
1123       }
1124     }
1125     /*Add the portion of the last EOB run actually used by this coefficient.*/
1126     eob_count+=ntoks_left-ntoks;
1127     /*And remove it from the remaining EOB count.*/
1128     _eobs-=ntoks_left-ntoks;
1129     /*Add the total EOB count to the longest run length.*/
1130     run_counts[63]+=eob_count;
1131     /*And convert the run_counts array to a moment table.*/
1132     for(rli=63;rli-->0;)run_counts[rli]+=run_counts[rli+1];
1133     /*Finally, subtract off the number of coefficients that have been
1134        accounted for by runs started in this coefficient.*/
1135     for(rli=64-_zzi;rli-->0;)_ntoks_left[pli][_zzi+rli]-=run_counts[rli];
1136   }
1137   _dec->dct_tokens_count=ti;
1138   return _eobs;
1139 }
1140 
1141 /*Tokens describing the DCT coefficients that belong to each fragment are
1142    stored in the bitstream grouped by coefficient, not by fragment.
1143 
1144   This means that we either decode all the tokens in order, building up a
1145    separate coefficient list for each fragment as we go, and then go back and
1146    do the iDCT on each fragment, or we have to create separate lists of tokens
1147    for each coefficient, so that we can pull the next token required off the
1148    head of the appropriate list when decoding a specific fragment.
1149 
1150   The former was VP3's choice, and it meant 2*w*h extra storage for all the
1151    decoded coefficient values.
1152 
1153   We take the second option, which lets us store just one to three bytes per
1154    token (generally far fewer than the number of coefficients, due to EOB
1155    tokens and zero runs), and which requires us to only maintain a counter for
1156    each of the 64 coefficients, instead of a counter for every fragment to
1157    determine where the next token goes.
1158 
1159   We actually use 3 counters per coefficient, one for each color plane, so we
1160    can decode all color planes simultaneously.
1161   This lets color conversion, etc., be done as soon as a full MCU (one or
1162    two super block rows) is decoded, while the image data is still in cache.*/
1163 
oc_dec_residual_tokens_unpack(oc_dec_ctx * _dec)1164 static void oc_dec_residual_tokens_unpack(oc_dec_ctx *_dec){
1165   static const unsigned char OC_HUFF_LIST_MAX[5]={1,6,15,28,64};
1166   ptrdiff_t  ntoks_left[3][64];
1167   int        huff_idxs[2];
1168   ptrdiff_t  eobs;
1169   long       val;
1170   int        pli;
1171   int        zzi;
1172   int        hgi;
1173   for(pli=0;pli<3;pli++)for(zzi=0;zzi<64;zzi++){
1174     ntoks_left[pli][zzi]=_dec->state.ncoded_fragis[pli];
1175   }
1176   val=oc_pack_read(&_dec->opb,4);
1177   huff_idxs[0]=(int)val;
1178   val=oc_pack_read(&_dec->opb,4);
1179   huff_idxs[1]=(int)val;
1180   _dec->eob_runs[0][0]=0;
1181   eobs=oc_dec_dc_coeff_unpack(_dec,huff_idxs,ntoks_left);
1182 #if defined(HAVE_CAIRO)
1183   _dec->telemetry_dc_bytes=oc_pack_bytes_left(&_dec->opb);
1184 #endif
1185   val=oc_pack_read(&_dec->opb,4);
1186   huff_idxs[0]=(int)val;
1187   val=oc_pack_read(&_dec->opb,4);
1188   huff_idxs[1]=(int)val;
1189   zzi=1;
1190   for(hgi=1;hgi<5;hgi++){
1191     huff_idxs[0]+=16;
1192     huff_idxs[1]+=16;
1193     for(;zzi<OC_HUFF_LIST_MAX[hgi];zzi++){
1194       eobs=oc_dec_ac_coeff_unpack(_dec,zzi,huff_idxs,ntoks_left,eobs);
1195     }
1196   }
1197   /*TODO: eobs should be exactly zero, or 4096 or greater.
1198     The second case occurs when an EOB run of size zero is encountered, which
1199      gets treated as an infinite EOB run (where infinity is PTRDIFF_MAX).
1200     If neither of these conditions holds, then a warning should be issued.*/
1201 }
1202 
1203 
/*Prepares the buffers needed for post-processing at the current pp_level,
   allocating or freeing them as the level requires.
  Return: 0 if the post-processing frame buffers are set up and ready to use,
           or 1 if post-processing cannot or need not be done for this frame
           (it is disabled, only DC qi tracking is requested, tracking could
           not be started on this frame, or an allocation failed).*/
static int oc_dec_postprocess_init(oc_dec_ctx *_dec){
  int chroma;
  int want_state;
  /*musl libc malloc()/realloc() calls might use floating point, so make sure
     we've cleared the MMX state for them.*/
  oc_restore_fpu(&_dec->state);
  /*pp_level 0: disabled; release whatever was allocated and stop.*/
  if(_dec->pp_level<=OC_PP_LEVEL_DISABLED){
    if(_dec->dc_qis!=NULL){
      _ogg_free(_dec->dc_qis);
      _dec->dc_qis=NULL;
      _ogg_free(_dec->variances);
      _dec->variances=NULL;
      _ogg_free(_dec->pp_frame_data);
      _dec->pp_frame_data=NULL;
    }
    return 1;
  }
  if(_dec->dc_qis!=NULL){
    unsigned char   *dc_qis;
    const ptrdiff_t *coded_fragis;
    ptrdiff_t        ncoded_fragis;
    ptrdiff_t        fragii;
    unsigned char    qi0;
    /*Already tracking: record this frame's first qi as the DC quantization
       index of every block coded in it.*/
    dc_qis=_dec->dc_qis;
    coded_fragis=_dec->state.coded_fragis;
    ncoded_fragis=_dec->state.ncoded_fragis[0]+
     _dec->state.ncoded_fragis[1]+_dec->state.ncoded_fragis[2];
    qi0=(unsigned char)_dec->state.qis[0];
    for(fragii=0;fragii<ncoded_fragis;fragii++){
      dc_qis[coded_fragis[fragii]]=qi0;
    }
  }
  else{
    /*If we haven't been tracking DC quantization indices, there's no point in
       starting on anything but an INTRA frame.*/
    if(_dec->state.frame_type!=OC_INTRA_FRAME)return 1;
    _dec->dc_qis=(unsigned char *)_ogg_malloc(
     _dec->state.nfrags*sizeof(_dec->dc_qis[0]));
    if(_dec->dc_qis==NULL)return 1;
    memset(_dec->dc_qis,_dec->state.qis[0],_dec->state.nfrags);
  }
  /*pp_level 1: only the DC quantization indices are wanted, so drop the
     other buffers and stop.*/
  if(_dec->pp_level<=OC_PP_LEVEL_TRACKDCQI){
    if(_dec->variances!=NULL){
      _ogg_free(_dec->variances);
      _dec->variances=NULL;
      _ogg_free(_dec->pp_frame_data);
      _dec->pp_frame_data=NULL;
    }
    return 1;
  }
  if(_dec->variances==NULL){
    size_t frame_sz;
    size_t c_sz;
    int    c_w;
    int    c_h;
    frame_sz=_dec->state.info.frame_width*(size_t)_dec->state.info.frame_height;
    c_w=_dec->state.info.frame_width>>!(_dec->state.info.pixel_fmt&1);
    c_h=_dec->state.info.frame_height>>!(_dec->state.info.pixel_fmt&2);
    c_sz=c_w*(size_t)c_h;
    /*Allocate space for the chroma planes, even if we're not going to use
       them; this simplifies allocation state management, though it may waste
       memory on the few systems that don't overcommit pages.*/
    frame_sz+=c_sz<<1;
    _dec->pp_frame_data=(unsigned char *)_ogg_malloc(
     frame_sz*sizeof(_dec->pp_frame_data[0]));
    _dec->variances=(int *)_ogg_malloc(
     _dec->state.nfrags*sizeof(_dec->variances[0]));
    if(_dec->variances==NULL||_dec->pp_frame_data==NULL){
      /*Both buffers are needed together, so give back whichever one we got.*/
      _ogg_free(_dec->pp_frame_data);
      _dec->pp_frame_data=NULL;
      _ogg_free(_dec->variances);
      _dec->variances=NULL;
      return 1;
    }
    /*Force an update of the PP buffer pointers.*/
    _dec->pp_frame_state=0;
  }
  /*pp_frame_state is 1 when the buffers are laid out for luma only and 2 when
     they are laid out for all three planes.*/
  chroma=_dec->pp_level>=OC_PP_LEVEL_DEBLOCKC;
  want_state=1+chroma;
  /*Update the PP buffer pointers if necessary.*/
  if(_dec->pp_frame_state!=want_state){
    if(chroma){
      size_t y_sz;
      size_t c_sz;
      int    c_w;
      int    c_h;
      /*Set up pointers to all three PP planes, stored back to back.*/
      y_sz=_dec->state.info.frame_width*(size_t)_dec->state.info.frame_height;
      c_w=_dec->state.info.frame_width>>!(_dec->state.info.pixel_fmt&1);
      c_h=_dec->state.info.frame_height>>!(_dec->state.info.pixel_fmt&2);
      c_sz=c_w*(size_t)c_h;
      _dec->pp_frame_buf[0].width=_dec->state.info.frame_width;
      _dec->pp_frame_buf[0].height=_dec->state.info.frame_height;
      _dec->pp_frame_buf[0].stride=_dec->pp_frame_buf[0].width;
      _dec->pp_frame_buf[0].data=_dec->pp_frame_data;
      _dec->pp_frame_buf[1].width=c_w;
      _dec->pp_frame_buf[1].height=c_h;
      _dec->pp_frame_buf[1].stride=_dec->pp_frame_buf[1].width;
      _dec->pp_frame_buf[1].data=_dec->pp_frame_buf[0].data+y_sz;
      _dec->pp_frame_buf[2].width=c_w;
      _dec->pp_frame_buf[2].height=c_h;
      _dec->pp_frame_buf[2].stride=_dec->pp_frame_buf[2].width;
      _dec->pp_frame_buf[2].data=_dec->pp_frame_buf[1].data+c_sz;
      oc_ycbcr_buffer_flip(_dec->pp_frame_buf,_dec->pp_frame_buf);
    }
    else{
      /*If chroma processing is disabled, just use the PP luma plane.
        The stride is negative and the data pointer is placed at the start of
         the last row in memory.*/
      _dec->pp_frame_buf[0].width=_dec->state.info.frame_width;
      _dec->pp_frame_buf[0].height=_dec->state.info.frame_height;
      _dec->pp_frame_buf[0].stride=-_dec->pp_frame_buf[0].width;
      _dec->pp_frame_buf[0].data=_dec->pp_frame_data+
       (1-_dec->pp_frame_buf[0].height)*(ptrdiff_t)_dec->pp_frame_buf[0].stride;
    }
    _dec->pp_frame_state=want_state;
  }
  /*If we're not processing chroma, copy the reference frame's chroma planes.*/
  if(!chroma){
    memcpy(_dec->pp_frame_buf+1,
     _dec->state.ref_frame_bufs[_dec->state.ref_frame_idx[OC_FRAME_SELF]]+1,
     sizeof(_dec->pp_frame_buf[1])*2);
  }
  return 0;
}
1326 
1327 
1328 /*Initialize the main decoding pipeline.*/
oc_dec_pipeline_init(oc_dec_ctx * _dec,oc_dec_pipeline_state * _pipe)1329 static void oc_dec_pipeline_init(oc_dec_ctx *_dec,
1330  oc_dec_pipeline_state *_pipe){
1331   const ptrdiff_t *coded_fragis;
1332   const ptrdiff_t *uncoded_fragis;
1333   int              flimit;
1334   int              pli;
1335   int              qii;
1336   int              qti;
1337   int              zzi;
1338   /*If chroma is sub-sampled in the vertical direction, we have to decode two
1339      super block rows of Y' for each super block row of Cb and Cr.*/
1340   _pipe->mcu_nvfrags=4<<!(_dec->state.info.pixel_fmt&2);
1341   /*Initialize the token and extra bits indices for each plane and
1342      coefficient.*/
1343   memcpy(_pipe->ti,_dec->ti0,sizeof(_pipe->ti));
1344   /*Also copy over the initial the EOB run counts.*/
1345   memcpy(_pipe->eob_runs,_dec->eob_runs,sizeof(_pipe->eob_runs));
1346   /*Set up per-plane pointers to the coded and uncoded fragments lists.*/
1347   coded_fragis=_dec->state.coded_fragis;
1348   uncoded_fragis=coded_fragis+_dec->state.nfrags;
1349   for(pli=0;pli<3;pli++){
1350     ptrdiff_t ncoded_fragis;
1351     _pipe->coded_fragis[pli]=coded_fragis;
1352     _pipe->uncoded_fragis[pli]=uncoded_fragis;
1353     ncoded_fragis=_dec->state.ncoded_fragis[pli];
1354     coded_fragis+=ncoded_fragis;
1355     uncoded_fragis+=ncoded_fragis-_dec->state.fplanes[pli].nfrags;
1356   }
1357   /*Set up condensed quantizer tables.*/
1358   for(pli=0;pli<3;pli++){
1359     for(qii=0;qii<_dec->state.nqis;qii++){
1360       for(qti=0;qti<2;qti++){
1361         _pipe->dequant[pli][qii][qti]=
1362          _dec->state.dequant_tables[_dec->state.qis[qii]][pli][qti];
1363       }
1364     }
1365   }
1366   /*Set the previous DC predictor to 0 for all color planes and frame types.*/
1367   memset(_pipe->pred_last,0,sizeof(_pipe->pred_last));
1368   /*Initialize the bounding value array for the loop filter.*/
1369   flimit=_dec->state.loop_filter_limits[_dec->state.qis[0]];
1370   _pipe->loop_filter=flimit!=0;
1371   if(flimit!=0)oc_loop_filter_init(&_dec->state,_pipe->bounding_values,flimit);
1372   /*Initialize any buffers needed for post-processing.
1373     We also save the current post-processing level, to guard against the user
1374      changing it from a callback.*/
1375   if(!oc_dec_postprocess_init(_dec))_pipe->pp_level=_dec->pp_level;
1376   /*If we don't have enough information to post-process, disable it, regardless
1377      of the user-requested level.*/
1378   else{
1379     _pipe->pp_level=OC_PP_LEVEL_DISABLED;
1380     memcpy(_dec->pp_frame_buf,
1381      _dec->state.ref_frame_bufs[_dec->state.ref_frame_idx[OC_FRAME_SELF]],
1382      sizeof(_dec->pp_frame_buf[0])*3);
1383   }
1384   /*Clear down the DCT coefficient buffer for the first block.*/
1385   for(zzi=0;zzi<64;zzi++)_pipe->dct_coeffs[zzi]=0;
1386 }
1387 
1388 /*Undo the DC prediction in a single plane of an MCU (one or two super block
1389    rows).
1390   As a side effect, the number of coded and uncoded fragments in this plane of
1391    the MCU is also computed.*/
oc_dec_dc_unpredict_mcu_plane_c(oc_dec_ctx * _dec,oc_dec_pipeline_state * _pipe,int _pli)1392 void oc_dec_dc_unpredict_mcu_plane_c(oc_dec_ctx *_dec,
1393  oc_dec_pipeline_state *_pipe,int _pli){
1394   const oc_fragment_plane *fplane;
1395   oc_fragment             *frags;
1396   int                     *pred_last;
1397   ptrdiff_t                ncoded_fragis;
1398   ptrdiff_t                fragi;
1399   int                      fragx;
1400   int                      fragy;
1401   int                      fragy0;
1402   int                      fragy_end;
1403   int                      nhfrags;
1404   /*Compute the first and last fragment row of the current MCU for this
1405      plane.*/
1406   fplane=_dec->state.fplanes+_pli;
1407   fragy0=_pipe->fragy0[_pli];
1408   fragy_end=_pipe->fragy_end[_pli];
1409   nhfrags=fplane->nhfrags;
1410   pred_last=_pipe->pred_last[_pli];
1411   frags=_dec->state.frags;
1412   ncoded_fragis=0;
1413   fragi=fplane->froffset+fragy0*(ptrdiff_t)nhfrags;
1414   for(fragy=fragy0;fragy<fragy_end;fragy++){
1415     if(fragy==0){
1416       /*For the first row, all of the cases reduce to just using the previous
1417          predictor for the same reference frame.*/
1418       for(fragx=0;fragx<nhfrags;fragx++,fragi++){
1419         if(frags[fragi].coded){
1420           int refi;
1421           refi=frags[fragi].refi;
1422           pred_last[refi]=frags[fragi].dc+=pred_last[refi];
1423           ncoded_fragis++;
1424         }
1425       }
1426     }
1427     else{
1428       oc_fragment *u_frags;
1429       int          l_ref;
1430       int          ul_ref;
1431       int          u_ref;
1432       u_frags=frags-nhfrags;
1433       l_ref=-1;
1434       ul_ref=-1;
1435       u_ref=u_frags[fragi].refi;
1436       for(fragx=0;fragx<nhfrags;fragx++,fragi++){
1437         int ur_ref;
1438         if(fragx+1>=nhfrags)ur_ref=-1;
1439         else ur_ref=u_frags[fragi+1].refi;
1440         if(frags[fragi].coded){
1441           int pred;
1442           int refi;
1443           refi=frags[fragi].refi;
1444           /*We break out a separate case based on which of our neighbors use
1445              the same reference frames.
1446             This is somewhat faster than trying to make a generic case which
1447              handles all of them, since it reduces lots of poorly predicted
1448              jumps to one switch statement, and also lets a number of the
1449              multiplications be optimized out by strength reduction.*/
1450           switch((l_ref==refi)|(ul_ref==refi)<<1|
1451            (u_ref==refi)<<2|(ur_ref==refi)<<3){
1452             default:pred=pred_last[refi];break;
1453             case  1:
1454             case  3:pred=frags[fragi-1].dc;break;
1455             case  2:pred=u_frags[fragi-1].dc;break;
1456             case  4:
1457             case  6:
1458             case 12:pred=u_frags[fragi].dc;break;
1459             case  5:pred=(frags[fragi-1].dc+u_frags[fragi].dc)/2;break;
1460             case  8:pred=u_frags[fragi+1].dc;break;
1461             case  9:
1462             case 11:
1463             case 13:{
1464               /*The TI compiler mis-compiles this line.*/
1465               pred=(75*frags[fragi-1].dc+53*u_frags[fragi+1].dc)/128;
1466             }break;
1467             case 10:pred=(u_frags[fragi-1].dc+u_frags[fragi+1].dc)/2;break;
1468             case 14:{
1469               pred=(3*(u_frags[fragi-1].dc+u_frags[fragi+1].dc)
1470                +10*u_frags[fragi].dc)/16;
1471             }break;
1472             case  7:
1473             case 15:{
1474               int p0;
1475               int p1;
1476               int p2;
1477               p0=frags[fragi-1].dc;
1478               p1=u_frags[fragi-1].dc;
1479               p2=u_frags[fragi].dc;
1480               pred=(29*(p0+p2)-26*p1)/32;
1481               if(abs(pred-p2)>128)pred=p2;
1482               else if(abs(pred-p0)>128)pred=p0;
1483               else if(abs(pred-p1)>128)pred=p1;
1484             }break;
1485           }
1486           pred_last[refi]=frags[fragi].dc+=pred;
1487           ncoded_fragis++;
1488           l_ref=refi;
1489         }
1490         else l_ref=-1;
1491         ul_ref=u_ref;
1492         u_ref=ur_ref;
1493       }
1494     }
1495   }
1496   _pipe->ncoded_fragis[_pli]=ncoded_fragis;
1497   /*Also save the number of uncoded fragments so we know how many to copy.*/
1498   _pipe->nuncoded_fragis[_pli]=
1499    (fragy_end-fragy0)*(ptrdiff_t)nhfrags-ncoded_fragis;
1500 }
1501 
1502 /*Reconstructs all coded fragments in a single MCU (one or two super block
1503    rows).
1504   This requires that each coded fragment have a proper macro block mode and
1505    motion vector (if not in INTRA mode), and have its DC value decoded, with
1506    the DC prediction process reversed, and the number of coded and uncoded
1507    fragments in this plane of the MCU be counted.
1508   The token lists for each color plane and coefficient should also be filled
1509    in, along with initial token offsets, extra bits offsets, and EOB run
1510    counts.*/
oc_dec_frags_recon_mcu_plane(oc_dec_ctx * _dec,oc_dec_pipeline_state * _pipe,int _pli)1511 static void oc_dec_frags_recon_mcu_plane(oc_dec_ctx *_dec,
1512  oc_dec_pipeline_state *_pipe,int _pli){
1513   unsigned char       *dct_tokens;
1514   const unsigned char *dct_fzig_zag;
1515   ogg_uint16_t         dc_quant[2];
1516   const oc_fragment   *frags;
1517   const ptrdiff_t     *coded_fragis;
1518   ptrdiff_t            ncoded_fragis;
1519   ptrdiff_t            fragii;
1520   ptrdiff_t           *ti;
1521   ptrdiff_t           *eob_runs;
1522   int                  qti;
1523   dct_tokens=_dec->dct_tokens;
1524   dct_fzig_zag=_dec->state.opt_data.dct_fzig_zag;
1525   frags=_dec->state.frags;
1526   coded_fragis=_pipe->coded_fragis[_pli];
1527   ncoded_fragis=_pipe->ncoded_fragis[_pli];
1528   ti=_pipe->ti[_pli];
1529   eob_runs=_pipe->eob_runs[_pli];
1530   for(qti=0;qti<2;qti++)dc_quant[qti]=_pipe->dequant[_pli][0][qti][0];
1531   for(fragii=0;fragii<ncoded_fragis;fragii++){
1532     const ogg_uint16_t *ac_quant;
1533     ptrdiff_t           fragi;
1534     int                 last_zzi;
1535     int                 zzi;
1536     fragi=coded_fragis[fragii];
1537     qti=frags[fragi].mb_mode!=OC_MODE_INTRA;
1538     ac_quant=_pipe->dequant[_pli][frags[fragi].qii][qti];
1539     /*Decode the AC coefficients.*/
1540     for(zzi=0;zzi<64;){
1541       int token;
1542       last_zzi=zzi;
1543       if(eob_runs[zzi]){
1544         eob_runs[zzi]--;
1545         break;
1546       }
1547       else{
1548         ptrdiff_t eob;
1549         int       cw;
1550         int       rlen;
1551         int       coeff;
1552         int       lti;
1553         lti=ti[zzi];
1554         token=dct_tokens[lti++];
1555         cw=OC_DCT_CODE_WORD[token];
1556         /*These parts could be done branchless, but the branches are fairly
1557            predictable and the C code translates into more than a few
1558            instructions, so it's worth it to avoid them.*/
1559         if(OC_DCT_TOKEN_NEEDS_MORE(token)){
1560           cw+=dct_tokens[lti++]<<OC_DCT_TOKEN_EB_POS(token);
1561         }
1562         eob=cw>>OC_DCT_CW_EOB_SHIFT&0xFFF;
1563         if(token==OC_DCT_TOKEN_FAT_EOB){
1564           eob+=dct_tokens[lti++]<<8;
1565           if(eob==0)eob=OC_DCT_EOB_FINISH;
1566         }
1567         rlen=(unsigned char)(cw>>OC_DCT_CW_RLEN_SHIFT);
1568         cw^=-(cw&1<<OC_DCT_CW_FLIP_BIT);
1569         coeff=cw>>OC_DCT_CW_MAG_SHIFT;
1570         eob_runs[zzi]=eob;
1571         ti[zzi]=lti;
1572         zzi+=rlen;
1573         _pipe->dct_coeffs[dct_fzig_zag[zzi]]=
1574          (ogg_int16_t)(coeff*(int)ac_quant[zzi]);
1575         zzi+=!eob;
1576       }
1577     }
1578     /*TODO: zzi should be exactly 64 here.
1579       If it's not, we should report some kind of warning.*/
1580     zzi=OC_MINI(zzi,64);
1581     _pipe->dct_coeffs[0]=(ogg_int16_t)frags[fragi].dc;
1582     /*last_zzi is always initialized.
1583       If your compiler thinks otherwise, it is dumb.*/
1584     oc_state_frag_recon(&_dec->state,fragi,_pli,
1585      _pipe->dct_coeffs,last_zzi,dc_quant[qti]);
1586   }
1587   _pipe->coded_fragis[_pli]+=ncoded_fragis;
1588   /*Right now the reconstructed MCU has only the coded blocks in it.*/
1589   /*TODO: We make the decision here to always copy the uncoded blocks into it
1590      from the reference frame.
1591     We could also copy the coded blocks back over the reference frame, if we
1592      wait for an additional MCU to be decoded, which might be faster if only a
1593      small number of blocks are coded.
1594     However, this introduces more latency, creating a larger cache footprint.
1595     It's unknown which decision is better, but this one results in simpler
1596      code, and the hard case (high bitrate, high resolution) is handled
1597      correctly.*/
1598   /*Copy the uncoded blocks from the previous reference frame.*/
1599   if(_pipe->nuncoded_fragis[_pli]>0){
1600     _pipe->uncoded_fragis[_pli]-=_pipe->nuncoded_fragis[_pli];
1601     oc_frag_copy_list(&_dec->state,
1602      _dec->state.ref_frame_data[OC_FRAME_SELF],
1603      _dec->state.ref_frame_data[OC_FRAME_PREV],
1604      _dec->state.ref_ystride[_pli],_pipe->uncoded_fragis[_pli],
1605      _pipe->nuncoded_fragis[_pli],_dec->state.frag_buf_offs);
1606   }
1607 }
1608 
1609 /*Filter a horizontal block edge.*/
/*Each of the 8 pixel columns is handled independently: 10 vertically adjacent
   pixels are gathered from _src, the activity on either side of the edge
   (which lies between the 5th and 6th pixel) is measured, and 8 pixels are
   written to _dst, either low-pass filtered or copied through unchanged.
  _dst:         The first of the 8 output rows.
  _dst_ystride: The offset between consecutive output rows.
  _src:         The first of the 10 input rows.
  _src_ystride: The offset between consecutive input rows.
  _qstep:       The step across the edge must be smaller than this for a
                 column to be filtered.
  _flimit:      The activity on each side of the edge must be smaller than
                 this for a column to be filtered.
  _variance0:   The (capped) activity of the first 5 pixels of every column
                 is added to this.
  _variance1:   The (capped) activity of the last 5 pixels of every column is
                 added to this.*/
static void oc_filter_hedge(unsigned char *_dst,int _dst_ystride,
 const unsigned char *_src,int _src_ystride,int _qstep,int _flimit,
 int *_variance0,int *_variance1){
  int col;
  for(col=0;col<8;col++){
    const unsigned char *in;
    unsigned char       *out;
    int                  p[10];
    int                  act0;
    int                  act1;
    int                  step;
    int                  i;
    /*Gather the 10 input pixels of this column.*/
    in=_src+col;
    for(i=0;i<10;i++){
      p[i]=*in;
      in+=_src_ystride;
    }
    /*Sum the absolute differences between neighbors on each side of the
       edge.*/
    act0=0;
    act1=0;
    for(i=0;i<4;i++){
      act0+=abs(p[i+1]-p[i]);
      act1+=abs(p[i+5]-p[i+6]);
    }
    *_variance0+=OC_MINI(255,act0);
    *_variance1+=OC_MINI(255,act1);
    out=_dst+col;
    step=p[5]-p[4];
    if(act0>=_flimit||act1>=_flimit||step>=_qstep||-step>=_qstep){
      /*Too much detail, or too large a step across the edge: pass the middle
         8 pixels through untouched.*/
      for(i=1;i<=8;i++){
        *out=(unsigned char)p[i];
        out+=_dst_ystride;
      }
      continue;
    }
    /*The first two and last two outputs use shortened kernels that repeat
       the outermost pixel.*/
    *out=(unsigned char)((p[0]*3+p[1]*2+p[2]+p[3]+p[4]+4)>>3);
    out+=_dst_ystride;
    *out=(unsigned char)((p[0]*2+p[1]+p[2]*2+p[3]+p[4]+p[5]+4)>>3);
    out+=_dst_ystride;
    for(i=0;i<4;i++){
      *out=(unsigned char)((p[i]+p[i+1]+p[i+2]+p[i+3]*2+
       p[i+4]+p[i+5]+p[i+6]+4)>>3);
      out+=_dst_ystride;
    }
    *out=(unsigned char)((p[4]+p[5]+p[6]+p[7]*2+p[8]+p[9]*2+4)>>3);
    out+=_dst_ystride;
    *out=(unsigned char)((p[5]+p[6]+p[7]+p[8]*2+p[9]*3+4)>>3);
  }
}
1662 
1663 /*Filter a vertical block edge.*/
/*Each of the 8 rows is handled independently and in place: 10 horizontally
   adjacent pixels, starting one pixel to the left of the row start, are
   gathered, the activity on either side of the edge (which lies between the
   5th and 6th pixel) is measured, and, if the row qualifies, the 8 pixels
   starting at the row start are overwritten with their low-pass filtered
   values.
  Rows that do not qualify are left untouched.
  _dst:         The first pixel to (potentially) overwrite in the first row.
  _dst_ystride: The offset between consecutive rows.
  _qstep:       The step across the edge must be smaller than this for a row
                 to be filtered.
  _flimit:      The activity on each side of the edge must be smaller than
                 this for a row to be filtered.
  _variances:   The (capped) activity left of the edge is added to
                 _variances[0], and that right of the edge to
                 _variances[1].*/
static void oc_filter_vedge(unsigned char *_dst,int _dst_ystride,
 int _qstep,int _flimit,int *_variances){
  unsigned char *row;
  int            line;
  row=_dst;
  for(line=0;line<8;line++){
    int p[10];
    int act0;
    int act1;
    int step;
    int i;
    /*Gather all 10 inputs before writing anything, since we filter in
       place.*/
    for(i=0;i<10;i++)p[i]=row[i-1];
    act0=0;
    act1=0;
    for(i=0;i<4;i++){
      act0+=abs(p[i+1]-p[i]);
      act1+=abs(p[i+5]-p[i+6]);
    }
    _variances[0]+=OC_MINI(255,act0);
    _variances[1]+=OC_MINI(255,act1);
    step=p[5]-p[4];
    if(act0<_flimit&&act1<_flimit&&step<_qstep&&-step<_qstep){
      row[0]=(unsigned char)((p[0]*3+p[1]*2+p[2]+p[3]+p[4]+4)>>3);
      row[1]=(unsigned char)((p[0]*2+p[1]+p[2]*2+p[3]+p[4]+p[5]+4)>>3);
      for(i=0;i<4;i++){
        row[i+2]=(unsigned char)((p[i]+p[i+1]+p[i+2]+p[i+3]*2+
         p[i+4]+p[i+5]+p[i+6]+4)>>3);
      }
      row[6]=(unsigned char)((p[4]+p[5]+p[6]+p[7]*2+p[8]+p[9]*2+4)>>3);
      row[7]=(unsigned char)((p[5]+p[6]+p[7]+p[8]*2+p[9]*3+4)>>3);
    }
    row+=_dst_ystride;
  }
}
1699 
/*Deblocks the fragment rows [_fragy0,_fragy_end) of plane _pli, reading
   pixels from _src and writing the result to _dst.
  Along the way the activity measured by the edge filters is accumulated into
   _dec->variances, one entry per fragment; oc_dec_dering_frag_rows() reads
   those entries.
  _dec:       The decoder context; supplies the fragment plane layout, the
               variances and dc_qis arrays and the pp_dc_scale table.
  _dst:       The output image; plane _pli of it is written.
  _src:       The input image; plane _pli of it is read.
  _pli:       The index of the plane to process.
  _fragy0:    The first fragment row to process.
  _fragy_end: One past the last fragment row to process.*/
static void oc_dec_deblock_frag_rows(oc_dec_ctx *_dec,
 th_img_plane *_dst,th_img_plane *_src,int _pli,int _fragy0,
 int _fragy_end){
  oc_fragment_plane   *fplane;
  int                 *variance;
  unsigned char       *dc_qi;
  unsigned char       *dst;
  const unsigned char *src;
  ptrdiff_t            froffset;
  int                  dst_ystride;
  int                  src_ystride;
  int                  nhfrags;
  int                  width;
  int                  notstart;
  int                  notdone;
  int                  flimit;
  int                  qstep;
  int                  y_end;
  int                  y;
  int                  x;
  /*Select the plane being processed.*/
  _dst+=_pli;
  _src+=_pli;
  fplane=_dec->state.fplanes+_pli;
  nhfrags=fplane->nhfrags;
  /*Index of the first fragment of row _fragy0 within this plane.*/
  froffset=fplane->froffset+_fragy0*(ptrdiff_t)nhfrags;
  variance=_dec->variances+froffset;
  dc_qi=_dec->dc_qis+froffset;
  /*notstart is 1 unless the range begins at the top of the plane; notdone is
     1 unless it reaches the bottom.*/
  notstart=_fragy0>0;
  notdone=_fragy_end<fplane->nvfrags;
  /*We want to clear an extra row of variances, except at the end.*/
  /*nhfrags&-notstart is nhfrags when notstart is set and 0 otherwise, so the
     first row of the range is skipped except at the top of the plane
     (presumably because an earlier call has already cleared and accumulated
     into it -- TODO confirm against the caller).*/
  memset(variance+(nhfrags&-notstart),0,
   (_fragy_end+notdone-_fragy0-notstart)*(nhfrags*sizeof(variance[0])));
  /*Except for the first time, we want to point to the middle of the row.*/
  y=(_fragy0<<3)+(notstart<<2);
  dst_ystride=_dst->stride;
  src_ystride=_src->stride;
  dst=_dst->data+y*(ptrdiff_t)dst_ystride;
  src=_src->data+y*(ptrdiff_t)src_ystride;
  width=_dst->width;
  /*This loop only runs when starting at the top of the plane (y==0): the
     first 4 pixel rows are copied through unfiltered.*/
  for(;y<4;y++){
    memcpy(dst,src,width*sizeof(dst[0]));
    dst+=dst_ystride;
    src+=src_ystride;
  }
  /*We also want to skip the last row in the frame for this loop.*/
  y_end=_fragy_end-!notdone<<3;
  /*Each iteration produces the 8 pixel rows starting at y.
    The horizontal edge filter reads 10 rows from the source, beginning one
     row above y, and writes 8 rows to the destination.*/
  for(;y<y_end;y+=8){
    /*The first fragment in the row has no vertical edge to its left.*/
    qstep=_dec->pp_dc_scale[*dc_qi];
    flimit=(qstep*3)>>2;
    oc_filter_hedge(dst,dst_ystride,src-src_ystride,src_ystride,
     qstep,flimit,variance,variance+nhfrags);
    variance++;
    dc_qi++;
    for(x=8;x<width;x+=8){
      qstep=_dec->pp_dc_scale[*dc_qi];
      flimit=(qstep*3)>>2;
      oc_filter_hedge(dst+x,dst_ystride,src+x-src_ystride,src_ystride,
       qstep,flimit,variance,variance+nhfrags);
      /*The vertical edge at x is filtered in place in the destination, over
         the 8 rows beginning 4 rows above y.
        variance-1 makes the filter accumulate into the entries of the
         fragments to the left and right of the edge.*/
      oc_filter_vedge(dst+x-(dst_ystride<<2)-4,dst_ystride,
       qstep,flimit,variance-1);
      variance++;
      dc_qi++;
    }
    dst+=dst_ystride<<3;
    src+=src_ystride<<3;
  }
  /*And finally, handle the last row in the frame, if it's in the range.*/
  if(!notdone){
    int height;
    height=_dst->height;
    /*Copy the remaining pixel rows down to the bottom of the plane
       unfiltered.*/
    for(;y<height;y++){
      memcpy(dst,src,width*sizeof(dst[0]));
      dst+=dst_ystride;
      src+=src_ystride;
    }
    /*Filter the last row of vertical block edges.*/
    /*Skip the first fragment: there is no vertical edge at x==0.*/
    dc_qi++;
    for(x=8;x<width;x+=8){
      qstep=_dec->pp_dc_scale[*dc_qi++];
      flimit=(qstep*3)>>2;
      /*dst now points just past the rows copied above, so this filters the
         8 rows immediately above it.*/
      oc_filter_vedge(dst+x-(dst_ystride<<3)-4,dst_ystride,
       qstep,flimit,variance++);
    }
  }
}
1785 
/*Applies the deringing filter to one 8x8 block, in place.
  Each pixel is replaced by a weighted average of itself and its four
   neighbors.
  The weight given to a neighbor is computed from the absolute difference
   across the corresponding pixel boundary: the larger the difference, the
   smaller the weight (unless it falls so low that _sharp_mod is substituted).
  _idata:     The top-left pixel of the block.
  _ystride:   The offset between consecutive rows.
  _b:         A bitmask of the sides of the block that have no neighbor to
               read: 1 for left, 2 for right, 4 for top, 8 for bottom.
              On a flagged side the block's own outermost pixels stand in for
               the missing neighbors.
  _dc_scale:  An offset added to every weight before it is clamped.
  _sharp_mod: The weight substituted when the computed weight falls below
               -64.
  _strong:    0 or 1; 1 selects the larger weight cap (32 instead of 24) and
               does not double the pixel difference.*/
static void oc_dering_block(unsigned char *_idata,int _ystride,int _b,
 int _dc_scale,int _sharp_mod,int _strong){
  /*Both tables are indexed by _strong.*/
  static const unsigned char OC_MOD_MAX[2]={24,32};
  static const unsigned char OC_MOD_SHIFT[2]={1,0};
  const unsigned char *psrc;
  const unsigned char *src;
  const unsigned char *nsrc;
  unsigned char       *dst;
  /*vmod[(by<<3)+bx] is the weight across the boundary above row by in column
     bx (9 boundaries for 8 rows); hmod[(bx<<3)+by] is the weight across the
     boundary left of column bx in row by.*/
  int                  vmod[72];
  int                  hmod[72];
  int                  mod_hi;
  int                  by;
  int                  bx;
  mod_hi=OC_MINI(3*_dc_scale,OC_MOD_MAX[_strong]);
  dst=_idata;
  src=dst;
  /*Start with the row above the block, or the first row itself if the top
     side is flagged (the mask reduces the offset to 0).*/
  psrc=src-(_ystride&-!(_b&4));
  for(by=0;by<9;by++){
    for(bx=0;bx<8;bx++){
      int mod;
      mod=32+_dc_scale-(abs(src[bx]-psrc[bx])<<OC_MOD_SHIFT[_strong]);
      vmod[(by<<3)+bx]=mod<-64?_sharp_mod:OC_CLAMPI(0,mod,mod_hi);
    }
    psrc=src;
    /*Do not step past the last row when the bottom side is flagged.*/
    src+=_ystride&-(!(_b&8)|by<7);
  }
  nsrc=dst;
  /*Start with the column left of the block, or the first column itself if
     the left side is flagged.*/
  psrc=dst-!(_b&1);
  for(bx=0;bx<9;bx++){
    src=nsrc;
    for(by=0;by<8;by++){
      int mod;
      mod=32+_dc_scale-(abs(*src-*psrc)<<OC_MOD_SHIFT[_strong]);
      hmod[(bx<<3)+by]=mod<-64?_sharp_mod:OC_CLAMPI(0,mod,mod_hi);
      psrc+=_ystride;
      src+=_ystride;
    }
    psrc=nsrc;
    /*Do not step past the last column when the right side is flagged.*/
    nsrc+=!(_b&2)|bx<7;
  }
  /*Now apply the weights.
    src aliases dst, so the left and upper neighbors read below have already
     been filtered by the time they are used.*/
  src=dst;
  psrc=src-(_ystride&-!(_b&4));
  nsrc=src+_ystride;
  for(by=0;by<8;by++){
    int a;
    int b;
    int w;
    /*a collects the weight left over for the center pixel (out of 128), b
       collects the weighted neighbors plus 64 for rounding the final shift
       by 7.*/
    /*The first column: its left neighbor may be outside the block.*/
    a=128;
    b=64;
    w=hmod[by];
    a-=w;
    b+=w**(src-!(_b&1));
    w=vmod[by<<3];
    a-=w;
    b+=w*psrc[0];
    w=vmod[by+1<<3];
    a-=w;
    b+=w*nsrc[0];
    w=hmod[(1<<3)+by];
    a-=w;
    b+=w*src[1];
    dst[0]=OC_CLAMP255(a*src[0]+b>>7);
    /*The interior columns: both horizontal neighbors lie inside the block.*/
    for(bx=1;bx<7;bx++){
      a=128;
      b=64;
      w=hmod[(bx<<3)+by];
      a-=w;
      b+=w*src[bx-1];
      w=vmod[(by<<3)+bx];
      a-=w;
      b+=w*psrc[bx];
      w=vmod[(by+1<<3)+bx];
      a-=w;
      b+=w*nsrc[bx];
      w=hmod[(bx+1<<3)+by];
      a-=w;
      b+=w*src[bx+1];
      dst[bx]=OC_CLAMP255(a*src[bx]+b>>7);
    }
    /*The last column: its right neighbor may be outside the block.*/
    a=128;
    b=64;
    w=hmod[(7<<3)+by];
    a-=w;
    b+=w*src[6];
    w=vmod[(by<<3)+7];
    a-=w;
    b+=w*psrc[7];
    w=vmod[(by+1<<3)+7];
    a-=w;
    b+=w*nsrc[7];
    w=hmod[(8<<3)+by];
    a-=w;
    b+=w*src[7+!(_b&2)];
    dst[7]=OC_CLAMP255(a*src[7]+b>>7);
    dst+=_ystride;
    psrc=src;
    src=nsrc;
    /*Do not let the next-row pointer step past the last row when the bottom
       side is flagged.*/
    nsrc+=_ystride&-(!(_b&8)|by<6);
  }
}
1886 
/*Thresholds on a block's accumulated variance, used by
   oc_dec_dering_frag_rows() to decide how much deringing to apply.
  Above THRESH1 a block gets one weak pass, and above THRESH2 one strong pass.
  When strong deringing is enabled, a block above THRESH3 (luma) or THRESH4
   (chroma) gets a strong pass and possibly two more; for luma the extra
   passes require a neighboring block above THRESH4.*/
#define OC_DERING_THRESH1 (384)
#define OC_DERING_THRESH2 (4*OC_DERING_THRESH1)
#define OC_DERING_THRESH3 (5*OC_DERING_THRESH1)
#define OC_DERING_THRESH4 (10*OC_DERING_THRESH1)
1891 
/*Applies the deringing filter, in place, to the fragment rows
   [_fragy0,_fragy_end) of plane _pli of _img.
  The number and strength of the filter passes applied to each 8x8 block is
   chosen from the activity stored for it (and, for luma, for its neighbors)
   in _dec->variances.
  _dec:       The decoder context.
  _img:       The image to filter; only plane _pli is touched.
  _pli:       The index of the plane to process.
  _fragy0:    The first fragment row to process.
  _fragy_end: One past the last fragment row to process.*/
static void oc_dec_dering_frag_rows(oc_dec_ctx *_dec,th_img_plane *_img,
 int _pli,int _fragy0,int _fragy_end){
  th_img_plane      *plane;
  oc_fragment_plane *fplane;
  oc_fragment       *frag;
  int               *var;
  unsigned char     *row;
  ptrdiff_t          froffset;
  int                ystride;
  int                nhfrags;
  int                strong_thresh;
  int                allow_strong;
  int                w;
  int                h;
  int                y0;
  int                y1;
  int                y;
  int                x;
  plane=_img+_pli;
  fplane=_dec->state.fplanes+_pli;
  nhfrags=fplane->nhfrags;
  froffset=fplane->froffset+_fragy0*(ptrdiff_t)nhfrags;
  var=_dec->variances+froffset;
  frag=_dec->state.frags+froffset;
  /*Luma and chroma enable the strong filter at different post-processing
     levels and use different thresholds for it.*/
  if(_pli){
    allow_strong=_dec->pp_level>=OC_PP_LEVEL_SDERINGC;
    strong_thresh=OC_DERING_THRESH4;
  }
  else{
    allow_strong=_dec->pp_level>=OC_PP_LEVEL_SDERINGY;
    strong_thresh=OC_DERING_THRESH3;
  }
  ystride=plane->stride;
  w=plane->width;
  h=plane->height;
  y0=_fragy0<<3;
  y1=_fragy_end<<3;
  row=plane->data+y0*(ptrdiff_t)ystride;
  for(y=y0;y<y1;y+=8){
    for(x=0;x<w;x+=8){
      int edges;
      int npasses;
      int strength;
      int qi;
      int v;
      qi=_dec->state.qis[frag->qii];
      v=*var;
      /*Flag the sides of this block that lie on the plane boundary:
         1 for left, 2 for right, 4 for top, 8 for bottom.*/
      edges=(x<=0)|(x+8>=w)<<1|(y<=0)<<2|(y+8>=h)<<3;
      npasses=0;
      strength=0;
      if(allow_strong&&v>strong_thresh){
        strength=1;
        npasses=1;
        /*Chroma always gets two more passes; luma only if one of the
           neighboring blocks is very active, too.*/
        if(_pli
         ||(!(edges&1)&&var[-1]>OC_DERING_THRESH4)
         ||(!(edges&2)&&var[1]>OC_DERING_THRESH4)
         ||(!(edges&4)&&var[-nhfrags]>OC_DERING_THRESH4)
         ||(!(edges&8)&&var[nhfrags]>OC_DERING_THRESH4)){
          npasses=3;
        }
      }
      else if(v>OC_DERING_THRESH2){
        strength=1;
        npasses=1;
      }
      else if(v>OC_DERING_THRESH1)npasses=1;
      for(;npasses>0;npasses--){
        oc_dering_block(row+x,ystride,edges,
         _dec->pp_dc_scale[qi],_dec->pp_sharp_mod[qi],strength);
      }
      frag++;
      var++;
    }
    row+=ystride<<3;
  }
}
1958 
1959 
1960 
th_decode_alloc(const th_info * _info,const th_setup_info * _setup)1961 th_dec_ctx *th_decode_alloc(const th_info *_info,const th_setup_info *_setup){
1962   oc_dec_ctx *dec;
1963   if(_info==NULL||_setup==NULL)return NULL;
1964   dec=oc_aligned_malloc(sizeof(*dec),16);
1965   if(dec==NULL||oc_dec_init(dec,_info,_setup)<0){
1966     oc_aligned_free(dec);
1967     return NULL;
1968   }
1969   dec->state.curframe_num=0;
1970   return dec;
1971 }
1972 
/*Clears and releases a decoder context.
  _dec: The context to free; NULL is accepted and ignored.*/
void th_decode_free(th_dec_ctx *_dec){
  if(_dec==NULL)return;
  oc_dec_clear(_dec);
  oc_aligned_free(_dec);
}
1979 
/*Handles decoder control requests.
  _dec:    The decoder context.
  _req:    The TH_DECCTL_* request code.
  _buf:    The request's argument buffer; read or written depending on the
            request.
  _buf_sz: The size of _buf in bytes, which must exactly match the size of
            the type the request expects.
  Return: 0 on success, TH_EFAULT if _dec or _buf is NULL for a recognized
   request, TH_EINVAL if _buf_sz or the supplied value is not acceptable, or
   TH_EIMPL if the request is not recognized.*/
int th_decode_ctl(th_dec_ctx *_dec,int _req,void *_buf,
 size_t _buf_sz){
  switch(_req){
  case TH_DECCTL_GET_PPLEVEL_MAX:{
    if(_dec==NULL||_buf==NULL)return TH_EFAULT;
    if(_buf_sz!=sizeof(int))return TH_EINVAL;
    (*(int *)_buf)=OC_PP_LEVEL_MAX;
    return 0;
  }break;
  case TH_DECCTL_SET_PPLEVEL:{
    int pp_level;
    if(_dec==NULL||_buf==NULL)return TH_EFAULT;
    if(_buf_sz!=sizeof(int))return TH_EINVAL;
    pp_level=*(int *)_buf;
    if(pp_level<0||pp_level>OC_PP_LEVEL_MAX)return TH_EINVAL;
    _dec->pp_level=pp_level;
    return 0;
  }break;
  case TH_DECCTL_SET_GRANPOS:{
    ogg_int64_t granpos;
    ogg_int64_t offset_mask;
    if(_dec==NULL||_buf==NULL)return TH_EFAULT;
    if(_buf_sz!=sizeof(ogg_int64_t))return TH_EINVAL;
    granpos=*(ogg_int64_t *)_buf;
    if(granpos<0)return TH_EINVAL;
    _dec->state.granpos=granpos;
    /*The high bits of the granule position give the key frame number, and
       the low keyframe_granule_shift bits the offset from it.*/
    _dec->state.keyframe_num=(granpos>>_dec->state.info.keyframe_granule_shift)
     -_dec->state.granpos_bias;
    /*Build the mask in 64 bits: shifting a plain int 1 left is undefined
       once the shift count reaches the sign bit.*/
    offset_mask=
     ((ogg_int64_t)1<<_dec->state.info.keyframe_granule_shift)-1;
    _dec->state.curframe_num=_dec->state.keyframe_num
     +(granpos&offset_mask);
    return 0;
  }break;
  case TH_DECCTL_SET_STRIPE_CB:{
    th_stripe_callback *cb;
    if(_dec==NULL||_buf==NULL)return TH_EFAULT;
    if(_buf_sz!=sizeof(th_stripe_callback))return TH_EINVAL;
    cb=(th_stripe_callback *)_buf;
    /*The callback description is copied, so the caller's buffer need not
       outlive this call.*/
    _dec->stripe_cb.ctx=cb->ctx;
    _dec->stripe_cb.stripe_decoded=cb->stripe_decoded;
    return 0;
  }break;
#ifdef HAVE_CAIRO
  /*The telemetry requests each store an int of flags in the context.*/
  case TH_DECCTL_SET_TELEMETRY_MBMODE:{
    if(_dec==NULL||_buf==NULL)return TH_EFAULT;
    if(_buf_sz!=sizeof(int))return TH_EINVAL;
    _dec->telemetry_mbmode=*(int *)_buf;
    return 0;
  }break;
  case TH_DECCTL_SET_TELEMETRY_MV:{
    if(_dec==NULL||_buf==NULL)return TH_EFAULT;
    if(_buf_sz!=sizeof(int))return TH_EINVAL;
    _dec->telemetry_mv=*(int *)_buf;
    return 0;
  }break;
  case TH_DECCTL_SET_TELEMETRY_QI:{
    if(_dec==NULL||_buf==NULL)return TH_EFAULT;
    if(_buf_sz!=sizeof(int))return TH_EINVAL;
    _dec->telemetry_qi=*(int *)_buf;
    return 0;
  }break;
  case TH_DECCTL_SET_TELEMETRY_BITS:{
    if(_dec==NULL||_buf==NULL)return TH_EFAULT;
    if(_buf_sz!=sizeof(int))return TH_EINVAL;
    _dec->telemetry_bits=*(int *)_buf;
    return 0;
  }break;
#endif
  default:return TH_EIMPL;
  }
}
2049 
2050 /*We're decoding an INTER frame, but have no initialized reference
2051    buffers (i.e., decoding did not start on a key frame).
2052   We initialize them to a solid gray here.*/
static void oc_dec_init_dummy_frame(th_dec_ctx *_dec){
  th_info   *info;
  ptrdiff_t  fill_offset;
  size_t     luma_sz;
  size_t     chroma_sz;
  int        luma_stride;
  int        luma_rows;
  int        chroma_stride;
  int        chroma_rows;
  /*Make the golden, previous and current frames all refer to buffer 0.*/
  _dec->state.ref_frame_idx[OC_FRAME_SELF]=0;
  _dec->state.ref_frame_idx[OC_FRAME_PREV]=0;
  _dec->state.ref_frame_idx[OC_FRAME_GOLD]=0;
  _dec->state.ref_frame_data[OC_FRAME_SELF]=
   _dec->state.ref_frame_bufs[0][0].data;
  _dec->state.ref_frame_data[OC_FRAME_PREV]=
   _dec->state.ref_frame_data[OC_FRAME_SELF];
  _dec->state.ref_frame_data[OC_FRAME_GOLD]=
   _dec->state.ref_frame_data[OC_FRAME_PREV];
  /*The post-processing output also describes the three planes of
     buffer 0.*/
  memcpy(_dec->pp_frame_buf,_dec->state.ref_frame_bufs[0],
   sizeof(_dec->pp_frame_buf[0])*3);
  info=&_dec->state.info;
  /*Work out the size of the padded luma and chroma planes.*/
  luma_stride=abs(_dec->state.ref_ystride[0]);
  chroma_stride=abs(_dec->state.ref_ystride[1]);
  luma_rows=info->frame_height+2*OC_UMV_PADDING;
  chroma_rows=luma_rows;
  /*Bit 1 of the pixel format is clear when the chroma planes have half the
     number of rows.*/
  if(!(info->pixel_fmt&2))chroma_rows>>=1;
  luma_sz=luma_stride*(size_t)luma_rows+16;
  chroma_sz=chroma_stride*(size_t)chroma_rows;
  fill_offset=luma_stride*(ptrdiff_t)(luma_rows-OC_UMV_PADDING-1)
   +OC_UMV_PADDING;
  /*Fill all three planes with 0x80 in one go.
    NOTE(review): this relies on the planes being laid out contiguously,
     starting fill_offset bytes before ref_frame_data[0]; confirm against the
     buffer allocation code.*/
  memset(_dec->state.ref_frame_data[0]-fill_offset,0x80,luma_sz+2*chroma_sz);
}
2081 
2082 #if defined(HAVE_CAIRO)
oc_render_telemetry(th_dec_ctx * _dec,th_ycbcr_buffer _ycbcr,int _telemetry)2083 static void oc_render_telemetry(th_dec_ctx *_dec,th_ycbcr_buffer _ycbcr,
2084  int _telemetry){
2085   /*Stuff the plane into cairo.*/
2086   cairo_surface_t *cs;
2087   unsigned char   *data;
2088   unsigned char   *y_row;
2089   unsigned char   *u_row;
2090   unsigned char   *v_row;
2091   unsigned char   *rgb_row;
2092   int              cstride;
2093   int              w;
2094   int              h;
2095   int              x;
2096   int              y;
2097   int              hdec;
2098   int              vdec;
2099   w=_ycbcr[0].width;
2100   h=_ycbcr[0].height;
2101   hdec=!(_dec->state.info.pixel_fmt&1);
2102   vdec=!(_dec->state.info.pixel_fmt&2);
2103   /*Lazy data buffer init.
2104     We could try to re-use the post-processing buffer, which would save
2105      memory, but complicate the allocation logic there.
2106     I don't think anyone cares about memory usage when using telemetry; it is
2107      not meant for embedded devices.*/
2108   if(_dec->telemetry_frame_data==NULL){
2109     _dec->telemetry_frame_data=_ogg_malloc(
2110      (w*h+2*(w>>hdec)*(h>>vdec))*sizeof(*_dec->telemetry_frame_data));
2111     if(_dec->telemetry_frame_data==NULL)return;
2112   }
2113   cs=cairo_image_surface_create(CAIRO_FORMAT_RGB24,w,h);
2114   /*Sadly, no YUV support in Cairo (yet); convert into the RGB buffer.*/
2115   data=cairo_image_surface_get_data(cs);
2116   if(data==NULL){
2117     cairo_surface_destroy(cs);
2118     return;
2119   }
2120   cstride=cairo_image_surface_get_stride(cs);
2121   y_row=_ycbcr[0].data;
2122   u_row=_ycbcr[1].data;
2123   v_row=_ycbcr[2].data;
2124   rgb_row=data;
2125   for(y=0;y<h;y++){
2126     for(x=0;x<w;x++){
2127       int r;
2128       int g;
2129       int b;
2130       r=(1904000*y_row[x]+2609823*v_row[x>>hdec]-363703744)/1635200;
2131       g=(3827562*y_row[x]-1287801*u_row[x>>hdec]
2132        -2672387*v_row[x>>hdec]+447306710)/3287200;
2133       b=(952000*y_row[x]+1649289*u_row[x>>hdec]-225932192)/817600;
2134       rgb_row[4*x+0]=OC_CLAMP255(b);
2135       rgb_row[4*x+1]=OC_CLAMP255(g);
2136       rgb_row[4*x+2]=OC_CLAMP255(r);
2137     }
2138     y_row+=_ycbcr[0].stride;
2139     u_row+=_ycbcr[1].stride&-((y&1)|!vdec);
2140     v_row+=_ycbcr[2].stride&-((y&1)|!vdec);
2141     rgb_row+=cstride;
2142   }
2143   /*Draw coded identifier for each macroblock (stored in Hilbert order).*/
2144   {
2145     cairo_t           *c;
2146     const oc_fragment *frags;
2147     oc_mv             *frag_mvs;
2148     const signed char *mb_modes;
2149     oc_mb_map         *mb_maps;
2150     size_t             nmbs;
2151     size_t             mbi;
2152     int                row2;
2153     int                col2;
2154     int                qim[3]={0,0,0};
2155     if(_dec->state.nqis==2){
2156       int bqi;
2157       bqi=_dec->state.qis[0];
2158       if(_dec->state.qis[1]>bqi)qim[1]=1;
2159       if(_dec->state.qis[1]<bqi)qim[1]=-1;
2160     }
2161     if(_dec->state.nqis==3){
2162       int bqi;
2163       int cqi;
2164       int dqi;
2165       bqi=_dec->state.qis[0];
2166       cqi=_dec->state.qis[1];
2167       dqi=_dec->state.qis[2];
2168       if(cqi>bqi&&dqi>bqi){
2169         if(dqi>cqi){
2170           qim[1]=1;
2171           qim[2]=2;
2172         }
2173         else{
2174           qim[1]=2;
2175           qim[2]=1;
2176         }
2177       }
2178       else if(cqi<bqi&&dqi<bqi){
2179         if(dqi<cqi){
2180           qim[1]=-1;
2181           qim[2]=-2;
2182         }
2183         else{
2184           qim[1]=-2;
2185           qim[2]=-1;
2186         }
2187       }
2188       else{
2189         if(cqi<bqi)qim[1]=-1;
2190         else qim[1]=1;
2191         if(dqi<bqi)qim[2]=-1;
2192         else qim[2]=1;
2193       }
2194     }
2195     c=cairo_create(cs);
2196     frags=_dec->state.frags;
2197     frag_mvs=_dec->state.frag_mvs;
2198     mb_modes=_dec->state.mb_modes;
2199     mb_maps=_dec->state.mb_maps;
2200     nmbs=_dec->state.nmbs;
2201     row2=0;
2202     col2=0;
2203     for(mbi=0;mbi<nmbs;mbi++){
2204       float x;
2205       float y;
2206       int   bi;
2207       y=h-(row2+((col2+1>>1)&1))*16-16;
2208       x=(col2>>1)*16;
2209       cairo_set_line_width(c,1.);
2210       /*Keyframe (all intra) red box.*/
2211       if(_dec->state.frame_type==OC_INTRA_FRAME){
2212         if(_dec->telemetry_mbmode&0x02){
2213           cairo_set_source_rgba(c,1.,0,0,.5);
2214           cairo_rectangle(c,x+2.5,y+2.5,11,11);
2215           cairo_stroke_preserve(c);
2216           cairo_set_source_rgba(c,1.,0,0,.25);
2217           cairo_fill(c);
2218         }
2219       }
2220       else{
2221         ptrdiff_t fragi;
2222         int       frag_mvx;
2223         int       frag_mvy;
2224         for(bi=0;bi<4;bi++){
2225           fragi=mb_maps[mbi][0][bi];
2226           if(fragi>=0&&frags[fragi].coded){
2227             frag_mvx=OC_MV_X(frag_mvs[fragi]);
2228             frag_mvy=OC_MV_Y(frag_mvs[fragi]);
2229             break;
2230           }
2231         }
2232         if(bi<4){
2233           switch(mb_modes[mbi]){
2234             case OC_MODE_INTRA:{
2235               if(_dec->telemetry_mbmode&0x02){
2236                 cairo_set_source_rgba(c,1.,0,0,.5);
2237                 cairo_rectangle(c,x+2.5,y+2.5,11,11);
2238                 cairo_stroke_preserve(c);
2239                 cairo_set_source_rgba(c,1.,0,0,.25);
2240                 cairo_fill(c);
2241               }
2242             }break;
2243             case OC_MODE_INTER_NOMV:{
2244               if(_dec->telemetry_mbmode&0x01){
2245                 cairo_set_source_rgba(c,0,0,1.,.5);
2246                 cairo_rectangle(c,x+2.5,y+2.5,11,11);
2247                 cairo_stroke_preserve(c);
2248                 cairo_set_source_rgba(c,0,0,1.,.25);
2249                 cairo_fill(c);
2250               }
2251             }break;
2252             case OC_MODE_INTER_MV:{
2253               if(_dec->telemetry_mbmode&0x04){
2254                 cairo_rectangle(c,x+2.5,y+2.5,11,11);
2255                 cairo_set_source_rgba(c,0,1.,0,.5);
2256                 cairo_stroke(c);
2257               }
2258               if(_dec->telemetry_mv&0x04){
2259                 cairo_move_to(c,x+8+frag_mvx,y+8-frag_mvy);
2260                 cairo_set_source_rgba(c,1.,1.,1.,.9);
2261                 cairo_set_line_width(c,3.);
2262                 cairo_line_to(c,x+8+frag_mvx*.66,y+8-frag_mvy*.66);
2263                 cairo_stroke_preserve(c);
2264                 cairo_set_line_width(c,2.);
2265                 cairo_line_to(c,x+8+frag_mvx*.33,y+8-frag_mvy*.33);
2266                 cairo_stroke_preserve(c);
2267                 cairo_set_line_width(c,1.);
2268                 cairo_line_to(c,x+8,y+8);
2269                 cairo_stroke(c);
2270               }
2271             }break;
2272             case OC_MODE_INTER_MV_LAST:{
2273               if(_dec->telemetry_mbmode&0x08){
2274                 cairo_rectangle(c,x+2.5,y+2.5,11,11);
2275                 cairo_set_source_rgba(c,0,1.,0,.5);
2276                 cairo_move_to(c,x+13.5,y+2.5);
2277                 cairo_line_to(c,x+2.5,y+8);
2278                 cairo_line_to(c,x+13.5,y+13.5);
2279                 cairo_stroke(c);
2280               }
2281               if(_dec->telemetry_mv&0x08){
2282                 cairo_move_to(c,x+8+frag_mvx,y+8-frag_mvy);
2283                 cairo_set_source_rgba(c,1.,1.,1.,.9);
2284                 cairo_set_line_width(c,3.);
2285                 cairo_line_to(c,x+8+frag_mvx*.66,y+8-frag_mvy*.66);
2286                 cairo_stroke_preserve(c);
2287                 cairo_set_line_width(c,2.);
2288                 cairo_line_to(c,x+8+frag_mvx*.33,y+8-frag_mvy*.33);
2289                 cairo_stroke_preserve(c);
2290                 cairo_set_line_width(c,1.);
2291                 cairo_line_to(c,x+8,y+8);
2292                 cairo_stroke(c);
2293               }
2294             }break;
2295             case OC_MODE_INTER_MV_LAST2:{
2296               if(_dec->telemetry_mbmode&0x10){
2297                 cairo_rectangle(c,x+2.5,y+2.5,11,11);
2298                 cairo_set_source_rgba(c,0,1.,0,.5);
2299                 cairo_move_to(c,x+8,y+2.5);
2300                 cairo_line_to(c,x+2.5,y+8);
2301                 cairo_line_to(c,x+8,y+13.5);
2302                 cairo_move_to(c,x+13.5,y+2.5);
2303                 cairo_line_to(c,x+8,y+8);
2304                 cairo_line_to(c,x+13.5,y+13.5);
2305                 cairo_stroke(c);
2306               }
2307               if(_dec->telemetry_mv&0x10){
2308                 cairo_move_to(c,x+8+frag_mvx,y+8-frag_mvy);
2309                 cairo_set_source_rgba(c,1.,1.,1.,.9);
2310                 cairo_set_line_width(c,3.);
2311                 cairo_line_to(c,x+8+frag_mvx*.66,y+8-frag_mvy*.66);
2312                 cairo_stroke_preserve(c);
2313                 cairo_set_line_width(c,2.);
2314                 cairo_line_to(c,x+8+frag_mvx*.33,y+8-frag_mvy*.33);
2315                 cairo_stroke_preserve(c);
2316                 cairo_set_line_width(c,1.);
2317                 cairo_line_to(c,x+8,y+8);
2318                 cairo_stroke(c);
2319               }
2320             }break;
2321             case OC_MODE_GOLDEN_NOMV:{
2322               if(_dec->telemetry_mbmode&0x20){
2323                 cairo_set_source_rgba(c,1.,1.,0,.5);
2324                 cairo_rectangle(c,x+2.5,y+2.5,11,11);
2325                 cairo_stroke_preserve(c);
2326                 cairo_set_source_rgba(c,1.,1.,0,.25);
2327                 cairo_fill(c);
2328               }
2329             }break;
2330             case OC_MODE_GOLDEN_MV:{
2331               if(_dec->telemetry_mbmode&0x40){
2332                 cairo_rectangle(c,x+2.5,y+2.5,11,11);
2333                 cairo_set_source_rgba(c,1.,1.,0,.5);
2334                 cairo_stroke(c);
2335               }
2336               if(_dec->telemetry_mv&0x40){
2337                 cairo_move_to(c,x+8+frag_mvx,y+8-frag_mvy);
2338                 cairo_set_source_rgba(c,1.,1.,1.,.9);
2339                 cairo_set_line_width(c,3.);
2340                 cairo_line_to(c,x+8+frag_mvx*.66,y+8-frag_mvy*.66);
2341                 cairo_stroke_preserve(c);
2342                 cairo_set_line_width(c,2.);
2343                 cairo_line_to(c,x+8+frag_mvx*.33,y+8-frag_mvy*.33);
2344                 cairo_stroke_preserve(c);
2345                 cairo_set_line_width(c,1.);
2346                 cairo_line_to(c,x+8,y+8);
2347                 cairo_stroke(c);
2348               }
2349             }break;
2350             case OC_MODE_INTER_MV_FOUR:{
2351               if(_dec->telemetry_mbmode&0x80){
2352                 cairo_rectangle(c,x+2.5,y+2.5,4,4);
2353                 cairo_rectangle(c,x+9.5,y+2.5,4,4);
2354                 cairo_rectangle(c,x+2.5,y+9.5,4,4);
2355                 cairo_rectangle(c,x+9.5,y+9.5,4,4);
2356                 cairo_set_source_rgba(c,0,1.,0,.5);
2357                 cairo_stroke(c);
2358               }
2359               /*4mv is odd, coded in raster order.*/
2360               fragi=mb_maps[mbi][0][0];
2361               if(frags[fragi].coded&&_dec->telemetry_mv&0x80){
2362                 frag_mvx=OC_MV_X(frag_mvs[fragi]);
2363                 frag_mvx=OC_MV_Y(frag_mvs[fragi]);
2364                 cairo_move_to(c,x+4+frag_mvx,y+12-frag_mvy);
2365                 cairo_set_source_rgba(c,1.,1.,1.,.9);
2366                 cairo_set_line_width(c,3.);
2367                 cairo_line_to(c,x+4+frag_mvx*.66,y+12-frag_mvy*.66);
2368                 cairo_stroke_preserve(c);
2369                 cairo_set_line_width(c,2.);
2370                 cairo_line_to(c,x+4+frag_mvx*.33,y+12-frag_mvy*.33);
2371                 cairo_stroke_preserve(c);
2372                 cairo_set_line_width(c,1.);
2373                 cairo_line_to(c,x+4,y+12);
2374                 cairo_stroke(c);
2375               }
2376               fragi=mb_maps[mbi][0][1];
2377               if(frags[fragi].coded&&_dec->telemetry_mv&0x80){
2378                 frag_mvx=OC_MV_X(frag_mvs[fragi]);
2379                 frag_mvx=OC_MV_Y(frag_mvs[fragi]);
2380                 cairo_move_to(c,x+12+frag_mvx,y+12-frag_mvy);
2381                 cairo_set_source_rgba(c,1.,1.,1.,.9);
2382                 cairo_set_line_width(c,3.);
2383                 cairo_line_to(c,x+12+frag_mvx*.66,y+12-frag_mvy*.66);
2384                 cairo_stroke_preserve(c);
2385                 cairo_set_line_width(c,2.);
2386                 cairo_line_to(c,x+12+frag_mvx*.33,y+12-frag_mvy*.33);
2387                 cairo_stroke_preserve(c);
2388                 cairo_set_line_width(c,1.);
2389                 cairo_line_to(c,x+12,y+12);
2390                 cairo_stroke(c);
2391               }
2392               fragi=mb_maps[mbi][0][2];
2393               if(frags[fragi].coded&&_dec->telemetry_mv&0x80){
2394                 frag_mvx=OC_MV_X(frag_mvs[fragi]);
2395                 frag_mvx=OC_MV_Y(frag_mvs[fragi]);
2396                 cairo_move_to(c,x+4+frag_mvx,y+4-frag_mvy);
2397                 cairo_set_source_rgba(c,1.,1.,1.,.9);
2398                 cairo_set_line_width(c,3.);
2399                 cairo_line_to(c,x+4+frag_mvx*.66,y+4-frag_mvy*.66);
2400                 cairo_stroke_preserve(c);
2401                 cairo_set_line_width(c,2.);
2402                 cairo_line_to(c,x+4+frag_mvx*.33,y+4-frag_mvy*.33);
2403                 cairo_stroke_preserve(c);
2404                 cairo_set_line_width(c,1.);
2405                 cairo_line_to(c,x+4,y+4);
2406                 cairo_stroke(c);
2407               }
2408               fragi=mb_maps[mbi][0][3];
2409               if(frags[fragi].coded&&_dec->telemetry_mv&0x80){
2410                 frag_mvx=OC_MV_X(frag_mvs[fragi]);
2411                 frag_mvx=OC_MV_Y(frag_mvs[fragi]);
2412                 cairo_move_to(c,x+12+frag_mvx,y+4-frag_mvy);
2413                 cairo_set_source_rgba(c,1.,1.,1.,.9);
2414                 cairo_set_line_width(c,3.);
2415                 cairo_line_to(c,x+12+frag_mvx*.66,y+4-frag_mvy*.66);
2416                 cairo_stroke_preserve(c);
2417                 cairo_set_line_width(c,2.);
2418                 cairo_line_to(c,x+12+frag_mvx*.33,y+4-frag_mvy*.33);
2419                 cairo_stroke_preserve(c);
2420                 cairo_set_line_width(c,1.);
2421                 cairo_line_to(c,x+12,y+4);
2422                 cairo_stroke(c);
2423               }
2424             }break;
2425           }
2426         }
2427       }
2428       /*qii illustration.*/
2429       if(_dec->telemetry_qi&0x2){
2430         cairo_set_line_cap(c,CAIRO_LINE_CAP_SQUARE);
2431         for(bi=0;bi<4;bi++){
2432           ptrdiff_t fragi;
2433           int       qiv;
2434           int       xp;
2435           int       yp;
2436           xp=x+(bi&1)*8;
2437           yp=y+8-(bi&2)*4;
2438           fragi=mb_maps[mbi][0][bi];
2439           if(fragi>=0&&frags[fragi].coded){
2440             qiv=qim[frags[fragi].qii];
2441             cairo_set_line_width(c,3.);
2442             cairo_set_source_rgba(c,0.,0.,0.,.5);
2443             switch(qiv){
2444               /*Double plus:*/
2445               case 2:{
2446                 if((bi&1)^((bi&2)>>1)){
2447                   cairo_move_to(c,xp+2.5,yp+1.5);
2448                   cairo_line_to(c,xp+2.5,yp+3.5);
2449                   cairo_move_to(c,xp+1.5,yp+2.5);
2450                   cairo_line_to(c,xp+3.5,yp+2.5);
2451                   cairo_move_to(c,xp+5.5,yp+4.5);
2452                   cairo_line_to(c,xp+5.5,yp+6.5);
2453                   cairo_move_to(c,xp+4.5,yp+5.5);
2454                   cairo_line_to(c,xp+6.5,yp+5.5);
2455                   cairo_stroke_preserve(c);
2456                   cairo_set_source_rgba(c,0.,1.,1.,1.);
2457                 }
2458                 else{
2459                   cairo_move_to(c,xp+5.5,yp+1.5);
2460                   cairo_line_to(c,xp+5.5,yp+3.5);
2461                   cairo_move_to(c,xp+4.5,yp+2.5);
2462                   cairo_line_to(c,xp+6.5,yp+2.5);
2463                   cairo_move_to(c,xp+2.5,yp+4.5);
2464                   cairo_line_to(c,xp+2.5,yp+6.5);
2465                   cairo_move_to(c,xp+1.5,yp+5.5);
2466                   cairo_line_to(c,xp+3.5,yp+5.5);
2467                   cairo_stroke_preserve(c);
2468                   cairo_set_source_rgba(c,0.,1.,1.,1.);
2469                 }
2470               }break;
2471               /*Double minus:*/
2472               case -2:{
2473                 cairo_move_to(c,xp+2.5,yp+2.5);
2474                 cairo_line_to(c,xp+5.5,yp+2.5);
2475                 cairo_move_to(c,xp+2.5,yp+5.5);
2476                 cairo_line_to(c,xp+5.5,yp+5.5);
2477                 cairo_stroke_preserve(c);
2478                 cairo_set_source_rgba(c,1.,1.,1.,1.);
2479               }break;
2480               /*Plus:*/
2481               case 1:{
2482                 if((bi&2)==0)yp-=2;
2483                 if((bi&1)==0)xp-=2;
2484                 cairo_move_to(c,xp+4.5,yp+2.5);
2485                 cairo_line_to(c,xp+4.5,yp+6.5);
2486                 cairo_move_to(c,xp+2.5,yp+4.5);
2487                 cairo_line_to(c,xp+6.5,yp+4.5);
2488                 cairo_stroke_preserve(c);
2489                 cairo_set_source_rgba(c,.1,1.,.3,1.);
2490                 break;
2491               }
2492               /*Fall through.*/
2493               /*Minus:*/
2494               case -1:{
2495                 cairo_move_to(c,xp+2.5,yp+4.5);
2496                 cairo_line_to(c,xp+6.5,yp+4.5);
2497                 cairo_stroke_preserve(c);
2498                 cairo_set_source_rgba(c,1.,.3,.1,1.);
2499               }break;
2500               default:continue;
2501             }
2502             cairo_set_line_width(c,1.);
2503             cairo_stroke(c);
2504           }
2505         }
2506       }
2507       col2++;
2508       if((col2>>1)>=_dec->state.nhmbs){
2509         col2=0;
2510         row2+=2;
2511       }
2512     }
2513     /*Bit usage indicator[s]:*/
2514     if(_dec->telemetry_bits){
2515       int widths[6];
2516       int fpsn;
2517       int fpsd;
2518       int mult;
2519       int fullw;
2520       int padw;
2521       int i;
2522       fpsn=_dec->state.info.fps_numerator;
2523       fpsd=_dec->state.info.fps_denominator;
2524       mult=(_dec->telemetry_bits>=0xFF?1:_dec->telemetry_bits);
2525       fullw=250.f*h*fpsd*mult/fpsn;
2526       padw=w-24;
2527       /*Header and coded block bits.*/
2528       if(_dec->telemetry_frame_bytes<0||
2529        _dec->telemetry_frame_bytes==OC_LOTS_OF_BITS){
2530         _dec->telemetry_frame_bytes=0;
2531       }
2532       if(_dec->telemetry_coding_bytes<0||
2533        _dec->telemetry_coding_bytes>_dec->telemetry_frame_bytes){
2534         _dec->telemetry_coding_bytes=0;
2535       }
2536       if(_dec->telemetry_mode_bytes<0||
2537        _dec->telemetry_mode_bytes>_dec->telemetry_frame_bytes){
2538         _dec->telemetry_mode_bytes=0;
2539       }
2540       if(_dec->telemetry_mv_bytes<0||
2541        _dec->telemetry_mv_bytes>_dec->telemetry_frame_bytes){
2542         _dec->telemetry_mv_bytes=0;
2543       }
2544       if(_dec->telemetry_qi_bytes<0||
2545        _dec->telemetry_qi_bytes>_dec->telemetry_frame_bytes){
2546         _dec->telemetry_qi_bytes=0;
2547       }
2548       if(_dec->telemetry_dc_bytes<0||
2549        _dec->telemetry_dc_bytes>_dec->telemetry_frame_bytes){
2550         _dec->telemetry_dc_bytes=0;
2551       }
2552       widths[0]=padw*
2553        (_dec->telemetry_frame_bytes-_dec->telemetry_coding_bytes)/fullw;
2554       widths[1]=padw*
2555        (_dec->telemetry_coding_bytes-_dec->telemetry_mode_bytes)/fullw;
2556       widths[2]=padw*
2557        (_dec->telemetry_mode_bytes-_dec->telemetry_mv_bytes)/fullw;
2558       widths[3]=padw*(_dec->telemetry_mv_bytes-_dec->telemetry_qi_bytes)/fullw;
2559       widths[4]=padw*(_dec->telemetry_qi_bytes-_dec->telemetry_dc_bytes)/fullw;
2560       widths[5]=padw*(_dec->telemetry_dc_bytes)/fullw;
2561       for(i=0;i<6;i++)if(widths[i]>w)widths[i]=w;
2562       cairo_set_source_rgba(c,.0,.0,.0,.6);
2563       cairo_rectangle(c,10,h-33,widths[0]+1,5);
2564       cairo_rectangle(c,10,h-29,widths[1]+1,5);
2565       cairo_rectangle(c,10,h-25,widths[2]+1,5);
2566       cairo_rectangle(c,10,h-21,widths[3]+1,5);
2567       cairo_rectangle(c,10,h-17,widths[4]+1,5);
2568       cairo_rectangle(c,10,h-13,widths[5]+1,5);
2569       cairo_fill(c);
2570       cairo_set_source_rgb(c,1,0,0);
2571       cairo_rectangle(c,10.5,h-32.5,widths[0],4);
2572       cairo_fill(c);
2573       cairo_set_source_rgb(c,0,1,0);
2574       cairo_rectangle(c,10.5,h-28.5,widths[1],4);
2575       cairo_fill(c);
2576       cairo_set_source_rgb(c,0,0,1);
2577       cairo_rectangle(c,10.5,h-24.5,widths[2],4);
2578       cairo_fill(c);
2579       cairo_set_source_rgb(c,.6,.4,.0);
2580       cairo_rectangle(c,10.5,h-20.5,widths[3],4);
2581       cairo_fill(c);
2582       cairo_set_source_rgb(c,.3,.3,.3);
2583       cairo_rectangle(c,10.5,h-16.5,widths[4],4);
2584       cairo_fill(c);
2585       cairo_set_source_rgb(c,.5,.5,.8);
2586       cairo_rectangle(c,10.5,h-12.5,widths[5],4);
2587       cairo_fill(c);
2588     }
2589     /*Master qi indicator[s]:*/
2590     if(_dec->telemetry_qi&0x1){
2591       cairo_text_extents_t extents;
2592       char                 buffer[10];
2593       int                  p;
2594       int                  y;
2595       p=0;
2596       y=h-7.5;
2597       if(_dec->state.qis[0]>=10)buffer[p++]=48+_dec->state.qis[0]/10;
2598       buffer[p++]=48+_dec->state.qis[0]%10;
2599       if(_dec->state.nqis>=2){
2600         buffer[p++]=' ';
2601         if(_dec->state.qis[1]>=10)buffer[p++]=48+_dec->state.qis[1]/10;
2602         buffer[p++]=48+_dec->state.qis[1]%10;
2603       }
2604       if(_dec->state.nqis==3){
2605         buffer[p++]=' ';
2606         if(_dec->state.qis[2]>=10)buffer[p++]=48+_dec->state.qis[2]/10;
2607         buffer[p++]=48+_dec->state.qis[2]%10;
2608       }
2609       buffer[p++]='\0';
2610       cairo_select_font_face(c,"sans",
2611        CAIRO_FONT_SLANT_NORMAL,CAIRO_FONT_WEIGHT_BOLD);
2612       cairo_set_font_size(c,18);
2613       cairo_text_extents(c,buffer,&extents);
2614       cairo_set_source_rgb(c,1,1,1);
2615       cairo_move_to(c,w-extents.x_advance-10,y);
2616       cairo_show_text(c,buffer);
2617       cairo_set_source_rgb(c,0,0,0);
2618       cairo_move_to(c,w-extents.x_advance-10,y);
2619       cairo_text_path(c,buffer);
2620       cairo_set_line_width(c,.8);
2621       cairo_set_line_join(c,CAIRO_LINE_JOIN_ROUND);
2622       cairo_stroke(c);
2623     }
2624     cairo_destroy(c);
2625   }
2626   /*Out of the Cairo plane into the telemetry YUV buffer.*/
2627   _ycbcr[0].data=_dec->telemetry_frame_data;
2628   _ycbcr[0].stride=_ycbcr[0].width;
2629   _ycbcr[1].data=_ycbcr[0].data+h*_ycbcr[0].stride;
2630   _ycbcr[1].stride=_ycbcr[1].width;
2631   _ycbcr[2].data=_ycbcr[1].data+(h>>vdec)*_ycbcr[1].stride;
2632   _ycbcr[2].stride=_ycbcr[2].width;
2633   y_row=_ycbcr[0].data;
2634   u_row=_ycbcr[1].data;
2635   v_row=_ycbcr[2].data;
2636   rgb_row=data;
2637   /*This is one of the few places it's worth handling chroma on a
2638      case-by-case basis.*/
2639   switch(_dec->state.info.pixel_fmt){
2640     case TH_PF_420:{
2641       for(y=0;y<h;y+=2){
2642         unsigned char *y_row2;
2643         unsigned char *rgb_row2;
2644         y_row2=y_row+_ycbcr[0].stride;
2645         rgb_row2=rgb_row+cstride;
2646         for(x=0;x<w;x+=2){
2647           int y;
2648           int u;
2649           int v;
2650           y=(65481*rgb_row[4*x+2]+128553*rgb_row[4*x+1]
2651            +24966*rgb_row[4*x+0]+4207500)/255000;
2652           y_row[x]=OC_CLAMP255(y);
2653           y=(65481*rgb_row[4*x+6]+128553*rgb_row[4*x+5]
2654            +24966*rgb_row[4*x+4]+4207500)/255000;
2655           y_row[x+1]=OC_CLAMP255(y);
2656           y=(65481*rgb_row2[4*x+2]+128553*rgb_row2[4*x+1]
2657            +24966*rgb_row2[4*x+0]+4207500)/255000;
2658           y_row2[x]=OC_CLAMP255(y);
2659           y=(65481*rgb_row2[4*x+6]+128553*rgb_row2[4*x+5]
2660            +24966*rgb_row2[4*x+4]+4207500)/255000;
2661           y_row2[x+1]=OC_CLAMP255(y);
2662           u=(-8372*(rgb_row[4*x+2]+rgb_row[4*x+6]
2663            +rgb_row2[4*x+2]+rgb_row2[4*x+6])
2664            -16436*(rgb_row[4*x+1]+rgb_row[4*x+5]
2665            +rgb_row2[4*x+1]+rgb_row2[4*x+5])
2666            +24808*(rgb_row[4*x+0]+rgb_row[4*x+4]
2667            +rgb_row2[4*x+0]+rgb_row2[4*x+4])+29032005)/225930;
2668           v=(39256*(rgb_row[4*x+2]+rgb_row[4*x+6]
2669            +rgb_row2[4*x+2]+rgb_row2[4*x+6])
2670            -32872*(rgb_row[4*x+1]+rgb_row[4*x+5]
2671             +rgb_row2[4*x+1]+rgb_row2[4*x+5])
2672            -6384*(rgb_row[4*x+0]+rgb_row[4*x+4]
2673             +rgb_row2[4*x+0]+rgb_row2[4*x+4])+45940035)/357510;
2674           u_row[x>>1]=OC_CLAMP255(u);
2675           v_row[x>>1]=OC_CLAMP255(v);
2676         }
2677         y_row+=_ycbcr[0].stride<<1;
2678         u_row+=_ycbcr[1].stride;
2679         v_row+=_ycbcr[2].stride;
2680         rgb_row+=cstride<<1;
2681       }
2682     }break;
2683     case TH_PF_422:{
2684       for(y=0;y<h;y++){
2685         for(x=0;x<w;x+=2){
2686           int y;
2687           int u;
2688           int v;
2689           y=(65481*rgb_row[4*x+2]+128553*rgb_row[4*x+1]
2690            +24966*rgb_row[4*x+0]+4207500)/255000;
2691           y_row[x]=OC_CLAMP255(y);
2692           y=(65481*rgb_row[4*x+6]+128553*rgb_row[4*x+5]
2693            +24966*rgb_row[4*x+4]+4207500)/255000;
2694           y_row[x+1]=OC_CLAMP255(y);
2695           u=(-16744*(rgb_row[4*x+2]+rgb_row[4*x+6])
2696            -32872*(rgb_row[4*x+1]+rgb_row[4*x+5])
2697            +49616*(rgb_row[4*x+0]+rgb_row[4*x+4])+29032005)/225930;
2698           v=(78512*(rgb_row[4*x+2]+rgb_row[4*x+6])
2699            -65744*(rgb_row[4*x+1]+rgb_row[4*x+5])
2700            -12768*(rgb_row[4*x+0]+rgb_row[4*x+4])+45940035)/357510;
2701           u_row[x>>1]=OC_CLAMP255(u);
2702           v_row[x>>1]=OC_CLAMP255(v);
2703         }
2704         y_row+=_ycbcr[0].stride;
2705         u_row+=_ycbcr[1].stride;
2706         v_row+=_ycbcr[2].stride;
2707         rgb_row+=cstride;
2708       }
2709     }break;
2710     /*case TH_PF_444:*/
2711     default:{
2712       for(y=0;y<h;y++){
2713         for(x=0;x<w;x++){
2714           int y;
2715           int u;
2716           int v;
2717           y=(65481*rgb_row[4*x+2]+128553*rgb_row[4*x+1]
2718            +24966*rgb_row[4*x+0]+4207500)/255000;
2719           u=(-33488*rgb_row[4*x+2]-65744*rgb_row[4*x+1]
2720            +99232*rgb_row[4*x+0]+29032005)/225930;
2721           v=(157024*rgb_row[4*x+2]-131488*rgb_row[4*x+1]
2722            -25536*rgb_row[4*x+0]+45940035)/357510;
2723           y_row[x]=OC_CLAMP255(y);
2724           u_row[x]=OC_CLAMP255(u);
2725           v_row[x]=OC_CLAMP255(v);
2726         }
2727         y_row+=_ycbcr[0].stride;
2728         u_row+=_ycbcr[1].stride;
2729         v_row+=_ycbcr[2].stride;
2730         rgb_row+=cstride;
2731       }
2732     }break;
2733   }
2734   /*Finished.
2735     Destroy the surface.*/
2736   cairo_surface_destroy(cs);
2737 }
2738 #endif
2739 
/*Recomputes the granule position for the current frame and then advances the
   frame counter.
  The granule position is the keyframe number (plus the granpos bias) shifted
   left by keyframe_granule_shift, plus the number of frames since that
   keyframe.
  _dec:     The decoder context to update.
  _granpos: If non-NULL, receives the new granule position.*/
static void oc_dec_granpos_advance(th_dec_ctx *_dec,ogg_int64_t *_granpos){
  _dec->state.granpos=
   ((_dec->state.keyframe_num+_dec->state.granpos_bias)<<
   _dec->state.info.keyframe_granule_shift)
   +(_dec->state.curframe_num-_dec->state.keyframe_num);
  _dec->state.curframe_num++;
  if(_granpos!=NULL)*_granpos=_dec->state.granpos;
}

/*Decodes a single video data packet.
  The packet headers, coded block flags, modes, motion vectors, block qi
   values and residual tokens are unpacked up front; reconstruction, loop
   filtering, border extension and post-processing are then run one MCU at a
   time, invoking the striped decode callback (if any) as rows become
   available.
  _dec:     The decoder context.
  _op:      The packet to decode.
            A zero-byte packet is treated as an inter frame with no coded
             blocks.
  _granpos: If non-NULL, receives the granule position of the decoded frame.
  Return: 0 if a frame with coded blocks was decoded.
          TH_DUPFRAME if the frame had no coded blocks; only the granule
           position and frame counter were updated.
          TH_EFAULT if _dec or _op was NULL.
          Otherwise, the negative value returned by
           oc_dec_frame_header_unpack().*/
int th_decode_packetin(th_dec_ctx *_dec,const ogg_packet *_op,
 ogg_int64_t *_granpos){
  int ret;
  if(_dec==NULL||_op==NULL)return TH_EFAULT;
  /*A completely empty packet indicates a dropped frame and is treated exactly
     like an inter frame with no coded blocks.*/
  if(_op->bytes==0){
    _dec->state.frame_type=OC_INTER_FRAME;
    _dec->state.ntotal_coded_fragis=0;
  }
  else{
    oc_pack_readinit(&_dec->opb,_op->packet,_op->bytes);
    ret=oc_dec_frame_header_unpack(_dec);
    if(ret<0)return ret;
    if(_dec->state.frame_type==OC_INTRA_FRAME)oc_dec_mark_all_intra(_dec);
    else oc_dec_coded_flags_unpack(_dec);
  }
  /*If there have been no reference frames, and we need one, initialize one.*/
  if(_dec->state.frame_type!=OC_INTRA_FRAME&&
   (_dec->state.ref_frame_idx[OC_FRAME_GOLD]<0||
   _dec->state.ref_frame_idx[OC_FRAME_PREV]<0)){
    oc_dec_init_dummy_frame(_dec);
  }
  /*If this was an inter frame with no coded blocks...*/
  if(_dec->state.ntotal_coded_fragis<=0){
    /*Just update the granule position and return.*/
    oc_dec_granpos_advance(_dec,_granpos);
    return TH_DUPFRAME;
  }
  else{
    th_ycbcr_buffer stripe_buf;
    int             stripe_fragy;
    int             refi;
    int             pli;
    int             notstart;
    int             notdone;
#if defined(HAVE_CAIRO)
    int             telemetry;
    /*Save the current telemetry state.
      This prevents it from being modified in the middle of decoding this
       frame, which could cause us to skip calls to the striped decoding
       callback.*/
    telemetry=_dec->telemetry_mbmode||_dec->telemetry_mv||
     _dec->telemetry_qi||_dec->telemetry_bits;
#endif
    /*Select a free buffer to use for the reconstructed version of this frame:
       the lowest index not currently used as the golden or previous
       reference.*/
    for(refi=0;refi==_dec->state.ref_frame_idx[OC_FRAME_GOLD]||
     refi==_dec->state.ref_frame_idx[OC_FRAME_PREV];refi++);
    _dec->state.ref_frame_idx[OC_FRAME_SELF]=refi;
    _dec->state.ref_frame_data[OC_FRAME_SELF]=
     _dec->state.ref_frame_bufs[refi][0].data;
#if defined(HAVE_CAIRO)
    _dec->telemetry_frame_bytes=_op->bytes;
#endif
    if(_dec->state.frame_type==OC_INTRA_FRAME){
      _dec->state.keyframe_num=_dec->state.curframe_num;
#if defined(HAVE_CAIRO)
      /*Intra frames unpack no modes or MVs, so all three counters record the
         same number of bytes left.*/
      _dec->telemetry_coding_bytes=
       _dec->telemetry_mode_bytes=
       _dec->telemetry_mv_bytes=oc_pack_bytes_left(&_dec->opb);
#endif
    }
    else{
#if defined(HAVE_CAIRO)
      _dec->telemetry_coding_bytes=oc_pack_bytes_left(&_dec->opb);
#endif
      oc_dec_mb_modes_unpack(_dec);
#if defined(HAVE_CAIRO)
      _dec->telemetry_mode_bytes=oc_pack_bytes_left(&_dec->opb);
#endif
      oc_dec_mv_unpack_and_frag_modes_fill(_dec);
#if defined(HAVE_CAIRO)
      _dec->telemetry_mv_bytes=oc_pack_bytes_left(&_dec->opb);
#endif
    }
    oc_dec_block_qis_unpack(_dec);
#if defined(HAVE_CAIRO)
    _dec->telemetry_qi_bytes=oc_pack_bytes_left(&_dec->opb);
#endif
    oc_dec_residual_tokens_unpack(_dec);
    /*Update granule position.
      This must be done before the striped decode callbacks so that the
       application knows what to do with the frame data.*/
    oc_dec_granpos_advance(_dec,_granpos);
    /*All of the rest of the operations -- DC prediction reversal,
       reconstructing coded fragments, copying uncoded fragments, loop
       filtering, extending borders, and out-of-loop post-processing -- should
       be pipelined.
      I.e., DC prediction reversal, reconstruction, and uncoded fragment
       copying are done for one or two super block rows, then loop filtering is
       run as far as it can, then bordering copying, then post-processing.
      For 4:2:0 video a Minimum Codable Unit or MCU contains two luma super
       block rows, and one chroma.
      Otherwise, an MCU consists of one super block row from each plane.
      Inside each MCU, we perform all of the steps on one color plane before
       moving on to the next.
      After reconstruction, the additional filtering stages introduce a delay
       since they need some pixels from the next fragment row.
      Thus the actual number of decoded rows available is slightly smaller for
       the first MCU, and slightly larger for the last.

      This entire process allows us to operate on the data while it is still in
       cache, resulting in big performance improvements.
      An application callback allows further application processing (blitting
       to video memory, color conversion, etc.) to also use the data while it's
       in cache.*/
    oc_dec_pipeline_init(_dec,&_dec->pipe);
    oc_ycbcr_buffer_flip(stripe_buf,_dec->pp_frame_buf);
    /*notstart is 0 only for the first MCU and notdone is 0 only for the last;
       they are added to the delays below so that the first and last stripes
       are not delayed past the frame edges.*/
    notstart=0;
    notdone=1;
    for(stripe_fragy=0;notdone;stripe_fragy+=_dec->pipe.mcu_nvfrags){
      int avail_fragy0;
      int avail_fragy_end;
      avail_fragy0=avail_fragy_end=_dec->state.fplanes[0].nvfrags;
      notdone=stripe_fragy+_dec->pipe.mcu_nvfrags<avail_fragy_end;
      for(pli=0;pli<3;pli++){
        oc_fragment_plane *fplane;
        int                frag_shift;
        int                pp_offset;
        int                sdelay;
        int                edelay;
        fplane=_dec->state.fplanes+pli;
        /*Compute the first and last fragment row of the current MCU for this
           plane.
          frag_shift is 1 for a chroma plane when bit 1 of pixel_fmt is clear
           (presumably the vertically sub-sampled formats), and 0 otherwise.*/
        frag_shift=pli!=0&&!(_dec->state.info.pixel_fmt&2);
        _dec->pipe.fragy0[pli]=stripe_fragy>>frag_shift;
        _dec->pipe.fragy_end[pli]=OC_MINI(fplane->nvfrags,
         _dec->pipe.fragy0[pli]+(_dec->pipe.mcu_nvfrags>>frag_shift));
        oc_dec_dc_unpredict_mcu_plane(_dec,&_dec->pipe,pli);
        oc_dec_frags_recon_mcu_plane(_dec,&_dec->pipe,pli);
        sdelay=edelay=0;
        if(_dec->pipe.loop_filter){
          sdelay+=notstart;
          edelay+=notdone;
          oc_state_loop_filter_frag_rows(&_dec->state,
           _dec->pipe.bounding_values,OC_FRAME_SELF,pli,
           _dec->pipe.fragy0[pli]-sdelay,_dec->pipe.fragy_end[pli]-edelay);
        }
        /*To fill the borders, we have an additional two pixel delay, since a
           fragment in the next row could filter its top edge, using two pixels
           from a fragment in this row.
          But there's no reason to delay a full fragment between the two.*/
        oc_state_borders_fill_rows(&_dec->state,refi,pli,
         ((_dec->pipe.fragy0[pli]-sdelay)<<3)-(sdelay<<1),
         ((_dec->pipe.fragy_end[pli]-edelay)<<3)-(edelay<<1));
        /*Out-of-loop post-processing.
          The chroma post-processing levels sit three above the corresponding
           luma levels.*/
        pp_offset=3*(pli!=0);
        if(_dec->pipe.pp_level>=OC_PP_LEVEL_DEBLOCKY+pp_offset){
          /*Perform de-blocking in one plane.*/
          sdelay+=notstart;
          edelay+=notdone;
          oc_dec_deblock_frag_rows(_dec,_dec->pp_frame_buf,
           _dec->state.ref_frame_bufs[refi],pli,
           _dec->pipe.fragy0[pli]-sdelay,_dec->pipe.fragy_end[pli]-edelay);
          if(_dec->pipe.pp_level>=OC_PP_LEVEL_DERINGY+pp_offset){
            /*Perform de-ringing in one plane.*/
            sdelay+=notstart;
            edelay+=notdone;
            oc_dec_dering_frag_rows(_dec,_dec->pp_frame_buf,pli,
             _dec->pipe.fragy0[pli]-sdelay,_dec->pipe.fragy_end[pli]-edelay);
          }
        }
        /*If no post-processing is done, we still need to delay a row for the
           loop filter, thanks to the strange filtering order VP3 chose.*/
        else if(_dec->pipe.loop_filter){
          sdelay+=notstart;
          edelay+=notdone;
        }
        /*Compute the intersection of the available rows in all planes.
          If chroma is sub-sampled, the effect of each of its delays is
           doubled, but luma might have more post-processing filters enabled
           than chroma, so we don't know up front which one is the limiting
           factor.*/
        avail_fragy0=OC_MINI(avail_fragy0,
         (_dec->pipe.fragy0[pli]-sdelay)<<frag_shift);
        avail_fragy_end=OC_MINI(avail_fragy_end,
         (_dec->pipe.fragy_end[pli]-edelay)<<frag_shift);
      }
      /*With telemetry active the per-stripe callback is skipped here; it is
         made once for the whole frame after the telemetry has been
         rendered.*/
#if defined(HAVE_CAIRO)
      if(_dec->stripe_cb.stripe_decoded!=NULL&&!telemetry){
#else
      if(_dec->stripe_cb.stripe_decoded!=NULL){
#endif
        /*The callback might want to use the FPU, so let's make sure they can.
          We violate all kinds of ABI restrictions by not doing this until
           now, but none of them actually matter since we don't use floating
           point ourselves.*/
        oc_restore_fpu(&_dec->state);
        /*Make the callback, ensuring we flip the sense of the "start" and
           "end" of the available region upside down.*/
        (*_dec->stripe_cb.stripe_decoded)(_dec->stripe_cb.ctx,stripe_buf,
         _dec->state.fplanes[0].nvfrags-avail_fragy_end,
         _dec->state.fplanes[0].nvfrags-avail_fragy0);
      }
      notstart=1;
    }
    /*Finish filling in the reference frame borders.*/
    for(pli=0;pli<3;pli++)oc_state_borders_fill_caps(&_dec->state,refi,pli);
    /*Update the reference frame indices.*/
    if(_dec->state.frame_type==OC_INTRA_FRAME){
      /*The new frame becomes both the previous and gold reference frames.*/
      _dec->state.ref_frame_idx[OC_FRAME_GOLD]=
       _dec->state.ref_frame_idx[OC_FRAME_PREV]=
       _dec->state.ref_frame_idx[OC_FRAME_SELF];
      _dec->state.ref_frame_data[OC_FRAME_GOLD]=
       _dec->state.ref_frame_data[OC_FRAME_PREV]=
       _dec->state.ref_frame_data[OC_FRAME_SELF];
    }
    else{
      /*Otherwise, just replace the previous reference frame.*/
      _dec->state.ref_frame_idx[OC_FRAME_PREV]=
       _dec->state.ref_frame_idx[OC_FRAME_SELF];
      _dec->state.ref_frame_data[OC_FRAME_PREV]=
       _dec->state.ref_frame_data[OC_FRAME_SELF];
    }
    /*Restore the FPU before dump_frame, since that _does_ use the FPU (for PNG
       gamma values, if nothing else).*/
    oc_restore_fpu(&_dec->state);
#if defined(HAVE_CAIRO)
    /*If telemetry ioctls are active, we need to draw to the output buffer.*/
    if(telemetry){
      oc_render_telemetry(_dec,stripe_buf,telemetry);
      oc_ycbcr_buffer_flip(_dec->pp_frame_buf,stripe_buf);
      /*If we had a striped decoding callback, we skipped calling it above
         (because the telemetry wasn't rendered yet).
        Call it now with the whole frame.*/
      if(_dec->stripe_cb.stripe_decoded!=NULL){
        (*_dec->stripe_cb.stripe_decoded)(_dec->stripe_cb.ctx,
         stripe_buf,0,_dec->state.fplanes[0].nvfrags);
      }
    }
#endif
#if defined(OC_DUMP_IMAGES)
    /*We only dump images if there were some coded blocks.*/
    oc_state_dump_frame(&_dec->state,OC_FRAME_SELF,"dec");
#endif
    return 0;
  }
}
2987 
2988 int th_decode_ycbcr_out(th_dec_ctx *_dec,th_ycbcr_buffer _ycbcr){
2989   if(_dec==NULL||_ycbcr==NULL)return TH_EFAULT;
2990   oc_ycbcr_buffer_flip(_ycbcr,_dec->pp_frame_buf);
2991   return 0;
2992 }
2993