1 /********************************************************************
2  *                                                                  *
3  * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
4  * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
5  * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
6  * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
7  *                                                                  *
8  * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009                *
9  * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
10  *                                                                  *
11  ********************************************************************
12 
13   function:
14     last mod: $Id: decode.c 16581 2009-09-25 22:56:16Z gmaxwell $
15 
16  ********************************************************************/
17 
18 #include <stdlib.h>
19 #include <string.h>
20 #include <ogg/ogg.h>
21 #include "decint.h"
22 #if defined(OC_DUMP_IMAGES)
23 # include <stdio.h>
24 # include "png.h"
25 #endif
26 #if defined(HAVE_CAIRO)
27 # include <cairo.h>
28 #endif
29 
30 
/*Post-processing levels.
  The values are consecutive, starting at OC_PP_LEVEL_DISABLED and ending at
   OC_PP_LEVEL_MAX, which has the same value as OC_PP_LEVEL_SDERINGC.
  NOTE(review): presumably each level also enables everything below it;
   confirm against the code that consumes pp_level.*/
/*No post-processing.*/
#define OC_PP_LEVEL_DISABLED  (0)
/*Keep track of DC qi for each block only.*/
#define OC_PP_LEVEL_TRACKDCQI (1)
/*Deblock the luma plane.*/
#define OC_PP_LEVEL_DEBLOCKY  (2)
/*Dering the luma plane.*/
#define OC_PP_LEVEL_DERINGY   (3)
/*Stronger luma plane deringing.*/
#define OC_PP_LEVEL_SDERINGY  (4)
/*Deblock the chroma planes.*/
#define OC_PP_LEVEL_DEBLOCKC  (5)
/*Dering the chroma planes.*/
#define OC_PP_LEVEL_DERINGC   (6)
/*Stronger chroma plane deringing.*/
#define OC_PP_LEVEL_SDERINGC  (7)
/*Maximum valid post-processing level.*/
#define OC_PP_LEVEL_MAX       (7)
49 
50 
51 
/*The mode alphabets for the various mode coding schemes.
  Scheme 0 uses a custom alphabet, which is not stored in this table.
  Row (scheme-1) is the alphabet for schemes 1 through 7: entry i of a row is
   the macro block mode assigned to decoded symbol i.
  The last row (the default ordering) is also the list of modes that
   oc_dec_mb_modes_unpack() distributes into the custom scheme 0 alphabet.*/
static const unsigned char OC_MODE_ALPHABETS[7][OC_NMODES]={
  /*Last MV dominates */
  {
    OC_MODE_INTER_MV_LAST,OC_MODE_INTER_MV_LAST2,OC_MODE_INTER_MV,
    OC_MODE_INTER_NOMV,OC_MODE_INTRA,OC_MODE_GOLDEN_NOMV,OC_MODE_GOLDEN_MV,
    OC_MODE_INTER_MV_FOUR
  },
  {
    OC_MODE_INTER_MV_LAST,OC_MODE_INTER_MV_LAST2,OC_MODE_INTER_NOMV,
    OC_MODE_INTER_MV,OC_MODE_INTRA,OC_MODE_GOLDEN_NOMV,OC_MODE_GOLDEN_MV,
    OC_MODE_INTER_MV_FOUR
  },
  {
    OC_MODE_INTER_MV_LAST,OC_MODE_INTER_MV,OC_MODE_INTER_MV_LAST2,
    OC_MODE_INTER_NOMV,OC_MODE_INTRA,OC_MODE_GOLDEN_NOMV,OC_MODE_GOLDEN_MV,
    OC_MODE_INTER_MV_FOUR
  },
  {
    OC_MODE_INTER_MV_LAST,OC_MODE_INTER_MV,OC_MODE_INTER_NOMV,
    OC_MODE_INTER_MV_LAST2,OC_MODE_INTRA,OC_MODE_GOLDEN_NOMV,
    OC_MODE_GOLDEN_MV,OC_MODE_INTER_MV_FOUR
  },
  /*No MV dominates.*/
  {
    OC_MODE_INTER_NOMV,OC_MODE_INTER_MV_LAST,OC_MODE_INTER_MV_LAST2,
    OC_MODE_INTER_MV,OC_MODE_INTRA,OC_MODE_GOLDEN_NOMV,OC_MODE_GOLDEN_MV,
    OC_MODE_INTER_MV_FOUR
  },
  {
    OC_MODE_INTER_NOMV,OC_MODE_GOLDEN_NOMV,OC_MODE_INTER_MV_LAST,
    OC_MODE_INTER_MV_LAST2,OC_MODE_INTER_MV,OC_MODE_INTRA,OC_MODE_GOLDEN_MV,
    OC_MODE_INTER_MV_FOUR
  },
  /*Default ordering.*/
  {
    OC_MODE_INTER_NOMV,OC_MODE_INTRA,OC_MODE_INTER_MV,OC_MODE_INTER_MV_LAST,
    OC_MODE_INTER_MV_LAST2,OC_MODE_GOLDEN_NOMV,OC_MODE_GOLDEN_MV,
    OC_MODE_INTER_MV_FOUR
  }
};
94 
95 
96 /*The original DCT tokens are extended and reordered during the construction of
97    the Huffman tables.
98   The extension means more bits can be read with fewer calls to the bitpacker
99    during the Huffman decoding process (at the cost of larger Huffman tables),
100    and fewer tokens require additional extra bits (reducing the average storage
101    per decoded token).
102   The revised ordering reveals essential information in the token value
103    itself; specifically, whether or not there are additional extra bits to read
104    and the parameter to which those extra bits are applied.
105   The token is used to fetch a code word from the OC_DCT_CODE_WORD table below.
106   The extra bits are added into code word at the bit position inferred from the
107    token value, giving the final code word from which all required parameters
108    are derived.
109   The number of EOBs and the leading zero run length can be extracted directly.
110   The coefficient magnitude is optionally negated before extraction, according
111    to a 'flip' bit.*/
112 
/*The number of additional extra bits that are decoded with each of the
   internal DCT tokens.
  Only the first 15 internal tokens appear here; every later token carries no
   additional extra bits.*/
static const unsigned char OC_INTERNAL_DCT_TOKEN_EXTRA_BITS[15]={
  12,4,3,3,4,4,5,5,8,8,8,8,3,3,6
};

/*Whether or not an internal token needs any additional extra bits.
  _token: The internal token value to test.
  Return: Non-zero if _token indexes OC_INTERNAL_DCT_TOKEN_EXTRA_BITS, zero
           otherwise.
  The argument is parenthesized so that an expression built from
   lower-precedence operators (e.g., a|b) is compared as a whole.
  The comparison is made against a size_t, so a negative _token converts to a
   large unsigned value and yields zero.*/
#define OC_DCT_TOKEN_NEEDS_MORE(_token) \
 ((_token)<(sizeof(OC_INTERNAL_DCT_TOKEN_EXTRA_BITS)/ \
  sizeof(*OC_INTERNAL_DCT_TOKEN_EXTRA_BITS)))
123 
/*This token (OC_DCT_REPEAT_RUN3_TOKEN) requires more than 8 extra bits.
  It is internal token 0, whose entry in OC_INTERNAL_DCT_TOKEN_EXTRA_BITS is
   12.*/
#define OC_DCT_TOKEN_FAT_EOB (0)

/*The number of EOBs to use for an end-of-frame token.
  Note: We want to set eobs to PTRDIFF_MAX here, but that requires C99, which
   is not yet available everywhere; this should be equivalent.
  The expression sets every bit of a size_t and then shifts right by one,
   leaving every bit except the top one set.*/
#define OC_DCT_EOB_FINISH (~(size_t)0>>1)
131 
/*The location of the (6) run length bits in the code word.
  These are placed at index 0 and given 8 bits (even though 6 would suffice)
   because it may be faster to extract the lower byte on some platforms.*/
#define OC_DCT_CW_RLEN_SHIFT (0)
/*The location of the (12) EOB bits in the code word.*/
#define OC_DCT_CW_EOB_SHIFT  (8)
/*The location of the (1) flip bit in the code word.
  This must be right under the magnitude bits.*/
#define OC_DCT_CW_FLIP_BIT   (20)
/*The location of the (11) token magnitude bits in the code word.
  These must be last, and rely on a sign-extending right shift.*/
#define OC_DCT_CW_MAG_SHIFT  (21)

/*Pack the given fields into a code word.
  _eobs: The EOB run length (non-negative).
  _rlen: The zero run length (non-negative).
  _mag:  The signed coefficient magnitude.
  _flip: The flip bit (0 or 1); it is subtracted from the magnitude before
          packing, since it sits directly below the magnitude field.
  Each field is moved into place by multiplying by a power of two instead of
   shifting: (_mag)-(_flip) is negative for many table entries, and left
   shifting a negative signed value is undefined behavior in C, whereas the
   multiplication is well defined and yields the same bit pattern.
  The result is still an integer constant expression, so it remains usable in
   static initializers.*/
#define OC_DCT_CW_PACK(_eobs,_rlen,_mag,_flip) \
 ((_eobs)*(1<<OC_DCT_CW_EOB_SHIFT)| \
 (_rlen)*(1<<OC_DCT_CW_RLEN_SHIFT)| \
 (_flip)*(1<<OC_DCT_CW_FLIP_BIT)| \
 ((_mag)-(_flip))*(1<<OC_DCT_CW_MAG_SHIFT))
151 
/*A special code word value that signals the end of the frame (a long EOB run
   of zero).*/
#define OC_DCT_CW_FINISH (0)

/*The position at which to insert the extra bits in the code word.
  We use this formulation because Intel has no useful cmov.
  A real architecture would probably do better with two of those.
  This translates to 11 instructions(!), and is _still_ faster than either a
   table lookup (just barely) or the naive double-ternary implementation (which
   gcc translates to a jump and a cmov).
  This assumes OC_DCT_CW_RLEN_SHIFT is zero, but could easily be reworked if
   you want to make one of the other shifts zero.
  Each comparison yields 0 or 1, which negation turns into an all-zeros or
   all-ones mask, so the result is:
    _token<2:     OC_DCT_CW_EOB_SHIFT (the two masked terms sum to it),
    2<=_token<12: OC_DCT_CW_MAG_SHIFT,
    _token>=12:   0 (i.e., OC_DCT_CW_RLEN_SHIFT).
  Note that binary '-' binds tighter than '&', so the first term is
   (OC_DCT_CW_EOB_SHIFT-OC_DCT_CW_MAG_SHIFT) masked by -((_token)<2).*/
#define OC_DCT_TOKEN_EB_POS(_token) \
 ((OC_DCT_CW_EOB_SHIFT-OC_DCT_CW_MAG_SHIFT&-((_token)<2)) \
 +(OC_DCT_CW_MAG_SHIFT&-((_token)<12)))
167 
/*The code words for each internal token.
  See the notes at OC_DCT_TOKEN_MAP for the reasons why things are out of
   order.
  Every entry except the first is built with
   OC_DCT_CW_PACK(eobs,rlen,mag,flip).
  The first 15 entries are the tokens that take additional extra bits, in the
   same order as OC_INTERNAL_DCT_TOKEN_EXTRA_BITS; the remaining 77 entries
   take none.*/
static const ogg_int32_t OC_DCT_CODE_WORD[92]={
  /*These tokens require additional extra bits for the EOB count.*/
  /*OC_DCT_REPEAT_RUN3_TOKEN (12 extra bits)*/
  OC_DCT_CW_FINISH,
  /*OC_DCT_REPEAT_RUN2_TOKEN (4 extra bits)*/
  OC_DCT_CW_PACK(16, 0,  0,0),
  /*These tokens require additional extra bits for the magnitude.*/
  /*OC_DCT_VAL_CAT5 (4 extra bits-1 already read)*/
  OC_DCT_CW_PACK( 0, 0, 13,0),
  OC_DCT_CW_PACK( 0, 0, 13,1),
  /*OC_DCT_VAL_CAT6 (5 extra bits-1 already read)*/
  OC_DCT_CW_PACK( 0, 0, 21,0),
  OC_DCT_CW_PACK( 0, 0, 21,1),
  /*OC_DCT_VAL_CAT7 (6 extra bits-1 already read)*/
  OC_DCT_CW_PACK( 0, 0, 37,0),
  OC_DCT_CW_PACK( 0, 0, 37,1),
  /*OC_DCT_VAL_CAT8 (10 extra bits-2 already read)*/
  OC_DCT_CW_PACK( 0, 0, 69,0),
  OC_DCT_CW_PACK( 0, 0,325,0),
  OC_DCT_CW_PACK( 0, 0, 69,1),
  OC_DCT_CW_PACK( 0, 0,325,1),
  /*These tokens require additional extra bits for the run length.*/
  /*OC_DCT_RUN_CAT1C (4 extra bits-1 already read)*/
  OC_DCT_CW_PACK( 0,10, +1,0),
  OC_DCT_CW_PACK( 0,10, -1,0),
  /*OC_DCT_ZRL_TOKEN (6 extra bits)
    Flip is set to distinguish this from OC_DCT_CW_FINISH.*/
  OC_DCT_CW_PACK( 0, 0,  0,1),
  /*The remaining tokens require no additional extra bits.*/
  /*OC_DCT_EOB1_TOKEN (0 extra bits)*/
  OC_DCT_CW_PACK( 1, 0,  0,0),
  /*OC_DCT_EOB2_TOKEN (0 extra bits)*/
  OC_DCT_CW_PACK( 2, 0,  0,0),
  /*OC_DCT_EOB3_TOKEN (0 extra bits)*/
  OC_DCT_CW_PACK( 3, 0,  0,0),
  /*OC_DCT_RUN_CAT1A (1 extra bit-1 already read)x5*/
  OC_DCT_CW_PACK( 0, 1, +1,0),
  OC_DCT_CW_PACK( 0, 1, -1,0),
  OC_DCT_CW_PACK( 0, 2, +1,0),
  OC_DCT_CW_PACK( 0, 2, -1,0),
  OC_DCT_CW_PACK( 0, 3, +1,0),
  OC_DCT_CW_PACK( 0, 3, -1,0),
  OC_DCT_CW_PACK( 0, 4, +1,0),
  OC_DCT_CW_PACK( 0, 4, -1,0),
  OC_DCT_CW_PACK( 0, 5, +1,0),
  OC_DCT_CW_PACK( 0, 5, -1,0),
  /*OC_DCT_RUN_CAT2A (2 extra bits-2 already read)*/
  OC_DCT_CW_PACK( 0, 1, +2,0),
  OC_DCT_CW_PACK( 0, 1, +3,0),
  OC_DCT_CW_PACK( 0, 1, -2,0),
  OC_DCT_CW_PACK( 0, 1, -3,0),
  /*OC_DCT_RUN_CAT1B (3 extra bits-3 already read)*/
  OC_DCT_CW_PACK( 0, 6, +1,0),
  OC_DCT_CW_PACK( 0, 7, +1,0),
  OC_DCT_CW_PACK( 0, 8, +1,0),
  OC_DCT_CW_PACK( 0, 9, +1,0),
  OC_DCT_CW_PACK( 0, 6, -1,0),
  OC_DCT_CW_PACK( 0, 7, -1,0),
  OC_DCT_CW_PACK( 0, 8, -1,0),
  OC_DCT_CW_PACK( 0, 9, -1,0),
  /*OC_DCT_RUN_CAT2B (3 extra bits-3 already read)*/
  OC_DCT_CW_PACK( 0, 2, +2,0),
  OC_DCT_CW_PACK( 0, 3, +2,0),
  OC_DCT_CW_PACK( 0, 2, +3,0),
  OC_DCT_CW_PACK( 0, 3, +3,0),
  OC_DCT_CW_PACK( 0, 2, -2,0),
  OC_DCT_CW_PACK( 0, 3, -2,0),
  OC_DCT_CW_PACK( 0, 2, -3,0),
  OC_DCT_CW_PACK( 0, 3, -3,0),
  /*OC_DCT_SHORT_ZRL_TOKEN (3 extra bits-3 already read)
    Flip is set on the first one to distinguish it from OC_DCT_CW_FINISH.*/
  OC_DCT_CW_PACK( 0, 0,  0,1),
  OC_DCT_CW_PACK( 0, 1,  0,0),
  OC_DCT_CW_PACK( 0, 2,  0,0),
  OC_DCT_CW_PACK( 0, 3,  0,0),
  OC_DCT_CW_PACK( 0, 4,  0,0),
  OC_DCT_CW_PACK( 0, 5,  0,0),
  OC_DCT_CW_PACK( 0, 6,  0,0),
  OC_DCT_CW_PACK( 0, 7,  0,0),
  /*OC_ONE_TOKEN (0 extra bits)*/
  OC_DCT_CW_PACK( 0, 0, +1,0),
  /*OC_MINUS_ONE_TOKEN (0 extra bits)*/
  OC_DCT_CW_PACK( 0, 0, -1,0),
  /*OC_TWO_TOKEN (0 extra bits)*/
  OC_DCT_CW_PACK( 0, 0, +2,0),
  /*OC_MINUS_TWO_TOKEN (0 extra bits)*/
  OC_DCT_CW_PACK( 0, 0, -2,0),
  /*OC_DCT_VAL_CAT2 (1 extra bit-1 already read)x4*/
  OC_DCT_CW_PACK( 0, 0, +3,0),
  OC_DCT_CW_PACK( 0, 0, -3,0),
  OC_DCT_CW_PACK( 0, 0, +4,0),
  OC_DCT_CW_PACK( 0, 0, -4,0),
  OC_DCT_CW_PACK( 0, 0, +5,0),
  OC_DCT_CW_PACK( 0, 0, -5,0),
  OC_DCT_CW_PACK( 0, 0, +6,0),
  OC_DCT_CW_PACK( 0, 0, -6,0),
  /*OC_DCT_VAL_CAT3 (2 extra bits-2 already read)*/
  OC_DCT_CW_PACK( 0, 0, +7,0),
  OC_DCT_CW_PACK( 0, 0, +8,0),
  OC_DCT_CW_PACK( 0, 0, -7,0),
  OC_DCT_CW_PACK( 0, 0, -8,0),
  /*OC_DCT_VAL_CAT4 (3 extra bits-3 already read)*/
  OC_DCT_CW_PACK( 0, 0, +9,0),
  OC_DCT_CW_PACK( 0, 0,+10,0),
  OC_DCT_CW_PACK( 0, 0,+11,0),
  OC_DCT_CW_PACK( 0, 0,+12,0),
  OC_DCT_CW_PACK( 0, 0, -9,0),
  OC_DCT_CW_PACK( 0, 0,-10,0),
  OC_DCT_CW_PACK( 0, 0,-11,0),
  OC_DCT_CW_PACK( 0, 0,-12,0),
  /*OC_DCT_REPEAT_RUN1_TOKEN (3 extra bits-3 already read)*/
  OC_DCT_CW_PACK( 8, 0,  0,0),
  OC_DCT_CW_PACK( 9, 0,  0,0),
  OC_DCT_CW_PACK(10, 0,  0,0),
  OC_DCT_CW_PACK(11, 0,  0,0),
  OC_DCT_CW_PACK(12, 0,  0,0),
  OC_DCT_CW_PACK(13, 0,  0,0),
  OC_DCT_CW_PACK(14, 0,  0,0),
  OC_DCT_CW_PACK(15, 0,  0,0),
  /*OC_DCT_REPEAT_RUN0_TOKEN (2 extra bits-2 already read)*/
  OC_DCT_CW_PACK( 4, 0,  0,0),
  OC_DCT_CW_PACK( 5, 0,  0,0),
  OC_DCT_CW_PACK( 6, 0,  0,0),
  OC_DCT_CW_PACK( 7, 0,  0,0),
};
296 
297 
298 
/*Reads one super block run length from the bit packer.
  _opb:   The pack buffer to read from.
  Return: The decoded run length, from 1 to 4129.
  Coding scheme:
       Codeword            Run Length
     0                       1
     10x                     2-3
     110x                    4-5
     1110xx                  6-9
     11110xxx                10-17
     111110xxxx              18-33
     111111xxxxxxxxxxxx      34-4129*/
static int oc_sb_run_unpack(oc_pack_buf *_opb){
  long prefix;
  long extra;
  /*A leading 0 is a run of one.*/
  prefix=oc_pack_read1(_opb);
  if(prefix==0)return 1;
  /*Two more prefix bits settle the 10x and 110x codes.*/
  prefix=oc_pack_read(_opb,2);
  if(!(prefix&2))return (int)prefix+2;
  if(!(prefix&1)){
    extra=oc_pack_read1(_opb);
    return (int)extra+4;
  }
  /*We have seen 111; three more bits settle everything else.*/
  prefix=oc_pack_read(_opb,3);
  /*1110xx: both x bits are already in hand.*/
  if(!(prefix&4))return (int)prefix+6;
  /*11110xxx: the first x bit is in hand, two more follow.*/
  if(!(prefix&2)){
    int base;
    base=10+((prefix&1)<<2);
    extra=oc_pack_read(_opb,2);
    return base+(int)extra;
  }
  /*111110xxxx*/
  if(!(prefix&1)){
    extra=oc_pack_read(_opb,4);
    return (int)extra+18;
  }
  /*111111xxxxxxxxxxxx*/
  extra=oc_pack_read(_opb,12);
  return (int)extra+34;
}
333 
/*Reads one block run length from the bit packer.
  _opb:   The pack buffer to read from.
  Return: The decoded run length, from 1 to 30.
  Coding scheme:
     Codeword             Run Length
     0x                      1-2
     10x                     3-4
     110x                    5-6
     1110xx                  7-10
     11110xx                 11-14
     11111xxxx               15-30*/
static int oc_block_run_unpack(oc_pack_buf *_opb){
  long head;
  long tail;
  head=oc_pack_read(_opb,2);
  /*0x: the x bit is the low bit we just read.*/
  if(!(head&2))return (int)head+1;
  /*10x*/
  if(!(head&1)){
    tail=oc_pack_read1(_opb);
    return (int)tail+3;
  }
  /*We have seen 11.*/
  head=oc_pack_read(_opb,2);
  /*110x: again the x bit is already in hand.*/
  if(!(head&2))return (int)head+5;
  /*1110xx*/
  if(!(head&1)){
    tail=oc_pack_read(_opb,2);
    return (int)tail+7;
  }
  /*We have seen 1111.*/
  head=oc_pack_read(_opb,3);
  /*11110xx: both x bits are in the low two bits.*/
  if(!(head&4))return (int)(11+head);
  /*11111xxxx: the upper two x bits are in hand, the lower two follow.*/
  tail=oc_pack_read(_opb,2);
  return (int)(15+((head&3)<<2)+tail);
}
362 
363 
364 
/*Initializes a decoder context from the stream info and setup data.
  _dec:   The decoder context to initialize.
  _info:  The stream parameters, handed on to oc_state_init().
  _setup: The setup data; its Huffman tables, quantization parameters, and
           loop filter limits are read.
  Return: 0 on success, the negative value returned by oc_state_init() or
           oc_huff_trees_copy() if either of them fails, or TH_EFAULT if the
           token buffer cannot be allocated.
          On failure, the pieces that were already set up are cleared again.*/
static int oc_dec_init(oc_dec_ctx *_dec,const th_info *_info,
 const th_setup_info *_setup){
  int status;
  int qi;
  int pli;
  int qti;
  status=oc_state_init(&_dec->state,_info,3);
  if(status<0)return status;
  status=oc_huff_trees_copy(_dec->huff_tables,
   (const oc_huff_node *const *)_setup->huff_tables);
  if(status<0){
    oc_state_clear(&_dec->state);
    return status;
  }
  /*Per fragment we need 64 bytes of DCT tokens and 64 bytes of extra bits,
     plus one spare byte for a long EOB run, in case it is the very last token
     and has a run length of one.*/
  _dec->dct_tokens=(unsigned char *)_ogg_malloc((64+64+1)*
   _dec->state.nfrags*sizeof(_dec->dct_tokens[0]));
  if(_dec->dct_tokens==NULL){
    oc_huff_trees_clear(_dec->huff_tables);
    oc_state_clear(&_dec->state);
    return TH_EFAULT;
  }
  /*Hook each dequant_tables entry up to the matching dequant_table_data
     entry before the tables get filled in.*/
  for(qi=0;qi<64;qi++){
    for(pli=0;pli<3;pli++){
      for(qti=0;qti<2;qti++){
        _dec->state.dequant_tables[qi][pli][qti]=
         _dec->state.dequant_table_data[qi][pli][qti];
      }
    }
  }
  oc_dequant_tables_init(_dec->state.dequant_tables,_dec->pp_dc_scale,
   &_setup->qinfo);
  /*Derive the post-processing sharpness modifier for each qi from four
     entries (12, 17, 18 and 24) of every dequantization table.
    The luma plane (pli==0) contribution is doubled.*/
  for(qi=0;qi<64;qi++){
    int total;
    total=0;
    for(qti=0;qti<2;qti++){
      for(pli=0;pli<3;pli++){
        total+=(_dec->state.dequant_tables[qi][pli][qti][12]+
         _dec->state.dequant_tables[qi][pli][qti][17]+
         _dec->state.dequant_tables[qi][pli][qti][18]+
         _dec->state.dequant_tables[qi][pli][qti][24])<<(pli==0);
      }
    }
    _dec->pp_sharp_mod[qi]=-(total>>11);
  }
  memcpy(_dec->state.loop_filter_limits,_setup->qinfo.loop_filter_limits,
   sizeof(_dec->state.loop_filter_limits));
  /*Post-processing starts out disabled, with none of its buffers allocated
     and no stripe callback registered.*/
  _dec->pp_level=OC_PP_LEVEL_DISABLED;
  _dec->dc_qis=NULL;
  _dec->variances=NULL;
  _dec->pp_frame_data=NULL;
  _dec->stripe_cb.ctx=NULL;
  _dec->stripe_cb.stripe_decoded=NULL;
#if defined(HAVE_CAIRO)
  /*All telemetry switched off.*/
  _dec->telemetry=0;
  _dec->telemetry_bits=0;
  _dec->telemetry_qi=0;
  _dec->telemetry_mbmode=0;
  _dec->telemetry_mv=0;
  _dec->telemetry_frame_data=NULL;
#endif
  return 0;
}
425 
/*Releases everything a decoder context holds on to.
  _dec: The decoder context to clear.
  The buffers hanging directly off the context are freed first, then the
   Huffman tables and the shared codec state are cleared.
  The context structure itself is not freed here.*/
static void oc_dec_clear(oc_dec_ctx *_dec){
  _ogg_free(_dec->dct_tokens);
  _ogg_free(_dec->dc_qis);
  _ogg_free(_dec->variances);
  _ogg_free(_dec->pp_frame_data);
#if defined(HAVE_CAIRO)
  _ogg_free(_dec->telemetry_frame_data);
#endif
  oc_huff_trees_clear(_dec->huff_tables);
  oc_state_clear(&_dec->state);
}
437 
438 
/*Reads the frame header from the decoder's pack buffer.
  This stores the frame type, the list of qi values, and their count in the
   decoder state.
  _dec:   The decoder context; _dec->opb must be positioned at the start of
           the packet.
  Return: 0 on success, TH_EBADPACKET if the first bit marks this as something
           other than a data packet, or TH_EIMPL if the three spare bits of an
           intra frame are not all zero.*/
static int oc_dec_frame_header_unpack(oc_dec_ctx *_dec){
  long bits;
  int  nqis;
  /*A data packet starts with a zero bit.*/
  bits=oc_pack_read1(&_dec->opb);
  if(bits!=0)return TH_EBADPACKET;
  /*The frame type (I or P).*/
  bits=oc_pack_read1(&_dec->opb);
  _dec->state.frame_type=(int)bits;
  /*The qi list: up to three 6-bit values.
    The first two are each followed by a bit that says whether or not another
     one follows; the third has no such bit.*/
  nqis=0;
  do{
    bits=oc_pack_read(&_dec->opb,6);
    _dec->state.qis[nqis++]=(unsigned char)bits;
  }
  while(nqis<3&&oc_pack_read1(&_dec->opb));
  _dec->state.nqis=nqis;
  if(_dec->state.frame_type==OC_INTRA_FRAME){
    /*Keyframes have 3 unused configuration bits, holdovers from VP3 days.
      Most of the other unused bits in the VP3 headers were eliminated.
      I don't know why these remain.*/
    /*I wanted to eliminate wasted bits, but not all config wiggle room
       --Monty.*/
    bits=oc_pack_read(&_dec->opb,3);
    if(bits!=0)return TH_EIMPL;
  }
  return 0;
}
474 
475 /*Mark all fragments as coded and in OC_MODE_INTRA.
476   This also builds up the coded fragment list (in coded order), and clears the
477    uncoded fragment list.
478   It does not update the coded macro block list nor the super block flags, as
479    those are not used when decoding INTRA frames.*/
oc_dec_mark_all_intra(oc_dec_ctx * _dec)480 static void oc_dec_mark_all_intra(oc_dec_ctx *_dec){
481   const oc_sb_map   *sb_maps;
482   const oc_sb_flags *sb_flags;
483   oc_fragment       *frags;
484   ptrdiff_t         *coded_fragis;
485   ptrdiff_t          ncoded_fragis;
486   ptrdiff_t          prev_ncoded_fragis;
487   unsigned           nsbs;
488   unsigned           sbi;
489   int                pli;
490   coded_fragis=_dec->state.coded_fragis;
491   prev_ncoded_fragis=ncoded_fragis=0;
492   sb_maps=(const oc_sb_map *)_dec->state.sb_maps;
493   sb_flags=_dec->state.sb_flags;
494   frags=_dec->state.frags;
495   sbi=nsbs=0;
496   for(pli=0;pli<3;pli++){
497     nsbs+=_dec->state.fplanes[pli].nsbs;
498     for(;sbi<nsbs;sbi++){
499       int quadi;
500       for(quadi=0;quadi<4;quadi++)if(sb_flags[sbi].quad_valid&1<<quadi){
501         int bi;
502         for(bi=0;bi<4;bi++){
503           ptrdiff_t fragi;
504           fragi=sb_maps[sbi][quadi][bi];
505           if(fragi>=0){
506             frags[fragi].coded=1;
507             frags[fragi].mb_mode=OC_MODE_INTRA;
508             coded_fragis[ncoded_fragis++]=fragi;
509           }
510         }
511       }
512     }
513     _dec->state.ncoded_fragis[pli]=ncoded_fragis-prev_ncoded_fragis;
514     prev_ncoded_fragis=ncoded_fragis;
515   }
516   _dec->state.ntotal_coded_fragis=ncoded_fragis;
517 }
518 
519 /*Decodes the bit flags indicating whether each super block is partially coded
520    or not.
521   Return: The number of partially coded super blocks.*/
oc_dec_partial_sb_flags_unpack(oc_dec_ctx * _dec)522 static unsigned oc_dec_partial_sb_flags_unpack(oc_dec_ctx *_dec){
523   oc_sb_flags *sb_flags;
524   unsigned     nsbs;
525   unsigned     sbi;
526   unsigned     npartial;
527   unsigned     run_count;
528   long         val;
529   int          flag;
530   val=oc_pack_read1(&_dec->opb);
531   flag=(int)val;
532   sb_flags=_dec->state.sb_flags;
533   nsbs=_dec->state.nsbs;
534   sbi=npartial=0;
535   while(sbi<nsbs){
536     int full_run;
537     run_count=oc_sb_run_unpack(&_dec->opb);
538     full_run=run_count>=4129;
539     do{
540       sb_flags[sbi].coded_partially=flag;
541       sb_flags[sbi].coded_fully=0;
542       npartial+=flag;
543       sbi++;
544     }
545     while(--run_count>0&&sbi<nsbs);
546     if(full_run&&sbi<nsbs){
547       val=oc_pack_read1(&_dec->opb);
548       flag=(int)val;
549     }
550     else flag=!flag;
551   }
552   /*TODO: run_count should be 0 here.
553     If it's not, we should issue a warning of some kind.*/
554   return npartial;
555 }
556 
557 /*Decodes the bit flags for whether or not each non-partially-coded super
558    block is fully coded or not.
559   This function should only be called if there is at least one
560    non-partially-coded super block.
561   Return: The number of partially coded super blocks.*/
oc_dec_coded_sb_flags_unpack(oc_dec_ctx * _dec)562 static void oc_dec_coded_sb_flags_unpack(oc_dec_ctx *_dec){
563   oc_sb_flags *sb_flags;
564   unsigned     nsbs;
565   unsigned     sbi;
566   unsigned     run_count;
567   long         val;
568   int          flag;
569   sb_flags=_dec->state.sb_flags;
570   nsbs=_dec->state.nsbs;
571   /*Skip partially coded super blocks.*/
572   for(sbi=0;sb_flags[sbi].coded_partially;sbi++);
573   val=oc_pack_read1(&_dec->opb);
574   flag=(int)val;
575   do{
576     int full_run;
577     run_count=oc_sb_run_unpack(&_dec->opb);
578     full_run=run_count>=4129;
579     for(;sbi<nsbs;sbi++){
580       if(sb_flags[sbi].coded_partially)continue;
581       if(run_count--<=0)break;
582       sb_flags[sbi].coded_fully=flag;
583     }
584     if(full_run&&sbi<nsbs){
585       val=oc_pack_read1(&_dec->opb);
586       flag=(int)val;
587     }
588     else flag=!flag;
589   }
590   while(sbi<nsbs);
591   /*TODO: run_count should be 0 here.
592     If it's not, we should issue a warning of some kind.*/
593 }
594 
/*Decodes the coded flag of every fragment in an INTER frame.
  The super block level flags are unpacked first; block level run lengths are
   then read only for the fragments of partially coded super blocks.
  Coded fragments are appended to the front of the coded fragment list, while
   uncoded fragments are stored backwards from the position nfrags entries
   into the same array.
  The per-plane and total coded fragment counts are stored in the decoder
   state.
  _dec: The decoder context.*/
static void oc_dec_coded_flags_unpack(oc_dec_ctx *_dec){
  const oc_sb_map   *maps;
  const oc_sb_flags *flags;
  oc_fragment       *frags;
  ptrdiff_t         *coded_list;
  ptrdiff_t         *uncoded_tail;
  ptrdiff_t          ncoded;
  ptrdiff_t          plane_start;
  unsigned           npartial;
  unsigned           sb_end;
  unsigned           sbi;
  long               bits;
  int                pli;
  int                bit;
  int                left;
  npartial=oc_dec_partial_sb_flags_unpack(_dec);
  if(npartial<_dec->state.nsbs)oc_dec_coded_sb_flags_unpack(_dec);
  /*The block-level value is toggled before the first run is applied, so
     start from the inverse of the coded bit.*/
  if(npartial>0){
    bits=oc_pack_read1(&_dec->opb);
    bit=!(int)bits;
  }
  else bit=0;
  maps=(const oc_sb_map *)_dec->state.sb_maps;
  flags=_dec->state.sb_flags;
  frags=_dec->state.frags;
  coded_list=_dec->state.coded_fragis;
  uncoded_tail=coded_list+_dec->state.nfrags;
  ncoded=0;
  left=0;
  sbi=sb_end=0;
  for(pli=0;pli<3;pli++){
    plane_start=ncoded;
    /*Super blocks are numbered consecutively across the planes.*/
    sb_end+=_dec->state.fplanes[pli].nsbs;
    while(sbi<sb_end){
      int quadi;
      for(quadi=0;quadi<4;quadi++){
        int bi;
        /*Skip quadrants that are not marked valid.*/
        if(!(flags[sbi].quad_valid&1<<quadi))continue;
        for(bi=0;bi<4;bi++){
          ptrdiff_t fragi;
          int       is_coded;
          fragi=maps[sbi][quadi][bi];
          /*A negative index marks a slot with no fragment.*/
          if(fragi<0)continue;
          if(flags[sbi].coded_fully)is_coded=1;
          else if(flags[sbi].coded_partially){
            /*Start a new run (with the opposite value) once the current one
               is used up.*/
            if(left<=0){
              left=oc_block_run_unpack(&_dec->opb);
              bit=!bit;
            }
            left--;
            is_coded=bit;
          }
          else is_coded=0;
          if(is_coded)coded_list[ncoded++]=fragi;
          else *--uncoded_tail=fragi;
          frags[fragi].coded=is_coded;
        }
      }
      sbi++;
    }
    _dec->state.ncoded_fragis[pli]=ncoded-plane_start;
  }
  _dec->state.ntotal_coded_fragis=ncoded;
  /*TODO: left should be 0 here.
    If it's not, we should issue a warning of some kind.*/
}
660 
661 
662 
663 typedef int (*oc_mode_unpack_func)(oc_pack_buf *_opb);
664 
oc_vlc_mode_unpack(oc_pack_buf * _opb)665 static int oc_vlc_mode_unpack(oc_pack_buf *_opb){
666   long val;
667   int  i;
668   for(i=0;i<7;i++){
669     val=oc_pack_read1(_opb);
670     if(!val)break;
671   }
672   return i;
673 }
674 
/*Reads a fixed-length coded mode symbol.
  _opb:   The pack buffer to read from.
  Return: The value of the next 3 bits.*/
static int oc_clc_mode_unpack(oc_pack_buf *_opb){
  return (int)oc_pack_read(_opb,3);
}
680 
681 /*Unpacks the list of macro block modes for INTER frames.*/
oc_dec_mb_modes_unpack(oc_dec_ctx * _dec)682 static void oc_dec_mb_modes_unpack(oc_dec_ctx *_dec){
683   const oc_mb_map     *mb_maps;
684   signed char         *mb_modes;
685   const oc_fragment   *frags;
686   const unsigned char *alphabet;
687   unsigned char        scheme0_alphabet[8];
688   oc_mode_unpack_func  mode_unpack;
689   size_t               nmbs;
690   size_t               mbi;
691   long                 val;
692   int                  mode_scheme;
693   val=oc_pack_read(&_dec->opb,3);
694   mode_scheme=(int)val;
695   if(mode_scheme==0){
696     int mi;
697     /*Just in case, initialize the modes to something.
698       If the bitstream doesn't contain each index exactly once, it's likely
699        corrupt and the rest of the packet is garbage anyway, but this way we
700        won't crash, and we'll decode SOMETHING.*/
701     /*LOOP VECTORIZES*/
702     for(mi=0;mi<OC_NMODES;mi++)scheme0_alphabet[mi]=OC_MODE_INTER_NOMV;
703     for(mi=0;mi<OC_NMODES;mi++){
704       val=oc_pack_read(&_dec->opb,3);
705       scheme0_alphabet[val]=OC_MODE_ALPHABETS[6][mi];
706     }
707     alphabet=scheme0_alphabet;
708   }
709   else alphabet=OC_MODE_ALPHABETS[mode_scheme-1];
710   if(mode_scheme==7)mode_unpack=oc_clc_mode_unpack;
711   else mode_unpack=oc_vlc_mode_unpack;
712   mb_modes=_dec->state.mb_modes;
713   mb_maps=(const oc_mb_map *)_dec->state.mb_maps;
714   nmbs=_dec->state.nmbs;
715   frags=_dec->state.frags;
716   for(mbi=0;mbi<nmbs;mbi++){
717     if(mb_modes[mbi]!=OC_MODE_INVALID){
718       int bi;
719       /*Check for a coded luma block in this macro block.*/
720       for(bi=0;bi<4&&!frags[mb_maps[mbi][0][bi]].coded;bi++);
721       /*We found one, decode a mode.*/
722       if(bi<4)mb_modes[mbi]=alphabet[(*mode_unpack)(&_dec->opb)];
723       /*There were none: INTER_NOMV is forced.*/
724       else mb_modes[mbi]=OC_MODE_INTER_NOMV;
725     }
726   }
727 }
728 
729 
730 
731 typedef int (*oc_mv_comp_unpack_func)(oc_pack_buf *_opb);
732 
oc_vlc_mv_comp_unpack(oc_pack_buf * _opb)733 static int oc_vlc_mv_comp_unpack(oc_pack_buf *_opb){
734   long bits;
735   int  mask;
736   int  mv;
737   bits=oc_pack_read(_opb,3);
738   switch(bits){
739     case  0:return 0;
740     case  1:return 1;
741     case  2:return -1;
742     case  3:
743     case  4:{
744       mv=(int)(bits-1);
745       bits=oc_pack_read1(_opb);
746     }break;
747     /*case  5:
748     case  6:
749     case  7:*/
750     default:{
751       mv=1<<bits-3;
752       bits=oc_pack_read(_opb,bits-2);
753       mv+=(int)(bits>>1);
754       bits&=1;
755     }break;
756   }
757   mask=-(int)bits;
758   return mv+mask^mask;
759 }
760 
/*Reads one fixed-length coded motion vector component: 6 bits, holding a
   5-bit magnitude followed by a sign bit.
  _opb:   The pack buffer to read from.
  Return: The signed component value, from -31 to 31.*/
static int oc_clc_mv_comp_unpack(oc_pack_buf *_opb){
  long raw;
  int  mag;
  int  mask;
  raw=oc_pack_read(_opb,6);
  mag=((int)raw)>>1;
  /*Branch-free conditional negation: the mask is 0 for a clear sign bit (a
     no-op) and all ones for a set sign bit.*/
  mask=-(((int)raw)&1);
  return (mag+mask)^mask;
}
770 
771 /*Unpacks the list of motion vectors for INTER frames, and propagtes the macro
772    block modes and motion vectors to the individual fragments.*/
oc_dec_mv_unpack_and_frag_modes_fill(oc_dec_ctx * _dec)773 static void oc_dec_mv_unpack_and_frag_modes_fill(oc_dec_ctx *_dec){
774   const oc_mb_map        *mb_maps;
775   const signed char      *mb_modes;
776   oc_set_chroma_mvs_func  set_chroma_mvs;
777   oc_mv_comp_unpack_func  mv_comp_unpack;
778   oc_fragment            *frags;
779   oc_mv                  *frag_mvs;
780   const unsigned char    *map_idxs;
781   int                     map_nidxs;
782   oc_mv                   last_mv[2];
783   oc_mv                   cbmvs[4];
784   size_t                  nmbs;
785   size_t                  mbi;
786   long                    val;
787   set_chroma_mvs=OC_SET_CHROMA_MVS_TABLE[_dec->state.info.pixel_fmt];
788   val=oc_pack_read1(&_dec->opb);
789   mv_comp_unpack=val?oc_clc_mv_comp_unpack:oc_vlc_mv_comp_unpack;
790   map_idxs=OC_MB_MAP_IDXS[_dec->state.info.pixel_fmt];
791   map_nidxs=OC_MB_MAP_NIDXS[_dec->state.info.pixel_fmt];
792   memset(last_mv,0,sizeof(last_mv));
793   frags=_dec->state.frags;
794   frag_mvs=_dec->state.frag_mvs;
795   mb_maps=(const oc_mb_map *)_dec->state.mb_maps;
796   mb_modes=_dec->state.mb_modes;
797   nmbs=_dec->state.nmbs;
798   for(mbi=0;mbi<nmbs;mbi++){
799     int          mb_mode;
800     mb_mode=mb_modes[mbi];
801     if(mb_mode!=OC_MODE_INVALID){
802       oc_mv        mbmv;
803       ptrdiff_t    fragi;
804       int          coded[13];
805       int          codedi;
806       int          ncoded;
807       int          mapi;
808       int          mapii;
809       /*Search for at least one coded fragment.*/
810       ncoded=mapii=0;
811       do{
812         mapi=map_idxs[mapii];
813         fragi=mb_maps[mbi][mapi>>2][mapi&3];
814         if(frags[fragi].coded)coded[ncoded++]=mapi;
815       }
816       while(++mapii<map_nidxs);
817       if(ncoded<=0)continue;
818       switch(mb_mode){
819         case OC_MODE_INTER_MV_FOUR:{
820           oc_mv       lbmvs[4];
821           int         bi;
822           /*Mark the tail of the list, so we don't accidentally go past it.*/
823           coded[ncoded]=-1;
824           for(bi=codedi=0;bi<4;bi++){
825             if(coded[codedi]==bi){
826               codedi++;
827               fragi=mb_maps[mbi][0][bi];
828               frags[fragi].mb_mode=mb_mode;
829               lbmvs[bi][0]=(signed char)(*mv_comp_unpack)(&_dec->opb);
830               lbmvs[bi][1]=(signed char)(*mv_comp_unpack)(&_dec->opb);
831               memcpy(frag_mvs[fragi],lbmvs[bi],sizeof(lbmvs[bi]));
832             }
833             else lbmvs[bi][0]=lbmvs[bi][1]=0;
834           }
835           if(codedi>0){
836             memcpy(last_mv[1],last_mv[0],sizeof(last_mv[1]));
837             memcpy(last_mv[0],lbmvs[coded[codedi-1]],sizeof(last_mv[0]));
838           }
839           if(codedi<ncoded){
840             (*set_chroma_mvs)(cbmvs,(const oc_mv *)lbmvs);
841             for(;codedi<ncoded;codedi++){
842               mapi=coded[codedi];
843               bi=mapi&3;
844               fragi=mb_maps[mbi][mapi>>2][bi];
845               frags[fragi].mb_mode=mb_mode;
846               memcpy(frag_mvs[fragi],cbmvs[bi],sizeof(cbmvs[bi]));
847             }
848           }
849         }break;
850         case OC_MODE_INTER_MV:{
851           memcpy(last_mv[1],last_mv[0],sizeof(last_mv[1]));
852           mbmv[0]=last_mv[0][0]=(signed char)(*mv_comp_unpack)(&_dec->opb);
853           mbmv[1]=last_mv[0][1]=(signed char)(*mv_comp_unpack)(&_dec->opb);
854         }break;
855         case OC_MODE_INTER_MV_LAST:memcpy(mbmv,last_mv[0],sizeof(mbmv));break;
856         case OC_MODE_INTER_MV_LAST2:{
857           memcpy(mbmv,last_mv[1],sizeof(mbmv));
858           memcpy(last_mv[1],last_mv[0],sizeof(last_mv[1]));
859           memcpy(last_mv[0],mbmv,sizeof(last_mv[0]));
860         }break;
861         case OC_MODE_GOLDEN_MV:{
862           mbmv[0]=(signed char)(*mv_comp_unpack)(&_dec->opb);
863           mbmv[1]=(signed char)(*mv_comp_unpack)(&_dec->opb);
864         }break;
865         default:memset(mbmv,0,sizeof(mbmv));break;
866       }
867       /*4MV mode fills in the fragments itself.
868         For all other modes we can use this common code.*/
869       if(mb_mode!=OC_MODE_INTER_MV_FOUR){
870         for(codedi=0;codedi<ncoded;codedi++){
871           mapi=coded[codedi];
872           fragi=mb_maps[mbi][mapi>>2][mapi&3];
873           frags[fragi].mb_mode=mb_mode;
874           memcpy(frag_mvs[fragi],mbmv,sizeof(mbmv));
875         }
876       }
877     }
878   }
879 }
880 
/*Unpacks the quantization index selector (qii) of every coded fragment.
  If the frame has a single qi value, nothing is read from the packet and each
   coded fragment simply gets a qii of 0.
  Otherwise the selectors are coded as run-length coded binary flags, in up to
   two passes over the coded fragment list:
   - The first pass marks each fragment as having a qii of 0 or non-zero.
   - The second pass, made only if the frame has 3 qi values and at least one
      fragment was marked non-zero, visits just the non-zero fragments and
      adds its flag to their qii (leaving it at 1 or raising it to 2).
  _dec: The decoder context.
        Bits are consumed from _dec->opb and the qii field of the fragments
         listed in _dec->state.coded_fragis is written.*/
static void oc_dec_block_qis_unpack(oc_dec_ctx *_dec){
  oc_fragment     *frags;
  const ptrdiff_t *fragi_list;
  ptrdiff_t        nfragis;
  ptrdiff_t        pos;
  int              bit;
  int              run;
  int              long_run;
  int              nnonzero;
  nfragis=_dec->state.ntotal_coded_fragis;
  if(nfragis<=0)return;
  frags=_dec->state.frags;
  fragi_list=_dec->state.coded_fragis;
  if(_dec->state.nqis==1){
    /*Only one qi value in this frame: every coded fragment uses it.*/
    for(pos=0;pos<nfragis;pos++)frags[fragi_list[pos]].qii=0;
    return;
  }
  /*First pass: a flag per fragment, 0 for qii==0 and 1 for qii>0.
    At first we just store the flag in the fragment's qii.*/
  bit=(int)oc_pack_read1(&_dec->opb);
  nnonzero=0;
  pos=0;
  while(pos<nfragis){
    run=oc_sb_run_unpack(&_dec->opb);
    /*A run of 4129 or more is followed by an explicitly coded flag instead of
       an implicit toggle.*/
    long_run=run>=4129;
    /*At least one fragment is always consumed, even for an empty run.*/
    for(;;){
      frags[fragi_list[pos]].qii=bit;
      pos++;
      nnonzero+=bit;
      run--;
      if(run<=0||pos>=nfragis)break;
    }
    if(long_run&&pos<nfragis)bit=(int)oc_pack_read1(&_dec->opb);
    else bit=!bit;
  }
  /*TODO: run should be 0 here.
    If it's not, we should issue a warning of some kind.*/
  /*The second pass is only needed to tell a qii of 1 from a qii of 2, and
     only if some fragment actually has a non-zero qii.*/
  if(_dec->state.nqis!=3||nnonzero<=0)return;
  /*Start at the first fragment with a non-zero qii.
    One is guaranteed to exist because nnonzero>0.*/
  pos=0;
  while(frags[fragi_list[pos]].qii==0)pos++;
  bit=(int)oc_pack_read1(&_dec->opb);
  do{
    run=oc_sb_run_unpack(&_dec->opb);
    long_run=run>=4129;
    while(pos<nfragis){
      ptrdiff_t fragi;
      fragi=fragi_list[pos];
      /*Fragments with a qii of 0 do not take part in this pass.*/
      if(frags[fragi].qii!=0){
        if(run--<=0)break;
        frags[fragi].qii+=bit;
      }
      pos++;
    }
    if(long_run&&pos<nfragis)bit=(int)oc_pack_read1(&_dec->opb);
    else bit=!bit;
  }
  while(pos<nfragis);
  /*TODO: run should be 0 here.
    If it's not, we should issue a warning of some kind.*/
}
961 
962 
963 
/*Unpacks the DC coefficient tokens.
  Unlike when unpacking the AC coefficient tokens, we actually need to decode
   the DC coefficient values now so that we can do DC prediction.
  Every token read (plus its extra bits, if any) is appended to
   _dec->dct_tokens, and the decoded value is stored in the dc field of the
   corresponding coded fragment.
  _dec:        The decoder context.
  _huff_idxs:  The index of the Huffman table to use for each color plane:
                element 0 is used for plane 0, element 1 for planes 1 and 2.
  _ntoks_left: The number of tokens left to be decoded in each color plane for
                each coefficient.
               This is updated as EOB tokens and zero run tokens are decoded.
  Return: The length of any outstanding EOB run.*/
static ptrdiff_t oc_dec_dc_coeff_unpack(oc_dec_ctx *_dec,int _huff_idxs[2],
 ptrdiff_t _ntoks_left[3][64]){
  unsigned char   *dct_tokens;
  oc_fragment     *frags;
  const ptrdiff_t *coded_fragis;
  ptrdiff_t        ncoded_fragis;
  ptrdiff_t        fragii;
  ptrdiff_t        eobs;
  ptrdiff_t        ti;
  int              pli;
  dct_tokens=_dec->dct_tokens;
  frags=_dec->state.frags;
  coded_fragis=_dec->state.coded_fragis;
  /*ncoded_fragis is a running total over the planes processed so far, so
     fragii indexes the coded fragment list of all planes continuously.*/
  ncoded_fragis=fragii=eobs=ti=0;
  for(pli=0;pli<3;pli++){
    ptrdiff_t run_counts[64];
    ptrdiff_t eob_count;
    ptrdiff_t eobi;
    int       rli;
    ncoded_fragis+=_dec->state.ncoded_fragis[pli];
    memset(run_counts,0,sizeof(run_counts));
    /*Record the EOB run and token offset in effect at the start of this
       plane's DC tokens.*/
    _dec->eob_runs[pli][0]=eobs;
    _dec->ti0[pli][0]=ti;
    /*Continue any previous EOB run, if there was one.*/
    eobi=eobs;
    /*The run can cover at most the fragments left in this plane; the rest
       stays in eobs and carries over.*/
    if(ncoded_fragis-fragii<eobi)eobi=ncoded_fragis-fragii;
    eob_count=eobi;
    eobs-=eobi;
    while(eobi-->0)frags[coded_fragis[fragii++]].dc=0;
    while(fragii<ncoded_fragis){
      int token;
      int cw;
      int eb;
      int skip;
      /*pli+1>>1 is (pli+1)>>1, i.e. 0 for plane 0 and 1 for planes 1 and 2.*/
      token=oc_huff_token_decode(&_dec->opb,
       _dec->huff_tables[_huff_idxs[pli+1>>1]]);
      dct_tokens[ti++]=(unsigned char)token;
      if(OC_DCT_TOKEN_NEEDS_MORE(token)){
        eb=(int)oc_pack_read(&_dec->opb,
         OC_INTERNAL_DCT_TOKEN_EXTRA_BITS[token]);
        /*Store the low 8 extra bits; only OC_DCT_TOKEN_FAT_EOB stores a
           second byte holding the bits above them.*/
        dct_tokens[ti++]=(unsigned char)eb;
        if(token==OC_DCT_TOKEN_FAT_EOB)dct_tokens[ti++]=(unsigned char)(eb>>8);
        eb<<=OC_DCT_TOKEN_EB_POS(token);
      }
      else eb=0;
      /*Combine the token's base code word with its positioned extra bits.*/
      cw=OC_DCT_CODE_WORD[token]+eb;
      eobs=cw>>OC_DCT_CW_EOB_SHIFT&0xFFF;
      if(cw==OC_DCT_CW_FINISH)eobs=OC_DCT_EOB_FINISH;
      if(eobs){
        /*An EOB run: zero the DC of as many fragments as the run covers in
           this plane, and keep the remainder in eobs.*/
        eobi=OC_MINI(eobs,ncoded_fragis-fragii);
        eob_count+=eobi;
        eobs-=eobi;
        while(eobi-->0)frags[coded_fragis[fragii++]].dc=0;
      }
      else{
        int coeff;
        skip=(unsigned char)(cw>>OC_DCT_CW_RLEN_SHIFT);
        /*If the flip bit is set, invert it and every bit above it in cw.*/
        cw^=-(cw&1<<OC_DCT_CW_FLIP_BIT);
        coeff=cw>>OC_DCT_CW_MAG_SHIFT;
        /*A non-zero run length means the DC coefficient itself is one of the
           skipped zeros.*/
        if(skip)coeff=0;
        run_counts[skip]++;
        frags[coded_fragis[fragii++]].dc=coeff;
      }
    }
    /*Add the total EOB count to the longest run length.*/
    run_counts[63]+=eob_count;
    /*And convert the run_counts array to a moment table.*/
    /*After this loop run_counts[i] holds the sum of the original entries
       i..63.*/
    for(rli=63;rli-->0;)run_counts[rli]+=run_counts[rli+1];
    /*Finally, subtract off the number of coefficients that have been
       accounted for by runs started in this coefficient.*/
    for(rli=64;rli-->0;)_ntoks_left[pli][rli]-=run_counts[rli];
  }
  /*Remember how many bytes of dct_tokens are in use.*/
  _dec->dct_tokens_count=ti;
  return eobs;
}
1047 
1048 /*Unpacks the AC coefficient tokens.
1049   This can completely discard coefficient values while unpacking, and so is
1050    somewhat simpler than unpacking the DC coefficient tokens.
1051   _huff_idx:   The index of the Huffman table to use for each color plane.
1052   _ntoks_left: The number of tokens left to be decoded in each color plane for
1053                 each coefficient.
1054                This is updated as EOB tokens and zero run tokens are decoded.
1055   _eobs:       The length of any outstanding EOB run from previous
1056                 coefficients.
1057   Return: The length of any outstanding EOB run.*/
oc_dec_ac_coeff_unpack(oc_dec_ctx * _dec,int _zzi,int _huff_idxs[2],ptrdiff_t _ntoks_left[3][64],ptrdiff_t _eobs)1058 static int oc_dec_ac_coeff_unpack(oc_dec_ctx *_dec,int _zzi,int _huff_idxs[2],
1059  ptrdiff_t _ntoks_left[3][64],ptrdiff_t _eobs){
1060   unsigned char *dct_tokens;
1061   ptrdiff_t      ti;
1062   int            pli;
1063   dct_tokens=_dec->dct_tokens;
1064   ti=_dec->dct_tokens_count;
1065   for(pli=0;pli<3;pli++){
1066     ptrdiff_t run_counts[64];
1067     ptrdiff_t eob_count;
1068     size_t    ntoks_left;
1069     size_t    ntoks;
1070     int       rli;
1071     _dec->eob_runs[pli][_zzi]=_eobs;
1072     _dec->ti0[pli][_zzi]=ti;
1073     ntoks_left=_ntoks_left[pli][_zzi];
1074     memset(run_counts,0,sizeof(run_counts));
1075     eob_count=0;
1076     ntoks=0;
1077     while(ntoks+_eobs<ntoks_left){
1078       int token;
1079       int cw;
1080       int eb;
1081       int skip;
1082       ntoks+=_eobs;
1083       eob_count+=_eobs;
1084       token=oc_huff_token_decode(&_dec->opb,
1085        _dec->huff_tables[_huff_idxs[pli+1>>1]]);
1086       dct_tokens[ti++]=(unsigned char)token;
1087       if(OC_DCT_TOKEN_NEEDS_MORE(token)){
1088         eb=(int)oc_pack_read(&_dec->opb,
1089          OC_INTERNAL_DCT_TOKEN_EXTRA_BITS[token]);
1090         dct_tokens[ti++]=(unsigned char)eb;
1091         if(token==OC_DCT_TOKEN_FAT_EOB)dct_tokens[ti++]=(unsigned char)(eb>>8);
1092         eb<<=OC_DCT_TOKEN_EB_POS(token);
1093       }
1094       else eb=0;
1095       cw=OC_DCT_CODE_WORD[token]+eb;
1096       skip=(unsigned char)(cw>>OC_DCT_CW_RLEN_SHIFT);
1097       _eobs=cw>>OC_DCT_CW_EOB_SHIFT&0xFFF;
1098       if(cw==OC_DCT_CW_FINISH)_eobs=OC_DCT_EOB_FINISH;
1099       if(_eobs==0){
1100         run_counts[skip]++;
1101         ntoks++;
1102       }
1103     }
1104     /*Add the portion of the last EOB run actually used by this coefficient.*/
1105     eob_count+=ntoks_left-ntoks;
1106     /*And remove it from the remaining EOB count.*/
1107     _eobs-=ntoks_left-ntoks;
1108     /*Add the total EOB count to the longest run length.*/
1109     run_counts[63]+=eob_count;
1110     /*And convert the run_counts array to a moment table.*/
1111     for(rli=63;rli-->0;)run_counts[rli]+=run_counts[rli+1];
1112     /*Finally, subtract off the number of coefficients that have been
1113        accounted for by runs started in this coefficient.*/
1114     for(rli=64-_zzi;rli-->0;)_ntoks_left[pli][_zzi+rli]-=run_counts[rli];
1115   }
1116   _dec->dct_tokens_count=ti;
1117   return _eobs;
1118 }
1119 
1120 /*Tokens describing the DCT coefficients that belong to each fragment are
1121    stored in the bitstream grouped by coefficient, not by fragment.
1122 
1123   This means that we either decode all the tokens in order, building up a
1124    separate coefficient list for each fragment as we go, and then go back and
1125    do the iDCT on each fragment, or we have to create separate lists of tokens
1126    for each coefficient, so that we can pull the next token required off the
1127    head of the appropriate list when decoding a specific fragment.
1128 
1129   The former was VP3's choice, and it meant 2*w*h extra storage for all the
1130    decoded coefficient values.
1131 
1132   We take the second option, which lets us store just one to three bytes per
1133    token (generally far fewer than the number of coefficients, due to EOB
1134    tokens and zero runs), and which requires us to only maintain a counter for
1135    each of the 64 coefficients, instead of a counter for every fragment to
1136    determine where the next token goes.
1137 
1138   We actually use 3 counters per coefficient, one for each color plane, so we
1139    can decode all color planes simultaneously.
1140   This lets color conversion, etc., be done as soon as a full MCU (one or
1141    two super block rows) is decoded, while the image data is still in cache.*/
1142 
/*Unpacks the DCT tokens of all 64 coefficients for the current frame.
  A pair of 4-bit Huffman table indices is read before the DC tokens, and
   another pair before the AC tokens.
  The AC coefficients are split into four groups by zig-zag index, and the
   table indices are advanced by 16 at the start of each group.
  _dec: The decoder context; bits are consumed from _dec->opb.*/
static void oc_dec_residual_tokens_unpack(oc_dec_ctx *_dec){
  /*The first zig-zag index beyond each group of coefficients.*/
  static const unsigned char OC_HUFF_LIST_MAX[5]={1,6,15,28,64};
  ptrdiff_t ntoks_left[3][64];
  ptrdiff_t eobs;
  int       huff_idxs[2];
  int       group;
  int       pli;
  int       zzi;
  /*Initially every coded fragment of a plane still needs a token for every
     coefficient.*/
  for(pli=0;pli<3;pli++){
    ptrdiff_t nfragis;
    nfragis=_dec->state.ncoded_fragis[pli];
    for(zzi=0;zzi<64;zzi++)ntoks_left[pli][zzi]=nfragis;
  }
  /*The DC tokens.*/
  huff_idxs[0]=(int)oc_pack_read(&_dec->opb,4);
  huff_idxs[1]=(int)oc_pack_read(&_dec->opb,4);
  _dec->eob_runs[0][0]=0;
  eobs=oc_dec_dc_coeff_unpack(_dec,huff_idxs,ntoks_left);
#if defined(HAVE_CAIRO)
  _dec->telemetry_dc_bytes=oc_pack_bytes_left(&_dec->opb);
#endif
  /*The AC tokens.*/
  huff_idxs[0]=(int)oc_pack_read(&_dec->opb,4);
  huff_idxs[1]=(int)oc_pack_read(&_dec->opb,4);
  group=0;
  for(zzi=1;zzi<64;zzi++){
    /*Crossing into the next group of coefficients selects the next set of
       Huffman tables.*/
    if(zzi>=OC_HUFF_LIST_MAX[group]){
      group++;
      huff_idxs[0]+=16;
      huff_idxs[1]+=16;
    }
    eobs=oc_dec_ac_coeff_unpack(_dec,zzi,huff_idxs,ntoks_left,eobs);
  }
  /*TODO: eobs should be exactly zero, or 4096 or greater.
    The second case occurs when an EOB run of size zero is encountered, which
     gets treated as an infinite EOB run (where infinity is PTRDIFF_MAX).
    If neither of these conditions holds, then a warning should be issued.*/
}
1181 
1182 
/*Allocates, updates, or releases the buffers used for post-processing,
   according to the current value of _dec->pp_level.
  _dec: The decoder context.
  Return: 0 if the post-processing frame buffers are allocated and
           _dec->pp_frame_buf has been set up for this frame, or 1 if
           post-processing output is not available (it is disabled, only DC
           quantizer tracking was requested, tracking cannot start on this
           frame, or an allocation failed).*/
static int oc_dec_postprocess_init(oc_dec_ctx *_dec){
  size_t y_sz;
  size_t c_sz;
  int    c_w;
  int    c_h;
  int    do_chroma;
  int    pli;
  /*Level 0: post-processing is off, so release everything we were holding.*/
  if(_dec->pp_level<=OC_PP_LEVEL_DISABLED){
    if(_dec->dc_qis!=NULL){
      _ogg_free(_dec->dc_qis);
      _ogg_free(_dec->variances);
      _ogg_free(_dec->pp_frame_data);
      _dec->dc_qis=NULL;
      _dec->variances=NULL;
      _dec->pp_frame_data=NULL;
    }
    return 1;
  }
  if(_dec->dc_qis!=NULL){
    const ptrdiff_t *fragi_list;
    ptrdiff_t        nfragis;
    ptrdiff_t        pos;
    unsigned char    qi0;
    /*Already tracking: record this frame's first qi for each coded block.*/
    fragi_list=_dec->state.coded_fragis;
    nfragis=_dec->state.ncoded_fragis[0]+
     _dec->state.ncoded_fragis[1]+_dec->state.ncoded_fragis[2];
    qi0=(unsigned char)_dec->state.qis[0];
    pos=0;
    while(pos<nfragis){
      _dec->dc_qis[fragi_list[pos]]=qi0;
      pos++;
    }
  }
  else{
    /*Tracking of DC quantization indices can only begin on an intra frame;
       on any other frame there's no point in starting now.*/
    if(_dec->state.frame_type!=OC_INTRA_FRAME)return 1;
    _dec->dc_qis=(unsigned char *)_ogg_malloc(
     _dec->state.nfrags*sizeof(_dec->dc_qis[0]));
    if(_dec->dc_qis==NULL)return 1;
    memset(_dec->dc_qis,_dec->state.qis[0],_dec->state.nfrags);
  }
  /*Level 1: only the DC quantization indices are wanted, so the frame and
     variance buffers are not needed.*/
  if(_dec->pp_level<=OC_PP_LEVEL_TRACKDCQI){
    if(_dec->variances!=NULL){
      _ogg_free(_dec->variances);
      _ogg_free(_dec->pp_frame_data);
      _dec->variances=NULL;
      _dec->pp_frame_data=NULL;
    }
    return 1;
  }
  /*Sizes of the luma plane and of a single chroma plane.*/
  y_sz=_dec->state.info.frame_width*(size_t)_dec->state.info.frame_height;
  c_w=_dec->state.info.frame_width>>!(_dec->state.info.pixel_fmt&1);
  c_h=_dec->state.info.frame_height>>!(_dec->state.info.pixel_fmt&2);
  c_sz=c_w*(size_t)c_h;
  do_chroma=_dec->pp_level>=OC_PP_LEVEL_DEBLOCKC;
  if(_dec->variances==NULL){
    /*Room for the chroma planes is always included, even if they end up
       unused; this keeps the allocation state simple, at the cost of some
       memory on systems that don't overcommit pages.*/
    _dec->pp_frame_data=(unsigned char *)_ogg_malloc(
     (y_sz+(c_sz<<1))*sizeof(_dec->pp_frame_data[0]));
    _dec->variances=(int *)_ogg_malloc(
     _dec->state.nfrags*sizeof(_dec->variances[0]));
    if(_dec->pp_frame_data==NULL||_dec->variances==NULL){
      _ogg_free(_dec->pp_frame_data);
      _ogg_free(_dec->variances);
      _dec->pp_frame_data=NULL;
      _dec->variances=NULL;
      return 1;
    }
    /*Fresh storage: force the buffer pointers below to be rebuilt.*/
    _dec->pp_frame_state=0;
  }
  /*pp_frame_state is 1 when the pointers are set up for luma only, and 2
     when they are set up for all three planes.
    Rebuild them if they don't match what this level needs.*/
  if(_dec->pp_frame_state!=1+do_chroma){
    _dec->pp_frame_buf[0].width=_dec->state.info.frame_width;
    _dec->pp_frame_buf[0].height=_dec->state.info.frame_height;
    if(do_chroma){
      /*Lay the three planes out one after another, then flip them.*/
      _dec->pp_frame_buf[0].stride=_dec->pp_frame_buf[0].width;
      _dec->pp_frame_buf[0].data=_dec->pp_frame_data;
      for(pli=1;pli<3;pli++){
        _dec->pp_frame_buf[pli].width=c_w;
        _dec->pp_frame_buf[pli].height=c_h;
        _dec->pp_frame_buf[pli].stride=_dec->pp_frame_buf[pli].width;
        _dec->pp_frame_buf[pli].data=
         _dec->pp_frame_buf[pli-1].data+(pli==1?y_sz:c_sz);
      }
      oc_ycbcr_buffer_flip(_dec->pp_frame_buf,_dec->pp_frame_buf);
    }
    else{
      /*Chroma processing is disabled: only the luma plane lives in our own
         storage, addressed from its last row with a negative stride.*/
      _dec->pp_frame_buf[0].stride=-_dec->pp_frame_buf[0].width;
      _dec->pp_frame_buf[0].data=_dec->pp_frame_data+
       (1-_dec->pp_frame_buf[0].height)*(ptrdiff_t)_dec->pp_frame_buf[0].stride;
    }
    _dec->pp_frame_state=1+do_chroma;
  }
  /*Without chroma processing, the chroma plane descriptors are simply copied
     from the current reference frame.*/
  if(!do_chroma){
    memcpy(_dec->pp_frame_buf+1,
     _dec->state.ref_frame_bufs[_dec->state.ref_frame_idx[OC_FRAME_SELF]]+1,
     sizeof(_dec->pp_frame_buf[1])*2);
  }
  return 0;
}
1302 
1303 
1304 
/*The state of the main decoding pipeline for one frame.
  It is set up by oc_dec_pipeline_init() and updated as each MCU (one or two
   super block rows) is processed.*/
typedef struct{
  /*The bounding value array for the loop filter, filled in by
     oc_state_loop_filter_init().*/
  int                 bounding_values[256];
  /*The current offset into the token list for each plane and coefficient,
     initialized from the decoder's ti0 table.*/
  ptrdiff_t           ti[3][64];
  /*The remaining EOB run length for each plane and coefficient, initialized
     from the decoder's eob_runs table.*/
  ptrdiff_t           eob_runs[3][64];
  /*For each plane, the next entry to process in the coded fragment list.*/
  const ptrdiff_t    *coded_fragis[3];
  /*For each plane, the current position in the uncoded fragment list.
    This list is walked backwards: the pointer is decremented by the number of
     uncoded fragments in an MCU before they are copied.*/
  const ptrdiff_t    *uncoded_fragis[3];
  /*The number of coded fragments in the current MCU of each plane.*/
  ptrdiff_t           ncoded_fragis[3];
  /*The number of uncoded fragments in the current MCU of each plane.*/
  ptrdiff_t           nuncoded_fragis[3];
  /*The dequantization tables in use for this frame, indexed by plane, qi
     index (qii), and quantizer type (qti).*/
  const ogg_uint16_t *dequant[3][3][2];
  /*The first fragment row of the current MCU in each plane.*/
  int                 fragy0[3];
  /*The fragment row at which the current MCU ends in each plane (not
     included in the MCU).*/
  int                 fragy_end[3];
  /*The last DC value seen for each plane and reference frame, used as a
     fallback DC predictor.*/
  int                 pred_last[3][3];
  /*Set to 4 or 8 by oc_dec_pipeline_init(): 8 when chroma is sub-sampled in
     the vertical direction, so that two super block rows of Y' are decoded
     for each super block row of Cb and Cr.*/
  int                 mcu_nvfrags;
  /*The logical negation of oc_state_loop_filter_init()'s return value.*/
  int                 loop_filter;
  /*The post-processing level saved when the pipeline was initialized.*/
  int                 pp_level;
}oc_dec_pipeline_state;
1321 
1322 
1323 
1324 /*Initialize the main decoding pipeline.*/
oc_dec_pipeline_init(oc_dec_ctx * _dec,oc_dec_pipeline_state * _pipe)1325 static void oc_dec_pipeline_init(oc_dec_ctx *_dec,
1326  oc_dec_pipeline_state *_pipe){
1327   const ptrdiff_t *coded_fragis;
1328   const ptrdiff_t *uncoded_fragis;
1329   int              pli;
1330   int              qii;
1331   int              qti;
1332   /*If chroma is sub-sampled in the vertical direction, we have to decode two
1333      super block rows of Y' for each super block row of Cb and Cr.*/
1334   _pipe->mcu_nvfrags=4<<!(_dec->state.info.pixel_fmt&2);
1335   /*Initialize the token and extra bits indices for each plane and
1336      coefficient.*/
1337   memcpy(_pipe->ti,_dec->ti0,sizeof(_pipe->ti));
1338   /*Also copy over the initial the EOB run counts.*/
1339   memcpy(_pipe->eob_runs,_dec->eob_runs,sizeof(_pipe->eob_runs));
1340   /*Set up per-plane pointers to the coded and uncoded fragments lists.*/
1341   coded_fragis=_dec->state.coded_fragis;
1342   uncoded_fragis=coded_fragis+_dec->state.nfrags;
1343   for(pli=0;pli<3;pli++){
1344     ptrdiff_t ncoded_fragis;
1345     _pipe->coded_fragis[pli]=coded_fragis;
1346     _pipe->uncoded_fragis[pli]=uncoded_fragis;
1347     ncoded_fragis=_dec->state.ncoded_fragis[pli];
1348     coded_fragis+=ncoded_fragis;
1349     uncoded_fragis+=ncoded_fragis-_dec->state.fplanes[pli].nfrags;
1350   }
1351   /*Set up condensed quantizer tables.*/
1352   for(pli=0;pli<3;pli++){
1353     for(qii=0;qii<_dec->state.nqis;qii++){
1354       for(qti=0;qti<2;qti++){
1355         _pipe->dequant[pli][qii][qti]=
1356          _dec->state.dequant_tables[_dec->state.qis[qii]][pli][qti];
1357       }
1358     }
1359   }
1360   /*Set the previous DC predictor to 0 for all color planes and frame types.*/
1361   memset(_pipe->pred_last,0,sizeof(_pipe->pred_last));
1362   /*Initialize the bounding value array for the loop filter.*/
1363   _pipe->loop_filter=!oc_state_loop_filter_init(&_dec->state,
1364    _pipe->bounding_values);
1365   /*Initialize any buffers needed for post-processing.
1366     We also save the current post-processing level, to guard against the user
1367      changing it from a callback.*/
1368   if(!oc_dec_postprocess_init(_dec))_pipe->pp_level=_dec->pp_level;
1369   /*If we don't have enough information to post-process, disable it, regardless
1370      of the user-requested level.*/
1371   else{
1372     _pipe->pp_level=OC_PP_LEVEL_DISABLED;
1373     memcpy(_dec->pp_frame_buf,
1374      _dec->state.ref_frame_bufs[_dec->state.ref_frame_idx[OC_FRAME_SELF]],
1375      sizeof(_dec->pp_frame_buf[0])*3);
1376   }
1377 }
1378 
/*Undo the DC prediction in a single plane of an MCU (one or two super block
   rows).
  As a side effect, the number of coded and uncoded fragments in this plane of
   the MCU is also computed.
  _dec:  The decoder context; the dc field of each coded fragment in the MCU
          is replaced by its reconstructed (un-predicted) value.
  _pipe: The pipeline state; fragy0 and fragy_end give the rows to process,
          pred_last is updated, and ncoded_fragis and nuncoded_fragis are
          written for this plane.
  _pli:  The color plane to process.*/
static void oc_dec_dc_unpredict_mcu_plane(oc_dec_ctx *_dec,
 oc_dec_pipeline_state *_pipe,int _pli){
  const oc_fragment_plane *fplane;
  oc_fragment             *frags;
  int                     *pred_last;
  ptrdiff_t                ncoded_fragis;
  ptrdiff_t                fragi;
  int                      fragx;
  int                      fragy;
  int                      fragy0;
  int                      fragy_end;
  int                      nhfrags;
  /*Compute the first and last fragment row of the current MCU for this
     plane.*/
  fplane=_dec->state.fplanes+_pli;
  fragy0=_pipe->fragy0[_pli];
  fragy_end=_pipe->fragy_end[_pli];
  nhfrags=fplane->nhfrags;
  pred_last=_pipe->pred_last[_pli];
  frags=_dec->state.frags;
  ncoded_fragis=0;
  /*The index of the first fragment in row fragy0 of this plane.*/
  fragi=fplane->froffset+fragy0*(ptrdiff_t)nhfrags;
  for(fragy=fragy0;fragy<fragy_end;fragy++){
    if(fragy==0){
      /*For the first row, all of the cases reduce to just using the previous
         predictor for the same reference frame.*/
      for(fragx=0;fragx<nhfrags;fragx++,fragi++){
        if(frags[fragi].coded){
          int ref;
          ref=OC_FRAME_FOR_MODE(frags[fragi].mb_mode);
          /*Add the predictor to the stored value, and remember the result as
             the new predictor for this reference frame.*/
          pred_last[ref]=frags[fragi].dc+=pred_last[ref];
          ncoded_fragis++;
        }
      }
    }
    else{
      oc_fragment *u_frags;
      int          l_ref;
      int          ul_ref;
      int          u_ref;
      /*u_frags[fragi] is the fragment directly above frags[fragi].*/
      u_frags=frags-nhfrags;
      /*The reference frame used by the left, upper-left, and upper
         neighbors, or -1 if that neighbor is not coded or does not exist.*/
      l_ref=-1;
      ul_ref=-1;
      u_ref=u_frags[fragi].coded?OC_FRAME_FOR_MODE(u_frags[fragi].mb_mode):-1;
      for(fragx=0;fragx<nhfrags;fragx++,fragi++){
        int ur_ref;
        /*The last fragment in a row has no upper-right neighbor.*/
        if(fragx+1>=nhfrags)ur_ref=-1;
        else{
          ur_ref=u_frags[fragi+1].coded?
           OC_FRAME_FOR_MODE(u_frags[fragi+1].mb_mode):-1;
        }
        if(frags[fragi].coded){
          int pred;
          int ref;
          ref=OC_FRAME_FOR_MODE(frags[fragi].mb_mode);
          /*We break out a separate case based on which of our neighbors use
             the same reference frames.
            This is somewhat faster than trying to make a generic case which
             handles all of them, since it reduces lots of poorly predicted
             jumps to one switch statement, and also lets a number of the
             multiplications be optimized out by strength reduction.*/
          /*The switch value is a bit mask: bit 0 is set if the left neighbor
             uses the same reference frame, bit 1 for the upper-left, bit 2
             for the upper, and bit 3 for the upper-right.*/
          switch((l_ref==ref)|(ul_ref==ref)<<1|
           (u_ref==ref)<<2|(ur_ref==ref)<<3){
            /*No usable neighbor: fall back to the last DC value seen for this
               reference frame.*/
            default:pred=pred_last[ref];break;
            /*Left only, or left and upper-left: use the left neighbor.*/
            case  1:
            case  3:pred=frags[fragi-1].dc;break;
            /*Upper-left only.*/
            case  2:pred=u_frags[fragi-1].dc;break;
            /*Upper, optionally with one of upper-left or upper-right: use the
               upper neighbor.*/
            case  4:
            case  6:
            case 12:pred=u_frags[fragi].dc;break;
            /*Left and upper: average them.*/
            case  5:pred=(frags[fragi-1].dc+u_frags[fragi].dc)/2;break;
            /*Upper-right only.*/
            case  8:pred=u_frags[fragi+1].dc;break;
            /*Left and upper-right (possibly with upper-left or upper): a
               weighted sum of the left and upper-right neighbors.*/
            case  9:
            case 11:
            case 13:{
              pred=(75*frags[fragi-1].dc+53*u_frags[fragi+1].dc)/128;
            }break;
            /*Upper-left and upper-right: average them.*/
            case 10:pred=(u_frags[fragi-1].dc+u_frags[fragi+1].dc)/2;break;
            /*Upper-left, upper, and upper-right.*/
            case 14:{
              pred=(3*(u_frags[fragi-1].dc+u_frags[fragi+1].dc)
               +10*u_frags[fragi].dc)/16;
            }break;
            /*Left, upper-left, and upper (with or without upper-right).*/
            case  7:
            case 15:{
              int p0;
              int p1;
              int p2;
              p0=frags[fragi-1].dc;
              p1=u_frags[fragi-1].dc;
              p2=u_frags[fragi].dc;
              pred=(29*(p0+p2)-26*p1)/32;
              /*If the weighted prediction is more than 128 away from one of
                 the neighbors, use that neighbor's value instead, checking
                 the upper, then the left, then the upper-left neighbor.*/
              if(abs(pred-p2)>128)pred=p2;
              else if(abs(pred-p0)>128)pred=p0;
              else if(abs(pred-p1)>128)pred=p1;
            }break;
          }
          pred_last[ref]=frags[fragi].dc+=pred;
          ncoded_fragis++;
          l_ref=ref;
        }
        else l_ref=-1;
        /*Slide the neighborhood one fragment to the right.*/
        ul_ref=u_ref;
        u_ref=ur_ref;
      }
    }
  }
  _pipe->ncoded_fragis[_pli]=ncoded_fragis;
  /*Also save the number of uncoded fragments so we know how many to copy.*/
  _pipe->nuncoded_fragis[_pli]=
   (fragy_end-fragy0)*(ptrdiff_t)nhfrags-ncoded_fragis;
}
1494 
1495 /*Reconstructs all coded fragments in a single MCU (one or two super block
1496    rows).
1497   This requires that each coded fragment have a proper macro block mode and
1498    motion vector (if not in INTRA mode), and have it's DC value decoded, with
1499    the DC prediction process reversed, and the number of coded and uncoded
1500    fragments in this plane of the MCU be counted.
1501   The token lists for each color plane and coefficient should also be filled
1502    in, along with initial token offsets, extra bits offsets, and EOB run
1503    counts.*/
oc_dec_frags_recon_mcu_plane(oc_dec_ctx * _dec,oc_dec_pipeline_state * _pipe,int _pli)1504 static void oc_dec_frags_recon_mcu_plane(oc_dec_ctx *_dec,
1505  oc_dec_pipeline_state *_pipe,int _pli){
1506   unsigned char       *dct_tokens;
1507   const unsigned char *dct_fzig_zag;
1508   ogg_uint16_t         dc_quant[2];
1509   const oc_fragment   *frags;
1510   const ptrdiff_t     *coded_fragis;
1511   ptrdiff_t            ncoded_fragis;
1512   ptrdiff_t            fragii;
1513   ptrdiff_t           *ti;
1514   ptrdiff_t           *eob_runs;
1515   int                  qti;
1516   dct_tokens=_dec->dct_tokens;
1517   dct_fzig_zag=_dec->state.opt_data.dct_fzig_zag;
1518   frags=_dec->state.frags;
1519   coded_fragis=_pipe->coded_fragis[_pli];
1520   ncoded_fragis=_pipe->ncoded_fragis[_pli];
1521   ti=_pipe->ti[_pli];
1522   eob_runs=_pipe->eob_runs[_pli];
1523   for(qti=0;qti<2;qti++)dc_quant[qti]=_pipe->dequant[_pli][0][qti][0];
1524   for(fragii=0;fragii<ncoded_fragis;fragii++){
1525     /*This array is made one element larger because the zig-zag index array
1526        uses the final element as a dumping ground for out-of-range indices
1527        to protect us from buffer overflow.*/
1528     OC_ALIGN8(ogg_int16_t dct_coeffs[65]);
1529     const ogg_uint16_t *ac_quant;
1530     ptrdiff_t           fragi;
1531     int                 last_zzi;
1532     int                 zzi;
1533     fragi=coded_fragis[fragii];
1534     for(zzi=0;zzi<64;zzi++)dct_coeffs[zzi]=0;
1535     qti=frags[fragi].mb_mode!=OC_MODE_INTRA;
1536     ac_quant=_pipe->dequant[_pli][frags[fragi].qii][qti];
1537     /*Decode the AC coefficients.*/
1538     for(zzi=0;zzi<64;){
1539       int token;
1540       last_zzi=zzi;
1541       if(eob_runs[zzi]){
1542         eob_runs[zzi]--;
1543         break;
1544       }
1545       else{
1546         ptrdiff_t eob;
1547         int       cw;
1548         int       rlen;
1549         int       coeff;
1550         int       lti;
1551         lti=ti[zzi];
1552         token=dct_tokens[lti++];
1553         cw=OC_DCT_CODE_WORD[token];
1554         /*These parts could be done branchless, but the branches are fairly
1555            predictable and the C code translates into more than a few
1556            instructions, so it's worth it to avoid them.*/
1557         if(OC_DCT_TOKEN_NEEDS_MORE(token)){
1558           cw+=dct_tokens[lti++]<<OC_DCT_TOKEN_EB_POS(token);
1559         }
1560         eob=cw>>OC_DCT_CW_EOB_SHIFT&0xFFF;
1561         if(token==OC_DCT_TOKEN_FAT_EOB){
1562           eob+=dct_tokens[lti++]<<8;
1563           if(eob==0)eob=OC_DCT_EOB_FINISH;
1564         }
1565         rlen=(unsigned char)(cw>>OC_DCT_CW_RLEN_SHIFT);
1566         cw^=-(cw&1<<OC_DCT_CW_FLIP_BIT);
1567         coeff=cw>>OC_DCT_CW_MAG_SHIFT;
1568         eob_runs[zzi]=eob;
1569         ti[zzi]=lti;
1570         zzi+=rlen;
1571         dct_coeffs[dct_fzig_zag[zzi]]=(ogg_int16_t)(coeff*(int)ac_quant[zzi]);
1572         zzi+=!eob;
1573       }
1574     }
1575     /*TODO: zzi should be exactly 64 here.
1576       If it's not, we should report some kind of warning.*/
1577     zzi=OC_MINI(zzi,64);
1578     dct_coeffs[0]=(ogg_int16_t)frags[fragi].dc;
1579     /*last_zzi is always initialized.
1580       If your compiler thinks otherwise, it is dumb.*/
1581     oc_state_frag_recon(&_dec->state,fragi,_pli,
1582      dct_coeffs,last_zzi,dc_quant[qti]);
1583   }
1584   _pipe->coded_fragis[_pli]+=ncoded_fragis;
1585   /*Right now the reconstructed MCU has only the coded blocks in it.*/
1586   /*TODO: We make the decision here to always copy the uncoded blocks into it
1587      from the reference frame.
1588     We could also copy the coded blocks back over the reference frame, if we
1589      wait for an additional MCU to be decoded, which might be faster if only a
1590      small number of blocks are coded.
1591     However, this introduces more latency, creating a larger cache footprint.
1592     It's unknown which decision is better, but this one results in simpler
1593      code, and the hard case (high bitrate, high resolution) is handled
1594      correctly.*/
1595   /*Copy the uncoded blocks from the previous reference frame.*/
1596   _pipe->uncoded_fragis[_pli]-=_pipe->nuncoded_fragis[_pli];
1597   oc_state_frag_copy_list(&_dec->state,_pipe->uncoded_fragis[_pli],
1598    _pipe->nuncoded_fragis[_pli],OC_FRAME_SELF,OC_FRAME_PREV,_pli);
1599 }
1600 
/*Deblocks an 8 pixel wide horizontal block edge.
  For each of the 8 columns, ten vertically adjacent pixels are read starting
   at _src; the edge being filtered lies between the fifth and sixth of them.
  Eight pixels per column are then written starting at _dst: a low-pass
   filtered version of source pixels 1...8 when the column passes the
   thresholds below, or an unmodified copy of them otherwise.
  _dst:         The first output pixel of the first column.
  _dst_ystride: The offset between successive output rows.
  _src:         The first input pixel of the first column.
  _src_ystride: The offset between successive input rows.
  _qstep:       The column is only filtered if the step across the edge is
                 strictly smaller than this in magnitude.
  _flimit:      The column is only filtered if the activity measured on each
                 side of the edge is strictly smaller than this.
  _variance0:   Accumulates the (saturated) activity of the first five pixels
                 of every column.
  _variance1:   Accumulates the (saturated) activity of the last five pixels
                 of every column.*/
static void oc_filter_hedge(unsigned char *_dst,int _dst_ystride,
 const unsigned char *_src,int _src_ystride,int _qstep,int _flimit,
 int *_variance0,int *_variance1){
  int col;
  for(col=0;col<8;col++){
    const unsigned char *in;
    unsigned char       *out;
    int                  p[10];
    int                  q[8];
    int                  act0;
    int                  act1;
    int                  step;
    int                  i;
    /*Gather the ten pixels of this column before anything is written.*/
    in=_src+col;
    for(i=0;i<10;i++){
      p[i]=*in;
      in+=_src_ystride;
    }
    /*Sum of absolute differences on each side of the edge.*/
    act0=0;
    act1=0;
    for(i=0;i<4;i++){
      act0+=abs(p[i+1]-p[i]);
      act1+=abs(p[i+5]-p[i+6]);
    }
    /*Each column contributes at most 255 to the running totals.*/
    *_variance0+=OC_MINI(255,act0);
    *_variance1+=OC_MINI(255,act1);
    step=p[5]-p[4];
    if(act0<_flimit&&act1<_flimit&&step<_qstep&&-step<_qstep){
      /*Both sides are flat and the step is small: smooth across the edge.
        Every set of filter taps sums to 8, hence the +4>>3 rounding.*/
      q[0]=(p[0]*3+p[1]*2+p[2]+p[3]+p[4]+4)>>3;
      q[1]=(p[0]*2+p[1]+p[2]*2+p[3]+p[4]+p[5]+4)>>3;
      for(i=0;i<4;i++){
        q[i+2]=(p[i]+p[i+1]+p[i+2]+p[i+3]*2+p[i+4]+p[i+5]+p[i+6]+4)>>3;
      }
      q[6]=(p[4]+p[5]+p[6]+p[7]*2+p[8]+p[9]*2+4)>>3;
      q[7]=(p[5]+p[6]+p[7]+p[8]*2+p[9]*3+4)>>3;
    }
    else{
      /*Leave the column alone: just carry the middle eight pixels over.*/
      for(i=0;i<8;i++)q[i]=p[i+1];
    }
    out=_dst+col;
    for(i=0;i<8;i++){
      *out=(unsigned char)q[i];
      out+=_dst_ystride;
    }
  }
}
1654 
/*Deblocks an 8 pixel tall vertical block edge, in place.
  For each of the 8 rows, ten horizontally adjacent pixels are read starting
   one pixel to the left of _dst; the edge being filtered lies between the
   fifth and sixth of them.
  When the row passes the thresholds below, the eight pixels starting at _dst
   are overwritten with their low-pass filtered values; otherwise the row is
   left untouched.
  _dst:         The first pixel to (possibly) rewrite in the first row.
  _dst_ystride: The offset between successive rows.
  _qstep:       The row is only filtered if the step across the edge is
                 strictly smaller than this in magnitude.
  _flimit:      The row is only filtered if the activity measured on each side
                 of the edge is strictly smaller than this.
  _variances:   _variances[0] accumulates the (saturated) activity of the first
                 five pixels of every row, _variances[1] that of the last
                 five.*/
static void oc_filter_vedge(unsigned char *_dst,int _dst_ystride,
 int _qstep,int _flimit,int *_variances){
  unsigned char *row;
  int            line;
  row=_dst;
  for(line=0;line<8;line++){
    int p[10];
    int act0;
    int act1;
    int step;
    int i;
    /*Snapshot the row first, since the output overlaps the input.*/
    for(i=0;i<10;i++)p[i]=row[i-1];
    act0=0;
    act1=0;
    for(i=0;i<4;i++){
      act0+=abs(p[i+1]-p[i]);
      act1+=abs(p[i+5]-p[i+6]);
    }
    /*Each row contributes at most 255 to the running totals.*/
    _variances[0]+=OC_MINI(255,act0);
    _variances[1]+=OC_MINI(255,act1);
    step=p[5]-p[4];
    if(act0<_flimit&&act1<_flimit&&step<_qstep&&-step<_qstep){
      /*Every set of filter taps sums to 8, hence the +4>>3 rounding.*/
      row[0]=(unsigned char)((p[0]*3+p[1]*2+p[2]+p[3]+p[4]+4)>>3);
      row[1]=(unsigned char)((p[0]*2+p[1]+p[2]*2+p[3]+p[4]+p[5]+4)>>3);
      for(i=0;i<4;i++){
        row[i+2]=(unsigned char)((p[i]+p[i+1]+p[i+2]+p[i+3]*2+
         p[i+4]+p[i+5]+p[i+6]+4)>>3);
      }
      row[6]=(unsigned char)((p[4]+p[5]+p[6]+p[7]*2+p[8]+p[9]*2+4)>>3);
      row[7]=(unsigned char)((p[5]+p[6]+p[7]+p[8]*2+p[9]*3+4)>>3);
    }
    row+=_dst_ystride;
  }
}
1691 
/*Runs the out-of-loop deblocking filter over a range of fragment rows of one
   plane, reading from _src and writing to _dst.
  Horizontal edges are filtered from _src into _dst; vertical edges are then
   filtered in place in _dst, half a fragment row behind.
  The per-fragment variance accumulators for the affected rows are cleared
   first and then filled in by the edge filters.
  _dec:       The decoder context (supplies the fragment plane layout, the
               variances, the per-fragment DC quantizer indices and the
               pp_dc_scale table).
  _dst:       The array of output image planes; plane _pli is written.
  _src:       The array of input image planes; plane _pli is read.
  _pli:       The index of the plane to process.
  _fragy0:    The first fragment row to process.
  _fragy_end: One past the last fragment row to process.*/
static void oc_dec_deblock_frag_rows(oc_dec_ctx *_dec,
 th_img_plane *_dst,th_img_plane *_src,int _pli,int _fragy0,
 int _fragy_end){
  oc_fragment_plane   *fplane;
  th_img_plane        *dplane;
  th_img_plane        *splane;
  int                 *variance;
  unsigned char       *dc_qi;
  unsigned char       *dst;
  const unsigned char *src;
  ptrdiff_t            froffset;
  int                  dst_ystride;
  int                  src_ystride;
  int                  nhfrags;
  int                  nrows;
  int                  width;
  int                  at_top;
  int                  at_bottom;
  int                  flimit;
  int                  qstep;
  int                  y_end;
  int                  y;
  int                  x;
  dplane=_dst+_pli;
  splane=_src+_pli;
  fplane=_dec->state.fplanes+_pli;
  nhfrags=fplane->nhfrags;
  froffset=fplane->froffset+_fragy0*(ptrdiff_t)nhfrags;
  variance=_dec->variances+froffset;
  dc_qi=_dec->dc_qis+froffset;
  at_top=_fragy0<=0;
  at_bottom=_fragy_end>=fplane->nvfrags;
  /*Clear the variances one row ahead of the range being filtered: skip the
     first row unless we start at the top of the plane, and include one extra
     row unless we finish at the bottom.*/
  nrows=_fragy_end-_fragy0+!at_bottom-!at_top;
  memset(variance+(at_top?0:nhfrags),0,
   nrows*(nhfrags*sizeof(variance[0])));
  /*Unless we start at the top of the plane, begin in the middle of the
     fragment row.*/
  y=(_fragy0<<3)+(at_top?0:4);
  dst_ystride=dplane->stride;
  src_ystride=splane->stride;
  dst=dplane->data+y*(ptrdiff_t)dst_ystride;
  src=splane->data+y*(ptrdiff_t)src_ystride;
  width=dplane->width;
  /*The top four rows of the plane are never touched by a horizontal edge
     filter, so they are copied verbatim.*/
  while(y<4){
    memcpy(dst,src,width*sizeof(dst[0]));
    dst+=dst_ystride;
    src+=src_ystride;
    y++;
  }
  /*The last fragment row of the plane is handled separately below.*/
  y_end=(_fragy_end-at_bottom)<<3;
  for(;y<y_end;y+=8){
    /*The first fragment of the row is always processed and has no vertical
       edge to its left.*/
    x=0;
    do{
      qstep=_dec->pp_dc_scale[*dc_qi];
      flimit=(qstep*3)>>2;
      oc_filter_hedge(dst+x,dst_ystride,src+x-src_ystride,src_ystride,
       qstep,flimit,variance,variance+nhfrags);
      if(x>0){
        /*Filter the vertical edge shared with the fragment to the left, in
           the eight rows that the horizontal filter has already produced.*/
        oc_filter_vedge(dst+x-dst_ystride*4-4,dst_ystride,
         qstep,flimit,variance-1);
      }
      variance++;
      dc_qi++;
      x+=8;
    }
    while(x<width);
    dst+=dst_ystride*8;
    src+=src_ystride*8;
  }
  /*Finish off the bottom of the plane, if it is part of this range.*/
  if(at_bottom){
    int height;
    height=dplane->height;
    for(;y<height;y++){
      memcpy(dst,src,width*sizeof(dst[0]));
      dst+=dst_ystride;
      src+=src_ystride;
    }
    /*Filter the vertical edges of the last eight rows.
      The first fragment has no edge to its left, so skip its DC qi.*/
    dc_qi++;
    for(x=8;x<width;x+=8){
      qstep=_dec->pp_dc_scale[*dc_qi++];
      flimit=(qstep*3)>>2;
      oc_filter_vedge(dst+x-dst_ystride*8-4,dst_ystride,
       qstep,flimit,variance++);
    }
  }
}
1777 
/*Applies the deringing filter to one 8x8 block, in place.
  Each pixel is replaced by a weighted average of itself and its four
   neighbors, where the weight given to a neighbor shrinks as the difference
   across the boundary between the two pixels grows.
  All weights are computed from the unfiltered block before any pixel is
   rewritten; the averaging pass itself runs in raster order on the same
   buffer, so the left and upper neighbors it reads have already been
   filtered.
  _idata:     The top-left pixel of the block.
  _ystride:   The offset between successive rows.
  _b:         Edge flags: bit 0 set if the block has no left neighbor, bit 1 if
               it has no right neighbor, bit 2 if it has no row above, and bit
               3 if it has no row below.
              Pixels outside a flagged edge are never accessed; the edge pixel
               itself is used in their place.
  _dc_scale:  Added to the base weight; three times this value also caps the
               weight (together with a fixed limit selected by _strong).
  _sharp_mod: The weight used when a difference is so large that the computed
               weight drops below -64.
  _strong:    Selects the filter strength (0 or 1); it indexes the weight cap
               and difference scale tables below.*/
static void oc_dering_block(unsigned char *_idata,int _ystride,int _b,
 int _dc_scale,int _sharp_mod,int _strong){
  static const unsigned char OC_MOD_MAX[2]={24,32};
  static const unsigned char OC_MOD_SHIFT[2]={1,0};
  const unsigned char *above;
  const unsigned char *cur;
  const unsigned char *below;
  unsigned char       *out;
  int                  vmod[72];
  int                  hmod[72];
  int                  mod_hi;
  int                  shift;
  int                  left_dx;
  int                  right_dx;
  int                  px;
  int                  cx;
  int                  by;
  int                  bx;
  mod_hi=OC_MINI(3*_dc_scale,OC_MOD_MAX[_strong]);
  shift=OC_MOD_SHIFT[_strong];
  /*Weights across the 9 horizontal boundaries (above row 0 ... below row 7).
    At a flagged top or bottom edge a row is compared with itself.*/
  cur=_idata;
  above=_b&4?cur:cur-_ystride;
  for(by=0;by<9;by++){
    for(bx=0;bx<8;bx++){
      int mod;
      mod=32+_dc_scale-(abs(cur[bx]-above[bx])<<shift);
      vmod[by*8+bx]=mod<-64?_sharp_mod:OC_CLAMPI(0,mod,mod_hi);
    }
    above=cur;
    if(!(_b&8)||by<7)cur+=_ystride;
  }
  /*Weights across the 9 vertical boundaries (left of column 0 ... right of
     column 7), stored column-major.
    At a flagged left or right edge a column is compared with itself.*/
  cx=0;
  px=-!(_b&1);
  for(bx=0;bx<9;bx++){
    const unsigned char *row;
    row=_idata;
    for(by=0;by<8;by++){
      int mod;
      mod=32+_dc_scale-(abs(row[cx]-row[px])<<shift);
      hmod[bx*8+by]=mod<-64?_sharp_mod:OC_CLAMPI(0,mod,mod_hi);
      row+=_ystride;
    }
    px=cx;
    if(!(_b&2)||bx<7)cx++;
  }
  /*Column offsets of the left neighbor of column 0 and the right neighbor of
     column 7; both collapse onto the edge column when that edge is flagged.*/
  left_dx=-!(_b&1);
  right_dx=7+!(_b&2);
  out=_idata;
  cur=out;
  above=_b&4?cur:cur-_ystride;
  below=cur+_ystride;
  for(by=0;by<8;by++){
    for(bx=0;bx<8;bx++){
      int lx;
      int rx;
      int self_w;
      int acc;
      int w;
      int v;
      lx=bx>0?bx-1:left_dx;
      rx=bx<7?bx+1:right_dx;
      /*The center pixel gets whatever part of 128 the neighbors do not use;
         64 is the rounding offset for the final shift by 7.*/
      self_w=128;
      acc=64;
      w=hmod[bx*8+by];
      self_w-=w;
      acc+=w*cur[lx];
      w=vmod[by*8+bx];
      self_w-=w;
      acc+=w*above[bx];
      w=vmod[(by+1)*8+bx];
      self_w-=w;
      acc+=w*below[bx];
      w=hmod[(bx+1)*8+by];
      self_w-=w;
      acc+=w*cur[rx];
      v=(self_w*cur[bx]+acc)>>7;
      out[bx]=OC_CLAMP255(v);
    }
    out+=_ystride;
    above=cur;
    cur=below;
    /*At a flagged bottom edge the last row serves as its own lower
       neighbor.*/
    if(!(_b&8)||by<6)below+=_ystride;
  }
}
1878 
1879 #define OC_DERING_THRESH1 (384)
1880 #define OC_DERING_THRESH2 (4*OC_DERING_THRESH1)
1881 #define OC_DERING_THRESH3 (5*OC_DERING_THRESH1)
1882 #define OC_DERING_THRESH4 (10*OC_DERING_THRESH1)
1883 
oc_dec_dering_frag_rows(oc_dec_ctx * _dec,th_img_plane * _img,int _pli,int _fragy0,int _fragy_end)1884 static void oc_dec_dering_frag_rows(oc_dec_ctx *_dec,th_img_plane *_img,
1885  int _pli,int _fragy0,int _fragy_end){
1886   th_img_plane      *iplane;
1887   oc_fragment_plane *fplane;
1888   oc_fragment       *frag;
1889   int               *variance;
1890   unsigned char     *idata;
1891   ptrdiff_t          froffset;
1892   int                ystride;
1893   int                nhfrags;
1894   int                sthresh;
1895   int                strong;
1896   int                y_end;
1897   int                width;
1898   int                height;
1899   int                y;
1900   int                x;
1901   iplane=_img+_pli;
1902   fplane=_dec->state.fplanes+_pli;
1903   nhfrags=fplane->nhfrags;
1904   froffset=fplane->froffset+_fragy0*(ptrdiff_t)nhfrags;
1905   variance=_dec->variances+froffset;
1906   frag=_dec->state.frags+froffset;
1907   strong=_dec->pp_level>=(_pli?OC_PP_LEVEL_SDERINGC:OC_PP_LEVEL_SDERINGY);
1908   sthresh=_pli?OC_DERING_THRESH4:OC_DERING_THRESH3;
1909   y=_fragy0<<3;
1910   ystride=iplane->stride;
1911   idata=iplane->data+y*(ptrdiff_t)ystride;
1912   y_end=_fragy_end<<3;
1913   width=iplane->width;
1914   height=iplane->height;
1915   for(;y<y_end;y+=8){
1916     for(x=0;x<width;x+=8){
1917       int b;
1918       int qi;
1919       int var;
1920       qi=_dec->state.qis[frag->qii];
1921       var=*variance;
1922       b=(x<=0)|(x+8>=width)<<1|(y<=0)<<2|(y+8>=height)<<3;
1923       if(strong&&var>sthresh){
1924         oc_dering_block(idata+x,ystride,b,
1925          _dec->pp_dc_scale[qi],_dec->pp_sharp_mod[qi],1);
1926         if(_pli||!(b&1)&&*(variance-1)>OC_DERING_THRESH4||
1927          !(b&2)&&variance[1]>OC_DERING_THRESH4||
1928          !(b&4)&&*(variance-nhfrags)>OC_DERING_THRESH4||
1929          !(b&8)&&variance[nhfrags]>OC_DERING_THRESH4){
1930           oc_dering_block(idata+x,ystride,b,
1931            _dec->pp_dc_scale[qi],_dec->pp_sharp_mod[qi],1);
1932           oc_dering_block(idata+x,ystride,b,
1933            _dec->pp_dc_scale[qi],_dec->pp_sharp_mod[qi],1);
1934         }
1935       }
1936       else if(var>OC_DERING_THRESH2){
1937         oc_dering_block(idata+x,ystride,b,
1938          _dec->pp_dc_scale[qi],_dec->pp_sharp_mod[qi],1);
1939       }
1940       else if(var>OC_DERING_THRESH1){
1941         oc_dering_block(idata+x,ystride,b,
1942          _dec->pp_dc_scale[qi],_dec->pp_sharp_mod[qi],0);
1943       }
1944       frag++;
1945       variance++;
1946     }
1947     idata+=ystride<<3;
1948   }
1949 }
1950 
1951 
1952 
th_decode_alloc(const th_info * _info,const th_setup_info * _setup)1953 th_dec_ctx *th_decode_alloc(const th_info *_info,const th_setup_info *_setup){
1954   oc_dec_ctx *dec;
1955   if(_info==NULL||_setup==NULL)return NULL;
1956   dec=_ogg_malloc(sizeof(*dec));
1957   if(dec==NULL||oc_dec_init(dec,_info,_setup)<0){
1958     _ogg_free(dec);
1959     return NULL;
1960   }
1961   dec->state.curframe_num=0;
1962   return dec;
1963 }
1964 
/*Releases a decoder context: clears its contents with oc_dec_clear() and then
   frees the context itself.
  _dec: The context to free; NULL is accepted and ignored.*/
void th_decode_free(th_dec_ctx *_dec){
  if(_dec==NULL)return;
  oc_dec_clear(_dec);
  _ogg_free(_dec);
}
1971 
/*Decoder control function: queries or changes decoder settings.
  _dec:    The decoder context.
  _req:    The request to perform (one of the TH_DECCTL_* codes).
  _buf:    The request-specific input or output value.
  _buf_sz: The size of the object _buf points to, in bytes; it must match the
            size of the type the request expects exactly.
  Return: 0 on success.
          TH_EFAULT if _dec or _buf is NULL for a recognized request.
          TH_EINVAL if _buf_sz is wrong, or the supplied value is out of range
           (a post-processing level outside 0...OC_PP_LEVEL_MAX, or a negative
           granule position).
          TH_EIMPL if the request is not recognized (the telemetry requests
           are only recognized when built with HAVE_CAIRO).*/
int th_decode_ctl(th_dec_ctx *_dec,int _req,void *_buf,
 size_t _buf_sz){
  switch(_req){
  case TH_DECCTL_GET_PPLEVEL_MAX:{
    if(_dec==NULL||_buf==NULL)return TH_EFAULT;
    if(_buf_sz!=sizeof(int))return TH_EINVAL;
    (*(int *)_buf)=OC_PP_LEVEL_MAX;
    return 0;
  }break;
  case TH_DECCTL_SET_PPLEVEL:{
    int pp_level;
    if(_dec==NULL||_buf==NULL)return TH_EFAULT;
    if(_buf_sz!=sizeof(int))return TH_EINVAL;
    pp_level=*(int *)_buf;
    if(pp_level<0||pp_level>OC_PP_LEVEL_MAX)return TH_EINVAL;
    _dec->pp_level=pp_level;
    return 0;
  }break;
  case TH_DECCTL_SET_GRANPOS:{
    ogg_int64_t granpos;
    int         shift;
    if(_dec==NULL||_buf==NULL)return TH_EFAULT;
    if(_buf_sz!=sizeof(ogg_int64_t))return TH_EINVAL;
    granpos=*(ogg_int64_t *)_buf;
    if(granpos<0)return TH_EINVAL;
    shift=_dec->state.info.keyframe_granule_shift;
    _dec->state.granpos=granpos;
    /*The upper bits hold the (biased) key frame number and the lower shift
       bits hold the number of frames since that key frame.*/
    _dec->state.keyframe_num=(granpos>>shift)-_dec->state.granpos_bias;
    /*Build the mask in 64 bits: with a plain int, a shift of 31 would
       overflow (undefined behavior) and yield a bogus mask.*/
    _dec->state.curframe_num=_dec->state.keyframe_num
     +(granpos&(((ogg_int64_t)1<<shift)-1));
    return 0;
  }break;
  case TH_DECCTL_SET_STRIPE_CB:{
    th_stripe_callback *cb;
    if(_dec==NULL||_buf==NULL)return TH_EFAULT;
    if(_buf_sz!=sizeof(th_stripe_callback))return TH_EINVAL;
    cb=(th_stripe_callback *)_buf;
    /*The callback description is copied, so the caller's structure need not
       outlive this call.*/
    _dec->stripe_cb.ctx=cb->ctx;
    _dec->stripe_cb.stripe_decoded=cb->stripe_decoded;
    return 0;
  }break;
#ifdef HAVE_CAIRO
  /*Each telemetry request turns telemetry on and stores the supplied int in
     the corresponding setting.*/
  case TH_DECCTL_SET_TELEMETRY_MBMODE:{
    if(_dec==NULL||_buf==NULL)return TH_EFAULT;
    if(_buf_sz!=sizeof(int))return TH_EINVAL;
    _dec->telemetry=1;
    _dec->telemetry_mbmode=*(int *)_buf;
    return 0;
  }break;
  case TH_DECCTL_SET_TELEMETRY_MV:{
    if(_dec==NULL||_buf==NULL)return TH_EFAULT;
    if(_buf_sz!=sizeof(int))return TH_EINVAL;
    _dec->telemetry=1;
    _dec->telemetry_mv=*(int *)_buf;
    return 0;
  }break;
  case TH_DECCTL_SET_TELEMETRY_QI:{
    if(_dec==NULL||_buf==NULL)return TH_EFAULT;
    if(_buf_sz!=sizeof(int))return TH_EINVAL;
    _dec->telemetry=1;
    _dec->telemetry_qi=*(int *)_buf;
    return 0;
  }break;
  case TH_DECCTL_SET_TELEMETRY_BITS:{
    if(_dec==NULL||_buf==NULL)return TH_EFAULT;
    if(_buf_sz!=sizeof(int))return TH_EINVAL;
    _dec->telemetry=1;
    _dec->telemetry_bits=*(int *)_buf;
    return 0;
  }break;
#endif
  default:return TH_EIMPL;
  }
}
2045 
2046 /*We're decoding an INTER frame, but have no initialized reference
2047    buffers (i.e., decoding did not start on a key frame).
2048   We initialize them to a solid gray here.*/
oc_dec_init_dummy_frame(th_dec_ctx * _dec)2049 static void oc_dec_init_dummy_frame(th_dec_ctx *_dec){
2050   th_info *info;
2051   size_t   yplane_sz;
2052   size_t   cplane_sz;
2053   int      yhstride;
2054   int      yheight;
2055   int      chstride;
2056   int      cheight;
2057   _dec->state.ref_frame_idx[OC_FRAME_GOLD]=0;
2058   _dec->state.ref_frame_idx[OC_FRAME_PREV]=0;
2059   _dec->state.ref_frame_idx[OC_FRAME_SELF]=1;
2060   info=&_dec->state.info;
2061   yhstride=info->frame_width+2*OC_UMV_PADDING;
2062   yheight=info->frame_height+2*OC_UMV_PADDING;
2063   chstride=yhstride>>!(info->pixel_fmt&1);
2064   cheight=yheight>>!(info->pixel_fmt&2);
2065   yplane_sz=yhstride*(size_t)yheight;
2066   cplane_sz=chstride*(size_t)cheight;
2067   memset(_dec->state.ref_frame_data[0],0x80,yplane_sz+2*cplane_sz);
2068 }
2069 
/*Submits one packet to the decoder and decodes the frame it contains.
  _dec:     The decoder context.
  _op:      The packet to decode.
            A packet with zero bytes stands for a dropped frame.
  _granpos: If not NULL, receives the granule position of the decoded frame.
  Return: 0 if a frame was decoded.
          TH_DUPFRAME if the packet was empty, so the previous frame is simply
           repeated.
          TH_EFAULT if _dec or _op is NULL.
          Otherwise the negative value reported by
           oc_dec_frame_header_unpack().*/
int th_decode_packetin(th_dec_ctx *_dec,const ogg_packet *_op,
 ogg_int64_t *_granpos){
  oc_dec_pipeline_state pipe;
  th_ycbcr_buffer       stripe_buf;
  int                   stripe_fragy;
  int                   self;
  int                   refi;
  int                   pli;
  int                   notstart;
  int                   notdone;
  int                   ret;
  if(_dec==NULL||_op==NULL)return TH_EFAULT;
  /*A completely empty packet marks a dropped frame, which is equivalent to an
     inter frame without any coded blocks: nothing needs to be decoded.*/
  if(_op->bytes==0){
    if(_dec->state.ref_frame_idx[OC_FRAME_GOLD]<0||
     _dec->state.ref_frame_idx[OC_FRAME_PREV]<0){
      /*There is nothing to repeat yet, so repeat a dummy frame.*/
      oc_dec_init_dummy_frame(_dec);
      refi=_dec->state.ref_frame_idx[OC_FRAME_PREV];
      _dec->state.ref_frame_idx[OC_FRAME_SELF]=refi;
      memcpy(_dec->pp_frame_buf,_dec->state.ref_frame_bufs[refi],
       sizeof(_dec->pp_frame_buf[0])*3);
    }
    /*Only the granule position needs to advance.*/
    _dec->state.granpos=
     ((_dec->state.keyframe_num+_dec->state.granpos_bias)<<
     _dec->state.info.keyframe_granule_shift)
     +(_dec->state.curframe_num-_dec->state.keyframe_num);
    _dec->state.curframe_num++;
    if(_granpos!=NULL)*_granpos=_dec->state.granpos;
    return TH_DUPFRAME;
  }
  oc_pack_readinit(&_dec->opb,_op->packet,_op->bytes);
#if defined(HAVE_CAIRO)
  _dec->telemetry_frame_bytes=_op->bytes;
#endif
  ret=oc_dec_frame_header_unpack(_dec);
  if(ret<0)return ret;
  /*Pick the buffer that will receive the reconstruction of this frame.*/
  if(_dec->state.frame_type!=OC_INTRA_FRAME&&
   (_dec->state.ref_frame_idx[OC_FRAME_GOLD]<0||
   _dec->state.ref_frame_idx[OC_FRAME_PREV]<0)){
    /*An inter frame arrived before any reference frame exists: predict from
       a dummy frame instead.*/
    oc_dec_init_dummy_frame(_dec);
    refi=_dec->state.ref_frame_idx[OC_FRAME_SELF];
  }
  else{
    /*Use the first buffer that is not currently serving as a reference.*/
    refi=0;
    while(refi==_dec->state.ref_frame_idx[OC_FRAME_GOLD]||
     refi==_dec->state.ref_frame_idx[OC_FRAME_PREV]){
      refi++;
    }
    _dec->state.ref_frame_idx[OC_FRAME_SELF]=refi;
  }
  /*Unpack the per-frame side information.*/
  if(_dec->state.frame_type==OC_INTRA_FRAME){
    oc_dec_mark_all_intra(_dec);
    _dec->state.keyframe_num=_dec->state.curframe_num;
#if defined(HAVE_CAIRO)
    _dec->telemetry_coding_bytes=
     _dec->telemetry_mode_bytes=
     _dec->telemetry_mv_bytes=oc_pack_bytes_left(&_dec->opb);
#endif
  }
  else{
    oc_dec_coded_flags_unpack(_dec);
#if defined(HAVE_CAIRO)
    _dec->telemetry_coding_bytes=oc_pack_bytes_left(&_dec->opb);
#endif
    oc_dec_mb_modes_unpack(_dec);
#if defined(HAVE_CAIRO)
    _dec->telemetry_mode_bytes=oc_pack_bytes_left(&_dec->opb);
#endif
    oc_dec_mv_unpack_and_frag_modes_fill(_dec);
#if defined(HAVE_CAIRO)
    _dec->telemetry_mv_bytes=oc_pack_bytes_left(&_dec->opb);
#endif
  }
  oc_dec_block_qis_unpack(_dec);
#if defined(HAVE_CAIRO)
  _dec->telemetry_qi_bytes=oc_pack_bytes_left(&_dec->opb);
#endif
  oc_dec_residual_tokens_unpack(_dec);
  /*Advance the granule position now, ahead of the striped decode callbacks,
     so that the application can tell which frame the stripes belong to.*/
  _dec->state.granpos=
   ((_dec->state.keyframe_num+_dec->state.granpos_bias)<<
   _dec->state.info.keyframe_granule_shift)
   +(_dec->state.curframe_num-_dec->state.keyframe_num);
  _dec->state.curframe_num++;
  if(_granpos!=NULL)*_granpos=_dec->state.granpos;
  /*Everything that remains -- undoing DC prediction, reconstructing the coded
     fragments, copying the uncoded ones, loop filtering, border extension and
     out-of-loop post-processing -- is pipelined.
    That is, DC prediction reversal, reconstruction and uncoded fragment
     copying are performed for one or two super block rows, then the loop
     filter is run as far as it can go, then the borders are filled, then the
     post-processing filters are applied.
    The unit of work is the Minimum Codable Unit (MCU): for 4:2:0 video it
     holds two luma super block rows and one chroma super block row, otherwise
     it holds one super block row from each plane.
    Within an MCU, every stage is run on one color plane before moving on to
     the next plane.
    Each filtering stage after reconstruction needs some pixels from the next
     fragment row and therefore lags behind the previous stage.
    As a result, slightly fewer finished rows are available after the first
     MCU, and slightly more after the last one.

    Working this way keeps the data in cache while it is being processed,
     which is a big performance win.
    The application callback lets further processing (blitting to video
     memory, color conversion, etc.) use the data while it is still in cache
     as well.*/
  oc_dec_pipeline_init(_dec,&pipe);
  oc_ycbcr_buffer_flip(stripe_buf,_dec->pp_frame_buf);
  notstart=0;
  stripe_fragy=0;
  do{
    int avail_fragy0;
    int avail_fragy_end;
    avail_fragy0=avail_fragy_end=_dec->state.fplanes[0].nvfrags;
    notdone=stripe_fragy+pipe.mcu_nvfrags<avail_fragy_end;
    for(pli=0;pli<3;pli++){
      oc_fragment_plane *fplane;
      int                frag_shift;
      int                pp_offset;
      int                sdelay;
      int                edelay;
      fplane=_dec->state.fplanes+pli;
      /*Work out which fragment rows of this plane belong to the current
         MCU.*/
      frag_shift=pli!=0&&!(_dec->state.info.pixel_fmt&2);
      pipe.fragy0[pli]=stripe_fragy>>frag_shift;
      pipe.fragy_end[pli]=OC_MINI(fplane->nvfrags,
       pipe.fragy0[pli]+(pipe.mcu_nvfrags>>frag_shift));
      oc_dec_dc_unpredict_mcu_plane(_dec,&pipe,pli);
      oc_dec_frags_recon_mcu_plane(_dec,&pipe,pli);
      /*sdelay and edelay count how many fragment rows the stage about to run
         lags behind reconstruction at the start and at the end of the MCU.
        There is no lag at the very top or the very bottom of the frame.*/
      sdelay=0;
      edelay=0;
      if(pipe.loop_filter){
        sdelay+=notstart;
        edelay+=notdone;
        oc_state_loop_filter_frag_rows(&_dec->state,pipe.bounding_values,
         refi,pli,pipe.fragy0[pli]-sdelay,pipe.fragy_end[pli]-edelay);
      }
      /*Border filling lags an additional two pixels, because a fragment in
         the next row may still filter its top edge using two pixels from a
         fragment in this row.
        That does not warrant delaying it by a whole fragment row, though.*/
      oc_state_borders_fill_rows(&_dec->state,refi,pli,
       ((pipe.fragy0[pli]-sdelay)<<3)-(sdelay<<1),
       ((pipe.fragy_end[pli]-edelay)<<3)-(edelay<<1));
      /*Out-of-loop post-processing.
        The chroma levels sit three above the corresponding luma levels.*/
      pp_offset=3*(pli!=0);
      if(pipe.pp_level>=OC_PP_LEVEL_DEBLOCKY+pp_offset){
        /*De-block this plane.*/
        sdelay+=notstart;
        edelay+=notdone;
        oc_dec_deblock_frag_rows(_dec,_dec->pp_frame_buf,
         _dec->state.ref_frame_bufs[refi],pli,
         pipe.fragy0[pli]-sdelay,pipe.fragy_end[pli]-edelay);
        if(pipe.pp_level>=OC_PP_LEVEL_DERINGY+pp_offset){
          /*De-ring this plane.*/
          sdelay+=notstart;
          edelay+=notdone;
          oc_dec_dering_frag_rows(_dec,_dec->pp_frame_buf,pli,
           pipe.fragy0[pli]-sdelay,pipe.fragy_end[pli]-edelay);
        }
      }
      /*Without post-processing, the loop filter still costs one more row of
         delay, thanks to the strange filtering order VP3 chose.*/
      else if(pipe.loop_filter){
        sdelay+=notstart;
        edelay+=notdone;
      }
      /*Intersect the finished rows of this plane with those of the others.
        With sub-sampled chroma each chroma delay counts double, but luma may
         have more post-processing filters enabled than chroma, so the
         limiting plane is not known in advance.*/
      avail_fragy0=OC_MINI(avail_fragy0,
       (pipe.fragy0[pli]-sdelay)<<frag_shift);
      avail_fragy_end=OC_MINI(avail_fragy_end,
       (pipe.fragy_end[pli]-edelay)<<frag_shift);
    }
    if(_dec->stripe_cb.stripe_decoded!=NULL){
      /*The callback may want to use the FPU, so make sure it can.
        Leaving this until now violates all kinds of ABI restrictions, but
         none of them matter since no floating point is used here.*/
      oc_restore_fpu(&_dec->state);
      /*Report the finished rows, turning the range upside down to match the
         flipped stripe buffer.*/
      (*_dec->stripe_cb.stripe_decoded)(_dec->stripe_cb.ctx,stripe_buf,
       _dec->state.fplanes[0].nvfrags-avail_fragy_end,
       _dec->state.fplanes[0].nvfrags-avail_fragy0);
    }
    notstart=1;
    stripe_fragy+=pipe.mcu_nvfrags;
  }
  while(notdone);
  /*Finish filling in the reference frame borders.*/
  for(pli=0;pli<3;pli++)oc_state_borders_fill_caps(&_dec->state,refi,pli);
  /*Update the reference frame indices: the new frame always becomes the
     previous reference, and a key frame becomes the golden reference too.*/
  self=_dec->state.ref_frame_idx[OC_FRAME_SELF];
  _dec->state.ref_frame_idx[OC_FRAME_PREV]=self;
  if(_dec->state.frame_type==OC_INTRA_FRAME){
    _dec->state.ref_frame_idx[OC_FRAME_GOLD]=self;
  }
  /*Restore the FPU before dumping the frame, since that _does_ use the FPU
     (for PNG gamma values, if nothing else).*/
  oc_restore_fpu(&_dec->state);
#if defined(OC_DUMP_IMAGES)
  /*Dropped frames never get here, so they are not dumped.*/
  oc_state_dump_frame(&_dec->state,OC_FRAME_SELF,"dec");
#endif
  return 0;
}
2290 
th_decode_ycbcr_out(th_dec_ctx * _dec,th_ycbcr_buffer _ycbcr)2291 int th_decode_ycbcr_out(th_dec_ctx *_dec,th_ycbcr_buffer _ycbcr){
2292   if(_dec==NULL||_ycbcr==NULL)return TH_EFAULT;
2293   oc_ycbcr_buffer_flip(_ycbcr,_dec->pp_frame_buf);
2294 #if defined(HAVE_CAIRO)
2295   /*If telemetry ioctls are active, we need to draw to the output buffer.
2296     Stuff the plane into cairo.*/
2297   if(_dec->telemetry){
2298     cairo_surface_t *cs;
2299     unsigned char   *data;
2300     unsigned char   *y_row;
2301     unsigned char   *u_row;
2302     unsigned char   *v_row;
2303     unsigned char   *rgb_row;
2304     int              cstride;
2305     int              w;
2306     int              h;
2307     int              x;
2308     int              y;
2309     int              hdec;
2310     int              vdec;
2311     w=_ycbcr[0].width;
2312     h=_ycbcr[0].height;
2313     hdec=!(_dec->state.info.pixel_fmt&1);
2314     vdec=!(_dec->state.info.pixel_fmt&2);
2315     /*Lazy data buffer init.
2316       We could try to re-use the post-processing buffer, which would save
2317        memory, but complicate the allocation logic there.
2318       I don't think anyone cares about memory usage when using telemetry; it is
2319        not meant for embedded devices.*/
2320     if(_dec->telemetry_frame_data==NULL){
2321       _dec->telemetry_frame_data=_ogg_malloc(
2322        (w*h+2*(w>>hdec)*(h>>vdec))*sizeof(*_dec->telemetry_frame_data));
2323       if(_dec->telemetry_frame_data==NULL)return 0;
2324     }
2325     cs=cairo_image_surface_create(CAIRO_FORMAT_RGB24,w,h);
2326     /*Sadly, no YUV support in Cairo (yet); convert into the RGB buffer.*/
2327     data=cairo_image_surface_get_data(cs);
2328     if(data==NULL){
2329       cairo_surface_destroy(cs);
2330       return 0;
2331     }
2332     cstride=cairo_image_surface_get_stride(cs);
2333     y_row=_ycbcr[0].data;
2334     u_row=_ycbcr[1].data;
2335     v_row=_ycbcr[2].data;
2336     rgb_row=data;
2337     for(y=0;y<h;y++){
2338       for(x=0;x<w;x++){
2339         int r;
2340         int g;
2341         int b;
2342         r=(1904000*y_row[x]+2609823*v_row[x>>hdec]-363703744)/1635200;
2343         g=(3827562*y_row[x]-1287801*u_row[x>>hdec]
2344          -2672387*v_row[x>>hdec]+447306710)/3287200;
2345         b=(952000*y_row[x]+1649289*u_row[x>>hdec]-225932192)/817600;
2346         rgb_row[4*x+0]=OC_CLAMP255(b);
2347         rgb_row[4*x+1]=OC_CLAMP255(g);
2348         rgb_row[4*x+2]=OC_CLAMP255(r);
2349       }
2350       y_row+=_ycbcr[0].stride;
2351       u_row+=_ycbcr[1].stride&-((y&1)|!vdec);
2352       v_row+=_ycbcr[2].stride&-((y&1)|!vdec);
2353       rgb_row+=cstride;
2354     }
2355     /*Draw coded identifier for each macroblock (stored in Hilbert order).*/
2356     {
2357       cairo_t           *c;
2358       const oc_fragment *frags;
2359       oc_mv             *frag_mvs;
2360       const signed char *mb_modes;
2361       oc_mb_map         *mb_maps;
2362       size_t             nmbs;
2363       size_t             mbi;
2364       int                row2;
2365       int                col2;
2366       int                qim[3]={0,0,0};
2367       if(_dec->state.nqis==2){
2368         int bqi;
2369         bqi=_dec->state.qis[0];
2370         if(_dec->state.qis[1]>bqi)qim[1]=1;
2371         if(_dec->state.qis[1]<bqi)qim[1]=-1;
2372       }
2373       if(_dec->state.nqis==3){
2374         int bqi;
2375         int cqi;
2376         int dqi;
2377         bqi=_dec->state.qis[0];
2378         cqi=_dec->state.qis[1];
2379         dqi=_dec->state.qis[2];
2380         if(cqi>bqi&&dqi>bqi){
2381           if(dqi>cqi){
2382             qim[1]=1;
2383             qim[2]=2;
2384           }
2385           else{
2386             qim[1]=2;
2387             qim[2]=1;
2388           }
2389         }
2390         else if(cqi<bqi&&dqi<bqi){
2391           if(dqi<cqi){
2392             qim[1]=-1;
2393             qim[2]=-2;
2394           }
2395           else{
2396             qim[1]=-2;
2397             qim[2]=-1;
2398           }
2399         }
2400         else{
2401           if(cqi<bqi)qim[1]=-1;
2402           else qim[1]=1;
2403           if(dqi<bqi)qim[2]=-1;
2404           else qim[2]=1;
2405         }
2406       }
2407       c=cairo_create(cs);
2408       frags=_dec->state.frags;
2409       frag_mvs=_dec->state.frag_mvs;
2410       mb_modes=_dec->state.mb_modes;
2411       mb_maps=_dec->state.mb_maps;
2412       nmbs=_dec->state.nmbs;
2413       row2=0;
2414       col2=0;
2415       for(mbi=0;mbi<nmbs;mbi++){
2416         float x;
2417         float y;
2418         int   bi;
2419         y=h-(row2+((col2+1>>1)&1))*16-16;
2420         x=(col2>>1)*16;
2421         cairo_set_line_width(c,1.);
2422         /*Keyframe (all intra) red box.*/
2423         if(_dec->state.frame_type==OC_INTRA_FRAME){
2424           if(_dec->telemetry_mbmode&0x02){
2425             cairo_set_source_rgba(c,1.,0,0,.5);
2426             cairo_rectangle(c,x+2.5,y+2.5,11,11);
2427             cairo_stroke_preserve(c);
2428             cairo_set_source_rgba(c,1.,0,0,.25);
2429             cairo_fill(c);
2430           }
2431         }
2432         else{
2433           const signed char *frag_mv;
2434           ptrdiff_t          fragi;
2435           for(bi=0;bi<4;bi++){
2436             fragi=mb_maps[mbi][0][bi];
2437             if(fragi>=0&&frags[fragi].coded){
2438               frag_mv=frag_mvs[fragi];
2439               break;
2440             }
2441           }
2442           if(bi<4){
2443             switch(mb_modes[mbi]){
2444               case OC_MODE_INTRA:{
2445                 if(_dec->telemetry_mbmode&0x02){
2446                   cairo_set_source_rgba(c,1.,0,0,.5);
2447                   cairo_rectangle(c,x+2.5,y+2.5,11,11);
2448                   cairo_stroke_preserve(c);
2449                   cairo_set_source_rgba(c,1.,0,0,.25);
2450                   cairo_fill(c);
2451                 }
2452               }break;
2453               case OC_MODE_INTER_NOMV:{
2454                 if(_dec->telemetry_mbmode&0x01){
2455                   cairo_set_source_rgba(c,0,0,1.,.5);
2456                   cairo_rectangle(c,x+2.5,y+2.5,11,11);
2457                   cairo_stroke_preserve(c);
2458                   cairo_set_source_rgba(c,0,0,1.,.25);
2459                   cairo_fill(c);
2460                 }
2461               }break;
2462               case OC_MODE_INTER_MV:{
2463                 if(_dec->telemetry_mbmode&0x04){
2464                   cairo_rectangle(c,x+2.5,y+2.5,11,11);
2465                   cairo_set_source_rgba(c,0,1.,0,.5);
2466                   cairo_stroke(c);
2467                 }
2468                 if(_dec->telemetry_mv&0x04){
2469                   cairo_move_to(c,x+8+frag_mv[0],y+8-frag_mv[1]);
2470                   cairo_set_source_rgba(c,1.,1.,1.,.9);
2471                   cairo_set_line_width(c,3.);
2472                   cairo_line_to(c,x+8+frag_mv[0]*.66,y+8-frag_mv[1]*.66);
2473                   cairo_stroke_preserve(c);
2474                   cairo_set_line_width(c,2.);
2475                   cairo_line_to(c,x+8+frag_mv[0]*.33,y+8-frag_mv[1]*.33);
2476                   cairo_stroke_preserve(c);
2477                   cairo_set_line_width(c,1.);
2478                   cairo_line_to(c,x+8,y+8);
2479                   cairo_stroke(c);
2480                 }
2481               }break;
2482               case OC_MODE_INTER_MV_LAST:{
2483                 if(_dec->telemetry_mbmode&0x08){
2484                   cairo_rectangle(c,x+2.5,y+2.5,11,11);
2485                   cairo_set_source_rgba(c,0,1.,0,.5);
2486                   cairo_move_to(c,x+13.5,y+2.5);
2487                   cairo_line_to(c,x+2.5,y+8);
2488                   cairo_line_to(c,x+13.5,y+13.5);
2489                   cairo_stroke(c);
2490                 }
2491                 if(_dec->telemetry_mv&0x08){
2492                   cairo_move_to(c,x+8+frag_mv[0],y+8-frag_mv[1]);
2493                   cairo_set_source_rgba(c,1.,1.,1.,.9);
2494                   cairo_set_line_width(c,3.);
2495                   cairo_line_to(c,x+8+frag_mv[0]*.66,y+8-frag_mv[1]*.66);
2496                   cairo_stroke_preserve(c);
2497                   cairo_set_line_width(c,2.);
2498                   cairo_line_to(c,x+8+frag_mv[0]*.33,y+8-frag_mv[1]*.33);
2499                   cairo_stroke_preserve(c);
2500                   cairo_set_line_width(c,1.);
2501                   cairo_line_to(c,x+8,y+8);
2502                   cairo_stroke(c);
2503                 }
2504               }break;
2505               case OC_MODE_INTER_MV_LAST2:{
2506                 if(_dec->telemetry_mbmode&0x10){
2507                   cairo_rectangle(c,x+2.5,y+2.5,11,11);
2508                   cairo_set_source_rgba(c,0,1.,0,.5);
2509                   cairo_move_to(c,x+8,y+2.5);
2510                   cairo_line_to(c,x+2.5,y+8);
2511                   cairo_line_to(c,x+8,y+13.5);
2512                   cairo_move_to(c,x+13.5,y+2.5);
2513                   cairo_line_to(c,x+8,y+8);
2514                   cairo_line_to(c,x+13.5,y+13.5);
2515                   cairo_stroke(c);
2516                 }
2517                 if(_dec->telemetry_mv&0x10){
2518                   cairo_move_to(c,x+8+frag_mv[0],y+8-frag_mv[1]);
2519                   cairo_set_source_rgba(c,1.,1.,1.,.9);
2520                   cairo_set_line_width(c,3.);
2521                   cairo_line_to(c,x+8+frag_mv[0]*.66,y+8-frag_mv[1]*.66);
2522                   cairo_stroke_preserve(c);
2523                   cairo_set_line_width(c,2.);
2524                   cairo_line_to(c,x+8+frag_mv[0]*.33,y+8-frag_mv[1]*.33);
2525                   cairo_stroke_preserve(c);
2526                   cairo_set_line_width(c,1.);
2527                   cairo_line_to(c,x+8,y+8);
2528                   cairo_stroke(c);
2529                 }
2530               }break;
2531               case OC_MODE_GOLDEN_NOMV:{
2532                 if(_dec->telemetry_mbmode&0x20){
2533                   cairo_set_source_rgba(c,1.,1.,0,.5);
2534                   cairo_rectangle(c,x+2.5,y+2.5,11,11);
2535                   cairo_stroke_preserve(c);
2536                   cairo_set_source_rgba(c,1.,1.,0,.25);
2537                   cairo_fill(c);
2538                 }
2539               }break;
2540               case OC_MODE_GOLDEN_MV:{
2541                 if(_dec->telemetry_mbmode&0x40){
2542                   cairo_rectangle(c,x+2.5,y+2.5,11,11);
2543                   cairo_set_source_rgba(c,1.,1.,0,.5);
2544                   cairo_stroke(c);
2545                 }
2546                 if(_dec->telemetry_mv&0x40){
2547                   cairo_move_to(c,x+8+frag_mv[0],y+8-frag_mv[1]);
2548                   cairo_set_source_rgba(c,1.,1.,1.,.9);
2549                   cairo_set_line_width(c,3.);
2550                   cairo_line_to(c,x+8+frag_mv[0]*.66,y+8-frag_mv[1]*.66);
2551                   cairo_stroke_preserve(c);
2552                   cairo_set_line_width(c,2.);
2553                   cairo_line_to(c,x+8+frag_mv[0]*.33,y+8-frag_mv[1]*.33);
2554                   cairo_stroke_preserve(c);
2555                   cairo_set_line_width(c,1.);
2556                   cairo_line_to(c,x+8,y+8);
2557                   cairo_stroke(c);
2558                 }
2559               }break;
2560               case OC_MODE_INTER_MV_FOUR:{
2561                 if(_dec->telemetry_mbmode&0x80){
2562                   cairo_rectangle(c,x+2.5,y+2.5,4,4);
2563                   cairo_rectangle(c,x+9.5,y+2.5,4,4);
2564                   cairo_rectangle(c,x+2.5,y+9.5,4,4);
2565                   cairo_rectangle(c,x+9.5,y+9.5,4,4);
2566                   cairo_set_source_rgba(c,0,1.,0,.5);
2567                   cairo_stroke(c);
2568                 }
2569                 /*4mv is odd, coded in raster order.*/
2570                 fragi=mb_maps[mbi][0][0];
2571                 if(frags[fragi].coded&&_dec->telemetry_mv&0x80){
2572                   frag_mv=frag_mvs[fragi];
2573                   cairo_move_to(c,x+4+frag_mv[0],y+12-frag_mv[1]);
2574                   cairo_set_source_rgba(c,1.,1.,1.,.9);
2575                   cairo_set_line_width(c,3.);
2576                   cairo_line_to(c,x+4+frag_mv[0]*.66,y+12-frag_mv[1]*.66);
2577                   cairo_stroke_preserve(c);
2578                   cairo_set_line_width(c,2.);
2579                   cairo_line_to(c,x+4+frag_mv[0]*.33,y+12-frag_mv[1]*.33);
2580                   cairo_stroke_preserve(c);
2581                   cairo_set_line_width(c,1.);
2582                   cairo_line_to(c,x+4,y+12);
2583                   cairo_stroke(c);
2584                 }
2585                 fragi=mb_maps[mbi][0][1];
2586                 if(frags[fragi].coded&&_dec->telemetry_mv&0x80){
2587                   frag_mv=frag_mvs[fragi];
2588                   cairo_move_to(c,x+12+frag_mv[0],y+12-frag_mv[1]);
2589                   cairo_set_source_rgba(c,1.,1.,1.,.9);
2590                   cairo_set_line_width(c,3.);
2591                   cairo_line_to(c,x+12+frag_mv[0]*.66,y+12-frag_mv[1]*.66);
2592                   cairo_stroke_preserve(c);
2593                   cairo_set_line_width(c,2.);
2594                   cairo_line_to(c,x+12+frag_mv[0]*.33,y+12-frag_mv[1]*.33);
2595                   cairo_stroke_preserve(c);
2596                   cairo_set_line_width(c,1.);
2597                   cairo_line_to(c,x+12,y+12);
2598                   cairo_stroke(c);
2599                 }
2600                 fragi=mb_maps[mbi][0][2];
2601                 if(frags[fragi].coded&&_dec->telemetry_mv&0x80){
2602                   frag_mv=frag_mvs[fragi];
2603                   cairo_move_to(c,x+4+frag_mv[0],y+4-frag_mv[1]);
2604                   cairo_set_source_rgba(c,1.,1.,1.,.9);
2605                   cairo_set_line_width(c,3.);
2606                   cairo_line_to(c,x+4+frag_mv[0]*.66,y+4-frag_mv[1]*.66);
2607                   cairo_stroke_preserve(c);
2608                   cairo_set_line_width(c,2.);
2609                   cairo_line_to(c,x+4+frag_mv[0]*.33,y+4-frag_mv[1]*.33);
2610                   cairo_stroke_preserve(c);
2611                   cairo_set_line_width(c,1.);
2612                   cairo_line_to(c,x+4,y+4);
2613                   cairo_stroke(c);
2614                 }
2615                 fragi=mb_maps[mbi][0][3];
2616                 if(frags[fragi].coded&&_dec->telemetry_mv&0x80){
2617                   frag_mv=frag_mvs[fragi];
2618                   cairo_move_to(c,x+12+frag_mv[0],y+4-frag_mv[1]);
2619                   cairo_set_source_rgba(c,1.,1.,1.,.9);
2620                   cairo_set_line_width(c,3.);
2621                   cairo_line_to(c,x+12+frag_mv[0]*.66,y+4-frag_mv[1]*.66);
2622                   cairo_stroke_preserve(c);
2623                   cairo_set_line_width(c,2.);
2624                   cairo_line_to(c,x+12+frag_mv[0]*.33,y+4-frag_mv[1]*.33);
2625                   cairo_stroke_preserve(c);
2626                   cairo_set_line_width(c,1.);
2627                   cairo_line_to(c,x+12,y+4);
2628                   cairo_stroke(c);
2629                 }
2630               }break;
2631             }
2632           }
2633         }
2634         /*qii illustration.*/
2635         if(_dec->telemetry_qi&0x2){
2636           cairo_set_line_cap(c,CAIRO_LINE_CAP_SQUARE);
2637           for(bi=0;bi<4;bi++){
2638             ptrdiff_t fragi;
2639             int       qiv;
2640             int       xp;
2641             int       yp;
2642             xp=x+(bi&1)*8;
2643             yp=y+8-(bi&2)*4;
2644             fragi=mb_maps[mbi][0][bi];
2645             if(fragi>=0&&frags[fragi].coded){
2646               qiv=qim[frags[fragi].qii];
2647               cairo_set_line_width(c,3.);
2648               cairo_set_source_rgba(c,0.,0.,0.,.5);
2649               switch(qiv){
2650                 /*Double plus:*/
2651                 case 2:{
2652                   if((bi&1)^((bi&2)>>1)){
2653                     cairo_move_to(c,xp+2.5,yp+1.5);
2654                     cairo_line_to(c,xp+2.5,yp+3.5);
2655                     cairo_move_to(c,xp+1.5,yp+2.5);
2656                     cairo_line_to(c,xp+3.5,yp+2.5);
2657                     cairo_move_to(c,xp+5.5,yp+4.5);
2658                     cairo_line_to(c,xp+5.5,yp+6.5);
2659                     cairo_move_to(c,xp+4.5,yp+5.5);
2660                     cairo_line_to(c,xp+6.5,yp+5.5);
2661                     cairo_stroke_preserve(c);
2662                     cairo_set_source_rgba(c,0.,1.,1.,1.);
2663                   }
2664                   else{
2665                     cairo_move_to(c,xp+5.5,yp+1.5);
2666                     cairo_line_to(c,xp+5.5,yp+3.5);
2667                     cairo_move_to(c,xp+4.5,yp+2.5);
2668                     cairo_line_to(c,xp+6.5,yp+2.5);
2669                     cairo_move_to(c,xp+2.5,yp+4.5);
2670                     cairo_line_to(c,xp+2.5,yp+6.5);
2671                     cairo_move_to(c,xp+1.5,yp+5.5);
2672                     cairo_line_to(c,xp+3.5,yp+5.5);
2673                     cairo_stroke_preserve(c);
2674                     cairo_set_source_rgba(c,0.,1.,1.,1.);
2675                   }
2676                 }break;
2677                 /*Double minus:*/
2678                 case -2:{
2679                   cairo_move_to(c,xp+2.5,yp+2.5);
2680                   cairo_line_to(c,xp+5.5,yp+2.5);
2681                   cairo_move_to(c,xp+2.5,yp+5.5);
2682                   cairo_line_to(c,xp+5.5,yp+5.5);
2683                   cairo_stroke_preserve(c);
2684                   cairo_set_source_rgba(c,1.,1.,1.,1.);
2685                 }break;
2686                 /*Plus:*/
2687                 case 1:{
2688                   if(bi&2==0)yp-=2;
2689                   if(bi&1==0)xp-=2;
2690                   cairo_move_to(c,xp+4.5,yp+2.5);
2691                   cairo_line_to(c,xp+4.5,yp+6.5);
2692                   cairo_move_to(c,xp+2.5,yp+4.5);
2693                   cairo_line_to(c,xp+6.5,yp+4.5);
2694                   cairo_stroke_preserve(c);
2695                   cairo_set_source_rgba(c,.1,1.,.3,1.);
2696                   break;
2697                 }
                /*No fall through: the plus case above ends with its own break.*/
2699                 /*Minus:*/
2700                 case -1:{
2701                   cairo_move_to(c,xp+2.5,yp+4.5);
2702                   cairo_line_to(c,xp+6.5,yp+4.5);
2703                   cairo_stroke_preserve(c);
2704                   cairo_set_source_rgba(c,1.,.3,.1,1.);
2705                 }break;
2706                 default:continue;
2707               }
2708               cairo_set_line_width(c,1.);
2709               cairo_stroke(c);
2710             }
2711           }
2712         }
2713         col2++;
2714         if((col2>>1)>=_dec->state.nhmbs){
2715           col2=0;
2716           row2+=2;
2717         }
2718       }
2719       /*Bit usage indicator[s]:*/
2720       if(_dec->telemetry_bits){
2721         int widths[6];
2722         int fpsn;
2723         int fpsd;
2724         int mult;
2725         int fullw;
2726         int padw;
2727         int i;
2728         fpsn=_dec->state.info.fps_numerator;
2729         fpsd=_dec->state.info.fps_denominator;
2730         mult=(_dec->telemetry_bits>=0xFF?1:_dec->telemetry_bits);
2731         fullw=250.f*h*fpsd*mult/fpsn;
2732         padw=w-24;
2733         /*Header and coded block bits.*/
2734         if(_dec->telemetry_frame_bytes<0||
2735          _dec->telemetry_frame_bytes==OC_LOTS_OF_BITS){
2736           _dec->telemetry_frame_bytes=0;
2737         }
2738         if(_dec->telemetry_coding_bytes<0||
2739          _dec->telemetry_coding_bytes>_dec->telemetry_frame_bytes){
2740           _dec->telemetry_coding_bytes=0;
2741         }
2742         if(_dec->telemetry_mode_bytes<0||
2743          _dec->telemetry_mode_bytes>_dec->telemetry_frame_bytes){
2744           _dec->telemetry_mode_bytes=0;
2745         }
2746         if(_dec->telemetry_mv_bytes<0||
2747          _dec->telemetry_mv_bytes>_dec->telemetry_frame_bytes){
2748           _dec->telemetry_mv_bytes=0;
2749         }
2750         if(_dec->telemetry_qi_bytes<0||
2751          _dec->telemetry_qi_bytes>_dec->telemetry_frame_bytes){
2752           _dec->telemetry_qi_bytes=0;
2753         }
2754         if(_dec->telemetry_dc_bytes<0||
2755          _dec->telemetry_dc_bytes>_dec->telemetry_frame_bytes){
2756           _dec->telemetry_dc_bytes=0;
2757         }
2758         widths[0]=padw*(_dec->telemetry_frame_bytes-_dec->telemetry_coding_bytes)/fullw;
2759         widths[1]=padw*(_dec->telemetry_coding_bytes-_dec->telemetry_mode_bytes)/fullw;
2760         widths[2]=padw*(_dec->telemetry_mode_bytes-_dec->telemetry_mv_bytes)/fullw;
2761         widths[3]=padw*(_dec->telemetry_mv_bytes-_dec->telemetry_qi_bytes)/fullw;
2762         widths[4]=padw*(_dec->telemetry_qi_bytes-_dec->telemetry_dc_bytes)/fullw;
2763         widths[5]=padw*(_dec->telemetry_dc_bytes)/fullw;
2764         for(i=0;i<6;i++)if(widths[i]>w)widths[i]=w;
2765         cairo_set_source_rgba(c,.0,.0,.0,.6);
2766         cairo_rectangle(c,10,h-33,widths[0]+1,5);
2767         cairo_rectangle(c,10,h-29,widths[1]+1,5);
2768         cairo_rectangle(c,10,h-25,widths[2]+1,5);
2769         cairo_rectangle(c,10,h-21,widths[3]+1,5);
2770         cairo_rectangle(c,10,h-17,widths[4]+1,5);
2771         cairo_rectangle(c,10,h-13,widths[5]+1,5);
2772         cairo_fill(c);
2773         cairo_set_source_rgb(c,1,0,0);
2774         cairo_rectangle(c,10.5,h-32.5,widths[0],4);
2775         cairo_fill(c);
2776         cairo_set_source_rgb(c,0,1,0);
2777         cairo_rectangle(c,10.5,h-28.5,widths[1],4);
2778         cairo_fill(c);
2779         cairo_set_source_rgb(c,0,0,1);
2780         cairo_rectangle(c,10.5,h-24.5,widths[2],4);
2781         cairo_fill(c);
2782         cairo_set_source_rgb(c,.6,.4,.0);
2783         cairo_rectangle(c,10.5,h-20.5,widths[3],4);
2784         cairo_fill(c);
2785         cairo_set_source_rgb(c,.3,.3,.3);
2786         cairo_rectangle(c,10.5,h-16.5,widths[4],4);
2787         cairo_fill(c);
2788         cairo_set_source_rgb(c,.5,.5,.8);
2789         cairo_rectangle(c,10.5,h-12.5,widths[5],4);
2790         cairo_fill(c);
2791       }
2792       /*Master qi indicator[s]:*/
2793       if(_dec->telemetry_qi&0x1){
2794         cairo_text_extents_t extents;
2795         char                 buffer[10];
2796         int                  p;
2797         int                  y;
2798         p=0;
2799         y=h-7.5;
2800         if(_dec->state.qis[0]>=10)buffer[p++]=48+_dec->state.qis[0]/10;
2801         buffer[p++]=48+_dec->state.qis[0]%10;
2802         if(_dec->state.nqis>=2){
2803           buffer[p++]=' ';
2804           if(_dec->state.qis[1]>=10)buffer[p++]=48+_dec->state.qis[1]/10;
2805           buffer[p++]=48+_dec->state.qis[1]%10;
2806         }
2807         if(_dec->state.nqis==3){
2808           buffer[p++]=' ';
2809           if(_dec->state.qis[2]>=10)buffer[p++]=48+_dec->state.qis[2]/10;
2810           buffer[p++]=48+_dec->state.qis[2]%10;
2811         }
2812         buffer[p++]='\0';
2813         cairo_select_font_face(c,"sans",
2814          CAIRO_FONT_SLANT_NORMAL,CAIRO_FONT_WEIGHT_BOLD);
2815         cairo_set_font_size(c,18);
2816         cairo_text_extents(c,buffer,&extents);
2817         cairo_set_source_rgb(c,1,1,1);
2818         cairo_move_to(c,w-extents.x_advance-10,y);
2819         cairo_show_text(c,buffer);
2820         cairo_set_source_rgb(c,0,0,0);
2821         cairo_move_to(c,w-extents.x_advance-10,y);
2822         cairo_text_path(c,buffer);
2823         cairo_set_line_width(c,.8);
2824         cairo_set_line_join(c,CAIRO_LINE_JOIN_ROUND);
2825         cairo_stroke(c);
2826       }
2827       cairo_destroy(c);
2828     }
2829     /*Out of the Cairo plane into the telemetry YUV buffer.*/
2830     _ycbcr[0].data=_dec->telemetry_frame_data;
2831     _ycbcr[0].stride=_ycbcr[0].width;
2832     _ycbcr[1].data=_ycbcr[0].data+h*_ycbcr[0].stride;
2833     _ycbcr[1].stride=_ycbcr[1].width;
2834     _ycbcr[2].data=_ycbcr[1].data+(h>>vdec)*_ycbcr[1].stride;
2835     _ycbcr[2].stride=_ycbcr[2].width;
2836     y_row=_ycbcr[0].data;
2837     u_row=_ycbcr[1].data;
2838     v_row=_ycbcr[2].data;
2839     rgb_row=data;
2840     /*This is one of the few places it's worth handling chroma on a
2841        case-by-case basis.*/
2842     switch(_dec->state.info.pixel_fmt){
2843       case TH_PF_420:{
2844         for(y=0;y<h;y+=2){
2845           unsigned char *y_row2;
2846           unsigned char *rgb_row2;
2847           y_row2=y_row+_ycbcr[0].stride;
2848           rgb_row2=rgb_row+cstride;
2849           for(x=0;x<w;x+=2){
2850             int y;
2851             int u;
2852             int v;
2853             y=(65481*rgb_row[4*x+2]+128553*rgb_row[4*x+1]
2854              +24966*rgb_row[4*x+0]+4207500)/255000;
2855             y_row[x]=OC_CLAMP255(y);
2856             y=(65481*rgb_row[4*x+6]+128553*rgb_row[4*x+5]
2857              +24966*rgb_row[4*x+4]+4207500)/255000;
2858             y_row[x+1]=OC_CLAMP255(y);
2859             y=(65481*rgb_row2[4*x+2]+128553*rgb_row2[4*x+1]
2860              +24966*rgb_row2[4*x+0]+4207500)/255000;
2861             y_row2[x]=OC_CLAMP255(y);
2862             y=(65481*rgb_row2[4*x+6]+128553*rgb_row2[4*x+5]
2863              +24966*rgb_row2[4*x+4]+4207500)/255000;
2864             y_row2[x+1]=OC_CLAMP255(y);
2865             u=(-8372*(rgb_row[4*x+2]+rgb_row[4*x+6]
2866              +rgb_row2[4*x+2]+rgb_row2[4*x+6])
2867              -16436*(rgb_row[4*x+1]+rgb_row[4*x+5]
2868              +rgb_row2[4*x+1]+rgb_row2[4*x+5])
2869              +24808*(rgb_row[4*x+0]+rgb_row[4*x+4]
2870              +rgb_row2[4*x+0]+rgb_row2[4*x+4])+29032005)/225930;
2871             v=(39256*(rgb_row[4*x+2]+rgb_row[4*x+6]
2872              +rgb_row2[4*x+2]+rgb_row2[4*x+6])
2873              -32872*(rgb_row[4*x+1]+rgb_row[4*x+5]
2874               +rgb_row2[4*x+1]+rgb_row2[4*x+5])
2875              -6384*(rgb_row[4*x+0]+rgb_row[4*x+4]
2876               +rgb_row2[4*x+0]+rgb_row2[4*x+4])+45940035)/357510;
2877             u_row[x>>1]=OC_CLAMP255(u);
2878             v_row[x>>1]=OC_CLAMP255(v);
2879           }
2880           y_row+=_ycbcr[0].stride<<1;
2881           u_row+=_ycbcr[1].stride;
2882           v_row+=_ycbcr[2].stride;
2883           rgb_row+=cstride<<1;
2884         }
2885       }break;
2886       case TH_PF_422:{
2887         for(y=0;y<h;y++){
2888           for(x=0;x<w;x+=2){
2889             int y;
2890             int u;
2891             int v;
2892             y=(65481*rgb_row[4*x+2]+128553*rgb_row[4*x+1]
2893              +24966*rgb_row[4*x+0]+4207500)/255000;
2894             y_row[x]=OC_CLAMP255(y);
2895             y=(65481*rgb_row[4*x+6]+128553*rgb_row[4*x+5]
2896              +24966*rgb_row[4*x+4]+4207500)/255000;
2897             y_row[x+1]=OC_CLAMP255(y);
2898             u=(-16744*(rgb_row[4*x+2]+rgb_row[4*x+6])
2899              -32872*(rgb_row[4*x+1]+rgb_row[4*x+5])
2900              +49616*(rgb_row[4*x+0]+rgb_row[4*x+4])+29032005)/225930;
2901             v=(78512*(rgb_row[4*x+2]+rgb_row[4*x+6])
2902              -65744*(rgb_row[4*x+1]+rgb_row[4*x+5])
2903              -12768*(rgb_row[4*x+0]+rgb_row[4*x+4])+45940035)/357510;
2904             u_row[x>>1]=OC_CLAMP255(u);
2905             v_row[x>>1]=OC_CLAMP255(v);
2906           }
2907           y_row+=_ycbcr[0].stride;
2908           u_row+=_ycbcr[1].stride;
2909           v_row+=_ycbcr[2].stride;
2910           rgb_row+=cstride;
2911         }
2912       }break;
2913       /*case TH_PF_444:*/
2914       default:{
2915         for(y=0;y<h;y++){
2916           for(x=0;x<w;x++){
2917             int y;
2918             int u;
2919             int v;
2920             y=(65481*rgb_row[4*x+2]+128553*rgb_row[4*x+1]
2921              +24966*rgb_row[4*x+0]+4207500)/255000;
2922             u=(-33488*rgb_row[4*x+2]-65744*rgb_row[4*x+1]
2923              +99232*rgb_row[4*x+0]+29032005)/225930;
2924             v=(157024*rgb_row[4*x+2]-131488*rgb_row[4*x+1]
2925              -25536*rgb_row[4*x+0]+45940035)/357510;
2926             y_row[x]=OC_CLAMP255(y);
2927             u_row[x]=OC_CLAMP255(u);
2928             v_row[x]=OC_CLAMP255(v);
2929           }
2930           y_row+=_ycbcr[0].stride;
2931           u_row+=_ycbcr[1].stride;
2932           v_row+=_ycbcr[2].stride;
2933           rgb_row+=cstride;
2934         }
2935       }break;
2936     }
2937     /*Finished.
2938       Destroy the surface.*/
2939     cairo_surface_destroy(cs);
2940   }
2941 #endif
2942   return 0;
2943 }
2944