1 /********************************************************************
2  *                                                                  *
3  * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
4  * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
5  * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
6  * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
7  *                                                                  *
8  * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009                *
9  * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
10  *                                                                  *
11  ********************************************************************
12 
13   function:
14     last mod: $Id: state.c 17576 2010-10-29 01:07:51Z tterribe $
15 
16  ********************************************************************/
17 
18 #include <stdlib.h>
19 #include <string.h>
20 #include "state.h"
21 #if defined(OC_DUMP_IMAGES)
22 # include <stdio.h>
23 # include "png.h"
24 #endif
25 
26 /*The function used to fill in the chroma plane motion vectors for a macro
27    block when 4 different motion vectors are specified in the luma plane.
28   This version is for use with chroma decimated in the X and Y directions
29    (4:2:0).
30   _cbmvs: The chroma block-level motion vectors to fill in.
31   _lbmvs: The luma block-level motion vectors.*/
oc_set_chroma_mvs00(oc_mv _cbmvs[4],const oc_mv _lbmvs[4])32 static void oc_set_chroma_mvs00(oc_mv _cbmvs[4],const oc_mv _lbmvs[4]){
33   int dx;
34   int dy;
35   dx=OC_MV_X(_lbmvs[0])+OC_MV_X(_lbmvs[1])
36    +OC_MV_X(_lbmvs[2])+OC_MV_X(_lbmvs[3]);
37   dy=OC_MV_Y(_lbmvs[0])+OC_MV_Y(_lbmvs[1])
38    +OC_MV_Y(_lbmvs[2])+OC_MV_Y(_lbmvs[3]);
39   _cbmvs[0]=OC_MV(OC_DIV_ROUND_POW2(dx,2,2),OC_DIV_ROUND_POW2(dy,2,2));
40 }
41 
42 /*The function used to fill in the chroma plane motion vectors for a macro
43    block when 4 different motion vectors are specified in the luma plane.
44   This version is for use with chroma decimated in the Y direction.
45   _cbmvs: The chroma block-level motion vectors to fill in.
46   _lbmvs: The luma block-level motion vectors.*/
oc_set_chroma_mvs01(oc_mv _cbmvs[4],const oc_mv _lbmvs[4])47 static void oc_set_chroma_mvs01(oc_mv _cbmvs[4],const oc_mv _lbmvs[4]){
48   int dx;
49   int dy;
50   dx=OC_MV_X(_lbmvs[0])+OC_MV_X(_lbmvs[2]);
51   dy=OC_MV_Y(_lbmvs[0])+OC_MV_Y(_lbmvs[2]);
52   _cbmvs[0]=OC_MV(OC_DIV_ROUND_POW2(dx,1,1),OC_DIV_ROUND_POW2(dy,1,1));
53   dx=OC_MV_X(_lbmvs[1])+OC_MV_X(_lbmvs[3]);
54   dy=OC_MV_Y(_lbmvs[1])+OC_MV_Y(_lbmvs[3]);
55   _cbmvs[1]=OC_MV(OC_DIV_ROUND_POW2(dx,1,1),OC_DIV_ROUND_POW2(dy,1,1));
56 }
57 
58 /*The function used to fill in the chroma plane motion vectors for a macro
59    block when 4 different motion vectors are specified in the luma plane.
60   This version is for use with chroma decimated in the X direction (4:2:2).
61   _cbmvs: The chroma block-level motion vectors to fill in.
62   _lbmvs: The luma block-level motion vectors.*/
oc_set_chroma_mvs10(oc_mv _cbmvs[4],const oc_mv _lbmvs[4])63 static void oc_set_chroma_mvs10(oc_mv _cbmvs[4],const oc_mv _lbmvs[4]){
64   int dx;
65   int dy;
66   dx=OC_MV_X(_lbmvs[0])+OC_MV_X(_lbmvs[1]);
67   dy=OC_MV_Y(_lbmvs[0])+OC_MV_Y(_lbmvs[1]);
68   _cbmvs[0]=OC_MV(OC_DIV_ROUND_POW2(dx,1,1),OC_DIV_ROUND_POW2(dy,1,1));
69   dx=OC_MV_X(_lbmvs[2])+OC_MV_X(_lbmvs[3]);
70   dy=OC_MV_Y(_lbmvs[2])+OC_MV_Y(_lbmvs[3]);
71   _cbmvs[2]=OC_MV(OC_DIV_ROUND_POW2(dx,1,1),OC_DIV_ROUND_POW2(dy,1,1));
72 }
73 
74 /*The function used to fill in the chroma plane motion vectors for a macro
75    block when 4 different motion vectors are specified in the luma plane.
76   This version is for use with no chroma decimation (4:4:4).
77   _cbmvs: The chroma block-level motion vectors to fill in.
78   _lmbmv: The luma macro-block level motion vector to fill in for use in
79            prediction.
80   _lbmvs: The luma block-level motion vectors.*/
oc_set_chroma_mvs11(oc_mv _cbmvs[4],const oc_mv _lbmvs[4])81 static void oc_set_chroma_mvs11(oc_mv _cbmvs[4],const oc_mv _lbmvs[4]){
82   _cbmvs[0]=_lbmvs[0];
83   _cbmvs[1]=_lbmvs[1];
84   _cbmvs[2]=_lbmvs[2];
85   _cbmvs[3]=_lbmvs[3];
86 }
87 
88 /*A table of functions used to fill in the chroma plane motion vectors for a
89    macro block when 4 different motion vectors are specified in the luma
90    plane.*/
91 const oc_set_chroma_mvs_func OC_SET_CHROMA_MVS_TABLE[TH_PF_NFORMATS]={
92   (oc_set_chroma_mvs_func)oc_set_chroma_mvs00,
93   (oc_set_chroma_mvs_func)oc_set_chroma_mvs01,
94   (oc_set_chroma_mvs_func)oc_set_chroma_mvs10,
95   (oc_set_chroma_mvs_func)oc_set_chroma_mvs11
96 };
97 
98 
99 
100 /*Returns the fragment index of the top-left block in a macro block.
101   This can be used to test whether or not the whole macro block is valid.
102   _sb_map: The super block map.
103   _quadi:  The quadrant number.
104   Return: The index of the fragment of the upper left block in the macro
105    block, or -1 if the block lies outside the coded frame.*/
oc_sb_quad_top_left_frag(oc_sb_map_quad _sb_map[4],int _quadi)106 static ptrdiff_t oc_sb_quad_top_left_frag(oc_sb_map_quad _sb_map[4],int _quadi){
107   /*It so happens that under the Hilbert curve ordering described below, the
108      upper-left block in each macro block is at index 0, except in macro block
109      3, where it is at index 2.*/
110   return _sb_map[_quadi][_quadi&_quadi<<1];
111 }
112 
113 /*Fills in the mapping from block positions to fragment numbers for a single
114    color plane.
115   This function also fills in the "valid" flag of each quadrant in the super
116    block flags.
117   _sb_maps:  The array of super block maps for the color plane.
118   _sb_flags: The array of super block flags for the color plane.
119   _frag0:    The index of the first fragment in the plane.
120   _hfrags:   The number of horizontal fragments in a coded frame.
121   _vfrags:   The number of vertical fragments in a coded frame.*/
oc_sb_create_plane_mapping(oc_sb_map _sb_maps[],oc_sb_flags _sb_flags[],ptrdiff_t _frag0,int _hfrags,int _vfrags)122 static void oc_sb_create_plane_mapping(oc_sb_map _sb_maps[],
123  oc_sb_flags _sb_flags[],ptrdiff_t _frag0,int _hfrags,int _vfrags){
124   /*Contains the (macro_block,block) indices for a 4x4 grid of
125      fragments.
126     The pattern is a 4x4 Hilbert space-filling curve.
127     A Hilbert curve has the nice property that as the curve grows larger, its
128      fractal dimension approaches 2.
129     The intuition is that nearby blocks in the curve are also close spatially,
130      with the previous element always an immediate neighbor, so that runs of
131      blocks should be well correlated.*/
132   static const int SB_MAP[4][4][2]={
133     {{0,0},{0,1},{3,2},{3,3}},
134     {{0,3},{0,2},{3,1},{3,0}},
135     {{1,0},{1,3},{2,0},{2,3}},
136     {{1,1},{1,2},{2,1},{2,2}}
137   };
138   ptrdiff_t  yfrag;
139   unsigned   sbi;
140   int        y;
141   sbi=0;
142   yfrag=_frag0;
143   for(y=0;;y+=4){
144     int imax;
145     int x;
146     /*Figure out how many columns of blocks in this super block lie within the
147        image.*/
148     imax=_vfrags-y;
149     if(imax>4)imax=4;
150     else if(imax<=0)break;
151     for(x=0;;x+=4,sbi++){
152       ptrdiff_t xfrag;
153       int       jmax;
154       int       quadi;
155       int       i;
156       /*Figure out how many rows of blocks in this super block lie within the
157          image.*/
158       jmax=_hfrags-x;
159       if(jmax>4)jmax=4;
160       else if(jmax<=0)break;
161       /*By default, set all fragment indices to -1.*/
162       memset(_sb_maps[sbi],0xFF,sizeof(_sb_maps[sbi]));
163       /*Fill in the fragment map for this super block.*/
164       xfrag=yfrag+x;
165       for(i=0;i<imax;i++){
166         int j;
167         for(j=0;j<jmax;j++){
168           _sb_maps[sbi][SB_MAP[i][j][0]][SB_MAP[i][j][1]]=xfrag+j;
169         }
170         xfrag+=_hfrags;
171       }
172       /*Mark which quadrants of this super block lie within the image.*/
173       for(quadi=0;quadi<4;quadi++){
174         _sb_flags[sbi].quad_valid|=
175          (oc_sb_quad_top_left_frag(_sb_maps[sbi],quadi)>=0)<<quadi;
176       }
177     }
178     yfrag+=_hfrags<<2;
179   }
180 }
181 
182 /*Fills in the Y plane fragment map for a macro block given the fragment
183    coordinates of its upper-left hand corner.
184   _mb_map:    The macro block map to fill.
185   _fplane: The description of the Y plane.
186   _xfrag0: The X location of the upper-left hand fragment in the luma plane.
187   _yfrag0: The Y location of the upper-left hand fragment in the luma plane.*/
oc_mb_fill_ymapping(oc_mb_map_plane _mb_map[3],const oc_fragment_plane * _fplane,int _xfrag0,int _yfrag0)188 static void oc_mb_fill_ymapping(oc_mb_map_plane _mb_map[3],
189  const oc_fragment_plane *_fplane,int _xfrag0,int _yfrag0){
190   int i;
191   int j;
192   for(i=0;i<2;i++)for(j=0;j<2;j++){
193     _mb_map[0][i<<1|j]=(_yfrag0+i)*(ptrdiff_t)_fplane->nhfrags+_xfrag0+j;
194   }
195 }
196 
197 /*Fills in the chroma plane fragment maps for a macro block.
198   This version is for use with chroma decimated in the X and Y directions
199    (4:2:0).
200   _mb_map:  The macro block map to fill.
201   _fplanes: The descriptions of the fragment planes.
202   _xfrag0:  The X location of the upper-left hand fragment in the luma plane.
203   _yfrag0:  The Y location of the upper-left hand fragment in the luma plane.*/
oc_mb_fill_cmapping00(oc_mb_map_plane _mb_map[3],const oc_fragment_plane _fplanes[3],int _xfrag0,int _yfrag0)204 static void oc_mb_fill_cmapping00(oc_mb_map_plane _mb_map[3],
205  const oc_fragment_plane _fplanes[3],int _xfrag0,int _yfrag0){
206   ptrdiff_t fragi;
207   _xfrag0>>=1;
208   _yfrag0>>=1;
209   fragi=_yfrag0*(ptrdiff_t)_fplanes[1].nhfrags+_xfrag0;
210   _mb_map[1][0]=fragi+_fplanes[1].froffset;
211   _mb_map[2][0]=fragi+_fplanes[2].froffset;
212 }
213 
214 /*Fills in the chroma plane fragment maps for a macro block.
215   This version is for use with chroma decimated in the Y direction.
216   _mb_map:  The macro block map to fill.
217   _fplanes: The descriptions of the fragment planes.
218   _xfrag0:  The X location of the upper-left hand fragment in the luma plane.
219   _yfrag0:  The Y location of the upper-left hand fragment in the luma plane.*/
oc_mb_fill_cmapping01(oc_mb_map_plane _mb_map[3],const oc_fragment_plane _fplanes[3],int _xfrag0,int _yfrag0)220 static void oc_mb_fill_cmapping01(oc_mb_map_plane _mb_map[3],
221  const oc_fragment_plane _fplanes[3],int _xfrag0,int _yfrag0){
222   ptrdiff_t fragi;
223   int       j;
224   _yfrag0>>=1;
225   fragi=_yfrag0*(ptrdiff_t)_fplanes[1].nhfrags+_xfrag0;
226   for(j=0;j<2;j++){
227     _mb_map[1][j]=fragi+_fplanes[1].froffset;
228     _mb_map[2][j]=fragi+_fplanes[2].froffset;
229     fragi++;
230   }
231 }
232 
233 /*Fills in the chroma plane fragment maps for a macro block.
234   This version is for use with chroma decimated in the X direction (4:2:2).
235   _mb_map:  The macro block map to fill.
236   _fplanes: The descriptions of the fragment planes.
237   _xfrag0:  The X location of the upper-left hand fragment in the luma plane.
238   _yfrag0:  The Y location of the upper-left hand fragment in the luma plane.*/
oc_mb_fill_cmapping10(oc_mb_map_plane _mb_map[3],const oc_fragment_plane _fplanes[3],int _xfrag0,int _yfrag0)239 static void oc_mb_fill_cmapping10(oc_mb_map_plane _mb_map[3],
240  const oc_fragment_plane _fplanes[3],int _xfrag0,int _yfrag0){
241   ptrdiff_t fragi;
242   int       i;
243   _xfrag0>>=1;
244   fragi=_yfrag0*(ptrdiff_t)_fplanes[1].nhfrags+_xfrag0;
245   for(i=0;i<2;i++){
246     _mb_map[1][i<<1]=fragi+_fplanes[1].froffset;
247     _mb_map[2][i<<1]=fragi+_fplanes[2].froffset;
248     fragi+=_fplanes[1].nhfrags;
249   }
250 }
251 
252 /*Fills in the chroma plane fragment maps for a macro block.
253   This version is for use with no chroma decimation (4:4:4).
254   This uses the already filled-in luma plane values.
255   _mb_map:  The macro block map to fill.
256   _fplanes: The descriptions of the fragment planes.*/
oc_mb_fill_cmapping11(oc_mb_map_plane _mb_map[3],const oc_fragment_plane _fplanes[3])257 static void oc_mb_fill_cmapping11(oc_mb_map_plane _mb_map[3],
258  const oc_fragment_plane _fplanes[3]){
259   int k;
260   for(k=0;k<4;k++){
261     _mb_map[1][k]=_mb_map[0][k]+_fplanes[1].froffset;
262     _mb_map[2][k]=_mb_map[0][k]+_fplanes[2].froffset;
263   }
264 }
265 
266 /*The function type used to fill in the chroma plane fragment maps for a
267    macro block.
268   _mb_map:  The macro block map to fill.
269   _fplanes: The descriptions of the fragment planes.
270   _xfrag0:  The X location of the upper-left hand fragment in the luma plane.
271   _yfrag0:  The Y location of the upper-left hand fragment in the luma plane.*/
272 typedef void (*oc_mb_fill_cmapping_func)(oc_mb_map_plane _mb_map[3],
273  const oc_fragment_plane _fplanes[3],int _xfrag0,int _yfrag0);
274 
275 /*A table of functions used to fill in the chroma plane fragment maps for a
276    macro block for each type of chrominance decimation.*/
277 static const oc_mb_fill_cmapping_func OC_MB_FILL_CMAPPING_TABLE[4]={
278   oc_mb_fill_cmapping00,
279   oc_mb_fill_cmapping01,
280   oc_mb_fill_cmapping10,
281   (oc_mb_fill_cmapping_func)oc_mb_fill_cmapping11
282 };
283 
284 /*Fills in the mapping from macro blocks to their corresponding fragment
285    numbers in each plane.
286   _mb_maps:   The list of macro block maps.
287   _mb_modes:  The list of macro block modes; macro blocks completely outside
288                the coded region are marked invalid.
289   _fplanes:   The descriptions of the fragment planes.
290   _pixel_fmt: The chroma decimation type.*/
oc_mb_create_mapping(oc_mb_map _mb_maps[],signed char _mb_modes[],const oc_fragment_plane _fplanes[3],int _pixel_fmt)291 static void oc_mb_create_mapping(oc_mb_map _mb_maps[],
292  signed char _mb_modes[],const oc_fragment_plane _fplanes[3],int _pixel_fmt){
293   oc_mb_fill_cmapping_func  mb_fill_cmapping;
294   unsigned                  sbi;
295   int                       y;
296   mb_fill_cmapping=OC_MB_FILL_CMAPPING_TABLE[_pixel_fmt];
297   /*Loop through the luma plane super blocks.*/
298   for(sbi=y=0;y<_fplanes[0].nvfrags;y+=4){
299     int x;
300     for(x=0;x<_fplanes[0].nhfrags;x+=4,sbi++){
301       int ymb;
302       /*Loop through the macro blocks in each super block in display order.*/
303       for(ymb=0;ymb<2;ymb++){
304         int xmb;
305         for(xmb=0;xmb<2;xmb++){
306           unsigned mbi;
307           int      mbx;
308           int      mby;
309           mbi=sbi<<2|OC_MB_MAP[ymb][xmb];
310           mbx=x|xmb<<1;
311           mby=y|ymb<<1;
312           /*Initialize fragment indices to -1.*/
313           memset(_mb_maps[mbi],0xFF,sizeof(_mb_maps[mbi]));
314           /*Make sure this macro block is within the encoded region.*/
315           if(mbx>=_fplanes[0].nhfrags||mby>=_fplanes[0].nvfrags){
316             _mb_modes[mbi]=OC_MODE_INVALID;
317             continue;
318           }
319           /*Fill in the fragment indices for the luma plane.*/
320           oc_mb_fill_ymapping(_mb_maps[mbi],_fplanes,mbx,mby);
321           /*Fill in the fragment indices for the chroma planes.*/
322           (*mb_fill_cmapping)(_mb_maps[mbi],_fplanes,mbx,mby);
323         }
324       }
325     }
326   }
327 }
328 
329 /*Marks the fragments which fall all or partially outside the displayable
330    region of the frame.
331   _state: The Theora state containing the fragments to be marked.*/
oc_state_border_init(oc_theora_state * _state)332 static void oc_state_border_init(oc_theora_state *_state){
333   oc_fragment       *frag;
334   oc_fragment       *yfrag_end;
335   oc_fragment       *xfrag_end;
336   oc_fragment_plane *fplane;
337   int                crop_x0;
338   int                crop_y0;
339   int                crop_xf;
340   int                crop_yf;
341   int                pli;
342   int                y;
343   int                x;
344   /*The method we use here is slow, but the code is dead simple and handles
345      all the special cases easily.
346     We only ever need to do it once.*/
347   /*Loop through the fragments, marking those completely outside the
348      displayable region and constructing a border mask for those that straddle
349      the border.*/
350   _state->nborders=0;
351   yfrag_end=frag=_state->frags;
352   for(pli=0;pli<3;pli++){
353     fplane=_state->fplanes+pli;
354     /*Set up the cropping rectangle for this plane.*/
355     crop_x0=_state->info.pic_x;
356     crop_xf=_state->info.pic_x+_state->info.pic_width;
357     crop_y0=_state->info.pic_y;
358     crop_yf=_state->info.pic_y+_state->info.pic_height;
359     if(pli>0){
360       if(!(_state->info.pixel_fmt&1)){
361         crop_x0=crop_x0>>1;
362         crop_xf=crop_xf+1>>1;
363       }
364       if(!(_state->info.pixel_fmt&2)){
365         crop_y0=crop_y0>>1;
366         crop_yf=crop_yf+1>>1;
367       }
368     }
369     y=0;
370     for(yfrag_end+=fplane->nfrags;frag<yfrag_end;y+=8){
371       x=0;
372       for(xfrag_end=frag+fplane->nhfrags;frag<xfrag_end;frag++,x+=8){
373         /*First check to see if this fragment is completely outside the
374            displayable region.*/
375         /*Note the special checks for an empty cropping rectangle.
376           This guarantees that if we count a fragment as straddling the
377            border below, at least one pixel in the fragment will be inside
378            the displayable region.*/
379         if(x+8<=crop_x0||crop_xf<=x||y+8<=crop_y0||crop_yf<=y||
380          crop_x0>=crop_xf||crop_y0>=crop_yf){
381           frag->invalid=1;
382         }
383         /*Otherwise, check to see if it straddles the border.*/
384         else if(x<crop_x0&&crop_x0<x+8||x<crop_xf&&crop_xf<x+8||
385          y<crop_y0&&crop_y0<y+8||y<crop_yf&&crop_yf<y+8){
386           ogg_int64_t mask;
387           int         npixels;
388           int         i;
389           mask=npixels=0;
390           for(i=0;i<8;i++){
391             int j;
392             for(j=0;j<8;j++){
393               if(x+j>=crop_x0&&x+j<crop_xf&&y+i>=crop_y0&&y+i<crop_yf){
394                 mask|=(ogg_int64_t)1<<(i<<3|j);
395                 npixels++;
396               }
397             }
398           }
399           /*Search the fragment array for border info with the same pattern.
400             In general, there will be at most 8 different patterns (per
401              plane).*/
402           for(i=0;;i++){
403             if(i>=_state->nborders){
404               _state->nborders++;
405               _state->borders[i].mask=mask;
406               _state->borders[i].npixels=npixels;
407             }
408             else if(_state->borders[i].mask!=mask)continue;
409             frag->borderi=i;
410             break;
411           }
412         }
413         else frag->borderi=-1;
414       }
415     }
416   }
417 }
418 
/*Computes the per-plane fragment and super block counts, allocates the
   fragment, super block and macro block arrays, and fills in the block
   mappings and the per-fragment border information.
  On failure, any arrays that were already allocated are left in _state;
   oc_state_frarray_clear() releases them.
  _state: The state to initialize; _state->info must already be filled in and
           validated.
  Return: 0 on success, TH_EIMPL if the array sizes cannot be represented on
   this platform, or TH_EFAULT if an allocation failed.*/
static int oc_state_frarray_init(oc_theora_state *_state){
  int       yhfrags;
  int       yvfrags;
  int       chfrags;
  int       cvfrags;
  ptrdiff_t yfrags;
  ptrdiff_t cfrags;
  ptrdiff_t nfrags;
  unsigned  yhsbs;
  unsigned  yvsbs;
  unsigned  chsbs;
  unsigned  cvsbs;
  unsigned  ysbs;
  unsigned  csbs;
  unsigned  nsbs;
  size_t    nmbs;
  int       hdec;
  int       vdec;
  int       pli;
  /*Figure out the number of fragments in each plane.*/
  /*These parameters have already been validated to be multiples of 16.*/
  yhfrags=_state->info.frame_width>>3;
  yvfrags=_state->info.frame_height>>3;
  /*hdec and vdec are 1 when chroma is decimated in that direction.*/
  hdec=!(_state->info.pixel_fmt&1);
  vdec=!(_state->info.pixel_fmt&2);
  chfrags=(yhfrags+hdec)>>hdec;
  cvfrags=(yvfrags+vdec)>>vdec;
  yfrags=yhfrags*(ptrdiff_t)yvfrags;
  cfrags=chfrags*(ptrdiff_t)cvfrags;
  nfrags=yfrags+2*cfrags;
  /*Figure out the number of super blocks in each plane.*/
  yhsbs=(yhfrags+3)>>2;
  yvsbs=(yvfrags+3)>>2;
  chsbs=(chfrags+3)>>2;
  cvsbs=(cvfrags+3)>>2;
  ysbs=yhsbs*yvsbs;
  csbs=chsbs*cvsbs;
  nsbs=ysbs+2*csbs;
  /*There are four macro blocks per luma super block.*/
  nmbs=(size_t)ysbs<<2;
  /*Check for overflow.
    We support the ridiculous upper limits of the specification (1048560 by
     1048560, or 3 TB frames) if the target architecture has 64-bit pointers,
     but for those with 32-bit pointers (or smaller!) we have to check.
    If the caller wants to prevent denial-of-service by imposing a more
     reasonable upper limit on the size of attempted allocations, they must do
     so themselves; we have no platform independent way to determine how much
     system memory there is nor an application-independent way to decide what a
     "reasonable" allocation is.
    NOTE(review): the ptrdiff_t tests below inspect the result after the
     arithmetic has already been done in a signed type.*/
  if(yfrags/yhfrags!=yvfrags||2*cfrags<cfrags||nfrags<yfrags||
   ysbs/yhsbs!=yvsbs||2*csbs<csbs||nsbs<ysbs||nmbs>>2!=ysbs){
    return TH_EIMPL;
  }
  /*The arrays obtained from _ogg_malloc() below are sized with a plain
     multiplication, so make sure none of those products can wrap around in
     size_t and yield an undersized buffer.*/
  if((size_t)nfrags>(~(size_t)0)/sizeof(*_state->frag_mvs)||
   (size_t)nfrags>(~(size_t)0)/sizeof(*_state->coded_fragis)||
   nsbs>(~(size_t)0)/sizeof(*_state->sb_maps)){
    return TH_EIMPL;
  }
  /*Initialize the fragment array.*/
  _state->fplanes[0].nhfrags=yhfrags;
  _state->fplanes[0].nvfrags=yvfrags;
  _state->fplanes[0].froffset=0;
  _state->fplanes[0].nfrags=yfrags;
  _state->fplanes[0].nhsbs=yhsbs;
  _state->fplanes[0].nvsbs=yvsbs;
  _state->fplanes[0].sboffset=0;
  _state->fplanes[0].nsbs=ysbs;
  /*The two chroma planes share the same dimensions and follow the luma plane
     in the fragment and super block arrays.*/
  _state->fplanes[1].nhfrags=_state->fplanes[2].nhfrags=chfrags;
  _state->fplanes[1].nvfrags=_state->fplanes[2].nvfrags=cvfrags;
  _state->fplanes[1].froffset=yfrags;
  _state->fplanes[2].froffset=yfrags+cfrags;
  _state->fplanes[1].nfrags=_state->fplanes[2].nfrags=cfrags;
  _state->fplanes[1].nhsbs=_state->fplanes[2].nhsbs=chsbs;
  _state->fplanes[1].nvsbs=_state->fplanes[2].nvsbs=cvsbs;
  _state->fplanes[1].sboffset=ysbs;
  _state->fplanes[2].sboffset=ysbs+csbs;
  _state->fplanes[1].nsbs=_state->fplanes[2].nsbs=csbs;
  _state->nfrags=nfrags;
  _state->frags=_ogg_calloc(nfrags,sizeof(*_state->frags));
  _state->frag_mvs=_ogg_malloc(nfrags*sizeof(*_state->frag_mvs));
  _state->nsbs=nsbs;
  _state->sb_maps=_ogg_malloc(nsbs*sizeof(*_state->sb_maps));
  _state->sb_flags=_ogg_calloc(nsbs,sizeof(*_state->sb_flags));
  _state->nhmbs=yhsbs<<1;
  _state->nvmbs=yvsbs<<1;
  _state->nmbs=nmbs;
  _state->mb_maps=_ogg_calloc(nmbs,sizeof(*_state->mb_maps));
  _state->mb_modes=_ogg_calloc(nmbs,sizeof(*_state->mb_modes));
  _state->coded_fragis=_ogg_malloc(nfrags*sizeof(*_state->coded_fragis));
  /*Nothing is freed here on failure: the pointers stay in _state for
     oc_state_frarray_clear().*/
  if(_state->frags==NULL||_state->frag_mvs==NULL||_state->sb_maps==NULL||
   _state->sb_flags==NULL||_state->mb_maps==NULL||_state->mb_modes==NULL||
   _state->coded_fragis==NULL){
    return TH_EFAULT;
  }
  /*Create the mapping from super blocks to fragments.*/
  for(pli=0;pli<3;pli++){
    oc_fragment_plane *fplane;
    fplane=_state->fplanes+pli;
    oc_sb_create_plane_mapping(_state->sb_maps+fplane->sboffset,
     _state->sb_flags+fplane->sboffset,fplane->froffset,
     fplane->nhfrags,fplane->nvfrags);
  }
  /*Create the mapping from macro blocks to fragments.*/
  oc_mb_create_mapping(_state->mb_maps,_state->mb_modes,
   _state->fplanes,_state->info.pixel_fmt);
  /*Initialize the invalid and borderi fields of each fragment.*/
  oc_state_border_init(_state);
  return 0;
}
522 
/*Releases every array allocated by oc_state_frarray_init().
  _state: The state whose fragment, super block and macro block arrays are to
           be freed.*/
static void oc_state_frarray_clear(oc_theora_state *_state){
  /*Per-fragment arrays.*/
  _ogg_free(_state->frags);
  _ogg_free(_state->frag_mvs);
  _ogg_free(_state->coded_fragis);
  /*Per-super block arrays.*/
  _ogg_free(_state->sb_maps);
  _ogg_free(_state->sb_flags);
  /*Per-macro block arrays.*/
  _ogg_free(_state->mb_maps);
  _ogg_free(_state->mb_modes);
}
532 
533 
534 /*Initializes the buffers used for reconstructed frames.
535   These buffers are padded with 16 extra pixels on each side, to allow
536    unrestricted motion vectors without special casing the boundary.
537   If chroma is decimated in either direction, the padding is reduced by a
538    factor of 2 on the appropriate sides.
539   _nrefs: The number of reference buffers to init; must be in the range 3...6.*/
oc_state_ref_bufs_init(oc_theora_state * _state,int _nrefs)540 static int oc_state_ref_bufs_init(oc_theora_state *_state,int _nrefs){
541   th_info       *info;
542   unsigned char *ref_frame_data;
543   size_t         ref_frame_data_sz;
544   size_t         ref_frame_sz;
545   size_t         yplane_sz;
546   size_t         cplane_sz;
547   int            yhstride;
548   int            yheight;
549   int            chstride;
550   int            cheight;
551   ptrdiff_t      align;
552   ptrdiff_t      yoffset;
553   ptrdiff_t      coffset;
554   ptrdiff_t     *frag_buf_offs;
555   ptrdiff_t      fragi;
556   int            hdec;
557   int            vdec;
558   int            rfi;
559   int            pli;
560   if(_nrefs<3||_nrefs>6)return TH_EINVAL;
561   info=&_state->info;
562   /*Compute the image buffer parameters for each plane.*/
563   hdec=!(info->pixel_fmt&1);
564   vdec=!(info->pixel_fmt&2);
565   yhstride=info->frame_width+2*OC_UMV_PADDING;
566   yheight=info->frame_height+2*OC_UMV_PADDING;
567   /*Require 16-byte aligned rows in the chroma planes.*/
568   chstride=(yhstride>>hdec)+15&~15;
569   cheight=yheight>>vdec;
570   yplane_sz=yhstride*(size_t)yheight;
571   cplane_sz=chstride*(size_t)cheight;
572   yoffset=OC_UMV_PADDING+OC_UMV_PADDING*(ptrdiff_t)yhstride;
573   coffset=(OC_UMV_PADDING>>hdec)+(OC_UMV_PADDING>>vdec)*(ptrdiff_t)chstride;
574   /*Although we guarantee the rows of the chroma planes are a multiple of 16
575      bytes, the initial padding on the first row may only be 8 bytes.
576     Compute the offset needed to the actual image data to a multiple of 16.*/
577   align=-coffset&15;
578   ref_frame_sz=yplane_sz+2*cplane_sz+16;
579   ref_frame_data_sz=_nrefs*ref_frame_sz;
580   /*Check for overflow.
581     The same caveats apply as for oc_state_frarray_init().*/
582   if(yplane_sz/yhstride!=(size_t)yheight||2*cplane_sz+16<cplane_sz||
583    ref_frame_sz<yplane_sz||ref_frame_data_sz/_nrefs!=ref_frame_sz){
584     return TH_EIMPL;
585   }
586   ref_frame_data=oc_aligned_malloc(ref_frame_data_sz,16);
587   frag_buf_offs=_state->frag_buf_offs=
588    _ogg_malloc(_state->nfrags*sizeof(*frag_buf_offs));
589   if(ref_frame_data==NULL||frag_buf_offs==NULL){
590     _ogg_free(frag_buf_offs);
591     oc_aligned_free(ref_frame_data);
592     return TH_EFAULT;
593   }
594   /*Set up the width, height and stride for the image buffers.*/
595   _state->ref_frame_bufs[0][0].width=info->frame_width;
596   _state->ref_frame_bufs[0][0].height=info->frame_height;
597   _state->ref_frame_bufs[0][0].stride=yhstride;
598   _state->ref_frame_bufs[0][1].width=_state->ref_frame_bufs[0][2].width=
599    info->frame_width>>hdec;
600   _state->ref_frame_bufs[0][1].height=_state->ref_frame_bufs[0][2].height=
601    info->frame_height>>vdec;
602   _state->ref_frame_bufs[0][1].stride=_state->ref_frame_bufs[0][2].stride=
603    chstride;
604   for(rfi=1;rfi<_nrefs;rfi++){
605     memcpy(_state->ref_frame_bufs[rfi],_state->ref_frame_bufs[0],
606      sizeof(_state->ref_frame_bufs[0]));
607   }
608   _state->ref_frame_handle=ref_frame_data;
609   /*Set up the data pointers for the image buffers.*/
610   for(rfi=0;rfi<_nrefs;rfi++){
611     _state->ref_frame_bufs[rfi][0].data=ref_frame_data+yoffset;
612     ref_frame_data+=yplane_sz+align;
613     _state->ref_frame_bufs[rfi][1].data=ref_frame_data+coffset;
614     ref_frame_data+=cplane_sz;
615     _state->ref_frame_bufs[rfi][2].data=ref_frame_data+coffset;
616     ref_frame_data+=cplane_sz+(16-align);
617     /*Flip the buffer upside down.
618       This allows us to decode Theora's bottom-up frames in their natural
619        order, yet return a top-down buffer with a positive stride to the user.*/
620     oc_ycbcr_buffer_flip(_state->ref_frame_bufs[rfi],
621      _state->ref_frame_bufs[rfi]);
622   }
623   _state->ref_ystride[0]=-yhstride;
624   _state->ref_ystride[1]=_state->ref_ystride[2]=-chstride;
625   /*Initialize the fragment buffer offsets.*/
626   ref_frame_data=_state->ref_frame_bufs[0][0].data;
627   fragi=0;
628   for(pli=0;pli<3;pli++){
629     th_img_plane      *iplane;
630     oc_fragment_plane *fplane;
631     unsigned char     *vpix;
632     ptrdiff_t          stride;
633     ptrdiff_t          vfragi_end;
634     int                nhfrags;
635     iplane=_state->ref_frame_bufs[0]+pli;
636     fplane=_state->fplanes+pli;
637     vpix=iplane->data;
638     vfragi_end=fplane->froffset+fplane->nfrags;
639     nhfrags=fplane->nhfrags;
640     stride=iplane->stride;
641     while(fragi<vfragi_end){
642       ptrdiff_t      hfragi_end;
643       unsigned char *hpix;
644       hpix=vpix;
645       for(hfragi_end=fragi+nhfrags;fragi<hfragi_end;fragi++){
646         frag_buf_offs[fragi]=hpix-ref_frame_data;
647         hpix+=8;
648       }
649       vpix+=stride<<3;
650     }
651   }
652   /*Initialize the reference frame pointers and indices.*/
653   _state->ref_frame_idx[OC_FRAME_GOLD]=
654    _state->ref_frame_idx[OC_FRAME_PREV]=
655    _state->ref_frame_idx[OC_FRAME_GOLD_ORIG]=
656    _state->ref_frame_idx[OC_FRAME_PREV_ORIG]=
657    _state->ref_frame_idx[OC_FRAME_SELF]=
658    _state->ref_frame_idx[OC_FRAME_IO]=-1;
659   _state->ref_frame_data[OC_FRAME_GOLD]=
660    _state->ref_frame_data[OC_FRAME_PREV]=
661    _state->ref_frame_data[OC_FRAME_GOLD_ORIG]=
662    _state->ref_frame_data[OC_FRAME_PREV_ORIG]=
663    _state->ref_frame_data[OC_FRAME_SELF]=
664    _state->ref_frame_data[OC_FRAME_IO]=NULL;
665   return 0;
666 }
667 
/*Releases the storage set up by oc_state_ref_bufs_init(): the single
   aligned allocation holding all reference frames and the table of fragment
   buffer offsets.
  _state: The state whose reference buffers are to be freed.*/
static void oc_state_ref_bufs_clear(oc_theora_state *_state){
  oc_aligned_free(_state->ref_frame_handle);
  _ogg_free(_state->frag_buf_offs);
}
672 
673 
/*Selects the plain C implementations of the accelerated routines.
  This clears the CPU flags, points the forward zig-zag table at OC_FZIG_ZAG
   and, when OC_STATE_USE_VTABLE is defined, fills every entry of the
   function table with its _c variant.
  _state: The state whose acceleration settings are to be initialized.*/
void oc_state_accel_init_c(oc_theora_state *_state){
  _state->cpu_flags=0;
  _state->opt_data.dct_fzig_zag=OC_FZIG_ZAG;
#if defined(OC_STATE_USE_VTABLE)
  /*Fragment copying.*/
  _state->opt_vtable.frag_copy=oc_frag_copy_c;
  _state->opt_vtable.frag_copy_list=oc_frag_copy_list_c;
  /*Fragment reconstruction.*/
  _state->opt_vtable.frag_recon_intra=oc_frag_recon_intra_c;
  _state->opt_vtable.frag_recon_inter=oc_frag_recon_inter_c;
  _state->opt_vtable.frag_recon_inter2=oc_frag_recon_inter2_c;
  _state->opt_vtable.idct8x8=oc_idct8x8_c;
  _state->opt_vtable.state_frag_recon=oc_state_frag_recon_c;
  /*Loop filtering.*/
  _state->opt_vtable.loop_filter_init=oc_loop_filter_init_c;
  _state->opt_vtable.state_loop_filter_frag_rows=
   oc_state_loop_filter_frag_rows_c;
  _state->opt_vtable.restore_fpu=oc_restore_fpu_c;
#endif
}
691 
692 
/*Validates the stream parameters and initializes the shared codec state.
  The state is zeroed, a copy of _info is stored in it (with pic_y converted
   to Theora's bottom-up coordinate system), and the fragment arrays and
   reference frame buffers are allocated.
  On failure nothing is left allocated in _state.
  _state: The state to initialize.
  _info:  The stream parameters.
  _nrefs: The number of reference frame buffers to allocate.
  Return: 0 on success, TH_EFAULT if _info is NULL or an allocation failed,
   TH_EINVAL if the parameters are invalid, or TH_EIMPL if the frame is too
   large for this platform.*/
int oc_state_init(oc_theora_state *_state,const th_info *_info,int _nrefs){
  int ret;
  /*First validate the parameters.*/
  if(_info==NULL)return TH_EFAULT;
  /*The width and height of the encoded frame must be multiples of 16.
    They must also, when divided by 16, fit into a 16-bit unsigned integer.
    The displayable frame offset coordinates must fit into an 8-bit unsigned
     integer.
    Note that the offset Y in the API is specified on the opposite side from
     how it is specified in the bitstream, because the Y axis is flipped in
     the bitstream.
    The displayable frame must fit inside the encoded frame.
    This is tested with subtractions that are only evaluated once the
     subtrahend is known not to exceed the minuend, so that a huge offset or
     size cannot wrap around an unsigned sum and slip through.
    The color space must be one known by the encoder.*/
  if((_info->frame_width&0xF)||(_info->frame_height&0xF)||
   _info->frame_width<=0||_info->frame_width>=0x100000||
   _info->frame_height<=0||_info->frame_height>=0x100000||
   _info->pic_width>_info->frame_width||
   _info->pic_x>_info->frame_width-_info->pic_width||
   _info->pic_height>_info->frame_height||
   _info->pic_y>_info->frame_height-_info->pic_height||
   _info->pic_x>255||_info->frame_height-_info->pic_height-_info->pic_y>255||
   /*Note: the following <0 comparisons may generate spurious warnings on
      platforms where enums are unsigned.
     We could cast them to unsigned and just use the following >= comparison,
      but there are a number of compilers which will mis-optimize this.
     It's better to live with the spurious warnings.*/
   _info->colorspace<0||_info->colorspace>=TH_CS_NSPACES||
   _info->pixel_fmt<0||_info->pixel_fmt>=TH_PF_NFORMATS){
    return TH_EINVAL;
  }
  memset(_state,0,sizeof(*_state));
  memcpy(&_state->info,_info,sizeof(*_info));
  /*Invert the sense of pic_y to match Theora's right-handed coordinate
     system.*/
  _state->info.pic_y=_info->frame_height-_info->pic_height-_info->pic_y;
  _state->frame_type=OC_UNKWN_FRAME;
  oc_state_accel_init(_state);
  ret=oc_state_frarray_init(_state);
  if(ret>=0)ret=oc_state_ref_bufs_init(_state,_nrefs);
  if(ret<0){
    /*oc_state_ref_bufs_init() cleans up after itself, so only the fragment
       arrays need to be released here.*/
    oc_state_frarray_clear(_state);
    return ret;
  }
  /*If the keyframe_granule_shift is out of range, use the maximum allowable
     value.*/
  if(_info->keyframe_granule_shift<0||_info->keyframe_granule_shift>31){
    _state->info.keyframe_granule_shift=31;
  }
  _state->keyframe_num=0;
  _state->curframe_num=-1;
  /*3.2.0 streams mark the frame index instead of the frame count.
    This was changed with stream version 3.2.1 to conform to other Ogg
     codecs.
    We add an extra bias when computing granule positions for new streams.*/
  _state->granpos_bias=TH_VERSION_CHECK(_info,3,2,1);
  return 0;
}
748 
/*Releases the resources held by a state set up with oc_state_init().
  The reference buffers are cleared first, then the fragment arrays.
  _state: The state to clear.*/
void oc_state_clear(oc_theora_state *_state){
  oc_state_ref_bufs_clear(_state);
  oc_state_frarray_clear(_state);
}
753 
754 
755 /*Duplicates the pixels on the border of the image plane out into the
756    surrounding padding for use by unrestricted motion vectors.
757   This function only adds the left and right borders, and only for the fragment
758    rows specified.
759   _refi: The index of the reference buffer to pad.
760   _pli:  The color plane.
761   _y0:   The Y coordinate of the first row to pad.
762   _yend: The Y coordinate of the row to stop padding at.*/
oc_state_borders_fill_rows(oc_theora_state * _state,int _refi,int _pli,int _y0,int _yend)763 void oc_state_borders_fill_rows(oc_theora_state *_state,int _refi,int _pli,
764  int _y0,int _yend){
765   th_img_plane  *iplane;
766   unsigned char *apix;
767   unsigned char *bpix;
768   unsigned char *epix;
769   int            stride;
770   int            hpadding;
771   hpadding=OC_UMV_PADDING>>(_pli!=0&&!(_state->info.pixel_fmt&1));
772   iplane=_state->ref_frame_bufs[_refi]+_pli;
773   stride=iplane->stride;
774   apix=iplane->data+_y0*(ptrdiff_t)stride;
775   bpix=apix+iplane->width-1;
776   epix=iplane->data+_yend*(ptrdiff_t)stride;
777   /*Note the use of != instead of <, which allows the stride to be negative.*/
778   while(apix!=epix){
779     memset(apix-hpadding,apix[0],hpadding);
780     memset(bpix+1,bpix[0],hpadding);
781     apix+=stride;
782     bpix+=stride;
783   }
784 }
785 
786 /*Duplicates the pixels on the border of the image plane out into the
787    surrounding padding for use by unrestricted motion vectors.
788   This function only adds the top and bottom borders, and must be called after
789    the left and right borders are added.
790   _refi:      The index of the reference buffer to pad.
791   _pli:       The color plane.*/
oc_state_borders_fill_caps(oc_theora_state * _state,int _refi,int _pli)792 void oc_state_borders_fill_caps(oc_theora_state *_state,int _refi,int _pli){
793   th_img_plane  *iplane;
794   unsigned char *apix;
795   unsigned char *bpix;
796   unsigned char *epix;
797   int            stride;
798   int            hpadding;
799   int            vpadding;
800   int            fullw;
801   hpadding=OC_UMV_PADDING>>(_pli!=0&&!(_state->info.pixel_fmt&1));
802   vpadding=OC_UMV_PADDING>>(_pli!=0&&!(_state->info.pixel_fmt&2));
803   iplane=_state->ref_frame_bufs[_refi]+_pli;
804   stride=iplane->stride;
805   fullw=iplane->width+(hpadding<<1);
806   apix=iplane->data-hpadding;
807   bpix=iplane->data+(iplane->height-1)*(ptrdiff_t)stride-hpadding;
808   epix=apix-stride*(ptrdiff_t)vpadding;
809   while(apix!=epix){
810     memcpy(apix-stride,apix,fullw);
811     memcpy(bpix+stride,bpix,fullw);
812     apix-=stride;
813     bpix+=stride;
814   }
815 }
816 
817 /*Duplicates the pixels on the border of the given reference image out into
818    the surrounding padding for use by unrestricted motion vectors.
819   _state: The context containing the reference buffers.
820   _refi:  The index of the reference buffer to pad.*/
oc_state_borders_fill(oc_theora_state * _state,int _refi)821 void oc_state_borders_fill(oc_theora_state *_state,int _refi){
822   int pli;
823   for(pli=0;pli<3;pli++){
824     oc_state_borders_fill_rows(_state,_refi,pli,0,
825      _state->ref_frame_bufs[_refi][pli].height);
826     oc_state_borders_fill_caps(_state,_refi,pli);
827   }
828 }
829 
830 /*Determines the offsets in an image buffer to use for motion compensation.
831   _state:   The Theora state the offsets are to be computed with.
832   _offsets: Returns the offset for the buffer(s).
833             _offsets[0] is always set.
834             _offsets[1] is set if the motion vector has non-zero fractional
835              components.
836   _pli:     The color plane index.
837   _mv:      The motion vector.
838   Return: The number of offsets returned: 1 or 2.*/
oc_state_get_mv_offsets(const oc_theora_state * _state,int _offsets[2],int _pli,oc_mv _mv)839 int oc_state_get_mv_offsets(const oc_theora_state *_state,int _offsets[2],
840  int _pli,oc_mv _mv){
841   /*Here is a brief description of how Theora handles motion vectors:
842     Motion vector components are specified to half-pixel accuracy in
843      undecimated directions of each plane, and quarter-pixel accuracy in
844      decimated directions.
845     Integer parts are extracted by dividing (not shifting) by the
846      appropriate amount, with truncation towards zero.
847     These integer values are used to calculate the first offset.
848 
849     If either of the fractional parts are non-zero, then a second offset is
850      computed.
851     No third or fourth offsets are computed, even if both components have
852      non-zero fractional parts.
853     The second offset is computed by dividing (not shifting) by the
854      appropriate amount, always truncating _away_ from zero.*/
855 #if 0
856   /*This version of the code doesn't use any tables, but is slower.*/
857   int ystride;
858   int xprec;
859   int yprec;
860   int xfrac;
861   int yfrac;
862   int offs;
863   int dx;
864   int dy;
865   ystride=_state->ref_ystride[_pli];
866   /*These two variables decide whether we are in half- or quarter-pixel
867      precision in each component.*/
868   xprec=1+(_pli!=0&&!(_state->info.pixel_fmt&1));
869   yprec=1+(_pli!=0&&!(_state->info.pixel_fmt&2));
870   dx=OC_MV_X(_mv);
871   dy=OC_MV_Y(_mv);
872   /*These two variables are either 0 if all the fractional bits are zero or -1
873      if any of them are non-zero.*/
874   xfrac=OC_SIGNMASK(-(dx&(xprec|1)));
875   yfrac=OC_SIGNMASK(-(dy&(yprec|1)));
876   offs=(dx>>xprec)+(dy>>yprec)*ystride;
877   if(xfrac||yfrac){
878     int xmask;
879     int ymask;
880     xmask=OC_SIGNMASK(dx);
881     ymask=OC_SIGNMASK(dy);
882     yfrac&=ystride;
883     _offsets[0]=offs-(xfrac&xmask)+(yfrac&ymask);
884     _offsets[1]=offs-(xfrac&~xmask)+(yfrac&~ymask);
885     return 2;
886   }
887   else{
888     _offsets[0]=offs;
889     return 1;
890   }
891 #else
892   /*Using tables simplifies the code, and there's enough arithmetic to hide the
893      latencies of the memory references.*/
894   static const signed char OC_MVMAP[2][64]={
895     {
896           -15,-15,-14,-14,-13,-13,-12,-12,-11,-11,-10,-10, -9, -9, -8,
897        -8, -7, -7, -6, -6, -5, -5, -4, -4, -3, -3, -2, -2, -1, -1,  0,
898         0,  0,  1,  1,  2,  2,  3,  3,  4,  4,  5,  5,  6,  6,  7,  7,
899         8,  8,  9,  9, 10, 10, 11, 11, 12, 12, 13, 13, 14, 14, 15, 15
900     },
901     {
902            -7, -7, -7, -7, -6, -6, -6, -6, -5, -5, -5, -5, -4, -4, -4,
903        -4, -3, -3, -3, -3, -2, -2, -2, -2, -1, -1, -1, -1,  0,  0,  0,
904         0,  0,  0,  0,  1,  1,  1,  1,  2,  2,  2,  2,  3,  3,  3,  3,
905         4,  4,  4,  4,  5,  5,  5,  5,  6,  6,  6,  6,  7,  7,  7,  7
906     }
907   };
908   static const signed char OC_MVMAP2[2][64]={
909     {
910         -1, 0,-1,  0,-1, 0,-1,  0,-1, 0,-1,  0,-1, 0,-1,
911       0,-1, 0,-1,  0,-1, 0,-1,  0,-1, 0,-1,  0,-1, 0,-1,
912       0, 1, 0, 1,  0, 1, 0, 1,  0, 1, 0, 1,  0, 1, 0, 1,
913       0, 1, 0, 1,  0, 1, 0, 1,  0, 1, 0, 1,  0, 1, 0, 1
914     },
915     {
916         -1,-1,-1,  0,-1,-1,-1,  0,-1,-1,-1,  0,-1,-1,-1,
917       0,-1,-1,-1,  0,-1,-1,-1,  0,-1,-1,-1,  0,-1,-1,-1,
918       0, 1, 1, 1,  0, 1, 1, 1,  0, 1, 1, 1,  0, 1, 1, 1,
919       0, 1, 1, 1,  0, 1, 1, 1,  0, 1, 1, 1,  0, 1, 1, 1
920     }
921   };
922   int ystride;
923   int qpx;
924   int qpy;
925   int mx;
926   int my;
927   int mx2;
928   int my2;
929   int offs;
930   int dx;
931   int dy;
932   ystride=_state->ref_ystride[_pli];
933   qpy=_pli!=0&&!(_state->info.pixel_fmt&2);
934   dx=OC_MV_X(_mv);
935   dy=OC_MV_Y(_mv);
936   my=OC_MVMAP[qpy][dy+31];
937   my2=OC_MVMAP2[qpy][dy+31];
938   qpx=_pli!=0&&!(_state->info.pixel_fmt&1);
939   mx=OC_MVMAP[qpx][dx+31];
940   mx2=OC_MVMAP2[qpx][dx+31];
941   offs=my*ystride+mx;
942   if(mx2||my2){
943     _offsets[1]=offs+my2*ystride+mx2;
944     _offsets[0]=offs;
945     return 2;
946   }
947   _offsets[0]=offs;
948   return 1;
949 #endif
950 }
951 
/*Reconstructs a single fragment into the current frame.
  The residue is produced in the second half of _dct_coeffs, then combined
   with the prediction selected by the fragment's reference frame index.
  _state:      The state holding the fragment tables and frame buffers.
  _fragi:      The index of the fragment to reconstruct.
  _pli:        The color plane the fragment belongs to.
  _dct_coeffs: Entries 0...63 hold the coefficients on input, with the
                quantized DC value in entry 0; entries 64...127 receive the
                residue.
               Entry 0 is overwritten when the full inverse transform runs.
  _last_zzi:   Passed on to the inverse transform; a value below 2 selects the
                DC-only shortcut.
  _dc_quant:   The quantizer to multiply the DC coefficient by.*/
void oc_state_frag_recon_c(const oc_theora_state *_state,ptrdiff_t _fragi,
 int _pli,ogg_int16_t _dct_coeffs[128],int _last_zzi,ogg_uint16_t _dc_quant){
  ogg_int16_t   *residue;
  unsigned char *dst;
  ptrdiff_t      frag_buf_off;
  int            ystride;
  int            refi;
  residue=_dct_coeffs+64;
  /*Apply the inverse transform.*/
  if(_last_zzi>=2){
    /*General case: dequantize the DC coefficient, then run the iDCT.*/
    _dct_coeffs[0]=(ogg_int16_t)(_dct_coeffs[0]*(int)_dc_quant);
    oc_idct8x8(_state,residue,_dct_coeffs,_last_zzi);
  }
  else{
    /*Only a DC component: the residue is one constant value.*/
    ogg_int16_t dc;
    int         ci;
    /*This dequantization product is rounded (unlike all the others) because
       no iDCT follows to do the rounding.*/
    dc=(ogg_int16_t)((_dct_coeffs[0]*(ogg_int32_t)_dc_quant+15)>>5);
    /*LOOP VECTORIZES.*/
    for(ci=64;ci<128;ci++)_dct_coeffs[ci]=dc;
  }
  /*Fill in the target buffer.*/
  refi=_state->frags[_fragi].refi;
  frag_buf_off=_state->frag_buf_offs[_fragi];
  ystride=_state->ref_ystride[_pli];
  dst=_state->ref_frame_data[OC_FRAME_SELF]+frag_buf_off;
  if(refi!=OC_FRAME_SELF){
    const unsigned char *ref;
    int                  mvoffsets[2];
    int                  noffsets;
    ref=_state->ref_frame_data[refi]+frag_buf_off;
    noffsets=oc_state_get_mv_offsets(_state,mvoffsets,_pli,
     _state->frag_mvs[_fragi]);
    if(noffsets>1){
      /*Two offsets: the predictor is formed from two reference positions.*/
      oc_frag_recon_inter2(_state,
       dst,ref+mvoffsets[0],ref+mvoffsets[1],ystride,residue);
    }
    else oc_frag_recon_inter(_state,dst,ref+mvoffsets[0],ystride,residue);
  }
  else oc_frag_recon_intra(_state,dst,ystride,residue);
}
994 
/*Filters a vertical block edge, i.e. filters horizontally across it.
  Eight rows are processed; in each, the two pixels on either side of the
   edge are read and the inner two are adjusted.
  _pix:     The first pixel to the right of the edge, in the top row.
  _ystride: The distance in bytes between rows.
  _bv:      The bounding values table, indexed with negative as well as
             positive values.*/
static void loop_filter_h(unsigned char *_pix,int _ystride,signed char *_bv){
  unsigned char *row;
  int            nleft;
  row=_pix-2;
  for(nleft=8;nleft-->0;row+=_ystride){
    int delta;
    delta=row[0]-row[3]+3*(row[2]-row[1]);
    /*The _bv array is used to compute the function
      f=OC_CLAMPI(OC_MINI(-_2flimit-f,0),f,OC_MAXI(_2flimit-f,0));
      where _2flimit=_state->loop_filter_limits[_state->qis[0]]<<1;*/
    delta=_bv[(delta+4)>>3];
    row[1]=OC_CLAMP255(row[1]+delta);
    row[2]=OC_CLAMP255(row[2]-delta);
  }
}
1010 
/*Filters a horizontal block edge, i.e. filters vertically across it.
  Eight columns are processed; in each, the two pixels on either side of the
   edge are read and the inner two are adjusted.
  _pix:     The leftmost pixel of the first row below the edge.
  _ystride: The distance in bytes between rows.
  _bv:      The bounding values table, indexed with negative as well as
             positive values.*/
static void loop_filter_v(unsigned char *_pix,int _ystride,signed char *_bv){
  unsigned char *base;
  int            col;
  /*Start two rows above the edge.*/
  base=_pix-_ystride*2;
  for(col=0;col<8;col++){
    unsigned char *inner_above;
    unsigned char *inner_below;
    int            delta;
    inner_above=base+_ystride+col;
    inner_below=base+_ystride*2+col;
    delta=base[col]-base[_ystride*3+col]+3*(*inner_below-*inner_above);
    /*The _bv array is used to compute the function
      f=OC_CLAMPI(OC_MINI(-_2flimit-f,0),f,OC_MAXI(_2flimit-f,0));
      where _2flimit=_state->loop_filter_limits[_state->qis[0]]<<1;*/
    delta=_bv[(delta+4)>>3];
    *inner_above=OC_CLAMP255(*inner_above+delta);
    *inner_below=OC_CLAMP255(*inner_below-delta);
  }
}
1025 
/*Builds the bounding values table used by the loop filter.
  The table is centered on entry 127: moving away from the center the values
   ramp from 0 up to +/-_flimit and then back down towards 0, and every entry
   that is not reached stays 0.
  _bv:     Storage for the 256 table entries.
  _flimit: The filter limit as defined in Section 7.10 of the spec.*/
void oc_loop_filter_init_c(signed char _bv[256],int _flimit){
  int k;
  memset(_bv,0,256*sizeof(*_bv));
  for(k=0;k<_flimit;k++){
    int lo;
    int hi;
    /*The outer halves of the two ramps may fall off the ends of the table.*/
    lo=127-k-_flimit;
    hi=127+k+_flimit;
    if(lo>=0)_bv[lo]=(signed char)(k-_flimit);
    _bv[127-k]=(signed char)(-k);
    _bv[127+k]=(signed char)(k);
    if(hi<256)_bv[hi]=(signed char)(_flimit-k);
  }
}
1039 
1040 /*Apply the loop filter to a given set of fragment rows in the given plane.
1041   The filter may be run on the bottom edge, affecting pixels in the next row of
1042    fragments, so this row also needs to be available.
1043   _bv:        The bounding values array.
1044   _refi:      The index of the frame buffer to filter.
1045   _pli:       The color plane to filter.
1046   _fragy0:    The Y coordinate of the first fragment row to filter.
1047   _fragy_end: The Y coordinate of the fragment row to stop filtering at.*/
oc_state_loop_filter_frag_rows_c(const oc_theora_state * _state,signed char * _bv,int _refi,int _pli,int _fragy0,int _fragy_end)1048 void oc_state_loop_filter_frag_rows_c(const oc_theora_state *_state,
1049  signed char *_bv,int _refi,int _pli,int _fragy0,int _fragy_end){
1050   const oc_fragment_plane *fplane;
1051   const oc_fragment       *frags;
1052   const ptrdiff_t         *frag_buf_offs;
1053   unsigned char           *ref_frame_data;
1054   ptrdiff_t                fragi_top;
1055   ptrdiff_t                fragi_bot;
1056   ptrdiff_t                fragi0;
1057   ptrdiff_t                fragi0_end;
1058   int                      ystride;
1059   int                      nhfrags;
1060   _bv+=127;
1061   fplane=_state->fplanes+_pli;
1062   nhfrags=fplane->nhfrags;
1063   fragi_top=fplane->froffset;
1064   fragi_bot=fragi_top+fplane->nfrags;
1065   fragi0=fragi_top+_fragy0*(ptrdiff_t)nhfrags;
1066   fragi0_end=fragi_top+_fragy_end*(ptrdiff_t)nhfrags;
1067   ystride=_state->ref_ystride[_pli];
1068   frags=_state->frags;
1069   frag_buf_offs=_state->frag_buf_offs;
1070   ref_frame_data=_state->ref_frame_data[_refi];
1071   /*The following loops are constructed somewhat non-intuitively on purpose.
1072     The main idea is: if a block boundary has at least one coded fragment on
1073      it, the filter is applied to it.
1074     However, the order that the filters are applied in matters, and VP3 chose
1075      the somewhat strange ordering used below.*/
1076   while(fragi0<fragi0_end){
1077     ptrdiff_t fragi;
1078     ptrdiff_t fragi_end;
1079     fragi=fragi0;
1080     fragi_end=fragi+nhfrags;
1081     while(fragi<fragi_end){
1082       if(frags[fragi].coded){
1083         unsigned char *ref;
1084         ref=ref_frame_data+frag_buf_offs[fragi];
1085         if(fragi>fragi0)loop_filter_h(ref,ystride,_bv);
1086         if(fragi0>fragi_top)loop_filter_v(ref,ystride,_bv);
1087         if(fragi+1<fragi_end&&!frags[fragi+1].coded){
1088           loop_filter_h(ref+8,ystride,_bv);
1089         }
1090         if(fragi+nhfrags<fragi_bot&&!frags[fragi+nhfrags].coded){
1091           loop_filter_v(ref+(ystride<<3),ystride,_bv);
1092         }
1093       }
1094       fragi++;
1095     }
1096     fragi0+=nhfrags;
1097   }
1098 }
1099 
#if defined(OC_DUMP_IMAGES)
/*Writes one of the reference frames to a 16-bit RGB PNG file, for debugging.
  The file is created in the current directory and is named after the frame
   number derived from the state's granule position, followed by _suf and
   ".png".
  _state: The state containing the frame to dump.
  _frame: The index into ref_frame_idx[] of the frame to dump.
  _suf:   A suffix appended to the frame number in the file name.
  Return: 0 on success, TH_EINVAL if _suf is too long for the file name
           buffer, or TH_EFAULT if the file could not be opened, memory could
           not be allocated, or libpng reported an error.*/
int oc_state_dump_frame(const oc_theora_state *_state,int _frame,
 const char *_suf){
  /*Dump a PNG of the reconstructed image.*/
  png_structp    png;
  png_infop      info;
  png_bytep     *image;
  FILE          *fp;
  char           fname[32];
  unsigned char *y_row;
  unsigned char *u_row;
  unsigned char *v_row;
  unsigned char *y;
  unsigned char *u;
  unsigned char *v;
  ogg_int64_t    iframe;
  ogg_int64_t    pframe;
  int            y_stride;
  int            u_stride;
  int            v_stride;
  int            framei;
  int            width;
  int            height;
  int            imgi;
  int            imgj;
  width=_state->info.frame_width;
  height=_state->info.frame_height;
  iframe=_state->granpos>>_state->info.keyframe_granule_shift;
  pframe=_state->granpos-(iframe<<_state->info.keyframe_granule_shift);
  /*Make sure the name fits before formatting it.
    Besides the suffix it needs up to 11 characters for the frame number (the
     widest value "%08i" can print for an int of at most 32 bits), 4 for
     ".png" and 1 for the terminator.*/
  if(strlen(_suf)>sizeof(fname)-16)return TH_EINVAL;
  sprintf(fname,"%08i%s.png",(int)(iframe+pframe),_suf);
  fp=fopen(fname,"wb");
  if(fp==NULL)return TH_EFAULT;
  /*Six bytes per pixel: three 16-bit channels.*/
  image=(png_bytep *)oc_malloc_2d(height,6*width,sizeof(**image));
  if(image==NULL){
    fclose(fp);
    return TH_EFAULT;
  }
  png=png_create_write_struct(PNG_LIBPNG_VER_STRING,NULL,NULL,NULL);
  if(png==NULL){
    oc_free_2d(image);
    fclose(fp);
    return TH_EFAULT;
  }
  info=png_create_info_struct(png);
  if(info==NULL){
    png_destroy_write_struct(&png,NULL);
    oc_free_2d(image);
    fclose(fp);
    return TH_EFAULT;
  }
  /*libpng reports errors by jumping back here.*/
  if(setjmp(png_jmpbuf(png))){
    png_destroy_write_struct(&png,&info);
    oc_free_2d(image);
    fclose(fp);
    return TH_EFAULT;
  }
  framei=_state->ref_frame_idx[_frame];
  y_row=_state->ref_frame_bufs[framei][0].data;
  u_row=_state->ref_frame_bufs[framei][1].data;
  v_row=_state->ref_frame_bufs[framei][2].data;
  y_stride=_state->ref_frame_bufs[framei][0].stride;
  u_stride=_state->ref_frame_bufs[framei][1].stride;
  v_stride=_state->ref_frame_bufs[framei][2].stride;
  /*Chroma up-sampling is just done with a box filter.
    This is very likely what will actually be used in practice on a real
     display, and also removes one more layer to search in for the source of
     artifacts.
    As an added bonus, it's dead simple.*/
  /*The source rows are walked in buffer order while the output rows are
     filled from the last one to the first.*/
  for(imgi=height;imgi-->0;){
    int dc;
    y=y_row;
    u=u_row;
    v=v_row;
    for(imgj=0;imgj<6*width;){
      float    yval;
      float    uval;
      float    vval;
      unsigned rval;
      unsigned gval;
      unsigned bval;
      /*This is intentionally slow and very accurate.*/
      yval=(*y-16)*(1.0F/219);
      uval=(*u-128)*(2*(1-0.114F)/224);
      vval=(*v-128)*(2*(1-0.299F)/224);
      rval=OC_CLAMPI(0,(int)(65535*(yval+vval)+0.5F),65535);
      gval=OC_CLAMPI(0,(int)(65535*(
       yval-uval*(0.114F/0.587F)-vval*(0.299F/0.587F))+0.5F),65535);
      bval=OC_CLAMPI(0,(int)(65535*(yval+uval)+0.5F),65535);
      /*Each channel is stored most significant byte first.*/
      image[imgi][imgj++]=(unsigned char)(rval>>8);
      image[imgi][imgj++]=(unsigned char)(rval&0xFF);
      image[imgi][imgj++]=(unsigned char)(gval>>8);
      image[imgi][imgj++]=(unsigned char)(gval&0xFF);
      image[imgi][imgj++]=(unsigned char)(bval>>8);
      image[imgi][imgj++]=(unsigned char)(bval&0xFF);
      /*Advance the chroma pointers after every luma pixel when bit 0 of the
         pixel format is set, and after every second one otherwise.*/
      dc=(y-y_row&1)|(_state->info.pixel_fmt&1);
      y++;
      u+=dc;
      v+=dc;
    }
    /*The same rule applied to rows, using bit 1 of the pixel format; dc is
       turned into an all-ones mask to select the stride.*/
    dc=-((height-1-imgi&1)|_state->info.pixel_fmt>>1);
    y_row+=y_stride;
    u_row+=dc&u_stride;
    v_row+=dc&v_stride;
  }
  png_init_io(png,fp);
  png_set_compression_level(png,Z_BEST_COMPRESSION);
  png_set_IHDR(png,info,width,height,16,PNG_COLOR_TYPE_RGB,
   PNG_INTERLACE_NONE,PNG_COMPRESSION_TYPE_DEFAULT,PNG_FILTER_TYPE_DEFAULT);
  switch(_state->info.colorspace){
    case TH_CS_ITU_REC_470M:{
      png_set_gAMA(png,info,2.2);
      png_set_cHRM_fixed(png,info,31006,31616,
       67000,32000,21000,71000,14000,8000);
    }break;
    case TH_CS_ITU_REC_470BG:{
      png_set_gAMA(png,info,2.67);
      png_set_cHRM_fixed(png,info,31271,32902,
       64000,33000,29000,60000,15000,6000);
    }break;
    default:break;
  }
  png_set_pHYs(png,info,_state->info.aspect_numerator,
   _state->info.aspect_denominator,0);
  png_set_rows(png,info,image);
  png_write_png(png,info,PNG_TRANSFORM_IDENTITY,NULL);
  /*NOTE(review): png_write_png() is the high-level writer; confirm against
     the libpng manual whether this explicit png_write_end() is still
     wanted.*/
  png_write_end(png,info);
  png_destroy_write_struct(&png,&info);
  oc_free_2d(image);
  fclose(fp);
  return 0;
}
#endif
1232 
1233 
1234 
/*Converts a granule position into a frame index.
  _encdec:  The encoder or decoder context; it is treated as an
             oc_theora_state.
  _granpos: The granule position to convert.
  Return: The frame index, or -1 if _granpos is negative.*/
ogg_int64_t th_granule_frame(void *_encdec,ogg_int64_t _granpos){
  oc_theora_state *state;
  ogg_int64_t      upper;
  ogg_int64_t      lower;
  if(_granpos<0)return -1;
  state=(oc_theora_state *)_encdec;
  /*Split the position into the part above the keyframe granule shift and
     the remainder below it.*/
  upper=_granpos>>state->info.keyframe_granule_shift;
  lower=_granpos-(upper<<state->info.keyframe_granule_shift);
  /*3.2.0 streams store the frame index in the granule position, while 3.2.1
     and later streams store the frame count.
    An index is returned here, so the value is adjusted for a 3.2.1 or later
     stream.*/
  return upper+lower-TH_VERSION_CHECK(&state->info,3,2,1);
}
1251 
/*Converts a granule position into a time.
  The result is the frame index of the granule position plus one, multiplied
   by fps_denominator/fps_numerator.
  _encdec:  The encoder or decoder context; it is treated as an
             oc_theora_state.
  _granpos: The granule position to convert.
  Return: The time, or -1 if _granpos is negative.*/
double th_granule_time(void *_encdec,ogg_int64_t _granpos){
  oc_theora_state *state;
  double           frame_period;
  if(_granpos<0)return -1;
  state=(oc_theora_state *)_encdec;
  frame_period=(double)state->info.fps_denominator/state->info.fps_numerator;
  return (th_granule_frame(_encdec,_granpos)+1)*frame_period;
}
1261