1 /********************************************************************
2  *                                                                  *
3  * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
4  * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
5  * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
6  * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
7  *                                                                  *
8  * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009                *
9  * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
10  *                                                                  *
11  ********************************************************************
12 
13   function:
14     last mod: $Id$
15 
16  ********************************************************************/
17 
18 #include <stdlib.h>
19 #include <string.h>
20 #include "state.h"
21 #if defined(OC_DUMP_IMAGES)
22 # include <stdio.h>
23 # include "png.h"
24 # include "zlib.h"
25 #endif
26 
27 /*The function used to fill in the chroma plane motion vectors for a macro
28    block when 4 different motion vectors are specified in the luma plane.
29   This version is for use with chroma decimated in the X and Y directions
30    (4:2:0).
31   _cbmvs: The chroma block-level motion vectors to fill in.
32   _lbmvs: The luma block-level motion vectors.*/
oc_set_chroma_mvs00(oc_mv _cbmvs[4],const oc_mv _lbmvs[4])33 static void oc_set_chroma_mvs00(oc_mv _cbmvs[4],const oc_mv _lbmvs[4]){
34   int dx;
35   int dy;
36   dx=OC_MV_X(_lbmvs[0])+OC_MV_X(_lbmvs[1])
37    +OC_MV_X(_lbmvs[2])+OC_MV_X(_lbmvs[3]);
38   dy=OC_MV_Y(_lbmvs[0])+OC_MV_Y(_lbmvs[1])
39    +OC_MV_Y(_lbmvs[2])+OC_MV_Y(_lbmvs[3]);
40   _cbmvs[0]=OC_MV(OC_DIV_ROUND_POW2(dx,2,2),OC_DIV_ROUND_POW2(dy,2,2));
41 }
42 
43 /*The function used to fill in the chroma plane motion vectors for a macro
44    block when 4 different motion vectors are specified in the luma plane.
45   This version is for use with chroma decimated in the Y direction.
46   _cbmvs: The chroma block-level motion vectors to fill in.
47   _lbmvs: The luma block-level motion vectors.*/
oc_set_chroma_mvs01(oc_mv _cbmvs[4],const oc_mv _lbmvs[4])48 static void oc_set_chroma_mvs01(oc_mv _cbmvs[4],const oc_mv _lbmvs[4]){
49   int dx;
50   int dy;
51   dx=OC_MV_X(_lbmvs[0])+OC_MV_X(_lbmvs[2]);
52   dy=OC_MV_Y(_lbmvs[0])+OC_MV_Y(_lbmvs[2]);
53   _cbmvs[0]=OC_MV(OC_DIV_ROUND_POW2(dx,1,1),OC_DIV_ROUND_POW2(dy,1,1));
54   dx=OC_MV_X(_lbmvs[1])+OC_MV_X(_lbmvs[3]);
55   dy=OC_MV_Y(_lbmvs[1])+OC_MV_Y(_lbmvs[3]);
56   _cbmvs[1]=OC_MV(OC_DIV_ROUND_POW2(dx,1,1),OC_DIV_ROUND_POW2(dy,1,1));
57 }
58 
59 /*The function used to fill in the chroma plane motion vectors for a macro
60    block when 4 different motion vectors are specified in the luma plane.
61   This version is for use with chroma decimated in the X direction (4:2:2).
62   _cbmvs: The chroma block-level motion vectors to fill in.
63   _lbmvs: The luma block-level motion vectors.*/
oc_set_chroma_mvs10(oc_mv _cbmvs[4],const oc_mv _lbmvs[4])64 static void oc_set_chroma_mvs10(oc_mv _cbmvs[4],const oc_mv _lbmvs[4]){
65   int dx;
66   int dy;
67   dx=OC_MV_X(_lbmvs[0])+OC_MV_X(_lbmvs[1]);
68   dy=OC_MV_Y(_lbmvs[0])+OC_MV_Y(_lbmvs[1]);
69   _cbmvs[0]=OC_MV(OC_DIV_ROUND_POW2(dx,1,1),OC_DIV_ROUND_POW2(dy,1,1));
70   dx=OC_MV_X(_lbmvs[2])+OC_MV_X(_lbmvs[3]);
71   dy=OC_MV_Y(_lbmvs[2])+OC_MV_Y(_lbmvs[3]);
72   _cbmvs[2]=OC_MV(OC_DIV_ROUND_POW2(dx,1,1),OC_DIV_ROUND_POW2(dy,1,1));
73 }
74 
75 /*The function used to fill in the chroma plane motion vectors for a macro
76    block when 4 different motion vectors are specified in the luma plane.
77   This version is for use with no chroma decimation (4:4:4).
78   _cbmvs: The chroma block-level motion vectors to fill in.
79   _lmbmv: The luma macro-block level motion vector to fill in for use in
80            prediction.
81   _lbmvs: The luma block-level motion vectors.*/
oc_set_chroma_mvs11(oc_mv _cbmvs[4],const oc_mv _lbmvs[4])82 static void oc_set_chroma_mvs11(oc_mv _cbmvs[4],const oc_mv _lbmvs[4]){
83   _cbmvs[0]=_lbmvs[0];
84   _cbmvs[1]=_lbmvs[1];
85   _cbmvs[2]=_lbmvs[2];
86   _cbmvs[3]=_lbmvs[3];
87 }
88 
89 /*A table of functions used to fill in the chroma plane motion vectors for a
90    macro block when 4 different motion vectors are specified in the luma
91    plane.*/
92 const oc_set_chroma_mvs_func OC_SET_CHROMA_MVS_TABLE[TH_PF_NFORMATS]={
93   (oc_set_chroma_mvs_func)oc_set_chroma_mvs00,
94   (oc_set_chroma_mvs_func)oc_set_chroma_mvs01,
95   (oc_set_chroma_mvs_func)oc_set_chroma_mvs10,
96   (oc_set_chroma_mvs_func)oc_set_chroma_mvs11
97 };
98 
99 
100 
101 /*Returns the fragment index of the top-left block in a macro block.
102   This can be used to test whether or not the whole macro block is valid.
103   _sb_map: The super block map.
104   _quadi:  The quadrant number.
105   Return: The index of the fragment of the upper left block in the macro
106    block, or -1 if the block lies outside the coded frame.*/
oc_sb_quad_top_left_frag(oc_sb_map_quad _sb_map[4],int _quadi)107 static ptrdiff_t oc_sb_quad_top_left_frag(oc_sb_map_quad _sb_map[4],int _quadi){
108   /*It so happens that under the Hilbert curve ordering described below, the
109      upper-left block in each macro block is at index 0, except in macro block
110      3, where it is at index 2.*/
111   return _sb_map[_quadi][_quadi&_quadi<<1];
112 }
113 
114 /*Fills in the mapping from block positions to fragment numbers for a single
115    color plane.
116   This function also fills in the "valid" flag of each quadrant in the super
117    block flags.
118   _sb_maps:  The array of super block maps for the color plane.
119   _sb_flags: The array of super block flags for the color plane.
120   _frag0:    The index of the first fragment in the plane.
121   _hfrags:   The number of horizontal fragments in a coded frame.
122   _vfrags:   The number of vertical fragments in a coded frame.*/
oc_sb_create_plane_mapping(oc_sb_map _sb_maps[],oc_sb_flags _sb_flags[],ptrdiff_t _frag0,int _hfrags,int _vfrags)123 static void oc_sb_create_plane_mapping(oc_sb_map _sb_maps[],
124  oc_sb_flags _sb_flags[],ptrdiff_t _frag0,int _hfrags,int _vfrags){
125   /*Contains the (macro_block,block) indices for a 4x4 grid of
126      fragments.
127     The pattern is a 4x4 Hilbert space-filling curve.
128     A Hilbert curve has the nice property that as the curve grows larger, its
129      fractal dimension approaches 2.
130     The intuition is that nearby blocks in the curve are also close spatially,
131      with the previous element always an immediate neighbor, so that runs of
132      blocks should be well correlated.*/
133   static const int SB_MAP[4][4][2]={
134     {{0,0},{0,1},{3,2},{3,3}},
135     {{0,3},{0,2},{3,1},{3,0}},
136     {{1,0},{1,3},{2,0},{2,3}},
137     {{1,1},{1,2},{2,1},{2,2}}
138   };
139   ptrdiff_t  yfrag;
140   unsigned   sbi;
141   int        y;
142   sbi=0;
143   yfrag=_frag0;
144   for(y=0;;y+=4){
145     int imax;
146     int x;
147     /*Figure out how many columns of blocks in this super block lie within the
148        image.*/
149     imax=_vfrags-y;
150     if(imax>4)imax=4;
151     else if(imax<=0)break;
152     for(x=0;;x+=4,sbi++){
153       ptrdiff_t xfrag;
154       int       jmax;
155       int       quadi;
156       int       i;
157       /*Figure out how many rows of blocks in this super block lie within the
158          image.*/
159       jmax=_hfrags-x;
160       if(jmax>4)jmax=4;
161       else if(jmax<=0)break;
162       /*By default, set all fragment indices to -1.*/
163       memset(_sb_maps[sbi],0xFF,sizeof(_sb_maps[sbi]));
164       /*Fill in the fragment map for this super block.*/
165       xfrag=yfrag+x;
166       for(i=0;i<imax;i++){
167         int j;
168         for(j=0;j<jmax;j++){
169           _sb_maps[sbi][SB_MAP[i][j][0]][SB_MAP[i][j][1]]=xfrag+j;
170         }
171         xfrag+=_hfrags;
172       }
173       /*Mark which quadrants of this super block lie within the image.*/
174       for(quadi=0;quadi<4;quadi++){
175         _sb_flags[sbi].quad_valid|=
176          (oc_sb_quad_top_left_frag(_sb_maps[sbi],quadi)>=0)<<quadi;
177       }
178     }
179     yfrag+=_hfrags<<2;
180   }
181 }
182 
183 /*Fills in the Y plane fragment map for a macro block given the fragment
184    coordinates of its upper-left hand corner.
185   _mb_map:    The macro block map to fill.
186   _fplane: The description of the Y plane.
187   _xfrag0: The X location of the upper-left hand fragment in the luma plane.
188   _yfrag0: The Y location of the upper-left hand fragment in the luma plane.*/
oc_mb_fill_ymapping(oc_mb_map_plane _mb_map[3],const oc_fragment_plane * _fplane,int _xfrag0,int _yfrag0)189 static void oc_mb_fill_ymapping(oc_mb_map_plane _mb_map[3],
190  const oc_fragment_plane *_fplane,int _xfrag0,int _yfrag0){
191   int i;
192   int j;
193   for(i=0;i<2;i++)for(j=0;j<2;j++){
194     _mb_map[0][i<<1|j]=(_yfrag0+i)*(ptrdiff_t)_fplane->nhfrags+_xfrag0+j;
195   }
196 }
197 
198 /*Fills in the chroma plane fragment maps for a macro block.
199   This version is for use with chroma decimated in the X and Y directions
200    (4:2:0).
201   _mb_map:  The macro block map to fill.
202   _fplanes: The descriptions of the fragment planes.
203   _xfrag0:  The X location of the upper-left hand fragment in the luma plane.
204   _yfrag0:  The Y location of the upper-left hand fragment in the luma plane.*/
oc_mb_fill_cmapping00(oc_mb_map_plane _mb_map[3],const oc_fragment_plane _fplanes[3],int _xfrag0,int _yfrag0)205 static void oc_mb_fill_cmapping00(oc_mb_map_plane _mb_map[3],
206  const oc_fragment_plane _fplanes[3],int _xfrag0,int _yfrag0){
207   ptrdiff_t fragi;
208   _xfrag0>>=1;
209   _yfrag0>>=1;
210   fragi=_yfrag0*(ptrdiff_t)_fplanes[1].nhfrags+_xfrag0;
211   _mb_map[1][0]=fragi+_fplanes[1].froffset;
212   _mb_map[2][0]=fragi+_fplanes[2].froffset;
213 }
214 
215 /*Fills in the chroma plane fragment maps for a macro block.
216   This version is for use with chroma decimated in the Y direction.
217   _mb_map:  The macro block map to fill.
218   _fplanes: The descriptions of the fragment planes.
219   _xfrag0:  The X location of the upper-left hand fragment in the luma plane.
220   _yfrag0:  The Y location of the upper-left hand fragment in the luma plane.*/
oc_mb_fill_cmapping01(oc_mb_map_plane _mb_map[3],const oc_fragment_plane _fplanes[3],int _xfrag0,int _yfrag0)221 static void oc_mb_fill_cmapping01(oc_mb_map_plane _mb_map[3],
222  const oc_fragment_plane _fplanes[3],int _xfrag0,int _yfrag0){
223   ptrdiff_t fragi;
224   int       j;
225   _yfrag0>>=1;
226   fragi=_yfrag0*(ptrdiff_t)_fplanes[1].nhfrags+_xfrag0;
227   for(j=0;j<2;j++){
228     _mb_map[1][j]=fragi+_fplanes[1].froffset;
229     _mb_map[2][j]=fragi+_fplanes[2].froffset;
230     fragi++;
231   }
232 }
233 
234 /*Fills in the chroma plane fragment maps for a macro block.
235   This version is for use with chroma decimated in the X direction (4:2:2).
236   _mb_map:  The macro block map to fill.
237   _fplanes: The descriptions of the fragment planes.
238   _xfrag0:  The X location of the upper-left hand fragment in the luma plane.
239   _yfrag0:  The Y location of the upper-left hand fragment in the luma plane.*/
oc_mb_fill_cmapping10(oc_mb_map_plane _mb_map[3],const oc_fragment_plane _fplanes[3],int _xfrag0,int _yfrag0)240 static void oc_mb_fill_cmapping10(oc_mb_map_plane _mb_map[3],
241  const oc_fragment_plane _fplanes[3],int _xfrag0,int _yfrag0){
242   ptrdiff_t fragi;
243   int       i;
244   _xfrag0>>=1;
245   fragi=_yfrag0*(ptrdiff_t)_fplanes[1].nhfrags+_xfrag0;
246   for(i=0;i<2;i++){
247     _mb_map[1][i<<1]=fragi+_fplanes[1].froffset;
248     _mb_map[2][i<<1]=fragi+_fplanes[2].froffset;
249     fragi+=_fplanes[1].nhfrags;
250   }
251 }
252 
253 /*Fills in the chroma plane fragment maps for a macro block.
254   This version is for use with no chroma decimation (4:4:4).
255   This uses the already filled-in luma plane values.
256   _mb_map:  The macro block map to fill.
257   _fplanes: The descriptions of the fragment planes.
258   _xfrag0:  The X location of the upper-left hand fragment in the luma plane.
259   _yfrag0:  The Y location of the upper-left hand fragment in the luma plane.*/
oc_mb_fill_cmapping11(oc_mb_map_plane _mb_map[3],const oc_fragment_plane _fplanes[3],int _xfrag0,int _yfrag0)260 static void oc_mb_fill_cmapping11(oc_mb_map_plane _mb_map[3],
261  const oc_fragment_plane _fplanes[3],int _xfrag0,int _yfrag0){
262   int k;
263   (void)_xfrag0;
264   (void)_yfrag0;
265   for(k=0;k<4;k++){
266     _mb_map[1][k]=_mb_map[0][k]+_fplanes[1].froffset;
267     _mb_map[2][k]=_mb_map[0][k]+_fplanes[2].froffset;
268   }
269 }
270 
271 /*The function type used to fill in the chroma plane fragment maps for a
272    macro block.
273   _mb_map:  The macro block map to fill.
274   _fplanes: The descriptions of the fragment planes.
275   _xfrag0:  The X location of the upper-left hand fragment in the luma plane.
276   _yfrag0:  The Y location of the upper-left hand fragment in the luma plane.*/
277 typedef void (*oc_mb_fill_cmapping_func)(oc_mb_map_plane _mb_map[3],
278  const oc_fragment_plane _fplanes[3],int _xfrag0,int _yfrag0);
279 
280 /*A table of functions used to fill in the chroma plane fragment maps for a
281    macro block for each type of chrominance decimation.*/
282 static const oc_mb_fill_cmapping_func OC_MB_FILL_CMAPPING_TABLE[4]={
283   oc_mb_fill_cmapping00,
284   oc_mb_fill_cmapping01,
285   oc_mb_fill_cmapping10,
286   oc_mb_fill_cmapping11
287 };
288 
289 /*Fills in the mapping from macro blocks to their corresponding fragment
290    numbers in each plane.
291   _mb_maps:   The list of macro block maps.
292   _mb_modes:  The list of macro block modes; macro blocks completely outside
293                the coded region are marked invalid.
294   _fplanes:   The descriptions of the fragment planes.
295   _pixel_fmt: The chroma decimation type.*/
oc_mb_create_mapping(oc_mb_map _mb_maps[],signed char _mb_modes[],const oc_fragment_plane _fplanes[3],int _pixel_fmt)296 static void oc_mb_create_mapping(oc_mb_map _mb_maps[],
297  signed char _mb_modes[],const oc_fragment_plane _fplanes[3],int _pixel_fmt){
298   oc_mb_fill_cmapping_func  mb_fill_cmapping;
299   unsigned                  sbi;
300   int                       y;
301   mb_fill_cmapping=OC_MB_FILL_CMAPPING_TABLE[_pixel_fmt];
302   /*Loop through the luma plane super blocks.*/
303   for(sbi=y=0;y<_fplanes[0].nvfrags;y+=4){
304     int x;
305     for(x=0;x<_fplanes[0].nhfrags;x+=4,sbi++){
306       int ymb;
307       /*Loop through the macro blocks in each super block in display order.*/
308       for(ymb=0;ymb<2;ymb++){
309         int xmb;
310         for(xmb=0;xmb<2;xmb++){
311           unsigned mbi;
312           int      mbx;
313           int      mby;
314           mbi=sbi<<2|OC_MB_MAP[ymb][xmb];
315           mbx=x|xmb<<1;
316           mby=y|ymb<<1;
317           /*Initialize fragment indices to -1.*/
318           memset(_mb_maps[mbi],0xFF,sizeof(_mb_maps[mbi]));
319           /*Make sure this macro block is within the encoded region.*/
320           if(mbx>=_fplanes[0].nhfrags||mby>=_fplanes[0].nvfrags){
321             _mb_modes[mbi]=OC_MODE_INVALID;
322             continue;
323           }
324           /*Fill in the fragment indices for the luma plane.*/
325           oc_mb_fill_ymapping(_mb_maps[mbi],_fplanes,mbx,mby);
326           /*Fill in the fragment indices for the chroma planes.*/
327           (*mb_fill_cmapping)(_mb_maps[mbi],_fplanes,mbx,mby);
328         }
329       }
330     }
331   }
332 }
333 
334 /*Marks the fragments which fall all or partially outside the displayable
335    region of the frame.
336   _state: The Theora state containing the fragments to be marked.*/
oc_state_border_init(oc_theora_state * _state)337 static void oc_state_border_init(oc_theora_state *_state){
338   oc_fragment       *frag;
339   oc_fragment       *yfrag_end;
340   oc_fragment       *xfrag_end;
341   oc_fragment_plane *fplane;
342   int                crop_x0;
343   int                crop_y0;
344   int                crop_xf;
345   int                crop_yf;
346   int                pli;
347   int                y;
348   int                x;
349   /*The method we use here is slow, but the code is dead simple and handles
350      all the special cases easily.
351     We only ever need to do it once.*/
352   /*Loop through the fragments, marking those completely outside the
353      displayable region and constructing a border mask for those that straddle
354      the border.*/
355   _state->nborders=0;
356   yfrag_end=frag=_state->frags;
357   for(pli=0;pli<3;pli++){
358     fplane=_state->fplanes+pli;
359     /*Set up the cropping rectangle for this plane.*/
360     crop_x0=_state->info.pic_x;
361     crop_xf=_state->info.pic_x+_state->info.pic_width;
362     crop_y0=_state->info.pic_y;
363     crop_yf=_state->info.pic_y+_state->info.pic_height;
364     if(pli>0){
365       if(!(_state->info.pixel_fmt&1)){
366         crop_x0=crop_x0>>1;
367         crop_xf=crop_xf+1>>1;
368       }
369       if(!(_state->info.pixel_fmt&2)){
370         crop_y0=crop_y0>>1;
371         crop_yf=crop_yf+1>>1;
372       }
373     }
374     y=0;
375     for(yfrag_end+=fplane->nfrags;frag<yfrag_end;y+=8){
376       x=0;
377       for(xfrag_end=frag+fplane->nhfrags;frag<xfrag_end;frag++,x+=8){
378         /*First check to see if this fragment is completely outside the
379            displayable region.*/
380         /*Note the special checks for an empty cropping rectangle.
381           This guarantees that if we count a fragment as straddling the
382            border below, at least one pixel in the fragment will be inside
383            the displayable region.*/
384         if(x+8<=crop_x0||crop_xf<=x||y+8<=crop_y0||crop_yf<=y||
385          crop_x0>=crop_xf||crop_y0>=crop_yf){
386           frag->invalid=1;
387         }
388         /*Otherwise, check to see if it straddles the border.*/
389         else if(x<crop_x0&&crop_x0<x+8||x<crop_xf&&crop_xf<x+8||
390          y<crop_y0&&crop_y0<y+8||y<crop_yf&&crop_yf<y+8){
391           ogg_int64_t mask;
392           int         npixels;
393           int         i;
394           mask=npixels=0;
395           for(i=0;i<8;i++){
396             int j;
397             for(j=0;j<8;j++){
398               if(x+j>=crop_x0&&x+j<crop_xf&&y+i>=crop_y0&&y+i<crop_yf){
399                 mask|=(ogg_int64_t)1<<(i<<3|j);
400                 npixels++;
401               }
402             }
403           }
404           /*Search the fragment array for border info with the same pattern.
405             In general, there will be at most 8 different patterns (per
406              plane).*/
407           for(i=0;;i++){
408             if(i>=_state->nborders){
409               _state->nborders++;
410               _state->borders[i].mask=mask;
411               _state->borders[i].npixels=npixels;
412             }
413             else if(_state->borders[i].mask!=mask)continue;
414             frag->borderi=i;
415             break;
416           }
417         }
418         else frag->borderi=-1;
419       }
420     }
421   }
422 }
423 
/*Computes the dimensions of the fragment, super block and macro block arrays
   from the frame size and pixel format in _state->info, allocates them, and
   fills in the block mappings and the per-fragment border information.
  _state: The Theora state to initialize; info must already be filled in and
           validated (frame dimensions positive multiples of 16).
  Return: 0 on success, TH_EIMPL if the array sizes cannot be represented on
   this platform, or TH_EFAULT if an allocation fails.
  On TH_EFAULT, whichever arrays were successfully allocated are still stored
   in _state; the caller releases them with oc_state_frarray_clear().*/
static int oc_state_frarray_init(oc_theora_state *_state){
  int       yhfrags;
  int       yvfrags;
  int       chfrags;
  int       cvfrags;
  size_t    uyfrags;
  size_t    ucfrags;
  size_t    unfrags;
  ptrdiff_t yfrags;
  ptrdiff_t cfrags;
  ptrdiff_t nfrags;
  unsigned  yhsbs;
  unsigned  yvsbs;
  unsigned  chsbs;
  unsigned  cvsbs;
  unsigned  ysbs;
  unsigned  csbs;
  unsigned  nsbs;
  size_t    nmbs;
  int       hdec;
  int       vdec;
  int       pli;
  /*Figure out the number of fragments in each plane.*/
  /*These parameters have already been validated to be multiples of 16.*/
  yhfrags=_state->info.frame_width>>3;
  yvfrags=_state->info.frame_height>>3;
  hdec=!(_state->info.pixel_fmt&1);
  vdec=!(_state->info.pixel_fmt&2);
  chfrags=yhfrags+hdec>>hdec;
  cvfrags=yvfrags+vdec>>vdec;
  /*Count the fragments with unsigned arithmetic.
    Overflowing a signed ptrdiff_t is undefined behavior, which allows a
     compiler to discard an overflow test performed after the fact; unsigned
     arithmetic wraps, so the tests below are reliable.*/
  uyfrags=yhfrags*(size_t)yvfrags;
  ucfrags=chfrags*(size_t)cvfrags;
  unfrags=uyfrags+2*ucfrags;
  /*Figure out the number of super blocks in each plane.*/
  yhsbs=yhfrags+3>>2;
  yvsbs=yvfrags+3>>2;
  chsbs=chfrags+3>>2;
  cvsbs=cvfrags+3>>2;
  ysbs=yhsbs*yvsbs;
  csbs=chsbs*cvsbs;
  nsbs=ysbs+2*csbs;
  nmbs=(size_t)ysbs<<2;
  /*Check for overflow.
    We support the ridiculous upper limits of the specification (1048560 by
     1048560, or 3 TB frames) if the target architecture has 64-bit pointers,
     but for those with 32-bit pointers (or smaller!) we have to check.
    The chroma plane is never larger than the luma plane, so its fragment
     count cannot wrap if the luma count did not.
    The total must also survive a round trip through ptrdiff_t, the type used
     for fragment indices.
    If the caller wants to prevent denial-of-service by imposing a more
     reasonable upper limit on the size of attempted allocations, they must do
     so themselves; we have no platform independent way to determine how much
     system memory there is nor an application-independent way to decide what a
     "reasonable" allocation is.*/
  if(uyfrags/yhfrags!=(size_t)yvfrags||2*ucfrags<ucfrags||unfrags<uyfrags||
   (ptrdiff_t)unfrags<0||(size_t)(ptrdiff_t)unfrags!=unfrags||
   ysbs/yhsbs!=yvsbs||2*csbs<csbs||nsbs<ysbs||nmbs>>2!=ysbs){
    return TH_EIMPL;
  }
  /*All three counts are now known to fit.*/
  yfrags=(ptrdiff_t)uyfrags;
  cfrags=(ptrdiff_t)ucfrags;
  nfrags=(ptrdiff_t)unfrags;
  /*Initialize the fragment array.*/
  _state->fplanes[0].nhfrags=yhfrags;
  _state->fplanes[0].nvfrags=yvfrags;
  _state->fplanes[0].froffset=0;
  _state->fplanes[0].nfrags=yfrags;
  _state->fplanes[0].nhsbs=yhsbs;
  _state->fplanes[0].nvsbs=yvsbs;
  _state->fplanes[0].sboffset=0;
  _state->fplanes[0].nsbs=ysbs;
  _state->fplanes[1].nhfrags=_state->fplanes[2].nhfrags=chfrags;
  _state->fplanes[1].nvfrags=_state->fplanes[2].nvfrags=cvfrags;
  _state->fplanes[1].froffset=yfrags;
  _state->fplanes[2].froffset=yfrags+cfrags;
  _state->fplanes[1].nfrags=_state->fplanes[2].nfrags=cfrags;
  _state->fplanes[1].nhsbs=_state->fplanes[2].nhsbs=chsbs;
  _state->fplanes[1].nvsbs=_state->fplanes[2].nvsbs=cvsbs;
  _state->fplanes[1].sboffset=ysbs;
  _state->fplanes[2].sboffset=ysbs+csbs;
  _state->fplanes[1].nsbs=_state->fplanes[2].nsbs=csbs;
  _state->nfrags=nfrags;
  _state->frags=_ogg_calloc(nfrags,sizeof(*_state->frags));
  _state->frag_mvs=_ogg_malloc(nfrags*sizeof(*_state->frag_mvs));
  _state->nsbs=nsbs;
  _state->sb_maps=_ogg_malloc(nsbs*sizeof(*_state->sb_maps));
  _state->sb_flags=_ogg_calloc(nsbs,sizeof(*_state->sb_flags));
  /*Each super block is two macro blocks wide and two tall.*/
  _state->nhmbs=yhsbs<<1;
  _state->nvmbs=yvsbs<<1;
  _state->nmbs=nmbs;
  _state->mb_maps=_ogg_calloc(nmbs,sizeof(*_state->mb_maps));
  _state->mb_modes=_ogg_calloc(nmbs,sizeof(*_state->mb_modes));
  _state->coded_fragis=_ogg_malloc(nfrags*sizeof(*_state->coded_fragis));
  /*All of the allocations are attempted before any is checked; nothing is
     freed here on failure.*/
  if(_state->frags==NULL||_state->frag_mvs==NULL||_state->sb_maps==NULL||
   _state->sb_flags==NULL||_state->mb_maps==NULL||_state->mb_modes==NULL||
   _state->coded_fragis==NULL){
    return TH_EFAULT;
  }
  /*Create the mapping from super blocks to fragments.*/
  for(pli=0;pli<3;pli++){
    oc_fragment_plane *fplane;
    fplane=_state->fplanes+pli;
    oc_sb_create_plane_mapping(_state->sb_maps+fplane->sboffset,
     _state->sb_flags+fplane->sboffset,fplane->froffset,
     fplane->nhfrags,fplane->nvfrags);
  }
  /*Create the mapping from macro blocks to fragments.*/
  oc_mb_create_mapping(_state->mb_maps,_state->mb_modes,
   _state->fplanes,_state->info.pixel_fmt);
  /*Initialize the invalid and borderi fields of each fragment.*/
  oc_state_border_init(_state);
  return 0;
}
527 
/*Releases the arrays allocated by oc_state_frarray_init().
  The pointers stored in _state are not reset.
  _state: The Theora state whose fragment arrays should be freed.*/
static void oc_state_frarray_clear(oc_theora_state *_state){
  /*Fragment arrays.*/
  _ogg_free(_state->frags);
  _ogg_free(_state->frag_mvs);
  _ogg_free(_state->coded_fragis);
  /*Super block arrays.*/
  _ogg_free(_state->sb_maps);
  _ogg_free(_state->sb_flags);
  /*Macro block arrays.*/
  _ogg_free(_state->mb_maps);
  _ogg_free(_state->mb_modes);
}
537 
538 
539 /*Initializes the buffers used for reconstructed frames.
540   These buffers are padded with 16 extra pixels on each side, to allow
541    unrestricted motion vectors without special casing the boundary.
542   If chroma is decimated in either direction, the padding is reduced by a
543    factor of 2 on the appropriate sides.
544   _nrefs: The number of reference buffers to init; must be in the range 3...6.*/
oc_state_ref_bufs_init(oc_theora_state * _state,int _nrefs)545 static int oc_state_ref_bufs_init(oc_theora_state *_state,int _nrefs){
546   th_info       *info;
547   unsigned char *ref_frame_data;
548   size_t         ref_frame_data_sz;
549   size_t         ref_frame_sz;
550   size_t         yplane_sz;
551   size_t         cplane_sz;
552   int            yhstride;
553   int            yheight;
554   int            chstride;
555   int            cheight;
556   ptrdiff_t      align;
557   ptrdiff_t      yoffset;
558   ptrdiff_t      coffset;
559   ptrdiff_t     *frag_buf_offs;
560   ptrdiff_t      fragi;
561   int            hdec;
562   int            vdec;
563   int            rfi;
564   int            pli;
565   if(_nrefs<3||_nrefs>6)return TH_EINVAL;
566   info=&_state->info;
567   /*Compute the image buffer parameters for each plane.*/
568   hdec=!(info->pixel_fmt&1);
569   vdec=!(info->pixel_fmt&2);
570   yhstride=info->frame_width+2*OC_UMV_PADDING;
571   yheight=info->frame_height+2*OC_UMV_PADDING;
572   /*Require 16-byte aligned rows in the chroma planes.*/
573   chstride=(yhstride>>hdec)+15&~15;
574   cheight=yheight>>vdec;
575   yplane_sz=yhstride*(size_t)yheight;
576   cplane_sz=chstride*(size_t)cheight;
577   yoffset=OC_UMV_PADDING+OC_UMV_PADDING*(ptrdiff_t)yhstride;
578   coffset=(OC_UMV_PADDING>>hdec)+(OC_UMV_PADDING>>vdec)*(ptrdiff_t)chstride;
579   /*Although we guarantee the rows of the chroma planes are a multiple of 16
580      bytes, the initial padding on the first row may only be 8 bytes.
581     Compute the offset needed to the actual image data to a multiple of 16.*/
582   align=-coffset&15;
583   ref_frame_sz=yplane_sz+2*cplane_sz+16;
584   ref_frame_data_sz=_nrefs*ref_frame_sz;
585   /*Check for overflow.
586     The same caveats apply as for oc_state_frarray_init().*/
587   if(yplane_sz/yhstride!=(size_t)yheight||2*cplane_sz+16<cplane_sz||
588    ref_frame_sz<yplane_sz||ref_frame_data_sz/_nrefs!=ref_frame_sz){
589     return TH_EIMPL;
590   }
591   ref_frame_data=oc_aligned_malloc(ref_frame_data_sz,16);
592   frag_buf_offs=_state->frag_buf_offs=
593    _ogg_malloc(_state->nfrags*sizeof(*frag_buf_offs));
594   if(ref_frame_data==NULL||frag_buf_offs==NULL){
595     _ogg_free(frag_buf_offs);
596     oc_aligned_free(ref_frame_data);
597     return TH_EFAULT;
598   }
599   /*Set up the width, height and stride for the image buffers.*/
600   _state->ref_frame_bufs[0][0].width=info->frame_width;
601   _state->ref_frame_bufs[0][0].height=info->frame_height;
602   _state->ref_frame_bufs[0][0].stride=yhstride;
603   _state->ref_frame_bufs[0][1].width=_state->ref_frame_bufs[0][2].width=
604    info->frame_width>>hdec;
605   _state->ref_frame_bufs[0][1].height=_state->ref_frame_bufs[0][2].height=
606    info->frame_height>>vdec;
607   _state->ref_frame_bufs[0][1].stride=_state->ref_frame_bufs[0][2].stride=
608    chstride;
609   for(rfi=1;rfi<_nrefs;rfi++){
610     memcpy(_state->ref_frame_bufs[rfi],_state->ref_frame_bufs[0],
611      sizeof(_state->ref_frame_bufs[0]));
612   }
613   _state->ref_frame_handle=ref_frame_data;
614   /*Set up the data pointers for the image buffers.*/
615   for(rfi=0;rfi<_nrefs;rfi++){
616     _state->ref_frame_bufs[rfi][0].data=ref_frame_data+yoffset;
617     ref_frame_data+=yplane_sz+align;
618     _state->ref_frame_bufs[rfi][1].data=ref_frame_data+coffset;
619     ref_frame_data+=cplane_sz;
620     _state->ref_frame_bufs[rfi][2].data=ref_frame_data+coffset;
621     ref_frame_data+=cplane_sz+(16-align);
622     /*Flip the buffer upside down.
623       This allows us to decode Theora's bottom-up frames in their natural
624        order, yet return a top-down buffer with a positive stride to the user.*/
625     oc_ycbcr_buffer_flip(_state->ref_frame_bufs[rfi],
626      _state->ref_frame_bufs[rfi]);
627   }
628   _state->ref_ystride[0]=-yhstride;
629   _state->ref_ystride[1]=_state->ref_ystride[2]=-chstride;
630   /*Initialize the fragment buffer offsets.*/
631   ref_frame_data=_state->ref_frame_bufs[0][0].data;
632   fragi=0;
633   for(pli=0;pli<3;pli++){
634     th_img_plane      *iplane;
635     oc_fragment_plane *fplane;
636     unsigned char     *vpix;
637     ptrdiff_t          stride;
638     ptrdiff_t          vfragi_end;
639     int                nhfrags;
640     iplane=_state->ref_frame_bufs[0]+pli;
641     fplane=_state->fplanes+pli;
642     vpix=iplane->data;
643     vfragi_end=fplane->froffset+fplane->nfrags;
644     nhfrags=fplane->nhfrags;
645     stride=iplane->stride;
646     while(fragi<vfragi_end){
647       ptrdiff_t      hfragi_end;
648       unsigned char *hpix;
649       hpix=vpix;
650       for(hfragi_end=fragi+nhfrags;fragi<hfragi_end;fragi++){
651         frag_buf_offs[fragi]=hpix-ref_frame_data;
652         hpix+=8;
653       }
654       vpix+=stride<<3;
655     }
656   }
657   /*Initialize the reference frame pointers and indices.*/
658   _state->ref_frame_idx[OC_FRAME_GOLD]=
659    _state->ref_frame_idx[OC_FRAME_PREV]=
660    _state->ref_frame_idx[OC_FRAME_GOLD_ORIG]=
661    _state->ref_frame_idx[OC_FRAME_PREV_ORIG]=
662    _state->ref_frame_idx[OC_FRAME_SELF]=
663    _state->ref_frame_idx[OC_FRAME_IO]=-1;
664   _state->ref_frame_data[OC_FRAME_GOLD]=
665    _state->ref_frame_data[OC_FRAME_PREV]=
666    _state->ref_frame_data[OC_FRAME_GOLD_ORIG]=
667    _state->ref_frame_data[OC_FRAME_PREV_ORIG]=
668    _state->ref_frame_data[OC_FRAME_SELF]=
669    _state->ref_frame_data[OC_FRAME_IO]=NULL;
670   return 0;
671 }
672 
/*Releases the memory allocated by oc_state_ref_bufs_init(): the shared
   reference frame allocation and the fragment buffer offset table.
  The pointers stored in _state are not reset.
  _state: The Theora state whose reference buffers should be freed.*/
static void oc_state_ref_bufs_clear(oc_theora_state *_state){
  oc_aligned_free(_state->ref_frame_handle);
  _ogg_free(_state->frag_buf_offs);
}
677 
678 
/*Sets up the state to use the *_c routines, with no CPU flags set.
  The function table is only filled in when OC_STATE_USE_VTABLE is defined;
   the forward zig-zag table is always set to OC_FZIG_ZAG.
  _state: The Theora state to set up.*/
void oc_state_accel_init_c(oc_theora_state *_state){
  _state->cpu_flags=0;
  _state->opt_data.dct_fzig_zag=OC_FZIG_ZAG;
#if defined(OC_STATE_USE_VTABLE)
  /*Fragment copy and reconstruction.*/
  _state->opt_vtable.frag_copy=oc_frag_copy_c;
  _state->opt_vtable.frag_copy_list=oc_frag_copy_list_c;
  _state->opt_vtable.frag_recon_intra=oc_frag_recon_intra_c;
  _state->opt_vtable.frag_recon_inter=oc_frag_recon_inter_c;
  _state->opt_vtable.frag_recon_inter2=oc_frag_recon_inter2_c;
  _state->opt_vtable.state_frag_recon=oc_state_frag_recon_c;
  /*Inverse transform.*/
  _state->opt_vtable.idct8x8=oc_idct8x8_c;
  /*Loop filter.*/
  _state->opt_vtable.loop_filter_init=oc_loop_filter_init_c;
  _state->opt_vtable.state_loop_filter_frag_rows=
   oc_state_loop_filter_frag_rows_c;
  /*FPU state.*/
  _state->opt_vtable.restore_fpu=oc_restore_fpu_c;
#endif
}
696 
697 
/*Validates the stream description in _info and sets up _state from it.
  The state is zeroed, a copy of _info is stored in it, and the acceleration
   hooks, the fragment arrays, and the reference buffers are initialized.
  _state: The state to initialize.
  _info:  The stream parameters to validate and copy.
  _nrefs: Passed through unchanged to oc_state_ref_bufs_init() (presumably the
           number of reference buffers to allocate).
  Return: 0 on success.
          TH_EFAULT if _info is NULL.
          TH_EINVAL if any of the parameters is out of range.
          Otherwise, the negative value returned by the fragment array or
           reference buffer initialization.*/
int oc_state_init(oc_theora_state *_state,const th_info *_info,int _nrefs){
  int ret;
  /*First validate the parameters.*/
  if(_info==NULL)return TH_EFAULT;
  /*The width and height of the encoded frame must be multiples of 16.
    They must also, when divided by 16, fit into a 16-bit unsigned integer.
    The displayable frame offset coordinates must fit into an 8-bit unsigned
     integer.
    Note that the offset Y in the API is specified on the opposite side from
     how it is specified in the bitstream, because the Y axis is flipped in
     the bitstream.
    The displayable frame must fit inside the encoded frame.
    This is tested by subtracting from the frame size, which has already been
     range-checked by the time those tests are reached, instead of by adding
     the offset to the picture size: if the fields are unsigned, the sum could
     wrap around and let an oversized picture region through.
    The color space must be one known by the encoder.
    The framerate ratio must not contain a zero value.*/
  if((_info->frame_width&0xF)||(_info->frame_height&0xF)||
   _info->frame_width<=0||_info->frame_width>=0x100000||
   _info->frame_height<=0||_info->frame_height>=0x100000||
   _info->pic_width>_info->frame_width||
   _info->pic_x>_info->frame_width-_info->pic_width||
   _info->pic_height>_info->frame_height||
   _info->pic_y>_info->frame_height-_info->pic_height||
   _info->pic_x>255||_info->frame_height-_info->pic_height-_info->pic_y>255||
   /*Note: the following <0 comparisons may generate spurious warnings on
      platforms where enums are unsigned.
     We could cast them to unsigned and just use the following >= comparison,
      but there are a number of compilers which will mis-optimize this.
     It's better to live with the spurious warnings.*/
   _info->colorspace<0||_info->colorspace>=TH_CS_NSPACES||
   _info->pixel_fmt<0||_info->pixel_fmt>=TH_PF_NFORMATS||
   _info->fps_numerator<1||_info->fps_denominator<1){
    return TH_EINVAL;
  }
  memset(_state,0,sizeof(*_state));
  memcpy(&_state->info,_info,sizeof(*_info));
  /*Invert the sense of pic_y to match Theora's right-handed coordinate
     system.*/
  _state->info.pic_y=_info->frame_height-_info->pic_height-_info->pic_y;
  _state->frame_type=OC_UNKWN_FRAME;
  oc_state_accel_init(_state);
  ret=oc_state_frarray_init(_state);
  if(ret>=0)ret=oc_state_ref_bufs_init(_state,_nrefs);
  if(ret<0){
    /*Whichever step failed, release the fragment arrays before reporting the
       error.*/
    oc_state_frarray_clear(_state);
    return ret;
  }
  /*If the keyframe_granule_shift is out of range, use the maximum allowable
     value.*/
  if(_info->keyframe_granule_shift<0||_info->keyframe_granule_shift>31){
    _state->info.keyframe_granule_shift=31;
  }
  _state->keyframe_num=0;
  _state->curframe_num=-1;
  /*3.2.0 streams mark the frame index instead of the frame count.
    This was changed with stream version 3.2.1 to conform to other Ogg
     codecs.
    We add an extra bias when computing granule positions for new streams.*/
  _state->granpos_bias=TH_VERSION_CHECK(_info,3,2,1);
  return 0;
}
755 
/*Tears down a state by running the clear routine for each of the two parts
   that oc_state_init() sets up with an init routine.
  _state: The state to tear down.*/
void oc_state_clear(oc_theora_state *_state){
  /*The reference buffers are released first, then the fragment arrays.*/
  oc_state_ref_bufs_clear(_state);
  oc_state_frarray_clear(_state);
}
760 
761 
762 /*Duplicates the pixels on the border of the image plane out into the
763    surrounding padding for use by unrestricted motion vectors.
764   This function only adds the left and right borders, and only for the fragment
765    rows specified.
766   _refi: The index of the reference buffer to pad.
767   _pli:  The color plane.
768   _y0:   The Y coordinate of the first row to pad.
769   _yend: The Y coordinate of the row to stop padding at.*/
oc_state_borders_fill_rows(oc_theora_state * _state,int _refi,int _pli,int _y0,int _yend)770 void oc_state_borders_fill_rows(oc_theora_state *_state,int _refi,int _pli,
771  int _y0,int _yend){
772   th_img_plane  *iplane;
773   unsigned char *apix;
774   unsigned char *bpix;
775   unsigned char *epix;
776   int            stride;
777   int            hpadding;
778   hpadding=OC_UMV_PADDING>>(_pli!=0&&!(_state->info.pixel_fmt&1));
779   iplane=_state->ref_frame_bufs[_refi]+_pli;
780   stride=iplane->stride;
781   apix=iplane->data+_y0*(ptrdiff_t)stride;
782   bpix=apix+iplane->width-1;
783   epix=iplane->data+_yend*(ptrdiff_t)stride;
784   /*Note the use of != instead of <, which allows the stride to be negative.*/
785   while(apix!=epix){
786     memset(apix-hpadding,apix[0],hpadding);
787     memset(bpix+1,bpix[0],hpadding);
788     apix+=stride;
789     bpix+=stride;
790   }
791 }
792 
793 /*Duplicates the pixels on the border of the image plane out into the
794    surrounding padding for use by unrestricted motion vectors.
795   This function only adds the top and bottom borders, and must be called after
796    the left and right borders are added.
797   _refi:      The index of the reference buffer to pad.
798   _pli:       The color plane.*/
oc_state_borders_fill_caps(oc_theora_state * _state,int _refi,int _pli)799 void oc_state_borders_fill_caps(oc_theora_state *_state,int _refi,int _pli){
800   th_img_plane  *iplane;
801   unsigned char *apix;
802   unsigned char *bpix;
803   unsigned char *epix;
804   int            stride;
805   int            hpadding;
806   int            vpadding;
807   int            fullw;
808   hpadding=OC_UMV_PADDING>>(_pli!=0&&!(_state->info.pixel_fmt&1));
809   vpadding=OC_UMV_PADDING>>(_pli!=0&&!(_state->info.pixel_fmt&2));
810   iplane=_state->ref_frame_bufs[_refi]+_pli;
811   stride=iplane->stride;
812   fullw=iplane->width+(hpadding<<1);
813   apix=iplane->data-hpadding;
814   bpix=iplane->data+(iplane->height-1)*(ptrdiff_t)stride-hpadding;
815   epix=apix-stride*(ptrdiff_t)vpadding;
816   while(apix!=epix){
817     memcpy(apix-stride,apix,fullw);
818     memcpy(bpix+stride,bpix,fullw);
819     apix-=stride;
820     bpix+=stride;
821   }
822 }
823 
824 /*Duplicates the pixels on the border of the given reference image out into
825    the surrounding padding for use by unrestricted motion vectors.
826   _state: The context containing the reference buffers.
827   _refi:  The index of the reference buffer to pad.*/
oc_state_borders_fill(oc_theora_state * _state,int _refi)828 void oc_state_borders_fill(oc_theora_state *_state,int _refi){
829   int pli;
830   for(pli=0;pli<3;pli++){
831     oc_state_borders_fill_rows(_state,_refi,pli,0,
832      _state->ref_frame_bufs[_refi][pli].height);
833     oc_state_borders_fill_caps(_state,_refi,pli);
834   }
835 }
836 
837 /*Determines the offsets in an image buffer to use for motion compensation.
838   _state:   The Theora state the offsets are to be computed with.
839   _offsets: Returns the offset for the buffer(s).
840             _offsets[0] is always set.
841             _offsets[1] is set if the motion vector has non-zero fractional
842              components.
843   _pli:     The color plane index.
844   _mv:      The motion vector.
845   Return: The number of offsets returned: 1 or 2.*/
oc_state_get_mv_offsets(const oc_theora_state * _state,int _offsets[2],int _pli,oc_mv _mv)846 int oc_state_get_mv_offsets(const oc_theora_state *_state,int _offsets[2],
847  int _pli,oc_mv _mv){
848   /*Here is a brief description of how Theora handles motion vectors:
849     Motion vector components are specified to half-pixel accuracy in
850      undecimated directions of each plane, and quarter-pixel accuracy in
851      decimated directions.
852     Integer parts are extracted by dividing (not shifting) by the
853      appropriate amount, with truncation towards zero.
854     These integer values are used to calculate the first offset.
855 
856     If either of the fractional parts are non-zero, then a second offset is
857      computed.
858     No third or fourth offsets are computed, even if both components have
859      non-zero fractional parts.
860     The second offset is computed by dividing (not shifting) by the
861      appropriate amount, always truncating _away_ from zero.*/
862 #if 0
863   /*This version of the code doesn't use any tables, but is slower.*/
864   int ystride;
865   int xprec;
866   int yprec;
867   int xfrac;
868   int yfrac;
869   int offs;
870   int dx;
871   int dy;
872   ystride=_state->ref_ystride[_pli];
873   /*These two variables decide whether we are in half- or quarter-pixel
874      precision in each component.*/
875   xprec=1+(_pli!=0&&!(_state->info.pixel_fmt&1));
876   yprec=1+(_pli!=0&&!(_state->info.pixel_fmt&2));
877   dx=OC_MV_X(_mv);
878   dy=OC_MV_Y(_mv);
879   /*These two variables are either 0 if all the fractional bits are zero or -1
880      if any of them are non-zero.*/
881   xfrac=OC_SIGNMASK(-(dx&(xprec|1)));
882   yfrac=OC_SIGNMASK(-(dy&(yprec|1)));
883   offs=(dx>>xprec)+(dy>>yprec)*ystride;
884   if(xfrac||yfrac){
885     int xmask;
886     int ymask;
887     xmask=OC_SIGNMASK(dx);
888     ymask=OC_SIGNMASK(dy);
889     yfrac&=ystride;
890     _offsets[0]=offs-(xfrac&xmask)+(yfrac&ymask);
891     _offsets[1]=offs-(xfrac&~xmask)+(yfrac&~ymask);
892     return 2;
893   }
894   else{
895     _offsets[0]=offs;
896     return 1;
897   }
898 #else
899   /*Using tables simplifies the code, and there's enough arithmetic to hide the
900      latencies of the memory references.*/
901   static const signed char OC_MVMAP[2][64]={
902     {
903           -15,-15,-14,-14,-13,-13,-12,-12,-11,-11,-10,-10, -9, -9, -8,
904        -8, -7, -7, -6, -6, -5, -5, -4, -4, -3, -3, -2, -2, -1, -1,  0,
905         0,  0,  1,  1,  2,  2,  3,  3,  4,  4,  5,  5,  6,  6,  7,  7,
906         8,  8,  9,  9, 10, 10, 11, 11, 12, 12, 13, 13, 14, 14, 15, 15
907     },
908     {
909            -7, -7, -7, -7, -6, -6, -6, -6, -5, -5, -5, -5, -4, -4, -4,
910        -4, -3, -3, -3, -3, -2, -2, -2, -2, -1, -1, -1, -1,  0,  0,  0,
911         0,  0,  0,  0,  1,  1,  1,  1,  2,  2,  2,  2,  3,  3,  3,  3,
912         4,  4,  4,  4,  5,  5,  5,  5,  6,  6,  6,  6,  7,  7,  7,  7
913     }
914   };
915   static const signed char OC_MVMAP2[2][64]={
916     {
917         -1, 0,-1,  0,-1, 0,-1,  0,-1, 0,-1,  0,-1, 0,-1,
918       0,-1, 0,-1,  0,-1, 0,-1,  0,-1, 0,-1,  0,-1, 0,-1,
919       0, 1, 0, 1,  0, 1, 0, 1,  0, 1, 0, 1,  0, 1, 0, 1,
920       0, 1, 0, 1,  0, 1, 0, 1,  0, 1, 0, 1,  0, 1, 0, 1
921     },
922     {
923         -1,-1,-1,  0,-1,-1,-1,  0,-1,-1,-1,  0,-1,-1,-1,
924       0,-1,-1,-1,  0,-1,-1,-1,  0,-1,-1,-1,  0,-1,-1,-1,
925       0, 1, 1, 1,  0, 1, 1, 1,  0, 1, 1, 1,  0, 1, 1, 1,
926       0, 1, 1, 1,  0, 1, 1, 1,  0, 1, 1, 1,  0, 1, 1, 1
927     }
928   };
929   int ystride;
930   int qpx;
931   int qpy;
932   int mx;
933   int my;
934   int mx2;
935   int my2;
936   int offs;
937   int dx;
938   int dy;
939   ystride=_state->ref_ystride[_pli];
940   qpy=_pli!=0&&!(_state->info.pixel_fmt&2);
941   dx=OC_MV_X(_mv);
942   dy=OC_MV_Y(_mv);
943   my=OC_MVMAP[qpy][dy+31];
944   my2=OC_MVMAP2[qpy][dy+31];
945   qpx=_pli!=0&&!(_state->info.pixel_fmt&1);
946   mx=OC_MVMAP[qpx][dx+31];
947   mx2=OC_MVMAP2[qpx][dx+31];
948   offs=my*ystride+mx;
949   if(mx2||my2){
950     _offsets[1]=offs+my2*ystride+mx2;
951     _offsets[0]=offs;
952     return 2;
953   }
954   _offsets[0]=offs;
955   return 1;
956 #endif
957 }
958 
/*Reconstructs a single fragment into the OC_FRAME_SELF reference frame.
  The DC coefficient is dequantized, the residual is produced in the second
   half of _dct_coeffs, and that residual is then combined with the predictor
   selected by the fragment's reference frame index.
  _state:      The context containing the fragment and reference buffers.
  _fragi:      The index of the fragment to reconstruct.
  _pli:        The color plane the fragment belongs to.
  _dct_coeffs: The coefficients in entries 0...63; entries 64...127 receive
                the residual.
  _last_zzi:   Passed on to the inverse transform; values below 2 select the
                DC-only shortcut.
  _dc_quant:   The value the DC coefficient is multiplied by to dequantize
                it.*/
void oc_state_frag_recon_c(const oc_theora_state *_state,ptrdiff_t _fragi,
 int _pli,ogg_int16_t _dct_coeffs[128],int _last_zzi,ogg_uint16_t _dc_quant){
  ogg_int16_t         *residue;
  unsigned char       *dst;
  const unsigned char *ref;
  ptrdiff_t            frag_buf_off;
  int                  mvoffsets[2];
  int                  ystride;
  int                  refi;
  residue=_dct_coeffs+64;
  /*Apply the inverse transform.*/
  if(_last_zzi>=2){
    /*First, dequantize the DC coefficient.*/
    _dct_coeffs[0]=(ogg_int16_t)(_dct_coeffs[0]*(int)_dc_quant);
    oc_idct8x8(_state,residue,_dct_coeffs,_last_zzi);
  }
  else{
    ogg_int16_t dc;
    int         ci;
    /*Only a DC component is present, so the residual is flat.
      This dequant product is rounded (and none of the others are) because
       there is no iDCT rounding on this path.*/
    dc=(ogg_int16_t)(_dct_coeffs[0]*(ogg_int32_t)_dc_quant+15>>5);
    /*LOOP VECTORIZES.*/
    for(ci=64;ci<128;ci++)_dct_coeffs[ci]=dc;
  }
  /*Fill in the target buffer.*/
  frag_buf_off=_state->frag_buf_offs[_fragi];
  refi=_state->frags[_fragi].refi;
  ystride=_state->ref_ystride[_pli];
  dst=_state->ref_frame_data[OC_FRAME_SELF]+frag_buf_off;
  /*A fragment that references the frame being built has no predictor.*/
  if(refi==OC_FRAME_SELF){
    oc_frag_recon_intra(_state,dst,ystride,residue);
    return;
  }
  ref=_state->ref_frame_data[refi]+frag_buf_off;
  if(oc_state_get_mv_offsets(_state,mvoffsets,_pli,
   _state->frag_mvs[_fragi])>1){
    /*Two offsets were returned, so the predictor is built from both.*/
    oc_frag_recon_inter2(_state,
     dst,ref+mvoffsets[0],ref+mvoffsets[1],ystride,residue);
  }
  else oc_frag_recon_inter(_state,dst,ref+mvoffsets[0],ystride,residue);
}
1001 
/*Filters the vertical edge immediately to the left of _pix over 8 rows.
  On each row the two pixels on either side of the edge are read, and the
   pixels directly adjacent to the edge are adjusted by a bounded amount.
  _pix:     The first pixel to the right of the edge, on the first row.
  _ystride: The distance in bytes between rows.
  _bv:      The bounding values table.
            It is indexed with negative as well as positive values, so it
             must point into the interior of the array.*/
static void loop_filter_h(unsigned char *_pix,int _ystride,signed char *_bv){
  unsigned char *row;
  int            y;
  row=_pix-2;
  for(y=0;y<8;y++,row+=_ystride){
    int delta;
    delta=row[0]-row[3]+3*(row[2]-row[1]);
    /*The _bv array is used to compute the function
      f=OC_CLAMPI(OC_MINI(-_2flimit-f,0),f,OC_MAXI(_2flimit-f,0));
      where _2flimit=_state->loop_filter_limits[_state->qis[0]]<<1;*/
    delta=_bv[delta+4>>3];
    row[1]=OC_CLAMP255(row[1]+delta);
    row[2]=OC_CLAMP255(row[2]-delta);
  }
}
1017 
/*Filters the horizontal edge immediately above _pix over 8 columns.
  In each column the two pixels on either side of the edge are read, and the
   pixels directly adjacent to the edge are adjusted by a bounded amount.
  _pix:     The first pixel below the edge, in the first column.
  _ystride: The distance in bytes between rows.
  _bv:      The bounding values table.
            It is indexed with negative as well as positive values, so it
             must point into the interior of the array.*/
static void loop_filter_v(unsigned char *_pix,int _ystride,signed char *_bv){
  unsigned char *row0;
  unsigned char *row1;
  unsigned char *row2;
  unsigned char *row3;
  int            x;
  /*The four rows that straddle the edge, from top to bottom.*/
  row0=_pix-_ystride*2;
  row1=row0+_ystride;
  row2=row0+_ystride*2;
  row3=row0+_ystride*3;
  for(x=0;x<8;x++){
    int delta;
    delta=row0[x]-row3[x]+3*(row2[x]-row1[x]);
    /*The _bv array is used to compute the function
      f=OC_CLAMPI(OC_MINI(-_2flimit-f,0),f,OC_MAXI(_2flimit-f,0));
      where _2flimit=_state->loop_filter_limits[_state->qis[0]]<<1;*/
    delta=_bv[delta+4>>3];
    row1[x]=OC_CLAMP255(row1[x]+delta);
    row2[x]=OC_CLAMP255(row2[x]-delta);
  }
}
1032 
1033 /*Initialize the bounding values array used by the loop filter.
1034   _bv: Storage for the array.
1035   _flimit: The filter limit as defined in Section 7.10 of the spec.*/
oc_loop_filter_init_c(signed char _bv[256],int _flimit)1036 void oc_loop_filter_init_c(signed char _bv[256],int _flimit){
1037   int i;
1038   memset(_bv,0,sizeof(_bv[0])*256);
1039   for(i=0;i<_flimit;i++){
1040     if(127-i-_flimit>=0)_bv[127-i-_flimit]=(signed char)(i-_flimit);
1041     _bv[127-i]=(signed char)(-i);
1042     _bv[127+i]=(signed char)(i);
1043     if(127+i+_flimit<256)_bv[127+i+_flimit]=(signed char)(_flimit-i);
1044   }
1045 }
1046 
1047 /*Apply the loop filter to a given set of fragment rows in the given plane.
1048   The filter may be run on the bottom edge, affecting pixels in the next row of
1049    fragments, so this row also needs to be available.
1050   _bv:        The bounding values array.
1051   _refi:      The index of the frame buffer to filter.
1052   _pli:       The color plane to filter.
1053   _fragy0:    The Y coordinate of the first fragment row to filter.
1054   _fragy_end: The Y coordinate of the fragment row to stop filtering at.*/
oc_state_loop_filter_frag_rows_c(const oc_theora_state * _state,signed char * _bv,int _refi,int _pli,int _fragy0,int _fragy_end)1055 void oc_state_loop_filter_frag_rows_c(const oc_theora_state *_state,
1056  signed char *_bv,int _refi,int _pli,int _fragy0,int _fragy_end){
1057   const oc_fragment_plane *fplane;
1058   const oc_fragment       *frags;
1059   const ptrdiff_t         *frag_buf_offs;
1060   unsigned char           *ref_frame_data;
1061   ptrdiff_t                fragi_top;
1062   ptrdiff_t                fragi_bot;
1063   ptrdiff_t                fragi0;
1064   ptrdiff_t                fragi0_end;
1065   int                      ystride;
1066   int                      nhfrags;
1067   _bv+=127;
1068   fplane=_state->fplanes+_pli;
1069   nhfrags=fplane->nhfrags;
1070   fragi_top=fplane->froffset;
1071   fragi_bot=fragi_top+fplane->nfrags;
1072   fragi0=fragi_top+_fragy0*(ptrdiff_t)nhfrags;
1073   fragi0_end=fragi_top+_fragy_end*(ptrdiff_t)nhfrags;
1074   ystride=_state->ref_ystride[_pli];
1075   frags=_state->frags;
1076   frag_buf_offs=_state->frag_buf_offs;
1077   ref_frame_data=_state->ref_frame_data[_refi];
1078   /*The following loops are constructed somewhat non-intuitively on purpose.
1079     The main idea is: if a block boundary has at least one coded fragment on
1080      it, the filter is applied to it.
1081     However, the order that the filters are applied in matters, and VP3 chose
1082      the somewhat strange ordering used below.*/
1083   while(fragi0<fragi0_end){
1084     ptrdiff_t fragi;
1085     ptrdiff_t fragi_end;
1086     fragi=fragi0;
1087     fragi_end=fragi+nhfrags;
1088     while(fragi<fragi_end){
1089       if(frags[fragi].coded){
1090         unsigned char *ref;
1091         ref=ref_frame_data+frag_buf_offs[fragi];
1092         if(fragi>fragi0)loop_filter_h(ref,ystride,_bv);
1093         if(fragi0>fragi_top)loop_filter_v(ref,ystride,_bv);
1094         if(fragi+1<fragi_end&&!frags[fragi+1].coded){
1095           loop_filter_h(ref+8,ystride,_bv);
1096         }
1097         if(fragi+nhfrags<fragi_bot&&!frags[fragi+nhfrags].coded){
1098           loop_filter_v(ref+(ystride<<3),ystride,_bv);
1099         }
1100       }
1101       fragi++;
1102     }
1103     fragi0+=nhfrags;
1104   }
1105 }
1106 
1107 #if defined(OC_DUMP_IMAGES)
oc_state_dump_frame(const oc_theora_state * _state,int _frame,const char * _suf)1108 int oc_state_dump_frame(const oc_theora_state *_state,int _frame,
1109  const char *_suf){
1110   /*Dump a PNG of the reconstructed image.*/
1111   png_structp    png;
1112   png_infop      info;
1113   png_bytep     *image;
1114   FILE          *fp;
1115   char           fname[16];
1116   unsigned char *y_row;
1117   unsigned char *u_row;
1118   unsigned char *v_row;
1119   unsigned char *y;
1120   unsigned char *u;
1121   unsigned char *v;
1122   ogg_int64_t    iframe;
1123   ogg_int64_t    pframe;
1124   int            y_stride;
1125   int            u_stride;
1126   int            v_stride;
1127   int            framei;
1128   int            width;
1129   int            height;
1130   int            imgi;
1131   int            imgj;
1132   width=_state->info.frame_width;
1133   height=_state->info.frame_height;
1134   iframe=_state->granpos>>_state->info.keyframe_granule_shift;
1135   pframe=_state->granpos-(iframe<<_state->info.keyframe_granule_shift);
1136   sprintf(fname,"%08i%s.png",(int)(iframe+pframe),_suf);
1137   fp=fopen(fname,"wb");
1138   if(fp==NULL)return TH_EFAULT;
1139   image=(png_bytep *)oc_malloc_2d(height,6*width,sizeof(**image));
1140   if(image==NULL){
1141     fclose(fp);
1142     return TH_EFAULT;
1143   }
1144   png=png_create_write_struct(PNG_LIBPNG_VER_STRING,NULL,NULL,NULL);
1145   if(png==NULL){
1146     oc_free_2d(image);
1147     fclose(fp);
1148     return TH_EFAULT;
1149   }
1150   info=png_create_info_struct(png);
1151   if(info==NULL){
1152     png_destroy_write_struct(&png,NULL);
1153     oc_free_2d(image);
1154     fclose(fp);
1155     return TH_EFAULT;
1156   }
1157   if(setjmp(png_jmpbuf(png))){
1158     png_destroy_write_struct(&png,&info);
1159     oc_free_2d(image);
1160     fclose(fp);
1161     return TH_EFAULT;
1162   }
1163   framei=_state->ref_frame_idx[_frame];
1164   y_row=_state->ref_frame_bufs[framei][0].data;
1165   u_row=_state->ref_frame_bufs[framei][1].data;
1166   v_row=_state->ref_frame_bufs[framei][2].data;
1167   y_stride=_state->ref_frame_bufs[framei][0].stride;
1168   u_stride=_state->ref_frame_bufs[framei][1].stride;
1169   v_stride=_state->ref_frame_bufs[framei][2].stride;
1170   /*Chroma up-sampling is just done with a box filter.
1171     This is very likely what will actually be used in practice on a real
1172      display, and also removes one more layer to search in for the source of
1173      artifacts.
1174     As an added bonus, it's dead simple.*/
1175   for(imgi=height;imgi-->0;){
1176     int dc;
1177     y=y_row;
1178     u=u_row;
1179     v=v_row;
1180     for(imgj=0;imgj<6*width;){
1181       float    yval;
1182       float    uval;
1183       float    vval;
1184       unsigned rval;
1185       unsigned gval;
1186       unsigned bval;
1187       /*This is intentionally slow and very accurate.*/
1188       yval=(*y-16)*(1.0F/219);
1189       uval=(*u-128)*(2*(1-0.114F)/224);
1190       vval=(*v-128)*(2*(1-0.299F)/224);
1191       rval=OC_CLAMPI(0,(int)(65535*(yval+vval)+0.5F),65535);
1192       gval=OC_CLAMPI(0,(int)(65535*(
1193        yval-uval*(0.114F/0.587F)-vval*(0.299F/0.587F))+0.5F),65535);
1194       bval=OC_CLAMPI(0,(int)(65535*(yval+uval)+0.5F),65535);
1195       image[imgi][imgj++]=(unsigned char)(rval>>8);
1196       image[imgi][imgj++]=(unsigned char)(rval&0xFF);
1197       image[imgi][imgj++]=(unsigned char)(gval>>8);
1198       image[imgi][imgj++]=(unsigned char)(gval&0xFF);
1199       image[imgi][imgj++]=(unsigned char)(bval>>8);
1200       image[imgi][imgj++]=(unsigned char)(bval&0xFF);
1201       dc=(y-y_row&1)|(_state->info.pixel_fmt&1);
1202       y++;
1203       u+=dc;
1204       v+=dc;
1205     }
1206     dc=-((height-1-imgi&1)|_state->info.pixel_fmt>>1);
1207     y_row+=y_stride;
1208     u_row+=dc&u_stride;
1209     v_row+=dc&v_stride;
1210   }
1211   png_init_io(png,fp);
1212   png_set_compression_level(png,Z_BEST_COMPRESSION);
1213   png_set_IHDR(png,info,width,height,16,PNG_COLOR_TYPE_RGB,
1214    PNG_INTERLACE_NONE,PNG_COMPRESSION_TYPE_DEFAULT,PNG_FILTER_TYPE_DEFAULT);
1215   switch(_state->info.colorspace){
1216     case TH_CS_ITU_REC_470M:{
1217       png_set_gAMA(png,info,2.2);
1218       png_set_cHRM_fixed(png,info,31006,31616,
1219        67000,32000,21000,71000,14000,8000);
1220     }break;
1221     case TH_CS_ITU_REC_470BG:{
1222       png_set_gAMA(png,info,2.67);
1223       png_set_cHRM_fixed(png,info,31271,32902,
1224        64000,33000,29000,60000,15000,6000);
1225     }break;
1226     default:break;
1227   }
1228   png_set_pHYs(png,info,_state->info.aspect_numerator,
1229    _state->info.aspect_denominator,0);
1230   png_set_rows(png,info,image);
1231   png_write_png(png,info,PNG_TRANSFORM_IDENTITY,NULL);
1232   png_write_end(png,info);
1233   png_destroy_write_struct(&png,&info);
1234   oc_free_2d(image);
1235   fclose(fp);
1236   return 0;
1237 }
1238 #endif
1239 
1240 
1241 
/*Converts a granule position to a frame index.
  The granule position is split at keyframe_granule_shift into an upper and a
   lower part, and the two parts are added together.
  _encdec:  The encoder or decoder context; it is accessed as an
             oc_theora_state.
  _granpos: The granule position to convert.
  Return: The frame index, or -1 if _granpos is negative.*/
ogg_int64_t th_granule_frame(void *_encdec,ogg_int64_t _granpos){
  oc_theora_state *state;
  ogg_int64_t      upper;
  ogg_int64_t      lower;
  if(_granpos<0)return -1;
  state=(oc_theora_state *)_encdec;
  upper=_granpos>>state->info.keyframe_granule_shift;
  lower=_granpos-(upper<<state->info.keyframe_granule_shift);
  /*3.2.0 streams store the frame index in the granule position.
    3.2.1 and later store the frame count.
    We return the index, so adjust the value if we have a 3.2.1 or later
     stream.*/
  return upper+lower-TH_VERSION_CHECK(&state->info,3,2,1);
}
1258 
/*Converts a granule position to a time.
  The result is the frame index of the granule position plus one, multiplied
   by the ratio fps_denominator/fps_numerator.
  _encdec:  The encoder or decoder context; it is accessed as an
             oc_theora_state.
  _granpos: The granule position to convert.
  Return: The time computed as described above, or -1 if _granpos is
           negative.*/
double th_granule_time(void *_encdec,ogg_int64_t _granpos){
  oc_theora_state *state;
  if(_granpos<0)return -1;
  state=(oc_theora_state *)_encdec;
  return (th_granule_frame(_encdec,_granpos)+1)*(
   (double)state->info.fps_denominator/state->info.fps_numerator);
}
1268