1 /********************************************************************
2 * *
3 * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. *
4 * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
5 * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
6 * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
7 * *
8 * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 *
9 * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
10 * *
11 ********************************************************************
12
13 function:
14 last mod: $Id: state.c 17576 2010-10-29 01:07:51Z tterribe $
15
16 ********************************************************************/
17
18 #include <stdlib.h>
19 #include <string.h>
20 #include "state.h"
21 #if defined(OC_DUMP_IMAGES)
22 # include <stdio.h>
23 # include "png.h"
24 #endif
25
26 /*The function used to fill in the chroma plane motion vectors for a macro
27 block when 4 different motion vectors are specified in the luma plane.
28 This version is for use with chroma decimated in the X and Y directions
29 (4:2:0).
30 _cbmvs: The chroma block-level motion vectors to fill in.
31 _lbmvs: The luma block-level motion vectors.*/
oc_set_chroma_mvs00(oc_mv _cbmvs[4],const oc_mv _lbmvs[4])32 static void oc_set_chroma_mvs00(oc_mv _cbmvs[4],const oc_mv _lbmvs[4]){
33 int dx;
34 int dy;
35 dx=OC_MV_X(_lbmvs[0])+OC_MV_X(_lbmvs[1])
36 +OC_MV_X(_lbmvs[2])+OC_MV_X(_lbmvs[3]);
37 dy=OC_MV_Y(_lbmvs[0])+OC_MV_Y(_lbmvs[1])
38 +OC_MV_Y(_lbmvs[2])+OC_MV_Y(_lbmvs[3]);
39 _cbmvs[0]=OC_MV(OC_DIV_ROUND_POW2(dx,2,2),OC_DIV_ROUND_POW2(dy,2,2));
40 }
41
42 /*The function used to fill in the chroma plane motion vectors for a macro
43 block when 4 different motion vectors are specified in the luma plane.
44 This version is for use with chroma decimated in the Y direction.
45 _cbmvs: The chroma block-level motion vectors to fill in.
46 _lbmvs: The luma block-level motion vectors.*/
oc_set_chroma_mvs01(oc_mv _cbmvs[4],const oc_mv _lbmvs[4])47 static void oc_set_chroma_mvs01(oc_mv _cbmvs[4],const oc_mv _lbmvs[4]){
48 int dx;
49 int dy;
50 dx=OC_MV_X(_lbmvs[0])+OC_MV_X(_lbmvs[2]);
51 dy=OC_MV_Y(_lbmvs[0])+OC_MV_Y(_lbmvs[2]);
52 _cbmvs[0]=OC_MV(OC_DIV_ROUND_POW2(dx,1,1),OC_DIV_ROUND_POW2(dy,1,1));
53 dx=OC_MV_X(_lbmvs[1])+OC_MV_X(_lbmvs[3]);
54 dy=OC_MV_Y(_lbmvs[1])+OC_MV_Y(_lbmvs[3]);
55 _cbmvs[1]=OC_MV(OC_DIV_ROUND_POW2(dx,1,1),OC_DIV_ROUND_POW2(dy,1,1));
56 }
57
58 /*The function used to fill in the chroma plane motion vectors for a macro
59 block when 4 different motion vectors are specified in the luma plane.
60 This version is for use with chroma decimated in the X direction (4:2:2).
61 _cbmvs: The chroma block-level motion vectors to fill in.
62 _lbmvs: The luma block-level motion vectors.*/
oc_set_chroma_mvs10(oc_mv _cbmvs[4],const oc_mv _lbmvs[4])63 static void oc_set_chroma_mvs10(oc_mv _cbmvs[4],const oc_mv _lbmvs[4]){
64 int dx;
65 int dy;
66 dx=OC_MV_X(_lbmvs[0])+OC_MV_X(_lbmvs[1]);
67 dy=OC_MV_Y(_lbmvs[0])+OC_MV_Y(_lbmvs[1]);
68 _cbmvs[0]=OC_MV(OC_DIV_ROUND_POW2(dx,1,1),OC_DIV_ROUND_POW2(dy,1,1));
69 dx=OC_MV_X(_lbmvs[2])+OC_MV_X(_lbmvs[3]);
70 dy=OC_MV_Y(_lbmvs[2])+OC_MV_Y(_lbmvs[3]);
71 _cbmvs[2]=OC_MV(OC_DIV_ROUND_POW2(dx,1,1),OC_DIV_ROUND_POW2(dy,1,1));
72 }
73
74 /*The function used to fill in the chroma plane motion vectors for a macro
75 block when 4 different motion vectors are specified in the luma plane.
76 This version is for use with no chroma decimation (4:4:4).
77 _cbmvs: The chroma block-level motion vectors to fill in.
78 _lmbmv: The luma macro-block level motion vector to fill in for use in
79 prediction.
80 _lbmvs: The luma block-level motion vectors.*/
oc_set_chroma_mvs11(oc_mv _cbmvs[4],const oc_mv _lbmvs[4])81 static void oc_set_chroma_mvs11(oc_mv _cbmvs[4],const oc_mv _lbmvs[4]){
82 _cbmvs[0]=_lbmvs[0];
83 _cbmvs[1]=_lbmvs[1];
84 _cbmvs[2]=_lbmvs[2];
85 _cbmvs[3]=_lbmvs[3];
86 }
87
88 /*A table of functions used to fill in the chroma plane motion vectors for a
89 macro block when 4 different motion vectors are specified in the luma
90 plane.*/
91 const oc_set_chroma_mvs_func OC_SET_CHROMA_MVS_TABLE[TH_PF_NFORMATS]={
92 (oc_set_chroma_mvs_func)oc_set_chroma_mvs00,
93 (oc_set_chroma_mvs_func)oc_set_chroma_mvs01,
94 (oc_set_chroma_mvs_func)oc_set_chroma_mvs10,
95 (oc_set_chroma_mvs_func)oc_set_chroma_mvs11
96 };
97
98
99
100 /*Returns the fragment index of the top-left block in a macro block.
101 This can be used to test whether or not the whole macro block is valid.
102 _sb_map: The super block map.
103 _quadi: The quadrant number.
104 Return: The index of the fragment of the upper left block in the macro
105 block, or -1 if the block lies outside the coded frame.*/
oc_sb_quad_top_left_frag(oc_sb_map_quad _sb_map[4],int _quadi)106 static ptrdiff_t oc_sb_quad_top_left_frag(oc_sb_map_quad _sb_map[4],int _quadi){
107 /*It so happens that under the Hilbert curve ordering described below, the
108 upper-left block in each macro block is at index 0, except in macro block
109 3, where it is at index 2.*/
110 return _sb_map[_quadi][_quadi&_quadi<<1];
111 }
112
113 /*Fills in the mapping from block positions to fragment numbers for a single
114 color plane.
115 This function also fills in the "valid" flag of each quadrant in the super
116 block flags.
117 _sb_maps: The array of super block maps for the color plane.
118 _sb_flags: The array of super block flags for the color plane.
119 _frag0: The index of the first fragment in the plane.
120 _hfrags: The number of horizontal fragments in a coded frame.
121 _vfrags: The number of vertical fragments in a coded frame.*/
oc_sb_create_plane_mapping(oc_sb_map _sb_maps[],oc_sb_flags _sb_flags[],ptrdiff_t _frag0,int _hfrags,int _vfrags)122 static void oc_sb_create_plane_mapping(oc_sb_map _sb_maps[],
123 oc_sb_flags _sb_flags[],ptrdiff_t _frag0,int _hfrags,int _vfrags){
124 /*Contains the (macro_block,block) indices for a 4x4 grid of
125 fragments.
126 The pattern is a 4x4 Hilbert space-filling curve.
127 A Hilbert curve has the nice property that as the curve grows larger, its
128 fractal dimension approaches 2.
129 The intuition is that nearby blocks in the curve are also close spatially,
130 with the previous element always an immediate neighbor, so that runs of
131 blocks should be well correlated.*/
132 static const int SB_MAP[4][4][2]={
133 {{0,0},{0,1},{3,2},{3,3}},
134 {{0,3},{0,2},{3,1},{3,0}},
135 {{1,0},{1,3},{2,0},{2,3}},
136 {{1,1},{1,2},{2,1},{2,2}}
137 };
138 ptrdiff_t yfrag;
139 unsigned sbi;
140 int y;
141 sbi=0;
142 yfrag=_frag0;
143 for(y=0;;y+=4){
144 int imax;
145 int x;
146 /*Figure out how many columns of blocks in this super block lie within the
147 image.*/
148 imax=_vfrags-y;
149 if(imax>4)imax=4;
150 else if(imax<=0)break;
151 for(x=0;;x+=4,sbi++){
152 ptrdiff_t xfrag;
153 int jmax;
154 int quadi;
155 int i;
156 /*Figure out how many rows of blocks in this super block lie within the
157 image.*/
158 jmax=_hfrags-x;
159 if(jmax>4)jmax=4;
160 else if(jmax<=0)break;
161 /*By default, set all fragment indices to -1.*/
162 memset(_sb_maps[sbi],0xFF,sizeof(_sb_maps[sbi]));
163 /*Fill in the fragment map for this super block.*/
164 xfrag=yfrag+x;
165 for(i=0;i<imax;i++){
166 int j;
167 for(j=0;j<jmax;j++){
168 _sb_maps[sbi][SB_MAP[i][j][0]][SB_MAP[i][j][1]]=xfrag+j;
169 }
170 xfrag+=_hfrags;
171 }
172 /*Mark which quadrants of this super block lie within the image.*/
173 for(quadi=0;quadi<4;quadi++){
174 _sb_flags[sbi].quad_valid|=
175 (oc_sb_quad_top_left_frag(_sb_maps[sbi],quadi)>=0)<<quadi;
176 }
177 }
178 yfrag+=_hfrags<<2;
179 }
180 }
181
182 /*Fills in the Y plane fragment map for a macro block given the fragment
183 coordinates of its upper-left hand corner.
184 _mb_map: The macro block map to fill.
185 _fplane: The description of the Y plane.
186 _xfrag0: The X location of the upper-left hand fragment in the luma plane.
187 _yfrag0: The Y location of the upper-left hand fragment in the luma plane.*/
oc_mb_fill_ymapping(oc_mb_map_plane _mb_map[3],const oc_fragment_plane * _fplane,int _xfrag0,int _yfrag0)188 static void oc_mb_fill_ymapping(oc_mb_map_plane _mb_map[3],
189 const oc_fragment_plane *_fplane,int _xfrag0,int _yfrag0){
190 int i;
191 int j;
192 for(i=0;i<2;i++)for(j=0;j<2;j++){
193 _mb_map[0][i<<1|j]=(_yfrag0+i)*(ptrdiff_t)_fplane->nhfrags+_xfrag0+j;
194 }
195 }
196
197 /*Fills in the chroma plane fragment maps for a macro block.
198 This version is for use with chroma decimated in the X and Y directions
199 (4:2:0).
200 _mb_map: The macro block map to fill.
201 _fplanes: The descriptions of the fragment planes.
202 _xfrag0: The X location of the upper-left hand fragment in the luma plane.
203 _yfrag0: The Y location of the upper-left hand fragment in the luma plane.*/
oc_mb_fill_cmapping00(oc_mb_map_plane _mb_map[3],const oc_fragment_plane _fplanes[3],int _xfrag0,int _yfrag0)204 static void oc_mb_fill_cmapping00(oc_mb_map_plane _mb_map[3],
205 const oc_fragment_plane _fplanes[3],int _xfrag0,int _yfrag0){
206 ptrdiff_t fragi;
207 _xfrag0>>=1;
208 _yfrag0>>=1;
209 fragi=_yfrag0*(ptrdiff_t)_fplanes[1].nhfrags+_xfrag0;
210 _mb_map[1][0]=fragi+_fplanes[1].froffset;
211 _mb_map[2][0]=fragi+_fplanes[2].froffset;
212 }
213
214 /*Fills in the chroma plane fragment maps for a macro block.
215 This version is for use with chroma decimated in the Y direction.
216 _mb_map: The macro block map to fill.
217 _fplanes: The descriptions of the fragment planes.
218 _xfrag0: The X location of the upper-left hand fragment in the luma plane.
219 _yfrag0: The Y location of the upper-left hand fragment in the luma plane.*/
oc_mb_fill_cmapping01(oc_mb_map_plane _mb_map[3],const oc_fragment_plane _fplanes[3],int _xfrag0,int _yfrag0)220 static void oc_mb_fill_cmapping01(oc_mb_map_plane _mb_map[3],
221 const oc_fragment_plane _fplanes[3],int _xfrag0,int _yfrag0){
222 ptrdiff_t fragi;
223 int j;
224 _yfrag0>>=1;
225 fragi=_yfrag0*(ptrdiff_t)_fplanes[1].nhfrags+_xfrag0;
226 for(j=0;j<2;j++){
227 _mb_map[1][j]=fragi+_fplanes[1].froffset;
228 _mb_map[2][j]=fragi+_fplanes[2].froffset;
229 fragi++;
230 }
231 }
232
233 /*Fills in the chroma plane fragment maps for a macro block.
234 This version is for use with chroma decimated in the X direction (4:2:2).
235 _mb_map: The macro block map to fill.
236 _fplanes: The descriptions of the fragment planes.
237 _xfrag0: The X location of the upper-left hand fragment in the luma plane.
238 _yfrag0: The Y location of the upper-left hand fragment in the luma plane.*/
oc_mb_fill_cmapping10(oc_mb_map_plane _mb_map[3],const oc_fragment_plane _fplanes[3],int _xfrag0,int _yfrag0)239 static void oc_mb_fill_cmapping10(oc_mb_map_plane _mb_map[3],
240 const oc_fragment_plane _fplanes[3],int _xfrag0,int _yfrag0){
241 ptrdiff_t fragi;
242 int i;
243 _xfrag0>>=1;
244 fragi=_yfrag0*(ptrdiff_t)_fplanes[1].nhfrags+_xfrag0;
245 for(i=0;i<2;i++){
246 _mb_map[1][i<<1]=fragi+_fplanes[1].froffset;
247 _mb_map[2][i<<1]=fragi+_fplanes[2].froffset;
248 fragi+=_fplanes[1].nhfrags;
249 }
250 }
251
252 /*Fills in the chroma plane fragment maps for a macro block.
253 This version is for use with no chroma decimation (4:4:4).
254 This uses the already filled-in luma plane values.
255 _mb_map: The macro block map to fill.
256 _fplanes: The descriptions of the fragment planes.*/
oc_mb_fill_cmapping11(oc_mb_map_plane _mb_map[3],const oc_fragment_plane _fplanes[3])257 static void oc_mb_fill_cmapping11(oc_mb_map_plane _mb_map[3],
258 const oc_fragment_plane _fplanes[3]){
259 int k;
260 for(k=0;k<4;k++){
261 _mb_map[1][k]=_mb_map[0][k]+_fplanes[1].froffset;
262 _mb_map[2][k]=_mb_map[0][k]+_fplanes[2].froffset;
263 }
264 }
265
266 /*The function type used to fill in the chroma plane fragment maps for a
267 macro block.
268 _mb_map: The macro block map to fill.
269 _fplanes: The descriptions of the fragment planes.
270 _xfrag0: The X location of the upper-left hand fragment in the luma plane.
271 _yfrag0: The Y location of the upper-left hand fragment in the luma plane.*/
272 typedef void (*oc_mb_fill_cmapping_func)(oc_mb_map_plane _mb_map[3],
273 const oc_fragment_plane _fplanes[3],int _xfrag0,int _yfrag0);
274
275 /*A table of functions used to fill in the chroma plane fragment maps for a
276 macro block for each type of chrominance decimation.*/
277 static const oc_mb_fill_cmapping_func OC_MB_FILL_CMAPPING_TABLE[4]={
278 oc_mb_fill_cmapping00,
279 oc_mb_fill_cmapping01,
280 oc_mb_fill_cmapping10,
281 (oc_mb_fill_cmapping_func)oc_mb_fill_cmapping11
282 };
283
284 /*Fills in the mapping from macro blocks to their corresponding fragment
285 numbers in each plane.
286 _mb_maps: The list of macro block maps.
287 _mb_modes: The list of macro block modes; macro blocks completely outside
288 the coded region are marked invalid.
289 _fplanes: The descriptions of the fragment planes.
290 _pixel_fmt: The chroma decimation type.*/
oc_mb_create_mapping(oc_mb_map _mb_maps[],signed char _mb_modes[],const oc_fragment_plane _fplanes[3],int _pixel_fmt)291 static void oc_mb_create_mapping(oc_mb_map _mb_maps[],
292 signed char _mb_modes[],const oc_fragment_plane _fplanes[3],int _pixel_fmt){
293 oc_mb_fill_cmapping_func mb_fill_cmapping;
294 unsigned sbi;
295 int y;
296 mb_fill_cmapping=OC_MB_FILL_CMAPPING_TABLE[_pixel_fmt];
297 /*Loop through the luma plane super blocks.*/
298 for(sbi=y=0;y<_fplanes[0].nvfrags;y+=4){
299 int x;
300 for(x=0;x<_fplanes[0].nhfrags;x+=4,sbi++){
301 int ymb;
302 /*Loop through the macro blocks in each super block in display order.*/
303 for(ymb=0;ymb<2;ymb++){
304 int xmb;
305 for(xmb=0;xmb<2;xmb++){
306 unsigned mbi;
307 int mbx;
308 int mby;
309 mbi=sbi<<2|OC_MB_MAP[ymb][xmb];
310 mbx=x|xmb<<1;
311 mby=y|ymb<<1;
312 /*Initialize fragment indices to -1.*/
313 memset(_mb_maps[mbi],0xFF,sizeof(_mb_maps[mbi]));
314 /*Make sure this macro block is within the encoded region.*/
315 if(mbx>=_fplanes[0].nhfrags||mby>=_fplanes[0].nvfrags){
316 _mb_modes[mbi]=OC_MODE_INVALID;
317 continue;
318 }
319 /*Fill in the fragment indices for the luma plane.*/
320 oc_mb_fill_ymapping(_mb_maps[mbi],_fplanes,mbx,mby);
321 /*Fill in the fragment indices for the chroma planes.*/
322 (*mb_fill_cmapping)(_mb_maps[mbi],_fplanes,mbx,mby);
323 }
324 }
325 }
326 }
327 }
328
329 /*Marks the fragments which fall all or partially outside the displayable
330 region of the frame.
331 _state: The Theora state containing the fragments to be marked.*/
oc_state_border_init(oc_theora_state * _state)332 static void oc_state_border_init(oc_theora_state *_state){
333 oc_fragment *frag;
334 oc_fragment *yfrag_end;
335 oc_fragment *xfrag_end;
336 oc_fragment_plane *fplane;
337 int crop_x0;
338 int crop_y0;
339 int crop_xf;
340 int crop_yf;
341 int pli;
342 int y;
343 int x;
344 /*The method we use here is slow, but the code is dead simple and handles
345 all the special cases easily.
346 We only ever need to do it once.*/
347 /*Loop through the fragments, marking those completely outside the
348 displayable region and constructing a border mask for those that straddle
349 the border.*/
350 _state->nborders=0;
351 yfrag_end=frag=_state->frags;
352 for(pli=0;pli<3;pli++){
353 fplane=_state->fplanes+pli;
354 /*Set up the cropping rectangle for this plane.*/
355 crop_x0=_state->info.pic_x;
356 crop_xf=_state->info.pic_x+_state->info.pic_width;
357 crop_y0=_state->info.pic_y;
358 crop_yf=_state->info.pic_y+_state->info.pic_height;
359 if(pli>0){
360 if(!(_state->info.pixel_fmt&1)){
361 crop_x0=crop_x0>>1;
362 crop_xf=crop_xf+1>>1;
363 }
364 if(!(_state->info.pixel_fmt&2)){
365 crop_y0=crop_y0>>1;
366 crop_yf=crop_yf+1>>1;
367 }
368 }
369 y=0;
370 for(yfrag_end+=fplane->nfrags;frag<yfrag_end;y+=8){
371 x=0;
372 for(xfrag_end=frag+fplane->nhfrags;frag<xfrag_end;frag++,x+=8){
373 /*First check to see if this fragment is completely outside the
374 displayable region.*/
375 /*Note the special checks for an empty cropping rectangle.
376 This guarantees that if we count a fragment as straddling the
377 border below, at least one pixel in the fragment will be inside
378 the displayable region.*/
379 if(x+8<=crop_x0||crop_xf<=x||y+8<=crop_y0||crop_yf<=y||
380 crop_x0>=crop_xf||crop_y0>=crop_yf){
381 frag->invalid=1;
382 }
383 /*Otherwise, check to see if it straddles the border.*/
384 else if(x<crop_x0&&crop_x0<x+8||x<crop_xf&&crop_xf<x+8||
385 y<crop_y0&&crop_y0<y+8||y<crop_yf&&crop_yf<y+8){
386 ogg_int64_t mask;
387 int npixels;
388 int i;
389 mask=npixels=0;
390 for(i=0;i<8;i++){
391 int j;
392 for(j=0;j<8;j++){
393 if(x+j>=crop_x0&&x+j<crop_xf&&y+i>=crop_y0&&y+i<crop_yf){
394 mask|=(ogg_int64_t)1<<(i<<3|j);
395 npixels++;
396 }
397 }
398 }
399 /*Search the fragment array for border info with the same pattern.
400 In general, there will be at most 8 different patterns (per
401 plane).*/
402 for(i=0;;i++){
403 if(i>=_state->nborders){
404 _state->nborders++;
405 _state->borders[i].mask=mask;
406 _state->borders[i].npixels=npixels;
407 }
408 else if(_state->borders[i].mask!=mask)continue;
409 frag->borderi=i;
410 break;
411 }
412 }
413 else frag->borderi=-1;
414 }
415 }
416 }
417 }
418
/*Computes the fragment, super block and macro block layout of all three
   planes from the frame size and pixel format in _state->info, allocates the
   corresponding arrays, and initializes the super block maps, macro block
   maps and per-fragment border information.
  _state: The state to initialize; _state->info must already be filled in.
  Return: 0 on success, TH_EIMPL if the required sizes overflow, or TH_EFAULT
           if an allocation fails.
          On TH_EFAULT, any arrays that were successfully allocated are left
           in _state for the caller to release (oc_state_init() does so with
           oc_state_frarray_clear()).*/
static int oc_state_frarray_init(oc_theora_state *_state) {
  oc_fragment_plane *fplanes;
  ptrdiff_t          yfrags;
  ptrdiff_t          cfrags;
  ptrdiff_t          nfrags;
  size_t             nmbs;
  unsigned           yhsbs;
  unsigned           yvsbs;
  unsigned           chsbs;
  unsigned           cvsbs;
  unsigned           ysbs;
  unsigned           csbs;
  unsigned           nsbs;
  int                yhfrags;
  int                yvfrags;
  int                chfrags;
  int                cvfrags;
  int                hdec;
  int                vdec;
  int                pli;
  /*Figure out the number of fragments in each plane.
    The frame dimensions have already been validated to be multiples of 16.*/
  yhfrags = _state->info.frame_width >> 3;
  yvfrags = _state->info.frame_height >> 3;
  /*hdec and vdec are 1 when chroma is decimated in that direction.*/
  hdec = !(_state->info.pixel_fmt & 1);
  vdec = !(_state->info.pixel_fmt & 2);
  chfrags = (yhfrags + hdec) >> hdec;
  cvfrags = (yvfrags + vdec) >> vdec;
  yfrags = yhfrags * (ptrdiff_t)yvfrags;
  cfrags = chfrags * (ptrdiff_t)cvfrags;
  nfrags = yfrags + 2 * cfrags;
  /*Figure out the number of super blocks (4x4 fragments, rounded up) in each
     plane, and the number of macro blocks (4 per luma super block).*/
  yhsbs = (yhfrags + 3) >> 2;
  yvsbs = (yvfrags + 3) >> 2;
  chsbs = (chfrags + 3) >> 2;
  cvsbs = (cvfrags + 3) >> 2;
  ysbs = yhsbs * yvsbs;
  csbs = chsbs * cvsbs;
  nsbs = ysbs + 2 * csbs;
  nmbs = (size_t)ysbs << 2;
  /*Check for overflow.
    The upper limits of the specification (1048560 by 1048560, or 3 TB
     frames) are supported if the target architecture has 64-bit pointers,
     but those with 32-bit pointers (or smaller) require this check.
    Callers who want to prevent denial-of-service by imposing a more
     reasonable upper limit on the size of attempted allocations must do so
     themselves; there is no platform independent way to determine how much
     system memory there is, nor an application-independent way to decide
     what a "reasonable" allocation is.*/
  if (yfrags / yhfrags != yvfrags || 2 * cfrags < cfrags || nfrags < yfrags ||
   ysbs / yhsbs != yvsbs || 2 * csbs < csbs || nsbs < ysbs ||
   nmbs >> 2 != ysbs) {
    return TH_EIMPL;
  }
  /*Describe the luma plane, which comes first in every array.*/
  fplanes = _state->fplanes;
  fplanes[0].nhfrags = yhfrags;
  fplanes[0].nvfrags = yvfrags;
  fplanes[0].froffset = 0;
  fplanes[0].nfrags = yfrags;
  fplanes[0].nhsbs = yhsbs;
  fplanes[0].nvsbs = yvsbs;
  fplanes[0].sboffset = 0;
  fplanes[0].nsbs = ysbs;
  /*Describe the two chroma planes, which have identical dimensions and
     follow the luma plane back to back.*/
  for (pli = 1; pli < 3; pli++) {
    fplanes[pli].nhfrags = chfrags;
    fplanes[pli].nvfrags = cvfrags;
    fplanes[pli].froffset = yfrags + (pli - 1) * cfrags;
    fplanes[pli].nfrags = cfrags;
    fplanes[pli].nhsbs = chsbs;
    fplanes[pli].nvsbs = cvsbs;
    fplanes[pli].sboffset = ysbs + (pli - 1) * csbs;
    fplanes[pli].nsbs = csbs;
  }
  /*Allocate the arrays.
    The fragments, super block flags, macro block maps and macro block modes
     are zero-initialized; the others are not.*/
  _state->nfrags = nfrags;
  _state->frags = _ogg_calloc(nfrags, sizeof(*_state->frags));
  _state->frag_mvs = _ogg_malloc(nfrags * sizeof(*_state->frag_mvs));
  _state->nsbs = nsbs;
  _state->sb_maps = _ogg_malloc(nsbs * sizeof(*_state->sb_maps));
  _state->sb_flags = _ogg_calloc(nsbs, sizeof(*_state->sb_flags));
  _state->nhmbs = yhsbs << 1;
  _state->nvmbs = yvsbs << 1;
  _state->nmbs = nmbs;
  _state->mb_maps = _ogg_calloc(nmbs, sizeof(*_state->mb_maps));
  _state->mb_modes = _ogg_calloc(nmbs, sizeof(*_state->mb_modes));
  _state->coded_fragis = _ogg_malloc(nfrags * sizeof(*_state->coded_fragis));
  if (!(_state->frags != NULL && _state->frag_mvs != NULL &&
   _state->sb_maps != NULL && _state->sb_flags != NULL &&
   _state->mb_maps != NULL && _state->mb_modes != NULL &&
   _state->coded_fragis != NULL)) {
    return TH_EFAULT;
  }
  /*Create the mapping from super blocks to fragments.*/
  for (pli = 0; pli < 3; pli++) {
    const oc_fragment_plane *fplane;
    fplane = fplanes + pli;
    oc_sb_create_plane_mapping(_state->sb_maps + fplane->sboffset,
     _state->sb_flags + fplane->sboffset, fplane->froffset,
     fplane->nhfrags, fplane->nvfrags);
  }
  /*Create the mapping from macro blocks to fragments.*/
  oc_mb_create_mapping(_state->mb_maps, _state->mb_modes,
   fplanes, _state->info.pixel_fmt);
  /*Initialize the invalid and borderi fields of each fragment.*/
  oc_state_border_init(_state);
  return 0;
}
522
/*Releases the arrays allocated by oc_state_frarray_init().
  This is safe to call on a partially initialized state, since unallocated
   arrays are NULL.
  Each pointer is reset to NULL after it is freed, so that the state holds no
   dangling pointers and calling this function (or oc_state_clear()) again,
   e.g., after oc_state_init() has failed and already cleaned up, does not
   free the same memory twice.
  _state: The state whose fragment arrays should be freed.*/
static void oc_state_frarray_clear(oc_theora_state *_state) {
  _ogg_free(_state->coded_fragis);
  _state->coded_fragis = NULL;
  _ogg_free(_state->mb_modes);
  _state->mb_modes = NULL;
  _ogg_free(_state->mb_maps);
  _state->mb_maps = NULL;
  _ogg_free(_state->sb_flags);
  _state->sb_flags = NULL;
  _ogg_free(_state->sb_maps);
  _state->sb_maps = NULL;
  _ogg_free(_state->frag_mvs);
  _state->frag_mvs = NULL;
  _ogg_free(_state->frags);
  _state->frags = NULL;
}
532
533
534 /*Initializes the buffers used for reconstructed frames.
535 These buffers are padded with 16 extra pixels on each side, to allow
536 unrestricted motion vectors without special casing the boundary.
537 If chroma is decimated in either direction, the padding is reduced by a
538 factor of 2 on the appropriate sides.
539 _nrefs: The number of reference buffers to init; must be in the range 3...6.*/
oc_state_ref_bufs_init(oc_theora_state * _state,int _nrefs)540 static int oc_state_ref_bufs_init(oc_theora_state *_state,int _nrefs){
541 th_info *info;
542 unsigned char *ref_frame_data;
543 size_t ref_frame_data_sz;
544 size_t ref_frame_sz;
545 size_t yplane_sz;
546 size_t cplane_sz;
547 int yhstride;
548 int yheight;
549 int chstride;
550 int cheight;
551 ptrdiff_t align;
552 ptrdiff_t yoffset;
553 ptrdiff_t coffset;
554 ptrdiff_t *frag_buf_offs;
555 ptrdiff_t fragi;
556 int hdec;
557 int vdec;
558 int rfi;
559 int pli;
560 if(_nrefs<3||_nrefs>6)return TH_EINVAL;
561 info=&_state->info;
562 /*Compute the image buffer parameters for each plane.*/
563 hdec=!(info->pixel_fmt&1);
564 vdec=!(info->pixel_fmt&2);
565 yhstride=info->frame_width+2*OC_UMV_PADDING;
566 yheight=info->frame_height+2*OC_UMV_PADDING;
567 /*Require 16-byte aligned rows in the chroma planes.*/
568 chstride=(yhstride>>hdec)+15&~15;
569 cheight=yheight>>vdec;
570 yplane_sz=yhstride*(size_t)yheight;
571 cplane_sz=chstride*(size_t)cheight;
572 yoffset=OC_UMV_PADDING+OC_UMV_PADDING*(ptrdiff_t)yhstride;
573 coffset=(OC_UMV_PADDING>>hdec)+(OC_UMV_PADDING>>vdec)*(ptrdiff_t)chstride;
574 /*Although we guarantee the rows of the chroma planes are a multiple of 16
575 bytes, the initial padding on the first row may only be 8 bytes.
576 Compute the offset needed to the actual image data to a multiple of 16.*/
577 align=-coffset&15;
578 ref_frame_sz=yplane_sz+2*cplane_sz+16;
579 ref_frame_data_sz=_nrefs*ref_frame_sz;
580 /*Check for overflow.
581 The same caveats apply as for oc_state_frarray_init().*/
582 if(yplane_sz/yhstride!=(size_t)yheight||2*cplane_sz+16<cplane_sz||
583 ref_frame_sz<yplane_sz||ref_frame_data_sz/_nrefs!=ref_frame_sz){
584 return TH_EIMPL;
585 }
586 ref_frame_data=oc_aligned_malloc(ref_frame_data_sz,16);
587 frag_buf_offs=_state->frag_buf_offs=
588 _ogg_malloc(_state->nfrags*sizeof(*frag_buf_offs));
589 if(ref_frame_data==NULL||frag_buf_offs==NULL){
590 _ogg_free(frag_buf_offs);
591 oc_aligned_free(ref_frame_data);
592 return TH_EFAULT;
593 }
594 /*Set up the width, height and stride for the image buffers.*/
595 _state->ref_frame_bufs[0][0].width=info->frame_width;
596 _state->ref_frame_bufs[0][0].height=info->frame_height;
597 _state->ref_frame_bufs[0][0].stride=yhstride;
598 _state->ref_frame_bufs[0][1].width=_state->ref_frame_bufs[0][2].width=
599 info->frame_width>>hdec;
600 _state->ref_frame_bufs[0][1].height=_state->ref_frame_bufs[0][2].height=
601 info->frame_height>>vdec;
602 _state->ref_frame_bufs[0][1].stride=_state->ref_frame_bufs[0][2].stride=
603 chstride;
604 for(rfi=1;rfi<_nrefs;rfi++){
605 memcpy(_state->ref_frame_bufs[rfi],_state->ref_frame_bufs[0],
606 sizeof(_state->ref_frame_bufs[0]));
607 }
608 _state->ref_frame_handle=ref_frame_data;
609 /*Set up the data pointers for the image buffers.*/
610 for(rfi=0;rfi<_nrefs;rfi++){
611 _state->ref_frame_bufs[rfi][0].data=ref_frame_data+yoffset;
612 ref_frame_data+=yplane_sz+align;
613 _state->ref_frame_bufs[rfi][1].data=ref_frame_data+coffset;
614 ref_frame_data+=cplane_sz;
615 _state->ref_frame_bufs[rfi][2].data=ref_frame_data+coffset;
616 ref_frame_data+=cplane_sz+(16-align);
617 /*Flip the buffer upside down.
618 This allows us to decode Theora's bottom-up frames in their natural
619 order, yet return a top-down buffer with a positive stride to the user.*/
620 oc_ycbcr_buffer_flip(_state->ref_frame_bufs[rfi],
621 _state->ref_frame_bufs[rfi]);
622 }
623 _state->ref_ystride[0]=-yhstride;
624 _state->ref_ystride[1]=_state->ref_ystride[2]=-chstride;
625 /*Initialize the fragment buffer offsets.*/
626 ref_frame_data=_state->ref_frame_bufs[0][0].data;
627 fragi=0;
628 for(pli=0;pli<3;pli++){
629 th_img_plane *iplane;
630 oc_fragment_plane *fplane;
631 unsigned char *vpix;
632 ptrdiff_t stride;
633 ptrdiff_t vfragi_end;
634 int nhfrags;
635 iplane=_state->ref_frame_bufs[0]+pli;
636 fplane=_state->fplanes+pli;
637 vpix=iplane->data;
638 vfragi_end=fplane->froffset+fplane->nfrags;
639 nhfrags=fplane->nhfrags;
640 stride=iplane->stride;
641 while(fragi<vfragi_end){
642 ptrdiff_t hfragi_end;
643 unsigned char *hpix;
644 hpix=vpix;
645 for(hfragi_end=fragi+nhfrags;fragi<hfragi_end;fragi++){
646 frag_buf_offs[fragi]=hpix-ref_frame_data;
647 hpix+=8;
648 }
649 vpix+=stride<<3;
650 }
651 }
652 /*Initialize the reference frame pointers and indices.*/
653 _state->ref_frame_idx[OC_FRAME_GOLD]=
654 _state->ref_frame_idx[OC_FRAME_PREV]=
655 _state->ref_frame_idx[OC_FRAME_GOLD_ORIG]=
656 _state->ref_frame_idx[OC_FRAME_PREV_ORIG]=
657 _state->ref_frame_idx[OC_FRAME_SELF]=
658 _state->ref_frame_idx[OC_FRAME_IO]=-1;
659 _state->ref_frame_data[OC_FRAME_GOLD]=
660 _state->ref_frame_data[OC_FRAME_PREV]=
661 _state->ref_frame_data[OC_FRAME_GOLD_ORIG]=
662 _state->ref_frame_data[OC_FRAME_PREV_ORIG]=
663 _state->ref_frame_data[OC_FRAME_SELF]=
664 _state->ref_frame_data[OC_FRAME_IO]=NULL;
665 return 0;
666 }
667
/*Releases the buffers allocated by oc_state_ref_bufs_init().
  Both pointers are reset to NULL after they are freed, so that the state
   holds no dangling pointers and a repeated call does not free the same
   memory twice.
  _state: The state whose reference frame buffers should be freed.*/
static void oc_state_ref_bufs_clear(oc_theora_state *_state) {
  _ogg_free(_state->frag_buf_offs);
  _state->frag_buf_offs = NULL;
  oc_aligned_free(_state->ref_frame_handle);
  _state->ref_frame_handle = NULL;
}
672
673
/*Selects the portable C implementation of every accelerated routine.
  The CPU flags are cleared and, when OC_STATE_USE_VTABLE is defined, every
   entry of the function table is pointed at its *_c version.
  The forward zig-zag table in the optimization data is set to OC_FZIG_ZAG.
  _state: The state to set up.*/
void oc_state_accel_init_c(oc_theora_state *_state) {
  _state->cpu_flags = 0;
#ifdef OC_STATE_USE_VTABLE
  /*Fragment copy and reconstruction.*/
  _state->opt_vtable.frag_copy = oc_frag_copy_c;
  _state->opt_vtable.frag_copy_list = oc_frag_copy_list_c;
  _state->opt_vtable.frag_recon_intra = oc_frag_recon_intra_c;
  _state->opt_vtable.frag_recon_inter = oc_frag_recon_inter_c;
  _state->opt_vtable.frag_recon_inter2 = oc_frag_recon_inter2_c;
  _state->opt_vtable.idct8x8 = oc_idct8x8_c;
  _state->opt_vtable.state_frag_recon = oc_state_frag_recon_c;
  /*Loop filtering.*/
  _state->opt_vtable.loop_filter_init = oc_loop_filter_init_c;
  _state->opt_vtable.state_loop_filter_frag_rows =
   oc_state_loop_filter_frag_rows_c;
  _state->opt_vtable.restore_fpu = oc_restore_fpu_c;
#endif
  _state->opt_data.dct_fzig_zag = OC_FZIG_ZAG;
}
691
692
/*Initializes the shared codec state from a set of stream parameters.
  _state is zeroed, *_info is copied into it (with pic_y converted to
   Theora's bottom-up coordinate system), the accelerated function pointers
   are set up, and the fragment arrays and reference frame buffers are
   allocated.
  _state: The state to initialize.
  _info:  The stream parameters.
  _nrefs: The number of reference buffers to allocate; see
           oc_state_ref_bufs_init().
  Return: 0 on success, TH_EFAULT if _info is NULL or an allocation fails,
           TH_EINVAL if the parameters are invalid, or TH_EIMPL if the frame
           is too large for this platform.
          On failure everything that was allocated has been freed again.*/
int oc_state_init(oc_theora_state *_state, const th_info *_info, int _nrefs) {
  int ret;
  /*First validate the parameters.*/
  if (_info == NULL) return TH_EFAULT;
  /*The width and height of the encoded frame must be multiples of 16.*/
  if ((_info->frame_width & 0xF) || (_info->frame_height & 0xF)) {
    return TH_EINVAL;
  }
  /*They must also, when divided by 16, fit into a 16-bit unsigned integer.*/
  if (_info->frame_width <= 0 || _info->frame_width >= 0x100000 ||
   _info->frame_height <= 0 || _info->frame_height >= 0x100000) {
    return TH_EINVAL;
  }
  /*The displayable frame must fit inside the encoded frame.*/
  if (_info->pic_x + _info->pic_width > _info->frame_width ||
   _info->pic_y + _info->pic_height > _info->frame_height) {
    return TH_EINVAL;
  }
  /*The displayable frame offset coordinates must fit into an 8-bit unsigned
     integer.
    Note that the offset Y in the API is specified on the opposite side from
     how it is specified in the bitstream, because the Y axis is flipped in
     the bitstream.*/
  if (_info->pic_x > 255 ||
   _info->frame_height - _info->pic_height - _info->pic_y > 255) {
    return TH_EINVAL;
  }
  /*The color space and pixel format must be ones known by the codec.
    Note: the following <0 comparisons may generate spurious warnings on
     platforms where enums are unsigned.
    We could cast them to unsigned and just use the following >= comparison,
     but there are a number of compilers which will mis-optimize this.
    It's better to live with the spurious warnings.*/
  if (_info->colorspace < 0 || _info->colorspace >= TH_CS_NSPACES ||
   _info->pixel_fmt < 0 || _info->pixel_fmt >= TH_PF_NFORMATS) {
    return TH_EINVAL;
  }
  memset(_state, 0, sizeof(*_state));
  memcpy(&_state->info, _info, sizeof(*_info));
  /*Invert the sense of pic_y to match Theora's right-handed coordinate
     system.*/
  _state->info.pic_y = _info->frame_height - _info->pic_height - _info->pic_y;
  _state->frame_type = OC_UNKWN_FRAME;
  oc_state_accel_init(_state);
  /*Allocate the fragment arrays first; the reference buffers depend on the
     fragment layout.*/
  ret = oc_state_frarray_init(_state);
  if (ret >= 0) ret = oc_state_ref_bufs_init(_state, _nrefs);
  if (ret < 0) {
    /*oc_state_ref_bufs_init() cleans up after itself, so only the fragment
       arrays need to be released here.*/
    oc_state_frarray_clear(_state);
    return ret;
  }
  /*If the keyframe_granule_shift is out of range, use the maximum allowable
     value.*/
  if (_info->keyframe_granule_shift < 0 ||
   _info->keyframe_granule_shift > 31) {
    _state->info.keyframe_granule_shift = 31;
  }
  _state->keyframe_num = 0;
  _state->curframe_num = -1;
  /*3.2.0 streams mark the frame index instead of the frame count.
    This was changed with stream version 3.2.1 to conform to other Ogg
     codecs.
    We add an extra bias when computing granule positions for new streams.*/
  _state->granpos_bias = TH_VERSION_CHECK(_info, 3, 2, 1);
  return 0;
}
748
/*Releases everything allocated by a successful oc_state_init(): first the
   reference frame buffers, then the fragment arrays.
  _state: The state to clear.*/
void oc_state_clear(oc_theora_state *_state) {
  oc_state_ref_bufs_clear(_state);
  oc_state_frarray_clear(_state);
}
753
754
755 /*Duplicates the pixels on the border of the image plane out into the
756 surrounding padding for use by unrestricted motion vectors.
757 This function only adds the left and right borders, and only for the fragment
758 rows specified.
759 _refi: The index of the reference buffer to pad.
760 _pli: The color plane.
761 _y0: The Y coordinate of the first row to pad.
762 _yend: The Y coordinate of the row to stop padding at.*/
oc_state_borders_fill_rows(oc_theora_state * _state,int _refi,int _pli,int _y0,int _yend)763 void oc_state_borders_fill_rows(oc_theora_state *_state,int _refi,int _pli,
764 int _y0,int _yend){
765 th_img_plane *iplane;
766 unsigned char *apix;
767 unsigned char *bpix;
768 unsigned char *epix;
769 int stride;
770 int hpadding;
771 hpadding=OC_UMV_PADDING>>(_pli!=0&&!(_state->info.pixel_fmt&1));
772 iplane=_state->ref_frame_bufs[_refi]+_pli;
773 stride=iplane->stride;
774 apix=iplane->data+_y0*(ptrdiff_t)stride;
775 bpix=apix+iplane->width-1;
776 epix=iplane->data+_yend*(ptrdiff_t)stride;
777 /*Note the use of != instead of <, which allows the stride to be negative.*/
778 while(apix!=epix){
779 memset(apix-hpadding,apix[0],hpadding);
780 memset(bpix+1,bpix[0],hpadding);
781 apix+=stride;
782 bpix+=stride;
783 }
784 }
785
786 /*Duplicates the pixels on the border of the image plane out into the
787 surrounding padding for use by unrestricted motion vectors.
788 This function only adds the top and bottom borders, and must be called after
789 the left and right borders are added.
790 _refi: The index of the reference buffer to pad.
791 _pli: The color plane.*/
oc_state_borders_fill_caps(oc_theora_state * _state,int _refi,int _pli)792 void oc_state_borders_fill_caps(oc_theora_state *_state,int _refi,int _pli){
793 th_img_plane *iplane;
794 unsigned char *apix;
795 unsigned char *bpix;
796 unsigned char *epix;
797 int stride;
798 int hpadding;
799 int vpadding;
800 int fullw;
801 hpadding=OC_UMV_PADDING>>(_pli!=0&&!(_state->info.pixel_fmt&1));
802 vpadding=OC_UMV_PADDING>>(_pli!=0&&!(_state->info.pixel_fmt&2));
803 iplane=_state->ref_frame_bufs[_refi]+_pli;
804 stride=iplane->stride;
805 fullw=iplane->width+(hpadding<<1);
806 apix=iplane->data-hpadding;
807 bpix=iplane->data+(iplane->height-1)*(ptrdiff_t)stride-hpadding;
808 epix=apix-stride*(ptrdiff_t)vpadding;
809 while(apix!=epix){
810 memcpy(apix-stride,apix,fullw);
811 memcpy(bpix+stride,bpix,fullw);
812 apix-=stride;
813 bpix+=stride;
814 }
815 }
816
817 /*Duplicates the pixels on the border of the given reference image out into
818 the surrounding padding for use by unrestricted motion vectors.
819 _state: The context containing the reference buffers.
820 _refi: The index of the reference buffer to pad.*/
oc_state_borders_fill(oc_theora_state * _state,int _refi)821 void oc_state_borders_fill(oc_theora_state *_state,int _refi){
822 int pli;
823 for(pli=0;pli<3;pli++){
824 oc_state_borders_fill_rows(_state,_refi,pli,0,
825 _state->ref_frame_bufs[_refi][pli].height);
826 oc_state_borders_fill_caps(_state,_refi,pli);
827 }
828 }
829
830 /*Determines the offsets in an image buffer to use for motion compensation.
831 _state: The Theora state the offsets are to be computed with.
832 _offsets: Returns the offset for the buffer(s).
833 _offsets[0] is always set.
834 _offsets[1] is set if the motion vector has non-zero fractional
835 components.
836 _pli: The color plane index.
837 _mv: The motion vector.
838 Return: The number of offsets returned: 1 or 2.*/
oc_state_get_mv_offsets(const oc_theora_state * _state,int _offsets[2],int _pli,oc_mv _mv)839 int oc_state_get_mv_offsets(const oc_theora_state *_state,int _offsets[2],
840 int _pli,oc_mv _mv){
841 /*Here is a brief description of how Theora handles motion vectors:
842 Motion vector components are specified to half-pixel accuracy in
843 undecimated directions of each plane, and quarter-pixel accuracy in
844 decimated directions.
845 Integer parts are extracted by dividing (not shifting) by the
846 appropriate amount, with truncation towards zero.
847 These integer values are used to calculate the first offset.
848
849 If either of the fractional parts are non-zero, then a second offset is
850 computed.
851 No third or fourth offsets are computed, even if both components have
852 non-zero fractional parts.
853 The second offset is computed by dividing (not shifting) by the
854 appropriate amount, always truncating _away_ from zero.*/
855 #if 0
856 /*This version of the code doesn't use any tables, but is slower.*/
857 int ystride;
858 int xprec;
859 int yprec;
860 int xfrac;
861 int yfrac;
862 int offs;
863 int dx;
864 int dy;
865 ystride=_state->ref_ystride[_pli];
866 /*These two variables decide whether we are in half- or quarter-pixel
867 precision in each component.*/
868 xprec=1+(_pli!=0&&!(_state->info.pixel_fmt&1));
869 yprec=1+(_pli!=0&&!(_state->info.pixel_fmt&2));
870 dx=OC_MV_X(_mv);
871 dy=OC_MV_Y(_mv);
872 /*These two variables are either 0 if all the fractional bits are zero or -1
873 if any of them are non-zero.*/
874 xfrac=OC_SIGNMASK(-(dx&(xprec|1)));
875 yfrac=OC_SIGNMASK(-(dy&(yprec|1)));
876 offs=(dx>>xprec)+(dy>>yprec)*ystride;
877 if(xfrac||yfrac){
878 int xmask;
879 int ymask;
880 xmask=OC_SIGNMASK(dx);
881 ymask=OC_SIGNMASK(dy);
882 yfrac&=ystride;
883 _offsets[0]=offs-(xfrac&xmask)+(yfrac&ymask);
884 _offsets[1]=offs-(xfrac&~xmask)+(yfrac&~ymask);
885 return 2;
886 }
887 else{
888 _offsets[0]=offs;
889 return 1;
890 }
891 #else
892 /*Using tables simplifies the code, and there's enough arithmetic to hide the
893 latencies of the memory references.*/
894 static const signed char OC_MVMAP[2][64]={
895 {
896 -15,-15,-14,-14,-13,-13,-12,-12,-11,-11,-10,-10, -9, -9, -8,
897 -8, -7, -7, -6, -6, -5, -5, -4, -4, -3, -3, -2, -2, -1, -1, 0,
898 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7,
899 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14, 14, 15, 15
900 },
901 {
902 -7, -7, -7, -7, -6, -6, -6, -6, -5, -5, -5, -5, -4, -4, -4,
903 -4, -3, -3, -3, -3, -2, -2, -2, -2, -1, -1, -1, -1, 0, 0, 0,
904 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3,
905 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7
906 }
907 };
908 static const signed char OC_MVMAP2[2][64]={
909 {
910 -1, 0,-1, 0,-1, 0,-1, 0,-1, 0,-1, 0,-1, 0,-1,
911 0,-1, 0,-1, 0,-1, 0,-1, 0,-1, 0,-1, 0,-1, 0,-1,
912 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1,
913 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1
914 },
915 {
916 -1,-1,-1, 0,-1,-1,-1, 0,-1,-1,-1, 0,-1,-1,-1,
917 0,-1,-1,-1, 0,-1,-1,-1, 0,-1,-1,-1, 0,-1,-1,-1,
918 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1,
919 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1
920 }
921 };
922 int ystride;
923 int qpx;
924 int qpy;
925 int mx;
926 int my;
927 int mx2;
928 int my2;
929 int offs;
930 int dx;
931 int dy;
932 ystride=_state->ref_ystride[_pli];
933 qpy=_pli!=0&&!(_state->info.pixel_fmt&2);
934 dx=OC_MV_X(_mv);
935 dy=OC_MV_Y(_mv);
936 my=OC_MVMAP[qpy][dy+31];
937 my2=OC_MVMAP2[qpy][dy+31];
938 qpx=_pli!=0&&!(_state->info.pixel_fmt&1);
939 mx=OC_MVMAP[qpx][dx+31];
940 mx2=OC_MVMAP2[qpx][dx+31];
941 offs=my*ystride+mx;
942 if(mx2||my2){
943 _offsets[1]=offs+my2*ystride+mx2;
944 _offsets[0]=offs;
945 return 2;
946 }
947 _offsets[0]=offs;
948 return 1;
949 #endif
950 }
951
/*Reconstructs one fragment of the current frame from its DCT coefficients.
  The residue is computed into the second half of _dct_coeffs, and is then
   combined with the prediction selected by the fragment's reference frame
   index and written into the OC_FRAME_SELF buffer.
  _state:      The context containing the fragment tables and frame buffers.
  _fragi:      The index of the fragment to reconstruct.
  _pli:        The color plane the fragment belongs to.
  _dct_coeffs: On input, the first 64 entries hold the coefficients.
               _dct_coeffs[0] may be overwritten with the dequantized DC
                value, and the last 64 entries receive the residue.
  _last_zzi:   Passed through to the inverse transform; values below 2 select
                the DC-only path.
  _dc_quant:   The quantizer to apply to the DC coefficient.*/
void oc_state_frag_recon_c(const oc_theora_state *_state,ptrdiff_t _fragi,
 int _pli,ogg_int16_t _dct_coeffs[128],int _last_zzi,ogg_uint16_t _dc_quant){
  ogg_int16_t         *residue;
  const unsigned char *ref;
  unsigned char       *dst;
  ptrdiff_t            frag_buf_off;
  int                  mvoffsets[2];
  int                  ystride;
  int                  refi;
  residue=_dct_coeffs+64;
  /*Apply the inverse transform.*/
  if(_last_zzi>=2){
    /*General case: dequantize the DC coefficient, then run the full iDCT.*/
    _dct_coeffs[0]=(ogg_int16_t)(_dct_coeffs[0]*(int)_dc_quant);
    oc_idct8x8(_state,residue,_dct_coeffs,_last_zzi);
  }
  else{
    /*Only a DC component is present, so the residue is flat.*/
    ogg_int16_t dc;
    int         i;
    /*This dequantization product is rounded (unlike any of the others),
       because no iDCT runs afterwards to do the rounding.*/
    dc=(ogg_int16_t)((_dct_coeffs[0]*(ogg_int32_t)_dc_quant+15)>>5);
    /*LOOP VECTORIZES.*/
    for(i=0;i<64;i++)residue[i]=dc;
  }
  /*Fill in the target buffer.*/
  frag_buf_off=_state->frag_buf_offs[_fragi];
  refi=_state->frags[_fragi].refi;
  ystride=_state->ref_ystride[_pli];
  dst=_state->ref_frame_data[OC_FRAME_SELF]+frag_buf_off;
  if(refi==OC_FRAME_SELF){
    /*Intra fragment: there is no prediction to add.*/
    oc_frag_recon_intra(_state,dst,ystride,residue);
    return;
  }
  ref=_state->ref_frame_data[refi]+frag_buf_off;
  if(oc_state_get_mv_offsets(_state,mvoffsets,_pli,
   _state->frag_mvs[_fragi])>1){
    /*Fractional motion vector: the predictor is built from two offsets.*/
    oc_frag_recon_inter2(_state,
     dst,ref+mvoffsets[0],ref+mvoffsets[1],ystride,residue);
  }
  else{
    oc_frag_recon_inter(_state,dst,ref+mvoffsets[0],ystride,residue);
  }
}
994
/*Filters across a vertical block edge, i.e., horizontally, for 8 rows.
  _pix:     The first pixel to the right of the edge, in the first row.
  _ystride: The offset from one row to the next.
  _bv:      The bounding values lookup, indexed by signed filter values.*/
static void loop_filter_h(unsigned char *_pix,int _ystride,signed char *_bv){
  unsigned char *row;
  int            rows_left;
  /*Start two pixels to the left of the edge.*/
  row=_pix-2;
  for(rows_left=8;rows_left-->0;row+=_ystride){
    int delta;
    delta=row[0]-row[3]+3*(row[2]-row[1]);
    /*The _bv array is used to compute the function
      delta=OC_CLAMPI(OC_MINI(-_2flimit-delta,0),delta,
       OC_MAXI(_2flimit-delta,0));
      where _2flimit=_state->loop_filter_limits[_state->qis[0]]<<1;*/
    delta=_bv[(delta+4)>>3];
    row[1]=OC_CLAMP255(row[1]+delta);
    row[2]=OC_CLAMP255(row[2]-delta);
  }
}
1010
/*Filters across a horizontal block edge, i.e., vertically, for 8 columns.
  _pix:     The first pixel of the row just past the edge.
  _ystride: The offset from one row to the next.
  _bv:      The bounding values lookup, indexed by signed filter values.*/
static void loop_filter_v(unsigned char *_pix,int _ystride,signed char *_bv){
  unsigned char *row0;
  unsigned char *row1;
  unsigned char *row2;
  unsigned char *row3;
  int            col;
  /*The four rows straddling the edge, starting two rows before it.*/
  row0=_pix-_ystride*2;
  row1=row0+_ystride;
  row2=row0+_ystride*2;
  row3=row0+_ystride*3;
  for(col=0;col<8;col++){
    int delta;
    delta=row0[col]-row3[col]+3*(row2[col]-row1[col]);
    /*The _bv array is used to compute the function
      delta=OC_CLAMPI(OC_MINI(-_2flimit-delta,0),delta,
       OC_MAXI(_2flimit-delta,0));
      where _2flimit=_state->loop_filter_limits[_state->qis[0]]<<1;*/
    delta=_bv[(delta+4)>>3];
    row1[col]=OC_CLAMP255(row1[col]+delta);
    row2[col]=OC_CLAMP255(row2[col]-delta);
  }
}
1025
/*Builds the bounding values table used by the loop filter.
  The table is centered on entry 127: moving away from the center in either
   direction, the magnitude ramps up to _flimit, back down to 0, and every
   other entry is 0.
  Entries that would land outside the table are simply dropped.
  _bv:     Storage for the array.
  _flimit: The filter limit as defined in Section 7.10 of the spec.*/
void oc_loop_filter_init_c(signed char _bv[256],int _flimit){
  int k;
  memset(_bv,0,256*sizeof(*_bv));
  for(k=0;k<_flimit;k++){
    int outer_lo;
    int outer_hi;
    outer_lo=127-k-_flimit;
    outer_hi=127+k+_flimit;
    /*The rising part of the ramp, on either side of the center.*/
    _bv[127-k]=(signed char)(-k);
    _bv[127+k]=(signed char)(k);
    /*The falling part of the ramp, where it still fits in the table.*/
    if(outer_lo>=0)_bv[outer_lo]=(signed char)(k-_flimit);
    if(outer_hi<256)_bv[outer_hi]=(signed char)(_flimit-k);
  }
}
1039
1040 /*Apply the loop filter to a given set of fragment rows in the given plane.
1041 The filter may be run on the bottom edge, affecting pixels in the next row of
1042 fragments, so this row also needs to be available.
1043 _bv: The bounding values array.
1044 _refi: The index of the frame buffer to filter.
1045 _pli: The color plane to filter.
1046 _fragy0: The Y coordinate of the first fragment row to filter.
1047 _fragy_end: The Y coordinate of the fragment row to stop filtering at.*/
oc_state_loop_filter_frag_rows_c(const oc_theora_state * _state,signed char * _bv,int _refi,int _pli,int _fragy0,int _fragy_end)1048 void oc_state_loop_filter_frag_rows_c(const oc_theora_state *_state,
1049 signed char *_bv,int _refi,int _pli,int _fragy0,int _fragy_end){
1050 const oc_fragment_plane *fplane;
1051 const oc_fragment *frags;
1052 const ptrdiff_t *frag_buf_offs;
1053 unsigned char *ref_frame_data;
1054 ptrdiff_t fragi_top;
1055 ptrdiff_t fragi_bot;
1056 ptrdiff_t fragi0;
1057 ptrdiff_t fragi0_end;
1058 int ystride;
1059 int nhfrags;
1060 _bv+=127;
1061 fplane=_state->fplanes+_pli;
1062 nhfrags=fplane->nhfrags;
1063 fragi_top=fplane->froffset;
1064 fragi_bot=fragi_top+fplane->nfrags;
1065 fragi0=fragi_top+_fragy0*(ptrdiff_t)nhfrags;
1066 fragi0_end=fragi_top+_fragy_end*(ptrdiff_t)nhfrags;
1067 ystride=_state->ref_ystride[_pli];
1068 frags=_state->frags;
1069 frag_buf_offs=_state->frag_buf_offs;
1070 ref_frame_data=_state->ref_frame_data[_refi];
1071 /*The following loops are constructed somewhat non-intuitively on purpose.
1072 The main idea is: if a block boundary has at least one coded fragment on
1073 it, the filter is applied to it.
1074 However, the order that the filters are applied in matters, and VP3 chose
1075 the somewhat strange ordering used below.*/
1076 while(fragi0<fragi0_end){
1077 ptrdiff_t fragi;
1078 ptrdiff_t fragi_end;
1079 fragi=fragi0;
1080 fragi_end=fragi+nhfrags;
1081 while(fragi<fragi_end){
1082 if(frags[fragi].coded){
1083 unsigned char *ref;
1084 ref=ref_frame_data+frag_buf_offs[fragi];
1085 if(fragi>fragi0)loop_filter_h(ref,ystride,_bv);
1086 if(fragi0>fragi_top)loop_filter_v(ref,ystride,_bv);
1087 if(fragi+1<fragi_end&&!frags[fragi+1].coded){
1088 loop_filter_h(ref+8,ystride,_bv);
1089 }
1090 if(fragi+nhfrags<fragi_bot&&!frags[fragi+nhfrags].coded){
1091 loop_filter_v(ref+(ystride<<3),ystride,_bv);
1092 }
1093 }
1094 fragi++;
1095 }
1096 fragi0+=nhfrags;
1097 }
1098 }
1099
#if defined(OC_DUMP_IMAGES)
/*Writes one of the reference frames to a 16-bit RGB PNG file, for debugging.
  The file is named after the frame number derived from the state's granule
   position (zero-padded to 8 digits), followed by _suf and ".png".
  _state: The context containing the reference buffers.
  _frame: The index into ref_frame_idx[] of the frame to dump.
  _suf:   A suffix to add to the file name; at most 35 characters.
  Return: 0 on success, or TH_EFAULT if the suffix is too long, the file could
           not be opened, an allocation failed, or libpng reported an error.*/
int oc_state_dump_frame(const oc_theora_state *_state,int _frame,
 const char *_suf){
  /*Dump a PNG of the reconstructed image.*/
  png_structp    png;
  png_infop      info;
  png_bytep     *image;
  FILE          *fp;
  char           fname[64];
  unsigned char *y_row;
  unsigned char *u_row;
  unsigned char *v_row;
  unsigned char *y;
  unsigned char *u;
  unsigned char *v;
  ogg_int64_t    iframe;
  ogg_int64_t    pframe;
  int            y_stride;
  int            u_stride;
  int            v_stride;
  int            framei;
  int            width;
  int            height;
  int            imgi;
  int            imgj;
  width=_state->info.frame_width;
  height=_state->info.frame_height;
  iframe=_state->granpos>>_state->info.keyframe_granule_shift;
  pframe=_state->granpos-(iframe<<_state->info.keyframe_granule_shift);
  /*Make sure the formatted name cannot overrun fname.
    24 characters are reserved for the frame number, which is more than any
     int of up to 64 bits can print to (sign included); the rest must hold
     the suffix, the extension and the terminator.*/
  if(strlen(_suf)>sizeof(fname)-24-sizeof(".png"))return TH_EFAULT;
  sprintf(fname,"%08i%s.png",(int)(iframe+pframe),_suf);
  fp=fopen(fname,"wb");
  if(fp==NULL)return TH_EFAULT;
  /*Each output pixel takes 6 bytes: three 16-bit samples.*/
  image=(png_bytep *)oc_malloc_2d(height,6*width,sizeof(**image));
  if(image==NULL){
    fclose(fp);
    return TH_EFAULT;
  }
  png=png_create_write_struct(PNG_LIBPNG_VER_STRING,NULL,NULL,NULL);
  if(png==NULL){
    oc_free_2d(image);
    fclose(fp);
    return TH_EFAULT;
  }
  info=png_create_info_struct(png);
  if(info==NULL){
    png_destroy_write_struct(&png,NULL);
    oc_free_2d(image);
    fclose(fp);
    return TH_EFAULT;
  }
  /*libpng reports errors by jumping back here.*/
  if(setjmp(png_jmpbuf(png))){
    png_destroy_write_struct(&png,&info);
    oc_free_2d(image);
    fclose(fp);
    return TH_EFAULT;
  }
  framei=_state->ref_frame_idx[_frame];
  y_row=_state->ref_frame_bufs[framei][0].data;
  u_row=_state->ref_frame_bufs[framei][1].data;
  v_row=_state->ref_frame_bufs[framei][2].data;
  y_stride=_state->ref_frame_bufs[framei][0].stride;
  u_stride=_state->ref_frame_bufs[framei][1].stride;
  v_stride=_state->ref_frame_bufs[framei][2].stride;
  /*Chroma up-sampling is just done with a box filter.
    This is very likely what will actually be used in practice on a real
     display, and also removes one more layer to search in for the source of
     artifacts.
    As an added bonus, it's dead simple.*/
  /*The output rows are filled from the last one to the first while the
     source rows are walked forwards from the data pointer.*/
  for(imgi=height;imgi-->0;){
    int dc;
    y=y_row;
    u=u_row;
    v=v_row;
    for(imgj=0;imgj<6*width;){
      float    yval;
      float    uval;
      float    vval;
      unsigned rval;
      unsigned gval;
      unsigned bval;
      /*This is intentionally slow and very accurate.*/
      yval=(*y-16)*(1.0F/219);
      uval=(*u-128)*(2*(1-0.114F)/224);
      vval=(*v-128)*(2*(1-0.299F)/224);
      rval=OC_CLAMPI(0,(int)(65535*(yval+vval)+0.5F),65535);
      gval=OC_CLAMPI(0,(int)(65535*(
       yval-uval*(0.114F/0.587F)-vval*(0.299F/0.587F))+0.5F),65535);
      bval=OC_CLAMPI(0,(int)(65535*(yval+uval)+0.5F),65535);
      /*Samples are stored most significant byte first.*/
      image[imgi][imgj++]=(unsigned char)(rval>>8);
      image[imgi][imgj++]=(unsigned char)(rval&0xFF);
      image[imgi][imgj++]=(unsigned char)(gval>>8);
      image[imgi][imgj++]=(unsigned char)(gval&0xFF);
      image[imgi][imgj++]=(unsigned char)(bval>>8);
      image[imgi][imgj++]=(unsigned char)(bval&0xFF);
      /*Advance the chroma pointers after every luma sample if bit 0 of the
         pixel format is set, and after every second one otherwise.*/
      dc=((y-y_row)&1)|(_state->info.pixel_fmt&1);
      y++;
      u+=dc;
      v+=dc;
    }
    /*The same decision for rows, using bit 1 of the pixel format; dc becomes
       an all-ones or all-zeros mask applied to the chroma strides.*/
    dc=-(((height-1-imgi)&1)|_state->info.pixel_fmt>>1);
    y_row+=y_stride;
    u_row+=dc&u_stride;
    v_row+=dc&v_stride;
  }
  png_init_io(png,fp);
  png_set_compression_level(png,Z_BEST_COMPRESSION);
  png_set_IHDR(png,info,width,height,16,PNG_COLOR_TYPE_RGB,
   PNG_INTERLACE_NONE,PNG_COMPRESSION_TYPE_DEFAULT,PNG_FILTER_TYPE_DEFAULT);
  switch(_state->info.colorspace){
    case TH_CS_ITU_REC_470M:{
      png_set_gAMA(png,info,2.2);
      png_set_cHRM_fixed(png,info,31006,31616,
       67000,32000,21000,71000,14000,8000);
    }break;
    case TH_CS_ITU_REC_470BG:{
      png_set_gAMA(png,info,2.67);
      png_set_cHRM_fixed(png,info,31271,32902,
       64000,33000,29000,60000,15000,6000);
    }break;
    default:break;
  }
  png_set_pHYs(png,info,_state->info.aspect_numerator,
   _state->info.aspect_denominator,0);
  png_set_rows(png,info,image);
  png_write_png(png,info,PNG_TRANSFORM_IDENTITY,NULL);
  png_write_end(png,info);
  png_destroy_write_struct(&png,&info);
  oc_free_2d(image);
  fclose(fp);
  return 0;
}
#endif
1232
1233
1234
/*Converts a granule position into a frame index.
  _encdec:  The codec state; it is accessed as an oc_theora_state.
  _granpos: The granule position to convert.
  Return: The frame index, or -1 if _granpos is negative.*/
ogg_int64_t th_granule_frame(void *_encdec,ogg_int64_t _granpos){
  oc_theora_state *state;
  ogg_int64_t      key_part;
  ogg_int64_t      delta_part;
  if(_granpos<0)return -1;
  state=(oc_theora_state *)_encdec;
  /*The bits above the keyframe granule shift and the bits below it are
     extracted separately and then summed.*/
  key_part=_granpos>>state->info.keyframe_granule_shift;
  delta_part=_granpos-(key_part<<state->info.keyframe_granule_shift);
  /*3.2.0 streams store the frame index in the granule position, while 3.2.1
     and later streams store the frame count.
    The index is what gets returned, so the value is adjusted for 3.2.1 or
     later streams.*/
  return key_part+delta_part-TH_VERSION_CHECK(&state->info,3,2,1);
}
1251
/*Converts a granule position into a time.
  The result is the frame index from th_granule_frame(), plus one, multiplied
   by fps_denominator/fps_numerator.
  _encdec:  The codec state; it is accessed as an oc_theora_state.
  _granpos: The granule position to convert.
  Return: The computed time, or -1 if _granpos is negative.*/
double th_granule_time(void *_encdec,ogg_int64_t _granpos){
  oc_theora_state *state;
  if(_granpos<0)return -1;
  state=(oc_theora_state *)_encdec;
  return (th_granule_frame(_encdec,_granpos)+1)*(
   (double)state->info.fps_denominator/state->info.fps_numerator);
}
1261