1 /********************************************************************
2 * *
3 * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. *
4 * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
5 * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
6 * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
7 * *
8 * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 *
9 * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
10 * *
11 ********************************************************************
12
13 function:
14 last mod: $Id$
15
16 ********************************************************************/
17
18 #include <stdlib.h>
19 #include <string.h>
20 #include "internal.h"
21 #if defined(OC_X86_ASM)
22 #if defined(_MSC_VER)
23 # include "x86_vc/x86int.h"
24 #else
25 # include "x86/x86int.h"
26 #endif
27 #endif
28 #if defined(OC_DUMP_IMAGES)
29 # include <stdio.h>
30 # include "png.h"
31 #endif
32
33 /*Returns the fragment index of the top-left block in a macro block.
34 This can be used to test whether or not the whole macro block is valid.
35 _sb_map: The super block map.
36 _quadi: The quadrant number.
37 Return: The index of the fragment of the upper left block in the macro
38 block, or -1 if the block lies outside the coded frame.*/
oc_sb_quad_top_left_frag(oc_sb_map_quad _sb_map[4],int _quadi)39 static ptrdiff_t oc_sb_quad_top_left_frag(oc_sb_map_quad _sb_map[4],int _quadi){
40 /*It so happens that under the Hilbert curve ordering described below, the
41 upper-left block in each macro block is at index 0, except in macro block
42 3, where it is at index 2.*/
43 return _sb_map[_quadi][_quadi&_quadi<<1];
44 }
45
46 /*Fills in the mapping from block positions to fragment numbers for a single
47 color plane.
48 This function also fills in the "valid" flag of each quadrant in the super
49 block flags.
50 _sb_maps: The array of super block maps for the color plane.
51 _sb_flags: The array of super block flags for the color plane.
52 _frag0: The index of the first fragment in the plane.
53 _hfrags: The number of horizontal fragments in a coded frame.
54 _vfrags: The number of vertical fragments in a coded frame.*/
oc_sb_create_plane_mapping(oc_sb_map _sb_maps[],oc_sb_flags _sb_flags[],ptrdiff_t _frag0,int _hfrags,int _vfrags)55 static void oc_sb_create_plane_mapping(oc_sb_map _sb_maps[],
56 oc_sb_flags _sb_flags[],ptrdiff_t _frag0,int _hfrags,int _vfrags){
57 /*Contains the (macro_block,block) indices for a 4x4 grid of
58 fragments.
59 The pattern is a 4x4 Hilbert space-filling curve.
60 A Hilbert curve has the nice property that as the curve grows larger, its
61 fractal dimension approaches 2.
62 The intuition is that nearby blocks in the curve are also close spatially,
63 with the previous element always an immediate neighbor, so that runs of
64 blocks should be well correlated.*/
65 static const int SB_MAP[4][4][2]={
66 {{0,0},{0,1},{3,2},{3,3}},
67 {{0,3},{0,2},{3,1},{3,0}},
68 {{1,0},{1,3},{2,0},{2,3}},
69 {{1,1},{1,2},{2,1},{2,2}}
70 };
71 ptrdiff_t yfrag;
72 unsigned sbi;
73 int y;
74 sbi=0;
75 yfrag=_frag0;
76 for(y=0;;y+=4){
77 int imax;
78 int x;
79 /*Figure out how many columns of blocks in this super block lie within the
80 image.*/
81 imax=_vfrags-y;
82 if(imax>4)imax=4;
83 else if(imax<=0)break;
84 for(x=0;;x+=4,sbi++){
85 ptrdiff_t xfrag;
86 int jmax;
87 int quadi;
88 int i;
89 /*Figure out how many rows of blocks in this super block lie within the
90 image.*/
91 jmax=_hfrags-x;
92 if(jmax>4)jmax=4;
93 else if(jmax<=0)break;
94 /*By default, set all fragment indices to -1.*/
95 memset(_sb_maps[sbi][0],0xFF,sizeof(_sb_maps[sbi]));
96 /*Fill in the fragment map for this super block.*/
97 xfrag=yfrag+x;
98 for(i=0;i<imax;i++){
99 int j;
100 for(j=0;j<jmax;j++){
101 _sb_maps[sbi][SB_MAP[i][j][0]][SB_MAP[i][j][1]]=xfrag+j;
102 }
103 xfrag+=_hfrags;
104 }
105 /*Mark which quadrants of this super block lie within the image.*/
106 for(quadi=0;quadi<4;quadi++){
107 _sb_flags[sbi].quad_valid|=
108 (oc_sb_quad_top_left_frag(_sb_maps[sbi],quadi)>=0)<<quadi;
109 }
110 }
111 yfrag+=_hfrags<<2;
112 }
113 }
114
115 /*Fills in the Y plane fragment map for a macro block given the fragment
116 coordinates of its upper-left hand corner.
117 _mb_map: The macro block map to fill.
118 _fplane: The description of the Y plane.
119 _xfrag0: The X location of the upper-left hand fragment in the luma plane.
120 _yfrag0: The Y location of the upper-left hand fragment in the luma plane.*/
oc_mb_fill_ymapping(oc_mb_map_plane _mb_map[3],const oc_fragment_plane * _fplane,int _xfrag0,int _yfrag0)121 static void oc_mb_fill_ymapping(oc_mb_map_plane _mb_map[3],
122 const oc_fragment_plane *_fplane,int _xfrag0,int _yfrag0){
123 int i;
124 int j;
125 for(i=0;i<2;i++)for(j=0;j<2;j++){
126 _mb_map[0][i<<1|j]=(_yfrag0+i)*(ptrdiff_t)_fplane->nhfrags+_xfrag0+j;
127 }
128 }
129
130 /*Fills in the chroma plane fragment maps for a macro block.
131 This version is for use with chroma decimated in the X and Y directions
132 (4:2:0).
133 _mb_map: The macro block map to fill.
134 _fplanes: The descriptions of the fragment planes.
135 _xfrag0: The X location of the upper-left hand fragment in the luma plane.
136 _yfrag0: The Y location of the upper-left hand fragment in the luma plane.*/
oc_mb_fill_cmapping00(oc_mb_map_plane _mb_map[3],const oc_fragment_plane _fplanes[3],int _xfrag0,int _yfrag0)137 static void oc_mb_fill_cmapping00(oc_mb_map_plane _mb_map[3],
138 const oc_fragment_plane _fplanes[3],int _xfrag0,int _yfrag0){
139 ptrdiff_t fragi;
140 _xfrag0>>=1;
141 _yfrag0>>=1;
142 fragi=_yfrag0*(ptrdiff_t)_fplanes[1].nhfrags+_xfrag0;
143 _mb_map[1][0]=fragi+_fplanes[1].froffset;
144 _mb_map[2][0]=fragi+_fplanes[2].froffset;
145 }
146
147 /*Fills in the chroma plane fragment maps for a macro block.
148 This version is for use with chroma decimated in the Y direction.
149 _mb_map: The macro block map to fill.
150 _fplanes: The descriptions of the fragment planes.
151 _xfrag0: The X location of the upper-left hand fragment in the luma plane.
152 _yfrag0: The Y location of the upper-left hand fragment in the luma plane.*/
oc_mb_fill_cmapping01(oc_mb_map_plane _mb_map[3],const oc_fragment_plane _fplanes[3],int _xfrag0,int _yfrag0)153 static void oc_mb_fill_cmapping01(oc_mb_map_plane _mb_map[3],
154 const oc_fragment_plane _fplanes[3],int _xfrag0,int _yfrag0){
155 ptrdiff_t fragi;
156 int j;
157 _yfrag0>>=1;
158 fragi=_yfrag0*(ptrdiff_t)_fplanes[1].nhfrags+_xfrag0;
159 for(j=0;j<2;j++){
160 _mb_map[1][j]=fragi+_fplanes[1].froffset;
161 _mb_map[2][j]=fragi+_fplanes[2].froffset;
162 fragi++;
163 }
164 }
165
166 /*Fills in the chroma plane fragment maps for a macro block.
167 This version is for use with chroma decimated in the X direction (4:2:2).
168 _mb_map: The macro block map to fill.
169 _fplanes: The descriptions of the fragment planes.
170 _xfrag0: The X location of the upper-left hand fragment in the luma plane.
171 _yfrag0: The Y location of the upper-left hand fragment in the luma plane.*/
oc_mb_fill_cmapping10(oc_mb_map_plane _mb_map[3],const oc_fragment_plane _fplanes[3],int _xfrag0,int _yfrag0)172 static void oc_mb_fill_cmapping10(oc_mb_map_plane _mb_map[3],
173 const oc_fragment_plane _fplanes[3],int _xfrag0,int _yfrag0){
174 ptrdiff_t fragi;
175 int i;
176 _xfrag0>>=1;
177 fragi=_yfrag0*(ptrdiff_t)_fplanes[1].nhfrags+_xfrag0;
178 for(i=0;i<2;i++){
179 _mb_map[1][i<<1]=fragi+_fplanes[1].froffset;
180 _mb_map[2][i<<1]=fragi+_fplanes[2].froffset;
181 fragi+=_fplanes[1].nhfrags;
182 }
183 }
184
185 /*Fills in the chroma plane fragment maps for a macro block.
186 This version is for use with no chroma decimation (4:4:4).
187 This uses the already filled-in luma plane values.
188 _mb_map: The macro block map to fill.
189 _fplanes: The descriptions of the fragment planes.*/
oc_mb_fill_cmapping11(oc_mb_map_plane _mb_map[3],const oc_fragment_plane _fplanes[3])190 static void oc_mb_fill_cmapping11(oc_mb_map_plane _mb_map[3],
191 const oc_fragment_plane _fplanes[3]){
192 int k;
193 for(k=0;k<4;k++){
194 _mb_map[1][k]=_mb_map[0][k]+_fplanes[1].froffset;
195 _mb_map[2][k]=_mb_map[0][k]+_fplanes[2].froffset;
196 }
197 }
198
199 /*The function type used to fill in the chroma plane fragment maps for a
200 macro block.
201 _mb_map: The macro block map to fill.
202 _fplanes: The descriptions of the fragment planes.
203 _xfrag0: The X location of the upper-left hand fragment in the luma plane.
204 _yfrag0: The Y location of the upper-left hand fragment in the luma plane.*/
205 typedef void (*oc_mb_fill_cmapping_func)(oc_mb_map_plane _mb_map[3],
206 const oc_fragment_plane _fplanes[3],int _xfrag0,int _yfrag0);
207
208 /*A table of functions used to fill in the chroma plane fragment maps for a
209 macro block for each type of chrominance decimation.*/
210 static const oc_mb_fill_cmapping_func OC_MB_FILL_CMAPPING_TABLE[4]={
211 oc_mb_fill_cmapping00,
212 oc_mb_fill_cmapping01,
213 oc_mb_fill_cmapping10,
214 (oc_mb_fill_cmapping_func)oc_mb_fill_cmapping11
215 };
216
217 /*Fills in the mapping from macro blocks to their corresponding fragment
218 numbers in each plane.
219 _mb_maps: The list of macro block maps.
220 _mb_modes: The list of macro block modes; macro blocks completely outside
221 the coded region are marked invalid.
222 _fplanes: The descriptions of the fragment planes.
223 _pixel_fmt: The chroma decimation type.*/
oc_mb_create_mapping(oc_mb_map _mb_maps[],signed char _mb_modes[],const oc_fragment_plane _fplanes[3],int _pixel_fmt)224 static void oc_mb_create_mapping(oc_mb_map _mb_maps[],
225 signed char _mb_modes[],const oc_fragment_plane _fplanes[3],int _pixel_fmt){
226 oc_mb_fill_cmapping_func mb_fill_cmapping;
227 unsigned sbi;
228 int y;
229 mb_fill_cmapping=OC_MB_FILL_CMAPPING_TABLE[_pixel_fmt];
230 /*Loop through the luma plane super blocks.*/
231 for(sbi=y=0;y<_fplanes[0].nvfrags;y+=4){
232 int x;
233 for(x=0;x<_fplanes[0].nhfrags;x+=4,sbi++){
234 int ymb;
235 /*Loop through the macro blocks in each super block in display order.*/
236 for(ymb=0;ymb<2;ymb++){
237 int xmb;
238 for(xmb=0;xmb<2;xmb++){
239 unsigned mbi;
240 int mbx;
241 int mby;
242 mbi=sbi<<2|OC_MB_MAP[ymb][xmb];
243 mbx=x|xmb<<1;
244 mby=y|ymb<<1;
245 /*Initialize fragment indices to -1.*/
246 memset(_mb_maps[mbi],0xFF,sizeof(_mb_maps[mbi]));
247 /*Make sure this macro block is within the encoded region.*/
248 if(mbx>=_fplanes[0].nhfrags||mby>=_fplanes[0].nvfrags){
249 _mb_modes[mbi]=OC_MODE_INVALID;
250 continue;
251 }
252 /*Fill in the fragment indices for the luma plane.*/
253 oc_mb_fill_ymapping(_mb_maps[mbi],_fplanes,mbx,mby);
254 /*Fill in the fragment indices for the chroma planes.*/
255 (*mb_fill_cmapping)(_mb_maps[mbi],_fplanes,mbx,mby);
256 }
257 }
258 }
259 }
260 }
261
262 /*Marks the fragments which fall all or partially outside the displayable
263 region of the frame.
264 _state: The Theora state containing the fragments to be marked.*/
oc_state_border_init(oc_theora_state * _state)265 static void oc_state_border_init(oc_theora_state *_state){
266 oc_fragment *frag;
267 oc_fragment *yfrag_end;
268 oc_fragment *xfrag_end;
269 oc_fragment_plane *fplane;
270 int crop_x0;
271 int crop_y0;
272 int crop_xf;
273 int crop_yf;
274 int pli;
275 int y;
276 int x;
277 /*The method we use here is slow, but the code is dead simple and handles
278 all the special cases easily.
279 We only ever need to do it once.*/
280 /*Loop through the fragments, marking those completely outside the
281 displayable region and constructing a border mask for those that straddle
282 the border.*/
283 _state->nborders=0;
284 yfrag_end=frag=_state->frags;
285 for(pli=0;pli<3;pli++){
286 fplane=_state->fplanes+pli;
287 /*Set up the cropping rectangle for this plane.*/
288 crop_x0=_state->info.pic_x;
289 crop_xf=_state->info.pic_x+_state->info.pic_width;
290 crop_y0=_state->info.pic_y;
291 crop_yf=_state->info.pic_y+_state->info.pic_height;
292 if(pli>0){
293 if(!(_state->info.pixel_fmt&1)){
294 crop_x0=crop_x0>>1;
295 crop_xf=crop_xf+1>>1;
296 }
297 if(!(_state->info.pixel_fmt&2)){
298 crop_y0=crop_y0>>1;
299 crop_yf=crop_yf+1>>1;
300 }
301 }
302 y=0;
303 for(yfrag_end+=fplane->nfrags;frag<yfrag_end;y+=8){
304 x=0;
305 for(xfrag_end=frag+fplane->nhfrags;frag<xfrag_end;frag++,x+=8){
306 /*First check to see if this fragment is completely outside the
307 displayable region.*/
308 /*Note the special checks for an empty cropping rectangle.
309 This guarantees that if we count a fragment as straddling the
310 border below, at least one pixel in the fragment will be inside
311 the displayable region.*/
312 if(x+8<=crop_x0||crop_xf<=x||y+8<=crop_y0||crop_yf<=y||
313 crop_x0>=crop_xf||crop_y0>=crop_yf){
314 frag->invalid=1;
315 }
316 /*Otherwise, check to see if it straddles the border.*/
317 else if(x<crop_x0&&crop_x0<x+8||x<crop_xf&&crop_xf<x+8||
318 y<crop_y0&&crop_y0<y+8||y<crop_yf&&crop_yf<y+8){
319 ogg_int64_t mask;
320 int npixels;
321 int i;
322 mask=npixels=0;
323 for(i=0;i<8;i++){
324 int j;
325 for(j=0;j<8;j++){
326 if(x+j>=crop_x0&&x+j<crop_xf&&y+i>=crop_y0&&y+i<crop_yf){
327 mask|=(ogg_int64_t)1<<(i<<3|j);
328 npixels++;
329 }
330 }
331 }
332 /*Search the fragment array for border info with the same pattern.
333 In general, there will be at most 8 different patterns (per
334 plane).*/
335 for(i=0;;i++){
336 if(i>=_state->nborders){
337 _state->nborders++;
338 _state->borders[i].mask=mask;
339 _state->borders[i].npixels=npixels;
340 }
341 else if(_state->borders[i].mask!=mask)continue;
342 frag->borderi=i;
343 break;
344 }
345 }
346 else frag->borderi=-1;
347 }
348 }
349 }
350 }
351
/*Computes the fragment, super block and macro block counts for the frame
   size in _state->info, allocates the per-fragment, per-super-block and
   per-macro-block arrays, and fills in their mappings.
  On failure, arrays that were already allocated are left in _state; the
   caller releases them with oc_state_frarray_clear().
  _state: The state to initialize; info must already be filled in.
  Return: 0 on success, TH_EIMPL if a count would overflow its type, or
   TH_EFAULT if an allocation failed.*/
static int oc_state_frarray_init(oc_theora_state *_state){
  oc_fragment_plane *fplane;
  ptrdiff_t          yfrags;
  ptrdiff_t          cfrags;
  ptrdiff_t          nfrags;
  size_t             nmbs;
  unsigned           yhsbs;
  unsigned           yvsbs;
  unsigned           chsbs;
  unsigned           cvsbs;
  unsigned           ysbs;
  unsigned           csbs;
  unsigned           nsbs;
  int                yhfrags;
  int                yvfrags;
  int                chfrags;
  int                cvfrags;
  int                hdec;
  int                vdec;
  int                pli;
  /*Fragment counts per plane.
    The frame dimensions have already been validated to be multiples of 16.*/
  hdec=!(_state->info.pixel_fmt&1);
  vdec=!(_state->info.pixel_fmt&2);
  yhfrags=_state->info.frame_width>>3;
  yvfrags=_state->info.frame_height>>3;
  chfrags=(yhfrags+hdec)>>hdec;
  cvfrags=(yvfrags+vdec)>>vdec;
  yfrags=yhfrags*(ptrdiff_t)yvfrags;
  cfrags=chfrags*(ptrdiff_t)cvfrags;
  nfrags=yfrags+2*cfrags;
  /*Super block counts per plane (4x4 fragments each, rounded up), and the
     macro block count (four per luma super block).*/
  yhsbs=(yhfrags+3)>>2;
  yvsbs=(yvfrags+3)>>2;
  chsbs=(chfrags+3)>>2;
  cvsbs=(cvfrags+3)>>2;
  ysbs=yhsbs*yvsbs;
  csbs=chsbs*cvsbs;
  nsbs=ysbs+2*csbs;
  nmbs=(size_t)ysbs<<2;
  /*Check for overflow.
    The upper limits of the specification (1048560 by 1048560 frames) are
     supported when pointers are 64 bits wide, but must be rejected on
     narrower targets.
    Callers wanting a tighter limit on attempted allocation sizes have to
     impose it themselves.*/
  if(yfrags/yhfrags!=yvfrags||2*cfrags<cfrags||nfrags<yfrags){
    return TH_EIMPL;
  }
  if(ysbs/yhsbs!=yvsbs||2*csbs<csbs||nsbs<ysbs||nmbs>>2!=ysbs){
    return TH_EIMPL;
  }
  /*Describe the luma plane.*/
  fplane=_state->fplanes;
  fplane->nhfrags=yhfrags;
  fplane->nvfrags=yvfrags;
  fplane->froffset=0;
  fplane->nfrags=yfrags;
  fplane->nhsbs=yhsbs;
  fplane->nvsbs=yvsbs;
  fplane->sboffset=0;
  fplane->nsbs=ysbs;
  /*Describe the two chroma planes, which follow the luma plane back to back
     in both the fragment and the super block arrays.*/
  for(pli=1;pli<3;pli++){
    fplane=_state->fplanes+pli;
    fplane->nhfrags=chfrags;
    fplane->nvfrags=cvfrags;
    fplane->froffset=yfrags+(pli-1)*cfrags;
    fplane->nfrags=cfrags;
    fplane->nhsbs=chsbs;
    fplane->nvsbs=cvsbs;
    fplane->sboffset=ysbs+(pli-1)*csbs;
    fplane->nsbs=csbs;
  }
  /*Allocate the arrays.
    frags, sb_flags, mb_maps and mb_modes start out zeroed.*/
  _state->nfrags=nfrags;
  _state->frags=_ogg_calloc(nfrags,sizeof(*_state->frags));
  _state->frag_mvs=_ogg_malloc(nfrags*sizeof(*_state->frag_mvs));
  _state->nsbs=nsbs;
  _state->sb_maps=_ogg_malloc(nsbs*sizeof(*_state->sb_maps));
  _state->sb_flags=_ogg_calloc(nsbs,sizeof(*_state->sb_flags));
  _state->nhmbs=yhsbs<<1;
  _state->nvmbs=yvsbs<<1;
  _state->nmbs=nmbs;
  _state->mb_maps=_ogg_calloc(nmbs,sizeof(*_state->mb_maps));
  _state->mb_modes=_ogg_calloc(nmbs,sizeof(*_state->mb_modes));
  _state->coded_fragis=_ogg_malloc(nfrags*sizeof(*_state->coded_fragis));
  if(_state->frags==NULL||_state->frag_mvs==NULL||_state->sb_maps==NULL||
   _state->sb_flags==NULL||_state->mb_maps==NULL||_state->mb_modes==NULL||
   _state->coded_fragis==NULL){
    return TH_EFAULT;
  }
  /*Create the mapping from super blocks to fragments, one plane at a
     time.*/
  for(pli=0;pli<3;pli++){
    fplane=_state->fplanes+pli;
    oc_sb_create_plane_mapping(_state->sb_maps+fplane->sboffset,
     _state->sb_flags+fplane->sboffset,fplane->froffset,
     fplane->nhfrags,fplane->nvfrags);
  }
  /*Create the mapping from macro blocks to fragments.*/
  oc_mb_create_mapping(_state->mb_maps,_state->mb_modes,
   _state->fplanes,_state->info.pixel_fmt);
  /*Initialize the invalid and borderi fields of each fragment.*/
  oc_state_border_init(_state);
  return 0;
}
455
/*Releases the arrays allocated by oc_state_frarray_init().
  Each pointer is reset to NULL after it is freed, so that clearing the same
   state again (for example oc_state_clear() after a failed oc_state_init(),
   which has already called this function) does not free the memory twice.
  _state: The state whose fragment, super block and macro block arrays are to
           be released.*/
static void oc_state_frarray_clear(oc_theora_state *_state){
  _ogg_free(_state->coded_fragis);
  _state->coded_fragis=NULL;
  _ogg_free(_state->mb_modes);
  _state->mb_modes=NULL;
  _ogg_free(_state->mb_maps);
  _state->mb_maps=NULL;
  _ogg_free(_state->sb_flags);
  _state->sb_flags=NULL;
  _ogg_free(_state->sb_maps);
  _state->sb_maps=NULL;
  _ogg_free(_state->frag_mvs);
  _state->frag_mvs=NULL;
  _ogg_free(_state->frags);
  _state->frags=NULL;
}
465
466
467 /*Initializes the buffers used for reconstructed frames.
468 These buffers are padded with 16 extra pixels on each side, to allow
469 unrestricted motion vectors without special casing the boundary.
470 If chroma is decimated in either direction, the padding is reduced by a
471 factor of 2 on the appropriate sides.
472 _nrefs: The number of reference buffers to init; must be 3 or 4.*/
oc_state_ref_bufs_init(oc_theora_state * _state,int _nrefs)473 static int oc_state_ref_bufs_init(oc_theora_state *_state,int _nrefs){
474 th_info *info;
475 unsigned char *ref_frame_data;
476 size_t ref_frame_data_sz;
477 size_t ref_frame_sz;
478 size_t yplane_sz;
479 size_t cplane_sz;
480 int yhstride;
481 int yheight;
482 int chstride;
483 int cheight;
484 ptrdiff_t yoffset;
485 ptrdiff_t coffset;
486 ptrdiff_t *frag_buf_offs;
487 ptrdiff_t fragi;
488 int hdec;
489 int vdec;
490 int rfi;
491 int pli;
492 if(_nrefs<3||_nrefs>4)return TH_EINVAL;
493 info=&_state->info;
494 /*Compute the image buffer parameters for each plane.*/
495 hdec=!(info->pixel_fmt&1);
496 vdec=!(info->pixel_fmt&2);
497 yhstride=info->frame_width+2*OC_UMV_PADDING;
498 yheight=info->frame_height+2*OC_UMV_PADDING;
499 chstride=yhstride>>hdec;
500 cheight=yheight>>vdec;
501 yplane_sz=yhstride*(size_t)yheight;
502 cplane_sz=chstride*(size_t)cheight;
503 yoffset=OC_UMV_PADDING+OC_UMV_PADDING*(ptrdiff_t)yhstride;
504 coffset=(OC_UMV_PADDING>>hdec)+(OC_UMV_PADDING>>vdec)*(ptrdiff_t)chstride;
505 ref_frame_sz=yplane_sz+2*cplane_sz;
506 ref_frame_data_sz=_nrefs*ref_frame_sz;
507 /*Check for overflow.
508 The same caveats apply as for oc_state_frarray_init().*/
509 if(yplane_sz/yhstride!=yheight||2*cplane_sz<cplane_sz||
510 ref_frame_sz<yplane_sz||ref_frame_data_sz/_nrefs!=ref_frame_sz){
511 return TH_EIMPL;
512 }
513 ref_frame_data=_ogg_malloc(ref_frame_data_sz);
514 frag_buf_offs=_state->frag_buf_offs=
515 _ogg_malloc(_state->nfrags*sizeof(*frag_buf_offs));
516 if(ref_frame_data==NULL||frag_buf_offs==NULL){
517 _ogg_free(frag_buf_offs);
518 _ogg_free(ref_frame_data);
519 return TH_EFAULT;
520 }
521 /*Set up the width, height and stride for the image buffers.*/
522 _state->ref_frame_bufs[0][0].width=info->frame_width;
523 _state->ref_frame_bufs[0][0].height=info->frame_height;
524 _state->ref_frame_bufs[0][0].stride=yhstride;
525 _state->ref_frame_bufs[0][1].width=_state->ref_frame_bufs[0][2].width=
526 info->frame_width>>hdec;
527 _state->ref_frame_bufs[0][1].height=_state->ref_frame_bufs[0][2].height=
528 info->frame_height>>vdec;
529 _state->ref_frame_bufs[0][1].stride=_state->ref_frame_bufs[0][2].stride=
530 chstride;
531 for(rfi=1;rfi<_nrefs;rfi++){
532 memcpy(_state->ref_frame_bufs[rfi],_state->ref_frame_bufs[0],
533 sizeof(_state->ref_frame_bufs[0]));
534 }
535 /*Set up the data pointers for the image buffers.*/
536 for(rfi=0;rfi<_nrefs;rfi++){
537 _state->ref_frame_data[rfi]=ref_frame_data;
538 _state->ref_frame_bufs[rfi][0].data=ref_frame_data+yoffset;
539 ref_frame_data+=yplane_sz;
540 _state->ref_frame_bufs[rfi][1].data=ref_frame_data+coffset;
541 ref_frame_data+=cplane_sz;
542 _state->ref_frame_bufs[rfi][2].data=ref_frame_data+coffset;
543 ref_frame_data+=cplane_sz;
544 /*Flip the buffer upside down.
545 This allows us to decode Theora's bottom-up frames in their natural
546 order, yet return a top-down buffer with a positive stride to the user.*/
547 oc_ycbcr_buffer_flip(_state->ref_frame_bufs[rfi],
548 _state->ref_frame_bufs[rfi]);
549 }
550 _state->ref_ystride[0]=-yhstride;
551 _state->ref_ystride[1]=_state->ref_ystride[2]=-chstride;
552 /*Initialize the fragment buffer offsets.*/
553 ref_frame_data=_state->ref_frame_data[0];
554 fragi=0;
555 for(pli=0;pli<3;pli++){
556 th_img_plane *iplane;
557 oc_fragment_plane *fplane;
558 unsigned char *vpix;
559 ptrdiff_t stride;
560 ptrdiff_t vfragi_end;
561 int nhfrags;
562 iplane=_state->ref_frame_bufs[0]+pli;
563 fplane=_state->fplanes+pli;
564 vpix=iplane->data;
565 vfragi_end=fplane->froffset+fplane->nfrags;
566 nhfrags=fplane->nhfrags;
567 stride=iplane->stride;
568 while(fragi<vfragi_end){
569 ptrdiff_t hfragi_end;
570 unsigned char *hpix;
571 hpix=vpix;
572 for(hfragi_end=fragi+nhfrags;fragi<hfragi_end;fragi++){
573 frag_buf_offs[fragi]=hpix-ref_frame_data;
574 hpix+=8;
575 }
576 vpix+=stride<<3;
577 }
578 }
579 /*Initialize the reference frame indices.*/
580 _state->ref_frame_idx[OC_FRAME_GOLD]=
581 _state->ref_frame_idx[OC_FRAME_PREV]=
582 _state->ref_frame_idx[OC_FRAME_SELF]=-1;
583 _state->ref_frame_idx[OC_FRAME_IO]=_nrefs>3?3:-1;
584 return 0;
585 }
586
/*Releases the buffers allocated by oc_state_ref_bufs_init().
  All reference frames share the single allocation that starts at
   ref_frame_data[0], so only that pointer is freed.
  The freed pointers are reset to NULL so that clearing the same state twice
   does not free the memory twice.
  _state: The state whose reference buffers are to be released.*/
static void oc_state_ref_bufs_clear(oc_theora_state *_state){
  _ogg_free(_state->frag_buf_offs);
  _state->frag_buf_offs=NULL;
  _ogg_free(_state->ref_frame_data[0]);
  _state->ref_frame_data[0]=NULL;
}
591
592
/*Points every entry of the state's function table at the portable C
   implementation, and selects the default zig-zag table.
  _state: The state whose opt_vtable and opt_data are to be filled in.*/
void oc_state_vtable_init_c(oc_theora_state *_state){
  /*Data tables.*/
  _state->opt_data.dct_fzig_zag=OC_FZIG_ZAG;
  /*Single-fragment primitives.*/
  _state->opt_vtable.idct8x8=oc_idct8x8_c;
  _state->opt_vtable.frag_copy=oc_frag_copy_c;
  _state->opt_vtable.frag_recon_intra=oc_frag_recon_intra_c;
  _state->opt_vtable.frag_recon_inter=oc_frag_recon_inter_c;
  _state->opt_vtable.frag_recon_inter2=oc_frag_recon_inter2_c;
  /*State-level operations.*/
  _state->opt_vtable.state_frag_recon=oc_state_frag_recon_c;
  _state->opt_vtable.state_frag_copy_list=oc_state_frag_copy_list_c;
  _state->opt_vtable.state_loop_filter_frag_rows=
   oc_state_loop_filter_frag_rows_c;
  _state->opt_vtable.restore_fpu=oc_restore_fpu_c;
}
606
607 /*Initialize the accelerated function pointers.*/
oc_state_vtable_init(oc_theora_state * _state)608 void oc_state_vtable_init(oc_theora_state *_state){
609 #if defined(OC_X86_ASM)
610 oc_state_vtable_init_x86(_state);
611 #else
612 oc_state_vtable_init_c(_state);
613 #endif
614 }
615
616
/*Validates the stream parameters and initializes the shared codec state:
   the function table, the fragment/super block/macro block arrays, and the
   reference frame buffers.
  On failure everything allocated so far has been released again.
  _state: The state to initialize; it is zeroed before use.
  _info:  The stream parameters; a copy is stored in the state.
  _nrefs: The number of reference buffers to allocate (3 or 4).
  Return: 0 on success, TH_EFAULT if _info is NULL or an allocation failed,
   TH_EINVAL if a parameter is out of range, or TH_EIMPL if the frame is too
   large for this platform.*/
int oc_state_init(oc_theora_state *_state,const th_info *_info,int _nrefs){
  int ret;
  /*First validate the parameters.*/
  if(_info==NULL)return TH_EFAULT;
  /*The width and height of the encoded frame must be multiples of 16.
    They must also, when divided by 16, fit into a 16-bit unsigned integer.
    The displayable frame offset coordinates must fit into an 8-bit unsigned
     integer.
    Note that the offset Y in the API is specified on the opposite side from
     how it is specified in the bitstream, because the Y axis is flipped in
     the bitstream.
    The displayable frame must fit inside the encoded frame.
    The containment tests are written in subtraction form (size first, then
     offset against the remaining room) so that they stay correct even if the
     th_info fields are unsigned and pic_x+pic_width or pic_y+pic_height
     would wrap around.
    The color space must be one known by the encoder.*/
  if((_info->frame_width&0xF)||(_info->frame_height&0xF)||
   _info->frame_width<=0||_info->frame_width>=0x100000||
   _info->frame_height<=0||_info->frame_height>=0x100000||
   _info->pic_width>_info->frame_width||
   _info->pic_x>_info->frame_width-_info->pic_width||
   _info->pic_height>_info->frame_height||
   _info->pic_y>_info->frame_height-_info->pic_height||
   _info->pic_x>255||_info->frame_height-_info->pic_height-_info->pic_y>255||
   /*Note: the following <0 comparisons may generate spurious warnings on
      platforms where enums are unsigned.
     We could cast them to unsigned and just use the following >= comparison,
      but there are a number of compilers which will mis-optimize this.
     It's better to live with the spurious warnings.*/
   _info->colorspace<0||_info->colorspace>=TH_CS_NSPACES||
   _info->pixel_fmt<0||_info->pixel_fmt>=TH_PF_NFORMATS){
    return TH_EINVAL;
  }
  memset(_state,0,sizeof(*_state));
  memcpy(&_state->info,_info,sizeof(*_info));
  /*Invert the sense of pic_y to match Theora's right-handed coordinate
     system.*/
  _state->info.pic_y=_info->frame_height-_info->pic_height-_info->pic_y;
  _state->frame_type=OC_UNKWN_FRAME;
  oc_state_vtable_init(_state);
  ret=oc_state_frarray_init(_state);
  if(ret>=0)ret=oc_state_ref_bufs_init(_state,_nrefs);
  if(ret<0){
    /*oc_state_ref_bufs_init() cleans up after itself, so only the fragment
       arrays can still be allocated here.*/
    oc_state_frarray_clear(_state);
    return ret;
  }
  /*If the keyframe_granule_shift is out of range, use the maximum allowable
     value.*/
  if(_info->keyframe_granule_shift<0||_info->keyframe_granule_shift>31){
    _state->info.keyframe_granule_shift=31;
  }
  _state->keyframe_num=0;
  _state->curframe_num=-1;
  /*3.2.0 streams mark the frame index instead of the frame count.
    This was changed with stream version 3.2.1 to conform to other Ogg
     codecs.
    We add an extra bias when computing granule positions for new streams.*/
  _state->granpos_bias=TH_VERSION_CHECK(_info,3,2,1);
  return 0;
}
672
/*Releases everything a successful oc_state_init() allocated: first the
   reference frame buffers, then the fragment, super block and macro block
   arrays.
  _state: The state to clear.*/
void oc_state_clear(oc_theora_state *_state){
  oc_state_ref_bufs_clear(_state);
  oc_state_frarray_clear(_state);
}
677
678
679 /*Duplicates the pixels on the border of the image plane out into the
680 surrounding padding for use by unrestricted motion vectors.
681 This function only adds the left and right borders, and only for the fragment
682 rows specified.
683 _refi: The index of the reference buffer to pad.
684 _pli: The color plane.
685 _y0: The Y coordinate of the first row to pad.
686 _yend: The Y coordinate of the row to stop padding at.*/
oc_state_borders_fill_rows(oc_theora_state * _state,int _refi,int _pli,int _y0,int _yend)687 void oc_state_borders_fill_rows(oc_theora_state *_state,int _refi,int _pli,
688 int _y0,int _yend){
689 th_img_plane *iplane;
690 unsigned char *apix;
691 unsigned char *bpix;
692 unsigned char *epix;
693 int stride;
694 int hpadding;
695 hpadding=OC_UMV_PADDING>>(_pli!=0&&!(_state->info.pixel_fmt&1));
696 iplane=_state->ref_frame_bufs[_refi]+_pli;
697 stride=iplane->stride;
698 apix=iplane->data+_y0*(ptrdiff_t)stride;
699 bpix=apix+iplane->width-1;
700 epix=iplane->data+_yend*(ptrdiff_t)stride;
701 /*Note the use of != instead of <, which allows the stride to be negative.*/
702 while(apix!=epix){
703 memset(apix-hpadding,apix[0],hpadding);
704 memset(bpix+1,bpix[0],hpadding);
705 apix+=stride;
706 bpix+=stride;
707 }
708 }
709
710 /*Duplicates the pixels on the border of the image plane out into the
711 surrounding padding for use by unrestricted motion vectors.
712 This function only adds the top and bottom borders, and must be called after
713 the left and right borders are added.
714 _refi: The index of the reference buffer to pad.
715 _pli: The color plane.*/
oc_state_borders_fill_caps(oc_theora_state * _state,int _refi,int _pli)716 void oc_state_borders_fill_caps(oc_theora_state *_state,int _refi,int _pli){
717 th_img_plane *iplane;
718 unsigned char *apix;
719 unsigned char *bpix;
720 unsigned char *epix;
721 int stride;
722 int hpadding;
723 int vpadding;
724 int fullw;
725 hpadding=OC_UMV_PADDING>>(_pli!=0&&!(_state->info.pixel_fmt&1));
726 vpadding=OC_UMV_PADDING>>(_pli!=0&&!(_state->info.pixel_fmt&2));
727 iplane=_state->ref_frame_bufs[_refi]+_pli;
728 stride=iplane->stride;
729 fullw=iplane->width+(hpadding<<1);
730 apix=iplane->data-hpadding;
731 bpix=iplane->data+(iplane->height-1)*(ptrdiff_t)stride-hpadding;
732 epix=apix-stride*(ptrdiff_t)vpadding;
733 while(apix!=epix){
734 memcpy(apix-stride,apix,fullw);
735 memcpy(bpix+stride,bpix,fullw);
736 apix-=stride;
737 bpix+=stride;
738 }
739 }
740
741 /*Duplicates the pixels on the border of the given reference image out into
742 the surrounding padding for use by unrestricted motion vectors.
743 _state: The context containing the reference buffers.
744 _refi: The index of the reference buffer to pad.*/
oc_state_borders_fill(oc_theora_state * _state,int _refi)745 void oc_state_borders_fill(oc_theora_state *_state,int _refi){
746 int pli;
747 for(pli=0;pli<3;pli++){
748 oc_state_borders_fill_rows(_state,_refi,pli,0,
749 _state->ref_frame_bufs[_refi][pli].height);
750 oc_state_borders_fill_caps(_state,_refi,pli);
751 }
752 }
753
754 /*Determines the offsets in an image buffer to use for motion compensation.
755 _state: The Theora state the offsets are to be computed with.
756 _offsets: Returns the offset for the buffer(s).
757 _offsets[0] is always set.
758 _offsets[1] is set if the motion vector has non-zero fractional
759 components.
760 _pli: The color plane index.
761 _dx: The X component of the motion vector.
762 _dy: The Y component of the motion vector.
763 Return: The number of offsets returned: 1 or 2.*/
oc_state_get_mv_offsets(const oc_theora_state * _state,int _offsets[2],int _pli,int _dx,int _dy)764 int oc_state_get_mv_offsets(const oc_theora_state *_state,int _offsets[2],
765 int _pli,int _dx,int _dy){
766 /*Here is a brief description of how Theora handles motion vectors:
767 Motion vector components are specified to half-pixel accuracy in
768 undecimated directions of each plane, and quarter-pixel accuracy in
769 decimated directions.
770 Integer parts are extracted by dividing (not shifting) by the
771 appropriate amount, with truncation towards zero.
772 These integer values are used to calculate the first offset.
773
774 If either of the fractional parts are non-zero, then a second offset is
775 computed.
776 No third or fourth offsets are computed, even if both components have
777 non-zero fractional parts.
778 The second offset is computed by dividing (not shifting) by the
779 appropriate amount, always truncating _away_ from zero.*/
780 #if 0
781 /*This version of the code doesn't use any tables, but is slower.*/
782 int ystride;
783 int xprec;
784 int yprec;
785 int xfrac;
786 int yfrac;
787 int offs;
788 ystride=_state->ref_ystride[_pli];
789 /*These two variables decide whether we are in half- or quarter-pixel
790 precision in each component.*/
791 xprec=1+(_pli!=0&&!(_state->info.pixel_fmt&1));
792 yprec=1+(_pli!=0&&!(_state->info.pixel_fmt&2));
793 /*These two variables are either 0 if all the fractional bits are zero or -1
794 if any of them are non-zero.*/
795 xfrac=OC_SIGNMASK(-(_dx&(xprec|1)));
796 yfrac=OC_SIGNMASK(-(_dy&(yprec|1)));
797 offs=(_dx>>xprec)+(_dy>>yprec)*ystride;
798 if(xfrac||yfrac){
799 int xmask;
800 int ymask;
801 xmask=OC_SIGNMASK(_dx);
802 ymask=OC_SIGNMASK(_dy);
803 yfrac&=ystride;
804 _offsets[0]=offs-(xfrac&xmask)+(yfrac&ymask);
805 _offsets[1]=offs-(xfrac&~xmask)+(yfrac&~ymask);
806 return 2;
807 }
808 else{
809 _offsets[0]=offs;
810 return 1;
811 }
812 #else
813 /*Using tables simplifies the code, and there's enough arithmetic to hide the
814 latencies of the memory references.*/
815 static const signed char OC_MVMAP[2][64]={
816 {
817 -15,-15,-14,-14,-13,-13,-12,-12,-11,-11,-10,-10, -9, -9, -8,
818 -8, -7, -7, -6, -6, -5, -5, -4, -4, -3, -3, -2, -2, -1, -1, 0,
819 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7,
820 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14, 14, 15, 15
821 },
822 {
823 -7, -7, -7, -7, -6, -6, -6, -6, -5, -5, -5, -5, -4, -4, -4,
824 -4, -3, -3, -3, -3, -2, -2, -2, -2, -1, -1, -1, -1, 0, 0, 0,
825 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3,
826 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7
827 }
828 };
829 static const signed char OC_MVMAP2[2][64]={
830 {
831 -1, 0,-1, 0,-1, 0,-1, 0,-1, 0,-1, 0,-1, 0,-1,
832 0,-1, 0,-1, 0,-1, 0,-1, 0,-1, 0,-1, 0,-1, 0,-1,
833 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1,
834 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1
835 },
836 {
837 -1,-1,-1, 0,-1,-1,-1, 0,-1,-1,-1, 0,-1,-1,-1,
838 0,-1,-1,-1, 0,-1,-1,-1, 0,-1,-1,-1, 0,-1,-1,-1,
839 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1,
840 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1
841 }
842 };
843 int ystride;
844 int qpx;
845 int qpy;
846 int mx;
847 int my;
848 int mx2;
849 int my2;
850 int offs;
851 ystride=_state->ref_ystride[_pli];
852 qpy=_pli!=0&&!(_state->info.pixel_fmt&2);
853 my=OC_MVMAP[qpy][_dy+31];
854 my2=OC_MVMAP2[qpy][_dy+31];
855 qpx=_pli!=0&&!(_state->info.pixel_fmt&1);
856 mx=OC_MVMAP[qpx][_dx+31];
857 mx2=OC_MVMAP2[qpx][_dx+31];
858 offs=my*ystride+mx;
859 if(mx2||my2){
860 _offsets[1]=offs+my2*ystride+mx2;
861 _offsets[0]=offs;
862 return 2;
863 }
864 _offsets[0]=offs;
865 return 1;
866 #endif
867 }
868
/*Reconstructs a single fragment by forwarding every argument, unchanged, to
   the state_frag_recon entry of the state's opt_vtable.*/
void oc_state_frag_recon(const oc_theora_state *_state,ptrdiff_t _fragi,
 int _pli,ogg_int16_t _dct_coeffs[64],int _last_zzi,ogg_uint16_t _dc_quant){
  (*_state->opt_vtable.state_frag_recon)(_state,_fragi,_pli,
   _dct_coeffs,_last_zzi,_dc_quant);
}
874
/*Plain C fragment reconstruction.
  The coefficients in _dct_coeffs are inverse transformed in place, and the
   result is then written into the OC_FRAME_SELF reference buffer, either on
   its own (intra) or combined with one or two predictor locations (inter).
  _state:      The shared codec state.
  _fragi:      The index of the fragment to reconstruct.
  _pli:        The color plane index the fragment belongs to.
  _dct_coeffs: The fragment's coefficients; overwritten with the residue.
  _last_zzi:   Values below 2 select the DC-only shortcut; otherwise it is
                passed through to oc_idct8x8().
  _dc_quant:   The quantizer applied to the DC coefficient.*/
void oc_state_frag_recon_c(const oc_theora_state *_state,ptrdiff_t _fragi,
 int _pli,ogg_int16_t _dct_coeffs[64],int _last_zzi,ogg_uint16_t _dc_quant){
  const unsigned char *ref;
  unsigned char       *dst;
  ptrdiff_t            frag_buf_off;
  int                  mvoffsets[2];
  int                  ystride;
  int                  mb_mode;
  /*Apply the inverse transform.*/
  if(_last_zzi>=2){
    /*General case: dequantize the DC coefficient, then run the full iDCT.*/
    _dct_coeffs[0]=(ogg_int16_t)(_dct_coeffs[0]*(int)_dc_quant);
    oc_idct8x8(_state,_dct_coeffs,_last_zzi);
  }
  else{
    ogg_int16_t dc;
    int         i;
    /*Only a DC component is present.
      This dequantized product is rounded (unlike all the others) because no
       iDCT runs afterwards to do the rounding.*/
    dc=(ogg_int16_t)((_dct_coeffs[0]*(ogg_int32_t)_dc_quant+15)>>5);
    /*LOOP VECTORIZES.*/
    for(i=0;i<64;i++)_dct_coeffs[i]=dc;
  }
  /*Fill in the target buffer.*/
  frag_buf_off=_state->frag_buf_offs[_fragi];
  mb_mode=_state->frags[_fragi].mb_mode;
  ystride=_state->ref_ystride[_pli];
  dst=_state->ref_frame_data[_state->ref_frame_idx[OC_FRAME_SELF]]+frag_buf_off;
  if(mb_mode==OC_MODE_INTRA){
    oc_frag_recon_intra(_state,dst,ystride,_dct_coeffs);
    return;
  }
  /*Inter modes predict from the reference frame associated with the mode.*/
  ref=_state->ref_frame_data[_state->ref_frame_idx[OC_FRAME_FOR_MODE(mb_mode)]]
   +frag_buf_off;
  if(oc_state_get_mv_offsets(_state,mvoffsets,_pli,
   _state->frag_mvs[_fragi][0],_state->frag_mvs[_fragi][1])>1){
    /*Two offsets were returned: hand both predictor locations over.*/
    oc_frag_recon_inter2(_state,
     dst,ref+mvoffsets[0],ref+mvoffsets[1],ystride,_dct_coeffs);
  }
  else{
    oc_frag_recon_inter(_state,dst,ref+mvoffsets[0],ystride,_dct_coeffs);
  }
}
917
918 /*Copies the fragments specified by the lists of fragment indices from one
919 frame to another.
920 _fragis: A pointer to a list of fragment indices.
921 _nfragis: The number of fragment indices to copy.
922 _dst_frame: The reference frame to copy to.
923 _src_frame: The reference frame to copy from.
924 _pli: The color plane the fragments lie in.*/
oc_state_frag_copy_list(const oc_theora_state * _state,const ptrdiff_t * _fragis,ptrdiff_t _nfragis,int _dst_frame,int _src_frame,int _pli)925 void oc_state_frag_copy_list(const oc_theora_state *_state,
926 const ptrdiff_t *_fragis,ptrdiff_t _nfragis,
927 int _dst_frame,int _src_frame,int _pli){
928 _state->opt_vtable.state_frag_copy_list(_state,_fragis,_nfragis,_dst_frame,
929 _src_frame,_pli);
930 }
931
oc_state_frag_copy_list_c(const oc_theora_state * _state,const ptrdiff_t * _fragis,ptrdiff_t _nfragis,int _dst_frame,int _src_frame,int _pli)932 void oc_state_frag_copy_list_c(const oc_theora_state *_state,
933 const ptrdiff_t *_fragis,ptrdiff_t _nfragis,
934 int _dst_frame,int _src_frame,int _pli){
935 const ptrdiff_t *frag_buf_offs;
936 const unsigned char *src_frame_data;
937 unsigned char *dst_frame_data;
938 ptrdiff_t fragii;
939 int ystride;
940 dst_frame_data=_state->ref_frame_data[_state->ref_frame_idx[_dst_frame]];
941 src_frame_data=_state->ref_frame_data[_state->ref_frame_idx[_src_frame]];
942 ystride=_state->ref_ystride[_pli];
943 frag_buf_offs=_state->frag_buf_offs;
944 for(fragii=0;fragii<_nfragis;fragii++){
945 ptrdiff_t frag_buf_off;
946 frag_buf_off=frag_buf_offs[_fragis[fragii]];
947 oc_frag_copy(_state,dst_frame_data+frag_buf_off,
948 src_frame_data+frag_buf_off,ystride);
949 }
950 }
951
/*Filters across a vertical block edge: for each of 8 rows, the two pixels on
   either side of the edge are adjusted using the two pixels beyond them.
  _pix:     The first pixel to the right of the edge, in the first row.
  _ystride: The distance in bytes between consecutive rows.
  _bv:      The bounding values table, indexed with (f+4)>>3, which may be
             negative, so it must point into the interior of the table.*/
static void loop_filter_h(unsigned char *_pix,int _ystride,int *_bv){
  unsigned char *row;
  int            rows_left;
  /*Start two pixels left of the edge so row[0..3] straddles it.*/
  row=_pix-2;
  for(rows_left=8;rows_left-->0;row+=_ystride){
    int delta;
    delta=row[0]-row[3]+3*(row[2]-row[1]);
    /*The _bv array is used to compute the function
      f=OC_CLAMPI(OC_MINI(-_2flimit-f,0),f,OC_MAXI(_2flimit-f,0));
      where _2flimit=_state->loop_filter_limits[_state->qis[0]]<<1;*/
    delta=_bv[(delta+4)>>3];
    row[1]=OC_CLAMP255(row[1]+delta);
    row[2]=OC_CLAMP255(row[2]-delta);
  }
}
967
/*Filters across a horizontal block edge: for each of 8 columns, the two
   pixels on either side of the edge are adjusted using the two pixels beyond
   them.
  _pix:     The first pixel below the edge, in the first column.
  _ystride: The distance in bytes between consecutive rows.
  _bv:      The bounding values table, indexed with (f+4)>>3, which may be
             negative, so it must point into the interior of the table.*/
static void loop_filter_v(unsigned char *_pix,int _ystride,int *_bv){
  unsigned char *r0;
  unsigned char *r1;
  unsigned char *r2;
  unsigned char *r3;
  int            col;
  /*Four consecutive rows, starting two rows above the edge.*/
  r0=_pix-_ystride*2;
  r1=r0+_ystride;
  r2=r0+_ystride*2;
  r3=r0+_ystride*3;
  for(col=0;col<8;col++){
    int delta;
    delta=r0[col]-r3[col]+3*(r2[col]-r1[col]);
    /*The _bv array is used to compute the function
      f=OC_CLAMPI(OC_MINI(-_2flimit-f,0),f,OC_MAXI(_2flimit-f,0));
      where _2flimit=_state->loop_filter_limits[_state->qis[0]]<<1;*/
    delta=_bv[(delta+4)>>3];
    r1[col]=OC_CLAMP255(r1[col]+delta);
    r2[col]=OC_CLAMP255(r2[col]-delta);
  }
}
982
983 /*Initialize the bounding values array used by the loop filter.
984 _bv: Storage for the array.
985 Return: 0 on success, or a non-zero value if no filtering need be applied.*/
oc_state_loop_filter_init(oc_theora_state * _state,int _bv[256])986 int oc_state_loop_filter_init(oc_theora_state *_state,int _bv[256]){
987 int flimit;
988 int i;
989 flimit=_state->loop_filter_limits[_state->qis[0]];
990 if(flimit==0)return 1;
991 memset(_bv,0,sizeof(_bv[0])*256);
992 for(i=0;i<flimit;i++){
993 if(127-i-flimit>=0)_bv[127-i-flimit]=i-flimit;
994 _bv[127-i]=-i;
995 _bv[127+i]=i;
996 if(127+i+flimit<256)_bv[127+i+flimit]=flimit-i;
997 }
998 return 0;
999 }
1000
1001 /*Apply the loop filter to a given set of fragment rows in the given plane.
1002 The filter may be run on the bottom edge, affecting pixels in the next row of
1003 fragments, so this row also needs to be available.
1004 _bv: The bounding values array.
1005 _refi: The index of the frame buffer to filter.
1006 _pli: The color plane to filter.
1007 _fragy0: The Y coordinate of the first fragment row to filter.
1008 _fragy_end: The Y coordinate of the fragment row to stop filtering at.*/
oc_state_loop_filter_frag_rows(const oc_theora_state * _state,int _bv[256],int _refi,int _pli,int _fragy0,int _fragy_end)1009 void oc_state_loop_filter_frag_rows(const oc_theora_state *_state,int _bv[256],
1010 int _refi,int _pli,int _fragy0,int _fragy_end){
1011 _state->opt_vtable.state_loop_filter_frag_rows(_state,_bv,_refi,_pli,
1012 _fragy0,_fragy_end);
1013 }
1014
oc_state_loop_filter_frag_rows_c(const oc_theora_state * _state,int * _bv,int _refi,int _pli,int _fragy0,int _fragy_end)1015 void oc_state_loop_filter_frag_rows_c(const oc_theora_state *_state,int *_bv,
1016 int _refi,int _pli,int _fragy0,int _fragy_end){
1017 const oc_fragment_plane *fplane;
1018 const oc_fragment *frags;
1019 const ptrdiff_t *frag_buf_offs;
1020 unsigned char *ref_frame_data;
1021 ptrdiff_t fragi_top;
1022 ptrdiff_t fragi_bot;
1023 ptrdiff_t fragi0;
1024 ptrdiff_t fragi0_end;
1025 int ystride;
1026 int nhfrags;
1027 _bv+=127;
1028 fplane=_state->fplanes+_pli;
1029 nhfrags=fplane->nhfrags;
1030 fragi_top=fplane->froffset;
1031 fragi_bot=fragi_top+fplane->nfrags;
1032 fragi0=fragi_top+_fragy0*(ptrdiff_t)nhfrags;
1033 fragi0_end=fragi0+(_fragy_end-_fragy0)*(ptrdiff_t)nhfrags;
1034 ystride=_state->ref_ystride[_pli];
1035 frags=_state->frags;
1036 frag_buf_offs=_state->frag_buf_offs;
1037 ref_frame_data=_state->ref_frame_data[_refi];
1038 /*The following loops are constructed somewhat non-intuitively on purpose.
1039 The main idea is: if a block boundary has at least one coded fragment on
1040 it, the filter is applied to it.
1041 However, the order that the filters are applied in matters, and VP3 chose
1042 the somewhat strange ordering used below.*/
1043 while(fragi0<fragi0_end){
1044 ptrdiff_t fragi;
1045 ptrdiff_t fragi_end;
1046 fragi=fragi0;
1047 fragi_end=fragi+nhfrags;
1048 while(fragi<fragi_end){
1049 if(frags[fragi].coded){
1050 unsigned char *ref;
1051 ref=ref_frame_data+frag_buf_offs[fragi];
1052 if(fragi>fragi0)loop_filter_h(ref,ystride,_bv);
1053 if(fragi0>fragi_top)loop_filter_v(ref,ystride,_bv);
1054 if(fragi+1<fragi_end&&!frags[fragi+1].coded){
1055 loop_filter_h(ref+8,ystride,_bv);
1056 }
1057 if(fragi+nhfrags<fragi_bot&&!frags[fragi+nhfrags].coded){
1058 loop_filter_v(ref+(ystride<<3),ystride,_bv);
1059 }
1060 }
1061 fragi++;
1062 }
1063 fragi0+=nhfrags;
1064 }
1065 }
1066
#if defined(OC_DUMP_IMAGES)
/*Writes one reference frame to a 16-bit-per-channel RGB PNG file.
  The file is created in the current directory and named with the format
   "%08i%s.png", using the frame number derived from the state's granule
   position followed by _suf.
  _state: The codec state holding the reference frames.
  _frame: The index into ref_frame_idx selecting the frame to dump.
  _suf:   A suffix inserted between the frame number and ".png".
          It must be short enough for the whole name to fit the local file
           name buffer (at most 48 characters); longer suffixes are rejected
           instead of overflowing the buffer.
  Return: 0 on success, or TH_EFAULT if the suffix is too long, the file
           cannot be opened, memory cannot be allocated, or libpng reports
           an error.*/
int oc_state_dump_frame(const oc_theora_state *_state,int _frame,
 const char *_suf){
  /*Dump a PNG of the reconstructed image.*/
  png_structp    png;
  png_infop      info;
  png_bytep     *image;
  FILE          *fp;
  /*Sized for the frame number, a bounded suffix, ".png" and the terminator;
     see the length check before the sprintf() call.*/
  char           fname[64];
  unsigned char *y_row;
  unsigned char *u_row;
  unsigned char *v_row;
  unsigned char *y;
  unsigned char *u;
  unsigned char *v;
  ogg_int64_t    iframe;
  ogg_int64_t    pframe;
  int            y_stride;
  int            u_stride;
  int            v_stride;
  int            framei;
  int            width;
  int            height;
  int            imgi;
  int            imgj;
  width=_state->info.frame_width;
  height=_state->info.frame_height;
  iframe=_state->granpos>>_state->info.keyframe_granule_shift;
  pframe=_state->granpos-(iframe<<_state->info.keyframe_granule_shift);
  /*"%08i" emits at least 8 characters and, for an int of up to 32 bits (which
     is assumed here), at most the 11 of "-2147483648".
    Reject any suffix that would not fit alongside that worst case, ".png" and
     the terminating NUL, rather than writing past the end of fname.*/
  if(strlen(_suf)>sizeof(fname)-sizeof("-2147483648.png"))return TH_EFAULT;
  sprintf(fname,"%08i%s.png",(int)(iframe+pframe),_suf);
  fp=fopen(fname,"wb");
  if(fp==NULL)return TH_EFAULT;
  /*Each row holds 3 channels of 2 bytes for every pixel.*/
  image=(png_bytep *)oc_malloc_2d(height,6*width,sizeof(**image));
  if(image==NULL){
    fclose(fp);
    return TH_EFAULT;
  }
  png=png_create_write_struct(PNG_LIBPNG_VER_STRING,NULL,NULL,NULL);
  if(png==NULL){
    oc_free_2d(image);
    fclose(fp);
    return TH_EFAULT;
  }
  info=png_create_info_struct(png);
  if(info==NULL){
    png_destroy_write_struct(&png,NULL);
    oc_free_2d(image);
    fclose(fp);
    return TH_EFAULT;
  }
  /*libpng reports errors by jumping back here; release everything acquired
     so far and fail.*/
  if(setjmp(png_jmpbuf(png))){
    png_destroy_write_struct(&png,&info);
    oc_free_2d(image);
    fclose(fp);
    return TH_EFAULT;
  }
  framei=_state->ref_frame_idx[_frame];
  y_row=_state->ref_frame_bufs[framei][0].data;
  u_row=_state->ref_frame_bufs[framei][1].data;
  v_row=_state->ref_frame_bufs[framei][2].data;
  y_stride=_state->ref_frame_bufs[framei][0].stride;
  u_stride=_state->ref_frame_bufs[framei][1].stride;
  v_stride=_state->ref_frame_bufs[framei][2].stride;
  /*Chroma up-sampling is just done with a box filter.
    This is very likely what will actually be used in practice on a real
     display, and also removes one more layer to search in for the source of
     artifacts.
    As an added bonus, it's dead simple.*/
  /*Source rows are read in buffer order while the PNG rows are filled from
     the last one to the first.*/
  for(imgi=height;imgi-->0;){
    int dc;
    y=y_row;
    u=u_row;
    v=v_row;
    for(imgj=0;imgj<6*width;){
      float    yval;
      float    uval;
      float    vval;
      unsigned rval;
      unsigned gval;
      unsigned bval;
      /*This is intentionally slow and very accurate.*/
      yval=(*y-16)*(1.0F/219);
      uval=(*u-128)*(2*(1-0.114F)/224);
      vval=(*v-128)*(2*(1-0.299F)/224);
      rval=OC_CLAMPI(0,(int)(65535*(yval+vval)+0.5F),65535);
      gval=OC_CLAMPI(0,(int)(65535*(
       yval-uval*(0.114F/0.587F)-vval*(0.299F/0.587F))+0.5F),65535);
      bval=OC_CLAMPI(0,(int)(65535*(yval+uval)+0.5F),65535);
      /*Samples are stored most significant byte first.*/
      image[imgi][imgj++]=(unsigned char)(rval>>8);
      image[imgi][imgj++]=(unsigned char)(rval&0xFF);
      image[imgi][imgj++]=(unsigned char)(gval>>8);
      image[imgi][imgj++]=(unsigned char)(gval&0xFF);
      image[imgi][imgj++]=(unsigned char)(bval>>8);
      image[imgi][imgj++]=(unsigned char)(bval&0xFF);
      /*Advance chroma on every odd luma column, or on every column when bit 0
         of the pixel format is set.*/
      dc=(y-y_row&1)|(_state->info.pixel_fmt&1);
      y++;
      u+=dc;
      v+=dc;
    }
    /*dc becomes an all-ones mask when the chroma rows should advance: after
       every second row processed, or after every row when bit 1 of the pixel
       format is set.*/
    dc=-((height-1-imgi&1)|_state->info.pixel_fmt>>1);
    y_row+=y_stride;
    u_row+=dc&u_stride;
    v_row+=dc&v_stride;
  }
  png_init_io(png,fp);
  png_set_compression_level(png,Z_BEST_COMPRESSION);
  png_set_IHDR(png,info,width,height,16,PNG_COLOR_TYPE_RGB,
   PNG_INTERLACE_NONE,PNG_COMPRESSION_TYPE_DEFAULT,PNG_FILTER_TYPE_DEFAULT);
  switch(_state->info.colorspace){
    case TH_CS_ITU_REC_470M:{
      png_set_gAMA(png,info,2.2);
      png_set_cHRM_fixed(png,info,31006,31616,
       67000,32000,21000,71000,14000,8000);
    }break;
    case TH_CS_ITU_REC_470BG:{
      png_set_gAMA(png,info,2.67);
      png_set_cHRM_fixed(png,info,31271,32902,
       64000,33000,29000,60000,15000,6000);
    }break;
    default:break;
  }
  png_set_pHYs(png,info,_state->info.aspect_numerator,
   _state->info.aspect_denominator,0);
  png_set_rows(png,info,image);
  png_write_png(png,info,PNG_TRANSFORM_IDENTITY,NULL);
  /*NOTE(review): png_write_png() is the high-level "write everything" call;
     confirm against the libpng manual whether this additional
     png_write_end() is still required.*/
  png_write_end(png,info);
  png_destroy_write_struct(&png,&info);
  oc_free_2d(image);
  fclose(fp);
  return 0;
}
#endif
1199
1200
1201
/*Converts a granule position to a frame index.
  _encdec:  A pointer that is treated as the start of an oc_theora_state.
  _granpos: The granule position to convert.
  Return: The frame index, or -1 if _granpos is negative.*/
ogg_int64_t th_granule_frame(void *_encdec,ogg_int64_t _granpos){
  oc_theora_state *state;
  ogg_int64_t      key_part;
  ogg_int64_t      delta_part;
  if(_granpos<0)return -1;
  state=(oc_theora_state *)_encdec;
  /*The bits above the keyframe granule shift and the bits below it are
     separated and then summed.*/
  key_part=_granpos>>state->info.keyframe_granule_shift;
  delta_part=_granpos-(key_part<<state->info.keyframe_granule_shift);
  /*3.2.0 streams store the frame index in the granule position.
    3.2.1 and later store the frame count.
    We return the index, so adjust the value if we have a 3.2.1 or later
     stream.*/
  return key_part+delta_part-TH_VERSION_CHECK(&state->info,3,2,1);
}
1218
/*Converts a granule position to a time.
  The result is (frame index + 1) multiplied by fps_denominator/fps_numerator.
  _encdec:  A pointer that is treated as the start of an oc_theora_state.
  _granpos: The granule position to convert.
  Return: The computed time, or -1 if _granpos is negative.*/
double th_granule_time(void *_encdec,ogg_int64_t _granpos){
  oc_theora_state *state;
  if(_granpos<0)return -1;
  state=(oc_theora_state *)_encdec;
  return (th_granule_frame(_encdec,_granpos)+1)*(
   (double)state->info.fps_denominator/state->info.fps_numerator);
}
1228