1 /********************************************************************
2  *                                                                  *
3  * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
4  * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
5  * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
6  * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
7  *                                                                  *
8  * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009                *
9  * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
10  *                                                                  *
11  ********************************************************************
12 
13   function:
14     last mod: $Id$
15 
16  ********************************************************************/
17 #if !defined(_internal_H)
18 # define _internal_H (1)
19 # include <stdlib.h>
20 # include <limits.h>
21 # if defined(HAVE_CONFIG_H)
22 #  include <config.h>
23 # endif
24 # include "theora/codec.h"
25 # include "theora/theora.h"
26 
27 # if defined(_MSC_VER)
28 /*Disable missing EMMS warnings.*/
29 #  pragma warning(disable:4799)
30 /*Thank you Microsoft, I know the order of operations.*/
31 #  pragma warning(disable:4554)
32 # endif
33 /*You, too, gcc.*/
34 # if defined(__GNUC_PREREQ)
35 #  if __GNUC_PREREQ(4,2)
36 #   pragma GCC diagnostic ignored "-Wparentheses"
37 #  endif
38 # endif
39 
40 # include "ocintrin.h"
41 # include "huffman.h"
42 # include "quant.h"
43 
/*Some assembly constructs require aligned operands.
  OC_ALIGN8() and OC_ALIGN16() wrap a declaration and attach the
   compiler-specific 8- or 16-byte alignment request to it.
  The alignment is only requested when the x86 assembly is enabled
   (OC_X86_ASM) and the compiler is either GCC-compatible or MSVC.*/
# if defined(OC_X86_ASM)&&defined(__GNUC__)
#  define OC_ALIGN8(expr) expr __attribute__((aligned(8)))
#  define OC_ALIGN16(expr) expr __attribute__((aligned(16)))
# elif defined(OC_X86_ASM)&&defined(_MSC_VER)
#  define OC_ALIGN8(expr) __declspec (align(8)) expr
#  define OC_ALIGN16(expr) __declspec (align(16)) expr
# endif
/*In every other configuration the macros expand to the bare declaration.*/
# if !defined(OC_ALIGN8)
#  define OC_ALIGN8(expr) expr
# endif
# if !defined(OC_ALIGN16)
#  define OC_ALIGN16(expr) expr
# endif
60 
61 
62 
/*Forward declarations of the structures defined in this header.
  These let the structures refer to each other before their definitions
   appear (e.g., oc_base_opt_vtable takes oc_theora_state pointers, but is
   defined first).*/
typedef struct oc_sb_flags              oc_sb_flags;
typedef struct oc_border_info           oc_border_info;
typedef struct oc_fragment              oc_fragment;
typedef struct oc_fragment_plane        oc_fragment_plane;
typedef struct oc_base_opt_vtable       oc_base_opt_vtable;
typedef struct oc_base_opt_data         oc_base_opt_data;
typedef struct oc_state_dispatch_vtable oc_state_dispatch_vtable;
typedef struct oc_theora_state          oc_theora_state;
71 
72 
73 
/*The vendor string identifying this library release.*/
# define OC_VENDOR_STRING "Xiph.Org libtheora 1.1 20090822 (Thusnelda)"

/*The Theora bitstream version, as major.minor.subminor.*/
# define TH_VERSION_MAJOR (3)
# define TH_VERSION_MINOR (2)
# define TH_VERSION_SUB   (1)
/*Evaluates to non-zero when the version recorded in _info is at least
   _maj._min._sub, comparing the three components in order of significance.
  _info: A pointer to a structure with version_major, version_minor, and
          version_subminor members.
  Every argument may be evaluated more than once, so none of them should have
   side effects.*/
# define TH_VERSION_CHECK(_info,_maj,_min,_sub) \
 ((_info)->version_major>(_maj)|| \
 ((_info)->version_major==(_maj)&& \
 ((_info)->version_minor>(_min)|| \
 ((_info)->version_minor==(_min)&&(_info)->version_subminor>=(_sub)))))
85 
/*Frame types.*/
/*An intra-coded frame (a keyframe).*/
# define OC_INTRA_FRAME (0)
/*An inter-coded (predicted) frame.*/
# define OC_INTER_FRAME (1)
/*The frame type is not yet known, because the frame type decision has not
   been made.*/
# define OC_UNKWN_FRAME (-1)
92 
/*The number of padding pixels added on every side of the reconstructed frame
   buffers.
  The padding allows unrestricted motion vectors to be used without special
   casing.
  The value must be a multiple of 2.*/
# define OC_UMV_PADDING (16)
98 
/*Frame classification indices.
  These select one of the OC_FRAME_* entries of the per-state reference frame
   arrays.*/
/*The previous golden frame.*/
# define OC_FRAME_GOLD (0)
/*The previous frame.*/
# define OC_FRAME_PREV (1)
/*The current frame.*/
# define OC_FRAME_SELF (2)

/*The buffer used for input or output.*/
# define OC_FRAME_IO   (3)
109 
/*Macro block coding modes.*/
/*Not a real mode: marks a macro block that is never coded.*/
# define OC_MODE_INVALID        (-1)
/*The difference from the co-located macro block in the previous frame is
   encoded.*/
# define OC_MODE_INTER_NOMV     (0)
/*No motion compensated prediction is used.*/
# define OC_MODE_INTRA          (1)
/*The difference from the previous frame, offset by the given motion vector,
   is encoded.*/
# define OC_MODE_INTER_MV       (2)
/*The difference from the previous frame, offset by the last coded motion
   vector, is encoded.*/
# define OC_MODE_INTER_MV_LAST  (3)
/*The difference from the previous frame, offset by the second to last coded
   motion vector, is encoded.*/
# define OC_MODE_INTER_MV_LAST2 (4)
/*The difference from the co-located macro block in the previous golden frame
   is encoded.*/
# define OC_MODE_GOLDEN_NOMV    (5)
/*The difference from the previous golden frame, offset by the given motion
   vector, is encoded.*/
# define OC_MODE_GOLDEN_MV      (6)
/*The difference from the previous frame is encoded, with each block offset by
   its own individually given motion vector.*/
# define OC_MODE_INTER_MV_FOUR  (7)
/*The number of (coded) modes; OC_MODE_INVALID is not counted.*/
# define OC_NMODES              (8)
137 
/*Determines the reference frame used for a given MB mode.
  _x: The macro block mode (one of the OC_MODE_* values).
  NOTE(review): OC_UNIBBLE_TABLE32() is not defined in this header.
   This presumably looks up its last argument in the eight preceding entries,
    which are listed in mode order.
   Under that reading OC_MODE_INTRA maps to OC_FRAME_SELF, the two GOLDEN modes
    map to OC_FRAME_GOLD, and every other mode maps to OC_FRAME_PREV.
   Confirm against the macro's definition.*/
#define OC_FRAME_FOR_MODE(_x) \
 OC_UNIBBLE_TABLE32(OC_FRAME_PREV,OC_FRAME_SELF,OC_FRAME_PREV,OC_FRAME_PREV, \
  OC_FRAME_PREV,OC_FRAME_GOLD,OC_FRAME_GOLD,OC_FRAME_PREV,(_x))
142 
/*States of the packet state machine shared by the encoder and decoder.
  Each names the next packet to emit/read.*/

/*The codec info header comes next.*/
# define OC_PACKET_INFO_HDR    (-3)
/*The comment header comes next.*/
# define OC_PACKET_COMMENT_HDR (-2)
/*The codec setup header comes next.*/
# define OC_PACKET_SETUP_HDR   (-1)
/*There are no more packets to emit/read.*/
# define OC_PACKET_DONE        (INT_MAX)
153 
154 
155 
156 /*Super blocks are 32x32 segments of pixels in a single color plane indexed
157    in image order.
158   Internally, super blocks are broken up into four quadrants, each of which
159    contains a 2x2 pattern of blocks, each of which is an 8x8 block of pixels.
160   Quadrants, and the blocks within them, are indexed in a special order called
161    a "Hilbert curve" within the super block.
162 
163   In order to differentiate between the Hilbert-curve indexing strategy and
164    the regular image order indexing strategy, blocks indexed in image order
165    are called "fragments".
166   Fragments are indexed in image order, left to right, then bottom to top,
167    from Y' plane to Cb plane to Cr plane.
168 
169   The co-located fragments in all image planes corresponding to the location
170    of a single quadrant of a luma plane super block form a macro block.
171   Thus there is only a single set of macro blocks for all planes, each of which
172    contains between 6 and 12 fragments, depending on the pixel format.
173   Therefore macro block information is kept in a separate set of arrays from
174    super blocks to avoid unused space in the other planes.
175   The lists are indexed in super block order.
  That is, macro block mbi within (luma plane) super block sbi is stored at
   index (sbi<<2|mbi).
178   Thus the number of macro blocks in each dimension is always twice the number
179    of super blocks, even when only an odd number fall inside the coded frame.
180   These "extra" macro blocks are just an artifact of our internal data layout,
181    and not part of the coded stream; they are flagged with a negative MB mode.*/
182 
183 
184 
/*The fragment numbers of the four blocks in a single quadrant of a super
   block.*/
typedef ptrdiff_t   oc_sb_map_quad[4];
/*The fragment numbers for a whole super block: four quadrants, each laid out
   like an oc_sb_map_quad.*/
typedef ptrdiff_t   oc_sb_map[4][4];
/*The fragment numbers of a macro block within a single plane.*/
typedef ptrdiff_t   oc_mb_map_plane[4];
/*The fragment numbers of a macro block in all three planes, each laid out
   like an oc_mb_map_plane.*/
typedef ptrdiff_t   oc_mb_map[3][4];
/*A motion vector, stored as a pair of signed bytes.*/
typedef signed char oc_mv[2];
195 
196 
197 
/*Super block information.
  All three flags are bit-fields packed into unsigned char storage.*/
struct oc_sb_flags{
  /*NOTE(review): presumably set when every fragment of the super block is
     coded; confirm against the code that fills in these flags.*/
  unsigned char coded_fully:1;
  /*NOTE(review): presumably set when only some fragments of the super block
     are coded; confirm against the code that fills in these flags.*/
  unsigned char coded_partially:1;
  /*A 4-bit mask.
    NOTE(review): presumably one validity bit for each of the four quadrants
     of the super block; confirm.*/
  unsigned char quad_valid:4;
};
204 
205 
206 
/*Information about a fragment which intersects the border of the displayable
   region.
  This marks which pixels belong to the displayable region.
  The unique patterns are kept in the borders list of oc_theora_state.*/
struct oc_border_info{
  /*A bit mask marking which pixels are in the displayable region.
    Pixel (x,y) corresponds to bit (y<<3|x), so the 64 bits cover one 8x8
     fragment.*/
  ogg_int64_t mask;
  /*The number of pixels in the displayable region.
    This is always positive, and always less than 64.*/
  int         npixels;
};
218 
219 
220 
/*Fragment information.
  Every member is a bit-field; the widths add up to 32 bits.*/
struct oc_fragment{
  /*A flag indicating whether or not this fragment is coded.*/
  unsigned   coded:1;
  /*A flag indicating that this entire fragment lies outside the displayable
     region of the frame.
    Note the contrast with an invalid macro block, which is outside the coded
     frame, not just the displayable one.
    There are no fragments outside the coded frame by construction.*/
  unsigned   invalid:1;
  /*The index of the quality index used for this fragment's AC coefficients.*/
  unsigned   qii:6;
  /*The mode of the macroblock this fragment belongs to.
    This is an unsigned 3-bit field, so it holds the coded modes 0...7 but
     cannot represent OC_MODE_INVALID (-1).*/
  unsigned   mb_mode:3;
  /*The index of the associated border information for fragments which lie
     partially outside the displayable region.
    For fragments completely inside or outside this region, this is -1.
    As a signed 5-bit field this can hold -16...15, which covers the 16
     entries of the borders list in oc_theora_state.
    Note that the C standard requires an explicit signed keyword for bitfield
     types, since some compilers may treat them as unsigned without it.*/
  signed int borderi:5;
  /*The prediction-corrected DC component.
    Note that the C standard requires an explicit signed keyword for bitfield
     types, since some compilers may treat them as unsigned without it.*/
  signed int dc:16;
};
246 
247 
248 
/*A description of each fragment plane.
  oc_theora_state keeps one of these for each of its three planes.
  The fragment counts and offsets use signed types, while the super block
   counts and offsets are unsigned.*/
struct oc_fragment_plane{
  /*The number of fragments in the horizontal direction.*/
  int       nhfrags;
  /*The number of fragments in the vertical direction.*/
  int       nvfrags;
  /*The offset of the first fragment in the plane.*/
  ptrdiff_t froffset;
  /*The total number of fragments in the plane.*/
  ptrdiff_t nfrags;
  /*The number of super blocks in the horizontal direction.*/
  unsigned  nhsbs;
  /*The number of super blocks in the vertical direction.*/
  unsigned  nvsbs;
  /*The offset of the first super block in the plane.*/
  unsigned  sboffset;
  /*The total number of super blocks in the plane.*/
  unsigned  nsbs;
};
268 
269 
270 
/*The shared (encoder and decoder) functions that have accelerated variants.
  Each member has the same signature as the pure-C function of the same name
   with a _c suffix (e.g., frag_copy and oc_frag_copy_c()).
  NOTE(review): presumably oc_state_vtable_init_c() installs those pure-C
   versions; confirm in its implementation.*/
struct oc_base_opt_vtable{
  /*Same signature as oc_frag_copy_c().*/
  void (*frag_copy)(unsigned char *_dst,
   const unsigned char *_src,int _ystride);
  /*Same signature as oc_frag_recon_intra_c().*/
  void (*frag_recon_intra)(unsigned char *_dst,int _ystride,
   const ogg_int16_t _residue[64]);
  /*Same signature as oc_frag_recon_inter_c().*/
  void (*frag_recon_inter)(unsigned char *_dst,
   const unsigned char *_src,int _ystride,const ogg_int16_t _residue[64]);
  /*Same signature as oc_frag_recon_inter2_c(); takes two source pointers.*/
  void (*frag_recon_inter2)(unsigned char *_dst,const unsigned char *_src1,
   const unsigned char *_src2,int _ystride,const ogg_int16_t _residue[64]);
  /*Same signature as oc_idct8x8_c().*/
  void (*idct8x8)(ogg_int16_t _y[64],int _last_zzi);
  /*Same signature as oc_state_frag_recon_c().*/
  void (*state_frag_recon)(const oc_theora_state *_state,ptrdiff_t _fragi,
   int _pli,ogg_int16_t _dct_coeffs[64],int _last_zzi,ogg_uint16_t _dc_quant);
  /*Same signature as oc_state_frag_copy_list_c().*/
  void (*state_frag_copy_list)(const oc_theora_state *_state,
   const ptrdiff_t *_fragis,ptrdiff_t _nfragis,
   int _dst_frame,int _src_frame,int _pli);
  /*Same signature as oc_state_loop_filter_frag_rows_c().*/
  void (*state_loop_filter_frag_rows)(const oc_theora_state *_state,
   int _bv[256],int _refi,int _pli,int _fragy0,int _fragy_end);
  /*Same signature as oc_restore_fpu_c().*/
  void (*restore_fpu)(void);
};
291 
/*The shared (encoder and decoder) tables that vary according to which variants
   of the above functions are used.*/
struct oc_base_opt_data{
  /*NOTE(review): presumably the zig-zag scan table (cf. OC_FZIG_ZAG) in the
     order expected by the selected function variants; confirm where this is
     assigned.*/
  const unsigned char *dct_fzig_zag;
};
297 
298 
/*State information common to both the encoder and decoder.*/
struct oc_theora_state{
  /*The stream information.*/
  th_info             info;
  /*Table for shared accelerated functions.*/
  oc_base_opt_vtable  opt_vtable;
  /*Table for shared data used by accelerated functions.*/
  oc_base_opt_data    opt_data;
  /*CPU flags to detect the presence of extended instruction sets.*/
  ogg_uint32_t        cpu_flags;
  /*The fragment plane descriptions.*/
  oc_fragment_plane   fplanes[3];
  /*The list of fragments, indexed in image order.*/
  oc_fragment        *frags;
  /*The offset into the reference frame buffer to the upper-left pixel of
     each fragment.*/
  ptrdiff_t          *frag_buf_offs;
  /*The motion vector for each fragment.*/
  oc_mv              *frag_mvs;
  /*The total number of fragments in a single frame.*/
  ptrdiff_t           nfrags;
  /*The list of super block maps, indexed in image order.*/
  oc_sb_map          *sb_maps;
  /*The list of super block flags, indexed in image order.*/
  oc_sb_flags        *sb_flags;
  /*The total number of super blocks in a single frame.*/
  unsigned            nsbs;
  /*The fragments from each color plane that belong to each macro block.
    Fragments are stored in image order (left to right then top to bottom).
    When chroma components are decimated, the extra fragments have an index of
     -1.*/
  oc_mb_map          *mb_maps;
  /*The list of macro block modes.
    A negative number indicates the macro block lies entirely outside the
     coded frame.*/
  signed char        *mb_modes;
  /*The number of macro blocks in the X direction.*/
  unsigned            nhmbs;
  /*The number of macro blocks in the Y direction.*/
  unsigned            nvmbs;
  /*The total number of macro blocks.*/
  size_t              nmbs;
  /*The list of coded fragments, in coded order.
    Uncoded fragments are stored in reverse order from the end of the list.*/
  ptrdiff_t          *coded_fragis;
  /*The number of coded fragments in each plane.*/
  ptrdiff_t           ncoded_fragis[3];
  /*The total number of coded fragments.*/
  ptrdiff_t           ntotal_coded_fragis;
  /*The index of the buffers being used for each OC_FRAME_* reference frame.*/
  int                 ref_frame_idx[4];
  /*The actual buffers used for the previously decoded frames.*/
  th_ycbcr_buffer     ref_frame_bufs[4];
  /*The storage for the reference frame buffers.*/
  unsigned char      *ref_frame_data[4];
  /*The strides for each plane in the reference frames.*/
  int                 ref_ystride[3];
  /*The number of unique border patterns.*/
  int                 nborders;
  /*The unique border patterns for all border fragments.
    The borderi field of fragments which straddle the border indexes this
     list.*/
  oc_border_info      borders[16];
  /*The frame number of the last keyframe.*/
  ogg_int64_t         keyframe_num;
  /*The frame number of the current frame.*/
  ogg_int64_t         curframe_num;
  /*The granpos of the current frame.*/
  ogg_int64_t         granpos;
  /*The type of the current frame.
    NOTE(review): this field is unsigned, while OC_UNKWN_FRAME is (-1).
     Storing that value here would yield UCHAR_MAX, which does not compare
      equal to the macro after integer promotion.
     Confirm whether OC_UNKWN_FRAME is ever assigned to or tested against this
      field.*/
  unsigned char       frame_type;
  /*The bias to add to the frame count when computing granule positions.*/
  unsigned char       granpos_bias;
  /*The number of quality indices used in the current frame.*/
  unsigned char       nqis;
  /*The quality indices of the current frame.*/
  unsigned char       qis[3];
  /*The dequantization tables, stored in zig-zag order, and indexed by
     qi, pli, qti, and zzi.*/
  ogg_uint16_t       *dequant_tables[64][3][2];
  /*Table storage with the same [64][3][2] dimensions, declared through
     OC_ALIGN16() so it is 16-byte aligned when that macro is active.
    NOTE(review): presumably the pointers in dequant_tables refer into this
     array; confirm in the code that initializes them.*/
  OC_ALIGN16(oc_quant_table      dequant_table_data[64][3][2]);
  /*Loop filter strength parameters.*/
  unsigned char       loop_filter_limits[64];
};
383 
384 
385 
/*The function type used to fill in the chroma plane motion vectors for a
   macro block when 4 different motion vectors are specified in the luma
   plane.
  _cbmvs: The chroma block-level motion vectors to fill in.
  _lbmvs: The luma block-level motion vectors.*/
typedef void (*oc_set_chroma_mvs_func)(oc_mv _cbmvs[4],const oc_mv _lbmvs[4]);
394 
395 
396 
/*A map from the index in the zig zag scan to the coefficient number in a
   block.
  NOTE(review): the table has 128 entries although a block has only 64
   coefficients; presumably the extra entries handle out-of-range scan
   indices.
  Confirm against the table's definition.*/
extern const unsigned char OC_FZIG_ZAG[128];
/*A map from the coefficient number in a block to its index in the zig zag
   scan.*/
extern const unsigned char OC_IZIG_ZAG[64];
/*A map from physical macro block ordering to bitstream macro block
   ordering within a super block.*/
extern const unsigned char OC_MB_MAP[2][2];
/*A list of the indices in the oc_mb_map array that can be valid for each of
   the various chroma decimation types.
  The second dimension, 12, equals the total number of entries in an oc_mb_map
   (3 planes of 4).*/
extern const unsigned char OC_MB_MAP_IDXS[TH_PF_NFORMATS][12];
/*The number of indices in the oc_mb_map array that can be valid for each of
   the various chroma decimation types.*/
extern const unsigned char OC_MB_MAP_NIDXS[TH_PF_NFORMATS];
/*A table of functions used to fill in the Cb,Cr plane motion vectors for a
   macro block when 4 different motion vectors are specified in the luma
   plane.
  There is one entry per pixel format.*/
extern const oc_set_chroma_mvs_func OC_SET_CHROMA_MVS_TABLE[TH_PF_NFORMATS];
416 
417 
418 
/*General-purpose helpers.
  NOTE(review): oc_ilog() presumably returns the number of bits needed to
   represent _v; confirm in the implementation.
  NOTE(review): oc_malloc_2d() and oc_calloc_2d() presumably allocate a
   _height by _width array of _sz-byte elements that is released with
   oc_free_2d(); confirm in the implementation.*/
int oc_ilog(unsigned _v);
void **oc_malloc_2d(size_t _height,size_t _width,size_t _sz);
void **oc_calloc_2d(size_t _height,size_t _width,size_t _sz);
void oc_free_2d(void *_ptr);

/*NOTE(review): presumably fills _dst with a vertically flipped view of _src;
   confirm in the implementation.*/
void oc_ycbcr_buffer_flip(th_ycbcr_buffer _dst,
 const th_ycbcr_buffer _src);

/*Setup, teardown, and maintenance of the shared codec state.
  In the functions below, _refi, _frame, and _buf_idx select a frame buffer and
   _pli selects a plane.*/
int oc_state_init(oc_theora_state *_state,const th_info *_info,int _nrefs);
void oc_state_clear(oc_theora_state *_state);
void oc_state_vtable_init_c(oc_theora_state *_state);
void oc_state_borders_fill_rows(oc_theora_state *_state,int _refi,int _pli,
 int _y0,int _yend);
void oc_state_borders_fill_caps(oc_theora_state *_state,int _refi,int _pli);
void oc_state_borders_fill(oc_theora_state *_state,int _refi);
void oc_state_fill_buffer_ptrs(oc_theora_state *_state,int _buf_idx,
 th_ycbcr_buffer _img);
int oc_state_mbi_for_pos(oc_theora_state *_state,int _mbx,int _mby);
int oc_state_get_mv_offsets(const oc_theora_state *_state,int _offsets[2],
 int _pli,int _dx,int _dy);

/*Loop filter entry points.*/
int oc_state_loop_filter_init(oc_theora_state *_state,int *_bv);
void oc_state_loop_filter(oc_theora_state *_state,int _frame);
/*Only declared when OC_DUMP_IMAGES is defined.*/
#if defined(OC_DUMP_IMAGES)
int oc_state_dump_frame(const oc_theora_state *_state,int _frame,
 const char *_suf);
#endif
446 
/*Shared accelerated functions.
  Each of these corresponds to a member of oc_base_opt_vtable.
  The ones whose vtable member does not already take the state receive it as
   an additional leading _state argument.
  NOTE(review): presumably each one dispatches through _state->opt_vtable;
   confirm in the implementation.*/
void oc_frag_copy(const oc_theora_state *_state,unsigned char *_dst,
 const unsigned char *_src,int _ystride);
void oc_frag_recon_intra(const oc_theora_state *_state,
 unsigned char *_dst,int _dst_ystride,const ogg_int16_t _residue[64]);
void oc_frag_recon_inter(const oc_theora_state *_state,unsigned char *_dst,
 const unsigned char *_src,int _ystride,const ogg_int16_t _residue[64]);
void oc_frag_recon_inter2(const oc_theora_state *_state,
 unsigned char *_dst,const unsigned char *_src1,const unsigned char *_src2,
 int _ystride,const ogg_int16_t _residue[64]);
void oc_idct8x8(const oc_theora_state *_state,ogg_int16_t _y[64],int _last_zzi);
void oc_state_frag_recon(const oc_theora_state *_state,ptrdiff_t _fragi,
 int _pli,ogg_int16_t _dct_coeffs[64],int _last_zzi,ogg_uint16_t _dc_quant);
void oc_state_frag_copy_list(const oc_theora_state *_state,
 const ptrdiff_t *_fragis,ptrdiff_t _nfragis,
 int _dst_frame,int _src_frame,int _pli);
void oc_state_loop_filter_frag_rows(const oc_theora_state *_state,
 int _bv[256],int _refi,int _pli,int _fragy0,int _fragy_end);
void oc_restore_fpu(const oc_theora_state *_state);
466 
/*Default pure-C implementations.
  Each signature matches the corresponding function pointer member of
   oc_base_opt_vtable.*/
void oc_frag_copy_c(unsigned char *_dst,
 const unsigned char *_src,int _src_ystride);
void oc_frag_recon_intra_c(unsigned char *_dst,int _dst_ystride,
 const ogg_int16_t _residue[64]);
void oc_frag_recon_inter_c(unsigned char *_dst,
 const unsigned char *_src,int _ystride,const ogg_int16_t _residue[64]);
void oc_frag_recon_inter2_c(unsigned char *_dst,const unsigned char *_src1,
 const unsigned char *_src2,int _ystride,const ogg_int16_t _residue[64]);
void oc_idct8x8_c(ogg_int16_t _y[64],int _last_zzi);
void oc_state_frag_recon_c(const oc_theora_state *_state,ptrdiff_t _fragi,
 int _pli,ogg_int16_t _dct_coeffs[64],int _last_zzi,ogg_uint16_t _dc_quant);
void oc_state_frag_copy_list_c(const oc_theora_state *_state,
 const ptrdiff_t *_fragis,ptrdiff_t _nfragis,
 int _dst_frame,int _src_frame,int _pli);
void oc_state_loop_filter_frag_rows_c(const oc_theora_state *_state,
 int _bv[256],int _refi,int _pli,int _fragy0,int _fragy_end);
void oc_restore_fpu_c(void);
485 
/*We need a way to call a few encoder functions without introducing a link-time
   dependency into the decoder, while still allowing the old alpha API which
   does not distinguish between encoder and decoder objects to be used.
  We do this by placing a function table at the start of the encoder object
   which can dispatch into the encoder library.
  We do a similar thing for the decoder in case we ever decide to split off a
   common base library.
  The four function types below are the entries of that table; each takes the
   old-API theora_state object as its first argument.*/
/*Clears the given state object.*/
typedef void (*oc_state_clear_func)(theora_state *_th);
/*Handles control request _req, with _buf and _buf_sz describing the request's
   buffer.*/
typedef int (*oc_state_control_func)(theora_state *th,int _req,
 void *_buf,size_t _buf_sz);
/*Converts a granule position to a frame number.*/
typedef ogg_int64_t (*oc_state_granule_frame_func)(theora_state *_th,
 ogg_int64_t _granulepos);
/*Converts a granule position to a time, returned as a double.*/
typedef double (*oc_state_granule_time_func)(theora_state *_th,
 ogg_int64_t _granulepos);
500 
501 
/*The function table used to dispatch old-API calls into the encoder or
   decoder.
  NOTE(review): presumably each entry backs the old API function of the
   matching name (e.g., theora_clear(), theora_control()); confirm against the
   API wrappers.*/
struct oc_state_dispatch_vtable{
  /*The handler that clears the state object.*/
  oc_state_clear_func         clear;
  /*The handler for control requests.*/
  oc_state_control_func       control;
  /*The handler that converts a granule position to a frame number.*/
  oc_state_granule_frame_func granule_frame;
  /*The handler that converts a granule position to a time.*/
  oc_state_granule_time_func  granule_time;
};
508 
509 #endif
510