1 /******************************************************************** 2 * * 3 * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * 4 * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * 5 * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * 6 * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * 7 * * 8 * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 * 9 * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * 10 * * 11 ******************************************************************** 12 13 function: 14 last mod: $Id$ 15 16 ********************************************************************/ 17 #if !defined(_internal_H) 18 # define _internal_H (1) 19 # include <stdlib.h> 20 # include <limits.h> 21 # if defined(HAVE_CONFIG_H) 22 # include <config.h> 23 # endif 24 # include "theora/codec.h" 25 # include "theora/theora.h" 26 27 # if defined(_MSC_VER) 28 /*Disable missing EMMS warnings.*/ 29 # pragma warning(disable:4799) 30 /*Thank you Microsoft, I know the order of operations.*/ 31 # pragma warning(disable:4554) 32 # endif 33 /*You, too, gcc.*/ 34 # if defined(__GNUC_PREREQ) 35 # if __GNUC_PREREQ(4,2) 36 # pragma GCC diagnostic ignored "-Wparentheses" 37 # endif 38 # endif 39 40 # include "ocintrin.h" 41 # include "huffman.h" 42 # include "quant.h" 43 44 /*Some assembly constructs require aligned operands.*/ 45 # if defined(OC_X86_ASM) 46 # if defined(__GNUC__) 47 # define OC_ALIGN8(expr) expr __attribute__((aligned(8))) 48 # define OC_ALIGN16(expr) expr __attribute__((aligned(16))) 49 # elif defined(_MSC_VER) 50 # define OC_ALIGN8(expr) __declspec (align(8)) expr 51 # define OC_ALIGN16(expr) __declspec (align(16)) expr 52 # endif 53 # endif 54 # if !defined(OC_ALIGN8) 55 # define OC_ALIGN8(expr) expr 56 # endif 57 # if !defined(OC_ALIGN16) 58 # define OC_ALIGN16(expr) expr 59 # endif 60 61 62 63 typedef struct oc_sb_flags oc_sb_flags; 64 typedef struct oc_border_info oc_border_info; 65 typedef struct oc_fragment oc_fragment; 66 typedef struct oc_fragment_plane oc_fragment_plane; 67 typedef struct oc_base_opt_vtable oc_base_opt_vtable; 68 typedef struct oc_base_opt_data oc_base_opt_data; 69 typedef struct oc_state_dispatch_vtable oc_state_dispatch_vtable; 70 typedef struct oc_theora_state oc_theora_state; 71 72 73 74 /*This library's version.*/ 75 # define OC_VENDOR_STRING "Xiph.Org libtheora 1.1 20090822 (Thusnelda)" 76 77 /*Theora bitstream version.*/ 78 # define TH_VERSION_MAJOR (3) 79 # define TH_VERSION_MINOR (2) 80 # define TH_VERSION_SUB (1) 81 # define TH_VERSION_CHECK(_info,_maj,_min,_sub) \ 82 ((_info)->version_major>(_maj)||(_info)->version_major==(_maj)&& \ 83 ((_info)->version_minor>(_min)||(_info)->version_minor==(_min)&& \ 84 (_info)->version_subminor>=(_sub))) 85 86 /*A keyframe.*/ 87 #define OC_INTRA_FRAME (0) 88 /*A predicted frame.*/ 89 #define OC_INTER_FRAME (1) 90 /*A frame of unknown type (frame type decision has not yet been made).*/ 91 #define OC_UNKWN_FRAME (-1) 92 93 /*The amount of padding to add to the reconstructed frame buffers on all 94 sides. 95 This is used to allow unrestricted motion vectors without special casing. 96 This must be a multiple of 2.*/ 97 #define OC_UMV_PADDING (16) 98 99 /*Frame classification indices.*/ 100 /*The previous golden frame.*/ 101 #define OC_FRAME_GOLD (0) 102 /*The previous frame.*/ 103 #define OC_FRAME_PREV (1) 104 /*The current frame.*/ 105 #define OC_FRAME_SELF (2) 106 107 /*The input or output buffer.*/ 108 #define OC_FRAME_IO (3) 109 110 /*Macroblock modes.*/ 111 /*Macro block is invalid: It is never coded.*/ 112 #define OC_MODE_INVALID (-1) 113 /*Encoded difference from the same macro block in the previous frame.*/ 114 #define OC_MODE_INTER_NOMV (0) 115 /*Encoded with no motion compensated prediction.*/ 116 #define OC_MODE_INTRA (1) 117 /*Encoded difference from the previous frame offset by the given motion 118 vector.*/ 119 #define OC_MODE_INTER_MV (2) 120 /*Encoded difference from the previous frame offset by the last coded motion 121 vector.*/ 122 #define OC_MODE_INTER_MV_LAST (3) 123 /*Encoded difference from the previous frame offset by the second to last 124 coded motion vector.*/ 125 #define OC_MODE_INTER_MV_LAST2 (4) 126 /*Encoded difference from the same macro block in the previous golden 127 frame.*/ 128 #define OC_MODE_GOLDEN_NOMV (5) 129 /*Encoded difference from the previous golden frame offset by the given motion 130 vector.*/ 131 #define OC_MODE_GOLDEN_MV (6) 132 /*Encoded difference from the previous frame offset by the individual motion 133 vectors given for each block.*/ 134 #define OC_MODE_INTER_MV_FOUR (7) 135 /*The number of (coded) modes.*/ 136 #define OC_NMODES (8) 137 138 /*Determines the reference frame used for a given MB mode.*/ 139 #define OC_FRAME_FOR_MODE(_x) \ 140 OC_UNIBBLE_TABLE32(OC_FRAME_PREV,OC_FRAME_SELF,OC_FRAME_PREV,OC_FRAME_PREV, \ 141 OC_FRAME_PREV,OC_FRAME_GOLD,OC_FRAME_GOLD,OC_FRAME_PREV,(_x)) 142 143 /*Constants for the packet state machine common between encoder and decoder.*/ 144 145 /*Next packet to emit/read: Codec info header.*/ 146 #define OC_PACKET_INFO_HDR (-3) 147 /*Next packet to emit/read: Comment header.*/ 148 #define OC_PACKET_COMMENT_HDR (-2) 149 /*Next packet to emit/read: Codec setup header.*/ 150 #define OC_PACKET_SETUP_HDR (-1) 151 /*No more packets to emit/read.*/ 152 #define OC_PACKET_DONE (INT_MAX) 153 154 155 156 /*Super blocks are 32x32 segments of pixels in a single color plane indexed 157 in image order. 158 Internally, super blocks are broken up into four quadrants, each of which 159 contains a 2x2 pattern of blocks, each of which is an 8x8 block of pixels. 160 Quadrants, and the blocks within them, are indexed in a special order called 161 a "Hilbert curve" within the super block. 162 163 In order to differentiate between the Hilbert-curve indexing strategy and 164 the regular image order indexing strategy, blocks indexed in image order 165 are called "fragments". 166 Fragments are indexed in image order, left to right, then bottom to top, 167 from Y' plane to Cb plane to Cr plane. 168 169 The co-located fragments in all image planes corresponding to the location 170 of a single quadrant of a luma plane super block form a macro block. 171 Thus there is only a single set of macro blocks for all planes, each of which 172 contains between 6 and 12 fragments, depending on the pixel format. 173 Therefore macro block information is kept in a separate set of arrays from 174 super blocks to avoid unused space in the other planes. 175 The lists are indexed in super block order. 176 That is, the macro block corresponding to the macro block mbi in (luma plane) 177 super block sbi is at index (sbi<<2|mbi). 178 Thus the number of macro blocks in each dimension is always twice the number 179 of super blocks, even when only an odd number fall inside the coded frame. 180 These "extra" macro blocks are just an artifact of our internal data layout, 181 and not part of the coded stream; they are flagged with a negative MB mode.*/ 182 183 184 185 /*A single quadrant of the map from a super block to fragment numbers.*/ 186 typedef ptrdiff_t oc_sb_map_quad[4]; 187 /*A map from a super block to fragment numbers.*/ 188 typedef oc_sb_map_quad oc_sb_map[4]; 189 /*A single plane of the map from a macro block to fragment numbers.*/ 190 typedef ptrdiff_t oc_mb_map_plane[4]; 191 /*A map from a macro block to fragment numbers.*/ 192 typedef oc_mb_map_plane oc_mb_map[3]; 193 /*A motion vector.*/ 194 typedef signed char oc_mv[2]; 195 196 197 198 /*Super block information.*/ 199 struct oc_sb_flags{ 200 unsigned char coded_fully:1; 201 unsigned char coded_partially:1; 202 unsigned char quad_valid:4; 203 }; 204 205 206 207 /*Information about a fragment which intersects the border of the displayable 208 region. 209 This marks which pixels belong to the displayable region.*/ 210 struct oc_border_info{ 211 /*A bit mask marking which pixels are in the displayable region. 212 Pixel (x,y) corresponds to bit (y<<3|x).*/ 213 ogg_int64_t mask; 214 /*The number of pixels in the displayable region. 215 This is always positive, and always less than 64.*/ 216 int npixels; 217 }; 218 219 220 221 /*Fragment information.*/ 222 struct oc_fragment{ 223 /*A flag indicating whether or not this fragment is coded.*/ 224 unsigned coded:1; 225 /*A flag indicating that this entire fragment lies outside the displayable 226 region of the frame. 227 Note the contrast with an invalid macro block, which is outside the coded 228 frame, not just the displayable one. 229 There are no fragments outside the coded frame by construction.*/ 230 unsigned invalid:1; 231 /*The index of the quality index used for this fragment's AC coefficients.*/ 232 unsigned qii:6; 233 /*The mode of the macroblock this fragment belongs to.*/ 234 unsigned mb_mode:3; 235 /*The index of the associated border information for fragments which lie 236 partially outside the displayable region. 237 For fragments completely inside or outside this region, this is -1. 238 Note that the C standard requires an explicit signed keyword for bitfield 239 types, since some compilers may treat them as unsigned without it.*/ 240 signed int borderi:5; 241 /*The prediction-corrected DC component. 242 Note that the C standard requires an explicit signed keyword for bitfield 243 types, since some compilers may treat them as unsigned without it.*/ 244 signed int dc:16; 245 }; 246 247 248 249 /*A description of each fragment plane.*/ 250 struct oc_fragment_plane{ 251 /*The number of fragments in the horizontal direction.*/ 252 int nhfrags; 253 /*The number of fragments in the vertical direction.*/ 254 int nvfrags; 255 /*The offset of the first fragment in the plane.*/ 256 ptrdiff_t froffset; 257 /*The total number of fragments in the plane.*/ 258 ptrdiff_t nfrags; 259 /*The number of super blocks in the horizontal direction.*/ 260 unsigned nhsbs; 261 /*The number of super blocks in the vertical direction.*/ 262 unsigned nvsbs; 263 /*The offset of the first super block in the plane.*/ 264 unsigned sboffset; 265 /*The total number of super blocks in the plane.*/ 266 unsigned nsbs; 267 }; 268 269 270 271 /*The shared (encoder and decoder) functions that have accelerated variants.*/ 272 struct oc_base_opt_vtable{ 273 void (*frag_copy)(unsigned char *_dst, 274 const unsigned char *_src,int _ystride); 275 void (*frag_recon_intra)(unsigned char *_dst,int _ystride, 276 const ogg_int16_t _residue[64]); 277 void (*frag_recon_inter)(unsigned char *_dst, 278 const unsigned char *_src,int _ystride,const ogg_int16_t _residue[64]); 279 void (*frag_recon_inter2)(unsigned char *_dst,const unsigned char *_src1, 280 const unsigned char *_src2,int _ystride,const ogg_int16_t _residue[64]); 281 void (*idct8x8)(ogg_int16_t _y[64],int _last_zzi); 282 void (*state_frag_recon)(const oc_theora_state *_state,ptrdiff_t _fragi, 283 int _pli,ogg_int16_t _dct_coeffs[64],int _last_zzi,ogg_uint16_t _dc_quant); 284 void (*state_frag_copy_list)(const oc_theora_state *_state, 285 const ptrdiff_t *_fragis,ptrdiff_t _nfragis, 286 int _dst_frame,int _src_frame,int _pli); 287 void (*state_loop_filter_frag_rows)(const oc_theora_state *_state, 288 int _bv[256],int _refi,int _pli,int _fragy0,int _fragy_end); 289 void (*restore_fpu)(void); 290 }; 291 292 /*The shared (encoder and decoder) tables that vary according to which variants 293 of the above functions are used.*/ 294 struct oc_base_opt_data{ 295 const unsigned char *dct_fzig_zag; 296 }; 297 298 299 /*State information common to both the encoder and decoder.*/ 300 struct oc_theora_state{ 301 /*The stream information.*/ 302 th_info info; 303 /*Table for shared accelerated functions.*/ 304 oc_base_opt_vtable opt_vtable; 305 /*Table for shared data used by accelerated functions.*/ 306 oc_base_opt_data opt_data; 307 /*CPU flags to detect the presence of extended instruction sets.*/ 308 ogg_uint32_t cpu_flags; 309 /*The fragment plane descriptions.*/ 310 oc_fragment_plane fplanes[3]; 311 /*The list of fragments, indexed in image order.*/ 312 oc_fragment *frags; 313 /*The the offset into the reference frame buffer to the upper-left pixel of 314 each fragment.*/ 315 ptrdiff_t *frag_buf_offs; 316 /*The motion vector for each fragment.*/ 317 oc_mv *frag_mvs; 318 /*The total number of fragments in a single frame.*/ 319 ptrdiff_t nfrags; 320 /*The list of super block maps, indexed in image order.*/ 321 oc_sb_map *sb_maps; 322 /*The list of super block flags, indexed in image order.*/ 323 oc_sb_flags *sb_flags; 324 /*The total number of super blocks in a single frame.*/ 325 unsigned nsbs; 326 /*The fragments from each color plane that belong to each macro block. 327 Fragments are stored in image order (left to right then top to bottom). 328 When chroma components are decimated, the extra fragments have an index of 329 -1.*/ 330 oc_mb_map *mb_maps; 331 /*The list of macro block modes. 332 A negative number indicates the macro block lies entirely outside the 333 coded frame.*/ 334 signed char *mb_modes; 335 /*The number of macro blocks in the X direction.*/ 336 unsigned nhmbs; 337 /*The number of macro blocks in the Y direction.*/ 338 unsigned nvmbs; 339 /*The total number of macro blocks.*/ 340 size_t nmbs; 341 /*The list of coded fragments, in coded order. 342 Uncoded fragments are stored in reverse order from the end of the list.*/ 343 ptrdiff_t *coded_fragis; 344 /*The number of coded fragments in each plane.*/ 345 ptrdiff_t ncoded_fragis[3]; 346 /*The total number of coded fragments.*/ 347 ptrdiff_t ntotal_coded_fragis; 348 /*The index of the buffers being used for each OC_FRAME_* reference frame.*/ 349 int ref_frame_idx[4]; 350 /*The actual buffers used for the previously decoded frames.*/ 351 th_ycbcr_buffer ref_frame_bufs[4]; 352 /*The storage for the reference frame buffers.*/ 353 unsigned char *ref_frame_data[4]; 354 /*The strides for each plane in the reference frames.*/ 355 int ref_ystride[3]; 356 /*The number of unique border patterns.*/ 357 int nborders; 358 /*The unique border patterns for all border fragments. 359 The borderi field of fragments which straddle the border indexes this 360 list.*/ 361 oc_border_info borders[16]; 362 /*The frame number of the last keyframe.*/ 363 ogg_int64_t keyframe_num; 364 /*The frame number of the current frame.*/ 365 ogg_int64_t curframe_num; 366 /*The granpos of the current frame.*/ 367 ogg_int64_t granpos; 368 /*The type of the current frame.*/ 369 unsigned char frame_type; 370 /*The bias to add to the frame count when computing granule positions.*/ 371 unsigned char granpos_bias; 372 /*The number of quality indices used in the current frame.*/ 373 unsigned char nqis; 374 /*The quality indices of the current frame.*/ 375 unsigned char qis[3]; 376 /*The dequantization tables, stored in zig-zag order, and indexed by 377 qi, pli, qti, and zzi.*/ 378 ogg_uint16_t *dequant_tables[64][3][2]; 379 OC_ALIGN16(oc_quant_table dequant_table_data[64][3][2]); 380 /*Loop filter strength parameters.*/ 381 unsigned char loop_filter_limits[64]; 382 }; 383 384 385 386 /*The function type used to fill in the chroma plane motion vectors for a 387 macro block when 4 different motion vectors are specified in the luma 388 plane. 389 _cbmvs: The chroma block-level motion vectors to fill in. 390 _lmbmv: The luma macro-block level motion vector to fill in for use in 391 prediction. 392 _lbmvs: The luma block-level motion vectors.*/ 393 typedef void (*oc_set_chroma_mvs_func)(oc_mv _cbmvs[4],const oc_mv _lbmvs[4]); 394 395 396 397 /*A map from the index in the zig zag scan to the coefficient number in a 398 block.*/ 399 extern const unsigned char OC_FZIG_ZAG[128]; 400 /*A map from the coefficient number in a block to its index in the zig zag 401 scan.*/ 402 extern const unsigned char OC_IZIG_ZAG[64]; 403 /*A map from physical macro block ordering to bitstream macro block 404 ordering within a super block.*/ 405 extern const unsigned char OC_MB_MAP[2][2]; 406 /*A list of the indices in the oc_mb_map array that can be valid for each of 407 the various chroma decimation types.*/ 408 extern const unsigned char OC_MB_MAP_IDXS[TH_PF_NFORMATS][12]; 409 /*The number of indices in the oc_mb_map array that can be valid for each of 410 the various chroma decimation types.*/ 411 extern const unsigned char OC_MB_MAP_NIDXS[TH_PF_NFORMATS]; 412 /*A table of functions used to fill in the Cb,Cr plane motion vectors for a 413 macro block when 4 different motion vectors are specified in the luma 414 plane.*/ 415 extern const oc_set_chroma_mvs_func OC_SET_CHROMA_MVS_TABLE[TH_PF_NFORMATS]; 416 417 418 419 int oc_ilog(unsigned _v); 420 void **oc_malloc_2d(size_t _height,size_t _width,size_t _sz); 421 void **oc_calloc_2d(size_t _height,size_t _width,size_t _sz); 422 void oc_free_2d(void *_ptr); 423 424 void oc_ycbcr_buffer_flip(th_ycbcr_buffer _dst, 425 const th_ycbcr_buffer _src); 426 427 int oc_state_init(oc_theora_state *_state,const th_info *_info,int _nrefs); 428 void oc_state_clear(oc_theora_state *_state); 429 void oc_state_vtable_init_c(oc_theora_state *_state); 430 void oc_state_borders_fill_rows(oc_theora_state *_state,int _refi,int _pli, 431 int _y0,int _yend); 432 void oc_state_borders_fill_caps(oc_theora_state *_state,int _refi,int _pli); 433 void oc_state_borders_fill(oc_theora_state *_state,int _refi); 434 void oc_state_fill_buffer_ptrs(oc_theora_state *_state,int _buf_idx, 435 th_ycbcr_buffer _img); 436 int oc_state_mbi_for_pos(oc_theora_state *_state,int _mbx,int _mby); 437 int oc_state_get_mv_offsets(const oc_theora_state *_state,int _offsets[2], 438 int _pli,int _dx,int _dy); 439 440 int oc_state_loop_filter_init(oc_theora_state *_state,int *_bv); 441 void oc_state_loop_filter(oc_theora_state *_state,int _frame); 442 #if defined(OC_DUMP_IMAGES) 443 int oc_state_dump_frame(const oc_theora_state *_state,int _frame, 444 const char *_suf); 445 #endif 446 447 /*Shared accelerated functions.*/ 448 void oc_frag_copy(const oc_theora_state *_state,unsigned char *_dst, 449 const unsigned char *_src,int _ystride); 450 void oc_frag_recon_intra(const oc_theora_state *_state, 451 unsigned char *_dst,int _dst_ystride,const ogg_int16_t _residue[64]); 452 void oc_frag_recon_inter(const oc_theora_state *_state,unsigned char *_dst, 453 const unsigned char *_src,int _ystride,const ogg_int16_t _residue[64]); 454 void oc_frag_recon_inter2(const oc_theora_state *_state, 455 unsigned char *_dst,const unsigned char *_src1,const unsigned char *_src2, 456 int _ystride,const ogg_int16_t _residue[64]); 457 void oc_idct8x8(const oc_theora_state *_state,ogg_int16_t _y[64],int _last_zzi); 458 void oc_state_frag_recon(const oc_theora_state *_state,ptrdiff_t _fragi, 459 int _pli,ogg_int16_t _dct_coeffs[64],int _last_zzi,ogg_uint16_t _dc_quant); 460 void oc_state_frag_copy_list(const oc_theora_state *_state, 461 const ptrdiff_t *_fragis,ptrdiff_t _nfragis, 462 int _dst_frame,int _src_frame,int _pli); 463 void oc_state_loop_filter_frag_rows(const oc_theora_state *_state, 464 int _bv[256],int _refi,int _pli,int _fragy0,int _fragy_end); 465 void oc_restore_fpu(const oc_theora_state *_state); 466 467 /*Default pure-C implementations.*/ 468 void oc_frag_copy_c(unsigned char *_dst, 469 const unsigned char *_src,int _src_ystride); 470 void oc_frag_recon_intra_c(unsigned char *_dst,int _dst_ystride, 471 const ogg_int16_t _residue[64]); 472 void oc_frag_recon_inter_c(unsigned char *_dst, 473 const unsigned char *_src,int _ystride,const ogg_int16_t _residue[64]); 474 void oc_frag_recon_inter2_c(unsigned char *_dst,const unsigned char *_src1, 475 const unsigned char *_src2,int _ystride,const ogg_int16_t _residue[64]); 476 void oc_idct8x8_c(ogg_int16_t _y[64],int _last_zzi); 477 void oc_state_frag_recon_c(const oc_theora_state *_state,ptrdiff_t _fragi, 478 int _pli,ogg_int16_t _dct_coeffs[64],int _last_zzi,ogg_uint16_t _dc_quant); 479 void oc_state_frag_copy_list_c(const oc_theora_state *_state, 480 const ptrdiff_t *_fragis,ptrdiff_t _nfragis, 481 int _dst_frame,int _src_frame,int _pli); 482 void oc_state_loop_filter_frag_rows_c(const oc_theora_state *_state, 483 int _bv[256],int _refi,int _pli,int _fragy0,int _fragy_end); 484 void oc_restore_fpu_c(void); 485 486 /*We need a way to call a few encoder functions without introducing a link-time 487 dependency into the decoder, while still allowing the old alpha API which 488 does not distinguish between encoder and decoder objects to be used. 489 We do this by placing a function table at the start of the encoder object 490 which can dispatch into the encoder library. 491 We do a similar thing for the decoder in case we ever decide to split off a 492 common base library.*/ 493 typedef void (*oc_state_clear_func)(theora_state *_th); 494 typedef int (*oc_state_control_func)(theora_state *th,int _req, 495 void *_buf,size_t _buf_sz); 496 typedef ogg_int64_t (*oc_state_granule_frame_func)(theora_state *_th, 497 ogg_int64_t _granulepos); 498 typedef double (*oc_state_granule_time_func)(theora_state *_th, 499 ogg_int64_t _granulepos); 500 501 502 struct oc_state_dispatch_vtable{ 503 oc_state_clear_func clear; 504 oc_state_control_func control; 505 oc_state_granule_frame_func granule_frame; 506 oc_state_granule_time_func granule_time; 507 }; 508 509 #endif 510