1 /********************************************************************
2  *                                                                  *
3  * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
4  * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
5  * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
6  * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
7  *                                                                  *
8  * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009                *
9  * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
10  *                                                                  *
11  ********************************************************************
12 
13   function:
14     last mod: $Id: x86int.h 17578 2010-10-29 04:21:26Z tterribe $
15 
16  ********************************************************************/
17 
18 #if !defined(_x86_x86int_H)
19 # define _x86_x86int_H (1)
20 # include "../internal.h"
21 
22 # if defined(OC_X86_ASM)
23 #  define oc_state_accel_init oc_state_accel_init_x86
24 #  if defined(OC_X86_64_ASM)
25 /*x86-64 guarantees SIMD support up through at least SSE2.
26   No routine here needs more than SSE2, so each name is bound at compile time,
27    avoiding runtime detection and the indirect call; the function-like macros
28    simply drop their unused _state argument.*/
29 #   define oc_frag_copy(_state,_dst,_src,_ystride) \
30   oc_frag_copy_mmx(_dst,_src,_ystride)
31 #   define oc_frag_copy_list(_state,_dst_frame,_src_frame,_ystride, \
32  _fragis,_nfragis,_frag_buf_offs) \
33   oc_frag_copy_list_mmx(_dst_frame,_src_frame,_ystride, \
34    _fragis,_nfragis,_frag_buf_offs)
35 #   define oc_frag_recon_intra(_state,_dst,_ystride,_residue) \
36   oc_frag_recon_intra_mmx(_dst,_ystride,_residue)
37 #   define oc_frag_recon_inter(_state,_dst,_src,_ystride,_residue) \
38   oc_frag_recon_inter_mmx(_dst,_src,_ystride,_residue)
39 #   define oc_frag_recon_inter2(_state,_dst,_src1,_src2,_ystride,_residue) \
40   oc_frag_recon_inter2_mmx(_dst,_src1,_src2,_ystride,_residue)
41 #   define oc_idct8x8(_state,_y,_x,_last_zzi) \
42   oc_idct8x8_sse2(_y,_x,_last_zzi)
43 #   define oc_state_frag_recon oc_state_frag_recon_mmx
44 #   define oc_loop_filter_init(_state,_bv,_flimit) \
45   oc_loop_filter_init_mmxext(_bv,_flimit)
46 #   define oc_state_loop_filter_frag_rows oc_state_loop_filter_frag_rows_mmxext
47 #   define oc_restore_fpu(_state) \
48   oc_restore_fpu_mmx()
49 #  else
50 #   define OC_STATE_USE_VTABLE (1) /*Presumably selects run-time dispatch.*/
51 #  endif
52 # endif
53 
54 # include "../state.h"
55 # include "x86cpu.h"
56 
57 /*Stringizes the argument as written (the # operator does no macro expansion).*/
58 #define OC_M2STR(_s) #_s
59 
60 /*Memory operands do not always include an offset; to avoid warnings, we force
61    one with %H (which adds 8, hence the -8 in the expansion).*/
62 # if defined(__GNUC__)&&__GNUC__>=4 /*__GNUC_PREREQ comes from libc, not gcc.*/
63 #  define OC_MEM_OFFS(_offs,_name) \
64   OC_M2STR(_offs-8+%H[_name])
65 # endif
66 /*If your gcc version doesn't support %H, then you get to suffer the warnings.
67   Note that Apple's gas breaks on things like _offs+(%esp): it throws away the
68    whole offset, instead of substituting in 0 for the missing operand to +.*/
69 # if !defined(OC_MEM_OFFS)
70 #  define OC_MEM_OFFS(_offs,_name) \
71   OC_M2STR(_offs+%[_name])
72 # endif
73 
74 /*Declare an array operand with an exact size: an lvalue at _ptr whose type is
75    an anonymous struct wrapping _type[_size] (GNU statement expression).
76   This tells gcc which memory we clobber without clobbering all of "memory", and
77    lets asm reach local buffers via the stack pointer, with no extra register.*/
78 #define OC_ARRAY_OPERAND(_type,_ptr,_size) \
79   (*({ \
80     struct{_type array_value__[(_size)];} *array_addr__=(void *)(_ptr); \
81     array_addr__; \
82   }))
83 
84 /*Declare a read-only array operand with an exact size: the const counterpart of
85    OC_ARRAY_OPERAND, yielding a const-qualified lvalue at _ptr.
86   Presumably meant for regions the asm only reads: gcc learns the exact extent
87    without a "memory" clobber or a separate register pointing at the buffer.*/
88 #define OC_CONST_ARRAY_OPERAND(_type,_ptr,_size) \
89   (*({ \
90     const struct{_type array_value__[(_size)];} *array_addr__= \
91      (const void *)(_ptr); \
92     array_addr__; \
93   }))
94 /*16-byte-aligned table, defined elsewhere (presumably IDCT constants).*/
95 extern const unsigned short __attribute__((aligned(16))) OC_IDCT_CONSTS[64];
96 /*What oc_state_accel_init expands to when OC_X86_ASM is defined.*/
97 void oc_state_accel_init_x86(oc_theora_state *_state);
98 /*Routines by instruction-set suffix; x86-64 macros above bind some directly.*/
99 void oc_frag_copy_mmx(unsigned char *_dst,
100  const unsigned char *_src,int _ystride);
101 void oc_frag_copy_list_mmx(unsigned char *_dst_frame,
102  const unsigned char *_src_frame,int _ystride,
103  const ptrdiff_t *_fragis,ptrdiff_t _nfragis,const ptrdiff_t *_frag_buf_offs);
104 void oc_frag_recon_intra_mmx(unsigned char *_dst,int _ystride,
105  const ogg_int16_t *_residue);
106 void oc_frag_recon_inter_mmx(unsigned char *_dst,
107  const unsigned char *_src,int _ystride,const ogg_int16_t *_residue);
108 void oc_frag_recon_inter2_mmx(unsigned char *_dst,const unsigned char *_src1,
109  const unsigned char *_src2,int _ystride,const ogg_int16_t *_residue);
110 void oc_idct8x8_mmx(ogg_int16_t _y[64],ogg_int16_t _x[64],int _last_zzi);
111 void oc_idct8x8_sse2(ogg_int16_t _y[64],ogg_int16_t _x[64],int _last_zzi);
112 void oc_state_frag_recon_mmx(const oc_theora_state *_state,ptrdiff_t _fragi,
113  int _pli,ogg_int16_t _dct_coeffs[128],int _last_zzi,ogg_uint16_t _dc_quant);
114 void oc_loop_filter_init_mmx(signed char _bv[256],int _flimit);
115 void oc_loop_filter_init_mmxext(signed char _bv[256],int _flimit);
116 void oc_state_loop_filter_frag_rows_mmx(const oc_theora_state *_state,
117  signed char _bv[256],int _refi,int _pli,int _fragy0,int _fragy_end);
118 void oc_state_loop_filter_frag_rows_mmxext(const oc_theora_state *_state,
119  signed char _bv[256],int _refi,int _pli,int _fragy0,int _fragy_end);
120 void oc_restore_fpu_mmx(void);
121 
122 #endif
123