/*****************************************************************************
 *
 *  XVID MPEG-4 VIDEO CODEC
 *  - Sum Of Absolute Difference header  -
 *
 *  Copyright(C) 2001-2010 Peter Ross <pross@xvid.org>
 *
 *  This program is free software ; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation ; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY ; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program ; if not, write to the Free Software
 *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
 *
 * $Id: sad.h 1985 2011-05-18 09:02:35Z Isibaar $
 *
 ****************************************************************************/

#ifndef _ENCODER_SAD_H_
#define _ENCODER_SAD_H_

#include "../portab.h"

/*
 * Initialisation hook of the SAD module.
 *
 * sadInitFunc     - a routine taking no arguments and returning nothing.
 * sadInitFuncPtr  - pointer to such a routine.
 * sadInit         - global pointer to the routine in use; it is only
 *                   declared here and must be defined in one source file.
 *
 * NOTE(review): sadInit_altivec is declared for every architecture, whereas
 * the other AltiVec prototypes in this header sit behind ARCH_IS_PPC.
 * Presumably it is only defined in PPC builds - confirm before using it.
 */
typedef void (sadInitFunc) (void);
typedef sadInitFunc *sadInitFuncPtr;

extern sadInitFuncPtr sadInit;
sadInitFunc sadInit_altivec;

/*
 * sad16 family.
 *
 * sad16Func     - takes two read-only byte pointers (cur, ref), a stride
 *                 and a best_sad value; returns a uint32_t.
 * sad16FuncPtr  - pointer to a sad16Func.
 * sad16         - global dispatch pointer, declared here and defined
 *                 elsewhere.
 *
 * NOTE(review): going by the names and the file banner this is presumably
 * the sum of absolute differences over a 16x16 block, with best_sad acting
 * as an early-exit bound - confirm against the implementations.
 */
typedef uint32_t(sad16Func) (const uint8_t * const cur,
							 const uint8_t * const ref,
							 const uint32_t stride,
							 const uint32_t best_sad);
typedef sad16Func *sad16FuncPtr;
extern sad16FuncPtr sad16;

/* Plain C implementation, declared on every architecture. */
sad16Func sad16_c;

/* x86 / x86-64 variants. */
#if defined(ARCH_IS_IA32) || defined(ARCH_IS_X86_64)
sad16Func sad16_mmx;
sad16Func sad16_xmm;
sad16Func sad16_3dne;
sad16Func sad16_sse2;
sad16Func sad16_sse3;
#endif

/* IA-64 variant. */
#ifdef ARCH_IS_IA64
sad16Func sad16_ia64;
#endif

/* PowerPC variant. */
#ifdef ARCH_IS_PPC
sad16Func sad16_altivec_c;
#endif

/*
 * Shares the sad16 signature.
 * NOTE(review): presumably a mean-removed SAD - confirm.
 */
sad16Func mrsad16_c;

/*
 * sad8 family.
 *
 * sad8Func     - takes two read-only byte pointers (cur, ref) and a stride;
 *                returns a uint32_t.
 * sad8FuncPtr  - pointer to a sad8Func.
 * sad8         - global dispatch pointer, declared here and defined
 *                elsewhere.
 *
 * NOTE(review): presumably the sum of absolute differences over an 8x8
 * block - confirm against the implementations.
 */
typedef uint32_t(sad8Func) (const uint8_t * const cur,
							const uint8_t * const ref,
							const uint32_t stride);
typedef sad8Func *sad8FuncPtr;
extern sad8FuncPtr sad8;

/* Plain C implementation, declared on every architecture. */
sad8Func sad8_c;

/* x86 / x86-64 variants. */
#if defined(ARCH_IS_IA32) || defined(ARCH_IS_X86_64)
sad8Func sad8_mmx;
sad8Func sad8_xmm;
sad8Func sad8_3dne;
#endif

/* IA-64 variant. */
#ifdef ARCH_IS_IA64
sad8Func sad8_ia64;
#endif

/* PowerPC variant. */
#ifdef ARCH_IS_PPC
sad8Func sad8_altivec_c;
#endif

/*
 * sad16bi family.
 *
 * sad16biFunc     - takes three read-only byte pointers (cur, ref1, ref2)
 *                   and a stride; returns a uint32_t.
 * sad16biFuncPtr  - pointer to a sad16biFunc.
 * sad16bi         - global dispatch pointer, declared here and defined
 *                   elsewhere.
 *
 * NOTE(review): presumably a 16x16 SAD against a prediction built from both
 * references (bi-directional) - confirm against the implementations.
 */
typedef uint32_t(sad16biFunc) (const uint8_t * const cur,
							   const uint8_t * const ref1,
							   const uint8_t * const ref2,
							   const uint32_t stride);
typedef sad16biFunc *sad16biFuncPtr;
extern sad16biFuncPtr sad16bi;

/* Plain C implementation, declared on every architecture. */
sad16biFunc sad16bi_c;

/* x86 / x86-64 variants. */
#if defined(ARCH_IS_IA32) || defined(ARCH_IS_X86_64)
sad16biFunc sad16bi_mmx;
sad16biFunc sad16bi_xmm;
sad16biFunc sad16bi_3dne;
sad16biFunc sad16bi_3dn;
#endif

/* IA-64 variant. */
#ifdef ARCH_IS_IA64
sad16biFunc sad16bi_ia64;
#endif

/* PowerPC variant. */
#ifdef ARCH_IS_PPC
sad16biFunc sad16bi_altivec_c;
#endif

/*
 * sad8bi family.
 *
 * sad8biFunc     - takes three read-only byte pointers (cur, ref1, ref2)
 *                  and a stride; returns a uint32_t.
 * sad8biFuncPtr  - pointer to a sad8biFunc.
 * sad8bi         - global dispatch pointer, declared here and defined
 *                  elsewhere.
 *
 * Only a C and x86 / x86-64 variants are declared for this family.
 * NOTE(review): presumably the 8x8 counterpart of sad16bi - confirm.
 */
typedef uint32_t(sad8biFunc) (const uint8_t * const cur,
							  const uint8_t * const ref1,
							  const uint8_t * const ref2,
							  const uint32_t stride);
typedef sad8biFunc *sad8biFuncPtr;
extern sad8biFuncPtr sad8bi;

/* Plain C implementation, declared on every architecture. */
sad8biFunc sad8bi_c;

/* x86 / x86-64 variants. */
#if defined(ARCH_IS_IA32) || defined(ARCH_IS_X86_64)
sad8biFunc sad8bi_mmx;
sad8biFunc sad8bi_xmm;
sad8biFunc sad8bi_3dne;
sad8biFunc sad8bi_3dn;
#endif

/*
 * dev16 family.
 *
 * dev16Func     - takes one read-only byte pointer (cur) and a stride;
 *                 returns a uint32_t.
 * dev16FuncPtr  - pointer to a dev16Func.
 * dev16         - global dispatch pointer, declared here and defined
 *                 elsewhere.
 *
 * NOTE(review): presumably the deviation of a 16x16 block from its own
 * mean - confirm against the implementations.
 */
typedef uint32_t(dev16Func) (const uint8_t * const cur,
							 const uint32_t stride);
typedef dev16Func *dev16FuncPtr;
extern dev16FuncPtr dev16;

/* Plain C implementation, declared on every architecture. */
dev16Func dev16_c;

/* x86 / x86-64 variants. */
#if defined(ARCH_IS_IA32) || defined(ARCH_IS_X86_64)
dev16Func dev16_mmx;
dev16Func dev16_xmm;
dev16Func dev16_3dne;
dev16Func dev16_sse2;
dev16Func dev16_sse3;
#endif

/* IA-64 variant. */
#ifdef ARCH_IS_IA64
dev16Func dev16_ia64;
#endif

/* PowerPC variant. */
#ifdef ARCH_IS_PPC
dev16Func dev16_altivec_c;
#endif

/*
 * sad16v family.
 *
 * sad16vFunc     - takes two read-only byte pointers (cur, ref), a stride
 *                  and a writable int32_t pointer (sad8); returns a
 *                  uint32_t.
 * sad16vFuncPtr  - pointer to a sad16vFunc.
 * sad16v         - global dispatch pointer, declared here and defined
 *                  elsewhere.
 *
 * NOTE(review): presumably the function stores the four 8x8 sub-block SADs
 * through sad8 and returns their total - confirm, including the number of
 * elements sad8 must provide.  sad32v_c shares the signature; its block
 * size is not visible from this header.
 */
typedef uint32_t (sad16vFunc)(	const uint8_t * const cur,
								const uint8_t * const ref,
								const uint32_t stride, int32_t *sad8);
typedef sad16vFunc *sad16vFuncPtr;
extern sad16vFuncPtr sad16v;

/* Plain C implementations, declared on every architecture. */
sad16vFunc sad16v_c;
sad16vFunc sad32v_c;

/* x86 / x86-64 variants. */
#if defined(ARCH_IS_IA32) || defined(ARCH_IS_X86_64)
sad16vFunc sad16v_xmm;
sad16vFunc sad16v_mmx;
#endif

/*
 * sse8_16bit family.
 *
 * This function assumes blocks use 16bit signed elements.
 *
 * sse8Func_16bit     - takes two read-only int16_t pointers (cur, ref) and
 *                      a stride; returns a uint32_t.
 * sse8Func_16bitPtr  - pointer to a sse8Func_16bit.
 * sse8_16bit         - global dispatch pointer, declared here and defined
 *                      elsewhere.
 *
 * NOTE(review): presumably the sum of squared errors over an 8x8 block;
 * whether stride counts bytes or elements is not visible here - confirm.
 */
typedef uint32_t (sse8Func_16bit)(const int16_t * cur,
								  const int16_t * ref,
								  const uint32_t stride);
typedef sse8Func_16bit *sse8Func_16bitPtr;
extern sse8Func_16bitPtr sse8_16bit;

/* Plain C implementation, declared on every architecture. */
sse8Func_16bit sse8_16bit_c;

/* x86 / x86-64 variant. */
#if defined(ARCH_IS_IA32) || defined(ARCH_IS_X86_64)
sse8Func_16bit sse8_16bit_mmx;
#endif

/* PowerPC variant. */
#ifdef ARCH_IS_PPC
sse8Func_16bit sse8_16bit_altivec_c;
#endif

/*
 * sse8_8bit family.
 *
 * This function assumes blocks use 8bit *un*signed elements.
 *
 * sse8Func_8bit     - takes two read-only uint8_t pointers (cur, ref) and
 *                     a stride; returns a uint32_t.
 * sse8Func_8bitPtr  - pointer to a sse8Func_8bit.
 * sse8_8bit         - global dispatch pointer, declared here and defined
 *                     elsewhere.
 *
 * NOTE(review): presumably the sum of squared errors over an 8x8 block -
 * confirm against the implementations.
 */
typedef uint32_t (sse8Func_8bit)(const uint8_t * cur,
								 const uint8_t * ref,
								 const uint32_t stride);
typedef sse8Func_8bit *sse8Func_8bitPtr;
extern sse8Func_8bitPtr sse8_8bit;

/* Plain C implementation, declared on every architecture. */
sse8Func_8bit sse8_8bit_c;

/* x86 / x86-64 variant. */
#if defined(ARCH_IS_IA32) || defined(ARCH_IS_X86_64)
sse8Func_8bit sse8_8bit_mmx;
#endif

/*
 * sseh8_16bit family.
 *
 * sseh8Func_16bit     - takes two read-only int16_t pointers (cur, ref) and
 *                       a 16-bit mask; returns a uint32_t.  Unlike the
 *                       sse8 signatures there is no stride parameter.
 * sseh8Func_16bitPtr  - pointer to a sseh8Func_16bit.
 * sseh8_16bit         - global dispatch pointer, declared here and defined
 *                       elsewhere.
 *
 * NOTE(review): presumably a perceptually weighted squared error over a
 * contiguous 8x8 coefficient block (see "MSE_H" further down this header);
 * the meaning of mask is not visible here - confirm.
 */
typedef uint32_t (sseh8Func_16bit)(const int16_t * cur,
								   const int16_t * ref,
								   uint16_t mask);
typedef sseh8Func_16bit *sseh8Func_16bitPtr;
extern sseh8Func_16bitPtr sseh8_16bit;

/* Plain C implementation, declared on every architecture. */
sseh8Func_16bit sseh8_16bit_c;

/* x86 / x86-64 variant. */
#if defined(ARCH_IS_IA32) || defined(ARCH_IS_X86_64)
sseh8Func_16bit sseh8_16bit_sse2;
#endif

/*
 * coeff8_energy family.
 *
 * coeff8_energyFunc      - takes one read-only int16_t pointer (cur);
 *                          returns a uint32_t.
 * coeff8_energyFunc_Ptr  - pointer to a coeff8_energyFunc.
 * coeff8_energy          - global dispatch pointer, declared here and
 *                          defined elsewhere.
 *
 * NOTE(review): presumably an energy measure over the 64 coefficients of an
 * 8x8 block - confirm against the implementations.
 */
typedef uint32_t (coeff8_energyFunc)(const int16_t * cur);
typedef coeff8_energyFunc *coeff8_energyFunc_Ptr;
extern coeff8_energyFunc_Ptr coeff8_energy;

/* Plain C implementation, declared on every architecture. */
coeff8_energyFunc coeff8_energy_c;

/* x86 / x86-64 variant. */
#if defined(ARCH_IS_IA32) || defined(ARCH_IS_X86_64)
coeff8_energyFunc coeff8_energy_sse2;
#endif

/*
 * blocksum8 family.
 *
 * blocksum8Func      - takes a read-only byte pointer (cur), an int stride
 *                      and two writable arrays declared with four elements
 *                      each (sums: uint16_t, squares: uint32_t); returns a
 *                      uint32_t.  As array parameters they are passed as
 *                      plain pointers, so the "4" is documentation only.
 * blocksum8Func_Ptr  - pointer to a blocksum8Func.
 * blocksum8          - global dispatch pointer, declared here and defined
 *                      elsewhere.
 *
 * NOTE(review): presumably fills sums/squares with per-quadrant pixel sums
 * and sums of squares of an 8x8 block - confirm against the
 * implementations.
 */
typedef uint32_t (blocksum8Func)(const uint8_t * cur, int stride,
								 uint16_t sums[4], uint32_t squares[4]);
typedef blocksum8Func *blocksum8Func_Ptr;
extern blocksum8Func_Ptr blocksum8;

/* Plain C implementation, declared on every architecture. */
blocksum8Func blocksum8_c;

/* x86 / x86-64 variant. */
#if defined(ARCH_IS_IA32) || defined(ARCH_IS_X86_64)
blocksum8Func blocksum8_sse2;
#endif

/* Coeffs for MSE_H calculation */

/*
 * 64 signed 16-bit entries, laid out here as 8 rows of 8.
 * Being `static const` in a header, every translation unit that includes
 * this file gets its own private copy.
 * NOTE(review): presumably one entry per coefficient position of an 8x8
 * block in row-major order - confirm against the code that indexes it.
 */
static const int16_t Inv_iMask_Coeff[64] = {
       0,   155,   128,   328,   737,  2048,  3329,  4763,
     184,   184,   251,   462,   865,  4306,  4608,  3872,
     251,   216,   328,   737,  2048,  4159,  6094,  4014,
     251,   370,   620,  1076,  3329,  9688,  8192,  4920,
     415,   620,  1752,  4014,  5919, 15207, 13579,  7589,
     737,  1568,  3872,  5243,  8398, 13844, 16345, 10834,
    3073,  5243,  7787,  9688, 13579, 18741, 18433, 13057,
    6636, 10834, 11552, 12294, 16056, 12800, 13579, 12545
};

/*
 * 64 unsigned 16-bit entries, laid out here as 8 rows of 8.
 * Being `static const` in a header, every translation unit that includes
 * this file gets its own private copy.
 * NOTE(review): presumably per-coefficient weights for an 8x8 block in
 * row-major order, used by the MSE_H calculation - confirm against the
 * code that indexes it.
 */
static const uint16_t iCSF_Coeff[64] = {
	26353, 38331, 42164, 26353, 17568, 10541, 8268, 6912,
	35137, 35137, 30117, 22192, 16217,  7270, 7027, 7666,
	30117, 32434, 26353, 17568, 10541,  7397, 6111, 7529,
	30117, 24803, 19166, 14539,  8268,  4846, 5271, 6801,
	23425, 19166, 11396,  7529,  6201,  3868, 4094, 5476,
	17568, 12047,  7666,  6588,  5205,  4054, 3731, 4583,
	 8605,  6588,  5406,  4846,  4094,  3485, 3514, 4175,
	 5856,  4583,  4438,  4302,  3765,  4216, 4094, 4259
};

/*
 * 64 unsigned 16-bit entries, laid out here as 8 rows of 8.
 * Being `static const` in a header, every translation unit that includes
 * this file gets its own private copy.
 * NOTE(review): presumably per-coefficient rounding terms that accompany
 * iCSF_Coeff, indexed the same way - confirm against the code that uses it.
 */
static const uint16_t iCSF_Round[64] = {
	1, 1, 1, 1, 2, 3, 4, 5,
	1, 1, 1, 1, 2, 5, 5, 4,
	1, 1, 1, 2, 3, 4, 5, 4,
	1, 1, 2, 2, 4, 7, 6, 5,
	1, 2, 3, 4, 5, 8, 8, 6,
	2, 3, 4, 5, 6, 8, 9, 7,
	4, 5, 6, 7, 8, 9, 9, 8,
	6, 7, 7, 8, 9, 8, 8, 8
};

#endif							/* _ENCODER_SAD_H_ */