1 /*****************************************************************************
2 *
3 * XVID MPEG-4 VIDEO CODEC
4 * - QPel interpolation -
5 *
6 * Copyright(C) 2003 Pascal Massimino <skal@planet-d.net>
7 *
8 * This program is free software ; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation ; either version 2 of the License, or
11 * (at your option) any later version.
12 *
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY ; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with this program ; if not, write to the Free Software
20 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
21 *
22 * $Id: qpel.c 1985 2011-05-18 09:02:35Z Isibaar $
23 *
24 ****************************************************************************/
25
26 #ifndef XVID_AUTO_INCLUDE
27
28 #include <stdio.h>
29
30 #include "../portab.h"
31 #include "qpel.h"
32
33 /* Quarterpel FIR definition
34 ****************************************************************************/
35
/* FIR coefficient table for the 8-pixel wide filters.
 * Row i holds the weight that source sample i contributes to each of the
 * 8 output pixels (one column per output). Each output is therefore the
 * column-wise dot product over the 9 (= SIZE+1) source samples; every
 * column sums to 32, matching the ">>5" normalisation in the filter code.
 * The first/last rows are boundary-adjusted variants of the symmetric
 * (-1, 3, -6, 20, 20, -6, 3, -1) kernel. */
static const int32_t FIR_Tab_8[9][8] = {
	{ 14, -3,  2, -1,  0,  0,  0,  0 },
	{ 23, 19, -6,  3, -1,  0,  0,  0 },
	{ -7, 20, 20, -6,  3, -1,  0,  0 },
	{  3, -6, 20, 20, -6,  3, -1,  0 },
	{ -1,  3, -6, 20, 20, -6,  3, -1 },
	{  0, -1,  3, -6, 20, 20, -6,  3 },
	{  0,  0, -1,  3, -6, 20, 20, -7 },
	{  0,  0,  0, -1,  3, -6, 19, 23 },
	{  0,  0,  0,  0, -1,  2, -3, 14 }
};
47
/* FIR coefficient table for the 16-pixel wide filters.
 * Same layout as FIR_Tab_8: row i = weight of source sample i for each of
 * the 16 output columns, 17 (= SIZE+1) taps total, each column summing
 * to 32 (normalised by ">>5" in the filter code). */
static const int32_t FIR_Tab_16[17][16] = {
	{ 14, -3,  2, -1,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0 },
	{ 23, 19, -6,  3, -1,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0 },
	{ -7, 20, 20, -6,  3, -1,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0 },
	{  3, -6, 20, 20, -6,  3, -1,  0,  0,  0,  0,  0,  0,  0,  0,  0 },
	{ -1,  3, -6, 20, 20, -6,  3, -1,  0,  0,  0,  0,  0,  0,  0,  0 },
	{  0, -1,  3, -6, 20, 20, -6,  3, -1,  0,  0,  0,  0,  0,  0,  0 },
	{  0,  0, -1,  3, -6, 20, 20, -6,  3, -1,  0,  0,  0,  0,  0,  0 },
	{  0,  0,  0, -1,  3, -6, 20, 20, -6,  3, -1,  0,  0,  0,  0,  0 },
	{  0,  0,  0,  0, -1,  3, -6, 20, 20, -6,  3, -1,  0,  0,  0,  0 },
	{  0,  0,  0,  0,  0, -1,  3, -6, 20, 20, -6,  3, -1,  0,  0,  0 },
	{  0,  0,  0,  0,  0,  0, -1,  3, -6, 20, 20, -6,  3, -1,  0,  0 },
	{  0,  0,  0,  0,  0,  0,  0, -1,  3, -6, 20, 20, -6,  3, -1,  0 },
	{  0,  0,  0,  0,  0,  0,  0,  0, -1,  3, -6, 20, 20, -6,  3, -1 },
	{  0,  0,  0,  0,  0,  0,  0,  0,  0, -1,  3, -6, 20, 20, -6,  3 },
	{  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, -1,  3, -6, 20, 20, -7 },
	{  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, -1,  3, -6, 19, 23 },
	{  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, -1,  2, -3, 14 }
};
67
68 /* Implementation
69 ****************************************************************************/
70
#define XVID_AUTO_INCLUDE
/* First auto include this file to generate reference code for SIMD versions
 * This set of functions are good for educational purpose, because they're
 * straightforward to understand, use loops and so on... But obviously they
 * sux when it comes to speed */
/* Each self-include below re-enters this file with XVID_AUTO_INCLUDE set,
 * so only the template section at the bottom is compiled. SIZE/TABLE pick
 * the geometry, STORE picks plain-store vs. averaging (the "Add" variants
 * used by B-frames), and the FUNC_* macros name the generated functions.
 * The template section #undef's STORE and FUNC_* after each pass. */
#define REFERENCE_CODE

/* 16x? filters */

#define SIZE 16
#define TABLE FIR_Tab_16

#define STORE(d,s) (d) = (s)
#define FUNC_H H_Pass_16_C_ref
#define FUNC_V V_Pass_16_C_ref
#define FUNC_HA H_Pass_Avrg_16_C_ref
#define FUNC_VA V_Pass_Avrg_16_C_ref
#define FUNC_HA_UP H_Pass_Avrg_Up_16_C_ref
#define FUNC_VA_UP V_Pass_Avrg_Up_16_C_ref

#include "qpel.c" /* self-include ourself */

/* note: B-frame always uses Rnd=0... */
#define STORE(d,s) (d) = ( (s)+(d)+1 ) >> 1
#define FUNC_H H_Pass_16_Add_C_ref
#define FUNC_V V_Pass_16_Add_C_ref
#define FUNC_HA H_Pass_Avrg_16_Add_C_ref
#define FUNC_VA V_Pass_Avrg_16_Add_C_ref
#define FUNC_HA_UP H_Pass_Avrg_Up_16_Add_C_ref
#define FUNC_VA_UP V_Pass_Avrg_Up_16_Add_C_ref

#include "qpel.c" /* self-include ourself */

#undef SIZE
#undef TABLE

/* 8x? filters */

#define SIZE 8
#define TABLE FIR_Tab_8

#define STORE(d,s) (d) = (s)
#define FUNC_H H_Pass_8_C_ref
#define FUNC_V V_Pass_8_C_ref
#define FUNC_HA H_Pass_Avrg_8_C_ref
#define FUNC_VA V_Pass_Avrg_8_C_ref
#define FUNC_HA_UP H_Pass_Avrg_Up_8_C_ref
#define FUNC_VA_UP V_Pass_Avrg_Up_8_C_ref

#include "qpel.c" /* self-include ourself */

/* note: B-frame always uses Rnd=0... */
#define STORE(d,s) (d) = ( (s)+(d)+1 ) >> 1
#define FUNC_H H_Pass_8_Add_C_ref
#define FUNC_V V_Pass_8_Add_C_ref
#define FUNC_HA H_Pass_Avrg_8_Add_C_ref
#define FUNC_VA V_Pass_Avrg_8_Add_C_ref
#define FUNC_HA_UP H_Pass_Avrg_Up_8_Add_C_ref
#define FUNC_VA_UP V_Pass_Avrg_Up_8_Add_C_ref

#include "qpel.c" /* self-include ourself */

#undef SIZE
#undef TABLE

/* Then we define more optimized C version where loops are unrolled, where
 * FIR coeffcients are not read from memory but are hardcoded in instructions
 * They should be faster */
#undef REFERENCE_CODE

/* 16x? filters */

#define SIZE 16
/* NOTE(review): no TABLE define here — the non-reference template hardcodes
 * the FIR coefficients and never reads TABLE, so none is needed. */

#define STORE(d,s) (d) = (s)
#define FUNC_H H_Pass_16_C
#define FUNC_V V_Pass_16_C
#define FUNC_HA H_Pass_Avrg_16_C
#define FUNC_VA V_Pass_Avrg_16_C
#define FUNC_HA_UP H_Pass_Avrg_Up_16_C
#define FUNC_VA_UP V_Pass_Avrg_Up_16_C

#include "qpel.c" /* self-include ourself */

/* note: B-frame always uses Rnd=0... */
#define STORE(d,s) (d) = ( (s)+(d)+1 ) >> 1
#define FUNC_H H_Pass_16_Add_C
#define FUNC_V V_Pass_16_Add_C
#define FUNC_HA H_Pass_Avrg_16_Add_C
#define FUNC_VA V_Pass_Avrg_16_Add_C
#define FUNC_HA_UP H_Pass_Avrg_Up_16_Add_C
#define FUNC_VA_UP V_Pass_Avrg_Up_16_Add_C

#include "qpel.c" /* self-include ourself */

#undef SIZE
#undef TABLE	/* no-op here: TABLE was never defined for the fast 16 section */

/* 8x? filters */

#define SIZE 8
#define TABLE FIR_Tab_8	/* vestigial: the fast template does not use TABLE */

#define STORE(d,s) (d) = (s)
#define FUNC_H H_Pass_8_C
#define FUNC_V V_Pass_8_C
#define FUNC_HA H_Pass_Avrg_8_C
#define FUNC_VA V_Pass_Avrg_8_C
#define FUNC_HA_UP H_Pass_Avrg_Up_8_C
#define FUNC_VA_UP V_Pass_Avrg_Up_8_C

#include "qpel.c" /* self-include ourself */

/* note: B-frame always uses Rnd=0... */
#define STORE(d,s) (d) = ( (s)+(d)+1 ) >> 1
#define FUNC_H H_Pass_8_Add_C
#define FUNC_V V_Pass_8_Add_C
#define FUNC_HA H_Pass_Avrg_8_Add_C
#define FUNC_VA V_Pass_Avrg_8_Add_C
#define FUNC_HA_UP H_Pass_Avrg_Up_8_Add_C
#define FUNC_VA_UP V_Pass_Avrg_Up_8_Add_C

#include "qpel.c" /* self-include ourself */

#undef SIZE
#undef TABLE
#undef XVID_AUTO_INCLUDE
198
199 /* Global scope hooks
200 ****************************************************************************/
201
/* Active implementation hooks, one table per pass family.
 * NOTE(review): these are expected to be pointed at one of the
 * implementation tables below (C ref / fast C / MMX / AltiVec) during
 * codec initialisation — the assignment is not visible in this file. */
XVID_QP_FUNCS *xvid_QP_Funcs = NULL;		/* plain-store passes */
XVID_QP_FUNCS *xvid_QP_Add_Funcs = NULL;	/* averaging passes (B-frames) */

/* Reference plain C impl. declaration
 ****************************************************************************/

XVID_QP_FUNCS xvid_QP_Funcs_C_ref = {
	H_Pass_16_C_ref, H_Pass_Avrg_16_C_ref, H_Pass_Avrg_Up_16_C_ref,
	V_Pass_16_C_ref, V_Pass_Avrg_16_C_ref, V_Pass_Avrg_Up_16_C_ref,

	H_Pass_8_C_ref, H_Pass_Avrg_8_C_ref, H_Pass_Avrg_Up_8_C_ref,
	V_Pass_8_C_ref, V_Pass_Avrg_8_C_ref, V_Pass_Avrg_Up_8_C_ref
};

XVID_QP_FUNCS xvid_QP_Add_Funcs_C_ref = {
	H_Pass_16_Add_C_ref, H_Pass_Avrg_16_Add_C_ref, H_Pass_Avrg_Up_16_Add_C_ref,
	V_Pass_16_Add_C_ref, V_Pass_Avrg_16_Add_C_ref, V_Pass_Avrg_Up_16_Add_C_ref,

	H_Pass_8_Add_C_ref, H_Pass_Avrg_8_Add_C_ref, H_Pass_Avrg_Up_8_Add_C_ref,
	V_Pass_8_Add_C_ref, V_Pass_Avrg_8_Add_C_ref, V_Pass_Avrg_Up_8_Add_C_ref
};

/* Plain C impl. declaration (faster than ref one)
 ****************************************************************************/

XVID_QP_FUNCS xvid_QP_Funcs_C = {
	H_Pass_16_C, H_Pass_Avrg_16_C, H_Pass_Avrg_Up_16_C,
	V_Pass_16_C, V_Pass_Avrg_16_C, V_Pass_Avrg_Up_16_C,

	H_Pass_8_C, H_Pass_Avrg_8_C, H_Pass_Avrg_Up_8_C,
	V_Pass_8_C, V_Pass_Avrg_8_C, V_Pass_Avrg_Up_8_C
};

XVID_QP_FUNCS xvid_QP_Add_Funcs_C = {
	H_Pass_16_Add_C, H_Pass_Avrg_16_Add_C, H_Pass_Avrg_Up_16_Add_C,
	V_Pass_16_Add_C, V_Pass_Avrg_16_Add_C, V_Pass_Avrg_Up_16_Add_C,

	H_Pass_8_Add_C, H_Pass_Avrg_8_Add_C, H_Pass_Avrg_Up_8_Add_C,
	V_Pass_8_Add_C, V_Pass_Avrg_8_Add_C, V_Pass_Avrg_Up_8_Add_C
};
242
243 /* mmx impl. declaration (see. qpel_mmx.asm
244 ****************************************************************************/
245
#if defined (ARCH_IS_IA32) || defined(ARCH_IS_X86_64)
/* Assembler implementations, defined in qpel_mmx.asm. */
extern XVID_QP_PASS_SIGNATURE(xvid_H_Pass_16_mmx);
extern XVID_QP_PASS_SIGNATURE(xvid_H_Pass_Avrg_16_mmx);
extern XVID_QP_PASS_SIGNATURE(xvid_H_Pass_Avrg_Up_16_mmx);
extern XVID_QP_PASS_SIGNATURE(xvid_V_Pass_16_mmx);
extern XVID_QP_PASS_SIGNATURE(xvid_V_Pass_Avrg_16_mmx);
extern XVID_QP_PASS_SIGNATURE(xvid_V_Pass_Avrg_Up_16_mmx);

extern XVID_QP_PASS_SIGNATURE(xvid_H_Pass_8_mmx);
extern XVID_QP_PASS_SIGNATURE(xvid_H_Pass_Avrg_8_mmx);
extern XVID_QP_PASS_SIGNATURE(xvid_H_Pass_Avrg_Up_8_mmx);
extern XVID_QP_PASS_SIGNATURE(xvid_V_Pass_8_mmx);
extern XVID_QP_PASS_SIGNATURE(xvid_V_Pass_Avrg_8_mmx);
extern XVID_QP_PASS_SIGNATURE(xvid_V_Pass_Avrg_Up_8_mmx);

/* NOTE(review): the asm naming is asymmetric — the 16-wide Add variants are
 * "..._Add_16_mmx" while the 8-wide ones are "..._8_Add_mmx". These names
 * must match the assembler symbols, so they cannot be normalised here. */
extern XVID_QP_PASS_SIGNATURE(xvid_H_Pass_Add_16_mmx);
extern XVID_QP_PASS_SIGNATURE(xvid_H_Pass_Avrg_Add_16_mmx);
extern XVID_QP_PASS_SIGNATURE(xvid_H_Pass_Avrg_Up_Add_16_mmx);
extern XVID_QP_PASS_SIGNATURE(xvid_V_Pass_Add_16_mmx);
extern XVID_QP_PASS_SIGNATURE(xvid_V_Pass_Avrg_Add_16_mmx);
extern XVID_QP_PASS_SIGNATURE(xvid_V_Pass_Avrg_Up_Add_16_mmx);

extern XVID_QP_PASS_SIGNATURE(xvid_H_Pass_8_Add_mmx);
extern XVID_QP_PASS_SIGNATURE(xvid_H_Pass_Avrg_8_Add_mmx);
extern XVID_QP_PASS_SIGNATURE(xvid_H_Pass_Avrg_Up_8_Add_mmx);
extern XVID_QP_PASS_SIGNATURE(xvid_V_Pass_8_Add_mmx);
extern XVID_QP_PASS_SIGNATURE(xvid_V_Pass_Avrg_8_Add_mmx);
extern XVID_QP_PASS_SIGNATURE(xvid_V_Pass_Avrg_Up_8_Add_mmx);

XVID_QP_FUNCS xvid_QP_Funcs_mmx = {
	xvid_H_Pass_16_mmx, xvid_H_Pass_Avrg_16_mmx, xvid_H_Pass_Avrg_Up_16_mmx,
	xvid_V_Pass_16_mmx, xvid_V_Pass_Avrg_16_mmx, xvid_V_Pass_Avrg_Up_16_mmx,

	xvid_H_Pass_8_mmx, xvid_H_Pass_Avrg_8_mmx, xvid_H_Pass_Avrg_Up_8_mmx,
	xvid_V_Pass_8_mmx, xvid_V_Pass_Avrg_8_mmx, xvid_V_Pass_Avrg_Up_8_mmx
};

XVID_QP_FUNCS xvid_QP_Add_Funcs_mmx = {
	xvid_H_Pass_Add_16_mmx, xvid_H_Pass_Avrg_Add_16_mmx, xvid_H_Pass_Avrg_Up_Add_16_mmx,
	xvid_V_Pass_Add_16_mmx, xvid_V_Pass_Avrg_Add_16_mmx, xvid_V_Pass_Avrg_Up_Add_16_mmx,

	xvid_H_Pass_8_Add_mmx, xvid_H_Pass_Avrg_8_Add_mmx, xvid_H_Pass_Avrg_Up_8_Add_mmx,
	xvid_V_Pass_8_Add_mmx, xvid_V_Pass_Avrg_8_Add_mmx, xvid_V_Pass_Avrg_Up_8_Add_mmx,
};
#endif /* ARCH_IS_IA32 || ARCH_IS_X86_64 */
291
292
293 /* altivec impl. declaration (see qpel_altivec.c)
294 ****************************************************************************/
295
#ifdef ARCH_IS_PPC

/* AltiVec implementations, defined in qpel_altivec.c. */
extern XVID_QP_PASS_SIGNATURE(H_Pass_16_Altivec_C);
extern XVID_QP_PASS_SIGNATURE(H_Pass_Avrg_16_Altivec_C);
extern XVID_QP_PASS_SIGNATURE(H_Pass_Avrg_Up_16_Altivec_C);
extern XVID_QP_PASS_SIGNATURE(V_Pass_16_Altivec_C);
extern XVID_QP_PASS_SIGNATURE(V_Pass_Avrg_16_Altivec_C);
extern XVID_QP_PASS_SIGNATURE(V_Pass_Avrg_Up_16_Altivec_C);

extern XVID_QP_PASS_SIGNATURE(H_Pass_8_Altivec_C);
extern XVID_QP_PASS_SIGNATURE(H_Pass_Avrg_8_Altivec_C);
extern XVID_QP_PASS_SIGNATURE(H_Pass_Avrg_Up_8_Altivec_C);
extern XVID_QP_PASS_SIGNATURE(V_Pass_8_Altivec_C);
extern XVID_QP_PASS_SIGNATURE(V_Pass_Avrg_8_Altivec_C);
extern XVID_QP_PASS_SIGNATURE(V_Pass_Avrg_Up_8_Altivec_C);


extern XVID_QP_PASS_SIGNATURE(H_Pass_16_Add_Altivec_C);
extern XVID_QP_PASS_SIGNATURE(H_Pass_Avrg_16_Add_Altivec_C);
extern XVID_QP_PASS_SIGNATURE(H_Pass_Avrg_Up_16_Add_Altivec_C);
extern XVID_QP_PASS_SIGNATURE(V_Pass_16_Add_Altivec_C);
extern XVID_QP_PASS_SIGNATURE(V_Pass_Avrg_16_Add_Altivec_C);
extern XVID_QP_PASS_SIGNATURE(V_Pass_Avrg_Up_16_Add_Altivec_C);

extern XVID_QP_PASS_SIGNATURE(H_Pass_8_Add_Altivec_C);
extern XVID_QP_PASS_SIGNATURE(H_Pass_Avrg_8_Add_Altivec_C);
extern XVID_QP_PASS_SIGNATURE(H_Pass_Avrg_Up_8_Add_Altivec_C);
extern XVID_QP_PASS_SIGNATURE(V_Pass_8_Add_Altivec_C);
extern XVID_QP_PASS_SIGNATURE(V_Pass_Avrg_8_Add_Altivec_C);
extern XVID_QP_PASS_SIGNATURE(V_Pass_Avrg_Up_8_Add_Altivec_C);

XVID_QP_FUNCS xvid_QP_Funcs_Altivec_C = {
	H_Pass_16_Altivec_C, H_Pass_Avrg_16_Altivec_C, H_Pass_Avrg_Up_16_Altivec_C,
	V_Pass_16_Altivec_C, V_Pass_Avrg_16_Altivec_C, V_Pass_Avrg_Up_16_Altivec_C,

	H_Pass_8_Altivec_C, H_Pass_Avrg_8_Altivec_C, H_Pass_Avrg_Up_8_Altivec_C,
	V_Pass_8_Altivec_C, V_Pass_Avrg_8_Altivec_C, V_Pass_Avrg_Up_8_Altivec_C
};

XVID_QP_FUNCS xvid_QP_Add_Funcs_Altivec_C = {
	H_Pass_16_Add_Altivec_C, H_Pass_Avrg_16_Add_Altivec_C, H_Pass_Avrg_Up_16_Add_Altivec_C,
	V_Pass_16_Add_Altivec_C, V_Pass_Avrg_16_Add_Altivec_C, V_Pass_Avrg_Up_16_Add_Altivec_C,

	H_Pass_8_Add_Altivec_C, H_Pass_Avrg_8_Add_Altivec_C, H_Pass_Avrg_Up_8_Add_Altivec_C,
	V_Pass_8_Add_Altivec_C, V_Pass_Avrg_8_Add_Altivec_C, V_Pass_Avrg_Up_8_Add_Altivec_C
};

#endif /* ARCH_IS_PPC */
344
345 /* tables for ASM
346 ****************************************************************************/
347
348
#if defined(ARCH_IS_IA32) || defined(ARCH_IS_X86_64)
/* These symbols will be used outside this file, so tell the compiler
 * they're global. NOTE(review): no definitions for them appear in this
 * file on x86 — presumably the storage lives in the assembler module
 * (qpel_mmx.asm); verify against the asm source. */
extern uint16_t xvid_Expand_mmx[256][4]; /* 8b -> 64b expansion table */

/* Per-coefficient-quadruple FIR multiply tables, filled by xvid_Init_QP():
 * xvid_FIR_A_B_C_D[v] = { v*(+/-A), v*(+/-B), v*(+/-C), v*(+/-D) }. */
extern int16_t xvid_FIR_1_0_0_0[256][4];
extern int16_t xvid_FIR_3_1_0_0[256][4];
extern int16_t xvid_FIR_6_3_1_0[256][4];
extern int16_t xvid_FIR_14_3_2_1[256][4];
extern int16_t xvid_FIR_20_6_3_1[256][4];
extern int16_t xvid_FIR_20_20_6_3[256][4];
extern int16_t xvid_FIR_23_19_6_3[256][4];
extern int16_t xvid_FIR_7_20_20_6[256][4];
extern int16_t xvid_FIR_6_20_20_6[256][4];
extern int16_t xvid_FIR_6_20_20_7[256][4];
extern int16_t xvid_FIR_3_6_20_20[256][4];
extern int16_t xvid_FIR_3_6_19_23[256][4];
extern int16_t xvid_FIR_1_3_6_20[256][4];
extern int16_t xvid_FIR_1_2_3_14[256][4];
extern int16_t xvid_FIR_0_1_3_6[256][4];
extern int16_t xvid_FIR_0_0_1_3[256][4];
extern int16_t xvid_FIR_0_0_0_1[256][4];
#endif
372
373 /* Arrays definitions, according to the target platform */
374
#if !defined(ARCH_IS_X86_64) && !defined(ARCH_IS_IA32)
/* Only ia32/x86_64 reference these tables from outside this file (from the
 * assembler code); for every other arch they are private, so give them
 * internal linkage directly.
 * Fix: the former "__SCOPE" helper macro was removed — identifiers starting
 * with a double underscore are reserved for the implementation (C11 7.1.3),
 * and the indirection served no purpose in this always-static branch. */
static int16_t xvid_FIR_1_0_0_0[256][4];
static int16_t xvid_FIR_3_1_0_0[256][4];
static int16_t xvid_FIR_6_3_1_0[256][4];
static int16_t xvid_FIR_14_3_2_1[256][4];
static int16_t xvid_FIR_20_6_3_1[256][4];
static int16_t xvid_FIR_20_20_6_3[256][4];
static int16_t xvid_FIR_23_19_6_3[256][4];
static int16_t xvid_FIR_7_20_20_6[256][4];
static int16_t xvid_FIR_6_20_20_6[256][4];
static int16_t xvid_FIR_6_20_20_7[256][4];
static int16_t xvid_FIR_3_6_20_20[256][4];
static int16_t xvid_FIR_3_6_19_23[256][4];
static int16_t xvid_FIR_1_3_6_20[256][4];
static int16_t xvid_FIR_1_2_3_14[256][4];
static int16_t xvid_FIR_0_1_3_6[256][4];
static int16_t xvid_FIR_0_0_1_3[256][4];
static int16_t xvid_FIR_0_0_0_1[256][4];
#endif
397
/* Fill Tab with the 4-tap FIR multiply table for every possible byte
 * value v (0..255): Tab[v] = { v*A, v*B, v*C, v*D }. Used by the
 * table-driven filtering path (cf. USE_TABLES in qpel_mmx.asm). */
static void Init_FIR_Table(int16_t Tab[][4],
						   int A, int B, int C, int D)
{
	const int coef[4] = { A, B, C, D };
	int v, j;

	for (v = 0; v < 256; ++v)
		for (j = 0; j < 4; ++j)
			Tab[v][j] = (int16_t)(v * coef[j]);
}
409
410
xvid_Init_QP(void)411 void xvid_Init_QP(void)
412 {
413 #if defined (ARCH_IS_IA32) || defined (ARCH_IS_X86_64)
414 int i;
415
416 for(i=0; i<256; ++i) {
417 xvid_Expand_mmx[i][0] = i;
418 xvid_Expand_mmx[i][1] = i;
419 xvid_Expand_mmx[i][2] = i;
420 xvid_Expand_mmx[i][3] = i;
421 }
422 #endif
423
424 /* Alternate way of filtering (cf. USE_TABLES flag in qpel_mmx.asm) */
425
426 Init_FIR_Table(xvid_FIR_1_0_0_0, -1, 0, 0, 0);
427 Init_FIR_Table(xvid_FIR_3_1_0_0, 3, -1, 0, 0);
428 Init_FIR_Table(xvid_FIR_6_3_1_0, -6, 3, -1, 0);
429 Init_FIR_Table(xvid_FIR_14_3_2_1, 14, -3, 2, -1);
430 Init_FIR_Table(xvid_FIR_20_6_3_1, 20, -6, 3, -1);
431 Init_FIR_Table(xvid_FIR_20_20_6_3, 20, 20, -6, 3);
432 Init_FIR_Table(xvid_FIR_23_19_6_3, 23, 19, -6, 3);
433 Init_FIR_Table(xvid_FIR_7_20_20_6, -7, 20, 20, -6);
434 Init_FIR_Table(xvid_FIR_6_20_20_6, -6, 20, 20, -6);
435 Init_FIR_Table(xvid_FIR_6_20_20_7, -6, 20, 20, -7);
436 Init_FIR_Table(xvid_FIR_3_6_20_20, 3, -6, 20, 20);
437 Init_FIR_Table(xvid_FIR_3_6_19_23, 3, -6, 19, 23);
438 Init_FIR_Table(xvid_FIR_1_3_6_20, -1, 3, -6, 20);
439 Init_FIR_Table(xvid_FIR_1_2_3_14, -1, 2, -3, 14);
440 Init_FIR_Table(xvid_FIR_0_1_3_6, 0, -1, 3, -6);
441 Init_FIR_Table(xvid_FIR_0_0_1_3, 0, 0, -1, 3);
442 Init_FIR_Table(xvid_FIR_0_0_0_1, 0, 0, 0, -1);
443
444 }
445
446 #endif /* !XVID_AUTO_INCLUDE */
447
448 #if defined(XVID_AUTO_INCLUDE) && defined(REFERENCE_CODE)
449
450 /*****************************************************************************
451 * "reference" filters impl. in plain C
452 ****************************************************************************/
453
454 static
FUNC_H(uint8_t * Dst,const uint8_t * Src,int32_t H,int32_t BpS,int32_t Rnd)455 void FUNC_H(uint8_t *Dst, const uint8_t *Src, int32_t H, int32_t BpS, int32_t Rnd)
456 {
457 while(H-->0) {
458 int32_t i, k;
459 int32_t Sums[SIZE] = { 0 };
460 for(i=0; i<=SIZE; ++i)
461 for(k=0; k<SIZE; ++k)
462 Sums[k] += TABLE[i][k] * Src[i];
463
464 for(i=0; i<SIZE; ++i) {
465 int32_t C = ( Sums[i] + 16-Rnd ) >> 5;
466 if (C<0) C = 0; else if (C>255) C = 255;
467 STORE(Dst[i], C);
468 }
469 Src += BpS;
470 Dst += BpS;
471 }
472 }
473
474 static
FUNC_V(uint8_t * Dst,const uint8_t * Src,int32_t W,int32_t BpS,int32_t Rnd)475 void FUNC_V(uint8_t *Dst, const uint8_t *Src, int32_t W, int32_t BpS, int32_t Rnd)
476 {
477 while(W-->0) {
478 int32_t i, k;
479 int32_t Sums[SIZE] = { 0 };
480 const uint8_t *S = Src++;
481 uint8_t *D = Dst++;
482 for(i=0; i<=SIZE; ++i) {
483 for(k=0; k<SIZE; ++k)
484 Sums[k] += TABLE[i][k] * S[0];
485 S += BpS;
486 }
487
488 for(i=0; i<SIZE; ++i) {
489 int32_t C = ( Sums[i] + 16-Rnd )>>5;
490 if (C<0) C = 0; else if (C>255) C = 255;
491 STORE(D[0], C);
492 D += BpS;
493 }
494 }
495 }
496
/* Horizontal FIR pass + averaging with the co-located source pixel
 * (reference implementation). The clipped FIR result is averaged with
 * Src[i] (same rounding mode Rnd), then written through STORE. */
static
void FUNC_HA(uint8_t *Dst, const uint8_t *Src, int32_t H, int32_t BpS, int32_t Rnd)
{
	while(H-->0) {
		int32_t i, k;
		int32_t Sums[SIZE] = { 0 };
		for(i=0; i<=SIZE; ++i)       /* SIZE+1 source taps */
			for(k=0; k<SIZE; ++k)
				Sums[k] += TABLE[i][k] * Src[i];

		for(i=0; i<SIZE; ++i) {
			int32_t C = ( Sums[i] + 16-Rnd ) >> 5;
			if (C<0) C = 0; else if (C>255) C = 255;
			C = (C+Src[i]+1-Rnd) >> 1;   /* average with the full-pel sample */
			STORE(Dst[i], C);
		}
		Src += BpS;
		Dst += BpS;
	}
}
517
/* Horizontal FIR pass + averaging with the NEXT source pixel (Src[i+1])
 * (reference implementation). Same as FUNC_HA except the averaging
 * partner is shifted one pixel to the right. */
static
void FUNC_HA_UP(uint8_t *Dst, const uint8_t *Src, int32_t H, int32_t BpS, int32_t Rnd)
{
	while(H-->0) {
		int32_t i, k;
		int32_t Sums[SIZE] = { 0 };
		for(i=0; i<=SIZE; ++i)       /* SIZE+1 source taps */
			for(k=0; k<SIZE; ++k)
				Sums[k] += TABLE[i][k] * Src[i];

		for(i=0; i<SIZE; ++i) {
			int32_t C = ( Sums[i] + 16-Rnd ) >> 5;
			if (C<0) C = 0; else if (C>255) C = 255;
			C = (C+Src[i+1]+1-Rnd) >> 1;   /* average with the next sample */
			STORE(Dst[i], C);
		}
		Src += BpS;
		Dst += BpS;
	}
}
538
/* Vertical FIR pass + averaging with the co-located source pixel
 * (reference implementation). Processes W columns; each clipped FIR
 * result is averaged with the source pixel of the same row. */
static
void FUNC_VA(uint8_t *Dst, const uint8_t *Src, int32_t W, int32_t BpS, int32_t Rnd)
{
	while(W-->0) {
		int32_t i, k;
		int32_t Sums[SIZE] = { 0 };
		const uint8_t *S = Src;
		uint8_t *D = Dst;

		for(i=0; i<=SIZE; ++i) {     /* SIZE+1 vertical taps */
			for(k=0; k<SIZE; ++k)
				Sums[k] += TABLE[i][k] * S[0];
			S += BpS;
		}

		S = Src;                     /* rewind for the averaging partner */
		for(i=0; i<SIZE; ++i) {
			int32_t C = ( Sums[i] + 16-Rnd )>>5;
			if (C<0) C = 0; else if (C>255) C = 255;
			C = ( C+S[0]+1-Rnd ) >> 1;
			STORE(D[0], C);
			D += BpS;
			S += BpS;
		}
		Src++;
		Dst++;
	}
}
567
/* Vertical FIR pass + averaging with the source pixel one row BELOW
 * (reference implementation). Same as FUNC_VA except the averaging
 * partner starts at Src + BpS. */
static
void FUNC_VA_UP(uint8_t *Dst, const uint8_t *Src, int32_t W, int32_t BpS, int32_t Rnd)
{
	while(W-->0) {
		int32_t i, k;
		int32_t Sums[SIZE] = { 0 };
		const uint8_t *S = Src;
		uint8_t *D = Dst;

		for(i=0; i<=SIZE; ++i) {     /* SIZE+1 vertical taps */
			for(k=0; k<SIZE; ++k)
				Sums[k] += TABLE[i][k] * S[0];
			S += BpS;
		}

		S = Src + BpS;               /* averaging partner: one row below */
		for(i=0; i<SIZE; ++i) {
			int32_t C = ( Sums[i] + 16-Rnd )>>5;
			if (C<0) C = 0; else if (C>255) C = 255;
			C = ( C+S[0]+1-Rnd ) >> 1;
			STORE(D[0], C);
			D += BpS;
			S += BpS;
		}
		Dst++;
		Src++;
	}
}

/* clean up the per-instantiation template macros before the next include */
#undef STORE
#undef FUNC_H
#undef FUNC_V
#undef FUNC_HA
#undef FUNC_VA
#undef FUNC_HA_UP
#undef FUNC_VA_UP
604
605 #elif defined(XVID_AUTO_INCLUDE) && !defined(REFERENCE_CODE)
606
607 /*****************************************************************************
608 * "fast" filters impl. in plain C
609 ****************************************************************************/
610
/* Accumulators here keep the value pre-shift: clip against 255<<5 before
 * the >>5 normalisation (negative values are clipped before shifting). */
#define CLIP_STORE(D,C) \
	if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5; \
	STORE(D, C)

/* Horizontal FIR pass, fast variant: fully unrolled with the FIR
 * coefficients hardcoded (same kernel as FIR_Tab_16/FIR_Tab_8, with
 * boundary-adjusted taps on the first/last three outputs). */
static void
FUNC_H(uint8_t *Dst, const uint8_t *Src, int32_t H, int32_t BpS, int32_t RND)
{
#if (SIZE==16)
	while(H-->0) {
		int C;
		C = 16-RND +14*Src[0] +23*Src[1] - 7*Src[2] + 3*Src[3] - Src[4];
		CLIP_STORE(Dst[ 0],C);
		C = 16-RND - 3*(Src[0]-Src[4]) +19*Src[1] +20*Src[2] - 6*Src[3] - Src[5];
		CLIP_STORE(Dst[ 1],C);
		C = 16-RND + 2*Src[0] - 6*(Src[1]+Src[4]) +20*(Src[2]+Src[3]) + 3*Src[5] - Src[6];
		CLIP_STORE(Dst[ 2],C);
		C = 16-RND - (Src[0]+Src[7 ]) + 3*(Src[ 1]+Src[ 6])-6*(Src[ 2]+Src[ 5]) + 20*(Src[ 3]+Src[ 4]);
		CLIP_STORE(Dst[ 3],C);
		C = 16-RND - (Src[1]+Src[8 ]) + 3*(Src[ 2]+Src[ 7])-6*(Src[ 3]+Src[ 6]) + 20*(Src[ 4]+Src[ 5]);
		CLIP_STORE(Dst[ 4],C);
		C = 16-RND - (Src[2]+Src[9 ]) + 3*(Src[ 3]+Src[ 8])-6*(Src[ 4]+Src[ 7]) + 20*(Src[ 5]+Src[ 6]);
		CLIP_STORE(Dst[ 5],C);
		C = 16-RND - (Src[3]+Src[10]) + 3*(Src[ 4]+Src[ 9])-6*(Src[ 5]+Src[ 8]) + 20*(Src[ 6]+Src[ 7]);
		CLIP_STORE(Dst[ 6],C);
		C = 16-RND - (Src[4]+Src[11]) + 3*(Src[ 5]+Src[10])-6*(Src[ 6]+Src[ 9]) + 20*(Src[ 7]+Src[ 8]);
		CLIP_STORE(Dst[ 7],C);
		C = 16-RND - (Src[5]+Src[12]) + 3*(Src[ 6]+Src[11])-6*(Src[ 7]+Src[10]) + 20*(Src[ 8]+Src[ 9]);
		CLIP_STORE(Dst[ 8],C);
		C = 16-RND - (Src[6]+Src[13]) + 3*(Src[ 7]+Src[12])-6*(Src[ 8]+Src[11]) + 20*(Src[ 9]+Src[10]);
		CLIP_STORE(Dst[ 9],C);
		C = 16-RND - (Src[7]+Src[14]) + 3*(Src[ 8]+Src[13])-6*(Src[ 9]+Src[12]) + 20*(Src[10]+Src[11]);
		CLIP_STORE(Dst[10],C);
		C = 16-RND - (Src[8]+Src[15]) + 3*(Src[ 9]+Src[14])-6*(Src[10]+Src[13]) + 20*(Src[11]+Src[12]);
		CLIP_STORE(Dst[11],C);
		C = 16-RND - (Src[9]+Src[16]) + 3*(Src[10]+Src[15])-6*(Src[11]+Src[14]) + 20*(Src[12]+Src[13]);
		CLIP_STORE(Dst[12],C);
		C = 16-RND - Src[10] +3*Src[11] -6*(Src[12]+Src[15]) + 20*(Src[13]+Src[14]) +2*Src[16];
		CLIP_STORE(Dst[13],C);
		C = 16-RND - Src[11] +3*(Src[12]-Src[16]) -6*Src[13] + 20*Src[14] + 19*Src[15];
		CLIP_STORE(Dst[14],C);
		C = 16-RND - Src[12] +3*Src[13] -7*Src[14] + 23*Src[15] + 14*Src[16];
		CLIP_STORE(Dst[15],C);
		Src += BpS;
		Dst += BpS;
	}
#else
	while(H-->0) {
		int C;
		C = 16-RND +14*Src[0] +23*Src[1] - 7*Src[2] + 3*Src[3] - Src[4];
		CLIP_STORE(Dst[0],C);
		C = 16-RND - 3*(Src[0]-Src[4]) +19*Src[1] +20*Src[2] - 6*Src[3] - Src[5];
		CLIP_STORE(Dst[1],C);
		C = 16-RND + 2*Src[0] - 6*(Src[1]+Src[4]) +20*(Src[2]+Src[3]) + 3*Src[5] - Src[6];
		CLIP_STORE(Dst[2],C);
		C = 16-RND - (Src[0]+Src[7]) + 3*(Src[1]+Src[6])-6*(Src[2]+Src[5]) + 20*(Src[3]+Src[4]);
		CLIP_STORE(Dst[3],C);
		C = 16-RND - (Src[1]+Src[8]) + 3*(Src[2]+Src[7])-6*(Src[3]+Src[6]) + 20*(Src[4]+Src[5]);
		CLIP_STORE(Dst[4],C);
		C = 16-RND - Src[2] +3*Src[3] -6*(Src[4]+Src[7]) + 20*(Src[5]+Src[6]) +2*Src[8];
		CLIP_STORE(Dst[5],C);
		C = 16-RND - Src[3] +3*(Src[4]-Src[8]) -6*Src[5] + 20*Src[6] + 19*Src[7];
		CLIP_STORE(Dst[6],C);
		C = 16-RND - Src[4] +3*Src[5] -7*Src[6] + 23*Src[7] + 14*Src[8];
		CLIP_STORE(Dst[7],C);
		Src += BpS;
		Dst += BpS;
	}
#endif
}
#undef CLIP_STORE
681
/* Clip/normalise, then average with the co-located source pixel Src[i]
 * before storing ("Avrg" variant of the fast horizontal pass). */
#define CLIP_STORE(i,C) \
	if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5; \
	C = (C+Src[i]+1-RND) >> 1; \
	STORE(Dst[i], C)

/* Horizontal FIR pass + full-pel averaging, fast unrolled variant
 * (coefficients hardcoded, same kernel as the reference FUNC_HA). */
static void
FUNC_HA(uint8_t *Dst, const uint8_t *Src, int32_t H, int32_t BpS, int32_t RND)
{
#if (SIZE==16)
	while(H-->0) {
		int C;
		C = 16-RND +14*Src[0] +23*Src[1] - 7*Src[2] + 3*Src[3] - Src[4];
		CLIP_STORE(0,C);
		C = 16-RND - 3*(Src[0]-Src[4]) +19*Src[1] +20*Src[2] - 6*Src[3] - Src[5];
		CLIP_STORE( 1,C);
		C = 16-RND + 2*Src[0] - 6*(Src[1]+Src[4]) +20*(Src[2]+Src[3]) + 3*Src[5] - Src[6];
		CLIP_STORE( 2,C);
		C = 16-RND - (Src[0]+Src[7 ]) + 3*(Src[ 1]+Src[ 6])-6*(Src[ 2]+Src[ 5]) + 20*(Src[ 3]+Src[ 4]);
		CLIP_STORE( 3,C);
		C = 16-RND - (Src[1]+Src[8 ]) + 3*(Src[ 2]+Src[ 7])-6*(Src[ 3]+Src[ 6]) + 20*(Src[ 4]+Src[ 5]);
		CLIP_STORE( 4,C);
		C = 16-RND - (Src[2]+Src[9 ]) + 3*(Src[ 3]+Src[ 8])-6*(Src[ 4]+Src[ 7]) + 20*(Src[ 5]+Src[ 6]);
		CLIP_STORE( 5,C);
		C = 16-RND - (Src[3]+Src[10]) + 3*(Src[ 4]+Src[ 9])-6*(Src[ 5]+Src[ 8]) + 20*(Src[ 6]+Src[ 7]);
		CLIP_STORE( 6,C);
		C = 16-RND - (Src[4]+Src[11]) + 3*(Src[ 5]+Src[10])-6*(Src[ 6]+Src[ 9]) + 20*(Src[ 7]+Src[ 8]);
		CLIP_STORE( 7,C);
		C = 16-RND - (Src[5]+Src[12]) + 3*(Src[ 6]+Src[11])-6*(Src[ 7]+Src[10]) + 20*(Src[ 8]+Src[ 9]);
		CLIP_STORE( 8,C);
		C = 16-RND - (Src[6]+Src[13]) + 3*(Src[ 7]+Src[12])-6*(Src[ 8]+Src[11]) + 20*(Src[ 9]+Src[10]);
		CLIP_STORE( 9,C);
		C = 16-RND - (Src[7]+Src[14]) + 3*(Src[ 8]+Src[13])-6*(Src[ 9]+Src[12]) + 20*(Src[10]+Src[11]);
		CLIP_STORE(10,C);
		C = 16-RND - (Src[8]+Src[15]) + 3*(Src[ 9]+Src[14])-6*(Src[10]+Src[13]) + 20*(Src[11]+Src[12]);
		CLIP_STORE(11,C);
		C = 16-RND - (Src[9]+Src[16]) + 3*(Src[10]+Src[15])-6*(Src[11]+Src[14]) + 20*(Src[12]+Src[13]);
		CLIP_STORE(12,C);
		C = 16-RND - Src[10] +3*Src[11] -6*(Src[12]+Src[15]) + 20*(Src[13]+Src[14]) +2*Src[16];
		CLIP_STORE(13,C);
		C = 16-RND - Src[11] +3*(Src[12]-Src[16]) -6*Src[13] + 20*Src[14] + 19*Src[15];
		CLIP_STORE(14,C);
		C = 16-RND - Src[12] +3*Src[13] -7*Src[14] + 23*Src[15] + 14*Src[16];
		CLIP_STORE(15,C);
		Src += BpS;
		Dst += BpS;
	}
#else
	while(H-->0) {
		int C;
		C = 16-RND +14*Src[0] +23*Src[1] - 7*Src[2] + 3*Src[3] - Src[4];
		CLIP_STORE(0,C);
		C = 16-RND - 3*(Src[0]-Src[4]) +19*Src[1] +20*Src[2] - 6*Src[3] - Src[5];
		CLIP_STORE(1,C);
		C = 16-RND + 2*Src[0] - 6*(Src[1]+Src[4]) +20*(Src[2]+Src[3]) + 3*Src[5] - Src[6];
		CLIP_STORE(2,C);
		C = 16-RND - (Src[0]+Src[7]) + 3*(Src[1]+Src[6])-6*(Src[2]+Src[5]) + 20*(Src[3]+Src[4]);
		CLIP_STORE(3,C);
		C = 16-RND - (Src[1]+Src[8]) + 3*(Src[2]+Src[7])-6*(Src[3]+Src[6]) + 20*(Src[4]+Src[5]);
		CLIP_STORE(4,C);
		C = 16-RND - Src[2] +3*Src[3] -6*(Src[4]+Src[7]) + 20*(Src[5]+Src[6]) +2*Src[8];
		CLIP_STORE(5,C);
		C = 16-RND - Src[3] +3*(Src[4]-Src[8]) -6*Src[5] + 20*Src[6] + 19*Src[7];
		CLIP_STORE(6,C);
		C = 16-RND - Src[4] +3*Src[5] -7*Src[6] + 23*Src[7] + 14*Src[8];
		CLIP_STORE(7,C);
		Src += BpS;
		Dst += BpS;
	}
#endif
}
#undef CLIP_STORE
753
/* Clip/normalise, then average with the NEXT source pixel Src[i+1]
 * before storing ("Avrg_Up" variant of the fast horizontal pass). */
#define CLIP_STORE(i,C) \
	if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5; \
	C = (C+Src[i+1]+1-RND) >> 1; \
	STORE(Dst[i], C)

/* Horizontal FIR pass + averaging with the next pixel, fast unrolled
 * variant (same FIR body as FUNC_HA; only CLIP_STORE differs). */
static void
FUNC_HA_UP(uint8_t *Dst, const uint8_t *Src, int32_t H, int32_t BpS, int32_t RND)
{
#if (SIZE==16)
	while(H-->0) {
		int C;
		C = 16-RND +14*Src[0] +23*Src[1] - 7*Src[2] + 3*Src[3] - Src[4];
		CLIP_STORE(0,C);
		C = 16-RND - 3*(Src[0]-Src[4]) +19*Src[1] +20*Src[2] - 6*Src[3] - Src[5];
		CLIP_STORE( 1,C);
		C = 16-RND + 2*Src[0] - 6*(Src[1]+Src[4]) +20*(Src[2]+Src[3]) + 3*Src[5] - Src[6];
		CLIP_STORE( 2,C);
		C = 16-RND - (Src[0]+Src[7 ]) + 3*(Src[ 1]+Src[ 6])-6*(Src[ 2]+Src[ 5]) + 20*(Src[ 3]+Src[ 4]);
		CLIP_STORE( 3,C);
		C = 16-RND - (Src[1]+Src[8 ]) + 3*(Src[ 2]+Src[ 7])-6*(Src[ 3]+Src[ 6]) + 20*(Src[ 4]+Src[ 5]);
		CLIP_STORE( 4,C);
		C = 16-RND - (Src[2]+Src[9 ]) + 3*(Src[ 3]+Src[ 8])-6*(Src[ 4]+Src[ 7]) + 20*(Src[ 5]+Src[ 6]);
		CLIP_STORE( 5,C);
		C = 16-RND - (Src[3]+Src[10]) + 3*(Src[ 4]+Src[ 9])-6*(Src[ 5]+Src[ 8]) + 20*(Src[ 6]+Src[ 7]);
		CLIP_STORE( 6,C);
		C = 16-RND - (Src[4]+Src[11]) + 3*(Src[ 5]+Src[10])-6*(Src[ 6]+Src[ 9]) + 20*(Src[ 7]+Src[ 8]);
		CLIP_STORE( 7,C);
		C = 16-RND - (Src[5]+Src[12]) + 3*(Src[ 6]+Src[11])-6*(Src[ 7]+Src[10]) + 20*(Src[ 8]+Src[ 9]);
		CLIP_STORE( 8,C);
		C = 16-RND - (Src[6]+Src[13]) + 3*(Src[ 7]+Src[12])-6*(Src[ 8]+Src[11]) + 20*(Src[ 9]+Src[10]);
		CLIP_STORE( 9,C);
		C = 16-RND - (Src[7]+Src[14]) + 3*(Src[ 8]+Src[13])-6*(Src[ 9]+Src[12]) + 20*(Src[10]+Src[11]);
		CLIP_STORE(10,C);
		C = 16-RND - (Src[8]+Src[15]) + 3*(Src[ 9]+Src[14])-6*(Src[10]+Src[13]) + 20*(Src[11]+Src[12]);
		CLIP_STORE(11,C);
		C = 16-RND - (Src[9]+Src[16]) + 3*(Src[10]+Src[15])-6*(Src[11]+Src[14]) + 20*(Src[12]+Src[13]);
		CLIP_STORE(12,C);
		C = 16-RND - Src[10] +3*Src[11] -6*(Src[12]+Src[15]) + 20*(Src[13]+Src[14]) +2*Src[16];
		CLIP_STORE(13,C);
		C = 16-RND - Src[11] +3*(Src[12]-Src[16]) -6*Src[13] + 20*Src[14] + 19*Src[15];
		CLIP_STORE(14,C);
		C = 16-RND - Src[12] +3*Src[13] -7*Src[14] + 23*Src[15] + 14*Src[16];
		CLIP_STORE(15,C);
		Src += BpS;
		Dst += BpS;
	}
#else
	while(H-->0) {
		int C;
		C = 16-RND +14*Src[0] +23*Src[1] - 7*Src[2] + 3*Src[3] - Src[4];
		CLIP_STORE(0,C);
		C = 16-RND - 3*(Src[0]-Src[4]) +19*Src[1] +20*Src[2] - 6*Src[3] - Src[5];
		CLIP_STORE(1,C);
		C = 16-RND + 2*Src[0] - 6*(Src[1]+Src[4]) +20*(Src[2]+Src[3]) + 3*Src[5] - Src[6];
		CLIP_STORE(2,C);
		C = 16-RND - (Src[0]+Src[7]) + 3*(Src[1]+Src[6])-6*(Src[2]+Src[5]) + 20*(Src[3]+Src[4]);
		CLIP_STORE(3,C);
		C = 16-RND - (Src[1]+Src[8]) + 3*(Src[2]+Src[7])-6*(Src[3]+Src[6]) + 20*(Src[4]+Src[5]);
		CLIP_STORE(4,C);
		C = 16-RND - Src[2] +3*Src[3] -6*(Src[4]+Src[7]) + 20*(Src[5]+Src[6]) +2*Src[8];
		CLIP_STORE(5,C);
		C = 16-RND - Src[3] +3*(Src[4]-Src[8]) -6*Src[5] + 20*Src[6] + 19*Src[7];
		CLIP_STORE(6,C);
		C = 16-RND - Src[4] +3*Src[5] -7*Src[6] + 23*Src[7] + 14*Src[8];
		CLIP_STORE(7,C);
		Src += BpS;
		Dst += BpS;
	}
#endif
}
#undef CLIP_STORE
825
826 //////////////////////////////////////////////////////////
827 // vertical passes
828 //////////////////////////////////////////////////////////
829 // Note: for vertical passes, width (W) needs only be 8 or 16.
830
831 #define CLIP_STORE(D,C) \
832 if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5; \
833 STORE(D, C)
834
835 static void
FUNC_V(uint8_t * Dst,const uint8_t * Src,int32_t H,int32_t BpS,int32_t RND)836 FUNC_V(uint8_t *Dst, const uint8_t *Src, int32_t H, int32_t BpS, int32_t RND)
837 {
838 #if (SIZE==16)
839 while(H-->0) {
840 int C;
841 C = 16-RND +14*Src[BpS*0] +23*Src[BpS*1] - 7*Src[BpS*2] + 3*Src[BpS*3] - Src[BpS*4];
842 CLIP_STORE(Dst[BpS* 0],C);
843 C = 16-RND - 3*(Src[BpS*0]-Src[BpS*4]) +19*Src[BpS*1] +20*Src[BpS*2] - 6*Src[BpS*3] - Src[BpS*5];
844 CLIP_STORE(Dst[BpS* 1],C);
845 C = 16-RND + 2*Src[BpS*0] - 6*(Src[BpS*1]+Src[BpS*4]) +20*(Src[BpS*2]+Src[BpS*3]) + 3*Src[BpS*5] - Src[BpS*6];
846 CLIP_STORE(Dst[BpS* 2],C);
847 C = 16-RND - (Src[BpS*0]+Src[BpS*7 ]) + 3*(Src[BpS* 1]+Src[BpS* 6])-6*(Src[BpS* 2]+Src[BpS* 5]) + 20*(Src[BpS* 3]+Src[BpS* 4]);
848 CLIP_STORE(Dst[BpS* 3],C);
849 C = 16-RND - (Src[BpS*1]+Src[BpS*8 ]) + 3*(Src[BpS* 2]+Src[BpS* 7])-6*(Src[BpS* 3]+Src[BpS* 6]) + 20*(Src[BpS* 4]+Src[BpS* 5]);
850 CLIP_STORE(Dst[BpS* 4],C);
851 C = 16-RND - (Src[BpS*2]+Src[BpS*9 ]) + 3*(Src[BpS* 3]+Src[BpS* 8])-6*(Src[BpS* 4]+Src[BpS* 7]) + 20*(Src[BpS* 5]+Src[BpS* 6]);
852 CLIP_STORE(Dst[BpS* 5],C);
853 C = 16-RND - (Src[BpS*3]+Src[BpS*10]) + 3*(Src[BpS* 4]+Src[BpS* 9])-6*(Src[BpS* 5]+Src[BpS* 8]) + 20*(Src[BpS* 6]+Src[BpS* 7]);
854 CLIP_STORE(Dst[BpS* 6],C);
855 C = 16-RND - (Src[BpS*4]+Src[BpS*11]) + 3*(Src[BpS* 5]+Src[BpS*10])-6*(Src[BpS* 6]+Src[BpS* 9]) + 20*(Src[BpS* 7]+Src[BpS* 8]);
856 CLIP_STORE(Dst[BpS* 7],C);
857 C = 16-RND - (Src[BpS*5]+Src[BpS*12]) + 3*(Src[BpS* 6]+Src[BpS*11])-6*(Src[BpS* 7]+Src[BpS*10]) + 20*(Src[BpS* 8]+Src[BpS* 9]);
858 CLIP_STORE(Dst[BpS* 8],C);
859 C = 16-RND - (Src[BpS*6]+Src[BpS*13]) + 3*(Src[BpS* 7]+Src[BpS*12])-6*(Src[BpS* 8]+Src[BpS*11]) + 20*(Src[BpS* 9]+Src[BpS*10]);
860 CLIP_STORE(Dst[BpS* 9],C);
861 C = 16-RND - (Src[BpS*7]+Src[BpS*14]) + 3*(Src[BpS* 8]+Src[BpS*13])-6*(Src[BpS* 9]+Src[BpS*12]) + 20*(Src[BpS*10]+Src[BpS*11]);
862 CLIP_STORE(Dst[BpS*10],C);
863 C = 16-RND - (Src[BpS*8]+Src[BpS*15]) + 3*(Src[BpS* 9]+Src[BpS*14])-6*(Src[BpS*10]+Src[BpS*13]) + 20*(Src[BpS*11]+Src[BpS*12]);
864 CLIP_STORE(Dst[BpS*11],C);
865 C = 16-RND - (Src[BpS*9]+Src[BpS*16]) + 3*(Src[BpS*10]+Src[BpS*15])-6*(Src[BpS*11]+Src[BpS*14]) + 20*(Src[BpS*12]+Src[BpS*13]);
866 CLIP_STORE(Dst[BpS*12],C);
867 C = 16-RND - Src[BpS*10] +3*Src[BpS*11] -6*(Src[BpS*12]+Src[BpS*15]) + 20*(Src[BpS*13]+Src[BpS*14]) +2*Src[BpS*16];
868 CLIP_STORE(Dst[BpS*13],C);
869 C = 16-RND - Src[BpS*11] +3*(Src[BpS*12]-Src[BpS*16]) -6*Src[BpS*13] + 20*Src[BpS*14] + 19*Src[BpS*15];
870 CLIP_STORE(Dst[BpS*14],C);
871 C = 16-RND - Src[BpS*12] +3*Src[BpS*13] -7*Src[BpS*14] + 23*Src[BpS*15] + 14*Src[BpS*16];
872 CLIP_STORE(Dst[BpS*15],C);
873 Src += 1;
874 Dst += 1;
875 }
876 #else
877 while(H-->0) {
878 int C;
879 C = 16-RND +14*Src[BpS*0] +23*Src[BpS*1] - 7*Src[BpS*2] + 3*Src[BpS*3] - Src[BpS*4];
880 CLIP_STORE(Dst[BpS*0],C);
881 C = 16-RND - 3*(Src[BpS*0]-Src[BpS*4]) +19*Src[BpS*1] +20*Src[BpS*2] - 6*Src[BpS*3] - Src[BpS*5];
882 CLIP_STORE(Dst[BpS*1],C);
883 C = 16-RND + 2*Src[BpS*0] - 6*(Src[BpS*1]+Src[BpS*4]) +20*(Src[BpS*2]+Src[BpS*3]) + 3*Src[BpS*5] - Src[BpS*6];
884 CLIP_STORE(Dst[BpS*2],C);
885 C = 16-RND - (Src[BpS*0]+Src[BpS*7]) + 3*(Src[BpS*1]+Src[BpS*6])-6*(Src[BpS*2]+Src[BpS*5]) + 20*(Src[BpS*3]+Src[BpS*4]);
886 CLIP_STORE(Dst[BpS*3],C);
887 C = 16-RND - (Src[BpS*1]+Src[BpS*8]) + 3*(Src[BpS*2]+Src[BpS*7])-6*(Src[BpS*3]+Src[BpS*6]) + 20*(Src[BpS*4]+Src[BpS*5]);
888 CLIP_STORE(Dst[BpS*4],C);
889 C = 16-RND - Src[BpS*2] +3*Src[BpS*3] -6*(Src[BpS*4]+Src[BpS*7]) + 20*(Src[BpS*5]+Src[BpS*6]) +2*Src[BpS*8];
890 CLIP_STORE(Dst[BpS*5],C);
891 C = 16-RND - Src[BpS*3] +3*(Src[BpS*4]-Src[BpS*8]) -6*Src[BpS*5] + 20*Src[BpS*6] + 19*Src[BpS*7];
892 CLIP_STORE(Dst[BpS*6],C);
893 C = 16-RND - Src[BpS*4] +3*Src[BpS*5] -7*Src[BpS*6] + 23*Src[BpS*7] + 14*Src[BpS*8];
894 CLIP_STORE(Dst[BpS*7],C);
895 Src += 1;
896 Dst += 1;
897 }
898 #endif
899 }
900 #undef CLIP_STORE
901
902 #define CLIP_STORE(i,C) \
903 if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5; \
904 C = (C+Src[BpS*i]+1-RND) >> 1; \
905 STORE(Dst[BpS*i], C)
906
907 static void
FUNC_VA(uint8_t * Dst,const uint8_t * Src,int32_t H,int32_t BpS,int32_t RND)908 FUNC_VA(uint8_t *Dst, const uint8_t *Src, int32_t H, int32_t BpS, int32_t RND)
909 {
910 #if (SIZE==16)
911 while(H-->0) {
912 int C;
913 C = 16-RND +14*Src[BpS*0] +23*Src[BpS*1] - 7*Src[BpS*2] + 3*Src[BpS*3] - Src[BpS*4];
914 CLIP_STORE(0,C);
915 C = 16-RND - 3*(Src[BpS*0]-Src[BpS*4]) +19*Src[BpS*1] +20*Src[BpS*2] - 6*Src[BpS*3] - Src[BpS*5];
916 CLIP_STORE( 1,C);
917 C = 16-RND + 2*Src[BpS*0] - 6*(Src[BpS*1]+Src[BpS*4]) +20*(Src[BpS*2]+Src[BpS*3]) + 3*Src[BpS*5] - Src[BpS*6];
918 CLIP_STORE( 2,C);
919 C = 16-RND - (Src[BpS*0]+Src[BpS*7 ]) + 3*(Src[BpS* 1]+Src[BpS* 6])-6*(Src[BpS* 2]+Src[BpS* 5]) + 20*(Src[BpS* 3]+Src[BpS* 4]);
920 CLIP_STORE( 3,C);
921 C = 16-RND - (Src[BpS*1]+Src[BpS*8 ]) + 3*(Src[BpS* 2]+Src[BpS* 7])-6*(Src[BpS* 3]+Src[BpS* 6]) + 20*(Src[BpS* 4]+Src[BpS* 5]);
922 CLIP_STORE( 4,C);
923 C = 16-RND - (Src[BpS*2]+Src[BpS*9 ]) + 3*(Src[BpS* 3]+Src[BpS* 8])-6*(Src[BpS* 4]+Src[BpS* 7]) + 20*(Src[BpS* 5]+Src[BpS* 6]);
924 CLIP_STORE( 5,C);
925 C = 16-RND - (Src[BpS*3]+Src[BpS*10]) + 3*(Src[BpS* 4]+Src[BpS* 9])-6*(Src[BpS* 5]+Src[BpS* 8]) + 20*(Src[BpS* 6]+Src[BpS* 7]);
926 CLIP_STORE( 6,C);
927 C = 16-RND - (Src[BpS*4]+Src[BpS*11]) + 3*(Src[BpS* 5]+Src[BpS*10])-6*(Src[BpS* 6]+Src[BpS* 9]) + 20*(Src[BpS* 7]+Src[BpS* 8]);
928 CLIP_STORE( 7,C);
929 C = 16-RND - (Src[BpS*5]+Src[BpS*12]) + 3*(Src[BpS* 6]+Src[BpS*11])-6*(Src[BpS* 7]+Src[BpS*10]) + 20*(Src[BpS* 8]+Src[BpS* 9]);
930 CLIP_STORE( 8,C);
931 C = 16-RND - (Src[BpS*6]+Src[BpS*13]) + 3*(Src[BpS* 7]+Src[BpS*12])-6*(Src[BpS* 8]+Src[BpS*11]) + 20*(Src[BpS* 9]+Src[BpS*10]);
932 CLIP_STORE( 9,C);
933 C = 16-RND - (Src[BpS*7]+Src[BpS*14]) + 3*(Src[BpS* 8]+Src[BpS*13])-6*(Src[BpS* 9]+Src[BpS*12]) + 20*(Src[BpS*10]+Src[BpS*11]);
934 CLIP_STORE(10,C);
935 C = 16-RND - (Src[BpS*8]+Src[BpS*15]) + 3*(Src[BpS* 9]+Src[BpS*14])-6*(Src[BpS*10]+Src[BpS*13]) + 20*(Src[BpS*11]+Src[BpS*12]);
936 CLIP_STORE(11,C);
937 C = 16-RND - (Src[BpS*9]+Src[BpS*16]) + 3*(Src[BpS*10]+Src[BpS*15])-6*(Src[BpS*11]+Src[BpS*14]) + 20*(Src[BpS*12]+Src[BpS*13]);
938 CLIP_STORE(12,C);
939 C = 16-RND - Src[BpS*10] +3*Src[BpS*11] -6*(Src[BpS*12]+Src[BpS*15]) + 20*(Src[BpS*13]+Src[BpS*14]) +2*Src[BpS*16];
940 CLIP_STORE(13,C);
941 C = 16-RND - Src[BpS*11] +3*(Src[BpS*12]-Src[BpS*16]) -6*Src[BpS*13] + 20*Src[BpS*14] + 19*Src[BpS*15];
942 CLIP_STORE(14,C);
943 C = 16-RND - Src[BpS*12] +3*Src[BpS*13] -7*Src[BpS*14] + 23*Src[BpS*15] + 14*Src[BpS*16];
944 CLIP_STORE(15,C);
945 Src += 1;
946 Dst += 1;
947 }
948 #else
949 while(H-->0) {
950 int C;
951 C = 16-RND +14*Src[BpS*0] +23*Src[BpS*1] - 7*Src[BpS*2] + 3*Src[BpS*3] - Src[BpS*4];
952 CLIP_STORE(0,C);
953 C = 16-RND - 3*(Src[BpS*0]-Src[BpS*4]) +19*Src[BpS*1] +20*Src[BpS*2] - 6*Src[BpS*3] - Src[BpS*5];
954 CLIP_STORE(1,C);
955 C = 16-RND + 2*Src[BpS*0] - 6*(Src[BpS*1]+Src[BpS*4]) +20*(Src[BpS*2]+Src[BpS*3]) + 3*Src[BpS*5] - Src[BpS*6];
956 CLIP_STORE(2,C);
957 C = 16-RND - (Src[BpS*0]+Src[BpS*7]) + 3*(Src[BpS*1]+Src[BpS*6])-6*(Src[BpS*2]+Src[BpS*5]) + 20*(Src[BpS*3]+Src[BpS*4]);
958 CLIP_STORE(3,C);
959 C = 16-RND - (Src[BpS*1]+Src[BpS*8]) + 3*(Src[BpS*2]+Src[BpS*7])-6*(Src[BpS*3]+Src[BpS*6]) + 20*(Src[BpS*4]+Src[BpS*5]);
960 CLIP_STORE(4,C);
961 C = 16-RND - Src[BpS*2] +3*Src[BpS*3] -6*(Src[BpS*4]+Src[BpS*7]) + 20*(Src[BpS*5]+Src[BpS*6]) +2*Src[BpS*8];
962 CLIP_STORE(5,C);
963 C = 16-RND - Src[BpS*3] +3*(Src[BpS*4]-Src[BpS*8]) -6*Src[BpS*5] + 20*Src[BpS*6] + 19*Src[BpS*7];
964 CLIP_STORE(6,C);
965 C = 16-RND - Src[BpS*4] +3*Src[BpS*5] -7*Src[BpS*6] + 23*Src[BpS*7] + 14*Src[BpS*8];
966 CLIP_STORE(7,C);
967 Src += 1;
968 Dst += 1;
969 }
970 #endif
971 }
972 #undef CLIP_STORE
973
974 #define CLIP_STORE(i,C) \
975 if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5; \
976 C = (C+Src[BpS*i+BpS]+1-RND) >> 1; \
977 STORE(Dst[BpS*i], C)
978
979 static void
FUNC_VA_UP(uint8_t * Dst,const uint8_t * Src,int32_t H,int32_t BpS,int32_t RND)980 FUNC_VA_UP(uint8_t *Dst, const uint8_t *Src, int32_t H, int32_t BpS, int32_t RND)
981 {
982 #if (SIZE==16)
983 while(H-->0) {
984 int C;
985 C = 16-RND +14*Src[BpS*0] +23*Src[BpS*1] - 7*Src[BpS*2] + 3*Src[BpS*3] - Src[BpS*4];
986 CLIP_STORE(0,C);
987 C = 16-RND - 3*(Src[BpS*0]-Src[BpS*4]) +19*Src[BpS*1] +20*Src[BpS*2] - 6*Src[BpS*3] - Src[BpS*5];
988 CLIP_STORE( 1,C);
989 C = 16-RND + 2*Src[BpS*0] - 6*(Src[BpS*1]+Src[BpS*4]) +20*(Src[BpS*2]+Src[BpS*3]) + 3*Src[BpS*5] - Src[BpS*6];
990 CLIP_STORE( 2,C);
991 C = 16-RND - (Src[BpS*0]+Src[BpS*7 ]) + 3*(Src[BpS* 1]+Src[BpS* 6])-6*(Src[BpS* 2]+Src[BpS* 5]) + 20*(Src[BpS* 3]+Src[BpS* 4]);
992 CLIP_STORE( 3,C);
993 C = 16-RND - (Src[BpS*1]+Src[BpS*8 ]) + 3*(Src[BpS* 2]+Src[BpS* 7])-6*(Src[BpS* 3]+Src[BpS* 6]) + 20*(Src[BpS* 4]+Src[BpS* 5]);
994 CLIP_STORE( 4,C);
995 C = 16-RND - (Src[BpS*2]+Src[BpS*9 ]) + 3*(Src[BpS* 3]+Src[BpS* 8])-6*(Src[BpS* 4]+Src[BpS* 7]) + 20*(Src[BpS* 5]+Src[BpS* 6]);
996 CLIP_STORE( 5,C);
997 C = 16-RND - (Src[BpS*3]+Src[BpS*10]) + 3*(Src[BpS* 4]+Src[BpS* 9])-6*(Src[BpS* 5]+Src[BpS* 8]) + 20*(Src[BpS* 6]+Src[BpS* 7]);
998 CLIP_STORE( 6,C);
999 C = 16-RND - (Src[BpS*4]+Src[BpS*11]) + 3*(Src[BpS* 5]+Src[BpS*10])-6*(Src[BpS* 6]+Src[BpS* 9]) + 20*(Src[BpS* 7]+Src[BpS* 8]);
1000 CLIP_STORE( 7,C);
1001 C = 16-RND - (Src[BpS*5]+Src[BpS*12]) + 3*(Src[BpS* 6]+Src[BpS*11])-6*(Src[BpS* 7]+Src[BpS*10]) + 20*(Src[BpS* 8]+Src[BpS* 9]);
1002 CLIP_STORE( 8,C);
1003 C = 16-RND - (Src[BpS*6]+Src[BpS*13]) + 3*(Src[BpS* 7]+Src[BpS*12])-6*(Src[BpS* 8]+Src[BpS*11]) + 20*(Src[BpS* 9]+Src[BpS*10]);
1004 CLIP_STORE( 9,C);
1005 C = 16-RND - (Src[BpS*7]+Src[BpS*14]) + 3*(Src[BpS* 8]+Src[BpS*13])-6*(Src[BpS* 9]+Src[BpS*12]) + 20*(Src[BpS*10]+Src[BpS*11]);
1006 CLIP_STORE(10,C);
1007 C = 16-RND - (Src[BpS*8]+Src[BpS*15]) + 3*(Src[BpS* 9]+Src[BpS*14])-6*(Src[BpS*10]+Src[BpS*13]) + 20*(Src[BpS*11]+Src[BpS*12]);
1008 CLIP_STORE(11,C);
1009 C = 16-RND - (Src[BpS*9]+Src[BpS*16]) + 3*(Src[BpS*10]+Src[BpS*15])-6*(Src[BpS*11]+Src[BpS*14]) + 20*(Src[BpS*12]+Src[BpS*13]);
1010 CLIP_STORE(12,C);
1011 C = 16-RND - Src[BpS*10] +3*Src[BpS*11] -6*(Src[BpS*12]+Src[BpS*15]) + 20*(Src[BpS*13]+Src[BpS*14]) +2*Src[BpS*16];
1012 CLIP_STORE(13,C);
1013 C = 16-RND - Src[BpS*11] +3*(Src[BpS*12]-Src[BpS*16]) -6*Src[BpS*13] + 20*Src[BpS*14] + 19*Src[BpS*15];
1014 CLIP_STORE(14,C);
1015 C = 16-RND - Src[BpS*12] +3*Src[BpS*13] -7*Src[BpS*14] + 23*Src[BpS*15] + 14*Src[BpS*16];
1016 CLIP_STORE(15,C);
1017 Src += 1;
1018 Dst += 1;
1019 }
1020 #else
1021 while(H-->0) {
1022 int C;
1023 C = 16-RND +14*Src[BpS*0] +23*Src[BpS*1] - 7*Src[BpS*2] + 3*Src[BpS*3] - Src[BpS*4];
1024 CLIP_STORE(0,C);
1025 C = 16-RND - 3*(Src[BpS*0]-Src[BpS*4]) +19*Src[BpS*1] +20*Src[BpS*2] - 6*Src[BpS*3] - Src[BpS*5];
1026 CLIP_STORE(1,C);
1027 C = 16-RND + 2*Src[BpS*0] - 6*(Src[BpS*1]+Src[BpS*4]) +20*(Src[BpS*2]+Src[BpS*3]) + 3*Src[BpS*5] - Src[BpS*6];
1028 CLIP_STORE(2,C);
1029 C = 16-RND - (Src[BpS*0]+Src[BpS*7]) + 3*(Src[BpS*1]+Src[BpS*6])-6*(Src[BpS*2]+Src[BpS*5]) + 20*(Src[BpS*3]+Src[BpS*4]);
1030 CLIP_STORE(3,C);
1031 C = 16-RND - (Src[BpS*1]+Src[BpS*8]) + 3*(Src[BpS*2]+Src[BpS*7])-6*(Src[BpS*3]+Src[BpS*6]) + 20*(Src[BpS*4]+Src[BpS*5]);
1032 CLIP_STORE(4,C);
1033 C = 16-RND - Src[BpS*2] +3*Src[BpS*3] -6*(Src[BpS*4]+Src[BpS*7]) + 20*(Src[BpS*5]+Src[BpS*6]) +2*Src[BpS*8];
1034 CLIP_STORE(5,C);
1035 C = 16-RND - Src[BpS*3] +3*(Src[BpS*4]-Src[BpS*8]) -6*Src[BpS*5] + 20*Src[BpS*6] + 19*Src[BpS*7];
1036 CLIP_STORE(6,C);
1037 C = 16-RND - Src[BpS*4] +3*Src[BpS*5] -7*Src[BpS*6] + 23*Src[BpS*7] + 14*Src[BpS*8];
1038 CLIP_STORE(7,C);
1039 Src += 1;
1040 Dst += 1;
1041 }
1042 #endif
1043 }
1044 #undef CLIP_STORE
1045
1046 #undef STORE
1047 #undef FUNC_H
1048 #undef FUNC_V
1049 #undef FUNC_HA
1050 #undef FUNC_VA
1051 #undef FUNC_HA_UP
1052 #undef FUNC_VA_UP
1053
1054
1055 #endif /* XVID_AUTO_INCLUDE && !defined(REF) */
1056