1 /*****************************************************************************
2 *
3 * XVID MPEG-4 VIDEO CODEC
4 * - Interpolation related header -
5 *
6 * Copyright(C) 2001-2003 Peter Ross <pross@xvid.org>
7 *
8 * This program is free software ; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation ; either version 2 of the License, or
11 * (at your option) any later version.
12 *
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY ; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with this program ; if not, write to the Free Software
20 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
21 *
22 * $Id: interpolate8x8.h 1985 2011-05-18 09:02:35Z Isibaar $
23 *
24 ****************************************************************************/
25
26 #ifndef _INTERPOLATE8X8_H_
27 #define _INTERPOLATE8X8_H_
28
29 #include "../utils/mem_transfer.h"
30
/*
 * Function type of the interpolate8x8_halfpel_* routines declared in this
 * header: (dst, src, stride, rounding), no return value.
 */
typedef void INTERPOLATE8X8(uint8_t * const dst,
                            const uint8_t * const src,
                            const uint32_t stride,
                            const uint32_t rounding);
typedef INTERPOLATE8X8 *INTERPOLATE8X8_PTR;

/*
 * Function type of the interpolate8x4_halfpel_* routines declared in this
 * header; the parameter list is the same as INTERPOLATE8X8.
 */
typedef void INTERPOLATE8X4(uint8_t * const dst,
                            const uint8_t * const src,
                            const uint32_t stride,
                            const uint32_t rounding);
typedef INTERPOLATE8X4 *INTERPOLATE8X4_PTR;
42
/*
 * Function type of the interpolate8x8_avg2* routines: two source pointers,
 * one destination, plus stride, rounding and height.
 */
typedef void INTERPOLATE8X8_AVG2(uint8_t *dst,
                                 const uint8_t *src1,
                                 const uint8_t *src2,
                                 const uint32_t stride,
                                 const uint32_t rounding,
                                 const uint32_t height);
typedef INTERPOLATE8X8_AVG2 *INTERPOLATE8X8_AVG2_PTR;

/*
 * Function type of the interpolate8x8_avg4* routines: four source pointers,
 * one destination, plus stride and rounding.
 */
typedef void INTERPOLATE8X8_AVG4(uint8_t *dst,
                                 const uint8_t *src1,
                                 const uint8_t *src2,
                                 const uint8_t *src3,
                                 const uint8_t *src4,
                                 const uint32_t stride,
                                 const uint32_t rounding);
typedef INTERPOLATE8X8_AVG4 *INTERPOLATE8X8_AVG4_PTR;
59
/*
 * Function type of the interpolate{8x8,16x16}_lowpass_{h,v}* routines.
 * Note that stride and rounding are signed here and src is not
 * const-qualified, unlike the half-pel function types.
 */
typedef void INTERPOLATE_LOWPASS(uint8_t *dst,
                                 uint8_t *src,
                                 int32_t stride,
                                 int32_t rounding);

typedef INTERPOLATE_LOWPASS *INTERPOLATE_LOWPASS_PTR;

/*
 * Function type of the interpolate{8x8,16x16}_lowpass_hv* routines; takes
 * two destination pointers and a single source pointer.
 */
typedef void INTERPOLATE_LOWPASS_HV(uint8_t *dst1,
                                    uint8_t *dst2,
                                    uint8_t *src,
                                    int32_t stride,
                                    int32_t rounding);

typedef INTERPOLATE_LOWPASS_HV *INTERPOLATE_LOWPASS_HV_PTR;

/*
 * Function type of the interpolate8x8_6tap_lowpass_{h,v}* routines; the
 * parameter list matches INTERPOLATE_LOWPASS.
 */
typedef void INTERPOLATE8X8_6TAP_LOWPASS(uint8_t *dst,
                                         uint8_t *src,
                                         int32_t stride,
                                         int32_t rounding);

typedef INTERPOLATE8X8_6TAP_LOWPASS *INTERPOLATE8X8_6TAP_LOWPASS_PTR;
81
82 /* These function do: dst = interpolate(src) */
83 extern INTERPOLATE8X8_PTR interpolate8x8_halfpel_h;
84 extern INTERPOLATE8X8_PTR interpolate8x8_halfpel_v;
85 extern INTERPOLATE8X8_PTR interpolate8x8_halfpel_hv;
86
87 extern INTERPOLATE8X4_PTR interpolate8x4_halfpel_h;
88 extern INTERPOLATE8X4_PTR interpolate8x4_halfpel_v;
89 extern INTERPOLATE8X4_PTR interpolate8x4_halfpel_hv;
90
91 /* These functions do: dst = (dst+interpolate(src) + 1)/2
92 * Suitable for direct/interpolated bvop prediction block
93 * building w/o the need for intermediate interpolated result
94 * storing/reading
95 * NB: the rounding applies to the interpolation, but not
96 * the averaging step which will always use rounding=0 */
97 extern INTERPOLATE8X8_PTR interpolate8x8_halfpel_add;
98 extern INTERPOLATE8X8_PTR interpolate8x8_halfpel_h_add;
99 extern INTERPOLATE8X8_PTR interpolate8x8_halfpel_v_add;
100 extern INTERPOLATE8X8_PTR interpolate8x8_halfpel_hv_add;
101
102 extern INTERPOLATE8X8_AVG2_PTR interpolate8x8_avg2;
103 extern INTERPOLATE8X8_AVG4_PTR interpolate8x8_avg4;
104
105 extern INTERPOLATE_LOWPASS_PTR interpolate8x8_lowpass_h;
106 extern INTERPOLATE_LOWPASS_PTR interpolate8x8_lowpass_v;
107
108 extern INTERPOLATE_LOWPASS_PTR interpolate16x16_lowpass_h;
109 extern INTERPOLATE_LOWPASS_PTR interpolate16x16_lowpass_v;
110
111 extern INTERPOLATE_LOWPASS_HV_PTR interpolate8x8_lowpass_hv;
112 extern INTERPOLATE_LOWPASS_HV_PTR interpolate16x16_lowpass_hv;
113
114 extern INTERPOLATE8X8_6TAP_LOWPASS_PTR interpolate8x8_6tap_lowpass_h;
115 extern INTERPOLATE8X8_6TAP_LOWPASS_PTR interpolate8x8_6tap_lowpass_v;
116
117 INTERPOLATE8X8 interpolate8x8_halfpel_h_c;
118 INTERPOLATE8X8 interpolate8x8_halfpel_v_c;
119 INTERPOLATE8X8 interpolate8x8_halfpel_hv_c;
120
121 INTERPOLATE8X4 interpolate8x4_halfpel_h_c;
122 INTERPOLATE8X4 interpolate8x4_halfpel_v_c;
123 INTERPOLATE8X4 interpolate8x4_halfpel_hv_c;
124
125 INTERPOLATE8X8 interpolate8x8_halfpel_add_c;
126 INTERPOLATE8X8 interpolate8x8_halfpel_h_add_c;
127 INTERPOLATE8X8 interpolate8x8_halfpel_v_add_c;
128 INTERPOLATE8X8 interpolate8x8_halfpel_hv_add_c;
129
/*
 * Half-pel routines declared only when ARCH_IS_IA32 or ARCH_IS_X86_64 is
 * defined, grouped by name suffix: _mmx, _xmm, _3dn, _3dne.
 */
#if defined(ARCH_IS_IA32) || defined(ARCH_IS_X86_64)
/* _mmx */
INTERPOLATE8X8  interpolate8x8_halfpel_h_mmx;
INTERPOLATE8X8  interpolate8x8_halfpel_v_mmx;
INTERPOLATE8X8  interpolate8x8_halfpel_hv_mmx;

INTERPOLATE8X4  interpolate8x4_halfpel_h_mmx;
INTERPOLATE8X4  interpolate8x4_halfpel_v_mmx;
INTERPOLATE8X4  interpolate8x4_halfpel_hv_mmx;

INTERPOLATE8X8  interpolate8x8_halfpel_add_mmx;
INTERPOLATE8X8  interpolate8x8_halfpel_h_add_mmx;
INTERPOLATE8X8  interpolate8x8_halfpel_v_add_mmx;
INTERPOLATE8X8  interpolate8x8_halfpel_hv_add_mmx;

/* _xmm */
INTERPOLATE8X8  interpolate8x8_halfpel_h_xmm;
INTERPOLATE8X8  interpolate8x8_halfpel_v_xmm;
INTERPOLATE8X8  interpolate8x8_halfpel_hv_xmm;

INTERPOLATE8X4  interpolate8x4_halfpel_h_xmm;
INTERPOLATE8X4  interpolate8x4_halfpel_v_xmm;
INTERPOLATE8X4  interpolate8x4_halfpel_hv_xmm;

INTERPOLATE8X8  interpolate8x8_halfpel_add_xmm;
INTERPOLATE8X8  interpolate8x8_halfpel_h_add_xmm;
INTERPOLATE8X8  interpolate8x8_halfpel_v_add_xmm;
INTERPOLATE8X8  interpolate8x8_halfpel_hv_add_xmm;

/* _3dn (no "add" variants are declared for this suffix) */
INTERPOLATE8X8  interpolate8x8_halfpel_h_3dn;
INTERPOLATE8X8  interpolate8x8_halfpel_v_3dn;
INTERPOLATE8X8  interpolate8x8_halfpel_hv_3dn;

INTERPOLATE8X4  interpolate8x4_halfpel_h_3dn;
INTERPOLATE8X4  interpolate8x4_halfpel_v_3dn;
INTERPOLATE8X4  interpolate8x4_halfpel_hv_3dn;

/* _3dne (no "add" variants are declared for this suffix) */
INTERPOLATE8X8  interpolate8x8_halfpel_h_3dne;
INTERPOLATE8X8  interpolate8x8_halfpel_v_3dne;
INTERPOLATE8X8  interpolate8x8_halfpel_hv_3dne;

INTERPOLATE8X4  interpolate8x4_halfpel_h_3dne;
INTERPOLATE8X4  interpolate8x4_halfpel_v_3dne;
INTERPOLATE8X4  interpolate8x4_halfpel_hv_3dne;
#endif
173
/* Half-pel routines declared only when ARCH_IS_IA64 is defined. */
#if defined(ARCH_IS_IA64)
INTERPOLATE8X8  interpolate8x8_halfpel_h_ia64;
INTERPOLATE8X8  interpolate8x8_halfpel_v_ia64;
INTERPOLATE8X8  interpolate8x8_halfpel_hv_ia64;
#endif

/* Half-pel routines declared only when ARCH_IS_PPC is defined. */
#if defined(ARCH_IS_PPC)
INTERPOLATE8X8  interpolate8x8_halfpel_h_altivec_c;
INTERPOLATE8X8  interpolate8x8_halfpel_v_altivec_c;
INTERPOLATE8X8  interpolate8x8_halfpel_hv_altivec_c;

INTERPOLATE8X8  interpolate8x8_halfpel_add_altivec_c;
INTERPOLATE8X8  interpolate8x8_halfpel_h_add_altivec_c;
INTERPOLATE8X8  interpolate8x8_halfpel_v_add_altivec_c;
INTERPOLATE8X8  interpolate8x8_halfpel_hv_add_altivec_c;
#endif
190
191 INTERPOLATE8X8_AVG2 interpolate8x8_avg2_c;
192 INTERPOLATE8X8_AVG4 interpolate8x8_avg4_c;
193
194 #if defined(ARCH_IS_IA32) || defined(ARCH_IS_X86_64)
195 INTERPOLATE8X8_AVG2 interpolate8x8_avg2_mmx;
196 INTERPOLATE8X8_AVG4 interpolate8x8_avg4_mmx;
197 #endif
198
199 #ifdef ARCH_IS_PPC
200 INTERPOLATE8X8_AVG2 interpolate8x8_avg2_altivec_c;
201 INTERPOLATE8X8_AVG4 interpolate8x8_avg4_altivec_c;
202 #endif
203
204 INTERPOLATE_LOWPASS interpolate8x8_lowpass_h_c;
205 INTERPOLATE_LOWPASS interpolate8x8_lowpass_v_c;
206
207 INTERPOLATE_LOWPASS interpolate16x16_lowpass_h_c;
208 INTERPOLATE_LOWPASS interpolate16x16_lowpass_v_c;
209
210 INTERPOLATE_LOWPASS_HV interpolate8x8_lowpass_hv_c;
211 INTERPOLATE_LOWPASS_HV interpolate16x16_lowpass_hv_c;
212
213 INTERPOLATE8X8_6TAP_LOWPASS interpolate8x8_6tap_lowpass_h_c;
214 INTERPOLATE8X8_6TAP_LOWPASS interpolate8x8_6tap_lowpass_v_c;
215
216 #if defined(ARCH_IS_IA32) || defined(ARCH_IS_X86_64)
217 INTERPOLATE8X8_6TAP_LOWPASS interpolate8x8_6tap_lowpass_h_mmx;
218 INTERPOLATE8X8_6TAP_LOWPASS interpolate8x8_6tap_lowpass_v_mmx;
219 #endif
220
221 #ifdef ARCH_IS_PPC
222 INTERPOLATE8X8_6TAP_LOWPASS interpolate8x8_6tap_lowpass_h_altivec_c;
223 #endif
224
/*
 * Write the block at (x, y) of `cur` from `refn`, displaced by (dx, dy).
 *
 * The source position is moved by (dx>>1, dy>>1); the low bits of dx and dy
 * choose between a plain copy (transfer8x4_copy) and the
 * interpolate8x4_halfpel_{v,h,hv} routines. `rounding` is forwarded to the
 * interpolation routines and is unused for the plain copy.
 * NOTE(review): dx/dy are presumably in half-sample units - confirm with
 * the callers.
 */
static __inline void
interpolate8x4_switch(uint8_t * const cur,
					  const uint8_t * const refn,
					  const uint32_t x,
					  const uint32_t y,
					  const int32_t dx,
					  const int dy,
					  const uint32_t stride,
					  const uint32_t rounding)
{
	/* Offsets are computed in unsigned arithmetic, then converted to int
	 * before being added to the base pointers. */
	const int src_offset = (int)((y + (dy>>1)) * stride + x + (dx>>1));
	const int dst_offset = (int)(y * stride + x);

	const uint8_t * const src = refn + src_offset;
	uint8_t * const dst = cur + dst_offset;

	const int odd_dx = dx & 1;
	const int odd_dy = dy & 1;

	if (odd_dx && odd_dy) {
		interpolate8x4_halfpel_hv(dst, src, stride, rounding);
	} else if (odd_dx) {
		interpolate8x4_halfpel_h(dst, src, stride, rounding);
	} else if (odd_dy) {
		interpolate8x4_halfpel_v(dst, src, stride, rounding);
	} else {
		transfer8x4_copy(dst, src, stride);
	}
}
255
/*
 * Write the block at (x, y) of `cur` from `refn`, displaced by (dx, dy).
 *
 * The source position is moved by (dx>>1, dy>>1); the low bits of dx and dy
 * choose between a plain copy (transfer8x8_copy) and the
 * interpolate8x8_halfpel_{v,h,hv} routines. `rounding` is forwarded to the
 * interpolation routines and is unused for the plain copy.
 * NOTE(review): dx/dy are presumably in half-sample units - confirm with
 * the callers.
 */
static __inline void
interpolate8x8_switch(uint8_t * const cur,
					  const uint8_t * const refn,
					  const uint32_t x,
					  const uint32_t y,
					  const int32_t dx,
					  const int dy,
					  const uint32_t stride,
					  const uint32_t rounding)
{
	/* Offsets are computed in unsigned arithmetic, then converted to int
	 * before being added to the base pointers. */
	const int src_offset = (int)((y + (dy>>1)) * stride + x + (dx>>1));
	const int dst_offset = (int)(y * stride + x);

	const uint8_t * const src = refn + src_offset;
	uint8_t * const dst = cur + dst_offset;

	const int odd_dx = dx & 1;
	const int odd_dy = dy & 1;

	if (odd_dx && odd_dy) {
		interpolate8x8_halfpel_hv(dst, src, stride, rounding);
	} else if (odd_dx) {
		interpolate8x8_halfpel_h(dst, src, stride, rounding);
	} else if (odd_dy) {
		interpolate8x8_halfpel_v(dst, src, stride, rounding);
	} else {
		transfer8x8_copy(dst, src, stride);
	}
}
285
/*
 * Same dispatch as interpolate8x8_switch, but every case goes through one
 * of the interpolate8x8_halfpel*_add routines, including the case where
 * both dx and dy are even (interpolate8x8_halfpel_add).
 *
 * The source position is moved by (dx>>1, dy>>1); the low bits of dx and dy
 * pick the routine.
 */
static __inline void
interpolate8x8_add_switch(uint8_t * const cur,
						  const uint8_t * const refn,
						  const uint32_t x,
						  const uint32_t y,
						  const int32_t dx,
						  const int dy,
						  const uint32_t stride,
						  const uint32_t rounding)
{
	/* Offsets are computed in unsigned arithmetic, then converted to int
	 * before being added to the base pointers. */
	const int src_offset = (int)((y + (dy>>1)) * stride + x + (dx>>1));
	const int dst_offset = (int)(y * stride + x);

	const uint8_t * const src = refn + src_offset;
	uint8_t * const dst = cur + dst_offset;

	const int odd_dx = dx & 1;
	const int odd_dy = dy & 1;

	if (odd_dx && odd_dy) {
		interpolate8x8_halfpel_hv_add(dst, src, stride, rounding);
	} else if (odd_dx) {
		interpolate8x8_halfpel_h_add(dst, src, stride, rounding);
	} else if (odd_dy) {
		interpolate8x8_halfpel_v_add(dst, src, stride, rounding);
	} else {
		interpolate8x8_halfpel_add(dst, src, stride, rounding);
	}
}
315
/*
 * 16x16 version of interpolate8x8_switch: calls it for the four 8x8
 * quadrants at (x, y), (x+8, y), (x, y+8) and (x+8, y+8), in that order,
 * with the same dx, dy, stride and rounding.
 */
static __inline void
interpolate16x16_switch(uint8_t * const cur,
						const uint8_t * const refn,
						const uint32_t x,
						const uint32_t y,
						const int32_t dx,
						const int dy,
						const uint32_t stride,
						const uint32_t rounding)
{
	uint32_t row, col;

	/* row-major walk over the 2x2 grid of 8x8 quadrants */
	for (row = 0; row < 16; row += 8) {
		for (col = 0; col < 16; col += 8) {
			interpolate8x8_switch(cur, refn, x + col, y + row, dx, dy, stride, rounding);
		}
	}
}
331
/*
 * 16x16 version of interpolate8x8_add_switch: calls it for the four 8x8
 * quadrants at (x, y), (x+8, y), (x, y+8) and (x+8, y+8), in that order,
 * with the same dx, dy, stride and rounding.
 */
static __inline void
interpolate16x16_add_switch(uint8_t * const cur,
							const uint8_t * const refn,
							const uint32_t x,
							const uint32_t y,
							const int32_t dx,
							const int dy,
							const uint32_t stride,
							const uint32_t rounding)
{
	uint32_t row, col;

	/* row-major walk over the 2x2 grid of 8x8 quadrants */
	for (row = 0; row < 16; row += 8) {
		for (col = 0; col < 16; col += 8) {
			interpolate8x8_add_switch(cur, refn, x + col, y + row, dx, dy, stride, rounding);
		}
	}
}
347
/*
 * 32x32 version of interpolate16x16_switch: calls it for the four 16x16
 * quadrants at (x, y), (x+16, y), (x, y+16) and (x+16, y+16), in that
 * order, with the same dx, dy, stride and rounding.
 */
static __inline void
interpolate32x32_switch(uint8_t * const cur,
						const uint8_t * const refn,
						const uint32_t x,
						const uint32_t y,
						const int32_t dx,
						const int dy,
						const uint32_t stride,
						const uint32_t rounding)
{
	uint32_t row, col;

	/* row-major walk over the 2x2 grid of 16x16 quadrants */
	for (row = 0; row < 32; row += 16) {
		for (col = 0; col < 32; col += 16) {
			interpolate16x16_switch(cur, refn, x + col, y + row, dx, dy, stride, rounding);
		}
	}
}
363
/*
 * 32x32 version of interpolate16x16_add_switch: calls it for the four
 * 16x16 quadrants at (x, y), (x+16, y), (x, y+16) and (x+16, y+16), in
 * that order, with the same dx, dy, stride and rounding.
 */
static __inline void
interpolate32x32_add_switch(uint8_t * const cur,
							const uint8_t * const refn,
							const uint32_t x,
							const uint32_t y,
							const int32_t dx,
							const int dy,
							const uint32_t stride,
							const uint32_t rounding)
{
	uint32_t row, col;

	/* row-major walk over the 2x2 grid of 16x16 quadrants */
	for (row = 0; row < 32; row += 16) {
		for (col = 0; col < 32; col += 16) {
			interpolate16x16_add_switch(cur, refn, x + col, y + row, dx, dy, stride, rounding);
		}
	}
}
379
/*
 * Variant of interpolate8x8_switch that returns a pointer to the result
 * instead of writing into a destination plane.
 *
 * When both dx and dy are even, nothing is copied: the function returns a
 * (const-stripped) pointer straight into `refn` and leaves `buffer`
 * untouched. Otherwise the selected interpolate8x8_halfpel_{v,h,hv}
 * routine writes to the start of `buffer` and `buffer` is returned.
 *
 * Unlike the other *_switch helpers, x and y are signed here.
 */
static __inline uint8_t *
interpolate8x8_switch2(uint8_t * const buffer,
					   const uint8_t * const refn,
					   const int x,
					   const int y,
					   const int dx,
					   const int dy,
					   const uint32_t stride,
					   const uint32_t rounding)
{
	/* Same offset expression as the other helpers; the multiplication by
	 * the unsigned stride makes the sum unsigned before the int cast. */
	const int src_offset = (int)((y + (dy>>1)) * stride + x + (dx>>1));
	const uint8_t * const src = refn + src_offset;

	const int odd_dx = dx & 1;
	const int odd_dy = dy & 1;

	if (!odd_dx && !odd_dy) {
		/* no interpolation needed: hand back the reference data itself */
		return (uint8_t *)src;
	}

	if (odd_dx && odd_dy) {
		interpolate8x8_halfpel_hv(buffer, src, stride, rounding);
	} else if (odd_dx) {
		interpolate8x8_halfpel_h(buffer, src, stride, rounding);
	} else {
		interpolate8x8_halfpel_v(buffer, src, stride, rounding);
	}

	return buffer;
}
408
409 #endif
410