1 /*****************************************************************************
2  *
3  *  XVID MPEG-4 VIDEO CODEC
4  *  - Interpolation related header  -
5  *
6  *  Copyright(C) 2001-2003 Peter Ross <pross@xvid.org>
7  *
8  *  This program is free software ; you can redistribute it and/or modify
9  *  it under the terms of the GNU General Public License as published by
10  *  the Free Software Foundation ; either version 2 of the License, or
11  *  (at your option) any later version.
12  *
13  *  This program is distributed in the hope that it will be useful,
14  *  but WITHOUT ANY WARRANTY ; without even the implied warranty of
15  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16  *  GNU General Public License for more details.
17  *
18  *  You should have received a copy of the GNU General Public License
19  *  along with this program ; if not, write to the Free Software
20  *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
21  *
22  * $Id: interpolate8x8.h 1985 2011-05-18 09:02:35Z Isibaar $
23  *
24  ****************************************************************************/
25 
26 #ifndef _INTERPOLATE8X8_H_
27 #define _INTERPOLATE8X8_H_
28 
29 #include "../utils/mem_transfer.h"
30 
/*
 * Function type used by the interpolate8x8_* routines declared in this
 * header: a destination pointer, a read-only source pointer, a stride and
 * a rounding value.
 * INTERPOLATE8X8_PTR is the matching pointer-to-function type used for the
 * extern dispatch pointers.
 */
typedef void (INTERPOLATE8X8) (uint8_t * const dst,
							   const uint8_t * const src,
							   const uint32_t stride,
							   const uint32_t rounding);
typedef INTERPOLATE8X8 *INTERPOLATE8X8_PTR;

/*
 * Function type used by the interpolate8x4_* routines. The parameter list
 * is the same as INTERPOLATE8X8; it is kept as a distinct typedef name.
 */
typedef void (INTERPOLATE8X4) (uint8_t * const dst,
							   const uint8_t * const src,
							   const uint32_t stride,
							   const uint32_t rounding);
typedef INTERPOLATE8X4 *INTERPOLATE8X4_PTR;
42 
/*
 * Function type for interpolate8x8_avg2*: one destination, two read-only
 * sources, plus stride, rounding and height.
 * NOTE(review): judging by the name, the routine averages the two sources
 * over `height` rows -- confirm against the implementation.
 */
typedef void (INTERPOLATE8X8_AVG2) (uint8_t *dst,
									const uint8_t *src1,
									const uint8_t *src2,
									const uint32_t stride,
									const uint32_t rounding,
									const uint32_t height);
typedef INTERPOLATE8X8_AVG2 *INTERPOLATE8X8_AVG2_PTR;

/*
 * Function type for interpolate8x8_avg4*: one destination, four read-only
 * sources, plus stride and rounding (no height parameter).
 */
typedef void (INTERPOLATE8X8_AVG4) (uint8_t *dst,
									const uint8_t *src1,
									const uint8_t *src2,
									const uint8_t *src3,
									const uint8_t *src4,
									const uint32_t stride,
									const uint32_t rounding);
typedef INTERPOLATE8X8_AVG4 *INTERPOLATE8X8_AVG4_PTR;
59 
/*
 * Function type for the *_lowpass_h / *_lowpass_v routines.
 * Unlike the halfpel types, src is not const-qualified and stride/rounding
 * are signed (int32_t).
 */
typedef void (INTERPOLATE_LOWPASS) (uint8_t *dst,
									   uint8_t *src,
									   int32_t stride,
									   int32_t rounding);

typedef INTERPOLATE_LOWPASS *INTERPOLATE_LOWPASS_PTR;

/*
 * Function type for the *_lowpass_hv routines: two destination pointers
 * and one source.
 * NOTE(review): the roles of dst1 and dst2 are not visible in this header;
 * see the implementation.
 */
typedef void (INTERPOLATE_LOWPASS_HV) (uint8_t *dst1,
										  uint8_t *dst2,
										  uint8_t *src,
										  int32_t stride,
										  int32_t rounding);

typedef INTERPOLATE_LOWPASS_HV *INTERPOLATE_LOWPASS_HV_PTR;

/*
 * Function type for the interpolate8x8_6tap_lowpass_* routines; same
 * parameter list as INTERPOLATE_LOWPASS.
 */
typedef void (INTERPOLATE8X8_6TAP_LOWPASS) (uint8_t *dst,
									        uint8_t *src,
									        int32_t stride,
									        int32_t rounding);

typedef INTERPOLATE8X8_6TAP_LOWPASS *INTERPOLATE8X8_6TAP_LOWPASS_PTR;
81 
/* These functions do: dst = interpolate(src) */
83 extern INTERPOLATE8X8_PTR interpolate8x8_halfpel_h;
84 extern INTERPOLATE8X8_PTR interpolate8x8_halfpel_v;
85 extern INTERPOLATE8X8_PTR interpolate8x8_halfpel_hv;
86 
87 extern INTERPOLATE8X4_PTR interpolate8x4_halfpel_h;
88 extern INTERPOLATE8X4_PTR interpolate8x4_halfpel_v;
89 extern INTERPOLATE8X4_PTR interpolate8x4_halfpel_hv;
90 
91 /* These functions do: dst = (dst+interpolate(src) + 1)/2
92  * Suitable for direct/interpolated bvop prediction block
93  * building w/o the need for intermediate interpolated result
94  * storing/reading
95  * NB: the rounding applies to the interpolation, but not
96  *     the averaging step which will always use rounding=0 */
97 extern INTERPOLATE8X8_PTR interpolate8x8_halfpel_add;
98 extern INTERPOLATE8X8_PTR interpolate8x8_halfpel_h_add;
99 extern INTERPOLATE8X8_PTR interpolate8x8_halfpel_v_add;
100 extern INTERPOLATE8X8_PTR interpolate8x8_halfpel_hv_add;
101 
102 extern INTERPOLATE8X8_AVG2_PTR interpolate8x8_avg2;
103 extern INTERPOLATE8X8_AVG4_PTR interpolate8x8_avg4;
104 
105 extern INTERPOLATE_LOWPASS_PTR interpolate8x8_lowpass_h;
106 extern INTERPOLATE_LOWPASS_PTR interpolate8x8_lowpass_v;
107 
108 extern INTERPOLATE_LOWPASS_PTR interpolate16x16_lowpass_h;
109 extern INTERPOLATE_LOWPASS_PTR interpolate16x16_lowpass_v;
110 
111 extern INTERPOLATE_LOWPASS_HV_PTR interpolate8x8_lowpass_hv;
112 extern INTERPOLATE_LOWPASS_HV_PTR interpolate16x16_lowpass_hv;
113 
114 extern INTERPOLATE8X8_6TAP_LOWPASS_PTR interpolate8x8_6tap_lowpass_h;
115 extern INTERPOLATE8X8_6TAP_LOWPASS_PTR interpolate8x8_6tap_lowpass_v;
116 
117 INTERPOLATE8X8 interpolate8x8_halfpel_h_c;
118 INTERPOLATE8X8 interpolate8x8_halfpel_v_c;
119 INTERPOLATE8X8 interpolate8x8_halfpel_hv_c;
120 
121 INTERPOLATE8X4 interpolate8x4_halfpel_h_c;
122 INTERPOLATE8X4 interpolate8x4_halfpel_v_c;
123 INTERPOLATE8X4 interpolate8x4_halfpel_hv_c;
124 
125 INTERPOLATE8X8 interpolate8x8_halfpel_add_c;
126 INTERPOLATE8X8 interpolate8x8_halfpel_h_add_c;
127 INTERPOLATE8X8 interpolate8x8_halfpel_v_add_c;
128 INTERPOLATE8X8 interpolate8x8_halfpel_hv_add_c;
129 
130 #if defined(ARCH_IS_IA32) || defined(ARCH_IS_X86_64)
131 INTERPOLATE8X8 interpolate8x8_halfpel_h_mmx;
132 INTERPOLATE8X8 interpolate8x8_halfpel_v_mmx;
133 INTERPOLATE8X8 interpolate8x8_halfpel_hv_mmx;
134 
135 INTERPOLATE8X4 interpolate8x4_halfpel_h_mmx;
136 INTERPOLATE8X4 interpolate8x4_halfpel_v_mmx;
137 INTERPOLATE8X4 interpolate8x4_halfpel_hv_mmx;
138 
139 INTERPOLATE8X8 interpolate8x8_halfpel_add_mmx;
140 INTERPOLATE8X8 interpolate8x8_halfpel_h_add_mmx;
141 INTERPOLATE8X8 interpolate8x8_halfpel_v_add_mmx;
142 INTERPOLATE8X8 interpolate8x8_halfpel_hv_add_mmx;
143 
144 INTERPOLATE8X8 interpolate8x8_halfpel_h_xmm;
145 INTERPOLATE8X8 interpolate8x8_halfpel_v_xmm;
146 INTERPOLATE8X8 interpolate8x8_halfpel_hv_xmm;
147 
148 INTERPOLATE8X4 interpolate8x4_halfpel_h_xmm;
149 INTERPOLATE8X4 interpolate8x4_halfpel_v_xmm;
150 INTERPOLATE8X4 interpolate8x4_halfpel_hv_xmm;
151 
152 INTERPOLATE8X8 interpolate8x8_halfpel_add_xmm;
153 INTERPOLATE8X8 interpolate8x8_halfpel_h_add_xmm;
154 INTERPOLATE8X8 interpolate8x8_halfpel_v_add_xmm;
155 INTERPOLATE8X8 interpolate8x8_halfpel_hv_add_xmm;
156 
157 INTERPOLATE8X8 interpolate8x8_halfpel_h_3dn;
158 INTERPOLATE8X8 interpolate8x8_halfpel_v_3dn;
159 INTERPOLATE8X8 interpolate8x8_halfpel_hv_3dn;
160 
161 INTERPOLATE8X4 interpolate8x4_halfpel_h_3dn;
162 INTERPOLATE8X4 interpolate8x4_halfpel_v_3dn;
163 INTERPOLATE8X4 interpolate8x4_halfpel_hv_3dn;
164 
165 INTERPOLATE8X8 interpolate8x8_halfpel_h_3dne;
166 INTERPOLATE8X8 interpolate8x8_halfpel_v_3dne;
167 INTERPOLATE8X8 interpolate8x8_halfpel_hv_3dne;
168 
169 INTERPOLATE8X4 interpolate8x4_halfpel_h_3dne;
170 INTERPOLATE8X4 interpolate8x4_halfpel_v_3dne;
171 INTERPOLATE8X4 interpolate8x4_halfpel_hv_3dne;
172 #endif
173 
174 #ifdef ARCH_IS_IA64
175 INTERPOLATE8X8 interpolate8x8_halfpel_h_ia64;
176 INTERPOLATE8X8 interpolate8x8_halfpel_v_ia64;
177 INTERPOLATE8X8 interpolate8x8_halfpel_hv_ia64;
178 #endif
179 
180 #ifdef ARCH_IS_PPC
181 INTERPOLATE8X8 interpolate8x8_halfpel_h_altivec_c;
182 INTERPOLATE8X8 interpolate8x8_halfpel_v_altivec_c;
183 INTERPOLATE8X8 interpolate8x8_halfpel_hv_altivec_c;
184 
185 INTERPOLATE8X8 interpolate8x8_halfpel_add_altivec_c;
186 INTERPOLATE8X8 interpolate8x8_halfpel_h_add_altivec_c;
187 INTERPOLATE8X8 interpolate8x8_halfpel_v_add_altivec_c;
188 INTERPOLATE8X8 interpolate8x8_halfpel_hv_add_altivec_c;
189 #endif
190 
191 INTERPOLATE8X8_AVG2 interpolate8x8_avg2_c;
192 INTERPOLATE8X8_AVG4 interpolate8x8_avg4_c;
193 
194 #if defined(ARCH_IS_IA32) || defined(ARCH_IS_X86_64)
195 INTERPOLATE8X8_AVG2 interpolate8x8_avg2_mmx;
196 INTERPOLATE8X8_AVG4 interpolate8x8_avg4_mmx;
197 #endif
198 
199 #ifdef ARCH_IS_PPC
200 INTERPOLATE8X8_AVG2 interpolate8x8_avg2_altivec_c;
201 INTERPOLATE8X8_AVG4 interpolate8x8_avg4_altivec_c;
202 #endif
203 
204 INTERPOLATE_LOWPASS interpolate8x8_lowpass_h_c;
205 INTERPOLATE_LOWPASS interpolate8x8_lowpass_v_c;
206 
207 INTERPOLATE_LOWPASS interpolate16x16_lowpass_h_c;
208 INTERPOLATE_LOWPASS interpolate16x16_lowpass_v_c;
209 
210 INTERPOLATE_LOWPASS_HV interpolate8x8_lowpass_hv_c;
211 INTERPOLATE_LOWPASS_HV interpolate16x16_lowpass_hv_c;
212 
213 INTERPOLATE8X8_6TAP_LOWPASS interpolate8x8_6tap_lowpass_h_c;
214 INTERPOLATE8X8_6TAP_LOWPASS interpolate8x8_6tap_lowpass_v_c;
215 
216 #if defined(ARCH_IS_IA32) || defined(ARCH_IS_X86_64)
217 INTERPOLATE8X8_6TAP_LOWPASS interpolate8x8_6tap_lowpass_h_mmx;
218 INTERPOLATE8X8_6TAP_LOWPASS interpolate8x8_6tap_lowpass_v_mmx;
219 #endif
220 
221 #ifdef ARCH_IS_PPC
222 INTERPOLATE8X8_6TAP_LOWPASS interpolate8x8_6tap_lowpass_h_altivec_c;
223 #endif
224 
/*
 * Fill the block at (x, y) in `cur` from `refn` displaced by (dx, dy),
 * using the 8x4 routines.
 *
 * dx>>1 and dy>>1 are added to the source position as whole-sample
 * offsets; the low bits of dx and dy pick the routine:
 *   dx even, dy even -> transfer8x4_copy
 *   dx even, dy odd  -> interpolate8x4_halfpel_v
 *   dx odd,  dy even -> interpolate8x4_halfpel_h
 *   dx odd,  dy odd  -> interpolate8x4_halfpel_hv
 *
 * `stride` is used as the row pitch of both `cur` and `refn`; `rounding`
 * is forwarded unchanged to the interpolation routine (the plain copy does
 * not take it).
 */
static __inline void
interpolate8x4_switch(uint8_t * const cur,
					  const uint8_t * const refn,
					  const uint32_t x,
					  const uint32_t y,
					  const int32_t dx,
					  const int dy,
					  const uint32_t stride,
					  const uint32_t rounding)
{

	/* The offset is computed with unsigned operands and then converted to
	 * int, so a negative displacement ends up as a negative pointer offset. */
	const uint8_t * const src = refn + (int)((y + (dy>>1)) * stride + x + (dx>>1));
	uint8_t * const dst = cur + (int)(y * stride + x);

	switch (((dx & 1) << 1) + (dy & 1))
	{ /* ((dx%2)?2:0)+((dy%2)?1:0) */
	case 0:
		transfer8x4_copy(dst, src, stride);
		break;
	case 1:
		interpolate8x4_halfpel_v(dst, src, stride, rounding);
		break;
	case 2:
		interpolate8x4_halfpel_h(dst, src, stride, rounding);
		break;
	default:
		interpolate8x4_halfpel_hv(dst, src, stride, rounding);
		break;
	}
}
255 
/*
 * Fill the block at (x, y) in `cur` from `refn` displaced by (dx, dy),
 * using the 8x8 routines.
 *
 * dx>>1 and dy>>1 are added to the source position as whole-sample
 * offsets; the low bits of dx and dy pick the routine:
 *   dx even, dy even -> transfer8x8_copy
 *   dx even, dy odd  -> interpolate8x8_halfpel_v
 *   dx odd,  dy even -> interpolate8x8_halfpel_h
 *   dx odd,  dy odd  -> interpolate8x8_halfpel_hv
 *
 * `stride` is used as the row pitch of both `cur` and `refn`; `rounding`
 * is forwarded unchanged to the interpolation routine (the plain copy does
 * not take it).
 */
static __inline void
interpolate8x8_switch(uint8_t * const cur,
					  const uint8_t * const refn,
					  const uint32_t x,
					  const uint32_t y,
					  const int32_t dx,
					  const int dy,
					  const uint32_t stride,
					  const uint32_t rounding)
{

	/* The offset is computed with unsigned operands and then converted to
	 * int, so a negative displacement ends up as a negative pointer offset. */
	const uint8_t * const src = refn + (int)((y + (dy>>1)) * stride + x + (dx>>1));
	uint8_t * const dst = cur + (int)(y * stride + x);

	switch (((dx & 1) << 1) + (dy & 1))	{ /* ((dx%2)?2:0)+((dy%2)?1:0) */
	case 0:
		transfer8x8_copy(dst, src, stride);
		break;
	case 1:
		interpolate8x8_halfpel_v(dst, src, stride, rounding);
		break;
	case 2:
		interpolate8x8_halfpel_h(dst, src, stride, rounding);
		break;
	default:
		interpolate8x8_halfpel_hv(dst, src, stride, rounding);
		break;
	}
}
285 
/*
 * "Add" counterpart of interpolate8x8_switch: same source/destination
 * addressing and same selection on the low bits of dx and dy, but every
 * case (including the even/even one) goes through an
 * interpolate8x8_halfpel_*_add routine instead of a plain copy:
 *   dx even, dy even -> interpolate8x8_halfpel_add
 *   dx even, dy odd  -> interpolate8x8_halfpel_v_add
 *   dx odd,  dy even -> interpolate8x8_halfpel_h_add
 *   dx odd,  dy odd  -> interpolate8x8_halfpel_hv_add
 *
 * `rounding` is forwarded unchanged in all four cases.
 */
static __inline void
interpolate8x8_add_switch(uint8_t * const cur,
					  const uint8_t * const refn,
					  const uint32_t x,
					  const uint32_t y,
					  const int32_t dx,
					  const int dy,
					  const uint32_t stride,
					  const uint32_t rounding)
{

	/* The offset is computed with unsigned operands and then converted to
	 * int, so a negative displacement ends up as a negative pointer offset. */
	const uint8_t * const src = refn + (int)((y + (dy>>1)) * stride + x + (dx>>1));
	uint8_t * const dst = cur + (int)(y * stride + x);

	switch (((dx & 1) << 1) + (dy & 1))	{ /* ((dx%2)?2:0)+((dy%2)?1:0) */
	case 0:
		interpolate8x8_halfpel_add(dst, src, stride, rounding);
		break;
	case 1:
		interpolate8x8_halfpel_v_add(dst, src, stride, rounding);
		break;
	case 2:
		interpolate8x8_halfpel_h_add(dst, src, stride, rounding);
		break;
	default:
		interpolate8x8_halfpel_hv_add(dst, src, stride, rounding);
		break;
	}
}
315 
/*
 * 16x16 version of interpolate8x8_switch: calls it four times, at
 * (x, y), (x+8, y), (x, y+8) and (x+8, y+8), with the same dx, dy,
 * stride and rounding for every call.
 */
static __inline void
interpolate16x16_switch(uint8_t * const cur,
					  const uint8_t * const refn,
					  const uint32_t x,
					  const uint32_t y,
					  const int32_t dx,
					  const int dy,
					  const uint32_t stride,
					  const uint32_t rounding)
{
	interpolate8x8_switch(cur, refn, x,   y,   dx, dy, stride, rounding);
	interpolate8x8_switch(cur, refn, x+8, y,   dx, dy, stride, rounding);
	interpolate8x8_switch(cur, refn, x,   y+8, dx, dy, stride, rounding);
	interpolate8x8_switch(cur, refn, x+8, y+8, dx, dy, stride, rounding);
}
331 
/*
 * 16x16 version of interpolate8x8_add_switch: calls it four times, at
 * (x, y), (x+8, y), (x, y+8) and (x+8, y+8), with the same dx, dy,
 * stride and rounding for every call.
 */
static __inline void
interpolate16x16_add_switch(uint8_t * const cur,
					  const uint8_t * const refn,
					  const uint32_t x,
					  const uint32_t y,
					  const int32_t dx,
					  const int dy,
					  const uint32_t stride,
					  const uint32_t rounding)
{
	interpolate8x8_add_switch(cur, refn, x,   y,   dx, dy, stride, rounding);
	interpolate8x8_add_switch(cur, refn, x+8, y,   dx, dy, stride, rounding);
	interpolate8x8_add_switch(cur, refn, x,   y+8, dx, dy, stride, rounding);
	interpolate8x8_add_switch(cur, refn, x+8, y+8, dx, dy, stride, rounding);
}
347 
/*
 * 32x32 version of interpolate16x16_switch: calls it four times, at
 * (x, y), (x+16, y), (x, y+16) and (x+16, y+16), with the same dx, dy,
 * stride and rounding for every call.
 */
static __inline void
interpolate32x32_switch(uint8_t * const cur,
					  const uint8_t * const refn,
					  const uint32_t x,
					  const uint32_t y,
					  const int32_t dx,
					  const int dy,
					  const uint32_t stride,
					  const uint32_t rounding)
{
	interpolate16x16_switch(cur, refn, x,    y,    dx, dy, stride, rounding);
	interpolate16x16_switch(cur, refn, x+16, y,    dx, dy, stride, rounding);
	interpolate16x16_switch(cur, refn, x,    y+16, dx, dy, stride, rounding);
	interpolate16x16_switch(cur, refn, x+16, y+16, dx, dy, stride, rounding);
}
363 
/*
 * 32x32 version of interpolate16x16_add_switch: calls it four times, at
 * (x, y), (x+16, y), (x, y+16) and (x+16, y+16), with the same dx, dy,
 * stride and rounding for every call.
 */
static __inline void
interpolate32x32_add_switch(uint8_t * const cur,
					  const uint8_t * const refn,
					  const uint32_t x,
					  const uint32_t y,
					  const int32_t dx,
					  const int dy,
					  const uint32_t stride,
					  const uint32_t rounding)
{
	interpolate16x16_add_switch(cur, refn, x,    y,    dx, dy, stride, rounding);
	interpolate16x16_add_switch(cur, refn, x+16, y,    dx, dy, stride, rounding);
	interpolate16x16_add_switch(cur, refn, x,    y+16, dx, dy, stride, rounding);
	interpolate16x16_add_switch(cur, refn, x+16, y+16, dx, dy, stride, rounding);
}
379 
/*
 * Return a pointer to the 8x8 prediction for (x, y) displaced by (dx, dy).
 *
 * When both dx and dy are even, nothing is copied: the function returns a
 * pointer straight into `refn` (with const cast away, so the caller must
 * treat the result as read-only in that case). Otherwise the selected
 * half-pel routine writes its output at the start of `buffer` (no x/y
 * offset is applied to it) and `buffer` is returned.
 *
 *   dx even, dy odd  -> interpolate8x8_halfpel_v
 *   dx odd,  dy even -> interpolate8x8_halfpel_h
 *   dx odd,  dy odd  -> interpolate8x8_halfpel_hv
 *
 * Unlike the other *_switch helpers in this header, x and y are signed
 * here. `stride` is the row pitch used to address `refn` and is also the
 * stride passed to the interpolation routine.
 */
static __inline uint8_t *
interpolate8x8_switch2(uint8_t * const buffer,
					  const uint8_t * const refn,
					  const int x,
					  const int y,
					  const int dx,
					  const int dy,
					  const uint32_t stride,
					  const uint32_t rounding)
{

	/* Multiplying by the unsigned stride makes the sum unsigned; the cast
	 * back to int turns it into a signed pointer offset again. */
	const uint8_t * const src = refn + (int)((y + (dy>>1)) * stride + x + (dx>>1));

	switch (((dx & 1) << 1) + (dy & 1))	{ /* ((dx%2)?2:0)+((dy%2)?1:0) */
	case 0:
		return (uint8_t *)src;
	case 1:
		interpolate8x8_halfpel_v(buffer, src, stride, rounding);
		break;
	case 2:
		interpolate8x8_halfpel_h(buffer, src, stride, rounding);
		break;
	default:
		interpolate8x8_halfpel_hv(buffer, src, stride, rounding);
		break;
	}
	return buffer;
}
408 
409 #endif
410