1 /*****************************************************************************
2 *
3 * XVID MPEG-4 VIDEO CODEC
4 * - 8x8 block-based halfpel interpolation -
5 *
6 * Copyright(C) 2001-2003 Peter Ross <pross@xvid.org>
7 *
8 * This program is free software ; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation ; either version 2 of the License, or
11 * (at your option) any later version.
12 *
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY ; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with this program ; if not, write to the Free Software
20 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
21 *
22 * $Id: interpolate8x8.c 1985 2011-05-18 09:02:35Z Isibaar $
23 *
24 ****************************************************************************/
25
26 #include "../portab.h"
27 #include "../global.h"
28 #include "interpolate8x8.h"
29
/*
 * Global function pointers for the interpolation routines.
 *
 * Each pointer is only defined here; nothing in this part of the file assigns
 * to them.
 * NOTE(review): presumably they are bound elsewhere to the portable *_c
 * routines of this file or to platform-specific versions -- confirm against
 * the initialisation code.
 */
INTERPOLATE8X8_PTR interpolate8x8_halfpel_h;
INTERPOLATE8X8_PTR interpolate8x8_halfpel_v;
INTERPOLATE8X8_PTR interpolate8x8_halfpel_hv;

/* 8-wide, 4-row counterparts of the three pointers above */
INTERPOLATE8X8_PTR interpolate8x4_halfpel_h;
INTERPOLATE8X8_PTR interpolate8x4_halfpel_v;
INTERPOLATE8X8_PTR interpolate8x4_halfpel_hv;

/* variants that average the interpolated value into the destination */
INTERPOLATE8X8_PTR interpolate8x8_halfpel_add;
INTERPOLATE8X8_PTR interpolate8x8_halfpel_h_add;
INTERPOLATE8X8_PTR interpolate8x8_halfpel_v_add;
INTERPOLATE8X8_PTR interpolate8x8_halfpel_hv_add;

/* averaging of two / four source blocks */
INTERPOLATE8X8_AVG2_PTR interpolate8x8_avg2;
INTERPOLATE8X8_AVG4_PTR interpolate8x8_avg4;

/* low-pass filters, 8x8 */
INTERPOLATE_LOWPASS_PTR interpolate8x8_lowpass_h;
INTERPOLATE_LOWPASS_PTR interpolate8x8_lowpass_v;

/* low-pass filters, 16x16 */
INTERPOLATE_LOWPASS_PTR interpolate16x16_lowpass_h;
INTERPOLATE_LOWPASS_PTR interpolate16x16_lowpass_v;

/* combined horizontal + vertical low-pass filters */
INTERPOLATE_LOWPASS_HV_PTR interpolate8x8_lowpass_hv;
INTERPOLATE_LOWPASS_HV_PTR interpolate16x16_lowpass_hv;

/* six-tap low-pass filters, 8x8 */
INTERPOLATE8X8_6TAP_LOWPASS_PTR interpolate8x8_6tap_lowpass_h;
INTERPOLATE8X8_6TAP_LOWPASS_PTR interpolate8x8_6tap_lowpass_v;
58
/*
 * Average two 8-pixel-wide blocks into dst.
 *
 * For every one of `height` rows and every column x = 0..7:
 *     dst[x] = (src1[x] + src2[x] + (1 - rounding)) >> 1
 * All three buffers advance by `stride` bytes per row.  dst may be the same
 * buffer as src1 or src2: each pixel is read before it is written.
 */
void
interpolate8x8_avg2_c(uint8_t * dst, const uint8_t * src1, const uint8_t *src2, const uint32_t stride, const uint32_t rounding, const uint32_t height)
{
	const int32_t bias = 1 - rounding;
	uint32_t rows_left;

	for (rows_left = height; rows_left != 0; rows_left--) {
		int x;

		for (x = 0; x < 8; x++)
			dst[x] = (src1[x] + src2[x] + bias) >> 1;

		dst  += stride;
		src1 += stride;
		src2 += stride;
	}
}
80
/*
 * dst = average(dst, src) over an 8x8 block.
 *
 * Thin wrapper around interpolate8x8_avg2_c() with dst used as the first
 * source, a fixed rounding argument of 0 and a height of 8 rows.  The
 * `rounding` parameter of this function is not used.
 */
void
interpolate8x8_halfpel_add_c(uint8_t * const dst, const uint8_t * const src, const uint32_t stride, const uint32_t rounding)
{
	(void)rounding;	/* accepted for signature compatibility only */

	interpolate8x8_avg2_c(dst, dst, src, stride, 0, 8);
}
86
/*
 * Average four 8x8 blocks into dst.
 *
 * For each of 8 rows and each column x = 0..7:
 *     dst[x] = (src1[x] + src2[x] + src3[x] + src4[x] + (2 - rounding)) >> 2
 * All five buffers advance by `stride` bytes per row.
 */
void interpolate8x8_avg4_c(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, const uint8_t *src3, const uint8_t *src4, const uint32_t stride, const uint32_t rounding)
{
	const int32_t bias = 2 - rounding;
	int32_t row;

	for (row = 0; row < 8; row++) {
		int x;

		for (x = 0; x < 8; x++)
			dst[x] = (src1[x] + src2[x] + src3[x] + src4[x] + bias) >> 2;

		dst  += stride;
		src1 += stride;
		src2 += stride;
		src3 += stride;
		src4 += stride;
	}
}
109
/*
 * dst = horizontal half-pel interpolation of src, 8x8 block.
 *
 * For each of 8 rows and each column x = 0..7:
 *     dst[x] = (src[x] + src[x+1] + bias) >> 1
 * where bias is 1 when rounding == 0 and 0 otherwise.  Nine source pixels
 * are read per row; rows are `stride` bytes apart in both buffers.
 */
void
interpolate8x8_halfpel_h_c(uint8_t * const dst,
						   const uint8_t * const src,
						   const uint32_t stride,
						   const uint32_t rounding)
{
	const int bias = rounding ? 0 : 1;
	uintptr_t offset;

	for (offset = 0; offset < 8*stride; offset += stride) {
		const uint8_t *in = src + offset;
		uint8_t *out = dst + offset;
		int x;

		for (x = 0; x < 8; x++)
			out[x] = (uint8_t)((in[x] + in[x + 1] + bias) >> 1);
	}
}
144
/*
 * dst = horizontal half-pel interpolation of src, 8 pixels wide, 4 rows.
 *
 * For each of 4 rows and each column x = 0..7:
 *     dst[x] = (src[x] + src[x+1] + bias) >> 1
 * where bias is 1 when rounding == 0 and 0 otherwise.  Rows are `stride`
 * bytes apart in both buffers.
 */
void
interpolate8x4_halfpel_h_c(uint8_t * const dst,
						   const uint8_t * const src,
						   const uint32_t stride,
						   const uint32_t rounding)
{
	const int bias = rounding ? 0 : 1;
	uintptr_t offset;

	for (offset = 0; offset < 4*stride; offset += stride) {
		const uint8_t *in = src + offset;
		uint8_t *out = dst + offset;
		int x;

		for (x = 0; x < 8; x++)
			out[x] = (uint8_t)((in[x] + in[x + 1] + bias) >> 1);
	}
}
179
/*
 * dst = (dst + interpolate_h(src)) / 2, 8x8 block.
 *
 * For each of 8 rows and each column x = 0..7:
 *     h      = (src[x] + src[x+1] + bias) >> 1
 *     dst[x] = (h + dst[x] + 1) >> 1
 * where bias is 1 when rounding == 0 and 0 otherwise.  The final average
 * always adds 1 before the shift, whatever `rounding` is.
 */
void
interpolate8x8_halfpel_h_add_c(uint8_t * const dst,
							   const uint8_t * const src,
							   const uint32_t stride,
							   const uint32_t rounding)
{
	const int bias = rounding ? 0 : 1;
	uintptr_t offset;

	for (offset = 0; offset < 8*stride; offset += stride) {
		const uint8_t *in = src + offset;
		uint8_t *out = dst + offset;
		int x;

		for (x = 0; x < 8; x++)
			out[x] = (uint8_t)((((in[x] + in[x + 1] + bias) >> 1) + out[x] + 1) >> 1);
	}
}
214
/*
 * dst = vertical half-pel interpolation of src, 8x8 block.
 *
 * For each of 8 rows and each column x = 0..7:
 *     dst[x] = (src[x] + src[x + stride] + bias) >> 1
 * where bias is 1 when rounding == 0 and 0 otherwise.  Nine source rows
 * are read; rows are `stride` bytes apart in both buffers.
 */
void
interpolate8x8_halfpel_v_c(uint8_t * const dst,
						   const uint8_t * const src,
						   const uint32_t stride,
						   const uint32_t rounding)
{
	const int bias = rounding ? 0 : 1;
	uintptr_t offset;

	for (offset = 0; offset < 8*stride; offset += stride) {
		const uint8_t *upper = src + offset;
		const uint8_t *lower = upper + stride;
		uint8_t *out = dst + offset;
		int x;

		for (x = 0; x < 8; x++)
			out[x] = (uint8_t)((upper[x] + lower[x] + bias) >> 1);
	}
}
250
/*
 * dst = vertical half-pel interpolation of src, 8 pixels wide, 4 rows.
 *
 * For each of 4 rows and each column x = 0..7:
 *     dst[x] = (src[x] + src[x + stride] + bias) >> 1
 * where bias is 1 when rounding == 0 and 0 otherwise.  Five source rows
 * are read; rows are `stride` bytes apart in both buffers.
 */
void
interpolate8x4_halfpel_v_c(uint8_t * const dst,
						   const uint8_t * const src,
						   const uint32_t stride,
						   const uint32_t rounding)
{
	const int bias = rounding ? 0 : 1;
	uintptr_t offset;

	for (offset = 0; offset < 4*stride; offset += stride) {
		const uint8_t *upper = src + offset;
		const uint8_t *lower = upper + stride;
		uint8_t *out = dst + offset;
		int x;

		for (x = 0; x < 8; x++)
			out[x] = (uint8_t)((upper[x] + lower[x] + bias) >> 1);
	}
}
286
/*
 * dst = (dst + interpolate_v(src)) / 2, 8x8 block.
 *
 * For each of 8 rows and each column x = 0..7:
 *     v      = (src[x] + src[x + stride] + bias) >> 1
 *     dst[x] = (v + dst[x] + 1) >> 1
 * where bias is 1 when rounding == 0 and 0 otherwise.  The final average
 * always adds 1 before the shift, whatever `rounding` is.
 */
void
interpolate8x8_halfpel_v_add_c(uint8_t * const dst,
							   const uint8_t * const src,
							   const uint32_t stride,
							   const uint32_t rounding)
{
	const int bias = rounding ? 0 : 1;
	uintptr_t offset;

	for (offset = 0; offset < 8*stride; offset += stride) {
		const uint8_t *upper = src + offset;
		const uint8_t *lower = upper + stride;
		uint8_t *out = dst + offset;
		int x;

		for (x = 0; x < 8; x++)
			out[x] = (uint8_t)((((upper[x] + lower[x] + bias) >> 1) + out[x] + 1) >> 1);
	}
}
322
/*
 * dst = diagonal (horizontal + vertical) half-pel interpolation, 8x8 block.
 *
 * For each of 8 rows and each column x = 0..7:
 *     dst[x] = (src[x] + src[x+1] + src[x+stride] + src[x+stride+1] + bias) >> 2
 * where bias is 2 when rounding == 0 and 1 otherwise.  A 9x9 source area
 * is read; rows are `stride` bytes apart in both buffers.
 */
void
interpolate8x8_halfpel_hv_c(uint8_t * const dst,
							const uint8_t * const src,
							const uint32_t stride,
							const uint32_t rounding)
{
	const int bias = rounding ? 1 : 2;
	uintptr_t offset;

	for (offset = 0; offset < 8*stride; offset += stride) {
		const uint8_t *upper = src + offset;
		const uint8_t *lower = upper + stride;
		uint8_t *out = dst + offset;
		int x;

		for (x = 0; x < 8; x++)
			out[x] = (uint8_t)((upper[x] + upper[x + 1] + lower[x] + lower[x + 1] + bias) >> 2);
	}
}
357
/*
 * dst = diagonal (horizontal + vertical) half-pel interpolation,
 * 8 pixels wide, 4 rows.
 *
 * For each of 4 rows and each column x = 0..7:
 *     dst[x] = (src[x] + src[x+1] + src[x+stride] + src[x+stride+1] + bias) >> 2
 * where bias is 2 when rounding == 0 and 1 otherwise.  Rows are `stride`
 * bytes apart in both buffers.
 */
void
interpolate8x4_halfpel_hv_c(uint8_t * const dst,
							const uint8_t * const src,
							const uint32_t stride,
							const uint32_t rounding)
{
	const int bias = rounding ? 1 : 2;
	uintptr_t offset;

	for (offset = 0; offset < 4*stride; offset += stride) {
		const uint8_t *upper = src + offset;
		const uint8_t *lower = upper + stride;
		uint8_t *out = dst + offset;
		int x;

		for (x = 0; x < 8; x++)
			out[x] = (uint8_t)((upper[x] + upper[x + 1] + lower[x] + lower[x + 1] + bias) >> 2);
	}
}
392
/*
 * dst = (interpolate_hv(src) + dst) / 2, 8x8 block.
 *
 * For each of 8 rows and each column x = 0..7:
 *     hv     = (src[x] + src[x+1] + src[x+stride] + src[x+stride+1] + inner) >> 2
 *     dst[x] = (hv + dst[x] + outer) >> 1
 * with inner = 2, outer = 1 when rounding == 0,
 * and  inner = 1, outer = 0 otherwise.
 *
 * NOTE(review): the horizontal and vertical add variants keep the +1 in the
 * final average for both rounding modes, while this routine drops it when
 * rounding != 0.  The behaviour is kept exactly as found -- confirm whether
 * the difference is intentional.
 */
void
interpolate8x8_halfpel_hv_add_c(uint8_t * const dst,
								const uint8_t * const src,
								const uint32_t stride,
								const uint32_t rounding)
{
	const int inner = rounding ? 1 : 2;
	const int outer = rounding ? 0 : 1;
	uintptr_t offset;

	for (offset = 0; offset < 8*stride; offset += stride) {
		const uint8_t *upper = src + offset;
		const uint8_t *lower = upper + stride;
		uint8_t *out = dst + offset;
		int x;

		for (x = 0; x < 8; x++)
			out[x] = (uint8_t)((((upper[x] + upper[x + 1] + lower[x] + lower[x + 1] + inner) >> 2) + out[x] + outer) >> 1);
	}
}
427
428 /*************************************************************
429 * QPEL STUFF STARTS HERE *
430 *************************************************************/
431
/*
 * Horizontal six-tap low-pass filter over an 8x8 block.
 *
 * For each of 8 rows and each output column k = 0..7:
 *     acc    = src[k-2] - 5*src[k-1] + 20*src[k] + 20*src[k+1]
 *              - 5*src[k+2] + src[k+3] + round_add
 *     dst[k] = CLIP(acc >> 5, 0, 255)
 * with round_add = 16 - rounding.  Each row therefore reads src[-2..10].
 * dst and src both advance by `stride` per row.
 */
void interpolate8x8_6tap_lowpass_h_c(uint8_t *dst, uint8_t *src, int32_t stride, int32_t rounding)
{
	const uint8_t round_add = 16 - rounding;
	int32_t row;

	for (row = 0; row < 8; row++) {
		int32_t k;

		for (k = 0; k < 8; k++) {
			const int32_t outer  = src[k - 2] + src[k + 3];
			const int32_t inner  = src[k - 1] + src[k + 2];
			const int32_t centre = src[k] + src[k + 1];
			const int32_t acc = outer + 5 * ((centre << 2) - inner) + round_add;

			dst[k] = CLIP((acc >> 5), 0, 255);
		}

		dst += stride;
		src += stride;
	}
}
453
/*
 * Horizontal low-pass filter producing 16 outputs per row from src[0..16].
 *
 * Interior outputs (k = 3..12) use the eight taps
 *     -1, 3, -6, 20, 20, -6, 3, -1   applied to src[k-3 .. k+4].
 * The three outputs at each end use the same kernel with the taps that
 * would fall outside src[0..16] reflected back inside, which gives the
 * explicit coefficient sets written out below.  Every sum gets
 * round_add = 16 - rounding added, is shifted right by 5 and passed
 * through CLIP(., 0, 255).
 *
 * 17 rows are processed (not 16); dst and src advance by `stride` per row.
 * Outputs are written left to right, each one after its own inputs have
 * been read.
 */
void interpolate16x16_lowpass_h_c(uint8_t *dst, uint8_t *src, int32_t stride, int32_t rounding)
{
	const uint8_t round_add = 16 - rounding;
	int32_t row;

	for (row = 0; row < 17; row++) {
		int32_t acc;
		int32_t k;

		/* left border */
		acc = 14 * src[0] + 23 * src[1] - 7 * src[2] + 3 * src[3] - src[4] + round_add;
		dst[0] = CLIP((acc >> 5), 0, 255);
		acc = -3 * src[0] + 19 * src[1] + 20 * src[2] - 6 * src[3] + 3 * src[4] - src[5] + round_add;
		dst[1] = CLIP((acc >> 5), 0, 255);
		acc = 2 * src[0] - 6 * src[1] + 20 * (src[2] + src[3]) - 6 * src[4] + 3 * src[5] - src[6] + round_add;
		dst[2] = CLIP((acc >> 5), 0, 255);

		/* interior: full symmetric kernel */
		for (k = 3; k <= 12; k++) {
			acc = 20 * (src[k] + src[k + 1])
				- 6 * (src[k - 1] + src[k + 2])
				+ 3 * (src[k - 2] + src[k + 3])
				- (src[k - 3] + src[k + 4])
				+ round_add;
			dst[k] = CLIP((acc >> 5), 0, 255);
		}

		/* right border */
		acc = -src[10] + 3 * src[11] - 6 * src[12] + 20 * (src[13] + src[14]) - 6 * src[15] + 2 * src[16] + round_add;
		dst[13] = CLIP((acc >> 5), 0, 255);
		acc = -src[11] + 3 * src[12] - 6 * src[13] + 20 * src[14] + 19 * src[15] - 3 * src[16] + round_add;
		dst[14] = CLIP((acc >> 5), 0, 255);
		acc = -src[12] + 3 * src[13] - 7 * src[14] + 23 * src[15] + 14 * src[16] + round_add;
		dst[15] = CLIP((acc >> 5), 0, 255);

		dst += stride;
		src += stride;
	}
}
485
/*
 * Horizontal low-pass filter producing 8 outputs per row from src[0..8].
 *
 * Interior outputs (k = 3, 4) use the eight taps
 *     -1, 3, -6, 20, 20, -6, 3, -1   applied to src[k-3 .. k+4].
 * The three outputs at each end use the same kernel with the taps that
 * would fall outside src[0..8] reflected back inside, which gives the
 * explicit coefficient sets written out below.  Every sum gets
 * round_add = 16 - rounding added, is shifted right by 5 and passed
 * through CLIP(., 0, 255).
 *
 * 9 rows are processed (not 8); dst and src advance by `stride` per row.
 * Outputs are written left to right, each one after its own inputs have
 * been read.
 */
void interpolate8x8_lowpass_h_c(uint8_t *dst, uint8_t *src, int32_t stride, int32_t rounding)
{
	const uint8_t round_add = 16 - rounding;
	int32_t row;

	for (row = 0; row < 9; row++) {
		int32_t acc;
		int32_t k;

		/* left border */
		acc = 14 * src[0] + 23 * src[1] - 7 * src[2] + 3 * src[3] - src[4] + round_add;
		dst[0] = CLIP((acc >> 5), 0, 255);
		acc = -3 * src[0] + 19 * src[1] + 20 * src[2] - 6 * src[3] + 3 * src[4] - src[5] + round_add;
		dst[1] = CLIP((acc >> 5), 0, 255);
		acc = 2 * src[0] - 6 * src[1] + 20 * (src[2] + src[3]) - 6 * src[4] + 3 * src[5] - src[6] + round_add;
		dst[2] = CLIP((acc >> 5), 0, 255);

		/* interior: full symmetric kernel */
		for (k = 3; k <= 4; k++) {
			acc = 20 * (src[k] + src[k + 1])
				- 6 * (src[k - 1] + src[k + 2])
				+ 3 * (src[k - 2] + src[k + 3])
				- (src[k - 3] + src[k + 4])
				+ round_add;
			dst[k] = CLIP((acc >> 5), 0, 255);
		}

		/* right border */
		acc = -src[2] + 3 * src[3] - 6 * src[4] + 20 * (src[5] + src[6]) - 6 * src[7] + 2 * src[8] + round_add;
		dst[5] = CLIP((acc >> 5), 0, 255);
		acc = -src[3] + 3 * src[4] - 6 * src[5] + 20 * src[6] + 19 * src[7] - 3 * src[8] + round_add;
		dst[6] = CLIP((acc >> 5), 0, 255);
		acc = -src[4] + 3 * src[5] - 7 * src[6] + 23 * src[7] + 14 * src[8] + round_add;
		dst[7] = CLIP((acc >> 5), 0, 255);

		dst += stride;
		src += stride;
	}
}
507
/*
 * Vertical six-tap low-pass filter over an 8x8 block.
 *
 * For each of 8 columns the 13 samples src[-2*stride .. 10*stride] are
 * loaded first; then for each output row k = 0..7:
 *     acc            = s[k-2] - 5*s[k-1] + 20*s[k] + 20*s[k+1]
 *                      - 5*s[k+2] + s[k+3] + round_add
 *     dst[k*stride]  = CLIP(acc >> 5, 0, 255)
 * with round_add = 16 - rounding.  dst and src advance by one byte per
 * column.
 */
void interpolate8x8_6tap_lowpass_v_c(uint8_t *dst, uint8_t *src, int32_t stride, int32_t rounding)
{
	const uint8_t round_add = 16 - rounding;
	int32_t col;

	for (col = 0; col < 8; col++) {
		int32_t tap[13];	/* tap[t] holds the sample of source row t - 2 */
		int32_t k;

		/* read the whole column before writing anything */
		for (k = 0; k < 13; k++)
			tap[k] = src[(k - 2) * stride];

		for (k = 0; k < 8; k++) {
			const int32_t outer  = tap[k] + tap[k + 5];
			const int32_t inner  = tap[k + 1] + tap[k + 4];
			const int32_t centre = tap[k + 2] + tap[k + 3];
			const int32_t acc = outer + 5 * ((centre << 2) - inner) + round_add;

			dst[k * stride] = CLIP((acc >> 5), 0, 255);
		}

		dst++;
		src++;
	}
}
542
/*
 * Vertical low-pass filter producing 16 outputs per column from the 17
 * samples src[0], src[stride], ..., src[16*stride].
 *
 * Interior outputs (k = 3..12) use the eight taps
 *     -1, 3, -6, 20, 20, -6, 3, -1   applied to samples k-3 .. k+4.
 * The three outputs at each end use the same kernel with the taps that
 * would fall outside samples 0..16 reflected back inside, which gives the
 * explicit coefficient sets written out below.  Every sum gets
 * round_add = 16 - rounding added, is shifted right by 5 and passed
 * through CLIP(., 0, 255).
 *
 * 17 columns are processed (not 16); dst and src advance by one byte per
 * column.  A column is read completely before any of its outputs is stored.
 */
void interpolate16x16_lowpass_v_c(uint8_t *dst, uint8_t *src, int32_t stride, int32_t rounding)
{
	const uint8_t round_add = 16 - rounding;
	int32_t col;

	for (col = 0; col < 17; col++) {
		int32_t s[17];
		int32_t acc;
		int32_t k;

		for (k = 0; k < 17; k++)
			s[k] = src[k * stride];

		/* top border */
		acc = 14 * s[0] + 23 * s[1] - 7 * s[2] + 3 * s[3] - s[4] + round_add;
		dst[0] = CLIP((acc >> 5), 0, 255);
		acc = -3 * s[0] + 19 * s[1] + 20 * s[2] - 6 * s[3] + 3 * s[4] - s[5] + round_add;
		dst[stride] = CLIP((acc >> 5), 0, 255);
		acc = 2 * s[0] - 6 * s[1] + 20 * (s[2] + s[3]) - 6 * s[4] + 3 * s[5] - s[6] + round_add;
		dst[2 * stride] = CLIP((acc >> 5), 0, 255);

		/* interior: full symmetric kernel */
		for (k = 3; k <= 12; k++) {
			acc = 20 * (s[k] + s[k + 1])
				- 6 * (s[k - 1] + s[k + 2])
				+ 3 * (s[k - 2] + s[k + 3])
				- (s[k - 3] + s[k + 4])
				+ round_add;
			dst[k * stride] = CLIP((acc >> 5), 0, 255);
		}

		/* bottom border */
		acc = -s[10] + 3 * s[11] - 6 * s[12] + 20 * (s[13] + s[14]) - 6 * s[15] + 2 * s[16] + round_add;
		dst[13 * stride] = CLIP((acc >> 5), 0, 255);
		acc = -s[11] + 3 * s[12] - 6 * s[13] + 20 * s[14] + 19 * s[15] - 3 * s[16] + round_add;
		dst[14 * stride] = CLIP((acc >> 5), 0, 255);
		acc = -s[12] + 3 * s[13] - 7 * s[14] + 23 * s[15] + 14 * s[16] + round_add;
		dst[15 * stride] = CLIP((acc >> 5), 0, 255);

		dst++;
		src++;
	}
}
592
/*
 * Vertical low-pass filter producing 8 outputs per column from the 9
 * samples src[0], src[stride], ..., src[8*stride].
 *
 * Interior outputs (k = 3, 4) use the eight taps
 *     -1, 3, -6, 20, 20, -6, 3, -1   applied to samples k-3 .. k+4.
 * The three outputs at each end use the same kernel with the taps that
 * would fall outside samples 0..8 reflected back inside, which gives the
 * explicit coefficient sets written out below.  Every sum gets
 * round_add = 16 - rounding added, is shifted right by 5 and passed
 * through CLIP(., 0, 255).
 *
 * 9 columns are processed (not 8); dst and src advance by one byte per
 * column.  A column is read completely before any of its outputs is stored.
 */
void interpolate8x8_lowpass_v_c(uint8_t *dst, uint8_t *src, int32_t stride, int32_t rounding)
{
	const uint8_t round_add = 16 - rounding;
	int32_t col;

	for (col = 0; col < 9; col++) {
		int32_t s[9];
		int32_t acc;
		int32_t k;

		for (k = 0; k < 9; k++)
			s[k] = src[k * stride];

		/* top border */
		acc = 14 * s[0] + 23 * s[1] - 7 * s[2] + 3 * s[3] - s[4] + round_add;
		dst[0] = CLIP((acc >> 5), 0, 255);
		acc = -3 * s[0] + 19 * s[1] + 20 * s[2] - 6 * s[3] + 3 * s[4] - s[5] + round_add;
		dst[stride] = CLIP((acc >> 5), 0, 255);
		acc = 2 * s[0] - 6 * s[1] + 20 * (s[2] + s[3]) - 6 * s[4] + 3 * s[5] - s[6] + round_add;
		dst[2 * stride] = CLIP((acc >> 5), 0, 255);

		/* interior: full symmetric kernel */
		for (k = 3; k <= 4; k++) {
			acc = 20 * (s[k] + s[k + 1])
				- 6 * (s[k - 1] + s[k + 2])
				+ 3 * (s[k - 2] + s[k + 3])
				- (s[k - 3] + s[k + 4])
				+ round_add;
			dst[k * stride] = CLIP((acc >> 5), 0, 255);
		}

		/* bottom border */
		acc = -s[2] + 3 * s[3] - 6 * s[4] + 20 * (s[5] + s[6]) - 6 * s[7] + 2 * s[8] + round_add;
		dst[5 * stride] = CLIP((acc >> 5), 0, 255);
		acc = -s[3] + 3 * s[4] - 6 * s[5] + 20 * s[6] + 19 * s[7] - 3 * s[8] + round_add;
		dst[6 * stride] = CLIP((acc >> 5), 0, 255);
		acc = -s[4] + 3 * s[5] - 7 * s[6] + 23 * s[7] + 14 * s[8] + round_add;
		dst[7 * stride] = CLIP((acc >> 5), 0, 255);

		dst++;
		src++;
	}
}
623
/*
 * Horizontal lowpass pass followed by the vertical one, plain C version,
 * 16 samples wide.
 *
 * 17 rows are filtered horizontally: each row reads src[0..16] and stores
 * 16 results into the matching row of dst2.  Outputs 3..12 use the weights
 * (-1, 3, -6, 20, 20, -6, 3, -1); the three outputs at either end use
 * adjusted weights.  Every weight set sums to 32, so each weighted sum gets
 * (16 - rounding) added, is shifted right by 5 and is clipped to 0..255.
 * Afterwards dst1, dst2, stride and rounding are passed on to
 * interpolate16x16_lowpass_v_c.
 *
 * dst1     - forwarded as first argument to interpolate16x16_lowpass_v_c
 * dst2     - receives the horizontally filtered rows
 * src      - first input sample
 * stride   - distance in bytes between rows, used for src and dst2 alike
 * rounding - subtracted from the bias of 16 that is added before the shift
 */
void interpolate16x16_lowpass_hv_c(uint8_t *dst1, uint8_t *dst2, uint8_t *src, int32_t stride, int32_t rounding)
{
	const uint8_t bias = (uint8_t)(16 - rounding);
	int32_t row;

	for (row = 0; row < 17; row++) {
		const uint8_t *in = src + row * stride;
		uint8_t *out = dst2 + row * stride;
		int32_t acc;
		int32_t k;

		/* left edge */
		acc = 14 * in[0] + 23 * in[1] - 7 * in[2] + 3 * in[3] - in[4];
		out[0] = CLIP(((acc + bias) >> 5), 0, 255);

		acc = -3 * in[0] + 19 * in[1] + 20 * in[2] - 6 * in[3] + 3 * in[4] - in[5];
		out[1] = CLIP(((acc + bias) >> 5), 0, 255);

		acc = 2 * in[0] - 6 * in[1] + 20 * in[2] + 20 * in[3] - 6 * in[4] + 3 * in[5] - in[6];
		out[2] = CLIP(((acc + bias) >> 5), 0, 255);

		/* outputs 3..12: full symmetric weight set around in[k], in[k + 1] */
		for (k = 3; k <= 12; k++) {
			acc = 20 * (in[k] + in[k + 1])
				- 6 * (in[k - 1] + in[k + 2])
				+ 3 * (in[k - 2] + in[k + 3])
				- (in[k - 3] + in[k + 4]);
			out[k] = CLIP(((acc + bias) >> 5), 0, 255);
		}

		/* right edge: the left-edge weights in reverse order */
		acc = -in[10] + 3 * in[11] - 6 * in[12] + 20 * in[13] + 20 * in[14] - 6 * in[15] + 2 * in[16];
		out[13] = CLIP(((acc + bias) >> 5), 0, 255);

		acc = -in[11] + 3 * in[12] - 6 * in[13] + 20 * in[14] + 19 * in[15] - 3 * in[16];
		out[14] = CLIP(((acc + bias) >> 5), 0, 255);

		acc = -in[12] + 3 * in[13] - 7 * in[14] + 23 * in[15] + 14 * in[16];
		out[15] = CLIP(((acc + bias) >> 5), 0, 255);
	}

	/* second pass over the rows just written to dst2 */
	interpolate16x16_lowpass_v_c(dst1, dst2, stride, rounding);
}
659
/*
 * Horizontal lowpass pass followed by the vertical one, plain C version,
 * 8 samples wide.
 *
 * 9 rows are filtered horizontally: each row reads src[0..8] and stores
 * 8 results into the matching row of dst2.  Outputs 3 and 4 use the weights
 * (-1, 3, -6, 20, 20, -6, 3, -1); the three outputs at either end use
 * adjusted weights.  Every weight set sums to 32, so each weighted sum gets
 * (16 - rounding) added, is shifted right by 5 and is clipped to 0..255.
 * Afterwards dst1, dst2, stride and rounding are passed on to
 * interpolate8x8_lowpass_v_c.
 *
 * dst1     - forwarded as first argument to interpolate8x8_lowpass_v_c
 * dst2     - receives the horizontally filtered rows
 * src      - first input sample
 * stride   - distance in bytes between rows, used for src and dst2 alike
 * rounding - subtracted from the bias of 16 that is added before the shift
 */
void interpolate8x8_lowpass_hv_c(uint8_t *dst1, uint8_t *dst2, uint8_t *src, int32_t stride, int32_t rounding)
{
	const uint8_t bias = (uint8_t)(16 - rounding);
	int32_t row;

	for (row = 0; row < 9; row++) {
		const uint8_t *in = src + row * stride;
		uint8_t *out = dst2 + row * stride;
		int32_t acc;
		int32_t k;

		/* left edge */
		acc = 14 * in[0] + 23 * in[1] - 7 * in[2] + 3 * in[3] - in[4];
		out[0] = CLIP(((acc + bias) >> 5), 0, 255);

		acc = -3 * in[0] + 19 * in[1] + 20 * in[2] - 6 * in[3] + 3 * in[4] - in[5];
		out[1] = CLIP(((acc + bias) >> 5), 0, 255);

		acc = 2 * in[0] - 6 * in[1] + 20 * in[2] + 20 * in[3] - 6 * in[4] + 3 * in[5] - in[6];
		out[2] = CLIP(((acc + bias) >> 5), 0, 255);

		/* outputs 3 and 4: full symmetric weight set around in[k], in[k + 1] */
		for (k = 3; k <= 4; k++) {
			acc = 20 * (in[k] + in[k + 1])
				- 6 * (in[k - 1] + in[k + 2])
				+ 3 * (in[k - 2] + in[k + 3])
				- (in[k - 3] + in[k + 4]);
			out[k] = CLIP(((acc + bias) >> 5), 0, 255);
		}

		/* right edge: the left-edge weights in reverse order */
		acc = -in[2] + 3 * in[3] - 6 * in[4] + 20 * in[5] + 20 * in[6] - 6 * in[7] + 2 * in[8];
		out[5] = CLIP(((acc + bias) >> 5), 0, 255);

		acc = -in[3] + 3 * in[4] - 6 * in[5] + 20 * in[6] + 19 * in[7] - 3 * in[8];
		out[6] = CLIP(((acc + bias) >> 5), 0, 255);

		acc = -in[4] + 3 * in[5] - 7 * in[6] + 23 * in[7] + 14 * in[8];
		out[7] = CLIP(((acc + bias) >> 5), 0, 255);
	}

	/* second pass over the rows just written to dst2 */
	interpolate8x8_lowpass_v_c(dst1, dst2, stride, rounding);
}
685