/*****************************************************************************
 *
 *  XVID MPEG-4 VIDEO CODEC
 *  - Altivec 8bit<->16bit transfer -
 *
 *  Copyright(C) 2004 Christoph Naegeli <chn@kbw.ch>
 *
 *  This program is free software ; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation ; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY ; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program ; if not, write to the Free Software
 *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 *
 * $Id: mem_transfer_altivec.c 1985 2011-05-18 09:02:35Z Isibaar $
 *
 ****************************************************************************/

#ifdef HAVE_ALTIVEC_H
#include <altivec.h>
#endif

#include "../../portab.h"


/* Turn this on to enable alignment debugging */
#undef DEBUG

#include <stdio.h>

/* This function assumes:
 *	dst: 16 byte aligned
 *	src: may be unaligned
 */

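/* Loads 8 pixels from the (possibly unaligned) source row, zero-extends
 * them to 16 bit and stores them as one aligned vector at dst. */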
#define COPY8TO16() \
	s = vec_perm(vec_ld(0,src),vec_ld(16,src),vec_lvsl(0,src));\
	vec_st((vector signed short)vec_mergeh(zerovec,s),0,dst);\
	src += stride;\
	dst += 8

void
transfer_8to16copy_altivec_c(int16_t *dst,
							 uint8_t *src,
							 uint32_t stride)
{
	register vector unsigned char s;
	register vector unsigned char zerovec;

#ifdef DEBUG
	/* Check the alignment */
	if((unsigned long)dst & 0xf)
		fprintf(stderr, "transfer_8to16copy_altivec_c:incorrect align, dst: %lx\n", (unsigned long)dst);
#endif

	/* initialization */
	zerovec = vec_splat_u8(0);

	COPY8TO16();
	COPY8TO16();
	COPY8TO16();
	COPY8TO16();

	COPY8TO16();
	COPY8TO16();
	COPY8TO16();
	COPY8TO16();
}


/*
 * This function assumes dst is 8 byte aligned and stride is a multiple of 8.
 * src may be unaligned.
 */

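/* Packs one row of eight 16 bit values to saturated unsigned bytes and
 * merges them into the correct half of the aligned 16 byte block at dst,
 * leaving the other 8 bytes of that block untouched. */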
#define COPY16TO8() \
	s = vec_perm(src[0], src[1], load_src_perm); \
	packed = vec_packsu(s, vec_splat_s16(0)); \
	mask = vec_perm(mask_stencil, mask_stencil, vec_lvsl(0, dst)); \
	packed = vec_perm(packed, packed, vec_lvsl(0, dst)); \
	packed = vec_sel(packed, vec_ld(0, dst), mask); \
	vec_st(packed, 0, dst); \
	src++; \
	dst += stride

void transfer_16to8copy_altivec_c(uint8_t *dst,
								  vector signed short *src,
								  uint32_t stride)
{
	register vector signed short s;
	register vector unsigned char packed;
	register vector unsigned char mask_stencil;
	register vector unsigned char mask;
	register vector unsigned char load_src_perm;

#ifdef DEBUG
	/* if this is on, print alignment errors */
	if(((unsigned long)dst) & 0x7)
		fprintf(stderr, "transfer_16to8copy_altivec_c:incorrect align, dst: %lx\n", (unsigned long)dst);
	if(stride & 0x7)
		fprintf(stderr, "transfer_16to8copy_altivec_c:incorrect stride, stride: %u\n", stride);
#endif
	/* Initialisation stuff */
	load_src_perm = vec_lvsl(0, (unsigned char*)src);
	mask_stencil = vec_pack(vec_splat_u16(0), vec_splat_u16(-1));

	COPY16TO8();
	COPY16TO8();
	COPY16TO8();
	COPY16TO8();

	COPY16TO8();
	COPY16TO8();
	COPY16TO8();
	COPY16TO8();
}


/*
 * This function assumes dst is 8 byte aligned and src is unaligned. The
 * stride has to be a multiple of 8.
 */

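/* Copies 8 bytes from the unaligned source row into the matching half of
 * the aligned 16 byte block at dst using a masked read-modify-write, so
 * the neighbouring 8 bytes at dst are preserved. */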
#define COPY8TO8() \
	tmp = vec_perm(vec_ld(0, src), vec_ld(16, src), vec_lvsl(0, src)); \
	t0 = vec_perm(tmp, tmp, vec_lvsl(0, dst));\
	t1 = vec_perm(mask, mask, vec_lvsl(0, dst));\
	tmp = vec_sel(t0, vec_ld(0, dst), t1);\
	vec_st(tmp, 0, dst);\
	dst += stride;\
	src += stride

void
transfer8x8_copy_altivec_c(uint8_t * dst,
						   uint8_t * src,
						   uint32_t stride)
{
	register vector unsigned char tmp;
	register vector unsigned char mask;
	register vector unsigned char t0, t1;

#ifdef DEBUG
	if(((unsigned long)dst) & 0x7)
		fprintf(stderr, "transfer8x8_copy_altivec_c:incorrect align, dst: %lx\n", (unsigned long)dst);
	if(stride & 0x7)
		fprintf(stderr, "transfer8x8_copy_altivec_c:incorrect stride, stride: %u\n", stride);
#endif
	mask = vec_pack(vec_splat_u16(0), vec_splat_u16(-1));

	COPY8TO8();
	COPY8TO8();
	COPY8TO8();
	COPY8TO8();

	COPY8TO8();
	COPY8TO8();
	COPY8TO8();
	COPY8TO8();
}


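/* Computes one row of cur - ref as 16 bit differences into dct and, at the
 * same time, copies the ref row into cur (masked store that keeps the
 * other half of the 16 byte block at cur intact). */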
#define SUB8TO16() \
	c = vec_perm(vec_ld(0,cur),vec_ld(16,cur),vec_lvsl(0,cur));\
	r = vec_perm(vec_ld(0,ref),vec_ld(16,ref),vec_lvsl(0,ref));\
	cs = (vector signed short)vec_mergeh(ox00,c);\
	rs = (vector signed short)vec_mergeh(ox00,r);\
	\
	c = vec_lvsr(0,cur);\
	mask = vec_perm(mask_00ff, mask_00ff, c);\
	r = vec_perm(r, r, c);\
	r = vec_sel(r, vec_ld(0,cur), mask);\
	vec_st(r,0,cur);\
	vec_st( vec_sub(cs,rs), 0, dct );\
	\
	dct += 8;\
	cur += stride;\
	ref += stride


/* This function assumes:
 *	dct: 16 byte aligned
 *	cur: 8 byte aligned
 *	ref: may be unaligned
 *	stride: multiple of 8
 */

void
transfer_8to16sub_altivec_c(int16_t * dct,
							uint8_t * cur,
							uint8_t * ref,
							const uint32_t stride)
{
	register vector unsigned char c,r;
	register vector unsigned char ox00;
	register vector unsigned char mask_00ff;
	register vector unsigned char mask;
	register vector signed short cs,rs;

#ifdef DEBUG
	if((unsigned long)dct & 0xf)
		fprintf(stderr, "transfer_8to16sub_altivec_c:incorrect align, dct: %lx\n", (unsigned long)dct);
	if((unsigned long)cur & 0x7)
		fprintf(stderr, "transfer_8to16sub_altivec_c:incorrect align, cur: %lx\n", (unsigned long)cur);
	if(stride & 0x7)
		fprintf(stderr, "transfer_8to16sub_altivec_c:incorrect stride, stride: %u\n", stride);
#endif
	/* initialize */
	ox00 = vec_splat_u8(0);
	mask_00ff = vec_pack((vector unsigned short)ox00,vec_splat_u16(-1));

	SUB8TO16();
	SUB8TO16();
	SUB8TO16();
	SUB8TO16();

	SUB8TO16();
	SUB8TO16();
	SUB8TO16();
	SUB8TO16();
}


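/* Read-only variant of SUB8TO16: computes one row of cur - ref as 16 bit
 * differences into dct without writing back to cur. */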
#define SUBRO8TO16() \
	c = vec_perm(vec_ld(0,cur),vec_ld(16,cur),vec_lvsl(0,cur));\
	r = vec_perm(vec_ld(0,ref),vec_ld(16,ref),vec_lvsl(0,ref));\
	cs = (vector signed short)vec_mergeh(z,c);\
	rs = (vector signed short)vec_mergeh(z,r);\
	vec_st( vec_sub(cs,rs), 0, dct );\
	dct += 8;\
	cur += stride;\
	ref += stride


/* This function assumes:
 *	dct: 16 byte aligned
 *	cur, ref: may be unaligned
 */

void
transfer_8to16subro_altivec_c(int16_t * dct,
							  const uint8_t * cur,
							  const uint8_t * ref,
							  const uint32_t stride)
{
	register vector unsigned char c;
	register vector unsigned char r;
	register vector unsigned char z;
	register vector signed short cs;
	register vector signed short rs;

#ifdef DEBUG
	/* Check the alignment assumptions if this is on */
	if((unsigned long)dct & 0xf)
		fprintf(stderr, "transfer_8to16subro_altivec_c:incorrect align, dct: %lx\n", (unsigned long)dct);
#endif
	/* initialize */
	z = vec_splat_u8(0);

	SUBRO8TO16();
	SUBRO8TO16();
	SUBRO8TO16();
	SUBRO8TO16();

	SUBRO8TO16();
	SUBRO8TO16();
	SUBRO8TO16();
	SUBRO8TO16();
}

/*
 * This function assumes:
 *	dct: 16 byte aligned
 *	cur: 8 byte aligned
 *	ref1: unaligned
 *	ref2: unaligned
 *	stride: multiple of 8
 */

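/* Computes one row of cur - avg(ref1, ref2) as 16 bit differences into dct
 * (vec_avg rounds the average up) and writes the rounded average back into
 * the corresponding half of the aligned 16 byte block at cur. */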
#define SUB28TO16() \
	r1 = vec_perm(vec_ld(0, ref1), vec_ld(16, ref1), vec_lvsl(0, ref1)); \
	r2 = vec_perm(vec_ld(0, ref2), vec_ld(16, ref2), vec_lvsl(0, ref2)); \
	c = vec_perm(vec_ld(0, cur), vec_ld(16, cur), vec_lvsl(0, cur)); \
	r = vec_avg(r1, r2); \
	cs = (vector signed short)vec_mergeh(vec_splat_u8(0), c); \
	rs = (vector signed short)vec_mergeh(vec_splat_u8(0), r); \
	c = vec_perm(mask, mask, vec_lvsl(0, cur));\
	/* rotate the averaged row to the alignment of cur before merging */ \
	r = vec_perm(r, r, vec_lvsl(0, cur));\
	r = vec_sel(r, vec_ld(0, cur), c);\
	vec_st(r, 0, cur); \
	*dct++ = vec_sub(cs, rs); \
	cur += stride; \
	ref1 += stride; \
	ref2 += stride

void
transfer_8to16sub2_altivec_c(vector signed short *dct,
							 uint8_t *cur,
							 uint8_t *ref1,
							 uint8_t *ref2,
							 const uint32_t stride)
{
	vector unsigned char r1;
	vector unsigned char r2;
	vector unsigned char r;
	vector unsigned char c;
	vector unsigned char mask;
	vector signed short cs;
	vector signed short rs;

#ifdef DEBUG
	/* Dump alignment errors if DEBUG is set */
	if(((unsigned long)dct) & 0xf)
		fprintf(stderr, "transfer_8to16sub2_altivec_c:incorrect align, dct: %lx\n", (unsigned long)dct);
	if(((unsigned long)cur) & 0x7)
		fprintf(stderr, "transfer_8to16sub2_altivec_c:incorrect align, cur: %lx\n", (unsigned long)cur);
	if(stride & 0x7)
		fprintf(stderr, "transfer_8to16sub2_altivec_c:incorrect stride, stride: %u\n", stride);
#endif

	/* Initialisation */
	mask = vec_pack(vec_splat_u16(0), vec_splat_u16(-1));

	SUB28TO16();
	SUB28TO16();
	SUB28TO16();
	SUB28TO16();

	SUB28TO16();
	SUB28TO16();
	SUB28TO16();
	SUB28TO16();
}


/*
 * This function assumes:
 *	dst: 8 byte aligned
 *	src: unaligned
 *	stride: multiple of 8
 */

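/* Adds one row of eight 16 bit coefficients from src to the pixels at dst,
 * saturates the sums to unsigned bytes and merges the result back into the
 * correct half of the aligned 16 byte block at dst. */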
#define ADD16TO8() \
	s = vec_perm(vec_ld(0, src), vec_ld(16, src), vec_lvsl(0, src)); \
	d = vec_perm(vec_ld(0, dst), vec_ld(16, dst), vec_lvsl(0, dst)); \
	ds = (vector signed short)vec_mergeh(vec_splat_u8(0), d); \
	ds = vec_add(ds, s); \
	packed = vec_packsu(ds, vec_splat_s16(0)); \
	mask = vec_pack(vec_splat_u16(0), vec_splat_u16(-1)); \
	mask = vec_perm(mask, mask, vec_lvsl(0, dst)); \
	packed = vec_perm(packed, packed, vec_lvsl(0, dst)); \
	packed = vec_sel(packed, vec_ld(0, dst), mask); \
	vec_st(packed, 0, dst); \
	src += 8; \
	dst += stride

void
transfer_16to8add_altivec_c(uint8_t *dst,
							int16_t *src,
							uint32_t stride)
{
	vector signed short s;
	vector signed short ds;
	vector unsigned char d;
	vector unsigned char packed;
	vector unsigned char mask;

#ifdef DEBUG
	/* if this is set, dump alignment errors */
	if(((unsigned long)dst) & 0x7)
		fprintf(stderr, "transfer_16to8add_altivec_c:incorrect align, dst: %lx\n", (unsigned long)dst);
	if(stride & 0x7)
		fprintf(stderr, "transfer_16to8add_altivec_c:incorrect stride, stride: %u\n", stride);
#endif

	ADD16TO8();
	ADD16TO8();
	ADD16TO8();
	ADD16TO8();

	ADD16TO8();
	ADD16TO8();
	ADD16TO8();
	ADD16TO8();
}