/*
	decode.c: decoding samples...

	copyright 1995-2009 by the mpg123 project - free software under the terms of the LGPL 2.1
	see COPYING and AUTHORS files in distribution or http://mpg123.org
	initially written by Michael Hipp
	altivec optimization by tmkk
*/

#include "mpg123lib_intern.h"

#ifndef __APPLE__
#include <altivec.h>
#endif

/* A macro for normal synth functions */
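/*
	One expansion runs four passes. Each pass loads 16 (possibly unaligned)
	window coefficients and 16 values from the synthesis buffer b0, accumulates
	their lane-wise products into one of vsum..vsum4, then advances window by
	32 reals and b0 by B0STEP. The closing merges transpose the four
	accumulators so that v5..v8 hold lanes 0..3 of every accumulator; the
	caller combines v5..v8 element-wise into four finished output samples.
*/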
#define SYNTH_ALTIVEC(B0STEP) \
	v1 = vec_ld(0,window); \
	v2 = vec_ld(16,window); \
	v3 = vec_ld(32,window); \
	v4 = vec_ld(48,window); \
	v5 = vec_ld(64,window); \
	v1 = vec_perm(v1,v2,vperm1); \
	v6 = vec_ld(0,b0); \
	v2 = vec_perm(v2,v3,vperm1); \
	v7 = vec_ld(16,b0); \
	v3 = vec_perm(v3,v4,vperm1); \
	v8 = vec_ld(32,b0); \
	v4 = vec_perm(v4,v5,vperm1); \
	v9 = vec_ld(48,b0); \
	 \
	vsum = vec_madd(v1,v6,vzero); \
	vsum = vec_madd(v2,v7,vsum); \
	vsum = vec_madd(v3,v8,vsum); \
	vsum = vec_madd(v4,v9,vsum); \
	 \
	window += 32; \
	b0 += B0STEP; \
	 \
	v1 = vec_ld(0,window); \
	v2 = vec_ld(16,window); \
	v3 = vec_ld(32,window); \
	v4 = vec_ld(48,window); \
	v5 = vec_ld(64,window); \
	v1 = vec_perm(v1,v2,vperm1); \
	v6 = vec_ld(0,b0); \
	v2 = vec_perm(v2,v3,vperm1); \
	v7 = vec_ld(16,b0); \
	v3 = vec_perm(v3,v4,vperm1); \
	v8 = vec_ld(32,b0); \
	v4 = vec_perm(v4,v5,vperm1); \
	v9 = vec_ld(48,b0); \
	 \
	vsum2 = vec_madd(v1,v6,vzero); \
	vsum2 = vec_madd(v2,v7,vsum2); \
	vsum2 = vec_madd(v3,v8,vsum2); \
	vsum2 = vec_madd(v4,v9,vsum2); \
	 \
	window += 32; \
	b0 += B0STEP; \
	 \
	v1 = vec_ld(0,window); \
	v2 = vec_ld(16,window); \
	v3 = vec_ld(32,window); \
	v4 = vec_ld(48,window); \
	v5 = vec_ld(64,window); \
	v1 = vec_perm(v1,v2,vperm1); \
	v6 = vec_ld(0,b0); \
	v2 = vec_perm(v2,v3,vperm1); \
	v7 = vec_ld(16,b0); \
	v3 = vec_perm(v3,v4,vperm1); \
	v8 = vec_ld(32,b0); \
	v4 = vec_perm(v4,v5,vperm1); \
	v9 = vec_ld(48,b0); \
	 \
	vsum3 = vec_madd(v1,v6,vzero); \
	vsum3 = vec_madd(v2,v7,vsum3); \
	vsum3 = vec_madd(v3,v8,vsum3); \
	vsum3 = vec_madd(v4,v9,vsum3); \
	 \
	window += 32; \
	b0 += B0STEP; \
	 \
	v1 = vec_ld(0,window); \
	v2 = vec_ld(16,window); \
	v3 = vec_ld(32,window); \
	v4 = vec_ld(48,window); \
	v5 = vec_ld(64,window); \
	v1 = vec_perm(v1,v2,vperm1); \
	v6 = vec_ld(0,b0); \
	v2 = vec_perm(v2,v3,vperm1); \
	v7 = vec_ld(16,b0); \
	v3 = vec_perm(v3,v4,vperm1); \
	v8 = vec_ld(32,b0); \
	v4 = vec_perm(v4,v5,vperm1); \
	v9 = vec_ld(48,b0); \
	 \
	vsum4 = vec_madd(v1,v6,vzero); \
	vsum4 = vec_madd(v2,v7,vsum4); \
	vsum4 = vec_madd(v3,v8,vsum4); \
	vsum4 = vec_madd(v4,v9,vsum4); \
	 \
	window += 32; \
	b0 += B0STEP; \
	 \
	v1 = vec_mergeh(vsum,vsum3); \
	v2 = vec_mergeh(vsum2,vsum4); \
	v3 = vec_mergel(vsum,vsum3); \
	v4 = vec_mergel(vsum2,vsum4); \
	v5 = vec_mergeh(v1,v2); \
	v6 = vec_mergel(v1,v2); \
	v7 = vec_mergeh(v3,v4); \
	v8 = vec_mergel(v3,v4);

/* A macro for stereo synth functions */
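/*
	Same scheme as SYNTH_ALTIVEC, but each pass reuses the window coefficients
	for both channels: left partial sums go to vsum..vsum4 (from b0l), right
	partial sums to vsum5..vsum8 (from b0r). The closing merges transpose both
	sets in place, so the caller combines vsum..vsum4 into four left samples
	and vsum5..vsum8 into four right samples.
*/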
#define SYNTH_STEREO_ALTIVEC(B0STEP) \
	v1 = vec_ld(0,window); \
	v2 = vec_ld(16,window); \
	v3 = vec_ld(32,window); \
	v4 = vec_ld(48,window); \
	v5 = vec_ld(64,window); \
	v1 = vec_perm(v1,v2,vperm1); \
	v6 = vec_ld(0,b0l); \
	v10 = vec_ld(0,b0r); \
	v2 = vec_perm(v2,v3,vperm1); \
	v7 = vec_ld(16,b0l); \
	v11 = vec_ld(16,b0r); \
	v3 = vec_perm(v3,v4,vperm1); \
	v8 = vec_ld(32,b0l); \
	v12 = vec_ld(32,b0r); \
	v4 = vec_perm(v4,v5,vperm1); \
	v9 = vec_ld(48,b0l); \
	v13 = vec_ld(48,b0r); \
	 \
	vsum = vec_madd(v1,v6,vzero); \
	vsum5 = vec_madd(v1,v10,vzero); \
	vsum = vec_madd(v2,v7,vsum); \
	vsum5 = vec_madd(v2,v11,vsum5); \
	vsum = vec_madd(v3,v8,vsum); \
	vsum5 = vec_madd(v3,v12,vsum5); \
	vsum = vec_madd(v4,v9,vsum); \
	vsum5 = vec_madd(v4,v13,vsum5); \
	 \
	window += 32; \
	b0l += B0STEP; \
	b0r += B0STEP; \
	 \
	v1 = vec_ld(0,window); \
	v2 = vec_ld(16,window); \
	v3 = vec_ld(32,window); \
	v4 = vec_ld(48,window); \
	v5 = vec_ld(64,window); \
	v1 = vec_perm(v1,v2,vperm1); \
	v6 = vec_ld(0,b0l); \
	v10 = vec_ld(0,b0r); \
	v2 = vec_perm(v2,v3,vperm1); \
	v7 = vec_ld(16,b0l); \
	v11 = vec_ld(16,b0r); \
	v3 = vec_perm(v3,v4,vperm1); \
	v8 = vec_ld(32,b0l); \
	v12 = vec_ld(32,b0r); \
	v4 = vec_perm(v4,v5,vperm1); \
	v9 = vec_ld(48,b0l); \
	v13 = vec_ld(48,b0r); \
	 \
	vsum2 = vec_madd(v1,v6,vzero); \
	vsum6 = vec_madd(v1,v10,vzero); \
	vsum2 = vec_madd(v2,v7,vsum2); \
	vsum6 = vec_madd(v2,v11,vsum6); \
	vsum2 = vec_madd(v3,v8,vsum2); \
	vsum6 = vec_madd(v3,v12,vsum6); \
	vsum2 = vec_madd(v4,v9,vsum2); \
	vsum6 = vec_madd(v4,v13,vsum6); \
	 \
	window += 32; \
	b0l += B0STEP; \
	b0r += B0STEP; \
	 \
	v1 = vec_ld(0,window); \
	v2 = vec_ld(16,window); \
	v3 = vec_ld(32,window); \
	v4 = vec_ld(48,window); \
	v5 = vec_ld(64,window); \
	v1 = vec_perm(v1,v2,vperm1); \
	v6 = vec_ld(0,b0l); \
	v10 = vec_ld(0,b0r); \
	v2 = vec_perm(v2,v3,vperm1); \
	v7 = vec_ld(16,b0l); \
	v11 = vec_ld(16,b0r); \
	v3 = vec_perm(v3,v4,vperm1); \
	v8 = vec_ld(32,b0l); \
	v12 = vec_ld(32,b0r); \
	v4 = vec_perm(v4,v5,vperm1); \
	v9 = vec_ld(48,b0l); \
	v13 = vec_ld(48,b0r); \
	 \
	vsum3 = vec_madd(v1,v6,vzero); \
	vsum7 = vec_madd(v1,v10,vzero); \
	vsum3 = vec_madd(v2,v7,vsum3); \
	vsum7 = vec_madd(v2,v11,vsum7); \
	vsum3 = vec_madd(v3,v8,vsum3); \
	vsum7 = vec_madd(v3,v12,vsum7); \
	vsum3 = vec_madd(v4,v9,vsum3); \
	vsum7 = vec_madd(v4,v13,vsum7); \
	 \
	window += 32; \
	b0l += B0STEP; \
	b0r += B0STEP; \
	 \
	v1 = vec_ld(0,window); \
	v2 = vec_ld(16,window); \
	v3 = vec_ld(32,window); \
	v4 = vec_ld(48,window); \
	v5 = vec_ld(64,window); \
	v1 = vec_perm(v1,v2,vperm1); \
	v6 = vec_ld(0,b0l); \
	v10 = vec_ld(0,b0r); \
	v2 = vec_perm(v2,v3,vperm1); \
	v7 = vec_ld(16,b0l); \
	v11 = vec_ld(16,b0r); \
	v3 = vec_perm(v3,v4,vperm1); \
	v8 = vec_ld(32,b0l); \
	v12 = vec_ld(32,b0r); \
	v4 = vec_perm(v4,v5,vperm1); \
	v9 = vec_ld(48,b0l); \
	v13 = vec_ld(48,b0r); \
	 \
	vsum4 = vec_madd(v1,v6,vzero); \
	vsum8 = vec_madd(v1,v10,vzero); \
	vsum4 = vec_madd(v2,v7,vsum4); \
	vsum8 = vec_madd(v2,v11,vsum8); \
	vsum4 = vec_madd(v3,v8,vsum4); \
	vsum8 = vec_madd(v3,v12,vsum8); \
	vsum4 = vec_madd(v4,v9,vsum4); \
	vsum8 = vec_madd(v4,v13,vsum8); \
	 \
	window += 32; \
	b0l += B0STEP; \
	b0r += B0STEP; \
	 \
	v1 = vec_mergeh(vsum,vsum3); \
	v5 = vec_mergeh(vsum5,vsum7); \
	v2 = vec_mergeh(vsum2,vsum4); \
	v6 = vec_mergeh(vsum6,vsum8); \
	v3 = vec_mergel(vsum,vsum3); \
	v7 = vec_mergel(vsum5,vsum7); \
	v4 = vec_mergel(vsum2,vsum4); \
	v8 = vec_mergel(vsum6,vsum8); \
	vsum = vec_mergeh(v1,v2); \
	vsum5 = vec_mergeh(v5,v6); \
	vsum2 = vec_mergel(v1,v2); \
	vsum6 = vec_mergel(v5,v6); \
	vsum3 = vec_mergeh(v3,v4); \
	vsum7 = vec_mergeh(v7,v8); \
	vsum4 = vec_mergel(v3,v4); \
	vsum8 = vec_mergel(v7,v8);

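/*
	Synthesize one channel: 32 output samples as 16 bit integers, written into
	the interleaved output buffer. Returns the number of clipped samples.
*/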
int synth_1to1_altivec(real *bandPtr,int channel,mpg123_handle *fr, int final)
{
	short *samples = (short *) (fr->buffer.data+fr->buffer.fill);

	real *b0, **buf;
	int clip;
	int bo1;
#ifndef NO_EQUALIZER
	if(fr->have_eq_settings) do_equalizer(bandPtr,channel,fr->equalizer);
#endif
	if(!channel)
	{
		fr->bo--;
		fr->bo &= 0xf;
		buf = fr->real_buffs[0];
	}
	else
	{
		samples++;
		buf = fr->real_buffs[1];
	}

	if(fr->bo & 0x1)
	{
		b0 = buf[0];
		bo1 = fr->bo;
		dct64_altivec(buf[1]+((fr->bo+1)&0xf),buf[0]+fr->bo,bandPtr);
	}
	else
	{
		b0 = buf[1];
		bo1 = fr->bo+1;
		dct64_altivec(buf[0]+fr->bo,buf[1]+fr->bo+1,bandPtr);
	}


	{
		register int j;
		real *window = fr->decwin + 16 - bo1;

		ALIGNED(16) int clip_tmp[4];
		vector float v1,v2,v3,v4,v5,v6,v7,v8,v9;
		vector unsigned char vperm1,vperm2,vperm3,vperm4;
		vector float vsum,vsum2,vsum3,vsum4,vmin,vmax,vzero;
		vector signed int vclip;
		vector signed short vsample1,vsample2;
		vector unsigned int vshift;
		vclip = vec_xor(vclip,vclip);
		vzero = vec_xor(vzero,vzero);
		vshift = vec_splat_u32(-1); /* 31 */
#ifdef __APPLE__
		vmax = (vector float)(32767.0f);
		vmin = (vector float)(-32768.0f);
		vperm4 = (vector unsigned char)(0,1,18,19,2,3,22,23,4,5,26,27,6,7,30,31);
#else
		vmax = (vector float){32767.0f,32767.0f,32767.0f,32767.0f};
		vmin = (vector float){-32768.0f,-32768.0f,-32768.0f,-32768.0f};
		vperm4 = (vector unsigned char){0,1,18,19,2,3,22,23,4,5,26,27,6,7,30,31};
#endif

		vperm1 = vec_lvsl(0,window);
		vperm2 = vec_lvsl(0,samples);
		vperm3 = vec_lvsr(0,samples);
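		/* First loop: combine the transposed partials as v5 - v6 + v7 - v8 and
		   step b0 forwards; the second loop adds all four and steps b0
		   backwards. Clipping is counted by shifting the all-ones compare
		   masks down to 1 and accumulating them in vclip. The packed 16 bit
		   samples are merged into every other output slot so the other
		   channel's data stays intact, with loads, permutes and two aligned
		   stores emulating an unaligned store. */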
		for (j=4;j;j--)
		{
			SYNTH_ALTIVEC(16);

			vsum = vec_sub(v5,v6);
			v9 = vec_sub(v7,v8);
			vsum = vec_add(vsum,v9);

			v3 = vec_round(vsum);
			v3 = (vector float)vec_cts(v3,0);
			v1 = (vector float)vec_cmpgt(vsum,vmax);
			v2 = (vector float)vec_cmplt(vsum,vmin);
			vsample1 = vec_ld(0,samples);
			vsample2 = vec_ld(15,samples);
			v3 = (vector float)vec_packs((vector signed int)v3,(vector signed int)v3);
			v4 = (vector float)vec_perm(vsample1,vsample2,vperm2);
			v5 = (vector float)vec_perm(v3,v4,vperm4);
			v6 = (vector float)vec_perm(vsample2,vsample1,vperm2);
			v7 = (vector float)vec_perm(v5,v6,vperm3);
			v8 = (vector float)vec_perm(v6,v5,vperm3);
			vec_st((vector signed short)v7,15,samples);
			vec_st((vector signed short)v8,0,samples);
			samples += 8;

			v1 = (vector float)vec_sr((vector unsigned int)v1, vshift);
			v2 = (vector float)vec_sr((vector unsigned int)v2, vshift);
			v1 = (vector float)vec_add((vector unsigned int)v1,(vector unsigned int)v2);
			vclip = vec_sums((vector signed int)v1,vclip);
		}

		for (j=4;j;j--)
		{
			SYNTH_ALTIVEC(-16);

			vsum = vec_add(v5,v6);
			v9 = vec_add(v7,v8);
			vsum = vec_add(vsum,v9);

			v3 = vec_round(vsum);
			v3 = (vector float)vec_cts(v3,0);
			v1 = (vector float)vec_cmpgt(vsum,vmax);
			v2 = (vector float)vec_cmplt(vsum,vmin);
			vsample1 = vec_ld(0,samples);
			vsample2 = vec_ld(15,samples);
			v3 = (vector float)vec_packs((vector signed int)v3,(vector signed int)v3);
			v4 = (vector float)vec_perm(vsample1,vsample2,vperm2);
			v5 = (vector float)vec_perm(v3,v4,vperm4);
			v6 = (vector float)vec_perm(vsample2,vsample1,vperm2);
			v7 = (vector float)vec_perm(v5,v6,vperm3);
			v8 = (vector float)vec_perm(v6,v5,vperm3);
			vec_st((vector signed short)v7,15,samples);
			vec_st((vector signed short)v8,0,samples);
			samples += 8;

			v1 = (vector float)vec_sr((vector unsigned int)v1, vshift);
			v2 = (vector float)vec_sr((vector unsigned int)v2, vshift);
			v1 = (vector float)vec_add((vector unsigned int)v1,(vector unsigned int)v2);
			vclip = vec_sums((vector signed int)v1,vclip);
		}

		vec_st(vclip,0,clip_tmp);
		clip = clip_tmp[3];
	}
	if(final) fr->buffer.fill += 128;

	return clip;
}

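/*
	Synthesize both channels at once: 32 stereo frames of 16 bit samples,
	interleaved left/right. Returns the number of clipped samples.
*/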
int synth_1to1_stereo_altivec(real *bandPtr_l, real *bandPtr_r, mpg123_handle *fr)
{
	short *samples = (short *) (fr->buffer.data+fr->buffer.fill);

	real *b0l, *b0r, **bufl, **bufr;
	int clip;
	int bo1;
#ifndef NO_EQUALIZER
	if(fr->have_eq_settings)
	{
		do_equalizer(bandPtr_l,0,fr->equalizer);
		do_equalizer(bandPtr_r,1,fr->equalizer);
	}
#endif
	fr->bo--;
	fr->bo &= 0xf;
	bufl = fr->real_buffs[0];
	bufr = fr->real_buffs[1];

	if(fr->bo & 0x1)
	{
		b0l = bufl[0];
		b0r = bufr[0];
		bo1 = fr->bo;
		dct64_altivec(bufl[1]+((fr->bo+1)&0xf),bufl[0]+fr->bo,bandPtr_l);
		dct64_altivec(bufr[1]+((fr->bo+1)&0xf),bufr[0]+fr->bo,bandPtr_r);
	}
	else
	{
		b0l = bufl[1];
		b0r = bufr[1];
		bo1 = fr->bo+1;
		dct64_altivec(bufl[0]+fr->bo,bufl[1]+fr->bo+1,bandPtr_l);
		dct64_altivec(bufr[0]+fr->bo,bufr[1]+fr->bo+1,bandPtr_r);
	}


	{
		register int j;
		real *window = fr->decwin + 16 - bo1;

		ALIGNED(16) int clip_tmp[4];
		vector float v1,v2,v3,v4,v5,v6,v7,v8,v9,v10,v11,v12,v13;
		vector unsigned char vperm1,vperm2;
		vector float vsum,vsum2,vsum3,vsum4,vsum5,vsum6,vsum7,vsum8,vmin,vmax,vzero;
		vector signed int vclip;
		vector unsigned int vshift;
		vector signed short vprev;
		vclip = vec_xor(vclip,vclip);
		vzero = vec_xor(vzero,vzero);
		vshift = vec_splat_u32(-1); /* 31 */
#ifdef __APPLE__
		vmax = (vector float)(32767.0f);
		vmin = (vector float)(-32768.0f);
#else
		vmax = (vector float){32767.0f,32767.0f,32767.0f,32767.0f};
		vmin = (vector float){-32768.0f,-32768.0f,-32768.0f,-32768.0f};
#endif

		vperm1 = vec_lvsl(0,window);
		vperm2 = vec_lvsr(0,samples);
		vprev = vec_perm(vec_ld(0,samples),vec_ld(0,samples),vec_lvsl(0,samples));
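		/* The output pointer may be misaligned: vprev carries the bytes that
		   belong in front of the current store position, each iteration shifts
		   the freshly packed frames through vperm2 (vec_lvsr), and the leftover
		   tail is flushed after the loops if samples is not 16 byte aligned. */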
		for (j=4;j;j--)
		{
			SYNTH_STEREO_ALTIVEC(16);

			vsum = vec_sub(vsum,vsum2);
			vsum2 = vec_sub(vsum5,vsum6);
			vsum3 = vec_sub(vsum3,vsum4);
			vsum4 = vec_sub(vsum7,vsum8);
			vsum = vec_add(vsum,vsum3);
			vsum2 = vec_add(vsum2,vsum4);

			v1 = vec_round(vsum);
			v2 = vec_round(vsum2);
			v1 = (vector float)vec_cts(v1,0);
			v2 = (vector float)vec_cts(v2,0);
			v3 = vec_mergeh(v1, v2);
			v4 = vec_mergel(v1, v2);
			v5 = (vector float)vec_packs((vector signed int)v3,(vector signed int)v4);
			v6 = (vector float)vec_perm(vprev,(vector signed short)v5,vperm2);
			vprev = (vector signed short)v5;
			v1 = (vector float)vec_cmpgt(vsum,vmax);
			v2 = (vector float)vec_cmplt(vsum,vmin);
			v3 = (vector float)vec_cmpgt(vsum2,vmax);
			v4 = (vector float)vec_cmplt(vsum2,vmin);
			vec_st((vector signed short)v6,0,samples);
			samples += 8;

			v1 = (vector float)vec_sr((vector unsigned int)v1, vshift);
			v2 = (vector float)vec_sr((vector unsigned int)v2, vshift);
			v3 = (vector float)vec_sr((vector unsigned int)v3, vshift);
			v4 = (vector float)vec_sr((vector unsigned int)v4, vshift);
			v1 = (vector float)vec_add((vector unsigned int)v1,(vector unsigned int)v2);
			v2 = (vector float)vec_add((vector unsigned int)v3,(vector unsigned int)v4);
			vclip = vec_sums((vector signed int)v1,vclip);
			vclip = vec_sums((vector signed int)v2,vclip);
		}

		for (j=4;j;j--)
		{
			SYNTH_STEREO_ALTIVEC(-16);

			vsum = vec_add(vsum,vsum2);
			vsum2 = vec_add(vsum5,vsum6);
			vsum3 = vec_add(vsum3,vsum4);
			vsum4 = vec_add(vsum7,vsum8);
			vsum = vec_add(vsum,vsum3);
			vsum2 = vec_add(vsum2,vsum4);

			v1 = vec_round(vsum);
			v2 = vec_round(vsum2);
			v1 = (vector float)vec_cts(v1,0);
			v2 = (vector float)vec_cts(v2,0);
			v3 = vec_mergeh(v1, v2);
			v4 = vec_mergel(v1, v2);
			v5 = (vector float)vec_packs((vector signed int)v3,(vector signed int)v4);
			v6 = (vector float)vec_perm(vprev,(vector signed short)v5,vperm2);
			vprev = (vector signed short)v5;
			v1 = (vector float)vec_cmpgt(vsum,vmax);
			v2 = (vector float)vec_cmplt(vsum,vmin);
			v3 = (vector float)vec_cmpgt(vsum2,vmax);
			v4 = (vector float)vec_cmplt(vsum2,vmin);
			vec_st((vector signed short)v6,0,samples);
			samples += 8;

			v1 = (vector float)vec_sr((vector unsigned int)v1, vshift);
			v2 = (vector float)vec_sr((vector unsigned int)v2, vshift);
			v3 = (vector float)vec_sr((vector unsigned int)v3, vshift);
			v4 = (vector float)vec_sr((vector unsigned int)v4, vshift);
			v1 = (vector float)vec_add((vector unsigned int)v1,(vector unsigned int)v2);
			v2 = (vector float)vec_add((vector unsigned int)v3,(vector unsigned int)v4);
			vclip = vec_sums((vector signed int)v1,vclip);
			vclip = vec_sums((vector signed int)v2,vclip);
		}

		if((size_t)samples & 0xf)
		{
			v1 = (vector float)vec_perm(vec_ld(0,samples),vec_ld(0,samples),vec_lvsl(0,samples));
			v2 = (vector float)vec_perm(vprev,(vector signed short)v1,vperm2);
			vec_st((vector signed short)v2,0,samples);
		}

		vec_st(vclip,0,clip_tmp);
		clip = clip_tmp[3];
	}
	fr->buffer.fill += 128;

	return clip;
}

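/*
	Synthesize one channel: 32 output samples as 32 bit floats scaled by
	1/32768, written into the interleaved output buffer (no clipping test).
*/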
int synth_1to1_real_altivec(real *bandPtr,int channel,mpg123_handle *fr, int final)
{
	real *samples = (real *) (fr->buffer.data+fr->buffer.fill);

	real *b0, **buf;
	int bo1;
#ifndef NO_EQUALIZER
	if(fr->have_eq_settings) do_equalizer(bandPtr,channel,fr->equalizer);
#endif
	if(!channel)
	{
		fr->bo--;
		fr->bo &= 0xf;
		buf = fr->real_buffs[0];
	}
	else
	{
		samples++;
		buf = fr->real_buffs[1];
	}

	if(fr->bo & 0x1)
	{
		b0 = buf[0];
		bo1 = fr->bo;
		dct64_altivec(buf[1]+((fr->bo+1)&0xf),buf[0]+fr->bo,bandPtr);
	}
	else
	{
		b0 = buf[1];
		bo1 = fr->bo+1;
		dct64_altivec(buf[0]+fr->bo,buf[1]+fr->bo+1,bandPtr);
	}


	{
		register int j;
		real *window = fr->decwin + 16 - bo1;

		vector float v1,v2,v3,v4,v5,v6,v7,v8,v9;
		vector unsigned char vperm1,vperm2,vperm3,vperm4, vperm5;
		vector float vsum,vsum2,vsum3,vsum4,vscale,vzero;
		vector float vsample1,vsample2,vsample3;
		vzero = vec_xor(vzero, vzero);
#ifdef __APPLE__
		vscale = (vector float)(1.0f/32768.0f);
		vperm4 = (vector unsigned char)(0,1,2,3,20,21,22,23,4,5,6,7,28,29,30,31);
		vperm5 = (vector unsigned char)(8,9,10,11,20,21,22,23,12,13,14,15,28,29,30,31);
#else
		vscale = (vector float){1.0f/32768.0f,1.0f/32768.0f,1.0f/32768.0f,1.0f/32768.0f};
		vperm4 = (vector unsigned char){0,1,2,3,20,21,22,23,4,5,6,7,28,29,30,31};
		vperm5 = (vector unsigned char){8,9,10,11,20,21,22,23,12,13,14,15,28,29,30,31};
#endif

		vperm1 = vec_lvsl(0,window);
		vperm2 = vec_lvsl(0,samples);
		vperm3 = vec_lvsr(0,samples);
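		/* As in the 16 bit synth, the new samples are merged into every other
		   32 bit slot of the interleaved float output: vperm4 and vperm5 place
		   two new samples each, and the three loads/stores perform an unaligned
		   read-modify-write of the affected span. */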
		for (j=4;j;j--)
		{
			SYNTH_ALTIVEC(16);

			vsum = vec_sub(v5,v6);
			v9 = vec_sub(v7,v8);
			vsum = vec_add(vsum,v9);
			vsum = vec_madd(vsum, vscale, vzero);

			vsample1 = vec_ld(0,samples);
			vsample2 = vec_ld(16,samples);
			vsample3 = vec_ld(31,samples);
			v1 = vec_perm(vsample1, vsample2, vperm2);
			v2 = vec_perm(vsample2, vsample3, vperm2);
			v1 = vec_perm(vsum, v1, vperm4);
			v2 = vec_perm(vsum, v2, vperm5);
			v3 = vec_perm(vsample3, vsample2, vperm2);
			v4 = vec_perm(vsample2, vsample1, vperm2);
			v5 = vec_perm(v2, v3, vperm3);
			v6 = vec_perm(v1, v2, vperm3);
			v7 = vec_perm(v4, v1, vperm3);
			vec_st(v5,31,samples);
			vec_st(v6,16,samples);
			vec_st(v7,0,samples);
			samples += 8;
		}

		for (j=4;j;j--)
		{
			SYNTH_ALTIVEC(-16);

			vsum = vec_add(v5,v6);
			v9 = vec_add(v7,v8);
			vsum = vec_add(vsum,v9);
			vsum = vec_madd(vsum, vscale, vzero);

			vsample1 = vec_ld(0,samples);
			vsample2 = vec_ld(16,samples);
			vsample3 = vec_ld(31,samples);
			v1 = vec_perm(vsample1, vsample2, vperm2);
			v2 = vec_perm(vsample2, vsample3, vperm2);
			v1 = vec_perm(vsum, v1, vperm4);
			v2 = vec_perm(vsum, v2, vperm5);
			v3 = vec_perm(vsample3, vsample2, vperm2);
			v4 = vec_perm(vsample2, vsample1, vperm2);
			v5 = vec_perm(v2, v3, vperm3);
			v6 = vec_perm(v1, v2, vperm3);
			v7 = vec_perm(v4, v1, vperm3);
			vec_st(v5,31,samples);
			vec_st(v6,16,samples);
			vec_st(v7,0,samples);
			samples += 8;
		}
	}
	if(final) fr->buffer.fill += 256;

	return 0;
}

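/*
	Synthesize both channels at once: 32 stereo frames of 32 bit floats scaled
	by 1/32768, interleaved left/right (no clipping test).
*/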
int synth_1to1_fltst_altivec(real *bandPtr_l, real *bandPtr_r, mpg123_handle *fr)
{
	real *samples = (real *) (fr->buffer.data+fr->buffer.fill);

	real *b0l, *b0r, **bufl, **bufr;
	int bo1;
#ifndef NO_EQUALIZER
	if(fr->have_eq_settings)
	{
		do_equalizer(bandPtr_l,0,fr->equalizer);
		do_equalizer(bandPtr_r,1,fr->equalizer);
	}
#endif
	fr->bo--;
	fr->bo &= 0xf;
	bufl = fr->real_buffs[0];
	bufr = fr->real_buffs[1];

	if(fr->bo & 0x1)
	{
		b0l = bufl[0];
		b0r = bufr[0];
		bo1 = fr->bo;
		dct64_altivec(bufl[1]+((fr->bo+1)&0xf),bufl[0]+fr->bo,bandPtr_l);
		dct64_altivec(bufr[1]+((fr->bo+1)&0xf),bufr[0]+fr->bo,bandPtr_r);
	}
	else
	{
		b0l = bufl[1];
		b0r = bufr[1];
		bo1 = fr->bo+1;
		dct64_altivec(bufl[0]+fr->bo,bufl[1]+fr->bo+1,bandPtr_l);
		dct64_altivec(bufr[0]+fr->bo,bufr[1]+fr->bo+1,bandPtr_r);
	}


	{
		register int j;
		real *window = fr->decwin + 16 - bo1;

		vector float v1,v2,v3,v4,v5,v6,v7,v8,v9,v10,v11,v12,v13;
		vector unsigned char vperm1,vperm2;
		vector float vsum,vsum2,vsum3,vsum4,vsum5,vsum6,vsum7,vsum8,vscale,vzero;
		vector float vprev;
		vzero = vec_xor(vzero,vzero);
#ifdef __APPLE__
		vscale = (vector float)(1.0f/32768.0f);
#else
		vscale = (vector float){1.0f/32768.0f,1.0f/32768.0f,1.0f/32768.0f,1.0f/32768.0f};
#endif

		vperm1 = vec_lvsl(0,window);
		vperm2 = vec_lvsr(0,samples);
		vprev = vec_perm(vec_ld(0,samples),vec_ld(0,samples),vec_lvsl(0,samples));
		for (j=4;j;j--)
		{
			SYNTH_STEREO_ALTIVEC(16);

			vsum = vec_sub(vsum,vsum2);
			vsum2 = vec_sub(vsum5,vsum6);
			vsum3 = vec_sub(vsum3,vsum4);
			vsum4 = vec_sub(vsum7,vsum8);
			vsum = vec_add(vsum,vsum3);
			vsum2 = vec_add(vsum2,vsum4);
			vsum = vec_madd(vsum, vscale, vzero);
			vsum2 = vec_madd(vsum2, vscale, vzero);

			v1 = vec_mergeh(vsum, vsum2);
			v2 = vec_mergel(vsum, vsum2);
			v3 = vec_perm(vprev,v1,vperm2);
			v4 = vec_perm(v1,v2,vperm2);
			vprev = v2;
			vec_st(v3,0,samples);
			vec_st(v4,16,samples);
			samples += 8;
		}

		for (j=4;j;j--)
		{
			SYNTH_STEREO_ALTIVEC(-16);

			vsum = vec_add(vsum,vsum2);
			vsum2 = vec_add(vsum5,vsum6);
			vsum3 = vec_add(vsum3,vsum4);
			vsum4 = vec_add(vsum7,vsum8);
			vsum = vec_add(vsum,vsum3);
			vsum2 = vec_add(vsum2,vsum4);
			vsum = vec_madd(vsum, vscale, vzero);
			vsum2 = vec_madd(vsum2, vscale, vzero);

			v1 = vec_mergeh(vsum, vsum2);
			v2 = vec_mergel(vsum, vsum2);
			v3 = vec_perm(vprev,v1,vperm2);
			v4 = vec_perm(v1,v2,vperm2);
			vprev = v2;
			vec_st(v3,0,samples);
			vec_st(v4,16,samples);
			samples += 8;
		}

		if((size_t)samples & 0xf)
		{
			v1 = (vector float)vec_perm(vec_ld(0,samples),vec_ld(0,samples),vec_lvsl(0,samples));
			v2 = (vector float)vec_perm(vprev,v1,vperm2);
			vec_st(v2,0,samples);
		}
	}
	fr->buffer.fill += 256;

	return 0;
}

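/*
	Synthesize one channel: 32 output samples as 32 bit integers, written into
	the interleaved output buffer. vec_cts(x,16) applies a 2^16 scale, so the
	nominal 16 bit range fills the full 32 bit range. Returns the number of
	clipped samples.
*/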
int synth_1to1_s32_altivec(real *bandPtr,int channel,mpg123_handle *fr, int final)
{
	int32_t *samples = (int32_t *) (fr->buffer.data+fr->buffer.fill);

	real *b0, **buf;
	int clip;
	int bo1;
#ifndef NO_EQUALIZER
	if(fr->have_eq_settings) do_equalizer(bandPtr,channel,fr->equalizer);
#endif
	if(!channel)
	{
		fr->bo--;
		fr->bo &= 0xf;
		buf = fr->real_buffs[0];
	}
	else
	{
		samples++;
		buf = fr->real_buffs[1];
	}

	if(fr->bo & 0x1)
	{
		b0 = buf[0];
		bo1 = fr->bo;
		dct64_altivec(buf[1]+((fr->bo+1)&0xf),buf[0]+fr->bo,bandPtr);
	}
	else
	{
		b0 = buf[1];
		bo1 = fr->bo+1;
		dct64_altivec(buf[0]+fr->bo,buf[1]+fr->bo+1,bandPtr);
	}


	{
		register int j;
		real *window = fr->decwin + 16 - bo1;

		ALIGNED(16) int clip_tmp[4];
		vector float v1,v2,v3,v4,v5,v6,v7,v8,v9;
		vector unsigned char vperm1,vperm2,vperm3,vperm4,vperm5;
		vector float vsum,vsum2,vsum3,vsum4,vmax,vmin,vzero;
		vector signed int vsample1,vsample2,vsample3;
		vector unsigned int vshift;
		vector signed int vclip;
		vzero = vec_xor(vzero, vzero);
		vclip = vec_xor(vclip, vclip);
		vshift = vec_splat_u32(-1); /* 31 */
#ifdef __APPLE__
		vmax = (vector float)(32767.999f);
		vmin = (vector float)(-32768.0f);
		vperm4 = (vector unsigned char)(0,1,2,3,20,21,22,23,4,5,6,7,28,29,30,31);
		vperm5 = (vector unsigned char)(8,9,10,11,20,21,22,23,12,13,14,15,28,29,30,31);
#else
		vmax = (vector float){32767.999f,32767.999f,32767.999f,32767.999f};
		vmin = (vector float){-32768.0f,-32768.0f,-32768.0f,-32768.0f};
		vperm4 = (vector unsigned char){0,1,2,3,20,21,22,23,4,5,6,7,28,29,30,31};
		vperm5 = (vector unsigned char){8,9,10,11,20,21,22,23,12,13,14,15,28,29,30,31};
#endif

		vperm1 = vec_lvsl(0,window);
		vperm2 = vec_lvsl(0,samples);
		vperm3 = vec_lvsr(0,samples);
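		/* Same interleaved store pattern as the float synth. vec_cts(x,16)
		   scales by 2^16 during the conversion, so clipping is checked against
		   32767.999/-32768.0, the limits that still fit in 32 bits after
		   scaling. */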
		for (j=4;j;j--)
		{
			SYNTH_ALTIVEC(16);

			vsum = vec_sub(v5,v6);
			v9 = vec_sub(v7,v8);
			v1 = vec_add(vsum,v9);
			vsum = (vector float)vec_cts(v1,16);
			v8 = (vector float)vec_cmpgt(v1,vmax);
			v9 = (vector float)vec_cmplt(v1,vmin);

			vsample1 = vec_ld(0,samples);
			vsample2 = vec_ld(16,samples);
			vsample3 = vec_ld(31,samples);
			v1 = (vector float)vec_perm(vsample1, vsample2, vperm2);
			v2 = (vector float)vec_perm(vsample2, vsample3, vperm2);
			v1 = vec_perm(vsum, v1, vperm4);
			v2 = vec_perm(vsum, v2, vperm5);
			v3 = (vector float)vec_perm(vsample3, vsample2, vperm2);
			v4 = (vector float)vec_perm(vsample2, vsample1, vperm2);
			v5 = vec_perm(v2, v3, vperm3);
			v6 = vec_perm(v1, v2, vperm3);
			v7 = vec_perm(v4, v1, vperm3);
			vec_st((vector signed int)v5,31,samples);
			vec_st((vector signed int)v6,16,samples);
			vec_st((vector signed int)v7,0,samples);
			samples += 8;

			v1 = (vector float)vec_sr((vector unsigned int)v8, vshift);
			v2 = (vector float)vec_sr((vector unsigned int)v9, vshift);
			v1 = (vector float)vec_add((vector unsigned int)v1,(vector unsigned int)v2);
			vclip = vec_sums((vector signed int)v1,vclip);
		}

		for (j=4;j;j--)
		{
			SYNTH_ALTIVEC(-16);

			vsum = vec_add(v5,v6);
			v9 = vec_add(v7,v8);
			v1 = vec_add(vsum,v9);
			vsum = (vector float)vec_cts(v1,16);
			v8 = (vector float)vec_cmpgt(v1,vmax);
			v9 = (vector float)vec_cmplt(v1,vmin);

			vsample1 = vec_ld(0,samples);
			vsample2 = vec_ld(16,samples);
			vsample3 = vec_ld(31,samples);
			v1 = (vector float)vec_perm(vsample1, vsample2, vperm2);
			v2 = (vector float)vec_perm(vsample2, vsample3, vperm2);
			v1 = vec_perm(vsum, v1, vperm4);
			v2 = vec_perm(vsum, v2, vperm5);
			v3 = (vector float)vec_perm(vsample3, vsample2, vperm2);
			v4 = (vector float)vec_perm(vsample2, vsample1, vperm2);
			v5 = vec_perm(v2, v3, vperm3);
			v6 = vec_perm(v1, v2, vperm3);
			v7 = vec_perm(v4, v1, vperm3);
			vec_st((vector signed int)v5,31,samples);
			vec_st((vector signed int)v6,16,samples);
			vec_st((vector signed int)v7,0,samples);
			samples += 8;

			v1 = (vector float)vec_sr((vector unsigned int)v8, vshift);
			v2 = (vector float)vec_sr((vector unsigned int)v9, vshift);
			v1 = (vector float)vec_add((vector unsigned int)v1,(vector unsigned int)v2);
			vclip = vec_sums((vector signed int)v1,vclip);
		}

		vec_st(vclip,0,clip_tmp);
		clip = clip_tmp[3];
	}
	if(final) fr->buffer.fill += 256;

	return clip;
}


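/*
	Synthesize both channels at once: 32 stereo frames of 32 bit integers
	(2^16 scaled as above), interleaved left/right. Returns the number of
	clipped samples.
*/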
int synth_1to1_s32_stereo_altivec(real *bandPtr_l, real *bandPtr_r, mpg123_handle *fr)
{
	int32_t *samples = (int32_t *) (fr->buffer.data+fr->buffer.fill);

	real *b0l, *b0r, **bufl, **bufr;
	int clip;
	int bo1;
#ifndef NO_EQUALIZER
	if(fr->have_eq_settings)
	{
		do_equalizer(bandPtr_l,0,fr->equalizer);
		do_equalizer(bandPtr_r,1,fr->equalizer);
	}
#endif
	fr->bo--;
	fr->bo &= 0xf;
	bufl = fr->real_buffs[0];
	bufr = fr->real_buffs[1];

	if(fr->bo & 0x1)
	{
		b0l = bufl[0];
		b0r = bufr[0];
		bo1 = fr->bo;
		dct64_altivec(bufl[1]+((fr->bo+1)&0xf),bufl[0]+fr->bo,bandPtr_l);
		dct64_altivec(bufr[1]+((fr->bo+1)&0xf),bufr[0]+fr->bo,bandPtr_r);
	}
	else
	{
		b0l = bufl[1];
		b0r = bufr[1];
		bo1 = fr->bo+1;
		dct64_altivec(bufl[0]+fr->bo,bufl[1]+fr->bo+1,bandPtr_l);
		dct64_altivec(bufr[0]+fr->bo,bufr[1]+fr->bo+1,bandPtr_r);
	}


	{
		register int j;
		real *window = fr->decwin + 16 - bo1;

		ALIGNED(16) int clip_tmp[4];
		vector float v1,v2,v3,v4,v5,v6,v7,v8,v9,v10,v11,v12,v13;
		vector unsigned char vperm1,vperm2;
		vector float vsum,vsum2,vsum3,vsum4,vsum5,vsum6,vsum7,vsum8,vmax,vmin,vzero;
		vector float vprev;
		vector unsigned int vshift;
		vector signed int vclip;
		vzero = vec_xor(vzero, vzero);
		vclip = vec_xor(vclip, vclip);
		vshift = vec_splat_u32(-1); /* 31 */
#ifdef __APPLE__
		vmax = (vector float)(32767.999f);
		vmin = (vector float)(-32768.0f);
#else
		vmax = (vector float){32767.999f,32767.999f,32767.999f,32767.999f};
		vmin = (vector float){-32768.0f,-32768.0f,-32768.0f,-32768.0f};
#endif

		vperm1 = vec_lvsl(0,window);
		vperm2 = vec_lvsr(0,samples);
		vprev = (vector float)vec_perm(vec_ld(0,samples),vec_ld(0,samples),vec_lvsl(0,samples));
		for (j=4;j;j--)
		{
			SYNTH_STEREO_ALTIVEC(16);

			vsum = vec_sub(vsum,vsum2);
			vsum2 = vec_sub(vsum5,vsum6);
			vsum3 = vec_sub(vsum3,vsum4);
			vsum4 = vec_sub(vsum7,vsum8);
			v1 = vec_add(vsum,vsum3);
			v2 = vec_add(vsum2,vsum4);
			vsum = (vector float)vec_cts(v1,16);
			vsum2 = (vector float)vec_cts(v2,16);
			v5 = (vector float)vec_cmpgt(v1,vmax);
			v6 = (vector float)vec_cmplt(v1,vmin);
			v7 = (vector float)vec_cmpgt(v2,vmax);
			v8 = (vector float)vec_cmplt(v2,vmin);

			v1 = vec_mergeh(vsum, vsum2);
			v2 = vec_mergel(vsum, vsum2);
			v3 = vec_perm(vprev,v1,vperm2);
			v4 = vec_perm(v1,v2,vperm2);
			vprev = v2;
			vec_st((vector signed int)v3,0,samples);
			vec_st((vector signed int)v4,16,samples);
			samples += 8;

			v1 = (vector float)vec_sr((vector unsigned int)v5, vshift);
			v2 = (vector float)vec_sr((vector unsigned int)v6, vshift);
			v3 = (vector float)vec_sr((vector unsigned int)v7, vshift);
			v4 = (vector float)vec_sr((vector unsigned int)v8, vshift);
			v1 = (vector float)vec_add((vector unsigned int)v1,(vector unsigned int)v2);
			v2 = (vector float)vec_add((vector unsigned int)v3,(vector unsigned int)v4);
			vclip = vec_sums((vector signed int)v1,vclip);
			vclip = vec_sums((vector signed int)v2,vclip);
		}

		for (j=4;j;j--)
		{
			SYNTH_STEREO_ALTIVEC(-16);

			vsum = vec_add(vsum,vsum2);
			vsum2 = vec_add(vsum5,vsum6);
			vsum3 = vec_add(vsum3,vsum4);
			vsum4 = vec_add(vsum7,vsum8);
			v1 = vec_add(vsum,vsum3);
			v2 = vec_add(vsum2,vsum4);
			vsum = (vector float)vec_cts(v1,16);
			vsum2 = (vector float)vec_cts(v2,16);
			v5 = (vector float)vec_cmpgt(v1,vmax);
			v6 = (vector float)vec_cmplt(v1,vmin);
			v7 = (vector float)vec_cmpgt(v2,vmax);
			v8 = (vector float)vec_cmplt(v2,vmin);

			v1 = vec_mergeh(vsum, vsum2);
			v2 = vec_mergel(vsum, vsum2);
			v3 = vec_perm(vprev,v1,vperm2);
			v4 = vec_perm(v1,v2,vperm2);
			vprev = v2;
			vec_st((vector signed int)v3,0,samples);
			vec_st((vector signed int)v4,16,samples);
			samples += 8;

			v1 = (vector float)vec_sr((vector unsigned int)v5, vshift);
			v2 = (vector float)vec_sr((vector unsigned int)v6, vshift);
			v3 = (vector float)vec_sr((vector unsigned int)v7, vshift);
			v4 = (vector float)vec_sr((vector unsigned int)v8, vshift);
			v1 = (vector float)vec_add((vector unsigned int)v1,(vector unsigned int)v2);
			v2 = (vector float)vec_add((vector unsigned int)v3,(vector unsigned int)v4);
			vclip = vec_sums((vector signed int)v1,vclip);
			vclip = vec_sums((vector signed int)v2,vclip);
		}

		if((size_t)samples & 0xf)
		{
			v1 = (vector float)vec_perm(vec_ld(0,samples),vec_ld(0,samples),vec_lvsl(0,samples));
			v2 = (vector float)vec_perm(vprev,v1,vperm2);
			vec_st((vector signed int)v2,0,samples);
		}

		vec_st(vclip,0,clip_tmp);
		clip = clip_tmp[3];
	}
	fr->buffer.fill += 256;

	return clip;
}