1 /*
2 Copyright (C) 2000 Paul Davis
3
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2 of the License, or
7 (at your option) any later version.
8
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
13
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
17
18 */
19
20 #define _ISOC9X_SOURCE 1
21 #define _ISOC99_SOURCE 1
22
23 #define __USE_ISOC9X 1
24 #define __USE_ISOC99 1
25
26 #include <stdio.h>
27 #include <string.h>
28 #include <math.h>
29 #include <memory.h>
30 #include <stdlib.h>
31 #include <stdint.h>
32 #include <limits.h>
33 #ifdef __linux__
34 #include <endian.h>
35 #endif
36 #include "memops.h"
37
38 #if defined (__SSE2__) && !defined (__sun__)
39 #include <emmintrin.h>
40 #ifdef __SSE4_1__
41 #include <smmintrin.h>
42 #endif
43 #endif
44
45 #if defined (__ARM_NEON__) || defined (__ARM_NEON)
46 #include <arm_neon.h>
47 #endif
48
49 /* Notes about these *_SCALING values.
50
51 the MAX_<N>BIT values are floating point. when multiplied by
52 a full-scale normalized floating point sample value (-1.0..+1.0)
53 they should give the maximum value representable with an integer
54 sample type of N bits. Note that this is asymmetric. Sample ranges
   for signed integer, 2's complement values are -(2^(N-1)) to +(2^(N-1)-1)
56
57 Complications
58 -------------
59 If we use +2^(N-1) for the scaling factors, we run into a problem:
60
61 if we start with a normalized float value of -1.0, scaling
62 to 24 bits would give -8388608 (-2^23), which is ideal.
63 But with +1.0, we get +8388608, which is technically out of range.
64
65 We never multiply a full range normalized value by this constant,
66 but we could multiply it by a positive value that is close enough to +1.0
   to produce a value > +(2^(N-1)-1).
68
69 There is no way around this paradox without wasting CPU cycles to determine
70 which scaling factor to use (i.e. determine if its negative or not,
71 use the right factor).
72
73 So, for now (October 2008) we use 2^(N-1)-1 as the scaling factor.
74 */
75
76 #define SAMPLE_24BIT_SCALING 8388607.0f
77 #define SAMPLE_16BIT_SCALING 32767.0f
78
79 /* these are just values to use if the floating point value was out of range
80
81 advice from Fons Adriaensen: make the limits symmetrical
82 */
83
84 #define SAMPLE_24BIT_MAX 8388607
85 #define SAMPLE_24BIT_MIN -8388607
86 #define SAMPLE_24BIT_MAX_F 8388607.0f
87 #define SAMPLE_24BIT_MIN_F -8388607.0f
88
89 #define SAMPLE_16BIT_MAX 32767
90 #define SAMPLE_16BIT_MIN -32767
91 #define SAMPLE_16BIT_MAX_F 32767.0f
92 #define SAMPLE_16BIT_MIN_F -32767.0f
93
94 /* these mark the outer edges of the range considered "within" range
95 for a floating point sample value. values outside (and on the boundaries)
96 of this range will be clipped before conversion; values within this
97 range will be scaled to appropriate values for the target sample
98 type.
99 */
100
101 #define NORMALIZED_FLOAT_MIN -1.0f
102 #define NORMALIZED_FLOAT_MAX 1.0f
103
104 /* define this in case we end up on a platform that is missing
105 the real lrintf functions
106 */
107
108 #define f_round(f) lrintf(f)
109
/* Convert a normalized float sample "s" to a signed 16 bit integer "d",
   clipping anything outside [-1.0, +1.0] to the symmetric 16 bit limits */
#define float_16(s, d)\
	if ((s) <= NORMALIZED_FLOAT_MIN) {\
		(d) = SAMPLE_16BIT_MIN;\
	} else if ((s) >= NORMALIZED_FLOAT_MAX) {\
		(d) = SAMPLE_16BIT_MAX;\
	} else {\
		(d) = f_round ((s) * SAMPLE_16BIT_SCALING);\
	}
118
/* call this when "s" has already been scaled (e.g. when dithering)

   FIX: the low-clip branch previously assigned the float constant
   SAMPLE_16BIT_MIN_F to the (integer) destination.  The converted value
   is the same, but it is inconsistent with every other conversion macro
   in this file, so assign the integer constant SAMPLE_16BIT_MIN.
*/

#define float_16_scaled(s, d)\
	if ((s) <= SAMPLE_16BIT_MIN_F) {\
		(d) = SAMPLE_16BIT_MIN;\
	} else if ((s) >= SAMPLE_16BIT_MAX_F) { \
		(d) = SAMPLE_16BIT_MAX;\
	} else {\
		(d) = f_round ((s));\
	}
130
/* Convert a normalized float sample "s" to a signed 24 bit value placed
   in the upper 24 bits of a 32 bit integer "d".
   FIX: the upper-bit placement was computed with "<< 8", but left-shifting
   a negative value is undefined behaviour in C; multiplying by 256 yields
   the identical result (compilers emit the same shift) and is well defined. */
#define float_24u32(s, d) \
	if ((s) <= NORMALIZED_FLOAT_MIN) {\
		(d) = SAMPLE_24BIT_MIN * 256;\
	} else if ((s) >= NORMALIZED_FLOAT_MAX) {\
		(d) = SAMPLE_24BIT_MAX * 256;\
	} else {\
		(d) = f_round ((s) * SAMPLE_24BIT_SCALING) * 256;\
	}
139
/* call this when "s" has already been scaled (e.g. when dithering)

   FIX: as in float_24u32, "<< 8" on a (possibly negative) value is
   undefined behaviour; "* 256" is value-identical and well defined.
*/

#define float_24u32_scaled(s, d)\
	if ((s) <= SAMPLE_24BIT_MIN_F) {\
		(d) = SAMPLE_24BIT_MIN * 256;\
	} else if ((s) >= SAMPLE_24BIT_MAX_F) { \
		(d) = SAMPLE_24BIT_MAX * 256; \
	} else {\
		(d) = f_round ((s)) * 256; \
	}
151
/* Convert a normalized float sample "s" to a signed 24 bit integer "d"
   (right-aligned in an int32), clipping to the symmetric 24 bit limits */
#define float_24(s, d) \
	if ((s) <= NORMALIZED_FLOAT_MIN) {\
		(d) = SAMPLE_24BIT_MIN;\
	} else if ((s) >= NORMALIZED_FLOAT_MAX) {\
		(d) = SAMPLE_24BIT_MAX;\
	} else {\
		(d) = f_round ((s) * SAMPLE_24BIT_SCALING);\
	}
160
/* call this when "s" has already been scaled (e.g. when dithering);
   clips to the symmetric 24 bit integer limits
*/

#define float_24_scaled(s, d)\
	if ((s) <= SAMPLE_24BIT_MIN_F) {\
		(d) = SAMPLE_24BIT_MIN;\
	} else if ((s) >= SAMPLE_24BIT_MAX_F) { \
		(d) = SAMPLE_24BIT_MAX; \
	} else {\
		(d) = f_round ((s)); \
	}
172
173
174 #if defined (__SSE2__) && !defined (__sun__)
175
176 /* generates same as _mm_set_ps(1.f, 1.f, 1f., 1f) but faster */
static inline __m128 gen_one(void)
{
	/* Build 1.0f in every lane with pure integer ops:
	   all-ones (cmpeq x,x) >> 25 leaves 0x7f per lane; << 23 places it in
	   the exponent field, giving 0x3f800000 — the IEEE-754 bit pattern of
	   1.0f.  "volatile" only suppresses GCC's uninitialized-use warning. */
	volatile __m128i x = { 0 }; /* shut up, GCC */
	__m128i ones = _mm_cmpeq_epi32(x, x);
	return (__m128)_mm_slli_epi32 (_mm_srli_epi32(ones, 25), 23);
}
183
/* Clamp every lane of s into the closed interval [min, max]. */
static inline __m128 clip(__m128 s, __m128 min, __m128 max)
{
	const __m128 bounded_below = _mm_max_ps(s, min);
	return _mm_min_ps(bounded_below, max);
}
188
float_24_sse(__m128 s)189 static inline __m128i float_24_sse(__m128 s)
190 {
191 const __m128 upper_bound = gen_one(); /* NORMALIZED_FLOAT_MAX */
192 const __m128 lower_bound = _mm_sub_ps(_mm_setzero_ps(), upper_bound);
193
194 __m128 clipped = clip(s, lower_bound, upper_bound);
195 __m128 scaled = _mm_mul_ps(clipped, _mm_set1_ps(SAMPLE_24BIT_SCALING));
196 return _mm_cvtps_epi32(scaled);
197 }
198 #endif
199
200
201 #if defined (__ARM_NEON__) || defined (__ARM_NEON)
202
/* Clamp every lane of s into the closed interval [min, max]. */
static inline float32x4_t clip(float32x4_t s, float32x4_t min, float32x4_t max)
{
	const float32x4_t bounded_below = vmaxq_f32(s, min);
	return vminq_f32(bounded_below, max);
}
207
float_24_neon(float32x4_t s)208 static inline int32x4_t float_24_neon(float32x4_t s)
209 {
210 const float32x4_t upper_bound = vdupq_n_f32(NORMALIZED_FLOAT_MAX);
211 const float32x4_t lower_bound = vdupq_n_f32(NORMALIZED_FLOAT_MIN);
212
213 float32x4_t clipped = clip(s, lower_bound, upper_bound);
214 float32x4_t scaled = vmulq_f32(clipped, vdupq_n_f32(SAMPLE_24BIT_SCALING));
215 return vcvtq_s32_f32(scaled);
216 }
217
float_16_neon(float32x4_t s)218 static inline int16x4_t float_16_neon(float32x4_t s)
219 {
220 const float32x4_t upper_bound = vdupq_n_f32(NORMALIZED_FLOAT_MAX);
221 const float32x4_t lower_bound = vdupq_n_f32(NORMALIZED_FLOAT_MIN);
222
223 float32x4_t clipped = clip(s, lower_bound, upper_bound);
224 float32x4_t scaled = vmulq_f32(clipped, vdupq_n_f32(SAMPLE_16BIT_SCALING));
225 return vmovn_s32(vcvtq_s32_f32(scaled));
226 }
227 #endif
228
/* Linear Congruential noise generator. From the music-dsp list
 * less random than rand(), but good enough and 10x faster
 */
static unsigned int seed = 22222;

static inline unsigned int fast_rand() {
	/* one LCG step: seed = a*seed + c (mod 2^32 via unsigned wraparound) */
	const unsigned int mult = 196314165u;
	const unsigned int inc  = 907633515u;
	seed = mult * seed + inc;
	return seed;
}
238
239 /* functions for native float sample data */
240
/* Copy little-endian 32 bit float samples into the native float buffer.
   src_skip is in bytes.
   NOTE(review): no byte swapping is performed, so this assumes the host
   byte order already matches the little-endian source — confirm before
   relying on it on big-endian hosts. */
void sample_move_floatLE_sSs (jack_default_audio_sample_t *dst, char *src, unsigned long nsamples, unsigned long src_skip) {
	unsigned long remaining;
	for (remaining = nsamples; remaining > 0; remaining--) {
		*dst++ = *((float *) src);
		src += src_skip;
	}
}
248
/* Copy native float samples into a little-endian 32 bit float buffer.
   dst_skip is in bytes; the dither state is unused.
   NOTE(review): no byte swapping is performed — same host-endianness
   assumption as sample_move_floatLE_sSs. */
void sample_move_dS_floatLE (char *dst, jack_default_audio_sample_t *src, unsigned long nsamples, unsigned long dst_skip, dither_state_t *state) {
	unsigned long remaining;
	for (remaining = nsamples; remaining > 0; remaining--) {
		*((float *) dst) = *src++;
		dst += dst_skip;
	}
}
256
257 /* NOTES on function naming:
258
259 foo_bar_d<TYPE>_s<TYPE>
260
261 the "d<TYPE>" component defines the destination type for the operation
262 the "s<TYPE>" component defines the source type for the operation
263
264 TYPE can be one of:
265
266 S - sample is a jack_default_audio_sample_t, currently (October 2008) a 32 bit floating point value
267 Ss - like S but reverse endian from the host CPU
268 32u24 - sample is an signed 32 bit integer value, but data is in upper 24 bits only
269 32u24s - like 32u24 but reverse endian from the host CPU
270 24 - sample is an signed 24 bit integer value
271 24s - like 24 but reverse endian from the host CPU
272 16 - sample is an signed 16 bit integer value
273 16s - like 16 but reverse endian from the host CPU
274
275 For obvious reasons, the reverse endian versions only show as source types.
276
277 This covers all known sample formats at 16 bits or larger.
278 */
279
280 /* functions for native integer sample data */
281
/* Convert native float samples to 32 bit integers carrying a 24 bit value
   in the upper bits, written in reverse (non-host) byte order.
   dst_skip is in bytes; the dither state is unused. */
void sample_move_d32u24_sSs (char *dst, jack_default_audio_sample_t *src, unsigned long nsamples, unsigned long dst_skip, dither_state_t *state)
{
#if defined (__ARM_NEON__) || defined (__ARM_NEON)
	/* vectorized: 4 samples per iteration; remainder handled by the
	   scalar loop below */
	unsigned long unrolled = nsamples / 4;
	nsamples = nsamples & 3;

	while (unrolled--) {
		float32x4_t samples = vld1q_f32(src);
		int32x4_t converted = float_24_neon(samples);
		int32x4_t shifted = vshlq_n_s32(converted, 8);
		/* byte-swap each 32 bit lane to get the reverse-endian layout */
		shifted = vreinterpretq_s32_u8(vrev32q_u8(vreinterpretq_u8_s32(shifted)));

		switch(dst_skip) {
			case 4:
				/* packed destination: one full vector store */
				vst1q_s32((int32_t*)dst, shifted);
				break;
			default:
				/* interleaved destination: store lane by lane */
				vst1q_lane_s32((int32_t*)(dst), shifted, 0);
				vst1q_lane_s32((int32_t*)(dst+dst_skip), shifted, 1);
				vst1q_lane_s32((int32_t*)(dst+2*dst_skip), shifted, 2);
				vst1q_lane_s32((int32_t*)(dst+3*dst_skip), shifted, 3);
				break;
		}
		dst += 4*dst_skip;
		src+= 4;
	}
#endif

	int32_t z;

	while (nsamples--) {
		/* clip/scale to 24 bit and place it in the upper 24 bits */
		float_24u32 (*src, z);

#if __BYTE_ORDER == __LITTLE_ENDIAN
		/* emit bytes MSB-first: the reverse of host order */
		dst[0]=(char)(z>>24);
		dst[1]=(char)(z>>16);
		dst[2]=(char)(z>>8);
		dst[3]=(char)(z);
#elif __BYTE_ORDER == __BIG_ENDIAN
		/* emit bytes LSB-first: the reverse of host order */
		dst[0]=(char)(z);
		dst[1]=(char)(z>>8);
		dst[2]=(char)(z>>16);
		dst[3]=(char)(z>>24);
#endif
		dst += dst_skip;
		src++;
	}
}
331
/* Convert native float samples to 32 bit integers carrying a 24 bit value
   in the upper bits, native byte order.
   dst_skip is in bytes; the dither state is unused.
   NOTE(review): _mm_load_ps requires src to be 16-byte aligned —
   presumably guaranteed by jack's buffer allocation; confirm. */
void sample_move_d32u24_sS (char *dst, jack_default_audio_sample_t *src, unsigned long nsamples, unsigned long dst_skip, dither_state_t *state)
{
#if defined (__SSE2__) && !defined (__sun__)
	__m128 int_max = _mm_set1_ps(SAMPLE_24BIT_MAX_F);
	__m128 int_min = _mm_sub_ps(_mm_setzero_ps(), int_max);
	__m128 factor = int_max;

	unsigned long unrolled = nsamples / 4;
	nsamples = nsamples & 3;

	while (unrolled--) {
		__m128 in = _mm_load_ps(src);
		__m128 scaled = _mm_mul_ps(in, factor);
		__m128 clipped = clip(scaled, int_min, int_max);

		__m128i y = _mm_cvttps_epi32(clipped);
		__m128i shifted = _mm_slli_epi32(y, 8);

#ifdef __SSE4_1__
		*(int32_t*)dst = _mm_extract_epi32(shifted, 0);
		*(int32_t*)(dst+dst_skip) = _mm_extract_epi32(shifted, 1);
		*(int32_t*)(dst+2*dst_skip) = _mm_extract_epi32(shifted, 2);
		*(int32_t*)(dst+3*dst_skip) = _mm_extract_epi32(shifted, 3);
#else
		/* no 32 bit extract before SSE4.1: rotate the vector and store
		   the low scalar of each rotation */
		__m128i shuffled1 = _mm_shuffle_epi32(shifted, _MM_SHUFFLE(0, 3, 2, 1));
		__m128i shuffled2 = _mm_shuffle_epi32(shifted, _MM_SHUFFLE(1, 0, 3, 2));
		__m128i shuffled3 = _mm_shuffle_epi32(shifted, _MM_SHUFFLE(2, 1, 0, 3));

		_mm_store_ss((float*)dst, (__m128)shifted);

		_mm_store_ss((float*)(dst+dst_skip), (__m128)shuffled1);
		_mm_store_ss((float*)(dst+2*dst_skip), (__m128)shuffled2);
		_mm_store_ss((float*)(dst+3*dst_skip), (__m128)shuffled3);
#endif
		dst += 4*dst_skip;

		src+= 4;
	}

	/* scalar tail for the SSE2 path (0..3 remaining samples) */
	while (nsamples--) {
		__m128 in = _mm_load_ss(src);
		__m128 scaled = _mm_mul_ss(in, factor);
		__m128 clipped = _mm_min_ss(int_max, _mm_max_ss(scaled, int_min));

		int y = _mm_cvttss_si32(clipped);
		*((int *) dst) = y<<8;

		dst += dst_skip;
		src++;
	}

#elif defined (__ARM_NEON__) || defined (__ARM_NEON)
	unsigned long unrolled = nsamples / 4;
	nsamples = nsamples & 3;

	while (unrolled--) {
		float32x4_t samples = vld1q_f32(src);
		int32x4_t converted = float_24_neon(samples);
		int32x4_t shifted = vshlq_n_s32(converted, 8);

		switch(dst_skip) {
			case 4:
				vst1q_s32((int32_t*)dst, shifted);
				break;
			default:
				vst1q_lane_s32((int32_t*)(dst), shifted, 0);
				vst1q_lane_s32((int32_t*)(dst+dst_skip), shifted, 1);
				vst1q_lane_s32((int32_t*)(dst+2*dst_skip), shifted, 2);
				vst1q_lane_s32((int32_t*)(dst+3*dst_skip), shifted, 3);
				break;
		}
		dst += 4*dst_skip;

		src+= 4;
	}
#endif

	/* Scalar fallback / NEON remainder loop.
	   BUG FIX: this was guarded with plain "#if !defined (__SSE2__)", so a
	   build with __SSE2__ defined on __sun__ (where the SSE2 block above is
	   disabled) converted no samples at all.  Compile it in whenever the
	   SSE2 block was not. */
#if !defined (__SSE2__) || defined (__sun__)
	while (nsamples--) {
		float_24u32 (*src, *((int32_t*) dst));
		dst += dst_skip;
		src++;
	}
#endif
}
417
/* Convert reverse-endian 32 bit integers (24 bit value in the upper bits)
   to native float samples.  src_skip is in bytes. */
void sample_move_dS_s32u24s (jack_default_audio_sample_t *dst, char *src, unsigned long nsamples, unsigned long src_skip)
{
#if defined (__ARM_NEON__) || defined (__ARM_NEON)
	float32x4_t factor = vdupq_n_f32(1.0 / SAMPLE_24BIT_SCALING);
	unsigned long unrolled = nsamples / 4;
	while (unrolled--) {
		int32x4_t src128;
		switch(src_skip)
		{
			case 4:
				/* packed source: one full vector load */
				src128 = vld1q_s32((int32_t*)src);
				break;
			case 8:
				/* 2-channel interleave: de-interleaved load, keep channel 0 */
				src128 = vld2q_s32((int32_t*)src).val[0];
				break;
			default:
				/* NOTE(review): src128 is passed to vld1q_lane_s32 before
				   any lane has been written — formally an uninitialized
				   read even though every lane is then overwritten;
				   consider seeding it with vdupq_n_s32(0). */
				src128 = vld1q_lane_s32((int32_t*)src, src128, 0);
				src128 = vld1q_lane_s32((int32_t*)(src+src_skip), src128, 1);
				src128 = vld1q_lane_s32((int32_t*)(src+2*src_skip), src128, 2);
				src128 = vld1q_lane_s32((int32_t*)(src+3*src_skip), src128, 3);
				break;
		}
		/* byte-swap each lane into host order, then arithmetic-shift the
		   24 bit value down (sign-extending) and rescale to [-1, +1] */
		src128 = vreinterpretq_s32_u8(vrev32q_u8(vreinterpretq_u8_s32(src128)));
		int32x4_t shifted = vshrq_n_s32(src128, 8);
		float32x4_t as_float = vcvtq_f32_s32(shifted);
		float32x4_t divided = vmulq_f32(as_float, factor);
		vst1q_f32(dst, divided);

		src += 4*src_skip;
		dst += 4;
	}
	nsamples = nsamples & 3;
#endif

	/* ALERT: signed sign-extension portability !!! */

	const jack_default_audio_sample_t scaling = 1.0/SAMPLE_24BIT_SCALING;

	while (nsamples--) {
		int x;
		/* assemble the reverse-endian bytes MSB-first into a host int */
#if __BYTE_ORDER == __LITTLE_ENDIAN
		x = (unsigned char)(src[0]);
		x <<= 8;
		x |= (unsigned char)(src[1]);
		x <<= 8;
		x |= (unsigned char)(src[2]);
		x <<= 8;
		x |= (unsigned char)(src[3]);
#elif __BYTE_ORDER == __BIG_ENDIAN
		x = (unsigned char)(src[3]);
		x <<= 8;
		x |= (unsigned char)(src[2]);
		x <<= 8;
		x |= (unsigned char)(src[1]);
		x <<= 8;
		x |= (unsigned char)(src[0]);
#endif
		/* ">> 8" relies on arithmetic (sign-extending) right shift of a
		   negative int — implementation-defined, see ALERT above */
		*dst = (x >> 8) * scaling;
		dst++;
		src += src_skip;
	}
}
480
/* Convert native-endian 32 bit integers (24 bit value in the upper bits)
   to native float samples.  src_skip is in bytes. */
void sample_move_dS_s32u24 (jack_default_audio_sample_t *dst, char *src, unsigned long nsamples, unsigned long src_skip)
{
#if defined (__SSE2__) && !defined (__sun__)
	unsigned long unrolled = nsamples / 4;
	static float inv_sample_max_24bit = 1.0 / SAMPLE_24BIT_SCALING;
	__m128 factor = _mm_set1_ps(inv_sample_max_24bit);
	while (unrolled--)
	{
		/* gather four (possibly strided) ints via scalar loads */
		int i1 = *((int *) src);
		src+= src_skip;
		int i2 = *((int *) src);
		src+= src_skip;
		int i3 = *((int *) src);
		src+= src_skip;
		int i4 = *((int *) src);
		src+= src_skip;

		/* NOTE: this vector shadows the char* parameter "src" for the
		   rest of the iteration (the pointer was already advanced above) */
		__m128i src = _mm_set_epi32(i4, i3, i2, i1);
		/* arithmetic shift sign-extends the 24 bit value */
		__m128i shifted = _mm_srai_epi32(src, 8);

		__m128 as_float = _mm_cvtepi32_ps(shifted);
		__m128 divided = _mm_mul_ps(as_float, factor);

		_mm_storeu_ps(dst, divided);

		dst += 4;
	}
	nsamples = nsamples & 3;
#elif defined (__ARM_NEON__) || defined (__ARM_NEON)
	unsigned long unrolled = nsamples / 4;
	float32x4_t factor = vdupq_n_f32(1.0 / SAMPLE_24BIT_SCALING);
	while (unrolled--) {
		int32x4_t src128;
		switch(src_skip) {
			case 4:
				/* packed source: one full vector load */
				src128 = vld1q_s32((int32_t*)src);
				break;
			case 8:
				/* 2-channel interleave: de-interleaved load, keep channel 0 */
				src128 = vld2q_s32((int32_t*)src).val[0];
				break;
			default:
				/* NOTE(review): src128 lanes are loaded one at a time into
				   an uninitialized vector — formally an uninitialized read;
				   consider seeding it with vdupq_n_s32(0). */
				src128 = vld1q_lane_s32((int32_t*)src, src128, 0);
				src128 = vld1q_lane_s32((int32_t*)(src+src_skip), src128, 1);
				src128 = vld1q_lane_s32((int32_t*)(src+2*src_skip), src128, 2);
				src128 = vld1q_lane_s32((int32_t*)(src+3*src_skip), src128, 3);
				break;
		}
		int32x4_t shifted = vshrq_n_s32(src128, 8);
		float32x4_t as_float = vcvtq_f32_s32(shifted);
		float32x4_t divided = vmulq_f32(as_float, factor);
		vst1q_f32(dst, divided);

		src += 4*src_skip;
		dst += 4;
	}
	nsamples = nsamples & 3;
#endif

	/* ALERT: signed sign-extension portability !!! */

	const jack_default_audio_sample_t scaling = 1.0/SAMPLE_24BIT_SCALING;
	while (nsamples--) {
		/* ">> 8" relies on arithmetic right shift of a negative int */
		*dst = (*((int *) src) >> 8) * scaling;
		dst++;
		src += src_skip;
	}
}
548
/* Convert native float samples to packed 24 bit integers (3 bytes per
   sample) in reverse (non-host) byte order.
   dst_skip is in bytes (>= 3); the dither state is unused. */
void sample_move_d24_sSs (char *dst, jack_default_audio_sample_t *src, unsigned long nsamples, unsigned long dst_skip, dither_state_t *state)
{
#if defined (__ARM_NEON__) || defined (__ARM_NEON)
	unsigned long unrolled = nsamples / 4;
	while (unrolled--) {
		int i;
		int32_t z[4];
		float32x4_t samples = vld1q_f32(src);
		int32x4_t converted = float_24_neon(samples);
		/* byte-swap each 32 bit lane, then drop the (now leading) pad byte
		   when copying the 3 payload bytes out below */
		converted = vreinterpretq_s32_u8(vrev32q_u8(vreinterpretq_u8_s32(converted)));
		vst1q_s32(z, converted);

		for (i = 0; i != 4; ++i) {
			memcpy (dst, ((char*)(z+i))+1, 3);
			dst += dst_skip;
		}
		src += 4;
	}
	nsamples = nsamples & 3;
#endif

	int32_t z;

	while (nsamples--) {
		/* clip/scale to a right-aligned 24 bit value */
		float_24 (*src, z);
#if __BYTE_ORDER == __LITTLE_ENDIAN
		/* emit the 3 bytes MSB-first: reverse of host order */
		dst[0]=(char)(z>>16);
		dst[1]=(char)(z>>8);
		dst[2]=(char)(z);
#elif __BYTE_ORDER == __BIG_ENDIAN
		/* emit the 3 bytes LSB-first: reverse of host order */
		dst[0]=(char)(z);
		dst[1]=(char)(z>>8);
		dst[2]=(char)(z>>16);
#endif
		dst += dst_skip;
		src++;
	}
}
587
/* Convert native float samples to packed 24 bit integers (3 bytes per
   sample) in native byte order.
   dst_skip is in bytes (>= 3); the dither state is unused. */
void sample_move_d24_sS (char *dst, jack_default_audio_sample_t *src, unsigned long nsamples, unsigned long dst_skip, dither_state_t *state)
{
#if defined (__SSE2__) && !defined (__sun__)
	/* NOTE(review): this sets the thread's SSE rounding mode (MXCSR) and
	   never restores it — confirm that round-to-nearest is acceptable for
	   the rest of the process */
	_MM_SET_ROUNDING_MODE(_MM_ROUND_NEAREST);
	while (nsamples >= 4) {
		int i;
		int32_t z[4];
		__m128 samples = _mm_loadu_ps(src);
		__m128i converted = float_24_sse(samples);

#ifdef __SSE4_1__
		z[0] = _mm_extract_epi32(converted, 0);
		z[1] = _mm_extract_epi32(converted, 1);
		z[2] = _mm_extract_epi32(converted, 2);
		z[3] = _mm_extract_epi32(converted, 3);
#else
		/* no 32 bit extract before SSE4.1: rotate the vector and store
		   the low scalar of each rotation */
		__m128i shuffled1 = _mm_shuffle_epi32(converted, _MM_SHUFFLE(0, 3, 2, 1));
		__m128i shuffled2 = _mm_shuffle_epi32(converted, _MM_SHUFFLE(1, 0, 3, 2));
		__m128i shuffled3 = _mm_shuffle_epi32(converted, _MM_SHUFFLE(2, 1, 0, 3));

		_mm_store_ss((float*)z, (__m128)converted);
		_mm_store_ss((float*)z+1, (__m128)shuffled1);
		_mm_store_ss((float*)z+2, (__m128)shuffled2);
		_mm_store_ss((float*)z+3, (__m128)shuffled3);
#endif

		for (i = 0; i != 4; ++i) {
			/* low 3 bytes of each int32 hold the value (little-endian host) */
			memcpy (dst, z+i, 3);
			dst += dst_skip;
		}

		nsamples -= 4;
		src += 4;
	}
#elif defined (__ARM_NEON__) || defined (__ARM_NEON)
	unsigned long unrolled = nsamples / 4;
	while (unrolled--) {
		int i;
		int32_t z[4];
		float32x4_t samples = vld1q_f32(src);
		int32x4_t converted = float_24_neon(samples);
		vst1q_s32(z, converted);

		for (i = 0; i != 4; ++i) {
			memcpy (dst, z+i, 3);
			dst += dst_skip;
		}
		src += 4;
	}
	nsamples = nsamples & 3;
#endif

	int32_t z;

	while (nsamples--) {
		float_24 (*src, z);
#if __BYTE_ORDER == __LITTLE_ENDIAN
		/* the 24 bit value occupies the low 3 bytes */
		memcpy (dst, &z, 3);
#elif __BYTE_ORDER == __BIG_ENDIAN
		/* the high (pad) byte is at offset 0: skip it */
		memcpy (dst, (char *)&z + 1, 3);
#endif
		dst += dst_skip;
		src++;
	}
}
653
/* Convert reverse-endian packed 24 bit integer samples (3 bytes each) to
   native float samples.  src_skip is in bytes (>= 3). */
void sample_move_dS_s24s (jack_default_audio_sample_t *dst, char *src, unsigned long nsamples, unsigned long src_skip)
{
	const jack_default_audio_sample_t scaling = 1.0/SAMPLE_24BIT_SCALING;

#if defined (__ARM_NEON__) || defined (__ARM_NEON)
	/* we shift 8 to the right by dividing by 256.0 -> no sign extra handling */
	const float32x4_t vscaling = vdupq_n_f32(scaling/256.0);
	int32_t x[4];
	memset(x, 0, sizeof(x));
	unsigned long unrolled = nsamples / 4;
	while (unrolled--) {
#if __BYTE_ORDER == __BIG_ENDIAN /* ARM big endian?? */
		/* right aligned / inverse sequence below -> *256 */
		memcpy(((char*)&x[0])+1, src, 3);
		memcpy(((char*)&x[1])+1, src+src_skip, 3);
		memcpy(((char*)&x[2])+1, src+2*src_skip, 3);
		memcpy(((char*)&x[3])+1, src+3*src_skip, 3);
#else
		memcpy(&x[0], src, 3);
		memcpy(&x[1], src+src_skip, 3);
		memcpy(&x[2], src+2*src_skip, 3);
		memcpy(&x[3], src+3*src_skip, 3);
#endif
		src += 4 * src_skip;

		/* lane-wise byte swap into host order, then convert and rescale
		   (the /256 in vscaling performs the implicit ">> 8") */
		int32x4_t source = vld1q_s32(x);
		source = vreinterpretq_s32_u8(vrev32q_u8(vreinterpretq_u8_s32(source)));
		float32x4_t converted = vcvtq_f32_s32(source);
		float32x4_t scaled = vmulq_f32(converted, vscaling);
		vst1q_f32(dst, scaled);
		dst += 4;
	}
	nsamples = nsamples & 3;
#endif

	/* ALERT: signed sign-extension portability !!! */

	while (nsamples--) {
		int x;
		/* assemble the reverse-endian bytes MSB-first into a host int */
#if __BYTE_ORDER == __LITTLE_ENDIAN
		x = (unsigned char)(src[0]);
		x <<= 8;
		x |= (unsigned char)(src[1]);
		x <<= 8;
		x |= (unsigned char)(src[2]);
		/* correct sign bit and the rest of the top byte.
		   BUG FIX: "0xff << 24" shifts a bit into the sign position of a
		   32 bit int — undefined behaviour; ~0xffffff produces the same
		   0xff000000 bit pattern without UB. */
		if (src[0] & 0x80) {
			x |= ~0xffffff;
		}
#elif __BYTE_ORDER == __BIG_ENDIAN
		x = (unsigned char)(src[2]);
		x <<= 8;
		x |= (unsigned char)(src[1]);
		x <<= 8;
		x |= (unsigned char)(src[0]);
		/* correct sign bit and the rest of the top byte (see BUG FIX above) */
		if (src[2] & 0x80) {
			x |= ~0xffffff;
		}
#endif
		*dst = x * scaling;
		dst++;
		src += src_skip;
	}
}
719
/* Convert native-endian packed 24 bit integer samples (3 bytes each) to
   native float samples.  src_skip is in bytes (>= 3). */
void sample_move_dS_s24 (jack_default_audio_sample_t *dst, char *src, unsigned long nsamples, unsigned long src_skip)
{
	const jack_default_audio_sample_t scaling = 1.f/SAMPLE_24BIT_SCALING;

#if defined (__SSE2__) && !defined (__sun__)
	const __m128 scaling_block = _mm_set_ps1(scaling);
	while (nsamples >= 4) {
		/* BUG FIX: the bottom byte of each int was left indeterminate by
		   the 3 byte memcpy below, and reading an indeterminate value is
		   UB — zero-initialize (that byte is discarded by the arithmetic
		   shift anyway). */
		int x0 = 0, x1 = 0, x2 = 0, x3 = 0;

		/* place the 3 payload bytes in the upper bytes of each int */
		memcpy((char*)&x0 + 1, src, 3);
		memcpy((char*)&x1 + 1, src+src_skip, 3);
		memcpy((char*)&x2 + 1, src+2*src_skip, 3);
		memcpy((char*)&x3 + 1, src+3*src_skip, 3);
		src += 4 * src_skip;

		const __m128i block_i = _mm_set_epi32(x3, x2, x1, x0);
		/* arithmetic shift sign-extends the 24 bit value */
		const __m128i shifted = _mm_srai_epi32(block_i, 8);
		const __m128 converted = _mm_cvtepi32_ps (shifted);
		const __m128 scaled = _mm_mul_ps(converted, scaling_block);
		_mm_storeu_ps(dst, scaled);
		dst += 4;
		nsamples -= 4;
	}
#elif defined (__ARM_NEON__) || defined (__ARM_NEON)
	/* we shift 8 to the right by dividing by 256.0 -> no sign extra handling */
	const float32x4_t vscaling = vdupq_n_f32(scaling/256.0);
	int32_t x[4];
	memset(x, 0, sizeof(x));
	unsigned long unrolled = nsamples / 4;
	while (unrolled--) {
#if __BYTE_ORDER == __BIG_ENDIAN /* ARM big endian?? */
		/* left aligned -> *256 */
		memcpy(&x[0], src, 3);
		memcpy(&x[1], src+src_skip, 3);
		memcpy(&x[2], src+2*src_skip, 3);
		memcpy(&x[3], src+3*src_skip, 3);
#else
		memcpy(((char*)&x[0])+1, src, 3);
		memcpy(((char*)&x[1])+1, src+src_skip, 3);
		memcpy(((char*)&x[2])+1, src+2*src_skip, 3);
		memcpy(((char*)&x[3])+1, src+3*src_skip, 3);
#endif
		src += 4 * src_skip;

		int32x4_t source = vld1q_s32(x);
		float32x4_t converted = vcvtq_f32_s32(source);
		float32x4_t scaled = vmulq_f32(converted, vscaling);
		vst1q_f32(dst, scaled);
		dst += 4;
	}
	nsamples = nsamples & 3;
#endif

	while (nsamples--) {
		/* BUG FIX: zero-initialize so the byte not written by memcpy is
		   not an indeterminate value when x is read below */
		int x = 0;
#if __BYTE_ORDER == __LITTLE_ENDIAN
		memcpy((char*)&x + 1, src, 3);
#elif __BYTE_ORDER == __BIG_ENDIAN
		memcpy(&x, src, 3);
#endif
		/* NOTE(review): relies on ">>" of a negative int being an
		   arithmetic (sign-extending) shift — implementation-defined but
		   true on all compilers jack supports */
		x >>= 8;
		*dst = x * scaling;
		dst++;
		src += src_skip;
	}
}
786
787
/* Convert native float samples to reverse-endian 16 bit integers.
   dst_skip is in bytes; the dither state is unused. */
void sample_move_d16_sSs (char *dst, jack_default_audio_sample_t *src, unsigned long nsamples, unsigned long dst_skip, dither_state_t *state)
{
#if defined (__ARM_NEON__) || defined (__ARM_NEON)
	unsigned long unrolled = nsamples / 4;
	nsamples = nsamples & 3;

	while (unrolled--) {
		float32x4_t samples = vld1q_f32(src);
		int16x4_t converted = float_16_neon(samples);
		/* swap the two bytes of every 16 bit lane -> reverse endian */
		converted = vreinterpret_s16_u8(vrev16_u8(vreinterpret_u8_s16(converted)));

		switch(dst_skip) {
			case 2:
				/* packed destination: single 4-lane store */
				vst1_s16((int16_t*)dst, converted);
				break;
			default:
				/* interleaved destination: store lane by lane */
				vst1_lane_s16((int16_t*)(dst), converted, 0);
				vst1_lane_s16((int16_t*)(dst+dst_skip), converted, 1);
				vst1_lane_s16((int16_t*)(dst+2*dst_skip), converted, 2);
				vst1_lane_s16((int16_t*)(dst+3*dst_skip), converted, 3);
				break;
		}
		dst += 4*dst_skip;
		src+= 4;
	}
#endif
	int16_t tmp;

	while (nsamples--) {
		// float_16 (*src, tmp);

		/* open-coded equivalent of the float_16() macro: clip to
		   [-1, +1] and scale to the symmetric 16 bit range */
		if (*src <= NORMALIZED_FLOAT_MIN) {
			tmp = SAMPLE_16BIT_MIN;
		} else if (*src >= NORMALIZED_FLOAT_MAX) {
			tmp = SAMPLE_16BIT_MAX;
		} else {
			tmp = (int16_t) f_round (*src * SAMPLE_16BIT_SCALING);
		}

#if __BYTE_ORDER == __LITTLE_ENDIAN
		/* MSB first: reverse of host order */
		dst[0]=(char)(tmp>>8);
		dst[1]=(char)(tmp);
#elif __BYTE_ORDER == __BIG_ENDIAN
		/* LSB first: reverse of host order */
		dst[0]=(char)(tmp);
		dst[1]=(char)(tmp>>8);
#endif
		dst += dst_skip;
		src++;
	}
}
838
/* Convert native float samples to native-endian 16 bit integers.
   dst_skip is in bytes; the dither state is unused. */
void sample_move_d16_sS (char *dst, jack_default_audio_sample_t *src, unsigned long nsamples, unsigned long dst_skip, dither_state_t *state)
{
#if defined (__ARM_NEON__) || defined (__ARM_NEON)
	unsigned long unrolled = nsamples / 4;
	nsamples = nsamples & 3;

	while (unrolled--) {
		float32x4_t samples = vld1q_f32(src);
		int16x4_t converted = float_16_neon(samples);

		switch(dst_skip) {
			case 2:
				/* packed destination: single 4-lane store */
				vst1_s16((int16_t*)dst, converted);
				break;
			default:
				/* interleaved destination: store lane by lane */
				vst1_lane_s16((int16_t*)(dst), converted, 0);
				vst1_lane_s16((int16_t*)(dst+dst_skip), converted, 1);
				vst1_lane_s16((int16_t*)(dst+2*dst_skip), converted, 2);
				vst1_lane_s16((int16_t*)(dst+3*dst_skip), converted, 3);
				break;
		}
		dst += 4*dst_skip;
		src+= 4;
	}
#endif
	/* scalar remainder (or the whole buffer without NEON) */
	while (nsamples--) {
		float_16 (*src, *((int16_t*) dst));
		dst += dst_skip;
		src++;
	}
}
870
/* Rectangular-dithered conversion of native floats to reverse-endian
   16 bit integers.  dst_skip is in bytes; state is unused here. */
void sample_move_dither_rect_d16_sSs (char *dst, jack_default_audio_sample_t *src, unsigned long nsamples, unsigned long dst_skip, dither_state_t *state)
{
	jack_default_audio_sample_t val;
	int16_t tmp;

	while (nsamples--) {
		/* scale to 16 bit range, then add uniform noise in [-0.5, +0.5) LSB */
		val = (*src * SAMPLE_16BIT_SCALING) + fast_rand() / (float) UINT_MAX - 0.5f;
		float_16_scaled (val, tmp);
#if __BYTE_ORDER == __LITTLE_ENDIAN
		/* write bytes in reverse of host order */
		dst[0]=(char)(tmp>>8);
		dst[1]=(char)(tmp);
#elif __BYTE_ORDER == __BIG_ENDIAN
		dst[0]=(char)(tmp);
		dst[1]=(char)(tmp>>8);
#endif
		dst += dst_skip;
		src++;
	}
}
890
/* Rectangular-dithered conversion of native floats to native-endian
   16 bit integers.  dst_skip is in bytes; state is unused here. */
void sample_move_dither_rect_d16_sS (char *dst, jack_default_audio_sample_t *src, unsigned long nsamples, unsigned long dst_skip, dither_state_t *state)
{
	jack_default_audio_sample_t val;

	while (nsamples--) {
		/* scale to 16 bit range, then add uniform noise in [-0.5, +0.5) LSB */
		val = (*src * SAMPLE_16BIT_SCALING) + fast_rand() / (float)UINT_MAX - 0.5f;
		float_16_scaled (val, *((int16_t*) dst));
		dst += dst_skip;
		src++;
	}
}
902
/* Triangular (TPDF) dithered conversion of native floats to
   reverse-endian 16 bit integers.  dst_skip is in bytes; state unused. */
void sample_move_dither_tri_d16_sSs (char *dst, jack_default_audio_sample_t *src, unsigned long nsamples, unsigned long dst_skip, dither_state_t *state)
{
	jack_default_audio_sample_t val;
	int16_t tmp;

	while (nsamples--) {
		/* TPDF noise in [-1, +1) LSB: sum of two uniform variates */
		val = (*src * SAMPLE_16BIT_SCALING) + ((float)fast_rand() + (float)fast_rand()) / (float)UINT_MAX - 1.0f;
		float_16_scaled (val, tmp);

#if __BYTE_ORDER == __LITTLE_ENDIAN
		/* write bytes in reverse of host order */
		dst[0]=(char)(tmp>>8);
		dst[1]=(char)(tmp);
#elif __BYTE_ORDER == __BIG_ENDIAN
		dst[0]=(char)(tmp);
		dst[1]=(char)(tmp>>8);
#endif
		dst += dst_skip;
		src++;
	}
}
923
/* Triangular (TPDF) dithered conversion of native floats to
   native-endian 16 bit integers.  dst_skip is in bytes; state unused. */
void sample_move_dither_tri_d16_sS (char *dst, jack_default_audio_sample_t *src, unsigned long nsamples, unsigned long dst_skip, dither_state_t *state)
{
	jack_default_audio_sample_t val;

	while (nsamples--) {
		/* TPDF noise in [-1, +1) LSB: sum of two uniform variates */
		val = (*src * SAMPLE_16BIT_SCALING) + ((float)fast_rand() + (float)fast_rand()) / (float)UINT_MAX - 1.0f;
		float_16_scaled (val, *((int16_t*) dst));
		dst += dst_skip;
		src++;
	}
}
935
/* Noise-shaped dithered conversion of native floats to reverse-endian
   16 bit integers.  Keeps the filtered quantization-error history in
   state->e[] (ring buffer indexed by state->idx) and the previous noise
   sample in state->rm1. */
void sample_move_dither_shaped_d16_sSs (char *dst, jack_default_audio_sample_t *src, unsigned long nsamples, unsigned long dst_skip, dither_state_t *state)
{
	jack_default_audio_sample_t x;
	jack_default_audio_sample_t xe; /* the input sample - filtered error */
	jack_default_audio_sample_t xp; /* x' */
	float r;
	float rm1 = state->rm1;
	unsigned int idx = state->idx;
	int16_t tmp;

	while (nsamples--) {
		x = *src * SAMPLE_16BIT_SCALING;
		/* TPDF noise in [-1, +1) LSB: sum of two uniform variates */
		r = ((float)fast_rand() + (float)fast_rand()) / (float)UINT_MAX - 1.0f;
		/* Filter the error with Lipshitz's minimally audible FIR:
		   [2.033 -2.165 1.959 -1.590 0.6149] */
		xe = x
		     - state->e[idx] * 2.033f
		     + state->e[(idx - 1) & DITHER_BUF_MASK] * 2.165f
		     - state->e[(idx - 2) & DITHER_BUF_MASK] * 1.959f
		     + state->e[(idx - 3) & DITHER_BUF_MASK] * 1.590f
		     - state->e[(idx - 4) & DITHER_BUF_MASK] * 0.6149f;
		/* high-passed dither: difference of successive noise samples */
		xp = xe + r - rm1;
		rm1 = r;

		float_16_scaled (xp, tmp);

		/* Intrinsic z^-1 delay */
		idx = (idx + 1) & DITHER_BUF_MASK;
		state->e[idx] = xp - xe;

#if __BYTE_ORDER == __LITTLE_ENDIAN
		/* write bytes in reverse of host order */
		dst[0]=(char)(tmp>>8);
		dst[1]=(char)(tmp);
#elif __BYTE_ORDER == __BIG_ENDIAN
		dst[0]=(char)(tmp);
		dst[1]=(char)(tmp>>8);
#endif
		dst += dst_skip;
		src++;
	}
	state->rm1 = rm1;
	state->idx = idx;
}
979
void sample_move_dither_shaped_d16_sS (char *dst, jack_default_audio_sample_t *src, unsigned long nsamples, unsigned long dst_skip, dither_state_t *state)
{
	/* Noise-shaped dither to native-endian 16 bit integers: the
	   quantization error of each sample is filtered and subtracted
	   from the following ones, pushing the error spectrum towards
	   less audible frequencies. */
	float prev_rand = state->rm1;
	unsigned int pos = state->idx;

	while (nsamples--) {
		jack_default_audio_sample_t scaled = *src * SAMPLE_16BIT_SCALING;
		/* high-passed TPDF dither in (-1, 1) */
		float cur_rand = ((float) fast_rand() + (float) fast_rand()) / (float) UINT_MAX - 1.0f;
		/* Filter the error with Lipshitz's minimally audible FIR:
		   [2.033 -2.165 1.959 -1.590 0.6149] */
		jack_default_audio_sample_t shaped = scaled
			- state->e[pos] * 2.033f
			+ state->e[(pos - 1) & DITHER_BUF_MASK] * 2.165f
			- state->e[(pos - 2) & DITHER_BUF_MASK] * 1.959f
			+ state->e[(pos - 3) & DITHER_BUF_MASK] * 1.590f
			- state->e[(pos - 4) & DITHER_BUF_MASK] * 0.6149f;
		jack_default_audio_sample_t dithered = shaped + cur_rand - prev_rand;
		prev_rand = cur_rand;

		float_16_scaled (dithered, *((int16_t *) dst));

		/* Intrinsic z^-1 delay */
		pos = (pos + 1) & DITHER_BUF_MASK;
		/* record quantization error: quantized output minus input */
		state->e[pos] = *((int16_t *) dst) - shaped;

		dst += dst_skip;
		src++;
	}
	state->rm1 = prev_rand;
	state->idx = pos;
}
1015
void sample_move_dS_s16s (jack_default_audio_sample_t *dst, char *src, unsigned long nsamples, unsigned long src_skip)
{
	/* Convert byte-swapped (non-host-endian) signed 16 bit integer
	   samples to normalized floats. */
	short z;
	const jack_default_audio_sample_t scaling = 1.0/SAMPLE_16BIT_SCALING;
#if defined (__ARM_NEON__) || defined (__ARM_NEON)
	const float32x4_t vscaling = vdupq_n_f32(scaling);
	unsigned long unrolled = nsamples / 4;
	while (unrolled--) {
		/* Zero-initialize the vector: in the default case below,
		   vld1_lane_s16() merges a single lane into an existing
		   vector, so reading from an uninitialized one is undefined
		   behaviour (and triggers -Wuninitialized). */
		int16x4_t source16x4 = vdup_n_s16(0);
		switch(src_skip) {
		case 2:
			/* contiguous samples: one plain 4-lane load */
			source16x4 = vld1_s16((int16_t*)src);
			break;
		case 4:
			/* 2-channel interleave: de-interleaving load, keep channel 0 */
			source16x4 = vld2_s16((int16_t*)src).val[0];
			break;
		default:
			/* arbitrary stride: gather lane by lane */
			source16x4 = vld1_lane_s16((int16_t*)src, source16x4, 0);
			source16x4 = vld1_lane_s16((int16_t*)(src+src_skip), source16x4, 1);
			source16x4 = vld1_lane_s16((int16_t*)(src+2*src_skip), source16x4, 2);
			source16x4 = vld1_lane_s16((int16_t*)(src+3*src_skip), source16x4, 3);
			break;
		}
		/* swap the bytes of each 16 bit lane to host order */
		source16x4 = vreinterpret_s16_u8(vrev16_u8(vreinterpret_u8_s16(source16x4)));
		int32x4_t source32x4 = vmovl_s16(source16x4);
		src += 4 * src_skip;

		float32x4_t converted = vcvtq_f32_s32(source32x4);
		float32x4_t scaled = vmulq_f32(converted, vscaling);
		vst1q_f32(dst, scaled);
		dst += 4;
	}
	nsamples = nsamples & 3;	/* leftover samples go to the scalar tail */
#endif

	/* ALERT: signed sign-extension portability !!! */
	while (nsamples--) {
		/* reassemble the sample from its swapped bytes */
#if __BYTE_ORDER == __LITTLE_ENDIAN
		z = (unsigned char)(src[0]);
		z <<= 8;
		z |= (unsigned char)(src[1]);
#elif __BYTE_ORDER == __BIG_ENDIAN
		z = (unsigned char)(src[1]);
		z <<= 8;
		z |= (unsigned char)(src[0]);
#endif
		*dst = z * scaling;
		dst++;
		src += src_skip;
	}
}
1067
void sample_move_dS_s16 (jack_default_audio_sample_t *dst, char *src, unsigned long nsamples, unsigned long src_skip)
{
	/* Convert native-endian signed 16 bit integer samples to
	   normalized floats. */
	/* ALERT: signed sign-extension portability !!! */
	const jack_default_audio_sample_t scaling = 1.0/SAMPLE_16BIT_SCALING;
#if defined (__ARM_NEON__) || defined (__ARM_NEON)
	const float32x4_t vscaling = vdupq_n_f32(scaling);
	unsigned long unrolled = nsamples / 4;
	while (unrolled--) {
		/* Zero-initialize the vector: in the default case below,
		   vld1_lane_s16() merges a single lane into an existing
		   vector, so reading from an uninitialized one is undefined
		   behaviour (and triggers -Wuninitialized). */
		int16x4_t source16x4 = vdup_n_s16(0);
		switch(src_skip) {
		case 2:
			/* contiguous samples: one plain 4-lane load */
			source16x4 = vld1_s16((int16_t*)src);
			break;
		case 4:
			/* 2-channel interleave: de-interleaving load, keep channel 0 */
			source16x4 = vld2_s16((int16_t*)src).val[0];
			break;
		default:
			/* arbitrary stride: gather lane by lane */
			source16x4 = vld1_lane_s16((int16_t*)src, source16x4, 0);
			source16x4 = vld1_lane_s16((int16_t*)(src+src_skip), source16x4, 1);
			source16x4 = vld1_lane_s16((int16_t*)(src+2*src_skip), source16x4, 2);
			source16x4 = vld1_lane_s16((int16_t*)(src+3*src_skip), source16x4, 3);
			break;
		}
		int32x4_t source32x4 = vmovl_s16(source16x4);
		src += 4 * src_skip;

		float32x4_t converted = vcvtq_f32_s32(source32x4);
		float32x4_t scaled = vmulq_f32(converted, vscaling);
		vst1q_f32(dst, scaled);
		dst += 4;
	}
	nsamples = nsamples & 3;	/* leftover samples go to the scalar tail */
#endif

	while (nsamples--) {
		*dst = (*((short *) src)) * scaling;
		dst++;
		src += src_skip;
	}
}
1108
void memset_interleave (char *dst, char val, unsigned long bytes,
			unsigned long unit_bytes,
			unsigned long skip_bytes)
{
	/* Fill one interleaved channel with val: write unit_bytes of
	   fill, then advance skip_bytes to the same channel's slot in
	   the next frame, until bytes of fill have been written.
	   Fast paths exist for the common 1/2/4 byte sample sizes. */
	if (unit_bytes == 1) {
		for (; bytes != 0; bytes--, dst += skip_bytes)
			*dst = val;
	} else if (unit_bytes == 2) {
		for (; bytes != 0; bytes -= 2, dst += skip_bytes)
			*((short *) dst) = (short) val;
	} else if (unit_bytes == 4) {
		for (; bytes != 0; bytes -= 4, dst += skip_bytes)
			*((int *) dst) = (int) val;
	} else {
		for (; bytes != 0; bytes -= unit_bytes, dst += skip_bytes)
			memset (dst, val, unit_bytes);
	}
}
1143
1144 /* COPY FUNCTIONS: used to move data from an input channel to an
1145 output channel. Note that we assume that the skip distance
1146 is the same for both channels. This is completely fine
1147 unless the input and output were on different audio interfaces that
1148 were interleaved differently. We don't try to handle that.
1149 */
1150
void
memcpy_fake (char *dst, char *src, unsigned long src_bytes, unsigned long foo, unsigned long bar)
{
	/* Non-interleaved fast path: a straight block copy. The two
	   skip arguments exist only so the signature matches the other
	   copy functions; they are deliberately unused. */
	(void) foo;
	(void) bar;
	memcpy (dst, src, src_bytes);
}
1156
void
memcpy_interleave_d16_s16 (char *dst, char *src, unsigned long src_bytes,
			   unsigned long dst_skip_bytes, unsigned long src_skip_bytes)
{
	/* Copy one 16 bit sample per frame from src to dst, each side
	   advancing by its own skip distance. src_bytes counts payload
	   bytes, i.e. 2 per frame. */
	while (src_bytes != 0) {
		memcpy (dst, src, 2);
		dst += dst_skip_bytes;
		src += src_skip_bytes;
		src_bytes -= 2;
	}
}
1168
void
memcpy_interleave_d24_s24 (char *dst, char *src, unsigned long src_bytes,
			   unsigned long dst_skip_bytes, unsigned long src_skip_bytes)
{
	/* Copy one packed 24 bit sample per frame from src to dst,
	   each side advancing by its own skip distance. src_bytes
	   counts payload bytes, i.e. 3 per frame. */
	while (src_bytes != 0) {
		dst[0] = src[0];
		dst[1] = src[1];
		dst[2] = src[2];
		dst += dst_skip_bytes;
		src += src_skip_bytes;
		src_bytes -= 3;
	}
}
1180
void
memcpy_interleave_d32_s32 (char *dst, char *src, unsigned long src_bytes,
			   unsigned long dst_skip_bytes, unsigned long src_skip_bytes)
{
	/* Copy one 32 bit sample per frame from src to dst, each side
	   advancing by its own skip distance. src_bytes counts payload
	   bytes, i.e. 4 per frame. */
	while (src_bytes != 0) {
		memcpy (dst, src, 4);
		dst += dst_skip_bytes;
		src += src_skip_bytes;
		src_bytes -= 4;
	}
}
1192
1193