1 /* Spa
2  *
3  * Copyright © 2019 Wim Taymans
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the "Software"),
7  * to deal in the Software without restriction, including without limitation
8  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9  * and/or sell copies of the Software, and to permit persons to whom the
10  * Software is furnished to do so, subject to the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the next
13  * paragraph) shall be included in all copies or substantial portions of the
14  * Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22  * DEALINGS IN THE SOFTWARE.
23  */
24 
25 #include <math.h>
26 #ifdef __FreeBSD__
27 #include <sys/endian.h>
28 #define bswap_16 bswap16
29 #define bswap_32 bswap32
30 #else
31 #include <byteswap.h>
32 #endif
33 
34 #include <spa/utils/defs.h>
35 
/*
 * Sample conversion macros.
 *
 * X_TO_F32(v) turns an integer sample of format X into a float, and
 * F32_TO_X(v) clamps a float to [-1.0, 1.0] with SPA_CLAMP and scales it
 * to format X.  Macros whose format name ends in 'S' additionally pass the
 * integer sample through bswap_16()/bswap_32().
 *
 * All float arithmetic uses float literals so no expression is promoted to
 * double, and every left shift is done on an unsigned operand because
 * shifting a negative (or overflowing) signed value is undefined in C.
 * Macro arguments may be expanded more than once; avoid side effects.
 */

/* unsigned 8 bit, midpoint U8_OFFS */
#define U8_MIN		0
#define U8_MAX		255
#define U8_SCALE	127.5f
#define U8_OFFS		128
#define U8_TO_F32(v)	((((uint8_t)(v)) * (1.0f / U8_OFFS)) - 1.0f)
#define F32_TO_U8(v)	((uint8_t)((SPA_CLAMP(v, -1.0f, 1.0f) * U8_SCALE) + U8_OFFS))

/* signed 8 bit */
#define S8_MIN		-127
#define S8_MAX		127
#define S8_MAX_F	127.0f
#define S8_SCALE	127.0f
#define S8_TO_F32(v)	(((int8_t)(v)) * (1.0f / S8_SCALE))
#define F32_TO_S8(v)	((int8_t)(SPA_CLAMP(v, -1.0f, 1.0f) * S8_SCALE))

/* unsigned 16 bit, midpoint U16_OFFS */
#define U16_MIN		0
#define U16_MAX		65535
#define U16_SCALE	32767.5f
#define U16_OFFS	32768
#define U16_TO_F32(v)	((((uint16_t)(v)) * (1.0f / U16_OFFS)) - 1.0f)
#define U16S_TO_F32(v)	(((uint16_t)bswap_16((uint16_t)(v)) * (1.0f / U16_OFFS)) - 1.0f)
#define F32_TO_U16(v)	((uint16_t)((SPA_CLAMP(v, -1.0f, 1.0f) * U16_SCALE) + U16_OFFS))
#define F32_TO_U16S(v)	((uint16_t)bswap_16((uint16_t)((SPA_CLAMP(v, -1.0f, 1.0f) * U16_SCALE) + U16_OFFS)))

/* signed 16 bit */
#define S16_MIN		-32767
#define S16_MAX		32767
#define S16_MAX_F	32767.0f
#define S16_SCALE	32767.0f
#define S16_TO_F32(v)	(((int16_t)(v)) * (1.0f / S16_SCALE))
#define S16S_TO_F32(v)	(((int16_t)bswap_16((uint16_t)(v))) * (1.0f / S16_SCALE))
#define F32_TO_S16(v)	((int16_t)(SPA_CLAMP(v, -1.0f, 1.0f) * S16_SCALE))
#define F32_TO_S16S(v)	((int16_t)bswap_16((uint16_t)(SPA_CLAMP(v, -1.0f, 1.0f) * S16_SCALE)))

/* unsigned 24 bit held in a uint32_t, midpoint U24_OFFS */
#define U24_MIN		0
#define U24_MAX		16777215
#define U24_SCALE	8388607.5f
#define U24_OFFS	8388608
#define U24_TO_F32(v)	((((uint32_t)(v)) * (1.0f / U24_OFFS)) - 1.0f)
#define F32_TO_U24(v)	((uint32_t)((SPA_CLAMP(v, -1.0f, 1.0f) * U24_SCALE) + U24_OFFS))

/* signed 24 bit held in an int32_t */
#define S24_MIN		-8388607
#define S24_MAX		8388607
#define S24_MAX_F	8388607.0f
#define S24_SCALE	8388607.0f
#define S24_TO_F32(v)	(((int32_t)(v)) * (1.0f / S24_SCALE))
#define F32_TO_S24(v)	((int32_t)(SPA_CLAMP(v, -1.0f, 1.0f) * S24_SCALE))

/* 32 bit formats are converted through the 24 bit macros: the low 8 bits
 * are dropped on input and are zero on output. */
#define U32_TO_F32(v)	U24_TO_F32(((uint32_t)(v)) >> 8)
#define F32_TO_U32(v)	(F32_TO_U24(v) << 8)

#define S32_SCALE	2147483648.0f
/* NOTE(review): positive value despite the name; not referenced by the
 * macros in this section. */
#define S32_MIN		2147483520.0f

#define S32_TO_F32(v)	S24_TO_F32(((int32_t)(v)) >> 8)
#define S32S_TO_F32(v)	S24_TO_F32(((int32_t)bswap_32(v)) >> 8)
/* shift as unsigned: the 24 bit intermediate may be negative */
#define F32_TO_S32(v)	((int32_t)((uint32_t)F32_TO_S24(v) << 8))
#define F32_TO_S32S(v)	bswap_32((uint32_t)F32_TO_S24(v) << 8)

/* 24 bit sample in the low bits of a 32 bit word; the top byte is shifted
 * out (as unsigned) before conversion. */
#define U24_32_TO_F32(v)	U32_TO_F32((uint32_t)(v) << 8)
#define U24_32S_TO_F32(v)	U32_TO_F32((uint32_t)bswap_32(v) << 8)
#define F32_TO_U24_32(v)	F32_TO_U24(v)
#define F32_TO_U24_32S(v)	bswap_32(F32_TO_U24(v))

/* the unsigned shift moves bit 23 into the sign position; S32_TO_F32 then
 * shifts it back down as a signed value */
#define S24_32_TO_F32(v)	S32_TO_F32((uint32_t)(v) << 8)
#define S24_32S_TO_F32(v)	S32_TO_F32((uint32_t)bswap_32(v) << 8)
#define F32_TO_S24_32(v)	F32_TO_S24(v)
#define F32_TO_S24_32S(v)	bswap_32(F32_TO_S24(v))
102 
/**
 * Load an unsigned 24-bit value stored in three consecutive bytes.
 *
 * When __BYTE_ORDER equals __LITTLE_ENDIAN the first byte is the least
 * significant one, otherwise the first byte is the most significant one.
 *
 * \param src pointer to the three bytes to read
 * \return the value in the low 24 bits of a uint32_t
 */
static inline uint32_t read_u24(const void *src)
{
	const uint8_t *bytes = src;
	uint32_t lo, mid, hi;

	mid = bytes[1];
#if __BYTE_ORDER == __LITTLE_ENDIAN
	lo = bytes[0];
	hi = bytes[2];
#else
	lo = bytes[2];
	hi = bytes[0];
#endif
	return (hi << 16) | (mid << 8) | lo;
}
112 
/**
 * Load a signed 24-bit value stored in three consecutive bytes and
 * sign-extend it to 32 bits.
 *
 * When __BYTE_ORDER equals __LITTLE_ENDIAN the first byte is the least
 * significant one, otherwise the first byte is the most significant one.
 *
 * \param src pointer to the three bytes to read
 * \return the sign-extended sample
 */
static inline int32_t read_s24(const void *src)
{
	const int8_t *s = src;
	uint32_t hi, mid, lo;

	/* The most significant byte is read as int8_t so that converting it to
	 * uint32_t replicates the sign bit; the shift itself is done on the
	 * unsigned value because left-shifting a negative int is undefined. */
#if __BYTE_ORDER == __LITTLE_ENDIAN
	hi = (uint32_t)(int32_t)s[2];
	lo = (uint8_t)s[0];
#else
	hi = (uint32_t)(int32_t)s[0];
	lo = (uint8_t)s[2];
#endif
	mid = (uint8_t)s[1];

	return (int32_t)((hi << 16) | (mid << 8) | lo);
}
122 
/**
 * Load a signed 24-bit value stored in three consecutive bytes in the
 * byte order opposite to the one read_s24() uses, and sign-extend it to
 * 32 bits.
 *
 * When __BYTE_ORDER equals __LITTLE_ENDIAN the first byte is the most
 * significant one, otherwise the first byte is the least significant one.
 *
 * \param src pointer to the three bytes to read
 * \return the sign-extended sample
 */
static inline int32_t read_s24s(const void *src)
{
	const int8_t *s = src;
	uint32_t hi, mid, lo;

	/* The most significant byte is read as int8_t so that converting it to
	 * uint32_t replicates the sign bit; the shift itself is done on the
	 * unsigned value because left-shifting a negative int is undefined. */
#if __BYTE_ORDER == __LITTLE_ENDIAN
	hi = (uint32_t)(int32_t)s[0];
	lo = (uint8_t)s[2];
#else
	hi = (uint32_t)(int32_t)s[2];
	lo = (uint8_t)s[0];
#endif
	mid = (uint8_t)s[1];

	return (int32_t)((hi << 16) | (mid << 8) | lo);
}
132 
/**
 * Store the low 24 bits of an unsigned value into three consecutive bytes.
 *
 * When __BYTE_ORDER equals __LITTLE_ENDIAN the least significant byte is
 * written first, otherwise the most significant of the three is written
 * first.  Bits 24..31 of val are discarded.
 *
 * \param dst pointer to the three bytes to write
 * \param val value to store
 */
static inline void write_u24(void *dst, uint32_t val)
{
	uint8_t *out = dst;
	const uint8_t lo = (uint8_t) (val);
	const uint8_t mid = (uint8_t) (val >> 8);
	const uint8_t hi = (uint8_t) (val >> 16);

	out[1] = mid;
#if __BYTE_ORDER == __LITTLE_ENDIAN
	out[0] = lo;
	out[2] = hi;
#else
	out[0] = hi;
	out[2] = lo;
#endif
}
146 
/**
 * Store the low 24 bits of a signed value into three consecutive bytes.
 *
 * When __BYTE_ORDER equals __LITTLE_ENDIAN the least significant byte is
 * written first, otherwise the most significant of the three is written
 * first.  Bits 24..31 of val are discarded.
 *
 * \param dst pointer to the three bytes to write
 * \param val value to store
 */
static inline void write_s24(void *dst, int32_t val)
{
	uint8_t *out = dst;
	const uint8_t lo = (uint8_t) (val);
	const uint8_t mid = (uint8_t) (val >> 8);
	const uint8_t hi = (uint8_t) (val >> 16);

	out[1] = mid;
#if __BYTE_ORDER == __LITTLE_ENDIAN
	out[0] = lo;
	out[2] = hi;
#else
	out[0] = hi;
	out[2] = lo;
#endif
}
160 
/**
 * Store the low 24 bits of a signed value into three consecutive bytes,
 * using the byte order opposite to the one write_s24() uses.
 *
 * When __BYTE_ORDER equals __LITTLE_ENDIAN the most significant of the
 * three bytes is written first, otherwise the least significant byte is
 * written first.  Bits 24..31 of val are discarded.
 *
 * \param dst pointer to the three bytes to write
 * \param val value to store
 */
static inline void write_s24s(void *dst, int32_t val)
{
	uint8_t *out = dst;
	const uint8_t lo = (uint8_t) (val);
	const uint8_t mid = (uint8_t) (val >> 8);
	const uint8_t hi = (uint8_t) (val >> 16);

	out[1] = mid;
#if __BYTE_ORDER == __LITTLE_ENDIAN
	out[0] = hi;
	out[2] = lo;
#else
	out[0] = lo;
	out[2] = hi;
#endif
}
174 
/* Number of float slots in the ns_data array of struct convert. */
#define MAX_NS	64

/*
 * State of one converter.  The same pointer is handed back as the first
 * argument of the process and free callbacks stored in it.
 */
struct convert {
	/* NOTE(review): presumably source/destination sample format ids and the
	 * channel count, set by the caller before convert_init() -- confirm. */
	uint32_t src_fmt;
	uint32_t dst_fmt;
	uint32_t n_channels;
	/* NOTE(review): presumably a bitmask of available CPU features used to
	 * pick one of the arch-specific conv_* routines -- confirm. */
	uint32_t cpu_flags;

	/* one-bit flag; NOTE(review): presumably set when no conversion is
	 * needed -- confirm where it is written */
	unsigned int is_passthrough:1;
	/* MAX_NS floats of scratch state with an index and a size field;
	 * NOTE(review): "ns" presumably stands for noise shaping -- confirm. */
	float ns_data[MAX_NS];
	uint32_t ns_idx;
	uint32_t ns_size;

	/* Conversion callback: receives this struct, an array of destination
	 * pointers, an array of source pointers and a sample count. */
	void (*process) (struct convert *conv, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src[],
			uint32_t n_samples);
	/* Cleanup callback for this struct. */
	void (*free) (struct convert *conv);
};
192 
/*
 * Declared here, defined elsewhere.
 * NOTE(review): presumably fills in conv->process and conv->free from the
 * other fields and returns a negative value on failure -- confirm against
 * the definition.
 */
int convert_init(struct convert *conv);

/* Call conv->process with conv as first argument followed by the remaining
 * arguments.  `conv` is expanded twice, so it must not have side effects. */
#define convert_process(conv,...)	(conv)->process(conv, __VA_ARGS__)
/* Call conv->free on conv.  `conv` is expanded twice here as well. */
#define convert_free(conv)		(conv)->free(conv)
197 
/*
 * Expands to the prototype of a conversion routine named
 * conv_<name>_<arch>.  The parameter list is the same as that of the
 * `process` callback in struct convert.  The expansion has no trailing
 * semicolon (the last backslash merely continues onto the following empty
 * line), so every use below supplies its own ';'.
 */
#define DEFINE_FUNCTION(name,arch) \
void conv_##name##_##arch(struct convert *conv, void * SPA_RESTRICT dst[],	\
		const void * SPA_RESTRICT src[], uint32_t n_samples)		\

/*
 * Prototypes with the `c` arch suffix.
 * NOTE(review): a trailing 'd' on a format name presumably marks planar
 * (one buffer per channel) data and a trailing 's' byte-swapped samples --
 * confirm against the definitions.
 */

/* same-format copies */
DEFINE_FUNCTION(copy8d, c);
DEFINE_FUNCTION(copy8, c);
DEFINE_FUNCTION(copy16d, c);
DEFINE_FUNCTION(copy16, c);
DEFINE_FUNCTION(copy24d, c);
DEFINE_FUNCTION(copy24, c);
DEFINE_FUNCTION(copy32d, c);
DEFINE_FUNCTION(copy32, c);
/* integer / companded formats to f32 */
DEFINE_FUNCTION(u8d_to_f32d, c);
DEFINE_FUNCTION(u8_to_f32, c);
DEFINE_FUNCTION(u8_to_f32d, c);
DEFINE_FUNCTION(u8d_to_f32, c);
DEFINE_FUNCTION(s8d_to_f32d, c);
DEFINE_FUNCTION(s8_to_f32, c);
DEFINE_FUNCTION(s8_to_f32d, c);
DEFINE_FUNCTION(s8d_to_f32, c);
DEFINE_FUNCTION(ulaw_to_f32d, c);
DEFINE_FUNCTION(alaw_to_f32d, c);
DEFINE_FUNCTION(u16_to_f32, c);
DEFINE_FUNCTION(u16_to_f32d, c);
DEFINE_FUNCTION(s16d_to_f32d, c);
DEFINE_FUNCTION(s16_to_f32, c);
DEFINE_FUNCTION(s16_to_f32d, c);
DEFINE_FUNCTION(s16s_to_f32d, c);
DEFINE_FUNCTION(s16d_to_f32, c);
DEFINE_FUNCTION(u32_to_f32, c);
DEFINE_FUNCTION(u32_to_f32d, c);
DEFINE_FUNCTION(s32d_to_f32d, c);
DEFINE_FUNCTION(s32_to_f32, c);
DEFINE_FUNCTION(s32_to_f32d, c);
DEFINE_FUNCTION(s32s_to_f32d, c);
DEFINE_FUNCTION(s32d_to_f32, c);
DEFINE_FUNCTION(u24_to_f32, c);
DEFINE_FUNCTION(u24_to_f32d, c);
DEFINE_FUNCTION(s24d_to_f32d, c);
DEFINE_FUNCTION(s24_to_f32, c);
DEFINE_FUNCTION(s24_to_f32d, c);
DEFINE_FUNCTION(s24s_to_f32d, c);
DEFINE_FUNCTION(s24d_to_f32, c);
DEFINE_FUNCTION(u24_32_to_f32, c);
DEFINE_FUNCTION(u24_32_to_f32d, c);
DEFINE_FUNCTION(s24_32d_to_f32d, c);
DEFINE_FUNCTION(s24_32_to_f32, c);
DEFINE_FUNCTION(s24_32_to_f32d, c);
DEFINE_FUNCTION(s24_32s_to_f32d, c);
DEFINE_FUNCTION(s24_32d_to_f32, c);
/* f32 to integer / companded formats */
DEFINE_FUNCTION(f32d_to_u8d, c);
DEFINE_FUNCTION(f32_to_u8, c);
DEFINE_FUNCTION(f32_to_u8d, c);
DEFINE_FUNCTION(f32d_to_u8, c);
DEFINE_FUNCTION(f32d_to_s8d, c);
DEFINE_FUNCTION(f32_to_s8, c);
DEFINE_FUNCTION(f32_to_s8d, c);
DEFINE_FUNCTION(f32d_to_s8, c);
DEFINE_FUNCTION(f32d_to_alaw, c);
DEFINE_FUNCTION(f32d_to_ulaw, c);
DEFINE_FUNCTION(f32_to_u16, c);
DEFINE_FUNCTION(f32d_to_u16, c);
DEFINE_FUNCTION(f32d_to_s16d, c);
DEFINE_FUNCTION(f32_to_s16, c);
DEFINE_FUNCTION(f32_to_s16d, c);
DEFINE_FUNCTION(f32d_to_s16, c);
DEFINE_FUNCTION(f32d_to_s16s, c);
DEFINE_FUNCTION(f32_to_u32, c);
DEFINE_FUNCTION(f32d_to_u32, c);
DEFINE_FUNCTION(f32d_to_s32d, c);
DEFINE_FUNCTION(f32_to_s32, c);
DEFINE_FUNCTION(f32_to_s32d, c);
DEFINE_FUNCTION(f32d_to_s32, c);
DEFINE_FUNCTION(f32d_to_s32s, c);
DEFINE_FUNCTION(f32_to_u24, c);
DEFINE_FUNCTION(f32d_to_u24, c);
DEFINE_FUNCTION(f32d_to_s24d, c);
DEFINE_FUNCTION(f32_to_s24, c);
DEFINE_FUNCTION(f32_to_s24d, c);
DEFINE_FUNCTION(f32d_to_s24, c);
DEFINE_FUNCTION(f32d_to_s24s, c);
DEFINE_FUNCTION(f32_to_u24_32, c);
DEFINE_FUNCTION(f32d_to_u24_32, c);
DEFINE_FUNCTION(f32d_to_s24_32d, c);
DEFINE_FUNCTION(f32_to_s24_32, c);
DEFINE_FUNCTION(f32_to_s24_32d, c);
DEFINE_FUNCTION(f32d_to_s24_32, c);
DEFINE_FUNCTION(f32d_to_s24_32s, c);
/* layout-only routines, named by sample width in bits */
DEFINE_FUNCTION(deinterleave_8, c);
DEFINE_FUNCTION(deinterleave_16, c);
DEFINE_FUNCTION(deinterleave_24, c);
DEFINE_FUNCTION(deinterleave_32, c);
DEFINE_FUNCTION(deinterleave_32s, c);
DEFINE_FUNCTION(interleave_8, c);
DEFINE_FUNCTION(interleave_16, c);
DEFINE_FUNCTION(interleave_24, c);
DEFINE_FUNCTION(interleave_32, c);
DEFINE_FUNCTION(interleave_32s, c);
296 
/*
 * Arch-specific prototypes.  Each group is only declared when the matching
 * HAVE_* macro is defined; the arch suffix becomes part of the function
 * name (conv_<name>_<arch>).
 * NOTE(review): the _2 / _4 name suffixes presumably denote variants
 * specialised for that many channels -- confirm against the definitions.
 */
#if defined(HAVE_NEON)
DEFINE_FUNCTION(s16_to_f32d_2, neon);
DEFINE_FUNCTION(s16_to_f32d, neon);
DEFINE_FUNCTION(f32d_to_s16, neon);
#endif
#if defined(HAVE_SSE2)
DEFINE_FUNCTION(s16_to_f32d_2, sse2);
DEFINE_FUNCTION(s16_to_f32d, sse2);
DEFINE_FUNCTION(s24_to_f32d, sse2);
DEFINE_FUNCTION(s32_to_f32d, sse2);
DEFINE_FUNCTION(f32d_to_s32, sse2);
DEFINE_FUNCTION(f32_to_s16, sse2);
DEFINE_FUNCTION(f32d_to_s16_2, sse2);
DEFINE_FUNCTION(f32d_to_s16, sse2);
DEFINE_FUNCTION(f32d_to_s16d, sse2);
#endif
#if defined(HAVE_SSSE3)
DEFINE_FUNCTION(s24_to_f32d, ssse3);
#endif
#if defined(HAVE_SSE41)
DEFINE_FUNCTION(s24_to_f32d, sse41);
#endif
#if defined(HAVE_AVX2)
DEFINE_FUNCTION(s16_to_f32d_2, avx2);
DEFINE_FUNCTION(s16_to_f32d, avx2);
DEFINE_FUNCTION(s24_to_f32d, avx2);
DEFINE_FUNCTION(s32_to_f32d, avx2);
DEFINE_FUNCTION(f32d_to_s32, avx2);
DEFINE_FUNCTION(f32d_to_s16_4, avx2);
DEFINE_FUNCTION(f32d_to_s16_2, avx2);
DEFINE_FUNCTION(f32d_to_s16, avx2);
#endif

/* The helper macro is only needed for the prototypes above. */
#undef DEFINE_FUNCTION
331