1 /* Spa
2 *
3 * Copyright © 2019 Wim Taymans
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 */
24
25 #include <math.h>
26 #ifdef __FreeBSD__
27 #include <sys/endian.h>
28 #define bswap_16 bswap16
29 #define bswap_32 bswap32
30 #else
31 #include <byteswap.h>
32 #endif
33
34 #include <spa/utils/defs.h>
35
/* Unsigned 8-bit samples <-> 32-bit float. */
#define U8_MIN		0
#define U8_MAX		255
#define U8_SCALE	127.5f
#define U8_OFFS		128
/*
 * Maps 0..255 onto [-1.0, 1.0): 128 becomes 0.0f.
 * The subtracted constant is a float literal on purpose; a plain `1.0`
 * would promote the whole expression to double.
 */
#define U8_TO_F32(v)	((((uint8_t)(v)) * (1.0f / U8_OFFS)) - 1.0f)
/* Clamps v to [-1.0, 1.0], scales by U8_SCALE, adds U8_OFFS and truncates. */
#define F32_TO_U8(v)	((uint8_t)((SPA_CLAMP(v, -1.0f, 1.0f) * U8_SCALE) + U8_OFFS))
42
/* Signed 8-bit samples <-> 32-bit float, using the symmetric range +/-127. */
#define S8_MIN		(-127)
#define S8_MAX		127
#define S8_MAX_F	127.0f
#define S8_SCALE	127.0f
/* Reinterprets v as int8_t and multiplies by the reciprocal of S8_SCALE. */
#define S8_TO_F32(v)	((float)(int8_t)(v) * (1.0f / S8_SCALE))
/* Clamps v to [-1.0, 1.0], scales by S8_SCALE and truncates toward zero. */
#define F32_TO_S8(v)	((int8_t)(SPA_CLAMP(v, -1.0f, 1.0f) * S8_SCALE))
49
/* Unsigned 16-bit samples <-> 32-bit float. The ...S variants byte-swap. */
#define U16_MIN		0
#define U16_MAX		65535
#define U16_SCALE	32767.5f
#define U16_OFFS	32768
/*
 * Maps 0..65535 onto [-1.0, 1.0): 32768 becomes 0.0f.
 * The subtracted constant is a float literal so the expression is not
 * promoted to double.
 */
#define U16_TO_F32(v)	((((uint16_t)(v)) * (1.0f / U16_OFFS)) - 1.0f)
/* Same as U16_TO_F32 after swapping the two bytes of v. */
#define U16S_TO_F32(v)	(((uint16_t)bswap_16((uint16_t)(v)) * (1.0f / U16_OFFS)) - 1.0f)
/* Clamps v to [-1.0, 1.0], scales by U16_SCALE, adds U16_OFFS and truncates. */
#define F32_TO_U16(v)	((uint16_t)((SPA_CLAMP(v, -1.0f, 1.0f) * U16_SCALE) + U16_OFFS))
/* F32_TO_U16 followed by a byte swap. */
#define F32_TO_U16S(v)	((uint16_t)bswap_16(F32_TO_U16(v)))
58
/* Signed 16-bit samples <-> 32-bit float. The ...S variants byte-swap. */
#define S16_MIN		(-32767)
#define S16_MAX		32767
#define S16_MAX_F	32767.0f
#define S16_SCALE	32767.0f
/* Reinterprets v as int16_t and multiplies by the reciprocal of S16_SCALE. */
#define S16_TO_F32(v)	(((int16_t)(v)) * (1.0f / S16_SCALE))
/* Same as S16_TO_F32 after swapping the two bytes of v. */
#define S16S_TO_F32(v)	(((int16_t)bswap_16((uint16_t)(v))) * (1.0f / S16_SCALE))
/* Clamps v to [-1.0, 1.0], scales by S16_SCALE and truncates toward zero. */
#define F32_TO_S16(v)	((int16_t)(SPA_CLAMP(v, -1.0f, 1.0f) * S16_SCALE))
/*
 * F32_TO_S16 followed by a byte swap. The float is converted to int16_t
 * first: converting a negative float directly to an unsigned type is
 * undefined behaviour.
 */
#define F32_TO_S16S(v)	((int16_t)bswap_16((uint16_t)F32_TO_S16(v)))
67
/* Unsigned 24-bit samples (held in the low bits of a uint32_t) <-> float. */
#define U24_MIN		0
#define U24_MAX		16777215
#define U24_SCALE	8388607.5f
#define U24_OFFS	8388608
/* Maps 0..16777215 onto [-1.0, 1.0); float literal keeps it single precision. */
#define U24_TO_F32(v)	((((uint32_t)(v)) * (1.0f / U24_OFFS)) - 1.0f)
/*
 * Clamps v to [-1.0, 1.0], scales, offsets and truncates.
 * The sum is formed in double: in single precision
 * 8388607.5 + 8388608 rounds up to 16777216, one past U24_MAX, and
 * F32_TO_U32 would then shift that value out of 32 bits and yield 0.
 */
#define F32_TO_U24(v)	((uint32_t)(((double)(SPA_CLAMP(v, -1.0f, 1.0f)) * U24_SCALE) + U24_OFFS))

/* Signed 24-bit samples (sign-extended in an int32_t) <-> float. */
#define S24_MIN		(-8388607)
#define S24_MAX		8388607
#define S24_MAX_F	8388607.0f
#define S24_SCALE	8388607.0f
#define S24_TO_F32(v)	(((int32_t)(v)) * (1.0f / S24_SCALE))
#define F32_TO_S24(v)	((int32_t)(SPA_CLAMP(v, -1.0f, 1.0f) * S24_SCALE))

/* 32-bit unsigned: only the upper 24 bits are used. */
#define U32_TO_F32(v)	U24_TO_F32(((uint32_t)(v)) >> 8)
#define F32_TO_U32(v)	(F32_TO_U24(v) << 8)

#define S32_SCALE	2147483648.0f
/*
 * 2^31 - 128, the largest float below 2^31.
 * NOTE(review): the value is positive despite the _MIN name; not used in
 * this header, so confirm the intended meaning with its users.
 */
#define S32_MIN		2147483520.0f

/*
 * 32-bit signed: only the upper 24 bits are used. The right shift of a
 * negative value relies on the compiler performing an arithmetic shift.
 */
#define S32_TO_F32(v)	S24_TO_F32(((int32_t)(v)) >> 8)
#define S32S_TO_F32(v)	S24_TO_F32(((int32_t)bswap_32(v)) >> 8)
/* Left shifts are done on uint32_t: shifting a negative int is undefined. */
#define F32_TO_S32(v)	((int32_t)((uint32_t)F32_TO_S24(v) << 8))
#define F32_TO_S32S(v)	bswap_32((uint32_t)F32_TO_S24(v) << 8)

/* 24-bit unsigned sample stored in the low bits of a 32-bit word. */
#define U24_32_TO_F32(v)	U32_TO_F32((uint32_t)(v) << 8)
#define U24_32S_TO_F32(v)	U32_TO_F32((uint32_t)bswap_32(v) << 8)
#define F32_TO_U24_32(v)	F32_TO_U24(v)
#define F32_TO_U24_32S(v)	bswap_32(F32_TO_U24(v))

/*
 * 24-bit signed sample stored in the low bits of a 32-bit word. The value is
 * moved to the top of the word as unsigned, then S32_TO_F32 sign-extends it.
 */
#define S24_32_TO_F32(v)	S32_TO_F32((uint32_t)(v) << 8)
#define S24_32S_TO_F32(v)	S32_TO_F32((uint32_t)bswap_32(v) << 8)
#define F32_TO_S24_32(v)	F32_TO_S24(v)
#define F32_TO_S24_32S(v)	bswap_32(F32_TO_S24(v))
102
/*
 * Load a packed 24-bit unsigned sample stored in host byte order.
 *
 * src must point to at least 3 readable bytes; no alignment is needed
 * because the bytes are fetched one at a time. The sample is returned in
 * the low 24 bits of the result, the top 8 bits are zero.
 */
static inline uint32_t read_u24(const void *src)
{
	const uint8_t *bytes = src;
#if __BYTE_ORDER == __LITTLE_ENDIAN
	const uint32_t lo = bytes[0], hi = bytes[2];
#else
	const uint32_t lo = bytes[2], hi = bytes[0];
#endif
	const uint32_t mid = bytes[1];

	return (hi << 16) | (mid << 8) | lo;
}
112
/*
 * Load a packed 24-bit signed sample stored in host byte order.
 *
 * src must point to at least 3 readable bytes; no alignment is needed.
 * Returns the sample sign-extended to 32 bits (-8388608..8388607).
 */
static inline int32_t read_s24(const void *src)
{
	const uint8_t *s = src;
#if __BYTE_ORDER == __LITTLE_ENDIAN
	const uint32_t raw = ((uint32_t)s[2] << 16) | ((uint32_t)s[1] << 8) | (uint32_t)s[0];
#else
	const uint32_t raw = ((uint32_t)s[0] << 16) | ((uint32_t)s[1] << 8) | (uint32_t)s[2];
#endif
	/*
	 * Sign-extend from bit 23 with unsigned arithmetic only: flipping the
	 * sign bit and subtracting its weight avoids left-shifting a negative
	 * value, which is undefined behaviour.
	 */
	return (int32_t)(raw ^ 0x800000u) - 0x800000;
}
122
/*
 * Load a packed 24-bit signed sample stored in the byte order opposite to
 * the host's.
 *
 * src must point to at least 3 readable bytes; no alignment is needed.
 * Returns the sample sign-extended to 32 bits (-8388608..8388607).
 */
static inline int32_t read_s24s(const void *src)
{
	const uint8_t *s = src;
#if __BYTE_ORDER == __LITTLE_ENDIAN
	const uint32_t raw = ((uint32_t)s[0] << 16) | ((uint32_t)s[1] << 8) | (uint32_t)s[2];
#else
	const uint32_t raw = ((uint32_t)s[2] << 16) | ((uint32_t)s[1] << 8) | (uint32_t)s[0];
#endif
	/*
	 * Sign-extend from bit 23 with unsigned arithmetic only: flipping the
	 * sign bit and subtracting its weight avoids left-shifting a negative
	 * value, which is undefined behaviour.
	 */
	return (int32_t)(raw ^ 0x800000u) - 0x800000;
}
132
/*
 * Store the low 24 bits of val as a packed 3-byte sample in host byte order.
 *
 * dst must point to at least 3 writable bytes; exactly 3 bytes are written
 * and bits 24..31 of val are ignored.
 */
static inline void write_u24(void *dst, uint32_t val)
{
	uint8_t *out = dst;
	const uint8_t b0 = (uint8_t)val;		/* bits 0..7 */
	const uint8_t b1 = (uint8_t)(val >> 8);		/* bits 8..15 */
	const uint8_t b2 = (uint8_t)(val >> 16);	/* bits 16..23 */

#if __BYTE_ORDER == __LITTLE_ENDIAN
	out[0] = b0;
	out[1] = b1;
	out[2] = b2;
#else
	out[0] = b2;
	out[1] = b1;
	out[2] = b0;
#endif
}
146
/*
 * Store the low 24 bits of a signed sample as 3 packed bytes in host byte
 * order.
 *
 * dst must point to at least 3 writable bytes; exactly 3 bytes are written.
 */
static inline void write_s24(void *dst, int32_t val)
{
	uint8_t *d = dst;
	/*
	 * Work on the two's-complement bit pattern: right-shifting a negative
	 * signed value is implementation-defined, the unsigned shift is not.
	 */
	const uint32_t bits = (uint32_t)val;

#if __BYTE_ORDER == __LITTLE_ENDIAN
	d[0] = (uint8_t)bits;
	d[1] = (uint8_t)(bits >> 8);
	d[2] = (uint8_t)(bits >> 16);
#else
	d[0] = (uint8_t)(bits >> 16);
	d[1] = (uint8_t)(bits >> 8);
	d[2] = (uint8_t)bits;
#endif
}
160
/*
 * Store the low 24 bits of a signed sample as 3 packed bytes in the byte
 * order opposite to the host's.
 *
 * dst must point to at least 3 writable bytes; exactly 3 bytes are written.
 */
static inline void write_s24s(void *dst, int32_t val)
{
	uint8_t *d = dst;
	/*
	 * Work on the two's-complement bit pattern: right-shifting a negative
	 * signed value is implementation-defined, the unsigned shift is not.
	 */
	const uint32_t bits = (uint32_t)val;

#if __BYTE_ORDER == __LITTLE_ENDIAN
	d[0] = (uint8_t)(bits >> 16);
	d[1] = (uint8_t)(bits >> 8);
	d[2] = (uint8_t)bits;
#else
	d[0] = (uint8_t)bits;
	d[1] = (uint8_t)(bits >> 8);
	d[2] = (uint8_t)(bits >> 16);
#endif
}
174
175 #define MAX_NS 64
176
177 struct convert {
178 uint32_t src_fmt;
179 uint32_t dst_fmt;
180 uint32_t n_channels;
181 uint32_t cpu_flags;
182
183 unsigned int is_passthrough:1;
184 float ns_data[MAX_NS];
185 uint32_t ns_idx;
186 uint32_t ns_size;
187
188 void (*process) (struct convert *conv, void * SPA_RESTRICT dst[], const void * SPA_RESTRICT src[],
189 uint32_t n_samples);
190 void (*free) (struct convert *conv);
191 };
192
/* Prepare conv for use; defined outside this header. Returns an int status. */
int convert_init(struct convert *conv);

/* Call conv's process hook, passing conv itself as the first argument. */
#define convert_process(conv,...)	((conv)->process((conv), __VA_ARGS__))
/* Call conv's free hook, passing conv itself as the only argument. */
#define convert_free(conv)		((conv)->free((conv)))
197
198 #define DEFINE_FUNCTION(name,arch) \
199 void conv_##name##_##arch(struct convert *conv, void * SPA_RESTRICT dst[], \
200 const void * SPA_RESTRICT src[], uint32_t n_samples) \
201
202 DEFINE_FUNCTION(copy8d, c);
203 DEFINE_FUNCTION(copy8, c);
204 DEFINE_FUNCTION(copy16d, c);
205 DEFINE_FUNCTION(copy16, c);
206 DEFINE_FUNCTION(copy24d, c);
207 DEFINE_FUNCTION(copy24, c);
208 DEFINE_FUNCTION(copy32d, c);
209 DEFINE_FUNCTION(copy32, c);
210 DEFINE_FUNCTION(u8d_to_f32d, c);
211 DEFINE_FUNCTION(u8_to_f32, c);
212 DEFINE_FUNCTION(u8_to_f32d, c);
213 DEFINE_FUNCTION(u8d_to_f32, c);
214 DEFINE_FUNCTION(s8d_to_f32d, c);
215 DEFINE_FUNCTION(s8_to_f32, c);
216 DEFINE_FUNCTION(s8_to_f32d, c);
217 DEFINE_FUNCTION(s8d_to_f32, c);
218 DEFINE_FUNCTION(ulaw_to_f32d, c);
219 DEFINE_FUNCTION(alaw_to_f32d, c);
220 DEFINE_FUNCTION(u16_to_f32, c);
221 DEFINE_FUNCTION(u16_to_f32d, c);
222 DEFINE_FUNCTION(s16d_to_f32d, c);
223 DEFINE_FUNCTION(s16_to_f32, c);
224 DEFINE_FUNCTION(s16_to_f32d, c);
225 DEFINE_FUNCTION(s16s_to_f32d, c);
226 DEFINE_FUNCTION(s16d_to_f32, c);
227 DEFINE_FUNCTION(u32_to_f32, c);
228 DEFINE_FUNCTION(u32_to_f32d, c);
229 DEFINE_FUNCTION(s32d_to_f32d, c);
230 DEFINE_FUNCTION(s32_to_f32, c);
231 DEFINE_FUNCTION(s32_to_f32d, c);
232 DEFINE_FUNCTION(s32s_to_f32d, c);
233 DEFINE_FUNCTION(s32d_to_f32, c);
234 DEFINE_FUNCTION(u24_to_f32, c);
235 DEFINE_FUNCTION(u24_to_f32d, c);
236 DEFINE_FUNCTION(s24d_to_f32d, c);
237 DEFINE_FUNCTION(s24_to_f32, c);
238 DEFINE_FUNCTION(s24_to_f32d, c);
239 DEFINE_FUNCTION(s24s_to_f32d, c);
240 DEFINE_FUNCTION(s24d_to_f32, c);
241 DEFINE_FUNCTION(u24_32_to_f32, c);
242 DEFINE_FUNCTION(u24_32_to_f32d, c);
243 DEFINE_FUNCTION(s24_32d_to_f32d, c);
244 DEFINE_FUNCTION(s24_32_to_f32, c);
245 DEFINE_FUNCTION(s24_32_to_f32d, c);
246 DEFINE_FUNCTION(s24_32s_to_f32d, c);
247 DEFINE_FUNCTION(s24_32d_to_f32, c);
248 DEFINE_FUNCTION(f32d_to_u8d, c);
249 DEFINE_FUNCTION(f32_to_u8, c);
250 DEFINE_FUNCTION(f32_to_u8d, c);
251 DEFINE_FUNCTION(f32d_to_u8, c);
252 DEFINE_FUNCTION(f32d_to_s8d, c);
253 DEFINE_FUNCTION(f32_to_s8, c);
254 DEFINE_FUNCTION(f32_to_s8d, c);
255 DEFINE_FUNCTION(f32d_to_s8, c);
256 DEFINE_FUNCTION(f32d_to_alaw, c);
257 DEFINE_FUNCTION(f32d_to_ulaw, c);
258 DEFINE_FUNCTION(f32_to_u16, c);
259 DEFINE_FUNCTION(f32d_to_u16, c);
260 DEFINE_FUNCTION(f32d_to_s16d, c);
261 DEFINE_FUNCTION(f32_to_s16, c);
262 DEFINE_FUNCTION(f32_to_s16d, c);
263 DEFINE_FUNCTION(f32d_to_s16, c);
264 DEFINE_FUNCTION(f32d_to_s16s, c);
265 DEFINE_FUNCTION(f32_to_u32, c);
266 DEFINE_FUNCTION(f32d_to_u32, c);
267 DEFINE_FUNCTION(f32d_to_s32d, c);
268 DEFINE_FUNCTION(f32_to_s32, c);
269 DEFINE_FUNCTION(f32_to_s32d, c);
270 DEFINE_FUNCTION(f32d_to_s32, c);
271 DEFINE_FUNCTION(f32d_to_s32s, c);
272 DEFINE_FUNCTION(f32_to_u24, c);
273 DEFINE_FUNCTION(f32d_to_u24, c);
274 DEFINE_FUNCTION(f32d_to_s24d, c);
275 DEFINE_FUNCTION(f32_to_s24, c);
276 DEFINE_FUNCTION(f32_to_s24d, c);
277 DEFINE_FUNCTION(f32d_to_s24, c);
278 DEFINE_FUNCTION(f32d_to_s24s, c);
279 DEFINE_FUNCTION(f32_to_u24_32, c);
280 DEFINE_FUNCTION(f32d_to_u24_32, c);
281 DEFINE_FUNCTION(f32d_to_s24_32d, c);
282 DEFINE_FUNCTION(f32_to_s24_32, c);
283 DEFINE_FUNCTION(f32_to_s24_32d, c);
284 DEFINE_FUNCTION(f32d_to_s24_32, c);
285 DEFINE_FUNCTION(f32d_to_s24_32s, c);
286 DEFINE_FUNCTION(deinterleave_8, c);
287 DEFINE_FUNCTION(deinterleave_16, c);
288 DEFINE_FUNCTION(deinterleave_24, c);
289 DEFINE_FUNCTION(deinterleave_32, c);
290 DEFINE_FUNCTION(deinterleave_32s, c);
291 DEFINE_FUNCTION(interleave_8, c);
292 DEFINE_FUNCTION(interleave_16, c);
293 DEFINE_FUNCTION(interleave_24, c);
294 DEFINE_FUNCTION(interleave_32, c);
295 DEFINE_FUNCTION(interleave_32s, c);
296
297 #if defined(HAVE_NEON)
298 DEFINE_FUNCTION(s16_to_f32d_2, neon);
299 DEFINE_FUNCTION(s16_to_f32d, neon);
300 DEFINE_FUNCTION(f32d_to_s16, neon);
301 #endif
302 #if defined(HAVE_SSE2)
303 DEFINE_FUNCTION(s16_to_f32d_2, sse2);
304 DEFINE_FUNCTION(s16_to_f32d, sse2);
305 DEFINE_FUNCTION(s24_to_f32d, sse2);
306 DEFINE_FUNCTION(s32_to_f32d, sse2);
307 DEFINE_FUNCTION(f32d_to_s32, sse2);
308 DEFINE_FUNCTION(f32_to_s16, sse2);
309 DEFINE_FUNCTION(f32d_to_s16_2, sse2);
310 DEFINE_FUNCTION(f32d_to_s16, sse2);
311 DEFINE_FUNCTION(f32d_to_s16d, sse2);
312 #endif
313 #if defined(HAVE_SSSE3)
314 DEFINE_FUNCTION(s24_to_f32d, ssse3);
315 #endif
316 #if defined(HAVE_SSE41)
317 DEFINE_FUNCTION(s24_to_f32d, sse41);
318 #endif
319 #if defined(HAVE_AVX2)
320 DEFINE_FUNCTION(s16_to_f32d_2, avx2);
321 DEFINE_FUNCTION(s16_to_f32d, avx2);
322 DEFINE_FUNCTION(s24_to_f32d, avx2);
323 DEFINE_FUNCTION(s32_to_f32d, avx2);
324 DEFINE_FUNCTION(f32d_to_s32, avx2);
325 DEFINE_FUNCTION(f32d_to_s16_4, avx2);
326 DEFINE_FUNCTION(f32d_to_s16_2, avx2);
327 DEFINE_FUNCTION(f32d_to_s16, avx2);
328 #endif
329
330 #undef DEFINE_FUNCTION
331