1 /*
2 resample: low-latency usable and quick resampler
3
4 copyright 2018-2020 by the mpg123 project
5 licensed under the terms of the LGPL 2.1
6 see COPYING and AUTHORS files in distribution or http://mpg123.org
7
8 initially written by Thomas Orgis
9
10 The resampler consists of these steps:
11
12 1. If output rate < 1/4 input rate:
13 1.1 Repeat until output >= 1/4 input rate:
14 1.1.1 Low pass with transition band from 1/4 to 1/2 input bandwidth.
15 1.1.2 Drop samples to halve the input sample rate.
16 1.2 Pre-emphasis to compensate for interpolator response.
17
18 2. If output rate > 1/2 input rate:
19 2.1 Pre-emphasis to compensate for interpolator response.
20 2.2 2x oversampling (zero-stuffing).
21
22 3. Low pass filter with >84% bandwidth to cutoff of the smaller of
23 target rate or input rate, between 1/4 and 1/2 of intermediate
24 sample rate.
25
26 4. Interpolate to target rate.
27
28 The filters are IIR-type. The interpolators are based on Olli Niemitalo's
29 2001 paper titled 'Polynomial Interpolators for High-Quality Resampling of
30 Oversampled Audio', available as download at
31 http://yehar.com/blog/wp-content/uploads/2009/08/deip.pdf . It is these
32 interpolators that drastically improve the quality compared to cubic spline
33 interpolation.
34
35 There are different pre-emphasis filters for the case with and without
36 oversampling. At all times, the signal band before interpolation only
37 extends up to 1/2 of Nyquist, enabling the use of interpolators designed
38 for oversampled input. Without this spectral headroom, things would look
39 much more distorted.
40
The higher quality mode achieves distortion below 108 dB with a bandwidth
above 84%, while the lower quality mode weakens the final low pass to
72 dB, and has distortion around that, with slightly better bandwidth.
44
45 The downside of this all is serious phase distortion, but that does
46 not matter much to the human ear for resampling before playback of the
47 stream, as opposed to mixing resampled streams with some phase relations
48 in mind. If you want to resample your instrument samples before combining
49 them into music, you have the full signal at hand and can just use the
50 best method regardless of latency. This here is not your resampler, then.
51
52 The upside is that there is virtually no latency compared to FIR-based
53 resamplers and it should be rather easy to use. You have a direct
54 relation between input buffer and output buffer and do not have to
55 think about draining internal windowing buffers. Data with one sample
56 rate in, data with another sample rate out. Simple as that.
57
58 The performance is very good compared to windowed-sinc interpolators,
59 but is behind the FFT-based approach of libsoxr. You trade in massive
60 latency for a performance gain, there. Vectorization of the code does
61 not seem to bring much. A test with SSE intrinsics yielded about 10%
62 speedup for single-channel operation. This is a basic problem of recursive
63 computations. There is not that much dumb parallelism --- and the SIMD
64 parallelism offered in current CPUs is rather dumb and useless without
65 the specific data flow to be efficient. Or maybe I am just doing it
66 wrong. Perhaps many channels may benefit from SIMD.
67
68 If you want to optimize: Most time is spent in the low pass filtering. The
69 interpolation itself is rather cheap. The decimation filter is also worth
70 considering. Currently, it is not even optimized for efficient coefficient
71 access (that's a TODO), let alone using the more efficient Direct Form II,
72 or anything more fancy.
73
74 TODO: efficient ringbuffer access for the decimation filter
75 TODO: initialize with first sample or zero? is there an actual benefit? impulse
76 response?
77 */
78
79 #define NO_GROW_BUF
80 #define NO_SMAX
81 #define NO_SMIN
82 // SSIZE_MAX is not standard C:-/
83 #define _POSIX_C_SOURCE 200809L
84 #include "syn123_int.h"
85 #include "debug.h"
86
87 // coefficient tables generated from coeff-ellip-6-0.01-36-16.txd
88 // (coeff-{filter_type}-{order}-{passband_ripple}-{rejection_dB}-{points})
89
90 // static const unsigned int lpf_db = 36;
91
92 // Order of the lowpass filter.
93 // You'd wish to have that as a multiple of 4 to help vectorization, but
94 // even 12 is too small to have real benefit from SSE (let alone wider stuff),
95 // and 6 seems to be a limit for stability. The filter needs to be of some minimal
96 // order to be steep enough and becomes unstable, especially with interpolated
97 // coefficients, with higher orders. This is a value I settled on after a long
98 // and painful process. This works between 1/4 and 1/2 of Nyquist. I used to
99 // believe that this can be extended down to 1/16 or even less, but this is
100 // noisy at best.
101 #define LPF_ORDER 6
102
103 // points in filter coefficient interpolation tables
104 #define LPF_POINTS 16
105
// filter critical frequency in fractions of Nyquist
// (design parameter w_c at each table point, monotonically increasing)
static const float lpf_w_c[LPF_POINTS] =
{
	+1.9500000000e-01
,	+1.9867850841e-01
,	+2.0682630822e-01
,	+2.1914822332e-01
,	+2.3516664873e-01
,	+2.5424242424e-01
,	+2.7560276877e-01
,	+2.9837505460e-01
,	+3.2162494540e-01
,	+3.4439723123e-01
,	+3.6575757576e-01
,	+3.8483335127e-01
,	+4.0085177668e-01
,	+4.1317369178e-01
,	+4.2132149159e-01
,	+4.2500000000e-01
};

// actual cutoff frequency in fractions of Nyquist
// (realized cutoff of the filter designed at the matching w_c above;
// lpf_init() maps a requested cutoff through this table back to w_c)
static const float lpf_cutoff[LPF_POINTS] =
{
	+2.4618202393e-01
,	+2.5064642208e-01
,	+2.6050157602e-01
,	+2.7531601376e-01
,	+2.9440817636e-01
,	+3.1688954073e-01
,	+3.4172256063e-01
,	+3.6778811727e-01
,	+3.9395465344e-01
,	+4.1914062138e-01
,	+4.4236385073e-01
,	+4.6277512916e-01
,	+4.7967708029e-01
,	+4.9253194430e-01
,	+5.0096263453e-01
,	+5.0475083567e-01
};

// filter bandwidth (passband fraction in percent)
static const unsigned char lpf_bw[LPF_POINTS] =
{
	85
,	85
,	85
,	85
,	85
,	86
,	86
,	86
,	87
,	87
,	88
,	88
,	88
,	89
,	89
,	89
};
168
// coefficient b_0 of the lowpass at each w_c table point
static const float lpf_b0[LPF_POINTS] =
{
	+2.5060910358e-02
,	+2.5545546150e-02
,	+2.6658960397e-02
,	+2.8449469363e-02
,	+3.0975593417e-02
,	+3.4290689410e-02
,	+3.8423542124e-02
,	+4.3355834055e-02
,	+4.8998511458e-02
,	+5.5171358029e-02
,	+6.1592382077e-02
,	+6.7884279531e-02
,	+7.3603031799e-02
,	+7.8288471817e-02
,	+8.1529628810e-02
,	+8.3031304399e-02
};

// derivative d b_0/d w_c (used as spline slope in lpf_init())
static const float lpf_mb0[LPF_POINTS] =
{
	+1.3023622859e-01
,	+1.3326373417e-01
,	+1.4006275119e-01
,	+1.5061692736e-01
,	+1.6489815146e-01
,	+1.8285716898e-01
,	+2.0437030679e-01
,	+2.2914833420e-01
,	+2.5663256095e-01
,	+2.8591120057e-01
,	+3.1568709259e-01
,	+3.4431969261e-01
,	+3.6995119359e-01
,	+3.9070858956e-01
,	+4.0495210591e-01
,	+4.1152140765e-01
};
210
// coefficients b_1 to b_n (numerator, one row per w_c table point)
static const float lpf_b[LPF_POINTS][LPF_ORDER] =
{
	{ -5.1762847048e-02, +8.4153207389e-02, -8.3511150073e-02
	, +8.4153207389e-02, -5.1762847048e-02, +2.5060910358e-02 }
,	{ -5.0602124846e-02, +8.2431045673e-02, -8.0086433599e-02
	, +8.2431045673e-02, -5.0602124846e-02, +2.5545546150e-02 }
,	{ -4.7865340043e-02, +7.8920478243e-02, -7.2561589822e-02
	, +7.8920478243e-02, -4.7865340043e-02, +2.6658960397e-02 }
,	{ -4.3255354022e-02, +7.4480386867e-02, -6.1251009130e-02
	, +7.4480386867e-02, -4.3255354022e-02, +2.8449469363e-02 }
,	{ -3.6312512281e-02, +7.0487561023e-02, -4.6392636636e-02
	, +7.0487561023e-02, -3.6312512281e-02, +3.0975593417e-02 }
,	{ -2.6445440921e-02, +6.8811704409e-02, -2.7845395780e-02
	, +6.8811704409e-02, -2.6445440921e-02, +3.4290689410e-02 }
,	{ -1.3021846935e-02, +7.1687243737e-02, -4.8530219060e-03
	, +7.1687243737e-02, -1.3021846935e-02, +3.8423542124e-02 }
,	{ +4.4706427843e-03, +8.1414913409e-02, +2.3941615853e-02
	, +8.1414913409e-02, +4.4706427843e-03, +4.3355834055e-02 }
,	{ +2.6209675772e-02, +9.9864368285e-02, +6.0088123327e-02
	, +9.9864368285e-02, +2.6209675772e-02, +4.8998511458e-02 }
,	{ +5.1814692315e-02, +1.2783927823e-01, +1.0454069691e-01
	, +1.2783927823e-01, +5.1814692315e-02, +5.5171358029e-02 }
,	{ +8.0182308668e-02, +1.6447692384e-01, +1.5672311528e-01
	, +1.6447692384e-01, +8.0182308668e-02, +6.1592382077e-02 }
,	{ +1.0945240695e-01, +2.0692267707e-01, +2.1383540251e-01
	, +2.0692267707e-01, +1.0945240695e-01, +6.7884279531e-02 }
,	{ +1.3715527898e-01, +2.5049018459e-01, +2.7077975696e-01
	, +2.5049018459e-01, +1.3715527898e-01, +7.3603031799e-02 }
,	{ +1.6054360691e-01, +2.8938069305e-01, +3.2088440740e-01
	, +2.8938069305e-01, +1.6054360691e-01, +7.8288471817e-02 }
,	{ +1.7705368035e-01, +3.1783679836e-01, +3.5729711842e-01
	, +3.1783679836e-01, +1.7705368035e-01, +8.1529628810e-02 }
,	{ +1.8478918080e-01, +3.3142891364e-01, +3.7464012836e-01
	, +3.3142891364e-01, +1.8478918080e-01, +8.3031304399e-02 }
};
247
// derivatives (d b_1/d w_c) to (d b_n/d w_c), spline slopes for lpf_b
static const float lpf_mb[LPF_POINTS][LPF_ORDER] =
{
	{ +3.0947524511e-01, -4.7927490698e-01, +9.3391532013e-01
	, -4.7927490702e-01, +3.0947524507e-01, +1.3023622860e-01 }
,	{ +3.2168974799e-01, -4.5690983225e-01, +9.2827504448e-01
	, -4.5690983214e-01, +3.2168974795e-01, +1.3326373418e-01 }
,	{ +3.5049895985e-01, -4.0402713817e-01, +9.1976126033e-01
	, -4.0402713816e-01, +3.5049895984e-01, +1.4006275120e-01 }
,	{ +3.9869096973e-01, -3.1471026348e-01, +9.1858562318e-01
	, -3.1471026347e-01, +3.9869096972e-01, +1.5061692736e-01 }
,	{ +4.6976534041e-01, -1.8018044797e-01, +9.4154172198e-01
	, -1.8018044798e-01, +4.6976534042e-01, +1.6489815146e-01 }
,	{ +5.6706828511e-01, +1.0247608319e-02, +1.0113425453e+00
	, +1.0247608337e-02, +5.6706828510e-01, +1.8285716898e-01 }
,	{ +6.9280120305e-01, +2.6709404538e-01, +1.1535785914e+00
	, +2.6709404540e-01, +6.9280120305e-01, +2.0437030678e-01 }
,	{ +8.4703144845e-01, +5.9748112137e-01, +1.3910603904e+00
	, +5.9748112138e-01, +8.4703144845e-01, +2.2914833420e-01 }
,	{ +1.0268432839e+00, +1.0012919713e+00, +1.7366400230e+00
	, +1.0012919713e+00, +1.0268432839e+00, +2.5663256095e-01 }
,	{ +1.2257925890e+00, +1.4678100718e+00, +2.1866645243e+00
	, +1.4678100718e+00, +1.2257925891e+00, +2.8591120056e-01 }
,	{ +1.4338418956e+00, +1.9740692730e+00, +2.7173819083e+00
	, +1.9740692730e+00, +1.4338418956e+00, +3.1568709258e-01 }
,	{ +1.6379144032e+00, +2.4857766605e+00, +3.2858109638e+00
	, +2.4857766605e+00, +1.6379144032e+00, +3.4431969261e-01 }
,	{ +1.8231025322e+00, +2.9609624523e+00, +3.8352350558e+00
	, +2.9609624523e+00, +1.8231025322e+00, +3.6995119359e-01 }
,	{ +1.9744172816e+00, +3.3557576586e+00, +4.3041807299e+00
	, +3.3557576587e+00, +1.9744172816e+00, +3.9070858955e-01 }
,	{ +2.0788103417e+00, +3.6311428529e+00, +4.6368982737e+00
	, +3.6311428526e+00, +2.0788103416e+00, +4.0495210591e-01 }
,	{ +2.1270907033e+00, +3.7592676993e+00, +4.7930965582e+00
	, +3.7592676992e+00, +2.1270907033e+00, +4.1152140769e-01 }
};
284
// coefficients a_1 to a_n (denominator, a_0 == 1 implied)
static const float lpf_a[LPF_POINTS][LPF_ORDER] =
{
	{ -3.9678160947e+00, +7.1582705272e+00, -7.3103432700e+00
	, +4.4271625605e+00, -1.4968822486e+00, +2.2103607837e-01 }
,	{ -3.9217586689e+00, +7.0245007818e+00, -7.1375017960e+00
	, +4.3093638600e+00, -1.4547982662e+00, +2.1489651918e-01 }
,	{ -3.8190188341e+00, +6.7322820225e+00, -6.7643407898e+00
	, +4.0573843887e+00, -1.3653052102e+00, +2.0191441068e-01 }
,	{ -3.6618359076e+00, +6.3015570638e+00, -6.2251488099e+00
	, +3.6990273202e+00, -1.2392343109e+00, +1.8379956602e-01 }
,	{ -3.4544704643e+00, +5.7631160859e+00, -5.5687366968e+00
	, +3.2721548186e+00, -1.0907981304e+00, +1.6273969374e-01 }
,	{ -3.2034885530e+00, +5.1559829152e+00, -4.8508981080e+00
	, +2.8177192974e+00, -9.3461709876e-01, +1.4091459119e-01 }
,	{ -2.9178775764e+00, +4.5233648501e+00, -4.1259259822e+00
	, +2.3730384158e+00, -7.8317396639e-01, +1.2011720880e-01 }
,	{ -2.6088711387e+00, +3.9075669790e+00, -3.4391290636e+00
	, +1.9669306242e+00, -6.4531233315e-01, +1.0156466880e-01 }
,	{ -2.2894468741e+00, +3.3447965777e+00, -2.8221523134e+00
	, +1.6175400427e+00, -5.2592289748e-01, +8.5891269383e-02 }
,	{ -1.9735577122e+00, +2.8610227169e+00, -2.2920156678e+00
	, +1.3327010249e+00, -4.2655597907e-01, +7.3258414200e-02 }
,	{ -1.6752255568e+00, +2.4698591405e+00, -1.8535096177e+00
	, +1.1119902037e+00, -3.4650830575e-01, +6.3506595124e-02 }
,	{ -1.4076456238e+00, +2.1728886190e+00, -1.5035402243e+00
	, +9.4944185607e-01, -2.8395564864e-01, +5.6296779542e-02 }
,	{ -1.1824218415e+00, +1.9621904134e+00, -1.2356636112e+00
	, +8.3617447471e-01, -2.3684730680e-01, +5.1219220854e-02 }
,	{ -1.0089985054e+00, +1.8243200388e+00, -1.0434432700e+00
	, +7.6259361700e-01, -2.0344554024e-01, +4.7870210629e-02 }
,	{ -8.9429544087e-01, +1.7447560305e+00, -9.2208898364e-01
	, +7.2012730057e-01, -1.8252811740e-01, +4.5906155400e-02 }
,	{ -8.4251214238e-01, +1.7118654093e+00, -8.6865163772e-01
	, +7.0252569491e-01, -1.7335864376e-01, +4.5082431755e-02 }
};
321
// derivatives (d a_1/d w_c) to (d a_n/d w_c), spline slopes for lpf_a
static const float lpf_ma[LPF_POINTS][LPF_ORDER] =
{
	{ +1.2492433161e+01, -3.6517018434e+01, +4.7357395713e+01
	, -3.2369784989e+01, +1.1586055408e+01, -1.6933077254e+00 }
,	{ +1.2548734957e+01, -3.6212194682e+01, +4.6616582888e+01
	, -3.1678742548e+01, +1.1296037667e+01, -1.6449911143e+00 }
,	{ +1.2669371015e+01, -3.5511519947e+01, +4.4983356850e+01
	, -3.0180967586e+01, +1.0676583147e+01, -1.5427341805e+00 }
,	{ +1.2841272347e+01, -3.4388021134e+01, +4.2540425267e+01
	, -2.8002157377e+01, +9.7976496751e+00, -1.3997892255e+00 }
,	{ +1.3046121049e+01, -3.2820302039e+01, +3.9430990541e+01
	, -2.5324586959e+01, +8.7534918962e+00, -1.2330749768e+00 }
,	{ +1.3263302462e+01, -3.0810041034e+01, +3.5857000371e+01
	, -2.2361001245e+01, +7.6447176195e+00, -1.0593731098e+00 }
,	{ +1.3473094693e+01, -2.8396265557e+01, +3.2063474406e+01
	, -1.9323493868e+01, +6.5616850615e+00, -8.9237379829e-01 }
,	{ +1.3659446627e+01, -2.5661488138e+01, +2.8308712196e+01
	, -1.6394785270e+01, +5.5731925958e+00, -7.4123740640e-01 }
,	{ +1.3811818531e+01, -2.2727774254e+01, +2.4827395931e+01
	, -1.3709404604e+01, +4.7220289931e+00, -6.1062450746e-01 }
,	{ +1.3925825330e+01, -1.9744316114e+01, +2.1797535816e+01
	, -1.1348774538e+01, +4.0264909835e+00, -5.0168575497e-01 }
,	{ +1.4002735155e+01, -1.6870564740e+01, +1.9320924822e+01
	, -9.3489487064e+00, +3.4855436350e+00, -4.1340259191e-01 }
,	{ +1.4048120627e+01, -1.4259541011e+01, +1.7421558040e+01
	, -7.7156336592e+00, +3.0852238544e+00, -3.4381636627e-01 }
,	{ +1.4070068384e+01, -1.2044779182e+01, +1.6060419989e+01
	, -6.4399486362e+00, +2.8047048140e+00, -2.9090554179e-01 }
,	{ +1.4077328201e+01, -1.0332356944e+01, +1.5160803823e+01
	, -5.5100288590e+00, +2.6213968366e+00, -2.5305663714e-01 }
,	{ +1.4077683041e+01, -9.1976058009e+00, +1.4636862941e+01
	, -4.9166414668e+00, +2.5150332199e+00, -2.2919336158e-01 }
,	{ +1.4076709161e+01, -8.6849570278e+00, +1.4418874781e+01
	, -4.6538327513e+00, +2.4707840042e+00, -2.1869079956e-01 }
};
358
// Low pass with 1/4 pass band, 1/4 transition band on top, for
// instant decimation by 2 and further processing to a rate below
// 1/4 of original sampling rate.
// This tuned instance needs only order 12 once and gives more
// attenuation than the regular resampling lowpass filter. Also, the
// pass band is maximally preserved for no additional bandwidth loss.
// It is a Chebyshev type 2 filter with clean pass band but of course
// wild phase shifts. Designed with octave's cheby2(12,112,0.5).

#define LPF_4_ORDER 12

// Row 0: numerator b_0..b_12, row 1: denominator a_0..a_12 (a_0 == 1).
static const float lpf_4_coeff[2][LPF_4_ORDER+1] =
{
	{ +4.942274456389e-04, +2.456588519653e-03, +7.333608109998e-03
	, +1.564190404912e-02, +2.595749312226e-02, +3.475411633752e-02
	, +3.823783863736e-02, +3.475411633752e-02, +2.595749312226e-02
	, +1.564190404912e-02, +7.333608109998e-03, +2.456588519652e-03
	, +4.942274456389e-04 }
,	{ +1.000000000000e+00, -3.414853772012e+00, +6.812049477837e+00
	, -8.988639858221e+00, +8.737226725937e+00, -6.382745001626e+00
	, +3.590542256812e+00, -1.543464216385e+00, +5.036659057430e-01
	, -1.203614775500e-01, +2.006360736124e-02, -2.070940463771e-03
	, +1.010063725617e-04 }
};
383
// Pre-emphasis filters for the differing interpolations, each stored
// as PREEMP_ORDER+1 coefficients: row 0 numerator b, row 1 denominator a.
// Naming: {1x,2x} = without/with 2x oversampling, opt{4,6}p = for the
// optimal 4-point (dirty) or 6-point (fine) interpolator, {4,5}o = order.

#define PREEMP_ORDER 5

static const float preemp_1xopt4p4o_coeff[2][6] =
{
	{ +1.4537908355e+00, -1.5878441906e-01, +7.6588945557e-01
	, -1.1187528142e-01, +4.9169512092e-02, -6.1692905372e-03 }
,	{ +1.0000000000e+00, +1.9208553603e-01, +6.4223479412e-01
	, +8.4340248522e-02, +6.9072561856e-02, +4.2876716239e-03 }
};
static const float preemp_1xopt6p5o_coeff[2][6] =
{
	{ +1.7736168976e+00, -2.9739831101e-01, +8.3183026784e-01
	, -1.5434578823e-01, +3.0236127398e-02, -2.1054630699e-03 }
,	{ +1.0000000000e+00, +2.9206686404e-01, +6.5605633523e-01
	, +1.4922672324e-01, +7.4792388477e-02, +9.6914195316e-03 }
};
static const float preemp_2xopt4p4o_coeff[2][6] =
{
	{ +1.1866573548e+00, +1.1973031478e+00, -9.4241923873e-02
	, -4.0863707262e-01, -8.0536389300e-02, -3.8911856607e-04 }
,	{ +1.0000000000e+00, +1.2122990222e+00, +1.0444255630e-01
	, -3.7674911658e-01, -1.3106237366e-01, -8.7740900285e-03 }
};
static const float preemp_2xopt6p5o_coeff[2][6] =
{
	{ +1.2995411641e+00, +1.3148944902e+00, -4.3762040619e-02
	, -3.7595940266e-01, -6.2452466903e-02, +8.5987740998e-04 }
,	{ +1.0000000000e+00, +1.3227482221e+00, +2.6592565819e-01
	, -3.1166281821e-01, -1.3266510256e-01, -1.1224337963e-02 }
};
416
// TODO: switch the decimation filter to Direct Form II
// Per-channel history of the order-12 decimation lowpass.
struct lpf4_hist
{
	// Filter history (to be switched to Direct Form 2).
	float x[LPF_4_ORDER]; // past input samples
	float y[LPF_4_ORDER]; // past output samples (recursive part)
};
424
425 #ifndef NO_DE_DENORM
426 // Battling denormals. They naturally occur in IIR filters and
427 // need to be avoided since they hurt performance on many CPUs.
428 // Usually, gcc -ffast-math would also avoid them, but we cannot
429 // count on that and debugging performance issues becomes confusing.
430 // The idea is to add an alternatingly positive/negative small number
431 // that is orders of magnitude above denormals to freshly computed
432 // samples.
// Downside: You never see a true zero if there is true zero on input.
// Upside would be that you'd convert to 24 or 32 bit integer anyway,
// where you'd get your zero back (1 in 32 bit is around 5e-10).
436 static const float denorm_base = 1e-15;
437 #define DE_DENORM(val, base) (val) += (base);
438 #define DE_DENORM_FLIP(base) (base) = -(base);
439 #define DE_DENORM_INIT(base, sign) (base) = (sign)*denorm_base;
440 #else
441 #define DE_DENORM(val, base)
442 #define DE_DENORM_FLIP(base)
443 #define DE_DENORM_INIT(base, sign)
444 #endif
445
// State of one 2x decimation stage: per-channel lowpass history
// plus bookkeeping for the sample-dropping step.
struct decimator_state
{
	unsigned int sflags; // also stores decimation position (0 or 1)
	unsigned int n1; // history position, reset to 0 on (re)initialization in decimate()
	struct lpf4_hist *ch; // one for each channel, pointer into common array
	float *out_hist; // channels*STAGE_HISTORY, pointer to common block
#ifndef NO_DE_DENORM
	float dede; // current de-denormalization offset
#endif
};
456
457 #ifdef SYN123_HIGH_PREC
458 typedef double lpf_sum_type;
459 #else
460 typedef float lpf_sum_type;
461 #endif
462
463 // The amount of lowpass runs determines much of the quality.
464 // Each run gives lpf_db reduction. The lowpass also includes the
465 // pre-emphasis and oversampling.
466 #define LPF_TIMES 3
467 #define DIRTY_LPF_TIMES 2
468 #define LOWPASS lowpass3_df2
469 #define LOWPASS_PREEMP lowpass3_df2_preemp
470 #define LOWPASS_PREEMP_2X lowpass3_df2_preemp_2x
471 #define DIRTY_LOWPASS lowpass2_df2
472 #define DIRTY_LOWPASS_PREEMP lowpass2_df2_preemp
473 #define DIRTY_LOWPASS_PREEMP_2X lowpass2_df2_preemp_2x
474 // The number of points used in the interpolators.
475 #define FINE_POINTS 6
476 #define DIRTY_POINTS 4
477
// The data structure accommodates a fixed amount of low pass applications.
479 #define LPF_MAX_TIMES LPF_TIMES
480
// Let's define what we see as a reasonable history size for an IIR lowpass.
482 // On arrival of a new sample, <order> previous samples are needed, both
483 // input and output (recursive). Let's simply double that to cater for
484 // the recursive nature, ensuring that each recursive input at least already
485 // contains something from the new samples
486 #define LPF_HISTORY(order) (2*(order))
487
488 // Number of samples to keep for smooth rate changes on
489 // addition/removal of stages.
490 #define STAGE_HISTORY 48
491
// Bit flags for the sflags members, tracking configuration state
// and whether the various filter histories are in flow.
enum state_flags
{
	inter_flow = 1<<0 // interpolator is in flow (history primed)
,	preemp_configured = 1<<1 // pre-emphasis coefficients are set up
,	preemp_flow = 1<<2 // pre-emphasis history is in flow
,	decimate_store = 1<<3 // one skipped sample from last buffer
,	oversample_2x = 1<<4 // x2 oversampling or not
,	lowpass_configured = 1<<5 // lowpass coefficients are set up (see lpf_init())
,	lowpass_flow = 1<<6 // lowpass history is in flow (see decimate())
,	dirty_method = 1<<7 // lower-quality (cheaper) processing selected
,	smooth_change = 1<<8 // keep stage history for smooth rate changes
};
504
505 // TODO: tune that to the smallest sensible value.
506 #ifndef SYN123_BATCH
507 #define BATCH 128
508 #else
509 #define BATCH SYN123_BATCH
510 #endif
511
512 #if (BATCH < STAGE_HISTORY)
513 #error "Batch size cannot be smaller than the stage history."
514 #endif
515
// Per-channel filter and interpolator state.
struct channel_history
{
	// Direct Form II histories for preemp and lowpass
	float pre_w[PREEMP_ORDER]; // history for Direct Form II
	float lpf_w[LPF_MAX_TIMES][LPF_ORDER]; // one DF2 history per lowpass pass
	float x[5]; // past input samples for interpolator
	float c[6]; // interpolator coefficients
};
524
// Complete state of one resampler instance.
struct resample_data
{
	unsigned int sflags; // combination of enum state_flags bits
	// The configured resampling function to call.
	// Decides: oversampling/nothing/decimation, fine/dirty
	size_t (*resample_func)(struct resample_data *, float *, size_t, float *);
	// The lowpass functions used before interpolation, including preemp.
	void (*lowpass_2x_func)(struct resample_data *, float *, size_t, float *);
	void (*lowpass_func) (struct resample_data *, float *, size_t);
	// Maximum number of input samples that can be fed in one go to have the
	// number of output samples safely within the limit of size_t.
	size_t input_limit;
	// Decimator state. Each stage has an instance of lpf_4 and the decimator.
	unsigned int decim_stages;
	struct decimator_state *decim;
	// storage for the above to avoid nested malloc
	struct lpf4_hist *decim_hist;
	struct channel_history *ch; // one entry per channel
	// Blocks of STAGE_HISTORY*channels samples.
	// 0: plain input for first decimation, oversampling or direct interpolation
	// 1: output of first decimation
	// 2: output of second decimation, etc.
	float *stage_history; // storage for past input of final (2x) lowpass
	float *frame; // One PCM frame to work on (one sample for each channel).
	float *prebuf; // [channels*BATCH]
	float *upbuf; // [channels*2*BATCH]
#ifndef NO_DE_DENORM
	float dede; // de-denormalization offset, alternatingly used both in preemp and lpf
#endif
	// Final lowpass filter setup.
	float lpf_cutoff; // desired cutoff in fractions of Nyquist
	float lpf_w_c; // interpolated design parameter w_c (see lpf_init())
	unsigned char lpf_bw; // Bandwidth in percent, rounded to integer.
	// Pre-emphasis filter coefficients and history.
	float pre_b0;
	unsigned char pre_n1; // ring buffer position (most recent past), like lpf_n1
	float pre_b[PREEMP_ORDER][PREEMP_ORDER];
	float pre_a[PREEMP_ORDER][PREEMP_ORDER];
	// Same for the low pass.
	float lpf_b0;
	// Low pass filter history in ringbuffer. For each ringbuffer state, there is
	// one mirroring set of coefficients to avoid re-sorting during application.
	unsigned char lpf_n1; // Current index of the most recent past.
	// It may help to have these aligned, but since LPF_ORDER is not 4 nor 8, things
	// just don't add up for vector instructions.
	float lpf_b[LPF_ORDER][LPF_ORDER];
	float lpf_a[LPF_ORDER][LPF_ORDER];
	// Interpolator state.
	// In future, I could pull the same trick with ringbuffer and copies
	// of coefficients (matrices, even) as for lpf.
	// TODO: maybe store more of these for better filter adaption on rate changes, or
	// at least always the full set of 5.
	long offset; // interpolator offset
	unsigned int channels; // number of interleaved channels
	long inrate; // configured input rate
	long vinrate; // virtual input rate — NOTE(review): presumably the intermediate rate after decimation/oversampling; set outside this chunk
	long outrate; // configured output rate
	long voutrate; // virtual output rate, counterpart to vinrate
};
584
585 // Interpolation of low pass coefficients for specified cutoff, using
586 // the precomputed table. Instead of properly computing the filter
587 // at runtime, a cheap spline interpolation in the stored tables
588 // does the job, too. This works for filters that are not too unstable.
589 // The employed 6th-order filter still works with the associated rounding
590 // errors, in the limited range of cutoff between 1/4 and 1/2 of Nyquist.
591
// Constructing the low pass filter for a bit less than claimed cutoff,
// numerics shift things a bit (scale is applied to the request in lpf_init()).
static const float lpf_cut_scale = 0.995;
595
// Cubic Hermite spline evaluation on one interval.
// x:  coordinate to evaluate at
// xa: lower interval boundary, with value a and derivative da
// xb: upper interval boundary, with value b and derivative db
static float spline_val( float x, float xa, float xb
,	float a, float b, float da, float db )
{
	const float width = xb-xa;
	const float t   = (x-xa)/width;
	const float tt  = t*t;
	const float ttt = tt*t;
	// Hermite basis functions; the derivative ones carry the interval width.
	const float basis_a  = 2*ttt-3*tt+1;
	const float basis_b  = 3*tt-2*ttt;
	const float basis_da = (ttt-2*tt+t)*width;
	const float basis_db = (ttt-tt)*width;
	return basis_a*a + basis_b*b + basis_da*da + basis_db*db;
}
617
618 // Find coordiante x in interpolation table val.
619 // Returns index for the lower boundary. Monotonic increase
620 // is assumed.
lpf_entry(const float * val,float x)621 static unsigned int lpf_entry(const float *val, float x)
622 {
623 unsigned int i = 0;
624 while(i<(LPF_POINTS-2) && val[i+1] <= x)
625 ++i;
626 return i;
627 }
628
629 // Derivative of value with respect to w_c at given
630 // index. Intervals are not of equal width ... so central
631 // difference is not straightforward. I'll simply use a
632 // inverse-weighted mean of both sides.
lpf_deriv(const float x[LPF_POINTS],const float val[LPF_POINTS],unsigned int i)633 static float lpf_deriv( const float x[LPF_POINTS]
634 , const float val[LPF_POINTS], unsigned int i )
635 {
636 float w = 0;
637 float m = 0;
638 // lpf_w_c assumed to be monotonically increasing
639 if(i<LPF_POINTS-1)
640 {
641 float w1 = x[i+1]-x[i];
642 m += (val[i+1]-val[i])/(w1*w1);
643 w += 1./w1;
644 }
645 if(i>0)
646 {
647 float w0 = x[i]-x[i-1];
648 m += (val[i]-val[i-1])/(w0*w0);
649 w += 1./w0;
650 }
651 return m/w;
652 }
653
654 // Static ring buffer index used for filter history and coefficients.
655 // i: offset
656 // n1: current position of the first entry
657 // s: size of the ring buffer
658 #define RING_INDEX(i, n1, s) ( ((n1)+(i)) % s )
659
// Compute actual low pass coefficients for the cutoff in the handle.
// This interpolates in the tables without subsequent scaling for zero
// DC gain, which has been shown not to be useful.
static void lpf_init(struct resample_data *rd)
{
	rd->sflags &= ~lowpass_configured;
	// Design for slightly less than the requested cutoff, see lpf_cut_scale.
	float real_cutoff = rd->lpf_cutoff * lpf_cut_scale;
	// It does not make sense to return an error here. It is an internal
	// implementation error if the table is too small. Just run with the
	// best value we have.
	if(real_cutoff < lpf_cutoff[0])
		real_cutoff = lpf_cutoff[0];
	if(real_cutoff > lpf_cutoff[LPF_POINTS-1])
		real_cutoff = lpf_cutoff[LPF_POINTS-1];
	// The coefficient interpolation needs w_c != cutoff.
	// Step 1: map the desired realized cutoff back to the design
	// parameter w_c via spline interpolation in the cutoff table.
	unsigned int lpfi = lpf_entry(lpf_cutoff, real_cutoff);
	rd->lpf_w_c = spline_val
	(
		real_cutoff, lpf_cutoff[lpfi], lpf_cutoff[lpfi+1]
	,	lpf_w_c[lpfi], lpf_w_c[lpfi+1]
	,	lpf_deriv(lpf_cutoff, lpf_w_c, lpfi)
	,	lpf_deriv(lpf_cutoff, lpf_w_c, lpfi+1)
	);
	mdebug("lowpass cutoff %g; w_c %g", rd->lpf_cutoff, rd->lpf_w_c);
	// Step 2: interpolate all filter coefficients over w_c,
	// using the stored derivative tables as spline slopes.
	lpfi = lpf_entry(lpf_w_c, rd->lpf_w_c);
	// The integer bandwidth percentage only gets linear interpolation.
	rd->lpf_bw = (unsigned char)( 0.5 + lpf_cut_scale*(lpf_bw[lpfi]
	+	(rd->lpf_w_c-lpf_w_c[lpfi])/(lpf_w_c[lpfi+1]-lpf_w_c[lpfi])
	*	(lpf_bw[lpfi+1]-lpf_bw[lpfi])) );
	mdebug("lowpass bandwidth: %u%%", rd->lpf_bw);
	rd->lpf_b0 = spline_val( rd->lpf_w_c, lpf_w_c[lpfi], lpf_w_c[lpfi+1]
	,	lpf_b0[lpfi], lpf_b0[lpfi+1]
	,	lpf_mb0[lpfi], lpf_mb0[lpfi+1] );
	float b[LPF_ORDER];
	float a[LPF_ORDER];
	for(int i=0; i<LPF_ORDER; ++i)
	{
		b[i] = spline_val( rd->lpf_w_c
		,	lpf_w_c[lpfi], lpf_w_c[lpfi+1]
		,	lpf_b[lpfi][i], lpf_b[lpfi+1][i]
		,	lpf_mb[lpfi][i], lpf_mb[lpfi+1][i] );
		a[i] = spline_val( rd->lpf_w_c
		,	lpf_w_c[lpfi], lpf_w_c[lpfi+1]
		,	lpf_a[lpfi][i], lpf_a[lpfi+1][i]
		,	lpf_ma[lpfi][i], lpf_ma[lpfi+1][i] );
	}
	// Step 3: store one rotated copy of the coefficients per ring buffer
	// state so application never needs to re-sort the history.
	memset(rd->lpf_a, 0, sizeof(float)*LPF_ORDER*LPF_ORDER);
	memset(rd->lpf_b, 0, sizeof(float)*LPF_ORDER*LPF_ORDER);
	for(int i=0; i<LPF_ORDER; ++i)
	{
		for(int j=0; j<LPF_ORDER; ++j)
		{
			rd->lpf_b[j][RING_INDEX(i,j,LPF_ORDER)] = b[i];
			rd->lpf_a[j][RING_INDEX(i,j,LPF_ORDER)] = a[i];
		}
	}
	debug("lpf coefficients [b,a]:");
	mdebug("%.15e", rd->lpf_b0);
	for(int i=0; i<LPF_ORDER; ++i)
		mdebug("%.15e", rd->lpf_b[0][i]);
	mdebug("%.15e", 1.);
	for(int i=0; i<LPF_ORDER; ++i)
		mdebug("%.15e", rd->lpf_a[0][i]);
	rd->sflags |= lowpass_configured;
}
724
725 // Compute an estimate of the magnitude of lowpass history values.
726 // This is used to re-scale those values on sample ratio changes to
727 // try to keep a smooth ride. Relate this to df2_initval().
728 // First approach: b_0 / (1+sum(a))
lpf_history_scale(struct resample_data * rd)729 static float lpf_history_scale(struct resample_data *rd)
730 {
731 float asum = 1;
732 for(unsigned int i=0; i<LPF_ORDER; ++i)
733 asum += rd->lpf_a[0][i];
734 return rd->lpf_b0/asum;
735 }
736
preemp_init(struct resample_data * rd)737 static void preemp_init(struct resample_data *rd)
738 {
739 const float* pre_b;
740 const float* pre_a;
741 rd->sflags &= ~preemp_configured;
742
743 #define PREEMP_COEFF(name) \
744 pre_b = preemp_ ## name ## _coeff[0]; \
745 pre_a = preemp_ ## name ## _coeff[1];
746 if(rd->sflags & oversample_2x)
747 {
748 if(rd->sflags & dirty_method)
749 {
750 PREEMP_COEFF(2xopt4p4o);
751 } else
752 {
753 PREEMP_COEFF(2xopt6p5o);
754 }
755 }
756 else
757 {
758 if(rd->sflags & dirty_method)
759 {
760 PREEMP_COEFF(1xopt4p4o);
761 } else
762 {
763 PREEMP_COEFF(1xopt6p5o);
764 }
765 }
766
767 rd->pre_b0 = pre_b[0];
768 for(int i=0; i<PREEMP_ORDER; ++i)
769 {
770 for(int j=0; j<PREEMP_ORDER; ++j)
771 {
772 rd->pre_b[j][RING_INDEX(i,j,PREEMP_ORDER)] = pre_b[i+1];
773 rd->pre_a[j][RING_INDEX(i,j,PREEMP_ORDER)] = pre_a[i+1];
774 }
775 }
776
777 rd->sflags |= preemp_configured;
778 }
779
780
// Store the last samples of given input data in the respective
// stage history buffer (STAGE_HISTORY frames per stage), keeping
// recent input around for smooth rate changes.
// in:  interleaved input, ins frames of rd->channels samples each
static void stage_history( struct resample_data *rd
,	unsigned int stage, float *in, size_t ins )
{
	// History storage is optional; nothing to do without it.
	if(!rd->stage_history)
		return;
	// Case 1: We got enough and can fill it all.
	// Case 2: Just a bit, have to shift existing
	// history to append to.
	float *hist = rd->stage_history + stage*STAGE_HISTORY*rd->channels;
	if(ins >= STAGE_HISTORY)
		memcpy( hist, in+(ins-STAGE_HISTORY)*rd->channels
		,	STAGE_HISTORY*rd->channels*sizeof(float) );
	else
	{
		// Drop the oldest ins frames, then append the fresh ones at the end.
		memmove( hist, hist+ins*rd->channels
		,	(STAGE_HISTORY-ins)*rd->channels*sizeof(float) );
		memcpy( hist+(STAGE_HISTORY-ins)*rd->channels, in
		,	ins*rd->channels*sizeof(float) );
	}
}
803
804
stage_history_init(struct resample_data * rd,unsigned int stage,float * in)805 static void stage_history_init( struct resample_data *rd
806 , unsigned int stage, float *in )
807 {
808 if(!rd->stage_history)
809 return;
810 float *hist = rd->stage_history + stage*STAGE_HISTORY*rd->channels;
811 for(size_t i=0; i<STAGE_HISTORY; ++i)
812 for(unsigned int c=0; c<rd->channels; ++c)
813 hist[i*rd->channels+c] = in[c];
814 }
815
816 // Actual resampling workers.
817
818 // To make things easier to compilers, things like the number of times
819 // a low pass filter is repeatedly applied are hardcoded in functions
820 // generated with a set of macros to avoid code duplication.
821
822 // One decimation step: low pass with transition band from 1/2 to 1/4,
823 // drop every second sample. This works within the input buffer.
// Apply one decimation stage (index dstage) to ins input frames in place:
// run the LPF_4_ORDER low pass over every frame, but store only every
// second result back to the buffer. Returns the number of frames kept.
// The decimate_store flag carries the keep/drop phase across calls.
static size_t decimate( struct resample_data *rrd, unsigned int dstage
,	float *in, size_t ins )
{
	struct decimator_state *rd = rrd->decim+dstage;
	if(!ins)
		return 0;
	mdebug("decimating %zu samples", ins);
	// First data for this stage: prime the filter history with the first
	// input frame to avoid a start-up transient, and seed the stage history.
	if(!(rd->sflags & lowpass_flow))
	{
		for(unsigned int c=0; c<rrd->channels; ++c)
			for(unsigned int j=0; j<LPF_4_ORDER; ++j)
				rd->ch[c].x[j] = rd->ch[c].y[j] = in[c];
		rd->n1 = 0;
		rd->sflags |= lowpass_flow|decimate_store;
		// Alternate the anti-denormal offset sign per stage.
		DE_DENORM_INIT(rd->dede, dstage % 2 ? -1 : +1)
		stage_history_init(rrd, dstage+1, in);
	}
	float *out = in; // output worker
	float *oout = in; // for history storage
	size_t outs = 0;
#ifndef SYN123_NO_CASES
	// Specialized loops for the common mono/stereo cases; the default
	// branch below also serves as the only path if cases are disabled.
	switch(rrd->channels)
	{
		case 1: for(size_t i=0; i<ins; ++i)
		{
			// Precompute the ring buffer indices of the filter history.
			int ni[LPF_4_ORDER];
			for(int j=0; j<LPF_4_ORDER; ++j)
				ni[j] = RING_INDEX(j, rd->n1, LPF_4_ORDER);
			rd->n1 = ni[LPF_4_ORDER-1];
			// y0 = b0x0 + b1x1 + ... + bxyn - a1y1 - ... - anyn
			// Switching y to double pushes roundoff hf noise down (?).
			lpf_sum_type ny = lpf_4_coeff[0][0] * in[i]; // a0 == 1 implicit here
			for(int j=0; j<LPF_4_ORDER; ++j)
			{
				// This direct summing without intermediate asum and bsum
				// pushs roundoff hf noise down.
				ny += rd->ch[0].x[ni[j]]*lpf_4_coeff[0][j+1];
				ny -= rd->ch[0].y[ni[j]]*lpf_4_coeff[1][j+1];
			}
			DE_DENORM(ny, rd->dede)
			rd->ch[0].x[rd->n1] = in[i];
			rd->ch[0].y[rd->n1] = ny;
			// Drop every second sample.
			// Maybe its faster doing this in a separate step after all?
			if(rd->sflags & decimate_store)
			{
				DE_DENORM_FLIP(rd->dede)
				*(out++) = ny;
				rd->sflags &= ~decimate_store;
				++outs;
			} else
				rd->sflags |= decimate_store;
		}
		break;
		case 2: for(size_t i=0; i<ins; ++i)
		{
			int ni[LPF_4_ORDER];
			for(int j=0; j<LPF_4_ORDER; ++j)
				ni[j] = RING_INDEX(j, rd->n1, LPF_4_ORDER);
			rd->n1 = ni[LPF_4_ORDER-1];
			float frame[2];
			for(unsigned int c=0; c<2; ++c)
			{
				// y0 = b0x0 + b1x1 + ... + bxyn - a1y1 - ... - anyn
				// Switching y to double pushes roundoff hf noise down (?).
				lpf_sum_type ny = lpf_4_coeff[0][0] * in[c]; // a0 == 1 implicit here
				for(int j=0; j<LPF_4_ORDER; ++j)
				{
					// This direct summing without intermediate asum and bsum
					// pushs roundoff hf noise down.
					ny += rd->ch[c].x[ni[j]]*lpf_4_coeff[0][j+1];
					ny -= rd->ch[c].y[ni[j]]*lpf_4_coeff[1][j+1];
				}
				DE_DENORM(ny, rd->dede)
				rd->ch[c].x[rd->n1] = in[c];
				rd->ch[c].y[rd->n1] = ny;
				frame[c] = ny;
			}
			in += 2;
			// Drop every second sample.
			// Maybe its faster doing this in a separate step after all?
			if(rd->sflags & decimate_store)
			{
				DE_DENORM_FLIP(rd->dede)
				*(out++) = frame[0];
				*(out++) = frame[1];
				rd->sflags &= ~decimate_store;
				++outs;
			} else
				rd->sflags |= decimate_store;
		}
		break;
		default:
#endif
		for(size_t i=0; i<ins; ++i)
		{
			int ni[LPF_4_ORDER];
			for(int j=0; j<LPF_4_ORDER; ++j)
				ni[j] = RING_INDEX(j, rd->n1, LPF_4_ORDER);
			rd->n1 = ni[LPF_4_ORDER-1];
			for(unsigned int c=0; c<rrd->channels; ++c)
			{
				// y0 = b0x0 + b1x1 + ... + bxyn - a1y1 - ... - anyn
				// Switching y to double pushes roundoff hf noise down (?).
				lpf_sum_type ny = lpf_4_coeff[0][0] * in[c]; // a0 == 1 implicit here
				for(int j=0; j<LPF_4_ORDER; ++j)
				{
					// This direct summing without intermediate asum and bsum
					// pushs roundoff hf noise down.
					ny += rd->ch[c].x[ni[j]]*lpf_4_coeff[0][j+1];
					ny -= rd->ch[c].y[ni[j]]*lpf_4_coeff[1][j+1];
				}
				DE_DENORM(ny, rd->dede)
				rd->ch[c].x[rd->n1] = in[c];
				rd->ch[c].y[rd->n1] = ny;
				rrd->frame[c] = ny;
			}
			in += rrd->channels;
			// Drop every second sample.
			// Maybe its faster doing this in a separate step after all?
			if(rd->sflags & decimate_store)
			{
				DE_DENORM_FLIP(rd->dede)
				for(unsigned int c=0; c<rrd->channels; ++c)
					*(out++) = rrd->frame[c];
				rd->sflags &= ~decimate_store;
				++outs;
			} else
				rd->sflags |= decimate_store;
		}
#ifndef SYN123_NO_CASES
	}
#endif
	// Record the decimated output as history input for the next stage.
	stage_history(rrd, dstage+1, oout, outs);
	return outs;
}
960
961 // Initialization of Direct Form 2 history.
962 // Return a constant value for w[n] that represents an endless history of given insample.
963 // Given the recursive definition: w[n] = x[n] - sum_i(a[i] * w[n-i])
964 // Given my ansatz of endless stream of x[n-i] = x[n] and hence w[n-i] = w[n], it follows
965 // that
966 // w[n] = x[n] - sum_i(a[i] * w[n]) = x[n] - sum_i(a[i]) * w[n]
967 // <=> (1 + sum_i(a[i])) w[n] = x[n]
968 // <=> w[n] = 1 / (1 + sum_i(a[i])) * x[n]
969 // Looks simple. Just a scale value.
// Steady-state Direct Form 2 history value for a constant input stream:
// w = insample / (1 + sum of a-coefficients). See derivation above.
static float df2_initval(unsigned int order, float *filter_a, float insample)
{
	float denom = 1.f;
	while(order--)
		denom += filter_a[order];
	return insample/denom;
}
978
// On-the-fly initialization of the pre-emphasis filter history: on first
// data, seed the DF2 state with the steady-state value for the first input
// frame, bail out with 'ret' if the filter was never configured.
#define PREEMP_DF2_BEGIN(initval, channels, ret) \
	if(!(rd->sflags & preemp_flow)) \
	{ \
		if(!(rd->sflags & preemp_configured)) \
		{ \
			fprintf(stderr, "unconfigured pre-emphasis\n"); \
			return ret; \
		} \
		for(int c=0; c<channels; ++c) \
		{ \
			float iv = df2_initval(PREEMP_ORDER, rd->pre_a[0], initval[c]); \
			for(int i=0; i<PREEMP_ORDER; ++i) \
				rd->ch[c].pre_w[i] = iv; \
		} \
		rd->pre_n1 = 0; \
		rd->sflags |= preemp_flow; \
	}

// Might not be faster than DFI, but for sure needs less storage.
// One pre-emphasis step per frame in Direct Form 2; coefficient tables
// are pre-rotated per ring offset (see preemp_init()), so n1 picks the row.
#define PREEMP_DF2_SAMPLE(in, out, channels) \
{ \
	unsigned char n1 = rd->pre_n1; \
	rd->pre_n1 = RING_INDEX(PREEMP_ORDER-1, rd->pre_n1, PREEMP_ORDER); \
	for(unsigned int c=0; c<channels; ++c) \
	{ \
		lpf_sum_type ny = 0; \
		lpf_sum_type nw = 0; \
		for(unsigned char i=0; i<PREEMP_ORDER; ++i) \
		{ \
			ny += rd->ch[c].pre_w[i]*rd->pre_b[n1][i]; \
			nw -= rd->ch[c].pre_w[i]*rd->pre_a[n1][i]; \
		} \
		nw += in[c]; \
		DE_DENORM(nw, rd->dede) \
		ny += rd->pre_b0 * nw; \
		rd->ch[c].pre_w[rd->pre_n1] = nw; \
		out[c] = ny; \
	} \
}

// Beginning of a low pass function with on-the-fly initialization
// of filter history.
// For oversampling lowpass with zero-stuffing, initialize to half of first
// input sample.
// Declares the local ring indices n1/n1n used by LPF_DF2_SAMPLE below.
#define LPF_DF2_BEGIN(times,initval,channels,ret) \
	if(!(rd->sflags & lowpass_flow)) \
	{ \
		if(!(rd->sflags & lowpass_configured)) \
		{ \
			fprintf(stderr, "unconfigured lowpass\n"); \
			return ret; \
		} \
		rd->lpf_n1 = 0; \
		for(unsigned int c=0; c<channels; ++c) \
		{ \
			float iv = df2_initval(LPF_ORDER, rd->lpf_a[0], initval[c]); \
			for(int j=0; j<times; ++j) \
				for(int i=0; i<LPF_ORDER; ++i) \
					rd->ch[c].lpf_w[j][i] = iv; \
		} \
		rd->sflags |= lowpass_flow; \
	} \
	int n1 = rd->lpf_n1; \
	int n1n;

// One low pass step per frame, applying the same DF2 filter 'times' times
// in series (each pass j has its own history lpf_w[j]).
#define LPF_DF2_SAMPLE(times, in, out, channels) \
	n1n = RING_INDEX(LPF_ORDER-1, n1, LPF_ORDER); \
	for(unsigned int c=0; c<channels; ++c) \
	{ \
		float old_y = in[c]; \
		for(int j=0; j<times; ++j) \
		{ \
			lpf_sum_type w = old_y; \
			lpf_sum_type y = 0; \
			for(int k=0; k<LPF_ORDER; ++k) \
			{ \
				y += rd->ch[c].lpf_w[j][k]*rd->lpf_b[n1][k]; \
				w -= rd->ch[c].lpf_w[j][k]*rd->lpf_a[n1][k]; \
			} \
			DE_DENORM(w, rd->dede) \
			rd->ch[c].lpf_w[j][n1n] = w; \
			y += rd->lpf_b0*w; \
			old_y = y; \
		} \
		out[c] = old_y; \
	} \
	n1 = n1n;

// Write the local ring index back to persistent state.
#define LPF_DF2_END \
	rd->lpf_n1 = n1;
1069
1070 // Define normal and oversampling low pass with pre-emphasis in direct form 2.
1071 // The parameter determines the number of repeated applications of the same
1072 // low pass.
1073
// Generate the low pass + pre-emphasis worker pairs. The 'times' parameter
// (number of repeated applications of the same low pass) is hardcoded into
// the generated function names lowpass<times>_df2_preemp[_2x].
// The _2x variant performs zero-stuffing 2x oversampling into 'out'
// (2*ins output frames); the plain variant filters in place.
#ifndef SYN123_NO_CASES
// Variant with specialized mono/stereo code paths.
#define LOWPASS_DF2_FUNCSX(times) \
\
static void lowpass##times##_df2_preemp_2x(struct resample_data *rd, float *in, size_t ins, float *out) \
{ \
	if(!ins) \
		return; \
	LPF_DF2_BEGIN(times,in,rd->channels,) \
	PREEMP_DF2_BEGIN(in,rd->channels,); \
	switch(rd->channels) \
	{ \
		case 1: for(size_t i=0; i<ins; ++i) \
		{ \
			float sample; \
			PREEMP_DF2_SAMPLE(in, (&sample), 1) \
			/* Zero-stuffing! Insert zero after making up for energy loss. */ \
			sample *= 2; \
			LPF_DF2_SAMPLE(times, (&sample), out, 1) \
			out++; \
			sample = 0; \
			LPF_DF2_SAMPLE(times, (&sample), out, 1) \
			DE_DENORM_FLIP(rd->dede) \
			out++; \
			in++; \
		} \
		break; \
		case 2: for(size_t i=0; i<ins; ++i) \
		{ \
			float frame[2]; \
			PREEMP_DF2_SAMPLE(in, frame, 2) \
			/* Zero-stuffing! Insert zero after making up for energy loss. */ \
			frame[0] *= 2; \
			frame[1] *= 2; \
			LPF_DF2_SAMPLE(times, frame, out, 2) \
			out += 2; \
			frame[1] = frame[0] = 0; \
			LPF_DF2_SAMPLE(times, frame, out, 2) \
			DE_DENORM_FLIP(rd->dede) \
			out += 2; \
			in += 2; \
		} \
		break; \
		default: for(size_t i=0; i<ins; ++i) \
		{ \
			PREEMP_DF2_SAMPLE(in, rd->frame, rd->channels) \
			/* Zero-stuffing! Insert zero after making up for energy loss. */ \
			for(unsigned int c=0; c<rd->channels; ++c) \
				rd->frame[c] *= 2; \
			LPF_DF2_SAMPLE(times, rd->frame, out, rd->channels) \
			out += rd->channels; \
			for(unsigned int c=0; c<rd->channels; ++c) \
				rd->frame[c] = 0; \
			LPF_DF2_SAMPLE(times, rd->frame, out, rd->channels) \
			DE_DENORM_FLIP(rd->dede) \
			out += rd->channels; \
			in += rd->channels; \
		} \
	} \
	LPF_DF2_END \
} \
\
static void lowpass##times##_df2_preemp(struct resample_data *rd, float *in, size_t ins) \
{ \
	if(!ins) \
		return; \
	float *out = in; \
	LPF_DF2_BEGIN(times, in, rd->channels,) \
	PREEMP_DF2_BEGIN(in, rd->channels,); \
	switch(rd->channels) \
	{ \
		case 1: for(size_t i=0; i<ins; ++i) \
		{ \
			float sample; \
			PREEMP_DF2_SAMPLE(in, (&sample), 1) \
			LPF_DF2_SAMPLE(times, (&sample), out, 1) \
			DE_DENORM_FLIP(rd->dede) \
			in++; \
			out++; \
		} \
		break; \
		case 2: for(size_t i=0; i<ins; ++i) \
		{ \
			float frame[2]; \
			PREEMP_DF2_SAMPLE(in, frame, 2) \
			LPF_DF2_SAMPLE(times, frame, out, 2) \
			DE_DENORM_FLIP(rd->dede) \
			in += 2; \
			out += 2; \
		} \
		break; \
		default: for(size_t i=0; i<ins; ++i) \
		{ \
			PREEMP_DF2_SAMPLE(in, rd->frame, rd->channels) \
			LPF_DF2_SAMPLE(times, rd->frame, out, rd->channels) \
			DE_DENORM_FLIP(rd->dede) \
			in += rd->channels; \
			out += rd->channels; \
		} \
	} \
	LPF_DF2_END \
}
#else
// Compact variant: one generic per-channel loop, no switch.
#define LOWPASS_DF2_FUNCSX(times) \
\
static void lowpass##times##_df2_preemp_2x(struct resample_data *rd, float *in, size_t ins, float *out) \
{ \
	if(!ins) \
		return; \
	LPF_DF2_BEGIN(times,in,rd->channels,) \
	PREEMP_DF2_BEGIN(in,rd->channels,); \
	for(size_t i=0; i<ins; ++i) \
	{ \
		PREEMP_DF2_SAMPLE(in, rd->frame, rd->channels) \
		/* Zero-stuffing! Insert zero after making up for energy loss. */ \
		for(unsigned int c=0; c<rd->channels; ++c) \
			rd->frame[c] *= 2; \
		LPF_DF2_SAMPLE(times, rd->frame, out, rd->channels) \
		out += rd->channels; \
		for(unsigned int c=0; c<rd->channels; ++c) \
			rd->frame[c] = 0; \
		LPF_DF2_SAMPLE(times, rd->frame, out, rd->channels) \
		DE_DENORM_FLIP(rd->dede) \
		out += rd->channels; \
		in += rd->channels; \
	} \
	LPF_DF2_END \
} \
\
static void lowpass##times##_df2_preemp(struct resample_data *rd, float *in, size_t ins) \
{ \
	if(!ins) \
		return; \
	float *out = in; \
	LPF_DF2_BEGIN(times, in, rd->channels,) \
	PREEMP_DF2_BEGIN(in, rd->channels,); \
	for(size_t i=0; i<ins; ++i) \
	{ \
		PREEMP_DF2_SAMPLE(in, rd->frame, rd->channels) \
		LPF_DF2_SAMPLE(times, rd->frame, out, rd->channels) \
		DE_DENORM_FLIP(rd->dede) \
		in += rd->channels; \
		out += rd->channels; \
	} \
	LPF_DF2_END \
}
#endif


// Need that indirection so that times is expanded properly for concatenation.
#define LOWPASS_DF2_FUNCS(times) \
	LOWPASS_DF2_FUNCSX(times)

LOWPASS_DF2_FUNCS(LPF_TIMES)
LOWPASS_DF2_FUNCS(DIRTY_LPF_TIMES)
1228
1229 // Some interpolators based on the deip paper:
1230 // Polynomial Interpolators for High-Quality Resampling of Oversampled Audio
1231 // REVISED VERSION
1232 // by Olli Niemitalo in October 2001
1233 // http://www.student.oulu.fi/~oniemita/DSP/INDEX.HTM
1234 // http://yehar.com/blog/wp-content/uploads/2009/08/deip.pdf
1235
// Prime the 4-point interpolator: fill the 3 stored history samples with
// the first input frame and set the phase offset so the first output lands
// at the start of the stream.
#define OPT4P4O_BEGIN(in) \
	if(!(rd->sflags & inter_flow)) \
	{ \
		for(unsigned int c=0; c<rd->channels; ++c) \
			rd->ch[c].x[0] = rd->ch[c].x[1] = rd->ch[c].x[2] = in[c]; \
		rd->offset = -rd->vinrate; \
		rd->sflags |= inter_flow; \
	}

/* See OPT6P5O_INTERPOL for logic hints. */
/* Evaluate Niemitalo's optimal 2x 4-point 4th-order z-form polynomial */
/* between the two middle points of the 4-sample window, for as many */
/* output instants as fall into the current input interval. */
#define OPT4P4O_INTERPOL(in, out, outs, channels) \
{ \
	long sampleoff = rd->offset; \
	if(sampleoff+rd->vinrate < rd->voutrate) \
	{ \
		for(unsigned int c=0; c<channels; ++c) \
		{ \
			float even1 = rd->ch[c].x[2]+rd->ch[c].x[1]; \
			float odd1 = rd->ch[c].x[2]-rd->ch[c].x[1]; \
			float even2 = in[c]+rd->ch[c].x[0]; \
			float odd2 = in[c]-rd->ch[c].x[0]; \
			rd->ch[c].c[0] = even1*0.45645918406487612f \
			+	even2*0.04354173901996461f; \
			rd->ch[c].c[1] = odd1*0.47236675362442071f \
			+	odd2*0.17686613581136501f; \
			rd->ch[c].c[2] = even1*-0.253674794204558521f \
			+	even2*0.25371918651882464f; \
			rd->ch[c].c[3] = odd1*-0.37917091811631082f \
			+	odd2*0.11952965967158000f; \
			rd->ch[c].c[4] = even1*0.04252164479749607f \
			+	even2*-0.04289144034653719f; \
		} \
		do { \
			sampleoff += rd->vinrate; \
			float z = ((float)sampleoff/rd->voutrate) - (float)0.5; \
			for(unsigned int c=0; c<channels; ++c) \
				out[c] = \
				( ( ( \
					rd->ch[c].c[4] \
					*z + rd->ch[c].c[3] \
				)*z + rd->ch[c].c[2] \
				)*z + rd->ch[c].c[1] \
				)*z + rd->ch[c].c[0]; \
			outs++; \
			out += channels; \
		} while(sampleoff+rd->vinrate < rd->voutrate); \
	} \
	for(unsigned int c=0; c<channels; ++c) \
	{ \
		rd->ch[c].x[0] = rd->ch[c].x[1]; \
		rd->ch[c].x[1] = rd->ch[c].x[2]; \
		rd->ch[c].x[2] = in[c]; \
	} \
	rd->offset = sampleoff - rd->voutrate; \
}

// No per-call cleanup needed for this interpolator.
#define OPT4P4O_END
1293
1294 // Optimal 2x (4-point, 4th-order) (z-form)
// Interpolate ins input frames to the target rate using the 4-point
// 4th-order optimal interpolator; returns the number of output frames
// written to out.
static size_t resample_opt4p4o(struct resample_data *rd, float*in
,	size_t ins, float *out)
{
	size_t outs = 0;
	if(!ins)
		return outs;
	OPT4P4O_BEGIN(in)
#ifndef SYN123_NO_CASES
	// Mono/stereo specializations; default path also serves when cases
	// are compiled out.
	switch(rd->channels)
	{
		case 1: for(size_t i=0; i<ins; ++i)
		{
			OPT4P4O_INTERPOL(in, out, outs, 1)
			in++;
		}
		break;
		case 2: for(size_t i=0; i<ins; ++i)
		{
			OPT4P4O_INTERPOL(in, out, outs, 2)
			in += 2;
		}
		break;
		default:
#endif
		for(size_t i=0; i<ins; ++i)
		{
			OPT4P4O_INTERPOL(in, out, outs, rd->channels)
			in += rd->channels;
		}
#ifndef SYN123_NO_CASES
	}
#endif
	OPT4P4O_END
	return outs;
}
1330
// Same as resample_opt4p4o(), but for exactly 2*BATCH input frames
// (the output of the 2x oversampling low pass on one BATCH of input).
static size_t resample_opt4p4o_2batch(struct resample_data *rd, float*in, float *out)
{
	size_t outs = 0;
	OPT4P4O_BEGIN(in)
#ifndef SYN123_NO_CASES
	switch(rd->channels)
	{
		case 1: for(size_t i=0; i<2*BATCH; ++i)
		{
			OPT4P4O_INTERPOL(in, out, outs, 1)
			in++;
		}
		break;
		case 2: for(size_t i=0; i<2*BATCH; ++i)
		{
			OPT4P4O_INTERPOL(in, out, outs, 2)
			in += 2;
		}
		break;
		default:
#endif
		for(size_t i=0; i<2*BATCH; ++i)
		{
			OPT4P4O_INTERPOL(in, out, outs, rd->channels)
			in += rd->channels;
		}
#ifndef SYN123_NO_CASES
	}
#endif
	OPT4P4O_END
	return outs;
}
1363
1364 // This one has low distortion, somewhat more problematic response
1365 // Optimal 2x (6-point, 5th-order) (z-form)
1366
// Prime the 6-point interpolator: fill the 5 stored history samples with
// the first input frame and set the phase offset for the first output.
#define OPT6P5O_BEGIN(in) \
	if(!(rd->sflags & inter_flow)) \
	{ \
		for(unsigned int c=0; c<rd->channels; ++c) \
		{ \
			rd->ch[c].x[0] = rd->ch[c].x[1] = rd->ch[c].x[2] \
			=	rd->ch[c].x[3] = rd->ch[c].x[4] = in[c]; \
		} \
		rd->offset = -rd->vinrate; \
		rd->sflags |= inter_flow; \
	}

/* One input frame of the optimal 2x 6-point 5th-order z-form interpolator: */
/* compute the polynomial coefficients from the symmetric/antisymmetric */
/* sample combinations, then evaluate via Horner for every output instant */
/* falling into the current input interval. */
#define OPT6P5O_INTERPOL(in, out, outs, channels) \
{ \
	/* Offset is how long ago the last sample occured. */ \
	long sampleoff = rd->offset; \
	/* First check if this interval is interesting at all. */ \
	if(sampleoff < rd->voutrate-rd->vinrate) \
	{ \
		/* Not sure if one big loop over the channels would be better. SIMD?! */ \
		for(unsigned int c=0; c<channels; ++c) \
		{ \
			float even1 = rd->ch[c].x[3]+rd->ch[c].x[2]; \
			float odd1 = rd->ch[c].x[3]-rd->ch[c].x[2]; \
			float even2 = rd->ch[c].x[4]+rd->ch[c].x[1]; \
			float odd2 = rd->ch[c].x[4]-rd->ch[c].x[1]; \
			float even3 = in[c]+rd->ch[c].x[0]; \
			float odd3 = in[c]-rd->ch[c].x[0]; \
			rd->ch[c].c[0] = even1*0.40513396007145713f + even2*0.09251794438424393f \
			+	even3*0.00234806603570670f; \
			rd->ch[c].c[1] = odd1*0.28342806338906690f + odd2*0.21703277024054901f \
			+	odd3*0.01309294748731515f; \
			rd->ch[c].c[2] = even1*-0.191337682540351941f + even2*0.16187844487943592f \
			+	even3*0.02946017143111912f; \
			rd->ch[c].c[3] = odd1*-0.16471626190554542f + odd2*-0.00154547203542499f \
			+	odd3*0.03399271444851909f; \
			rd->ch[c].c[4] = even1*0.03845798729588149f + even2*-0.05712936104242644f \
			+	even3*0.01866750929921070f; \
			rd->ch[c].c[5] = odd1*0.04317950185225609f + odd2*-0.01802814255926417f \
			+	odd3*0.00152170021558204f; \
		} \
		/* Only reaching this code if at least one evaluation is due. */ \
		do { \
			sampleoff += rd->vinrate; \
			/* Evaluating as vector sum may be worthwhile for strong upsampling, */ \
			/* possibly many channels. Otherwise, it's just more muls. */ \
			float z = ((float)sampleoff/rd->voutrate) - (float)0.5; \
			for(unsigned int c=0; c<channels; ++c) \
				out[c] = \
				( ( ( ( \
					rd->ch[c].c[5] \
					*z + rd->ch[c].c[4] \
				)*z + rd->ch[c].c[3] \
				)*z + rd->ch[c].c[2] \
				)*z + rd->ch[c].c[1] \
				)*z + rd->ch[c].c[0]; \
			outs++; \
			out += channels; \
		} while(sampleoff < rd->voutrate-rd->vinrate); \
	} \
	for(unsigned int c=0; c<channels; ++c) \
	{ \
		/* Store shifted history. Think about using RING_INDEX here, too, */ \
		rd->ch[c].x[0] = rd->ch[c].x[1]; \
		rd->ch[c].x[1] = rd->ch[c].x[2]; \
		rd->ch[c].x[2] = rd->ch[c].x[3]; \
		rd->ch[c].x[3] = rd->ch[c].x[4]; \
		rd->ch[c].x[4] = in[c]; \
	} \
	rd->offset = sampleoff - rd->voutrate; \
}

// No per-call cleanup needed for this interpolator.
#define OPT6P5O_END
1440
// Interpolate ins input frames to the target rate using the 6-point
// 5th-order optimal interpolator (fine mode); returns output frame count.
static size_t resample_opt6p5o(struct resample_data *rd, float*in
,	size_t ins, float *out)
{
	size_t outs = 0;
	if(!ins)
		return outs;
	OPT6P5O_BEGIN(in)
#ifndef SYN123_NO_CASES
	// Mono/stereo specializations; default path also serves when cases
	// are compiled out.
	switch(rd->channels)
	{
		case 1: for(size_t i=0; i<ins; ++i)
		{
			OPT6P5O_INTERPOL(in, out, outs, 1)
			in++;
		}
		break;
		case 2: for(size_t i=0; i<ins; ++i)
		{
			OPT6P5O_INTERPOL(in, out, outs, 2)
			in += 2;
		}
		break;
		default:
#endif
		for(size_t i=0; i<ins; ++i)
		{
			OPT6P5O_INTERPOL(in, out, outs, rd->channels)
			in += rd->channels;
		}
#ifndef SYN123_NO_CASES
	}
#endif
	OPT6P5O_END
	return outs;
}
1476
1477 // TODO: Remove the 2batch version after verifying that it doesn't improve things.
// Same as resample_opt6p5o(), but for exactly 2*BATCH input frames.
static size_t resample_opt6p5o_2batch(struct resample_data *rd, float*in
,	float *out)
{
	size_t outs = 0;
	OPT6P5O_BEGIN(in)
#ifndef SYN123_NO_CASES
	switch(rd->channels)
	{
		case 1: for(size_t i=0; i<2*BATCH; ++i)
		{
			OPT6P5O_INTERPOL(in, out, outs, 1)
			in++;
		}
		break;
		case 2: for(size_t i=0; i<2*BATCH; ++i)
		{
			OPT6P5O_INTERPOL(in, out, outs, 2)
			in += 2;
		}
		break;
		default:
#endif
		for(size_t i=0; i<2*BATCH; ++i)
		{
			OPT6P5O_INTERPOL(in, out, outs, rd->channels)
			in += rd->channels;
		}
#ifndef SYN123_NO_CASES
	}
#endif
	OPT6P5O_END
	return outs;
}
1511
1512 // Full resample function with oversampling.
// Generate a full resampler for the oversampling case (output rate above
// half the input rate): pre-emphasis + 2x zero-stuffing low pass into
// upbuf, then interpolation, processed in BATCH-sized chunks plus remainder.
#define RESAMPLE_FULL_2X(name, lpf_2x, lpf, interpolate_2batch, interpolate) \
static size_t name( struct resample_data *rd \
,	float *in, size_t ins, float *out ) \
{ \
	mxdebug(#name " in %zu", ins); \
	float *iin = in; \
	size_t iins = ins; \
	if(!(rd->sflags & inter_flow) && ins) \
		stage_history_init(rd, 0, iin); \
	size_t outs = 0; \
	size_t nouts = 0; \
	size_t blocks = ins/BATCH; \
	for(size_t bi = 0; bi<blocks; ++bi) \
	{ \
		/* One batch in, two batches out. */ \
		mxdebug(#name " batch %zu lowpass " #lpf_2x, bi); \
		lpf_2x(rd, in, BATCH, rd->upbuf); \
		mxdebug(#name " batch %zu interpol " #interpolate_2batch, bi); \
		nouts = interpolate_2batch(rd, rd->upbuf, out); \
		outs += nouts; \
		out += nouts*rd->channels; \
		in += BATCH*rd->channels; \
	} \
	ins -= blocks*BATCH; \
	xdebug(#name " end lowpass " #lpf_2x); \
	lpf_2x(rd, in, ins, rd->upbuf); \
	xdebug(#name " end interpol " #interpolate); \
	nouts = interpolate(rd, rd->upbuf, 2*ins, out); \
	outs += nouts; \
	mxdebug(#name " out %zu", outs); \
	stage_history(rd, 0, iin, iins); \
	return outs; \
}
1546
1547 // Full resample method without oversampling.
// Generate a full resampler without oversampling (output between 1/4 and
// 1/2 of input rate): copy into prebuf, low pass in place, interpolate.
#define RESAMPLE_FULL_1X(name, lpf, interpolate) \
static size_t name( struct resample_data *rd \
,	float *in, size_t ins, float *out ) \
{ \
	mxdebug(#name " in %zu", ins); \
	float *iin = in; \
	size_t iins = ins; \
	if(!(rd->sflags & inter_flow) && ins) \
		stage_history_init(rd, 0, iin); \
	size_t outs = 0; \
	size_t nouts = 0; \
	size_t blocks = ins/BATCH; \
	for(size_t bi = 0; bi<blocks; ++bi) \
	{ \
		mxdebug(#name " batch %zu lowpass " #lpf, bi); \
		memcpy(rd->prebuf, in, BATCH*sizeof(*in)*rd->channels); \
		lpf(rd, rd->prebuf, BATCH); \
		mxdebug(#name " batch %zu interpol " #interpolate, bi); \
		nouts = interpolate(rd, rd->prebuf, BATCH, out); \
		outs += nouts; \
		out += nouts*rd->channels; \
		in += BATCH*rd->channels; \
	} \
	ins -= blocks*BATCH; \
	xdebug(#name " end lowpass " #lpf); \
	memcpy(rd->prebuf, in, ins*sizeof(*in)*rd->channels); \
	lpf(rd, rd->prebuf, ins); \
	xdebug(#name " end interpol " #interpolate); \
	nouts = interpolate(rd, rd->prebuf, ins, out); \
	outs += nouts; \
	mxdebug(#name " out %zu", outs); \
	stage_history(rd, 0, iin, iins); \
	return outs; \
}
1582
1583 // Full resample method with decimation.
// Generate a full resampler with decimation (output below 1/4 of input
// rate): run all configured decimation stages on each chunk before the
// final low pass and interpolation.
#define RESAMPLE_FULL_0X(name, lpf, interpolate) \
static size_t name( struct resample_data *rd \
,	float *in, size_t ins, float *out ) \
{ \
	mxdebug(#name " in %zu", ins); \
	float *iin = in; \
	size_t iins = ins; \
	if(!(rd->sflags & inter_flow) && ins) \
		stage_history_init(rd, 0, iin); \
	size_t outs = 0; \
	size_t nouts = 0; \
	size_t blocks = ins/BATCH; \
	for(size_t bi = 0; bi<blocks; ++bi) \
	{ \
		mxdebug( #name " batch %zu decim %d and lowpass " #lpf \
		,	bi, rd->decim_stages ); \
		int fill = BATCH; \
		memcpy(rd->prebuf, in, fill*sizeof(*in)*rd->channels); \
		for(int ds = 0; ds<rd->decim_stages; ++ds) \
			fill = decimate(rd, ds, rd->prebuf, fill); \
		lpf(rd, rd->prebuf, fill); \
		mxdebug(#name " batch %zu interpol " #interpolate, bi); \
		nouts = interpolate(rd, rd->prebuf, fill, out); \
		outs += nouts; \
		out += nouts*rd->channels; \
		in += BATCH*rd->channels; \
	} \
	ins -= blocks*BATCH; \
	int fill = ins; \
	mxdebug( #name " end decim %d and lowpass " #lpf \
	,	rd->decim_stages ); \
	memcpy(rd->prebuf, in, fill*sizeof(*in)*rd->channels); \
	for(int ds = 0; ds<rd->decim_stages; ++ds) \
		fill = decimate(rd, ds, rd->prebuf, fill); \
	lpf(rd, rd->prebuf, fill); \
	xdebug(#name " end interpol " #interpolate); \
	nouts = interpolate(rd, rd->prebuf, fill, out); \
	outs += nouts; \
	mxdebug(#name " out %zu", outs); \
	stage_history(rd, 0, iin, iins); \
	return outs; \
}

// Define the set of resamplers for a given set of lowpasses and interpolators.
#define RESAMPLE_FULL( name_2x, name_1x, name_0x \
,	lpf_2x, lpf, interpolate_2batch, interpolate ) \
	RESAMPLE_FULL_2X(name_2x, lpf_2x, lpf, interpolate_2batch, interpolate) \
	RESAMPLE_FULL_1X(name_1x, lpf, interpolate) \
	RESAMPLE_FULL_0X(name_0x, lpf, interpolate)

// Finally define our dirty and fine resamplers.
RESAMPLE_FULL( resample_2x_dirty, resample_1x_dirty, resample_0x_dirty
,	DIRTY_LOWPASS_PREEMP_2X, DIRTY_LOWPASS_PREEMP
,	resample_opt4p4o_2batch, resample_opt4p4o )
RESAMPLE_FULL( resample_2x_fine, resample_1x_fine, resample_0x_fine
,	LOWPASS_PREEMP_2X, LOWPASS_PREEMP
,	resample_opt6p5o_2batch, resample_opt6p5o )
1641
1642 // Here I go and implement a bit of big number functionality.
1643 // The task is just to truly be able to compute a*b/c with unsigned
1644 // integers for cases where the result does not overflow but the intermediate
1645 // product may not fit into the type.
1646 // I do not want to rely on a 128 bit type being availbe, so I take
1647 // uint64_t and run with it.
1648
1649 // Multiply two 64 bit unsigned integers and return the result as two
1650 // halves of 128 bit.
// Multiply two 64 bit unsigned integers, returning the full 128-bit
// product as a high half (*m1) and a low half (*m0). Classic schoolbook
// multiplication on 32-bit digits with explicit carry propagation.
static void mul64(uint64_t a, uint64_t b, uint64_t * m1, uint64_t * m0)
{
	const uint64_t mask32 = ((uint64_t)1 << 32) - 1;
	uint64_t ah = a >> 32, al = a & mask32;
	uint64_t bh = b >> 32, bl = b & mask32;
	// The four 32x32->64 partial products.
	uint64_t hh = ah * bh;
	uint64_t hl = ah * bl;
	uint64_t lh = al * bh;
	uint64_t ll = al * bl;
	// High half: top product plus the upper words of the cross terms.
	uint64_t hi = hh + (lh >> 32) + (hl >> 32);
	// Low half: add the shifted lower words of the cross terms, carrying
	// into the high half on wrap-around.
	uint64_t lo = ll + ((lh & mask32) << 32);
	if(lo < ll)
		++hi;
	uint64_t lo2 = lo + ((hl & mask32) << 32);
	if(lo2 < lo)
		++hi;
	*m1 = hi;
	*m0 = lo2;
}
1673
1674 // Multiply two unsigned integers numbers and divide by a third one,
1675 // returning the result in the same integer width, if no overflow
1676 // occurs. Also catches division by zero. On error, zero is returned
1677 // and the error code set to a non-zero value (1==overflow, 2==divbyzero).
1678 // The division result is computed on two halves of 128 bit internally,
1679 // and discarded if the higher half contains set bits.
1680 // Also, if m != NULL, the remainder is stored there.
1681 // The actually interesting function for the interpolation:
1682 // e = (a*b+off)/d
1683 // This is bounded by zero: If (a*b+off) < 0, this returns just
1684 // zero, also zero remainder.
// Compute (a*b+off)/c in 64 bit without a 128 bit type, using mul64() for
// the intermediate 128-bit product. Errors via *err: 1 on overflow of the
// final result, 2 on division by zero. Negative (a*b+off) is pinned to
// zero. If m != NULL, the remainder is stored there.
static uint64_t muloffdiv64( uint64_t a, uint64_t b, int64_t off, uint64_t c
,	int *err, uint64_t *m )
{
	uint64_t prod1, prod0;
	uint64_t div1, div0, div0old;
	if(c < 1)
	{
		if (err)
			*err = 2;
		return 0;
	}
	mul64(a, b, &prod1, &prod0);
	// Apply the signed offset to the 128-bit product, with carry/borrow
	// into the high half.
	if(off)
	{
		uint64_t prod0old = prod0;
		prod0 += off;
		// Offset can be positive or negative, small or large.
		// When adding to prod0, these cases can happen:
		// offset > 0, result > prod0: All fine.
		// offset > 0, result < prod0: Overflow.
		// offset < 0, result < prod0: All fine.
		// offset < 0, result > prod0: Underflow.
		if(off > 0 && prod0 < prod0old)
		{
			if(prod1 == UINT64_MAX)
			{
				if(err)
					*err = 1;
				return 0;
			}
			++prod1;
		}
		if(off < 0 && prod0 > prod0old)
		{
			// Pin to zero on total underflow.
			if(prod1 == 0)
				prod0 = 0;
			else
				--prod1;
		}
	}
	if(c == 1)
	{
		div1 = prod1;
		div0 = prod0;
		if(m)
			*m = 0;
	} else
	{
		div1 = 0;
		div0 = 0;
		// Repeatedly subtract the largest multiple of c that fits into
		// 64 bits (cblock = ctimes*c) scaled by the high half, until the
		// high half is exhausted; then divide the remaining low half.
		uint64_t ctimes = ((uint64_t) - 1) / c;
		uint64_t cblock = ctimes * c;
		while(prod1)
		{
			uint64_t cmult1, cmult0;
			mul64(ctimes, prod1, &cmult1, &cmult0);
			div1 += cmult1;
			div0old = div0;
			div0 += cmult0;
			if(div0 < div0old)
				div1++;
			mul64(cblock, prod1, &cmult1, &cmult0);
			prod1 -= cmult1;
			if(prod0 < cmult0)
				prod1--; // borrow from the high half
			prod0 -= cmult0;
		}
		div0old = div0;
		div0 += prod0 / c;
		if(m)
			*m = prod0 % c;
		if(div0 < div0old)
			div1++;
	}
	// Any bits left in the high half mean the quotient does not fit.
	if(div1)
	{
		if(err)
			*err = 1;
		return 0;
	}
	if(err)
		*err = 0;
	return div0;
}
1770
1771 // Ensure that the sample rates are not above half of the
1772 // possible range of the used data type, including a safeguard
1773 // that 64 bits will always be enough to work with the values.
1774 // This limits you to 63 bits precision in your sampling rate
1775 // ratio for all times, or a maximum speedup/slowdown by 4.6e18.
1776 // This is still a rather big number.
1777 #if LONG_MAX > INT64_MAX
1778 #define RATE_LIMIT INT64_MAX/2
1779 #else
1780 #define RATE_LIMIT LONG_MAX/2
1781 #endif
1782
// Public API: the maximum sample rate value accepted by the resampler
// (see RATE_LIMIT above for the rationale).
long attribute_align_arg
syn123_resample_maxrate(void)
{
	return RATE_LIMIT;
}
1788
1789 // Settle oversampling and decimation stages for the given
1790 // input/output rate ratio.
rate_setup(long inrate,long outrate,int * oversample,unsigned int * decim_stages)1791 static int rate_setup( long inrate, long outrate
1792 , int *oversample, unsigned int *decim_stages )
1793 {
1794 // Want enough headroom so that the sum of two rates can be represented.
1795 // And of course we don't do reverse or frozen time.
1796 if( inrate < 1 || inrate > RATE_LIMIT ||
1797 outrate < 1 || outrate > RATE_LIMIT )
1798 return -1;
1799 *oversample = outrate*2 > inrate ? 1 : 0;
1800 *decim_stages = 0;
1801 if(outrate <= LONG_MAX/4) while(outrate*4 < inrate)
1802 {
1803 (*decim_stages)++;
1804 outrate *= 2;
1805 }
1806 return 0;
1807 }
1808
1809 // Revisit the cases:
1810 // 2x: to keep the implementation free, one must assume to be able to hold
1811 // 2*ins in the data type, then 2*ins/2*inrate*outrate+1 = ins*outrate/inrate+1
1812 // 1x: no doubling, just ins/inrate*outrate+1
1813 // but since the rate to interpolate to is <= 1/2 input, outsamples
1814 // always will be less, so SIZE_MAX
1815 // 0x: output < 1/4 input. Rounding error of 2 does not matter. Things get smaller,
1816 // so SIZE_MAX.
1817
1818 size_t attribute_align_arg
syn123_resample_maxincount(long inrate,long outrate)1819 syn123_resample_maxincount(long inrate, long outrate)
1820 {
1821 int oversample;
1822 unsigned int decim_stages;
1823 if(rate_setup(inrate, outrate, &oversample, &decim_stages))
1824 return 0;
1825 if(oversample)
1826 {
1827 uint64_t outlimit;
1828 // I'm crazy. I am planning for size_t being more than 64 bits.
1829 if(SIZE_MAX-1 > UINT64_MAX)
1830 outlimit = UINT64_MAX;
1831 else
1832 outlimit = SIZE_MAX-1;
1833 // SIZE_MAX = ins*outrate/inrate+1
1834 // SIZE_MAX-1 = ins*outrate/inrate
1835 // (SIZE_MAX-1)*inrate = ins*outrate
1836 // ins = (SIZE_MAX-1)*inrate/outrate
1837 // Integer math always rounding down, this must be below the limit.
1838 int err;
1839 uint64_t maxin = muloffdiv64(outlimit, inrate, 0, outrate, &err, NULL);
1840 // The only possible error is genuine overflow. Meaning: We can
1841 // give as much input as can be represented in the variable.
1842 if(err)
1843 maxin = UINT64_MAX;
1844 return maxin > SIZE_MAX ? SIZE_MAX : (size_t)maxin;
1845 }
1846 else
1847 return SIZE_MAX;
1848 }
1849
// The history is returned as size_t, which usually provide plenty of range.
// The idea is that the history needs to fit into memory, although that is not
// really the case. It is just impractical to consider the full history of
// extreme resampling ratios.
size_t attribute_align_arg
syn123_resample_history(long inrate, long outrate, int dirty)
{
	int oversample;
	unsigned int decim_stages;
	// Invalid rate combination: report no history at all.
	if(rate_setup(inrate, outrate, &oversample, &decim_stages))
		return 0;
	// rate_setup() never configures both at once; treat it as impossible.
	if(oversample && decim_stages)
		return 0;
	// Either 4p4o or 6p5o interpolation at the end. We only need the points
	// before the current sample in the history, so one less.
	size_t history = (dirty ? DIRTY_POINTS : FINE_POINTS) - 1;
	// For the oldest of these, we need a history of input before it to
	// fill the low pass histories. The number of previous low pass points
	// simply comes on top. The number of low pass applications does not matter.
	history += LPF_HISTORY(LPF_ORDER);

	if(oversample)
	{
		// Need only half the input, but no less (round up).
		history = (history+1)/2;
	}
	// Here, the numbers can potentially become very large.
	// Each decimation step doubles the needed input samples and
	// adds the state of the decimation filter.
	while(decim_stages--)
	{
		// Reverse decimation: 2*history-1 must fit into size_t; the largest
		// safe value is (SIZE_MAX+1)/2 = SIZE_MAX/2+1, saturate otherwise.
		if(history > SIZE_MAX/2+1)
			return SIZE_MAX;
		history = (history-1) + history;
		// Adding the filter history must not wrap either.
		if(history > SIZE_MAX-LPF_HISTORY(LPF_4_ORDER))
			return SIZE_MAX;
		history += LPF_HISTORY(LPF_4_ORDER);
	}
	return history;
}
1891
#if 0
// This should be the essence of what the interpolation does regarding
// input and output sample count. Disabled reference implementation,
// kept as documentation for the muloffdiv64()-based closed forms below.
int64_t simulate_interpolate(long vinrate, long voutrate, int64_t ins)
{
	int64_t outs = 0;
	// The interpolator offset starts one full input step before the stream.
	long offset = -vinrate;
	for(int64_t n = 0; n<ins; ++n)
	{
		// Emit output samples while the current input sample still covers
		// the output grid position, then advance to the next input sample.
		while(offset+vinrate < voutrate)
		{
			++outs;
			offset += vinrate;
		}
		offset -= voutrate;
	}
	return outs;
}
#endif
1911
1912 // The exact output sample count given total input size.
1913 // It assumes zero history.
1914 // This returns a negative code on error.
1915 // The combination of interpolation and stages of decimation calls
1916 // for more than simple integer division. The overall rounding error
1917 // is limited at 2 samples, btw.
1918 int64_t attribute_align_arg
syn123_resample_total_64(long inrate,long outrate,int64_t ins)1919 syn123_resample_total_64(long inrate, long outrate, int64_t ins)
1920 {
1921 if(ins < 0)
1922 return -1;
1923 int oversample;
1924 unsigned int decim_stages;
1925 if(rate_setup(inrate, outrate, &oversample, &decim_stages))
1926 return SYN123_BAD_FMT;
1927 if(oversample && decim_stages)
1928 return SYN123_WEIRD;
1929
1930 uint64_t vins = ins;
1931 uint64_t vinrate = inrate;
1932 uint64_t voutrate = outrate;
1933 if(decim_stages)
1934 {
1935 // The interpolation uses the virtual output rate to get the same
1936 // ratio as with decimated input rate.
1937 voutrate <<= decim_stages;
1938 // The first sample of a block gets used, the following are dropped.
1939 // So I get one sample out of the first input sample, regardless of
1940 // decimation stage count. But the next sample comes only after
1941 // 2^decim_stages further input samples.
1942 // The formula for one stage: y = (x+1)/2
1943 // For n stages ... my guess is wrong. Heck, n is small, less than
1944 // 64 (see RATE_LIMIT). Let's just mimick the decimations in a loop.
1945 // Since vins < UINT64_MAX/2, adding 1 is no issue.
1946 for(unsigned int i=0; i<decim_stages; ++i)
1947 vins = (vins+1)/2;
1948 }
1949 if(oversample)
1950 {
1951 vins *= 2;
1952 vinrate *= 2;
1953 }
1954 int err;
1955 // Interpolation model:
1956 // outs = (vins*voutrate - offset - 1)/vinrate
1957 // with offset = -vinrate at the beginning. This is the rounding, no
1958 // need to deal with the remainder.
1959 uint64_t tot = muloffdiv64(vins, voutrate, vinrate-1, vinrate, &err, NULL);
1960 // Any error is treated as overflow (div by zero -> infinity).
1961 if(err)
1962 return SYN123_OVERFLOW;
1963 return (tot <= INT64_MAX) ? (int64_t)tot : SYN123_OVERFLOW;
1964 }
1965
1966 int32_t attribute_align_arg
syn123_resample_total_32(int32_t inrate,int32_t outrate,int32_t ins)1967 syn123_resample_total_32(int32_t inrate, int32_t outrate, int32_t ins)
1968 {
1969 int64_t tot = syn123_resample_total_64(inrate, outrate, ins);
1970 return (tot <= INT32_MAX) ? (int32_t)tot : SYN123_OVERFLOW;
1971 }
1972
// Public entry point: dispatch to the 64 or 32 bit implementation,
// depending on the compile-time width of lfs_alias_t (LFS_ALIAS_BITS).
lfs_alias_t syn123_resample_total(long inrate, long outrate, lfs_alias_t ins)
{
#if LFS_ALIAS_BITS+0 == 64
	return syn123_resample_total_64(inrate, outrate, ins);
#elif LFS_ALIAS_BITS+0 == 32
	return syn123_resample_total_32(inrate, outrate, ins);
#else
#error "Unexpected LFS_ALIAS_BITS value."
#endif
}
1983
1984
// The inverse function: How many input samples are needed to get at least
// the desired amount of output?
int64_t attribute_align_arg
syn123_resample_intotal_64(long inrate, long outrate, int64_t outs)
{
	// Zero output needs zero input; negative requests are an error.
	if(outs < 1)
		return outs == 0 ? 0 : -1;
	int oversample;
	unsigned int decim_stages;
	if(rate_setup(inrate, outrate, &oversample, &decim_stages))
		return SYN123_BAD_FMT;
	// rate_setup() never yields both; flag the impossible combination.
	if(oversample && decim_stages)
		return SYN123_WEIRD;
	// Virtual rates: decimation scales the output side,
	// 2x oversampling scales the input side.
	uint64_t voutrate = outrate;
	voutrate <<= decim_stages;
	uint64_t vinrate = inrate;
	if(oversample)
		vinrate *= 2;
	// This works for outs > 0:
	// ins = (outs*inrate+offset)/outrate+1
	// First offset is -inrate.
	// You may want to work it out for yourself. Or trust me;-)
	int err;
	uint64_t vtot = muloffdiv64(outs, vinrate, -(int64_t)vinrate, voutrate, &err, NULL);
	if(err)
		return SYN123_OVERFLOW;
	// The increment below must not wrap.
	if(vtot == UINT64_MAX)
		return SYN123_OVERFLOW;
	++vtot; // Must be at least 1 now!
	uint64_t tot = vtot;
	// Un-do oversampling, taking care not to hit overflow.
	if(oversample)
	{
		tot /= 2;
		// Round up: half an input sample still needs a whole one.
		if(tot*2 < vtot)
			++tot;
	}
	// Now tot is the minimum needed input sample count before interpolation.
	// Decimation still needs to be accounted for.
	// With decimation, I need the smallest input that fulfills out=(2*in+1)/2.
	for(unsigned int i=0; i<decim_stages; ++i)
	{
		if(!tot)
			return 0;
		// 2*tot-1 must fit into uint64_t; SIZE_MAX/2+1 analogue for 64 bits.
		if(tot > UINT64_MAX/2+1)
			return SYN123_OVERFLOW;
		tot = (tot - 1) + tot;
	}
	return (tot <= INT64_MAX) ? (int64_t)tot : SYN123_OVERFLOW;
}
2035
2036 int32_t attribute_align_arg
syn123_resample_intotal_32(int32_t inrate,int32_t outrate,int32_t outs)2037 syn123_resample_intotal_32(int32_t inrate, int32_t outrate, int32_t outs)
2038 {
2039 int64_t tot = syn123_resample_intotal_64(inrate, outrate, outs);
2040 return (tot <= INT32_MAX) ? (int32_t)tot : SYN123_OVERFLOW;
2041 }
2042
// Public entry point: dispatch to the 64 or 32 bit implementation,
// depending on the compile-time width of lfs_alias_t (LFS_ALIAS_BITS).
lfs_alias_t syn123_resample_intotal(long inrate, long outrate, lfs_alias_t outs)
{
#if LFS_ALIAS_BITS+0 == 64
	return syn123_resample_intotal_64(inrate, outrate, outs);
#elif LFS_ALIAS_BITS+0 == 32
	return syn123_resample_intotal_32(inrate, outrate, outs);
#else
#error "Unexpected LFS_ALIAS_BITS value."
#endif
}
2053
2054 // As any sensible return value is at least 1, this uses the unsigned
2055 // type and 0 for error/pathological input.
2056 // This function could be simplified to
2057 // uintmax_t count = prod/inrate + (prod%inrate ? 1 : 0) + 1;
// to accommodate possible rounding during decimation and interpolation,
2059 // but it would be a teensy bit less accurate. The computation time
2060 // should not matter much. The simple formula can be used as a test
2061 // to check if the elaborate function comes to a value that only
2062 // sometimes is less by 1, but not more.
2063 size_t attribute_align_arg
syn123_resample_count(long inrate,long outrate,size_t ins)2064 syn123_resample_count(long inrate, long outrate, size_t ins)
2065 {
2066 // The largest output you get from the beginning. So this can just treat ins
2067 // as offset from there.
2068 if( ins > syn123_resample_maxincount(inrate, outrate)
2069 #if SIZE_MAX > INT64_MAX
2070 || ins > INT64_MAX
2071 #endif
2072 )
2073 return 0;
2074 int64_t tot = syn123_resample_total_64(inrate, outrate, (int64_t)ins);
2075 return (tot >= 0 && tot <= SIZE_MAX) ? (size_t)tot : 0;
2076 }
2077
2078 size_t attribute_align_arg
syn123_resample_incount(long inrate,long outrate,size_t outs)2079 syn123_resample_incount(long inrate, long outrate, size_t outs)
2080 {
2081 // I had logic here about abusing syn123_resample_intotal(), but that is wrong.
2082 // Do not assume you know what interval to pick to get the maximum value.
2083 // Work the algorithm for the theoretical maximum at each point, even if that
2084 // may not be hit with your particular rate ratio and playback time.
2085 #if SIZE_MAX > INT64_MAX
2086 if(outs > INT64_MAX-1)
2087 return 0;
2088 #endif
2089 int oversample;
2090 unsigned int decim_stages;
2091 if(rate_setup(inrate, outrate, &oversample, &decim_stages))
2092 return 0;
2093 if(oversample && decim_stages)
2094 return 0;
2095 uint64_t voutrate = outrate;
2096 voutrate <<= decim_stages;
2097 uint64_t vinrate = inrate;
2098 if(oversample)
2099 vinrate *= 2;
2100 // This works for outs > 0:
2101 // ins = (outs*inrate+offset)/outrate+1
2102 // First offset is -inrate, maximum offset is -1. I think.
2103 // You may want to work it out for yourself. Or trust me;-)
2104 int err;
2105 uint64_t vtot = muloffdiv64(outs, vinrate, -1, voutrate, &err, NULL);
2106 if(err)
2107 return SYN123_OVERFLOW;
2108 if(vtot == UINT64_MAX)
2109 return SYN123_OVERFLOW;
2110 ++vtot; // Must be at least 1 now!
2111 uint64_t tot = vtot;
2112 // Un-do oversampling, taking care not to hit overflow.
2113 if(oversample)
2114 {
2115 tot /= 2;
2116 if(tot*2 < vtot)
2117 ++tot;
2118 }
2119 // Now tot is the minimum needed input sample count before interpolation.
2120 // Decimation still needs to be accounted for. Worst case: We got no
2121 // sample buffered (dropped) in any stage: So just multiply.
2122 if(tot > UINT64_MAX>>decim_stages)
2123 return SYN123_OVERFLOW;
2124 tot<<=decim_stages;
2125 return (tot <= SIZE_MAX) ? (size_t)tot : 0;
2126 }
2127
2128 // A bit of convenience. Just rely on the other functions.
2129 size_t attribute_align_arg
syn123_resample_fillcount(long input_rate,long output_rate,size_t outs)2130 syn123_resample_fillcount(long input_rate, long output_rate, size_t outs)
2131 {
2132 size_t block = syn123_resample_incount(input_rate, output_rate, outs);
2133 // Reduce that to ensure that we never get more than a buffer's fill.
2134 while( block &&
2135 syn123_resample_count(input_rate, output_rate, block) > outs )
2136 --block;
2137 return block;
2138 }
2139
// The exact predictor: How many output samples will I get _now_
// after feeding the indicated amount? This is concerned with
// Buffer sizes, so let's drop the 32/64 bit distinction.
// Since overflow can occur, we need a sign bit to signal errors.
ssize_t attribute_align_arg
syn123_resample_expect(syn123_handle *sh, size_t ins)
{
	if(!sh || !sh->rd)
		return SYN123_BAD_HANDLE;
	if(ins < 1)
		return 0;
	struct resample_data *rd = sh->rd;
	// Need to account for the current resampler state. That is:
	// 1. decimation stages having swallowed a sample or not,
	// 2. current interpolator offset.
	// The first sample of a block gets used, the following are dropped.
	// So I get one sample out of the first input sample, regardless of
	// decimation stage count. But the next sample comes only after
	// 2^decim_stages further input samples.
	// The formula for one stage: y = (x+1)/2
	// For n stages ... my guess is wrong. Heck, n is small, less than
	// 64 (see RATE_LIMIT). Let's just mimick the decimations in a loop.
	for(unsigned int i=0; i<rd->decim_stages; ++i)
	{
		// If the stage is fresh or explicitly has a past sample stored,
		// we can add one virtual input sample. Divide first to avoid overflow.
		size_t nins = ins / 2;
		// When there is one sample left over, a second one from storage
		// adds to the decimated count.
		if( nins*2 < ins && (!(rd->decim[i].sflags & lowpass_flow)
			|| (rd->decim[i].sflags & decimate_store)) )
			++nins;
		ins = nins;
	}
#if SIZE_MAX > UINT64_MAX
	if(ins > UINT64_MAX) // Really?
		return SYN123_OVERFLOW;
#endif
	uint64_t vins = ins;
	if(rd->sflags & oversample_2x)
	{
		// Zero-stuffing doubles the virtual input count; guard the doubling.
		if(vins > UINT64_MAX/2)
			return SYN123_OVERFLOW;
		vins *= 2;
	}
	int err;
	// A fresh interpolator (inter_flow unset) starts at offset -vinrate.
	int64_t offset = rd->sflags & inter_flow ? rd->offset : -rd->vinrate;
	// Interpolation model:
	// outs = (vins*voutrate - offset - 1)/vinrate
	uint64_t tot = muloffdiv64( vins, (uint64_t)rd->voutrate
		, (int64_t)(-offset-1), (uint64_t)rd->vinrate, &err, NULL );
	// Any error is treated as overflow (div by zero -> infinity).
	if(err)
		return SYN123_OVERFLOW;
	return (tot <= SSIZE_MAX) ? (ssize_t)tot : SYN123_OVERFLOW;
}
2197
// How many input samples are minimally needed to get at least the
// minimally desired output right now?
ssize_t attribute_align_arg
syn123_resample_inexpect(syn123_handle *sh, size_t outs)
{
	if(!sh || !sh->rd)
		return SYN123_BAD_HANDLE;
	if(outs < 1)
		return 0;
	struct resample_data *rd = sh->rd;
	// This works for outs > 0:
	// ins = (outs*inrate+offset)/outrate+1
	int err;
	// A fresh interpolator (inter_flow unset) starts at offset -vinrate.
	int64_t offset = rd->sflags & inter_flow ? rd->offset : -rd->vinrate;
	uint64_t vtot = muloffdiv64(outs, sh->rd->vinrate, offset, rd->voutrate
		, &err, NULL);
	if(err)
		return SYN123_OVERFLOW;
	// The increment below must not wrap.
	if(vtot == UINT64_MAX)
		return SYN123_OVERFLOW;
	++vtot;
	uint64_t tot = vtot;
	// Un-do oversampling, taking care not to hit overflow.
	if(rd->sflags & oversample_2x)
	{
		tot /= 2;
		// Round up: half an input sample still needs a whole one.
		if(tot*2 < vtot)
			++tot;
	}
	// Still: tot > 0
	// Got needed output of decimation, for each stage
	// a) need twice that amount if there is no sample buffered
	// b) need one less than twice the amount if there is a sample buffered
	for(unsigned int j=0; j<rd->decim_stages; ++j)
	{
		// Walk the stages from the last (innermost) one back to the first.
		unsigned int i = rd->decim_stages - 1 - j;
		// 2*tot-1 must fit into uint64_t.
		if(tot > UINT64_MAX/2+1)
			return SYN123_OVERFLOW;
		tot = (tot - 1) + tot;
		if( (rd->decim[i].sflags & lowpass_flow)
			&& !(rd->decim[i].sflags & decimate_store) )
		{
			if(tot == UINT64_MAX)
				return SYN123_OVERFLOW;
			++tot;
		}
	}
	return (tot <= SSIZE_MAX) ? (ssize_t)tot : SYN123_OVERFLOW;
}
2247
resample_free(struct resample_data * rd)2248 void resample_free(struct resample_data *rd)
2249 {
2250 if(!rd)
2251 return;
2252 if(rd->stage_history)
2253 free(rd->stage_history);
2254 if(rd->decim_hist)
2255 free(rd->decim_hist);
2256 if(rd->decim)
2257 free(rd->decim);
2258 if(rd->upbuf)
2259 free(rd->upbuf);
2260 if(rd->prebuf)
2261 free(rd->prebuf);
2262 if(rd->ch)
2263 free(rd->ch);
2264 if(rd->frame)
2265 free(rd->frame);
2266 free(rd);
2267 }
2268
2269 // Want to support smooth rate changes.
2270 // If there is a handle present, try to keep as much as possible.
2271 // If you change the dirty flag, things get refreshed totally.
2272 int attribute_align_arg
syn123_setup_resample(syn123_handle * sh,long inrate,long outrate,int channels,int dirty,int smooth)2273 syn123_setup_resample( syn123_handle *sh, long inrate, long outrate
2274 , int channels, int dirty, int smooth )
2275 {
2276 int err = 0;
2277 struct resample_data *rd = NULL;
2278 int fresh = 0;
2279
2280 if(inrate == 0 && outrate == 0 && channels == 0)
2281 {
2282 err = SYN123_BAD_FMT;
2283 goto setup_resample_cleanup;
2284 }
2285
2286 if(channels < 1)
2287 {
2288 err = SYN123_BAD_FMT;
2289 goto setup_resample_cleanup;
2290 }
2291
2292 // If dirty setyp changed, start anew.
2293 // TODO: implement proper smooth rate change.
2294 if( sh->rd && ( (sh->rd->channels != channels)
2295 || (!(sh->rd->sflags & dirty_method) != !dirty)
2296 || (!(sh->rd->sflags & smooth_change) != !smooth) ) )
2297 {
2298 resample_free(sh->rd);
2299 sh->rd = NULL;
2300 }
2301
2302 int oversample;
2303 unsigned int decim_stages;
2304 if(rate_setup(inrate, outrate, &oversample, &decim_stages))
2305 return SYN123_BAD_FMT;
2306 if(oversample && decim_stages)
2307 return SYN123_WEIRD;
2308 long voutrate = outrate<<decim_stages;
2309 long vinrate = oversample ? inrate*2 : inrate;
2310 if(!sh->rd)
2311 {
2312 fresh = 1;
2313 sh->rd = malloc(sizeof(*(sh->rd)));
2314 if(!sh->rd)
2315 return SYN123_DOOM;
2316 memset(sh->rd, 0, sizeof(*(sh->rd)));
2317 rd = sh->rd;
2318 rd->inrate = rd->vinrate = rd->voutrate = rd->outrate = -1;
2319 rd->resample_func = NULL;
2320 rd->lowpass_func = dirty ? DIRTY_LOWPASS_PREEMP : LOWPASS_PREEMP;
2321 rd->lowpass_2x_func = dirty ? DIRTY_LOWPASS_PREEMP_2X : LOWPASS_PREEMP_2X;
2322 rd->decim = NULL;
2323 rd->decim_hist = NULL;
2324 rd->channels = channels;
2325 rd->stage_history = NULL;
2326 DE_DENORM_INIT(rd->dede, +1)
2327 rd->frame = NULL;
2328 #ifndef SYN123_NO_CASES
2329 if(channels > 2)
2330 #endif
2331 {
2332 rd->frame = malloc(sizeof(float)*channels);
2333 if(!rd->frame)
2334 {
2335 free(rd);
2336 return SYN123_DOOM;
2337 }
2338 }
2339 rd->ch = malloc(sizeof(struct channel_history)*channels);
2340 rd->prebuf = malloc(sizeof(float)*channels*BATCH);
2341 rd->upbuf = malloc(sizeof(float)*channels*2*BATCH);
2342 if(!rd->ch || !rd->prebuf || !rd->upbuf)
2343 {
2344 if(rd->upbuf)
2345 free(rd->upbuf);
2346 if(rd->prebuf)
2347 free(rd->prebuf);
2348 if(rd->ch)
2349 free(rd->ch);
2350 if(rd->stage_history)
2351 free(rd->stage_history);
2352 if(rd->frame)
2353 free(rd->frame);
2354 free(rd);
2355 return SYN123_DOOM;
2356 }
2357 memset(rd->ch, 0, sizeof(struct channel_history)*channels);
2358 }
2359 else
2360 rd = sh->rd;
2361
2362 mdebug("init in %ld out %ld", inrate, outrate);
2363 // To support on-the-fly rate changes, superfluous decimator stages are
2364 // forgotten, new ones added as needed.
2365 // If smoothing is desired and the stream already flowing, new stages will
2366 // get started on the previously utmost stage's history.
2367 if(fresh || decim_stages != rd->decim_stages)
2368 {
2369 if(smooth)
2370 {
2371 float *sth = safe_realloc( rd->stage_history
2372 , sizeof(float)*STAGE_HISTORY*channels*(decim_stages+1) );
2373 if(!sth)
2374 {
2375 err = SYN123_DOOM;
2376 goto setup_resample_cleanup;
2377 }
2378 rd->stage_history = sth;
2379 }
2380 struct decimator_state *nd = safe_realloc( rd->decim
2381 , sizeof(*rd->decim)*decim_stages );
2382 struct lpf4_hist *ndh = safe_realloc( rd->decim_hist
2383 , sizeof(*rd->decim_hist)*decim_stages*channels );
2384 if(nd)
2385 rd->decim = nd;
2386 if(ndh)
2387 rd->decim_hist = ndh;
2388 if(!nd || !ndh)
2389 {
2390 perror("cannot allocate decimator state");
2391 err = SYN123_DOOM;
2392 goto setup_resample_cleanup;
2393 }
2394 // Link up the common memory blocks after each realloc.
2395 for(unsigned int dc=0; dc<decim_stages; ++dc)
2396 {
2397 rd->decim[dc].ch = ndh+dc*channels;
2398 rd->decim[dc].out_hist = rd->stage_history
2399 ? rd->stage_history+(dc+1)*STAGE_HISTORY*channels
2400 : NULL;
2401 }
2402 // Smoothness does matter if the interpolator produced something
2403 // previously, so inter_flow is a good flag to test.
2404 int init_stages = rd->stage_history && rd->sflags & inter_flow;
2405 // The situation: Got stage rd->decim_stages (possibly zero).
2406 // with proper output history. Need to start up the additional
2407 // stages with the output of the preceeding stages. I'll pretend
2408 // each stage history being complete, even if I know that I am
2409 // halving the count in each step.
2410 for(unsigned int dc=rd->decim_stages; dc<decim_stages; ++dc)
2411 {
2412 rd->decim[dc].sflags = 0;
2413 if(init_stages)
2414 {
2415 // Take previous stage output dc, process through
2416 // the new stage and have it make up its output history.
2417 // As decimate() overwrites its input, work on a copy.
2418 memcpy( rd->prebuf
2419 , rd->stage_history+dc*STAGE_HISTORY*rd->channels
2420 , STAGE_HISTORY*rd->channels*sizeof(float) );
2421 decimate(rd, dc, rd->prebuf, STAGE_HISTORY);
2422 }
2423 }
2424 rd->decim_stages = decim_stages;
2425 }
2426
2427 if(rd->sflags & inter_flow)
2428 {
2429 // If the virtual input rate changes (esp. increases), the interpolation
2430 // offset needs adjustment to stay at/below predicted sample counts.
2431 // Could always re-initialize the interpolation, but want to keep things
2432 // smooth.
2433 long sign = rd->offset < 0 ? -1 : +1;
2434 // Zero on overflow, suits me just fine.
2435 uint64_t noff = muloffdiv64( (uint64_t)(sign*rd->offset)
2436 , (uint64_t)vinrate, 0, (uint64_t)rd->vinrate, NULL, NULL );
2437 // Magnitude must not be too large. Too small is OK.
2438 if(noff > LONG_MAX)
2439 noff = LONG_MAX;
2440 rd->offset = sign*noff;
2441 // Total paranoia. This is the crucial condition.
2442 if(rd->offset < -vinrate)
2443 rd->offset = -vinrate;
2444 }
2445 rd->inrate = inrate;
2446 rd->outrate = outrate;
2447 rd->vinrate = vinrate;
2448 rd->voutrate = voutrate;
2449 if(oversample)
2450 rd->sflags |= oversample_2x;
2451 else
2452 rd->sflags &= ~oversample_2x;
2453
2454 // TODO: channels!
2455 if(rd->sflags & oversample_2x)
2456 rd->resample_func = dirty ? resample_2x_dirty : resample_2x_fine;
2457 else if(rd->decim_stages)
2458 rd->resample_func = dirty ? resample_0x_dirty : resample_0x_fine;
2459 else
2460 rd->resample_func = dirty ? resample_1x_dirty : resample_1x_fine;
2461
2462 if(dirty)
2463 rd->sflags |= dirty_method;
2464 if(smooth)
2465 rd->sflags |= smooth_change;
2466
2467 mdebug( "%u times decimation by 2"
2468 ", virtual output rate %ld, %ldx oversampling (%i)"
2469 , rd->decim_stages, rd->voutrate, rd->vinrate / rd->inrate
2470 , rd->sflags && oversample_2x ? 1 : 0 );
2471
2472 // Round result to single precision, but compute with double to be able
2473 // to correctly represent 32 bit longs first.
2474 rd->lpf_cutoff = (double)(rd->inrate < rd->outrate ? rd->inrate : rd->voutrate)/rd->vinrate;
2475 mdebug( "lowpass cutoff: %.8f of virtual %.0f Hz = %.0f Hz"
2476 , rd->lpf_cutoff, 0.5*rd->vinrate, rd->lpf_cutoff*0.5*rd->vinrate );
2477 mdebug("rates final: %ld|%ld -> %ld|%ld"
2478 , rd->inrate, rd->vinrate, rd->voutrate, rd->outrate );
2479 preemp_init(rd);
2480 if(rd->sflags & lowpass_flow)
2481 {
2482 // First cheap stage of rate change smoothing:
2483 // Attempt to dampen spikes from the filter history magnitudes
2484 // not matching changed filter setup by simple rescaling.
2485 // This catches the massive spikes on adding decimation stages.
2486 float scale = lpf_history_scale(rd);
2487 lpf_init(rd);
2488 mdebug( "smooth old scale: %g, new scale: %g"
2489 , scale, lpf_history_scale(rd) );
2490 scale = scale > 1e-10 ? lpf_history_scale(rd) / scale : 1.;
2491 for(unsigned int c=0; c<rd->channels; ++c)
2492 for(unsigned int j=0; j<LPF_MAX_TIMES; ++j)
2493 for(unsigned int i=0; i<LPF_ORDER; ++i)
2494 rd->ch[c].lpf_w[j][i] *= scale;
2495 // Full smoothness with present stage history: Let the lowpass roll.
2496 // Preemp needs to be re-configured beforehand!
2497 // This is the icing on the cake with much more work,
2498 // relaxing the filters to get rid of the trouble.
2499 if(rd->stage_history)
2500 {
2501 // Feed the final stage output through the filters,
2502 // manipulate the interpolator history to match.
2503 float *in = rd->stage_history
2504 + rd->decim_stages*STAGE_HISTORY*rd->channels;
2505 // Pointer to the last 5 filtered samples
2506 float *last5 = NULL;
2507 if(rd->sflags & oversample_2x)
2508 {
2509 rd->lowpass_2x_func(rd,in, STAGE_HISTORY, rd->upbuf);
2510 last5 = rd->upbuf+(2*STAGE_HISTORY-5)*rd->channels;
2511 } else
2512 {
2513 memcpy(rd->prebuf, in, STAGE_HISTORY*sizeof(*in)*rd->channels);
2514 rd->lowpass_func(rd, rd->prebuf, STAGE_HISTORY);
2515 last5 = rd->prebuf+(STAGE_HISTORY-5)*rd->channels;
2516 }
2517 // The above wrote preemp and lowpass channel histories.
2518 // Now store the last samples for the interpolator history.
2519 for(unsigned int c=0; c<rd->channels; ++c)
2520 for(unsigned int i=0; i<5; ++i)
2521 rd->ch[c].x[i] = last5[c+i*rd->channels];
2522 }
2523 }
2524 else
2525 lpf_init(rd);
2526
2527 rd->input_limit = syn123_resample_maxincount(inrate, outrate);
2528 return 0;
2529 setup_resample_cleanup:
2530 resample_free(sh->rd);
2531 sh->rd = NULL;
2532 return err;
2533 }
2534
2535 size_t attribute_align_arg
syn123_resample(syn123_handle * sh,float * MPG123_RESTRICT dst,float * MPG123_RESTRICT src,size_t samples)2536 syn123_resample( syn123_handle *sh,
2537 float * MPG123_RESTRICT dst, float * MPG123_RESTRICT src, size_t samples )
2538 {
2539 if(!sh)
2540 return 0;
2541 struct resample_data *rd = sh->rd;
2542 if(!rd)
2543 return 0;
2544 // Input limit is zero if no resampler configured.
2545 if(!samples || samples > sh->rd->input_limit)
2546 return 0;
2547 mdebug( "calling actual resample function from %p to %p with %"SIZE_P" samples"
2548 , (void*)src, (void*)dst, (size_p)samples );
2549 return rd->resample_func(rd, src, samples, dst);
2550 }
2551