1 // -----------------------------------------------------------------------
2 //
3 // Copyright (C) 2009-2011 Fons Adriaensen <fons@linuxaudio.org>
4 //
5 // This program is free software; you can redistribute it and/or modify
6 // it under the terms of the GNU General Public License as published by
7 // the Free Software Foundation; either version 2 of the License, or
8 // (at your option) any later version.
9 //
10 // This program is distributed in the hope that it will be useful,
11 // but WITHOUT ANY WARRANTY; without even the implied warranty of
12 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 // GNU General Public License for more details.
14 //
15 // You should have received a copy of the GNU General Public License
16 // along with this program; if not, write to the Free Software
17 // Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18 //
19 // -----------------------------------------------------------------------
20
21 #include "retuner.h"
22 #include <math.h>
23 #include <stdio.h>
24 #include <stdlib.h>
25 #include <string.h>
26
27 using namespace LV2AT;
28
Retuner(int fsamp)29 Retuner::Retuner (int fsamp)
30 : _fsamp (fsamp)
31 , _refpitch (440.0f)
32 , _notebias (0.0f)
33 , _corrfilt (1.0f)
34 , _corrgain (1.0f)
35 , _corroffs (0.0f)
36 , _notemask (0xFFF)
37 , _fastmode (false)
38 , _lastfastmode (false)
39 {
40 int i, h;
41 float t, x, y;
42
43 if (_fsamp < 64000) {
44 // At 44.1 and 48 kHz resample to double rate.
45 _upsamp = true;
46 _ipsize = 4096;
47 _fftlen = 2048;
48 _frsize = 128;
49 _resampler.setup (1, 2, 1, 32); // 32 is medium quality.
50 // Prefeed some input samples to remove delay.
51 _resampler.inp_count = _resampler.filtlen () - 1;
52 _resampler.inp_data = 0;
53 _resampler.out_count = 0;
54 _resampler.out_data = 0;
55 _resampler.process ();
56 } else if (_fsamp < 128000) {
57 // 88.2 or 96 kHz.
58 _upsamp = false;
59 _ipsize = 4096;
60 _fftlen = 4096;
61 _frsize = 256;
62 } else {
63 // 192 kHz, double time domain buffers sizes.
64 _upsamp = false;
65 _ipsize = 8192;
66 _fftlen = 8192;
67 _frsize = 512;
68 }
69
70 // Accepted correlation peak range, corresponding to 60..1200 Hz.
71 _ifmin = _fsamp / 1200;
72 _ifmax = _fsamp / 60;
73
74 // Various buffers
75 _ipbuff = new float[_ipsize + 3]; // Resampled or filtered input
76 _xffunc = new float[_frsize]; // Crossfade function
77 _fftTwind = (float*)fftwf_malloc (_fftlen * sizeof (float)); // Window function
78 _fftWcorr = (float*)fftwf_malloc (_fftlen * sizeof (float)); // Autocorrelation of window
79 _fftTdata = (float*)fftwf_malloc (_fftlen * sizeof (float)); // Time domain data for FFT
80 _fftFdata = (fftwf_complex*)fftwf_malloc ((_fftlen / 2 + 1) * sizeof (fftwf_complex));
81
82 // FFTW3 plans
83 _fwdplan = fftwf_plan_dft_r2c_1d (_fftlen, _fftTdata, _fftFdata, FFTW_ESTIMATE);
84 _invplan = fftwf_plan_dft_c2r_1d (_fftlen, _fftFdata, _fftTdata, FFTW_ESTIMATE);
85
86 // Clear input buffer.
87 memset (_ipbuff, 0, (_ipsize + 1) * sizeof (float));
88
89 // Create crossfade function, half of raised cosine.
90 for (i = 0; i < _frsize; i++) {
91 _xffunc[i] = 0.5 * (1 - cosf (M_PI * i / _frsize));
92 }
93
94 // Create window, raised cosine.
95 for (i = 0; i < _fftlen; i++) {
96 _fftTwind[i] = 0.5 * (1 - cosf (2 * M_PI * i / _fftlen));
97 }
98
99 // Compute window autocorrelation and normalise it.
100 fftwf_execute_dft_r2c (_fwdplan, _fftTwind, _fftFdata);
101 h = _fftlen / 2;
102 for (i = 0; i < h; i++) {
103 x = _fftFdata[i][0];
104 y = _fftFdata[i][1];
105 _fftFdata[i][0] = x * x + y * y;
106 _fftFdata[i][1] = 0;
107 }
108 _fftFdata[h][0] = 0;
109 _fftFdata[h][1] = 0;
110 fftwf_execute_dft_c2r (_invplan, _fftFdata, _fftWcorr);
111 t = _fftWcorr[0];
112 for (i = 0; i < _fftlen; i++) {
113 _fftWcorr[i] /= t;
114 }
115
116 // Initialise all counters and other state.
117 _notebits = 0;
118 _lastnote = -1;
119 _count = 0;
120 _cycle = _frsize;
121 _error = 0.0f;
122 _ratio = 1.0f;
123 _xfade = false;
124 _ipindex = 0;
125 _frindex = 0;
126 _frcount = 0;
127 _rindex1 = _ipsize / 2;
128 _rindex2 = 0;
129
130 for (int i = 0; i < 12; ++i) {
131 _notescale[i] = i;
132 }
133
134 if (_upsamp) {
135 _readahed = _ipsize / 2 - _frsize * 2;
136 } else {
137 _readahed = _ipsize / 2 - _frsize;
138 }
139 }
140
~Retuner(void)141 Retuner::~Retuner (void)
142 {
143 delete[] _ipbuff;
144 delete[] _xffunc;
145 fftwf_free (_fftTwind);
146 fftwf_free (_fftWcorr);
147 fftwf_free (_fftTdata);
148 fftwf_free (_fftFdata);
149 fftwf_destroy_plan (_fwdplan);
150 fftwf_destroy_plan (_invplan);
151 }
152
153 int
process(int nfram,float const * inp,float * out)154 Retuner::process (int nfram, float const* inp, float* out)
155 {
156 int i, ii, k, fi, ra, lra;
157 float ph, dp, r1, r2, dr, u1, u2, v;
158
159 // Pitch shifting is done by resampling the input at the
160 // required ratio, and eventually jumping forward or back
161 // by one or more pitch period(s). Processing is done in
162 // fragments of '_frsize' frames, and the decision to jump
163 // forward or back is taken at the start of each fragment.
164 // If a jump happens we crossfade over one fragment size.
165 // Every 4 fragments a new pitch estimate is made. Since
166 // _fftsize = 16 * _frsize, the estimation window moves
167 // by 1/4 of the FFT length.
168
169 fi = _frindex; // Write index in current fragment.
170 r1 = _rindex1; // Read index for current input frame.
171 r2 = _rindex2; // Second read index while crossfading.
172
173 // No assumptions are made about fragments being aligned
174 // with process() calls, so we may be in the middle of
175 // a fragment here.
176
177 // Fast mode allows reading samples directly from the input,
178 // before the pitch has been computed. This is useful in
179 // live situation.
180 ra = _fastmode ? _readahed : 0;
181
182 while (nfram) {
183
184 // Last read-ahead position for crossfading if Fast mode has changed
185 lra = _lastfastmode ? _readahed : 0;
186
187 // Don't go past the end of the current fragment.
188 k = _frsize - fi;
189 if (nfram < k)
190 k = nfram;
191 nfram -= k;
192
193 // At 44.1 and 48 kHz upsample by 2.
194 if (_upsamp) {
195 _resampler.inp_count = k;
196 _resampler.inp_data = inp;
197 _resampler.out_count = 2 * k;
198 _resampler.out_data = _ipbuff + _ipindex;
199 _resampler.process ();
200 _ipindex += 2 * k;
201 }
202 // At higher sample rates apply lowpass filter.
203 else {
204 // Not implemented yet, just copy.
205 memcpy (_ipbuff + _ipindex, inp, k * sizeof (float));
206 _ipindex += k;
207 }
208
209 // Extra samples for interpolation.
210 _ipbuff[_ipsize + 0] = _ipbuff[0];
211 _ipbuff[_ipsize + 1] = _ipbuff[1];
212 _ipbuff[_ipsize + 2] = _ipbuff[2];
213 inp += k;
214 if (_ipindex == _ipsize)
215 _ipindex = 0;
216
217 // Process available samples.
218 dr = _ratio;
219 if (_upsamp)
220 dr *= 2;
221 if (_xfade) {
222 // Interpolate and crossfade.
223 while (k--) {
224 i = (int)r1;
225 ii = i + lra;
226 if (ii >= _ipsize)
227 ii -= _ipsize;
228 u1 = cubic (_ipbuff + ii, r1 - i);
229
230 i = (int)r2;
231 ii = i + ra;
232 if (ii >= _ipsize)
233 ii -= _ipsize;
234 u2 = cubic (_ipbuff + ii, r2 - i);
235
236 v = _xffunc[fi++];
237 *out++ = (1 - v) * u1 + v * u2;
238 r1 += dr;
239 if (r1 >= _ipsize)
240 r1 -= _ipsize;
241 r2 += dr;
242 if (r2 >= _ipsize)
243 r2 -= _ipsize;
244 }
245 } else if (ra == 0 && lra == 0) {
246 // Interpolation only.
247 fi += k;
248 while (k--) {
249 i = (int)r1;
250 *out++ = cubic (_ipbuff + i, r1 - i);
251 r1 += dr;
252 if (r1 >= _ipsize)
253 r1 -= _ipsize;
254 }
255 } else if (ra != lra) {
256 // crossfade from/to fastmode
257 while (k--) {
258 i = (int)r1;
259 ii = i + lra;
260 if (ii >= _ipsize)
261 ii -= _ipsize;
262 u1 = cubic (_ipbuff + ii, r1 - i);
263
264 i = (int)r1;
265 ii = i + ra;
266 if (ii >= _ipsize)
267 ii -= _ipsize;
268 u2 = cubic (_ipbuff + ii, r1 - i);
269
270 v = _xffunc[fi++];
271 *out++ = (1 - v) * u1 + v * u2;
272 r1 += dr;
273 if (r1 >= _ipsize)
274 r1 -= _ipsize;
275 }
276 } else {
277 // Interpolation only.
278 fi += k;
279 while (k--) {
280 i = (int)r1;
281 ii = i + lra;
282 if (ii >= _ipsize)
283 ii -= _ipsize;
284
285 *out++ = cubic (_ipbuff + ii, r1 - i);
286 r1 += dr;
287 if (r1 >= _ipsize)
288 r1 -= _ipsize;
289 }
290 }
291
292
293 _lastfastmode = _fastmode;
294
295
296 // If at end of fragment check for jump.
297 if (fi == _frsize) {
298 fi = 0;
299 // Estimate the pitch every 4th fragment.
300 if (++_frcount == 4) {
301 _frcount = 0;
302 findcycle ();
303 if (_cycle) {
304 // If the pitch estimate succeeds, find the
305 // nearest note and required resampling ratio.
306 _count = 0;
307 finderror ();
308 } else if (++_count > 5) {
309 // If the pitch estimate fails, the current
310 // ratio is kept for 5 fragments. After that
311 // the signal is considered unvoiced and the
312 // pitch error is reset.
313 _count = 5;
314 _cycle = _frsize;
315 _error = 0;
316 } else if (_count == 2) {
317 // Bias is removed after two unvoiced fragments.
318 _lastnote = -1;
319 }
320
321 _ratio = powf (2.0f, _corroffs / 12.0f - _error * _corrgain);
322 }
323
324 // If the previous fragment was crossfading,
325 // the end of the new fragment that was faded
326 // in becomes the current read position.
327 if (_xfade)
328 r1 = r2;
329
330 // A jump must correspond to an integer number
331 // of pitch periods, and to avoid reading outside
332 // the circular input buffer limits it must be at
333 // least one fragment size.
334 dr = _cycle * (int)(ceilf (_frsize / _cycle));
335 dp = dr / _frsize;
336 ph = r1 - _ipindex;
337 if (ph < 0)
338 ph += _ipsize;
339 if (_upsamp) {
340 ph /= 2;
341 dr *= 2;
342 }
343 ph = ph / _frsize + 2 * _ratio - 10;
344 if (ph > 0.5f) {
345 // Jump back by 'dr' frames and crossfade.
346 _xfade = true;
347 r2 = r1 - dr;
348 if (r2 < 0)
349 r2 += _ipsize;
350 } else if (ph + dp < 0.5f) {
351 // Jump forward by 'dr' frames and crossfade.
352 _xfade = true;
353 r2 = r1 + dr;
354 if (r2 >= _ipsize)
355 r2 -= _ipsize;
356 } else
357 _xfade = false;
358 }
359 }
360
361 // Save local state.
362 _frindex = fi;
363 _rindex1 = r1;
364 _rindex2 = r2;
365
366 return 0;
367 }
368
369 void
findcycle(void)370 Retuner::findcycle (void)
371 {
372 int d, h, i, j, k;
373 float f, m, t, x, y, z;
374 #ifdef MOD
375 float rms = 0;
376 #endif
377
378 d = _upsamp ? 2 : 1;
379 h = _fftlen / 2;
380 j = _ipindex;
381 k = _ipsize - 1;
382 for (i = 0; i < _fftlen; i++) {
383 int jk = j & k;
384 _fftTdata[i] = _fftTwind[i] * _ipbuff[jk];
385 #ifdef MOD
386 rms += _ipbuff[jk] * _ipbuff[jk];
387 #endif
388 j += d;
389 }
390 #ifdef MOD // ignore noise-floor
391 if (rms < _fftlen * 0.000031623f) { // signal < -45dBFS/RMS
392 _cycle = 0;
393 return;
394 }
395 #endif
396 fftwf_execute_dft_r2c (_fwdplan, _fftTdata, _fftFdata);
397 f = _fsamp / (_fftlen * 3e3f);
398 for (i = 0; i < h; i++) {
399 x = _fftFdata[i][0];
400 y = _fftFdata[i][1];
401 m = i * f;
402 _fftFdata[i][0] = (x * x + y * y) / (1 + m * m);
403 _fftFdata[i][1] = 0;
404 }
405 _fftFdata[h][0] = 0;
406 _fftFdata[h][1] = 0;
407 fftwf_execute_dft_c2r (_invplan, _fftFdata, _fftTdata);
408 t = _fftTdata[0] + 0.1f;
409 for (i = 0; i < h; i++)
410 _fftTdata[i] /= (t * _fftWcorr[i]);
411 x = _fftTdata[0];
412 for (i = 4; i < _ifmax; i += 4) {
413 y = _fftTdata[i];
414 if (y > x)
415 break;
416 x = y;
417 }
418 i -= 4;
419 _cycle = 0;
420 if (i >= _ifmax)
421 return;
422 if (i < _ifmin)
423 i = _ifmin;
424 x = _fftTdata[--i];
425 y = _fftTdata[++i];
426 m = 0;
427 j = 0;
428 while (i <= _ifmax) {
429 t = y * _fftWcorr[i];
430 z = _fftTdata[++i];
431 if ((t > m) && (y >= x) && (y >= z) && (y > 0.8f)) {
432 j = i - 1;
433 m = t;
434 }
435 x = y;
436 y = z;
437 }
438 if (j) {
439 x = _fftTdata[j - 1];
440 y = _fftTdata[j];
441 z = _fftTdata[j + 1];
442 if (fabsf(z - 2 * y + x) > 2e-9f) {
443 _cycle = j + 0.5f * (x - z) / (z - 2 * y + x - 1e-9f);
444 }
445 }
446 }
447
448 void
finderror(void)449 Retuner::finderror (void)
450 {
451 int i, m, im;
452 float a, am, d, dm, f;
453
454 if (!_notemask) {
455 _error = 0;
456 _lastnote = -1;
457 return;
458 }
459
460 f = log2f (_fsamp / (_cycle * _refpitch));
461 dm = 0;
462 am = 1;
463 im = -1;
464 for (i = 0, m = 1; i < 12; i++, m <<= 1) {
465 if (_notemask & m) {
466 d = f - (_notescale[i] - 9) / 12.0f;
467 d -= floorf (d + 0.5f);
468 a = fabsf (d);
469 if (i == _lastnote)
470 a -= _notebias;
471 if (a < am) {
472 am = a;
473 dm = d;
474 im = i;
475 }
476 }
477 }
478
479 if (_lastnote == im) {
480 _error += _corrfilt * (dm - _error);
481 } else {
482 _error = dm;
483 _lastnote = im;
484 }
485
486 // For display only.
487 _notebits |= 1 << im;
488 }
489
490 float
cubic(float * v,float a)491 Retuner::cubic (float* v, float a)
492 {
493 float b, c;
494
495 b = 1 - a;
496 c = a * b;
497 return (1.0f + 1.5f * c) * (v[1] * b + v[2] * a) - 0.5f * c * (v[0] * b + v[1] + v[2] + v[3] * a);
498 }
499