1 // -----------------------------------------------------------------------
2 //
3 //  Copyright (C) 2009-2011 Fons Adriaensen <fons@linuxaudio.org>
4 //
5 //  This program is free software; you can redistribute it and/or modify
6 //  it under the terms of the GNU General Public License as published by
7 //  the Free Software Foundation; either version 2 of the License, or
8 //  (at your option) any later version.
9 //
10 //  This program is distributed in the hope that it will be useful,
11 //  but WITHOUT ANY WARRANTY; without even the implied warranty of
12 //  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13 //  GNU General Public License for more details.
14 //
15 //  You should have received a copy of the GNU General Public License
16 //  along with this program; if not, write to the Free Software
17 //  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18 //
19 // -----------------------------------------------------------------------
20 
21 #include "retuner.h"
22 #include <math.h>
23 #include <stdio.h>
24 #include <stdlib.h>
25 #include <string.h>
26 
27 using namespace LV2AT;
28 
Retuner(int fsamp)29 Retuner::Retuner (int fsamp)
30     : _fsamp (fsamp)
31     , _refpitch (440.0f)
32     , _notebias (0.0f)
33     , _corrfilt (1.0f)
34     , _corrgain (1.0f)
35     , _corroffs (0.0f)
36     , _notemask (0xFFF)
37     , _fastmode (false)
38     , _lastfastmode (false)
39 {
40 	int   i, h;
41 	float t, x, y;
42 
43 	if (_fsamp < 64000) {
44 		// At 44.1 and 48 kHz resample to double rate.
45 		_upsamp = true;
46 		_ipsize = 4096;
47 		_fftlen = 2048;
48 		_frsize = 128;
49 		_resampler.setup (1, 2, 1, 32); // 32 is medium quality.
50 		// Prefeed some input samples to remove delay.
51 		_resampler.inp_count = _resampler.filtlen () - 1;
52 		_resampler.inp_data  = 0;
53 		_resampler.out_count = 0;
54 		_resampler.out_data  = 0;
55 		_resampler.process ();
56 	} else if (_fsamp < 128000) {
57 		// 88.2 or 96 kHz.
58 		_upsamp = false;
59 		_ipsize = 4096;
60 		_fftlen = 4096;
61 		_frsize = 256;
62 	} else {
63 		// 192 kHz, double time domain buffers sizes.
64 		_upsamp = false;
65 		_ipsize = 8192;
66 		_fftlen = 8192;
67 		_frsize = 512;
68 	}
69 
70 	// Accepted correlation peak range, corresponding to 60..1200 Hz.
71 	_ifmin = _fsamp / 1200;
72 	_ifmax = _fsamp / 60;
73 
74 	// Various buffers
75 	_ipbuff   = new float[_ipsize + 3];                          // Resampled or filtered input
76 	_xffunc   = new float[_frsize];                              // Crossfade function
77 	_fftTwind = (float*)fftwf_malloc (_fftlen * sizeof (float)); // Window function
78 	_fftWcorr = (float*)fftwf_malloc (_fftlen * sizeof (float)); // Autocorrelation of window
79 	_fftTdata = (float*)fftwf_malloc (_fftlen * sizeof (float)); // Time domain data for FFT
80 	_fftFdata = (fftwf_complex*)fftwf_malloc ((_fftlen / 2 + 1) * sizeof (fftwf_complex));
81 
82 	// FFTW3 plans
83 	_fwdplan = fftwf_plan_dft_r2c_1d (_fftlen, _fftTdata, _fftFdata, FFTW_ESTIMATE);
84 	_invplan = fftwf_plan_dft_c2r_1d (_fftlen, _fftFdata, _fftTdata, FFTW_ESTIMATE);
85 
86 	// Clear input buffer.
87 	memset (_ipbuff, 0, (_ipsize + 1) * sizeof (float));
88 
89 	// Create crossfade function, half of raised cosine.
90 	for (i = 0; i < _frsize; i++) {
91 		_xffunc[i] = 0.5 * (1 - cosf (M_PI * i / _frsize));
92 	}
93 
94 	// Create window, raised cosine.
95 	for (i = 0; i < _fftlen; i++) {
96 		_fftTwind[i] = 0.5 * (1 - cosf (2 * M_PI * i / _fftlen));
97 	}
98 
99 	// Compute window autocorrelation and normalise it.
100 	fftwf_execute_dft_r2c (_fwdplan, _fftTwind, _fftFdata);
101 	h = _fftlen / 2;
102 	for (i = 0; i < h; i++) {
103 		x               = _fftFdata[i][0];
104 		y               = _fftFdata[i][1];
105 		_fftFdata[i][0] = x * x + y * y;
106 		_fftFdata[i][1] = 0;
107 	}
108 	_fftFdata[h][0] = 0;
109 	_fftFdata[h][1] = 0;
110 	fftwf_execute_dft_c2r (_invplan, _fftFdata, _fftWcorr);
111 	t = _fftWcorr[0];
112 	for (i = 0; i < _fftlen; i++) {
113 		_fftWcorr[i] /= t;
114 	}
115 
116 	// Initialise all counters and other state.
117 	_notebits = 0;
118 	_lastnote = -1;
119 	_count    = 0;
120 	_cycle    = _frsize;
121 	_error    = 0.0f;
122 	_ratio    = 1.0f;
123 	_xfade    = false;
124 	_ipindex  = 0;
125 	_frindex  = 0;
126 	_frcount  = 0;
127 	_rindex1  = _ipsize / 2;
128 	_rindex2  = 0;
129 
130 	for (int i = 0; i < 12; ++i) {
131 		_notescale[i] = i;
132 	}
133 
134 	if (_upsamp) {
135 		_readahed = _ipsize / 2 - _frsize * 2;
136 	} else {
137 		_readahed = _ipsize / 2 - _frsize;
138 	}
139 }
140 
~Retuner(void)141 Retuner::~Retuner (void)
142 {
143 	delete[] _ipbuff;
144 	delete[] _xffunc;
145 	fftwf_free (_fftTwind);
146 	fftwf_free (_fftWcorr);
147 	fftwf_free (_fftTdata);
148 	fftwf_free (_fftFdata);
149 	fftwf_destroy_plan (_fwdplan);
150 	fftwf_destroy_plan (_invplan);
151 }
152 
153 int
process(int nfram,float const * inp,float * out)154 Retuner::process (int nfram, float const* inp, float* out)
155 {
156 	int   i, ii, k, fi, ra, lra;
157 	float ph, dp, r1, r2, dr, u1, u2, v;
158 
159 	// Pitch shifting is done by resampling the input at the
160 	// required ratio, and eventually jumping forward or back
161 	// by one or more pitch period(s). Processing is done in
162 	// fragments of '_frsize' frames, and the decision to jump
163 	// forward or back is taken at the start of each fragment.
164 	// If a jump happens we crossfade over one fragment size.
165 	// Every 4 fragments a new pitch estimate is made. Since
166 	// _fftsize = 16 * _frsize, the estimation window moves
167 	// by 1/4 of the FFT length.
168 
169 	fi = _frindex; // Write index in current fragment.
170 	r1 = _rindex1; // Read index for current input frame.
171 	r2 = _rindex2; // Second read index while crossfading.
172 
173 	// No assumptions are made about fragments being aligned
174 	// with process() calls, so we may be in the middle of
175 	// a fragment here.
176 
177 	// Fast mode allows reading samples directly from the input,
178 	// before the pitch has been computed. This is useful in
179 	// live situation.
180 	ra = _fastmode ? _readahed : 0;
181 
182 	while (nfram) {
183 
184 		// Last read-ahead position for crossfading if Fast mode has changed
185 		lra = _lastfastmode ? _readahed : 0;
186 
187 		// Don't go past the end of the current fragment.
188 		k = _frsize - fi;
189 		if (nfram < k)
190 			k = nfram;
191 		nfram -= k;
192 
193 		// At 44.1 and 48 kHz upsample by 2.
194 		if (_upsamp) {
195 			_resampler.inp_count = k;
196 			_resampler.inp_data  = inp;
197 			_resampler.out_count = 2 * k;
198 			_resampler.out_data  = _ipbuff + _ipindex;
199 			_resampler.process ();
200 			_ipindex += 2 * k;
201 		}
202 		// At higher sample rates apply lowpass filter.
203 		else {
204 			// Not implemented yet, just copy.
205 			memcpy (_ipbuff + _ipindex, inp, k * sizeof (float));
206 			_ipindex += k;
207 		}
208 
209 		// Extra samples for interpolation.
210 		_ipbuff[_ipsize + 0] = _ipbuff[0];
211 		_ipbuff[_ipsize + 1] = _ipbuff[1];
212 		_ipbuff[_ipsize + 2] = _ipbuff[2];
213 		inp += k;
214 		if (_ipindex == _ipsize)
215 			_ipindex = 0;
216 
217 		// Process available samples.
218 		dr = _ratio;
219 		if (_upsamp)
220 			dr *= 2;
221 		if (_xfade) {
222 			// Interpolate and crossfade.
223 			while (k--) {
224 				i  = (int)r1;
225 				ii = i + lra;
226 				if (ii >= _ipsize)
227 					ii -= _ipsize;
228 				u1 = cubic (_ipbuff + ii, r1 - i);
229 
230 				i  = (int)r2;
231 				ii = i + ra;
232 				if (ii >= _ipsize)
233 					ii -= _ipsize;
234 				u2 = cubic (_ipbuff + ii, r2 - i);
235 
236 				v      = _xffunc[fi++];
237 				*out++ = (1 - v) * u1 + v * u2;
238 				r1 += dr;
239 				if (r1 >= _ipsize)
240 					r1 -= _ipsize;
241 				r2 += dr;
242 				if (r2 >= _ipsize)
243 					r2 -= _ipsize;
244 			}
245 		} else if (ra == 0 && lra == 0) {
246 			// Interpolation only.
247 			fi += k;
248 			while (k--) {
249 				i      = (int)r1;
250 				*out++ = cubic (_ipbuff + i, r1 - i);
251 				r1 += dr;
252 				if (r1 >= _ipsize)
253 					r1 -= _ipsize;
254 			}
255 		} else if (ra != lra) {
256 			// crossfade from/to fastmode
257 			while (k--) {
258 				i  = (int)r1;
259 				ii = i + lra;
260 				if (ii >= _ipsize)
261 					ii -= _ipsize;
262 				u1 = cubic (_ipbuff + ii, r1 - i);
263 
264 				i  = (int)r1;
265 				ii = i + ra;
266 				if (ii >= _ipsize)
267 					ii -= _ipsize;
268 				u2 = cubic (_ipbuff + ii, r1 - i);
269 
270 				v      = _xffunc[fi++];
271 				*out++ = (1 - v) * u1 + v * u2;
272 				r1 += dr;
273 				if (r1 >= _ipsize)
274 					r1 -= _ipsize;
275 			}
276 		} else {
277 			// Interpolation only.
278 			fi += k;
279 			while (k--) {
280 				i  = (int)r1;
281 				ii = i + lra;
282 				if (ii >= _ipsize)
283 					ii -= _ipsize;
284 
285 				*out++ = cubic (_ipbuff + ii, r1 - i);
286 				r1 += dr;
287 				if (r1 >= _ipsize)
288 					r1 -= _ipsize;
289 			}
290 		}
291 
292 
293 		_lastfastmode = _fastmode;
294 
295 
296 		// If at end of fragment check for jump.
297 		if (fi == _frsize) {
298 			fi = 0;
299 			// Estimate the pitch every 4th fragment.
300 			if (++_frcount == 4) {
301 				_frcount = 0;
302 				findcycle ();
303 				if (_cycle) {
304 					// If the pitch estimate succeeds, find the
305 					// nearest note and required resampling ratio.
306 					_count = 0;
307 					finderror ();
308 				} else if (++_count > 5) {
309 					// If the pitch estimate fails, the current
310 					// ratio is kept for 5 fragments. After that
311 					// the signal is considered unvoiced and the
312 					// pitch error is reset.
313 					_count = 5;
314 					_cycle = _frsize;
315 					_error = 0;
316 				} else if (_count == 2) {
317 					// Bias is removed after two unvoiced fragments.
318 					_lastnote = -1;
319 				}
320 
321 				_ratio = powf (2.0f, _corroffs / 12.0f - _error * _corrgain);
322 			}
323 
324 			// If the previous fragment was crossfading,
325 			// the end of the new fragment that was faded
326 			// in becomes the current read position.
327 			if (_xfade)
328 				r1 = r2;
329 
330 			// A jump must correspond to an integer number
331 			// of pitch periods, and to avoid reading outside
332 			// the circular input buffer limits it must be at
333 			// least one fragment size.
334 			dr = _cycle * (int)(ceilf (_frsize / _cycle));
335 			dp = dr / _frsize;
336 			ph = r1 - _ipindex;
337 			if (ph < 0)
338 				ph += _ipsize;
339 			if (_upsamp) {
340 				ph /= 2;
341 				dr *= 2;
342 			}
343 			ph = ph / _frsize + 2 * _ratio - 10;
344 			if (ph > 0.5f) {
345 				// Jump back by 'dr' frames and crossfade.
346 				_xfade = true;
347 				r2     = r1 - dr;
348 				if (r2 < 0)
349 					r2 += _ipsize;
350 			} else if (ph + dp < 0.5f) {
351 				// Jump forward by 'dr' frames and crossfade.
352 				_xfade = true;
353 				r2     = r1 + dr;
354 				if (r2 >= _ipsize)
355 					r2 -= _ipsize;
356 			} else
357 				_xfade = false;
358 		}
359 	}
360 
361 	// Save local state.
362 	_frindex = fi;
363 	_rindex1 = r1;
364 	_rindex2 = r2;
365 
366 	return 0;
367 }
368 
369 void
findcycle(void)370 Retuner::findcycle (void)
371 {
372 	int   d, h, i, j, k;
373 	float f, m, t, x, y, z;
374 #ifdef MOD
375 	float rms = 0;
376 #endif
377 
378 	d = _upsamp ? 2 : 1;
379 	h = _fftlen / 2;
380 	j = _ipindex;
381 	k = _ipsize - 1;
382 	for (i = 0; i < _fftlen; i++) {
383 		int jk       = j & k;
384 		_fftTdata[i] = _fftTwind[i] * _ipbuff[jk];
385 #ifdef MOD
386 		rms += _ipbuff[jk] * _ipbuff[jk];
387 #endif
388 		j += d;
389 	}
390 #ifdef MOD                                  // ignore noise-floor
391 	if (rms < _fftlen * 0.000031623f) { // signal < -45dBFS/RMS
392 		_cycle = 0;
393 		return;
394 	}
395 #endif
396 	fftwf_execute_dft_r2c (_fwdplan, _fftTdata, _fftFdata);
397 	f = _fsamp / (_fftlen * 3e3f);
398 	for (i = 0; i < h; i++) {
399 		x               = _fftFdata[i][0];
400 		y               = _fftFdata[i][1];
401 		m               = i * f;
402 		_fftFdata[i][0] = (x * x + y * y) / (1 + m * m);
403 		_fftFdata[i][1] = 0;
404 	}
405 	_fftFdata[h][0] = 0;
406 	_fftFdata[h][1] = 0;
407 	fftwf_execute_dft_c2r (_invplan, _fftFdata, _fftTdata);
408 	t = _fftTdata[0] + 0.1f;
409 	for (i = 0; i < h; i++)
410 		_fftTdata[i] /= (t * _fftWcorr[i]);
411 	x = _fftTdata[0];
412 	for (i = 4; i < _ifmax; i += 4) {
413 		y = _fftTdata[i];
414 		if (y > x)
415 			break;
416 		x = y;
417 	}
418 	i -= 4;
419 	_cycle = 0;
420 	if (i >= _ifmax)
421 		return;
422 	if (i < _ifmin)
423 		i = _ifmin;
424 	x = _fftTdata[--i];
425 	y = _fftTdata[++i];
426 	m = 0;
427 	j = 0;
428 	while (i <= _ifmax) {
429 		t = y * _fftWcorr[i];
430 		z = _fftTdata[++i];
431 		if ((t > m) && (y >= x) && (y >= z) && (y > 0.8f)) {
432 			j = i - 1;
433 			m = t;
434 		}
435 		x = y;
436 		y = z;
437 	}
438 	if (j) {
439 		x      = _fftTdata[j - 1];
440 		y      = _fftTdata[j];
441 		z      = _fftTdata[j + 1];
442 		if (fabsf(z - 2 * y + x) > 2e-9f) {
443 			_cycle = j + 0.5f * (x - z) / (z - 2 * y + x - 1e-9f);
444 		}
445 	}
446 }
447 
448 void
finderror(void)449 Retuner::finderror (void)
450 {
451 	int   i, m, im;
452 	float a, am, d, dm, f;
453 
454 	if (!_notemask) {
455 		_error    = 0;
456 		_lastnote = -1;
457 		return;
458 	}
459 
460 	f  = log2f (_fsamp / (_cycle * _refpitch));
461 	dm = 0;
462 	am = 1;
463 	im = -1;
464 	for (i = 0, m = 1; i < 12; i++, m <<= 1) {
465 		if (_notemask & m) {
466 			d = f - (_notescale[i] - 9) / 12.0f;
467 			d -= floorf (d + 0.5f);
468 			a = fabsf (d);
469 			if (i == _lastnote)
470 				a -= _notebias;
471 			if (a < am) {
472 				am = a;
473 				dm = d;
474 				im = i;
475 			}
476 		}
477 	}
478 
479 	if (_lastnote == im) {
480 		_error += _corrfilt * (dm - _error);
481 	} else {
482 		_error    = dm;
483 		_lastnote = im;
484 	}
485 
486 	// For display only.
487 	_notebits |= 1 << im;
488 }
489 
490 float
cubic(float * v,float a)491 Retuner::cubic (float* v, float a)
492 {
493 	float b, c;
494 
495 	b = 1 - a;
496 	c = a * b;
497 	return (1.0f + 1.5f * c) * (v[1] * b + v[2] * a) - 0.5f * c * (v[0] * b + v[1] + v[2] + v[3] * a);
498 }
499