1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
2
3 /*
4 QM DSP Library
5
6 Centre for Digital Music, Queen Mary, University of London.
7 This file copyright 2008-2009 Matthew Davies and QMUL.
8
9 This program is free software; you can redistribute it and/or
10 modify it under the terms of the GNU General Public License as
11 published by the Free Software Foundation; either version 2 of the
12 License, or (at your option) any later version. See the file
13 COPYING included with this distribution for more information.
14 */
15
16 #include "TempoTrackV2.h"
17
18 #include <cmath>
19 #include <cstdlib>
20 #include <iostream>
21
22 #include "maths/MathUtilities.h"
23
24 #define EPS 0.0000008 // just some arbitrary small number
25
TempoTrackV2(float rate,size_t increment)26 TempoTrackV2::TempoTrackV2(float rate, size_t increment) :
27 m_rate(rate), m_increment(increment) { }
~TempoTrackV2()28 TempoTrackV2::~TempoTrackV2() { }
29
30 void
filter_df(d_vec_t & df)31 TempoTrackV2::filter_df(d_vec_t &df)
32 {
33 d_vec_t a(3);
34 d_vec_t b(3);
35 d_vec_t lp_df(df.size());
36
37 //equivalent in matlab to [b,a] = butter(2,0.4);
38 a[0] = 1.0000;
39 a[1] = -0.3695;
40 a[2] = 0.1958;
41 b[0] = 0.2066;
42 b[1] = 0.4131;
43 b[2] = 0.2066;
44
45 double inp1 = 0.;
46 double inp2 = 0.;
47 double out1 = 0.;
48 double out2 = 0.;
49
50
51 // forwards filtering
52 for (unsigned int i = 0;i < df.size();i++)
53 {
54 lp_df[i] = b[0]*df[i] + b[1]*inp1 + b[2]*inp2 - a[1]*out1 - a[2]*out2;
55 inp2 = inp1;
56 inp1 = df[i];
57 out2 = out1;
58 out1 = lp_df[i];
59 }
60
61 // copy forwards filtering to df...
62 // but, time-reversed, ready for backwards filtering
63 for (unsigned int i = 0;i < df.size();i++)
64 {
65 df[i] = lp_df[df.size()-i-1];
66 }
67
68 for (unsigned int i = 0;i < df.size();i++)
69 {
70 lp_df[i] = 0.;
71 }
72
73 inp1 = 0.; inp2 = 0.;
74 out1 = 0.; out2 = 0.;
75
76 // backwards filetering on time-reversed df
77 for (unsigned int i = 0;i < df.size();i++)
78 {
79 lp_df[i] = b[0]*df[i] + b[1]*inp1 + b[2]*inp2 - a[1]*out1 - a[2]*out2;
80 inp2 = inp1;
81 inp1 = df[i];
82 out2 = out1;
83 out1 = lp_df[i];
84 }
85
86 // write the re-reversed (i.e. forward) version back to df
87 for (unsigned int i = 0;i < df.size();i++)
88 {
89 df[i] = lp_df[df.size()-i-1];
90 }
91 }
92
93
// MEPD 28/11/12
// This function allows the user to specify an input tempo "inputtempo"
// (in BPM) and a flag "constraintempo". When the flag is set, the general
// Rayleigh weighting over periodicities is replaced with a Gaussian
// centred on the input tempo.
// Note: with inputtempo = 120 and constraintempo = false, the behaviour is
// the same as it was before these parameters were added.
100 void
calculateBeatPeriod(const vector<double> & df,vector<double> & beat_period,vector<double> & tempi,double inputtempo,bool constraintempo)101 TempoTrackV2::calculateBeatPeriod(const vector<double> &df,
102 vector<double> &beat_period,
103 vector<double> &tempi,
104 double inputtempo, bool constraintempo)
105 {
106 // to follow matlab.. split into 512 sample frames with a 128 hop size
107 // calculate the acf,
108 // then the rcf.. and then stick the rcfs as columns of a matrix
109 // then call viterbi decoding with weight vector and transition matrix
110 // and get best path
111
112 unsigned int wv_len = 128;
113
114 // MEPD 28/11/12
115 // the default value of inputtempo in the beat tracking plugin is 120
116 // so if the user specifies a different inputtempo, the rayparam will be updated
117 // accordingly.
118 // note: 60*44100/512 is a magic number
119 // this might (will?) break if a user specifies a different frame rate for the onset detection function
120 double rayparam = (60*44100/512)/inputtempo;
121
122 // these debug statements can be removed.
123 // std::cerr << "inputtempo" << inputtempo << std::endl;
124 // std::cerr << "rayparam" << rayparam << std::endl;
125 // std::cerr << "constraintempo" << constraintempo << std::endl;
126
127 // make rayleigh weighting curve
128 d_vec_t wv(wv_len);
129
130 // check whether or not to use rayleigh weighting (if constraintempo is false)
131 // or use gaussian weighting it (constraintempo is true)
132 if (constraintempo)
133 {
134 for (unsigned int i=0; i<wv.size(); i++)
135 {
136 // MEPD 28/11/12
137 // do a gaussian weighting instead of rayleigh
138 wv[i] = exp( (-1.*pow((static_cast<double> (i)-rayparam),2.)) / (2.*pow(rayparam/4.,2.)) );
139 }
140 }
141 else
142 {
143 for (unsigned int i=0; i<wv.size(); i++)
144 {
145 // MEPD 28/11/12
146 // standard rayleigh weighting over periodicities
147 wv[i] = (static_cast<double> (i) / pow(rayparam,2.)) * exp((-1.*pow(-static_cast<double> (i),2.)) / (2.*pow(rayparam,2.)));
148 }
149 }
150
151 // beat tracking frame size (roughly 6 seconds) and hop (1.5 seconds)
152 unsigned int winlen = 512;
153 unsigned int step = 128;
154
155 // matrix to store output of comb filter bank, increment column of matrix at each frame
156 d_mat_t rcfmat;
157 int col_counter = -1;
158
159 // main loop for beat period calculation
160 for (unsigned int i=0; i+winlen<df.size(); i+=step)
161 {
162 // get dfframe
163 d_vec_t dfframe(winlen);
164 for (unsigned int k=0; k<winlen; k++)
165 {
166 dfframe[k] = df[i+k];
167 }
168 // get rcf vector for current frame
169 d_vec_t rcf(wv_len);
170 get_rcf(dfframe,wv,rcf);
171
172 rcfmat.push_back( d_vec_t() ); // adds a new column
173 col_counter++;
174 for (unsigned int j=0; j<rcf.size(); j++)
175 {
176 rcfmat[col_counter].push_back( rcf[j] );
177 }
178 }
179
180 // now call viterbi decoding function
181 viterbi_decode(rcfmat,wv,beat_period,tempi);
182 }
183
184
185 void
get_rcf(const d_vec_t & dfframe_in,const d_vec_t & wv,d_vec_t & rcf)186 TempoTrackV2::get_rcf(const d_vec_t &dfframe_in, const d_vec_t &wv, d_vec_t &rcf)
187 {
188 // calculate autocorrelation function
189 // then rcf
190 // just hard code for now... don't really need separate functions to do this
191
192 // make acf
193
194 d_vec_t dfframe(dfframe_in);
195
196 MathUtilities::adaptiveThreshold(dfframe);
197
198 d_vec_t acf(dfframe.size());
199
200
201 for (unsigned int lag=0; lag<dfframe.size(); lag++)
202 {
203 double sum = 0.;
204 double tmp = 0.;
205
206 for (unsigned int n=0; n<(dfframe.size()-lag); n++)
207 {
208 tmp = dfframe[n] * dfframe[n+lag];
209 sum += tmp;
210 }
211 acf[lag] = static_cast<double> (sum/ (dfframe.size()-lag));
212 }
213
214 // now apply comb filtering
215 int numelem = 4;
216
217 for (unsigned int i = 2;i < rcf.size();i++) // max beat period
218 {
219 for (int a = 1;a <= numelem;a++) // number of comb elements
220 {
221 for (int b = 1-a;b <= a-1;b++) // general state using normalisation of comb elements
222 {
223 rcf[i-1] += ( acf[(a*i+b)-1]*wv[i-1] ) / (2.*a-1.); // calculate value for comb filter row
224 }
225 }
226 }
227
228 // apply adaptive threshold to rcf
229 MathUtilities::adaptiveThreshold(rcf);
230
231 double rcfsum =0.;
232 for (unsigned int i=0; i<rcf.size(); i++)
233 {
234 rcf[i] += EPS ;
235 rcfsum += rcf[i];
236 }
237
238 // normalise rcf to sum to unity
239 for (unsigned int i=0; i<rcf.size(); i++)
240 {
241 rcf[i] /= (rcfsum + EPS);
242 }
243 }
244
245 void
viterbi_decode(const d_mat_t & rcfmat,const d_vec_t & wv,d_vec_t & beat_period,d_vec_t & tempi)246 TempoTrackV2::viterbi_decode(const d_mat_t &rcfmat, const d_vec_t &wv, d_vec_t &beat_period, d_vec_t &tempi)
247 {
248 // following Kevin Murphy's Viterbi decoding to get best path of
249 // beat periods through rfcmat
250
251 // make transition matrix
252 d_mat_t tmat;
253 for (unsigned int i=0;i<wv.size();i++)
254 {
255 tmat.push_back ( d_vec_t() ); // adds a new column
256 for (unsigned int j=0; j<wv.size(); j++)
257 {
258 tmat[i].push_back(0.); // fill with zeros initially
259 }
260 }
261
262 // variance of Gaussians in transition matrix
263 // formed of Gaussians on diagonal - implies slow tempo change
264 double sigma = 8.;
265 // don't want really short beat periods, or really long ones
266 for (unsigned int i=20;i <wv.size()-20; i++)
267 {
268 for (unsigned int j=20; j<wv.size()-20; j++)
269 {
270 double mu = static_cast<double>(i);
271 tmat[i][j] = exp( (-1.*pow((j-mu),2.)) / (2.*pow(sigma,2.)) );
272 }
273 }
274
275 // parameters for Viterbi decoding... this part is taken from
276 // Murphy's matlab
277
278 d_mat_t delta;
279 i_mat_t psi;
280 for (unsigned int i=0;i <rcfmat.size(); i++)
281 {
282 delta.push_back( d_vec_t());
283 psi.push_back( i_vec_t());
284 for (unsigned int j=0; j<rcfmat[i].size(); j++)
285 {
286 delta[i].push_back(0.); // fill with zeros initially
287 psi[i].push_back(0); // fill with zeros initially
288 }
289 }
290
291
292 unsigned int T = delta.size();
293
294 if (T < 2) return; // can't do anything at all meaningful
295
296 unsigned int Q = delta[0].size();
297
298 // initialize first column of delta
299 for (unsigned int j=0; j<Q; j++)
300 {
301 delta[0][j] = wv[j] * rcfmat[0][j];
302 psi[0][j] = 0;
303 }
304
305 double deltasum = 0.;
306 for (unsigned int i=0; i<Q; i++)
307 {
308 deltasum += delta[0][i];
309 }
310 for (unsigned int i=0; i<Q; i++)
311 {
312 delta[0][i] /= (deltasum + EPS);
313 }
314
315
316 for (unsigned int t=1; t<T; t++)
317 {
318 d_vec_t tmp_vec(Q);
319
320 for (unsigned int j=0; j<Q; j++)
321 {
322 for (unsigned int i=0; i<Q; i++)
323 {
324 tmp_vec[i] = delta[t-1][i] * tmat[j][i];
325 }
326
327 delta[t][j] = get_max_val(tmp_vec);
328
329 psi[t][j] = get_max_ind(tmp_vec);
330
331 delta[t][j] *= rcfmat[t][j];
332 }
333
334 // normalise current delta column
335 double deltasum = 0.;
336 for (unsigned int i=0; i<Q; i++)
337 {
338 deltasum += delta[t][i];
339 }
340 for (unsigned int i=0; i<Q; i++)
341 {
342 delta[t][i] /= (deltasum + EPS);
343 }
344 }
345
346 i_vec_t bestpath(T);
347 d_vec_t tmp_vec(Q);
348 for (unsigned int i=0; i<Q; i++)
349 {
350 tmp_vec[i] = delta[T-1][i];
351 }
352
353 // find starting point - best beat period for "last" frame
354 bestpath[T-1] = get_max_ind(tmp_vec);
355
356 // backtrace through index of maximum values in psi
357 for (unsigned int t=T-2; t>0 ;t--)
358 {
359 bestpath[t] = psi[t+1][bestpath[t+1]];
360 }
361
362 // weird but necessary hack -- couldn't get above loop to terminate at t >= 0
363 bestpath[0] = psi[1][bestpath[1]];
364
365 unsigned int lastind = 0;
366 for (unsigned int i=0; i<T; i++)
367 {
368 unsigned int step = 128;
369 for (unsigned int j=0; j<step; j++)
370 {
371 lastind = i*step+j;
372 beat_period[lastind] = bestpath[i];
373 }
374 // std::cerr << "bestpath[" << i << "] = " << bestpath[i] << " (used for beat_periods " << i*step << " to " << i*step+step-1 << ")" << std::endl;
375 }
376
377 //fill in the last values...
378 for (unsigned int i=lastind; i<beat_period.size(); i++)
379 {
380 beat_period[i] = beat_period[lastind];
381 }
382
383 for (unsigned int i = 0; i < beat_period.size(); i++)
384 {
385 tempi.push_back((60. * m_rate / m_increment)/beat_period[i]);
386 }
387 }
388
389 double
get_max_val(const d_vec_t & df)390 TempoTrackV2::get_max_val(const d_vec_t &df)
391 {
392 double maxval = 0.;
393 for (unsigned int i=0; i<df.size(); i++)
394 {
395 if (maxval < df[i])
396 {
397 maxval = df[i];
398 }
399 }
400
401 return maxval;
402 }
403
404 int
get_max_ind(const d_vec_t & df)405 TempoTrackV2::get_max_ind(const d_vec_t &df)
406 {
407 double maxval = 0.;
408 int ind = 0;
409 for (unsigned int i=0; i<df.size(); i++)
410 {
411 if (maxval < df[i])
412 {
413 maxval = df[i];
414 ind = i;
415 }
416 }
417
418 return ind;
419 }
420
421 void
normalise_vec(d_vec_t & df)422 TempoTrackV2::normalise_vec(d_vec_t &df)
423 {
424 double sum = 0.;
425 for (unsigned int i=0; i<df.size(); i++)
426 {
427 sum += df[i];
428 }
429
430 for (unsigned int i=0; i<df.size(); i++)
431 {
432 df[i]/= (sum + EPS);
433 }
434 }
435
// MEPD 28/11/12
// This function has been updated to allow the "alpha" and "tightness"
// parameters of the dynamic program to be set by the user.
// The default values are alpha = 0.9 and tightness = 4.
440 void
calculateBeats(const vector<double> & df,const vector<double> & beat_period,vector<double> & beats,double alpha,double tightness)441 TempoTrackV2::calculateBeats(const vector<double> &df,
442 const vector<double> &beat_period,
443 vector<double> &beats, double alpha, double tightness)
444 {
445 if (df.empty() || beat_period.empty()) return;
446
447 d_vec_t cumscore(df.size()); // store cumulative score
448 i_vec_t backlink(df.size()); // backlink (stores best beat locations at each time instant)
449 d_vec_t localscore(df.size()); // localscore, for now this is the same as the detection function
450
451 for (unsigned int i=0; i<df.size(); i++)
452 {
453 localscore[i] = df[i];
454 backlink[i] = -1;
455 }
456
457 //double tightness = 4.;
458 //double alpha = 0.9;
459 // MEPD 28/11/12
460 // debug statements that can be removed.
461 // std::cerr << "alpha" << alpha << std::endl;
462 // std::cerr << "tightness" << tightness << std::endl;
463
464 // main loop
465 for (unsigned int i=0; i<localscore.size(); i++)
466 {
467 int prange_min = -2*beat_period[i];
468 int prange_max = round(-0.5*beat_period[i]);
469
470 // transition range
471 d_vec_t txwt (prange_max - prange_min + 1);
472 d_vec_t scorecands (txwt.size());
473
474 for (unsigned int j=0;j<txwt.size();j++)
475 {
476 double mu = static_cast<double> (beat_period[i]);
477 txwt[j] = exp( -0.5*pow(tightness * log((round(2*mu)-j)/mu),2));
478
479 // IF IN THE ALLOWED RANGE, THEN LOOK AT CUMSCORE[I+PRANGE_MIN+J
480 // ELSE LEAVE AT DEFAULT VALUE FROM INITIALISATION: D_VEC_T SCORECANDS (TXWT.SIZE());
481
482 int cscore_ind = i+prange_min+j;
483 if (cscore_ind >= 0)
484 {
485 scorecands[j] = txwt[j] * cumscore[cscore_ind];
486 }
487 }
488
489 // find max value and index of maximum value
490 double vv = get_max_val(scorecands);
491 int xx = get_max_ind(scorecands);
492
493 cumscore[i] = alpha*vv + (1.-alpha)*localscore[i];
494 backlink[i] = i+prange_min+xx;
495
496 // std::cerr << "backlink[" << i << "] <= " << backlink[i] << std::endl;
497 }
498
499 // STARTING POINT, I.E. LAST BEAT.. PICK A STRONG POINT IN cumscore VECTOR
500 d_vec_t tmp_vec;
501 for (unsigned int i=cumscore.size() - beat_period[beat_period.size()-1] ; i<cumscore.size(); i++)
502 {
503 tmp_vec.push_back(cumscore[i]);
504 }
505
506 int startpoint = get_max_ind(tmp_vec) + cumscore.size() - beat_period[beat_period.size()-1] ;
507
508 // can happen if no results obtained earlier (e.g. input too short)
509 if (startpoint >= (int)backlink.size()) startpoint = backlink.size()-1;
510
511 // USE BACKLINK TO GET EACH NEW BEAT (TOWARDS THE BEGINNING OF THE FILE)
512 // BACKTRACKING FROM THE END TO THE BEGINNING.. MAKING SURE NOT TO GO BEFORE SAMPLE 0
513 i_vec_t ibeats;
514 ibeats.push_back(startpoint);
515 // std::cerr << "startpoint = " << startpoint << std::endl;
516 while (backlink[ibeats.back()] > 0)
517 {
518 // std::cerr << "backlink[" << ibeats.back() << "] = " << backlink[ibeats.back()] << std::endl;
519 int b = ibeats.back();
520 if (backlink[b] == b) break; // shouldn't happen... haha
521 ibeats.push_back(backlink[b]);
522 }
523
524 // REVERSE SEQUENCE OF IBEATS AND STORE AS BEATS
525 for (unsigned int i=0; i<ibeats.size(); i++)
526 {
527 beats.push_back( static_cast<double>(ibeats[ibeats.size()-i-1]) );
528 }
529 }
530
531
532