1
2 #ifdef _WIN32
3 #include "malloc.h"
4 #endif
5 #include "stdlib.h" // for OSX compatibility, malloc.h -> stdlib.h
6 #include "stdio.h"
7 #include "assert.h"
8 #include "string.h"
9 #include "math.h"
10 #include <fstream>
11 #include "allegro.h"
12 #include "fft3/FFT3.h"
13 #include "audioreader.h"
14 #include "scorealign.h"
15 #include "gen_chroma.h"
16 #include "comp_chroma.h"
17 #include "mfmidi.h"
18 #include "sautils.h"
19 #ifdef SA_VERBOSE
20 #include <iostream> // cout
21 #endif
22 using namespace std;
23
24 #ifdef min
25 #undef min
26 #endif
27 #define min(x,y) ((x)<(y)?(x):(y))
28
29 #ifdef max
30 #undef max
31 #endif
32 #define max(x,y) ((x)>(y)?(x):(y))
33
34 //if 1, causes printing internally
35 #define PRINT_BIN_ENERGY 1
36
37 #define p1 0.0577622650466621
38 #define p2 2.1011784386926213
39
40 // each row is one chroma vector,
41 // data is stored as an array of chroma vectors:
42 // vector 1, vector 2, ...
43
hz_to_step(float hz)44 float hz_to_step(float hz)
45 {
46 return float((log(hz) - p2) / p1);
47 }
48
49 /* GEN_MAGNITUDE
50 given the real and imaginary portions of a complex FFT function, compute
51 the magnitude of the fft bin.
52
53 NOTE: out should be length n
54 */
gen_Magnitude(float * inR,float * inI,int low,int hi,float * out)55 void gen_Magnitude(float* inR, float* inI, int low, int hi, float* out)
56 {
57 int i;
58
59 for (i = low; i < hi; i++) {
60 float magVal = sqrt(inR[i] * inR[i] + inI[i] * inI[i]);
61 //printf(" %d: sqrt(%g^2+%g^2)=%g\n",i,inR[i],inI[i+1],magVal);
62 out[i]= magVal;
63 #ifdef SA_VERBOSE
64 if (i == 1000) fprintf(dbf, "gen_Magnitude: %d %g\n", i, magVal);
65 #endif
66 }
67 }
68
69
70 /* PRINT_BINS
71 This function is intended for debugging purposes.
72 pass in an array representing the "mid point"
73 of each bin, and the number of bins. The
74 function will print out:
75 i value
76 index falue
77 low range of the bin
78 middle of the bin
79 high range of the bin
80 */
print_Bins(float * bins,int numBins)81 void print_Bins(float* bins, int numBins){
82 printf("BINS: \n");
83 int i;
84 for (i=0; i<numBins; i++) {
85 int index = i % numBins;
86 int indexNext = (index + 1) % numBins;
87 int indexPrev = (index - 1) % numBins;
88
89 float maxValue =(bins[index]+bins[indexNext])/2;
90 float minValue=(bins[index]+bins[indexPrev])/2;
91
92 if(index == 1)
93 maxValue =bins[index]+(bins[index]-((bins[index]+bins[indexPrev])/2));
94 if(index == 2)
95 minValue =bins[index]-(((bins[index]+bins[indexNext])/2)-bins[index]);
96
97 printf("%d (%d) %g||%g||%g\n",i,index,minValue,bins[i],maxValue);
98 }
99 }
100
101 /* MIN_BIN_NUM
102 Returns the index in the array of bins
103 of the "smallest" bin. aka, the bin
104 whose midpoint is the smallest.
105 */
min_Bin_Num(float * bins,int numBins)106 int min_Bin_Num(float* bins, int numBins){
107
108 int i;
109 int minIndex=0;
110 float minValue=bins[0];
111 for (i = 0; i < numBins; i++) {
112 if (minValue > bins[i]) {
113 minValue = bins[i];
114 minIndex = i;
115 }
116 }
117 return minIndex;
118 }
119
120
121 /* GEN_HAMMING
122 given data from reading in a section of a sound file
123 applies the hamming function to each sample.
124 n specifies the length of in and out.
125 */
gen_Hamming(float * h,int n)126 void gen_Hamming(float* h, int n)
127 {
128 int k;
129 for (k = 0; k < n; k++) {
130 float cos_value = (float) cos(2.0 * M_PI * k * (1.0 / n));
131 h[k] = 0.54F + (-0.46F * cos_value);
132 }
133 }
134
135 /* NEXTPOWEROF2
136 given an int n, finds the next power of 2 larger than
137 or equal to n.
138 */
nextPowerOf2(int n)139 int nextPowerOf2(int n)
140 {
141 int result = 1;
142 while (result < n) result = (result << 1);
143 return result;
144 }
145
146
147 // normalize a chroma vector (from audio or midi) to have
148 // mean of 0 and std. dev. of 1
149 //
normalize(float * cv)150 static void normalize(float *cv)
151 {
152 float avg = 0;
153
154 for (int i = 0; i < CHROMA_BIN_COUNT; i++) {
155 avg += cv[i];
156 }
157 avg /= CHROMA_BIN_COUNT;
158
159 /* Normalize this frame to avg. 0 */
160 for (int i = 0; i < CHROMA_BIN_COUNT; i++)
161 cv[i] -= avg;
162
163 /* Calculate std. dev. for this frame */
164 float sum = 0;
165 for (int i = 0; i < CHROMA_BIN_COUNT; i++) {
166 float x = cv[i];
167 sum += x * x;
168 }
169 float dev = sqrt(sum / CHROMA_BIN_COUNT);
170 if (dev == 0.0) dev = 1.0F; /* don't divide by zero */
171
172 /* Normalize this frame to std. dev. 1*/
173 for (int i = 0; i < CHROMA_BIN_COUNT; i++) cv[i] /= dev;
174 }
175
176
177 /* GEN_CHROMA_AUDIO -- compute chroma for an audio file
178 */
179 /*
180 generates the chroma energy for a given sequence
181 with a low cutoff and high cutoff.
182 The chroma energy is placed in the float *chrom_energy.
183 this 2D is an array of pointers.
184 The function returns the number of frames
185 (aka the length of the 1st dimention of chrom_energy)
186 */
gen_chroma_audio(Audio_reader & reader,int hcutoff,int lcutoff,float ** chrom_energy,double * actual_frame_period,int id)187 int Scorealign::gen_chroma_audio(Audio_reader &reader, int hcutoff,
188 int lcutoff, float **chrom_energy, double *actual_frame_period,
189 int id)
190 {
191 int i;
192 double sample_rate = reader.get_sample_rate();
193 float reg11[CHROMA_BIN_COUNT]; // temp storage1;
194 float reg12[CHROMA_BIN_COUNT]; // temp storage2;
195
196 if (verbose) {
197 printf ("==============FILE %d====================\n", id);
198 reader.print_info();
199 }
200 #if DEBUG_LOG
201 fprintf(dbf, "******** BEGIN AUDIO CHROMA COMPUTATION *********\n");
202 #endif
203 // this seems like a poor way to set actual_frame_period_0 or _1 in
204 // the Scorealign object, but I'm not sure what would be better:
205 *actual_frame_period = float(reader.actual_frame_period);
206
207 for (i = 0; i < CHROMA_BIN_COUNT; i++) {
208 reg11[i] = -999;
209 }
210 for (i = 0; i < CHROMA_BIN_COUNT; i++){
211 reg12[i] = 0;
212 }
213
214 /*=============================================================*/
215
216 // allocate some buffers for use in the loop
217 int full_data_size = nextPowerOf2(reader.samples_per_frame);
218 if (verbose) {
219 printf(" samples per frame is %ld \n", reader.samples_per_frame);
220 printf(" total chroma frames %ld\n", reader.frame_count);
221 // printf(" Window size %g second \n", reader.window_size);
222 printf(" hopsize in samples %ld \n", reader.hop_samples);
223 printf(" fft size %d\n", full_data_size);
224 }
225
226 float *full_data = ALLOC(float, full_data_size);
227 float *fft_dataR = ALLOC(float, full_data_size);
228 float *fft_dataI = ALLOC(float, full_data_size);
229 //set to zero
230 memset(full_data, 0, full_data_size * sizeof(float));
231 memset(fft_dataR, 0, full_data_size * sizeof(float));
232 memset(fft_dataI, 0, full_data_size * sizeof(float));
233 //check to see if memory has been allocated
234 assert(full_data != NULL);
235 assert(fft_dataR != NULL);
236 assert(fft_dataI != NULL);
237
238 int *bin_map = ALLOC(int, full_data_size);
239
240 //set up the chrom_energy array;
241 *chrom_energy = ALLOC(float, reader.frame_count * (CHROMA_BIN_COUNT + 1));
242 int cv_index = 0;
243
244 // set up mapping from spectral bins to chroma bins
245 // ordinarily, we would add 0.5 to round to nearest bin, but we also
246 // want to subtract 0.5 because the bin has a width of +/- 0.5. These
247 // two cancel out, so we can just round down and get the right answer.
248 int num_bins_to_use = (int) (hcutoff * full_data_size / sample_rate);
249 // But then we want to add 1 because the loops will only go to
250 // high_bin - 1:
251 int high_bin = min(num_bins_to_use + 1, full_data_size);
252 //printf("center freq of high bin is %g\n", (high_bin - 1) * sample_rate /
253 // full_data_size);
254 //printf("high freq of high bin is %g\n",
255 // (high_bin - 1 + 0.5) * sample_rate / full_data_size);
256 // If we add 0.5, we'll round to nearest bin center frequency, but
257 // bin covers a frequency range that goes 0.5 bin width lower, so we
258 // add 1 before rounding.
259 int low_bin = (int) (lcutoff * full_data_size / sample_rate);
260 //printf("center freq of low bin is %g\n", low_bin * sample_rate /
261 // full_data_size);
262 //printf("low freq of low bin is %g\n", (low_bin - 0.5) * sample_rate /
263 // full_data_size);
264 //printf("frequency spacing of bins is %g\n",
265 // sample_rate / full_data_size);
266 double freq = low_bin * sample_rate / full_data_size;
267 for (i = low_bin; i < high_bin; i++) {
268 float raw_bin = hz_to_step(float(freq));
269 int round_bin = (int) (raw_bin + 0.5F);
270 int mod_bin = round_bin % 12;
271 bin_map[i] = mod_bin;
272 freq += sample_rate / full_data_size;
273 }
274 // printf("BIN_COUNT is !!!!!!!!!!!!! %d\n",CHROMA_BIN_COUNT);
275
276 // create Hamming window data
277 float *hamming = ALLOC(float, reader.samples_per_frame);
278 gen_Hamming(hamming, reader.samples_per_frame);
279
280 while (reader.read_window(full_data)) {
281 //fill out array with 0's till next power of 2
282 #ifdef SA_VERBOSE
283 fprintf(dbf, "samples_per_frame %d sample %g\n",
284 reader.samples_per_frame, full_data[0]);
285 #endif
286 for (i = reader.samples_per_frame; i < full_data_size; i++)
287 full_data[i] = 0;
288
289 #ifdef SA_VERBOSE
290 fprintf(dbf, "preFFT: full_data[1000] %g\n", full_data[1000]);
291 #endif
292
293 // compute the RMS, then apply the Hamming window to the data
294 float rms = 0.0f;
295 for (i = 0; i < reader.samples_per_frame; i++) {
296 float x = full_data[i];
297 rms += x * x;
298 full_data[i] = x * hamming[i];
299 }
300 rms = sqrt(rms / reader.samples_per_frame);
301
302 #ifdef SA_VERBOSE
303 fprintf(dbf, "preFFT: hammingData[1000] %g\n",
304 full_data[1000]);
305 #endif
306 FFT3(full_data_size, 0, full_data, NULL, fft_dataR, fft_dataI); //fft3
307
308 //given the fft, compute the energy of each point
309 gen_Magnitude(fft_dataR, fft_dataI, low_bin, high_bin, full_data);
310
311 /*-------------------------------------
312 GENERATE BINS AND PUT
313 THE CORRECT ENERGY IN
314 EACH BIN, CORRESPONDING
315 TO THE CORRECT PITCH
316 -------------------------------------*/
317
318 float binEnergy[CHROMA_BIN_COUNT];
319 int binCount[CHROMA_BIN_COUNT];
320
321 for (i = 0; i < CHROMA_BIN_COUNT; i++) {
322 binCount[i] = 0;
323 binEnergy[i] = 0.0;
324 }
325
326 for (i = low_bin; i < high_bin; i++) {
327 int mod_bin = bin_map[i];
328 binEnergy[mod_bin] += full_data[i];
329 binCount[mod_bin]++;
330 }
331
332 /*-------------------------------------
333 END OF BIN GENERATION
334 -------------------------------------*/
335 /* THE FOLLOWING LOOKS LIKE SOME OLD CODE TO COMPUTE
336 * CHROMA FLUX, BUT IT IS NOT IN USE NOW
337
338 if (PRINT_BIN_ENERGY) {
339 float mao1;
340 float sum=0.;
341
342 for (i = 0; i < CHROMA_BIN_COUNT; i++) {
343 reg12[i]=binEnergy[i] / binCount[i];
344 }
345
346 if (reg11[0]==-999){
347 printf("Chroma Flux \n\n");
348 } else {
349 for (i = 0; i < CHROMA_BIN_COUNT; i++) {
350 }
351 for (int k = 0; k < CHROMA_BIN_COUNT; k++) {
352 float x = reg11[k];
353 float y = reg12[k];
354 float diff = x - y;
355 sum += diff * diff;
356 }
357 mao1 = sqrt(sum);
358 sequence++;
359 sum = 0.;
360 mao1 = 0.;
361 }
362 for (i = 0; i < CHROMA_BIN_COUNT; i++) {
363 reg11[i]=reg12[i];
364 }
365 //fclose(Pointer);
366 }
367 */
368 //put chrom energy into the returned array
369
370 #ifdef SA_VERBOSE
371 fprintf(dbf, "cv_index %d\n", cv_index);
372 #endif
373 assert(cv_index < reader.frame_count);
374 float *cv = AREF1(*chrom_energy, cv_index);
375 for (i = 0; i < CHROMA_BIN_COUNT; i++) {
376 cv[i] = binEnergy[i] / binCount[i];
377 }
378 if (rms < silence_threshold) {
379 // "silence" flag
380 cv[CHROMA_BIN_COUNT] = 1.0f;
381 } else {
382 cv[CHROMA_BIN_COUNT] = 0.0f;
383 // normalize the non-silent frames
384 normalize(cv);
385 }
386 #if DEBUG_LOG
387 fprintf(dbf, "%d@%g) ", cv_index, cv_index * reader.actual_frame_period);
388 for (int i = 0; i < CHROMA_BIN_COUNT; i++) {
389 fprintf(dbf, "%d:%g ", i, cv[i]);
390 }
391 fprintf(dbf, " sil?:%g\n\n", cv[CHROMA_BIN_COUNT]);
392 #endif
393 cv_index++;
394 if (progress && cv_index % 10 == 0 &&
395 !progress->set_feature_progress(
396 float(cv_index * reader.actual_frame_period))) {
397 break;
398 }
399 } // end of while ((readcount = read_mono_floats...
400
401 free(hamming);
402 free(fft_dataI);
403 free(fft_dataR);
404 free(full_data);
405 if (verbose)
406 printf("\nGenerated Chroma. file%d_frames is %i\n", id, file0_frames);
407 return cv_index;
408 }
409
410
411 class Event_list {
412 public:
413 Alg_note_ptr note;
414 Event_list *next;
415
Event_list(Alg_event_ptr event_,Event_list * next_)416 Event_list(Alg_event_ptr event_, Event_list *next_) {
417 note = (Alg_note_ptr) event_;
418 next = next_;
419 }
420
~Event_list()421 ~Event_list() {
422 }
423 };
424 typedef Event_list *Event_list_ptr;
425
426
427 /* gen_chroma_midi -- generate chroma vectors for midi file */
428 /*
429 generates the chroma energy for a given sequence
430 with a low cutoff and high cutoff.
431 The chroma energy is placed in the float *chrom_energy.
432 this 2D is an array of pointers.
433 The function returns the number of frames
434 (aka the length of the 1st dimension of chrom_energy)
435 *
436 *
437 Notes: keep a list of notes that are sounding.
438 For each frame,
439 zero the vector
440 while next note starts before end of frame, insert note in list
441 for each note in list, compute weight and add to vector. Remove
442 if note ends before frame start time.
443 How many frames?
444 */
445
gen_chroma_midi(Alg_seq & seq,float dur,int nnotes,int hcutoff,int lcutoff,float ** chrom_energy,double * actual_frame_period,int id)446 int Scorealign::gen_chroma_midi(Alg_seq &seq, float dur, int nnotes,
447 int hcutoff, int lcutoff,
448 float **chrom_energy, double *actual_frame_period,
449 int id)
450 {
451 // silence_threshold is compared to the *average* of chroma bins.
452 // Rather than divide the sum by CHROMA_BIN_COUNT to compute the
453 // average, just compute the sum and compare to silence_threshold * 12
454 float threshold = (float) (silence_threshold * CHROMA_BIN_COUNT);
455
456 if (verbose) {
457 printf ("==============FILE %d====================\n", id);
458 SA_V(seq.write(cout, true));
459 }
460 #if DEBUG_LOG
461 fprintf(dbf, "******** BEGIN MIDI CHROMA COMPUTATION *********\n");
462 #endif /*=============================================================*/
463
464 *actual_frame_period = frame_period; // since we don't quantize to samples
465
466 /*=============================================================*/
467
468 seq.convert_to_seconds();
469 ///* find duration */
470 //float dur = 0.0F;
471 //int nnotes = 0;
472 //nnotes = find_midi_duration(seq, &dur);
473
474 /*================================================================*/
475
476 int frame_count= (int)ceil(((float)dur/ frame_period + 1));
477
478 /*================================================================*/
479
480 if (verbose) {
481 printf(" note count = %d\n", nnotes);
482 printf(" duration in sec = %f\n", dur);
483 printf(" chroma frames %d\n", frame_count);
484 }
485
486 //set up the chrom_energy array;
487 (*chrom_energy) = ALLOC(float, frame_count * (CHROMA_BIN_COUNT + 1));
488 Event_list_ptr list = NULL;
489 Alg_iterator iterator(&seq, false);
490 iterator.begin();
491 Alg_event_ptr event = iterator.next();
492 int cv_index;
493 for (cv_index = 0; cv_index < frame_count; cv_index++) {
494
495 /*====================================================*/
496
497 float frame_begin = (float) max(cv_index * frame_period -
498 window_size / 2.0, 0.0);
499 //chooses zero if negative
500
501 float frame_end = (float) (cv_index * frame_period + window_size / 2.0);
502 /*============================================================*/
503 float *cv = AREF1(*chrom_energy, cv_index);
504 /* zero the vector */
505 for (int i = 0; i < CHROMA_BIN_COUNT + 1; i++) cv[i] = 0;
506 /* add new notes that are in the frame */
507 while (event && event->time < frame_end) {
508 if (event->is_note()) {
509 list = new Event_list(event, list);
510 }
511 event = iterator.next();
512 }
513 /* remove notes that are no longer sounding */
514 Event_list_ptr *ptr = &list;
515 while (*ptr) {
516 while ((*ptr) &&
517 (*ptr)->note->time + (*ptr)->note->dur < frame_begin) {
518 Event_list_ptr temp = *ptr;
519 *ptr = (*ptr)->next;
520 delete temp;
521 }
522 if (*ptr) ptr = &((*ptr)->next);
523 }
524 float sum = 0.0;
525 for (Event_list_ptr item = list; item; item = item->next) {
526 /* compute duration of overlap */
527 float overlap =
528 min(frame_end, (float) (item->note->time + item->note->dur)) -
529 max(frame_begin, (float) item->note->time);
530 float velocity = item->note->loud;
531 float weight = overlap * velocity;
532 #if DEBUG_LOG
533 fprintf(dbf, "%3d pitch %g starting %g key %ld overlap %g velocity %g\n",
534 cv_index, item->note->pitch, item->note->time,
535 item->note->get_identifier(), overlap, velocity);
536 #endif
537 cv[(int) item->note->pitch % 12] += weight;
538 sum += weight;
539 }
540
541
542 if (sum < threshold) {
543 cv[CHROMA_BIN_COUNT] = 1.0;
544 } else {
545 normalize(cv);
546 }
547
548
549 #if DEBUG_LOG
550 fprintf(dbf, "%d@%g) ", cv_index, frame_begin);
551 for (int i = 0; i < CHROMA_BIN_COUNT; i++) {
552 fprintf(dbf, "%d:%g ", i, cv[i]);
553 }
554 fprintf(dbf, " sil?:%g\n\n", cv[CHROMA_BIN_COUNT]);
555 #endif
556 if (cv_index % 10 == 0 && progress &&
557 !progress->set_feature_progress(
558 float(cv_index * *actual_frame_period))) {
559 break;
560 }
561 }
562 while (list) {
563 Event_list_ptr temp = list;
564 list = list->next;
565 delete temp;
566 }
567 iterator.end();
568 if (verbose)
569 printf("\nGenerated Chroma. file%d_frames is %i\n", id, file0_frames);
570 return frame_count;
571 }
572