1 
2 #ifdef _WIN32
3     #include "malloc.h"
4 #endif
5 #include "stdlib.h" // for OSX compatibility, malloc.h -> stdlib.h
6 #include "stdio.h"
7 #include "assert.h"
8 #include "string.h"
9 #include "math.h"
10 #include <fstream>
11 #include "allegro.h"
12 #include "fft3/FFT3.h"
13 #include "audioreader.h"
14 #include "scorealign.h"
15 #include "gen_chroma.h"
16 #include "comp_chroma.h"
17 #include "mfmidi.h"
18 #include "sautils.h"
19 #ifdef SA_VERBOSE
20 #include <iostream> // cout
21 #endif
22 using namespace std;
23 
24 #ifdef min
25 #undef min
26 #endif
27 #define min(x,y) ((x)<(y)?(x):(y))
28 
29 #ifdef max
30 #undef max
31 #endif
32 #define max(x,y) ((x)>(y)?(x):(y))
33 
34 //if 1, causes printing internally
35 #define PRINT_BIN_ENERGY 1
36 
37 #define p1 0.0577622650466621
38 #define p2 2.1011784386926213
39 
40 // each row is one chroma vector,
41 // data is stored as an array of chroma vectors:
42 // vector 1, vector 2, ...
43 
hz_to_step(float hz)44 float hz_to_step(float hz)
45 {
46     return float((log(hz) - p2) / p1);
47 }
48 
49 /*				GEN_MAGNITUDE
50    given the real and imaginary portions of a complex FFT function, compute
51    the magnitude of the fft bin.
52 
53    NOTE: out should be length n
54 */
gen_Magnitude(float * inR,float * inI,int low,int hi,float * out)55 void gen_Magnitude(float* inR, float* inI, int low, int hi, float* out)
56 {
57     int i;
58 
59     for (i = low; i < hi; i++) {
60       float magVal = sqrt(inR[i] * inR[i] + inI[i] * inI[i]);
61       //printf("   %d: sqrt(%g^2+%g^2)=%g\n",i,inR[i],inI[i+1],magVal);
62       out[i]= magVal;
63 #ifdef SA_VERBOSE
64       if (i == 1000) fprintf(dbf, "gen_Magnitude: %d %g\n", i, magVal);
65 #endif
66     }
67 }
68 
69 
70 /*				PRINT_BINS
71     This function is intended for debugging purposes.
72     pass in an array representing the "mid point"
73     of each bin, and the number of bins.  The
74     function will print out:
75     i value
76     index falue
77     low range of the bin
78     middle of the bin
79     high range of the bin
80 */
print_Bins(float * bins,int numBins)81 void print_Bins(float* bins, int numBins){
82     printf("BINS: \n");
83     int i;
84     for (i=0; i<numBins; i++) {
85       int index = i % numBins;
86       int indexNext = (index + 1) % numBins;
87       int indexPrev = (index - 1) % numBins;
88 
89       float maxValue =(bins[index]+bins[indexNext])/2;
90       float minValue=(bins[index]+bins[indexPrev])/2;
91 
92       if(index == 1)
93         maxValue =bins[index]+(bins[index]-((bins[index]+bins[indexPrev])/2));
94       if(index == 2)
95         minValue =bins[index]-(((bins[index]+bins[indexNext])/2)-bins[index]);
96 
97       printf("%d (%d) %g||%g||%g\n",i,index,minValue,bins[i],maxValue);
98     }
99 }
100 
101 /*				MIN_BIN_NUM
102     Returns the index in the array of bins
103     of the "smallest" bin.  aka, the bin
104     whose midpoint is the smallest.
105 */
min_Bin_Num(float * bins,int numBins)106 int min_Bin_Num(float* bins, int numBins){
107 
108     int i;
109     int minIndex=0;
110     float minValue=bins[0];
111     for (i = 0; i < numBins; i++) {
112       if (minValue > bins[i]) {
113         minValue = bins[i];
114         minIndex = i;
115       }
116     }
117     return minIndex;
118 }
119 
120 
121 /*				GEN_HAMMING
122     given data from reading in a section of a sound file
123     applies the hamming function to each sample.
124     n specifies the length of in and out.
125 */
gen_Hamming(float * h,int n)126 void gen_Hamming(float* h, int n)
127 {
128     int k;
129     for (k = 0; k < n; k++) {
130       float cos_value = (float) cos(2.0 * M_PI * k * (1.0 / n));
131         h[k] = 0.54F + (-0.46F * cos_value);
132     }
133 }
134 
135 /*				NEXTPOWEROF2
136     given an int n, finds the next power of 2 larger than
137     or equal to n.
138 */
nextPowerOf2(int n)139 int nextPowerOf2(int n)
140 {
141     int result = 1;
142     while (result < n) result = (result << 1);
143     return result;
144 }
145 
146 
147 // normalize a chroma vector (from audio or midi) to have
148 // mean of 0 and std. dev. of 1
149 //
normalize(float * cv)150 static void normalize(float *cv)
151 {
152     float avg = 0;
153 
154     for (int i = 0; i < CHROMA_BIN_COUNT; i++) {
155         avg += cv[i];
156     }
157     avg /= CHROMA_BIN_COUNT;
158 
159     /* Normalize this frame to avg. 0 */
160     for (int i = 0; i < CHROMA_BIN_COUNT; i++)
161         cv[i] -= avg;
162 
163     /* Calculate std. dev. for this frame */
164     float sum = 0;
165     for (int i = 0; i < CHROMA_BIN_COUNT; i++) {
166         float x = cv[i];
167         sum += x * x;
168     }
169     float dev = sqrt(sum / CHROMA_BIN_COUNT);
170     if (dev == 0.0) dev = 1.0F; /* don't divide by zero */
171 
172     /* Normalize this frame to std. dev. 1*/
173     for (int i = 0; i < CHROMA_BIN_COUNT; i++) cv[i] /= dev;
174 }
175 
176 
177 /* GEN_CHROMA_AUDIO -- compute chroma for an audio file
178  */
179 /*
180     generates the chroma energy for a given sequence
181     with a low cutoff and high cutoff.
182     The chroma energy is placed in the float *chrom_energy.
183     this 2D is an array of pointers.
184     The function returns the number of frames
185     (aka the length of the 1st dimention of chrom_energy)
186 */
gen_chroma_audio(Audio_reader & reader,int hcutoff,int lcutoff,float ** chrom_energy,double * actual_frame_period,int id)187 int Scorealign::gen_chroma_audio(Audio_reader &reader, int hcutoff,
188         int lcutoff, float **chrom_energy, double *actual_frame_period,
189         int id)
190 {
191     int i;
192     double sample_rate = reader.get_sample_rate();
193     float reg11[CHROMA_BIN_COUNT]; // temp storage1;
194     float reg12[CHROMA_BIN_COUNT]; // temp storage2;
195 
196     if (verbose) {
197         printf ("==============FILE %d====================\n", id);
198         reader.print_info();
199     }
200 #if DEBUG_LOG
201     fprintf(dbf, "******** BEGIN AUDIO CHROMA COMPUTATION *********\n");
202 #endif
203     // this seems like a poor way to set actual_frame_period_0 or _1 in
204     // the Scorealign object, but I'm not sure what would be better:
205     *actual_frame_period = float(reader.actual_frame_period);
206 
207     for (i = 0; i < CHROMA_BIN_COUNT; i++) {
208         reg11[i] = -999;
209       }
210     for (i = 0; i < CHROMA_BIN_COUNT; i++){
211         reg12[i] = 0;
212       }
213 
214    /*=============================================================*/
215 
216     // allocate some buffers for use in the loop
217     int full_data_size = nextPowerOf2(reader.samples_per_frame);
218     if (verbose) {
219         printf("   samples per frame is %ld \n", reader.samples_per_frame);
220         printf("   total chroma frames %ld\n", reader.frame_count);
221         // printf("   Window size  %g second \n", reader.window_size);
222         printf("   hopsize in samples %ld \n", reader.hop_samples);
223         printf("   fft size %d\n", full_data_size);
224     }
225 
226     float *full_data = ALLOC(float, full_data_size);
227     float *fft_dataR = ALLOC(float, full_data_size);
228     float *fft_dataI = ALLOC(float, full_data_size);
229     //set to zero
230     memset(full_data, 0, full_data_size * sizeof(float));
231     memset(fft_dataR, 0, full_data_size * sizeof(float));
232     memset(fft_dataI, 0, full_data_size * sizeof(float));
233     //check to see if memory has been allocated
234     assert(full_data != NULL);
235     assert(fft_dataR != NULL);
236     assert(fft_dataI != NULL);
237 
238     int *bin_map = ALLOC(int, full_data_size);
239 
240     //set up the chrom_energy array;
241     *chrom_energy = ALLOC(float, reader.frame_count * (CHROMA_BIN_COUNT + 1));
242     int cv_index = 0;
243 
244     // set up mapping from spectral bins to chroma bins
245     // ordinarily, we would add 0.5 to round to nearest bin, but we also
246     // want to subtract 0.5 because the bin has a width of +/- 0.5. These
247     // two cancel out, so we can just round down and get the right answer.
248     int num_bins_to_use = (int) (hcutoff * full_data_size / sample_rate);
249     // But then we want to add 1 because the loops will only go to
250     // high_bin - 1:
251     int high_bin = min(num_bins_to_use + 1, full_data_size);
252     //printf("center freq of high bin is %g\n", (high_bin - 1) * sample_rate /
253     //    full_data_size);
254     //printf("high freq of high bin is %g\n",
255     //     (high_bin - 1 + 0.5) * sample_rate / full_data_size);
256     // If we add 0.5, we'll round to nearest bin center frequency, but
257     // bin covers a frequency range that goes 0.5 bin width lower, so we
258     // add 1 before rounding.
259     int low_bin = (int) (lcutoff * full_data_size / sample_rate);
260     //printf("center freq of low bin is %g\n", low_bin * sample_rate /
261     //    full_data_size);
262     //printf("low freq of low bin is %g\n", (low_bin - 0.5) * sample_rate /
263     //    full_data_size);
264     //printf("frequency spacing of bins is %g\n",
265     //     sample_rate / full_data_size);
266     double freq = low_bin * sample_rate / full_data_size;
267     for (i = low_bin; i < high_bin; i++) {
268         float raw_bin = hz_to_step(float(freq));
269         int round_bin = (int) (raw_bin + 0.5F);
270         int mod_bin = round_bin % 12;
271         bin_map[i] = mod_bin;
272         freq += sample_rate / full_data_size;
273     }
274     // printf("BIN_COUNT is !!!!!!!!!!!!!   %d\n",CHROMA_BIN_COUNT);
275 
276     // create Hamming window data
277     float *hamming = ALLOC(float, reader.samples_per_frame);
278     gen_Hamming(hamming, reader.samples_per_frame);
279 
280     while (reader.read_window(full_data)) {
281         //fill out array with 0's till next power of 2
282 #ifdef SA_VERBOSE
283         fprintf(dbf, "samples_per_frame %d sample %g\n",
284                 reader.samples_per_frame, full_data[0]);
285 #endif
286         for (i = reader.samples_per_frame; i < full_data_size; i++)
287             full_data[i] = 0;
288 
289 #ifdef SA_VERBOSE
290         fprintf(dbf, "preFFT: full_data[1000] %g\n", full_data[1000]);
291 #endif
292 
293         // compute the RMS, then apply the Hamming window to the data
294         float rms = 0.0f;
295         for (i = 0; i < reader.samples_per_frame; i++) {
296             float x = full_data[i];
297             rms += x * x;
298             full_data[i] = x * hamming[i];
299         }
300         rms = sqrt(rms / reader.samples_per_frame);
301 
302 #ifdef SA_VERBOSE
303         fprintf(dbf, "preFFT: hammingData[1000] %g\n",
304                 full_data[1000]);
305 #endif
306         FFT3(full_data_size, 0, full_data, NULL, fft_dataR, fft_dataI); //fft3
307 
308         //given the fft, compute the energy of each point
309         gen_Magnitude(fft_dataR, fft_dataI, low_bin, high_bin, full_data);
310 
311         /*-------------------------------------
312           GENERATE BINS AND PUT
313           THE CORRECT ENERGY IN
314           EACH BIN, CORRESPONDING
315           TO THE CORRECT PITCH
316           -------------------------------------*/
317 
318         float binEnergy[CHROMA_BIN_COUNT];
319         int binCount[CHROMA_BIN_COUNT];
320 
321         for (i = 0; i < CHROMA_BIN_COUNT; i++) {
322             binCount[i] = 0;
323             binEnergy[i] = 0.0;
324         }
325 
326         for (i = low_bin; i < high_bin; i++) {
327             int mod_bin = bin_map[i];
328             binEnergy[mod_bin] += full_data[i];
329             binCount[mod_bin]++;
330         }
331 
332         /*-------------------------------------
333           END OF BIN GENERATION
334           -------------------------------------*/
335         /* THE FOLLOWING LOOKS LIKE SOME OLD CODE TO COMPUTE
336          * CHROMA FLUX, BUT IT IS NOT IN USE NOW
337 
338         if (PRINT_BIN_ENERGY) {
339             float mao1;
340             float sum=0.;
341 
342             for (i = 0; i < CHROMA_BIN_COUNT; i++) {
343                 reg12[i]=binEnergy[i] / binCount[i];
344             }
345 
346             if (reg11[0]==-999){
347                 printf("Chroma Flux \n\n");
348             } else {
349                 for (i = 0; i < CHROMA_BIN_COUNT; i++) {
350                 }
351                 for (int k = 0; k < CHROMA_BIN_COUNT; k++) {
352                     float x = reg11[k];
353                     float y = reg12[k];
354                     float diff = x - y;
355                     sum += diff * diff;
356                 }
357                 mao1 = sqrt(sum);
358                 sequence++;
359                 sum = 0.;
360                 mao1 = 0.;
361             }
362             for (i = 0; i < CHROMA_BIN_COUNT; i++) {
363                 reg11[i]=reg12[i];
364             }
365             //fclose(Pointer);
366           }
367         */
368         //put chrom energy into the returned array
369 
370 #ifdef SA_VERBOSE
371         fprintf(dbf, "cv_index %d\n", cv_index);
372 #endif
373         assert(cv_index < reader.frame_count);
374         float *cv = AREF1(*chrom_energy, cv_index);
375         for (i = 0;  i < CHROMA_BIN_COUNT; i++) {
376             cv[i] = binEnergy[i] / binCount[i];
377         }
378         if (rms < silence_threshold) {
379             // "silence" flag
380             cv[CHROMA_BIN_COUNT] = 1.0f;
381         } else {
382             cv[CHROMA_BIN_COUNT] = 0.0f;
383             // normalize the non-silent frames
384             normalize(cv);
385         }
386 #if DEBUG_LOG
387         fprintf(dbf, "%d@%g) ", cv_index, cv_index * reader.actual_frame_period);
388         for (int i = 0; i < CHROMA_BIN_COUNT; i++) {
389           fprintf(dbf, "%d:%g ", i, cv[i]);
390         }
391         fprintf(dbf, " sil?:%g\n\n", cv[CHROMA_BIN_COUNT]);
392 #endif
393         cv_index++;
394         if (progress && cv_index % 10 == 0 &&
395             !progress->set_feature_progress(
396                     float(cv_index * reader.actual_frame_period))) {
397             break;
398         }
399     } // end of while ((readcount = read_mono_floats...
400 
401     free(hamming);
402     free(fft_dataI);
403     free(fft_dataR);
404     free(full_data);
405     if (verbose)
406         printf("\nGenerated Chroma. file%d_frames is %i\n", id, file0_frames);
407     return cv_index;
408 }
409 
410 
411 class Event_list {
412 public:
413 	Alg_note_ptr note;
414 	Event_list *next;
415 
Event_list(Alg_event_ptr event_,Event_list * next_)416 	Event_list(Alg_event_ptr event_, Event_list *next_) {
417 		note = (Alg_note_ptr) event_;
418 		next = next_;
419 	}
420 
~Event_list()421 	~Event_list() {
422 	}
423 };
424 typedef Event_list *Event_list_ptr;
425 
426 
427 /* gen_chroma_midi -- generate chroma vectors for midi file */
428 /*
429     generates the chroma energy for a given sequence
430     with a low cutoff and high cutoff.
431     The chroma energy is placed in the float *chrom_energy.
432     this 2D is an array of pointers.
433     The function returns the number of frames
434     (aka the length of the 1st dimension of chrom_energy)
435  *
436  *
437   Notes: keep a list of notes that are sounding.
438   For each frame,
439     zero the vector
440     while next note starts before end of frame, insert note in list
441 	  for each note in list, compute weight and add to vector. Remove
442 	  if note ends before frame start time.
443   How many frames?
444  */
445 
gen_chroma_midi(Alg_seq & seq,float dur,int nnotes,int hcutoff,int lcutoff,float ** chrom_energy,double * actual_frame_period,int id)446 int Scorealign::gen_chroma_midi(Alg_seq &seq, float dur, int nnotes,
447                     int hcutoff, int lcutoff,
448                     float **chrom_energy, double *actual_frame_period,
449                     int id)
450 {
451     // silence_threshold is compared to the *average* of chroma bins.
452     // Rather than divide the sum by CHROMA_BIN_COUNT to compute the
453     // average, just compute the sum and compare to silence_threshold * 12
454     float threshold = (float) (silence_threshold * CHROMA_BIN_COUNT);
455 
456     if (verbose) {
457         printf ("==============FILE %d====================\n", id);
458         SA_V(seq.write(cout, true));
459     }
460 #if DEBUG_LOG
461     fprintf(dbf, "******** BEGIN MIDI CHROMA COMPUTATION *********\n");
462 #endif    /*=============================================================*/
463 
464     *actual_frame_period = frame_period; // since we don't quantize to samples
465 
466     /*=============================================================*/
467 
468     seq.convert_to_seconds();
469     ///* find duration */
470     //float dur = 0.0F;
471     //int nnotes = 0;
472     //nnotes = find_midi_duration(seq, &dur);
473 
474     /*================================================================*/
475 
476     int frame_count= (int)ceil(((float)dur/ frame_period + 1));
477 
478     /*================================================================*/
479 
480     if (verbose) {
481         printf("   note count = %d\n", nnotes);
482         printf("   duration in sec = %f\n", dur);
483         printf("   chroma frames %d\n", frame_count);
484     }
485 
486     //set up the chrom_energy array;
487     (*chrom_energy) = ALLOC(float, frame_count * (CHROMA_BIN_COUNT + 1));
488     Event_list_ptr list = NULL;
489     Alg_iterator iterator(&seq, false);
490     iterator.begin();
491     Alg_event_ptr event = iterator.next();
492     int cv_index;
493     for (cv_index = 0; cv_index < frame_count; cv_index++) {
494 
495         /*====================================================*/
496 
497         float frame_begin = (float) max(cv_index * frame_period -
498                                         window_size / 2.0, 0.0);
499         //chooses zero if negative
500 
501         float frame_end = (float) (cv_index * frame_period + window_size / 2.0);
502 	/*============================================================*/
503         float *cv = AREF1(*chrom_energy, cv_index);
504         /* zero the vector */
505         for (int i = 0; i < CHROMA_BIN_COUNT + 1; i++) cv[i] = 0;
506         /* add new notes that are in the frame */
507         while (event && event->time < frame_end) {
508             if (event->is_note()) {
509                 list = new Event_list(event, list);
510             }
511             event = iterator.next();
512         }
513         /* remove notes that are no longer sounding */
514         Event_list_ptr *ptr = &list;
515         while (*ptr) {
516             while ((*ptr) &&
517                    (*ptr)->note->time + (*ptr)->note->dur < frame_begin) {
518                 Event_list_ptr temp = *ptr;
519                 *ptr = (*ptr)->next;
520                 delete temp;
521             }
522             if (*ptr) ptr = &((*ptr)->next);
523         }
524         float sum = 0.0;
525         for (Event_list_ptr item = list; item; item = item->next) {
526             /* compute duration of overlap */
527             float overlap =
528                 min(frame_end, (float) (item->note->time + item->note->dur)) -
529                 max(frame_begin, (float) item->note->time);
530             float velocity = item->note->loud;
531             float weight = overlap * velocity;
532 #if DEBUG_LOG
533             fprintf(dbf, "%3d pitch %g starting %g key %ld overlap %g velocity %g\n",
534                     cv_index, item->note->pitch, item->note->time,
535                     item->note->get_identifier(), overlap, velocity);
536 #endif
537             cv[(int) item->note->pitch % 12] += weight;
538             sum += weight;
539         }
540 
541 
542         if (sum < threshold) {
543             cv[CHROMA_BIN_COUNT] = 1.0;
544         } else {
545             normalize(cv);
546         }
547 
548 
549 #if DEBUG_LOG
550         fprintf(dbf, "%d@%g) ", cv_index, frame_begin);
551         for (int i = 0; i < CHROMA_BIN_COUNT; i++) {
552             fprintf(dbf, "%d:%g ", i, cv[i]);
553         }
554         fprintf(dbf, " sil?:%g\n\n", cv[CHROMA_BIN_COUNT]);
555 #endif
556         if (cv_index % 10 == 0 && progress &&
557             !progress->set_feature_progress(
558                     float(cv_index * *actual_frame_period))) {
559             break;
560         }
561     }
562     while (list) {
563         Event_list_ptr temp = list;
564         list = list->next;
565         delete temp;
566     }
567     iterator.end();
568     if (verbose)
569         printf("\nGenerated Chroma. file%d_frames is %i\n", id, file0_frames);
570     return frame_count;
571 }
572