1 /* scorealign.h 2 * 3 * RBD 4 */ 5 6 // turn on lots of debugging, comment this line out to disable 7 // #define SA_VERBOSE 1 8 9 #ifdef SA_VERBOSE 10 #define SA_V(stmt) stmt 11 #else 12 #define SA_V(stmt) 13 #endif 14 15 // a class to report (optionally) score alignment progress 16 class SAProgress /* not final */ { 17 public: SAProgress()18 SAProgress() { smoothing = false; } 19 // we need the frame period to convert seconds to work units 20 // call this before set_duration() set_frame_period(double seconds)21 virtual void set_frame_period(double seconds) { frame_period = seconds; }; 22 // index = 0 or 1 to tell which file (first or second) 23 // is_audio = true (audio) or false (midi) 24 // seconds = duration of audio or midi data set_duration(int index,bool audio_flag,double seconds)25 virtual void set_duration(int index, bool audio_flag, double seconds) { 26 durations[index] = seconds; 27 is_audio[index] = audio_flag; }; 28 // if fitting pwl path to path, set smoothing to true set_smoothing(bool s)29 virtual void set_smoothing(bool s) { smoothing = s; } 30 // which alignment phase are we working on? 31 // 0 = first file chroma, 1 = second file chroma, 2 = compute matrix, 32 // 3 = smoothing 33 // Note: set_phase(0) is REQUIRED and must be called only ONCE. 34 // This is when we calculate total work 35 // and initialize any local state needed to handle set_feature_progress() 36 // and set_matrix_progress(). set_phase(int i)37 virtual void set_phase(int i) { phase = i; }; 38 // how many seconds have we processed (in phase 1 or 2) 39 // return value is normally true; false is request to cancel set_feature_progress(float seconds)40 virtual bool set_feature_progress(float seconds) { return true; }; 41 // report that some matrix elements have been computed? 42 // return value is normally true; false is request to cancel set_matrix_progress(int cells)43 virtual bool set_matrix_progress(int cells) { return true; }; 44 // report iterations of line smoothing set_smoothing_progress(int i)45 virtual bool set_smoothing_progress(int i) { return true; }; 46 protected: 47 double frame_period; 48 int phase; 49 double durations[2]; 50 bool is_audio[2]; 51 bool smoothing; 52 }; 53 54 55 enum { 56 SA_SUCCESS = 0, 57 SA_TOOSHORT, 58 SA_CANCEL 59 }; 60 61 62 #define SA_DFT_FRAME_PERIOD 0.2 63 #define SA_DFT_FRAME_PERIOD_TEXT wxT("0.20 secs") 64 65 #define SA_DFT_WINDOW_SIZE 0.2 66 #define SA_DFT_WINDOW_SIZE_TEXT wxT("0.20 secs") 67 68 #define SA_DFT_FORCE_FINAL_ALIGNMENT true 69 #define SA_DFT_FORCE_FINAL_ALIGNMENT_STRING wxT("true") 70 71 #define SA_DFT_IGNORE_SILENCE true 72 #define SA_DFT_IGNORE_SILENCE_STRING wxT("true") 73 74 #define SA_DFT_SILENCE_THRESHOLD 0.1 75 #define SA_DFT_SILENCE_THRESHOLD_TEXT wxT("0.100") 76 77 #define SA_DFT_PRESMOOTH_TIME 0 78 #define SA_DFT_PRESMOOTH_TIME_TEXT wxT("(off)") 79 80 #define SA_DFT_LINE_TIME 0 81 #define SA_DFT_LINE_TIME_TEXT wxT("(off)") 82 83 #define SA_DFT_SMOOTH_TIME 1.75 84 #define SA_DFT_SMOOTH_TIME_TEXT wxT("1.75 secs") 85 86 87 class Scorealign { 88 public: 89 double frame_period; // time in seconds 90 double window_size; 91 double silence_threshold; 92 bool force_final_alignment; 93 bool ignore_silence; 94 double presmooth_time; 95 double line_time; 96 double smooth_time; // duration of smoothing window 97 int smooth; // number of points used to compute the smooth time map 98 99 Scorealign(); 100 ~Scorealign(); 101 102 SAProgress *progress; 103 bool verbose; 104 105 // chromagrams and lengths, path data 106 float *chrom_energy0; 107 int file0_frames; // number of frames in file0 108 float *chrom_energy1; 109 int file1_frames; //number of frames in file1 110 // pathx, pathy, and pathlen describe the shortest path through the 111 // matrix from first_x, first_y to last_x, last_y (from the first 112 // non-silent frame to the last non-silent frame). The length varies 113 // depending upon the amount of silence that is ignored and how many 114 // path steps are diagonal. 115 short *pathx; //for midi (when aligning midi and audio) 116 short *pathy; //for audio (when aligning midi and audio) 117 int pathlen; 118 // first_x, first_y, last_x, last_y are the starting and ending 119 // points of the path. (It's not 0, 0, file0_frames, file1_frames 120 // because silent frames may be trimmed from beginning and ending. 121 int first_x; 122 int first_y; 123 int last_x; 124 int last_y; 125 set_pathlen(int p)126 void set_pathlen(int p) { pathlen = p; } 127 // time_map is, for each sequence 0 frame, the time of the matching 128 // frame in sequence 1. If the path associates a frame of sequence 0 129 // with multiple frames in sequence 1, the sequence 1 frame times 130 // are averaged. The frames that are not mapped to sequence 1 are 131 // marked with a time of -9999 or NOT_MAPPED. 132 // These will be silent frames of sequence 0. 133 #define NOT_MAPPED -9999.0F 134 float *time_map; 135 // smooth_time_map is a smoothed version of time_map. It also has 136 // non-mapped frames marked with times of -9999 or NOT_MAPPED. 137 // Because of smoothing, frames in smooth_time_map may map to 138 // negative times in sequence 1. 139 // These negative times will not be as negative as -9999, but 140 // the recommended coding style is to compare for equality with 141 // NOT_MAPPED to test for that value. 142 float *smooth_time_map; 143 144 // chroma vectors are calculated from an integer number of samples 145 // that approximates the nominal frame_period. Actual frame period 146 // is calculated and stored here: 147 // time in seconds for midi (when aligning midi and audio) 148 double actual_frame_period_0; 149 // time in seconds for audio (when aligning midi and audio) 150 double actual_frame_period_1; 151 152 /* gen_chroma.cpp stuff: 153 generates the chroma energy for a given file 154 with a low cutoff and high cutoff. 155 The chroma energy is placed in the float** chrom_energy. 156 this 2D is an array of pointers. the pointers point to an array 157 of length 12, representing the 12 chroma bins 158 The function returns the number of frames 159 (i.e. the length of the 1st dimention of chrom_energy 160 */ 161 int gen_chroma_audio(Audio_reader &reader, int hcutoff, int lcutoff, 162 float **chrom_energy, double *actual_frame_period, 163 int id); 164 165 int gen_chroma_midi(Alg_seq &seq, float dur, int nnotes, 166 int hcutoff, int lcutoff, 167 float **chrom_energy, double *actual_frame_period, 168 int id); 169 170 /* comp_chroma.cpp stuff */ 171 /* GEN_DIST 172 * 173 * This function generates the Euclidean distance for points i 174 * and j in two chroma vectors for use with dynamic time warping of 175 * the chroma vectors. 176 */ 177 float gen_dist(int i, int j); 178 179 /* scorealign.cpp stuff: */ 180 float map_time(float t1); 181 int align_midi_to_audio(Alg_seq &seq, Audio_reader &reader); 182 int align_midi_to_midi(Alg_seq &seq0, Alg_seq &seq2); 183 int align_audio_to_audio(Audio_reader &reader1, Audio_reader &reader2); 184 int align_chromagrams(); 185 186 int path_count; // for debug log formatting 187 void path_step(int i, int j); 188 void path_reverse(); 189 int sec_to_pathy_index(float sec); 190 int compare_chroma(); 191 void linear_regression(int n, int width, float &a, float &b); 192 void compute_smooth_time_map(); 193 void presmooth(); 194 void compute_regression_lines(); 195 void midi_tempo_align(Alg_seq &seq); 196 }; 197 198 // #define DEBUG_LOG 1 199 #if DEBUG_LOG 200 extern FILE *dbf; 201 #endif 202 203 int find_midi_duration(Alg_seq &seq, float *dur); 204