1 /* scorealign.h
2  *
3  * RBD
4  */
5 
6 // turn on lots of debugging, comment this line out to disable
7 // #define SA_VERBOSE 1
8 
// SA_V(stmt) wraps a debugging statement: it expands to nothing unless
// SA_VERBOSE is defined, in which case it expands to the statement itself.
#ifndef SA_VERBOSE
#define SA_V(stmt)
#else
#define SA_V(stmt) stmt
#endif
14 
15 // a class to report (optionally) score alignment progress
// Base-class behavior: every hook stores its argument (where there is
// something to store) and every progress callback returns true, i.e.
// "keep going". Subclasses override the virtual methods to display
// progress and/or to request cancellation by returning false.
class SAProgress /* not final */ {
  public:
    // Put every member into a defined state so that subclasses never
    // read indeterminate values before the setters have been called.
    SAProgress() {
        frame_period = 0.0;
        phase = 0;
        durations[0] = 0.0;
        durations[1] = 0.0;
        is_audio[0] = false;
        is_audio[1] = false;
        smoothing = false;
    }
    // This class is meant to be subclassed and used through a base
    // pointer, so destruction through SAProgress* must be well defined.
    virtual ~SAProgress() {}
    // we need the frame period to convert seconds to work units
    // call this before set_duration()
    virtual void set_frame_period(double seconds) { frame_period = seconds; }
    // index = 0 or 1 to tell which file (first or second)
    // audio_flag = true (audio) or false (midi)
    // seconds = duration of audio or midi data
    // Any other index is ignored rather than written out of bounds.
    virtual void set_duration(int index, bool audio_flag, double seconds) {
        if (index < 0 || index > 1) {
            return;
        }
        durations[index] = seconds;
        is_audio[index] = audio_flag;
    }
    // if fitting pwl path to path, set smoothing to true
    virtual void set_smoothing(bool s) { smoothing = s; }
    // which alignment phase are we working on?
    // 0 = first file chroma, 1 = second file chroma, 2 = compute matrix,
    // 3 = smoothing
    // Note: set_phase(0) is REQUIRED and must be called only ONCE.
    // This is when we calculate total work
    // and initialize any local state needed to handle set_feature_progress()
    // and set_matrix_progress().
    // (The base implementation only records the phase number.)
    virtual void set_phase(int i) { phase = i; }
    // how many seconds have we processed (in phase 1 or 2)
    // NOTE(review): "phase 1 or 2" presumably means the two chroma
    // phases, which set_phase() numbers 0 and 1 -- confirm.
    // return value is normally true; false is request to cancel
    virtual bool set_feature_progress(float seconds) { return true; }
    // report that some matrix elements have been computed?
    // return value is normally true; false is request to cancel
    virtual bool set_matrix_progress(int cells) { return true; }
    // report iterations of line smoothing
    // the base implementation always returns true
    virtual bool set_smoothing_progress(int i) { return true; }
  protected:
    double frame_period;  // last value passed to set_frame_period()
    int phase;            // last value passed to set_phase()
    double durations[2];  // per-file duration in seconds, see set_duration()
    bool is_audio[2];     // per-file flag: true = audio, false = midi
    bool smoothing;       // last value passed to set_smoothing()
};
53 
54 
// Alignment status codes.
// NOTE(review): presumably these are the int results of the
// Scorealign::align_*() functions -- confirm against scorealign.cpp.
enum {
    SA_SUCCESS  = 0,
    SA_TOOSHORT = 1,  // presumably: input too short to align -- confirm
    SA_CANCEL   = 2   // presumably: a progress callback returned false -- confirm
};
60 
61 
// Default ("DFT") values for the alignment parameters. Each default has a
// companion *_TEXT / *_STRING macro holding a wxT() string form of the
// same value (presumably for display in a user interface -- confirm).

// frame period
#define SA_DFT_FRAME_PERIOD                  0.2
#define SA_DFT_FRAME_PERIOD_TEXT             wxT("0.20 secs")

// window size
#define SA_DFT_WINDOW_SIZE                   0.2
#define SA_DFT_WINDOW_SIZE_TEXT              wxT("0.20 secs")

// force final alignment
#define SA_DFT_FORCE_FINAL_ALIGNMENT         true
#define SA_DFT_FORCE_FINAL_ALIGNMENT_STRING  wxT("true")

// ignore silence
#define SA_DFT_IGNORE_SILENCE                true
#define SA_DFT_IGNORE_SILENCE_STRING         wxT("true")

// silence threshold
#define SA_DFT_SILENCE_THRESHOLD             0.1
#define SA_DFT_SILENCE_THRESHOLD_TEXT        wxT("0.100")

// presmooth time; the text form of the default 0 is "(off)"
#define SA_DFT_PRESMOOTH_TIME                0
#define SA_DFT_PRESMOOTH_TIME_TEXT           wxT("(off)")

// line time; the text form of the default 0 is "(off)"
#define SA_DFT_LINE_TIME                     0
#define SA_DFT_LINE_TIME_TEXT                wxT("(off)")

// smooth time
#define SA_DFT_SMOOTH_TIME                   1.75
#define SA_DFT_SMOOTH_TIME_TEXT              wxT("1.75 secs")
85 
86 
87 class Scorealign {
88  public:
89     double frame_period; // time in seconds
90     double window_size;
91     double silence_threshold;
92     bool force_final_alignment;
93     bool ignore_silence;
94     double presmooth_time;
95     double line_time;
96     double smooth_time; // duration of smoothing window
97     int smooth; // number of points used to compute the smooth time map
98 
99     Scorealign();
100     ~Scorealign();
101 
102     SAProgress *progress;
103     bool verbose;
104 
105     // chromagrams and lengths, path data
106     float *chrom_energy0;
107     int file0_frames; // number of frames in file0
108     float *chrom_energy1;
109     int file1_frames; //number of frames in file1
110     // pathx, pathy, and pathlen describe the shortest path through the
111     // matrix from first_x, first_y to last_x, last_y (from the first
112     // non-silent frame to the last non-silent frame). The length varies
113     // depending upon the amount of silence that is ignored and how many
114     // path steps are diagonal.
115     short *pathx;  //for midi (when aligning midi and audio)
116     short *pathy; //for audio (when aligning midi and audio)
117     int pathlen;
118     // first_x, first_y, last_x, last_y are the starting and ending
119     // points of the path. (It's not 0, 0, file0_frames, file1_frames
120     // because silent frames may be trimmed from beginning and ending.
121     int first_x;
122     int first_y;
123     int last_x;
124     int last_y;
125 
set_pathlen(int p)126     void set_pathlen(int p) { pathlen = p; }
127     // time_map is, for each sequence 0 frame, the time of the matching
128     // frame in sequence 1. If the path associates a frame of sequence 0
129     // with multiple frames in sequence 1, the sequence 1 frame times
130     // are averaged. The frames that are not mapped to sequence 1 are
131     // marked with a time of -9999 or NOT_MAPPED.
132     // These will be silent frames of sequence 0.
133 #define NOT_MAPPED -9999.0F
134     float *time_map;
135     // smooth_time_map is a smoothed version of time_map. It also has
136     // non-mapped frames marked with times of -9999 or NOT_MAPPED.
137     // Because of smoothing, frames in smooth_time_map may map to
138     // negative times in sequence 1.
139     // These negative times will not be as negative as -9999, but
140     // the recommended coding style is to compare for equality with
141     // NOT_MAPPED to test for that value.
142     float *smooth_time_map;
143 
144     // chroma vectors are calculated from an integer number of samples
145     // that approximates the nominal frame_period. Actual frame period
146     // is calculated and stored here:
147     // time in seconds for midi (when aligning midi and audio)
148     double actual_frame_period_0;
149     // time in seconds for audio (when aligning midi and audio)
150     double actual_frame_period_1;
151 
152     /* gen_chroma.cpp stuff:
153        generates the chroma energy for a given file
154        with a low cutoff and high cutoff.
155        The chroma energy is placed in the float** chrom_energy.
156        this 2D is an array of pointers.  the pointers point to an array
157        of length 12, representing the 12 chroma bins
158        The function returns the number of frames
159        (i.e. the length of the 1st dimention of chrom_energy
160     */
161     int gen_chroma_audio(Audio_reader &reader, int hcutoff, int lcutoff,
162                          float **chrom_energy, double *actual_frame_period,
163                          int id);
164 
165     int gen_chroma_midi(Alg_seq &seq, float dur, int nnotes,
166                         int hcutoff, int lcutoff,
167                         float **chrom_energy, double *actual_frame_period,
168                         int id);
169 
170     /* comp_chroma.cpp stuff */
171     /*				GEN_DIST
172      *
173      * This function generates the Euclidean distance for points i
174      * and j in two chroma vectors for use with dynamic time warping of
175      * the chroma vectors.
176      */
177     float gen_dist(int i, int j);
178 
179     /* scorealign.cpp stuff: */
180     float map_time(float t1);
181     int align_midi_to_audio(Alg_seq &seq, Audio_reader &reader);
182     int align_midi_to_midi(Alg_seq &seq0, Alg_seq &seq2);
183     int align_audio_to_audio(Audio_reader &reader1, Audio_reader &reader2);
184     int align_chromagrams();
185 
186     int path_count; // for debug log formatting
187     void path_step(int i, int j);
188     void path_reverse();
189     int sec_to_pathy_index(float sec);
190     int compare_chroma();
191     void linear_regression(int n, int width, float &a, float &b);
192     void compute_smooth_time_map();
193     void presmooth();
194     void compute_regression_lines();
195     void midi_tempo_align(Alg_seq &seq);
196 };
197 
198 // #define DEBUG_LOG 1
// dbf is declared only when DEBUG_LOG is defined to a non-zero value.
// NOTE(review): presumably the debug log file, defined in a .cpp file
// that is not visible from this header -- confirm.
#if defined(DEBUG_LOG) && DEBUG_LOG
extern FILE *dbf;
#endif
202 
// Free function declared here and defined in another file.
// NOTE(review): presumably determines the duration of seq and stores it
// in *dur; the meaning of the int return value is not visible from this
// header -- confirm against the definition.
int find_midi_duration(Alg_seq &seq, float *dur);
204