1 // Licensed GNU LGPL v3 or later: http://www.gnu.org/licenses/lgpl.html
2
3 #include <vector>
4 #include <stdio.h>
5 #include <assert.h>
6
7 #include "smaudio.hh"
8 #include "smwavset.hh"
9 #include "smlivedecoder.hh"
10 #include "smmain.hh"
11 #include "sminfile.hh"
12 #include "smutils.hh"
13 #include "smfft.hh"
14 #include "smaudiotool.hh"
15
16 using namespace SpectMorph;
17 using std::vector;
18 using std::min;
19 using std::max;
20 using std::string;
21 using std::set;
22
/* Squared euclidean distance between two vectors.
 *
 * Both vectors must have the same length (enforced with assert); the result
 * is the sum of (a[i] - b[i])^2 over all elements, which is 0 for empty input.
 */
double
vector_delta (const std::vector<double>& a, const std::vector<double>& b)
{
  assert (a.size() == b.size());

  double sum_sq = 0;
  for (size_t idx = 0; idx != a.size(); idx++)
    {
      const double diff = a[idx] - b[idx];
      sum_sq += diff * diff;
    }
  return sum_sq;
}
33
34 static void
reconstruct(AudioBlock & audio_block,vector<double> & signal,const Audio & audio)35 reconstruct (AudioBlock& audio_block,
36 vector<double>& signal,
37 const Audio& audio)
38 {
39 for (size_t partial = 0; partial < audio_block.freqs.size(); partial++)
40 {
41 double f = audio_block.freqs_f (partial) * audio.fundamental_freq;
42 double mag = audio_block.mags_f (partial);
43 double phase = audio_block.phases_f (partial);
44
45 // do a phase optimal reconstruction of that partial
46 for (size_t n = 0; n < signal.size(); n++)
47 {
48 signal[n] += sin (phase) * mag;
49 phase += f / audio.mix_freq * 2.0 * M_PI;
50 }
51 }
52 }
53
/* Magnitude of the complex number re + i * im. */
float
mag (float re, float im)
{
  const float power = re * re + im * im;
  return sqrt (power);
}
59
/* Linear attack envelope, as evaluated by attack_error():
 * the envelope is 0 before attack_start_ms, rises linearly until
 * attack_end_ms and is 1 afterwards.
 */
struct Attack
{
  double attack_start_ms; /* time (ms) before which the envelope is zero */
  double attack_end_ms;   /* time (ms) from which the envelope is one */
};
65
66 double
attack_error(const SpectMorph::Audio & audio,const vector<vector<double>> & unscaled_signal,const Attack & attack,vector<double> & out_scale)67 attack_error (const SpectMorph::Audio& audio, const vector< vector<double> >& unscaled_signal, const Attack& attack, vector<double>& out_scale)
68 {
69 const size_t frames = unscaled_signal.size();
70 double total_error = 0;
71
72 for (size_t f = 0; f < frames; f++)
73 {
74 const vector<double>& frame_signal = unscaled_signal[f];
75 size_t zero_values = 0;
76 double scale = 1.0; /* init to get rid of gcc compiler warning */
77
78 for (size_t n = 0; n < frame_signal.size(); n++)
79 {
80 const double n_ms = f * audio.frame_step_ms + n * 1000.0 / audio.mix_freq;
81 double env;
82 scale = (zero_values > 0) ? frame_signal.size() / double (frame_signal.size() - zero_values) : 1.0;
83 if (n_ms < attack.attack_start_ms)
84 {
85 env = 0;
86 zero_values++;
87 }
88 else if (n_ms < attack.attack_end_ms) // during attack
89 {
90 const double attack_len_ms = attack.attack_end_ms - attack.attack_start_ms;
91
92 env = (n_ms - attack.attack_start_ms) / attack_len_ms;
93 }
94 else // after attack
95 {
96 env = 1.0;
97 }
98 const double value = frame_signal[n] * scale * env;
99 const double error = value - audio.contents[f].debug_samples[n];
100 total_error += error * error;
101 }
102 out_scale[f] = scale;
103 }
104 return total_error;
105 }
106
/* Verify the number of command line arguments.
 *
 * If argc differs from need_argc, a usage message containing the given usage
 * string is printed and the program exits with status 1; otherwise the
 * function simply returns.
 */
void
check_usage (int argc, int need_argc, const std::string& usage)
{
  if (argc == need_argc)
    return;

  printf ("usage: smtool <sm_file> %s\n", usage.c_str());
  exit (1);
}
116
117 void
load_or_die(Audio & audio,const string & filename,const string & mode)118 load_or_die (Audio& audio, const string& filename, const string& mode)
119 {
120 AudioLoadOptions load_options = AUDIO_LOAD_DEBUG;
121
122 if (mode == "fundamental-freq" || mode == "freq"
123 || mode == "frameparams" || mode == "noiseparams" || mode == "attack" ||
124 mode == "zero-values-at-start" || mode == "mix-freq")
125 load_options = AUDIO_SKIP_DEBUG;
126
127 Error error = audio.load (filename, load_options);
128 if (error)
129 {
130 fprintf (stderr, "can't load file: %s\n",filename.c_str());
131 exit (1);
132 }
133 }
134
/* Returns true if at least one element of data is NaN, false otherwise
 * (including for an empty vector).
 */
static bool
find_nan (std::vector<float>& data)
{
  for (float value : data)
    {
      if (std::isnan (value))
        return true;
    }
  return false;
}
143
144 static double
compute_energy(const Audio & audio,double percent,bool from_loop)145 compute_energy (const Audio& audio, double percent, bool from_loop)
146 {
147 double percent_start = percent - 5;
148 double percent_stop = percent + 5;
149 if (percent_start < 0 || percent_stop > 100)
150 {
151 fprintf (stderr, "bad volume percentage: %f\n", percent);
152 exit (1);
153 }
154
155 Audio *noloop_audio = audio.clone();
156 if (!from_loop)
157 {
158 noloop_audio->loop_type = Audio::LOOP_NONE; // don't use looped signal, but original signal
159 }
160
161 WavSet smset;
162 WavSetWave new_wave;
163 new_wave.midi_note = 60; // doesn't matter
164 new_wave.channel = 0;
165 new_wave.velocity_range_min = 0;
166 new_wave.velocity_range_max = 127;
167 new_wave.audio = noloop_audio;
168 smset.waves.push_back (new_wave);
169
170 LiveDecoder decoder (&smset);
171 // we need reproducable noise to get the same energy every time
172 decoder.set_noise_seed (42);
173 decoder.retrigger (0, audio.fundamental_freq, 127, audio.mix_freq);
174 vector<float> samples;
175 if (from_loop)
176 {
177 // at least one second, or twice the original len, whatever is longer
178 samples.resize (audio.sample_count + MAX (audio.sample_count, audio.mix_freq));
179 }
180 else
181 {
182 samples.resize (audio.sample_count);
183 }
184 decoder.process (samples.size(), nullptr, &samples[0]);
185
186 double energy = 0;
187 size_t energy_norm = 0;
188 if (from_loop)
189 {
190 // start evaluating energy after end of original data (so we're counting the looped part only
191 for (size_t pos = audio.sample_count; pos < samples.size(); pos++)
192 {
193 energy += samples[pos] * samples[pos];
194 energy_norm++;
195 }
196 }
197 else
198 {
199 for (size_t pos = 0; pos < samples.size(); pos++)
200 {
201 double percent = (pos * 100.0) / samples.size();
202 if (percent > percent_start && percent < percent_stop)
203 {
204 energy += samples[pos] * samples[pos];
205 energy_norm++;
206 }
207 }
208 }
209 return energy / energy_norm;
210 }
211
212 class Command
213 {
214 string m_mode;
215 bool m_need_save;
216 const WavSetWave *m_wave;
217 public:
218 static vector<Command *> *registry();
Command(const string & mode)219 Command (const string& mode)
220 {
221 registry()->push_back (this);
222 m_mode = mode;
223 m_need_save = false;
224 m_wave = NULL;
225 }
226 virtual bool
parse_args(vector<string> & args)227 parse_args (vector<string>& args)
228 {
229 return args.size() == 0;
230 }
231 virtual bool exec (Audio& audio) = 0;
usage(bool one_line)232 virtual void usage (bool one_line)
233 {
234 printf ("\n");
235 }
~Command()236 virtual ~Command()
237 {
238 }
mode() const239 string mode() const
240 {
241 return m_mode;
242 }
243 bool
need_save() const244 need_save() const
245 {
246 return m_need_save;
247 }
248 void
set_need_save(bool s)249 set_need_save (bool s)
250 {
251 m_need_save = s;
252 }
253 const WavSetWave *
wave() const254 wave() const
255 {
256 return m_wave;
257 }
258 void
set_wave(const WavSetWave * wave)259 set_wave (const WavSetWave *wave)
260 {
261 m_wave = wave;
262 }
263 };
264
265 vector<Command *> *
registry()266 Command::registry()
267 {
268 static vector<Command *> *rx = 0;
269 if (!rx)
270 rx = new vector<Command *>;
271 return rx;
272 }
273
274 class VolumeCommand : public Command
275 {
276 double percent;
277 public:
VolumeCommand()278 VolumeCommand() : Command ("volume")
279 {
280 }
281 bool
parse_args(vector<string> & args)282 parse_args (vector<string>& args)
283 {
284 if (args.size() == 1)
285 {
286 percent = sm_atof (args[0].c_str());
287 return true;
288 }
289 return false;
290 }
291 bool
exec(Audio & audio)292 exec (Audio& audio)
293 {
294 const double energy = compute_energy (audio, percent, false);
295 sm_printf ("avg_energy: %.17g\n", energy);
296 return true;
297 }
298 void
usage(bool one_line)299 usage (bool one_line)
300 {
301 printf ("<percent>\n");
302 }
303 } volume_command;
304
305 class FundamentalFreqCommand : public Command
306 {
307 public:
FundamentalFreqCommand()308 FundamentalFreqCommand() : Command ("fundamental-freq")
309 {
310 }
311 bool
exec(Audio & audio)312 exec (Audio& audio)
313 {
314 sm_printf ("fundamental-freq: %f\n", audio.fundamental_freq);
315 return true;
316 }
317 } fundamental_freq_command;
318
319 class MixFreqCommand : public Command
320 {
321 public:
MixFreqCommand()322 MixFreqCommand() : Command ("mix-freq")
323 {
324 }
325 bool
exec(Audio & audio)326 exec (Audio& audio)
327 {
328 sm_printf ("mix-freq: %f\n", audio.mix_freq);
329 return true;
330 }
331 } mix_freq_command;
332
333 class StatsCommand : public Command
334 {
335 public:
StatsCommand()336 StatsCommand() : Command ("stats")
337 {
338 }
339 bool
exec(Audio & audio)340 exec (Audio& audio)
341 {
342 const WavSetWave *w = wave();
343
344 double mag_weight = 0, mag_partials = 0;
345 for (size_t f = 0; f < audio.contents.size(); f++)
346 {
347 const AudioBlock& block = audio.contents[f];
348
349 double mag = 0;
350 for (size_t i = 0; i < block.freqs.size(); i++)
351 mag += block.mags_f (i);
352
353 /* give higher weight to louder audio blocks */
354 mag_weight += mag;
355 mag_partials += mag * block.freqs.size();
356 }
357 mag_partials /= mag_weight;
358 sm_printf ("%d %d %f\n", w ? w->midi_note : -1, int (audio.mix_freq + 0.5), mag_partials);
359 return true;
360 }
361 } stats_command;
362
363 class SampleCountCommand : public Command
364 {
365 public:
SampleCountCommand()366 SampleCountCommand() : Command ("sample-count")
367 {
368 }
369 bool
exec(Audio & audio)370 exec (Audio& audio)
371 {
372 sm_printf ("sample-count: %d\n", audio.sample_count);
373 return true;
374 }
375 } sample_count_command;
376
377 class ZeroValuesAtStartCommand : public Command
378 {
379 public:
ZeroValuesAtStartCommand()380 ZeroValuesAtStartCommand() : Command ("zero-values-at-start")
381 {
382 }
383 bool
exec(Audio & audio)384 exec (Audio& audio)
385 {
386 sm_printf ("zero-values-at-start: %d\n", audio.zero_values_at_start);
387 return true;
388 }
389 } zero_values_at_start_command;
390
391 class AttackCommand : public Command
392 {
393 public:
AttackCommand()394 AttackCommand() : Command ("attack")
395 {
396 }
397 bool
exec(Audio & audio)398 exec (Audio& audio)
399 {
400 sm_printf ("start of attack: %.2f ms\n", audio.attack_start_ms);
401 sm_printf (" end of attack: %.2f ms\n", audio.attack_end_ms);
402 return true;
403 }
404 } attack_command;
405
406 class SizeCommand : public Command
407 {
408 public:
SizeCommand()409 SizeCommand() : Command ("size")
410 {
411 }
412 bool
exec(Audio & audio)413 exec (Audio& audio)
414 {
415 size_t phase_bytes = 0, freq_bytes = 0, mag_bytes = 0, debug_samples_bytes = 0, original_fft_bytes = 0, noise_bytes = 0;
416 for (size_t f = 0; f < audio.contents.size(); f++)
417 {
418 phase_bytes += audio.contents[f].phases.size() * sizeof (uint16_t);
419 freq_bytes += audio.contents[f].freqs.size() * sizeof (uint16_t);
420 mag_bytes += audio.contents[f].mags.size() * sizeof (uint16_t);
421 debug_samples_bytes += audio.contents[f].debug_samples.size() * sizeof (float);
422 original_fft_bytes += audio.contents[f].original_fft.size() * sizeof (float);
423 noise_bytes += audio.contents[f].noise.size() * sizeof (uint16_t);
424 }
425 size_t original_samples_bytes = audio.original_samples.size() * sizeof (float);
426
427 sm_printf ("frequencies : %zd bytes\n", freq_bytes);
428 sm_printf ("mags : %zd bytes\n", mag_bytes);
429 sm_printf ("phases : %zd bytes\n", phase_bytes);
430 sm_printf ("dbgsamples : %zd bytes\n", debug_samples_bytes);
431 sm_printf ("orig_fft : %zd bytes\n", original_fft_bytes);
432 sm_printf ("noise : %zd bytes\n", noise_bytes);
433 sm_printf ("orig_samples : %zd bytes\n", original_samples_bytes);
434
435 size_t total_bytes = (freq_bytes + mag_bytes + phase_bytes + noise_bytes);
436 sm_printf ("data rate : %.2f K/s\n", total_bytes / 1024.0 / (audio.sample_count / audio.mix_freq));
437 return true;
438 }
439 } size_command;
440
441 class LoopParamsCommand : public Command
442 {
443 public:
LoopParamsCommand()444 LoopParamsCommand() : Command ("loop-params")
445 {
446 }
447 bool
exec(Audio & audio)448 exec (Audio& audio)
449 {
450 sm_printf ("frames: %zd\n", audio.contents.size());
451 string loop_str;
452 if (audio.loop_type_to_string (audio.loop_type, loop_str))
453 sm_printf ("loop type: %s\n", loop_str.c_str());
454 else
455 sm_printf ("loop type: *unknown* (%d)\n", audio.loop_type);
456 sm_printf ("loop start: %d\n", audio.loop_start);
457 sm_printf ("loop end: %d\n", audio.loop_end);
458
459 return true;
460 }
461 } loop_params_command;
462
463 class NoiseParamsCommand : public Command
464 {
465 int frame;
466 public:
NoiseParamsCommand()467 NoiseParamsCommand() : Command ("noise-params")
468 {
469 }
470 bool
parse_args(vector<string> & args)471 parse_args (vector<string>& args)
472 {
473 if (args.size() == 1)
474 {
475 frame = atoi (args[0].c_str());
476 return true;
477 }
478 return false;
479 }
480 bool
exec(Audio & audio)481 exec (Audio& audio)
482 {
483 for (size_t i = 0; i < audio.contents[frame].noise.size(); i++)
484 sm_printf ("%.7g\n", audio.contents[frame].noise_f (i));
485 return true;
486 }
487 void
usage(bool one_line)488 usage (bool one_line)
489 {
490 printf ("<frame_no>\n");
491 }
492 } noise_params_command;
493
494 class FrameCommand : public Command
495 {
496 int frame;
497 public:
FrameCommand()498 FrameCommand() : Command ("frame")
499 {
500 }
501 bool
parse_args(vector<string> & args)502 parse_args (vector<string>& args)
503 {
504 if (args.size() == 1)
505 {
506 frame = atoi (args[0].c_str());
507 return true;
508 }
509 return false;
510 }
511 bool
exec(Audio & audio)512 exec (Audio& audio)
513 {
514 int i = frame;
515 size_t frame_size = audio.contents[i].debug_samples.size();
516 vector<double> sines (frame_size);
517 reconstruct (audio.contents[i], sines, audio);
518 for (size_t n = 0; n < audio.contents[i].debug_samples.size(); n++)
519 {
520 double v = audio.contents[i].debug_samples[n];
521 sm_printf ("%zd %f %f %f\n", n, v, sines[n], v - sines[n]);
522 }
523 return true;
524 }
525 void
usage(bool one_line)526 usage (bool one_line)
527 {
528 printf ("<frame_no>\n");
529 }
530 } frame_command;
531
532 class FrameParamsCommand : public Command
533 {
534 int frame;
535 public:
FrameParamsCommand()536 FrameParamsCommand() : Command ("frame-params")
537 {
538 }
539 bool
parse_args(vector<string> & args)540 parse_args (vector<string>& args)
541 {
542 if (args.size() == 1)
543 {
544 frame = atoi (args[0].c_str());
545 return true;
546 }
547 return false;
548 }
549 void
usage(bool one_line)550 usage (bool one_line)
551 {
552 printf ("<frame_no>\n");
553 }
554 bool
exec(Audio & audio)555 exec (Audio& audio)
556 {
557 int i = frame;
558 for(;;)
559 {
560 int maxm = 0;
561 size_t maxp = 0;
562 for (size_t partial = 0; partial < audio.contents[i].freqs.size(); partial++)
563 {
564 const int m = audio.contents[i].mags[partial];
565 if (m > maxm)
566 {
567 maxm = m;
568 maxp = partial;
569 }
570 }
571 if (maxm > 0)
572 {
573 const double freq = audio.contents[i].freqs_f (maxp) * audio.fundamental_freq;
574 const double mag_factor = audio.contents[i].mags_f (maxp);
575 const double mag_db = db_from_factor (mag_factor, -200);
576
577 sm_printf ("%f Hz: %f\n", freq, mag_db);
578 audio.contents[i].mags[maxp] = 0;
579 }
580 else
581 {
582 break;
583 }
584 }
585 return true;
586 }
587 } frame_params_command;
588
589 class TotalNoiseCommand : public Command
590 {
591 public:
TotalNoiseCommand()592 TotalNoiseCommand() : Command ("total-noise")
593 {
594 }
595 bool
exec(Audio & audio)596 exec (Audio& audio)
597 {
598 double total_noise = 0;
599 double peak_noise = 0;
600
601 for (size_t f = 0; f < audio.contents.size(); f++)
602 {
603 for (size_t i = 0; i < audio.contents[f].noise.size(); i++)
604 {
605 const double noise = audio.contents[f].noise_f (i);
606
607 total_noise += noise;
608 peak_noise = max (peak_noise, noise);
609 }
610 }
611 sm_printf ("total-noise: %.17g\n", total_noise);
612 sm_printf ("peak-noise: %.17g\n", peak_noise);
613 return true;
614 }
615 } total_noise_command;
616
617 class NanTestCommand : public Command
618 {
619 public:
NanTestCommand()620 NanTestCommand() : Command ("nan-test")
621 {
622 }
623 bool
exec(Audio & audio)624 exec (Audio& audio)
625 {
626 int nan_ds = 0, nan_fft = 0;
627
628 for (size_t f = 0; f < audio.contents.size(); f++)
629 {
630 if (find_nan (audio.contents[f].debug_samples))
631 nan_ds++;
632 if (find_nan (audio.contents[f].original_fft))
633 nan_fft++;
634 }
635 sm_printf ("nan-debug-samples: %d\n", nan_ds);
636 sm_printf ("nan-original-fft: %d\n", nan_fft);
637 return true;
638 }
639 } nan_test_command;
640
641 class OriginalSamplesCommand : public Command
642 {
643 public:
OriginalSamplesCommand()644 OriginalSamplesCommand() : Command ("original-samples")
645 {
646 }
647 bool
exec(Audio & audio)648 exec (Audio& audio)
649 {
650 for (size_t i = 0; i < audio.original_samples.size(); i++)
651 sm_printf ("%.17g\n", audio.original_samples[i]);
652
653 return true;
654 }
655 } original_samples_command;
656
657 class FreqCommand : public Command
658 {
659 double freq_min, freq_max;
660 public:
FreqCommand()661 FreqCommand() : Command ("freq")
662 {
663 }
664 bool
parse_args(vector<string> & args)665 parse_args (vector<string>& args)
666 {
667 if (args.size() == 2)
668 {
669 freq_min = sm_atof (args[0].c_str());
670 freq_max = sm_atof (args[1].c_str());
671 return true;
672 }
673 return false;
674 }
675 bool
exec(Audio & audio)676 exec (Audio& audio)
677 {
678 for (size_t i = 0; i < audio.contents.size(); i++)
679 {
680 const AudioBlock& block = audio.contents[i];
681 for (size_t n = 0; n < block.freqs.size(); n++)
682 {
683 const double freq = block.freqs_f (n) * audio.fundamental_freq;
684
685 if (freq > freq_min && freq < freq_max)
686 {
687 sm_printf ("%zd %f %f\n", i, freq, block.mags_f (n));
688 }
689 }
690 }
691 return true;
692 }
693 void
usage(bool one_line)694 usage (bool one_line)
695 {
696 printf ("<freq_min> <freq_max>\n");
697 }
698 } freq_command;
699
700 class SpectrumCommand : public Command
701 {
702 int frame;
703 public:
SpectrumCommand()704 SpectrumCommand() : Command ("spectrum")
705 {
706 }
707 bool
parse_args(vector<string> & args)708 parse_args (vector<string>& args)
709 {
710 if (args.size() == 1)
711 {
712 frame = atoi (args[0].c_str());
713 return true;
714 }
715 return false;
716 }
717 void
usage(bool one_line)718 usage (bool one_line)
719 {
720 printf ("<frame_no>\n");
721 }
722 bool
exec(Audio & audio)723 exec (Audio& audio)
724 {
725 size_t frame_size = audio.frame_size_ms * audio.mix_freq / 1000;
726 int i = frame;
727 vector<double> spectrum;
728 vector<double> sines (frame_size);
729
730 reconstruct (audio.contents[i], sines, audio);
731
732 /* compute block size from frame size (smallest 2^k value >= frame_size) */
733 size_t block_size = 1;
734 while (block_size < frame_size)
735 block_size *= 2;
736
737 // construct window
738 vector<float> window (block_size);
739 for (guint i = 0; i < window.size(); i++)
740 {
741 if (i < frame_size)
742 window[i] = window_cos (2.0 * i / frame_size - 1.0);
743 else
744 window[i] = 0;
745 }
746
747 // apply window to reconstructed signal
748 sines.resize (block_size);
749 for (guint i = 0; i < sines.size(); i++)
750 sines[i] *= window[i];
751
752 // zeropad
753 const int zeropad = 4;
754 sines.resize (block_size * zeropad);
755 vector<double> out (block_size * zeropad);
756
757 float *fft_in = FFT::new_array_float (sines.size());
758 float *fft_out = FFT::new_array_float (sines.size());
759
760 std::copy (sines.begin(), sines.end(), fft_in);
761 FFT::fftar_float (sines.size(), fft_in, fft_out);
762 std::copy (fft_out, fft_out + sines.size(), out.begin());
763
764 FFT::free_array_float (fft_out);
765 FFT::free_array_float (fft_in);
766
767 vector<double> sines_spectrum;
768 for (size_t n = 0; n < audio.contents[i].original_fft.size(); n += 2)
769 {
770 double re = audio.contents[i].original_fft[n];
771 double im = audio.contents[i].original_fft[n + 1];
772 spectrum.push_back (sqrt (re * re + im * im));
773 sines_spectrum.push_back (mag (out[n], out[n+1]));
774 }
775 for (size_t n = 0; n < spectrum.size(); n++)
776 {
777 double s = 0;
778 for (size_t r = 0; r < 1; r++)
779 {
780 if (n + r < spectrum.size())
781 s = std::max (s, spectrum[n + r]);
782 if (r < n)
783 s = std::max (s, spectrum[n - r]);
784 }
785 sm_printf ("%f %f %f\n", n * 0.5 * audio.mix_freq / spectrum.size(), s, sines_spectrum[n]);
786 }
787 return true;
788 }
789 } spectrum_command;
790
791 class AutoLoopCommand : public Command
792 {
793 double percent;
794 public:
AutoLoopCommand()795 AutoLoopCommand() : Command ("auto-loop")
796 {
797 set_need_save (true);
798 }
799 bool
parse_args(vector<string> & args)800 parse_args (vector<string>& args)
801 {
802 if (args.size() == 1)
803 {
804 percent = sm_atof (args[0].c_str());
805 if (percent < 0 || percent > 100)
806 {
807 fprintf (stderr, "bad loop percentage: %f\n", percent);
808 return false;
809 }
810 return true;
811 }
812 return false;
813 }
814 void
usage(bool one_line)815 usage (bool one_line)
816 {
817 printf ("<percent>\n");
818 }
819 bool
exec(Audio & audio)820 exec (Audio& audio)
821 {
822 int loop_point = audio.contents.size() * percent / 100;
823 if (loop_point < 0)
824 loop_point = 0;
825 if (size_t (loop_point) >= (audio.contents.size() - 1))
826 loop_point = audio.contents.size() - 1;
827 audio.loop_type = Audio::LOOP_FRAME_FORWARD;
828 audio.loop_start = loop_point;
829 audio.loop_end = loop_point;
830 return true;
831 }
832 } auto_loop_command;
833
834 class TailLoopCommand : public Command
835 {
836 public:
TailLoopCommand()837 TailLoopCommand() : Command ("tail-loop")
838 {
839 set_need_save (true);
840 }
841 bool
exec(Audio & audio)842 exec (Audio& audio)
843 {
844 int loop_point = -1;
845 size_t frame_size = audio.frame_size_ms * audio.mix_freq / 1000;
846 const int frame_step = audio.frame_step_ms * audio.mix_freq / 1000;
847
848 // we need the largest frame that doesn't include any data beyond the original file end
849 for (size_t i = 0; i < audio.contents.size(); i++)
850 {
851 if (i * frame_step + frame_size < size_t (audio.sample_count))
852 loop_point = i;
853 }
854 audio.loop_type = Audio::LOOP_FRAME_FORWARD;
855 audio.loop_start = loop_point;
856 audio.loop_end = loop_point;
857
858 return true;
859 }
860 } tail_loop_command;
861
/* Convert a frequency ratio to cent (1200 cent per octave), so a ratio of 1
 * corresponds to 0 cent and a ratio of 2 to 1200 cent.
 */
double
freq_ratio_to_cent (double freq_ratio)
{
  const double octaves = log (freq_ratio) / log (2);

  return octaves * 1200;
}
867
868 class AutoTuneCommand : public Command
869 {
870 public:
AutoTuneCommand()871 AutoTuneCommand() : Command ("auto-tune")
872 {
873 }
874 bool
exec(Audio & audio)875 exec (Audio& audio)
876 {
877 double tune_factor;
878 if (AudioTool::get_auto_tune_factor (audio, tune_factor))
879 {
880 AudioTool::apply_auto_tune_factor (audio, tune_factor);
881
882 double input_fundamental_freq = audio.fundamental_freq / tune_factor;
883 sm_printf ("%.17g %.17g %.3f cent\n", audio.fundamental_freq, input_fundamental_freq, freq_ratio_to_cent (tune_factor));
884
885 set_need_save (true);
886 }
887 return true;
888 }
889 } auto_tune_command;
890
891 class TuneAllFramesCommand : public Command
892 {
893 int fundamental_est_n; /* number of partials to use for fundamental estimation in range [1,3] */
894 public:
TuneAllFramesCommand()895 TuneAllFramesCommand() : Command ("tune-all-frames")
896 {
897 }
898 bool
parse_args(vector<string> & args)899 parse_args (vector<string>& args)
900 {
901 if (args.size() == 1)
902 {
903 fundamental_est_n = atoi (args[0].c_str());
904 assert (fundamental_est_n >= 1 && fundamental_est_n <= 3);
905 return true;
906 }
907 return false;
908 }
909 void
usage(bool one_line)910 usage (bool one_line)
911 {
912 printf ("<n_partials>\n");
913 }
914 void
update_fundamental_estimate(int n,const AudioBlock & block,double freq_min,double freq_max,double & f_out,double & m_out)915 update_fundamental_estimate (int n, const AudioBlock& block, double freq_min, double freq_max, double& f_out, double& m_out)
916 {
917 if (n > fundamental_est_n) /* use this partial for fundamental estimation? */
918 return;
919
920 double freq = 0, mag = 0;
921
922 for (size_t p = 0; p < block.mags.size(); p++)
923 {
924 if (block.freqs_f (p) > freq_min && block.freqs_f (p) < freq_max && block.mags_f (p) > mag)
925 {
926 mag = block.mags_f (p);
927 freq = block.freqs_f (p) / n;
928 }
929 }
930 if (mag > 0)
931 {
932 m_out += mag;
933 f_out += freq * mag;
934 }
935 }
936 bool
exec(Audio & audio)937 exec (Audio& audio)
938 {
939 double weighted_tune_factor = 0, mag_weight = 0; /* gather statistics to output average tuning */
940 for (size_t f = 0; f < audio.contents.size(); f++)
941 {
942 AudioBlock& block = audio.contents[f];
943
944 double est_freq = 0, est_mag = 0;
945
946 update_fundamental_estimate (1, block, 0.8, 1.25, est_freq, est_mag);
947 update_fundamental_estimate (2, block, 1.5, 2.5, est_freq, est_mag);
948 update_fundamental_estimate (3, block, 2.5, 3.5, est_freq, est_mag);
949
950 if (est_mag > 0)
951 {
952 est_freq /= est_mag;
953 const double tune_factor = 1 / est_freq;
954
955 /* debug printf ("TAF %f %f\n", audio.fundamental_freq, freq_ratio_to_cent (tune_factor)); */
956
957 for (size_t p = 0; p < block.freqs.size(); p++)
958 {
959 const double freq = block.freqs_f (p) * tune_factor;
960 block.freqs[p] = sm_freq2ifreq (freq);
961 set_need_save (true);
962
963 /* assume orthogonal waves -> mags can be added */
964 weighted_tune_factor += block.mags_f (p) * tune_factor;
965 mag_weight += block.mags_f (p);
966 }
967 }
968 }
969
970 /* we can't give the exact tuning for all frames, so we compute a crude approximation
971 * for the input frequency and "average" tuning factor, using a higher weight
972 * for louder frames
973 */
974 weighted_tune_factor /= mag_weight;
975 const double input_fundamental_freq = audio.fundamental_freq / weighted_tune_factor;
976 sm_printf ("%.17g %.17g %.3f cent\n", audio.fundamental_freq, input_fundamental_freq, freq_ratio_to_cent (weighted_tune_factor));
977 return true;
978 }
979 } tune_all_frames_command;
980
981 class SmoothTuneCommand : public Command
982 {
983 int fundamental_est_n; /* number of partials to use for fundamental estimation in range [1,3] */
984 double smooth_ms;
985 double smooth_percent;
986 public:
SmoothTuneCommand()987 SmoothTuneCommand() : Command ("smooth-tune")
988 {
989 }
990 bool
parse_args(vector<string> & args)991 parse_args (vector<string>& args)
992 {
993 if (args.size() == 3)
994 {
995 fundamental_est_n = atoi (args[0].c_str());
996 assert (fundamental_est_n >= 1 && fundamental_est_n <= 3);
997 smooth_ms = sm_atof (args[1].c_str());
998 assert (smooth_ms > 10 && smooth_ms < 5000);
999 smooth_percent = sm_atof (args[2].c_str());
1000 assert (smooth_percent > -1 && smooth_percent < 101);
1001 return true;
1002 }
1003 return false;
1004 }
1005 void
usage(bool one_line)1006 usage (bool one_line)
1007 {
1008 printf ("<n_partials> <smooth_ms> <smooth_percent>\n");
1009 }
1010 bool
exec(Audio & audio)1011 exec (Audio& audio)
1012 {
1013 AudioTool::auto_tune_smooth (audio, fundamental_est_n, smooth_ms, smooth_percent);
1014 set_need_save (true);
1015
1016 return true;
1017 }
1018 } smooth_tune_command;
1019
1020 static void
normalize_energy(double energy,Audio & audio)1021 normalize_energy (double energy, Audio& audio)
1022 {
1023 const double target_energy = 0.05;
1024 const double norm = sqrt (target_energy / energy);
1025 sm_printf ("avg_energy: %.17g\n", energy);
1026 sm_printf ("norm: %.17g\n", norm);
1027
1028 AudioTool::normalize_factor (norm, audio);
1029 }
1030
1031 class AutoVolumeCommand : public Command
1032 {
1033 double percent;
1034 public:
AutoVolumeCommand()1035 AutoVolumeCommand() : Command ("auto-volume")
1036 {
1037 set_need_save (true);
1038 }
1039 bool
parse_args(vector<string> & args)1040 parse_args (vector<string>& args)
1041 {
1042 if (args.size() == 1)
1043 {
1044 percent = sm_atof (args[0].c_str());
1045 if (percent < 0 || percent > 100)
1046 {
1047 fprintf (stderr, "bad volume percentage: %f\n", percent);
1048 return false;
1049 }
1050 return true;
1051 }
1052 return false;
1053 }
1054 void
usage(bool one_line)1055 usage (bool one_line)
1056 {
1057 printf ("<percent>\n");
1058 }
1059 bool
exec(Audio & audio)1060 exec (Audio& audio)
1061 {
1062 double energy = compute_energy (audio, percent, false);
1063 normalize_energy (energy, audio);
1064 return true;
1065 }
1066 } auto_volume_command;
1067
1068 class AutoVolumeFromLoopCommand : public Command
1069 {
1070 public:
AutoVolumeFromLoopCommand()1071 AutoVolumeFromLoopCommand() : Command ("auto-volume-from-loop")
1072 {
1073 set_need_save (true);
1074 }
1075 bool
exec(Audio & audio)1076 exec (Audio& audio)
1077 {
1078 double energy = compute_energy (audio, /* dummy */ 50, true);
1079 normalize_energy (energy, audio);
1080 return true;
1081 }
1082 } auto_volume_from_loop_command;
1083
1084 class GlobalVolumeCommand : public Command
1085 {
1086 double norm_db;
1087 public:
GlobalVolumeCommand()1088 GlobalVolumeCommand() : Command ("global-volume")
1089 {
1090 set_need_save (true);
1091 }
1092 bool
parse_args(vector<string> & args)1093 parse_args (vector<string>& args)
1094 {
1095 if (args.size() == 1)
1096 {
1097 norm_db = sm_atof (args[0].c_str());
1098 return true;
1099 }
1100 return false;
1101 }
1102 void
usage(bool one_line)1103 usage (bool one_line)
1104 {
1105 printf ("<db>\n");
1106 }
1107 bool
exec(Audio & audio)1108 exec (Audio& audio)
1109 {
1110 AudioTool::normalize_factor (db_to_factor (norm_db), audio);
1111 return true;
1112 }
1113 } global_volume_command;
1114
1115
1116 class StripCommand : public Command
1117 {
1118 public:
StripCommand()1119 StripCommand() : Command ("strip")
1120 {
1121 set_need_save (true);
1122 }
1123 bool
exec(Audio & audio)1124 exec (Audio& audio)
1125 {
1126 /* it would be nice if we could have options like --keep-samples
1127 * but for now we just do the default thing
1128 */
1129 for (size_t i = 0; i < audio.contents.size(); i++)
1130 {
1131 audio.contents[i].debug_samples.clear();
1132 audio.contents[i].original_fft.clear();
1133 }
1134 audio.original_samples.clear();
1135 return true;
1136 }
1137 } strip_command;
1138
1139 class StripAllCommand : public Command
1140 {
1141 public:
StripAllCommand()1142 StripAllCommand() : Command ("strip-all")
1143 {
1144 set_need_save (true);
1145 }
1146 bool
exec(Audio & audio)1147 exec (Audio& audio)
1148 {
1149 for (size_t i = 0; i < audio.contents.size(); i++)
1150 {
1151 audio.contents[i].phases.clear();
1152 audio.contents[i].debug_samples.clear();
1153 audio.contents[i].original_fft.clear();
1154 }
1155 int loop_end = -1;
1156 switch (audio.loop_type)
1157 {
1158 case Audio::LOOP_FRAME_FORWARD:
1159 case Audio::LOOP_FRAME_PING_PONG:
1160 /* strip frames */
1161 loop_end = audio.loop_end;
1162 break;
1163
1164 case Audio::LOOP_NONE:
1165 case Audio::LOOP_TIME_FORWARD:
1166 case Audio::LOOP_TIME_PING_PONG:
1167 /* don't strip frames */
1168 break;
1169 }
1170 if (loop_end >= 0)
1171 {
1172 size_t new_size = loop_end + 1;
1173
1174 if (new_size < audio.contents.size())
1175 audio.contents.resize (new_size);
1176 }
1177 audio.original_samples.clear();
1178 return true;
1179 }
1180 } strip_all_command;
1181
1182 class ExtractSMCommand : public Command
1183 {
1184 int note;
1185 string filename;
1186 public:
ExtractSMCommand()1187 ExtractSMCommand() : Command ("extract-sm")
1188 {
1189 }
1190 bool
parse_args(vector<string> & args)1191 parse_args (vector<string>& args)
1192 {
1193 if (args.size() == 2)
1194 {
1195 note = atoi (args[0].c_str());
1196 filename = args[1];
1197 return true;
1198 }
1199 return false;
1200 }
1201 void
usage(bool one_line)1202 usage (bool one_line)
1203 {
1204 printf ("<note> <filename>\n");
1205 }
1206 bool
exec(Audio & audio)1207 exec (Audio& audio)
1208 {
1209 const WavSetWave *w = wave();
1210 if (w && w->midi_note == note)
1211 {
1212 printf ("saving note %d to %s\n", w->midi_note, filename.c_str());
1213 audio.save (filename);
1214 }
1215 return true;
1216 }
1217 } extract_sm_command;
1218
1219 int
main(int argc,char ** argv)1220 main (int argc, char **argv)
1221 {
1222 Main main (&argc, &argv);
1223
1224 if (argc < 3)
1225 {
1226 printf ("usage: smtool <sm_file> <mode> [ <mode_specific_args> ]\n");
1227 printf ("\n");
1228 printf ("mode specific args:\n\n");
1229
1230 for (vector<Command *>::iterator ci = Command::registry()->begin(); ci != Command::registry()->end(); ci++)
1231 {
1232 printf (" smtool <sm_file> %s ", (*ci)->mode().c_str());
1233 (*ci)->usage (true);
1234 }
1235 return 1;
1236 }
1237
1238 const string& mode = argv[2];
1239
1240 /* figure out file type (we support SpectMorph::WavSet and SpectMorph::Audio) */
1241 InFile *file = new InFile (argv[1]);
1242 if (!file->open_ok())
1243 {
1244 fprintf (stderr, "%s: can't open input file: %s\n", argv[0], argv[1]);
1245 exit (1);
1246 }
1247 string file_type = file->file_type();
1248 delete file;
1249
1250 Audio *audio = NULL;
1251 WavSet *wav_set = NULL;
1252 if (file_type == "SpectMorph::Audio")
1253 {
1254 audio = new Audio;
1255 load_or_die (*audio, argv[1], mode);
1256 }
1257 else if (file_type == "SpectMorph::WavSet")
1258 {
1259 wav_set = new WavSet;
1260 Error error = wav_set->load (argv[1]);
1261 if (error)
1262 {
1263 fprintf (stderr, "smtool: can't load file: %s\n", argv[1]);
1264 return 1;
1265 }
1266 }
1267 else
1268 {
1269 g_printerr ("unknown file_type: %s\n", file_type.c_str());
1270 return 1;
1271 }
1272
1273 bool need_save = false;
1274 bool found_command = false;
1275
1276 vector<string> args;
1277 for (int i = 3; i < argc; i++)
1278 args.push_back (argv[i]);
1279
1280 for (vector<Command *>::iterator ci = Command::registry()->begin(); ci != Command::registry()->end(); ci++)
1281 {
1282 Command *cmd = *ci;
1283 if (cmd->mode() == mode)
1284 {
1285 assert (!found_command);
1286 found_command = true;
1287
1288 if (!cmd->parse_args (args))
1289 {
1290 printf ("usage: smtool <sm_file> %s ", cmd->mode().c_str());
1291 cmd->usage (true);
1292 return 1;
1293 }
1294 if (audio)
1295 cmd->exec (*audio);
1296 if (wav_set)
1297 {
1298 set<Audio *> done;
1299
1300 for (vector<WavSetWave>::iterator wi = wav_set->waves.begin(); wi != wav_set->waves.end(); wi++)
1301 {
1302 sm_printf ("## midi_note=%d channel=%d velocity_range=%d..%d\n", wi->midi_note, wi->channel,
1303 wi->velocity_range_min, wi->velocity_range_max);
1304 if (done.find (wi->audio) != done.end())
1305 {
1306 sm_printf ("## ==> skipped (was processed earlier)\n");
1307 }
1308 else
1309 {
1310 cmd->set_wave (&*wi);
1311 cmd->exec (*wi->audio);
1312 done.insert (wi->audio);
1313 }
1314 }
1315 }
1316
1317 need_save = cmd->need_save();
1318 }
1319 }
1320 if (!found_command)
1321 {
1322 g_printerr ("unknown mode: %s\n", mode.c_str());
1323 return 1;
1324 }
1325 if (need_save)
1326 {
1327 if (audio)
1328 {
1329 Error error = audio->save (argv[1]);
1330 if (error)
1331 {
1332 fprintf (stderr, "error saving audio file: %s\n", argv[1]);
1333 return 1;
1334 }
1335 }
1336 if (wav_set)
1337 {
1338 Error error = wav_set->save (argv[1]);
1339 if (error)
1340 {
1341 fprintf (stderr, "error saving wavset file: %s\n", argv[1]);
1342 return 1;
1343 }
1344 }
1345 }
1346 if (wav_set)
1347 {
1348 delete wav_set;
1349 wav_set = 0;
1350 }
1351 if (audio)
1352 {
1353 delete audio;
1354 audio = 0;
1355 }
1356 }
1357