1 // Licensed GNU LGPL v3 or later: http://www.gnu.org/licenses/lgpl.html
2 
3 #include <vector>
4 #include <stdio.h>
5 #include <assert.h>
6 
7 #include "smaudio.hh"
8 #include "smwavset.hh"
9 #include "smlivedecoder.hh"
10 #include "smmain.hh"
11 #include "sminfile.hh"
12 #include "smutils.hh"
13 #include "smfft.hh"
14 #include "smaudiotool.hh"
15 
16 using namespace SpectMorph;
17 using std::vector;
18 using std::min;
19 using std::max;
20 using std::string;
21 using std::set;
22 
/*
 * Sum of the squared element-wise differences of a and b.
 *
 * Both vectors are expected to have the same length (checked via assert).
 */
double
vector_delta (const std::vector<double>& a, const std::vector<double>& b)
{
  assert (a.size() == b.size());

  double sum_sq = 0;
  auto b_it = b.begin();
  for (auto a_it = a.begin(); a_it != a.end(); ++a_it, ++b_it)
    {
      // same expression as before, so the summation is bit-identical
      sum_sq += (*a_it - *b_it) * (*a_it - *b_it);
    }
  return sum_sq;
}
33 
34 static void
reconstruct(AudioBlock & audio_block,vector<double> & signal,const Audio & audio)35 reconstruct (AudioBlock&     audio_block,
36              vector<double>& signal,
37              const Audio&    audio)
38 {
39   for (size_t partial = 0; partial < audio_block.freqs.size(); partial++)
40     {
41       double f     = audio_block.freqs_f (partial) * audio.fundamental_freq;
42       double mag   = audio_block.mags_f (partial);
43       double phase = audio_block.phases_f (partial);
44 
45       // do a phase optimal reconstruction of that partial
46       for (size_t n = 0; n < signal.size(); n++)
47         {
48           signal[n] += sin (phase) * mag;
49           phase += f / audio.mix_freq * 2.0 * M_PI;
50         }
51     }
52 }
53 
/* Magnitude sqrt (re^2 + im^2) of a complex value given as real / imaginary part. */
float
mag (float re, float im)
{
  const float squared = re * re + im * im;

  return sqrt (squared);
}
59 
/*
 * Attack envelope parameters, as consumed by attack_error():
 * the envelope is 0 before attack_start_ms, rises linearly up to
 * attack_end_ms and is 1 afterwards.
 */
struct Attack
{
  double attack_start_ms; // time (ms) before which the envelope is 0
  double attack_end_ms;   // time (ms) from which on the envelope is 1
};
65 
66 double
attack_error(const SpectMorph::Audio & audio,const vector<vector<double>> & unscaled_signal,const Attack & attack,vector<double> & out_scale)67 attack_error (const SpectMorph::Audio& audio, const vector< vector<double> >& unscaled_signal, const Attack& attack, vector<double>& out_scale)
68 {
69   const size_t frames = unscaled_signal.size();
70   double total_error = 0;
71 
72   for (size_t f = 0; f < frames; f++)
73     {
74       const vector<double>& frame_signal = unscaled_signal[f];
75       size_t zero_values = 0;
76       double scale = 1.0; /* init to get rid of gcc compiler warning */
77 
78       for (size_t n = 0; n < frame_signal.size(); n++)
79         {
80           const double n_ms = f * audio.frame_step_ms + n * 1000.0 / audio.mix_freq;
81           double env;
82           scale = (zero_values > 0) ? frame_signal.size() / double (frame_signal.size() - zero_values) : 1.0;
83           if (n_ms < attack.attack_start_ms)
84             {
85               env = 0;
86               zero_values++;
87             }
88           else if (n_ms < attack.attack_end_ms)  // during attack
89             {
90               const double attack_len_ms = attack.attack_end_ms - attack.attack_start_ms;
91 
92               env = (n_ms - attack.attack_start_ms) / attack_len_ms;
93             }
94           else // after attack
95             {
96               env = 1.0;
97             }
98           const double value = frame_signal[n] * scale * env;
99           const double error = value - audio.contents[f].debug_samples[n];
100           total_error += error * error;
101         }
102       out_scale[f] = scale;
103     }
104   return total_error;
105 }
106 
/*
 * Verify the number of command line arguments.
 *
 * If argc differs from need_argc, a usage line containing usage is printed
 * to stdout and the process is terminated with exit status 1.
 */
void
check_usage (int argc, int need_argc, const std::string& usage)
{
  if (argc == need_argc)
    return;

  printf ("usage: smtool <sm_file> %s\n", usage.c_str());
  exit (1);
}
116 
117 void
load_or_die(Audio & audio,const string & filename,const string & mode)118 load_or_die (Audio& audio, const string& filename, const string& mode)
119 {
120   AudioLoadOptions load_options = AUDIO_LOAD_DEBUG;
121 
122   if (mode == "fundamental-freq" || mode == "freq"
123   ||  mode == "frameparams" || mode == "noiseparams" || mode == "attack" ||
124       mode == "zero-values-at-start" || mode == "mix-freq")
125     load_options = AUDIO_SKIP_DEBUG;
126 
127   Error error = audio.load (filename, load_options);
128   if (error)
129     {
130       fprintf (stderr, "can't load file: %s\n",filename.c_str());
131       exit (1);
132     }
133 }
134 
/* Returns true if at least one element of data is NaN, false otherwise. */
static bool
find_nan (std::vector<float>& data)
{
  for (const float value : data)
    {
      if (std::isnan (value))
        return true;
    }
  return false;
}
143 
144 static double
compute_energy(const Audio & audio,double percent,bool from_loop)145 compute_energy (const Audio& audio, double percent, bool from_loop)
146 {
147   double percent_start = percent - 5;
148   double percent_stop = percent + 5;
149   if (percent_start < 0 || percent_stop > 100)
150     {
151       fprintf (stderr, "bad volume percentage: %f\n", percent);
152       exit (1);
153     }
154 
155   Audio *noloop_audio = audio.clone();
156   if (!from_loop)
157     {
158       noloop_audio->loop_type = Audio::LOOP_NONE;  // don't use looped signal, but original signal
159     }
160 
161   WavSet smset;
162   WavSetWave new_wave;
163   new_wave.midi_note = 60; // doesn't matter
164   new_wave.channel = 0;
165   new_wave.velocity_range_min = 0;
166   new_wave.velocity_range_max = 127;
167   new_wave.audio = noloop_audio;
168   smset.waves.push_back (new_wave);
169 
170   LiveDecoder decoder (&smset);
171   // we need reproducable noise to get the same energy every time
172   decoder.set_noise_seed (42);
173   decoder.retrigger (0, audio.fundamental_freq, 127, audio.mix_freq);
174   vector<float> samples;
175   if (from_loop)
176     {
177       // at least one second, or twice the original len, whatever is longer
178       samples.resize (audio.sample_count + MAX (audio.sample_count, audio.mix_freq));
179     }
180   else
181     {
182       samples.resize (audio.sample_count);
183     }
184   decoder.process (samples.size(), nullptr, &samples[0]);
185 
186   double energy = 0;
187   size_t energy_norm = 0;
188   if (from_loop)
189     {
190       // start evaluating energy after end of original data (so we're counting the looped part only
191       for (size_t pos = audio.sample_count; pos < samples.size(); pos++)
192         {
193           energy += samples[pos] * samples[pos];
194           energy_norm++;
195         }
196     }
197   else
198     {
199       for (size_t pos = 0; pos < samples.size(); pos++)
200         {
201           double percent = (pos * 100.0) / samples.size();
202           if (percent > percent_start && percent < percent_stop)
203             {
204               energy += samples[pos] * samples[pos];
205               energy_norm++;
206             }
207         }
208     }
209   return energy / energy_norm;
210 }
211 
212 class Command
213 {
214   string            m_mode;
215   bool              m_need_save;
216   const WavSetWave *m_wave;
217 public:
218   static vector<Command *> *registry();
Command(const string & mode)219   Command (const string& mode)
220   {
221     registry()->push_back (this);
222     m_mode = mode;
223     m_need_save = false;
224     m_wave = NULL;
225   }
226   virtual bool
parse_args(vector<string> & args)227   parse_args (vector<string>& args)
228   {
229     return args.size() == 0;
230   }
231   virtual bool exec (Audio& audio) = 0;
usage(bool one_line)232   virtual void usage (bool one_line)
233   {
234     printf ("\n");
235   }
~Command()236   virtual ~Command()
237   {
238   }
mode() const239   string mode() const
240   {
241     return m_mode;
242   }
243   bool
need_save() const244   need_save() const
245   {
246     return m_need_save;
247   }
248   void
set_need_save(bool s)249   set_need_save (bool s)
250   {
251     m_need_save = s;
252   }
253   const WavSetWave *
wave() const254   wave() const
255   {
256     return m_wave;
257   }
258   void
set_wave(const WavSetWave * wave)259   set_wave (const WavSetWave *wave)
260   {
261     m_wave = wave;
262   }
263 };
264 
265 vector<Command *> *
registry()266 Command::registry()
267 {
268   static vector<Command *> *rx = 0;
269   if (!rx)
270     rx = new vector<Command *>;
271   return rx;
272 }
273 
274 class VolumeCommand : public Command
275 {
276   double percent;
277 public:
VolumeCommand()278   VolumeCommand() : Command ("volume")
279   {
280   }
281   bool
parse_args(vector<string> & args)282   parse_args (vector<string>& args)
283   {
284     if (args.size() == 1)
285       {
286         percent = sm_atof (args[0].c_str());
287         return true;
288       }
289     return false;
290   }
291   bool
exec(Audio & audio)292   exec (Audio& audio)
293   {
294     const double energy = compute_energy (audio, percent, false);
295     sm_printf ("avg_energy: %.17g\n", energy);
296     return true;
297   }
298   void
usage(bool one_line)299   usage (bool one_line)
300   {
301     printf ("<percent>\n");
302   }
303 } volume_command;
304 
305 class FundamentalFreqCommand : public Command
306 {
307 public:
FundamentalFreqCommand()308   FundamentalFreqCommand() : Command ("fundamental-freq")
309   {
310   }
311   bool
exec(Audio & audio)312   exec (Audio& audio)
313   {
314     sm_printf ("fundamental-freq: %f\n", audio.fundamental_freq);
315     return true;
316   }
317 } fundamental_freq_command;
318 
319 class MixFreqCommand : public Command
320 {
321 public:
MixFreqCommand()322   MixFreqCommand() : Command ("mix-freq")
323   {
324   }
325   bool
exec(Audio & audio)326   exec (Audio& audio)
327   {
328     sm_printf ("mix-freq: %f\n", audio.mix_freq);
329     return true;
330   }
331 } mix_freq_command;
332 
333 class StatsCommand : public Command
334 {
335 public:
StatsCommand()336   StatsCommand() : Command ("stats")
337   {
338   }
339   bool
exec(Audio & audio)340   exec (Audio& audio)
341   {
342     const WavSetWave *w = wave();
343 
344     double mag_weight = 0, mag_partials = 0;
345     for (size_t f = 0; f < audio.contents.size(); f++)
346       {
347         const AudioBlock& block = audio.contents[f];
348 
349         double mag = 0;
350         for (size_t i = 0; i < block.freqs.size(); i++)
351           mag += block.mags_f (i);
352 
353         /* give higher weight to louder audio blocks */
354         mag_weight   += mag;
355         mag_partials += mag * block.freqs.size();
356       }
357     mag_partials /= mag_weight;
358     sm_printf ("%d %d %f\n", w ? w->midi_note : -1, int (audio.mix_freq + 0.5), mag_partials);
359     return true;
360   }
361 } stats_command;
362 
363 class SampleCountCommand : public Command
364 {
365 public:
SampleCountCommand()366   SampleCountCommand() : Command ("sample-count")
367   {
368   }
369   bool
exec(Audio & audio)370   exec (Audio& audio)
371   {
372     sm_printf ("sample-count: %d\n", audio.sample_count);
373     return true;
374   }
375 } sample_count_command;
376 
377 class ZeroValuesAtStartCommand : public Command
378 {
379 public:
ZeroValuesAtStartCommand()380   ZeroValuesAtStartCommand() : Command ("zero-values-at-start")
381   {
382   }
383   bool
exec(Audio & audio)384   exec (Audio& audio)
385   {
386     sm_printf ("zero-values-at-start: %d\n", audio.zero_values_at_start);
387     return true;
388   }
389 } zero_values_at_start_command;
390 
391 class AttackCommand : public Command
392 {
393 public:
AttackCommand()394   AttackCommand() : Command ("attack")
395   {
396   }
397   bool
exec(Audio & audio)398   exec (Audio& audio)
399   {
400     sm_printf ("start of attack: %.2f ms\n", audio.attack_start_ms);
401     sm_printf ("  end of attack: %.2f ms\n", audio.attack_end_ms);
402     return true;
403   }
404 } attack_command;
405 
406 class SizeCommand : public Command
407 {
408 public:
SizeCommand()409   SizeCommand() : Command ("size")
410   {
411   }
412   bool
exec(Audio & audio)413   exec (Audio& audio)
414   {
415     size_t phase_bytes = 0, freq_bytes = 0, mag_bytes = 0, debug_samples_bytes = 0, original_fft_bytes = 0, noise_bytes = 0;
416     for (size_t f = 0; f < audio.contents.size(); f++)
417       {
418         phase_bytes += audio.contents[f].phases.size() * sizeof (uint16_t);
419         freq_bytes += audio.contents[f].freqs.size() * sizeof (uint16_t);
420         mag_bytes += audio.contents[f].mags.size() * sizeof (uint16_t);
421         debug_samples_bytes += audio.contents[f].debug_samples.size() * sizeof (float);
422         original_fft_bytes += audio.contents[f].original_fft.size() * sizeof (float);
423         noise_bytes += audio.contents[f].noise.size() * sizeof (uint16_t);
424       }
425     size_t original_samples_bytes = audio.original_samples.size() * sizeof (float);
426 
427     sm_printf ("frequencies  : %zd bytes\n", freq_bytes);
428     sm_printf ("mags         : %zd bytes\n", mag_bytes);
429     sm_printf ("phases       : %zd bytes\n", phase_bytes);
430     sm_printf ("dbgsamples   : %zd bytes\n", debug_samples_bytes);
431     sm_printf ("orig_fft     : %zd bytes\n", original_fft_bytes);
432     sm_printf ("noise        : %zd bytes\n", noise_bytes);
433     sm_printf ("orig_samples : %zd bytes\n", original_samples_bytes);
434 
435     size_t total_bytes = (freq_bytes + mag_bytes + phase_bytes + noise_bytes);
436     sm_printf ("data rate    : %.2f K/s\n", total_bytes / 1024.0 / (audio.sample_count / audio.mix_freq));
437     return true;
438   }
439 } size_command;
440 
441 class LoopParamsCommand : public Command
442 {
443 public:
LoopParamsCommand()444   LoopParamsCommand() : Command ("loop-params")
445   {
446   }
447   bool
exec(Audio & audio)448   exec (Audio& audio)
449   {
450     sm_printf ("frames: %zd\n", audio.contents.size());
451     string loop_str;
452     if (audio.loop_type_to_string (audio.loop_type, loop_str))
453       sm_printf ("loop type: %s\n", loop_str.c_str());
454     else
455       sm_printf ("loop type: *unknown* (%d)\n", audio.loop_type);
456     sm_printf ("loop start: %d\n", audio.loop_start);
457     sm_printf ("loop end: %d\n", audio.loop_end);
458 
459     return true;
460   }
461 } loop_params_command;
462 
463 class NoiseParamsCommand : public Command
464 {
465   int frame;
466 public:
NoiseParamsCommand()467   NoiseParamsCommand() : Command ("noise-params")
468   {
469   }
470   bool
parse_args(vector<string> & args)471   parse_args (vector<string>& args)
472   {
473     if (args.size() == 1)
474       {
475         frame = atoi (args[0].c_str());
476         return true;
477       }
478     return false;
479   }
480   bool
exec(Audio & audio)481   exec (Audio& audio)
482   {
483     for (size_t i = 0; i < audio.contents[frame].noise.size(); i++)
484       sm_printf ("%.7g\n", audio.contents[frame].noise_f (i));
485     return true;
486   }
487   void
usage(bool one_line)488   usage (bool one_line)
489   {
490     printf ("<frame_no>\n");
491   }
492 } noise_params_command;
493 
494 class FrameCommand : public Command
495 {
496   int frame;
497 public:
FrameCommand()498   FrameCommand() : Command ("frame")
499   {
500   }
501   bool
parse_args(vector<string> & args)502   parse_args (vector<string>& args)
503   {
504     if (args.size() == 1)
505       {
506         frame = atoi (args[0].c_str());
507         return true;
508       }
509     return false;
510   }
511   bool
exec(Audio & audio)512   exec (Audio& audio)
513   {
514     int i = frame;
515     size_t frame_size = audio.contents[i].debug_samples.size();
516     vector<double> sines (frame_size);
517     reconstruct (audio.contents[i], sines, audio);
518     for (size_t n = 0; n < audio.contents[i].debug_samples.size(); n++)
519       {
520         double v = audio.contents[i].debug_samples[n];
521         sm_printf ("%zd %f %f %f\n", n, v, sines[n], v - sines[n]);
522       }
523     return true;
524   }
525   void
usage(bool one_line)526   usage (bool one_line)
527   {
528     printf ("<frame_no>\n");
529   }
530 } frame_command;
531 
532 class FrameParamsCommand : public Command
533 {
534   int frame;
535 public:
FrameParamsCommand()536   FrameParamsCommand() : Command ("frame-params")
537   {
538   }
539   bool
parse_args(vector<string> & args)540   parse_args (vector<string>& args)
541   {
542     if (args.size() == 1)
543       {
544         frame = atoi (args[0].c_str());
545         return true;
546       }
547     return false;
548   }
549   void
usage(bool one_line)550   usage (bool one_line)
551   {
552     printf ("<frame_no>\n");
553   }
554   bool
exec(Audio & audio)555   exec (Audio& audio)
556   {
557     int i = frame;
558     for(;;)
559       {
560         int    maxm = 0;
561         size_t maxp = 0;
562         for (size_t partial = 0; partial < audio.contents[i].freqs.size(); partial++)
563           {
564             const int m = audio.contents[i].mags[partial];
565             if (m > maxm)
566               {
567                 maxm = m;
568                 maxp = partial;
569               }
570           }
571         if (maxm > 0)
572           {
573             const double freq = audio.contents[i].freqs_f (maxp) * audio.fundamental_freq;
574             const double mag_factor = audio.contents[i].mags_f (maxp);
575             const double mag_db = db_from_factor (mag_factor, -200);
576 
577             sm_printf ("%f Hz: %f\n", freq, mag_db);
578             audio.contents[i].mags[maxp] = 0;
579           }
580         else
581           {
582             break;
583           }
584       }
585     return true;
586   }
587 } frame_params_command;
588 
589 class TotalNoiseCommand : public Command
590 {
591 public:
TotalNoiseCommand()592   TotalNoiseCommand() : Command ("total-noise")
593   {
594   }
595   bool
exec(Audio & audio)596   exec (Audio& audio)
597   {
598     double total_noise = 0;
599     double peak_noise  = 0;
600 
601     for (size_t f = 0; f < audio.contents.size(); f++)
602       {
603         for (size_t i = 0; i < audio.contents[f].noise.size(); i++)
604           {
605             const double noise = audio.contents[f].noise_f (i);
606 
607             total_noise += noise;
608             peak_noise   = max (peak_noise, noise);
609           }
610       }
611     sm_printf ("total-noise: %.17g\n", total_noise);
612     sm_printf ("peak-noise:  %.17g\n", peak_noise);
613     return true;
614   }
615 } total_noise_command;
616 
617 class NanTestCommand : public Command
618 {
619 public:
NanTestCommand()620   NanTestCommand() : Command ("nan-test")
621   {
622   }
623   bool
exec(Audio & audio)624   exec (Audio& audio)
625   {
626     int nan_ds = 0, nan_fft = 0;
627 
628     for (size_t f = 0; f < audio.contents.size(); f++)
629       {
630         if (find_nan (audio.contents[f].debug_samples))
631           nan_ds++;
632         if (find_nan (audio.contents[f].original_fft))
633           nan_fft++;
634       }
635     sm_printf ("nan-debug-samples: %d\n", nan_ds);
636     sm_printf ("nan-original-fft:  %d\n", nan_fft);
637     return true;
638   }
639 } nan_test_command;
640 
641 class OriginalSamplesCommand : public Command
642 {
643 public:
OriginalSamplesCommand()644   OriginalSamplesCommand() : Command ("original-samples")
645   {
646   }
647   bool
exec(Audio & audio)648   exec (Audio& audio)
649   {
650     for (size_t i = 0; i < audio.original_samples.size(); i++)
651       sm_printf ("%.17g\n", audio.original_samples[i]);
652 
653     return true;
654   }
655 } original_samples_command;
656 
657 class FreqCommand : public Command
658 {
659   double freq_min, freq_max;
660 public:
FreqCommand()661   FreqCommand() : Command ("freq")
662   {
663   }
664   bool
parse_args(vector<string> & args)665   parse_args (vector<string>& args)
666   {
667     if (args.size() == 2)
668       {
669         freq_min = sm_atof (args[0].c_str());
670         freq_max = sm_atof (args[1].c_str());
671         return true;
672       }
673     return false;
674   }
675   bool
exec(Audio & audio)676   exec (Audio& audio)
677   {
678     for (size_t i = 0; i < audio.contents.size(); i++)
679       {
680         const AudioBlock& block = audio.contents[i];
681         for (size_t n = 0; n < block.freqs.size(); n++)
682           {
683             const double freq = block.freqs_f (n) * audio.fundamental_freq;
684 
685             if (freq > freq_min && freq < freq_max)
686               {
687                 sm_printf ("%zd %f %f\n", i, freq, block.mags_f (n));
688               }
689           }
690       }
691     return true;
692   }
693   void
usage(bool one_line)694   usage (bool one_line)
695   {
696     printf ("<freq_min> <freq_max>\n");
697   }
698 } freq_command;
699 
700 class SpectrumCommand : public Command
701 {
702   int frame;
703 public:
SpectrumCommand()704   SpectrumCommand() : Command ("spectrum")
705   {
706   }
707   bool
parse_args(vector<string> & args)708   parse_args (vector<string>& args)
709   {
710     if (args.size() == 1)
711       {
712         frame = atoi (args[0].c_str());
713         return true;
714       }
715     return false;
716   }
717   void
usage(bool one_line)718   usage (bool one_line)
719   {
720     printf ("<frame_no>\n");
721   }
722   bool
exec(Audio & audio)723   exec (Audio& audio)
724   {
725     size_t frame_size = audio.frame_size_ms * audio.mix_freq / 1000;
726     int i = frame;
727     vector<double> spectrum;
728     vector<double> sines (frame_size);
729 
730     reconstruct (audio.contents[i], sines, audio);
731 
732     /* compute block size from frame size (smallest 2^k value >= frame_size) */
733     size_t block_size = 1;
734     while (block_size < frame_size)
735       block_size *= 2;
736 
737     // construct window
738     vector<float> window (block_size);
739     for (guint i = 0; i < window.size(); i++)
740       {
741         if (i < frame_size)
742           window[i] = window_cos (2.0 * i / frame_size - 1.0);
743         else
744           window[i] = 0;
745       }
746 
747     // apply window to reconstructed signal
748     sines.resize (block_size);
749     for (guint i = 0; i < sines.size(); i++)
750       sines[i] *= window[i];
751 
752     // zeropad
753     const int    zeropad  = 4;
754     sines.resize (block_size * zeropad);
755     vector<double> out (block_size * zeropad);
756 
757     float *fft_in = FFT::new_array_float (sines.size());
758     float *fft_out = FFT::new_array_float (sines.size());
759 
760     std::copy (sines.begin(), sines.end(), fft_in);
761     FFT::fftar_float (sines.size(), fft_in, fft_out);
762     std::copy (fft_out, fft_out + sines.size(), out.begin());
763 
764     FFT::free_array_float (fft_out);
765     FFT::free_array_float (fft_in);
766 
767     vector<double> sines_spectrum;
768     for (size_t n = 0; n < audio.contents[i].original_fft.size(); n += 2)
769       {
770         double re = audio.contents[i].original_fft[n];
771         double im = audio.contents[i].original_fft[n + 1];
772         spectrum.push_back (sqrt (re * re + im * im));
773         sines_spectrum.push_back (mag (out[n], out[n+1]));
774       }
775     for (size_t n = 0; n < spectrum.size(); n++)
776       {
777         double s = 0;
778         for (size_t r = 0; r < 1; r++)
779           {
780             if (n + r < spectrum.size())
781               s = std::max (s, spectrum[n + r]);
782             if (r < n)
783               s = std::max (s, spectrum[n - r]);
784           }
785         sm_printf ("%f %f %f\n", n * 0.5 * audio.mix_freq / spectrum.size(), s, sines_spectrum[n]);
786       }
787     return true;
788   }
789 } spectrum_command;
790 
791 class AutoLoopCommand : public Command
792 {
793   double percent;
794 public:
AutoLoopCommand()795   AutoLoopCommand() : Command ("auto-loop")
796   {
797     set_need_save (true);
798   }
799   bool
parse_args(vector<string> & args)800   parse_args (vector<string>& args)
801   {
802     if (args.size() == 1)
803       {
804         percent = sm_atof (args[0].c_str());
805         if (percent < 0 || percent > 100)
806           {
807             fprintf (stderr, "bad loop percentage: %f\n", percent);
808             return false;
809           }
810         return true;
811       }
812     return false;
813   }
814   void
usage(bool one_line)815   usage (bool one_line)
816   {
817     printf ("<percent>\n");
818   }
819   bool
exec(Audio & audio)820   exec (Audio& audio)
821   {
822     int loop_point = audio.contents.size() * percent / 100;
823     if (loop_point < 0)
824       loop_point = 0;
825     if (size_t (loop_point) >= (audio.contents.size() - 1))
826       loop_point = audio.contents.size() - 1;
827     audio.loop_type = Audio::LOOP_FRAME_FORWARD;
828     audio.loop_start = loop_point;
829     audio.loop_end = loop_point;
830     return true;
831   }
832 } auto_loop_command;
833 
834 class TailLoopCommand : public Command
835 {
836 public:
TailLoopCommand()837   TailLoopCommand() : Command ("tail-loop")
838   {
839     set_need_save (true);
840   }
841   bool
exec(Audio & audio)842   exec (Audio& audio)
843   {
844     int loop_point = -1;
845     size_t frame_size = audio.frame_size_ms * audio.mix_freq / 1000;
846     const int frame_step = audio.frame_step_ms * audio.mix_freq / 1000;
847 
848     // we need the largest frame that doesn't include any data beyond the original file end
849     for (size_t i = 0; i < audio.contents.size(); i++)
850       {
851         if (i * frame_step + frame_size < size_t (audio.sample_count))
852           loop_point = i;
853       }
854     audio.loop_type = Audio::LOOP_FRAME_FORWARD;
855     audio.loop_start = loop_point;
856     audio.loop_end = loop_point;
857 
858     return true;
859   }
860 } tail_loop_command;
861 
/* Convert a frequency ratio to cent (a ratio of 2 corresponds to 1200 cent). */
double
freq_ratio_to_cent (double freq_ratio)
{
  const double octaves = log (freq_ratio) / log (2);

  return octaves * 1200;
}
867 
868 class AutoTuneCommand : public Command
869 {
870 public:
AutoTuneCommand()871   AutoTuneCommand() : Command ("auto-tune")
872   {
873   }
874   bool
exec(Audio & audio)875   exec (Audio& audio)
876   {
877     double tune_factor;
878     if (AudioTool::get_auto_tune_factor (audio, tune_factor))
879       {
880         AudioTool::apply_auto_tune_factor (audio, tune_factor);
881 
882         double input_fundamental_freq = audio.fundamental_freq / tune_factor;
883         sm_printf ("%.17g  %.17g  %.3f cent\n", audio.fundamental_freq, input_fundamental_freq, freq_ratio_to_cent (tune_factor));
884 
885         set_need_save (true);
886       }
887     return true;
888   }
889 } auto_tune_command;
890 
891 class TuneAllFramesCommand : public Command
892 {
893   int fundamental_est_n; /* number of partials to use for fundamental estimation in range [1,3] */
894 public:
TuneAllFramesCommand()895   TuneAllFramesCommand() : Command ("tune-all-frames")
896   {
897   }
898   bool
parse_args(vector<string> & args)899   parse_args (vector<string>& args)
900   {
901     if (args.size() == 1)
902       {
903         fundamental_est_n = atoi (args[0].c_str());
904         assert (fundamental_est_n >= 1 && fundamental_est_n <= 3);
905         return true;
906       }
907     return false;
908   }
909   void
usage(bool one_line)910   usage (bool one_line)
911   {
912     printf ("<n_partials>\n");
913   }
914   void
update_fundamental_estimate(int n,const AudioBlock & block,double freq_min,double freq_max,double & f_out,double & m_out)915   update_fundamental_estimate (int n, const AudioBlock& block, double freq_min, double freq_max, double& f_out, double& m_out)
916   {
917     if (n > fundamental_est_n) /* use this partial for fundamental estimation? */
918       return;
919 
920     double freq = 0, mag = 0;
921 
922     for (size_t p = 0; p < block.mags.size(); p++)
923       {
924         if (block.freqs_f (p) > freq_min && block.freqs_f (p) < freq_max && block.mags_f (p) > mag)
925           {
926             mag = block.mags_f (p);
927             freq = block.freqs_f (p) / n;
928           }
929       }
930     if (mag > 0)
931       {
932         m_out += mag;
933         f_out += freq * mag;
934       }
935   }
936   bool
exec(Audio & audio)937   exec (Audio& audio)
938   {
939     double weighted_tune_factor = 0, mag_weight = 0; /* gather statistics to output average tuning */
940     for (size_t f = 0; f < audio.contents.size(); f++)
941       {
942         AudioBlock& block = audio.contents[f];
943 
944         double est_freq = 0, est_mag = 0;
945 
946         update_fundamental_estimate (1, block, 0.8, 1.25, est_freq, est_mag);
947         update_fundamental_estimate (2, block, 1.5, 2.5,  est_freq, est_mag);
948         update_fundamental_estimate (3, block, 2.5, 3.5,  est_freq, est_mag);
949 
950         if (est_mag > 0)
951           {
952             est_freq /= est_mag;
953             const double tune_factor = 1 / est_freq;
954 
955             /* debug printf ("TAF %f %f\n", audio.fundamental_freq, freq_ratio_to_cent (tune_factor)); */
956 
957             for (size_t p = 0; p < block.freqs.size(); p++)
958               {
959                 const double freq = block.freqs_f (p) * tune_factor;
960                 block.freqs[p] = sm_freq2ifreq (freq);
961                 set_need_save (true);
962 
963                 /* assume orthogonal waves -> mags can be added */
964                 weighted_tune_factor += block.mags_f (p) * tune_factor;
965                 mag_weight += block.mags_f (p);
966               }
967           }
968       }
969 
970     /* we can't give the exact tuning for all frames, so we compute a crude approximation
971      * for the input frequency and "average" tuning factor, using a higher weight
972      * for louder frames
973      */
974     weighted_tune_factor /= mag_weight;
975     const double input_fundamental_freq = audio.fundamental_freq / weighted_tune_factor;
976     sm_printf ("%.17g  %.17g  %.3f cent\n", audio.fundamental_freq, input_fundamental_freq, freq_ratio_to_cent (weighted_tune_factor));
977     return true;
978   }
979 } tune_all_frames_command;
980 
981 class SmoothTuneCommand : public Command
982 {
983   int fundamental_est_n; /* number of partials to use for fundamental estimation in range [1,3] */
984   double smooth_ms;
985   double smooth_percent;
986 public:
SmoothTuneCommand()987   SmoothTuneCommand() : Command ("smooth-tune")
988   {
989   }
990   bool
parse_args(vector<string> & args)991   parse_args (vector<string>& args)
992   {
993     if (args.size() == 3)
994       {
995         fundamental_est_n = atoi (args[0].c_str());
996         assert (fundamental_est_n >= 1 && fundamental_est_n <= 3);
997         smooth_ms = sm_atof (args[1].c_str());
998         assert (smooth_ms > 10 && smooth_ms < 5000);
999         smooth_percent = sm_atof (args[2].c_str());
1000         assert (smooth_percent > -1 && smooth_percent < 101);
1001         return true;
1002       }
1003     return false;
1004   }
1005   void
usage(bool one_line)1006   usage (bool one_line)
1007   {
1008     printf ("<n_partials> <smooth_ms> <smooth_percent>\n");
1009   }
1010   bool
exec(Audio & audio)1011   exec (Audio& audio)
1012   {
1013     AudioTool::auto_tune_smooth (audio, fundamental_est_n, smooth_ms, smooth_percent);
1014     set_need_save (true);
1015 
1016     return true;
1017   }
1018 } smooth_tune_command;
1019 
1020 static void
normalize_energy(double energy,Audio & audio)1021 normalize_energy (double energy, Audio& audio)
1022 {
1023   const double target_energy = 0.05;
1024   const double norm = sqrt (target_energy / energy);
1025   sm_printf ("avg_energy: %.17g\n", energy);
1026   sm_printf ("norm:       %.17g\n", norm);
1027 
1028   AudioTool::normalize_factor (norm, audio);
1029 }
1030 
1031 class AutoVolumeCommand : public Command
1032 {
1033   double percent;
1034 public:
AutoVolumeCommand()1035   AutoVolumeCommand() : Command ("auto-volume")
1036   {
1037     set_need_save (true);
1038   }
1039   bool
parse_args(vector<string> & args)1040   parse_args (vector<string>& args)
1041   {
1042     if (args.size() == 1)
1043       {
1044         percent = sm_atof (args[0].c_str());
1045         if (percent < 0 || percent > 100)
1046           {
1047             fprintf (stderr, "bad volume percentage: %f\n", percent);
1048             return false;
1049           }
1050         return true;
1051       }
1052     return false;
1053   }
1054   void
usage(bool one_line)1055   usage (bool one_line)
1056   {
1057     printf ("<percent>\n");
1058   }
1059   bool
exec(Audio & audio)1060   exec (Audio& audio)
1061   {
1062     double energy = compute_energy (audio, percent, false);
1063     normalize_energy (energy, audio);
1064     return true;
1065   }
1066 } auto_volume_command;
1067 
1068 class AutoVolumeFromLoopCommand : public Command
1069 {
1070 public:
AutoVolumeFromLoopCommand()1071   AutoVolumeFromLoopCommand() : Command ("auto-volume-from-loop")
1072   {
1073     set_need_save (true);
1074   }
1075   bool
exec(Audio & audio)1076   exec (Audio& audio)
1077   {
1078     double energy = compute_energy (audio, /* dummy */ 50, true);
1079     normalize_energy (energy, audio);
1080     return true;
1081   }
1082 } auto_volume_from_loop_command;
1083 
1084 class GlobalVolumeCommand : public Command
1085 {
1086   double norm_db;
1087 public:
GlobalVolumeCommand()1088   GlobalVolumeCommand() : Command ("global-volume")
1089   {
1090     set_need_save (true);
1091   }
1092   bool
parse_args(vector<string> & args)1093   parse_args (vector<string>& args)
1094   {
1095     if (args.size() == 1)
1096       {
1097         norm_db = sm_atof (args[0].c_str());
1098         return true;
1099       }
1100     return false;
1101   }
1102   void
usage(bool one_line)1103   usage (bool one_line)
1104   {
1105     printf ("<db>\n");
1106   }
1107   bool
exec(Audio & audio)1108   exec (Audio& audio)
1109   {
1110     AudioTool::normalize_factor (db_to_factor (norm_db), audio);
1111     return true;
1112   }
1113 } global_volume_command;
1114 
1115 
1116 class StripCommand : public Command
1117 {
1118 public:
StripCommand()1119   StripCommand() : Command ("strip")
1120   {
1121     set_need_save (true);
1122   }
1123   bool
exec(Audio & audio)1124   exec (Audio& audio)
1125   {
1126     /* it would be nice if we could have options like --keep-samples
1127      * but for now we just do the default thing
1128      */
1129     for (size_t i = 0; i < audio.contents.size(); i++)
1130       {
1131         audio.contents[i].debug_samples.clear();
1132         audio.contents[i].original_fft.clear();
1133       }
1134     audio.original_samples.clear();
1135     return true;
1136   }
1137 } strip_command;
1138 
1139 class StripAllCommand : public Command
1140 {
1141 public:
StripAllCommand()1142   StripAllCommand() : Command ("strip-all")
1143   {
1144     set_need_save (true);
1145   }
1146   bool
exec(Audio & audio)1147   exec (Audio& audio)
1148   {
1149     for (size_t i = 0; i < audio.contents.size(); i++)
1150       {
1151         audio.contents[i].phases.clear();
1152         audio.contents[i].debug_samples.clear();
1153         audio.contents[i].original_fft.clear();
1154       }
1155     int loop_end = -1;
1156     switch (audio.loop_type)
1157       {
1158         case Audio::LOOP_FRAME_FORWARD:
1159         case Audio::LOOP_FRAME_PING_PONG:
1160           /* strip frames */
1161           loop_end = audio.loop_end;
1162           break;
1163 
1164         case Audio::LOOP_NONE:
1165         case Audio::LOOP_TIME_FORWARD:
1166         case Audio::LOOP_TIME_PING_PONG:
1167           /* don't strip frames */
1168           break;
1169       }
1170     if (loop_end >= 0)
1171       {
1172         size_t new_size = loop_end + 1;
1173 
1174         if (new_size < audio.contents.size())
1175           audio.contents.resize (new_size);
1176       }
1177     audio.original_samples.clear();
1178     return true;
1179   }
1180 } strip_all_command;
1181 
1182 class ExtractSMCommand : public Command
1183 {
1184   int note;
1185   string filename;
1186 public:
ExtractSMCommand()1187   ExtractSMCommand() : Command ("extract-sm")
1188   {
1189   }
1190   bool
parse_args(vector<string> & args)1191   parse_args (vector<string>& args)
1192   {
1193     if (args.size() == 2)
1194       {
1195         note = atoi (args[0].c_str());
1196         filename = args[1];
1197         return true;
1198       }
1199     return false;
1200   }
1201   void
usage(bool one_line)1202   usage (bool one_line)
1203   {
1204     printf ("<note> <filename>\n");
1205   }
1206   bool
exec(Audio & audio)1207   exec (Audio& audio)
1208   {
1209     const WavSetWave *w = wave();
1210     if (w && w->midi_note == note)
1211       {
1212         printf ("saving note %d to %s\n", w->midi_note, filename.c_str());
1213         audio.save (filename);
1214       }
1215     return true;
1216   }
1217 } extract_sm_command;
1218 
1219 int
main(int argc,char ** argv)1220 main (int argc, char **argv)
1221 {
1222   Main main (&argc, &argv);
1223 
1224   if (argc < 3)
1225     {
1226       printf ("usage: smtool <sm_file> <mode> [ <mode_specific_args> ]\n");
1227       printf ("\n");
1228       printf ("mode specific args:\n\n");
1229 
1230       for (vector<Command *>::iterator ci = Command::registry()->begin(); ci != Command::registry()->end(); ci++)
1231         {
1232           printf ("  smtool <sm_file> %s ", (*ci)->mode().c_str());
1233           (*ci)->usage (true);
1234         }
1235       return 1;
1236     }
1237 
1238   const string& mode = argv[2];
1239 
1240   /* figure out file type (we support SpectMorph::WavSet and SpectMorph::Audio) */
1241   InFile *file = new InFile (argv[1]);
1242   if (!file->open_ok())
1243     {
1244       fprintf (stderr, "%s: can't open input file: %s\n", argv[0], argv[1]);
1245       exit (1);
1246     }
1247   string file_type = file->file_type();
1248   delete file;
1249 
1250   Audio *audio = NULL;
1251   WavSet *wav_set = NULL;
1252   if (file_type == "SpectMorph::Audio")
1253     {
1254       audio = new Audio;
1255       load_or_die (*audio, argv[1], mode);
1256     }
1257   else if (file_type == "SpectMorph::WavSet")
1258     {
1259       wav_set = new WavSet;
1260       Error error = wav_set->load (argv[1]);
1261       if (error)
1262         {
1263           fprintf (stderr, "smtool: can't load file: %s\n", argv[1]);
1264           return 1;
1265         }
1266     }
1267   else
1268     {
1269       g_printerr ("unknown file_type: %s\n", file_type.c_str());
1270       return 1;
1271     }
1272 
1273   bool need_save = false;
1274   bool found_command = false;
1275 
1276   vector<string> args;
1277   for (int i = 3; i < argc; i++)
1278     args.push_back (argv[i]);
1279 
1280   for (vector<Command *>::iterator ci = Command::registry()->begin(); ci != Command::registry()->end(); ci++)
1281     {
1282       Command *cmd = *ci;
1283       if (cmd->mode() == mode)
1284         {
1285           assert (!found_command);
1286           found_command = true;
1287 
1288           if (!cmd->parse_args (args))
1289             {
1290               printf ("usage: smtool <sm_file> %s ", cmd->mode().c_str());
1291               cmd->usage (true);
1292               return 1;
1293             }
1294           if (audio)
1295             cmd->exec (*audio);
1296           if (wav_set)
1297             {
1298               set<Audio *> done;
1299 
1300               for (vector<WavSetWave>::iterator wi = wav_set->waves.begin(); wi != wav_set->waves.end(); wi++)
1301                 {
1302                   sm_printf ("## midi_note=%d channel=%d velocity_range=%d..%d\n", wi->midi_note, wi->channel,
1303                              wi->velocity_range_min, wi->velocity_range_max);
1304                   if (done.find (wi->audio) != done.end())
1305                     {
1306                       sm_printf ("## ==> skipped (was processed earlier)\n");
1307                     }
1308                   else
1309                     {
1310                       cmd->set_wave (&*wi);
1311                       cmd->exec (*wi->audio);
1312                       done.insert (wi->audio);
1313                     }
1314                 }
1315             }
1316 
1317           need_save = cmd->need_save();
1318         }
1319     }
1320   if (!found_command)
1321     {
1322       g_printerr ("unknown mode: %s\n", mode.c_str());
1323       return 1;
1324     }
1325   if (need_save)
1326     {
1327       if (audio)
1328         {
1329           Error error = audio->save (argv[1]);
1330           if (error)
1331             {
1332               fprintf (stderr, "error saving audio file: %s\n", argv[1]);
1333               return 1;
1334             }
1335         }
1336       if (wav_set)
1337         {
1338           Error error = wav_set->save (argv[1]);
1339           if (error)
1340             {
1341               fprintf (stderr, "error saving wavset file: %s\n", argv[1]);
1342               return 1;
1343             }
1344         }
1345     }
1346   if (wav_set)
1347     {
1348       delete wav_set;
1349       wav_set = 0;
1350     }
1351   if (audio)
1352     {
1353       delete audio;
1354       audio = 0;
1355     }
1356 }
1357