#! /usr/bin/env python
# -*- coding: utf-8 -*-

"""aubio command line tool

This file was written by Paul Brossier <piem@aubio.org> and is released under
the GNU/GPL v3.

Note: this script is mostly about parsing command line arguments. For more
readable code examples, check out the `python/demos` folder."""

import sys
import argparse
import warnings
import aubio

# help text shared by the subcommands that expose an onset method (onset, cut)
_ONSET_METHOD_HELP = (
    "onset novelty function"
    " <default|energy|hfc|complex|phase|specdiff|kl|mkl|specflux>")

# default value of the -s/--silence option
_DEFAULT_SILENCE = -70

# end stamp given to slices that have no following stamp to stop at
_MAX_TIMESTAMP = 1e120


def aubio_parser():
    """Build the root argument parser and register every subcommand.

    The chosen subcommand name is stored in `args.command`; each analysis
    subcommand also stores its processing class in `args.process`.
    """
    epilog = 'use "%(prog)s <command> --help" for more info about each command'
    parser = argparse.ArgumentParser(epilog=epilog)
    parser.add_argument('-V', '--version', help="show version",
                        action="store_true", dest="show_version")

    subparsers = parser.add_subparsers(title='commands', dest='command',
                                       parser_class=AubioArgumentParser,
                                       metavar="")

    parser_add_subcommand_help(subparsers)

    parser_add_subcommand_onset(subparsers)
    parser_add_subcommand_pitch(subparsers)
    parser_add_subcommand_beat(subparsers)
    parser_add_subcommand_tempo(subparsers)
    parser_add_subcommand_notes(subparsers)
    parser_add_subcommand_mfcc(subparsers)
    parser_add_subcommand_melbands(subparsers)
    parser_add_subcommand_quiet(subparsers)
    parser_add_subcommand_cut(subparsers)

    return parser


def parser_add_subcommand_help(subparsers):
    """Register the global `help` subcommand (it takes no options)."""
    subparsers.add_parser(
        'help',
        help='show help message',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)


def parser_add_subcommand_onset(subparsers):
    """Register the `onset` subcommand, processed by `process_onset`."""
    subparser = subparsers.add_parser(
        'onset',
        help='estimate time of onsets (beginning of sound event)',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    subparser.add_input()
    subparser.add_buf_hop_size()
    subparser.add_method(helpstr=_ONSET_METHOD_HELP)
    subparser.add_threshold()
    subparser.add_silence()
    subparser.add_minioi()
    subparser.add_time_format()
    subparser.add_verbose_help()
    subparser.set_defaults(process=process_onset)


def parser_add_subcommand_pitch(subparsers):
    """Register the `pitch` subcommand, processed by `process_pitch`."""
    subparser = subparsers.add_parser(
        'pitch',
        help='estimate fundamental frequency (monophonic)')
    subparser.add_input()
    subparser.add_buf_hop_size(buf_size=2048)
    helpstr = "pitch detection method <default|yinfft|yin|mcomb|fcomb|schmitt>"
    subparser.add_method(helpstr=helpstr)
    subparser.add_threshold()
    subparser.add_pitch_unit()
    subparser.add_silence()
    subparser.add_time_format()
    subparser.add_verbose_help()
    subparser.set_defaults(process=process_pitch)


def parser_add_subcommand_beat(subparsers):
    """Register the `beat` subcommand, processed by `process_beat`."""
    subparser = subparsers.add_parser(
        'beat',
        help='estimate location of beats')
    subparser.add_input()
    subparser.add_buf_hop_size(buf_size=1024, hop_size=512)
    subparser.add_time_format()
    subparser.add_verbose_help()
    subparser.set_defaults(process=process_beat)


def parser_add_subcommand_tempo(subparsers):
    """Register the `tempo` subcommand, processed by `process_tempo`."""
    subparser = subparsers.add_parser(
        'tempo',
        help='estimate overall tempo in bpm')
    subparser.add_input()
    subparser.add_buf_hop_size(buf_size=1024, hop_size=512)
    subparser.add_time_format()
    subparser.add_verbose_help()
    subparser.set_defaults(process=process_tempo)


def parser_add_subcommand_notes(subparsers):
    """Register the `notes` subcommand, processed by `process_notes`."""
    subparser = subparsers.add_parser(
        'notes',
        help='estimate midi-like notes (monophonic)')
    subparser.add_input()
    subparser.add_buf_hop_size()
    subparser.add_silence()
    subparser.add_release_drop()
    subparser.add_time_format()
    subparser.add_verbose_help()
    subparser.set_defaults(process=process_notes)


def parser_add_subcommand_mfcc(subparsers):
    """Register the `mfcc` subcommand, processed by `process_mfcc`."""
    subparser = subparsers.add_parser(
        'mfcc',
        help='extract Mel-Frequency Cepstrum Coefficients')
    subparser.add_input()
    subparser.add_buf_hop_size()
    subparser.add_time_format()
    subparser.add_verbose_help()
    subparser.set_defaults(process=process_mfcc)


def parser_add_subcommand_melbands(subparsers):
    """Register the `melbands` subcommand, processed by `process_melbands`."""
    subparser = subparsers.add_parser(
        'melbands',
        help='extract energies in Mel-frequency bands')
    subparser.add_input()
    subparser.add_buf_hop_size()
    subparser.add_time_format()
    subparser.add_verbose_help()
    subparser.set_defaults(process=process_melbands)


def parser_add_subcommand_quiet(subparsers):
    """Register the `quiet` subcommand, processed by `process_quiet`."""
    subparser = subparsers.add_parser(
        'quiet',
        help='extract timestamps of quiet and loud regions')
    subparser.add_input()
    subparser.add_hop_size()
    subparser.add_silence()
    subparser.add_time_format()
    subparser.add_verbose_help()
    subparser.set_defaults(process=process_quiet)


def parser_add_subcommand_cut(subparsers):
    """Register the `cut` subcommand, processed by `process_cut`."""
    subparser = subparsers.add_parser(
        'cut',
        help='slice at timestamps')
    subparser.add_input()
    subparser.add_method(helpstr=_ONSET_METHOD_HELP)
    subparser.add_buf_hop_size()
    subparser.add_silence()
    subparser.add_threshold(default=0.3)
    subparser.add_minioi()
    subparser.add_slicer_options()
    subparser.add_time_format()
    subparser.add_verbose_help()
    subparser.set_defaults(process=process_cut)


class AubioArgumentParser(argparse.ArgumentParser):
    """ArgumentParser with helpers adding the options shared by subcommands.

    Each `add_*` method registers one option (or a small group of options)
    on this parser; the subcommand builders above combine them.
    """

    def add_input(self):
        """Add the input source (positional or -i) and -r/--samplerate."""
        # the source can be given positionally or through -i; main() lets
        # the -i value take precedence when both are present
        self.add_argument("source_uri", default=None, nargs='?',
                          help="input sound file to analyse",
                          metavar="<source_uri>")
        self.add_argument("-i", "--input", dest="source_uri2",
                          help="input sound file to analyse",
                          metavar="<source_uri>")
        self.add_argument("-r", "--samplerate",
                          metavar="<freq>", type=int,
                          action="store", dest="samplerate", default=0,
                          help="samplerate at which the file should be"
                               " represented")

    def add_verbose_help(self):
        """Add -v (counted, starts at 1) and -q (sets verbosity to 0)."""
        self.add_argument("-v", "--verbose",
                          action="count", dest="verbose", default=1,
                          help="make lots of noise [default]")
        self.add_argument("-q", "--quiet",
                          action="store_const", dest="verbose", const=0,
                          help="be quiet")

    def add_buf_hop_size(self, buf_size=512, hop_size=256):
        """Add both -B/--bufsize and -H/--hopsize with the given defaults."""
        self.add_buf_size(buf_size=buf_size)
        self.add_hop_size(hop_size=hop_size)

    def add_buf_size(self, buf_size=512):
        """Add -B/--bufsize, stored as `buf_size`."""
        self.add_argument("-B", "--bufsize",
                          action="store", dest="buf_size", default=buf_size,
                          metavar="<size>", type=int,
                          help="buffer size [default=%d]" % buf_size)

    def add_hop_size(self, hop_size=256):
        """Add -H/--hopsize, stored as `hop_size`."""
        self.add_argument("-H", "--hopsize",
                          metavar="<size>", type=int,
                          action="store", dest="hop_size", default=hop_size,
                          help="overlap size [default=%d]" % hop_size)

    def add_method(self, method='default', helpstr='method'):
        """Add -m/--method; `helpstr` describes the accepted method names."""
        self.add_argument("-m", "--method",
                          metavar="<method>", type=str,
                          action="store", dest="method", default=method,
                          help="%s [default=%s]" % (helpstr, method))

    def add_threshold(self, default=None):
        """Add -t/--threshold; None means no threshold gets applied."""
        self.add_argument("-t", "--threshold",
                          metavar="<threshold>", type=float,
                          action="store", dest="threshold", default=default,
                          help="threshold [default=%s]" % default)

    def add_silence(self):
        """Add -s/--silence, stored as `silence`."""
        self.add_argument("-s", "--silence",
                          metavar="<value>", type=float,
                          action="store", dest="silence",
                          default=_DEFAULT_SILENCE,
                          help="silence threshold")

    def add_release_drop(self):
        """Add -d/--release-drop, stored as `release_drop`."""
        self.add_argument("-d", "--release-drop",
                          metavar="<value>", type=float,
                          action="store", dest="release_drop", default=10,
                          help="release drop threshold")

    def add_minioi(self, default="12ms"):
        """Add -M/--minioi, kept as a string.

        `process_onset` interprets the suffix: `ms`, `s`, or none for an
        integer value.
        """
        self.add_argument("-M", "--minioi",
                          metavar="<value>", type=str,
                          action="store", dest="minioi", default=default,
                          help="minimum Inter-Onset Interval [default=%s]"
                               % default)

    def add_pitch_unit(self, default="Hz"):
        """Add -u/--pitch-unit, stored as `pitch_unit`."""
        help_str = "frequency unit, should be one of Hz, midi, bin, cent"
        help_str += " [default=%s]" % default
        self.add_argument("-u", "--pitch-unit",
                          metavar="<value>", type=str,
                          action="store", dest="pitch_unit", default=default,
                          help=help_str)

    def add_time_format(self):
        """Add -T/--time-format; the value is validated later by timefunc."""
        helpstr = "select time values output format (samples, ms, seconds)"
        helpstr += " [default=seconds]"
        self.add_argument("-T", "--time-format",
                          metavar='format',
                          dest="time_format",
                          default=None,
                          help=helpstr)

    def add_slicer_options(self):
        """Add the options controlling how `cut` writes its slices."""
        self.add_argument("-o", "--output", type=str,
                          metavar="<outputdir>",
                          action="store", dest="output_directory",
                          default=None,
                          help="specify path where slices of the original"
                               " file should be created")
        self.add_argument("--cut-until-nsamples", type=int,
                          metavar="<samples>",
                          action="store", dest="cut_until_nsamples",
                          default=None,
                          help="how many extra samples should be added at"
                               " the end of each slice")
        # this option counts slices, so the metavar says <slices>
        self.add_argument("--cut-every-nslices", type=int,
                          metavar="<slices>",
                          action="store", dest="cut_every_nslices",
                          default=None,
                          help="how many slices should be grouped together"
                               " at each cut")
        self.add_argument("--cut-until-nslices", type=int,
                          metavar="<slices>",
                          action="store", dest="cut_until_nslices",
                          default=None,
                          help="how many extra slices should be added at"
                               " the end of each slice")
        self.add_argument("--create-first",
                          action="store_true", dest="create_first",
                          default=False,
                          help="always include first slice")


# some utilities

def samples2seconds(n_frames, samplerate):
    """Format a frame count as seconds, followed by a tab."""
    return "%f\t" % (n_frames / float(samplerate))


def samples2milliseconds(n_frames, samplerate):
    """Format a frame count as milliseconds, followed by a tab."""
    return "%f\t" % (1000. * n_frames / float(samplerate))


def samples2samples(n_frames, _samplerate):
    """Format a frame count as an integer number of samples, then a tab."""
    return "%d\t" % n_frames


def timefunc(mode):
    """Return the time formatter matching `mode`.

    `None`, 'seconds' and 's' select seconds; 'ms' and 'milliseconds' select
    milliseconds; 'samples' selects raw sample counts.

    Raises ValueError for any other value.
    """
    if mode is None or mode == 'seconds' or mode == 's':
        return samples2seconds
    elif mode == 'ms' or mode == 'milliseconds':
        return samples2milliseconds
    elif mode == 'samples':
        return samples2samples
    else:
        raise ValueError("invalid time format '%s'" % mode)


# definition of processing classes

class default_process(object):
    """Base class of the per-subcommand processors.

    main() instantiates a processor with the parsed arguments, calls it on
    each block, passes the result to `repr_res` (when verbose), and finally
    calls `flush`. Subclasses provide `__call__` and `repr_res`.
    """

    def __init__(self, args):
        if 'time_format' in args:
            self.time2string = timefunc(args.time_format)
        # subclasses call parse_options() before this, so `options` is
        # normally set by the time we get here
        if args.verbose > 2 and hasattr(self, 'options'):
            name = type(self).__name__.split('_')[1]
            optstr = ' '.join(['running', name, 'with options',
                               repr(self.options), '\n'])
            sys.stderr.write(optstr)

    def flush(self, frames_read, samplerate):
        """Hook called once after the last block; does nothing by default."""
        pass

    def parse_options(self, args, valid_opts):
        """Store in `self.options` the arguments whose name is in valid_opts.
        """
        # get any valid options found in a dictionary of arguments
        options = {k: v for k, v in vars(args).items() if k in valid_opts}
        self.options = options

    def remap_pvoc_options(self, options):
        """Rename buf_size/hop_size to win_s/hop_s, in place.

        The modified dictionary is also stored in `self.options`.
        """
        # FIXME: we need to remap buf_size to win_s, hop_size to hop_s
        # adjust python/ext/py-phasevoc.c to understand buf_size/hop_size
        if 'buf_size' in options:
            options['win_s'] = options['buf_size']
            del options['buf_size']
        if 'hop_size' in options:
            options['hop_s'] = options['hop_size']
            del options['hop_size']
        self.options = options


class process_onset(default_process):
    """Processor of the `onset` subcommand: prints the time of each onset."""

    valid_opts = ['method', 'hop_size', 'buf_size', 'samplerate']

    def __init__(self, args):
        self.parse_options(args, self.valid_opts)
        self.onset = aubio.onset(**self.options)
        if args.threshold is not None:
            self.onset.set_threshold(args.threshold)
        if args.minioi:
            # the suffix selects the setter: 'ms', 's', or a bare integer
            if args.minioi.endswith('ms'):
                self.onset.set_minioi_ms(float(args.minioi[:-2]))
            elif args.minioi.endswith('s'):
                self.onset.set_minioi_s(float(args.minioi[:-1]))
            else:
                self.onset.set_minioi(int(args.minioi))
        # compare against None so that an explicit threshold of 0 is
        # applied too, as the other processors do
        if args.silence is not None:
            self.onset.set_silence(args.silence)
        super(process_onset, self).__init__(args)

    def __call__(self, block):
        return self.onset(block)

    def repr_res(self, res, _frames_read, samplerate):
        """Print the last onset time when `res` reports an onset."""
        if res[0] != 0:
            outstr = self.time2string(self.onset.get_last(), samplerate)
            sys.stdout.write(outstr + '\n')


class process_pitch(default_process):
    """Processor of the `pitch` subcommand: prints time and pitch value."""

    valid_opts = ['method', 'hop_size', 'buf_size', 'samplerate']

    def __init__(self, args):
        self.parse_options(args, self.valid_opts)
        self.pitch = aubio.pitch(**self.options)
        if args.pitch_unit is not None:
            self.pitch.set_unit(args.pitch_unit)
        if args.threshold is not None:
            # for pitch, -t/--threshold is passed on as the tolerance
            self.pitch.set_tolerance(args.threshold)
        if args.silence is not None:
            self.pitch.set_silence(args.silence)
        super(process_pitch, self).__init__(args)

    def __call__(self, block):
        return self.pitch(block)

    def repr_res(self, res, frames_read, samplerate):
        """Print the block time followed by the first value of `res`."""
        fmt_out = self.time2string(frames_read, samplerate)
        sys.stdout.write(fmt_out + "%.6f\n" % res[0])


class process_beat(default_process):
    """Processor of the `beat` subcommand: prints the time of each beat."""

    valid_opts = ['method', 'hop_size', 'buf_size', 'samplerate']

    def __init__(self, args):
        self.parse_options(args, self.valid_opts)
        self.tempo = aubio.tempo(**self.options)
        super(process_beat, self).__init__(args)

    def __call__(self, block):
        return self.tempo(block)

    def repr_res(self, res, _frames_read, samplerate):
        """Print the last beat time when `res` reports a beat."""
        if res[0] != 0:
            outstr = self.time2string(self.tempo.get_last(), samplerate)
            sys.stdout.write(outstr + '\n')


class process_tempo(process_beat):
    """Processor of the `tempo` subcommand: prints one overall bpm value."""

    def __init__(self, args):
        super(process_tempo, self).__init__(args)
        self.beat_locations = []

    def repr_res(self, res, _frames_read, samplerate):
        """Record the beat location instead of printing it."""
        if res[0] != 0:
            self.beat_locations.append(self.tempo.get_last_s())

    def flush(self, frames_read, samplerate):
        """Print the bpm computed from the intervals between beats."""
        import numpy as np
        if len(self.beat_locations) < 2:
            outstr = "unknown bpm"
        else:
            bpms = 60. / np.diff(self.beat_locations)
            # NOTE(review): this value used to be named `median_bpm` while
            # being computed with np.mean; the mean is kept so the output
            # does not change -- confirm which one is intended
            mean_bpm = np.mean(bpms)
            if len(self.beat_locations) < 10:
                outstr = "%.2f bpm (uncertain)" % mean_bpm
            else:
                outstr = "%.2f bpm" % mean_bpm
        sys.stdout.write(outstr + '\n')


class process_notes(default_process):
    """Processor of the `notes` subcommand: prints note events."""

    valid_opts = ['method', 'hop_size', 'buf_size', 'samplerate']

    def __init__(self, args):
        self.parse_options(args, self.valid_opts)
        self.notes = aubio.notes(**self.options)
        if args.silence is not None:
            self.notes.set_silence(args.silence)
        if args.release_drop is not None:
            self.notes.set_release_drop(args.release_drop)
        super(process_notes, self).__init__(args)

    def __call__(self, block):
        return self.notes(block)

    def repr_res(self, res, frames_read, samplerate):
        """Print note on/off events.

        A note on writes its value and start time without a newline; the
        following note off (or the final flush) completes the line with the
        end time.
        """
        if res[2] != 0:  # note off
            fmt_out = self.time2string(frames_read, samplerate)
            sys.stdout.write(fmt_out + '\n')
        if res[0] != 0:  # note on
            lastmidi = res[0]
            fmt_out = "%f\t" % lastmidi
            fmt_out += self.time2string(frames_read, samplerate)
            sys.stdout.write(fmt_out)  # + '\t')

    def flush(self, frames_read, samplerate):
        """Print the end-of-file time, terminating any pending line."""
        eof = self.time2string(frames_read, samplerate)
        sys.stdout.write(eof + '\n')


class process_mfcc(default_process):
    """Processor of the `mfcc` subcommand: prints coefficients per block."""

    def __init__(self, args):
        valid_opts1 = ['hop_size', 'buf_size']
        self.parse_options(args, valid_opts1)
        self.remap_pvoc_options(self.options)
        self.pv = aubio.pvoc(**self.options)

        valid_opts2 = ['buf_size', 'n_filters', 'n_coeffs', 'samplerate']
        self.parse_options(args, valid_opts2)
        self.mfcc = aubio.mfcc(**self.options)

        # remember all options
        self.parse_options(args, list(set(valid_opts1 + valid_opts2)))

        super(process_mfcc, self).__init__(args)

    def __call__(self, block):
        fftgrain = self.pv(block)
        return self.mfcc(fftgrain)

    def repr_res(self, res, frames_read, samplerate):
        """Print the block time followed by the values of `res`."""
        fmt_out = self.time2string(frames_read, samplerate)
        fmt_out += ' '.join(["% 9.7f" % f for f in res.tolist()])
        sys.stdout.write(fmt_out + '\n')


class process_melbands(default_process):
    """Processor of the `melbands` subcommand: prints energies per block."""

    def __init__(self, args):
        self.args = args
        valid_opts = ['hop_size', 'buf_size']
        self.parse_options(args, valid_opts)
        self.remap_pvoc_options(self.options)
        self.pv = aubio.pvoc(**self.options)

        valid_opts = ['buf_size', 'n_filters']
        self.parse_options(args, valid_opts)
        self.remap_pvoc_options(self.options)
        self.filterbank = aubio.filterbank(**self.options)
        self.filterbank.set_mel_coeffs_slaney(args.samplerate)

        super(process_melbands, self).__init__(args)

    def __call__(self, block):
        fftgrain = self.pv(block)
        return self.filterbank(fftgrain)

    def repr_res(self, res, frames_read, samplerate):
        """Print the block time followed by the values of `res`."""
        fmt_out = self.time2string(frames_read, samplerate)
        fmt_out += ' '.join(["% 9.7f" % f for f in res.tolist()])
        sys.stdout.write(fmt_out + '\n')


class process_quiet(default_process):
    """Processor of the `quiet` subcommand: prints quiet/noisy transitions.
    """

    def __init__(self, args):
        self.args = args
        valid_opts = ['hop_size', 'silence']
        self.parse_options(args, valid_opts)
        self.wassilence = 1

        # always define the threshold, since __call__ reads it
        if args.silence is not None:
            self.silence = args.silence
        else:
            self.silence = _DEFAULT_SILENCE
        super(process_quiet, self).__init__(args)

    def __call__(self, block):
        """Classify `block` relative to the previous one.

        Returns 2 for newly found silence, 1 for continued silence, -1 for
        newly found noise and 0 for continued noise.
        """
        if aubio.silence_detection(block, self.silence) == 1:
            if self.wassilence != 1:
                self.wassilence = 1
                return 2   # newly found silence
            return 1       # silence again
        else:
            if self.wassilence != 0:
                self.wassilence = 0
                return -1  # newly found noise
            return 0       # noise again

    def repr_res(self, res, frames_read, samplerate):
        """Print a line only when the state changed (res is -1 or 2)."""
        fmt_out = None
        if res == -1:
            fmt_out = "NOISY: "
        if res == 2:
            fmt_out = "QUIET: "
        if fmt_out is not None:
            fmt_out += self.time2string(frames_read, samplerate)
            sys.stdout.write(fmt_out + '\n')


class process_cut(process_onset):
    """Processor of the `cut` subcommand: slices the source at each onset."""

    def __init__(self, args):
        super(process_cut, self).__init__(args)
        self.slices = []
        # from here on `options` holds the full argument namespace, which
        # flush() needs for the slicer options
        self.options = args

    def __call__(self, block):
        ret = super(process_cut, self).__call__(block)
        if ret:
            self.slices.append(self.onset.get_last())
        return ret

    def flush(self, frames_read, samplerate):
        """Write the slices, then report what was done on stderr."""
        _cut_slice(self.options, self.slices)
        duration = float(frames_read) / float(samplerate)
        base_info = '%(source_file)s' % \
            {'source_file': self.options.source_uri}
        base_info += ' (total %(duration).2fs at %(samplerate)dHz)\n' % \
            {'duration': duration, 'samplerate': samplerate}
        info = "created %d slices from " % len(self.slices)
        info += base_info
        sys.stderr.write(info)


def _slice_end_stamps(options, timestamps):
    """Compute the start and end stamps of the slices to create.

    Returns `(timestamps, timestamps_end)`: `timestamps` is thinned out when
    `options.cut_every_nslices` is set, and `timestamps_end` is None unless
    `cut_until_nsamples` or `cut_until_nslices` is set (the latter wins, and
    a warning is issued when both are given). When not None,
    `timestamps_end` has one entry per start stamp.
    """
    timestamps_end = None
    if options.cut_every_nslices:
        timestamps = timestamps[::options.cut_every_nslices]
    nstamps = len(timestamps)
    if options.cut_until_nslices and options.cut_until_nsamples:
        msg = "using cut_until_nslices, but cut_until_nsamples is set"
        warnings.warn(msg)
    if options.cut_until_nsamples:
        lag = options.cut_until_nsamples
        timestamps_end = [t + lag for t in timestamps[1:]]
        timestamps_end += [_MAX_TIMESTAMP]
    if options.cut_until_nslices:
        slice_lag = options.cut_until_nslices
        timestamps_end = list(timestamps[1 + slice_lag:])
        # pad to exactly one end per start: a fixed padding of slice_lag + 1
        # entries overshoots once slice_lag reaches the number of stamps
        timestamps_end += [_MAX_TIMESTAMP] * (nstamps - len(timestamps_end))
    return timestamps, timestamps_end


def _cut_slice(options, timestamps):
    """Slice `options.source_uri` at `timestamps`; no-op without stamps."""
    # cutting pass
    if len(timestamps) == 0:
        return
    # generate output files
    timestamps, timestamps_end = _slice_end_stamps(options, timestamps)
    aubio.slice_source_at_stamps(options.source_uri,
                                 timestamps, timestamps_end=timestamps_end,
                                 output_dir=options.output_directory,
                                 samplerate=options.samplerate,
                                 create_first=options.create_first)


def main():
    """Entry point: parse the command line and run the chosen subcommand.

    Exits with status 0 after `-V` or `help`, and with status 1 when no
    command or no source is given, or on KeyboardInterrupt.
    """
    parser = aubio_parser()
    if sys.version_info[0] != 3:
        # on py2, create a dummy ArgumentParser to workaround the
        # optional subcommand issue. See https://bugs.python.org/issue9253
        # This ensures that:
        #  - version string is shown when only '-V' is passed
        #  - help is printed if  '-V' is passed with any other argument
        #  - any other argument get forwarded to the real parser
        parser_root = argparse.ArgumentParser(add_help=False)
        parser_root.add_argument('-V', '--version', help="show version",
                                 action="store_true", dest="show_version")
        args, extras = parser_root.parse_known_args()
        if not args.show_version:  # no -V, forward to parser
            args = parser.parse_args(extras, namespace=args)
        elif len(extras) != 0:  # -V with other arguments, print help
            parser.print_help()
            sys.exit(1)
    else:  # in py3, we can simply use parser directly
        args = parser.parse_args()
    if 'show_version' in args and args.show_version:
        sys.stdout.write('aubio version ' + aubio.version + '\n')
        sys.exit(0)
    elif 'verbose' in args and args.verbose > 3:
        sys.stderr.write('aubio version ' + aubio.version + '\n')
    # getattr: the namespace may lack `command` altogether
    command = getattr(args, 'command', None)
    if command is None or command == 'help':
        # print help; only an explicit 'help' command is a success
        parser.print_help()
        if command == 'help':
            sys.exit(0)
        else:
            sys.exit(1)
    elif not args.source_uri and not args.source_uri2:
        sys.stderr.write("Error: a source is required\n")
        parser.print_help()
        sys.exit(1)
    elif args.source_uri2 is not None:
        args.source_uri = args.source_uri2
    try:
        # open source_uri
        with aubio.source(args.source_uri, hop_size=args.hop_size,
                          samplerate=args.samplerate) as a_source:
            # always update args.samplerate to native samplerate, in case
            # source was opened with args.samplerate=0
            args.samplerate = a_source.samplerate
            # create the processor for this subcommand
            processor = args.process(args)
            frames_read = 0
            while True:
                # read new block from source
                block, read = a_source()
                # execute processor on this block
                res = processor(block)
                # print results for this block
                if args.verbose > 0:
                    processor.repr_res(res, frames_read, a_source.samplerate)
                # increment total number of frames read
                frames_read += read
                # exit loop at end of file
                if read < a_source.hop_size:
                    break
            # flush the processor if needed
            processor.flush(frames_read, a_source.samplerate)
            if args.verbose > 1:
                fmt_string = "read {:.2f}s"
                fmt_string += " ({:d} samples in {:d} blocks of {:d})"
                fmt_string += " from {:s} at {:d}Hz\n"
                sys.stderr.write(fmt_string.format(
                    frames_read / float(a_source.samplerate),
                    frames_read,
                    frames_read // a_source.hop_size + 1,
                    a_source.hop_size,
                    a_source.uri,
                    a_source.samplerate))
    except KeyboardInterrupt:
        sys.exit(1)