1 /* Silence effect for SoX 2 * by Heikki Leinonen (heilei@iki.fi) 25.03.2001 3 * Major Modifications by Chris Bagwell 06.08.2001 4 * Minor addition by Donnie Smith 13.08.2003 5 * 6 * This effect can delete samples from the start of a sound file 7 * until it sees a specified count of samples exceed a given threshold 8 * (any of the channels). 9 * This effect can also delete samples from the end of a sound file 10 * when it sees a specified count of samples below a given threshold 11 * (all channels). 12 * It may also be used to delete samples anywhere in a sound file. 13 * Thesholds can be given as either a percentage or in decibels. 14 */ 15 16 #include "sox_i.h" 17 18 #include <string.h> 19 20 /* Private data for silence effect. */ 21 22 #define SILENCE_TRIM 0 23 #define SILENCE_TRIM_FLUSH 1 24 #define SILENCE_COPY 2 25 #define SILENCE_COPY_FLUSH 3 26 #define SILENCE_STOP 4 27 28 typedef struct { 29 char start; 30 int start_periods; 31 char *start_duration_str; 32 size_t start_duration; 33 double start_threshold; 34 char start_unit; /* "d" for decibels or "%" for percent. */ 35 int restart; 36 37 sox_sample_t *start_holdoff; 38 size_t start_holdoff_offset; 39 size_t start_holdoff_end; 40 int start_found_periods; 41 42 char stop; 43 int stop_periods; 44 char *stop_duration_str; 45 size_t stop_duration; 46 double stop_threshold; 47 char stop_unit; 48 49 sox_sample_t *stop_holdoff; 50 size_t stop_holdoff_offset; 51 size_t stop_holdoff_end; 52 int stop_found_periods; 53 54 double *window; 55 double *window_current; 56 double *window_end; 57 size_t window_size; 58 double rms_sum; 59 60 char leave_silence; 61 62 /* State Machine */ 63 char mode; 64 } priv_t; 65 66 static void clear_rms(sox_effect_t * effp) 67 68 { 69 priv_t * silence = (priv_t *) effp->priv; 70 71 memset(silence->window, 0, 72 silence->window_size * sizeof(double)); 73 74 silence->window_current = silence->window; 75 silence->window_end = silence->window + silence->window_size; 76 silence->rms_sum = 0; 77 } 78 79 static int sox_silence_getopts(sox_effect_t * effp, int argc, char **argv) 80 { 81 priv_t * silence = (priv_t *) effp->priv; 82 int parse_count; 83 uint64_t temp; 84 const char *n; 85 --argc, ++argv; 86 87 /* check for option switches */ 88 silence->leave_silence = sox_false; 89 if (argc > 0) 90 { 91 if (!strcmp("-l", *argv)) { 92 argc--; argv++; 93 silence->leave_silence = sox_true; 94 } 95 } 96 97 if (argc < 1) 98 return lsx_usage(effp); 99 100 /* Parse data related to trimming front side */ 101 silence->start = sox_false; 102 if (sscanf(argv[0], "%d", &silence->start_periods) != 1) 103 return lsx_usage(effp); 104 if (silence->start_periods < 0) 105 { 106 lsx_fail("Periods must not be negative"); 107 return(SOX_EOF); 108 } 109 argv++; 110 argc--; 111 112 if (silence->start_periods > 0) 113 { 114 silence->start = sox_true; 115 if (argc < 2) 116 return lsx_usage(effp); 117 118 /* We do not know the sample rate so we can not fully 119 * parse the duration info yet. So save argument off 120 * for future processing. 121 */ 122 silence->start_duration_str = lsx_strdup(argv[0]); 123 /* Perform a fake parse to do error checking */ 124 n = lsx_parsesamples(0.,silence->start_duration_str,&temp,'s'); 125 if (!n || *n) 126 return lsx_usage(effp); 127 silence->start_duration = temp; 128 129 parse_count = sscanf(argv[1], "%lf%c", &silence->start_threshold, 130 &silence->start_unit); 131 if (parse_count < 1) 132 return lsx_usage(effp); 133 else if (parse_count < 2) 134 silence->start_unit = '%'; 135 136 argv++; argv++; 137 argc--; argc--; 138 } 139 140 silence->stop = sox_false; 141 /* Parse data needed for trimming of backside */ 142 if (argc > 0) 143 { 144 if (argc < 3) 145 return lsx_usage(effp); 146 if (sscanf(argv[0], "%d", &silence->stop_periods) != 1) 147 return lsx_usage(effp); 148 if (silence->stop_periods < 0) 149 { 150 silence->stop_periods = -silence->stop_periods; 151 silence->restart = 1; 152 } 153 else 154 silence->restart = 0; 155 silence->stop = sox_true; 156 argv++; 157 argc--; 158 159 /* We do not know the sample rate so we can not fully 160 * parse the duration info yet. So save argument off 161 * for future processing. 162 */ 163 silence->stop_duration_str = lsx_strdup(argv[0]); 164 /* Perform a fake parse to do error checking */ 165 n = lsx_parsesamples(0.,silence->stop_duration_str,&temp,'s'); 166 if (!n || *n) 167 return lsx_usage(effp); 168 silence->stop_duration = temp; 169 170 parse_count = sscanf(argv[1], "%lf%c", &silence->stop_threshold, 171 &silence->stop_unit); 172 if (parse_count < 1) 173 return lsx_usage(effp); 174 else if (parse_count < 2) 175 silence->stop_unit = '%'; 176 177 argv++; argv++; 178 argc--; argc--; 179 } 180 181 /* Error checking */ 182 if (silence->start) 183 { 184 if ((silence->start_unit != '%') && (silence->start_unit != 'd')) 185 { 186 lsx_fail("Invalid unit specified"); 187 return lsx_usage(effp); 188 } 189 if ((silence->start_unit == '%') && ((silence->start_threshold < 0.0) 190 || (silence->start_threshold > 100.0))) 191 { 192 lsx_fail("silence threshold should be between 0.0 and 100.0 %%"); 193 return (SOX_EOF); 194 } 195 if ((silence->start_unit == 'd') && (silence->start_threshold >= 0.0)) 196 { 197 lsx_fail("silence threshold should be less than 0.0 dB"); 198 return(SOX_EOF); 199 } 200 } 201 202 if (silence->stop) 203 { 204 if ((silence->stop_unit != '%') && (silence->stop_unit != 'd')) 205 { 206 lsx_fail("Invalid unit specified"); 207 return(SOX_EOF); 208 } 209 if ((silence->stop_unit == '%') && ((silence->stop_threshold < 0.0) || 210 (silence->stop_threshold > 100.0))) 211 { 212 lsx_fail("silence threshold should be between 0.0 and 100.0 %%"); 213 return (SOX_EOF); 214 } 215 if ((silence->stop_unit == 'd') && (silence->stop_threshold >= 0.0)) 216 { 217 lsx_fail("silence threshold should be less than 0.0 dB"); 218 return(SOX_EOF); 219 } 220 } 221 return(SOX_SUCCESS); 222 } 223 224 static int sox_silence_start(sox_effect_t * effp) 225 { 226 priv_t *silence = (priv_t *)effp->priv; 227 uint64_t temp; 228 229 /* When you want to remove silence, small window sizes are 230 * better or else RMS will look like non-silence at 231 * aburpt changes from load to silence. 232 */ 233 silence->window_size = (effp->in_signal.rate / 50) * 234 effp->in_signal.channels; 235 silence->window = lsx_malloc(silence->window_size * sizeof(double)); 236 237 clear_rms(effp); 238 239 /* Now that we know sample rate, reparse duration. */ 240 if (silence->start) 241 { 242 if (lsx_parsesamples(effp->in_signal.rate, silence->start_duration_str, 243 &temp, 's') == NULL) 244 return lsx_usage(effp); 245 silence->start_duration = temp * effp->in_signal.channels; 246 } 247 if (silence->stop) 248 { 249 if (lsx_parsesamples(effp->in_signal.rate,silence->stop_duration_str, 250 &temp,'s') == NULL) 251 return lsx_usage(effp); 252 silence->stop_duration = temp * effp->in_signal.channels; 253 } 254 255 if (silence->start) 256 silence->mode = SILENCE_TRIM; 257 else 258 silence->mode = SILENCE_COPY; 259 260 silence->start_holdoff = lsx_malloc(sizeof(sox_sample_t)*silence->start_duration); 261 silence->start_holdoff_offset = 0; 262 silence->start_holdoff_end = 0; 263 silence->start_found_periods = 0; 264 265 silence->stop_holdoff = lsx_malloc(sizeof(sox_sample_t)*silence->stop_duration); 266 silence->stop_holdoff_offset = 0; 267 silence->stop_holdoff_end = 0; 268 silence->stop_found_periods = 0; 269 270 effp->out_signal.length = SOX_UNKNOWN_LEN; /* depends on input data */ 271 272 return(SOX_SUCCESS); 273 } 274 275 static sox_bool aboveThreshold(sox_effect_t const * effp, 276 sox_sample_t value /* >= 0 */, double threshold, int unit) 277 { 278 /* When scaling low bit data, noise values got scaled way up */ 279 /* Only consider the original bits when looking for silence */ 280 sox_sample_t masked_value = value & (-1 << (32 - effp->in_signal.precision)); 281 282 double scaled_value = (double)masked_value / SOX_SAMPLE_MAX; 283 284 if (unit == '%') 285 scaled_value *= 100; 286 else if (unit == 'd') 287 scaled_value = linear_to_dB(scaled_value); 288 289 return scaled_value > threshold; 290 } 291 292 static sox_sample_t compute_rms(sox_effect_t * effp, sox_sample_t sample) 293 { 294 priv_t * silence = (priv_t *) effp->priv; 295 double new_sum; 296 sox_sample_t rms; 297 298 new_sum = silence->rms_sum; 299 new_sum -= *silence->window_current; 300 new_sum += ((double)sample * (double)sample); 301 302 rms = sqrt(new_sum / silence->window_size); 303 304 return (rms); 305 } 306 307 static void update_rms(sox_effect_t * effp, sox_sample_t sample) 308 { 309 priv_t * silence = (priv_t *) effp->priv; 310 311 silence->rms_sum -= *silence->window_current; 312 *silence->window_current = ((double)sample * (double)sample); 313 silence->rms_sum += *silence->window_current; 314 315 silence->window_current++; 316 if (silence->window_current >= silence->window_end) 317 silence->window_current = silence->window; 318 } 319 320 /* Process signed long samples from ibuf to obuf. */ 321 /* Return number of samples processed in isamp and osamp. */ 322 static int sox_silence_flow(sox_effect_t * effp, const sox_sample_t *ibuf, sox_sample_t *obuf, 323 size_t *isamp, size_t *osamp) 324 { 325 priv_t * silence = (priv_t *) effp->priv; 326 int threshold; 327 size_t i, j; 328 size_t nrOfTicks, /* sometimes wide, sometimes non-wide samples */ 329 nrOfInSamplesRead, nrOfOutSamplesWritten; /* non-wide samples */ 330 331 nrOfInSamplesRead = 0; 332 nrOfOutSamplesWritten = 0; 333 334 switch (silence->mode) 335 { 336 case SILENCE_TRIM: 337 /* Reads and discards all input data until it detects a 338 * sample that is above the specified threshold. Turns on 339 * copy mode when detected. 340 * Need to make sure and copy input in groups of "channels" to 341 * prevent getting buffers out of sync. 342 * nrOfTicks counts wide samples here. 343 */ 344 silence_trim: 345 nrOfTicks = min((*isamp-nrOfInSamplesRead), 346 (*osamp-nrOfOutSamplesWritten)) / 347 effp->in_signal.channels; 348 for(i = 0; i < nrOfTicks; i++) 349 { 350 threshold = 0; 351 for (j = 0; j < effp->in_signal.channels; j++) 352 { 353 threshold |= aboveThreshold(effp, 354 compute_rms(effp, ibuf[j]), 355 silence->start_threshold, 356 silence->start_unit); 357 } 358 359 if (threshold) 360 { 361 /* Add to holdoff buffer */ 362 for (j = 0; j < effp->in_signal.channels; j++) 363 { 364 update_rms(effp, *ibuf); 365 silence->start_holdoff[ 366 silence->start_holdoff_end++] = *ibuf++; 367 nrOfInSamplesRead++; 368 } 369 370 if (silence->start_holdoff_end >= 371 silence->start_duration) 372 { 373 if (++silence->start_found_periods >= 374 silence->start_periods) 375 { 376 silence->mode = SILENCE_TRIM_FLUSH; 377 goto silence_trim_flush; 378 } 379 /* Trash holdoff buffer since its not 380 * needed. Start looking again. 381 */ 382 silence->start_holdoff_offset = 0; 383 silence->start_holdoff_end = 0; 384 } 385 } 386 else /* !above Threshold */ 387 { 388 silence->start_holdoff_end = 0; 389 for (j = 0; j < effp->in_signal.channels; j++) 390 { 391 update_rms(effp, ibuf[j]); 392 } 393 ibuf += effp->in_signal.channels; 394 nrOfInSamplesRead += effp->in_signal.channels; 395 } 396 } /* for nrOfTicks */ 397 break; 398 399 case SILENCE_TRIM_FLUSH: 400 /* nrOfTicks counts non-wide samples here. */ 401 silence_trim_flush: 402 nrOfTicks = min((silence->start_holdoff_end - 403 silence->start_holdoff_offset), 404 (*osamp-nrOfOutSamplesWritten)); 405 nrOfTicks -= nrOfTicks % effp->in_signal.channels; 406 for(i = 0; i < nrOfTicks; i++) 407 { 408 *obuf++ = silence->start_holdoff[silence->start_holdoff_offset++]; 409 nrOfOutSamplesWritten++; 410 } 411 412 /* If fully drained holdoff then switch to copy mode */ 413 if (silence->start_holdoff_offset == silence->start_holdoff_end) 414 { 415 silence->start_holdoff_offset = 0; 416 silence->start_holdoff_end = 0; 417 silence->mode = SILENCE_COPY; 418 goto silence_copy; 419 } 420 break; 421 422 case SILENCE_COPY: 423 /* Attempts to copy samples into output buffer. 424 * 425 * Case B: 426 * If not looking for silence to terminate copy then 427 * blindly copy data into output buffer. 428 * 429 * Case A: 430 * 431 * Case 1a: 432 * If previous silence was detect then see if input sample is 433 * above threshold. If found then flush out hold off buffer 434 * and copy over to output buffer. 435 * 436 * Case 1b: 437 * If no previous silence detect then see if input sample 438 * is above threshold. If found then copy directly 439 * to output buffer. 440 * 441 * Case 2: 442 * If not above threshold then silence is detect so 443 * store in hold off buffer and do not write to output 444 * buffer. Even though it wasn't put in output 445 * buffer, inform user that input was consumed. 446 * 447 * If hold off buffer is full after this then stop 448 * copying data and discard data in hold off buffer. 449 * 450 * Special leave_silence logic: 451 * 452 * During this mode, go ahead and copy input 453 * samples to output buffer instead of holdoff buffer 454 * Then also short ciruit any flushes that would occur 455 * when non-silence is detect since samples were already 456 * copied. This has the effect of always leaving 457 * holdoff[] amount of silence but deleting any 458 * beyond that amount. 459 * 460 * nrOfTicks counts wide samples here. 461 */ 462 silence_copy: 463 nrOfTicks = min((*isamp-nrOfInSamplesRead), 464 (*osamp-nrOfOutSamplesWritten)) / 465 effp->in_signal.channels; 466 if (silence->stop) 467 { 468 /* Case A */ 469 for(i = 0; i < nrOfTicks; i++) 470 { 471 threshold = 1; 472 for (j = 0; j < effp->in_signal.channels; j++) 473 { 474 threshold &= aboveThreshold(effp, 475 compute_rms(effp, ibuf[j]), 476 silence->stop_threshold, 477 silence->stop_unit); 478 } 479 480 /* Case 1a 481 * If above threshold, check to see if we where holding 482 * off previously. If so then flush this buffer. 483 * We haven't incremented any pointers yet so nothing 484 * is lost. 485 * 486 * If user wants to leave_silence, then we 487 * were already copying the data and so no 488 * need to flush the old data. Just resume 489 * copying as if we were not holding off. 490 */ 491 if (threshold && silence->stop_holdoff_end 492 && !silence->leave_silence) 493 { 494 silence->mode = SILENCE_COPY_FLUSH; 495 goto silence_copy_flush; 496 } 497 /* Case 1b */ 498 else if (threshold) 499 { 500 /* Not holding off so copy into output buffer */ 501 for (j = 0; j < effp->in_signal.channels; j++) 502 { 503 update_rms(effp, *ibuf); 504 *obuf++ = *ibuf++; 505 nrOfInSamplesRead++; 506 nrOfOutSamplesWritten++; 507 } 508 } 509 /* Case 2 */ 510 else if (!threshold) 511 { 512 /* Add to holdoff buffer */ 513 for (j = 0; j < effp->in_signal.channels; j++) 514 { 515 update_rms(effp, *ibuf); 516 if (silence->leave_silence) { 517 *obuf++ = *ibuf; 518 nrOfOutSamplesWritten++; 519 } 520 silence->stop_holdoff[ 521 silence->stop_holdoff_end++] = *ibuf++; 522 nrOfInSamplesRead++; 523 } 524 525 /* Check if holdoff buffer is greater than duration 526 */ 527 if (silence->stop_holdoff_end >= 528 silence->stop_duration) 529 { 530 /* Increment found counter and see if this 531 * is the last period. If so then exit. 532 */ 533 if (++silence->stop_found_periods >= 534 silence->stop_periods) 535 { 536 silence->stop_holdoff_offset = 0; 537 silence->stop_holdoff_end = 0; 538 if (!silence->restart) 539 { 540 *isamp = nrOfInSamplesRead; 541 *osamp = nrOfOutSamplesWritten; 542 silence->mode = SILENCE_STOP; 543 /* Return SOX_EOF since no more processing */ 544 return (SOX_EOF); 545 } 546 else 547 { 548 silence->stop_found_periods = 0; 549 silence->start_found_periods = 0; 550 silence->start_holdoff_offset = 0; 551 silence->start_holdoff_end = 0; 552 clear_rms(effp); 553 silence->mode = SILENCE_TRIM; 554 555 goto silence_trim; 556 } 557 } 558 else 559 { 560 /* Flush this buffer and start 561 * looking again. 562 */ 563 silence->mode = SILENCE_COPY_FLUSH; 564 goto silence_copy_flush; 565 } 566 break; 567 } /* Filled holdoff buffer */ 568 } /* Detected silence */ 569 } /* For # of samples */ 570 } /* Trimming off backend */ 571 else /* !(silence->stop) */ 572 { 573 /* Case B */ 574 memcpy(obuf, ibuf, sizeof(sox_sample_t)*nrOfTicks* 575 effp->in_signal.channels); 576 nrOfInSamplesRead += (nrOfTicks*effp->in_signal.channels); 577 nrOfOutSamplesWritten += (nrOfTicks*effp->in_signal.channels); 578 } 579 break; 580 581 case SILENCE_COPY_FLUSH: 582 /* nrOfTicks counts non-wide samples here. */ 583 silence_copy_flush: 584 nrOfTicks = min((silence->stop_holdoff_end - 585 silence->stop_holdoff_offset), 586 (*osamp-nrOfOutSamplesWritten)); 587 nrOfTicks -= nrOfTicks % effp->in_signal.channels; 588 589 for(i = 0; i < nrOfTicks; i++) 590 { 591 *obuf++ = silence->stop_holdoff[silence->stop_holdoff_offset++]; 592 nrOfOutSamplesWritten++; 593 } 594 595 /* If fully drained holdoff then return to copy mode */ 596 if (silence->stop_holdoff_offset == silence->stop_holdoff_end) 597 { 598 silence->stop_holdoff_offset = 0; 599 silence->stop_holdoff_end = 0; 600 silence->mode = SILENCE_COPY; 601 goto silence_copy; 602 } 603 break; 604 605 case SILENCE_STOP: 606 /* This code can't be reached. */ 607 nrOfInSamplesRead = *isamp; 608 break; 609 } 610 611 *isamp = nrOfInSamplesRead; 612 *osamp = nrOfOutSamplesWritten; 613 614 return (SOX_SUCCESS); 615 } 616 617 static int sox_silence_drain(sox_effect_t * effp, sox_sample_t *obuf, size_t *osamp) 618 { 619 priv_t * silence = (priv_t *) effp->priv; 620 size_t i; 621 size_t nrOfTicks, nrOfOutSamplesWritten = 0; /* non-wide samples */ 622 623 /* Only if in flush mode will there be possible samples to write 624 * out during drain() call. 625 */ 626 if (silence->mode == SILENCE_COPY_FLUSH || 627 silence->mode == SILENCE_COPY) 628 { 629 nrOfTicks = min((silence->stop_holdoff_end - 630 silence->stop_holdoff_offset), *osamp); 631 nrOfTicks -= nrOfTicks % effp->in_signal.channels; 632 for(i = 0; i < nrOfTicks; i++) 633 { 634 *obuf++ = silence->stop_holdoff[silence->stop_holdoff_offset++]; 635 nrOfOutSamplesWritten++; 636 } 637 638 /* If fully drained holdoff then stop */ 639 if (silence->stop_holdoff_offset == silence->stop_holdoff_end) 640 { 641 silence->stop_holdoff_offset = 0; 642 silence->stop_holdoff_end = 0; 643 silence->mode = SILENCE_STOP; 644 } 645 } 646 647 *osamp = nrOfOutSamplesWritten; 648 if (silence->mode == SILENCE_STOP || *osamp == 0) 649 return SOX_EOF; 650 else 651 return SOX_SUCCESS; 652 } 653 654 static int sox_silence_stop(sox_effect_t * effp) 655 { 656 priv_t * silence = (priv_t *) effp->priv; 657 658 free(silence->window); 659 free(silence->start_holdoff); 660 free(silence->stop_holdoff); 661 662 return(SOX_SUCCESS); 663 } 664 665 static int lsx_kill(sox_effect_t * effp) 666 { 667 priv_t * silence = (priv_t *) effp->priv; 668 669 free(silence->start_duration_str); 670 free(silence->stop_duration_str); 671 672 return SOX_SUCCESS; 673 } 674 675 static sox_effect_handler_t sox_silence_effect = { 676 "silence", 677 "[ -l ] above_periods [ duration threshold[d|%] ] [ below_periods duration threshold[d|%] ]", 678 SOX_EFF_MCHAN | SOX_EFF_MODIFY | SOX_EFF_LENGTH, 679 sox_silence_getopts, 680 sox_silence_start, 681 sox_silence_flow, 682 sox_silence_drain, 683 sox_silence_stop, 684 lsx_kill, sizeof(priv_t) 685 }; 686 687 const sox_effect_handler_t *lsx_silence_effect_fn(void) 688 { 689 return &sox_silence_effect; 690 } 691