1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ 2 3 /* 4 Rubber Band Library 5 An audio time-stretching and pitch-shifting library. 6 Copyright 2007-2021 Particular Programs Ltd. 7 8 This program is free software; you can redistribute it and/or 9 modify it under the terms of the GNU General Public License as 10 published by the Free Software Foundation; either version 2 of the 11 License, or (at your option) any later version. See the file 12 COPYING included with this distribution for more information. 13 14 Alternatively, if you have a valid commercial licence for the 15 Rubber Band Library obtained by agreement with the copyright 16 holders, you may redistribute and/or modify it under the terms 17 described in that licence. 18 19 If you wish to distribute code using the Rubber Band Library 20 under terms other than those of the GNU General Public License, 21 you must obtain a valid commercial licence before doing so. 22 */ 23 24 #include "StretcherImpl.h" 25 26 #include "audiocurves/PercussiveAudioCurve.h" 27 #include "audiocurves/HighFrequencyAudioCurve.h" 28 #include "audiocurves/SpectralDifferenceAudioCurve.h" 29 #include "audiocurves/SilentAudioCurve.h" 30 #include "audiocurves/ConstantAudioCurve.h" 31 #include "audiocurves/CompoundAudioCurve.h" 32 33 #include "dsp/Resampler.h" 34 35 #include "StretchCalculator.h" 36 #include "StretcherChannelData.h" 37 38 #include "base/Profiler.h" 39 40 #include "system/sysutils.h" 41 42 #include <cassert> 43 #include <cmath> 44 #include <set> 45 #include <map> 46 #include <algorithm> 47 48 using namespace RubberBand; 49 50 using std::cerr; 51 using std::endl; 52 using std::vector; 53 using std::map; 54 using std::set; 55 using std::max; 56 using std::min; 57 58 namespace RubberBand { 59 60 const size_t 61 RubberBandStretcher::Impl::m_defaultIncrement = 256; 62 63 const size_t 64 RubberBandStretcher::Impl::m_defaultFftSize = 2048; 65 66 int 67 RubberBandStretcher::Impl::m_defaultDebugLevel = 0; 68 69 static bool _initialised = false; 70 71 RubberBandStretcher::Impl::Impl(size_t sampleRate, 72 size_t channels, 73 Options options, 74 double initialTimeRatio, 75 double initialPitchScale) : 76 m_sampleRate(sampleRate), 77 m_channels(channels), 78 m_timeRatio(initialTimeRatio), 79 m_pitchScale(initialPitchScale), 80 m_fftSize(m_defaultFftSize), 81 m_aWindowSize(m_defaultFftSize), 82 m_sWindowSize(m_defaultFftSize), 83 m_increment(m_defaultIncrement), 84 m_outbufSize(m_defaultFftSize * 2), 85 m_maxProcessSize(m_defaultFftSize), 86 m_expectedInputDuration(0), 87 #ifndef NO_THREADING 88 m_threaded(false), 89 #endif 90 m_realtime(false), 91 m_options(options), 92 m_debugLevel(m_defaultDebugLevel), 93 m_mode(JustCreated), 94 m_awindow(0), 95 m_afilter(0), 96 m_swindow(0), 97 m_studyFFT(0), 98 #ifndef NO_THREADING 99 m_spaceAvailable("space"), 100 #endif 101 m_inputDuration(0), 102 m_detectorType(CompoundAudioCurve::CompoundDetector), 103 m_silentHistory(0), 104 m_lastProcessOutputIncrements(16), 105 m_lastProcessPhaseResetDf(16), 106 m_emergencyScavenger(10, 4), 107 m_phaseResetAudioCurve(0), 108 m_stretchAudioCurve(0), 109 m_silentAudioCurve(0), 110 m_stretchCalculator(0), 111 m_freq0(600), 112 m_freq1(1200), 113 m_freq2(12000), 114 m_baseFftSize(m_defaultFftSize) 115 { 116 if (!_initialised) { 117 system_specific_initialise(); 118 _initialised = true; 119 } 120 121 if (m_debugLevel > 0) { 122 cerr << "RubberBandStretcher::Impl::Impl: rate = " << m_sampleRate << ", options = " << options << endl; 123 } 124 125 // Window size will vary according to the audio sample rate, but 126 // we don't let it drop below the 48k default 127 m_rateMultiple = float(m_sampleRate) / 48000.f; 128 // if (m_rateMultiple < 1.f) m_rateMultiple = 1.f; 129 m_baseFftSize = roundUp(int(m_defaultFftSize * m_rateMultiple)); 130 131 if ((options & OptionWindowShort) || (options & OptionWindowLong)) { 132 if ((options & OptionWindowShort) && (options & OptionWindowLong)) { 133 cerr << "RubberBandStretcher::Impl::Impl: Cannot specify OptionWindowLong and OptionWindowShort together; falling back to OptionWindowStandard" << endl; 134 } else if (options & OptionWindowShort) { 135 m_baseFftSize = m_baseFftSize / 2; 136 if (m_debugLevel > 0) { 137 cerr << "setting baseFftSize to " << m_baseFftSize << endl; 138 } 139 } else if (options & OptionWindowLong) { 140 m_baseFftSize = m_baseFftSize * 2; 141 if (m_debugLevel > 0) { 142 cerr << "setting baseFftSize to " << m_baseFftSize << endl; 143 } 144 } 145 m_fftSize = m_baseFftSize; 146 m_aWindowSize = m_baseFftSize; 147 m_sWindowSize = m_baseFftSize; 148 m_outbufSize = m_sWindowSize * 2; 149 m_maxProcessSize = m_aWindowSize; 150 } 151 152 if (m_options & OptionProcessRealTime) { 153 154 m_realtime = true; 155 156 if (!(m_options & OptionStretchPrecise)) { 157 m_options |= OptionStretchPrecise; 158 } 159 } 160 161 #ifndef NO_THREADING 162 if (m_channels > 1) { 163 164 m_threaded = true; 165 166 if (m_realtime) { 167 m_threaded = false; 168 } else if (m_options & OptionThreadingNever) { 169 m_threaded = false; 170 } else if (!(m_options & OptionThreadingAlways) && 171 !system_is_multiprocessor()) { 172 m_threaded = false; 173 } 174 175 if (m_threaded && m_debugLevel > 0) { 176 cerr << "Going multithreaded..." << endl; 177 } 178 } 179 #endif 180 181 configure(); 182 } 183 184 RubberBandStretcher::Impl::~Impl() 185 { 186 #ifndef NO_THREADING 187 if (m_threaded) { 188 MutexLocker locker(&m_threadSetMutex); 189 for (set<ProcessThread *>::iterator i = m_threadSet.begin(); 190 i != m_threadSet.end(); ++i) { 191 if (m_debugLevel > 0) { 192 cerr << "RubberBandStretcher::~RubberBandStretcher: joining (channel " << *i << ")" << endl; 193 } 194 (*i)->abandon(); 195 (*i)->wait(); 196 delete *i; 197 } 198 } 199 #endif 200 201 for (size_t c = 0; c < m_channels; ++c) { 202 delete m_channelData[c]; 203 } 204 205 delete m_phaseResetAudioCurve; 206 delete m_stretchAudioCurve; 207 delete m_silentAudioCurve; 208 delete m_stretchCalculator; 209 delete m_studyFFT; 210 211 for (map<size_t, Window<float> *>::iterator i = m_windows.begin(); 212 i != m_windows.end(); ++i) { 213 delete i->second; 214 } 215 for (map<size_t, SincWindow<float> *>::iterator i = m_sincs.begin(); 216 i != m_sincs.end(); ++i) { 217 delete i->second; 218 } 219 } 220 221 void 222 RubberBandStretcher::Impl::reset() 223 { 224 #ifndef NO_THREADING 225 if (m_threaded) { 226 m_threadSetMutex.lock(); 227 for (set<ProcessThread *>::iterator i = m_threadSet.begin(); 228 i != m_threadSet.end(); ++i) { 229 if (m_debugLevel > 0) { 230 cerr << "RubberBandStretcher::~RubberBandStretcher: joining (channel " << *i << ")" << endl; 231 } 232 (*i)->abandon(); 233 (*i)->wait(); 234 delete *i; 235 } 236 m_threadSet.clear(); 237 } 238 #endif 239 240 m_emergencyScavenger.scavenge(); 241 242 if (m_stretchCalculator) { 243 m_stretchCalculator->reset(); 244 } 245 246 for (size_t c = 0; c < m_channels; ++c) { 247 m_channelData[c]->reset(); 248 } 249 250 m_mode = JustCreated; 251 if (m_phaseResetAudioCurve) m_phaseResetAudioCurve->reset(); 252 if (m_stretchAudioCurve) m_stretchAudioCurve->reset(); 253 if (m_silentAudioCurve) m_silentAudioCurve->reset(); 254 m_inputDuration = 0; 255 m_silentHistory = 0; 256 257 #ifndef NO_THREADING 258 if (m_threaded) m_threadSetMutex.unlock(); 259 #endif 260 261 reconfigure(); 262 } 263 264 void 265 RubberBandStretcher::Impl::setTimeRatio(double ratio) 266 { 267 if (!m_realtime) { 268 if (m_mode == Studying || m_mode == Processing) { 269 cerr << "RubberBandStretcher::Impl::setTimeRatio: Cannot set ratio while studying or processing in non-RT mode" << endl; 270 return; 271 } 272 } 273 274 if (ratio == m_timeRatio) return; 275 m_timeRatio = ratio; 276 277 reconfigure(); 278 } 279 280 void 281 RubberBandStretcher::Impl::setPitchScale(double fs) 282 { 283 if (!m_realtime) { 284 if (m_mode == Studying || m_mode == Processing) { 285 cerr << "RubberBandStretcher::Impl::setPitchScale: Cannot set ratio while studying or processing in non-RT mode" << endl; 286 return; 287 } 288 } 289 290 if (fs == m_pitchScale) return; 291 292 bool was1 = (m_pitchScale == 1.f); 293 bool rbs = resampleBeforeStretching(); 294 295 m_pitchScale = fs; 296 297 reconfigure(); 298 299 if (!(m_options & OptionPitchHighConsistency) && 300 (was1 || resampleBeforeStretching() != rbs) && 301 m_pitchScale != 1.f) { 302 303 // resampling mode has changed 304 for (int c = 0; c < int(m_channels); ++c) { 305 if (m_channelData[c]->resampler) { 306 m_channelData[c]->resampler->reset(); 307 } 308 } 309 } 310 } 311 312 double 313 RubberBandStretcher::Impl::getTimeRatio() const 314 { 315 return m_timeRatio; 316 } 317 318 double 319 RubberBandStretcher::Impl::getPitchScale() const 320 { 321 return m_pitchScale; 322 } 323 324 void 325 RubberBandStretcher::Impl::setExpectedInputDuration(size_t samples) 326 { 327 if (samples == m_expectedInputDuration) return; 328 m_expectedInputDuration = samples; 329 330 reconfigure(); 331 } 332 333 void 334 RubberBandStretcher::Impl::setMaxProcessSize(size_t samples) 335 { 336 if (samples <= m_maxProcessSize) return; 337 m_maxProcessSize = samples; 338 339 reconfigure(); 340 } 341 342 void 343 RubberBandStretcher::Impl::setKeyFrameMap(const std::map<size_t, size_t> & 344 mapping) 345 { 346 if (m_realtime) { 347 cerr << "RubberBandStretcher::Impl::setKeyFrameMap: Cannot specify key frame map in RT mode" << endl; 348 return; 349 } 350 if (m_mode == Processing) { 351 cerr << "RubberBandStretcher::Impl::setKeyFrameMap: Cannot specify key frame map after process() has begun" << endl; 352 return; 353 } 354 355 if (m_stretchCalculator) { 356 m_stretchCalculator->setKeyFrameMap(mapping); 357 } 358 } 359 360 float 361 RubberBandStretcher::Impl::getFrequencyCutoff(int n) const 362 { 363 switch (n) { 364 case 0: return m_freq0; 365 case 1: return m_freq1; 366 case 2: return m_freq2; 367 } 368 return 0.f; 369 } 370 371 void 372 RubberBandStretcher::Impl::setFrequencyCutoff(int n, float f) 373 { 374 switch (n) { 375 case 0: m_freq0 = f; break; 376 case 1: m_freq1 = f; break; 377 case 2: m_freq2 = f; break; 378 } 379 } 380 381 double 382 RubberBandStretcher::Impl::getEffectiveRatio() const 383 { 384 // Returns the ratio that the internal time stretcher needs to 385 // achieve, not the resulting duration ratio of the output (which 386 // is simply m_timeRatio). 387 388 // A frequency shift is achieved using an additional time shift, 389 // followed by resampling back to the original time shift to 390 // change the pitch. Note that the resulting frequency change is 391 // fixed, as it is effected by the resampler -- in contrast to 392 // time shifting, which is variable aiming to place the majority 393 // of the stretch or squash in low-interest regions of audio. 394 395 return m_timeRatio * m_pitchScale; 396 } 397 398 size_t 399 RubberBandStretcher::Impl::roundUp(size_t value) 400 { 401 if (!(value & (value - 1))) return value; 402 int bits = 0; 403 while (value) { ++bits; value >>= 1; } 404 value = 1 << bits; 405 return value; 406 } 407 408 void 409 RubberBandStretcher::Impl::calculateSizes() 410 { 411 size_t inputIncrement = m_defaultIncrement; 412 size_t windowSize = m_baseFftSize; 413 size_t outputIncrement; 414 415 if (m_pitchScale <= 0.0) { 416 // This special case is likelier than one might hope, because 417 // of naive initialisations in programs that set it from a 418 // variable 419 std::cerr << "RubberBandStretcher: WARNING: Pitch scale must be greater than zero!\nResetting it from " << m_pitchScale << " to the default of 1.0: no pitch change will occur" << std::endl; 420 m_pitchScale = 1.0; 421 } 422 if (m_timeRatio <= 0.0) { 423 // Likewise 424 std::cerr << "RubberBandStretcher: WARNING: Time ratio must be greater than zero!\nResetting it from " << m_timeRatio << " to the default of 1.0: no time stretch will occur" << std::endl; 425 m_timeRatio = 1.0; 426 } 427 428 double r = getEffectiveRatio(); 429 430 if (m_realtime) { 431 432 if (r < 1) { 433 434 bool rsb = (m_pitchScale < 1.0 && !resampleBeforeStretching()); 435 float windowIncrRatio = 4.5; 436 if (r == 1.0) windowIncrRatio = 4; 437 else if (rsb) windowIncrRatio = 4.5; 438 else windowIncrRatio = 6; 439 440 inputIncrement = int(windowSize / windowIncrRatio); 441 outputIncrement = int(floor(inputIncrement * r)); 442 443 // Very long stretch or very low pitch shift 444 if (outputIncrement < m_defaultIncrement / 4) { 445 if (outputIncrement < 1) outputIncrement = 1; 446 while (outputIncrement < m_defaultIncrement / 4 && 447 windowSize < m_baseFftSize * 4) { 448 outputIncrement *= 2; 449 inputIncrement = lrint(ceil(outputIncrement / r)); 450 windowSize = roundUp(lrint(ceil(inputIncrement * windowIncrRatio))); 451 } 452 } 453 454 } else { 455 456 bool rsb = (m_pitchScale > 1.0 && resampleBeforeStretching()); 457 float windowIncrRatio = 4.5; 458 if (r == 1.0) windowIncrRatio = 4; 459 else if (rsb) windowIncrRatio = 4.5; 460 else windowIncrRatio = 8; 461 462 outputIncrement = int(windowSize / windowIncrRatio); 463 inputIncrement = int(outputIncrement / r); 464 while (outputIncrement > 1024 * m_rateMultiple && 465 inputIncrement > 1) { 466 outputIncrement /= 2; 467 inputIncrement = int(outputIncrement / r); 468 } 469 while (inputIncrement < 1) { 470 outputIncrement *= 2; 471 inputIncrement = int(outputIncrement / r); 472 } 473 size_t minwin = roundUp(lrint(outputIncrement * windowIncrRatio)); 474 if (windowSize < minwin) windowSize = minwin; 475 476 if (rsb) { 477 // cerr << "adjusting window size from " << windowSize; 478 size_t newWindowSize = roundUp(lrint(windowSize / m_pitchScale)); 479 if (newWindowSize < 512) newWindowSize = 512; 480 size_t div = windowSize / newWindowSize; 481 if (inputIncrement > div && outputIncrement > div) { 482 inputIncrement /= div; 483 outputIncrement /= div; 484 windowSize /= div; 485 } 486 // cerr << " to " << windowSize << " (inputIncrement = " << inputIncrement << ", outputIncrement = " << outputIncrement << ")" << endl; 487 } 488 } 489 490 } else { 491 492 if (r < 1) { 493 inputIncrement = windowSize / 4; 494 while (inputIncrement >= 512) inputIncrement /= 2; 495 outputIncrement = int(floor(inputIncrement * r)); 496 if (outputIncrement < 1) { 497 outputIncrement = 1; 498 inputIncrement = roundUp(lrint(ceil(outputIncrement / r))); 499 windowSize = inputIncrement * 4; 500 } 501 } else { 502 outputIncrement = windowSize / 6; 503 inputIncrement = int(outputIncrement / r); 504 while (outputIncrement > 1024 && inputIncrement > 1) { 505 outputIncrement /= 2; 506 inputIncrement = int(outputIncrement / r); 507 } 508 while (inputIncrement < 1) { 509 outputIncrement *= 2; 510 inputIncrement = int(outputIncrement / r); 511 } 512 windowSize = std::max(windowSize, roundUp(outputIncrement * 6)); 513 if (r > 5) while (windowSize < 8192) windowSize *= 2; 514 } 515 } 516 517 if (m_expectedInputDuration > 0) { 518 while (inputIncrement * 4 > m_expectedInputDuration && 519 inputIncrement > 1) { 520 inputIncrement /= 2; 521 } 522 } 523 524 // m_fftSize can be almost anything, but it can't be greater than 525 // 4 * m_baseFftSize unless ratio is less than 1/1024. 526 527 m_fftSize = windowSize; 528 529 if (m_options & OptionSmoothingOn) { 530 m_aWindowSize = windowSize * 2; 531 m_sWindowSize = windowSize * 2; 532 } else { 533 m_aWindowSize = windowSize; 534 m_sWindowSize = windowSize; 535 } 536 537 m_increment = inputIncrement; 538 539 // When squashing, the greatest theoretically possible output 540 // increment is the input increment. When stretching adaptively 541 // the sky's the limit in principle, but we expect 542 // StretchCalculator to restrict itself to using no more than 543 // twice the basic output increment (i.e. input increment times 544 // ratio) for any chunk. 545 546 if (m_debugLevel > 0) { 547 cerr << "calculateSizes: time ratio = " << m_timeRatio << ", pitch scale = " << m_pitchScale << ", effective ratio = " << getEffectiveRatio() << endl; 548 cerr << "calculateSizes: analysis window size = " << m_aWindowSize << ", synthesis window size = " << m_sWindowSize << ", fft size = " << m_fftSize << ", increment = " << m_increment << " (approx output increment = " << int(lrint(m_increment * getEffectiveRatio())) << ")" << endl; 549 } 550 551 if (std::max(m_aWindowSize, m_sWindowSize) > m_maxProcessSize) { 552 m_maxProcessSize = std::max(m_aWindowSize, m_sWindowSize); 553 } 554 555 m_outbufSize = 556 size_t 557 (ceil(max 558 (m_maxProcessSize / m_pitchScale, 559 m_maxProcessSize * 2 * (m_timeRatio > 1.f ? m_timeRatio : 1.f)))); 560 561 if (m_realtime) { 562 // This headroom is so as to try to avoid reallocation when 563 // the pitch scale changes 564 m_outbufSize = m_outbufSize * 16; 565 } else { 566 #ifndef NO_THREADING 567 if (m_threaded) { 568 // This headroom is to permit the processing threads to 569 // run ahead of the buffer output drainage; the exact 570 // amount of headroom is a question of tuning rather than 571 // results 572 m_outbufSize = m_outbufSize * 16; 573 } 574 #endif 575 } 576 577 if (m_debugLevel > 0) { 578 cerr << "calculateSizes: outbuf size = " << m_outbufSize << endl; 579 } 580 } 581 582 void 583 RubberBandStretcher::Impl::configure() 584 { 585 if (m_debugLevel > 0) { 586 std::cerr << "configure[" << this << "]: realtime = " << m_realtime << ", pitch scale = " 587 << m_pitchScale << ", channels = " << m_channels << std::endl; 588 } 589 590 size_t prevFftSize = m_fftSize; 591 size_t prevAWindowSize = m_aWindowSize; 592 size_t prevSWindowSize = m_sWindowSize; 593 size_t prevOutbufSize = m_outbufSize; 594 if (m_windows.empty()) { 595 prevFftSize = 0; 596 prevAWindowSize = 0; 597 prevSWindowSize = 0; 598 prevOutbufSize = 0; 599 } 600 601 calculateSizes(); 602 603 bool fftSizeChanged = (prevFftSize != m_fftSize); 604 bool windowSizeChanged = ((prevAWindowSize != m_aWindowSize) || 605 (prevSWindowSize != m_sWindowSize)); 606 bool outbufSizeChanged = (prevOutbufSize != m_outbufSize); 607 608 // This function may be called at any time in non-RT mode, after a 609 // parameter has changed. It shouldn't be legal to call it after 610 // processing has already begun. 611 612 // This function is only called once (on construction) in RT 613 // mode. After that reconfigure() does the work in a hopefully 614 // RT-safe way. 615 616 set<size_t> windowSizes; 617 if (m_realtime) { 618 windowSizes.insert(m_baseFftSize); 619 windowSizes.insert(m_baseFftSize / 2); 620 windowSizes.insert(m_baseFftSize * 2); 621 // windowSizes.insert(m_baseFftSize * 4); 622 } 623 windowSizes.insert(m_fftSize); 624 windowSizes.insert(m_aWindowSize); 625 windowSizes.insert(m_sWindowSize); 626 627 if (windowSizeChanged) { 628 629 for (set<size_t>::const_iterator i = windowSizes.begin(); 630 i != windowSizes.end(); ++i) { 631 if (m_windows.find(*i) == m_windows.end()) { 632 m_windows[*i] = new Window<float>(HanningWindow, *i); 633 } 634 if (m_sincs.find(*i) == m_sincs.end()) { 635 m_sincs[*i] = new SincWindow<float>(*i, *i); 636 } 637 } 638 m_awindow = m_windows[m_aWindowSize]; 639 m_afilter = m_sincs[m_aWindowSize]; 640 m_swindow = m_windows[m_sWindowSize]; 641 642 if (m_debugLevel > 0) { 643 cerr << "Window area: " << m_awindow->getArea() << "; synthesis window area: " << m_swindow->getArea() << endl; 644 } 645 } 646 647 if (windowSizeChanged || outbufSizeChanged) { 648 649 for (size_t c = 0; c < m_channelData.size(); ++c) { 650 delete m_channelData[c]; 651 } 652 m_channelData.clear(); 653 654 for (size_t c = 0; c < m_channels; ++c) { 655 m_channelData.push_back 656 (new ChannelData(windowSizes, 657 std::max(m_aWindowSize, m_sWindowSize), 658 m_fftSize, 659 m_outbufSize)); 660 } 661 } 662 663 if (!m_realtime && fftSizeChanged) { 664 delete m_studyFFT; 665 m_studyFFT = new FFT(m_fftSize, m_debugLevel); 666 m_studyFFT->initFloat(); 667 } 668 669 if (m_pitchScale != 1.0 || 670 (m_options & OptionPitchHighConsistency) || 671 m_realtime) { 672 673 for (size_t c = 0; c < m_channels; ++c) { 674 675 if (m_channelData[c]->resampler) continue; 676 677 Resampler::Parameters params; 678 params.quality = Resampler::FastestTolerable; 679 680 if (m_realtime) { 681 params.dynamism = Resampler::RatioOftenChanging; 682 params.ratioChange = Resampler::SmoothRatioChange; 683 } else { 684 // ratio can't be changed in offline mode 685 params.dynamism = Resampler::RatioMostlyFixed; 686 params.ratioChange = Resampler::SuddenRatioChange; 687 } 688 689 params.maxBufferSize = 4096 * 16; 690 params.debugLevel = (m_debugLevel > 0 ? m_debugLevel-1 : 0); 691 692 m_channelData[c]->resampler = new Resampler(params, 1); 693 694 // rbs is the amount of buffer space we think we'll need 695 // for resampling; but allocate a sensible amount in case 696 // the pitch scale changes during use 697 size_t rbs = 698 lrintf(ceil((m_increment * m_timeRatio * 2) / m_pitchScale)); 699 if (rbs < m_increment * 16) rbs = m_increment * 16; 700 m_channelData[c]->setResampleBufSize(rbs); 701 } 702 } 703 704 // stretchAudioCurve is unused in RT mode; phaseResetAudioCurve, 705 // silentAudioCurve and stretchCalculator however are used in all 706 // modes 707 708 delete m_phaseResetAudioCurve; 709 m_phaseResetAudioCurve = new CompoundAudioCurve 710 (CompoundAudioCurve::Parameters(m_sampleRate, m_fftSize)); 711 m_phaseResetAudioCurve->setType(m_detectorType); 712 713 delete m_silentAudioCurve; 714 m_silentAudioCurve = new SilentAudioCurve 715 (SilentAudioCurve::Parameters(m_sampleRate, m_fftSize)); 716 717 if (!m_realtime) { 718 delete m_stretchAudioCurve; 719 if (!(m_options & OptionStretchPrecise)) { 720 m_stretchAudioCurve = new SpectralDifferenceAudioCurve 721 (SpectralDifferenceAudioCurve::Parameters(m_sampleRate, m_fftSize)); 722 } else { 723 m_stretchAudioCurve = new ConstantAudioCurve 724 (ConstantAudioCurve::Parameters(m_sampleRate, m_fftSize)); 725 } 726 } 727 728 delete m_stretchCalculator; 729 m_stretchCalculator = new StretchCalculator 730 (m_sampleRate, m_increment, 731 !(m_options & OptionTransientsSmooth)); 732 733 m_stretchCalculator->setDebugLevel(m_debugLevel); 734 m_inputDuration = 0; 735 736 // Prepare the inbufs with half a chunk of emptiness. The centre 737 // point of the first processing chunk for the onset detector 738 // should be the first sample of the audio, and we continue until 739 // we can no longer centre a chunk within the input audio. The 740 // number of onset detector chunks will be the number of audio 741 // samples input, divided by the input increment, plus one. 742 743 // In real-time mode, we don't do this prefill -- it's better to 744 // start with a swoosh than introduce more latency, and we don't 745 // want gaps when the ratio changes. 746 747 if (!m_realtime) { 748 if (m_debugLevel > 1) { 749 cerr << "Not real time mode: prefilling with " << m_aWindowSize/2 << " samples" << endl; 750 } 751 for (size_t c = 0; c < m_channels; ++c) { 752 m_channelData[c]->reset(); 753 m_channelData[c]->inbuf->zero(m_aWindowSize/2); 754 } 755 } 756 } 757 758 759 void 760 RubberBandStretcher::Impl::reconfigure() 761 { 762 if (!m_realtime) { 763 if (m_mode == Studying) { 764 // stop and calculate the stretch curve so far, then reset 765 // the df vectors 766 calculateStretch(); 767 m_phaseResetDf.clear(); 768 m_stretchDf.clear(); 769 m_silence.clear(); 770 m_inputDuration = 0; 771 } 772 configure(); 773 } 774 775 size_t prevFftSize = m_fftSize; 776 size_t prevAWindowSize = m_aWindowSize; 777 size_t prevSWindowSize = m_sWindowSize; 778 size_t prevOutbufSize = m_outbufSize; 779 780 calculateSizes(); 781 782 bool somethingChanged = false; 783 784 // There are various allocations in this function, but they should 785 // never happen in normal use -- they just recover from the case 786 // where not all of the things we need were correctly created when 787 // we first configured (for whatever reason). This is intended to 788 // be "effectively" realtime safe. The same goes for 789 // ChannelData::setOutbufSize and setSizes. 790 791 if (m_aWindowSize != prevAWindowSize || 792 m_sWindowSize != prevSWindowSize) { 793 794 if (m_windows.find(m_aWindowSize) == m_windows.end()) { 795 std::cerr << "WARNING: reconfigure(): window allocation (size " << m_aWindowSize << ") required in RT mode" << std::endl; 796 m_windows[m_aWindowSize] = new Window<float> 797 (HanningWindow, m_aWindowSize); 798 m_sincs[m_aWindowSize] = new SincWindow<float> 799 (m_aWindowSize, m_aWindowSize); 800 } 801 802 if (m_windows.find(m_sWindowSize) == m_windows.end()) { 803 std::cerr << "WARNING: reconfigure(): window allocation (size " << m_sWindowSize << ") required in RT mode" << std::endl; 804 m_windows[m_sWindowSize] = new Window<float> 805 (HanningWindow, m_sWindowSize); 806 m_sincs[m_sWindowSize] = new SincWindow<float> 807 (m_sWindowSize, m_sWindowSize); 808 } 809 810 m_awindow = m_windows[m_aWindowSize]; 811 m_afilter = m_sincs[m_aWindowSize]; 812 m_swindow = m_windows[m_sWindowSize]; 813 814 for (size_t c = 0; c < m_channels; ++c) { 815 m_channelData[c]->setSizes(std::max(m_aWindowSize, m_sWindowSize), 816 m_fftSize); 817 } 818 819 somethingChanged = true; 820 } 821 822 if (m_outbufSize != prevOutbufSize) { 823 for (size_t c = 0; c < m_channels; ++c) { 824 m_channelData[c]->setOutbufSize(m_outbufSize); 825 } 826 somethingChanged = true; 827 } 828 829 if (m_pitchScale != 1.0) { 830 for (size_t c = 0; c < m_channels; ++c) { 831 832 if (m_channelData[c]->resampler) continue; 833 834 std::cerr << "WARNING: reconfigure(): resampler construction required in RT mode" << std::endl; 835 836 Resampler::Parameters params; 837 params.quality = Resampler::FastestTolerable; 838 params.dynamism = Resampler::RatioOftenChanging; 839 params.ratioChange = Resampler::SmoothRatioChange; 840 params.maxBufferSize = m_sWindowSize; 841 params.debugLevel = (m_debugLevel > 0 ? m_debugLevel-1 : 0); 842 843 m_channelData[c]->resampler = new Resampler(params, 1); 844 845 size_t rbs = 846 lrintf(ceil((m_increment * m_timeRatio * 2) / m_pitchScale)); 847 if (rbs < m_increment * 16) rbs = m_increment * 16; 848 m_channelData[c]->setResampleBufSize(rbs); 849 850 somethingChanged = true; 851 } 852 } 853 854 if (m_fftSize != prevFftSize) { 855 m_phaseResetAudioCurve->setFftSize(m_fftSize); 856 m_silentAudioCurve->setFftSize(m_fftSize); 857 if (m_stretchAudioCurve) { 858 m_stretchAudioCurve->setFftSize(m_fftSize); 859 } 860 somethingChanged = true; 861 } 862 863 if (m_debugLevel > 0) { 864 if (somethingChanged) { 865 std::cerr << "reconfigure: at least one parameter changed" << std::endl; 866 } else { 867 std::cerr << "reconfigure: nothing changed" << std::endl; 868 } 869 } 870 } 871 872 size_t 873 RubberBandStretcher::Impl::getLatency() const 874 { 875 if (!m_realtime) return 0; 876 return lrint((m_aWindowSize/2) / m_pitchScale); 877 } 878 879 void 880 RubberBandStretcher::Impl::setTransientsOption(Options options) 881 { 882 if (!m_realtime) { 883 cerr << "RubberBandStretcher::Impl::setTransientsOption: Not permissible in non-realtime mode" << endl; 884 return; 885 } 886 int mask = (OptionTransientsMixed | OptionTransientsSmooth | OptionTransientsCrisp); 887 m_options &= ~mask; 888 options &= mask; 889 m_options |= options; 890 891 m_stretchCalculator->setUseHardPeaks 892 (!(m_options & OptionTransientsSmooth)); 893 } 894 895 void 896 RubberBandStretcher::Impl::setDetectorOption(Options options) 897 { 898 if (!m_realtime) { 899 cerr << "RubberBandStretcher::Impl::setDetectorOption: Not permissible in non-realtime mode" << endl; 900 return; 901 } 902 int mask = (OptionDetectorPercussive | OptionDetectorCompound | OptionDetectorSoft); 903 m_options &= ~mask; 904 options &= mask; 905 m_options |= options; 906 907 CompoundAudioCurve::Type dt = CompoundAudioCurve::CompoundDetector; 908 if (m_options & OptionDetectorPercussive) dt = CompoundAudioCurve::PercussiveDetector; 909 else if (m_options & OptionDetectorSoft) dt = CompoundAudioCurve::SoftDetector; 910 911 if (dt == m_detectorType) return; 912 m_detectorType = dt; 913 914 if (m_phaseResetAudioCurve) { 915 m_phaseResetAudioCurve->setType(m_detectorType); 916 } 917 } 918 919 void 920 RubberBandStretcher::Impl::setPhaseOption(Options options) 921 { 922 int mask = (OptionPhaseLaminar | OptionPhaseIndependent); 923 m_options &= ~mask; 924 options &= mask; 925 m_options |= options; 926 } 927 928 void 929 RubberBandStretcher::Impl::setFormantOption(Options options) 930 { 931 int mask = (OptionFormantShifted | OptionFormantPreserved); 932 m_options &= ~mask; 933 options &= mask; 934 m_options |= options; 935 } 936 937 void 938 RubberBandStretcher::Impl::setPitchOption(Options options) 939 { 940 if (!m_realtime) { 941 cerr << "RubberBandStretcher::Impl::setPitchOption: Pitch option is not used in non-RT mode" << endl; 942 return; 943 } 944 945 Options prior = m_options; 946 947 int mask = (OptionPitchHighQuality | 948 OptionPitchHighSpeed | 949 OptionPitchHighConsistency); 950 m_options &= ~mask; 951 options &= mask; 952 m_options |= options; 953 954 if (prior != m_options) reconfigure(); 955 } 956 957 void 958 RubberBandStretcher::Impl::study(const float *const *input, size_t samples, bool final) 959 { 960 Profiler profiler("RubberBandStretcher::Impl::study"); 961 962 if (m_realtime) { 963 if (m_debugLevel > 1) { 964 cerr << "RubberBandStretcher::Impl::study: Not meaningful in realtime mode" << endl; 965 } 966 return; 967 } 968 969 if (m_mode == Processing || m_mode == Finished) { 970 cerr << "RubberBandStretcher::Impl::study: Cannot study after processing" << endl; 971 return; 972 } 973 m_mode = Studying; 974 975 size_t consumed = 0; 976 977 ChannelData &cd = *m_channelData[0]; 978 RingBuffer<float> &inbuf = *cd.inbuf; 979 980 const float *mixdown; 981 float *mdalloc = 0; 982 983 if (m_channels > 1 || final) { 984 // mix down into a single channel for analysis 985 mdalloc = new float[samples]; 986 for (size_t i = 0; i < samples; ++i) { 987 if (i < samples) { 988 mdalloc[i] = input[0][i]; 989 } else { 990 mdalloc[i] = 0.f; 991 } 992 } 993 for (size_t c = 1; c < m_channels; ++c) { 994 for (size_t i = 0; i < samples; ++i) { 995 mdalloc[i] += input[c][i]; 996 } 997 } 998 for (size_t i = 0; i < samples; ++i) { 999 mdalloc[i] /= m_channels; 1000 } 1001 mixdown = mdalloc; 1002 } else { 1003 mixdown = input[0]; 1004 } 1005 1006 while (consumed < samples) { 1007 1008 size_t writable = inbuf.getWriteSpace(); 1009 writable = min(writable, samples - consumed); 1010 1011 if (writable == 0) { 1012 // warn 1013 cerr << "WARNING: writable == 0 (consumed = " << consumed << ", samples = " << samples << ")" << endl; 1014 } else { 1015 inbuf.write(mixdown + consumed, writable); 1016 consumed += writable; 1017 } 1018 1019 while ((inbuf.getReadSpace() >= int(m_aWindowSize)) || 1020 (final && (inbuf.getReadSpace() >= int(m_aWindowSize/2)))) { 1021 1022 // We know we have at least m_aWindowSize samples 1023 // available in m_inbuf. We need to peek m_aWindowSize of 1024 // them for processing, and then skip m_increment to 1025 // advance the read pointer. 1026 1027 // cd.accumulator is not otherwise used during studying, 1028 // so we can use it as a temporary buffer here 1029 1030 size_t ready = inbuf.getReadSpace(); 1031 assert(final || ready >= m_aWindowSize); 1032 inbuf.peek(cd.accumulator, std::min(ready, m_aWindowSize)); 1033 1034 if (m_aWindowSize == m_fftSize) { 1035 1036 // We don't need the fftshift for studying, as we're 1037 // only interested in magnitude. 1038 1039 m_awindow->cut(cd.accumulator); 1040 1041 } else { 1042 1043 // If we need to fold (i.e. if the window size is 1044 // greater than the fft size so we are doing a 1045 // time-aliased presum fft) or zero-pad, then we might 1046 // as well use our standard function for it. This 1047 // means we retain the m_afilter cut if folding as well, 1048 // which is good for consistency with real-time mode. 1049 // We get fftshift as well, which we don't want, but 1050 // the penalty is nominal. 1051 1052 // Note that we can't do this in-place. Pity 1053 1054 float *tmp = (float *)alloca 1055 (std::max(m_fftSize, m_aWindowSize) * sizeof(float)); 1056 1057 if (m_aWindowSize > m_fftSize) { 1058 m_afilter->cut(cd.accumulator); 1059 } 1060 1061 cutShiftAndFold(tmp, m_fftSize, cd.accumulator, m_awindow); 1062 v_copy(cd.accumulator, tmp, m_fftSize); 1063 } 1064 1065 m_studyFFT->forwardMagnitude(cd.accumulator, cd.fltbuf); 1066 1067 float df = m_phaseResetAudioCurve->processFloat(cd.fltbuf, m_increment); 1068 m_phaseResetDf.push_back(df); 1069 1070 // cout << m_phaseResetDf.size() << " [" << final << "] -> " << df << " \t: "; 1071 1072 df = m_stretchAudioCurve->processFloat(cd.fltbuf, m_increment); 1073 m_stretchDf.push_back(df); 1074 1075 df = m_silentAudioCurve->processFloat(cd.fltbuf, m_increment); 1076 bool silent = (df > 0.f); 1077 if (silent && m_debugLevel > 1) { 1078 cerr << "silence found at " << m_inputDuration << endl; 1079 } 1080 m_silence.push_back(silent); 1081 1082 // cout << df << endl; 1083 1084 // We have augmented the input by m_aWindowSize/2 so that 1085 // the first chunk is centred on the first audio sample. 1086 // We want to ensure that m_inputDuration contains the 1087 // exact input duration without including this extra bit. 1088 // We just add up all the increments here, and deduct the 1089 // extra afterwards. 1090 1091 m_inputDuration += m_increment; 1092 // cerr << "incr input duration by increment: " << m_increment << " -> " << m_inputDuration << endl; 1093 inbuf.skip(m_increment); 1094 } 1095 } 1096 1097 if (final) { 1098 int rs = inbuf.getReadSpace(); 1099 m_inputDuration += rs; 1100 // cerr << "incr input duration by read space: " << rs << " -> " << m_inputDuration << endl; 1101 1102 if (m_inputDuration > m_aWindowSize/2) { // deducting the extra 1103 m_inputDuration -= m_aWindowSize/2; 1104 } 1105 } 1106 1107 if (m_channels > 1 || final) delete[] mdalloc; 1108 } 1109 1110 vector<int> 1111 RubberBandStretcher::Impl::getOutputIncrements() const 1112 { 1113 if (!m_realtime) { 1114 return m_outputIncrements; 1115 } else { 1116 vector<int> increments; 1117 while (m_lastProcessOutputIncrements.getReadSpace() > 0) { 1118 increments.push_back(m_lastProcessOutputIncrements.readOne()); 1119 } 1120 return increments; 1121 } 1122 } 1123 1124 vector<float> 1125 RubberBandStretcher::Impl::getPhaseResetCurve() const 1126 { 1127 if (!m_realtime) { 1128 return m_phaseResetDf; 1129 } else { 1130 vector<float> df; 1131 while (m_lastProcessPhaseResetDf.getReadSpace() > 0) { 1132 df.push_back(m_lastProcessPhaseResetDf.readOne()); 1133 } 1134 return df; 1135 } 1136 } 1137 1138 vector<int> 1139 RubberBandStretcher::Impl::getExactTimePoints() const 1140 { 1141 std::vector<int> points; 1142 if (!m_realtime) { 1143 std::vector<StretchCalculator::Peak> peaks = 1144 m_stretchCalculator->getLastCalculatedPeaks(); 1145 for (size_t i = 0; i < peaks.size(); ++i) { 1146 points.push_back(peaks[i].chunk); 1147 } 1148 } 1149 return points; 1150 } 1151 1152 void 1153 RubberBandStretcher::Impl::calculateStretch() 1154 { 1155 Profiler profiler("RubberBandStretcher::Impl::calculateStretch"); 1156 1157 size_t inputDuration = m_inputDuration; 1158 1159 if (!m_realtime && m_expectedInputDuration > 0) { 1160 if (m_expectedInputDuration != inputDuration) { 1161 std::cerr << "RubberBandStretcher: WARNING: Actual study() duration differs from duration set by setExpectedInputDuration (" << m_inputDuration << " vs " << m_expectedInputDuration << ", diff = " << (m_expectedInputDuration - m_inputDuration) << "), using the latter for calculation" << std::endl; 1162 inputDuration = m_expectedInputDuration; 1163 } 1164 } 1165 1166 double prdm = 0, sdm = 0; 1167 if (!m_phaseResetDf.empty()) { 1168 for (int i = 0; i < (int)m_phaseResetDf.size(); ++i) { 1169 prdm += m_phaseResetDf[i]; 1170 } 1171 prdm /= m_phaseResetDf.size(); 1172 } 1173 if (!m_stretchDf.empty()) { 1174 for (int i = 0; i < (int)m_stretchDf.size(); ++i) { 1175 sdm += m_stretchDf[i]; 1176 } 1177 sdm /= m_stretchDf.size(); 1178 } 1179 // std::cerr << "phase reset df mean = " << prdm << ", stretch df mean = " << sdm << std::endl; 1180 1181 std::vector<int> increments = m_stretchCalculator->calculate 1182 (getEffectiveRatio(), 1183 inputDuration, 1184 m_phaseResetDf, 1185 m_stretchDf); 1186 1187 int history = 0; 1188 for (size_t i = 0; i < increments.size(); ++i) { 1189 if (i >= m_silence.size()) break; 1190 if (m_silence[i]) ++history; 1191 else history = 0; 1192 if (history >= int(m_aWindowSize / m_increment) && increments[i] >= 0) { 1193 increments[i] = -increments[i]; 1194 if (m_debugLevel > 1) { 1195 std::cerr << "phase reset on silence (silent history == " 1196 << history << ")" << std::endl; 1197 } 1198 } 1199 } 1200 1201 if (m_outputIncrements.empty()) m_outputIncrements = increments; 1202 else { 1203 for (size_t i = 0; i < increments.size(); ++i) { 1204 m_outputIncrements.push_back(increments[i]); 1205 } 1206 } 1207 1208 return; 1209 } 1210 1211 void 1212 RubberBandStretcher::Impl::setDebugLevel(int level) 1213 { 1214 m_debugLevel = level; 1215 if (m_stretchCalculator) m_stretchCalculator->setDebugLevel(level); 1216 } 1217 1218 size_t 1219 RubberBandStretcher::Impl::getSamplesRequired() const 1220 { 1221 Profiler profiler("RubberBandStretcher::Impl::getSamplesRequired"); 1222 1223 size_t reqd = 0; 1224 1225 for (size_t c = 0; c < m_channels; ++c) { 1226 1227 size_t reqdHere = 0; 1228 1229 ChannelData &cd = *m_channelData[c]; 1230 RingBuffer<float> &inbuf = *cd.inbuf; 1231 RingBuffer<float> &outbuf = *cd.outbuf; 1232 1233 size_t rs = inbuf.getReadSpace(); 1234 size_t ws = outbuf.getReadSpace(); 1235 1236 if (m_debugLevel > 2) { 1237 cerr << "getSamplesRequired: ws = " << ws << ", rs = " << rs << ", m_aWindowSize = " << m_aWindowSize << endl; 1238 } 1239 1240 // We should never return zero in non-threaded modes if 1241 // available() would also return zero, i.e. if ws == 0. If we 1242 // do that, nothing will ever happen again! We need to demand 1243 // at least one increment (i.e. a nominal amount) to feed the 1244 // engine. 1245 1246 if (ws == 0 && reqd == 0) reqd = m_increment; 1247 1248 // See notes in testInbufReadSpace 1249 1250 if (rs < m_aWindowSize && !cd.draining) { 1251 1252 if (cd.inputSize == -1) { 1253 reqdHere = m_aWindowSize - rs; 1254 if (reqdHere > reqd) reqd = reqdHere; 1255 continue; 1256 } 1257 1258 if (rs == 0) { 1259 reqdHere = m_aWindowSize; 1260 if (reqdHere > reqd) reqd = reqdHere; 1261 continue; 1262 } 1263 } 1264 } 1265 1266 return reqd; 1267 } 1268 1269 void 1270 RubberBandStretcher::Impl::process(const float *const *input, size_t samples, bool final) 1271 { 1272 Profiler profiler("RubberBandStretcher::Impl::process"); 1273 1274 if (m_mode == Finished) { 1275 cerr << "RubberBandStretcher::Impl::process: Cannot process again after final chunk" << endl; 1276 return; 1277 } 1278 1279 if (m_mode == JustCreated || m_mode == Studying) { 1280 1281 if (m_mode == Studying) { 1282 1283 calculateStretch(); 1284 1285 if (!m_realtime) { 1286 // See note in configure() above. Of course, we should 1287 // never enter Studying unless we are non-RT anyway 1288 if (m_debugLevel > 1) { 1289 cerr << "Not real time mode: prefilling" << endl; 1290 } 1291 for (size_t c = 0; c < m_channels; ++c) { 1292 m_channelData[c]->reset(); 1293 m_channelData[c]->inbuf->zero(m_aWindowSize/2); 1294 } 1295 } 1296 } 1297 1298 #ifndef NO_THREADING 1299 if (m_threaded) { 1300 MutexLocker locker(&m_threadSetMutex); 1301 1302 for (size_t c = 0; c < m_channels; ++c) { 1303 ProcessThread *thread = new ProcessThread(this, c); 1304 m_threadSet.insert(thread); 1305 thread->start(); 1306 } 1307 1308 if (m_debugLevel > 0) { 1309 cerr << m_channels << " threads created" << endl; 1310 } 1311 } 1312 #endif 1313 1314 m_mode = Processing; 1315 } 1316 1317 bool allConsumed = false; 1318 1319 size_t *consumed = (size_t *)alloca(m_channels * sizeof(size_t)); 1320 for (size_t c = 0; c < m_channels; ++c) { 1321 consumed[c] = 0; 1322 } 1323 1324 while (!allConsumed) { 1325 1326 // In a threaded mode, our "consumed" counters only indicate 1327 // the number of samples that have been taken into the input 1328 // ring buffers waiting to be processed by the process thread. 1329 // In non-threaded mode, "consumed" counts the number that 1330 // have actually been processed. 1331 1332 allConsumed = true; 1333 1334 for (size_t c = 0; c < m_channels; ++c) { 1335 consumed[c] += consumeChannel(c, 1336 input, 1337 consumed[c], 1338 samples - consumed[c], 1339 final); 1340 if (consumed[c] < samples) { 1341 allConsumed = false; 1342 // cerr << "process: waiting on input consumption for channel " << c << endl; 1343 } else { 1344 if (final) { 1345 m_channelData[c]->inputSize = m_channelData[c]->inCount; 1346 } 1347 // cerr << "process: happy with channel " << c << endl; 1348 } 1349 if ( 1350 #ifndef NO_THREADING 1351 !m_threaded && 1352 #endif 1353 !m_realtime) { 1354 bool any = false, last = false; 1355 processChunks(c, any, last); 1356 } 1357 } 1358 1359 if (m_realtime) { 1360 // When running in real time, we need to process both 1361 // channels in step because we will need to use the sum of 1362 // their frequency domain representations as the input to 1363 // the realtime onset detector 1364 processOneChunk(); 1365 } 1366 #ifndef NO_THREADING 1367 if (m_threaded) { 1368 for (ThreadSet::iterator i = m_threadSet.begin(); 1369 i != m_threadSet.end(); ++i) { 1370 (*i)->signalDataAvailable(); 1371 } 1372 m_spaceAvailable.lock(); 1373 if (!allConsumed) { 1374 m_spaceAvailable.wait(500); 1375 } 1376 m_spaceAvailable.unlock(); 1377 } 1378 #endif 1379 1380 if (m_debugLevel > 1) { 1381 if (!allConsumed) cerr << "process looping" << endl; 1382 } 1383 } 1384 1385 if (m_debugLevel > 1) { 1386 cerr << "process returning" << endl; 1387 } 1388 1389 if (final) m_mode = Finished; 1390 } 1391 1392 1393 } 1394 1395